Initial version of opensource goma
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..928103e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,17 @@
+out
+Makefile
+*.Makefile
+*.mk
+*.vcproj
+*.vcxproj
+*.user
+*.sln
+*.rules
+*.filters
+*.props
+*.targets
+*.xml
+*.ncb
+*.suo
+*.sdf
+gyp-mac-tool
diff --git a/.gn b/.gn
new file mode 100644
index 0000000..d579213
--- /dev/null
+++ b/.gn
@@ -0,0 +1,16 @@
+# Copyright (c) 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Copied from chromium's src/.gn.
+#
+# This file is used by the experimental meta-buildsystem in src/tools/gn to
+# find the root of the source tree and to set startup options.
+
+# The location of the build configuration file.
+buildconfig = "//build/config/BUILDCONFIG.gn"
+
+# The secondary source root is a parallel directory tree where
+# GN build files are placed when they can not be placed directly
+# in the source tree, e.g. for third party source trees.
+secondary_source = "//build/secondary/"
diff --git a/BUILD.gn b/BUILD.gn
new file mode 100644
index 0000000..ca58c4e
--- /dev/null
+++ b/BUILD.gn
@@ -0,0 +1,11 @@
+# Copyright 2014 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+group("root") {
+  deps = [
+    "//base",
+    "//client",
+    "//lib",
+  ]
+}
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..6790a8b
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,21 @@
+# How to Contribute
+
+We'd love to accept your patches and contributions to this project. There are
+just a few small guidelines you need to follow.
+
+## Contributor License Agreement
+
+Contributions to this project must be accompanied by a Contributor License
+Agreement. You (or your employer) retain the copyright to your contribution;
+this simply gives us permission to use and redistribute your contributions as
+part of the project. Head over to <https://cla.developers.google.com/> to see
+your current agreements on file or to sign a new one.
+
+You generally only need to submit a CLA once, so if you've already submitted one
+(even if it was for a different project), you probably don't need to do it
+again.
+
+## Code reviews
+
+All submissions, including submissions by project members, require review. We
+use `git cl` (via `depot_tools`) for this process.
diff --git a/DEPS b/DEPS
new file mode 100644
index 0000000..9442a88
--- /dev/null
+++ b/DEPS
@@ -0,0 +1,136 @@
+vars = {
+     "chromium_git": "https://chromium.googlesource.com",
+}
+
+deps = {
+     # protobuf 3.3.0
+     # Note: When you update protobuf, you will need to update
+     # test/goma_data.pb.{h,cc}. Copying them from your output directory should
+     # work.
+     "client/third_party/protobuf/protobuf":
+     "https://github.com/google/protobuf.git@a6189acd18b00611c1dc7042299ad75486f08a1a",
+
+     # google-glog
+     "client/third_party/glog":
+     "https://github.com/google/glog.git@2063b387080c1e7adffd33ca07adff0eb346ff1a",
+
+     # googletest 1.7.0
+     "client/third_party/gtest":
+     Var("chromium_git") + "/external/googletest.git@6215b1cab9c2cb93cc0110fd536af3be5ac18f93",
+
+     # zlib 1.2.8
+     "client/third_party/zlib":
+     "https://goma.googlesource.com/zlib.git@50893291621658f355bc5b4d450a8d06a563053d",
+
+     # xz v5.2.0
+     "client/third_party/xz":
+     "https://goma.googlesource.com/xz.git@fbafe6dd0892b04fdef601580f2c5b0e3745655b",
+
+     # jsoncpp
+     "client/third_party/jsoncpp/source":
+     Var("chromium_git") + '/external/github.com/open-source-parsers/jsoncpp.git@f572e8e42e22cfcf5ab0aea26574f408943edfa4', # from svn 248
+
+     # gyp
+     # Note: this is used by build/vs_toolchain.py, and nobody else may
+     # use this.
+     "client/tools/gyp":
+     Var("chromium_git") + "/external/gyp.git@" +
+         "c6f471687407bf28ddfc63f1a8f47aeb7bf54edc",
+
+     # chrome's tools/clang
+     "client/tools/clang":
+     "https://chromium.googlesource.com/chromium/src/tools/clang.git",
+
+     # chrome's deps/third_party/boringssl
+     "client/third_party/boringssl/src":
+     "https://boringssl.googlesource.com/boringssl@650d8c393e132669dc55462d70e801e06104e99e",
+
+     # google-breakpad
+     "client/third_party/breakpad/breakpad":
+     Var("chromium_git") + "/breakpad/breakpad.git@" +
+         "70914b2d380d893364ad0110b8af18ba1ed5aaa3",
+
+     # lss
+     "client/third_party/lss":
+     Var("chromium_git") + "/linux-syscall-support.git@" +
+         "a91633d172407f6c83dd69af11510b37afebb7f9",
+
+     # chrome's patched-yasm
+     "client/third_party/yasm/source/patched-yasm":
+     Var("chromium_git") + "/chromium/deps/yasm/patched-yasm.git@" +
+         "b98114e18d8b9b84586b10d24353ab8616d4c5fc",
+
+     # libc++ r256621
+     "client/third_party/libc++/trunk":
+     Var("chromium_git") + "/chromium/llvm-project/libcxx.git@" +
+         "b1ece9c037d879843b0b0f5a2802e1e9d443b75a",
+
+     # libc++abi r256623
+     "client/third_party/libc++abi/trunk":
+     Var("chromium_git") + "/chromium/llvm-project/libcxxabi.git@" +
+         "0edb61e2e581758fc4cd4cd09fc588b3fc91a653",
+
+     # libFuzzer
+     "client/third_party/libFuzzer/src":
+     Var("chromium_git") + "/chromium/llvm-project/llvm/lib/Fuzzer.git@" +
+         "9aa0bddeb6820f6e5d897da410e1e8a3f7fd4b8e",
+}
+
+hooks = [
+     {
+       "name": "clang",
+       "pattern": ".",
+       "action": ["python", "client/tools/clang/scripts/update.py"],
+     },
+
+     # Pull binutils for linux, it is used for simpletry test.
+     {
+       "name": "binutils",
+       "pattern": ".",
+       "action": [
+         "python",
+         "client/test/third_party/binutils/download.py",
+       ],
+     },
+
+     # Pull GN binaries.
+     {
+       "name": "gn_win",
+       "pattern": ".",
+       "action": [ "download_from_google_storage",
+                   "--no_resume",
+                   "--platform=win32",
+                   "--no_auth",
+                   "--bucket", "chromium-gn",
+                   "-s", "client/buildtools/win/gn.exe.sha1",
+       ],
+     },
+     {
+       "name": "gn_mac",
+       "pattern": ".",
+       "action": [ "download_from_google_storage",
+                   "--no_resume",
+                   "--platform=darwin",
+                   "--no_auth",
+                   "--bucket", "chromium-gn",
+                   "-s", "client/buildtools/mac/gn.sha1",
+       ],
+     },
+     {
+       "name": "gn_linux64",
+       "pattern": ".",
+       "action": [ "download_from_google_storage",
+                   "--no_resume",
+                   "--platform=linux*",
+                   "--no_auth",
+                   "--bucket", "chromium-gn",
+                   "-s", "client/buildtools/linux64/gn.sha1",
+       ],
+     },
+     # Update the Windows toolchain if necessary.
+     {
+       'name': 'win_toolchain',
+       'pattern': '.',
+       'action': ['python', 'client/build/vs_toolchain.py', 'update'],
+     },
+]
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..c834910
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,27 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/PRESUBMIT.py b/PRESUBMIT.py
new file mode 100644
index 0000000..26ad264
--- /dev/null
+++ b/PRESUBMIT.py
@@ -0,0 +1,83 @@
+# Copyright 2012 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Top-level presubmit script for goma/client.
+
+To run presubmit,
+  $ git cl presubmit --upload
+
+See http://dev.chromium.org/developers/how-tos/depottools/presubmit-scripts
+for more details about the presubmit API built into git-cl.
+"""
+
+def CheckChangeLintsClean(input_api, output_api, source_file_filter=None):
+  """Checks that all '.cc' and '.h' files pass cpplint.py.
+
+  It is a clone of the check in depot_tools/presubmit_canned_checks.py, but
+  wraps cpplint.GetHeaderGuardCPPVariable because our code uses the
+  'DEVTOOLS_GOMA_' prefix for header guards. Returns a one-element list
+  (error when committing, prompt warning otherwise) if cpplint found errors.
+  """
+  # The dot is escaped so that only a real '.cc' / '.h' extension matches.
+  _RE_IS_TEST = input_api.re.compile(r'.*tests?\.(cc|h)$')
+  result = []
+  import cpplint
+  # pylint: disable=W0212
+  cpplint._cpplint_state.ResetErrorCounts()
+
+  getHeaderGuardCPPVariable = cpplint.GetHeaderGuardCPPVariable
+  def gomaGetHeaderGuardCPPVariable(filename):
+    return 'DEVTOOLS_GOMA_' + getHeaderGuardCPPVariable(filename)
+  cpplint.GetHeaderGuardCPPVariable = gomaGetHeaderGuardCPPVariable
+
+  cpplint._SetFilters('-build/include,-build/include_order,'
+                      '-readability/casting,-runtime/int')
+  files = [f.AbsoluteLocalPath() for f in
+           input_api.AffectedSourceFiles(source_file_filter)]
+  # Test files are processed with level 5, everything else with level 4.
+  for file_name in files:
+    level = 5 if _RE_IS_TEST.match(file_name) else 4
+    cpplint.ProcessFile(file_name, level)
+
+  if cpplint._cpplint_state.error_count > 0:
+    if input_api.is_committing:
+      res_type = output_api.PresubmitError
+    else:
+      res_type = output_api.PresubmitPromptWarning
+    result = [res_type('Changelist failed cpplint.py check.')]
+
+  return result
+
+
+def CheckChangeOnUpload(input_api, output_api):
+  """Runs all upload-time checks and returns the combined result list."""
+  checks = input_api.canned_checks
+  results = []
+  results += checks.CheckChangeHasDescription(input_api, output_api)
+  results += CheckChangeLintsClean(input_api, output_api)
+  results += checks.CheckChangeHasNoCrAndHasOnlyOneEol(input_api, output_api)
+  results += checks.CheckChangeHasNoTabs(input_api, output_api)
+  results += checks.CheckChangeTodoHasOwner(input_api, output_api)
+  results += checks.CheckChangeHasNoStrayWhitespace(input_api, output_api)
+  results += checks.CheckLongLines(input_api, output_api, 80)
+  # Files in this repository are headed "Copyright <year> The Goma Authors.
+  # All rights reserved.", so that notice must be accepted alongside the
+  # Google Inc. and Chromium Authors ones.
+  results += checks.CheckLicense(
+      input_api, output_api,
+      r'(Copyright 201\d Google Inc. All Rights Reserved.|' +
+       'Copyright.*The Chromium Authors. All rights reserved.|' +
+       'Copyright.*The Goma Authors. All rights reserved.)')
+  results += checks.CheckDoNotSubmit(input_api, output_api)
+  results += checks.RunPylint(
+      input_api, output_api,
+      black_list=(r'third_party[\\/].*',
+                  r'build[\\/]tools[\\/].*',
+                  r'tools[\\/].*',
+                  r'out[\\/].*',
+                  r'build[\\/](Debug|Release).*'))
+  results += checks.CheckGNFormatted(input_api, output_api)
+  return results
+
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..fe1436c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,143 @@
+# Goma
+
+*Goma* is a distributed compiler service for open-source projects such as
+Chromium and Android. It is a kind of replacement for distcc+ccache.
+
+NOTE: Currently the goma backend is not available to non-Googlers.
+We're working on making it usable by Chromium developers. Stay tuned.
+
+# How goma works
+
+Goma hooks a compile request, and sends it to a backend compile server.
+If you have plenty of backend servers, many compiles can be processed in
+parallel, for example, -j100, -j500 or -j1000.
+
+Also, the goma backend caches the compile result. If the same compile request
+comes again, the cached result is returned from the goma cache server.
+
+# How to build
+
+goma client can be built on Linux, Mac, and Win.
+
+## Install dependencies
+
+1. Install [depot_tools](http://dev.chromium.org/developers/how-tos/install-depot-tools).
+2. Install dependencies.
+
+On debian or ubuntu,
+
+```
+$ sudo apt-get install libssl-dev libc6-dev-i386
+```
+
+On Mac, install Xcode.
+
+On Windows, install Visual Studio 2017. Community edition is OK.
+
+
+## Checkout source
+
+Public version is not available yet.
+
+
+We assume goma is checked out to `$GOMA_SRC`.
+
+## Build
+
+```
+$ cd "$GOMA_SRC/client"
+$ gclient sync
+$ gn gen --args='is_debug=false' out/Release
+$ ninja -C out/Release
+```
+
+### Several important gn args
+
+The build option can be modified with gn args.
+
+```
+is_debug=true/false
+  Do debug build if true.
+dcheck_always_on=true/false
+  Enable DCHECK always (even in release build).
+is_asan=true/false
+  Use ASan build (with clang).
+use_link_time_optimization=true/false
+  Currently works only on Windows. If true, /LTCG is enabled.
+use_lld=true/false
+  Use lld for linking (linking will be faster).
+```
+
+## Run unittest
+
+```
+$ cd "$GOMA_SRC/client"
+$ ./build/run_unittest.py --target=Release --build-dir=out
+```
+
+# How to use
+
+## For Chromium/Android development
+
+Goma can be integrated with Chromium/Android development easily.
+
+1. Build goma client
+2. Start compiler_proxy
+
+```
+$ "$GOMA_SRC/client/out/Release/goma_ctl.py" start
+```
+
+### For Chromium
+
+In Chromium src, specify the following args in `gn args`
+
+```
+use_goma = true
+goma_dir = "$GOMA_SRC/client/out/Release"  (Replace $GOMA_SRC with your checkout path)
+```
+
+Then build like the following:
+
+```
+$ cd /path/to/chromium/src/out/Release
+$ ninja -j100 chrome
+```
+
+### For Android
+
+```
+$ source build/envsetup.sh
+$ lunch aosp_arm-eng
+$ GOMA_DIR=$GOMA_SRC/client/out/Release USE_GOMA=true make -j4
+```
+
+Here, `-j4` is not related to goma parallelism. Android internally sets
+`-j500` (or `-j` with `NINJA_REMOTE_NUM_JOBS` environment variable) for goma.
+
+## For general development
+
+1. Build goma client
+2. Start `compiler_proxy`
+
+```
+$ ./goma_ctl.py ensure_start
+```
+
+3. Change your build script so that `gomacc` is prepended to the compiler command.
+   For example:
+
+```
+$ gomacc clang++ -c foo.cc
+```
+
+4. Build your product with `make -j100`, `ninja -j100` or larger -j.
+   Check http://localhost:8080 to see that compiler_proxy is actually working.
+
+
+### Tips
+
+* You can use [autoninja](https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/master/autoninja) in depot_tools instead of specifying gomacc manually.
+
+
+
diff --git a/base/BUILD.gn b/base/BUILD.gn
new file mode 100644
index 0000000..d69fdd8
--- /dev/null
+++ b/base/BUILD.gn
@@ -0,0 +1,147 @@
+# Copyright 2014 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+config("base_config") {
+  include_dirs = [ "." ]
+}
+
+static_library("base") {
+  sources = [
+    "basictypes.h",
+    "compiler_specific.h",
+    "file.cc",
+    "file.h",
+    "file_dir.cc",
+    "file_dir.h",
+    "join.cc",
+    "join.h",
+    "lockhelper.cc",
+    "lockhelper.h",
+    "path.cc",
+    "path.h",
+    "split.cc",
+    "split.h",
+    "string_piece_utils.cc",
+    "string_piece_utils.h",
+    "strutil.cc",
+    "strutil.h",
+  ]
+  deps = [
+    "//third_party:glog",
+  ]
+  public_deps = [
+    "//third_party/chromium_base:platform_thread",
+    "//third_party/chromium_base:string",
+  ]
+
+  if (os == "win") {
+    sources += [
+      "config_win.h",
+      "socket_helper_win.cc",
+      "socket_helper_win.h",
+    ]
+
+    cflags = [
+      # These warnings come from int and SOCKET type mismatch.
+      "/wd4309",  # Truncation of constant value
+    ]
+  }
+  public_configs = [ ":base_config" ]
+}
+
+static_library("goma_unittest") {
+  testonly = true
+  sources = [
+    "goma_unittest.cc",
+  ]
+  deps = [
+    "//third_party:gtest",
+  ]
+}
+
+executable("join_unittest") {
+  testonly = true
+  sources = [
+    "join_unittest.cc",
+  ]
+  deps = [
+    ":base",
+    ":goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:glog",
+    "//third_party:gtest",
+  ]
+}
+
+executable("lockhelper_unittest") {
+  testonly = true
+  sources = [
+    "lockhelper_unittest.cc",
+  ]
+  deps = [
+    ":base",
+    ":goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:glog",
+    "//third_party:gtest",
+  ]
+}
+
+executable("split_unittest") {
+  testonly = true
+  sources = [
+    "split_unittest.cc",
+  ]
+  deps = [
+    ":base",
+    ":goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:glog",
+    "//third_party:gtest",
+  ]
+}
+
+executable("string_piece_utils_unittest") {
+  testonly = true
+  sources = [
+    "string_piece_utils_unittest.cc",
+  ]
+  deps = [
+    ":base",
+    ":goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:glog",
+    "//third_party:gtest",
+  ]
+}
+
+executable("strutil_unittest") {
+  testonly = true
+  sources = [
+    "strutil_unittest.cc",
+  ]
+  deps = [
+    ":base",
+    ":goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:glog",
+    "//third_party:gtest",
+  ]
+}
+
+if (os == "win") {
+  executable("socket_helper_win_unittest") {
+    testonly = true
+    sources = [
+      "socket_helper_win_unittest.cc",
+    ]
+    deps = [
+      ":base",
+      ":goma_unittest",
+      "//build/config/sanitizers:deps",
+      "//third_party:glog",
+      "//third_party:gtest",
+    ]
+  }
+}
diff --git a/base/basictypes.h b/base/basictypes.h
new file mode 100644
index 0000000..a689f54
--- /dev/null
+++ b/base/basictypes.h
@@ -0,0 +1,77 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_BASE_BASICTYPES_H_
+#define DEVTOOLS_GOMA_BASE_BASICTYPES_H_
+
+#include <stddef.h>  // for size_t
+
+// Put this in the private: declarations for a class to be uncopyable.
+#define DISALLOW_COPY(TypeName) \
+  TypeName(const TypeName&) = delete;
+
+// Put this in the private: declarations for a class to be unassignable.
+#define DISALLOW_ASSIGN(TypeName) \
+  void operator=(const TypeName&) = delete;
+
+// A macro to disallow the copy constructor and operator= functions
+// This should be used in the private: declarations for a class
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+  TypeName(const TypeName&) = delete;               \
+  void operator=(const TypeName&) = delete;
+
+// A macro to disallow all the implicit constructors, namely the
+// default constructor, copy constructor and operator= functions.
+//
+// This should be used in the private: declarations for a class
+// that wants to prevent anyone from instantiating it. This is
+// especially useful for classes containing only static methods.
+#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
+  TypeName() = delete;                                    \
+  DISALLOW_COPY_AND_ASSIGN(TypeName);
+
+// The arraysize(arr) macro returns the # of elements in an array arr.  The
+// expression is a compile-time constant, and therefore can be used in defining
+// new arrays, for example.  If you use arraysize on a pointer by mistake, you
+// will get a compile-time error.  For the technical details, refer to
+// http://blogs.msdn.com/b/the1/archive/2004/05/07/128242.aspx.
+
+// This template function declaration is used in defining arraysize.
+// Note that the function doesn't need an implementation, as we only
+// use its type.
+template <typename T, size_t N> char (&ArraySizeHelper(T (&array)[N]))[N];
+#define arraysize(arr) (sizeof(ArraySizeHelper(arr)))
+
+// The COMPILE_ASSERT macro can be used to verify that a compile time
+// expression is true. For example, you could use it to verify the
+// size of a static array:
+//
+//   COMPILE_ASSERT(arraysize(content_type_names) == CONTENT_NUM_TYPES,
+//                  content_type_names_incorrect_size);
+//
+// or to make sure a struct is smaller than a certain size:
+//
+//   COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
+//
+// The second argument to the macro is the name of the variable. If
+// the expression is false, most compilers will issue a warning/error
+// containing the name of the variable.
+
+#undef COMPILE_ASSERT
+#define COMPILE_ASSERT(expr, msg) static_assert(expr, #msg)
+
+// The FALLTHROUGH_INTENDED macro can be used to annotate implicit fall-through
+// between switch labels.
+#if defined(__clang__) && defined(__has_warning)
+#if __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough")
+#define FALLTHROUGH_INTENDED [[clang::fallthrough]]  // NOLINT
+#endif
+#endif
+
+#ifndef FALLTHROUGH_INTENDED
+#define FALLTHROUGH_INTENDED do { } while (0)
+#endif
+
+#endif  // DEVTOOLS_GOMA_BASE_BASICTYPES_H_
diff --git a/base/compiler_specific.h b/base/compiler_specific.h
new file mode 100644
index 0000000..4d1bb83
--- /dev/null
+++ b/base/compiler_specific.h
@@ -0,0 +1,180 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_BASE_COMPILER_SPECIFIC_H_
+#define DEVTOOLS_GOMA_BASE_COMPILER_SPECIFIC_H_
+
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#pragma once
+
+#if defined(_MSC_VER)  // COMPILER_MSVC
+
+// Macros for suppressing and disabling warnings on MSVC.
+//
+// Warning numbers are enumerated at:
+// http://msdn.microsoft.com/en-us/library/8x5x43k7(VS.80).aspx
+//
+// The warning pragma:
+// http://msdn.microsoft.com/en-us/library/2c8f766e(VS.80).aspx
+//
+// Using __pragma instead of #pragma inside macros:
+// http://msdn.microsoft.com/en-us/library/d9x1s805.aspx
+
+// MSVC_SUPPRESS_WARNING disables warning |n| for the remainder of the line and
+// for the next line of the source file.
+#define MSVC_SUPPRESS_WARNING(n) __pragma(warning(suppress:n))
+
+// MSVC_PUSH_DISABLE_WARNING pushes |n| onto a stack of warnings to be disabled.
+// The warning remains disabled until popped by MSVC_POP_WARNING.
+#define MSVC_PUSH_DISABLE_WARNING(n) __pragma(warning(push)) \
+                                     __pragma(warning(disable:n))
+
+// MSVC_PUSH_WARNING_LEVEL pushes |n| as the global warning level.  The level
+// remains in effect until popped by MSVC_POP_WARNING().  Use 0 to disable all
+// warnings.
+#define MSVC_PUSH_WARNING_LEVEL(n) __pragma(warning(push, n))
+
+// Pop effects of innermost MSVC_PUSH_* macro.
+#define MSVC_POP_WARNING() __pragma(warning(pop))
+
+#define MSVC_DISABLE_OPTIMIZE() __pragma(optimize("", off))
+#define MSVC_ENABLE_OPTIMIZE() __pragma(optimize("", on))
+
+// Allows |this| to be passed as an argument in constructor initializer lists.
+// This uses push/pop instead of the seemingly simpler suppress feature to avoid
+// having the warning be disabled for more than just |code|.
+//
+// Example usage:
+// Foo::Foo() : x(NULL), ALLOW_THIS_IN_INITIALIZER_LIST(y(this)), z(3) {}
+//
+// Compiler warning C4355: 'this': used in base member initializer list:
+// http://msdn.microsoft.com/en-us/library/3c594ae3(VS.80).aspx
+#define ALLOW_THIS_IN_INITIALIZER_LIST(code) MSVC_PUSH_DISABLE_WARNING(4355) \
+                                             code \
+                                             MSVC_POP_WARNING()
+
+// Allows exporting a class that inherits from a non-exported base class.
+// This uses suppress instead of push/pop because the delimiter after the
+// declaration (either "," or "{") has to be placed before the pop macro.
+//
+// Example usage:
+// class EXPORT_API Foo : NON_EXPORTED_BASE(public Bar) {
+//
+// MSVC Compiler warning C4275:
+// non dll-interface class 'Bar' used as base for dll-interface class 'Foo'.
+// Note that this is intended to be used only when no access to the base class'
+// static data is done through derived classes or inline methods. For more info,
+// see http://msdn.microsoft.com/en-us/library/3tdb471s(VS.80).aspx
+#define NON_EXPORTED_BASE(code) MSVC_SUPPRESS_WARNING(4275) \
+                                code
+
+// Added for proto warnings
+// - third_party\protobuf\src\google/protobuf/io/coded_stream.h(901):
+//   warning C4244: '=' : conversion from 'google::protobuf::uint32' to
+//   'google::protobuf::uint8', possible loss of data
+// - third_party\protobuf\src\google/protobuf/repeated_field.h(474):
+//   warning C4127: conditional expression is constant
+// - third_party\protobuf\src\google/protobuf/stubs/common.h(1201):
+//   warning C4512: 'google::protobuf::FatalException' :
+//   assignment operator could not be generated
+#define MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()   \
+    __pragma(warning(push)) \
+    __pragma(warning(disable:4244 4127 4512))
+
+// C4127: conditional expression is constant. FD_SET uses do {..} while (0,0)
+// C4389: '==' : signed/unsigned mismatch if we use 'int' for fd (fd is SOCKET)
+#define MSVC_PUSH_DISABLE_WARNING_FOR_FD_SET() \
+  MSVC_PUSH_DISABLE_WARNING(4127 4389)
+
+#else  // Not MSVC
+
+#define MSVC_SUPPRESS_WARNING(n)
+#define MSVC_PUSH_DISABLE_WARNING(n)
+#define MSVC_PUSH_WARNING_LEVEL(n)
+#define MSVC_POP_WARNING()
+#define MSVC_DISABLE_OPTIMIZE()
+#define MSVC_ENABLE_OPTIMIZE()
+#define ALLOW_THIS_IN_INITIALIZER_LIST(code) code
+#define NON_EXPORTED_BASE(code) code
+#define MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#define MSVC_PUSH_DISABLE_WARNING_FOR_FD_SET()
+
+#endif  // COMPILER_MSVC
+
+
+// Annotate a variable indicating it's ok if the variable is not used.
+// (Typically used to silence a compiler warning when the assignment
+// is important for some other reason.)
+// Use like:
+//   int x ALLOW_UNUSED = ...;
+#if defined(__GNUC__)  // COMPILER_GCC
+#define ALLOW_UNUSED __attribute__((unused))
+#else
+#define ALLOW_UNUSED
+#endif
+
+// Annotate a function indicating it should not be inlined.
+// Use like:
+//   NOINLINE void DoStuff() { ... }
+#if defined(__GNUC__)  // COMPILER_GCC
+#define NOINLINE __attribute__((noinline))
+#elif defined(_MSC_VER)  // COMPILER_MSVC
+#define NOINLINE __declspec(noinline)
+#else
+#define NOINLINE
+#endif
+
+// Specify memory alignment for structs, classes, etc.
+// Use like:
+//   class ALIGNAS(16) MyClass { ... }
+//   ALIGNAS(16) int array[4];
+#if defined(_MSC_VER)  // COMPILER_MSVC
+#define ALIGNAS(byte_alignment) __declspec(align(byte_alignment))
+#elif defined(__GNUC__)  // COMPILER_GCC
+#define ALIGNAS(byte_alignment) __attribute__((aligned(byte_alignment)))
+#endif
+
+// Return the byte alignment of the given type (available at compile time).
+// Use like:
+//   ALIGNOF(int32)  // this would be 4
+#if defined(_MSC_VER)  // COMPILER_MSVC
+#define ALIGNOF(type) __alignof(type)
+#elif defined(__GNUC__)  // COMPILER_GCC
+#define ALIGNOF(type) __alignof__(type)
+#endif
+
+// Annotate a function indicating the caller must examine the return value.
+// Use like:
+//   int foo() WARN_UNUSED_RESULT;
+// (Chromium's |ignore_result()| helper is not part of this base/basictypes.h.)
+#if defined(__GNUC__)  // COMPILER_GCC
+#define WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+#else
+#define WARN_UNUSED_RESULT
+#endif
+
+// Tell the compiler a function is using a printf-style format string.
+// |format_param| is the one-based index of the format string parameter;
+// |dots_param| is the one-based index of the "..." parameter.
+// For v*printf functions (which take a va_list), pass 0 for dots_param.
+// (This is undocumented but matches what the system C headers do.)
+#if defined(__GNUC__)  // COMPILER_GCC
+#define PRINTF_FORMAT(format_param, dots_param) \
+    __attribute__((format(printf, format_param, dots_param)))
+#else
+#define PRINTF_FORMAT(format_param, dots_param)
+#endif
+
+// WPRINTF_FORMAT is the same, but for wide format strings.
+// This doesn't appear to yet be implemented in any compiler.
+// See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=38308 .
+#define WPRINTF_FORMAT(format_param, dots_param)
+// If available, it would look like:
+//   __attribute__((format(wprintf, format_param, dots_param)))
+
+#endif  // DEVTOOLS_GOMA_BASE_COMPILER_SPECIFIC_H_
diff --git a/base/config_win.h b/base/config_win.h
new file mode 100644
index 0000000..19d7a5e
--- /dev/null
+++ b/base/config_win.h
@@ -0,0 +1,58 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_BASE_CONFIG_WIN_H_
+#define DEVTOOLS_GOMA_BASE_CONFIG_WIN_H_
+
+#ifdef _WIN32
+#pragma once
+
+// This must be defined before the windows.h is included.
+#ifndef WINVER
+#define WINVER 0x0600
+#endif
+
+#ifndef _WIN32_WINNT
+#define _WIN32_WINNT 0x0600
+#endif
+
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN     // Avoid a bunch of conflicts.
+#endif
+
+#define PATH_MAX MAX_PATH
+
+#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS)
+# define PRECISION_DIVIDER         10000000Ui64
+# define DELTA_EPOCH_IN_MICROSECS  11644473600000000Ui64
+#else
+# define PRECISION_DIVIDER         10000000ULL
+# define DELTA_EPOCH_IN_MICROSECS  11644473600000000ULL
+#endif
+
+// Following definitions are only valid for win32.
+typedef int uid_t;
+typedef int gid_t;
+typedef unsigned long pid_t;
+typedef int mode_t;
+typedef int ssize_t;
+
+#if defined (_MSC_VER) && (_MSC_VER < 1600)
+  typedef unsigned char     uint8_t;
+  typedef signed char       int8_t;
+  typedef unsigned __int16  uint16_t;
+  typedef signed __int16    int16_t;
+  typedef unsigned __int32  uint32_t;
+  typedef signed __int32    int32_t;
+  typedef unsigned __int64  uint64_t;
+  typedef signed __int64    int64_t;
+#else
+  #include <stdint.h>
+#endif
+
+#include <windows.h>
+
+#endif  // _WIN32
+#endif  // DEVTOOLS_GOMA_BASE_CONFIG_WIN_H_
diff --git a/base/file.cc b/base/file.cc
new file mode 100644
index 0000000..1f1245d
--- /dev/null
+++ b/base/file.cc
@@ -0,0 +1,112 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "file.h"
+
+#ifndef _WIN32
+# include <libgen.h>
+# include <sys/stat.h>
+# include <sys/types.h>
+# include <unistd.h>
+#else
+# include "config_win.h"
+#endif
+
+#include <fstream>
+
+#include "file_dir.h"
+#include "glog/logging.h"
+#include "path.h"
+
+// TODO: Refactor code with Chromium base/file_util
+// Chrome base has file_util.h that pretty much replaces most functions in this
+// file.  I chose to do a quick patch here so that we can move on WIN32 build
+// without being blocked by refactoring with Chrome base.
+
+namespace File {
+
+// Copies the file |from| to |to| and returns true on success.
+// When |overwrite| is false an existing |to| is left untouched and the copy
+// fails.  Every open, read or write error is logged and reported as false.
+bool Copy(const char* from, const char* to, bool overwrite) {
+#ifdef _WIN32
+  // The last argument of CopyFileA means "fail if the destination exists",
+  // i.e. the inverse of |overwrite|.
+  if (!CopyFileA(from, to, !overwrite)) {
+    DWORD err = GetLastError();
+    LOG_SYSRESULT(err);
+    LOG(WARNING) << "failed to copy file:"
+                 << " from=" << from
+                 << " to=" << to;
+    return false;
+  }
+
+  return true;
+#else
+  std::ifstream ifs(from, std::ifstream::binary);
+  if (!ifs) {
+    LOG(WARNING) << "Input file not found: " << from;
+    return false;
+  }
+
+  struct stat stat_buf;
+  if (!overwrite &&
+      (0 == stat(to, &stat_buf))) {
+    LOG(ERROR) << "File " << to << " exists and overwrite is disabled";
+    return false;
+  }
+
+  std::ofstream ofs(to, std::ofstream::binary);
+  if (!ofs) {
+    LOG(WARNING) << "Cannot open output file: " << to;
+    return false;
+  }
+
+  char buf[4096];
+  while (!ifs.eof()) {
+    ifs.read(buf, sizeof(buf));
+    // A short read at end of file sets failbit together with eofbit; only a
+    // failure without eofbit is a real read error.
+    if (ifs.fail() && !ifs.eof()) {
+      LOG(WARNING) << "Failed to read file from: " << from;
+      return false;
+    }
+    ofs.write(buf, ifs.gcount());
+    if (!ofs) {
+      LOG(WARNING) << "Failed to write file to: " << to;
+      return false;
+    }
+  }
+
+  // Close explicitly so that an error while flushing buffered data (for
+  // example a full disk) is detected instead of being lost in the destructor.
+  ofs.close();
+  if (ofs.fail()) {
+    LOG(WARNING) << "Failed to write file to: " << to;
+    return false;
+  }
+
+  return true;
+#endif
+}
+
+// Creates the single directory |path|; returns false and logs on failure.
+// |mode| is passed to mkdir() on POSIX and is unused on Windows.
+bool CreateDir(const std::string& path, int mode) {
+#ifdef _WIN32
+  UNREFERENCED_PARAMETER(mode);
+  if (CreateDirectoryA(path.c_str(), nullptr)) {
+    return true;
+  }
+  const DWORD err = GetLastError();
+  LOG(ERROR) << "CreateDir failed: " << path << ": " << err;
+  LOG_SYSRESULT(err);
+  return false;
+#else
+  if (mkdir(path.c_str(), mode) < 0) {
+    // PLOG appends the errno description left by mkdir().
+    PLOG(ERROR) << "CreateDir failed: " << path;
+    return false;
+  }
+  return true;
+#endif
+}
+
+// Returns true if |path| can be queried and refers to a directory.
+bool IsDirectory(const char* path) {
+#ifdef _WIN32
+  const DWORD attributes = GetFileAttributesA(path);
+  if (attributes == INVALID_FILE_ATTRIBUTES) {
+    return false;
+  }
+  return (attributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
+#else
+  struct stat info;
+  if (stat(path, &info) != 0) {
+    return false;
+  }
+  return S_ISDIR(info.st_mode);
+#endif
+}
+
+}  // namespace File
diff --git a/base/file.h b/base/file.h
new file mode 100644
index 0000000..ed1d7da
--- /dev/null
+++ b/base/file.h
@@ -0,0 +1,23 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_BASE_FILE_H_
+#define DEVTOOLS_GOMA_BASE_FILE_H_
+
+#include <string>
+#include "string_piece.h"
+
+namespace File {
+
+// Copies the file |from| to |to| and returns true on success.
+// If |overwrite| is false and |to| already exists, the copy fails.
+bool Copy(const char* from, const char* to, bool overwrite);
+
+// Creates the directory |path| and returns true on success.
+// |mode| will be ignored in Windows.
+bool CreateDir(const std::string& path, int mode);
+
+// Returns true if |path| exists and is a directory.
+bool IsDirectory(const char* path);
+
+}  // namespace File
+
+#endif  // DEVTOOLS_GOMA_BASE_FILE_H_
diff --git a/base/file_dir.cc b/base/file_dir.cc
new file mode 100644
index 0000000..05c0d8c
--- /dev/null
+++ b/base/file_dir.cc
@@ -0,0 +1,136 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "file_dir.h"
+
+#ifndef _WIN32
+#include <dirent.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#else
+#include "config_win.h"
+#endif
+
+#include "file.h"
+#include "path.h"
+
+namespace devtools_goma {
+
+#ifndef _WIN32
+// POSIX implementation.  Appends one DirEntry per readdir() result to
+// |*entries|.  Returns false if |dirname| cannot be stat'ed, true with no
+// entries added if it exists but cannot be opened as a directory because it
+// is not one, and true after listing otherwise.
+bool ListDirectory(const string& dirname, std::vector<DirEntry>* entries) {
+  DIR* dir = opendir(dirname.c_str());
+  if (dir == nullptr) {
+    struct stat st;
+    if (stat(dirname.c_str(), &st) != 0) {
+      return false;
+    }
+    // An existing non-directory counts as success; an unreadable directory
+    // is an error.
+    return !S_ISDIR(st.st_mode);
+  }
+
+  struct dirent* ent;
+  while ((ent = readdir(dir)) != nullptr) {
+    DirEntry dent;
+    dent.name = ent->d_name;
+    dent.is_dir = ent->d_type == DT_DIR;
+    // d_type does not say what a symlink points to, and some file systems
+    // only ever report DT_UNKNOWN; ask stat() in both cases.
+    if (ent->d_type == DT_LNK || ent->d_type == DT_UNKNOWN) {
+      struct stat st;
+      if (stat(file::JoinPath(dirname, dent.name).c_str(), &st) == 0) {
+        dent.is_dir = S_ISDIR(st.st_mode);
+      }
+    }
+    entries->push_back(std::move(dent));
+  }
+  closedir(dir);
+  return true;
+}
+
+// POSIX implementation: removes |dirname| with rmdir().
+bool DeleteDirectory(const string& dirname) {
+  const int rc = rmdir(dirname.c_str());
+  return rc == 0;
+}
+
+#else
+// Windows implementation.  Appends one DirEntry per FindFirstFileA /
+// FindNextFileA result to |*entries|.  Returns false if |dirname| cannot be
+// queried or enumerated, true with no entries added if it is not a
+// directory, and true after a complete listing otherwise.
+bool ListDirectory(const string& dirname, std::vector<DirEntry>* entries) {
+  DWORD attr = GetFileAttributesA(dirname.c_str());
+  if (attr == INVALID_FILE_ATTRIBUTES)
+    return false;
+  if (!(attr & FILE_ATTRIBUTE_DIRECTORY))
+    return true;
+
+  const string pattern = dirname + "\\*";
+  WIN32_FIND_DATAA find_data = {0};
+  HANDLE find_handle = FindFirstFileA(pattern.c_str(), &find_data);
+  if (find_handle == INVALID_HANDLE_VALUE)
+    return false;
+
+  // FindNextFileA only promises a non-zero value on success, so test against
+  // FALSE rather than comparing with TRUE.
+  do {
+    DirEntry dent;
+    dent.name = find_data.cFileName;
+    dent.is_dir = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
+    entries->push_back(std::move(dent));
+  } while (FindNextFileA(find_handle, &find_data) != FALSE);
+
+  // Read the error before FindClose() can overwrite it.  Anything other than
+  // ERROR_NO_MORE_FILES means the enumeration stopped early.
+  const DWORD err = GetLastError();
+  FindClose(find_handle);
+  return err == ERROR_NO_MORE_FILES;
+}
+
+// Windows implementation: removes |dirname| with RemoveDirectoryA().
+bool DeleteDirectory(const string& dirname) {
+  const BOOL removed = RemoveDirectoryA(dirname.c_str());
+  return removed != 0;
+}
+
+// Windows stand-in for POSIX unlink(): deletes the file |path|.
+// Returns 0 on success and -1 on failure.
+int unlink(const char* path) {
+  // DeleteFileA reports success as "non-zero", which need not equal TRUE.
+  return DeleteFileA(path) ? 0 : -1;
+}
+#endif
+
+// Deletes |name|.  If it is a directory, everything below it is deleted
+// first.  Returns true only if every deletion succeeded; stops at the first
+// failure.
+bool RecursivelyDelete(const string& name) {
+  // TODO: rewrite non recursive like devtools/goma/server/dirutil.cc?
+  std::vector<devtools_goma::DirEntry> entries;
+  if (!devtools_goma::ListDirectory(name, &entries)) {
+    return false;
+  }
+  // ListDirectory() succeeds without entries when |name| is not a directory.
+  // Such a path is fully handled by unlink(); falling through to
+  // DeleteDirectory() would fail on the path that was just removed and turn
+  // a successful delete into a reported failure.
+  if (entries.empty() && !File::IsDirectory(name.c_str())) {
+    return unlink(name.c_str()) == 0;
+  }
+  for (const auto& ent : entries) {
+    // Never follow the self and parent links.
+    if (ent.name == "." || ent.name == "..") {
+      continue;
+    }
+    const string filename = file::JoinPath(name, ent.name);
+    if (ent.is_dir) {
+      if (!RecursivelyDelete(filename)) {
+        return false;
+      }
+    } else {
+      if (unlink(filename.c_str()) != 0) {
+        return false;
+      }
+    }
+  }
+  // The directory is empty now and can be removed itself.
+  return devtools_goma::DeleteDirectory(name);
+}
+
+// Returns true if |dirname| is a directory when the call returns, creating
+// it with |mode| when it was missing.
+bool EnsureDirectory(const string& dirname, int mode) {
+  const bool ready = File::IsDirectory(dirname.c_str()) ||
+                     File::CreateDir(dirname.c_str(), mode);
+  if (ready) {
+    return true;
+  }
+
+  // Creation may have failed because another process created the directory
+  // between the check and the create, so look once more before giving up.
+  return File::IsDirectory(dirname.c_str());
+}
+
+}  // namespace devtools_goma
diff --git a/base/file_dir.h b/base/file_dir.h
new file mode 100644
index 0000000..892721a
--- /dev/null
+++ b/base/file_dir.h
@@ -0,0 +1,40 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_BASE_FILE_DIR_H_
+#define DEVTOOLS_GOMA_BASE_FILE_DIR_H_
+
+#include <string>
+#include <vector>
+
+using std::string;
+
+namespace devtools_goma {
+
+// One entry of a directory listing produced by ListDirectory().
+struct DirEntry {
+  DirEntry() : is_dir(false) {}
+  // Name of the entry without the directory part.
+  string name;
+  // True if the entry is a directory.
+  bool is_dir;
+};
+
+// Gets the entries of a directory.
+// If dirname does not exist, it returns false.
+// If dirname is not a directory, it returns true, but *entries is empty.
+// If dirname is a directory, it returns true, and *entries is filled with
+// the directory's entries.
+// New entries are appended to *entries; it is not cleared first.
+bool ListDirectory(const string& dirname, std::vector<DirEntry>* entries);
+
+// Returns true if dirname is successfully deleted.
+bool DeleteDirectory(const string& dirname);
+
+// Returns true if dirname and its children are successfully deleted.
+bool RecursivelyDelete(const string& dirname);
+
+// Ensure directory exists.
+// Creates dirname with |mode| if it is not a directory yet and returns true
+// if it is a directory afterwards.
+bool EnsureDirectory(const string& dirname, int mode);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_BASE_FILE_DIR_H_
diff --git a/base/goma_unittest.cc b/base/goma_unittest.cc
new file mode 100644
index 0000000..12017d5
--- /dev/null
+++ b/base/goma_unittest.cc
@@ -0,0 +1,24 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+// Modified version of gtest_main.cc so that Winsock can be initialized
+
+#include <iostream>
+
+#include <gtest/gtest.h>
+
+#ifdef _WIN32
+#include "socket_helper_win.h"
+#endif
+
+// Test entry point: runs every registered gtest case and returns its result.
+int main(int argc, char **argv) {
+#ifdef _WIN32
+  // Lives for the whole run; presumably sets up Winsock for the tests.
+  WinsockHelper wsa;
+#endif
+  std::cout << "Running main() from gtest_main.cc\n";
+
+  // Let gtest consume its own command line flags before running.
+  testing::InitGoogleTest(&argc, argv);
+  const int exit_code = RUN_ALL_TESTS();
+  return exit_code;
+}
diff --git a/base/join.cc b/base/join.cc
new file mode 100644
index 0000000..381c8aa
--- /dev/null
+++ b/base/join.cc
@@ -0,0 +1,6 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+// only for join_unittest rule.
diff --git a/base/join.h b/base/join.h
new file mode 100644
index 0000000..84aafec
--- /dev/null
+++ b/base/join.h
@@ -0,0 +1,46 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_BASE_JOIN_H_
+#define DEVTOOLS_GOMA_BASE_JOIN_H_
+
+#include <string>
+using std::string;
+
+// Appends every element of |components| to |*result|, putting |delim|
+// between consecutive elements.  Existing contents of |*result| are kept.
+template <class Container>
+void JoinStrings(const Container& components,
+                 const string& delim,
+                 string* result) {
+  bool is_first = true;
+  for (const auto& component : components) {
+    if (!is_first) {
+      *result += delim;
+    }
+    is_first = false;
+    *result += component;
+  }
+}
+
+namespace strings {
+
+// Returns the elements of |components| concatenated with |delim| between
+// consecutive elements.
+template <class Container>
+string Join(const Container& components, const string& delim) {
+  string joined;
+  JoinStrings(components, delim, &joined);
+  return joined;
+}
+
+}  // namespace strings
+
+// absl compatibility layer
+namespace absl {
+
+// Same result as strings::Join(), offered under the absl name.
+template <class Container>
+string StrJoin(const Container& components, const string& delim) {
+  string joined = strings::Join(components, delim);
+  return joined;
+}
+
+}  // namespace absl
+
+#endif  // DEVTOOLS_GOMA_BASE_JOIN_H_
diff --git a/base/join_unittest.cc b/base/join_unittest.cc
new file mode 100644
index 0000000..ce12bdd
--- /dev/null
+++ b/base/join_unittest.cc
@@ -0,0 +1,24 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "join.h"
+
+#include <string>
+#include <vector>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+using std::string;
+
+// Three tokens must come out joined by the delimiter, in order.
+TEST(JoinTest, JoinStrings) {
+  const std::vector<string> tokens = {"foo", "bar", "baz"};
+  string joined;
+  JoinStrings(tokens, "::", &joined);
+  EXPECT_EQ("foo::bar::baz", joined);
+}
diff --git a/base/lockhelper.cc b/base/lockhelper.cc
new file mode 100644
index 0000000..4a9cdf3
--- /dev/null
+++ b/base/lockhelper.cc
@@ -0,0 +1,157 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "lockhelper.h"
+
+#ifdef _WIN32
+# include <stack>
+# include <glog/logging.h>
+#endif
+
+namespace devtools_goma {
+#if defined (_WIN32)
+
+// Win32 implementation of Lock on top of a CRITICAL_SECTION.
+
+Lock::Lock() {
+  // A non-zero spin count lets a contending thread spin briefly instead of
+  // going to sleep at once, which helps performance for short-held locks.
+  const DWORD kSpinCount = 2000;
+  ::InitializeCriticalSectionAndSpinCount(&os_lock_, kSpinCount);
+}
+
+Lock::~Lock() {
+  ::DeleteCriticalSection(&os_lock_);
+}
+
+// Returns true if the lock was taken without blocking.
+bool Lock::Try() const {
+  return ::TryEnterCriticalSection(&os_lock_) != FALSE;
+}
+
+void Lock::Acquire() const {
+  ::EnterCriticalSection(&os_lock_);
+}
+
+void Lock::Release() const {
+  ::LeaveCriticalSection(&os_lock_);
+}
+
+// Win32 implementation of ReadWriteLock on top of an SRWLOCK.
+ReadWriteLock::ReadWriteLock() {
+  ::InitializeSRWLock(&srw_lock_);
+}
+
+// Nothing is released here for the SRWLOCK.
+ReadWriteLock::~ReadWriteLock() {
+}
+
+// Takes the lock in shared (reader) mode.
+void ReadWriteLock::AcquireShared() const {
+  ::AcquireSRWLockShared(&srw_lock_);
+}
+
+// Releases a lock taken with AcquireShared().
+void ReadWriteLock::ReleaseShared() const {
+  ::ReleaseSRWLockShared(&srw_lock_);
+}
+
+// Takes the lock in exclusive (writer) mode.
+void ReadWriteLock::AcquireExclusive() const {
+  ::AcquireSRWLockExclusive(&srw_lock_);
+}
+
+// Releases a lock taken with AcquireExclusive().
+void ReadWriteLock::ReleaseExclusive() const {
+  ::ReleaseSRWLockExclusive(&srw_lock_);
+}
+
+// Win32 implementation: binds the condition variable to |user_lock|, which
+// must not be null.
+ConditionVariable::ConditionVariable(Lock* user_lock)
+    : user_lock_(user_lock) {
+  DCHECK(user_lock);
+  ::InitializeConditionVariable(&cv_);
+}
+
+// Nothing is released here for the CONDITION_VARIABLE.
+ConditionVariable::~ConditionVariable() {}
+
+// Waits on the condition variable using the critical section of the Lock
+// given at construction.
+void ConditionVariable::Wait() {
+  CRITICAL_SECTION* cs = &user_lock_->os_lock_;
+
+  if (FALSE == SleepConditionVariableCS(&cv_, cs, INFINITE)) {
+    // SleepConditionVariableCS reports an elapsed time-out as ERROR_TIMEOUT
+    // (not WAIT_TIMEOUT); with INFINITE that must never happen.
+    DCHECK(GetLastError() != ERROR_TIMEOUT);
+  }
+}
+
+// Wakes every thread currently waiting in Wait().
+void ConditionVariable::Broadcast() {
+  WakeAllConditionVariable(&cv_);
+}
+
+// Wakes a single thread currently waiting in Wait().
+void ConditionVariable::Signal() {
+  WakeConditionVariable(&cv_);
+}
+
+#else
+
+// POSIX implementation of Lock on top of a pthread mutex with default
+// attributes.  Return codes of the pthread calls are not checked.
+
+Lock::Lock() {
+  pthread_mutex_init(&os_lock_, nullptr);
+}
+
+Lock::~Lock() {
+  pthread_mutex_destroy(&os_lock_);
+}
+
+// Returns true if the mutex was taken without blocking.
+bool Lock::Try() const {
+  const int rc = pthread_mutex_trylock(&os_lock_);
+  return rc == 0;
+}
+
+void Lock::Acquire() const {
+  pthread_mutex_lock(&os_lock_);
+}
+
+void Lock::Release() const {
+  pthread_mutex_unlock(&os_lock_);
+}
+
+// POSIX implementation of ReadWriteLock on top of a pthread rwlock with
+// default attributes.  Return codes of the pthread calls are not checked.
+ReadWriteLock::ReadWriteLock() {
+  pthread_rwlock_init(&os_rwlock_, nullptr);
+}
+
+ReadWriteLock::~ReadWriteLock() {
+  pthread_rwlock_destroy(&os_rwlock_);
+}
+
+// Takes the lock in shared (reader) mode.
+void ReadWriteLock::AcquireShared() const {
+  pthread_rwlock_rdlock(&os_rwlock_);
+}
+
+// Releases a lock taken with AcquireShared().
+void ReadWriteLock::ReleaseShared() const {
+  pthread_rwlock_unlock(&os_rwlock_);
+}
+
+// Takes the lock in exclusive (writer) mode.
+void ReadWriteLock::AcquireExclusive() const {
+  pthread_rwlock_wrlock(&os_rwlock_);
+}
+
+// Releases a lock taken with AcquireExclusive().
+void ReadWriteLock::ReleaseExclusive() const {
+  pthread_rwlock_unlock(&os_rwlock_);
+}
+
+// POSIX implementation: remembers the pthread mutex inside |user_lock| so
+// that Wait() can hand it to pthread_cond_wait().
+// |user_lock| is dereferenced without a null check.
+ConditionVariable::ConditionVariable(Lock* user_lock)
+    : user_mutex_(&user_lock->os_lock_) {
+  pthread_cond_init(&condition_, nullptr);
+}
+
+ConditionVariable::~ConditionVariable() {
+  pthread_cond_destroy(&condition_);
+}
+
+// Waits on the condition using the mutex of the Lock given at construction.
+void ConditionVariable::Wait() {
+  pthread_cond_wait(&condition_, user_mutex_);
+}
+
+// Wakes a single waiting thread.
+void ConditionVariable::Signal() {
+  pthread_cond_signal(&condition_);
+}
+
+// Wakes every waiting thread.
+void ConditionVariable::Broadcast() {
+  pthread_cond_broadcast(&condition_);
+}
+#endif
+
+}  // namespace devtools_goma
diff --git a/base/lockhelper.h b/base/lockhelper.h
new file mode 100644
index 0000000..4497059
--- /dev/null
+++ b/base/lockhelper.h
@@ -0,0 +1,204 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_BASE_LOCKHELPER_H_
+#define DEVTOOLS_GOMA_BASE_LOCKHELPER_H_
+
+#include "basictypes.h"
+#include "thread_annotations.h"
+
+#ifdef __MACH__
+# include <libkern/OSAtomic.h>
+#endif
+
+#ifdef _WIN32
+# include "config_win.h"
+typedef CRITICAL_SECTION OSLockType;
+typedef SRWLOCK OSRWLockType;
+#else
+# include <pthread.h>
+# include <errno.h>
+typedef pthread_mutex_t OSLockType;
+typedef pthread_rwlock_t OSRWLockType;
+#endif
+
+namespace devtools_goma {
+
+// NOTE: capability based thread safety analysis is not working well
+// for shared lock. So, let me keep using older style thread safety analysis.
+
+// Mutual-exclusion lock wrapping the platform primitive named by OSLockType.
+class LOCKABLE Lock {
+ public:
+  Lock();
+  ~Lock();
+
+  // If the lock is not held, take it and return true.  If the lock is already
+  // held by something else, immediately return false.
+  bool Try() const EXCLUSIVE_TRYLOCK_FUNCTION(true);
+
+  // Take the lock, blocking until it is available if necessary.
+  void Acquire() const EXCLUSIVE_LOCK_FUNCTION();
+
+  // Release the lock.  This must only be called by the lock's holder: after
+  // a successful call to Try, or a call to Acquire.
+  void Release() const UNLOCK_FUNCTION();
+
+ private:
+  // ConditionVariable waits on the underlying OS lock directly.
+  friend class ConditionVariable;
+#ifdef _WIN32
+  friend class WinVistaCondVar;
+#endif
+  // mutable so that a const Lock can still be acquired and released.
+  mutable OSLockType os_lock_;
+  DISALLOW_COPY_AND_ASSIGN(Lock);
+};
+
+#ifdef __MACH__
+
+// In Mac, pthread becomes very slow when contention happens.
+// Using OSSpinLock improves performance for short lock holding.
+// Unlike Lock, this class offers no Try() and cannot be used with
+// ConditionVariable.
+class LOCKABLE FastLock {
+ public:
+  FastLock(const FastLock&) = delete;
+  FastLock& operator=(const FastLock&) = delete;
+
+  FastLock() : lock_(OS_SPINLOCK_INIT) {}
+
+  // Takes the spin lock, spinning until it is available.
+  void Acquire() const EXCLUSIVE_LOCK_FUNCTION() {
+    OSSpinLockLock(&lock_);
+  }
+
+  // Releases the spin lock taken by Acquire().
+  void Release() const UNLOCK_FUNCTION() {
+    OSSpinLockUnlock(&lock_);
+  }
+ private:
+  // TODO: Use os_unfair_lock if available.
+  // OSSpinLock is deprecated in 10.12.
+  mutable OSSpinLock lock_;
+};
+
+#else
+
+using FastLock = Lock;
+
+#endif
+
+// ReadWriteLock provides readers-writer lock.
+// Each Acquire* call must be paired with the matching Release* call.
+class LOCKABLE ReadWriteLock {
+ public:
+  ReadWriteLock();
+  ~ReadWriteLock();
+
+  // Shared (reader) mode.
+  void AcquireShared() const SHARED_LOCK_FUNCTION();
+  void ReleaseShared() const UNLOCK_FUNCTION();
+
+  // Exclusive (writer) mode.
+  void AcquireExclusive() const EXCLUSIVE_LOCK_FUNCTION();
+  void ReleaseExclusive() const UNLOCK_FUNCTION();
+
+ private:
+  // mutable so that a const ReadWriteLock can still be locked and unlocked.
+#ifdef _WIN32
+  mutable SRWLOCK srw_lock_;
+#else
+  mutable OSRWLockType os_rwlock_;
+#endif
+  DISALLOW_COPY_AND_ASSIGN(ReadWriteLock);
+};
+
+// Scoped guard for Lock: acquires in the constructor and releases in the
+// destructor.
+class SCOPED_LOCKABLE AutoLock {
+ public:
+  // Does not take ownership of |lock|, which must refer to a valid Lock
+  // that outlives this object.
+  explicit AutoLock(const Lock* lock) EXCLUSIVE_LOCK_FUNCTION(lock)
+      : lock_(*lock) {
+    lock_.Acquire();
+  }
+
+  ~AutoLock() UNLOCK_FUNCTION() {
+    lock_.Release();
+  }
+
+ private:
+  const Lock& lock_;
+  DISALLOW_COPY_AND_ASSIGN(AutoLock);
+};
+
+// Scoped guard for FastLock: acquires in the constructor and releases in the
+// destructor.
+class SCOPED_LOCKABLE AutoFastLock {
+ public:
+  AutoFastLock(const AutoFastLock&) = delete;
+  AutoFastLock& operator=(const AutoFastLock&) = delete;
+
+  // Does not take ownership of |lock|, which must refer to a valid FastLock
+  // that outlives this object.
+  explicit AutoFastLock(const FastLock* lock) EXCLUSIVE_LOCK_FUNCTION(lock)
+      : lock_(*lock) {
+    lock_.Acquire();
+  }
+
+  ~AutoFastLock() UNLOCK_FUNCTION() {
+    lock_.Release();
+  }
+
+ private:
+  const FastLock& lock_;
+};
+
+// Scoped guard holding a ReadWriteLock in exclusive mode for its lifetime.
+class SCOPED_LOCKABLE AutoExclusiveLock {
+ public:
+  // Does not take ownership of |lock|, which must refer to a valid
+  // ReadWriteLock that outlives this object.
+  explicit AutoExclusiveLock(const ReadWriteLock* lock)
+      EXCLUSIVE_LOCK_FUNCTION(lock) : lock_(*lock) {
+    lock_.AcquireExclusive();
+  }
+
+  ~AutoExclusiveLock() UNLOCK_FUNCTION() {
+    lock_.ReleaseExclusive();
+  }
+
+ private:
+  const ReadWriteLock& lock_;
+  DISALLOW_COPY_AND_ASSIGN(AutoExclusiveLock);
+};
+
+// Scoped guard holding a ReadWriteLock in shared mode for its lifetime.
+class SCOPED_LOCKABLE AutoSharedLock {
+ public:
+  // Does not take ownership of |lock|, which must refer to a valid
+  // ReadWriteLock that outlives this object.
+  explicit AutoSharedLock(const ReadWriteLock* lock) SHARED_LOCK_FUNCTION(lock)
+      : lock_(*lock) {
+    lock_.AcquireShared();
+  }
+
+  ~AutoSharedLock() UNLOCK_FUNCTION() {
+    lock_.ReleaseShared();
+  }
+
+ private:
+  const ReadWriteLock& lock_;
+  DISALLOW_COPY_AND_ASSIGN(AutoSharedLock);
+};
+
+// Condition variable bound to one Lock (POSIX-style interface, implemented
+// for both Windows and POSIX).
+class ConditionVariable {
+ public:
+  // |user_lock| is the Lock that Wait() waits with; it is not owned.
+  explicit ConditionVariable(Lock* user_lock);
+  ~ConditionVariable();
+
+  // Waits until woken.  NOTE(review): callers presumably must hold the lock
+  // passed to the constructor, as the tests do -- confirm.
+  void Wait();
+  // Wakes one waiting thread.
+  void Signal();
+  // Wakes all waiting threads.
+  void Broadcast();
+
+ private:
+#ifdef _WIN32
+  Lock* user_lock_;
+  CONDITION_VARIABLE cv_;
+#else  // Assume POSIX
+  pthread_cond_t condition_;
+  pthread_mutex_t* user_mutex_;
+#endif
+  DISALLOW_COPY_AND_ASSIGN(ConditionVariable);
+};
+
+}  // namespace devtools_goma
+#endif  // DEVTOOLS_GOMA_BASE_LOCKHELPER_H_
diff --git a/base/lockhelper_unittest.cc b/base/lockhelper_unittest.cc
new file mode 100644
index 0000000..4252e77
--- /dev/null
+++ b/base/lockhelper_unittest.cc
@@ -0,0 +1,672 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stdlib.h>
+
+#include <vector>
+#include <memory>
+
+#include <gtest/gtest.h>
+
+#include "lockhelper.h"
+#include "platform_thread.h"
+
+namespace devtools_goma {
+
+// Basic test to make sure that Acquire()/Release()/Try() don't crash
+
+// Thread body that exercises Acquire()/Release()/Try() on a shared Lock and
+// counts how many times it obtained the lock.
+class BasicLockTestThread : public PlatformThread::Delegate {
+ public:
+  explicit BasicLockTestThread(Lock* lock) : lock_(lock), acquired_(0) {
+  }
+
+  void ThreadMain() override {
+    // Back-to-back acquire/release.
+    for (int i = 0; i < 10; i++) {
+      lock_->Acquire();
+      acquired_++;
+      lock_->Release();
+    }
+    // Hold the lock for a random short time to provoke contention.
+    for (int i = 0; i < 10; i++) {
+      lock_->Acquire();
+      acquired_++;
+      PlatformThread::Sleep(rand() % 20);
+      lock_->Release();
+    }
+    // Non-blocking attempts; these may fail and then do not count.
+    for (int i = 0; i < 10; i++) {
+      if (lock_->Try()) {
+        acquired_++;
+        PlatformThread::Sleep(rand() % 20);
+        lock_->Release();
+      }
+    }
+  }
+
+  // Number of successful acquisitions; read by the test after Join().
+  int acquired() const { return acquired_; }
+
+ private:
+  Lock* lock_;
+  int acquired_;
+
+  DISALLOW_COPY_AND_ASSIGN(BasicLockTestThread);
+};
+
+// Runs a BasicLockTestThread while the calling thread uses the same Lock,
+// then checks that both sides acquired it at least 20 times (the blocking
+// acquisitions alone).  Always returns true; failures are reported through
+// the EXPECT_* macros.
+bool BasicLockTest() {
+  Lock lock;
+  BasicLockTestThread thread(&lock);
+  PlatformThreadHandle handle = kNullThreadHandle;
+
+  EXPECT_TRUE(PlatformThread::Create(&thread, &handle));
+
+  int acquired = 0;
+  for (int i = 0; i < 5; i++) {
+    lock.Acquire();
+    acquired++;
+    lock.Release();
+  }
+  for (int i = 0; i < 10; i++) {
+    lock.Acquire();
+    acquired++;
+    PlatformThread::Sleep(rand() % 20);
+    lock.Release();
+  }
+  // Try() may fail under contention, so these do not count towards the
+  // guaranteed minimum.
+  for (int i = 0; i < 10; i++) {
+    if (lock.Try()) {
+      acquired++;
+      PlatformThread::Sleep(rand() % 20);
+      lock.Release();
+    }
+  }
+  for (int i = 0; i < 5; i++) {
+    lock.Acquire();
+    acquired++;
+    PlatformThread::Sleep(rand() % 20);
+    lock.Release();
+  }
+
+  PlatformThread::Join(handle);
+
+  EXPECT_GE(acquired, 20);
+  EXPECT_GE(thread.acquired(), 20);
+
+  return true;
+}
+
+// Test that Try() works as expected -------------------------------------------
+
+// Thread body that makes one Try() on the Lock and records whether it
+// succeeded; the lock is released again right away when it did.
+class TryLockTestThread : public PlatformThread::Delegate {
+ public:
+  explicit TryLockTestThread(Lock* lock) : lock_(lock), got_lock_(false) {
+  }
+
+  void ThreadMain() override {
+    if (lock_->Try()) {
+      got_lock_ = true;
+      lock_->Release();
+    } else {
+      got_lock_ = false;
+    }
+  }
+
+  // Result of the Try(); read by the test after Join().
+  bool got_lock() const { return got_lock_; }
+
+ private:
+  Lock* lock_;
+  bool got_lock_;
+
+  DISALLOW_COPY_AND_ASSIGN(TryLockTestThread);
+};
+
+// Checks that Try() from another thread fails while this thread holds the
+// Lock and succeeds once it has been released.  Always returns true;
+// failures are reported through the EXPECT_* macros.
+bool TryLockTest() {
+  Lock lock;
+
+  if (lock.Try()) {
+    // We now have the lock....
+    // This thread will not be able to get the lock.
+    TryLockTestThread thread(&lock);
+    PlatformThreadHandle handle = kNullThreadHandle;
+
+    EXPECT_TRUE(PlatformThread::Create(&thread, &handle));
+
+    PlatformThread::Join(handle);
+
+    EXPECT_FALSE(thread.got_lock());
+
+    lock.Release();
+  } else {
+    EXPECT_TRUE(false) << "taking lock failed";
+  }
+
+  // This thread will....
+  {
+    TryLockTestThread thread(&lock);
+    PlatformThreadHandle handle = kNullThreadHandle;
+
+    EXPECT_TRUE(PlatformThread::Create(&thread, &handle));
+
+    PlatformThread::Join(handle);
+
+    EXPECT_TRUE(thread.got_lock());
+    // But it released it....
+    if (lock.Try()) {
+      lock.Release();
+    } else {
+      EXPECT_TRUE(false) << "taking lock failed";
+    }
+  }
+
+  return true;
+}
+
+// Tests that locks actually exclude -------------------------------------------
+
+// Thread body that increments a shared counter 40 times under a Lock.
+class MutexLockTestThread : public PlatformThread::Delegate {
+ public:
+  MutexLockTestThread(Lock* lock, int* value) : lock_(lock), value_(value) {}
+
+  // Static helper which can also be called from the main thread.
+  static void DoStuff(Lock* lock, int* value) {
+    for (int i = 0; i < 40; i++) {
+      lock->Acquire();
+      // Read, sleep, then write back: without mutual exclusion another
+      // thread's increment would be lost during the sleep.
+      int v = *value;
+      PlatformThread::Sleep(rand() % 10);
+      *value = v + 1;
+      lock->Release();
+    }
+  }
+
+  void ThreadMain() override {
+    DoStuff(lock_, value_);
+  }
+
+ private:
+  Lock* lock_;
+  int* value_;
+
+  DISALLOW_COPY_AND_ASSIGN(MutexLockTestThread);
+};
+
+// Runs DoStuff() on one extra thread and on the calling thread and expects
+// no increment to be lost.  Always returns true; failures are reported
+// through the EXPECT_* macros.
+bool MutexTwoThreads() {
+  Lock lock;
+  int value = 0;
+
+  MutexLockTestThread thread(&lock, &value);
+  PlatformThreadHandle handle = kNullThreadHandle;
+
+  EXPECT_TRUE(PlatformThread::Create(&thread, &handle));
+
+  MutexLockTestThread::DoStuff(&lock, &value);
+
+  PlatformThread::Join(handle);
+
+  // Two participants, 40 increments each.
+  EXPECT_EQ(2 * 40, value);
+  return true;
+}
+
+// Same as MutexTwoThreads() but with three extra threads plus the calling
+// thread.  Always returns true; failures are reported through the EXPECT_*
+// macros.
+bool MutexFourThreads() {
+  Lock lock;
+  int value = 0;
+
+  MutexLockTestThread thread1(&lock, &value);
+  MutexLockTestThread thread2(&lock, &value);
+  MutexLockTestThread thread3(&lock, &value);
+  PlatformThreadHandle handle1 = kNullThreadHandle;
+  PlatformThreadHandle handle2 = kNullThreadHandle;
+  PlatformThreadHandle handle3 = kNullThreadHandle;
+
+  EXPECT_TRUE(PlatformThread::Create(&thread1, &handle1));
+  EXPECT_TRUE(PlatformThread::Create(&thread2, &handle2));
+  EXPECT_TRUE(PlatformThread::Create(&thread3, &handle3));
+
+  MutexLockTestThread::DoStuff(&lock, &value);
+
+  PlatformThread::Join(handle1);
+  PlatformThread::Join(handle2);
+  PlatformThread::Join(handle3);
+
+  // Four participants, 40 increments each.
+  EXPECT_EQ(4 * 40, value);
+  return true;
+}
+
+// Two cooperating thread bodies that together write the digits 1-9 into
+// Data::result.  The thread with id 1 runs Count1() and writes only after
+// being signalled; any other id runs Count2(), which signals while the
+// counter is outside 3..5 and writes the middle digits itself.
+class ConditionVariableTestThread : public PlatformThread::Delegate {
+ public:
+  // State shared by both threads; accessed only while holding the lock.
+  struct Data {
+    Data() : result{}, index(0), count(0) {}
+
+    // Digits written so far; zero-initialized, so it stays NUL-terminated
+    // as long as at most 9 digits are written.
+    char result[10];
+    // Next position to write in |result|.
+    int index;
+    // Last number written.
+    int count;
+  };
+
+  ConditionVariableTestThread(int id,
+                              Lock* lock,
+                              ConditionVariable* cond,
+                              Data* data)
+      : id_(id), lock_(lock), cond_(cond), data_(data) {
+  }
+
+  void ThreadMain() override {
+    if (id_ == 1) {
+      Count1();
+    } else {
+      Count2();
+    }
+  }
+
+ private:
+  // Write numbers 1-3 and 7-9 as permitted by Count2()
+  void Count1() {
+    for (;;) {
+      lock_->Acquire();
+      // NOTE(review): the wait is not guarded by a predicate, so a spurious
+      // wakeup would be counted like a real signal -- confirm acceptable.
+      cond_->Wait();
+      data_->count++;
+      // Use EXPECT_TRUE instead of ASSERT_TRUE, since when the condition
+      // does not hold, ASSERT_TRUE will cause function exit, it means
+      // lock is not released.
+      EXPECT_TRUE((0 <= data_->index && data_->index < 3) ||
+                  (6 <= data_->index && data_->index < 9))
+          << data_->index;
+      data_->result[data_->index++] = static_cast<char>('0' + data_->count);
+      // Copy the counter so it can be checked after the lock is released.
+      int c = data_->count;
+      lock_->Release();
+      if (c >= 9) {
+        return;
+      }
+    }
+  }
+
+  // Write numbers 4-6 in Count2 thread.
+  void Count2() {
+    for (;;) {
+      lock_->Acquire();
+      if (data_->count < 3 || 6 <= data_->count) {
+        // Not this thread's range: let Count1() write the next digit.
+        cond_->Signal();
+      } else {
+        data_->count++;
+        // Use EXPECT_TRUE instead of ASSERT_TRUE, since when the condition
+        // does not hold, ASSERT_TRUE will cause function exit, it means
+        // lock is not released.
+        EXPECT_TRUE(3 <= data_->index && data_->index < 6) << data_->index;
+        data_->result[data_->index++] = static_cast<char>('0' + data_->count);
+      }
+      // Copy the counter so it can be checked after the lock is released.
+      int c = data_->count;
+      lock_->Release();
+      if (c >= 9) {
+        return;
+      }
+    }
+  }
+
+  const int id_;
+
+  Lock* lock_;
+  ConditionVariable* cond_;
+  Data* data_;
+
+  DISALLOW_COPY_AND_ASSIGN(ConditionVariableTestThread);
+};
+
+// Starts the two ConditionVariableTestThread bodies (ids 0 and 1) on a
+// shared Lock/ConditionVariable and expects them to produce "123456789".
+// Always returns true; failures are reported through the EXPECT_* macros.
+bool ConditionVar() {
+  Lock lock;
+  ConditionVariable cond(&lock);
+  ConditionVariableTestThread::Data data;
+
+  std::unique_ptr<ConditionVariableTestThread> threads[2];
+  PlatformThreadHandle handles[2];
+  for (int i = 0; i < 2; ++i) {
+    // The loop index doubles as the thread id: 0 runs Count2(), 1 Count1().
+    threads[i].reset(new ConditionVariableTestThread(i, &lock, &cond, &data));
+    handles[i] = kNullThreadHandle;
+  }
+
+  EXPECT_TRUE(PlatformThread::Create(threads[0].get(), &handles[0]));
+  EXPECT_TRUE(PlatformThread::Create(threads[1].get(), &handles[1]));
+
+  PlatformThread::Join(handles[0]);
+  PlatformThread::Join(handles[1]);
+
+  EXPECT_STREQ("123456789", data.result);
+  return true;
+}
+
+// ReadwriteLock BasicTest  ---------------------------------
+
+// Thread body that adds 20 to a shared counter under the exclusive lock and
+// checks in between that the counter is stable while the shared lock is
+// held.
+class ReadWriteLockBasicTestThread : public PlatformThread::Delegate {
+ public:
+  ReadWriteLockBasicTestThread(ReadWriteLock* lock, int* num)
+      : lock_(lock),
+        num_(num) {
+  }
+
+  void ThreadMain() override {
+    // 10 increments with explicit exclusive acquire/release.
+    for (int i = 0; i < 10; i++) {
+      lock_->AcquireExclusive();
+      *num_ += 1;
+      lock_->ReleaseExclusive();
+    }
+    // While a reader holds the lock no writer may change the value.
+    for (int i = 0; i < 10; i++) {
+      AutoSharedLock shared_autolock(lock_);
+      int num1 = *num_;
+      PlatformThread::Sleep(rand() % 20);
+      int num2 = *num_;
+      EXPECT_EQ(num1, num2);
+    }
+    // 10 more increments through the scoped exclusive guard.
+    for (int i = 0; i < 10; i++) {
+      AutoExclusiveLock exclusive_autolock(lock_);
+      *num_ += 1;
+      PlatformThread::Sleep(rand() % 20);
+    }
+  }
+
+ private:
+  ReadWriteLock* lock_;
+  int* num_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadWriteLockBasicTestThread);
+};
+
+// Runs two ReadWriteLockBasicTestThread bodies on one ReadWriteLock and
+// expects no increment to be lost.  Always returns true; failures are
+// reported through the EXPECT_* macros.
+bool ReadWriteLockBasicTest() {
+  ReadWriteLock lock;
+  int num = 0;
+
+  ReadWriteLockBasicTestThread thread1(&lock, &num);
+  ReadWriteLockBasicTestThread thread2(&lock, &num);
+  PlatformThreadHandle handle1 = kNullThreadHandle;
+  PlatformThreadHandle handle2 = kNullThreadHandle;
+
+  EXPECT_TRUE(PlatformThread::Create(&thread1, &handle1));
+  EXPECT_TRUE(PlatformThread::Create(&thread2, &handle2));
+
+  PlatformThread::Join(handle1);
+  PlatformThread::Join(handle2);
+
+  // Two threads, 20 increments each.
+  EXPECT_EQ(40, num);
+  return true;
+}
+
+// AcquireExclusive  -------------------------------------------
+
+// Thread body that flags that it has started and then increments a shared
+// counter under the exclusive lock.
+class ReadWriteLockAcquireExclusiveThread : public PlatformThread::Delegate {
+ public:
+  ReadWriteLockAcquireExclusiveThread(ReadWriteLock* lock, int* num) :
+      lock_(lock),
+      num_(num),
+      started_(false) {
+  }
+
+  void ThreadMain() override {
+    // Flag the start before the lock is requested below.
+    SetStarted();
+    AutoExclusiveLock autolock(lock_);
+    *num_ += 1;
+  }
+
+  void SetStarted() {
+    AutoLock lock(&mu_);
+    started_ = true;
+  }
+
+  // Polled by the test thread; guarded by |mu_|.
+  bool started() const {
+    AutoLock lock(&mu_);
+    return started_;
+  }
+
+ private:
+  ReadWriteLock* lock_;
+  int* num_;
+
+  // Protects |started_|.
+  mutable Lock mu_;
+  bool started_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadWriteLockAcquireExclusiveThread);
+};
+
+// Checks that a thread asking for the exclusive lock cannot proceed while
+// this thread holds it exclusively.  Always returns true; failures are
+// reported through the EXPECT_* macros.
+bool ReadWriteLockAcquireExclusiveTest1() {
+  ReadWriteLock lock;
+  int num = 0;
+
+  lock.AcquireExclusive();
+
+  // This thread will be blocked by |lock|.
+  ReadWriteLockAcquireExclusiveThread thread(&lock, &num);
+  PlatformThreadHandle handle = kNullThreadHandle;
+  EXPECT_TRUE(PlatformThread::Create(&thread, &handle));
+
+  // Wait until |thread| is really started.
+  while (!thread.started()) {
+    PlatformThread::Sleep(1);
+  }
+
+  // Try to run the thread.
+  // The thread cannot have changed |num| yet because the lock is still held.
+  EXPECT_EQ(num, 0);
+  num += 1;
+  EXPECT_EQ(num, 1);
+
+  lock.ReleaseExclusive();
+
+  // Now the thread can go on.
+
+  PlatformThread::Join(handle);
+  EXPECT_EQ(num, 2);
+
+  return true;
+}
+
+// Checks that a thread asking for the exclusive lock cannot proceed while
+// this thread holds the lock in shared mode.  Always returns true; failures
+// are reported through the EXPECT_* macros.
+bool ReadWriteLockAcquireExclusiveTest2() {
+  ReadWriteLock lock;
+  int num = 0;
+
+  lock.AcquireShared();
+
+  // This thread will be blocked by |lock|.
+  ReadWriteLockAcquireExclusiveThread thread(&lock, &num);
+  PlatformThreadHandle handle = kNullThreadHandle;
+  EXPECT_TRUE(PlatformThread::Create(&thread, &handle));
+
+  // Wait until |thread| is really started.
+  while (!thread.started()) {
+    PlatformThread::Sleep(1);
+  }
+
+  // The writer must not have run while the shared lock is held.
+  EXPECT_EQ(num, 0);
+  lock.ReleaseShared();
+
+  // Now the thread can go on.
+
+  PlatformThread::Join(handle);
+  EXPECT_EQ(num, 1);
+
+  return true;
+}
+
+// AcquireShared  -------------------------------------------
+
+// Thread body that flags that it has started and then copies a shared
+// counter while holding the lock in shared mode.
+class ReadWriteLockAcquireSharedThread : public PlatformThread::Delegate {
+ public:
+  ReadWriteLockAcquireSharedThread(ReadWriteLock* lock, int* num) :
+      lock_(lock),
+      num_(num),
+      gotten_num_(0),
+      started_(false) {
+  }
+
+  void ThreadMain() override {
+    // Flag the start before the lock is requested below.
+    SetStarted();
+    AutoSharedLock shared_lock(lock_);
+    gotten_num_ = *num_;
+  }
+
+  // Value observed under the shared lock; not guarded by |mu_|, the tests
+  // read it after Join().
+  int gotten_num() const {
+    return gotten_num_;
+  }
+
+  void SetStarted() {
+    AutoLock lock(&mu_);
+    started_ = true;
+  }
+
+  // Polled by the test thread; guarded by |mu_|.
+  bool started() const {
+    AutoLock lock(&mu_);
+    return started_;
+  }
+
+ private:
+  ReadWriteLock* lock_;
+  int* num_;
+  int gotten_num_;
+
+  // Protects |started_|.
+  mutable Lock mu_;
+  bool started_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadWriteLockAcquireSharedThread);
+};
+
+
+bool ReadWriteLockAcquireSharedWithExclusiveLockTest() {
+  // Scenario: a reader is started while the main thread holds |lock|
+  // exclusively, so it can read |num| only after the release below.
+  ReadWriteLock lock;
+  int num = 0;
+  lock.AcquireExclusive();
+
+  ReadWriteLockAcquireSharedThread reader(&lock, &num);
+  PlatformThreadHandle reader_handle = kNullThreadHandle;
+  EXPECT_TRUE(PlatformThread::Create(&reader, &reader_handle));
+
+  // Poll until the reader has entered ThreadMain().
+  for (; !reader.started(); PlatformThread::Sleep(1)) {
+  }
+
+  EXPECT_EQ(num, 0);
+  ++num;
+  EXPECT_EQ(num, 1);
+  lock.ReleaseExclusive();
+
+  // The reader must have observed the value written before the release.
+  PlatformThread::Join(reader_handle);
+  EXPECT_EQ(1, reader.gotten_num());
+
+  return true;
+}
+
+bool ReadWriteLockAcquireSharedWithSharedLockTest() {
+  ReadWriteLock lock;
+  int num = 1;
+
+  // Two shared holders may coexist: the reader below must be able to run to
+  // completion while this thread still holds its own shared lock.
+  lock.AcquireShared();
+
+  ReadWriteLockAcquireSharedThread reader(&lock, &num);
+  PlatformThreadHandle reader_handle = kNullThreadHandle;
+  EXPECT_TRUE(PlatformThread::Create(&reader, &reader_handle));
+  PlatformThread::Join(reader_handle);
+  EXPECT_EQ(1, reader.gotten_num());
+
+  lock.ReleaseShared();
+  return true;
+}
+
+// Adds one to |*x| |loop_num| times, holding |*lock| for each increment.
+template<typename LockType, typename AutoLockType>
+class IncrementThread : public PlatformThread::Delegate {
+ public:
+  IncrementThread(LockType* lock, int* x, int loop_num)
+    : lock_(lock), x_(x), loop_num_(loop_num) {}
+
+  void ThreadMain() override {
+    for (int remaining = loop_num_; remaining > 0; --remaining) {
+      AutoLockType guard(lock_);
+      *x_ += 1;
+    }
+  }
+
+ private:
+  LockType* lock_;
+  int* x_;
+  const int loop_num_;
+};
+
+using FastIncrement = IncrementThread<FastLock, AutoFastLock>;
+using NormalIncrement = IncrementThread<Lock, AutoLock>;
+
+}  // namespace devtools_goma
+
+TEST(LockTest, Basic) {
+  ASSERT_TRUE(devtools_goma::BasicLockTest());
+}
+
+TEST(LockTest, TryLock) {
+  ASSERT_TRUE(devtools_goma::TryLockTest());
+}
+
+TEST(LockTest, Mutex) {
+  ASSERT_TRUE(devtools_goma::MutexTwoThreads());
+  ASSERT_TRUE(devtools_goma::MutexFourThreads());
+}
+
+TEST(LockTest, ConditionVar) {
+  ASSERT_TRUE(devtools_goma::ConditionVar());
+}
+
+TEST(ReadWriteLockTest, ReadWriteLockBasic) {
+  ASSERT_TRUE(devtools_goma::ReadWriteLockBasicTest());
+}
+
+TEST(ReadWriteLockTest, ReadWriteLockAcquireExclusive) {
+  ASSERT_TRUE(devtools_goma::ReadWriteLockAcquireExclusiveTest1());
+  ASSERT_TRUE(devtools_goma::ReadWriteLockAcquireExclusiveTest2());
+}
+
+TEST(ReadWriteLockTest, ReadWriteLockAcquireShared) {
+  ASSERT_TRUE(devtools_goma::ReadWriteLockAcquireSharedWithExclusiveLockTest());
+  ASSERT_TRUE(devtools_goma::ReadWriteLockAcquireSharedWithSharedLockTest());
+}
+
+TEST(LockhelperTest, FastLockBenchmark) {
+  // |thread_num| threads each add |loop_num| to |x| under one FastLock.
+  const int thread_num = 8;
+  const int loop_num = 100000;
+  std::vector<devtools_goma::PlatformThreadHandle> thread_ids(thread_num);
+  std::vector<std::unique_ptr<devtools_goma::FastIncrement>> incrementers;
+  int x = 0;
+  devtools_goma::FastLock lock;
+
+  for (int i = 0; i < thread_num; ++i) {
+    incrementers.emplace_back(
+        new devtools_goma::FastIncrement(&lock, &x, loop_num));
+  }
+  for (int i = 0; i < thread_num; ++i) {
+    // A thread that fails to start must not go unnoticed.
+    EXPECT_TRUE(devtools_goma::PlatformThread::Create(
+        incrementers[i].get(), &thread_ids[i]));
+  }
+  for (int i = 0; i < thread_num; ++i) {
+    devtools_goma::PlatformThread::Join(thread_ids[i]);
+  }
+
+  EXPECT_EQ(x, loop_num * thread_num);
+}
+
+TEST(LockhelperTest, NormalLockBenchmark) {
+  // |thread_num| threads each add |loop_num| to |x| under one Lock.
+  const int thread_num = 8;
+  const int loop_num = 100000;
+  std::vector<devtools_goma::PlatformThreadHandle> thread_ids(thread_num);
+  std::vector<std::unique_ptr<devtools_goma::NormalIncrement>> incrementers;
+  int x = 0;
+  devtools_goma::Lock lock;
+
+  for (int i = 0; i < thread_num; ++i) {
+    incrementers.emplace_back(
+        new devtools_goma::NormalIncrement(&lock, &x, loop_num));
+  }
+  for (int i = 0; i < thread_num; ++i) {
+    // A thread that fails to start must not go unnoticed.
+    EXPECT_TRUE(devtools_goma::PlatformThread::Create(
+        incrementers[i].get(), &thread_ids[i]));
+  }
+  for (int i = 0; i < thread_num; ++i) {
+    devtools_goma::PlatformThread::Join(thread_ids[i]);
+  }
+
+  EXPECT_EQ(x, loop_num * thread_num);
+}
diff --git a/base/path.cc b/base/path.cc
new file mode 100644
index 0000000..6bf75ae
--- /dev/null
+++ b/base/path.cc
@@ -0,0 +1,166 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "path.h"
+
+#include <string>
+
+#include "glog/logging.h"
+
+using std::string;
+
+namespace {
+
+// Appends |path2| to |*path1|, inserting a separator when |*path1| lacks a
+// trailing one and dropping one leading separator from |path2|.
+// Both arguments must be non-empty.
+void AppendPath(string* path1, StringPiece path2) {
+  DCHECK(!path1->empty());
+  DCHECK(!path2.empty());
+  const char head = path2[0];
+  const char tail = path1->back();
+
+#ifdef _WIN32
+  // Both separators are accepted on Windows; '\\' is the one inserted.
+  const bool head_is_sep = (head == '\\' || head == '/');
+  const bool tail_is_sep = (tail == '\\' || tail == '/');
+  const char sep = '\\';
+#else
+  const bool head_is_sep = (head == '/');
+  const bool tail_is_sep = (tail == '/');
+  const char sep = '/';
+#endif
+
+  if (head_is_sep) path2.remove_prefix(1);
+  if (!tail_is_sep) path1->push_back(sep);
+  path1->append(path2.begin(), path2.end());
+}
+
+}  // anonymous namespace
+
+namespace file {
+
+namespace internal {
+
+// Joins the non-empty entries of |paths| in order with AppendPath(); empty
+// entries are skipped.
+string JoinPathImpl(std::initializer_list<StringPiece> paths) {
+  // Capacity hint: every entry plus one separator each.
+  size_t total = 0;
+  for (StringPiece piece : paths) {
+    total += piece.size() + 1;
+  }
+
+  string joined;
+  joined.reserve(total);
+  for (StringPiece piece : paths) {
+    if (piece.empty()) continue;
+    if (joined.empty()) {
+      joined.append(piece.begin(), piece.end());
+    } else {
+      AppendPath(&joined, piece);
+    }
+  }
+  return joined;
+}
+
+// Like JoinPathImpl(), except that an absolute entry discards everything
+// joined so far and restarts the result from that entry.
+string JoinPathRespectAbsoluteImpl(std::initializer_list<StringPiece> paths) {
+  string joined;
+
+  for (StringPiece piece : paths) {
+    if (piece.empty()) {
+      continue;
+    }
+    // The first non-empty entry and every absolute entry become the new result.
+    if (joined.empty() || IsAbsolutePath(piece)) {
+      joined.assign(piece.begin(), piece.end());
+    } else {
+      AppendPath(&joined, piece);
+    }
+  }
+  return joined;
+}
+
+}  // namespace internal
+
+// Returns the last component of |fname|: the text after the final '/' on
+// non-Windows builds, the name plus extension from _splitpath_s() on Windows.
+StringPiece Basename(StringPiece fname) {
+#ifdef _WIN32
+  // TODO: support UNC path.
+  char name[_MAX_FNAME] = {0};
+  char ext[_MAX_EXT] = {0};
+  CHECK_EQ(_splitpath_s(string(fname).c_str(), nullptr, 0, nullptr, 0,
+                        name, sizeof name, ext, sizeof ext), 0);
+  // Keep only the trailing name + extension characters of |fname|.
+  const size_t tail_length =
+      strlen(name) + (ext[0] == '.' ? strlen(ext) : 0);
+  fname.remove_prefix(fname.length() - tail_length);
+  return fname;
+#else
+  const StringPiece::size_type slash = fname.find_last_of('/');
+  // Without any '/' the whole of |fname| is the basename.
+  return slash == StringPiece::npos ? fname : fname.substr(slash + 1);
+#endif
+}
+
+// Returns the directory portion of |fname| as a prefix of |fname|, without the
+// separator before the last component (a lone leading separator is kept).
+StringPiece Dirname(StringPiece fname) {
+#ifdef _WIN32
+  // TODO: support UNC path.
+  char drive[_MAX_DRIVE] = {0};
+  char dir[_MAX_DIR] = {0};
+  _splitpath_s(string(fname).c_str(), drive, sizeof drive, dir, sizeof dir,
+               nullptr, 0, nullptr, 0);
+
+  const size_t dir_length = strlen(dir);
+  // Keep drive + directory, minus the final '\\' or '/' unless the directory
+  // consists of that single separator.
+  const size_t keep = strlen(drive) + dir_length - (dir_length > 1 ? 1 : 0);
+  return fname.substr(0, keep);
+#else
+  const StringPiece::size_type slash = fname.find_last_of('/');
+
+  // No '/' at all: there is no directory part.
+  if (slash == StringPiece::npos) {
+    return fname.substr(0, 0);
+  }
+
+  // A '/' in first position is kept so that the result is "/"; otherwise
+  // everything before the last '/' is returned.
+  return fname.substr(0, slash == 0 ? 1 : slash);
+#endif
+}
+
+// Returns Basename(fname) without its last '.' and what follows it; the whole
+// basename is returned when it contains no '.'.
+StringPiece Stem(StringPiece fname) {
+  const StringPiece base = Basename(fname);
+  const StringPiece::size_type dot = base.find_last_of('.');
+  return dot == StringPiece::npos ? base : base.substr(0, dot);
+}
+
+// Returns what follows the last '.' of Basename(fname); empty without a '.'.
+StringPiece Extension(StringPiece fname) {
+  const StringPiece base = Basename(fname);
+  const StringPiece::size_type dot = base.find_last_of('.');
+  return dot == StringPiece::npos ? fname.substr(fname.size())
+                                  : base.substr(dot + 1);
+}
+
+// True when |path| starts with a separator or, on Windows, has ':' second.
+bool IsAbsolutePath(StringPiece path) {
+  const char first = path.empty() ? '\0' : path[0];
+#ifdef _WIN32
+  return first == '\\' || first == '/' || (path.size() > 1 && path[1] == ':');
+#else
+  return first == '/';
+#endif
+}
+
+}  // namespace file
diff --git a/base/path.h b/base/path.h
new file mode 100644
index 0000000..8bc5c8f
--- /dev/null
+++ b/base/path.h
@@ -0,0 +1,59 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_BASE_PATH_H_
+#define DEVTOOLS_GOMA_BASE_PATH_H_
+
+#include <initializer_list>
+#include <string>
+
+#include "string_piece.h"
+
+// BEGIN GOOGLE-INTERNAL
+// path.h emulation layer
+// END GOOGLE INTERNAL
+namespace file {
+
+namespace internal {
+std::string JoinPathImpl(std::initializer_list<StringPiece> paths);
+std::string JoinPathRespectAbsoluteImpl(
+    std::initializer_list<StringPiece> paths);
+}  // namespace internal
+
+StringPiece Basename(StringPiece path);
+
+// Returns dirname.
+// For example:
+//   Dirname("a/b") --> "a"
+//   Dirname("a") --> ""
+// On Windows, drive letter is handled:
+//   Dirname("C:\\foo") --> "C:\\"
+//   Dirname("C:a") --> "C:"
+// See lib/path_unittest.cc for more examples.
+StringPiece Dirname(StringPiece fname);
+
+StringPiece Stem(StringPiece path);
+
+StringPiece Extension(StringPiece path);
+
+// New file path API.
+// It always returns path1/path2 even when path2 is absolute.
+template<typename... Strs>
+inline std::string JoinPath(const Strs&... paths) {
+  return internal::JoinPathImpl({paths...});
+}
+
+// It would return path2, if path2 is absolute.
+template<typename... Strs>
+inline std::string JoinPathRespectAbsolute(const Strs&... paths) {
+  return internal::JoinPathRespectAbsoluteImpl({paths...});
+}
+
+// Return true if path is absolute.
+bool IsAbsolutePath(StringPiece path);
+
+}  // namespace file
+
+#endif  // DEVTOOLS_GOMA_BASE_PATH_H_
diff --git a/base/socket_helper_win.cc b/base/socket_helper_win.cc
new file mode 100644
index 0000000..41e4f64
--- /dev/null
+++ b/base/socket_helper_win.cc
@@ -0,0 +1,388 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+// Note for 64-bit SOCKET usage:
+// It's okay to cast between int and SOCKET since Windows Handle value will not
+// exceed 2^24 according to Windows Internals. See
+// http://blogs.technet.com/b/markrussinovich/archive/2009/09/29/3283844.aspx.
+
+#ifdef _WIN32
+
+#include "socket_helper_win.h"
+#include "glog/logging.h"
+#include "lockhelper.h"
+#include "platform_thread.h"
+
+#define WSA_VERSION MAKEWORD(2, 2)  // using winsock 2.2
+
+int inet_aton(const char* input, struct in_addr* output) {
+  return inet_pton(AF_INET, input, &output->s_addr) == 1;  // 0/-1 => failure
+}
+
+/* socketpair.c
+ * Copyright 2007, 2010 by Nathan C. Myers <ncm@cantrip.org>
+ * This code is Free Software.  It may be copied freely, in original or
+ * modified form, subject only to the restrictions that (1) the author is
+ * relieved from all responsibilities for any use for any purpose, and (2)
+ * this copyright notice must be retained, unchanged, in its entirety.  If
+ * for any reason the author might be held responsible for any consequences
+ * of copying or use, license is withheld.
+ */
+// Original version:
+// https://github.com/ncm/selectable-socketpair/blob/master/socketpair.c
+// This implementation can only be blocking and is not select-able.
+int socketpair(sa_family_t domain, int type, int protocol, int socks[2]) {
+  // Builds a connected socket pair through a temporary loopback listener.
+  // Returns 0 on success and SOCKET_ERROR on failure.
+  if (socks == nullptr) {
+    WSASetLastError(WSA_INVALID_PARAMETER);
+    return SOCKET_ERROR;
+  }
+
+  const SOCKET listener = socket(domain, type, protocol);
+  if (listener == INVALID_SOCKET) {
+    return SOCKET_ERROR;
+  }
+
+  // Loopback address with port 0 for the bind() below.
+  union {
+    struct sockaddr_in inaddr;
+    struct sockaddr addr;
+  } addr;
+  socklen_t addr_len = sizeof(addr.inaddr);
+  memset(&addr, 0, sizeof(addr));
+  addr.inaddr.sin_family = domain;
+  addr.inaddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+  addr.inaddr.sin_port = 0;
+
+  socks[0] = INVALID_SOCKET;
+  socks[1] = INVALID_SOCKET;
+
+  // Runs once; every failing step leaves through |break| to the cleanup below.
+  int reuse_addr = 1;
+  do {
+    if (setsockopt(listener, SOL_SOCKET, SO_REUSEADDR,
+                   reinterpret_cast<char*>(&reuse_addr),
+                   static_cast<socklen_t>(sizeof(reuse_addr))) == -1) {
+      break;
+    }
+    if (bind(listener, &addr.addr, sizeof(addr.inaddr)) == SOCKET_ERROR) break;
+
+    // Read the bound address back; it is the address to connect to.
+    memset(&addr, 0, sizeof(addr));
+    if (getsockname(listener, &addr.addr, &addr_len) == SOCKET_ERROR) break;
+    addr.inaddr.sin_family = AF_INET;
+    addr.inaddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+    if (listen(listener, 1) == SOCKET_ERROR) break;
+
+    // socks[0] connects to the listener; socks[1] is what accept() returns.
+    socks[0] = static_cast<int>(WSASocket(
+        domain, type, protocol, nullptr, 0, 0));
+    if (socks[0] == INVALID_SOCKET) break;
+    if (connect(socks[0], &addr.addr, sizeof(addr.inaddr)) == SOCKET_ERROR) {
+      break;
+    }
+
+    socks[1] = static_cast<int>(accept(listener, nullptr, nullptr));
+    if (socks[1] == INVALID_SOCKET) break;
+
+    // Both ends exist; the listener has served its purpose.
+    closesocket(listener);
+    return 0;
+  } while (false);
+
+  // Failure: close everything, but report the error of the step that failed.
+  const int failed_with = WSAGetLastError();
+  closesocket(listener);
+  closesocket(socks[1]);
+  closesocket(socks[0]);
+  WSASetLastError(failed_with);
+  return SOCKET_ERROR;
+}
+
+namespace {
+
+// Accepting half of async_socketpair(): StartListen() opens a non-blocking
+// loopback listener and ThreadMain() accepts one connection on it.
+class ServerThread : public devtools_goma::PlatformThread::Delegate {
+ public:
+  // Creates |*listener| socket and starts listen at |*listener| on |*port|.
+  // Returns WSA error code.  If success, returns 0.
+  // |*port| is allocated from available port by system.
+  // It is stored in host byte order.
+  static DWORD StartListen(SOCKET* listener, int* port) {
+    DCHECK(listener != nullptr);
+    DCHECK(port != nullptr);
+    *listener = INVALID_SOCKET;
+    *port = 0;
+    sockaddr_in inaddr = {0};
+
+    while (*port == 0) {
+      *listener = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+      if (*listener == INVALID_SOCKET) {
+        DWORD err = WSAGetLastError();
+        LOG(ERROR) << "socket failed:" << err;
+        return err;
+      }
+      memset(&inaddr, 0, sizeof(inaddr));
+      inaddr.sin_family = AF_INET;
+      inaddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+      inaddr.sin_port = htons(0);
+      socklen_t addr_len = sizeof(inaddr);
+
+      unsigned long non_blocking = 1;
+      if (ioctlsocket(*listener, FIONBIO, &non_blocking) == SOCKET_ERROR) {
+        DWORD err = WSAGetLastError();
+        LOG(ERROR) << "socket non blocking failed:" << err;
+        closesocket(*listener);
+        *listener = INVALID_SOCKET;
+        return err;
+      }
+      if (bind(*listener, (sockaddr*)&inaddr, addr_len) == SOCKET_ERROR) {
+        // bind may fail if other process/thread uses the port.
+        LOG(WARNING) << "bind failed:" << WSAGetLastError();
+        closesocket(*listener);
+        continue;
+      }
+      memset(&inaddr, 0, sizeof(inaddr));
+      addr_len = sizeof(inaddr);
+      if (getsockname(*listener, (sockaddr*)&inaddr, &addr_len)
+          == SOCKET_ERROR) {
+        DWORD err = WSAGetLastError();
+        LOG(ERROR) << "getsockname failed:" << err;
+        closesocket(*listener);
+        *listener = INVALID_SOCKET;
+        return err;
+      }
+      *port = ntohs(inaddr.sin_port);
+    }
+    if (listen(*listener, 1) == SOCKET_ERROR) {
+      DWORD err = WSAGetLastError();
+      LOG(ERROR) << "listen failed:" << err;
+      closesocket(*listener);
+      *listener = INVALID_SOCKET;
+      return err;
+    }
+    return 0;
+  }
+
+  // ServerThread listen at |listener| and set accepted socket in |*accept|.
+  // ServerThread will close |listener|.
+  ServerThread(SOCKET listener, SOCKET* accept)
+      : listener_(listener), accept_(accept), result_(WSAETIMEDOUT) {
+    DCHECK_NE(listener_, INVALID_SOCKET);
+    DCHECK_EQ(*accept_, INVALID_SOCKET);
+  }
+
+  ~ServerThread() {
+    closesocket(listener_);
+  }
+
+  void ThreadMain() override {
+    VLOG(1) << "socketpair ServerThread: start";
+    fd_set r_set;
+    for (;;) {
+      timeval tv = {2, 0};  // timeout is two seconds
+      FD_ZERO(&r_set);
+      MSVC_PUSH_DISABLE_WARNING_FOR_FD_SET();
+      FD_SET(listener_, &r_set);
+      MSVC_POP_WARNING();
+      int r = select(static_cast<int>(listener_ + 1),
+                     &r_set, nullptr, nullptr, &tv);
+      if (r < 0) {
+        LOG(WARNING) << "select error:" << r
+                     << " result=" << WSAGetLastError();
+        continue;
+      } else if (r == 0) {
+        LOG(WARNING) << "select timed-out";
+        continue;
+      }
+      if (FD_ISSET(listener_, &r_set)) {
+        sockaddr_in client_addr;
+        socklen_t addr_len = sizeof(client_addr);
+        const SOCKET s = accept(listener_, (sockaddr*)&client_addr, &addr_len);
+        // SOCKET is an unsigned type, so failure must be detected by comparing
+        // with INVALID_SOCKET; a "< 0" test can never be true.
+        if (s == INVALID_SOCKET) {
+          result_ = WSAGetLastError();
+          if (result_ == WSAEWOULDBLOCK) {
+            continue;
+          }
+          DCHECK_NE(result_, 0);
+          LOG(ERROR) << "accept failed:" << result_;
+          return;
+        }
+        // accepted.
+        *accept_ = s;
+        result_ = 0;
+        VLOG(1) << "socketpair ServerThread: ready";
+        return;
+      }
+    }
+  }
+
+  int result() { return result_; }
+
+ private:
+  SOCKET listener_;
+  SOCKET* accept_;
+  int result_;
+
+  DISALLOW_COPY_AND_ASSIGN(ServerThread);
+};
+
+// Connecting half of async_socketpair(): opens a socket and connects it to
+// the loopback listener on |port|. |*client| is INVALID_SOCKET on failure.
+class ClientThread : public devtools_goma::PlatformThread::Delegate {
+ public:
+  explicit ClientThread(SOCKET* client, int port)
+      : client_(client), port_(port), result_(WSAETIMEDOUT) {}
+
+  void ThreadMain() override {
+    *client_ = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+    if (*client_ == INVALID_SOCKET) {
+      result_ = WSAGetLastError();
+      return;
+    }
+
+    sockaddr_in inaddr;
+    memset(&inaddr, 0, sizeof(inaddr));
+    inaddr.sin_family = AF_INET;
+    inaddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+    inaddr.sin_port = htons((unsigned short)port_);
+    socklen_t addr_len = sizeof(inaddr);
+    timeval tv = {2, 0};  // timeout is two seconds
+    fd_set w_set, e_set;
+    FD_ZERO(&w_set);
+    FD_ZERO(&e_set);
+    for (;;) {
+      int r = connect(*client_, (sockaddr*)&inaddr, addr_len);
+      if (r != SOCKET_ERROR) {  // Connected immediately
+        result_ = 0;
+        return;
+      }
+      result_ = WSAGetLastError();
+      if (result_ != WSAEWOULDBLOCK) {
+        break;
+      } else {  // need select
+        MSVC_PUSH_DISABLE_WARNING_FOR_FD_SET();
+        FD_SET(*client_, &w_set);
+        MSVC_POP_WARNING();
+        e_set = w_set;
+        r = select(static_cast<int>(*client_ + 1), nullptr,
+                   &w_set, &e_set, &tv);
+        if (r == 0) {  // Connection timeout
+          result_ = WSAETIMEDOUT;
+          break;
+        }
+        if (FD_ISSET(*client_, &w_set) || FD_ISSET(*client_, &e_set)) {
+          int len = sizeof(result_);
+          if (getsockopt(*client_, SOL_SOCKET, SO_ERROR, (char*)&result_,
+                         &len) < 0) {
+            result_ = WSAGetLastError();
+          } else if (result_ == 0) {  // SO_ERROR 0: connection established
+            return;
+          }
+        } else {  // Unknown error in connect
+          result_ = WSAGetLastError();
+        }
+      }
+      break;
+    }
+    // Failed: close the socket and clear |*client_|, because the caller tells
+    // success from failure by comparing it with INVALID_SOCKET.
+    closesocket(*client_);
+    *client_ = INVALID_SOCKET;
+  }
+
+  int result() { return result_; }
+
+ private:
+  SOCKET* client_;
+  int port_;
+  int result_;
+
+  DISALLOW_COPY_AND_ASSIGN(ClientThread);
+};
+
+}  // namespace
+
+// Creates a connected loopback TCP socket pair with the help of two threads.
+// socks[0] is the accepted end and socks[1] the connecting end.
+int async_socketpair(int socks[2]) {
+  if (socks == nullptr) {
+    WSASetLastError(WSA_INVALID_PARAMETER);
+    return SOCKET_ERROR;
+  }
+  SOCKET listener = INVALID_SOCKET;
+  int port = 0;
+  DWORD err = ServerThread::StartListen(&listener, &port);
+  if (err != 0) {
+    LOG(ERROR) << "StartListen failed:" << err;
+    WSASetLastError(err);
+    return SOCKET_ERROR;
+  }
+  DCHECK_NE(listener, INVALID_SOCKET);
+  DCHECK_NE(port, 0);
+
+  SOCKET server_socket = INVALID_SOCKET;
+  SOCKET client_socket = INVALID_SOCKET;
+  ServerThread server(listener, &server_socket);
+  devtools_goma::PlatformThreadHandle server_thread_handle =
+      devtools_goma::kNullThreadHandle;
+  devtools_goma::PlatformThread::Create(&server, &server_thread_handle);
+  // This will be blocked until server started listening.
+  ClientThread client(&client_socket, port);
+  devtools_goma::PlatformThreadHandle client_thread_handle =
+      devtools_goma::kNullThreadHandle;
+  devtools_goma::PlatformThread::Create(&client, &client_thread_handle);
+
+  DWORD result = WaitForSingleObject(client_thread_handle, INFINITE);
+  if (result == WAIT_OBJECT_0) {
+    CloseHandle(client_thread_handle);  // thread ended; avoid handle leak
+    socks[1] = static_cast<int>(client_socket);
+    if (client_socket == INVALID_SOCKET) {
+      LOG(ERROR) << "client thread result=" << client.result();
+    }
+  } else {
+    socks[1] = INVALID_SOCKET;
+    LOG(ERROR) << "client wait error: result=" << result;
+  }
+  result = WaitForSingleObject(server_thread_handle, INFINITE);
+  if (result == WAIT_OBJECT_0) {
+    CloseHandle(server_thread_handle);  // thread ended; avoid handle leak
+    socks[0] = static_cast<int>(server_socket);
+    if (server_socket == INVALID_SOCKET) {
+      LOG(ERROR) << "server thread result=" << server.result();
+    }
+  } else {
+    socks[0] = INVALID_SOCKET;
+    LOG(ERROR) << "server wait error: result=" << result;
+  }
+  return (socks[0] != INVALID_SOCKET && socks[1] != INVALID_SOCKET)
+      ? 0 : SOCKET_ERROR;
+}
+
+WinsockHelper::WinsockHelper() : initialized_(false) {
+  WSADATA WSAData = { 0 };
+  // WSACleanup() and |WSAData| are valid only after a successful WSAStartup().
+  if (const int err = WSAStartup(WSA_VERSION, &WSAData)) {
+    LOG(ERROR) << "GOMA: WSAStartup failed: " << err;
+  } else if (LOBYTE(WSAData.wVersion) != LOBYTE(WSA_VERSION) ||
+             HIBYTE(WSAData.wVersion) != HIBYTE(WSA_VERSION)) {
+    LOG(ERROR) << "GOMA: Incorrect winsock version, required 2.2 and up";
+    WSACleanup();
+  } else {
+    initialized_ = true;
+  }
+}
+
+WinsockHelper::~WinsockHelper() {
+  if (initialized_) {
+    WSACleanup();
+  }
+}
+
+#endif  // _WIN32
diff --git a/base/socket_helper_win.h b/base/socket_helper_win.h
new file mode 100644
index 0000000..37bee18
--- /dev/null
+++ b/base/socket_helper_win.h
@@ -0,0 +1,45 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_BASE_SOCKET_HELPER_WIN_H_
+#define DEVTOOLS_GOMA_BASE_SOCKET_HELPER_WIN_H_
+
+#ifdef _WIN32
+
+// Note: In this port, we mix the use of SOCKET and int, which is okay for
+//       32-bit builds but triggers a bunch of warnings for 64-bit builds
+//       (SOCKET is UINT_PTR, and UINT_PTR is __w64 unsigned int).
+//       It should be safe to ignore those warnings.
+
+#pragma once
+#include "basictypes.h"
+#include "compiler_specific.h"
+#include "config_win.h"
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#pragma comment (lib, "ws2_32")
+
+typedef unsigned short sa_family_t;
+
+int inet_aton(const char* input, struct in_addr* output);
+int socketpair(sa_family_t domain, int type, int protocol, int socks[2]);
+int async_socketpair(int socks[2]);
+
+// Helper class to init/destroy winsock correctly.
+// Instantiate this class object in your main().
+class WinsockHelper {
+ public:
+  WinsockHelper();
+  ~WinsockHelper();
+  bool initialized() const { return initialized_; }
+
+ private:
+  bool initialized_;
+
+  DISALLOW_COPY_AND_ASSIGN(WinsockHelper);
+};
+
+#endif  // _WIN32
+#endif  // DEVTOOLS_GOMA_BASE_SOCKET_HELPER_WIN_H_
diff --git a/base/socket_helper_win_unittest.cc b/base/socket_helper_win_unittest.cc
new file mode 100644
index 0000000..9fcafe6
--- /dev/null
+++ b/base/socket_helper_win_unittest.cc
@@ -0,0 +1,185 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+// This is a Windows-only unit test
+#ifdef _WIN32
+#include "socket_helper_win.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "lockhelper.h"
+#include "platform_thread.h"
+using std::string;
+
+namespace devtools_goma {
+
+const char* TEST_STRING = "Hola! Amigo!";
+
+// Test worker owning one end of a socket pair; set_state() picks its action.
+class SocketPairTestThread : public PlatformThread::Delegate {
+ public:
+  typedef enum {
+    kInitial,
+    kAsync,
+    kBlockRecv,
+    kBlockSend,
+    kTerminate
+  } State;
+
+  explicit SocketPairTestThread(int fd)
+      : state_(kInitial),
+        signal_(::CreateEvent(nullptr, TRUE, FALSE, nullptr)),
+        socket_(fd) {
+    CHECK(signal_ != nullptr);  // CreateEvent() returns NULL on failure.
+  }
+
+  ~SocketPairTestThread() override {
+    closesocket(socket_);
+    CloseHandle(signal_);
+  }
+
+  void ThreadMain() override {
+    bool terminate_signaled = false;
+    for (; !terminate_signaled; ) {
+      WaitForSingleObject(signal_, INFINITE);
+      AutoLock lock(&lock_);
+      switch (state_) {
+        case kInitial:
+          break;
+
+        case kAsync:
+          {
+            timeval tv = {2, 0};  // select() timeout: two seconds
+            bool r_done = false;
+            fd_set r_set;
+            send(socket_, TEST_STRING,
+                 static_cast<int>(strlen(TEST_STRING)), 0);
+            FD_ZERO(&r_set);
+            MSVC_PUSH_DISABLE_WARNING_FOR_FD_SET();
+            FD_SET(socket_, &r_set);
+            MSVC_POP_WARNING();
+            while (!r_done) {
+              int result = select(socket_ + 1, &r_set, 0, 0, &tv);
+              if (FD_ISSET(socket_, &r_set)) {
+                char buf[256] = {0};
+                recv(socket_, buf, 256, 0);
+                message_ = buf;
+                r_done = true;
+              }
+              if (result == 0 || result == SOCKET_ERROR) {
+                break;  // recv timeout or select failed
+              }
+            }
+          }
+          break;
+
+        case kBlockRecv:
+          {
+            char buf[256] = {0};
+            recv(socket_, buf, 256, 0);
+            message_ = buf;
+          }
+          break;
+
+        case kBlockSend:
+          send(socket_, TEST_STRING, static_cast<int>(strlen(TEST_STRING)), 0);
+          break;
+
+        default:  // kTerminate
+          terminate_signaled = true;
+          break;
+      }
+      ResetEvent(signal_);
+    }
+  }
+
+  void set_state(State state) {
+    // Block set_state until |signal_| is not set.
+    while (::WaitForSingleObjectEx(signal_, 0, TRUE) != WAIT_TIMEOUT) {
+      PlatformThread::Sleep(100);
+    }
+    AutoLock lock(&lock_);
+    state_ = state;
+    SetEvent(signal_);
+  }
+
+  string message() {
+    AutoLock lock(&lock_);
+    return message_;
+  }
+
+  void Reset() {
+    AutoLock lock(&lock_);
+    message_.clear();
+  }
+
+ private:
+  Lock lock_;
+  State state_;
+  string message_;
+  HANDLE signal_;
+  int socket_;
+  DISALLOW_COPY_AND_ASSIGN(SocketPairTestThread);
+};
+
+TEST(SocketHelperWin, BlockingSocketPair) {
+  int fd[2] = {0};
+  EXPECT_NE(SOCKET_ERROR, socketpair(AF_INET, SOCK_STREAM, 0, fd));
+
+  SocketPairTestThread peer0(fd[0]);
+  SocketPairTestThread peer1(fd[1]);
+  PlatformThreadHandle peer0_handle = kNullThreadHandle;
+  PlatformThreadHandle peer1_handle = kNullThreadHandle;
+  EXPECT_TRUE(PlatformThread::Create(&peer0, &peer0_handle));
+  EXPECT_TRUE(PlatformThread::Create(&peer1, &peer1_handle));
+
+  // First direction: peer0 receives what peer1 sends.
+  peer0.set_state(SocketPairTestThread::kBlockRecv);
+  peer1.set_state(SocketPairTestThread::kBlockSend);
+
+  // Opposite direction: peer1 receives what peer0 sends.
+  peer0.set_state(SocketPairTestThread::kBlockSend);
+  peer1.set_state(SocketPairTestThread::kBlockRecv);
+
+  // Tell both workers to stop and wait up to two seconds for each of them.
+  peer0.set_state(SocketPairTestThread::kTerminate);
+  peer1.set_state(SocketPairTestThread::kTerminate);
+  WaitForSingleObject(peer0_handle, 2000);
+  WaitForSingleObject(peer1_handle, 2000);
+
+  EXPECT_STREQ(TEST_STRING, peer0.message().c_str());
+  EXPECT_STREQ(TEST_STRING, peer1.message().c_str());
+}
+
+TEST(SocketHelperWin, AsyncSocketPair) {
+  int fd[2] = {0};
+  EXPECT_EQ(0, async_socketpair(fd));
+
+  SocketPairTestThread peer0(fd[0]);
+  SocketPairTestThread peer1(fd[1]);
+  PlatformThreadHandle peer0_handle = kNullThreadHandle;
+  PlatformThreadHandle peer1_handle = kNullThreadHandle;
+  EXPECT_TRUE(PlatformThread::Create(&peer0, &peer0_handle));
+  EXPECT_TRUE(PlatformThread::Create(&peer1, &peer1_handle));
+
+  // In kAsync each peer sends TEST_STRING and then waits, with select(), for
+  // data to receive on its own end.
+  peer0.set_state(SocketPairTestThread::kAsync);
+  peer1.set_state(SocketPairTestThread::kAsync);
+
+  // Tell both workers to stop and wait up to two seconds for each of them.
+  peer0.set_state(SocketPairTestThread::kTerminate);
+  peer1.set_state(SocketPairTestThread::kTerminate);
+  WaitForSingleObject(peer0_handle, 2000);
+  WaitForSingleObject(peer1_handle, 2000);
+
+  EXPECT_STREQ(TEST_STRING, peer0.message().c_str());
+  EXPECT_STREQ(TEST_STRING, peer1.message().c_str());
+}
+
+}  // namespace devtools_goma
+
+#endif  // _WIN32
diff --git a/base/split.cc b/base/split.cc
new file mode 100644
index 0000000..12b04c5
--- /dev/null
+++ b/base/split.cc
@@ -0,0 +1,42 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "split.h"
+
+#include <string>
+#include <vector>
+
+using std::string;
+
+void SplitStringUsing(const string& full, const char* delim,
+                      std::vector<string>* res) {
+  *res = strings::Split(full, delim);
+}
+
+namespace strings {
+
+// Splits |full| at runs of characters contained in |delim|. Adjacent
+// delimiters act as one separator; the text after the last separator is
+// always appended, even when it is empty.
+std::vector<string> Split(const string& full, const string& delim) {
+  std::vector<string> pieces;
+  string::size_type begin = 0;
+  string::size_type end = full.find_first_of(delim, begin);
+
+  while (end != string::npos) {
+    pieces.emplace_back(full, begin, end - begin);
+    // Step over the rest of this delimiter run.
+    begin = full.find_first_not_of(delim, end + 1);
+    if (begin == string::npos) {
+      begin = full.size();
+    }
+    end = full.find_first_of(delim, begin);
+  }
+
+  pieces.push_back(full.substr(begin));
+  return pieces;
+}
+
+}  // namespace strings
diff --git a/base/split.h b/base/split.h
new file mode 100644
index 0000000..59fc22a
--- /dev/null
+++ b/base/split.h
@@ -0,0 +1,28 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_BASE_SPLIT_H_
+#define DEVTOOLS_GOMA_BASE_SPLIT_H_
+
+#include <string>
+#include <vector>
+
+using std::string;
+
+void SplitStringUsing(const string& full, const char* delim,
+                      std::vector<string>* res);
+
+namespace strings {
+
+// Support only Example 1 of new Split API.
+// TODO: full support of new Split API.
+std::vector<string> Split(const string& full, const string& delim);
+inline std::vector<string> Split(const string& full, char c) {
+  return Split(full, string(1, c));
+}
+
+}  // namespace strings
+
+#endif  // DEVTOOLS_GOMA_BASE_SPLIT_H_
diff --git a/base/split_unittest.cc b/base/split_unittest.cc
new file mode 100644
index 0000000..3779338
--- /dev/null
+++ b/base/split_unittest.cc
@@ -0,0 +1,126 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "split.h"
+
+#include <string>
+#include <vector>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+using std::string;
+
+TEST(SplitTest, SplitStringUsing) {
+  std::vector<string> tokens;
+
+  SplitStringUsing("foo:bar:baz", ":", &tokens);  // Plain case.
+  ASSERT_EQ(3, static_cast<int>(tokens.size()));
+  EXPECT_EQ("foo", tokens[0]);
+  EXPECT_EQ("bar", tokens[1]);
+  EXPECT_EQ("baz", tokens[2]);
+
+  SplitStringUsing(":bar:baz", ":", &tokens);  // Leading delimiter gives "".
+  ASSERT_EQ(3, static_cast<int>(tokens.size()));
+  EXPECT_EQ("", tokens[0]);
+  EXPECT_EQ("bar", tokens[1]);
+  EXPECT_EQ("baz", tokens[2]);
+
+  SplitStringUsing("::", "::", &tokens);  // Run of delimiters is one split.
+  ASSERT_EQ(2, static_cast<int>(tokens.size()));
+  EXPECT_EQ("", tokens[0]);
+  EXPECT_EQ("", tokens[1]);
+
+  SplitStringUsing("ab:cd;ef:", ":;", &tokens);  // Any char of delim splits.
+  ASSERT_EQ(4, static_cast<int>(tokens.size()));
+  EXPECT_EQ("ab", tokens[0]);
+  EXPECT_EQ("cd", tokens[1]);
+  EXPECT_EQ("ef", tokens[2]);
+  EXPECT_EQ("", tokens[3]);
+
+  SplitStringUsing("ab:;cd;:ef:;", ":;", &tokens);  // Mixed runs collapse.
+  ASSERT_EQ(4, static_cast<int>(tokens.size()));
+  EXPECT_EQ("ab", tokens[0]);
+  EXPECT_EQ("cd", tokens[1]);
+  EXPECT_EQ("ef", tokens[2]);
+  EXPECT_EQ("", tokens[3]);
+
+  SplitStringUsing("foo", "::", &tokens);  // No delimiter present.
+  ASSERT_EQ(1, static_cast<int>(tokens.size()));
+  EXPECT_EQ("foo", tokens[0]);
+}
+
+TEST(SplitTest, SplitStringWithNul) {
+  static const char orig[] = {
+    'f', 'o', 'o', '\0',
+    'b', 'a', 'r', '\0', '\0',
+    'b', 'a', 'z',
+    '\0', '\0',
+  };
+
+  string s1(orig, sizeof(orig) - 2);  // Without the two trailing NULs.
+  ASSERT_EQ(12U, s1.size());
+  string s2(orig, sizeof(orig) - 1);  // With one trailing NUL.
+  ASSERT_EQ(13U, s2.size());
+  string s3(orig, sizeof(orig));  // With both trailing NULs.
+  ASSERT_EQ(14U, s3.size());
+
+  std::vector<string> tokens = strings::Split(s1, '\0');
+  ASSERT_EQ(3U, tokens.size());
+  EXPECT_EQ("foo", tokens[0]);
+  EXPECT_EQ("bar", tokens[1]);
+  EXPECT_EQ("baz", tokens[2]);
+
+  tokens = strings::Split(s2, '\0');
+  ASSERT_EQ(4U, tokens.size());
+  EXPECT_EQ("foo", tokens[0]);
+  EXPECT_EQ("bar", tokens[1]);
+  EXPECT_EQ("baz", tokens[2]);
+  EXPECT_EQ("", tokens[3]);
+
+  // The consecutive delimiter is skipped.
+  tokens = strings::Split(s3, '\0');
+  ASSERT_EQ(4U, tokens.size());
+  EXPECT_EQ("foo", tokens[0]);
+  EXPECT_EQ("bar", tokens[1]);
+  EXPECT_EQ("baz", tokens[2]);
+  EXPECT_EQ("", tokens[3]);
+}
+
+TEST(SplitTest, IncludeProcessor) {
+  std::vector<string> tokens;
+
+  SplitStringUsing(
+      " /usr/include/c++/4.2\n"
+      " /usr/include/c++/4.2/x86_64-linux-gnu\n"
+      " /usr/include/c++/4.2/backward\n"
+      " /usr/local/include\n"
+      " /usr/lib/gcc/x86_64-linux-gnu/4.2.4/include\n"
+      " /usr/include\n", "\r\n ", &tokens);
+  ASSERT_EQ(8, static_cast<int>(tokens.size()));
+  EXPECT_EQ("", tokens[0]);  // The input starts with a delimiter (' ').
+  EXPECT_EQ("/usr/include/c++/4.2", tokens[1]);
+  EXPECT_EQ("/usr/include/c++/4.2/x86_64-linux-gnu", tokens[2]);
+  EXPECT_EQ("/usr/include/c++/4.2/backward", tokens[3]);
+  EXPECT_EQ("/usr/local/include", tokens[4]);
+  EXPECT_EQ("/usr/lib/gcc/x86_64-linux-gnu/4.2.4/include", tokens[5]);
+  EXPECT_EQ("/usr/include", tokens[6]);
+  EXPECT_EQ("", tokens[7]);  // The input ends with a delimiter ('\n').
+
+  std::vector<string> new_tokens;
+  for (const auto& it : tokens) {  // Keep only the non-empty tokens.
+    if (it != "") {
+      new_tokens.push_back(it);
+    }
+  }
+
+  ASSERT_EQ(6, static_cast<int>(new_tokens.size()));
+  EXPECT_EQ("/usr/include/c++/4.2", new_tokens[0]);
+  EXPECT_EQ("/usr/include/c++/4.2/x86_64-linux-gnu", new_tokens[1]);
+  EXPECT_EQ("/usr/include/c++/4.2/backward", new_tokens[2]);
+  EXPECT_EQ("/usr/local/include", new_tokens[3]);
+  EXPECT_EQ("/usr/lib/gcc/x86_64-linux-gnu/4.2.4/include", new_tokens[4]);
+  EXPECT_EQ("/usr/include", new_tokens[5]);
+}
diff --git a/base/string_piece_utils.cc b/base/string_piece_utils.cc
new file mode 100644
index 0000000..cfd5ee6
--- /dev/null
+++ b/base/string_piece_utils.cc
@@ -0,0 +1,37 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+// BEGIN GOOGLE-INTERNAL
+// strings/stringpiece_utils emulation layer.
+// END GOOGLE-INTERNAL
+
+#include "string_piece_utils.h"
+
+#include <cstring>
+#include <sstream>
+
+namespace strings {
+
+namespace internal {
+
+// Concatenates |pieces|, in order, into one newly built string.
+std::string StrCatImpl(std::initializer_list<StringPiece> pieces) {
+  std::stringstream ss;
+  for (const auto& s : pieces) ss << s;
+  return ss.str();
+}
+
+}  // namespace internal
+
+// Empty |x| always matches and skips memcmp(): its data() may be null.
+bool StartsWith(StringPiece s, StringPiece x) {
+  return s.size() >= x.size() &&
+      (x.empty() || std::memcmp(s.data(), x.data(), x.size()) == 0);
+}
+
+bool EndsWith(StringPiece s, StringPiece x) {
+  if (x.empty() || s.size() < x.size()) return x.empty();  // No memcmp().
+  return std::memcmp(s.data() + (s.size() - x.size()), x.data(), x.size()) == 0;
+}
+
+}  // namespace strings
diff --git a/base/string_piece_utils.h b/base/string_piece_utils.h
new file mode 100644
index 0000000..4105537
--- /dev/null
+++ b/base/string_piece_utils.h
@@ -0,0 +1,32 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_BASE_STRING_PIECE_UTILS_H_
+#define DEVTOOLS_GOMA_BASE_STRING_PIECE_UTILS_H_
+
+#include <initializer_list>
+
+#include "string_piece.h"
+
+// TODO: Replace strings -> absl
+namespace strings {
+
+namespace internal {
+std::string StrCatImpl(std::initializer_list<StringPiece> pieces);
+}  // namespace internal
+
+// Returns whether s begins with x.
+bool StartsWith(StringPiece s, StringPiece x);
+
+// Returns whether s ends with x.
+bool EndsWith(StringPiece s, StringPiece x);
+
+template<typename... Strs>
+inline std::string StrCat(const Strs&... pieces) {
+  return internal::StrCatImpl({pieces...});  // Each piece -> StringPiece.
+}
+
+}  // namespace strings
+
+#endif  // DEVTOOLS_GOMA_BASE_STRING_PIECE_UTILS_H_
diff --git a/base/string_piece_utils_unittest.cc b/base/string_piece_utils_unittest.cc
new file mode 100644
index 0000000..523d92c
--- /dev/null
+++ b/base/string_piece_utils_unittest.cc
@@ -0,0 +1,73 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "string_piece_utils.h"
+
+#include <string>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+TEST(StringPieceUtils, StartsWith) {
+  const std::string s1("123" "\0" "456", 7);  // 7 bytes with an embedded NUL.
+  const StringPiece a("foobar");
+  const StringPiece b(s1);
+  const StringPiece e;  // Default-constructed piece.
+  EXPECT_TRUE(strings::StartsWith(a, a));
+  EXPECT_TRUE(strings::StartsWith(a, "foo"));
+  EXPECT_TRUE(strings::StartsWith(a, e));
+  EXPECT_TRUE(strings::StartsWith(b, s1));
+  EXPECT_TRUE(strings::StartsWith(b, b));
+  EXPECT_TRUE(strings::StartsWith(b, e));
+  EXPECT_TRUE(strings::StartsWith(e, ""));
+  EXPECT_FALSE(strings::StartsWith(a, b));
+  EXPECT_FALSE(strings::StartsWith(b, a));
+  EXPECT_FALSE(strings::StartsWith(e, a));
+
+  // Same tests with HasPrefixString.
+  EXPECT_TRUE(strings::StartsWith("foo/bar", "foo"));
+  EXPECT_FALSE(strings::StartsWith("foo/bar", "bar"));
+  EXPECT_TRUE(strings::StartsWith("foo/bar", "foo/bar"));
+  EXPECT_FALSE(strings::StartsWith("foo/bar", "foo/bar/"));
+
+  StringPiece abc("abcdefghijklmnopqrstuvwxyz");
+  EXPECT_TRUE(strings::StartsWith(abc, abc));
+  EXPECT_TRUE(strings::StartsWith(abc, "abcdefghijklm"));
+  EXPECT_FALSE(strings::StartsWith(abc, "abcdefguvwxyz"));  // Differs mid-way.
+}
+
+TEST(StringPieceUtils, EndsWith) {
+  const std::string s1("123" "\0" "456", 7);  // 7 bytes with an embedded NUL.
+  const StringPiece a("foobar");
+  const StringPiece b(s1);
+  const StringPiece e;  // Default-constructed piece.
+  EXPECT_TRUE(strings::EndsWith(a, a));
+  EXPECT_TRUE(strings::EndsWith(a, "bar"));
+  EXPECT_TRUE(strings::EndsWith(a, e));
+  EXPECT_TRUE(strings::EndsWith(b, s1));
+  EXPECT_TRUE(strings::EndsWith(b, b));
+  EXPECT_TRUE(strings::EndsWith(b, e));
+  EXPECT_TRUE(strings::EndsWith(e, ""));
+  EXPECT_FALSE(strings::EndsWith(a, b));
+  EXPECT_FALSE(strings::EndsWith(b, a));
+  EXPECT_FALSE(strings::EndsWith(e, a));
+
+  // Same tests with HasSuffixString.
+  EXPECT_FALSE(strings::EndsWith("foo/bar", "foo"));
+  EXPECT_TRUE(strings::EndsWith("foo/bar", "bar"));
+  EXPECT_TRUE(strings::EndsWith("foo/bar", "foo/bar"));
+  EXPECT_FALSE(strings::EndsWith("foo/bar", "foo/bar/"));
+
+  StringPiece abc("abcdefghijklmnopqrstuvwxyz");
+  EXPECT_TRUE(strings::EndsWith(abc, abc));
+  EXPECT_FALSE(strings::EndsWith(abc, "abcdefguvwxyz"));  // Differs mid-way.
+  EXPECT_TRUE(strings::EndsWith(abc, "nopqrstuvwxyz"));
+}
+
+TEST(StringPieceUtils, StrCat) {
+  EXPECT_EQ("", strings::StrCat());  // No arguments yields "".
+  EXPECT_EQ("a", strings::StrCat("a"));
+  EXPECT_EQ("ab", strings::StrCat("a", "b"));
+  EXPECT_EQ("abab", strings::StrCat("a", "b", "ab"));  // Argument order kept.
+}
diff --git a/base/strutil.cc b/base/strutil.cc
new file mode 100644
index 0000000..f32f3e1
--- /dev/null
+++ b/base/strutil.cc
@@ -0,0 +1,76 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "strutil.h"
+
+#include <assert.h>
+#include <cstring>
+
+using std::string;
+
+// ----------------------------------------------------------------------
+// StringReplace()
+//    Give me a string and two patterns "old" and "new", and I replace
+//    the first instance of "old" in the string with "new", if it
+//    exists.  If "replace_all" is true, every instance of "old" is
+//    replaced.  RETURN a new string, regardless of whether the replacement
+//    happened or not.
+// ----------------------------------------------------------------------
+
+string StringReplace(StringPiece s, StringPiece oldsub,
+                     StringPiece newsub, bool replace_all) {
+  string ret;
+  StringReplace(s, oldsub, newsub, replace_all, &ret);  // Appends into |ret|.
+  return ret;
+}
+
+
+// ----------------------------------------------------------------------
+// StringReplace()
+//    Replace the "old" pattern with the "new" pattern in a string,
+//    and append the result to "res".  If replace_all is false,
+//    it only replaces the first instance of "old."
+// ----------------------------------------------------------------------
+
+void StringReplace(StringPiece s, StringPiece oldsub,
+                   StringPiece newsub, bool replace_all,
+                   string* res) {
+  // |copied| is how many leading bytes of |s| have reached |res| so far.
+  StringPiece::size_type copied = 0;
+  // An empty |oldsub| is never replaced: |s| is appended unchanged below.
+  if (!oldsub.empty()) {
+    for (;;) {
+      const StringPiece::size_type hit = s.find(oldsub, copied);
+      if (hit == StringPiece::npos) {
+        break;
+      }
+      res->append(s.data() + copied, hit - copied);
+      res->append(newsub.data(), newsub.length());
+      copied = hit + oldsub.length();  // Resume right after the match.
+      if (!replace_all) {
+        break;
+      }
+    }
+  }
+  res->append(s.data() + copied, s.length() - copied);
+}
+
+// TODO: adapted from Chrome base, remove when base is here.
+#ifdef _WIN32
+static int strncasecmp(const char* s1, const char* s2, size_t count) {
+  return _strnicmp(s1, s2, count);
+}
+#endif  // _WIN32
+
+const char* strncaseprefix(const char* haystack, int haystack_size,
+                           const char* needle, int needle_size) {
+  // A needle longer than the haystack cannot be its prefix; otherwise the
+  // first needle_size bytes are compared case-insensitively.
+  const bool matched =
+      haystack_size >= needle_size &&
+      strncasecmp(haystack, needle, needle_size) == 0;
+  // On success, point just past the matched prefix.
+  return matched ? haystack + needle_size : nullptr;
+}
diff --git a/base/strutil.h b/base/strutil.h
new file mode 100644
index 0000000..8ba2421
--- /dev/null
+++ b/base/strutil.h
@@ -0,0 +1,58 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_BASE_STRUTIL_H_
+#define DEVTOOLS_GOMA_BASE_STRUTIL_H_
+
+#include <string>
+#include <vector>
+
+#include "string_piece.h"
+
+using std::string;
+
+// ----------------------------------------------------------------------
+// StringReplace()
+//    Give me a string and two patterns "old" and "new", and I replace
+//    the first instance of "old" in the string with "new" (or every
+//    instance, if "replace_all" is true).  RETURN a new string,
+//    regardless of whether the replacement happened or not.
+// ----------------------------------------------------------------------
+
+string StringReplace(StringPiece s, StringPiece oldsub,
+                     StringPiece newsub, bool replace_all);
+
+// ----------------------------------------------------------------------
+// StringReplace()
+//    Replace the "old" pattern with the "new" pattern in a string,
+//    and append the result to "res".  If replace_all is false,
+//    it only replaces the first instance of "old."
+//
+//    Here are a couple of notes on the "res" string:
+//      The "res" should not point to any of the input strings.
+//      Reserving enough capacity for the "res" prior to calling this
+//      function will improve the speed.
+// ----------------------------------------------------------------------
+
+void StringReplace(StringPiece s, StringPiece oldsub,
+                   StringPiece newsub, bool replace_all,
+                   string* res);
+
+// Matches a case-insensitive prefix (up to the first needle_size bytes of
+// needle) in the first haystack_size bytes of haystack. Returns a pointer past
+// the prefix, or NULL if the prefix wasn't matched.
+//
+// Always returns either NULL or haystack + needle_size.
+const char* strncaseprefix(const char* haystack, int haystack_size,
+                           const char* needle, int needle_size);
+
+// Case-insensitive prefix match: returns |str| past |prefix|, or NULL.
+template<class CharStar>
+inline CharStar var_strcaseprefix(CharStar str, const char* prefix) {
+  const char* end = strncaseprefix(str, strlen(str), prefix, strlen(prefix));
+  return end ? str + (end - str) : nullptr;  // Keeps CharStar's constness.
+}
+
+#endif  // DEVTOOLS_GOMA_BASE_STRUTIL_H_
diff --git a/base/strutil_unittest.cc b/base/strutil_unittest.cc
new file mode 100644
index 0000000..0edc2d5
--- /dev/null
+++ b/base/strutil_unittest.cc
@@ -0,0 +1,89 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "strutil.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+using std::string;
+
+TEST(StrUtilTest, StringReplace) {
+  LOG(INFO) << "Testing StringReplace";
+  string result2;
+
+  // test StringReplace core functionality (first occurrence only)
+  string value = "<attribute name=abcd/>";
+  string sub = "=";
+  string newsub = " = ";
+  string expected_result = "<attribute name = abcd/>";
+  string result = StringReplace(value, sub, newsub, false);
+  EXPECT_EQ(expected_result, result);
+
+  result2.clear();
+  StringReplace(value, sub, newsub, false, &result2);
+  EXPECT_EQ(expected_result, result2);
+
+  // test for negative case: the pattern does not occur in the input
+  value = "<attribute name=abcd/>";
+  sub = "-";
+  newsub = "=";
+  expected_result = "<attribute name=abcd/>";
+  result = StringReplace(value, sub, newsub, false);
+  EXPECT_EQ(expected_result, result);
+
+  result2.clear();
+  StringReplace(value, sub, newsub, false, &result2);
+  EXPECT_EQ(expected_result, result2);
+
+  // test StringReplace core functionality with repeated flag set
+  value = "<attribute name==abcd/>";
+  sub = "=";
+  newsub = " = ";
+  expected_result = "<attribute name =  = abcd/>";
+  result = StringReplace(value, sub, newsub, true);
+  EXPECT_EQ(expected_result, result);
+
+  result2.clear();
+  StringReplace(value, sub, newsub, true, &result2);
+  EXPECT_EQ(expected_result, result2);
+
+  // input is an empty string
+  value = "";
+  sub = "=";
+  newsub = " = ";
+  expected_result = "";
+  result = StringReplace(value, sub, newsub, false);
+  EXPECT_EQ(expected_result, result);
+
+  result2.clear();
+  StringReplace(value, sub, newsub, false, &result2);
+  EXPECT_EQ(expected_result, result2);
+
+  // input is an empty string and this is a request for repeated
+  // string replaces.
+  value = "";
+  sub = "=";
+  newsub = " = ";
+  expected_result = "";
+  result = StringReplace(value, sub, newsub, true);
+  EXPECT_EQ(expected_result, result);
+
+  result2.clear();
+  StringReplace(value, sub, newsub, true, &result2);
+  EXPECT_EQ(expected_result, result2);
+
+  // input and string to replace are both empty strings.
+  value = "";
+  sub = "";
+  newsub = " = ";
+  expected_result = "";
+  result = StringReplace(value, sub, newsub, false);
+  EXPECT_EQ(expected_result, result);
+
+  result2.clear();
+  StringReplace(value, sub, newsub, false, &result2);
+  EXPECT_EQ(expected_result, result2);
+}
diff --git a/base/thread_annotations.h b/base/thread_annotations.h
new file mode 100644
index 0000000..e57c18e
--- /dev/null
+++ b/base/thread_annotations.h
@@ -0,0 +1,158 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_BASE_THREAD_ANNOTATIONS_H_
+#define DEVTOOLS_GOMA_BASE_THREAD_ANNOTATIONS_H_
+
+
+#if defined(__clang__) && (!defined(SWIG))
+# define THREAD_ANNOTATION_ATTRIBUTE__(x)   __attribute__((x))
+#else
+# define THREAD_ANNOTATION_ATTRIBUTE__(x)   // no-op
+#endif
+
+// Document if a shared variable/field needs to be protected by a mutex.
+// GUARDED_BY allows the user to specify a particular mutex that should be
+// held when accessing the annotated variable.  GUARDED_VAR indicates that
+// a shared variable is guarded by some unspecified mutex, for use in rare
+// cases where a valid mutex expression cannot be specified.
+#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x))
+#define GUARDED_VAR   THREAD_ANNOTATION_ATTRIBUTE__(guarded)
+
+// Document if the memory location pointed to by a pointer should be guarded
+// by a mutex when dereferencing the pointer.  PT_GUARDED_VAR is analogous to
+// GUARDED_VAR.   Note that a pointer variable to a shared memory location
+// could itself be a shared variable. For example, if a shared global pointer
+// q, which is guarded by mu1, points to a shared memory location that is
+// guarded by mu2, q should be annotated as follows:
+//     int *q GUARDED_BY(mu1) PT_GUARDED_BY(mu2);
+#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x))
+#define PT_GUARDED_VAR   THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded)
+
+// Document the acquisition order between locks that can be held
+// simultaneously by a thread. For any two locks that need to be annotated
+// to establish an acquisition order, only one of them needs the annotation.
+// (i.e. You don't have to annotate both locks with both ACQUIRED_AFTER
+// and ACQUIRED_BEFORE.)
+#define ACQUIRED_AFTER(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__))
+
+#define ACQUIRED_BEFORE(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__))
+
+// Document a function that expects a mutex to be held prior to entry.
+// The mutex is expected to be held both on entry to and exit from the
+// function.
+#define EXCLUSIVE_LOCKS_REQUIRED(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(exclusive_locks_required(__VA_ARGS__))
+
+#define SHARED_LOCKS_REQUIRED(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(shared_locks_required(__VA_ARGS__))
+
+// Document the locks acquired in the body of the function. These locks
+// cannot be held when calling this function.
+#define LOCKS_EXCLUDED(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__))
+
+// Document a function that returns a mutex without acquiring it.  For example,
+// a public getter method that returns a pointer to a private mutex should
+// be annotated with LOCK_RETURNED.
+#define LOCK_RETURNED(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x))
+
+// Document if a class/type is a lockable type (such as the Mutex class).
+#define LOCKABLE \
+  THREAD_ANNOTATION_ATTRIBUTE__(lockable)
+
+// Document if a class does RAII locking (such as the MutexLock class).
+// The constructor should use LOCK_FUNCTION to specify the mutex that is
+// acquired, and the destructor should use UNLOCK_FUNCTION with no arguments;
+// the analysis will assume that the destructor unlocks whatever the
+// constructor locked.
+#define SCOPED_LOCKABLE \
+  THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable)
+
+// Document functions that acquire a lock in the body of a function, and do
+// not release it.
+#define EXCLUSIVE_LOCK_FUNCTION(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock_function(__VA_ARGS__))
+
+#define SHARED_LOCK_FUNCTION(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(shared_lock_function(__VA_ARGS__))
+
+// Document functions that expect a lock to be held on entry to the function,
+// and release it in the body of the function.
+#define UNLOCK_FUNCTION(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(unlock_function(__VA_ARGS__))
+
+// Document functions that try to acquire a lock, and return success or failure
+// (or a non-boolean value that can be interpreted as a boolean).
+// The first argument should be true for functions that return true on success,
+// or false for functions that return false on success. The second argument
+// specifies the mutex that is locked on success. If unspecified, it is assumed
+// to be 'this'.
+#define EXCLUSIVE_TRYLOCK_FUNCTION(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock_function(__VA_ARGS__))
+
+#define SHARED_TRYLOCK_FUNCTION(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock_function(__VA_ARGS__))
+
+// Document functions that dynamically check to see if a lock is held, and fail
+// if it is not held.
+#define ASSERT_EXCLUSIVE_LOCK(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(assert_exclusive_lock(__VA_ARGS__))
+
+#define ASSERT_SHARED_LOCK(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_lock(__VA_ARGS__))
+
+// Turns off thread safety checking within the body of a particular function.
+// This is used as an escape hatch for cases where the function is
+// correct, but the locking is more complicated than the analyzer can handle.
+#define NO_THREAD_SAFETY_ANALYSIS \
+  THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis)
+
+// TS_UNCHECKED should be placed around lock expressions that are not valid
+// C++ syntax, but which are present for documentation purposes.  These
+// annotations will be ignored by the analysis.
+#define TS_UNCHECKED(x) ""
+
+// TS_FIXME is used to mark lock expressions that are not valid C++ syntax.
+// It is used by automated tools to mark and disable invalid expressions.
+// The annotation should either be fixed, or changed to TS_UNCHECKED.
+#define TS_FIXME(x) ""
+
+// Like NO_THREAD_SAFETY_ANALYSIS, this turns off checking within the body of
+// a particular function.  However, this attribute is used to mark functions
+// that are incorrect and need to be fixed.  It is used by automated tools to
+// avoid breaking the build when the analysis is updated.
+// Code owners are expected to eventually fix the routine.
+#define NO_THREAD_SAFETY_ANALYSIS_FIXME  NO_THREAD_SAFETY_ANALYSIS
+
+// Similar to NO_THREAD_SAFETY_ANALYSIS_FIXME, this macro marks a GUARDED_BY
+// annotation that needs to be fixed, because it is producing thread safety
+// warning.  It disables the GUARDED_BY.
+#define GUARDED_BY_FIXME(x)
+
+// Disables warnings for a single read operation.  This can be used to avoid
+// warnings when it is known that the read is not actually involved in a race,
+// but the compiler cannot confirm that.
+#define TS_UNCHECKED_READ(x) thread_safety_analysis::ts_unchecked_read(x)
+
+namespace thread_safety_analysis {
+
+// Identity helpers behind TS_UNCHECKED_READ: take a reference to a guarded
+// data member and return it unchanged, with the analysis switched off.
+template <class T>
+inline const T& ts_unchecked_read(const T& v) NO_THREAD_SAFETY_ANALYSIS {
+  return v;
+}
+
+template <class T>  // Non-const overload of the helper above.
+inline T& ts_unchecked_read(T& v) NO_THREAD_SAFETY_ANALYSIS {
+  return v;
+}
+
+}  // namespace thread_safety_analysis
+
+#endif  // DEVTOOLS_GOMA_BASE_THREAD_ANNOTATIONS_H_
diff --git a/build/.gitignore b/build/.gitignore
new file mode 100644
index 0000000..9c2960a
--- /dev/null
+++ b/build/.gitignore
@@ -0,0 +1,7 @@
+*.mk
+*.pyc
+win_toolchain.json
+Debug/
+Debug64/
+Release/
+Release64/
diff --git a/build/archive.py b/build/archive.py
new file mode 100755
index 0000000..7ca7da3
--- /dev/null
+++ b/build/archive.py
@@ -0,0 +1,289 @@
+#!/usr/bin/env python
+# Copyright 2012 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Creates goma client release archives."""
+
+
+
+import hashlib
+import optparse
+import os
+import re
+import shutil
+import subprocess
+import sys
+import tarfile
+import zipfile
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+GOMACC_CMDS = ('g++', 'gcc', 'javac', 'cc', 'c++', 'clang', 'clang++')
+CHROMEOS_GOMACC_CMDS = (
+    'i686-pc-linux-gnu-gcc',
+    'i686-pc-linux-gnu-g++',
+    'armv7a-cros-linux-gnueabi-gcc',
+    'armv7a-cros-linux-gnueabi-g++',
+    'x86_64-pc-linux-gnu-gcc',
+    'x86_64-pc-linux-gnu-g++',
+    'arm-none-eabi-gcc',
+    'arm-none-eabi-g++',
+    'x86_64-cros-linux-gnu-gcc',
+    'x86_64-cros-linux-gnu-g++')
+
+
+try:
+  os.symlink  # Attribute probe only; raises AttributeError when missing.
+except AttributeError:
+  # No os.symlink on Windows: install a stub that always raises.
+  def __fake_symlink(src, dst):
+    raise NotImplementedError('symlink %s %s' % (src, dst))
+  os.symlink = __fake_symlink
+
+
+def CreatePlatformGomacc(distname, platform):
+  """Symlinks every compiler command name for |platform| to gomacc.
+
+  Args:
+    distname: distribution directory that receives the symlinks.
+    platform: 'goobuntu', 'chromeos' or 'mac'; others raise
+      NotImplementedError.
+  """
+  if platform not in ('goobuntu', 'chromeos', 'mac'):
+    raise NotImplementedError(platform)
+
+  # ChromeOS additionally wraps its cross-compiler command names.
+  extra_cmds = CHROMEOS_GOMACC_CMDS if platform == 'chromeos' else ()
+  for link_name in GOMACC_CMDS + extra_cmds:
+    os.symlink('gomacc', os.path.join(distname, link_name))
+
+
+def DeleteSymlinksToGomacc(distname):
+  """Removes each entry of distname that is a symlink pointing at 'gomacc'.
+
+  Args:
+    distname: distribution directory
+  """
+  # Symlinks exist only on posix, so gomacc.exe never needs checking here.
+  candidates = (os.path.join(distname, name) for name in os.listdir(distname))
+  for link in candidates:
+    if os.path.islink(link) and os.readlink(link) == 'gomacc':
+      os.remove(link)
+
+
+def InstallPlatformFiles(distname, platform):
+  """Install platform specific files in distname.
+
+  Args:
+    distname: distribution directory
+    platform: platform name.
+  Returns:
+    None. Only 'chromeos' has extra files; unknown platforms raise.
+  """
+  if platform == 'goobuntu' or platform == 'mac':
+    return  # No platform specific files for these platforms.
+  if platform != 'chromeos':
+    raise NotImplementedError(platform)
+  files = ['goma-wrapper', 'goma-make']
+  for f in files:
+    shutil.copy(f, distname)  # Source paths are relative to the cwd.
+
+
+def CreateAndroidDir(distname, platform):
+  """Creates android support directory if necessary.
+
+  Args:
+    distname: distribution directory.
+    platform: platform name.
+  Returns:
+    None. Only 'goobuntu' and 'mac' get an android/ directory.
+  """
+  if platform in ('goobuntu', 'mac'):
+    distname = os.path.join(distname, 'android')
+    shutil.rmtree(distname, ignore_errors=True)  # Start from a clean dir.
+    os.mkdir(distname)
+    for cmd in ('gomacc', 'compiler_proxy', 'goma_fetch',
+                'goma_auth.py', 'goma_ctl.py'):
+      os.symlink(os.path.join('..', cmd), os.path.join(distname, cmd))
+    for cmd in GOMACC_CMDS:
+      os.symlink('gomacc', os.path.join(distname, cmd))
+    for prefix in ('arm-eabi', 'arm-linux-androideabi',
+                   'i686-android-linux', 'i686-linux',
+                   'i686-unknown-linux-gnu',
+                   'i686-unknown-linux-gnu-i686-unknown-linux-gnu',
+                   'sh-linux-gnu'):
+      os.symlink('gomacc', os.path.join(distname, '%s-gcc' % prefix))
+      os.symlink('gomacc', os.path.join(distname, '%s-g++' % prefix))
+
+
+def MkTarball(src, dst_tar_file):
+  """Make tarball.
+
+  Note: basename of |src| would show up as a file's directory name in
+  a tar file.
+  e.g.
+  If you give "/tmp/foo/bar" that has followings inside as |src|:
+    /tmp/foo/bar/gomacc
+    /tmp/foo/bar/compiler_proxy
+  then, the generated archive would have files with following path names:
+    bar/gomacc
+    bar/compiler_proxy
+
+  Args:
+    src: an absolute path name of the directory to archive.
+    dst_tar_file: output tarball name; '.tbz' selects bzip2, else gzip.
+  """
+  dirname = os.path.dirname(src)
+  assert os.path.isabs(dirname)  # abspath() was always truthy; check for real.
+  def Filter(info):
+    # info.name carries no leading '/', hence the dirname[1:] comparison.
+    assert info.name.startswith(dirname[1:])
+    info.name = info.name[len(dirname):]
+    if info.name:  # Falls through (returns None) for an empty name.
+      print 'Adding: %s' % info.name
+      return info
+  mode = 'w:gz'
+  if os.path.splitext(dst_tar_file)[1] == '.tbz':
+    mode = 'w:bz2'
+
+  with tarfile.open(dst_tar_file, mode) as tf:
+    for path in os.listdir(src):
+      tf.add(os.path.join(src, path), filter=Filter)
+
+
+def MkZip(src, dst_zip_file):
+  """Make zip file.
+
+  Note: basename of |src| would show up as a file's directory name in
+  a zip file.
+  e.g.
+  If you give "c:\\Users\\foo\\bar" that has followings inside as |src|:
+    c:\\Users\\foo\\bar\\gomacc
+    c:\\Users\\foo\\bar\\compiler_proxy
+  then, the generated archive would have files with following path names:
+    bar\\gomacc
+    bar\\compiler_proxy
+
+  Args:
+    src: a full path name of the directory to archive.
+    dst_zip_file: an output zip filename.
+  """
+  dirname = os.path.dirname(src)
+  with zipfile.ZipFile(dst_zip_file, 'w',
+                       compression=zipfile.ZIP_DEFLATED) as zf:
+    for dirpath, _, filenames in os.walk(src):
+      for f in filenames:
+        orig_path = os.path.join(dirpath, f)
+        path = orig_path[len(dirname) + 1:]  # Path relative to src's parent.
+        print 'Adding: %s' % path
+        zf.write(orig_path, arcname=path)
+
+
+def main():
+  """Stages the goma-<platform> release files and archives them."""
+  option_parser = optparse.OptionParser()
+  option_parser.add_option('--platform',
+                           default={'darwin': 'mac',
+                                    'win32': 'win64',
+                                    'cygwin': 'win64'}.get(sys.platform, None),
+                           choices=('chromeos', 'goobuntu', 'mac', 'win64'),
+                           help='platform name')
+  option_parser.add_option('--build_dir', default='out',
+                           help='directory of build output')
+  option_parser.add_option('--target_dir', default='Release',
+                           help='subdirectory in build_dir to archive')
+  option_parser.add_option('--dist_dir', default='..',
+                           help='directory to put tgz')
+  option_parser.add_option('--store_in_commit_dir', action='store_true',
+                           help='store tgz in commit dir under dist_dir')
+
+  options, args = option_parser.parse_args()
+  if args:
+    option_parser.error('Unsupported args: %s' % ' '.join(args))
+  if not options.platform:  # No default exists for this host (e.g. Linux).
+    option_parser.error('--platform is required on %s' % sys.platform)
+  dist_absdir = os.path.abspath(options.dist_dir)
+  src_dir = os.getcwd()
+
+  if not os.path.isdir(dist_absdir):
+    os.makedirs(dist_absdir, 0755)
+  if options.store_in_commit_dir:
+    gitproc = subprocess.Popen(['git', 'log', '-1', '--pretty=%H'],
+                               shell=(sys.platform == 'win32'),
+                               stdout=subprocess.PIPE,
+                               cwd=src_dir)
+    commit = gitproc.communicate()[0].strip()
+    if gitproc.returncode:
+      print 'ERROR: git failed to get commit. exit=%d' % gitproc.returncode
+      return gitproc.returncode
+    if not commit:
+      print 'ERROR: empty commit hash?'
+      return 1
+    print 'Commit: %s' % commit
+    dist_absdir = os.path.join(dist_absdir, commit)
+    shutil.rmtree(dist_absdir, ignore_errors=True)
+    os.mkdir(dist_absdir, 0755)
+
+  os.chdir(os.path.join(src_dir, options.build_dir, options.target_dir))
+
+  distname = 'goma-%s' % options.platform
+  shutil.rmtree(distname, ignore_errors=True)
+
+  print 'Preparing files in %s in %s...' % (distname, os.getcwd())
+  print 'mkdir %s' % distname
+  os.mkdir(distname, 0755)
+  if options.platform == 'win64':
+    for cmd in ('gomacc.exe', 'compiler_proxy.exe', 'vcflags.exe',
+                'goma_fetch.exe'):
+      shutil.copy(cmd, distname)
+      pdb = os.path.splitext(cmd)[0] + '.pdb'
+      if not os.path.exists(pdb):
+        pdb = cmd + '.pdb'
+      shutil.copy(pdb, distname)
+    for f in ('.vpython', 'goma_auth.py', 'goma_ctl.py', 'goma_ctl.bat',
+              'diagnose_goma_log.py', 'compiler_proxy.sym', 'sha256.json',
+              'gomacc.sym'):
+      shutil.copy(f, distname)
+  else:
+    for f in ('.vpython', 'gomacc', 'compiler_proxy', 'goma_fetch',
+              'report_env.sh', 'diagnose_goma_log.py', 'compiler_proxy.sym',
+              'goma_auth.py', 'goma_ctl.py', 'sha256.json', 'gomacc.sym'):
+      shutil.copy(f, distname)
+    CreatePlatformGomacc(distname, options.platform)
+    InstallPlatformFiles(distname, options.platform)
+    CreateAndroidDir(distname, options.platform)
+
+  # Create an archive file.
+  if options.platform == 'win64':
+    target_file = os.path.join(dist_absdir, '%s.zip' % distname)
+    print 'Archiving in %s.zip' % distname
+    MkZip(os.path.realpath(distname), target_file)
+    compiler_proxy_path = 'compiler_proxy.exe'
+  else:
+    target_file = os.path.join(dist_absdir, '%s.tgz' % distname)
+    print 'Archiving in %s.tgz' % distname
+    MkTarball(os.path.realpath(distname), target_file)
+    compiler_proxy_path = os.path.join(distname, 'compiler_proxy')
+    # Since CIPD uses this directory for creating CIPD package,
+    # we need to remove gomacc symlinks.
+    DeleteSymlinksToGomacc(distname)
+
+  print
+  print '%s created.' % target_file
+
+  # Finds user-agent string (starts with 'compiler-proxy' and ends with 'Z',
+  # which is the last letter of timestamp) for compiler_proxy_user_agent.csv
+  # e.g. "compiler-proxy built by goma at " +
+  # "9d6775c48911ad1b80624720121a5e0d0c320adf@1330938783 " +
+  # "on 2012-03-05T09:20:30.931701Z"
+  with open(compiler_proxy_path, 'rb') as cp:
+    m = re.search(r'(compiler-proxy[- a-zA-Z0-9:.@]*Z)', cp.read())
+  if not m:
+    print 'ERROR: user-agent string not found in %s' % compiler_proxy_path
+    return 1
+  print '"%s",,%s' % (m.group(1), options.platform)
+  return 0
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/build/clean_vcproj.bat b/build/clean_vcproj.bat
new file mode 100644
index 0000000..286288b
--- /dev/null
+++ b/build/clean_vcproj.bat
@@ -0,0 +1,18 @@
+REM Copyright 2012 The Goma Authors. All rights reserved.

+REM Use of this source code is governed by a BSD-style license that can be

+REM found in the LICENSE file.

+

+REM Save the caller's directory and switch to this script's directory.

+pushd "%~dp0"

+rd /s /q ipch

+rd /s /q Debug

+rd /s /q Release

+del /q *.sln *.vcproj *.user *.filters *.vcxproj *.ncb *.sdf

+cd ..\lib

+del /q *.sln *.vcproj *.user *.filters *.vcxproj *.ncb *.sdf

+cd ..\client

+del /q *.sln *.vcproj *.user *.filters *.vcxproj *.ncb *.sdf

+cd ..\third_party

+del /q *.sln *.vcproj *.user *.filters *.vcxproj *.ncb *.sdf

+popd

+

diff --git a/build/compiled_action.gni b/build/compiled_action.gni
new file mode 100644
index 0000000..3d520e7
--- /dev/null
+++ b/build/compiled_action.gni
@@ -0,0 +1,175 @@
+# Copied from chromium build/.
+#
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# This file introduces two related templates that act like action and
+# action_foreach but instead of running a Python script, it will compile a
+# given tool in the host toolchain and run that (either once or over the list
+# of inputs, depending on the variant).
+#
+# Parameters
+#
+#   tool (required)
+#       [label] Label of the tool to run. This should be an executable, and
+#       this label should not include a toolchain (anything in parens). The
+#       host compile of this tool will be used.
+#
+#   outputs (required)
+#       [list of files] Like the outputs of action (if using "compiled_action",
+#       this would be just the list of outputs), or action_foreach (if using
+#       "compiled_action_foreach", this would contain source expansions mapping
+#       input to output files).
+#
+#   args (required)
+#       [list of strings] Same meaning as action/action_foreach.
+#
+#   inputs (optional)
+#       Files the binary takes as input. The step will be re-run whenever any
+#       of these change. If inputs is empty, the step will run only when the
+#       binary itself changes.
+#
+#   visibility
+#   deps
+#   args   (all optional)
+#       Same meaning as action/action_foreach.
+#
+#
+# Example of usage:
+#
+#   compiled_action("run_my_tool") {
+#     tool = "//tools/something:mytool"
+#     outputs = [
+#       "$target_gen_dir/mysource.cc",
+#       "$target_gen_dir/mysource.h",
+#     ]
+#
+#     # The tool takes this input.
+#     inputs = [ "my_input_file.idl" ]
+#
+#     # In this case, the tool takes as arguments the input file and the output
+#     # build dir (both relative to the "cd" that the script will be run in)
+#     # and will produce the output files listed above.
+#     args = [
+#       rebase_path("my_input_file.idl", root_build_dir),
+#       "--output-dir", rebase_path(target_gen_dir, root_build_dir),
+#     ]
+#   }
+#
+# You would typically declare your tool like this:
+#   if (host_toolchain == current_toolchain) {
+#     executable("mytool") {
+#       ...
+#     }
+#   }
+# The if statement around the executable is optional. That says "I only care
+# about this target in the host toolchain". Usually this is what you want, and
+# saves unnecessarily compiling your tool for the target platform. But if you
+# need a target build of your tool as well, just leave off the if statement.
+
+if (host_os == "win") {
+  _host_executable_suffix = ".exe"
+} else {
+  _host_executable_suffix = ""
+}
+
+template("compiled_action") {
+  assert(defined(invoker.tool), "tool must be defined for $target_name")
+  assert(defined(invoker.outputs), "outputs must be defined for $target_name")
+  assert(defined(invoker.args), "args must be defined for $target_name")
+
+  assert(!defined(invoker.sources),
+         "compiled_action doesn't take a sources arg. Use inputs instead.")
+
+  action(target_name) {
+    if (defined(invoker.visibility)) {
+      visibility = invoker.visibility
+    }
+
+    script = "//build/gn_run_binary.py"
+
+    if (defined(invoker.inputs)) {
+      inputs = invoker.inputs
+    } else {
+      inputs = []
+    }
+    outputs = invoker.outputs
+
+    # Construct the host toolchain version of the tool.
+    host_tool = invoker.tool + "($host_toolchain)"
+
+    # Get the path to the executable. Currently, this assumes that the tool
+    # does not specify output_name so that the target name is the name to use.
+    # If that's not the case, we'll need another argument to the script to
+    # specify this, since we can't know what the output name is (it might be in
+    # another file not processed yet).
+    host_executable =
+        get_label_info(host_tool, "root_out_dir") + "/" +
+        get_label_info(host_tool, "name") + _host_executable_suffix
+
+    # Add the executable itself as an input.
+    inputs += [ host_executable ]
+
+    deps = [
+      host_tool,
+    ]
+    if (defined(invoker.deps)) {
+      deps += invoker.deps
+    }
+
+    # The script takes as arguments the binary to run, and then the arguments
+    # to pass it.
+    args = [ rebase_path(host_executable, root_build_dir) ] + invoker.args
+  }
+}
+
+template("compiled_action_foreach") {
+  assert(defined(invoker.sources), "sources must be defined for $target_name")
+  assert(defined(invoker.tool), "tool must be defined for $target_name")
+  assert(defined(invoker.outputs), "outputs must be defined for $target_name")
+  assert(defined(invoker.args), "args must be defined for $target_name")
+
+  action_foreach(target_name) {
+    # Forward the invoker's visibility restriction, if one was given.
+    if (defined(invoker.visibility)) {
+      visibility = invoker.visibility
+    }
+
+    script = "//build/gn_run_binary.py"
+    sources = invoker.sources
+
+    if (defined(invoker.inputs)) {
+      inputs = invoker.inputs
+    } else {
+      inputs = []
+    }
+    outputs = invoker.outputs
+
+    # Construct the host toolchain version of the tool.
+    host_tool = invoker.tool + "($host_toolchain)"
+
+    # Get the path to the executable. Currently, this assumes that the tool
+    # does not specify output_name so that the target name is the name to use.
+    # If that's not the case, we'll need another argument to the script to
+    # specify this, since we can't know what the output name is (it might be in
+    # another file not processed yet).
+    host_executable =
+        get_label_info(host_tool, "root_out_dir") + "/" +
+        get_label_info(host_tool, "name") + _host_executable_suffix
+
+    # Add the executable itself as an input.
+    inputs += [ host_executable ]
+
+    deps = [
+      host_tool,
+    ]
+    if (defined(invoker.deps)) {
+      deps += invoker.deps
+    }
+
+    # The script takes as arguments the binary to run, and then the arguments
+    # to pass it.
+    args = [ rebase_path(host_executable, root_build_dir) ] + invoker.args
+  }
+}
diff --git a/build/config/BUILD.gn b/build/config/BUILD.gn
new file mode 100644
index 0000000..7f51e10
--- /dev/null
+++ b/build/config/BUILD.gn
@@ -0,0 +1,49 @@
+# Copyright 2014 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+config("debug") {
+  defines = [ "_DEBUG" ]
+  if (os == "linux") {
+    defines += [ "_GLIBCXX_DEBUG=1" ]
+  }
+}
+
+config("release") {
+  defines = [ "NDEBUG" ]
+}
+
+config("default_libs") {
+  if (os == "win") {
+    libs = [
+      "kernel32.lib",
+      "user32.lib",
+      "gdi32.lib",
+      "winspool.lib",
+      "comdlg32.lib",
+      "advapi32.lib",
+      "shell32.lib",
+      "ole32.lib",
+      "oleaut32.lib",
+      "uuid.lib",
+      "odbc32.lib",
+      "odbccp32.lib",
+      "psapi.lib",
+    ]
+  }
+}
+
+# Dependencies that all executables and shared libraries should have.
+group("exe_and_shlib_deps") {
+  public_deps = []
+  if (using_sanitizer) {
+    public_deps += [ "//build/config/sanitizers:deps" ]
+  }
+  if (use_custom_libcxx) {
+    public_deps += [ "//buildtools/third_party/libc++" ]
+  }
+
+  #if (use_afl) {
+  #  public_deps += [ "//third_party/afl" ]
+  #}
+}
diff --git a/build/config/BUILDCONFIG.gn b/build/config/BUILDCONFIG.gn
new file mode 100644
index 0000000..5f3db3f
--- /dev/null
+++ b/build/config/BUILDCONFIG.gn
@@ -0,0 +1,177 @@
+# Copyright 2014 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+declare_args() {
+  os = host_os
+  is_debug = true
+  is_asan = false
+  is_tsan = false
+
+  # Enables incremental link. Currently effective only on Win.
+  use_incremental_link = false
+
+  # Enables link time optimization. is_debug=false is required.
+  # Currently effective only on Win.
+  use_link_time_optimization = true
+
+  use_custom_libcxx = false
+  use_libfuzzer = false
+  use_sanitizer_coverage = false
+  with_cpu_profiling = false
+  with_heap_profiling = false
+  dcheck_always_on = false
+  with_counterz = false
+
+  # Enable revision check. If true and the gomacc revision and the
+  # compiler_proxy revision differ, the user will see warnings.
+  enable_revision_check = false
+
+  enable_lzma = false
+  cpu_arch = host_cpu
+}
+
+declare_args() {
+  is_clang = os == "mac" || os == "linux"
+}
+
+if (target_os == "") {
+  target_os = host_os
+}
+if (target_cpu == "") {
+  target_cpu = host_cpu
+}
+if (current_cpu == "") {
+  current_cpu = target_cpu
+}
+if (current_os == "") {
+  current_os = target_os
+}
+
+is_posix = os == "mac" || os == "linux"
+using_sanitizer = is_asan || is_tsan
+
+# perftools results cannot be read from a position independent executable
+# (PIE), so PIE hardening is disabled when a profiling option is set.
+may_make_pie = !with_cpu_profiling && !with_heap_profiling
+
+default_compiler_configs = [
+  "//build/config/compiler:compiler",
+  "//build/config/compiler:no_rtti",
+  "//build/config/compiler:goma_code",
+  "//build/config:default_libs",
+  "//build/config/sanitizers:default_sanitizer_flags",
+]
+if (is_debug) {
+  default_compiler_configs += [
+    "//build/config:debug",
+    "//build/config/compiler:no_optimize",
+  ]
+
+  if (os == "win" && use_incremental_link) {
+    # Note: use_incremental_link does not add "incremental_linking" when
+    # is_debug=false, because /LTCG makes the linker ignore /INCREMENTAL.
+    # With LTO, use_incremental_link selects /LTCG:INCREMENTAL instead.
+    default_compiler_configs += [ "//build/config/win:incremental_linking" ]
+  }
+} else {
+  default_compiler_configs += [
+    "//build/config:release",
+    "//build/config/compiler:optimize",
+  ]
+
+  if (os == "win" && !use_link_time_optimization) {
+    # Without link time optimization there is no /LTCG, so /INCREMENTAL works.
+    # Note: with use_link_time_optimization and use_incremental_link both on,
+    # /LTCG:INCREMENTAL is used instead, but it might cause b/35825478.
+    default_compiler_configs += [ "//build/config/win:incremental_linking" ]
+  }
+}
+
+if (is_posix) {
+  default_compiler_configs += [ "//build/config/gcc:no_exceptions" ]
+}
+if (is_asan) {
+  default_compiler_configs += [ "//build/config/compiler:asan" ]
+  is_clang = true
+}
+if (is_tsan) {
+  default_compiler_configs += [ "//build/config/compiler:tsan" ]
+  is_clang = true
+}
+if (enable_lzma) {
+  default_compiler_configs += [ "//build/config/compiler:enable_lzma" ]
+}
+
+if (with_cpu_profiling) {
+  default_compiler_configs += [ "//build/config/compiler:cpu_profiling" ]
+}
+
+if (with_heap_profiling) {
+  default_compiler_configs += [ "//build/config/compiler:heap_profiling" ]
+}
+
+if (with_counterz) {
+  default_compiler_configs += [ "//build/config/compiler:counterz" ]
+}
+
+if (enable_revision_check) {
+  default_compiler_configs +=
+      [ "//build/config/compiler:enable_revision_check" ]
+}
+
+if (os == "linux") {
+  if (is_clang) {
+    set_default_toolchain("//build/toolchain/linux:clang_$cpu_arch")
+    host_toolchain = "//build/toolchain/linux:clang_$cpu_arch"
+  } else {
+    set_default_toolchain("//build/toolchain/linux:$cpu_arch")
+    host_toolchain = "//build/toolchain/linux:$cpu_arch"
+  }
+}
+if (os == "mac" && is_clang) {
+  set_default_toolchain("//build/toolchain/mac:clang_x64")
+  host_toolchain = "//build/toolchain/mac:clang_x64"
+  default_compiler_configs += [ "//build/config/mac:sdk" ]
+  default_compiler_configs += [ "//build/config/compiler:mac" ]
+  cpu_arch = "x64"
+}
+
+if (os == "win") {
+  current_cpu = cpu_arch
+  if (is_clang) {
+    host_toolchain = "//build/toolchain/win:clang_$cpu_arch"
+  } else {
+    host_toolchain = "//build/toolchain/win:$cpu_arch"
+  }
+  set_default_toolchain(host_toolchain)
+  win_configs = [
+    "//build/config/win:lean_and_mean",
+    "//build/config/win:nominmax",
+    "//build/config/win:sdk",
+    "//build/config/win:unicode",
+    "//build/config/win:winver",
+    "//build/config/win:zlib",
+    "//build/config/win:glog",
+    "//build/config/win:rand_s",
+    "//build/config/win:sdk_link",
+    "//build/config/win:console",
+  ]
+  default_compiler_configs += win_configs
+}
+
+set_defaults("executable") {
+  configs = default_compiler_configs
+}
+
+set_defaults("static_library") {
+  configs = default_compiler_configs
+}
+
+set_defaults("shared_library") {
+  configs = default_compiler_configs
+}
+
+set_defaults("source_set") {
+  configs = default_compiler_configs
+}
diff --git a/build/config/clang/clang.gni b/build/config/clang/clang.gni
new file mode 100644
index 0000000..276643f
--- /dev/null
+++ b/build/config/clang/clang.gni
@@ -0,0 +1,11 @@
+# Copied from chromium build/config/clang and modified for goma.
+#
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//build/toolchain/toolchain.gni")
+
+declare_args() {
+  clang_base_path = "//third_party/llvm-build/Release+Asserts"
+}
diff --git a/build/config/compiler/BUILD.gn b/build/config/compiler/BUILD.gn
new file mode 100644
index 0000000..47b1563
--- /dev/null
+++ b/build/config/compiler/BUILD.gn
@@ -0,0 +1,356 @@
+# Copyright 2014 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+if (os == "win") {
+  import("//build/config/win/visual_studio_version.gni")
+  import("//build/toolchain/goma.gni")
+}
+
+import("//build/toolchain/toolchain.gni")
+
+config("compiler") {
+  asmflags = []
+  cflags = []
+  cflags_c = []
+  cflags_cc = []
+  cflags_objc = []
+  cflags_objcc = []
+  ldflags = []
+  defines = []
+  if (is_posix) {
+    cflags_cc += [ "-std=gnu++11" ]
+
+    cflags_c += [ "-std=c99" ]
+
+    # hardening.
+    # stack protection.
+    cflags += [ "-fstack-protector-all" ]
+
+    # making Address space layout randomization (ASLR) fully functional.
+    if (may_make_pie) {
+      cflags += [ "-fPIE" ]
+    }
+    defines += [ "_FORTIFY_SOURCE=2" ]
+  }
+  if (os == "linux") {
+    cflags += [
+      "-g",
+      "-pthread",
+      "-fPIC",
+      "-pipe",
+    ]
+    ldflags += [
+      "-pthread",
+      "-fPIC",
+      "-Wl,-z,noexecstack",
+      "-Wl,-z,now",
+      "-Wl,-z,relro",
+    ]
+    if (current_cpu == "x64") {
+      cflags += [
+        "-m64",
+        "-march=x86-64",
+      ]
+      ldflags += [ "-m64" ]
+    } else if (current_cpu == "x86") {
+      cflags += [ "-m32" ]
+      ldflags += [ "-m32" ]
+      cflags += [
+        "-msse2",
+        "-mfpmath=sse",
+        "-mmmx",
+      ]
+    }
+
+    if (use_lld) {
+      ldflags += [ "-fuse-ld=lld" ]
+    }
+  }
+  if (os == "mac") {
+    cflags_cc += [ "-stdlib=libc++" ]
+    ldflags += [ "-stdlib=libc++" ]
+    cflags += [ "-gdwarf-2" ]
+  }
+  if (os == "win") {
+    if (is_debug) {
+      cflags += [ "/MTd" ]
+    } else {
+      cflags += [ "/MT" ]
+    }
+
+    # Treat warnings as errors.
+    cflags += [ "/WX" ]
+
+    if (!use_goma || is_clang) {
+      if (is_debug) {
+        cflags += [ "/ZI" ]
+      } else {
+        cflags += [ "/Zi" ]
+      }
+    }
+
+    cflags += [
+      "/Gy",  # Enable Function-Level Linking
+      "/EHsc",  # Exception Handling used by STL.
+    ]
+    ldflags += [ "/DEBUG" ]
+    defines += [ "_HAS_EXCEPTIONS=0" ]
+    if (!is_clang) {
+      cflags += [
+        "/GS",  # Buffer Security Check
+        "/MP",  # Build With Multiple Processes
+        "/FS",  # Preserve previous PDB behavior.
+      ]
+    } else {
+      cflags += [
+        # Many files use intrinsics without including this header.
+        "/FIIntrin.h",
+      ]
+      if (visual_studio_version == "2013") {
+        cflags += [ "-fmsc-version=1800" ]
+      } else if (visual_studio_version == "2015") {
+        cflags += [ "-fmsc-version=1900" ]
+      } else if (visual_studio_version == "2017") {
+        cflags += [ "-fmsc-version=1911" ]
+      }
+      if (current_cpu == "x86") {
+        cflags += [ "-m32" ]
+      } else {
+        cflags += [ "-m64" ]
+      }
+    }
+  }
+  if (dcheck_always_on) {
+    defines += [ "DCHECK_ALWAYS_ON=1" ]
+  }
+
+  cflags_objc += cflags_c
+  cflags_objcc += cflags_cc
+
+  if (is_posix) {
+    asmflags += cflags
+    asmflags += cflags_c
+  }
+}
+
+config("mac") {
+  if (current_toolchain == "//build/toolchain/mac:clang_universal") {
+    cflags = [
+      "-arch",
+      "i386",
+      "-arch",
+      "x86_64",
+    ]
+    ldflags = [
+      "-arch",
+      "i386",
+      "-arch",
+      "x86_64",
+    ]
+  }
+  if (current_toolchain == "//build/toolchain/mac:clang") {
+    cflags = [
+      "-arch",
+      "x86_64",
+    ]
+    ldflags = [
+      "-arch",
+      "x86_64",
+    ]
+  }
+}
+
+config("asan") {
+  cflags = [ "-fsanitize=address" ]
+
+  # On Windows, we use link.exe even if clang-cl.exe is used, and it doesn't
+  # understand -fsanitize=address. So we need to omit the flag on Windows.
+  # TODO: Might want to re-visit here when we use lld.exe?
+  if (os != "win") {
+    ldflags = [ "-fsanitize=address" ]
+  }
+  defines = [ "ADDRESS_SANITIZER" ]
+}
+
+config("tsan") {
+  cflags = [ "-fsanitize=thread" ]
+  ldflags = [ "-fsanitize=thread" ]
+  defines = [ "THREAD_SANITIZER" ]
+}
+
+config("cpu_profiling") {
+  defines = [ "HAVE_CPU_PROFILER=1" ]
+  libs = [ "profiler" ]
+}
+
+config("heap_profiling") {
+  defines = [ "HAVE_HEAP_PROFILER=1" ]
+  libs = [ "tcmalloc" ]
+}
+
+config("counterz") {
+  defines = [ "HAVE_COUNTERZ=1" ]
+}
+
+config("enable_revision_check") {
+  defines = [ "ENABLE_REVISION_CHECK" ]
+}
+
+config("enable_lzma") {
+  defines = [ "ENABLE_LZMA" ]
+}
+
+# Optimization --------------------------------------------------------------
+
+config("optimize") {
+  if (os == "win") {
+    cflags = [
+      "/Ox",  # full optimization.
+      "/Ot",  # speed.
+      "/Oy",  # omit frame pointers.
+    ]
+
+    # TODO: support LTO for lld
+    if (use_link_time_optimization && !use_lld) {
+      if (!is_clang) {
+        cflags += [ "/GL" ]
+      }
+      if (use_incremental_link) {
+        ldflags = [ "/LTCG:INCREMENTAL" ]  # useLinkTimeCodeGeneration
+        arflags = [ "/LTCG:INCREMENTAL" ]  # useLinkTimeCodeGeneration
+      } else {
+        ldflags = [ "/LTCG" ]  # useLinkTimeCodeGeneration
+        arflags = [ "/LTCG" ]  # useLinkTimeCodeGeneration
+      }
+    }
+  } else {
+    cflags = [ "-O2" ]
+  }
+  if (os == "linux") {
+    cflags += [
+      "-msse2",
+
+      # Put data and code in their own sections, so that unused symbols can
+      # be removed at link time with --gc-sections.
+      "-fdata-sections",
+      "-ffunction-sections",
+    ]
+    ldflags = [
+      # http://lwn.net/Articles/192624/
+      "-Wl,-O1",
+      "-Wl,--as-needed",
+      "-Wl,--gc-sections",
+    ]
+  }
+  defines = [ "NDEBUG" ]
+}
+
+config("no_optimize") {
+  if (os == "win") {
+    cflags = [
+      "/Od",  # Disable optimization.
+      "/Ob0",  # Disable all inlining (on by default).
+      "/RTC1",  # Runtime checks for stack frame and uninitialized variables.
+    ]
+  } else {
+    cflags = [ "-O0" ]
+  }
+}
+
+# rtti -------------------------------------------------------------------
+config("rtti") {
+  if (os == "win") {
+    cflags_cc = [ "/GR" ]
+  }
+}
+
+config("no_rtti") {
+  if (os == "win") {
+    cflags_cc = [ "/GR-" ]
+  } else {
+    cflags_cc = [ "-fno-rtti" ]
+  }
+}
+
+# On Windows compiling on x64, VC will issue a warning when converting size_t
+# to int because it will truncate the value.
+config("no_size_t_to_int_warning") {
+  if (os == "win" && current_cpu == "x64") {
+    cflags = [ "/wd4267" ]
+  }
+}
+
+# goma_code --------------------------------------------------------------
+#
+# Toggles between higher and lower warnings for code that is part of goma.
+config("goma_code") {
+  if (os == "win") {
+    cflags = [ "/W4" ]  # Warning level 4.
+  } else {
+    cflags = [
+      "-Wall",
+      "-Wextra",
+      "-Wsign-compare",
+    ]
+  }
+  if (is_clang) {
+    cflags_cc = [
+      "-Wimplicit-fallthrough",
+      "-Wthread-safety",
+    ]
+  }
+}
+
+config("no_goma_code") {
+  cflags = []
+  cflags_cc = []
+  defines = []
+
+  if (os == "win") {
+    cflags += [
+      "/W3",  # Warning level 3.
+      "/wd4800",  # Disable warning when forcing value to bool.
+
+      # Suppress warnings on conversions that may cause loss of data.
+      "/wd4244",  # any conversion that may cause loss of data.
+      "/wd4267",  # conversion from __size_t.
+
+      # Allow POSIX style functions.
+      "/wd4996",
+
+      "/wd4291",  # no matching operator delete found
+
+      # conversion from 'unsigned int' to 'yasm_symrec_get_label_bytecodep'
+      # of greater size; this comes from yasm.
+      "/wd4312",
+    ]
+    defines += [
+      "_CRT_NONSTDC_NO_WARNINGS",
+      "_CRT_NONSTDC_NO_DEPRECATE",
+
+      # Suppress warning on unsafe functions.
+      "_CRT_SECURE_NO_WARNINGS",
+    ]
+  }
+
+  if (os == "linux") {
+    cflags += [
+      "-Wno-unused-result",
+      "-Wno-format",
+    ]
+    cflags_cc += [ "-Wno-deprecated" ]
+  }
+}
+
+# Some code presumes that pointers to structures/objects are compatible
+# regardless of whether what they point to is already known to be valid.
+# gcc 4.9 and earlier had no way of suppressing this warning without
+# suppressing the rest of them.  Here we centralize the identification of
+# the gcc 4.9 toolchains.
+config("no_incompatible_pointer_warnings") {
+  cflags = []
+  if (is_clang) {
+    cflags += [ "-Wno-incompatible-pointer-types" ]
+  }
+}
diff --git a/build/config/gcc/BUILD.gn b/build/config/gcc/BUILD.gn
new file mode 100644
index 0000000..3acf522
--- /dev/null
+++ b/build/config/gcc/BUILD.gn
@@ -0,0 +1,8 @@
+# Copyright 2016 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+config("no_exceptions") {
+  cflags_cc = [ "-fno-exceptions" ]
+  cflags_objcc = cflags_cc
+}
diff --git a/build/config/mac/BUILD.gn b/build/config/mac/BUILD.gn
new file mode 100644
index 0000000..e6f1631
--- /dev/null
+++ b/build/config/mac/BUILD.gn
@@ -0,0 +1,17 @@
+# Copyright 2014 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//build/config/mac/mac_sdk.gni")
+
+config("sdk") {
+  common_flags = [
+    "-isysroot",
+    mac_sdk_path,
+    "-mmacosx-version-min=$mac_deployment_target",
+  ]
+
+  cflags = common_flags
+  ldflags = common_flags
+  asmflags = common_flags
+}
diff --git a/build/config/mac/mac_sdk.gni b/build/config/mac/mac_sdk.gni
new file mode 100644
index 0000000..f1dea17
--- /dev/null
+++ b/build/config/mac/mac_sdk.gni
@@ -0,0 +1,33 @@
+# Copied from chromium build/config/mac/ and modified for goma client.
+#
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+declare_args() {
+  # Minimum supported version of the Mac SDK.
+  mac_sdk_min = "10.10"
+
+  # Minimum supported version of OSX.
+  mac_deployment_target = "10.7"
+
+  # Path to a specific version of the Mac SDK, not including a trailing slash.
+  # If empty, the path to the lowest version greater than or equal to
+  # mac_sdk_min is used.
+  mac_sdk_path = ""
+
+  # The SDK name as accepted by xcodebuild.
+  mac_sdk_name = "macosx"
+}
+
+find_sdk_args = [ "--print_sdk_path" ]
+find_sdk_args += [ mac_sdk_min ]
+
+# The tool will print the SDK path on the first line, and the version on the
+# second line.
+find_sdk_lines =
+    exec_script("//build/mac/find_sdk.py", find_sdk_args, "list lines")
+mac_sdk_version = find_sdk_lines[1]
+if (mac_sdk_path == "") {
+  mac_sdk_path = find_sdk_lines[0]
+}
diff --git a/build/config/sanitizers/BUILD.gn b/build/config/sanitizers/BUILD.gn
new file mode 100644
index 0000000..a78732b
--- /dev/null
+++ b/build/config/sanitizers/BUILD.gn
@@ -0,0 +1,459 @@
+# Copied from chromium's build/config/sanitizers/BUILD.gn,
+# and modified for goma.
+#
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//build/config/clang/clang.gni")
+import("//build/toolchain/toolchain.gni")
+
+declare_args() {
+  sanitizer_coverage_flags = ""
+}
+
+if (using_sanitizer && sanitizer_coverage_flags == "") {
+  sanitizer_coverage_flags = "edge,indirect-calls,8bit-counters"
+}
+
+# Contains the dependencies needed for sanitizers to link into executables and
+# shared_libraries. Unconditionally depend upon this target as it is empty if
+# |is_asan|, |is_lsan|, |is_tsan|, |is_msan| and |use_custom_libcxx| are false.
+group("deps") {
+  public_deps = [
+    ":deps_no_options",
+  ]
+  if (using_sanitizer) {
+    public_configs = [
+      ":sanitizer_options_link_helper",
+
+      # Even when a target removes default_sanitizer_flags, it may be depending
+      # on a library that did not remove default_sanitizer_flags. Thus, we need
+      # to add the ldflags here as well as in default_sanitizer_flags.
+      ":default_sanitizer_ldflags",
+    ]
+    deps = [
+      ":options_sources",
+    ]
+  }
+
+  #if (use_afl) {
+  #  deps += [ "//third_party/afl" ]
+  #}
+}
+
+group("deps_no_options") {
+  if (using_sanitizer) {
+    public_configs = [
+      # Even when a target removes default_sanitizer_flags, it may be depending
+      # on a library that did not remove default_sanitizer_flags. Thus, we need
+      # to add the ldflags here as well as in default_sanitizer_flags.
+      ":default_sanitizer_ldflags",
+    ]
+    deps = []
+    public_deps = []
+
+    data = [
+      "//tools/valgrind/asan/",
+    ]
+    if (os == "win") {
+      exe = ".exe"
+    } else {
+      exe = ""
+    }
+    data += [ "$clang_base_path/bin/llvm-symbolizer${exe}" ]
+    if (os == "linux") {
+      # llvm-symbolizer needs this.
+      data += [ "$clang_base_path/lib/libstdc++.so.6" ]
+    }
+
+    #if (use_prebuilt_instrumented_libraries ||
+    #    use_locally_built_instrumented_libraries) {
+    #  deps += [ "//third_party/instrumented_libraries:deps" ]
+    #}
+    if (use_custom_libcxx) {
+      public_deps += [ "//third_party/libc++:libcxx_proxy" ]
+      data += [ "$root_out_dir/libc++.so" ]
+    }
+
+    # On Windows and Mac, :copy_asan_runtime copies the dynamic ASAN runtime
+    # library from the clang directory into $root_out_dir. Pull it in as a
+    # data dependency so that it is available when binaries are run.
+    if (os == "win" || os == "mac") {
+      data_deps = [
+        ":copy_asan_runtime",
+      ]
+    }
+    if (os == "mac") {
+      public_deps += [ ":asan_runtime_bundle_data" ]
+    }
+  }
+}
+
+if ((os == "mac" || os == "win") && using_sanitizer) {
+  if (os == "mac") {
+    _clang_rt_dso_path = "darwin/libclang_rt.asan_osx_dynamic.dylib"
+  } else if (os == "win" && target_cpu == "x86") {
+    _clang_rt_dso_path = "windows/clang_rt.asan_dynamic-i386.dll"
+  } else if (os == "win" && target_cpu == "x64") {
+    _clang_rt_dso_path = "windows/clang_rt.asan_dynamic-x86_64.dll"
+  }
+
+  _clang_rt_dso_full_path =
+      "$clang_base_path/lib/clang/$clang_version/lib/$_clang_rt_dso_path"
+
+  copy("copy_asan_runtime") {
+    sources = [
+      _clang_rt_dso_full_path,
+    ]
+    outputs = [
+      "$root_out_dir/{{source_file_part}}",
+    ]
+  }
+
+  if (os == "mac") {
+    bundle_data("asan_runtime_bundle_data") {
+      sources = get_target_outputs(":copy_asan_runtime")
+      outputs = [
+        "{{bundle_executable_dir}}/{{source_file_part}}",
+      ]
+      public_deps = [
+        ":copy_asan_runtime",
+      ]
+    }
+  }
+}
+
+config("sanitizer_options_link_helper") {
+  if (os == "mac") {
+    ldflags = [ "-Wl,-U,_sanitizer_options_link_helper" ]
+  } else if (os != "win") {
+    ldflags = [ "-Wl,-u_sanitizer_options_link_helper" ]
+  }
+}
+
+static_library("options_sources") {
+  # This is a static_library instead of a source_set, as it shouldn't be
+  # unconditionally linked into targets.
+  visibility = [
+    ":deps",
+    "//:gn_visibility",
+  ]
+  sources = [
+    "//build/sanitizers/sanitizer_options.cc",
+  ]
+
+  # Don't compile this target with any sanitizer code. It can be called from
+  # the sanitizer runtimes, so instrumenting these functions could cause
+  # recursive calls into the runtime if there is an error.
+  configs -= [ "//build/config/sanitizers:default_sanitizer_flags" ]
+
+  if (is_asan) {
+    sources += [ "//build/sanitizers/asan_suppressions.cc" ]
+  }
+
+  #if (is_lsan) {
+  #  sources += [ lsan_suppressions_file ]
+  #}
+
+  if (is_tsan) {
+    sources += [ "//build/sanitizers/tsan_suppressions.cc" ]
+  }
+}
+
+# Applies linker flags necessary when either :deps or :default_sanitizer_flags
+# are used.
+config("default_sanitizer_ldflags") {
+  visibility = [
+    ":default_sanitizer_flags",
+    ":deps",
+  ]
+
+  if (is_posix) {
+    ldflags = []
+    if (is_asan) {
+      ldflags += [ "-fsanitize=address" ]
+    }
+
+    #if (is_lsan) {
+    #  ldflags += [ "-fsanitize=leak" ]
+    #}
+    if (is_tsan) {
+      ldflags += [ "-fsanitize=thread" ]
+    }
+
+    #if (is_msan) {
+    #  ldflags += [ "-fsanitize=memory" ]
+    #}
+    #if (is_ubsan || is_ubsan_security) {
+    #  ldflags += [ "-fsanitize=undefined" ]
+    #}
+    #if (is_ubsan_null) {
+    #  ldflags += [ "-fsanitize=null" ]
+    #}
+    #if (is_ubsan_vptr) {
+    #  ldflags += [ "-fsanitize=vptr" ]
+    #}
+
+    if (use_sanitizer_coverage) {
+      ldflags += [ "-fsanitize-coverage=$sanitizer_coverage_flags" ]
+    }
+
+    #if (is_cfi && !is_nacl) {
+    #  ldflags += [ "-fsanitize=cfi-vcall" ]
+    #  if (use_cfi_cast) {
+    #    ldflags += [
+    #      "-fsanitize=cfi-derived-cast",
+    #      "-fsanitize=cfi-unrelated-cast",
+    #    ]
+    #  }
+    #  if (use_cfi_diag) {
+    #    ldflags += [
+    #      "-fno-sanitize-trap=cfi",
+    #      "-fsanitize-recover=cfi",
+    #    ]
+    #  }
+    #}
+  }
+
+  if (is_asan && os == "win") {
+    if (target_cpu == "x64") {
+      libs = [ "$clang_base_path/lib/clang/$clang_version/lib/windows/clang_rt.asan-x86_64.lib" ]
+    } else {
+      libs = [ "$clang_base_path/lib/clang/$clang_version/lib/windows/clang_rt.asan-i386.lib" ]
+    }
+  }
+}
+
+config("common_sanitizer_flags") {
+  cflags = []
+  cflags_cc = []
+
+  # Sanitizers need line table info for stack traces. They don't need type info
+  # or variable info, so we can leave that out to speed up the build.
+  if (using_sanitizer) {
+    assert(is_clang, "sanitizers only supported with clang")
+    cflags += [
+      "-gline-tables-only",
+
+      # Column info in debug data can confuse Visual Studio's debugger, but
+      # it helps attribute sanitizer reports to CLs, so it is always enabled
+      # for sanitizer builds here.
+      "-gcolumn-info",
+    ]
+  }
+
+  # Common options for AddressSanitizer, LeakSanitizer, ThreadSanitizer,
+  # MemorySanitizer and non-official CFI builds.
+  if (using_sanitizer) {
+    if (is_posix) {
+      cflags += [ "-fno-omit-frame-pointer" ]
+    } else {
+      cflags += [ "/Oy-" ]
+    }
+  }
+
+  if (use_custom_libcxx) {
+    prefix = "//third_party"
+    include = "trunk/include"
+    cflags_cc += [
+      "-nostdinc++",
+      "-isystem" + rebase_path("$prefix/libc++/$include", root_build_dir),
+      "-isystem" + rebase_path("$prefix/libc++abi/$include", root_build_dir),
+    ]
+  }
+}
+
+# Adds the AddressSanitizer compile flag when is_asan is set; empty otherwise.
+config("asan_flags") {
+  if (is_asan) {
+    cflags = [ "-fsanitize=address" ]
+  }
+}
+
+#config("cfi_flags") {
+#  cflags = []
+#  if (is_cfi && !is_nacl) {
+#    cfi_blacklist_path =
+#        rebase_path("//tools/cfi/blacklist.txt", root_build_dir)
+#    cflags += [
+#      "-fsanitize=cfi-vcall",
+#      "-fsanitize-blacklist=$cfi_blacklist_path",
+#    ]
+#
+#    if (use_cfi_cast) {
+#      cflags += [
+#        "-fsanitize=cfi-derived-cast",
+#        "-fsanitize=cfi-unrelated-cast",
+#      ]
+#    }
+#
+#    if (use_cfi_diag) {
+#      cflags += [
+#        "-fno-sanitize-trap=cfi",
+#        "-fsanitize-recover=cfi",
+#        "-fno-inline-functions",
+#        "-fno-inline",
+#        "-fno-omit-frame-pointer",
+#        "-O1",
+#      ]
+#    } else {
+#      defines = [ "CFI_ENFORCEMENT" ]
+#    }
+#  }
+#}
+
+config("coverage_flags") {
+  cflags = []
+
+  if (use_sanitizer_coverage) {
+    cflags += [
+      "-fsanitize-coverage=$sanitizer_coverage_flags",
+      "-mllvm",
+      "-sanitizer-coverage-prune-blocks=1",
+    ]
+    defines = [ "SANITIZER_COVERAGE" ]
+  }
+}
+
+#config("lsan_flags") {
+#  if (is_lsan) {
+#    cflags = [ "-fsanitize=leak" ]
+#  }
+#}
+
+#config("msan_flags") {
+#  if (is_msan) {
+#    assert(is_linux, "msan only supported on linux x86_64")
+#    msan_blacklist_path =
+#        rebase_path("//tools/msan/blacklist.txt", root_build_dir)
+#    cflags = [
+#      "-fsanitize=memory",
+#      "-fsanitize-memory-track-origins=$msan_track_origins",
+#      "-fsanitize-blacklist=$msan_blacklist_path",
+#    ]
+#  }
+#}
+
+config("tsan_flags") {
+  if (is_tsan) {
+    assert(os == "linux", "tsan only supported on linux x86_64")
+    cflags = [ "-fsanitize=thread" ]
+  }
+}
+
+#config("ubsan_flags") {
+#  cflags = []
+#  if (is_ubsan) {
+#    ubsan_blacklist_path =
+#        rebase_path("//tools/ubsan/blacklist.txt", root_build_dir)
+#    cflags += [
+#      # Yasm dies with an "Illegal instruction" error when bounds checking is
+#      # enabled. See http://crbug.com/489901
+#      # "-fsanitize=bounds",
+#      "-fsanitize=float-divide-by-zero",
+#      "-fsanitize=integer-divide-by-zero",
+#      "-fsanitize=null",
+#      "-fsanitize=object-size",
+#      "-fsanitize=return",
+#      "-fsanitize=returns-nonnull-attribute",
+#      "-fsanitize=shift-exponent",
+#      "-fsanitize=signed-integer-overflow",
+#      "-fsanitize=unreachable",
+#      "-fsanitize=vla-bound",
+#      "-fsanitize-blacklist=$ubsan_blacklist_path",
+#    ]
+#
+#    # Chromecast ubsan builds fail to compile with these
+#    # experimental flags, so only add them to non-chromecast ubsan builds.
+#    if (!is_chromecast) {
+#      cflags += [
+#        # Employ the experimental PBQP register allocator to avoid slow
+#        # compilation on files with too many basic blocks.
+#        # See http://crbug.com/426271.
+#        "-mllvm",
+#        "-regalloc=pbqp",
+#
+#        # Speculatively use coalescing to slightly improve the code generated
+#        # by PBQP regallocator. May increase compile time.
+#        "-mllvm",
+#        "-pbqp-coalescing",
+#      ]
+#    }
+#  }
+#}
+
+#config("ubsan_no_recover") {
+#  if (is_ubsan_no_recover) {
+#    cflags = [ "-fno-sanitize-recover=undefined" ]
+#  }
+#}
+
+#config("ubsan_security_flags") {
+#  if (is_ubsan_security) {
+#    ubsan_security_blacklist_path =
+#        rebase_path("//tools/ubsan/security_blacklist.txt", root_build_dir)
+#    cflags = [
+#      "-fsanitize=signed-integer-overflow,shift,vptr,function,vla-bound",
+#      "-fsanitize-blacklist=$ubsan_security_blacklist_path",
+#    ]
+#  }
+#}
+
+#config("ubsan_null_flags") {
+#  if (is_ubsan_null) {
+#    cflags = [ "-fsanitize=null" ]
+#  }
+#}
+
+#config("ubsan_vptr_flags") {
+#  if (is_ubsan_vptr) {
+#    ubsan_vptr_blacklist_path =
+#        rebase_path("//tools/ubsan/vptr_blacklist.txt", root_build_dir)
+#    cflags = [
+#      "-fsanitize=vptr",
+#      "-fsanitize-blacklist=$ubsan_vptr_blacklist_path",
+#    ]
+#  }
+#}
+
+all_sanitizer_configs = [
+  ":common_sanitizer_flags",
+
+  ":coverage_flags",
+  ":default_sanitizer_ldflags",
+  ":asan_flags",
+  ":tsan_flags",
+
+  #":cfi_flags",
+  #":lsan_flags",
+  #":msan_flags",
+  #":ubsan_flags",
+  #":ubsan_no_recover",
+  #":ubsan_null_flags",
+  #":ubsan_security_flags",
+  #":ubsan_vptr_flags",
+]
+
+# This config is applied by default to all targets. It sets the compiler flags
+# for sanitizer usage, or, if no sanitizer is set, does nothing.
+#
+# This needs to be in a separate config so that targets can opt out of
+# sanitizers (by removing the config) if they desire. Even if a target
+# removes this config, executables & shared libraries should still depend on
+# :deps if any of their dependencies have not opted out of sanitizers.
+# default_sanitizer_flags_but_coverage derives from the same list.
+config("default_sanitizer_flags") {
+  configs = all_sanitizer_configs
+}
+
+# This config is equivalent to default_sanitizer_flags, but excludes ubsan_vptr.
+# This allows to selectively disable ubsan_vptr, when needed. In particular,
+# if some third_party code is required to be compiled without rtti, which
+# is a requirement for ubsan_vptr.
+#config("default_sanitizer_flags_but_ubsan_vptr") {
+#  configs = all_sanitizer_configs - [ ":ubsan_vptr_flags" ]
+#}
+config("default_sanitizer_flags_but_coverage") {
+  configs = all_sanitizer_configs - [ ":coverage_flags" ]
+}
diff --git a/build/config/win/BUILD.gn b/build/config/win/BUILD.gn
new file mode 100644
index 0000000..25e5dbe
--- /dev/null
+++ b/build/config/win/BUILD.gn
@@ -0,0 +1,170 @@
+# Copied from chromium build/config/win/ and revised for goma.
+# Copyright (c) 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//build/config/win/visual_studio_version.gni")
+
+# Compiler setup for the Windows SDK. Applied to all targets.
+config("sdk") {
+  # The include path is the stuff returned by the script.
+  #include_dirs = msvc_config[0]  TODO make this work.
+
+  defines = [
+    "_ATL_NO_OPENGL",
+    "_WINDOWS",
+    "CERT_CHAIN_PARA_HAS_EXTRA_FIELDS",
+    "NTDDI_VERSION=0x06030000",
+    "PSAPI_VERSION=1",
+    "WIN32",
+    "_SECURE_ATL",
+  ]
+}
+
+# Sets the default Windows build version. This is separated because some
+# targets need to manually override it for their compiles.
+config("winver") {
+  defines = [
+    "_WIN32_WINNT=0x0603",
+    "WINVER=0x0603",
+  ]
+}
+
+# Linker flags for Windows SDK setup, this is applied only to EXEs and DLLs.
+config("sdk_link") {
+  if (cpu_arch == "x64") {
+    ldflags = [ "/MACHINE:X64" ]
+    lib_dirs = [
+      "$windows_sdk_path\Lib\winv6.3\um\x64",
+      "$visual_studio_path\VC\lib\amd64",
+      "$visual_studio_path\VC\atlmfc\lib\amd64",
+
+      # for goma's buildbot.
+      "$windows_sdk_path\Lib\win8\um\x64",
+    ]
+  } else {
+    ldflags = [
+      "/MACHINE:X86",
+      "/SAFESEH",  # Not compatible with x64 so use only for x86.
+      "/largeaddressaware",
+    ]
+    lib_dirs = [
+      "$windows_sdk_path\Lib\winv6.3\um\x86",
+      "$visual_studio_path\VC\lib",
+      "$visual_studio_path\VC\atlmfc\lib",
+
+      # for goma's buildbot.
+      "$windows_sdk_path\Lib\win8\um\x86",
+    ]
+  }
+}
+
+# This default linker setup is provided separately from the SDK setup so
+# targets who want different library configurations can remove this and specify
+# their own.
+config("common_linker_setup") {
+  ldflags = [
+    "/FIXED:NO",
+    "/ignore:4199",
+    "/ignore:4221",
+    "/NXCOMPAT",
+
+    # Suggested by Microsoft Devrel to avoid
+    #   LINK : fatal error LNK1248: image size (80000000)
+    #   exceeds maximum allowable size (80000000)
+    # which started happening more regularly after VS2013 Update 4.
+    "/maxilksize:2147483647",
+  ]
+
+  # ASLR makes debugging with windbg difficult because Chrome.exe and
+  # Chrome.dll share the same base name. As result, windbg will name the
+  # Chrome.dll module like chrome_<base address>, where <base address>
+  # typically changes with each launch. This in turn means that breakpoints in
+  # Chrome.dll don't stick from one launch to the next. For this reason, we
+  # turn ASLR off in debug builds.
+  if (is_debug) {
+    ldflags += [ "/DYNAMICBASE:NO" ]
+  } else {
+    ldflags += [ "/DYNAMICBASE" ]
+  }
+
+  # Delay loaded DLLs.
+  ldflags += [
+    "/DELAYLOAD:dbghelp.dll",
+    "/DELAYLOAD:dwmapi.dll",
+    "/DELAYLOAD:shell32.dll",
+    "/DELAYLOAD:uxtheme.dll",
+  ]
+}
+
+# Subsystem --------------------------------------------------------------------
+
+config("console") {
+  ldflags = [ "/SUBSYSTEM:CONSOLE" ]
+}
+config("windowed") {
+  ldflags = [ "/SUBSYSTEM:WINDOWS" ]
+}
+
+# Incremental linking ----------------------------------------------------------
+
+config("incremental_linking") {
+  ldflags = [ "/INCREMENTAL" ]
+}
+config("no_incremental_linking") {
+  ldflags = [ "/INCREMENTAL:NO" ]
+}
+
+# Character set ----------------------------------------------------------------
+
+# Not including this config means "ansi" (8-bit system codepage).
+config("unicode") {
+  defines = [
+    "_UNICODE",
+    "UNICODE",
+  ]
+}
+
+# Lean and mean ----------------------------------------------------------------
+
+# Some third party code might not compile with WIN32_LEAN_AND_MEAN so we have
+# to have a separate config for it. Remove this config from your target to
+# get the "bloaty and accommodating" version of windows.h.
+config("lean_and_mean") {
+  defines = [ "WIN32_LEAN_AND_MEAN" ]
+}
+
+# Nominmax --------------------------------------------------------------------
+
+# Some third party code defines NOMINMAX before including windows.h, which
+# then causes warnings when it's been previously defined on the command line.
+# For such targets, this config can be removed.
+
+config("nominmax") {
+  defines = [ "NOMINMAX" ]
+}
+
+# Zlib --------------------------------------------------------------------
+
+config("zlib") {
+  defines = [ "HAVE_ZLIB" ]
+}
+
+# glog --------------------------------------------------------------------
+
+config("glog") {
+  defines = [
+    "GOOGLE_GLOG_DLL_DECL=",
+    "GOOGLE_GLOG_IS_A_DLL=0",
+    "GOOGLE_GLOG_DLL_DECL_FOR_UNITTESTS=",
+    "GLOG_NO_ABBREVIATED_SEVERITIES",
+  ]
+}
+
+# rand_s --------------------------------------------------------------------
+
+# _CRT_RAND_S should be defined before use of rand_s.
+
+config("rand_s") {
+  defines = [ "_CRT_RAND_S" ]
+}
diff --git a/build/config/win/manifest.gni b/build/config/win/manifest.gni
new file mode 100644
index 0000000..c94a04c
--- /dev/null
+++ b/build/config/win/manifest.gni
@@ -0,0 +1,114 @@
+# Copied from Chromium's build/config/win.
+#
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# HOW MANIFESTS WORK IN THE GN BUILD
+#
+# Use the windows_manifest template to declare a manifest generation step.
+# This will combine all listed .manifest files. To link this manifest, just
+# depend on the manifest target from your executable or shared library.
+#
+# This will define an empty placeholder target on non-Windows platforms so
+# the manifest declarations and dependencies do not need to be inside of OS
+# conditionals.
+#
+# A binary can depend on only one manifest target, but the manifest target
+# can depend on many individual .manifest files which will be merged. As a
+# result, only executables and shared libraries should depend on manifest
+# targets. If you want to add a manifest to a component, put the dependency
+# behind a "if (is_component_build)" conditional.
+#
+# Generally you will just want the defaults for the Chrome build. In this case
+# the binary should just depend on one of the targets in //build/win/. There
+# are also individual manifest files in that directory you can reference via
+# the *_manifest variables defined below to pick and choose only some defaults.
+# You might combine these with a custom manifest file to get specific behavior.
+
+# Reference this manifest as a source from windows_manifest targets to get
+# the default Chrome OS compatibility list.
+default_compatibility_manifest = "//build/win/compatibility.manifest"
+
+# Reference this manifest as a source from windows_manifest targets to get
+# the default Chrome common controls compatibility.
+common_controls_manifest = "//build/win/common_controls.manifest"
+
+# Reference this manifest to request that Windows not perform any elevation
+# when running your program. Otherwise, it might do some autodetection and
+# request elevated privileges from the user. This is normally what you want.
+as_invoker_manifest = "//build/win/as_invoker.manifest"
+
+# An alternative to as_invoker_manifest when you want the application to always
+# elevate.
+require_administrator_manifest = "//build/win/require_administrator.manifest"
+
+# Construct a target to combine the given manifest files into a .rc file.
+#
+# Variables for the windows_manifest template:
+#
+#   sources: (required)
+#     List of source .manifest files to add.
+#
+#   deps: (optional)
+#   visibility: (optional)
+#     Normal meaning.
+#
+# Example:
+#
+#   windows_manifest("doom_melon_manifest") {
+#     sources = [
+#       "doom_melon.manifest",   # Custom values in here.
+#       default_compatibility_manifest,  # Want the normal OS compat list.
+#     ]
+#   }
+#
+#   executable("doom_melon") {
+#     deps = [ ":doom_melon_manifest" ]
+#     ...
+#   }
+
+if (os == "win") {
+  template("windows_manifest") {
+    config_name = "${target_name}__config"
+    source_set_name = target_name
+
+    config(config_name) {
+      visibility = [ ":$source_set_name" ]
+      assert(defined(invoker.sources),
+             "\"sources\" must be defined for a windows_manifest target")
+      manifests = []
+      foreach(i, rebase_path(invoker.sources, root_build_dir)) {
+        manifests += [ "/manifestinput:" + i ]
+      }
+      ldflags = [
+                  "/manifest:embed",
+
+                  # We handle UAC by adding explicit .manifest files instead.
+                  "/manifestuac:no",
+                ] + manifests
+    }
+
+    # This source set only exists to add a dep on the invoker's deps and to
+    # add a public_config that sets ldflags on dependents.
+    source_set(source_set_name) {
+      forward_variables_from(invoker, [ "visibility" ])
+      public_configs = [ ":$config_name" ]
+
+      # Apply any dependencies from the invoker to this target, since those
+      # dependencies may have created the input manifest files.
+      forward_variables_from(invoker, [ "deps" ])
+    }
+  }
+} else {
+  # Make a no-op group on non-Windows platforms so windows_manifest
+  # instantiations don't need to be inside windows blocks.
+  template("windows_manifest") {
+    group(target_name) {
+      # Prevent unused variable warnings on non-Windows platforms.
+      assert(invoker.sources != "")
+      assert(!defined(invoker.deps) || invoker.deps != "")
+      assert(!defined(invoker.visibility) || invoker.visibility != "")
+    }
+  }
+}
diff --git a/build/config/win/visual_studio_version.gni b/build/config/win/visual_studio_version.gni
new file mode 100644
index 0000000..88a3c30
--- /dev/null
+++ b/build/config/win/visual_studio_version.gni
@@ -0,0 +1,40 @@
+# Copied from chromium's build/config/win/.
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+declare_args() {
+  # Path to Visual Studio. If empty, the default is used which is to use the
+  # automatic toolchain in depot_tools. If set, you must also set the
+  # visual_studio_version and wdk_path.
+  visual_studio_path = ""
+
+  # Version of Visual Studio pointed to by the visual_studio_path.
+  # Use "2013" for Visual Studio 2013, or "2013e" for the Express version.
+  visual_studio_version = ""
+
+  # Directory of the Windows driver kit. If visual_studio_path is empty, this
+  # will be auto-filled.
+  wdk_path = ""
+
+  # Full path to the Windows SDK, not including a backslash at the end.
+  # This value is the default location, override if you have a different
+  # installation location.
+  windows_sdk_path = "C:\Program Files (x86)\Windows Kits\10"
+}
+
+if (visual_studio_path == "") {
+  toolchain_data =
+      exec_script("../../vs_toolchain.py", [ "get_toolchain_dir" ], "scope")
+  visual_studio_path = toolchain_data.vs_path
+  windows_sdk_path = toolchain_data.sdk_path
+  visual_studio_version = toolchain_data.vs_version
+  wdk_path = toolchain_data.wdk_dir
+  visual_studio_runtime_dirs = toolchain_data.runtime_dirs
+} else {
+  assert(visual_studio_version != "",
+         "You must set the visual_studio_version if you set the path")
+  assert(wdk_path != "",
+         "You must set the wdk_path if you set the visual studio path")
+  visual_studio_runtime_dirs = []
+}
diff --git a/build/detect_host_arch.py b/build/detect_host_arch.py
new file mode 100755
index 0000000..8c0b14e
--- /dev/null
+++ b/build/detect_host_arch.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+#
+# Copied from chromium's build directory.
+#
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Outputs host CPU architecture in format recognized by gyp."""
+
+import platform
+import re
+import sys
+
+
+def HostArch():
+  """Returns the host architecture with a predictable string."""
+  host_arch = platform.machine().lower()  # Windows reports e.g. 'AMD64'.
+
+  # Convert machine type to format recognized by gyp.
+  if re.match(r'i.86', host_arch) or host_arch == 'i86pc':
+    host_arch = 'ia32'
+  elif host_arch in ['x86_64', 'amd64']:
+    host_arch = 'x64'
+  elif host_arch.startswith('arm'):
+    host_arch = 'arm'
+
+  # platform.machine is based on running kernel. It's possible to use 64-bit
+  # kernel with 32-bit userland, e.g. to give linker slightly more memory.
+  # Distinguish between different userland bitness by querying
+  # the python binary.
+  if host_arch == 'x64' and platform.architecture()[0] == '32bit':
+    host_arch = 'ia32'
+
+  return host_arch
+
+def DoMain(_):
+  """Hook to be called from gyp without starting a separate python
+  interpreter."""
+  return HostArch()
+
+if __name__ == '__main__':
+  print(DoMain([]))
diff --git a/build/download_gold_plugin.py b/build/download_gold_plugin.py
new file mode 100755
index 0000000..4a32bbd
--- /dev/null
+++ b/build/download_gold_plugin.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+#
+# This file is copied from chromium/src/download_gold_plugin.py
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Script to download LLVM gold plugin from google storage."""
+
+import find_depot_tools
+import json
+import os
+import shutil
+import subprocess
+import sys
+import zipfile
+
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+CHROME_SRC = os.path.abspath(os.path.join(SCRIPT_DIR, os.pardir))
+
+
+DEPOT_PATH = find_depot_tools.add_depot_tools_to_path()
+GSUTIL_PATH = os.path.join(DEPOT_PATH, 'gsutil.py')
+
+LLVM_BUILD_PATH = os.path.join(CHROME_SRC, 'third_party', 'llvm-build',
+                               'Release+Asserts')
+CLANG_UPDATE_PY = os.path.join(CHROME_SRC, 'tools', 'clang', 'scripts',
+                               'update.py')
+CLANG_REVISION = os.popen(CLANG_UPDATE_PY + ' --print-revision').read().rstrip()
+
+CLANG_BUCKET = 'gs://chromium-browser-clang/Linux_x64'
+
+def main():
+  """Downloads the gold plugin tarball for CLANG_REVISION and unpacks it."""
+  targz_name = 'llvmgold-%s.tgz' % CLANG_REVISION
+  os.chdir(LLVM_BUILD_PATH)
+  subprocess.check_call(['python', GSUTIL_PATH, 'cp',
+                         '%s/%s' % (CLANG_BUCKET, targz_name), targz_name])
+  try:
+    subprocess.check_call(['tar', 'xzf', targz_name])
+  finally:
+    os.remove(targz_name)  # Drop the tarball even when extraction fails.
+  return 0
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/build/dump_app_syms.bat b/build/dump_app_syms.bat
new file mode 100644
index 0000000..1b551d1
--- /dev/null
+++ b/build/dump_app_syms.bat
@@ -0,0 +1,13 @@
+REM Copyright 2013 The Goma Authors. All rights reserved.

+REM Use of this source code is governed by a BSD-style license that can be

+REM found in the LICENSE file.

+

+REM dump_sym_app for Windows based on:

+REM https://chromium.googlesource.com/chromium/+/trunk/build/linux/dump_app_syms

+

+set DUMPSYMS_TMP="%1"

+set DUMPSYMS=%DUMPSYMS_TMP:/=\%

+set INFILE="%2"

+set OUTFILE="%3"

+

+%DUMPSYMS% %INFILE% > %OUTFILE%

diff --git a/build/find_depot_tools.py b/build/find_depot_tools.py
new file mode 100644
index 0000000..9abd524
--- /dev/null
+++ b/build/find_depot_tools.py
@@ -0,0 +1,46 @@
+# Copied from Chromium's tools/.
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Small utility function to find depot_tools and add it to the python path.
+
+Will throw an ImportError exception if depot_tools can't be found since it
+imports breakpad.
+"""
+
+import os
+import sys
+
+
+def IsRealDepotTools(path):
+  return os.path.isfile(os.path.join(path, 'gclient.py'))
+
+
+def add_depot_tools_to_path():
+  """Returns depot_tools' path, ensuring it is on sys.path, or None."""
+  # First look if depot_tools is already in PYTHONPATH.
+  for i in sys.path:
+    if i.rstrip(os.sep).endswith('depot_tools') and IsRealDepotTools(i):
+      return i
+  # Then look if depot_tools is in PATH, common case.
+  for i in os.environ['PATH'].split(os.pathsep):
+    if IsRealDepotTools(i):
+      sys.path.append(i.rstrip(os.sep))
+      return i
+  # Rare case, it's not even in PATH, look upward up to root.
+  root_dir = os.path.dirname(os.path.abspath(__file__))
+  previous_dir = os.path.abspath(__file__)
+  while root_dir and root_dir != previous_dir:
+    i = os.path.join(root_dir, 'depot_tools')
+    if IsRealDepotTools(i):
+      sys.path.append(i)
+      return i
+    previous_dir = root_dir
+    root_dir = os.path.dirname(root_dir)
+  sys.stderr.write('Failed to find depot_tools\n')
+  return None
+
+add_depot_tools_to_path()
+
+# pylint: disable=W0611
+import breakpad
diff --git a/build/generate_known_warnings_list.go b/build/generate_known_warnings_list.go
new file mode 100644
index 0000000..43ef0a1
--- /dev/null
+++ b/build/generate_known_warnings_list.go
@@ -0,0 +1,562 @@
+/* Generate known_warnings_options.h from gcc documents.
+
+How to run:
+  $ go run generate_known_warnings_list.go > lib/known_warnings_options.h
+*/
+package main
+
+import (
+	"fmt"
+	"io/ioutil"
+	"net/http"
+	"os"
+	"regexp"
+	"sort"
+	"strings"
+)
+
+// Already known warnings. These warnings will be merged.
+var knownWarnings = []string{
+	"",
+	"address",
+	"aggregate-return",
+	"aligned-new",
+	"all",
+	"alloc-size-larger-than=",
+	"alloc-zero",
+	"alloca",
+	"alloca-larger-than=",
+	"array-bounds",
+	"array-bounds=",
+	"attribute-alias",
+	"bad-function-cast",
+	"bool-compare",
+	"bool-operation",
+	"c++-compat",
+	"c++11-compat",
+	"c++11-narrowing",
+	"c++14-compat",
+	"c++17-compat",
+	"c90-c99-compat",
+	"c99-c11-compat",
+	"cast-align",
+	"cast-align=",
+	"cast-qual",
+	"catch-value",
+	"catch-value=",
+	"char-subscripts",
+	"chkp",
+	"clobbered",
+	"comment",
+	"comments",
+	"conditionally-supported",
+	"conversion",
+	"covered-switch-default",
+	"dangling-else",
+	"date-time",
+	"declaration-after-statement",
+	"delete-incomplete",
+	"delete-non-virtual-dtor",
+	"deprecated",
+	"disabled-optimization",
+	"double-promotion",
+	"duplicate-decl-specifier",
+	"duplicated-branches",
+	"duplicated-cond",
+	"effc++",
+	"empty-body",
+	"endif-labels",
+	"enum-compare",
+	"error",
+	"error-implicit-function-declaration",
+	"error=",
+	"everything",
+	"exit-time-destructors",
+	"expansion-to-defined",
+	"extra",
+	"extra-semi",
+	"fatal-errors",
+	"float-conversion",
+	"float-equal",
+	"format",
+	"format-nonliteral",
+	"format-overflow",
+	"format-overflow=",
+	"format-security",
+	"format-signedness",
+	"format-truncation",
+	"format-truncation=",
+	"format-y2k",
+	"format=",
+	"frame-address",
+	"frame-larger-than",
+	"frame-larger-than=",
+	"global-constructors",
+	"header-hygiene",
+	"hsa",
+	"if-not-aligned",
+	"ignored-attributes",
+	"ignored-qualifiers",
+	"implicit",
+	"implicit-fallthrough",
+	"implicit-fallthrough=",
+	"implicit-function-declaration",
+	"implicit-int",
+	"inconsistent-missing-override",
+	"init-self",
+	"inline",
+	"int-in-bool-context",
+	"int-to-void-pointer-cast",
+	"invalid-memory-model",
+	"invalid-pch",
+	"jump-misses-init",
+	"larger-than=",
+	"logical-not-parentheses",
+	"logical-op",
+	"long-long",
+	"main",
+	"maybe-uninitialized",
+	"memset-elt-size",
+	"memset-transposed-args",
+	"misleading-indentation",
+	"missing-braces",
+	"missing-declarations",
+	"missing-field-initializers",
+	"missing-format-attribute",
+	"missing-include-dirs",
+	"missing-noreturn",
+	"missing-parameter-type",
+	"missing-prototypes",
+	"multistatement-macros",
+	"nested-externs",
+	"no-#pragma-messages",
+	"no-#warnings",
+	"no-abi",
+	"no-absolute-value",
+	"no-abstract-vbase-init",
+	"no-address-of-packed-member",
+	"no-aggressive-loop-optimizations",
+	"no-array-bounds",
+	"no-attributes",
+	"no-bitfield-width",
+	"no-bool-conversion",
+	"no-builtin-declaration-mismatch",
+	"no-builtin-macro-redefined",
+	"no-builtin-requires-header",
+	"no-c++11-compat",
+	"no-c++11-extensions",
+	"no-c++11-narrowing",
+	"no-c++98-compat",
+	"no-c++98-compat-pedantic",
+	"no-c99-extensions",
+	"no-cast-align",
+	"no-cast-qual",
+	"no-char-subscripts",
+	"no-comment",
+	"no-conditional-uninitialized",
+	"no-constant-conversion",
+	"no-constant-logical-operand",
+	"no-conversion",
+	"no-conversion-null",
+	"no-coverage-mismatch",
+	"no-covered-switch-default",
+	"no-cpp",
+	"no-dangling-else",
+	"no-delete-incomplete",
+	"no-delete-non-virtual-dtor",
+	"no-deprecated",
+	"no-deprecated-declarations",
+	"no-deprecated-register",
+	"no-designated-init",
+	"no-disabled-macro-expansion",
+	"no-discarded-array-qualifiers",
+	"no-discarded-qualifiers",
+	"no-div-by-zero",
+	"no-documentation",
+	"no-documentation-unknown-command",
+	"no-double-promotion",
+	"no-duplicate-decl-specifier",
+	"no-empty-body",
+	"no-endif-labels",
+	"no-enum-compare",
+	"no-enum-compare-switch",
+	"no-enum-conversion",
+	"no-error",
+	"no-error-sometimes-uninitialized",
+	"no-error-unused",
+	"no-exit-time-destructors",
+	"no-expansion-to-defined",
+	"no-extern-c-compat",
+	"no-extern-initializer",
+	"no-extra",
+	"no-extra-tokens",
+	"no-float-conversion",
+	"no-float-equal",
+	"no-for-loop-analysis",
+	"no-format",
+	"no-format-contains-nul",
+	"no-format-extra-args",
+	"no-format-nonliteral",
+	"no-format-pedantic",
+	"no-format-security",
+	"no-format-y2k",
+	"no-format-zero-length",
+	"no-four-char-constants",
+	"no-frame-larger-than",
+	"no-free-nonheap-object",
+	"no-gcc-compat",
+	"no-global-constructors",
+	"no-gnu-anonymous-struct",
+	"no-gnu-designator",
+	"no-gnu-variable-sized-type-not-at-end",
+	"no-gnu-zero-variadic-macro-arguments",
+	"no-header-guard",
+	"no-header-hygiene",
+	"no-ignored-attributes",
+	"no-ignored-qualifiers",
+	"no-implicit-exception-spec-mismatch",
+	"no-implicit-fallthrough",
+	"no-implicit-function-declaration",
+	"no-implicit-int",
+	"no-implicitly-unsigned-literal",
+	"no-import",
+	"no-incompatible-library-redeclaration",
+	"no-incompatible-pointer-types",
+	"no-incompatible-pointer-types-discards-qualifiers",
+	"no-inconsistent-dllimport",
+	"no-inconsistent-missing-override",
+	"no-inherited-variadic-ctor",
+	"no-initializer-overrides",
+	"no-inline-asm",
+	"no-inline-new-delete",
+	"no-int-conversion",
+	"no-int-to-pointer-cast",
+	"no-int-to-void-pointer-cast",
+	"no-invalid-noreturn",
+	"no-invalid-offsetof",
+	"no-literal-conversion",
+	"no-logical-op-parentheses",
+	"no-long-long",
+	"no-macro-redefined",
+	"no-max-unsigned-zero",
+	"no-maybe-uninitialized",
+	"no-microsoft-cast",
+	"no-microsoft-enum-forward-reference",
+	"no-microsoft-extra-qualification",
+	"no-microsoft-goto",
+	"no-microsoft-include",
+	"no-mismatched-tags",
+	"no-missing-braces",
+	"no-missing-field-initializers",
+	"no-missing-noescape",
+	"no-missing-noreturn",
+	"no-missing-prototypes",
+	"no-missing-variable-declarations",
+	"no-multichar",
+	"no-narrowing",
+	"no-nested-anon-types",
+	"no-newline-eof",
+	"no-non-literal-null-conversion",
+	"no-non-pod-varargs",
+	"no-non-virtual-dtor",
+	"no-nonnull",
+	"no-nonportable-include-path",
+	"no-null-conversion",
+	"no-null-dereference",
+	"no-null-pointer-arithmetic",
+	"no-nullability-completeness",
+	"no-objc-missing-property-synthesis",
+	"no-odr",
+	"no-old-style-cast",
+	"no-overflow",
+	"no-overloaded-virtual",
+	"no-override-init",
+	"no-padded",
+	"no-parentheses",
+	"no-parentheses-equality",
+	"no-pedantic",
+	"no-pedantic-ms-format",
+	"no-pessimizing-move",
+	"no-pointer-arith",
+	"no-pointer-bool-conversion",
+	"no-pointer-sign",
+	"no-pointer-to-int-cast",
+	"no-pragmas",
+	"no-psabi",
+	"no-reorder",
+	"no-reserved-id-macro",
+	"no-return-local-addr",
+	"no-return-type",
+	"no-scalar-storage-order",
+	"no-self-assign",
+	"no-semicolon-before-method-body",
+	"no-sequence-point",
+	"no-shadow",
+	"no-shadow-ivar",
+	"no-shift-count-overflow",
+	"no-shift-negative-value",
+	"no-shift-op-parentheses",
+	"no-shift-overflow",
+	"no-shift-sign-overflow",
+	"no-shorten-64-to-32",
+	"no-sign-compare",
+	"no-sign-conversion",
+	"no-sign-promo",
+	"no-signed-enum-bitfield",
+	"no-sizeof-pointer-memaccess",
+	"no-sometimes-uninitialized",
+	"no-strict-aliasing",
+	"no-strict-overflow",
+	"no-string-conversion",
+	"no-string-plus-int",
+	"no-switch",
+	"no-switch-enum",
+	"no-system-headers",
+	"no-tautological-compare",
+	"no-tautological-constant-compare",
+	"no-tautological-constant-out-of-range-compare",
+	"no-tautological-pointer-compare",
+	"no-tautological-undefined-compare",
+	"no-tautological-unsigned-enum-zero-compare",
+	"no-tautological-unsigned-zero-compare",
+	"no-thread-safety-analysis",
+	"no-thread-safety-negative",
+	"no-trigraphs",
+	"no-type-limits",
+	"no-typedef-redefinition",
+	"no-undeclared-selector",
+	"no-undef",
+	"no-undefined-bool-conversion",
+	"no-undefined-func-template",
+	"no-undefined-var-template",
+	"no-unguarded-availability",
+	"no-uninitialized",
+	"no-unknown-attributes",
+	"no-unknown-pragmas",
+	"no-unknown-warning-option",
+	"no-unnamed-type-template-args",
+	"no-unneeded-internal-declaration",
+	"no-unreachable-code",
+	"no-unreachable-code-break",
+	"no-unreachable-code-return",
+	"no-unused",
+	"no-unused-but-set-variable",
+	"no-unused-command-line-argument",
+	"no-unused-const-variable",
+	"no-unused-function",
+	"no-unused-label",
+	"no-unused-lambda-capture",
+	"no-unused-local-typedef",
+	"no-unused-local-typedefs",
+	"no-unused-macros",
+	"no-unused-member-function",
+	"no-unused-parameter",
+	"no-unused-private-field",
+	"no-unused-result",
+	"no-unused-template",
+	"no-unused-value",
+	"no-unused-variable",
+	"no-used-but-marked-unused",
+	"no-user-defined-warnings",
+	"no-varargs",
+	"no-variadic-macros",
+	"no-virtual-move-assign",
+	"no-vla",
+	"no-weak-vtables",
+	"no-writable-strings",
+	"no-write-strings",
+	"no-zero-as-null-pointer-constant",
+	"no-zero-length-array",
+	"non-virtual-dtor",
+	"nonnull",
+	"nonnull-compare",
+	"normalized=",
+	"null-dereference",
+	"objc-missing-property-synthesis",
+	"old-style-cast",
+	"old-style-declaration",
+	"old-style-definition",
+	"openmp-simd",
+	"overlength-strings",
+	"overloaded-virtual",
+	"override-init",
+	"override-init-side-effects",
+	"packed",
+	"packed-bitfield-compat",
+	"packed-not-aligned",
+	"padded",
+	"parentheses",
+	"partial-availability",
+	"pedantic",
+	"placement-new",
+	"placement-new=",
+	"pointer-arith",
+	"pointer-compare",
+	"pointer-sign",
+	"redundant-decls",
+	"restrict",
+	"return-type",
+	"sequence-point",
+	"shadow",
+	"shadow=",
+	"shift-count-negative",
+	"shift-count-overflow",
+	"shift-negative-value",
+	"shift-overflow",
+	"shift-overflow=",
+	"shorten-64-to-32",
+	"sign-compare",
+	"sign-conversion",
+	"sign-promo",
+	"sized-deallocation",
+	"sizeof-array-argument",
+	"sizeof-pointer-div",
+	"sizeof-pointer-memaccess",
+	"stack-protector",
+	"stack-usage",
+	"stack-usage=",
+	"strict-aliasing",
+	"strict-aliasing=",
+	"strict-overflow",
+	"strict-overflow=",
+	"strict-prototypes",
+	"string-conversion",
+	"stringop-overflow",
+	"stringop-overflow=",
+	"stringop-truncation",
+	"subobject-linkage",
+	"suggest-attribute=",
+	"suggest-final-methods",
+	"suggest-final-types",
+	"suggest-override",
+	"switch",
+	"switch-bool",
+	"switch-default",
+	"switch-enum",
+	"switch-unreachable",
+	"sync-nand",
+	"system-headers",
+	"tautological-compare",
+	"tautological-constant-out-of-range-compare",
+	"tautological-overlap-compare",
+	"tautological-unsigned-zero-compare",
+	"thread-safety",
+	"thread-safety-negative",
+	"traditional",
+	"traditional-conversion",
+	"trampolines",
+	"trigraphs",
+	"type-limits",
+	"undeclared-selector",
+	"undef",
+	"unguarded-availability",
+	"uninitialized",
+	"unknown-pragmas",
+	"unreachable-code",
+	"unreachable-code-break",
+	"unreachable-code-return",
+	"unsafe-loop-optimizations",
+	"unsuffixed-float-constants",
+	"unused",
+	"unused-but-set-parameter",
+	"unused-but-set-variable",
+	"unused-const-variable",
+	"unused-const-variable=",
+	"unused-function",
+	"unused-label",
+	"unused-lambda-capture",
+	"unused-local-typedefs",
+	"unused-macros",
+	"unused-parameter",
+	"unused-value",
+	"unused-variable",
+	"used-but-marked-unused",
+	"useless-cast",
+	"user-defined-warnings",
+	"varargs",
+	"variadic-macros",
+	"vector-operation-performance",
+	"vla",
+	"vla-larger-than=",
+	"volatile-register-var",
+	"write-strings",
+	"zero-as-null-pointer-constant",
+}
+
+// loadFromWeb reads gnu documents.
+func loadFromWeb() (string, error) {
+	resp, err := http.Get("https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html")
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return "", err
+	}
+
+	return string(body), nil
+}
+
+// main writes a freshly generated known-warning-options C++ header to stdout.
+//
+// It merges the built-in knownWarnings list with every -W option name found
+// in the downloaded GCC manual page, sorts and de-duplicates the names, and
+// prints them as the kKnownWarningOptions array. The process exits with
+// status 1 when the manual page cannot be loaded.
+func main() {
+	body, err := loadFromWeb()
+	if err != nil {
+		// End the message with a newline so it does not run into whatever is
+		// printed next on the terminal.
+		fmt.Fprintf(os.Stderr, "failed to load from the web: %v\n", err)
+		os.Exit(1)
+	}
+
+	// Work on a copy so that sorting below can never reorder knownWarnings.
+	warnings := append([]string(nil), knownWarnings...)
+
+	// Matches entries of the form <dt><code>-Wname</code></dt>.
+	codeRE := regexp.MustCompile(`(?m)<dt><code>-W(.*)</code></dt>`)
+	// Greedy on purpose: drops everything from the first '<' to the last '>'.
+	removeTagRE := regexp.MustCompile(`<.*>`)
+	for _, matched := range codeRE.FindAllStringSubmatch(body, -1) {
+		s := removeTagRE.ReplaceAllString(matched[1], "")
+
+		// Remove after '='. Don't remove '='.
+		if p := strings.Index(s, "="); p >= 0 {
+			s = s[:p+1]
+		}
+
+		// Remove spaces
+		warnings = append(warnings, strings.TrimSpace(s))
+	}
+
+	sort.Strings(warnings)
+
+	// unique. The slice is sorted, so duplicates are always adjacent.
+	var uniqueWarnings []string
+	for i, w := range warnings {
+		if i > 0 && w == warnings[i-1] {
+			continue
+		}
+		uniqueWarnings = append(uniqueWarnings, w)
+	}
+
+	fmt.Print(`// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// This is auto generated by build/generate_known_warnings_list.go
+// DO NOT EDIT
+
+#ifndef DEVTOOLS_GOMA_LIB_KNOWN_WARNING_OPTIONS_H_
+#define DEVTOOLS_GOMA_LIB_KNOWN_WARNING_OPTIONS_H_
+
+namespace devtools_goma {
+const char* const kKnownWarningOptions[] {
+`)
+	for _, w := range uniqueWarnings {
+		fmt.Printf("  \"%s\",\n", w)
+	}
+	fmt.Print(`};
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_LIB_KNOWN_WARNING_OPTIONS_H_
+`)
+}
diff --git a/build/get_diff_from_previous.py b/build/get_diff_from_previous.py
new file mode 100755
index 0000000..9bc98af
--- /dev/null
+++ b/build/get_diff_from_previous.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+
+# Copyright 2016 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Script to show diff from previous commit."""
+
+import argparse
+import json
+import os
+import subprocess
+
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def main():
+  parser = argparse.ArgumentParser()
+  parser.add_argument('-o', '--output-json',
+                      help=('A path to output filenames in JSON.'))
+  options = parser.parse_args()
+
+  git = 'git.bat' if os.name == 'nt' else 'git'
+  out = subprocess.check_output([git, 'diff', '--name-only', 'HEAD~1'],
+                                cwd=SCRIPT_DIR)
+
+  if options.output_json:
+    with open(options.output_json, 'w') as f:
+      result = out.splitlines()
+      json.dump(result, f)
+  print out
+
+
+if __name__ == '__main__':
+  main()
diff --git a/build/gn_helpers.py b/build/gn_helpers.py
new file mode 100644
index 0000000..9bd9865
--- /dev/null
+++ b/build/gn_helpers.py
@@ -0,0 +1,353 @@
+# Copied from chromium build/.
+#
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Helper functions useful when writing scripts that integrate with GN.
+
+The main functions are ToGNString and FromGNString which convert between
+serialized GN variables and Python variables.
+
+To use in a random python file in the build:
+
+  import os
+  import sys
+
+  sys.path.append(os.path.join(os.path.dirname(__file__),
+                               os.pardir, os.pardir, "build"))
+  import gn_helpers
+
+Where the sequence of parameters to join is the relative path from your source
+file to the build directory."""
+
+class GNException(Exception):
+  pass
+
+
+def ToGNString(value, allow_dicts = True):
+  """Returns a stringified GN equivalent of the Python value.
+
+  allow_dicts indicates if this function will allow converting dictionaries
+  to GN scopes. This is only possible at the top level, you can't nest a
+  GN scope in a list, so this should be set to False for recursive calls."""
+  if isinstance(value, basestring):
+    if value.find('\n') >= 0:
+      raise GNException("Trying to print a string with a newline in it.")
+    return '"' + \
+        value.replace('\\', '\\\\').replace('"', '\\"').replace('$', '\\$') + \
+        '"'
+
+  if isinstance(value, unicode):
+    return ToGNString(value.encode('utf-8'))
+
+  if isinstance(value, bool):
+    if value:
+      return "true"
+    return "false"
+
+  if isinstance(value, list):
+    return '[ %s ]' % ', '.join(ToGNString(v) for v in value)
+
+  if isinstance(value, dict):
+    if not allow_dicts:
+      raise GNException("Attempting to recursively print a dictionary.")
+    result = ""
+    for key in sorted(value):
+      if not isinstance(key, basestring):
+        raise GNException("Dictionary key is not a string.")
+      result += "%s = %s\n" % (key, ToGNString(value[key], False))
+    return result
+
+  if isinstance(value, int):
+    return str(value)
+
+  raise GNException("Unsupported type when printing to GN.")
+
+
+def FromGNString(input_string):
+  """Converts the input string from a GN serialized value to Python values.
+
+  For details on supported types see GNValueParser.Parse() below.
+
+  If your GN script did:
+    something = [ "file1", "file2" ]
+    args = [ "--values=$something" ]
+  The command line would look something like:
+    --values="[ \"file1\", \"file2\" ]"
+  Which when interpreted as a command line gives the value:
+    [ "file1", "file2" ]
+
+  You can parse this into a Python list using GN rules with:
+    input_values = FromGNValues(options.values)
+  Although the Python 'ast' module will parse many forms of such input, it
+  will not handle GN escaping properly, nor GN booleans. You should use this
+  function instead.
+
+
+  A NOTE ON STRING HANDLING:
+
+  If you just pass a string on the command line to your Python script, or use
+  string interpolation on a string variable, the strings will not be quoted:
+    str = "asdf"
+    args = [ str, "--value=$str" ]
+  Will yield the command line:
+    asdf --value=asdf
+  The unquoted asdf string will not be valid input to this function, which
+  accepts only quoted strings like GN scripts. In such cases, you can just use
+  the Python string literal directly.
+
+  The main use cases for this is for other types, in particular lists. When
+  using string interpolation on a list (as in the top example) the embedded
+  strings will be quoted and escaped according to GN rules so the list can be
+  re-parsed to get the same result."""
+  parser = GNValueParser(input_string)
+  return parser.Parse()
+
+
+def FromGNArgs(input_string):
+  """Converts a string with a bunch of gn arg assignments into a Python dict.
+
+  Given a whitespace-separated list of
+
+    <ident> = (integer | string | boolean | <list of the former>)
+
+  gn assignments, this returns a Python dict, i.e.:
+
+    FromGNArgs("foo=true\nbar=1\n") -> { 'foo': True, 'bar': 1 }.
+
+  Only simple types and lists supported; variables, structs, calls
+  and other, more complicated things are not.
+
+  This routine is meant to handle only the simple sorts of values that
+  arise in parsing --args.
+  """
+  parser = GNValueParser(input_string)
+  return parser.ParseArgs()
+
+
+def UnescapeGNString(value):
+  """Given a string with GN escaping, returns the unescaped string.
+
+  Be careful not to feed with input from a Python parsing function like
+  'ast' because it will do Python unescaping, which will be incorrect when
+  fed into the GN unescaper."""
+  result = ''
+  i = 0
+  while i < len(value):
+    if value[i] == '\\':
+      if i < len(value) - 1:
+        next_char = value[i + 1]
+        if next_char in ('$', '"', '\\'):
+          # These are the escaped characters GN supports.
+          result += next_char
+          i += 1
+        else:
+          # Any other backslash is a literal.
+          result += '\\'
+    else:
+      result += value[i]
+    i += 1
+  return result
+
+
+def _IsDigitOrMinus(char):
+  return char in "-0123456789"
+
+
+class GNValueParser(object):
+  """Duplicates GN parsing of values and converts to Python types.
+
+  Normally you would use the wrapper function FromGNValue() below.
+
+  If you expect input as a specific type, you can also call one of the Parse*
+  functions directly. All functions throw GNException on invalid input. """
+  def __init__(self, string):
+    self.input = string
+    self.cur = 0
+
+  def IsDone(self):
+    return self.cur == len(self.input)
+
+  def ConsumeWhitespace(self):
+    while not self.IsDone() and self.input[self.cur] in ' \t\n':
+      self.cur += 1
+
+  def Parse(self):
+    """Converts a string representing a printed GN value to the Python type.
+
+    See additional usage notes on FromGNString above.
+
+    - GN booleans ('true', 'false') will be converted to Python booleans.
+
+    - GN numbers ('123') will be converted to Python numbers.
+
+    - GN strings (double-quoted as in '"asdf"') will be converted to Python
+      strings with GN escaping rules. GN string interpolation (embedded
+      variables preceeded by $) are not supported and will be returned as
+      literals.
+
+    - GN lists ('[1, "asdf", 3]') will be converted to Python lists.
+
+    - GN scopes ('{ ... }') are not supported."""
+    result = self._ParseAllowTrailing()
+    self.ConsumeWhitespace()
+    if not self.IsDone():
+      raise GNException("Trailing input after parsing:\n  " +
+                        self.input[self.cur:])
+    return result
+
+  def ParseArgs(self):
+    """Converts a whitespace-separated list of ident=literals to a dict.
+
+    See additional usage notes on FromGNArgs, above.
+    """
+    d = {}
+
+    self.ConsumeWhitespace()
+    while not self.IsDone():
+      ident = self._ParseIdent()
+      self.ConsumeWhitespace()
+      if self.input[self.cur] != '=':
+        raise GNException("Unexpected token: " + self.input[self.cur:])
+      self.cur += 1
+      self.ConsumeWhitespace()
+      val = self._ParseAllowTrailing()
+      self.ConsumeWhitespace()
+      d[ident] = val
+
+    return d
+
+  def _ParseAllowTrailing(self):
+    """Internal version of Parse that doesn't check for trailing stuff."""
+    self.ConsumeWhitespace()
+    if self.IsDone():
+      raise GNException("Expected input to parse.")
+
+    next_char = self.input[self.cur]
+    if next_char == '[':
+      return self.ParseList()
+    elif _IsDigitOrMinus(next_char):
+      return self.ParseNumber()
+    elif next_char == '"':
+      return self.ParseString()
+    elif self._ConstantFollows('true'):
+      return True
+    elif self._ConstantFollows('false'):
+      return False
+    else:
+      raise GNException("Unexpected token: " + self.input[self.cur:])
+
+  def _ParseIdent(self):
+    ident = ''
+
+    next_char = self.input[self.cur]
+    if not next_char.isalpha() and not next_char=='_':
+      raise GNException("Expected an identifier: " + self.input[self.cur:])
+
+    ident += next_char
+    self.cur += 1
+
+    next_char = self.input[self.cur]
+    while next_char.isalpha() or next_char.isdigit() or next_char=='_':
+      ident += next_char
+      self.cur += 1
+      next_char = self.input[self.cur]
+
+    return ident
+
+  def ParseNumber(self):
+    self.ConsumeWhitespace()
+    if self.IsDone():
+      raise GNException('Expected number but got nothing.')
+
+    begin = self.cur
+
+    # The first character can include a negative sign.
+    if not self.IsDone() and _IsDigitOrMinus(self.input[self.cur]):
+      self.cur += 1
+    while not self.IsDone() and self.input[self.cur].isdigit():
+      self.cur += 1
+
+    number_string = self.input[begin:self.cur]
+    if not len(number_string) or number_string == '-':
+      raise GNException("Not a valid number.")
+    return int(number_string)
+
+  def ParseString(self):
+    self.ConsumeWhitespace()
+    if self.IsDone():
+      raise GNException('Expected string but got nothing.')
+
+    if self.input[self.cur] != '"':
+      raise GNException('Expected string beginning in a " but got:\n  ' +
+                        self.input[self.cur:])
+    self.cur += 1  # Skip over quote.
+
+    begin = self.cur
+    while not self.IsDone() and self.input[self.cur] != '"':
+      if self.input[self.cur] == '\\':
+        self.cur += 1  # Skip over the backslash.
+        if self.IsDone():
+          raise GNException("String ends in a backslash in:\n  " +
+                            self.input)
+      self.cur += 1
+
+    if self.IsDone():
+      raise GNException('Unterminated string:\n  ' + self.input[begin:])
+
+    end = self.cur
+    self.cur += 1  # Consume trailing ".
+
+    return UnescapeGNString(self.input[begin:end])
+
+  def ParseList(self):
+    self.ConsumeWhitespace()
+    if self.IsDone():
+      raise GNException('Expected list but got nothing.')
+
+    # Skip over opening '['.
+    if self.input[self.cur] != '[':
+      raise GNException("Expected [ for list but got:\n  " +
+                        self.input[self.cur:])
+    self.cur += 1
+    self.ConsumeWhitespace()
+    if self.IsDone():
+      raise GNException("Unterminated list:\n  " + self.input)
+
+    list_result = []
+    previous_had_trailing_comma = True
+    while not self.IsDone():
+      if self.input[self.cur] == ']':
+        self.cur += 1  # Skip over ']'.
+        return list_result
+
+      if not previous_had_trailing_comma:
+        raise GNException("List items not separated by comma.")
+
+      list_result += [ self._ParseAllowTrailing() ]
+      self.ConsumeWhitespace()
+      if self.IsDone():
+        break
+
+      # Consume comma if there is one.
+      previous_had_trailing_comma = self.input[self.cur] == ','
+      if previous_had_trailing_comma:
+        # Consume comma.
+        self.cur += 1
+        self.ConsumeWhitespace()
+
+    raise GNException("Unterminated list:\n  " + self.input)
+
+  def _ConstantFollows(self, constant):
+    """Returns true if the given constant follows immediately at the current
+    location in the input. If it does, the text is consumed and the function
+    returns true. Otherwise, returns false and the current position is
+    unchanged."""
+    end = self.cur + len(constant)
+    if end > len(self.input):
+      return False  # Not enough room.
+    if self.input[self.cur:end] == constant:
+      self.cur = end
+      return True
+    return False
diff --git a/build/gn_run_binary.py b/build/gn_run_binary.py
new file mode 100644
index 0000000..430e12b
--- /dev/null
+++ b/build/gn_run_binary.py
@@ -0,0 +1,24 @@
+# Copied from chromium build/.
+#
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Helper script for GN to run an arbitrary binary. See compiled_action.gni.
+
+Run with:
+  python gn_run_binary.py <binary_name> [args ...]
+"""
+
+import sys
+import subprocess
+
+# This script is designed to run binaries produced by the current build. We
+# always prefix it with "./" to avoid picking up system versions that might
+# also be on the path.
+path = './' + sys.argv[1]
+
+# The rest of the arguments are passed directly to the executable.
+args = [path] + sys.argv[2:]
+
+# Exit with the return code of the binary that was run.
+sys.exit(subprocess.call(args))
diff --git a/build/installer/.gitignore b/build/installer/.gitignore
new file mode 100644
index 0000000..c00df13
--- /dev/null
+++ b/build/installer/.gitignore
@@ -0,0 +1 @@
+*.deb
diff --git a/build/mac/find_sdk.py b/build/mac/find_sdk.py
new file mode 100755
index 0000000..fbc2d3a
--- /dev/null
+++ b/build/mac/find_sdk.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Note: copy of chromium's src/build/mac/find_sdk.py
+# Use of this source code is governed by a BSD-style license that can be
+# found in the chromium's LICENSE file.
+
+"""Prints the lowest locally available SDK version greater than or equal to a
+given minimum sdk version to standard output.
+
+Usage:
+  python find_sdk.py 10.6  # Ignores SDKs < 10.6
+"""
+
+import os
+import re
+import subprocess
+import sys
+
+
+from optparse import OptionParser
+
+
+def parse_version(version_str):
+  """'10.6' => [10, 6]"""
+  return map(int, re.findall(r'(\d+)', version_str))
+
+
+def main():
+  parser = OptionParser()
+  parser.add_option("--verify",
+                    action="store_true", dest="verify", default=False,
+                    help="return the sdk argument and warn if it doesn't exist")
+  parser.add_option("--sdk_path",
+                    action="store", type="string", dest="sdk_path", default="",
+                    help="user-specified SDK path; bypasses verification")
+  parser.add_option("--print_sdk_path",
+                    action="store_true", dest="print_sdk_path", default=False,
+                    help="Additionaly print the path the SDK (appears first).")
+  options, args = parser.parse_args()
+  if len(args) != 1:
+    parser.error('Please specify a minimum SDK version')
+  min_sdk_version = args[0]
+
+  job = subprocess.Popen(['xcode-select', '-print-path'],
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.STDOUT)
+  out, err = job.communicate()
+  if job.returncode != 0:
+    print >> sys.stderr, out
+    print >> sys.stderr, err
+    raise Exception(('Error %d running xcode-select, you might have to run '
+      '|sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer| '
+      'if you are using Xcode 4.') % job.returncode)
+  # The Developer folder moved in Xcode 4.3.
+  xcode43_sdk_path = os.path.join(
+      out.rstrip(), 'Platforms/MacOSX.platform/Developer/SDKs')
+  if os.path.isdir(xcode43_sdk_path):
+    sdk_dir = xcode43_sdk_path
+  else:
+    sdk_dir = os.path.join(out.rstrip(), 'SDKs')
+  sdks = [re.findall('^MacOSX(10\.\d+)\.sdk$', s) for s in os.listdir(sdk_dir)]
+  sdks = [s[0] for s in sdks if s]  # [['10.5'], ['10.6']] => ['10.5', '10.6']
+  sdks = [s for s in sdks  # ['10.5', '10.6'] => ['10.6']
+          if parse_version(s) >= parse_version(min_sdk_version)]
+  if not sdks:
+    raise Exception('No %s+ SDK found' % min_sdk_version)
+  best_sdk = sorted(sdks, key=parse_version)[0]
+
+  if options.verify and best_sdk != min_sdk_version and not options.sdk_path:
+    print >> sys.stderr, ''
+    print >> sys.stderr, '                                           vvvvvvv'
+    print >> sys.stderr, ''
+    print >> sys.stderr, \
+        'This build requires the %s SDK, but it was not found on your system.' \
+        % min_sdk_version
+    print >> sys.stderr, \
+        'Either install it, or explicitly set mac_sdk in your GYP_DEFINES.'
+    print >> sys.stderr, ''
+    print >> sys.stderr, '                                           ^^^^^^^'
+    print >> sys.stderr, ''
+    return min_sdk_version
+
+  if options.print_sdk_path:
+    print subprocess.check_output(['xcodebuild', '-version', '-sdk',
+                                   'macosx' + best_sdk, 'Path']).strip()
+
+  return best_sdk
+
+
+if __name__ == '__main__':
+  if sys.platform != 'darwin':
+    raise Exception("This script only runs on Mac")
+  print main()
+  sys.exit(0)
diff --git a/build/mac_toolchain.py b/build/mac_toolchain.py
new file mode 100755
index 0000000..428dcc9
--- /dev/null
+++ b/build/mac_toolchain.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python
+#
+# Copied from chromium's build directory.
+#
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Download necessary mac toolchain files under certain conditions.  If
+xcode-select is already set and points to an external folder
+(e.g. /Application/Xcode.app), this script only runs if the GYP_DEFINE
+|force_mac_toolchain| is set.  To override the values in
+|TOOLCHAIN_REVISION|-|TOOLCHAIN_SUB_REVISION| below, GYP_DEFINE
+mac_toolchain_revision can be used instead.
+
+This script will only run on machines if /usr/bin/xcodebuild and
+/usr/bin/xcode-select has been added to the sudoers list so the license can be
+accepted.
+
+Otherwise, user input would be required to complete the script.  Perhaps future
+versions can be modified to allow for user input on developer machines.
+"""
+
+import os
+import plistlib
+import shutil
+import subprocess
+import sys
+import tarfile
+import time
+import tempfile
+import urllib2
+
+# This can be changed after running /build/package_mac_toolchain.py.
+TOOLCHAIN_REVISION = '5B1008'
+TOOLCHAIN_SUB_REVISION = 2
+# Combined "<revision>-<sub revision>" identifier of the default toolchain.
+TOOLCHAIN_VERSION = '%s-%s' % (TOOLCHAIN_REVISION, TOOLCHAIN_SUB_REVISION)
+
+# Absolute path of the directory containing this script.
+BASE_DIR = os.path.abspath(os.path.dirname(__file__))
+# Where the downloaded toolchain is unpacked.
+TOOLCHAIN_BUILD_DIR = os.path.join(BASE_DIR, 'mac_files', 'Xcode.app')
+# File recording which toolchain revision is currently unpacked.
+STAMP_FILE = os.path.join(BASE_DIR, 'mac_files', 'toolchain_build_revision')
+# Storage location (gs:// URL prefix) the toolchain archives are fetched from.
+TOOLCHAIN_URL = 'gs://chrome-mac-sdk/'
+
+
+def GetToolchainDirectory():
+  if sys.platform == 'darwin' and not UseLocalMacSDK():
+    return TOOLCHAIN_BUILD_DIR
+  else:
+    return None
+
+
+def SetToolchainEnvironment():
+  mac_toolchain_dir = GetToolchainDirectory()
+  if mac_toolchain_dir:
+    os.environ['DEVELOPER_DIR'] = mac_toolchain_dir
+
+
+def ReadStampFile():
+  """Return the contents of the stamp file, or '' if it doesn't exist."""
+  try:
+    with open(STAMP_FILE, 'r') as f:
+      return f.read().rstrip()
+  except IOError:
+    return ''
+
+
+def WriteStampFile(s):
+  """Write s to the stamp file."""
+  EnsureDirExists(os.path.dirname(STAMP_FILE))
+  with open(STAMP_FILE, 'w') as f:
+    f.write(s)
+    f.write('\n')
+
+
+def EnsureDirExists(path):
+  if not os.path.exists(path):
+    os.makedirs(path)
+
+
+def DownloadAndUnpack(url, output_dir):
+  """Decompresses |url| into a cleared |output_dir|."""
+  temp_name = tempfile.mktemp(prefix='mac_toolchain')
+  try:
+    print 'Downloading new toolchain.'
+    subprocess.check_call(['gsutil.py', 'cp', url, temp_name])
+    if os.path.exists(output_dir):
+      print 'Deleting old toolchain.'
+      shutil.rmtree(output_dir)
+    EnsureDirExists(output_dir)
+    print 'Unpacking new toolchain.'
+    tarfile.open(mode='r:gz', name=temp_name).extractall(path=output_dir)
+  finally:
+    if os.path.exists(temp_name):
+      os.unlink(temp_name)
+
+
+def CanAccessToolchainBucket():
+  """Checks whether the user has access to |TOOLCHAIN_URL|."""
+  proc = subprocess.Popen(['gsutil.py', 'ls', TOOLCHAIN_URL],
+                           stdout=subprocess.PIPE)
+  proc.communicate()
+  return proc.returncode == 0
+
+def LoadPlist(path):
+  """Loads Plist at |path| and returns it as a dictionary."""
+  fd, name = tempfile.mkstemp()
+  try:
+    subprocess.check_call(['plutil', '-convert', 'xml1', '-o', name, path])
+    with os.fdopen(fd, 'r') as f:
+      return plistlib.readPlist(f)
+  finally:
+    os.unlink(name)
+
+
+def AcceptLicense():
+  """Use xcodebuild to accept new toolchain license if necessary.  Don't accept
+  the license if a newer license has already been accepted. This only works if
+  xcodebuild and xcode-select are passwordless in sudoers."""
+
+  # Check old license
+  try:
+    target_license_plist_path = \
+        os.path.join(TOOLCHAIN_BUILD_DIR,
+                     *['Contents','Resources','LicenseInfo.plist'])
+    target_license_plist = LoadPlist(target_license_plist_path)
+    build_type = target_license_plist['licenseType']
+    build_version = target_license_plist['licenseID']
+
+    accepted_license_plist = LoadPlist(
+        '/Library/Preferences/com.apple.dt.Xcode.plist')
+    agreed_to_key = 'IDELast%sLicenseAgreedTo' % build_type
+    last_license_agreed_to = accepted_license_plist[agreed_to_key]
+
+    # Historically all Xcode build numbers have been in the format of AANNNN, so
+    # a simple string compare works.  If Xcode's build numbers change this may
+    # need a more complex compare.
+    if build_version <= last_license_agreed_to:
+      # Don't accept the license of older toolchain builds, this will break the
+      # license of newer builds.
+      return
+  except (subprocess.CalledProcessError, KeyError):
+    # If there's never been a license of type |build_type| accepted,
+    # |target_license_plist_path| or |agreed_to_key| may not exist.
+    pass
+
+  print "Accepting license."
+  old_path = subprocess.Popen(['/usr/bin/xcode-select', '-p'],
+                               stdout=subprocess.PIPE).communicate()[0].strip()
+  try:
+    build_dir = os.path.join(TOOLCHAIN_BUILD_DIR, 'Contents/Developer')
+    subprocess.check_call(['sudo', '/usr/bin/xcode-select', '-s', build_dir])
+    subprocess.check_call(['sudo', '/usr/bin/xcodebuild', '-license', 'accept'])
+  finally:
+    subprocess.check_call(['sudo', '/usr/bin/xcode-select', '-s', old_path])
+
+
+def UseLocalMacSDK():
+  force_pull = os.environ.has_key('FORCE_MAC_TOOLCHAIN')
+
+  # Don't update the toolchain if there's already one installed outside of the
+  # expected location for a Chromium mac toolchain, unless |force_pull| is set.
+  proc = subprocess.Popen(['xcode-select', '-p'], stdout=subprocess.PIPE)
+  xcode_select_dir = proc.communicate()[0]
+  rc = proc.returncode
+  return (not force_pull and rc == 0 and
+          TOOLCHAIN_BUILD_DIR not in xcode_select_dir)
+
+
+def main():
+  if sys.platform != 'darwin':
+    return 0
+
+  # TODO: Add support for GN per crbug.com/570091
+  if UseLocalMacSDK():
+    print 'Using local toolchain.'
+    return 0
+
+  toolchain_revision = os.environ.get('MAC_TOOLCHAIN_REVISION',
+                                      TOOLCHAIN_VERSION)
+  if ReadStampFile() == toolchain_revision:
+    print 'Toolchain (%s) is already up to date.' % toolchain_revision
+    AcceptLicense()
+    return 0
+
+  if not CanAccessToolchainBucket():
+    print 'Cannot access toolchain bucket.'
+    return 0
+
+  # Reset the stamp file in case the build is unsuccessful.
+  WriteStampFile('')
+
+  toolchain_file = '%s.tgz' % toolchain_revision
+  toolchain_full_url = TOOLCHAIN_URL + toolchain_file
+
+  print 'Updating toolchain to %s...' % toolchain_revision
+  try:
+    toolchain_file = 'toolchain-%s.tgz' % toolchain_revision
+    toolchain_full_url = TOOLCHAIN_URL + toolchain_file
+    DownloadAndUnpack(toolchain_full_url, TOOLCHAIN_BUILD_DIR)
+    AcceptLicense()
+
+    print 'Toolchain %s unpacked.' % toolchain_revision
+    WriteStampFile(toolchain_revision)
+    return 0
+  except Exception as e:
+    print 'Failed to download toolchain %s.' % toolchain_file
+    print 'Exception %s' % e
+    print 'Exiting.'
+    return 1
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/build/run_gn.py b/build/run_gn.py
new file mode 100755
index 0000000..9899a12
--- /dev/null
+++ b/build/run_gn.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+# Copyright 2016 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Wrapper script to run gn in this directory."""
+
+import os
+import sys
+import subprocess
+
+_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def GetDepotToolsPath():
+  """Returns path to depot_tools."""
+  paths = os.environ.get('PATH', '').split(os.path.pathsep)
+  for path in paths:
+    if os.path.basename(path) == 'depot_tools':
+      return path
+
+
+def main(args):
+  depot_tools_path = GetDepotToolsPath()
+  if not depot_tools_path:
+    raise Exception('depot_tools path not found in PATH')
+  subprocess.check_call(
+      [sys.executable, os.path.join(depot_tools_path, 'gn.py')] + args[1:],
+      cwd=_SCRIPT_DIR)
+
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv))
diff --git a/build/run_pylint.py b/build/run_pylint.py
new file mode 100644
index 0000000..4c7fa49
--- /dev/null
+++ b/build/run_pylint.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+
+# Copyright 2012 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Script for running pylint for python scripts."""
+
+
+
+import glob
+import os
+import subprocess
+import sys
+
+
+class FileNotFoundError(Exception):
+  pass
+
+
+def FindPath(target, paths):
+  """Finds target from paths
+
+  Args:
+    target: a filename
+    paths: a list of paths.
+  Returns:
+    a path name of the target
+  """
+  for path in paths:
+    fname = os.path.join(path, target)
+    if os.path.exists(fname):
+      return fname
+  raise FileNotFoundError('%s not found' % target)
+
+
+def main():
+  result = []
+  err = 0
+  pylint = FindPath('pylint.py', os.environ.get('PATH', '').split(os.pathsep))
+  base_dir = os.path.dirname(os.path.abspath(__file__))
+  os.chdir(os.path.join(base_dir, '..'))
+
+  print 'run %s */*.py at %s' % (pylint, os.path.abspath('.'))
+
+  # depot_tools/pylint set this env.
+  os.environ['PYTHONDONTWRITEBYTECODE'] = '1'
+
+  for py in glob.iglob(os.path.join('*', '*.py')):
+    proc = subprocess.Popen([sys.executable, pylint, py],
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.STDOUT)
+    output = proc.communicate()[0]
+    print 'pylint %s...' % py
+    print output
+    if proc.returncode:
+      # pylint --long-help shows pylint output status code, which is bit-ORed
+      err_type = []
+      is_test = py.endswith('_test.py')
+      if is_test:
+        err_type.append('TEST')
+      if proc.returncode & 1:
+        err_type.append('fatal')
+      if proc.returncode & 2:
+        err_type.append('error')
+      if proc.returncode & 4:
+        err_type.append('warning')
+      if proc.returncode & 8:
+        err_type.append('refactor')
+      if proc.returncode & 16:
+        err_type.append('convention')
+      if proc.returncode & 32:
+        err_type.append('usage')
+      result.append('%s: %s' % (','.join(err_type), py))
+      if not is_test:
+        err |= proc.returncode
+    else:
+      result.append('OK: %s' % py)
+    print
+  print 'run_pylint.py results:'
+  for r in result:
+    print r
+  # failed for fatal/error messages were issued.
+  return err & 3
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/build/run_unittest.py b/build/run_unittest.py
new file mode 100755
index 0000000..7a75a7b
--- /dev/null
+++ b/build/run_unittest.py
@@ -0,0 +1,223 @@
+#!/usr/bin/env python
+
+# Copyright 2012 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Script for running all goma unit tests.
+Use -h to see its usage.
+"""
+
+
+
+import getopt
+import os
+import subprocess
+import sys
+
+# Directory of this script as it was invoked (possibly relative), and the
+# same directory as an absolute path.
+script_dir = os.path.dirname(__file__)
+script_absdir = os.path.abspath(script_dir)
+# Absolute path of the parent of the script directory.
+client_absdir = os.path.abspath(os.path.join(script_dir, '..'))
+# Platform switches; cygwin is treated as Windows.
+is_windows = (sys.platform == 'cygwin' or sys.platform.startswith('win'))
+is_mac = sys.platform == 'darwin'
+
+TEST_CASES = [
+  ("lib", [
+    "cmdline_parser_unittest",
+    "compress_util_unittest",
+    "compiler_flags_test",
+    "execreq_normalizer_unittest",
+    "execreq_verifier_unittest",
+    "file_reader_unittest",
+    "flag_parser_unittest",
+    "goma_file_unittest",
+    "goma_hash_unittest",
+    "join_unittest",
+    "lockhelper_unittest",
+    "path_resolver_unittest",
+    "path_unittest",
+    "path_util_unittest",
+    "string_piece_unittest",
+    "string_piece_utils_unittest",
+    "strutil_unittest",
+    ]
+  ),
+  ("client", [
+    "arfile_reader_unittest",
+    "atomic_stats_counter_unittest",
+    "base64_unittest",
+    "callback_unittest",
+    "compilation_database_reader_unittest",
+    "compile_task_unittest",
+    "compiler_info_cache_unittest",
+    "compiler_info_unittest",
+    "content_cursor_unittest",
+    "cpp_parser_unittest",
+    "cpp_tokenizer_unittest",
+    "cpu_unittest",
+    "deps_cache_unittest",
+    "directive_filter_unittest",
+    "env_flags_unittest",
+    "filename_id_table_unittest",
+    "goma_ipc_unittest",
+    "gomacc_argv_unittest",
+    "hash_rewrite_parser_unittest",
+    "histogram_unittest",
+    "http_rpc_unittest",
+    "http_unittest",
+    "include_cache_unittest",
+    "include_file_utils_unittest",
+    "include_processor_unittest",
+    "ioutil_unittest",
+    "jar_parser_unittest",
+    "jarfile_reader_unittest",
+    "jwt_unittest",
+    "linked_unordered_map_unittest",
+    "linker_script_parser_unittest",
+    "local_output_cache_unittest",
+    "log_cleaner_unittest",
+    "luci_context_unittest",
+    "machine_info_unittest",
+    "mypath_unittest",
+    "oauth2_unittest",
+    "openssl_engine_unittest",
+    "rand_util_unittest",
+    "simple_timer_unittest",
+    "split_unittest",
+    "static_darray_unittest",
+    "subprocess_task_unittest",
+    "threadpool_http_server_unittest",
+    "trustedipsmanager_unittest",
+    "util_unittest",
+    "worker_thread_manager_unittest",
+    "worker_thread_unittest",
+    ]
+  ),
+]
+
+# Platform specific additions. TEST_CASES[0][1] is the list of the "lib"
+# group and TEST_CASES[1][1] the list of the "client" group.
+if is_mac:
+  TEST_CASES[1][1].append('mac_version_unittest')
+
+if is_windows:
+  TEST_CASES[0][1].append('socket_helper_win_unittest')
+  TEST_CASES[1][1].append('named_pipe_client_win_unittest')
+  TEST_CASES[1][1].append('named_pipe_server_win_unittest')
+  TEST_CASES[1][1].append('posix_helper_win_unittest')
+  TEST_CASES[1][1].append('spawner_win_unittest')
+else:
+  # Everything that is not Windows (including mac).
+  TEST_CASES[1][1].append('arfile_unittest')
+  TEST_CASES[1][1].append('compiler_flags_util_unittest')
+  TEST_CASES[1][1].append('linker_input_processor_unittest')
+  if sys.platform.startswith('linux'):
+    # Linux only.
+    TEST_CASES[1][1].append('elf_parser_unittest')
+    TEST_CASES[1][1].append('library_path_resolver_unittest')
+    TEST_CASES[1][1].append('goma-make_unittest')
+
+
+class TestError(Exception):
+  pass
+
+
+def Usage():
+  sys.stdout.write("Usage: python run_unittest.py [options]\n")
+  sys.stdout.write("--build-dir=<path>  output folder\n")
+  sys.stdout.write("--target=<Release (Default)|(or any)>  "
+                   "build config in output folder to test\n")
+  sys.stdout.write("--test-cases=<all (default)|lib|client>  "
+                   "test cases to run\n")
+  sys.stdout.write("-n, --non-stop  "
+                   "do not stop when errors occur in test cases\n")
+  sys.stdout.write("-h, --help  display Usage\n")
+  sys.exit(2)
+
+
+def SetupClang():
+  clang_path = os.path.join(client_absdir, 'third_party', 'llvm-build',
+                            'Release+Asserts', 'bin', 'clang')
+  if subprocess.call([clang_path, "-v"]) == 0:
+    os.environ['GOMATEST_CLANG_PATH'] = clang_path
+    print 'GOMATEST_CLANG_PATH=' + os.environ['GOMATEST_CLANG_PATH']
+  else:
+    print 'clang is not runnable here. disable clang test'
+
+
+def RunTest(build_dir, target, case_opt, non_stop):
+  tests_passed = 0
+  expected_passes = 0
+  failed_tests = []
+
+  config_dir = os.path.join(build_dir, target)
+  try:
+    os.chdir(config_dir)
+  except OSError:
+    sys.stdout.write("\nERROR: folder not found: " + target)
+    return (tests_passed, expected_passes, failed_tests)
+
+  for case_key, case_names in TEST_CASES:
+    if case_opt != "all" and case_opt != case_key:
+      continue
+    expected_passes += len(case_names)
+    for case in case_names:
+      try:
+        sys.stdout.write("\nINFO: <" + target + "> case: " + case + "\n")
+        return_code = subprocess.call(os.path.join('.', case),
+                                      stdout=sys.stdout, stderr=sys.stderr)
+        if return_code != 0:
+          error_message = case + " failed"
+          raise TestError(error_message)
+        tests_passed += 1
+      except Exception, ex:
+        sys.stdout.write("\nERROR: " + str(ex))
+        failed_tests.append('target:' + target + ' test:' + case)
+        if not non_stop:
+          return (tests_passed, expected_passes, failed_tests)
+  return (tests_passed, expected_passes, failed_tests)
+
+
+def main():
+  # parse command line options
+  try:
+    opts, _ = getopt.getopt(sys.argv[1:], "nh", [
+        "build-dir=", "target=", "test-cases=", "non-stop", "help"
+    ])
+  except getopt.GetoptError, err:
+    # print help information and exit
+    sys.stdout.write(str(err) + "\n")
+    Usage()
+
+  build_dir = script_absdir
+  case_value = "all"
+  target_value = "Release"
+  non_stop = False
+  for key, value in opts:
+    if key == "--build-dir":
+      build_dir = value
+    elif key == "--target":
+      target_value = value
+    elif key == "--test-cases":
+      case_value = value
+    elif key == "--non-stop" or key == "-n":
+      non_stop = True
+    elif key == "--help" or key == "-h":
+      Usage()
+    else:
+      sys.stderr.write('Unknown option:' + key)
+      Usage()
+
+  if not is_windows:
+    SetupClang()
+
+  passed, expected, failed_tests = RunTest(
+      build_dir, target_value, case_value, non_stop)
+  sys.stdout.write("\nINFO: Total tests passed: " + str(passed) +
+                   " expected: " + str(expected) + "\n")
+  if passed != expected:
+    sys.stdout.write("ERROR: Test failed\n")
+    for failed in failed_tests:
+      sys.stdout.write(" " + failed + "\n")
+    sys.exit(1)
+  else:
+    sys.stdout.write("INFO: All tests passed\n")
+
+if __name__ == "__main__":
+  main()
diff --git a/build/sanitizers/README b/build/sanitizers/README
new file mode 100644
index 0000000..c557720
--- /dev/null
+++ b/build/sanitizers/README
@@ -0,0 +1 @@
+Files in this directory are a subset of chromium's build/sanitizers/.
diff --git a/build/sanitizers/asan_suppressions.cc b/build/sanitizers/asan_suppressions.cc
new file mode 100644
index 0000000..df94bc8
--- /dev/null
+++ b/build/sanitizers/asan_suppressions.cc
@@ -0,0 +1,23 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This file contains the default suppressions for AddressSanitizer.
+// It should only be used under very limited circumstances such as suppressing
+// a report caused by an interceptor call in a system-installed library.
+
+#if defined(ADDRESS_SANITIZER)
+
+// Please make sure the code below declares a single string variable
+// kASanDefaultSuppressions which contains ASan suppressions delimited by
+// newlines.
+char kASanDefaultSuppressions[] =
+// http://crbug.com/178677
+"interceptor_via_lib:libsqlite3.so\n"
+
+// PLEASE READ ABOVE BEFORE ADDING NEW SUPPRESSIONS.
+
+// End of suppressions.
+;  // Please keep this semicolon.
+
+#endif  // ADDRESS_SANITIZER
diff --git a/build/sanitizers/sanitizer_options.cc b/build/sanitizers/sanitizer_options.cc
new file mode 100644
index 0000000..0f0a24f
--- /dev/null
+++ b/build/sanitizers/sanitizer_options.cc
@@ -0,0 +1,175 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// This file contains the default options for various compiler-based dynamic
+// tools.
+
+#if defined(ADDRESS_SANITIZER) && defined(__MACH__)
+#include <crt_externs.h>  // for _NSGetArgc, _NSGetArgv
+#include <string.h>
+#endif  // ADDRESS_SANITIZER && __MACH__
+
+#if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) ||  \
+    defined(MEMORY_SANITIZER) || defined(THREAD_SANITIZER) || \
+    defined(UNDEFINED_SANITIZER)
+// Functions returning default options are declared weak in the tools' runtime
+// libraries. To make the linker pick the strong replacements for those
+// functions from this module, we explicitly force its inclusion by passing
+// -Wl,-u_sanitizer_options_link_helper
+extern "C"
+void _sanitizer_options_link_helper() { }
+
+// The callbacks we define here will be called from the sanitizer runtime, but
+// aren't referenced from the Chrome executable. We must ensure that those
+// callbacks are not sanitizer-instrumented, and that they aren't stripped by
+// the linker.
+#define SANITIZER_HOOK_ATTRIBUTE                                           \
+  extern "C"                                                               \
+  __attribute__((no_sanitize("address", "memory", "thread", "undefined"))) \
+  __attribute__((visibility("default")))                                   \
+  __attribute__((used))
+#endif
+
+#if defined(ADDRESS_SANITIZER)
+// Default options for AddressSanitizer in various configurations:
+//   malloc_context_size=5 - limit the size of stack traces collected by ASan
+//     for each malloc/free by 5 frames. These stack traces tend to accumulate
+//     very fast in applications using JIT (v8 in Chrome's case), see
+//     https://code.google.com/p/address-sanitizer/issues/detail?id=177
+//   symbolize=1 - enable in-process symbolization.
+//   legacy_pthread_cond=1 - run in the libpthread 2.2.5 compatibility mode to
+//     work around libGL.so using the obsolete API, see
+//     http://crbug.com/341805. This may break if pthread_cond_t objects are
+//     accessed by both instrumented and non-instrumented binaries (e.g. if
+//     they reside in shared memory). This option is going to be deprecated in
+//     upstream AddressSanitizer and must not be used anywhere except the
+//     official builds.
+//   check_printf=1 - check the memory accesses to printf (and other formatted
+//     output routines) arguments.
+//   use_sigaltstack=1 - handle signals on an alternate signal stack. Useful
+//     for stack overflow detection.
+//   strip_path_prefix=/../../ - prefixes up to and including this
+//     substring will be stripped from source file paths in symbolized reports
+//   fast_unwind_on_fatal=1 - use the fast (frame-pointer-based) stack unwinder
+//     to print error reports. V8 doesn't generate debug info for the JIT code,
+//     so the slow unwinder may not work properly.
+//   detect_stack_use_after_return=1 - use fake stack to delay the reuse of
+//     stack allocations and detect stack-use-after-return errors.
+#if defined(__linux__)
+// Default AddressSanitizer options for buildbots and non-official builds.
+const char *kAsanDefaultOptions =
+    "symbolize=1 check_printf=1 use_sigaltstack=1 "
+    "detect_leaks=0 strip_path_prefix=/../../ fast_unwind_on_fatal=1 "
+    "detect_stack_use_after_return=1 ";
+
+#elif defined(__MACH__)
+const char *kAsanDefaultOptions =
+    "check_printf=1 use_sigaltstack=1 "
+    "strip_path_prefix=/../../ fast_unwind_on_fatal=1 "
+    "detect_stack_use_after_return=1 detect_odr_violation=0 ";
+static const char kNaClDefaultOptions[] = "handle_segv=0";
+static const char kNaClFlag[] = "--type=nacl-loader";
+#endif  // __linux__
+
+#if defined(__linux__) || defined(__MACH__)
+// Hook called from the sanitizer runtime to obtain ASan's default options.
+// Returns kAsanDefaultOptions, except on __MACH__ builds when kNaClFlag is
+// one of the process arguments: then kNaClDefaultOptions is returned.
+SANITIZER_HOOK_ATTRIBUTE const char *__asan_default_options() {
+#if defined(__MACH__)
+  char*** argvp = _NSGetArgv();
+  int* argcp = _NSGetArgc();
+  // Without access to the arguments, fall back to the regular defaults.
+  if (!argvp || !argcp) return kAsanDefaultOptions;
+  char** argv = *argvp;
+  int argc = *argcp;
+  // Look for an argument that is exactly kNaClFlag.
+  for (int i = 0; i < argc; ++i) {
+    if (strcmp(argv[i], kNaClFlag) == 0) {
+      return kNaClDefaultOptions;
+    }
+  }
+#endif  // __MACH__
+  return kAsanDefaultOptions;
+}
+
+extern "C" char kASanDefaultSuppressions[];
+
+// Hook called from the sanitizer runtime: returns the suppression list that
+// asan_suppressions.cc defines as kASanDefaultSuppressions.
+SANITIZER_HOOK_ATTRIBUTE const char *__asan_default_suppressions() {
+  return kASanDefaultSuppressions;
+}
+#endif  // __linux__ || __MACH__
+#endif  // ADDRESS_SANITIZER
+
+#if defined(THREAD_SANITIZER) && defined(__linux__)
+// Default options for ThreadSanitizer in various configurations:
+//   detect_deadlocks=1 - enable deadlock (lock inversion) detection.
+//   second_deadlock_stack=1 - more verbose deadlock reports.
+//   report_signal_unsafe=0 - do not report async-signal-unsafe functions
+//     called from signal handlers.
+//   report_thread_leaks=0 - do not report unjoined threads at the end of
+//     the program execution.
+//   print_suppressions=1 - print the list of matched suppressions.
+//   history_size=7 - make the history buffer proportional to 2^7 (the maximum
+//     value) to keep more stack traces.
+//   strip_path_prefix=/../../ - prefixes up to and including this
+//     substring will be stripped from source file paths in symbolized reports.
+const char kTsanDefaultOptions[] =
+    "detect_deadlocks=1 second_deadlock_stack=1 report_signal_unsafe=0 "
+    "report_thread_leaks=0 print_suppressions=1 history_size=7 "
+    "strict_memcmp=0 strip_path_prefix=/../../ ";
+
+// Hook called from the sanitizer runtime: returns kTsanDefaultOptions.
+SANITIZER_HOOK_ATTRIBUTE const char *__tsan_default_options() {
+  return kTsanDefaultOptions;
+}
+
+extern "C" char kTSanDefaultSuppressions[];
+
+// Hook called from the sanitizer runtime: returns the suppression list that
+// tsan_suppressions.cc defines as kTSanDefaultSuppressions.
+SANITIZER_HOOK_ATTRIBUTE const char *__tsan_default_suppressions() {
+  return kTSanDefaultSuppressions;
+}
+
+#endif  // THREAD_SANITIZER && __linux__
+
+#if defined(MEMORY_SANITIZER)
+// Default options for MemorySanitizer:
+//   intercept_memcmp=0 - do not detect uninitialized memory in memcmp() calls.
+//     Pending cleanup, see http://crbug.com/523428
+//   strip_path_prefix=/../../ - prefixes up to and including this
+//     substring will be stripped from source file paths in symbolized reports.
+const char kMsanDefaultOptions[] =
+    "intercept_memcmp=0 strip_path_prefix=/../../ ";
+
+// Hook called from the sanitizer runtime: returns kMsanDefaultOptions.
+SANITIZER_HOOK_ATTRIBUTE const char *__msan_default_options() {
+  return kMsanDefaultOptions;
+}
+
+#endif  // MEMORY_SANITIZER
+
+#if defined(LEAK_SANITIZER)
+// Default options for LeakSanitizer:
+//   print_suppressions=1 - print the list of matched suppressions.
+//   strip_path_prefix=/../../ - prefixes up to and including this
+//     substring will be stripped from source file paths in symbolized reports.
+const char kLsanDefaultOptions[] =
+    "print_suppressions=1 strip_path_prefix=/../../ ";
+
+// Hook called from the sanitizer runtime: returns kLsanDefaultOptions.
+SANITIZER_HOOK_ATTRIBUTE const char *__lsan_default_options() {
+  return kLsanDefaultOptions;
+}
+
+extern "C" char kLSanDefaultSuppressions[];
+
+// Hook called from the sanitizer runtime: returns kLSanDefaultSuppressions.
+// NOTE(review): that array is only declared in this file; confirm that a
+// definition is linked into LEAK_SANITIZER builds.
+SANITIZER_HOOK_ATTRIBUTE const char *__lsan_default_suppressions() {
+  return kLSanDefaultSuppressions;
+}
+
+#endif  // LEAK_SANITIZER
+
+#if defined(UNDEFINED_SANITIZER)
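+// Default options for UndefinedBehaviorSanitizer:
+//   print_stacktrace=1 - print the stacktrace when UBSan reports an error.
+//   strip_path_prefix=/../../ - prefixes up to and including this
+//     substring will be stripped from source file paths in symbolized reports.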
+const char kUbsanDefaultOptions[] =
+    "print_stacktrace=1 strip_path_prefix=/../../ ";
+
+// Hook called from the sanitizer runtime: returns kUbsanDefaultOptions.
+SANITIZER_HOOK_ATTRIBUTE const char* __ubsan_default_options() {
+  return kUbsanDefaultOptions;
+}
+
+#endif  // UNDEFINED_SANITIZER
diff --git a/build/sanitizers/tsan_suppressions.cc b/build/sanitizers/tsan_suppressions.cc
new file mode 100644
index 0000000..d239416
--- /dev/null
+++ b/build/sanitizers/tsan_suppressions.cc
@@ -0,0 +1,21 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This file contains the default suppressions for ThreadSanitizer.
+// It should only be used under very limited circumstances such as suppressing
+// a report caused by an interceptor call in a system-installed library.
+
+#if defined(THREAD_SANITIZER)
+
+// Please make sure the code below declares a single string variable
+// kTSanDefaultSuppressions which contains TSan suppressions delimited by
+// newlines.
+char kTSanDefaultSuppressions[] =
+  "race:third_party/glog/src/vlog_is_on.cc\n"
+  "race:third_party/glog/src/raw_logging.cc\n"
+  // PLEASE READ ABOVE BEFORE ADDING NEW SUPPRESSIONS.
+  // End of suppressions.
+;  // Please keep this semicolon.
+
+#endif  // THREAD_SANITIZER
diff --git a/build/store_git_revision.py b/build/store_git_revision.py
new file mode 100755
index 0000000..cc8fcb6
--- /dev/null
+++ b/build/store_git_revision.py
@@ -0,0 +1,45 @@
+#!/usr/bin/python
+#
+# Copyright 2015 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Get compiler_proxy git revision."""
+
+import argparse
+import os
+import subprocess
+
+_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def GetRevisionNumber(args):
+  """Obtain a number to represent revision of source code.
+
+  Args:
+    args: an instance of argparse.namespace.
+  """
+  # <commit hash>@<committer date unix timestamp>
+  git_hash = subprocess.check_output(
+      ['git', 'log', '-1', '--pretty=format:%H@%ct'],
+      cwd=_SCRIPT_DIR).strip()
+  if not git_hash:
+    print 'No git hash set. use unknown as fallback.'
+    git_hash = 'unknown'
+
+  if args.output_file:
+    with open(args.output_file, 'w') as f:
+      f.write(git_hash)
+  else:
+    print git_hash
+
+
+def main():
+  parser = argparse.ArgumentParser()
+  parser.add_argument('-o', '--output-file', help='Output filename')
+  args = parser.parse_args()
+  GetRevisionNumber(args)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/build/toolchain/BUILD.gn b/build/toolchain/BUILD.gn
new file mode 100644
index 0000000..b3e45e6
--- /dev/null
+++ b/build/toolchain/BUILD.gn
@@ -0,0 +1,13 @@
+# Copied from Chromium's build/toolchain.
+#
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//build/toolchain/concurrent_links.gni")
+
+# Declare the pool only while processing the default toolchain. Its depth is
+# concurrent_links, which comes from concurrent_links.gni.
+if (current_toolchain == default_toolchain) {
+  pool("link_pool") {
+    depth = concurrent_links
+  }
+}
diff --git a/build/toolchain/concurrent_links.gni b/build/toolchain/concurrent_links.gni
new file mode 100644
index 0000000..cee6d29
--- /dev/null
+++ b/build/toolchain/concurrent_links.gni
@@ -0,0 +1,32 @@
+# Copied from chromium build/toolchain and modified for goma.
+#
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# This file should only be imported from files that define toolchains.
+# There's no way to enforce this exactly, but all toolchains are processed
+# in the context of the default_toolchain, so we can at least check for that.
+assert(current_toolchain == default_toolchain)
+
+declare_args() {
+  # Limit the number of concurrent links; we often want to run fewer
+  # links at once than we do compiles, because linking is memory-intensive.
+  # The default to use varies by platform and by the amount of memory
+  # available, so we call out to a script to get the right value.
+  concurrent_links = -1
+}
+
+# -1 is the declare_args() default, i.e. no explicit value was given:
+# ask get_concurrent_links.py for one, passing a per-link memory estimate on
+# win and mac.
+if (concurrent_links == -1) {
+  if (os == "win") {
+    _args = [ "--mem_per_link_gb=1" ]
+  } else if (os == "mac") {
+    _args = [ "--mem_per_link_gb=4" ]
+  } else {
+    _args = []
+  }
+
+  # TODO Pass more build configuration info to the script
+  # so that we can compute better values.
+  concurrent_links = exec_script("get_concurrent_links.py", _args, "value")
+}
diff --git a/build/toolchain/gcc_ar_wrapper.py b/build/toolchain/gcc_ar_wrapper.py
new file mode 100755
index 0000000..c5c9304
--- /dev/null
+++ b/build/toolchain/gcc_ar_wrapper.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+# Copied from chromium build/toolchain.
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Runs the 'ar' command after removing its output file first.
+
+This script is invoked like:
+  python gcc_ar_wrapper.py --ar=$AR --output=$OUT $OP $INPUTS
+to do the equivalent of:
+  rm -f $OUT && $AR $OP $OUT $INPUTS
+"""
+
+import argparse
+import os
+import subprocess
+import sys
+
+import wrapper_utils
+
+
+def main():
+  parser = argparse.ArgumentParser(description=__doc__)
+  parser.add_argument('--ar',
+                      required=True,
+                      help='The ar binary to run',
+                      metavar='PATH')
+  parser.add_argument('--output',
+                      required=True,
+                      help='Output archive file',
+                      metavar='ARCHIVE')
+  parser.add_argument('--plugin',
+                      help='Load plugin')
+  parser.add_argument('--resource-whitelist',
+                      help='Merge all resource whitelists into a single file.',
+                      metavar='PATH')
+  parser.add_argument('operation',
+                      help='Operation on the archive')
+  parser.add_argument('inputs', nargs='+',
+                      help='Input files')
+  args = parser.parse_args()
+
+  if args.resource_whitelist:
+    whitelist_candidates = wrapper_utils.ResolveRspLinks(args.inputs)
+    wrapper_utils.CombineResourceWhitelists(
+        whitelist_candidates, args.resource_whitelist)
+
+  command = [args.ar, args.operation]
+  if args.plugin is not None:
+    command += ['--plugin', args.plugin]
+  command.append(args.output)
+  command += args.inputs
+
+  # Remove the output file first.
+  try:
+    os.remove(args.output)
+  except OSError as e:
+    if e.errno != os.errno.ENOENT:
+      raise
+
+  # Now just run the ar command.
+  return subprocess.call(wrapper_utils.CommandToRun(command))
+
+
+if __name__ == "__main__":
+  sys.exit(main())
diff --git a/build/toolchain/gcc_solink_wrapper.py b/build/toolchain/gcc_solink_wrapper.py
new file mode 100755
index 0000000..b1b7c28
--- /dev/null
+++ b/build/toolchain/gcc_solink_wrapper.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+# Copied from chromium build/toolchain.
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Runs 'ld -shared' and generates a .TOC file that's untouched when unchanged.
+
+This script exists to avoid using complex shell commands in
+gcc_toolchain.gni's tool("solink"), in case the host running the compiler
+does not have a POSIX-like shell (e.g. Windows).
+"""
+
+import argparse
+import os
+import subprocess
+import sys
+
+import wrapper_utils
+
+
+def CollectSONAME(args):
+  """Replaces: readelf -d $sofile | grep SONAME"""
+  toc = ''
+  readelf = subprocess.Popen(wrapper_utils.CommandToRun(
+      [args.readelf, '-d', args.sofile]), stdout=subprocess.PIPE, bufsize=-1)
+  for line in readelf.stdout:
+    if 'SONAME' in line:
+      toc += line
+  return readelf.wait(), toc
+
+
+def CollectDynSym(args):
+  """Replaces: nm --format=posix -g -D $sofile | cut -f1-2 -d' '"""
+  toc = ''
+  nm = subprocess.Popen(wrapper_utils.CommandToRun([
+      args.nm, '--format=posix', '-g', '-D', args.sofile]),
+                        stdout=subprocess.PIPE, bufsize=-1)
+  for line in nm.stdout:
+    toc += ' '.join(line.split(' ', 2)[:2]) + '\n'
+  return nm.wait(), toc
+
+
+def CollectTOC(args):
+  result, toc = CollectSONAME(args)
+  if result == 0:
+    result, dynsym = CollectDynSym(args)
+    toc += dynsym
+  return result, toc
+
+
+def UpdateTOC(tocfile, toc):
+  if os.path.exists(tocfile):
+    old_toc = open(tocfile, 'r').read()
+  else:
+    old_toc = None
+  if toc != old_toc:
+    open(tocfile, 'w').write(toc)
+
+
+def main():
+  """Links a shared object, refreshes its TOC file and optionally strips it.
+
+  Returns:
+    the exit status of the first step that failed (link, readelf or nm), or
+    otherwise the status of the last step that was run.
+  """
+  parser = argparse.ArgumentParser(description=__doc__)
+  parser.add_argument('--readelf',
+                      required=True,
+                      help='The readelf binary to run',
+                      metavar='PATH')
+  parser.add_argument('--nm',
+                      required=True,
+                      help='The nm binary to run',
+                      metavar='PATH')
+  parser.add_argument('--strip',
+                      help='The strip binary to run',
+                      metavar='PATH')
+  parser.add_argument('--sofile',
+                      required=True,
+                      help='Shared object file produced by linking command',
+                      metavar='FILE')
+  parser.add_argument('--tocfile',
+                      required=True,
+                      help='Output table-of-contents file',
+                      metavar='FILE')
+  parser.add_argument('--output',
+                      required=True,
+                      help='Final output shared object file',
+                      metavar='FILE')
+  parser.add_argument('--resource-whitelist',
+                      help='Merge all resource whitelists into a single file.',
+                      metavar='PATH')
+  parser.add_argument('command', nargs='+',
+                      help='Linking command')
+  args = parser.parse_args()
+
+  # Work-around for gold being slow-by-default. http://crbug.com/632230
+  # Only the link step below runs with this modified copy of the environment.
+  fast_env = dict(os.environ)
+  fast_env['LC_ALL'] = 'C'
+
+  if args.resource_whitelist:
+    whitelist_candidates = wrapper_utils.ResolveRspLinks(args.command)
+    wrapper_utils.CombineResourceWhitelists(
+        whitelist_candidates, args.resource_whitelist)
+
+  # First, run the actual link.
+  result = subprocess.call(
+      wrapper_utils.CommandToRun(args.command), env=fast_env)
+  if result != 0:
+    return result
+
+  # Next, generate the contents of the TOC file.
+  result, toc = CollectTOC(args)
+  if result != 0:
+    return result
+
+  # If there is an existing TOC file with identical contents, leave it alone.
+  # Otherwise, write out the TOC file.
+  UpdateTOC(args.tocfile, toc)
+
+  # Finally, strip the linked shared object file (if desired).
+  # The stripped copy is written to --output; --sofile is only read.
+  if args.strip:
+    result = subprocess.call(wrapper_utils.CommandToRun(
+        [args.strip, '--strip-unneeded', '-o', args.output, args.sofile]))
+
+  return result
+
+
+if __name__ == "__main__":
+  sys.exit(main())
diff --git a/build/toolchain/gcc_toolchain.gni b/build/toolchain/gcc_toolchain.gni
new file mode 100644
index 0000000..a5004cf
--- /dev/null
+++ b/build/toolchain/gcc_toolchain.gni
@@ -0,0 +1,451 @@
+# Copied from chromium's src/build/toolchain.
+#
+# Copyright (c) 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//build/config/clang/clang.gni")
+import("//build/toolchain/goma.gni")
+
+# This template defines a toolchain for something that works like gcc
+# (including clang).
+#
+# It requires the following variables specifying the executables to run:
+#  - ar
+#  - cc
+#  - cxx
+#  - ld
+#
+# Optional parameters that control the tools:
+#
+#  - extra_cflags
+#      Extra flags to be appended when compiling C files (but not C++ files).
+#  - extra_cppflags
+#      Extra flags to be appended when compiling both C and C++ files. "CPP"
+#      stands for "C PreProcessor" in this context, although it can be
+#      used for non-preprocessor flags as well. Not to be confused with
+#      "CXX" (which follows).
+#  - extra_cxxflags
+#      Extra flags to be appended when compiling C++ files (but not C files).
+#  - extra_ldflags
+#      Extra flags to be appended when linking
+#
+#  - libs_section_prefix
+#  - libs_section_postfix
+#      The contents of these strings, if specified, will be placed around
+#      the libs section of the linker line. It allows one to inject libraries
+#      at the beginning and end for all targets in a toolchain.
+#  - solink_libs_section_prefix
+#  - solink_libs_section_postfix
+#      Same as libs_section_{pre,post}fix except used for solink instead of link.
+#  - link_outputs
+#      The content of this array, if specified, will be added to the list of
+#      outputs from the link command. This can be useful in conjunction with
+#      the post_link parameter.
+#  - post_link
+#      The content of this string, if specified, will be run as a separate
+#      command following the link command.
+#  - deps
+#      Just forwarded to the toolchain definition.
+#  - executable_extension
+#      If this string is specified it will be used for the file extension
+#      for an executable, rather than using no extension; targets will
+#      still be able to override the extension using the output_extension
+#      variable.
+#  - shlib_extension
+#      If this string is specified it will be used for the file extension
+#      for a shared library, rather than default value specified in
+#      toolchain.gni
+#  - strip
+#      Location of the strip executable. When specified, strip will be run on
+#      all shared libraries and executables as they are built. The pre-stripped
+#      artifacts will be put in lib.unstripped/ and exe.unstripped/.
+template("gcc_toolchain") {
+  toolchain(target_name) {
+    assert(defined(invoker.ar), "gcc_toolchain() must specify a \"ar\" value")
+    assert(defined(invoker.cc), "gcc_toolchain() must specify a \"cc\" value")
+    assert(defined(invoker.cxx), "gcc_toolchain() must specify a \"cxx\" value")
+    assert(defined(invoker.ld), "gcc_toolchain() must specify a \"ld\" value")
+
+    # GN's syntax can't handle more than one scope dereference at once, like
+    # "invoker.toolchain_args.foo", so make a temporary to hold the toolchain
+    # args so we can do "invoker_toolchain_args.foo".
+    assert(defined(invoker.toolchain_args),
+           "Toolchains must specify toolchain_args")
+    invoker_toolchain_args = invoker.toolchain_args
+    assert(defined(invoker_toolchain_args.current_cpu),
+           "toolchain_args must specify a current_cpu")
+    assert(defined(invoker_toolchain_args.current_os),
+           "toolchain_args must specify a current_os")
+
+    # When invoking this toolchain not as the default one, these args will be
+    # passed to the build. They are ignored when this is the default toolchain.
+    toolchain_args = {
+      # Populate toolchain args from the invoker.
+      forward_variables_from(invoker_toolchain_args, "*")
+      # The host toolchain value computed by the default toolchain's setup
+      # needs to be passed through unchanged to all secondary toolchains to
+      # ensure that it's always the same, regardless of the values that may be
+      # set on those toolchains.
+      #host_toolchain = host_toolchain
+    }
+
+    # When the invoker has explicitly overridden use_goma in the toolchain
+    # args, use that value, otherwise default to the global one. This works
+    # because the only reasonable override that toolchains might supply for
+    # this value is to force-disable it.
+    if (defined(toolchain_args.use_goma)) {
+      toolchain_uses_goma = toolchain_args.use_goma
+    } else {
+      toolchain_uses_goma = use_goma
+    }
+
+    # Compute the compiler prefix.
+    if (toolchain_uses_goma) {
+      compiler_prefix = "$goma_dir/gomacc "
+    } else {
+      compiler_prefix = ""
+    }
+
+    cc = compiler_prefix + invoker.cc
+    cxx = compiler_prefix + invoker.cxx
+    ar = invoker.ar
+    ld = invoker.ld
+    if (defined(invoker.readelf)) {
+      readelf = invoker.readelf
+    } else {
+      readelf = "readelf"
+    }
+    if (defined(invoker.nm)) {
+      nm = invoker.nm
+    } else {
+      nm = "nm"
+    }
+
+    if (defined(invoker.shlib_extension)) {
+      default_shlib_extension = invoker.shlib_extension
+    } else {
+      default_shlib_extension = shlib_extension
+    }
+
+    if (defined(invoker.executable_extension)) {
+      default_executable_extension = invoker.executable_extension
+    } else {
+      default_executable_extension = ""
+    }
+
+    # Bring these into our scope for string interpolation with default values.
+    if (defined(invoker.libs_section_prefix)) {
+      libs_section_prefix = invoker.libs_section_prefix
+    } else {
+      libs_section_prefix = ""
+    }
+
+    if (defined(invoker.libs_section_postfix)) {
+      libs_section_postfix = invoker.libs_section_postfix
+    } else {
+      libs_section_postfix = ""
+    }
+
+    if (defined(invoker.solink_libs_section_prefix)) {
+      solink_libs_section_prefix = invoker.solink_libs_section_prefix
+    } else {
+      solink_libs_section_prefix = ""
+    }
+
+    if (defined(invoker.solink_libs_section_postfix)) {
+      solink_libs_section_postfix = invoker.solink_libs_section_postfix
+    } else {
+      solink_libs_section_postfix = ""
+    }
+
+    if (defined(invoker.extra_cflags) && invoker.extra_cflags != "") {
+      extra_cflags = " " + invoker.extra_cflags
+    } else {
+      extra_cflags = ""
+    }
+
+    if (defined(invoker.extra_cppflags) && invoker.extra_cppflags != "") {
+      extra_cppflags = " " + invoker.extra_cppflags
+    } else {
+      extra_cppflags = ""
+    }
+
+    if (defined(invoker.extra_cxxflags) && invoker.extra_cxxflags != "") {
+      extra_cxxflags = " " + invoker.extra_cxxflags
+    } else {
+      extra_cxxflags = ""
+    }
+
+    if (defined(invoker.extra_ldflags) && invoker.extra_ldflags != "") {
+      extra_ldflags = " " + invoker.extra_ldflags
+    } else {
+      extra_ldflags = ""
+    }
+
+    # These library switches can apply to all tools below.
+    lib_switch = "-l"
+    lib_dir_switch = "-L"
+
+    # Object files go in this directory.
+    object_subdir = "{{target_out_dir}}/{{label_name}}"
+
+    tool("cc") {
+      depfile = "{{output}}.d"
+      command = "$cc -MMD -MF $depfile {{defines}} {{include_dirs}} {{cflags}} {{cflags_c}}${extra_cppflags}${extra_cflags} -c {{source}} -o {{output}}"
+      depsformat = "gcc"
+      description = "CC {{output}}"
+      outputs = [
+        # The whitelist file is also an output, but ninja does not
+        # currently support multiple outputs for tool("cc").
+        "$object_subdir/{{source_name_part}}.o",
+      ]
+    }
+
+    tool("cxx") {
+      depfile = "{{output}}.d"
+      command = "$cxx -MMD -MF $depfile {{defines}} {{include_dirs}} {{cflags}} {{cflags_cc}}${extra_cppflags}${extra_cxxflags} -c {{source}} -o {{output}}"
+      depsformat = "gcc"
+      description = "CXX {{output}}"
+      outputs = [
+        # The whitelist file is also an output, but ninja does not
+        # currently support multiple outputs for tool("cxx").
+        "$object_subdir/{{source_name_part}}.o",
+      ]
+    }
+
+    tool("asm") {
+      # For GCC we can just use the C compiler to compile assembly.
+      depfile = "{{output}}.d"
+      command = "$cc -MMD -MF $depfile {{defines}} {{include_dirs}} {{asmflags}} -c {{source}} -o {{output}}"
+      depsformat = "gcc"
+      description = "ASM {{output}}"
+      outputs = [
+        "$object_subdir/{{source_name_part}}.o",
+      ]
+    }
+
+    tool("alink") {
+      rspfile = "{{output}}.rsp"
+      whitelist_flag = " "
+
+      # This needs a Python script to avoid using simple sh features in this
+      # command, in case the host does not use a POSIX shell (e.g. compiling
+      # POSIX-like toolchains such as NaCl on Windows).
+      ar_wrapper =
+          rebase_path("//build/toolchain/gcc_ar_wrapper.py", root_build_dir)
+      command = "$python_path \"$ar_wrapper\"$whitelist_flag --output={{output}} --ar=\"$ar\" {{arflags}} rcsD @\"$rspfile\""
+      description = "AR {{output}}"
+      rspfile_content = "{{inputs}}"
+      outputs = [
+        "{{output_dir}}/{{target_output_name}}{{output_extension}}",
+      ]
+
+      # Static libraries go in the target out directory by default so we can
+      # generate different targets with the same name and not have them collide.
+      default_output_dir = "{{target_out_dir}}"
+      default_output_extension = ".a"
+      output_prefix = "lib"
+    }
+
+    tool("solink") {
+      soname = "{{target_output_name}}{{output_extension}}"  # e.g. "libfoo.so".
+      sofile = "{{output_dir}}/$soname"  # Possibly including toolchain dir.
+      rspfile = sofile + ".rsp"
+      pool = "//build/toolchain:link_pool($default_toolchain)"
+      whitelist_flag = " "
+
+      if (defined(invoker.strip)) {
+        unstripped_sofile = "{{root_out_dir}}/lib.unstripped/$soname"
+      } else {
+        unstripped_sofile = sofile
+      }
+
+      # These variables are not built into GN but are helpers that
+      # implement (1) linking to produce a .so, (2) extracting the symbols
+      # from that file (3) if the extracted list differs from the existing
+      # .TOC file, overwrite it, otherwise, don't change it.
+      tocfile = sofile + ".TOC"
+
+      link_command = "$ld -shared {{ldflags}}${extra_ldflags} -o \"$unstripped_sofile\" -Wl,-soname=\"$soname\" @\"$rspfile\""
+
+      assert(defined(readelf), "to solink you must have a readelf")
+      assert(defined(nm), "to solink you must have an nm")
+      strip_switch = ""
+      if (defined(invoker.strip)) {
+        strip_switch = "--strip=${invoker.strip}"
+      }
+
+      # A Python script avoids a complex shell command needing sh control
+      # structures, pipelines, and POSIX utilities (absent on e.g. Windows).
+      # Its path is relative to root_build_dir, like the ar and link wrappers.
+      solink_wrapper = rebase_path("//build/toolchain/gcc_solink_wrapper.py", root_build_dir)
+      command = "$python_path \"$solink_wrapper\" --readelf=\"$readelf\" --nm=\"$nm\" $strip_switch --sofile=\"$unstripped_sofile\" --tocfile=\"$tocfile\" --output=\"$sofile\"$whitelist_flag -- $link_command"
+
+      rspfile_content = "-Wl,--whole-archive {{inputs}} {{solibs}} -Wl,--no-whole-archive $solink_libs_section_prefix {{libs}} $solink_libs_section_postfix"
+
+      description = "SOLINK $sofile"
+
+      # Use this for {{output_extension}} expansions unless a target manually
+      # overrides it (in which case {{output_extension}} will be what the target
+      # specifies).
+      default_output_extension = default_shlib_extension
+
+      default_output_dir = "{{root_out_dir}}"
+      if (shlib_subdir != ".") {
+        default_output_dir += "/$shlib_subdir"
+      }
+
+      output_prefix = "lib"
+
+      # Since the above command only updates the .TOC file when it changes, ask
+      # Ninja to check if the timestamp actually changed to know if downstream
+      # dependencies should be recompiled.
+      restat = true
+
+      # Tell GN about the output files. It will link to the sofile but use the
+      # tocfile for dependency management.
+      outputs = [
+        sofile,
+        tocfile,
+      ]
+      if (sofile != unstripped_sofile) {
+        outputs += [ unstripped_sofile ]
+      }
+      link_output = sofile
+      depend_output = tocfile
+    }
+
+    tool("solink_module") {
+      soname = "{{target_output_name}}{{output_extension}}"  # e.g. "libfoo.so".
+      sofile = "{{output_dir}}/$soname"
+      rspfile = sofile + ".rsp"
+      pool = "//build/toolchain:link_pool($default_toolchain)"
+
+      if (defined(invoker.strip)) {
+        unstripped_sofile = "{{root_out_dir}}/lib.unstripped/$soname"
+      } else {
+        unstripped_sofile = sofile
+      }
+
+      command = "$ld -shared {{ldflags}}${extra_ldflags} -o \"$unstripped_sofile\" -Wl,-soname=\"$soname\" @\"$rspfile\""
+
+      if (defined(invoker.strip)) {
+        strip_command = "${invoker.strip} --strip-unneeded -o \"$sofile\" \"$unstripped_sofile\""
+        command += " && " + strip_command
+      }
+      rspfile_content = "-Wl,--whole-archive {{inputs}} {{solibs}} -Wl,--no-whole-archive $solink_libs_section_prefix {{libs}} $solink_libs_section_postfix"
+
+      description = "SOLINK_MODULE $sofile"
+
+      # Use this for {{output_extension}} expansions unless a target manually
+      # overrides it (in which case {{output_extension}} will be what the target
+      # specifies).
+      if (defined(invoker.loadable_module_extension)) {
+        default_output_extension = invoker.loadable_module_extension
+      } else {
+        default_output_extension = default_shlib_extension
+      }
+
+      default_output_dir = "{{root_out_dir}}"
+      if (shlib_subdir != ".") {
+        default_output_dir += "/$shlib_subdir"
+      }
+
+      output_prefix = "lib"
+
+      outputs = [
+        sofile,
+      ]
+      if (sofile != unstripped_sofile) {
+        outputs += [ unstripped_sofile ]
+      }
+    }
+
+    tool("link") {
+      exename = "{{target_output_name}}{{output_extension}}"
+      outfile = "{{output_dir}}/$exename"
+      rspfile = "$outfile.rsp"
+      unstripped_outfile = outfile
+      pool = "//build/toolchain:link_pool($default_toolchain)"
+
+      # Use this for {{output_extension}} expansions unless a target manually
+      # overrides it (in which case {{output_extension}} will be what the target
+      # specifies).
+      default_output_extension = default_executable_extension
+
+      default_output_dir = "{{root_out_dir}}"
+
+      if (defined(invoker.strip)) {
+        unstripped_outfile = "{{root_out_dir}}/exe.unstripped/$exename"
+      }
+
+      command = "$ld {{ldflags}}${extra_ldflags} -o \"$unstripped_outfile\" -Wl,--start-group @\"$rspfile\" {{solibs}} -Wl,--end-group $libs_section_prefix {{libs}} $libs_section_postfix"
+      if (defined(invoker.strip)) {
+        link_wrapper =
+            rebase_path("//build/toolchain/gcc_link_wrapper.py", root_build_dir)
+        command = "$python_path \"$link_wrapper\" --strip=\"${invoker.strip}\" --unstripped-file=\"$unstripped_outfile\" --output=\"$outfile\" -- $command"
+      }
+      description = "LINK $outfile"
+      rspfile_content = "{{inputs}}"
+      outputs = [
+        outfile,
+      ]
+      if (outfile != unstripped_outfile) {
+        outputs += [ unstripped_outfile ]
+      }
+      if (defined(invoker.link_outputs)) {
+        outputs += invoker.link_outputs
+      }
+    }
+
+    # These two are really entirely generic, but have to be repeated in
+    # each toolchain because GN doesn't allow a template to be used here.
+    # See //build/toolchain/toolchain.gni for details.
+    tool("stamp") {
+      command = stamp_command
+      description = stamp_description
+    }
+    tool("copy") {
+      command = copy_command
+      description = copy_description
+    }
+
+    forward_variables_from(invoker, [ "deps" ])
+  }
+}
+
+# This is a shorthand for gcc_toolchain instances based on the Chromium-built
+# version of Clang. Only toolchain_args (with current_cpu and current_os) needs
+# to be specified by the invoker, and optionally toolprefix if it's a
+# cross-compile case. Note that for a cross-compile case this toolchain
+# requires a config to pass the appropriate -target option, or else it will
+# actually just be doing a native compile. The invoker can optionally pass
+# strip, and override other build args through toolchain_args.
+template("clang_toolchain") {
+  # Binutils name prefix; when empty the unprefixed tool names are used.
+  toolprefix = ""
+  if (defined(invoker.toolprefix)) {
+    toolprefix = invoker.toolprefix
+  }
+
+  gcc_toolchain(target_name) {
+    # clang/clang++ come from $clang_base_path/bin; clang++ drives the link.
+    clang_bin = rebase_path("$clang_base_path/bin", root_build_dir)
+    cxx = "$clang_bin/clang++"
+    cc = "$clang_bin/clang"
+    ld = cxx
+    ar = "${toolprefix}ar"
+    nm = "${toolprefix}nm"
+    readelf = "${toolprefix}readelf"
+
+    forward_variables_from(invoker, [ "strip" ])
+
+    toolchain_args = {
+      if (defined(invoker.toolchain_args)) {
+        forward_variables_from(invoker.toolchain_args, "*")
+      }
+      is_clang = true
+    }
+  }
+}
diff --git a/build/toolchain/get_concurrent_links.py b/build/toolchain/get_concurrent_links.py
new file mode 100644
index 0000000..f6bb046
--- /dev/null
+++ b/build/toolchain/get_concurrent_links.py
@@ -0,0 +1,79 @@
+# Copied from Chromium's build/toolchain.
+#
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# This script computes the number of concurrent links we want to run in the build
+# as a function of machine spec. It's based on GetDefaultConcurrentLinks in GYP.
+
+import optparse
+import os
+import re
+import subprocess
+import sys
+
+def _GetTotalMemoryInBytes():
+  if sys.platform in ('win32', 'cygwin'):
+    import ctypes
+
+    class MEMORYSTATUSEX(ctypes.Structure):
+      _fields_ = [
+        ("dwLength", ctypes.c_ulong),
+        ("dwMemoryLoad", ctypes.c_ulong),
+        ("ullTotalPhys", ctypes.c_ulonglong),
+        ("ullAvailPhys", ctypes.c_ulonglong),
+        ("ullTotalPageFile", ctypes.c_ulonglong),
+        ("ullAvailPageFile", ctypes.c_ulonglong),
+        ("ullTotalVirtual", ctypes.c_ulonglong),
+        ("ullAvailVirtual", ctypes.c_ulonglong),
+        ("sullAvailExtendedVirtual", ctypes.c_ulonglong),
+      ]
+
+    stat = MEMORYSTATUSEX(dwLength=ctypes.sizeof(MEMORYSTATUSEX))
+    ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(stat))
+    return stat.ullTotalPhys
+  elif sys.platform.startswith('linux'):
+    if os.path.exists("/proc/meminfo"):
+      with open("/proc/meminfo") as meminfo:
+        memtotal_re = re.compile(r'^MemTotal:\s*(\d*)\s*kB')
+        for line in meminfo:
+          match = memtotal_re.match(line)
+          if not match:
+            continue
+          return float(match.group(1)) * 2**10
+  elif sys.platform == 'darwin':
+    try:
+      return int(subprocess.check_output(['sysctl', '-n', 'hw.memsize']))
+    except Exception:
+      return 0
+  # TODO: Implement this for other platforms.
+  return 0
+
+
+def _GetDefaultConcurrentLinks(mem_per_link_gb, reserve_mem_gb):
+  # Inherit the legacy environment variable for people that have set it in GYP.
+  pool_size = int(os.getenv('GYP_LINK_CONCURRENCY', 0))
+  if pool_size:
+    return pool_size
+
+  mem_total_bytes = _GetTotalMemoryInBytes()
+  mem_total_bytes = max(0, mem_total_bytes - reserve_mem_gb * 2**30)
+  num_concurrent_links = int(max(1, mem_total_bytes / mem_per_link_gb / 2**30))
+  hard_cap = max(1, int(os.getenv('GYP_LINK_CONCURRENCY_MAX', 2**32)))
+  return min(num_concurrent_links, hard_cap)
+
+
+def main():
+  parser = optparse.OptionParser()
+  parser.add_option('--mem_per_link_gb', action="store", type="int", default=8)
+  parser.add_option('--reserve_mem_gb', action="store", type="int", default=0)
+  parser.disable_interspersed_args()
+  options, _ = parser.parse_args()
+
+  print _GetDefaultConcurrentLinks(options.mem_per_link_gb,
+                                   options.reserve_mem_gb)
+  return 0
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/build/toolchain/goma.gni b/build/toolchain/goma.gni
new file mode 100644
index 0000000..1587560
--- /dev/null
+++ b/build/toolchain/goma.gni
@@ -0,0 +1,21 @@
+# Copied from chromium's src/build/toolchain.
+#
+# Copyright (c) 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Defines the configuration of Goma.
+
+declare_args() {
+  # Whether compiles are distributed through Goma.
+  use_goma = false
+
+  # The default location of the Goma client depends on the platform.
+  if (os != "win") {
+    # Absolute directory containing the gomacc.
+    goma_dir = getenv("HOME") + "/goma"
+  } else {
+    # Absolute directory containing the gomacc.exe.
+    goma_dir = "C:\goma\goma-win64"
+  }
+}
diff --git a/build/toolchain/linux/BUILD.gn b/build/toolchain/linux/BUILD.gn
new file mode 100644
index 0000000..3a49487
--- /dev/null
+++ b/build/toolchain/linux/BUILD.gn
@@ -0,0 +1,51 @@
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//build/toolchain/gcc_toolchain.gni")
+
+clang_toolchain("clang_x86") {
+  toolchain_args = {
+    current_os = "linux"
+    current_cpu = "x86"
+  }
+}
+
+gcc_toolchain("x86") {
+  # GCC by bare command name; g++ is also used as the link driver.
+  cxx = "g++"
+  cc = "gcc"
+  ld = cxx
+  # Unprefixed binutils.
+  ar = "ar"
+  nm = "nm"
+  readelf = "readelf"
+  toolchain_args = {
+    is_clang = false
+    current_os = "linux"
+    current_cpu = "x86"
+  }
+}
+
+clang_toolchain("clang_x64") {
+  toolchain_args = {
+    current_os = "linux"
+    current_cpu = "x64"
+  }
+}
+
+gcc_toolchain("x64") {
+  # GCC by bare command name; g++ is also used as the link driver.
+  cxx = "g++"
+  cc = "gcc"
+  ld = cxx
+  # Unprefixed binutils.
+  ar = "ar"
+  nm = "nm"
+  readelf = "readelf"
+  toolchain_args = {
+    is_clang = false
+    current_os = "linux"
+    current_cpu = "x64"
+  }
+}
diff --git a/build/toolchain/mac/BUILD.gn b/build/toolchain/mac/BUILD.gn
new file mode 100644
index 0000000..48010ce
--- /dev/null
+++ b/build/toolchain/mac/BUILD.gn
@@ -0,0 +1,391 @@
+# Copied from chromium build/toolchain/mac/ and modified for goma client.
+#
+# Copyright (c) 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# TODO Use "gcc_toolchain.gni" like the Linux toolchains. This requires
+# some enhancements since the commands on Mac are slightly different than on
+# Linux.
+
+import("//build/config/clang/clang.gni")
+import("//build/config/mac/mac_sdk.gni")
+
+assert(host_os == "mac")
+
+import("//build/toolchain/goma.gni")
+import("//build/toolchain/concurrent_links.gni")
+
+declare_args() {
+  # Depth of the pool meant to limit the copy_bundle_data and compile_xcassets
+  # tools, which can cause lots of I/O contention under highly parallel ninja
+  # runs (e.g. distributed builds like goma). -1 means: use concurrent_links.
+  bundle_pool_depth = -1
+}
+
+if (current_toolchain == default_toolchain) {
+  pool("bundle_pool") {
+    # An explicit bundle_pool_depth wins; -1 falls back to concurrent_links.
+    depth = concurrent_links
+    if (bundle_pool_depth != -1) {
+      depth = bundle_pool_depth
+    }
+  }
+}
+
+# When implementing tools using Python scripts, a TOOL_VERSION=N env
+# variable is placed in front of the command. The N should be incremented
+# whenever the script is changed, so that the build system rebuilds all
+# edges that utilize the script. Ideally this should be changed to use
+# proper input-dirty checking, but that could be expensive. Instead, use a
+# script to get the tool scripts' modification time to use as the version.
+# This won't cause a re-generation of GN files when the tool script changes
+# but it will cause edges to be marked as dirty if the ninja files are
+# regenerated. See https://crbug.com/619083 for details. A proper fix
+# would be to have inputs to tools (https://crbug.com/621119).
+# Tool scripts whose modification times serve as the TOOL_VERSION values.
+_tool_scripts = [
+  "//build/toolchain/mac/compile_xcassets.py",
+  "//build/toolchain/mac/filter_libtool.py",
+  "//build/toolchain/mac/linker_driver.py",
+]
+tool_versions = exec_script("get_tool_mtime.py",
+                            rebase_path(_tool_scripts, root_build_dir),
+                            "trim scope")
+
+# Shared toolchain definition. Invocations should set current_os to set the
+# build args in this definition.
+template("mac_toolchain") {
+  toolchain(target_name) {
+    # When invoking this toolchain not as the default one, these args will be
+    # passed to the build. They are ignored when this is the default toolchain.
+    assert(defined(invoker.toolchain_args),
+           "Toolchains must declare toolchain_args")
+    toolchain_args = {
+      # Populate toolchain args from the invoker.
+      forward_variables_from(invoker.toolchain_args, "*")
+      # The host toolchain value computed by the default toolchain's setup
+      # needs to be passed through unchanged to all secondary toolchains to
+      # ensure that it's always the same, regardless of the values that may be
+      # set on those toolchains.
+      #host_toolchain = host_toolchain
+    }
+
+    # When the invoker has explicitly overridden use_goma or cc_wrapper in the
+    # toolchain args, use those values, otherwise default to the global one.
+    # This works because the only reasonable override that toolchains might
+    # supply for these values are to force-disable them.
+    if (defined(toolchain_args.use_goma)) {
+      toolchain_uses_goma = toolchain_args.use_goma
+    } else {
+      toolchain_uses_goma = use_goma
+    }
+
+    # Compute the compiler prefix.
+    if (toolchain_uses_goma) {
+      compiler_prefix = "$goma_dir/gomacc "
+    } else {
+      compiler_prefix = ""
+    }
+
+    compiler_prefix += rebase_path("$clang_base_path/bin/", root_build_dir)
+
+    cc = "${compiler_prefix}clang"
+    cxx = "${compiler_prefix}clang++"
+    ld = cxx
+
+    linker_driver =
+        "TOOL_VERSION=${tool_versions.linker_driver} " +
+        rebase_path("//build/toolchain/mac/linker_driver.py", root_build_dir)
+
+    _enable_dsyms = false
+    _save_unstripped_output = false
+
+    # Make these apply to all tools below.
+    lib_switch = "-l"
+    lib_dir_switch = "-L"
+
+    # Object files go in this directory. Use label_name instead of
+    # target_output_name since labels will generally have no spaces and will be
+    # unique in the directory.
+    object_subdir = "{{target_out_dir}}/{{label_name}}"
+
+    # If dSYMs are enabled, this flag will be added to the link tools.
+    if (_enable_dsyms) {
+      dsym_switch = " -Wcrl,dsym,{{root_out_dir}} "
+      dsym_output_dir =
+          "{{root_out_dir}}/{{target_output_name}}{{output_extension}}.dSYM"
+      dsym_output = [
+        "$dsym_output_dir/",
+        "$dsym_output_dir/Contents/Info.plist",
+        "$dsym_output_dir/Contents/Resources/DWARF/" +
+            "{{target_output_name}}{{output_extension}}",
+      ]
+    } else {
+      dsym_switch = ""
+    }
+
+    if (_save_unstripped_output) {
+      _unstripped_output = "{{root_out_dir}}/{{target_output_name}}{{output_extension}}.unstripped"
+    }
+
+    tool("cc") {
+      depfile = "{{output}}.d"
+      precompiled_header_type = "gcc"
+      command = "$cc -MMD -MF $depfile {{defines}} {{include_dirs}} {{cflags}} {{cflags_c}} -c {{source}} -o {{output}}"
+      depsformat = "gcc"
+      description = "CC {{output}}"
+      outputs = [
+        "$object_subdir/{{source_name_part}}.o",
+      ]
+    }
+
+    tool("cxx") {
+      depfile = "{{output}}.d"
+      precompiled_header_type = "gcc"
+      command = "$cxx -MMD -MF $depfile {{defines}} {{include_dirs}} {{cflags}} {{cflags_cc}} -c {{source}} -o {{output}}"
+      depsformat = "gcc"
+      description = "CXX {{output}}"
+      outputs = [
+        "$object_subdir/{{source_name_part}}.o",
+      ]
+    }
+
+    tool("asm") {
+      # For GCC we can just use the C compiler to compile assembly.
+      depfile = "{{output}}.d"
+      command = "$cc -MMD -MF $depfile {{defines}} {{include_dirs}} {{asmflags}} -c {{source}} -o {{output}}"
+      depsformat = "gcc"
+      description = "ASM {{output}}"
+      outputs = [
+        "$object_subdir/{{source_name_part}}.o",
+      ]
+    }
+
+    tool("objc") {
+      depfile = "{{output}}.d"
+      precompiled_header_type = "gcc"
+      command = "$cc -MMD -MF $depfile {{defines}} {{include_dirs}} {{cflags}} {{cflags_objc}} -c {{source}} -o {{output}}"
+      depsformat = "gcc"
+      description = "OBJC {{output}}"
+      outputs = [
+        "$object_subdir/{{source_name_part}}.o",
+      ]
+    }
+
+    tool("objcxx") {
+      depfile = "{{output}}.d"
+      precompiled_header_type = "gcc"
+      command = "$cxx -MMD -MF $depfile {{defines}} {{include_dirs}} {{cflags}} {{cflags_objcc}} -c {{source}} -o {{output}}"
+      depsformat = "gcc"
+      description = "OBJCXX {{output}}"
+      outputs = [
+        "$object_subdir/{{source_name_part}}.o",
+      ]
+    }
+
+    tool("alink") {
+      script =
+          rebase_path("//build/toolchain/mac/filter_libtool.py", root_build_dir)
+      command = "rm -f {{output}} && TOOL_VERSION=${tool_versions.filter_libtool} python $script libtool -static {{arflags}} -o {{output}} {{inputs}}"
+      description = "LIBTOOL-STATIC {{output}}"
+      outputs = [
+        "{{output_dir}}/{{target_output_name}}{{output_extension}}",
+      ]
+      default_output_dir = "{{target_out_dir}}"
+      default_output_extension = ".a"
+      output_prefix = "lib"
+    }
+
+    tool("solink") {
+      dylib = "{{output_dir}}/{{target_output_name}}{{output_extension}}"  # eg "./libfoo.dylib"
+      rspfile = dylib + ".rsp"
+      pool = "//build/toolchain:link_pool($default_toolchain)"
+
+      # These variables are not built into GN but are helpers that implement
+      # (1) linking to produce a .dylib, (2) extracting the symbols from that
+      # file to a temporary file, (3) if the temporary file has differences from
+      # the existing .TOC file, overwrite it, otherwise, don't change it.
+      #
+      # As a special case, if the library reexports symbols from other dynamic
+      # libraries, we always update the .TOC and skip the temporary file and
+      # diffing steps, since that library always needs to be re-linked.
+      tocname = dylib + ".TOC"
+      temporary_tocname = dylib + ".tmp"
+
+      does_reexport_command = "[ ! -e \"$dylib\" -o ! -e \"$tocname\" ] || otool -l \"$dylib\" | grep -q LC_REEXPORT_DYLIB"
+
+      link_command = "$linker_driver $ld -shared "
+      link_command += dsym_switch
+      link_command += "{{ldflags}} -o \"$dylib\" -Wl,-filelist,\"$rspfile\" {{libs}} {{solibs}}"
+
+      replace_command = "if ! cmp -s \"$temporary_tocname\" \"$tocname\"; then mv \"$temporary_tocname\" \"$tocname\""
+      extract_toc_command = "{ otool -l \"$dylib\" | grep LC_ID_DYLIB -A 5; nm -gP \"$dylib\" | cut -f1-2 -d' ' | grep -v U\$\$; true; }"
+
+      command = "if $does_reexport_command ; then $link_command && $extract_toc_command > \"$tocname\"; else $link_command && $extract_toc_command > \"$temporary_tocname\" && $replace_command ; fi; fi"
+
+      rspfile_content = "{{inputs_newline}}"
+
+      description = "SOLINK {{output}}"
+
+      # Use this for {{output_extension}} expansions unless a target manually
+      # overrides it (in which case {{output_extension}} will be what the target
+      # specifies).
+      default_output_dir = "{{root_out_dir}}"
+      default_output_extension = ".dylib"
+
+      output_prefix = "lib"
+
+      # Since the above commands only updates the .TOC file when it changes, ask
+      # Ninja to check if the timestamp actually changed to know if downstream
+      # dependencies should be recompiled.
+      restat = true
+
+      # Tell GN about the output files. It will link to the dylib but use the
+      # tocname for dependency management.
+      outputs = [
+        dylib,
+        tocname,
+      ]
+      link_output = dylib
+      depend_output = tocname
+
+      if (_enable_dsyms) {
+        outputs += dsym_output
+      }
+      if (_save_unstripped_output) {
+        outputs += [ _unstripped_output ]
+      }
+    }
+
+    tool("solink_module") {
+      sofile = "{{output_dir}}/{{target_output_name}}{{output_extension}}"  # eg "./libfoo.so"
+      rspfile = sofile + ".rsp"
+      pool = "//build/toolchain:link_pool($default_toolchain)"
+
+      link_command = "$linker_driver $ld -bundle {{ldflags}} -o \"$sofile\" -Wl,-filelist,\"$rspfile\""
+      link_command += dsym_switch
+      link_command += " {{solibs}} {{libs}}"
+      command = link_command
+
+      rspfile_content = "{{inputs_newline}}"
+
+      description = "SOLINK_MODULE {{output}}"
+
+      # Use this for {{output_extension}} expansions unless a target manually
+      # overrides it (in which case {{output_extension}} will be what the target
+      # specifies).
+      default_output_dir = "{{root_out_dir}}"
+      default_output_extension = ".so"
+
+      outputs = [
+        sofile,
+      ]
+
+      if (_enable_dsyms) {
+        outputs += dsym_output
+      }
+      if (_save_unstripped_output) {
+        outputs += [ _unstripped_output ]
+      }
+    }
+
+    tool("link") {
+      outfile = "{{output_dir}}/{{target_output_name}}{{output_extension}}"
+      rspfile = "$outfile.rsp"
+      pool = "//build/toolchain:link_pool($default_toolchain)"
+
+      # Note about --filelist: Apple's linker reads the file list file and
+      # interprets each newline-separated chunk of text as a file name. It
+      # doesn't do the things one would expect from the shell like unescaping
+      # or handling quotes. In contrast, when Ninja finds a file name with
+      # spaces, it single-quotes them in $inputs_newline as it would normally
+      # do for command-line arguments. Thus any source names with spaces, or
+      # label names with spaces (which GN bases the output paths on) will be
+      # corrupted by this process. Don't use spaces for source files or labels.
+      command = "$linker_driver $ld $dsym_switch {{ldflags}} -o \"$outfile\" -Wl,-filelist,\"$rspfile\" {{solibs}} {{libs}}"
+      description = "LINK $outfile"
+      rspfile_content = "{{inputs_newline}}"
+      outputs = [
+        outfile,
+      ]
+
+      if (_enable_dsyms) {
+        outputs += dsym_output
+      }
+      if (_save_unstripped_output) {
+        outputs += [ _unstripped_output ]
+      }
+
+      default_output_dir = "{{root_out_dir}}"
+    }
+
+    # These two are really entirely generic, but have to be repeated in
+    # each toolchain because GN doesn't allow a template to be used here.
+    # See //build/toolchain/toolchain.gni for details.
+    tool("stamp") {
+      command = stamp_command
+      description = stamp_description
+    }
+    tool("copy") {
+      command = copy_command
+      description = copy_description
+    }
+
+    tool("copy_bundle_data") {
+      # copy_command uses hardlinks if possible but this does not work with
+      # directories. If source is a directory, instead use "pax" to create
+      # the same tree structure using hardlinks to individual files (this
+      # preserves symbolic links too) as recommended in the replies to the
+      # question at http://serverfault.com/q/209888/43689 ("cp -al" isn't
+      # available on macOS).
+      #
+      # According to the man page for pax, the commands to use to clone
+      # olddir to newdir using pax are the following:
+      #
+      #   $ mkdir newdir
+      #   $ cd olddir
+      #   $ pax -rwl . ../newdir
+      #
+      # The _copydir command does exactly that but uses an absolute path
+      # constructed using shell variable $OLDPWD (automatically set when
+      # cd is used) as computing the relative path is a bit complex and
+      # using pwd would require a sub-shell to be created.
+      _copydir = "mkdir -p {{output}} && cd {{source}} && " +
+                 "pax -rwl . \"\$OLDPWD\"/{{output}}"
+      command = "rm -rf {{output}} && if [[ -d {{source}} ]]; then " +
+                _copydir + "; else " + copy_command + "; fi"
+
+      description = "COPY_BUNDLE_DATA {{source}} {{output}}"
+      pool = ":bundle_pool($default_toolchain)"
+    }
+    tool("compile_xcassets") {
+      _tool = rebase_path("//build/toolchain/mac/compile_xcassets.py",
+                          root_build_dir)
+      _sdk_name = mac_sdk_name
+      _min_deployment_target = mac_deployment_target
+      command = "rm -f {{output}} && " +
+                "TOOL_VERSION=${tool_versions.compile_xcassets} " +
+                "python $_tool -p $_sdk_name -t $_min_deployment_target " +
+                "-T {{bundle_product_type}} -o {{output}} {{inputs}}"
+
+      description = "COMPILE_XCASSETS {{output}}"
+      pool = ":bundle_pool($default_toolchain)"
+    }
+  }
+}
+
+mac_toolchain("clang_x64") {
+  toolchain_args = {
+    current_cpu = "x64"
+    current_os = "mac"
+  }
+}
+
+# To split environment for universal binaries.
+mac_toolchain("clang_universal") {
+  toolchain_args = {
+    current_cpu = "x64"
+    current_os = "mac"
+  }
+}
diff --git a/build/toolchain/mac/compile_xcassets.py b/build/toolchain/mac/compile_xcassets.py
new file mode 100644
index 0000000..e7cd2c4
--- /dev/null
+++ b/build/toolchain/mac/compile_xcassets.py
@@ -0,0 +1,107 @@
+# Copied from chromium build/toolchain/mac/
+#
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import argparse
+import os
+import subprocess
+import sys
+
+
+def CompileXCAssets(
+    output, platform, product_type, min_deployment_target, inputs):
+  """Compile the .xcassets bundles to an asset catalog using actool.
+
+  Args:
+    output: path to the compiled asset catalog inside the containing bundle
+    platform: the targeted platform
+    product_type: the bundle type; --product-type is omitted if None or empty
+    min_deployment_target: minimum deployment target
+    inputs: list of paths to .xcassets bundles (made absolute before use)
+  """
+  command = [
+      'xcrun', 'actool', '--output-format=human-readable-text',
+      '--compress-pngs', '--notices', '--warnings', '--errors',
+      '--platform', platform, '--minimum-deployment-target',
+      min_deployment_target,
+  ]
+
+  if product_type:
+    command.extend(['--product-type', product_type])
+
+  if platform == 'macosx':
+    command.extend(['--target-device', 'mac'])
+  else:
+    command.extend(['--target-device', 'iphone', '--target-device', 'ipad'])
+
+  # actool crashes if paths are relative, so convert input and output paths
+  # to absolute paths.
+  command.extend(['--compile', os.path.dirname(os.path.abspath(output))])
+  command.extend(map(os.path.abspath, inputs))
+
+  # Run actool and redirect stdout and stderr to the same pipe (as actool
+  # is confused about what should go to stderr/stdout).
+  process = subprocess.Popen(
+      command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+  stdout, _ = process.communicate()
+
+  if process.returncode:
+    sys.stderr.write(stdout)
+    sys.exit(process.returncode)
+
+  # In case of success, the output looks like the following:
+  #   /* com.apple.actool.compilation-results */
+  #   /Full/Path/To/Bundle.app/Assets.car
+  #
+  # Ignore any lines in the output matching those (last line is an empty line)
+  # and consider that the build failed if the output contains any other lines.
+  for line in stdout.splitlines():
+    if not line:
+      continue
+    if line == '/* com.apple.actool.compilation-results */':
+      continue
+    if line == os.path.abspath(output):
+      continue
+    sys.stderr.write(stdout)
+    sys.exit(1)
+
+
+def Main():
+  parser = argparse.ArgumentParser(
+      description='compile assets catalog for a bundle')
+  parser.add_argument(
+      '--platform', '-p', required=True,
+      choices=('macosx', 'iphoneos', 'iphonesimulator'),
+      help='target platform for the compiled assets catalog')
+  parser.add_argument(
+      '--minimum-deployment-target', '-t', required=True,
+      help='minimum deployment target for the compiled assets catalog')
+  parser.add_argument(
+      '--output', '-o', required=True,
+      help='path to the compiled assets catalog')
+  parser.add_argument(
+      '--product-type', '-T',
+      help='type of the containing bundle')
+  parser.add_argument(
+      'inputs', nargs='+',
+      help='path to input assets catalog sources')
+  args = parser.parse_args()
+
+  if os.path.basename(args.output) != 'Assets.car':
+    sys.stderr.write(
+        'output should be path to compiled asset catalog, not '
+        'to the containing bundle: %s\n' % (args.output,))
+    sys.exit(1)
+
+  CompileXCAssets(
+      args.output,
+      args.platform,
+      args.product_type,
+      args.minimum_deployment_target,
+      args.inputs)
+
+
+if __name__ == '__main__':
+  sys.exit(Main())
diff --git a/build/toolchain/mac/filter_libtool.py b/build/toolchain/mac/filter_libtool.py
new file mode 100644
index 0000000..c9edfa0
--- /dev/null
+++ b/build/toolchain/mac/filter_libtool.py
@@ -0,0 +1,44 @@
+# Copied from chromium build/toolchain/mac/.
+#
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import re
+import subprocess
+import sys
+
+# This script executes libtool and filters out logspam lines like:
+#    '/path/to/libtool: file: foo.o has no symbols'
+
+def Main(cmd_list):
+  libtool_re = re.compile(r'^.*libtool: (?:for architecture: \S* )?'
+                          r'file: .* has no symbols$')
+  libtool_re5 = re.compile(
+      r'^.*libtool: warning for library: ' +
+      r'.* the table of contents is empty ' +
+      r'\(no object file members in the library define global symbols\)$')
+  env = os.environ.copy()
+  # Ref:
+  # http://www.opensource.apple.com/source/cctools/cctools-809/misc/libtool.c
+  # The problem with this flag is that it resets the file mtime on the file to
+  # epoch=0, e.g. 1970-1-1 or 1969-12-31 depending on timezone.
+  env['ZERO_AR_DATE'] = '1'
+  libtoolout = subprocess.Popen(cmd_list, stderr=subprocess.PIPE, env=env)
+  _, err = libtoolout.communicate()
+  for line in err.splitlines():
+    if not libtool_re.match(line) and not libtool_re5.match(line):
+      print >>sys.stderr, line
+  # Unconditionally touch the output .a file on the command line if present
+  # and the command succeeded. A bit hacky.
+  if not libtoolout.returncode:
+    for i in range(len(cmd_list) - 1):
+      if cmd_list[i] == '-o' and cmd_list[i+1].endswith('.a'):
+        os.utime(cmd_list[i+1], None)
+        break
+  return libtoolout.returncode
+
+
+if __name__ == '__main__':
+  sys.exit(Main(sys.argv[1:]))
diff --git a/build/toolchain/mac/get_tool_mtime.py b/build/toolchain/mac/get_tool_mtime.py
new file mode 100644
index 0000000..4106344
--- /dev/null
+++ b/build/toolchain/mac/get_tool_mtime.py
@@ -0,0 +1,17 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import sys
+
+# Usage: python get_tool_mtime.py path/to/file1.py path/to/file2.py
+#
+# Prints a GN scope with the variable name being the basename sans-extension
+# and the value being the file modification time. A variable is emitted for
+# each file argument on the command line.
+
+if __name__ == '__main__':
+  for f in sys.argv[1:]:
+    variable = os.path.splitext(os.path.basename(f))[0]
+    print '%s = %d' % (variable, os.path.getmtime(f))
diff --git a/build/toolchain/mac/linker_driver.py b/build/toolchain/mac/linker_driver.py
new file mode 100755
index 0000000..1422235
--- /dev/null
+++ b/build/toolchain/mac/linker_driver.py
@@ -0,0 +1,224 @@
+#!/usr/bin/env python
+# Copied from chromium build/toolchain/mac/.
+
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import os.path
+import shutil
+import subprocess
+import sys
+
+# The linker_driver.py is responsible for forwarding a linker invocation to
+# the compiler driver, while processing special arguments itself.
+#
+# Usage: linker_driver.py clang++ main.o -L. -llib -o prog -Wcrl,dsym,out
+#
+# On Mac, the logical step of linking is handled by three discrete tools to
+# perform the image link, debug info link, and strip. The linker_driver.py
+# combines these three steps into a single tool.
+#
+# The command passed to the linker_driver.py should be the compiler driver
+# invocation for the linker. It is first invoked unaltered (except for the
+# removal of the special driver arguments, described below). Then the driver
+# performs additional actions, based on these arguments:
+#
+#   -Wcrl,dsym,<dsym_path_prefix>
+#       After invoking the linker, this will run `dsymutil` on the linker's
+#       output, producing a dSYM bundle, stored at dsym_path_prefix. As an
+#       example, if the linker driver were invoked with:
+#         "... -o out/gn/obj/foo/libbar.dylib ... -Wcrl,dsym,out/gn ..."
+#       The resulting dSYM would be out/gn/libbar.dylib.dSYM/.
+#
+#   -Wcrl,unstripped,<unstripped_path_prefix>
+#       After invoking the linker, and before strip, this will save a copy of
+#       the unstripped linker output in the directory unstripped_path_prefix.
+#
+#   -Wcrl,strip,<strip_arguments>
+#       After invoking the linker, and optionally dsymutil, this will run
+#       the strip command on the linker's output. strip_arguments are
+#       comma-separated arguments to be passed to the strip command.
+
+def Main(args):
+  """Main function for the linker driver. Separates out the arguments for
+  the main compiler driver and the linker driver, then invokes all the
+  required tools.
+
+  Args:
+    args: list of string, Arguments to the script.
+  """
+
+  if len(args) < 2:
+    raise RuntimeError("Usage: linker_driver.py [linker-invocation]")
+
+  # Collect arguments to the linker driver (this script) and remove them from
+  # the arguments being passed to the compiler driver.
+  linker_driver_actions = {}
+  compiler_driver_args = []
+  for arg in args[1:]:
+    if arg.startswith(_LINKER_DRIVER_ARG_PREFIX):
+      # Convert driver actions into a map of name => lambda to invoke.
+      driver_action = ProcessLinkerDriverArg(arg)
+      assert driver_action[0] not in linker_driver_actions
+      linker_driver_actions[driver_action[0]] = driver_action[1]
+    else:
+      compiler_driver_args.append(arg)
+
+  linker_driver_outputs = [_FindLinkerOutput(compiler_driver_args)]
+
+  try:
+    # Run the linker by invoking the compiler driver.
+    subprocess.check_call(compiler_driver_args)
+
+    # Run the linker driver actions, in the order specified by the actions list.
+    for action in _LINKER_DRIVER_ACTIONS:
+      name = action[0]
+      if name in linker_driver_actions:
+        linker_driver_outputs += linker_driver_actions[name](args)
+  except:
+    # If a linker driver action failed, remove all the outputs to make the
+    # build step atomic. Loop explicitly: a lazy map() would remove nothing.
+    for path in linker_driver_outputs:
+      _RemovePath(path)
+    # Re-report the original failure.
+    raise
+
+
+def ProcessLinkerDriverArg(arg):
+  """Processes a linker driver argument and returns a tuple containing the
+  name and unary lambda to invoke for that linker driver action.
+
+  Args:
+    arg: string, The linker driver argument.
+
+  Returns:
+    A 2-tuple:
+      0: The driver action name, as in _LINKER_DRIVER_ACTIONS.
+      1: An 1-ary lambda that takes the full list of arguments passed to
+         Main(). The lambda should call the linker driver action that
+         corresponds to the argument and return a list of outputs from the
+         action.
+  """
+  if not arg.startswith(_LINKER_DRIVER_ARG_PREFIX):
+    raise ValueError('%s is not a linker driver argument' % (arg,))
+
+  sub_arg = arg[len(_LINKER_DRIVER_ARG_PREFIX):]
+
+  for driver_action in _LINKER_DRIVER_ACTIONS:
+    (name, action) = driver_action
+    if sub_arg.startswith(name):
+      return (name,
+          lambda full_args: action(sub_arg[len(name):], full_args))
+
+  raise ValueError('Unknown linker driver argument: %s' % (arg,))
+
+
+def RunDsymUtil(dsym_path_prefix, full_args):
+  """Linker driver action for -Wcrl,dsym,<dsym-path-prefix>. Invokes dsymutil
+  on the linker's output, writing <dsym_path_prefix>/<output basename>.dSYM.
+
+  Args:
+    dsym_path_prefix: string, The directory in which the dSYM bundle named
+        after the linker output is created. Must not be empty.
+    full_args: list of string, Full argument list for the linker driver.
+
+  Returns:
+      list of string, Build step outputs (the dSYM path).
+  """
+  if not len(dsym_path_prefix):
+    raise ValueError('Unspecified dSYM output file')
+
+  linker_out = _FindLinkerOutput(full_args)
+  base = os.path.basename(linker_out)
+  dsym_out = os.path.join(dsym_path_prefix, base + '.dSYM')
+
+  # Remove old dSYMs before invoking dsymutil.
+  _RemovePath(dsym_out)
+  subprocess.check_call(['xcrun', 'dsymutil', '-o', dsym_out, linker_out])
+  return [dsym_out]
+
+
+def RunSaveUnstripped(unstripped_path_prefix, full_args):
+  """Linker driver action for -Wcrl,unstripped,<unstripped_path_prefix>. Copies
+  the linker output to <unstripped_path_prefix>/<basename>.unstripped.
+
+  Args:
+    unstripped_path_prefix: string, The directory that receives the copy of
+        the linker output, taken before strip runs. Must not be empty.
+    full_args: list of string, Full argument list for the linker driver.
+
+  Returns:
+    list of string, Build step outputs.
+  """
+  if not len(unstripped_path_prefix):
+    raise ValueError('Unspecified unstripped output file')
+
+  linker_out = _FindLinkerOutput(full_args)
+  base = os.path.basename(linker_out)
+  unstripped_out = os.path.join(unstripped_path_prefix, base + '.unstripped')
+
+  shutil.copyfile(linker_out, unstripped_out)
+  return [unstripped_out]
+
+
+def RunStrip(strip_args_string, full_args):
+  """Linker driver action for -Wcrl,strip,<strip_arguments>.
+
+  Args:
+      strip_args_string: string, Comma-separated arguments for `strip`.
+      full_args: list of string, Full arguments for the linker driver.
+
+  Returns:
+      list of string, Build step outputs.
+  """
+  strip_command = ['xcrun', 'strip']
+  if len(strip_args_string) > 0:
+    strip_command += strip_args_string.split(',')
+  strip_command.append(_FindLinkerOutput(full_args))
+  subprocess.check_call(strip_command)
+  return []
+
+
+def _FindLinkerOutput(full_args):
+  """Finds the output of the linker by looking for the output flag in its
+  argument list. As this is a required linker argument, raises an error if it
+  cannot be found.
+  """
+  # The linker_driver.py script may be used to wrap either the compiler linker
+  # (uses -o to configure the output) or lipo (uses -output to configure the
+  # output). Since wrapping the compiler linker is the most likely possibility
+  # use try/except and fallback to checking for -output if -o is not found.
+  try:
+    output_flag_index = full_args.index('-o')
+  except ValueError:
+    output_flag_index = full_args.index('-output')
+  return full_args[output_flag_index + 1]
+
+
+def _RemovePath(path):
+  """Removes the file or directory at |path| if it exists."""
+  if os.path.exists(path):
+    if os.path.isdir(path):
+      shutil.rmtree(path)
+    else:
+      os.unlink(path)
+
+
+_LINKER_DRIVER_ARG_PREFIX = '-Wcrl,'
+
+"""List of linker driver actions. The sort order of this list affects the
+order in which the actions are invoked. The first item in the tuple is the
+argument's -Wcrl,<sub_argument> and the second is the function to invoke.
+"""
+_LINKER_DRIVER_ACTIONS = [
+    ('dsym,', RunDsymUtil),
+    ('unstripped,', RunSaveUnstripped),
+    ('strip,', RunStrip),
+]
+
+
+if __name__ == '__main__':
+  Main(sys.argv)
+  sys.exit(0)
diff --git a/build/toolchain/mac/setup_toolchain.py b/build/toolchain/mac/setup_toolchain.py
new file mode 100755
index 0000000..0133be6
--- /dev/null
+++ b/build/toolchain/mac/setup_toolchain.py
@@ -0,0 +1,31 @@
+# Copied from chromium build/toolchain/mac/.
+#
+# Copyright (c) 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import stat
+import sys
+
+def CopyTool(source_path):
+  """Copies the given tool to the current directory, including a warning not
+  to edit it."""
+  with open(source_path) as source_file:
+    tool_source = source_file.readlines()
+
+  # Add header and write it out to the current directory (which should be the
+  # root build dir).
+  out_path = 'gyp-mac-tool'
+  with open(out_path, 'w') as tool_file:
+    tool_file.write(''.join([tool_source[0],
+                             '# Generated by setup_toolchain.py do not edit.\n']
+                            + tool_source[1:]))
+  st = os.stat(out_path)
+  os.chmod(out_path, st.st_mode | stat.S_IEXEC)
+
+# Find the tool source, it's the first argument, and copy it.
+if len(sys.argv) != 2:
+  print "Need one argument (mac_tool source path)."
+  sys.exit(1)
+CopyTool(sys.argv[1])
diff --git a/build/toolchain/toolchain.gni b/build/toolchain/toolchain.gni
new file mode 100644
index 0000000..72cc7ad
--- /dev/null
+++ b/build/toolchain/toolchain.gni
@@ -0,0 +1,95 @@
+# Copied from chromium's build/toolchain and modified for goma.
+#
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Toolchain-related configuration that may be needed outside the context of the
+# toolchain() rules themselves.
+
+declare_args() {
+  # Enable Link Time Optimization in optimized builds (output programs run
+  # faster, but linking is up to 5-20x slower).
+  # Note: use target_os == "linux" rather than is_linux so that it does not
+  # apply to host_toolchain when target_os="android".
+  allow_posix_link_time_opt = target_os == "linux" && target_cpu == "x64"
+
+  # Set to true to use lld, the LLVM linker. This flag may be used on Windows
+  # with the shipped LLVM toolchain.
+  #
+  # It turned out gomacc linked with lld does not work on chromeos sandbox.
+  # So, don't make it enabled by default without confirming it works.
+  # See crbug.com/780045
+  use_lld = os == "win" && host_os != "win"
+
+  # If used with allow_posix_link_time_opt, it enables the experimental support
+  # of ThinLTO that links 3x-10x faster but (as of now) does not have all the
+  # important optimizations such us devirtualization implemented. See also
+  # http://blog.llvm.org/2016/06/thinlto-scalable-and-incremental-lto.html
+  use_thin_lto = false
+
+  # If this is set to true, or if LLVM_FORCE_HEAD_REVISION is set to 1
+  # in the environment, we use the revision in the llvm repo to determine
+  # the CLANG_REVISION to use, instead of the version hard-coded into
+  # //tools/clang/scripts/update.py. This should only be used in
+  # conjunction with setting LLVM_FORCE_HEAD_REVISION in the
+  # environment when `gclient runhooks` is run as well.
+  llvm_force_head_revision = false
+}
+
+declare_args() {
+  if (is_clang) {
+    # Clang compiler version. Clang files are placed at version-dependent paths.
+    clang_version = "6.0.0"
+  }
+}
+
+# Subdirectory within root_out_dir for shared library files.
+# TODO: GYP sets this to "lib" for Linux & Android, but this won't work
+#     in GN until support for loadable_module() is added.
+#     See: https://codereview.chromium.org/1236503002/
+shlib_subdir = "."
+
+# Root out dir for shared library files.
+root_shlib_dir = root_out_dir
+if (shlib_subdir != ".") {
+  root_shlib_dir += "/$shlib_subdir"
+}
+
+# Extension for shared library files (including leading dot).
+if (os == "mac") {
+  shlib_extension = ".dylib"
+} else if (is_posix) {
+  shlib_extension = ".so"
+} else if (os == "win") {
+  shlib_extension = ".dll"
+} else {
+  assert(false, "Platform not supported")
+}
+
+# Prefix for shared library files.
+if (is_posix) {
+  shlib_prefix = "lib"
+} else {
+  shlib_prefix = ""
+}
+
+# While other "tool"s in a toolchain are specific to the target of that
+# toolchain, the "stamp" and "copy" tools are really generic to the host;
+# but each toolchain must define them separately.  GN doesn't allow a
+# template instantiation inside a toolchain definition, so some boilerplate
+# has to be repeated in each toolchain to define these two tools.  These
+# four variables reduce the duplication in that boilerplate.
+stamp_description = "STAMP {{output}}"
+copy_description = "COPY {{source}} {{output}}"
+if (host_os == "win") {
+  _tool_wrapper_path =
+      rebase_path("//build/toolchain/win/tool_wrapper.py", root_build_dir)
+
+  stamp_command = "$python_path $_tool_wrapper_path stamp {{output}}"
+  copy_command =
+      "$python_path $_tool_wrapper_path recursive-mirror {{source}} {{output}}"
+} else {
+  stamp_command = "touch {{output}}"
+  copy_command = "ln -f {{source}} {{output}} 2>/dev/null || (rm -rf {{output}} && cp -af {{source}} {{output}})"
+}
diff --git a/build/toolchain/win/BUILD.gn b/build/toolchain/win/BUILD.gn
new file mode 100644
index 0000000..abb13d7
--- /dev/null
+++ b/build/toolchain/win/BUILD.gn
@@ -0,0 +1,423 @@
+# Copied from chromium's build/toolchain/win/ and modified for goma.
+#
+# Copyright (c) 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//build/config/clang/clang.gni")
+import("//build/config/win/visual_studio_version.gni")
+import("//build/toolchain/goma.gni")
+
+# Should only be running on Windows.
+assert(os == "win")
+
+symbol_level = 2
+if (use_goma && !is_clang) {
+  symbol_level = 1
+}
+
+# Setup the Visual Studio state.
+#
+# Its arguments are the VS path and the compiler wrapper tool. It will write
+# "environment.x86" and "environment.x64" to the build directory and return a
+# list to us.
+
+# This tool is used as a wrapper for various commands below.
+tool_wrapper_path = rebase_path("tool_wrapper.py", root_build_dir)
+
+if (use_goma) {
+  goma_prefix = "$goma_dir/gomacc.exe "
+} else {
+  goma_prefix = ""
+}
+
+# Copy the VS runtime DLL for the default toolchain to the root build directory
+# so things will run.
+if (current_toolchain == default_toolchain) {
+  if (is_debug) {
+    configuration_name = "Debug"
+  } else {
+    configuration_name = "Release"
+  }
+  exec_script("../../vs_toolchain.py",
+              [
+                "copy_dlls",
+                rebase_path(root_build_dir),
+                configuration_name,
+                target_cpu,
+              ])
+}
+
+# Parameters:
+#   environment: File name of environment file.
+#
+# You would also define a toolchain_args variable with at least these set:
+#   current_cpu: current_cpu to pass as a build arg
+#   current_os: current_os to pass as a build arg
+template("msvc_toolchain") {
+  toolchain(target_name) {
+    # When invoking this toolchain not as the default one, these args will be
+    # passed to the build. They are ignored when this is the default toolchain.
+    assert(defined(invoker.toolchain_args))
+    toolchain_args = {
+      if (defined(invoker.toolchain_args)) {
+        forward_variables_from(invoker.toolchain_args, "*")
+      }
+
+      # This value needs to be passed through unchanged.
+      #host_toolchain = host_toolchain
+
+      current_os = "win"
+    }
+
+    # Make these apply to all tools below.
+    lib_switch = ""
+    lib_dir_switch = "/LIBPATH:"
+
+    # Object files go in this directory.
+    object_subdir = "{{target_out_dir}}/{{label_name}}"
+
+    env = invoker.environment
+
+    # When the invoker has explicitly set is_clang in the toolchain args, use
+    # that value, otherwise default to the global one. Every toolchain defined
+    # in this file sets is_clang explicitly, so the global is only a fallback
+    # for invokers that omit it.
+    if (defined(toolchain_args.is_clang)) {
+      toolchain_uses_clang = toolchain_args.is_clang
+    } else {
+      toolchain_uses_clang = is_clang
+    }
+
+    if (toolchain_uses_clang && host_os != "win") {
+      # This toolchain definition uses response files for compilations.  GN uses
+      # the quoting rules of the host OS, while clang-cl always defaults to
+      # cmd.exe quoting rules for parsing response files.  Tell clang-cl to use
+      # POSIX quoting rules, so it can understand what GN generates.
+      cl = "${invoker.cl} --rsp-quoting=posix"
+    } else {
+      cl = invoker.cl
+    }
+
+    if (use_lld) {
+      if (host_os == "win") {
+        lld_link = "lld-link.exe"
+      } else {
+        lld_link = "lld-link"
+      }
+      prefix = rebase_path("$clang_base_path/bin", root_build_dir)
+
+      # lld-link includes a replacement for lib.exe that can produce thin
+      # archives and understands bitcode (for lto builds).
+      lib = "$prefix/$lld_link /lib /llvmlibthin"
+      link = "$prefix/$lld_link"
+    } else {
+      lib = "lib.exe"
+      link = "link.exe"
+    }
+
+    # If possible, pass system includes as flags to the compiler.  When that's
+    # not possible, load a full environment file (containing %INCLUDE% and
+    # %PATH%) -- e.g. 32-bit MSVS builds require %PATH% to be set and just
+    # passing in a list of include directories isn't enough.
+    if (defined(invoker.sys_include_flags)) {
+      env_wrapper = ""
+      sys_include_flags = "${invoker.sys_include_flags} "  # Note trailing space.
+    } else {
+      # clang-cl doesn't need this env hoop, so omit it there.
+      assert(!toolchain_uses_clang)
+      env_wrapper = "ninja -t msvc -e $env -- "  # Note trailing space.
+      sys_include_flags = ""
+    }
+
+    tool("cc") {
+      rspfile = "{{output}}.rsp"
+      precompiled_header_type = "msvc"
+      pdbname = "{{target_out_dir}}/{{label_name}}_c.pdb"
+
+      # Label names may have spaces in them so the pdbname must be quoted. The
+      # source and output don't need to be quoted because GN knows they're a
+      # full file name and will quote automatically when necessary.
+      command = "$env_wrapper$cl /nologo /showIncludes /FC @$rspfile /c {{source}} /Fo{{output}} /Fd\"$pdbname\""
+      depsformat = "msvc"
+      description = "CC {{output}}"
+      outputs = [
+        "$object_subdir/{{source_name_part}}.obj",
+      ]
+      rspfile_content = "$sys_include_flags{{defines}} {{include_dirs}} {{cflags}} {{cflags_c}}"
+    }
+
+    tool("cxx") {
+      rspfile = "{{output}}.rsp"
+      precompiled_header_type = "msvc"
+
+      # The PDB name needs to be different between C and C++ compiled files.
+      pdbname = "{{target_out_dir}}/{{label_name}}_cc.pdb"
+
+      # See comment in CC tool about quoting.
+      command = "$env_wrapper$cl /nologo /showIncludes /FC @$rspfile /c {{source}} /Fo{{output}} /Fd\"$pdbname\""
+      depsformat = "msvc"
+      description = "CXX {{output}}"
+      outputs = [
+        "$object_subdir/{{source_name_part}}.obj",
+      ]
+      rspfile_content = "$sys_include_flags{{defines}} {{include_dirs}} {{cflags}} {{cflags_cc}}"
+    }
+
+    tool("rc") {
+      command = "$python_path $tool_wrapper_path rc-wrapper $env rc.exe {{defines}} {{include_dirs}} /fo{{output}} {{source}}"
+      outputs = [
+        "$object_subdir/{{source_name_part}}.res",
+      ]
+      description = "RC {{output}}"
+    }
+
+    tool("asm") {
+      if (toolchain_args.current_cpu == "x64") {
+        ml = "ml64.exe"
+      } else {
+        ml = "ml.exe"
+      }
+      command = "$python_path $tool_wrapper_path asm-wrapper $env $ml {{defines}} {{include_dirs}} {{asmflags}} /c /Fo{{output}} {{source}}"
+      description = "ASM {{output}}"
+      outputs = [
+        "$object_subdir/{{source_name_part}}.obj",
+      ]
+    }
+
+    tool("alink") {
+      rspfile = "{{output}}.rsp"
+      command = "$python_path $tool_wrapper_path link-wrapper $env False $lib /nologo /OUT:{{output}} @$rspfile"
+      description = "LIB {{output}}"
+      outputs = [
+        # Ignore {{output_extension}} and always use .lib, there's no reason to
+        # allow targets to override this extension on Windows.
+        "{{output_dir}}/{{target_output_name}}.lib",
+      ]
+      default_output_extension = ".lib"
+      default_output_dir = "{{target_out_dir}}"
+
+      # The use of inputs_newline is to work around a fixed per-line buffer
+      # size in the linker.
+      rspfile_content = "{{inputs_newline}} {{arflags}}"
+    }
+
+    tool("solink") {
+      dllname = "{{output_dir}}/{{target_output_name}}{{output_extension}}"  # e.g. foo.dll
+      libname = "${dllname}.lib"  # e.g. foo.dll.lib
+      pdbname = "${dllname}.pdb"
+      rspfile = "${dllname}.rsp"
+      pool = "//build/toolchain:link_pool($default_toolchain)"
+
+      command = "$python_path $tool_wrapper_path link-wrapper $env False $link /nologo /IMPLIB:$libname /DLL /OUT:$dllname /PDB:$pdbname @$rspfile"
+
+      default_output_extension = ".dll"
+      default_output_dir = "{{root_out_dir}}"
+      description = "LINK(DLL) {{output}}"
+      outputs = [
+        dllname,
+        libname,
+      ]
+      link_output = libname
+      depend_output = libname
+      runtime_outputs = [ dllname ]
+      if (symbol_level != 0) {
+        outputs += [ pdbname ]
+        runtime_outputs += [ pdbname ]
+      }
+
+      # Since the above command only updates the .lib file when it changes, ask
+      # Ninja to check if the timestamp actually changed to know if downstream
+      # dependencies should be recompiled.
+      restat = true
+
+      # The use of inputs_newline is to work around a fixed per-line buffer
+      # size in the linker.
+      rspfile_content = "{{libs}} {{solibs}} {{inputs_newline}} {{ldflags}}"
+    }
+
+    tool("solink_module") {
+      dllname = "{{output_dir}}/{{target_output_name}}{{output_extension}}"  # e.g. foo.dll
+      pdbname = "${dllname}.pdb"
+      rspfile = "${dllname}.rsp"
+      pool = "//build/toolchain:link_pool($default_toolchain)"
+
+      command = "$python_path $tool_wrapper_path link-wrapper $env False $link /nologo /DLL /OUT:$dllname /PDB:$pdbname @$rspfile"
+
+      default_output_extension = ".dll"
+      default_output_dir = "{{root_out_dir}}"
+      description = "LINK_MODULE(DLL) {{output}}"
+      outputs = [
+        dllname,
+      ]
+      if (symbol_level != 0) {
+        outputs += [ pdbname ]
+      }
+      runtime_outputs = outputs
+
+      # The use of inputs_newline is to work around a fixed per-line buffer
+      # size in the linker.
+      rspfile_content = "{{libs}} {{solibs}} {{inputs_newline}} {{ldflags}}"
+    }
+
+    tool("link") {
+      exename = "{{output_dir}}/{{target_output_name}}{{output_extension}}"
+      pdbname = "$exename.pdb"
+      rspfile = "$exename.rsp"
+      pool = "//build/toolchain:link_pool($default_toolchain)"
+
+      command = "$python_path $tool_wrapper_path link-wrapper $env False $link /nologo /OUT:$exename /PDB:$pdbname @$rspfile"
+
+      default_output_extension = ".exe"
+      default_output_dir = "{{root_out_dir}}"
+      description = "LINK {{output}}"
+      outputs = [
+        exename,
+      ]
+      if (symbol_level != 0) {
+        outputs += [ pdbname ]
+      }
+      runtime_outputs = outputs
+
+      # The use of inputs_newline is to work around a fixed per-line buffer
+      # size in the linker.
+      rspfile_content = "{{inputs_newline}} {{libs}} {{solibs}} {{ldflags}}"
+    }
+
+    # These two are really entirely generic, but have to be repeated in
+    # each toolchain because GN doesn't allow a template to be used here.
+    # See //build/toolchain/toolchain.gni for details.
+    tool("stamp") {
+      command = stamp_command
+      description = stamp_description
+    }
+    tool("copy") {
+      command = copy_command
+      description = copy_description
+    }
+  }
+}
+
+if (is_clang) {
+  sys_include_prefix = "-imsvc"
+} else {
+  # MSVC doesn't have the concept of system headers.
+  sys_include_prefix = "/I"
+}
+
+if (host_os == "win") {
+  clang_cl = "clang-cl.exe"
+} else {
+  clang_cl = "clang-cl"
+}
+
+# 32-bit toolchains. Only define these when the target architecture is 32-bit
+# since we don't do any 32-bit cross compiles when targeting 64-bit (the
+# build does generate some 64-bit stuff from 32-bit target builds).
+if (target_cpu == "x86") {
+  x86_toolchain_data = exec_script("setup_toolchain.py",
+                                   [
+                                     visual_studio_path,
+                                     windows_sdk_path,
+                                     visual_studio_runtime_dirs,
+                                     "x86",
+                                     "${sys_include_prefix}",
+                                   ],
+                                   "scope")
+
+  msvc_toolchain("x86") {
+    environment = "environment.x86"
+    cl = "${goma_prefix}\"${x86_toolchain_data.vc_bin_dir}/cl.exe\""
+    toolchain_args = {
+      current_cpu = "x86"
+      is_clang = false
+    }
+  }
+
+  msvc_toolchain("clang_x86") {
+    environment = "environment.x86"
+    prefix = rebase_path("$clang_base_path/bin", root_build_dir)
+    cl = "${goma_prefix}$prefix/${clang_cl}"
+    sys_include_flags = "${x86_toolchain_data.include_flags}"
+
+    toolchain_args = {
+      current_cpu = "x86"
+      is_clang = true
+    }
+  }
+}
+
+# 64-bit toolchains.
+x64_toolchain_data = exec_script("setup_toolchain.py",
+                                 [
+                                   visual_studio_path,
+                                   windows_sdk_path,
+                                   visual_studio_runtime_dirs,
+                                   "x64",
+                                   "${sys_include_prefix}",
+                                 ],
+                                 "scope")
+
+template("win_x64_toolchains") {
+  msvc_toolchain(target_name) {
+    environment = "environment.x64"
+    cl = "${goma_prefix}\"${x64_toolchain_data.vc_bin_dir}/cl.exe\""
+
+    toolchain_args = {
+      if (defined(invoker.toolchain_args)) {
+        forward_variables_from(invoker.toolchain_args, "*")
+      }
+      is_clang = false
+      current_cpu = "x64"
+    }
+  }
+
+  msvc_toolchain("clang_" + target_name) {
+    environment = "environment.x64"
+    prefix = rebase_path("$clang_base_path/bin", root_build_dir)
+    cl = "${goma_prefix}$prefix/${clang_cl}"
+    sys_include_flags = "${x64_toolchain_data.include_flags}"
+
+    toolchain_args = {
+      if (defined(invoker.toolchain_args)) {
+        forward_variables_from(invoker.toolchain_args, "*")
+      }
+      is_clang = true
+      current_cpu = "x64"
+    }
+  }
+}
+
+win_x64_toolchains("x64") {
+  toolchain_args = {
+    # Use the defaults.
+  }
+}
+
+# WinRT toolchains. Only define these when targeting them.
+#
+# NOTE: This is currently broken because it references vc_bin_dir. brettw@
+# changed this around a bit, and I don't know what this should be set to
+# in terms of what setup_toolchain returns for a certain CPU architecture.
+if (target_os == "winrt_81" || target_os == "winrt_81_phone" ||
+    target_os == "winrt_10") {
+  msvc_toolchain("winrt_x86") {
+    environment = "environment.winrt_x86"
+    cl = "${goma_prefix}\"${vc_bin_dir}/cl.exe\""
+
+    toolchain_args = {
+      is_clang = false
+      current_cpu = "x86"
+    }
+  }
+
+  msvc_toolchain("winrt_x64") {
+    environment = "environment.winrt_x64"
+    cl = "${goma_prefix}\"${vc_bin_dir}/cl.exe\""
+
+    toolchain_args = {
+      is_clang = false
+      current_cpu = "x64"
+    }
+  }
+}
diff --git a/build/toolchain/win/setup_toolchain.py b/build/toolchain/win/setup_toolchain.py
new file mode 100644
index 0000000..c9774d0
--- /dev/null
+++ b/build/toolchain/win/setup_toolchain.py
@@ -0,0 +1,204 @@
+# Copied from chromium build/toolchain/win/.
+#
+# Copyright (c) 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+#
+# Writes the environment blocks for the 32-bit and 64-bit builds on Windows
+# (plus "winrt" store-app variants of each) to the build directory, and prints
+# the vc_bin_dir and include_flags values for the target CPU.
+#
+# The arguments are the Visual Studio path, the Windows SDK path, the runtime
+# dirs, the target CPU and the include-flag prefix. The script assumes that the
+# root build directory is the current dir and the files will be written there.
+
+import errno
+import json
+import os
+import re
+import subprocess
+import sys
+
+sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
+import gn_helpers
+
+SCRIPT_DIR = os.path.dirname(__file__)
+
+def _ExtractImportantEnvironment(output_of_set):
+  """Extracts environment variables required for the toolchain to run from
+  a textual dump output by the cmd.exe 'set' command."""
+  envvars_to_save = (
+      'goma_.*', # TODO: This is ugly, but needed for goma.
+      'include',
+      'lib',
+      'libpath',
+      'path',
+      'pathext',
+      'systemroot',
+      'temp',
+      'tmp',
+      )
+  env = {}
+  # This occasionally happens and leads to misleading SYSTEMROOT error messages
+  # if not caught here.
+  if output_of_set.count('=') == 0:
+    raise Exception('Invalid output_of_set. Value is:\n%s' % output_of_set)
+  for line in output_of_set.splitlines():
+    for envvar in envvars_to_save:
+      if re.match(envvar + '=', line.lower()):
+        var, setting = line.split('=', 1)
+        if envvar == 'path':
+          # Our own rules and actions in Chromium rely on python being in the
+          # path. Add the path to this python here so that if it's not in the
+          # path when ninja is run later, python will still be found.
+          setting = os.path.dirname(sys.executable) + os.pathsep + setting
+        env[var.upper()] = setting
+        break
+  if sys.platform in ('win32', 'cygwin'):
+    for required in ('SYSTEMROOT', 'TEMP', 'TMP'):
+      if required not in env:
+        raise Exception('Environment variable "%s" '
+                        'required to be set to valid path' % required)
+  return env
+
+
+def _DetectVisualStudioPath():
+  """Return path to the GYP_MSVS_VERSION of Visual Studio.
+  """
+
+  # Use the code in build/vs_toolchain.py to avoid duplicating code.
+  chromium_dir = os.path.abspath(os.path.join(SCRIPT_DIR, '..', '..', '..'))
+  sys.path.append(os.path.join(chromium_dir, 'build'))
+  import vs_toolchain
+  return vs_toolchain.DetectVisualStudioPath()
+
+
+def _LoadEnvFromBat(args):
+  """Given a bat command, runs it and returns env vars set by it."""
+  args = args[:]
+  args.extend(('&&', 'set'))
+  popen = subprocess.Popen(
+      args, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+  variables, _ = popen.communicate()
+  if popen.returncode != 0:
+    raise Exception('"%s" failed with error %d' % (args, popen.returncode))
+  return variables
+
+
+def _LoadToolchainEnv(cpu, sdk_dir):
+  """Returns a dictionary with environment variables that must be set while
+  running binaries from the toolchain (e.g. INCLUDE and PATH for cl.exe)."""
+  # Check if we are running in the SDK command line environment and use
+  # the setup script from the SDK if so. |cpu| should be either
+  # 'x86' or 'x64'.
+  assert cpu in ('x86', 'x64')
+  if bool(int(os.environ.get('DEPOT_TOOLS_WIN_TOOLCHAIN', 1))) and sdk_dir:
+    # Load environment from json file.
+    env = os.path.normpath(os.path.join(sdk_dir, 'bin/SetEnv.%s.json' % cpu))
+    env = json.load(open(env))['env']
+    for k in env:
+      entries = [os.path.join(*([os.path.join(sdk_dir, 'bin')] + e))
+                 for e in env[k]]
+      # clang-cl wants INCLUDE to be ;-separated even on non-Windows,
+      # lld-link wants LIB to be ;-separated even on non-Windows.  Path gets :.
+      # The separator for INCLUDE here must match the one used in main() below.
+      sep = os.pathsep if k == 'PATH' else ';'
+      env[k] = sep.join(entries)
+    # PATH is a bit of a special case, it's in addition to the current PATH.
+    env['PATH'] = env['PATH'] + os.pathsep + os.environ['PATH']
+    # Augment with the current env to pick up TEMP and friends.
+    for k in os.environ:
+      if k not in env:
+        env[k] = os.environ[k]
+
+    varlines = []
+    for k in sorted(env.keys()):
+      varlines.append('%s=%s' % (str(k), str(env[k])))
+    variables = '\n'.join(varlines)
+
+    # Check that the json file contained the same environment as the .cmd file.
+    if sys.platform in ('win32', 'cygwin'):
+      script = os.path.normpath(os.path.join(sdk_dir, 'Bin/SetEnv.cmd'))
+      assert _ExtractImportantEnvironment(variables) == \
+             _ExtractImportantEnvironment(_LoadEnvFromBat([script, '/' + cpu]))
+  else:
+    if 'GYP_MSVS_OVERRIDE_PATH' not in os.environ:
+      os.environ['GYP_MSVS_OVERRIDE_PATH'] = _DetectVisualStudioPath()
+    # We only support x64-hosted tools.
+    script_path = os.path.normpath(os.path.join(
+                                       os.environ['GYP_MSVS_OVERRIDE_PATH'],
+                                       'VC/vcvarsall.bat'))
+    if not os.path.exists(script_path):
+      raise Exception('%s is missing - make sure VC++ tools are installed.' %
+                      script_path)
+    args = [script_path, 'amd64_x86' if cpu == 'x86' else 'amd64']
+    variables = _LoadEnvFromBat(args)
+  return _ExtractImportantEnvironment(variables)
+
+
+def _FormatAsEnvironmentBlock(envvar_dict):
+  """Format as an 'environment block' directly suitable for CreateProcess.
+  Briefly this is a list of key=value\0, terminated by an additional \0. See
+  CreateProcess documentation for more details."""
+  block = ''
+  nul = '\0'
+  for key, value in envvar_dict.iteritems():
+    block += key + '=' + value + nul
+  block += nul
+  return block
+
+
+def main():
+  if len(sys.argv) != 6:
+    print('Usage setup_toolchain.py '
+          '<visual studio path> <win sdk path> '
+          '<runtime dirs> <target_cpu> <include prefix>')
+    sys.exit(2)
+  win_sdk_path = sys.argv[2]
+  runtime_dirs = sys.argv[3]
+  target_cpu = sys.argv[4]
+  include_prefix = sys.argv[5]
+
+  cpus = ('x86', 'x64')
+  assert target_cpu in cpus
+  vc_bin_dir = ''
+  include = ''
+
+  # TODO: Do we need an equivalent of
+  # ninja_use_custom_environment_files?
+
+  for cpu in cpus:
+    # Extract environment variables for subprocesses.
+    env = _LoadToolchainEnv(cpu, win_sdk_path)
+    env['PATH'] = runtime_dirs + os.pathsep + env['PATH']
+
+    if cpu == target_cpu:
+      for path in env['PATH'].split(os.pathsep):
+        if os.path.exists(os.path.join(path, 'cl.exe')):
+          vc_bin_dir = os.path.realpath(path)
+          break
+      # The separator for INCLUDE here must match the one used in
+      # _LoadToolchainEnv() above.
+      include = [include_prefix + p for p in env['INCLUDE'].split(';') if p]
+      include = ' '.join(['"' + i.replace('"', r'\"') + '"' for i in include])
+
+    env_block = _FormatAsEnvironmentBlock(env)
+    with open('environment.' + cpu, 'wb') as f:
+      f.write(env_block)
+
+    # Create a store app version of the environment.
+    if 'LIB' in env:
+      env['LIB']     = env['LIB']    .replace(r'\VC\LIB', r'\VC\LIB\STORE')
+    if 'LIBPATH' in env:
+      env['LIBPATH'] = env['LIBPATH'].replace(r'\VC\LIB', r'\VC\LIB\STORE')
+    env_block = _FormatAsEnvironmentBlock(env)
+    with open('environment.winrt_' + cpu, 'wb') as f:
+      f.write(env_block)
+
+  assert vc_bin_dir
+  print 'vc_bin_dir = ' + gn_helpers.ToGNString(vc_bin_dir)
+  assert include
+  print 'include_flags = ' + gn_helpers.ToGNString(include)
+
+if __name__ == '__main__':
+  main()
diff --git a/build/toolchain/win/tool_wrapper.py b/build/toolchain/win/tool_wrapper.py
new file mode 100644
index 0000000..a6ae0cf
--- /dev/null
+++ b/build/toolchain/win/tool_wrapper.py
@@ -0,0 +1,325 @@
+# Copied from chromium's build/toolchain/win/.
+#
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Utility functions for Windows builds.
+
+This file is copied to the build directory as part of toolchain setup and
+is used to set up calls to tools used by the build that need wrappers.
+"""
+
+import os
+import re
+import shutil
+import subprocess
+import stat
+import string
+import sys
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+
+# A regex matching an argument corresponding to the output filename passed to
+# link.exe.
+_LINK_EXE_OUT_ARG = re.compile('/OUT:(?P<out>.+)$', re.IGNORECASE)
+
+def main(args):
+  exit_code = WinTool().Dispatch(args)
+  if exit_code is not None:
+    sys.exit(exit_code)
+
+
+class WinTool(object):
+  """This class performs all the Windows tooling steps. The methods can either
+  be executed directly, or dispatched from an argument list."""
+
+  def _UseSeparateMspdbsrv(self, env, args):
+    """Allows using a unique instance of mspdbsrv.exe per linker instead of a
+    shared one."""
+    if len(args) < 1:
+      raise Exception("Not enough arguments")
+
+    if args[0] != 'link.exe':
+      return
+
+    # Use the output filename passed to the linker to generate an endpoint name
+    # for mspdbsrv.exe.
+    endpoint_name = None
+    for arg in args:
+      m = _LINK_EXE_OUT_ARG.match(arg)
+      if m:
+        endpoint_name = re.sub(r'\W+', '',
+            '%s_%d' % (m.group('out'), os.getpid()))
+        break
+
+    if endpoint_name is None:
+      return
+
+    # Adds the appropriate environment variable. This will be read by link.exe
+    # to know which instance of mspdbsrv.exe it should connect to (if it's
+    # not set then the default endpoint is used).
+    env['_MSPDBSRV_ENDPOINT_'] = endpoint_name
+
+  def Dispatch(self, args):
+    """Dispatches a string command to a method."""
+    if len(args) < 1:
+      raise Exception("Not enough arguments")
+
+    method = "Exec%s" % self._CommandifyName(args[0])
+    return getattr(self, method)(*args[1:])
+
+  def _CommandifyName(self, name_string):
+    """Transforms a tool name like recursive-mirror to RecursiveMirror."""
+    return name_string.title().replace('-', '')
+
+  def _GetEnv(self, arch):
+    """Gets the saved environment from a file for a given architecture."""
+    # The environment is saved as an "environment block" (see CreateProcess
+    # and msvs_emulation for details). We convert to a dict here.
+    # Drop last 2 NULs, one for list terminator, one for trailing vs. separator.
+    pairs = open(arch).read()[:-2].split('\0')
+    kvs = [item.split('=', 1) for item in pairs]
+    return dict(kvs)
+
+  def ExecStamp(self, path):
+    """Simple stamp command."""
+    open(path, 'w').close()
+
+  def ExecRecursiveMirror(self, source, dest):
+    """Emulation of rm -rf out && cp -af in out."""
+    if os.path.exists(dest):
+      if os.path.isdir(dest):
+        def _on_error(fn, path, dummy_excinfo):
+          # The operation failed, possibly because the file is set to
+          # read-only. If that's why, make it writable and try the op again.
+          if not os.access(path, os.W_OK):
+            os.chmod(path, stat.S_IWRITE)
+          fn(path)
+        shutil.rmtree(dest, onerror=_on_error)
+      else:
+        if not os.access(dest, os.W_OK):
+          # Attempt to make the file writable before deleting it.
+          os.chmod(dest, stat.S_IWRITE)
+        os.unlink(dest)
+
+    if os.path.isdir(source):
+      shutil.copytree(source, dest)
+    else:
+      shutil.copy2(source, dest)
+
+  def ExecLinkWrapper(self, arch, use_separate_mspdbsrv, *args):
+    """Filter diagnostic output from link that looks like:
+    '   Creating library ui.dll.lib and object ui.dll.exp'
+    This happens when there are exports from the dll or exe.
+    """
+    env = self._GetEnv(arch)
+    if use_separate_mspdbsrv == 'True':
+      self._UseSeparateMspdbsrv(env, args)
+    if sys.platform == 'win32':
+      args = list(args)  # *args is a tuple by default, which is read-only.
+      args[0] = args[0].replace('/', '\\')
+    # https://docs.python.org/2/library/subprocess.html:
+    # "On Unix with shell=True [...] if args is a sequence, the first item
+    # specifies the command string, and any additional items will be treated as
+    # additional arguments to the shell itself.  That is to say, Popen does the
+    # equivalent of:
+    #   Popen(['/bin/sh', '-c', args[0], args[1], ...])"
+    # For that reason, since going through the shell doesn't seem necessary on
+    # non-Windows don't do that there.
+    link = subprocess.Popen(args, shell=sys.platform == 'win32', env=env,
+                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    out, _ = link.communicate()
+    for line in out.splitlines():
+      if (not line.startswith('   Creating library ') and
+          not line.startswith('Generating code') and
+          not line.startswith('Finished generating code')):
+        print line
+    return link.returncode
+
+  def ExecLinkWithManifests(self, arch, embed_manifest, out, ldcmd, resname,
+                            mt, rc, intermediate_manifest, *manifests):
+    """A wrapper for handling creating a manifest resource and then executing
+    a link command."""
+    # The 'normal' way to do manifests is to have link generate a manifest
+    # based on gathering dependencies from the object files, then merge that
+    # manifest with other manifests supplied as sources, convert the merged
+    # manifest to a resource, and then *relink*, including the compiled
+    # version of the manifest resource. This breaks incremental linking, and
+    # is generally overly complicated. Instead, we merge all the manifests
+    # provided (along with one that includes what would normally be in the
+    # linker-generated one, see msvs_emulation.py), and include that into the
+    # first and only link. We still tell link to generate a manifest, but we
+    # only use that to assert that our simpler process did not miss anything.
+    variables = {
+      'python': sys.executable,
+      'arch': arch,
+      'out': out,
+      'ldcmd': ldcmd,
+      'resname': resname,
+      'mt': mt,
+      'rc': rc,
+      'intermediate_manifest': intermediate_manifest,
+      'manifests': ' '.join(manifests),
+    }
+    add_to_ld = ''
+    if manifests:
+      subprocess.check_call(
+          '%(python)s tool_wrapper.py manifest-wrapper %(arch)s %(mt)s -nologo '
+          '-manifest %(manifests)s -out:%(out)s.manifest' % variables)
+      if embed_manifest == 'True':
+        subprocess.check_call(
+            '%(python)s tool_wrapper.py manifest-to-rc %(arch)s'
+                '%(out)s.manifest %(out)s.manifest.rc %(resname)s' % variables)
+        subprocess.check_call(
+            '%(python)s tool_wrapper.py rc-wrapper %(arch)s %(rc)s '
+            '%(out)s.manifest.rc' % variables)
+        add_to_ld = ' %(out)s.manifest.res' % variables
+    subprocess.check_call(ldcmd + add_to_ld)
+
+    # Run mt.exe on the theoretically complete manifest we generated, merging
+    # it with the one the linker generated to confirm that the linker
+    # generated one does not add anything. This is strictly unnecessary for
+    # correctness, it's only to verify that e.g. /MANIFESTDEPENDENCY was not
+    # used in a #pragma comment.
+    if manifests:
+      # Merge the intermediate one with ours to .assert.manifest, then check
+      # that .assert.manifest is identical to ours.
+      subprocess.check_call(
+          '%(python)s tool_wrapper.py manifest-wrapper %(arch)s %(mt)s -nologo '
+          '-manifest %(out)s.manifest %(intermediate_manifest)s '
+          '-out:%(out)s.assert.manifest' % variables)
+      assert_manifest = '%(out)s.assert.manifest' % variables
+      our_manifest = '%(out)s.manifest' % variables
+      # Load and normalize the manifests. mt.exe sometimes removes whitespace,
+      # and sometimes doesn't unfortunately.
+      with open(our_manifest, 'rb') as our_f:
+        with open(assert_manifest, 'rb') as assert_f:
+          our_data = our_f.read().translate(None, string.whitespace)
+          assert_data = assert_f.read().translate(None, string.whitespace)
+      if our_data != assert_data:
+        os.unlink(out)
+        def dump(filename):
+          sys.stderr.write('%s\n-----\n' % filename)
+          with open(filename, 'rb') as f:
+            sys.stderr.write(f.read() + '\n-----\n')
+        dump(intermediate_manifest)
+        dump(our_manifest)
+        dump(assert_manifest)
+        sys.stderr.write(
+            'Linker generated manifest "%s" added to final manifest "%s" '
+            '(result in "%s"). '
+            'Were /MANIFEST switches used in #pragma statements? ' % (
+              intermediate_manifest, our_manifest, assert_manifest))
+        return 1
+
+  def ExecManifestWrapper(self, arch, *args):
+    """Run manifest tool with environment set. Strip out undesirable warning
+    (some XML blocks are recognized by the OS loader, but not the manifest
+    tool)."""
+    env = self._GetEnv(arch)
+    popen = subprocess.Popen(args, shell=True, env=env,
+                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    out, _ = popen.communicate()
+    for line in out.splitlines():
+      if line and 'manifest authoring warning 81010002' not in line:
+        print line
+    return popen.returncode
+
+  def ExecManifestToRc(self, dummy_arch, *args):
+    """Creates a resource file pointing at a SxS assembly manifest.
+    |args| is a tuple of: path to the manifest file, path to the resource file
+    to write, and resource name ("1" for executables or "2" for DLLs)."""
+    manifest_path, resource_path, resource_name = args
+    with open(resource_path, 'wb') as output:
+      output.write('#include <windows.h>\n%s RT_MANIFEST "%s"' % (
+        resource_name,
+        os.path.abspath(manifest_path).replace('\\', '/')))
+
+  def ExecMidlWrapper(self, arch, outdir, tlb, h, dlldata, iid, proxy, idl,
+                      *flags):
+    """Filter noisy filenames output from MIDL compile step that isn't
+    quietable via command line flags.
+    """
+    args = ['midl', '/nologo'] + list(flags) + [
+        '/out', outdir,
+        '/tlb', tlb,
+        '/h', h,
+        '/dlldata', dlldata,
+        '/iid', iid,
+        '/proxy', proxy,
+        idl]
+    env = self._GetEnv(arch)
+    popen = subprocess.Popen(args, shell=True, env=env,
+                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    out, _ = popen.communicate()
+    # Filter junk out of stdout, and write filtered versions. Output we want
+    # to filter is pairs of lines that look like this:
+    # Processing C:\Program Files (x86)\Microsoft SDKs\...\include\objidl.idl
+    # objidl.idl
+    lines = out.splitlines()
+    prefixes = ('Processing ', '64 bit Processing ')
+    processing = set(os.path.basename(x)
+                     for x in lines if x.startswith(prefixes))
+    for line in lines:
+      if not line.startswith(prefixes) and line not in processing:
+        print line
+    return popen.returncode
+
+  def ExecAsmWrapper(self, arch, *args):
+    """Filter logo banner from invocations of asm.exe."""
+    env = self._GetEnv(arch)
+    popen = subprocess.Popen(args, shell=True, env=env,
+                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    out, _ = popen.communicate()
+    for line in out.splitlines():
+      # Split to avoid triggering license checks:
+      if (not line.startswith('Copy' + 'right (C' +
+                              ') Microsoft Corporation') and
+          not line.startswith('Microsoft (R) Macro Assembler') and
+          not line.startswith(' Assembling: ') and
+          line):
+        print line
+    return popen.returncode
+
+  def ExecRcWrapper(self, arch, *args):
+    """Filter logo banner from invocations of rc.exe. Older versions of RC
+    don't support the /nologo flag."""
+    env = self._GetEnv(arch)
+    popen = subprocess.Popen(args, shell=True, env=env,
+                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    out, _ = popen.communicate()
+    for line in out.splitlines():
+      if (not line.startswith('Microsoft (R) Windows (R) Resource Compiler') and
+          not line.startswith('Copy' + 'right (C' +
+                              ') Microsoft Corporation') and
+          line):
+        print line
+    return popen.returncode
+
+  def ExecActionWrapper(self, arch, rspfile, *dirname):
+    """Runs an action command line from a response file using the environment
+    for |arch|. If |dirname| is supplied, use that as the working directory."""
+    env = self._GetEnv(arch)
+    # TODO: This is a temporary hack to get some specific variables
+    # through to actions that are set after GN-time. http://crbug.com/333738.
+    for k, v in os.environ.iteritems():
+      if k not in env:
+        env[k] = v
+    args = open(rspfile).read()
+    dirname = dirname[0] if dirname else None
+    return subprocess.call(args, shell=True, env=env, cwd=dirname)
+
+  def ExecClCompile(self, project_dir, selected_files):
+    """Executed by msvs-ninja projects when the 'ClCompile' target is used to
+    build selected C/C++ files."""
+    project_dir = os.path.relpath(project_dir, BASE_DIR)
+    selected_files = selected_files.split(';')
+    ninja_targets = [os.path.join(project_dir, filename) + '^^'
+        for filename in selected_files]
+    cmd = ['ninja.exe']
+    cmd.extend(ninja_targets)
+    return subprocess.call(cmd, shell=True, cwd=BASE_DIR)
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv[1:]))
diff --git a/build/toolchain/wrapper_utils.py b/build/toolchain/wrapper_utils.py
new file mode 100644
index 0000000..8774c27
--- /dev/null
+++ b/build/toolchain/wrapper_utils.py
@@ -0,0 +1,106 @@
+# Copied from chromium build/toolchain.
+# Copyright (c) 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Helper functions for gcc_toolchain.gni wrappers."""
+
+import os
+import re
+import subprocess
+import shlex
+import sys
+
+_BAT_PREFIX = 'cmd /c call '
+_WHITELIST_RE = re.compile('whitelisted_resource_(?P<resource_id>[0-9]+)')
+
+
+def CommandToRun(command):
+  """Generates commands compatible with Windows.
+
+  When running on a Windows host and using a toolchain whose tools are
+  actually wrapper scripts (i.e. .bat files on Windows) rather than binary
+  executables, the |command| to run has to be prefixed with this magic.
+  The GN toolchain definitions take care of that for when GN/Ninja is
+  running the tool directly.  When that command is passed in to this
+  script, it appears as a unitary string but needs to be split up so that
+  just 'cmd' is the actual command given to Python's subprocess module.
+
+  Args:
+    command: List containing the UNIX style |command|.
+
+  Returns:
+    A list containing the Windows version of the |command|.
+  """
+  if command[0].startswith(_BAT_PREFIX):
+    command = command[0].split(None, 3) + command[1:]
+  return command
+
+
+def ResolveRspLinks(inputs):
+  """Return the set of entries listed in the response files of a command.
+
+  Args:
+    inputs: A command (list of arguments); '@'-prefixed ones name rsp files.
+
+  Returns:
+    A set of the shlex-split tokens read from every referenced rsp file."""
+  rspfiles = [a[1:] for a in inputs if a.startswith('@')]
+  resolved = set()
+  for rspfile in rspfiles:
+    with open(rspfile, 'r') as f:
+      resolved.update(shlex.split(f.read()))
+
+  return resolved
+
+
+def CombineResourceWhitelists(whitelist_candidates, outfile):
+  """Combines all whitelists for a resource file into a single whitelist.
+
+  Args:
+    whitelist_candidates: List of paths to rsp files containing all targets.
+    outfile: Path to save the combined whitelist.
+  """
+  whitelists = ('%s.whitelist' % candidate for candidate in whitelist_candidates
+                if os.path.exists('%s.whitelist' % candidate))
+
+  resources = set()
+  for whitelist in whitelists:
+    with open(whitelist, 'r') as f:
+      resources.update(f.readlines())
+
+  with open(outfile, 'w') as f:
+    f.writelines(resources)
+
+
+def ExtractResourceIdsFromPragmaWarnings(text):
+  """Returns set of resource IDs that are inside unknown pragma warnings.
+
+  Args:
+    text: The text that will be scanned for unknown pragma warnings.
+
+  Returns:
+    A set containing integers representing resource IDs.
+  """
+  used_resources = set()
+  lines = text.splitlines()
+  for ln in lines:
+    match = _WHITELIST_RE.search(ln)
+    if match:
+      resource_id = int(match.group('resource_id'))
+      used_resources.add(resource_id)
+
+  return used_resources
+
+
+def CaptureCommandStderr(command):
+  """Runs a command and returns its exit code and captured stderr.
+
+  Args:
+    command: A list containing the command and arguments.
+
+  Returns: A (returncode, stderr) tuple; stdout is not captured.
+  """
+  child = subprocess.Popen(command, stderr=subprocess.PIPE)
+  _, stderr = child.communicate()
+  return child.returncode, stderr
diff --git a/build/tools/protoc_wrapper.py b/build/tools/protoc_wrapper.py
new file mode 100755
index 0000000..ada046b
--- /dev/null
+++ b/build/tools/protoc_wrapper.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+# Copied from chromium tools/protoc_wrapper/.
+
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""
+A simple wrapper for protoc.
+
+- Adds includes in generated headers.
+- Handles building with system protobuf as an option.
+"""
+
+import fnmatch
+import optparse
+import os.path
+import shutil
+import subprocess
+import sys
+import tempfile
+
+PROTOC_INCLUDE_POINT = '// @@protoc_insertion_point(includes)\n'
+
+def ModifyHeader(header_file, extra_header):
+  """Adds |extra_header| to |header_file|. Returns 0 on success.
+
+  |extra_header| is the name of the header file to include.
+  |header_file| is a generated protobuf cpp header.
+  """
+  include_point_found = False
+  header_contents = []
+  with open(header_file) as f:
+    for line in f:
+      header_contents.append(line)
+      if line == PROTOC_INCLUDE_POINT:
+        extra_header_msg = '#include "%s"\n' % extra_header
+        header_contents.append(extra_header_msg)
+        include_point_found = True
+  if not include_point_found:
+    return 1
+
+  with open(header_file, 'wb') as f:
+    f.write(''.join(header_contents))
+  return 0
+
+def ScanForBadFiles(scan_root):
+  """Scan for bad file names, see http://crbug.com/386125 for details.
+  Returns True if any filenames are bad. Outputs errors to stderr.
+
+  |scan_root| is the path to the directory to be recursively scanned.
+  """
+  badname = False
+  real_scan_root = os.path.realpath(scan_root)
+  for dirpath, dirnames, filenames in os.walk(real_scan_root):
+    matches = fnmatch.filter(filenames, '*-*.proto')
+    if len(matches) > 0:
+      if not badname:
+        badname = True
+        sys.stderr.write('proto files must not have hyphens in their names ('
+                         'see http://crbug.com/386125 for more information):\n')
+      for filename in matches:
+        sys.stderr.write('  ' + os.path.join(real_scan_root,
+                                             dirpath, filename) + '\n')
+  return badname
+
+
+def RewriteProtoFilesForSystemProtobuf(path):
+  wrapper_dir = tempfile.mkdtemp()
+  try:
+    for filename in os.listdir(path):
+      if not filename.endswith('.proto'):
+        continue
+      with open(os.path.join(path, filename), 'r') as src_file:
+        with open(os.path.join(wrapper_dir, filename), 'w') as dst_file:
+          for line in src_file:
+            # Remove lines that break build with system protobuf.
+            # We cannot optimize for lite runtime, because system lite runtime
+            # does not have a Chromium-specific hack to retain unknown fields.
+            # Similarly, it does not understand corresponding option to control
+            # the usage of that hack.
+            if 'LITE_RUNTIME' in line or 'retain_unknown_fields' in line:
+              continue
+            dst_file.write(line)
+
+    return wrapper_dir
+  except:
+    shutil.rmtree(wrapper_dir)
+    raise
+
+
+def main(argv):
+  parser = optparse.OptionParser()
+  parser.add_option('--include', dest='extra_header',
+                    help='The extra header to include. This must be specified '
+                         'along with --protobuf.')
+  parser.add_option('--protobuf', dest='generated_header',
+                    help='The c++ protobuf header to add the extra header to. '
+                         'This must be specified along with --include.')
+  parser.add_option('--proto-in-dir',
+                    help='The directory containing .proto files.')
+  parser.add_option('--proto-in-file', help='Input file to compile.')
+  parser.add_option('--use-system-protobuf', type=int, default=0,
+                    help='Option to use system-installed protobuf '
+                         'instead of bundled one.')
+  (options, args) = parser.parse_args(argv)  # argv[0] is the script name.
+  if len(args) < 2:
+    return 1
+
+  if ScanForBadFiles(options.proto_in_dir):
+    return 1
+
+  proto_path = options.proto_in_dir
+  if options.use_system_protobuf == 1:
+    proto_path = RewriteProtoFilesForSystemProtobuf(proto_path)
+  try:
+    # Run what is hopefully protoc.
+    protoc_args = args[1:]
+    protoc_args += ['--proto_path=%s' % proto_path,
+                    os.path.join(proto_path, options.proto_in_file)]
+    ret = subprocess.call(protoc_args)
+    if ret != 0:
+      return ret
+  finally:
+    if options.use_system_protobuf == 1:
+      # Remove temporary directory holding re-written files.
+      shutil.rmtree(proto_path)
+
+  # protoc succeeded, check to see if the generated cpp header needs editing.
+  if not options.extra_header or not options.generated_header:
+    return 0
+  return ModifyHeader(options.generated_header, options.extra_header)
+
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv))
diff --git a/build/vs_toolchain.py b/build/vs_toolchain.py
new file mode 100644
index 0000000..b1a5f4c
--- /dev/null
+++ b/build/vs_toolchain.py
@@ -0,0 +1,462 @@
+#!/usr/bin/env python
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import glob
+import json
+import os
+import pipes
+import platform
+import re
+import shutil
+import stat
+import subprocess
+import sys
+
+
+script_dir = os.path.dirname(os.path.realpath(__file__))
+chrome_src = os.path.abspath(os.path.join(script_dir, os.pardir))
+SRC_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, os.path.join(chrome_src, 'tools', 'gyp', 'pylib'))
+json_data_file = os.path.join(script_dir, 'win_toolchain.json')
+
+
+# Use MSVS2017 as the default toolchain.
+CURRENT_DEFAULT_TOOLCHAIN_VERSION = '2017'
+
+
+def SetEnvironmentAndGetRuntimeDllDirs():
+  """Sets up os.environ to use the depot_tools VS toolchain with gyp, and
+  returns the location of the VS runtime DLLs so they can be copied into
+  the output directory after gyp generation.
+
+  Return value is [x64path, x86path] or None
+  """
+  vs_runtime_dll_dirs = None
+  depot_tools_win_toolchain = \
+      bool(int(os.environ.get('DEPOT_TOOLS_WIN_TOOLCHAIN', '1')))
+  # When running on a non-Windows host, only do this if the SDK has explicitly
+  # been downloaded before (in which case json_data_file will exist).
+  if ((sys.platform in ('win32', 'cygwin') or os.path.exists(json_data_file))
+      and depot_tools_win_toolchain):
+    if ShouldUpdateToolchain():
+      Update()
+    with open(json_data_file, 'r') as tempf:
+      toolchain_data = json.load(tempf)
+
+    toolchain = toolchain_data['path']
+    version = toolchain_data['version']
+    win_sdk = toolchain_data.get('win_sdk')
+    if not win_sdk:
+      win_sdk = toolchain_data['win8sdk']
+    wdk = toolchain_data['wdk']
+    # TODO: The order unfortunately matters in these. They should be
+    # split into separate keys for x86 and x64. (See CopyDlls call below).
+    # http://crbug.com/345992
+    vs_runtime_dll_dirs = toolchain_data['runtime_dirs']
+
+    os.environ['GYP_MSVS_OVERRIDE_PATH'] = toolchain
+    os.environ['GYP_MSVS_VERSION'] = version
+
+    # Limit the scope of the gyp import to only where it is used. This
+    # potentially lets build configs that never execute this block to drop
+    # their GYP checkout.
+    import gyp
+
+    # We need to make sure windows_sdk_path is set to the automated
+    # toolchain values in GYP_DEFINES, but don't want to override any
+    # other options or values that are already set there (hence the
+    # parse / update / re-serialize round trip below).
+    gyp_defines_dict = gyp.NameValueListToDict(gyp.ShlexEnv('GYP_DEFINES'))
+    gyp_defines_dict['windows_sdk_path'] = win_sdk
+    os.environ['GYP_DEFINES'] = ' '.join('%s=%s' % (k, pipes.quote(str(v)))
+        for k, v in gyp_defines_dict.iteritems())
+
+    os.environ['WINDOWSSDKDIR'] = win_sdk
+    os.environ['WDK_DIR'] = wdk
+    # Include the VS runtime in the PATH in case it's not machine-installed.
+    runtime_path = os.path.pathsep.join(vs_runtime_dll_dirs)
+    os.environ['PATH'] = runtime_path + os.path.pathsep + os.environ['PATH']
+  elif sys.platform == 'win32' and not depot_tools_win_toolchain:
+    if not 'GYP_MSVS_OVERRIDE_PATH' in os.environ:
+      os.environ['GYP_MSVS_OVERRIDE_PATH'] = DetectVisualStudioPath()
+    if not 'GYP_MSVS_VERSION' in os.environ:
+      os.environ['GYP_MSVS_VERSION'] = GetVisualStudioVersion()
+
+    # When using an installed toolchain these files aren't needed in the output
+    # directory in order to run binaries locally, but they are needed in order
+    # to create isolates or the mini_installer. Copying them to the output
+    # directory ensures that they are available when needed.
+    bitness = platform.architecture()[0]
+    # When running 64-bit python the x64 DLLs will be in System32
+    x64_path = 'System32' if bitness == '64bit' else 'Sysnative'
+    x64_path = os.path.join(r'C:\Windows', x64_path)
+    vs_runtime_dll_dirs = [x64_path, r'C:\Windows\SysWOW64']
+
+  return vs_runtime_dll_dirs
+
+
+def _RegistryGetValueUsingWinReg(key, value):
+  """Use the _winreg module to obtain the value of a registry key.
+
+  Args:
+    key: The registry key.
+    value: The particular registry value to read.
+  Return:
+    contents of the registry key's value, or None on failure.  Throws
+    ImportError if _winreg is unavailable.
+  """
+  import _winreg
+  try:
+    root, subkey = key.split('\\', 1)
+    assert root == 'HKLM'  # Only need HKLM for now.
+    with _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, subkey) as hkey:
+      return _winreg.QueryValueEx(hkey, value)[0]
+  except WindowsError:
+    return None
+
+
+def _RegistryGetValue(key, value):
+  try:
+    return _RegistryGetValueUsingWinReg(key, value)
+  except ImportError:
+    raise Exception('The python library _winreg not found.')
+
+
+def GetVisualStudioVersion():
+  """Return GYP_MSVS_VERSION of Visual Studio.
+  """
+  return os.environ.get('GYP_MSVS_VERSION', CURRENT_DEFAULT_TOOLCHAIN_VERSION)
+
+
+def DetectVisualStudioPath():
+  """Return path to the GYP_MSVS_VERSION of Visual Studio.
+  """
+
+  # Note that this code is used from
+  # build/toolchain/win/setup_toolchain.py as well.
+  version_as_year = GetVisualStudioVersion()
+  year_to_version = {
+      '2015': '14.0',
+      '2017': '15.0',
+  }
+  if version_as_year not in year_to_version:
+    raise Exception(('Visual Studio version %s (from GYP_MSVS_VERSION)'
+                     ' not supported. Supported versions are: %s') % (
+                       version_as_year, ', '.join(year_to_version.keys())))
+  version = year_to_version[version_as_year]
+  if version_as_year == '2017':
+    # The VC++ 2017 install location needs to be located using COM instead of
+    # the registry. For details see:
+    # https://blogs.msdn.microsoft.com/heaths/2016/09/15/changes-to-visual-studio-15-setup/
+    # For now we use a hardcoded default with an environment variable override.
+    for path in (
+        os.environ.get('vs2017_install'),
+        r'C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise',
+        r'C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional',
+        r'C:\Program Files (x86)\Microsoft Visual Studio\2017\Community'):
+      if path and os.path.exists(path):
+        return path
+  else:
+    keys = [r'HKLM\Software\Microsoft\VisualStudio\%s' % version,
+            r'HKLM\Software\Wow6432Node\Microsoft\VisualStudio\%s' % version]
+    for key in keys:
+      path = _RegistryGetValue(key, 'InstallDir')
+      if not path:
+        continue
+      path = os.path.normpath(os.path.join(path, '..', '..'))
+      return path
+
+  raise Exception(('Visual Studio Version %s (from GYP_MSVS_VERSION)'
+                   ' not found.') % (version_as_year))
+
+
+def _CopyRuntimeImpl(target, source, verbose=True):
+  """Copy |source| to |target| if it doesn't already exist or if it needs to be
+  updated (comparing last modified time as an approximate float match as for
+  some reason the values tend to differ by ~1e-07 despite being copies of the
+  same file... https://crbug.com/603603).
+  """
+  if (os.path.isdir(os.path.dirname(target)) and
+      (not os.path.isfile(target) or
+       abs(os.stat(target).st_mtime - os.stat(source).st_mtime) >= 0.01)):
+    if verbose:
+      print 'Copying %s to %s...' % (source, target)
+    if os.path.exists(target):
+      # Make the file writable so that we can delete it now, and keep it
+      # readable.
+      os.chmod(target, stat.S_IWRITE | stat.S_IREAD)
+      os.unlink(target)
+    shutil.copy2(source, target)
+    # Make the file writable so that we can overwrite or delete it later,
+    # keep it readable.
+    os.chmod(target, stat.S_IWRITE | stat.S_IREAD)
+
+
+def _CopyUCRTRuntime(target_dir, source_dir, target_cpu, dll_pattern, suffix):
+  """Copy the msvcp, vccorlib and vcruntime DLLs, the SDK's api-ms-win-*.dll
+  UCRT files and ucrtbase into target_dir via _CopyRuntimeImpl()."""
+  for file_part in ('msvcp', 'vccorlib', 'vcruntime'):
+    dll = dll_pattern % file_part
+    target = os.path.join(target_dir, dll)
+    source = os.path.join(source_dir, dll)
+    _CopyRuntimeImpl(target, source)
+  # Copy the UCRT files needed by VS 2015 from the Windows SDK. This location
+  # includes the api-ms-win-crt-*.dll files that are not found in the Windows
+  # directory. These files are needed for component builds.
+  # If WINDOWSSDKDIR is not set use the default SDK path. This will be the case
+  # when DEPOT_TOOLS_WIN_TOOLCHAIN=0 and vcvarsall.bat has not been run.
+  win_sdk_dir = os.path.normpath(
+      os.environ.get('WINDOWSSDKDIR',
+                     'C:\\Program Files (x86)\\Windows Kits\\10'))
+  ucrt_dll_dirs = os.path.join(win_sdk_dir, 'Redist', 'ucrt', 'DLLs',
+                               target_cpu)
+  ucrt_files = glob.glob(os.path.join(ucrt_dll_dirs, 'api-ms-win-*.dll'))
+  assert len(ucrt_files) > 0
+  for ucrt_src_file in ucrt_files:
+    file_part = os.path.basename(ucrt_src_file)
+    ucrt_dst_file = os.path.join(target_dir, file_part)
+    _CopyRuntimeImpl(ucrt_dst_file, ucrt_src_file, False)
+  _CopyRuntimeImpl(os.path.join(target_dir, 'ucrtbase' + suffix),
+                    os.path.join(source_dir, 'ucrtbase' + suffix))
+
+
+def FindVCToolsRoot():
+  """In VS2017 the PGO runtime dependencies are located in
+  {toolchain_root}/VC/Tools/MSVC/{x.y.z}/bin/Host{target_cpu}/{target_cpu}/, the
+  {x.y.z} part is likely to change in case of a minor update of the
+  toolchain so we don't hardcode this value here (except for the major number).
+
+  This returns the '{toolchain_root}/VC/Tools/MSVC/{x.y.z}/bin/' path.
+
+  This function should only be called when using VS2017.
+  """
+  assert GetVisualStudioVersion() == '2017'
+  SetEnvironmentAndGetRuntimeDllDirs()
+  assert ('GYP_MSVS_OVERRIDE_PATH' in os.environ)
+  vc_tools_msvc_root = os.path.join(os.environ['GYP_MSVS_OVERRIDE_PATH'],
+      'VC', 'Tools', 'MSVC')
+  for directory in os.listdir(vc_tools_msvc_root):
+    if not os.path.isdir(os.path.join(vc_tools_msvc_root, directory)):
+      continue
+    if re.match(r'14\.\d+\.\d+', directory):
+      return os.path.join(vc_tools_msvc_root, directory, 'bin')
+  raise Exception('Unable to find the VC tools directory.')
+
+
+def _CopyPGORuntime(target_dir, target_cpu):
+  """Copy the runtime dependencies required during a PGO build.
+  """
+  env_version = GetVisualStudioVersion()
+  # These dependencies will be in a different location depending on the version
+  # of the toolchain.
+  if env_version == '2015':
+    pgo_x86_runtime_dir = os.path.join(os.environ.get('GYP_MSVS_OVERRIDE_PATH'),
+                                       'VC', 'bin')
+    pgo_x64_runtime_dir = os.path.join(pgo_x86_runtime_dir, 'amd64')
+  elif env_version == '2017':
+    pgo_runtime_root = FindVCToolsRoot()
+    assert pgo_runtime_root
+    # There's no version of pgosweep.exe in HostX64/x86, so we use the copy
+    # from HostX86/x86.
+    pgo_x86_runtime_dir = os.path.join(pgo_runtime_root, 'HostX86', 'x86')
+    pgo_x64_runtime_dir = os.path.join(pgo_runtime_root, 'HostX64', 'x64')
+  else:
+    raise Exception('Unexpected toolchain version: %s.' % env_version)
+
+  # We need to copy 2 runtime dependencies used during the profiling step:
+  #     - pgort140.dll: runtime library required to run the instrumented image.
+  #     - pgosweep.exe: executable used to collect the profiling data
+  pgo_runtimes = ['pgort140.dll', 'pgosweep.exe']
+  for runtime in pgo_runtimes:
+    if target_cpu == 'x86':
+      source = os.path.join(pgo_x86_runtime_dir, runtime)
+    elif target_cpu == 'x64':
+      source = os.path.join(pgo_x64_runtime_dir, runtime)
+    else:
+      raise NotImplementedError("Unexpected target_cpu value: " + target_cpu)
+    if not os.path.exists(source):
+      raise Exception('Unable to find %s.' % source)
+    _CopyRuntimeImpl(os.path.join(target_dir, runtime), source)
+
+
+def _CopyRuntime(target_dir, source_dir, target_cpu, debug):
+  """Copy the VS runtime DLLs, only if the target doesn't exist, but the target
+  directory does exist. Handles VS 2015 and VS 2017."""
+  suffix = "d.dll" if debug else ".dll"
+  # VS 2017 uses the same CRT DLLs as VS 2015.
+  _CopyUCRTRuntime(target_dir, source_dir, target_cpu, '%s140' + suffix,
+                    suffix)
+
+
+def CopyDlls(target_dir, configuration, target_cpu):
+  """Copy the VS runtime DLLs into the requested directory as needed.
+
+  configuration is one of 'Debug' or 'Release'.
+  target_cpu is one of 'x86' or 'x64'.
+
+  The debug configuration gets both the debug and release DLLs; the
+  release config only the latter.
+  """
+  vs_runtime_dll_dirs = SetEnvironmentAndGetRuntimeDllDirs()
+  if not vs_runtime_dll_dirs:
+    return
+
+  x64_runtime, x86_runtime = vs_runtime_dll_dirs
+  runtime_dir = x64_runtime if target_cpu == 'x64' else x86_runtime
+  _CopyRuntime(target_dir, runtime_dir, target_cpu, debug=False)
+  if configuration == 'Debug':
+    _CopyRuntime(target_dir, runtime_dir, target_cpu, debug=True)
+  else:
+    _CopyPGORuntime(target_dir, target_cpu)
+
+  _CopyDebugger(target_dir, target_cpu)
+
+
+def _CopyDebugger(target_dir, target_cpu):
+  """Copy dbghelp.dll and dbgcore.dll into the requested directory as needed.
+
+  target_cpu is one of 'x86' or 'x64'.
+
+  dbghelp.dll is used when Chrome needs to symbolize stacks. Copying this file
+  from the SDK directory avoids using the system copy of dbghelp.dll which then
+  ensures compatibility with recent debug information formats, such as VS
+  2017 /debug:fastlink PDBs.
+
+  dbgcore.dll is needed when using some functions from dbghelp.dll (like
+  MinidumpWriteDump).
+  """
+  win_sdk_dir = SetEnvironmentAndGetSDKDir()
+  if not win_sdk_dir:
+    return
+
+  # List of debug files that should be copied, the first element of the tuple is
+  # the name of the file and the second indicates if it's optional.
+  debug_files = [('dbghelp.dll', False), ('dbgcore.dll', True)]
+  for debug_file, is_optional in debug_files:
+    full_path = os.path.join(win_sdk_dir, 'Debuggers', target_cpu, debug_file)
+    if not os.path.exists(full_path):
+      if is_optional:
+        continue
+      else:
+        raise Exception('%s not found in "%s"\r\nYou must install the '
+                        '"Debugging Tools for Windows" feature from the Windows'
+                        ' 10 SDK.' % (debug_file, full_path))
+    target_path = os.path.join(target_dir, debug_file)
+    _CopyRuntimeImpl(target_path, full_path)
+
+
+def _GetDesiredVsToolchainHashes():
+  """Load a list of SHA1s corresponding to the toolchains that we want installed
+  to build with."""
+  env_version = GetVisualStudioVersion()
+  if env_version == '2015':
+    # Update 3 final with 10.0.15063.468 SDK and no vctip.exe.
+    return ['f53e4598951162bad6330f7a167486c7ae5db1e5']
+  if env_version == '2017':
+    # VS 2017 Update 3.2 with 10.0.15063.468 SDK.
+    return ['9bc7ccbf9f4bd50d4a3bd185e8ca94ff1618de0b']
+  raise Exception('Unsupported VS version %s' % env_version)
+
+
+def ShouldUpdateToolchain():
+  """Check if the toolchain should be upgraded."""
+  if not os.path.exists(json_data_file):
+    return True
+  with open(json_data_file, 'r') as tempf:
+    toolchain_data = json.load(tempf)
+  version = toolchain_data['version']
+  env_version = GetVisualStudioVersion()
+  # If there's a mismatch between the version set in the environment and the one
+  # in the json file then the toolchain should be updated.
+  return version != env_version
+
+
+def Update(force=False):
+  """Requests an update of the toolchain to the specific hashes we have at
+  this revision. The update outputs a .json of the various configuration
+  information required to pass to gyp which we use in |GetToolchainDir()|.
+  """
+  if force != False and force != '--force':
+    print >>sys.stderr, 'Unknown parameter "%s"' % force
+    return 1
+  if force == '--force' or os.path.exists(json_data_file):
+    force = True
+
+  depot_tools_win_toolchain = \
+      bool(int(os.environ.get('DEPOT_TOOLS_WIN_TOOLCHAIN', '1')))
+  if ((sys.platform in ('win32', 'cygwin') or force) and
+        depot_tools_win_toolchain):
+    import find_depot_tools
+    depot_tools_path = find_depot_tools.add_depot_tools_to_path()
+    # Necessary so that get_toolchain_if_necessary.py will put the VS toolkit
+    # in the correct directory.
+    os.environ['GYP_MSVS_VERSION'] = GetVisualStudioVersion()
+    get_toolchain_args = [
+        sys.executable,
+        os.path.join(depot_tools_path,
+                    'win_toolchain',
+                    'get_toolchain_if_necessary.py'),
+        '--output-json', json_data_file,
+      ] + _GetDesiredVsToolchainHashes()
+    if force:
+      get_toolchain_args.append('--force')
+    subprocess.check_call(get_toolchain_args)
+
+  return 0
+
+
+def NormalizePath(path):
+  while path.endswith("\\"):
+    path = path[:-1]
+  return path
+
+
+def SetEnvironmentAndGetSDKDir():
+  """Gets location information about the current sdk (must have been
+  previously updated by 'update'). This is used for the GN build."""
+  SetEnvironmentAndGetRuntimeDllDirs()
+
+  # If WINDOWSSDKDIR is not set, search the default SDK path and set it.
+  if not 'WINDOWSSDKDIR' in os.environ:
+    default_sdk_path = 'C:\\Program Files (x86)\\Windows Kits\\10'
+    if os.path.isdir(default_sdk_path):
+      os.environ['WINDOWSSDKDIR'] = default_sdk_path
+
+  return NormalizePath(os.environ['WINDOWSSDKDIR'])
+
+
+def GetToolchainDir():
+  """Gets location information about the current toolchain (must have been
+  previously updated by 'update'). This is used for the GN build."""
+  runtime_dll_dirs = SetEnvironmentAndGetRuntimeDllDirs()
+  win_sdk_dir = SetEnvironmentAndGetSDKDir()
+
+  print '''vs_path = "%s"
+sdk_path = "%s"
+vs_version = "%s"
+wdk_dir = "%s"
+runtime_dirs = "%s"
+''' % (
+      NormalizePath(os.environ['GYP_MSVS_OVERRIDE_PATH']),
+      win_sdk_dir,
+      GetVisualStudioVersion(),
+      NormalizePath(os.environ.get('WDK_DIR', '')),
+      os.path.pathsep.join(runtime_dll_dirs or ['None']))
+
+
+def main():
+  commands = {
+      'update': Update,
+      'get_toolchain_dir': GetToolchainDir,
+      'copy_dlls': CopyDlls,
+  }
+  if len(sys.argv) < 2 or sys.argv[1] not in commands:
+    print >>sys.stderr, 'Expected one of: %s' % ', '.join(commands)
+    return 1
+  return commands[sys.argv[1]](*sys.argv[2:])
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/build/win/BUILD.gn b/build/win/BUILD.gn
new file mode 100644
index 0000000..7ad3597
--- /dev/null
+++ b/build/win/BUILD.gn
@@ -0,0 +1,79 @@
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//build/config/win/manifest.gni")
+
+# Depending on this target applies the manifests for Chrome's default Windows
+# compatibility, common controls, and asInvoker elevation to executables.
+windows_manifest("default_exe_manifest") {
+  sources = [
+    as_invoker_manifest,
+    common_controls_manifest,
+    default_compatibility_manifest,
+  ]
+}
+
+if (os == "win") {
+  action("copy_cdb_to_output") {
+    script = "//build/win/copy_cdb_to_output.py"
+    inputs = [
+      script,
+    ]
+    outputs = [
+      "$root_out_dir/cdb/cdb.exe",
+      "$root_out_dir/cdb/dbgeng.dll",
+      "$root_out_dir/cdb/dbghelp.dll",
+      "$root_out_dir/cdb/dbgmodel.dll",
+      "$root_out_dir/cdb/winext/ext.dll",
+      "$root_out_dir/cdb/winext/uext.dll",
+      "$root_out_dir/cdb/winxp/exts.dll",
+      "$root_out_dir/cdb/winxp/ntsdexts.dll",
+      "$root_out_dir/cdb/api-ms-win-core-console-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-datetime-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-debug-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-errorhandling-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-file-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-file-l1-2-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-file-l2-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-handle-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-heap-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-interlocked-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-libraryloader-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-localization-l1-2-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-memory-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-namedpipe-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-processenvironment-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-processthreads-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-processthreads-l1-1-1.dll",
+      "$root_out_dir/cdb/api-ms-win-core-profile-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-rtlsupport-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-string-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-synch-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-synch-l1-2-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-sysinfo-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-timezone-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-core-util-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-crt-conio-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-crt-convert-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-crt-environment-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-crt-filesystem-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-crt-heap-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-crt-locale-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-crt-math-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-crt-multibyte-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-crt-private-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-crt-process-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-crt-runtime-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-crt-stdio-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-crt-string-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-crt-time-l1-1-0.dll",
+      "$root_out_dir/cdb/api-ms-win-crt-utility-l1-1-0.dll",
+      "$root_out_dir/cdb/ucrtbase.dll",
+    ]
+    args = [
+      rebase_path("$root_out_dir/cdb", root_out_dir),
+      current_cpu,
+    ]
+  }
+}
diff --git a/build/win/README.goma b/build/win/README.goma
new file mode 100644
index 0000000..d0b8fae
--- /dev/null
+++ b/build/win/README.goma
@@ -0,0 +1,2 @@
+Files in this directory have been copied from Chromium's build/win,
+and some files are modified for goma.
diff --git a/build/win/as_invoker.manifest b/build/win/as_invoker.manifest
new file mode 100644
index 0000000..df046fd
--- /dev/null
+++ b/build/win/as_invoker.manifest
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
+<trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
+  <security>
+    <requestedPrivileges>
+      <requestedExecutionLevel level="asInvoker" uiAccess="false"></requestedExecutionLevel>
+    </requestedPrivileges>
+  </security>
+</trustInfo></assembly>
diff --git a/build/win/common_controls.manifest b/build/win/common_controls.manifest
new file mode 100644
index 0000000..1710196
--- /dev/null
+++ b/build/win/common_controls.manifest
@@ -0,0 +1,8 @@
+<?xml version='1.0' encoding='UTF-8' standalone='yes'?>
+<assembly xmlns='urn:schemas-microsoft-com:asm.v1' manifestVersion='1.0'>
+  <dependency>
+    <dependentAssembly>
+      <assemblyIdentity type='win32' name='Microsoft.Windows.Common-Controls' version='6.0.0.0' processorArchitecture='*' publicKeyToken='6595b64144ccf1df' language='*' />
+    </dependentAssembly>
+  </dependency>
+</assembly>
diff --git a/build/win/compatibility.manifest b/build/win/compatibility.manifest
new file mode 100644
index 0000000..10d10da
--- /dev/null
+++ b/build/win/compatibility.manifest
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
+  <compatibility xmlns="urn:schemas-microsoft-com:compatibility.v1">
+    <application>
+      <!--The ID below indicates application support for Windows Vista -->
+      <supportedOS Id="{e2011457-1546-43c5-a5fe-008deee3d3f0}"/>
+      <!--The ID below indicates application support for Windows 7 -->
+      <supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/>
+      <!--The ID below indicates application support for Windows 8 -->
+      <supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/>
+      <!--The ID below indicates application support for Windows 8.1 -->
+      <supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/>
+      <!--The ID below indicates application support for Windows 10 -->
+      <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
+    </application>
+  </compatibility>
+</assembly>
diff --git a/build/win/copy_cdb_to_output.py b/build/win/copy_cdb_to_output.py
new file mode 100644
index 0000000..33bcac7
--- /dev/null
+++ b/build/win/copy_cdb_to_output.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import glob
+import hashlib
+import os
+import shutil
+import sys
+
+script_dir = os.path.dirname(os.path.realpath(__file__))
+src_build_dir = os.path.abspath(os.path.join(script_dir, os.pardir))
+sys.path.insert(0, src_build_dir)
+
+import vs_toolchain
+
+
+def _HexDigest(file_name):
+  """Returns the hex SHA-256 digest of the contents of |file_name|."""
+  hasher = hashlib.sha256()
+  blocksize = 65536
+  # 'with' closes the file even if a read fails; the original leaked it then.
+  with open(file_name, 'rb') as afile:
+    # Hash in fixed-size chunks so large binaries are never fully in memory.
+    for buf in iter(lambda: afile.read(blocksize), b''):
+      hasher.update(buf)
+  return hasher.hexdigest()
+
+
+def _CopyImpl(file_name, target_dir, source_dir, verbose=False):
+  """Copies |file_name| from |source_dir| into |target_dir| when the target is
+  missing or differs in SHA-256; does nothing if its directory is absent."""
+  src_path = os.path.join(source_dir, file_name)
+  dst_path = os.path.join(target_dir, file_name)
+  if not os.path.isdir(os.path.dirname(dst_path)):
+    return
+  if os.path.isfile(dst_path) and _HexDigest(src_path) == _HexDigest(dst_path):
+    return
+  if verbose:
+    print 'Copying %s to %s...' % (src_path, dst_path)
+  if os.path.exists(dst_path):
+    os.unlink(dst_path)
+  shutil.copy(src_path, dst_path)
+
+
+def _ConditionalMkdir(output_dir):
+  if not os.path.isdir(output_dir):
+    os.makedirs(output_dir)
+
+
+def _CopyCDBToOutput(output_dir, target_arch):
+  """Copies the Windows debugging executable cdb.exe to the output
+  directory, which is created if it does not exist. The output
+  directory, and target architecture that should be copied, are
+  passed. Supported values for the target architecture are the GYP
+  values "ia32" and "x64" and the GN values "x86" and "x64".
+  """
+  _ConditionalMkdir(output_dir)
+  vs_toolchain.SetEnvironmentAndGetRuntimeDllDirs()
+  # If WINDOWSSDKDIR is not set use the default SDK path. This will be the case
+  # when DEPOT_TOOLS_WIN_TOOLCHAIN=0 and vcvarsall.bat has not been run.
+  win_sdk_dir = os.path.normpath(
+      os.environ.get('WINDOWSSDKDIR',
+                     'C:\\Program Files (x86)\\Windows Kits\\10'))
+  if target_arch == 'ia32' or target_arch == 'x86':
+    src_arch = 'x86'
+  elif target_arch == 'x64':
+    src_arch = 'x64'
+  else:
+    print 'copy_cdb_to_output.py: unknown target_arch %s' % target_arch
+    sys.exit(1)
+  # We need to copy multiple files, so cache the computed source directory.
+  src_dir = os.path.join(win_sdk_dir, 'Debuggers', src_arch)
+  # We need to copy some helper DLLs to get access to the !uniqstack
+  # command to dump all threads' stacks.
+  src_winext_dir = os.path.join(src_dir, 'winext')
+  dst_winext_dir = os.path.join(output_dir, 'winext')
+  src_winxp_dir = os.path.join(src_dir, 'winxp')
+  dst_winxp_dir = os.path.join(output_dir, 'winxp')
+  src_crt_dir = os.path.join(win_sdk_dir, r'Redist\ucrt\DLLs', src_arch)
+  _ConditionalMkdir(dst_winext_dir)
+  _ConditionalMkdir(dst_winxp_dir)
+  # Note that the outputs from the "copy_cdb_to_output" target need to
+  # be kept in sync with this list.
+  _CopyImpl('cdb.exe', output_dir, src_dir)
+  _CopyImpl('dbgeng.dll', output_dir, src_dir)
+  _CopyImpl('dbghelp.dll', output_dir, src_dir)
+  _CopyImpl('dbgmodel.dll', output_dir, src_dir)
+  _CopyImpl('ext.dll', dst_winext_dir, src_winext_dir)
+  _CopyImpl('uext.dll', dst_winext_dir, src_winext_dir)
+  _CopyImpl('exts.dll', dst_winxp_dir, src_winxp_dir)
+  _CopyImpl('ntsdexts.dll', dst_winxp_dir, src_winxp_dir)
+  for dll_path in glob.glob(os.path.join(src_crt_dir, 'api-ms-win-*.dll')):
+    _CopyImpl(os.path.split(dll_path)[1], output_dir, src_crt_dir)
+  _CopyImpl('ucrtbase.dll', output_dir, src_crt_dir)
+  return 0
+
+
+def main():
+  if len(sys.argv) < 3:  # sys.argv[2] is read below; both args are required.
+    print >>sys.stderr, 'Usage: copy_cdb_to_output.py <output_dir> ' + \
+        '<target_arch>'
+    return 1
+  return _CopyCDBToOutput(sys.argv[1], sys.argv[2])
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/build/win/require_administrator.manifest b/build/win/require_administrator.manifest
new file mode 100644
index 0000000..4142e73
--- /dev/null
+++ b/build/win/require_administrator.manifest
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
+<trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
+  <security>
+    <requestedPrivileges>
+      <requestedExecutionLevel level="requireAdministrator" uiAccess="false"></requestedExecutionLevel>
+    </requestedPrivileges>
+  </security>
+</trustInfo></assembly>
diff --git a/buildtools/.gitignore b/buildtools/.gitignore
new file mode 100644
index 0000000..02767dd
--- /dev/null
+++ b/buildtools/.gitignore
@@ -0,0 +1,3 @@
+linux*/gn
+mac/gn
+win/gn.exe
diff --git a/buildtools/linux64/gn.sha1 b/buildtools/linux64/gn.sha1
new file mode 100644
index 0000000..87d5563
--- /dev/null
+++ b/buildtools/linux64/gn.sha1
@@ -0,0 +1 @@
+113f5b30a7cfae72015600a119590d45d64c0d0d
diff --git a/buildtools/mac/gn.sha1 b/buildtools/mac/gn.sha1
new file mode 100644
index 0000000..c7d13b6
--- /dev/null
+++ b/buildtools/mac/gn.sha1
@@ -0,0 +1 @@
+b33a6d33c2b2f42762f902672ffdf4837fa1c662
diff --git a/buildtools/win/gn.exe.sha1 b/buildtools/win/gn.exe.sha1
new file mode 100644
index 0000000..3b11723
--- /dev/null
+++ b/buildtools/win/gn.exe.sha1
@@ -0,0 +1 @@
+b1981c189f40c3ae42b965277ad1bc3449d26d2a
diff --git a/buildtools/win/toolchain_vs2013.hash b/buildtools/win/toolchain_vs2013.hash
new file mode 100644
index 0000000..4afc33b
--- /dev/null
+++ b/buildtools/win/toolchain_vs2013.hash
@@ -0,0 +1,2 @@
+3fa540f7ff4aaf5e1e0d2dca1f4b99dda91bb281
+9d9a93134b3eabd003b85b4e7dea06c0eae150ed
diff --git a/client/.vpython b/client/.vpython
new file mode 100644
index 0000000..efbc586
--- /dev/null
+++ b/client/.vpython
@@ -0,0 +1,17 @@
+# This is a vpython "spec" file.
+#
+# This includes references for all non-stdlib python packages that the Goma
+# client depends on.
+#
+# Read more about `vpython` and how to modify this file here:
+#   https://chromium.googlesource.com/infra/infra/+/master/doc/users/vpython.md
+
+python_version: "2.7"
+
+wheel: <
+  name: "infra/python/wheels/pypiwin32/${vpython_platform}"
+  version: "version:219"
+
+  match_tag: < platform: "win32" >
+  match_tag: < platform: "win_amd64" >
+>
diff --git a/client/BUILD.gn b/client/BUILD.gn
new file mode 100644
index 0000000..4bf3d99
--- /dev/null
+++ b/client/BUILD.gn
@@ -0,0 +1,1769 @@
+# Copyright 2014 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+proto_out_dir = "prototmp"
+generate_compiler_proxy_info = "generate_compiler_proxy_info.py"
+generate_static_darray = "generate_static_darray.py"
+
+import("//testing/libfuzzer/fuzzer_test.gni")
+import("//third_party/protobuf/proto_library.gni")
+
+group("client") {
+  deps = [
+    ":calc_sha256_checksum",
+    ":compiler_proxy",
+    ":goma_fetch",
+    ":gomacc",
+  ]
+}
+
+static_library("common") {
+  sources = [
+    "atomic_stats_counter.cc",
+    "atomic_stats_counter.h",
+    "autolock_timer.cc",
+    "autolock_timer.h",
+    "callback.h",
+    "counterz.cc",
+    "counterz.h",
+    "env_flags.cc",
+    "env_flags.h",
+    "file_id.cc",
+    "file_id.h",
+    "goma_flags.cc",
+    "goma_ipc_addr.cc",
+    "goma_ipc_addr.h",
+    "goma_ipc_peer.cc",
+    "goma_ipc_peer.h",
+    "json_util.cc",
+    "json_util.h",
+    "machine_info.cc",
+    "machine_info.h",
+    "mypath.cc",
+    "mypath.h",
+    "oauth2.cc",
+    "oauth2.h",
+    "simple_timer.cc",
+    "simple_timer.h",
+    "spawner.h",
+    "subprocess.cc",
+    "subprocess.h",
+    "timestamp.cc",
+    "timestamp.h",
+    "util.cc",
+    "util.h",
+  ]
+  include_dirs = [ "." ]
+  deps = [
+    "//base",
+    "//lib",
+    "//third_party:glog",
+    "//third_party/chromium_base:cpu",
+    "//third_party/chromium_base:string",
+    "//third_party/jsoncpp",
+  ]
+  if (os == "win") {
+    sources += [
+      "filetime_win.cc",
+      "filetime_win.h",
+      "gettimeofday_helper_win.cc",
+      "gettimeofday_helper_win.h",
+      "named_pipe_client_win.cc",
+      "named_pipe_client_win.h",
+      "named_pipe_win.cc",
+      "named_pipe_win.h",
+      "posix_helper_win.cc",
+      "posix_helper_win.h",
+      "simple_timer_win.cc",
+      "spawner_win.cc",
+      "spawner_win.h",
+    ]
+  } else {
+    sources += [
+      "spawner_posix.cc",
+      "spawner_posix.h",
+    ]
+  }
+
+  if (os == "linux") {
+    sources += [ "simple_timer_linux.cc" ]
+    libs = [ "rt" ]
+  }
+  if (os == "mac") {
+    sources += [ "simple_timer_mac.cc" ]
+  }
+}
+
+proto_library("compiler_info_data_proto") {
+  sources = [
+    "compiler_info_data.proto",
+  ]
+}
+
+proto_library("deps_cache_proto") {
+  sources = [
+    "deps_cache_data.proto",
+  ]
+}
+
+proto_library("error_notice") {
+  sources = [
+    "error_notice.proto",
+  ]
+}
+
+proto_library("local_output_cache_proto") {
+  sources = [
+    "local_output_cache_data.proto",
+  ]
+}
+
+proto_library("subprocess_proto") {
+  sources = [
+    "subprocess.protodevel",
+  ]
+}
+
+proto_library("settings_proto") {
+  sources = [
+    "settings.proto",
+  ]
+}
+
+config("include_target_gen_dir") {
+  include_dirs = [ "$target_gen_dir" ]
+}
+
+action("gen_compiler_proxy_info") {
+  script = generate_compiler_proxy_info
+  outputs = [
+    "$target_gen_dir/compiler_proxy_info.h",
+  ]
+  args = [
+    "--out-dir",
+    rebase_path("$target_gen_dir"),
+  ]
+  public_configs = [ ":include_target_gen_dir" ]
+}
+
+action("gen_cpp_parser_darray") {
+  script = generate_static_darray
+  outputs = [
+    "$target_gen_dir/cpp_parser_darray.h",
+  ]
+  args = [
+    "--out-dir",
+    rebase_path("$target_gen_dir"),
+  ]
+  public_configs = [ ":include_target_gen_dir" ]
+}
+
+static_library("gomacc_lib") {
+  sources = [
+    "goma_ipc.cc",
+    "goma_ipc.h",
+    "gomacc_argv.cc",
+    "gomacc_argv.h",
+    "gomacc_common.cc",
+    "gomacc_common.h",
+
+    # generated files.
+    "$target_gen_dir/compiler_proxy_info.h",
+  ]
+  deps = [
+    ":common",
+    ":gen_compiler_proxy_info",
+    ":ioutil_lib",
+    "//lib",
+    "//third_party:glog",
+    "//third_party/protobuf:protobuf_lite",
+  ]
+  include_dirs = [ "." ]
+  if (os == "linux") {
+    sources += [
+      "cros_util.cc",
+      "cros_util.h",
+    ]
+  }
+}
+
+config("compiler_proxy_config") {
+  if (os == "linux") {
+    defines = [ "USE_EPOLL" ]
+  } else if (os == "mac" || os == "freebsd") {
+    defines = [ "USE_KQUEUE" ]
+  }
+}
+
+static_library("compiler_proxy_lib") {
+  libs = []
+  sources = [
+    "arfile.cc",
+    "arfile.h",
+    "arfile_reader.cc",
+    "arfile_reader.h",
+    "auto_updater.cc",
+    "auto_updater.h",
+    "cache_file.cc",
+    "cache_file.h",
+    "compilation_database_reader.cc",
+    "compilation_database_reader.h",
+    "compile_service.cc",
+    "compile_service.h",
+    "compile_stats.cc",
+    "compile_stats.h",
+    "compile_task.cc",
+    "compile_task.h",
+    "compiler_flags_util.cc",
+    "compiler_flags_util.h",
+    "compiler_info.cc",
+    "compiler_info.h",
+    "compiler_info_cache.cc",
+    "compiler_info_cache.h",
+    "compiler_proxy_histogram.cc",
+    "compiler_proxy_histogram.h",
+    "content.cc",
+    "content.h",
+    "content_cursor.cc",
+    "content_cursor.h",
+    "deps_cache.cc",
+    "deps_cache.h",
+    "descriptor_poller.cc",
+    "descriptor_poller.h",
+    "descriptor_poller_epoll.cc",
+    "descriptor_poller_kqueue.cc",
+    "descriptor_poller_select.cc",
+    "file_hash_cache.cc",
+    "file_hash_cache.h",
+    "file_id_cache.cc",
+    "file_id_cache.h",
+    "filename_id_table.cc",
+    "filename_id_table.h",
+    "framework_path_resolver.cc",
+    "framework_path_resolver.h",
+    "goma_file_dump.cc",
+    "goma_file_dump.h",
+    "goma_file_http.cc",
+    "goma_file_http.h",
+    "goma_init.cc",
+    "goma_init.h",
+    "hash_rewrite_parser.cc",
+    "hash_rewrite_parser.h",
+    "histogram.cc",
+    "histogram.h",
+    "http.cc",
+    "http.h",
+    "http_init.cc",
+    "http_init.h",
+    "http_rpc.cc",
+    "http_rpc.h",
+    "http_rpc_init.cc",
+    "http_rpc_init.h",
+    "include_cache.cc",
+    "include_cache.h",
+    "jar_parser.cc",
+    "jar_parser.h",
+    "jarfile_reader.cc",
+    "jarfile_reader.h",
+    "library_path_resolver.cc",
+    "library_path_resolver.h",
+    "linked_unordered_map.h",
+    "linker_input_processor.cc",
+    "linker_input_processor.h",
+    "linker_script_parser.cc",
+    "linker_script_parser.h",
+    "log_cleaner.cc",
+    "log_cleaner.h",
+    "log_service_client.cc",
+    "log_service_client.h",
+    "luci_context.cc",
+    "luci_context.h",
+    "multi_http_rpc.cc",
+    "multi_http_rpc.h",
+    "oauth2_token.cc",
+    "oauth2_token.h",
+    "openssl_engine.cc",
+    "openssl_engine.h",
+    "rand_util.cc",
+    "rand_util.h",
+    "scoped_tmp_file.cc",
+    "scoped_tmp_file.h",
+    "settings.cc",
+    "settings.h",
+    "sha256hash_hasher.h",
+    "socket_descriptor.cc",
+    "socket_descriptor.h",
+    "socket_pool.cc",
+    "socket_pool.h",
+    "subprocess_controller.cc",
+    "subprocess_controller.h",
+    "subprocess_controller_client.cc",
+    "subprocess_controller_client.h",
+    "subprocess_controller_server.cc",
+    "subprocess_controller_server.h",
+    "subprocess_impl.cc",
+    "subprocess_impl.h",
+    "subprocess_option_setter.cc",
+    "subprocess_option_setter.h",
+    "subprocess_task.cc",
+    "subprocess_task.h",
+    "threadpool_http_server.cc",
+    "threadpool_http_server.h",
+    "tls_descriptor.cc",
+    "tls_descriptor.h",
+    "trustedipsmanager.cc",
+    "trustedipsmanager.h",
+    "watchdog.cc",
+    "watchdog.h",
+    "worker_thread.cc",
+    "worker_thread.h",
+    "worker_thread_manager.cc",
+    "worker_thread_manager.h",
+
+    # generated files.
+    "$target_gen_dir/compiler_proxy_info.h",
+    "$target_gen_dir/resources/compiler_proxy_contentionz_script.c",
+    "$target_gen_dir/resources/compiler_proxy_contentionz_script.h",
+    "$target_gen_dir/resources/compiler_proxy_status_html5.c",
+    "$target_gen_dir/resources/compiler_proxy_status_html5.h",
+    "$target_gen_dir/resources/compiler_proxy_status_script.c",
+    "$target_gen_dir/resources/compiler_proxy_status_script.h",
+    "$target_gen_dir/resources/compiler_proxy_status_style.c",
+    "$target_gen_dir/resources/compiler_proxy_status_style.h",
+    "$target_gen_dir/resources/compilerz_html.c",
+    "$target_gen_dir/resources/compilerz_html.h",
+    "$target_gen_dir/resources/compilerz_script.c",
+    "$target_gen_dir/resources/compilerz_script.h",
+    "$target_gen_dir/resources/compilerz_style.c",
+    "$target_gen_dir/resources/compilerz_style.h",
+
+  ]
+  include_dirs = [ "." ]
+  deps = [
+    ":compiler_info_data_proto",
+    ":deps_cache_proto",
+    ":error_notice",
+    ":jwt_lib",
+    ":local_output_cache_proto",  # for compile_task
+    ":settings_proto",
+    ":subprocess_proto",
+    "//third_party:minizip",
+    "//third_party/boringssl",
+    "//third_party/jsoncpp",
+    "//third_party/protobuf:protobuf_lite",
+  ]
+  public_deps = [
+    ":common",
+    ":gen_compiler_proxy_info",
+    ":subprocess_proto",
+    "//client/resources:gen_compiler_proxy_contentionz_script",
+    "//client/resources:gen_compiler_proxy_status_html5",
+    "//client/resources:gen_compiler_proxy_status_script",
+    "//client/resources:gen_compiler_proxy_status_style",
+    "//client/resources:gen_compilerz_html",
+    "//client/resources:gen_compilerz_script",
+    "//client/resources:gen_compilerz_style",
+    "//lib",
+    "//lib:goma_file",
+    "//lib:goma_hash",
+    "//lib:goma_stats_proto",
+    "//lib:goma_statz_stats_proto",
+    "//third_party:glog",
+    "//third_party/jquery:jquery",
+    "//third_party/jsoncpp",
+
+  ]
+  public_configs = [
+    ":compiler_proxy_config",
+    "//third_party:gtest_prod",
+  ]
+  if (os == "linux") {
+    sources += [
+      "elf_parser.cc",
+      "elf_parser.h",
+    ]
+  }
+  if (os == "mac") {
+    sources += [
+      "mach_o_parser.cc",
+      "mach_o_parser.h",
+      "openssl_engine_helper.h",
+      "openssl_engine_helper_mac.cc",
+    ]
+    libs += [
+      "Security.framework",
+      "Foundation.framework",
+    ]
+    deps += [ "//third_party/chromium_base:mac_version" ]
+  }
+  if (os == "win") {
+    sources += [
+      "named_pipe_server_win.cc",
+      "named_pipe_server_win.h",
+      "openssl_engine_helper.h",
+      "openssl_engine_helper_win.cc",
+    ]
+    deps += [ "//client/certs:certs_resource" ]
+  }
+  if (os != "mac" && os != "win") {
+    sources += [
+      "openssl_engine_helper.h",
+      "openssl_engine_helper_generic.cc",
+    ]
+    deps += [ "//client/certs" ]
+  }
+}
+
+static_library("base64_lib") {
+  sources = [
+    "base64.cc",
+    "base64.h",
+  ]
+  deps = [
+    "//base",
+  ]
+}
+
+static_library("jwt_lib") {
+  if (os == "win") {
+    cflags = [
+      # C4267: conversion from 'size_t' to 'int', possible loss of data
+      # Come from passing string::size() to
+      # BIO_new_mem_buf(const void*, int len).
+      "/wd4267",
+    ]
+  }
+
+  sources = [
+    "jwt.cc",
+    "jwt.h",
+  ]
+  deps = [
+    ":base64_lib",
+    ":ioutil_lib",
+    "//base",
+    "//third_party:glog",
+    "//third_party/boringssl",
+  ]
+}
+
+static_library("directive_filter_lib") {
+  sources = [
+    "directive_filter.cc",
+    "directive_filter.h",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+  ]
+}
+
+static_library("cpp_parser_lib") {
+  sources = [
+    "$target_gen_dir/cpp_parser_darray.h",
+    "cpp_input.h",
+    "cpp_input_stream.cc",
+    "cpp_input_stream.h",
+    "cpp_macro.cc",
+    "cpp_macro.h",
+    "cpp_parser.cc",
+    "cpp_parser.h",
+    "cpp_token.cc",
+    "cpp_token.h",
+    "cpp_tokenizer.cc",
+    "cpp_tokenizer.h",
+    "include_file_finder.cc",
+    "include_file_finder.h",
+    "include_file_utils.cc",
+    "include_file_utils.h",
+    "include_guard_detector.cc",
+    "include_guard_detector.h",
+    "predefined_macros.h",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":directive_filter_lib",
+    ":gen_cpp_parser_darray",
+    ":static_darray_lib",
+  ]
+}
+
+static_library("include_processor_lib") {
+  sources = [
+    "include_processor.cc",
+    "include_processor.h",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":cpp_parser_lib",
+  ]
+}
+
+static_library("ioutil_lib") {
+  sources = [
+    "ioutil.cc",
+    "ioutil.h",
+  ]
+  deps = [
+    "//base",
+    "//lib",
+    "//third_party:glog",
+  ]
+}
+
+static_library("static_darray_lib") {
+  sources = [
+    "static_darray.cc",
+    "static_darray.h",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+  ]
+}
+
+static_library("local_output_cache_lib") {
+  sources = [
+    "local_output_cache.cc",
+    "local_output_cache.h",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":local_output_cache_proto",
+  ]
+}
+
+static_library("breakpad_lib") {
+  sources = [
+    "breakpad.h",
+  ]
+  if (os == "linux") {
+    sources += [ "breakpad_linux.cc" ]
+    deps = [
+      "//base",
+      "//third_party:glog",
+      "//third_party/breakpad:client",
+    ]
+  }
+  if (os == "mac") {
+    sources += [ "breakpad_mac.cc" ]
+    deps = [
+      "//third_party:glog",
+      "//third_party/breakpad:breakpad",
+    ]
+    libs = [ "Foundation.framework" ]
+  }
+  if (os == "win") {
+    sources += [ "breakpad_win.cc" ]
+    deps = [
+      "//third_party:glog",
+      "//third_party/breakpad:breakpad_handler",
+    ]
+  }
+  include_dirs = [
+    "//third_party/breakpad/breakpad/src",
+
+    # For including third_party/lss/linux_syscall_support.h from
+    # third_party/breakpad/src/common/memory.h.
+    # linux_syscall_support.h does not have a .c or .cc file.
+    "//",
+  ]
+}
+
+executable("gomacc") {
+  sources = [
+    "gomacc.cc",
+  ]
+  include_dirs = [ "." ]
+  deps = [
+    ":breakpad_lib",
+    ":gomacc_lib",
+    "//build/config/sanitizers:deps",
+    "//lib",
+    "//third_party:glog",
+    "//third_party/protobuf:protobuf_lite",
+  ]
+  if (is_posix && may_make_pie) {
+    ldflags = [ "-pie" ]
+  }
+}
+
+executable("compiler_proxy") {
+  sources = [
+    "compiler_proxy.cc",
+  ]
+  include_dirs = [ "." ]
+  deps = [
+    ":breakpad_lib",
+    ":compiler_proxy_lib",
+    ":include_processor_lib",
+    ":local_output_cache_lib",
+    "//build/config/sanitizers:deps",
+    "//third_party/boringssl",
+    "//third_party/protobuf:protobuf_lite",
+  ]
+
+  # TODO: check whether reading the .rc file from two places is OK.
+  if (os == "win") {
+    sources += [ "//client/certs/certs.rc" ]
+  }
+
+  if (is_posix && may_make_pie) {
+    ldflags = [ "-pie" ]
+  }
+}
+
+executable("goma_fetch") {
+  sources = [
+    "goma_fetch.cc",
+  ]
+  include_dirs = [ "." ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":ioutil_lib",
+    "//build/config/sanitizers:deps",
+    "//third_party/protobuf:protobuf_lite",
+  ]
+
+  # TODO: check whether reading the .rc file from two places is OK.
+  if (os == "win") {
+    sources += [ "//client/certs/certs.rc" ]
+  }
+}
+
+executable("cpp_parser") {
+  sources = [
+    "cpp_parser.cc",
+  ]
+  include_dirs = [ "." ]
+  defines = [ "TEST" ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":cpp_parser_lib",
+    ":ioutil_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("directive_filter") {
+  sources = [
+    "directive_filter.cc",
+  ]
+  include_dirs = [ "." ]
+  defines = [ "TEST" ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":directive_filter_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("include_processor") {
+  sources = [
+    "include_processor.cc",
+  ]
+  include_dirs = [ "." ]
+  defines = [ "TEST" ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":include_processor_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("jarfile_normalizer") {
+  sources = [
+    "jarfile_normalizer.cc",
+  ]
+  include_dirs = [ "." ]
+  deps = [
+    ":compiler_proxy_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+copy("copy_goma_auth_py") {
+  sources = [
+    "goma_auth.py",
+  ]
+  outputs = [
+    "$root_out_dir/{{source_file_part}}",
+  ]
+}
+
+copy("copy_goma_ctl_py") {
+  sources = [
+    "goma_ctl.py",
+  ]
+  outputs = [
+    "$root_out_dir/{{source_file_part}}",
+  ]
+}
+
+copy("copy_vpython") {
+  sources = [
+    ".vpython",
+  ]
+  outputs = [
+    "$root_out_dir/{{source_file_part}}",
+  ]
+}
+
+copy("copy_diagnose_goma_log_py") {
+  sources = [
+    "diagnose_goma_log.py",
+  ]
+  outputs = [
+    "$root_out_dir/{{source_file_part}}",
+  ]
+}
+
+if (os == "linux") {
+  executable("elf_parser") {
+    sources = [
+      "elf_parser.cc",
+    ]
+    include_dirs = [ "." ]
+    defines = [ "TEST" ]
+    deps = [
+      ":compiler_proxy_lib",
+      "//build/config/sanitizers:deps",
+    ]
+  }
+}
+
+if (os != "win") {
+  executable("linker_input_processor") {
+    sources = [
+      "linker_input_processor.cc",
+    ]
+    include_dirs = [ "." ]
+    defines = [ "TEST" ]
+    deps = [
+      ":compiler_proxy_lib",
+      "//build/config/sanitizers:deps",
+    ]
+  }
+  copy("goma_tool") {
+    sources = [
+      "goma_ctl.sh",
+      "report_env.sh",
+
+    ]
+    if (os == "linux") {
+      sources += [
+        # for chromeos
+        "goma-make",
+        "goma-make_unittest",
+        "goma-wrapper",
+      ]
+    }
+    outputs = [
+      "$root_out_dir/{{source_file_part}}",
+    ]
+  }
+
+  action("symlink_gomacc") {
+    script = "symlink.py"
+    deps = [
+      ":gomacc",
+    ]
+    outputs = [
+      "$root_out_dir/g++",
+      "$root_out_dir/gcc",
+      "$root_out_dir/javac",
+      "$root_out_dir/cc",
+      "$root_out_dir/c++",
+      "$root_out_dir/clang",
+      "$root_out_dir/clang++",
+    ]
+    args = [
+             "--force",
+             "--target",
+             "gomacc",
+           ] + rebase_path(outputs)
+  }
+
+  action("dump_compiler_proxy_symbols") {
+    script = "dump_syms.py"
+    deps = [
+      ":compiler_proxy",
+      "//third_party/breakpad:dump_syms",
+    ]
+    outputs = [
+      "$root_out_dir/compiler_proxy.sym",
+    ]
+    args = [
+      "--dump_syms",
+      rebase_path("$root_out_dir/dump_syms"),
+      "--input",
+      rebase_path("$root_out_dir/compiler_proxy"),
+      "--output",
+      rebase_path("$root_out_dir/compiler_proxy.sym"),
+    ]
+  }
+
+  action("dump_gomacc_symbols") {
+    script = "dump_syms.py"
+    deps = [
+      ":gomacc",
+      "//third_party/breakpad:dump_syms",
+    ]
+    outputs = [
+      "$root_out_dir/gomacc.sym",
+    ]
+    args = [
+      "--dump_syms",
+      rebase_path("$root_out_dir/dump_syms"),
+      "--input",
+      rebase_path("$root_out_dir/gomacc"),
+      "--output",
+      rebase_path("$root_out_dir/gomacc.sym"),
+    ]
+  }
+
+  # Runs calculate_sha256.py over the files listed after "--output" and
+  # writes the result to $root_out_dir/sha256.json.  The deps are the targets
+  # that produce those files.
+  # NOTE(review): report_env.sh is passed as an input but no dep visible in
+  # this block names it; presumably :goma_tool copies it -- confirm, since a
+  # missing dependency edge would make this action race with the copy.
+  action("calc_sha256_checksum") {
+    script = "calculate_sha256.py"
+    deps = [
+      ":compiler_proxy",
+      ":copy_diagnose_goma_log_py",
+      ":copy_goma_auth_py",
+      ":copy_goma_ctl_py",
+      ":copy_vpython",
+      ":dump_compiler_proxy_symbols",
+      ":dump_gomacc_symbols",
+      ":goma_fetch",
+      ":goma_tool",
+      ":gomacc",
+    ]
+    outputs = [
+      "$root_out_dir/sha256.json",
+    ]
+    args = [
+      "--output",
+      rebase_path("$root_out_dir/sha256.json"),
+      rebase_path("$root_out_dir/compiler_proxy"),
+      rebase_path("$root_out_dir/compiler_proxy.sym"),
+      rebase_path("$root_out_dir/diagnose_goma_log.py"),
+      rebase_path("$root_out_dir/goma_auth.py"),
+      rebase_path("$root_out_dir/gomacc"),
+      rebase_path("$root_out_dir/gomacc.sym"),
+      rebase_path("$root_out_dir/goma_ctl.py"),
+      rebase_path("$root_out_dir/.vpython"),
+      rebase_path("$root_out_dir/goma_fetch"),
+      rebase_path("$root_out_dir/report_env.sh"),
+    ]
+  }
+} else {  # win
+  # Windows-only helper executable built from vcflags.c.  Its binary and PDB
+  # are among the files hashed by calc_sha256_checksum below.
+  executable("vcflags") {
+    sources = [
+      "vcflags.c",
+    ]
+    deps = [
+      "//build/config/sanitizers:deps",
+    ]
+  }
+  # Copies goma_ctl.bat into $root_out_dir under its original file name.
+  copy("goma_tool") {
+    sources = [
+      "goma_ctl.bat",
+    ]
+    outputs = [
+      "$root_out_dir/{{source_file_part}}",
+    ]
+  }
+
+  # Windows variant: runs dump_syms.py with dump_syms.exe to write gomacc.sym
+  # from gomacc.exe in $root_out_dir.
+  action("dump_gomacc_symbols") {
+    script = "dump_syms.py"
+    deps = [
+      ":gomacc",
+      "//third_party/breakpad:dump_syms",
+    ]
+    outputs = [
+      "$root_out_dir/gomacc.sym",
+    ]
+    args = [
+      "--dump_syms",
+      rebase_path("$root_out_dir/dump_syms.exe"),
+      "--input",
+      rebase_path("$root_out_dir/gomacc.exe"),
+      "--output",
+      rebase_path("$root_out_dir/gomacc.sym"),
+    ]
+  }
+
+  # Windows variant: runs dump_syms.py with dump_syms.exe to write
+  # compiler_proxy.sym from compiler_proxy.exe in $root_out_dir.
+  action("dump_compiler_proxy_symbols") {
+    script = "dump_syms.py"
+    deps = [
+      ":compiler_proxy",
+      "//third_party/breakpad:dump_syms",
+    ]
+    outputs = [
+      "$root_out_dir/compiler_proxy.sym",
+    ]
+    args = [
+      "--dump_syms",
+      rebase_path("$root_out_dir/dump_syms.exe"),
+      "--input",
+      rebase_path("$root_out_dir/compiler_proxy.exe"),
+      "--output",
+      rebase_path("$root_out_dir/compiler_proxy.sym"),
+    ]
+  }
+
+  # Windows variant: runs calculate_sha256.py over the files listed after
+  # "--output" (executables, their .pdb files, .sym files and scripts) and
+  # writes the result to $root_out_dir/sha256.json.
+  # NOTE(review): the .pdb inputs are not declared outputs of any dep here;
+  # presumably the toolchain emits them next to each .exe -- confirm.
+  action("calc_sha256_checksum") {
+    script = "calculate_sha256.py"
+    deps = [
+      ":compiler_proxy",
+      ":copy_diagnose_goma_log_py",
+      ":copy_goma_auth_py",
+      ":copy_goma_ctl_py",
+      ":copy_vpython",
+      ":dump_compiler_proxy_symbols",
+      ":dump_gomacc_symbols",
+      ":goma_fetch",
+      ":goma_tool",
+      ":gomacc",
+      ":vcflags",
+    ]
+    outputs = [
+      "$root_out_dir/sha256.json",
+    ]
+    args = [
+      "--output",
+      rebase_path("$root_out_dir/sha256.json"),
+      rebase_path("$root_out_dir/compiler_proxy.exe"),
+      rebase_path("$root_out_dir/compiler_proxy.exe.pdb"),
+      rebase_path("$root_out_dir/compiler_proxy.sym"),
+      rebase_path("$root_out_dir/diagnose_goma_log.py"),
+      rebase_path("$root_out_dir/goma_auth.py"),
+      rebase_path("$root_out_dir/gomacc.exe"),
+      rebase_path("$root_out_dir/gomacc.exe.pdb"),
+      rebase_path("$root_out_dir/gomacc.sym"),
+      rebase_path("$root_out_dir/goma_ctl.bat"),
+      rebase_path("$root_out_dir/goma_ctl.py"),
+      rebase_path("$root_out_dir/.vpython"),
+      rebase_path("$root_out_dir/goma_fetch.exe"),
+      rebase_path("$root_out_dir/goma_fetch.exe.pdb"),
+      rebase_path("$root_out_dir/vcflags.exe"),
+      rebase_path("$root_out_dir/vcflags.exe.pdb"),
+    ]
+  }
+}
+
+# Test-only helper library (unittest_util, plus vsvars on Windows) that most
+# unit test executables in this file depend on.  gtest is a public dep, so
+# dependents get it without listing it themselves.
+static_library("goma_test_lib") {
+  testonly = true
+  sources = [
+    "unittest_util.cc",
+    "unittest_util.h",
+  ]
+  if (os == "win") {
+    sources += [
+      "vsvars.cc",
+      "vsvars.h",
+    ]
+  }
+  deps = [
+    ":ioutil_lib",
+    "//base",
+    "//base:goma_unittest",
+    "//third_party:glog",
+  ]
+  public_deps = [
+    "//third_party:gtest",
+  ]
+}
+
+# Test-only build of the same unittest_util sources as goma_test_lib, but
+# linked against :gomacc_lib and //lib instead of :ioutil_lib and //base.
+# Here gtest is a private dep, so dependents must list gtest themselves.
+static_library("gomacc_test_lib") {
+  testonly = true
+  sources = [
+    "unittest_util.cc",
+    "unittest_util.h",
+  ]
+  deps = [
+    ":gomacc_lib",
+    "//base:goma_unittest",
+    "//lib",
+    "//third_party:glog",
+    "//third_party:gtest",
+  ]
+}
+
+executable("arfile_unittest") {
+  testonly = true
+  sources = [
+    "arfile_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("atomic_stats_counter_unittest") {
+  testonly = true
+  sources = [
+    "atomic_stats_counter_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("base64_unittest") {
+  testonly = true
+  sources = [
+    "base64_unittest.cc",
+  ]
+  deps = [
+    ":base64_lib",
+    ":goma_test_lib",
+    "//base",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("callback_unittest") {
+  testonly = true
+  sources = [
+    "callback_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("compilation_database_reader_unittest") {
+  testonly = true
+  sources = [
+    "compilation_database_reader_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("compiler_info_unittest") {
+  testonly = true
+  sources = [
+    "compiler_info_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("compiler_info_cache_unittest") {
+  testonly = true
+  sources = [
+    "compiler_info_cache_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("content_cursor_unittest") {
+  testonly = true
+  sources = [
+    "content_cursor_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("cpp_parser_unittest") {
+  testonly = true
+  sources = [
+    "cpp_parser_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":cpp_parser_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("cpp_tokenizer_unittest") {
+  testonly = true
+  sources = [
+    "cpp_tokenizer_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":cpp_parser_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("directive_filter_unittest") {
+  testonly = true
+  sources = [
+    "directive_filter_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":directive_filter_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("env_flags_unittest") {
+  testonly = true
+  sources = [
+    "env_flags_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("goma_ipc_unittest") {
+  testonly = true
+  sources = [
+    "goma_ipc_unittest.cc",
+    "mock_socket_factory.cc",
+    "mock_socket_factory.h",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    ":gomacc_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("gomacc_argv_unittest") {
+  testonly = true
+  sources = [
+    "gomacc_argv_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    ":gomacc_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("histogram_unittest") {
+  testonly = true
+  sources = [
+    "histogram_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("http_unittest") {
+  testonly = true
+  sources = [
+    "http_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("http_rpc_unittest") {
+  testonly = true
+  sources = [
+    "fake_tls_engine.cc",
+    "fake_tls_engine.h",
+    "http_rpc_unittest.cc",
+    "mock_socket_factory.cc",
+    "mock_socket_factory.h",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+    "//third_party:minizip",
+  ]
+}
+
+executable("hash_rewrite_parser_unittest") {
+  testonly = true
+  sources = [
+    "hash_rewrite_parser_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("include_file_utils_unittest") {
+  testonly = true
+  sources = [
+    "include_file_utils_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":cpp_parser_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("include_processor_unittest") {
+  testonly = true
+  sources = [
+    "include_processor_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    ":include_processor_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("ioutil_unittest") {
+  testonly = true
+  sources = [
+    "ioutil_unittest.cc",
+  ]
+  deps = [
+    ":common",
+    ":goma_test_lib",
+    ":ioutil_lib",
+    "//base",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("jar_parser_unittest") {
+  testonly = true
+  sources = [
+    "jar_parser_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("jwt_unittest") {
+  testonly = true
+  sources = [
+    "jwt_unittest.cc",
+  ]
+  deps = [
+    ":base64_lib",
+    ":common",
+    ":goma_test_lib",
+    ":jwt_lib",
+    "//base",
+    "//build/config/sanitizers:deps",
+    "//third_party:glog",
+    "//third_party/boringssl",
+  ]
+}
+
+executable("library_path_resolver_unittest") {
+  testonly = true
+  sources = [
+    "library_path_resolver_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("linked_unordered_map_unittest") {
+  testonly = true
+  sources = [
+    "linked_unordered_map_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("linker_input_processor_unittest") {
+  testonly = true
+  sources = [
+    "linker_input_processor_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("linker_script_parser_unittest") {
+  testonly = true
+  sources = [
+    "linker_script_parser_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("log_cleaner_unittest") {
+  testonly = true
+  sources = [
+    "log_cleaner_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("luci_context_unittest") {
+  testonly = true
+  sources = [
+    "luci_context_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("machine_info_unittest") {
+  testonly = true
+  sources = [
+    "machine_info_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("mypath_unittest") {
+  testonly = true
+  sources = [
+    "mypath_unittest.cc",
+  ]
+  deps = [
+    ":common",
+    ":goma_test_lib",
+    "//base",
+    "//build/config/sanitizers:deps",
+    "//third_party:glog",
+  ]
+}
+
+executable("oauth2_unittest") {
+  testonly = true
+  sources = [
+    "oauth2_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("simple_timer_unittest") {
+  testonly = true
+  sources = [
+    "simple_timer_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+# Adds this directory to the include path of targets that depend on
+# gen_static_darray_test (applied through its public_configs).
+config("gen_static_darray_test_config") {
+  include_dirs = [ "." ]
+}
+
+# Runs the script named by the generate_static_darray variable (defined
+# outside this section) with "--test" to generate
+# static_darray_test_array.h into $target_gen_dir.
+action("gen_static_darray_test") {
+  testonly = true
+  script = generate_static_darray
+  outputs = [
+    "$target_gen_dir/static_darray_test_array.h",
+  ]
+  args = [
+    "--test",
+    "--out-dir",
+    rebase_path("$target_gen_dir"),
+  ]
+  public_configs = [ ":gen_static_darray_test_config" ]
+}
+
+# Unit test for static_darray.  Besides its own source it compiles against
+# the header generated by :gen_static_darray_test, which is therefore listed
+# both in sources and in deps.
+executable("static_darray_unittest") {
+  testonly = true
+  sources = [
+    "static_darray_unittest.cc",
+
+    # generated files.
+    "$target_gen_dir/static_darray_test_array.h",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":gen_static_darray_test",
+    ":goma_test_lib",
+    ":static_darray_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("subprocess_task_unittest") {
+  testonly = true
+  sources = [
+    "subprocess_task_unittest.cc",
+  ]
+  deps = [
+    ":breakpad_lib",
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("threadpool_http_server_unittest") {
+  testonly = true
+  sources = [
+    "threadpool_http_server_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("trustedipsmanager_unittest") {
+  testonly = true
+  sources = [
+    "trustedipsmanager_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("worker_thread_unittest") {
+  testonly = true
+  sources = [
+    "worker_thread_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("worker_thread_manager_unittest") {
+  testonly = true
+  sources = [
+    "mock_socket_factory.cc",
+    "mock_socket_factory.h",
+    "worker_thread_manager_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("arfile_reader_unittest") {
+  testonly = true
+  sources = [
+    "arfile_reader.cc",
+    "arfile_reader.h",
+    "arfile_reader_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("jarfile_reader_unittest") {
+  testonly = true
+  sources = [
+    "jarfile_reader_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("util_unittest") {
+  testonly = true
+  sources = [
+    "util.cc",
+    "util.h",
+    "util_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("openssl_engine_unittest") {
+  testonly = true
+  sources = [
+    "openssl_engine.cc",
+    "openssl_engine.h",
+    "openssl_engine_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+    "//third_party/boringssl",
+  ]
+  if (os == "win") {
+    sources += [ "//client/certs/certs.rc" ]
+  }
+}
+
+executable("rand_util_unittest") {
+  testonly = true
+  sources = [
+    "rand_util.cc",
+    "rand_util.h",
+    "rand_util_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+    "//third_party/boringssl",
+  ]
+}
+
+executable("include_cache_unittest") {
+  testonly = true
+  sources = [
+    "include_cache_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":directive_filter_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("deps_cache_unittest") {
+  testonly = true
+  sources = [
+    "deps_cache_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":directive_filter_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("filename_id_table_unittest") {
+  testonly = true
+  sources = [
+    "filename_id_table_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+executable("compile_task_unittest") {
+  testonly = true
+  sources = [
+    "compile_task_unittest.cc",
+  ]
+  deps = [
+    ":breakpad_lib",
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    ":include_processor_lib",
+    ":local_output_cache_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+# Unit test for the local output cache.
+# Like every other test executable in this file it depends on
+# //build/config/sanitizers:deps so that sanitizer builds pull in the
+# sanitizer support targets.
+executable("local_output_cache_unittest") {
+  testonly = true
+  sources = [
+    "local_output_cache_unittest.cc",
+  ]
+  deps = [
+    ":compiler_proxy_lib",
+    ":goma_test_lib",
+    ":local_output_cache_lib",
+    "//build/config/sanitizers:deps",
+  ]
+}
+
+if (os == "linux") {
+  executable("elf_parser_unittest") {
+    testonly = true
+    sources = [
+      "elf_parser_unittest.cc",
+    ]
+    deps = [
+      ":compiler_proxy_lib",
+      ":goma_test_lib",
+      "//build/config/sanitizers:deps",
+    ]
+  }
+  executable("cros_util_unittest") {
+    testonly = true
+    sources = [
+      "cros_util_unittest.cc",
+    ]
+    deps = [
+      ":gomacc_lib",
+      ":gomacc_test_lib",
+      "//build/config/sanitizers:deps",
+      "//third_party:gtest",
+    ]
+  }
+}
+
+if (os == "win") {
+  executable("dump_env") {
+    testonly = true
+    sources = [
+      "dump_env.c",
+    ]
+    deps = [
+      "//build/config/sanitizers:deps",
+    ]
+  }
+  executable("named_pipe_client_win_unittest") {
+    testonly = true
+    sources = [
+      "named_pipe_client_win_unittest.cc",
+    ]
+    deps = [
+      ":compiler_proxy_lib",
+      ":goma_test_lib",
+      "//build/config/sanitizers:deps",
+    ]
+  }
+  executable("named_pipe_server_win_unittest") {
+    testonly = true
+    sources = [
+      "named_pipe_server_win_unittest.cc",
+    ]
+    deps = [
+      ":compiler_proxy_lib",
+      ":goma_test_lib",
+      "//build/config/sanitizers:deps",
+    ]
+  }
+  executable("posix_helper_win_unittest") {
+    testonly = true
+    sources = [
+      "posix_helper_win_unittest.cc",
+    ]
+    deps = [
+      ":compiler_proxy_lib",
+      ":goma_test_lib",
+      "//build/config/sanitizers:deps",
+    ]
+  }
+  executable("spawner_win_unittest") {
+    testonly = true
+    sources = [
+      "spawner_win_unittest.cc",
+    ]
+    deps = [
+      ":compiler_proxy_lib",
+      ":goma_test_lib",
+      "//build/config/sanitizers:deps",
+    ]
+  }
+  executable("vstestrun") {
+    testonly = true
+    sources = [
+      "vstestrun.cc",
+    ]
+    deps = [
+      ":compiler_proxy_lib",
+      ":goma_test_lib",
+      "//build/config/sanitizers:deps",
+    ]
+  }
+} else {
+  executable("compiler_flags_util_unittest") {
+    testonly = true
+    sources = [
+      "compiler_flags_util_unittest.cc",
+    ]
+    deps = [
+      ":compiler_proxy_lib",
+      ":goma_test_lib",
+      "//build/config/sanitizers:deps",
+    ]
+  }
+}
+
+# Fuzz target built from base64_fuzzer.cc via the fuzzer_test template
+# (defined outside this section), linked against :common and //base.
+fuzzer_test("base64_fuzzer") {
+  sources = [
+    "base64_fuzzer.cc",
+  ]
+  deps = [
+    ":common",
+    "//base",
+  ]
+}
+
+# Fuzz target built from ioutil_parse_http_response_fuzzer.cc.  The "dict"
+# entry passes a fuzzer dictionary file to the fuzzer_test template.
+fuzzer_test("ioutil_parse_http_response_fuzzer") {
+  sources = [
+    "ioutil_parse_http_response_fuzzer.cc",
+  ]
+  deps = [
+    ":common",
+    "//base",
+  ]
+  dict = "data/fuzzer_dictionaries/net_url_request_fuzzer.dict"
+}
diff --git a/client/arfile.cc b/client/arfile.cc
new file mode 100644
index 0000000..76c177f
--- /dev/null
+++ b/client/arfile.cc
@@ -0,0 +1,471 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "arfile.h"
+
+#include <cstdio>
+#ifndef _WIN32
+// TODO: evaluate replacing following code using stdio, or Chromium
+//                  base library.
+// TODO: add code to parse Win32 .lib format.
+#include <ar.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#ifdef __MACH__
+#include <mach-o/ranlib.h>
+#endif
+#else
+// hack to provide snprintf.
+#define snprintf _snprintf_s
+
+// Copied from GNU C ar.h
+#define ARMAG   "!<arch>\n"     /* String that begins an archive file.  */
+#define SARMAG  8               /* Size of that string.  */
+
+#define ARFMAG  "`\n"           /* String in ar_fmag at end of each header.  */
+
+extern "C" {
+  struct ar_hdr {
+    char ar_name[16];           /* Member file name, sometimes / terminated. */
+    char ar_date[12];           /* File date, decimal seconds since Epoch.  */
+    char ar_uid[6], ar_gid[6];  /* User and group IDs, in ASCII decimal.  */
+    char ar_mode[8];            /* File mode, in ASCII octal.  */
+    char ar_size[10];           /* File size, in ASCII decimal.  */
+    char ar_fmag[2];            /* Always contains ARFMAG.  */
+  };
+}
+
+#endif
+
+#include <sstream>
+
+#include "glog/logging.h"
+#include "string_piece_utils.h"
+
+// VS2010 and VS2012 don't provide C99's atoll(), but VS2013 does.
+// For those older compilers, supply a file-local replacement so the calls
+// below compile unchanged on every platform.
+#if defined(_WIN32) && defined(_MSC_VER) && (_MSC_VER < 1800)
+namespace {
+
+// Parses |nptr| as a base-10 integer using MSVC's _strtoi64().
+long long atoll(const char* nptr) {
+  return _strtoi64(nptr, 0, 10);
+}
+
+}  // namespace
+#endif
+
+namespace devtools_goma {
+
+// Magic string that replaces ARMAG at the start of a thin archive.  It is
+// compared over SARMAG bytes in ArFile::Init().
+static const char* kThinArMagic = "!<thin>\n";
+// GNU variant support.
+// These are compared against the raw ar_name field, so each literal is the
+// name padded with spaces (presumably to sizeof(ar_hdr::ar_name) == 16
+// characters -- keep the padding intact when editing).
+static const char* kSymbolTableName = "/               ";
+static const char* kSym64TableName = "/SYM64/         ";
+static const char* kLongnameTableName = "//              ";
+
+// BSD variant support? "#1/<length>" and name will come after ar_hdr.
+// but BSD variant doesn't support thin archive?
+
+// Appends "<label>: " followed by every byte of |field| printed as a hex
+// integer plus a trailing space, then a newline, to |ss|.
+template <size_t N>
+static void DumpArHdrField(const char* label, const char (&field)[N],
+                           std::stringstream* ss) {
+  *ss << label << ": " << std::hex;
+  for (size_t i = 0; i < N; ++i) {
+    *ss << static_cast<int>(field[i]) << " ";
+  }
+  *ss << std::endl;
+}
+
+// Returns a multi-line hex dump of every field of the raw |ar_hdr|, one
+// field per line.  Used for verbose logging of headers, including ones that
+// failed to parse, so the bytes are shown as-is rather than as text.
+static string DumpArHdr(const struct ar_hdr& ar_hdr) {
+  std::stringstream ss;
+  DumpArHdrField("name", ar_hdr.ar_name, &ss);
+  DumpArHdrField("date", ar_hdr.ar_date, &ss);
+  DumpArHdrField("uid", ar_hdr.ar_uid, &ss);
+  // ar_gid was missing from the dump although it is part of the header.
+  DumpArHdrField("gid", ar_hdr.ar_gid, &ss);
+  DumpArHdrField("mode", ar_hdr.ar_mode, &ss);
+  DumpArHdrField("size", ar_hdr.ar_size, &ss);
+  DumpArHdrField("fmag", ar_hdr.ar_fmag, &ss);
+  return ss.str();
+}
+
+// Returns the parsed header fields as a single line of space-separated
+// "key:value" pairs (name, date, uid, gid, mode, size) for logging.
+string ArFile::EntryHeader::DebugString() const {
+  std::stringstream out;
+  out << "name:" << ar_name << " "
+      << "date:" << ar_date << " "
+      << "uid:" << ar_uid << " "
+      << "gid:" << ar_gid << " "
+      << "mode:" << ar_mode << " "
+      << "size:" << ar_size;
+  return out.str();
+}
+
+// Copies the |len| characters of |text| into the fixed-width |field| without
+// a terminating NUL.  Returns false (leaving |field| untouched) if |len| is
+// negative or |text| does not fit.
+template <size_t N>
+static bool StoreArHdrField(char (&field)[N], const char* text, int len) {
+  if (len < 0 || static_cast<size_t>(len) > N) {
+    return false;
+  }
+  memcpy(field, text, static_cast<size_t>(len));
+  return true;
+}
+
+// Serializes this entry header into the on-disk struct ar_hdr layout and
+// stores the raw bytes in |output|.
+//
+// Every field is left-justified and space padded.  ar_name is taken from
+// |orig_ar_name| so that long-name references are written back unchanged.
+// Returns false, without modifying |output|, if a numeric value needs more
+// characters than its field provides.
+bool ArFile::EntryHeader::SerializeToString(string* output) const {
+  DCHECK(output);
+  struct ar_hdr hdr;
+  memset(&hdr, ' ', sizeof(hdr));
+
+  // Copy only the bytes |orig_ar_name| really has; a shorter name keeps the
+  // space padding instead of pulling in the NUL and whatever follows it.
+  const size_t name_len = orig_ar_name.size() < sizeof(hdr.ar_name)
+                              ? orig_ar_name.size()
+                              : sizeof(hdr.ar_name);
+  memcpy(hdr.ar_name, orig_ar_name.data(), name_len);
+
+  // Format into a scratch buffer first.  Formatting straight into the field
+  // makes snprintf reserve the last byte for a NUL, which corrupts values
+  // that use the full field width (e.g. a 6-digit uid).
+  char text[32];
+  int len = snprintf(text, sizeof(text), "%llu",
+                     static_cast<unsigned long long>(ar_date));
+  bool ok = StoreArHdrField(hdr.ar_date, text, len);
+  len = snprintf(text, sizeof(text), "%u", ar_uid);
+  ok = StoreArHdrField(hdr.ar_uid, text, len) && ok;
+  len = snprintf(text, sizeof(text), "%u", ar_gid);
+  ok = StoreArHdrField(hdr.ar_gid, text, len) && ok;
+  len = snprintf(text, sizeof(text), "%o", ar_mode);
+  ok = StoreArHdrField(hdr.ar_mode, text, len) && ok;
+  len = snprintf(text, sizeof(text), "%zu", ar_size);
+  ok = StoreArHdrField(hdr.ar_size, text, len) && ok;
+  if (!ok) {
+    LOG(ERROR) << "ar header field overflow: " << DebugString();
+    return false;
+  }
+
+  memmove(hdr.ar_fmag, ARFMAG, sizeof(hdr.ar_fmag));
+  output->assign(reinterpret_cast<char*>(&hdr),  sizeof(hdr));
+  return true;
+}
+
+// Opens |filename| and inspects the archive magic found |offset| bytes into
+// the file (see Init()).  Use Exists() afterwards to check that the file
+// could be opened and read.
+ArFile::ArFile(const string& filename, off_t offset)
+    : filename_(filename),
+      thin_archive_(false),
+      valid_(true),
+      offset_(offset) {
+  Init();
+}
+
+// Same as above with the archive starting at the beginning of the file.
+ArFile::ArFile(const string& filename)
+    : filename_(filename),
+      thin_archive_(false),
+      valid_(true),
+      offset_(0) {
+  Init();
+}
+
+// Nothing to release explicitly here; members clean up after themselves.
+ArFile::~ArFile() {
+}
+
+// Opens |filename_|, seeks to |offset_| and classifies the file by its
+// SARMAG-byte magic.
+//
+// Outcomes:
+//  - open, seek or read failure: |fd_| is left invalid (Exists() == false).
+//  - ARMAG:        regular archive.
+//  - kThinArMagic: thin archive, |thin_archive_| is set.
+//  - anything else: |fd_| stays open but |valid_| is cleared.
+void ArFile::Init() {
+  fd_.reset(ScopedFd::OpenForRead(filename_));
+  if (!fd_.valid()) {
+    // Report the real cause instead of a follow-up "seek" failure, and do
+    // not call Seek()/Close() on a descriptor that was never opened.
+    PLOG(WARNING) << "open:" << filename_;
+    return;
+  }
+  if (fd_.Seek(offset_, ScopedFd::SeekAbsolute) == static_cast<off_t>(-1)) {
+    PLOG(WARNING) << "seek " << offset_ << ":" << filename_;
+    fd_.Close();
+    return;
+  }
+
+  char magic[SARMAG];
+  if (fd_.Read(&magic, SARMAG) != SARMAG) {
+    PLOG(WARNING) << "read magic:" << filename_;
+    fd_.Close();
+    return;
+  }
+  if (memcmp(magic, ARMAG, SARMAG) == 0) {
+    VLOG(1) << "normal ar file:" << filename_;
+    return;
+  }
+  if (memcmp(magic, kThinArMagic, SARMAG) == 0) {
+    VLOG(1) << "thin ar file:" << filename_;
+    thin_archive_ = true;
+    return;
+  }
+
+  // This is not expected ar file.  It is possibly linker script.
+  valid_ = false;
+}
+
+// Returns true if the file descriptor is still valid, i.e. Init() did not
+// close it after a seek or read failure.  Note that this is also true for a
+// file whose magic is not an archive magic.
+bool ArFile::Exists() const {
+  return fd_.valid();
+}
+
+// Returns true if Init() found the thin-archive magic ("!<thin>\n").
+bool ArFile::IsThinArchive() const {
+  return thin_archive_;
+}
+
+// Reads the SARMAG-byte archive magic located at |offset_| into |ar_header|.
+// On success the file position is left just after the magic, i.e. at the
+// first entry header.
+// Returns false if the file is not open, is not a recognized archive, or
+// the seek/read fails.
+bool ArFile::ReadHeader(string* ar_header) const {
+  DCHECK(ar_header);
+  if (!fd_.valid() || !valid_) {
+    // Label the two flags; streamed back to back after the file name they
+    // were unreadable in the log.
+    LOG(WARNING) << "invalid file:" << filename_
+                 << " fd_valid=" << fd_.valid()
+                 << " ar_valid=" << valid_;
+    return false;
+  }
+
+  if (fd_.Seek(offset_, ScopedFd::SeekAbsolute) == static_cast<off_t>(-1)) {
+    PLOG(WARNING) << "seek " << offset_ << ":" << filename_;
+    return false;
+  }
+
+  ar_header->resize(SARMAG);
+  if (fd_.Read(&(*ar_header)[0], SARMAG) != SARMAG) {
+    PLOG(WARNING) << "read SARMAG:" << filename_;
+    return false;
+  }
+  return true;
+}
+
+// Reads the entry that starts at the current file position.
+//
+// |header| receives the parsed entry header.  |body| receives the entry
+// data for symbol table and long-name entries, and for every entry of a
+// non-thin archive; otherwise it is left empty.  When the entry size is odd
+// a '\n' is appended to |body|, mirroring the padding byte that
+// ReadEntryData() skips in the file.
+// Returns false if the header cannot be read or parsed, or the data cannot
+// be read.
+bool ArFile::ReadEntry(EntryHeader* header, string* body) {
+  DCHECK(header);
+  DCHECK(body);
+  // Seeking by 0 relative to the current position only queries the offset.
+  const off_t offset = fd_.Seek(0, ScopedFd::SeekRelative);
+  VLOG(3) << "offset=" << offset;
+  LOG_IF(WARNING, (offset & 1) != 0)
+      << "ar_hdr must be on even boundary: offset:" << offset;
+
+  struct ar_hdr hdr;
+  if (fd_.Read(&hdr, sizeof(hdr)) != sizeof(hdr)) {
+    LOG(ERROR) << "failed to read."
+               << " offset=" << offset;
+    return false;
+  }
+
+  if (!ConvertArHeader(hdr, header)) {
+    LOG(ERROR) << "failed to convert."
+               << " offset=" << offset;
+    return false;
+  }
+
+  body->clear();
+  // In a thin archive only the special tables carry data inside the archive.
+  if (IsSymbolTableEntry(*header) ||
+      IsLongnameEntry(*header) ||
+      !thin_archive_) {
+    if (!ReadEntryData(*header, body)) {
+      PLOG(ERROR) << "read failed:" << header->ar_name;
+      return false;
+    }
+    if (header->ar_size & 1) {
+      body->append(1, '\n');
+    }
+  }
+#ifdef __MACH__
+  // A cleaning failure is only logged; the entry is still returned.
+  if (!CleanIfRanlib(*header, body)) {
+    LOG(WARNING) << "failed to clean ranlib:"
+                 << " filename=" << filename_;
+  }
+#endif
+
+  return true;
+}
+
+// Scans the archive from its first entry and appends the header of every
+// regular member to |entries|.
+//
+// Symbol table entries are skipped.  The long-name table is loaded into
+// |longnames_| and used to resolve "/<offset>" style names.  Headers that
+// fail to parse or whose name cannot be resolved are logged and omitted.
+// This moves the file position, so it must not be interleaved with
+// ReadEntry().
+void ArFile::GetEntries(std::vector<EntryHeader>* entries) {
+  DCHECK(entries);
+  if (fd_.Seek(offset_ + SARMAG, ScopedFd::SeekAbsolute)
+      == static_cast<off_t>(-1)) {
+    PLOG(WARNING) << "seek SARMAG:" << filename_;
+    return;
+  }
+  struct ar_hdr hdr;
+  int i = 0;
+  while (fd_.Read(&hdr, sizeof(hdr)) == sizeof(hdr)) {
+    // offset of the beginning of each entry.
+    const off_t offset = fd_.Seek(0, ScopedFd::SeekRelative) -
+                         static_cast<off_t>(sizeof(hdr));
+    LOG_IF(WARNING, (offset & 1) != 0)
+        << "ar_hdr must be on even boundary: i:" << i << " offset:" << offset;
+    VLOG(2) << "i:" << i << " offset:" << offset << " " << DumpArHdr(hdr);
+    ++i;
+    EntryHeader entry;
+    if (!ConvertArHeader(hdr, &entry)) {
+      VLOG(1) << DumpArHdr(hdr);
+      continue;
+    }
+    VLOG(1) << "entry:" << entry.DebugString();
+    if (IsSymbolTableEntry(entry)) {
+      if (!SkipEntryData(entry)) {
+        PLOG(ERROR) << "skip failed:" << entry.ar_name;
+      }
+      continue;
+    }
+    if (IsLongnameEntry(entry)) {
+      if (!ReadEntryData(entry, &longnames_)) {
+        PLOG(ERROR) << "read failed:" << entry.ar_name;
+      }
+      continue;
+    }
+    if (!FixEntryName(&entry.ar_name)) {
+      LOG(ERROR) << "Fix name failed:" << entry.ar_name;
+      continue;
+    }
+    entries->push_back(entry);
+    // Members of a thin archive have no data inside the archive itself.
+    if (!thin_archive_ && !SkipEntryData(entry)) {
+      // The position is now unknown; reading on would parse member data as
+      // headers, so stop here.
+      PLOG(ERROR) << "skip failed:" << entry.ar_name;
+      break;
+    }
+  }
+}
+
+/* static */
+// Parses the textual fields of the raw |hdr| into |entry_header|.
+//
+// Both ar_name and orig_ar_name receive the raw, space padded name field.
+// Date, uid, gid and size are parsed as decimal and mode as octal.
+// Returns false, leaving |entry_header| untouched, if the ar_fmag
+// terminator does not match ARFMAG.
+bool ArFile::ConvertArHeader(const struct ar_hdr& hdr,
+                             EntryHeader* entry_header) {
+  DCHECK(entry_header != nullptr);
+  if (memcmp(hdr.ar_fmag, ARFMAG, sizeof hdr.ar_fmag) != 0) {
+    LOG(ERROR) << "BAD header name: ["
+               << string(hdr.ar_name, sizeof hdr.ar_name)
+               << "] fmag: [" << string(hdr.ar_fmag, 2) << "]";
+    return false;
+  }
+  // The header fields are not NUL terminated, so each one is copied into a
+  // temporary string before being handed to the C parsing functions.
+  entry_header->orig_ar_name = entry_header->ar_name =
+      string(hdr.ar_name, sizeof hdr.ar_name);
+  entry_header->ar_date = static_cast<time_t>(
+      atoll(string(hdr.ar_date, sizeof hdr.ar_date).c_str()));
+  entry_header->ar_uid = static_cast<uid_t>(
+      atoi(string(hdr.ar_uid, sizeof hdr.ar_uid).c_str()));
+  entry_header->ar_gid = static_cast<gid_t>(
+      atoi(string(hdr.ar_gid, sizeof hdr.ar_gid).c_str()));
+  entry_header->ar_mode = static_cast<mode_t>(
+      strtol(string(hdr.ar_mode, sizeof hdr.ar_mode).c_str(), nullptr, 8));
+  // ar_size holds up to 10 decimal digits, which does not fit in an int;
+  // parse it as long long so members of 2 GiB or more keep their size.
+  entry_header->ar_size = static_cast<size_t>(
+      atoll(string(hdr.ar_size, sizeof hdr.ar_size).c_str()));
+  return true;
+}
+
+// Moves the file position past the data of |entry_header|, including the
+// extra byte that follows data of odd size.  Returns false if the seek
+// fails.
+bool ArFile::SkipEntryData(const EntryHeader& entry_header) {
+  const size_t padded_size =
+      entry_header.ar_size + (entry_header.ar_size & 1);
+  const off_t result = fd_.Seek(padded_size, ScopedFd::SeekRelative);
+  return result != static_cast<off_t>(-1);
+}
+
+// Reads the ar_size bytes of data belonging to |entry_header| from the
+// current file position into |data|, then skips the padding byte that
+// follows data of odd size.
+// Returns false on a read error, on end of file before ar_size bytes were
+// read, or if skipping the padding fails; |data| may then be partly filled.
+bool ArFile::ReadEntryData(const EntryHeader& entry_header, string* data) {
+  DCHECK(data != nullptr);
+  const size_t size = entry_header.ar_size;
+  data->resize(size);
+  size_t nr = 0;
+  while (nr < size) {
+    // Write through the non-const element reference (as ReadHeader() does)
+    // rather than casting away the constness of data(), and keep the
+    // result in Read()'s own return type instead of narrowing it to int.
+    const auto n = fd_.Read(&(*data)[nr], size - nr);
+    if (n <= 0) {
+      return false;
+    }
+    nr += static_cast<size_t>(n);
+  }
+  if (size & 1) {
+    if (fd_.Seek(1, ScopedFd::SeekRelative) == static_cast<off_t>(-1)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Converts the raw ar_name field in |name| into the member's file name.
+//
+// A name starting with '/' is a long-name reference "/<decimal offset>"
+// into |longnames_|; the name runs from that offset up to the next '\n' or
+// '\0', with one trailing '/' removed.  Any other name is a short name and
+// has its trailing ' ' and '/' characters stripped.
+// Returns false, leaving |name| unchanged, if a long-name offset lies
+// outside the long-name table.
+bool ArFile::FixEntryName(string* name) {
+  if (!name->empty() && (*name)[0] == '/') {
+    /* long name */
+    const size_t begin =
+        static_cast<size_t>(strtoul(name->c_str() + 1, nullptr, 10));
+    if (begin >= longnames_.size()) {
+      // The offset comes from the archive and must not be trusted; without
+      // this check the code below reads outside |longnames_|.
+      LOG(ERROR) << "long name offset out of range:"
+                 << " offset=" << begin
+                 << " table size=" << longnames_.size();
+      return false;
+    }
+    size_t end = begin;
+    while ((end < longnames_.size()) &&
+           longnames_[end] != '\n' &&
+           longnames_[end] != '\0') {
+      ++end;
+    }
+    // Only strip a '/' that belongs to this name; for an empty name the
+    // preceding byte is part of the previous table entry.
+    if (end > begin && longnames_[end - 1] == '/')
+      --end;
+    name->assign(longnames_.data() + begin, end - begin);
+    return true;
+  }
+  /* short name */
+  const char* kDelimiters = " /";
+  size_t pos = name->find_last_not_of(kDelimiters);
+  if (pos != string::npos)
+    name->erase(pos + 1);
+
+  return true;
+}
+
+/* static */
+// Returns true if the raw entry name is the symbol table name or the
+// 64-bit symbol table name.
+bool ArFile::IsSymbolTableEntry(const EntryHeader& entry_header) {
+  const string& raw_name = entry_header.ar_name;
+  if (raw_name == kSymbolTableName) {
+    return true;
+  }
+  return raw_name == kSym64TableName;
+}
+
+/* static */
+// Returns true if the raw entry name is the long-name table name.
+bool ArFile::IsLongnameEntry(const EntryHeader& entry_header) {
+  const string& raw_name = entry_header.ar_name;
+  return raw_name == kLongnameTableName;
+}
+
+#ifdef __MACH__
+/* static */
+// If |hdr|/|body| is a mac ranlib (symbol definition) entry, overwrites the
+// bytes after the last referenced string in its string area with '\0'.
+//
+// Returns true if the entry is not a ranlib entry or was processed, and
+// false if the sizes recorded inside the entry are inconsistent with the
+// entry size.  |body| is modified only when true is returned.
+bool ArFile::CleanIfRanlib(const EntryHeader& hdr, string* body) {
+  // Only support ar files on Intel mac (little endian).
+  // You need to convert endian if you need support of big endian such as ppc.
+  //
+  // It is known that mac has a special pattern at the beginning of ranlib.
+  // The magic is given as BSD 4.4 style long name.
+  // However, I do not provide full-spec parser of BSD 4.4 style long name
+  // because thin archive might not be used on mac.
+  static const char* kRanlibName = "#1/20           ";
+  static const size_t kSymdefMagicSize = 20;  // size of SYMDEF magic.
+  if (hdr.orig_ar_name != kRanlibName ||
+      body->size() <= kSymdefMagicSize || !strings::StartsWith(*body, SYMDEF)) {
+    VLOG(1) << "Not mac ranlib file.";
+    return true;
+  }
+
+  // Format of the ranlib entry:
+  // ar header
+  // SYMDEF magic (e.g. __.SYMDEF SORTED): 20 bytes
+  // ranlib area size: 4 bytes.
+  // ranlib area
+  // string area size: 4 bytes.
+  // string area.
+  //
+  // We need to remove garbage bytes at the end of string area.
+  //
+  // All sizes below come from the file and are untrusted, so positions are
+  // tracked as offsets and checked against |limit| before every access.
+  const size_t limit =
+      hdr.ar_size < body->size() ? hdr.ar_size : body->size();
+  char* const base = &(*body)[0];
+  size_t pos = kSymdefMagicSize;
+
+  uint32_t ranlib_size = 0;
+  if (limit < pos + sizeof(ranlib_size)) {
+    LOG(WARNING) << "ranlib size broken:"
+                 << " ar_size=" << hdr.ar_size
+                 << " body size=" << body->size();
+    return false;
+  }
+  memcpy(&ranlib_size, base + pos, sizeof(ranlib_size));
+  pos += sizeof(ranlib_size);
+  const size_t ranlib_pos = pos;
+  if (ranlib_size > limit - pos) {
+    LOG(WARNING) << "ranlib size broken:"
+                 << " ar_size=" << hdr.ar_size
+                 << " ranlib size=0x" << std::hex << ranlib_size;
+    return false;
+  }
+  pos += ranlib_size;
+
+  uint32_t string_size = 0;
+  if (limit - pos < sizeof(string_size)) {
+    LOG(WARNING) << "string size broken:"
+                 << " ar_size=" << hdr.ar_size
+                 << " offset=" << pos;
+    return false;
+  }
+  memcpy(&string_size, base + pos, sizeof(string_size));
+  pos += sizeof(string_size);
+  const size_t string_pos = pos;
+  if (string_size > limit - pos) {
+    LOG(WARNING) << "string size broken:"
+                 << " ar_size=" << hdr.ar_size
+                 << " string size=0x" << std::hex << string_size;
+    return false;
+  }
+  pos += string_size;  // now the end of the string area.
+
+  // See ranlib entries to recognize end of strings.
+  uint32_t last_offset = 0;
+  const size_t num_ranlibs = ranlib_size / sizeof(struct ranlib);
+  for (size_t i = 0; i < num_ranlibs; ++i) {
+    // Copy each record out so no alignment is assumed for the buffer.
+    struct ranlib entry;
+    memcpy(&entry, base + ranlib_pos + i * sizeof(entry), sizeof(entry));
+    const uint32_t str_offset = entry.ran_un.ran_strx;
+    if (last_offset < str_offset)
+      last_offset = str_offset;
+  }
+  if (last_offset > string_size) {
+    LOG(WARNING) << "string size in ranlib entry broken:"
+                 << " ar_size=" << hdr.ar_size
+                 << " string size=" << string_size
+                 << " str_offset=0x" << std::hex << last_offset
+                 << " offset=" << std::dec << pos;
+    return false;
+  }
+
+  // Find the terminator of the last string without leaving the string area.
+  const char* string_base = base + string_pos;
+  const void* terminator =
+      memchr(string_base + last_offset, '\0', string_size - last_offset);
+  if (terminator == nullptr) {
+    // The last string runs to the end of the area: nothing follows it.
+    return true;
+  }
+  const size_t last_end_of_string =
+      static_cast<size_t>(static_cast<const char*>(terminator) - string_base) +
+      1;
+  if (last_end_of_string < string_size) {
+    memset(base + string_pos + last_end_of_string, '\0',
+           string_size - last_end_of_string);
+  }
+  return true;
+}
+#endif
+
+}  // namespace devtools_goma
diff --git a/client/arfile.h b/client/arfile.h
new file mode 100644
index 0000000..efbc20c
--- /dev/null
+++ b/client/arfile.h
@@ -0,0 +1,97 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_ARFILE_H_
+#define DEVTOOLS_GOMA_CLIENT_ARFILE_H_
+
+#include <sys/types.h>
+
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+#include "gtest/gtest_prod.h"
+#include "scoped_fd.h"
+
+struct ar_hdr;
+
+namespace devtools_goma {
+
+// Ar file parser: exposes the archive header and the entries of an ar file.
+class ArFile {
+ public:
+  struct EntryHeader {  // One entry's header; see ConvertArHeader().
+    std::string ar_name;
+    time_t ar_date;
+    uid_t ar_uid;
+    gid_t ar_gid;
+    mode_t ar_mode;
+    size_t ar_size;
+
+    // Original ar_name of the entry.  ar_name may be rewritten for long names
+    // in FixEntryName().
+    std::string orig_ar_name;
+    bool SerializeToString(std::string* output) const;
+    std::string DebugString() const;
+  };
+  explicit ArFile(const std::string& filename);
+  explicit ArFile(const std::string& filename, off_t offset);
+  virtual ~ArFile();
+
+  virtual const std::string& filename() const { return filename_; }
+  virtual bool Exists() const;
+  virtual bool IsThinArchive() const;
+  virtual off_t offset() const { return offset_; }
+
+  // Note:
+  // You SHOULD NOT mix GetEntries with ReadEntry on the same object.
+  // It may break the ReadEntry result.
+  virtual void GetEntries(std::vector<EntryHeader>* entries);
+
+  // Reads the header of an archive file.
+  // Returns true for success and the header is stored to |ar_header|.
+  virtual bool ReadHeader(std::string* ar_header) const;
+  // Reads an entry in an archive file.
+  // Returns true for success.
+  // The entry header is stored to |header|.
+  // The entry body is stored to |body|.  For thin archive, body could be set to
+  // empty string.
+  virtual bool ReadEntry(EntryHeader* header, std::string* body);
+
+ private:
+  friend class StubArFile;
+#ifdef __MACH__
+  FRIEND_TEST(ArFileTest, CleanIfRanlibTest);
+#endif
+  // Test-only; SHOULD NOT be used otherwise.  Leaves no scalar indeterminate.
+  ArFile() : thin_archive_(false), valid_(false), offset_(0) {}
+  static bool ConvertArHeader(const struct ar_hdr& hdr,
+                              EntryHeader* entry_header);
+  bool SkipEntryData(const EntryHeader& entry_header);
+  bool ReadEntryData(const EntryHeader& entry_header, std::string* data);
+  bool FixEntryName(std::string* name);
+  void Init();
+
+#ifdef __MACH__
+  // Cleans garbage bytes in a ranlib entry.
+  static bool CleanIfRanlib(const EntryHeader& hdr, std::string* body);
+#endif
+
+  static bool IsSymbolTableEntry(const EntryHeader& entry_header);
+  static bool IsLongnameEntry(const EntryHeader& entry_header);
+
+  std::string filename_;
+  ScopedFd fd_;
+  bool thin_archive_;
+  std::string longnames_;
+  bool valid_;
+  off_t offset_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArFile);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_ARFILE_H_
diff --git a/client/arfile_reader.cc b/client/arfile_reader.cc
new file mode 100644
index 0000000..f8e051e
--- /dev/null
+++ b/client/arfile_reader.cc
@@ -0,0 +1,233 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "arfile_reader.h"
+
+#include <memory>
+
+#include "glog/logging.h"
+#ifdef __MACH__
+#include "mach_o_parser.h"
+#endif
+#include "path_util.h"
+#include "scoped_fd.h"
+#include "string_piece_utils.h"
+
+namespace devtools_goma {
+
+ArFileReader::ArFileReader(std::unique_ptr<ArFile> arfile)
+    : FileReader(arfile->filename()), current_offset_(arfile->offset()),
+      arfile_(std::move(arfile)), is_valid_(true) {
+  if (!arfile_->ReadHeader(&read_buffer_)) {
+    // |arfile| is null after the move above; only |arfile_| may be used here.
+    VLOG(2) << "invalid arfile:" << arfile_->filename();
+    is_valid_ = false;
+  }
+}
+
+/* static */
+std::unique_ptr<FileReader> ArFileReader::Create(const string& filename) {
+  // Not an ar archive by name: decline.
+  if (!CanHandle(filename))
+    return nullptr;
+
+#ifdef __MACH__
+  // Files accepted by GetFatHeader() are served by FatArFileReader.
+  std::unique_ptr<MacFatHeader> fat_header(new MacFatHeader);
+  ScopedFd fat_fd(ScopedFd::OpenForRead(filename));
+  if (GetFatHeader(fat_fd, fat_header.get())) {
+    std::unique_ptr<FileReader> fat_reader(
+        new FatArFileReader(filename, std::move(fat_header)));
+    if (fat_reader->valid())
+      return fat_reader;
+    LOG(INFO) << "Invalid .a file: " << filename;
+    return nullptr;
+  }
+#endif
+
+  // Everything else goes through a plain ArFileReader.
+  std::unique_ptr<FileReader> reader(
+      new ArFileReader(std::unique_ptr<ArFile>(new ArFile(filename))));
+  if (reader->valid())
+    return reader;
+  LOG(INFO) << "Invalid .a file: " << filename;
+  return nullptr;
+}
+
+/* static */ bool ArFileReader::CanHandle(const string& filename) {
+  static const char kArchiveSuffix[] = ".a";  // Decided by file name only.
+  return strings::EndsWith(filename, kArchiveSuffix);
+}
+
+// Serves |len| bytes of the normalized archive into |ptr|; -1 on failure.
+ssize_t ArFileReader::Read(void* ptr, size_t len) {
+  size_t read_bytes = FileReader::FlushDataInBuffer(&read_buffer_, &ptr, &len);
+  while (len > 0) {  // Still short: append the next normalized entry.
+    VLOG(3) << "reading ...:" << " read_bytes=" << read_bytes << " len=" << len
+            << " total_off=" << read_bytes + current_offset_;
+    ArFile::EntryHeader entry_header;
+    string entry_body;
+    if (!arfile_->ReadEntry(&entry_header, &entry_body)) {
+      LOG(ERROR) << "failed to read entry."
+                 << " current_offset_=" << current_offset_
+                 << " read_bytes=" << read_bytes << " len=" << len;
+      return -1;
+    }
+    NormalizeArHdr(&entry_header);
+    // Emitting the body without a valid header would corrupt the stream.
+    if (!entry_header.SerializeToString(&read_buffer_)) {
+      LOG(ERROR) << "failed to serialize entry header.";
+      return -1;
+    }
+    read_buffer_.append(entry_body);
+    read_bytes += FileReader::FlushDataInBuffer(&read_buffer_, &ptr, &len);
+  }
+  current_offset_ += read_bytes;
+  return read_bytes;
+}
+
+off_t ArFileReader::Seek(off_t offset, ScopedFd::Whence whence) const {
+  // Nothing is actually moved: |offset| must equal where the reads stopped.
+  DCHECK_EQ(whence, ScopedFd::SeekAbsolute)
+      << "Sorry, this function only support to set absolute position.";
+  DCHECK_EQ(offset, current_offset_)
+      << "Sorry, this function expects the users to set just next position"
+      << " of the last seek.";
+  return offset;  // The requested position is echoed back unchanged.
+}
+
+void ArFileReader::NormalizeArHdr(ArFile::EntryHeader* hdr) {
+  // Zero date, owner, group and mode; the names and ar_size are not touched.
+  hdr->ar_mode = 0;
+  hdr->ar_gid = 0;
+  hdr->ar_uid = 0;
+  hdr->ar_date = 0;
+}
+
+#ifdef __MACH__
+// Constructor without a reader factory: forwards to the constructor below
+// with a null factory, so CreateArFileReader() builds real ArFile-backed
+// readers.
+FatArFileReader::FatArFileReader(
+    const string& filename, std::unique_ptr<MacFatHeader> f_hdr)
+    : FatArFileReader(filename, std::move(f_hdr), nullptr) {
+}
+
+// Takes ownership of |f_hdr| but not of |create_arfile_reader|.
+// The reader starts out valid at offset 0 on the first arch; Init() may
+// clear |is_valid_| again.
+FatArFileReader::FatArFileReader(
+    const string& filename, std::unique_ptr<MacFatHeader> f_hdr,
+    ArFileReaderFactory* create_arfile_reader)
+    : FileReader(filename),
+      is_valid_(true),
+      filename_(filename),
+      f_hdr_(std::move(f_hdr)),
+      current_offset_(0),
+      cur_arch_idx_(0),
+      create_arfile_reader_factory_(create_arfile_reader) {
+  // |arr_|, |read_buffer_| and |cur_arch_| are set up by Init().
+  Init();
+}
+
+// Loads the raw fat header plus the first arch's archive into |read_buffer_|.
+void FatArFileReader::Init() {
+  read_buffer_.assign(&f_hdr_->raw[0], f_hdr_->raw.size());
+  // A header without archs has no archs[0]; treat it as invalid, not as UB.
+  cur_arch_ = f_hdr_->archs.empty() ? nullptr : &f_hdr_->archs[0];
+  if (cur_arch_) arr_ = CreateArFileReader(filename_, cur_arch_->offset);
+  if (!arr_ || !arr_->valid()) {  // |arr_| stays null when there is no arch.
+    is_valid_ = false;
+    return;
+  }
+  read_buffer_.resize(f_hdr_->raw.size() + cur_arch_->size);
+  if (arr_->Read(&read_buffer_[f_hdr_->raw.size()], cur_arch_->size)
+      != static_cast<ssize_t>(cur_arch_->size)) {
+    LOG(WARNING) << "Read failed:" << " arch=" << cur_arch_->arch_name
+                 << " off=" << cur_arch_->offset << " size=" << cur_arch_->size
+                 << " buf_size=" << read_buffer_.size();
+    is_valid_ = false;
+  }
+}
+
+ssize_t FatArFileReader::Read(void* ptr, size_t len) {
+  size_t read_bytes = 0;  // Bytes handed to the caller by this call.
+  if (!is_valid_) {  // Cleared by a failed Init() or by ReturnReadError().
+    return -1;
+  }
+
+  read_bytes += FileReader::FlushDataInBuffer(&read_buffer_, &ptr, &len);
+  while (len > 0) {  // Still need data: move on to the next arch.
+    cur_arch_idx_++;
+    if (cur_arch_idx_ >= f_hdr_->archs.size()) {
+      LOG(WARNING) << "No more data:"
+                   << " filename=" << filename_
+                   << " len=" << len
+                   << " off=" << current_offset_ + read_bytes;
+      return ReturnReadError(read_bytes);
+    }
+    cur_arch_ = &f_hdr_->archs[cur_arch_idx_];
+    arr_ = CreateArFileReader(filename_, cur_arch_->offset);
+    if (!arr_->valid()) {
+      is_valid_ = false;
+      LOG(WARNING) << "got invalid during reading from arfile."
+                   << " filename=" << filename_
+                   << " off=" << current_offset_;
+      return ReturnReadError(read_bytes);
+    }
+    read_buffer_.resize(cur_arch_->size);  // Whole arch is read in one go.
+    ssize_t read_size = arr_->Read(&read_buffer_[0], read_buffer_.size());
+    if (read_size == -1) {
+      LOG(WARNING) << "Read ar file failed:"
+                   << " filename=" << filename_
+                   << " off=" << cur_arch_->offset
+                   << " size=" << cur_arch_->size;
+      return ReturnReadError(read_bytes);
+    }
+    CHECK_EQ(read_size, static_cast<ssize_t>(read_buffer_.size()));
+    CHECK(!read_buffer_.empty());  // NOTE(review): aborts on a 0-size arch.
+    read_bytes += FileReader::FlushDataInBuffer(&read_buffer_, &ptr, &len);
+  }
+  CHECK_EQ(0U, len)
+      << "Read failed:"
+      << " arch=" << cur_arch_->arch_name
+      << " off=" << cur_arch_->offset
+      << " size=" << cur_arch_->size
+      << " len=" << len
+      << " buf_size=" << read_buffer_.size();
+
+  current_offset_ += read_bytes;  // Not reached on the error returns above.
+  return read_bytes;
+}
+
+off_t FatArFileReader::Seek(off_t offset, ScopedFd::Whence whence) const {
+  // Nothing is actually moved: |offset| must equal where the reads stopped.
+  DCHECK_EQ(whence, ScopedFd::SeekAbsolute)
+      << "Sorry, this function only support to set absolute position.";
+  DCHECK_EQ(offset, current_offset_)
+      << "Sorry, this function expects the users to set just next position"
+      << " of the last seek.";
+  return offset;  // The requested position is echoed back unchanged.
+}
+
+std::unique_ptr<ArFileReader> FatArFileReader::CreateArFileReader(
+    const string& filename, off_t offset) {
+  // A factory handed to the constructor takes precedence; without one a
+  // reader backed by an ArFile opened at |offset| is created.
+  if (create_arfile_reader_factory_ != nullptr)
+    return create_arfile_reader_factory_->CreateArFileReader(filename, offset);
+  return std::unique_ptr<ArFileReader>(new ArFileReader(
+      std::unique_ptr<ArFile>(new ArFile(filename, offset))));
+}
+
+ssize_t FatArFileReader::ReturnReadError(ssize_t read_bytes) {
+  // Every failure invalidates the reader.  A non-zero |read_bytes| is passed
+  // through as-is; zero is turned into -1.
+  is_valid_ = false;
+  return read_bytes == 0 ? -1 : read_bytes;
+}
+
+#endif
+
+}  // namespace devtools_goma
diff --git a/client/arfile_reader.h b/client/arfile_reader.h
new file mode 100644
index 0000000..d2f013d
--- /dev/null
+++ b/client/arfile_reader.h
@@ -0,0 +1,131 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+// FileReader subclass that normalizes an ar file while it is being read.
+//
+// Even if you create ar files from the same object files, the created files
+// are different.  That is because an ar file contains information that
+// comes from the files' stat data.  For a better cache hit rate, we want the
+// same ar file for the same objects.  ArFileReader normalizes it during
+// reading.  The class is thread-unsafe.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_ARFILE_READER_H_
+#define DEVTOOLS_GOMA_CLIENT_ARFILE_READER_H_
+
+#include <memory>
+#include <string>
+
+#include "arfile.h"
+#ifdef _WIN32
+#include "config_win.h"
+#endif
+#include "file_reader.h"
+#include "gtest/gtest_prod.h"
+#include "scoped_fd.h"
+
+namespace devtools_goma {
+
+struct MacFatHeader;
+struct MacFatArch;
+
+// A subclass of FileReader that normalizes an ar file while reading it.
+class ArFileReader : public FileReader {
+ public:
+  ssize_t Read(void* ptr, size_t len) override;
+  off_t Seek(off_t offset, ScopedFd::Whence whence) const override;
+  bool valid() const override { return is_valid_; }
+  static void Register() {
+    FileReaderFactory::Register(&Create);
+  }
+
+ private:
+  // Returns an instance of ArFileReader if this class can handle |filename|.
+  // Otherwise, returns nullptr.
+  static std::unique_ptr<FileReader> Create(const std::string& filename);
+  // Returns true if |filename| is a .a file's name.
+  static bool CanHandle(const std::string& filename);
+  // Takes ownership of |arfile|.
+  explicit ArFileReader(std::unique_ptr<ArFile> arfile);
+
+  // DON'T USE THIS: test-only.  Scalars are set so nothing reads garbage.
+  explicit ArFileReader(const std::string& filename)
+      : FileReader(filename), current_offset_(0), is_valid_(false) {}
+
+  // Normalizes |hdr|.
+  // Keeps fields that won't change for the same object files.  Clears anything
+  // else.
+  static void NormalizeArHdr(ArFile::EntryHeader* hdr);
+
+  off_t current_offset_;
+  // Data to be copied by the Read function is stored in |read_buffer_|.
+  // If |len| of Read is less than the buffered size, the remaining data is
+  // kept here until the next call of Read.
+  std::string read_buffer_;
+  std::unique_ptr<ArFile> arfile_;
+  bool is_valid_;
+
+  friend class FatArFileReader;
+  friend class ArFileReaderTest;
+  friend class StubArFileReader;
+  FRIEND_TEST(ArFileReaderTest, Read);
+  FRIEND_TEST(ArFileReaderTest, valid);
+  FRIEND_TEST(ArFileReaderTest, CanHandle);
+  FRIEND_TEST(ArFileReaderTest, NormalizeArHeader);
+  DISALLOW_COPY_AND_ASSIGN(ArFileReader);
+};
+
+#ifdef __MACH__
+class FatArFileReader : public FileReader {  // FileReader for Mac fat .a files.
+ public:
+  ssize_t Read(void* ptr, size_t len) override;
+  off_t Seek(off_t offset, ScopedFd::Whence whence) const override;
+  bool valid() const override { return is_valid_; }
+
+ private:
+  class ArFileReaderFactory {
+   public:
+    virtual ~ArFileReaderFactory() {}
+    virtual std::unique_ptr<ArFileReader> CreateArFileReader(
+        const std::string& filename, off_t offset) = 0;
+  };
+
+  // Takes ownership of |f_hdr|.
+  FatArFileReader(const std::string& filename,
+                  std::unique_ptr<MacFatHeader> f_hdr);
+
+  // Test-only: |create_arfile_reader| supplies the ArFileReader instances.
+  // Takes ownership of |f_hdr| but not of |create_arfile_reader|.
+  FatArFileReader(const std::string& filename,
+                  std::unique_ptr<MacFatHeader> f_hdr,
+                  ArFileReaderFactory* create_arfile_reader);
+
+  std::unique_ptr<ArFileReader> CreateArFileReader(const std::string& filename,
+                                                   off_t offset);
+
+  void Init();
+  ssize_t ReturnReadError(ssize_t read_bytes);
+
+  friend class ArFileReader;
+  friend class FatArFileReaderTest;
+  FRIEND_TEST(FatArFileReaderTest, Read);
+  bool is_valid_;
+  std::string filename_;
+  std::unique_ptr<MacFatHeader> f_hdr_;
+  std::unique_ptr<ArFileReader> arr_;
+  std::string read_buffer_;
+  off_t current_offset_;
+
+  // Points at the entry of |f_hdr_->archs| that is currently being read.
+  MacFatArch* cur_arch_;
+  size_t cur_arch_idx_;
+
+  ArFileReaderFactory* create_arfile_reader_factory_;
+  DISALLOW_COPY_AND_ASSIGN(FatArFileReader);
+};
+#endif
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_ARFILE_READER_H_
diff --git a/client/arfile_reader_unittest.cc b/client/arfile_reader_unittest.cc
new file mode 100644
index 0000000..4d90ffc
--- /dev/null
+++ b/client/arfile_reader_unittest.cc
@@ -0,0 +1,623 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "arfile_reader.h"
+
+#include <list>
+#include <string>
+#include <vector>
+
+#include "compiler_specific.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+#include "gtest/gtest.h"
+
+#ifdef __MACH__
+#include "mach_o_parser.h"
+#endif
+
+namespace {
+#ifdef __MACH__
+// Placeholder file name handed to the readers under test.
+const char kDummyFilename[] = "dummyfilename";
+#endif
+// Placeholder payload used as the arfile header and as ar entry bodies.
+const char kDummyValue[] = "dummy value";
+// ar_name field of an entry header.  This should be 16 bytes.
+const char kDummyArname[] = "dummy           ";
+// Size of the read buffer in bytes.
+const size_t kBufSize = 1024;
+}  // namespace
+
+namespace devtools_goma {
+
+// ArFile stand-in that replays canned ReadHeader()/ReadEntry() results.
+class StubArFile : public ArFile {
+ public:
+  StubArFile() : read_header_return_(true) {}
+  ~StubArFile() override {}
+  bool IsThinArchive() const override { return true; }
+  void GetEntries(std::vector<ArFile::EntryHeader>* entries ALLOW_UNUSED)
+      override {
+    LOG(FATAL) << "Not implemented";
+  }
+  // Hands out the canned header text together with the canned result.
+  bool ReadHeader(string* ar_header) const override {
+    *ar_header = header_;
+    return read_header_return_;
+  }
+  // Pops the oldest queued entry; fails once the queue has run dry.
+  bool ReadEntry(ArFile::EntryHeader* header, string* body) override {
+    if (entries_.empty())
+      return false;
+    const EntryInfo& next = entries_.front();
+    *header = next.header;
+    *body = next.body;
+    const bool result = next.return_value;
+    entries_.pop_front();
+    return result;
+  }
+  void SetReadHeaderReturn(bool return_value, const string& header) {
+    header_ = header;
+    read_header_return_ = return_value;
+  }
+  // Queues one ReadEntry() outcome; outcomes are replayed in FIFO order.
+  void AddReadEntryReturn(bool return_value,
+                          const ArFile::EntryHeader& header,
+                          const string& body) {
+    entries_.push_back(EntryInfo());
+    EntryInfo& queued = entries_.back();
+    queued.return_value = return_value;
+    queued.header = header;
+    queued.body = body;
+  }
+ private:
+  bool read_header_return_;
+  string header_;
+  struct EntryInfo {
+    bool return_value;
+    ArFile::EntryHeader header;
+    string body;
+  };
+  std::list<EntryInfo> entries_;
+};
+
+TEST(ArFileReaderTest, CanHandle) {
+  EXPECT_TRUE(ArFileReader::CanHandle("example.a"));
+  EXPECT_FALSE(ArFileReader::CanHandle("example.cc"));
+  EXPECT_FALSE(ArFileReader::CanHandle("example.h"));
+  EXPECT_FALSE(ArFileReader::CanHandle("example.o"));
+  EXPECT_FALSE(ArFileReader::CanHandle("example.a.cc"));  // ".a" not last.
+}
+
+TEST(ArFileReaderTest, valid) {
+  {
+    // Should be invalid if reading the arfile header failed.
+    std::unique_ptr<StubArFile> arfile(new StubArFile());
+    arfile->SetReadHeaderReturn(false, "");
+    std::unique_ptr<ArFileReader> reader(new ArFileReader(std::move(arfile)));
+    EXPECT_FALSE(reader->valid());
+  }
+
+  {
+    // Should be valid if the arfile header was read successfully.
+    std::unique_ptr<StubArFile> arfile(new StubArFile());
+    arfile->SetReadHeaderReturn(true, kDummyValue);
+    std::unique_ptr<ArFileReader> reader(new ArFileReader(std::move(arfile)));
+    EXPECT_TRUE(reader->valid());
+    EXPECT_EQ(kDummyValue, reader->read_buffer_);  // Header is buffered.
+  }
+}
+
+TEST(ArFileReaderTest, NormalizeArHeader) {
+  // date/uid/gid/mode must be zeroed; both names and the size must survive.
+  ArFile::EntryHeader hdr;
+  hdr.ar_name.assign(kDummyValue);
+  hdr.orig_ar_name.assign(kDummyValue);
+  hdr.ar_date = 1;
+  hdr.ar_uid = 1;
+  hdr.ar_gid = 1;
+  hdr.ar_mode = 1;
+  hdr.ar_size = 1;
+
+  std::unique_ptr<StubArFile> arfile(new StubArFile());
+  std::unique_ptr<ArFileReader> reader(new ArFileReader(std::move(arfile)));
+  reader->NormalizeArHdr(&hdr);  // Static member, called via the instance.
+  EXPECT_EQ(kDummyValue, hdr.ar_name);
+  EXPECT_EQ(kDummyValue, hdr.orig_ar_name);
+  EXPECT_EQ(0L, hdr.ar_date);
+  EXPECT_EQ(0U, hdr.ar_uid);
+  EXPECT_EQ(0U, hdr.ar_gid);
+  EXPECT_EQ(0U, hdr.ar_mode);
+  EXPECT_EQ(1U, hdr.ar_size);
+}
+
+TEST(ArFileReaderTest, Read) {
+  char buf[kBufSize];
+  ssize_t len, copied;
+  ArFile::EntryHeader dummy_entry_header, expected_entry_header;
+  dummy_entry_header.ar_name.assign(kDummyArname);
+  dummy_entry_header.orig_ar_name.assign(kDummyArname);
+  dummy_entry_header.ar_date = 0xaa;
+  dummy_entry_header.ar_uid = 0xaa;
+  dummy_entry_header.ar_gid = 0xaa;
+  dummy_entry_header.ar_mode = 0xaa;
+  dummy_entry_header.ar_size = strlen(kDummyValue);
+  expected_entry_header.ar_name.assign(kDummyArname);
+  expected_entry_header.orig_ar_name.assign(kDummyArname);
+  expected_entry_header.ar_date = 0;
+  expected_entry_header.ar_uid = 0;
+  expected_entry_header.ar_gid = 0;
+  expected_entry_header.ar_mode = 0;
+  expected_entry_header.ar_size = strlen(kDummyValue);
+  string entry_header_string;  // Serialized form of the normalized header.
+  CHECK(expected_entry_header.SerializeToString(&entry_header_string));
+  string expected_out;
+
+  {
+    // 0. Should not read anything if len = 0.
+    std::unique_ptr<StubArFile> arfile(new StubArFile());
+    arfile->SetReadHeaderReturn(true, kDummyValue);
+    std::unique_ptr<ArFileReader> reader(new ArFileReader(std::move(arfile)));
+    CHECK(reader);
+    len = 0;
+    buf[0] = '\0';
+    copied = reader->Read(buf, len);
+    EXPECT_EQ(0U, copied);  // NOTE(review): unsigned literal vs ssize_t.
+    EXPECT_EQ(kDummyValue, reader->read_buffer_);
+    EXPECT_EQ('\0', buf[0]);
+  }
+
+  {
+    // 1. Should read just the arfile header if len is kDummyValue's length.
+    std::unique_ptr<StubArFile> arfile(new StubArFile());
+    arfile->SetReadHeaderReturn(true, kDummyValue);
+    std::unique_ptr<ArFileReader> reader(new ArFileReader(std::move(arfile)));
+    CHECK(reader);
+    len = strlen(kDummyValue);
+    buf[0] = '\0';
+    copied = reader->Read(buf, len);
+    EXPECT_EQ(len, copied);
+    EXPECT_EQ("", reader->read_buffer_);
+    EXPECT_EQ(kDummyValue, string(buf, copied));
+  }
+
+  {
+    // 2. Should refill if len > header length.
+    std::unique_ptr<StubArFile> arfile(new StubArFile());
+    arfile->SetReadHeaderReturn(true, kDummyValue);
+    arfile->AddReadEntryReturn(true, dummy_entry_header, kDummyValue);
+    expected_out.assign(kDummyValue);
+    expected_out.append(entry_header_string);
+    expected_out.append(kDummyValue);
+    std::unique_ptr<ArFileReader> reader(new ArFileReader(std::move(arfile)));
+    CHECK(reader);
+    len = expected_out.size();
+    buf[0] = '\0';
+    copied = reader->Read(buf, len);
+    EXPECT_EQ(len, copied);
+    EXPECT_EQ("", reader->read_buffer_);
+    EXPECT_EQ(expected_out, string(buf, copied));
+  }
+
+  {
+    // 3. Should keep unread data when len < available and serve it later.
+    // +---------------+
+    // + arfile header | <- 3-1
+    // +---------------+
+    // | entry header  | <- 3-2
+    // +-.-.-.-.-.-.-.-+ <- 3-3
+    // | entry body    | <- 3-4
+    // +---------------+ <- 3-5.
+    // | entry header 2|
+    // +-.-.-.-.-.-.-.-+
+    // | entry body 2  |
+    // +---------------+ <- 3-6.
+    std::unique_ptr<StubArFile> arfile(new StubArFile());
+    arfile->SetReadHeaderReturn(true, kDummyValue);
+    arfile->AddReadEntryReturn(true, dummy_entry_header, kDummyValue);
+    arfile->AddReadEntryReturn(true, dummy_entry_header, kDummyValue);
+    std::unique_ptr<ArFileReader> reader(new ArFileReader(std::move(arfile)));
+    CHECK(reader);
+
+    // 3-1. len is middle of the arfile header.
+    expected_out.assign(kDummyValue);
+    expected_out = expected_out.substr(0, expected_out.size() / 2);
+    len = expected_out.size();
+    buf[0] = '\0';
+    copied = reader->Read(buf, len);
+    EXPECT_EQ(len, copied);
+    EXPECT_EQ(expected_out, string(buf, copied));
+
+    // 3-2. len is middle of the entry header.
+    expected_out.assign(kDummyValue);
+    expected_out = expected_out.substr(expected_out.size() / 2);
+    expected_out.append(
+        entry_header_string.substr(0, entry_header_string.size() / 2));
+    len = expected_out.size();
+    buf[0] = '\0';
+    copied = reader->Read(buf, len);
+    EXPECT_EQ(len, copied);
+    EXPECT_EQ(expected_out, string(buf, copied));
+
+    // 3-3. len is end of the entry header.
+    expected_out.assign(
+        entry_header_string.substr(entry_header_string.size() / 2));
+    len = expected_out.size();
+    buf[0] = '\0';
+    copied = reader->Read(buf, len);
+    EXPECT_EQ(len, copied);
+    EXPECT_EQ(expected_out, string(buf, copied));
+
+    // 3-4. len is middle of the entry body.
+    expected_out.assign(kDummyValue);
+    expected_out = expected_out.substr(0, expected_out.size() / 2);
+    len = expected_out.size();
+    buf[0] = '\0';
+    copied = reader->Read(buf, len);
+    EXPECT_EQ(len, copied);
+    EXPECT_EQ(expected_out, string(buf, copied));
+
+    // 3-5. read the rest of the entry body.
+    expected_out.assign(kDummyValue);
+    expected_out = expected_out.substr(expected_out.size() / 2);
+    len = expected_out.size();
+    buf[0] = '\0';
+    copied = reader->Read(buf, len);
+    EXPECT_EQ(len, copied);
+    EXPECT_EQ("", reader->read_buffer_);
+    EXPECT_EQ(expected_out, string(buf, copied));
+
+    // 3-6. read the whole second entry (header and body) in one call.
+    expected_out.assign(entry_header_string);
+    expected_out.append(kDummyValue);
+    len = expected_out.size();
+    buf[0] = '\0';
+    copied = reader->Read(buf, len);
+    EXPECT_EQ(len, copied);
+    EXPECT_EQ("", reader->read_buffer_);
+    EXPECT_EQ(expected_out, string(buf, copied));
+  }
+
+  {
+    // 4. Should return -1 if ReadEntry failed.
+    std::unique_ptr<StubArFile> arfile(new StubArFile());
+    arfile->SetReadHeaderReturn(true, kDummyValue);
+    std::unique_ptr<ArFileReader> reader(new ArFileReader(std::move(arfile)));
+    CHECK(reader);
+    len = strlen(kDummyValue) + 1;  // One byte more than the header holds.
+    EXPECT_EQ(-1, reader->Read(buf, len));
+  }
+}
+
+#ifdef __MACH__
+// ArFileReader stand-in whose valid() and Read() results are set by the test.
+class StubArFileReader : public ArFileReader {
+ public:
+  // |read_return_| starts at 0 so Read() is defined before SetReadReturn().
+  explicit StubArFileReader(const string& filename)
+      : ArFileReader(filename), valid_(true), read_return_(0) {}
+
+  bool valid() const override { return valid_; }
+  // Copies from the canned contents but always reports the canned result.
+  ssize_t Read(void* ptr, size_t len) override {
+    FileReader::FlushDataInBuffer(&contents_, &ptr, &len);
+    return read_return_;
+  }
+
+  void SetValid(bool valid) { valid_ = valid; }
+  void SetReadReturn(ssize_t return_value, const string& contents) {
+    read_return_ = return_value;
+    contents_.assign(contents);
+  }
+
+ private:
+  bool valid_;
+  ssize_t read_return_;
+  string contents_;
+};
+
+
+class FatArFileReaderTest : public FatArFileReader::ArFileReaderFactory,
+                            public testing::Test {
+ protected:
+  // Takes ownership of |fhdr|; this fixture acts as the reader factory.
+  std::unique_ptr<FatArFileReader> CreateFatArFileReader(
+      std::unique_ptr<MacFatHeader> fhdr) {
+    return std::unique_ptr<FatArFileReader>(new FatArFileReader(
+        kDummyFilename, std::move(fhdr), this));
+  }
+
+  // Takes ownership of |fhdr|; queues a valid stub for archs[0] beforehand.
+  std::unique_ptr<FatArFileReader> CreateValidArFileReader(
+      std::unique_ptr<MacFatHeader> fhdr) {
+    CHECK(fhdr);
+    CHECK_GT(fhdr->archs.size(), static_cast<size_t>(0));
+    CHECK_GT(fhdr->archs[0].size, static_cast<size_t>(0));
+
+    std::unique_ptr<StubArFileReader> stub_reader(
+        new StubArFileReader(kDummyFilename));
+    stub_reader->SetValid(true);
+    stub_reader->SetReadReturn(fhdr->archs[0].size, kDummyValue);
+    arfile_reader_.push_back(std::move(stub_reader));
+    std::unique_ptr<FatArFileReader> fat_reader(
+        CreateFatArFileReader(std::move(fhdr)));
+    EXPECT_TRUE(fat_reader->valid());
+    return fat_reader;
+  }
+
+  // Hands out the queued stub readers in FIFO order; arguments are ignored.
+  std::unique_ptr<ArFileReader> CreateArFileReader(
+      const string& filename, off_t offset) override {
+    CHECK(!arfile_reader_.empty());
+    std::unique_ptr<ArFileReader> ret(std::move(arfile_reader_.front()));
+    arfile_reader_.pop_front();
+    return ret;
+  }
+
+  std::list<std::unique_ptr<ArFileReader>> arfile_reader_;  // Pending stubs.
+};
+
+TEST_F(FatArFileReaderTest, valid) {
+  std::unique_ptr<FatArFileReader> fat_reader;
+  std::unique_ptr<MacFatHeader> f_hdr;
+  MacFatArch dummy_arch = MacFatArch();  // Zeroed: Init() reads |offset|.
+  std::unique_ptr<StubArFileReader> stub_reader;
+
+  // Should be invalid if arfile_reader is invalid.
+  arfile_reader_.clear();
+  f_hdr.reset(new MacFatHeader);
+  f_hdr->raw.assign(kDummyValue);
+  f_hdr->archs.push_back(dummy_arch);
+
+  stub_reader.reset(new StubArFileReader(kDummyFilename));
+  stub_reader->SetValid(false);
+  arfile_reader_.push_back(std::move(stub_reader));
+  fat_reader = CreateFatArFileReader(std::move(f_hdr));
+  EXPECT_FALSE(fat_reader->valid());
+  EXPECT_TRUE(arfile_reader_.empty());
+
+  // Should be invalid if arfile_reader Read failed.
+  arfile_reader_.clear();
+  f_hdr.reset(new MacFatHeader);
+  f_hdr->raw.assign(kDummyValue);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+
+  stub_reader.reset(new StubArFileReader(kDummyFilename));
+  stub_reader->SetValid(true);
+  stub_reader->SetReadReturn(-1, kDummyValue);
+  arfile_reader_.push_back(std::move(stub_reader));
+  fat_reader = CreateFatArFileReader(std::move(f_hdr));
+  EXPECT_FALSE(fat_reader->valid());
+  EXPECT_TRUE(arfile_reader_.empty());
+
+  // Should be invalid if arfile_reader Read returns less than the arch size.
+  arfile_reader_.clear();
+  f_hdr.reset(new MacFatHeader);
+  f_hdr->raw.assign(kDummyValue);
+  dummy_arch.size = 3;
+  f_hdr->archs.push_back(dummy_arch);
+
+  stub_reader.reset(new StubArFileReader(kDummyFilename));
+  stub_reader->SetValid(true);
+  stub_reader->SetReadReturn(1, kDummyValue);
+  arfile_reader_.push_back(std::move(stub_reader));
+  fat_reader = CreateFatArFileReader(std::move(f_hdr));
+  EXPECT_FALSE(fat_reader->valid());
+  EXPECT_TRUE(arfile_reader_.empty());
+
+  // Should be valid if arfile_reader Read returns exactly the arch size.
+  arfile_reader_.clear();
+  f_hdr.reset(new MacFatHeader);
+  f_hdr->raw.assign(kDummyValue);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+
+  stub_reader.reset(new StubArFileReader(kDummyFilename));
+  stub_reader->SetValid(true);
+  stub_reader->SetReadReturn(1, kDummyValue);
+  arfile_reader_.push_back(std::move(stub_reader));
+  fat_reader = CreateFatArFileReader(std::move(f_hdr));
+  EXPECT_TRUE(fat_reader->valid());
+  EXPECT_TRUE(arfile_reader_.empty());
+}
+
+
+TEST_F(FatArFileReaderTest, Read) {
+  std::unique_ptr<FatArFileReader> fat;
+  std::unique_ptr<MacFatHeader> f_hdr;
+  std::unique_ptr<StubArFileReader> stub_reader;
+  MacFatArch dummy_arch;
+  char buf[kBufSize];
+  ssize_t len, copied;
+
+  // 0. Returns -1 if invalid.
+  arfile_reader_.clear();
+  f_hdr.reset(new MacFatHeader);
+  f_hdr->raw.assign(kDummyValue);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+  fat = CreateValidArFileReader(std::move(f_hdr));
+  len = fat->read_buffer_.size();
+  fat->is_valid_ = false;
+  copied = fat->Read(buf, len);
+  EXPECT_EQ(copied, static_cast<ssize_t>(-1));
+  EXPECT_TRUE(arfile_reader_.empty());
+
+  // 1. able to fill data only from read_buffer_.
+  arfile_reader_.clear();
+  f_hdr.reset(new MacFatHeader);
+  f_hdr->raw.assign(kDummyValue);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+  fat =  CreateValidArFileReader(std::move(f_hdr));
+  len = fat->read_buffer_.size();
+  copied = fat->Read(buf, len);
+  EXPECT_EQ(copied, len);
+  EXPECT_TRUE(arfile_reader_.empty());
+
+  // 2. refill and can return data.
+  arfile_reader_.clear();
+  f_hdr.reset(new MacFatHeader);
+  f_hdr->raw.assign(kDummyValue);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+  dummy_arch.size = sizeof(kDummyValue) - 1;
+  f_hdr->archs.push_back(dummy_arch);
+
+  fat = CreateValidArFileReader(std::move(f_hdr));
+  stub_reader.reset(new StubArFileReader(kDummyFilename));
+  stub_reader->SetValid(true);
+  stub_reader->SetReadReturn(sizeof(kDummyValue) - 1, kDummyValue);
+  arfile_reader_.push_back(std::move(stub_reader));
+
+  len = fat->read_buffer_.size() + 1;
+  copied = fat->Read(buf, len);
+  EXPECT_EQ(copied, len);
+  EXPECT_TRUE(arfile_reader_.empty());
+
+  // 3. 2 times refill and can return data.
+  arfile_reader_.clear();
+  f_hdr.reset(new MacFatHeader);
+  f_hdr->raw.assign(kDummyValue);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+
+  fat = CreateValidArFileReader(std::move(f_hdr));
+  stub_reader.reset(new StubArFileReader(kDummyFilename));
+  stub_reader->SetValid(true);
+  stub_reader->SetReadReturn(1, kDummyValue);
+  arfile_reader_.push_back(std::move(stub_reader));
+  stub_reader.reset(new StubArFileReader(kDummyFilename));
+  stub_reader->SetValid(true);
+  stub_reader->SetReadReturn(1, kDummyValue);
+  arfile_reader_.push_back(std::move(stub_reader));
+
+  len = fat->read_buffer_.size() + 2;
+  copied = fat->Read(buf, len);
+  EXPECT_EQ(copied, len);
+  EXPECT_TRUE(arfile_reader_.empty());
+
+  // 4-1. try to refill but no more archs.
+  arfile_reader_.clear();
+  f_hdr.reset(new MacFatHeader);
+  f_hdr->raw.assign(kDummyValue);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+
+  fat = CreateValidArFileReader(std::move(f_hdr));
+  len = fat->read_buffer_.size() + 2;
+  copied = fat->Read(buf, len);
+  EXPECT_EQ(copied, len - 2);
+  copied = fat->Read(buf, len);
+  EXPECT_EQ(copied, static_cast<ssize_t>(-1));
+  EXPECT_TRUE(arfile_reader_.empty());
+
+  // 4-2. try to refill but no more archs with empty read buffer.
+  arfile_reader_.clear();
+  f_hdr.reset(new MacFatHeader);
+  f_hdr->raw.assign(kDummyValue);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+
+  fat = CreateValidArFileReader(std::move(f_hdr));
+  fat->read_buffer_.clear();
+  copied = fat->Read(buf, len);
+  EXPECT_EQ(copied, static_cast<ssize_t>(-1));
+  EXPECT_TRUE(arfile_reader_.empty());
+
+  // 5-1. try to refill but got invalid arfile reader.
+  arfile_reader_.clear();
+  f_hdr.reset(new MacFatHeader);
+  f_hdr->raw.assign(kDummyValue);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+
+  fat = CreateValidArFileReader(std::move(f_hdr));
+  stub_reader.reset(new StubArFileReader(kDummyFilename));
+  stub_reader->SetValid(false);
+  stub_reader->SetReadReturn(1, kDummyValue);
+  arfile_reader_.push_back(std::move(stub_reader));
+
+  len = fat->read_buffer_.size() + 2;
+  copied = fat->Read(buf, len);
+  EXPECT_EQ(copied, len - 2);
+  copied = fat->Read(buf, len);
+  EXPECT_EQ(copied, static_cast<ssize_t>(-1));
+  EXPECT_TRUE(arfile_reader_.empty());
+
+  // 5-2. try to refill but got invalid arfile reader with empty read buffer.
+  arfile_reader_.clear();
+  f_hdr.reset(new MacFatHeader);
+  f_hdr->raw.assign(kDummyValue);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+
+  fat = CreateValidArFileReader(std::move(f_hdr));
+  stub_reader.reset(new StubArFileReader(kDummyFilename));
+  stub_reader->SetValid(false);
+  stub_reader->SetReadReturn(1, kDummyValue);
+  arfile_reader_.push_back(std::move(stub_reader));
+
+  fat->read_buffer_.clear();
+  copied = fat->Read(buf, len);
+  EXPECT_EQ(copied, static_cast<ssize_t>(-1));
+  EXPECT_TRUE(arfile_reader_.empty());
+
+  // 6-1. try to refill but failed to read.
+  arfile_reader_.clear();
+  f_hdr.reset(new MacFatHeader);
+  f_hdr->raw.assign(kDummyValue);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+
+  fat = CreateValidArFileReader(std::move(f_hdr));
+  stub_reader.reset(new StubArFileReader(kDummyFilename));
+  stub_reader->SetValid(true);
+  stub_reader->SetReadReturn(-1, kDummyValue);
+  arfile_reader_.push_back(std::move(stub_reader));
+
+  len = fat->read_buffer_.size() + 2;
+  copied = fat->Read(buf, len);
+  EXPECT_EQ(copied, len - 2);
+  copied = fat->Read(buf, len);
+  EXPECT_EQ(copied, static_cast<ssize_t>(-1));
+  EXPECT_TRUE(arfile_reader_.empty());
+
+  // 6-2. try to refill but failed to read with empty read buffer.
+  arfile_reader_.clear();
+  f_hdr.reset(new MacFatHeader);
+  f_hdr->raw.assign(kDummyValue);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+  dummy_arch.size = 1;
+  f_hdr->archs.push_back(dummy_arch);
+
+  fat = CreateValidArFileReader(std::move(f_hdr));
+  stub_reader.reset(new StubArFileReader(kDummyFilename));
+  stub_reader->SetValid(true);
+  stub_reader->SetReadReturn(-1, kDummyValue);
+  arfile_reader_.push_back(std::move(stub_reader));
+
+  fat->read_buffer_.clear();
+  copied = fat->Read(buf, len);
+  EXPECT_EQ(copied, static_cast<ssize_t>(-1));
+  EXPECT_TRUE(arfile_reader_.empty());
+}
+
+#endif
+
+}  // namespace devtools_goma
diff --git a/client/arfile_unittest.cc b/client/arfile_unittest.cc
new file mode 100644
index 0000000..2796f3d
--- /dev/null
+++ b/client/arfile_unittest.cc
@@ -0,0 +1,281 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include <limits.h>
+#include <stdio.h>
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+
+#ifndef _WIN32
+#include <unistd.h>
+#else
+# include "string_piece.h"
+# include "config_win.h"
+#endif
+#ifdef __MACH__
+#include <ar.h>
+#include <mach-o/ranlib.h>
+#endif
+
+#include <memory>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "arfile.h"
+#include "file.h"
+#include "ioutil.h"
+#include "unittest_util.h"
+#include "util.h"
+
+// Note: There's no ar/cc in Windows.  As a result, the commands used in the
+//       test cases are run on a Linux machine and the data files are carried
+//       over in build/testdata
+
+namespace devtools_goma {
+
+// Fixture: chdir()s into a TmpdirUtil directory per test, then back again.
+class ArFileTest : public testing::Test {
+  void SetUp() override {
+    cwd_ = GetCurrentDirNameOrDie();
+    tmpdir_util_.reset(new TmpdirUtil("arfile_unittest"));
+    PCHECK(Chdir(tmpdir_util_->tmpdir().c_str()));
+  }
+
+  void TearDown() override {
+    PCHECK(Chdir(cwd_.c_str()));
+    tmpdir_util_.reset();
+  }
+
+ protected:
+  // Compiles a one-line C source into |output| with cc (no-op on Windows).
+  void Compile(const std::string& output) {
+#ifndef _WIN32
+    std::stringstream ss;
+    ss << "echo 'int x;' | cc -xc -o " << output << " -c -";
+    PCHECK(system(ss.str().c_str()) == 0);
+#else
+    UNREFERENCED_PARAMETER(output);
+#endif
+  }
+
+#ifndef _WIN32
+  // Runs "ar <op> <archive> <files...>" and aborts if the command fails.
+  void Archive(const std::string& op, const std::string& archive,
+               const std::vector<std::string>& files) {
+    std::stringstream ss;
+    ss << "ar " << op << " " << archive;
+    for (const std::string& file : files) ss << " " << file;
+    PCHECK(system(ss.str().c_str()) == 0);
+  }
+#else
+  // Copies the test data file <test_name>.a to |archive|.  Aborts when the
+  // copy fails so the test never runs against a missing archive.
+  void Archive(const std::string& test_name, const std::string& archive) {
+    CHECK(CopyFileA(GetTestFilePath(test_name + ".a").c_str(),
+                    archive.c_str(), FALSE)) << "CopyFileA " << test_name;
+  }
+#endif
+
+  std::string cwd_;
+  std::unique_ptr<TmpdirUtil> tmpdir_util_;
+};
+
+// A regular (non-thin) archive with one member lists exactly that member.
+TEST_F(ArFileTest, NotThinArchive) {
+  const std::vector<std::string> files = {"x.o"};
+#ifndef _WIN32
+  Compile("x.o");
+  Archive("rcu", "t.a", files);
+#else
+  Archive("NotThinArchive", "t.a");
+#endif
+  ArFile a("t.a");
+  EXPECT_TRUE(a.Exists());
+  EXPECT_FALSE(a.IsThinArchive());
+  std::vector<ArFile::EntryHeader> entries;
+  a.GetEntries(&entries);
+  CHECK_EQ(1U, files.size());
+  ASSERT_EQ(files.size(), entries.size());  // fatal: guards entries[0] below
+  EXPECT_EQ(files[0], entries[0].ar_name);
+}
+
+#ifndef __MACH__    // We usually do not use thin archive and long name on mac.
+// A thin archive ("ar rcuT") with one member lists exactly that member.
+TEST_F(ArFileTest, ThinArchive) {
+  const std::vector<std::string> files = {"x.o"};
+#ifndef _WIN32
+  Compile("x.o");
+  Archive("rcuT", "t.a", files);
+#else
+  Archive("ThinArchive", "t.a");
+#endif
+  ArFile a("t.a");
+  EXPECT_TRUE(a.Exists());
+  EXPECT_TRUE(a.IsThinArchive());
+  std::vector<ArFile::EntryHeader> entries;
+  a.GetEntries(&entries);
+  CHECK_EQ(1U, files.size());
+  ASSERT_EQ(files.size(), entries.size());  // fatal: guards entries[0] below
+  EXPECT_EQ(files[0], entries[0].ar_name);
+}
+
+// Member names longer than the 16-byte ar_name field must still be
+// reported in full, and in order, for a regular archive.
+TEST_F(ArFileTest, NotThinArchiveLongName) {
+  const std::vector<std::string> files = {
+      "long_long_long_long_name.o", "long_long_long_long_name1.o",
+      "long_long_long_long_name2.o", "long_long_long_long_name3.o"};
+#ifndef _WIN32
+  Compile("long_long_long_long_name.o");
+  Compile("long_long_long_long_name1.o");
+  Compile("long_long_long_long_name2.o");
+  Compile("long_long_long_long_name3.o");
+  Archive("rcu", "t.a", files);
+#else
+  Archive("NotThinArchiveLongName", "t.a");
+#endif
+  ArFile a("t.a");
+  EXPECT_TRUE(a.Exists());
+  EXPECT_FALSE(a.IsThinArchive());
+  std::vector<ArFile::EntryHeader> entries;
+  a.GetEntries(&entries);
+  CHECK_EQ(4U, files.size());
+  // Fatal on mismatch: indexing |entries| below would be out of bounds.
+  ASSERT_EQ(files.size(), entries.size());
+  for (size_t i = 0; i < files.size(); ++i) {
+    EXPECT_EQ(files[i], entries[i].ar_name) << "entry #" << i;
+  }
+}
+
+// Member names longer than the 16-byte ar_name field must still be
+// reported in full, and in order, for a thin archive.
+TEST_F(ArFileTest, ThinArchiveLongName) {
+  const std::vector<std::string> files = {
+      "long_long_long_long_name.o", "long_long_long_long_name1.o",
+      "long_long_long_long_name2.o", "long_long_long_long_name3.o"};
+#ifndef _WIN32
+  Compile("long_long_long_long_name.o");
+  Compile("long_long_long_long_name1.o");
+  Compile("long_long_long_long_name2.o");
+  Compile("long_long_long_long_name3.o");
+  Archive("rcuT", "t.a", files);
+#else
+  Archive("ThinArchiveLongName", "t.a");
+#endif
+  ArFile a("t.a");
+  EXPECT_TRUE(a.Exists());
+  EXPECT_TRUE(a.IsThinArchive());
+  std::vector<ArFile::EntryHeader> entries;
+  a.GetEntries(&entries);
+  CHECK_EQ(4U, files.size());
+  // Fatal on mismatch: indexing |entries| below would be out of bounds.
+  ASSERT_EQ(files.size(), entries.size());
+  for (size_t i = 0; i < files.size(); ++i) {
+    EXPECT_EQ(files[i], entries[i].ar_name) << "entry #" << i;
+  }
+}
+#endif  // __MACH__
+
+TEST_F(ArFileTest, ArEntryHeaderSize) {
+  // A default-constructed header is expected to serialize to exactly the
+  // 60 bytes that the spec gives for sizeof(struct ar_hdr).
+  ArFile::EntryHeader default_header;
+  std::string serialized;
+  EXPECT_TRUE(default_header.SerializeToString(&serialized));
+  EXPECT_EQ(60U, serialized.length());
+}
+
+TEST_F(ArFileTest, ArEntryHeader) {
+  ArFile::EntryHeader entry_header;
+  std::string buf;
+  entry_header.orig_ar_name = "test";
+  entry_header.orig_ar_name.append(16 - 4, ' ');  // pad the name to 16 bytes
+  entry_header.ar_date = 12;
+  entry_header.ar_uid = 34;
+  entry_header.ar_gid = 56;
+  entry_header.ar_mode = 07;
+  entry_header.ar_size = 89;
+  std::string expected;
+  // ar_name (16 bytes, space padded)
+  expected.append("test");
+  expected.append(16 - 4, ' ');
+  // ar_date (12 bytes, decimal)
+  expected.append("12");
+  expected.append(12 - 2, ' ');
+  // ar_uid (6 bytes, decimal)
+  expected.append("34");
+  expected.append(6 - 2, ' ');
+  // ar_gid (6 bytes, decimal)
+  expected.append("56");
+  expected.append(6 - 2, ' ');
+  // ar_mode (8 bytes, octal)
+  expected.append("7");
+  expected.append(8 - 1, ' ');
+  // ar_size (10 bytes, decimal)
+  expected.append("89");
+  expected.append(10 - 2, ' ');
+  // ar_fmag (2 bytes, magic)
+  expected.append("`\n");
+
+  EXPECT_TRUE(entry_header.SerializeToString(&buf));
+  // the serialized header must equal the hand-built 60-byte layout above.
+  EXPECT_EQ(expected, buf);
+}
+
+TEST_F(ArFileTest, CleanIfRanlibTest) {
+#ifdef __MACH__
+  // How MacDirtyRanlib.a is created:
+  // % echo 'void test(){}' | cc -xc -o test.o -c -
+  // % ar rcu MacDirtyRanlib.a test.o
+  // % bvi MacDirtyRanlib.a
+  //   (Add garbage in string area)
+  ArFile a(GetTestFilePath("MacDirtyRanlib.a"));
+  EXPECT_TRUE(a.Exists());
+  EXPECT_FALSE(a.IsThinArchive());
+
+  // Skip header.
+  std::string header;
+  EXPECT_TRUE(a.ReadHeader(&header));
+
+  // Read entry.
+  ArFile::EntryHeader entry_header;
+  std::string entry_body;
+  EXPECT_TRUE(a.ReadEntry(&entry_header, &entry_body));
+
+  // Pick up string area.
+  //
+  // Format of the ranlib entry:
+  // ar header
+  // SYMDEF magic (e.g. __.SYMDEF SORTED): 20 bytes
+  // ranlib area size: 4 bytes.
+  // ranlib area
+  // string area size: 4 bytes.
+  // string area.
+  const size_t string_pos = 20 + 4 + sizeof(ranlib) + 4;
+  std::string actual = entry_body.substr(string_pos);
+  std::string expected = actual;
+  const size_t len = strlen(&actual[0]);  // length of the first C string
+
+  EXPECT_GT(expected.size() - len, 1U);  // more than just the terminator
+  memset(&expected[len], '\0', expected.size() - len);  // zero the tail
+  EXPECT_EQ(expected, actual);  // i.e. the tail as read is already all NUL
+
+  // Making doubly sure.
+  // Fill the end of string area with garbage.
+  for (size_t i = 0; i < expected.size() - len - 1; ++i)
+    entry_body[entry_body.size() - 1 - i] = '\xff';
+  actual = entry_body.substr(string_pos, expected.size());
+  EXPECT_NE(expected, actual);
+  EXPECT_TRUE(ArFile::CleanIfRanlib(entry_header, &entry_body));
+  actual = entry_body.substr(string_pos, expected.size());
+  EXPECT_EQ(expected, actual);  // CleanIfRanlib restored the zeroed tail
+#endif
+}
+
+}  // namespace devtools_goma
diff --git a/client/atomic_stats_counter.cc b/client/atomic_stats_counter.cc
new file mode 100644
index 0000000..bc5bce7
--- /dev/null
+++ b/client/atomic_stats_counter.cc
@@ -0,0 +1,36 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "atomic_stats_counter.h"
+
+#include <stdint.h>
+
+namespace devtools_goma {
+
+StatsCounter::StatsCounter() : value_(0) {  // the counter starts at zero
+}
+
+void StatsCounter::Clear() {
+#ifdef _WIN32
+  _InterlockedExchange64(&value_, 0);  // atomic exchange with 0
+#else
+  __atomic_store_n(&value_, 0, __ATOMIC_RELAXED);  // atomic relaxed store
+#endif  // _WIN32
+}
+
+int64_t StatsCounter::value() const {
+#ifndef _WIN32
+  return __atomic_load_n(&value_, __ATOMIC_RELAXED);  // atomic relaxed load
+#elif defined(_WIN64)
+  // In x64, as far as the value is properly 64bit aligned,
+  // 64bit read is atomic.
+  // To guarantee the value is read from memory, volatile is used here.
+  // https://msdn.microsoft.com/en-us/library/windows/desktop/ms684122(v=vs.85).aspx
+  return *const_cast<const volatile int64_t*>(&value_);
+#else
+  #error "Windows 32bit environment is not supported"
+#endif  // _WIN32
+}
+
+}  // namespace devtools_goma
diff --git a/client/atomic_stats_counter.h b/client/atomic_stats_counter.h
new file mode 100644
index 0000000..72b68f0
--- /dev/null
+++ b/client/atomic_stats_counter.h
@@ -0,0 +1,40 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_ATOMIC_STATS_COUNTER_H_
+#define DEVTOOLS_GOMA_CLIENT_ATOMIC_STATS_COUNTER_H_
+
+#include <stdint.h>
+
+#ifdef _WIN32
+# include <intrin.h>
+#endif
+
+namespace devtools_goma {
+
+class StatsCounter {  // 64-bit counter whose accessors use atomic operations
+ public:
+  StatsCounter();  // starts at 0
+
+  StatsCounter(const StatsCounter&) = delete;
+  StatsCounter& operator=(const StatsCounter&) = delete;
+
+  void Add(int64_t n);  // atomically adds n to the value
+  void Clear();  // atomically resets the value to 0
+  int64_t value() const;  // returns the current value
+ private:
+  int64_t value_;
+};
+
+inline void StatsCounter::Add(int64_t n) {
+#ifdef _WIN32
+  _InterlockedExchangeAdd64(&value_, n);  // atomic add, old value ignored
+#else
+  __atomic_add_fetch(&value_, n, __ATOMIC_RELAXED);  // atomic relaxed add
+#endif  // _WIN32
+}
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_ATOMIC_STATS_COUNTER_H_
diff --git a/client/atomic_stats_counter_unittest.cc b/client/atomic_stats_counter_unittest.cc
new file mode 100644
index 0000000..f60d3aa
--- /dev/null
+++ b/client/atomic_stats_counter_unittest.cc
@@ -0,0 +1,39 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "atomic_stats_counter.h"
+
+#include <gtest/gtest.h>
+
+namespace devtools_goma {
+
+// TODO: Write multithread tests.
+// TODO: Write performance tests.
+
+TEST(StatsCounterTest, Basic) {
+  StatsCounter counter;
+  EXPECT_EQ(0, counter.value());  // starts at zero
+  counter.Add(1);
+  EXPECT_EQ(1, counter.value());
+  counter.Add(2);
+  EXPECT_EQ(3, counter.value());  // additions accumulate
+  counter.Clear();
+  EXPECT_EQ(0, counter.value());  // Clear() resets to zero
+}
+
+TEST(StatsCounterTest, Int32overflow) {
+  StatsCounter counter;
+  counter.Add(0x7FFFFFFFLL);  // largest signed 32-bit value
+  EXPECT_EQ(0x7FFFFFFFLL, counter.value());
+  counter.Add(1);  // crosses the signed 32-bit boundary
+  EXPECT_EQ(0x80000000LL, counter.value());
+  counter.Add(0x80000000LL);  // crosses the unsigned 32-bit boundary
+  EXPECT_EQ(0x100000000LL, counter.value());
+  counter.Clear();
+  // A 47-bit value must be stored and read back without truncation.
+  counter.Add(0x7FFFFFFFFFFFLL);
+  EXPECT_EQ(0x7FFFFFFFFFFFLL, counter.value());
+}
+
+}  // namespace devtools_goma
diff --git a/client/auto_updater.cc b/client/auto_updater.cc
new file mode 100644
index 0000000..75e406d
--- /dev/null
+++ b/client/auto_updater.cc
@@ -0,0 +1,228 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "auto_updater.h"
+
+#include <vector>
+
+#include "autolock_timer.h"
+#include "callback.h"
+#include "file_helper.h"
+#include "glog/logging.h"
+#include "ioutil.h"
+#include "mypath.h"
+#include "path.h"
+#include "subprocess_task.h"
+#include "threadpool_http_server.h"
+
+#ifdef _WIN32
+#include "posix_helper_win.h"
+#endif
+
+namespace devtools_goma {
+
+AutoUpdater::AutoUpdater(const string& goma_ctl)
+    : dir_(GetMyDirectory()),
+      my_version_(-1),  // stays -1 if MANIFEST has no readable VERSION= line
+      pulled_version_(-1),
+      idle_counter_(-1),
+      server_(nullptr),
+      pull_closure_id_(ThreadpoolHttpServer::kInvalidClosureId),
+      cond_(&mu_),
+      subproc_(nullptr),
+      goma_ctl_(goma_ctl) {
+  ReadManifest(file::JoinPath(dir_, "MANIFEST"), &my_version_);
+}
+
+AutoUpdater::~AutoUpdater() {
+  Stop();  // unregister the idle closure and kill a running pull, if any
+  Wait();  // block until subproc_ has been reset to nullptr
+}
+
+void AutoUpdater::SetEnv(const char* envp[]) {
+  // Appends each entry of the nullptr-terminated |envp| array to env_.
+  for (; *envp != nullptr; ++envp)
+    env_.push_back(*envp);
+}
+
+void AutoUpdater::Start(ThreadpoolHttpServer* server, int count) {
+  if (my_version_ <= 0) {  // no positive VERSION was read from MANIFEST
+    LOG(INFO) << "no goma version, disable auto update";
+    return;
+  }
+  if (count <= 0) {  // a non-positive idle count turns the updater off
+    LOG(INFO) << "disable auto_updater.";
+    return;
+  }
+  if (access(file::JoinPath(dir_, "no_auto_update").c_str(), R_OK) == 0) {
+    LOG(INFO) << "no_auto_update exists, disable auto update";
+    return;
+  }
+  LOG(INFO) << "start autoupdate in " << count << " idle count.";
+  server_ = server;
+  idle_counter_ = count;  // threshold re-checked before pull and update
+  std::unique_ptr<PermanentClosure> pull_closure(
+      NewPermanentCallback(this, &AutoUpdater::CheckUpdate));
+  pull_closure_id_ = server_->RegisterIdleClosure(
+      ThreadpoolHttpServer::SOCKET_IPC, count, std::move(pull_closure));
+}
+
+void AutoUpdater::Stop() {
+  if (server_) {
+    server_->UnregisterIdleClosure(pull_closure_id_);
+  }
+  pull_closure_id_ = ThreadpoolHttpServer::kInvalidClosureId;
+  server_ = nullptr;  // nullptr marks the updater as stopped
+
+  AUTOLOCK(lock, &mu_);
+  if (subproc_) {
+    subproc_->Kill();  // subproc_ itself is cleared in GomaCtlPullDone()
+  }
+}
+
+void AutoUpdater::Wait() {
+  AUTOLOCK(lock, &mu_);
+  while (subproc_ != nullptr) {  // loop re-checks the state after each wakeup
+    cond_.Wait();  // signaled by GomaCtlPullDone()
+  }
+}
+
+bool AutoUpdater::ReadManifest(const string& path, int* version) {
+  // Parses the first "VERSION=<n>\n" found in the file; |*version| is left
+  // untouched unless one is found.  Returns true only for a positive value.
+  static const char kKey[] = "VERSION=";
+  string contents;
+  if (!ReadFileToString(path.c_str(), &contents))
+    return false;
+  const size_t key_pos = contents.find(kKey);
+  if (key_pos == string::npos)
+    return false;
+  const size_t value_pos = key_pos + sizeof(kKey) - 1;
+  const size_t line_end = contents.find("\n", value_pos);
+  if (line_end == string::npos)
+    return false;
+  *version = atoi(contents.substr(value_pos, line_end - value_pos).c_str());
+  LOG(INFO) << "manifest " << path << " VERSION=" << *version;
+  return *version > 0;
+}
+
+void AutoUpdater::CheckUpdate() {
+  // Idle-closure callback: starts "goma_ctl pull" unless one is already
+  // running, the updater was stopped, or the server is not idle enough.
+  {
+    AUTOLOCK(lock, &mu_);
+    if (subproc_ != nullptr || server_ == nullptr)
+      return;
+  }
+  const int current_idle =
+      server_->idle_counter(ThreadpoolHttpServer::SOCKET_IPC);
+  if (current_idle < idle_counter_) {
+    LOG(WARNING) << "not idle:" << current_idle << " < " << idle_counter_;
+    return;
+  }
+  StartGomaCtlPull();
+}
+
+void AutoUpdater::StartGomaCtlPull() {
+  // Runs JoinPath(dir_, goma_ctl_) with "pull" as a heavy-weight,
+  // low-priority subprocess; GomaCtlPullDone() is the callback for Start().
+  CHECK(server_ != nullptr);
+  const string ctl_path = file::JoinPath(dir_, goma_ctl_);
+  const char* argv[] = {ctl_path.c_str(), "pull", nullptr};
+  SubProcessTask* task = nullptr;
+  // Publish the task under mu_ so a concurrent caller sees it and bails out.
+  {
+    AUTOLOCK(lock, &mu_);
+    if (subproc_ != nullptr)
+      return;  // a pull task already exists
+    task = new SubProcessTask(
+        "auto_updater", ctl_path.c_str(), const_cast<char**>(argv));
+    subproc_ = task;
+  }
+  SubProcessReq* req = task->mutable_req();
+  req->set_cwd(dir_);
+  req->set_stdout_filename(file::JoinPath(dir_, "goma_pull.out"));
+  req->set_stderr_filename(file::JoinPath(dir_, "goma_pull.err"));
+  for (const auto& entry : env_) {
+    req->add_env(entry);
+  }
+  req->set_weight(SubProcessReq::HEAVY_WEIGHT);
+  req->set_priority(SubProcessReq::LOW_PRIORITY);
+  task->Start(NewCallback(this, &AutoUpdater::GomaCtlPullDone));
+}
+
+void AutoUpdater::GomaCtlPullDone() {  // callback given to the "pull" task
+  int status = -1;
+  {
+    AUTOLOCK(lock, &mu_);
+    if (subproc_ == nullptr)
+      return;
+    status = subproc_->terminated().status();
+    subproc_ = nullptr;
+    cond_.Signal();  // wakes Wait(), which the destructor blocks in
+  }  // NOTE(review): members are still read below after Wait() may return
+  if (status != 0) {
+    LOG(ERROR) << goma_ctl_ << " pull failed. exit=" << status;
+    return;
+  }
+  if (!ReadManifest(file::JoinPath(dir_, "latest/MANIFEST"),
+                    &pulled_version_)) {
+    LOG(ERROR) << "failed to read latest/MANIFEST";
+    return;
+  }
+  if (my_version_ == pulled_version_) {
+    LOG(INFO) << "no update";
+    return;
+  }
+  if (my_version_ > pulled_version_) {  // never update to an older version
+    LOG(ERROR) << "Version downgrade? " << my_version_
+               << "=>" << pulled_version_
+               << " ignored";
+    return;
+  }
+  if (server_ == nullptr) {  // Stop() ran while the pull was in progress
+    LOG(ERROR) << "Auto updater already stopped.";
+    return;
+  }
+  // Check if server is still idle.  If it processes some requests, postpone
+  // updating.
+  int last_idle_counter =
+      server_->idle_counter(ThreadpoolHttpServer::SOCKET_IPC);
+  if (last_idle_counter < idle_counter_) {
+    LOG(WARNING) << "not idle:" << last_idle_counter << " < " << idle_counter_;
+    return;
+  }
+  StartGomaCtlUpdate();
+}
+
+void AutoUpdater::StartGomaCtlUpdate() {
+  // Runs JoinPath(dir_, goma_ctl_) with "update" as a detached subprocess;
+  // unlike the pull task it is not stored in subproc_ and has no callback.
+  CHECK(server_ != nullptr);
+  LOG(INFO) << "Update version " << my_version_ << " to " << pulled_version_;
+  const string ctl_path = file::JoinPath(dir_, goma_ctl_);
+  const char* argv[] = {ctl_path.c_str(), "update", nullptr};
+  SubProcessTask* task = new SubProcessTask(
+      "auto_updater", ctl_path.c_str(), const_cast<char**>(argv));
+  SubProcessReq* req = task->mutable_req();
+  req->set_cwd(dir_);
+  req->set_stdout_filename(file::JoinPath(dir_, "goma_update.out"));
+  req->set_stderr_filename(file::JoinPath(dir_, "goma_update.err"));
+  // Forward the environment collected by SetEnv().
+  for (const auto& entry : env_) {
+    req->add_env(entry);
+  }
+  req->set_weight(SubProcessReq::HEAVY_WEIGHT);
+  req->set_priority(SubProcessReq::LOW_PRIORITY);
+  req->set_detach(true);
+  task->Start(nullptr);
+  // "goma_ctl.py update" runs in detached mode.
+  // The task will be deleted in Start(), and never send feedback from
+  // subprocess_controller_server.
+  // In "goma_ctl.py update", it will stop the compiler_proxy, updates
+  // new binaries, and restart compiler_proxy again.
+}
+
+}  // namespace devtools_goma
diff --git a/client/auto_updater.h b/client/auto_updater.h
new file mode 100644
index 0000000..b8f9b09
--- /dev/null
+++ b/client/auto_updater.h
@@ -0,0 +1,78 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_AUTO_UPDATER_H_
+#define DEVTOOLS_GOMA_CLIENT_AUTO_UPDATER_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+#include "lockhelper.h"
+#include "threadpool_http_server.h"
+
+namespace devtools_goma {
+
+class Closure;
+class SubProcessTask;
+
+class AutoUpdater {
+ public:
+  explicit AutoUpdater(const string& goma_ctl);
+  ~AutoUpdater();
+
+  // Sets environments to run goma_ctl.
+  void SetEnv(const char* envp[]);
+
+  // Starts auto updater.  If server's idle counter reaches "count",
+  // it will check latest version of goma by calling "goma_ctl pull", and
+  // if it finds newer version, it runs "goma_ctl update" to update binaries.
+  // "goma_ctl update" will restart the running process.
+  void Start(ThreadpoolHttpServer* server, int count);
+
+  // Stops auto updater.
+  void Stop();
+
+  // Waits until subproc_ has finished.
+  void Wait();
+
+  int my_version() const { return my_version_; }
+  int pulled_version() const { return pulled_version_; }
+
+ private:
+  // Reads manifest file specified by path, and sets version.
+  // Returns true if success, false otherwise.
+  bool ReadManifest(const string& path, int* version);
+
+  void CheckUpdate();  // idle closure; starts a pull when idle enough
+
+  void StartGomaCtlPull();  // launches "goma_ctl pull"
+  void GomaCtlPullDone();  // compares versions, may start the update
+
+  void StartGomaCtlUpdate();  // launches detached "goma_ctl update"
+
+  string dir_;  // GetMyDirectory(); base for goma_ctl and MANIFEST paths
+  int my_version_;  // -1 until a VERSION= entry is parsed from MANIFEST
+  int pulled_version_;  // parsed from latest/MANIFEST after a pull
+  int idle_counter_;  // idle threshold given to Start()
+  // server_ != nullptr while AutoUpdater is running.
+  ThreadpoolHttpServer* server_;
+  ThreadpoolHttpServer::RegisteredClosureID pull_closure_id_;
+
+  // protect |subproc_|
+  Lock mu_;
+  // signaled when subproc_ becomes nullptr.
+  ConditionVariable cond_;
+  // If subproc_ != nullptr, "goma_ctl pull" is running.
+  SubProcessTask* subproc_;
+  std::vector<string> env_;  // environment added to goma_ctl subprocesses
+  string goma_ctl_;  // goma_ctl file name, joined onto dir_
+
+  DISALLOW_COPY_AND_ASSIGN(AutoUpdater);
+};
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_AUTO_UPDATER_H_
diff --git a/client/autolock_timer.cc b/client/autolock_timer.cc
new file mode 100644
index 0000000..a2a0f35
--- /dev/null
+++ b/client/autolock_timer.cc
@@ -0,0 +1,122 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "autolock_timer.h"
+
+#include <algorithm>
+#include <iomanip>
+
+namespace devtools_goma {
+
+AutoLockStats* g_auto_lock_stats;
+
+static const double kNanosecondsPerSecond = 1000000000;
+
+AutoLockStat* AutoLockStats::NewStat(const char* name) {
+  // The new stat is kept in stats_ and deleted in ~AutoLockStats().
+  AutoLock lock(&mu_);
+  stats_.push_back(new AutoLockStat(name));
+  return stats_.back();
+}
+
+AutoLockStats::~AutoLockStats() {
+  // Frees every stat created by NewStat(); mu_ is not taken here.
+  for (AutoLockStat* stat : stats_)
+    delete stat;
+  stats_.clear();
+}
+
+void AutoLockStats::TextReport(std::ostringstream* ss) {
+  // Emits one plain-text line per registered stat; times are in seconds.
+  AutoLock lock(&mu_);
+  for (AutoLockStat* stat : stats_) {
+    int count = 0;
+    int64_t wait_total_ns = 0;
+    int64_t wait_max_ns = 0;
+    int64_t hold_total_ns = 0;
+    int64_t hold_max_ns = 0;
+    stat->GetStats(&count, &wait_total_ns, &wait_max_ns,
+                   &hold_total_ns, &hold_max_ns);
+    // Averages divide by at least 1 so a zero count cannot divide by zero.
+    const int samples = std::max(count, 1);
+    (*ss) << stat->name
+          << " count: " << count
+          << " total-wait: "
+          << wait_total_ns / kNanosecondsPerSecond
+          << " max-wait:"
+          << wait_max_ns / kNanosecondsPerSecond
+          << " ave-wait:"
+          << wait_total_ns / samples / kNanosecondsPerSecond
+          << " total-hold:"
+          << hold_total_ns / kNanosecondsPerSecond
+          << " max-hold:"
+          << hold_max_ns / kNanosecondsPerSecond
+          << " ave-hold:"
+          << hold_total_ns / samples / kNanosecondsPerSecond
+          << "\n";
+  }
+}
+
+// Writes per-lock stats to |ss| as an HTML table, omitting |skip_names|.
+void AutoLockStats::Report(std::ostringstream* ss,
+                           const std::unordered_set<std::string>& skip_names) {
+  (*ss) << "<html>"
+        << "<script src=\"/static/jquery.min.js\"></script>"
+        << "<script src=\"/static/compiler_proxy_contentionz_script.js\">"
+        << "</script>"
+        << "<body onload=\"init()\">"
+        << (skip_names.empty() ?
+            "<a href=\"./contentionz\">simplified</a>" :
+            "<a href=\"./contentionz?detailed=1\">detailed</a>")
+        << "<table border=\"1\"><thead>"
+        << "<tr><th>name</th>"
+        << "<th class=\"count\">count</th>"
+        << "<th class=\"total-wait\">total wait</th>"
+        << "<th class=\"max-wait\">max wait</th>"
+        << "<th class=\"ave-wait\">ave wait</th>"
+        << "<th class=\"total-hold\">total hold</th>"
+        << "<th class=\"max-hold\">max hold</th>"
+        << "<th class=\"ave-hold\">ave hold</th>"
+        << "</tr></thead>\n"
+        << "<tbody>"
+        << std::fixed << std::setprecision(9);
+
+  {
+    AutoLock lock(&mu_);
+    for (size_t i = 0; i < stats_.size(); ++i) {
+      AutoLockStat* stat = stats_[i];
+      if (skip_names.find(stat->name) != skip_names.end()) {
+        continue;
+      }
+      int count = 0;
+      int64_t total_wait_time_ns = 0;
+      int64_t max_wait_time_ns = 0;
+      int64_t total_hold_time_ns = 0;
+      int64_t max_hold_time_ns = 0;
+      stat->GetStats(&count, &total_wait_time_ns, &max_wait_time_ns,
+                     &total_hold_time_ns, &max_hold_time_ns);
+      // count stays 0 until UpdateWaitTime() runs; clamp like TextReport.
+      const int divisor = std::max(count, 1);
+      (*ss) << "<tr><td>" << stat->name << "</td>"
+            << "<td class=\"count\">" << count << "</td>"
+            << "<td class=\"total-wait\">"
+            << total_wait_time_ns / kNanosecondsPerSecond << "</td>"
+            << "<td class=\"max-wait\">"
+            << max_wait_time_ns / kNanosecondsPerSecond << "</td>"
+            << "<td class=\"ave-wait\">"
+            << total_wait_time_ns / divisor / kNanosecondsPerSecond << "</td>"
+            << "<td class=\"total-hold\">"
+            << total_hold_time_ns / kNanosecondsPerSecond << "</td>"
+            << "<td class=\"max-hold\">"
+            << max_hold_time_ns / kNanosecondsPerSecond << "</td>"
+            << "<td class=\"ave-hold\">"
+            << total_hold_time_ns / divisor / kNanosecondsPerSecond << "</td>"
+            << "</tr>\n";
+    }
+  }
+  (*ss) << "</tbody>" << "</table></body></html>";
+}
+
+}  // namespace devtools_goma
diff --git a/client/autolock_timer.h b/client/autolock_timer.h
new file mode 100644
index 0000000..d099626
--- /dev/null
+++ b/client/autolock_timer.h
@@ -0,0 +1,244 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_AUTOLOCK_TIMER_H_
+#define DEVTOOLS_GOMA_CLIENT_AUTOLOCK_TIMER_H_
+
+#include <sstream>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "basictypes.h"
+#include "glog/logging.h"
+#include "lockhelper.h"
+#include "simple_timer.h"
+
+namespace devtools_goma {
+
+class AutoLockStat {
+ public:
+  explicit AutoLockStat(const char* auto_lock_name)
+      : name(auto_lock_name),
+        count_(0),
+        total_wait_time_ns_(0), max_wait_time_ns_(0),
+        total_hold_time_ns_(0), max_hold_time_ns_(0) {}
+  const char* name;
+  // Copies all counters into the output arguments while holding |lock_|.
+  void GetStats(int* count,
+                int64_t* total_wait_time_ns,
+                int64_t* max_wait_time_ns,
+                int64_t* total_hold_time_ns,
+                int64_t* max_hold_time_ns) {
+    AutoFastLock lock(&lock_);
+    *count = count_;
+    *total_wait_time_ns = total_wait_time_ns_;
+    *max_wait_time_ns = max_wait_time_ns_;
+    *total_hold_time_ns = total_hold_time_ns_;
+    *max_hold_time_ns = max_hold_time_ns_;
+  }
+  // Adds |wait_time_ns| to the total, updates the max, increments |count_|.
+  void UpdateWaitTime(int64_t wait_time_ns) {
+    AutoFastLock lock(&lock_);
+    ++count_;
+    total_wait_time_ns_ += wait_time_ns;
+    if (wait_time_ns > max_wait_time_ns_)
+      max_wait_time_ns_ = wait_time_ns;
+  }
+  // Adds |hold_time_ns| to the total and updates the max; |count_| is kept.
+  void UpdateHoldTime(int64_t hold_time_ns) {
+    AutoFastLock lock(&lock_);
+    total_hold_time_ns_ += hold_time_ns;
+    if (hold_time_ns > max_hold_time_ns_)
+      max_hold_time_ns_ = hold_time_ns;
+  }
+
+ private:
+  FastLock lock_;  // protects member fields below
+
+  int count_ GUARDED_BY(lock_);
+  int64_t total_wait_time_ns_ GUARDED_BY(lock_);
+  int64_t max_wait_time_ns_ GUARDED_BY(lock_);
+  int64_t total_hold_time_ns_ GUARDED_BY(lock_);
+  int64_t max_hold_time_ns_ GUARDED_BY(lock_);
+
+  DISALLOW_COPY_AND_ASSIGN(AutoLockStat);
+};
+
+class AutoLockStats {
+ public:
+  AutoLockStats() {}
+  ~AutoLockStats();
+
+  // Returns an initialized AutoLockStat for |name|.
+  // |name| should be a string literal (it must not be released):
+  // AutoLockStat stores the raw pointer it is given, not a copy.
+  // This should be called once per source location, e.g.
+  //   static AutoLockStat* stat = g_auto_lock_stats->NewStat(name);
+  //
+  AutoLockStat* NewStat(const char* name);
+
+  void Report(std::ostringstream* ss,
+              const std::unordered_set<std::string>& skip_names);
+  void TextReport(std::ostringstream* ss);
+
+ private:
+  Lock mu_;
+  std::vector<AutoLockStat*> stats_ GUARDED_BY(mu_);
+  DISALLOW_COPY_AND_ASSIGN(AutoLockStats);
+};
+
+extern AutoLockStats* g_auto_lock_stats;
+
+class MutexAcquireStrategy {
+ public:
+  static void Acquire(const Lock& lock) EXCLUSIVE_LOCK_FUNCTION(lock) {
+    lock.Acquire();
+  }
+
+  static void Release(const Lock& lock) UNLOCK_FUNCTION(lock) {
+    lock.Release();
+  }
+
+ private:
+  DISALLOW_IMPLICIT_CONSTRUCTORS(MutexAcquireStrategy);
+};
+
+class ReadWriteLockAcquireSharedStrategy {
+ public:
+  static void Acquire(const ReadWriteLock& lock) SHARED_LOCK_FUNCTION(lock) {
+    lock.AcquireShared();
+  }
+
+  static void Release(const ReadWriteLock& lock) UNLOCK_FUNCTION(lock) {
+    lock.ReleaseShared();
+  }
+
+ private:
+  DISALLOW_IMPLICIT_CONSTRUCTORS(ReadWriteLockAcquireSharedStrategy);
+};
+
+class ReadWriteLockAcquireExclusiveStrategy {
+ public:
+  static void Acquire(const ReadWriteLock& lock) EXCLUSIVE_LOCK_FUNCTION(lock) {
+    lock.AcquireExclusive();
+  }
+
+  static void Release(const ReadWriteLock& lock) UNLOCK_FUNCTION(lock) {
+    lock.ReleaseExclusive();
+  }
+
+ private:
+  DISALLOW_IMPLICIT_CONSTRUCTORS(ReadWriteLockAcquireExclusiveStrategy);
+};
+
+template<typename LockType, typename LockAcquireStrategy>
+class AutoLockTimerBase {
+ public:
+  // Acquires |lock| via LockAcquireStrategy; the destructor releases it.
+  // |lock| is kept by reference, so it must outlive this object.
+  // If |statp| is NULL, no stats are collected (i.e. it works almost the
+  // same as AutoLock).
+  // If |statp| is not NULL, lock wait and hold times are recorded in it.
+  AutoLockTimerBase(const LockType* lock, AutoLockStat* statp)
+      : lock_(*lock), stat_(nullptr), timer_(SimpleTimer::NO_START) {
+    if (statp)
+      timer_.Start();
+    LockAcquireStrategy::Acquire(lock_);
+    if (statp) {
+      stat_ = statp;
+      stat_->UpdateWaitTime(timer_.GetInNanoSeconds());
+      timer_.Start();
+    }
+  }
+
+  ~AutoLockTimerBase() {
+    if (stat_) {
+      stat_->UpdateHoldTime(timer_.GetInNanoSeconds());
+    }
+    LockAcquireStrategy::Release(lock_);
+  }
+
+ private:
+  const LockType& lock_;
+  AutoLockStat* stat_;
+  SimpleTimer timer_;
+  DISALLOW_COPY_AND_ASSIGN(AutoLockTimerBase);
+};
+
+class SCOPED_LOCKABLE AutoLockTimer
+    : private AutoLockTimerBase<Lock, MutexAcquireStrategy> {
+ public:
+  AutoLockTimer(const Lock* lock,
+                AutoLockStat* statp) EXCLUSIVE_LOCK_FUNCTION(lock)
+      : AutoLockTimerBase(lock, statp) {
+  }
+
+  ~AutoLockTimer() UNLOCK_FUNCTION() {
+  }
+};
+
+class SCOPED_LOCKABLE AutoReadWriteLockSharedTimer
+    : private AutoLockTimerBase<ReadWriteLock,
+                                ReadWriteLockAcquireSharedStrategy> {
+ public:
+  AutoReadWriteLockSharedTimer(const ReadWriteLock* lock,
+                               AutoLockStat* statp) SHARED_LOCK_FUNCTION(lock)
+      : AutoLockTimerBase(lock, statp) {
+  }
+
+  ~AutoReadWriteLockSharedTimer() UNLOCK_FUNCTION() {}
+};
+
+class SCOPED_LOCKABLE AutoReadWriteLockExclusiveTimer
+    : private AutoLockTimerBase<ReadWriteLock,
+                                ReadWriteLockAcquireExclusiveStrategy> {
+ public:
+  AutoReadWriteLockExclusiveTimer(const ReadWriteLock* lock,
+                                  AutoLockStat* statp)
+      EXCLUSIVE_LOCK_FUNCTION(lock)
+      : AutoLockTimerBase(lock, statp) {
+  }
+
+  ~AutoReadWriteLockExclusiveTimer() UNLOCK_FUNCTION() {}
+};
+
+#define GOMA_AUTOLOCK_TIMER_STRINGFY(i) #i
+#define GOMA_AUTOLOCK_TIMER_STR(i) GOMA_AUTOLOCK_TIMER_STRINGFY(i)
+// #define NO_AUTOLOCK_STAT
+#ifdef NO_AUTOLOCK_STAT
+#define AUTOLOCK(lock, mu) AutoLock lock(mu)
+#define AUTOLOCK_WITH_STAT(lock, mu, statp) AutoLock lock(mu)
+#define AUTO_SHARED_LOCK(lock, rwlock) AutoSharedLock lock(rwlock)
+#define AUTO_EXCLUSIVE_LOCK(lock, rwlock) AutoExclusiveLock lock(rwlock)
+#else
+#define AUTOLOCK(lock, mu)                                              \
+  static AutoLockStat* auto_lock_stat_for_the_source_location =         \
+      g_auto_lock_stats ? g_auto_lock_stats->NewStat(                   \
+          __FILE__ ":" GOMA_AUTOLOCK_TIMER_STR(__LINE__) "(" #mu ")") : \
+      NULL;                                                             \
+  AutoLockTimer lock(mu, auto_lock_stat_for_the_source_location);
+
+#define AUTOLOCK_WITH_STAT(lock, mu, statp)                             \
+  AutoLockTimer lock(mu, statp);
+#define AUTO_SHARED_LOCK(lock, rwlock)                                  \
+  static AutoLockStat* auto_lock_stat_for_the_source_location =         \
+      g_auto_lock_stats ? g_auto_lock_stats->NewStat(                   \
+          __FILE__ ":" GOMA_AUTOLOCK_TIMER_STR(__LINE__) "(" #rwlock ":r)") : \
+      NULL;                                                             \
+  AutoReadWriteLockSharedTimer lock(                                    \
+      rwlock, auto_lock_stat_for_the_source_location);
+#define AUTO_EXCLUSIVE_LOCK(lock, rwlock)                               \
+  static AutoLockStat* auto_lock_stat_for_the_source_location =         \
+      g_auto_lock_stats ? g_auto_lock_stats->NewStat(                   \
+          __FILE__ ":" GOMA_AUTOLOCK_TIMER_STR(__LINE__) "(" #rwlock ":w)") : \
+      NULL;                                                             \
+  AutoReadWriteLockExclusiveTimer lock(                                 \
+      rwlock, auto_lock_stat_for_the_source_location);
+#endif  // NO_AUTOLOCK_STAT
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_AUTOLOCK_TIMER_H_
diff --git a/client/base64.cc b/client/base64.cc
new file mode 100644
index 0000000..cd1c4e7
--- /dev/null
+++ b/client/base64.cc
@@ -0,0 +1,68 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base64.h"
+
+#include <string>
+
+#include "basictypes.h"
+#include "string_piece.h"
+
+namespace devtools_goma {
+
+// URL-safe base64 alphabet: '-' and '_' take the place of '+' and '/'.
+static const char kEncodeURL[] =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+    "0123456789-_";
+
+std::string Base64UrlEncode(StringPiece str, bool padding) {
+  // Encodes |str| with the URL-safe base64 alphabet in |kEncodeURL|.
+  // When |padding| is true the result is padded with '=' to a multiple of
+  // four characters; otherwise the padding is omitted.
+  if (str.empty()) {
+    return "";
+  }
+  // size_t (not int) indexes keep inputs of 2 GiB and above well-defined.
+  const size_t size = str.size();
+  std::string dst;
+  // Every group of up to three input bytes yields at most four characters.
+  dst.reserve((size + 2) / 3 * 4);
+
+  // Full groups: 3 bytes -> 24 bits -> four 6-bit alphabet indexes.
+  size_t si = 0;
+  for (; size - si >= 3; si += 3) {
+    const unsigned int val = ((str[si] & 0xFFU) << 16) |
+        ((str[si + 1] & 0xFFU) << 8) | (str[si + 2] & 0xFFU);
+
+    dst += kEncodeURL[(val >> 18) & 0x3F];
+    dst += kEncodeURL[(val >> 12) & 0x3F];
+    dst += kEncodeURL[(val >> 6) & 0x3F];
+    dst += kEncodeURL[val & 0x3F];
+  }
+
+  // One or two bytes may remain; they are encoded as a partial group.
+  const size_t remain = size - si;
+  if (remain == 0) {
+    return dst;
+  }
+  unsigned int val = (str[si] & 0xFFU) << 16;
+  if (remain == 2) {
+    val |= (str[si + 1] & 0xFFU) << 8;
+  }
+
+  // 8 leftover bits need two characters, 16 leftover bits need three.
+  dst += kEncodeURL[(val >> 18) & 0x3F];
+  dst += kEncodeURL[(val >> 12) & 0x3F];
+  if (remain == 2) {
+    dst += kEncodeURL[(val >> 6) & 0x3F];
+  }
+
+  if (padding) {
+    // remain == 1 -> "==", remain == 2 -> "=".
+    dst.append(3 - remain, '=');
+  }
+  return dst;
+}
+
+}  // namespace devtools_goma
diff --git a/client/base64.h b/client/base64.h
new file mode 100644
index 0000000..a62b4c3
--- /dev/null
+++ b/client/base64.h
@@ -0,0 +1,19 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_BASE64_H_
+#define DEVTOOLS_GOMA_CLIENT_BASE64_H_
+
+#include <string>
+
+#include "string_piece.h"
+
+namespace devtools_goma {
+
+// Base64UrlEncode encodes str with base64 url encoding.
+std::string Base64UrlEncode(StringPiece str, bool padding);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_BASE64_H_
diff --git a/client/base64_fuzzer.cc b/client/base64_fuzzer.cc
new file mode 100644
index 0000000..75ba660
--- /dev/null
+++ b/client/base64_fuzzer.cc
@@ -0,0 +1,16 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <string>
+
+#include "base64.h"
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  std::string input(reinterpret_cast<const char*>(data), size);
+
+  devtools_goma::Base64UrlEncode(input, true);
+  devtools_goma::Base64UrlEncode(input, false);
+
+  return 0;
+}
diff --git a/client/base64_unittest.cc b/client/base64_unittest.cc
new file mode 100644
index 0000000..f0b2606
--- /dev/null
+++ b/client/base64_unittest.cc
@@ -0,0 +1,53 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base64.h"
+
+#include <gtest/gtest.h>
+
+namespace devtools_goma {
+
+TEST(Base64Test, Base64UrlEncodeRFC4648TestVector) {
+  // https://tools.ietf.org/html/rfc4648#page-12
+  EXPECT_EQ("", Base64UrlEncode("", true));
+  EXPECT_EQ("Zg==", Base64UrlEncode("f", true));
+  EXPECT_EQ("Zm8=", Base64UrlEncode("fo", true));
+  EXPECT_EQ("Zm9v", Base64UrlEncode("foo", true));
+  EXPECT_EQ("Zm9vYg==", Base64UrlEncode("foob", true));
+  EXPECT_EQ("Zm9vYmE=", Base64UrlEncode("fooba", true));
+  EXPECT_EQ("Zm9vYmFy", Base64UrlEncode("foobar", true));
+}
+
+TEST(Base64Test, Base64UrlEncodeRFC4648TestVectorNoPadding) {
+  EXPECT_EQ("", Base64UrlEncode("", false));
+  EXPECT_EQ("Zg", Base64UrlEncode("f", false));
+  EXPECT_EQ("Zm8", Base64UrlEncode("fo", false));
+  EXPECT_EQ("Zm9v", Base64UrlEncode("foo", false));
+  EXPECT_EQ("Zm9vYg", Base64UrlEncode("foob", false));
+  EXPECT_EQ("Zm9vYmE", Base64UrlEncode("fooba", false));
+  EXPECT_EQ("Zm9vYmFy", Base64UrlEncode("foobar", false));
+}
+
+TEST(Base64Test, Base64UrlEncodeForJsonWebToken) {
+  EXPECT_EQ("eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9",
+            Base64UrlEncode("{\"alg\":\"RS256\",\"typ\":\"JWT\"}", false));
+
+  EXPECT_EQ("eyJpc3MiOiI3NjEzMjY3OTgwNjktcjVtbGpsbG4xcmQ0bHJiaG"
+            "c3NWVmZ2lncDM2bTc4ajVAZGV2ZWxvcGVyLmdzZXJ2aWNlYWNj"
+            "b3VudC5jb20iLCJzY29wZSI6Imh0dHBzOi8vd3d3Lmdvb2dsZW"
+            "FwaXMuY29tL2F1dGgvcHJlZGljdGlvbiIsImF1ZCI6Imh0dHBz"
+            "Oi8vYWNjb3VudHMuZ29vZ2xlLmNvbS9vL29hdXRoMi90b2tlbi"
+            "IsImV4cCI6MTMyODU1NDM4NSwiaWF0IjoxMzI4NTUwNzg1fQ",
+            Base64UrlEncode("{"
+                            "\"iss\":\"761326798069-"
+                            "r5mljlln1rd4lrbhg75efgigp36m78j5"
+                            "@developer.gserviceaccount.com\","
+                            "\"scope\":\"https://www.googleapis.com/auth/"
+                            "prediction\","
+                            "\"aud\":\"https://accounts.google.com/o/oauth2/"
+                            "token\","
+                            "\"exp\":1328554385,\"iat\":1328550785}", false));
+}
+
+}  // namespace devtools_goma
diff --git a/client/breakpad.h b/client/breakpad.h
new file mode 100644
index 0000000..b7c903f
--- /dev/null
+++ b/client/breakpad.h
@@ -0,0 +1,15 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_BREAKPAD_H_
+#define DEVTOOLS_GOMA_CLIENT_BREAKPAD_H_
+
+#include <string>
+
+namespace devtools_goma {
+void InitCrashReporter(const std::string& dump_output_dir);
+bool IsCrashReporterEnabled();
+}  // namespace devtools_goma
+#endif  // DEVTOOLS_GOMA_CLIENT_BREAKPAD_H_
diff --git a/client/breakpad_linux.cc b/client/breakpad_linux.cc
new file mode 100644
index 0000000..2f9b067
--- /dev/null
+++ b/client/breakpad_linux.cc
@@ -0,0 +1,60 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "breakpad.h"
+
+#include "client/linux/handler/exception_handler.h"
+#include "compiler_specific.h"
+#include "glog/logging.h"
+
+namespace {
+
+using google_breakpad::ExceptionHandler;
+using google_breakpad::MinidumpDescriptor;
+
+bool g_is_crash_reporter_enabled = false;
+ExceptionHandler* g_breakpad = nullptr;
+
+bool DumpCallback(const google_breakpad::MinidumpDescriptor& descriptor,
+                  void* context ALLOW_UNUSED,
+                  bool succeeded) {
+  LOG(INFO) << "Crash Dump path: " << descriptor.path()
+             << " succeeded=" << succeeded;
+#ifndef GLOG_NO_ABBREVIATED_SEVERITIES
+  google::FlushLogFilesUnsafe(google::INFO);
+#else
+  google::FlushLogFilesUnsafe(google::GLOG_INFO);
+#endif
+  return succeeded;
+}
+
+void CleanUpReporter() {
+  g_is_crash_reporter_enabled = false;
+  delete g_breakpad;
+}
+
+}  // namespace
+
+namespace devtools_goma {
+
+// Installs a breakpad handler that writes minidumps to |dump_output_dir|.
+void InitCrashReporter(const std::string& dump_output_dir) {
+  g_is_crash_reporter_enabled = true;
+  DCHECK(!g_breakpad);
+  MinidumpDescriptor descriptor(dump_output_dir);
+  g_breakpad = new ExceptionHandler(descriptor,
+                                    nullptr,
+                                    DumpCallback,
+                                    nullptr,
+                                    true,  // Install handlers.
+                                    -1);   // Server file descriptor.
+  atexit(CleanUpReporter);
+}
+
+bool IsCrashReporterEnabled() {
+  return g_is_crash_reporter_enabled;
+}
+
+}  // namespace devtools_goma
diff --git a/client/breakpad_mac.cc b/client/breakpad_mac.cc
new file mode 100644
index 0000000..339fdaa
--- /dev/null
+++ b/client/breakpad_mac.cc
@@ -0,0 +1,62 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+// TODO: Rewrite with Objective-C++ and use the Breakpad framework instead
+//       of exception_handler.h.  Note that this requires changing the way
+//       we handle crash dumps.
+
+#include "breakpad.h"
+
+#include "base/compiler_specific.h"
+#include "client/mac/handler/exception_handler.h"
+#include "glog/logging.h"
+
+namespace {
+
+bool g_is_crash_reporter_enabled = false;
+google_breakpad::ExceptionHandler* g_breakpad = nullptr;
+
+bool DumpCallback(const char* dump_dir,
+                  const char* minidump_id,
+                  void* context ALLOW_UNUSED,
+                  bool succeeded) {
+  LOG(INFO) << "Crash Dump dir: " << dump_dir
+            << " minidump_id=" << minidump_id
+            << " succeeded=" << succeeded;
+#ifndef GLOG_NO_ABBREVIATED_SEVERITIES
+  google::FlushLogFilesUnsafe(google::INFO);
+#else
+  google::FlushLogFilesUnsafe(google::GLOG_INFO);
+#endif
+  return succeeded;
+}
+
+void CleanUpReporter() {
+  g_is_crash_reporter_enabled = false;
+  delete g_breakpad;
+}
+
+}  // namespace
+
+namespace devtools_goma {
+
+void InitCrashReporter(const std::string& dump_output_dir) {
+  g_is_crash_reporter_enabled = true;
+
+  DCHECK(!g_breakpad);
+  g_breakpad = new google_breakpad::ExceptionHandler(dump_output_dir,
+                                                     nullptr,
+                                                     DumpCallback,
+                                                     nullptr,
+                                                     true,
+                                                     nullptr);
+  atexit(CleanUpReporter);
+}
+
+bool IsCrashReporterEnabled() {
+  return g_is_crash_reporter_enabled;
+}
+
+}  // namespace devtools_goma
diff --git a/client/breakpad_win.cc b/client/breakpad_win.cc
new file mode 100644
index 0000000..9b1288e
--- /dev/null
+++ b/client/breakpad_win.cc
@@ -0,0 +1,94 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "breakpad.h"
+
+#include <signal.h>
+
+#include <string>
+
+#include "client/windows/handler/exception_handler.h"
+#include "glog/logging.h"
+
+namespace {
+
+bool g_is_crash_reporter_enabled = false;
+google_breakpad::ExceptionHandler* g_breakpad = nullptr;
+
+// TODO: revise followings when glog supports wchar_t*.
+std::ostream& operator<<(std::ostream& out, const wchar_t* str) {
+  std::wstring wide(str);
+  out << std::string(wide.begin(), wide.end());
+  return out;
+}
+
+std::ostream& operator<<(std::ostream& out, const std::wstring& str) {
+  return operator<<(out, str.c_str());
+}
+
+bool DumpCallback(const wchar_t* dump_dir,
+                  const wchar_t* minidump_id,
+                  void*,
+                  EXCEPTION_POINTERS*,
+                  MDRawAssertionInfo*,
+                  bool succeeded) {
+  LOG(INFO) << "Crash Dump dir: " << dump_dir
+            << " minidump_id=" << minidump_id
+            << " succeeded=" << succeeded;
+#ifndef GLOG_NO_ABBREVIATED_SEVERITIES
+  google::FlushLogFilesUnsafe(google::INFO);
+#else
+  google::FlushLogFilesUnsafe(google::GLOG_INFO);
+#endif
+  return succeeded;
+}
+
+void CleanUpReporter() {
+  g_is_crash_reporter_enabled = false;
+  delete g_breakpad;
+}
+
+// This function is copied from Chromium's base/win/win_util.cc.
+void __cdecl ForceCrashOnSigAbort(int) {
+  *((volatile int*)0) = 0x1337;
+}
+
+// This function is copied from Chromium's base/win/win_util.cc.
+void SetAbortBehaviorForCrashReporting() {
+  // Prevent CRT's abort code from prompting a dialog or trying to "report" it.
+  // Disabling the _CALL_REPORTFAULT behavior is important since otherwise it
+  // has the side effect of clearing our exception filter, which means we
+  // don't get any crash.
+  _set_abort_behavior(0, _WRITE_ABORT_MSG | _CALL_REPORTFAULT);
+
+  // Set a SIGABRT handler for good measure. We will crash even if the default
+  // is left in place, however this allows us to crash earlier. And it also
+  // lets us crash in response to code which might directly call raise(SIGABRT)
+  signal(SIGABRT, ForceCrashOnSigAbort);
+}
+
+}  // namespace
+
+namespace devtools_goma {
+
+void InitCrashReporter(const std::string& dump_output_dir) {
+  g_is_crash_reporter_enabled = true;
+
+  DCHECK(!g_breakpad);
+  std::wstring wide_dump_dir(dump_output_dir.begin(), dump_output_dir.end());
+  g_breakpad = new google_breakpad::ExceptionHandler(
+    wide_dump_dir, nullptr, DumpCallback, nullptr,
+    google_breakpad::ExceptionHandler::HANDLER_ALL);
+
+  SetAbortBehaviorForCrashReporting();
+
+  atexit(CleanUpReporter);
+}
+
+bool IsCrashReporterEnabled() {
+  return g_is_crash_reporter_enabled;
+}
+
+}  // namespace devtools_goma
diff --git a/client/cache_file.cc b/client/cache_file.cc
new file mode 100644
index 0000000..cf11e06
--- /dev/null
+++ b/client/cache_file.cc
@@ -0,0 +1,112 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "cache_file.h"
+
+#include <fstream>
+
+#include "file.h"
+#include "file_helper.h"
+#include "glog/logging.h"
+#include "goma_hash.h"
+#include "google/protobuf/io/coded_stream.h"
+#include "google/protobuf/io/zero_copy_stream_impl.h"
+#include "google/protobuf/message.h"
+
+namespace devtools_goma {
+
+using std::string;
+
+CacheFile::CacheFile(const string& filename) : filename_(filename) {
+}
+
+CacheFile::~CacheFile() {}
+
+bool CacheFile::Load(google::protobuf::Message* msg) const {
+  return LoadWithMaxLimit(msg, -1, -1);
+}
+
+bool CacheFile::LoadWithMaxLimit(google::protobuf::Message* msg,
+                                 int total_bytes_limit,
+                                 int warning_threshold) const {
+  const string sha256_path = filename_ + ".sha256";
+  {
+    // First, check *.sha256, so that it is not corrupted.
+    string sha256_expected;
+    if (!ReadFileToString(sha256_path, &sha256_expected)) {
+      LOG(INFO) << sha256_path << " does not exist.";
+      return false;
+    }
+    string sha256_actual;
+    if (!GomaSha256FromFile(filename_, &sha256_actual)) {
+      LOG(INFO) << "failed to calculate sha256 of " << filename_;
+      return false;
+    }
+
+    if (sha256_actual != sha256_expected) {
+      LOG(ERROR) << "sha256 digest of " << filename_ << ": " << sha256_actual
+                 << " but expected: " << sha256_expected;
+      return false;
+    }
+
+    LOG(INFO) << filename_ << " integrity OK.";
+  }
+
+  std::ifstream f(filename_.c_str(), std::ifstream::binary);
+  if (!f.is_open()) {
+    LOG(INFO) << "failed to open " << filename_;
+    return false;
+  }
+
+  // Note: FileInputStream is more efficient than IstreamInputStream.
+  // However, FileInputStream takes fd and we need to support Windows.
+  google::protobuf::io::IstreamInputStream iis(&f);
+  google::protobuf::io::CodedInputStream input(&iis);
+  if (total_bytes_limit >= 0 && warning_threshold >= 0) {
+    input.SetTotalBytesLimit(total_bytes_limit, warning_threshold);
+  } else if (total_bytes_limit >= 0 || warning_threshold >= 0) {
+    LOG(ERROR) << "only one of total_bytes_limit or warning_threshold"
+               << " is set. Set both."
+               << " total_bytes_limit=" << total_bytes_limit
+               << " warning_threshold=" << warning_threshold;
+  }
+
+  if (!msg->ParseFromCodedStream(&input)) {
+    LOG(ERROR) << "failed to parse " << filename_;
+    return false;
+  }
+
+  return true;
+}
+
+bool CacheFile::Save(const google::protobuf::Message& msg) const {
+  // Serializes |msg| into |filename_|; a failed serialization is not written.
+  {
+    string msg_buf;
+    if (!msg.SerializeToString(&msg_buf) ||
+        !WriteStringToFile(msg_buf, filename_)) {
+      LOG(ERROR) << "failed to serialize or write " << filename_;
+      return false;
+    }
+  }
+  // Calculate sha256 of filename_, so that we can check it's not corrupted.
+  const string sha256_path = filename_ + ".sha256";
+  string sha256_str;
+  if (!GomaSha256FromFile(filename_, &sha256_str)) {
+    LOG(ERROR) << "failed to calculate sha256 of " << filename_;
+    if (remove(filename_.c_str()) != 0) {
+      LOG(ERROR) << "failed to delete corrupted " << filename_;
+    }
+    return false;
+  }
+  // The digest is written last; loading rejects a file whose digest differs.
+  if (!WriteStringToFile(sha256_str, sha256_path)) {
+    LOG(ERROR) << "failed to write " << sha256_path;
+    return false;
+  }
+  return true;
+}
+
+}  // namespace devtools_goma
diff --git a/client/cache_file.h b/client/cache_file.h
new file mode 100644
index 0000000..7d52783
--- /dev/null
+++ b/client/cache_file.h
@@ -0,0 +1,48 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_CACHE_FILE_H_
+#define DEVTOOLS_GOMA_CLIENT_CACHE_FILE_H_
+
+#include <string>
+
+#include "basictypes.h"
+
+namespace google {
+namespace protobuf {
+class Message;
+}  // namespace protobuf
+}  // namespace google
+
+namespace devtools_goma {
+
+// CacheFile manages a cache file holding a serialized protocol buffer message.
+// It also saves the sha256 sum of the cache file in a *.sha256 file to detect
+// file corruption, and checks that the sums match when loading.
+class CacheFile {
+ public:
+  explicit CacheFile(const std::string& filename);
+  ~CacheFile();
+
+  bool Load(google::protobuf::Message* data) const;
+  // Loads the message with size limits. The limits are applied only when
+  // both |total_bytes_limit| and |warning_threshold| are >= 0.
+  bool LoadWithMaxLimit(google::protobuf::Message* data,
+                        int total_bytes_limit,
+                        int warning_threshold) const;
+  bool Save(const google::protobuf::Message& data) const;
+
+  const std::string& filename() const { return filename_; }
+  bool Enabled() const { return !filename_.empty(); }
+
+ private:
+  std::string filename_;
+
+  DISALLOW_COPY_AND_ASSIGN(CacheFile);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_CACHE_FILE_H_
diff --git a/client/calculate_sha256.py b/client/calculate_sha256.py
new file mode 100644
index 0000000..617d7b9
--- /dev/null
+++ b/client/calculate_sha256.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+#
+# Copyright 2016 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Calculate checksum of files.
+
+Usage:
+  % calculate_sha256.py --output sha256.json compiler_proxy gomacc
+"""
+
+import argparse
+import hashlib
+import json
+import os
+
+
+class Error(Exception):
+  """Raised on Error."""
+
+
+def CalculateFileSHA256(filename):
+  """Returns sha256sum of file."""
+  with open(filename, 'rb') as f:
+    return hashlib.sha256(f.read()).hexdigest()
+
+
+def main():
+  """Writes a JSON map from each input's basename to its sha256 hex digest."""
+  parser = argparse.ArgumentParser(description='calculate sha256')
+  parser.add_argument('inputs', metavar='FILENAME', type=str, nargs='+',
+                      help='input file')
+  parser.add_argument('--output', help='json filename', required=True)
+  options = parser.parse_args()
+
+  # Later inputs overwrite earlier ones that share the same basename.
+  digests = {}
+  for path in options.inputs:
+    digests[os.path.basename(path)] = CalculateFileSHA256(path)
+  with open(options.output, 'w') as output_file:
+    json.dump(digests, output_file)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/client/callback.h b/client/callback.h
new file mode 100644
index 0000000..68112ec
--- /dev/null
+++ b/client/callback.h
@@ -0,0 +1,228 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_CALLBACK_H_
+#define DEVTOOLS_GOMA_CLIENT_CALLBACK_H_
+
+#include <memory>
+#include <tuple>
+#include <utility>
+
+#include <glog/logging.h>
+
+#include "basictypes.h"
+
+namespace devtools_goma {
+
+// Closure has two types: OneshotClosure and PermanentClosure.
+// * Both derive from the fundamental type Closure.
+// * OneshotClosure can take move-only arguments, especially std::unique_ptr.
+//   When Run() is called, such arguments are passed with std::move.
+// * PermanentClosure cannot take a move-only argument, because arguments can be
+//   passed to the internal function several times. So, we cannot move them.
+
+class Closure {
+ public:
+  Closure() {}
+  virtual ~Closure() {}
+  virtual void Run() = 0;
+
+  virtual bool IsRepeatable() const = 0;
+  void CheckIsRepeatable() const { CHECK(IsRepeatable()); }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(Closure);
+};
+
+class OneshotClosure : public Closure {
+ public:
+  OneshotClosure() {}
+  ~OneshotClosure() override {}
+
+  bool IsRepeatable() const final { return false; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(OneshotClosure);
+};
+
+class PermanentClosure : public Closure {
+ public:
+  PermanentClosure() {}
+  ~PermanentClosure() override {}
+
+  bool IsRepeatable() const final { return true; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(PermanentClosure);
+};
+
+namespace internal {
+
+// ----------------------------------------------------------------------
+// Making integer sequence (template).
+//
+// TODO: C++14 has std::index_sequence for the same purpose.
+// We cannot implement the same thing since C++14 version uses
+// template variable, which does not exist in C++11.
+// Currently we need to call type().
+
+// index_sequence will be made via index_sequence_for<Args>::type().
+// The template arguments are <0, 1, 2, ..., N - 1> where N is the size of Args.
+template<size_t...>
+struct index_sequence {};
+
+template<size_t I, typename T, typename... Types>
+struct make_indexes_impl;
+
+template<size_t I, size_t... Indexes, typename T, typename... Types>
+struct make_indexes_impl<I, index_sequence<Indexes...>, T, Types...> {
+  typedef typename make_indexes_impl<I + 1,
+                                     index_sequence<Indexes..., I>,
+                                     Types...>::type type;
+};
+
+template<size_t I, size_t... Indexes>
+struct make_indexes_impl<I, index_sequence<Indexes...>> {
+  typedef index_sequence<Indexes...> type;
+};
+
+template<typename... Types>
+struct index_sequence_for : make_indexes_impl<0, index_sequence<>, Types...> {
+};
+
+// ----------------------------------------------------------------------
+
+template<typename... Args>
+class OneshotFunctionClosure : public OneshotClosure {
+ public:
+  typedef void (*FunctionType)(Args...);
+  OneshotFunctionClosure(FunctionType f, Args&&... args)
+      : function_(f), args_(std::forward<Args>(args)...) {
+  }
+  ~OneshotFunctionClosure() override {}
+
+  void Run() override {
+    Apply(typename index_sequence_for<Args...>::type());
+    delete this;
+  }
+
+ private:
+  template<size_t... Indexes>
+  void Apply(index_sequence<Indexes...>) {
+    function_(std::forward<Args>(std::get<Indexes>(args_))...);
+  }
+
+  typename std::decay<FunctionType>::type function_;
+  std::tuple<typename std::decay<Args>::type...> args_;
+};
+
+template<typename Class, typename... Args>
+class OneshotMethodClosure : public OneshotClosure {
+ public:
+  typedef void (Class::*MethodType)(Args... args);
+
+  OneshotMethodClosure(Class* object, MethodType method, Args&&... args)
+      : object_(object), method_(method), args_(std::forward<Args>(args)...) {}
+  ~OneshotMethodClosure() override {}
+
+  void Run() override {
+    Apply(typename index_sequence_for<Args...>::type());
+    delete this;
+  }
+
+ private:
+  template<size_t... Indexes>
+  void Apply(index_sequence<Indexes...>) {
+    (object_->*method_)(std::forward<Args>(std::get<Indexes>(args_))...);
+  }
+
+  Class* object_;
+  typename std::decay<MethodType>::type method_;
+  std::tuple<typename std::decay<Args>::type...> args_;
+};
+
+template<typename... Args>
+class PermanentFunctionClosure : public PermanentClosure {
+ public:
+  typedef void (*FunctionType)(Args...);
+  PermanentFunctionClosure(FunctionType f, Args... args)
+      : function_(f), args_(std::forward<Args>(args)...) {
+  }
+  ~PermanentFunctionClosure() override {}
+
+  void Run() override {
+    Apply(typename index_sequence_for<Args...>::type());
+  }
+
+ private:
+  template<size_t... Indexes>
+  void Apply(index_sequence<Indexes...>) {
+    function_(std::get<Indexes>(args_)...);
+  }
+
+  typename std::decay<FunctionType>::type function_;
+  std::tuple<typename std::decay<Args>::type...> args_;
+};
+
+template<typename Class, typename... Args>
+class PermanentMethodClosure : public PermanentClosure {
+ public:
+  typedef void (Class::*MethodType)(Args... args);
+
+  PermanentMethodClosure(Class* object, MethodType method, Args... args)
+      : object_(object), method_(method), args_(std::forward<Args>(args)...) {}
+  ~PermanentMethodClosure() override {}
+
+  void Run() override {
+    Apply(typename index_sequence_for<Args...>::type());
+  }
+
+ private:
+  template<size_t... Indexes>
+  void Apply(index_sequence<Indexes...>) {
+    (object_->*method_)(std::get<Indexes>(args_)...);
+  }
+
+  Class* object_;
+  typename std::decay<MethodType>::type method_;
+  std::tuple<typename std::decay<Args>::type...> args_;
+};
+
+}  // namespace internal
+
+// TODO: NewCallback might have to return std::unique_ptr, too.
+
+template<typename... Args>
+inline OneshotClosure* NewCallback(void (*function)(Args...), Args... args) {
+  return new internal::OneshotFunctionClosure<Args...>(
+      function, std::forward<Args>(args)...);
+}
+
+template<typename Class, typename... Args>
+inline OneshotClosure* NewCallback(
+    Class* object, void (Class::*method)(Args...), Args... args) {
+  return new internal::OneshotMethodClosure<Class, Args...>(
+      object, method, std::forward<Args>(args)...);
+}
+
+template<typename... Args>
+inline std::unique_ptr<PermanentClosure> NewPermanentCallback(
+    void (*function)(Args...), Args... args) {
+  return std::unique_ptr<PermanentClosure>(
+      new internal::PermanentFunctionClosure<Args...>(
+          function, std::forward<Args>(args)...));
+}
+
+template<typename Class, typename... Args>
+inline std::unique_ptr<PermanentClosure> NewPermanentCallback(
+    Class* object, void (Class::*method)(Args...), Args... args) {
+  return std::unique_ptr<PermanentClosure>(
+      new internal::PermanentMethodClosure<Class, Args...>(
+          object, method, std::forward<Args>(args)...));
+}
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_CALLBACK_H_
diff --git a/client/callback_unittest.cc b/client/callback_unittest.cc
new file mode 100644
index 0000000..f8f32c1
--- /dev/null
+++ b/client/callback_unittest.cc
@@ -0,0 +1,151 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "callback.h"
+
+#include <memory>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "compiler_specific.h"
+
+namespace {
+
+void TestFunc0() {}
+
+void TestFunc1(int x) {
+  ASSERT_EQ(x, 1);
+}
+void TestFunc2(int x, int y) {
+  ASSERT_EQ(x, 1);
+  ASSERT_EQ(y, 2);
+}
+
+void TestFunc1UP(std::unique_ptr<int> x) {
+  ASSERT_EQ(*x, 1);
+}
+void TestFunc2UP(std::unique_ptr<int> x, std::unique_ptr<int> y) {
+  ASSERT_EQ(*x, 1);
+  ASSERT_EQ(*y, 2);
+}
+
+class TestObject {
+ public:
+  void TestMethod0() {}
+
+  void TestMethod1(int x) {
+    ASSERT_EQ(x, 1);
+  }
+  void TestMethod2(int x, int y) {
+    ASSERT_EQ(x, 1);
+    ASSERT_EQ(y, 2);
+  }
+
+  void TestMethod1UP(std::unique_ptr<int> x) {
+    ASSERT_EQ(*x, 1);
+  }
+  void TestMethod2UP(std::unique_ptr<int> x, std::unique_ptr<int> y) {
+    ASSERT_EQ(*x, 1);
+    ASSERT_EQ(*y, 2);
+  }
+};
+
+}  // anonymous namespace
+
+TEST(CallbackTest, PermanentCallback) {
+  TestObject obj;
+
+  std::unique_ptr<devtools_goma::PermanentClosure> closures[] = {
+      devtools_goma::NewPermanentCallback(TestFunc0),
+      devtools_goma::NewPermanentCallback(TestFunc1, 1),
+      devtools_goma::NewPermanentCallback(TestFunc2, 1, 2),
+      devtools_goma::NewPermanentCallback(&obj, &TestObject::TestMethod0),
+      devtools_goma::NewPermanentCallback(&obj, &TestObject::TestMethod1, 1),
+      devtools_goma::NewPermanentCallback(&obj, &TestObject::TestMethod2, 1, 2),
+  };
+
+  // Permanent closures must report themselves as repeatable.
+  for (const auto& cl : closures) {
+    EXPECT_TRUE(cl->IsRepeatable());
+  }
+
+  // It should be OK to run each closure multiple times.
+  for (auto& cl : closures) {
+    cl->Run();
+    cl->Run();
+  }
+}
+
+TEST(CallbackTest, OneshotCallback) {
+  TestObject obj;
+
+  devtools_goma::OneshotClosure* closures[] = {
+      devtools_goma::NewCallback(TestFunc0),
+      devtools_goma::NewCallback(TestFunc1, 1),
+      devtools_goma::NewCallback(TestFunc2, 1, 2),
+      devtools_goma::NewCallback(&obj, &TestObject::TestMethod0),
+      devtools_goma::NewCallback(&obj, &TestObject::TestMethod1, 1),
+      devtools_goma::NewCallback(&obj, &TestObject::TestMethod2, 1, 2),
+  };
+
+  // One-shot closures must not report themselves as repeatable.
+  for (const auto& cl : closures) {
+    EXPECT_FALSE(cl->IsRepeatable());
+  }
+
+  for (auto& cl : closures) {
+    cl->Run();  // Each closure is run exactly once and never deleted here.
+  }
+}
+
+TEST(CallbackTest, PassUniquePtr) {
+  // Nothing is deleted explicitly here; the ASan buildbot will flag any leak.
+
+  {
+    devtools_goma::OneshotClosure* c = devtools_goma::NewCallback(TestFunc0);
+    c->Run();
+  }
+  {
+    TestObject obj;
+    devtools_goma::OneshotClosure* c =
+        devtools_goma::NewCallback(&obj, &TestObject::TestMethod0);
+    c->Run();
+  }
+
+  {
+    std::unique_ptr<int> x(new int(1));
+    devtools_goma::OneshotClosure* c =
+        devtools_goma::NewCallback(TestFunc1UP, std::move(x));
+    c->Run();
+  }
+  {
+    std::unique_ptr<int> x(new int(1));
+
+    TestObject obj;
+    devtools_goma::OneshotClosure* c =
+        devtools_goma::NewCallback(&obj, &TestObject::TestMethod1UP,
+                                   std::move(x));
+    c->Run();
+  }
+
+  {
+    std::unique_ptr<int> x(new int(1));
+    std::unique_ptr<int> y(new int(2));
+    devtools_goma::OneshotClosure* c =
+        devtools_goma::NewCallback(TestFunc2UP, std::move(x), std::move(y));
+    c->Run();
+  }
+  {
+    std::unique_ptr<int> x(new int(1));
+    std::unique_ptr<int> y(new int(2));
+
+    TestObject obj;
+    devtools_goma::OneshotClosure* c =
+        devtools_goma::NewCallback(&obj, &TestObject::TestMethod2UP,
+                                   std::move(x), std::move(y));
+    c->Run();
+  }
+}
diff --git a/client/certs/BUILD.gn b/client/certs/BUILD.gn
new file mode 100644
index 0000000..b39e072
--- /dev/null
+++ b/client/certs/BUILD.gn
@@ -0,0 +1,54 @@
+# Copyright 2014 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+roots = "roots"
+
+if (os == "linux") {
+  genc = "//client/genc.py"
+
+  action("gen_certs") {
+    script = genc
+    sources = [
+      "$roots.pem",
+    ]
+    outputs = [
+      "$target_gen_dir/$roots.c",
+      "$target_gen_dir/$roots.h",
+    ]
+    args = [
+      "--prefix=certs_",
+      "--out-dir",
+      rebase_path("$target_gen_dir"),
+      rebase_path("$roots.pem"),
+    ]
+  }
+
+  config("certs_config") {
+    include_dirs = [ "$target_gen_dir" ]
+  }
+  static_library("certs") {
+    sources = [
+      "$target_gen_dir/$roots.c",
+      "$target_gen_dir/$roots.h",
+    ]
+    deps = [
+      ":gen_certs",
+    ]
+    public_configs = [ ":certs_config" ]
+  }
+}
+
+if (os == "win") {
+  config("certs_resource_config") {
+    include_dirs = [ "." ]
+  }
+  static_library("certs_resource") {
+    sources = [
+      "$roots.pem",
+      "certs.rc",
+      "certs_resource.h",
+    ]
+    public_configs = [ ":certs_resource_config" ]
+  }
+}
diff --git a/client/certs/README b/client/certs/README
new file mode 100644
index 0000000..d827a23
--- /dev/null
+++ b/client/certs/README
@@ -0,0 +1,3 @@
+Minimal set of root certificates required to connect to Google.
+The bundle is available at https://pki.goog/roots.pem.
+
diff --git a/client/certs/certs.rc b/client/certs/certs.rc
new file mode 100644
index 0000000..716bd46
--- /dev/null
+++ b/client/certs/certs.rc
@@ -0,0 +1,4 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+
+#include "certs_resource.h"
+ROOT_CA_NAME RCDATA "roots.pem"
diff --git a/client/certs/certs_resource.h b/client/certs/certs_resource.h
new file mode 100644
index 0000000..f45a2fd
--- /dev/null
+++ b/client/certs/certs_resource.h
@@ -0,0 +1,10 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_CERTS_CERTS_RESOURCE_H_
+#define DEVTOOLS_GOMA_CLIENT_CERTS_CERTS_RESOURCE_H_
+
+#define ROOT_CA_NAME 101
+
+#endif  // DEVTOOLS_GOMA_CLIENT_CERTS_CERTS_RESOURCE_H_
diff --git a/client/certs/roots.pem b/client/certs/roots.pem
new file mode 100644
index 0000000..92d82de
--- /dev/null
+++ b/client/certs/roots.pem
@@ -0,0 +1,1911 @@
+# Operating CA: Comodo Group
+# Issuer: CN=AAA Certificate Services O=Comodo CA Limited
+# Subject: CN=AAA Certificate Services O=Comodo CA Limited
+# Label: "Comodo AAA Services root"
+# Serial: 1
+# MD5 Fingerprint: 49:79:04:b0:eb:87:19:ac:47:b0:bc:11:51:9b:74:d0
+# SHA1 Fingerprint: d1:eb:23:a4:6d:17:d6:8f:d9:25:64:c2:f1:f1:60:17:64:d8:e3:49
+# SHA256 Fingerprint: d7:a7:a0:fb:5d:7e:27:31:d7:71:e9:48:4e:bc:de:f7:1d:5f:0c:3e:0a:29:48:78:2b:c8:3e:e0:ea:69:9e:f4
+-----BEGIN CERTIFICATE-----
+MIIEMjCCAxqgAwIBAgIBATANBgkqhkiG9w0BAQUFADB7MQswCQYDVQQGEwJHQjEb
+MBkGA1UECAwSR3JlYXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHDAdTYWxmb3JkMRow
+GAYDVQQKDBFDb21vZG8gQ0EgTGltaXRlZDEhMB8GA1UEAwwYQUFBIENlcnRpZmlj
+YXRlIFNlcnZpY2VzMB4XDTA0MDEwMTAwMDAwMFoXDTI4MTIzMTIzNTk1OVowezEL
+MAkGA1UEBhMCR0IxGzAZBgNVBAgMEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4GA1UE
+BwwHU2FsZm9yZDEaMBgGA1UECgwRQ29tb2RvIENBIExpbWl0ZWQxITAfBgNVBAMM
+GEFBQSBDZXJ0aWZpY2F0ZSBTZXJ2aWNlczCCASIwDQYJKoZIhvcNAQEBBQADggEP
+ADCCAQoCggEBAL5AnfRu4ep2hxxNRUSOvkbIgwadwSr+GB+O5AL686tdUIoWMQua
+BtDFcCLNSS1UY8y2bmhGC1Pqy0wkwLxyTurxFa70VJoSCsN6sjNg4tqJVfMiWPPe
+3M/vg4aijJRPn2jymJBGhCfHdr/jzDUsi14HZGWCwEiwqJH5YZ92IFCokcdmtet4
+YgNW8IoaE+oxox6gmf049vYnMlhvB/VruPsUK6+3qszWY19zjNoFmag4qMsXeDZR
+rOme9Hg6jc8P2ULimAyrL58OAd7vn5lJ8S3frHRNG5i1R8XlKdH5kBjHYpy+g8cm
+ez6KJcfA3Z3mNWgQIJ2P2N7Sw4ScDV7oL8kCAwEAAaOBwDCBvTAdBgNVHQ4EFgQU
+oBEKIz6W8Qfs4q8p74Klf9AwpLQwDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQF
+MAMBAf8wewYDVR0fBHQwcjA4oDagNIYyaHR0cDovL2NybC5jb21vZG9jYS5jb20v
+QUFBQ2VydGlmaWNhdGVTZXJ2aWNlcy5jcmwwNqA0oDKGMGh0dHA6Ly9jcmwuY29t
+b2RvLm5ldC9BQUFDZXJ0aWZpY2F0ZVNlcnZpY2VzLmNybDANBgkqhkiG9w0BAQUF
+AAOCAQEACFb8AvCb6P+k+tZ7xkSAzk/ExfYAWMymtrwUSWgEdujm7l3sAg9g1o1Q
+GE8mTgHj5rCl7r+8dFRBv/38ErjHT1r0iWAFf2C3BUrz9vHCv8S5dIa2LX1rzNLz
+Rt0vxuBqw8M0Ayx9lt1awg6nCpnBBYurDC/zXDrPbDdVCYfeU0BsWO/8tqtlbgT2
+G9w84FoVxp7Z8VlIMCFlA2zs6SFz7JsDoeA3raAVGI/6ugLOpyypEBMs1OUIJqsi
+l2D4kF501KKaU73yqWjgom7C12yxow+ev+to51byrvLjKzg6CYG1a4XXvi3tPxq3
+smPi9WIsgtRqAEFQ8TmDn5XpNpaYbg==
+-----END CERTIFICATE-----
+
+# Operating CA: Comodo Group
+# Issuer: CN=AddTrust Class 1 CA Root O=AddTrust AB OU=AddTrust TTP Network
+# Subject: CN=AddTrust Class 1 CA Root O=AddTrust AB OU=AddTrust TTP Network
+# Label: "AddTrust Low-Value Services Root"
+# Serial: 1
+# MD5 Fingerprint: 1e:42:95:02:33:92:6b:b9:5f:c0:7f:da:d6:b2:4b:fc
+# SHA1 Fingerprint: cc:ab:0e:a0:4c:23:01:d6:69:7b:dd:37:9f:cd:12:eb:24:e3:94:9d
+# SHA256 Fingerprint: 8c:72:09:27:9a:c0:4e:27:5e:16:d0:7f:d3:b7:75:e8:01:54:b5:96:80:46:e3:1f:52:dd:25:76:63:24:e9:a7
+-----BEGIN CERTIFICATE-----
+MIIEGDCCAwCgAwIBAgIBATANBgkqhkiG9w0BAQUFADBlMQswCQYDVQQGEwJTRTEU
+MBIGA1UEChMLQWRkVHJ1c3QgQUIxHTAbBgNVBAsTFEFkZFRydXN0IFRUUCBOZXR3
+b3JrMSEwHwYDVQQDExhBZGRUcnVzdCBDbGFzcyAxIENBIFJvb3QwHhcNMDAwNTMw
+MTAzODMxWhcNMjAwNTMwMTAzODMxWjBlMQswCQYDVQQGEwJTRTEUMBIGA1UEChML
+QWRkVHJ1c3QgQUIxHTAbBgNVBAsTFEFkZFRydXN0IFRUUCBOZXR3b3JrMSEwHwYD
+VQQDExhBZGRUcnVzdCBDbGFzcyAxIENBIFJvb3QwggEiMA0GCSqGSIb3DQEBAQUA
+A4IBDwAwggEKAoIBAQCWltQhSWDia+hBBwzexODcEyPNwTXH+9ZOEQpnXvUGW2ul
+CDtbKRY654eyNAbFvAWlA3yCyykQruGIgb3WntP+LVbBFc7jJp0VLhD7Bo8wBN6n
+tGO0/7Gcrjyvd7ZWxbWroulpOj0OM3kyP3CCkplhbY0wCI9xP6ZIVxn4JdxLZlyl
+dI+Yrsj5wAYi56xz36Uu+1LcsRVlIPo1Zmne3yzxbrww2ywkEtvrNTVokMsAsJch
+PXQhI2U0K7t4WaPW4XY5mqRJjox0r26kmqPZm9I4XJuiGMx1I4S+6+JNM3GOGvDC
++Mcdoq0Dlyz4zyXG9rgkMbFjXZJ/Y/AlyVMuH79NAgMBAAGjgdIwgc8wHQYDVR0O
+BBYEFJWxtPCUtr3H2tERCSG+wa9J/RB7MAsGA1UdDwQEAwIBBjAPBgNVHRMBAf8E
+BTADAQH/MIGPBgNVHSMEgYcwgYSAFJWxtPCUtr3H2tERCSG+wa9J/RB7oWmkZzBl
+MQswCQYDVQQGEwJTRTEUMBIGA1UEChMLQWRkVHJ1c3QgQUIxHTAbBgNVBAsTFEFk
+ZFRydXN0IFRUUCBOZXR3b3JrMSEwHwYDVQQDExhBZGRUcnVzdCBDbGFzcyAxIENB
+IFJvb3SCAQEwDQYJKoZIhvcNAQEFBQADggEBACxtZBsfzQ3duQH6lmM0MkhHma6X
+7f1yFqZzR1r0693p9db7RcwpiURdv0Y5PejuvE1Uhh4dbOMXJ0PhiVYrqW9yTkkz
+43J8KiOavD7/KCrto/8cI7pDVwlnTUtiBi34/2ydYB7YHEt9tTEv2dB8Xfjea4MY
+eDdXL+gzB2ffHsdrKpV2ro9Xo/D0UrSpUwjP4E/TelOL/bscVjby/rK25Xa71SJl
+pz/+0WatC7xrmYbvP33zGDLKe8bjq2RGlfgmadlVg3sslgf/WSxEo8bl6ancoWOA
+WiFeIc9TVPC6b4nbqKqVz4vjccweGyBECMB6tkD9xOQ14R0WHNC8K47Wcdk=
+-----END CERTIFICATE-----
+
+# Operating CA: Comodo Group
+# Issuer: CN=AddTrust External CA Root O=AddTrust AB OU=AddTrust External TTP Network
+# Subject: CN=AddTrust External CA Root O=AddTrust AB OU=AddTrust External TTP Network
+# Label: "AddTrust External Root"
+# Serial: 1
+# MD5 Fingerprint: 1d:35:54:04:85:78:b0:3f:42:42:4d:bf:20:73:0a:3f
+# SHA1 Fingerprint: 02:fa:f3:e2:91:43:54:68:60:78:57:69:4d:f5:e4:5b:68:85:18:68
+# SHA256 Fingerprint: 68:7f:a4:51:38:22:78:ff:f0:c8:b1:1f:8d:43:d5:76:67:1c:6e:b2:bc:ea:b4:13:fb:83:d9:65:d0:6d:2f:f2
+-----BEGIN CERTIFICATE-----
+MIIENjCCAx6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBvMQswCQYDVQQGEwJTRTEU
+MBIGA1UEChMLQWRkVHJ1c3QgQUIxJjAkBgNVBAsTHUFkZFRydXN0IEV4dGVybmFs
+IFRUUCBOZXR3b3JrMSIwIAYDVQQDExlBZGRUcnVzdCBFeHRlcm5hbCBDQSBSb290
+MB4XDTAwMDUzMDEwNDgzOFoXDTIwMDUzMDEwNDgzOFowbzELMAkGA1UEBhMCU0Ux
+FDASBgNVBAoTC0FkZFRydXN0IEFCMSYwJAYDVQQLEx1BZGRUcnVzdCBFeHRlcm5h
+bCBUVFAgTmV0d29yazEiMCAGA1UEAxMZQWRkVHJ1c3QgRXh0ZXJuYWwgQ0EgUm9v
+dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALf3GjPm8gAELTngTlvt
+H7xsD821+iO2zt6bETOXpClMfZOfvUq8k+0DGuOPz+VtUFrWlymUWoCwSXrbLpX9
+uMq/NzgtHj6RQa1wVsfwTz/oMp50ysiQVOnGXw94nZpAPA6sYapeFI+eh6FqUNzX
+mk6vBbOmcZSccbNQYArHE504B4YCqOmoaSYYkKtMsE8jqzpPhNjfzp/haW+710LX
+a0Tkx63ubUFfclpxCDezeWWkWaCUN/cALw3CknLa0Dhy2xSoRcRdKn23tNbE7qzN
+E0S3ySvdQwAl+mG5aWpYIxG3pzOPVnVZ9c0p10a3CitlttNCbxWyuHv77+ldU9U0
+WicCAwEAAaOB3DCB2TAdBgNVHQ4EFgQUrb2YejS0Jvf6xCZU7wO94CTLVBowCwYD
+VR0PBAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wgZkGA1UdIwSBkTCBjoAUrb2YejS0
+Jvf6xCZU7wO94CTLVBqhc6RxMG8xCzAJBgNVBAYTAlNFMRQwEgYDVQQKEwtBZGRU
+cnVzdCBBQjEmMCQGA1UECxMdQWRkVHJ1c3QgRXh0ZXJuYWwgVFRQIE5ldHdvcmsx
+IjAgBgNVBAMTGUFkZFRydXN0IEV4dGVybmFsIENBIFJvb3SCAQEwDQYJKoZIhvcN
+AQEFBQADggEBALCb4IUlwtYj4g+WBpKdQZic2YR5gdkeWxQHIzZlj7DYd7usQWxH
+YINRsPkyPef89iYTx4AWpb9a/IfPeHmJIZriTAcKhjW88t5RxNKWt9x+Tu5w/Rw5
+6wwCURQtjr0W4MHfRnXnJK3s9EK0hZNwEGe6nQY1ShjTK3rMUUKhemPR5ruhxSvC
+Nr4TDea9Y355e6cJDUCrat2PisP29owaQgVR1EX1n6diIWgVIEM8med8vSTYqZEX
+c4g/VhsxOBi0cQ+azcgOno4uG+GMmIPLHzHxREzGBHNJdmAPx/i9F4BrLunMTA5a
+mnkPIAou1Z5jJh5VkpTYghdae9C8x49OhgQ=
+-----END CERTIFICATE-----
+
+# Operating CA: Comodo Group
+# Issuer: CN=AddTrust Public CA Root O=AddTrust AB OU=AddTrust TTP Network
+# Subject: CN=AddTrust Public CA Root O=AddTrust AB OU=AddTrust TTP Network
+# Label: "AddTrust Public Services Root"
+# Serial: 1
+# MD5 Fingerprint: c1:62:3e:23:c5:82:73:9c:03:59:4b:2b:e9:77:49:7f
+# SHA1 Fingerprint: 2a:b6:28:48:5e:78:fb:f3:ad:9e:79:10:dd:6b:df:99:72:2c:96:e5
+# SHA256 Fingerprint: 07:91:ca:07:49:b2:07:82:aa:d3:c7:d7:bd:0c:df:c9:48:58:35:84:3e:b2:d7:99:60:09:ce:43:ab:6c:69:27
+-----BEGIN CERTIFICATE-----
+MIIEFTCCAv2gAwIBAgIBATANBgkqhkiG9w0BAQUFADBkMQswCQYDVQQGEwJTRTEU
+MBIGA1UEChMLQWRkVHJ1c3QgQUIxHTAbBgNVBAsTFEFkZFRydXN0IFRUUCBOZXR3
+b3JrMSAwHgYDVQQDExdBZGRUcnVzdCBQdWJsaWMgQ0EgUm9vdDAeFw0wMDA1MzAx
+MDQxNTBaFw0yMDA1MzAxMDQxNTBaMGQxCzAJBgNVBAYTAlNFMRQwEgYDVQQKEwtB
+ZGRUcnVzdCBBQjEdMBsGA1UECxMUQWRkVHJ1c3QgVFRQIE5ldHdvcmsxIDAeBgNV
+BAMTF0FkZFRydXN0IFB1YmxpYyBDQSBSb290MIIBIjANBgkqhkiG9w0BAQEFAAOC
+AQ8AMIIBCgKCAQEA6Rowj4OIFMEg2Dybjxt+A3S72mnTRqX4jsIMEZBRpS9mVEBV
+6tsfSlbunyNu9DnLoblv8n75XYcmYZ4c+OLspoH4IcUkzBEMP9smcnrHAZcHF/nX
+GCwwfQ56HmIexkvA/X1id9NEHif2P0tEs7c42TkfYNVRknMDtABp4/MUTu7R3AnP
+dzRGULD4EfL+OHn3Bzn+UZKXC1sIXzSGAa2Il+tmzV7R/9x98oTaunet3IAIx6eH
+1lWfl2royBFkuucZKT8Rs3iQhCBSWxHveNCD9tVIkNAwHM+A+WD+eeSI8t0A65RF
+62WUaUC6wNW0uLp9BBGo6zEFlpROWCGOn9Bg/QIDAQABo4HRMIHOMB0GA1UdDgQW
+BBSBPjfYkrAfd59ctKtzquf2NGAv+jALBgNVHQ8EBAMCAQYwDwYDVR0TAQH/BAUw
+AwEB/zCBjgYDVR0jBIGGMIGDgBSBPjfYkrAfd59ctKtzquf2NGAv+qFopGYwZDEL
+MAkGA1UEBhMCU0UxFDASBgNVBAoTC0FkZFRydXN0IEFCMR0wGwYDVQQLExRBZGRU
+cnVzdCBUVFAgTmV0d29yazEgMB4GA1UEAxMXQWRkVHJ1c3QgUHVibGljIENBIFJv
+b3SCAQEwDQYJKoZIhvcNAQEFBQADggEBAAP3FUr4JNojVhaTdt02KLmuG7jD8WS6
+IBh4lSknVwW8fCr0uVFV2ocC3g8WFzH4qnkuCRO7r7IgGRLlk/lL+YPoRNWyQSW/
+iHVv/xD8SlTQX/D67zZzfRs2RcYhbbQVuE7PnFylPVoAjgbjPGsye/Kf8Lb93/Ao
+GEjwxrzQvzSAlsJKsW2Ox5BF3i9nrEUEo3rcVZLJR2bYGozH7ZxOmuASu7VqTITh
+4SINhwBk/ox9Yjllpu9CtoAlEmEBqCQTcAARJl/6NVDFSMwGR+gn2HCNX2TmoUQm
+XiLsks3/QppEIW1cxeMiHV9HEufOX1362KqxMy3ZdvJOOjMMK7MtkAY=
+-----END CERTIFICATE-----
+
+# Operating CA: Comodo Group
+# Issuer: CN=AddTrust Qualified CA Root O=AddTrust AB OU=AddTrust TTP Network
+# Subject: CN=AddTrust Qualified CA Root O=AddTrust AB OU=AddTrust TTP Network
+# Label: "AddTrust Qualified Certificates Root"
+# Serial: 1
+# MD5 Fingerprint: 27:ec:39:47:cd:da:5a:af:e2:9a:01:65:21:a9:4c:bb
+# SHA1 Fingerprint: 4d:23:78:ec:91:95:39:b5:00:7f:75:8f:03:3b:21:1e:c5:4d:8b:cf
+# SHA256 Fingerprint: 80:95:21:08:05:db:4b:bc:35:5e:44:28:d8:fd:6e:c2:cd:e3:ab:5f:b9:7a:99:42:98:8e:b8:f4:dc:d0:60:16
+-----BEGIN CERTIFICATE-----
+MIIEHjCCAwagAwIBAgIBATANBgkqhkiG9w0BAQUFADBnMQswCQYDVQQGEwJTRTEU
+MBIGA1UEChMLQWRkVHJ1c3QgQUIxHTAbBgNVBAsTFEFkZFRydXN0IFRUUCBOZXR3
+b3JrMSMwIQYDVQQDExpBZGRUcnVzdCBRdWFsaWZpZWQgQ0EgUm9vdDAeFw0wMDA1
+MzAxMDQ0NTBaFw0yMDA1MzAxMDQ0NTBaMGcxCzAJBgNVBAYTAlNFMRQwEgYDVQQK
+EwtBZGRUcnVzdCBBQjEdMBsGA1UECxMUQWRkVHJ1c3QgVFRQIE5ldHdvcmsxIzAh
+BgNVBAMTGkFkZFRydXN0IFF1YWxpZmllZCBDQSBSb290MIIBIjANBgkqhkiG9w0B
+AQEFAAOCAQ8AMIIBCgKCAQEA5B6a/twJWoekn0e+EV+vhDTbYjx5eLfpMLXsDBwq
+xBb/4Oxx64r1EW7tTw2R0hIYLUkVAcKkIhPHEWT/IhKauY5cLwjPcWqzZwFZ8V1G
+87B4pfYOQnrjfxvM0PC3KP0q6p6zsLkEqv32x7SxuCqg+1jxGaBvcCV+PmlKfw8i
+2O+tCBGaKZnhqkRFmhJePp1tUvznoD1oL/BLcHwTOK28FSXx1s6rosAx1i+f4P8U
+WfyEk9mHfExUE+uf0S0R+Bg6Ot4l2ffTQO2kBhLEO+GRwVY18BTcZTYJbqukB8c1
+0cIDMzZbdSZtQvESa0NvS3GU+jQd7RNuyoB/mC9suWXY6QIDAQABo4HUMIHRMB0G
+A1UdDgQWBBQ5lYtii1zJ1IC6WA+XPxUIQ8yYpzALBgNVHQ8EBAMCAQYwDwYDVR0T
+AQH/BAUwAwEB/zCBkQYDVR0jBIGJMIGGgBQ5lYtii1zJ1IC6WA+XPxUIQ8yYp6Fr
+pGkwZzELMAkGA1UEBhMCU0UxFDASBgNVBAoTC0FkZFRydXN0IEFCMR0wGwYDVQQL
+ExRBZGRUcnVzdCBUVFAgTmV0d29yazEjMCEGA1UEAxMaQWRkVHJ1c3QgUXVhbGlm
+aWVkIENBIFJvb3SCAQEwDQYJKoZIhvcNAQEFBQADggEBABmrder4i2VhlRO6aQTv
+hsoToMeqT2QbPxj2qC0sVY8FtzDqQmodwCVRLae/DLPt7wh/bDxGGuoYQ992zPlm
+hpwsaPXpF/gxsxjE1kh9I0xowX67ARRvxdlu3rsEQmr49lx95dr6h+sNNVJn0J6X
+dgWTP5XHAeZpVTh/EGGZyeNfpso+gmNIquIISD6q8rKFYqa0p9m9N5xotS1WfbC3
+P6CxB9bpT9zeRXEwMn8bLgn5v1Kh7sKAPgZcLlVAwRv1cEWw3F369nJad9Jjzc9Y
+iQBCYz95OdBEsIJuQRno3eDBiFrRHnGTHyQwdOUeqN48Jzd/g66ed8/wMLH/S5no
+xqE=
+-----END CERTIFICATE-----
+
+# Operating CA: Comodo Group
+# Issuer: CN=COMODO Certification Authority O=COMODO CA Limited
+# Subject: CN=COMODO Certification Authority O=COMODO CA Limited
+# Label: "COMODO Certification Authority"
+# Serial: 104350513648249232941998508985834464573
+# MD5 Fingerprint: 5c:48:dc:f7:42:72:ec:56:94:6d:1c:cc:71:35:80:75
+# SHA1 Fingerprint: 66:31:bf:9e:f7:4f:9e:b6:c9:d5:a6:0c:ba:6a:be:d1:f7:bd:ef:7b
+# SHA256 Fingerprint: 0c:2c:d6:3d:f7:80:6f:a3:99:ed:e8:09:11:6b:57:5b:f8:79:89:f0:65:18:f9:80:8c:86:05:03:17:8b:af:66
+-----BEGIN CERTIFICATE-----
+MIIEHTCCAwWgAwIBAgIQToEtioJl4AsC7j41AkblPTANBgkqhkiG9w0BAQUFADCB
+gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G
+A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV
+BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0wNjEyMDEwMDAw
+MDBaFw0yOTEyMzEyMzU5NTlaMIGBMQswCQYDVQQGEwJHQjEbMBkGA1UECBMSR3Jl
+YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P
+RE8gQ0EgTGltaXRlZDEnMCUGA1UEAxMeQ09NT0RPIENlcnRpZmljYXRpb24gQXV0
+aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0ECLi3LjkRv3
+UcEbVASY06m/weaKXTuH+7uIzg3jLz8GlvCiKVCZrts7oVewdFFxze1CkU1B/qnI
+2GqGd0S7WWaXUF601CxwRM/aN5VCaTwwxHGzUvAhTaHYujl8HJ6jJJ3ygxaYqhZ8
+Q5sVW7euNJH+1GImGEaaP+vB+fGQV+useg2L23IwambV4EajcNxo2f8ESIl33rXp
++2dtQem8Ob0y2WIC8bGoPW43nOIv4tOiJovGuFVDiOEjPqXSJDlqR6sA1KGzqSX+
+DT+nHbrTUcELpNqsOO9VUCQFZUaTNE8tja3G1CEZ0o7KBWFxB3NH5YoZEr0ETc5O
+nKVIrLsm9wIDAQABo4GOMIGLMB0GA1UdDgQWBBQLWOWLxkwVN6RAqTCpIb5HNlpW
+/zAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zBJBgNVHR8EQjBAMD6g
+PKA6hjhodHRwOi8vY3JsLmNvbW9kb2NhLmNvbS9DT01PRE9DZXJ0aWZpY2F0aW9u
+QXV0aG9yaXR5LmNybDANBgkqhkiG9w0BAQUFAAOCAQEAPpiem/Yb6dc5t3iuHXIY
+SdOH5EOC6z/JqvWote9VfCFSZfnVDeFs9D6Mk3ORLgLETgdxb8CPOGEIqB6BCsAv
+IC9Bi5HcSEW88cbeunZrM8gALTFGTO3nnc+IlP8zwFboJIYmuNg4ON8qa90SzMc/
+RxdMosIGlgnW2/4/PEZB31jiVg88O8EckzXZOFKs7sjsLjBOlDW0JB9LeGna8gI4
+zJVSk/BwJVmcIGfE7vmLV2H0knZ9P4SNVbfo5azV8fUZVqZa+5Acr5Pr5RzUZ5dd
+BA6+C4OmF4O5MBKgxTMVBbkN+8cFduPYSo38NBejxiEovjBFMR7HeL5YYTisO+IB
+ZQ==
+-----END CERTIFICATE-----
+
+# Operating CA: Comodo Group
+# Issuer: CN=COMODO ECC Certification Authority O=COMODO CA Limited
+# Subject: CN=COMODO ECC Certification Authority O=COMODO CA Limited
+# Label: "COMODO ECC Certification Authority"
+# Serial: 41578283867086692638256921589707938090
+# MD5 Fingerprint: 7c:62:ff:74:9d:31:53:5e:68:4a:d5:78:aa:1e:bf:23
+# SHA1 Fingerprint: 9f:74:4e:9f:2b:4d:ba:ec:0f:31:2c:50:b6:56:3b:8e:2d:93:c3:11
+# SHA256 Fingerprint: 17:93:92:7a:06:14:54:97:89:ad:ce:2f:8f:34:f7:f0:b6:6d:0f:3a:e3:a3:b8:4d:21:ec:15:db:ba:4f:ad:c7
+-----BEGIN CERTIFICATE-----
+MIICiTCCAg+gAwIBAgIQH0evqmIAcFBUTAGem2OZKjAKBggqhkjOPQQDAzCBhTEL
+MAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4GA1UE
+BxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxKzApBgNVBAMT
+IkNPTU9ETyBFQ0MgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkwHhcNMDgwMzA2MDAw
+MDAwWhcNMzgwMTE4MjM1OTU5WjCBhTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdy
+ZWF0ZXIgTWFuY2hlc3RlcjEQMA4GA1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09N
+T0RPIENBIExpbWl0ZWQxKzApBgNVBAMTIkNPTU9ETyBFQ0MgQ2VydGlmaWNhdGlv
+biBBdXRob3JpdHkwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAAQDR3svdcmCFYX7deSR
+FtSrYpn1PlILBs5BAH+X4QokPB0BBO490o0JlwzgdeT6+3eKKvUDYEs2ixYjFq0J
+cfRK9ChQtP6IHG4/bC8vCVlbpVsLM5niwz2J+Wos77LTBumjQjBAMB0GA1UdDgQW
+BBR1cacZSBm8nZ3qQUfflMRId5nTeTAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/
+BAUwAwEB/zAKBggqhkjOPQQDAwNoADBlAjEA7wNbeqy3eApyt4jf/7VGFAkK+qDm
+fQjGGoe9GKhzvSbKYAydzpmfz1wPMOG+FDHqAjAU9JM8SaczepBGR7NjfRObTrdv
+GDeAU/7dIOA1mjbRxwG55tzd8/8dLDoWV9mSOdY=
+-----END CERTIFICATE-----
+
+# Operating CA: Comodo Group
+# Issuer: CN=COMODO RSA Certification Authority O=COMODO CA Limited
+# Subject: CN=COMODO RSA Certification Authority O=COMODO CA Limited
+# Label: "COMODO RSA Certification Authority"
+# Serial: 101909084537582093308941363524873193117
+# MD5 Fingerprint: 1b:31:b0:71:40:36:cc:14:36:91:ad:c4:3e:fd:ec:18
+# SHA1 Fingerprint: af:e5:d2:44:a8:d1:19:42:30:ff:47:9f:e2:f8:97:bb:cd:7a:8c:b4
+# SHA256 Fingerprint: 52:f0:e1:c4:e5:8e:c6:29:29:1b:60:31:7f:07:46:71:b8:5d:7e:a8:0d:5b:07:27:34:63:53:4b:32:b4:02:34
+-----BEGIN CERTIFICATE-----
+MIIF2DCCA8CgAwIBAgIQTKr5yttjb+Af907YWwOGnTANBgkqhkiG9w0BAQwFADCB
+hTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G
+A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxKzApBgNV
+BAMTIkNPTU9ETyBSU0EgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkwHhcNMTAwMTE5
+MDAwMDAwWhcNMzgwMTE4MjM1OTU5WjCBhTELMAkGA1UEBhMCR0IxGzAZBgNVBAgT
+EkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4GA1UEBxMHU2FsZm9yZDEaMBgGA1UEChMR
+Q09NT0RPIENBIExpbWl0ZWQxKzApBgNVBAMTIkNPTU9ETyBSU0EgQ2VydGlmaWNh
+dGlvbiBBdXRob3JpdHkwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCR
+6FSS0gpWsawNJN3Fz0RndJkrN6N9I3AAcbxT38T6KhKPS38QVr2fcHK3YX/JSw8X
+pz3jsARh7v8Rl8f0hj4K+j5c+ZPmNHrZFGvnnLOFoIJ6dq9xkNfs/Q36nGz637CC
+9BR++b7Epi9Pf5l/tfxnQ3K9DADWietrLNPtj5gcFKt+5eNu/Nio5JIk2kNrYrhV
+/erBvGy2i/MOjZrkm2xpmfh4SDBF1a3hDTxFYPwyllEnvGfDyi62a+pGx8cgoLEf
+Zd5ICLqkTqnyg0Y3hOvozIFIQ2dOciqbXL1MGyiKXCJ7tKuY2e7gUYPDCUZObT6Z
++pUX2nwzV0E8jVHtC7ZcryxjGt9XyD+86V3Em69FmeKjWiS0uqlWPc9vqv9JWL7w
+qP/0uK3pN/u6uPQLOvnoQ0IeidiEyxPx2bvhiWC4jChWrBQdnArncevPDt09qZah
+SL0896+1DSJMwBGB7FY79tOi4lu3sgQiUpWAk2nojkxl8ZEDLXB0AuqLZxUpaVIC
+u9ffUGpVRr+goyhhf3DQw6KqLCGqR84onAZFdr+CGCe01a60y1Dma/RMhnEw6abf
+Fobg2P9A3fvQQoh/ozM6LlweQRGBY84YcWsr7KaKtzFcOmpH4MN5WdYgGq/yapiq
+crxXStJLnbsQ/LBMQeXtHT1eKJ2czL+zUdqnR+WEUwIDAQABo0IwQDAdBgNVHQ4E
+FgQUu69+Aj36pvE8hI6t7jiY7NkyMtQwDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB
+/wQFMAMBAf8wDQYJKoZIhvcNAQEMBQADggIBAArx1UaEt65Ru2yyTUEUAJNMnMvl
+wFTPoCWOAvn9sKIN9SCYPBMtrFaisNZ+EZLpLrqeLppysb0ZRGxhNaKatBYSaVqM
+4dc+pBroLwP0rmEdEBsqpIt6xf4FpuHA1sj+nq6PK7o9mfjYcwlYRm6mnPTXJ9OV
+2jeDchzTc+CiR5kDOF3VSXkAKRzH7JsgHAckaVd4sjn8OoSgtZx8jb8uk2Intzna
+FxiuvTwJaP+EmzzV1gsD41eeFPfR60/IvYcjt7ZJQ3mFXLrrkguhxuhoqEwWsRqZ
+CuhTLJK7oQkYdQxlqHvLI7cawiiFwxv/0Cti76R7CZGYZ4wUAc1oBmpjIXUDgIiK
+boHGhfKppC3n9KUkEEeDys30jXlYsQab5xoq2Z0B15R97QNKyvDb6KkBPvVWmcke
+jkk9u+UJueBPSZI9FoJAzMxZxuY67RIuaTxslbH9qh17f4a+Hg4yRvv7E491f0yL
+S0Zj/gA0QHDBw7mh3aZw4gSzQbzpgJHqZJx64SIDqZxubw5lT2yHh17zbqD5daWb
+QOhTsiedSrnAdyGN/4fy3ryM7xfft0kL0fJuMAsaDk527RH89elWsn2/x20Kk4yl
+0MC2Hb46TpSi125sC8KKfPog88Tk5c0NqMuRkrF8hey1FGlmDoLnzc7ILaZRfyHB
+NVOFBkpdn627G190
+-----END CERTIFICATE-----
+
+# Operating CA: Comodo Group
+# Issuer: CN=Secure Certificate Services O=Comodo CA Limited
+# Subject: CN=Secure Certificate Services O=Comodo CA Limited
+# Label: "Comodo Secure Services root"
+# Serial: 1
+# MD5 Fingerprint: d3:d9:bd:ae:9f:ac:67:24:b3:c8:1b:52:e1:b9:a9:bd
+# SHA1 Fingerprint: 4a:65:d5:f4:1d:ef:39:b8:b8:90:4a:4a:d3:64:81:33:cf:c7:a1:d1
+# SHA256 Fingerprint: bd:81:ce:3b:4f:65:91:d1:1a:67:b5:fc:7a:47:fd:ef:25:52:1b:f9:aa:4e:18:b9:e3:df:2e:34:a7:80:3b:e8
+-----BEGIN CERTIFICATE-----
+MIIEPzCCAyegAwIBAgIBATANBgkqhkiG9w0BAQUFADB+MQswCQYDVQQGEwJHQjEb
+MBkGA1UECAwSR3JlYXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHDAdTYWxmb3JkMRow
+GAYDVQQKDBFDb21vZG8gQ0EgTGltaXRlZDEkMCIGA1UEAwwbU2VjdXJlIENlcnRp
+ZmljYXRlIFNlcnZpY2VzMB4XDTA0MDEwMTAwMDAwMFoXDTI4MTIzMTIzNTk1OVow
+fjELMAkGA1UEBhMCR0IxGzAZBgNVBAgMEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G
+A1UEBwwHU2FsZm9yZDEaMBgGA1UECgwRQ29tb2RvIENBIExpbWl0ZWQxJDAiBgNV
+BAMMG1NlY3VyZSBDZXJ0aWZpY2F0ZSBTZXJ2aWNlczCCASIwDQYJKoZIhvcNAQEB
+BQADggEPADCCAQoCggEBAMBxM4KK0HDrc4eCQNUd5MvJDkKQ+d40uaG6EfQlhfPM
+cm3ye5drswfxdySRXyWP9nQ95IDC+DwN879A6vfIUtFyb+/Iq0G4bi4XKpVpDM3S
+HpR7LZQdqnXXs5jLrLxkU0C8j6ysNstcrbvd4JQX7NFc0L/vpZXJkMWwrPsbQ996
+CF23uPJAGysnnlDOXmWCiIxe004MeuoIkbY2qitC++rCoznl2yY4rYsK7hljxxwk
+3wN42ubqwUcaCwtGCd0C/N7Lh1/XMGNooa7cMqG6vv5Eq2i2pRcV/b3Vp6ea5EQz
+6YiO/O1R65NxTq0B50SOqy3LqP4BSUjwwN3HaNiS/j0CAwEAAaOBxzCBxDAdBgNV
+HQ4EFgQUPNiTiMLAggnMAZkGkyDpnnAJY08wDgYDVR0PAQH/BAQDAgEGMA8GA1Ud
+EwEB/wQFMAMBAf8wgYEGA1UdHwR6MHgwO6A5oDeGNWh0dHA6Ly9jcmwuY29tb2Rv
+Y2EuY29tL1NlY3VyZUNlcnRpZmljYXRlU2VydmljZXMuY3JsMDmgN6A1hjNodHRw
+Oi8vY3JsLmNvbW9kby5uZXQvU2VjdXJlQ2VydGlmaWNhdGVTZXJ2aWNlcy5jcmww
+DQYJKoZIhvcNAQEFBQADggEBAIcBbSMdflsXfcFhMs+P5/OKlFlm4J4oqF7Tt/Q0
+5qo5spcWxYJvMqTpjOev/e/C6LlLqqP05tqNZSH7uoDrJiiFGv45jN5bBAS0VPmj
+Z55B+glSzAVIqMk/IQQezkhr/IXownuvf7fM+F86/TXGDe+X3EyrEeFryzHRbPtI
+gKvcnDe4IRRLDXE97IMzbtFuMhbsmMcWi1mmNKsFVy2T96oTy9IT4rcuO81rUBcJ
+aD61JlfutuC23bkpgHl9j6PwpCikFcSF9CfUa7/lXORlAnZUtOM3ZiTTGWHIUhDl
+izeauan5Hb/qmZJhlv8BzaFfDbxxvA6sCx1HRR3B7Hzs/Sk=
+-----END CERTIFICATE-----
+
+# Operating CA: Comodo Group
+# Issuer: CN=Trusted Certificate Services O=Comodo CA Limited
+# Subject: CN=Trusted Certificate Services O=Comodo CA Limited
+# Label: "Comodo Trusted Services root"
+# Serial: 1
+# MD5 Fingerprint: 91:1b:3f:6e:cd:9e:ab:ee:07:fe:1f:71:d2:b3:61:27
+# SHA1 Fingerprint: e1:9f:e3:0e:8b:84:60:9e:80:9b:17:0d:72:a8:c5:ba:6e:14:09:bd
+# SHA256 Fingerprint: 3f:06:e5:56:81:d4:96:f5:be:16:9e:b5:38:9f:9f:2b:8f:f6:1e:17:08:df:68:81:72:48:49:cd:5d:27:cb:69
+-----BEGIN CERTIFICATE-----
+MIIEQzCCAyugAwIBAgIBATANBgkqhkiG9w0BAQUFADB/MQswCQYDVQQGEwJHQjEb
+MBkGA1UECAwSR3JlYXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHDAdTYWxmb3JkMRow
+GAYDVQQKDBFDb21vZG8gQ0EgTGltaXRlZDElMCMGA1UEAwwcVHJ1c3RlZCBDZXJ0
+aWZpY2F0ZSBTZXJ2aWNlczAeFw0wNDAxMDEwMDAwMDBaFw0yODEyMzEyMzU5NTla
+MH8xCzAJBgNVBAYTAkdCMRswGQYDVQQIDBJHcmVhdGVyIE1hbmNoZXN0ZXIxEDAO
+BgNVBAcMB1NhbGZvcmQxGjAYBgNVBAoMEUNvbW9kbyBDQSBMaW1pdGVkMSUwIwYD
+VQQDDBxUcnVzdGVkIENlcnRpZmljYXRlIFNlcnZpY2VzMIIBIjANBgkqhkiG9w0B
+AQEFAAOCAQ8AMIIBCgKCAQEA33FvNlhTWvI2VFeAxHQIIO0Yfyod5jWaHiWsnOWW
+fnJSoBVC21ndZHoa0Lh73TkVvFVIxO06AOoxEbrycXQaZ7jPM8yoMa+j49d/vzMt
+TGo87IvDktJTdyR0nAducPy9C1t2ul/y/9c3S0pgePfw+spwtOpZqqPOSC+pw7IL
+fhdyFgymBwwbOM/JYrc/oJOlh0Hyt3BAd9i+FHzjqMB6juljatEPmsbS9Is6FARW
+1O24zG71++IsWL1/T2sr92AkWCTOJu80kTrV44HQsvAEAtdbtz6SrGsSivnkBbA7
+kUlcsutT6vifR4buv5XAwAaf0lteERv0xwQ1KdJVXOTt6wIDAQABo4HJMIHGMB0G
+A1UdDgQWBBTFe1i97doladL3WRaoszLAeydb9DAOBgNVHQ8BAf8EBAMCAQYwDwYD
+VR0TAQH/BAUwAwEB/zCBgwYDVR0fBHwwejA8oDqgOIY2aHR0cDovL2NybC5jb21v
+ZG9jYS5jb20vVHJ1c3RlZENlcnRpZmljYXRlU2VydmljZXMuY3JsMDqgOKA2hjRo
+dHRwOi8vY3JsLmNvbW9kby5uZXQvVHJ1c3RlZENlcnRpZmljYXRlU2VydmljZXMu
+Y3JsMA0GCSqGSIb3DQEBBQUAA4IBAQDIk4E7ibSvuIQSTI3S8NtwuleGFTQQuS9/
+HrCoiWChisJ3DFBKmwCL2Iv0QeLQg4pKHBQGsKNoBXAxMKdTmw7pSqBYaWcOrp32
+pSxBvzwGa+RZzG0Q8ZZvH9/0BAKkn0U+yNj6NkZEUD+Cl5EfKNsYEYwq5GWDVxIS
+jBc/lDb+XbDABHcTuPQV1T84zJQ6VdCsmPW6AF/ghhmBeC8owH7TzEIK9a5QoNE+
+xqFx7D+gIIxmOom0jtTYsU0lR+4viMi14QVFwL4Ucd56/Y57fU0IlqUSc/Atyjcn
+dBInTMu2l+nZrghtWjlA3QVHdWpaIbOjGM9O9y5Xt5hwXsjEeLBi
+-----END CERTIFICATE-----
+
+# Operating CA: Comodo Group
+# Issuer: CN=USERTrust ECC Certification Authority O=The USERTRUST Network
+# Subject: CN=USERTrust ECC Certification Authority O=The USERTRUST Network
+# Label: "USERTrust ECC Certification Authority"
+# Serial: 123013823720199481456569720443997572134
+# MD5 Fingerprint: fa:68:bc:d9:b5:7f:ad:fd:c9:1d:06:83:28:cc:24:c1
+# SHA1 Fingerprint: d1:cb:ca:5d:b2:d5:2a:7f:69:3b:67:4d:e5:f0:5a:1d:0c:95:7d:f0
+# SHA256 Fingerprint: 4f:f4:60:d5:4b:9c:86:da:bf:bc:fc:57:12:e0:40:0d:2b:ed:3f:bc:4d:4f:bd:aa:86:e0:6a:dc:d2:a9:ad:7a
+-----BEGIN CERTIFICATE-----
+MIICjzCCAhWgAwIBAgIQXIuZxVqUxdJxVt7NiYDMJjAKBggqhkjOPQQDAzCBiDEL
+MAkGA1UEBhMCVVMxEzARBgNVBAgTCk5ldyBKZXJzZXkxFDASBgNVBAcTC0plcnNl
+eSBDaXR5MR4wHAYDVQQKExVUaGUgVVNFUlRSVVNUIE5ldHdvcmsxLjAsBgNVBAMT
+JVVTRVJUcnVzdCBFQ0MgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkwHhcNMTAwMjAx
+MDAwMDAwWhcNMzgwMTE4MjM1OTU5WjCBiDELMAkGA1UEBhMCVVMxEzARBgNVBAgT
+Ck5ldyBKZXJzZXkxFDASBgNVBAcTC0plcnNleSBDaXR5MR4wHAYDVQQKExVUaGUg
+VVNFUlRSVVNUIE5ldHdvcmsxLjAsBgNVBAMTJVVTRVJUcnVzdCBFQ0MgQ2VydGlm
+aWNhdGlvbiBBdXRob3JpdHkwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAAQarFRaqflo
+I+d61SRvU8Za2EurxtW20eZzca7dnNYMYf3boIkDuAUU7FfO7l0/4iGzzvfUinng
+o4N+LZfQYcTxmdwlkWOrfzCjtHDix6EznPO/LlxTsV+zfTJ/ijTjeXmjQjBAMB0G
+A1UdDgQWBBQ64QmG1M8ZwpZ2dEl23OA1xmNjmjAOBgNVHQ8BAf8EBAMCAQYwDwYD
+VR0TAQH/BAUwAwEB/zAKBggqhkjOPQQDAwNoADBlAjA2Z6EWCNzklwBBHU6+4WMB
+zzuqQhFkoJ2UOQIReVx7Hfpkue4WQrO/isIJxOzksU0CMQDpKmFHjFJKS04YcPbW
+RNZu9YO6bVi9JNlWSOrvxKJGgYhqOkbRqZtNyWHa0V1Xahg=
+-----END CERTIFICATE-----
+
+# Operating CA: Comodo Group
+# Issuer: CN=USERTrust RSA Certification Authority O=The USERTRUST Network
+# Subject: CN=USERTrust RSA Certification Authority O=The USERTRUST Network
+# Label: "USERTrust RSA Certification Authority"
+# Serial: 2645093764781058787591871645665788717
+# MD5 Fingerprint: 1b:fe:69:d1:91:b7:19:33:a3:72:a8:0f:e1:55:e5:b5
+# SHA1 Fingerprint: 2b:8f:1b:57:33:0d:bb:a2:d0:7a:6c:51:f7:0e:e9:0d:da:b9:ad:8e
+# SHA256 Fingerprint: e7:93:c9:b0:2f:d8:aa:13:e2:1c:31:22:8a:cc:b0:81:19:64:3b:74:9c:89:89:64:b1:74:6d:46:c3:d4:cb:d2
+-----BEGIN CERTIFICATE-----
+MIIF3jCCA8agAwIBAgIQAf1tMPyjylGoG7xkDjUDLTANBgkqhkiG9w0BAQwFADCB
+iDELMAkGA1UEBhMCVVMxEzARBgNVBAgTCk5ldyBKZXJzZXkxFDASBgNVBAcTC0pl
+cnNleSBDaXR5MR4wHAYDVQQKExVUaGUgVVNFUlRSVVNUIE5ldHdvcmsxLjAsBgNV
+BAMTJVVTRVJUcnVzdCBSU0EgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkwHhcNMTAw
+MjAxMDAwMDAwWhcNMzgwMTE4MjM1OTU5WjCBiDELMAkGA1UEBhMCVVMxEzARBgNV
+BAgTCk5ldyBKZXJzZXkxFDASBgNVBAcTC0plcnNleSBDaXR5MR4wHAYDVQQKExVU
+aGUgVVNFUlRSVVNUIE5ldHdvcmsxLjAsBgNVBAMTJVVTRVJUcnVzdCBSU0EgQ2Vy
+dGlmaWNhdGlvbiBBdXRob3JpdHkwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK
+AoICAQCAEmUXNg7D2wiz0KxXDXbtzSfTTK1Qg2HiqiBNCS1kCdzOiZ/MPans9s/B
+3PHTsdZ7NygRK0faOca8Ohm0X6a9fZ2jY0K2dvKpOyuR+OJv0OwWIJAJPuLodMkY
+tJHUYmTbf6MG8YgYapAiPLz+E/CHFHv25B+O1ORRxhFnRghRy4YUVD+8M/5+bJz/
+Fp0YvVGONaanZshyZ9shZrHUm3gDwFA66Mzw3LyeTP6vBZY1H1dat//O+T23LLb2
+VN3I5xI6Ta5MirdcmrS3ID3KfyI0rn47aGYBROcBTkZTmzNg95S+UzeQc0PzMsNT
+79uq/nROacdrjGCT3sTHDN/hMq7MkztReJVni+49Vv4M0GkPGw/zJSZrM233bkf6
+c0Plfg6lZrEpfDKEY1WJxA3Bk1QwGROs0303p+tdOmw1XNtB1xLaqUkL39iAigmT
+Yo61Zs8liM2EuLE/pDkP2QKe6xJMlXzzawWpXhaDzLhn4ugTncxbgtNMs+1b/97l
+c6wjOy0AvzVVdAlJ2ElYGn+SNuZRkg7zJn0cTRe8yexDJtC/QV9AqURE9JnnV4ee
+UB9XVKg+/XRjL7FQZQnmWEIuQxpMtPAlR1n6BB6T1CZGSlCBst6+eLf8ZxXhyVeE
+Hg9j1uliutZfVS7qXMYoCAQlObgOK6nyTJccBz8NUvXt7y+CDwIDAQABo0IwQDAd
+BgNVHQ4EFgQUU3m/WqorSs9UgOHYm8Cd8rIDZsswDgYDVR0PAQH/BAQDAgEGMA8G
+A1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEMBQADggIBAFzUfA3P9wF9QZllDHPF
+Up/L+M+ZBn8b2kMVn54CVVeWFPFSPCeHlCjtHzoBN6J2/FNQwISbxmtOuowhT6KO
+VWKR82kV2LyI48SqC/3vqOlLVSoGIG1VeCkZ7l8wXEskEVX/JJpuXior7gtNn3/3
+ATiUFJVDBwn7YKnuHKsSjKCaXqeYalltiz8I+8jRRa8YFWSQEg9zKC7F4iRO/Fjs
+8PRF/iKz6y+O0tlFYQXBl2+odnKPi4w2r78NBc5xjeambx9spnFixdjQg3IM8WcR
+iQycE0xyNN+81XHfqnHd4blsjDwSXWXavVcStkNr/+XeTWYRUc+ZruwXtuhxkYze
+Sf7dNXGiFSeUHM9h4ya7b6NnJSFd5t0dCy5oGzuCr+yDZ4XUmFF0sbmZgIn/f3gZ
+XHlKYC6SQK5MNyosycdiyA5d9zZbyuAlJQG03RoHnHcAP9Dc1ew91Pq7P8yF1m9/
+qS3fuQL39ZeatTXaw2ewh0qpKJ4jjv9cJ2vhsE/zB+4ALtRZh8tSQZXq9EfX7mRB
+VXyNWQKV3WKdwrnuWih0hKWbt5DHDAff9Yk2dDLWKMGwsAvgnEzDHNb842m1R0aB
+L6KCq9NjRHDEjf8tM7qtj3u1cIiuPhnPQCjY/MiQu12ZIvVS5ljFH4gxQ+6IHdfG
+jjxDah2nGN59PRbxYvnKkKj9
+-----END CERTIFICATE-----
+
+# Operating CA: Comodo Group
+# Issuer: CN=UTN-USERFirst-Hardware O=The USERTRUST Network OU=http://www.usertrust.com
+# Subject: CN=UTN-USERFirst-Hardware O=The USERTRUST Network OU=http://www.usertrust.com
+# Label: "UTN USERFirst Hardware Root CA"
+# Serial: 91374294542884704022267039221184531197
+# MD5 Fingerprint: 4c:56:41:e5:0d:bb:2b:e8:ca:a3:ed:18:08:ad:43:39
+# SHA1 Fingerprint: 04:83:ed:33:99:ac:36:08:05:87:22:ed:bc:5e:46:00:e3:be:f9:d7
+# SHA256 Fingerprint: 6e:a5:47:41:d0:04:66:7e:ed:1b:48:16:63:4a:a3:a7:9e:6e:4b:96:95:0f:82:79:da:fc:8d:9b:d8:81:21:37
+-----BEGIN CERTIFICATE-----
+MIIEdDCCA1ygAwIBAgIQRL4Mi1AAJLQR0zYq/mUK/TANBgkqhkiG9w0BAQUFADCB
+lzELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAlVUMRcwFQYDVQQHEw5TYWx0IExha2Ug
+Q2l0eTEeMBwGA1UEChMVVGhlIFVTRVJUUlVTVCBOZXR3b3JrMSEwHwYDVQQLExho
+dHRwOi8vd3d3LnVzZXJ0cnVzdC5jb20xHzAdBgNVBAMTFlVUTi1VU0VSRmlyc3Qt
+SGFyZHdhcmUwHhcNOTkwNzA5MTgxMDQyWhcNMTkwNzA5MTgxOTIyWjCBlzELMAkG
+A1UEBhMCVVMxCzAJBgNVBAgTAlVUMRcwFQYDVQQHEw5TYWx0IExha2UgQ2l0eTEe
+MBwGA1UEChMVVGhlIFVTRVJUUlVTVCBOZXR3b3JrMSEwHwYDVQQLExhodHRwOi8v
+d3d3LnVzZXJ0cnVzdC5jb20xHzAdBgNVBAMTFlVUTi1VU0VSRmlyc3QtSGFyZHdh
+cmUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCx98M4P7Sof885glFn
+0G2f0v9Y8+efK+wNiVSZuTiZFvfgIXlIwrthdBKWHTxqctU8EGc6Oe0rE81m65UJ
+M6Rsl7HoxuzBdXmcRl6Nq9Bq/bkqVRcQVLMZ8Jr28bFdtqdt++BxF2uiiPsA3/4a
+MXcMmgF6sTLjKwEHOG7DpV4jvEWbe1DByTCP2+UretNb+zNAHqDVmBe8i4fDidNd
+oI6yqqr2jmmIBsX6iSHzCJ1pLgkzmykNRg+MzEk0sGlRvfkGzWitZky8PqxhvQqI
+DsjfPe58BEydCl5rkdbux+0ojatNh4lz0G6k0B4WixThdkQDf2Os5M1JnMWS9Ksy
+oUhbAgMBAAGjgbkwgbYwCwYDVR0PBAQDAgHGMA8GA1UdEwEB/wQFMAMBAf8wHQYD
+VR0OBBYEFKFyXyYbKJhDlV0HN9WFlp1L0sNFMEQGA1UdHwQ9MDswOaA3oDWGM2h0
+dHA6Ly9jcmwudXNlcnRydXN0LmNvbS9VVE4tVVNFUkZpcnN0LUhhcmR3YXJlLmNy
+bDAxBgNVHSUEKjAoBggrBgEFBQcDAQYIKwYBBQUHAwUGCCsGAQUFBwMGBggrBgEF
+BQcDBzANBgkqhkiG9w0BAQUFAAOCAQEARxkP3nTGmZev/K0oXnWO6y1n7k57K9cM
+//bey1WiCuFMVGWTYGufEpytXoMs61quwOQt9ABjHbjAbPLPSbtNk28Gpgoiskli
+CE7/yMgUsogWXecB5BKV5UU0s4tpvc+0hY91UZ59Ojg6FEgSxvunOxqNDYJAB+gE
+CJChicsZUN/KHAG8HQQZexB2lzvukJDKxA4fFm517zP4029bHpbj4HR3dHuKom4t
+3XbWOTCC8KucUvIqx69JXn7HaOWCgchqJ/kniCrVWFCVH/A7HFe7fRQ5YiuayZSS
+KqMiDP+JJn1fIytH1xUdqWqeUQ0qUZ6B+dQ7XnASfxAynB67nfhmqA==
+-----END CERTIFICATE-----
+
+# Operating CA: DigiCert
+# Issuer: CN=Baltimore CyberTrust Root O=Baltimore OU=CyberTrust
+# Subject: CN=Baltimore CyberTrust Root O=Baltimore OU=CyberTrust
+# Label: "Baltimore CyberTrust Root"
+# Serial: 33554617
+# MD5 Fingerprint: ac:b6:94:a5:9c:17:e0:d7:91:52:9b:b1:97:06:a6:e4
+# SHA1 Fingerprint: d4:de:20:d0:5e:66:fc:53:fe:1a:50:88:2c:78:db:28:52:ca:e4:74
+# SHA256 Fingerprint: 16:af:57:a9:f6:76:b0:ab:12:60:95:aa:5e:ba:de:f2:2a:b3:11:19:d6:44:ac:95:cd:4b:93:db:f3:f2:6a:eb
+-----BEGIN CERTIFICATE-----
+MIIDdzCCAl+gAwIBAgIEAgAAuTANBgkqhkiG9w0BAQUFADBaMQswCQYDVQQGEwJJ
+RTESMBAGA1UEChMJQmFsdGltb3JlMRMwEQYDVQQLEwpDeWJlclRydXN0MSIwIAYD
+VQQDExlCYWx0aW1vcmUgQ3liZXJUcnVzdCBSb290MB4XDTAwMDUxMjE4NDYwMFoX
+DTI1MDUxMjIzNTkwMFowWjELMAkGA1UEBhMCSUUxEjAQBgNVBAoTCUJhbHRpbW9y
+ZTETMBEGA1UECxMKQ3liZXJUcnVzdDEiMCAGA1UEAxMZQmFsdGltb3JlIEN5YmVy
+VHJ1c3QgUm9vdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAKMEuyKr
+mD1X6CZymrV51Cni4eiVgLGw41uOKymaZN+hXe2wCQVt2yguzmKiYv60iNoS6zjr
+IZ3AQSsBUnuId9Mcj8e6uYi1agnnc+gRQKfRzMpijS3ljwumUNKoUMMo6vWrJYeK
+mpYcqWe4PwzV9/lSEy/CG9VwcPCPwBLKBsua4dnKM3p31vjsufFoREJIE9LAwqSu
+XmD+tqYF/LTdB1kC1FkYmGP1pWPgkAx9XbIGevOF6uvUA65ehD5f/xXtabz5OTZy
+dc93Uk3zyZAsuT3lySNTPx8kmCFcB5kpvcY67Oduhjprl3RjM71oGDHweI12v/ye
+jl0qhqdNkNwnGjkCAwEAAaNFMEMwHQYDVR0OBBYEFOWdWTCCR1jMrPoIVDaGezq1
+BE3wMBIGA1UdEwEB/wQIMAYBAf8CAQMwDgYDVR0PAQH/BAQDAgEGMA0GCSqGSIb3
+DQEBBQUAA4IBAQCFDF2O5G9RaEIFoN27TyclhAO992T9Ldcw46QQF+vaKSm2eT92
+9hkTI7gQCvlYpNRhcL0EYWoSihfVCr3FvDB81ukMJY2GQE/szKN+OMY3EU/t3Wgx
+jkzSswF07r51XgdIGn9w/xZchMB5hbgF/X++ZRGjD8ACtPhSNzkE1akxehi/oCr0
+Epn3o0WC4zxe9Z2etciefC7IpJ5OCBRLbf1wbWsaY71k5h+3zvDyny67G7fyUIhz
+ksLi4xaNmjICq44Y3ekQEe5+NauQrz4wlHrQMz2nZQ/1/I6eYs9HRCwBXbsdtTLS
+R9I4LtD+gdwyah617jzV/OeBHRnDJELqYzmp
+-----END CERTIFICATE-----
+
+# Operating CA: DigiCert
+# Issuer: CN=Cybertrust Global Root O=Cybertrust, Inc
+# Subject: CN=Cybertrust Global Root O=Cybertrust, Inc
+# Label: "Cybertrust Global Root"
+# Serial: 4835703278459682877484360
+# MD5 Fingerprint: 72:e4:4a:87:e3:69:40:80:77:ea:bc:e3:f4:ff:f0:e1
+# SHA1 Fingerprint: 5f:43:e5:b1:bf:f8:78:8c:ac:1c:c7:ca:4a:9a:c6:22:2b:cc:34:c6
+# SHA256 Fingerprint: 96:0a:df:00:63:e9:63:56:75:0c:29:65:dd:0a:08:67:da:0b:9c:bd:6e:77:71:4a:ea:fb:23:49:ab:39:3d:a3
+-----BEGIN CERTIFICATE-----
+MIIDoTCCAomgAwIBAgILBAAAAAABD4WqLUgwDQYJKoZIhvcNAQEFBQAwOzEYMBYG
+A1UEChMPQ3liZXJ0cnVzdCwgSW5jMR8wHQYDVQQDExZDeWJlcnRydXN0IEdsb2Jh
+bCBSb290MB4XDTA2MTIxNTA4MDAwMFoXDTIxMTIxNTA4MDAwMFowOzEYMBYGA1UE
+ChMPQ3liZXJ0cnVzdCwgSW5jMR8wHQYDVQQDExZDeWJlcnRydXN0IEdsb2JhbCBS
+b290MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA+Mi8vRRQZhP/8NN5
+7CPytxrHjoXxEnOmGaoQ25yiZXRadz5RfVb23CO21O1fWLE3TdVJDm71aofW0ozS
+J8bi/zafmGWgE07GKmSb1ZASzxQG9Dvj1Ci+6A74q05IlG2OlTEQXO2iLb3VOm2y
+HLtgwEZLAfVJrn5GitB0jaEMAs7u/OePuGtm839EAL9mJRQr3RAwHQeWP032a7iP
+t3sMpTjr3kfb1V05/Iin89cqdPHoWqI7n1C6poxFNcJQZZXcY4Lv3b93TZxiyWNz
+FtApD0mpSPCzqrdsxacwOUBdrsTiXSZT8M4cIwhhqJQZugRiQOwfOHB3EgZxpzAY
+XSUnpQIDAQABo4GlMIGiMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/
+MB0GA1UdDgQWBBS2CHsNesysIEyGVjJez6tuhS1wVzA/BgNVHR8EODA2MDSgMqAw
+hi5odHRwOi8vd3d3Mi5wdWJsaWMtdHJ1c3QuY29tL2NybC9jdC9jdHJvb3QuY3Js
+MB8GA1UdIwQYMBaAFLYIew16zKwgTIZWMl7Pq26FLXBXMA0GCSqGSIb3DQEBBQUA
+A4IBAQBW7wojoFROlZfJ+InaRcHUowAl9B8Tq7ejhVhpwjCt2BWKLePJzYFa+HMj
+Wqd8BfP9IjsO0QbE2zZMcwSO5bAi5MXzLqXZI+O4Tkogp24CJJ8iYGd7ix1yCcUx
+XOl5n4BHPa2hCwcUPUf/A2kaDAtE52Mlp3+yybh2hO0j9n0Hq0V+09+zv+mKts2o
+omcrUtW3ZfA5TGOgkXmTUg9U3YO7n9GPp1Nzw8v/MOx8BLjYRB+TX3EJIrduPuoc
+A06dGiBh+4E37F78CkWr1+cXVdCg6mCbpvbjjFspwgZgFJ0tl0ypkxWdYcQBX0jW
+WL1WMRJOEcgh4LMRkWXbtKaIOM5V
+-----END CERTIFICATE-----
+
+# Operating CA: DigiCert
+# Issuer: CN=DigiCert Assured ID Root CA O=DigiCert Inc OU=www.digicert.com
+# Subject: CN=DigiCert Assured ID Root CA O=DigiCert Inc OU=www.digicert.com
+# Label: "DigiCert Assured ID Root CA"
+# Serial: 17154717934120587862167794914071425081
+# MD5 Fingerprint: 87:ce:0b:7b:2a:0e:49:00:e1:58:71:9b:37:a8:93:72
+# SHA1 Fingerprint: 05:63:b8:63:0d:62:d7:5a:bb:c8:ab:1e:4b:df:b5:a8:99:b2:4d:43
+# SHA256 Fingerprint: 3e:90:99:b5:01:5e:8f:48:6c:00:bc:ea:9d:11:1e:e7:21:fa:ba:35:5a:89:bc:f1:df:69:56:1e:3d:c6:32:5c
+-----BEGIN CERTIFICATE-----
+MIIDtzCCAp+gAwIBAgIQDOfg5RfYRv6P5WD8G/AwOTANBgkqhkiG9w0BAQUFADBl
+MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3
+d3cuZGlnaWNlcnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJv
+b3QgQ0EwHhcNMDYxMTEwMDAwMDAwWhcNMzExMTEwMDAwMDAwWjBlMQswCQYDVQQG
+EwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNl
+cnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJvb3QgQ0EwggEi
+MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCtDhXO5EOAXLGH87dg+XESpa7c
+JpSIqvTO9SA5KFhgDPiA2qkVlTJhPLWxKISKityfCgyDF3qPkKyK53lTXDGEKvYP
+mDI2dsze3Tyoou9q+yHyUmHfnyDXH+Kx2f4YZNISW1/5WBg1vEfNoTb5a3/UsDg+
+wRvDjDPZ2C8Y/igPs6eD1sNuRMBhNZYW/lmci3Zt1/GiSw0r/wty2p5g0I6QNcZ4
+VYcgoc/lbQrISXwxmDNsIumH0DJaoroTghHtORedmTpyoeb6pNnVFzF1roV9Iq4/
+AUaG9ih5yLHa5FcXxH4cDrC0kqZWs72yl+2qp/C3xag/lRbQ/6GW6whfGHdPAgMB
+AAGjYzBhMA4GA1UdDwEB/wQEAwIBhjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQW
+BBRF66Kv9JLLgjEtUYunpyGd823IDzAfBgNVHSMEGDAWgBRF66Kv9JLLgjEtUYun
+pyGd823IDzANBgkqhkiG9w0BAQUFAAOCAQEAog683+Lt8ONyc3pklL/3cmbYMuRC
+dWKuh+vy1dneVrOfzM4UKLkNl2BcEkxY5NM9g0lFWJc1aRqoR+pWxnmrEthngYTf
+fwk8lOa4JiwgvT2zKIn3X/8i4peEH+ll74fg38FnSbNd67IJKusm7Xi+fT8r87cm
+NW1fiQG2SVufAQWbqz0lwcy2f8Lxb4bG+mRo64EtlOtCt/qMHt1i8b5QZ7dsvfPx
+H2sMNgcWfzd8qVttevESRmCD1ycEvkvOl77DZypoEd+A5wwzZr8TDRRu838fYxAe
++o0bJW1sj6W3YQGx0qMmoRBxna3iw/nDmVG3KwcIzi7mULKn+gpFL6Lw8g==
+-----END CERTIFICATE-----
+
+# Operating CA: DigiCert
+# Issuer: CN=DigiCert Assured ID Root G2 O=DigiCert Inc OU=www.digicert.com
+# Subject: CN=DigiCert Assured ID Root G2 O=DigiCert Inc OU=www.digicert.com
+# Label: "DigiCert Assured ID Root G2"
+# Serial: 15385348160840213938643033620894905419
+# MD5 Fingerprint: 92:38:b9:f8:63:24:82:65:2c:57:33:e6:fe:81:8f:9d
+# SHA1 Fingerprint: a1:4b:48:d9:43:ee:0a:0e:40:90:4f:3c:e0:a4:c0:91:93:51:5d:3f
+# SHA256 Fingerprint: 7d:05:eb:b6:82:33:9f:8c:94:51:ee:09:4e:eb:fe:fa:79:53:a1:14:ed:b2:f4:49:49:45:2f:ab:7d:2f:c1:85
+-----BEGIN CERTIFICATE-----
+MIIDljCCAn6gAwIBAgIQC5McOtY5Z+pnI7/Dr5r0SzANBgkqhkiG9w0BAQsFADBl
+MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3
+d3cuZGlnaWNlcnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJv
+b3QgRzIwHhcNMTMwODAxMTIwMDAwWhcNMzgwMTE1MTIwMDAwWjBlMQswCQYDVQQG
+EwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNl
+cnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJvb3QgRzIwggEi
+MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDZ5ygvUj82ckmIkzTz+GoeMVSA
+n61UQbVH35ao1K+ALbkKz3X9iaV9JPrjIgwrvJUXCzO/GU1BBpAAvQxNEP4Htecc
+biJVMWWXvdMX0h5i89vqbFCMP4QMls+3ywPgym2hFEwbid3tALBSfK+RbLE4E9Hp
+EgjAALAcKxHad3A2m67OeYfcgnDmCXRwVWmvo2ifv922ebPynXApVfSr/5Vh88lA
+bx3RvpO704gqu52/clpWcTs/1PPRCv4o76Pu2ZmvA9OPYLfykqGxvYmJHzDNw6Yu
+YjOuFgJ3RFrngQo8p0Quebg/BLxcoIfhG69Rjs3sLPr4/m3wOnyqi+RnlTGNAgMB
+AAGjQjBAMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgGGMB0GA1UdDgQW
+BBTOw0q5mVXyuNtgv6l+vVa1lzan1jANBgkqhkiG9w0BAQsFAAOCAQEAyqVVjOPI
+QW5pJ6d1Ee88hjZv0p3GeDgdaZaikmkuOGybfQTUiaWxMTeKySHMq2zNixya1r9I
+0jJmwYrA8y8678Dj1JGG0VDjA9tzd29KOVPt3ibHtX2vK0LRdWLjSisCx1BL4Gni
+lmwORGYQRI+tBev4eaymG+g3NJ1TyWGqolKvSnAWhsI6yLETcDbYz+70CjTVW0z9
+B5yiutkBclzzTcHdDrEcDcRjvq30FPuJ7KJBDkzMyFdA0G4Dqs0MjomZmWzwPDCv
+ON9vvKO+KSAnq3T/EyJ43pdSVR6DtVQgA+6uwE9W3jfMw3+qBCe703e4YtsXfJwo
+IhNzbM8m9Yop5w==
+-----END CERTIFICATE-----
+
+# Operating CA: DigiCert
+# Issuer: CN=DigiCert Assured ID Root G3 O=DigiCert Inc OU=www.digicert.com
+# Subject: CN=DigiCert Assured ID Root G3 O=DigiCert Inc OU=www.digicert.com
+# Label: "DigiCert Assured ID Root G3"
+# Serial: 15459312981008553731928384953135426796
+# MD5 Fingerprint: 7c:7f:65:31:0c:81:df:8d:ba:3e:99:e2:5c:ad:6e:fb
+# SHA1 Fingerprint: f5:17:a2:4f:9a:48:c6:c9:f8:a2:00:26:9f:dc:0f:48:2c:ab:30:89
+# SHA256 Fingerprint: 7e:37:cb:8b:4c:47:09:0c:ab:36:55:1b:a6:f4:5d:b8:40:68:0f:ba:16:6a:95:2d:b1:00:71:7f:43:05:3f:c2
+-----BEGIN CERTIFICATE-----
+MIICRjCCAc2gAwIBAgIQC6Fa+h3foLVJRK/NJKBs7DAKBggqhkjOPQQDAzBlMQsw
+CQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cu
+ZGlnaWNlcnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJvb3Qg
+RzMwHhcNMTMwODAxMTIwMDAwWhcNMzgwMTE1MTIwMDAwWjBlMQswCQYDVQQGEwJV
+UzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNlcnQu
+Y29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJvb3QgRzMwdjAQBgcq
+hkjOPQIBBgUrgQQAIgNiAAQZ57ysRGXtzbg/WPuNsVepRC0FFfLvC/8QdJ+1YlJf
+Zn4f5dwbRXkLzMZTCp2NXQLZqVneAlr2lSoOjThKiknGvMYDOAdfVdp+CW7if17Q
+RSAPWXYQ1qAk8C3eNvJsKTmjQjBAMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/
+BAQDAgGGMB0GA1UdDgQWBBTL0L2p4ZgFUaFNN6KDec6NHSrkhDAKBggqhkjOPQQD
+AwNnADBkAjAlpIFFAmsSS3V0T8gj43DydXLefInwz5FyYZ5eEJJZVrmDxxDnOOlY
+JjZ91eQ0hjkCMHw2U/Aw5WJjOpnitqM7mzT6HtoQknFekROn3aRukswy1vUhZscv
+6pZjamVFkpUBtA==
+-----END CERTIFICATE-----
+
+# Operating CA: DigiCert
+# Issuer: CN=DigiCert Global Root CA O=DigiCert Inc OU=www.digicert.com
+# Subject: CN=DigiCert Global Root CA O=DigiCert Inc OU=www.digicert.com
+# Label: "DigiCert Global Root CA"
+# Serial: 10944719598952040374951832963794454346
+# MD5 Fingerprint: 79:e4:a9:84:0d:7d:3a:96:d7:c0:4f:e2:43:4c:89:2e
+# SHA1 Fingerprint: a8:98:5d:3a:65:e5:e5:c4:b2:d7:d6:6d:40:c6:dd:2f:b1:9c:54:36
+# SHA256 Fingerprint: 43:48:a0:e9:44:4c:78:cb:26:5e:05:8d:5e:89:44:b4:d8:4f:96:62:bd:26:db:25:7f:89:34:a4:43:c7:01:61
+-----BEGIN CERTIFICATE-----
+MIIDrzCCApegAwIBAgIQCDvgVpBCRrGhdWrJWZHHSjANBgkqhkiG9w0BAQUFADBh
+MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3
+d3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBD
+QTAeFw0wNjExMTAwMDAwMDBaFw0zMTExMTAwMDAwMDBaMGExCzAJBgNVBAYTAlVT
+MRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5j
+b20xIDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IENBMIIBIjANBgkqhkiG
+9w0BAQEFAAOCAQ8AMIIBCgKCAQEA4jvhEXLeqKTTo1eqUKKPC3eQyaKl7hLOllsB
+CSDMAZOnTjC3U/dDxGkAV53ijSLdhwZAAIEJzs4bg7/fzTtxRuLWZscFs3YnFo97
+nh6Vfe63SKMI2tavegw5BmV/Sl0fvBf4q77uKNd0f3p4mVmFaG5cIzJLv07A6Fpt
+43C/dxC//AH2hdmoRBBYMql1GNXRor5H4idq9Joz+EkIYIvUX7Q6hL+hqkpMfT7P
+T19sdl6gSzeRntwi5m3OFBqOasv+zbMUZBfHWymeMr/y7vrTC0LUq7dBMtoM1O/4
+gdW7jVg/tRvoSSiicNoxBN33shbyTApOB6jtSj1etX+jkMOvJwIDAQABo2MwYTAO
+BgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUA95QNVbR
+TLtm8KPiGxvDl7I90VUwHwYDVR0jBBgwFoAUA95QNVbRTLtm8KPiGxvDl7I90VUw
+DQYJKoZIhvcNAQEFBQADggEBAMucN6pIExIK+t1EnE9SsPTfrgT1eXkIoyQY/Esr
+hMAtudXH/vTBH1jLuG2cenTnmCmrEbXjcKChzUyImZOMkXDiqw8cvpOp/2PV5Adg
+06O/nVsJ8dWO41P0jmP6P6fbtGbfYmbW0W5BjfIttep3Sp+dWOIrWcBAI+0tKIJF
+PnlUkiaY4IBIqDfv8NZ5YBberOgOzW6sRBc4L0na4UU+Krk2U886UAb3LujEV0ls
+YSEY1QSteDwsOoBrp+uvFRTp2InBuThs4pFsiv9kuXclVzDAGySj4dzp30d8tbQk
+CAUw7C29C79Fv1C5qfPrmAESrciIxpg0X40KPMbp1ZWVbd4=
+-----END CERTIFICATE-----
+
+# Operating CA: DigiCert
+# Issuer: CN=DigiCert Global Root G2 O=DigiCert Inc OU=www.digicert.com
+# Subject: CN=DigiCert Global Root G2 O=DigiCert Inc OU=www.digicert.com
+# Label: "DigiCert Global Root G2"
+# Serial: 4293743540046975378534879503202253541
+# MD5 Fingerprint: e4:a6:8a:c8:54:ac:52:42:46:0a:fd:72:48:1b:2a:44
+# SHA1 Fingerprint: df:3c:24:f9:bf:d6:66:76:1b:26:80:73:fe:06:d1:cc:8d:4f:82:a4
+# SHA256 Fingerprint: cb:3c:cb:b7:60:31:e5:e0:13:8f:8d:d3:9a:23:f9:de:47:ff:c3:5e:43:c1:14:4c:ea:27:d4:6a:5a:b1:cb:5f
+-----BEGIN CERTIFICATE-----
+MIIDjjCCAnagAwIBAgIQAzrx5qcRqaC7KGSxHQn65TANBgkqhkiG9w0BAQsFADBh
+MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3
+d3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBH
+MjAeFw0xMzA4MDExMjAwMDBaFw0zODAxMTUxMjAwMDBaMGExCzAJBgNVBAYTAlVT
+MRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5j
+b20xIDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IEcyMIIBIjANBgkqhkiG
+9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuzfNNNx7a8myaJCtSnX/RrohCgiN9RlUyfuI
+2/Ou8jqJkTx65qsGGmvPrC3oXgkkRLpimn7Wo6h+4FR1IAWsULecYxpsMNzaHxmx
+1x7e/dfgy5SDN67sH0NO3Xss0r0upS/kqbitOtSZpLYl6ZtrAGCSYP9PIUkY92eQ
+q2EGnI/yuum06ZIya7XzV+hdG82MHauVBJVJ8zUtluNJbd134/tJS7SsVQepj5Wz
+tCO7TG1F8PapspUwtP1MVYwnSlcUfIKdzXOS0xZKBgyMUNGPHgm+F6HmIcr9g+UQ
+vIOlCsRnKPZzFBQ9RnbDhxSJITRNrw9FDKZJobq7nMWxM4MphQIDAQABo0IwQDAP
+BgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIBhjAdBgNVHQ4EFgQUTiJUIBiV
+5uNu5g/6+rkS7QYXjzkwDQYJKoZIhvcNAQELBQADggEBAGBnKJRvDkhj6zHd6mcY
+1Yl9PMWLSn/pvtsrF9+wX3N3KjITOYFnQoQj8kVnNeyIv/iPsGEMNKSuIEyExtv4
+NeF22d+mQrvHRAiGfzZ0JFrabA0UWTW98kndth/Jsw1HKj2ZL7tcu7XUIOGZX1NG
+Fdtom/DzMNU+MeKNhJ7jitralj41E6Vf8PlwUHBHQRFXGU7Aj64GxJUTFy8bJZ91
+8rGOmaFvE7FBcf6IKshPECBV1/MUReXgRPTqh5Uykw7+U0b6LJ3/iyK5S9kJRaTe
+pLiaWN0bfVKfjllDiIGknibVb63dDcY3fe0Dkhvld1927jyNxF1WW6LZZm6zNTfl
+MrY=
+-----END CERTIFICATE-----
+
+# Operating CA: DigiCert
+# Issuer: CN=DigiCert Global Root G3 O=DigiCert Inc OU=www.digicert.com
+# Subject: CN=DigiCert Global Root G3 O=DigiCert Inc OU=www.digicert.com
+# Label: "DigiCert Global Root G3"
+# Serial: 7089244469030293291760083333884364146
+# MD5 Fingerprint: f5:5d:a4:50:a5:fb:28:7e:1e:0f:0d:cc:96:57:56:ca
+# SHA1 Fingerprint: 7e:04:de:89:6a:3e:66:6d:00:e6:87:d3:3f:fa:d9:3b:e8:3d:34:9e
+# SHA256 Fingerprint: 31:ad:66:48:f8:10:41:38:c7:38:f3:9e:a4:32:01:33:39:3e:3a:18:cc:02:29:6e:f9:7c:2a:c9:ef:67:31:d0
+-----BEGIN CERTIFICATE-----
+MIICPzCCAcWgAwIBAgIQBVVWvPJepDU1w6QP1atFcjAKBggqhkjOPQQDAzBhMQsw
+CQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cu
+ZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBHMzAe
+Fw0xMzA4MDExMjAwMDBaFw0zODAxMTUxMjAwMDBaMGExCzAJBgNVBAYTAlVTMRUw
+EwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20x
+IDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IEczMHYwEAYHKoZIzj0CAQYF
+K4EEACIDYgAE3afZu4q4C/sLfyHS8L6+c/MzXRq8NOrexpu80JX28MzQC7phW1FG
+fp4tn+6OYwwX7Adw9c+ELkCDnOg/QW07rdOkFFk2eJ0DQ+4QE2xy3q6Ip6FrtUPO
+Z9wj/wMco+I+o0IwQDAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIBhjAd
+BgNVHQ4EFgQUs9tIpPmhxdiuNkHMEWNpYim8S8YwCgYIKoZIzj0EAwMDaAAwZQIx
+AK288mw/EkrRLTnDCgmXc/SINoyIJ7vmiI1Qhadj+Z4y3maTD/HMsQmP3Wyr+mt/
+oAIwOWZbwmSNuJ5Q3KjVSaLtx9zRSX8XAbjIho9OjIgrqJqpisXRAL34VOKa5Vt8
+sycX
+-----END CERTIFICATE-----
+
+# Operating CA: DigiCert
+# Issuer: CN=DigiCert High Assurance EV Root CA O=DigiCert Inc OU=www.digicert.com
+# Subject: CN=DigiCert High Assurance EV Root CA O=DigiCert Inc OU=www.digicert.com
+# Label: "DigiCert High Assurance EV Root CA"
+# Serial: 3553400076410547919724730734378100087
+# MD5 Fingerprint: d4:74:de:57:5c:39:b2:d3:9c:85:83:c5:c0:65:49:8a
+# SHA1 Fingerprint: 5f:b7:ee:06:33:e2:59:db:ad:0c:4c:9a:e6:d3:8f:1a:61:c7:dc:25
+# SHA256 Fingerprint: 74:31:e5:f4:c3:c1:ce:46:90:77:4f:0b:61:e0:54:40:88:3b:a9:a0:1e:d0:0b:a6:ab:d7:80:6e:d3:b1:18:cf
+-----BEGIN CERTIFICATE-----
+MIIDxTCCAq2gAwIBAgIQAqxcJmoLQJuPC3nyrkYldzANBgkqhkiG9w0BAQUFADBs
+MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3
+d3cuZGlnaWNlcnQuY29tMSswKQYDVQQDEyJEaWdpQ2VydCBIaWdoIEFzc3VyYW5j
+ZSBFViBSb290IENBMB4XDTA2MTExMDAwMDAwMFoXDTMxMTExMDAwMDAwMFowbDEL
+MAkGA1UEBhMCVVMxFTATBgNVBAoTDERpZ2lDZXJ0IEluYzEZMBcGA1UECxMQd3d3
+LmRpZ2ljZXJ0LmNvbTErMCkGA1UEAxMiRGlnaUNlcnQgSGlnaCBBc3N1cmFuY2Ug
+RVYgUm9vdCBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMbM5XPm
++9S75S0tMqbf5YE/yc0lSbZxKsPVlDRnogocsF9ppkCxxLeyj9CYpKlBWTrT3JTW
+PNt0OKRKzE0lgvdKpVMSOO7zSW1xkX5jtqumX8OkhPhPYlG++MXs2ziS4wblCJEM
+xChBVfvLWokVfnHoNb9Ncgk9vjo4UFt3MRuNs8ckRZqnrG0AFFoEt7oT61EKmEFB
+Ik5lYYeBQVCmeVyJ3hlKV9Uu5l0cUyx+mM0aBhakaHPQNAQTXKFx01p8VdteZOE3
+hzBWBOURtCmAEvF5OYiiAhF8J2a3iLd48soKqDirCmTCv2ZdlYTBoSUeh10aUAsg
+EsxBu24LUTi4S8sCAwEAAaNjMGEwDgYDVR0PAQH/BAQDAgGGMA8GA1UdEwEB/wQF
+MAMBAf8wHQYDVR0OBBYEFLE+w2kD+L9HAdSYJhoIAu9jZCvDMB8GA1UdIwQYMBaA
+FLE+w2kD+L9HAdSYJhoIAu9jZCvDMA0GCSqGSIb3DQEBBQUAA4IBAQAcGgaX3Nec
+nzyIZgYIVyHbIUf4KmeqvxgydkAQV8GK83rZEWWONfqe/EW1ntlMMUu4kehDLI6z
+eM7b41N5cdblIZQB2lWHmiRk9opmzN6cN82oNLFpmyPInngiK3BD41VHMWEZ71jF
+hS9OMPagMRYjyOfiZRYzy78aG6A9+MpeizGLYAiJLQwGXFK3xPkKmNEVX58Svnw2
+Yzi9RKR/5CYrCsSXaQ3pjOLAEFe4yHYSkVXySGnYvCoCWw9E1CAx2/S6cCZdkGCe
+vEsXCS+0yx5DaMkHJ8HSXPfqIbloEpw8nL+e/IBcm2PN7EeqJSdnoDfzAIJ9VNep
++OkuE6N36B9K
+-----END CERTIFICATE-----
+
+# Operating CA: DigiCert
+# Issuer: CN=DigiCert Trusted Root G4 O=DigiCert Inc OU=www.digicert.com
+# Subject: CN=DigiCert Trusted Root G4 O=DigiCert Inc OU=www.digicert.com
+# Label: "DigiCert Trusted Root G4"
+# Serial: 7451500558977370777930084869016614236
+# MD5 Fingerprint: 78:f2:fc:aa:60:1f:2f:b4:eb:c9:37:ba:53:2e:75:49
+# SHA1 Fingerprint: dd:fb:16:cd:49:31:c9:73:a2:03:7d:3f:c8:3a:4d:7d:77:5d:05:e4
+# SHA256 Fingerprint: 55:2f:7b:dc:f1:a7:af:9e:6c:e6:72:01:7f:4f:12:ab:f7:72:40:c7:8e:76:1a:c2:03:d1:d9:d2:0a:c8:99:88
+-----BEGIN CERTIFICATE-----
+MIIFkDCCA3igAwIBAgIQBZsbV56OITLiOQe9p3d1XDANBgkqhkiG9w0BAQwFADBi
+MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3
+d3cuZGlnaWNlcnQuY29tMSEwHwYDVQQDExhEaWdpQ2VydCBUcnVzdGVkIFJvb3Qg
+RzQwHhcNMTMwODAxMTIwMDAwWhcNMzgwMTE1MTIwMDAwWjBiMQswCQYDVQQGEwJV
+UzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNlcnQu
+Y29tMSEwHwYDVQQDExhEaWdpQ2VydCBUcnVzdGVkIFJvb3QgRzQwggIiMA0GCSqG
+SIb3DQEBAQUAA4ICDwAwggIKAoICAQC/5pBzaN675F1KPDAiMGkz7MKnJS7JIT3y
+ithZwuEppz1Yq3aaza57G4QNxDAf8xukOBbrVsaXbR2rsnnyyhHS5F/WBTxSD1If
+xp4VpX6+n6lXFllVcq9ok3DCsrp1mWpzMpTREEQQLt+C8weE5nQ7bXHiLQwb7iDV
+ySAdYyktzuxeTsiT+CFhmzTrBcZe7FsavOvJz82sNEBfsXpm7nfISKhmV1efVFiO
+DCu3T6cw2Vbuyntd463JT17lNecxy9qTXtyOj4DatpGYQJB5w3jHtrHEtWoYOAMQ
+jdjUN6QuBX2I9YI+EJFwq1WCQTLX2wRzKm6RAXwhTNS8rhsDdV14Ztk6MUSaM0C/
+CNdaSaTC5qmgZ92kJ7yhTzm1EVgX9yRcRo9k98FpiHaYdj1ZXUJ2h4mXaXpI8OCi
+EhtmmnTK3kse5w5jrubU75KSOp493ADkRSWJtppEGSt+wJS00mFt6zPZxd9LBADM
+fRyVw4/3IbKyEbe7f/LVjHAsQWCqsWMYRJUadmJ+9oCw++hkpjPRiQfhvbfmQ6QY
+uKZ3AeEPlAwhHbJUKSWJbOUOUlFHdL4mrLZBdd56rF+NP8m800ERElvlEFDrMcXK
+chYiCd98THU/Y+whX8QgUWtvsauGi0/C1kVfnSD8oR7FwI+isX4KJpn15GkvmB0t
+9dmpsh3lGwIDAQABo0IwQDAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIB
+hjAdBgNVHQ4EFgQU7NfjgtJxXWRM3y5nP+e6mK4cD08wDQYJKoZIhvcNAQEMBQAD
+ggIBALth2X2pbL4XxJEbw6GiAI3jZGgPVs93rnD5/ZpKmbnJeFwMDF/k5hQpVgs2
+SV1EY+CtnJYYZhsjDT156W1r1lT40jzBQ0CuHVD1UvyQO7uYmWlrx8GnqGikJ9yd
++SeuMIW59mdNOj6PWTkiU0TryF0Dyu1Qen1iIQqAyHNm0aAFYF/opbSnr6j3bTWc
+fFqK1qI4mfN4i/RN0iAL3gTujJtHgXINwBQy7zBZLq7gcfJW5GqXb5JQbZaNaHqa
+sjYUegbyJLkJEVDXCLG4iXqEI2FCKeWjzaIgQdfRnGTZ6iahixTXTBmyUEFxPT9N
+cCOGDErcgdLMMpSEDQgJlxxPwO5rIHQw0uA5NBCFIRUBCOhVMt5xSdkoF1BN5r5N
+0XWs0Mr7QbhDparTwwVETyw2m+L64kW4I1NsBm9nVX9GtUw/bihaeSbSpKhil9Ie
+4u1Ki7wb/UdKDd9nZn6yW0HQO+T0O/QEY+nvwlQAUaCKKsnOeMzV6ocEGLPOr0mI
+r/OSmbaz5mEP0oUA51Aa5BuVnRmhuZyxm7EAHu/QD09CbMkKvO5D+jpxpchNJqU1
+/YldvIViHTLSoCtU7ZpXwdv6EM8Zt4tKG48BtieVU+i2iW1bvGjUI+iLUaJW+fCm
+gKDWHrO8Dw9TdSmq6hN35N6MgSGtBxBHEa2HPQfRdbzP82Z+
+-----END CERTIFICATE-----
+
+# Operating CA: Entrust Datacard
+# Issuer: CN=Entrust Root Certification Authority O=Entrust, Inc. OU=www.entrust.net/CPS is incorporated by reference/(c) 2006 Entrust, Inc.
+# Subject: CN=Entrust Root Certification Authority O=Entrust, Inc. OU=www.entrust.net/CPS is incorporated by reference/(c) 2006 Entrust, Inc.
+# Label: "Entrust Root Certification Authority"
+# Serial: 1164660820
+# MD5 Fingerprint: d6:a5:c3:ed:5d:dd:3e:00:c1:3d:87:92:1f:1d:3f:e4
+# SHA1 Fingerprint: b3:1e:b1:b7:40:e3:6c:84:02:da:dc:37:d4:4d:f5:d4:67:49:52:f9
+# SHA256 Fingerprint: 73:c1:76:43:4f:1b:c6:d5:ad:f4:5b:0e:76:e7:27:28:7c:8d:e5:76:16:c1:e6:e6:14:1a:2b:2c:bc:7d:8e:4c
+-----BEGIN CERTIFICATE-----
+MIIEkTCCA3mgAwIBAgIERWtQVDANBgkqhkiG9w0BAQUFADCBsDELMAkGA1UEBhMC
+VVMxFjAUBgNVBAoTDUVudHJ1c3QsIEluYy4xOTA3BgNVBAsTMHd3dy5lbnRydXN0
+Lm5ldC9DUFMgaXMgaW5jb3Jwb3JhdGVkIGJ5IHJlZmVyZW5jZTEfMB0GA1UECxMW
+KGMpIDIwMDYgRW50cnVzdCwgSW5jLjEtMCsGA1UEAxMkRW50cnVzdCBSb290IENl
+cnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTA2MTEyNzIwMjM0MloXDTI2MTEyNzIw
+NTM0MlowgbAxCzAJBgNVBAYTAlVTMRYwFAYDVQQKEw1FbnRydXN0LCBJbmMuMTkw
+NwYDVQQLEzB3d3cuZW50cnVzdC5uZXQvQ1BTIGlzIGluY29ycG9yYXRlZCBieSBy
+ZWZlcmVuY2UxHzAdBgNVBAsTFihjKSAyMDA2IEVudHJ1c3QsIEluYy4xLTArBgNV
+BAMTJEVudHJ1c3QgUm9vdCBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTCCASIwDQYJ
+KoZIhvcNAQEBBQADggEPADCCAQoCggEBALaVtkNC+sZtKm9I35RMOVcF7sN5EUFo
+Nu3s/poBj6E4KPz3EEZmLk0eGrEaTsbRwJWIsMn/MYszA9u3g3s+IIRe7bJWKKf4
+4LlAcTfFy0cOlypowCKVYhXbR9n10Cv/gkvJrT7eTNuQgFA/CYqEAOwwCj0Yzfv9
+KlmaI5UXLEWeH25DeW0MXJj+SKfFI0dcXv1u5x609mhF0YaDW6KKjbHjKYD+JXGI
+rb68j6xSlkuqUY3kEzEZ6E5Nn9uss2rVvDlUccp6en+Q3X0dgNmBu1kmwhH+5pPi
+94DkZfs0Nw4pgHBNrziGLp5/V6+eF67rHMsoIV+2HNjnogQi+dPa2MsCAwEAAaOB
+sDCBrTAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zArBgNVHRAEJDAi
+gA8yMDA2MTEyNzIwMjM0MlqBDzIwMjYxMTI3MjA1MzQyWjAfBgNVHSMEGDAWgBRo
+kORnpKZTgMeGZqTx90tD+4S9bTAdBgNVHQ4EFgQUaJDkZ6SmU4DHhmak8fdLQ/uE
+vW0wHQYJKoZIhvZ9B0EABBAwDhsIVjcuMTo0LjADAgSQMA0GCSqGSIb3DQEBBQUA
+A4IBAQCT1DCw1wMgKtD5Y+iRDAUgqV8ZyntyTtSx29CW+1RaGSwMCPeyvIWonX9t
+O1KzKtvn1ISMY/YPyyYBkVBs9F8U4pN0wBOeMDpQ47RgxRzwIkSNcUesyBrJ6Zua
+AGAT/3B+XxFNSRuzFVJ7yVTav52Vr2ua2J7p8eRDjeIRRDq/r72DQnNSi6q7pynP
+9WQcCk3RvKqsnyrQ/39/2n3qse0wJcGE2jTSW3iDVuycNsMm4hH2Z0kdkquM++v/
+eu6FSqdQgPCnXEqULl8FmTxSQeDNtGPPAUO6nIPcj2A781q0tHuu2guQOHXvgR1m
+0vdXcDazv/wor3ElhVsT/h5/WrQ8
+-----END CERTIFICATE-----
+
+# Operating CA: Entrust Datacard
+# Issuer: CN=Entrust Root Certification Authority - EC1 O=Entrust, Inc. OU=See www.entrust.net/legal-terms/(c) 2012 Entrust, Inc. - for authorized use only
+# Subject: CN=Entrust Root Certification Authority - EC1 O=Entrust, Inc. OU=See www.entrust.net/legal-terms/(c) 2012 Entrust, Inc. - for authorized use only
+# Label: "Entrust Root Certification Authority - EC1"
+# Serial: 51543124481930649114116133369
+# MD5 Fingerprint: b6:7e:1d:f0:58:c5:49:6c:24:3b:3d:ed:98:18:ed:bc
+# SHA1 Fingerprint: 20:d8:06:40:df:9b:25:f5:12:25:3a:11:ea:f7:59:8a:eb:14:b5:47
+# SHA256 Fingerprint: 02:ed:0e:b2:8c:14:da:45:16:5c:56:67:91:70:0d:64:51:d7:fb:56:f0:b2:ab:1d:3b:8e:b0:70:e5:6e:df:f5
+-----BEGIN CERTIFICATE-----
+MIIC+TCCAoCgAwIBAgINAKaLeSkAAAAAUNCR+TAKBggqhkjOPQQDAzCBvzELMAkG
+A1UEBhMCVVMxFjAUBgNVBAoTDUVudHJ1c3QsIEluYy4xKDAmBgNVBAsTH1NlZSB3
+d3cuZW50cnVzdC5uZXQvbGVnYWwtdGVybXMxOTA3BgNVBAsTMChjKSAyMDEyIEVu
+dHJ1c3QsIEluYy4gLSBmb3IgYXV0aG9yaXplZCB1c2Ugb25seTEzMDEGA1UEAxMq
+RW50cnVzdCBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5IC0gRUMxMB4XDTEy
+MTIxODE1MjUzNloXDTM3MTIxODE1NTUzNlowgb8xCzAJBgNVBAYTAlVTMRYwFAYD
+VQQKEw1FbnRydXN0LCBJbmMuMSgwJgYDVQQLEx9TZWUgd3d3LmVudHJ1c3QubmV0
+L2xlZ2FsLXRlcm1zMTkwNwYDVQQLEzAoYykgMjAxMiBFbnRydXN0LCBJbmMuIC0g
+Zm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxMzAxBgNVBAMTKkVudHJ1c3QgUm9vdCBD
+ZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEVDMTB2MBAGByqGSM49AgEGBSuBBAAi
+A2IABIQTydC6bUF74mzQ61VfZgIaJPRbiWlH47jCffHyAsWfoPZb1YsGGYZPUxBt
+ByQnoaD41UcZYUx9ypMn6nQM72+WCf5j7HBdNq1nd67JnXxVRDqiY1Ef9eNi1KlH
+Bz7MIKNCMEAwDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0O
+BBYEFLdj5xrdjekIplWDpOBqUEFlEUJJMAoGCCqGSM49BAMDA2cAMGQCMGF52OVC
+R98crlOZF7ZvHH3hvxGU0QOIdeSNiaSKd0bebWHvAvX7td/M/k7//qnmpwIwW5nX
+hTcGtXsI/esni0qU+eH6p44mCOh8kmhtc9hvJqwhAriZtyZBWyVgrtBIGu4G
+-----END CERTIFICATE-----
+
+# Operating CA: Entrust Datacard
+# Issuer: CN=Entrust Root Certification Authority - G2 O=Entrust, Inc. OU=See www.entrust.net/legal-terms/(c) 2009 Entrust, Inc. - for authorized use only
+# Subject: CN=Entrust Root Certification Authority - G2 O=Entrust, Inc. OU=See www.entrust.net/legal-terms/(c) 2009 Entrust, Inc. - for authorized use only
+# Label: "Entrust Root Certification Authority - G2"
+# Serial: 1246989352
+# MD5 Fingerprint: 4b:e2:c9:91:96:65:0c:f4:0e:5a:93:92:a0:0a:fe:b2
+# SHA1 Fingerprint: 8c:f4:27:fd:79:0c:3a:d1:66:06:8d:e8:1e:57:ef:bb:93:22:72:d4
+# SHA256 Fingerprint: 43:df:57:74:b0:3e:7f:ef:5f:e4:0d:93:1a:7b:ed:f1:bb:2e:6b:42:73:8c:4e:6d:38:41:10:3d:3a:a7:f3:39
+-----BEGIN CERTIFICATE-----
+MIIEPjCCAyagAwIBAgIESlOMKDANBgkqhkiG9w0BAQsFADCBvjELMAkGA1UEBhMC
+VVMxFjAUBgNVBAoTDUVudHJ1c3QsIEluYy4xKDAmBgNVBAsTH1NlZSB3d3cuZW50
+cnVzdC5uZXQvbGVnYWwtdGVybXMxOTA3BgNVBAsTMChjKSAyMDA5IEVudHJ1c3Qs
+IEluYy4gLSBmb3IgYXV0aG9yaXplZCB1c2Ugb25seTEyMDAGA1UEAxMpRW50cnVz
+dCBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5IC0gRzIwHhcNMDkwNzA3MTcy
+NTU0WhcNMzAxMjA3MTc1NTU0WjCBvjELMAkGA1UEBhMCVVMxFjAUBgNVBAoTDUVu
+dHJ1c3QsIEluYy4xKDAmBgNVBAsTH1NlZSB3d3cuZW50cnVzdC5uZXQvbGVnYWwt
+dGVybXMxOTA3BgNVBAsTMChjKSAyMDA5IEVudHJ1c3QsIEluYy4gLSBmb3IgYXV0
+aG9yaXplZCB1c2Ugb25seTEyMDAGA1UEAxMpRW50cnVzdCBSb290IENlcnRpZmlj
+YXRpb24gQXV0aG9yaXR5IC0gRzIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
+AoIBAQC6hLZy254Ma+KZ6TABp3bqMriVQRrJ2mFOWHLP/vaCeb9zYQYKpSfYs1/T
+RU4cctZOMvJyig/3gxnQaoCAAEUesMfnmr8SVycco2gvCoe9amsOXmXzHHfV1IWN
+cCG0szLni6LVhjkCsbjSR87kyUnEO6fe+1R9V77w6G7CebI6C1XiUJgWMhNcL3hW
+wcKUs/Ja5CeanyTXxuzQmyWC48zCxEXFjJd6BmsqEZ+pCm5IO2/b1BEZQvePB7/1
+U1+cPvQXLOZprE4yTGJ36rfo5bs0vBmLrpxR57d+tVOxMyLlbc9wPBr64ptntoP0
+jaWvYkxN4FisZDQSA/i2jZRjJKRxAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAP
+BgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBRqciZ60B7vfec7aVHUbI2fkBJmqzAN
+BgkqhkiG9w0BAQsFAAOCAQEAeZ8dlsa2eT8ijYfThwMEYGprmi5ZiXMRrEPR9RP/
+jTkrwPK9T3CMqS/qF8QLVJ7UG5aYMzyorWKiAHarWWluBh1+xLlEjZivEtRh2woZ
+Rkfz6/djwUAFQKXSt/S1mja/qYh2iARVBCuch38aNzx+LaUa2NSJXsq9rD1s2G2v
+1fN2D807iDginWyTmsQ9v4IbZT+mD12q/OWyFcq1rca8PdCE6OoGcrBNOTJ4vz4R
+nAuknZoh8/CbCzB428Hch0P+vGOaysXCHMnHjf87ElgI5rY97HosTvuDls4MPGmH
+VHOkc8KT/1EQrBVUAdj8BbGJoX90g5pJ19xOe4pIb4tF9g==
+-----END CERTIFICATE-----
+
+# Operating CA: Entrust Datacard
+# Issuer: CN=Entrust.net Certification Authority (2048) O=Entrust.net OU=www.entrust.net/CPS_2048 incorp. by ref. (limits liab.)/(c) 1999 Entrust.net Limited
+# Subject: CN=Entrust.net Certification Authority (2048) O=Entrust.net OU=www.entrust.net/CPS_2048 incorp. by ref. (limits liab.)/(c) 1999 Entrust.net Limited
+# Label: "Entrust.net Premium 2048 Secure Server CA"
+# Serial: 946069240
+# MD5 Fingerprint: ee:29:31:bc:32:7e:9a:e6:e8:b5:f7:51:b4:34:71:90
+# SHA1 Fingerprint: 50:30:06:09:1d:97:d4:f5:ae:39:f7:cb:e7:92:7d:7d:65:2d:34:31
+# SHA256 Fingerprint: 6d:c4:71:72:e0:1c:bc:b0:bf:62:58:0d:89:5f:e2:b8:ac:9a:d4:f8:73:80:1e:0c:10:b9:c8:37:d2:1e:b1:77
+-----BEGIN CERTIFICATE-----
+MIIEKjCCAxKgAwIBAgIEOGPe+DANBgkqhkiG9w0BAQUFADCBtDEUMBIGA1UEChML
+RW50cnVzdC5uZXQxQDA+BgNVBAsUN3d3dy5lbnRydXN0Lm5ldC9DUFNfMjA0OCBp
+bmNvcnAuIGJ5IHJlZi4gKGxpbWl0cyBsaWFiLikxJTAjBgNVBAsTHChjKSAxOTk5
+IEVudHJ1c3QubmV0IExpbWl0ZWQxMzAxBgNVBAMTKkVudHJ1c3QubmV0IENlcnRp
+ZmljYXRpb24gQXV0aG9yaXR5ICgyMDQ4KTAeFw05OTEyMjQxNzUwNTFaFw0yOTA3
+MjQxNDE1MTJaMIG0MRQwEgYDVQQKEwtFbnRydXN0Lm5ldDFAMD4GA1UECxQ3d3d3
+LmVudHJ1c3QubmV0L0NQU18yMDQ4IGluY29ycC4gYnkgcmVmLiAobGltaXRzIGxp
+YWIuKTElMCMGA1UECxMcKGMpIDE5OTkgRW50cnVzdC5uZXQgTGltaXRlZDEzMDEG
+A1UEAxMqRW50cnVzdC5uZXQgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkgKDIwNDgp
+MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArU1LqRKGsuqjIAcVFmQq
+K0vRvwtKTY7tgHalZ7d4QMBzQshowNtTK91euHaYNZOLGp18EzoOH1u3Hs/lJBQe
+sYGpjX24zGtLA/ECDNyrpUAkAH90lKGdCCmziAv1h3edVc3kw37XamSrhRSGlVuX
+MlBvPci6Zgzj/L24ScF2iUkZ/cCovYmjZy/Gn7xxGWC4LeksyZB2ZnuU4q941mVT
+XTzWnLLPKQP5L6RQstRIzgUyVYr9smRMDuSYB3Xbf9+5CFVghTAp+XtIpGmG4zU/
+HoZdenoVve8AjhUiVBcAkCaTvA5JaJG/+EfTnZVCwQ5N328mz8MYIWJmQ3DW1cAH
+4QIDAQABo0IwQDAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNV
+HQ4EFgQUVeSB0RGAvtiJuQijMfmhJAkWuXAwDQYJKoZIhvcNAQEFBQADggEBADub
+j1abMOdTmXx6eadNl9cZlZD7Bh/KM3xGY4+WZiT6QBshJ8rmcnPyT/4xmf3IDExo
+U8aAghOY+rat2l098c5u9hURlIIM7j+VrxGrD9cv3h8Dj1csHsm7mhpElesYT6Yf
+zX1XEC+bBAlahLVu2B064dae0Wx5XnkcFMXj0EyTO2U87d89vqbllRrDtRnDvV5b
+u/8j72gZyxKTJ1wDLW8w0B62GqzeWvfRqqgnpv55gcR5mTNXuhKwqeBCbJPKVt7+
+bYQLCIt+jerXmCHG8+c8eS9enNFMFY3h7CI3zJpDC5fcgJCNs2ebb0gIFVbPv/Er
+fF6adulZkMV8gzURZVE=
+-----END CERTIFICATE-----
+
+# Operating CA: Entrust Datacard
+# Issuer: CN=AffirmTrust Commercial O=AffirmTrust
+# Subject: CN=AffirmTrust Commercial O=AffirmTrust
+# Label: "AffirmTrust Commercial"
+# Serial: 8608355977964138876
+# MD5 Fingerprint: 82:92:ba:5b:ef:cd:8a:6f:a6:3d:55:f9:84:f6:d6:b7
+# SHA1 Fingerprint: f9:b5:b6:32:45:5f:9c:be:ec:57:5f:80:dc:e9:6e:2c:c7:b2:78:b7
+# SHA256 Fingerprint: 03:76:ab:1d:54:c5:f9:80:3c:e4:b2:e2:01:a0:ee:7e:ef:7b:57:b6:36:e8:a9:3c:9b:8d:48:60:c9:6f:5f:a7
+-----BEGIN CERTIFICATE-----
+MIIDTDCCAjSgAwIBAgIId3cGJyapsXwwDQYJKoZIhvcNAQELBQAwRDELMAkGA1UE
+BhMCVVMxFDASBgNVBAoMC0FmZmlybVRydXN0MR8wHQYDVQQDDBZBZmZpcm1UcnVz
+dCBDb21tZXJjaWFsMB4XDTEwMDEyOTE0MDYwNloXDTMwMTIzMTE0MDYwNlowRDEL
+MAkGA1UEBhMCVVMxFDASBgNVBAoMC0FmZmlybVRydXN0MR8wHQYDVQQDDBZBZmZp
+cm1UcnVzdCBDb21tZXJjaWFsMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
+AQEA9htPZwcroRX1BiLLHwGy43NFBkRJLLtJJRTWzsO3qyxPxkEylFf6EqdbDuKP
+Hx6GGaeqtS25Xw2Kwq+FNXkyLbscYjfysVtKPcrNcV/pQr6U6Mje+SJIZMblq8Yr
+ba0F8PrVC8+a5fBQpIs7R6UjW3p6+DM/uO+Zl+MgwdYoic+U+7lF7eNAFxHUdPAL
+MeIrJmqbTFeurCA+ukV6BfO9m2kVrn1OIGPENXY6BwLJN/3HR+7o8XYdcxXyl6S1
+yHp52UKqK39c/s4mT6NmgTWvRLpUHhwwMmWd5jyTXlBOeuM61G7MGvv50jeuJCqr
+VwMiKA1JdX+3KNp1v47j3A55MQIDAQABo0IwQDAdBgNVHQ4EFgQUnZPGU4teyq8/
+nx4P5ZmVvCT2lI8wDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYwDQYJ
+KoZIhvcNAQELBQADggEBAFis9AQOzcAN/wr91LoWXym9e2iZWEnStB03TX8nfUYG
+XUPGhi4+c7ImfU+TqbbEKpqrIZcUsd6M06uJFdhrJNTxFq7YpFzUf1GO7RgBsZNj
+vbz4YYCanrHOQnDiqX0GJX0nof5v7LMeJNrjS1UaADs1tDvZ110w/YETifLCBivt
+Z8SOyUOyXGsViQK8YvxO8rUzqrJv0wqiUOP2O+guRMLbZjipM1ZI8W0bM40NjD9g
+N53Tym1+NH4Nn3J2ixufcv1SNUFFApYvHLKac0khsUlHRUe072o0EclNmsxZt9YC
+nlpOZbWUrhvfKbAW8b8Angc6F2S1BLUjIZkKlTuXfO8=
+-----END CERTIFICATE-----
+
+# Operating CA: Entrust Datacard
+# Issuer: CN=AffirmTrust Networking O=AffirmTrust
+# Subject: CN=AffirmTrust Networking O=AffirmTrust
+# Label: "AffirmTrust Networking"
+# Serial: 8957382827206547757
+# MD5 Fingerprint: 42:65:ca:be:01:9a:9a:4c:a9:8c:41:49:cd:c0:d5:7f
+# SHA1 Fingerprint: 29:36:21:02:8b:20:ed:02:f5:66:c5:32:d1:d6:ed:90:9f:45:00:2f
+# SHA256 Fingerprint: 0a:81:ec:5a:92:97:77:f1:45:90:4a:f3:8d:5d:50:9f:66:b5:e2:c5:8f:cd:b5:31:05:8b:0e:17:f3:f0:b4:1b
+-----BEGIN CERTIFICATE-----
+MIIDTDCCAjSgAwIBAgIIfE8EORzUmS0wDQYJKoZIhvcNAQEFBQAwRDELMAkGA1UE
+BhMCVVMxFDASBgNVBAoMC0FmZmlybVRydXN0MR8wHQYDVQQDDBZBZmZpcm1UcnVz
+dCBOZXR3b3JraW5nMB4XDTEwMDEyOTE0MDgyNFoXDTMwMTIzMTE0MDgyNFowRDEL
+MAkGA1UEBhMCVVMxFDASBgNVBAoMC0FmZmlybVRydXN0MR8wHQYDVQQDDBZBZmZp
+cm1UcnVzdCBOZXR3b3JraW5nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
+AQEAtITMMxcua5Rsa2FSoOujz3mUTOWUgJnLVWREZY9nZOIG41w3SfYvm4SEHi3y
+YJ0wTsyEheIszx6e/jarM3c1RNg1lho9Nuh6DtjVR6FqaYvZ/Ls6rnla1fTWcbua
+kCNrmreIdIcMHl+5ni36q1Mr3Lt2PpNMCAiMHqIjHNRqrSK6mQEubWXLviRmVSRL
+QESxG9fhwoXA3hA/Pe24/PHxI1Pcv2WXb9n5QHGNfb2V1M6+oF4nI979ptAmDgAp
+6zxG8D1gvz9Q0twmQVGeFDdCBKNwV6gbh+0t+nvujArjqWaJGctB+d1ENmHP4ndG
+yH329JKBNv3bNPFyfvMMFr20FQIDAQABo0IwQDAdBgNVHQ4EFgQUBx/S55zawm6i
+QLSwelAQUHTEyL0wDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYwDQYJ
+KoZIhvcNAQEFBQADggEBAIlXshZ6qML91tmbmzTCnLQyFE2npN/svqe++EPbkTfO
+tDIuUFUaNU52Q3Eg75N3ThVwLofDwR1t3Mu1J9QsVtFSUzpE0nPIxBsFZVpikpzu
+QY0x2+c06lkh1QF612S4ZDnNye2v7UsDSKegmQGA3GWjNq5lWUhPgkvIZfFXHeVZ
+Lgo/bNjR9eUJtGxUAArgFU2HdW23WJZa3W3SAKD0m0i+wzekujbgfIeFlxoVot4u
+olu9rxj5kFDNcFn4J2dHy8egBzp90SxdbBk6ZrV9/ZFvgrG+CJPbFEfxojfHRZ48
+x3evZKiT3/Zpg4Jg8klCNO1aAFSFHBY2kgxc+qatv9s=
+-----END CERTIFICATE-----
+
+# Operating CA: Entrust Datacard
+# Issuer: CN=AffirmTrust Premium O=AffirmTrust
+# Subject: CN=AffirmTrust Premium O=AffirmTrust
+# Label: "AffirmTrust Premium"
+# Serial: 7893706540734352110
+# MD5 Fingerprint: c4:5d:0e:48:b6:ac:28:30:4e:0a:bc:f9:38:16:87:57
+# SHA1 Fingerprint: d8:a6:33:2c:e0:03:6f:b1:85:f6:63:4f:7d:6a:06:65:26:32:28:27
+# SHA256 Fingerprint: 70:a7:3f:7f:37:6b:60:07:42:48:90:45:34:b1:14:82:d5:bf:0e:69:8e:cc:49:8d:f5:25:77:eb:f2:e9:3b:9a
+-----BEGIN CERTIFICATE-----
+MIIFRjCCAy6gAwIBAgIIbYwURrGmCu4wDQYJKoZIhvcNAQEMBQAwQTELMAkGA1UE
+BhMCVVMxFDASBgNVBAoMC0FmZmlybVRydXN0MRwwGgYDVQQDDBNBZmZpcm1UcnVz
+dCBQcmVtaXVtMB4XDTEwMDEyOTE0MTAzNloXDTQwMTIzMTE0MTAzNlowQTELMAkG
+A1UEBhMCVVMxFDASBgNVBAoMC0FmZmlybVRydXN0MRwwGgYDVQQDDBNBZmZpcm1U
+cnVzdCBQcmVtaXVtMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAxBLf
+qV/+Qd3d9Z+K4/as4Tx4mrzY8H96oDMq3I0gW64tb+eT2TZwamjPjlGjhVtnBKAQ
+JG9dKILBl1fYSCkTtuG+kU3fhQxTGJoeJKJPj/CihQvL9Cl/0qRY7iZNyaqoe5rZ
++jjeRFcV5fiMyNlI4g0WJx0eyIOFJbe6qlVBzAMiSy2RjYvmia9mx+n/K+k8rNrS
+s8PhaJyJ+HoAVt70VZVs+7pk3WKL3wt3MutizCaam7uqYoNMtAZ6MMgpv+0GTZe5
+HMQxK9VfvFMSF5yZVylmd2EhMQcuJUmdGPLu8ytxjLW6OQdJd/zvLpKQBY0tL3d7
+70O/Nbua2Plzpyzy0FfuKE4mX4+QaAkvuPjcBukumj5Rp9EixAqnOEhss/n/fauG
+V+O61oV4d7pD6kh/9ti+I20ev9E2bFhc8e6kGVQa9QPSdubhjL08s9NIS+LI+H+S
+qHZGnEJlPqQewQcDWkYtuJfzt9WyVSHvutxMAJf7FJUnM7/oQ0dG0giZFmA7mn7S
+5u046uwBHjxIVkkJx0w3AJ6IDsBz4W9m6XJHMD4Q5QsDyZpCAGzFlH5hxIrff4Ia
+C1nEWTJ3s7xgaVY5/bQGeyzWZDbZvUjthB9+pSKPKrhC9IK31FOQeE4tGv2Bb0TX
+OwF0lkLgAOIua+rF7nKsu7/+6qqo+Nz2snmKtmcCAwEAAaNCMEAwHQYDVR0OBBYE
+FJ3AZ6YMItkm9UWrpmVSESfYRaxjMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/
+BAQDAgEGMA0GCSqGSIb3DQEBDAUAA4ICAQCzV00QYk465KzquByvMiPIs0laUZx2
+KI15qldGF9X1Uva3ROgIRL8YhNILgM3FEv0AVQVhh0HctSSePMTYyPtwni94loMg
+Nt58D2kTiKV1NpgIpsbfrM7jWNa3Pt668+s0QNiigfV4Py/VpfzZotReBA4Xrf5B
+8OWycvpEgjNC6C1Y91aMYj+6QrCcDFx+LmUmXFNPALJ4fqENmS2NuB2OosSw/WDQ
+MKSOyARiqcTtNd56l+0OOF6SL5Nwpamcb6d9Ex1+xghIsV5n61EIJenmJWtSKZGc
+0jlzCFfemQa0W50QBuHCAKi4HEoCChTQwUHK+4w1IX2COPKpVJEZNZOUbWo6xbLQ
+u4mGk+ibyQ86p3q4ofB4Rvr8Ny/lioTz3/4E2aFooC8k4gmVBtWVyuEklut89pMF
+u+1z6S3RdTnX5yTb2E5fQ4+e0BQ5v1VwSJlXMbSc7kqYA5YwH2AG7hsj/oFgIxpH
+YoWlzBk0gG+zrBrjn/B7SK3VAdlntqlyk+otZrWyuOQ9PLLvTIzq6we/qzWaVYa8
+GKa1qF60g2xraUDTn9zxw2lrueFtCfTxqlB2Cnp9ehehVZZCmTEJ3WARjQUwfuaO
+RtGdFNrHF+QFlozEJLUbzxQHskD4o55BhrwE0GuWyCqANP2/7waj3VjFhT0+j/6e
+KeC2uAloGRwYQw==
+-----END CERTIFICATE-----
+
+# Operating CA: Entrust Datacard
+# Issuer: CN=AffirmTrust Premium ECC O=AffirmTrust
+# Subject: CN=AffirmTrust Premium ECC O=AffirmTrust
+# Label: "AffirmTrust Premium ECC"
+# Serial: 8401224907861490260
+# MD5 Fingerprint: 64:b0:09:55:cf:b1:d5:99:e2:be:13:ab:a6:5d:ea:4d
+# SHA1 Fingerprint: b8:23:6b:00:2f:1d:16:86:53:01:55:6c:11:a4:37:ca:eb:ff:c3:bb
+# SHA256 Fingerprint: bd:71:fd:f6:da:97:e4:cf:62:d1:64:7a:dd:25:81:b0:7d:79:ad:f8:39:7e:b4:ec:ba:9c:5e:84:88:82:14:23
+-----BEGIN CERTIFICATE-----
+MIIB/jCCAYWgAwIBAgIIdJclisc/elQwCgYIKoZIzj0EAwMwRTELMAkGA1UEBhMC
+VVMxFDASBgNVBAoMC0FmZmlybVRydXN0MSAwHgYDVQQDDBdBZmZpcm1UcnVzdCBQ
+cmVtaXVtIEVDQzAeFw0xMDAxMjkxNDIwMjRaFw00MDEyMzExNDIwMjRaMEUxCzAJ
+BgNVBAYTAlVTMRQwEgYDVQQKDAtBZmZpcm1UcnVzdDEgMB4GA1UEAwwXQWZmaXJt
+VHJ1c3QgUHJlbWl1bSBFQ0MwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAAQNMF4bFZ0D
+0KF5Nbc6PJJ6yhUczWLznCZcBz3lVPqj1swS6vQUX+iOGasvLkjmrBhDeKzQN8O9
+ss0s5kfiGuZjuD0uL3jET9v0D6RoTFVya5UdThhClXjMNzyR4ptlKymjQjBAMB0G
+A1UdDgQWBBSaryl6wBE1NSZRMADDav5A1a7WPDAPBgNVHRMBAf8EBTADAQH/MA4G
+A1UdDwEB/wQEAwIBBjAKBggqhkjOPQQDAwNnADBkAjAXCfOHiFBar8jAQr9HX/Vs
+aobgxCd05DhT1wV/GzTjxi+zygk8N53X57hG8f2h4nECMEJZh0PUUd+60wkyWs6I
+flc9nF9Ca/UHLbXwgpP5WW+uZPpY5Yse42O+tYHNbwKMeQ==
+-----END CERTIFICATE-----
+
+# Operating CA: GlobalSign
+# Issuer: CN=GlobalSign Root CA O=GlobalSign nv-sa OU=Root CA
+# Subject: CN=GlobalSign Root CA O=GlobalSign nv-sa OU=Root CA
+# Label: "GlobalSign Root CA"
+# Serial: 4835703278459707669005204
+# MD5 Fingerprint: 3e:45:52:15:09:51:92:e1:b7:5d:37:9f:b1:87:29:8a
+# SHA1 Fingerprint: b1:bc:96:8b:d4:f4:9d:62:2a:a8:9a:81:f2:15:01:52:a4:1d:82:9c
+# SHA256 Fingerprint: eb:d4:10:40:e4:bb:3e:c7:42:c9:e3:81:d3:1e:f2:a4:1a:48:b6:68:5c:96:e7:ce:f3:c1:df:6c:d4:33:1c:99
+-----BEGIN CERTIFICATE-----
+MIIDdTCCAl2gAwIBAgILBAAAAAABFUtaw5QwDQYJKoZIhvcNAQEFBQAwVzELMAkG
+A1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jv
+b3QgQ0ExGzAZBgNVBAMTEkdsb2JhbFNpZ24gUm9vdCBDQTAeFw05ODA5MDExMjAw
+MDBaFw0yODAxMjgxMjAwMDBaMFcxCzAJBgNVBAYTAkJFMRkwFwYDVQQKExBHbG9i
+YWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRswGQYDVQQDExJHbG9iYWxT
+aWduIFJvb3QgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDaDuaZ
+jc6j40+Kfvvxi4Mla+pIH/EqsLmVEQS98GPR4mdmzxzdzxtIK+6NiY6arymAZavp
+xy0Sy6scTHAHoT0KMM0VjU/43dSMUBUc71DuxC73/OlS8pF94G3VNTCOXkNz8kHp
+1Wrjsok6Vjk4bwY8iGlbKk3Fp1S4bInMm/k8yuX9ifUSPJJ4ltbcdG6TRGHRjcdG
+snUOhugZitVtbNV4FpWi6cgKOOvyJBNPc1STE4U6G7weNLWLBYy5d4ux2x8gkasJ
+U26Qzns3dLlwR5EiUWMWea6xrkEmCMgZK9FGqkjWZCrXgzT/LCrBbBlDSgeF59N8
+9iFo7+ryUp9/k5DPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8E
+BTADAQH/MB0GA1UdDgQWBBRge2YaRQ2XyolQL30EzTSo//z9SzANBgkqhkiG9w0B
+AQUFAAOCAQEA1nPnfE920I2/7LqivjTFKDK1fPxsnCwrvQmeU79rXqoRSLblCKOz
+yj1hTdNGCbM+w6DjY1Ub8rrvrTnhQ7k4o+YviiY776BQVvnGCv04zcQLcFGUl5gE
+38NflNUVyRRBnMRddWQVDf9VMOyGj/8N7yy5Y0b2qvzfvGn9LhJIZJrglfCm7ymP
+AbEVtQwdpf5pLGkkeB6zpxxxYu7KyJesF12KwvhHhm4qxFYxldBniYUr+WymXUad
+DKqC5JlR3XC321Y9YeRq4VzW9v493kHMB65jUr9TU/Qr6cf9tveCX4XSQRjbgbME
+HMUfpIBvFSDJ3gyICh3WZlXi/EjJKSZp4A==
+-----END CERTIFICATE-----
+
+# Operating CA: GlobalSign
+# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R2
+# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R2
+# Label: "GlobalSign Root CA - R2"
+# Serial: 4835703278459682885658125
+# MD5 Fingerprint: 94:14:77:7e:3e:5e:fd:8f:30:bd:41:b0:cf:e7:d0:30
+# SHA1 Fingerprint: 75:e0:ab:b6:13:85:12:27:1c:04:f8:5f:dd:de:38:e4:b7:24:2e:fe
+# SHA256 Fingerprint: ca:42:dd:41:74:5f:d0:b8:1e:b9:02:36:2c:f9:d8:bf:71:9d:a1:bd:1b:1e:fc:94:6f:5b:4c:99:f4:2c:1b:9e
+-----BEGIN CERTIFICATE-----
+MIIDujCCAqKgAwIBAgILBAAAAAABD4Ym5g0wDQYJKoZIhvcNAQEFBQAwTDEgMB4G
+A1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjIxEzARBgNVBAoTCkdsb2JhbFNp
+Z24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMDYxMjE1MDgwMDAwWhcNMjExMjE1
+MDgwMDAwWjBMMSAwHgYDVQQLExdHbG9iYWxTaWduIFJvb3QgQ0EgLSBSMjETMBEG
+A1UEChMKR2xvYmFsU2lnbjETMBEGA1UEAxMKR2xvYmFsU2lnbjCCASIwDQYJKoZI
+hvcNAQEBBQADggEPADCCAQoCggEBAKbPJA6+Lm8omUVCxKs+IVSbC9N/hHD6ErPL
+v4dfxn+G07IwXNb9rfF73OX4YJYJkhD10FPe+3t+c4isUoh7SqbKSaZeqKeMWhG8
+eoLrvozps6yWJQeXSpkqBy+0Hne/ig+1AnwblrjFuTosvNYSuetZfeLQBoZfXklq
+tTleiDTsvHgMCJiEbKjNS7SgfQx5TfC4LcshytVsW33hoCmEofnTlEnLJGKRILzd
+C9XZzPnqJworc5HGnRusyMvo4KD0L5CLTfuwNhv2GXqF4G3yYROIXJ/gkwpRl4pa
+zq+r1feqCapgvdzZX99yqWATXgAByUr6P6TqBwMhAo6CygPCm48CAwEAAaOBnDCB
+mTAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUm+IH
+V2ccHsBqBt5ZtJot39wZhi4wNgYDVR0fBC8wLTAroCmgJ4YlaHR0cDovL2NybC5n
+bG9iYWxzaWduLm5ldC9yb290LXIyLmNybDAfBgNVHSMEGDAWgBSb4gdXZxwewGoG
+3lm0mi3f3BmGLjANBgkqhkiG9w0BAQUFAAOCAQEAmYFThxxol4aR7OBKuEQLq4Gs
+J0/WwbgcQ3izDJr86iw8bmEbTUsp9Z8FHSbBuOmDAGJFtqkIk7mpM0sYmsL4h4hO
+291xNBrBVNpGP+DTKqttVCL1OmLNIG+6KYnX3ZHu01yiPqFbQfXf5WRDLenVOavS
+ot+3i9DAgBkcRcAtjOj4LaR0VknFBbVPFd5uRHg5h6h+u/N5GJG79G+dwfCMNYxd
+AfvDbbnvRG15RjF+Cv6pgsH/76tuIMRQyV+dTZsXjAzlAcmgQWpzU/qlULRuJQ/7
+TBj0/VLZjmmx6BEP3ojY+x1J96relc8geMJgEtslQIxq/H5COEBkEveegeGTLg==
+-----END CERTIFICATE-----
+
+# Operating CA: GlobalSign
+# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R3
+# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R3
+# Label: "GlobalSign Root CA - R3"
+# Serial: 4835703278459759426209954
+# MD5 Fingerprint: c5:df:b8:49:ca:05:13:55:ee:2d:ba:1a:c3:3e:b0:28
+# SHA1 Fingerprint: d6:9b:56:11:48:f0:1c:77:c5:45:78:c1:09:26:df:5b:85:69:76:ad
+# SHA256 Fingerprint: cb:b5:22:d7:b7:f1:27:ad:6a:01:13:86:5b:df:1c:d4:10:2e:7d:07:59:af:63:5a:7c:f4:72:0d:c9:63:c5:3b
+-----BEGIN CERTIFICATE-----
+MIIDXzCCAkegAwIBAgILBAAAAAABIVhTCKIwDQYJKoZIhvcNAQELBQAwTDEgMB4G
+A1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjMxEzARBgNVBAoTCkdsb2JhbFNp
+Z24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMDkwMzE4MTAwMDAwWhcNMjkwMzE4
+MTAwMDAwWjBMMSAwHgYDVQQLExdHbG9iYWxTaWduIFJvb3QgQ0EgLSBSMzETMBEG
+A1UEChMKR2xvYmFsU2lnbjETMBEGA1UEAxMKR2xvYmFsU2lnbjCCASIwDQYJKoZI
+hvcNAQEBBQADggEPADCCAQoCggEBAMwldpB5BngiFvXAg7aEyiie/QV2EcWtiHL8
+RgJDx7KKnQRfJMsuS+FggkbhUqsMgUdwbN1k0ev1LKMPgj0MK66X17YUhhB5uzsT
+gHeMCOFJ0mpiLx9e+pZo34knlTifBtc+ycsmWQ1z3rDI6SYOgxXG71uL0gRgykmm
+KPZpO/bLyCiR5Z2KYVc3rHQU3HTgOu5yLy6c+9C7v/U9AOEGM+iCK65TpjoWc4zd
+QQ4gOsC0p6Hpsk+QLjJg6VfLuQSSaGjlOCZgdbKfd/+RFO+uIEn8rUAVSNECMWEZ
+XriX7613t2Saer9fwRPvm2L7DWzgVGkWqQPabumDk3F2xmmFghcCAwEAAaNCMEAw
+DgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFI/wS3+o
+LkUkrk1Q+mOai97i3Ru8MA0GCSqGSIb3DQEBCwUAA4IBAQBLQNvAUKr+yAzv95ZU
+RUm7lgAJQayzE4aGKAczymvmdLm6AC2upArT9fHxD4q/c2dKg8dEe3jgr25sbwMp
+jjM5RcOO5LlXbKr8EpbsU8Yt5CRsuZRj+9xTaGdWPoO4zzUhw8lo/s7awlOqzJCK
+6fBdRoyV3XpYKBovHd7NADdBj+1EbddTKJd+82cEHhXXipa0095MJ6RMG3NzdvQX
+mcIfeg7jLQitChws/zyrVQ4PkX4268NXSb7hLi18YIvDQVETI53O9zJrlAGomecs
+Mx86OyXShkDOOyyGeMlhLxS67ttVb9+E7gUJTb0o2HLO02JQZR7rkpeDMdmztcpH
+WD9f
+-----END CERTIFICATE-----
+
+# Operating CA: GlobalSign
+# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R4
+# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R4
+# Label: "GlobalSign ECC Root CA - R4"
+# Serial: 14367148294922964480859022125800977897474
+# MD5 Fingerprint: 20:f0:27:68:d1:7e:a0:9d:0e:e6:2a:ca:df:5c:89:8e
+# SHA1 Fingerprint: 69:69:56:2e:40:80:f4:24:a1:e7:19:9f:14:ba:f3:ee:58:ab:6a:bb
+# SHA256 Fingerprint: be:c9:49:11:c2:95:56:76:db:6c:0a:55:09:86:d7:6e:3b:a0:05:66:7c:44:2c:97:62:b4:fb:b7:73:de:22:8c
+-----BEGIN CERTIFICATE-----
+MIIB4TCCAYegAwIBAgIRKjikHJYKBN5CsiilC+g0mAIwCgYIKoZIzj0EAwIwUDEk
+MCIGA1UECxMbR2xvYmFsU2lnbiBFQ0MgUm9vdCBDQSAtIFI0MRMwEQYDVQQKEwpH
+bG9iYWxTaWduMRMwEQYDVQQDEwpHbG9iYWxTaWduMB4XDTEyMTExMzAwMDAwMFoX
+DTM4MDExOTAzMTQwN1owUDEkMCIGA1UECxMbR2xvYmFsU2lnbiBFQ0MgUm9vdCBD
+QSAtIFI0MRMwEQYDVQQKEwpHbG9iYWxTaWduMRMwEQYDVQQDEwpHbG9iYWxTaWdu
+MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEuMZ5049sJQ6fLjkZHAOkrprlOQcJ
+FspjsbmG+IpXwVfOQvpzofdlQv8ewQCybnMO/8ch5RikqtlxP6jUuc6MHaNCMEAw
+DgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFFSwe61F
+uOJAf/sKbvu+M8k8o4TVMAoGCCqGSM49BAMCA0gAMEUCIQDckqGgE6bPA7DmxCGX
+kPoUVy0D7O48027KqGx2vKLeuwIgJ6iFJzWbVsaj8kfSt24bAgAXqmemFZHe+pTs
+ewv4n4Q=
+-----END CERTIFICATE-----
+
+# Operating CA: GlobalSign
+# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R5
+# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R5
+# Label: "GlobalSign ECC Root CA - R5"
+# Serial: 32785792099990507226680698011560947931244
+# MD5 Fingerprint: 9f:ad:3b:1c:02:1e:8a:ba:17:74:38:81:0c:a2:bc:08
+# SHA1 Fingerprint: 1f:24:c6:30:cd:a4:18:ef:20:69:ff:ad:4f:dd:5f:46:3a:1b:69:aa
+# SHA256 Fingerprint: 17:9f:bc:14:8a:3d:d0:0f:d2:4e:a1:34:58:cc:43:bf:a7:f5:9c:81:82:d7:83:a5:13:f6:eb:ec:10:0c:89:24
+-----BEGIN CERTIFICATE-----
+MIICHjCCAaSgAwIBAgIRYFlJ4CYuu1X5CneKcflK2GwwCgYIKoZIzj0EAwMwUDEk
+MCIGA1UECxMbR2xvYmFsU2lnbiBFQ0MgUm9vdCBDQSAtIFI1MRMwEQYDVQQKEwpH
+bG9iYWxTaWduMRMwEQYDVQQDEwpHbG9iYWxTaWduMB4XDTEyMTExMzAwMDAwMFoX
+DTM4MDExOTAzMTQwN1owUDEkMCIGA1UECxMbR2xvYmFsU2lnbiBFQ0MgUm9vdCBD
+QSAtIFI1MRMwEQYDVQQKEwpHbG9iYWxTaWduMRMwEQYDVQQDEwpHbG9iYWxTaWdu
+MHYwEAYHKoZIzj0CAQYFK4EEACIDYgAER0UOlvt9Xb/pOdEh+J8LttV7HpI6SFkc
+8GIxLcB6KP4ap1yztsyX50XUWPrRd21DosCHZTQKH3rd6zwzocWdTaRvQZU4f8ke
+hOvRnkmSh5SHDDqFSmafnVmTTZdhBoZKo0IwQDAOBgNVHQ8BAf8EBAMCAQYwDwYD
+VR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUPeYpSJvqB8ohREom3m7e0oPQn1kwCgYI
+KoZIzj0EAwMDaAAwZQIxAOVpEslu28YxuglB4Zf4+/2a4n0Sye18ZNPLBSWLVtmg
+515dTguDnFt2KaAJJiFqYgIwcdK1j1zqO+F4CYWodZI7yFz9SO8NdCKoCOJuxUnO
+xwy8p2Fp8fc74SrL+SvzZpA3
+-----END CERTIFICATE-----
+
+# Operating CA: GlobalSign
+# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R6
+# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R6
+# Label: "GlobalSign Root CA - R6"
+# Serial: 1417766617973444989252670301619537
+# MD5 Fingerprint: 4f:dd:07:e4:d4:22:64:39:1e:0c:37:42:ea:d1:c6:ae
+# SHA1 Fingerprint: 80:94:64:0e:b5:a7:a1:ca:11:9c:1f:dd:d5:9f:81:02:63:a7:fb:d1
+# SHA256 Fingerprint: 2c:ab:ea:fe:37:d0:6c:a2:2a:ba:73:91:c0:03:3d:25:98:29:52:c4:53:64:73:49:76:3a:3a:b5:ad:6c:cf:69
+-----BEGIN CERTIFICATE-----
+MIIFgzCCA2ugAwIBAgIORea7A4Mzw4VlSOb/RVEwDQYJKoZIhvcNAQEMBQAwTDEg
+MB4GA1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjYxEzARBgNVBAoTCkdsb2Jh
+bFNpZ24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMTQxMjEwMDAwMDAwWhcNMzQx
+MjEwMDAwMDAwWjBMMSAwHgYDVQQLExdHbG9iYWxTaWduIFJvb3QgQ0EgLSBSNjET
+MBEGA1UEChMKR2xvYmFsU2lnbjETMBEGA1UEAxMKR2xvYmFsU2lnbjCCAiIwDQYJ
+KoZIhvcNAQEBBQADggIPADCCAgoCggIBAJUH6HPKZvnsFMp7PPcNCPG0RQssgrRI
+xutbPK6DuEGSMxSkb3/pKszGsIhrxbaJ0cay/xTOURQh7ErdG1rG1ofuTToVBu1k
+ZguSgMpE3nOUTvOniX9PeGMIyBJQbUJmL025eShNUhqKGoC3GYEOfsSKvGRMIRxD
+aNc9PIrFsmbVkJq3MQbFvuJtMgamHvm566qjuL++gmNQ0PAYid/kD3n16qIfKtJw
+LnvnvJO7bVPiSHyMEAc4/2ayd2F+4OqMPKq0pPbzlUoSB239jLKJz9CgYXfIWHSw
+1CM69106yqLbnQneXUQtkPGBzVeS+n68UARjNN9rkxi+azayOeSsJDa38O+2HBNX
+k7besvjihbdzorg1qkXy4J02oW9UivFyVm4uiMVRQkQVlO6jxTiWm05OWgtH8wY2
+SXcwvHE35absIQh1/OZhFj931dmRl4QKbNQCTXTAFO39OfuD8l4UoQSwC+n+7o/h
+bguyCLNhZglqsQY6ZZZZwPA1/cnaKI0aEYdwgQqomnUdnjqGBQCe24DWJfncBZ4n
+WUx2OVvq+aWh2IMP0f/fMBH5hc8zSPXKbWQULHpYT9NLCEnFlWQaYw55PfWzjMpY
+rZxCRXluDocZXFSxZba/jJvcE+kNb7gu3GduyYsRtYQUigAZcIN5kZeR1Bonvzce
+MgfYFGM8KEyvAgMBAAGjYzBhMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTAD
+AQH/MB0GA1UdDgQWBBSubAWjkxPioufi1xzWx/B/yGdToDAfBgNVHSMEGDAWgBSu
+bAWjkxPioufi1xzWx/B/yGdToDANBgkqhkiG9w0BAQwFAAOCAgEAgyXt6NH9lVLN
+nsAEoJFp5lzQhN7craJP6Ed41mWYqVuoPId8AorRbrcWc+ZfwFSY1XS+wc3iEZGt
+Ixg93eFyRJa0lV7Ae46ZeBZDE1ZXs6KzO7V33EByrKPrmzU+sQghoefEQzd5Mr61
+55wsTLxDKZmOMNOsIeDjHfrYBzN2VAAiKrlNIC5waNrlU/yDXNOd8v9EDERm8tLj
+vUYAGm0CuiVdjaExUd1URhxN25mW7xocBFymFe944Hn+Xds+qkxV/ZoVqW/hpvvf
+cDDpw+5CRu3CkwWJ+n1jez/QcYF8AOiYrg54NMMl+68KnyBr3TsTjxKM4kEaSHpz
+oHdpx7Zcf4LIHv5YGygrqGytXm3ABdJ7t+uA/iU3/gKbaKxCXcPu9czc8FB10jZp
+nOZ7BN9uBmm23goJSFmH63sUYHpkqmlD75HHTOwY3WzvUy2MmeFe8nI+z1TIvWfs
+pA9MRf/TuTAjB0yPEL+GltmZWrSZVxykzLsViVO6LAUP5MSeGbEYNNVMnbrt9x+v
+JJUEeKgDu+6B5dpffItKoZB0JaezPkvILFa9x8jvOOJckvB595yEunQtYQEgfn7R
+8k8HWV+LLUNS60YMlOH1Zkd5d9VUWx+tJDfLRVpOoERIyNiwmcUVhAn21klJwGW4
+5hpxbqCo8YLoRT5s1gLXCmeDBVrJpBA=
+-----END CERTIFICATE-----
+
+# Note: "GlobalSign Root CA - R7" not added on purpose. It is P-521.
+
+# Operating CA: GlobalSign
+# Issuer: C=BE, O=GlobalSign nv-sa, OU=Root CA, CN=GlobalSign Root CA - R8
+# Subject: C=BE, O=GlobalSign nv-sa, OU=Root CA, CN=GlobalSign Root CA - R8
+# Label: "GlobalSign Root CA - R8"
+# Serial: 1462505469299036457243287072048861
+# MD5 Fingerprint: 26:15:db:de:38:b4:45:5e:19:3f:1b:57:af:53:2b:36
+# SHA1 Fingerprint: 62:01:ff:ce:4f:09:cd:c7:e0:2f:e1:10:f4:fd:67:f0:37:1a:2f:2a
+# SHA256 Fingerprint: ae:48:51:ff:42:03:9b:ad:e0:58:27:91:51:d8:26:83:04:1d:25:98:e2:40:68:3c:c5:6d:76:fb:8c:f5:3d:42
+-----BEGIN CERTIFICATE-----
+MIICMzCCAbmgAwIBAgIOSBtqCfT5YHE6/oHMht0wCgYIKoZIzj0EAwMwXDELMAkG
+A1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jv
+b3QgQ0ExIDAeBgNVBAMTF0dsb2JhbFNpZ24gUm9vdCBDQSAtIFI4MB4XDTE2MDYx
+NTAwMDAwMFoXDTM2MDYxNTAwMDAwMFowXDELMAkGA1UEBhMCQkUxGTAXBgNVBAoT
+EEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jvb3QgQ0ExIDAeBgNVBAMTF0ds
+b2JhbFNpZ24gUm9vdCBDQSAtIFI4MHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEuO58
+MIfYlB9Ua22Ynfx1+1uIq0K6jX05ft1EPTk84QWhSmRgrDemc7D5yUVLCwbQOuDx
+bV/6XltaUrV240bb1R6MdHpCyUE1T8bU4ihgqzSKzrFAI0alrhkkUnyQVUTOo0Iw
+QDAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQULzoS
+JoDoisJQeG0GxDR+4kk5V3YwCgYIKoZIzj0EAwMDaAAwZQIxAMehPbKSkPrKXeAn
+hII7Icz0jfiUVvIgXxHArLxfFaULyBZDp/jFf40goH9e/BYcJwIwHoz1Vr8425zm
+pteEKebfDVMu6CsBt30JPLEyahqauArq6K0I8nQ51SsiNtzvRmbY
+-----END CERTIFICATE-----
+
+# Operating CA: GoDaddy
+# Issuer: CN=Go Daddy Root Certificate Authority - G2 O=GoDaddy.com, Inc.
+# Subject: CN=Go Daddy Root Certificate Authority - G2 O=GoDaddy.com, Inc.
+# Label: "Go Daddy Root Certificate Authority - G2"
+# Serial: 0
+# MD5 Fingerprint: 80:3a:bc:22:c1:e6:fb:8d:9b:3b:27:4a:32:1b:9a:01
+# SHA1 Fingerprint: 47:be:ab:c9:22:ea:e8:0e:78:78:34:62:a7:9f:45:c2:54:fd:e6:8b
+# SHA256 Fingerprint: 45:14:0b:32:47:eb:9c:c8:c5:b4:f0:d7:b5:30:91:f7:32:92:08:9e:6e:5a:63:e2:74:9d:d3:ac:a9:19:8e:da
+-----BEGIN CERTIFICATE-----
+MIIDxTCCAq2gAwIBAgIBADANBgkqhkiG9w0BAQsFADCBgzELMAkGA1UEBhMCVVMx
+EDAOBgNVBAgTB0FyaXpvbmExEzARBgNVBAcTClNjb3R0c2RhbGUxGjAYBgNVBAoT
+EUdvRGFkZHkuY29tLCBJbmMuMTEwLwYDVQQDEyhHbyBEYWRkeSBSb290IENlcnRp
+ZmljYXRlIEF1dGhvcml0eSAtIEcyMB4XDTA5MDkwMTAwMDAwMFoXDTM3MTIzMTIz
+NTk1OVowgYMxCzAJBgNVBAYTAlVTMRAwDgYDVQQIEwdBcml6b25hMRMwEQYDVQQH
+EwpTY290dHNkYWxlMRowGAYDVQQKExFHb0RhZGR5LmNvbSwgSW5jLjExMC8GA1UE
+AxMoR28gRGFkZHkgUm9vdCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkgLSBHMjCCASIw
+DQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAL9xYgjx+lk09xvJGKP3gElY6SKD
+E6bFIEMBO4Tx5oVJnyfq9oQbTqC023CYxzIBsQU+B07u9PpPL1kwIuerGVZr4oAH
+/PMWdYA5UXvl+TW2dE6pjYIT5LY/qQOD+qK+ihVqf94Lw7YZFAXK6sOoBJQ7Rnwy
+DfMAZiLIjWltNowRGLfTshxgtDj6AozO091GB94KPutdfMh8+7ArU6SSYmlRJQVh
+GkSBjCypQ5Yj36w6gZoOKcUcqeldHraenjAKOc7xiID7S13MMuyFYkMlNAJWJwGR
+tDtwKj9useiciAF9n9T521NtYJ2/LOdYq7hfRvzOxBsDPAnrSTFcaUaz4EcCAwEA
+AaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYE
+FDqahQcQZyi27/a9BUFuIMGU2g/eMA0GCSqGSIb3DQEBCwUAA4IBAQCZ21151fmX
+WWcDYfF+OwYxdS2hII5PZYe096acvNjpL9DbWu7PdIxztDhC2gV7+AJ1uP2lsdeu
+9tfeE8tTEH6KRtGX+rcuKxGrkLAngPnon1rpN5+r5N9ss4UXnT3ZJE95kTXWXwTr
+gIOrmgIttRD02JDHBHNA7XIloKmf7J6raBKZV8aPEjoJpL1E/QYVN8Gb5DKj7Tjo
+2GTzLH4U/ALqn83/B2gX2yKQOC16jdFU8WnjXzPKej17CuPKf1855eJ1usV2GDPO
+LPAvTK33sefOT6jEm0pUBsV/fdUID+Ic/n4XuKxe9tQWskMJDE32p2u0mYRlynqI
+4uJEvlz36hz1
+-----END CERTIFICATE-----
+
+# Operating CA: GoDaddy
+# Issuer: CN=Starfield Root Certificate Authority - G2 O=Starfield Technologies, Inc.
+# Subject: CN=Starfield Root Certificate Authority - G2 O=Starfield Technologies, Inc.
+# Label: "Starfield Root Certificate Authority - G2"
+# Serial: 0
+# MD5 Fingerprint: d6:39:81:c6:52:7e:96:69:fc:fc:ca:66:ed:05:f2:96
+# SHA1 Fingerprint: b5:1c:06:7c:ee:2b:0c:3d:f8:55:ab:2d:92:f4:fe:39:d4:e7:0f:0e
+# SHA256 Fingerprint: 2c:e1:cb:0b:f9:d2:f9:e1:02:99:3f:be:21:51:52:c3:b2:dd:0c:ab:de:1c:68:e5:31:9b:83:91:54:db:b7:f5
+-----BEGIN CERTIFICATE-----
+MIID3TCCAsWgAwIBAgIBADANBgkqhkiG9w0BAQsFADCBjzELMAkGA1UEBhMCVVMx
+EDAOBgNVBAgTB0FyaXpvbmExEzARBgNVBAcTClNjb3R0c2RhbGUxJTAjBgNVBAoT
+HFN0YXJmaWVsZCBUZWNobm9sb2dpZXMsIEluYy4xMjAwBgNVBAMTKVN0YXJmaWVs
+ZCBSb290IENlcnRpZmljYXRlIEF1dGhvcml0eSAtIEcyMB4XDTA5MDkwMTAwMDAw
+MFoXDTM3MTIzMTIzNTk1OVowgY8xCzAJBgNVBAYTAlVTMRAwDgYDVQQIEwdBcml6
+b25hMRMwEQYDVQQHEwpTY290dHNkYWxlMSUwIwYDVQQKExxTdGFyZmllbGQgVGVj
+aG5vbG9naWVzLCBJbmMuMTIwMAYDVQQDEylTdGFyZmllbGQgUm9vdCBDZXJ0aWZp
+Y2F0ZSBBdXRob3JpdHkgLSBHMjCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC
+ggEBAL3twQP89o/8ArFvW59I2Z154qK3A2FWGMNHttfKPTUuiUP3oWmb3ooa/RMg
+nLRJdzIpVv257IzdIvpy3Cdhl+72WoTsbhm5iSzchFvVdPtrX8WJpRBSiUZV9Lh1
+HOZ/5FSuS/hVclcCGfgXcVnrHigHdMWdSL5stPSksPNkN3mSwOxGXn/hbVNMYq/N
+Hwtjuzqd+/x5AJhhdM8mgkBj87JyahkNmcrUDnXMN/uLicFZ8WJ/X7NfZTD4p7dN
+dloedl40wOiWVpmKs/B/pM293DIxfJHP4F8R+GuqSVzRmZTRouNjWwl2tVZi4Ut0
+HZbUJtQIBFnQmA4O5t78w+wfkPECAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAO
+BgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFHwMMh+n2TB/xH1oo2Kooc6rB1snMA0G
+CSqGSIb3DQEBCwUAA4IBAQARWfolTwNvlJk7mh+ChTnUdgWUXuEok21iXQnCoKjU
+sHU48TRqneSfioYmUeYs0cYtbpUgSpIB7LiKZ3sx4mcujJUDJi5DnUox9g61DLu3
+4jd/IroAow57UvtruzvE03lRTs2Q9GcHGcg8RnoNAX3FWOdt5oUwF5okxBDgBPfg
+8n/Uqgr/Qh037ZTlZFkSIHc40zI+OIF1lnP6aI+xy84fxez6nH7PfrHxBy22/L/K
+pL/QlwVKvOoYKAKQvVR4CSFx09F9HdkWsKlhPdAKACL8x3vLCWRFCztAgfd9fDL1
+mMpYjn0q7pBZc2T5NnReJaH1ZgUufzkVqSr7UIuOhWn0
+-----END CERTIFICATE-----
+
+# Operating CA: GoDaddy
+# Issuer: O=Starfield Technologies, Inc. OU=Starfield Class 2 Certification Authority
+# Subject: O=Starfield Technologies, Inc. OU=Starfield Class 2 Certification Authority
+# Label: "Starfield Class 2 CA"
+# Serial: 0
+# MD5 Fingerprint: 32:4a:4b:bb:c8:63:69:9b:be:74:9a:c6:dd:1d:46:24
+# SHA1 Fingerprint: ad:7e:1c:28:b0:64:ef:8f:60:03:40:20:14:c3:d0:e3:37:0e:b5:8a
+# SHA256 Fingerprint: 14:65:fa:20:53:97:b8:76:fa:a6:f0:a9:95:8e:55:90:e4:0f:cc:7f:aa:4f:b7:c2:c8:67:75:21:fb:5f:b6:58
+-----BEGIN CERTIFICATE-----
+MIIEDzCCAvegAwIBAgIBADANBgkqhkiG9w0BAQUFADBoMQswCQYDVQQGEwJVUzEl
+MCMGA1UEChMcU3RhcmZpZWxkIFRlY2hub2xvZ2llcywgSW5jLjEyMDAGA1UECxMp
+U3RhcmZpZWxkIENsYXNzIDIgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkwHhcNMDQw
+NjI5MTczOTE2WhcNMzQwNjI5MTczOTE2WjBoMQswCQYDVQQGEwJVUzElMCMGA1UE
+ChMcU3RhcmZpZWxkIFRlY2hub2xvZ2llcywgSW5jLjEyMDAGA1UECxMpU3RhcmZp
+ZWxkIENsYXNzIDIgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkwggEgMA0GCSqGSIb3
+DQEBAQUAA4IBDQAwggEIAoIBAQC3Msj+6XGmBIWtDBFk385N78gDGIc/oav7PKaf
+8MOh2tTYbitTkPskpD6E8J7oX+zlJ0T1KKY/e97gKvDIr1MvnsoFAZMej2YcOadN
++lq2cwQlZut3f+dZxkqZJRRU6ybH838Z1TBwj6+wRir/resp7defqgSHo9T5iaU0
+X9tDkYI22WY8sbi5gv2cOj4QyDvvBmVmepsZGD3/cVE8MC5fvj13c7JdBmzDI1aa
+K4UmkhynArPkPw2vCHmCuDY96pzTNbO8acr1zJ3o/WSNF4Azbl5KXZnJHoe0nRrA
+1W4TNSNe35tfPe/W93bC6j67eA0cQmdrBNj41tpvi/JEoAGrAgEDo4HFMIHCMB0G
+A1UdDgQWBBS/X7fRzt0fhvRbVazc1xDCDqmI5zCBkgYDVR0jBIGKMIGHgBS/X7fR
+zt0fhvRbVazc1xDCDqmI56FspGowaDELMAkGA1UEBhMCVVMxJTAjBgNVBAoTHFN0
+YXJmaWVsZCBUZWNobm9sb2dpZXMsIEluYy4xMjAwBgNVBAsTKVN0YXJmaWVsZCBD
+bGFzcyAyIENlcnRpZmljYXRpb24gQXV0aG9yaXR5ggEAMAwGA1UdEwQFMAMBAf8w
+DQYJKoZIhvcNAQEFBQADggEBAAWdP4id0ckaVaGsafPzWdqbAYcaT1epoXkJKtv3
+L7IezMdeatiDh6GX70k1PncGQVhiv45YuApnP+yz3SFmH8lU+nLMPUxA2IGvd56D
+eruix/U0F47ZEUD0/CwqTRV/p2JdLiXTAAsgGh1o+Re49L2L7ShZ3U0WixeDyLJl
+xy16paq8U4Zt3VekyvggQQto8PT7dL5WXXp59fkdheMtlb71cZBDzI0fmgAKhynp
+VSJYACPq4xJDKVtHCN2MQWplBqjlIapBtJUhlbl90TSrE9atvNziPTnNvT51cKEY
+WQPJIrSPnNVeKtelttQKbfi3QBFGmh95DmK/D5fs4C8fF5Q=
+-----END CERTIFICATE-----
+
+# Operating CA: GoDaddy
+# Issuer: O=The Go Daddy Group, Inc. OU=Go Daddy Class 2 Certification Authority
+# Subject: O=The Go Daddy Group, Inc. OU=Go Daddy Class 2 Certification Authority
+# Label: "Go Daddy Class 2 CA"
+# Serial: 0
+# MD5 Fingerprint: 91:de:06:25:ab:da:fd:32:17:0c:bb:25:17:2a:84:67
+# SHA1 Fingerprint: 27:96:ba:e6:3f:18:01:e2:77:26:1b:a0:d7:77:70:02:8f:20:ee:e4
+# SHA256 Fingerprint: c3:84:6b:f2:4b:9e:93:ca:64:27:4c:0e:c6:7c:1e:cc:5e:02:4f:fc:ac:d2:d7:40:19:35:0e:81:fe:54:6a:e4
+-----BEGIN CERTIFICATE-----
+MIIEADCCAuigAwIBAgIBADANBgkqhkiG9w0BAQUFADBjMQswCQYDVQQGEwJVUzEh
+MB8GA1UEChMYVGhlIEdvIERhZGR5IEdyb3VwLCBJbmMuMTEwLwYDVQQLEyhHbyBE
+YWRkeSBDbGFzcyAyIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTA0MDYyOTE3
+MDYyMFoXDTM0MDYyOTE3MDYyMFowYzELMAkGA1UEBhMCVVMxITAfBgNVBAoTGFRo
+ZSBHbyBEYWRkeSBHcm91cCwgSW5jLjExMC8GA1UECxMoR28gRGFkZHkgQ2xhc3Mg
+MiBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTCCASAwDQYJKoZIhvcNAQEBBQADggEN
+ADCCAQgCggEBAN6d1+pXGEmhW+vXX0iG6r7d/+TvZxz0ZWizV3GgXne77ZtJ6XCA
+PVYYYwhv2vLM0D9/AlQiVBDYsoHUwHU9S3/Hd8M+eKsaA7Ugay9qK7HFiH7Eux6w
+wdhFJ2+qN1j3hybX2C32qRe3H3I2TqYXP2WYktsqbl2i/ojgC95/5Y0V4evLOtXi
+EqITLdiOr18SPaAIBQi2XKVlOARFmR6jYGB0xUGlcmIbYsUfb18aQr4CUWWoriMY
+avx4A6lNf4DD+qta/KFApMoZFv6yyO9ecw3ud72a9nmYvLEHZ6IVDd2gWMZEewo+
+YihfukEHU1jPEX44dMX4/7VpkI+EdOqXG68CAQOjgcAwgb0wHQYDVR0OBBYEFNLE
+sNKR1EwRcbNhyz2h/t2oatTjMIGNBgNVHSMEgYUwgYKAFNLEsNKR1EwRcbNhyz2h
+/t2oatTjoWekZTBjMQswCQYDVQQGEwJVUzEhMB8GA1UEChMYVGhlIEdvIERhZGR5
+IEdyb3VwLCBJbmMuMTEwLwYDVQQLEyhHbyBEYWRkeSBDbGFzcyAyIENlcnRpZmlj
+YXRpb24gQXV0aG9yaXR5ggEAMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQAD
+ggEBADJL87LKPpH8EsahB4yOd6AzBhRckB4Y9wimPQoZ+YeAEW5p5JYXMP80kWNy
+OO7MHAGjHZQopDH2esRU1/blMVgDoszOYtuURXO1v0XJJLXVggKtI3lpjbi2Tc7P
+TMozI+gciKqdi0FuFskg5YmezTvacPd+mSYgFFQlq25zheabIZ0KbIIOqPjCDPoQ
+HmyW74cNxA9hi63ugyuV+I6ShHI56yDqg+2DzZduCLzrTia2cyvk0/ZM/iZx4mER
+dEr/VxqHD3VILs9RaRegAhJhldXRQLIQTO7ErBBDpqWeCtWVYpoNz4iCxTIM5Cuf
+ReYNnyicsbkqWletNw+vHX/bvZ8=
+-----END CERTIFICATE-----
+
+# Operating CA: Google Trust Services LLC
+# Issuer: C=US, O=Google Trust Services LLC, CN=GTS Root R1
+# Subject: C=US, O=Google Trust Services LLC, CN=GTS Root R1
+# Label: "GTS Root R1"
+# Serial: 6e:47:a9:c5:4b:47:0c:0d:ec:33:d0:89:b9:1c:f4:e1
+# MD5 Fingerprint: 82:1A:EF:D4:D2:4A:F2:9F:E2:3D:97:06:14:70:72:85
+# SHA1 Fingerprint: E1:C9:50:E6:EF:22:F8:4C:56:45:72:8B:92:20:60:D7:D5:A7:A3:E8
+# SHA256 Fingerprint: 2A:57:54:71:E3:13:40:BC:21:58:1C:BD:2C:F1:3E:15:84:63:20:3E:CE:94:BC:F9:D3:CC:19:6B:F0:9A:54:72
+-----BEGIN CERTIFICATE-----
+MIIFWjCCA0KgAwIBAgIQbkepxUtHDA3sM9CJuRz04TANBgkqhkiG9w0BAQwFADBH
+MQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExM
+QzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIy
+MDAwMDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNl
+cnZpY2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwggIiMA0GCSqGSIb3DQEB
+AQUAA4ICDwAwggIKAoICAQC2EQKLHuOhd5s73L+UPreVp0A8of2C+X0yBoJx9vaM
+f/vo27xqLpeXo4xL+Sv2sfnOhB2x+cWX3u+58qPpvBKJXqeqUqv4IyfLpLGcY9vX
+mX7wCl7raKb0xlpHDU0QM+NOsROjyBhsS+z8CZDfnWQpJSMHobTSPS5g4M/SCYe7
+zUjwTcLCeoiKu7rPWRnWr4+wB7CeMfGCwcDfLqZtbBkOtdh+JhpFAz2weaSUKK0P
+fyblqAj+lug8aJRT7oM6iCsVlgmy4HqMLnXWnOunVmSPlk9orj2XwoSPwLxAwAtc
+vfaHszVsrBhQf4TgTM2S0yDpM7xSma8ytSmzJSq0SPly4cpk9+aCEI3oncKKiPo4
+Zor8Y/kB+Xj9e1x3+naH+uzfsQ55lVe0vSbv1gHR6xYKu44LtcXFilWr06zqkUsp
+zBmkMiVOKvFlRNACzqrOSbTqn3yDsEB750Orp2yjj32JgfpMpf/VjsPOS+C12LOO
+Rc92wO1AK/1TD7Cn1TsNsYqiA94xrcx36m97PtbfkSIS5r762DL8EGMUUXLeXdYW
+k70paDPvOmbsB4om3xPXV2V4J95eSRQAogB/mqghtqmxlbCluQ0WEdrHbEg8QOB+
+DVrNVjzRlwW5y0vtOUucxD/SVRNuJLDWcfr0wbrM7Rv1/oFB2ACYPTrIrnqYNxgF
+lQIDAQABo0IwQDAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNV
+HQ4EFgQU5K8rJnEaK0gnhS9SZizv8IkTcT4wDQYJKoZIhvcNAQEMBQADggIBADiW
+Cu49tJYeX++dnAsznyvgyv3SjgofQXSlfKqE1OXyHuY3UjKcC9FhHb8owbZEKTV1
+d5iyfNm9dKyKaOOpMQkpAWBz40d8U6iQSifvS9efk+eCNs6aaAyC58/UEBZvXw6Z
+XPYfcX3v73svfuo21pdwCxXu11xWajOl40k4DLh9+42FpLFZXvRq4d2h9mREruZR
+gyFmxhE+885H7pwoHyXa/6xmld01D1zvICxi/ZG6qcz8WpyTgYMpl0p8WnK0OdC3
+d8t5/Wk6kjftbjhlRn7pYL15iJdfOBL07q9bgsiG1eGZbYwE8na6SfZu6W0eX6Dv
+J4J2QPim01hcDyxC2kLGe4g0x8HYRZvBPsVhHdljUEn2NIVq4BjFbkerQUIpm/Zg
+DdIx02OYI5NaAIFItO/Nis3Jz5nu2Z6qNuFoS3FJFDYoOj0dzpqPJeaAcWErtXvM
++SUWgeExX6GjfhaknBZqlxi9dnKlC54dNuYvoS++cJEPqOba+MSSQGwlfnuzCdyy
+F62ARPBopY+Udf90WuioAnwMCeKpSwughQtiue+hMZL77/ZRBIls6Kl0obsXs7X9
+SQ98POyDGCBDTtWTurQ0sR8WNh8M5mQ5Fkzc4P4dyKliPUDqysU0ArSuiYgzNdws
+E3PYJ/HQcu51OyLemGhmW/HGY0dVHLqlCFF1pkgl
+-----END CERTIFICATE-----
+
+# Operating CA: Google Trust Services LLC
+# Issuer: C=US, O=Google Trust Services LLC, CN=GTS Root R2
+# Subject: C=US, O=Google Trust Services LLC, CN=GTS Root R2
+# Label: "GTS Root R2"
+# Serial: 6e:47:a9:c6:5a:b3:e7:20:c5:30:9a:3f:68:52:f2:6f
+# MD5 Fingerprint: 44:ED:9A:0E:A4:09:3B:00:F2:AE:4C:A3:C6:61:B0:8B
+# SHA1 Fingerprint: D2:73:96:2A:2A:5E:39:9F:73:3F:E1:C7:1E:64:3F:03:38:34:FC:4D
+# SHA256 Fingerprint: C4:5D:7B:B0:8E:6D:67:E6:2E:42:35:11:0B:56:4E:5F:78:FD:92:EF:05:8C:84:0A:EA:4E:64:55:D7:58:5C:60
+-----BEGIN CERTIFICATE-----
+MIIFWjCCA0KgAwIBAgIQbkepxlqz5yDFMJo/aFLybzANBgkqhkiG9w0BAQwFADBH
+MQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExM
+QzEUMBIGA1UEAxMLR1RTIFJvb3QgUjIwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIy
+MDAwMDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNl
+cnZpY2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjIwggIiMA0GCSqGSIb3DQEB
+AQUAA4ICDwAwggIKAoICAQDO3v2m++zsFDQ8BwZabFn3GTXd98GdVarTzTukk3Lv
+CvptnfbwhYBboUhSnznFt+4orO/LdmgUud+tAWyZH8QiHZ/+cnfgLFuv5AS/T3Kg
+GjSY6Dlo7JUle3ah5mm5hRm9iYz+re026nO8/4Piy33B0s5Ks40FnotJk9/BW9Bu
+XvAuMC6C/Pq8tBcKSOWIm8Wba96wyrQD8Nr0kLhlZPdcTK3ofmZemde4wj7I0BOd
+re7kRXuJVfeKH2JShBKzwkCX44ofR5GmdFrS+LFjKBC4swm4VndAoiaYecb+3yXu
+PuWgf9RhD1FLPD+M2uFwdNjCaKH5wQzpoeJ/u1U8dgbuak7MkogwTZq9TwtImoS1
+mKPV+3PBV2HdKFZ1E66HjucMUQkQdYhMvI35ezzUIkgfKtzra7tEscszcTJGr61K
+8YzodDqs5xoic4DSMPclQsciOzsSrZYuxsN2B6ogtzVJV+mSSeh2FnIxZyuWfoqj
+x5RWIr9qS34BIbIjMt/kmkRtWVtd9QCgHJvGeJeNkP+byKq0rxFROV7Z+2et1VsR
+nTKaG73VululycslaVNVJ1zgyjbLiGH7HrfQy+4W+9OmTN6SpdTi3/UGVN4unUu0
+kzCqgc7dGtxRcw1PcOnlthYhGXmy5okLdWTK1au8CcEYof/UVKGFPP0UJAOyh9Ok
+twIDAQABo0IwQDAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNV
+HQ4EFgQUu//KjiOfT5nK2+JopqUVJxce2Q4wDQYJKoZIhvcNAQEMBQADggIBALZp
+8KZ3/p7uC4Gt4cCpx/k1HUCCq+YEtN/L9x0Pg/B+E02NjO7jMyLDOfxA325BS0JT
+vhaI8dI4XsRomRyYUpOM52jtG2pzegVATX9lO9ZY8c6DR2Dj/5epnGB3GFW1fgiT
+z9D2PGcDFWEJ+YF59exTpJ/JjwGLc8R3dtyDovUMSRqodt6Sm2T4syzFJ9MHwAiA
+pJiS4wGWAqoC7o87xdFtCjMwc3i5T1QWvwsHoaRc5svJXISPD+AVdyx+Jn7axEvb
+pxZ3B7DNdehyQtaVhJ2Gg/LkkM0JR9SLA3DaWsYDQvTtN6LwG1BUSw7YhN4ZKJmB
+R64JGz9I0cNv4rBgF/XuIwKl2gBbbZCr7qLpGzvpx0QnRY5rn/WkhLx3+WuXrD5R
+RaIRpsyF7gpo8j5QOHokYh4XIDdtak23CZvJ/KRY9bb7nE4Yu5UC56GtmwfuNmsk
+0jmGwZODUNKBRqhfYlcsu2xkiAhu7xNUX90txGdj08+JN7+dIPT7eoOboB6BAFDC
+5AwiWVIQ7UNWhwD4FFKnHYuTjKJNRn8nxnGbJN7k2oaLDX5rIMHAnuFl2GqjpuiF
+izoHCBy69Y9Vmhh1fuXsgWbRIXOhNUQLgD1bnF5vKheW0YMjiGZt5obicDIvUiLn
+yOd/xCxgXS/Dr55FBcOEArf9LAhST4Ldo/DUhgkC
+-----END CERTIFICATE-----
+
+# Operating CA: Google Trust Services LLC
+# Issuer: C=US, O=Google Trust Services LLC, CN=GTS Root R3
+# Subject: C=US, O=Google Trust Services LLC, CN=GTS Root R3
+# Label: "GTS Root R3"
+# Serial: 6e:47:a9:c7:6c:a9:73:24:40:89:0f:03:55:dd:8d:1d
+# MD5 Fingerprint: 1A:79:5B:6B:04:52:9C:5D:C7:74:33:1B:25:9A:F9:25
+# SHA1 Fingerprint: 30:D4:24:6F:07:FF:DB:91:89:8A:0B:E9:49:66:11:EB:8C:5E:46:E5
+# SHA256 Fingerprint: 15:D5:B8:77:46:19:EA:7D:54:CE:1C:A6:D0:B0:C4:03:E0:37:A9:17:F1:31:E8:A0:4E:1E:6B:7A:71:BA:BC:E5
+-----BEGIN CERTIFICATE-----
+MIICDDCCAZGgAwIBAgIQbkepx2ypcyRAiQ8DVd2NHTAKBggqhkjOPQQDAzBHMQsw
+CQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEU
+MBIGA1UEAxMLR1RTIFJvb3QgUjMwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAw
+MDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZp
+Y2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjMwdjAQBgcqhkjOPQIBBgUrgQQA
+IgNiAAQfTzOHMymKoYTey8chWEGJ6ladK0uFxh1MJ7x/JlFyb+Kf1qPKzEUURout
+736GjOyxfi//qXGdGIRFBEFVbivqJn+7kAHjSxm65FSWRQmx1WyRRK2EE46ajA2A
+DDL24CejQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MB0GA1Ud
+DgQWBBTB8Sa6oC2uhYHP0/EqEr24Cmf9vDAKBggqhkjOPQQDAwNpADBmAjEAgFuk
+fCPAlaUs3L6JbyO5o91lAFJekazInXJ0glMLfalAvWhgxeG4VDvBNhcl2MG9AjEA
+njWSdIUlUfUk7GRSJFClH9voy8l27OyCbvWFGFPouOOaKaqW04MjyaR7YbPMAuhd
+-----END CERTIFICATE-----
+
+# Operating CA: Google Trust Services LLC
+# Issuer: C=US, O=Google Trust Services LLC, CN=GTS Root R4
+# Subject: C=US, O=Google Trust Services LLC, CN=GTS Root R4
+# Label: "GTS Root R4"
+# Serial: 6e:47:a9:c8:8b:94:b6:e8:bb:3b:2a:d8:a2:b2:c1:99
+# MD5 Fingerprint: 5D:B6:6A:C4:60:17:24:6A:1A:99:A8:4B:EE:5E:B4:26
+# SHA1 Fingerprint: 2A:1D:60:27:D9:4A:B1:0A:1C:4D:91:5C:CD:33:A0:CB:3E:2D:54:CB
+# SHA256 Fingerprint: 71:CC:A5:39:1F:9E:79:4B:04:80:25:30:B3:63:E1:21:DA:8A:30:43:BB:26:66:2F:EA:4D:CA:7F:C9:51:A4:BD
+-----BEGIN CERTIFICATE-----
+MIICCjCCAZGgAwIBAgIQbkepyIuUtui7OyrYorLBmTAKBggqhkjOPQQDAzBHMQsw
+CQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEU
+MBIGA1UEAxMLR1RTIFJvb3QgUjQwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAw
+MDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZp
+Y2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjQwdjAQBgcqhkjOPQIBBgUrgQQA
+IgNiAATzdHOnaItgrkO4NcWBMHtLSZ37wWHO5t5GvWvVYRg1rkDdc/eJkTBa6zzu
+hXyiQHY7qca4R9gq55KRanPpsXI5nymfopjTX15YhmUPoYRlBtHci8nHc8iMai/l
+xKvRHYqjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MB0GA1Ud
+DgQWBBSATNbrdP9JNqPV2Py1PsVq8JQdjDAKBggqhkjOPQQDAwNnADBkAjBqUFJ0
+CMRw3J5QdCHojXohw0+WbhXRIjVhLfoIN+4Zba3bssx9BzT1YBkstTTZbyACMANx
+sbqjYAuG7ZoIapVon+Kz4ZNkfF6Tpt95LY2F45TPI11xzPKwTdb+mciUqXWi4w==
+-----END CERTIFICATE-----
+
+# Operating CA: Symantec (GeoTrust)
+# Issuer: CN=GeoTrust Global CA O=GeoTrust Inc.
+# Subject: CN=GeoTrust Global CA O=GeoTrust Inc.
+# Label: "GeoTrust Global CA"
+# Serial: 144470
+# MD5 Fingerprint: f7:75:ab:29:fb:51:4e:b7:77:5e:ff:05:3c:99:8e:f5
+# SHA1 Fingerprint: de:28:f4:a4:ff:e5:b9:2f:a3:c5:03:d1:a3:49:a7:f9:96:2a:82:12
+# SHA256 Fingerprint: ff:85:6a:2d:25:1d:cd:88:d3:66:56:f4:50:12:67:98:cf:ab:aa:de:40:79:9c:72:2d:e4:d2:b5:db:36:a7:3a
+-----BEGIN CERTIFICATE-----
+MIIDVDCCAjygAwIBAgIDAjRWMA0GCSqGSIb3DQEBBQUAMEIxCzAJBgNVBAYTAlVT
+MRYwFAYDVQQKEw1HZW9UcnVzdCBJbmMuMRswGQYDVQQDExJHZW9UcnVzdCBHbG9i
+YWwgQ0EwHhcNMDIwNTIxMDQwMDAwWhcNMjIwNTIxMDQwMDAwWjBCMQswCQYDVQQG
+EwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEbMBkGA1UEAxMSR2VvVHJ1c3Qg
+R2xvYmFsIENBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA2swYYzD9
+9BcjGlZ+W988bDjkcbd4kdS8odhM+KhDtgPpTSEHCIjaWC9mOSm9BXiLnTjoBbdq
+fnGk5sRgprDvgOSJKA+eJdbtg/OtppHHmMlCGDUUna2YRpIuT8rxh0PBFpVXLVDv
+iS2Aelet8u5fa9IAjbkU+BQVNdnARqN7csiRv8lVK83Qlz6cJmTM386DGXHKTubU
+1XupGc1V3sjs0l44U+VcT4wt/lAjNvxm5suOpDkZALeVAjmRCw7+OC7RHQWa9k0+
+bw8HHa8sHo9gOeL6NlMTOdReJivbPagUvTLrGAMoUgRx5aszPeE4uwc2hGKceeoW
+MPRfwCvocWvk+QIDAQABo1MwUTAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTA
+ephojYn7qwVkDBF9qn1luMrMTjAfBgNVHSMEGDAWgBTAephojYn7qwVkDBF9qn1l
+uMrMTjANBgkqhkiG9w0BAQUFAAOCAQEANeMpauUvXVSOKVCUn5kaFOSPeCpilKIn
+Z57QzxpeR+nBsqTP3UEaBU6bS+5Kb1VSsyShNwrrZHYqLizz/Tt1kL/6cdjHPTfS
+tQWVYrmm3ok9Nns4d0iXrKYgjy6myQzCsplFAMfOEVEiIuCl6rYVSAlk6l5PdPcF
+PseKUgzbFbS9bZvlxrFUaKnjaZC2mqUPuLk/IH2uSrW4nOQdtqvmlKXBx4Ot2/Un
+hw4EbNX/3aBd7YdStysVAq45pmp06drE57xNNB6pXE0zX5IJL4hmXXeXxx12E6nV
+5fEWCRE11azbJHFwLJhWC9kXtNHjUStedejV0NxPNO3CBWaAocvmMw==
+-----END CERTIFICATE-----
+
+# Operating CA: Symantec (GeoTrust)
+# Issuer: CN=GeoTrust Global CA 2 O=GeoTrust Inc.
+# Subject: CN=GeoTrust Global CA 2 O=GeoTrust Inc.
+# Label: "GeoTrust Global CA 2"
+# Serial: 1
+# MD5 Fingerprint: 0e:40:a7:6c:de:03:5d:8f:d1:0f:e4:d1:8d:f9:6c:a9
+# SHA1 Fingerprint: a9:e9:78:08:14:37:58:88:f2:05:19:b0:6d:2b:0d:2b:60:16:90:7d
+# SHA256 Fingerprint: ca:2d:82:a0:86:77:07:2f:8a:b6:76:4f:f0:35:67:6c:fe:3e:5e:32:5e:01:21:72:df:3f:92:09:6d:b7:9b:85
+-----BEGIN CERTIFICATE-----
+MIIDZjCCAk6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBEMQswCQYDVQQGEwJVUzEW
+MBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEdMBsGA1UEAxMUR2VvVHJ1c3QgR2xvYmFs
+IENBIDIwHhcNMDQwMzA0MDUwMDAwWhcNMTkwMzA0MDUwMDAwWjBEMQswCQYDVQQG
+EwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEdMBsGA1UEAxMUR2VvVHJ1c3Qg
+R2xvYmFsIENBIDIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDvPE1A
+PRDfO1MA4Wf+lGAVPoWI8YkNkMgoI5kF6CsgncbzYEbYwbLVjDHZ3CB5JIG/NTL8
+Y2nbsSpr7iFY8gjpeMtvy/wWUsiRxP89c96xPqfCfWbB9X5SJBri1WeR0IIQ13hL
+TytCOb1kLUCgsBDTOEhGiKEMuzozKmKY+wCdE1l/bztyqu6mD4b5BWHqZ38MN5aL
+5mkWRxHCJ1kDs6ZgwiFAVvqgx306E+PsV8ez1q6diYD3Aecs9pYrEw15LNnA5IZ7
+S4wMcoKK+xfNAGw6EzywhIdLFnopsk/bHdQL82Y3vdj2V7teJHq4PIu5+pIaGoSe
+2HSPqht/XvT+RSIhAgMBAAGjYzBhMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYE
+FHE4NvICMVNHK266ZUapEBVYIAUJMB8GA1UdIwQYMBaAFHE4NvICMVNHK266ZUap
+EBVYIAUJMA4GA1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQUFAAOCAQEAA/e1K6td
+EPx7srJerJsOflN4WT5CBP51o62sgU7XAotexC3IUnbHLB/8gTKY0UvGkpMzNTEv
+/NgdRN3ggX+d6YvhZJFiCzkIjKx0nVnZellSlxG5FntvRdOW2TF9AjYPnDtuzywN
+A0ZF66D0f0hExghAzN4bcLUprbqLOzRldRtxIR0sFAqwlpW41uryZfspuk/qkZN0
+abby/+Ea0AzRdoXLiiW9l14sbxWZJue2Kf8i7MkCx1YAzUm5s2x7UwQa4qjJqhIF
+I8LO57sEAszAR6LkxCkvW0VXiVHuPOtSCP8HNR6fNWpHSlaY0VqFH4z1Ir+rzoPz
+4iIprn2DQKi6bA==
+-----END CERTIFICATE-----
+
+# Operating CA: Symantec (GeoTrust)
+# Issuer: CN=GeoTrust Primary Certification Authority O=GeoTrust Inc.
+# Subject: CN=GeoTrust Primary Certification Authority O=GeoTrust Inc.
+# Label: "GeoTrust Primary Certification Authority"
+# Serial: 32798226551256963324313806436981982369
+# MD5 Fingerprint: 02:26:c3:01:5e:08:30:37:43:a9:d0:7d:cf:37:e6:bf
+# SHA1 Fingerprint: 32:3c:11:8e:1b:f7:b8:b6:52:54:e2:e2:10:0d:d6:02:90:37:f0:96
+# SHA256 Fingerprint: 37:d5:10:06:c5:12:ea:ab:62:64:21:f1:ec:8c:92:01:3f:c5:f8:2a:e9:8e:e5:33:eb:46:19:b8:de:b4:d0:6c
+-----BEGIN CERTIFICATE-----
+MIIDfDCCAmSgAwIBAgIQGKy1av1pthU6Y2yv2vrEoTANBgkqhkiG9w0BAQUFADBY
+MQswCQYDVQQGEwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjExMC8GA1UEAxMo
+R2VvVHJ1c3QgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0wNjEx
+MjcwMDAwMDBaFw0zNjA3MTYyMzU5NTlaMFgxCzAJBgNVBAYTAlVTMRYwFAYDVQQK
+Ew1HZW9UcnVzdCBJbmMuMTEwLwYDVQQDEyhHZW9UcnVzdCBQcmltYXJ5IENlcnRp
+ZmljYXRpb24gQXV0aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
+AQEAvrgVe//UfH1nrYNke8hCUy3f9oQIIGHWAVlqnEQRr+92/ZV+zmEwu3qDXwK9
+AWbK7hWNb6EwnL2hhZ6UOvNWiAAxz9juapYC2e0DjPt1befquFUWBRaa9OBesYjA
+ZIVcFU2Ix7e64HXprQU9nceJSOC7KMgD4TCTZF5SwFlwIjVXiIrxlQqD17wxcwE0
+7e9GceBrAqg1cmuXm2bgyxx5X9gaBGgeRwLmnWDiNpcB3841kt++Z8dtd1k7j53W
+kBWUvEI0EME5+bEnPn7WinXFsq+W06Lem+SYvn3h6YGttm/81w7a4DSwDRp35+MI
+mO9Y+pyEtzavwt+s0vQQBnBxNQIDAQABo0IwQDAPBgNVHRMBAf8EBTADAQH/MA4G
+A1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQULNVQQZcVi/CPNmFbSvtr2ZnJM5IwDQYJ
+KoZIhvcNAQEFBQADggEBAFpwfyzdtzRP9YZRqSa+S7iq8XEN3GHHoOo0Hnp3DwQ1
+6CePbJC/kRYkRj5KTs4rFtULUh38H2eiAkUxT87z+gOneZ1TatnaYzr4gNfTmeGl
+4b7UVXGYNTq+k+qurUKykG/g/CFNNWMziUnWm07Kx+dOCQD32sfvmWKZd7aVIl6K
+oKv0uHiYyjgZmclynnjNS6yvGaBzEi38wkG6gZHaFloxt/m0cYASSJlyc1pZU8Fj
+UjPtp8nSOQJw+uCxQmYpqptR7TBUIhRf2asdweSU8Pj1K/fqynhG1riR/aYNKxoU
+AT6A8EKglQdebc3MS6RFjasS6LPeWuWgfOgPIh1a6Vk=
+-----END CERTIFICATE-----
+
+# Operating CA: Symantec (GeoTrust)
+# Issuer: CN=GeoTrust Primary Certification Authority - G2 O=GeoTrust Inc. OU=(c) 2007 GeoTrust Inc. - For authorized use only
+# Subject: CN=GeoTrust Primary Certification Authority - G2 O=GeoTrust Inc. OU=(c) 2007 GeoTrust Inc. - For authorized use only
+# Label: "GeoTrust Primary Certification Authority - G2"
+# Serial: 80682863203381065782177908751794619243
+# MD5 Fingerprint: 01:5e:d8:6b:bd:6f:3d:8e:a1:31:f8:12:e0:98:73:6a
+# SHA1 Fingerprint: 8d:17:84:d5:37:f3:03:7d:ec:70:fe:57:8b:51:9a:99:e6:10:d7:b0
+# SHA256 Fingerprint: 5e:db:7a:c4:3b:82:a0:6a:87:61:e8:d7:be:49:79:eb:f2:61:1f:7d:d7:9b:f9:1c:1c:6b:56:6a:21:9e:d7:66
+-----BEGIN CERTIFICATE-----
+MIICrjCCAjWgAwIBAgIQPLL0SAoA4v7rJDteYD7DazAKBggqhkjOPQQDAzCBmDEL
+MAkGA1UEBhMCVVMxFjAUBgNVBAoTDUdlb1RydXN0IEluYy4xOTA3BgNVBAsTMChj
+KSAyMDA3IEdlb1RydXN0IEluYy4gLSBGb3IgYXV0aG9yaXplZCB1c2Ugb25seTE2
+MDQGA1UEAxMtR2VvVHJ1c3QgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0
+eSAtIEcyMB4XDTA3MTEwNTAwMDAwMFoXDTM4MDExODIzNTk1OVowgZgxCzAJBgNV
+BAYTAlVTMRYwFAYDVQQKEw1HZW9UcnVzdCBJbmMuMTkwNwYDVQQLEzAoYykgMjAw
+NyBHZW9UcnVzdCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxNjA0BgNV
+BAMTLUdlb1RydXN0IFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkgLSBH
+MjB2MBAGByqGSM49AgEGBSuBBAAiA2IABBWx6P0DFUPlrOuHNxFi79KDNlJ9RVcL
+So17VDs6bl8VAsBQps8lL33KSLjHUGMcKiEIfJo22Av+0SbFWDEwKCXzXV2juLal
+tJLtbCyf691DiaI8S0iRHVDsJt/WYC69IaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAO
+BgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFBVfNVdRVfslsq0DafwBo/q+EVXVMAoG
+CCqGSM49BAMDA2cAMGQCMGSWWaboCd6LuvpaiIjwH5HTRqjySkwCY/tsXzjbLkGT
+qQ7mndwxHLKgpxgceeHHNgIwOlavmnRs9vuD4DPTCF+hnMJbn0bWtsuRBmOiBucz
+rD6ogRLQy7rQkgu2npaqBA+K
+-----END CERTIFICATE-----
+
+# Operating CA: Symantec (GeoTrust)
+# Issuer: CN=GeoTrust Primary Certification Authority - G3 O=GeoTrust Inc. OU=(c) 2008 GeoTrust Inc. - For authorized use only
+# Subject: CN=GeoTrust Primary Certification Authority - G3 O=GeoTrust Inc. OU=(c) 2008 GeoTrust Inc. - For authorized use only
+# Label: "GeoTrust Primary Certification Authority - G3"
+# Serial: 28809105769928564313984085209975885599
+# MD5 Fingerprint: b5:e8:34:36:c9:10:44:58:48:70:6d:2e:83:d4:b8:05
+# SHA1 Fingerprint: 03:9e:ed:b8:0b:e7:a0:3c:69:53:89:3b:20:d2:d9:32:3a:4c:2a:fd
+# SHA256 Fingerprint: b4:78:b8:12:25:0d:f8:78:63:5c:2a:a7:ec:7d:15:5e:aa:62:5e:e8:29:16:e2:cd:29:43:61:88:6c:d1:fb:d4
+-----BEGIN CERTIFICATE-----
+MIID/jCCAuagAwIBAgIQFaxulBmyeUtB9iepwxgPHzANBgkqhkiG9w0BAQsFADCB
+mDELMAkGA1UEBhMCVVMxFjAUBgNVBAoTDUdlb1RydXN0IEluYy4xOTA3BgNVBAsT
+MChjKSAyMDA4IEdlb1RydXN0IEluYy4gLSBGb3IgYXV0aG9yaXplZCB1c2Ugb25s
+eTE2MDQGA1UEAxMtR2VvVHJ1c3QgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhv
+cml0eSAtIEczMB4XDTA4MDQwMjAwMDAwMFoXDTM3MTIwMTIzNTk1OVowgZgxCzAJ
+BgNVBAYTAlVTMRYwFAYDVQQKEw1HZW9UcnVzdCBJbmMuMTkwNwYDVQQLEzAoYykg
+MjAwOCBHZW9UcnVzdCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxNjA0
+BgNVBAMTLUdlb1RydXN0IFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkg
+LSBHMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBANziXmJYHTNXOTIz
++uvLh4yn1ErdBojqZI4xmKU4kB6Yzy5jK/BGvESyiaHAKAxJcCGVn2TAppMSAmUm
+hsalifD614SgcK9PGpc/BkTVyetyEH3kMSj7HGHmKAdEc5IiaacDiGydY8hS2pgn
+5whMcD60yRLBxWeDXTPzAxHsatBT4tG6NmCUgLthY2xbF37fQJQeqw3CIShwiP/W
+JmxsYAQlTlV+fe+/lEjetx3dcI0FX4ilm/LC7urRQEFtYjgdVgbFA0dRIBn8exAL
+DmKudlW/X3e+PkkBUz2YJQN2JFodtNuJ6nnltrM7P7pMKEF/BqxqjsHQ9gUdfeZC
+huOl1UcCAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYw
+HQYDVR0OBBYEFMR5yo6hTgMdHNxr2zFblD4/MH8tMA0GCSqGSIb3DQEBCwUAA4IB
+AQAtxRPPVoB7eni9n64smefv2t+UXglpp+duaIy9cr5HqQ6XErhK8WTTOd8lNNTB
+zU6B8A8ExCSzNJbGpqow32hhc9f5joWJ7w5elShKKiePEI4ufIbEAp7aDHdlDkQN
+kv39sxY2+hENHYwOB4lqKVb3cvTdFZx3NWZXqxNT2I7BQMXXExZacse3aQHEerGD
+AWh9jUGhlBjBJVz88P6DAod8DQ3PLghcSkANPuyBYeYk28rgDi0Hsj5W3I31QYUH
+SJsMC8tJP33st/3LjWeJGqvtux6jAAgIFyqCXDFdRootD4abdNlF+9RAsXqqaC2G
+spki4cErx5z481+oghLrGREt
+-----END CERTIFICATE-----
+
+# Operating CA: Symantec (GeoTrust)
+# Issuer: CN=GeoTrust Universal CA O=GeoTrust Inc.
+# Subject: CN=GeoTrust Universal CA O=GeoTrust Inc.
+# Label: "GeoTrust Universal CA"
+# Serial: 1
+# MD5 Fingerprint: 92:65:58:8b:a2:1a:31:72:73:68:5c:b4:a5:7a:07:48
+# SHA1 Fingerprint: e6:21:f3:35:43:79:05:9a:4b:68:30:9d:8a:2f:74:22:15:87:ec:79
+# SHA256 Fingerprint: a0:45:9b:9f:63:b2:25:59:f5:fa:5d:4c:6d:b3:f9:f7:2f:f1:93:42:03:35:78:f0:73:bf:1d:1b:46:cb:b9:12
+-----BEGIN CERTIFICATE-----
+MIIFaDCCA1CgAwIBAgIBATANBgkqhkiG9w0BAQUFADBFMQswCQYDVQQGEwJVUzEW
+MBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEeMBwGA1UEAxMVR2VvVHJ1c3QgVW5pdmVy
+c2FsIENBMB4XDTA0MDMwNDA1MDAwMFoXDTI5MDMwNDA1MDAwMFowRTELMAkGA1UE
+BhMCVVMxFjAUBgNVBAoTDUdlb1RydXN0IEluYy4xHjAcBgNVBAMTFUdlb1RydXN0
+IFVuaXZlcnNhbCBDQTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAKYV
+VaCjxuAfjJ0hUNfBvitbtaSeodlyWL0AG0y/YckUHUWCq8YdgNY96xCcOq9tJPi8
+cQGeBvV8Xx7BDlXKg5pZMK4ZyzBIle0iN430SppyZj6tlcDgFgDgEB8rMQ7XlFTT
+QjOgNB0eRXbdT8oYN+yFFXoZCPzVx5zw8qkuEKmS5j1YPakWaDwvdSEYfyh3peFh
+F7em6fgemdtzbvQKoiFs7tqqhZJmr/Z6a4LauiIINQ/PQvE1+mrufislzDoR5G2v
+c7J2Ha3QsnhnGqQ5HFELZ1aD/ThdDc7d8Lsrlh/eezJS/R27tQahsiFepdaVaH/w
+mZ7cRQg+59IJDTWU3YBOU5fXtQlEIGQWFwMCTFMNaN7VqnJNk22CDtucvc+081xd
+VHppCZbW2xHBjXWotM85yM48vCR85mLK4b19p71XZQvk/iXttmkQ3CgaRr0BHdCX
+teGYO8A3ZNY9lO4L4fUorgtWv3GLIylBjobFS1J72HGrH4oVpjuDWtdYAVHGTEHZ
+f9hBZ3KiKN9gg6meyHv8U3NyWfWTehd2Ds735VzZC1U0oqpbtWpU5xPKV+yXbfRe
+Bi9Fi1jUIxaS5BZuKGNZMN9QAZxjiRqf2xeUgnA3wySemkfWWspOqGmJch+RbNt+
+nhutxx9z3SxPGWX9f5NAEC7S8O08ni4oPmkmM8V7AgMBAAGjYzBhMA8GA1UdEwEB
+/wQFMAMBAf8wHQYDVR0OBBYEFNq7LqqwDLiIJlF0XG0D08DYj3rWMB8GA1UdIwQY
+MBaAFNq7LqqwDLiIJlF0XG0D08DYj3rWMA4GA1UdDwEB/wQEAwIBhjANBgkqhkiG
+9w0BAQUFAAOCAgEAMXjmx7XfuJRAyXHEqDXsRh3ChfMoWIawC/yOsjmPRFWrZIRc
+aanQmjg8+uUfNeVE44B5lGiku8SfPeE0zTBGi1QrlaXv9z+ZhP015s8xxtxqv6fX
+IwjhmF7DWgh2qaavdy+3YL1ERmrvl/9zlcGO6JP7/TG37FcREUWbMPEaiDnBTzyn
+ANXH/KttgCJwpQzgXQQpAvvLoJHRfNbDflDVnVi+QTjruXU8FdmbyUqDWcDaU/0z
+uzYYm4UPFd3uLax2k7nZAY1IEKj79TiG8dsKxr2EoyNB3tZ3b4XUhRxQ4K5RirqN
+Pnbiucon8l+f725ZDQbYKxek0nxru18UGkiPGkzns0ccjkxFKyDuSN/n3QmOGKja
+QI2SJhFTYXNd673nxE0pN2HrrDktZy4W1vUAg4WhzH92xH3kt0tm7wNFYGm2DFKW
+koRepqO1pD4r2czYG0eq8kTaT/kD6PAUyz/zg97QwVTjt+gKN02LIFkDMBmhLMi9
+ER/frslKxfMnZmaGrGiR/9nmUxwPi1xpZQomyB40w11Re9epnAahNt3ViZS82eQt
+DF4JbAiXfKM9fJP/P6EUp8+1Xevb2xzEdt+Iub1FBZUbrvxGakyvSOPOrg/Sfuvm
+bJxPgWp6ZKy7PtXny3YuxadIwVyQD8vIP/rmMuGNG2+k5o7Y+SlIis5z/iw=
+-----END CERTIFICATE-----
+
+# Operating CA: Symantec (GeoTrust)
+# Issuer: CN=GeoTrust Universal CA 2 O=GeoTrust Inc.
+# Subject: CN=GeoTrust Universal CA 2 O=GeoTrust Inc.
+# Label: "GeoTrust Universal CA 2"
+# Serial: 1
+# MD5 Fingerprint: 34:fc:b8:d0:36:db:9e:14:b3:c2:f2:db:8f:e4:94:c7
+# SHA1 Fingerprint: 37:9a:19:7b:41:85:45:35:0c:a6:03:69:f3:3c:2e:af:47:4f:20:79
+# SHA256 Fingerprint: a0:23:4f:3b:c8:52:7c:a5:62:8e:ec:81:ad:5d:69:89:5d:a5:68:0d:c9:1d:1c:b8:47:7f:33:f8:78:b9:5b:0b
+-----BEGIN CERTIFICATE-----
+MIIFbDCCA1SgAwIBAgIBATANBgkqhkiG9w0BAQUFADBHMQswCQYDVQQGEwJVUzEW
+MBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEgMB4GA1UEAxMXR2VvVHJ1c3QgVW5pdmVy
+c2FsIENBIDIwHhcNMDQwMzA0MDUwMDAwWhcNMjkwMzA0MDUwMDAwWjBHMQswCQYD
+VQQGEwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEgMB4GA1UEAxMXR2VvVHJ1
+c3QgVW5pdmVyc2FsIENBIDIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoIC
+AQCzVFLByT7y2dyxUxpZKeexw0Uo5dfR7cXFS6GqdHtXr0om/Nj1XqduGdt0DE81
+WzILAePb63p3NeqqWuDW6KFXlPCQo3RWlEQwAx5cTiuFJnSCegx2oG9NzkEtoBUG
+FF+3Qs17j1hhNNwqCPkuwwGmIkQcTAeC5lvO0Ep8BNMZcyfwqph/Lq9O64ceJHdq
+XbboW0W63MOhBW9Wjo8QJqVJwy7XQYci4E+GymC16qFjwAGXEHm9ADwSbSsVsaxL
+se4YuU6W3Nx2/zu+z18DwPw76L5GG//aQMJS9/7jOvdqdzXQ2o3rXhhqMcceujwb
+KNZrVMaqW9eiLBsZzKIC9ptZvTdrhrVtgrrY6slWvKk2WP0+GfPtDCapkzj4T8Fd
+IgbQl+rhrcZV4IErKIM6+vR7IVEAvlI4zs1meaj0gVbi0IMJR1FbUGrP20gaXT73
+y/Zl92zxlfgCOzJWgjl6W70viRu/obTo/3+NjN8D8WBOWBFM66M/ECuDmgFz2ZRt
+hAAnZqzwcEAJQpKtT5MNYQlRJNiS1QuUYbKHsu3/mjX/hVTK7URDrBs8FmtISgoc
+QIgfksILAAX/8sgCSqSqqcyZlpwvWOB94b67B9xfBHJcMTTD7F8t4D1kkCLm0ey4
+Lt1ZrtmhN79UNdxzMk+MBB4zsslG8dhcyFVQyWi9qLo2CQIDAQABo2MwYTAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR281Xh+qQ2+/CfXGJx7Tz0RzgQKzAfBgNV
+HSMEGDAWgBR281Xh+qQ2+/CfXGJx7Tz0RzgQKzAOBgNVHQ8BAf8EBAMCAYYwDQYJ
+KoZIhvcNAQEFBQADggIBAGbBxiPz2eAubl/oz66wsCVNK/g7WJtAJDday6sWSf+z
+dXkzoS9tcBc0kf5nfo/sm+VegqlVHy/c1FEHEv6sFj4sNcZj/NwQ6w2jqtB8zNHQ
+L1EuxBRa3ugZ4T7GzKQp5y6EqgYweHZUcyiYWTjgAA1i00J9IZ+uPTqM1fp3DRgr
+Fg5fNuH8KrUwJM/gYwx7WBr+mbpCErGR9Hxo4sjoryzqyX6uuyo9DRXcNJW2GHSo
+ag/HtPQTxORb7QrSpJdMKu0vbBKJPfEncKpqA1Ihn0CoZ1Dy81of398j9tx4TuaY
+T1U6U+Pv8vSfx3zYWK8pIpe44L2RLrB27FcRz+8pRPPphXpgY+RdM4kX2TGq2tbz
+GDVyz4crL2MjhF2EjD9XoIj8mZEoJmmZ1I+XRL6O1UixpCgp8RW04eWe3fiPpm8m
+1wk8OhwRDqZsN/etRIcsKMfYdIKz0G9KV7s1KSegi+ghp4dkNl3M2Basx7InQJJV
+OCiNUW7dFGdTbHFcJoRNdVq2fmBWqU2t+5sel/MN2dKXVHfaPRK34B7vCAas+YWH
+6aLcr34YEoP9VhdBLtUpgn2Z9DH2canPLAEnpQW5qrJITirvn5NSUZU8UnOOVkwX
+QMAJKOSLakhT2+zNVVXxxvjpoixMptEmX36vWkzaH6byHCx+rgIW0lbQL1dTR+iS
+-----END CERTIFICATE-----
+
+# Operating CA: Symantec (Thawte)
+# Issuer: CN=thawte Primary Root CA O=thawte, Inc. OU=Certification Services Division/(c) 2006 thawte, Inc. - For authorized use only
+# Subject: CN=thawte Primary Root CA O=thawte, Inc. OU=Certification Services Division/(c) 2006 thawte, Inc. - For authorized use only
+# Label: "thawte Primary Root CA"
+# Serial: 69529181992039203566298953787712940909
+# MD5 Fingerprint: 8c:ca:dc:0b:22:ce:f5:be:72:ac:41:1a:11:a8:d8:12
+# SHA1 Fingerprint: 91:c6:d6:ee:3e:8a:c8:63:84:e5:48:c2:99:29:5c:75:6c:81:7b:81
+# SHA256 Fingerprint: 8d:72:2f:81:a9:c1:13:c0:79:1d:f1:36:a2:96:6d:b2:6c:95:0a:97:1d:b4:6b:41:99:f4:ea:54:b7:8b:fb:9f
+-----BEGIN CERTIFICATE-----
+MIIEIDCCAwigAwIBAgIQNE7VVyDV7exJ9C/ON9srbTANBgkqhkiG9w0BAQUFADCB
+qTELMAkGA1UEBhMCVVMxFTATBgNVBAoTDHRoYXd0ZSwgSW5jLjEoMCYGA1UECxMf
+Q2VydGlmaWNhdGlvbiBTZXJ2aWNlcyBEaXZpc2lvbjE4MDYGA1UECxMvKGMpIDIw
+MDYgdGhhd3RlLCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxHzAdBgNV
+BAMTFnRoYXd0ZSBQcmltYXJ5IFJvb3QgQ0EwHhcNMDYxMTE3MDAwMDAwWhcNMzYw
+NzE2MjM1OTU5WjCBqTELMAkGA1UEBhMCVVMxFTATBgNVBAoTDHRoYXd0ZSwgSW5j
+LjEoMCYGA1UECxMfQ2VydGlmaWNhdGlvbiBTZXJ2aWNlcyBEaXZpc2lvbjE4MDYG
+A1UECxMvKGMpIDIwMDYgdGhhd3RlLCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNl
+IG9ubHkxHzAdBgNVBAMTFnRoYXd0ZSBQcmltYXJ5IFJvb3QgQ0EwggEiMA0GCSqG
+SIb3DQEBAQUAA4IBDwAwggEKAoIBAQCsoPD7gFnUnMekz52hWXMJEEUMDSxuaPFs
+W0hoSVk3/AszGcJ3f8wQLZU0HObrTQmnHNK4yZc2AreJ1CRfBsDMRJSUjQJib+ta
+3RGNKJpchJAQeg29dGYvajig4tVUROsdB58Hum/u6f1OCyn1PoSgAfGcq/gcfomk
+6KHYcWUNo1F77rzSImANuVud37r8UVsLr5iy6S7pBOhih94ryNdOwUxkHt3Ph1i6
+Sk/KaAcdHJ1KxtUvkcx8cXIcxcBn6zL9yZJclNqFwJu/U30rCfSMnZEfl2pSy94J
+NqR32HuHUETVPm4pafs5SSYeCaWAe0At6+gnhcn+Yf1+5nyXHdWdAgMBAAGjQjBA
+MA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMB0GA1UdDgQWBBR7W0XP
+r87Lev0xkhpqtvNG61dIUDANBgkqhkiG9w0BAQUFAAOCAQEAeRHAS7ORtvzw6WfU
+DW5FvlXok9LOAz/t2iWwHVfLHjp2oEzsUHboZHIMpKnxuIvW1oeEuzLlQRHAd9mz
+YJ3rG9XRbkREqaYB7FViHXe4XI5ISXycO1cRrK1zN44veFyQaEfZYGDm/Ac9IiAX
+xPcW6cTYcvnIc3zfFi8VqT79aie2oetaupgf1eNNZAqdE8hhuvU5HIe6uL17In/2
+/qxAeeWsEG89jxt5dovEN7MhGITlNgDrYyCZuen+MwS7QcjBAvlEYyCegc5C09Y/
+LHbTY5xZ3Y+m4Q6gLkH3LpVHz7z9M/P2C2F+fpErgUfCJzDupxBdN49cOSvkBPB7
+jVaMaA==
+-----END CERTIFICATE-----
+
+# Operating CA: Symantec (Thawte)
+# Issuer: CN=thawte Primary Root CA - G2 O=thawte, Inc. OU=(c) 2007 thawte, Inc. - For authorized use only
+# Subject: CN=thawte Primary Root CA - G2 O=thawte, Inc. OU=(c) 2007 thawte, Inc. - For authorized use only
+# Label: "thawte Primary Root CA - G2"
+# Serial: 71758320672825410020661621085256472406
+# MD5 Fingerprint: 74:9d:ea:60:24:c4:fd:22:53:3e:cc:3a:72:d9:29:4f
+# SHA1 Fingerprint: aa:db:bc:22:23:8f:c4:01:a1:27:bb:38:dd:f4:1d:db:08:9e:f0:12
+# SHA256 Fingerprint: a4:31:0d:50:af:18:a6:44:71:90:37:2a:86:af:af:8b:95:1f:fb:43:1d:83:7f:1e:56:88:b4:59:71:ed:15:57
+-----BEGIN CERTIFICATE-----
+MIICiDCCAg2gAwIBAgIQNfwmXNmET8k9Jj1Xm67XVjAKBggqhkjOPQQDAzCBhDEL
+MAkGA1UEBhMCVVMxFTATBgNVBAoTDHRoYXd0ZSwgSW5jLjE4MDYGA1UECxMvKGMp
+IDIwMDcgdGhhd3RlLCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxJDAi
+BgNVBAMTG3RoYXd0ZSBQcmltYXJ5IFJvb3QgQ0EgLSBHMjAeFw0wNzExMDUwMDAw
+MDBaFw0zODAxMTgyMzU5NTlaMIGEMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMdGhh
+d3RlLCBJbmMuMTgwNgYDVQQLEy8oYykgMjAwNyB0aGF3dGUsIEluYy4gLSBGb3Ig
+YXV0aG9yaXplZCB1c2Ugb25seTEkMCIGA1UEAxMbdGhhd3RlIFByaW1hcnkgUm9v
+dCBDQSAtIEcyMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEotWcgnuVnfFSeIf+iha/
+BebfowJPDQfGAFG6DAJSLSKkQjnE/o/qycG+1E3/n3qe4rF8mq2nhglzh9HnmuN6
+papu+7qzcMBniKI11KOasf2twu8x+qi58/sIxpHR+ymVo0IwQDAPBgNVHRMBAf8E
+BTADAQH/MA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUmtgAMADna3+FGO6Lts6K
+DPgR4bswCgYIKoZIzj0EAwMDaQAwZgIxAN344FdHW6fmCsO99YCKlzUNG4k8VIZ3
+KMqh9HneteY4sPBlcIx/AlTCv//YoT7ZzwIxAMSNlPzcU9LcnXgWHxUzI1NS41ox
+XZ3Krr0TKUQNJ1uo52icEvdYPy5yAlejj6EULg==
+-----END CERTIFICATE-----
+
+# Operating CA: Symantec (Thawte)
+# Issuer: CN=thawte Primary Root CA - G3 O=thawte, Inc. OU=Certification Services Division/(c) 2008 thawte, Inc. - For authorized use only
+# Subject: CN=thawte Primary Root CA - G3 O=thawte, Inc. OU=Certification Services Division/(c) 2008 thawte, Inc. - For authorized use only
+# Label: "thawte Primary Root CA - G3"
+# Serial: 127614157056681299805556476275995414779
+# MD5 Fingerprint: fb:1b:5d:43:8a:94:cd:44:c6:76:f2:43:4b:47:e7:31
+# SHA1 Fingerprint: f1:8b:53:8d:1b:e9:03:b6:a6:f0:56:43:5b:17:15:89:ca:f3:6b:f2
+# SHA256 Fingerprint: 4b:03:f4:58:07:ad:70:f2:1b:fc:2c:ae:71:c9:fd:e4:60:4c:06:4c:f5:ff:b6:86:ba:e5:db:aa:d7:fd:d3:4c
+-----BEGIN CERTIFICATE-----
+MIIEKjCCAxKgAwIBAgIQYAGXt0an6rS0mtZLL/eQ+zANBgkqhkiG9w0BAQsFADCB
+rjELMAkGA1UEBhMCVVMxFTATBgNVBAoTDHRoYXd0ZSwgSW5jLjEoMCYGA1UECxMf
+Q2VydGlmaWNhdGlvbiBTZXJ2aWNlcyBEaXZpc2lvbjE4MDYGA1UECxMvKGMpIDIw
+MDggdGhhd3RlLCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxJDAiBgNV
+BAMTG3RoYXd0ZSBQcmltYXJ5IFJvb3QgQ0EgLSBHMzAeFw0wODA0MDIwMDAwMDBa
+Fw0zNzEyMDEyMzU5NTlaMIGuMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMdGhhd3Rl
+LCBJbmMuMSgwJgYDVQQLEx9DZXJ0aWZpY2F0aW9uIFNlcnZpY2VzIERpdmlzaW9u
+MTgwNgYDVQQLEy8oYykgMjAwOCB0aGF3dGUsIEluYy4gLSBGb3IgYXV0aG9yaXpl
+ZCB1c2Ugb25seTEkMCIGA1UEAxMbdGhhd3RlIFByaW1hcnkgUm9vdCBDQSAtIEcz
+MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAsr8nLPvb2FvdeHsbnndm
+gcs+vHyu86YnmjSjaDFxODNi5PNxZnmxqWWjpYvVj2AtP0LMqmsywCPLLEHd5N/8
+YZzic7IilRFDGF/Eth9XbAoFWCLINkw6fKXRz4aviKdEAhN0cXMKQlkC+BsUa0Lf
+b1+6a4KinVvnSr0eAXLbS3ToO39/fR8EtCab4LRarEc9VbjXsCZSKAExQGbY2SS9
+9irY7CFJXJv2eul/VTV+lmuNk5Mny5K76qxAwJ/C+IDPXfRa3M50hqY+bAtTyr2S
+zhkGcuYMXDhpxwTWvGzOW/b3aJzcJRVIiKHpqfiYnODz1TEoYRFsZ5aNOZnLwkUk
+OQIDAQABo0IwQDAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIBBjAdBgNV
+HQ4EFgQUrWyqlGCc7eT/+j4KdCtjA/e2Wb8wDQYJKoZIhvcNAQELBQADggEBABpA
+2JVlrAmSicY59BDlqQ5mU1143vokkbvnRFHfxhY0Cu9qRFHqKweKA3rD6z8KLFIW
+oCtDuSWQP3CpMyVtRRooOyfPqsMpQhvfO0zAMzRbQYi/aytlryjvsvXDqmbOe1bu
+t8jLZ8HJnBoYuMTDSQPxYA5QzUbF83d597YV4Djbxy8ooAw/dyZ02SUS2jHaGh7c
+KUGRIjxpp7sC8rZcJwOJ9Abqm+RyguOhCcHpABnTPtRwa7pxpqpYrvS76Wy274fM
+m7v/OeZWYdMKp8RcTGB7BXcmer/YB1IsYvdwY9k5vG8cwnncdimvzsUsZAReiDZu
+MdRAGmI0Nj81Aa6sY6A=
+-----END CERTIFICATE-----
+
+# Operating CA: Symantec (VeriSign)
+# Issuer: CN=VeriSign Class 3 Public Primary Certification Authority - G3 O=VeriSign, Inc. OU=VeriSign Trust Network/(c) 1999 VeriSign, Inc. - For authorized use only
+# Subject: CN=VeriSign Class 3 Public Primary Certification Authority - G3 O=VeriSign, Inc. OU=VeriSign Trust Network/(c) 1999 VeriSign, Inc. - For authorized use only
+# Label: "Verisign Class 3 Public Primary Certification Authority - G3"
+# Serial: 206684696279472310254277870180966723415
+# MD5 Fingerprint: cd:68:b6:a7:c7:c4:ce:75:e0:1d:4f:57:44:61:92:09
+# SHA1 Fingerprint: 13:2d:0d:45:53:4b:69:97:cd:b2:d5:c3:39:e2:55:76:60:9b:5c:c6
+# SHA256 Fingerprint: eb:04:cf:5e:b1:f3:9a:fa:76:2f:2b:b1:20:f2:96:cb:a5:20:c1:b9:7d:b1:58:95:65:b8:1c:b9:a1:7b:72:44
+-----BEGIN CERTIFICATE-----
+MIIEGjCCAwICEQCbfgZJoz5iudXukEhxKe9XMA0GCSqGSIb3DQEBBQUAMIHKMQsw
+CQYDVQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZl
+cmlTaWduIFRydXN0IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAxOTk5IFZlcmlTaWdu
+LCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxRTBDBgNVBAMTPFZlcmlT
+aWduIENsYXNzIDMgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3Jp
+dHkgLSBHMzAeFw05OTEwMDEwMDAwMDBaFw0zNjA3MTYyMzU5NTlaMIHKMQswCQYD
+VQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZlcmlT
+aWduIFRydXN0IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAxOTk5IFZlcmlTaWduLCBJ
+bmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxRTBDBgNVBAMTPFZlcmlTaWdu
+IENsYXNzIDMgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkg
+LSBHMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMu6nFL8eB8aHm8b
+N3O9+MlrlBIwT/A2R/XQkQr1F8ilYcEWQE37imGQ5XYgwREGfassbqb1EUGO+i2t
+KmFZpGcmTNDovFJbcCAEWNF6yaRpvIMXZK0Fi7zQWM6NjPXr8EJJC52XJ2cybuGu
+kxUccLwgTS8Y3pKI6GyFVxEa6X7jJhFUokWWVYPKMIno3Nij7SqAP395ZVc+FSBm
+CC+Vk7+qRy+oRpfwEuL+wgorUeZ25rdGt+INpsyow0xZVYnm6FNcHOqd8GIWC6fJ
+Xwzw3sJ2zq/3avL6QaaiMxTJ5Xpj055iN9WFZZ4O5lMkdBteHRJTW8cs54NJOxWu
+imi5V5cCAwEAATANBgkqhkiG9w0BAQUFAAOCAQEAERSWwauSCPc/L8my/uRan2Te
+2yFPhpk0djZX3dAVL8WtfxUfN2JzPtTnX84XA9s1+ivbrmAJXx5fj267Cz3qWhMe
+DGBvtcC1IyIuBwvLqXTLR7sdwdela8wv0kL9Sd2nic9TutoAWii/gt/4uhMdUIaC
+/Y4wjylGsB49Ndo4YhYYSq3mtlFs3q9i6wHQHiT+eo8SGhJouPtmmRQURVyu565p
+F4ErWjfJXir0xuKhXFSbplQAz/DxwceYMBo7Nhbbo27q/a2ywtrvAkcTisDxszGt
+TxzhT5yvDwyd93gN2PQ1VoDat20Xj50egWTh/sVFuq1ruQp6Tk9LhO5L8X3dEQ==
+-----END CERTIFICATE-----
+
+# Operating CA: Symantec (VeriSign)
+# Issuer: CN=VeriSign Class 3 Public Primary Certification Authority - G4 O=VeriSign, Inc. OU=VeriSign Trust Network/(c) 2007 VeriSign, Inc. - For authorized use only
+# Subject: CN=VeriSign Class 3 Public Primary Certification Authority - G4 O=VeriSign, Inc. OU=VeriSign Trust Network/(c) 2007 VeriSign, Inc. - For authorized use only
+# Label: "VeriSign Class 3 Public Primary Certification Authority - G4"
+# Serial: 63143484348153506665311985501458640051
+# MD5 Fingerprint: 3a:52:e1:e7:fd:6f:3a:e3:6f:f3:6f:99:1b:f9:22:41
+# SHA1 Fingerprint: 22:d5:d8:df:8f:02:31:d1:8d:f7:9d:b7:cf:8a:2d:64:c9:3f:6c:3a
+# SHA256 Fingerprint: 69:dd:d7:ea:90:bb:57:c9:3e:13:5d:c8:5e:a6:fc:d5:48:0b:60:32:39:bd:c4:54:fc:75:8b:2a:26:cf:7f:79
+-----BEGIN CERTIFICATE-----
+MIIDhDCCAwqgAwIBAgIQL4D+I4wOIg9IZxIokYesszAKBggqhkjOPQQDAzCByjEL
+MAkGA1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMR8wHQYDVQQLExZW
+ZXJpU2lnbiBUcnVzdCBOZXR3b3JrMTowOAYDVQQLEzEoYykgMjAwNyBWZXJpU2ln
+biwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5MUUwQwYDVQQDEzxWZXJp
+U2lnbiBDbGFzcyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9y
+aXR5IC0gRzQwHhcNMDcxMTA1MDAwMDAwWhcNMzgwMTE4MjM1OTU5WjCByjELMAkG
+A1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMR8wHQYDVQQLExZWZXJp
+U2lnbiBUcnVzdCBOZXR3b3JrMTowOAYDVQQLEzEoYykgMjAwNyBWZXJpU2lnbiwg
+SW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5MUUwQwYDVQQDEzxWZXJpU2ln
+biBDbGFzcyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5
+IC0gRzQwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAASnVnp8Utpkmw4tXNherJI9/gHm
+GUo9FANL+mAnINmDiWn6VMaaGF5VKmTeBvaNSjutEDxlPZCIBIngMGGzrl0Bp3ve
+fLK+ymVhAIau2o970ImtTR1ZmkGxvEeA3J5iw/mjgbIwga8wDwYDVR0TAQH/BAUw
+AwEB/zAOBgNVHQ8BAf8EBAMCAQYwbQYIKwYBBQUHAQwEYTBfoV2gWzBZMFcwVRYJ
+aW1hZ2UvZ2lmMCEwHzAHBgUrDgMCGgQUj+XTGoasjY5rw8+AatRIGCx7GS4wJRYj
+aHR0cDovL2xvZ28udmVyaXNpZ24uY29tL3ZzbG9nby5naWYwHQYDVR0OBBYEFLMW
+kf3upm7ktS5Jj4d4gYDs5bG1MAoGCCqGSM49BAMDA2gAMGUCMGYhDBgmYFo4e1ZC
+4Kf8NoRRkSAsdk1DPcQdhCPQrNZ8NQbOzWm9kA3bbEhCHQ6qQgIxAJw9SDkjOVga
+FRJZap7v1VmyHVIsmXHNxynfGyphe3HR3vPA5Q06Sqotp9iGKt0uEA==
+-----END CERTIFICATE-----
+
+# Operating CA: Symantec (VeriSign)
+# Issuer: CN=VeriSign Class 3 Public Primary Certification Authority - G5 O=VeriSign, Inc. OU=VeriSign Trust Network/(c) 2006 VeriSign, Inc. - For authorized use only
+# Subject: CN=VeriSign Class 3 Public Primary Certification Authority - G5 O=VeriSign, Inc. OU=VeriSign Trust Network/(c) 2006 VeriSign, Inc. - For authorized use only
+# Label: "VeriSign Class 3 Public Primary Certification Authority - G5"
+# Serial: 33037644167568058970164719475676101450
+# MD5 Fingerprint: cb:17:e4:31:67:3e:e2:09:fe:45:57:93:f3:0a:fa:1c
+# SHA1 Fingerprint: 4e:b6:d5:78:49:9b:1c:cf:5f:58:1e:ad:56:be:3d:9b:67:44:a5:e5
+# SHA256 Fingerprint: 9a:cf:ab:7e:43:c8:d8:80:d0:6b:26:2a:94:de:ee:e4:b4:65:99:89:c3:d0:ca:f1:9b:af:64:05:e4:1a:b7:df
+-----BEGIN CERTIFICATE-----
+MIIE0zCCA7ugAwIBAgIQGNrRniZ96LtKIVjNzGs7SjANBgkqhkiG9w0BAQUFADCB
+yjELMAkGA1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMR8wHQYDVQQL
+ExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMTowOAYDVQQLEzEoYykgMjAwNiBWZXJp
+U2lnbiwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5MUUwQwYDVQQDEzxW
+ZXJpU2lnbiBDbGFzcyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0
+aG9yaXR5IC0gRzUwHhcNMDYxMTA4MDAwMDAwWhcNMzYwNzE2MjM1OTU5WjCByjEL
+MAkGA1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMR8wHQYDVQQLExZW
+ZXJpU2lnbiBUcnVzdCBOZXR3b3JrMTowOAYDVQQLEzEoYykgMjAwNiBWZXJpU2ln
+biwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5MUUwQwYDVQQDEzxWZXJp
+U2lnbiBDbGFzcyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9y
+aXR5IC0gRzUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCvJAgIKXo1
+nmAMqudLO07cfLw8RRy7K+D+KQL5VwijZIUVJ/XxrcgxiV0i6CqqpkKzj/i5Vbex
+t0uz/o9+B1fs70PbZmIVYc9gDaTY3vjgw2IIPVQT60nKWVSFJuUrjxuf6/WhkcIz
+SdhDY2pSS9KP6HBRTdGJaXvHcPaz3BJ023tdS1bTlr8Vd6Gw9KIl8q8ckmcY5fQG
+BO+QueQA5N06tRn/Arr0PO7gi+s3i+z016zy9vA9r911kTMZHRxAy3QkGSGT2RT+
+rCpSx4/VBEnkjWNHiDxpg8v+R70rfk/Fla4OndTRQ8Bnc+MUCH7lP59zuDMKz10/
+NIeWiu5T6CUVAgMBAAGjgbIwga8wDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8E
+BAMCAQYwbQYIKwYBBQUHAQwEYTBfoV2gWzBZMFcwVRYJaW1hZ2UvZ2lmMCEwHzAH
+BgUrDgMCGgQUj+XTGoasjY5rw8+AatRIGCx7GS4wJRYjaHR0cDovL2xvZ28udmVy
+aXNpZ24uY29tL3ZzbG9nby5naWYwHQYDVR0OBBYEFH/TZafC3ey78DAJ80M5+gKv
+MzEzMA0GCSqGSIb3DQEBBQUAA4IBAQCTJEowX2LP2BqYLz3q3JktvXf2pXkiOOzE
+p6B4Eq1iDkVwZMXnl2YtmAl+X6/WzChl8gGqCBpH3vn5fJJaCGkgDdk+bW48DW7Y
+5gaRQBi5+MHt39tBquCWIMnNZBU4gcmU7qKEKQsTb47bDN0lAtukixlE0kF6BWlK
+WE9gyn6CagsCqiUXObXbf+eEZSqVir2G3l6BFoMtEMze/aiCKm0oHw0LxOXnGiYZ
+4fQRbxC1lfznQgUy286dUV4otp6F01vvpX1FQHKOtw5rDgb7MzVIcbidJ4vEZV8N
+hnacRHr2lVz2XTIIM6RUthg/aFzyQkqFOFSDX9HoLPKsEdao7WNq
+-----END CERTIFICATE-----
+
+# Operating CA: Symantec (VeriSign)
+# Issuer: CN=VeriSign Universal Root Certification Authority O=VeriSign, Inc. OU=VeriSign Trust Network/(c) 2008 VeriSign, Inc. - For authorized use only
+# Subject: CN=VeriSign Universal Root Certification Authority O=VeriSign, Inc. OU=VeriSign Trust Network/(c) 2008 VeriSign, Inc. - For authorized use only
+# Label: "VeriSign Universal Root Certification Authority"
+# Serial: 85209574734084581917763752644031726877
+# MD5 Fingerprint: 8e:ad:b5:01:aa:4d:81:e4:8c:1d:d1:e1:14:00:95:19
+# SHA1 Fingerprint: 36:79:ca:35:66:87:72:30:4d:30:a5:fb:87:3b:0f:a7:7b:b7:0d:54
+# SHA256 Fingerprint: 23:99:56:11:27:a5:71:25:de:8c:ef:ea:61:0d:df:2f:a0:78:b5:c8:06:7f:4e:82:82:90:bf:b8:60:e8:4b:3c
+-----BEGIN CERTIFICATE-----
+MIIEuTCCA6GgAwIBAgIQQBrEZCGzEyEDDrvkEhrFHTANBgkqhkiG9w0BAQsFADCB
+vTELMAkGA1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMR8wHQYDVQQL
+ExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMTowOAYDVQQLEzEoYykgMjAwOCBWZXJp
+U2lnbiwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5MTgwNgYDVQQDEy9W
+ZXJpU2lnbiBVbml2ZXJzYWwgUm9vdCBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAe
+Fw0wODA0MDIwMDAwMDBaFw0zNzEyMDEyMzU5NTlaMIG9MQswCQYDVQQGEwJVUzEX
+MBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZlcmlTaWduIFRydXN0
+IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAyMDA4IFZlcmlTaWduLCBJbmMuIC0gRm9y
+IGF1dGhvcml6ZWQgdXNlIG9ubHkxODA2BgNVBAMTL1ZlcmlTaWduIFVuaXZlcnNh
+bCBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEF
+AAOCAQ8AMIIBCgKCAQEAx2E3XrEBNNti1xWb/1hajCMj1mCOkdeQmIN65lgZOIzF
+9uVkhbSicfvtvbnazU0AtMgtc6XHaXGVHzk8skQHnOgO+k1KxCHfKWGPMiJhgsWH
+H26MfF8WIFFE0XBPV+rjHOPMee5Y2A7Cs0WTwCznmhcrewA3ekEzeOEz4vMQGn+H
+LL729fdC4uW/h2KJXwBL38Xd5HVEMkE6HnFuacsLdUYI0crSK5XQz/u5QGtkjFdN
+/BMReYTtXlT2NJ8IAfMQJQYXStrxHXpma5hgZqTZ79IugvHw7wnqRMkVauIDbjPT
+rJ9VAMf2CGqUuV/c4DPxhGD5WycRtPwW8rtWaoAljQIDAQABo4GyMIGvMA8GA1Ud
+EwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMG0GCCsGAQUFBwEMBGEwX6FdoFsw
+WTBXMFUWCWltYWdlL2dpZjAhMB8wBwYFKw4DAhoEFI/l0xqGrI2Oa8PPgGrUSBgs
+exkuMCUWI2h0dHA6Ly9sb2dvLnZlcmlzaWduLmNvbS92c2xvZ28uZ2lmMB0GA1Ud
+DgQWBBS2d/ppSEefUxLVwuoHMnYH0ZcHGTANBgkqhkiG9w0BAQsFAAOCAQEASvj4
+sAPmLGd75JR3Y8xuTPl9Dg3cyLk1uXBPY/ok+myDjEedO2Pzmvl2MpWRsXe8rJq+
+seQxIcaBlVZaDrHC1LGmWazxY8u4TB1ZkErvkBYoH1quEPuBUDgMbMzxPcP1Y+Oz
+4yHJJDnp/RVmRvQbEdBNc6N9Rvk97ahfYtTxP/jgdFcrGJ2BtMQo2pSXpXDrrB2+
+BxHw1dvd5Yzw1TKwg+ZX4o+/vqGqvz0dtdQ46tewXDpPaj+PwGZsY6rp2aQW9IHR
+lRQOfc2VNNnSj3BzgXucfr2YYdhFh5iQxeuGMMY1v/D/w1WIg0vvBZIGcfK4mJO3
+7M2CYfE45k+XmCpajQ==
+-----END CERTIFICATE-----
diff --git a/client/clang_features.cc b/client/clang_features.cc
new file mode 100644
index 0000000..b4b6c1f
--- /dev/null
+++ b/client/clang_features.cc
@@ -0,0 +1,1827 @@
+// This is auto-generated file from generate_feature_list.py.
+// Clang revision: 321858.
+// *** DO NOT EDIT ***
+
+static const char* KNOWN_FEATURES[] = {
+  "address_sanitizer",
+  "arc_cf_code_audited",
+  "assume_nonnull",
+  "attribute_analyzer_noreturn",
+  "attribute_availability",
+  "attribute_availability_app_extension",
+  "attribute_availability_in_templates",
+  "attribute_availability_tvos",
+  "attribute_availability_watchos",
+  "attribute_availability_with_message",
+  "attribute_availability_with_replacement",
+  "attribute_availability_with_strict",
+  "attribute_availability_with_version_underscores",
+  "attribute_cf_consumed",
+  "attribute_cf_returns_not_retained",
+  "attribute_cf_returns_on_parameters",
+  "attribute_cf_returns_retained",
+  "attribute_deprecated_with_message",
+  "attribute_deprecated_with_replacement",
+  "attribute_diagnose_if_objc",
+  "attribute_ext_vector_type",
+  "attribute_ns_consumed",
+  "attribute_ns_consumes_self",
+  "attribute_ns_returns_not_retained",
+  "attribute_ns_returns_retained",
+  "attribute_objc_ivar_unused",
+  "attribute_objc_method_family",
+  "attribute_overloadable",
+  "attribute_unavailable_with_message",
+  "attribute_unused_on_fields",
+  "blocks",
+  "c_alignas",
+  "c_alignof",
+  "c_atomic",
+  "c_generic_selections",
+  "c_static_assert",
+  "c_thread_local",
+  "c_thread_safety_attributes",
+  "cxx_access_control_sfinae",
+  "cxx_aggregate_nsdmi",
+  "cxx_alias_templates",
+  "cxx_alignas",
+  "cxx_alignof",
+  "cxx_atomic",
+  "cxx_attributes",
+  "cxx_auto_type",
+  "cxx_binary_literals",
+  "cxx_concepts",
+  "cxx_constexpr",
+  "cxx_constexpr_string_builtins",
+  "cxx_contextual_conversions",
+  "cxx_decltype",
+  "cxx_decltype_auto",
+  "cxx_decltype_incomplete_return_types",
+  "cxx_default_function_template_args",
+  "cxx_defaulted_functions",
+  "cxx_delegating_constructors",
+  "cxx_deleted_functions",
+  "cxx_exceptions",
+  "cxx_explicit_conversions",
+  "cxx_generalized_initializers",
+  "cxx_generic_lambdas",
+  "cxx_implicit_moves",
+  "cxx_inheriting_constructors",
+  "cxx_init_captures",
+  "cxx_inline_namespaces",
+  "cxx_lambdas",
+  "cxx_local_type_template_args",
+  "cxx_noexcept",
+  "cxx_nonstatic_member_init",
+  "cxx_nullptr",
+  "cxx_override_control",
+  "cxx_range_for",
+  "cxx_raw_string_literals",
+  "cxx_reference_qualified_functions",
+  "cxx_relaxed_constexpr",
+  "cxx_return_type_deduction",
+  "cxx_rtti",
+  "cxx_runtime_arrays",
+  "cxx_rvalue_references",
+  "cxx_static_assert",
+  "cxx_strong_enums",
+  "cxx_thread_local",
+  "cxx_trailing_return",
+  "cxx_unicode_literals",
+  "cxx_unrestricted_unions",
+  "cxx_user_literals",
+  "cxx_variable_templates",
+  "cxx_variadic_templates",
+  "dataflow_sanitizer",
+  "efficiency_sanitizer",
+  "enumerator_attributes",
+  "has_nothrow_assign",
+  "has_nothrow_constructor",
+  "has_nothrow_copy",
+  "has_trivial_assign",
+  "has_trivial_constructor",
+  "has_trivial_copy",
+  "has_trivial_destructor",
+  "has_virtual_destructor",
+  "hwaddress_sanitizer",
+  "is_abstract",
+  "is_base_of",
+  "is_class",
+  "is_constructible",
+  "is_convertible_to",
+  "is_empty",
+  "is_enum",
+  "is_final",
+  "is_literal",
+  "is_pod",
+  "is_polymorphic",
+  "is_sealed",
+  "is_standard_layout",
+  "is_trivial",
+  "is_trivially_assignable",
+  "is_trivially_constructible",
+  "is_trivially_copyable",
+  "is_union",
+  "memory_sanitizer",
+  "modules",
+  "nullability",
+  "nullability_on_arrays",
+  "objc_arc",
+  "objc_arc_weak",
+  "objc_arr",
+  "objc_array_literals",
+  "objc_bool",
+  "objc_boxed_expressions",
+  "objc_boxed_nsvalue_expressions",
+  "objc_bridge_id",
+  "objc_bridge_id_on_typedefs",
+  "objc_class_property",
+  "objc_default_synthesize_properties",
+  "objc_dictionary_literals",
+  "objc_fixed_enum",
+  "objc_generics",
+  "objc_generics_variance",
+  "objc_instancetype",
+  "objc_kindof",
+  "objc_modules",
+  "objc_nonfragile_abi",
+  "objc_property_explicit_atomic",
+  "objc_protocol_qualifier_mangling",
+  "objc_subscripting",
+  "objc_weak_class",
+  "ownership_holds",
+  "ownership_returns",
+  "ownership_takes",
+  "raw_invocation_type",
+  "safe_stack",
+  "scudo",
+  "thread_sanitizer",
+  "tls",
+  "underlying_type",
+};
+static const unsigned long NUM_KNOWN_FEATURES =
+    sizeof(KNOWN_FEATURES) / sizeof(KNOWN_FEATURES[0]);
+
+static const char* KNOWN_EXTENSIONS[] = {
+  "c_alignas",
+  "c_alignof",
+  "c_atomic",
+  "c_generic_selections",
+  "c_static_assert",
+  "c_thread_local",
+  "cxx_atomic",
+  "cxx_binary_literals",
+  "cxx_deleted_functions",
+  "cxx_explicit_conversions",
+  "cxx_init_captures",
+  "cxx_inline_namespaces",
+  "cxx_local_type_template_args",
+  "cxx_nonstatic_member_init",
+  "cxx_override_control",
+  "cxx_range_for",
+  "cxx_reference_qualified_functions",
+  "cxx_rvalue_references",
+  "cxx_variable_templates",
+  "cxx_variadic_templates",
+  "overloadable_unmarked",
+};
+static const unsigned long NUM_KNOWN_EXTENSIONS =
+    sizeof(KNOWN_EXTENSIONS) / sizeof(KNOWN_EXTENSIONS[0]);
+
+static const char* KNOWN_ATTRIBUTES[] = {
+  "NSObject",
+  "_Alignas",
+  "_Nonnull",
+  "_Noreturn",
+  "_Null_unspecified",
+  "_Nullable",
+  "__asm__",
+  "__cdecl",
+  "__const",
+  "__constant",
+  "__constant__",
+  "__cudart_builtin__",
+  "__device__",
+  "__device_builtin__",
+  "__device_builtin_surface_type__",
+  "__device_builtin_texture_type__",
+  "__fastcall",
+  "__forceinline",
+  "__generic",
+  "__global",
+  "__global__",
+  "__host__",
+  "__kernel",
+  "__kindof",
+  "__launch_bounds__",
+  "__local",
+  "__multiple_inheritance",
+  "__pascal",
+  "__private",
+  "__ptr32",
+  "__ptr64",
+  "__read_only",
+  "__read_write",
+  "__regcall",
+  "__shared__",
+  "__single_inheritance",
+  "__sptr",
+  "__stdcall",
+  "__thiscall",
+  "__unspecified_inheritance",
+  "__uptr",
+  "__vectorcall",
+  "__virtual_inheritance",
+  "__w64",
+  "__write_only",
+  "_cdecl",
+  "_fastcall",
+  "_pascal",
+  "_stdcall",
+  "_thiscall",
+  "_vectorcall",
+  "abi_tag",
+  "acquire_capability",
+  "acquire_shared_capability",
+  "acquired_after",
+  "acquired_before",
+  "address_space",
+  "alias",
+  "align",
+  "align_value",
+  "alignas",
+  "aligned",
+  "alloc_align",
+  "alloc_size",
+  "allocate",
+  "always_inline",
+  "amdgpu_flat_work_group_size",
+  "amdgpu_num_sgpr",
+  "amdgpu_num_vgpr",
+  "amdgpu_waves_per_eu",
+  "analyzer_noreturn",
+  "argument_with_type_tag",
+  "asm",
+  "assert_capability",
+  "assert_exclusive_lock",
+  "assert_shared_capability",
+  "assert_shared_lock",
+  "assume_aligned",
+  "availability",
+  "blocks",
+  "bounded",
+  "callable_when",
+  "capability",
+  "carries_dependency",
+  "cdecl",
+  "cf_audited_transfer",
+  "cf_consumed",
+  "cf_returns_not_retained",
+  "cf_returns_retained",
+  "cf_unknown_transfer",
+  "cleanup",
+  "cold",
+  "common",
+  "const",
+  "constant",
+  "constructor",
+  "consumable",
+  "consumable_auto_cast_state",
+  "consumable_set_state_on_read",
+  "convergent",
+  "cudart_builtin",
+  "deprecated",
+  "destructor",
+  "device",
+  "device_builtin",
+  "device_builtin_surface_type",
+  "device_builtin_texture_type",
+  "diagnose_if",
+  "disable_tail_calls",
+  "dllexport",
+  "dllimport",
+  "empty_bases",
+  "enable_if",
+  "enum_extensibility",
+  "exclusive_lock_function",
+  "exclusive_locks_required",
+  "exclusive_trylock_function",
+  "ext_vector_type",
+  "external_source_symbol",
+  "fallthrough",
+  "far",
+  "fastcall",
+  "final",
+  "flag_enum",
+  "flatten",
+  "force_align_arg_pointer",
+  "format",
+  "format_arg",
+  "generic",
+  "global",
+  "gnu_inline",
+  "guarded_by",
+  "guarded_var",
+  "host",
+  "hot",
+  "ibaction",
+  "iboutlet",
+  "iboutletcollection",
+  "ifunc",
+  "init_priority",
+  "intel_ocl_bicc",
+  "intel_reqd_sub_group_size",
+  "internal_linkage",
+  "interrupt",
+  "kernel",
+  "launch_bounds",
+  "layout_version",
+  "local",
+  "lock_returned",
+  "lockable",
+  "locks_excluded",
+  "long_call",
+  "lto_visibility_public",
+  "malloc",
+  "may_alias",
+  "maybe_unused",
+  "micromips",
+  "minsize",
+  "mips16",
+  "mode",
+  "ms_abi",
+  "ms_struct",
+  "naked",
+  "near",
+  "neon_polyvector_type",
+  "neon_vector_type",
+  "no_address_safety_analysis",
+  "no_caller_saved_registers",
+  "no_instrument_function",
+  "no_sanitize",
+  "no_sanitize_address",
+  "no_sanitize_memory",
+  "no_sanitize_thread",
+  "no_split_stack",
+  "no_thread_safety_analysis",
+  "noalias",
+  "nocommon",
+  "nodebug",
+  "nodiscard",
+  "noduplicate",
+  "noescape",
+  "noinline",
+  "nomicromips",
+  "nomips16",
+  "nonnull",
+  "noreturn",
+  "nosvm",
+  "not_tail_called",
+  "nothrow",
+  "nounroll",
+  "novtable",
+  "ns_consumed",
+  "ns_consumes_self",
+  "ns_returns_autoreleased",
+  "ns_returns_not_retained",
+  "ns_returns_retained",
+  "nv_weak",
+  "objc_arc_weak_reference_unavailable",
+  "objc_boxable",
+  "objc_bridge",
+  "objc_bridge_mutable",
+  "objc_bridge_related",
+  "objc_designated_initializer",
+  "objc_exception",
+  "objc_gc",
+  "objc_independent_class",
+  "objc_method_family",
+  "objc_ownership",
+  "objc_precise_lifetime",
+  "objc_protocol_requires_explicit_implementation",
+  "objc_requires_property_definitions",
+  "objc_requires_super",
+  "objc_returns_inner_pointer",
+  "objc_root_class",
+  "objc_runtime_name",
+  "objc_runtime_visible",
+  "objc_subclassing_restricted",
+  "opencl_unroll_hint",
+  "optnone",
+  "overloadable",
+  "override",
+  "ownership_holds",
+  "ownership_returns",
+  "ownership_takes",
+  "packed",
+  "param_typestate",
+  "pascal",
+  "pass_object_size",
+  "pcs",
+  "pointer_with_type_tag",
+  "preserve_all",
+  "preserve_most",
+  "private",
+  "property",
+  "pt_guarded_by",
+  "pt_guarded_var",
+  "pure",
+  "read_only",
+  "read_write",
+  "regcall",
+  "regparm",
+  "release_capability",
+  "release_generic_capability",
+  "release_shared_capability",
+  "reqd_work_group_size",
+  "require_constant_initialization",
+  "requires_capability",
+  "requires_shared_capability",
+  "restrict",
+  "return_typestate",
+  "returns_nonnull",
+  "returns_twice",
+  "scoped_lockable",
+  "sealed",
+  "section",
+  "selectany",
+  "sentinel",
+  "set_typestate",
+  "shared",
+  "shared_capability",
+  "shared_lock_function",
+  "shared_locks_required",
+  "shared_trylock_function",
+  "short_call",
+  "signal",
+  "stdcall",
+  "suppress",
+  "swift_context",
+  "swift_error_result",
+  "swift_indirect_result",
+  "swiftcall",
+  "sysv_abi",
+  "target",
+  "test_typestate",
+  "thiscall",
+  "thread",
+  "tls_model",
+  "transparent_union",
+  "try_acquire_capability",
+  "try_acquire_shared_capability",
+  "type_tag_for_datatype",
+  "type_visibility",
+  "unavailable",
+  "unlock_function",
+  "unroll",
+  "unused",
+  "used",
+  "uuid",
+  "vec_type_hint",
+  "vecreturn",
+  "vector_size",
+  "vectorcall",
+  "visibility",
+  "warn_unused",
+  "warn_unused_result",
+  "weak",
+  "weak_import",
+  "weakref",
+  "work_group_size_hint",
+  "write_only",
+  "xray_always_instrument",
+  "xray_log_args",
+  "xray_never_instrument",
+};
+static const unsigned long NUM_KNOWN_ATTRIBUTES =
+    sizeof(KNOWN_ATTRIBUTES) / sizeof(KNOWN_ATTRIBUTES[0]);
+
+static const char* KNOWN_CPP_ATTRIBUTES[] = {
+  "carries_dependency",
+  "clang::fallthrough",
+  "clang::warn_unused_result",
+  "deprecated",
+  "fallthrough",
+  "gsl::suppress",
+  "maybe_unused",
+  "nodiscard",
+  "noreturn",
+};
+static const unsigned long NUM_KNOWN_CPP_ATTRIBUTES =
+    sizeof(KNOWN_CPP_ATTRIBUTES) / sizeof(KNOWN_CPP_ATTRIBUTES[0]);
+
+static const char* KNOWN_DECLSPEC_ATTRIBUTES[] = {
+  "NSObject",
+  "_Alignas",
+  "_Nonnull",
+  "_Noreturn",
+  "_Null_unspecified",
+  "_Nullable",
+  "__asm__",
+  "__cdecl",
+  "__const",
+  "__constant",
+  "__constant__",
+  "__cudart_builtin__",
+  "__device__",
+  "__device_builtin__",
+  "__device_builtin_surface_type__",
+  "__device_builtin_texture_type__",
+  "__fastcall",
+  "__forceinline",
+  "__generic",
+  "__global",
+  "__global__",
+  "__host__",
+  "__kernel",
+  "__kindof",
+  "__launch_bounds__",
+  "__local",
+  "__multiple_inheritance",
+  "__pascal",
+  "__private",
+  "__ptr32",
+  "__ptr64",
+  "__read_only",
+  "__read_write",
+  "__regcall",
+  "__shared__",
+  "__single_inheritance",
+  "__sptr",
+  "__stdcall",
+  "__thiscall",
+  "__unspecified_inheritance",
+  "__uptr",
+  "__vectorcall",
+  "__virtual_inheritance",
+  "__w64",
+  "__write_only",
+  "_cdecl",
+  "_fastcall",
+  "_pascal",
+  "_stdcall",
+  "_thiscall",
+  "_vectorcall",
+  "abi_tag",
+  "acquire_capability",
+  "acquire_shared_capability",
+  "acquired_after",
+  "acquired_before",
+  "address_space",
+  "alias",
+  "align",
+  "align_value",
+  "alignas",
+  "aligned",
+  "alloc_align",
+  "alloc_size",
+  "allocate",
+  "always_inline",
+  "amdgpu_flat_work_group_size",
+  "amdgpu_num_sgpr",
+  "amdgpu_num_vgpr",
+  "amdgpu_waves_per_eu",
+  "analyzer_noreturn",
+  "argument_with_type_tag",
+  "asm",
+  "assert_capability",
+  "assert_exclusive_lock",
+  "assert_shared_capability",
+  "assert_shared_lock",
+  "assume_aligned",
+  "availability",
+  "blocks",
+  "bounded",
+  "callable_when",
+  "capability",
+  "carries_dependency",
+  "cdecl",
+  "cf_audited_transfer",
+  "cf_consumed",
+  "cf_returns_not_retained",
+  "cf_returns_retained",
+  "cf_unknown_transfer",
+  "cleanup",
+  "cold",
+  "common",
+  "const",
+  "constant",
+  "constructor",
+  "consumable",
+  "consumable_auto_cast_state",
+  "consumable_set_state_on_read",
+  "convergent",
+  "cudart_builtin",
+  "deprecated",
+  "destructor",
+  "device",
+  "device_builtin",
+  "device_builtin_surface_type",
+  "device_builtin_texture_type",
+  "diagnose_if",
+  "disable_tail_calls",
+  "dllexport",
+  "dllimport",
+  "empty_bases",
+  "enable_if",
+  "enum_extensibility",
+  "exclusive_lock_function",
+  "exclusive_locks_required",
+  "exclusive_trylock_function",
+  "ext_vector_type",
+  "external_source_symbol",
+  "fallthrough",
+  "far",
+  "fastcall",
+  "final",
+  "flag_enum",
+  "flatten",
+  "force_align_arg_pointer",
+  "format",
+  "format_arg",
+  "generic",
+  "global",
+  "gnu_inline",
+  "guarded_by",
+  "guarded_var",
+  "host",
+  "hot",
+  "ibaction",
+  "iboutlet",
+  "iboutletcollection",
+  "ifunc",
+  "init_priority",
+  "intel_ocl_bicc",
+  "intel_reqd_sub_group_size",
+  "internal_linkage",
+  "interrupt",
+  "kernel",
+  "launch_bounds",
+  "layout_version",
+  "local",
+  "lock_returned",
+  "lockable",
+  "locks_excluded",
+  "long_call",
+  "lto_visibility_public",
+  "malloc",
+  "may_alias",
+  "maybe_unused",
+  "micromips",
+  "minsize",
+  "mips16",
+  "mode",
+  "ms_abi",
+  "ms_struct",
+  "naked",
+  "near",
+  "neon_polyvector_type",
+  "neon_vector_type",
+  "no_address_safety_analysis",
+  "no_caller_saved_registers",
+  "no_instrument_function",
+  "no_sanitize",
+  "no_sanitize_address",
+  "no_sanitize_memory",
+  "no_sanitize_thread",
+  "no_split_stack",
+  "no_thread_safety_analysis",
+  "noalias",
+  "nocommon",
+  "nodebug",
+  "nodiscard",
+  "noduplicate",
+  "noescape",
+  "noinline",
+  "nomicromips",
+  "nomips16",
+  "nonnull",
+  "noreturn",
+  "nosvm",
+  "not_tail_called",
+  "nothrow",
+  "nounroll",
+  "novtable",
+  "ns_consumed",
+  "ns_consumes_self",
+  "ns_returns_autoreleased",
+  "ns_returns_not_retained",
+  "ns_returns_retained",
+  "nv_weak",
+  "objc_arc_weak_reference_unavailable",
+  "objc_boxable",
+  "objc_bridge",
+  "objc_bridge_mutable",
+  "objc_bridge_related",
+  "objc_designated_initializer",
+  "objc_exception",
+  "objc_gc",
+  "objc_independent_class",
+  "objc_method_family",
+  "objc_ownership",
+  "objc_precise_lifetime",
+  "objc_protocol_requires_explicit_implementation",
+  "objc_requires_property_definitions",
+  "objc_requires_super",
+  "objc_returns_inner_pointer",
+  "objc_root_class",
+  "objc_runtime_name",
+  "objc_runtime_visible",
+  "objc_subclassing_restricted",
+  "opencl_unroll_hint",
+  "optnone",
+  "overloadable",
+  "override",
+  "ownership_holds",
+  "ownership_returns",
+  "ownership_takes",
+  "packed",
+  "param_typestate",
+  "pascal",
+  "pass_object_size",
+  "pcs",
+  "pointer_with_type_tag",
+  "preserve_all",
+  "preserve_most",
+  "private",
+  "property",
+  "pt_guarded_by",
+  "pt_guarded_var",
+  "pure",
+  "read_only",
+  "read_write",
+  "regcall",
+  "regparm",
+  "release_capability",
+  "release_generic_capability",
+  "release_shared_capability",
+  "reqd_work_group_size",
+  "require_constant_initialization",
+  "requires_capability",
+  "requires_shared_capability",
+  "restrict",
+  "return_typestate",
+  "returns_nonnull",
+  "returns_twice",
+  "scoped_lockable",
+  "sealed",
+  "section",
+  "selectany",
+  "sentinel",
+  "set_typestate",
+  "shared",
+  "shared_capability",
+  "shared_lock_function",
+  "shared_locks_required",
+  "shared_trylock_function",
+  "short_call",
+  "signal",
+  "stdcall",
+  "suppress",
+  "swift_context",
+  "swift_error_result",
+  "swift_indirect_result",
+  "swiftcall",
+  "sysv_abi",
+  "target",
+  "test_typestate",
+  "thiscall",
+  "thread",
+  "tls_model",
+  "transparent_union",
+  "try_acquire_capability",
+  "try_acquire_shared_capability",
+  "type_tag_for_datatype",
+  "type_visibility",
+  "unavailable",
+  "unlock_function",
+  "unroll",
+  "unused",
+  "used",
+  "uuid",
+  "vec_type_hint",
+  "vecreturn",
+  "vector_size",
+  "vectorcall",
+  "visibility",
+  "warn_unused",
+  "warn_unused_result",
+  "weak",
+  "weak_import",
+  "weakref",
+  "work_group_size_hint",
+  "write_only",
+  "xray_always_instrument",
+  "xray_log_args",
+  "xray_never_instrument",
+};
+static const unsigned long NUM_KNOWN_DECLSPEC_ATTRIBUTES =
+    sizeof(KNOWN_DECLSPEC_ATTRIBUTES) /
+    sizeof(KNOWN_DECLSPEC_ATTRIBUTES[0]);
+
+static const char* KNOWN_BUILTINS[] = {
+  "ID",
+  "NSLog",
+  "NSLogv",
+  "_Block_object_assign",
+  "_Block_object_dispose",
+  "_Exit",
+  "_InterlockedAnd",
+  "_InterlockedAnd16",
+  "_InterlockedAnd8",
+  "_InterlockedCompareExchange",
+  "_InterlockedCompareExchange16",
+  "_InterlockedCompareExchange64",
+  "_InterlockedCompareExchange8",
+  "_InterlockedCompareExchangePointer",
+  "_InterlockedDecrement",
+  "_InterlockedDecrement16",
+  "_InterlockedExchange",
+  "_InterlockedExchange16",
+  "_InterlockedExchange8",
+  "_InterlockedExchangeAdd",
+  "_InterlockedExchangeAdd16",
+  "_InterlockedExchangeAdd8",
+  "_InterlockedExchangePointer",
+  "_InterlockedExchangeSub",
+  "_InterlockedExchangeSub16",
+  "_InterlockedExchangeSub8",
+  "_InterlockedIncrement",
+  "_InterlockedIncrement16",
+  "_InterlockedOr",
+  "_InterlockedOr16",
+  "_InterlockedOr8",
+  "_InterlockedXor",
+  "_InterlockedXor16",
+  "_InterlockedXor8",
+  "_ReturnAddress",
+  "__GetExceptionInfo",
+  "__abnormal_termination",
+  "__annotation",
+  "__assume",
+  "__atomic_add_fetch",
+  "__atomic_always_lock_free",
+  "__atomic_and_fetch",
+  "__atomic_clear",
+  "__atomic_compare_exchange",
+  "__atomic_compare_exchange_n",
+  "__atomic_exchange",
+  "__atomic_exchange_n",
+  "__atomic_fetch_add",
+  "__atomic_fetch_and",
+  "__atomic_fetch_nand",
+  "__atomic_fetch_or",
+  "__atomic_fetch_sub",
+  "__atomic_fetch_xor",
+  "__atomic_is_lock_free",
+  "__atomic_load",
+  "__atomic_load_n",
+  "__atomic_nand_fetch",
+  "__atomic_or_fetch",
+  "__atomic_signal_fence",
+  "__atomic_store",
+  "__atomic_store_n",
+  "__atomic_sub_fetch",
+  "__atomic_test_and_set",
+  "__atomic_thread_fence",
+  "__atomic_xor_fetch",
+  "__builtin___CFStringMakeConstantString",
+  "__builtin___NSStringMakeConstantString",
+  "__builtin___clear_cache",
+  "__builtin___fprintf_chk",
+  "__builtin___get_unsafe_stack_ptr",
+  "__builtin___get_unsafe_stack_start",
+  "__builtin___memccpy_chk",
+  "__builtin___memcpy_chk",
+  "__builtin___memmove_chk",
+  "__builtin___mempcpy_chk",
+  "__builtin___memset_chk",
+  "__builtin___printf_chk",
+  "__builtin___snprintf_chk",
+  "__builtin___sprintf_chk",
+  "__builtin___stpcpy_chk",
+  "__builtin___stpncpy_chk",
+  "__builtin___strcat_chk",
+  "__builtin___strcpy_chk",
+  "__builtin___strlcat_chk",
+  "__builtin___strlcpy_chk",
+  "__builtin___strncat_chk",
+  "__builtin___strncpy_chk",
+  "__builtin___vfprintf_chk",
+  "__builtin___vprintf_chk",
+  "__builtin___vsnprintf_chk",
+  "__builtin___vsprintf_chk",
+  "__builtin_abort",
+  "__builtin_acosf",
+  "__builtin_acoshf",
+  "__builtin_acoshl",
+  "__builtin_acosl",
+  "__builtin_add_overflow",
+  "__builtin_addc",
+  "__builtin_addcb",
+  "__builtin_addcl",
+  "__builtin_addcll",
+  "__builtin_addcs",
+  "__builtin_addressof",
+  "__builtin_alloca",
+  "__builtin_alloca_with_align",
+  "__builtin_annotation",
+  "__builtin_asinf",
+  "__builtin_asinhf",
+  "__builtin_asinhl",
+  "__builtin_asinl",
+  "__builtin_assume",
+  "__builtin_assume_aligned",
+  "__builtin_atan2f",
+  "__builtin_atan2l",
+  "__builtin_atanf",
+  "__builtin_atanhf",
+  "__builtin_atanhl",
+  "__builtin_atanl",
+  "__builtin_bcmp",
+  "__builtin_bcopy",
+  "__builtin_bitreverse16",
+  "__builtin_bitreverse32",
+  "__builtin_bitreverse64",
+  "__builtin_bitreverse8",
+  "__builtin_bswap16",
+  "__builtin_bswap32",
+  "__builtin_bswap64",
+  "__builtin_bzero",
+  "__builtin_cabs",
+  "__builtin_cabsf",
+  "__builtin_cabsl",
+  "__builtin_cacos",
+  "__builtin_cacosf",
+  "__builtin_cacosh",
+  "__builtin_cacoshf",
+  "__builtin_cacoshl",
+  "__builtin_cacosl",
+  "__builtin_call_with_static_chain",
+  "__builtin_canonicalize",
+  "__builtin_canonicalizef",
+  "__builtin_canonicalizel",
+  "__builtin_carg",
+  "__builtin_cargf",
+  "__builtin_cargl",
+  "__builtin_casin",
+  "__builtin_casinf",
+  "__builtin_casinh",
+  "__builtin_casinhf",
+  "__builtin_casinhl",
+  "__builtin_casinl",
+  "__builtin_catan",
+  "__builtin_catanf",
+  "__builtin_catanh",
+  "__builtin_catanhf",
+  "__builtin_catanhl",
+  "__builtin_catanl",
+  "__builtin_cbrtf",
+  "__builtin_cbrtl",
+  "__builtin_ccos",
+  "__builtin_ccosf",
+  "__builtin_ccosh",
+  "__builtin_ccoshf",
+  "__builtin_ccoshl",
+  "__builtin_ccosl",
+  "__builtin_ceilf",
+  "__builtin_ceill",
+  "__builtin_cexp",
+  "__builtin_cexpf",
+  "__builtin_cexpl",
+  "__builtin_char_memchr",
+  "__builtin_cimag",
+  "__builtin_cimagf",
+  "__builtin_cimagl",
+  "__builtin_classify_type",
+  "__builtin_clog",
+  "__builtin_clogf",
+  "__builtin_clogl",
+  "__builtin_clzll",
+  "__builtin_conj",
+  "__builtin_conjf",
+  "__builtin_conjl",
+  "__builtin_constant_p",
+  "__builtin_convertvector",
+  "__builtin_copysign",
+  "__builtin_copysignf",
+  "__builtin_copysignl",
+  "__builtin_coro_alloc",
+  "__builtin_coro_begin",
+  "__builtin_coro_destroy",
+  "__builtin_coro_done",
+  "__builtin_coro_end",
+  "__builtin_coro_frame",
+  "__builtin_coro_free",
+  "__builtin_coro_id",
+  "__builtin_coro_param",
+  "__builtin_coro_promise",
+  "__builtin_coro_resume",
+  "__builtin_coro_size",
+  "__builtin_coro_suspend",
+  "__builtin_cosf",
+  "__builtin_coshf",
+  "__builtin_coshl",
+  "__builtin_cosl",
+  "__builtin_cpow",
+  "__builtin_cpowf",
+  "__builtin_cpowl",
+  "__builtin_cproj",
+  "__builtin_cprojf",
+  "__builtin_cprojl",
+  "__builtin_creal",
+  "__builtin_crealf",
+  "__builtin_creall",
+  "__builtin_csin",
+  "__builtin_csinf",
+  "__builtin_csinh",
+  "__builtin_csinhf",
+  "__builtin_csinhl",
+  "__builtin_csinl",
+  "__builtin_csqrt",
+  "__builtin_csqrtf",
+  "__builtin_csqrtl",
+  "__builtin_ctan",
+  "__builtin_ctanf",
+  "__builtin_ctanh",
+  "__builtin_ctanhf",
+  "__builtin_ctanhl",
+  "__builtin_ctanl",
+  "__builtin_ctzll",
+  "__builtin_debugtrap",
+  "__builtin_dwarf_cfa",
+  "__builtin_dwarf_sp_column",
+  "__builtin_eh_return",
+  "__builtin_eh_return_data_regno",
+  "__builtin_erfcf",
+  "__builtin_erfcl",
+  "__builtin_erff",
+  "__builtin_erfl",
+  "__builtin_exp2f",
+  "__builtin_exp2l",
+  "__builtin_expect",
+  "__builtin_expf",
+  "__builtin_expl",
+  "__builtin_expm1f",
+  "__builtin_expm1l",
+  "__builtin_extend_pointer",
+  "__builtin_extract_return_addr",
+  "__builtin_fabsf",
+  "__builtin_fabsl",
+  "__builtin_fdim",
+  "__builtin_fdimf",
+  "__builtin_fdiml",
+  "__builtin_ffsll",
+  "__builtin_floorf",
+  "__builtin_floorl",
+  "__builtin_flt_rounds",
+  "__builtin_fma",
+  "__builtin_fmaf",
+  "__builtin_fmal",
+  "__builtin_fmax",
+  "__builtin_fmaxf",
+  "__builtin_fmaxl",
+  "__builtin_fmin",
+  "__builtin_fminf",
+  "__builtin_fminl",
+  "__builtin_fmodf",
+  "__builtin_fmodl",
+  "__builtin_fpclassify",
+  "__builtin_fprintf",
+  "__builtin_frame_address",
+  "__builtin_frexpf",
+  "__builtin_frexpl",
+  "__builtin_frob_return_addr",
+  "__builtin_huge_val",
+  "__builtin_huge_valf",
+  "__builtin_huge_vall",
+  "__builtin_hypotf",
+  "__builtin_hypotl",
+  "__builtin_ilogbf",
+  "__builtin_ilogbl",
+  "__builtin_index",
+  "__builtin_init_dwarf_reg_size_table",
+  "__builtin_isfinite",
+  "__builtin_isgreaterequal",
+  "__builtin_isinf",
+  "__builtin_isinf_sign",
+  "__builtin_isnan",
+  "__builtin_isnormal",
+  "__builtin_ldexpf",
+  "__builtin_ldexpl",
+  "__builtin_lgammaf",
+  "__builtin_lgammal",
+  "__builtin_llabs",
+  "__builtin_llrint",
+  "__builtin_llrintf",
+  "__builtin_llrintl",
+  "__builtin_llroundf",
+  "__builtin_llroundl",
+  "__builtin_load_half",
+  "__builtin_load_halff",
+  "__builtin_log10f",
+  "__builtin_log10l",
+  "__builtin_log1pf",
+  "__builtin_log1pl",
+  "__builtin_log2",
+  "__builtin_log2f",
+  "__builtin_log2l",
+  "__builtin_logbf",
+  "__builtin_logbl",
+  "__builtin_logf",
+  "__builtin_logl",
+  "__builtin_longjmp",
+  "__builtin_lrintf",
+  "__builtin_lrintl",
+  "__builtin_lroundf",
+  "__builtin_lroundl",
+  "__builtin_memchr",
+  "__builtin_memcmp",
+  "__builtin_memcpy",
+  "__builtin_memmove",
+  "__builtin_mempcpy",
+  "__builtin_memset",
+  "__builtin_modff",
+  "__builtin_modfl",
+  "__builtin_ms_va_copy",
+  "__builtin_ms_va_end",
+  "__builtin_ms_va_start",
+  "__builtin_mul_overflow",
+  "__builtin_nan",
+  "__builtin_nanf",
+  "__builtin_nanl",
+  "__builtin_nans",
+  "__builtin_nansf",
+  "__builtin_nansl",
+  "__builtin_nearbyintf",
+  "__builtin_nearbyintl",
+  "__builtin_nextafterf",
+  "__builtin_nextafterl",
+  "__builtin_nexttowardf",
+  "__builtin_nexttowardl",
+  "__builtin_nontemporal_load",
+  "__builtin_nontemporal_store",
+  "__builtin_objc_memmove_collectable",
+  "__builtin_object_size",
+  "__builtin_operator_delete",
+  "__builtin_operator_new",
+  "__builtin_os_log_format",
+  "__builtin_os_log_format_buffer_size",
+  "__builtin_parityll",
+  "__builtin_popcountll",
+  "__builtin_powf",
+  "__builtin_powif",
+  "__builtin_powil",
+  "__builtin_powl",
+  "__builtin_prefetch",
+  "__builtin_printf",
+  "__builtin_readcyclecounter",
+  "__builtin_remainderf",
+  "__builtin_remainderl",
+  "__builtin_remquof",
+  "__builtin_remquol",
+  "__builtin_return_address",
+  "__builtin_rindex",
+  "__builtin_rintf",
+  "__builtin_rintl",
+  "__builtin_round",
+  "__builtin_roundf",
+  "__builtin_roundl",
+  "__builtin_sadd_overflow",
+  "__builtin_saddl_overflow",
+  "__builtin_saddll_overflow",
+  "__builtin_scalblnf",
+  "__builtin_scalblnl",
+  "__builtin_scalbnf",
+  "__builtin_scalbnl",
+  "__builtin_setjmp",
+  "__builtin_shufflevector",
+  "__builtin_signbit",
+  "__builtin_signbitf",
+  "__builtin_signbitl",
+  "__builtin_sinf",
+  "__builtin_sinhf",
+  "__builtin_sinhl",
+  "__builtin_sinl",
+  "__builtin_smul_overflow",
+  "__builtin_smull_overflow",
+  "__builtin_smulll_overflow",
+  "__builtin_snprintf",
+  "__builtin_sqrtf",
+  "__builtin_sqrtl",
+  "__builtin_ssub_overflow",
+  "__builtin_ssubl_overflow",
+  "__builtin_ssubll_overflow",
+  "__builtin_stdarg_start",
+  "__builtin_store_half",
+  "__builtin_store_halff",
+  "__builtin_stpcpy",
+  "__builtin_stpncpy",
+  "__builtin_strcasecmp",
+  "__builtin_strcat",
+  "__builtin_strchr",
+  "__builtin_strcmp",
+  "__builtin_strcpy",
+  "__builtin_strcspn",
+  "__builtin_strdup",
+  "__builtin_strlen",
+  "__builtin_strncasecmp",
+  "__builtin_strncat",
+  "__builtin_strncmp",
+  "__builtin_strncpy",
+  "__builtin_strndup",
+  "__builtin_strpbrk",
+  "__builtin_strrchr",
+  "__builtin_strspn",
+  "__builtin_strstr",
+  "__builtin_sub_overflow",
+  "__builtin_subc",
+  "__builtin_subcb",
+  "__builtin_subcl",
+  "__builtin_subcll",
+  "__builtin_subcs",
+  "__builtin_tanf",
+  "__builtin_tanhf",
+  "__builtin_tanhl",
+  "__builtin_tanl",
+  "__builtin_tgammaf",
+  "__builtin_tgammal",
+  "__builtin_thread_pointer",
+  "__builtin_trap",
+  "__builtin_truncf",
+  "__builtin_truncl",
+  "__builtin_uadd_overflow",
+  "__builtin_uaddl_overflow",
+  "__builtin_uaddll_overflow",
+  "__builtin_umul_overflow",
+  "__builtin_umull_overflow",
+  "__builtin_umulll_overflow",
+  "__builtin_unpredictable",
+  "__builtin_unreachable",
+  "__builtin_unwind_init",
+  "__builtin_usub_overflow",
+  "__builtin_usubl_overflow",
+  "__builtin_usubll_overflow",
+  "__builtin_va_copy",
+  "__builtin_va_end",
+  "__builtin_va_start",
+  "__builtin_vsnprintf",
+  "__builtin_vsprintf",
+  "__builtin_wcschr",
+  "__builtin_wcscmp",
+  "__builtin_wcslen",
+  "__builtin_wcsncmp",
+  "__builtin_wmemchr",
+  "__builtin_wmemcmp",
+  "__c11_atomic_compare_exchange_strong",
+  "__c11_atomic_compare_exchange_weak",
+  "__c11_atomic_exchange",
+  "__c11_atomic_fetch_add",
+  "__c11_atomic_fetch_and",
+  "__c11_atomic_fetch_or",
+  "__c11_atomic_fetch_sub",
+  "__c11_atomic_fetch_xor",
+  "__c11_atomic_init",
+  "__c11_atomic_is_lock_free",
+  "__c11_atomic_load",
+  "__c11_atomic_signal_fence",
+  "__c11_atomic_store",
+  "__c11_atomic_thread_fence",
+  "__cospi",
+  "__cospif",
+  "__debugbreak",
+  "__exception_code",
+  "__exception_info",
+  "__exp10",
+  "__exp10f",
+  "__fastfail",
+  "__finite",
+  "__finitef",
+  "__finitel",
+  "__noop",
+  "__opencl_atomic_compare_exchange_strong",
+  "__opencl_atomic_compare_exchange_weak",
+  "__opencl_atomic_exchange",
+  "__opencl_atomic_fetch_add",
+  "__opencl_atomic_fetch_and",
+  "__opencl_atomic_fetch_max",
+  "__opencl_atomic_fetch_min",
+  "__opencl_atomic_fetch_or",
+  "__opencl_atomic_fetch_sub",
+  "__opencl_atomic_fetch_xor",
+  "__opencl_atomic_init",
+  "__opencl_atomic_load",
+  "__opencl_atomic_store",
+  "__popcnt",
+  "__popcnt16",
+  "__popcnt64",
+  "__sigsetjmp",
+  "__sinpi",
+  "__sinpif",
+  "__sync_add_and_fetch",
+  "__sync_add_and_fetch_1",
+  "__sync_add_and_fetch_16",
+  "__sync_add_and_fetch_2",
+  "__sync_add_and_fetch_4",
+  "__sync_add_and_fetch_8",
+  "__sync_and_and_fetch",
+  "__sync_and_and_fetch_1",
+  "__sync_and_and_fetch_16",
+  "__sync_and_and_fetch_2",
+  "__sync_and_and_fetch_4",
+  "__sync_and_and_fetch_8",
+  "__sync_bool_compare_and_swap",
+  "__sync_bool_compare_and_swap_1",
+  "__sync_bool_compare_and_swap_16",
+  "__sync_bool_compare_and_swap_2",
+  "__sync_bool_compare_and_swap_4",
+  "__sync_bool_compare_and_swap_8",
+  "__sync_fetch_and_add",
+  "__sync_fetch_and_add_1",
+  "__sync_fetch_and_add_16",
+  "__sync_fetch_and_add_2",
+  "__sync_fetch_and_add_4",
+  "__sync_fetch_and_add_8",
+  "__sync_fetch_and_and",
+  "__sync_fetch_and_and_1",
+  "__sync_fetch_and_and_16",
+  "__sync_fetch_and_and_2",
+  "__sync_fetch_and_and_4",
+  "__sync_fetch_and_and_8",
+  "__sync_fetch_and_max",
+  "__sync_fetch_and_min",
+  "__sync_fetch_and_nand",
+  "__sync_fetch_and_nand_1",
+  "__sync_fetch_and_nand_16",
+  "__sync_fetch_and_nand_2",
+  "__sync_fetch_and_nand_4",
+  "__sync_fetch_and_nand_8",
+  "__sync_fetch_and_or",
+  "__sync_fetch_and_or_1",
+  "__sync_fetch_and_or_16",
+  "__sync_fetch_and_or_2",
+  "__sync_fetch_and_or_4",
+  "__sync_fetch_and_or_8",
+  "__sync_fetch_and_sub",
+  "__sync_fetch_and_sub_1",
+  "__sync_fetch_and_sub_16",
+  "__sync_fetch_and_sub_2",
+  "__sync_fetch_and_sub_4",
+  "__sync_fetch_and_sub_8",
+  "__sync_fetch_and_umax",
+  "__sync_fetch_and_umin",
+  "__sync_fetch_and_xor",
+  "__sync_fetch_and_xor_1",
+  "__sync_fetch_and_xor_16",
+  "__sync_fetch_and_xor_2",
+  "__sync_fetch_and_xor_4",
+  "__sync_fetch_and_xor_8",
+  "__sync_lock_release",
+  "__sync_lock_release_1",
+  "__sync_lock_release_16",
+  "__sync_lock_release_2",
+  "__sync_lock_release_4",
+  "__sync_lock_release_8",
+  "__sync_lock_test_and_set",
+  "__sync_lock_test_and_set_1",
+  "__sync_lock_test_and_set_16",
+  "__sync_lock_test_and_set_2",
+  "__sync_lock_test_and_set_4",
+  "__sync_lock_test_and_set_8",
+  "__sync_nand_and_fetch",
+  "__sync_nand_and_fetch_1",
+  "__sync_nand_and_fetch_16",
+  "__sync_nand_and_fetch_2",
+  "__sync_nand_and_fetch_4",
+  "__sync_nand_and_fetch_8",
+  "__sync_or_and_fetch",
+  "__sync_or_and_fetch_1",
+  "__sync_or_and_fetch_16",
+  "__sync_or_and_fetch_2",
+  "__sync_or_and_fetch_4",
+  "__sync_or_and_fetch_8",
+  "__sync_sub_and_fetch",
+  "__sync_sub_and_fetch_1",
+  "__sync_sub_and_fetch_16",
+  "__sync_sub_and_fetch_2",
+  "__sync_sub_and_fetch_4",
+  "__sync_sub_and_fetch_8",
+  "__sync_swap",
+  "__sync_swap_1",
+  "__sync_swap_16",
+  "__sync_swap_2",
+  "__sync_swap_4",
+  "__sync_swap_8",
+  "__sync_synchronize",
+  "__sync_val_compare_and_swap",
+  "__sync_val_compare_and_swap_1",
+  "__sync_val_compare_and_swap_16",
+  "__sync_val_compare_and_swap_2",
+  "__sync_val_compare_and_swap_4",
+  "__sync_val_compare_and_swap_8",
+  "__sync_xor_and_fetch",
+  "__sync_xor_and_fetch_1",
+  "__sync_xor_and_fetch_16",
+  "__sync_xor_and_fetch_2",
+  "__sync_xor_and_fetch_4",
+  "__sync_xor_and_fetch_8",
+  "__tanpi",
+  "__tanpif",
+  "__va_start",
+  "__xray_customevent",
+  "_abnormal_termination",
+  "_alloca",
+  "_byteswap_uint64",
+  "_byteswap_ulong",
+  "_byteswap_ushort",
+  "_exception_code",
+  "_exception_info",
+  "_exit",
+  "_interlockedbittestandset",
+  "_longjmp",
+  "_lrotl",
+  "_lrotr",
+  "_rotl",
+  "_rotl16",
+  "_rotl64",
+  "_rotl8",
+  "_rotr",
+  "_rotr16",
+  "_rotr64",
+  "_rotr8",
+  "_setjmp",
+  "_setjmpex",
+  "abort",
+  "abs",
+  "acos",
+  "acosf",
+  "acosh",
+  "acoshf",
+  "acoshl",
+  "acosl",
+  "alloca",
+  "asin",
+  "asinf",
+  "asinh",
+  "asinhf",
+  "asinhl",
+  "asinl",
+  "atan",
+  "atan2",
+  "atan2f",
+  "atan2l",
+  "atanf",
+  "atanh",
+  "atanhf",
+  "atanhl",
+  "atanl",
+  "bzero",
+  "cabs",
+  "cabsf",
+  "cabsl",
+  "cacos",
+  "cacosf",
+  "cacosh",
+  "cacoshf",
+  "cacoshl",
+  "cacosl",
+  "calloc",
+  "carg",
+  "cargf",
+  "cargl",
+  "casin",
+  "casinf",
+  "casinh",
+  "casinhf",
+  "casinhl",
+  "casinl",
+  "catan",
+  "catanf",
+  "catanh",
+  "catanhf",
+  "catanhl",
+  "catanl",
+  "cbrt",
+  "cbrtf",
+  "cbrtl",
+  "ccos",
+  "ccosf",
+  "ccosh",
+  "ccoshf",
+  "ccoshl",
+  "ccosl",
+  "ceil",
+  "ceilf",
+  "ceill",
+  "cexp",
+  "cexpf",
+  "cexpl",
+  "cimag",
+  "cimagf",
+  "cimagl",
+  "clog",
+  "clogf",
+  "clogl",
+  "commit_read_pipe",
+  "commit_write_pipe",
+  "conj",
+  "conjf",
+  "conjl",
+  "copysign",
+  "copysignf",
+  "copysignl",
+  "cos",
+  "cosf",
+  "cosh",
+  "coshf",
+  "coshl",
+  "cosl",
+  "cpow",
+  "cpowf",
+  "cpowl",
+  "cproj",
+  "cprojf",
+  "cprojl",
+  "creal",
+  "crealf",
+  "creall",
+  "csin",
+  "csinf",
+  "csinh",
+  "csinhf",
+  "csinhl",
+  "csinl",
+  "csqrt",
+  "csqrtf",
+  "csqrtl",
+  "ctan",
+  "ctanf",
+  "ctanh",
+  "ctanhf",
+  "ctanhl",
+  "ctanl",
+  "enqueue_kernel",
+  "erf",
+  "erfc",
+  "erfcf",
+  "erfcl",
+  "erff",
+  "erfl",
+  "exit",
+  "exp",
+  "exp2",
+  "exp2f",
+  "exp2l",
+  "expf",
+  "expl",
+  "expm1",
+  "expm1f",
+  "expm1l",
+  "fabs",
+  "fabsf",
+  "fabsl",
+  "fdim",
+  "fdimf",
+  "fdiml",
+  "finite",
+  "finitef",
+  "finitel",
+  "floor",
+  "floorf",
+  "floorl",
+  "fma",
+  "fmaf",
+  "fmal",
+  "fmax",
+  "fmaxf",
+  "fmaxl",
+  "fmin",
+  "fminf",
+  "fminl",
+  "fmod",
+  "fmodf",
+  "fmodl",
+  "fprintf",
+  "frexp",
+  "frexpf",
+  "frexpl",
+  "fscanf",
+  "get_kernel_max_sub_group_size_for_ndrange",
+  "get_kernel_preferred_work_group_size_multiple",
+  "get_kernel_sub_group_count_for_ndrange",
+  "get_kernel_work_group_size",
+  "get_pipe_max_packets",
+  "get_pipe_num_packets",
+  "getcontext",
+  "hypot",
+  "hypotf",
+  "hypotl",
+  "ilogb",
+  "ilogbf",
+  "ilogbl",
+  "index",
+  "isalnum",
+  "isalpha",
+  "isblank",
+  "iscntrl",
+  "isdigit",
+  "isgraph",
+  "islower",
+  "isprint",
+  "ispunct",
+  "isspace",
+  "isupper",
+  "isxdigit",
+  "labs",
+  "ldexp",
+  "ldexpf",
+  "ldexpl",
+  "lgamma",
+  "lgammaf",
+  "lgammal",
+  "llabs",
+  "llrint",
+  "llrintf",
+  "llrintl",
+  "llround",
+  "llroundf",
+  "llroundl",
+  "log",
+  "log10",
+  "log10f",
+  "log10l",
+  "log1p",
+  "log1pf",
+  "log1pl",
+  "log2",
+  "log2f",
+  "log2l",
+  "logb",
+  "logbf",
+  "logbl",
+  "logf",
+  "logl",
+  "longjmp",
+  "lrint",
+  "lrintf",
+  "lrintl",
+  "lround",
+  "lroundf",
+  "lroundl",
+  "malloc",
+  "memchr",
+  "memcmp",
+  "memcpy",
+  "memmove",
+  "memset",
+  "modf",
+  "modff",
+  "modfl",
+  "nan",
+  "nanf",
+  "nanl",
+  "nearbyint",
+  "nearbyintf",
+  "nearbyintl",
+  "nextafter",
+  "nextafterf",
+  "nextafterl",
+  "nexttoward",
+  "nexttowardf",
+  "nexttowardl",
+  "objc_assign_global",
+  "objc_assign_ivar",
+  "objc_assign_strongCast",
+  "objc_assign_weak",
+  "objc_enumerationMutation",
+  "objc_exception_extract",
+  "objc_exception_match",
+  "objc_exception_throw",
+  "objc_exception_try_enter",
+  "objc_exception_try_exit",
+  "objc_getClass",
+  "objc_getMetaClass",
+  "objc_msgSend",
+  "objc_msgSendSuper",
+  "objc_msgSendSuper_stret",
+  "objc_msgSend_fp2ret",
+  "objc_msgSend_fpret",
+  "objc_msgSend_stret",
+  "objc_read_weak",
+  "objc_sync_enter",
+  "objc_sync_exit",
+  "omp_is_initial_device",
+  "pow",
+  "powf",
+  "powl",
+  "printf",
+  "qsetjmp",
+  "read_pipe",
+  "realloc",
+  "remainder",
+  "remainderf",
+  "remainderl",
+  "remquo",
+  "remquof",
+  "remquol",
+  "reserve_read_pipe",
+  "reserve_write_pipe",
+  "rindex",
+  "rint",
+  "rintf",
+  "rintl",
+  "round",
+  "roundf",
+  "roundl",
+  "savectx",
+  "scalbln",
+  "scalblnf",
+  "scalblnl",
+  "scalbn",
+  "scalbnf",
+  "scalbnl",
+  "scanf",
+  "setjmp",
+  "setjmp_syscall",
+  "siglongjmp",
+  "sigsetjmp",
+  "sin",
+  "sinf",
+  "sinh",
+  "sinhf",
+  "sinhl",
+  "sinl",
+  "snprintf",
+  "sprintf",
+  "sqrt",
+  "sqrtf",
+  "sqrtl",
+  "sscanf",
+  "stpcpy",
+  "stpncpy",
+  "strcasecmp",
+  "strcat",
+  "strchr",
+  "strcmp",
+  "strcpy",
+  "strcspn",
+  "strdup",
+  "strerror",
+  "strlcat",
+  "strlcpy",
+  "strlen",
+  "strncasecmp",
+  "strncat",
+  "strncmp",
+  "strncpy",
+  "strndup",
+  "strpbrk",
+  "strrchr",
+  "strspn",
+  "strstr",
+  "strtok",
+  "strxfrm",
+  "sub_group_commit_read_pipe",
+  "sub_group_commit_write_pipe",
+  "sub_group_reserve_read_pipe",
+  "sub_group_reserve_write_pipe",
+  "tan",
+  "tanf",
+  "tanh",
+  "tanhf",
+  "tanhl",
+  "tanl",
+  "tgamma",
+  "tgammaf",
+  "tgammal",
+  "to_global",
+  "to_local",
+  "to_private",
+  "tolower",
+  "toupper",
+  "trunc",
+  "truncf",
+  "truncl",
+  "vfork",
+  "vfprintf",
+  "vfscanf",
+  "vprintf",
+  "vscanf",
+  "vsnprintf",
+  "vsprintf",
+  "vsscanf",
+  "wcschr",
+  "wcscmp",
+  "wcslen",
+  "wcsncmp",
+  "wmemchr",
+  "wmemcmp",
+  "work_group_commit_read_pipe",
+  "work_group_commit_write_pipe",
+  "work_group_reserve_read_pipe",
+  "work_group_reserve_write_pipe",
+  "write_pipe",
+};
+static const unsigned long NUM_KNOWN_BUILTINS =
+    sizeof(KNOWN_BUILTINS) /
+    sizeof(KNOWN_BUILTINS[0]);
diff --git a/client/closure_compiler.py b/client/closure_compiler.py
new file mode 100755
index 0000000..cce00fc
--- /dev/null
+++ b/client/closure_compiler.py
@@ -0,0 +1,95 @@
+#!/usr/bin/python
+
+# Copyright 2015 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""
+Compiles Javascript using Closure Compiler API.
+
+Usage:
+% closure_compiler.py <source.js> -o <output.js>
+
+See Also:
+https://developers.google.com/closure/compiler/docs/api-tutorial1
+"""
+
+import argparse
+import copy
+import sys
+import StringIO
+import urllib
+import urllib2
+import threading
+
+
+def Communicate(params):
+  """Communicate with Closure Compiler API."""
+  encoded = urllib.urlencode(params)
+  conn = urllib2.urlopen('http://closure-compiler.appspot.com/compile', encoded)
+  return conn.read()
+
+
+class CompileThread(threading.Thread):
+  """Thread that sends one request via Communicate() and buffers the reply."""
+
+  def __init__(self, params):
+    threading.Thread.__init__(self)
+    self.params = params
+    self._out = StringIO.StringIO()
+
+  def run(self):
+    data = Communicate(self.params)
+    if data:
+      self._out.write(data)
+
+  @property
+  def output(self):
+    return self._out.getvalue()
+
+  @property
+  def output_info(self):
+    return self.params['output_info']
+
+
+PARAMS = {
+    'js_code': open(sys.argv[1]).read(),
+    'compilation_level': 'SIMPLE_OPTIMIZATIONS',
+    'output_format': 'text',
+    'language': 'ECMASCRIPT5_STRICT',
+}
+
+
+def main():
+  parser = argparse.ArgumentParser(description='Closure Compiler')
+  parser.add_argument('file', help='a file to compile')
+  parser.add_argument('-o', '--output', help='output filename')
+  opts = parser.parse_args()
+
+  infos = ['errors', 'warnings', 'compiled_code']
+  threads = []
+  for output_info in infos:
+    params = copy.copy(PARAMS)
+    params['output_info'] = output_info
+    t = CompileThread(params)
+    t.start()
+    threads.append(t)
+
+  is_error = False
+  compiled_code = None
+  for t in threads:
+    t.join()
+    if t.output:
+      if t.output_info == 'compiled_code' and opts.output:
+        compiled_code = t.output
+        continue
+      if t.output_info == 'errors':
+        is_error = True
+      sys.stderr.write(t.output)
+  if compiled_code and not is_error:
+    with open(opts.output, 'w') as f:
+      f.write(t.output)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/client/compilation_database_reader.cc b/client/compilation_database_reader.cc
new file mode 100644
index 0000000..c5f1a64
--- /dev/null
+++ b/client/compilation_database_reader.cc
@@ -0,0 +1,262 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "compilation_database_reader.h"
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+#include <json/json.h>
+
+#include "cmdline_parser.h"
+#include "file_helper.h"
+#include "flag_parser.h"
+#include "path.h"
+#include "path_resolver.h"
+
+#ifndef _WIN32
+# include <unistd.h>
+#else
+# include "config_win.h"
+# include "posix_helper_win.h"
+#endif
+
+using std::string;
+
+namespace devtools_goma {
+
+// Mirrors the lookup described in clang-tidy --help: when a build path (-p)
+// is given, compile_commands.json is looked up in that directory only.
+// Otherwise the directory of the first input file and then each of its
+// parent directories are checked. Returns an empty string if none is readable.
+// static
+string CompilationDatabaseReader::FindCompilationDatabase(
+    StringPiece build_path, StringPiece first_input_file_dir) {
+  static const char kCompileCommandsJson[] = "compile_commands.json";
+
+  if (build_path.empty()) {
+    // Walk upwards until Dirname() stops making progress.
+    for (StringPiece dir = first_input_file_dir; !dir.empty();) {
+      const string candidate = file::JoinPath(dir, kCompileCommandsJson);
+      if (access(candidate.c_str(), R_OK) == 0) {
+        return candidate;
+      }
+      const StringPiece parent = file::Dirname(dir);
+      if (parent == dir) {
+        break;
+      }
+      dir = parent;
+    }
+    return string();
+  }
+
+  // An explicit build path is never searched upwards.
+  const string compdb_path = file::JoinPath(build_path, kCompileCommandsJson);
+  if (access(compdb_path.c_str(), R_OK) == 0) {
+    return compdb_path;
+  }
+  return string();
+}
+
+// static
+bool CompilationDatabaseReader::MakeClangArgs(
+    const ClangTidyFlags& clang_tidy_flags,
+    const std::string& compdb_path,
+    std::vector<string>* clang_args,
+    string* build_dir) {
+  // Builds a clang command line from a clang-tidy command line.
+  //
+  // If a clang command line is specified after '--', it is used.
+  // When '--' is not specified, compile_commands.json has to be checked.
+  //
+  // The resulting argument order:
+  // With compilation database:
+  //   1. options in -extra-arg-before
+  //   2. options in compilation database
+  //   3. options in -extra-arg
+  // Without compilation database:
+  //   1. options in -extra-arg-before
+  //   2. options after '--'
+  //   3. options in -extra-arg
+  //   4. -c <input source file>
+
+  if (clang_tidy_flags.input_filenames().size() != 1) {
+    LOG(ERROR) << "No input source file or multiple source files. "
+               << "size=" << clang_tidy_flags.input_filenames().size();
+    return false;
+  }
+
+  // The clang path is derived from the directory of args[0]. -x lang is set
+  // later for IncludeProcessor, so it would be OK to use clang here.
+  const std::vector<string>& args = clang_tidy_flags.expanded_args();
+  clang_args->push_back(file::JoinPath(file::Dirname(args[0]), "clang"));
+
+  return MakeClangArgsFromCommandLine(
+      clang_tidy_flags.seen_hyphen_hyphen(),
+      clang_tidy_flags.args_after_hyphen_hyphen(),
+      clang_tidy_flags.input_filenames()[0],
+      clang_tidy_flags.cwd(),
+      clang_tidy_flags.build_path(),
+      clang_tidy_flags.extra_arg(),
+      clang_tidy_flags.extra_arg_before(),
+      compdb_path,
+      clang_args,
+      build_dir);
+}
+
+// Appends to |clang_args|: -extra-arg-before options, then either the options
+// after '--' or those of the compilation database entry, then -extra-arg
+// options. Returns false if no usable compilation database entry is found.
+// static
+bool CompilationDatabaseReader::MakeClangArgsFromCommandLine(
+    bool seen_hyphen_hyphen,
+    const std::vector<string>& args_after_hyphen_hyphen,
+    const string& input_file,
+    const string& cwd,
+    const string& build_path,
+    const std::vector<string>& extra_arg,
+    const std::vector<string>& extra_arg_before,
+    const string& compdb_path,
+    std::vector<string>* clang_args,
+    string* build_dir) {
+  // clang_args should have a path to clang only.
+  DCHECK_EQ(1U, clang_args->size());
+
+  for (const auto& arg : extra_arg_before) {
+    clang_args->push_back(arg);
+  }
+
+  if (seen_hyphen_hyphen) {
+    // When '--' is seen, the compilation database is not read, even if
+    // nothing follows '--' (e.g. "clang-tidy foo.cc --"). In that case the
+    // current directory is considered to be the build dir.
+    *build_dir = cwd;
+    for (const auto& arg : args_after_hyphen_hyphen) {
+      clang_args->push_back(arg);
+    }
+  } else {
+    string source = file::JoinPathRespectAbsolute(cwd, input_file);
+
+    // TODO: Cache the content.
+    std::vector<string> new_compile_options;
+    bool compdb_successful = AddCompileOptions(
+        source, compdb_path, &new_compile_options, build_dir);
+    if (!compdb_successful) {
+      LOG(ERROR) << "compilation database is corrupted or no entry is found"
+                 << " for " << source;
+      return false;
+    }
+    for (const auto& arg : new_compile_options) {
+      clang_args->push_back(arg);
+    }
+  }
+
+  for (const auto& arg : extra_arg) {
+    clang_args->push_back(arg);
+  }
+
+  // Without a compilation database the input file must be named explicitly,
+  // even when nothing follows '--' (e.g. "clang-tidy foo.cc --").
+  if (seen_hyphen_hyphen) {
+    clang_args->push_back("-c");
+    clang_args->push_back(input_file);
+  }
+
+  return true;
+}
+
+// static
+bool CompilationDatabaseReader::AddCompileOptions(
+    const string& source,
+    const string& db_path,
+    std::vector<string>* clang_args,
+    string* build_dir) {
+  if (db_path.empty()) {
+    // compile_commands.json is not found.
+    return false;
+  }
+
+  // TODO: Cache the parsed content.
+  string content;
+  if (!ReadFileToString(db_path, &content)) {
+    // couldn't read compile_commands.json
+    return false;
+  }
+
+  // compile_commands.json should be something like this:
+  // [
+  //  { "directory": "/home/user/llvm/build",
+  //    "command": "/usr/bin/clang++ -Irelative ...",
+  //    "file": "file.cc" },
+  //  ...
+  // ]
+  Json::Reader reader;
+  Json::Value root;
+  if (!reader.parse(content, root, false)) {
+    // couldn't parse json in compile_commands.json
+    return false;
+  }
+
+  if (!root.isArray()) {
+    return false;
+  }
+
+  string resolved_source = PathResolver::ResolvePath(source);
+  string command;
+  for (const auto& v : root) {
+    // Reject non-object entries first: isMember() needs an object value.
+    if (!v.isObject()
+        || !v.isMember("directory") || !v["directory"].isString()
+        || !v.isMember("command") || !v["command"].isString()
+        || !v.isMember("file") || !v["file"].isString()) {
+      return false;
+    }
+
+    const string db_dir = v["directory"].asString();
+    const string db_command = v["command"].asString();
+    const string db_file = v["file"].asString();
+
+    // "file" may be absolute, in which case |db_dir| must not be prepended.
+    string resolved_source_in_db = PathResolver::ResolvePath(
+        file::JoinPathRespectAbsolute(db_dir, db_file));
+
+    if (resolved_source == resolved_source_in_db) {
+      // Entry found.
+      *build_dir = db_dir;
+      command = db_command;
+      break;
+    }
+  }
+
+  if (command.empty()) {
+    // corresponding compilation entry is not found.
+    return false;
+  }
+
+  std::vector<string> argv;
+  ParsePosixCommandLineToArgv(command, &argv);
+
+  // When gomacc is used, compilation database might contain gomacc as the first
+  // argument. We need to skip it. Also we'd like to skip compiler itself, too.
+  // Note: when gomacc is prepended in compilation database command, and goma
+  // is not used, clang-tidy looks working well. (Otherwise, we need to change
+  // compile_commands.json content before sending goma server.)
+
+  // TODO: Might be better to remove -c and input files?
+  // It looks it won't change the result, though...
+  size_t init_pos = 1;
+  if (!argv.empty()) {
+    string argv0 = string(file::Stem(argv[0]));
+    std::transform(argv0.begin(), argv0.end(), argv0.begin(), ::tolower);
+    if (argv0 == "gomacc") {
+      init_pos = 2;
+    }
+  }
+  for (size_t i = init_pos; i < argv.size(); ++i) {
+    clang_args->push_back(argv[i]);
+  }
+
+  return true;
+}
+
+}  // namespace devtools_goma
diff --git a/client/compilation_database_reader.h b/client/compilation_database_reader.h
new file mode 100644
index 0000000..e5f8296
--- /dev/null
+++ b/client/compilation_database_reader.h
@@ -0,0 +1,71 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_COMPILATION_DATABASE_READER_H_
+#define DEVTOOLS_GOMA_CLIENT_COMPILATION_DATABASE_READER_H_
+
+#include <string>
+#include <vector>
+
+#include "compiler_flags.h"
+#include "string_piece.h"
+
+namespace devtools_goma {
+
+// The implementation to read a compilation database (compile_commands.json).
+class CompilationDatabaseReader {
+ public:
+  CompilationDatabaseReader() = delete;
+  CompilationDatabaseReader(const CompilationDatabaseReader&) = delete;
+
+  CompilationDatabaseReader& operator=(
+      const CompilationDatabaseReader&) = delete;
+
+  // Finds compile_commands.json in |build_path|, or |dir| and its ancestors.
+  // The ancestors of |build_path| won't be searched.
+  //
+  // Returns the path to compile_commands.json.
+  // Returns an empty string if not found.
+  static std::string FindCompilationDatabase(
+      StringPiece build_path, StringPiece dir);
+
+  // Creates corresponding clang args from clang tidy flag for IncludeProcessor.
+  static bool MakeClangArgs(const ClangTidyFlags& clang_tidy_flags,
+                            const std::string& compdb_path,
+                            std::vector<std::string>* clang_args,
+                            std::string* build_dir);
+
+ private:
+  // Parses a compilation database at |db_path|, and adds options to
+  // |clang_args|.
+  // Returns true if succeeded. |build_dir| will contain the directory in
+  // the compilation database entry.
+  // Returns false if parsing the compilation database fails or the
+  // compilation entry for |source| is not found in the compilation database.
+  static bool AddCompileOptions(const std::string& source,
+                                const std::string& db_path,
+                                std::vector<std::string>* clang_args,
+                                std::string* build_dir);
+
+  // MakeClangArgs that does not depend on ClangTidyFlags.
+  // Note: When command line is "clang-tidy foo.cc --", compilation database
+  // should be ignored.
+  static bool MakeClangArgsFromCommandLine(
+      bool seen_hyphen_hyphen,
+      const std::vector<std::string>& args_after_hyphen_hyphen,
+      const std::string& input_file,
+      const std::string& cwd,
+      const std::string& build_path,
+      const std::vector<std::string>& extra_arg,
+      const std::vector<std::string>& extra_arg_before,
+      const std::string& compdb_path,
+      std::vector<std::string>* clang_args,
+      std::string* build_dir);
+
+  friend class CompilationDatabaseReaderTest;
+};
+
+} // namespace devtools_goma
+
+#endif // DEVTOOLS_GOMA_CLIENT_COMPILATION_DATABASE_READER_H_
diff --git a/client/compilation_database_reader_unittest.cc b/client/compilation_database_reader_unittest.cc
new file mode 100644
index 0000000..21f655f
--- /dev/null
+++ b/client/compilation_database_reader_unittest.cc
@@ -0,0 +1,262 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <limits.h>
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+#include <gtest/gtest.h>
+#include <json/json.h>
+
+#include "compilation_database_reader.h"
+#include "file.h"
+#include "file_dir.h"
+#include "file_helper.h"
+#include "path.h"
+#include "unittest_util.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// Fixture forwarding to the private helpers of CompilationDatabaseReader.
+class CompilationDatabaseReaderTest : public testing::Test {
+ protected:
+  static bool AddCompileOptions(const std::string& source,
+                                const std::string& db_path,
+                                std::vector<std::string>* clang_args,
+                                string* build_dir) {
+    return CompilationDatabaseReader::AddCompileOptions(
+        source, db_path, clang_args, build_dir);
+  }
+
+  static bool MakeClangArgsFromCommandLine(
+      bool seen_hyphen_hyphen,
+      const std::vector<string>& args_after_hyphen_hyphen,
+      const string& input_file,
+      const string& cwd,
+      const string& build_path,
+      const std::vector<string>& extra_arg,
+      const std::vector<string>& extra_arg_before,
+      const string& compdb_path,
+      std::vector<string>* clang_args,
+      string* build_dir) {
+    return CompilationDatabaseReader::MakeClangArgsFromCommandLine(
+        seen_hyphen_hyphen, args_after_hyphen_hyphen, input_file, cwd,
+        build_path, extra_arg, extra_arg_before, compdb_path, clang_args,
+        build_dir);
+  }
+
+  static string MakeCompilationDatabaseContent(const string& directory,
+                                               const string& command,
+                                               const string& file) {
+    Json::Value entry;
+    entry["directory"] = directory;
+    entry["command"] = command;
+    entry["file"] = file;
+
+    Json::Value root;
+    root.append(entry);
+
+    return Json::FastWriter().write(root);
+  }
+};
+
+TEST_F(CompilationDatabaseReaderTest, FindCompilationDatabase) {
+  TmpdirUtil tmpdir("compdb_unittest_fcd");
+  tmpdir.SetCwd("/");
+
+  string ab_rel = file::JoinPath("a", "b");
+  string ab_abs = tmpdir.FullPath(ab_rel);
+
+  string compdb_content = MakeCompilationDatabaseContent(
+      ab_abs,
+      "clang -IA -IB -c foo.cc",
+      "foo.cc");
+
+  // The following directories and file are created.
+  // /a/b/
+  // /c/d/
+  // /c/compile_commands.json
+
+  tmpdir.MkdirForPath(ab_rel, true);
+  tmpdir.MkdirForPath(file::JoinPath("c", "d"), true);
+  tmpdir.CreateTmpFile(file::JoinPath("c", "compile_commands.json"),
+                       compdb_content);
+
+  const string c_abs = tmpdir.FullPath("c");
+  const string cd_abs = tmpdir.FullPath(file::JoinPath("c", "d"));
+  const string expected_compdb_path =
+      file::JoinPath(c_abs, "compile_commands.json");
+
+  // build_path is /c, first input file dir is /a/b.
+  {
+    string compdb_path =
+        CompilationDatabaseReader::FindCompilationDatabase(c_abs, ab_abs);
+    EXPECT_EQ(expected_compdb_path, compdb_path);
+  }
+
+  // build_path is empty, first input file dir is /c/d.
+  {
+    string compdb_path =
+        CompilationDatabaseReader::FindCompilationDatabase(string(), cd_abs);
+    EXPECT_EQ(expected_compdb_path, compdb_path);
+  }
+
+  // build_path is /c/d, first input file dir is /a/b.
+  // Since we shouldn't search ancestor directories of build_path,
+  // the compilation database should not be found.
+  {
+    string dbpath =
+        CompilationDatabaseReader::FindCompilationDatabase(cd_abs, ab_abs);
+    EXPECT_TRUE(dbpath.empty());
+  }
+}
+
+TEST_F(CompilationDatabaseReaderTest, WithCompilationDatabase) {
+  TmpdirUtil tmpdir("compdb_unittest");
+  tmpdir.SetCwd("/");
+
+  // Create /a/b/ and /compile_commands.json, then move cwd to /a/b.
+  // The database is looked up starting from /a/b, and the compiler name
+  // in its command is expected to be dropped.
+
+  string ab_rel = file::JoinPath("a", "b");
+  string ab_abs = tmpdir.FullPath(ab_rel);
+
+  tmpdir.MkdirForPath(ab_rel, true);
+
+  string compdb_content = MakeCompilationDatabaseContent(
+      ab_abs,
+      "clang -IA -IB -c foo.cc",
+      "foo.cc");
+  tmpdir.CreateTmpFile("compile_commands.json", compdb_content);
+
+  string compile_commands_json = tmpdir.FullPath("compile_commands.json");
+
+  string dbpath =
+      CompilationDatabaseReader::FindCompilationDatabase("", ab_abs);
+  EXPECT_EQ(compile_commands_json, dbpath);
+
+  tmpdir.SetCwd(ab_rel);
+  string foo_path = tmpdir.FullPath("foo.cc");
+
+  std::vector<string> clang_args {
+    "clang++",
+  };
+  string build_dir;
+  EXPECT_TRUE(AddCompileOptions(foo_path, dbpath, &clang_args, &build_dir));
+
+  std::vector<string> expected_clang_args {
+    "clang++", "-IA", "-IB", "-c", "foo.cc"
+  };
+  EXPECT_EQ(expected_clang_args, clang_args);
+  EXPECT_EQ(ab_abs, build_dir);
+}
+
+TEST_F(CompilationDatabaseReaderTest, WithCompilationDatabaseHavingGomaCC) {
+  TmpdirUtil tmpdir("compdb_unittest");
+  tmpdir.SetCwd("/");
+
+  // Create /a/b/ and /compile_commands.json, then move cwd to /a/b.
+  // The command in the database is prefixed with gomacc, which is expected
+  // to be dropped together with the compiler name.
+
+  string ab_rel = file::JoinPath("a", "b");
+  string ab_abs = tmpdir.FullPath(ab_rel);
+
+  tmpdir.MkdirForPath(ab_rel, true);
+
+  string compdb_content = MakeCompilationDatabaseContent(
+      ab_abs,
+      "/home/goma/goma/gomacc clang -IA -IB -c foo.cc",
+      "foo.cc");
+  tmpdir.CreateTmpFile("compile_commands.json", compdb_content);
+
+  string compile_commands_json = tmpdir.FullPath("compile_commands.json");
+
+  string dbpath =
+      CompilationDatabaseReader::FindCompilationDatabase("", ab_abs);
+  EXPECT_EQ(compile_commands_json, dbpath);
+
+  tmpdir.SetCwd(ab_rel);
+  string foo_path = tmpdir.FullPath("foo.cc");
+
+  std::vector<string> clang_args {
+    "clang++",
+  };
+  string build_dir;
+  EXPECT_TRUE(AddCompileOptions(foo_path, dbpath, &clang_args, &build_dir));
+
+  std::vector<string> expected_clang_args {
+    "clang++", "-IA", "-IB", "-c", "foo.cc"
+  };
+  EXPECT_EQ(expected_clang_args, clang_args);
+  EXPECT_EQ(ab_abs, build_dir);
+}
+
+TEST_F(CompilationDatabaseReaderTest,
+       WithCompilationDatabaseHavingGomaCCCapitalCaseWithExtension) {
+  TmpdirUtil tmpdir("compdb_unittest");
+  tmpdir.SetCwd("/");
+
+  // Create /a/b/ and /compile_commands.json, then move cwd to /a/b.
+  // The gomacc prefix is spelled "GOMACC.exe" here and is still expected to
+  // be dropped together with the compiler name.
+
+  string ab_rel = file::JoinPath("a", "b");
+  string ab_abs = tmpdir.FullPath(ab_rel);
+
+  tmpdir.MkdirForPath(ab_rel, true);
+
+  string compdb_content = MakeCompilationDatabaseContent(
+      ab_abs,
+      "/home/goma/goma/GOMACC.exe clang -IA -IB -c foo.cc",
+      "foo.cc");
+  tmpdir.CreateTmpFile("compile_commands.json", compdb_content);
+
+  string compile_commands_json = tmpdir.FullPath("compile_commands.json");
+
+  string dbpath =
+      CompilationDatabaseReader::FindCompilationDatabase("", ab_abs);
+  EXPECT_EQ(compile_commands_json, dbpath);
+
+  tmpdir.SetCwd(ab_rel);
+  string foo_path = tmpdir.FullPath("foo.cc");
+
+  std::vector<string> clang_args {
+    "clang++",
+  };
+  string build_dir;
+  EXPECT_TRUE(AddCompileOptions(foo_path, dbpath, &clang_args, &build_dir));
+
+  std::vector<string> expected_clang_args {
+    "clang++", "-IA", "-IB", "-c", "foo.cc"
+  };
+  EXPECT_EQ(expected_clang_args, clang_args);
+  EXPECT_EQ(ab_abs, build_dir);
+}
+
+TEST_F(CompilationDatabaseReaderTest, WithoutCompilationDatabase) {
+  // Arguments are given after '--', so no compilation database is read.
+  const std::vector<string> args_after_hyphen_hyphen { "-IA", "-IB" };
+  const std::vector<string> extra_arg_before { "-ID" };
+  const std::vector<string> extra_arg { "-IC" };
+  const string cwd = "/";
+
+  std::vector<string> clang_args { "clang" };
+  string build_dir;
+  EXPECT_TRUE(MakeClangArgsFromCommandLine(
+      true, args_after_hyphen_hyphen, "foo.cc", cwd, "", extra_arg,
+      extra_arg_before, "", &clang_args, &build_dir));
+
+  // -extra-arg-before, args after '--', -extra-arg, then -c <input>.
+  const std::vector<string> expected_clang_args {
+    "clang", "-ID", "-IA", "-IB", "-IC", "-c", "foo.cc"
+  };
+  EXPECT_EQ(expected_clang_args, clang_args);
+  EXPECT_EQ(cwd, build_dir);
+}
+
+}  // namespace devtools_goma
diff --git a/client/compile_service.cc b/client/compile_service.cc
new file mode 100644
index 0000000..5546cf3
--- /dev/null
+++ b/client/compile_service.cc
@@ -0,0 +1,1928 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "compile_service.h"
+
+#ifndef _WIN32
+#include <sys/types.h>
+#else
+#include "config_win.h"
+#endif
+
+#include <algorithm>
+#include <deque>
+#include <fstream>
+#include <memory>
+#include <sstream>
+
+#include <json/json.h>
+
+#include "atomic_stats_counter.h"
+#include "autolock_timer.h"
+#include "auto_updater.h"
+#include "callback.h"
+#include "compile_stats.h"
+#include "compile_task.h"
+#include "compiler_flags.h"
+#include "compiler_proxy_histogram.h"
+#include "compiler_proxy_info.h"
+#include "deps_cache.h"
+#include "file_hash_cache.h"
+#include "file_helper.h"
+#include "file_id.h"
+#include "glog/logging.h"
+#include "goma_file_http.h"
+#include "goma_hash.h"
+#include "google/protobuf/util/json_util.h"
+#include "http.h"
+#include "http_rpc.h"
+#include "include_cache.h"
+#include "include_processor.h"
+#include "ioutil.h"
+#include "join.h"
+#include "local_output_cache.h"
+#include "lockhelper.h"
+#include "log_service_client.h"
+#include "machine_info.h"
+#include "multi_http_rpc.h"
+#include "mypath.h"
+#include "path.h"
+#include "path_resolver.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/error_notice.pb.h"
+#include "prototmp/goma_stats.pb.h"
+#include "prototmp/goma_statz_stats.pb.h"
+MSVC_POP_WARNING()
+#include "split.h"
+#include "string_piece_utils.h"
+#include "timestamp.h"
+#include "util.h"
+#include "watchdog.h"
+#include "worker_thread_manager.h"
+
+#ifdef _WIN32
+# include "file_helper.h"
+#endif
+
+namespace {
+// Platform-dependent path-list separator (kSep) and the "." entry wrapped in
+// that separator (kCurrentDir). Both the pointers and the characters are
+// const so the constants cannot be re-seated by accident.
+#ifndef _WIN32
+const char* const kSep = ":";
+const char* const kCurrentDir = ":.:";
+#else
+const char* const kSep = ";";
+const char* const kCurrentDir = ";.;";
+#endif
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+// Comparator that orders CompileTasks by descending handler_time().
+// Used with sort() to list the longest tasks first, and with
+// push_heap()/pop_heap() so that the heap front holds the task with the
+// shortest handler time.
+class CompareTaskHandlerTime {
+ public:
+  bool operator()(CompileTask* a, CompileTask* b) const {
+    return a->stats().handler_time() > b->stats().handler_time();
+  }
+};
+
+// Wraps |http_server_request| (must not be null) for a single Exec RPC and
+// remembers the port of the server the request arrived on. Request/response
+// size bookkeeping starts out empty.
+CompileService::RpcController::RpcController(
+     ThreadpoolHttpServer::HttpServerRequest* http_server_request)
+  : http_server_request_(http_server_request),
+    server_port_(http_server_request->server().port()),
+#ifdef _WIN32
+    multi_rpc_(nullptr),
+#endif
+    gcc_req_size_(0),
+    gcc_resp_size_(nullptr) {
+  DCHECK(http_server_request_ != nullptr);
+}
+
+// The request must have been released before destruction: SendReply() and
+// (on Windows) AttachMultiRpcController() both reset http_server_request_.
+CompileService::RpcController::~RpcController() {
+  DCHECK(http_server_request_ == nullptr);
+}
+
+#ifdef _WIN32
+// Makes this controller a member of |multi_rpc|: close notifications are
+// forwarded to |multi_rpc| and the direct HTTP request pointer is dropped.
+// Must be called while no request size has been recorded (gcc_req_size_ is 0).
+void CompileService::RpcController::AttachMultiRpcController(
+    CompileService::MultiRpcController* multi_rpc) {
+  CHECK_EQ(gcc_req_size_, 0U);
+  multi_rpc_ = multi_rpc;
+  http_server_request_ = nullptr;
+}
+#endif
+
+// Reports whether |header| looks like it was sent by a web browser.
+// A header that carries "Host: 0.0.0.0" is treated as a non-browser request;
+// every other header is assumed to come from a browser.
+// See also goma_ipc.cc:GomaIPC::SendRequest.
+bool IsBrowserRequest(StringPiece header) {
+  // TODO: check it doesn't contain Origin header etc?
+  const bool has_ipc_host =
+      header.find("\r\nHost: 0.0.0.0\r\n") != StringPiece::npos;
+  return !has_ipc_host;
+}
+
+// Validates the HTTP request and parses its body into |req|.
+// Returns false, after logging a warning, when the request has no content,
+// looks like it comes from a browser, or does not declare the
+// binary/x-protocol-buffer content type. Otherwise records the body size in
+// gcc_req_size_ and returns the result of parsing the body.
+bool CompileService::RpcController::ParseRequest(ExecReq* req) {
+  StringPiece header = http_server_request_->header();
+  if (http_server_request_->request_content_length() <= 0) {
+    LOG(WARNING) << "Invalid request from client (no content-length):"
+                 << header;
+    return false;
+  }
+  // NOTE: this check does not protect against requests that are made
+  // through the network communications API:
+  // https://developer.chrome.com/apps/app_network
+  if (IsBrowserRequest(header)) {
+    LOG(WARNING) << "Unallowed request from browser:" << header;
+    return false;
+  }
+  if (header.find("\r\nContent-Type: binary/x-protocol-buffer\r\n") ==
+      StringPiece::npos) {
+    LOG(WARNING) << "Invalid request from client (invalid content-type):"
+                 << header;
+    return false;
+  }
+
+  gcc_req_size_ = http_server_request_->request_content_length();
+  return req->ParseFromArray(
+        http_server_request_->request_content(),
+        http_server_request_->request_content_length());
+}
+
+// Serializes |resp| behind an "HTTP/1.1 200 OK" header and sends it on the
+// underlying request. The request pointer is cleared afterwards, so a second
+// call would hit the CHECK. If gcc_resp_size_ has been set, the serialized
+// body size is stored through it.
+void CompileService::RpcController::SendReply(const ExecResp& resp) {
+  CHECK(http_server_request_ != nullptr);
+
+  size_t gcc_resp_size = resp.ByteSize();
+  std::ostringstream http_response_message;
+  http_response_message
+    << "HTTP/1.1 200 OK\r\n"
+    << "Content-Type: binary/x-protocol-buffer\r\n"
+    << "Content-Length: " << gcc_resp_size << "\r\n\r\n";
+  string response_string = http_response_message.str();
+  int header_size = response_string.size();
+  // Grow the buffer and serialize the body right after the header.
+  response_string.resize(header_size + gcc_resp_size);
+  resp.SerializeToArray(&response_string[header_size], gcc_resp_size);
+  http_server_request_->SendReply(response_string);
+  http_server_request_ = nullptr;
+  if (gcc_resp_size_ != nullptr)
+    *gcc_resp_size_ = gcc_resp_size;
+}
+
+// Registers |callback| to be notified when the request is closed. On Windows,
+// a controller attached to a MultiRpcController delegates to it; otherwise
+// the callback is registered directly on the HTTP request, which must still
+// be held.
+void CompileService::RpcController::NotifyWhenClosed(OneshotClosure* callback) {
+#ifdef _WIN32
+  if (multi_rpc_) {
+    multi_rpc_->NotifyWhenClosed(callback);
+    return;
+  }
+#endif
+  CHECK(http_server_request_ != nullptr);
+  http_server_request_->NotifyWhenClosed(callback);
+}
+
+#ifdef _WIN32
+// Wraps |http_server_request| (must not be null) for a MultiExec RPC.
+// Records the constructing thread as the caller thread and registers
+// closed_callback_ (bound to RequestClosed) on the request.
+CompileService::MultiRpcController::MultiRpcController(
+    WorkerThreadManager* wm,
+    ThreadpoolHttpServer::HttpServerRequest* http_server_request)
+    : wm_(wm),
+      caller_thread_id_(wm->GetCurrentThreadId()),
+      http_server_request_(http_server_request),
+      resp_(new MultiExecResp),
+      ALLOW_THIS_IN_INITIALIZER_LIST(
+          closed_callback_(NewCallback(
+              this, &CompileService::MultiRpcController::RequestClosed))),
+      gcc_req_size_(0) {
+  DCHECK(http_server_request_ != nullptr);
+  http_server_request_->NotifyWhenClosed(closed_callback_);
+}
+
+// Must run on the caller thread, after SendReply() has released the request
+// and after every per-request controller has been retired via ExecDone().
+CompileService::MultiRpcController::~MultiRpcController() {
+  DCHECK(http_server_request_ == nullptr);
+  CHECK(rpcs_.empty());
+  CHECK_EQ(caller_thread_id_, wm_->GetCurrentThreadId());
+}
+
+// Parses the request body into |req| (caller thread only). Returns false if
+// the request has no content or the body fails to parse. On success, creates
+// one attached RpcController and one response slot per sub-request, so that
+// rpcs_, resp_ and |req| all have the same number of entries.
+bool CompileService::MultiRpcController::ParseRequest(MultiExecReq* req) {
+  CHECK_EQ(caller_thread_id_, wm_->GetCurrentThreadId());
+  if (http_server_request_->request_content_length() <= 0) {
+    LOG(WARNING) << "Invalid request from client (no content-length):"
+                 << http_server_request_->request();
+    return false;
+  }
+  gcc_req_size_ = http_server_request_->request_content_length();
+  bool ok = req->ParseFromArray(
+        http_server_request_->request_content(),
+        http_server_request_->request_content_length());
+  if (ok) {
+    for (int i = 0; i < req->req_size(); ++i) {
+      CompileService::RpcController* rpc =
+          new CompileService::RpcController(http_server_request_);
+      // Attaching clears the per-request pointer to the shared HTTP request.
+      rpc->AttachMultiRpcController(this);
+      rpcs_.push_back(rpc);
+      resp_->add_response();
+    }
+    CHECK_EQ(req->req_size(), static_cast<int>(rpcs_.size()));
+    CHECK_EQ(req->req_size(), resp_->response_size());
+  }
+  return ok;
+}
+
+// Returns the controller of the |i|-th sub-request (caller thread only).
+// The entry is nullptr once ExecDone(i) has been called.
+CompileService::RpcController* CompileService::MultiRpcController::rpc(
+    int i) const {
+  CHECK_EQ(caller_thread_id_, wm_->GetCurrentThreadId());
+  DCHECK_GE(i, 0);
+  DCHECK_LT(i, static_cast<int>(rpcs_.size()));
+  return rpcs_[i];
+}
+
+// Returns the mutable ExecResp of the |i|-th response slot (caller thread
+// only).
+ExecResp* CompileService::MultiRpcController::mutable_resp(int i) const {
+  CHECK_EQ(caller_thread_id_, wm_->GetCurrentThreadId());
+  DCHECK_GE(i, 0);
+  DCHECK_LT(i, resp_->response_size());
+  return resp_->mutable_response(i)->mutable_resp();
+}
+
+// Retires the |i|-th sub-request (caller thread only): its controller is
+// deleted and its slot is set to nullptr. Returns true, and empties rpcs_,
+// once every sub-request has been retired; returns false while any is left.
+bool CompileService::MultiRpcController::ExecDone(int i) {
+  CHECK_EQ(caller_thread_id_, wm_->GetCurrentThreadId());
+  DCHECK_GE(i, 0);
+  DCHECK_LT(i, static_cast<int>(rpcs_.size()));
+  DCHECK(rpcs_[i] != nullptr);
+  delete rpcs_[i];
+  rpcs_[i] = nullptr;
+
+  const bool all_retired = std::all_of(
+      rpcs_.begin(), rpcs_.end(),
+      [](const CompileService::RpcController* remaining) {
+        return remaining == nullptr;
+      });
+  if (all_retired)
+    rpcs_.clear();
+  return all_retired;
+}
+
+// Serializes the accumulated MultiExecResp behind an "HTTP/1.1 200 OK"
+// header and sends it (caller thread only). Requires that every sub-request
+// has been retired (rpcs_ is empty). The request pointer is cleared
+// afterwards.
+void CompileService::MultiRpcController::SendReply() {
+  CHECK_EQ(caller_thread_id_, wm_->GetCurrentThreadId());
+  CHECK(http_server_request_ != nullptr);
+  CHECK(rpcs_.empty());
+
+  size_t gcc_resp_size = resp_->ByteSize();
+  std::ostringstream http_response_message;
+  http_response_message
+    << "HTTP/1.1 200 OK\r\n"
+    << "Content-Type: binary/x-protocol-buffer\r\n"
+    << "Content-Length: " << gcc_resp_size << "\r\n\r\n";
+  string response_string = http_response_message.str();
+  int header_size = response_string.size();
+  // Grow the buffer and serialize the body right after the header.
+  response_string.resize(header_size + gcc_resp_size);
+  resp_->SerializeToArray(&response_string[header_size], gcc_resp_size);
+  http_server_request_->SendReply(response_string);
+  http_server_request_ = nullptr;
+}
+
+// Registers |callback| to run on the calling thread when the request is
+// closed. While the close notification is still pending, the callback is
+// queued together with the calling thread's id; if the request has already
+// been closed, the callback is scheduled immediately on the calling thread.
+void CompileService::MultiRpcController::NotifyWhenClosed(
+    OneshotClosure* callback) {
+  // This may be called on a thread other than caller_thread_id_.
+  {
+    AUTOLOCK(lock, &mu_);
+    if (closed_callback_ != nullptr) {
+      closed_callbacks_.emplace_back(wm_->GetCurrentThreadId(), callback);
+      return;
+    }
+  }
+  // closed_callback_ has been called, that is, http_server_request_
+  // was already closed, so run the callback now on the same thread.
+  wm_->RunClosureInThread(
+      FROM_HERE,
+      wm_->GetCurrentThreadId(), callback,
+      WorkerThreadManager::PRIORITY_IMMEDIATE);
+}
+
+// Close notification for the HTTP request. Marks the request as closed by
+// clearing closed_callback_, then schedules every queued callback on the
+// thread that registered it. The queue is swapped out under mu_ so that the
+// callbacks are scheduled without holding the lock.
+void CompileService::MultiRpcController::RequestClosed() {
+  std::vector<std::pair<WorkerThreadManager::ThreadId,
+                        OneshotClosure*>> callbacks;
+  {
+    AUTOLOCK(lock, &mu_);
+    closed_callback_ = nullptr;
+    callbacks.swap(closed_callbacks_);
+  }
+  for (const auto& callback : callbacks) {
+    wm_->RunClosureInThread(FROM_HERE, callback.first,
+                            callback.second,
+                            WorkerThreadManager::PRIORITY_IMMEDIATE);
+  }
+}
+#endif
+
+// Builds a CompileService driven by |wm|.
+// Starts a one-thread "compiler_info" pool, initially points
+// include_processor_pool_ at WorkerThreadManager::kFreePool, zeroes the
+// statistics counters, and records the user name, node name, start time and
+// goma tmp directory. A warning is logged if the user name is empty or
+// "unknown".
+CompileService::CompileService(WorkerThreadManager* wm)
+    : wm_(wm),
+      quit_(false),
+      task_id_(0),
+      cond_(&mu_),
+      max_active_tasks_(1000),
+      max_finished_tasks_(1000),
+      max_failed_tasks_(1000),
+      max_long_tasks_(50),
+      username_(GetUsername()),
+      nodename_(GetNodename()),
+      start_time_(time(nullptr)),
+      compiler_info_builder_(new CompilerInfoBuilder),
+      compiler_info_pool_(wm_->StartPool(1, "compiler_info")),
+      file_hash_cache_(new FileHashCache),
+      include_processor_pool_(WorkerThreadManager::kFreePool),
+      histogram_(new CompilerProxyHistogram),
+      need_to_send_content_(false),
+      new_file_threshold_(60),
+      enable_gch_hack_(true),
+      use_relative_paths_in_argv_(false),
+      hermetic_(false),
+      hermetic_fallback_(false),
+      dont_kill_subprocess_(false),
+      max_subprocs_pending_(0),
+      local_run_preference_(0),
+      local_run_for_failed_input_(false),
+      local_run_delay_msec_(0),
+      store_local_run_output_(false),
+      enable_remote_link_(false),
+      num_exec_request_(0),
+      num_exec_success_(0),
+      num_exec_failure_(0),
+      num_exec_compiler_proxy_failure_(0),
+      num_exec_goma_finished_(0),
+      num_exec_goma_cache_hit_(0),
+      num_exec_goma_local_cache_hit_(0),
+      num_exec_goma_aborted_(0),
+      num_exec_goma_retry_(0),
+      num_exec_local_run_(0),
+      num_exec_local_killed_(0),
+      num_exec_local_finished_(0),
+      num_exec_fail_fallback_(0),
+      num_file_requested_(0),
+      num_file_uploaded_(0),
+      num_file_missed_(0),
+      num_file_output_(0),
+      num_file_rename_output_(0),
+      num_file_output_buf_(0),
+      num_include_processor_total_files_(0),
+      num_include_processor_skipped_files_(0),
+      include_processor_total_wait_time_(0),
+      include_processor_total_run_time_(0),
+      cur_sum_output_size_(0),
+      max_sum_output_size_(0),
+      req_sum_output_size_(0),
+      peak_req_sum_output_size_(0),
+      can_send_user_info_(false),
+      allowed_network_error_duration_in_sec_(-1),
+      num_active_fail_fallback_tasks_(0),
+      max_active_fail_fallback_tasks_(-1),
+      allowed_max_active_fail_fallback_duration_in_sec_(-1),
+      reached_max_active_fail_fallback_time_(0),
+      num_forced_fallback_in_setup_{},
+      max_compiler_disabled_tasks_(-1) {
+  if (username_.empty() || username_ == "unknown") {
+    LOG(WARNING) << "Failed to obtain username:" << username_;
+  }
+  tmp_dir_ = GetGomaTmpDir();
+}
+
+// Clears the task lists. mu_ is not taken here; the "Unlocked" helper is
+// called directly.
+CompileService::~CompileService() {
+  ClearTasksUnlocked();
+}
+
+// Sets the maximum number of concurrently active tasks; tasks arriving
+// beyond this limit are queued as pending by Exec().
+void CompileService::SetActiveTaskThrottle(int max_active_tasks) {
+  AUTOLOCK(lock, &mu_);
+  max_active_tasks_ = max_active_tasks;
+}
+
+// Sets how many finished, failed and longest-running tasks are kept for
+// reporting. The lists themselves are trimmed in CompileTaskDone().
+void CompileService::SetCompileTaskHistorySize(
+    int max_finished_tasks, int max_failed_tasks, int max_long_tasks) {
+  AUTOLOCK(lock, &mu_);
+  max_finished_tasks_ = max_finished_tasks;
+  max_failed_tasks_ = max_failed_tasks;
+  max_long_tasks_ = max_long_tasks;
+}
+
+// Sets the compiler_proxy id prefix once. If a prefix is already set, the
+// call is ignored, and a warning is logged when |prefix| differs from the
+// stored one.
+void CompileService::SetCompilerProxyIdPrefix(const string& prefix) {
+  AUTOLOCK(lock, &mu_);
+  if (!compiler_proxy_id_prefix_.empty()) {
+    LOG_IF(WARNING, compiler_proxy_id_prefix_ != prefix)
+        << "different compiler_proxy_id_prefix:"
+        << compiler_proxy_id_prefix_
+        << " " << prefix;
+    return;
+  }
+  compiler_proxy_id_prefix_ = prefix;
+  LOG(INFO) << "compiler_proxy_id_prefix:" << compiler_proxy_id_prefix_;
+}
+
+// Takes ownership of |option_setter|, replacing any previous one.
+void CompileService::SetSubProcessOptionSetter(
+    std::unique_ptr<SubProcessOptionSetter> option_setter) {
+  subprocess_option_setter_ = std::move(option_setter);
+}
+
+// Takes ownership of |http_client|, replacing any previous one.
+void CompileService::SetHttpClient(std::unique_ptr<HttpClient> http_client) {
+  http_client_ = std::move(http_client);
+}
+
+// Takes ownership of |http_rpc|, replacing any previous one.
+void CompileService::SetHttpRPC(std::unique_ptr<HttpRPC> http_rpc) {
+  http_rpc_ = std::move(http_rpc);
+}
+
+// Takes ownership of |exec_service_client|, replacing any previous one.
+void CompileService::SetExecServiceClient(
+    std::unique_ptr<ExecServiceClient> exec_service_client) {
+  exec_service_client_ = std::move(exec_service_client);
+}
+
+// Takes ownership of |multi_file_store|, replacing any previous one.
+void CompileService::SetMultiFileStore(
+    std::unique_ptr<MultiFileStore> multi_file_store) {
+  multi_file_store_ = std::move(multi_file_store);
+}
+
+// Takes ownership of |file_service|, replacing any previous one.
+void CompileService::SetFileServiceHttpClient(
+    std::unique_ptr<FileServiceHttpClient> file_service) {
+  file_service_ = std::move(file_service);
+}
+
+// Starts a dedicated "include_processor" pool with |num_threads| threads and
+// makes it the include processor pool. A non-positive |num_threads| is a
+// no-op, leaving the current pool setting untouched.
+void CompileService::StartIncludeProcessorWorkers(int num_threads) {
+  if (num_threads <= 0) {
+    return;
+  }
+  include_processor_pool_ = wm_->StartPool(num_threads, "include_processor");
+  LOG(INFO) << "include_processor_pool=" << include_processor_pool_
+            << " num_thread=" << num_threads;
+}
+
+// Takes ownership of |log_service_client|, replacing any previous one.
+void CompileService::SetLogServiceClient(
+    std::unique_ptr<LogServiceClient> log_service_client) {
+  log_service_client_ = std::move(log_service_client);
+}
+
+// Takes ownership of |auto_updater|, replacing any previous one.
+void CompileService::SetAutoUpdater(std::unique_ptr<AutoUpdater> auto_updater) {
+  auto_updater_ = std::move(auto_updater);
+}
+
+// Takes ownership of |watchdog| and sets this service as its target,
+// passing |goma_ipc_env| along. |watchdog| must not be null.
+void CompileService::SetWatchdog(std::unique_ptr<Watchdog> watchdog,
+                                 const std::vector<string>& goma_ipc_env) {
+  watchdog_ = std::move(watchdog);
+  watchdog_->SetTarget(this, goma_ipc_env);
+}
+
+// Replaces the configured timeouts with a copy of |timeout_secs|.
+// assign() is used instead of appending, so calling this setter more than
+// once does not accumulate stale entries from earlier calls.
+void CompileService::SetTimeoutSecs(const std::vector<int>& timeout_secs) {
+  timeout_secs_.assign(timeout_secs.begin(), timeout_secs.end());
+}
+
+// Entry point of an Exec RPC. Creates a CompileTask with a fresh task id,
+// wires the request/response size bookkeeping between |rpc| and the task's
+// stats, and initializes the task with |rpc|, |req|, |resp| and a callback
+// that runs |done| on the calling thread.
+// If the number of active tasks has reached max_active_tasks_, the task is
+// queued in pending_tasks_ and started later from CompileTaskDone().
+// Otherwise it becomes active and CompileTask::Start is scheduled at low
+// priority.
+void CompileService::Exec(
+    RpcController* rpc,
+    const ExecReq* req, ExecResp* resp,
+    OneshotClosure* done) {
+  CompileTask* task = nullptr;
+  // done will be called on this thread when Exec done.
+  OneshotClosure* callback = NewCallback(this, &CompileService::ExecDone,
+                                         wm_->GetCurrentThreadId(), done);
+  {
+    int task_id = 0;
+    {
+      AUTOLOCK(lock, &task_id_mu_);
+      task_id = task_id_++;
+    }
+
+    task = new CompileTask(this, task_id);
+    task->mutable_stats()->gcc_req_size = rpc->gcc_req_size_;
+    // RpcController::SendReply() writes the response size through this.
+    rpc->gcc_resp_size_ = &task->mutable_stats()->gcc_resp_size;
+    task->Init(rpc, req, resp, callback);
+
+    AUTOLOCK(lock, &mu_);
+    if (static_cast<int>(active_tasks_.size()) >= max_active_tasks_) {
+      LOG(INFO) << task->trace_id() << " pending";
+      pending_tasks_.push_back(task);
+      return;
+    }
+    active_tasks_.insert(task);
+    ++num_exec_request_;
+  }
+  // Starts handling RPC requests.
+  // When the response to gomacc is ready, ExecDone will be called on the
+  // task's thread and the done callback will be called on this thread.
+  // When all activities of the task are finished, CompileTaskDone will be
+  // called on the task's thread.
+  wm_->RunClosure(
+      FROM_HERE,
+      NewCallback(task, &CompileTask::Start),
+      WorkerThreadManager::PRIORITY_LOW);
+}
+
+// Schedules |done| to run on thread |thread_id| at high priority. Exec()
+// binds this with the id of the thread that received the request.
+void CompileService::ExecDone(WorkerThreadManager::ThreadId thread_id,
+                              OneshotClosure* done) {
+  wm_->RunClosureInThread(
+      FROM_HERE,
+      thread_id,
+      NewCallback(static_cast<Closure*>(done), &Closure::Run),
+      WorkerThreadManager::PRIORITY_HIGH);
+}
+
+// Called when all activities of |task| are finished.
+// Freezes the task timestamp, feeds its stats to the histogram and the log
+// service, and then, under mu_:
+//  - moves |task| from active_tasks_ to the front of finished_tasks_,
+//  - promotes pending tasks into the free active slots,
+//  - updates the aggregated counters,
+//  - records the task in failed_tasks_ / long_tasks_ where applicable.
+// Promoted tasks are started, and tasks dropped from the history lists are
+// dereferenced, after mu_ has been released.
+//
+// The finished/failed history lists are trimmed in a loop, so a limit lowered
+// through SetCompileTaskHistorySize() takes effect on the next completed
+// task. long_tasks_ is never indexed while it is empty, which makes a
+// non-positive max_long_tasks_ safe.
+void CompileService::CompileTaskDone(CompileTask* task) {
+  task->SetFrozenTimestampMs(GetCurrentTimestampMs());
+  histogram_->UpdateCompileStat(task->stats());
+  if (log_service_client_.get())
+    log_service_client_->SaveExecLog(task->stats());
+
+  std::vector<CompileTask*> start_tasks;
+  std::vector<CompileTask*> deref_tasks;
+  {
+    AUTOLOCK(lock, &mu_);
+
+    active_tasks_.erase(task);
+    int num_start_tasks =
+        max_active_tasks_ - static_cast<int>(active_tasks_.size());
+    if (!pending_tasks_.empty()) {
+      LOG(INFO) << "Run at most " << num_start_tasks << " pending_tasks "
+                << "(active=" << active_tasks_.size()
+                << " max=" << max_active_tasks_
+                << " pending=" << pending_tasks_.size() << ")";
+    }
+    for (int i = 0; i < num_start_tasks && !pending_tasks_.empty(); ++i) {
+      CompileTask* start_task = pending_tasks_.front();
+      pending_tasks_.pop_front();
+      active_tasks_.insert(start_task);
+      start_tasks.push_back(start_task);
+      ++num_exec_request_;
+    }
+    finished_tasks_.push_front(task);
+    // Drop the oldest entries until the list fits the (possibly lowered)
+    // limit. The emptiness test guards against a negative limit.
+    while (!finished_tasks_.empty() &&
+           static_cast<int>(finished_tasks_.size()) > max_finished_tasks_) {
+      deref_tasks.push_back(finished_tasks_.back());
+      finished_tasks_.pop_back();
+    }
+    num_include_processor_total_files_ +=
+        task->stats().include_preprocess_total_files();
+    num_include_processor_skipped_files_ +=
+        task->stats().include_preprocess_skipped_files();
+    include_processor_total_wait_time_ +=
+        task->stats().include_processor_wait_time();
+    include_processor_total_run_time_ +=
+        task->stats().include_processor_run_time();
+
+    switch (task->state()) {
+      case CompileTask::FINISHED:
+        ++num_exec_goma_finished_;
+        if (task->local_cache_hit()) {
+          ++num_exec_goma_local_cache_hit_;
+        } else if (task->cache_hit()) {
+          ++num_exec_goma_cache_hit_;
+        }
+        break;
+      case CompileTask::LOCAL_FINISHED:
+        ++num_exec_local_finished_;
+        break;
+      default:
+        // Any other final state is only expected for an aborted task.
+        CHECK(task->abort());
+        ++num_exec_goma_aborted_;
+        break;
+    }
+    num_exec_goma_retry_ += task->stats().exec_request_retry();
+
+    num_file_requested_ += task->stats().num_total_input_file();
+    num_file_uploaded_ +=
+        SumRepeatedInt32(task->stats().num_uploading_input_file());
+    num_file_missed_ +=
+        SumRepeatedInt32(task->stats().num_missing_input_file());
+
+    if (task->local_run()) {
+      ++num_exec_local_run_;
+      ++local_run_reason_[task->stats().local_run_reason()];
+    }
+    if (task->local_killed()) {
+      ++num_exec_local_killed_;
+    }
+    if ((task->failed() || task->fail_fallback()) && !task->canceled()) {
+      if (task->failed())
+        ++num_exec_failure_;
+      if (task->fail_fallback()) {
+        ++num_exec_fail_fallback_;
+        --num_active_fail_fallback_tasks_;
+        DCHECK_GE(num_active_fail_fallback_tasks_, 0);
+        if (num_active_fail_fallback_tasks_ <=
+            max_active_fail_fallback_tasks_) {
+          LOG_IF(INFO, reached_max_active_fail_fallback_time_ != 0)
+              << "clearing reached_max_active_fail_fallback_time.";
+          reached_max_active_fail_fallback_time_ = 0;
+        }
+      }
+      if (task->stats().compiler_proxy_error())
+        ++num_exec_compiler_proxy_failure_;
+      // failed_tasks_ holds its own reference to the task.
+      task->Ref();
+      failed_tasks_.push_front(task);
+      while (!failed_tasks_.empty() &&
+             static_cast<int>(failed_tasks_.size()) > max_failed_tasks_) {
+        deref_tasks.push_back(failed_tasks_.back());
+        failed_tasks_.pop_back();
+      }
+    } else {
+      ++num_exec_success_;
+    }
+
+    bool is_longest = false;
+    if (static_cast<int>(long_tasks_.size()) < max_long_tasks_) {
+      task->Ref();
+      long_tasks_.push_back(task);
+      is_longest = true;
+    } else if (!long_tasks_.empty() &&
+               task->stats().handler_time() >
+               long_tasks_[0]->stats().handler_time()) {
+      // The list is full: replace the entry with the shortest handler time.
+      pop_heap(long_tasks_.begin(), long_tasks_.end(),
+               CompareTaskHandlerTime());
+      deref_tasks.push_back(long_tasks_.back());
+      task->Ref();
+      long_tasks_.back() = task;
+      is_longest = true;
+    }
+    if (is_longest) {
+      // Create heap on long_tasks_.
+      // long_tasks_[0] should have shortest handler time in longest_tasks_.
+      push_heap(long_tasks_.begin(), long_tasks_.end(),
+                CompareTaskHandlerTime());
+    }
+
+    cond_.Signal();
+  }
+  for (auto* start_task : start_tasks) {
+    wm_->RunClosure(
+        FROM_HERE,
+        NewCallback(start_task, &CompileTask::Start),
+        WorkerThreadManager::PRIORITY_LOW);
+  }
+  for (auto* deref_task : deref_tasks) {
+    deref_task->Deref();
+  }
+}
+
+// Marks the service as quitting, stops the auto updater (if one has been
+// set), flushes the log service client (if any) and flushes the glog files.
+void CompileService::Quit() {
+  {
+    AUTOLOCK(lock, &quit_mu_);
+    quit_ = true;
+  }
+  // The auto updater is optional (see DumpToJson), so guard the call.
+  if (auto_updater_.get())
+    auto_updater_->Stop();
+  if (log_service_client_.get())
+    log_service_client_->Flush();
+#ifndef GLOG_NO_ABBREVIATED_SEVERITIES
+  google::FlushLogFiles(google::INFO);
+#else
+  google::FlushLogFiles(google::GLOG_INFO);
+#endif
+}
+
+// Returns whether Quit() has been called; the flag is read under quit_mu_.
+bool CompileService::quit() const {
+  AUTOLOCK(lock, &quit_mu_);
+  return quit_;
+}
+
+// Shuts the service down and blocks until it is idle.
+// Flushes logs, waits for the auto updater (if one has been set), shuts down
+// the HTTP client and the worker thread manager, then waits on cond_ until
+// no pending or active task is left. Afterwards the owned helpers are
+// released; http_rpc_ is reset only after the HTTP client reports no active
+// work.
+void CompileService::Wait() {
+  // Sends logs before shutting down http rpc.
+  if (log_service_client_.get())
+    log_service_client_->Flush();
+
+  // The auto updater is optional (see DumpToJson), so guard the call.
+  if (auto_updater_.get())
+    auto_updater_->Wait();
+  http_client_->Shutdown();
+  wm_->Shutdown();
+  {
+    AUTOLOCK(lock, &mu_);
+    LOG(INFO) << "Waiting all active tasks finished....";
+    while (!pending_tasks_.empty() || !active_tasks_.empty()) {
+      LOG(INFO) << "pending_tasks=" << pending_tasks_.size()
+                << " active_tasks=" << active_tasks_.size();
+      cond_.Wait();
+    }
+  }
+  CHECK(active_tasks_.empty());
+  if (log_service_client_.get())
+    log_service_client_->Wait();
+  log_service_client_.reset();
+  histogram_.reset();
+  file_hash_cache_.reset();
+  if (multi_file_store_.get())
+    multi_file_store_->Wait();
+  file_service_.reset();
+  exec_service_client_.reset();
+
+  // Stop all HttpClient tasks before resetting http_rpc_ b/26551623
+  http_client_->WaitNoActive();
+  http_rpc_.reset();
+  http_client_.reset();
+  watchdog_.reset();
+}
+
+// Writes a styled JSON dump of the task with id |task_id| into |out|.
+// Returns false, leaving |out| untouched, if no such task is found.
+// The lookup and the dump both happen under mu_.
+bool CompileService::DumpTask(int task_id, string* out) {
+  AUTOLOCK(lock, &mu_);
+  const CompileTask* task = FindTaskByIdUnlocked(task_id, true);
+  if (task == nullptr)
+    return false;
+  Json::Value json;
+  task->DumpToJson(true, &json);
+  *out = json.toStyledString();
+  return true;
+}
+
+// Asks the task with id |task_id| to dump its request. Returns false if no
+// such task is found. An extra reference is held on the task so that
+// DumpRequest() can run without mu_ held; the reference is taken and
+// released under mu_.
+bool CompileService::DumpTaskRequest(int task_id) {
+  const CompileTask* task = nullptr;
+  {
+    AUTOLOCK(lock, &mu_);
+    task = FindTaskByIdUnlocked(task_id, false);
+    if (task == nullptr)
+      return false;
+    const_cast<CompileTask*>(task)->Ref();
+  }
+  task->DumpRequest();
+  {
+    AUTOLOCK(lock, &mu_);
+    const_cast<CompileTask*>(task)->Deref();
+  }
+  return true;
+}
+
+// Fills |json| with a snapshot of the service state, taken under mu_:
+//  "active", "finished", "failed", "long": arrays of per-task dumps,
+//  "num_exec", "num_file": aggregated counters,
+//  "http_rpc": the dump of http_rpc_,
+//  "goma_version": [my_version, pulled_version], only when an auto updater
+//      is set and reports a positive version,
+//  "last_update_ms": see below.
+// Only failed tasks whose frozen timestamp is later than |after| are
+// reported. "last_update_ms" is the maximum of |after| and the frozen
+// timestamps of the reported failed tasks.
+void CompileService::DumpToJson(Json::Value* json, long long after) {
+  AUTOLOCK(lock, &mu_);
+
+  long long last_update_ms = after;
+
+  {
+    Json::Value active(Json::arrayValue);
+    for (const auto* task : active_tasks_) {
+      Json::Value json_task;
+      task->DumpToJson(false, &json_task);
+      active.append(std::move(json_task));
+    }
+    (*json)["active"] = std::move(active);
+  }
+
+  {
+    Json::Value finished(Json::arrayValue);
+    for (const auto* task : finished_tasks_) {
+      Json::Value json_task;
+      task->DumpToJson(false, &json_task);
+      finished.append(std::move(json_task));
+    }
+    (*json)["finished"] = std::move(finished);
+  }
+
+  {
+    Json::Value failed(Json::arrayValue);
+    for (const auto* task : failed_tasks_) {
+      // Skip failures the caller has already seen.
+      if (task->GetFrozenTimestampMs() <= after)
+        continue;
+      last_update_ms = std::max(last_update_ms, task->GetFrozenTimestampMs());
+      Json::Value json_task;
+      task->DumpToJson(false, &json_task);
+      failed.append(std::move(json_task));
+    }
+    (*json)["failed"] = std::move(failed);
+  }
+
+  {
+    Json::Value long_json(Json::arrayValue);
+    // Sort a copy so that the heap order of long_tasks_ is left intact;
+    // the comparator puts the longest handler time first.
+    std::vector<CompileTask*> long_tasks(long_tasks_);
+    sort(long_tasks.begin(), long_tasks.end(), CompareTaskHandlerTime());
+    for (const auto* task : long_tasks) {
+      Json::Value json_task;
+      task->DumpToJson(false, &json_task);
+      long_json.append(std::move(json_task));
+    }
+    (*json)["long"] = std::move(long_json);
+  }
+
+  {
+    Json::Value num_exec(Json::objectValue);
+
+    num_exec["max_active_tasks"] = max_active_tasks_;
+    num_exec["pending"] = Json::Int64(pending_tasks_.size());
+    num_exec["request"] = num_exec_request_;
+    num_exec["success"] = num_exec_success_;
+    num_exec["failure"] = num_exec_failure_;
+    num_exec["compiler_proxy_fail"] = num_exec_compiler_proxy_failure_;
+    num_exec["compiler_info_stores"] =
+        CompilerInfoCache::instance()->NumStores();
+    num_exec["compiler_info_store_dups"] =
+        CompilerInfoCache::instance()->NumStoreDups();
+    num_exec["compiler_info_fail"] = CompilerInfoCache::instance()->NumFail();
+    num_exec["compiler_info_miss"] = CompilerInfoCache::instance()->NumMiss();
+    num_exec["goma_finished"] = num_exec_goma_finished_;
+    num_exec["goma_cache_hit"] = num_exec_goma_cache_hit_;
+    num_exec["goma_aborted"] = num_exec_goma_aborted_;
+    num_exec["goma_retry"] = num_exec_goma_retry_;
+    num_exec["local_run"] = num_exec_local_run_;
+    num_exec["local_killed"] = num_exec_local_killed_;
+    num_exec["local_finished"] = num_exec_local_finished_;
+    num_exec["fail_fallback"] = num_exec_fail_fallback_;
+
+    Json::Value version_mismatch(Json::objectValue);
+    for (const auto& iter : command_version_mismatch_) {
+      version_mismatch[iter.first] = iter.second;
+    }
+    num_exec["version_mismatch"] = std::move(version_mismatch);
+
+    Json::Value binary_hash_mismatch(Json::objectValue);
+    for (const auto& iter : command_binary_hash_mismatch_) {
+      binary_hash_mismatch[iter.first] = iter.second;
+    }
+    num_exec["binary_hash_mismatch"] = std::move(binary_hash_mismatch);
+
+    (*json)["num_exec"] = std::move(num_exec);
+  }
+
+  {
+    Json::Value num_file;
+    num_file["requested"] = num_file_requested_;
+    num_file["uploaded"] = num_file_uploaded_;
+    num_file["missed"] = num_file_missed_;
+    (*json)["num_file"] = std::move(num_file);
+  }
+
+  {
+    Json::Value http_rpc;
+    http_rpc_->DumpToJson(&http_rpc);
+    (*json)["http_rpc"] = std::move(http_rpc);
+  }
+
+  if (auto_updater_.get()) {
+    int version = auto_updater_->my_version();
+    if (version > 0) {
+      Json::Value goma_version(Json::arrayValue);
+      goma_version.append(version);
+      goma_version.append(auto_updater_->pulled_version());
+
+      (*json)["goma_version"] = std::move(goma_version);
+    }
+  }
+  (*json)["last_update_ms"] = last_update_ms;
+}
+
+void CompileService::DumpStats(std::ostringstream* ss) {
+  GomaStats gstats;
+  std::ostringstream error_ss;
+  std::ostringstream localrun_ss;
+  std::ostringstream mismatches_ss;
+  {
+    AUTOLOCK(lock, &mu_);
+    DumpCommonStatsUnlocked(&gstats);
+    // Note that followings are not included in GomaStats.
+    // GomaStats is used for storing statistics data for buildbot monitoring.
+    // We are suggested by c-i-t monitoring folks not to store string data to
+    // reduce concerns by privacy reviewers. The reviewers may think that
+    // string fields can be used to send arbitrary privacy information.
+    if (!error_to_user_.empty() || !error_to_log_.empty()) {
+      error_ss << "error:" << std::endl;
+      if (!error_to_user_.empty()) {
+        error_ss << " user:" << std::endl;
+      }
+      for (const auto& it : error_to_user_) {
+        error_ss << "  E:"
+                 << it.second << " "  // count
+                 << it.first << std::endl;  // message
+      }
+      if (!error_to_log_.empty()) {
+        error_ss << " log:"
+                 << " E=" << gstats.error_stats().log_error()
+                 << " W=" << gstats.error_stats().log_warning()
+                 << std::endl;
+      }
+    }
+    if (!local_run_reason_.empty()) {
+      localrun_ss << " local run reason:" << std::endl;
+      for (const auto& it : local_run_reason_) {
+        localrun_ss << "  " << it.first << "=" << it.second << std::endl;
+      }
+    }
+    if (!command_version_mismatch_.empty()) {
+      mismatches_ss << "version_mismatch:" << std::endl;
+      for (const auto& it : command_version_mismatch_) {
+        mismatches_ss << " " << it.first << " " << it.second << std::endl;
+      }
+    }
+    if (!command_binary_hash_mismatch_.empty()) {
+      mismatches_ss << "binary_hash_mismatch:" << std::endl;
+      for (const auto& it : command_binary_hash_mismatch_) {
+        mismatches_ss << " " << it.first << " " << it.second << std::endl;
+      }
+    }
+    if (!subprogram_mismatch_.empty()) {
+      mismatches_ss << "subprogram_mismatch:" << std::endl;
+      for (const auto& it : subprogram_mismatch_) {
+        mismatches_ss << " " << it.first << " " << it.second << std::endl;
+      }
+    }
+  }
+
+  (*ss) << "request:"
+        << " total=" << gstats.request_stats().total()
+        << " success=" << gstats.request_stats().success()
+        << " failure=" << gstats.request_stats().failure()
+        << std::endl;
+  (*ss) << " compiler_proxy:"
+        << " fail=" << gstats.request_stats().compiler_proxy().fail()
+        << std::endl;
+  (*ss) << " compiler_info:"
+        << " stores=" << gstats.request_stats().compiler_info().stores()
+        << " store_dups=" << gstats.request_stats().compiler_info().store_dups()
+        << " miss=" << gstats.request_stats().compiler_info().miss()
+        << " fail=" << gstats.request_stats().compiler_info().fail()
+        << std::endl;
+  (*ss) << " goma:"
+        << " finished=" << gstats.request_stats().goma().finished()
+        << " cache_hit=" << gstats.request_stats().goma().cache_hit()
+        << " local_cachehit=" << gstats.request_stats().goma().local_cache_hit()
+        << " aborted=" << gstats.request_stats().goma().aborted()
+        << " retry=" << gstats.request_stats().goma().retry()
+        << " fail=" << gstats.request_stats().goma().fail()
+        << std::endl;
+  const FallbackInSetupStats& fallback_in_setup =
+      gstats.request_stats().fallback_in_setup();
+  (*ss) << " fallback_in_setup:" << std::endl
+        << "  parse_fail=" << fallback_in_setup.failed_to_parse_flags()
+        << " no_remote=" << fallback_in_setup.no_remote_compile_supported()
+        << " http_disabled=" << fallback_in_setup.http_disabled()
+        << std::endl
+        << "  compiler_info_fail="
+        << fallback_in_setup.fail_to_get_compiler_info()
+        << " compiler_disabled=" << fallback_in_setup.compiler_disabled()
+        << " requested_by_user=" << fallback_in_setup.requested_by_user()
+        << std::endl;
+  (*ss) << " local:"
+        << " run=" << gstats.request_stats().local().run()
+        << " killed=" << gstats.request_stats().local().killed()
+        << " finished=" << gstats.request_stats().local().finished()
+        << std::endl;
+  (*ss) << localrun_ss.str();
+  (*ss) << mismatches_ss.str();
+  (*ss) << error_ss.str();
+  (*ss) << "files:"
+        << " requested=" << gstats.file_stats().requested()
+        << " uploaded=" << gstats.file_stats().uploaded()
+        << " missed=" << gstats.file_stats().missed()
+        << std::endl;
+  (*ss) << "outputs:"
+        << " files=" << gstats.output_stats().files()
+        << " rename=" << gstats.output_stats().rename()
+        << " buf=" << gstats.output_stats().buf()
+        << " peak_req=" << gstats.output_stats().peak_req()
+        << std::endl;
+  (*ss) << "memory:"
+        << " consuming=" << gstats.memory_stats().consuming()
+        << std::endl;
+  (*ss) << "time:"
+        << " uptime=" << gstats.time_stats().uptime()
+        << std::endl;
+  (*ss) << "include_processor:"
+        << " total=" << gstats.include_processor_stats().total()
+        << " skipped=" << gstats.include_processor_stats().skipped()
+        << " total_wait_time="
+        << gstats.include_processor_stats().total_wait_time()
+        << " total_run_time="
+        << gstats.include_processor_stats().total_run_time()
+        << std::endl;
+  if (gstats.has_includecache_stats()) {
+    const IncludeCacheStats& ic_stats = gstats.includecache_stats();
+    int original_ave = 0;
+    int filtered_ave = 0;
+    if (ic_stats.total_entries() > 0) {
+      original_ave = ic_stats.original_total_size() / ic_stats.total_entries();
+      filtered_ave = ic_stats.filtered_total_size() / ic_stats.total_entries();
+    }
+
+    (*ss) << "includecache:" << std::endl;
+    (*ss) << "  entries=" << ic_stats.total_entries()
+          << " cache_size=" << ic_stats.total_cache_size()
+          << " hit=" << ic_stats.hit()
+          << " missed=" << ic_stats.missed()
+          << " updated=" << ic_stats.updated()
+          << " evicted=" << ic_stats.evicted() << std::endl;
+    (*ss) << "  orig_total=" << ic_stats.original_total_size()
+          << " orig_max=" << ic_stats.original_max_size()
+          << " orig_ave=" << original_ave
+          << " filter_total=" << ic_stats.filtered_total_size()
+          << " filter_max=" << ic_stats.filtered_max_size()
+          << " filter_ave=" << filtered_ave
+          << std::endl;
+  }
+  if (gstats.has_depscache_stats()) {
+    const DepsCacheStats& dc_stats = gstats.depscache_stats();
+    (*ss) << "depscache:"
+          << " table_size=" << dc_stats.deps_table_size()
+          << " max=" << dc_stats.max_entries()
+          << " total=" << dc_stats.total_entries();
+    size_t average_entries = 0;
+    if (dc_stats.deps_table_size() > 0) {
+      average_entries = dc_stats.total_entries() / dc_stats.deps_table_size();
+    }
+    (*ss) << " average=" << average_entries;
+    (*ss) << " idtable=" << dc_stats.idtable_size()
+          << " hit=" << dc_stats.hit()
+          << " updated=" << dc_stats.updated()
+          << " missed=" << dc_stats.missed()
+          << std::endl;
+  }
+  if (gstats.has_local_output_cache_stats()) {
+    const LocalOutputCacheStats& loc_stats = gstats.local_output_cache_stats();
+    (*ss) << "localoutputcache:"
+          << std::endl
+          << " save_success=" << loc_stats.save_success()
+          << " save_success_time_ms=" << loc_stats.save_success_time_ms()
+          << " save_failure=" << loc_stats.save_failure()
+          << std::endl
+          << " lookup_success=" << loc_stats.lookup_success()
+          << " lookup_success_time_ms=" << loc_stats.lookup_success_time_ms()
+          << " lookup_miss=" << loc_stats.lookup_miss()
+          << " lookup_failure=" << loc_stats.lookup_failure()
+          << std::endl
+          << " commit_success=" << loc_stats.commit_success()
+          << " commit_success_time_ms=" << loc_stats.commit_success_time_ms()
+          << " commit_failure=" << loc_stats.commit_failure()
+          << std::endl
+          << " gc_count=" << loc_stats.gc_count()
+          << " gc_total_time_ms=" << loc_stats.gc_total_time_ms()
+          << std::endl;
+    // TODO: Merge these to stats.
+    if (LocalOutputCache::IsEnabled()) {
+      (*ss) << " gc_removed_items="
+            << LocalOutputCache::instance()->TotalGCRemovedItems()
+            << " gc_removed_bytes="
+            << LocalOutputCache::instance()->TotalGCRemovedBytes()
+            << std::endl
+            << " total_cache_count="
+            << LocalOutputCache::instance()->TotalCacheCount()
+            << " total_cache_bytes="
+            << LocalOutputCache::instance()->TotalCacheAmountInByte()
+            << std::endl;
+    }
+  }
+
+  (*ss) << "http_rpc:"
+        << " query=" << gstats.http_rpc_stats().query()
+        << " retry=" << gstats.http_rpc_stats().retry()
+        << " timeout=" << gstats.http_rpc_stats().timeout()
+        << " error=" << gstats.http_rpc_stats().error()
+        << std::endl;
+
+  if (gstats.has_subprocess_stats()) {
+    (*ss) << "burst_mode:"
+          << " by_network="
+          << gstats.subprocess_stats().count_burst_by_network_error()
+          << " by_compiler_disabled="
+          << gstats.subprocess_stats().count_burst_by_compiler_disabled()
+          << std::endl;
+  }
+}
+
+// Serializes the service statistics into a GomaStatzStats JSON document.
+//
+// |json_string| receives the JSON text.  It is cleared if the protobuf to
+// JSON conversion fails.
+// |human_readable| equal to HumanReadability::kHumanReadable enables
+// whitespace in the generated JSON.
+void CompileService::DumpStatsJson(
+    std::string* json_string,
+    CompileService::HumanReadability human_readable) {
+  GomaStatzStats statz;
+  {
+    // The counters and the per-message maps are read while holding mu_.
+    AUTOLOCK(lock, &mu_);
+    DumpCommonStatsUnlocked(statz.mutable_stats());
+
+    if (!error_to_user_.empty()) {
+      *statz.mutable_error_to_user() = google::protobuf::Map<string, int64_t>(
+          error_to_user_.begin(), error_to_user_.end());
+    }
+    if (!local_run_reason_.empty()) {
+      *statz.mutable_local_run_reason() =
+          google::protobuf::Map<string, int64_t>(
+              local_run_reason_.begin(),
+              local_run_reason_.end());
+    }
+    if (!command_version_mismatch_.empty()) {
+      *statz.mutable_version_mismatch() =
+          google::protobuf::Map<string, int64_t>(
+              command_version_mismatch_.begin(),
+              command_version_mismatch_.end());
+    }
+    if (!command_binary_hash_mismatch_.empty()) {
+      // Binary hash mismatches go to their own field.  Storing them in
+      // subprogram_mismatch would mislabel them, and the assignment below
+      // would overwrite them whenever subprogram_mismatch_ is non-empty.
+      *statz.mutable_binary_hash_mismatch() =
+          google::protobuf::Map<string, int64_t>(
+              command_binary_hash_mismatch_.begin(),
+              command_binary_hash_mismatch_.end());
+    }
+    if (!subprogram_mismatch_.empty()) {
+      *statz.mutable_subprogram_mismatch() =
+          google::protobuf::Map<string, int64_t>(subprogram_mismatch_.begin(),
+                                                 subprogram_mismatch_.end());
+    }
+  }
+
+  // Then, convert statz to json string.
+  google::protobuf::util::JsonPrintOptions options;
+  // This is necessary, otherwise field whose value is 0 won't be printed.
+  options.always_print_primitive_fields = true;
+  if (human_readable == HumanReadability::kHumanReadable) {
+    options.add_whitespace = true;
+  }
+  google::protobuf::util::Status status =
+      google::protobuf::util::MessageToJsonString(statz, json_string, options);
+  if (!status.ok()) {
+    LOG(ERROR) << "failed to convert GomaStatzStats to json"
+               << " error_code=" << status.error_code()
+               << " error_message=" << status.error_message();
+    // Do not hand back partially written output.
+    json_string->clear();
+  }
+}
+
+// Locked entry point for ClearTasksUnlocked(): acquires mu_ and then clears
+// every tracked task list.
+void CompileService::ClearTasks() {
+  AUTOLOCK(lock, &mu_);
+  ClearTasksUnlocked();
+}
+
+// Calls Deref() on every task in the active, finished, failed and long task
+// lists and empties each list.  The list sizes are logged before the
+// corresponding lists are cleared.  ClearTasks() calls this with mu_ held.
+void CompileService::ClearTasksUnlocked() {
+  // Active tasks first.
+  LOG(INFO) << "active tasks:" << active_tasks_.size();
+  for (auto* active : active_tasks_)
+    active->Deref();
+  active_tasks_.clear();
+
+  // Then the three history lists; their sizes are reported together.
+  LOG(INFO) << "finished_tasks: " << finished_tasks_.size()
+            << ", failed_tasks: " << failed_tasks_.size()
+            << ", long_tasks: " << long_tasks_.size();
+
+  for (auto* finished : finished_tasks_)
+    finished->Deref();
+  finished_tasks_.clear();
+
+  for (auto* failed : failed_tasks_)
+    failed->Deref();
+  failed_tasks_.clear();
+
+  for (auto* long_running : long_tasks_)
+    long_running->Deref();
+  long_tasks_.clear();
+}
+
+// Writes a text report to |ss|: the hermetic mode setting, the dumps of the
+// compiler info builder and the compiler info cache, and every cached local
+// compiler path entry.
+void CompileService::DumpCompilerInfo(std::ostringstream* ss) {
+  std::ostringstream& out = *ss;
+
+  if (!hermetic_) {
+    out << "non-hermetic mode\n";
+  } else {
+    out << "hermetic mode\n"
+        << (hermetic_fallback_
+                ? " local fallback if same compiler doesn't exist on server\n"
+                : " error if same compiler doesn't exist on server\n");
+  }
+  out << "\n";
+
+  compiler_info_builder_->Dump(ss);
+  CompilerInfoCache::instance()->Dump(ss);
+
+  // local_compiler_paths_ is read under the shared side of compiler_mu_.
+  AUTO_SHARED_LOCK(lock, &compiler_mu_);
+  out << "local compiler path:" << local_compiler_paths_.size() << "\n";
+  out << "\n[local compiler path]\n\n";
+  for (const auto& kv : local_compiler_paths_) {
+    const auto& paths = kv.second;
+    out << "key: " << kv.first << "\n"
+        << "local_compiler:" << paths.first << "\n"
+        << "local_path:" << paths.second << "\n\n";
+  }
+}
+
+// Looks up the local compiler for |basename_orig| invoked through
+// |gomacc_path|.
+//
+// Two cache keys are tried: one that uses kCurrentDir in place of the working
+// directory (for the case where the result does not depend on cwd) and one
+// that embeds |cwd|.  A cache hit is answered under the shared lock;
+// otherwise the work is delegated to FindLocalCompilerPathAndUpdate().
+// Returns true when |local_compiler_path| and |no_goma_local_path| were
+// filled in.
+bool CompileService::FindLocalCompilerPath(
+    const string& gomacc_path,
+    const string& basename_orig,
+    const string& cwd,
+    const string& local_path,
+    const string& pathext,
+    string* local_compiler_path,
+    string* no_goma_local_path) {
+  const string prefix = gomacc_path + kSep + basename_orig;
+  // Key used when the lookup does not depend on cwd.
+  const string key = prefix + kCurrentDir + local_path;
+  // Key used when the lookup depends on cwd.
+  const string key_cwd = prefix + kSep + cwd + kSep + local_path;
+
+  VLOG(1) << "find local compiler: key=" << key << " or " << key_cwd;
+
+  bool cached = false;
+  {
+    AUTO_SHARED_LOCK(lock, &compiler_mu_);
+    cached = FindLocalCompilerPathUnlocked(
+        key, key_cwd, local_compiler_path, no_goma_local_path);
+  }
+  if (cached)
+    return true;
+
+  return FindLocalCompilerPathAndUpdate(
+      key, key_cwd, gomacc_path, basename_orig, cwd, local_path, pathext,
+      local_compiler_path, no_goma_local_path);
+}
+
+// Cache-only lookup in local_compiler_paths_.  |key| is tried first, then
+// |key_cwd|.  On a hit the cached pair is copied into |local_compiler_path|
+// and |no_goma_local_path| and true is returned; otherwise the outputs are
+// left untouched and false is returned.
+// The caller must hold compiler_mu_ (shared or exclusive).
+bool CompileService::FindLocalCompilerPathUnlocked(
+    const string& key,
+    const string& key_cwd,
+    string* local_compiler_path,
+    string* no_goma_local_path) const {
+  const string* const candidates[] = {&key, &key_cwd};
+  for (const string* candidate : candidates) {
+    const auto hit = local_compiler_paths_.find(*candidate);
+    if (hit == local_compiler_paths_.end())
+      continue;
+    *local_compiler_path = hit->second.first;
+    *no_goma_local_path = hit->second.second;
+    return true;
+  }
+  return false;
+}
+
+// Resolves the local compiler path and records it in local_compiler_paths_.
+//
+// |key| and |key_cwd| are the two cache keys built by
+// FindLocalCompilerPath().  |local_compiler_path| is in/out: a non-empty
+// value on entry is treated as a compiler path to be resolved; otherwise
+// (or when that value turns out to be gomacc) the compiler is searched via
+// GetRealExecutablePath().  On success |local_compiler_path| and
+// |no_goma_local_path| are filled in, the pair is cached, and true is
+// returned.  Returns false when the compiler cannot be found or
+// |gomacc_path| cannot be stat'ed.
+bool CompileService::FindLocalCompilerPathAndUpdate(
+    const string& key,
+    const string& key_cwd,
+    const string& gomacc_path,
+    const string& basename,
+    const string& cwd,
+    const string& local_path,
+    const string& pathext,
+    string* local_compiler_path,
+    string* no_goma_local_path) {
+  // Cheap attempt first: cache lookup under the shared lock.
+  {
+    AUTO_SHARED_LOCK(lock, &compiler_mu_);
+    if (FindLocalCompilerPathUnlocked(
+            key, key_cwd,
+            local_compiler_path, no_goma_local_path)) {
+      return true;
+    }
+  }
+
+  // From here on the exclusive lock is held until return.  Look up once more
+  // under the exclusive lock before doing any work or inserting.
+  AUTO_EXCLUSIVE_LOCK(lock, &compiler_mu_);
+  if (FindLocalCompilerPathUnlocked(
+          key, key_cwd,
+          local_compiler_path, no_goma_local_path)) {
+    return true;
+  }
+
+  // Cache key for the new entry; switched to key_cwd below when the result
+  // turns out to depend on cwd.
+  string local_compiler_key = key;
+
+  if (!local_compiler_path->empty()) {
+    if (!IsGomacc(*local_compiler_path, local_path, pathext, cwd)) {
+      // Convert to an absolute path if the path is a relative path.
+      string orig_local_compiler_path = *local_compiler_path;
+#ifndef _WIN32
+      local_compiler_path->assign(
+          PathResolver::ResolvePath(
+              file::JoinPathRespectAbsolute(cwd, orig_local_compiler_path)));
+#else
+      local_compiler_path->assign(
+          PathResolver::ResolvePath(
+              ResolveExtension(orig_local_compiler_path, pathext, cwd)));
+#endif
+      if (local_compiler_path->empty()) {
+        LOG(ERROR) << "cannot find local_compiler:"
+                   << " cwd=" << cwd
+                   << " local_compiler=" << orig_local_compiler_path;
+        return false;
+      }
+      *no_goma_local_path = local_path;
+      // The resolved path differs from what was given, so the entry is
+      // stored under the cwd-specific key.
+      if (*local_compiler_path != orig_local_compiler_path)
+        local_compiler_key = key_cwd;
+      local_compiler_paths_.insert(
+          std::make_pair(local_compiler_key,
+                         std::make_pair(*local_compiler_path,
+                                        *no_goma_local_path)));
+      return true;
+    }
+    // The given path is gomacc itself: report it and fall through to the
+    // search below.
+    LOG(ERROR) << "local_compiler is gomacc:" << *local_compiler_path;
+  }
+
+  FileId gomacc_fileid(gomacc_path);
+  if (!gomacc_fileid.IsValid()) {
+    PLOG(ERROR) << "stat gomacc_path:" << gomacc_path;
+    return false;
+  }
+
+  // Search for the real executable named |basename|.
+  bool is_relative;
+  string no_goma_path_env;
+  if (GetRealExecutablePath(&gomacc_fileid, basename, cwd,
+                            local_path, pathext,
+                            local_compiler_path, &no_goma_path_env,
+                            &is_relative)) {
+    // A relative result is stored under the cwd-specific key.
+    if (is_relative)
+      local_compiler_key = key_cwd;
+    no_goma_local_path->assign(no_goma_path_env);
+    local_compiler_paths_.insert(
+        std::make_pair(local_compiler_key,
+                       std::make_pair(*local_compiler_path,
+                                      *no_goma_local_path)));
+    return true;
+  }
+  LOG(WARNING) << basename << " not found in " << local_path;
+  return false;
+}
+
+// Obtains the compiler info for |param->key| and eventually runs |callback|.
+//
+// On a CompilerInfoCache hit, |param->cache_hit| is set and |callback| is
+// run directly from this call.  Otherwise the request is registered in
+// compiler_info_waiters_: the first requester for a key schedules
+// GetCompilerInfoInternal() on compiler_info_pool_, and later requesters for
+// the same key are queued in the waiter list and return immediately.
+void CompileService::GetCompilerInfo(
+    GetCompilerInfoParam* param,
+    OneshotClosure* callback) {
+  param->state.reset(CompilerInfoCache::instance()->Lookup(param->key));
+  if (param->state.get() != nullptr) {
+    param->cache_hit = true;
+    param->state.get()->Use(param->key.local_compiler_path, *param->flags);
+    callback->Run();
+    return;
+  }
+  {
+    AUTOLOCK(lock, &compiler_info_mu_);
+    // Insert a placeholder entry; p.second tells whether this call is the
+    // first one for the key.
+    auto p = compiler_info_waiters_.insert(
+      std::make_pair(
+          param->key.ToString(
+              CompilerInfoCache::Key::kCwdRelative),
+          static_cast<CompilerInfoWaiterList*>(nullptr)));
+    if (p.second) {
+      // first call for the key.
+      // The list is taken over and freed by GetCompilerInfoInternal().
+      p.first->second = new CompilerInfoWaiterList;
+      LOG(INFO) << param->trace_id << " call GetCompilerInfoInternal";
+    } else {
+      // another task already requested the same key.
+      // callback will be called once the other task gets compiler info.
+      p.first->second->emplace_back(param, callback);
+      LOG(INFO) << param->trace_id << " wait GetCompilerInfoInternal"
+                << " queue=" << p.first->second->size();
+      return;
+    }
+  }
+  // Only the first requester reaches this point.
+  wm_->RunClosureInPool(FROM_HERE,
+                        compiler_info_pool_,
+                        NewCallback(
+                            this, &CompileService::GetCompilerInfoInternal,
+                            param, callback),
+                        WorkerThreadManager::PRIORITY_MED);
+}
+
+// Second half of GetCompilerInfo(), scheduled on compiler_info_pool_.
+//
+// Looks the key up in CompilerInfoCache once more; on a miss it builds the
+// compiler info with FillFromCompilerOutputs() and stores it in the cache.
+// It then removes the waiter list registered for the key and schedules
+// |callback| plus the callback of every queued waiter on the thread
+// recorded in the respective param.
+void CompileService::GetCompilerInfoInternal(
+    GetCompilerInfoParam* param,
+    OneshotClosure* callback) {
+  param->state.reset(CompilerInfoCache::instance()->Lookup(param->key));
+  if (param->state.get() == nullptr) {
+    SimpleTimer timer;
+
+    // Set invalid GOMA env flag to fail when local_compiler_path
+    // is (masqueraded) gomacc.
+    // FillFromCompilerOutputs will run local_compiler_path.
+    // If local_compiler_path is (masqueraded) gomacc, it'll reenter
+    // this routine and deadlock on mu_.  Invalid GOMA env flag
+    // avoids this deadlock.
+    std::vector<string> env(param->run_envs);
+    env.push_back("GOMA_WILL_FAIL_WITH_UKNOWN_FLAG=true");
+    std::unique_ptr<CompilerInfoData> cid(
+        compiler_info_builder_->FillFromCompilerOutputs(
+            *param->flags, param->key.local_compiler_path, env));
+
+    param->state.reset(CompilerInfoCache::instance()->Store(
+        param->key, std::move(cid)));
+    param->updated = true;
+    LOG(INFO) << param->trace_id
+              << " FillFromCompilerOutputs"
+              << " state=" << param->state.get()
+              << " found=" << param->state.get()->info().found()
+              << " in " << timer.GetInMilliSeconds() << "[ms]";
+  }
+  param->state.get()->Use(param->key.local_compiler_path, *param->flags);
+  // Take ownership of the waiter list allocated in GetCompilerInfo() and
+  // unregister the key, all under compiler_info_mu_.
+  std::unique_ptr<CompilerInfoWaiterList> waiters;
+  {
+    AUTOLOCK(lock, &compiler_info_mu_);
+    const string key_cwd = param->key.ToString(
+        CompilerInfoCache::Key::kCwdRelative);
+    auto p = compiler_info_waiters_.find(key_cwd);
+    CHECK(p != compiler_info_waiters_.end())
+        << param->trace_id << " state=" << param->state.get()
+        << " key_cwd=" << key_cwd;
+    waiters.reset(p->second);
+    compiler_info_waiters_.erase(p);
+  }
+  // keep alive at least in this func.
+  // param->state might be derefed so CompilerInfoState may be deleted.
+  ScopedCompilerInfoState state(param->state.get());
+
+  // Copied because |param| must not be touched after its callback has been
+  // scheduled.
+  string trace_id = param->trace_id;
+
+  wm_->RunClosureInThread(FROM_HERE,
+                          param->thread_id,
+                          callback,
+                          WorkerThreadManager::PRIORITY_MED);
+  // param may be invalidated here.
+  CHECK(waiters.get() != nullptr) << trace_id << " state=" << state.get();
+  LOG(INFO) << trace_id << " callback " << waiters->size() << " waiters";
+  // Hand the same state to every queued waiter and schedule its callback.
+  for (const auto& p : *waiters) {
+    GetCompilerInfoParam* wparam = p.first;
+    OneshotClosure* wcallback = p.second;
+    wparam->state.reset(state.get());
+    VLOG(1) << trace_id << " callback for " << wparam->trace_id;
+    wparam->state.get()->Use(wparam->key.local_compiler_path, *wparam->flags);
+    wm_->RunClosureInThread(FROM_HERE,
+                            wparam->thread_id,
+                            wcallback,
+                            WorkerThreadManager::PRIORITY_MED);
+  }
+}
+
+// Forwards to CompilerInfoCache::Disable() for |state| with
+// |disabled_reason| and returns its result.
+bool CompileService::DisableCompilerInfo(CompilerInfoState* state,
+                                         const string& disabled_reason) {
+  auto* cache = CompilerInfoCache::instance();
+  const bool result = cache->Disable(state, disabled_reason);
+  return result;
+}
+
+// Increments the occurrence counter of |exec_command_version_mismatch|.
+// Returns true when the message had not been recorded before.
+bool CompileService::RecordCommandSpecVersionMismatch(
+    const string& exec_command_version_mismatch) {
+  AUTOLOCK(lock, &mu_);
+  // insert() leaves an existing counter untouched and creates a new one at 0.
+  const auto inserted = command_version_mismatch_.insert(
+      std::make_pair(exec_command_version_mismatch, 0));
+  ++inserted.first->second;
+  const bool is_new = inserted.second;
+  return is_new;
+}
+
+// Increments the occurrence counter of |exec_command_binary_hash_mismatch|.
+// Returns true when the message had not been recorded before.
+bool CompileService::RecordCommandSpecBinaryHashMismatch(
+    const string& exec_command_binary_hash_mismatch) {
+  AUTOLOCK(lock, &mu_);
+  // insert() leaves an existing counter untouched and creates a new one at 0.
+  const auto inserted = command_binary_hash_mismatch_.insert(
+      std::make_pair(exec_command_binary_hash_mismatch, 0));
+  ++inserted.first->second;
+  const bool is_new = inserted.second;
+  return is_new;
+}
+
+// Increments the occurrence counter of |subprogram_mismatch|.
+// Returns true when the message had not been recorded before.
+bool CompileService::RecordSubprogramMismatch(
+    const string& subprogram_mismatch) {
+  AUTOLOCK(lock, &mu_);
+  // insert() leaves an existing counter untouched and creates a new one at 0.
+  const auto inserted =
+      subprogram_mismatch_.insert(std::make_pair(subprogram_mismatch, 0));
+  ++inserted.first->second;
+  const bool is_new = inserted.second;
+  return is_new;
+}
+
+// Counts one occurrence of |error_message| in error_to_log_.  The severity
+// flag stored for a message is the one from its first occurrence; when a
+// later call passes a different |is_error|, the inconsistency is logged.
+void CompileService::RecordErrorToLog(
+    const string& error_message, bool is_error) {
+  AUTOLOCK(lock, &mu_);
+  const auto inserted = error_to_log_.insert(
+      std::make_pair(error_message, std::make_pair(is_error, 0)));
+  auto& record = inserted.first->second;  // (is_error, count)
+  ++record.second;
+
+  const bool seen_before = !inserted.second;
+  LOG_IF(ERROR, seen_before && record.first != is_error)
+      << error_message << " was is_error=" << record.first
+      << " but is_error=" << is_error;
+}
+
+// Counts one occurrence of every message in |error_messages| in
+// error_to_user_, creating the counter on first sight.
+void CompileService::RecordErrorsToUser(
+    const std::vector<string>& error_messages) {
+  AUTOLOCK(lock, &mu_);
+  for (const string& message : error_messages) {
+    const auto inserted = error_to_user_.insert(std::make_pair(message, 0));
+    ++inserted.first->second;
+  }
+}
+
+// Updates failed_inputs_ for |inputs|: on |success| every input is removed
+// from the set, otherwise every input is added to it.
+void CompileService::RecordInputResult(
+    const std::vector<string>& inputs, bool success) {
+  AUTO_EXCLUSIVE_LOCK(lock, &failed_inputs_mu_);
+  if (success) {
+    for (const auto& name : inputs)
+      failed_inputs_.erase(name);
+    return;
+  }
+  for (const auto& name : inputs)
+    failed_inputs_.insert(name);
+}
+
+// Returns true when at least one element of |inputs| is currently present in
+// failed_inputs_.
+bool CompileService::ContainFailedInput(
+    const std::vector<string>& inputs) const {
+  AUTO_SHARED_LOCK(lock, &failed_inputs_mu_);
+  const auto not_found = failed_inputs_.end();
+  for (const auto& name : inputs) {
+    if (failed_inputs_.find(name) != not_found)
+      return true;
+  }
+  return false;
+}
+
+// Tries to reserve |filesize| bytes of in-memory output buffer.
+//
+// The request is rejected when |filesize| alone exceeds
+// max_sum_output_size_ or when adding it would overflow one of the running
+// sums.  Otherwise the requested sum (and its peak) is updated, and the
+// reservation succeeds only while the current sum stays below
+// max_sum_output_size_.  On success |buf| is resized to |filesize| and true
+// is returned; otherwise |buf| is cleared and false is returned.
+bool CompileService::AcquireOutputBuffer(size_t filesize, string* buf) {
+  DCHECK_EQ(0U, buf->size());
+
+  bool acquired = false;
+  {
+    // Only the accounting happens under mu_; |buf| itself is modified after
+    // the lock is released because resize()/clear() could be slow.
+    AUTOLOCK(lock, &mu_);
+    const bool too_large =
+        filesize > max_sum_output_size_ ||
+        req_sum_output_size_ + filesize < req_sum_output_size_ ||
+        cur_sum_output_size_ + filesize < cur_sum_output_size_;
+    if (too_large) {
+      LOG(ERROR) << "too large output buf size:" << filesize;
+    } else {
+      req_sum_output_size_ += filesize;
+      if (peak_req_sum_output_size_ < req_sum_output_size_) {
+        peak_req_sum_output_size_ = req_sum_output_size_;
+      }
+
+      if (cur_sum_output_size_ + filesize >= max_sum_output_size_) {
+        LOG(INFO) << "output buf size over:"
+                  << " cur=" << cur_sum_output_size_
+                  << " req=" << filesize
+                  << " max=" << max_sum_output_size_;
+      } else {
+        cur_sum_output_size_ += filesize;
+        num_file_output_buf_++;
+        acquired = true;
+      }
+    }
+  }
+
+  if (!acquired) {
+    buf->clear();
+    return false;
+  }
+  buf->resize(filesize);
+  return true;
+}
+
+// Gives back an output buffer.  |filesize| is subtracted from the requested
+// sum and the actual size of |buf| from the current sum; both are clamped at
+// zero instead of underflowing.  |buf| is cleared.
+void CompileService::ReleaseOutputBuffer(size_t filesize, string* buf) {
+  AUTOLOCK(lock, &mu_);
+  req_sum_output_size_ = (req_sum_output_size_ < filesize)
+                             ? 0
+                             : req_sum_output_size_ - filesize;
+
+  const size_t released = buf->size();
+  buf->clear();
+  if (released <= cur_sum_output_size_) {
+    cur_sum_output_size_ -= released;
+    return;
+  }
+
+  // More is being released than is accounted for: report and reset.
+  LOG(ERROR) << "output buf size error:"
+             << " cur=" << cur_sum_output_size_
+             << " release=" << released;
+  cur_sum_output_size_ = 0;
+}
+
+// Counts one output file; when |rename| is true it is also counted as a
+// renamed output.
+void CompileService::RecordOutputRename(bool rename) {
+  AUTOLOCK(lock, &mu_);
+  ++num_file_output_;
+  if (!rename)
+    return;
+  ++num_file_rename_output_;
+}
+
+// Returns the delay to apply before starting a local subprocess.
+//
+// The estimate is kept in function-local statics and recomputed from the
+// histogram statistics on every kTimeUpdateCount-th call; the calls in
+// between return the cached value.  local_run_delay_msec_ is added to each
+// freshly computed estimate.
+int CompileService::GetEstimatedSubprocessDelayTime() {
+  static int count = 0;
+  static int delay = 0;
+  static const int kTimeUpdateCount = 20;
+
+  // The statics above are shared by all callers, so they are read and
+  // written only while mu_ is held.  This includes reading |delay| for the
+  // return value; reading it after releasing the lock would race with a
+  // concurrent update.
+  AUTOLOCK(lock, &mu_);
+  if ((count % kTimeUpdateCount) == 0) {
+    int mean_include_fileload_time = histogram_->GetStatMean(
+        CompilerProxyHistogram::IncludeFileloadTime);
+    int mean_rpc_call_time = histogram_->GetStatMean(
+        CompilerProxyHistogram::RPCCallTime);
+    int mean_file_response_time = histogram_->GetStatMean(
+        CompilerProxyHistogram::FileResponseTime);
+    int mean_local_pending_time = histogram_->GetStatMean(
+        CompilerProxyHistogram::LocalPendingTime);
+    int mean_local_run_time = histogram_->GetStatMean(
+        CompilerProxyHistogram::LocalRunTime);
+
+    int mean_remote_time = mean_include_fileload_time
+        + mean_rpc_call_time
+        + mean_file_response_time;
+    int mean_local_time = mean_local_pending_time + mean_local_run_time;
+
+    if (mean_remote_time >= mean_local_time) {
+      // If local run is fast enough, it uses local as much as possible.
+      delay = 0;
+    } else {
+      // Otherwise, local run is slower than remote call.
+      // In this case, it would be better to use remote call as much as
+      // possible.  local run, however, will be used to mitigate a remote
+      // call stall case (e.g. http shows no activity for long time).
+      if (dont_kill_subprocess_) {
+        // Mean remote time plus three standard deviations of each
+        // component (the original note calls this "99.7% of remote time").
+        double sd_include_fileload_time =
+            histogram_->GetStatStandardDeviation(
+                CompilerProxyHistogram::IncludeFileloadTime);
+        double sd_rpc_call_time =
+            histogram_->GetStatStandardDeviation(
+                CompilerProxyHistogram::RPCCallTime);
+        double sd_file_response_time =
+            histogram_->GetStatStandardDeviation(
+                CompilerProxyHistogram::FileResponseTime);
+        delay = static_cast<int>(mean_remote_time
+                                 + 3 * sd_include_fileload_time
+                                 + 3 * sd_rpc_call_time
+                                 + 3 * sd_file_response_time);
+      } else {
+        delay = mean_remote_time;
+      }
+    }
+    VLOG(2) << "estimated delay subproc:"
+            << " remote=" << mean_remote_time
+            << " local=" << mean_local_time
+            << " delay=" << delay;
+    DCHECK_GE(delay, 0);
+    delay += local_run_delay_msec_;
+  }
+  ++count;
+
+  // Snapshot taken while the lock is still held.
+  const int current_delay = delay;
+  return current_delay;
+}
+
+// Linear search for the task whose id() equals |task_id|.  The active task
+// list is consulted only when |include_active| is true; the finished, failed
+// and long task lists are always searched, in that order.  Returns nullptr
+// when no list contains a matching task.
+const CompileTask* CompileService::FindTaskByIdUnlocked(
+    int task_id, bool include_active) {
+  if (include_active) {
+    for (const auto* candidate : active_tasks_) {
+      if (candidate->id() == task_id) {
+        return candidate;
+      }
+    }
+  }
+
+  for (const auto* candidate : finished_tasks_) {
+    if (candidate->id() == task_id) {
+      return candidate;
+    }
+  }
+
+  for (const auto* candidate : failed_tasks_) {
+    if (candidate->id() == task_id) {
+      return candidate;
+    }
+  }
+
+  for (const auto* candidate : long_tasks_) {
+    if (candidate->id() == task_id) {
+      return candidate;
+    }
+  }
+
+  return nullptr;
+}
+
+// Writes an ErrorNotices message, serialized as one line of JSON, to |ss|.
+//
+// The single notice carries version kGomaErrorNoticeVersion and an
+// InfraStatus filled from the common stats.  compile_error is set to
+// COMPILER_PROXY_FAILURE when compiler_proxy failures were counted, or when
+// running hermetic without fallback and the compiler info cache reports a
+// compiler mismatch.  A failure of the JSON conversion is logged; whatever
+// the conversion produced is still written to |ss| followed by a newline.
+void CompileService::DumpErrorStatus(std::ostringstream* ss) {
+  const int kGomaErrorNoticeVersion = 1;
+
+  ErrorNotices error_notices;
+  ErrorNotice* notice = error_notices.add_notice();
+  notice->set_version(kGomaErrorNoticeVersion);
+
+  // TODO: decide the design and implement more error notice.
+  GomaStats gstats;
+  {
+    AUTOLOCK(lock, &mu_);
+    DumpCommonStatsUnlocked(&gstats);
+  }
+  InfraStatus* infra_status = notice->mutable_infra_status();
+  infra_status->set_ping_status_code(
+      gstats.http_rpc_stats().ping_status_code());
+  infra_status->set_num_http_sent(
+      gstats.http_rpc_stats().query());
+  infra_status->set_num_http_active(
+      gstats.http_rpc_stats().active());
+  infra_status->set_num_http_retry(
+      gstats.http_rpc_stats().retry());
+  infra_status->set_num_http_timeout(
+      gstats.http_rpc_stats().timeout());
+  infra_status->set_num_http_error(
+      gstats.http_rpc_stats().error());
+  infra_status->set_num_network_error(
+      gstats.http_rpc_stats().network_error());
+  infra_status->set_num_network_recovered(
+      gstats.http_rpc_stats().network_recovered());
+  infra_status->set_num_compiler_info_miss(
+      gstats.request_stats().compiler_info().miss());
+  infra_status->set_num_compiler_info_fail(
+      gstats.request_stats().compiler_info().fail());
+  infra_status->set_num_exec_fail_fallback(
+      gstats.request_stats().goma().fail());
+  infra_status->set_num_exec_compiler_proxy_failure(
+      gstats.request_stats().compiler_proxy().fail());
+  infra_status->set_num_user_error(
+      gstats.error_stats().user_error());
+
+  if (infra_status->num_exec_compiler_proxy_failure() > 0) {
+    notice->set_compile_error(ErrorNotice::COMPILER_PROXY_FAILURE);
+  }
+  // If GOMA_HERMETIC=error, compile error should also be goma's failure,
+  // not compiled code is bad.
+  bool compiler_mismatch = CompilerInfoCache::instance()->HasCompilerMismatch();
+  if (hermetic_ && !hermetic_fallback_ && compiler_mismatch) {
+    notice->set_compile_error(ErrorNotice::COMPILER_PROXY_FAILURE);
+  }
+
+  std::string s;
+  google::protobuf::util::JsonPrintOptions options;
+  options.preserve_proto_field_names = true;
+  // Do not drop a conversion failure silently; report it like
+  // DumpStatsJson() does.
+  google::protobuf::util::Status status =
+      google::protobuf::util::MessageToJsonString(error_notices, &s, options);
+  if (!status.ok()) {
+    LOG(ERROR) << "failed to convert ErrorNotices to json"
+               << " error_code=" << status.error_code()
+               << " error_message=" << status.error_message();
+  }
+  *ss << s << '\n';
+}
+
+// Fills |stats| with the counters kept by this service and with the stats
+// exported by the caches, the HTTP RPC client and the subprocess option
+// setter.  The callers in this file invoke it with mu_ held.
+void CompileService::DumpCommonStatsUnlocked(GomaStats* stats) {
+  // --- request stats ---
+  RequestStats* request = stats->mutable_request_stats();
+  request->set_total(num_exec_request_);
+  request->set_success(num_exec_success_);
+  request->set_failure(num_exec_failure_);
+  request->mutable_compiler_proxy()->set_fail(
+      num_exec_compiler_proxy_failure_);
+
+  auto* compiler_info = request->mutable_compiler_info();
+  auto* info_cache = CompilerInfoCache::instance();
+  compiler_info->set_stores(info_cache->NumStores());
+  compiler_info->set_store_dups(info_cache->NumStoreDups());
+  compiler_info->set_miss(info_cache->NumMiss());
+  compiler_info->set_fail(info_cache->NumFail());
+  compiler_info->set_loaded_size_bytes(info_cache->LoadedSize());
+
+  auto* goma = request->mutable_goma();
+  goma->set_finished(num_exec_goma_finished_);
+  goma->set_cache_hit(num_exec_goma_cache_hit_);
+  goma->set_local_cache_hit(num_exec_goma_local_cache_hit_);
+  goma->set_aborted(num_exec_goma_aborted_);
+  goma->set_retry(num_exec_goma_retry_);
+  goma->set_fail(num_exec_fail_fallback_);
+
+  auto* local = request->mutable_local();
+  local->set_run(num_exec_local_run_);
+  local->set_killed(num_exec_local_killed_);
+  local->set_finished(num_exec_local_finished_);
+
+  // TODO: local run reason.  list up enum and show with it.
+  //                    might need to avoid string field for privacy reason.
+  // TODO: error reason. make it enum & show.
+  FallbackInSetupStats* fallback = request->mutable_fallback_in_setup();
+  fallback->set_failed_to_parse_flags(
+      num_forced_fallback_in_setup_[kFailToParseFlags]);
+  fallback->set_no_remote_compile_supported(
+      num_forced_fallback_in_setup_[kNoRemoteCompileSupported]);
+  fallback->set_http_disabled(
+      num_forced_fallback_in_setup_[kHTTPDisabled]);
+  fallback->set_fail_to_get_compiler_info(
+      num_forced_fallback_in_setup_[kFailToGetCompilerInfo]);
+  fallback->set_compiler_disabled(
+      num_forced_fallback_in_setup_[kCompilerDisabled]);
+  fallback->set_requested_by_user(
+      num_forced_fallback_in_setup_[kRequestedByUser]);
+
+  // --- file / output / memory / time stats ---
+  FileStats* files = stats->mutable_file_stats();
+  files->set_requested(num_file_requested_);
+  files->set_uploaded(num_file_uploaded_);
+  files->set_missed(num_file_missed_);
+
+  OutputStats* outputs = stats->mutable_output_stats();
+  outputs->set_files(num_file_output_);
+  outputs->set_rename(num_file_rename_output_);
+  outputs->set_buf(num_file_output_buf_);
+  outputs->set_peak_req(peak_req_sum_output_size_);
+
+  stats->mutable_memory_stats()->set_consuming(
+      GetConsumingMemoryOfCurrentProcess());
+  stats->mutable_time_stats()->set_uptime(time(nullptr) - start_time());
+
+  // --- include processor and caches ---
+  IncludeProcessorStats* processor = stats->mutable_include_processor_stats();
+  processor->set_total(num_include_processor_total_files_);
+  processor->set_skipped(num_include_processor_skipped_files_);
+  processor->set_total_wait_time(include_processor_total_wait_time_);
+  processor->set_total_run_time(include_processor_total_run_time_);
+
+  // Cache stats are exported only for the caches that are enabled.
+  if (IncludeCache::IsEnabled()) {
+    IncludeCache::instance()->DumpStatsToProto(
+        stats->mutable_includecache_stats());
+  }
+  if (DepsCache::IsEnabled()) {
+    DepsCache::instance()->DumpStatsToProto(stats->mutable_depscache_stats());
+  }
+  if (LocalOutputCache::IsEnabled()) {
+    LocalOutputCache::instance()->DumpStatsToProto(
+        stats->mutable_local_output_cache_stats());
+  }
+  http_rpc_->DumpStatsToProto(stats->mutable_http_rpc_stats());
+  subprocess_option_setter_->DumpStatsToProto(
+      stats->mutable_subprocess_stats());
+
+  // --- error totals: sum of the per-message counters ---
+  int num_user_error = 0;
+  for (const auto& entry : error_to_user_) {
+    num_user_error += entry.second;
+  }
+  int num_log_error = 0;
+  int num_log_warning = 0;
+  for (const auto& entry : error_to_log_) {
+    // entry.second is (is_error, count).
+    (entry.second.first ? num_log_error : num_log_warning) +=
+        entry.second.second;
+  }
+  auto* errors = stats->mutable_error_stats();
+  errors->set_user_error(num_user_error);
+  errors->set_log_error(num_log_error);
+  errors->set_log_warning(num_log_warning);
+
+  // --- mismatch totals: sum of the per-message counters ---
+  int num_command_version_mismatch = 0;
+  for (const auto& entry : command_version_mismatch_) {
+    num_command_version_mismatch += entry.second;
+  }
+  int num_binary_hash_mismatch = 0;
+  for (const auto& entry : command_binary_hash_mismatch_) {
+    num_binary_hash_mismatch += entry.second;
+  }
+  int num_subprogram_mismatch = 0;
+  for (const auto& entry : subprogram_mismatch_) {
+    num_subprogram_mismatch += entry.second;
+  }
+  auto* mismatches = stats->mutable_mismatch_stats();
+  mismatches->set_command_version_mismatch(num_command_version_mismatch);
+  mismatches->set_binary_hash_mismatch(num_binary_hash_mismatch);
+  mismatches->set_subprogram_mismatch(num_subprogram_mismatch);
+}
+
+void CompileService::DumpStatsToFile(const string& filename) {
+  GomaStats stats;
+  {
+    AUTOLOCK(lock, &mu_);
+    DumpCommonStatsUnlocked(&stats);
+  }
+  histogram_->DumpToProto(stats.mutable_histogram());
+  stats.mutable_machine_info()->set_goma_revision(kBuiltRevisionString);
+#if defined(__linux__)
+  stats.mutable_machine_info()->set_os(MachineInfo_OSType_LINUX);
+#elif defined(__MACH__)
+  stats.mutable_machine_info()->set_os(MachineInfo_OSType_MAC);
+#elif defined(_WIN32)
+  stats.mutable_machine_info()->set_os(MachineInfo_OSType_WIN);
+#else
+  stats.mutable_machine_info()->set_os(MachineInfo_OSType_UNKNOWN);
+#endif
+  stats.mutable_machine_info()->set_ncpus(GetNumCPUs());
+  stats.mutable_machine_info()->set_memory_size(GetSystemTotalMemory());
+  // ".json" selects JSON, otherwise binary; a failed encoding is not written.
+  string stats_buf;
+  google::protobuf::util::JsonPrintOptions options;
+  options.preserve_proto_field_names = true;
+  const bool encoded =
+      strings::EndsWith(filename, ".json")
+          ? google::protobuf::util::MessageToJsonString(
+                stats, &stats_buf, options).ok()
+          : stats.SerializeToString(&stats_buf);
+  if (!encoded || !WriteStringToFile(stats_buf, filename)) {
+    LOG(ERROR) << "failed to dump stats to " << filename;
+    return;
+  }
+  LOG(INFO) << "dumped stats to " << filename;
+}
+
+bool CompileService::IncrementActiveFailFallbackTasks() {
+  AUTOLOCK(lock, &mu_);
+  ++num_active_fail_fallback_tasks_;
+  if (max_active_fail_fallback_tasks_ < 0 ||  // negative means no limit
+      num_active_fail_fallback_tasks_ <= max_active_fail_fallback_tasks_)
+    return true;
+  // Over the limit.  Note the time unless one has already been noted.
+  time_t now = time(nullptr);
+  if (reached_max_active_fail_fallback_time_ == 0) {
+    reached_max_active_fail_fallback_time_ = now;
+    LOG(INFO) << "reached max_active_fail_fallback_tasks."
+              << " reached_max_active_fail_fallback_time="
+              << reached_max_active_fail_fallback_time_;
+  }
+  if (now < reached_max_active_fail_fallback_time_ +
+            allowed_max_active_fail_fallback_duration_in_sec_) {
+    LOG(INFO) << "reached max_active_fail_fallback_tasks but not reached "
+              << "end of allowed duration."
+              << " max_active_fail_fallback_tasks="
+              << max_active_fail_fallback_tasks_
+              << " num_active_fail_fallback_tasks="
+              << num_active_fail_fallback_tasks_
+              << " reached_max_active_fail_fallback_time="
+              << reached_max_active_fail_fallback_time_;
+    return true;
+  }
+  // Still over the limit once the allowed duration has elapsed: report it.
+  LOG(WARNING) << "reached allowed duration of max_active_fail_fallback_tasks."
+               << " max_active_fail_fallback_tasks="
+               << max_active_fail_fallback_tasks_
+               << " num_active_fail_fallback_tasks="
+               << num_active_fail_fallback_tasks_
+               << " reached_max_active_fail_fallback_time="
+               << reached_max_active_fail_fallback_time_;
+  return false;
+}
+
+void CompileService::RecordForcedFallbackInSetup(
+    ForcedFallbackReasonInSetup r) {
+  DCHECK(r >= 0 && r < arraysize(num_forced_fallback_in_setup_))
+      << "Unknown fallback reason:" << r;
+  {
+    AUTOLOCK(lock, &mu_);
+    ++num_forced_fallback_in_setup_[r];
+    if (r != kCompilerDisabled || max_compiler_disabled_tasks_ < 0) {
+      return;
+    }
+    // Only kCompilerDisabled is thresholded, and only while the limit is >= 0.
+    int num_compiler_disabled = num_forced_fallback_in_setup_[r];
+    if (num_compiler_disabled < max_compiler_disabled_tasks_) {
+      return;
+    }
+    LOG(WARNING) << "setup step failed more than the threshold. "
+                 << "Turning on SubProcessController burst mode to make "
+                 << "local fallbacks run more."
+                 << " num_compiler_disabled="
+                 << num_compiler_disabled
+                 << " max_compiler_disabled_tasks="
+                 << max_compiler_disabled_tasks_;
+    max_compiler_disabled_tasks_ = -1;  // disarms the check above
+  }
+  // Reached only if num_compiler_disabled >= max_compiler_disabled_tasks.
+  // mu_ has been released by the end of the scope above.
+  subprocess_option_setter_->TurnOnBurstMode(
+      BurstModeReason::COMPILER_DISABLED);
+}
+
+}  // namespace devtools_goma
diff --git a/client/compile_service.h b/client/compile_service.h
new file mode 100644
index 0000000..94db1dc
--- /dev/null
+++ b/client/compile_service.h
@@ -0,0 +1,638 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_COMPILE_SERVICE_H_
+#define DEVTOOLS_GOMA_CLIENT_COMPILE_SERVICE_H_
+
+#include <stdint.h>
+
+#include <deque>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <sstream>
+#include <vector>
+
+#include "atomic_stats_counter.h"
+#include "basictypes.h"
+#include "compiler_info.h"
+#include "compiler_info_cache.h"
+#include "lockhelper.h"
+#include "subprocess_option_setter.h"
+#include "threadpool_http_server.h"
+#include "unordered.h"
+#include "worker_thread_manager.h"
+#include "watchdog.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class AutoUpdater;
+class CompileTask;
+class CompilerFlags;
+class CompilerProxyHistogram;
+class ExecReq;
+class ExecResp;
+class ExecServiceClient;
+class FileServiceHttpClient;
+class FileHashCache;
+class GomaStats;
+class HttpClient;
+class HttpRPC;
+class LogServiceClient;
+class MultiExecReq;
+class MultiExecResp;
+class MultiFileStore;
+
+// CompileService provides the ExecService API in compiler proxy.
+// It is a proxy to goma service's ExecService API and FileService API, which is
+// managed by CompileTask.
+// It also provides the following:
+//   configurations for compile task.
+//   remote APIs: http_rpc, file_service.
+//   stats histograms.
+//   global data shared by all compile tasks.
+//     file hash cache, local compiler path, compiler info,
+//     command version mismatches.
+class CompileService {
+ public:
+  enum ForcedFallbackReasonInSetup {
+    kFailToParseFlags,
+    kNoRemoteCompileSupported,
+    kHTTPDisabled,
+    kFailToGetCompilerInfo,
+    kCompilerDisabled,
+    kRequestedByUser,
+
+    kNumForcedFallbackReasonInSetup,  // number of reasons above; keep last
+  };
+
+  enum HumanReadability {
+    kFastHumanUnreadable,
+    kHumanReadable,
+  };
+
+#ifdef _WIN32
+  class MultiRpcController;
+#endif
+  class RpcController {
+   public:
+    explicit RpcController(
+        ThreadpoolHttpServer::HttpServerRequest* http_server_request);
+    ~RpcController();
+
+#ifdef _WIN32
+    // Used as sub-RPC of MultiRpcController.
+    // In this case, you can't call ParseRequest/SendReply.
+    void AttachMultiRpcController(MultiRpcController* multi_rpc);
+#endif
+    bool ParseRequest(ExecReq* req);
+    void SendReply(const ExecResp& resp);
+
+    // Notifies callback when original request is closed.
+    // Can be called from any thread.
+    // callback will be called on the thread where this method is called.
+    void NotifyWhenClosed(OneshotClosure* callback);
+
+    int server_port() const { return server_port_; }
+
+   private:
+    friend class CompileService;
+    ThreadpoolHttpServer::HttpServerRequest* http_server_request_;
+    int server_port_;
+#ifdef _WIN32
+    MultiRpcController* multi_rpc_;
+#endif
+
+    size_t gcc_req_size_;
+    size_t* gcc_resp_size_;
+
+    DISALLOW_COPY_AND_ASSIGN(RpcController);
+  };
+
+#ifdef _WIN32
+  // RpcController for MultiExec.
+  class MultiRpcController {
+   public:
+    MultiRpcController(
+        WorkerThreadManager* wm,
+        ThreadpoolHttpServer::HttpServerRequest* http_server_request);
+    ~MultiRpcController();
+
+    // Parses request as MultiExecReq.
+    // Also sets up RpcController and ExecResp for each ExecReq
+    // in the MultiExecReq.
+    bool ParseRequest(MultiExecReq* req);
+
+    RpcController* rpc(int i) const;
+    ExecResp* mutable_resp(int i) const;
+
+    // Called when i-th ExecReq in the MultiExecReq has been done,
+    // rpc(i) will be invalidated.
+    // Returns true if all resp done.
+    bool ExecDone(int i);
+
+    void SendReply();
+
+    // Notifies callback when original request is closed.
+    // Can be called from any thread.
+    // callback will be called on the thread where this method is called.
+    void NotifyWhenClosed(OneshotClosure* callback);
+
+   private:
+    void RequestClosed();
+
+    WorkerThreadManager* wm_;
+    WorkerThreadManager::ThreadId caller_thread_id_;
+    ThreadpoolHttpServer::HttpServerRequest* http_server_request_;
+    Lock mu_;
+    std::vector<RpcController*> rpcs_;
+    std::unique_ptr<MultiExecResp> resp_;
+    OneshotClosure* closed_callback_;
+    std::vector<std::pair<WorkerThreadManager::ThreadId, OneshotClosure*>>
+        closed_callbacks_;
+
+    size_t gcc_req_size_;
+
+    DISALLOW_COPY_AND_ASSIGN(MultiRpcController);
+  };
+#endif
+  struct GetCompilerInfoParam {
+    GetCompilerInfoParam()
+        : flags(nullptr), cache_hit(false), updated(false) {}
+    // request
+    WorkerThreadManager::ThreadId thread_id;
+    string trace_id;
+    CompilerInfoCache::Key key;
+    const CompilerFlags* flags;
+    std::vector<string> run_envs;
+
+    // response
+    ScopedCompilerInfoState state;
+    // cache_hit=true > fast cache hit, didn't run in worker thread
+    // cache_hit=false,updated=true > cache miss, updated with compiler output
+    // cache_hit=false,updated=false > cache miss->cache hit in worker thread
+    bool cache_hit;
+    bool updated;
+
+   private:
+    DISALLOW_COPY_AND_ASSIGN(GetCompilerInfoParam);
+  };
+
+  explicit CompileService(WorkerThreadManager* wm);
+  ~CompileService();
+
+  WorkerThreadManager* wm() { return wm_; }
+
+  // Configurations.
+  void SetActiveTaskThrottle(int max_active_tasks);
+  void SetCompileTaskHistorySize(int max_finished_tasks,
+                                 int max_failed_tasks,
+                                 int max_long_tasks);
+
+  const string& username() const { return username_; }
+
+  const string& nodename() const { return nodename_; }
+  time_t start_time() const { return start_time_; }
+  const string& compiler_proxy_id_prefix() const {
+    return compiler_proxy_id_prefix_;
+  }
+  void SetCompilerProxyIdPrefix(const string& prefix);
+
+  // Takes ownership of option_setter.
+  void SetSubProcessOptionSetter(
+      std::unique_ptr<SubProcessOptionSetter> option_setter);
+
+  // Takes ownership of http_client.
+  void SetHttpClient(std::unique_ptr<HttpClient> http_client);
+  HttpClient* http_client() const { return http_client_.get(); }
+
+  // Takes ownership of http_rpc.
+  void SetHttpRPC(std::unique_ptr<HttpRPC> http_rpc);
+  HttpRPC* http_rpc() const { return http_rpc_.get(); }
+
+  void SetExecServiceClient(
+      std::unique_ptr<ExecServiceClient> exec_service_client);
+  ExecServiceClient* exec_service_client() const {
+    return exec_service_client_.get();
+  }
+
+  // Takes ownership of multi_file_store.
+  void SetMultiFileStore(std::unique_ptr<MultiFileStore> multi_file_store);
+  MultiFileStore* multi_file_store() const {
+    return multi_file_store_.get();
+  }
+
+  // Takes ownership of file_service.
+  void SetFileServiceHttpClient(
+      std::unique_ptr<FileServiceHttpClient> file_service);
+  FileServiceHttpClient* file_service() const { return file_service_.get(); }
+
+  FileHashCache* file_hash_cache() const { return file_hash_cache_.get(); }
+  CompilerProxyHistogram* histogram() const { return histogram_.get(); }
+
+  void StartIncludeProcessorWorkers(int num_threads);
+  int include_processor_pool() const { return include_processor_pool_; }
+
+  // Takes ownership of log_service_client.
+  void SetLogServiceClient(
+      std::unique_ptr<LogServiceClient> log_service_client);
+  LogServiceClient* log_service() const { return log_service_client_.get(); }
+
+  // Takes ownership of auto_updater.
+  void SetAutoUpdater(std::unique_ptr<AutoUpdater> auto_updater);
+
+  // Takes ownership of watchdog.
+  void SetWatchdog(std::unique_ptr<Watchdog> watchdog,
+                   const std::vector<string>& goma_ipc_env);
+
+  void WatchdogStart(ThreadpoolHttpServer* server, int count) {
+    watchdog_->Start(server, count);
+  }
+
+  void SetNeedToSendContent(bool need_to_send_content) {
+    need_to_send_content_ = need_to_send_content;
+  }
+  bool need_to_send_content() const { return need_to_send_content_; }
+
+  void SetNewFileThreshold(int threshold) {
+    new_file_threshold_ = threshold;
+  }
+  int new_file_threshold() const { return new_file_threshold_; }
+
+  void SetEnableGchHack(bool enable) { enable_gch_hack_ = enable;  }
+  bool enable_gch_hack() const { return enable_gch_hack_; }
+
+  void SetUseRelativePathsInArgv(bool use_relative_paths_in_argv) {
+    use_relative_paths_in_argv_ = use_relative_paths_in_argv;
+  }
+  bool use_relative_paths_in_argv() const {
+    return use_relative_paths_in_argv_;
+  }
+
+  void SetCommandCheckLevel(const string& level) {
+    command_check_level_ = level;
+  }
+  const string& command_check_level() const { return command_check_level_; }
+
+  void SetHermetic(bool hermetic) {
+    hermetic_ = hermetic;
+  }
+  bool hermetic() const { return hermetic_; }
+
+  void SetHermeticFallback(bool fallback) {
+    hermetic_fallback_ = fallback;
+  }
+  bool hermetic_fallback() const { return hermetic_fallback_; }
+
+  void SetDontKillSubprocess(bool dont_kill_subprocess) {
+    dont_kill_subprocess_ = dont_kill_subprocess;
+  }
+  bool dont_kill_subprocess() const { return dont_kill_subprocess_; }
+
+  void SetMaxSubProcsPending(int max_subprocs_pending) {
+    max_subprocs_pending_ = max_subprocs_pending;
+  }
+  int max_subprocs_pending() const { return max_subprocs_pending_; }
+  void SetLocalRunPreference(int local_run_preference) {
+    local_run_preference_ = local_run_preference;
+  }
+  int local_run_preference() const { return local_run_preference_; }
+  void SetLocalRunForFailedInput(bool local_run_for_failed_input) {
+    local_run_for_failed_input_ = local_run_for_failed_input;
+  }
+  bool local_run_for_failed_input() const {
+    return local_run_for_failed_input_;
+  }
+  void SetLocalRunDelayMsec(int local_run_delay_msec) {
+    local_run_delay_msec_ = local_run_delay_msec;
+  }
+  int local_run_delay_msec() const { return local_run_delay_msec_; }
+  void SetStoreLocalRunOutput(bool store_local_run_output) {
+    store_local_run_output_ = store_local_run_output;
+  }
+  bool store_local_run_output() const { return store_local_run_output_; }
+  void SetEnableRemoteLink(bool enable_remote_link) {
+    enable_remote_link_ = enable_remote_link;
+  }
+  bool enable_remote_link() const { return enable_remote_link_; }
+
+  void SetTmpDir(const string& tmp_dir) { tmp_dir_ = tmp_dir; }
+  const string& tmp_dir() const { return tmp_dir_; }
+
+  void SetTimeoutSecs(const std::vector<int>& timeout_secs);
+  const std::vector<int>& timeout_secs() const { return timeout_secs_; }
+
+  // Allows sending user info once this function has been called.
+  // All methods that use username() and nodename() should check the flag first.
+  void AllowToSendUserInfo() { can_send_user_info_ = true; }
+  bool CanSendUserInfo() const { return can_send_user_info_; }
+
+  void SetAllowedNetworkErrorDuration(int seconds) {
+    allowed_network_error_duration_in_sec_ = seconds;
+  }
+  int AllowedNetworkErrorDuration() const {
+    return allowed_network_error_duration_in_sec_;
+  }
+
+  void SetHashRewriteRule(const std::map<std::string, std::string>& mapping) {
+    compiler_info_builder_->SetHashRewriteRule(mapping);
+  }
+
+  void SetMaxActiveFailFallbackTasks(int num) {
+    max_active_fail_fallback_tasks_ = num;
+  }
+  void SetAllowedMaxActiveFailFallbackDuration(int duration) {
+    allowed_max_active_fail_fallback_duration_in_sec_ = duration;
+  }
+
+  void SetMaxCompilerDisabledTasks(int num) {
+    max_compiler_disabled_tasks_ = num;
+  }
+
+  // ExecService API.
+  // Starts new CompileTask.  done will be called on the same thread.
+  void Exec(RpcController* rpc,
+            const ExecReq* exec_req,
+            ExecResp* exec_resp,
+            OneshotClosure* done);
+
+  // Called when CompileTask is finished.
+  void CompileTaskDone(CompileTask* task);
+
+  // Requests to quit service.
+  void Quit();
+  bool quit() const;
+  // Waits for all tasks finish.
+  void Wait();
+
+  bool DumpTask(int task_id, string* out);
+  bool DumpTaskRequest(int task_id);
+  // Dump the tasks whose state is active or frozen time stamp is after |after|.
+  void DumpToJson(Json::Value* json, long long after);
+  void DumpStats(std::ostringstream* ss);
+  void DumpStatsToFile(const string& filename);
+  // Dump stats in json form (converted from GomaStatzStats).
+  void DumpStatsJson(std::string* json_string, HumanReadability human_readable);
+
+  void ClearTasks();
+
+  // Finds local compiler for |basename| invoked by |gomacc_path| at |cwd|
+  // from |local_path|, and sets the local compiler's path in
+  // |local_compiler_path| and PATH that goma dir is removed in
+  // |no_goma_local_path|.
+  // If |local_compiler_path| is given (and |basename| may be full path),
+  // it just checks |local_compiler_path| is not gomacc.
+  // Returns true if it finds local compiler.
+  // *local_compiler_path returned from this is not gomacc.
+  // |pathext| should only be given on Windows, which represents PATHEXT
+  // environment variable.
+  bool FindLocalCompilerPath(const string& gomacc_path,
+                             const string& basename,
+                             const string& cwd,
+                             const string& local_path,
+                             const string& pathext,
+                             string* local_compiler_path,
+                             string* no_goma_local_path);
+
+  void GetCompilerInfo(GetCompilerInfoParam* param,
+                       OneshotClosure* callback);
+  bool DisableCompilerInfo(CompilerInfoState* state,
+                           const string& disabled_reason);
+  void DumpCompilerInfo(std::ostringstream* ss);
+
+  bool RecordCommandSpecVersionMismatch(
+      const string& exec_command_version_mismatch);
+  bool RecordCommandSpecBinaryHashMismatch(
+      const string& exec_command_binary_hash_mismatch);
+  bool RecordSubprogramMismatch(const string& subprogram_mismatch);
+  // Records that |error_message| was logged with LOG(ERROR) or LOG(WARNING).
+  // If |is_error| is true, logged to LOG(ERROR), otherwise LOG(WARNING).
+  // Statistics would be kept in CompileService.
+  void RecordErrorToLog(const string& error_message, bool is_error);
+  // Records that |error_messages| were sent to gomacc as GOMA Error.
+  // Statistics would be kept in CompileService.
+  void RecordErrorsToUser(const std::vector<string>& error_messages);
+
+  // Records result for inputs.
+  void RecordInputResult(const std::vector<string>& inputs, bool success);
+  // Returns true if RecordInputResult recorded any of |inputs| as not success
+  // before.
+  bool ContainFailedInput(const std::vector<string>& inputs) const;
+
+  void SetMaxSumOutputSize(size_t size) { max_sum_output_size_ = size; }
+
+  // Acquire output buffer in buf for filesize. buf must be empty.
+  // Returns true when succeeded and buf would have filesize buffer.
+  // Returns false otherwise, and buf remains empty.
+  bool AcquireOutputBuffer(size_t filesize, string* buf);
+  // Release output buffer acquired by AcquireOutputBuffer.
+  // filesize and buf should be the same with AcquireOutputBuffer.
+  void ReleaseOutputBuffer(size_t filesize, string* buf);
+
+  // Records output file is renamed or not.
+  void RecordOutputRename(bool rename);
+
+  // Returns in msec to delay subprocess setup.
+  int GetEstimatedSubprocessDelayTime();
+
+  void DumpErrorStatus(std::ostringstream* ss);
+
+  // Returns false if it reached max_active_fail_fallback_tasks_.
+  bool IncrementActiveFailFallbackTasks();
+
+  void RecordForcedFallbackInSetup(ForcedFallbackReasonInSetup r);
+
+ private:
+  typedef std::pair<GetCompilerInfoParam*, OneshotClosure*> CompilerInfoWaiter;
+  typedef std::vector<CompilerInfoWaiter> CompilerInfoWaiterList;
+
+  // Called when reply from Exec.
+  void ExecDone(WorkerThreadManager::ThreadId thread_id, OneshotClosure* done);
+
+  // Called when compiler_mu_ is held either exclusive or shared.
+  bool FindLocalCompilerPathUnlocked(
+      const string& key,
+      const string& key_cwd,
+      string* local_compiler_path,
+      string* no_goma_local_path) const;
+  bool FindLocalCompilerPathAndUpdate(
+      const string& key,
+      const string& key_cwd,
+      const string& gomacc_path,
+      const string& basename,
+      const string& cwd,
+      const string& local_path,
+      const string& pathext,
+      string* local_compiler_path,
+      string* no_goma_local_path);
+
+  void ClearTasksUnlocked();
+
+  const CompileTask* FindTaskByIdUnlocked(int task_id, bool include_active);
+
+  void DumpCommonStatsUnlocked(GomaStats* stats);
+
+  void GetCompilerInfoInternal(GetCompilerInfoParam* param,
+                               OneshotClosure* callback);
+
+  WorkerThreadManager* wm_;
+
+  Lock quit_mu_;  // protects quit_
+  bool quit_;
+
+  Lock task_id_mu_;  // protects task_id_
+  int task_id_ GUARDED_BY(task_id_mu_);
+
+  Lock mu_;  // protects other fields.
+  ConditionVariable cond_;
+
+  int max_active_tasks_;
+  int max_finished_tasks_;
+  int max_failed_tasks_;
+  int max_long_tasks_;
+  std::deque<CompileTask*> pending_tasks_;
+  std::set<CompileTask*> active_tasks_;
+  std::deque<CompileTask*> finished_tasks_;
+  std::deque<CompileTask*> failed_tasks_;
+  // long_tasks_ is a heap compared by task's handler time.
+  // A task with the shortest handler time would come to front of long_tasks_.
+  std::vector<CompileTask*> long_tasks_;
+
+  // CompileTask's input that failed.
+  ReadWriteLock failed_inputs_mu_;
+  unordered_set<string> failed_inputs_;
+
+  string username_;
+  string nodename_;
+  time_t start_time_;
+  string compiler_proxy_id_prefix_;
+
+  std::unique_ptr<SubProcessOptionSetter> subprocess_option_setter_;
+  std::unique_ptr<HttpClient> http_client_;
+  std::unique_ptr<HttpRPC> http_rpc_;
+
+  std::unique_ptr<ExecServiceClient> exec_service_client_;
+  std::unique_ptr<MultiFileStore> multi_file_store_;
+  std::unique_ptr<FileServiceHttpClient> file_service_;
+
+  std::unique_ptr<CompilerInfoBuilder> compiler_info_builder_;
+
+  int compiler_info_pool_;
+
+  // protects compiler_info_waiters_.
+  Lock compiler_info_mu_;
+  // key: key_cwd, value: a list of waiting param+closure.
+  unordered_map<std::string, CompilerInfoWaiterList*>
+    compiler_info_waiters_;
+
+  std::unique_ptr<FileHashCache> file_hash_cache_;
+
+  int include_processor_pool_;
+
+  std::unique_ptr<LogServiceClient> log_service_client_;
+
+  std::unique_ptr<CompilerProxyHistogram> histogram_;
+
+  std::unique_ptr<AutoUpdater> auto_updater_;
+  std::unique_ptr<Watchdog> watchdog_;
+
+  bool need_to_send_content_;
+  int new_file_threshold_;
+  std::vector<int> timeout_secs_;
+  bool enable_gch_hack_;
+  bool use_relative_paths_in_argv_;
+  string command_check_level_;
+
+  // Set hermetic_mode in ExecReq, that is, don't choose different compiler
+  // than local one.
+  bool hermetic_;
+  // If true, local fallback when no compiler in server side.
+  // If false, error when no compiler in server side.
+  bool hermetic_fallback_;
+
+  bool dont_kill_subprocess_;
+  int max_subprocs_pending_;
+  int local_run_preference_;
+  bool local_run_for_failed_input_;
+  int local_run_delay_msec_;
+  bool store_local_run_output_;
+  bool enable_remote_link_;
+  string tmp_dir_;
+
+  // key: "req_ver - resp_ver", value: count
+  unordered_map<string, int> command_version_mismatch_;
+  unordered_map<string, int> command_binary_hash_mismatch_;
+
+  // key: "path hash", value: count
+  unordered_map<string, int> subprogram_mismatch_;
+
+  // key: error reason, value: pair<is_error, count>
+  unordered_map<string, std::pair<bool, int>> error_to_log_;
+  // key: error reason, value: count
+  unordered_map<string, int> error_to_user_;
+
+  // protects local_compiler_paths_
+  ReadWriteLock compiler_mu_;
+
+  // key: <gomacc_path>:<basename>:<cwd>:<local_path>
+  //     if all path in <local_path> are absolute, "." is used for <cwd>.
+  // value: (local_compiler_path, no_goma_local_path)
+  unordered_map<string, std::pair<string, string>> local_compiler_paths_;
+
+  int num_exec_request_;
+  int num_exec_success_;
+  int num_exec_failure_;
+
+  int num_exec_compiler_proxy_failure_;
+
+  int num_exec_goma_finished_;
+  int num_exec_goma_cache_hit_;
+  int num_exec_goma_local_cache_hit_;
+  int num_exec_goma_aborted_;
+  int num_exec_goma_retry_;
+  int num_exec_local_run_;
+  int num_exec_local_killed_;
+  int num_exec_local_finished_;
+  int num_exec_fail_fallback_;
+
+  std::map<string, int> local_run_reason_;
+
+  int num_file_requested_;
+  int num_file_uploaded_;
+  int num_file_missed_;
+  int num_file_output_;
+  int num_file_rename_output_;
+  int num_file_output_buf_;
+
+  int num_include_processor_total_files_;
+  int num_include_processor_skipped_files_;
+  int64_t include_processor_total_wait_time_;  // might not fit in int32.
+  int64_t include_processor_total_run_time_;  // might not fit in int32.
+
+  size_t cur_sum_output_size_;
+  size_t max_sum_output_size_;
+  size_t req_sum_output_size_;
+  size_t peak_req_sum_output_size_;
+
+  bool can_send_user_info_;
+  int allowed_network_error_duration_in_sec_;
+
+  int num_active_fail_fallback_tasks_;
+  int max_active_fail_fallback_tasks_;
+  int allowed_max_active_fail_fallback_duration_in_sec_;
+  time_t reached_max_active_fail_fallback_time_;
+
+  int num_forced_fallback_in_setup_[kNumForcedFallbackReasonInSetup];
+  int max_compiler_disabled_tasks_;
+
+  DISALLOW_COPY_AND_ASSIGN(CompileService);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_COMPILE_SERVICE_H_
diff --git a/client/compile_stats.cc b/client/compile_stats.cc
new file mode 100644
index 0000000..29b7b7c
--- /dev/null
+++ b/client/compile_stats.cc
@@ -0,0 +1,87 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "compile_stats.h"
+
+#include <sstream>
+
+namespace devtools_goma {
+
+// Starts every field declared by CompileStats itself at zero.
+CompileStats::CompileStats()
+    : ExecLog(),
+      gcc_req_size(0),
+      gcc_resp_size(0),
+      input_file_rpc_size(0),
+      input_file_rpc_raw_size(0),
+      output_file_rpc(0),
+      output_file_rpc_req_build_time(0),
+      output_file_rpc_req_send_time(0),
+      output_file_rpc_wait_time(0),
+      output_file_rpc_resp_recv_time(0),
+      output_file_rpc_resp_parse_time(0),
+      output_file_rpc_size(0),
+      output_file_rpc_raw_size(0) {}
+
+// Nothing to do here: the destructor body is intentionally empty.
+CompileStats::~CompileStats() {}
+
+// Returns the phase that took the longest in this compile, formatted as
+// "<name>:<N>ms [<P>%]".  The ":<N>ms" part is present only if some phase
+// has a positive time, and " [<P>%]" only if handler_time() is positive too.
+string CompileStats::major_factor() const {
+  struct Phase {
+    int64_t elapsed;
+    const char* name;
+  };
+
+  // Earlier entries win ties, because only a strictly larger time replaces
+  // the current candidate below.
+  const Phase phases[] = {
+      {compiler_info_process_time(), "compiler_info"},
+      {include_processor_wait_time(), "include_processor_wait_time"},
+      {include_processor_run_time(), "include_processor_run_time"},
+      {include_fileload_time(), "file_upload"},
+      // RPC times are repeated fields, so their sums are what get compared.
+      {SumRepeatedInt32(rpc_req_send_time()), "rpc_req"},
+      {SumRepeatedInt32(rpc_resp_recv_time()), "rpc_resp"},
+      {file_response_time(), "file_download"},
+  };
+
+  int64_t longest = 0;
+  const char* label = "";
+  for (const Phase& phase : phases) {
+    if (phase.elapsed > longest) {
+      longest = phase.elapsed;
+      label = phase.name;
+    }
+  }
+
+  std::ostringstream out;
+  out << label;
+  if (longest <= 0) {
+    // No phase has a positive time: the result is just the empty label.
+    return out.str();
+  }
+  out << ":" << longest << "ms";
+  const int64_t total = handler_time();
+  if (total > 0) {
+    // Integer percentage; the division truncates.
+    out << " [" << (longest * 100 / total) << "%]";
+  }
+  return out.str();
+}
+
+// Adds up |repeated_int32| in 64 bits; an empty field sums to 0.
+int64_t SumRepeatedInt32(
+    const google::protobuf::RepeatedField<google::protobuf::int32>&
+    repeated_int32) {
+  int64_t total = 0;
+  for (int i = 0; i < repeated_int32.size(); ++i)
+    total += repeated_int32.Get(i);
+  return total;
+}
+
+}  // namespace devtools_goma
diff --git a/client/compile_stats.h b/client/compile_stats.h
new file mode 100644
index 0000000..0bb31f8
--- /dev/null
+++ b/client/compile_stats.h
@@ -0,0 +1,51 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_COMPILE_STATS_H_
+#define DEVTOOLS_GOMA_CLIENT_COMPILE_STATS_H_
+
+#include <stdint.h>
+#include <string>
+
+#include "compiler_specific.h"
+
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_log.pb.h"
+MSVC_POP_WARNING()
+
+using std::string;
+
+namespace devtools_goma {
+
+class CompileStats : public ExecLog {
+ public:
+  CompileStats();  // zero-initializes the fields declared below
+  ~CompileStats();
+  // Measurements kept in addition to the inherited ExecLog fields.
+  size_t gcc_req_size;
+  size_t gcc_resp_size;
+
+  size_t input_file_rpc_size;
+  size_t input_file_rpc_raw_size;
+
+  size_t output_file_rpc;
+  int64_t output_file_rpc_req_build_time;
+  int64_t output_file_rpc_req_send_time;
+  int64_t output_file_rpc_wait_time;
+  int64_t output_file_rpc_resp_recv_time;
+  int64_t output_file_rpc_resp_parse_time;
+  size_t output_file_rpc_size;
+  size_t output_file_rpc_raw_size;
+  // Names the most time-consuming phase, e.g. "rpc_req:120ms [40%]".
+  string major_factor() const;
+};
+
+int64_t SumRepeatedInt32(
+    const google::protobuf::RepeatedField<google::protobuf::int32>&
+    repeated_int32);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_COMPILE_STATS_H_
diff --git a/client/compile_task.cc b/client/compile_task.cc
new file mode 100644
index 0000000..c1e95f8
--- /dev/null
+++ b/client/compile_task.cc
@@ -0,0 +1,5424 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "compile_task.h"
+
+#ifndef _WIN32
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#endif
+
+#include <algorithm>
+#include <iomanip>
+#include <memory>
+#include <sstream>
+
+#include <google/protobuf/text_format.h>
+#include <json/json.h>
+
+#include "autolock_timer.h"
+#include "callback.h"
+#include "compilation_database_reader.h"
+#include "compile_service.h"
+#include "compile_stats.h"
+#include "compiler_flags.h"
+#include "compiler_flags_util.h"
+#include "compiler_info.h"
+#include "compiler_proxy_info.h"
+#include "compiler_specific.h"
+#include "file.h"
+#include "file_dir.h"
+#include "file_hash_cache.h"
+#include "file_helper.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+#include "goma_data_util.h"
+#include "goma_file.h"
+#include "goma_file_dump.h"
+#include "goma_file_http.h"
+#include "http_rpc.h"
+#include "include_file_utils.h"
+#include "include_processor.h"
+#include "ioutil.h"
+#include "jar_parser.h"
+#include "join.h"
+#include "linker_input_processor.h"
+#include "local_output_cache.h"
+#include "lockhelper.h"
+#include "multi_http_rpc.h"
+#include "mypath.h"
+#include "path.h"
+#include "path_resolver.h"
+#include "path_util.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_data.pb.h"
+#include "prototmp/subprocess.pb.h"
+MSVC_POP_WARNING()
+#include "simple_timer.h"
+#include "string_piece_utils.h"
+#include "subprocess_task.h"
+#include "timestamp.h"
+#include "unordered.h"
+#include "util.h"
+#include "worker_thread_manager.h"
+
+#ifdef _WIN32
+# include "posix_helper_win.h"
+#endif
+
+namespace devtools_goma {
+
+static const int kMaxExecRetry = 4;
+
+static string GetLastErrorMessage() {
+  char error_message[1024];
+#ifndef _WIN32
+  // Meaning of returned value of strerror_r is different between
+  // XSI and GNU. Need to ignore.
+  (void)strerror_r(errno, error_message, sizeof(error_message));
+#else
+  FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, 0, GetLastError(), 0,
+                 error_message, sizeof error_message, 0);
+#endif
+  return error_message;
+}
+
+static bool IsFatalError(ExecResp::ExecError error_code) {
+  return error_code == ExecResp::BAD_REQUEST;
+}
+
+// Appends "path=<path> hash=<binary_hash>" for every entry of
+// |subprogram_specs| to |ss|. Entries are separated by ", ".
+// Nothing is written when |subprogram_specs| is empty.
+static void DumpSubprograms(
+    const google::protobuf::RepeatedPtrField<SubprogramSpec>& subprogram_specs,
+    std::ostringstream* ss) {
+  bool is_first = true;
+  for (const auto& spec : subprogram_specs) {
+    if (!is_first) {
+      *ss << ", ";
+    }
+    is_first = false;
+    *ss << "path=" << spec.path() << " hash=" << spec.binary_hash();
+  }
+}
+
+// Logs compiler output |out| (labelled with |name|) at INFO level.
+//
+// The size of |out| is always logged. After that:
+//  - empty output: nothing more is logged.
+//  - output shorter than kMaxCols bytes: logged verbatim in one message.
+//  - otherwise: only lines containing "error" or "warning" are logged, at
+//    most kMaxLines of them, each cut to kMaxCols bytes followed by "...".
+//    cl.exe "Note: including file:" lines are never logged.
+static void LogCompilerOutput(
+    const string& trace_id, const string& name, StringPiece out) {
+  LOG(INFO) << trace_id << " " << name << ": size=" << out.size();
+  static const int kMaxLines = 32;
+  static const size_t kMaxCols = 200;
+  static const char* kClExeShowIncludePrefix = "Note: including file:";
+  if (out.size() == 0)
+    return;
+  if (out.size() < kMaxCols) {
+    LOG(INFO) << trace_id << " " << name << ":" << out;
+    return;
+  }
+
+  int num_logged = 0;
+  while (out.size() > 0 && num_logged < kMaxLines) {
+    const size_t eol = out.find_first_of("\r\n");
+    if (eol == 0) {
+      // Line terminator at the very front: drop it and look again.
+      out.remove_prefix(1);
+      continue;
+    }
+
+    // Cut the next line off the front of |out|.
+    StringPiece line;
+    if (eol == string::npos) {
+      // Last line without a terminator.
+      line = out;
+      out = StringPiece();
+    } else {
+      line = out.substr(0, eol);
+      out.remove_prefix(eol + 1);
+    }
+
+    if (line.size() == 0)
+      continue;
+    if (strings::StartsWith(line, kClExeShowIncludePrefix))
+      continue;
+
+    const bool is_diagnostic =
+        line.find("error") != string::npos ||
+        line.find("warning") != string::npos;
+    if (!is_diagnostic)
+      continue;
+
+    // Only lines that are actually logged count against kMaxLines.
+    ++num_logged;
+    if (line.size() > kMaxCols) {
+      LOG(INFO) << trace_id << " " << name << ":"
+                << line.substr(0, kMaxCols) << "...";
+    } else {
+      LOG(INFO) << trace_id << " " << name << ":" << line;
+    }
+  }
+}
+
+// Removes all input entries from |req| and rebuilds |req| as a copy of the
+// stripped message.
+// NOTE(review): presumably the swap-and-copy is used instead of a plain
+// req->clear_input() so that memory held for the old inputs is actually
+// released rather than retained for reuse -- confirm.
+static void ReleaseMemoryForExecReqInput(ExecReq* req) {
+  ExecReq stripped;
+  // After the swap |req| is a default message and |stripped| owns the data.
+  stripped.Swap(req);
+  stripped.clear_input();
+  req->CopyFrom(stripped);
+}
+
+#ifndef _WIN32
+pthread_once_t CompileTask::init_once_ = PTHREAD_ONCE_INIT;
+#else
+INIT_ONCE CompileTask::init_once_;
+#endif
+Lock CompileTask::global_mu_;
+
+std::deque<CompileTask*>* CompileTask::link_file_req_tasks_ = nullptr;
+
+// Formats |spec| as "<name> <version> (<binary_hash>)".
+static string CreateCommandVersionString(const CommandSpec& spec) {
+  string version_string(spec.name());
+  version_string += ' ';
+  version_string += spec.version();
+  version_string += " (";
+  version_string += spec.binary_hash();
+  version_string += ")";
+  return version_string;
+}
+
+// Returns the display name of |state|.
+// CHECK-fails when |state| is outside [0, CompileTask::NUM_STATE).
+static string StateName(CompileTask::State state) {
+  // Lookup table indexed by the numeric value of CompileTask::State.
+  static const char* const kStateNames[] = {
+    "INIT", "SETUP", "FILE_REQ",
+    "CALL_EXEC", "LOCAL_OUTPUT", "FILE_RESP",
+    "FINISHED", "LOCAL_RUN", "LOCAL_FINISHED",
+  };
+
+  // Adding a state without adding its name here fails the build.
+  static_assert(CompileTask::NUM_STATE == arraysize(kStateNames),
+                "CompileTask::NUM_STATE and arraysize(names) is not matched");
+
+  CHECK_GE(state, 0);
+  CHECK_LT(state, CompileTask::NUM_STATE);
+  const char* const state_name = kStateNames[state];
+  return state_name;
+}
+
+// Rewrites, in place, every path in [path_begin, path_end) for which
+// HasPrefixDir(path, home) holds, replacing it with
+// PathResolver::WeakRelativePath(path, cwd). Other paths are left untouched.
+// Does nothing when |home| is empty.
+template <typename Iter>
+static void NormalizeSystemIncludePaths(const string& home, const string& cwd,
+                                        Iter path_begin, Iter path_end) {
+  if (!home.empty()) {
+    Iter path = path_begin;
+    while (path != path_end) {
+      if (HasPrefixDir(*path, home)) {
+        path->assign(PathResolver::WeakRelativePath(*path, cwd));
+      }
+      ++path;
+    }
+  }
+}
+
+// Returns true if |buf| starts with a bigobj format header.
+// |buf| should contain 32 byte at least (bytes 0..27 are examined).
+//
+// Checked fields, all 16-bit values stored little-endian:
+//   offset 0:  must be 0x0000
+//   offset 2:  must be 0xFFFF
+//   offset 4:  bigobj version (1 or 2); selects the expected UUID
+//   offset 6:  machine type, must be 0x014C or 0x8664
+//   offset 12: 16-byte UUID matching the version
+//
+// Fields are assembled byte by byte, so |buf| needs no particular alignment
+// and the result does not depend on the byte order of the host.
+static bool IsBigobjFormat(const unsigned char* buf) {
+  static const unsigned char kV1UUID[16] = {
+    0x38, 0xFE, 0xB3, 0x0C, 0xA5, 0xD9, 0xAB, 0x4D,
+    0xAC, 0x9B, 0xD6, 0xB6, 0x22, 0x26, 0x53, 0xC2,
+  };
+
+  static const unsigned char kV2UUID[16] = {
+    0xC7, 0xA1, 0xBA, 0xD1, 0xEE, 0xBA, 0xA9, 0x4B,
+    0xAF, 0x20, 0xFA, 0xF6, 0x6A, 0xA4, 0xDC, 0xB8
+  };
+
+  // Reads the little-endian 16-bit value stored at |offset| in |buf|.
+  const auto read_u16 = [buf](int offset) -> unsigned int {
+    return static_cast<unsigned int>(buf[offset]) |
+           (static_cast<unsigned int>(buf[offset + 1]) << 8);
+  };
+
+  if (read_u16(0) != 0x0000)
+    return false;
+  if (read_u16(2) != 0xFFFF)
+    return false;
+
+  // UUID can be different by bigobj version.
+  const unsigned char* uuid = nullptr;
+  switch (read_u16(4)) {
+    case 0x0001:
+      uuid = kV1UUID;
+      break;
+    case 0x0002:
+      uuid = kV2UUID;
+      break;
+    default:
+      // Unknown bigobj version
+      return false;
+  }
+
+  const unsigned int magic = read_u16(6);
+  if (!(magic == 0x014C || magic == 0x8664))
+    return false;
+
+  for (int i = 0; i < 16; ++i) {
+    if (buf[12 + i] != uuid[i])
+      return false;
+  }
+
+  return true;
+}
+
+// Reads one input file into a FileBlob, computes its hash key and, when
+// needed, stores the blob via the file service.
+//
+// A single InputFileTask is shared by all CompileTasks that ask for the same
+// filename while it is registered in |task_by_filename_|: only the first
+// caller of Run() does the work, the others are notified when it is done.
+// The object deletes itself in Done() once every registered CompileTask has
+// called Done().
+class CompileTask::InputFileTask {
+ public:
+  // Gets InputFileTask for the filename.
+  // If an InputFileTask for the same filename already exists, use the same
+  // InputFileTask.
+  // In that case only |task| and |input| are used; the other arguments are
+  // ignored and |file_service_client| is destroyed unused.
+  // In both cases (|task|, |input|) is registered with the returned object
+  // and task->StartInputFileTask() is called.
+  // |filename| must be an absolute path (DCHECKed).
+  static InputFileTask* NewInputFileTask(
+      WorkerThreadManager* wm,
+      std::unique_ptr<FileServiceHttpClient> file_service_client,
+      FileHashCache* file_hash_cache,
+      const FileId& file_id,
+      const string& filename,
+      bool missed_content,
+      bool linking,
+      bool is_new_file,
+      const string& old_hash_key,
+      CompileTask* task,
+      ExecReq_Input* input) {
+    DCHECK(file::IsAbsolutePath(filename)) << filename;
+
+    // Lazily creates |task_by_filename_| exactly once.
+#ifndef _WIN32
+    pthread_once(&init_once_,
+                 &CompileTask::InputFileTask::InitializeStaticOnce);
+#else
+    InitOnceExecuteOnce(&init_once_,
+                        &CompileTask::InputFileTask::InitializeWinOnce,
+                        nullptr, nullptr);
+#endif
+
+    InputFileTask* input_file_task = nullptr;
+    {
+      AUTOLOCK(lock, &global_mu_);
+      // Insert a nullptr placeholder first; p.second tells whether this
+      // filename was not registered yet and a new task must be created.
+      std::pair<unordered_map<string, InputFileTask*>::iterator, bool> p =
+          task_by_filename_->insert(std::make_pair(filename, input_file_task));
+      if (p.second) {
+        p.first->second = new InputFileTask(wm, std::move(file_service_client),
+                                            file_hash_cache, file_id,
+                                            filename, missed_content,
+                                            linking, is_new_file,
+                                            old_hash_key);
+      }
+      input_file_task = p.first->second;
+      DCHECK(input_file_task != nullptr);
+      // Registered while still holding global_mu_.
+      input_file_task->SetTaskInput(task, input);
+    }
+    DCHECK_GT(input_file_task->num_tasks(), 0U);
+    VLOG(1) << task->trace_id_ << " start input "
+            << task->num_input_file_task_ << " " << filename;
+    task->StartInputFileTask();
+    return input_file_task;
+  }
+
+  // Processes the input file on behalf of |task| and schedules |closure| on
+  // task's thread (task->thread_id_) when the result is available.
+  //
+  // Behavior depends on the current state:
+  //  - INIT: this call does the work synchronously, then schedules
+  //    |closure| and every closure queued in the meantime.
+  //  - RUN:  another call is doing the work; |closure| is queued.
+  //  - DONE: the work already finished; |closure| is scheduled right away.
+  void Run(CompileTask* task, OneshotClosure* closure) {
+    WorkerThreadManager::ThreadId thread_id = task->thread_id_;
+    {
+      AUTOLOCK(lock, &mu_);
+      switch (state_) {
+        case INIT:  // first run.
+          state_ = RUN;
+          break;
+        case RUN:
+          VLOG(1) << task->trace_id() << " input running ("
+                  << tasks_.size() << " tasks)";
+          callbacks_.emplace_back(thread_id, closure);
+          return;
+        case DONE:
+          VLOG(1) << task->trace_id() << " input done";
+          wm_->RunClosureInThread(FROM_HERE, thread_id, closure,
+                                  WorkerThreadManager::PRIORITY_LOW);
+          return;
+      }
+    }
+
+    // Only the first caller reaches this point.
+    blob_.reset(new FileBlob);
+    if (missed_content_) {
+      LOG(INFO) << task->trace_id() << " (" << num_tasks() << " tasks)"
+                << " input " << filename_ << " [missed content]";
+    } else {
+      VLOG(1) << task->trace_id() << " (" << num_tasks() << " tasks)"
+              << " input " << filename_;
+    }
+    success_ = file_service_->CreateFileBlob(
+        filename_, missed_content_, blob_.get());
+
+    if (success_) {
+      hash_key_ = FileServiceClient::ComputeHashKey(*blob_);
+      file_size_ = blob_->file_size();
+
+      // For small size of file blob, don't request to store file blob
+      // separately even if the compile task requested hash key only.
+      if (blob_->blob_type() == FileBlob::FILE_META || file_size_ < 512)
+        need_hash_only_ = false;
+
+      if (!missed_content_ && blob_->blob_type() == FileBlob::FILE_META &&
+          need_to_upload_content()) {
+        // We didn't upload FILE_CHUNKs, but seems to need to upload them.
+        // Create the blob again, this time passing true as the second
+        // argument of CreateFileBlob().
+        LOG(WARNING) << task->trace_id()
+                     << " (" << num_tasks() << " tasks)"
+                     << " reload:" << filename_
+                     << " file changed";
+        blob_.reset(new FileBlob);
+        success_ = file_service_->CreateFileBlob(
+            filename_, true, blob_.get());
+        if (success_) {
+          const string new_hash_key = FileServiceClient::ComputeHashKey(*blob_);
+          const ssize_t new_file_size = blob_->file_size();
+          if (hash_key_ != new_hash_key || file_size_ != new_file_size) {
+            hash_key_ = new_hash_key;
+            file_size_ = new_file_size;
+          }
+        }
+      }
+      if (need_hash_only_ && need_to_upload_content()) {
+        LOG(INFO) << task->trace_id() << " (" << num_tasks() << " tasks)"
+                  << " upload:" << filename_ << " size:" << file_size_
+                  << " reason:" << upload_reason();
+        success_ = file_service_->StoreFileBlob(*blob_);
+        // A null blob_ from here on means "the blob was stored separately";
+        // the code below relies on that.
+        blob_.reset();
+      }
+    }
+
+    if (!success_) {
+      LOG(WARNING) << task->trace_id() << " (" << num_tasks() << " tasks)"
+                   << " input file failed:" << filename_;
+    } else {
+      // Stores file cache key only if we have already uploaded the blob,
+      // or we assume the blob has already been uploaded since it's old enough.
+      // When we decide to upload the blob by embedding it to the request,
+      // we have to store file cache key after the compile request finishes
+      // without a missing inputs error. If missing inputs error happens, it's
+      // safer to resend the blob since we might send the second request to
+      // the different cluster. That cluster might not have the cache.
+      // If blob is old enough, we assume that the file has already been
+      // uploaded. In that case, we register file hash id to |file_hash_cache_|.
+      // See b/11261931
+      //     b/12087209
+      if (blob_.get() == nullptr || !is_new_file_) {
+        // Set upload_timestamp_ms only if we have uploaded the content.
+        const millitime_t upload_timestamp_ms =
+            blob_.get() == nullptr ? GetCurrentTimestampMs() : 0LL;
+
+        if (file_id_.IsValid()) {
+          mtime_ = file_id_.mtime;
+        }
+        new_cache_key_ = file_hash_cache_->StoreFileCacheKey(
+            filename_, hash_key_, upload_timestamp_ms, file_id_);
+        VLOG(1) << task->trace_id() << " (" << num_tasks() << " tasks)"
+                << " input file ok: " << filename_
+                << (blob_.get() == nullptr ? " upload" : " hash only");
+      } else {
+        // Though the blob is new, we didn't upload the blob. It's because
+        // either the blob has been uploaded (new_cache_key_ == false)
+        // or we will upload it by embedding the blob to the compile request
+        // (new_cache_key_ == true).
+        new_cache_key_ = !file_hash_cache_->IsKnownCacheKey(hash_key_);
+        VLOG(1) << task->trace_id() << " (" << num_tasks() << " tasks)"
+                << " input file ok: " << filename_
+                << (new_cache_key_ ? " hash only (embedded upload)"
+                    : " already uploaded");
+      }
+    }
+
+    // Unregister, so that a later request for the same filename creates a
+    // fresh InputFileTask instead of sharing this finished one.
+    {
+      AUTOLOCK(lock, &global_mu_);
+      unordered_map<string, InputFileTask*>::iterator found =
+          task_by_filename_->find(filename_);
+      DCHECK(found != task_by_filename_->end());
+      DCHECK(found->second == this);
+      task_by_filename_->erase(found);
+      VLOG(1) << task->trace_id() << " (" << num_tasks() << " tasks)"
+              << " clear task by filename" << filename_;
+    }
+    std::vector<std::pair<WorkerThreadManager::ThreadId,
+                          OneshotClosure*>> callbacks;
+
+    // Take the queued closures out under the lock; they are scheduled below
+    // after the lock has been released.
+    {
+      AUTOLOCK(lock, &mu_);
+      DCHECK_EQ(RUN, state_);
+      state_ = DONE;
+      callbacks.swap(callbacks_);
+    }
+    wm_->RunClosureInThread(FROM_HERE, thread_id, closure,
+                            WorkerThreadManager::PRIORITY_LOW);
+    for (const auto& callback : callbacks)
+      wm_->RunClosureInThread(FROM_HERE,
+                              callback.first, callback.second,
+                              WorkerThreadManager::PRIORITY_LOW);
+  }
+
+  // Unregisters |task| from this InputFileTask and calls
+  // task->MaybeRunInputFileCallback(true).
+  // CHECK-fails if |task| was not registered.
+  // Deletes this object when |task| was the last registered task, so the
+  // caller must not touch the InputFileTask after calling Done().
+  void Done(CompileTask* task) {
+    bool all_finished = false;
+    {
+      AUTOLOCK(lock, &mu_);
+      std::map<CompileTask*, ExecReq_Input*>::iterator found =
+          tasks_.find(task);
+      CHECK(found != tasks_.end());
+      tasks_.erase(found);
+      all_finished = tasks_.empty();
+    }
+    task->MaybeRunInputFileCallback(true);
+    if (all_finished)
+      delete this;
+  }
+
+  const string& filename() const { return filename_; }
+  bool missed_content() const { return missed_content_; }
+  bool need_hash_only() const { return need_hash_only_; }
+  // May be nullptr: Run() resets the blob after storing it separately.
+  const FileBlob* blob() const { return blob_.get(); }
+  time_t mtime() const { return mtime_; }
+  // Value of the timer that was started in the constructor.
+  int GetInMs() const { return timer_.GetInMs(); }
+  ssize_t file_size() const { return file_size_; }
+  const string& old_hash_key() const { return old_hash_key_; }
+  const string& hash_key() const { return hash_key_; }
+  bool success() const { return success_; }
+  bool new_cache_key() const { return new_cache_key_; }
+
+  // Number of CompileTasks currently registered with this InputFileTask.
+  size_t num_tasks() const {
+    AUTOLOCK(lock, &mu_);
+    return tasks_.size();
+  }
+  // Returns the ExecReq_Input registered for |task|, or nullptr if |task| is
+  // not registered.
+  ExecReq_Input* GetInputForTask(CompileTask* task) const {
+    AUTOLOCK(lock, &mu_);
+    std::map<CompileTask*, ExecReq_Input*>::const_iterator found =
+        tasks_.find(task);
+    if (found != tasks_.end()) {
+      return found->second;
+    }
+    return nullptr;
+  }
+
+  // Decides whether the file content has to be uploaded.
+  // Checked in this order: missed content, ".rsp" filename, new file with a
+  // new cache key, no old hash key (-> false), old hash key differs.
+  // NOTE(review): Run() calls this before it assigns new_cache_key_, so at
+  // those call sites new_cache_key_ still has its initial value (false) and
+  // the is_new_file_ branch cannot return true -- confirm this is intended.
+  bool need_to_upload_content() const {
+    if (missed_content_)
+      return true;
+
+    if (strings::EndsWith(filename_, ".rsp")) {
+      return true;
+    }
+    if (is_new_file_) {
+      if (new_cache_key_)
+        return true;
+    }
+    if (old_hash_key_.empty()) {
+      // old file and first check. we assume the file was already uploaded.
+      return false;
+    }
+    return old_hash_key_ != hash_key_;
+  }
+  // Returns a description (for logging) of the decision made by
+  // need_to_upload_content(); the checks are done in the same order.
+  const char* upload_reason() const {
+    if (missed_content_)
+      return "missed content";
+    if (strings::EndsWith(filename_, ".rsp")) {
+      return "rsp file";
+    }
+    if (is_new_file_) {
+      if (new_cache_key_)
+        return "new file cache_key";
+    }
+    if (old_hash_key_.empty())
+      return "no need to upload - maybe already in cache.";
+    if (old_hash_key_ != hash_key_)
+      return "update cache_key";
+
+    return "no need to upload - cache_key matches";
+  }
+
+  const HttpRPC::Status& http_rpc_status() const {
+    return file_service_->http_rpc_status();
+  }
+
+ private:
+  // Progress of Run(); see the comment on Run().
+  enum State {
+    INIT,
+    RUN,
+    DONE,
+  };
+
+  // Instances are created only through NewInputFileTask() and destroyed
+  // only through Done(). Starts |timer_|.
+  InputFileTask(WorkerThreadManager* wm,
+                std::unique_ptr<FileServiceHttpClient> file_service,
+                FileHashCache* file_hash_cache,
+                const FileId& file_id,
+                const string& filename,
+                bool missed_content,
+                bool linking,
+                bool is_new_file,
+                const string& old_hash_key)
+      : wm_(wm),
+        file_service_(std::move(file_service)),
+        file_hash_cache_(file_hash_cache),
+        file_id_(file_id),
+        filename_(filename),
+        state_(INIT),
+        missed_content_(missed_content),
+        need_hash_only_(linking),  // we need hash key only in linking.
+        is_new_file_(is_new_file),
+        mtime_(0),
+        old_hash_key_(old_hash_key),
+        file_size_(0),
+        success_(false),
+        new_cache_key_(false) {
+    timer_.Start();
+  }
+  ~InputFileTask() {
+    CHECK(tasks_.empty());
+  }
+
+  // Registers |input| as the ExecReq_Input of |task|. An existing entry for
+  // |task| is kept as is (std::map::insert does not overwrite).
+  void SetTaskInput(CompileTask* task, ExecReq_Input* input) {
+    AUTOLOCK(lock, &mu_);
+    tasks_.insert(std::make_pair(task, input));
+  }
+
+#ifdef _WIN32
+  // Adapter that gives InitializeStaticOnce() the signature required by
+  // InitOnceExecuteOnce().
+  static BOOL WINAPI InitializeWinOnce(PINIT_ONCE, PVOID, PVOID*) {
+    InitializeStaticOnce();
+    return TRUE;
+  }
+#endif
+  // Allocates |task_by_filename_|. The map is never freed in this class.
+  static void InitializeStaticOnce() {
+    task_by_filename_ = new unordered_map<string, InputFileTask*>;
+  }
+
+  WorkerThreadManager* wm_;
+  std::unique_ptr<FileServiceHttpClient> file_service_;
+  FileHashCache* file_hash_cache_;
+  const FileId file_id_;
+
+  const string filename_;
+  // Read and written under |mu_| in Run().
+  State state_;
+
+  Lock mu_;  // protects tasks_ and callbacks_.
+  // CompileTasks sharing this InputFileTask, each with its own input entry.
+  std::map<CompileTask*, ExecReq_Input*> tasks_;
+  // Closures (with their target thread) of Run() calls that arrived while
+  // the first Run() was still in progress.
+  std::vector<std::pair<WorkerThreadManager::ThreadId,
+                        OneshotClosure*>> callbacks_;
+
+  // true if goma servers couldn't find the content, so we must upload it.
+  const bool missed_content_;
+
+  // true if we'll use hash key only in ExecReq to prevent from bloating it.
+  // false to embed content in ExecReq.
+  bool need_hash_only_;
+
+  // true if the file is considered as new file, so the file might not be
+  // in goma cache yet.
+  // false means the file is old enough, so we could think someone else already
+  // uploaded the content in goma cache.
+  const bool is_new_file_;
+
+  time_t mtime_;      // file's mtime.
+  // hash key stored in file_hash_cache.
+  const string old_hash_key_;
+  // hash key calculated from blob_.
+  string hash_key_;
+
+  std::unique_ptr<FileBlob> blob_;
+  SimpleTimer timer_;
+  ssize_t file_size_;
+
+  // true if goma file ops is succeeded.
+  bool success_;
+
+  // true if the hash_key_ is first inserted in file hash cache.
+  bool new_cache_key_;
+
+#ifndef _WIN32
+  static pthread_once_t init_once_;
+#else
+  static INIT_ONCE init_once_;
+#endif
+
+  // protects task_by_filename_.
+  static Lock global_mu_;
+  // In-flight InputFileTasks keyed by filename. An entry is added by
+  // NewInputFileTask() and removed by Run() when the work has finished.
+  static unordered_map<string, InputFileTask*>* task_by_filename_;
+
+  DISALLOW_COPY_AND_ASSIGN(InputFileTask);
+};
+
+#ifndef _WIN32
+pthread_once_t CompileTask::InputFileTask::init_once_ = PTHREAD_ONCE_INIT;
+#else
+INIT_ONCE CompileTask::InputFileTask::init_once_;
+#endif
+Lock CompileTask::InputFileTask::global_mu_;
+
+unordered_map<string, CompileTask::InputFileTask*>*
+  CompileTask::InputFileTask::task_by_filename_;
+
+// Reports whether every output of |result| carries a FileBlob::FILE blob.
+// If so, no further http_rpc is needed to fetch the outputs.
+// Returns true when |result| has no outputs at all.
+bool IsOutputFileEmbedded(const ExecResult& result) {
+  const int num_outputs = result.output_size();
+  for (int i = 0; i < num_outputs; ++i) {
+    const bool is_file_blob =
+        result.output(i).blob().blob_type() == FileBlob::FILE;
+    if (!is_file_blob)
+      return false;
+  }
+  return true;
+}
+
+// Describes one output file of a compile: where it finally goes, and where
+// OutputFileTask puts the data (a tmp file or the in-memory |content|).
+struct CompileTask::OutputFileInfo {
+  // Defaults: mode 0666, size 0, all strings empty.
+  OutputFileInfo() : mode(0666), size(0) {}
+  // actual output filename.
+  string filename;
+  // file mode/permission.
+  int mode;
+
+  // NOTE(review): presumably the size of the output in bytes; it is not
+  // written by OutputFileTask -- confirm against the code that sets it.
+  size_t size;
+
+  // tmp_filename is filename written by OutputFileTask.
+  // tmp_filename may be the same as output filename (when !need_rename), or
+  // rename it to real output filename in CommitOutput().
+  // if tmp file was not written in OutputFileTask, because it holds content
+  // in content field, tmp_filename will be "".
+  string tmp_filename;
+
+  // hash_key is the hash key of the output blob (OutputFileTask computes it
+  // with FileServiceClient::ComputeHashKey() after the blob was written
+  // successfully). It will be stored in file hash cache once output file is
+  // committed.
+  string hash_key;
+
+  // content is output content.
+  // it is used to hold output content in memory while output file task.
+  // it will be used iff tmp_filename == "".
+  string content;
+};
+
+// Writes the blob of one ExecResult_Output to the destination described by
+// an OutputFileInfo: a tmp file, or the info's in-memory |content| string
+// when no tmp filename is set.
+//
+// Construction calls task->StartOutputFileTask() and destruction calls
+// task->MaybeRunOutputFileCallback(output_index, true).
+class CompileTask::OutputFileTask {
+ public:
+  // Takes ownership of |file_service|.
+  // Doesn't take ownership of |info|.
+  // |output| is stored by reference, and |task| and |info| by pointer; all
+  // three must outlive this object.
+  // The thread calling the constructor is the one on which the closure
+  // passed to Run() will be scheduled.
+  OutputFileTask(WorkerThreadManager* wm,
+                 std::unique_ptr<FileServiceHttpClient> file_service,
+                 CompileTask* task,
+                 int output_index,
+                 const ExecResult_Output& output,
+                 OutputFileInfo* info)
+      : wm_(wm),
+        thread_id_(wm->GetCurrentThreadId()),
+        file_service_(std::move(file_service)),
+        task_(task),
+        output_index_(output_index),
+        output_(output),
+        output_size_(output.blob().file_size()),
+        info_(info),
+        success_(false) {
+    timer_.Start();
+    task_->StartOutputFileTask();
+  }
+  ~OutputFileTask() {
+    task_->MaybeRunOutputFileCallback(output_index_, true);
+  }
+
+  // Writes the output blob to its destination, records the outcome in
+  // success(), and then schedules |closure| on the thread that created this
+  // object. On success the blob's hash key is stored in info->hash_key.
+  void Run(OneshotClosure* closure) {
+    VLOG(1) << task_->trace_id() << " output " << info_->filename;
+    std::unique_ptr<FileServiceClient::Output> dest(OpenOutput());
+    // TODO: We might want to restrict paths this program may write?
+    success_ = file_service_->OutputFileBlob(output_.blob(), dest.get());
+    if (success_) {
+      info_->hash_key = FileServiceClient::ComputeHashKey(output_.blob());
+    } else {
+      LOG(WARNING) << task_->trace_id()
+                   << " " << (task_->cache_hit() ? "cached" : "no-cached")
+                   << " output file failed:" << info_->filename;
+    }
+    wm_->RunClosureInThread(FROM_HERE, thread_id_, closure,
+                            WorkerThreadManager::PRIORITY_LOW);
+  }
+
+  CompileTask* task() const { return task_; }
+  const ExecResult_Output& output() const { return output_; }
+  // Value of the timer that was started in the constructor.
+  int GetInMs() const { return timer_.GetInMs(); }
+  bool success() const { return success_; }
+  // True if the output is kept in info->content instead of a tmp file.
+  bool IsInMemory() const {
+    return info_->tmp_filename.empty();
+  }
+
+  int num_rpc() const {
+    return file_service_->num_rpc();
+  }
+  const HttpRPC::Status& http_rpc_status() const {
+    return file_service_->http_rpc_status();
+  }
+
+ private:
+  // Creates the destination for the blob: a string output backed by
+  // info->content when no tmp filename is set, otherwise a file output at
+  // info->tmp_filename. An existing file at that path is removed first;
+  // the result of remove() is ignored.
+  std::unique_ptr<FileServiceClient::Output> OpenOutput() {
+    if (info_->tmp_filename.empty()) {
+      return FileServiceClient::StringOutput(info_->filename, &info_->content);
+    }
+    remove(info_->tmp_filename.c_str());
+    return FileServiceClient::FileOutput(info_->tmp_filename, info_->mode);
+  }
+
+  WorkerThreadManager* wm_;
+  // Thread that constructed this object; Run() schedules its closure there.
+  WorkerThreadManager::ThreadId thread_id_;
+  std::unique_ptr<FileServiceHttpClient> file_service_;
+  CompileTask* task_;
+  int output_index_;
+  // Not owned; refers to the object passed to the constructor.
+  const ExecResult_Output& output_;
+  // File size of the output blob, captured at construction.
+  // NOTE(review): not read anywhere in this class.
+  size_t output_size_;
+  OutputFileInfo* info_;
+  SimpleTimer timer_;
+  bool success_;
+
+  DISALLOW_COPY_AND_ASSIGN(OutputFileTask);
+};
+
+// Reads a locally produced output file, records its hash key in the file
+// hash cache and stores the blob via the file service when the cache key
+// is new.
+//
+// Construction calls task->StartLocalOutputFileTask() and destruction calls
+// task->MaybeRunLocalOutputFileCallback(true).
+class CompileTask::LocalOutputFileTask {
+ public:
+  // Takes ownership of |file_service|. |wm|, |file_hash_cache| and |task|
+  // are stored as raw pointers and must outlive this object.
+  // The thread calling the constructor is the one on which the closure
+  // passed to Run() will be scheduled.
+  LocalOutputFileTask(WorkerThreadManager* wm,
+                      std::unique_ptr<FileServiceClient> file_service,
+                      FileHashCache* file_hash_cache,
+                      const FileId& file_id,
+                      CompileTask* task,
+                      const string& filename)
+      : wm_(wm),
+        thread_id_(wm_->GetCurrentThreadId()),
+        file_service_(std::move(file_service)),
+        file_hash_cache_(file_hash_cache),
+        file_id_(file_id),
+        task_(task),
+        filename_(filename),
+        success_(false) {
+    timer_.Start();
+    task_->StartLocalOutputFileTask();
+  }
+  ~LocalOutputFileTask() {
+    task_->MaybeRunLocalOutputFileCallback(true);
+  }
+
+  // Creates the blob for |filename_|, stores its hash key in the file hash
+  // cache and, if that key was new, stores the blob via the file service.
+  // Records the outcome in success() and then schedules |closure| on the
+  // thread that created this object.
+  void Run(OneshotClosure* closure) {
+    // Store hash_key of output file.  This file would be used in link phase.
+    VLOG(1) << task_->trace_id() << " local output " << filename_;
+    success_ = file_service_->CreateFileBlob(
+        filename_, true, &blob_);
+    if (success_) {
+      CHECK(FileServiceClient::IsValidFileBlob(blob_)) << filename_;
+      string hash_key = FileServiceClient::ComputeHashKey(blob_);
+      bool new_cache_key = file_hash_cache_->StoreFileCacheKey(
+          filename_, hash_key, GetCurrentTimestampMs(), file_id_);
+      // NOTE(review): the cache key is recorded before the blob is stored;
+      // if StoreFileBlob() fails below the key stays in the cache -- confirm
+      // that this is acceptable.
+      if (new_cache_key) {
+        LOG(INFO) << task_->trace_id()
+                  << " local output store:" << filename_
+                  << " size=" << blob_.file_size();
+        success_ = file_service_->StoreFileBlob(blob_);
+      }
+    }
+    if (!success_) {
+      // Also reached when StoreFileBlob() failed, not only on read failure.
+      LOG(WARNING) << task_->trace_id()
+                   << " local output read failed:" << filename_;
+    }
+    wm_->RunClosureInThread(FROM_HERE, thread_id_, closure,
+                            WorkerThreadManager::PRIORITY_LOW);
+  }
+
+  CompileTask* task() const { return task_; }
+  const string& filename() const { return filename_; }
+  const FileBlob& blob() const { return blob_; }
+  // Value of the timer that was started in the constructor.
+  int GetInMs() const { return timer_.GetInMs(); }
+  bool success() const { return success_; }
+
+ private:
+  WorkerThreadManager* wm_;
+  // Thread that constructed this object; Run() schedules its closure there.
+  WorkerThreadManager::ThreadId thread_id_;
+  std::unique_ptr<FileServiceClient> file_service_;
+  FileHashCache* file_hash_cache_;
+  const FileId file_id_;
+  CompileTask* task_;
+  const string filename_;
+  // Filled in by Run().
+  FileBlob blob_;
+  SimpleTimer timer_;
+  bool success_;
+
+  DISALLOW_COPY_AND_ASSIGN(LocalOutputFileTask);
+};
+
+#ifdef _WIN32
+// Adapter that gives InitializeStaticOnce() the callback signature required
+// by InitOnceExecuteOnce(). The arguments are unused; always returns TRUE.
+BOOL WINAPI CompileTask::InitializeWinOnce(PINIT_ONCE, PVOID, PVOID*) {
+  CompileTask::InitializeStaticOnce();
+  return TRUE;
+}
+#endif
+
+// Allocates the process-wide |link_file_req_tasks_| queue.
+// Run through |init_once_| from the CompileTask constructor.
+/* static */
+void CompileTask::InitializeStaticOnce() {
+  link_file_req_tasks_ = new std::deque<CompileTask*>;
+}
+
+// Creates a task in the INIT state for |service| with identifier |id|.
+//
+// The new task holds one reference (see Ref() below), so the creator is
+// responsible for a matching Deref(). The calling thread is recorded both as
+// |caller_thread_id_| and as the initial |thread_id_|.
+// |service| must not be null; it is dereferenced here.
+CompileTask::CompileTask(CompileService* service, int id)
+    : service_(service),
+      id_(id),
+      rpc_(nullptr),
+      caller_thread_id_(service->wm()->GetCurrentThreadId()),
+      done_(nullptr),
+      stats_(new CompileStats),
+      responsecode_(0),
+      state_(INIT),
+      abort_(false),
+      finished_(false),
+      req_(new ExecReq),
+      linking_(false),
+      precompiling_(false),
+      gomacc_pid_(SubProcessState::kInvalidPid),
+      canceled_(false),
+      resp_(new ExecResp),
+      exit_status_(0),
+      delayed_setup_subproc_(nullptr),
+      subproc_(nullptr),
+      subproc_weight_(SubProcessReq::LIGHT_WEIGHT),
+      subproc_exit_status_(0),
+      want_fallback_(false),
+      should_fallback_(false),
+      verify_output_(false),
+      fail_fallback_(false),
+      local_run_(false),
+      local_killed_(false),
+      depscache_used_(false),
+      gomacc_revision_mismatched_(false),
+      input_file_callback_(nullptr),
+      num_input_file_task_(0),
+      input_file_success_(false),
+      output_file_callback_(nullptr),
+      num_output_file_task_(0),
+      output_file_success_(false),
+      local_output_file_callback_(nullptr),
+      num_local_output_file_task_(0),
+      localoutputcache_lookup_succeeded_(false),
+      refcnt_(0),
+      frozen_timestamp_ms_(0),
+      last_req_timestamp_ms_(0) {
+  thread_id_ = GetCurrentThreadId();
+  // Make sure the static state shared by all tasks exists.
+#ifndef _WIN32
+  pthread_once(&init_once_, InitializeStaticOnce);
+#else
+  InitOnceExecuteOnce(&init_once_, InitializeWinOnce, nullptr, nullptr);
+#endif
+  // refcnt_ starts at 0; this is the initial reference.
+  Ref();
+  // trace_id_ ("Task:<id>") prefixes this task's log messages.
+  std::ostringstream ss;
+  ss << "Task:" << id_;
+  trace_id_ = ss.str();
+
+  // Record the creation time and the compiler_proxy user agent in the stats.
+  time_t start_time;
+  time(&start_time);
+  stats_->set_start_time(start_time);
+  stats_->set_compiler_proxy_user_agent(kUserAgentString);
+}
+
+// Takes one more reference on this task. Pair every call with Deref().
+void CompileTask::Ref() {
+  AUTOLOCK(lock, &mu_);
+  ++refcnt_;
+}
+
+// Drops one reference and destroys the task when the count reaches zero.
+// The caller must not use the task after its own reference is released.
+void CompileTask::Deref() {
+  bool last_reference = false;
+  {
+    // The decision is made under the lock, but the object is deleted only
+    // after the lock has been released.
+    AUTOLOCK(lock, &mu_);
+    --refcnt_;
+    last_reference = (refcnt_ == 0);
+  }
+  if (last_reference)
+    delete this;
+}
+
+// Binds the task to one incoming request.
+//
+// |rpc|, |resp| and |done| are stored as given (not copied); |req| is copied
+// into the task's own ExecReq, so the caller's |req| need not outlive this
+// call.
+// Must be called while the task is still in the INIT state and on the thread
+// that constructed the task (both are CHECKed).
+void CompileTask::Init(CompileService::RpcController* rpc,
+                       const ExecReq* req,
+                       ExecResp* resp,
+                       OneshotClosure* done) {
+  VLOG(1) << trace_id_ << " init";
+  CHECK_EQ(INIT, state_);
+  CHECK(service_ != nullptr);
+  CHECK_EQ(caller_thread_id_, service_->wm()->GetCurrentThreadId());
+  rpc_ = rpc;
+  rpc_resp_ = resp;
+  done_ = done;
+  *req_ = *req;
+#ifdef _WIN32
+  // Remember the PATHEXT value from the request's environment.
+  pathext_ = GetEnvFromEnvIter(req->env().begin(), req->env().end(),
+                               "PATHEXT", true);
+#endif
+}
+
+void CompileTask::Start() {
+  VLOG(1) << trace_id_ << " start";
+  CHECK_EQ(INIT, state_);
+  stats_->set_pending_time(handler_timer_.GetInMs());
+
+  // We switched to new thread.
+  DCHECK(!BelongsToCurrentThread());
+  thread_id_ = GetCurrentThreadId();
+
+  input_file_id_cache_.reset(new FileIdCache);
+  output_file_id_cache_.reset(new FileIdCache);
+
+  rpc_->NotifyWhenClosed(NewCallback(this, &CompileTask::GomaccClosed));
+
+  int api_version = req_->requester_info().api_version();
+  if (api_version != RequesterInfo::CURRENT_VERSION) {
+    LOG(ERROR) << trace_id_ << " unexpected api_version=" << api_version
+               << " want=" << RequesterInfo::CURRENT_VERSION;
+  }
+#if defined(ENABLE_REVISION_CHECK)
+  if (req_->requester_info().has_goma_revision() &&
+      req_->requester_info().goma_revision() != kBuiltRevisionString) {
+    LOG(WARNING) << trace_id_ << " goma revision mismatch:"
+                 << " gomacc=" << req_->requester_info().goma_revision()
+                 << " compiler_proxy=" << kBuiltRevisionString;
+    gomacc_revision_mismatched_ = true;
+  }
+#endif
+  CopyEnvFromRequest();
+  InitCompilerFlags();
+  if (flags_.get() == nullptr) {
+    LOG(ERROR) << trace_id_ << " Start error: CompilerFlags is nullptr";
+    AddErrorToResponse(TO_USER, "Unsupported command", true);
+    ProcessFinished("Unsupported command");
+    return;
+  }
+  if (!IsLocalCompilerPathValid(trace_id_, *req_, flags_.get())) {
+    LOG(ERROR) << trace_id_ << " Start error: invalid local compiler."
+               << " path=" << req_->command_spec().local_compiler_path();
+    AddErrorToResponse(TO_USER, "Invalid command", true);
+    ProcessFinished("Invalid command");
+    return;
+  }
+  if (!flags_->is_successful()) {
+    LOG(WARNING) << trace_id_ << " Start error:" << flags_->fail_message();
+    // It should fallback.
+  } else if (precompiling_) {
+    LOG(INFO) << trace_id_ << " Start precompile "
+              << (flags_->input_filenames().empty() ? "(no input)" :
+                  flags_->input_filenames()[0])
+              << " gomacc_pid=" << gomacc_pid_;
+    if (!flags_->input_filenames().empty() && !flags_->output_files().empty()) {
+      DCHECK_EQ(1U, flags_->input_filenames().size()) << trace_id_;
+      const string& input_filename =
+          file::JoinPathRespectAbsolute(flags_->cwd(),
+                                        flags_->input_filenames()[0]);
+      string output_filename;
+      for (const auto& output_file : flags_->output_files()) {
+        if (strings::EndsWith(output_file, ".gch")) {
+          int output_filelen = output_file.size();
+          // Full path and strip ".gch".
+          output_filename =
+              file::JoinPathRespectAbsolute(
+                  flags_->cwd(),
+                  output_file.substr(0, output_filelen - 4));
+          break;
+        }
+      }
+      // Copy the header file iff precompiling header to *.gch.
+      if (!output_filename.empty()) {
+        LOG(INFO) << trace_id_ << " copy " << input_filename
+                  << " " << output_filename;
+        if (input_filename != output_filename) {
+          if (File::Copy(input_filename.c_str(),
+                         output_filename.c_str(), true)) {
+            VLOG(1) << trace_id_ << " copy ok";
+            resp_->mutable_result()->set_exit_status(0);
+          } else {
+            AddErrorToResponse(TO_USER,
+                               "Failed to copy " + input_filename + " to " +
+                               output_filename, true);
+          }
+        }
+      } else {
+        AddErrorToResponse(TO_LOG, "Precompile to no *.gch output", false);
+      }
+    }
+  } else if (linking_) {
+    // build_dir will be used to infer the build directory
+    // in `goma_ctl.py report`. See b/25487955.
+    LOG(INFO) << trace_id_ << " Start linking "
+              << (flags_->output_files().empty() ? "(no output)" :
+                  flags_->output_files()[0])
+              << " gomacc_pid=" << gomacc_pid_
+              << " build_dir=" << flags_->cwd();
+  } else {
+    // build_dir will be used to infer the build directory
+    // in `goma_ctl.py report`. See b/25487955.
+    LOG(INFO) << trace_id_ << " Start "
+              << (flags_->input_filenames().empty() ? "(no input)" :
+                  flags_->input_filenames()[0])
+              << " gomacc_pid=" << gomacc_pid_
+              << " build_dir=" << flags_->cwd();
+  }
+  if (!FindLocalCompilerPath()) {
+    // Unable to fallback.
+    LOG(ERROR) << trace_id_ << " Failed to find local compiler path:"
+               << req_->DebugString()
+               << " env:" << requester_env_.DebugString();
+    AddErrorToResponse(TO_USER, "Failed to find local compiler path", true);
+    ProcessFinished("fail to find local compiler");
+    return;
+  }
+  VLOG(1) << "local_compiler:" << req_->command_spec().local_compiler_path();
+  local_compiler_path_ = req_->command_spec().local_compiler_path();
+
+  verify_output_ = ShouldVerifyOutput();
+  should_fallback_ = ShouldFallback();
+  subproc_weight_ = GetTaskWeight();
+  int ramp_up = service_->http_client()->ramp_up();
+
+  if (verify_output_) {
+    VLOG(1) << trace_id_ << " verify_output";
+    SetupSubProcess();
+    RunSubProcess("verify output");
+    service_->RecordForcedFallbackInSetup(CompileService::kRequestedByUser);
+    // we run both local and goma backend.
+    return;
+  } else if (should_fallback_) {
+    VLOG(1) << trace_id_ << " should fallback";
+    SetupSubProcess();
+    RunSubProcess("should fallback");
+    // we don't call goma rpc.
+    return;
+  } else if ((rand() % 100) >= ramp_up) {
+    LOG(WARNING) << trace_id_ << " http disabled "
+                 << " ramp_up=" << ramp_up;
+    should_fallback_ = true;
+    service_->RecordForcedFallbackInSetup(CompileService::kHTTPDisabled);
+    SetupSubProcess();
+    RunSubProcess("http disabled");
+    // we don't call goma rpc.
+    return;
+  } else if (precompiling_ && service_->enable_gch_hack()) {
+    VLOG(1) << trace_id_ << " gch hack";
+    SetupSubProcess();
+    RunSubProcess("gch hack");
+    // we run both local and goma backend in parallel.
+  } else if (!requester_env_.fallback()) {
+    stats_->set_local_run_reason("should not run under GOMA_FALLBACK=false");
+    LOG(INFO) << trace_id_ << " GOMA_FALLBACK=false";
+  } else if (subproc_weight_ == SubProcessReq::HEAVY_WEIGHT) {
+    stats_->set_local_run_reason("should not start running heavy subproc.");
+  } else if (requester_env_.use_local()) {
+    int num_pending_subprocs = SubProcessTask::NumPending();
+    bool is_failed_input = false;
+    if (service_->local_run_for_failed_input()) {
+      is_failed_input = service_->ContainFailedInput(flags_->input_filenames());
+    }
+    int delay_subproc_ms = service_->GetEstimatedSubprocessDelayTime();
+    if (num_pending_subprocs == 0) {
+      stats_->set_local_run_reason("local idle");
+      SetupSubProcess();
+    } else if (is_failed_input) {
+      stats_->set_local_run_reason("previous failed");
+      SetupSubProcess();
+      // TODO: RunSubProcess to run it soon?
+    } else if (delay_subproc_ms <= 0) {
+      stats_->set_local_run_reason("slow goma");
+      SetupSubProcess();
+    } else if (!service_->http_client()->IsHealthy()) {
+      stats_->set_local_run_reason("goma unhealthy");
+      SetupSubProcess();
+    } else {
+      stats_->set_local_run_reason("should not run while delaying subproc");
+      stats_->set_local_delay_time(delay_subproc_ms);
+      VLOG(1) << trace_id_ << " delay subproc " << delay_subproc_ms << "msec";
+      DCHECK(delayed_setup_subproc_ == nullptr) << trace_id_ << " subproc";
+      delayed_setup_subproc_ =
+          service_->wm()->RunDelayedClosureInThread(
+              FROM_HERE,
+              thread_id_,
+              delay_subproc_ms,
+              NewCallback(
+                  this,
+                  &CompileTask::SetupSubProcess));
+    }
+  } else {
+    stats_->set_local_run_reason("should not run under GOMA_USE_LOCAL=false");
+    LOG(INFO) << trace_id_ << " GOMA_USE_LOCAL=false";
+  }
+  if (subproc_ != nullptr && ShouldStopGoma()) {
+    state_ = LOCAL_RUN;
+    stats_->set_local_run_reason("slow goma, local run started in INIT");
+    return;
+  }
+  ProcessSetup();
+}
+
+// Destroys the task.
+// Aborts (via CHECK) if refcnt_ is not zero or if output_file_ is not empty
+// at destruction time.
+CompileTask::~CompileTask() {
+  CHECK_EQ(0, refcnt_);
+  CHECK(output_file_.empty());
+}
+
+// Reports whether the calling thread is the thread identified by thread_id_.
+bool CompileTask::BelongsToCurrentThread() const {
+  const bool on_task_thread = THREAD_ID_IS_SELF(thread_id_);
+  return on_task_thread;
+}
+
+// Probes whether the process with gomacc_pid_ can still be found.
+//
+// Returns false right away when gomacc_pid_ is already kInvalidPid.
+// When the probe shows the process is gone, gomacc_pid_ is reset to
+// kInvalidPid, so later calls take the fast path. Returns true iff
+// gomacc_pid_ is still valid after the probe.
+bool CompileTask::IsGomaccRunning() {
+  if (gomacc_pid_ == SubProcessState::kInvalidPid)
+    return false;
+#ifdef _WIN32
+  // Try to open a handle to the process; the elapsed time (including the
+  // release of the handle at the end of the inner scope) is logged when it
+  // exceeds 100 msec.
+  SimpleTimer timer;
+  bool running = false;
+  {
+    ScopedFd proc(OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, FALSE,
+                              gomacc_pid_));
+    running = proc.valid();
+  }
+  int ms = timer.GetInMs();
+  LOG_IF(WARNING, ms > 100) << trace_id_
+                            << " SLOW IsGomaccRunning in " << ms << " msec";
+  if (!running) {
+    gomacc_pid_ = SubProcessState::kInvalidPid;
+  }
+#else
+  // Signal 0 delivers nothing; it only checks whether the pid can be
+  // signalled.
+  if (kill(gomacc_pid_, 0) != 0) {
+    if (errno != ESRCH) {
+      // Any error other than "no such process" leaves gomacc_pid_ untouched.
+      PLOG(ERROR) << trace_id_ << " kill 0 failed with unexpected errno."
+                  << " gomacc_pid=" << gomacc_pid_;
+    } else {
+      gomacc_pid_ = SubProcessState::kInvalidPid;
+    }
+  }
+#endif
+  return gomacc_pid_ != SubProcessState::kInvalidPid;
+}
+
+// Handles the "gomacc closed" notification: logs the current state, marks
+// the task as canceled, forgets the gomacc pid and kills the local
+// subprocess if one exists.
+void CompileTask::GomaccClosed() {
+  const bool has_subproc = (subproc_ != nullptr);
+  LOG(INFO) << trace_id_ << " gomacc closed "
+            << "at state=" << StateName(state_)
+            << " subproc pid="
+            << (has_subproc ? subproc_->started().pid() : 0);
+  canceled_ = true;
+  gomacc_pid_ = SubProcessState::kInvalidPid;
+  if (!has_subproc) {
+    return;
+  }
+  // Kill the subprocess whether it is already running or still pending.
+  KillSubProcess();
+}
+
+// Returns true iff a subprocess object exists and its started() record
+// carries a valid pid.
+bool CompileTask::IsSubprocRunning() const {
+  if (subproc_ == nullptr) {
+    return false;
+  }
+  return subproc_->started().pid() != SubProcessState::kInvalidPid;
+}
+
+// Moves the task from INIT to SETUP.
+// Must be called on the task's thread, in state INIT, with neither abort_
+// nor should_fallback_ set (all enforced by CHECK). Continues with
+// FillCompilerInfo() unless ShouldStopGoma() says to stop, in which case the
+// task switches to LOCAL_RUN instead.
+void CompileTask::ProcessSetup() {
+  VLOG(1) << trace_id_ << " setup";
+  CHECK(BelongsToCurrentThread());
+  CHECK_EQ(INIT, state_);
+  CHECK(!abort_);
+  CHECK(!should_fallback_);
+  state_ = SETUP;
+  if (!ShouldStopGoma()) {
+    FillCompilerInfo();
+    return;
+  }
+  state_ = LOCAL_RUN;
+  stats_->set_local_run_reason("slow goma, local run started in SETUP");
+}
+
+// Starts the file request timer and enters ProcessFileRequest().
+// Linking tasks are first appended to link_file_req_tasks_ (under
+// global_mu_); only the task at the front of that queue proceeds
+// immediately, the others return here and stay queued.
+void CompileTask::TryProcessFileRequest() {
+  file_request_timer_.Start();
+  if (!linking_) {
+    ProcessFileRequest();
+    return;
+  }
+  DCHECK(link_file_req_tasks_ != nullptr);
+  {
+    AUTOLOCK(lock, &global_mu_);
+    link_file_req_tasks_->push_back(this);
+    const bool is_front = (link_file_req_tasks_->front() == this);
+    if (!is_front) {
+      VLOG(1) << trace_id_ << " pending file req "
+              << link_file_req_tasks_->size();
+      return;
+    }
+  }
+  // The lock is released before doing the actual file request work.
+  ProcessFileRequest();
+}
+
+// Rebuilds the input list of |req_| from |required_files_| and schedules an
+// InputFileTask for every file whose cached hash key cannot be used as-is.
+//
+// Must run on the task's thread in state SETUP, FILE_REQ or FILE_RESP (both
+// enforced by CHECK). On abort, or when ShouldStopGoma() returns true, the
+// pending link file request is handed over via ProcessPendingFileRequest()
+// and the function returns early. When no InputFileTask has to be run,
+// MaybeRunInputFileCallback(false) is called directly.
+void CompileTask::ProcessFileRequest() {
+  VLOG(1) << trace_id_ << " file req";
+  CHECK(BelongsToCurrentThread());
+  // SETUP: first pass
+  // FILE_REQ: failed in input file task, and retry
+  // FILE_RESP: failed with missing inputs, and retry
+  CHECK(state_ == SETUP || state_ == FILE_REQ || state_ == FILE_RESP)
+      << trace_id_ << " " << StateName(state_);
+  // Time elapsed since the timer was last started is accounted as pending
+  // time; the timer is then restarted for the work done below.
+  stats_->add_include_fileload_pending_time(file_request_timer_.GetInMs());
+  file_request_timer_.Start();
+  if (abort_) {
+    ProcessPendingFileRequest();
+    ProcessFinished("aborted before file req");
+    return;
+  }
+  state_ = FILE_REQ;
+  if (ShouldStopGoma()) {
+    ProcessPendingFileRequest();
+    state_ = LOCAL_RUN;
+    stats_->set_local_run_reason("slow goma, local run started in FILE_REQ");
+    return;
+  }
+  VLOG(1) << trace_id_
+          << " start processing of input files "
+          << required_files_.size();
+
+  // Collect the files that the previous response reported as missing.
+  std::set<string> missed_content_files;
+  for (const auto& filename : resp_->missing_input()) {
+    missed_content_files.insert(filename);
+    VLOG(2) << trace_id_ << " missed content: " << filename;
+    if (interleave_uploaded_files_.find(filename) !=
+        interleave_uploaded_files_.end()) {
+      LOG(WARNING) << trace_id_ << " interleave-uploaded file missing:"
+                   << filename;
+    }
+  }
+
+  // InputFileTask assumes that filename is unique in single compile task.
+  RemoveDuplicateFiles(flags_->cwd(), &required_files_);
+
+  // TODO: We don't need to clear the input when we are retrying.
+  req_->clear_input();
+  interleave_uploaded_files_.clear();
+  SetInputFileCallback();
+  std::vector<OneshotClosure*> closures;
+  time_t now = time(nullptr);
+  stats_->set_num_total_input_file(required_files_.size());
+
+  for (const string& filename : required_files_) {
+    ExecReq_Input* input = req_->add_input();
+    input->set_filename(filename);
+    const std::string abs_filename =
+        file::JoinPathRespectAbsolute(flags_->cwd(), filename);
+    bool missed_content =
+        missed_content_files.find(filename) != missed_content_files.end();
+    time_t mtime = 0;
+    string hash_key;
+    bool hash_key_is_ok = false;
+    millitime_t missed_timestamp =
+        (missed_content ? last_req_timestamp_ms_ : 0ULL);
+
+    // If the file was reported as missing, we need to send the file content.
+    //
+    // Otherwise,
+    //  if hash_key_is_ok is true, we can believe hash_key is valid,
+    //  so uses hash_key only (no content uploading here)
+    //
+    //  if hash_key_is_ok is false, we're not sure hash_key is valid or not,
+    //  so try reading the content.  InputFileTaskFinished determines whether
+    //  we should upload the content or not, based on mtime and hash_key.
+    //  if the content's hash matches with this hash_key, we can believe
+    //  hash_key is valid, so don't upload content in this session.
+    //
+    // If we believed hash_key is valid, but goma servers couldn't find the
+    // content, then it would be reported as missing_inputs_ and we'll set
+    // missed_content to true in the retry session.
+    // Even in this case, we need to consider the race condition of upload and
+    // execution. If the file is uploaded by the other task during the task is
+    // getting missing_inputs_, we do not have to upload the file again. We use
+    // the timestamp of file upload and execution to identify this condition.
+    // If upload time is later than execution time (last_req_timestamp_ms_),
+    // we can assume the file is uploaded by others.
+    const FileId& input_file_id = input_file_id_cache_->Get(abs_filename);
+    if (input_file_id.IsValid()) {
+      mtime = input_file_id.mtime;
+    }
+    hash_key_is_ok = service_->file_hash_cache()->GetFileCacheKey(
+        abs_filename, missed_timestamp, input_file_id, &hash_key);
+    if (missed_content) {
+      if (hash_key_is_ok) {
+        VLOG(2) << trace_id_ << " interleave uploaded: "
+                << " filename=" << abs_filename;
+        // TODO: warn if interleave uploaded file is missing.
+        interleave_uploaded_files_.insert(filename);
+      } else {
+        LOG(INFO) << trace_id_ << " missed content:" << abs_filename;
+      }
+    }
+    // Track the most recently modified input file in the stats.
+    if (mtime > stats_->latest_input_mtime()) {
+      stats_->set_latest_input_filename(abs_filename);
+      stats_->set_latest_input_mtime(mtime);
+    }
+    // A trusted hash key is enough; no InputFileTask is created for it.
+    if (hash_key_is_ok) {
+      input->set_hash_key(hash_key);
+      continue;
+    }
+    // In linking, we'll use hash_key instead of content in ExecReq to prevent
+    // from bloating ExecReq.
+    VLOG(1) << trace_id_ << " input file:" << abs_filename
+            << (linking_ ? " [linking]" : "");
+    bool is_new_file = false;
+    if (mtime > 0) {
+      if (linking_) {
+        // For linking, we assume input files is old if it is older than
+        // compiler_proxy start time. (i.e. it would be built in previous
+        // build session, so that the files were generated by goma backends
+        // or uploaded by previous compiler_proxy.
+        is_new_file = mtime > service_->start_time();
+      } else {
+        is_new_file = ((now - mtime) < service_->new_file_threshold());
+      }
+    }
+    // If need_to_send_content is set to true, we consider all file is new file.
+    if (service_->need_to_send_content())
+      is_new_file = true;
+
+    InputFileTask* input_file_task =
+        InputFileTask::NewInputFileTask(
+            service_->wm(),
+            service_->file_service()->WithRequesterInfoAndTraceId(
+                requester_info_, trace_id_),
+            service_->file_hash_cache(),
+            input_file_id_cache_->Get(abs_filename),
+            abs_filename, missed_content,
+            linking_, is_new_file, hash_key,
+            this, input);
+    // The closure runs the input file task and then reports back through
+    // InputFileTaskFinished.
+    closures.push_back(
+        NewCallback(
+            input_file_task,
+            &InputFileTask::Run,
+            this,
+            NewCallback(
+                this,
+                &CompileTask::InputFileTaskFinished,
+                input_file_task)));
+    DCHECK_EQ(closures.size(), static_cast<size_t>(num_input_file_task_));
+  }
+  DCHECK_EQ(closures.size(), static_cast<size_t>(num_input_file_task_));
+  stats_->add_num_uploading_input_file(closures.size());
+  stats_->add_num_file_uploaded_during_exec_failure(
+      interleave_uploaded_files_.size());
+  // Nothing to run: invoke the input file callback path right away.
+  if (closures.empty()) {
+    MaybeRunInputFileCallback(false);
+    return;
+  }
+  // Hand every input file task to the worker thread manager at low priority.
+  for (auto* closure : closures)
+    service_->wm()->RunClosure(
+        FROM_HERE, closure, WorkerThreadManager::PRIORITY_LOW);
+}
+
+// Completes the FILE_REQ step.
+//
+// Must run on the task's thread in state FILE_REQ (enforced by CHECK).
+// Records file loading times, releases the link file request slot via
+// ProcessPendingFileRequest(), and then:
+//  - finishes the task if it was aborted;
+//  - if |input_file_success_| is false: switches to LOCAL_RUN when a
+//    subprocess is running, otherwise retries through
+//    TryProcessFileRequest() (at most kMaxExecRetry times, only while the
+//    http client is healthy and the last attempt tried to upload files) or
+//    finishes with failure;
+//  - otherwise serves the result from LocalOutputCache on a hit, or calls
+//    ProcessCallExec().
+void CompileTask::ProcessFileRequestDone() {
+  VLOG(1) << trace_id_ << " file req done";
+  CHECK(BelongsToCurrentThread());
+  CHECK_EQ(FILE_REQ, state_);
+  stats_->add_include_fileload_run_time(file_request_timer_.GetInMs());
+  stats_->set_include_fileload_time(
+      include_timer_.GetInMs() - stats_->include_preprocess_time());
+
+  VLOG(1) << trace_id_
+          << " input files processing preprocess "
+          << stats_->include_preprocess_time() << "ms"
+          << ", loading " << stats_->include_fileload_time() << "ms";
+
+  ProcessPendingFileRequest();
+
+  if (abort_) {
+    ProcessFinished("aborted in file req");
+    return;
+  }
+  if (!input_file_success_) {
+    if (IsSubprocRunning()) {
+      VLOG(1) << trace_id_ << " file request failed,"
+              << " but subprocess running";
+      state_ = LOCAL_RUN;
+      stats_->set_local_run_reason("fail goma, local run started in FILE_REQ");
+      return;
+    }
+    AddErrorToResponse(TO_LOG, "Failed to process file request", true);
+    // Retry only if the http client is healthy and the most recent
+    // num_uploading_input_file entry is positive.
+    if (service_->http_client()->IsHealthy() &&
+        stats_->num_uploading_input_file_size() > 0 &&
+        stats_->num_uploading_input_file(
+            stats_->num_uploading_input_file_size() - 1) > 0) {
+      // TODO: don't retry for permanent error (no such file, etc).
+      stats_->set_exec_request_retry(stats_->exec_request_retry() + 1);
+      if (stats_->exec_request_retry() <= kMaxExecRetry) {
+        std::ostringstream ss;
+        ss << "Failed to upload "
+           << stats_->num_uploading_input_file(
+               stats_->num_uploading_input_file_size() - 1)
+           << " files";
+        stats_->add_exec_request_retry_reason(ss.str());
+        LOG(INFO) << trace_id_ << " retry in FILE_REQ";
+        resp_->clear_error_message();
+
+        service_->wm()->RunClosureInThread(
+            FROM_HERE,
+            thread_id_,
+            NewCallback(this, &CompileTask::TryProcessFileRequest),
+            WorkerThreadManager::PRIORITY_LOW);
+        return;
+      }
+    }
+    ProcessFinished("fail in file request");
+    return;
+  }
+
+  // Fix for GOMA_GCH.
+  // We're sending *.gch.goma on local disk, but it must appear as *.gch
+  // on backend.
+  if (service_->enable_gch_hack()) {
+    for (auto& input : *req_->mutable_input()) {
+      if (strings::EndsWith(input.filename(), GOMA_GCH_SUFFIX)) {
+        // Drop the trailing ".goma" from the filename sent to the backend.
+        input.mutable_filename()->resize(
+            input.filename().size() - strlen(".goma"));
+      }
+    }
+  }
+
+  // Here, |req_| is all prepared.
+  // TODO: Instead of here, maybe we need to call this
+  // in end of ProcessFileRequest?
+  if (LocalOutputCache::IsEnabled()) {
+    local_output_cache_key_ = LocalOutputCache::MakeCacheKey(*req_);
+    if (LocalOutputCache::instance()->Lookup(local_output_cache_key_,
+                                             resp_.get(),
+                                             trace_id_)) {
+      LOG(INFO) << trace_id_ << " lookup succeeded";
+      localoutputcache_lookup_succeeded_ = true;
+
+      // Cache hit: skip the Exec call and go straight to the file response
+      // step in state LOCAL_OUTPUT.
+      ReleaseMemoryForExecReqInput(req_.get());
+      state_ = LOCAL_OUTPUT;
+      ProcessFileResponse();
+      return;
+    }
+  }
+
+  ProcessCallExec();
+}
+
+// Removes this task from the head of the link file request queue and, if
+// another task is waiting behind it, schedules that task's
+// ProcessFileRequest() on the waiting task's own thread.
+// No-op unless this task is linking.
+//
+// |link_file_req_tasks_| is only accessed while holding |global_mu_|,
+// including the sanity checks below, because other tasks append to the
+// queue in TryProcessFileRequest() under the same lock.
+void CompileTask::ProcessPendingFileRequest() {
+  if (!linking_)
+    return;
+
+  CompileTask* pending_task = nullptr;
+  {
+    AUTOLOCK(lock, &global_mu_);
+    // This task must be the one currently at the head of the queue.
+    DCHECK(!link_file_req_tasks_->empty()) << trace_id_;
+    DCHECK_EQ(this, link_file_req_tasks_->front()) << trace_id_;
+    link_file_req_tasks_->pop_front();
+    if (!link_file_req_tasks_->empty()) {
+      pending_task = link_file_req_tasks_->front();
+    }
+  }
+  // Schedule the next task outside the lock.
+  if (pending_task != nullptr) {
+    VLOG(1) << pending_task->trace_id_ << " start file req";
+    service_->wm()->RunClosureInThread(
+        FROM_HERE,
+        pending_task->thread_id_,
+        NewCallback(pending_task, &CompileTask::ProcessFileRequest),
+        WorkerThreadManager::PRIORITY_LOW);
+  }
+}
+
+// Issues the asynchronous Exec call for |req_| and moves the task from
+// FILE_REQ to CALL_EXEC; ProcessCallExecDone() is registered as the
+// completion callback.
+//
+// Must run on the task's thread in state FILE_REQ (enforced by CHECK).
+// Returns early if the task was aborted, or switches to LOCAL_RUN if
+// ShouldStopGoma() says so. After the call has been issued, a heavy-weight
+// local subprocess that has not been set up yet is set up when
+// GOMA_USE_LOCAL is enabled.
+void CompileTask::ProcessCallExec() {
+  VLOG(1) << trace_id_ << " call exec";
+  CHECK(BelongsToCurrentThread());
+  CHECK_EQ(FILE_REQ, state_);
+  if (abort_) {
+    ProcessFinished("aborted before call exec");
+    return;
+  }
+  CHECK(!requester_env_.verify_command().empty() ||
+        req_->input_size() > 0) << trace_id_ << " call exec";
+  state_ = CALL_EXEC;
+  if (ShouldStopGoma()) {
+    state_ = LOCAL_RUN;
+    stats_->set_local_run_reason("slow goma, local run started in CALL_EXEC");
+    return;
+  }
+
+  if (req_->trace()) {
+    LOG(INFO) << trace_id_ << " requesting remote trace";
+  }
+  rpc_call_timer_.Start();
+  // Tell the backend which retry attempt this request is.
+  req_->mutable_requester_info()->set_retry(stats_->exec_request_retry());
+  VLOG(2) << trace_id_
+          << " request string to send:" << req_->DebugString();
+  {
+    // A fresh status object is installed for every call, under mu_.
+    AUTOLOCK(lock, &mu_);
+    http_rpc_status_.reset(new HttpRPC::Status());
+    http_rpc_status_->trace_id = trace_id_;
+    copy(service_->timeout_secs().begin(), service_->timeout_secs().end(),
+         back_inserter(http_rpc_status_->timeout_secs));
+  }
+
+  exec_resp_.reset(new ExecResp);
+  service_->exec_service_client()->ExecAsync(
+      req_.get(), exec_resp_.get(), http_rpc_status_.get(),
+      NewCallback(this, &CompileTask::ProcessCallExecDone));
+
+  last_req_timestamp_ms_ = GetCurrentTimestampMs();
+
+  const bool start_heavy_subproc_now =
+      requester_env_.use_local() &&
+      (subproc_weight_ == SubProcessReq::HEAVY_WEIGHT) &&
+      subproc_ == nullptr;
+  if (start_heavy_subproc_now) {
+    // now, it's ok to run subprocess.
+    stats_->set_local_run_reason("slow goma linking");
+    SetupSubProcess();
+  }
+}
+
+// Completion handler of the Exec call issued by ProcessCallExec().
+//
+// Must run on the task's thread in state CALL_EXEC (enforced by CHECK).
+// Moves the received response into |resp_|, records RPC statistics, the
+// cache hit source and the network failure type, and then does one of:
+//  - finishes the task if it was aborted;
+//  - switches to LOCAL_RUN if the RPC failed while a subprocess is running;
+//  - re-schedules ProcessCallExec() when there is a retry reason, the http
+//    client is healthy, the retry budget (kMaxExecRetry) is not exhausted
+//    and the response error is not fatal;
+//  - finishes the task with "fail in call exec";
+//  - continues with ProcessFileResponse().
+void CompileTask::ProcessCallExecDone() {
+  VLOG(1) << trace_id_ << " call exec done";
+  CHECK(BelongsToCurrentThread());
+  CHECK_EQ(CALL_EXEC, state_);
+  exit_status_ = exec_resp_->result().exit_status();
+  resp_->Swap(exec_resp_.get());
+  exec_resp_.reset();
+  // Server side error messages are kept in exec_error_message_ and also
+  // joined (newline separated) into retry_reason.
+  string retry_reason;
+  for (const auto& msg : resp_->error_message()) {
+    exec_error_message_.push_back(msg);
+    if (!retry_reason.empty()) {
+      retry_reason += "\n";
+    }
+    retry_reason += msg;
+  }
+  // clear error_message from server.
+  // server error message logged, but not send back to user.
+  resp_->clear_error_message();
+
+  stats_->add_rpc_call_time(rpc_call_timer_.GetInMs());
+
+  // Detailed RPC stats are recorded only when master_trace_id is empty or
+  // equal to this call's own trace_id.
+  if (http_rpc_status_->master_trace_id.empty() ||
+      http_rpc_status_->master_trace_id == http_rpc_status_->trace_id) {
+    stats_->add_rpc_req_size(http_rpc_status_->req_size);
+    stats_->add_rpc_resp_size(http_rpc_status_->resp_size);
+    stats_->add_rpc_raw_req_size(http_rpc_status_->raw_req_size);
+    stats_->add_rpc_raw_resp_size(http_rpc_status_->raw_resp_size);
+    stats_->add_rpc_throttle_time(http_rpc_status_->throttle_time);
+    stats_->add_rpc_pending_time(http_rpc_status_->pending_time);
+    stats_->add_rpc_req_build_time(http_rpc_status_->req_build_time);
+    stats_->add_rpc_req_send_time(http_rpc_status_->req_send_time);
+    stats_->add_rpc_wait_time(http_rpc_status_->wait_time);
+    stats_->add_rpc_resp_recv_time(http_rpc_status_->resp_recv_time);
+    stats_->add_rpc_resp_parse_time(http_rpc_status_->resp_parse_time);
+  }
+  stats_->add_rpc_master_trace_id(http_rpc_status_->master_trace_id);
+
+
+  stats_->set_cache_hit(
+    http_rpc_status_->finished &&
+    resp_->has_cache_hit() &&
+    resp_->cache_hit() != ExecResp::NO_CACHE);
+
+  // NOTE(review): cache_hit is set above only when resp_->has_cache_hit() is
+  // true and resp_->cache_hit() != NO_CACHE, so the "old backends" branch
+  // and the NO_CACHE case below look unreachable -- confirm before relying
+  // on them.
+  if (stats_->cache_hit()) {
+    if (!resp_->has_cache_hit()) {
+      // for old backends.
+      stats_->set_cache_source(ExecLog::UNKNOWN_CACHE);
+    } else {
+      switch (resp_->cache_hit()) {
+        case ExecResp::NO_CACHE:
+          LOG(ERROR) << trace_id_ << " cache_hit, but NO_CACHE";
+          break;
+        case ExecResp::MEM_CACHE:
+          stats_->set_cache_source(ExecLog::MEM_CACHE);
+          break;
+        case ExecResp::STORAGE_CACHE:
+          stats_->set_cache_source(ExecLog::STORAGE_CACHE);
+          break;
+        default:
+          LOG(ERROR) << trace_id_ << " unknown cache_source="
+                     << resp_->cache_hit();
+          stats_->set_cache_source(ExecLog::UNKNOWN_CACHE);
+      }
+    }
+  }
+
+
+  if (resp_->has_cache_key())
+    resp_cache_key_ = resp_->cache_key();
+
+  if (abort_) {
+    ProcessFinished("aborted in call exec");
+    return;
+  }
+
+  // Classify the network failure type from the http status; on error the
+  // type defaults to UNKNOWN_NETWORK_ERROR and is refined by the state the
+  // http client had reached.
+  if (!http_rpc_status_->enabled) {
+    stats_->set_network_failure_type(ExecLog::DISABLED);
+  } else if (http_rpc_status_->err == 0) {
+    stats_->set_network_failure_type(ExecLog::NO_NETWORK_ERROR);
+  } else {   // i.e. http_rpc_status_->err != 0.
+    stats_->set_network_failure_type(ExecLog::UNKNOWN_NETWORK_ERROR);
+    switch (http_rpc_status_->state) {
+      case HttpClient::Status::INIT:  FALLTHROUGH_INTENDED;
+      case HttpClient::Status::PENDING:
+        stats_->set_network_failure_type(ExecLog::CONNECT_FAILED);
+        break;
+      case HttpClient::Status::SENDING_REQUEST:
+        stats_->set_network_failure_type(ExecLog::SEND_FAILED);
+        break;
+      case HttpClient::Status::REQUEST_SENT:
+        stats_->set_network_failure_type(ExecLog::TIMEDOUT_AFTER_SEND);
+        break;
+      case HttpClient::Status::RECEIVING_RESPONSE:
+        stats_->set_network_failure_type(ExecLog::RECEIVE_FAILED);
+        break;
+      case HttpClient::Status::RESPONSE_RECEIVED:
+        if (http_rpc_status_->http_return_code != 200) {
+          stats_->set_network_failure_type(ExecLog::BAD_HTTP_STATUS_CODE);
+        }
+        break;
+    }
+  }
+
+  const int err = http_rpc_status_->err;
+  if (err < 0) {
+    LOG(WARNING) << trace_id_ << " rpc err=" << err << " "
+                 << (err == ERR_TIMEOUT ? " timed out" : " failed")
+                 << " " << http_rpc_status_->err_message;
+    if (IsSubprocRunning()) {
+      VLOG(1) << trace_id_ << " goma failed, but subprocess running.";
+      state_ = LOCAL_RUN;
+      stats_->set_local_run_reason("fail goma, local run started in CALL_EXEC");
+      return;
+    }
+    AddErrorToResponse(TO_LOG, "", true);
+    // Don't Retry if it is client error: 3xx or 4xx.
+    // Retry if it is server error: 5xx (e.g. 502 error from GFE)
+    //
+    // Also, OK to retry on socket timeout occurred during receiving response.
+    if (((http_rpc_status_->http_return_code / 100) == 5) ||
+        (http_rpc_status_->state == HttpClient::Status::RECEIVING_RESPONSE)) {
+      std::ostringstream ss;
+      ss << "RPC failed http=" << http_rpc_status_->http_return_code
+         << ": " << http_rpc_status_->err_message;
+      if (!retry_reason.empty()) {
+        retry_reason += "\n";
+      }
+      retry_reason += ss.str();
+    } else {
+      // No retry for client error: 3xx, 4xx (302, 403 for dos block,
+      // 401 for auth error, etc).
+      LOG(WARNING) << trace_id_ << " RPC failed http="
+                   << http_rpc_status_->http_return_code
+                   << ": " << http_rpc_status_->err_message
+                   << ": no retry";
+    }
+  }
+  if (err == OK && resp_->missing_input_size() > 0) {
+    // missing input will be handled in ProcessFileResponse and
+    // ProcessFileRequest will retry the request with uploading
+    // contents of missing inputs.
+    // Just retrying the request here would not upload contents
+    // so probably fails with missing input again, so don't retry here.
+    LOG_IF(WARNING, !retry_reason.empty())
+        << trace_id_ << " missing inputs:" << resp_->missing_input_size()
+        << " but retry_reason set:" << retry_reason;
+  } else if (!retry_reason.empty()) {
+    if (service_->http_client()->IsHealthy()) {
+      LOG(INFO) << trace_id_ << " exec retry:"
+                << stats_->exec_request_retry()
+                << " error=" << resp_->error()
+                << " " << retry_reason;
+      stats_->set_exec_request_retry(stats_->exec_request_retry() + 1);
+      if (stats_->exec_request_retry() <= kMaxExecRetry &&
+          !(resp_->has_error() && IsFatalError(resp_->error()))) {
+        stats_->add_exec_request_retry_reason(retry_reason);
+        LOG(INFO) << trace_id_ << " retry in CALL_EXEC";
+        resp_->clear_error_message();
+        resp_->clear_error();
+        // ProcessCallExec() requires state FILE_REQ, so step back before
+        // re-scheduling it.
+        state_ = FILE_REQ;
+        service_->wm()->RunClosureInThread(
+            FROM_HERE,
+            thread_id_,
+            NewCallback(this, &CompileTask::ProcessCallExec),
+            WorkerThreadManager::PRIORITY_LOW);
+        return;
+      } else {
+        LOG(WARNING) << trace_id_ << " exec error:"
+                     << resp_->error()
+                     << " " << retry_reason
+                     << " but http is healthy";
+      }
+    }
+    CheckNoMatchingCommandSpec(retry_reason);
+    ProcessFinished("fail in call exec");
+    return;
+  }
+
+  // RPC error without any retry reason: give up.
+  if (err < 0) {
+    ProcessFinished("fail in call exec");
+    return;
+  }
+
+  // Saves embedded upload information. We have to call this before
+  // clearing inputs.
+  StoreEmbeddedUploadInformationIfNeeded();
+
+  ReleaseMemoryForExecReqInput(req_.get());
+
+  if (resp_->missing_input_size() == 0) {
+    // Check command spec when not missing input response.
+    CheckCommandSpec();
+  }
+  ProcessFileResponse();
+}
+
+void CompileTask::ProcessFileResponse() {
+  VLOG(1) << trace_id_ << " file resp";
+  CHECK(BelongsToCurrentThread());
+  CHECK(state_ == CALL_EXEC || state_ == LOCAL_OUTPUT) << state_;
+  if (abort_) {
+    ProcessFinished("aborted before file resp");
+    return;
+  }
+  state_ = FILE_RESP;
+  if (ShouldStopGoma()) {
+    state_ = LOCAL_RUN;
+    stats_->set_local_run_reason("slow goma, local run started in FILE_RESP");
+    return;
+  }
+  file_response_timer_.Start();
+  if (resp_->missing_input_size() > 0) {
+    stats_->add_num_missing_input_file(resp_->missing_input_size());
+    LOG(WARNING) << trace_id_
+                 << " request didn't have full content:"
+                 << resp_->missing_input_size()
+                 << " in "
+                 << required_files_.size()
+                 << " : retry=" << stats_->exec_request_retry();
+    for (const auto& filename : resp_->missing_input()) {
+      std::ostringstream ss;
+      ss << "Required file not on goma cache:" << filename;
+      if (interleave_uploaded_files_.find(filename)
+          != interleave_uploaded_files_.end()) {
+        ss << " (interleave uploaded)";
+      }
+      AddErrorToResponse(TO_LOG, ss.str(), true);
+    }
+    for (const auto& reason : resp_->missing_reason()) {
+      AddErrorToResponse(TO_LOG, reason, true);
+    }
+    int need_to_send_content_threshold = required_files_.size() / 2;
+    if (!service_->need_to_send_content()
+        && (resp_->missing_input_size() > need_to_send_content_threshold)) {
+      LOG(WARNING) << trace_id_
+                   << " Lots of missing files. Will send file contents"
+                   << " even if it's old enough.";
+      service_->SetNeedToSendContent(true);
+    }
+    output_file_success_ = false;
+    ProcessFileResponseDone();
+    return;
+  }
+  if (stats_->exec_request_retry() == 0 && service_->need_to_send_content()) {
+    LOG(INFO) << trace_id_ << " no missing files."
+              << " Turn off to force sending old file contents";
+    service_->SetNeedToSendContent(false);
+  }
+
+  // No missing input files.
+  if (!IsGomaccRunning()) {
+    PLOG(WARNING) << trace_id_
+                  << " pid:" << gomacc_pid_ << " does not receive signal 0 "
+                  << " abort=" << abort_;
+    // user may not receive the error message, because gomacc already killed.
+    AddErrorToResponse(TO_LOG, "gomacc killed?", true);
+    // If the requesting process was already dead, we should not write output
+    // files.
+    ProcessFinished("gomacc killed");
+    return;
+  }
+
+  // Decide if it could use in-memory output or not and should write output
+  // in tmp file or not.
+  bool want_in_memory_output = true;
+  string need_rename_reason;
+  if (verify_output_) {
+    VLOG(1) << trace_id_ << " output need_rename for verify_output";
+    want_in_memory_output = false;
+    need_rename_reason = "verify_output";
+  } else if (!success()) {
+    VLOG(1) << trace_id_ << " output need_rename for fail exec";
+    // TODO: we don't need to write remote output for fail exec?
+    want_in_memory_output = false;
+    need_rename_reason = "fail exec";
+  } else {
+    // resp_ contains whole output data, and no need to more http_rpc to
+    // fetch output file data, so no need to run local compiler any more.
+    if (delayed_setup_subproc_ != nullptr) {
+      delayed_setup_subproc_->Cancel();
+      delayed_setup_subproc_ = nullptr;
+    }
+    if (subproc_ != nullptr) {
+      // racing between remote and local.
+      // even if subproc_->started().pid() == kInvalidPid, subproc might
+      // have started (because compile_proxy and subproc is async).
+      // The compile task wants in_memory output by default, but when it
+      // couldn't use in memory output because of lack of memory, it
+      // should write output in tmp file (i.e. need to rename).
+      // TODO: cancel subproc if it was not started yet,
+      //             or use local subproc if it has already started.
+      VLOG(1) << trace_id_ << " output need_rename for local_subproc "
+              << subproc_->started().pid();
+      std::ostringstream ss;
+      ss << "local_subproc pid=" << subproc_->started().pid();
+      need_rename_reason = ss.str();
+    }
+  }
+
+  exec_output_file_.clear();
+  ClearOutputFile();
+  output_file_.resize(resp_->result().output_size());
+  SetOutputFileCallback();
+  std::vector<OneshotClosure*> closures;
+  for (int i = 0; i < resp_->result().output_size(); ++i) {
+    const string& output_filename = resp_->result().output(i).filename();
+    CheckOutputFilename(output_filename);
+
+    exec_output_file_.push_back(output_filename);
+    string filename = file::JoinPathRespectAbsolute(
+        stats_->cwd(), output_filename);
+    // TODO: check output paths matches with flag's output filenames?
+    if (service_->enable_gch_hack() && strings::EndsWith(filename, ".gch"))
+      filename += ".goma";
+
+    OutputFileInfo* output_info = &output_file_[i];
+    output_info->filename = filename;
+    bool try_acquire_output_buffer = want_in_memory_output;
+    if (FileServiceClient::IsValidFileBlob(resp_->result().output(i).blob())) {
+      output_info->size = resp_->result().output(i).blob().file_size();
+    } else {
+      LOG(ERROR) << trace_id_ << " output is invalid:"
+                 << filename;
+      try_acquire_output_buffer = false;
+    }
+    if (try_acquire_output_buffer && service_->AcquireOutputBuffer(
+            output_info->size, &output_info->content)) {
+      output_info->tmp_filename.clear();
+      VLOG(1) << trace_id_ << " output in buffer:"
+              << filename
+              << " size="
+              << output_info->size;
+    } else {
+      if (!need_rename_reason.empty()) {
+        std::ostringstream ss;
+        ss << filename << ".tmp." << id();
+        output_info->tmp_filename = ss.str();
+        LOG(INFO) << trace_id_ << " output in tmp file:"
+                  << output_info->tmp_filename
+                  << " for " << need_rename_reason;
+      } else {
+        // no need to rename, so write output directly to the output file.
+        output_info->tmp_filename = filename;
+        LOG(INFO) << trace_id_ << " output in file:" << filename;
+      }
+    }
+    if (resp_->result().output(i).is_executable())
+      output_info->mode = 0777;
+    if (requester_env_.has_umask()) {
+      output_info->mode &= ~requester_env_.umask();
+      VLOG(1) << trace_id_ << " output file mode is updated."
+              << " filename=" << filename
+              << " mode=" << std::oct << output_info->mode;
+    }
+    std::unique_ptr<OutputFileTask> output_file_task(
+        new OutputFileTask(
+            service_->wm(),
+            service_->file_service()->WithRequesterInfoAndTraceId(
+                requester_info_, trace_id_),
+            this, i, resp_->result().output(i),
+            output_info));
+
+    OutputFileTask* output_file_task_pointer = output_file_task.get();
+    closures.push_back(
+        NewCallback(
+            output_file_task_pointer,
+            &OutputFileTask::Run,
+            NewCallback(
+                this,
+                &CompileTask::OutputFileTaskFinished,
+                std::move(output_file_task))));
+  }
+  stats_->set_num_output_file(closures.size());
+  if (closures.empty()) {
+    MaybeRunOutputFileCallback(-1, false);
+  } else {
+    for (auto* closure : closures) {
+      service_->wm()->RunClosure(
+          FROM_HERE, closure, WorkerThreadManager::PRIORITY_LOW);
+    }
+  }
+}
+
+// Handles completion of the output file phase (state_ must be FILE_RESP).
+//
+// Records the file response time, then takes one of these paths:
+//  - abort_ is set: finish with "aborted in file resp".
+//  - output files were not processed successfully: either hand over to the
+//    still-running local subprocess (state_ becomes LOCAL_RUN), retry the
+//    request via TryProcessFileRequest() (up to kMaxExecRetry attempts, and
+//    only while the compiler is not disabled and http was healthy recently),
+//    or finish with "failed in file response".
+//  - verify_output_ is set: verify every output file, then finish with
+//    "verify done".
+//  - otherwise: finish with "" on success, or drop the outputs and finish
+//    with "fail exec".
+void CompileTask::ProcessFileResponseDone() {
+  VLOG(1) << trace_id_ << " file resp done";
+  CHECK(BelongsToCurrentThread());
+  CHECK_EQ(FILE_RESP, state_);
+
+  stats_->set_file_response_time(file_response_timer_.GetInMs());
+
+  if (abort_) {
+    ProcessFinished("aborted in file resp");
+    return;
+  }
+  if (!output_file_success_) {
+    // abort_ was already handled above; the guard is kept as in the original.
+    if (!abort_) {
+      // Outside of gch hack precompiling, a running local subprocess can
+      // take over: just switch to LOCAL_RUN and let it finish the task.
+      if (!(precompiling_ && service_->enable_gch_hack()) &&
+          IsSubprocRunning()) {
+        VLOG(1) << trace_id_ << " failed to process file response,"
+                << " but subprocess running";
+        state_ = LOCAL_RUN;
+        stats_->set_local_run_reason(
+            "fail goma, local run started in FILE_RESP");
+        return;
+      }
+
+      // For missing input error, we don't make it as error but warning
+      // when this is the first try and we will retry it later.
+      bool should_error = stats_->exec_request_retry() > 0;
+      std::ostringstream ss;
+      ss << "Try:" << stats_->exec_request_retry() << ": ";
+      if (resp_->missing_input_size() > 0) {
+        // goma server replied with missing inputs.
+        // retry: use the list of missing files in response to fill in
+        // needed files
+        ss << "Missing " << resp_->missing_input_size() << " input files.";
+      } else {
+        should_error = true;
+        // Note the leading space in " result": without it the message read
+        // "cachedresult" / "no-cachedresult".
+        ss << "Failed to download "
+           << stats_->num_output_file()
+           << " files"
+           << " in " << (cache_hit() ? "cached" : "no-cached") << " result";
+      }
+
+      // Decide whether another attempt is allowed. The retry counter is
+      // only bumped when neither "no retry" precondition applies.
+      bool do_retry = false;
+      std::ostringstream no_retry_reason;
+      if (compiler_info_state_.disabled()) {
+        no_retry_reason << "compiler disabled. no retry."
+                        << " disabled_reason="
+                        << compiler_info_state_.GetDisabledReason();
+      } else if (!service_->http_client()->IsHealthyRecently()) {
+        no_retry_reason << "http is unhealthy. no retry."
+                        << " health_status="
+                        << service_->http_client()->GetHealthStatusMessage();
+      } else {
+        stats_->set_exec_request_retry(stats_->exec_request_retry() + 1);
+        do_retry = stats_->exec_request_retry() <= kMaxExecRetry;
+        if (!do_retry) {
+          no_retry_reason << "too many retry";
+        }
+      }
+
+      // When we give up, the failure is always reported as an error.
+      if (!do_retry)
+        should_error = true;
+      AddErrorToResponse(TO_LOG, ss.str(), should_error);
+
+      if (do_retry) {
+        if (!service_->http_client()->IsHealthy()) {
+          LOG(WARNING) << trace_id_ << " http is unhealthy, but retry."
+                       << " health_status="
+                       << service_->http_client()->GetHealthStatusMessage();
+        }
+        VLOG(2) << trace_id_
+                << " Failed to process file response (we will retry):"
+                << resp_->DebugString();
+        stats_->add_exec_request_retry_reason(ss.str());
+        LOG(INFO) << trace_id_ << " retry in FILE_RESP";
+        // Drop the error messages of the failed attempt before retrying.
+        resp_->clear_error_message();
+        TryProcessFileRequest();
+        return;
+      } else {
+        AddErrorToResponse(TO_LOG, no_retry_reason.str(), true);
+      }
+    }
+    VLOG(2) << trace_id_
+            << " Failed to process file response (second time):"
+            << resp_->DebugString();
+    ProcessFinished("failed in file response");
+    return;
+  }
+
+  if (verify_output_) {
+    CHECK(subproc_ == nullptr);
+    CHECK(delayed_setup_subproc_ == nullptr);
+    for (const auto& info : output_file_) {
+      const string& filename = info.filename;
+      const string& tmp_filename = info.tmp_filename;
+      if (!VerifyOutput(filename, tmp_filename)) {
+        output_file_success_ = false;
+      }
+    }
+    output_file_.clear();
+    ProcessFinished("verify done");
+    return;
+  }
+  if (success()) {
+    ProcessFinished("");
+  } else {
+    ClearOutputFile();
+    ProcessFinished("fail exec");
+  }
+}
+
+// Marks the remote (goma) side of the task as finished and decides what
+// happens next. |msg| describes why the task finished; an empty |msg| is the
+// normal success path (success() and state FILE_RESP are DCHECKed then).
+//
+// Must be called at most once per task (finished_ is DCHECKed to be false)
+// and while state_ is still before FINISHED. Depending on the task's flags
+// it replies immediately (ReplyResponse / ProcessReply), waits for or kills
+// the local subprocess, calls Done() for an aborted task, or starts a
+// "fail fallback" local run through RunSubProcess(msg).
+void CompileTask::ProcessFinished(const string& msg) {
+  if (abort_ || !msg.empty()) {
+    LOG(INFO) << trace_id_ << " finished " << msg
+              << " state=" << StateName(state_)
+              << " abort=" << abort_;
+  } else {
+    VLOG(1) << trace_id_ << " finished " << msg
+            << " state=" << StateName(state_);
+    DCHECK(success()) << trace_id_ << " finished";
+    DCHECK_EQ(FILE_RESP, state_) << trace_id_ << " finished";
+  }
+  CHECK(BelongsToCurrentThread());
+  CHECK_LT(state_, FINISHED);
+  DCHECK(!finished_);
+  finished_ = true;
+  if (state_ == INIT) {
+    // failed to find local compiler path.
+    // it also happens if user uses old gomacc.
+    LOG(ERROR) << trace_id_ << " failed in INIT.";
+    CHECK(subproc_ == nullptr);
+    CHECK(delayed_setup_subproc_ == nullptr);
+    CHECK(!abort_);
+    state_ = FINISHED;
+    ReplyResponse("failed in INIT");
+    return;
+  }
+  // An aborted task keeps its current state; only a non-aborted task moves
+  // to FINISHED here.
+  if (!abort_)
+    state_ = FINISHED;
+  if (verify_output_) {
+    VLOG(2) << trace_id_ << " verify response:" << resp_->DebugString();
+    CHECK(subproc_ == nullptr);
+    CHECK(delayed_setup_subproc_ == nullptr);
+    ReplyResponse("verify done");
+    return;
+  }
+  if (precompiling_ && service_->enable_gch_hack()) {
+    // In gch hack mode, we'll run both local and remote simultaneously.
+    if (subproc_ != nullptr) {
+      // subprocess still running.
+      // we'll reply response when subprocess is finished.
+      return;
+    }
+    // subprocess finished first.
+    CHECK(delayed_setup_subproc_ == nullptr);
+    VLOG(1) << trace_id_ << " gch hack: local and goma finished.";
+    ProcessReply();
+    return;
+  }
+
+  if (!requester_env_.fallback()) {
+    VLOG(1) << trace_id_ << " goma finished and no fallback.";
+    CHECK(subproc_ == nullptr);
+    CHECK(delayed_setup_subproc_ == nullptr);
+    ProcessReply();
+    return;
+  }
+  if (abort_) {
+    // local finished first (race or verify output).
+    if (local_output_file_callback_ == nullptr)
+      Done();
+    // If local_output_file_callback_ is not nullptr, uploading local output
+    // file is on the fly, so ProcessLocalFileOutputDone() will be called
+    // later.
+    return;
+  }
+  CHECK_EQ(FINISHED, state_);
+  // No fallback run is needed when the remote result succeeded, when gomacc
+  // is no longer running, or when fallback is not wanted for this task.
+  if (success() || !IsGomaccRunning() || !want_fallback_) {
+    if (!success() && !want_fallback_) {
+      LOG(INFO) << trace_id_ << " failed and no need to fallback";
+    } else {
+      VLOG(1) << trace_id_ << " success or gomacc killed.";
+    }
+    stats_->clear_local_run_reason();
+    if (delayed_setup_subproc_ != nullptr) {
+      delayed_setup_subproc_->Cancel();
+      delayed_setup_subproc_ = nullptr;
+    }
+    if (subproc_ != nullptr) {
+      LOG(INFO) << trace_id_ << " goma finished, killing subproc pid="
+                << subproc_->started().pid();
+      KillSubProcess();  // FinishSubProcess will be called.
+    } else {
+      ProcessReply();  // GOMA_FALLBACK=false or GOMA_USE_LOCAL=false
+    }
+    return;
+  }
+  // From here on: the remote result failed, gomacc is still running and
+  // fallback is wanted, so fall back to a local run ("fail fallback").
+  LOG(INFO) << trace_id_ << " fail fallback"
+            << " exit=" << resp_->result().exit_status()
+            << " cache_key=" << resp_->cache_key()
+            << " flag=" << flag_dump_;
+  DCHECK(requester_env_.fallback());
+  DCHECK(!fail_fallback_);
+  // Keep the remote stdout/stderr and write them to the log.
+  stdout_ = resp_->result().stdout_buffer();
+  stderr_ = resp_->result().stderr_buffer();
+  LogCompilerOutput(trace_id_, "stdout", stdout_);
+  LogCompilerOutput(trace_id_, "stderr", stderr_);
+
+  fail_fallback_ = true;
+  // TODO: active fail fallback only for http error?
+  // b/36576025 b/36577821
+  if (!service_->IncrementActiveFailFallbackTasks()) {
+    // The service refused another active fail fallback: report the error
+    // to the user instead of running locally.
+    AddErrorToResponse(
+        TO_USER, "reached max number of active fail fallbacks", true);
+    if (delayed_setup_subproc_ != nullptr) {
+      delayed_setup_subproc_->Cancel();
+      delayed_setup_subproc_ = nullptr;
+    }
+    if (subproc_ != nullptr) {
+      LOG(INFO) << trace_id_ << " killing subproc pid="
+                << subproc_->started().pid();
+      KillSubProcess();  // FinishSubProcess will be called.
+    } else {
+      ProcessReply();  // GOMA_FALLBACK=false or GOMA_USE_LOCAL=false
+    }
+    return;
+  }
+  if (subproc_ == nullptr) {
+    // subproc_ might be nullptr (e.g. GOMA_USE_LOCAL=false).
+    SetupSubProcess();
+  }
+  RunSubProcess(msg);
+}
+
+// Commits the outputs (when gomacc is still running), optionally stores them
+// in the local output cache, removes the subprocess stdout/stderr files and
+// replies to the requester.
+//
+// Requires state_ == FINISHED, no (delayed) subprocess and no abort.
+void CompileTask::ProcessReply() {
+  VLOG(1) << trace_id_ << " process reply";
+  DCHECK(BelongsToCurrentThread());
+  CHECK_EQ(FINISHED, state_);
+  CHECK(subproc_ == nullptr);
+  CHECK(delayed_setup_subproc_ == nullptr);
+  CHECK(!abort_);
+
+  // Reply message used when gomacc is no longer running.
+  string reply_msg = "goma canceled";
+  if (IsGomaccRunning()) {
+    VLOG(2) << trace_id_ << " goma result:" << resp_->DebugString();
+
+    // if we ran local process and dont_kill_subprocess is true, we just
+    // use local results, so we don't need to rename remote outputs.
+    const bool prefer_local =
+        local_run_ && service_->dont_kill_subprocess();
+    CommitOutput(!prefer_local);
+    if (prefer_local) {
+      reply_msg = "goma success, but local used";
+    } else if (localoutputcache_lookup_succeeded_) {
+      reply_msg = "goma success (local cache hit)";
+    } else if (cache_hit()) {
+      reply_msg = "goma success (cache hit)";
+    } else {
+      reply_msg = "goma success";
+    }
+
+    // Here, local or remote output has been performed. Save it only when
+    // the cache is enabled, the result did not come from the cache already,
+    // an output cache key exists and the compile succeeded.
+    // Note: we need to save output before ReplyResponse. Otherwise,
+    // output file might be removed by ninja.
+    const bool should_save = LocalOutputCache::IsEnabled() &&
+                             !localoutputcache_lookup_succeeded_ &&
+                             !local_output_cache_key_.empty() &&
+                             success();
+    if (should_save &&
+        !LocalOutputCache::instance()->SaveOutput(local_output_cache_key_,
+                                                  req_.get(),
+                                                  resp_.get(),
+                                                  trace_id_)) {
+      LOG(ERROR) << trace_id_ << " failed to save localoutputcache";
+    }
+  }
+
+  // Remove the files named by subproc_stdout_ / subproc_stderr_, if any.
+  if (!subproc_stdout_.empty()) {
+    remove(subproc_stdout_.c_str());
+  }
+  if (!subproc_stderr_.empty()) {
+    remove(subproc_stderr_.c_str());
+  }
+  ReplyResponse(reply_msg);
+}
+
+// Arguments for CompileTask::RenameCallback.
+struct CompileTask::RenameParam {
+  // Path of the existing file to rename.
+  string oldpath;
+  // Path the file is renamed to.
+  string newpath;
+};
+
+// Renames param->oldpath to param->newpath.
+//
+// On success |*err| is left empty. On any failure (whatever errno is)
+// |*err| is set to a message containing both paths and the errno value;
+// DoOutput() decides what to do with it (retry on Win32, report on POSIX).
+void CompileTask::RenameCallback(RenameParam* param, string* err) {
+  err->clear();
+  if (rename(param->oldpath.c_str(), param->newpath.c_str()) == 0) {
+    return;
+  }
+  // Capture errno right away: the stream operations below may change it
+  // before it gets formatted.
+  const int rename_errno = errno;
+  std::ostringstream ss;
+  ss << "rename error:" << param->oldpath << " " << param->newpath
+     << " errno=" << rename_errno;
+  *err = ss.str();
+  // Restore errno so that a PLOG in the caller reports rename's error.
+  errno = rename_errno;
+}
+
+// Arguments for CompileTask::ContentOutputCallback.
+struct CompileTask::ContentOutputParam {
+  // Path of the file the content is written to.
+  string filename;
+  // Output entry providing the content and file mode; null until assigned.
+  OutputFileInfo* info = nullptr;
+};
+
+// Writes param->info->content to param->filename using param->info->mode.
+//
+// Any existing file at that path is removed first. On success |*err| is
+// left empty; otherwise it holds "open for write error:<path>" or
+// "write error:<path>".
+void CompileTask::ContentOutputCallback(
+    ContentOutputParam* param, string* err) {
+  const string& path = param->filename;
+  err->clear();
+  remove(path.c_str());
+
+  std::unique_ptr<FileServiceClient::Output> writer(
+      FileServiceClient::FileOutput(path, param->info->mode));
+  if (!writer->IsValid()) {
+    *err = "open for write error:" + path;
+    return;
+  }
+
+  // Close() is only attempted when the write itself succeeded.
+  const bool written =
+      writer->WriteAt(0L, param->info->content) && writer->Close();
+  if (!written) {
+    *err = "write error:" + path;
+  }
+}
+
+#ifdef _WIN32
+// Win32 variant. Runs |closure| (an output operation named |opname| that
+// targets |filename|) and inspects |*err|, which the closure is expected to
+// fill in on failure and leave empty on success.
+//
+// On failure it checks that |filename| exists, tries to delete it, and runs
+// the closure again, up to kMaxDeleteRetryForDoOutput times with a doubling
+// sleep after each failed delete. If the operation never succeeds, the
+// error is reported to the user via AddErrorToResponse.
+void CompileTask::DoOutput(const string& opname, const string& filename,
+                           PermanentClosure* closure, string* err) {
+  static const int kMaxDeleteRetryForDoOutput = 5;
+  // Large sleep time will not harm a normal user.
+  // Followings are executed after termination of the child process,
+  // and deletion usually succeeds without retrying.
+  static const int kInitialRetrySleepInMs = 100;
+  // On Posix, rename success if target file already exists and it is
+  // in writable directory.
+  // On Win32, rename will fail if target file already exists, so we
+  // need to delete it explicitly before rename.
+  // In this code, we assume a file is temporary locked by a process
+  // like AntiVirus, and the lock will be released for a while.
+  //
+  // You may consider to use MoveFileEx with MOVEFILE_REPLACE_EXISTING.
+  // Calling it may take forever and stall compiler_proxy if the process
+  // having the lock is not behaving. As a result, we do not use it.
+  int sleep_in_ms = kInitialRetrySleepInMs;
+  for (int retry = 0; retry < kMaxDeleteRetryForDoOutput; ++retry) {
+    closure->Run();
+    if (err->empty()) {
+      return;
+    }
+    LOG(WARNING) << trace_id_ << " DoOutput operation failed."
+                 << " opname=" << opname
+                 << " filename=" << filename
+                 << " err=" << *err;
+
+    // TODO: identify a process that has a file lock.
+    // As far as I know, people seems to use NtQueryInformationProcess,
+    // which is an obsoleted function, to list up processes.
+
+    // http://msdn.microsoft.com/en-us/library/windows/desktop/aa364944(v=vs.85).aspx
+    DWORD attr = GetFileAttributesA(filename.c_str());
+    if (attr == INVALID_FILE_ATTRIBUTES) {
+      // The attributes could not be read, so an existing target file is not
+      // the explanation we handle here: report to the user and give up.
+      LOG_SYSRESULT(GetLastError());
+      std::ostringstream ss;
+      ss << opname << " failed but GetFileAttributes "
+         << "returns INVALID_FILE_ATTRIBUTES"
+         << " filename=" << filename
+         << " attr=" << attr;
+      AddErrorToResponse(TO_USER, ss.str(), true);
+      return;
+    }
+
+    LOG(INFO) << trace_id_ << " "
+              << "The file exists. We need to remove."
+              << " filename=" << filename
+              << " attr=" << attr;
+    if (remove(filename.c_str()) == 0) {
+      LOG(INFO) << trace_id_ << " "
+                << "Delete succeeds."
+                << " filename=" << filename;
+      // Retry the operation immediately, without sleeping.
+      continue;
+    }
+
+    LOG(WARNING) << trace_id_ << " "
+                 << "Failed to delete file:"
+                 << " filename=" << filename
+                 << " retry=" << retry
+                 << " sleep_in_ms=" << sleep_in_ms;
+    // Back off before the next attempt; the wait doubles each time.
+    Sleep(sleep_in_ms);
+    sleep_in_ms *= 2;
+  }
+  // All attempts failed.
+  if (err->empty()) {
+    std::ostringstream ss;
+    ss << opname << " failed but err is empty?";
+    *err = ss.str();
+  }
+  PLOG(ERROR) << trace_id_ << " " << *err;
+  AddErrorToResponse(TO_USER, *err, true);
+}
+#else
+// Non-Win32 variant. Runs |closure| once (an output operation named
+// |opname| that targets |filename|). If the closure left a message in
+// |*err|, the failure is logged and reported to the user via
+// AddErrorToResponse; there is no retry.
+void CompileTask::DoOutput(const string& opname,
+                           const string& filename,
+                           PermanentClosure* closure,
+                           string* err) {
+  closure->Run();
+  if (!err->empty()) {
+    PLOG(ERROR) << trace_id_ << " DoOutput operation failed."
+                << " opname=" << opname
+                << " filename=" << filename
+                << " err=" << *err;
+    AddErrorToResponse(TO_USER, *err, true);
+  }
+}
+#endif
+
+// Overwrites the timestamp field in the COFF header of |filename| with the
+// current time.
+//
+// Only files whose extension is "obj" are touched. Both the plain COFF
+// header (x86 / x64 machine type) and the /bigobj header are recognized;
+// for any other header the first bytes are logged as an error and the file
+// is left unchanged. Failures are logged only; nothing is returned.
+void CompileTask::RewriteCoffTimestamp(const string& filename) {
+  StringPiece ext = file::Extension(filename);
+  if (ext != "obj")
+    return;
+
+  ScopedFd fd(ScopedFd::OpenForRewrite(filename));
+  if (!fd.valid()) {
+    LOG(ERROR) << trace_id_ << " failed to open file for coff rewrite: "
+               << filename;
+    return;
+  }
+
+  // Check COFF file header. COFF header is like this.
+  // c.f. http://delorie.com/djgpp/doc/coff/
+  // 0-1   machine type. must be 0x014c for x86, 0x8664 for x64
+  // 2-3   number of sections (not necessary for us)
+  // 4-7   timestamp
+  // ...
+  //
+  // All numeric fields are stored in host native order.
+  // Currently we're checking magic is x86 or x64, all numeric
+  // should be little endian here.
+  //
+  // When /bigobj is specified in cl.exe, microsoft extends COFF file format
+  // to accept more sections.
+  // In this case, the file header is like this:
+  // 0-1   0x0000 (IMAGE_FILE_MACHINE_UNKNOWN)
+  // 2-3   0xFFFF
+  // 4-5   version (0x0001 or 0x0002)
+  // 6-7   machine (0x014c or 0x8664)
+  // 8-11  timestamp
+  // 12-27 uuid: 38feb30ca5d9ab4dac9bd6b6222653c2 for version 0x0001
+  //             c7a1bad1eebaa94baf20faf66aa4dcb8 for version 0x0002
+  //
+  // TODO: Find bigobj version 1 document and add link here.
+
+  unsigned char buf[32];
+  ssize_t read_byte = fd.Read(buf, sizeof(buf));
+  if (read_byte != sizeof(buf)) {
+    LOG(ERROR) << trace_id_
+               << " couldn't read the first " << sizeof(buf)
+               << " byte. file is too small?"
+               << " filename=" << filename
+               << " read_byte=" << read_byte;
+    return;
+  }
+
+  // offset stays 0 when the header is not recognized.
+  unsigned short magic = *reinterpret_cast<unsigned short*>(buf);
+  int offset = 0;
+  if (magic == 0x014c || magic == 0x8664) {
+    offset = 4;
+  } else if (IsBigobjFormat(buf)) {
+    offset = 8;
+  }
+  if (offset > 0) {
+    unsigned int old = *reinterpret_cast<unsigned int*>(buf + offset);
+    unsigned int now = static_cast<unsigned int>(time(nullptr));
+
+    // Only report the rewrite when both the seek and the 4-byte write
+    // worked; previously a failure here was silently logged as a success.
+    // NOTE(review): assumes ScopedFd::Seek / ScopedFd::Write follow the
+    // lseek / write conventions (negative on error, byte count on success)
+    // -- confirm against ScopedFd.
+    const bool sought = fd.Seek(offset, ScopedFd::SeekAbsolute) >= 0;
+    if (!sought || fd.Write(&now, 4) != 4) {
+      LOG(ERROR) << trace_id_
+                 << " failed to rewrite coff timestamp:"
+                 << " file=" << filename
+                 << " offset=" << offset;
+      return;
+    }
+
+    LOG(INFO) << trace_id_
+              << " Rewriting timestamp:" << " file=" << filename
+              << " offset=" << offset
+              << " old=" << old << " new=" << now;
+    return;
+  }
+
+  // Unknown header: dump the bytes we read as hex for diagnosis.
+  std::stringstream ss;
+  for (size_t i = 0; i < sizeof(buf); ++i) {
+    ss << std::hex << std::setw(2) << std::setfill('0')
+       << (static_cast<unsigned int>(buf[i]) & 0xFF);
+  }
+  LOG(ERROR) << trace_id_
+             << " Unknown COFF header."
+             << " filename=" << filename
+             << " first " << sizeof(buf) << "byte=" << ss.str();
+}
+
+// Finalizes every entry of output_file_ and then clears the list.
+//
+// |use_remote| selects which result ends up at the final output path:
+//  - false: the local process's outputs are kept; a remote tmp file is
+//    removed and an in-memory remote buffer is released.
+//  - true: the remote output is put in place, either by writing the
+//    in-memory content, by renaming the tmp file, or by doing nothing when
+//    it was already written directly to the final path.
+//
+// For each entry, tmp_filename encodes where the remote output lives:
+// empty means in memory (info.content), a name different from filename
+// means a tmp file, and the same name means the final file itself.
+//
+// Requires state_ == FINISHED, no abort and no (delayed) subprocess.
+void CompileTask::CommitOutput(bool use_remote) {
+  VLOG(1) << trace_id_ << " commit output " << use_remote;
+  DCHECK(BelongsToCurrentThread());
+  CHECK(state_ == FINISHED);
+  CHECK(!abort_);
+  CHECK(subproc_ == nullptr);
+  CHECK(delayed_setup_subproc_ == nullptr);
+
+  // Base names of all outputs, collected for the summary log at the end.
+  std::vector<string> output_bases;
+  // Set when an output has the object extension of the compiler in use.
+  bool has_obj = false;
+
+  for (auto& info : output_file_) {
+    SimpleTimer timer;
+    const string& filename = info.filename;
+    const string& tmp_filename = info.tmp_filename;
+    const string& hash_key = info.hash_key;
+    DCHECK(!hash_key.empty()) << filename;
+    const bool use_content = tmp_filename.empty();
+    bool need_rename = !tmp_filename.empty() && tmp_filename != filename;
+    if (!use_remote) {
+      // If use_remote is false, we should have outputs of local process.
+      VLOG(1) << trace_id_ << " commit output (use local) in "
+              << filename;
+      if (access(filename.c_str(), R_OK) == 0) {
+        if (need_rename) {
+          // We might have written tmp file for remote output, but decided
+          // to use local output.
+          // In this case, we want to remove tmp file of remote output.
+          remove(tmp_filename.c_str());
+        }
+      } else {
+        // !use_remote, but local output doesn't exist?
+        PLOG(ERROR) << trace_id_ << " " << filename;
+      }
+      if (use_content) {
+        VLOG(1) << trace_id_ << " release buffer of remote output";
+        service_->ReleaseOutputBuffer(info.size, &info.content);
+      }
+      need_rename = false;
+    } else if (use_content) {
+      // If use_remote is true, and use_content is true,
+      // write content (remote output) in filename.
+      VLOG(1) << trace_id_ << " commit output (use remote content) to "
+              << filename;
+      ContentOutputParam param;
+      param.filename = filename;
+      param.info = &info;
+      string err;
+      // The callback reports failure through |err|; DoOutput reads it.
+      std::unique_ptr<PermanentClosure> callback(
+          NewPermanentCallback(
+              this,
+              &CompileTask::ContentOutputCallback,
+              &param, &err));
+      DoOutput("content_output", filename, callback.get(), &err);
+      service_->ReleaseOutputBuffer(info.size, &info.content);
+      need_rename = false;
+    } else if (need_rename) {
+      // If use_remote is true, use_content is false, and
+      // need_rename is true, we wrote remote output in
+      // tmp_filename, and we need to rename tmp_filename
+      // to filename.
+      VLOG(1) << trace_id_ << " commit output (use remote tmp file) "
+              << "rename " << tmp_filename << " => " << filename;
+      RenameParam param;
+      param.oldpath = tmp_filename;
+      param.newpath = filename;
+      string err;
+      // The callback reports failure through |err|; DoOutput reads it.
+      std::unique_ptr<PermanentClosure> callback(
+          NewPermanentCallback(
+             this, &CompileTask::RenameCallback, &param, &err));
+      DoOutput("rename", filename, callback.get(), &err);
+    } else {
+      // If use_remote is true, use_content is false, and
+      // need_rename is false, we wrote remote output in
+      // filename, so do nothing here.
+      VLOG(1) << trace_id_ << " commit output (use remote file) in "
+              << filename;
+    }
+
+    // Incremental Link doesn't work well if object file timestamp is wrong.
+    // If it's Windows object file (.obj) from remote,
+    // we'd like to rewrite timestamp when the content is from remote cache.
+    // According to our measurement, this doesn't have
+    // measureable performance penalty.
+    // see b/24388745
+    if (use_remote && stats_->cache_hit()) {
+      RewriteCoffTimestamp(filename);
+    }
+
+    // need_rename is true here only for the rename path above.
+    service_->RecordOutputRename(need_rename);
+    // The output file is generated in goma cache, so we believe the cache_key
+    // is valid.  It would be used in link phase.
+    service_->file_hash_cache()->StoreFileCacheKey(
+        filename, hash_key, GetCurrentTimestampMs(),
+        output_file_id_cache_->Get(filename));
+    VLOG(1) << trace_id_ << " "
+            << tmp_filename << " -> " << filename
+            << " " << hash_key;
+    LOG_IF(ERROR, !info.content.empty())
+        << trace_id_ << " content was not released: " << filename;
+    // Warn when committing a single output took more than 100 ms.
+    int ms = timer.GetInMs();
+    LOG_IF(WARNING, ms > 100) << trace_id_
+                              << " CommitOutput " << ms << " msec"
+                              << " size=" << info.size
+                              << " filename=" << info.filename;
+    StringPiece output_base = file::Basename(info.filename);
+    output_bases.push_back(string(output_base));
+    // Detect an object-like output for the compiler family in use.
+    StringPiece ext = file::Extension(output_base);
+    if (flags_->is_gcc() && ext == "o") {
+      has_obj = true;
+    } else if (flags_->is_vc() && ext == "obj") {
+      has_obj = true;
+    } else if (flags_->is_javac() && ext == "class") {
+      has_obj = true;
+    }
+
+  }
+  output_file_.clear();
+
+  // TODO: For clang-tidy, maybe we don't need to output
+  // no obj warning?
+
+  if (has_obj) {
+    LOG(INFO) << trace_id_ << " CommitOutput num=" << output_bases.size()
+              << " cache_key=" << resp_->cache_key()
+              << ": " << output_bases;
+  } else {
+    LOG(WARNING) << trace_id_ << " CommitOutput num=" << output_bases.size()
+                 << " no obj: cache_key=" << resp_->cache_key()
+                 << ": " << output_bases;
+  }
+}
+
+// Sends the response back to the requester and records final statistics.
+// |msg| is only used for logging.
+//
+// Copies *resp_ into *rpc_resp_, schedules the done_ closure on the caller's
+// thread and then drops rpc_, rpc_resp_ and done_. Calls Done() at the end
+// when the task is finished and no local output upload is pending.
+//
+// Requires that rpc_ and rpc_resp_ are set, that no (delayed) subprocess
+// exists, and that state_ is FINISHED or LOCAL_FINISHED unless abort_ is set.
+void CompileTask::ReplyResponse(const string& msg) {
+  LOG(INFO) << trace_id_ << " ReplyResponse: " << msg;
+  DCHECK(BelongsToCurrentThread());
+  CHECK(state_ == FINISHED || state_ == LOCAL_FINISHED || abort_);
+  CHECK(rpc_ != nullptr);
+  CHECK(rpc_resp_ != nullptr);
+  CHECK(subproc_ == nullptr);
+  CHECK(delayed_setup_subproc_ == nullptr);
+
+  // For a failed or fail-fallback task, tell the user when the network
+  // error has lasted longer than the allowed duration.
+  if (failed() || fail_fallback_) {
+    int allowed_error_duration = service_->AllowedNetworkErrorDuration();
+    time_t error_start_time =
+        service_->http_client()->NetworkErrorStartedTime();
+    if (allowed_error_duration >= 0 && error_start_time > 0) {
+      time_t now = time(nullptr);
+      if (now > error_start_time + allowed_error_duration) {
+        AddErrorToResponse(
+            TO_USER, "network error continued for a long time", true);
+      }
+    }
+  }
+
+  if (resp_->has_result()) {
+    VLOG(1) << trace_id_ << " exit=" << resp_->result().exit_status();
+    stats_->set_exec_exit_status(resp_->result().exit_status());
+  } else {
+    LOG(WARNING) << trace_id_ << " empty result";
+    // -256 is recorded as the exit status when there is no result.
+    stats_->set_exec_exit_status(-256);
+  }
+  if (service_->local_run_for_failed_input() && flags_.get() != nullptr) {
+    service_->RecordInputResult(flags_->input_filenames(),
+                                stats_->exec_exit_status() == 0);
+  }
+  if (resp_->error_message_size() != 0) {
+    std::vector<string> errs(resp_->error_message().begin(),
+                             resp_->error_message().end());
+    LOG_IF(ERROR, resp_->result().exit_status() == 0)
+        << trace_id_ << " should not have error message on exit_status=0."
+        << " errs=" << errs;
+    service_->RecordErrorsToUser(errs);
+  }
+  UpdateStats();
+  // Hand the response over and clear our pointers before the done closure
+  // is scheduled on the caller's thread.
+  *rpc_resp_ = *resp_;
+  OneshotClosure* done = done_;
+  done_ = nullptr;
+  rpc_resp_ = nullptr;
+  rpc_ = nullptr;
+  if (done) {
+    service_->wm()->RunClosureInThread(
+        FROM_HERE,
+        caller_thread_id_, done, WorkerThreadManager::PRIORITY_IMMEDIATE);
+  }
+  // A non-zero exec exit status while both exit_status_ and
+  // subproc_exit_status_ are 0 is recorded as a compiler_proxy error.
+  if (!canceled_ && stats_->exec_exit_status() != 0) {
+    if (exit_status_ == 0 && subproc_exit_status_ == 0) {
+      stats_->set_compiler_proxy_error(true);
+      LOG(ERROR) << trace_id_ << " compilation failure "
+                 << "due to compiler_proxy error.";
+    }
+  }
+  responsecode_ = 200;
+  stats_->set_handler_time(handler_timer_.GetInMs());
+  gomacc_pid_ = SubProcessState::kInvalidPid;
+
+  // Log a trimmed copy of the stats for tasks slower than kSlowTaskInMs.
+  static const int kSlowTaskInMs = 5 * 60 * 1000;  // 5 mins
+  if (stats_->handler_time() > kSlowTaskInMs) {
+    ExecLog stats = *stats_;
+    // clear non-stats fields.
+    stats.clear_username();
+    stats.clear_nodename();
+    stats.clear_port();
+    stats.clear_compiler_proxy_start_time();
+    stats.clear_task_id();
+    stats.clear_compiler_proxy_user_agent();
+    stats.clear_start_time();
+    stats.clear_arg();
+    stats.clear_env();
+    stats.clear_cwd();
+    stats.clear_expanded_arg();
+    stats.clear_command_version();
+    stats.clear_command_target();
+    LOG(ERROR) << trace_id_ << " SLOW:" << stats.DebugString();
+  }
+
+  // if abort_, remote process is still on the fly.
+  // Done() will be called later in ProcessFinished.
+  if (abort_)
+    CHECK(!finished_);
+  // if local_output_file_callback_ is not nullptr, uploading local output file
+  // is on the fly, so ProcessLocalFileOutputDone() will be called later.
+  if (finished_ && local_output_file_callback_ == nullptr) {
+    CHECK_GE(state_, FINISHED);
+    CHECK_EQ(0, num_local_output_file_task_);
+    Done();
+  }
+}
+
+// Starts LocalOutputFileTasks for the local run's "*.o" outputs that have
+// no entry in the file hash cache yet.
+//
+// Does nothing unless the service has store_local_run_output enabled. When
+// there is nothing to process, MaybeRunLocalOutputFileCallback(false) is
+// scheduled on this task's thread instead.
+void CompileTask::ProcessLocalFileOutput() {
+  VLOG(1) << trace_id_ << " local output";
+  CHECK(BelongsToCurrentThread());
+  CHECK(local_output_file_callback_ == nullptr);
+  CHECK_EQ(0, num_local_output_file_task_);
+  if (!service_->store_local_run_output()) {
+    return;
+  }
+
+  SetLocalOutputFileCallback();
+  std::vector<OneshotClosure*> pending;
+  for (const auto& output_file : flags_->output_files()) {
+    const string full_path =
+        file::JoinPathRespectAbsolute(flags_->cwd(), output_file);
+    // only uploads *.o
+    if (!strings::EndsWith(full_path, ".o")) {
+      continue;
+    }
+
+    // Skip outputs whose hash key is already known to the cache.
+    string cached_key;
+    const FileId& file_id = output_file_id_cache_->Get(full_path);
+    if (service_->file_hash_cache()->GetFileCacheKey(
+            full_path, 0ULL, file_id, &cached_key)) {
+      VLOG(1) << "file:" << full_path << " already on cache: " << cached_key;
+      continue;
+    }
+
+    LOG(INFO) << trace_id_ << " local output:" << full_path;
+    std::unique_ptr<LocalOutputFileTask> upload_task(
+        new LocalOutputFileTask(
+            service_->wm(),
+            service_->file_service()->WithRequesterInfoAndTraceId(
+                requester_info_, trace_id_),
+            service_->file_hash_cache(),
+            output_file_id_cache_->Get(full_path), this, full_path));
+
+    // Take the raw pointer before ownership moves into the finish callback.
+    LocalOutputFileTask* raw_task = upload_task.get();
+    pending.push_back(
+        NewCallback(
+            raw_task,
+            &LocalOutputFileTask::Run,
+            NewCallback(
+                this,
+                &CompileTask::LocalOutputFileTaskFinished,
+                std::move(upload_task))));
+  }
+
+  if (!pending.empty()) {
+    for (auto* closure : pending) {
+      service_->wm()->RunClosure(
+          FROM_HERE, closure, WorkerThreadManager::PRIORITY_LOW);
+    }
+    return;
+  }
+
+  VLOG(1) << trace_id_ << " no local output upload";
+  service_->wm()->RunClosureInThread(
+      FROM_HERE,
+      thread_id_,
+      NewCallback(
+          this,
+          &CompileTask::MaybeRunLocalOutputFileCallback, false),
+      WorkerThreadManager::PRIORITY_LOW);
+}
+
+// Called once local output file processing has completed.  Clears
+// local_output_file_callback_ and, if the task has already finished,
+// finalizes it via Done().
+void CompileTask::ProcessLocalFileOutputDone() {
+  VLOG(1) << trace_id_ << " local output done";
+  CHECK(BelongsToCurrentThread());
+  local_output_file_callback_ = nullptr;
+  if (!finished_) {
+    // The remote call is still on the fly.  ProcessFinished will eventually
+    // be called, and it will call Done because local_output_file_callback_
+    // is already nullptr.
+    return;
+  }
+  CHECK(subproc_ == nullptr);
+  CHECK(delayed_setup_subproc_ == nullptr);
+  Done();
+}
+
+// Finalizes this task.
+//
+// Asserts (via CHECK) that every in-flight resource -- RPC, RPC response,
+// done closure, subprocesses and the input/output/local-output file
+// callbacks -- has already been released, calls ClearOutputFile(), drops the
+// deps cache entry when the compile failed, saves info from input/output and
+// finally reports completion to the service through CompileTaskDone().
+void CompileTask::Done() {
+  VLOG(1) << trace_id_ << " Done";
+  // FINISHED: normal case.
+  // LOCAL_FINISHED: fallback by should_fallback_.
+  // abort_: idle fallback.
+  // The state check is skipped for aborted tasks.
+  if (!abort_)
+    CHECK_GE(state_, FINISHED);
+  CHECK(rpc_ == nullptr) << trace_id_
+                      << " " << StateName(state_) << " abort:" << abort_;
+  CHECK(rpc_resp_ == nullptr);
+  CHECK(done_ == nullptr);
+  CHECK(subproc_ == nullptr);
+  CHECK(delayed_setup_subproc_ == nullptr);
+  CHECK(input_file_callback_ == nullptr);
+  CHECK(output_file_callback_ == nullptr);
+  CHECK(local_output_file_callback_ == nullptr);
+  ClearOutputFile();
+
+  // If compile failed, delete deps cache entry here.
+  // A fail-fallback also counts as a failure for this purpose.
+  if (DepsCache::IsEnabled()) {
+    if ((failed() || fail_fallback_) && deps_identifier_.valid()) {
+      DepsCache::instance()->RemoveDependency(deps_identifier_);
+      LOG(INFO) << trace_id_ << " remove deps cache entry.";
+    }
+  }
+
+  SaveInfoFromInputOutput();
+  // NOTE(review): presumably the service may release this task after this
+  // call, which would explain why only logging follows -- confirm.
+  service_->CompileTaskDone(this);
+  VLOG(1) << trace_id_ << " finalized.";
+}
+
+// Serializes the current status of this task into |*root|.
+//
+// A compact summary (id, state, elapsed/handler time, cache status,
+// subprocess info, mismatch flags, error flags) is always written.  When
+// |need_detail| is true, timing breakdowns, sizes, retry reasons, error
+// messages, environment, stdout/stderr and the input file list are added as
+// well; otherwise "summaryOnly" is set to 1.
+//
+// Most optional fields are emitted only when they hold a non-zero /
+// non-empty value.  subproc_ and http_rpc_status_ are read while holding
+// mu_.
+void CompileTask::DumpToJson(bool need_detail, Json::Value* root) const {
+  SubProcessState::State subproc_state = SubProcessState::NUM_STATE;
+  pid_t subproc_pid = static_cast<pid_t>(SubProcessState::kInvalidPid);
+  {
+    AUTOLOCK(lock, &mu_);
+    if (subproc_ != nullptr) {
+      subproc_state = subproc_->state();
+      subproc_pid = subproc_->started().pid();
+    }
+  }
+
+  (*root)["id"] = id_;
+
+  if ((state_ < FINISHED && !abort_) || state_ == LOCAL_RUN) {
+    // elapsed total time for current running process.
+    (*root)["elapsed"] = handler_timer_.GetInMs();
+  }
+  if (stats_->handler_time()) (*root)["time"] = stats_->handler_time();
+  if (gomacc_pid_ != SubProcessState::kInvalidPid)
+    (*root)["pid"] = gomacc_pid_;
+  if (!flag_dump_.empty()) (*root)["flag"] = flag_dump_;
+  if (localoutputcache_lookup_succeeded_) {
+    (*root)["cache"] = "local hit";
+  } else if (stats_->cache_hit()) {
+    (*root)["cache"] = "hit";
+  }
+  (*root)["state"] = StateName(state_);
+  if (abort_) (*root)["abort"] = 1;
+  if (subproc_pid != SubProcessState::kInvalidPid) {
+    (*root)["subproc_state"] =
+        SubProcessState::State_Name(subproc_state);
+    (*root)["subproc_pid"] = Json::Value::Int64(subproc_pid);
+  }
+  string major_factor_str = stats_->major_factor();
+  if (!major_factor_str.empty())
+    (*root)["major_factor"] = major_factor_str;
+  if (stats_->has_exec_command_version_mismatch()) {
+    (*root)["command_version_mismatch"] =
+        stats_->exec_command_version_mismatch();
+  }
+  if (stats_->has_exec_command_binary_hash_mismatch()) {
+    (*root)["command_binary_hash_mismatch"] =
+        stats_->exec_command_binary_hash_mismatch();
+  }
+  if (stats_->has_exec_command_subprograms_mismatch()) {
+    (*root)["command_subprograms_mismatch"] =
+        stats_->exec_command_subprograms_mismatch();
+  }
+  // for task color.
+  if (responsecode_) (*root)["http"] = responsecode_;
+  if (stats_->exec_exit_status())
+    (*root)["exit"] = stats_->exec_exit_status();
+  if (stats_->exec_request_retry())
+    (*root)["retry"] = stats_->exec_request_retry();
+  if (fail_fallback_) (*root)["fail_fallback"]= 1;
+  if (stats_->goma_error())
+    (*root)["goma_error"] = 1;
+  if (stats_->compiler_proxy_error())
+    (*root)["compiler_proxy_error"] = 1;
+  if (canceled_)
+    (*root)["canceled"] = 1;
+
+  // additional message
+  if (gomacc_revision_mismatched_) {
+    (*root)["gomacc_revision_mismatch"] = 1;
+  }
+
+  if (need_detail) {
+    // Format the start time as "YYYY-MM-DD HH:MM:SS +hhmm" in local time.
+    struct tm local_start_time;
+    char timebuf[64];
+    const time_t start_time = static_cast<time_t>(stats_->start_time());
+#ifndef _WIN32
+    localtime_r(&start_time, &local_start_time);
+    strftime(timebuf, sizeof timebuf, "%Y-%m-%d %H:%M:%S %z",
+             &local_start_time);
+#else
+    localtime_s(&local_start_time, &start_time);
+    strftime(timebuf, sizeof timebuf, "%Y-%m-%d %H:%M:%S ",
+             &local_start_time);
+    // The UTC offset is appended by hand here, built from _get_timezone().
+    // The sign is flipped: a non-negative value is printed with '-'.
+    long tzoff = 0;
+    _get_timezone(&tzoff);
+    const char tzsign = tzoff >= 0 ? '-' : '+';
+    // Reduce to whole minutes as an int before formatting: "%02d" expects
+    // int arguments, and passing long values is a format mismatch.
+    const long tzoff_abs = tzoff < 0 ? -tzoff : tzoff;
+    const int tz_minutes = static_cast<int>(tzoff_abs / 60);
+    sprintf_s(timebuf + strlen(timebuf), sizeof timebuf - strlen(timebuf),
+              "%c%02d%02d",
+              tzsign, tz_minutes / 60, tz_minutes % 60);
+#endif
+    (*root)["start_time"] = timebuf;
+
+    if (stats_->has_latest_input_filename()) {
+      (*root)["latest_input_filename"] =
+          stats_->latest_input_filename();
+    }
+    if (stats_->has_latest_input_mtime()) {
+      (*root)["input_wait"] =
+          stats_->start_time() - stats_->latest_input_mtime();
+    }
+
+    if (stats_->num_total_input_file())
+      (*root)["total_input"] = stats_->num_total_input_file();
+    if (stats_->num_uploading_input_file_size() > 0) {
+      (*root)["uploading_input"] = Json::Value::Int64(
+          SumRepeatedInt32(stats_->num_uploading_input_file()));
+    }
+    if (num_input_file_task_ > 0) {
+      (*root)["num_input_file_task"] = num_input_file_task_;
+    }
+    if (stats_->num_missing_input_file_size() > 0) {
+      (*root)["missing_input"] = Json::Value::Int64(
+          SumRepeatedInt32(stats_->num_missing_input_file()));
+    }
+    if (stats_->compiler_info_process_time()) {
+      (*root)["compiler_info_process_time"] =
+          stats_->compiler_info_process_time();
+    }
+    // When depscache_used() is true, we ran include_preprocessor but its
+    // processing time was 0ms. So, we'd like to show it.
+    if (stats_->include_preprocess_time() || stats_->depscache_used()) {
+      (*root)["include_preprocess_time"] = stats_->include_preprocess_time();
+    }
+    if (stats_->depscache_used()) {
+      // Only emitted when the deps cache was used, so the value is always
+      // "true".
+      (*root)["depscache_used"] = "true";
+    }
+    if (stats_->include_fileload_time()) {
+      (*root)["include_fileload_time"] = stats_->include_fileload_time();
+    }
+    if (stats_->include_fileload_pending_time_size()) {
+      int64_t sum = SumRepeatedInt32(stats_->include_fileload_pending_time());
+      if (sum) {
+        (*root)["include_fileload_pending_time"] = Json::Value::Int64(sum);
+      }
+    }
+    if (stats_->include_fileload_run_time_size()) {
+      int64_t sum = SumRepeatedInt32(stats_->include_fileload_run_time());
+      if (sum) {
+        (*root)["include_fileload_run_time"] = Json::Value::Int64(sum);
+      }
+    }
+    if (stats_->rpc_call_time_size()) {
+      (*root)["rpc_call_time"] = Json::Value::Int64(
+          SumRepeatedInt32(stats_->rpc_call_time()));
+    }
+    if (stats_->file_response_time())
+      (*root)["file_response_time"] = stats_->file_response_time();
+    if (stats_->gcc_req_size)
+      (*root)["gcc_req_size"] = Json::Value::Int64(stats_->gcc_req_size);
+    if (stats_->gcc_resp_size)
+      (*root)["gcc_resp_size"] = Json::Value::Int64(stats_->gcc_resp_size);
+    {
+      AUTOLOCK(lock, &mu_);
+      if (http_rpc_status_.get()) {
+        if (!http_rpc_status_->response_header.empty()) {
+          (*root)["response_header"] =
+              http_rpc_status_->response_header;
+        }
+      }
+    }
+    // Per-RPC statistics are repeated fields; the sum over all attempts is
+    // reported.
+    if (stats_->rpc_req_size_size() > 0) {
+      (*root)["exec_req_size"] =
+          Json::Value::Int64(SumRepeatedInt32(stats_->rpc_req_size()));
+    }
+    if (stats_->rpc_master_trace_id_size() > 0) {
+      string masters;
+      JoinStrings(stats_->rpc_master_trace_id(), " ", &masters);
+      (*root)["exec_rpc_master"] = masters;
+    }
+    if (stats_->rpc_throttle_time_size() > 0) {
+      (*root)["exec_throttle_time"] =
+          Json::Value::Int64(SumRepeatedInt32(stats_->rpc_throttle_time()));
+    }
+    if (stats_->rpc_pending_time_size() > 0) {
+      (*root)["exec_pending_time"] =
+          Json::Value::Int64(SumRepeatedInt32(stats_->rpc_pending_time()));
+    }
+    if (stats_->rpc_req_build_time_size() > 0) {
+      (*root)["exec_req_build_time"] =
+          Json::Value::Int64(SumRepeatedInt32(stats_->rpc_req_build_time()));
+    }
+    if (stats_->rpc_req_send_time_size() > 0) {
+      (*root)["exec_req_send_time"] =
+          Json::Value::Int64(SumRepeatedInt32(stats_->rpc_req_send_time()));
+    }
+    if (stats_->rpc_wait_time_size() > 0) {
+      (*root)["exec_wait_time"] =
+          Json::Value::Int64(SumRepeatedInt32(stats_->rpc_wait_time()));
+    }
+    if (stats_->rpc_resp_size_size() > 0) {
+      (*root)["exec_resp_size"] =
+          Json::Value::Int64(SumRepeatedInt32(stats_->rpc_resp_size()));
+    }
+    if (stats_->rpc_resp_recv_time_size() > 0) {
+      (*root)["exec_resp_recv_time"] =
+          Json::Value::Int64(SumRepeatedInt32(stats_->rpc_resp_recv_time()));
+    }
+    if (stats_->rpc_resp_parse_time_size() > 0) {
+      (*root)["exec_resp_parse_time"] =
+          Json::Value::Int64(SumRepeatedInt32(stats_->rpc_resp_parse_time()));
+    }
+    if (stats_->has_local_run_reason()) {
+      (*root)["local_run_reason"] =
+          stats_->local_run_reason();
+    }
+    if (stats_->local_pending_time() > 0)
+      (*root)["local_pending_ms"] = stats_->local_pending_time();
+    if (stats_->local_run_time() > 0)
+      (*root)["local_run_ms"] = stats_->local_run_time();
+    if (stats_->local_mem_kb() > 0)
+      (*root)["local_mem_kb"] = Json::Value::Int64(stats_->local_mem_kb());
+    if (stats_->local_output_file_time_size() > 0) {
+      (*root)["local_output_file_time"] = Json::Value::Int64(
+          SumRepeatedInt32(stats_->local_output_file_time()));
+    }
+    if (stats_->local_output_file_size_size() > 0) {
+      (*root)["local_output_file_size"] = Json::Value::Int64(
+          SumRepeatedInt32(stats_->local_output_file_size()));
+    }
+
+    if (stats_->output_file_size_size() > 0) {
+      (*root)["output_file_size"] =
+          Json::Value::Int64(SumRepeatedInt32(stats_->output_file_size()));
+    }
+    if (stats_->chunk_resp_size_size() > 0) {
+      (*root)["chunk_resp_size"] =
+          Json::Value::Int64(SumRepeatedInt32(stats_->chunk_resp_size()));
+    }
+    if (stats_->output_file_rpc)
+      (*root)["output_file_rpc"] = Json::Value::Int64(stats_->output_file_rpc);
+    if (stats_->output_file_rpc_req_build_time) {
+      (*root)["output_file_rpc_req_build_time"] =
+          Json::Value::Int64(stats_->output_file_rpc_req_build_time);
+    }
+    if (stats_->output_file_rpc_req_send_time) {
+      (*root)["output_file_rpc_req_send_time"] =
+          Json::Value::Int64(stats_->output_file_rpc_req_send_time);
+    }
+    if (stats_->output_file_rpc_wait_time) {
+      (*root)["output_file_rpc_wait_time"] =
+          Json::Value::Int64(stats_->output_file_rpc_wait_time);
+    }
+    if (stats_->output_file_rpc_resp_recv_time) {
+      (*root)["output_file_rpc_resp_recv_time"] =
+          Json::Value::Int64(stats_->output_file_rpc_resp_recv_time);
+    }
+    if (stats_->output_file_rpc_resp_parse_time) {
+      (*root)["output_file_rpc_resp_parse_time"] =
+          Json::Value::Int64(stats_->output_file_rpc_resp_parse_time);
+    }
+    if (exec_output_file_.size() > 0) {
+      Json::Value exec_output_file(Json::arrayValue);
+      for (const auto& output_file : exec_output_file_) {
+        exec_output_file.append(output_file);
+      }
+      (*root)["exec_output_file"] = exec_output_file;
+    }
+    if (!resp_cache_key_.empty())
+      (*root)["cache_key"] = resp_cache_key_;
+
+    if (stats_->exec_request_retry_reason_size() > 0) {
+      Json::Value exec_output_retry_reason(Json::arrayValue);
+      for (int i = 0; i < stats_->exec_request_retry_reason_size(); ++i) {
+        exec_output_retry_reason.append(
+            stats_->exec_request_retry_reason(i));
+      }
+      (*root)["exec_request_retry_reason"] = exec_output_retry_reason;
+    }
+    if (exec_error_message_.size() > 0) {
+      Json::Value error_message(Json::arrayValue);
+      for (const auto& message : exec_error_message_) {
+        error_message.append(message);
+      }
+      (*root)["error_message"] = error_message;
+    }
+    if (!stats_->cwd().empty())
+      (*root)["cwd"] = stats_->cwd();
+    if (!orig_flag_dump_.empty())
+      (*root)["orig_flag"] = orig_flag_dump_;
+    if (stats_->env_size() > 0) {
+      Json::Value env(Json::arrayValue);
+      for (int i = 0; i < stats_->env_size(); ++i) {
+        env.append(stats_->env(i));
+      }
+      (*root)["env"] = env;
+    }
+    if (!stdout_.empty())
+      (*root)["stdout"] = stdout_;
+    if (!stderr_.empty())
+      (*root)["stderr"] = stderr_;
+
+    // "inputs" is always present in the detailed view, even when empty.
+    Json::Value inputs(Json::arrayValue);
+    for (const auto& required_file : required_files_) {
+      inputs.append(required_file);
+    }
+    (*root)["inputs"] = inputs;
+
+    if (system_library_paths_.size() > 0) {
+      Json::Value system_library_paths(Json::arrayValue);
+      for (size_t i = 0; i < system_library_paths_.size(); ++i) {
+        system_library_paths.append(system_library_paths_[i]);
+      }
+      (*root)["system_library_paths"] = system_library_paths;
+    }
+
+  } else {
+    (*root)["summaryOnly"] = 1;
+  }
+}
+
+// ----------------------------------------------------------------
+// state_: INIT
+// Captures requester information from the incoming request (state INIT).
+//
+// Moves the requester env out of req_ into requester_env_, mirrors
+// args/envs/cwd into stats_, records the gomacc pid, and fills in the
+// requester info fields (username, compiler proxy id) on the request.  The
+// final requester info is then cached in requester_info_.
+void CompileTask::CopyEnvFromRequest() {
+  CHECK_EQ(INIT, state_);
+  // Keep a copy of the requester env, then clear it from the request.
+  requester_env_ = req_->requester_env();
+  want_fallback_ = requester_env_.fallback();
+  req_->clear_requester_env();
+
+  for (const auto& arg : req_->arg())
+    stats_->add_arg(arg);
+  for (const auto& env : req_->env())
+    stats_->add_env(env);
+  stats_->set_cwd(req_->cwd());
+
+  gomacc_pid_ = req_->requester_info().pid();
+
+  if (service_->CanSendUserInfo()) {
+    // A username configured on the service overrides the one in the request.
+    if (!service_->username().empty())
+      req_->mutable_requester_info()->set_username(service_->username());
+    stats_->set_username(req_->requester_info().username());
+    stats_->set_nodename(service_->nodename());
+  }
+  req_->mutable_requester_info()->set_compiler_proxy_id(
+      GenerateCompilerProxyId());
+  stats_->set_port(rpc_->server_port());
+  stats_->set_compiler_proxy_start_time(service_->start_time());
+  stats_->set_task_id(id_);
+  // Snapshot taken last so it includes the username and compiler proxy id
+  // set above.
+  requester_info_ = req_->requester_info();
+}
+
+// Returns the service's compiler proxy id prefix followed by this task's id.
+string CompileTask::GenerateCompilerProxyId() const {
+  std::ostringstream proxy_id;
+  proxy_id << service_->compiler_proxy_id_prefix();
+  proxy_id << id_;
+  return proxy_id.str();
+}
+
+// static
+// Checks that the compiler named by |req|'s local_compiler_path agrees with
+// both command_spec.name (when set) and |flags|->compiler_name() (when
+// |flags| is non-null).  |trace_id| is only used as a log prefix.
+// Returns true when there is no local_compiler_path at all.
+bool CompileTask::IsLocalCompilerPathValid(
+    const string& trace_id,
+    const ExecReq& req, const CompilerFlags* flags) {
+  const auto& spec = req.command_spec();
+
+  // Compiler_proxy will resolve local_compiler_path
+  // if gomacc is masqueraded or prepended compiler is basename.
+  // No need to think this as error.
+  if (!spec.has_local_compiler_path()) {
+    return true;
+  }
+
+  // If local_compiler_path exists, it must be the same compiler_name with
+  // flag_'s.
+  const string name =
+      CompilerFlags::GetCompilerName(spec.local_compiler_path());
+
+  const bool spec_name_mismatch = spec.has_name() && spec.name() != name;
+  if (spec_name_mismatch) {
+    LOG(ERROR) << trace_id << " compiler name mismatches."
+               << " command_spec.name=" << spec.name()
+               << " name=" << name;
+    return false;
+  }
+
+  const bool flags_name_mismatch = flags && flags->compiler_name() != name;
+  if (flags_name_mismatch) {
+    LOG(ERROR) << trace_id << " compiler name mismatches."
+               << " flags.compiler_name=" << flags->compiler_name()
+               << " name=" << name;
+    return false;
+  }
+  return true;
+}
+
+// static
+// Collapses entries of |filenames| that resolve to the same path once joined
+// with |cwd|.  For each group of duplicates the shortest spelling is kept;
+// ties in length are broken by taking the lexicographically smaller one.
+void CompileTask::RemoveDuplicateFiles(const std::string& cwd,
+                                       std::set<std::string>* filenames) {
+  // Joined path -> the spelling currently chosen for it.
+  std::map<std::string, std::string> chosen_by_path;
+  std::set<std::string> deduped;
+  for (const auto& candidate : *filenames) {
+    const std::string joined =
+        file::JoinPathRespectAbsolute(cwd, candidate);
+    const auto result = chosen_by_path.emplace(joined, candidate);
+    if (result.second) {
+      // First spelling seen for this path.
+      deduped.insert(candidate);
+      continue;
+    }
+
+    // A spelling is already registered: prefer the shorter one, and when the
+    // length is the same, the lexicographically smaller one.
+    std::string& current = result.first->second;
+    const bool candidate_is_better =
+        candidate.size() < current.size() ||
+        (candidate.size() == current.size() && candidate < current);
+    if (candidate_is_better) {
+      deduped.erase(current);
+      deduped.insert(candidate);
+      current = candidate;
+    }
+  }
+  filenames->swap(deduped);
+}
+
+// Parses the request's arguments into flags_ (state INIT) and derives
+// per-compiler settings from them.
+//
+// Leaves flags_ null when CompilerFlags::New() cannot build flags for the
+// arguments.  For gcc, linking_ and precompiling_ are set.  For clang-tidy,
+// the underlying clang arguments are resolved from a compilation database;
+// on failure the flags are marked unsuccessful.
+void CompileTask::InitCompilerFlags() {
+  CHECK_EQ(INIT, state_);
+  std::vector<string> args(req_->arg().begin(), req_->arg().end());
+  VLOG(1) << trace_id_ << " " << args;
+  flags_ = CompilerFlags::New(args, req_->cwd());
+  if (flags_.get() == nullptr) {
+    return;
+  }
+  flag_dump_ = flags_->DebugString();
+  if (flags_->is_gcc()) {
+    const GCCFlags& gcc_flag = static_cast<const GCCFlags&>(*flags_);
+    linking_ = (gcc_flag.mode() == GCCFlags::LINK);
+    precompiling_ = gcc_flag.is_precompiling_header();
+  } else if (flags_->is_vc()) {
+    // TODO: check linking_ etc.
+  } else if (flags_->is_clang_tidy()) {
+    // Sets the actual gcc_flags for clang_tidy_flags here.
+    ClangTidyFlags& clang_tidy_flags = static_cast<ClangTidyFlags&>(*flags_);
+    // Exactly one input file is required.
+    if (clang_tidy_flags.input_filenames().size() != 1) {
+      LOG(WARNING) << trace_id_ << " Input file is not unique.";
+      clang_tidy_flags.set_is_successful(false);
+      return;
+    }
+    const string& input_file = clang_tidy_flags.input_filenames()[0];
+    const string input_file_abs =
+        file::JoinPathRespectAbsolute(clang_tidy_flags.cwd(), input_file);
+    // The compilation database is looked up using the configured build path
+    // and the input file's directory.
+    string compdb_path = CompilationDatabaseReader::FindCompilationDatabase(
+      clang_tidy_flags.build_path(), file::Dirname(input_file_abs));
+
+    std::vector<string> clang_args;
+    string build_dir;
+    if (!CompilationDatabaseReader::MakeClangArgs(clang_tidy_flags,
+                                                  compdb_path,
+                                                  &clang_args,
+                                                  &build_dir)) {
+      // Failed to make clang args. Then Mark CompilerFlags unsuccessful.
+      LOG(WARNING) << trace_id_
+                   << " Failed to make clang args. local fallback.";
+      clang_tidy_flags.set_is_successful(false);
+      return;
+    }
+
+    DCHECK(!build_dir.empty());
+    clang_tidy_flags.SetCompilationDatabasePath(compdb_path);
+    clang_tidy_flags.SetClangArgs(clang_args, build_dir);
+  }
+}
+
+// Resolves the local compiler to use for this request (state INIT).
+//
+// If gomacc supplied local_compiler_path it is validated and resolved through
+// the service; otherwise the compiler is looked up by base name using the
+// requester's PATH.  On success local_path_ is filled in and the request's
+// command_spec may be updated with the resolved path.
+//
+// Returns true when a local compiler was found, false otherwise.
+bool CompileTask::FindLocalCompilerPath() {
+  CHECK_EQ(INIT, state_);
+  CHECK(flags_.get());
+
+  // If gomacc sets local_compiler_path, just use it.
+  if (!req_->command_spec().local_compiler_path().empty()) {
+    string local_compiler = PathResolver::PlatformConvert(
+        req_->command_spec().local_compiler_path());
+
+    // TODO: confirm why local_compiler_path should not be
+    //                    basename, and remove the code if possible.
+    // local_compiler_path should not be basename only.
+    if (local_compiler.find(PathResolver::kPathSep) == string::npos) {
+      LOG(ERROR) << trace_id_ << " local_compiler_path should not be basename:"
+                 << local_compiler;
+    // Note: |local_compiler| is passed both as the input path and as the
+    // output parameter receiving the resolved path.
+    } else if (service_->FindLocalCompilerPath(
+        requester_env_.gomacc_path(),
+        local_compiler,
+        stats_->cwd(),
+        requester_env_.local_path(),
+        pathext_,
+        &local_compiler,
+        &local_path_)) {
+      // Since compiler_info resolves relative path to absolute path,
+      // we do not need to make local_compiler_path to absolute path
+      // any more. (b/6340137, b/28088682)
+      if (!pathext_.empty() &&
+          !strings::EndsWith(local_compiler,
+                             req_->command_spec().local_compiler_path())) {
+        // PathExt should be resolved on Windows.  Let me use it.
+        req_->mutable_command_spec()->set_local_compiler_path(local_compiler);
+      }
+      return true;
+    }
+    // Either the path was a bare basename or the service lookup failed.
+    return false;
+  }
+
+  // No local_compiler_path in the request: PATH and the gomacc path from the
+  // requester env are required for the lookup below.
+  if (!requester_env_.has_local_path() ||
+      requester_env_.local_path().empty()) {
+    LOG(ERROR) << "no PATH in requester env." << requester_env_.DebugString();
+    AddErrorToResponse(TO_USER,
+                       "no PATH in requester env.  Using old gomacc?", true);
+    return false;
+  }
+  if (!requester_env_.has_gomacc_path()) {
+    LOG(ERROR) << "no gomacc path in requester env."
+               << requester_env_.DebugString();
+    AddErrorToResponse(TO_USER,
+                       "no gomacc in requester env.  Using old gomacc?", true);
+    return false;
+  }
+
+  // Look the compiler up by its base name and store the result in the
+  // request.
+  string local_compiler_path;
+  if (service_->FindLocalCompilerPath(
+          requester_env_.gomacc_path(),
+          flags_->compiler_base_name(),
+          stats_->cwd(),
+          requester_env_.local_path(),
+          pathext_,
+          &local_compiler_path,
+          &local_path_)) {
+    req_->mutable_command_spec()->set_local_compiler_path(
+          local_compiler_path);
+    return true;
+  }
+  return false;
+}
+
+// Decides, in state INIT, whether this request must be compiled locally
+// instead of being sent to the goma backend.
+//
+// Returns false when the request is a verify_command request.  Returns true
+// (forced fallback) when the flags could not be parsed, when there is no
+// input file, when the command line uses a feature the backend does not
+// support (stdin input, -wrapper, preprocess-only, precompiling without the
+// gch hack, linking without remote link, assembler input, VC PCH creation or
+// mspdbserv, javac annotation processors, java programs), when an input is
+// missing or not a regular file (non-Windows only), or when an input file is
+// listed in the requester's fallback_input_file.
+//
+// Every forced fallback is recorded through
+// service_->RecordForcedFallbackInSetup() with the matching reason.
+bool CompileTask::ShouldFallback() const {
+  CHECK_EQ(INIT, state_);
+  CHECK(flags_.get());
+  if (!requester_env_.verify_command().empty())
+    return false;
+  if (!flags_->is_successful()) {
+    service_->RecordForcedFallbackInSetup(CompileService::kFailToParseFlags);
+    LOG(INFO) << trace_id_
+              << " force fallback. failed to parse compiler flags.";
+    return true;
+  }
+  if (flags_->input_filenames().empty()) {
+    service_->RecordForcedFallbackInSetup(
+        CompileService::kNoRemoteCompileSupported);
+    LOG(INFO) << trace_id_
+              << " force fallback. no input files give.";
+    return true;
+  }
+  if (flags_->is_gcc()) {
+    const GCCFlags& gcc_flag = static_cast<const GCCFlags&>(*flags_);
+    if (gcc_flag.is_stdin_input()) {
+      service_->RecordForcedFallbackInSetup(
+          CompileService::kNoRemoteCompileSupported);
+      LOG(INFO) << trace_id_
+                << " force fallback."
+                << " cannot use stdin as input in goma backend.";
+      return true;
+    }
+    if (gcc_flag.has_wrapper()) {
+      service_->RecordForcedFallbackInSetup(
+          CompileService::kNoRemoteCompileSupported);
+      LOG(INFO) << trace_id_
+                << " force fallback. -wrapper is not supported";
+      return true;
+    }
+    if (!verify_output_ && gcc_flag.mode() == GCCFlags::PREPROCESS) {
+      service_->RecordForcedFallbackInSetup(
+          CompileService::kNoRemoteCompileSupported);
+      LOG(INFO) << trace_id_
+                << " force fallback. preprocess is usually light-weight.";
+      return true;
+    }
+    if (!service_->enable_gch_hack() && precompiling_) {
+      service_->RecordForcedFallbackInSetup(
+          CompileService::kNoRemoteCompileSupported);
+      LOG(INFO) << trace_id_
+                << " force fallback. gch hack is not enabled and precompiling.";
+      return true;
+    }
+    if (!service_->enable_remote_link() && linking_) {
+      service_->RecordForcedFallbackInSetup(
+          CompileService::kNoRemoteCompileSupported);
+      LOG(INFO) << trace_id_
+                << " force fallback linking.";
+      return true;
+    }
+    // Only the first input file's extension is inspected; input_filenames()
+    // is known to be non-empty here.
+    StringPiece ext = file::Extension(flags_->input_filenames()[0]);
+    if (ext == "s" || ext == "S") {
+      service_->RecordForcedFallbackInSetup(
+          CompileService::kNoRemoteCompileSupported);
+      LOG(INFO) << trace_id_
+                << " force fallback. assembler should be light-weight.";
+      return true;
+    }
+  } else if (flags_->is_vc()) {
+    const VCFlags& vc_flag = static_cast<const VCFlags&>(*flags_);
+    // GOMA doesn't work with PCH so we generate it only for local builds.
+    if (!vc_flag.creating_pch().empty()) {
+      service_->RecordForcedFallbackInSetup(
+          CompileService::kNoRemoteCompileSupported);
+      LOG(INFO) << trace_id_
+                << " force fallback. cannot create pch in goma backend.";
+      return true;
+    }
+    if (vc_flag.require_mspdbserv()) {
+      service_->RecordForcedFallbackInSetup(
+          CompileService::kNoRemoteCompileSupported);
+      LOG(INFO) << trace_id_
+                << " force fallback. cannot run mspdbserv in goma backend.";
+      return true;
+    }
+  } else if (flags_->is_javac()) {
+    const JavacFlags& javac_flag = static_cast<const JavacFlags&>(*flags_);
+    // TODO: remove following code when goma backend get ready.
+    // Force fallback a compile request with -processor (b/38215808)
+    if (!javac_flag.processors().empty()) {
+      service_->RecordForcedFallbackInSetup(
+          CompileService::kNoRemoteCompileSupported);
+      LOG(INFO) << trace_id_
+                << " force fallback to avoid running annotation processor in"
+                << " goma backend (b/38215808)";
+      return true;
+    }
+  } else if (flags_->is_java()) {
+    // Record this forced fallback like every other unsupported case, so it
+    // is not missing from the forced-fallback statistics.
+    service_->RecordForcedFallbackInSetup(
+        CompileService::kNoRemoteCompileSupported);
+    LOG(INFO) << trace_id_
+              << " force fallback to avoid running java program in"
+              << " goma backend";
+    return true;
+  }
+
+#ifndef _WIN32
+  // Every input must exist and be a regular file.
+  // TODO: check "NUL", "CON", "AUX" on windows?
+  for (const auto & input_filename : flags_->input_filenames()) {
+    const string input = file::JoinPathRespectAbsolute(
+        flags_->cwd(), input_filename);
+    struct stat st;
+    if (stat(input.c_str(), &st) != 0) {
+      PLOG(INFO) << trace_id_ << " " << input << ": stat error";
+      service_->RecordForcedFallbackInSetup(
+          CompileService::kNoRemoteCompileSupported);
+      return true;
+    }
+    if (!S_ISREG(st.st_mode)) {
+      LOG(INFO) << trace_id_ << " " << input << " not regular file";
+      service_->RecordForcedFallbackInSetup(
+          CompileService::kNoRemoteCompileSupported);
+      return true;
+    }
+  }
+#endif
+
+  // TODO: fallback input file should be flag of compiler proxy?
+  if (requester_env_.fallback_input_file_size() == 0)
+    return false;
+
+  // Sort a copy of the user-requested fallback inputs so each input file
+  // can be looked up with a binary search.
+  std::vector<string> fallback_input_files(
+      requester_env_.fallback_input_file().begin(),
+      requester_env_.fallback_input_file().end());
+  std::sort(fallback_input_files.begin(), fallback_input_files.end());
+  for (const auto& input_filename : flags_->input_filenames()) {
+    // Qualified explicitly rather than relying on argument-dependent lookup.
+    if (std::binary_search(fallback_input_files.begin(),
+                           fallback_input_files.end(),
+                           input_filename)) {
+      service_->RecordForcedFallbackInSetup(CompileService::kRequestedByUser);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Reports whether the requester asked for output verification.  Only valid
+// in state INIT.
+bool CompileTask::ShouldVerifyOutput() const {
+  CHECK_EQ(INIT, state_);
+  const bool verify_requested = requester_env_.verify_output();
+  return verify_requested;
+}
+
+// Classifies this task as heavy or light for subprocess scheduling.
+// The score is the number of request arguments, multiplied by 10 when
+// linking; a score above 1000 is heavy.  Only valid in state INIT.
+SubProcessReq::Weight CompileTask::GetTaskWeight() const {
+  CHECK_EQ(INIT, state_);
+  const int link_multiplier = linking_ ? 10 : 1;
+  const int weight_score = req_->arg_size() * link_multiplier;
+  return weight_score > 1000 ? SubProcessReq::HEAVY_WEIGHT
+                             : SubProcessReq::LIGHT_WEIGHT;
+}
+
+// Decides whether the remote (goma) side of this task should be abandoned
+// in favour of the local subprocess.  Never stops when verifying output,
+// when precompiling with the gch hack, or when there is no subprocess.
+bool CompileTask::ShouldStopGoma() const {
+  if (verify_output_) {
+    return false;
+  }
+  if (precompiling_ && service_->enable_gch_hack()) {
+    return false;
+  }
+  if (subproc_ == nullptr) {
+    DCHECK(!abort_);
+    return false;
+  }
+
+  if (IsSubprocRunning()) {
+    // When dont_kill_subprocess is true, we'll ignore remote results and
+    // always use local results, so calling remote is not useless when
+    // subprocess is already running.
+    // Otherwise stop when the configured local run preference has reached
+    // the current state.
+    if (service_->dont_kill_subprocess() ||
+        service_->local_run_preference() >= state_) {
+      return true;
+    }
+  }
+
+  if (stats_->exec_request_retry() > 1) {
+    // Prefer local when pendings are few.
+    const int pending_subprocs = SubProcessTask::NumPending();
+    return pending_subprocs <= service_->max_subprocs_pending();
+  }
+
+  if (service_->http_client()->ramp_up() != 0) {
+    return false;
+  }
+  // If http blocked (i.e. got 302, 403 error), stop calling remote.
+  LOG(INFO) << trace_id_ << " stop goma. http disabled";
+  return true;
+}
+
+// ----------------------------------------------------------------
+// state_: SETUP
+// Starts fetching compiler info for this request (state SETUP).
+//
+// Builds the environment used to run the local compiler, creates the
+// compiler info cache key, and asks the service for the compiler info.
+// FillCompilerInfoDone() is passed as the completion callback.
+void CompileTask::FillCompilerInfo() {
+  CHECK_EQ(SETUP, state_);
+
+  compiler_info_timer_.Start();
+
+  // key_envs (the request's envs) feed the cache key; run_envs additionally
+  // carry PATH (and, on Windows, PATHEXT/TMP/TEMP).
+  std::vector<string> key_envs(stats_->env().begin(), stats_->env().end());
+  std::vector<string> run_envs(key_envs);
+  if (!local_path_.empty())
+    run_envs.push_back("PATH=" + local_path_);
+#ifdef _WIN32
+  if (!pathext_.empty())
+    run_envs.push_back("PATHEXT=" + pathext_);
+  if (flags_->is_vc()) {
+    run_envs.push_back("TMP=" + service_->tmp_dir());
+    run_envs.push_back("TEMP=" + service_->tmp_dir());
+  }
+#endif
+  std::unique_ptr<CompileService::GetCompilerInfoParam> param(
+      new CompileService::GetCompilerInfoParam);
+  param->thread_id = service_->wm()->GetCurrentThreadId();
+  param->trace_id = trace_id_;
+  // The path must contain a path separator, i.e. it must not be a bare
+  // basename.
+  DCHECK_NE(
+      req_->command_spec().local_compiler_path().find(PathResolver::kPathSep),
+      string::npos)
+      << trace_id_ << " expect local_compiler_path is relative path"
+      " or absolute path but " << req_->command_spec().local_compiler_path();
+  param->key = CompilerInfoCache::CreateKey(
+      *flags_,
+      req_->command_spec().local_compiler_path(),
+      key_envs);
+  param->flags = flags_.get();
+  param->run_envs = run_envs;
+
+  // Take the raw pointer before ownership of |param| is moved into the
+  // callback; the same object is handed to the service and to the callback.
+  CompileService::GetCompilerInfoParam* param_pointer = param.get();
+  service_->GetCompilerInfo(
+      param_pointer,
+      NewCallback(
+          this, &CompileTask::FillCompilerInfoDone, std::move(param)));
+}
+
+// Completion callback for FillCompilerInfo() (state SETUP).
+//
+// Records how long the lookup took, then either aborts setup through
+// SetupRequestDone(false) -- when no compiler info state was obtained, when
+// the compiler info has an error, or when the compiler is disabled -- or
+// adopts the compiler info, rewrites the request (args, envs, command spec,
+// subprograms) and continues with UpdateRequiredFiles().
+//
+// |param| carries the result (state, cache_hit, updated) of the lookup.
+void CompileTask::FillCompilerInfoDone(
+    std::unique_ptr<CompileService::GetCompilerInfoParam> param) {
+  CHECK_EQ(SETUP, state_);
+
+  int msec = compiler_info_timer_.GetInMs();
+  stats_->set_compiler_info_process_time(msec);
+  std::ostringstream ss;
+  ss << " cache_hit=" << param->cache_hit
+     << " updated=" << param->updated
+     << " state=" << param->state.get()
+     << " in " << msec << " msec";
+  // Lookups taking more than one second are logged as a warning.
+  if (msec > 1000) {
+    LOG(WARNING) << trace_id_ << " SLOW fill compiler info"
+                 << ss.str();
+  } else {
+    LOG(INFO) << trace_id_ << " fill compiler info"
+              << ss.str();
+  }
+
+  // No compiler info state at all: report to the user and give up setup.
+  if (param->state.get() == nullptr) {
+    AddErrorToResponse(TO_USER,
+                       "something wrong trying to get compiler info.", true);
+    service_->RecordForcedFallbackInSetup(
+        CompileService::kFailToGetCompilerInfo);
+    SetupRequestDone(false);
+    return;
+  }
+
+  compiler_info_state_ = std::move(param->state);
+  DCHECK(compiler_info_state_.get() != nullptr);
+
+  if (compiler_info_state_.get()->info().HasError()) {
+    // In this case, it found local compiler, but failed to get necessary
+    // information, such as system include paths.
+    // It would happen when multiple -arch options are used.
+    if (requester_env_.fallback()) {
+      // Force to fallback mode to handle this case.
+      should_fallback_ = true;
+      service_->RecordForcedFallbackInSetup(
+          CompileService::kFailToGetCompilerInfo);
+    }
+    // The error goes to the log only when falling back; otherwise it is
+    // shown to the user.
+    AddErrorToResponse(should_fallback_ ? TO_LOG : TO_USER,
+                       compiler_info_state_.get()->info().error_message(),
+                       true);
+    SetupRequestDone(false);
+    return;
+  }
+  if (compiler_info_state_.disabled()) {
+    // In this case, it found local compiler, but not in server side
+    // (by past compile task).
+    if (service_->hermetic_fallback() || requester_env_.fallback()) {
+      should_fallback_ = true;
+      service_->RecordForcedFallbackInSetup(CompileService::kCompilerDisabled);
+    }
+    // we already responded "<local compiler path> is disabled" when it
+    // was disabled the compiler info, so won't show the same error message
+    // to user.
+    AddErrorToResponse(TO_LOG, "compiler is disabled", true);
+    SetupRequestDone(false);
+    return;
+  }
+  if (service_->hermetic()) {
+    req_->set_hermetic_mode(true);
+  }
+#ifndef _WIN32
+  if (service_->use_relative_paths_in_argv()) {
+    MakeWeakRelativeInArgv();
+  }
+#endif
+  // Rewrite the request using the compiler info obtained above.
+  MayUpdateSubprogramSpec();
+  UpdateExpandedArgs();
+  ModifyRequestArgs();
+  ModifyRequestEnvs();
+  UpdateCommandSpec();
+  stats_->set_command_version(req_->command_spec().version());
+  stats_->set_command_target(req_->command_spec().target());
+
+  UpdateRequiredFiles();
+}
+
+// Starts collecting the files the request depends on.  The collector is
+// chosen by the kind of flags_; unknown kinds are reported as a failure.
+void CompileTask::UpdateRequiredFiles() {
+  CHECK_EQ(SETUP, state_);
+  include_timer_.Start();
+  include_wait_timer_.Start();
+
+  if (!flags_->is_gcc()) {
+    if (flags_->is_vc()) {
+      // TODO: fix for linking_ mode.
+      GetIncludeFiles();
+    } else if (flags_->is_javac()) {
+      GetJavaRequiredFiles();
+    } else if (flags_->is_clang_tidy()) {
+      GetIncludeFiles();
+    } else {
+      LOG(ERROR) << trace_id_ << " unknown flag type:"
+                 << flags_->DebugString();
+      UpdateRequiredFilesDone(false);
+    }
+    return;
+  }
+
+  const GCCFlags& gcc = static_cast<const GCCFlags&>(*flags_);
+  if (gcc.mode() != GCCFlags::LINK) {
+    CHECK(!linking_);
+    GetIncludeFiles();
+    return;
+  }
+
+  const auto& argv = gcc.args();
+  const bool version_query = argv.size() == 2 && argv[1] == "--version";
+  if (version_query) {
+    // for requester_env_.verify_command()
+    VLOG(1) << trace_id_ << " --version";
+    UpdateRequiredFilesDone(true);
+    return;
+  }
+
+  // TODO: if input files are not obj/ar, check include files as well?
+  VLOG(1) << trace_id_ << " link mode";
+  CHECK(linking_);
+  GetLinkRequiredFiles();
+}
+
+// Finalizes required_files_ once dependency scanning has finished.  When |ok|
+// is false and no verify command was requested, the task is set to fall back.
+void CompileTask::UpdateRequiredFilesDone(bool ok) {
+  if (!ok) {
+    // Failed to update required_files.
+    if (requester_env_.verify_command().empty()) {
+      LOG(INFO) << trace_id_ << " failed to update required files. ";
+      should_fallback_ = true;
+      SetupRequestDone(false);
+      return;
+    }
+    VLOG(1) << trace_id_ << " verify_command="
+            << requester_env_.verify_command();
+  }
+  // Add the input files as well.
+  for (const auto& input_filename : flags_->input_filenames()) {
+    required_files_.insert(input_filename);
+  }
+  for (const auto& opt_input_filename : flags_->optional_input_filenames()) {
+    const string& abs_filename = file::JoinPathRespectAbsolute(
+        stats_->cwd(), opt_input_filename);
+    if (access(abs_filename.c_str(), R_OK) == 0) {
+      required_files_.insert(opt_input_filename);
+    } else {
+      LOG(WARNING) << trace_id_ << " optional file not found:" << abs_filename;
+    }
+  }
+  // If gomacc sets input file, add them as well.
+  for (const auto& input : req_->input()) {
+    required_files_.insert(input.filename());
+  }
+  if (VLOG_IS_ON(2)) {
+    for (const auto& required_file : required_files_) {
+      LOG(INFO) << trace_id_ << " required files:" << required_file;
+    }
+  }
+  req_->clear_input();
+
+  stats_->set_include_preprocess_time(include_timer_.GetInMs());
+  stats_->set_depscache_used(depscache_used_);
+  LOG_IF(WARNING, stats_->include_processor_run_time() > 1000)
+      << trace_id_ << " SLOW run IncludeProcessor"
+      << " required_files=" << required_files_.size()
+      << " depscache=" << depscache_used_
+      << " in " << stats_->include_processor_run_time() << " msec";
+  SetupRequestDone(true);
+}
+
+// Last step of the SETUP state: moves on to the file request on success,
+// and to local fallback or an error response on failure.
+void CompileTask::SetupRequestDone(bool ok) {
+  CHECK_EQ(SETUP, state_);
+
+  if (abort_) {
+    // subproc of local idle was already finished.
+    ProcessFinished("aborted in setup");
+    return;
+  }
+  if (ok) {
+    TryProcessFileRequest();
+    return;
+  }
+  if (!should_fallback_) {
+    // no fallback.
+    AddErrorToResponse(TO_USER, "Failed to setup request", true);
+    ProcessFinished("fail in setup");
+    return;
+  }
+  VLOG(1) << trace_id_ << " should fallback by setup failure";
+  // CompileTask::FinishSubProcess() expects INIT state when the subprocess
+  // of a should_fallback_ task finishes.
+  state_ = INIT;
+  if (!subproc_) SetupSubProcess();
+  RunSubProcess("fallback by setup failure");
+}
+
+#ifndef _WIN32
+// Makes request args weak-relative; returns true if any arg was rewritten.
+bool CompileTask::MakeWeakRelativeInArgv() {
+  CHECK_EQ(SETUP, state_);
+  DCHECK(compiler_info_state_.get() != nullptr);
+  orig_flag_dump_ = flag_dump_;
+  // With cwd in a tmp directory we can't know whether an output path means
+  // ./path/to/output or $TMP/path/to/output; the latter must stay absolute.
+  const bool cwd_maybe_temp =
+      HasPrefixDir(req_->cwd(), "/tmp") || HasPrefixDir(req_->cwd(), "/var");
+  if (cwd_maybe_temp) {
+    LOG(WARNING) << "GOMA_USE_RELATIVE_PATHS_IN_ARGV=true, but cwd may be "
+                 << "under temp directory: " << req_->cwd() << ". "
+                 << "Use original args.";
+    orig_flag_dump_ = "";
+    return false;
+  }
+  const std::vector<string>& relative_args =
+      CompilerFlagsUtil::MakeWeakRelative(
+          flags_->args(), req_->cwd(), compiler_info_state_.get()->info());
+  bool any_changed = false;
+  std::ostringstream dump;
+  for (size_t i = 0; i < relative_args.size(); ++i) {
+    if (req_->arg(i) != relative_args[i]) {
+      VLOG(1) << "Arg[" << i << "]: " << req_->arg(i) << " => "
+              << relative_args[i];
+      req_->set_arg(i, relative_args[i]);
+      any_changed = true;
+    }
+    dump << req_->arg(i) << " ";
+  }
+  flag_dump_ = dump.str();
+  if (any_changed) return true;
+  VLOG(1) << "GOMA_USE_RELATIVE_PATHS_IN_ARGV=true, " << "but no argv changed";
+  orig_flag_dump_ = "";
+  return false;
+}
+#endif
+
+// Completes |command_spec| from |compiler_info|: compiler name, version and
+// target (unless already set), binary hash, and the system include and
+// framework paths.  System library paths are cleared here.
+static void FixCommandSpec(const CompilerInfo& compiler_info,
+                           const CompilerFlags& flags,
+                           CommandSpec* command_spec) {
+  // The name is used for selecting a compiler in goma backend.  The name set
+  // by gomacc could be wrong if the given compiler, especially cc or c++, is
+  // a symlink to a non-gcc compiler, so it is overwritten with the value from
+  // compiler_info whenever possible.  gomacc does not use realpath(3) instead
+  // because compiler_info is cached and knows more details.
+  if (compiler_info.HasName()) {
+    command_spec->set_name(compiler_info.name());
+  }
+  if (!command_spec->has_version()) {
+    command_spec->set_version(compiler_info.version());
+  }
+  if (!command_spec->has_target()) {
+    command_spec->set_target(compiler_info.target());
+  }
+  command_spec->set_binary_hash(compiler_info.request_compiler_hash());
+
+  command_spec->clear_system_include_path();
+  command_spec->clear_cxx_system_include_path();
+  command_spec->clear_system_framework_path();
+  command_spec->clear_system_library_path();
+
+  // C++ program should only send C++ include paths, otherwise, include order
+  // might be wrong. For C program, cxx_system_include_paths would be empty.
+  // c.f. b/25675250
+  bool cxx_program = false;
+  if (flags.is_gcc()) {
+    cxx_program = static_cast<const GCCFlags&>(flags).is_cplusplus();
+  } else if (flags.is_vc()) {
+    cxx_program = static_cast<const VCFlags&>(flags).is_cplusplus();
+  } else if (flags.is_clang_tidy()) {
+    cxx_program = static_cast<const ClangTidyFlags&>(flags).is_cplusplus();
+  }
+
+  if (!cxx_program) {
+    for (const auto& dir : compiler_info.system_include_paths())
+      command_spec->add_system_include_path(dir);
+  }
+  // These are sent for both C and C++.
+  for (const auto& dir : compiler_info.cxx_system_include_paths())
+    command_spec->add_cxx_system_include_path(dir);
+  for (const auto& dir : compiler_info.system_framework_paths())
+    command_spec->add_system_framework_path(dir);
+}
+
+// Appends each of |library_paths| to command_spec's system library paths.
+static void FixSystemLibraryPath(const std::vector<string>& library_paths,
+                                 CommandSpec* command_spec) {
+  for (const string& p : library_paths) command_spec->add_system_library_path(p);
+}
+
+void CompileTask::UpdateExpandedArgs() {  // flags_ -> req_ and stats_
+  for (const auto& arg : flags_->expanded_args()) {
+    stats_->add_expanded_arg(arg);
+    req_->add_expanded_arg(arg);
+  }
+}
+
+// Adjusts the args sent to the backend: appends compiler_info's additional
+// flags, adds clang's default sanitizer blacklists as inputs (plus
+// -resource-dir when needed) and, for VC with /Yu, appends /Y-.  Additions
+// are mirrored to expanded_arg only if the request has expanded args.
+void CompileTask::ModifyRequestArgs() {
+  DCHECK(compiler_info_state_.get() != nullptr);
+  const CompilerInfo& compiler_info = compiler_info_state_.get()->info();
+  const bool use_expanded_args = (req_->expanded_arg_size() > 0);
+  if (compiler_info.HasAdditionalFlags()) {
+    for (const auto& flag : compiler_info.additional_flags()) {
+      req_->add_arg(flag);
+      if (use_expanded_args) {
+        req_->add_expanded_arg(flag);
+      }
+    }
+  }
+
+  if (flags_->is_gcc()) {
+    GCCFlags* gcc_flags = static_cast<GCCFlags*>(flags_.get());
+    if (!gcc_flags->has_fno_sanitize_blacklist()) {
+      // clang has default blacklist files in its resource directory.
+      // c.f. http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/SanitizerArgs.cpp?revision=242286&view=markup#l82
+      // clang's sanitizer list is here.
+      // http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Sanitizers.def
+      // Note that -fsanitize=cfi-* implies -fsanitize=cfi basically, but
+      // as of 9 Sep, 2015, -fsanitize=cfi-cast-strict doesn't imply
+      // -fsanitize=cfi.
+
+      static const struct {
+        const char* sanitize_value;
+        const char* blacklist_filename;
+      } kSanitizerCheckers[] = {
+        { "address", "asan_blacklist.txt" },
+        { "memory", "msan_blacklist.txt" },
+        { "thread", "tsan_blacklist.txt" },
+        { "cfi", "cfi_blacklist.txt" },
+        { "cfi-derived-cast", "cfi_blacklist.txt" },
+        { "cfi-unrelated-cast", "cfi_blacklist.txt" },
+        { "cfi-nvcall", "cfi_blacklist.txt" },
+        { "cfi-vcall", "cfi_blacklist.txt" },
+        { "dataflow", "dfsan_abilist.txt" },
+      };
+
+      std::set<string> added_blacklist;
+      bool needs_resource_dir = false;
+      for (const auto& checker : kSanitizerCheckers) {
+        if (gcc_flags->fsanitize().count(checker.sanitize_value) == 0)
+          continue;
+
+        if (!added_blacklist.insert(checker.blacklist_filename).second)
+          continue;
+
+        // With -no-canonical-prefixes, resource_dir could be a path relative
+        // to the current directory, so join cwd; otherwise it is absolute.
+        string blacklist = file::JoinPathRespectAbsolute(
+            flags_->cwd(),
+            compiler_info.data().resource_dir(),
+            checker.blacklist_filename);
+        if (!input_file_id_cache_->Get(blacklist).IsValid()) {
+          // -fsanitize is specified, but no default blacklist is found.
+          // current clang has only asan_blacklist.txt and msan_blacklist.txt,
+          // so this case will often happen.
+          continue;
+        }
+
+        req_->add_input()->set_filename(blacklist);
+        LOG(INFO) << "input automatically added: " << blacklist;
+        needs_resource_dir = true;
+      }
+
+      if (gcc_flags->has_resource_dir()) {
+        // Here, -resource-dir is specified by user.
+        if (gcc_flags->resource_dir() != compiler_info.data().resource_dir()) {
+          LOG(WARNING) << "user specified non default -resource-dir:"
+                       << " default=" << compiler_info.data().resource_dir()
+                       << " user=" << gcc_flags->resource_dir();
+        }
+        needs_resource_dir = false;
+      }
+
+      // When we need to upload the default blacklist.txt and -resource-dir is
+      // not specified, we'd like to specify it.
+      if (needs_resource_dir) {
+        string resource_dir_arg =
+            "-resource-dir=" + compiler_info.data().resource_dir();
+        req_->add_arg(resource_dir_arg);
+        LOG(INFO) << "automatically added: " << resource_dir_arg;
+        if (use_expanded_args) {
+          req_->add_expanded_arg(resource_dir_arg);
+        }
+      }
+    }
+  }
+
+  if (!flags_->is_vc()) return;
+
+  // If /Yu is specified, we add /Y- to tell the backend compiler not
+  // to try using PCH. We add this here because we don't want to show
+  // this flag in compiler_proxy's console.
+  const string& using_pch = static_cast<const VCFlags&>(*flags_).using_pch();
+  if (using_pch.empty()) return;
+
+  req_->add_arg("/Y-");
+  // Like the additions above, only extend expanded_arg when it is in use.
+  if (use_expanded_args) {
+    req_->add_expanded_arg("/Y-");
+  }
+
+  string joined;
+  JoinStrings(req_->arg(), " ", &joined);
+  LOG(INFO) << "Modified args: " << joined;
+}
+
+// Keeps only the env entries that flags_ reports as important for the
+// server; the request is left untouched when nothing gets filtered out.
+void CompileTask::ModifyRequestEnvs() {
+  std::vector<string> kept;
+  for (const auto& entry : req_->env()) {
+    if (!flags_->IsServerImportantEnv(entry.c_str())) continue;
+    kept.push_back(entry);
+  }
+  const size_t original_count = static_cast<size_t>(req_->env_size());
+  if (kept.size() == original_count) {
+    return;
+  }
+
+  req_->clear_env();
+  for (const auto& entry : kept) req_->add_env(entry);
+  LOG(INFO) << "Modified env: " << kept;
+}
+
+void CompileTask::UpdateCommandSpec() {
+  CHECK_EQ(SETUP, state_);
+  command_spec_ = req_->command_spec();  // snapshot before it is fixed up
+  CommandSpec* mutable_spec = req_->mutable_command_spec();
+  if (compiler_info_state_.get() != nullptr) {
+    // Complete the request's spec with what compiler info knows.
+    FixCommandSpec(compiler_info_state_.get()->info(), *flags_, mutable_spec);
+  }
+}
+
+// Replaces |subprogram_specs| with one entry (path + binary hash) per
+// distinct subprogram of the compiler info; leaves it empty without one.
+void CompileTask::MayFixSubprogramSpec(
+    google::protobuf::RepeatedPtrField<SubprogramSpec>* subprogram_specs)
+        const {
+  subprogram_specs->Clear();
+  if (!compiler_info_state_.get()) return;
+  std::set<string> seen_names;
+  for (const auto& prog : compiler_info_state_.get()->info().subprograms()) {
+    DCHECK(file::IsAbsolutePath(prog.name))
+        << "filename of subprogram is expected to be absolute path."
+        << " info.name=" << prog.name << " info.hash=" << prog.hash;
+    const bool first_time = seen_names.insert(prog.name).second;
+    if (first_time) {
+      SubprogramSpec* spec = subprogram_specs->Add();
+      spec->set_path(prog.name);
+      spec->set_binary_hash(prog.hash);
+    } else {
+      LOG(ERROR) << "The same subprogram is added twice.  Ignoring."
+                 << " info.name=" << prog.name
+                 << " info.hash=" << prog.hash;
+    }
+  }
+}
+
+// Rebuilds the request's subprogram specs; logs them at verbose level 3.
+void CompileTask::MayUpdateSubprogramSpec() {
+  CHECK_EQ(SETUP, state_);
+  MayFixSubprogramSpec(req_->mutable_subprogram());
+  if (VLOG_IS_ON(3)) {
+    for (const auto& spec : req_->subprogram()) {
+      LOG(INFO) << trace_id_ << " update subprogram spec:"
+                << " path=" << spec.path() << " hash=" << spec.binary_hash();
+    }
+  }
+}
+
+// Request/response bundle handed to RunIncludeProcessor() and back.
+struct CompileTask::RunIncludeProcessorParam {
+  RunIncludeProcessorParam() = default;
+
+  // request
+  string input_filename;
+  string abs_input_filename;
+  // response
+  bool result_status = false;
+  std::set<string> required_files;
+  int total_files = 0;
+  int skipped_files = 0;
+  std::unique_ptr<FileIdCache> file_id_cache;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(RunIncludeProcessorParam);
+};
+
+// Resolves the include files of the single input file: taken from DepsCache
+// when possible, otherwise computed by RunIncludeProcessor() in the pool.
+void CompileTask::GetIncludeFiles() {
+  CHECK_EQ(SETUP, state_);
+  DCHECK(flags_->is_gcc() || flags_->is_vc() || flags_->is_clang_tidy());
+  DCHECK(compiler_info_state_.get() != nullptr);
+
+  const auto& inputs = flags_->input_filenames();
+  if (inputs.size() != 1U) {
+    // We don't support multiple input files.
+    LOG(ERROR) << trace_id_ << " multiple inputs? "
+               << inputs.size() << " " << inputs;
+    AddErrorToResponse(TO_USER, "multiple inputs are not supported. ", true);
+    UpdateRequiredFilesDone(false);
+    return;
+  }
+  const string& input_filename = inputs[0];
+  const string abs_input_filename =
+      file::JoinPathRespectAbsolute(flags_->cwd(), input_filename);
+
+  if (DepsCache::IsEnabled()) {
+    DepsCache* cache = DepsCache::instance();
+    deps_identifier_ = DepsCache::MakeDepsIdentifier(
+        compiler_info_state_.get()->info(), *flags_);
+    const bool cache_hit =
+        deps_identifier_.valid() &&
+        cache->GetDependencies(deps_identifier_, flags_->cwd(),
+                               abs_input_filename, &required_files_,
+                               input_file_id_cache_.get());
+    if (cache_hit) {
+      LOG(INFO) << trace_id_ << " use deps cache. required_files="
+                << required_files_.size();
+      depscache_used_ = true;
+      UpdateRequiredFilesDone(true);
+      return;
+    }
+  }
+
+  // No cached result: hand the file id cache over to the worker via |param|.
+  std::unique_ptr<RunIncludeProcessorParam> param(new RunIncludeProcessorParam);
+  param->input_filename = input_filename;
+  param->abs_input_filename = abs_input_filename;
+  input_file_id_cache_->ReleaseOwner();
+  param->file_id_cache = std::move(input_file_id_cache_);
+
+  service_->wm()->RunClosureInPool(
+      FROM_HERE, service_->include_processor_pool(),
+      NewCallback(this, &CompileTask::RunIncludeProcessor, std::move(param)),
+      WorkerThreadManager::PRIORITY_LOW);
+}
+
+// Scheduled by GetIncludeFiles() on the include processor pool.  Computes
+// the include files of the input into |param| and then posts
+// RunIncludeProcessorDone() to thread_id_.
+void CompileTask::RunIncludeProcessor(
+    std::unique_ptr<RunIncludeProcessorParam> param) {
+  DCHECK(compiler_info_state_.get() != nullptr);
+
+  // Pass ownership temporary to IncludeProcessor thread.
+  FileIdCache* file_ids = param->file_id_cache.get();
+  file_ids->AcquireOwner();
+
+  stats_->set_include_processor_wait_time(include_wait_timer_.GetInMs());
+  LOG_IF(WARNING, stats_->include_processor_wait_time() > 1000)
+      << trace_id_ << " SLOW start IncludeProcessor"
+      << " in " << stats_->include_processor_wait_time() << " msec";
+
+  SimpleTimer run_timer(SimpleTimer::START);
+  IncludeProcessor processor;
+  const bool succeeded = processor.GetIncludeFiles(
+      param->input_filename, flags_->cwd_for_include_processor(), *flags_,
+      compiler_info_state_.get()->info(), &param->required_files, file_ids);
+  stats_->set_include_processor_run_time(run_timer.GetInMs());
+
+  param->result_status = succeeded;
+  if (!succeeded) {
+    LOG(WARNING) << trace_id_
+                 << " Unsupported feature detected "
+                 << "in our pseudo includer! "
+                 << flags_->DebugString();
+  }
+  param->total_files = processor.total_files();
+  param->skipped_files = processor.skipped_files();
+
+  // Back ownership from IncludeProcessor thread to CompileTask thread.
+  file_ids->ReleaseOwner();
+  OneshotClosure* done = NewCallback(
+      this, &CompileTask::RunIncludeProcessorDone, std::move(param));
+  service_->wm()->RunClosureInThread(
+      FROM_HERE, thread_id_, done, WorkerThreadManager::PRIORITY_LOW);
+}
+
+// Back on the task thread: takes the results out of |param|, stores them in
+// DepsCache when possible, and continues with UpdateRequiredFilesDone().
+void CompileTask::RunIncludeProcessorDone(
+    std::unique_ptr<RunIncludeProcessorParam> param) {
+  DCHECK(BelongsToCurrentThread());
+
+  // Take the file id cache and the computed file set back from |param|.
+  input_file_id_cache_ = std::move(param->file_id_cache);
+  input_file_id_cache_->AcquireOwner();
+  required_files_.swap(param->required_files);
+
+  stats_->set_include_preprocess_total_files(param->total_files);
+  stats_->set_include_preprocess_skipped_files(param->skipped_files);
+
+  const bool succeeded = param->result_status;
+  if (DepsCache::IsEnabled() && succeeded && deps_identifier_.valid()) {
+    const bool saved = DepsCache::instance()->SetDependencies(
+        deps_identifier_, flags_->cwd(), param->abs_input_filename,
+        required_files_, input_file_id_cache_.get());
+    if (!saved) {
+      LOG(INFO) << trace_id_ << " failed to save dependencies.";
+    }
+  }
+
+  UpdateRequiredFilesDone(succeeded);
+}
+
+// Results produced by RunLinkerInputProcessor(); there is no request part.
+struct CompileTask::RunLinkerInputProcessorParam {
+  RunLinkerInputProcessorParam() = default;
+  // response
+  bool result_status = false;
+  std::set<string> required_files;
+  std::vector<string> system_library_paths;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(RunLinkerInputProcessorParam);
+};
+
+// Schedules RunLinkerInputProcessor() on the include processor pool.
+void CompileTask::GetLinkRequiredFiles() {
+  CHECK_EQ(SETUP, state_);
+  DCHECK(compiler_info_state_.get() != nullptr);
+
+  std::unique_ptr<RunLinkerInputProcessorParam> result(
+      new RunLinkerInputProcessorParam);
+
+  service_->wm()->RunClosureInPool(
+      FROM_HERE,
+      service_->include_processor_pool(),
+      NewCallback(
+          this, &CompileTask::RunLinkerInputProcessor, std::move(result)),
+      WorkerThreadManager::PRIORITY_LOW);
+}
+
+// Pool-side worker for link mode: gathers the linker's input files and
+// library paths into |param| and posts the Done callback to thread_id_.
+void CompileTask::RunLinkerInputProcessor(
+    std::unique_ptr<RunLinkerInputProcessorParam> param) {
+  DCHECK(compiler_info_state_.get() != nullptr);
+  LinkerInputProcessor processor(flags_->args(), flags_->cwd());
+  const bool succeeded = processor.GetInputFilesAndLibraryPath(
+      compiler_info_state_.get()->info(), req_->command_spec(),
+      &param->required_files, &param->system_library_paths);
+  param->result_status = succeeded;
+  if (!succeeded) {
+    LOG(WARNING) << trace_id_
+                 << " Failed to get input files "
+                 << flags_->DebugString();
+  }
+
+  OneshotClosure* done = NewCallback(
+      this, &CompileTask::RunLinkerInputProcessorDone, std::move(param));
+  service_->wm()->RunClosureInThread(
+      FROM_HERE, thread_id_, done, WorkerThreadManager::PRIORITY_LOW);
+}
+
+void CompileTask::RunLinkerInputProcessorDone(
+    std::unique_ptr<RunLinkerInputProcessorParam> param) {
+  DCHECK(BelongsToCurrentThread());
+
+  // Adopt the worker's results and add the library paths to the request.
+  system_library_paths_.swap(param->system_library_paths);
+  required_files_.swap(param->required_files);
+  FixSystemLibraryPath(system_library_paths_, req_->mutable_command_spec());
+  UpdateRequiredFilesDone(param->result_status);
+}
+
+// Results produced by RunJarParser(); there is no request part.
+struct CompileTask::RunJarParserParam {
+  RunJarParserParam() = default;
+  // response
+  std::set<string> required_files;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(RunJarParserParam);
+};
+
+// Schedules RunJarParser() on the include processor pool.
+void CompileTask::GetJavaRequiredFiles() {
+  CHECK_EQ(SETUP, state_);
+
+  std::unique_ptr<RunJarParserParam> result(new RunJarParserParam);
+
+  service_->wm()->RunClosureInPool(
+      FROM_HERE,
+      service_->include_processor_pool(),
+      NewCallback(this, &CompileTask::RunJarParser, std::move(result)),
+      WorkerThreadManager::PRIORITY_LOW);
+}
+
+// Pool-side worker for javac flags; results return via RunJarParserDone().
+void CompileTask::RunJarParser(std::unique_ptr<RunJarParserParam> param) {
+  DCHECK(flags_->is_javac());
+  JavacFlags* javac = static_cast<JavacFlags*>(flags_.get());
+  JarParser jars;
+  jars.GetJarFiles(javac->jar_files(), stats_->cwd(), &param->required_files);
+  OneshotClosure* done =
+      NewCallback(this, &CompileTask::RunJarParserDone, std::move(param));
+  service_->wm()->RunClosureInThread(
+      FROM_HERE, thread_id_, done, WorkerThreadManager::PRIORITY_LOW);
+}
+
+void CompileTask::RunJarParserDone(std::unique_ptr<RunJarParserParam> param) {
+  DCHECK(BelongsToCurrentThread());
+  // Take over the parser's result; there is no failure status to forward.
+  param->required_files.swap(required_files_);
+  UpdateRequiredFilesDone(true);
+}
+
+// ----------------------------------------------------------------
+// state_: FILE_REQ.
+void CompileTask::SetInputFileCallback() {
+  CHECK(BelongsToCurrentThread());
+  CHECK_EQ(FILE_REQ, state_);
+  CHECK(!input_file_callback_);
+  input_file_success_ = true;  // cleared when an input file task fails
+  num_input_file_task_ = 0;    // counted up by StartInputFileTask()
+  input_file_callback_ =
+      NewCallback(this, &CompileTask::ProcessFileRequestDone);
+}
+
+void CompileTask::StartInputFileTask() {
+  CHECK(BelongsToCurrentThread());
+  CHECK_EQ(FILE_REQ, state_);
+  num_input_file_task_ += 1;  // undone by MaybeRunInputFileCallback(true)
+}
+
+// Handles a finished |input_file_task| on the task thread: records stats,
+// fills the request input's hash key (and content when it must be uploaded).
+void CompileTask::InputFileTaskFinished(InputFileTask* input_file_task) {
+  CHECK(BelongsToCurrentThread());
+  CHECK_EQ(FILE_REQ, state_);
+
+  if (abort_) {
+    VLOG(1) << trace_id_ << " aborted ";
+    input_file_success_ = false;
+    input_file_task->Done(this);
+    return;
+  }
+
+  const string& filename = input_file_task->filename();
+  const string& hash_key = input_file_task->hash_key();
+  const ssize_t file_size = input_file_task->file_size();
+  const time_t mtime = input_file_task->mtime();
+  VLOG(1) << trace_id_ << " input done:" << filename;
+  if (mtime > stats_->latest_input_mtime()) {
+    stats_->set_latest_input_filename(filename);
+    stats_->set_latest_input_mtime(mtime);
+  }
+  if (!input_file_task->success()) {
+    AddErrorToResponse(TO_LOG, "Create file blob failed for:" + filename, true);
+    input_file_success_ = false;
+    input_file_task->Done(this);
+    return;
+  }
+  DCHECK(!hash_key.empty()) << filename;
+  stats_->add_input_file_time(input_file_task->GetInMs());
+  stats_->add_input_file_size(file_size);
+  ExecReq_Input* input = input_file_task->GetInputForTask(this);
+  CHECK(input != nullptr) << trace_id_ << " filename:" << filename;
+  input->set_hash_key(hash_key);
+  if (!input_file_task->need_hash_only()) {
+    const FileBlob* blob = input_file_task->blob();
+    CHECK(blob != nullptr) << trace_id_ << " " << filename;
+    if (input_file_task->need_to_upload_content()) {
+      LOG(INFO) << trace_id_ << " embedded upload:" << filename
+                << " size=" << file_size
+                << " reason:" << input_file_task->upload_reason()
+                << " retry:" << stats_->exec_request_retry();
+      // We can't swap blob since input_file_task is shared with
+      // several compile tasks.
+      *input->mutable_content() = *blob;
+      if (!FileServiceClient::IsValidFileBlob(input->content())) {
+        LOG(ERROR) << trace_id_ << " bad embedded content " << filename;
+        input_file_success_ = false;
+      }
+    }
+  }
+  const HttpRPC::Status& http_rpc_status =
+      input_file_task->http_rpc_status();
+  stats_->input_file_rpc_size += http_rpc_status.req_size;
+  stats_->input_file_rpc_raw_size += http_rpc_status.raw_req_size;
+  input_file_task->Done(this);
+}
+
+// Runs the pending input file callback once no input file task remains.
+// |task_finished| means one task just completed and must be counted down.
+void CompileTask::MaybeRunInputFileCallback(bool task_finished) {
+  CHECK(BelongsToCurrentThread());
+  CHECK_EQ(FILE_REQ, state_);
+  if (task_finished) {
+    --num_input_file_task_;
+    VLOG(1) << trace_id_ << " input remain=" << num_input_file_task_;
+    if (num_input_file_task_ > 0) return;
+  }
+  CHECK_EQ(0, num_input_file_task_);
+  if (!input_file_callback_) return;
+
+  // Detach the callback from the member before running it.
+  OneshotClosure* pending = input_file_callback_;
+  input_file_callback_ = nullptr;
+  pending->Run();
+}
+
+// ----------------------------------------------------------------
+// state_: CALL_EXEC.
+
+void CompileTask::CheckCommandSpec() {
+  CHECK_EQ(CALL_EXEC, state_);
+  if (!resp_->result().has_command_spec()) {
+    return;
+  }
+
+  // Checks all mismatches first, then decide behavior later.
+  bool is_name_mismatch = false;
+  bool is_target_mismatch = false;
+  bool is_binary_hash_mismatch = false;
+  bool is_version_mismatch = false;
+  bool is_subprograms_mismatch = false;
+  const CommandSpec& req_command_spec = req_->command_spec();
+  const CommandSpec& resp_command_spec = resp_->result().command_spec();
+  const string message_on_mismatch(
+      "local:" + CreateCommandVersionString(req_command_spec) +
+      " but remote:" +
+      CreateCommandVersionString(resp_command_spec));
+  if (req_command_spec.name() != resp_command_spec.name()) {
+    is_name_mismatch = true;
+    std::ostringstream ss;
+    ss << trace_id_ << " compiler name mismatch:"
+       << " local:" << req_command_spec.name()
+       << " remote:" << resp_command_spec.name();
+    AddErrorToResponse(TO_LOG, ss.str(), false);
+    stats_->set_exec_command_name_mismatch(message_on_mismatch);
+  }
+  if (req_command_spec.target() != resp_command_spec.target()) {
+    is_target_mismatch = true;
+    std::ostringstream ss;
+    ss << trace_id_ << " compiler target mismatch:"
+       << " local:" << req_command_spec.name()
+       << " remote:" << resp_command_spec.name();
+    AddErrorToResponse(TO_LOG, ss.str(), false);
+    stats_->set_exec_command_target_mismatch(message_on_mismatch);
+  }
+  if (req_command_spec.binary_hash() != resp_command_spec.binary_hash()) {
+    is_binary_hash_mismatch = true;
+    LOG(WARNING) << trace_id_ << " compiler binary hash mismatch:"
+                 << " local:" << req_command_spec.binary_hash()
+                 << " remote:" << resp_command_spec.binary_hash();
+    stats_->set_exec_command_binary_hash_mismatch(message_on_mismatch);
+  }
+  if (req_command_spec.version() != resp_command_spec.version()) {
+    is_version_mismatch = true;
+    LOG(WARNING) << trace_id_ << " compiler version mismatch:"
+                 << " local:" << req_command_spec.version()
+                 << " remote:" << resp_command_spec.version();
+    stats_->set_exec_command_version_mismatch(message_on_mismatch);
+  }
+  if (!IsSameSubprograms(*req_, *resp_)) {
+    is_subprograms_mismatch = true;
+    std::ostringstream local_subprograms;
+    DumpSubprograms(req_->subprogram(), &local_subprograms);
+    std::ostringstream remote_subprograms;
+    DumpSubprograms(resp_->result().subprogram(), &remote_subprograms);
+    LOG(WARNING) << trace_id_ << " compiler subprograms mismatch:"
+                 << " local:" << local_subprograms.str()
+                 << " remote:" << remote_subprograms.str();
+    std::ostringstream ss;
+    ss << "local:" << CreateCommandVersionString(req_command_spec)
+       << " subprogram:" << local_subprograms.str()
+       << " but remote:" << CreateCommandVersionString(resp_command_spec)
+       << " subprogram:" << remote_subprograms.str();
+    stats_->set_exec_command_subprograms_mismatch(ss.str());
+  }
+
+  if (service_->hermetic()) {
+    bool mismatch = false;
+    // Check if remote used the same command spec.
+    if (is_name_mismatch) {
+      mismatch = true;
+      AddErrorToResponse(TO_USER, "compiler name mismatch", true);
+    }
+    if (is_target_mismatch) {
+      mismatch = true;
+      AddErrorToResponse(TO_USER, "compiler target mismatch", true);
+    }
+    if (is_binary_hash_mismatch) {
+      mismatch = true;
+      AddErrorToResponse(TO_USER, "compiler binary hash mismatch", true);
+    }
+    if (is_version_mismatch) {
+      AddErrorToResponse(TO_USER, "compiler version mismatch", true);
+      mismatch = true;
+    }
+    if (is_subprograms_mismatch) {
+      AddErrorToResponse(TO_USER, "subprograms mismatch", true);
+      mismatch = true;
+    }
+    if (mismatch) {
+      if (service_->DisableCompilerInfo(compiler_info_state_.get(),
+                                        "hermetic mismatch")) {
+        AddErrorToResponse(
+            TO_USER,
+            req_->command_spec().local_compiler_path() + " is disabled.",
+            true);
+      }
+      want_fallback_ = service_->hermetic_fallback();
+      if (want_fallback_ != requester_env_.fallback()) {
+        LOG(INFO) << trace_id_ << " hermetic mismatch: fallback changed from "
+                  << requester_env_.fallback()
+                  << " to " << want_fallback_;
+      }
+    }
+    return;
+  }
+
+  if (is_name_mismatch || is_target_mismatch) {
+    AddErrorToResponse(TO_USER, "compiler name or target mismatch", true);
+    if (service_->DisableCompilerInfo(compiler_info_state_.get(),
+                                      "compiler name or target mismatch")) {
+      AddErrorToResponse(
+          TO_USER,
+          req_->command_spec().local_compiler_path() + " is disabled.",
+          true);
+    }
+    return;
+  }
+  // TODO: drop command_check_level support in the future.
+  //                    GOMA_HERMETIC should be recommended.
+  if (is_binary_hash_mismatch) {
+    string error_message;
+    bool set_error = false;
+    if (service_->RecordCommandSpecBinaryHashMismatch(
+            stats_->exec_command_binary_hash_mismatch())) {
+      error_message = "compiler binary hash mismatch: " +
+          stats_->exec_command_binary_hash_mismatch();
+    }
+    if (service_->command_check_level() == "checksum") {
+      set_error = true;
+    }
+    if (!requester_env_.verify_command().empty()) {
+      if (requester_env_.verify_command() == "checksum" ||
+          requester_env_.verify_command() == "all") {
+        AddErrorToResponse(TO_LOG, "", true);
+        resp_->mutable_result()->set_stderr_buffer(
+            "compiler binary hash mismatch: " +
+            stats_->exec_command_binary_hash_mismatch() + "\n" +
+            resp_->mutable_result()->stderr_buffer());
+      }
+      // ignore when other verify command mode.
+    } else if (!error_message.empty()) {
+      error_message =
+          (set_error ? "Error: " : "Warning: ") + error_message;
+      AddErrorToResponse(TO_USER, error_message, set_error);
+    }
+  }
+  if (is_version_mismatch) {
+    string error_message;
+    bool set_error = false;
+    if (service_->RecordCommandSpecVersionMismatch(
+            stats_->exec_command_version_mismatch())) {
+      error_message = "compiler version mismatch: " +
+                      stats_->exec_command_version_mismatch();
+    }
+    if (service_->command_check_level() == "version") {
+      set_error = true;
+    }
+    if (!requester_env_.verify_command().empty()) {
+      if (requester_env_.verify_command() == "version" ||
+          requester_env_.verify_command() == "all") {
+        AddErrorToResponse(TO_LOG, "", true);
+        resp_->mutable_result()->set_stderr_buffer(
+            "compiler version mismatch: " +
+            stats_->exec_command_version_mismatch() + "\n" +
+            resp_->mutable_result()->stderr_buffer());
+      }
+      // ignore when other verify command mode.
+    } else if (!error_message.empty()) {
+      error_message =
+          (set_error ? "Error: " : "Warning: ") + error_message;
+      AddErrorToResponse(TO_USER, error_message, set_error);
+    }
+  }
+  if (is_subprograms_mismatch) {
+    std::ostringstream error_message;
+    bool set_error = false;
+
+    std::set<string> remote_hashes;
+    for (const auto& subprog : resp_->result().subprogram()) {
+      remote_hashes.insert(subprog.binary_hash());
+    }
+    for (const auto& subprog : req_->subprogram()) {
+      if (remote_hashes.find(subprog.binary_hash()) != remote_hashes.end()) {
+        continue;
+      }
+      std::ostringstream ss;
+      ss << subprog.path() << " " << subprog.binary_hash();
+      if (service_->RecordSubprogramMismatch(ss.str())) {
+        if (!error_message.str().empty()) {
+          error_message << std::endl;
+        }
+        error_message << "subprogram mismatch: "
+                      << ss.str();
+      }
+    }
+
+    if (service_->command_check_level() == "checksum") {
+      set_error = true;
+    }
+    if (!requester_env_.verify_command().empty()) {
+      if (requester_env_.verify_command() == "checksum" ||
+          requester_env_.verify_command() == "all") {
+        AddErrorToResponse(TO_LOG, "", true);
+        resp_->mutable_result()->set_stderr_buffer(
+            error_message.str() + "\n" +
+            resp_->mutable_result()->stderr_buffer());
+      }
+      // ignore when other verify command mode.
+    } else if (!error_message.str().empty()) {
+      AddErrorToResponse(
+          TO_USER,
+          (set_error ? "Error: " : "Warning: ") + error_message.str(),
+          set_error);
+    }
+  }
+}
+
+// Detects the case where the backend returned a CommandSpec but could not
+// match the local compiler and/or its subprograms.  The finding is logged and
+// recorded in stats_; in hermetic mode it is also reported to the user, the
+// compiler may be disabled, and want_fallback_ is updated from the service
+// setting.  |retry_reason| is not used in this function body.
+void CompileTask::CheckNoMatchingCommandSpec(const string& retry_reason) {
+  CHECK_EQ(CALL_EXEC, state_);
+
+  // Without a CommandSpec in ExecResult, the goma backend did not try to
+  // find the compiler, so there is nothing to compare.
+  if (!resp_->result().has_command_spec())
+    return;
+
+  // An incomplete CommandSpec (no binary hash) means the goma backend tried
+  // to select a matching compiler but failed.
+  const bool compiler_missing =
+      !resp_->result().command_spec().has_binary_hash();
+  const bool subprogram_missing = !IsSameSubprograms(*req_, *resp_);
+  if (!(compiler_missing || subprogram_missing)) {
+    // Nothing is missing.
+    return;
+  }
+
+  std::ostringstream local_subprogs;
+  std::ostringstream remote_subprogs;
+  DumpSubprograms(req_->subprogram(), &local_subprogs);
+  DumpSubprograms(resp_->result().subprogram(), &remote_subprogs);
+  const std::string local_dump = local_subprogs.str();
+  const std::string remote_dump = remote_subprogs.str();
+  const auto local_version = CreateCommandVersionString(req_->command_spec());
+
+  // Human readable summary of what the server failed to provide.
+  std::ostringstream what_missing;
+  if (compiler_missing) {
+    LOG(WARNING) << trace_id_
+                 << " compiler not found:"
+                 << " local: " << local_version
+                 << " remote: none";
+    what_missing << "compiler(" << local_version << ")";
+  }
+  if (subprogram_missing) {
+    LOG(WARNING) << trace_id_
+                 << " subprogram not found:"
+                 << " local: " << local_dump
+                 << " remote: " << remote_dump;
+    // A separator is needed only when the compiler part was written above.
+    if (compiler_missing)
+      what_missing << "/";
+    what_missing << "subprograms(" << local_dump << ")";
+  }
+
+  // Record the local/remote comparison in the task stats.
+  std::ostringstream not_found;
+  not_found << "local: " << local_version
+            << " subprogram: " << local_dump
+            << " but remote: ";
+  if (compiler_missing) {
+    not_found << "none";
+  } else {
+    not_found << CreateCommandVersionString(resp_->result().command_spec());
+  }
+  not_found << " subprogram: " << remote_dump;
+  stats_->set_exec_command_not_found(not_found.str());
+
+  const std::string missing = what_missing.str();
+  if (!service_->hermetic() || missing.empty())
+    return;
+
+  // Hermetic mode: surface the miss as a user-visible error.
+  AddErrorToResponse(
+      TO_USER, "No matching " + missing + " found in server", true);
+  if (compiler_missing &&
+      service_->DisableCompilerInfo(compiler_info_state_.get(),
+                                    "no matching compiler found in server")) {
+    AddErrorToResponse(
+        TO_USER,
+        req_->command_spec().local_compiler_path() + " is disabled.",
+        true);
+  }
+
+  want_fallback_ = service_->hermetic_fallback();
+  if (want_fallback_ != requester_env_.fallback()) {
+    LOG(INFO) << trace_id_
+              << " hermetic miss "
+              << missing
+              << ": fallback changed from "
+              << requester_env_.fallback()
+              << " to " << want_fallback_;
+  }
+}
+
+// Records file cache keys for inputs whose content was embedded in the
+// request, but only when the response reports no missing inputs.
+void CompileTask::StoreEmbeddedUploadInformationIfNeeded() {
+  // Embedded upload information is saved only when missing input size is 0.
+  // Consider the situation where cluster A and cluster B are both in use.
+  // A compile request sent to cluster A might be answered with missing
+  // inputs, and the retried request might then go to cluster B, which might
+  // report a missing input error again.
+  // So the embedded upload information is kept only if no missing input
+  // error happened.
+  // TODO: This can reduce the number of input file missing, it would
+  // still exist. After uploading a file to cluster B was succeeded, we might
+  // send another compile request to cluster A. When cluster A does not have
+  // the file cache, missing inputs error will occur.
+  if (resp_->missing_input_size() > 0)
+    return;
+
+  // TODO: What time should we use here?
+  const millitime_t now_ms = GetCurrentTimestampMs();
+
+  for (const auto& input : req_->input()) {
+    // Only inputs that carry content were uploaded embedded in the request.
+    if (input.has_content()) {
+      const std::string abs_path =
+          file::JoinPathRespectAbsolute(flags_->cwd(), input.filename());
+      const bool is_new_key = service_->file_hash_cache()->StoreFileCacheKey(
+          abs_path, input.hash_key(), now_ms,
+          input_file_id_cache_->Get(abs_path));
+      VLOG(1) << trace_id_
+              << " store file cache key for embedded upload: "
+              << abs_path
+              << " : is new cache key? = " << is_new_key;
+    }
+  }
+}
+
+// ----------------------------------------------------------------
+// state_: FILE_RESP.
+// Installs the callback that runs ProcessFileResponseDone and resets the
+// output file bookkeeping.  Must be called in FILE_RESP state on the task's
+// thread, with no callback already installed.
+void CompileTask::SetOutputFileCallback() {
+  CHECK(BelongsToCurrentThread());
+  CHECK_EQ(FILE_RESP, state_);
+  CHECK(!output_file_callback_);
+  // Start from a clean slate: no tasks in flight, no failure seen yet.
+  output_file_success_ = true;
+  num_output_file_task_ = 0;
+  output_file_callback_ =
+      NewCallback(this, &CompileTask::ProcessFileResponseDone);
+}
+
+// Sanity-checks an output filename that starts with '/'.  Paths under the
+// service tmp dir or "/var" are only logged verbosely; any other such path
+// is fatal when the service uses relative paths in argv.
+void CompileTask::CheckOutputFilename(const string& filename) {
+  CHECK_EQ(FILE_RESP, state_);
+  if (filename[0] != '/')
+    return;
+
+  const bool in_temp_dir = HasPrefixDir(filename, service_->tmp_dir()) ||
+                           HasPrefixDir(filename, "/var");
+  if (in_temp_dir) {
+    VLOG(1) << "Output to temp directory:" << filename;
+    return;
+  }
+
+  // If FLAGS_USE_RELATIVE_PATHS_IN_ARGV is false, output path may be
+  // absolute path specified by -o or so.
+  if (!service_->use_relative_paths_in_argv())
+    return;
+
+  // Dump the task state before dying so the failure can be diagnosed.
+  Json::Value task_json;
+  DumpToJson(true, &task_json);
+  LOG(ERROR) << trace_id_ << " " << task_json;
+  LOG(FATAL) << "Absolute output filename:"
+             << filename;
+}
+
+// Counts one more in-flight output file task.  Must be called in FILE_RESP
+// state on the task's thread.
+void CompileTask::StartOutputFileTask() {
+  CHECK(BelongsToCurrentThread());
+  CHECK_EQ(FILE_RESP, state_);
+  num_output_file_task_ += 1;
+}
+
+// Handles completion of one output file task.  On abort or write failure the
+// overall output_file_success_ flag is cleared; on failure the cache policy
+// may additionally be switched to STORE_ONLY.  On success the task's timing
+// and RPC statistics are accumulated into stats_.
+void CompileTask::OutputFileTaskFinished(
+    std::unique_ptr<OutputFileTask> output_file_task) {
+  CHECK(BelongsToCurrentThread());
+  CHECK_EQ(FILE_RESP, state_);
+
+  DCHECK_EQ(this, output_file_task->task());
+  const ExecResult_Output& output = output_file_task->output();
+  const string& filename = output.filename();
+
+  if (abort_) {
+    output_file_success_ = false;
+    return;
+  }
+
+  if (!output_file_task->success()) {
+    const char* cache_state = cache_hit() ? "cached" : "no-cached";
+    AddErrorToResponse(TO_LOG,
+                       "Failed to write file blob:" + filename + " (" +
+                       cache_state + ")",
+                       true);
+    output_file_success_ = false;
+
+    // If it fails to write file, goma has ExecResult in cache but might
+    // lost output file.  It would be better to retry with STORE_ONLY
+    // to recreate output file and store it in cache.
+    switch (req_->cache_policy()) {
+      case ExecReq::LOOKUP_AND_STORE:
+      case ExecReq::LOOKUP_AND_STORE_SUCCESS:
+        LOG(WARNING) << trace_id_
+                     << " will retry with STORE_ONLY";
+        req_->set_cache_policy(ExecReq::STORE_ONLY);
+        break;
+      default:
+        break;
+    }
+    return;
+  }
+
+  // Success: warn about slow writes, then record timing and size.
+  const int elapsed_ms = output_file_task->GetInMs();
+  const HttpRPC::Status& rpc_status = output_file_task->http_rpc_status();
+  LOG_IF(WARNING, elapsed_ms > 60 * 1000)
+      << trace_id_
+      << " SLOW output file:"
+      << " filename=" << filename
+      << " http_rpc=" << rpc_status.DebugString()
+      << " num_rpc=" << output_file_task->num_rpc()
+      << " in_memory=" << output_file_task->IsInMemory()
+      << " in " << elapsed_ms << " msec";
+  stats_->add_output_file_time(elapsed_ms);
+
+  const auto blob_type = output.blob().blob_type();
+  LOG_IF(WARNING,
+         blob_type != FileBlob::FILE && blob_type != FileBlob::FILE_META)
+      << "Invalid blob type: " << blob_type;
+  stats_->add_output_file_size(output.blob().file_size());
+
+  // Per-RPC accounting.
+  stats_->output_file_rpc += output_file_task->num_rpc();
+  stats_->add_chunk_resp_size(rpc_status.resp_size);
+  stats_->output_file_rpc_req_build_time += rpc_status.req_build_time;
+  stats_->output_file_rpc_req_send_time += rpc_status.req_send_time;
+  stats_->output_file_rpc_wait_time += rpc_status.wait_time;
+  stats_->output_file_rpc_resp_recv_time += rpc_status.resp_recv_time;
+  stats_->output_file_rpc_resp_parse_time += rpc_status.resp_parse_time;
+  stats_->output_file_rpc_size += rpc_status.resp_size;
+  stats_->output_file_rpc_raw_size += rpc_status.raw_resp_size;
+}
+
+// Runs the output file callback once no output file task is in flight.
+// When |task_finished| is true, |index| names the output whose blob can be
+// dropped and the in-flight count is decremented; otherwise |index| must
+// be -1.  The callback is detached before it runs, so it fires at most once.
+void CompileTask::MaybeRunOutputFileCallback(int index, bool task_finished) {
+  CHECK(BelongsToCurrentThread());
+  CHECK_EQ(FILE_RESP, state_);
+  if (!task_finished) {
+    CHECK_EQ(-1, index);
+  } else {
+    DCHECK_NE(-1, index);
+    // Once output.blob has been written on disk, we don't need it
+    // any more.
+    resp_->mutable_result()->mutable_output(index)->clear_blob();
+    if (--num_output_file_task_ > 0)
+      return;
+  }
+  CHECK_EQ(0, num_output_file_task_);
+
+  OneshotClosure* pending = output_file_callback_;
+  output_file_callback_ = nullptr;
+  if (pending != nullptr)
+    pending->Run();
+}
+
+// Compares the locally produced output file with the one produced by goma,
+// chunk by chunk.
+//
+// Returns true when both files could be read and have identical content.
+// Otherwise an error describing the first problem found (open failure, read
+// failure, length difference or content difference) is added to the response
+// via AddErrorToResponse(TO_USER, ..., true) and false is returned.
+bool CompileTask::VerifyOutput(
+    const string& local_output_path,
+    const string& goma_output_path) {
+  CHECK_EQ(FILE_RESP, state_);
+  LOG(INFO) << "Verify Output: "
+            << " local:" << local_output_path
+            << " goma:" << goma_output_path;
+  std::ostringstream error_message;
+  static const int kSize = 1024;
+  char local_buf[kSize];
+  char goma_buf[kSize];
+  ScopedFd local_fd(ScopedFd::OpenForRead(local_output_path));
+  if (!local_fd.valid()) {
+    error_message << "Not found: local file:" << local_output_path;
+    AddErrorToResponse(TO_USER, error_message.str(), true);
+    return false;
+  }
+  ScopedFd goma_fd(ScopedFd::OpenForRead(goma_output_path));
+  if (!goma_fd.valid()) {
+    error_message << "Not found: goma file:" << goma_output_path;
+    AddErrorToResponse(TO_USER, error_message.str(), true);
+    return false;
+  }
+  int local_len;
+  int goma_len;
+  // |len| is the offset of the chunk being compared.
+  // NOTE(review): the comparison assumes both Read() calls return chunks of
+  // the same size until EOF -- confirm ScopedFd::Read never returns short
+  // reads for regular files.
+  for (size_t len = 0; ; len += local_len) {
+    local_len = local_fd.Read(local_buf, kSize);
+    if (local_len < 0) {
+      error_message << "read error local:" << local_output_path
+                    << " @" << len << " " << GetLastErrorMessage();
+      AddErrorToResponse(TO_USER, error_message.str(), true);
+      return false;
+    }
+    goma_len = goma_fd.Read(goma_buf, kSize);
+    if (goma_len < 0) {
+      error_message << "read error goma:" << goma_output_path
+                    << " @" << len << " " << GetLastErrorMessage();
+      AddErrorToResponse(TO_USER, error_message.str(), true);
+      return false;
+    }
+    if (local_len != goma_len) {
+      error_message << "read len: " << local_len << "!=" << goma_len
+                    << " " << local_output_path << " @" << len;
+      AddErrorToResponse(TO_USER, error_message.str(), true);
+      return false;
+    }
+    if (local_len == 0) {
+      // Both files hit EOF at the same offset without any difference.
+      LOG(INFO) << trace_id_
+                << " Verify OK: " << local_output_path
+                << " size=" << len;
+      return true;
+    }
+    if (memcmp(local_buf, goma_buf, local_len) != 0) {
+      // Report the half-open byte range [start, end) of the differing chunk.
+      // The end offset is start + chunk length, not the chunk length itself.
+      error_message << "output mismatch: "
+                    << " local:" << local_output_path
+                    << " goma:" << goma_output_path
+                    << " @[" << len << "," << (len + local_len) << ")";
+      AddErrorToResponse(TO_USER, error_message.str(), true);
+      return false;
+    }
+    VLOG(2) << "len:" << len << "+" << local_len;
+  }
+  return true;
+}
+
+// Releases every pending output: in-memory content is handed back to the
+// service, temporary files are removed, and the output list is emptied.
+void CompileTask::ClearOutputFile() {
+  for (auto& entry : output_file_) {
+    if (entry.content.empty()) {
+      // Remove if we wrote tmp file for the output.
+      // Don't remove filename, which is the actual output filename,
+      // and local run might have output to the file.
+      const string& tmp_path = entry.tmp_filename;
+      if (!tmp_path.empty() && tmp_path != entry.filename) {
+        remove(tmp_path.c_str());
+      }
+    } else {
+      LOG(INFO) << trace_id_ << " clear output, but content is not empty";
+      service_->ReleaseOutputBuffer(entry.size, &entry.content);
+    }
+  }
+  output_file_.clear();
+}
+
+// ----------------------------------------------------------------
+// local run finished.
+// Installs the callback that runs ProcessLocalFileOutputDone and resets the
+// count of in-flight local output file tasks.  Must be called on the task's
+// thread with no callback already installed.
+void CompileTask::SetLocalOutputFileCallback() {
+  CHECK(BelongsToCurrentThread());
+  CHECK(!local_output_file_callback_);
+  num_local_output_file_task_ = 0;
+  local_output_file_callback_ =
+      NewCallback(this, &CompileTask::ProcessLocalFileOutputDone);
+}
+
+// Counts one more in-flight local output file task.  Must be called on the
+// task's thread.
+void CompileTask::StartLocalOutputFileTask() {
+  CHECK(BelongsToCurrentThread());
+  num_local_output_file_task_ += 1;
+}
+
+// Handles completion of one local output file task: records its elapsed
+// time and blob size in stats_ on success, or logs a warning on failure.
+void CompileTask::LocalOutputFileTaskFinished(
+    std::unique_ptr<LocalOutputFileTask> local_output_file_task) {
+  CHECK(BelongsToCurrentThread());
+
+  DCHECK_EQ(this, local_output_file_task->task());
+  if (local_output_file_task->success()) {
+    stats_->add_local_output_file_time(local_output_file_task->GetInMs());
+    stats_->add_local_output_file_size(
+        local_output_file_task->blob().file_size());
+    return;
+  }
+  LOG(WARNING) << trace_id_
+               << " Create file blob failed for local output:"
+               << local_output_file_task->filename();
+}
+
+// Runs the local output file callback once no local output file task is in
+// flight.  When |task_finished| is true the in-flight count is decremented
+// first.  The callback is detached before it runs, so it fires at most once.
+void CompileTask::MaybeRunLocalOutputFileCallback(bool task_finished) {
+  CHECK(BelongsToCurrentThread());
+  if (task_finished && --num_local_output_file_task_ > 0)
+    return;
+  CHECK_EQ(0, num_local_output_file_task_);
+
+  OneshotClosure* pending = local_output_file_callback_;
+  local_output_file_callback_ = nullptr;
+  if (pending != nullptr)
+    pending->Run();
+}
+
+// ----------------------------------------------------------------
+// state_: FINISHED/LOCAL_FINISHED or abort_
+// Copies timing figures and outcome flags from stats_ and task members into
+// the response.  Times are divided by 1000.0 before being stored.  Requires
+// that the task reached at least FINISHED state or was aborted.
+void CompileTask::UpdateStats() {
+  CHECK(state_ >= FINISHED || abort_);
+
+  const double kDivisor = 1000.0;
+
+  resp_->set_compiler_proxy_time(handler_timer_.GetInMs() / kDivisor);
+  resp_->set_compiler_proxy_include_preproc_time(
+      stats_->include_preprocess_time() / kDivisor);
+  resp_->set_compiler_proxy_include_fileload_time(
+      stats_->include_fileload_time() / kDivisor);
+  resp_->set_compiler_proxy_rpc_call_time(
+      SumRepeatedInt32(stats_->rpc_call_time()) / kDivisor);
+  resp_->set_compiler_proxy_file_response_time(
+      stats_->file_response_time() / kDivisor);
+  resp_->set_compiler_proxy_rpc_build_time(
+      SumRepeatedInt32(stats_->rpc_req_build_time()) / kDivisor);
+  resp_->set_compiler_proxy_rpc_send_time(
+      SumRepeatedInt32(stats_->rpc_req_send_time()) / kDivisor);
+  resp_->set_compiler_proxy_rpc_wait_time(
+      SumRepeatedInt32(stats_->rpc_wait_time()) / kDivisor);
+  resp_->set_compiler_proxy_rpc_recv_time(
+      SumRepeatedInt32(stats_->rpc_resp_recv_time()) / kDivisor);
+  resp_->set_compiler_proxy_rpc_parse_time(
+      SumRepeatedInt32(stats_->rpc_resp_parse_time()) / kDivisor);
+
+  resp_->set_compiler_proxy_local_pending_time(
+      stats_->local_pending_time() / kDivisor);
+  resp_->set_compiler_proxy_local_run_time(
+      stats_->local_run_time() / kDivisor);
+
+  // TODO: similar logic found in CompileService::CompileTaskDone, so
+  // it would be better to be merged.  Note that ExecResp are not available
+  // in CompileService::CompileTaskDone.
+  if (state_ == FINISHED) {
+    resp_->set_compiler_proxy_goma_finished(true);
+    if (stats_->cache_hit()) {
+      resp_->set_compiler_proxy_goma_cache_hit(true);
+    }
+  } else if (state_ == LOCAL_FINISHED) {
+    resp_->set_compiler_proxy_local_finished(true);
+  } else {
+    // Any other state is reported as aborted.
+    resp_->set_compiler_proxy_goma_aborted(true);
+  }
+
+  if (stats_->goma_error()) {
+    resp_->set_compiler_proxy_goma_error(true);
+  }
+  if (local_run_) {
+    resp_->set_compiler_proxy_local_run(true);
+  }
+  if (local_killed_) {
+    resp_->set_compiler_proxy_local_killed(true);
+  }
+
+  resp_->set_compiler_proxy_exec_request_retry(
+      stats_->exec_request_retry());
+}
+
+// Keeps the stdout/stderr of a failed task (unless fail fallback already
+// stored them) and then releases the request, response, flags and file id
+// caches.  Requires that the task reached at least FINISHED state or was
+// aborted, and that exec_resp_ is already gone.
+void CompileTask::SaveInfoFromInputOutput() {
+  DCHECK(BelongsToCurrentThread());
+  CHECK(state_ >= FINISHED || abort_);
+  CHECK(req_.get());
+  CHECK(resp_.get());
+  CHECK(!exec_resp_.get());
+
+  // if fail fallback, we already stored remote outputs in stdout_ and
+  // stderr_, and resp_ becomes local process output.
+  if (failed() && !fail_fallback_) {
+    const auto& result = resp_->result();
+    stdout_ = result.stdout_buffer();
+    stderr_ = result.stderr_buffer();
+  }
+
+  req_.reset();
+  resp_.reset();
+  flags_.reset();
+  input_file_id_cache_.reset();
+  output_file_id_cache_.reset();
+}
+
+// ----------------------------------------------------------------
+// subprocess handling.
+// Creates the SubProcessTask that runs the local compiler with the task's
+// arguments and environment, redirects its stdout/stderr to files under the
+// service tmp dir, and starts it with FinishSubProcess as the callback.
+// Cancels a pending delayed setup if there is one.
+void CompileTask::SetupSubProcess() {
+  VLOG(1) << trace_id_ << " SetupSubProcess "
+          << SubProcessReq::Weight_Name(subproc_weight_);
+  CHECK(BelongsToCurrentThread());
+  CHECK(subproc_ == nullptr) << trace_id_ << " " << StateName(state_)
+                          << " pid=" << subproc_->started().pid()
+                          << stats_->local_run_reason();
+  CHECK(!req_->command_spec().local_compiler_path().empty())
+      << req_->DebugString();
+  if (delayed_setup_subproc_ != nullptr) {
+    delayed_setup_subproc_->Cancel();
+    delayed_setup_subproc_ = nullptr;
+  }
+
+  // argv[0] is the local compiler path; the remaining arguments come from
+  // stats_, and the vector is nullptr-terminated.
+  const auto& compiler_path = req_->command_spec().local_compiler_path();
+  std::vector<const char*> argv;
+  argv.push_back(compiler_path.c_str());
+  const int num_args = stats_->arg_size();
+  for (int i = 1; i < num_args; ++i) {
+    argv.push_back(stats_->arg(i).c_str());
+  }
+  argv.push_back(nullptr);
+
+  subproc_ = new SubProcessTask(
+      trace_id_,
+      compiler_path.c_str(),
+      const_cast<char**>(argv.data()));
+  SubProcessReq* subproc_req = subproc_->mutable_req();
+  subproc_req->set_cwd(req_->cwd());
+  if (requester_env_.has_umask()) {
+    subproc_req->set_umask(requester_env_.umask());
+  }
+  if (flags_->is_gcc()) {
+    const GCCFlags& gcc_flag = static_cast<const GCCFlags&>(*flags_);
+    if (gcc_flag.is_stdin_input()) {
+      CHECK_GE(req_->input_size(), 1) << req_->DebugString();
+      subproc_req->set_stdin_filename(req_->input(0).filename());
+    }
+  } else if (flags_->is_vc()) {
+    // TODO: handle input is stdin case for VC++?
+  }
+
+  // Builds "<tmp_dir>/gomacc.<id><suffix>" for the redirected streams.
+  const auto redirect_path = [this](const char* suffix) {
+    std::ostringstream name;
+    name << "gomacc." << id_ << suffix;
+    return file::JoinPath(service_->tmp_dir(), name.str());
+  };
+  subproc_stdout_ = redirect_path(".out");
+  subproc_req->set_stdout_filename(subproc_stdout_);
+  subproc_stderr_ = redirect_path(".err");
+  subproc_req->set_stderr_filename(subproc_stderr_);
+
+  for (const auto& env : stats_->env()) {
+    subproc_req->add_env(env);
+  }
+  if (!local_path_.empty()) {
+    subproc_req->add_env("PATH=" + local_path_);
+  } else {
+    LOG(WARNING) << "Empty PATH: " << req_->DebugString();
+  }
+#ifdef _WIN32
+  subproc_req->add_env("TMP=" + service_->tmp_dir());
+  subproc_req->add_env("TEMP=" + service_->tmp_dir());
+  if (!pathext_.empty()) {
+    subproc_req->add_env("PATHEXT=" + pathext_);
+  } else {
+    LOG(WARNING) << "Empty PATHEXT: " << req_->DebugString();
+  }
+#endif
+
+  subproc_req->set_weight(subproc_weight_);
+  subproc_->Start(NewCallback(this, &CompileTask::FinishSubProcess));
+}
+
+// Requests the prepared subprocess to run and records |reason| as the local
+// run reason.  Only logs a warning when no subprocess exists.  Must not be
+// called on an aborted task.
+void CompileTask::RunSubProcess(const string& reason) {
+  VLOG(1) << trace_id_ << " RunSubProcess " << reason;
+  CHECK(!abort_);
+  if (subproc_ != nullptr) {
+    stats_->set_local_run_reason(reason);
+    subproc_->RequestRun();
+    VLOG(1) << "Run " << reason << " " << subproc_->req().DebugString();
+    return;
+  }
+  LOG(WARNING) << trace_id_ << " subproc already finished.";
+}
+
+// Kills the local subprocess and records in stats_ why the local run ended.
+// The result of Kill() is stored in local_killed_.  Requires an existing
+// subprocess.
+void CompileTask::KillSubProcess() {
+  // TODO: support the case subprocess is killed by FAIL_FAST.
+  VLOG(1) << trace_id_ << " KillSubProcess";
+  CHECK(subproc_ != nullptr);
+  const SubProcessState::State state_before = subproc_->state();
+  local_killed_ = subproc_->Kill();  // Will call FinishSubProcess().
+  VLOG(1) << trace_id_ << " kill pid=" << subproc_->started().pid()
+          << " " << local_killed_
+          << " " << SubProcessState::State_Name(state_before)
+          << "->" << SubProcessState::State_Name(subproc_->state());
+
+  const char* reason = nullptr;
+  if (local_killed_) {
+    reason = service_->dont_kill_subprocess()
+                 ? "fast goma, but wait for local."
+                 : "killed by fast goma";
+  } else if (subproc_->started().pid() != SubProcessState::kInvalidPid) {
+    // subproc was signaled but not waited yet.
+    reason = "fast goma, local signaled";
+  } else {
+    // subproc was initialized, but not yet started.
+    reason = "fast goma, local not started";
+  }
+  stats_->set_local_run_reason(reason);
+}
+
+// Completion callback for the local subprocess (registered through
+// subproc_->Start() in SetupSubProcess).
+//
+// Detaches subproc_ under mu_, records the subprocess statistics, copies the
+// exit status and captured stdout/stderr files into resp_, and then decides
+// how to continue depending on the task state: reply with the remote result,
+// continue with remote setup (verify output), reply with the local result
+// (fallback cases), abort the remote run, or keep waiting for the remote
+// result.  Must run on the task's thread and must not run on an aborted task.
+void CompileTask::FinishSubProcess() {
+  VLOG(1) << trace_id_ << " FinishSubProcess";
+  CHECK(BelongsToCurrentThread());
+  CHECK(!abort_);
+  SubProcessTask* subproc = nullptr;
+  {
+    AUTOLOCK(lock, &mu_);
+    subproc = subproc_;
+    subproc_ = nullptr;
+  }
+  CHECK(subproc);
+
+  LOG(INFO) << trace_id_ << " finished subprocess."
+            << " pid=" << subproc->started().pid()
+            << " status=" << subproc->terminated().status()
+            << " pending_ms=" << subproc->started().pending_ms()
+            << " run_ms=" << subproc->terminated().run_ms()
+            << " mem_kb=" << subproc->terminated().mem_kb()
+            << " local_killed=" << local_killed_;
+
+  bool local_run_failed = false;
+  bool local_run_goma_failure = false;
+  if (subproc->started().pid() != SubProcessState::kInvalidPid) {
+    local_run_ = true;
+    if (!local_killed_) {
+      subproc_exit_status_ = subproc->terminated().status();
+      // something failed after start of subproc. e.g. kill failed.
+      if (subproc_exit_status_ < 0) {
+        stats_->set_compiler_proxy_error(true);
+        LOG(ERROR) << trace_id_ << " subproc exec failure by goma"
+                   << " pid=" << subproc->started().pid()
+                   << " status=" << subproc_exit_status_
+                   << " error=" << SubProcessTerminated_ErrorTerminate_Name(
+                       subproc->terminated().error());
+        local_run_goma_failure = true;
+      }
+      if (subproc_exit_status_ != 0) {
+        local_run_failed = true;
+      }
+    }
+    stats_->set_local_pending_time(subproc->started().pending_ms());
+    stats_->set_local_run_time(subproc->terminated().run_ms());
+    stats_->set_local_mem_kb(subproc->terminated().mem_kb());
+    VLOG(1) << trace_id_ << " subproc finished"
+            << " pid=" << subproc->started().pid();
+  } else {
+    // pid is kInvalidPid
+    if (subproc->terminated().status() ==
+        SubProcessTerminated::kInternalError) {
+      std::ostringstream ss;
+      ss << "failed to run compiler locally."
+         << " pid=" << subproc->started().pid()
+         << " error=" << SubProcessTerminated_ErrorTerminate_Name(
+             subproc->terminated().error())
+         << " status=" << subproc->terminated().status();
+      AddErrorToResponse(TO_USER, ss.str(), true);
+      local_run_failed = true;
+      local_run_goma_failure = true;
+    }
+  }
+
+  // Remote already finished and no fail fallback: the remote result wins.
+  if (state_ == FINISHED && !fail_fallback_) {
+    ProcessReply();
+    return;
+  }
+
+  // This subprocess would be
+  // - gch hack (state_ < FINISHED, goma service was slower than local).
+  // - verify output. (state_ == INIT) -> SETUP
+  // - should fallback. (state_ == INIT) -> LOCAL_FINISHED.
+  // - fail fallback. (state_ = FINISHED, fail_fallback_ == true)
+  // - fallback only (state_ == LOCAL_RUN)
+  // - idle fallback (state_ < FINISHED, goma service was slower than local).
+  //   - might be killed because gomacc closed the ipc.
+
+  // resp_ must be validated before it is dereferenced below.
+  CHECK(resp_.get() != nullptr) << trace_id_ << " state=" << state_;
+  // Keep the remote stdout/stderr so they can be compared with the local
+  // ones in the fail fallback case.
+  string orig_stdout = resp_->result().stdout_buffer();
+  string orig_stderr = resp_->result().stderr_buffer();
+
+  ExecResult* result = resp_->mutable_result();
+  CHECK(result != nullptr) << trace_id_ << " state=" << state_;
+  if (fail_fallback_ && local_run_ &&
+      result->exit_status() != subproc->terminated().status())
+    stats_->set_goma_error(true);
+  result->set_exit_status(subproc->terminated().status());
+  if (result->exit_status() == 0) {
+    resp_->clear_error_message();
+  }
+  if (subproc->terminated().has_term_signal()) {
+    std::ostringstream ss;
+    ss << "child process exited unexpectedly with signal."
+       << " signal=" << subproc->terminated().term_signal();
+    exec_error_message_.push_back(ss.str());
+    CHECK(result->exit_status() != 0)
+        << trace_id_ << " if term signal is not 0, exit status must not be 0."
+        << ss.str();
+  }
+
+  // Replace the response stdout with the subprocess' captured stdout file,
+  // which is removed once it has been read.
+  string stdout_buffer;
+  CHECK(!subproc_stdout_.empty()) << trace_id_ << " state=" << state_;
+  ReadFileToString(subproc_stdout_.c_str(), &stdout_buffer);
+  remove(subproc_stdout_.c_str());
+  if (fail_fallback_ && local_run_ && orig_stdout != stdout_buffer)
+    stats_->set_goma_error(true);
+  result->set_stdout_buffer(stdout_buffer);
+
+  // Same for stderr.
+  string stderr_buffer;
+  CHECK(!subproc_stderr_.empty()) << trace_id_ << " state=" << state_;
+  ReadFileToString(subproc_stderr_.c_str(), &stderr_buffer);
+  remove(subproc_stderr_.c_str());
+  if (fail_fallback_ && local_run_ && orig_stderr != stderr_buffer)
+    stats_->set_goma_error(true);
+  result->set_stderr_buffer(stderr_buffer);
+
+  if (verify_output_) {
+    CHECK_EQ(INIT, state_);
+    // local runs done, start remote.
+    ProcessSetup();
+    return;
+  }
+
+  if (precompiling_ && service_->enable_gch_hack()) {
+    CHECK_LT(state_, FINISHED) << trace_id_ << " finish subproc";
+    CHECK(subproc_ == nullptr) << trace_id_ << " finish subproc";
+    // local runs done, not yet goma.
+    return;
+  }
+
+  // Upload output files asynchronously, so that these files could be
+  // used in link phase.
+  if (!local_run_failed) {
+    ProcessLocalFileOutput();
+    // The callback must be called asynchronously.
+    if (service_->store_local_run_output())
+      CHECK(local_output_file_callback_ != nullptr);
+  }
+  if (should_fallback_) {
+    CHECK_EQ(INIT, state_);
+    state_ = LOCAL_FINISHED;
+    finished_ = true;
+    // reply fallback response.
+    VLOG(2) << trace_id_ << " should fallback:" << resp_->DebugString();
+    if (!local_run_failed) {
+      ReplyResponse("should fallback");
+    } else {
+      ReplyResponse("should fallback but local run failed");
+    }
+    return;
+  }
+  if (fail_fallback_) {
+    CHECK_EQ(FINISHED, state_);
+    VLOG(2) << trace_id_ << " fail fallback:" << resp_->DebugString();
+    if (!local_run_failed) {
+      ReplyResponse("fail fallback");
+    } else {
+      // If both remote and local failed, it is a real compile failure.
+      // We must not preserve goma's error message then. (b/27889459)
+      resp_->clear_error_message();
+      ReplyResponse("fail fallback and local run also failed");
+    }
+    return;
+  }
+  if (state_ == LOCAL_RUN) {
+    VLOG(2) << trace_id_ << " local run finished:" << resp_->DebugString();
+    state_ = LOCAL_FINISHED;
+    finished_ = true;
+    if (!local_run_goma_failure) {
+      resp_->clear_error_message();
+    }
+    ReplyResponse("local finish, no goma");
+    // TODO: restart from the beginning.
+    // Since no remote compile is running here, it is nice to start remote
+    // compile in this case.  However, let me postpone the implementation
+    // until I understand procedure of CompileTask well.
+    return;
+  }
+  // otherwise, local finishes earlier than remote, or setup.
+  if (!local_run_goma_failure) {
+    abort_ = true;
+    VLOG(2) << trace_id_ << " idle fallback:" << resp_->DebugString();
+    resp_->clear_error_message();
+    ReplyResponse("local finish, abort goma");
+    return;
+  }
+  // In this case, remote should be running and we expect that success.
+  LOG(INFO) << trace_id_ << " local compile failed because of goma."
+            << " waiting for remote result.";
+}
+
+// ----------------------------------------------------------------
+
+// Returns true when the exec exit status recorded in stats_ is non-zero.
+bool CompileTask::failed() const {
+  return stats_->exec_exit_status() != 0;
+}
+
+// Returns the canceled_ flag of this task.
+bool CompileTask::canceled() const {
+  return canceled_;
+}
+
+// Returns the cache_hit value recorded in stats_.
+bool CompileTask::cache_hit() const {
+  return stats_->cache_hit();
+}
+
+// Returns whether the LocalOutputCache lookup succeeded for this task.
+bool CompileTask::local_cache_hit() const {
+  return localoutputcache_lookup_succeeded_;
+}
+
+// Records |error_message| for this task and, when |set_error| is true, makes
+// sure the response carries a failing exit status.
+//
+// A non-empty message is logged (ERROR if |set_error|, WARNING otherwise),
+// prefixed with "compiler_proxy:<elapsed>ms: " and appended to
+// exec_error_message_.  With dest == TO_USER the prefixed message is also
+// added to the response; otherwise the raw message is handed to
+// CompileService::RecordErrorToLog.
+//
+// An empty message with |set_error| == true only forces the exit status.
+void CompileTask::AddErrorToResponse(
+    ErrDest dest, const string& error_message, bool set_error) {
+  const bool has_message = !error_message.empty();
+  if (has_message) {
+    if (set_error) {
+      LOG(ERROR) << trace_id_ << " " << error_message;
+    } else {
+      LOG(WARNING) << trace_id_ << " " << error_message;
+    }
+
+    // Build the annotated message once; it is used for both the response
+    // and the per-task error list.
+    std::ostringstream annotated;
+    annotated << "compiler_proxy:" << handler_timer_.GetInMs() << "ms: "
+              << error_message;
+
+    if (dest != TO_USER) {
+      service_->RecordErrorToLog(error_message, set_error);
+    } else {
+      DCHECK(set_error) << trace_id_
+                        << " user error should always set error."
+                        << " msg=" << error_message;
+      resp_->add_error_message(annotated.str());
+    }
+    exec_error_message_.push_back(annotated.str());
+  }
+
+  if (!set_error) {
+    return;
+  }
+  // Keep an already failing exit status; only overwrite a missing or
+  // successful one.
+  if (resp_->has_result() && resp_->result().exit_status() != 0) {
+    return;
+  }
+  resp_->mutable_result()->set_exit_status(1);
+}
+
+// Dumps the request of this task under <tmp_dir>/task_request_<id>/:
+//  - one file per entry of required_files_, named by the blob's hash key,
+//  - the serialized ExecReq ("exec_req.data", or "local_exec_req.data" when
+//    no compiler info state is available),
+//  - a text-format copy of the request ("<name>.txt") with the input
+//    contents cleared so that only hashes are shown.
+//
+// Only logs an error and returns if frozen_timestamp_ms_ is still 0, i.e.
+// the task is still active.  Failing to create the dump directory is fatal
+// on every platform; failures to dump or write individual files are logged.
+void CompileTask::DumpRequest() const {
+  if (frozen_timestamp_ms_ == 0) {
+    LOG(ERROR) << trace_id_ << " DumpRequest called on active task";
+    return;
+  }
+  LOG(INFO) << trace_id_ << " DumpRequest";
+  string filename = "exec_req.data";
+  ExecReq req;
+  CommandSpec* command_spec = req.mutable_command_spec();
+  *command_spec = command_spec_;
+  command_spec->set_local_compiler_path(local_compiler_path_);
+  if (compiler_info_state_.get() != nullptr) {
+    const CompilerInfo& compiler_info = compiler_info_state_.get()->info();
+    std::vector<string> args(stats_->arg().begin(), stats_->arg().end());
+    std::unique_ptr<CompilerFlags> flags(
+        CompilerFlags::New(args, stats_->cwd()));
+    FixCommandSpec(compiler_info, *flags, command_spec);
+    FixSystemLibraryPath(system_library_paths_, command_spec);
+    MayFixSubprogramSpec(req.mutable_subprogram());
+  } else {
+    // If compiler_info_state_ is nullptr, it would be should_fallback_.
+    LOG_IF(ERROR, !should_fallback_)
+        << trace_id_ << " DumpRequest compiler_info_state_ is nullptr.";
+    filename = "local_exec_req.data";
+  }
+
+  // Rebuild the request from what was recorded in stats_.
+  for (const auto& arg : stats_->arg())
+    req.add_arg(arg);
+  for (const auto& env : stats_->env())
+    req.add_env(env);
+  for (const auto& expanded_arg : stats_->expanded_arg())
+    req.add_expanded_arg(expanded_arg);
+  req.set_cwd(stats_->cwd());
+  *req.mutable_requester_info() = requester_info_;
+
+  // Start from an empty per-task directory.
+  std::ostringstream ss;
+  ss << "task_request_" << id_;
+  const string task_request_dir = file::JoinPath(service_->tmp_dir(), ss.str());
+  RecursivelyDelete(task_request_dir);
+#ifndef _WIN32
+  PCHECK(mkdir(task_request_dir.c_str(), 0755) == 0);
+#else
+  if (!CreateDirectoryA(task_request_dir.c_str(), nullptr)) {
+    // GetLastError() yields a Win32 error code, not an HRESULT, so
+    // FAILED(err) is false for ordinary error codes and must not be used to
+    // decide whether this is fatal.  A failed CreateDirectoryA is fatal,
+    // matching the PCHECK on mkdir above.
+    DWORD err = GetLastError();
+    LOG_SYSRESULT(err);
+    LOG(FATAL) << "CreateDirectoryA " << task_request_dir;
+  }
+#endif
+
+  for (const auto& input_filename : required_files_) {
+    ExecReq_Input* input = req.add_input();
+    input->set_filename(input_filename);
+    FileServiceDumpClient fs;
+    if (!fs.CreateFileBlob(input_filename, true, input->mutable_content())) {
+      LOG(ERROR) << trace_id_ << " DumpRequest failed to create fileblob:"
+                 << input_filename;
+    } else {
+      input->set_hash_key(FileServiceClient::ComputeHashKey(input->content()));
+      if (!fs.Dump(file::JoinPath(task_request_dir, input->hash_key()))) {
+        LOG(ERROR) << trace_id_ << " DumpRequest failed to store fileblob:"
+                   << input_filename
+                   << " hash:" << input->hash_key();
+      }
+    }
+  }
+  string r;
+  req.SerializeToString(&r);
+  filename = file::JoinPath(task_request_dir, filename);
+  if (!WriteStringToFile(r, filename)) {
+    LOG(ERROR) << trace_id_ << " DumpRequest failed to write: " << filename;
+  } else {
+    LOG(INFO) << trace_id_ << " DumpRequest wrote serialized proto: "
+              << filename;
+  }
+
+  // Only show file hash for text_format.
+  for (auto& input : *req.mutable_input()) {
+    input.clear_content();
+  }
+
+  string text_req;
+  google::protobuf::TextFormat::PrintToString(req, &text_req);
+  filename += ".txt";
+  if (!WriteStringToFile(text_req, filename)) {
+    LOG(ERROR) << trace_id_ << " DumpRequest failed to write: " << filename;
+  } else {
+    LOG(INFO) << trace_id_ << " DumpRequest wrote text proto: " << filename;
+  }
+
+  LOG(INFO) << trace_id_ << " DumpRequest done";
+}
+
+}  // namespace devtools_goma
diff --git a/client/compile_task.h b/client/compile_task.h
new file mode 100644
index 0000000..81eb608
--- /dev/null
+++ b/client/compile_task.h
@@ -0,0 +1,488 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_COMPILE_TASK_H_
+#define DEVTOOLS_GOMA_CLIENT_COMPILE_TASK_H_
+
+#include <deque>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include <json/json.h>
+
+#include "basictypes.h"
+#include "compiler_info.h"
+#include "compiler_specific.h"
+#include "compile_service.h"
+#include "deps_cache.h"
+#include "file_id.h"
+#include "file_id_cache.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "google/protobuf/repeated_field.h"
+MSVC_POP_WARNING()
+#include "http_rpc.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_data.pb.h"
+#include "prototmp/subprocess.pb.h"
+MSVC_POP_WARNING()
+#include "simple_timer.h"
+#include "subprocess_task.h"
+#include "threadpool_http_server.h"
+#include "timestamp.h"
+#include "unordered.h"
+
+namespace devtools_goma {
+
+class Closure;
+class CompileStats;
+class CompilerFlags;
+class CompilerProxyHistogram;
+
+// CompileTask handles a single compile request from gomacc.
+// It basically runs on the thread where it was created, but InputFileTask and
+// OutputFileTask would run on other threads.
+// Note that DumpToJson() may be called on other threads.
+class CompileTask {
+ public:
+  enum State {
+    // running state, or state at response sent if abort_ is true.
+    INIT,  // Initialize the task.  Run subprocess for fast fallback/verify.
+    SETUP,  // Setup the request. Run include processor.
+    FILE_REQ,  // Upload input files.
+    CALL_EXEC,  // Call Exec request.
+    LOCAL_OUTPUT,  // LocalOutputCache lookup succeeded.
+    FILE_RESP,  // Download output files.
+    // finished state:  response sent.
+    FINISHED,  // Finished.
+    LOCAL_RUN,  // Local run started and didn't issue goma call.
+    LOCAL_FINISHED,  // Local run finished for fast fallback.
+    NUM_STATE,
+  };
+  CompileTask(CompileService* service, int id);
+
+  void Ref();
+  void Deref();
+
+  // Task ID, a serial number.
+  int id() const { return id_; }
+  const string& trace_id() const { return trace_id_; }
+
+  // Inits CompileTask.
+  // It takes ownership of rpc, req, resp and done.
+  void Init(CompileService::RpcController* rpc,
+            const ExecReq* req,
+            ExecResp* resp,
+            OneshotClosure* done);
+
+  // It will run on a thread other than the current one, but done closure will
+  // be called on the same thread where this method was called.
+  void Start();
+
+  bool failed() const;
+  bool canceled() const;
+  bool abort() const { return abort_; }
+  bool local_run() const { return local_run_; }
+  bool local_killed() const { return local_killed_; }
+  bool fail_fallback() const { return fail_fallback_; }
+  bool cache_hit() const;
+  bool local_cache_hit() const;
+
+  State state() const { return state_; }
+
+  const CompileStats& stats() const { return *stats_; }
+  CompileStats* mutable_stats() { return stats_.get(); }
+
+  void DumpToJson(bool need_detail, Json::Value* root) const;
+
+  // DumpRequest is called on finished task.
+  void DumpRequest() const;
+
+  void SetFrozenTimestampMs(millitime_t frozen_timestamp_ms) {
+    frozen_timestamp_ms_ = frozen_timestamp_ms;
+  }
+  millitime_t GetFrozenTimestampMs() const { return frozen_timestamp_ms_; }
+  millitime_t GetLastReqTimestampMs() const { return last_req_timestamp_ms_; }
+
+ private:
+  friend class CompileTaskTest;
+  enum ErrDest {
+    // To log: write in log file, and show on status page.
+    TO_LOG,
+    // To user: may send back to gomacc, so user will see the message.
+    // including TO_LOG.
+    TO_USER,
+  };
+  class InputFileTask;
+  class OutputFileTask;
+  struct OutputFileInfo;
+  class LocalOutputFileTask;
+  friend class InputFileTask;
+  friend class OutputFileTask;
+  friend class LocalOutputFileTask;
+  friend class CompilerProxyHistogram;
+  struct RenameParam;
+  struct ContentOutputParam;
+  struct RunIncludeProcessorParam;
+  struct RunLinkerInputProcessorParam;
+  struct RunJarParserParam;
+
+  ~CompileTask();
+
+  bool BelongsToCurrentThread() const;
+
+  bool success() const { return resp_->result().exit_status() == 0; }
+  bool IsGomaccRunning();
+  // Notified from http server request of gomacc when the goma ipc is closed.
+  void GomaccClosed();
+
+  bool IsSubprocRunning() const;
+
+  // Copies env and requester env from req_, and clears requester env
+  // from req_.
+  void CopyEnvFromRequest();
+  string GenerateCompilerProxyId() const;
+
+  // Validates local compiler path.
+  static bool IsLocalCompilerPathValid(
+      const string& trace_id,
+      const ExecReq& req, const CompilerFlags* flags);
+
+  // Remove duplicate filepath from |filenames|
+  // for files normalized by JoinPathRespectAbsolute with |cwd|.
+  // Relative path is taken in high priority.
+  static void RemoveDuplicateFiles(const std::string& cwd,
+                                   std::set<std::string>* filenames);
+
+  // Initializes compiler flags from the request.
+  void InitCompilerFlags();
+
+  // Finds local compiler path from the request.
+  // Updates req_->command_spec().local_compiler_path() and local_path_.
+  bool FindLocalCompilerPath();
+
+  // Checks if we should fallback the request.
+  bool ShouldFallback() const;
+
+  // Checks if we should verify output.
+  bool ShouldVerifyOutput() const;
+
+  // Gets task weight.
+  SubProcessReq::Weight GetTaskWeight() const;
+
+  // Checks if we should stop goma and use local run only.
+  bool ShouldStopGoma() const;
+
+  // Sets up goma request. (e.g include processor).
+  // state_: INIT -> SETUP
+  void ProcessSetup();
+
+  // Processes file request. (runs InputFileTasks).
+  // state_: SETUP -> FILE_REQ
+  void TryProcessFileRequest();
+  void ProcessFileRequest();
+  void ProcessFileRequestDone();
+  void ProcessPendingFileRequest();
+
+  // state_: FILE_REQ -> CALL_EXEC (call Exec service).
+  void ProcessCallExec();
+  void ProcessCallExecDone();
+
+  // state_: CALL_EXEC -> FILE_RESP (runs OutputFileTasks).
+  void ProcessFileResponse();
+  void ProcessFileResponseDone();
+
+  // state_: -> FINISHED or abort_. (ready to send response)
+  // finished_ becomes true.
+  // joins in FinishSubProcess if subproc_ is active.
+  void ProcessFinished(const string& msg);
+
+  // Replies with goma result.
+  // state_: FINISHED && !abort_ && subprocess has been finished.
+  void ProcessReply();
+
+  // state_: !abort_, FINISHED.
+  // If use_remote is true, it renames remote outputs to real outputs.
+  // If use_remote is false, it just removes remote outputs.
+  void CommitOutput(bool use_remote);
+
+  // DoOutput will run closure to output in filename.
+  // It doesn't take ownership of closure and err.
+  // It is expected that closure will set error in *err, and
+  // after calling closure, err->empty() means success, otherwise failure.
+  // On Windows, it will retry several times, so closure must be permanent
+  // callback.
+  void DoOutput(const string& opname, const string& filename,
+                PermanentClosure* closure, string* err);
+  void RenameCallback(RenameParam* param, string* err);
+  void ContentOutputCallback(ContentOutputParam* param, string* err);
+
+  // If file is coff file, rewrite timestamp to the current time.
+  void RewriteCoffTimestamp(const string& filename);
+
+  // state_: FINISHED/LOCAL_FINISHED or abort_.
+  void ReplyResponse(const string& msg);
+
+  void ProcessLocalFileOutput();
+  void ProcessLocalFileOutputDone();
+
+  // Saves stats, clears proto messages and calls CompileTaskDone to make
+  // this CompileTask expired.
+  void Done();
+
+  // Methods used in state_: SETUP
+  void FillCompilerInfo();
+  void FillCompilerInfoDone(
+      std::unique_ptr<CompileService::GetCompilerInfoParam> param);
+#ifndef _WIN32
+  bool MakeWeakRelativeInArgv();
+#endif
+  void UpdateExpandedArgs();
+  void ModifyRequestArgs();
+  void ModifyRequestEnvs();
+  void UpdateCommandSpec();
+  // Updates SubprogramSpec if send_subprogram_spec is enabled.
+  void MayUpdateSubprogramSpec();
+  // Fix SubprogramSpec if send_subprogram_spec is enabled.
+  void MayFixSubprogramSpec(
+      google::protobuf::RepeatedPtrField<SubprogramSpec>* subprogram_specs)
+          const;
+  void UpdateRequiredFiles();
+  void GetIncludeFiles();
+  void RunIncludeProcessor(std::unique_ptr<RunIncludeProcessorParam> param);
+  void RunIncludeProcessorDone(std::unique_ptr<RunIncludeProcessorParam> param);
+  void GetLinkRequiredFiles();
+  void RunLinkerInputProcessor(
+      std::unique_ptr<RunLinkerInputProcessorParam> param);
+  void RunLinkerInputProcessorDone(
+      std::unique_ptr<RunLinkerInputProcessorParam> param);
+  void GetJavaRequiredFiles();
+  void RunJarParser(std::unique_ptr<RunJarParserParam> param);
+  void RunJarParserDone(std::unique_ptr<RunJarParserParam> param);
+  void UpdateRequiredFilesDone(bool ok);
+  void SetupRequestDone(bool ok);
+
+  // Methods used in state_: FILE_REQ
+  void SetInputFileCallback();
+  void StartInputFileTask();
+  void InputFileTaskFinished(InputFileTask* input_file_task);
+  void MaybeRunInputFileCallback(bool task_finished);
+
+  // Methods used in state_: CALL_EXEC
+  void CheckCommandSpec();
+  void CheckNoMatchingCommandSpec(const string& retry_reason);
+  void StoreEmbeddedUploadInformationIfNeeded();
+
+  // Methods used in state_: FILE_RESP
+  void SetOutputFileCallback();
+  void CheckOutputFilename(const string& filename);
+  void StartOutputFileTask();
+  void OutputFileTaskFinished(std::unique_ptr<OutputFileTask> output_file_task);
+  void MaybeRunOutputFileCallback(int index, bool task_finished);
+  bool VerifyOutput(const string& local_output_path,
+                    const string& goma_output_path);
+  void ClearOutputFile();
+
+  // Methods used in state_: fail_fallback_, LOCAL_FINISHED or abort_
+  // (after local run finished)
+  void SetLocalOutputFileCallback();
+  void StartLocalOutputFileTask();
+  void LocalOutputFileTaskFinished(
+      std::unique_ptr<LocalOutputFileTask> local_output_file_task);
+  void MaybeRunLocalOutputFileCallback(bool task_finished);
+
+  // Methods used in state_: FINISHED/LOCAL_FINISHED or abort_.
+  void UpdateStats();
+  void SaveInfoFromInputOutput();
+
+  // ----------------------------------------------------------------
+  // Sets subprocess for local run.  The subprocess becomes ready to run.
+  void SetupSubProcess();
+
+  // Runs subprocess in high priority with reason.
+  void RunSubProcess(const string& reason);
+
+  // Kills subprocess.  FinishSubProcess will be called later.
+  void KillSubProcess();
+
+  // Finished subprocess.
+  void FinishSubProcess();
+
+  // ----------------------------------------------------------------
+  // Adds error message to response and sets error exit status.
+  void AddErrorToResponse(
+      ErrDest dest, const string& error_message, bool set_error);
+
+#ifdef _WIN32
+  static BOOL WINAPI InitializeWinOnce(PINIT_ONCE, PVOID, PVOID*);
+#endif
+  static void InitializeStaticOnce();
+
+  CompileService* service_;
+  const int id_;  // A serial number.
+  string trace_id_;
+
+  // RPC between gomacc and compiler proxy.
+  // These are valid until ReplyResponse().
+  CompileService::RpcController* rpc_;
+  ExecResp* rpc_resp_;
+  WorkerThreadManager::ThreadId caller_thread_id_;
+  OneshotClosure* done_;
+
+  std::unique_ptr<CompileStats> stats_;
+
+  int responsecode_;
+
+  State state_;
+  bool abort_;  // local proc finished first.
+  bool finished_;  // remote call finished (no active remote calls).
+
+  std::unique_ptr<ExecReq> req_;
+  CommandSpec command_spec_;
+  ScopedCompilerInfoState compiler_info_state_;
+  string local_compiler_path_;
+  RequesterInfo requester_info_;
+  RequesterEnv requester_env_;
+
+  std::unique_ptr<CompilerFlags> flags_;
+  bool linking_;
+  bool precompiling_;
+
+  // gomacc_pid_:
+  //   gomacc_pid_ == SubprocessState::kInvalidPid if gomacc not running.
+  int gomacc_pid_;
+  // true if a connection to gomacc is lost, and the task is canceled.
+  bool canceled_;
+
+  string orig_flag_dump_;
+  string flag_dump_;
+  std::set<string> required_files_;
+
+  // Caches all FileId in this compilation unit, since creating FileId is slow
+  // especially on Windows.
+  // So that FileIdCache doesn't need to have lock,
+  // 2 FileIdCache instances are used for input/output in CompileTask.
+  // TODO: Maybe we can merge this with |required_files_|.
+  std::unique_ptr<FileIdCache> input_file_id_cache_;
+  std::unique_ptr<FileIdCache> output_file_id_cache_;
+
+  // |system_library_paths_| is used only when linking_ == true.
+  std::vector<string> system_library_paths_;
+  // list of interleave uploaded files to confirm the mechanism works fine.
+  unordered_set<string> interleave_uploaded_files_;
+
+  std::unique_ptr<ExecResp> resp_;
+  std::unique_ptr<ExecResp> exec_resp_;
+
+  std::vector<string> exec_output_file_;
+  std::vector<string> exec_error_message_;
+  // exit_status_ is an exit status of remote goma compilation.
+  // if this is 0, remote goma compilation might have finished successfully,
+  // or might not be executed.
+  // in other words, if this is not 0, remote goma compilation failed.
+  int exit_status_;
+  string stdout_;
+  string stderr_;
+
+  // HttpRPC status for ExecRequest.
+  std::unique_ptr<HttpRPC::Status> http_rpc_status_;
+
+  WorkerThreadManager::CancelableClosure* delayed_setup_subproc_;
+  // local subprocess
+  string local_path_;
+  // PATHEXT environment variable in ExecReq for Windows.
+  string pathext_;
+  // subproc_ == NULL; subprocess is not ready to run or already finished.
+  // subproc_ != NULL; subprocess is ready to run or running.
+  SubProcessTask* subproc_;
+  SubProcessReq::Weight subproc_weight_;
+  // subproc_exit_status_ is an exit status of local compilation.
+  // if this is 0, local compilation might have finished successfully,
+  // might not be executed, or might have been killed because of fast goma.
+  // in other words, if this is not 0, local compilation failed.
+  // Note that local compilation might have failed because of goma
+  // if goma failed to setup env, cwd to run local compiler.
+  // TODO: can we detect this kind of error?
+  int subproc_exit_status_;
+  string subproc_stdout_;
+  string subproc_stderr_;
+  // request fallback when exec call failed. initialized with
+  // requester_env_.fallback(), but might be changed for hermetic fallback.
+  bool want_fallback_;
+  bool should_fallback_;  // do fallback because of setup failures etc.
+  bool verify_output_;
+  bool fail_fallback_;
+  bool local_run_;
+  bool local_killed_;
+  bool depscache_used_;
+  bool gomacc_revision_mismatched_;
+
+  // Timers
+  SimpleTimer handler_timer_;
+  SimpleTimer compiler_info_timer_;
+  SimpleTimer include_timer_;
+  SimpleTimer include_wait_timer_;
+  SimpleTimer rpc_call_timer_;
+  SimpleTimer file_response_timer_;
+  SimpleTimer file_request_timer_;
+
+  // trace info.
+  string resp_cache_key_;
+
+  // Input file process.
+  OneshotClosure* input_file_callback_;
+  int num_input_file_task_;
+  bool input_file_success_;
+
+  // Output file process.
+  OneshotClosure* output_file_callback_;
+  std::vector<OutputFileInfo> output_file_;
+  int num_output_file_task_;
+  bool output_file_success_;
+
+  // Local output file process.
+  OneshotClosure* local_output_file_callback_;
+  int num_local_output_file_task_;
+
+  // DepsCache
+  DepsCache::Identifier deps_identifier_;
+
+  // LocalOutputCache
+  bool localoutputcache_lookup_succeeded_;
+  // Even if lookup failed, we'd like to keep key after calculation so that
+  // we can put cache later and at that time we don't need to recalculate
+  // the key.
+  std::string local_output_cache_key_;
+
+  // Protects ref counts, subproc_ and http_rpc_status_.
+  Lock mu_;
+  int refcnt_;
+
+  PlatformThreadId thread_id_;
+
+  // Timestamp that this task transited to Finished or Failed.
+  millitime_t frozen_timestamp_ms_;
+
+  // Timestamp that this task transmitted the request to Goma.
+  millitime_t last_req_timestamp_ms_;
+
+#ifndef _WIN32
+  static pthread_once_t init_once_;
+#else
+  static INIT_ONCE init_once_;
+#endif
+
+  // protects link_file_req_tasks_.
+  static Lock global_mu_;
+  static std::deque<CompileTask*>* link_file_req_tasks_;
+
+  DISALLOW_COPY_AND_ASSIGN(CompileTask);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_COMPILE_TASK_H_
diff --git a/client/compile_task_unittest.cc b/client/compile_task_unittest.cc
new file mode 100644
index 0000000..04d2a36
--- /dev/null
+++ b/client/compile_task_unittest.cc
@@ -0,0 +1,307 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "compile_task.h"
+
+#include <memory>
+#include <set>
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include "compiler_flags.h"
+#include "path.h"
+#include "string_util.h"
+
+namespace devtools_goma {
+
+namespace {
+
+// Filesystem root used to build absolute paths in the tests below:
+// "C:\\" on Windows, "/" elsewhere.
+const char kRootDir[] =
+#ifdef _WIN32
+    "C:\\";
+#else
+    "/";
+#endif
+
+}  // anonymous namespace
+
+// Test fixture that forwards to private static helpers of CompileTask.
+// CompileTask declares CompileTaskTest as a friend, so the forwarding
+// functions below can reach them.
+class CompileTaskTest : public testing::Test {
+ public:
+  // Forwards to CompileTask::IsLocalCompilerPathValid.
+  static bool IsLocalCompilerPathValid(
+      const string& trace_id,
+      const ExecReq& req,
+      const CompilerFlags* flags) {
+    return CompileTask::IsLocalCompilerPathValid(trace_id, req, flags);
+  }
+
+  // Forwards to CompileTask::RemoveDuplicateFiles.
+  static void RemoveDuplicateFiles(const std::string& cwd,
+                                   std::set<std::string>* filenames) {
+    CompileTask::RemoveDuplicateFiles(cwd, filenames);
+  }
+};
+
+TEST_F(CompileTaskTest, IsLocalCompilerPathValidWithEmptyLocalCompilerPath) {
+  // The command spec carries no local_compiler_path; the request is
+  // expected to be accepted.
+  std::vector<string> argv = {"gcc", "-c", "dummy.cc"};
+  std::unique_ptr<CompilerFlags> flag(CompilerFlags::New(argv, "."));
+
+  ExecReq req;
+  req.set_cwd("/tmp");
+  auto* spec = req.mutable_command_spec();
+  spec->set_name("gcc");
+  spec->set_version("1.2.3");
+  spec->set_target("x86_64-linux-gnu");
+  spec->set_binary_hash("deadbeaf");
+
+  EXPECT_TRUE(CompileTaskTest::IsLocalCompilerPathValid(
+      "dummy", req, flag.get()));
+}
+
+TEST_F(CompileTaskTest, IsLocalCompilerPathValidWithSameCommandSpec) {
+  // Command spec name, local compiler path and argv[0] all name gcc.
+  std::vector<string> argv = {"gcc", "-c", "dummy.cc"};
+  std::unique_ptr<CompilerFlags> flag(CompilerFlags::New(argv, "."));
+
+  ExecReq req;
+  req.set_cwd("/tmp");
+  auto* spec = req.mutable_command_spec();
+  spec->set_name("gcc");
+  spec->set_version("1.2.3");
+  spec->set_target("x86_64-linux-gnu");
+  spec->set_binary_hash("deadbeaf");
+  spec->set_local_compiler_path("/usr/bin/gcc");
+
+  EXPECT_TRUE(CompileTaskTest::IsLocalCompilerPathValid(
+      "dummy", req, flag.get()));
+}
+
+TEST_F(CompileTaskTest, IsLocalCompilerPathValidWithUnknownName) {
+  // The local compiler path points at a binary that is not a compiler.
+  std::vector<string> argv = {"gcc", "-c", "dummy.cc"};
+  std::unique_ptr<CompilerFlags> flag(CompilerFlags::New(argv, "."));
+
+  ExecReq req;
+  req.set_cwd("/tmp");
+  auto* spec = req.mutable_command_spec();
+  spec->set_name("gcc");
+  spec->set_version("1.2.3");
+  spec->set_target("x86_64-linux-gnu");
+  spec->set_binary_hash("deadbeaf");
+  spec->set_local_compiler_path("/usr/bin/id");
+
+  EXPECT_FALSE(CompileTaskTest::IsLocalCompilerPathValid(
+      "dummy", req, flag.get()));
+}
+
+TEST_F(CompileTaskTest, IsLocalCompilerPathValidWithCommandSpecMismatch) {
+  // The command spec says clang while the path and argv[0] say gcc.
+  std::vector<string> argv = {"gcc", "-c", "dummy.cc"};
+  std::unique_ptr<CompilerFlags> flag(CompilerFlags::New(argv, "."));
+
+  ExecReq req;
+  req.set_cwd("/tmp");
+  auto* spec = req.mutable_command_spec();
+  spec->set_name("clang");
+  spec->set_version("1.2.3");
+  spec->set_target("x86_64-linux-gnu");
+  spec->set_binary_hash("deadbeaf");
+  spec->set_local_compiler_path("/usr/bin/gcc");
+
+  EXPECT_FALSE(CompileTaskTest::IsLocalCompilerPathValid(
+      "dummy", req, flag.get()));
+}
+
+TEST_F(CompileTaskTest, IsLocalCompilerPathValidWithArgsMismatch) {
+  // argv[0] says clang while the command spec and the path say gcc.
+  std::vector<string> argv = {"clang", "-c", "dummy.cc"};
+  std::unique_ptr<CompilerFlags> flag(CompilerFlags::New(argv, "."));
+
+  ExecReq req;
+  req.set_cwd("/tmp");
+  auto* spec = req.mutable_command_spec();
+  spec->set_name("gcc");
+  spec->set_version("1.2.3");
+  spec->set_target("x86_64-linux-gnu");
+  spec->set_binary_hash("deadbeaf");
+  spec->set_local_compiler_path("/usr/bin/gcc");
+
+  EXPECT_FALSE(CompileTaskTest::IsLocalCompilerPathValid(
+      "dummy", req, flag.get()));
+}
+
+TEST_F(CompileTaskTest, IsLocalCompilerPathValidWithSameCommandSpecClExe) {
+  // Command spec name, local compiler path and argv[0] all name cl.exe.
+  std::vector<string> argv = {"c:\\dummy\\cl.exe", "/c", "dummy.cc"};
+  std::unique_ptr<CompilerFlags> flag(CompilerFlags::New(argv, "."));
+
+  ExecReq req;
+  req.set_cwd("/tmp");
+  auto* spec = req.mutable_command_spec();
+  spec->set_name("cl.exe");
+  spec->set_version("1.2.3");
+  spec->set_target("x86");
+  spec->set_binary_hash("deadbeaf");
+  spec->set_local_compiler_path("c:\\dummy\\cl.exe");
+
+  EXPECT_TRUE(CompileTaskTest::IsLocalCompilerPathValid(
+      "dummy", req, flag.get()));
+}
+
+TEST_F(CompileTaskTest, IsLocalCompilerPathValidWithOmittingExtension) {
+  // The command spec says cl.exe; the path and argv[0] omit ".exe".
+  std::vector<string> argv = {"cl", "/c", "dummy.cc"};
+  std::unique_ptr<CompilerFlags> flag(CompilerFlags::New(argv, "."));
+
+  ExecReq req;
+  req.set_cwd("/tmp");
+  auto* spec = req.mutable_command_spec();
+  spec->set_name("cl.exe");
+  spec->set_version("1.2.3");
+  spec->set_target("x86");
+  spec->set_binary_hash("deadbeaf");
+  spec->set_local_compiler_path("c:\\dummy\\cl");
+
+  EXPECT_TRUE(CompileTaskTest::IsLocalCompilerPathValid(
+      "dummy", req, flag.get()));
+}
+
+TEST_F(CompileTaskTest, IsLocalCompilerPathValidWithUnknownNameClExe) {
+  // The local compiler path points at a binary that is not a compiler.
+  std::vector<string> argv = {"c:\\dummy\\cl.exe", "/c", "dummy.cc"};
+  std::unique_ptr<CompilerFlags> flag(CompilerFlags::New(argv, "."));
+
+  ExecReq req;
+  req.set_cwd("/tmp");
+  auto* spec = req.mutable_command_spec();
+  spec->set_name("cl.exe");
+  spec->set_version("1.2.3");
+  spec->set_target("x86");
+  spec->set_binary_hash("deadbeaf");
+  spec->set_local_compiler_path(
+      "c:\\dummy\\shutdown.exe");
+
+  EXPECT_FALSE(CompileTaskTest::IsLocalCompilerPathValid(
+      "dummy", req, flag.get()));
+}
+
+TEST_F(CompileTaskTest, IsLocalCompilerPathValidWithCommandSpecMismatchClExe) {
+  // The command spec says clang-cl while the path and argv[0] say cl.exe.
+  std::vector<string> argv = {"c:\\dummy\\cl.exe", "/c", "dummy.cc"};
+  std::unique_ptr<CompilerFlags> flag(CompilerFlags::New(argv, "."));
+
+  ExecReq req;
+  req.set_cwd("/tmp");
+  auto* spec = req.mutable_command_spec();
+  spec->set_name("clang-cl");
+  spec->set_version("1.2.3");
+  spec->set_target("x86");
+  spec->set_binary_hash("deadbeaf");
+  spec->set_local_compiler_path("c:\\dummy\\cl.exe");
+
+  EXPECT_FALSE(CompileTaskTest::IsLocalCompilerPathValid(
+      "dummy", req, flag.get()));
+}
+
+TEST_F(CompileTaskTest, IsLocalCompilerPathValidWithArgsMismatchClExe) {
+  // argv[0] says clang-cl.exe while the command spec and path say cl.exe.
+  std::vector<string> argv = {"c:\\dummy\\clang-cl.exe", "/c", "dummy.cc"};
+  std::unique_ptr<CompilerFlags> flag(CompilerFlags::New(argv, "."));
+
+  ExecReq req;
+  req.set_cwd("/tmp");
+  auto* spec = req.mutable_command_spec();
+  spec->set_name("cl.exe");
+  spec->set_version("1.2.3");
+  spec->set_target("x86");
+  spec->set_binary_hash("deadbeaf");
+  spec->set_local_compiler_path("c:\\dummy\\cl.exe");
+
+  EXPECT_FALSE(CompileTaskTest::IsLocalCompilerPathValid(
+      "dummy", req, flag.get()));
+}
+// TODO: add other combinations if necessary.
+
+TEST_F(CompileTaskTest, RemoveDuplicateFiles) {
+  // Cases whose input must come back untouched snapshot the set before the
+  // call and compare against that snapshot afterwards.
+
+  {
+    // Two unrelated absolute paths: both are kept.
+    std::set<std::string> filenames;
+    filenames.insert(file::JoinPath(kRootDir, "foo", "bar.cc"));
+    filenames.insert(file::JoinPath(kRootDir, "foo", "baz.cc"));
+    const std::set<std::string> expected(filenames);
+
+    RemoveDuplicateFiles("", &filenames);
+    EXPECT_EQ(filenames, expected);
+  }
+
+  {
+    // Paths that differ only by case are treated as different files.
+    std::set<std::string> filenames;
+    filenames.insert(file::JoinPath(kRootDir, "Foo"));
+    filenames.insert(file::JoinPath(ToLower(kRootDir), "fOO"));
+    const std::set<std::string> expected(filenames);
+
+    RemoveDuplicateFiles("", &filenames);
+    EXPECT_EQ(filenames, expected);
+  }
+
+  {
+    // A relative and an absolute spelling of the same file under |cwd|:
+    // only the relative one survives.
+    std::set<std::string> filenames;
+    filenames.insert("bar.cc");
+    filenames.insert(file::JoinPath(kRootDir, "foo", "bar.cc"));
+
+    RemoveDuplicateFiles(file::JoinPath(kRootDir, "foo"), &filenames);
+
+    std::set<std::string> expected;
+    expected.insert("bar.cc");
+    EXPECT_EQ(filenames, expected);
+  }
+
+  {
+    // Same as above, with ".." appearing in both spellings.
+    std::set<std::string> filenames;
+    filenames.insert(file::JoinPath("..", "bar.cc"));
+    filenames.insert(file::JoinPath(kRootDir, "foo", "baz", "..", "bar.cc"));
+
+    RemoveDuplicateFiles(file::JoinPath(kRootDir, "foo", "baz"),
+                         &filenames);
+
+    std::set<std::string> expected;
+    expected.insert(file::JoinPath("..", "bar.cc"));
+    EXPECT_EQ(filenames, expected);
+  }
+
+  {
+    // Absolute paths that differ textually because of "..": both are kept.
+    std::set<std::string> filenames;
+    filenames.insert(file::JoinPath(kRootDir, "foo", "baz", "..", "bar.cc"));
+    filenames.insert(file::JoinPath(kRootDir, "foo", "bar.cc"));
+    const std::set<std::string> expected(filenames);
+
+    RemoveDuplicateFiles("", &filenames);
+    EXPECT_EQ(filenames, expected);
+  }
+
+  {
+    // Relative path containing ".." versus an absolute path: both are kept.
+    std::set<std::string> filenames;
+    filenames.insert(file::JoinPath("baz", "..", "bar.cc"));
+    filenames.insert(file::JoinPath(kRootDir, "foo", "bar.cc"));
+    const std::set<std::string> expected(filenames);
+
+    RemoveDuplicateFiles(file::JoinPath(kRootDir, "foo"), &filenames);
+    EXPECT_EQ(filenames, expected);
+  }
+
+  {
+    // "../bar.cc" joined with |cwd| differs textually from the absolute
+    // path: both are kept.
+    std::set<std::string> filenames;
+    filenames.insert(file::JoinPath("..", "bar.cc"));
+    filenames.insert(file::JoinPath(kRootDir, "foo", "bar.cc"));
+    const std::set<std::string> expected(filenames);
+
+    RemoveDuplicateFiles(file::JoinPath(kRootDir, "foo", "baz"),
+                         &filenames);
+    EXPECT_EQ(filenames, expected);
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/client/compiler_flags_util.cc b/client/compiler_flags_util.cc
new file mode 100644
index 0000000..bd6d404
--- /dev/null
+++ b/client/compiler_flags_util.cc
@@ -0,0 +1,83 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef _WIN32
+// TODO: Check if we need to do this for VCFlags as well.
+
+#include "compiler_flags_util.h"
+
+#include "compiler_flags.h"
+#include "compiler_info.h"
+#include "compiler_specific.h"
+#include "flag_parser.h"
+#include "path_resolver.h"
+
+namespace {
+
+// FlagParser callback that rewrites path values relative to a fixed cwd.
+class FixPath : public FlagParser::Callback {
+ public:
+  explicit FixPath(const string& cwd) : cwd_(cwd) {}
+
+  void RegisterSystemPath(const string& path) {
+    path_resolver_.RegisterSystemPath(path);
+  }
+
+  // Leaves system paths as is; other values become weak-relative to cwd_.
+  string ParseFlagValue(const FlagParser::Flag& flag ALLOW_UNUSED,
+                        const string& value) override {
+    const bool is_system = path_resolver_.IsSystemPath(value);
+    return is_system ? value : path_resolver_.WeakRelativePath(value, cwd_);
+  }
+
+ private:
+  string cwd_;
+  devtools_goma::PathResolver path_resolver_;
+};
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+// Returns |args| with path-valued arguments rewritten relative to |cwd|
+// where possible; see compiler_flags_util.h for the exact contract.
+std::vector<string> CompilerFlagsUtil::MakeWeakRelative(
+    const std::vector<string>& args,
+    const string& cwd,
+    const CompilerInfo& compiler_info) {
+  // Flags whose values are paths, listed in registration order.
+  static const char* const kPathFlags[] = {
+    "o", "MF", "Wp,-MD,",
+    "isysroot", "isystem", "-isysroot",
+    "B", "iframework", "I", "F", "L",
+    "include", "imacros", "MT",
+  };
+
+  // Register every system include/framework directory with the callback.
+  FixPath fix_path(cwd);
+  for (const auto& dir : compiler_info.cxx_system_include_paths())
+    fix_path.RegisterSystemPath(dir);
+  for (const auto& dir : compiler_info.system_include_paths())
+    fix_path.RegisterSystemPath(dir);
+  for (const auto& dir : compiler_info.system_framework_paths())
+    fix_path.RegisterSystemPath(dir);
+
+  FlagParser parser;
+  GCCFlags::DefineFlags(&parser);
+
+  for (const char* name : kPathFlags)
+    parser.AddFlag(name)->SetCallbackForParsedArgs(&fix_path);
+  // Non-flag arguments and -Xclang values go through the same callback.
+  parser.AddNonFlag()->SetCallbackForParsedArgs(&fix_path);
+  parser.AddFlag("Xclang")->SetCallbackForParsedArgs(&fix_path);
+
+  parser.Parse(args);
+
+  return parser.GetParsedArgs();
+}
+
+}  // namespace devtools_goma
+
+#endif  // _WIN32
diff --git a/client/compiler_flags_util.h b/client/compiler_flags_util.h
new file mode 100644
index 0000000..6769ed4
--- /dev/null
+++ b/client/compiler_flags_util.h
@@ -0,0 +1,46 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_COMPILER_FLAGS_UTIL_H_
+#define DEVTOOLS_GOMA_CLIENT_COMPILER_FLAGS_UTIL_H_
+
+#ifndef _WIN32
+
+#include <string>
+#include <vector>
+
+using std::string;
+
+namespace devtools_goma {
+
+class CompilerInfo;
+
+class CompilerFlagsUtil {
+ public:
+  // Returns a copy of |args| in which path names are rewritten to be
+  // relative to |cwd| wherever possible.  A path stays absolute when it is
+  // under one of the system include paths given by |compiler_info|, or
+  // when it lies in a totally different location from |cwd|.
+  // For example, when cwd = /home/goma/src/WebKit/WebKitBuild
+  //     -I/home/goma/src/WebKit/Source/WebKit
+  //  => -I../Source/WebKit
+  //     -o /home/goma/src/WebKit/WebKitBuild/foo.o
+  //  => -o foo.o
+  //
+  //     -I/usr/include
+  //  => -I/usr/include  # it is a system path.
+  //
+  //     -o /tmp/bar.o
+  //  => -o /tmp/bar.o   # unrelated to cwd: /home != /tmp
+  static std::vector<string> MakeWeakRelative(
+      const std::vector<string>& args,
+      const string& cwd,
+      const CompilerInfo& compiler_info);
+};
+
+}  // namespace devtools_goma
+
+#endif  // _WIN32
+#endif  // DEVTOOLS_GOMA_CLIENT_COMPILER_FLAGS_UTIL_H_
diff --git a/client/compiler_flags_util_unittest.cc b/client/compiler_flags_util_unittest.cc
new file mode 100644
index 0000000..0a8b500
--- /dev/null
+++ b/client/compiler_flags_util_unittest.cc
@@ -0,0 +1,235 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "compiler_flags_util.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "compiler_info.h"
+#include "gtest/gtest.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// Fixture offering a helper to populate system paths on CompilerInfoData.
+class CompilerFlagsUtilTest : public testing::Test {
+ protected:
+  // Adds every entry of the three path lists to |compiler_info_data| through
+  // the matching add_* call, keeping the order of each input list.
+  void SetSystemIncludePaths(
+      const std::vector<string>& cxx_system_include_paths,
+      const std::vector<string>& system_include_paths,
+      const std::vector<string>& system_framework_paths,
+      CompilerInfoData* compiler_info_data) {
+    for (const auto& dir : cxx_system_include_paths)
+      compiler_info_data->add_cxx_system_include_paths(dir);
+    for (const auto& dir : system_include_paths)
+      compiler_info_data->add_system_include_paths(dir);
+    for (const auto& dir : system_framework_paths)
+      compiler_info_data->add_system_framework_paths(dir);
+  }
+};
+
+#ifndef _WIN32
+TEST_F(CompilerFlagsUtilTest, MakeWeakRelativeMacWebKit) {  // Mac gcc-4.2 case
+  const string cwd =
+      "/Users/goma/src/chromium-webkit/src/third_party/WebKit/Source/WebKit";
+  std::vector<string> args;
+  args.push_back("/Developer/usr/bin/gcc-4.2");
+  args.push_back("-x");
+  args.push_back("objective-c");
+  args.push_back("-arch");
+  args.push_back("x86_64");
+  args.push_back("-fmessage-length=0");
+  args.push_back("-pipe");
+  args.push_back("-std=gnu99");
+  args.push_back("-Wno-trigraphs");
+  args.push_back("-fpascal-strings");
+  args.push_back("-O2");
+  args.push_back("-Werror");
+  args.push_back("-DNDEBUG");
+  args.push_back("-fobjc-gc");
+  args.push_back("-mmacosx-version-min=10.6");
+  args.push_back("-gdwarf-2");
+  args.push_back("-I/Users/goma/src/chromium-webkit/src/third_party/WebKit/"
+                 "WebKitBuild/WebKit.build/Release/WebKit.build/WebKit.hmap");
+  args.push_back("-Wall");
+  args.push_back("-F/Users/goma/src/chromium-webkit/src/third_party/WebKit/"
+                 "WebKitBuild/Release");
+  args.push_back("-F/System/Library/Frameworks/WebKit.framework/Versions/A/"
+                 "Frameworks");
+  args.push_back("-I/Users/goma/src/chromium-webkit/src/third_party/WebKit/"
+                 "WebKitBuild/Release/include");
+  args.push_back("-include");
+  args.push_back("/Users/goma/src/chromium-webkit/src/third_party/WebKit/"
+                 "Source/WebKit/mac/WebKitPrefix.h");
+  args.push_back("-imacros");
+  args.push_back("/Users/goma/src/chromium-webkit/src/third_party/WebKit/"
+                 "Source/WebKit/mac/WebKitPrefix2.h");
+  args.push_back("-c");
+  args.push_back("/Users/goma/src/chromium-webkit/src/third_party/WebKit/"
+                 "Source/WebKit/mac/Misc/WebKitErrors.m");
+  args.push_back("-o");
+  args.push_back("/Users/goma/src/chromium-webkit/src/third_party/WebKit/"
+                 "WebKitBuild/WebKit.build/Release/WebKit.build/"
+                 "Objects-normal/x86_64/WebKitErrors.o");
+  ASSERT_EQ(29U, args.size());
+  // Register the system include and framework paths of this toolchain.
+  std::unique_ptr<CompilerInfoData> compiler_info_data(new CompilerInfoData);
+  {
+    std::vector<string> cxx_system_include_paths;
+    std::vector<string> system_include_paths;
+    std::vector<string> system_framework_paths;
+
+    cxx_system_include_paths.push_back("/usr/include/c++/4.2.1");
+    system_include_paths.push_back(
+        "/Developer/usr/bin/../lib/gcc/i686-apple-darwin10/4.2.1/include");
+    system_include_paths.push_back(
+        "/usr/lib/gcc/i686-apple-darwin10/4.2.1/include");
+    system_include_paths.push_back("/usr/include");
+
+    system_framework_paths.push_back("/System/Library/Frameworks");
+    system_framework_paths.push_back("/Library/Frameworks");
+
+    SetSystemIncludePaths(
+        cxx_system_include_paths,
+        system_include_paths,
+        system_framework_paths,
+        compiler_info_data.get());
+  }
+
+  CompilerInfo compiler_info(std::move(compiler_info_data));
+  // Paths under the WebKit checkout are expected relative to cwd.
+  std::vector<string> parsed_args =
+      CompilerFlagsUtil::MakeWeakRelative(
+          args, cwd, compiler_info);
+  ASSERT_EQ(args.size(), parsed_args.size());
+  EXPECT_EQ("/Developer/usr/bin/gcc-4.2", parsed_args[0]);
+  EXPECT_EQ("-x", parsed_args[1]);
+  EXPECT_EQ("objective-c", parsed_args[2]);
+  EXPECT_EQ("-arch", parsed_args[3]);
+  EXPECT_EQ("x86_64", parsed_args[4]);
+  EXPECT_EQ("-fmessage-length=0", parsed_args[5]);
+  EXPECT_EQ("-pipe", parsed_args[6]);
+  EXPECT_EQ("-std=gnu99", parsed_args[7]);
+  EXPECT_EQ("-Wno-trigraphs", parsed_args[8]);
+  EXPECT_EQ("-fpascal-strings", parsed_args[9]);
+  EXPECT_EQ("-O2", parsed_args[10]);
+  EXPECT_EQ("-Werror", parsed_args[11]);
+  EXPECT_EQ("-DNDEBUG", parsed_args[12]);
+  EXPECT_EQ("-fobjc-gc", parsed_args[13]);
+  EXPECT_EQ("-mmacosx-version-min=10.6", parsed_args[14]);
+  EXPECT_EQ("-gdwarf-2", parsed_args[15]);
+  EXPECT_EQ("-I../../"
+            "WebKitBuild/WebKit.build/Release/WebKit.build/WebKit.hmap",
+            parsed_args[16]);
+  EXPECT_EQ("-Wall", parsed_args[17]);
+  EXPECT_EQ("-F../../WebKitBuild/Release", parsed_args[18]);
+  EXPECT_EQ("-F/System/Library/Frameworks/WebKit.framework/Versions/A/"
+            "Frameworks", parsed_args[19]);
+  EXPECT_EQ("-I../../WebKitBuild/Release/include", parsed_args[20]);
+  EXPECT_EQ("-include", parsed_args[21]);
+  EXPECT_EQ("mac/WebKitPrefix.h", parsed_args[22]);
+  EXPECT_EQ("-imacros", parsed_args[23]);
+  EXPECT_EQ("mac/WebKitPrefix2.h", parsed_args[24]);
+  EXPECT_EQ("-c", parsed_args[25]);
+  EXPECT_EQ("mac/Misc/WebKitErrors.m", parsed_args[26]);
+  EXPECT_EQ("-o", parsed_args[27]);
+  EXPECT_EQ("../../WebKitBuild/WebKit.build/Release/WebKit.build/"
+            "Objects-normal/x86_64/WebKitErrors.o", parsed_args[28]);
+}
+
+TEST_F(CompilerFlagsUtilTest, MakeWeakRelativeChromiumClang) {  // clang++ case
+  const string cwd = "/home/goma/src/chromium1/src";
+  std::vector<string> args;
+  args.push_back("clang++");
+  args.push_back("-DNO_HEAPCHECKER");
+  args.push_back("-DENABLE_REMOTING=1");
+  args.push_back("-DGR_GL_CUSTOM_SETUP_HEADER=\"GrGLConfig_chrome.h\"");
+  args.push_back("-Ithird_party/icu/public/common");
+  args.push_back("-I/usr/include/gtk-2.0");
+  args.push_back("-Wno-unnamed-type-template-args");
+  args.push_back("-O2");
+  args.push_back("-Xclang");
+  args.push_back("-load");
+  args.push_back("-Xclang");
+  args.push_back("/home/goma/src/chromium1/src/"
+                 "tools/clang/scripts/../../../"
+                 "third_party/llvm-build/Release+Asserts/lib/"
+                 "libFindBadConstructs.so");
+  args.push_back("-Xclang");
+  args.push_back("-add-plugin");
+  args.push_back("-Xclang");
+  args.push_back("find-bad-constructs");
+  args.push_back("-fdata-sections");
+  args.push_back("-ffunction-sections");
+  args.push_back("-MMD");
+  args.push_back("-MF");
+  args.push_back("llvm/Release/.deps/llvm/Release/obj.target/"
+                 "common/chrome/common/about_handler.o.d.raw");
+  args.push_back("-c");
+  args.push_back("-o");
+  args.push_back("llvm/Release/obj.target"
+                 "/common/chrome/common/about_handler.o");
+  args.push_back("chrome/common/about_handler.cc");
+  ASSERT_EQ(25U, args.size());
+  // Register the system include paths; no framework paths in this case.
+  std::unique_ptr<CompilerInfoData> compiler_info_data(new CompilerInfoData);
+  {
+    std::vector<string> cxx_system_include_paths;
+    cxx_system_include_paths.push_back("/usr/include/c++/4.4.3");
+    std::vector<string> system_include_paths;
+    system_include_paths.push_back("/usr/include");
+    std::vector<string> system_framework_paths;
+    SetSystemIncludePaths(
+        cxx_system_include_paths,
+        system_include_paths,
+        system_framework_paths,
+        compiler_info_data.get());
+  }
+
+  CompilerInfo compiler_info(std::move(compiler_info_data));
+  // Relative args stay as is; the absolute plugin path loses its cwd prefix.
+  std::vector<string> parsed_args =
+      CompilerFlagsUtil::MakeWeakRelative(
+          args, cwd, compiler_info);
+  ASSERT_EQ(25U, parsed_args.size());
+  EXPECT_EQ("clang++", parsed_args[0]);
+  EXPECT_EQ("-DNO_HEAPCHECKER", parsed_args[1]);
+  EXPECT_EQ("-DENABLE_REMOTING=1", parsed_args[2]);
+  EXPECT_EQ("-DGR_GL_CUSTOM_SETUP_HEADER=\"GrGLConfig_chrome.h\"",
+            parsed_args[3]);
+  EXPECT_EQ("-Ithird_party/icu/public/common", parsed_args[4]);
+  EXPECT_EQ("-I/usr/include/gtk-2.0", parsed_args[5]);
+  EXPECT_EQ("-Wno-unnamed-type-template-args", parsed_args[6]);
+  EXPECT_EQ("-O2", parsed_args[7]);
+  EXPECT_EQ("-Xclang", parsed_args[8]);
+  EXPECT_EQ("-load", parsed_args[9]);
+  EXPECT_EQ("-Xclang", parsed_args[10]);
+  EXPECT_EQ("tools/clang/scripts/../../../"
+            "third_party/llvm-build/Release+Asserts/lib/"
+            "libFindBadConstructs.so", parsed_args[11]);
+  EXPECT_EQ("-Xclang", parsed_args[12]);
+  EXPECT_EQ("-add-plugin", parsed_args[13]);
+  EXPECT_EQ("-Xclang", parsed_args[14]);
+  EXPECT_EQ("find-bad-constructs", parsed_args[15]);
+  EXPECT_EQ("-fdata-sections", parsed_args[16]);
+  EXPECT_EQ("-ffunction-sections", parsed_args[17]);
+  EXPECT_EQ("-MMD", parsed_args[18]);
+  EXPECT_EQ("-MF", parsed_args[19]);
+  EXPECT_EQ("llvm/Release/.deps/llvm/Release/obj.target/"
+            "common/chrome/common/about_handler.o.d.raw", parsed_args[20]);
+  EXPECT_EQ("-c", parsed_args[21]);
+  EXPECT_EQ("-o", parsed_args[22]);
+  EXPECT_EQ("llvm/Release/obj.target/common/chrome/common/about_handler.o",
+            parsed_args[23]);
+  EXPECT_EQ("chrome/common/about_handler.cc", parsed_args[24]);
+}
+#endif
+
+}  // namespace devtools_goma
diff --git a/client/compiler_info.cc b/client/compiler_info.cc
new file mode 100644
index 0000000..c000b80
--- /dev/null
+++ b/client/compiler_info.cc
@@ -0,0 +1,2773 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "compiler_info.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <iterator>
+#include <map>
+#include <memory>
+#include <set>
+#include <sstream>
+
+#include "autolock_timer.h"
+#include "cmdline_parser.h"
+#include "compiler_flags.h"
+#include "compiler_specific.h"
+#include "file_dir.h"
+#include "file.h"
+#include "file_id.h"
+#include "flag_parser.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+#include "goma_hash.h"
+#include "ioutil.h"
+#include "mypath.h"
+#include "path.h"
+#include "path_resolver.h"
+#include "path_util.h"
+#include "scoped_tmp_file.h"
+#include "split.h"
+#include "string_piece_utils.h"
+#include "util.h"
+
+#ifdef _WIN32
+#include "config_win.h"
+#include "posix_helper_win.h"
+#endif
+
+namespace devtools_goma {
+
+namespace {
+
+#include "clang_features.cc"
+
+void SetFileIdToData(const FileId& file_id, CompilerInfoData::FileId* data) {
+  data->set_mtime(file_id.mtime);  // fields common to every platform
+  data->set_size(file_id.size);
+  data->set_is_directory(file_id.is_directory);
+#ifndef _WIN32  // non-Windows: device and inode
+  data->set_dev(file_id.dev);
+  data->set_inode(file_id.inode);
+#else  // Windows: volume serial number and file index
+  data->set_volume_serial_number(file_id.volume_serial_number);
+  data->set_file_index_high(file_id.file_index_high);
+  data->set_file_index_low(file_id.file_index_low);
+#endif
+}
+
+void GetFileIdFromData(const CompilerInfoData::FileId& data,
+                       FileId* file_id) {
+  file_id->mtime = data.mtime();  // fields common to every platform
+  file_id->size = data.size();
+  file_id->is_directory = data.is_directory();
+#ifndef _WIN32  // non-Windows: device and inode
+  file_id->dev = data.dev();
+  file_id->inode = data.inode();
+#else  // Windows: volume serial number and file index
+  file_id->volume_serial_number = data.volume_serial_number();
+  file_id->file_index_high = data.file_index_high();
+  file_id->file_index_low = data.file_index_low();
+#endif
+}
+
+// Fetches the sha256 hash of |path| into |hash|.  A value already present
+// in |sha256_cache| is reused; otherwise the hash is calculated from the
+// file at |path| and the result is put into |sha256_cache|.
+// Returns false if calculating sha256 hash from |path| failed.
+bool GetHashFromCacheOrFile(const string& path,
+                            string* hash,
+                            unordered_map<string, string>* sha256_cache) {
+  const auto cached = sha256_cache->find(path);
+  if (cached == sha256_cache->end()) {
+    // Cache miss: hash the file and remember the result.
+    if (!GomaSha256FromFile(path, hash))
+      return false;
+    sha256_cache->insert(make_pair(path, *hash));
+  } else {
+    // Cache hit.
+    *hash = cached->second;
+  }
+  return true;
+}
+
+// Appends a SubprogramInfo for |path| to |ss|; on failure the new entry is
+// removed again and false is returned.
+bool AddSubprogramInfo(
+    const string& path,
+    google::protobuf::RepeatedPtrField<CompilerInfoData::SubprogramInfo>* ss) {
+  const bool ok = CompilerInfoBuilder::SubprogramInfoFromPath(path, ss->Add());
+  if (!ok)
+    ss->RemoveLast();
+  return ok;
+}
+
+}  // anonymous namespace.
+
+#ifdef _WIN32
+// Part of the hack needed for the (build: Windows, target: NaCl) compile:
+// resolves the parent of the directory containing |normal_nacl_gcc_path|.
+static string GetNaClToolchainRoot(const string& normal_nacl_gcc_path) {
+  return PathResolver::ResolvePath(
+      file::JoinPath(file::Dirname(normal_nacl_gcc_path), ".."));
+}
+#endif
+
+// Executes |bare_gcc| with -dumpversion and then with --version, adding
+// LC_ALL=C to |compiler_info_envs|, and builds |version| from both outputs.
+// Returns false if either command fails or either output is empty.
+static bool GetGccVersion(const string& bare_gcc,
+                          const std::vector<string>& compiler_info_envs,
+                          const string& cwd,
+                          string* version) {
+  std::vector<string> argv{bare_gcc, "-dumpversion"};
+  std::vector<string> env(compiler_info_envs);
+  env.push_back("LC_ALL=C");
+  int32_t status = 0;
+  const string dumpversion_output(
+      ReadCommandOutput(bare_gcc, argv, env, cwd,
+                        MERGE_STDOUT_STDERR, &status));
+  if (status != 0) {
+    LOG(ERROR) << "ReadCommandOutput exited with non zero status code."
+               << " bare_gcc=" << bare_gcc
+               << " status=" << status
+               << " argv=" << argv
+               << " env=" << env
+               << " cwd=" << cwd
+               << " dumpversion_output=" << dumpversion_output;
+    return false;
+  }
+
+  argv.back() = "--version";
+  const string version_output(
+      ReadCommandOutput(bare_gcc, argv, env, cwd,
+                        MERGE_STDOUT_STDERR, &status));
+  if (status != 0) {
+    LOG(ERROR) << "ReadCommandOutput exited with non zero status code."
+               << " bare_gcc=" << bare_gcc
+               << " status=" << status
+               << " argv=" << argv
+               << " env=" << env
+               << " cwd=" << cwd
+               << " version_output=" << version_output;
+    return false;
+  }
+
+  if (dumpversion_output.empty() || version_output.empty()) {
+    LOG(ERROR) << "dumpversion_output or version_output is empty."
+               << " bare_gcc=" << bare_gcc
+               << " status=" << status
+               << " argv=" << argv
+               << " env=" << env
+               << " cwd=" << cwd
+               << " dumpversion_output=" << dumpversion_output
+               << " version_output=" << version_output;
+    return false;
+  }
+  *version = GetCxxCompilerVersionFromCommandOutputs(bare_gcc,
+                                                     dumpversion_output,
+                                                     version_output);
+  return true;
+}
+
+// Execute GCC with -dumpmachine and store the first line of its output,
+// the GCC target architecture, in |target|.
+// This target is used to pick the same compiler in the backends, so
+// we don't need to use compiler_info_flags here.
+// Returns false if the command fails or that first line is empty.
+static bool GetGccTarget(const string& bare_gcc,
+                         const std::vector<string>& compiler_info_envs,
+                         const string& cwd,
+                         string* target) {
+  std::vector<string> argv{bare_gcc, "-dumpmachine"};
+  std::vector<string> env(compiler_info_envs);
+  env.push_back("LC_ALL=C");
+  int32_t status = 0;
+  string gcc_output(ReadCommandOutput(bare_gcc, argv, env, cwd,
+                                      MERGE_STDOUT_STDERR, &status));
+  if (status != 0) {
+    LOG(ERROR) << "ReadCommandOutput exited with non zero status code."
+               << " bare_gcc=" << bare_gcc
+               << " status=" << status
+               << " argv=" << argv
+               << " env=" << env
+               << " cwd=" << cwd
+               << " gcc_output=" << gcc_output;
+    return false;
+  }
+  *target = GetFirstLine(gcc_output);
+  return !target->empty();
+}
+
+// Parses the first ' '-prefixed line of |display_output|; false if none.
+static bool ParseDriverArgs(const string& display_output,
+                            std::vector<string>* driver_args) {
+  StringPiece buf(display_output);
+  while (!buf.empty()) {
+    const size_t pos = buf.find_first_of("\n");
+    StringPiece line = buf.substr(0, pos);
+    // Check emptiness first: line[0] on an empty line is out of range.
+    if (!line.empty() && line[0] == ' ')
+      return ParsePosixCommandLineToArgv(string(line), driver_args);
+    buf.remove_prefix(pos == StringPiece::npos ? buf.size() : pos + 1);
+  }
+  return false;
+}
+
+
+// Runs |cl_exe_path| with /nologo, |vcflags|, |compiler_info_flags| and
+// |dumb_file|, in that order, and returns what ReadCommandOutput yields.
+static string GetVCOutputString(const string& cl_exe_path,
+                                const string& vcflags,
+                                const string& dumb_file,
+                                const std::vector<string>& compiler_info_flags,
+                                const std::vector<string>& compiler_info_envs,
+                                const string& cwd) {
+  // The trick we do here gives both include path and predefined macros.
+  std::vector<string> argv{cl_exe_path, "/nologo", vcflags};
+  argv.insert(argv.end(),
+              compiler_info_flags.begin(), compiler_info_flags.end());
+  argv.push_back(dumb_file);
+  // It is fine to return non zero status code, so the status is unused.
+  int32_t unused_status;
+  return ReadCommandOutput(cl_exe_path, argv, compiler_info_envs, cwd,
+                           MERGE_STDOUT_STDERR, &unused_status);
+}
+
+// clang-cl emulates cl.exe, so "clang-cl -dumpversion" might not be
+// meaningful.  That makes goma's compiler version format inconsistent
+// between clang, which expects <dumpversion>[<version>], and clang-cl,
+// which cannot provide <dumpversion>.  The version string is therefore
+// taken from the output of running the compiler with "-###" instead.
+// Returns "" if the command exits with a non zero status code.
+// TODO: make this support gcc and use this instead of GetGccTarget.
+static string GetClangClSharpOutput(
+    const string& clang_path,
+    const std::vector<string>& compiler_info_flags,
+    const std::vector<string>& compiler_info_envs,
+    const string& cwd) {
+  std::vector<string> argv(1, clang_path);
+  argv.insert(argv.end(),
+              compiler_info_flags.begin(), compiler_info_flags.end());
+  argv.push_back("-###");
+
+  int32_t status = 0;
+  string output = ReadCommandOutput(
+      clang_path, argv, compiler_info_envs, cwd,
+      MERGE_STDOUT_STDERR, &status);
+  if (status == 0)
+    return output;
+
+  // Failure: log the details of the invocation.
+  LOG(ERROR) << "ReadCommandOutput exited with non zero status code."
+             << " clang_path=" << clang_path
+             << " status=" << status
+             << " argv=" << argv
+             << " compiler_info_envs=" << compiler_info_envs
+             << " cwd=" << cwd
+             << " output=" << output;
+  return "";
+}
+
+/* static */
+std::unique_ptr<CompilerInfoData> CompilerInfoBuilder::FillFromCompilerOutputs(
+    const CompilerFlags& flags,
+    const string& local_compiler_path,
+    const std::vector<string>& compiler_info_envs) {
+  std::unique_ptr<CompilerInfoData> data(new CompilerInfoData);
+
+  data->set_last_used_at(time(nullptr));
+
+  // TODO: minimize the execution of ReadCommandOutput.
+  // If we execute gcc/clang with -xc -v for example, we can get not only
+  // real compiler path but also target and version.
+  // However, I understand we need large refactoring of CompilerInfo
+  // for minimizing the execution while keeping readability.
+  if (flags.is_gcc()) {
+    data->set_local_compiler_path(local_compiler_path);
+    data->set_real_compiler_path(
+        GetRealCompilerPath(local_compiler_path, flags.cwd(),
+                            compiler_info_envs));
+  } else {
+    data->set_local_compiler_path(local_compiler_path);
+    data->set_real_compiler_path(local_compiler_path);
+  }
+
+  if (!file::IsAbsolutePath(local_compiler_path)) {
+    data->set_cwd(flags.cwd());
+  }
+
+  const string& abs_local_compiler_path =
+      PathResolver::ResolvePath(
+          file::JoinPathRespectAbsolute(flags.cwd(),
+                                        data->local_compiler_path()));
+  VLOG(2) << "FillFromCompilerOutputs:"
+          << " abs_local_compiler_path=" << abs_local_compiler_path
+          << " cwd=" << flags.cwd()
+          << " local_compiler_path=" << data->local_compiler_path();
+  data->set_real_compiler_path(
+      PathResolver::ResolvePath(
+          file::JoinPathRespectAbsolute(flags.cwd(),
+                                        data->real_compiler_path())));
+
+  if (!GomaSha256FromFile(abs_local_compiler_path,
+                          data->mutable_local_compiler_hash())) {
+    LOG(ERROR) << "Could not open local compiler file "
+               << abs_local_compiler_path;
+    data->set_found(false);
+    return data;
+  }
+
+  if (!GomaSha256FromFile(data->real_compiler_path(),
+                          data->mutable_hash())) {
+    LOG(ERROR) << "Could not open real compiler file "
+               << data->real_compiler_path();
+    data->set_found(false);
+    return data;
+  }
+  data->set_name(GetCompilerName(*data));
+  if (data->name().empty()) {
+    AddErrorMessage("Failed to get compiler name of " +
+                    abs_local_compiler_path,
+                    data.get());
+    LOG(ERROR) << data->error_message();
+    return data;
+  }
+  data->set_lang(flags.lang());
+
+  FileId local_compiler_id(abs_local_compiler_path);
+  if (!local_compiler_id.IsValid()) {
+    LOG(ERROR) << "Failed to get file id of " << abs_local_compiler_path;
+    data->set_found(false);
+    return data;
+  }
+  SetFileIdToData(local_compiler_id, data->mutable_local_compiler_id());
+  data->mutable_real_compiler_id()->CopyFrom(data->local_compiler_id());
+
+  data->set_found(true);
+
+  if (abs_local_compiler_path != data->real_compiler_path()) {
+    FileId real_compiler_id(data->real_compiler_path());
+    if (!real_compiler_id.IsValid()) {
+      LOG(ERROR) << "Failed to get file id of " << data->real_compiler_path();
+      data->set_found(false);
+      return data;
+    }
+    SetFileIdToData(real_compiler_id, data->mutable_real_compiler_id());
+  }
+  if (flags.is_gcc()) {
+    // Some compilers uses wrapper script to set build target, and in such a
+    // situation, build target could be different.
+    // To make goma backend use proper wrapper script, or set proper -target,
+    // we should need to use local_compiler_path instead of real path.
+    bool has_version = GetGccVersion(
+        abs_local_compiler_path, compiler_info_envs,
+        flags.cwd(), data->mutable_version());
+    bool has_target = GetGccTarget(
+        abs_local_compiler_path, compiler_info_envs,
+        flags.cwd(), data->mutable_target());
+
+    bool is_clang = CompilerFlags::IsClangCommand(
+        data->real_compiler_path());
+
+    const GCCFlags& gcc_flags = static_cast<const GCCFlags&>(flags);
+    const bool is_clang_tidy = false;
+
+    // TODO: As we have -x flags in compiler_info,
+    //               include_processor don't need to have 2 kinds of
+    //               system include paths (C and C++).
+    //               However, we still need them because backend
+    //               should set them using different ways
+    //               (-isystem and CPLUS_INCLUDE_PATH).
+    //               Once b/5218687 is fixed, we should
+    //               be able to eliminate cxx_system_include_paths.
+    if (!SetBasicCompilerInfo(local_compiler_path,
+                              gcc_flags.compiler_info_flags(),
+                              compiler_info_envs,
+                              gcc_flags.cwd(),
+                              "-x" + flags.lang(),
+                              gcc_flags.is_cplusplus(),
+                              is_clang,
+                              is_clang_tidy,
+                              gcc_flags.has_nostdinc(),
+                              data.get())) {
+      DCHECK(data->has_error_message());
+      // If error occurred in SetBasicCompilerInfo, we do not need to
+      // continue.
+      return data;
+    }
+
+    if (!has_version) {
+      AddErrorMessage("Failed to get version for " +
+                      data->real_compiler_path(),
+                      data.get());
+      LOG(ERROR) << data->error_message();
+      return data;
+    }
+    if (!has_target) {
+      AddErrorMessage("Failed to get target for " +
+                      data->real_compiler_path(),
+                      data.get());
+      LOG(ERROR) << data->error_message();
+      return data;
+    }
+
+    if (!GetExtraSubprograms(local_compiler_path,
+                             gcc_flags,
+                             compiler_info_envs,
+                             data.get())) {
+      std::ostringstream ss;
+      ss << "Failed to get subprograms for "
+         << data->real_compiler_path();
+      AddErrorMessage(ss.str(), data.get());
+      LOG(ERROR) << data->error_message();
+      return data;
+    }
+    {
+      // Since we only support subprograms for gcc/clang,
+      // we do not need to rewrite subprogram's hashes on windows clang
+      // (clang-cl), MSVS cl.exe and Javac.
+      AUTO_SHARED_LOCK(lock, &rwlock_);
+      RewriteHashUnlocked(hash_rewrite_rule_, data.get());
+    }
+
+    // Hack for GCC 5's has_include and has_include_next support.
+    // GCC has built-in macro that defines __has_include to __has_include__
+    // and __has_include_next to __has_include_next__.
+    // https://gcc.gnu.org/viewcvs/gcc/trunk/gcc/c-family/c-cppbuiltin.c?revision=229533&view=markup#l794
+    // However, __has_include__ and __has_include_next__ are usable but not
+    // defined.
+    // https://gcc.gnu.org/viewcvs/gcc/trunk/libcpp/init.c?revision=229154&view=markup#l376
+    // i.e.
+    // if we execute gcc -E to followings, we only get
+    // "__has_include__(<stddef.h>)"
+    //   #ifdef __has_include__
+    //   "__has_include__"
+    //   #endif
+    //   #ifdef __has_include__(<stddef.h>)
+    //   "__has_include__(<stddef.h>)"
+    //   #endif
+    // See also: b/25581637
+    //
+    // Note that I do not think we need version check because:
+    // 1. __has_include is the new feature and old version does not have it.
+    // 2. I can hardly think they change their implementation as far as
+    //    I guessed from the code
+    if (data->name() == "gcc" || data->name() == "g++") {
+      bool has_include = false;
+      bool has_include__ = false;
+      bool has_include_next = false;
+      bool has_include_next__ = false;
+      for (const auto& m : data->supported_predefined_macros()) {
+        if (m == "__has_include")
+          has_include = true;
+        if (m == "__has_include__")
+          has_include__ = true;
+        if (m == "__has_include_next")
+          has_include_next = true;
+        if (m == "__has_include_next__")
+          has_include_next__ = true;
+      }
+
+      if (has_include && !has_include__ &&
+          (data->predefined_macros().find("__has_include__")
+           != string::npos)) {
+        data->add_hidden_predefined_macros("__has_include__");
+      }
+      if (has_include_next && !has_include_next__ &&
+          (data->predefined_macros().find("__has_include_next__")
+           != string::npos)) {
+        data->add_hidden_predefined_macros("__has_include_next__");
+      }
+    }
+  } else if (flags.is_vc()) {
+    if (CompilerFlags::IsClangClCommand(local_compiler_path)) {
+      const VCFlags& vc_flags = static_cast<const VCFlags&>(flags);
+      const bool is_clang = true;
+      const bool is_clang_tidy = false;
+      const string& lang_flag = vc_flags.is_cplusplus()?"/TP":"/TC";
+      if (!SetBasicCompilerInfo(abs_local_compiler_path,
+                                vc_flags.compiler_info_flags(),
+                                compiler_info_envs,
+                                vc_flags.cwd(),
+                                lang_flag,
+                                vc_flags.is_cplusplus(),
+                                is_clang,
+                                is_clang_tidy,
+                                false,
+                                data.get())) {
+        DCHECK(data->has_error_message());
+        // If error occurred in SetBasicCompilerInfo, we do not need to
+        // continue.
+        return data;
+      }
+
+      const string& sharp_output = GetClangClSharpOutput(
+          abs_local_compiler_path, vc_flags.compiler_info_flags(),
+          compiler_info_envs, vc_flags.cwd());
+      if (sharp_output.empty() ||
+          !ParseClangVersionTarget(sharp_output,
+                                   data->mutable_version(),
+                                   data->mutable_target())) {
+        AddErrorMessage("Failed to get version string for " +
+                        abs_local_compiler_path,
+                        data.get());
+        LOG(ERROR) << data->error_message();
+        return data;
+      }
+    } else {
+      // cl.exe.
+      string vcflags_path = GetMyDirectory();
+      vcflags_path += "\\vcflags.exe";
+      data->set_predefined_macros(
+          data->predefined_macros() + flags.implicit_macros());
+      if (!GetVCVersion(abs_local_compiler_path, compiler_info_envs,
+                        flags.cwd(),
+                        data->mutable_version(),
+                        data->mutable_target())) {
+        AddErrorMessage("Failed to get cl.exe version for " +
+                        abs_local_compiler_path,
+                        data.get());
+        LOG(ERROR) << data->error_message();
+        return data;
+      }
+      if (!GetVCDefaultValues(abs_local_compiler_path,
+                              vcflags_path,
+                              flags.compiler_info_flags(),
+                              compiler_info_envs,
+                              flags.cwd(),
+                              data->lang(), data.get())) {
+        AddErrorMessage("Failed to get cl.exe system include path "
+                        " or predifined macros for " + abs_local_compiler_path,
+                        data.get());
+        LOG(ERROR) << data->error_message();
+        return data;
+      }
+    }
+  } else if (flags.is_javac()) {
+    if (!GetJavacVersion(local_compiler_path, compiler_info_envs, flags.cwd(),
+                         data->mutable_version())) {
+      AddErrorMessage("Failed to get java version for " + local_compiler_path,
+                      data.get());
+      LOG(ERROR) << data->error_message();
+      return data;
+    }
+    data->set_target("java");
+  } else if (flags.is_clang_tidy()) {
+    if (!GetClangTidyVersionTarget(local_compiler_path,
+                                   compiler_info_envs,
+                                   flags.cwd(),
+                                   data->mutable_version(),
+                                   data->mutable_target())) {
+      AddErrorMessage(
+          "Failed to get clang-tidy version for " + local_compiler_path,
+          data.get());
+      LOG(ERROR) << data->error_message();
+      return data;
+    }
+
+    string clang_abs_local_compiler_path =
+        file::JoinPath(file::Dirname(abs_local_compiler_path), "clang");
+
+    const ClangTidyFlags& clang_tidy_flags =
+        static_cast<const ClangTidyFlags&>(flags);
+    const bool is_clang = true;
+    const bool is_clang_tidy = true;
+
+    // See the comment in this function where SetBasicCompilerInfo
+    // is called in the flags.is_gcc() if-statement.
+    if (!SetBasicCompilerInfo(clang_abs_local_compiler_path,
+                              clang_tidy_flags.compiler_info_flags(),
+                              compiler_info_envs,
+                              clang_tidy_flags.cwd(),
+                              "-x" + flags.lang(),
+                              clang_tidy_flags.is_cplusplus(),
+                              is_clang,
+                              is_clang_tidy,
+                              clang_tidy_flags.has_nostdinc(),
+                              data.get())) {
+      DCHECK(data->has_error_message());
+      // If error occurred in SetBasicCompilerInfo, we do not need to
+      // continue.
+      AddErrorMessage("Failed to set basic compiler info for "
+                      "corresponding clang: " + clang_abs_local_compiler_path,
+                      data.get());
+      LOG(ERROR) << data->error_message();
+      return data;
+    }
+  } else {
+    LOG(FATAL) << "Unknown compiler type";
+  }
+
+  return data;
+}
+
+// Extracts the include search directories from |gcc_v_output|, the text
+// that contains the blocks
+//   #include "..." search starts here:
+//   #include <...> search starts here:
+//   End of search list.
+//
+// - |qpaths| (may be nullptr): cleared, then filled with the entries listed
+//   between the first and the second marker.
+// - |paths|: cleared, then filled with the entries listed between the second
+//   marker and the end marker that are not framework directories.
+// - |framework_paths| (may be nullptr): entries suffixed with
+//   "(framework directory)" are appended here (suffix removed).  It is not
+//   cleared.  If it is nullptr such entries are dropped.
+//
+// Returns false if any marker is missing or the markers do not appear in
+// the expected order; output parameters are left untouched in that case.
+/* static */
+bool CompilerInfoBuilder::SplitGccIncludeOutput(
+    const string& gcc_v_output,
+    std::vector<string>* qpaths,
+    std::vector<string>* paths,
+    std::vector<string>* framework_paths) {
+  // TODO: use StringPiece for gcc_v_output etc.
+
+  static const string kQStartMarker("#include \"...\" search starts here:");
+  static const string kStartMarker("#include <...> search starts here:");
+  static const string kEndMarker("End of search list.");
+  size_t qstart_pos = gcc_v_output.find(kQStartMarker);
+  size_t start_pos = gcc_v_output.find(kStartMarker);
+  size_t end_pos = gcc_v_output.find(kEndMarker);
+  if (qstart_pos == string::npos ||
+      start_pos == string::npos || end_pos == string::npos) {
+    // something is wrong with output from gcc.
+    LOG(WARNING) << "gcc output is wrong. " << gcc_v_output;
+    return false;
+  }
+  // The substr() lengths below are computed with unsigned arithmetic, so
+  // the markers must appear in order; otherwise the subtraction wraps
+  // around and we would extract unrelated text.
+  if (start_pos < qstart_pos + kQStartMarker.size() ||
+      end_pos < start_pos + kStartMarker.size()) {
+    LOG(WARNING) << "gcc output is wrong. " << gcc_v_output;
+    return false;
+  }
+  if (qpaths != nullptr) {
+    string gcc_v_qsearch_paths(
+        gcc_v_output.substr(
+            qstart_pos + kQStartMarker.size(),
+            start_pos - qstart_pos - kQStartMarker.size()));
+    VLOG(2) << "extracted qsearch paths [" << gcc_v_qsearch_paths << "]";
+    qpaths->clear();
+    std::vector<string> split_qpaths;
+    SplitStringUsing(gcc_v_qsearch_paths, "\r\n", &split_qpaths);
+    for (const auto& split_qpath : split_qpaths) {
+      StringPiece qpath = StringStrip(split_qpath);
+      if (!qpath.empty()) {
+        qpaths->emplace_back(string(qpath));
+      }
+    }
+  }
+
+  string gcc_v_search_paths(
+      gcc_v_output.substr(
+          start_pos + kStartMarker.size(),
+          end_pos - start_pos - kStartMarker.size()));
+  VLOG(2) << "extracted search paths [" << gcc_v_search_paths << "]";
+  paths->clear();
+  std::vector<string> split_paths;
+  SplitStringUsing(gcc_v_search_paths, "\r\n", &split_paths);
+  for (const auto& split_path : split_paths) {
+    StringPiece path = StringStrip(split_path);
+    if (!path.empty()) {
+      static const char* kFrameworkMarker = "(framework directory)";
+      if (strings::EndsWith(path, kFrameworkMarker)) {
+        // Framework directories are reported separately, and only when the
+        // caller asked for them.
+        if (framework_paths) {
+          path.remove_suffix(strlen(kFrameworkMarker));
+          path = StringStrip(path);
+          framework_paths->emplace_back(path);
+        }
+      } else {
+        paths->emplace_back(path);
+      }
+    }
+  }
+  return true;
+}
+
+// Parses |feature_output|, the preprocessed output of the probe source
+// that yields one result line per probed item, and records every item
+// whose result is a non-zero number into |compiler_info|.
+//
+// Each FeatureList is a (names, count) pair.  Result lines are expected in
+// this order: |object_macros|, |function_macros|, |features|, |extensions|,
+// |attributes|, |cpp_attributes|, |declspec_attributes|, |builtins|.
+// Items from the two macro lists are stored as supported predefined macros;
+// items from the other lists are stored as key/value pairs.
+//
+// Returns false (after adding an error message to |compiler_info|) when
+// the number of result lines differs from the total number of items.
+/* static */
+bool CompilerInfoBuilder::ParseFeatures(
+    const string& feature_output,
+    FeatureList object_macros,
+    FeatureList function_macros,
+    FeatureList features,
+    FeatureList extensions,
+    FeatureList attributes,
+    FeatureList cpp_attributes,
+    FeatureList declspec_attributes,
+    FeatureList builtins,
+    CompilerInfoData* compiler_info) {
+  const size_t num_all_features =
+      object_macros.second + function_macros.second +
+      features.second + extensions.second + attributes.second +
+      cpp_attributes.second + declspec_attributes.second +
+      builtins.second;
+  std::vector<string> lines;
+  SplitStringUsing(feature_output, "\n", &lines);
+
+  size_t index = 0;
+  int expected_index = -1;
+  for (const auto& line : lines) {
+    if (line.empty())
+      continue;
+
+    if (line[0] == '#' && line.size() > 3) {
+      // expects:
+      // # <number> "<filename>"
+      // The number is 1-based, so convert it to a 0-based index.
+      expected_index = std::atoi(line.c_str() + 2) - 1;
+    }
+
+    if (line[0] == '#' || line[0] == '\0')
+      continue;
+
+    // <cctype> functions have undefined behavior for negative values, so
+    // convert to unsigned char first.
+    if (!(isalnum(static_cast<unsigned char>(line[0])) || line[0] == '_')) {
+      LOG(ERROR) << "Ignoring unexpected line in clang's output: "
+                 << line;
+      continue;
+    }
+
+    if (index >= num_all_features) {
+      LOG(ERROR) << "The number of known extensions is strange:"
+                 << " index=" << index
+                 << " feature_output=" << feature_output;
+      AddErrorMessage(
+          "goma error: unknown feature or extension detected.",
+          compiler_info);
+      return false;
+    }
+
+    size_t current_index = index++;
+    LOG_IF(WARNING,
+           expected_index < 0 ||
+           static_cast<size_t>(expected_index) != current_index)
+        << "index seems to be wrong."
+        << " current_index=" << current_index
+        << " expected_index=" << expected_index;
+
+    // The result is 0 or 1 in most cases.
+    // __has_cpp_attribute(xxx) can be 200809, 201309, though.
+    // Anyway, we remember the value that is all digit.
+
+    bool all_digit = true;
+    for (char c : line) {
+      if (!isdigit(static_cast<unsigned char>(c))) {
+        all_digit = false;
+        break;
+      }
+    }
+    // A line that is not a number (e.g. an unexpanded probe) counts as 0,
+    // i.e. "not supported".
+    int value = all_digit ? std::atoi(line.c_str()) : 0;
+    if (value == 0)
+      continue;
+
+    // Find the list |current_index| falls into by walking the lists in
+    // order and subtracting the size of each list that is passed.
+    if (current_index < object_macros.second) {
+      compiler_info->add_supported_predefined_macros(
+          object_macros.first[current_index]);
+      continue;
+    }
+    current_index -= object_macros.second;
+    if (current_index < function_macros.second) {
+      compiler_info->add_supported_predefined_macros(
+          function_macros.first[current_index]);
+      continue;
+    }
+    current_index -= function_macros.second;
+    if (current_index < features.second) {
+      CompilerInfoData::MacroValue* m = compiler_info->add_has_feature();
+      m->set_key(features.first[current_index]);
+      m->set_value(value);
+      continue;
+    }
+    current_index -= features.second;
+    if (current_index < extensions.second) {
+      CompilerInfoData::MacroValue* m = compiler_info->add_has_extension();
+      m->set_key(extensions.first[current_index]);
+      m->set_value(value);
+      continue;
+    }
+    current_index -= extensions.second;
+    if (current_index < attributes.second) {
+      CompilerInfoData::MacroValue* m = compiler_info->add_has_attribute();
+      m->set_key(attributes.first[current_index]);
+      m->set_value(value);
+      continue;
+    }
+    current_index -= attributes.second;
+    if (current_index < cpp_attributes.second) {
+      CompilerInfoData::MacroValue* m = compiler_info->add_has_cpp_attribute();
+      m->set_key(cpp_attributes.first[current_index]);
+      m->set_value(value);
+      continue;
+    }
+    current_index -= cpp_attributes.second;
+    if (current_index < declspec_attributes.second) {
+      CompilerInfoData::MacroValue* m =
+          compiler_info->add_has_declspec_attribute();
+      m->set_key(declspec_attributes.first[current_index]);
+      m->set_value(value);
+      continue;
+    }
+    current_index -= declspec_attributes.second;
+    if (current_index < builtins.second) {
+      CompilerInfoData::MacroValue* m = compiler_info->add_has_builtin();
+      m->set_key(builtins.first[current_index]);
+      m->set_value(value);
+      continue;
+    }
+
+    // Since we've checked index range, must not reach here.
+    LOG(FATAL) << "The number of features exceeds the expected number:"
+               << " expected=" << num_all_features
+               << " actual=" << (index - 1);
+  }
+
+  // Every probe must have produced exactly one result line.
+  if (index != num_all_features) {
+    LOG(ERROR)  << "The number of features should be "
+                << "the expected number:"
+                << " expected=" << num_all_features
+                << " actual=" << index
+                << " feature_output=" << feature_output;
+    AddErrorMessage(
+        "goma error: failed to detect clang features.",
+        compiler_info);
+    return false;
+  }
+  return true;
+}
+
+// Generates a probe source that checks which predefined macros, features,
+// extensions, attributes and builtins the compiler supports, preprocesses
+// it with |normal_compiler_path| (-E), and hands the output to
+// ParseFeatures() to fill |compiler_info|.
+//
+// Returns false if the temporary probe file cannot be created, written or
+// closed, or if ParseFeatures() fails.  A non-zero exit status of the
+// compiler is only logged.
+/* static */
+bool CompilerInfoBuilder::GetPredefinedFeaturesAndExtensions(
+    const string& normal_compiler_path,
+    const string& lang_flag,
+    const std::vector<string>& compiler_info_flags,
+    const std::vector<string>& compiler_info_envs,
+    const string& cwd,
+    CompilerInfoData* compiler_info) {
+  std::ostringstream oss;
+
+  // 1-based number of the probe being emitted.
+  int index = 0;
+
+  // Writes "#<index>", a line marker that makes the preprocessor keep one
+  // output line per probe.
+  const auto emit_line_marker = [&oss, &index]() {
+    oss << '#' << index << '\n';
+  };
+
+  // Object-like predefined macros (__FILE__, __LINE__, __COUNTER__, ...):
+  // the probe prints 1 when the macro is defined and 0 otherwise.
+  for (int i = 0; i < kPredefinedObjectMacroSize; ++i) {
+    ++index;
+    oss << "#ifdef " << kPredefinedObjectMacros[i] << "\n";
+    emit_line_marker();
+    oss << "1\n"
+        << "#else\n";
+    emit_line_marker();
+    oss << "0\n"
+        << "#endif\n";
+  }
+
+  // Function-like predefined macros (__has_include(), __has_feature(),
+  // __has_extension(), ...), probed the same way.
+  for (int i = 0; i < kPredefinedFunctionMacroSize; ++i) {
+    ++index;
+    oss << "#ifdef " << kPredefinedFunctionMacros[i] << "\n";
+    emit_line_marker();
+    oss << "1\n"
+        << "#else\n";
+    emit_line_marker();
+    oss << "0\n"
+        << "#endif\n";
+  }
+
+  // Fallback definitions so that the probes below still preprocess when
+  // the compiler lacks one of the __has_xxx macros.
+  oss << "#ifndef __has_feature\n"
+      << "# define __has_feature(x) 0\n"
+      << "#endif\n";
+  oss << "#ifndef __has_extension\n"
+      << "# define __has_extension(x) 0\n"
+      << "#endif\n";
+  oss << "#ifndef __has_attribute\n"
+      << "# define __has_attribute(x) 0\n"
+      << "#endif\n";
+  oss << "#ifndef __has_cpp_attribute\n"
+      << "# define __has_cpp_attribute(x) 0\n"
+      << "#endif\n";
+  oss << "#ifndef __has_declspec_attribute\n"
+      << "# define __has_declspec_attribute(x) 0\n"
+      << "#endif\n";
+  oss << "#ifndef __has_builtin\n"
+      << "# define __has_builtin(x) 0\n"
+      << "#endif\n";
+
+  for (size_t i = 0; i < NUM_KNOWN_FEATURES; i++) {
+    ++index;
+    emit_line_marker();
+    oss << "__has_feature(" << KNOWN_FEATURES[i] << ")\n";
+  }
+  for (size_t i = 0; i < NUM_KNOWN_EXTENSIONS; i++) {
+    ++index;
+    emit_line_marker();
+    oss << "__has_extension(" << KNOWN_EXTENSIONS[i] << ")\n";
+  }
+  for (size_t i = 0; i < NUM_KNOWN_ATTRIBUTES; i++) {
+    ++index;
+    emit_line_marker();
+    oss << "__has_attribute(" << KNOWN_ATTRIBUTES[i] << ")\n";
+  }
+  // __has_cpp_attribute is probed only in c++ mode, since preprocessing it
+  // in c mode fails.  Other modes emit a constant 0 to keep the numbering.
+  const bool probe_cpp_attributes = (lang_flag == "-xc++");
+  for (size_t i = 0; i < NUM_KNOWN_CPP_ATTRIBUTES; i++) {
+    ++index;
+    emit_line_marker();
+    if (probe_cpp_attributes) {
+      oss << "__has_cpp_attribute(" << KNOWN_CPP_ATTRIBUTES[i] << ")\n";
+    } else {
+      oss << "0\n";
+    }
+  }
+  for (size_t i = 0; i < NUM_KNOWN_DECLSPEC_ATTRIBUTES; i++) {
+    ++index;
+    emit_line_marker();
+    oss << "__has_declspec_attribute(" << KNOWN_DECLSPEC_ATTRIBUTES[i] << ")\n";
+  }
+  for (size_t i = 0; i < NUM_KNOWN_BUILTINS; i++) {
+    ++index;
+    emit_line_marker();
+    oss << "__has_builtin(" << KNOWN_BUILTINS[i] << ")\n";
+  }
+
+  const string source = oss.str();
+  VLOG(1) << "source=" << source;
+
+  // Store the probe source in a temporary file for the compiler to read.
+  ScopedTmpFile tmp_file("goma_compiler_proxy_check_features_");
+  if (!tmp_file.valid()) {
+    PLOG(ERROR) << "failed to make temp file: " << tmp_file.filename();
+    AddErrorMessage(
+        "goma error: failed to create a temp. file.",
+        compiler_info);
+    return false;
+  }
+
+  const ssize_t written = tmp_file.Write(source.data(), source.size());
+  if (written != static_cast<ssize_t>(source.size())) {
+    PLOG(ERROR) << "Failed to write source into " << tmp_file.filename()
+                << ": " << source.size() << " vs " << written;
+    AddErrorMessage("goma error: failed to write a temp file.",
+                    compiler_info);
+    return false;
+  }
+  // Nothing more is appended to |tmp_file|, and keeping it opened may cause
+  // a trouble on Windows, so close it now.
+  // Note: |tmp_file.filename()| is kept until the end of the scope.
+  if (!tmp_file.Close()) {
+    PLOG(ERROR) << "failed to close temp file: " << tmp_file.filename();
+    AddErrorMessage("goma error: failed to close a temp. file.",
+                    compiler_info);
+    return false;
+  }
+
+  // <compiler> <compiler_info_flags...> <lang_flag> -E <probe file>
+  std::vector<string> argv;
+  argv.reserve(compiler_info_flags.size() + 4);
+  argv.push_back(normal_compiler_path);
+  argv.insert(argv.end(),
+              compiler_info_flags.begin(), compiler_info_flags.end());
+  argv.push_back(lang_flag);
+  argv.push_back("-E");
+  argv.push_back(tmp_file.filename());
+
+  // LC_ALL=C comes first, followed by the caller supplied environment.
+  std::vector<string> env;
+  env.reserve(compiler_info_envs.size() + 1);
+  env.push_back("LC_ALL=C");
+  env.insert(env.end(), compiler_info_envs.begin(), compiler_info_envs.end());
+
+  int32_t status = 0;
+  const string out = ReadCommandOutput(normal_compiler_path, argv, env, cwd,
+                                       STDOUT_ONLY, &status);
+  VLOG(1) << "out=" << out;
+  LOG_IF(ERROR, status != 0)
+      << "Read of features and extensions did not ends with status 0."
+      << " normal_compiler_path=" << normal_compiler_path
+      << " status=" << status
+      << " argv=" << argv
+      << " env=" << env
+      << " cwd=" << cwd
+      << " out=" << out;
+
+  // The lists must be passed in the same order the probes were emitted.
+  return ParseFeatures(
+      out,
+      std::make_pair(kPredefinedObjectMacros, kPredefinedObjectMacroSize),
+      std::make_pair(kPredefinedFunctionMacros, kPredefinedFunctionMacroSize),
+      std::make_pair(KNOWN_FEATURES, NUM_KNOWN_FEATURES),
+      std::make_pair(KNOWN_EXTENSIONS, NUM_KNOWN_EXTENSIONS),
+      std::make_pair(KNOWN_ATTRIBUTES, NUM_KNOWN_ATTRIBUTES),
+      std::make_pair(KNOWN_CPP_ATTRIBUTES, NUM_KNOWN_CPP_ATTRIBUTES),
+      std::make_pair(KNOWN_DECLSPEC_ATTRIBUTES, NUM_KNOWN_DECLSPEC_ATTRIBUTES),
+      std::make_pair(KNOWN_BUILTINS, NUM_KNOWN_BUILTINS),
+      compiler_info);
+}
+
+// Parses the driver arguments found in |cxx_display_output| and lets the
+// flag parser report the "fuse-init-array" flag into |flags|.
+// Returns false if the driver arguments cannot be parsed.
+/* static */
+bool CompilerInfoBuilder::GetAdditionalFlags(
+    const string& cxx_display_output, std::vector<string>* flags) {
+  std::vector<string> driver_args;
+  const bool parsed = ParseDriverArgs(cxx_display_output, &driver_args);
+  if (parsed) {
+    FlagParser parser;
+    GCCFlags::DefineFlags(&parser);
+    parser.AddBoolFlag("fuse-init-array")->SetOutput(flags);
+    parser.Parse(driver_args);
+  }
+  return parsed;
+}
+
+// Looks for the "resource-dir" flag in the driver arguments found in
+// |c_display_output| and stores its last value into |compiler_info|.
+// Returns false if the driver arguments cannot be parsed, the flag is not
+// seen, or its value is empty.
+/* static */
+bool CompilerInfoBuilder::GetResourceDir(const string& c_display_output,
+                                         CompilerInfoData* compiler_info) {
+  std::vector<string> driver_args;
+  if (!ParseDriverArgs(c_display_output, &driver_args)) {
+    return false;
+  }
+
+  FlagParser parser;
+  GCCFlags::DefineFlags(&parser);
+  FlagParser::Flag* resource_dir_flag = parser.AddFlag("resource-dir");
+  parser.Parse(driver_args);
+
+  if (resource_dir_flag->seen()) {
+    const string resource_dir = resource_dir_flag->GetLastValue();
+    if (!resource_dir.empty()) {
+      compiler_info->set_resource_dir(resource_dir);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Flag parser callback that collects clang plugin paths.
+//
+// It watches the values passed to it one by one: the value that follows
+// "-load" is taken as a plugin path and appended to |subprograms|.
+// Each distinct path is appended only once.
+class GetClangPluginPath : public FlagParser::Callback {
+ public:
+  // |subprograms| receives the plugin paths; this class stores the pointer
+  // and writes through it in ParseFlagValue().
+  GetClangPluginPath(
+      std::vector<string>* subprograms)
+      : load_seen_(false), subprograms_(subprograms) {}
+  ~GetClangPluginPath() override {}
+
+  // Records |value| as a plugin path if the previous value was "-load".
+  // Always returns |value| unchanged.
+  string ParseFlagValue(const FlagParser::Flag& flag ALLOW_UNUSED,
+                        const string& value) override {
+    if (load_seen_) {
+      load_seen_ = false;
+      if (used_plugin_.insert(value).second) {
+        subprograms_->push_back(value);
+      } else {
+        // Already recorded; really skip it, as the message says.
+        LOG(INFO) << "The same plugin is trying to be added more than twice."
+                  << " Let us ignore it to reduce subprogram spec size."
+                  << " path=" << value;
+      }
+    }
+    if (value == "-load") {
+      load_seen_ = true;
+    }
+    return value;
+  }
+
+ private:
+  // True when the previous value was "-load".
+  bool load_seen_;
+  // Output list of plugin paths (pointer supplied to the constructor).
+  std::vector<string>* subprograms_;
+  // Plugin paths recorded so far, used to detect duplicates.
+  std::set<string> used_plugin_;
+};
+
+// Registers the subprograms used with |normal_gcc_path| into
+// |compiler_info|: the clang plugins given in the flags, then the
+// subprograms reported by GetSubprograms() that are either the assembler
+// (when no-integrated-as is set) or located under a -B option.
+// Paths are made absolute against gcc_flags.cwd() and registered once.
+//
+// Returns false if a subprogram cannot be registered, if the subprograms
+// cannot be obtained, or if no-integrated-as is set but no "as" is found.
+/* static */
+bool CompilerInfoBuilder::GetExtraSubprograms(
+    const string& normal_gcc_path,
+    const GCCFlags& gcc_flags,
+    const std::vector<string>& compiler_info_envs,
+    CompilerInfoData* compiler_info) {
+  // TODO: support linker subprograms on linking.
+  std::vector<string> plugin_paths;
+  std::vector<string> b_option_dirs;
+  bool no_integrated_as = false;
+  CompilerInfoBuilder::ParseSubprogramFlags(normal_gcc_path, gcc_flags,
+                                            &plugin_paths,
+                                            &b_option_dirs,
+                                            &no_integrated_as);
+
+  // Absolute paths registered so far (plugins and subprograms together).
+  std::set<string> registered;
+
+  for (const string& plugin : plugin_paths) {
+    const string abs_plugin =
+        file::JoinPathRespectAbsolute(gcc_flags.cwd(), plugin);
+    const bool newly_added = registered.insert(abs_plugin).second;
+    if (!newly_added) {
+      LOG(INFO) << "ignored duplicated subprogram: " << abs_plugin;
+      continue;
+    }
+    if (AddSubprogramInfo(abs_plugin, compiler_info->mutable_subprograms())) {
+      continue;
+    }
+    LOG(ERROR) << "invalid plugin:"
+               << " absolute_path=" << abs_plugin
+               << " normal_gcc_path=" << normal_gcc_path
+               << " compiler_info_flags="
+               << gcc_flags.compiler_info_flags();
+    return false;
+  }
+
+  std::vector<string> reported_paths;
+  const bool got_subprograms = CompilerInfoBuilder::GetSubprograms(
+      normal_gcc_path, gcc_flags.lang(), gcc_flags.compiler_info_flags(),
+      compiler_info_envs, gcc_flags.cwd(), no_integrated_as,
+      &reported_paths);
+  if (!got_subprograms) {
+    LOG(ERROR) << "failed to get subprograms.";
+    return false;
+  }
+  if (no_integrated_as && !CompilerInfoBuilder::HasAsPath(reported_paths)) {
+    LOG(ERROR) << "no_integrated_as is set but we cannot find as.";
+    return false;
+  }
+
+  // True if |candidate| starts with one of the -B option values.
+  const auto is_under_b_option = [&b_option_dirs](const string& candidate) {
+    for (const string& dir : b_option_dirs) {
+      if (strings::StartsWith(candidate, dir)) {
+        return true;
+      }
+    }
+    return false;
+  };
+
+  for (const string& reported : reported_paths) {
+    // The assembler is sent when no-integrated-as is set.  Otherwise list
+    // only subprograms under -B path for backward compatibility.
+    // See b/63082235
+    const bool may_register =
+        (no_integrated_as && strings::EndsWith(reported, "as")) ||
+        is_under_b_option(reported);
+    if (!may_register) {
+      LOG(INFO) << "showed up as subprogram but not sent for"
+                << " backword compatibility."
+                << " path=" << reported
+                << " normal_gcc_path=" << normal_gcc_path
+                << " compiler_info_flags=" << gcc_flags.compiler_info_flags();
+      continue;
+    }
+
+    const string abs_subprogram =
+        file::JoinPathRespectAbsolute(gcc_flags.cwd(), reported);
+    const bool newly_added = registered.insert(abs_subprogram).second;
+    if (!newly_added) {
+      LOG(INFO) << "ignored duplicated subprogram: " << abs_subprogram;
+      continue;
+    }
+    if (AddSubprogramInfo(abs_subprogram,
+                          compiler_info->mutable_subprograms())) {
+      continue;
+    }
+    LOG(ERROR) << "invalid subprogram:"
+               << " absolute_path=" << abs_subprogram
+               << " normal_gcc_path=" << normal_gcc_path
+               << " compiler_info_flags="
+               << gcc_flags.compiler_info_flags();
+    return false;
+  }
+
+  return true;
+}
+
+// Parses |normal_gcc_path| followed by gcc_flags.compiler_info_flags() and
+// reports the flags that matter for subprogram detection:
+// - |clang_plugins|: plugin paths collected by GetClangPluginPath from the
+//   "Xclang" flag values.
+// - |B_options|: values of the "B" flag, appended to the vector.
+// - |no_integrated_as|: seen-output of "no-integrated-as" and
+//   "fno-integrated-as".
+/* static */
+void CompilerInfoBuilder::ParseSubprogramFlags(
+    const string& normal_gcc_path,
+    const GCCFlags& gcc_flags,
+    std::vector<string>* clang_plugins,
+    std::vector<string>* B_options,
+    bool* no_integrated_as) {
+  FlagParser parser;
+  GCCFlags::DefineFlags(&parser);
+
+  // Clang plugin support.  The callback object must stay alive until
+  // Parse() has run, so it lives in this scope.
+  GetClangPluginPath plugin_collector(clang_plugins);
+  FlagParser::Flag* xclang_flag = parser.AddFlag("Xclang");
+  xclang_flag->SetCallbackForParsedArgs(&plugin_collector);
+
+  // Support no-integrated-as; both spellings report into the same bool.
+  FlagParser::Flag* no_ias_flag = parser.AddBoolFlag("no-integrated-as");
+  no_ias_flag->SetSeenOutput(no_integrated_as);
+  FlagParser::Flag* fno_ias_flag = parser.AddBoolFlag("fno-integrated-as");
+  fno_ias_flag->SetSeenOutput(no_integrated_as);
+
+  // Parse -B options.
+  FlagParser::Flag* b_flag = parser.AddBoolFlag("B");
+
+  // The command line to parse: the compiler followed by its flags.
+  const std::vector<string>& info_flags = gcc_flags.compiler_info_flags();
+  std::vector<string> command_line;
+  command_line.reserve(info_flags.size() + 1);
+  command_line.push_back(normal_gcc_path);
+  command_line.insert(command_line.end(),
+                      info_flags.begin(), info_flags.end());
+  parser.Parse(command_line);
+
+  const auto& b_values = b_flag->values();
+  B_options->insert(B_options->end(), b_values.cbegin(), b_values.cend());
+}
+
+
+
+// Scans |gcc_output| for command lines (lines that start with a space) and
+// appends to |paths| the command of each line whose basename is, or ends
+// with "-" followed by, one of: as, objcopy, cc1, cc1plus, cpp, nm.
+// Commands without a directory part and commands already seen are skipped.
+/* static */
+void CompilerInfoBuilder::ParseGetSubprogramsOutput(
+    const string& gcc_output, std::vector<string>* paths) {
+  const std::vector<string> kSubprogramNames = {
+    "as", "objcopy", "cc1", "cc1plus", "cpp", "nm"};
+
+  // True if |name| is a subprogram name, optionally with a prefix that
+  // ends with '-'.
+  const auto is_subprogram_name = [&kSubprogramNames](StringPiece name) {
+    for (const auto& subprogram : kSubprogramNames) {
+      if (name == subprogram || strings::EndsWith(name, "-" + subprogram)) {
+        return true;
+      }
+    }
+    return false;
+  };
+
+  std::set<string> seen_commands;
+  std::vector<string> output_lines;
+  SplitStringUsing(gcc_output, "\r\n", &output_lines);
+  for (const auto& output_line : output_lines) {
+    // Only lines that start with a space are parsed as command lines.
+    const bool is_command_line = !output_line.empty() && output_line[0] == ' ';
+    if (!is_command_line) {
+      continue;
+    }
+    std::vector<string> args;
+    // Since clang is not used on Windows now, this won't be the issue.
+    ParsePosixCommandLineToArgv(output_line, &args);
+    if (args.empty()) {
+      continue;
+    }
+    const string& command = args[0];
+    const StringPiece command_name = file::Basename(command);
+    if (command_name == command) {
+      // To keep backword compatibility, we do not add subprogram searched
+      // in PATH.
+      LOG(INFO) << "ignore subprogram searched in PATH."
+                << " cmd=" << command;
+      continue;
+    }
+    const bool first_time = seen_commands.insert(command).second;
+    if (first_time && is_subprogram_name(command_name)) {
+      paths->push_back(command);
+    }
+  }
+}
+
+// Runs |gcc_path| with |compiler_info_flags| plus
+// "-x<lang> -c <empty input> -o <tmp output> -v" and collects the
+// subprogram paths found in its output (stdout and stderr merged) into
+// |subprogs| via ParseGetSubprogramsOutput().
+//
+// If |warn_on_empty| is true, an error is logged when no subprogram was
+// found; the return value is not affected by that.
+// Returns false if the temporary directory/file cannot be created or the
+// command exits with a non-zero status.
+/* static */
+bool CompilerInfoBuilder::GetSubprograms(
+    const string& gcc_path,
+    const string& lang,
+    const std::vector<string>& compiler_info_flags,
+    const std::vector<string>& compiler_info_envs,
+    const string& cwd,
+    bool warn_on_empty,
+    std::vector<string>* subprogs) {
+  std::vector<string> argv = {gcc_path};
+  copy(compiler_info_flags.begin(), compiler_info_flags.end(),
+       back_inserter(argv));
+  // Since a compiler returns EXIT_FAILURE if fails to output file,
+  // we need to use a fake temporary file.
+  // Failure of writing *.dwo might be the reason.
+  ScopedTmpDir tmp("get_subprograms");
+  if (!tmp.valid()) {
+    LOG(ERROR) << "cannot make an empty directory";
+    return false;
+  }
+#ifdef _WIN32
+  // This code is used by NaCl gcc, PNaCl clang on Windows.
+  // Former uses /dev/null as null device, and latter recently uses NUL as
+  // null device.  To provide the same code to both, let me use temporary
+  // file for that.
+  ScopedTmpFile tmpfile("get_subprograms");
+  if (!tmpfile.valid()) {
+    LOG(ERROR) << "cannot make an empty file";
+    return false;
+  }
+  if (!tmpfile.Close()) {
+    // Do not fail here: the compiler run below reports an error if the
+    // file turns out to be unusable.
+    LOG(WARNING) << "failed to close temp file: " << tmpfile.filename();
+  }
+  const string& empty_file = tmpfile.filename();
+  VLOG(2) << "empty_file=" << empty_file;
+#else
+  const string& empty_file = "/dev/null";
+#endif
+  const string output_file = file::JoinPath(tmp.dirname(), "output");
+  VLOG(2) << "output_file=" << output_file;
+  argv.emplace_back("-x" + lang);
+  argv.emplace_back("-c");
+  argv.emplace_back(empty_file);
+  argv.emplace_back("-o");
+  argv.emplace_back(output_file);
+  argv.emplace_back("-v");
+  // Initialized like the other ReadCommandOutput() call sites in this file
+  // so that the check below never reads an indeterminate value.
+  int32_t status = 0;
+  string gcc_output(
+      ReadCommandOutput(gcc_path, argv, compiler_info_envs, cwd,
+                        MERGE_STDOUT_STDERR, &status));
+  if (status != 0) {
+    LOG(ERROR)
+      << "ReadCommandOutput exited with non zero status code."
+      << " gcc_path=" << gcc_path
+      << " status=" << status
+      << " argv=" << argv
+      << " env=" << compiler_info_envs
+      << " cwd=" << cwd
+      << " gcc_output=" << gcc_output;
+    return false;
+  }
+  VLOG(1) << "GetSubprograms:"
+      << " gcc_path=" << gcc_path
+      << " status=" << status
+      << " argv=" << argv
+      << " env=" << compiler_info_envs
+      << " cwd=" << cwd
+      << " gcc_output=" << gcc_output;
+  CompilerInfoBuilder::ParseGetSubprogramsOutput(gcc_output, subprogs);
+  LOG_IF(ERROR, warn_on_empty && subprogs->empty())
+      << "Expect to have at least one subprograms but empty."
+      << " gcc_path=" << gcc_path
+      << " status=" << status
+      << " argv=" << argv
+      << " env=" << compiler_info_envs
+      << " cwd=" << cwd
+      << " gcc_output=" << gcc_output;
+  return true;
+}
+
+// Returns true if any of |subprogram_paths| has a basename that is "as" or
+// ends with "-as".
+/* static */
+bool CompilerInfoBuilder::HasAsPath(
+    const std::vector<string>& subprogram_paths) {
+  for (const string& subprogram : subprogram_paths) {
+    const StringPiece name = file::Basename(subprogram);
+    const bool is_plain_as = (name == "as");
+    if (is_plain_as || strings::EndsWith(name, "-as")) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns the first double-quoted token in |v_out| if
+// CompilerFlags::IsClangCommand() accepts it.  Returns an empty string if
+// there is no complete quoted token or the token is not a clang command.
+/* static */
+string CompilerInfoBuilder::ParseRealClangPath(StringPiece v_out) {
+  // Drop everything up to and including the opening quote.
+  const StringPiece::size_type open_quote = v_out.find_first_of('"');
+  if (open_quote == StringPiece::npos) {
+    return "";
+  }
+  StringPiece after_quote = v_out;
+  after_quote.remove_prefix(open_quote + 1);
+
+  // The token ends right before the closing quote.
+  const StringPiece::size_type close_quote = after_quote.find_first_of('"');
+  if (close_quote == StringPiece::npos) {
+    return "";
+  }
+  const StringPiece quoted = after_quote.substr(0, close_quote);
+
+  if (CompilerFlags::IsClangCommand(quoted)) {
+    return string(quoted);
+  }
+  return "";
+}
+
+#if defined(__linux__) || defined(__MACH__)
+// Runs "|normal_gcc_path| -xc -v -E /dev/null" and extracts the clang path
+// from the merged stdout/stderr with ParseRealClangPath().
+// Returns that path if it is non-empty and executable (access(X_OK)),
+// otherwise an empty string.  A non-zero exit status is only logged.
+static string GetRealClangPath(const string& normal_gcc_path,
+                               const string& cwd,
+                               const std::vector<string>& envs) {
+  const std::vector<string> argv = {
+    normal_gcc_path, "-xc", "-v", "-E", "/dev/null",
+  };
+  int32_t status = 0;
+  const string v_output = ReadCommandOutput(normal_gcc_path, argv, envs, cwd,
+                                            MERGE_STDOUT_STDERR, &status);
+  LOG_IF(ERROR, status != 0)
+      << "ReadCommandOutput exited with non zero status code."
+      << " normal_gcc_path=" << normal_gcc_path
+      << " status=" << status
+      << " argv=" << argv
+      << " envs=" << envs
+      << " cwd=" << cwd
+      << " v_output=" << v_output;
+
+  const string real_clang = CompilerInfoBuilder::ParseRealClangPath(v_output);
+  if (real_clang.empty()) {
+    return string();
+  }
+  if (access(real_clang.c_str(), X_OK) != 0) {
+    return string();
+  }
+  return real_clang;
+}
+#endif
+
+// Resolves the binary that actually performs compilation when
+// |normal_gcc_path| may be a wrapper.  |cwd| and |envs| are passed to any
+// compiler invocation needed to find out.
+//
+// Behavior by platform, as implemented below:
+// - Neither Linux, Mac nor Windows: |normal_gcc_path| is returned as-is.
+// - Linux/Mac with a clang command: the path reported by GetRealClangPath;
+//   on Linux, that path with ".elf" appended is preferred when it is
+//   executable.  Falls back to |normal_gcc_path| when nothing is detected.
+// - Linux otherwise: the COLLECT_GCC value found in "-v" output, preferring
+//   "<COLLECT_GCC>.real" when it is readable; |normal_gcc_path| when
+//   COLLECT_GCC is absent.
+// - Mac otherwise: only commands whose directory is /usr/bin are probed
+//   with GetRealClangPath; everything else is returned as-is.
+// - Windows: for NaCl gcc commands, "libexec/<basename>" under the NaCl
+//   toolchain root when it is executable; otherwise |normal_gcc_path|.
+/* static */
+string CompilerInfoBuilder::GetRealCompilerPath(
+    const string& normal_gcc_path,
+    const string& cwd,
+    const std::vector<string>& envs) {
+#if !defined(__linux__) && !defined(__MACH__) && !defined(_WIN32)
+  return normal_gcc_path;
+#endif
+
+#if defined(__linux__) || defined(__MACH__)
+  // For whom using a wrapper script for clang.
+  // E.g. ChromeOS clang and Android.
+  //
+  // Since clang invokes itself as cc1, we can find its real name by capturing
+  // what is cc1.  Exception is that it is invoked via a shell script that
+  // invokes loader, which might be only done by ChromeOS clang.
+  //
+  // For pnacl-clang, although we still use binary_hash of local_compiler for
+  // command_spec in request, we also need real compiler to check toolchain
+  // update for compiler_info_cache.
+  if (CompilerFlags::IsClangCommand(normal_gcc_path)) {
+    const string real_path = GetRealClangPath(normal_gcc_path, cwd, envs);
+    if (real_path.empty()) {
+      LOG(WARNING) << "seems not be a clang?"
+                   << " normal_gcc_path=" << normal_gcc_path;
+      return normal_gcc_path;
+    }
+#ifndef __linux__
+    return real_path;
+#else
+    // Ubuntu Linux is required to build ChromeOS.
+    // We do not need to consider ChromeOS clang for Mac.
+    // http://www.chromium.org/chromium-os/quick-start-guide
+    //
+    // Consider the clang is ChromeOS clang, which runs via a wrapper.
+    // TODO: more reliable ways?
+    string real_chromeos_clang_path = real_path + ".elf";
+    if (access(real_chromeos_clang_path.c_str(), X_OK) == 0) {
+      return real_chromeos_clang_path;
+    } else {
+      return real_path;
+    }
+#endif
+  }
+#endif
+
+#ifdef __linux__
+  // For ChromeOS compilers.
+  // Note: Ubuntu Linux is required to build ChromeOS.
+  // http://www.chromium.org/chromium-os/quick-start-guide
+  std::vector<string> argv;
+  argv.push_back(normal_gcc_path);
+  argv.push_back("-v");
+  int32_t status = 0;
+  const string& v_output = ReadCommandOutput(normal_gcc_path, argv, envs, cwd,
+                                             MERGE_STDOUT_STDERR, &status);
+  // A non-zero status is only logged; the output is still searched below.
+  LOG_IF(ERROR, status != 0)
+      << "ReadCommandOutput exited with non zero status code."
+      << " normal_gcc_path=" << normal_gcc_path
+      << " status=" << status
+      << " argv=" << argv
+      << " envs=" << envs
+      << " cwd=" << cwd
+      << " v_output=" << v_output;
+  const char* kCollectGcc = "COLLECT_GCC=";
+  size_t index = v_output.find(kCollectGcc);
+  if (index == string::npos)
+    return normal_gcc_path;
+  index += strlen(kCollectGcc);
+
+  // If COLLECT_GCC is specified and gcc is accompanied by gcc.real,
+  // we assume the "real" one is the last binary we will run.
+  // TODO: More reliable ways?
+  // The value runs to the end of the line; when no line terminator follows,
+  // the length wraps around and substr() takes the rest of the output.
+  const string& gcc_path =
+      v_output.substr(index, v_output.find_first_of("\r\n", index) - index);
+  const string& real_gcc_path = gcc_path + ".real";
+  if (access(real_gcc_path.c_str(), R_OK) == 0) {
+    return real_gcc_path;
+  }
+  return gcc_path;
+#endif
+
+#ifdef __MACH__
+  if (file::Dirname(normal_gcc_path) != "/usr/bin") {
+    return normal_gcc_path;
+  }
+  const string clang_path = GetRealClangPath(normal_gcc_path, cwd, envs);
+  if (!clang_path.empty()) {
+    return clang_path;
+  }
+  LOG(INFO) << "The command seems not clang. Use it as-is: "
+            << normal_gcc_path;
+  return normal_gcc_path;
+#endif
+
+#ifdef _WIN32
+  // For Windows nacl-{gcc,g++}.
+  // The real binary is ../libexec/nacl-{gcc,g++}.exe.  Binaries under
+  // the bin directory are just wrappers to them.
+  if (CompilerFlags::IsNaClGCCCommand(normal_gcc_path)) {
+    const string& candidate_path = file::JoinPath(
+        GetNaClToolchainRoot(normal_gcc_path),
+        file::JoinPath("libexec", file::Basename(normal_gcc_path)));
+    if (access(candidate_path.c_str(), X_OK) == 0)
+      return candidate_path;
+  }
+  return normal_gcc_path;
+#endif
+}
+
+// Maps |subprog_path| to the binary that should be treated as the real
+// subprogram.  On non-Linux builds the path is returned unchanged.  On
+// Linux only a path of the form ".../binutils-bin/<dir>/objcopy" is
+// rewritten, to "<dir without a trailing -gold>/objcopy.elf", and only if
+// FileId reports that file as valid; otherwise |subprog_path| is returned.
+/* static */
+string CompilerInfoBuilder::GetRealSubprogramPath(
+    const string& subprog_path) {
+#ifndef __linux__
+  return subprog_path;
+#else
+  // Currently, we only see objcopy runs via shell script wrapper, and
+  // nothing else (i.e. no as or so). (b/30571185)
+  if (file::Basename(subprog_path) != "objcopy") {
+    return subprog_path;
+  }
+
+  // Assume ChromeOS objcopy is always in
+  // "<target arch>/binutils-bin/<version>-gold/objcopy",
+  // and real objcopy is in
+  // "<target arch>/binutils-bin/<version>/objcopy.elf".
+  if (file::Basename(
+      file::Dirname(file::Dirname(subprog_path))) != "binutils-bin") {
+    return subprog_path;
+  }
+  StringPiece dirname = file::Dirname(subprog_path);
+  static const char kGoldSuffix[] = "-gold";
+  if (strings::EndsWith(dirname, kGoldSuffix)) {
+    // sizeof() counts the terminating '\0', hence the -1.
+    dirname.remove_suffix(sizeof(kGoldSuffix) - 1);
+  }
+  const string new_subprog_path = file::JoinPath(dirname, "objcopy.elf");
+  // Only switch to the .elf binary when it is actually present.
+  FileId new_id(new_subprog_path);
+  if (!new_id.IsValid()) {
+    LOG(INFO) << ".elf does not exist, might not be chromeos path?"
+              << " expect to exist=" << new_subprog_path
+              << " orignal subprog_path=" << subprog_path;
+    return subprog_path;
+  }
+  LOG(INFO) << "Hack for objcopy used for ChromeOS simple chrome build:"
+            << " apparent subprog_path=" << subprog_path
+            << " real subprog_path=" << new_subprog_path;
+  return new_subprog_path;
+#endif
+}
+
+// Extracts the version from "javac -version" output.  |version_info| is
+// right-stripped and stored in |*version|; if it starts with "javac " the
+// prefix is removed and true is returned.  Otherwise false is returned and
+// |*version| keeps the stripped output.
+/* static */
+bool CompilerInfoBuilder::ParseJavacVersion(const string& version_info,
+                                            string* version) {
+  static const char kJavacPrefix[] = "javac ";
+
+  *version = string(StringRstrip(version_info));
+  if (strings::StartsWith(*version, kJavacPrefix)) {
+    // sizeof() includes the terminating '\0', which is not part of the
+    // prefix.
+    version->erase(0, sizeof(kJavacPrefix) - 1);
+    return true;
+  }
+
+  LOG(ERROR) << "Unable to parse javac -version output:"
+             << *version;
+  return false;
+}
+
+// Runs "|javac| -version" in |cwd| with |compiler_info_envs| plus LC_ALL=C
+// and parses the merged stdout/stderr with ParseJavacVersion.
+// Returns the result of the parse.  A non-zero exit status is logged but
+// does not change the return value.
+/* static */
+bool CompilerInfoBuilder::GetJavacVersion(
+    const string& javac,
+    const std::vector<string>& compiler_info_envs,
+    const string& cwd,
+    string* version) {
+  std::vector<string> argv{javac, "-version"};
+
+  std::vector<string> env(compiler_info_envs);
+  env.push_back("LC_ALL=C");
+
+  int32_t status = 0;
+  const string output =
+      ReadCommandOutput(javac, argv, env, cwd, MERGE_STDOUT_STDERR, &status);
+  const bool parsed = ParseJavacVersion(output, version);
+  if (status != 0) {
+    LOG(ERROR) << "ReadCommandOutput exited with non zero status code."
+               << " javac=" << javac
+               << " status=" << status
+               << " argv=" << argv
+               << " env=" << env
+               << " cwd=" << cwd;
+  }
+  return parsed;
+}
+
+// Parses the cl.exe logo |vc_logo|, which is expected to contain
+//   ... Version 16.00.40219.01 for 80x86<CR>
+// and stores "16.00.40219.01" in |*version| and "80x86" in |*target|.
+// Returns false, leaving the outputs untouched, when the logo does not
+// contain "Version ", then " for" plus one separator character, then "\r",
+// in that order.
+/* static */
+bool CompilerInfoBuilder::ParseVCVersion(
+    const string& vc_logo, string* version, string* target) {
+  static const char kVersion[] = "Version ";
+  static const char kFor[] = " for";
+  const string::size_type kVersionLength = sizeof(kVersion) - 1;
+  // Length of " for" plus the single separator character that follows it.
+  const string::size_type kForLength = sizeof(kFor) - 1 + 1;
+
+  // Each marker is searched for after the previous one, so an unrelated
+  // " for" or "\r" earlier in the logo cannot break parsing, and the
+  // length computations below can never wrap around.
+  string::size_type version_begin = vc_logo.find(kVersion);
+  string::size_type for_pos = string::npos;
+  string::size_type line_end = string::npos;
+  if (version_begin != string::npos) {
+    version_begin += kVersionLength;
+    for_pos = vc_logo.find(kFor, version_begin);
+  }
+  if (for_pos != string::npos) {
+    line_end = vc_logo.find("\r", for_pos);
+  }
+  // |line_end| is npos if any marker is missing.  It is smaller than
+  // |for_pos| + kForLength when the line ends right after " for", i.e.
+  // there is no target to extract.
+  if (line_end == string::npos || line_end < for_pos + kForLength) {
+    LOG(INFO) << "Unable to parse cl.exe output."
+              << " vc_logo=" << vc_logo;
+    return false;
+  }
+  *version = vc_logo.substr(version_begin, for_pos - version_begin);
+  *target = vc_logo.substr(for_pos + kForLength,
+                           line_end - for_pos - kForLength);
+  return true;
+}
+
+
+// Runs |cl_exe_path| with no arguments in |cwd| with |env| and parses the
+// logo it prints (merged stdout/stderr) with ParseVCVersion.
+// Returns false if the command exits with a non-zero status or the logo
+// cannot be parsed; both cases are logged.
+/* static */
+bool CompilerInfoBuilder::GetVCVersion(
+    const string& cl_exe_path, const std::vector<string>& env,
+    const string& cwd,
+    string* version, string* target) {
+  std::vector<string> argv(1, cl_exe_path);
+
+  int32_t status = 0;
+  const string vc_logo = ReadCommandOutput(cl_exe_path, argv, env, cwd,
+                                           MERGE_STDOUT_STDERR, &status);
+  if (status != 0) {
+    LOG(ERROR) << "ReadCommandOutput exited with non zero status code."
+               << " cl_exe_path=" << cl_exe_path
+               << " status=" << status
+               << " argv=" << argv
+               << " env=" << env
+               << " cwd=" << cwd
+               << " vc_logo=" << vc_logo;
+    return false;
+  }
+
+  if (CompilerInfoBuilder::ParseVCVersion(vc_logo, version, target)) {
+    return true;
+  }
+  LOG(ERROR) << "Failed to parse VCVersion."
+             << " cl_exe_path=" << cl_exe_path
+             << " status=" << status
+             << " argv=" << argv
+             << " env=" << env
+             << " cwd=" << cwd
+             << " vc_logo=" << vc_logo;
+  return false;
+}
+
+// Parses |output| as a cl.exe command line (without the command name).
+// The include directories found by VCFlags are appended to
+// |*include_paths|.  If |predefined_macros| is not nullptr, one "#define"
+// line per command-line macro is appended to it: "NAME=VALUE" becomes
+// "#define NAME VALUE", a bare "NAME" becomes "#define NAME".
+// Returns false if the command line or the flags cannot be parsed.
+/* static */
+bool CompilerInfoBuilder::ParseVCOutputString(
+    const string& output,
+    std::vector<string>* include_paths,
+    string* predefined_macros) {
+  // |output| doesn't contains command name, so adds "cl.exe" here.
+  std::vector<string> args(1, "cl.exe");
+  if (!ParseWinCommandLineToArgv(output, &args)) {
+    LOG(ERROR) << "Fail parse cmdline:" << output;
+    return false;
+  }
+
+  VCFlags flags(args, ".");
+  if (!flags.is_successful()) {
+    LOG(ERROR) << "ParseVCOutput error:" << flags.fail_message();
+    return false;
+  }
+
+  for (const auto& dir : flags.include_dirs()) {
+    include_paths->push_back(dir);
+  }
+
+  if (predefined_macros == nullptr) {
+    return true;
+  }
+
+  string defines;
+  for (const auto& entry : flags.commandline_macros()) {
+    const string& macro = entry.first;
+    DCHECK(entry.second) << macro;
+
+    defines += "#define ";
+    const size_t eq = macro.find('=');
+    if (eq == string::npos) {
+      defines += macro;
+    } else {
+      defines += macro.substr(0, eq);
+      defines += " ";
+      defines += macro.substr(eq + 1);
+    }
+    defines += "\n";
+  }
+  *predefined_macros += defines;
+  return true;
+}
+
+// Splits |sharp_output| into lines and expects the second line to start
+// with "Target: ".  On success the first line is stored in |*version| and
+// the remainder of the second line in |*target|.
+// Returns false when there are fewer than two lines or the second line
+// lacks the "Target: " prefix.
+/* static */
+bool CompilerInfoBuilder::ParseClangVersionTarget(
+    const string& sharp_output,
+    string* version, string* target) {
+  static const char* kTargetPrefix = "Target: ";
+
+  std::vector<string> lines;
+  SplitStringUsing(sharp_output, "\r\n", &lines);
+
+  const bool has_target_line =
+      lines.size() >= 2 && strings::StartsWith(lines[1], kTargetPrefix);
+  if (!has_target_line) {
+    return false;
+  }
+
+  *version = lines[0];
+  *target = lines[1].substr(strlen(kTargetPrefix));
+  return true;
+}
+
+// Runs "|clang_tidy_path| -version" in |cwd| with |compiler_info_envs| plus
+// LC_ALL=C and parses the merged stdout/stderr with
+// ParseClangTidyVersionTarget.
+// Returns false (after logging) when the command exits with a non-zero
+// status; otherwise returns the result of the parse.
+/* static */
+bool CompilerInfoBuilder::GetClangTidyVersionTarget(
+    const string& clang_tidy_path,
+    const std::vector<string>& compiler_info_envs,
+    const string& cwd,
+    string* version,
+    string* target) {
+  std::vector<string> argv{clang_tidy_path, "-version"};
+
+  std::vector<string> env(compiler_info_envs);
+  env.push_back("LC_ALL=C");
+
+  int32_t status = 0;
+  const string output = ReadCommandOutput(clang_tidy_path, argv, env, cwd,
+                                          MERGE_STDOUT_STDERR, &status);
+  if (status == 0) {
+    return ParseClangTidyVersionTarget(output, version, target);
+  }
+
+  LOG(ERROR) << "ReadCommandOutput exited with non zero status code."
+             << " clang_tidy_path=" << clang_tidy_path
+             << " status=" << status
+             << " argv=" << argv
+             << " env=" << env
+             << " cwd=" << cwd
+             << " output=" << output;
+  return false;
+}
+
+// Parses "clang-tidy -version" output.  After splitting |output| into
+// lines, the second line must start with "  LLVM version " and the fourth
+// with "  Default target: "; the text after those prefixes is stored in
+// |*version| and |*target| respectively.
+// Returns false when there are fewer than four lines or either prefix is
+// missing.
+/* static */
+bool CompilerInfoBuilder::ParseClangTidyVersionTarget(const string& output,
+                                                      string* version,
+                                                      string* target) {
+  static const char kVersionPrefix[] = "  LLVM version ";
+  static const char kTargetPrefix[] = "  Default target: ";
+
+  std::vector<string> lines;
+  SplitStringUsing(output, "\r\n", &lines);
+
+  const bool well_formed =
+      lines.size() >= 4 &&
+      strings::StartsWith(lines[1], kVersionPrefix) &&
+      strings::StartsWith(lines[3], kTargetPrefix);
+  if (!well_formed) {
+    return false;
+  }
+
+  *version = lines[1].substr(strlen(kVersionPrefix));
+  *target = lines[3].substr(strlen(kTargetPrefix));
+  return true;
+}
+
+// Runs the compiler in verbose preprocess-only mode on an empty input and
+// returns its merged stdout/stderr.
+//
+// The command line is
+//   |normal_compiler_path| |compiler_info_flags|... |lang_flag| [|option|]
+//       -v -E <empty file> -o <empty file>
+// where |option| is added only when non-empty and is preceded by "-Xclang"
+// for clang-cl.  The environment is LC_ALL=C followed by
+// |compiler_info_envs|.  |*status| receives the exit status from
+// ReadCommandOutput, or -1 on Windows if the temporary empty file cannot be
+// created (in which case an empty string is returned).
+static string GccDisplayPrograms(
+    const string& normal_compiler_path,
+    const std::vector<string>& compiler_info_flags,
+    const std::vector<string>& compiler_info_envs,
+    const string& lang_flag,
+    const string& option,
+    const string& cwd,
+    int32_t* status) {
+  std::vector<string> argv;
+  argv.push_back(normal_compiler_path);
+  copy(compiler_info_flags.begin(), compiler_info_flags.end(),
+       back_inserter(argv));
+  argv.push_back(lang_flag);
+  if (!option.empty()) {
+    if (CompilerFlags::IsClangClCommand(normal_compiler_path)) {
+      argv.push_back("-Xclang");
+    }
+    argv.push_back(option);
+  }
+#ifdef _WIN32
+  // This code is used by NaCl gcc, PNaCl clang and clang-cl on Windows.
+  // Former uses /dev/null as null device, and latter recently uses NUL as
+  // null device.  To provide the same code to both, let me use temporary
+  // file for that.
+  ScopedTmpFile tmp("gcc_display_program");
+  if (!tmp.valid()) {
+    LOG(ERROR) << "cannot make an empty file";
+    *status = -1;
+    return "";
+  }
+  tmp.Close();
+  const string& empty_file = tmp.filename();
+  VLOG(2) << "empty_file=" << empty_file;
+#else
+  const string& empty_file = "/dev/null";
+#endif
+  // The same empty file serves as both the input and the -o output.
+  argv.push_back("-v");
+  argv.push_back("-E");
+  argv.push_back(empty_file);
+  argv.push_back("-o");
+  argv.push_back(empty_file);
+
+  // LC_ALL=C is placed first, ahead of the caller-supplied variables.
+  std::vector<string> env;
+  env.push_back("LC_ALL=C");
+  copy(compiler_info_envs.begin(), compiler_info_envs.end(),
+       back_inserter(env));
+
+  return ReadCommandOutput(normal_compiler_path, argv, env, cwd,
+                           MERGE_STDOUT_STDERR, status);
+}
+
+// Return true if everything is fine, and all necessary information
+// (system include paths, predefined macro, etc) are set to |compiler_info|.
+// Otherwise false, and |compiler_info->error_message| is set.
+//
+// |local_compiler_path| is compiler path.
+// |compiler_info_flags| is used as command line options to get info.
+// |compiler_info_envs| is used as environment to get info.
+// |cwd| is current working directory on getting info.
+// |lang_flag| specifies the language to get predefined macros and features.
+// e.g. clang -dM |lang_flag| -E /dev/null
+// It is usually -xc and -xc++ on gcc variants, but there are also other
+// languages such as c-header, cpp-output.  We should use it.
+// Currently, Objective-C++ and C++ need to be treated as C++, and
+// I believe CompilerFlags should be point of decision of is_cplusplus
+// judgement.  For that reason, |is_cplusplus| is passed regardless
+// of what lang_flag is used.
+// |is_clang| indicates the compiler is clang, so need to get features,
+// extensions etc.  (It is not referenced in the body below.)
+// |is_clang_tidy|: when true, additional flags are not collected from the
+// C++ "-v" output.
+// |has_nostdinc| is forwarded to GetSystemIncludePaths.
+// static
+bool CompilerInfoBuilder::SetBasicCompilerInfo(
+    const string& local_compiler_path,
+    const std::vector<string>& compiler_info_flags,
+    const std::vector<string>& compiler_info_envs,
+    const string& cwd,
+    const string& lang_flag,
+    bool is_cplusplus,
+    bool is_clang,
+    bool is_clang_tidy,
+    bool has_nostdinc,
+    CompilerInfoData* compiler_info) {
+  // cxx_lang_flag, c_lang_flag for c++, c respectively.
+  // For gcc and clang,
+  // even when language is objective-c, objective-c++, c-header, cpp-output,
+  // c++-header, c++-cpp-output, we'll use -xc++, -xc to get system include
+  // paths.
+  // For clang-cl.exe, we use /TP and /TC like we do for gcc and clang.
+  string cxx_lang_flag;
+  string c_lang_flag;
+  if (CompilerFlags::IsClangClCommand(local_compiler_path)) {
+    cxx_lang_flag = "/TP";
+    c_lang_flag = "/TC";
+  } else {
+    cxx_lang_flag = "-xc++";
+    c_lang_flag = "-xc";
+  }
+
+  // We assumes include system paths are same for given compiler_info_flags
+  // and compiler_info_envs.
+  //
+  // We changes the way to get system include path whether it is compiling C++
+  // source code or not.
+  // C++:
+  //   c++ system include path = [paths by -xc++]
+  //   c   system include path = [paths by -xc++ -nostdinc++]
+  // C:
+  //   c   system include path = [paths by -xc]
+  //   no need to check C++ system include path.
+  //
+  // Note that the way to get system include paths are still under discussion
+  // in b/13178705.
+  string c_output, cxx_output;
+  if (is_cplusplus) {
+    int32_t status;
+    // First run: C++ include paths.
+    cxx_output = GccDisplayPrograms(
+        local_compiler_path,
+        compiler_info_flags,
+        compiler_info_envs,
+        cxx_lang_flag,
+        "",
+        cwd,
+        &status);
+    if (status != 0) {
+      AddErrorMessage(
+          "Failed to execute compiler to get c++ system "
+          "include paths for " + local_compiler_path,
+          compiler_info);
+      LOG(ERROR) << compiler_info->error_message()
+                 << " status=" << status
+                 << " cxx_output=" << cxx_output;
+      return false;
+    }
+    // Second run: same language flag with -nostdinc++, used as the C paths.
+    c_output = GccDisplayPrograms(
+        local_compiler_path,
+        compiler_info_flags,
+        compiler_info_envs,
+        cxx_lang_flag,
+        "-nostdinc++",
+        cwd,
+        &status);
+    if (status != 0) {
+      AddErrorMessage(
+          "Failed to execute compiler to get c system "
+          "include paths for " + local_compiler_path,
+          compiler_info);
+      LOG(ERROR) << compiler_info->error_message()
+                 << " status=" << status
+                 << " c_output=" << c_output;
+      return false;
+    }
+  } else {
+    int32_t status;
+    // C only: |cxx_output| stays empty.
+    c_output = GccDisplayPrograms(
+        local_compiler_path,
+        compiler_info_flags,
+        compiler_info_envs,
+        c_lang_flag,
+        "",
+        cwd,
+        &status);
+    if (status != 0) {
+      AddErrorMessage(
+          "Failed to execute compiler to get c system "
+          "include paths for " + local_compiler_path,
+          compiler_info);
+      LOG(ERROR) << compiler_info->error_message()
+                 << " status=" << status
+                 << " c_output=" << c_output;
+      return false;
+    }
+  }
+
+  if (!GetSystemIncludePaths(local_compiler_path,
+                             compiler_info_flags,
+                             compiler_info_envs,
+                             cxx_output,
+                             c_output,
+                             is_cplusplus,
+                             has_nostdinc,
+                             compiler_info)) {
+    AddErrorMessage(
+        "Failed to get system include paths for " +
+        local_compiler_path,
+        compiler_info);
+    LOG(ERROR) << compiler_info->error_message();
+    return false;
+  }
+  if (!GetPredefinedMacros(local_compiler_path,
+                           compiler_info_flags,
+                           compiler_info_envs,
+                           cwd,
+                           lang_flag,
+                           compiler_info)) {
+    AddErrorMessage(
+        "Failed to get predefined macros for " +
+        local_compiler_path,
+        compiler_info);
+    LOG(ERROR) << compiler_info->error_message();
+    return false;
+  }
+
+  // Additional flags come from the C++ output only, and never for
+  // clang-tidy.
+  if (!cxx_output.empty() && !is_clang_tidy) {
+    std::vector<string> additional_flags;
+    CompilerInfoBuilder::GetAdditionalFlags(cxx_output, &additional_flags);
+    for (const auto& f : additional_flags) {
+      compiler_info->add_additional_flags(f);
+    }
+  }
+  if (!c_output.empty()) {
+    CompilerInfoBuilder::GetResourceDir(c_output, compiler_info);
+  }
+
+  // No AddErrorMessage here: the callee is expected to have recorded the
+  // error itself, which the DCHECK below verifies.
+  if (!GetPredefinedFeaturesAndExtensions(local_compiler_path,
+                                          lang_flag,
+                                          compiler_info_flags,
+                                          compiler_info_envs,
+                                          cwd,
+                                          compiler_info)) {
+    LOG(ERROR) << "Failed to get predefined features and extensions."
+               << " local_compiler_path=" << local_compiler_path
+               << " lang_flag=" << lang_flag;
+    DCHECK(compiler_info->has_error_message());
+    return false;
+  }
+  return true;
+}
+
+// Appends every entry of |paths|, in order, to the repeated string field
+// |include_paths|.  Existing entries of |include_paths| are kept.
+// static
+void CompilerInfoBuilder::UpdateIncludePaths(
+    const std::vector<string>& paths,
+    google::protobuf::RepeatedPtrField<string>* include_paths) {
+  for (const auto& path : paths) {
+    *include_paths->Add() = path;
+  }
+}
+
+// Fills the quote/system/C++-system include paths and the system framework
+// paths of |compiler_info| from compiler "-v" output.
+//
+// |cxx_display_output| and |c_display_output| are the outputs obtained for
+// C++ and C respectively; each is parsed with SplitGccIncludeOutput.
+// Quote include paths and framework paths are taken from the C++ output
+// when it yields any, and from the C output otherwise.
+// |normal_compiler_path|, |compiler_info_flags| and |compiler_info_envs|
+// are used for log messages (and, on Windows, to detect NaCl gcc).
+// |is_cplusplus| only controls whether a failure to parse the C++ output is
+// logged.
+//
+// Returns false, with an error message added to |compiler_info|, when
+// neither C nor C++ system include paths were found and |has_nostdinc| is
+// false.  Returns true otherwise.
+// static
+bool CompilerInfoBuilder::GetSystemIncludePaths(
+    const string& normal_compiler_path,
+    const std::vector<string>& compiler_info_flags,
+    const std::vector<string>& compiler_info_envs,
+    const string& cxx_display_output,
+    const string& c_display_output,
+    bool is_cplusplus,
+    bool has_nostdinc,
+    CompilerInfoData* compiler_info) {
+  // Start from a clean slate; everything below only appends.
+  compiler_info->clear_quote_include_paths();
+  compiler_info->clear_cxx_system_include_paths();
+  compiler_info->clear_system_include_paths();
+  compiler_info->clear_system_framework_paths();
+
+  std::vector<string> quote_include_paths;
+  std::vector<string> cxx_system_include_paths;
+  std::vector<string> system_framework_paths;
+  // A failure here is not fatal; the C output is still processed below.
+  if (cxx_display_output.empty() ||
+      !CompilerInfoBuilder::SplitGccIncludeOutput(
+          cxx_display_output,
+          &quote_include_paths,
+          &cxx_system_include_paths,
+          &system_framework_paths)) {
+    LOG_IF(WARNING, is_cplusplus)
+        << "Cannot detect g++ system include paths:"
+        << " normal_compiler_path=" << normal_compiler_path
+        << " compiler_info_flags=" << compiler_info_flags
+        << " compiler_info_envs=" << compiler_info_envs;
+  }
+
+  UpdateIncludePaths(
+      quote_include_paths,
+      compiler_info->mutable_quote_include_paths());
+
+  UpdateIncludePaths(
+      cxx_system_include_paths,
+      compiler_info->mutable_cxx_system_include_paths());
+
+  UpdateIncludePaths(
+      system_framework_paths,
+      compiler_info->mutable_system_framework_paths());
+
+  std::vector<string>* quote_include_paths_ptr = nullptr;
+  // If quote_include_paths couldn't be obtained above,
+  // we'll try to fetch them here.
+  if (compiler_info->quote_include_paths_size() == 0) {
+    DCHECK(quote_include_paths.empty());
+    quote_include_paths_ptr = &quote_include_paths;
+  }
+
+  std::vector<string>* framework_paths_ptr = nullptr;
+  // If system_framework_paths couldn't be obtained above,
+  // we'll try to fetch them here.
+  if (compiler_info->system_framework_paths_size() == 0) {
+    DCHECK(system_framework_paths.empty());
+    framework_paths_ptr = &system_framework_paths;
+  }
+  // A nullptr output tells SplitGccIncludeOutput's caller side here that the
+  // corresponding paths were already obtained from the C++ output.
+  std::vector<string> system_include_paths;
+  if (!CompilerInfoBuilder::SplitGccIncludeOutput(
+          c_display_output,
+          quote_include_paths_ptr,
+          &system_include_paths,
+          framework_paths_ptr)) {
+    LOG(WARNING) << "Cannot detect gcc system include paths:"
+                 << " normal_compiler_path=" << normal_compiler_path
+                 << " compiler_info_flags=" << compiler_info_flags
+                 << " compiler_info_envs=" << compiler_info_envs;
+  }
+  if (quote_include_paths_ptr != nullptr) {
+    UpdateIncludePaths(
+        quote_include_paths,
+        compiler_info->mutable_quote_include_paths());
+  }
+
+  UpdateIncludePaths(
+      system_include_paths,
+      compiler_info->mutable_system_include_paths());
+
+  if (framework_paths_ptr != nullptr) {
+    UpdateIncludePaths(
+        system_framework_paths,
+        compiler_info->mutable_system_framework_paths());
+  }
+
+  // Having no system include paths at all is an error unless the caller
+  // says -nostdinc was given.
+  if (compiler_info->cxx_system_include_paths_size() == 0 &&
+      compiler_info->system_include_paths_size() == 0 &&
+      !has_nostdinc) {
+    std::stringstream ss;
+    ss << "Cannot detect system include paths:"
+       << " normal_compiler_path=" << normal_compiler_path
+       << " compiler_info_flags=" << compiler_info_flags
+       << " compiler_info_envs=" << compiler_info_envs
+       << " cxx_display_output=" << cxx_display_output
+       << " c_display_output=" << c_display_output;
+    AddErrorMessage(ss.str(), compiler_info);
+    LOG(ERROR) << ss.str();
+    return false;
+  }
+
+#ifdef _WIN32
+  // In the (build: Windows, target: NaCl (not PNaCl)) compile,
+  // include paths under toolchain root are shown as relative path from it.
+  if (CompilerFlags::IsNaClGCCCommand(normal_compiler_path)) {
+    compiler_info->set_toolchain_root(
+        GetNaClToolchainRoot(normal_compiler_path));
+  }
+#endif
+  return true;
+}
+
+// Runs the compiler to dump its predefined macros and returns the merged
+// stdout/stderr.
+//
+// The command line is
+//   |normal_compiler_path| |compiler_info_flags|... |lang_flag|
+//       -E <empty file> [-Xclang] -dM
+// where "-Xclang" is inserted only for clang-cl.  The environment is
+// LC_ALL=C followed by |compiler_info_envs|.
+//
+// |*status| receives the exit status from ReadCommandOutput, or -1 on
+// Windows if the temporary empty file cannot be created.  An empty string
+// is returned whenever |*status| is non-zero.
+static string GccDisplayPredefinedMacros(
+    const string& normal_compiler_path,
+    const std::vector<string>& compiler_info_flags,
+    const std::vector<string>& compiler_info_envs,
+    const string& cwd,
+    const string& lang_flag,
+    int32_t* status) {
+  std::vector<string> argv;
+  argv.push_back(normal_compiler_path);
+  copy(compiler_info_flags.begin(), compiler_info_flags.end(),
+       back_inserter(argv));
+#ifdef _WIN32
+  // This code is used by NaCl gcc, PNaCl clang and clang-cl on Windows.
+  // Former uses /dev/null as null device, and latter recently uses NUL as
+  // null device.  To provide the same code to both, let me use temporary
+  // file for that.
+  ScopedTmpFile tmp("gcc_display_predefined_macro");
+  if (!tmp.valid()) {
+    LOG(ERROR) << "cannot make an empty file";
+    *status = -1;
+    return "";
+  }
+  tmp.Close();
+  const string& empty_file = tmp.filename();
+  VLOG(2) << "empty_file=" << empty_file;
+#else
+  const string& empty_file = "/dev/null";
+#endif
+
+  argv.push_back(lang_flag);
+  argv.push_back("-E");
+  argv.push_back(empty_file);
+  if (CompilerFlags::IsClangClCommand(normal_compiler_path)) {
+    argv.push_back("-Xclang");
+  }
+  argv.push_back("-dM");
+
+  // LC_ALL=C is placed first, ahead of the caller-supplied variables.
+  std::vector<string> env;
+  env.push_back("LC_ALL=C");
+  copy(compiler_info_envs.begin(), compiler_info_envs.end(),
+       back_inserter(env));
+
+  const string& macros = ReadCommandOutput(normal_compiler_path, argv, env, cwd,
+                                           MERGE_STDOUT_STDERR, status);
+  if (*status != 0) {
+    // Log the status value itself; streaming |status| would print the
+    // pointer's address instead of the exit code.
+    LOG(ERROR) << "ReadCommandOutput exited with non zero status code."
+               << " normal_compiler_path=" << normal_compiler_path
+               << " status=" << *status
+               << " argv=" << argv
+               << " env=" << env << " cwd=" << cwd
+               << " macros=" << macros;
+    return "";
+  }
+  return macros;
+}
+
+// Obtains the compiler's predefined macros via GccDisplayPredefinedMacros
+// and stores them in |compiler_info|'s predefined_macros field.
+// Returns false, leaving |compiler_info| untouched, when the compiler
+// invocation reports a non-zero status.
+// static
+bool CompilerInfoBuilder::GetPredefinedMacros(
+    const string& normal_compiler_path,
+    const std::vector<string>& compiler_info_flags,
+    const std::vector<string>& compiler_info_envs,
+    const string& cwd,
+    const string& lang_flag,
+    CompilerInfoData* compiler_info) {
+  int32_t status;
+  const string macros = GccDisplayPredefinedMacros(normal_compiler_path,
+                                                   compiler_info_flags,
+                                                   compiler_info_envs,
+                                                   cwd,
+                                                   lang_flag,
+                                                   &status);
+  if (status == 0) {
+    compiler_info->set_predefined_macros(macros);
+    return true;
+  }
+  return false;
+}
+
+// Collects cl.exe's default include paths and predefined macros.
+//
+// cl.exe is run twice through GetVCOutputString, once with
+// "/Bx|vcflags_path|" (C++) and once with "/B1|vcflags_path|" (C), and each
+// output is parsed with ParseVCOutputString.  Include paths from the C++
+// run go to cxx_system_include_paths and those from the C run to
+// system_include_paths.  Predefined macros are appended to |compiler_info|
+// only from the run matching |lang| ("c++" or "c").
+// Returns false as soon as one of the outputs cannot be parsed.
+// static
+bool CompilerInfoBuilder::GetVCDefaultValues(
+    const string& cl_exe_path,
+    const string& vcflags_path,
+    const std::vector<string>& compiler_info_flags,
+    const std::vector<string>& compiler_info_envs,
+    const string& cwd,
+    const string& lang,
+    CompilerInfoData* compiler_info) {
+  // VC++ accepts two different undocumented flags to dump all predefined values
+  // in preprocessor.  /B1 is for C and /Bx is for C++.
+  string vc_cpp_flags = "/Bx" + vcflags_path;
+  string vc_c_flags = "/B1" + vcflags_path;
+
+  // It does not matter that non-exist-file.cpp/.c is on disk or not.  VCFlags
+  // will error out cl.exe and display the information we want before actually
+  // opening that file.
+  // Both commands are run up front, before either output is parsed.
+  string output_cpp = GetVCOutputString(cl_exe_path, vc_cpp_flags,
+                                        "non-exist-file.cpp",
+                                        compiler_info_flags,
+                                        compiler_info_envs, cwd);
+  string output_c = GetVCOutputString(cl_exe_path, vc_c_flags,
+                                      "non-exist-file.c",
+                                      compiler_info_flags,
+                                      compiler_info_envs, cwd);
+
+  std::vector<string> cxx_paths;
+  string* cxx_macros = nullptr;
+  if (lang == "c++") {
+    cxx_macros = compiler_info->mutable_predefined_macros();
+  }
+  if (!CompilerInfoBuilder::ParseVCOutputString(
+          output_cpp, &cxx_paths, cxx_macros)) {
+    return false;
+  }
+  for (const auto& path : cxx_paths) {
+    compiler_info->add_cxx_system_include_paths(path);
+  }
+
+  std::vector<string> c_paths;
+  string* c_macros = nullptr;
+  if (lang == "c") {
+    c_macros = compiler_info->mutable_predefined_macros();
+  }
+  if (!CompilerInfoBuilder::ParseVCOutputString(
+          output_c, &c_paths, c_macros)) {
+    return false;
+  }
+  for (const auto& path : c_paths) {
+    compiler_info->add_system_include_paths(path);
+  }
+  return true;
+}
+
+/* static */
+void CompilerInfoBuilder::AddErrorMessage(
+    const std::string& message,
+    CompilerInfoData* compiler_info) {
+  if (compiler_info->failed_at() == 0)
+    compiler_info->set_failed_at(time(nullptr));
+
+  if (!compiler_info->has_error_message()) {
+    compiler_info->set_error_message(
+        compiler_info->error_message() + "\n");
+  }
+  compiler_info->set_error_message(
+      compiler_info->error_message() + message);
+}
+
+/* static */
+void CompilerInfoBuilder::OverrideError(
+    const std::string& message, time_t failed_at,
+    CompilerInfoData* compiler_info) {
+  DCHECK((message.empty() && failed_at == 0) ||
+         (!message.empty() && failed_at > 0));
+  compiler_info->set_error_message(message);
+  compiler_info->set_failed_at(failed_at);
+}
+
+// Fills |s| with the name, hash and file id of the subprogram at |path|.
+// The file id is taken from |path| itself, while the SHA256 hash is
+// computed from GetRealSubprogramPath(|path|).
+// Returns false, leaving |s| untouched, if |path| has no valid FileId or
+// the hash cannot be computed.
+/* static */
+bool CompilerInfoBuilder::SubprogramInfoFromPath(
+    const string& path, CompilerInfoData::SubprogramInfo* s) {
+  FileId file_id(path);
+  string hash;
+  if (!file_id.IsValid() ||
+      !GomaSha256FromFile(GetRealSubprogramPath(path), &hash)) {
+    return false;
+  }
+
+  s->set_name(path);
+  s->set_hash(hash);
+  SetFileIdToData(file_id, s->mutable_file_id());
+  return true;
+}
+
+// Replaces the stored hash rewrite rule with |rule|.  The new rule is
+// logged first, then assigned while holding |rwlock_| exclusively.
+void CompilerInfoBuilder::SetHashRewriteRule(
+    const std::map<std::string, std::string>& rule) {
+  LOG(INFO) << "new hash rewrite rule will be set:" << rule;
+
+  AUTO_EXCLUSIVE_LOCK(lock, &rwlock_);
+  hash_rewrite_rule_ = rule;
+}
+
+// Applies |rule| (old hash -> new hash) to every subprogram in |data|.
+// A subprogram whose hash is a key of |rule| gets the mapped hash instead.
+// Returns true if at least one hash was rewritten; an empty |rule| never
+// rewrites anything.
+/* static */
+bool CompilerInfoBuilder::RewriteHashUnlocked(
+    const std::map<std::string, std::string>& rule,
+    CompilerInfoData* data) {
+  if (rule.empty()) {
+    return false;
+  }
+
+  bool rewritten = false;
+  for (auto& subprogram : *data->mutable_subprograms()) {
+    const auto entry = rule.find(subprogram.hash());
+    if (entry == rule.end()) {
+      continue;
+    }
+    VLOG(3) << "rewrite hash of subprograms:"
+            << " from=" << subprogram.hash()
+            << " to=" << entry->second;
+    subprogram.set_hash(entry->second);
+    rewritten = true;
+  }
+  return rewritten;
+}
+
+// Returns the compiler name for |data|.
+//
+// Unless the local compiler's basename is "cc" or "c++", the name is
+// derived from local_compiler_path.  For "cc"/"c++" it is derived from
+// real_compiler_path, except that a "c++" which resolves to "clang" is
+// reported as "clang++".  Returns an empty string (after a warning) when
+// "c++" resolves to a clang command whose name is not "clang".
+std::string CompilerInfoBuilder::GetCompilerName(const CompilerInfoData& data) {
+  const string& local_path = data.local_compiler_path();
+  const string& real_path = data.real_compiler_path();
+
+  const StringPiece base = file::Basename(local_path);
+  const bool invoked_as_cc = (base == "cc");
+  if (!invoked_as_cc && base != "c++") {
+    // We can simply use local_compiler_path for judging compiler name
+    // if basename is not "cc" or "c++".
+    // See also b/13107706
+    return CompilerFlags::GetCompilerName(local_path);
+  }
+
+  if (!CompilerFlags::IsClangCommand(real_path)) {
+    return CompilerFlags::GetCompilerName(real_path);
+  }
+
+  // clang++ is usually symlink to clang, and real compiler path is
+  // usually be clang.  It does not usually reflect what we expect as a
+  // compiler name.
+  const string real_name = CompilerFlags::GetCompilerName(real_path);
+  if (invoked_as_cc) {
+    return real_name;
+  }
+  if (real_name == "clang") {
+    return string("clang++");
+  }
+
+  LOG(WARNING) << "Cannot detect compiler name:"
+               << " local=" << data.local_compiler_path()
+               << " real=" << data.real_compiler_path();
+  return string();
+}
+
+// Writes the current hash rewrite rule to |*ss|, one "key:value" entry per
+// line, while holding |rwlock_| in shared mode.  Nothing is written when
+// the rule is empty.
+void CompilerInfoBuilder::Dump(std::ostringstream* ss) {
+  AUTO_SHARED_LOCK(lock, &rwlock_);
+  if (!hash_rewrite_rule_.empty()) {
+    std::ostringstream& out = *ss;
+    out << "compiler_info_builder:" << std::endl;
+    out << "  hash_rewrite_rule:" << std::endl;
+    for (const auto& rule : hash_rewrite_rule_) {
+      out << "    " << rule.first << ":" << rule.second << std::endl;
+    }
+    out << std::endl;
+  }
+}
+
+// Copies the name, hash and file id of the protobuf message |info_data|
+// into the in-memory |*info|.
+/* static */
+void CompilerInfo::SubprogramInfo::FromData(
+    const CompilerInfoData::SubprogramInfo& info_data,
+    SubprogramInfo* info) {
+  GetFileIdFromData(info_data.file_id(), &info->file_id);
+  info->hash = info_data.hash();
+  info->name = info_data.name();
+}
+
+/* static */
+CompilerInfo::SubprogramInfo CompilerInfo::SubprogramInfo::FromPath(
+    const string& path) {
+  CompilerInfoData::SubprogramInfo data;
+  CompilerInfoBuilder::SubprogramInfoFromPath(path, &data);
+  CompilerInfo::SubprogramInfo s;
+  FromData(data, &s);
+  return s;
+}
+
+string CompilerInfo::SubprogramInfo::DebugString() const {
+  // Single-line summary of the name, FileId validity and hash.
+  std::ostringstream out;
+  out << "name: " << name << ", valid:" << file_id.IsValid()
+      << ", hash: " << hash;
+  return out.str();
+}
+
+string CompilerInfo::DebugString() const {
+  return data_->DebugString();
+}
+
+bool CompilerInfo::IsUpToDate(const string& local_compiler_path) const {
+  FileId cur_local(local_compiler_path);
+  if (cur_local != local_compiler_id_) {
+    LOG(INFO) << "compiler id is not matched:"
+              << " path=" << local_compiler_path
+              << " local_compiler_id=" << local_compiler_id_.DebugString()
+              << " cur_local=" << cur_local.DebugString();
+    return false;
+  }
+  if (local_compiler_path != data_->real_compiler_path()) {
+    // |local_compiler_path| differs from |real_compiler_path|, so the real
+    // compiler's FileId must be compared separately.
+    FileId cur_real(data_->real_compiler_path());
+    if (cur_real != real_compiler_id_) {
+      LOG(INFO) << "real compiler id is not matched:"
+                << " local_compiler_path=" << local_compiler_path
+                << " real_compiler_path=" << data_->real_compiler_path()
+                << " local_compiler_id=" << local_compiler_id_.DebugString()
+                << " real_compiler_id=" << real_compiler_id_.DebugString()
+                << " cur_real=" << cur_real.DebugString();
+      return false;
+    }
+  }
+
+  for (const auto& subprog : subprograms_) {
+    FileId file_id(subprog.name);  // FileId built from the subprogram path.
+    if (file_id != subprog.file_id) {
+      LOG(INFO) << "subprogram is not matched:"
+                << " local_compiler_path=" << local_compiler_path
+                << " subprogram=" << subprog.name
+                << " subprogram_file_id=" << subprog.file_id.DebugString()
+                << " file_id=" << file_id.DebugString();
+      return false;
+    }
+  }
+
+  return true;  // Local compiler, real compiler and subprograms all match.
+}
+
+bool CompilerInfo::UpdateFileIdIfHashMatch(
+    unordered_map<string, string>* sha256_cache) {
+  // Checks the local compiler, real compiler and subprogram hashes.
+  // Only if all of them match are the stored FileIds refreshed.
+
+  string local_hash;
+  if (!GetHashFromCacheOrFile(abs_local_compiler_path(),
+                              &local_hash,
+                              sha256_cache)) {
+    LOG(WARNING) << "calculating local compiler hash failed: "
+                 << "path=" << local_compiler_path();
+    return false;
+  }
+  if (local_hash != local_compiler_hash()) {
+    LOG(INFO) << "local compiler hash didn't match:"
+              << " path=" << local_compiler_path()
+              << " prev=" << local_compiler_hash()
+              << " current=" << local_hash;
+    return false;
+  }
+
+  string real_hash;
+  if (!GetHashFromCacheOrFile(real_compiler_path(), &real_hash, sha256_cache)) {
+    LOG(WARNING) << "calculating real compiler hash failed: "
+                 << "path=" << real_compiler_path();
+    return false;
+  }
+  if (real_hash != real_compiler_hash()) {
+    LOG(INFO) << "real compiler hash didn't match:"
+              << " path=" << real_compiler_path()
+              << " prev=" << real_compiler_hash()
+              << " current=" << real_hash;
+    return false;
+  }
+
+  for (const auto& subprog : subprograms_) {
+    string subprogram_hash;
+    if (!GetHashFromCacheOrFile(subprog.name, &subprogram_hash, sha256_cache)) {
+      LOG(WARNING) << "calculating subprogram hash failed: "
+                   << "name=" << subprog.name;
+      return false;
+    }
+    if (subprogram_hash != subprog.hash) {
+      LOG(INFO) << "subprogram hash didn't match:"
+                << " path=" << real_compiler_path()
+                << " subprogram=" << subprog.name
+                << " prev=" << subprog.hash
+                << " current=" << subprogram_hash;
+      return false;
+    }
+  }
+
+  if (subprograms().size() !=
+      static_cast<size_t>(data_->subprograms().size())) {
+    LOG(ERROR) << "CompilerInfo subprograms and data subprograms size differs: "
+               << " Inconsistent state: " << data_->real_compiler_path();
+    return false;
+  }
+
+  for (size_t i = 0; i < subprograms_.size(); ++i) {
+    const auto& subprog = subprograms_[i];
+    const auto& data_subprog = data_->subprograms(i);
+    if (subprog.name != data_subprog.name()) {
+      LOG(ERROR) << "CompilerInfo subprogram and its data subprograms"
+                 << " is inconsistent: compiler=" << data_->real_compiler_path()
+                 << " inconsistent subprogram: "
+                 << subprog.name << " != " << data_subprog.name();
+      return false;
+    }
+  }
+
+  // OK, all hashes matched. Update the FileIds here and in |data_|.
+
+  FileId cur_local(local_compiler_path());
+  if (cur_local != local_compiler_id_) {
+    LOG(INFO) << "local_compiler_id_ is updated:"
+              << " old=" << local_compiler_id_.DebugString()
+              << " new=" << cur_local.DebugString();
+    local_compiler_id_ = cur_local;
+    SetFileIdToData(cur_local, data_->mutable_local_compiler_id());
+  }
+
+  // When |local_compiler_path| == |real_compiler_path|,
+  // local_compiler_id and real_compiler_id should be the same.
+  // Otherwise, we take the FileId of real_compiler_path().
+  FileId cur_real(cur_local);
+  if (local_compiler_path() != real_compiler_path()) {
+    cur_real = FileId(real_compiler_path());
+  }
+  if (cur_real != real_compiler_id_) {
+    LOG(INFO) << "real_compiler_id_ is updated:"
+              << " old=" << real_compiler_id_.DebugString()
+              << " new=" << cur_real.DebugString();
+    real_compiler_id_ = cur_real;
+    SetFileIdToData(cur_real, data_->mutable_real_compiler_id());
+  }
+
+  for (size_t i = 0; i < subprograms_.size(); ++i) {
+    auto& subprog = subprograms_[i];
+    auto* data_subprog = data_->mutable_subprograms(i);
+
+    FileId file_id(subprog.name);
+    if (file_id != subprog.file_id) {
+      LOG(INFO) << "subprogram id is updated:"
+                << " name=" << subprog.name
+                << " old=" << subprog.file_id.DebugString()
+                << " new=" << file_id.DebugString();
+      subprog.file_id = file_id;
+      SetFileIdToData(file_id, data_subprog->mutable_file_id());
+    }
+  }
+
+  return true;
+}
+
+bool CompilerInfo::IsSystemInclude(const string& filepath) const {
+  // |filepath| is a system include if it is under any system include dir.
+  const std::vector<string>* const search_dirs[] = {
+      &cxx_system_include_paths_,
+      &system_include_paths_,
+      &system_framework_paths_,
+  };
+  for (const std::vector<string>* dirs : search_dirs) {
+    for (const string& dir : *dirs) {
+      if (HasPrefixDir(filepath, dir))
+        return true;
+    }
+  }
+  return false;
+}
+
+bool CompilerInfo::IsCwdRelative(const string& cwd) const {
+  if (HasPrefixDir(data_->real_compiler_path(), cwd)) {
+    VLOG(1) << "real_compiler_path is cwd relative:"
+            << data_->real_compiler_path()
+            << " @" << cwd;
+    return true;
+  }
+  for (size_t i = 0; i < quote_include_paths_.size(); ++i) {
+    if (!file::IsAbsolutePath(quote_include_paths_[i]) ||
+        HasPrefixDir(quote_include_paths_[i], cwd)) {
+      VLOG(1) << "quote_include_path[" << i << "] is cwd relative:"
+              << quote_include_paths_[i] << " @" << cwd;
+      return true;
+    }
+  }
+  for (size_t i = 0; i < cxx_system_include_paths_.size(); ++i) {
+    if (!file::IsAbsolutePath(cxx_system_include_paths_[i]) ||
+        HasPrefixDir(cxx_system_include_paths_[i], cwd)) {
+      VLOG(1) << "cxx_system_include_path[" << i << "] is cwd relative:"
+              << cxx_system_include_paths_[i] << " @" << cwd;
+      return true;
+    }
+  }
+  for (size_t i = 0; i < system_include_paths_.size(); ++i) {
+    if (!file::IsAbsolutePath(system_include_paths_[i]) ||
+        HasPrefixDir(system_include_paths_[i], cwd)) {
+      VLOG(1) << "system_include_path[" << i << "] is cwd relative:"
+              << system_include_paths_[i] << " @" << cwd;
+      return true;
+    }
+  }
+  for (size_t i = 0; i < system_framework_paths_.size(); ++i) {
+    if (!file::IsAbsolutePath(system_framework_paths_[i]) ||
+        HasPrefixDir(system_framework_paths_[i], cwd)) {
+      VLOG(1) << "system_framework_path[" << i << "] is cwd relative:"
+              << system_framework_paths_[i] << " @" << cwd;
+      return true;
+    }
+  }
+  if (data_->predefined_macros().find(cwd) != string::npos) {
+    VLOG(1) << "predefined macros contains cwd " << cwd;
+    return true;
+  }
+  for (size_t i = 0; i < subprograms_.size(); ++i) {
+    const string& name = subprograms_[i].name;
+    if (HasPrefixDir(name, cwd)) {
+      VLOG(1) << "subprograms[" << i << "] is cwd relative: "
+              << name << " @" << cwd;
+      return true;
+    }
+  }
+  return false;
+}
+
+string CompilerInfo::abs_local_compiler_path() const {
+  return file::JoinPathRespectAbsolute(
+      data_->cwd(), data_->local_compiler_path());
+}
+
+const string& CompilerInfo::request_compiler_hash() const {
+  if (CompilerFlags::IsPNaClClangCommand(data_->local_compiler_path())) {
+    return data_->local_compiler_hash();
+  }
+  return data_->hash();
+}
+
+void CompilerInfo::Init() {
+  CHECK(data_.get());
+  GetFileIdFromData(data_->local_compiler_id(), &local_compiler_id_);
+  GetFileIdFromData(data_->real_compiler_id(), &real_compiler_id_);
+
+  for (const auto& p : data_->quote_include_paths()) {
+    quote_include_paths_.push_back(p);
+  }
+  for (const auto& p : data_->cxx_system_include_paths()) {
+    cxx_system_include_paths_.push_back(p);
+  }
+  for (const auto& p : data_->system_include_paths()) {
+    system_include_paths_.push_back(p);
+  }
+  for (const auto& p : data_->system_framework_paths()) {
+    system_framework_paths_.push_back(p);
+  }
+
+  for (const auto& m : data_->supported_predefined_macros()) {
+    if (!supported_predefined_macros_.insert(make_pair(m, false)).second) {
+      LOG(WARNING) << "duplicated predefined_macro: "
+                   << " real_compiler_path=" << data_->real_compiler_path()
+                   << " macro=" << m;
+    }
+  }
+  for (const auto& m : data_->hidden_predefined_macros()) {
+    if (!supported_predefined_macros_.insert(make_pair(m, true)).second) {
+      LOG(WARNING) << "duplicated predefined_macro: "
+                   << " real_compiler_path=" << data_->real_compiler_path()
+                   << " macro=" << m;
+    }
+  }
+  for (const auto& p : data_->has_feature()) {
+    has_feature_.insert(make_pair(p.key(), p.value()));
+  }
+  for (const auto& p : data_->has_extension()) {
+    has_extension_.insert(make_pair(p.key(), p.value()));
+  }
+  for (const auto& p : data_->has_attribute()) {
+    has_attribute_.insert(make_pair(p.key(), p.value()));
+  }
+  for (const auto& p : data_->has_cpp_attribute()) {
+    has_cpp_attribute_.insert(make_pair(p.key(), p.value()));
+  }
+  for (const auto& p : data_->has_declspec_attribute()) {
+    has_declspec_attribute_.insert(make_pair(p.key(), p.value()));
+  }
+  for (const auto& p : data_->has_builtin()) {
+    has_builtin_.insert(make_pair(p.key(), p.value()));
+  }
+
+  for (const auto& f : data_->additional_flags()) {
+    additional_flags_.push_back(f);
+  }
+
+  for (const auto& data : data_->subprograms()) {
+    SubprogramInfo s;
+    SubprogramInfo::FromData(data, &s);
+    subprograms_.push_back(s);
+  }
+}
+
+time_t CompilerInfo::last_used_at() const {
+  AUTO_SHARED_LOCK(lock, &last_used_at_mu_);
+  return data_->last_used_at();
+}
+
+void CompilerInfo::set_last_used_at(time_t t) {
+  AUTO_EXCLUSIVE_LOCK(lock, &last_used_at_mu_);
+  data_->set_last_used_at(t);
+}
+
+CompilerInfoState::CompilerInfoState(std::unique_ptr<CompilerInfoData> data)
+    : compiler_info_(std::move(data)),
+      refcnt_(0),
+      disabled_(false),
+      used_(0) {
+  LOG(INFO) << "New CompilerInfoState " << this;
+  if (!compiler_info_.found() && !compiler_info_.HasError()) {
+    CompilerInfoBuilder::AddErrorMessage("compiler not found",
+                                         compiler_info_.get());
+  }
+}
+
+CompilerInfoState::~CompilerInfoState() {}
+
+
+void CompilerInfoState::Ref() {
+  AUTOLOCK(lock, &mu_);
+  refcnt_++;
+}
+
+void CompilerInfoState::Deref() {
+  // The lock scope ends before a possible "delete this".
+  int remaining;
+  {
+    AUTOLOCK(lock, &mu_);
+    remaining = --refcnt_;
+  }
+  if (remaining != 0)
+    return;
+  LOG(INFO) << "Delete CompilerInfoState " << this;
+  delete this;
+}
+
+int CompilerInfoState::refcnt() const {
+  AUTOLOCK(lock, &mu_);
+  return refcnt_;
+}
+
+bool CompilerInfoState::disabled() const {
+  AUTOLOCK(lock, &mu_);
+  return disabled_;
+}
+
+string CompilerInfoState::GetDisabledReason() const {
+  AUTOLOCK(lock, &mu_);
+  return disabled_reason_;
+}
+
+void CompilerInfoState::SetDisabled(bool disabled,
+                                    const string& disabled_reason) {
+  AUTOLOCK(lock, &mu_);  // |disabled_| and its reason are updated together.
+  LOG(INFO) << "CompilerInfoState " << this << " disabled=" << disabled
+            << " reason=" << disabled_reason;
+  disabled_ = disabled;  // Was hard-coded to true, ignoring |disabled|.
+  disabled_reason_ = disabled_reason;
+}
+
+void CompilerInfoState::Use(const string& local_compiler_path,
+                            const CompilerFlags& flags) {
+  {
+    // Only the first call logs; every call still bumps |used_|.
+    AUTOLOCK(lock, &mu_);
+    const bool already_used = used_ > 0;
+    ++used_;
+    if (already_used)
+      return;
+  }
+
+  // CompilerInfo::DebugString() can exceed what glog accepts in one message
+  // (30000 bytes by default), so the dump is emitted in pieces of at most
+  // |kChunkSize| bytes:
+  // https://github.com/google/glog/blob/bf766fac4f828c81556499d7c16d53cc871d8bd2/src/logging.cc#L335
+  //
+  // TODO: It might be good to introduce a new compact printer for
+  // CompilerInfo. google::protobuf::TextFormat::Printer hardcodes ':'
+  // (key: value), so it is not a good base for a neat printer.
+  const size_t kChunkSize = 20000;
+  const string dump = compiler_info_.DebugString();
+  const StringPiece whole(dump);
+
+  LOG(INFO) << "compiler_info_state=" << this
+            << " path=" << local_compiler_path
+            << ": flags=" << flags.compiler_info_flags()
+            << ": info="
+            << whole.substr(0, std::min(kChunkSize, whole.size()));
+
+  for (size_t offset = kChunkSize; offset < whole.size();) {
+    const size_t length = std::min(kChunkSize, whole.size() - offset);
+    LOG(INFO) << "info continued:"
+              << " compiler_info_state=" << this
+              << " info(continued)=" << whole.substr(offset, length);
+    offset += length;
+  }
+}
+
+int CompilerInfoState::used() const {
+  AUTOLOCK(lock, &mu_);
+  return used_;
+}
+
+void CompilerInfoState::UpdateLastUsedAt() {
+  compiler_info_.set_last_used_at(time(nullptr));
+}
+
+ScopedCompilerInfoState::ScopedCompilerInfoState(CompilerInfoState* state)
+    : state_(state) {
+  if (state_ != nullptr)
+    state_->Ref();
+}
+
+ScopedCompilerInfoState::~ScopedCompilerInfoState() {
+  if (state_ != nullptr)
+    state_->Deref();
+}
+
+void ScopedCompilerInfoState::reset(CompilerInfoState* state) {
+  // Ref the new state before Deref-ing the old one (they may be the same).
+  CompilerInfoState* const previous = state_;
+  state_ = state;
+  if (state_ != nullptr) state_->Ref();
+  if (previous != nullptr) previous->Deref();
+}
+
+void ScopedCompilerInfoState::swap(ScopedCompilerInfoState* other) {
+  // Exchanges the raw pointers only: Ref()/Deref() are not called, so both
+  // reference counts stay as they were.
+  std::swap(state_, other->state_);
+}
+
+bool ScopedCompilerInfoState::disabled() const {
+  if (state_ == nullptr)
+    return true;
+
+  return state_->disabled();
+}
+
+string ScopedCompilerInfoState::GetDisabledReason() const {
+  if (state_ == nullptr)
+    return string();
+
+  return state_->GetDisabledReason();
+}
+
+}  // namespace devtools_goma
diff --git a/client/compiler_info.h b/client/compiler_info.h
new file mode 100644
index 0000000..86511ed
--- /dev/null
+++ b/client/compiler_info.h
@@ -0,0 +1,561 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_COMPILER_INFO_H_
+#define DEVTOOLS_GOMA_CLIENT_COMPILER_INFO_H_
+
+#include <sys/types.h>
+#include <time.h>
+
+#include <map>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "compiler_specific.h"
+#include "file_id.h"
+#include "google/protobuf/repeated_field.h"
+#include "gtest/gtest_prod.h"
+#include "lockhelper.h"
+#include "predefined_macros.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/compiler_info_data.pb.h"
+MSVC_POP_WARNING()
+#include "string_piece.h"
+#include "unordered.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class CompilerFlags;
+class GCCFlags;
+
+// CompilerInfoBuilder provides methods to construct CompilerInfoData.
+//
+//   CompilerInfoBuilder cib;
+//   std::unique_ptr<CompilerInfoData> data(
+//      cib.FillFromCompilerOutputs(....));
+//   CompilerInfo compiler_info(std::move(data));
+class CompilerInfoBuilder {
+ public:
+  typedef std::pair<const char* const*, size_t> FeatureList;
+
+  CompilerInfoBuilder() {}
+  ~CompilerInfoBuilder() {}
+
+  // Creates a new CompilerInfoData from compiler outputs.
+  // If found is true and error_message in it is empty,
+  // it successfully got the compiler info.
+  // If found is true and error_message in it is not empty,
+  // it found the local compiler but failed to get some information, such as
+  // system include paths.
+  // found is false if it failed to find the local compiler.
+  // The caller takes ownership of the returned CompilerInfoData.
+  std::unique_ptr<CompilerInfoData> FillFromCompilerOutputs(
+      const CompilerFlags& flags,
+      const string& local_compiler_path,
+      const std::vector<string>& compiler_info_envs);
+
+  // helper methods.
+  // Parses output of "gcc -x <lang> -v -E /dev/null -o /dev/null", and
+  // extracts |qpaths| (for #include "..."),
+  // |paths| (for #include <...>) and |framework_paths|.
+  static bool SplitGccIncludeOutput(
+      const string& gcc_v_output,
+      std::vector<string>* qpaths,
+      std::vector<string>* paths,
+      std::vector<string>* framework_paths);
+
+  // Parses output of clang feature macros.
+  static bool ParseFeatures(const string& feature_output,
+                            FeatureList object_macros,
+                            FeatureList function_macros,
+                            FeatureList feature,
+                            FeatureList extension,
+                            FeatureList attribute,
+                            FeatureList cpp_attribute,
+                            FeatureList declspec_attribute,
+                            FeatureList builtins,
+                            CompilerInfoData* compiler_info);
+
+  static bool GetPredefinedFeaturesAndExtensions(
+    const string& normal_compiler_path,
+    const string& lang_flag,
+    const std::vector<string>& compiler_info_flags,
+    const std::vector<string>& compiler_info_envs,
+    const string& cwd,
+    CompilerInfoData* compiler_info);
+
+  static bool GetAdditionalFlags(
+      const string& gxx_output, std::vector<string>* flags);
+
+  // Sets the compiler resource directory. asan_blacklist.txt etc. are
+  // located in this directory.
+  // Returns true if succeeded.
+  static bool GetResourceDir(const string& c_display_output,
+                             CompilerInfoData* compiler_info);
+
+  // Returns false if GetExtraSubprograms failed to get subprogram info
+  // while a subprogram exists.
+  static bool GetExtraSubprograms(const string& normal_gcc_path,
+                                  const GCCFlags& flags,
+                                  const std::vector<string>& compiler_info_envs,
+                                  CompilerInfoData* compiler_info);
+
+  // Parses compile flags for subprograms, especially clang plugins.
+  static void ParseSubprogramFlags(const string& normal_gcc_path,
+                                   const GCCFlags& flags,
+                                   std::vector<string>* clang_plugins,
+                                   std::vector<string>* B_options,
+                                   bool* no_integrated_as);
+  // Parse |gcc_output| to get list of subprograms.
+  static void ParseGetSubprogramsOutput(const string& gcc_output,
+                                        std::vector<string>* paths);
+
+  // Returns true on success, and |subprograms| will have full path of
+  // external subprograms or empty vector if not found.
+  // Returns false on failure.
+  static bool GetSubprograms(
+      const string& gcc_path,
+      const string& lang,
+      const std::vector<string>& compiler_info_flags,
+      const std::vector<string>& compiler_info_envs,
+      const string& cwd, bool warn_on_empty,
+      std::vector<string>* subprograms);
+
+  // Returns true if |subprogram_paths| contain a path for as (assembler).
+  static bool HasAsPath(const std::vector<string>& subprogram_paths);
+
+  // Parses "-xc -v -E /dev/null" output and returns real clang path.
+  static string ParseRealClangPath(StringPiece v_out);
+
+  // Get real compiler path.
+  // See: go/ma/resources-for-developers/goma-compiler-selection-mechanism
+  static string GetRealCompilerPath(const string& normal_gcc_path,
+                                    const string& cwd,
+                                    const std::vector<string>& envs);
+  // Get real subprogram path.
+  // See: go/ma/resources-for-developers/goma-compiler-selection-mechanism
+  static string GetRealSubprogramPath(const string& subprogram_path);
+
+  // Parses output of "javac", and extracts |version|.
+  static bool ParseJavacVersion(const string& vc_logo, string* version);
+
+  // Execute javac and get the string output for javac version
+  static bool GetJavacVersion(const string& javac,
+                              const std::vector<string>& compiler_info_envs,
+                              const string& cwd,
+                              string* version);
+
+  // Parses output of "cl.exe", and extracts |version| and |target|.
+  static bool ParseVCVersion(
+      const string& vc_logo, string* version, string* target);
+
+  // Execute VC and get the string output for VC version
+  static bool GetVCVersion(
+    const string& cl_exe_path, const std::vector<string>& env,
+    const string& cwd,
+    string* version, string* target);
+
+  // Parses output of "cl.exe /nologo /Bxvcflags.exe non-exist-file.cpp" (C++)
+  // or "cl.exe /nologo /B1vcflags.exe non-exist-file.c" (C),
+  // and extracts |include_paths| and |predefined macros| in
+  // "#define FOO X\n" format.
+  // |predefined_macros| may be NULL (don't capture predefined macros
+  // in this case).
+  static bool ParseVCOutputString(
+      const string& output,
+      std::vector<string>* include_paths,
+      string* predefined_macros);
+
+  // Parses output of clang / clang-cl -### result to get
+  // |version| and |target|.
+  static bool ParseClangVersionTarget(const string& sharp_output,
+                                      string* version,
+                                      string* target);
+
+  // Executes clang-tidy and gets the string output for clang-tidy version.
+  static bool GetClangTidyVersionTarget(
+      const string& clang_tidy_path,
+      const std::vector<string>& compiler_info_envs,
+      const string& cwd,
+      string* version,
+      string* target);
+  static bool ParseClangTidyVersionTarget(const string& output,
+                                          string* version,
+                                          string* target);
+
+  static bool SetBasicCompilerInfo(
+      const string& local_compiler_path,
+      const std::vector<string>& compiler_info_flags,
+      const std::vector<string>& compiler_info_envs,
+      const string& cwd,
+      const string& lang_flag,
+      bool is_cplusplus,
+      bool is_clang,
+      bool is_clang_tidy,
+      bool has_nostdinc,
+      CompilerInfoData* compiler_info);
+
+  static bool GetSystemIncludePaths(
+      const string& normal_compiler_path,
+      const std::vector<string>& compiler_info_flags,
+      const std::vector<string>& compiler_info_envs,
+      const string& cxx_display_output,
+      const string& c_display_output,
+      bool is_cplusplus,
+      bool has_nostdinc,
+      CompilerInfoData* compiler_info);
+
+  static bool GetPredefinedMacros(
+      const string& normal_compiler_path,
+      const std::vector<string>& compiler_info_flags,
+      const std::vector<string>& compiler_info_envs,
+      const string& cwd,
+      const string& lang_flag,
+      CompilerInfoData* compiler_info);
+
+  static bool GetVCDefaultValues(const string& cl_exe_path,
+                                 const string& vcflags_path,
+                                 const std::vector<string>& compiler_info_flags,
+                                 const std::vector<string>& compiler_info_envs,
+                                 const string& cwd,
+                                 const string& lang,
+                                 CompilerInfoData* compiler_info);
+
+  // Set up system include_paths to be sent to goma backend via ExecReq.
+  // To make the compile deterministic, we sometimes need to use relative
+  // path system include paths, and UpdateIncludePaths automatically
+  // converts the paths.
+  static void UpdateIncludePaths(
+      const std::vector<string>& paths,
+      google::protobuf::RepeatedPtrField<string>* include_paths);
+
+  // Adds error message to CompilerInfo. When |failed_at| is not 0,
+  // it's also updated.
+  static void AddErrorMessage(const std::string& message,
+                              CompilerInfoData* compiler_info);
+  // Overrides the current error message.
+  // If |message| is not empty, |failed_at| must be positive (non-zero).
+  static void OverrideError(const std::string& message, time_t failed_at,
+                            CompilerInfoData* compiler_info);
+
+  static bool SubprogramInfoFromPath(
+      const string& path, CompilerInfoData::SubprogramInfo* s);
+
+  void SetHashRewriteRule(const std::map<std::string, std::string>& rule);
+
+  static bool RewriteHashUnlocked(
+      const std::map<std::string, std::string>& rule,
+      CompilerInfoData* data);
+
+  // Returns compiler name to be used in ExecReq's CompilerSpec.
+  // If it fails to identify the compiler name, it returns empty string.
+  static string GetCompilerName(const CompilerInfoData& data);
+
+  void Dump(std::ostringstream* ss);
+
+ private:
+
+  ReadWriteLock rwlock_;
+  std::map<std::string, std::string> hash_rewrite_rule_;
+
+  DISALLOW_COPY_AND_ASSIGN(CompilerInfoBuilder);
+};
+
+// Represents how a compiler is configured.
+// Used as a const object.
+class CompilerInfo {
+ public:
+  struct SubprogramInfo {
+    SubprogramInfo() {}
+    static void FromData(const CompilerInfoData::SubprogramInfo& info_data,
+                         SubprogramInfo* info);
+    static SubprogramInfo FromPath(const string& path);
+    bool IsValid() const {
+      return file_id.IsValid() && !hash.empty() && !name.empty();
+    }
+    bool operator==(const SubprogramInfo& rhs) const {
+      return name == rhs.name &&
+          hash == rhs.hash &&
+          file_id == rhs.file_id;
+    }
+    string DebugString() const;
+
+    string name;
+    string hash;
+    FileId file_id;
+  };
+
+  // Takes ownership of data.
+  explicit CompilerInfo(std::unique_ptr<CompilerInfoData> data)
+      : data_(std::move(data)) {
+    Init();
+  }
+  ~CompilerInfo() {}
+
+  string DebugString() const;
+
+  // Returns true if the current FileIds of |local_compiler_path|, the real
+  // compiler and every subprogram still match the ones recorded here.
+  bool IsUpToDate(const string& local_compiler_path) const;
+
+  // Updates FileId to the current FileId when hash is matched.
+  // Returns false if hash doesn't match.
+  bool UpdateFileIdIfHashMatch(unordered_map<string, string>* sha256_cache);
+
+  // Returns true if CompilerInfo has some error.
+  bool HasError() const { return data_->has_error_message(); }
+
+  bool IsSystemInclude(const string& filepath) const;
+
+  bool IsCwdRelative(const string& cwd) const;
+
+  const FileId& local_compiler_id() const { return local_compiler_id_; }
+  const string& local_compiler_path() const {
+    return data_->local_compiler_path();
+  }
+  string abs_local_compiler_path() const;
+  const string& local_compiler_hash() const {
+    return data_->local_compiler_hash();
+  }
+
+  const FileId& real_compiler_id() const { return real_compiler_id_; }
+  const string& real_compiler_path() const {
+    return data_->real_compiler_path();
+  }
+  const string& real_compiler_hash() const {
+    return data_->hash();
+  }
+
+  // compiler hash to identify the compiler in backend.
+  const string& request_compiler_hash() const;
+
+  // include paths could be relative path from cwd.
+  // Also, system include paths could be relative path from toolchain root
+  // (Windows NaCl toolchain only).
+  // You should file::JoinPathRespectAbsolute with cwd before you use it in
+  // include processor.
+
+  // A quote dir is valid only if it exists. Note that a quote dir may be
+  // cwd-relative, so whether it is valid depends on cwd.
+  const std::vector<string>& quote_include_paths() const {
+    return quote_include_paths_;
+  }
+  const std::vector<string>& cxx_system_include_paths() const {
+    return cxx_system_include_paths_;
+  }
+  const std::vector<string>& system_include_paths() const {
+    return system_include_paths_;
+  }
+  const std::vector<string>& system_framework_paths() const {
+    return system_framework_paths_;
+  }
+  const string& toolchain_root() const {
+    return data_->toolchain_root();
+  }
+  const string& predefined_macros() const {
+    return data_->predefined_macros();
+  }
+  const string& name() const { return data_->name(); }
+  bool HasName() const { return data_->has_name(); }
+
+  const string& version() const { return data_->version(); }
+  const string& target() const { return data_->target(); }
+  const string& lang() const { return data_->lang(); }
+  const string& error_message() const { return data_->error_message(); }
+
+  const unordered_map<string, bool>& supported_predefined_macros() const {
+    return supported_predefined_macros_;
+  }
+  const unordered_map<string, int>& has_feature() const { return has_feature_; }
+  const unordered_map<string, int>& has_extension() const {
+    return has_extension_;
+  }
+  const unordered_map<string, int>& has_attribute() const {
+    return has_attribute_;
+  }
+  const unordered_map<string, int>& has_cpp_attribute() const {
+    return has_cpp_attribute_;
+  }
+  const unordered_map<string, int>& has_declspec_attribute() const {
+    return has_declspec_attribute_;
+  }
+  const unordered_map<string, int>& has_builtin() const {
+    return has_builtin_;
+  }
+  const std::vector<string>& additional_flags() const {
+    return additional_flags_;
+  }
+  bool HasAdditionalFlags() const { return !additional_flags_.empty(); }
+  const std::vector<SubprogramInfo>& subprograms() const {
+    return subprograms_;
+  }
+
+  time_t failed_at() const { return data_->failed_at(); }
+
+  time_t last_used_at() const;
+  void set_last_used_at(time_t t);
+
+  bool found() const { return data_->found(); }
+
+  bool IsSameCompiler(const CompilerInfo& ci) const {
+    return data_->target() == ci.data_->target()
+        && data_->version() == ci.data_->version()
+        && data_->lang() == ci.data_->lang()
+        && data_->hash() == ci.data_->hash()
+        && data_->real_compiler_path() == ci.data_->real_compiler_path();
+  }
+
+  const CompilerInfoData& data() const { return *data_; }
+
+  CompilerInfoData* get() { return data_.get(); }
+
+ private:
+  friend class CompilerInfoCacheTest;
+  void Init();
+
+  std::unique_ptr<CompilerInfoData> data_;
+
+  FileId local_compiler_id_;
+  // Real compiler's FileId if real_compiler_path != local_compiler_path.
+  // Otherwise, real_compiler_id is the same as local_compiler_id.
+  FileId real_compiler_id_;
+
+  std::vector<string> quote_include_paths_;
+  std::vector<string> cxx_system_include_paths_;
+  std::vector<string> system_include_paths_;
+  std::vector<string> system_framework_paths_;
+
+  // <macro name, hidden>.
+  // If it is hidden macro like __has_include__ in GCC 5, hidden is set.
+  unordered_map<string, bool> supported_predefined_macros_;
+  unordered_map<string, int> has_feature_;
+  unordered_map<string, int> has_extension_;
+  unordered_map<string, int> has_attribute_;
+  unordered_map<string, int> has_cpp_attribute_;
+  unordered_map<string, int> has_declspec_attribute_;
+  unordered_map<string, int> has_builtin_;
+
+  std::vector<string> additional_flags_;
+
+  // A list of subprograms specified by -B flag.
+  std::vector<SubprogramInfo> subprograms_;
+
+  mutable ReadWriteLock last_used_at_mu_;
+
+  DISALLOW_COPY_AND_ASSIGN(CompilerInfo);
+};
+
+class ScopedCompilerInfoState;
+
+// CompilerInfoState contains CompilerInfo (created from local system) and
+// disabled status (updated by response from remote).
+// It is reference counted: the destructor is private, and Ref()/Deref() are
+// reachable only from the friend classes declared below.
+class CompilerInfoState {
+ public:
+  // The constructor creates the object with refcnt_==0.
+  // The caller should call Ref before sharing it.
+  // Takes ownership of data.
+  explicit CompilerInfoState(std::unique_ptr<CompilerInfoData> data);
+
+  // Returns the CompilerInfo held by this state.
+  const CompilerInfo& info() const { return compiler_info_; }
+
+  // Returns the current reference count.
+  // Potentially racy: by the time the caller looks at the value, the actual
+  // refcnt may already have been updated. Use with care.
+  int refcnt() const;
+
+  // Returns true if it has been disabled (e.g. compiler not found in backend).
+  // Potentially racy (i.e. even if the caller gets disabled()==false, it may
+  // become true while checking input files or calling rpc), but that might
+  // be acceptable.
+  bool disabled() const;
+  string GetDisabledReason() const;
+  void SetDisabled(bool disabled, const string& disabled_reason);
+
+  // NOTE(review): Use()/used() are defined out of line; presumably Use()
+  // records one use of this compiler and used() reports the count. Confirm
+  // against the implementation.
+  void Use(const string& local_compiler_path,
+           const CompilerFlags& flags);
+  int used() const;
+
+  void UpdateLastUsedAt();
+
+ private:
+  friend class ScopedCompilerInfoState;
+  friend class CompilerInfoCache;
+  friend class CompilerInfoCacheTest;
+  // Private so that the object cannot be destroyed directly by outside code.
+  ~CompilerInfoState();
+
+  void Ref();
+  void Deref();
+
+  CompilerInfo compiler_info_;
+
+  mutable Lock mu_;  // protects refcnt_, disabled_, disabled_reason_.
+  int refcnt_;
+  // When server side does not have the information about this compiler,
+  // it's disabled.
+  bool disabled_;
+  string disabled_reason_;
+
+  int used_;
+
+  DISALLOW_COPY_AND_ASSIGN(CompilerInfoState);
+};
+
+// ScopedCompilerInfoState manages the lifecycle of a CompilerInfoState.
+// thread-unsafe.
+//
+// Initializes from a raw pointer, e.g. one returned by the cache:
+//   ScopedCompilerInfoState cis(cache->Lookup(key));
+//
+// share compiler_info_state with cis (copying is disallowed, so go through
+// the raw pointer):
+//   ScopedCompilerInfoState state;
+//   state.reset(cis.get());
+//
+//   ScopedCompilerInfoState state2(cis.get());
+//
+// transfer compiler_info_state from cis:
+//   ScopedCompilerInfoState state(std::move(cis));
+class ScopedCompilerInfoState {
+ public:
+  // Creates an empty holder that manages no state.
+  ScopedCompilerInfoState() : state_(nullptr) {}
+  explicit ScopedCompilerInfoState(CompilerInfoState* state);
+  ~ScopedCompilerInfoState();
+
+  // Move construction takes over |state|'s pointer and leaves |state| empty.
+  ScopedCompilerInfoState(ScopedCompilerInfoState&& state) noexcept
+      : state_(std::move(state.state_)) {
+    state.state_ = nullptr;
+  }
+
+  // Move assignment exchanges the held pointers: the state previously held
+  // by *this ends up in |other|. Swapping two raw pointers cannot throw, so
+  // this is noexcept like the move constructor.
+  ScopedCompilerInfoState& operator=(ScopedCompilerInfoState&& other) noexcept {
+    std::swap(state_, other.state_);
+    return *this;
+  }
+
+  // Returns the managed state (may be nullptr). Ownership is not transferred.
+  CompilerInfoState* get() const { return state_; }
+
+  // reset derefs current state and refs given state.
+  void reset(CompilerInfoState* state);
+
+  // swap swaps state with other.
+  // useful to transfer state from other without modifying refcnt.
+  void swap(ScopedCompilerInfoState* other);
+
+  bool disabled() const;
+  string GetDisabledReason() const;
+
+ private:
+  CompilerInfoState* state_;
+
+  DISALLOW_COPY_AND_ASSIGN(ScopedCompilerInfoState);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_COMPILER_INFO_H_
diff --git a/client/compiler_info_cache.cc b/client/compiler_info_cache.cc
new file mode 100644
index 0000000..e7f1e06
--- /dev/null
+++ b/client/compiler_info_cache.cc
@@ -0,0 +1,571 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include <memory>
+
+#include "compiler_info_cache.h"
+
+#include "autolock_timer.h"
+#include "compiler_flags.h"
+#include "compiler_proxy_info.h"
+#include "file.h"
+#include "glog/logging.h"
+#include "goma_hash.h"
+#include "join.h"
+#include "path.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/compiler_info_data.pb.h"
+MSVC_POP_WARNING()
+
+using std::string;
+
+namespace devtools_goma {
+
+const int kNegativeCacheDurationSec = 600;  // 10 minutes.
+const int kUpdateLastUsedAtDurationSec = 600;  // 10 minutes.
+
+CompilerInfoCache* CompilerInfoCache::instance_;
+
+// Builds the cache key string "<local_compiler_path> <base>", with |cwd|
+// appended when |cwd_relative| is true.
+string CompilerInfoCache::Key::ToString(bool cwd_relative) const {
+  string key_str = local_compiler_path + " " + base;
+  if (!cwd_relative) {
+    // if |local_compiler_path| is not absolute path,
+    // CompilerInfo may not be independent of |cwd|.
+    // e.g. with -no-canonical-prefixes
+    DCHECK(file::IsAbsolutePath(local_compiler_path));
+    return key_str;
+  }
+  key_str += cwd;
+  return key_str;
+}
+
+// Returns |local_compiler_path| joined onto |cwd| via
+// file::JoinPathRespectAbsolute.
+string CompilerInfoCache::Key::abs_local_compiler_path() const {
+  string abs_path = file::JoinPathRespectAbsolute(cwd, local_compiler_path);
+  return abs_path;
+}
+
+/* static */
+// Creates the singleton instance. An empty |cache_filename| yields a cache
+// with an empty cache file name; otherwise the file name is resolved
+// against |cache_dir|. Must not be called while an instance exists.
+void CompilerInfoCache::Init(const string& cache_dir,
+                             const string& cache_filename,
+                             int cache_holding_time_sec) {
+  CHECK(instance_ == nullptr);
+  const string cache_path =
+      cache_filename.empty()
+          ? string()
+          : file::JoinPathRespectAbsolute(cache_dir, cache_filename);
+  instance_ = new CompilerInfoCache(cache_path, cache_holding_time_sec);
+}
+
+// Destroys the singleton instance and resets the pointer so that Init() can
+// be called again. The destructor saves the cache when the cache file is
+// enabled.
+void CompilerInfoCache::Quit() {
+  delete instance_;
+  instance_ = nullptr;
+}
+
+// Sets up an empty cache with the default validator and zeroed statistics,
+// then loads previously saved entries when the cache file is enabled.
+CompilerInfoCache::CompilerInfoCache(const string& cache_filename,
+                                     int cache_holding_time_sec)
+    : cache_file_(cache_filename),
+      cache_holding_time_sec_(cache_holding_time_sec),
+      validator_(new CompilerInfoCache::CompilerInfoValidator),
+      num_stores_(0),
+      num_store_dups_(0),
+      num_miss_(0),
+      num_fail_(0),
+      loaded_size_(0) {
+  if (!cache_file_.Enabled()) {
+    LOG(INFO) << "compiler_info_cache: no cache file";
+    return;
+  }
+  Load();
+}
+
+// Persists the cache (only when the cache file is enabled) and then
+// releases every entry.
+CompilerInfoCache::~CompilerInfoCache() {
+  const bool persist = cache_file_.Enabled();
+  if (persist) {
+    Save();
+  }
+  Clear();
+}
+
+// Drops all cached entries: frees the key sets owned by |keys_by_hash_| and
+// releases the cache's reference on every CompilerInfoState.
+void CompilerInfoCache::Clear() {
+  for (auto& hash_and_keys : keys_by_hash_) {
+    unordered_set<string>* key_set = hash_and_keys.second;
+    delete key_set;
+  }
+  keys_by_hash_.clear();
+
+  for (auto& key_and_state : compiler_info_) {
+    CompilerInfoState* cached_state = key_and_state.second;
+    cached_state->Deref();
+  }
+  compiler_info_.clear();
+}
+
+/* static */
+// Builds a Key from the compiler-info-relevant flags, |key_envs|, the
+// language and the working directory of |flags|, and |local_compiler_path|.
+CompilerInfoCache::Key CompilerInfoCache::CreateKey(
+    const CompilerFlags& flags,
+    const std::string& local_compiler_path,
+    const std::vector<std::string>& key_envs) {
+  // Key parts: compiler info flags first, then the key environment strings.
+  std::vector<string> key_parts(flags.compiler_info_flags());
+  key_parts.insert(key_parts.end(), key_envs.begin(), key_envs.end());
+
+  string joined_parts;
+  JoinStrings(key_parts, " ", &joined_parts);
+
+  Key result;
+  result.local_compiler_path = local_compiler_path;
+  result.cwd = flags.cwd();
+  result.base = joined_parts + " lang:" + flags.lang() + " @";
+  return result;
+}
+
+// Looks up |key| under the shared lock. For an absolute compiler path the
+// cwd-independent key is tried first; the cwd-relative key is the fallback.
+// Returns nullptr when nothing usable is cached.
+CompilerInfoState* CompilerInfoCache::Lookup(const Key& key) {
+  AUTO_SHARED_LOCK(lock, &mu_);
+
+  CompilerInfoState* hit = nullptr;
+  if (file::IsAbsolutePath(key.local_compiler_path)) {
+    hit = LookupUnlocked(key.ToString(!Key::kCwdRelative),
+                         key.local_compiler_path);
+  }
+  if (hit == nullptr) {
+    hit = LookupUnlocked(key.ToString(Key::kCwdRelative),
+                         key.abs_local_compiler_path());
+  }
+  if (hit == nullptr) {
+    return nullptr;
+  }
+
+  // Update last used timestamp of |hit| having old timestamp.
+  const time_t since_last_use = time(nullptr) - hit->info().last_used_at();
+  if (since_last_use > kUpdateLastUsedAtDurationSec) {
+    hit->UpdateLastUsedAt();
+  }
+  return hit;
+}
+
+// Finds the entry for |compiler_info_key| without taking |mu_|; the caller
+// must already hold it. Returns the entry when the validator accepts it and
+// it is either error-free or a negative entry younger than
+// kNegativeCacheDurationSec. Returns nullptr otherwise.
+CompilerInfoState* CompilerInfoCache::LookupUnlocked(
+    const string& compiler_info_key,
+    const string& abs_local_compiler_path) {
+  const auto found = compiler_info_.find(compiler_info_key);
+  if (found == compiler_info_.end()) {
+    return nullptr;
+  }
+
+  CompilerInfoState* const cached = found->second;
+  const bool valid =
+      validator_->Validate(cached->info(), abs_local_compiler_path);
+  if (valid) {
+    VLOG(1) << "Cache hit for compiler-info with key: "
+            << compiler_info_key;
+
+    // A successful entry is always usable; a failed one only while the
+    // negative cache has not expired.
+    if (!cached->info().HasError() ||
+        time(nullptr) < cached->info().failed_at() + kNegativeCacheDurationSec) {
+      return cached;
+    }
+
+    VLOG(1) << "Negative cache is expired: " << compiler_info_key;
+  }
+
+  LOG(INFO) << "Cache hit, but obsolete compiler-info for key: "
+            << compiler_info_key;
+  return nullptr;
+}
+
+// Stores |data| under |key| and returns the CompilerInfoState now cached for
+// that key. Takes ownership of |data|.
+//
+// If an entry with the same data hash is already cached and still valid,
+// that state is shared instead of creating a new one. If the key was already
+// mapped to another state, the old state is dereferenced and its key is
+// removed from the hash index. The returned state is marked disabled when
+// the same compiler has already been disabled.
+CompilerInfoState* CompilerInfoCache::Store(
+    const Key& key, std::unique_ptr<CompilerInfoData> data) {
+  AUTO_EXCLUSIVE_LOCK(lock, &mu_);
+  DCHECK(data != nullptr);
+
+  ScopedCompilerInfoState state;
+
+  bool dup = false;
+  string hash = HashKey(*data);
+  {
+    // Try to share an existing state that has identical data.
+    auto found = keys_by_hash_.find(hash);
+    if (found != keys_by_hash_.end()) {
+      unordered_set<string>* keys = found->second;
+      if (!keys->empty()) {
+        const string& compiler_info_key = *keys->begin();
+        state.reset(LookupUnlocked(
+            compiler_info_key, key.abs_local_compiler_path()));
+        if (state.get() != nullptr) {
+          LOG(INFO) << "hash=" << hash << " share with " << compiler_info_key;
+          dup = true;
+        }
+      }
+    }
+  }
+
+  if (state.get() == nullptr) {
+    state.reset(new CompilerInfoState(std::move(data)));
+  }
+  state.get()->Ref();  // in cache.
+
+  // Update statistics.
+  if (!state.get()->info().found()) {
+    ++num_miss_;
+    DCHECK(state.get()->info().HasError());
+    DCHECK_NE(state.get()->info().failed_at(), 0);
+  } else if (state.get()->info().HasError()) {
+    ++num_fail_;
+    DCHECK_NE(state.get()->info().failed_at(), 0);
+  } else if (dup) {
+    ++num_store_dups_;
+    DCHECK_EQ(state.get()->info().failed_at(), 0);
+  } else {
+    ++num_stores_;
+    DCHECK_EQ(state.get()->info().failed_at(), 0);
+  }
+
+  // The key includes cwd unless the compiler path is absolute and the
+  // compiler info does not depend on cwd.
+  string old_hash;
+  const string compiler_info_key =
+      key.ToString(!file::IsAbsolutePath(key.local_compiler_path) ||
+                   state.get()->info().IsCwdRelative(key.cwd));
+  {
+    auto p = compiler_info_.insert(
+        std::make_pair(compiler_info_key, state.get()));
+    if (!p.second) {
+      // The key was already cached: replace the old state.
+      CompilerInfoState* old_state = p.first->second;
+      old_hash = HashKey(old_state->info().data());
+      old_state->Deref();
+      p.first->second = state.get();
+    }
+  }
+  {
+    unordered_set<string>* keys = nullptr;
+    auto p = keys_by_hash_.insert(std::make_pair(hash, keys));
+    if (p.second) {
+      p.first->second = new unordered_set<string>;
+    }
+    p.first->second->insert(compiler_info_key);
+    LOG(INFO) << "hash=" << hash << " key=" << compiler_info_key;
+  }
+  // Unregister the key from the replaced entry's hash. Skip this when the
+  // replaced entry had the same hash: the key registered just above lives in
+  // that very set and must stay there.
+  if (!old_hash.empty() && old_hash != hash) {
+    auto p = keys_by_hash_.find(old_hash);
+    if (p != keys_by_hash_.end()) {
+      LOG(INFO) << "delete hash=" << old_hash << " key=" << compiler_info_key;
+      p->second->erase(compiler_info_key);
+      if (p->second->empty()) {
+        LOG(INFO) << "delete hash=" << old_hash;
+        delete p->second;
+        keys_by_hash_.erase(p);
+      }
+    }
+  }
+  if (dup) {
+    DCHECK_GT(state.get()->refcnt(), 2);
+  } else {
+    DCHECK_EQ(state.get()->refcnt(), 2);
+  }
+  LOG(INFO) << "Update state=" << state.get()
+            << " for key=" << compiler_info_key
+            << " hash=" << hash;
+
+  // Check if the same local compiler was already disabled.
+  for (const auto& info : compiler_info_) {
+    CompilerInfoState* cis = info.second;
+    if (!cis->disabled())
+      continue;
+    if (state.get()->info().IsSameCompiler(cis->info())) {
+      state.get()->SetDisabled(true, "the same compiler is already disabled");
+      LOG(INFO) << "Disabled state=" << state.get();
+      break;
+    }
+  }
+  // CompilerInfoState is referenced in cache, so it won't be destroyed
+  // when state is destroyed.
+  return state.get();
+}
+
+// Disables |compiler_info_state| and every other cached state that refers to
+// the same compiler, recording |disabled_reason| on each.
+// Returns true if |compiler_info_state| itself was newly disabled by this
+// call, and false if it had already been disabled.
+bool CompilerInfoCache::Disable(CompilerInfoState* compiler_info_state,
+                                const std::string& disabled_reason) {
+  AUTO_EXCLUSIVE_LOCK(lock, &mu_);
+
+  LOG(INFO) << "Disable state=" << compiler_info_state;
+  bool disabled = false;
+  if (!compiler_info_state->disabled()) {
+    compiler_info_state->SetDisabled(true, disabled_reason);
+    LOG(INFO) << "Disabled state=" << compiler_info_state;
+    disabled = true;
+  }
+
+  // Also mark other CompilerInfo disabled if it is the same
+  // local compiler (but it would use different compiler_info_flags).
+  for (auto& info : compiler_info_) {
+    CompilerInfoState* cis = info.second;
+    if (cis->disabled())
+      continue;
+    if (compiler_info_state->info().IsSameCompiler(cis->info())) {
+      cis->SetDisabled(true, disabled_reason);
+      LOG(INFO) << "Disabled state=" << cis;
+    }
+  }
+
+  return disabled;
+}
+
+// Writes a human-readable dump of the cache to |ss|: the entry counts, the
+// keys registered for each data hash, and the details of every cached state.
+void CompilerInfoCache::Dump(std::ostringstream* ss) {
+  AUTO_SHARED_LOCK(lock, &mu_);
+  std::ostringstream& out = *ss;
+
+  out << "compiler info:" << compiler_info_.size()
+      << " info_hashes=" << keys_by_hash_.size() << "\n";
+
+  out << "\n[keys by hash]\n";
+  for (const auto& hash_entry : keys_by_hash_) {
+    out << "hash: " << hash_entry.first << "\n";
+    for (const auto& info_key : *hash_entry.second) {
+      out << " key:" << info_key << "\n";
+    }
+    out << "\n";
+  }
+  out << "\n";
+
+  out << "\n[compiler info]\n\n";
+  for (const auto& info_entry : compiler_info_) {
+    CompilerInfoState* const state = info_entry.second;
+    out << "key: " << info_entry.first << "\n";
+    if (state->disabled()) {
+      out << "disabled ";
+    }
+    out << "state=" << state
+        << " cnt=" << state->refcnt()
+        << " used=" << state->used()
+        << "\n";
+    out << state->info().DebugString() << "\n";
+  }
+}
+
+// Dumps information about the compilers themselves (not the whole
+// CompilerInfo) into (*json)["compilers"].
+// Each compiler is dumped only once, identified by local_compiler_path.
+void CompilerInfoCache::DumpCompilersJSON(Json::Value* json) {
+  AUTO_SHARED_LOCK(lock, &mu_);
+
+  // Dumping whole CompilerInfoData could be too large, and
+  // it is not compiler itself information but CompilerInfo.
+  // So, we extract a few fields from CompilerInfoData.
+
+  Json::Value compilers(Json::arrayValue);
+
+  unordered_set<std::string> seen_paths;
+  for (const auto& entry : compiler_info_) {
+    const CompilerInfoData& data = entry.second->info().data();
+
+    // insert() reports whether the path is new, so the same compiler does
+    // not appear twice.
+    if (!seen_paths.insert(data.local_compiler_path()).second) {
+      continue;
+    }
+
+    Json::Value compiler;
+    compiler["name"] = data.name();
+    compiler["version"] = data.version();
+    compiler["target"] = data.target();
+
+    compiler["local_compiler_path"] = data.local_compiler_path();
+    compiler["local_compiler_hash"] = data.local_compiler_hash();
+
+    compiler["real_compiler_path"] = data.real_compiler_path();
+    // hash() is real compiler hash.
+    compiler["real_compiler_hash"] = data.hash();
+
+    compilers.append(std::move(compiler));
+  }
+
+  (*json)["compilers"] = std::move(compilers);
+}
+
+// Returns true if at least one cached CompilerInfoState is disabled.
+bool CompilerInfoCache::HasCompilerMismatch() const {
+  AUTO_SHARED_LOCK(lock, &mu_);
+  for (const auto& entry : compiler_info_) {
+    const CompilerInfoState* state = entry.second;
+    if (state->disabled()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns how many Store() calls registered a new (non-shared) compiler
+// info that was found and has no error.
+int CompilerInfoCache::NumStores() const {
+  AUTO_SHARED_LOCK(lock, &mu_);
+  const int stores = num_stores_;
+  return stores;
+}
+
+int CompilerInfoCache::NumStoreDups() const {
+  AUTO_SHARED_LOCK(lock, &mu_);
+  return num_store_dups_;
+}
+
+// Returns how many Store() calls stored a compiler info whose found() was
+// false.
+int CompilerInfoCache::NumMiss() const {
+  AUTO_SHARED_LOCK(lock, &mu_);
+  const int misses = num_miss_;
+  return misses;
+}
+
+// Returns how many Store() calls stored a compiler info that was found but
+// has an error.
+int CompilerInfoCache::NumFail() const {
+  AUTO_SHARED_LOCK(lock, &mu_);
+  const int failures = num_fail_;
+  return failures;
+}
+
+// Returns the byte size of the table loaded from the cache file
+// (0 if nothing has been loaded).
+int CompilerInfoCache::LoadedSize() const {
+  AUTO_SHARED_LOCK(lock, &mu_);
+  const int loaded_bytes = loaded_size_;
+  return loaded_bytes;
+}
+
+// Replaces the validator. |validator| must not be null; the cache takes
+// ownership of it and destroys the previous validator.
+void CompilerInfoCache::SetValidator(CompilerInfoValidator* validator) {
+  CHECK(validator);
+  std::unique_ptr<CompilerInfoValidator> replacement(validator);
+  validator_ = std::move(replacement);
+}
+
+// Default validation: the cached |compiler_info| is valid when
+// CompilerInfo::IsUpToDate accepts |local_compiler_path|.
+bool CompilerInfoCache::CompilerInfoValidator::Validate(
+    const CompilerInfo& compiler_info,
+    const string& local_compiler_path) {
+  const bool up_to_date = compiler_info.IsUpToDate(local_compiler_path);
+  return up_to_date;
+}
+
+/* static */
+// Returns the hash key computed by ComputeDataHashKey over the serialized
+// form of |data|.
+string CompilerInfoCache::HashKey(const CompilerInfoData& data) {
+  string serialized_data;
+  data.SerializeToString(&serialized_data);
+
+  string hash_key;
+  ComputeDataHashKey(serialized_data, &hash_key);
+  return hash_key;
+}
+
+// Loads cached compiler info from the cache file.
+// Returns false if the file could not be loaded or if it was written by a
+// binary with a different built revision; in the latter case the cache is
+// cleared. On success, entries that are too old or no longer match the
+// local compilers are dropped by UpdateOlderCompilerInfo().
+bool CompilerInfoCache::Load() {
+  AUTO_EXCLUSIVE_LOCK(lock, &mu_);
+
+  LOG(INFO) << "loading from " << cache_file_.filename();
+
+  CompilerInfoDataTable table;
+  if (!cache_file_.Load(&table)) {
+    LOG(ERROR) << "failed to load cache file " << cache_file_.filename();
+    return false;
+  }
+
+  // Check the revision before unmarshaling so that a table that is going to
+  // be discarded anyway is never copied into the cache.
+  if (table.built_revision() != kBuiltRevisionString) {
+    LOG(WARNING) << "loaded from " << cache_file_.filename()
+                 << " mismatch built_revision: got=" << table.built_revision()
+                 << " want=" << kBuiltRevisionString;
+    Clear();
+    return false;
+  }
+
+  Unmarshal(table);
+
+  loaded_size_ = table.ByteSize();
+
+  LOG(INFO) << "loaded from " << cache_file_.filename()
+            << " loaded size " << loaded_size_;
+
+  UpdateOlderCompilerInfo();
+
+  return true;
+}
+
+// Checks every cached CompilerInfo and removes the obsolete ones:
+//  - entries not used within |cache_holding_time_sec_| are evicted;
+//  - entries rejected by the validator are kept only if
+//    UpdateFileIdIfHashMatch() succeeds for them.
+// Removed keys are also unregistered from |keys_by_hash_| so that the hash
+// index never refers to keys missing from |compiler_info_|.
+void CompilerInfoCache::UpdateOlderCompilerInfo() {
+  // Check CompilerInfo validity. Obsolete CompilerInfo will be removed.
+  // Since calculating sha256 is slow, we need cache. Otherwise, we will
+  // need more than 2 seconds to check.
+  unordered_map<string, string> sha256_cache;
+  std::vector<string> keys_to_remove;
+  time_t now = time(nullptr);
+
+  for (const auto& entry : compiler_info_) {
+    const std::string& key = entry.first;
+    CompilerInfoState* state = entry.second;
+
+    const std::string& abs_local_compiler_path =
+        state->compiler_info_.abs_local_compiler_path();
+
+    // if the cache is not used recently, we do not reuse it.
+    time_t time_diff = now - state->info().last_used_at();
+    if (time_diff > cache_holding_time_sec_) {
+      LOG(INFO) << "evict old cache: " << abs_local_compiler_path
+                << " last used at: "
+                << time_diff / (60 * 60 * 24)
+                << " days ago";
+      keys_to_remove.push_back(key);
+      continue;
+    }
+
+    if (validator_->Validate(state->info(), abs_local_compiler_path)) {
+      LOG(INFO) << "valid compiler: " << abs_local_compiler_path;
+      continue;
+    }
+
+    if (state->compiler_info_.UpdateFileIdIfHashMatch(&sha256_cache)) {
+      LOG(INFO) << "compiler fileid didn't match, but hash matched: "
+                << abs_local_compiler_path;
+      continue;
+    }
+
+    LOG(INFO) << "compiler outdated: " << abs_local_compiler_path;
+    keys_to_remove.push_back(key);
+  }
+
+  if (keys_to_remove.empty()) {
+    return;
+  }
+
+  // Removal is deferred to here because erasing while iterating over
+  // |compiler_info_| above would invalidate the iteration.
+  for (const auto& key : keys_to_remove) {
+    LOG(INFO) << "Removing outdated compiler: " << key;
+    auto it = compiler_info_.find(key);
+    if (it != compiler_info_.end()) {
+      it->second->Deref();
+      compiler_info_.erase(it);
+    }
+  }
+
+  // Unregister the removed keys from the hash index. Hashes whose key set
+  // became empty are collected first and erased afterwards, so the map is
+  // not modified while it is being iterated.
+  std::vector<string> emptied_hashes;
+  for (auto& hash_entry : keys_by_hash_) {
+    unordered_set<string>* keys = hash_entry.second;
+    size_t num_erased = 0;
+    for (const auto& key : keys_to_remove) {
+      num_erased += keys->erase(key);
+    }
+    if (num_erased > 0 && keys->empty()) {
+      emptied_hashes.push_back(hash_entry.first);
+    }
+  }
+  for (const auto& hash : emptied_hashes) {
+    auto it = keys_by_hash_.find(hash);
+    if (it != keys_by_hash_.end()) {
+      delete it->second;
+      keys_by_hash_.erase(it);
+    }
+  }
+}
+
+// Populates |compiler_info_| and |keys_by_hash_| from |table|.
+// One CompilerInfoState is created per table entry and shared by all of the
+// entry's keys. A key that is already cached is skipped, so the state's
+// reference count always matches the number of map slots pointing at it.
+// Always returns true.
+bool CompilerInfoCache::Unmarshal(const CompilerInfoDataTable& table) {
+  for (const auto& it : table.compiler_info_data()) {
+    std::unique_ptr<CompilerInfoData> cid(new CompilerInfoData);
+    *cid = it.data();
+    const string hash = HashKey(*cid);
+    ScopedCompilerInfoState state(new CompilerInfoState(std::move(cid)));
+
+    // Keys of this entry that were actually registered in |compiler_info_|.
+    std::unique_ptr<unordered_set<string>> keys(new unordered_set<string>);
+    for (const auto& key : it.keys()) {
+      if (!compiler_info_.insert(std::make_pair(key, state.get())).second) {
+        // Taking a reference here would leak the state: no map slot would
+        // ever release it.
+        if (keys->count(key) == 0) {
+          LOG(WARNING) << "duplicated key in cache, ignored: " << key;
+        }
+        continue;
+      }
+      state.get()->Ref();
+      keys->insert(key);
+    }
+
+    auto p = keys_by_hash_.insert(std::make_pair(hash, keys.get()));
+    if (p.second) {
+      // |keys_by_hash_| owns the set from now on.
+      keys.release();
+    } else {
+      // Another entry with the same hash was already loaded: merge the keys
+      // into its set. |keys| is freed when it goes out of scope.
+      p.first->second->insert(keys->begin(), keys->end());
+    }
+  }
+  // TODO: can be void?
+  return true;
+}
+
+// Marshals the cache into a table and writes it to the cache file.
+// Returns true only when both steps succeed.
+bool CompilerInfoCache::Save() {
+  AUTO_EXCLUSIVE_LOCK(lock, &mu_);
+
+  LOG(INFO) << "saving to " << cache_file_.filename();
+
+  CompilerInfoDataTable table;
+  const bool marshaled = Marshal(&table);
+  if (!marshaled) {
+    return false;
+  }
+
+  const bool written = cache_file_.Save(table);
+  if (!written) {
+    LOG(ERROR) << "failed to save cache file " << cache_file_.filename();
+    return false;
+  }
+
+  LOG(INFO) << "saved to " << cache_file_.filename();
+  return true;
+}
+
+// Fills |table| from the cache. Disabled states are skipped. States whose
+// data hash is the same share one table entry, which lists all of their
+// keys. The current built revision is recorded in the table.
+// Always returns true.
+bool CompilerInfoCache::Marshal(CompilerInfoDataTable* table) {
+  unordered_map<string, CompilerInfoDataTable::Entry*> entry_by_hash;
+  for (const auto& cached : compiler_info_) {
+    CompilerInfoState* state = cached.second;
+    if (state->disabled()) {
+      continue;
+    }
+
+    const CompilerInfoData& data = state->info().data();
+    const string hash = HashKey(data);
+
+    auto found = entry_by_hash.find(hash);
+    if (found == entry_by_hash.end()) {
+      // First state with this hash: create its table entry.
+      CompilerInfoDataTable::Entry* new_entry = table->add_compiler_info_data();
+      new_entry->mutable_data()->CopyFrom(data);
+      found = entry_by_hash.insert(std::make_pair(hash, new_entry)).first;
+    }
+    found->second->add_keys(cached.first);
+  }
+  table->set_built_revision(kBuiltRevisionString);
+  // TODO: can be void?
+  return true;
+}
+
+}  // namespace devtools_goma
diff --git a/client/compiler_info_cache.h b/client/compiler_info_cache.h
new file mode 100644
index 0000000..0fd124d
--- /dev/null
+++ b/client/compiler_info_cache.h
@@ -0,0 +1,154 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_COMPILER_INFO_CACHE_H_
+#define DEVTOOLS_GOMA_CLIENT_COMPILER_INFO_CACHE_H_
+
+#include <ctime>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+#include "cache_file.h"
+#include "compiler_info.h"
+#include "json/json.h"
+#include "lockhelper.h"
+#include "unordered.h"
+
+namespace devtools_goma {
+
+class CompilerFlags;
+class CompilerInfo;
+class CompilerInfoState;
+class CompilerInfoDataTable;
+
+// CompilerInfoCache caches CompilerInfo.
+// Information about a particular compiler found in 'path', with
+// extra '-mxx' information.
+// This class is thread-safe.
+class CompilerInfoCache {
+ public:
+  // Key identifies a cache entry.
+  struct Key {
+    // Argument value for ToString() that requests the cwd-relative form.
+    static const bool kCwdRelative = true;
+    // Compiler info flags, key environments and language joined as a string.
+    std::string base;
+    std::string cwd;
+    std::string local_compiler_path;
+
+    // Returns the key string. |cwd| is included only when |cwd_relative| is
+    // true; otherwise |local_compiler_path| is expected to be absolute.
+    std::string ToString(bool cwd_relative) const;
+    // Returns |local_compiler_path| joined onto |cwd|.
+    std::string abs_local_compiler_path() const;
+  };
+
+  // The default CompilerInfoValidator just calls IsUpToDate() of
+  // CompilerInfo.
+  // You can set your own validator to test CompilerInfoCache.
+  class CompilerInfoValidator {
+  public:
+    virtual ~CompilerInfoValidator() {}
+    // Returns true if compiler_info cache is valid.
+    virtual bool Validate(const CompilerInfo& compiler_info,
+                          const std::string& local_compiler_path);
+  };
+
+  ~CompilerInfoCache();
+
+  // Initializes the CompilerInfoCache.
+  // when cache_filename is empty, this won't load cached data.
+  // otherwise, it will try to load cached data from
+  // JoinPathRespectAbsolute(cache_dir, cache_filename).
+  static void Init(const string& cache_dir, const string& cache_filename,
+                   int cache_holding_time_sec);
+  static CompilerInfoCache* instance() { return instance_; }
+
+  // Saves CompilerInfoCache into cache file (if one is enabled) and destroys
+  // the instance.
+  static void Quit();
+
+  static Key CreateKey(const CompilerFlags& flags,
+                       const std::string& local_compiler_path,
+                       const std::vector<std::string>& key_envs);
+
+  // Lookup just checks cached compiler_info.
+  // Returns CompilerInfoState in cache.
+  // It would be better to use ScopedCompilerInfoState to manage the
+  // returned pointer.
+  //    ScopedCompilerInfoState cis(cache->Lookup(...));
+  //
+  // Note that found compiler_info may not be valid.
+  // Returns nullptr if it missed in cache or found obsoleted.
+  CompilerInfoState* Lookup(const Key& key);
+
+  // Store stores compiler_info in cache and returns compiler_info_state.
+  // compiler_info may be disabled if the same local compiler was already
+  // disabled.
+  // Takes ownership of data.
+  CompilerInfoState* Store(const Key& key,
+                           std::unique_ptr<CompilerInfoData> data);
+
+  // Disable compiler_info_state and other compiler_info_states with
+  // the same local compiler.
+  // Returns true if compiler_info_state was newly disabled by this call.
+  bool Disable(CompilerInfoState* compiler_info_state,
+               const std::string& disabled_reason);
+
+  void Dump(std::ostringstream* ss);
+  void DumpCompilersJSON(Json::Value* json);
+
+  // Returns true if any cached compiler_info_state is disabled.
+  bool HasCompilerMismatch() const;
+
+  // Statistics counters.
+  int NumStores() const;
+  int NumStoreDups() const;
+  int NumMiss() const;
+  int NumFail() const;
+  int LoadedSize() const;
+
+  // Takes the ownership of validator.
+  // Use this for testing purpose.
+  void SetValidator(CompilerInfoValidator* validator);
+  CompilerInfoValidator* validator() const { return validator_.get(); }
+
+ private:
+  CompilerInfoCache(const string& cache_filename, int cache_holding_time_sec);
+
+  static string HashKey(const CompilerInfoData& data);
+  bool Load();
+  bool Unmarshal(const CompilerInfoDataTable& table);
+  bool Save();
+  bool Marshal(CompilerInfoDataTable* table);
+  void Clear();
+
+  // Same as Lookup for a single key string, but does not take |mu_|.
+  // The caller must hold |mu_|.
+  CompilerInfoState* LookupUnlocked(const string& compiler_info_key,
+                                    const string& abs_local_compiler_path);
+
+  // Check CompilerInfo validity. CompilerInfo that does not match with the
+  // current local compiler will be removed or updated.
+  void UpdateOlderCompilerInfo();
+
+  friend class CompilerInfoCacheTest;
+
+  static CompilerInfoCache* instance_;
+
+  const CacheFile cache_file_;
+  const int cache_holding_time_sec_;
+
+  std::unique_ptr<CompilerInfoValidator> validator_;
+
+  mutable ReadWriteLock mu_;
+
+  // key: compiler_info_key
+  unordered_map<std::string, CompilerInfoState*> compiler_info_;
+
+  // key: hash of CompilerInfoData. value: set of compiler_info_keys that
+  // were stored with that hash. The sets are owned by this map.
+  unordered_map<std::string, unordered_set<std::string>*> keys_by_hash_;
+
+  int num_stores_;
+  int num_store_dups_;
+  int num_miss_;
+  int num_fail_;
+  int loaded_size_;
+
+  DISALLOW_COPY_AND_ASSIGN(CompilerInfoCache);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_COMPILER_INFO_CACHE_H_
diff --git a/client/compiler_info_cache_unittest.cc b/client/compiler_info_cache_unittest.cc
new file mode 100644
index 0000000..5c745f6
--- /dev/null
+++ b/client/compiler_info_cache_unittest.cc
@@ -0,0 +1,805 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "compiler_info_cache.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "compiler_flags.h"
+#include "path.h"
+#include "subprocess.h"
+#include "unittest_util.h"
+#include "util.h"
+
+using std::string;
+
+namespace {
+const int kCacheHoldingTimeSec = 60 * 60 * 24 * 30;  // 30 days; holding time given to the test cache.
+}  // namespace
+
+namespace devtools_goma {
+
+// Validator stub used by the fixture by default: Validate() accepts every
+// CompilerInfo without inspecting it.
+class TestCompilerInfoValidator
+    : public CompilerInfoCache::CompilerInfoValidator {
+ public:
+  TestCompilerInfoValidator() {}
+
+  // Always returns true; neither argument is consulted.
+  bool Validate(const CompilerInfo& /* compiler_info */,
+                const string& /* local_compiler_path */) override {
+    return true;
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(TestCompilerInfoValidator);
+};
+
+
+// Validator for tests that compares a CompilerInfo against an expected
+// FileId and an expected hash configured through the setters below.
+class HashCheckingCompilerInfoValidator
+    : public CompilerInfoCache::CompilerInfoValidator {
+ public:
+  HashCheckingCompilerInfoValidator() {}
+
+  // Returns true when the local compiler FileId matches; otherwise falls back
+  // to comparing the local compiler hash. Returns false only when both differ,
+  // i.e. the compiler is considered updated. |local_compiler_path| is unused.
+  bool Validate(const CompilerInfo& compiler_info,
+                const string& local_compiler_path) override {
+    const bool same_file_id =
+        compiler_info.local_compiler_id() == local_compiler_file_id_;
+    // The hash is compared only when the FileId did not already match.
+    return same_file_id ||
+           compiler_info.local_compiler_hash() == local_compiler_hash_;
+  }
+
+  // Sets the hash Validate() compares against.
+  void SetLocalCompilerHash(const std::string& hash) {
+    local_compiler_hash_ = hash;
+  }
+
+  // Sets the FileId Validate() compares against.
+  void SetLocalCompilerFileId(const FileId& file_id) {
+    local_compiler_file_id_ = file_id;
+  }
+
+ private:
+  std::string local_compiler_hash_;
+  FileId local_compiler_file_id_;
+};
+
+class CompilerInfoCacheTest : public testing::Test {  // Fixture with its own cache instance (empty cache filename).
+ public:
+  CompilerInfoCacheTest()
+      : cache_(new CompilerInfoCache("", kCacheHoldingTimeSec)),
+        validator_(new TestCompilerInfoValidator) {
+    cache_->SetValidator(validator_);  // cache_ takes ownership of the validator.
+  }
+
+ protected:
+  bool Unmarshal(const CompilerInfoDataTable& table) {  // Forwards to the private CompilerInfoCache::Unmarshal.
+    return cache_->Unmarshal(table);
+  }
+  bool Marshal(CompilerInfoDataTable* table) {  // Forwards to the private CompilerInfoCache::Marshal.
+    return cache_->Marshal(table);
+  }
+  string HashKey(const CompilerInfoData& data) {  // Forwards to the private static CompilerInfoCache::HashKey.
+    return cache_->HashKey(data);
+  }
+  void Clear() {  // Forwards to the private CompilerInfoCache::Clear.
+    cache_->Clear();
+  }
+  void UpdateOlderCompilerInfo() {  // Forwards to the private CompilerInfoCache::UpdateOlderCompilerInfo.
+    cache_->UpdateOlderCompilerInfo();
+  }
+
+  void SetFailedAt(CompilerInfoState* state, time_t failed_at) {  // Overwrites error text and failure time in place.
+    // TODO: in prod code, CompilerInfo would never be updated like this.
+    // The error message changes only when new CompilerInfo data is stored.
+    CompilerInfoBuilder::OverrideError(
+        "error message by SetFailedAt()", failed_at,
+        state->compiler_info_.data_.get());
+  }
+
+  void SetValidator(CompilerInfoCache::CompilerInfoValidator* validator) {  // Hands |validator| over to cache_.
+    cache_->SetValidator(validator);
+    validator_ = validator;  // Keep a non-owning alias for the tests.
+  }
+
+  void SetCompilerInfoFileId(CompilerInfo* compiler_info,
+                             const FileId& file_id) {
+    compiler_info->local_compiler_id_ = file_id;  // Writes the member directly.
+  }
+
+  void SetCompilerInfoHash(CompilerInfo* compiler_info,
+                           const std::string& hash) {
+    compiler_info->data_->set_hash(hash);  // Sets |hash| on the underlying CompilerInfoData.
+  }
+
+  const unordered_map<string, CompilerInfoState*>& compiler_info() const {  // Read-only view of key -> state.
+    return cache_->compiler_info_;
+  }
+
+  const unordered_map<string, unordered_set<string>*>& keys_by_hash() const {  // Read-only view of hash -> keys.
+    return cache_->keys_by_hash_;
+  }
+
+  std::unique_ptr<CompilerInfoCache> cache_;
+  CompilerInfoCache::CompilerInfoValidator* validator_;  // Owned by cache_.
+};
+
+TEST_F(CompilerInfoCacheTest, Lookup) {  // Lookup() returns the stored state; refcnt tracks holders.
+  std::vector<string> args;
+  args.push_back("/usr/bin/gcc");
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::New(args, "/tmp"));
+  std::vector<string> key_env;
+
+  CompilerInfoCache::Key key(CompilerInfoCache::CreateKey(
+      *flags, "/usr/bin/gcc", key_env));
+  ScopedCompilerInfoState cis(cache_->Lookup(key));
+  EXPECT_TRUE(cis.get() == nullptr);  // Nothing has been stored yet.
+
+  // Store a valid (found) compiler info under |key|.
+  std::unique_ptr<CompilerInfoData> cid(new CompilerInfoData);
+  cid->set_found(true);
+
+  cis.reset(cache_->Store(key, std::move(cid)));
+  EXPECT_EQ(2, cis.get()->refcnt());  // caller & in cache
+
+  CompilerInfoState* state = cis.get();
+  cis.reset(nullptr);
+  EXPECT_EQ(1, state->refcnt());  // in cache
+
+  // A second Lookup() must return the very same state object, i.e. we don't
+  // need to fill CompilerInfo again.
+  cis.reset(cache_->Lookup(key));
+  EXPECT_TRUE(cis.get() == state);
+  EXPECT_EQ(2, cis.get()->refcnt()); // caller & in cache.
+
+  ScopedCompilerInfoState cis2(std::move(cis));  // Move leaves |cis| empty; refcnt stays 2.
+  EXPECT_TRUE(cis.get() == nullptr);
+  EXPECT_TRUE(cis2.get() == state);
+  EXPECT_EQ(2, cis2.get()->refcnt());
+
+  cis2.reset(nullptr);
+  EXPECT_EQ(1, state->refcnt()); // in cache.
+}
+
+// With a relative compiler path ("./clang") the key keeps the path relative,
+// and the cwd-relative key strings built for /dir1 and /dir2 must differ.
+TEST_F(CompilerInfoCacheTest, CompilerInfoCacheKeyRelative) {
+  std::vector<string> compiler_args;
+  compiler_args.push_back("./clang");
+  const std::vector<string> no_env;
+
+  // Same command line, two different working directories.
+  std::unique_ptr<CompilerFlags> dir1_flags(
+      CompilerFlags::New(compiler_args, "/dir1"));
+  std::unique_ptr<CompilerFlags> dir2_flags(
+      CompilerFlags::New(compiler_args, "/dir2"));
+
+  CompilerInfoCache::Key dir1_key =
+      CompilerInfoCache::CreateKey(*dir1_flags, "./clang", no_env);
+  CompilerInfoCache::Key dir2_key =
+      CompilerInfoCache::CreateKey(*dir2_flags, "./clang", no_env);
+
+  EXPECT_FALSE(file::IsAbsolutePath(dir1_key.local_compiler_path));
+  EXPECT_FALSE(file::IsAbsolutePath(dir2_key.local_compiler_path));
+
+  const bool cwd_relative = CompilerInfoCache::Key::kCwdRelative;
+  EXPECT_NE(dir1_key.ToString(cwd_relative), dir2_key.ToString(cwd_relative));
+}
+
+// With an absolute compiler path the cwd-relative key strings still differ
+// between /dir1 and /dir2, but the non-cwd-relative key strings are equal.
+TEST_F(CompilerInfoCacheTest, CompilerInfoCacheKeyAbsolute) {
+  std::vector<string> compiler_args;
+  compiler_args.push_back("/usr/bin/clang");
+  const std::vector<string> no_env;
+
+  // Same command line, two different working directories.
+  std::unique_ptr<CompilerFlags> dir1_flags(
+      CompilerFlags::New(compiler_args, "/dir1"));
+  std::unique_ptr<CompilerFlags> dir2_flags(
+      CompilerFlags::New(compiler_args, "/dir2"));
+
+  CompilerInfoCache::Key dir1_key =
+      CompilerInfoCache::CreateKey(*dir1_flags, "/usr/bin/clang", no_env);
+  CompilerInfoCache::Key dir2_key =
+      CompilerInfoCache::CreateKey(*dir2_flags, "/usr/bin/clang", no_env);
+
+  EXPECT_TRUE(file::IsAbsolutePath(dir1_key.local_compiler_path));
+  EXPECT_TRUE(file::IsAbsolutePath(dir2_key.local_compiler_path));
+
+  const bool cwd_relative = CompilerInfoCache::Key::kCwdRelative;
+  const bool cwd_independent = !CompilerInfoCache::Key::kCwdRelative;
+
+  EXPECT_NE(dir1_key.ToString(cwd_relative), dir2_key.ToString(cwd_relative));
+  EXPECT_EQ(dir1_key.ToString(cwd_independent),
+            dir2_key.ToString(cwd_independent));
+}
+
+TEST_F(CompilerInfoCacheTest, DupStore) {  // Identical data under two keys shares one state.
+  std::vector<string> args;
+  args.push_back("/usr/bin/gcc");
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::New(args, "/tmp"));
+  std::vector<string> key_env;
+
+  CompilerInfoCache::Key key(CompilerInfoCache::CreateKey(
+      *flags, "/usr/bin/gcc", key_env));
+  ScopedCompilerInfoState cis(cache_->Lookup(key));
+  EXPECT_TRUE(cis.get() == nullptr);  // Nothing has been stored yet.
+
+  time_t now = time(nullptr);
+  // Store a valid (found) compiler info under |key|.
+  std::unique_ptr<CompilerInfoData> cid(new CompilerInfoData);
+  cid->set_last_used_at(now);
+  cid->set_found(true);
+
+  cis.reset(cache_->Store(key, std::move(cid)));
+  EXPECT_EQ(2, cis.get()->refcnt());  // caller & in cache
+
+  {
+    EXPECT_EQ(1U, keys_by_hash().size());  // One hash ...
+    const unordered_set<string>& keys = *keys_by_hash().begin()->second;
+    EXPECT_EQ(1U, keys.size());  // ... referenced by one key.
+  }
+
+  CompilerInfoState* state = cis.get();
+  cis.reset(nullptr);
+  EXPECT_EQ(1, state->refcnt());  // in cache
+
+  // A second Lookup() must return the very same state object, i.e. we don't
+  // need to fill CompilerInfo again.
+  cis.reset(cache_->Lookup(key));
+  EXPECT_TRUE(cis.get() == state);
+  EXPECT_EQ(2, cis.get()->refcnt());  // caller & in cache.
+
+  // Build a different compiler_info_key by adding a flag.
+  args.push_back("-fPIC");
+  flags = CompilerFlags::New(args, "/tmp");
+
+  CompilerInfoCache::Key key2(CompilerInfoCache::CreateKey(
+      *flags, "/usr/bin/gcc", key_env));
+  EXPECT_NE(key.base, key2.base);
+  ASSERT_TRUE(file::IsAbsolutePath(key.local_compiler_path));
+  ASSERT_TRUE(file::IsAbsolutePath(key2.local_compiler_path));
+  EXPECT_NE(key.ToString(false),
+            key2.ToString(false));
+
+  cis.reset(cache_->Lookup(key2));
+  EXPECT_TRUE(cis.get() == nullptr);  // key2 is not in the cache yet.
+
+  // Store data under key2 that is identical to what key already holds.
+  cid.reset(new CompilerInfoData);
+  cid->set_last_used_at(now);
+  cid->set_found(true);
+
+  cis.reset(cache_->Store(key2, std::move(cid)));
+  EXPECT_EQ(3, cis.get()->refcnt());  // caller & in cache (for key and key2).
+  EXPECT_TRUE(cis.get() == state);  // same as before.
+
+  {
+    EXPECT_EQ(1U, keys_by_hash().size());  // Still a single hash ...
+    const unordered_set<string>& keys = *keys_by_hash().begin()->second;
+    EXPECT_EQ(2U, keys.size());  // ... now referenced by both keys.
+  }
+
+  // Overwrite key2 with different data (name is set this time).
+  cid.reset(new CompilerInfoData);
+  cid->set_last_used_at(now);
+  cid->set_name("gcc");
+  cid->set_found(true);
+
+  cis.reset(cache_->Store(key2, std::move(cid)));
+  EXPECT_EQ(2, cis.get()->refcnt());  // caller & in cache (for key2).
+  EXPECT_TRUE(cis.get() != state);  // different
+
+  {
+    EXPECT_EQ(2U, keys_by_hash().size());  // Two hashes, one key each.
+    for (const auto& it : keys_by_hash()) {
+      EXPECT_EQ(1U, it.second->size());
+    }
+  }
+
+  cis.reset(cache_->Lookup(key));  // The original key still maps to the first state.
+  EXPECT_TRUE(cis.get() == state);
+  EXPECT_EQ(2, cis.get()->refcnt());  // caller & in cache (for key).
+}
+
+TEST_F(CompilerInfoCacheTest, NegativeCache) {  // found==true but with an error: counted in NumFail().
+  const string compiler_path("/invalid/gcc");
+
+  std::vector<string> args;
+  args.push_back(compiler_path);
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::New(args, "/tmp"));
+  std::vector<string> key_env;
+
+  CompilerInfoCache::Key key(CompilerInfoCache::CreateKey(
+      *flags, compiler_path, key_env));
+
+  // Nothing is cached for this key yet, so Lookup() returns null.
+  ScopedCompilerInfoState cis(cache_->Lookup(key));
+  EXPECT_TRUE(cis.get() == nullptr);
+
+  // Build a compiler info that was found but carries an error message.
+  std::unique_ptr<CompilerInfoData> cid(new CompilerInfoData);
+  cid->set_found(true);
+  CompilerInfoBuilder::AddErrorMessage("invalid gcc", cid.get());
+
+  cis.reset(cache_->Store(key, std::move(cid)));
+  EXPECT_EQ(1, cache_->NumFail());
+  EXPECT_EQ(0, cache_->NumMiss());
+  EXPECT_TRUE(cis.get() != nullptr);
+  EXPECT_EQ(2, cis.get()->refcnt());  // caller & in cache
+  EXPECT_TRUE(cis.get()->info().found());
+  EXPECT_TRUE(cis.get()->info().HasError());
+  EXPECT_GT(cis.get()->info().failed_at(), 0);
+
+  // Lookup() returns the negatively cached CompilerInfo; counters unchanged.
+  ScopedCompilerInfoState cis2(cache_->Lookup(key));
+  EXPECT_TRUE(cis2.get() == cis.get());
+  EXPECT_EQ(3, cis.get()->refcnt());  // cis, cis2 & in cache
+  EXPECT_EQ(1, cache_->NumFail());
+  EXPECT_EQ(0, cache_->NumMiss());
+
+  cis2.reset(nullptr);
+  EXPECT_EQ(2, cis.get()->refcnt());  // cis & in cache
+
+  // Backdate failed_at by one hour.
+  time_t now = time(nullptr);
+  SetFailedAt(cis.get(), now - 3600);
+
+  // Since the negative cache is expired, we will get no CompilerInfo,
+  // and will need to retry to make CompilerInfo again.
+  cis2.reset(cache_->Lookup(key));
+  EXPECT_TRUE(cis2.get() == nullptr);
+  EXPECT_EQ(2, cis.get()->refcnt()); // cis & in cache
+
+  // Take the compiler info again and store it, replacing the old entry.
+  std::unique_ptr<CompilerInfoData> cid2(new CompilerInfoData);
+  cid2->set_found(true);
+  CompilerInfoBuilder::AddErrorMessage("invalid gcc", cid2.get());
+
+  cis2.reset(cache_->Store(key, std::move(cid2)));
+  EXPECT_EQ(2, cis2.get()->refcnt());  // cis2 & in cache
+  EXPECT_EQ(1, cis.get()->refcnt());  // cis only, removed from cache.
+  EXPECT_EQ(2, cache_->NumFail());
+  EXPECT_EQ(0, cache_->NumMiss());
+}
+
+TEST_F(CompilerInfoCacheTest, MissingCompilerCache) {  // found unset plus an error: counted in NumMiss().
+  const string compiler_path("/missing/gcc");
+
+  std::vector<string> args;
+  args.push_back(compiler_path);
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::New(args, "/tmp"));
+  std::vector<string> key_env;
+
+  CompilerInfoCache::Key key(CompilerInfoCache::CreateKey(
+      *flags, compiler_path, key_env));
+
+  // Nothing is cached for this key yet, so Lookup() returns null.
+  ScopedCompilerInfoState cis(cache_->Lookup(key));
+  EXPECT_TRUE(cis.get() == nullptr);
+
+  std::unique_ptr<CompilerInfoData> cid(new CompilerInfoData);  // |found| is left unset.
+  CompilerInfoBuilder::AddErrorMessage("Couldn't open local compiler file",
+                                       cid.get());
+  cis.reset(cache_->Store(key, std::move(cid)));
+  EXPECT_EQ(2, cis.get()->refcnt()); // caller & in cache
+  EXPECT_EQ(0, cache_->NumFail());
+  EXPECT_EQ(1, cache_->NumMiss());
+  EXPECT_TRUE(cis.get()->info().HasError());
+  EXPECT_FALSE(cis.get()->info().found());
+  EXPECT_GT(cis.get()->info().failed_at(), 0);
+
+  // Lookup() returns the negatively cached CompilerInfo; counters unchanged.
+  ScopedCompilerInfoState cis2(cache_->Lookup(key));
+  EXPECT_TRUE(cis.get() == cis2.get());
+  EXPECT_EQ(3, cis.get()->refcnt());  // cis, cis2 & in cache.
+  EXPECT_EQ(0, cache_->NumFail());
+  EXPECT_EQ(1, cache_->NumMiss());
+  EXPECT_TRUE(cis2.get()->info().HasError());
+  EXPECT_FALSE(cis2.get()->info().found());
+  EXPECT_GT(cis2.get()->info().failed_at(), 0);
+
+  cis2.reset(nullptr);
+  EXPECT_EQ(2, cis.get()->refcnt());  // cis & in cache
+
+  // Backdate failed_at by one hour.
+  time_t now = time(nullptr);
+  SetFailedAt(cis.get(), now - 3600);
+
+  // Since the negative cache is expired, we will retry to make
+  // CompilerInfo again.
+  cis2.reset(cache_->Lookup(key));
+  EXPECT_TRUE(cis2.get() == nullptr);
+  EXPECT_EQ(2, cis.get()->refcnt()); // cis & still in cache
+
+  // Take the compiler info again and store it, replacing the old entry.
+  std::unique_ptr<CompilerInfoData> cid2(new CompilerInfoData);
+  CompilerInfoBuilder::AddErrorMessage("Couldn't open local compiler file",
+                                       cid2.get());
+  cis2.reset(cache_->Store(key, std::move(cid2)));
+  EXPECT_EQ(2, cis2.get()->refcnt());  // cis2 & in cache
+  EXPECT_EQ(1, cis.get()->refcnt());  // cis only, removed from cache.
+  EXPECT_EQ(0, cache_->NumFail());
+  EXPECT_EQ(2, cache_->NumMiss());
+  EXPECT_TRUE(cis2.get()->info().HasError());
+  EXPECT_FALSE(cis2.get()->info().found());
+  EXPECT_GT(cis2.get()->info().failed_at(), 0);
+
+}
+
+// Stores four keys and checks what Marshal() writes into the table:
+//  - key1 and key2 carry identical CompilerInfoData (same hash) and are
+//    expected to be merged into one entry with two keys,
+//  - key3 (g++) is expected to get an entry of its own,
+//  - key4 (clang) is disabled after being stored; the assertions below expect
+//    it to be absent from the table.
+TEST_F(CompilerInfoCacheTest, Marshal) {
+  CompilerInfoCache::Key key;
+  key.base = "/usr/bin/gcc -O2";
+  key.cwd = "/b/build/slave/work";
+  key.local_compiler_path = "/usr/bin/gcc";
+
+  std::unique_ptr<CompilerInfoData> cid(new CompilerInfoData);
+  cid->set_name("gcc");
+  cid->set_lang("c");
+  cid->set_found(true);
+  const string hash1 = HashKey(*cid.get());
+
+  ASSERT_TRUE(file::IsAbsolutePath(key.local_compiler_path));
+  const string key1 = key.ToString(
+      !CompilerInfoCache::Key::kCwdRelative);
+  ScopedCompilerInfoState cis(cache_->Store(key, std::move(cid)));
+
+  // Second key: different command line, identical data (hence same hash).
+  key.base = "/usr/bin/gcc -O2 -fno-diagnostics-show-option";
+  cid.reset(new CompilerInfoData);
+  cid->set_name("gcc");
+  cid->set_lang("c");
+  cid->set_found(true);
+  EXPECT_EQ(hash1, HashKey(*cid.get()));
+  ASSERT_TRUE(file::IsAbsolutePath(key.local_compiler_path));
+  const string key2 = key.ToString(
+      !CompilerInfoCache::Key::kCwdRelative);
+  EXPECT_NE(key1, key2);
+  cis.reset(cache_->Store(key, std::move(cid)));
+
+  // Third key: g++ with different data and therefore a different hash.
+  key.base = "/usr/bin/g++ -O2";
+  key.local_compiler_path = "/usr/bin/g++";
+  cid.reset(new CompilerInfoData);
+  cid->set_name("g++");
+  cid->set_lang("c++");
+  cid->set_found(true);
+  const string hash3 = HashKey(*cid.get());
+  EXPECT_NE(hash1, hash3);
+  ASSERT_TRUE(file::IsAbsolutePath(key.local_compiler_path));
+  const string key3 = key.ToString(
+      !CompilerInfoCache::Key::kCwdRelative);
+  EXPECT_NE(key1, key3);
+  EXPECT_NE(key2, key3);
+  cis.reset(cache_->Store(key, std::move(cid)));
+
+  // Fourth key: clang, stored and then disabled.
+  key.base = "/usr/bin/clang";
+  key.local_compiler_path = "/usr/bin/clang";
+  cid.reset(new CompilerInfoData);
+  cid->set_name("clang");
+  cid->set_lang("c");
+  cid->set_found(true);
+  const string hash4 = HashKey(*cid.get());
+  EXPECT_NE(hash1, hash4);
+  EXPECT_NE(hash3, hash4);
+  ASSERT_TRUE(file::IsAbsolutePath(key.local_compiler_path));
+  const string key4 = key.ToString(
+      !CompilerInfoCache::Key::kCwdRelative);
+  EXPECT_NE(key1, key4);
+  EXPECT_NE(key2, key4);
+  EXPECT_NE(key3, key4);
+  cis.reset(cache_->Store(key, std::move(cid)));
+  cis.get()->SetDisabled(true, "disabled for test");
+
+  cis.reset(nullptr);
+
+  CompilerInfoDataTable table;
+  EXPECT_TRUE(Marshal(&table));
+
+  // Fatal on purpose: the loop below reads every entry, and a wrong entry
+  // count already invalidates the per-entry checks.
+  ASSERT_EQ(2, table.compiler_info_data_size());
+  bool hash1_found = false;
+  bool hash3_found = false;
+  for (int i = 0; i < table.compiler_info_data_size(); ++i) {
+    const CompilerInfoDataTable::Entry& entry = table.compiler_info_data(i);
+    // Entry order is not assumed; entries are told apart by their key count.
+    switch (entry.keys_size()) {
+      case 2: // hash1: key1, key2
+        {
+          unordered_set<string> keys(entry.keys().begin(), entry.keys().end());
+          EXPECT_EQ(1U, keys.count(key1));
+          EXPECT_EQ(1U, keys.count(key2));
+          EXPECT_EQ("gcc", entry.data().name());
+          EXPECT_EQ("c", entry.data().lang());
+          EXPECT_TRUE(entry.data().found());
+          EXPECT_EQ(hash1, HashKey(entry.data()));
+          hash1_found = true;
+        }
+        break;
+      case 1: // hash3: key3
+        {
+          EXPECT_EQ(key3, entry.keys(0));
+          EXPECT_EQ("g++", entry.data().name());
+          EXPECT_EQ("c++", entry.data().lang());
+          EXPECT_TRUE(entry.data().found());
+          EXPECT_EQ(hash3, HashKey(entry.data()));
+          hash3_found = true;
+        }
+        break;
+      default:
+        ADD_FAILURE() << "unexpected entry[" << i << "].keys_size()="
+                      << entry.keys_size();
+    }
+  }
+  EXPECT_TRUE(hash1_found);
+  EXPECT_TRUE(hash3_found);
+}
+
+// Feeds a hand-built table (one entry shared by two gcc keys, one entry for a
+// single g++ key) to Unmarshal() and checks the resulting key->state and
+// hash->keys maps. Every find() result is checked with ASSERT_* before being
+// dereferenced, so a missing key fails the test instead of dereferencing
+// end().
+TEST_F(CompilerInfoCacheTest, Unmarshal) {
+  CompilerInfoDataTable table;
+  CompilerInfoDataTable::Entry* entry = table.add_compiler_info_data();
+  entry->add_keys("/usr/bin/gcc -O2 @");
+  entry->add_keys("/usr/bin/gcc -O2 -fno-diagnostics-show-option @");
+  CompilerInfoData* data = entry->mutable_data();
+  data->set_name("gcc");
+  data->set_lang("c");
+  data->set_found(true);
+
+  entry = table.add_compiler_info_data();
+  entry->add_keys("/usr/bin/g++ -O2 @");
+  data = entry->mutable_data();
+  data->set_name("g++");
+  data->set_lang("c++");
+  data->set_found(true);
+
+  EXPECT_TRUE(Unmarshal(table));
+
+  // Three keys in total; the two gcc keys share one state (refcnt 2).
+  EXPECT_EQ(3U, compiler_info().size());
+  auto p = compiler_info().find("/usr/bin/gcc -O2 @");
+  ASSERT_TRUE(p != compiler_info().end());
+  CompilerInfoState* state = p->second;
+  EXPECT_EQ(2, state->refcnt());
+  EXPECT_EQ("gcc", state->info().data().name());
+  EXPECT_EQ("c", state->info().data().lang());
+  EXPECT_TRUE(state->info().data().found());
+  const string& hash1 = HashKey(state->info().data());
+
+  p = compiler_info().find("/usr/bin/gcc -O2 -fno-diagnostics-show-option @");
+  ASSERT_TRUE(p != compiler_info().end());
+  state = p->second;
+  EXPECT_EQ(2, state->refcnt());
+  EXPECT_EQ("gcc", state->info().data().name());
+  EXPECT_EQ("c", state->info().data().lang());
+  EXPECT_TRUE(state->info().data().found());
+  EXPECT_EQ(hash1, HashKey(state->info().data()));
+
+  // The g++ key has a state of its own (refcnt 1) with a different hash.
+  p = compiler_info().find("/usr/bin/g++ -O2 @");
+  ASSERT_TRUE(p != compiler_info().end());
+  state = p->second;
+  EXPECT_EQ(1, state->refcnt());
+  EXPECT_EQ("g++", state->info().data().name());
+  EXPECT_EQ("c++", state->info().data().lang());
+  EXPECT_TRUE(state->info().data().found());
+  const string& hash2 = HashKey(state->info().data());
+  EXPECT_NE(hash1, hash2);
+
+  // hash -> keys index: hash1 lists both gcc keys, hash2 lists the g++ key.
+  EXPECT_EQ(2U, keys_by_hash().size());
+  auto found = keys_by_hash().find(hash1);
+  ASSERT_TRUE(found != keys_by_hash().end());
+  const unordered_set<string>* keys = found->second;
+  EXPECT_EQ(2U, keys->size());
+  EXPECT_EQ(1U, keys->count("/usr/bin/gcc -O2 @"));
+  EXPECT_EQ(1U, keys->count("/usr/bin/gcc -O2 -fno-diagnostics-show-option @"));
+
+  found = keys_by_hash().find(hash2);
+  ASSERT_TRUE(found != keys_by_hash().end());
+  keys = found->second;
+  EXPECT_EQ(1U, keys->size());
+  EXPECT_EQ(1U, keys->count("/usr/bin/g++ -O2 @"));
+}
+
+TEST_F(CompilerInfoCacheTest, UpdateOlderCompilerInfo)
+{  // Checks which entries survive UpdateOlderCompilerInfo().
+  const std::string valid_hash = "valid_hash";
+  FileId valid_fileid;
+  valid_fileid.mtime = 1234567;
+
+  HashCheckingCompilerInfoValidator* validator =
+        new HashCheckingCompilerInfoValidator();
+  SetValidator(validator);  // validator is owned by the callee.
+  validator->SetLocalCompilerFileId(valid_fileid);
+  validator->SetLocalCompilerHash(valid_hash);
+
+  std::vector<string> args;
+  args.push_back("/usr/bin/gcc");
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::New(args, "/tmp"));
+  std::vector<string> key_env;
+
+  CompilerInfoCache::Key key(CompilerInfoCache::CreateKey(
+      *flags, "/usr/bin/gcc", key_env));
+  ScopedCompilerInfoState cis(cache_->Lookup(key));
+  EXPECT_TRUE(cis.get() == nullptr);  // Nothing has been stored yet.
+
+  std::vector<string> old_args;
+  old_args.push_back("/usr/bin/oldgcc");
+  std::unique_ptr<CompilerFlags> old_flags(
+      CompilerFlags::New(old_args, "/tmp"));
+  std::vector<string> old_key_env;
+
+  CompilerInfoCache::Key old_key(CompilerInfoCache::CreateKey(
+      *old_flags, "/usr/bin/oldgcc", old_key_env));
+  ScopedCompilerInfoState old_cis(cache_->Lookup(old_key));
+  EXPECT_TRUE(old_cis.get() == nullptr);  // Nothing has been stored yet.
+
+  // Store a valid compiler info that was last used just now.
+  {
+    std::unique_ptr<CompilerInfoData> cid(new CompilerInfoData);
+    cid->set_last_used_at(time(nullptr));
+    cid->set_found(true);
+#ifndef _WIN32
+    cid->mutable_local_compiler_id()->set_dev(valid_fileid.dev);
+    cid->mutable_local_compiler_id()->set_inode(valid_fileid.inode);
+#endif
+    cid->mutable_local_compiler_id()->set_mtime(valid_fileid.mtime);
+    cid->mutable_local_compiler_id()->set_size(valid_fileid.size);
+    cid->set_local_compiler_hash(valid_hash);
+    cid->set_hash(valid_hash);
+
+    cis.reset(cache_->Store(key, std::move(cid)));
+    EXPECT_EQ(2, cis.get()->refcnt());  // caller & in cache
+  }
+
+  // Store an otherwise identical compiler info with an old last_used_at.
+  {
+    std::unique_ptr<CompilerInfoData> cid(new CompilerInfoData);
+    // last used 31 days ago, i.e. longer ago than kCacheHoldingTimeSec.
+    cid->set_last_used_at(time(nullptr) - 60 * 60 * 24 * 31);
+    cid->set_found(true);
+#ifndef _WIN32
+    cid->mutable_local_compiler_id()->set_dev(valid_fileid.dev);
+    cid->mutable_local_compiler_id()->set_inode(valid_fileid.inode);
+#endif
+    cid->mutable_local_compiler_id()->set_mtime(valid_fileid.mtime);
+    cid->mutable_local_compiler_id()->set_size(valid_fileid.size);
+    cid->set_local_compiler_hash(valid_hash);
+    cid->set_hash(valid_hash);
+
+    old_cis.reset(cache_->Store(old_key, std::move(cid)));
+    EXPECT_EQ(2, old_cis.get()->refcnt());  // caller & in cache
+  }
+
+  // The recent entry is expected to stay; the 31-day-old one to be dropped.
+  {
+    UpdateOlderCompilerInfo();
+
+    ScopedCompilerInfoState tmp(cache_->Lookup(key));
+    EXPECT_TRUE(tmp.get() != nullptr);
+
+    ScopedCompilerInfoState old_tmp(cache_->Lookup(old_key));
+    EXPECT_TRUE(old_tmp.get() == nullptr);
+  }
+
+  {
+    // Change local compiler FileId. (= changed local file timestamp.)
+    FileId changed_fileid(valid_fileid);
+    changed_fileid.mtime += 1000;
+    validator->SetLocalCompilerFileId(changed_fileid);
+
+    // Even though the FileId changed, the file hash is the same, so the
+    // CompilerInfo is still accepted.
+    UpdateOlderCompilerInfo();
+
+    ScopedCompilerInfoState tmp(cache_->Lookup(key));
+    EXPECT_TRUE(tmp.get() != nullptr);
+  }
+
+  {
+    // Change FileId & Hash
+    FileId changed_fileid(valid_fileid);
+    changed_fileid.mtime += 2000;
+    validator->SetLocalCompilerFileId(changed_fileid);
+    validator->SetLocalCompilerHash("unexpected_hash");
+
+    // Since FileId and file hash are both changed,
+    // the cached entry should be removed.
+    UpdateOlderCompilerInfo();
+
+    ScopedCompilerInfoState tmp(cache_->Lookup(key));
+    EXPECT_TRUE(tmp.get() == nullptr);
+  }
+}
+
+// Fills the cache through Unmarshal() and verifies that Clear() leaves both
+// the key->state map and the hash->keys map empty.
+TEST_F(CompilerInfoCacheTest, Clear) {
+  CompilerInfoDataTable table;
+
+  // First entry: gcc data shared by two keys.
+  CompilerInfoDataTable::Entry* gcc_entry = table.add_compiler_info_data();
+  gcc_entry->add_keys("/usr/bin/gcc -O2 @");
+  gcc_entry->add_keys("/usr/bin/gcc -O2 -fno-diagnostics-show-option @");
+  CompilerInfoData* gcc_data = gcc_entry->mutable_data();
+  gcc_data->set_name("gcc");
+  gcc_data->set_lang("c");
+  gcc_data->set_found(true);
+
+  // Second entry: g++ data under a single key.
+  CompilerInfoDataTable::Entry* gxx_entry = table.add_compiler_info_data();
+  gxx_entry->add_keys("/usr/bin/g++ -O2 @");
+  CompilerInfoData* gxx_data = gxx_entry->mutable_data();
+  gxx_data->set_name("g++");
+  gxx_data->set_lang("c++");
+  gxx_data->set_found(true);
+
+  EXPECT_TRUE(Unmarshal(table));
+
+  // Both maps are populated before clearing ...
+  EXPECT_FALSE(compiler_info().empty());
+  EXPECT_FALSE(keys_by_hash().empty());
+
+  // ... and empty afterwards.
+  Clear();
+  EXPECT_TRUE(compiler_info().empty());
+  EXPECT_TRUE(keys_by_hash().empty());
+}
+
+// TODO: add tests for Load and Save.
+
+#ifdef __linux__
+TEST_F(CompilerInfoCacheTest, RelativePathCompiler) {  // Uses the global instance via Init()/Quit(), not cache_.
+  InstallReadCommandOutputFunc(ReadCommandOutputByPopen);
+  TmpdirUtil tmpdir_util("compiler_info_cache_unittest");
+  tmpdir_util.SetCwd("");
+
+  static const char kCompilerInfoCache[] = "compiler_info_cache";
+
+  CompilerInfoCache::Init(tmpdir_util.tmpdir(), kCompilerInfoCache, 3600);
+  CompilerInfoBuilder cib;
+  const std::vector<string> empty_env;
+  CompilerInfoCache::Key key1, key2, key3;
+
+  {  // gcc given relative to cwd "/".
+    std::vector<string> args { "usr/bin/gcc" };
+    std::unique_ptr<CompilerFlags> flags(
+        CompilerFlags::MustNew(args, "/"));
+    std::unique_ptr<CompilerInfoData> cid(
+        cib.FillFromCompilerOutputs(*flags, "usr/bin/gcc", empty_env));
+    EXPECT_NE(nullptr, cid);
+    key1 = CompilerInfoCache::CreateKey(
+        *flags, "usr/bin/gcc", empty_env);
+
+    CompilerInfoCache::instance()->Store(
+        key1, std::move(cid));
+  }
+
+  {  // gcc given relative to cwd "/bin".
+    std::vector<string> args { "../usr/bin/gcc" };
+    std::unique_ptr<CompilerFlags> flags(
+        CompilerFlags::MustNew(args, "/bin"));
+    std::unique_ptr<CompilerInfoData> cid(
+        cib.FillFromCompilerOutputs(*flags, "../usr/bin/gcc", empty_env));
+    EXPECT_NE(nullptr, cid);
+    key2 = CompilerInfoCache::CreateKey(
+        *flags, "../usr/bin/gcc", empty_env);
+
+    CompilerInfoCache::instance()->Store(
+        key2, std::move(cid));
+  }
+
+  {  // gcc given as an absolute path, with the tmpdir cwd.
+    std::vector<string> args { "/usr/bin/gcc" };
+    std::unique_ptr<CompilerFlags> flags(
+        CompilerFlags::MustNew(args, tmpdir_util.cwd()));
+    std::unique_ptr<CompilerInfoData> cid(
+        cib.FillFromCompilerOutputs(*flags, "/usr/bin/gcc", empty_env));
+    EXPECT_NE(nullptr, cid);
+    key3 = CompilerInfoCache::CreateKey(
+        *flags, "/usr/bin/gcc", empty_env);
+
+    CompilerInfoCache::instance()->Store(
+        key3, std::move(cid));
+  }
+
+  EXPECT_EQ(3, CompilerInfoCache::instance()->NumStores());  // Three stores, none counted as duplicate.
+  EXPECT_EQ(0, CompilerInfoCache::instance()->NumStoreDups());
+  CompilerInfoCache::Quit();
+
+  ASSERT_TRUE(Chdir("/"));
+
+  CompilerInfoCache::Init(tmpdir_util.tmpdir(), kCompilerInfoCache, 3600);  // Same dir and file name as the first Init().
+
+  EXPECT_NE(nullptr, CompilerInfoCache::instance()->Lookup(key1));  // All three keys must be found after re-Init.
+  EXPECT_NE(nullptr, CompilerInfoCache::instance()->Lookup(key2));
+  EXPECT_NE(nullptr, CompilerInfoCache::instance()->Lookup(key3));
+
+  CompilerInfoCache::Quit();
+}
+#endif  // __linux__
+
+}  // namespace devtools_goma
diff --git a/client/compiler_info_data.proto b/client/compiler_info_data.proto
new file mode 100644
index 0000000..d7e1b7c
--- /dev/null
+++ b/client/compiler_info_data.proto
@@ -0,0 +1,132 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+syntax = "proto2";
+
+package devtools_goma;
+
+// CompilerInfoData is a serializable format of CompilerInfo.
+//
+// NEXT ID TO USE: 34
+message CompilerInfoData {
+  // FileId of the compiler or subprogram.
+  // All fields must be kept in sync with file_id.h.
+  // NEXT ID TO USE: 9
+  message FileId {
+    optional uint32 volume_serial_number = 5;  // NOTE(review): fields 5-7 look Windows-specific; confirm in file_id.h.
+    optional uint32 file_index_high = 6;
+    optional uint32 file_index_low = 7;
+
+    optional uint64 dev = 1;  // The cache unit test sets dev/inode only under #ifndef _WIN32.
+    optional uint64 inode = 2;
+
+    optional int64 mtime = 3;
+    optional int64 size = 4;
+    optional bool is_directory = 8;
+  };
+
+  message SubprogramInfo {
+    optional string name = 1;
+    optional string hash = 2;
+    optional FileId file_id = 3;
+  };
+
+  message MacroValue {
+    optional string key = 1;
+    optional int64 value = 2;
+  }
+
+  // compiler name.
+  optional string name = 1;
+  // compiler version.
+  optional string version = 2;
+  // compiler target
+  optional string target = 3;
+
+  optional string lang = 4;
+
+  // Some compiler commands are just wrappers for the real compiler, and such
+  // a command doesn't change when the real compiler is updated.
+  // TODO:
+  // Make only compiler info cache key has |cwd| and |local_compiler_path|
+  // so that duplicate compiler infos are merged.
+  optional FileId local_compiler_id = 5;
+  optional string local_compiler_path = 28;
+  optional string cwd = 32;
+  optional string local_compiler_hash = 29;
+
+  // Real compiler's FileId if real_compiler_path != local_compiler_path.
+  // Otherwise, real_compiler_id must be the same as local_compiler_id.
+  optional FileId real_compiler_id = 6;
+  optional string real_compiler_path = 8;
+  // hash is the sha256 hash of the real compiler.
+  // TODO: better to rename this to real_compiler_hash?
+  optional string hash = 7;
+
+  repeated string quote_include_paths = 31;
+  repeated string cxx_system_include_paths = 9;
+  repeated string system_include_paths = 10;
+  repeated string system_framework_paths = 11;
+
+  optional string predefined_macros = 12;
+
+  // unordered map.
+  repeated string supported_predefined_macros = 13;
+  repeated string hidden_predefined_macros = 27;
+  // unordered map.
+  // use map<string, int64>? b/21859856
+  repeated MacroValue has_feature = 14;
+  repeated MacroValue has_extension = 15;
+  repeated MacroValue has_attribute = 16;
+  repeated MacroValue has_cpp_attribute = 17;
+  repeated MacroValue has_declspec_attribute = 18;
+  repeated MacroValue has_builtin = 33;
+
+  // Automatically prepend this path before system include paths.
+  // This is required for Windows NaCl toolchain because all system include
+  // paths are relative paths from the toolchain root.
+  //
+  optional string toolchain_root = 19;
+
+  // Additional command line flags to be sent to backends.
+  // (for example, -fuse-init-array).
+  repeated string additional_flags = 20;
+
+  // A list of subprograms specified by -B flag.
+  repeated SubprogramInfo subprograms = 21;
+
+  // When taking compiler info fails, |failed_at| is not 0.
+  // The error reason should be set in error_message.
+  optional int64 failed_at = 23;
+  optional string error_message = 24;
+  // if found is false, failed_at should be > 0, and error_message should not be
+  // empty.
+  optional bool found = 25;
+
+  // Path to compiler resource directory.
+  optional string resource_dir = 26;
+
+  // Timestamp at which this cache entry was most recently used.
+  optional int64 last_used_at = 30;
+};
+
+// CompilerInfoDataTable is a table of CompilerInfoData indexed by
+// compiler_info_key.
+//
+// NEXT ID TO USE: 4
+message CompilerInfoDataTable {
+  message Entry {
+    repeated string keys = 1;  // Every compiler_info_key that maps to |data|.
+    optional CompilerInfoData data = 2;
+  }
+
+  // One entry per CompilerInfoData hash.
+  repeated Entry compiler_info_data = 1;
+
+  optional int32 DEPRECATED_version = 2 [deprecated=true];
+
+  // When this revision is different, the whole cache is discarded.
+  optional string built_revision = 3;
+};
diff --git a/client/compiler_info_unittest.cc b/client/compiler_info_unittest.cc
new file mode 100644
index 0000000..1503798
--- /dev/null
+++ b/client/compiler_info_unittest.cc
@@ -0,0 +1,1366 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "compiler_info.h"
+
+#include <memory>
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+#include <google/protobuf/repeated_field.h>
+#include <gtest/gtest.h>
+
+#include "basictypes.h"
+#include "compiler_flags.h"
+#include "mypath.h"
+#include "path.h"
+#include "subprocess.h"
+#include "unittest_util.h"
+#include "util.h"
+
+namespace devtools_goma {
+
+class CompilerInfoTest : public testing::Test {
+ protected:
+  // Runs CheckTempDirectory() on the goma temporary directory before a test.
+  void SetUp() override {
+    CheckTempDirectory(GetGomaTmpDir());
+  }
+
+  // Appends |macro| to the predefined macros already stored in |cid|.
+  void AppendPredefinedMacros(const string& macro, CompilerInfoData* cid) {
+    cid->set_predefined_macros(cid->predefined_macros() + macro);
+  }
+
+  // Returns the value stored for |key| in |map|, or 0 when |key| is absent.
+  int FindValue(const unordered_map<string, int>& map, const string& key) {
+    const auto found = map.find(key);
+    return found == map.end() ? 0 : found->second;
+  }
+
+  // Returns the "test" directory two levels above this module's directory
+  // (this module is in out\Release).
+  string TestDir() {
+    const std::string out_dir = file::JoinPath(GetMyDirectory(), "..");
+    return file::JoinPath(file::JoinPath(out_dir, ".."), "test");
+  }
+};
+
+TEST_F(CompilerInfoTest, SplitGccIncludeOutput) {
+  // Verbose (-v -E) output of gcc-4.4.3 on glucid.
+  static const char kGccVOutput[] =
+      "Using built-in specs.\n"
+      "Target: x86_64-linux-gnu\n"
+      "Configured with: ../src/configure -v "
+      "--with-pkgversion='Ubuntu 4.4.3-4ubuntu5.1' "
+      "--with-bugurl=file:///usr/share/doc/gcc-4.4/README.Bugs "
+      "--enable-languages=c,c++,fortran,objc,obj-c++ "
+      "--prefix=/usr --enable-shared --enable-multiarch "
+      "--enable-linker-build-id --with-system-zlib --libexecdir=/usr/lib "
+      "--without-included-gettext --enable-threads=posix "
+      "--with-gxx-include-dir=/usr/include/c++/4.4 --program-suffix=-4.4 "
+      "--enable-nls --enable-clocale=gnu --enable-libstdcxx-debug "
+      "--enable-plugin --enable-objc-gc --disable-werror --with-arch-32=i486 "
+      "--with-tune=generic --enable-checking=release --build=x86_64-linux-gnu "
+      "--host=x86_64-linux-gnu --target=x86_64-linux-gnu\n"
+      "Thread model: posix\n"
+      "gcc version 4.4.3 (Ubuntu 4.4.3-4ubuntu5.1) \n"
+      "COLLECT_GCC_OPTIONS='-v' '-E' '-o' '/dev/null' '-shared-libgcc' "
+      "'-mtune=generic'\n"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/cc1 -E -quiet -v /dev/null "
+      "-D_FORTIFY_SOURCE=2 -o /dev/null -mtune=generic -fstack-protector\n"
+      "ignoring nonexistent directory \"/usr/local/include/x86_64-linux-gnu\"\n"
+      "ignoring nonexistent directory \"/usr/lib/gcc/x86_64-linux-gnu/4.4.3/"
+      "../../../../x86_64-linux-gnu/include\"\n"
+      "ignoring nonexistent directory \"/usr/include/x86_64-linux-gnu\"\n"
+      "#include \"...\" search starts here:\n"
+      "#include <...> search starts here:\n"
+      " /usr/local/include\n"
+      " /usr/lib/gcc/x86_64-linux-gnu/4.4.3/include\n"
+      " /usr/lib/gcc/x86_64-linux-gnu/4.4.3/include-fixed\n"
+      " /usr/include\n"
+      "End of search list.\n"
+      "COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:/usr/lib/gcc/x86_64-linux-gnu/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:/usr/lib/gcc/x86_64-linux-gnu/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:/usr/lib/gcc/x86_64-linux-gnu/\n"
+      "LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/:/lib/../lib/:"
+      "/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../:/lib/:"
+      "/usr/lib/:/usr/lib/x86_64-linux-gnu/\n"
+      "COLLECT_GCC_OPTIONS='-v' '-E' '-o' '/dev/null' '-shared-libgcc' "
+      "'-mtune=generic'\n";
+
+  // Entries listed under "#include <...>"; the "..." section has none.
+  const std::vector<string> expected_paths = {
+      "/usr/local/include",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include-fixed",
+      "/usr/include",
+  };
+
+  std::vector<string> quote_dirs, system_dirs, framework_dirs;
+  EXPECT_TRUE(CompilerInfoBuilder::SplitGccIncludeOutput(
+      kGccVOutput, &quote_dirs, &system_dirs, &framework_dirs));
+  EXPECT_TRUE(quote_dirs.empty());
+  EXPECT_EQ(expected_paths, system_dirs);
+  EXPECT_TRUE(framework_dirs.empty());
+}
+
+TEST_F(CompilerInfoTest, SplitGccIncludeOutputWithCurIncludePath) {
+  // Verbose (-v -E) output of gcc-4.4.3 on glucid with C_INCLUDE_PATH=.
+  static const char kGccVOutput[] =
+      "Using built-in specs.\n"
+      "Target: x86_64-linux-gnu\n"
+      "Configured with: ../src/configure -v "
+      "--with-pkgversion='Ubuntu 4.4.3-4ubuntu5.1' "
+      "--with-bugurl=file:///usr/share/doc/gcc-4.4/README.Bugs "
+      "--enable-languages=c,c++,fortran,objc,obj-c++ "
+      "--prefix=/usr --enable-shared --enable-multiarch "
+      "--enable-linker-build-id --with-system-zlib --libexecdir=/usr/lib "
+      "--without-included-gettext --enable-threads=posix "
+      "--with-gxx-include-dir=/usr/include/c++/4.4 --program-suffix=-4.4 "
+      "--enable-nls --enable-clocale=gnu --enable-libstdcxx-debug "
+      "--enable-plugin --enable-objc-gc --disable-werror --with-arch-32=i486 "
+      "--with-tune=generic --enable-checking=release --build=x86_64-linux-gnu "
+      "--host=x86_64-linux-gnu --target=x86_64-linux-gnu\n"
+      "Thread model: posix\n"
+      "gcc version 4.4.3 (Ubuntu 4.4.3-4ubuntu5.1) \n"
+      "COLLECT_GCC_OPTIONS='-v' '-E' '-o' '/dev/null' '-shared-libgcc' "
+      "'-mtune=generic'\n"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/cc1 -E -quiet -v /dev/null "
+      "-D_FORTIFY_SOURCE=2 -o /dev/null -mtune=generic -fstack-protector\n"
+      "ignoring nonexistent directory \"/usr/local/include/x86_64-linux-gnu\"\n"
+      "ignoring nonexistent directory \"/usr/lib/gcc/x86_64-linux-gnu/4.4.3/"
+      "../../../../x86_64-linux-gnu/include\"\n"
+      "ignoring nonexistent directory \"/usr/include/x86_64-linux-gnu\"\n"
+      "#include \"...\" search starts here:\n"
+      "#include <...> search starts here:\n"
+      " .\n"
+      " /usr/local/include\n"
+      " /usr/lib/gcc/x86_64-linux-gnu/4.4.3/include\n"
+      " /usr/lib/gcc/x86_64-linux-gnu/4.4.3/include-fixed\n"
+      " /usr/include\n"
+      "End of search list.\n"
+      "COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:/usr/lib/gcc/x86_64-linux-gnu/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:/usr/lib/gcc/x86_64-linux-gnu/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:/usr/lib/gcc/x86_64-linux-gnu/\n"
+      "LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/:/lib/../lib/:"
+      "/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../:/lib/:"
+      "/usr/lib/:/usr/lib/x86_64-linux-gnu/\n"
+      "COLLECT_GCC_OPTIONS='-v' '-E' '-o' '/dev/null' '-shared-libgcc' "
+      "'-mtune=generic'\n";
+
+  // Entries listed under "#include <...>", with "." first.
+  const std::vector<string> expected_paths = {
+      ".",
+      "/usr/local/include",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include-fixed",
+      "/usr/include",
+  };
+
+  std::vector<string> quote_dirs, system_dirs, framework_dirs;
+  EXPECT_TRUE(CompilerInfoBuilder::SplitGccIncludeOutput(
+      kGccVOutput, &quote_dirs, &system_dirs, &framework_dirs));
+  EXPECT_TRUE(quote_dirs.empty());
+  EXPECT_EQ(expected_paths, system_dirs);
+  EXPECT_TRUE(framework_dirs.empty());
+}
+
+
+TEST_F(CompilerInfoTest, IsCwdRelative) {
+  // System include directories registered in both cases below.
+  static const char* const kSystemDirs[] = {
+      "/usr/local/include",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include-fixed",
+      "/usr/include",
+  };
+
+  // Case 1: every registered directory is under /usr.
+  {
+    std::unique_ptr<CompilerInfoData> cid(new CompilerInfoData);
+    for (const char* dir : kSystemDirs) cid->add_cxx_system_include_paths(dir);
+    cid->set_found(true);
+    CompilerInfo info(std::move(cid));
+    EXPECT_FALSE(info.IsCwdRelative("/tmp"));
+    EXPECT_TRUE(info.IsCwdRelative("/usr"));
+  }
+
+  // Case 2: "/tmp/." is registered ahead of the same directories.
+  {
+    std::unique_ptr<CompilerInfoData> cid(new CompilerInfoData);
+    cid->add_cxx_system_include_paths("/tmp/.");
+    for (const char* dir : kSystemDirs) cid->add_cxx_system_include_paths(dir);
+    cid->set_found(true);
+    CompilerInfo info(std::move(cid));
+    EXPECT_TRUE(info.IsCwdRelative("/tmp"));
+    EXPECT_FALSE(info.IsCwdRelative("/usr/src"));
+  }
+}
+
+TEST_F(CompilerInfoTest, IsCwdRelativeWithSubprogramInfo) {
+  // Registers a subprogram ("as") created inside a temporary directory and
+  // expects IsCwdRelative() to be true for that directory only.
+  TmpdirUtil tmpdir("is_cwd_relative");
+  tmpdir.CreateEmptyFile("as");
+
+  CompilerInfoData::SubprogramInfo subprog_data;
+  CompilerInfoBuilder::SubprogramInfoFromPath(tmpdir.FullPath("as"),
+                                              &subprog_data);
+  CompilerInfo::SubprogramInfo subprog;
+  CompilerInfo::SubprogramInfo::FromData(subprog_data, &subprog);
+
+  std::unique_ptr<CompilerInfoData> cid(new CompilerInfoData);
+  cid->set_found(true);
+  cid->add_subprograms()->CopyFrom(subprog_data);
+
+  CompilerInfo info(std::move(cid));
+  EXPECT_TRUE(info.IsCwdRelative(tmpdir.tmpdir()));
+  EXPECT_FALSE(info.IsCwdRelative("/nonexistent"));
+}
+
+TEST_F(CompilerInfoTest, GetJavacVersion) {
+  // Expect the token that follows "javac " to be extracted as the version.
+  string parsed_version;
+  CompilerInfoBuilder::ParseJavacVersion("javac 1.6.0_43\n", &parsed_version);
+
+  EXPECT_EQ("1.6.0_43", parsed_version);
+}
+
+TEST_F(CompilerInfoTest, ParseVCOutput) {
+  // Output of "cl /nologo /Bxvcflags.exe foo.cpp".
+  static const char kInputCpp[] =
+    "-zm0x20000000 -il "
+    "C:\\Users\\a\\AppData\\Local\\Temp\\_CL_3da4ff85 -typedil -f foo.cpp -W 1 "
+    "-Ze -D_MSC_EXTENSIONS -Zp8 -ZB64 -D_INTEGRAL_MAX_BITS=64 -Gs -Ot "
+    "-Fofoo.obj -pc \\:/ -Fdvc90.pdb -D_MSC_VER=1500 -D_MSC_FULL_VER=150030729 "
+    "-D_MSC_BUILD=1 -D_WIN32 -D_M_IX86=600 -D_M_IX86_FP=0 -GS -GR -D_CPPRTTI "
+    "-Zc:forScope -Zc:wchar_t -clrNoPureCRT -D_MT "
+    "-I C:\\vs08\\VC\\ATLMFC\\INCLUDE -I C:\\vs08\\VC\\INCLUDE "
+    "-I \"C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\include\" "
+    "-I \"C:\\Program Files (x86)\\Microsoft Visual Studio 10.0\\VC\\"
+    "INCLUDE\\\\\" -I \"C:\\Program Files (x86)\\Microsoft Visual Studio 10.0\\"
+    "VC\\ATLMFC\\INCLUDE\" "
+    "-I \"C:\\Program Files (x86)\\Microsoft SDKs\\Windows\\v7.0A\\include\"";
+
+  // Output of "cl /nologo /B1vcflags.exe foo.c".
+  static const char kInputC[] =
+    "-zm0x20000000 -il "
+    "C:\\Users\\a\\AppData\\Local\\Temp\\_CL_212628dc -typedil -f foo.c -W 1 "
+    "-Ze -D_MSC_EXTENSIONS -Zp8 -ZB64 -D_INTEGRAL_MAX_BITS=64 -Gs -Ot "
+    "-Fofoo.obj -pc \\:/ -Fdvc90.pdb -D_MSC_VER=1500 -D_MSC_FULL_VER=150030729 "
+    "-D_MSC_BUILD=1 -D_WIN32 -D_M_IX86=600 -D_M_IX86_FP=0 -GS -clrNoPureCRT "
+    "-D_MT -I C:\\vs08\\VC\\ATLMFC\\INCLUDE -I C:\\vs08\\VC\\INCLUDE "
+    "-I \"C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\include\" "
+    "-I \"C:\\Program Files (x86)\\Microsoft Visual Studio 10.0\\VC\\"
+    "INCLUDE\\\\\" -I \"C:\\Program Files (x86)\\Microsoft Visual Studio 10.0\\"
+    "VC\\ATLMFC\\INCLUDE\" "
+    "-I \"C:\\Program Files (x86)\\Microsoft SDKs\\Windows\\v7.0A\\include\"";
+  // C++ case: -I options become include paths, -D options become #defines.
+  std::unique_ptr<CompilerInfoData> info_cpp_data(new CompilerInfoData);
+  AppendPredefinedMacros("#define __cplusplus\n", info_cpp_data.get());
+  {
+    std::vector<string> cxx_system_include_paths;
+    string predefined_macros(info_cpp_data->predefined_macros());
+    EXPECT_TRUE(CompilerInfoBuilder::ParseVCOutputString(
+                  kInputCpp, &cxx_system_include_paths, &predefined_macros));
+    for (const auto& p : cxx_system_include_paths) {
+      info_cpp_data->add_cxx_system_include_paths(p);
+    }
+    info_cpp_data->set_predefined_macros(predefined_macros);
+  }
+
+  CompilerInfo info_cpp(std::move(info_cpp_data));
+
+  std::vector<string> expected_include_paths;
+  expected_include_paths.push_back("C:\\vs08\\VC\\ATLMFC\\INCLUDE");
+  expected_include_paths.push_back("C:\\vs08\\VC\\INCLUDE");
+  expected_include_paths.push_back(
+      "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\include");
+  expected_include_paths.push_back(
+      "C:\\Program Files (x86)\\Microsoft Visual Studio 10.0\\VC\\"
+      "INCLUDE\\");
+  expected_include_paths.push_back(
+      "C:\\Program Files (x86)\\Microsoft Visual Studio 10.0\\"
+      "VC\\ATLMFC\\INCLUDE");
+  expected_include_paths.push_back(
+      "C:\\Program Files (x86)\\Microsoft SDKs\\Windows\\v7.0A\\include");
+  EXPECT_EQ(0U, info_cpp.system_include_paths().size());
+  EXPECT_EQ(0U, info_cpp.system_framework_paths().size());
+  EXPECT_EQ(6U, info_cpp.cxx_system_include_paths().size());
+  EXPECT_EQ(expected_include_paths, info_cpp.cxx_system_include_paths());
+  EXPECT_EQ(
+      "#define __cplusplus\n"
+      "#define _MSC_EXTENSIONS\n"
+      "#define _INTEGRAL_MAX_BITS 64\n"
+      "#define _MSC_VER 1500\n"
+      "#define _MSC_FULL_VER 150030729\n"
+      "#define _MSC_BUILD 1\n"
+      "#define _WIN32\n"
+      "#define _M_IX86 600\n"
+      "#define _M_IX86_FP 0\n"
+      "#define _CPPRTTI\n"
+      "#define _MT\n",
+      info_cpp.predefined_macros());
+  // C case: paths go to system_include_paths; no macro is preset.
+  std::unique_ptr<CompilerInfoData> info_c_data(new CompilerInfoData);
+  {
+    std::vector<string> system_include_paths;
+    string predefined_macros;
+    EXPECT_TRUE(CompilerInfoBuilder::ParseVCOutputString(kInputC,
+        &system_include_paths, &predefined_macros));
+    for (const auto& p : system_include_paths) {
+      info_c_data->add_system_include_paths(p);
+    }
+    info_c_data->set_predefined_macros(predefined_macros);
+  }
+
+  CompilerInfo info_c(std::move(info_c_data));
+  EXPECT_EQ(6U, info_c.system_include_paths().size());
+  EXPECT_EQ(expected_include_paths, info_c.system_include_paths());
+  EXPECT_EQ(0U, info_c.system_framework_paths().size());
+  EXPECT_EQ(0U, info_c.cxx_system_include_paths().size());
+  EXPECT_EQ(
+      "#define _MSC_EXTENSIONS\n"
+      "#define _INTEGRAL_MAX_BITS 64\n"
+      "#define _MSC_VER 1500\n"
+      "#define _MSC_FULL_VER 150030729\n"
+      "#define _MSC_BUILD 1\n"
+      "#define _WIN32\n"
+      "#define _M_IX86 600\n"
+      "#define _M_IX86_FP 0\n"
+      "#define _MT\n",
+      info_c.predefined_macros());
+  // Combined case: kInputCpp fills both path lists; macros are parsed once.
+  std::unique_ptr<CompilerInfoData> info_data(new CompilerInfoData);
+  AppendPredefinedMacros("#define __cplusplus\n", info_data.get());
+  {
+    std::vector<string> system_include_paths;
+    std::vector<string> cxx_system_include_paths;
+    string predefined_macros(info_data->predefined_macros());
+    EXPECT_TRUE(CompilerInfoBuilder::ParseVCOutputString(kInputCpp,
+        &cxx_system_include_paths, &predefined_macros));
+    EXPECT_TRUE(CompilerInfoBuilder::ParseVCOutputString(kInputCpp,
+        &system_include_paths, nullptr));
+    for (const auto& p : cxx_system_include_paths) {
+      info_data->add_cxx_system_include_paths(p);
+    }
+    for (const auto& p : system_include_paths) {
+      info_data->add_system_include_paths(p);
+    }
+    info_data->set_predefined_macros(predefined_macros);
+  }
+  CompilerInfo info(std::move(info_data));
+  EXPECT_EQ(6U, info.system_include_paths().size());
+  EXPECT_EQ(expected_include_paths, info.system_include_paths());
+  EXPECT_EQ(0U, info.system_framework_paths().size());
+  ASSERT_EQ(6U, info.cxx_system_include_paths().size());
+  EXPECT_EQ(expected_include_paths, info.cxx_system_include_paths());
+  EXPECT_EQ(
+      "#define __cplusplus\n"
+      "#define _MSC_EXTENSIONS\n"
+      "#define _INTEGRAL_MAX_BITS 64\n"
+      "#define _MSC_VER 1500\n"
+      "#define _MSC_FULL_VER 150030729\n"
+      "#define _MSC_BUILD 1\n"
+      "#define _WIN32\n"
+      "#define _M_IX86 600\n"
+      "#define _M_IX86_FP 0\n"
+      "#define _CPPRTTI\n"
+      "#define _MT\n",
+      info.predefined_macros());
+  // Input consisting of a single double quote must fail to parse.
+  std::vector<string> dummy;
+  EXPECT_FALSE(CompilerInfoBuilder::ParseVCOutputString("\"", &dummy, nullptr));
+}
+
+TEST_F(CompilerInfoTest, GetVCVersion) {
+  static const char kVc2008[] =
+    "Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 15.00.30729.01 for "
+    "80x86\r\nCopyright (C) Microsoft Corporation.  All rights reserved.\r\n"
+    "\r\nusage: cl [ option... ] filename... [ /link linkoption... ]\r\n";
+
+  static const char kVc2010[] =
+    "Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.40219.01 for "
+    "80x86\r\nCopyright (C) Microsoft Corporation.  All rights reserved.\r\n"
+    "\r\nusage: cl [ option... ] filename... [ /link linkoption... ]\r\n";
+
+  static const char kWinsdk71[] =
+    "Microsoft (R) C/C++ Optimizing Compiler Version 16.00.40219.01 for x64\r\n"
+    "Copyright (C) Microsoft Corporation.  All rights reserved.\r\n\r\n"
+    "cl : Command line error D8003 : missing source filename";
+
+  // Each compiler logo paired with the version and target expected from it.
+  const struct {
+    const char* logo;
+    const char* version;
+    const char* target;
+  } kCases[] = {
+      {kVc2008, "15.00.30729.01", "80x86"},
+      {kVc2010, "16.00.40219.01", "80x86"},
+      {kWinsdk71, "16.00.40219.01", "x64"},
+  };
+  for (const auto& c : kCases) {
+    string version, target;
+    EXPECT_TRUE(CompilerInfoBuilder::ParseVCVersion(c.logo, &version, &target));
+    EXPECT_EQ(c.version, version);
+    EXPECT_EQ(c.target, target);
+  }
+  string version, target;
+  EXPECT_FALSE(CompilerInfoBuilder::ParseVCVersion("", &version, &target));
+}
+
+TEST_F(CompilerInfoTest, ParseClangTidyVersionTarget) {
+  // The version follows "LLVM version"; the target follows "Default target:".
+  const char kOutput[] =
+    "LLVM (http://llvm.org/):\n"
+    "  LLVM version 3.9.0svn\n"
+    "  Optimized build.\n"
+    "  Default target: x86_64-unknown-linux-gnu\n"
+    "  Host CPU: sandybridge\n";
+
+  string parsed_version, parsed_target;
+  CompilerInfoBuilder::ParseClangTidyVersionTarget(
+      kOutput, &parsed_version, &parsed_target);
+
+  EXPECT_EQ("3.9.0svn", parsed_version);
+  EXPECT_EQ("x86_64-unknown-linux-gnu", parsed_target);
+}
+
+TEST_F(CompilerInfoTest, ClangGcc46) {
+  // Output of "third_party/llvm-build/Release+Asserts/bin/clang++ -x c++ -v
+  // -E /dev/null -o /dev/null" on gprecise. The -cc1 command line has no
+  // -fuse-init-array, so no additional flags are expected.
+  static const char kClangOutput[] =
+      "clang version 3.4 (trunk 184830)\n"
+      "Target: x86_64-unknown-linux-gnu\n"
+      "Thread model: posix\n"
+      " \"/home/goma/src/goma/client/third_party/"
+      "llvm-build/Release+Asserts/bin/clang\" -cc1 "
+      "-triple x86_64-unknown-linux-gnu -E -disable-free "
+      "-main-file-name null -mrelocation-model static "
+      "-mdisable-fp-elim -fmath-errno -masm-verbose -mconstructor-aliases "
+      "-munwind-tables -target-cpu x86-64 -target-linker-version 2.22 "
+      "-v -resource-dir /home/goma/src/goma/client/"
+      "third_party/llvm-build/Release+Asserts/bin/../"
+      "lib/clang/3.4 "
+      "-internal-isystem /usr/lib/gcc/x86_64-linux-gnu/4.6/"
+      "../../../../include/c++/4.6 "
+      "-internal-isystem /usr/lib/gcc/x86_64-linux-gnu/4.6/"
+      "../../../../include/c++/4.6/x86_64-linux-gnu "
+      "-internal-isystem /usr/lib/gcc/x86_64-linux-gnu/4.6/"
+      "../../../../include/c++/4.6/backward "
+      "-internal-isystem /usr/lib/gcc/x86_64-linux-gnu/4.6/"
+      "../../../../include/x86_64-linux-gnu/c++/4.6 "
+      "-internal-isystem /usr/local/include "
+      "-internal-isystem /home/goma/src/goma/client/"
+      "third_party/llvm-build/Release+Asserts/bin/../"
+      "lib/clang/3.4/include "
+      "-internal-externc-isystem /usr/include/x86_64-linux-gnu "
+      "-internal-externc-isystem /include "
+      "-internal-externc-isystem /usr/include "
+      "-fdeprecated-macro "
+      "-fdebug-compilation-dir /home/goma/src/goma/client "
+      "-ferror-limit 19 -fmessage-length 80 -mstackrealign -fobjc-runtime=gcc "
+      "-fobjc-default-synthesize-properties -fcxx-exceptions -fexceptions "
+      "-fdiagnostics-show-option -fcolor-diagnostics -vectorize-loops "
+      "-o /dev/null -x c++ /dev/null\n"
+      "clang -cc1 version 3.4 based upon LLVM 3.4svn default "
+      "target x86_64-unknown-linux-gnu\n"
+      "ignoring nonexistent directory \"/usr/lib/gcc/x86_64-linux-gnu/4.6/"
+      "../../../../include/x86_64-linux-gnu/c++/4.6\"\n"
+      "ignoring nonexistent directory \"/include\"\n"
+      "#include \"...\" search starts here:\n"
+      "#include <...> search starts here:\n"
+      " /usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6\n"
+      " /usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/"
+      "x86_64-linux-gnu\n"
+      " /usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/"
+      "backward\n"
+      " /usr/local/include\n"
+      " /home/goma/src/goma/client/third_party/"
+      "llvm-build/Release+Asserts/bin/../lib/clang/3.4/include\n"
+      " /usr/include/x86_64-linux-gnu\n"
+      " /usr/include\n"
+      "End of search list.\n";
+  std::vector<string> args;
+  EXPECT_TRUE(CompilerInfoBuilder::GetAdditionalFlags(kClangOutput, &args));
+  EXPECT_TRUE(args.empty());
+}
+
+TEST_F(CompilerInfoTest, ClangGcc47) {
+  // Output of "third_party/llvm-build/Release+Asserts/bin/clang++ -x c++ -v
+  // -E /dev/null -o /dev/null" on goma-chromeos. The -cc1 command line has
+  // -fuse-init-array, which must be reported as an additional flag.
+  static const char kClangOutput[] =
+      "clang version 3.4 (trunk 184830)\n"
+      "Target: x86_64-unknown-linux-gnu\n"
+      "Thread model: posix\n"
+      " \"/home/chrome-bot/b/build/slave/goma-chromeos/build"
+      "/client/third_party/llvm-build/Release+Asserts/bin/clang\" -cc1 "
+      "-triple x86_64-unknown-linux-gnu -E -disable-free -main-file-name null "
+      "-mrelocation-model static -mdisable-fp-elim -fmath-errno -masm-verbose "
+      "-mconstructor-aliases -munwind-tables -fuse-init-array "
+      "-target-cpu x86-64 -target-linker-version 2.22 -v -resource-dir "
+      "/home/chrome-bot/b/build/slave/goma-chromeos/build"
+      "/client/third_party/llvm-build/Release+Asserts/bin/"
+      "../lib/clang/3.4 "
+      "-internal-isystem /usr/lib64/gcc/x86_64-pc-linux-gnu/4.7.x-google/"
+      "include/g++-v4 "
+      "-internal-isystem /usr/lib64/gcc/x86_64-pc-linux-gnu/4.7.x-google/"
+      "include/g++-v4/x86_64-pc-linux-gnu "
+      "-internal-isystem /usr/lib64/gcc/x86_64-pc-linux-gnu/4.7.x-google/"
+      "include/g++-v4/backward "
+      "-internal-isystem /usr/local/include "
+      "-internal-isystem /home/chrome-bot/b/build/slave/goma-chromeos/build/"
+      "client/third_party/llvm-build/Release+Asserts/bin/../"
+      "lib/clang/3.4/include "
+      "-internal-externc-isystem /include "
+      "-internal-externc-isystem /usr/include "
+      "-fdeprecated-macro -fdebug-compilation-dir /home/chrome-bot"
+      "/b/build/slave/goma-chromeos/build/client "
+      "-ferror-limit 19 -fmessage-length 0 -mstackrealign "
+      "-fobjc-runtime=gcc -fobjc-default-synthesize-properties "
+      "-fcxx-exceptions -fexceptions -fdiagnostics-show-option "
+      "-vectorize-loops -o /dev/null -x c++ /dev/null\n"
+      "clang -cc1 version 3.4 based upon LLVM 3.4svn default "
+      "target x86_64-unknown-linux-gnu\n"
+      "ignoring nonexistent directory \"/usr/local/include\"\n"
+      "ignoring nonexistent directory \"/include\"\n"
+      "#include \"...\" search starts here:\n"
+      "#include <...> search starts here:\n"
+      " /usr/lib64/gcc/x86_64-pc-linux-gnu/4.7.x-google/include/g++-v4\n"
+      " /usr/lib64/gcc/x86_64-pc-linux-gnu/4.7.x-google/include/g++-v4/"
+      "x86_64-pc-linux-gnu\n"
+      " /usr/lib64/gcc/x86_64-pc-linux-gnu/4.7.x-google/include/g++-v4/"
+      "backward\n"
+      " /home/chrome-bot/b/build/slave/goma-chromeos/build/"
+      "client/third_party/llvm-build/Release+Asserts/bin/../"
+      "lib/clang/3.4/include\n"
+      " /usr/include\n"
+      "End of search list.\n";
+  std::vector<string> args;
+  EXPECT_TRUE(CompilerInfoBuilder::GetAdditionalFlags(kClangOutput, &args));
+  std::vector<string> expected_args;
+  expected_args.push_back("-fuse-init-array");
+  EXPECT_EQ(expected_args, args);
+}
+
+TEST_F(CompilerInfoTest, ParseFeatures) {
+  // Feeds preprocessed output for each dummy name to ParseFeatures() and
+  // checks that only names whose output is a non-zero number are recorded.
+  static const char* kDummyObjectMacros[] = {
+      "dummy_macro1",
+      "dummy_macro2",
+  };
+  static const unsigned long kNumDummyObjectMacros =
+      arraysize(kDummyObjectMacros);
+  static const char* kDummyFunctionMacros[] = {
+      "dummy_func1",
+      "dummy_func2",
+  };
+  static const unsigned long kNumDummyFunctionMacros =
+      arraysize(kDummyFunctionMacros);
+  static const char* kDummyFeatures[] = {
+      "dummy_feature1",
+      "dummy_feature2",
+  };
+  static const unsigned long kNumDummyFeatures = arraysize(kDummyFeatures);
+  static const char* kDummyExtensions[] = {
+      "dummy_extension1",
+      "dummy_extension2",
+  };
+  static const unsigned long kNumDummyExtensions = arraysize(kDummyExtensions);
+  static const char* kDummyAttributes[] = {
+      "dummy_attribute1",
+      "dummy_attribute2",
+      "dummy_attribute3",
+      "dummy_attribute4",
+      "_Alignas",
+      "asm",
+  };
+  static const unsigned long kNumDummyAttributes = arraysize(kDummyAttributes);
+  static const char* kDummyCppAttributes[] = {
+      "dummy_cpp_attribute1",
+      "dummy_cpp_attribute2",
+      "clang::dummy_cpp_attribute1",
+      "clang::dummy_cpp_attribute2",
+  };
+  static const unsigned long kNumDummyCppAttributes =
+      arraysize(kDummyCppAttributes);
+
+  static const char* kDummyDeclspecAttributes[] = {
+      "dummy_declspec_attributes1",
+      "dummy_declspec_attributes2",
+  };
+  static const unsigned long kNumDummyDeclspecAttributes =
+      arraysize(kDummyDeclspecAttributes);
+
+  static const char* kDummyBuiltins[] = {
+      "dummy_builtin1",
+      "dummy_builtin2",
+  };
+  static const unsigned long kNumDummyBuiltins = arraysize(kDummyBuiltins);
+
+  static const char kClangOutput[] =
+      "# 1 \"a.c\"\n"
+      "# 1 \"a.c\" 1\n"
+      "# 1 \"<built-in>\" 1\n"
+      "# 1 \"<built-in>\" 3\n"
+      "# 132 \"<built-in>\" 3\n"
+      "# 1 \"<command line>\" 1\n"
+      "# 1 \"<built-in>\" 2\n"
+      "# 1 \"a.c\" 2\n"
+      "# 1 \"a.c\"\n"  // object macros.
+      "1\n"
+      "# 2 \"a.c\"\n"
+      "0\n"
+      "# 3 \"a.c\"\n"  // function macros.
+      "1\n"
+      "# 4 \"a.c\"\n"
+      "0\n"
+      "# 5 \"a.c\"\n"  // features.
+      "1\n"
+      "# 6 \"a.c\"\n"
+      "0\n"
+      "# 7 \"a.c\"\n"  // extensions.
+      "1\n"
+      "# 8 \"a.c\"\n"
+      "0\n"
+      "# 9 \"a.c\"\n"  // attributes.
+      "1\n"
+      "# 10 \"a.c\"\n"
+      "0)\n"
+      "# 11 \"a.c\"\n"
+      "1\n"
+      "# 12\n"
+      "0\n"
+      "# 13\n"
+      "_Alignas)\n"
+      "# 14\n"
+      "asm)\n"
+      "# 15\n"         // cpp attributes.
+      "201304\n"
+      "# 16\n"
+      "0\n"
+      "# 17\n"
+      "201301\n"
+      "# 18\n"
+      "0\n"
+      "# 19\n"         // declspec attributes.
+      "1\n"
+      "# 20\n"
+      "0\n"
+      "# 21\n"         // builtins
+      "1\n"
+      "# 22\n"
+      "0\n";
+
+  CompilerInfoBuilder::FeatureList object_macros = std::make_pair(
+      kDummyObjectMacros, kNumDummyObjectMacros);
+  CompilerInfoBuilder::FeatureList function_macros = std::make_pair(
+      kDummyFunctionMacros, kNumDummyFunctionMacros);
+  CompilerInfoBuilder::FeatureList features = std::make_pair(
+      kDummyFeatures, kNumDummyFeatures);
+  CompilerInfoBuilder::FeatureList extensions = std::make_pair(
+      kDummyExtensions, kNumDummyExtensions);
+  CompilerInfoBuilder::FeatureList attributes = std::make_pair(
+      kDummyAttributes, kNumDummyAttributes);
+  CompilerInfoBuilder::FeatureList cpp_attributes = std::make_pair(
+      kDummyCppAttributes, kNumDummyCppAttributes);
+  CompilerInfoBuilder::FeatureList declspec_attributes = std::make_pair(
+      kDummyDeclspecAttributes, kNumDummyDeclspecAttributes);
+  CompilerInfoBuilder::FeatureList builtins = std::make_pair(
+      kDummyBuiltins, kNumDummyBuiltins);
+
+  std::unique_ptr<CompilerInfoData> cid(new CompilerInfoData);
+  EXPECT_TRUE(CompilerInfoBuilder::ParseFeatures(
+                kClangOutput, object_macros, function_macros,
+                features, extensions, attributes, cpp_attributes,
+                declspec_attributes, builtins, cid.get()));
+  CompilerInfo info(std::move(cid));
+
+  EXPECT_EQ(2U, info.supported_predefined_macros().size());
+  EXPECT_EQ(1U, info.supported_predefined_macros().count("dummy_macro1"));
+  EXPECT_EQ(0U, info.supported_predefined_macros().count("dummy_macro2"));
+  EXPECT_EQ(1U, info.supported_predefined_macros().count("dummy_func1"));
+  EXPECT_EQ(0U, info.supported_predefined_macros().count("dummy_func2"));
+
+  EXPECT_EQ(1U, info.has_feature().size());
+  EXPECT_EQ(1, FindValue(info.has_feature(), "dummy_feature1"));
+  EXPECT_EQ(0U, info.has_feature().count("dummy_feature2"));
+
+  EXPECT_EQ(1U, info.has_extension().size());
+  EXPECT_EQ(1, FindValue(info.has_extension(), "dummy_extension1"));
+  EXPECT_EQ(0U, info.has_extension().count("dummy_extension2"));
+
+  EXPECT_EQ(2U, info.has_attribute().size());
+  EXPECT_EQ(1, FindValue(info.has_attribute(), "dummy_attribute1"));
+  EXPECT_EQ(0U, info.has_attribute().count("dummy_attribute2"));
+  EXPECT_EQ(1, FindValue(info.has_attribute(), "dummy_attribute3"));
+  EXPECT_EQ(0U, info.has_attribute().count("dummy_attribute4"));
+  EXPECT_EQ(0U, info.has_attribute().count("_Alignas"));
+  EXPECT_EQ(0U, info.has_attribute().count("asm"));
+
+  EXPECT_EQ(2U, info.has_cpp_attribute().size());
+  EXPECT_EQ(201304, FindValue(info.has_cpp_attribute(),
+                              "dummy_cpp_attribute1"));
+  EXPECT_EQ(0U, info.has_cpp_attribute().count("dummy_cpp_attribute2"));
+  EXPECT_EQ(201301, FindValue(info.has_cpp_attribute(),
+                              "clang::dummy_cpp_attribute1"));
+  EXPECT_EQ(0U, info.has_cpp_attribute().count("clang::dummy_cpp_attribute2"));
+
+  EXPECT_EQ(1U, info.has_declspec_attribute().size());
+  EXPECT_EQ(1, FindValue(info.has_declspec_attribute(),
+                         "dummy_declspec_attributes1"));
+  EXPECT_EQ(0U,
+            info.has_declspec_attribute().count("dummy_declspec_attributes2"));
+
+  EXPECT_EQ(1U, info.has_builtin().size());
+  EXPECT_EQ(1, FindValue(info.has_builtin(), "dummy_builtin1"));
+  EXPECT_EQ(0U, info.has_builtin().count("dummy_builtin2"));
+}
+
+
+TEST_F(CompilerInfoTest, ParseRealClangPathForChromeOS) {
+  const char kClangVoutput[] =
+      "Chromium OS 3.9_pre265926-r9 clang version 3.9.0 "
+      "(/var/cache/chromeos-cache/distfiles/host/egit-src/clang.git "
+      "af6a0b98569cf7981fe27327ac4bf19bd0d6b162) (/var/cache/chromeos"
+      "-cache/distfiles/host/egit-src/llvm.git 26a9873b72c6dbb425ae07"
+      "5fcf51caa9fc5e892b) (based on LLVM 3.9.0svn)\n"
+      "Target: x86_64-cros-linux-gnu\n"
+      "Thread model: posix\n"
+      "InstalledDir: /usr/local/google/home/test/.cros_"
+      "cache/chrome-sdk/tarballs/falco+8754.0.0+target_toolchain/usr/"
+      "bin\n"
+      "Found candidate GCC installation: /usr/local/google/home/test/"
+      ".cros_cache/chrome-sdk/tarballs/falco+8754.0.0+target_toolchain/"
+      "usr/bin/../lib/gcc/x86_64-cros-linux-gnu/4.9.x\n"
+      "Selected GCC installation: /usr/local/google/home/test/.cros_cache"
+      "/chrome-sdk/tarballs/falco+8754.0.0+target_toolchain/usr/bin/../"
+      "lib/gcc/x86_64-cros-linux-gnu/4.9.x\n"
+      "Candidate multilib: .;@m64\n"
+      "Selected multilib: .;@m64\n"
+      " \"/usr/local/google/home/test/usr/bin/clang-3.9\" -cc1 "
+      "-triple x86_64-cros-linux-gnu -E -disable-free -disable-llvm-"
+      "verifier -discard-value-names -main-file-name null "
+      "-o - -x c /dev/null\n"
+      "clang -cc1 version 3.9.0 based upon LLVM 3.9.0svn default target"
+      " x86_64-pc-linux-gnu\n"
+      "ignoring nonexistent directory \"/usr/local/google/test/"
+      ".cros_cache/chrome-sdk/tarballs/falco+8754.0.0+sysroot_"
+      "chromeos-base_chromeos-chrome.tar.xz/usr/local/include\"\n"
+      "ignoring nonexistent directory \"/usr/local/google/home/test/"
+      ".cros_cache/chrome-sdk/tarballs/falco+8754.0.0+sysroot_chromeos-"
+      "base_chromeos-chrome.tar.xz/include\"\n"
+      "#include \"...\" search starts here:\n"
+      "#include <...> search starts here:\n"
+      " /usr/local/google/home/test/.cros_cache/chrome-sdk/tarballs/"
+      "falco+8754.0.0+target_toolchain/usr/bin/../lib64/clang/3.9.0/"
+      "include\n"
+      " /usr/local/google/home/test/.cros_cache/chrome-sdk/tarballs/"
+      "falco+8754.0.0+sysroot_chromeos-base_chromeos-chrome.tar.xz/"
+      "usr/include\n"
+      "End of search list.\n"
+      "# 1 \"/dev/null\"\n"
+      "# 1 \"<built-in>\" 1\n"
+      "# 1 \"<built-in>\" 3\n"
+      "# 321 \"<built-in>\" 3\n"
+      "# 1 \"<command line>\" 1\n"
+      "# 1 \"<built-in>\" 2\n"
+      "# 1 \"/dev/null\" 2\n";
+
+  // The expected path is the quoted executable that precedes -cc1 in the
+  // output above.
+  EXPECT_EQ("/usr/local/google/home/test/usr/bin/clang-3.9",
+            CompilerInfoBuilder::ParseRealClangPath(kClangVoutput));
+}
+
+// ParseClangVersionTarget should succeed on this three-line dump and report
+// the first line as the version and the text after "Target: " as the target.
+TEST_F(CompilerInfoTest, ParseClangVersionTarget) {
+  static const char kVersionDump[] =
+      "clang version 3.5 (trunk)\n"
+      "Target: i686-pc-win32\n"
+      "Thread model: posix\n";
+
+  string version;
+  string target;
+  const bool parsed = CompilerInfoBuilder::ParseClangVersionTarget(
+      kVersionDump, &version, &target);
+
+  EXPECT_TRUE(parsed);
+  EXPECT_EQ("clang version 3.5 (trunk)", version);
+  EXPECT_EQ("i686-pc-win32", target);
+}
+
+#ifdef _WIN32
+// Splits a Windows clang include-search dump and expects the six directories
+// listed under "#include <...>" to come back in order, with no quote
+// directories and no framework directories.
+TEST_F(CompilerInfoTest, SplitGccIncludeOutputForClang) {
+  static const char kClangOutput[] =
+      "clang -cc1 version 3.5 based upon LLVM 3.5svn default target "
+      "i686-pc-win32\n"
+      "#include \"...\" search starts here:\n"
+      "#include <...> search starts here:\n"
+      " C:\\Users\\goma\\proj\\clang\\trying\\build\\bin\\..\\lib"
+      "\\clang\\3.5\\include\n"
+      " C:\\Program Files (x86)\\Microsoft Visual Studio 11.0\\VC\\INCLUDE\n"
+      " C:\\Program Files (x86)\\Microsoft Visual Studio 11.0\\VC\\ATLMFC"
+      "\\INCLUDE\n"
+      " C:\\Program Files (x86)\\Windows Kits\\8.0\\include\\shared\n"
+      " C:\\Program Files (x86)\\Windows Kits\\8.0\\include\\um\n"
+      " C:\\Program Files (x86)\\Windows Kits\\8.0\\include\\winrt\n"
+      "End of search list.\n"
+      "#line 1 \"..\\\\..\\\\proj\\\\clang\\\\empty.cc\"\n"
+      "#line 1 \"<built-in>\"\n"
+      "#line 1 \"<built-in>\"\n"
+      "#line 176 \"<built-in>\"\n"
+      "#line 1 \"<command line>\"\n"
+      "#line 1 \"<built-in>\"\n"
+      "#line 1 \"..\\\\..\\\\proj\\\\clang\\\\empty.cc\"\n";
+
+  // Same entries, in the same order, as the "#include <...>" list above.
+  const std::vector<string> expected_paths = {
+    "C:\\Users\\goma\\proj\\clang\\trying\\build\\bin\\..\\lib"
+        "\\clang\\3.5\\include",
+    "C:\\Program Files (x86)\\Microsoft Visual Studio 11.0\\VC\\INCLUDE",
+    "C:\\Program Files (x86)\\Microsoft Visual Studio 11.0\\VC\\ATLMFC"
+        "\\INCLUDE",
+    "C:\\Program Files (x86)\\Windows Kits\\8.0\\include\\shared",
+    "C:\\Program Files (x86)\\Windows Kits\\8.0\\include\\um",
+    "C:\\Program Files (x86)\\Windows Kits\\8.0\\include\\winrt",
+  };
+
+  std::vector<string> quote_dirs;
+  std::vector<string> system_dirs;
+  std::vector<string> framework_dirs;
+  const bool split_ok = CompilerInfoBuilder::SplitGccIncludeOutput(
+      kClangOutput, &quote_dirs, &system_dirs, &framework_dirs);
+  EXPECT_TRUE(split_ok);
+
+  EXPECT_TRUE(quote_dirs.empty());
+  EXPECT_EQ(expected_paths, system_dirs);
+  EXPECT_TRUE(framework_dirs.empty());
+}
+#endif
+
+// Splits a gcc-4.8 verbose dump produced with "-iquote include" and expects
+// the single quote directory ("include") and the eight "#include <...>"
+// directories to be returned separately, with no framework directories.
+TEST_F(CompilerInfoTest, SplitGccIncludeOutputForIQuote) {
+  // gtrusty gcc-4.8 -xc++ -iquote include -v -E /dev/null -o /dev/null
+  static const char kGccVOutput[] =
+      "Using built-in specs.\n"
+      "COLLECT_GCC=gcc\n"
+      "Target: x86_64-linux-gnu\n"
+      "Configured with: ../src/configure -v "
+      "--with-pkgversion='Ubuntu 4.8.4-2ubuntu1~14.04.3' "
+      "--with-bugurl=file:///usr/share/doc/gcc-4.8/README.Bugs "
+      "--enable-languages=c,c++,java,go,d,fortran,objc,obj-c++ "
+      "--prefix=/usr --program-suffix=-4.8 --enable-shared "
+      "--enable-linker-build-id --libexecdir=/usr/lib "
+      "--without-included-gettext --enable-threads=posix "
+      "--with-gxx-include-dir=/usr/include/c++/4.8 --libdir=/usr/lib "
+      "--enable-nls --with-sysroot=/ --enable-clocale=gnu "
+      "--enable-libstdcxx-debug --enable-libstdcxx-time=yes "
+      "--enable-gnu-unique-object --disable-libmudflap --enable-plugin "
+      "--with-system-zlib --disable-browser-plugin --enable-java-awt=gtk "
+      "--enable-gtk-cairo "
+      "--with-java-home=/usr/lib/jvm/java-1.5.0-gcj-4.8-amd64/jre "
+      "--enable-java-home "
+      "--with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-4.8-amd64 "
+      "--with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-4.8-amd64 "
+      "--with-arch-directory=amd64 "
+      "--with-ecj-jar=/usr/share/java/eclipse-ecj.jar "
+      "--enable-objc-gc --enable-multiarch --disable-werror "
+      "--with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 "
+      "--with-tune=generic --enable-checking=release "
+      "--build=x86_64-linux-gnu --host=x86_64-linux-gnu "
+      "--target=x86_64-linux-gnu\n"
+      "Thread model: posix\n"
+      "gcc version 4.8.4 (Ubuntu 4.8.4-2ubuntu1~14.04.3) \n"
+      "COLLECT_GCC_OPTIONS='-v' '-iquote' 'include' '-E' '-mtune=generic' "
+      "'-march=x86-64'\n"
+      " /usr/lib/gcc/x86_64-linux-gnu/4.8/cc1plus -E -quiet -v "
+      "-imultiarch x86_64-linux-gnu -D_GNU_SOURCE -iquote include /dev/null "
+      "-quiet -dumpbase null -mtune=generic -march=x86-64 -auxbase null "
+      "-version -fstack-protector -Wformat -Wformat-security\n"
+      "ignoring duplicate directory "
+      "\"/usr/include/x86_64-linux-gnu/c++/4.8\"\n"
+      "ignoring nonexistent directory "
+      "\"/usr/local/include/x86_64-linux-gnu\"\n"
+      "ignoring nonexistent directory "
+      "\"/usr/lib/gcc/x86_64-linux-gnu/4.8/../../../../"
+      "x86_64-linux-gnu/include\"\n"
+      "#include \"...\" search starts here:\n"
+      " include\n"
+      "#include <...> search starts here:\n"
+      " /usr/include/c++/4.8\n"
+      " /usr/include/x86_64-linux-gnu/c++/4.8\n"
+      " /usr/include/c++/4.8/backward\n"
+      " /usr/lib/gcc/x86_64-linux-gnu/4.8/include\n"
+      " /usr/local/include\n"
+      " /usr/lib/gcc/x86_64-linux-gnu/4.8/include-fixed\n"
+      " /usr/include/x86_64-linux-gnu\n"
+      " /usr/include\n"
+      "End of search list.\n"
+      "COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/4.8/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.8/:/usr/lib/gcc/x86_64-linux-gnu/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.8/:/usr/lib/gcc/x86_64-linux-gnu/\n"
+      "LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/4.8/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.8/../../../x86_64-linux-gnu/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.8/../../../../lib/:"
+      "/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:"
+      "/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/4.8/../../../:/lib/:"
+      "/usr/lib/\n"
+      "COLLECT_GCC_OPTIONS='-v' '-iquote' 'include' '-E' '-mtune=generic' "
+      "'-march=x86-64'\n";
+
+  std::vector<string> qpaths;
+  std::vector<string> paths;
+  std::vector<string> framework_paths;
+  EXPECT_TRUE(CompilerInfoBuilder::SplitGccIncludeOutput(
+      kGccVOutput, &qpaths, &paths, &framework_paths));
+
+  // The one entry listed under '#include "..."' in the fixture.
+  const std::vector<string> expected_qpaths {
+    "include",
+  };
+  EXPECT_EQ(expected_qpaths, qpaths);
+  // The entries listed under '#include <...>' in the fixture, in order.
+  const std::vector<string> expected_paths {
+    "/usr/include/c++/4.8",
+    "/usr/include/x86_64-linux-gnu/c++/4.8",
+    "/usr/include/c++/4.8/backward",
+    "/usr/lib/gcc/x86_64-linux-gnu/4.8/include",
+    "/usr/local/include",
+    "/usr/lib/gcc/x86_64-linux-gnu/4.8/include-fixed",
+    "/usr/include/x86_64-linux-gnu",
+    "/usr/include",
+  };
+  EXPECT_EQ(expected_paths, paths);
+  EXPECT_TRUE(framework_paths.empty());
+}
+
+// Writes |info|'s DebugString() to |os| and returns |os|.
+// Presumably present so gtest can print SubprogramInfo values on failure.
+::std::ostream& operator<<(::std::ostream& os,
+                           const CompilerInfo::SubprogramInfo& info) {
+  os << info.DebugString();
+  return os;
+}
+
+// Passes an absolute plugin path via "-Xclang -load -Xclang <path>" and
+// expects ParseSubprogramFlags to report that path in |clang_plugins|, with
+// no -B options and |no_integrated_as| left false.
+TEST_F(CompilerInfoTest, GetExtraSubprogramsClangPlugin) {
+  const string cwd("/");
+
+  TmpdirUtil tmpdir("get_extra_subprograms_clang_plugin");
+  tmpdir.SetCwd(cwd);
+  tmpdir.CreateEmptyFile("libPlugin.so");
+
+  // Built in one step; the previously declared-but-unused |envs| is dropped.
+  std::vector<string> args = {
+    "/usr/bin/clang",
+    "-Xclang",
+    "-load",
+    "-Xclang",
+    file::JoinPath(tmpdir.tmpdir(), "libPlugin.so"),
+    "-c",
+    "hello.c",
+  };
+  GCCFlags flags(args, cwd);
+  std::vector<string> clang_plugins;
+  std::vector<string> B_options;
+  bool no_integrated_as = false;
+  CompilerInfoBuilder::ParseSubprogramFlags(
+      "/usr/bin/clang", flags, &clang_plugins, &B_options, &no_integrated_as);
+  std::vector<string> expected = {tmpdir.FullPath("libPlugin.so")};
+  EXPECT_EQ(expected, clang_plugins);
+  EXPECT_TRUE(B_options.empty());
+  EXPECT_FALSE(no_integrated_as);
+}
+
+// Passes a relative plugin path via "-Xclang -load -Xclang libPlugin.so" and
+// expects ParseSubprogramFlags to report it unchanged in |clang_plugins|,
+// with no -B options and |no_integrated_as| left false.
+TEST_F(CompilerInfoTest, GetExtraSubprogramsClangPluginRelative) {
+  const string cwd("/");
+
+  TmpdirUtil tmpdir("get_extra_subprograms_clang_plugin");
+  tmpdir.SetCwd(cwd);
+  tmpdir.CreateEmptyFile("libPlugin.so");
+
+  // Built in one step; the previously declared-but-unused |envs| is dropped.
+  std::vector<string> args = {
+    "/usr/bin/clang",
+    "-Xclang",
+    "-load",
+    "-Xclang",
+    "libPlugin.so",
+    "-c",
+    "hello.c",
+  };
+  GCCFlags flags(args, cwd);
+  std::vector<string> clang_plugins;
+  std::vector<string> B_options;
+  bool no_integrated_as = false;
+  CompilerInfoBuilder::ParseSubprogramFlags(
+      "/usr/bin/clang", flags, &clang_plugins, &B_options, &no_integrated_as);
+  std::vector<string> expected = {"libPlugin.so"};
+  EXPECT_EQ(expected, clang_plugins);
+  EXPECT_TRUE(B_options.empty());
+  EXPECT_FALSE(no_integrated_as);
+}
+
+// Passes "-B dummy" and expects ParseSubprogramFlags to report "dummy" in
+// |B_options|, with no clang plugins and |no_integrated_as| left false.
+TEST_F(CompilerInfoTest, GetExtraSubprogramsBOptions) {
+  const string cwd("/");
+
+  // NOTE(review): the temp dir and libPlugin.so are not referenced by the
+  // arguments or expectations below; they look copied from the plugin tests.
+  // Kept as-is; confirm TmpdirUtil has no needed side effect before removing.
+  TmpdirUtil tmpdir("get_extra_subprograms_clang_plugin");
+  tmpdir.SetCwd(cwd);
+  tmpdir.CreateEmptyFile("libPlugin.so");
+
+  // Built in one step; the previously declared-but-unused |envs| is dropped.
+  std::vector<string> args = {
+    "/usr/bin/clang",
+    "-B",
+    "dummy",
+    "-c",
+    "hello.c",
+  };
+  GCCFlags flags(args, cwd);
+  std::vector<string> clang_plugins;
+  std::vector<string> B_options;
+  bool no_integrated_as = false;
+  CompilerInfoBuilder::ParseSubprogramFlags(
+      "/usr/bin/clang", flags, &clang_plugins, &B_options, &no_integrated_as);
+  std::vector<string> expected = {"dummy"};
+  EXPECT_TRUE(clang_plugins.empty());
+  EXPECT_EQ(expected, B_options);
+  EXPECT_FALSE(no_integrated_as);
+}
+
+// The fixture contains two quoted command lines: a clang "-cc1" invocation
+// and an external ".../arm-linux-gnueabi/bin/as" invocation. Only the "as"
+// path is expected in the parsed subprogram list.
+TEST_F(CompilerInfoTest, ParseGetSubprogramsOutput) {
+  const char kClangOutput[] =
+      "clang version 3.5.0 (trunk 214024)\n"
+      "Target: arm--linux\n"
+      "Thread model: posix\n"
+      " \"/usr/local/google/ssd/goma/chrome_src/src/third_party/"
+      "llvm-build/Release+Asserts/bin/clang\" \"-cc1\" \"-triple\" \""
+      "armv4t--linux\" \"-S\" \"-disable-free\" \"-main-file-name\" \""
+      "null\" \"-mrelocation-model\" \"static\" \"-mdisable-fp-elim\" \""
+      "-fmath-errno\" \"-masm-verbose\" \"-no-integrated-as\" \""
+      "-mconstructor-aliases\" \"-target-cpu\" \"arm7tdmi\" \"-target-abi"
+      "\" \"apcs-gnu\" \"-mfloat-abi\" \"hard\" \"-target-linker-version"
+      "\" \"2.22\" \"-dwarf-column-info\" \"-coverage-file\" \"/tmp/null-"
+      "6cb82c.s\" \"-resource-dir\" \"/usr/local/google/ssd/goma/"
+      "chrome_src/src/third_party/llvm-build/Release+Asserts/bin/../lib/"
+      "clang/3.5.0\" \"-internal-isystem\" \"/usr/lib/gcc/arm-linux-gnueabi/"
+      "4.6/../../../../include/c++/4.6\" \"-internal-isystem\" \""
+      "/usr/lib/gcc/arm-linux-gnueabi/4.6/../../../../include/c++/4.6/"
+      "arm-linux-gnueabi\" \"-internal-isystem\" \"/usr/lib/gcc/arm-linux-"
+      "gnueabi/4.6/../../../../include/c++/4.6/backward\" \""
+      "-internal-isystem\" \"/usr/lib/gcc/arm-linux-gnueabi/4.6/../../../../"
+      "include/arm-linux-gnueabi/c++/4.6\" \"-internal-isystem\" "
+      "\"/usr/local/include\" \"-internal-isystem\" \"/usr/local/google/"
+      "ssd/goma/chrome_src/src/third_party/llvm-build/Release+Asserts"
+      "/bin/../lib/clang/3.5.0/include\" \"-internal-externc-isystem\" "
+      "\"/include\" \"-internal-externc-isystem\" \"/usr/include\" "
+      "\"-fdeprecated-macro\" \"-fno-dwarf-directory-asm\" "
+      "\"-fdebug-compilation-dir\" \"/usr/local/google/home/goma/"
+      ".ssd/chrome_src/src\" \"-ferror-limit\" \"19\" \"-fmessage-length\" "
+      "\"0\" \"-mstackrealign\" \"-fno-signed-char\" \"-fobjc-runtime=gcc\" "
+      "\"-fcxx-exceptions\" \"-fexceptions\" \"-fdiagnostics-show-option\" "
+      "\"-o\" \"/tmp/null-6cb82c.s\" \"-x\" \"c++\" \"/dev/null\"\n"
+      " \"/usr/lib/gcc/arm-linux-gnueabi/4.6/../../../../arm-linux-gnueabi"
+      "/bin/as\" \"-mfloat-abi=hard\" \"-o\" \"/dev/null\" "
+      "\"/tmp/null-6cb82c.s\"\n";
+
+  std::vector<string> subprograms;
+  // Program path of the second command line in the fixture, unnormalized.
+  std::vector<string> expected = {
+    "/usr/lib/gcc/arm-linux-gnueabi/4.6/../../../../arm-linux-gnueabi/bin/as",
+  };
+  CompilerInfoBuilder::ParseGetSubprogramsOutput(
+      kClangOutput, &subprograms);
+  EXPECT_EQ(expected, subprograms);
+}
+
+// Same shape as the plain "as" case, but the assembler binary is named with
+// a target prefix ("arm-linux-androideabi-as"). Its full path is still
+// expected to be the only reported subprogram.
+TEST_F(CompilerInfoTest, ParseGetSubprogramsOutputWithAsSuffix) {
+  const char kClangOutput[] =
+    "clang version 3.5.0 (trunk 214024)\n"
+    "Target: arm--linux-androideabi\n"
+    "Thread model: posix\n"
+    " \"/mnt/scratch0/b_used/build/slave/android_clang_dbg_recipe/build/src/"
+    "third_party/llvm-build/Release+Asserts/bin/clang\" \"-cc1\" \"-triple"
+    "\" \"armv6--linux-androideabi\" \"-S\" \"-disable-free\" \"-main-file-"
+    "name\" \"null\" \"-mrelocation-model\" \"pic\" \"-pic-level\" \"2\" \""
+    "-mdisable-fp-elim\" \"-relaxed-aliasing\" \"-fmath-errno\" \"-masm-"
+    "verbose\" \"-no-integrated-as\" \"-mconstructor-aliases\" \"-munwind-"
+    "tables\" \"-fuse-init-array\" \"-target-cpu\" \"cortex-a6\" \"-target-"
+    "feature\" \"+soft-float-abi\" \"-target-feature\" \"+neon\" \"-target-"
+    "abi\" \"aapcs-linux\" \"-mfloat-abi\" \"soft\" \"-target-linker-version"
+    "\" \"1.22\" \"-dwarf-column-info\" \"-ffunction-sections\" \"-fdata"
+    "-sections\" \"-coverage-file\" \"/tmp/null-c11ea4.s\" \"-resource-dir"
+    "\" \"/mnt/scratch0/b_used/build/slave/android_clang_dbg_recipe/build"
+    "/src/third_party/llvm-build/Release+Asserts/bin/../lib/clang/3.5.0\" "
+    "\"-isystem\" \"/mnt/scratch0/b_used/build/slave/android_clang_dbg_"
+    "recipe/build/src/third_party/android_tools/ndk//sources/cxx-stl/"
+    "stlport/stlport\" \"-isysroot\" \"/mnt/scratch0/b_used/build/slave/"
+    "android_clang_dbg_recipe/build/src/third_party/android_tools/ndk//"
+    "platforms/android-14/arch-arm\" \"-internal-isystem\" \"/mnt/scratch0/"
+    "b_used/build/slave/android_clang_dbg_recipe/build/src/third_party/"
+    "android_tools/ndk//platforms/android-14/arch-arm/usr/local/include"
+    "\" \"-internal-isystem\" \"/mnt/scratch0/b_used/build/slave/android_"
+    "clang_dbg_recipe/build/src/third_party/llvm-build/Release+Asserts/bin/"
+    "../lib/clang/3.5.0/include\" \"-internal-externc-isystem\" \"/mnt/"
+    "scratch0/b_used/build/slave/android_clang_dbg_recipe/build/src/"
+    "third_party/android_tools/ndk//platforms/android-14/arch-arm/include"
+    "\" \"-internal-externc-isystem\" \"/mnt/scratch0/b_used/build/slave/"
+    "android_clang_dbg_recipe/build/src/third_party/android_tools/ndk//"
+    "platforms/android-14/arch-arm/usr/include\" \"-Os\" \"-std=gnu++11\" "
+    "\"-fdeprecated-macro\" \"-fno-dwarf-directory-asm\" \"-fdebug-"
+    "compilation-dir\" \"/mnt/scratch0/b_used/build/slave/android_clang_"
+    "dbg_recipe/build/src/out/Debug\" \"-ferror-limit\" \"19\" \"-fmessage"
+    "-length\" \"0\" \"-fvisibility\" \"hidden\" \"-fvisibility-inlines-"
+    "hidden\" \"-fsanitize=address\" \"-stack-protector\" \"1\" \""
+    "-mstackrealign\" \"-fno-rtti\" \"-fno-signed-char\" \"-fno-threadsafe"
+    "-statics\" \"-fobjc-runtime=gcc\" \"-fdiagnostics-show-option\" \"-fcolor"
+    "-diagnostics\" \"-vectorize-loops\" \"-vectorize-slp\" \"-load\" \"/mnt/"
+    "scratch0/b_used/build/slave/android_clang_dbg_recipe/build/src/tools/"
+    "clang/scripts/../../../third_party/llvm-build/Release+Asserts/lib/"
+    "libFindBadConstructs.so\" \"-add-plugin\" \"find-bad-constructs\" \""
+    "-mllvm\" \"-asan-globals=0\" \"-o\" \"/tmp/null-c11ea4.s\" \"-x\" \""
+    "c++\" \"/dev/null\"\n"
+    " \"/mnt/scratch0/b_used/build/slave/android_clang_dbg_recipe/build/src/"
+    "third_party/android_tools/ndk//toolchains/arm-linux-androideabi-4.8/"
+    "prebuilt/linux-x86_64/bin/arm-linux-androideabi-as\" \"-mfloat-abi="
+    "softfp\" \"-march=armv7-a\" \"-mfpu=neon\" \"-o\" \"/dev/null\" \"/tmp/"
+    "null-c11ea4.s\"\n";
+
+  std::vector<string> subprograms;
+  // Program path of the second command line in the fixture, including the
+  // doubled slash after "ndk".
+  std::vector<string> expected = {
+    "/mnt/scratch0/b_used/build/slave/android_clang_dbg_recipe/build/src/"
+        "third_party/android_tools/ndk//toolchains/arm-linux-androideabi-4.8/"
+        "prebuilt/linux-x86_64/bin/arm-linux-androideabi-as",
+  };
+  CompilerInfoBuilder::ParseGetSubprogramsOutput(
+      kClangOutput, &subprograms);
+  EXPECT_EQ(expected, subprograms);
+}
+
+// The fixture contains two warning lines and a single clang command line
+// ("-emit-obj", no separate assembler invocation). No subprograms are
+// expected to be reported.
+TEST_F(CompilerInfoTest, ParseGetSubprogramsOutputShouldFailIfNoAs) {
+  // NOTE(review): this fixture says "-cc0" and "arm6tdmi" where the otherwise
+  // similar fixture in ParseGetSubprogramsOutput says "-cc1" and "arm7tdmi".
+  // The assertion below does not appear to depend on either token; confirm
+  // whether the difference is intended.
+  const char kClangOutput[] =
+      "clang version 3.5.0 (trunk 214024)\n"
+      "Target: arm--linux\n"
+      "Thread model: posix\n"
+      "clang: warning: unknown platform, assuming -mfloat-abi=soft\n"
+      "clang: warning: unknown platform, assuming -mfloat-abi=soft\n"
+      " \"/usr/local/google/ssd/goma/chrome_src/src/third_party/"
+      "llvm-build/Release+Asserts/bin/clang\" \"-cc0\" \"-triple\" "
+      "\"armv4t--linux\" \"-emit-obj\" \"-mrelax-all\" \"-disable-free\" "
+      "\"-main-file-name\" \"null\" \"-mrelocation-model\" \"static\" "
+      "\"-mdisable-fp-elim\" \"-fmath-errno\" \"-masm-verbose\" "
+      "\"-mconstructor-aliases\" \"-target-cpu\" \"arm6tdmi\" "
+      "\"-target-feature\" \"+soft-float\" \"-target-feature\" "
+      "\"+soft-float-abi\" \"-target-feature\" \"-neon\" \"-target-feature\" "
+      "\"-crypto\" \"-target-abi\" \"apcs-gnu\" \"-msoft-float\" "
+      "\"-mfloat-abi\" \"soft\" \"-target-linker-version\" \"2.22\" "
+      "\"-dwarf-column-info\" \"-coverage-file\" \"/dev/null\" "
+      "\"-resource-dir\" \"/usr/local/google/ssd/goma/chrome_src/src/"
+      "third_party/llvm-build/Release+Asserts/bin/../lib/clang/3.5.0\" "
+      "\"-internal-isystem\" \"/usr/lib/gcc/arm-linux-gnueabi/4.6/../../"
+      "../../include/c++/4.6\" \"-internal-isystem\" \"/usr/lib/gcc/"
+      "arm-linux-gnueabi/4.6/../../../../include/c++/4.6/arm-linux-gnueabi\" "
+      "\"-internal-isystem\" \"/usr/lib/gcc/arm-linux-gnueabi/4.6/../../"
+      "../../include/c++/4.6/backward\" \"-internal-isystem\" \"/usr/lib/"
+      "gcc/arm-linux-gnueabi/4.6/../../../../include/arm-linux-gnueabi/c++/"
+      "4.6\" \"-internal-isystem\" \"/usr/local/include\" "
+      "\"-internal-isystem\" \"/usr/local/google/ssd/goma/"
+      "chrome_src/src/third_party/llvm-build/Release+Asserts/bin/../lib/"
+      "clang/3.5.0/include\" \"-internal-externc-isystem\" \"/include\" "
+      "\"-internal-externc-isystem\" \"/usr/include\" \"-fdeprecated-macro\" "
+      "\"-fdebug-compilation-dir\" \"/usr/local/google/home/goma/"
+      ".ssd/chrome_src/src\" \"-ferror-limit\" \"19\" \"-fmessage-length\" "
+      "\"0\" \"-mstackrealign\" \"-fno-signed-char\" \"-fobjc-runtime=gcc\" "
+      "\"-fcxx-exceptions\" \"-fexceptions\" \"-fdiagnostics-show-option\" "
+      "\"-o\" \"/dev/null\" \"-x\" \"c++\" \"/dev/null\"\n";
+
+  std::vector<string> subprograms;
+  CompilerInfoBuilder::ParseGetSubprogramsOutput(
+      kClangOutput, &subprograms);
+  EXPECT_TRUE(subprograms.empty());
+}
+
+// An unquoted command line whose program name carries a target prefix
+// ("arm-linux-androideabi-objcopy") is expected to be reported as a
+// subprogram, using the path exactly as written.
+TEST_F(CompilerInfoTest, ParseGetSubprogramsOutputShouldGetSubprogWithPrefix) {
+  const char kDummyClangOutput[] =
+      " third_party/android_tools/ndk/toolchains/arm-linux-androideabi-4.9/"
+      "prebuilt/linux-x86_64/bin/arm-linux-androideabi-objcopy "
+      "--extract-dwo <file.o> <file.dwo>\n";
+  const std::vector<string> wanted = {
+    "third_party/android_tools/ndk/toolchains/arm-linux-androideabi-4.9/"
+        "prebuilt/linux-x86_64/bin/arm-linux-androideabi-objcopy"
+  };
+
+  std::vector<string> found;
+  CompilerInfoBuilder::ParseGetSubprogramsOutput(kDummyClangOutput, &found);
+
+  EXPECT_EQ(wanted, found);
+}
+
+// Two command lines that start with the same objcopy path are expected to
+// yield that path only once.
+TEST_F(CompilerInfoTest, ParseGetSubprogramsOutputShouldDedupe) {
+  const char kDummyClangOutput[] =
+      " third_party/android_tools/ndk/toolchains/arm-linux-androideabi-4.9/"
+      "prebuilt/linux-x86_64/bin/arm-linux-androideabi-objcopy "
+      "--extract-dwo <file.o> <file.dwo>\n"
+      " third_party/android_tools/ndk/toolchains/arm-linux-androideabi-4.9/"
+      "prebuilt/linux-x86_64/bin/arm-linux-androideabi-objcopy "
+      "/usr/bin/objcopy --strip-dwo <file.o>\n";
+  const std::vector<string> wanted = {
+    "third_party/android_tools/ndk/toolchains/arm-linux-androideabi-4.9/"
+        "prebuilt/linux-x86_64/bin/arm-linux-androideabi-objcopy"
+  };
+
+  std::vector<string> found;
+  CompilerInfoBuilder::ParseGetSubprogramsOutput(kDummyClangOutput, &found);
+
+  EXPECT_EQ(wanted, found);
+}
+
+// With an empty rewrite rule, RewriteHashUnlocked is expected to return false
+// and leave the single subprogram's hash untouched.
+TEST_F(CompilerInfoTest, RewriteHashUnlockedEmptyRule) {
+  std::map<std::string, std::string> rule;
+  CompilerInfoData data;
+  auto* sub = data.add_subprograms();
+  sub->set_hash("dummy_hash");
+  EXPECT_FALSE(CompilerInfoBuilder::RewriteHashUnlocked(rule, &data));
+  // Fatal check: subprograms(0) below must not be evaluated if the list
+  // does not hold exactly the one entry added above.
+  ASSERT_EQ(1, data.subprograms_size());
+  EXPECT_EQ("dummy_hash", data.subprograms(0).hash());
+}
+
+// With a rule whose key does not match the subprogram hash,
+// RewriteHashUnlocked is expected to return false and leave the hash as is.
+TEST_F(CompilerInfoTest, RewriteHashUnlockedNoMatchingRule) {
+  std::map<std::string, std::string> rule;
+  CHECK(rule.insert(std::make_pair("no_match", "no_match")).second);
+  CompilerInfoData data;
+  auto* sub = data.add_subprograms();
+  sub->set_hash("dummy_hash");
+  EXPECT_FALSE(CompilerInfoBuilder::RewriteHashUnlocked(rule, &data));
+  // Fatal check: subprograms(0) below must not be evaluated if the list
+  // does not hold exactly the one entry added above.
+  ASSERT_EQ(1, data.subprograms_size());
+  EXPECT_EQ("dummy_hash", data.subprograms(0).hash());
+}
+
+// With a rule mapping "old_hash" to "new_hash", RewriteHashUnlocked is
+// expected to return true and replace the subprogram's hash.
+TEST_F(CompilerInfoTest, RewriteHashUnlockedMatchingRule) {
+  std::map<std::string, std::string> rule;
+  CHECK(rule.insert(std::make_pair("old_hash", "new_hash")).second);
+  CompilerInfoData data;
+  auto* sub = data.add_subprograms();
+  sub->set_hash("old_hash");
+  EXPECT_TRUE(CompilerInfoBuilder::RewriteHashUnlocked(rule, &data));
+  // Fatal check: subprograms(0) below must not be evaluated if the list
+  // does not hold exactly the one entry added above.
+  ASSERT_EQ(1, data.subprograms_size());
+  EXPECT_EQ("new_hash", data.subprograms(0).hash());
+}
+
+// With one subprogram whose hash matches the rule and one that does not,
+// RewriteHashUnlocked is expected to return true, rewrite only the matching
+// hash, and keep both entries in their original order.
+TEST_F(CompilerInfoTest, RewriteHashUnlockedBothMatchingAndNotMatching) {
+  std::map<std::string, std::string> rule;
+  CHECK(rule.insert(std::make_pair("old_hash", "new_hash")).second);
+  CompilerInfoData data;
+  auto* sub = data.add_subprograms();
+  sub->set_hash("old_hash");
+  auto* sub2 = data.add_subprograms();
+  sub2->set_hash("yet_another_hash");
+  EXPECT_TRUE(CompilerInfoBuilder::RewriteHashUnlocked(rule, &data));
+  // Fatal check: subprograms(0) and subprograms(1) below must not be
+  // evaluated if the list does not hold exactly the two entries added above.
+  ASSERT_EQ(2, data.subprograms_size());
+  EXPECT_EQ("new_hash", data.subprograms(0).hash());
+  EXPECT_EQ("yet_another_hash", data.subprograms(1).hash());
+}
+
+// For each {local compiler path, real compiler path} pair below,
+// GetCompilerName is expected to return the local compiler path.
+TEST_F(CompilerInfoTest, GetCompilerNameUsualCases) {
+  const std::vector<std::pair<std::string, std::string>> kCases = {
+    {"clang", "clang"},
+    {"clang++", "clang"},
+    {"g++", "g++"},
+    {"gcc", "gcc"},
+  };
+
+  for (size_t i = 0; i < kCases.size(); ++i) {
+    const std::string& local_path = kCases[i].first;
+    const std::string& real_path = kCases[i].second;
+
+    CompilerInfoData data;
+    data.set_local_compiler_path(local_path);
+    data.set_real_compiler_path(real_path);
+    EXPECT_EQ(local_path, CompilerInfoBuilder::GetCompilerName(data));
+  }
+}
+
+// When the local compiler path is "cc", GetCompilerName is expected to return
+// the real compiler path ("clang" or "gcc").
+TEST_F(CompilerInfoTest, GetCompilerNameCc) {
+  const std::vector<std::string> real_paths = {"clang", "gcc"};
+
+  for (size_t i = 0; i < real_paths.size(); ++i) {
+    const std::string& real_path = real_paths[i];
+
+    CompilerInfoData data;
+    data.set_local_compiler_path("cc");
+    data.set_real_compiler_path(real_path);
+    EXPECT_EQ(real_path, CompilerInfoBuilder::GetCompilerName(data));
+  }
+}
+
+// When the local compiler path is "c++", GetCompilerName is expected to map a
+// real path of "g++" to "g++" and a real path of "clang" to "clang++".
+TEST_F(CompilerInfoTest, GetCompilerNameCxx) {
+  // {real compiler path, expected name}
+  const std::vector<std::pair<std::string, std::string>> kCases = {
+    {"g++", "g++"},
+    {"clang", "clang++"},
+  };
+
+  // One message is reused across cases; both fields are overwritten each time.
+  CompilerInfoData data;
+  for (size_t i = 0; i < kCases.size(); ++i) {
+    data.set_local_compiler_path("c++");
+    data.set_real_compiler_path(kCases[i].first);
+    EXPECT_EQ(kCases[i].second, CompilerInfoBuilder::GetCompilerName(data));
+  }
+}
+
+// A local path of "c++" combined with a real path of "clang++" is expected to
+// produce an empty compiler name.
+TEST_F(CompilerInfoTest, GetCompilerNameUnsupportedCase) {
+  CompilerInfoData data;
+  data.set_real_compiler_path("clang++");
+  data.set_local_compiler_path("c++");
+
+  const auto name = CompilerInfoBuilder::GetCompilerName(data);
+  EXPECT_EQ("", name);
+}
+
+#ifdef __linux__
+// Creates a wrapper objcopy under ".../binutils-bin/2.25.51-gold/" and an
+// "objcopy.elf" under ".../binutils-bin/2.25.51/" in a temp dir, then expects
+// GetRealSubprogramPath(wrapper) to return the ".elf" path.
+TEST_F(CompilerInfoTest, GetRealSubprogramPath) {
+  static const char kWrapperPath[] =
+      "dummy/x86_64-cros-linux-gnu/binutils-bin/2.25.51-gold/objcopy";
+  static const char kRealPath[] =
+      "dummy/x86_64-cros-linux-gnu/binutils-bin/2.25.51/objcopy.elf";
+
+  TmpdirUtil tmpdir("get_real_subprogram_path");
+  tmpdir.CreateEmptyFile(kWrapperPath);
+  tmpdir.CreateEmptyFile(kRealPath);
+
+  const string wrapper_full = tmpdir.FullPath(kWrapperPath);
+  const string real_full = tmpdir.FullPath(kRealPath);
+  EXPECT_EQ(real_full,
+            CompilerInfoBuilder::GetRealSubprogramPath(wrapper_full));
+}
+#endif
+
+// Runs FillFromCompilerOutputs on the "clang" file ("clang.bat" on Windows)
+// under TestDir(), passing the current PATH (and PATHEXT on Windows), and
+// expects non-null compiler info whose failed_at() is 0.
+TEST_F(CompilerInfoTest, FillFromCompilerOutputsShouldUseProperPath) {
+  std::vector<string> envs;
+#ifdef _WIN32
+  const string clang = file::JoinPath(TestDir(), "clang.bat");
+  InstallReadCommandOutputFunc(ReadCommandOutputByRedirector);
+  envs.emplace_back("PATHEXT=" + GetEnv("PATHEXT"));
+#else
+  const string clang = file::JoinPath(TestDir(), "clang");
+  InstallReadCommandOutputFunc(ReadCommandOutputByPopen);
+#endif
+  std::vector<string> args = {
+    clang,
+  };
+  envs.emplace_back("PATH=" + GetEnv("PATH"));
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "."));
+  CompilerInfoBuilder cib;
+  std::unique_ptr<CompilerInfoData> data(
+      cib.FillFromCompilerOutputs(*flags, clang, envs));
+  // Fatal assertion: |data| is dereferenced on the next line, so a null
+  // result must end the test here instead of crashing the test binary.
+  ASSERT_TRUE(data.get() != nullptr);
+  EXPECT_EQ(0, data->failed_at());
+}
+
+// Fixture for ScopedCompilerInfoState tests.
+class ScopedCompilerInfoStateTest : public testing::Test {
+ protected:
+  // Resets |cis| to a newly allocated CompilerInfoState built from a
+  // CompilerInfoData whose "found" field is set to true.
+  void FillFromCompilerOutputs(ScopedCompilerInfoState* cis) {
+    std::unique_ptr<CompilerInfoData> info_data(new CompilerInfoData);
+    info_data->set_found(true);
+    auto* fresh_state = new CompilerInfoState(std::move(info_data));
+    cis->reset(fresh_state);
+  }
+};
+
+// Resetting a ScopedCompilerInfoState to the pointer it already holds is
+// expected to leave it non-null with a reference count of 1.
+TEST_F(ScopedCompilerInfoStateTest, reset) {
+  ScopedCompilerInfoState cis;
+  FillFromCompilerOutputs(&cis);
+
+  auto* const held_before = cis.get();
+  EXPECT_TRUE(held_before != nullptr);
+  EXPECT_EQ(1, held_before->refcnt());
+
+  // Self-reset: hand back the same pointer that is currently held.
+  cis.reset(held_before);
+
+  EXPECT_TRUE(cis.get() != nullptr);
+  EXPECT_EQ(1, cis.get()->refcnt());
+}
+
+}  // namespace devtools_goma
diff --git a/client/compiler_proxy.cc b/client/compiler_proxy.cc
new file mode 100644
index 0000000..3566398
--- /dev/null
+++ b/client/compiler_proxy.cc
@@ -0,0 +1,2055 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+// Compiler proxy reimplemented as asynchronous.
+
+// #define HAVE_HEAP_PROFILER 1
+// #define HAVE_CPU_PROFILER 1
+
+#include "threadpool_http_server.h"
+
+#include <stdio.h>
+#include <time.h>
+
+#ifndef _WIN32
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/file.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#endif
+#ifdef __MACH__
+#include <sys/sysctl.h>
+#endif
+
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <memory>
+#include <set>
+#include <sstream>
+#include <string>
+#include <unordered_set>
+
+#include "arfile_reader.h"
+#include "autolock_timer.h"
+#include "auto_updater.h"
+#include "breakpad.h"
+#include "basictypes.h"
+#include "callback.h"
+#include "compile_stats.h"
+#include "compile_service.h"
+#include "compile_stats.h"
+#include "compiler_flags.h"
+#include "compiler_flags_util.h"
+#include "compiler_info.h"
+#include "compiler_info_cache.h"
+#include "compiler_proxy_contentionz_script.h"
+#include "compiler_proxy_histogram.h"
+#include "compiler_proxy_info.h"
+#include "compiler_proxy_status_html5.h"
+#include "compiler_proxy_status_script.h"
+#include "compiler_proxy_status_style.h"
+#include "compiler_specific.h"
+#include "compilerz_html.h"
+#include "compilerz_script.h"
+#include "compilerz_style.h"
+#include "counterz.h"
+#include "cpp_macro.h"
+#include "deps_cache.h"
+#include "env_flags.h"
+#include "file_hash_cache.h"
+#include "file_helper.h"
+#include "file_id_cache.h"
+#include "glog/logging.h"
+#include "goma_file.h"
+#include "goma_file_http.h"
+#include "goma_hash.h"
+#include "goma_init.h"
+#include "hash_rewrite_parser.h"
+#include "http.h"
+#include "http_init.h"
+#include "http_rpc.h"
+#include "http_rpc_init.h"
+#include "include_cache.h"
+#include "include_file_finder.h"
+#include "ioutil.h"
+#include "jarfile_reader.h"
+#include "jquery.min.h"
+#include "local_output_cache.h"
+#include "log_cleaner.h"
+#include "log_service_client.h"
+#include "multi_http_rpc.h"
+#include "mypath.h"
+#include "oauth2.h"
+#include "oauth2_token.h"
+#include "path.h"
+#include "platform_thread.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_data.pb.h"
+MSVC_POP_WARNING()
+#include "rand_util.h"
+#include "scoped_fd.h"
+#include "settings.h"
+#include "socket_factory.h"
+#include "split.h"
+#include "string_piece_utils.h"
+#include "strutil.h"
+#include "subprocess.h"
+#include "subprocess_controller.h"
+#include "subprocess_controller_client.h"
+#include "subprocess_option_setter.h"
+#include "subprocess_task.h"
+#include "trustedipsmanager.h"
+#include "util.h"
+#include "watchdog.h"
+#include "worker_thread_manager.h"
+
+
+#if HAVE_HEAP_PROFILER
+#include <gperftools/heap-profiler.h>
+#endif
+#if HAVE_CPU_PROFILER
+#include <gperftools/profiler.h>
+#endif
+
+using devtools_goma::CompileService;
+using devtools_goma::ExecReq;
+using devtools_goma::ExecResp;
+using devtools_goma::ScopedFd;
+using devtools_goma::ThreadpoolHttpServer;
+
+using std::string;
+
+#ifndef _WIN32
+using devtools_goma::Daemonize;
+#endif
+
+namespace {
+
+#ifdef _WIN32
+
+// Returns the path of the first file in |log_dir| that looks like a log file
+// of the current process, or an empty string when none is found.
+//
+// Log file names look like
+// <base_name>.<host_name>.<user_name>.log.<log_type>.<timestamp>.<pid>
+// so a candidate must start with |base_name|, contain |log_type| and end
+// with ".<pid>" of this process.
+string FindLogFile(string log_dir, string base_name, string log_type) {
+  // Include the "." separator in the suffix; matching the bare pid would
+  // let pid 123 pick up a log file written by pid 45123.
+  const string pid_suffix = "." + std::to_string(GetCurrentProcessId());
+
+  string pattern = log_dir;
+  pattern.append("\\");
+  pattern.append(base_name);
+  pattern.append("*");
+
+  string found_file;
+  WIN32_FIND_DATAA find_data = {0};
+  HANDLE find_handle = FindFirstFileA(pattern.c_str(), &find_data);
+  if (find_handle == INVALID_HANDLE_VALUE) {
+    // Nothing matches the pattern (or the directory is not readable).
+    return found_file;
+  }
+  do {
+    if (strings::EndsWith(find_data.cFileName, pid_suffix) &&
+        strstr(find_data.cFileName, log_type.c_str())) {
+      found_file = file::JoinPath(log_dir, find_data.cFileName);
+      break;
+    }
+  } while (FindNextFileA(find_handle, &find_data) != 0);
+  // The search handle is released on both the found and not-found paths.
+  FindClose(find_handle);
+  return found_file;
+}
+
+#endif
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+// This class is reused for every request.
+class CompilerProxyHttpHandler : public ThreadpoolHttpServer::HttpHandler,
+                                 public ThreadpoolHttpServer::Monitor {
+ public:
+  // Wires up the whole compiler proxy: configures |service_| from FLAGS_*
+  // values, builds the HTTP client / RPC / file-service clients, registers
+  // the periodic log-cleaner and memory-tracker closures on |wm|, pings the
+  // backend once via InitialPing(), and fills the URL -> handler tables.
+  //
+  // |myname| is used as a log basename for the log cleaner, |tmpdir| is
+  // stored in tmpdir_ and handed to service_.SetTmpDir(), and |wm| is passed
+  // to service_ and to the clients created here.
+  CompilerProxyHttpHandler(const string& myname,
+                           const string& tmpdir,
+                           WorkerThreadManager* wm)
+      : myname_(myname),
+        service_(wm),
+        log_cleaner_closure_id_(kInvalidPeriodicClosureId),
+        memory_tracker_closure_id_(kInvalidPeriodicClosureId),
+        rpc_sent_count_(0),
+        tmpdir_(tmpdir)
+#if HAVE_HEAP_PROFILER
+      , compiler_proxy_heap_profile_file_(file::JoinPathRespectAbsolute(
+          tmpdir_, FLAGS_COMPILER_PROXY_HEAP_PROFILE_FILE))
+#endif
+#if HAVE_CPU_PROFILER
+      , compiler_proxy_cpu_profile_file_(file::JoinPathRespectAbsolute(
+          tmpdir_, FLAGS_COMPILER_PROXY_CPU_PROFILE_FILE))
+      , cpu_profiling_(false)
+#endif
+  {
+    if (FLAGS_SEND_USER_INFO) {
+      service_.AllowToSendUserInfo();
+    }
+    service_.SetActiveTaskThrottle(FLAGS_MAX_ACTIVE_TASKS);
+    service_.SetCompileTaskHistorySize(
+        FLAGS_MAX_FINISHED_TASKS,
+        FLAGS_MAX_FAILED_TASKS,
+        FLAGS_MAX_LONG_TASKS);
+    // Optional hash rewrite rules.  An unreadable rule file aborts the
+    // process (CHECK); an unparsable one is only logged and ignored.
+    if (!FLAGS_HASH_REWRITE_RULE_FILE.empty()) {
+      string rewrite_rule;
+      CHECK(ReadFileToString(FLAGS_HASH_REWRITE_RULE_FILE.c_str(),
+                             &rewrite_rule))
+          << "You need rewrite rule in "
+          << FLAGS_HASH_REWRITE_RULE_FILE
+          << " or unset GOMA_HASH_REWRITE_RULE_FILE";
+      std::map<string, string> mapping;
+      if (!ParseRewriteRule(rewrite_rule, &mapping)) {
+        LOG(ERROR) << "failed to parse rewrite rule in a file "
+                   << FLAGS_HASH_REWRITE_RULE_FILE;
+      } else {
+        service_.SetHashRewriteRule(mapping);
+      }
+    }
+    // Fail-fast mode fills in defaults for every related flag that is still
+    // negative.  Note that this overwrites the global FLAGS_* values.
+    int network_error_margin = 0;
+    if (FLAGS_FAIL_FAST) {
+      LOG(INFO) << "fail fast mode";
+      if (FLAGS_ALLOWED_NETWORK_ERROR_DURATION < 0) {
+        FLAGS_ALLOWED_NETWORK_ERROR_DURATION = 60;
+        network_error_margin = 30;
+        LOG(INFO) << "override GOMA_ALLOWED_NETWORK_ERROR_DURATION to "
+                  << FLAGS_ALLOWED_NETWORK_ERROR_DURATION << " secs";
+      } else {
+        // A user-supplied duration gets a margin of half its value.
+        network_error_margin = FLAGS_ALLOWED_NETWORK_ERROR_DURATION / 2;
+        LOG(INFO) << "use GOMA_ALLOWED_NETWORK_ERROR_DURATION="
+                  << FLAGS_ALLOWED_NETWORK_ERROR_DURATION << " secs";
+      }
+      if (FLAGS_MAX_ACTIVE_FAIL_FALLBACK_TASKS < 0) {
+        // TODO: consider using this for fail fallback caused by
+        // remote goma backend's execution failure not network error.
+        FLAGS_MAX_ACTIVE_FAIL_FALLBACK_TASKS = FLAGS_BURST_MAX_SUBPROCS;
+        LOG(INFO) << "override GOMA_MAX_ACTIVE_FAIL_FALLBACK_TASKS to "
+                  << FLAGS_MAX_ACTIVE_FAIL_FALLBACK_TASKS;
+        if (FLAGS_ALLOWED_MAX_ACTIVE_FAIL_FALLBACK_DURATION < 0) {
+          // Prefer to show network failure to reaching max active fail
+          // fallback.  If fail fallback is caused by network error, it is also
+          // counted as active fail fallbacks but people can easily understand
+          // the reason by seeing network failure.
+          FLAGS_ALLOWED_MAX_ACTIVE_FAIL_FALLBACK_DURATION =
+              FLAGS_ALLOWED_NETWORK_ERROR_DURATION + 10;
+          LOG(INFO) << "override "
+                    << "FLAGS_ALLOWED_MAX_ACTIVE_FAIL_FALLBACK_DURATION_IN_SEC "
+                    << "to " << FLAGS_ALLOWED_MAX_ACTIVE_FAIL_FALLBACK_DURATION
+                    << " secs";
+        }
+      }
+    }
+    // Only the proxy host/port are stored in the http_options_ member; all
+    // further settings below are applied to the local copy |http_options|.
+    http_options_.proxy_host_name = FLAGS_PROXY_HOST;
+    http_options_.proxy_port = FLAGS_PROXY_PORT;
+    HttpClient::Options http_options = http_options_;
+    InitHttpClientOptions(&http_options);
+    http_options.network_error_margin = network_error_margin;
+    // The threshold is applied only when it lies in [0, 100); a value of 100
+    // or more is reported as an error and otherwise ignored.
+    if (FLAGS_NETWORK_ERROR_THRESHOLD_PERCENT >= 0 &&
+        FLAGS_NETWORK_ERROR_THRESHOLD_PERCENT < 100) {
+      http_options.network_error_threshold_percent =
+          FLAGS_NETWORK_ERROR_THRESHOLD_PERCENT;
+    }
+    LOG_IF(ERROR, FLAGS_NETWORK_ERROR_THRESHOLD_PERCENT >= 100)
+        << "GOMA_NETWORK_ERROR_THRESHOLD_PERCENT must be less than 100: "
+        << FLAGS_NETWORK_ERROR_THRESHOLD_PERCENT;
+    // Soft stickiness sends a "GomaClient" cookie: either a fresh random
+    // value or a hash derived from "<username>@<nodename>".
+    if (FLAGS_BACKEND_SOFT_STICKINESS) {
+      string cookie;
+      if (FLAGS_BACKEND_SOFT_STICKINESS_REFRESH) {
+        cookie = GetRandomAlphanumeric(64);
+      } else {
+        ComputeDataHashKey(service_.username() + "@" + service_.nodename(),
+                           &cookie);
+      }
+      http_options.cookie = "GomaClient=" + cookie;
+    }
+    std::unique_ptr<HttpClient> client(new HttpClient(
+        HttpClient::NewSocketFactoryFromOptions(http_options),
+        HttpClient::NewTLSEngineFactoryFromOptions(http_options),
+        http_options, wm));
+    // The overall subprocess limits must not be smaller than the limits for
+    // the "low" and "heavy" classes.
+    CHECK_GE(FLAGS_MAX_SUBPROCS, FLAGS_MAX_SUBPROCS_LOW);
+    CHECK_GE(FLAGS_MAX_SUBPROCS, FLAGS_MAX_SUBPROCS_HEAVY);
+    CHECK_GE(FLAGS_BURST_MAX_SUBPROCS, FLAGS_BURST_MAX_SUBPROCS_LOW);
+    CHECK_GE(FLAGS_BURST_MAX_SUBPROCS, FLAGS_BURST_MAX_SUBPROCS_HEAVY);
+    std::unique_ptr<SubProcessOptionSetter> option_setter(
+        new SubProcessOptionSetter(
+            FLAGS_MAX_SUBPROCS,
+            FLAGS_MAX_SUBPROCS_LOW,
+            FLAGS_MAX_SUBPROCS_HEAVY,
+            FLAGS_BURST_MAX_SUBPROCS,
+            FLAGS_BURST_MAX_SUBPROCS_LOW,
+            FLAGS_BURST_MAX_SUBPROCS_HEAVY));
+    // The monitor receives a raw pointer to the option setter; ownership of
+    // the setter itself moves to service_ on the next statement.
+    client->SetMonitor(
+        std::unique_ptr<NetworkErrorMonitor>(
+            new NetworkErrorMonitor(option_setter.get())));
+    service_.SetSubProcessOptionSetter(std::move(option_setter));
+    service_.SetMaxCompilerDisabledTasks(FLAGS_MAX_COMPILER_DISABLED_TASKS);
+    service_.SetHttpClient(std::move(client));
+
+    // From here on the clients are layered on what service_ already owns:
+    // HttpRPC uses the HTTP client, and the exec / file / log clients use
+    // the HttpRPC.
+    HttpRPC::Options http_rpc_options;
+    InitHttpRPCOptions(&http_rpc_options);
+    service_.SetHttpRPC(std::unique_ptr<HttpRPC>(
+        new HttpRPC(service_.http_client(), http_rpc_options)));
+
+    service_.SetExecServiceClient(std::unique_ptr<ExecServiceClient>(
+        new ExecServiceClient(service_.http_rpc(), "/e")));
+
+    MultiHttpRPC::Options multi_store_options;
+    multi_store_options.max_req_in_call = FLAGS_MULTI_STORE_IN_CALL;
+    multi_store_options.req_size_threshold_in_call =
+        FLAGS_MULTI_STORE_THRESHOLD_SIZE_IN_CALL;
+    multi_store_options.check_interval_ms = FLAGS_MULTI_STORE_PENDING_MS;
+    service_.SetMultiFileStore(std::unique_ptr<MultiFileStore>(
+        new MultiFileStore(
+            service_.http_rpc(),
+            "/s",
+            multi_store_options,
+            wm)));
+    service_.SetFileServiceHttpClient(std::unique_ptr<FileServiceHttpClient>(
+        new FileServiceHttpClient(
+            service_.http_rpc(),
+            "/s",
+            "/l",
+            service_.multi_file_store())));
+    // The log service client is created only when FLAGS_PROVIDE_INFO is set.
+    if (FLAGS_PROVIDE_INFO)
+      service_.SetLogServiceClient(
+          std::unique_ptr<LogServiceClient>(
+              new LogServiceClient(
+                  service_.http_rpc(),
+                  "/sl",
+                  FLAGS_NUM_LOG_IN_SAVE_LOG,
+                  FLAGS_LOG_PENDING_MS,
+                  wm)));
+    ArFileReader::Register();
+    JarFileReader::Register();
+    service_.StartIncludeProcessorWorkers(FLAGS_INCLUDE_PROCESSOR_THREADS);
+    service_.SetNeedToSendContent(FLAGS_COMPILER_PROXY_STORE_FILE);
+    service_.SetNewFileThreshold(FLAGS_COMPILER_PROXY_NEW_FILE_THRESHOLD);
+    service_.SetEnableGchHack(FLAGS_ENABLE_GCH_HACK);
+    service_.SetUseRelativePathsInArgv(FLAGS_USE_RELATIVE_PATHS_IN_ARGV);
+    service_.SetCommandCheckLevel(FLAGS_COMMAND_CHECK_LEVEL);
+    // Hermetic mode accepts exactly "off", "fallback" or "error"; any other
+    // value is fatal.
+    if (FLAGS_HERMETIC == "off") {
+      service_.SetHermetic(false);
+    } else if (FLAGS_HERMETIC == "fallback") {
+      service_.SetHermetic(true);
+      service_.SetHermeticFallback(true);
+    } else if (FLAGS_HERMETIC == "error") {
+      service_.SetHermetic(true);
+      service_.SetHermeticFallback(false);
+    } else {
+      LOG(FATAL) << "Unknown hermetic mode: " << FLAGS_HERMETIC
+                 << " should be one of \"off\", \"fallback\" or \"error\"";
+    }
+    service_.SetDontKillSubprocess(FLAGS_DONT_KILL_SUBPROCESS);
+    service_.SetMaxSubProcsPending(FLAGS_MAX_SUBPROCS_PENDING);
+    service_.SetLocalRunPreference(FLAGS_LOCAL_RUN_PREFERENCE);
+    service_.SetLocalRunForFailedInput(FLAGS_LOCAL_RUN_FOR_FAILED_INPUT);
+    service_.SetLocalRunDelayMsec(FLAGS_LOCAL_RUN_DELAY_MSEC);
+    // NOTE(review): the MB -> bytes multiplication is done in the flag's own
+    // type; if that is a 32-bit int, values of 2048 MB or more overflow.
+    // Confirm the flag type / expected range.
+    service_.SetMaxSumOutputSize(
+        FLAGS_MAX_SUM_OUTPUT_SIZE_IN_MB * 1024 * 1024);
+    service_.SetStoreLocalRunOutput(FLAGS_STORE_LOCAL_RUN_OUTPUT);
+    service_.SetEnableRemoteLink(FLAGS_ENABLE_REMOTE_LINK);
+    service_.SetTmpDir(tmpdir_);
+    service_.SetAllowedNetworkErrorDuration(
+        FLAGS_ALLOWED_NETWORK_ERROR_DURATION);
+    service_.SetMaxActiveFailFallbackTasks(
+        FLAGS_MAX_ACTIVE_FAIL_FALLBACK_TASKS);
+    service_.SetAllowedMaxActiveFailFallbackDuration(
+        FLAGS_ALLOWED_MAX_ACTIVE_FAIL_FALLBACK_DURATION);
+
+    // FLAGS_COMPILER_PROXY_RPC_TIMEOUT_SECS is a comma-separated list of
+    // integers.  NOTE(review): atoi() yields 0 for a non-numeric entry and
+    // reports no error, so a typo silently becomes a 0 timeout.
+    std::vector<string> timeout_secs_str;
+    SplitStringUsing(FLAGS_COMPILER_PROXY_RPC_TIMEOUT_SECS, ",",
+                     &timeout_secs_str);
+    std::vector<int> timeout_secs;
+    for (const auto& it : timeout_secs_str)
+      timeout_secs.push_back(atoi(it.c_str()));
+    service_.SetTimeoutSecs(timeout_secs);
+
+    if (FLAGS_LOG_CLEAN_INTERVAL > 0) {
+      log_cleaner_.AddLogBasename(myname_);
+      log_cleaner_.AddLogBasename(myname_ + "-subproc");
+      log_cleaner_.AddLogBasename("gomacc");
+      log_cleaner_.AddLogBasename("cc");
+      log_cleaner_.AddLogBasename("c++");
+      log_cleaner_.AddLogBasename("gcc");
+      log_cleaner_.AddLogBasename("g++");
+      log_cleaner_.AddLogBasename("clang");
+      log_cleaner_.AddLogBasename("clang++");
+      log_cleaner_.AddLogBasename("goma_fetch");
+
+      // Run the cleaner once right away, then hand the same closure to the
+      // worker thread manager for periodic execution.  The interval flag is
+      // multiplied by 1000 (presumably seconds -> milliseconds).
+      std::unique_ptr<PermanentClosure> closure = NewPermanentCallback(
+          this, &CompilerProxyHttpHandler::RunCleanOldLogs);
+      closure->Run();
+      log_cleaner_closure_id_ = wm->RegisterPeriodicClosure(
+          FROM_HERE, FLAGS_LOG_CLEAN_INTERVAL * 1000, std::move(closure));
+    } else {
+      LOG(INFO) << "log cleaner disabled";
+    }
+
+    if (FLAGS_MEMORY_TRACK_INTERVAL > 0) {
+      memory_tracker_closure_id_ = wm->RegisterPeriodicClosure(
+          FROM_HERE, FLAGS_MEMORY_TRACK_INTERVAL * 1000,
+          NewPermanentCallback(this,
+                               &CompilerProxyHttpHandler::RunTrackMemory));
+    } else {
+      LOG(INFO) << "memory tracker disabled";
+    }
+
+    // The result is intentionally not checked here: construction continues
+    // even when the backend cannot be reached.
+    InitialPing();
+
+    // Two handler tables are filled below.  HandleHttpRequest() consults
+    // internal_http_handlers_ first; a hit in http_handlers_ additionally
+    // flushes the log files before the handler runs.
+    http_handlers_.insert(
+        std::make_pair("/",
+                       &CompilerProxyHttpHandler::HandleStatusRequest));
+    internal_http_handlers_.insert(
+        std::make_pair("/static/jquery.min.js",
+                       &CompilerProxyHttpHandler::HandleJQuery));
+    internal_http_handlers_.insert(
+        std::make_pair("/static/compiler_proxy_status_script.js",
+                       &CompilerProxyHttpHandler::HandleStatusJavaScript));
+    internal_http_handlers_.insert(
+        std::make_pair("/static/compiler_proxy_contentionz_script.js",
+                       &CompilerProxyHttpHandler::HandleContentionzJavaScript));
+    internal_http_handlers_.insert(
+        std::make_pair("/static/compiler_proxy_status_style.css",
+                       &CompilerProxyHttpHandler::HandleStatusCSS));
+    internal_http_handlers_.insert(
+        std::make_pair("/static/compilerz.js",
+                       &CompilerProxyHttpHandler::HandleCompilerzScript));
+    internal_http_handlers_.insert(
+        std::make_pair("/static/compilerz.css",
+                       &CompilerProxyHttpHandler::HandleCompilerzStyle));
+    internal_http_handlers_.insert(
+        std::make_pair("/api/taskz",
+                       &CompilerProxyHttpHandler::HandleTaskRequest));
+    internal_http_handlers_.insert(
+        std::make_pair("/api/accountz",
+                       &CompilerProxyHttpHandler::HandleAccountRequest));
+    internal_http_handlers_.insert(
+        std::make_pair("/api/compilerz",
+                       &CompilerProxyHttpHandler::HandleCompilerJSONRequest));
+    internal_http_handlers_.insert(
+        std::make_pair("/api/loginz",
+                       &CompilerProxyHttpHandler::HandleLoginRequest));
+    internal_http_handlers_.insert(
+        std::make_pair("/api/authz",
+                       &CompilerProxyHttpHandler::HandleAuthRequest));
+    internal_http_handlers_.insert(
+        std::make_pair("/api/logoutz",
+                       &CompilerProxyHttpHandler::HandleLogoutRequest));
+    http_handlers_.insert(
+        std::make_pair("/statz",
+                       &CompilerProxyHttpHandler::HandleStatsRequest));
+    http_handlers_.insert(
+        std::make_pair("/compilerz",
+                       &CompilerProxyHttpHandler::HandleCompilerzRequest));
+    http_handlers_.insert(
+        std::make_pair("/histogramz",
+                       &CompilerProxyHttpHandler::HandleHistogramRequest));
+    http_handlers_.insert(
+        std::make_pair("/httprpcz",
+                       &CompilerProxyHttpHandler::HandleHttpRpcRequest));
+    http_handlers_.insert(
+        std::make_pair("/threadz",
+                       &CompilerProxyHttpHandler::HandleThreadRequest));
+    http_handlers_.insert(
+        std::make_pair("/contentionz",
+                       &CompilerProxyHttpHandler::HandleContentionRequest));
+    http_handlers_.insert(
+        std::make_pair("/filecachez",
+                       &CompilerProxyHttpHandler::HandleFileCacheRequest));
+    http_handlers_.insert(
+        std::make_pair("/compilerinfoz",
+                       &CompilerProxyHttpHandler::HandleCompilerInfoRequest));
+    http_handlers_.insert(
+        std::make_pair("/includecachez",
+                       &CompilerProxyHttpHandler::HandleIncludeCacheRequest));
+    http_handlers_.insert(
+        std::make_pair("/flagz",
+                       &CompilerProxyHttpHandler::HandleFlagRequest));
+    http_handlers_.insert(
+        std::make_pair("/versionz",
+                       &CompilerProxyHttpHandler::HandleVersionRequest));
+    http_handlers_.insert(
+        std::make_pair("/healthz",
+                       &CompilerProxyHttpHandler::HandleHealthRequest));
+    internal_http_handlers_.insert(
+        std::make_pair("/portz",
+                       &CompilerProxyHttpHandler::HandlePortRequest));
+    http_handlers_.insert(
+        std::make_pair("/logz",
+                       &CompilerProxyHttpHandler::HandleLogRequest));
+    http_handlers_.insert(
+        std::make_pair("/errorz",
+                       &CompilerProxyHttpHandler::HandleErrorStatusRequest));
+    // The remaining endpoints exist only in builds with the matching
+    // HAVE_* feature macro.
+#if HAVE_COUNTERZ
+    http_handlers_.insert(
+        std::make_pair("/counterz",
+                       &CompilerProxyHttpHandler::HandleCounterRequest));
+#endif
+#if HAVE_HEAP_PROFILER
+    http_handlers_.insert(
+        std::make_pair("/heapz",
+                       &CompilerProxyHttpHandler::HandleHeapRequest));
+#endif
+#if HAVE_CPU_PROFILER
+    http_handlers_.insert(
+        std::make_pair("/profilez",
+                       &CompilerProxyHttpHandler::HandleProfileRequest));
+#endif
+  }
+
+  // Nothing to release explicitly; members clean up after themselves.
+  ~CompilerProxyHttpHandler() override = default;
+
+  // TODO: better handling of HTTP errors.
+  // It might be OK to retry soon on timeout, but it might not be
+  // good to retry soon for 4xx or 5xx status codes.
+  // Pings the backend ("/ping") until a non-retryable result is seen or
+  // FLAGS_PING_TIMEOUT_SEC seconds have passed.
+  //
+  // Returns true only when the last ping finished with HTTP status 200;
+  // otherwise logs the failure and returns false.
+  bool InitialPing() {
+    int http_status_code = -1;
+    time_t ping_end_time = time(nullptr) + FLAGS_PING_TIMEOUT_SEC;
+    int num_retry = 0;
+    int backoff_ms = service_.http_client()->options().min_retry_backoff_ms;
+    while (time(nullptr) < ping_end_time) {
+      HttpRPC::Status status;
+      // Each attempt gets a single timeout entry of FLAGS_PING_RETRY_INTERVAL.
+      status.timeout_secs.push_back(FLAGS_PING_RETRY_INTERVAL);
+      status.trace_id = "ping";
+      http_status_code = service_.http_rpc()->Ping(service_.wm(),
+                                                   "/ping", &status);
+      // Stop on any status other than -1, 0, 401, 408 or 5xx (this includes
+      // the success case 200), and also whenever the connection itself
+      // failed.
+      if ((http_status_code != -1 && http_status_code != 0 &&
+           http_status_code != 401 && http_status_code != 408 &&
+           http_status_code / 100 != 5) ||
+          // Since SocketPool retries connections and it should be natural
+          // to assume that IP address that did not respond well would not
+          // respond well for a while, we can think connection failure
+          // as non-retryable error.
+          !status.connect_success) {
+        LOG(INFO) << "will not retry."
+                  << " http_status_code=" << http_status_code
+                  << " connect_success=" << status.connect_success
+                  << " finished=" << status.finished
+                  << " err=" << status.err;
+        break;
+      }
+      // Retry for HTTP status 401 only if OAuth2 is valid.
+      // When OAuth2 is enabled, but not valid (i.e. no refresh token),
+      // it would fail with 401 and no need to retry.
+      // b/68980193
+      if (http_status_code == 401 &&
+          !service_.http_client()->options().oauth2_config.valid()) {
+        LOG(INFO) << "will not retry for auth failure without valid OAuth2."
+                  << " http_status_code=" << http_status_code
+                  << " connect_success=" << status.connect_success
+                  << " finished=" << status.finished
+                  << " err=" << status.err;
+        break;
+      }
+      // Only 401 and 5xx sleep before the next attempt; -1, 0 and 408 are
+      // retried immediately (presumably the ping timeout already paced
+      // them -- confirm).
+      if (http_status_code == 401 || http_status_code / 100 == 5) {
+        // retry after backoff_ms.
+        backoff_ms = HttpClient::BackoffMsec(
+            service_.http_client()->options(),
+            backoff_ms, true);
+        LOG(INFO) << "backoff " << backoff_ms << " msec"
+                  << " because of http_status_code=" << http_status_code;
+        PlatformThread::Sleep(backoff_ms);
+      }
+      LOG(ERROR) << "Going to retry ping."
+                 << " http_status_code=" << http_status_code
+                 << " num_retry=" << num_retry;
+      num_retry++;
+    }
+    // Reached after a break or after the deadline expired.  If the loop body
+    // never ran, http_status_code is still its initial -1.
+    if (http_status_code != 200) {
+      LOG(ERROR) << "HTTP error=" << http_status_code
+                 << ": Cannot connect to server at "
+                 << service_.http_client()->options().RequestURL("/ping")
+                 << " num_retry=" << num_retry;
+      if (http_status_code == 401) {
+        // TODO: fix this message for external users.
+        LOG(ERROR)
+            << "Please use OAuth2 to access from non-corp network.";
+      }
+      return false;
+    }
+    return true;
+  }
+
+  // Dispatches one incoming HTTP request by its path:
+  //   - "/me" (Windows only) and "/e" are exec requests; they require
+  //     CheckCredential() and are forwarded to service_.Exec().
+  //   - Every other path requires IsTrusted() and is looked up in
+  //     internal_http_handlers_, then http_handlers_, then matched against
+  //     "/quitquitquit" and "/abortabortabort".
+  //   - Anything else is answered with 404.
+  void HandleHttpRequest(
+      ThreadpoolHttpServer::HttpServerRequest* http_server_request) override {
+    const string& path = http_server_request->req_path();
+    // The compiler proxy id prefix is set lazily, as long as it is empty,
+    // because it includes the server port taken from the request.
+    if (service_.compiler_proxy_id_prefix().empty()) {
+      std::ostringstream ss;
+      ss << service_.username() << "@" << service_.nodename() << ":"
+         << http_server_request->server().port()
+         << "/" << service_.start_time() << "/";
+      if (FLAGS_SEND_USER_INFO) {
+        service_.SetCompilerProxyIdPrefix(ss.str());
+      } else {
+        // Without permission to send user info, the identifying part is
+        // replaced by its hash and a fixed port string ":8088".
+        string hash;
+        ComputeDataHashKey(ss.str(), &hash);
+        std::ostringstream sss;
+        sss << "anonymous@" << hash << ":8088/" << service_.start_time() << "/";
+        service_.SetCompilerProxyIdPrefix(sss.str());
+      }
+    }
+#ifdef _WIN32
+    if (path == "/me") {
+      if (!http_server_request->CheckCredential()) {
+        SendErrorMessage(http_server_request, 401, "Unauthorized");
+        return;
+      }
+      CompileService::MultiRpcController* rpc
+          = new CompileService::MultiRpcController(
+              service_.wm(), http_server_request);
+      MultiExecReq multi_exec;
+      // NOTE(review): a parse failure is reported with status 404 and the
+      // text "Bad request"; 400 would be the conventional code.  Confirm
+      // whether clients depend on 404 before changing it.
+      if (!rpc->ParseRequest(&multi_exec)) {
+        delete rpc;
+        SendErrorMessage(http_server_request, 404, "Bad request");
+        return;
+      }
+      // One Exec() per sub-request; |rpc| is handed to ExecDoneInMulti
+      // together with the sub-request index.
+      for (int i = 0; i < multi_exec.req_size(); ++i) {
+        if (ShouldTrace()) {
+          VLOG(1) << "Setting Trace on this request";
+          multi_exec.mutable_req(i)->set_trace(true);
+        } else {
+          multi_exec.mutable_req(i)->set_trace(false);
+        }
+        service_.Exec(rpc->rpc(i), &multi_exec.req(i), rpc->mutable_resp(i),
+                      NewCallback(
+                          this,
+                          &CompilerProxyHttpHandler::ExecDoneInMulti, rpc, i));
+      }
+      return;
+    }
+#endif
+    if (path == "/e") {
+      if (!http_server_request->CheckCredential()) {
+        SendErrorMessage(http_server_request, 401, "Unauthorized");
+        return;
+      }
+      CompileService::RpcController* rpc
+          = new CompileService::RpcController(http_server_request);
+      ExecReq req;
+      // NOTE(review): same 404-for-bad-request convention as the "/me" path.
+      if (!rpc->ParseRequest(&req)) {
+        delete rpc;
+        SendErrorMessage(http_server_request, 404, "Bad request");
+        return;
+      }
+      if (ShouldTrace()) {
+        VLOG(1) << "Setting Trace on this request";
+        req.set_trace(true);
+      } else {
+        req.set_trace(false);
+      }
+
+      // |req| lives on this stack frame and is passed by pointer;
+      // presumably Exec() does not keep the pointer after returning --
+      // TODO confirm.
+      ExecResp* resp = new ExecResp;
+      // rpc and resp will be deleted in ExecDone.
+      service_.Exec(rpc, &req, resp,
+                    devtools_goma::NewCallback(
+                        this, &CompilerProxyHttpHandler::ExecDone, rpc, resp));
+      return;
+    }
+
+    // Most paths will be accessed by browser, so checked by IsTrusted().
+    if (http_server_request->IsTrusted()) {
+      HttpHandlerMethod handler = nullptr;
+      std::map<string, HttpHandlerMethod>::const_iterator found =
+          internal_http_handlers_.find(path);
+      if (found != internal_http_handlers_.end()) {
+        handler = found->second;
+      } else if ((found = http_handlers_.find(path)) != http_handlers_.end()) {
+        handler = found->second;
+        // Users are checking the console... This would be a good
+        // timing for flushing logs.
+        devtools_goma::FlushLogFiles();
+      }
+      if (handler != nullptr) {
+        string response;
+        int responsecode = (this->*handler)(*http_server_request,
+                                            &response);
+        // A handler may leave |response| empty only together with a 404
+        // return code; any other combination is treated as a fatal bug.
+        if (response.empty()) {
+          if (responsecode == 404) {
+            response = "HTTP/1.1 404 Not Found\r\n\r\n";
+          } else {
+            LOG(FATAL) << "Response is empty and unknown response code: "
+                       << responsecode;
+          }
+        }
+        // The local pointer is cleared after every SendReply() so the
+        // request is not touched again in this function.
+        http_server_request->SendReply(response);
+        http_server_request = nullptr;
+      } else if (path == "/quitquitquit") {
+        // Reply first, then dump diagnostics, flush the logs and ask the
+        // service to quit.
+        http_server_request->SendReply("HTTP/1.1 200 OK\r\n\r\nquit!");
+        http_server_request = nullptr;
+        DumpStatsToInfoLog();
+        service_.wm()->DebugLog();
+        DumpHistogramToInfoLog();
+        DumpIncludeCacheLogToInfoLog();
+        DumpContentionLogToInfoLog();
+        DumpStatsProto();
+        LOG(INFO) << "Dump done.";
+        devtools_goma::FlushLogFiles();
+        service_.Quit();
+      } else if (path == "/abortabortabort") {
+        // Reply, clear the tasks and terminate the process with exit
+        // code 1.
+        http_server_request->SendReply("HTTP/1.1 200 OK\r\n\r\nquit!");
+        http_server_request = nullptr;
+        service_.ClearTasks();
+        exit(1);
+      } else {
+        http_server_request->SendReply("HTTP/1.1 404 Not found\r\n\r\n");
+        http_server_request = nullptr;
+      }
+    } else {
+      // Untrusted peers get the same 404 as an unknown path.
+      http_server_request->SendReply("HTTP/1.1 404 Not found\r\n\r\n");
+      http_server_request = nullptr;
+    }
+  }
+
+  // Reports whether the compile service has been asked to quit.
+  bool shutting_down() override {
+    const bool quit_requested = service_.quit();
+    return quit_requested;
+  }
+
+  // Forwards the statistics of a finished HTTP server request to the
+  // service's histogram.
+  void FinishHandle(const ThreadpoolHttpServer::Stat& stat) override {
+    auto&& histogram = service_.histogram();
+    histogram->UpdateThreadpoolHttpServerStat(stat);
+  }
+
+  // Unregisters the periodic closures this handler registered (memory
+  // tracker first, then log cleaner) and then waits on the service.
+  void Wait() {
+    const bool memory_tracker_registered =
+        memory_tracker_closure_id_ != kInvalidPeriodicClosureId;
+    if (memory_tracker_registered) {
+      service_.wm()->UnregisterPeriodicClosure(memory_tracker_closure_id_);
+      memory_tracker_closure_id_ = kInvalidPeriodicClosureId;
+    }
+
+    const bool log_cleaner_registered =
+        log_cleaner_closure_id_ != kInvalidPeriodicClosureId;
+    if (log_cleaner_registered) {
+      service_.wm()->UnregisterPeriodicClosure(log_cleaner_closure_id_);
+      log_cleaner_closure_id_ = kInvalidPeriodicClosureId;
+    }
+
+    service_.Wait();
+  }
+
+  // Takes ownership of auto_updater.
+  void SetAutoUpdater(std::unique_ptr<AutoUpdater> auto_updater) {
+    // Ownership is passed straight on to the compile service.
+    std::unique_ptr<AutoUpdater> owned_updater(std::move(auto_updater));
+    service_.SetAutoUpdater(std::move(owned_updater));
+  }
+
+  // Takes ownership of watchdog.
+  void SetWatchdog(std::unique_ptr<Watchdog> watchdog,
+                   const std::vector<string>& goma_ipc_env,
+                   ThreadpoolHttpServer* server,
+                   int count) {
+    // Hand the watchdog (with |goma_ipc_env|) to the service first, then
+    // start it with |server| and |count|.
+    service_.SetWatchdog(std::move(watchdog), goma_ipc_env);
+    service_.WatchdogStart(server, count);
+  }
+
+  // Public entry point that runs TrackMemory() once.
+  void TrackMemoryOneshot() {
+    this->TrackMemory();
+  }
+
+ private:
+  // Shorthand for the request type passed to every handler method.
+  using HttpServerRequest = ThreadpoolHttpServer::HttpServerRequest;
+
+  // Pointer to a member function that serves one URL.  The handler fills
+  // |response| and returns an int that HandleHttpRequest() treats as the
+  // response code.
+  using HttpHandlerMethod =
+      int (CompilerProxyHttpHandler::*)(const HttpServerRequest& request,
+                                        string* response);
+
+  // Writes a "200 OK" status line and a Content-Type header for
+  // |content_type| to |*ss|, followed by the blank line that ends the
+  // header section.  No body is written.
+  void OutputOkHeader(const char* content_type, std::ostringstream* ss) {
+    std::ostringstream& out = *ss;
+    out << "HTTP/1.1 200 OK\r\n";
+    out << "Content-Type: " << content_type << "\r\n\r\n";
+  }
+
+  // Stores a "302 Found" reply whose Location header is |url| in
+  // |*response| and returns 302.  |url| is inserted verbatim.
+  int Redirect(const string& url, string* response) {
+    string reply("HTTP/1.1 302 Found\r\n");
+    reply += "Location: ";
+    reply += url;
+    reply += "\r\n";
+    reply += "\r\n";
+    response->swap(reply);
+    return 302;
+  }
+
+  // Stores a header-only "400 Bad Request" reply in |*response| and
+  // returns 400.
+  int BadRequest(string* response) {
+    response->assign("HTTP/1.1 400 Bad Request\r\n\r\n");
+    return 400;
+  }
+
+  // Writes a complete "200 OK" reply to |*ss|: status line, Content-Type
+  // (|content_type|), Content-Length (size of |content|), a blank line and
+  // then |content| itself.
+  void OutputOkHeaderAndBody(const char* content_type, StringPiece content,
+                             std::ostringstream* ss) {
+    std::ostringstream& out = *ss;
+    const auto body_size = content.size();
+    out << "HTTP/1.1 200 OK\r\n";
+    out << "Content-Type: " << content_type << "\r\n";
+    out << "Content-Length: " << body_size << "\r\n\r\n";
+    out << content;
+  }
+
+  // Serves the status page: copies the embedded HTML5 status template into
+  // a string and lets HandleStatusRequestHtml() produce the reply.
+  int HandleStatusRequest(const HttpServerRequest& request, string* response) {
+    string status_template(compiler_proxy_status_html5_html_start,
+                           compiler_proxy_status_html5_html_size);
+    return HandleStatusRequestHtml(request, std::move(status_template),
+                                   response);
+  }
+
+  // Serves the embedded compilerz HTML page as text/html.  |request| is not
+  // consulted; always returns 200.
+  int HandleCompilerzRequest(const HttpServerRequest& request,
+                             string* response) {
+    const StringPiece page(compilerz_html_html_start,
+                           compilerz_html_html_size);
+    std::ostringstream reply;
+    OutputOkHeaderAndBody("text/html; charset=utf-8", page, &reply);
+    *response = reply.str();
+    return 200;
+  }
+
+  // Serves the embedded compilerz JavaScript as text/javascript.  |request|
+  // is not consulted; always returns 200.
+  int HandleCompilerzScript(const HttpServerRequest& request,
+                            string* response) {
+    const StringPiece script(compilerz_script_js_start,
+                             compilerz_script_js_size);
+    std::ostringstream reply;
+    OutputOkHeaderAndBody("text/javascript; charset=utf-8", script, &reply);
+    *response = reply.str();
+    return 200;
+  }
+
+  // Serves the embedded compilerz stylesheet as text/css.  |request| is not
+  // consulted; always returns 200.
+  int HandleCompilerzStyle(const HttpServerRequest& request,
+                           string* response) {
+    const StringPiece stylesheet(compilerz_style_css_start,
+                                 compilerz_style_css_size);
+    std::ostringstream reply;
+    OutputOkHeaderAndBody("text/css; charset=utf-8", stylesheet, &reply);
+    *response = reply.str();
+    return 200;
+  }
+
+  // Serves the embedded jquery.min.js as text/javascript.  |request| is not
+  // consulted; always returns 200.
+  int HandleJQuery(const HttpServerRequest& request,
+                   string* response) {
+    const StringPiece script(jquery_min_js_start, jquery_min_js_size);
+    std::ostringstream reply;
+    OutputOkHeaderAndBody("text/javascript; charset=utf-8", script, &reply);
+    *response = reply.str();
+    return 200;
+  }
+
+
+  // Replies 200 with the embedded status page JavaScript as the body.
+  int HandleStatusJavaScript(const HttpServerRequest& request,
+                             string* response) {
+    const StringPiece script(compiler_proxy_status_script_js_start,
+                             compiler_proxy_status_script_js_size);
+    std::ostringstream out;
+    OutputOkHeaderAndBody("text/javascript; charset=utf-8", script, &out);
+    *response = out.str();
+    return 200;
+  }
+
+  // Replies 200 with the embedded contentionz JavaScript as the body.
+  int HandleContentionzJavaScript(const HttpServerRequest& request,
+                                  string* response) {
+    const StringPiece script(compiler_proxy_contentionz_script_js_start,
+                             compiler_proxy_contentionz_script_js_size);
+    std::ostringstream out;
+    OutputOkHeaderAndBody("text/javascript; charset=utf-8", script, &out);
+    *response = out.str();
+    return 200;
+  }
+
+  // Replies 200 with the embedded status page stylesheet as the body.
+  int HandleStatusCSS(const HttpServerRequest& request,
+                      string* response) {
+    const StringPiece stylesheet(compiler_proxy_status_style_css_start,
+                                 compiler_proxy_status_style_css_size);
+    std::ostringstream out;
+    OutputOkHeaderAndBody("text/css; charset=utf-8", stylesheet, &out);
+    *response = out.str();
+    return 200;
+  }
+
+  // Helper for the status page handler: substitutes the {{ENDPOINTS}} and
+  // {{GLOBAL_INFO}} placeholders in |original_status| and writes the result
+  // to |response| as a complete "200 OK" text/html HTTP message.
+  int HandleStatusRequestHtml(const HttpServerRequest& request,
+                              string original_status, string* response) {
+    // First pass: insert the endpoint links.
+    std::ostringstream endpoint_links;
+    GetEndpoints(&endpoint_links);
+    string with_endpoints;
+    StringReplace(original_status, "{{ENDPOINTS}}", endpoint_links.str(), true,
+                  &with_endpoints);
+
+    // Second pass: insert the global information table.
+    std::ostringstream info_table;
+    GetGlobalInfo(request, &info_table);
+    string page;
+    StringReplace(with_endpoints, "{{GLOBAL_INFO}}", info_table.str(), true,
+                  &page);
+
+    std::ostringstream out;
+    out << "HTTP/1.1 200 OK\r\n"
+        << "Content-Type: text/html; charset=utf-8\r\n"
+        << "Content-Length: " << page.size() << "\r\n"
+        << "\r\n"
+        << page;
+    *response = out.str();
+    return 200;
+  }
+
+  // Writes one HTML link per path registered in http_handlers_ to |ss|, each
+  // followed by a space. Paths that start with "/api/" are left out.
+  void GetEndpoints(std::ostringstream* ss) {
+    for (const auto& handler : http_handlers_) {
+      const auto& path = handler.first;
+      if (!strings::StartsWith(path, "/api/")) {
+        *ss << "<a href='" << path << "'>" << path << "</a>" << " ";
+      }
+    }
+  }
+
+  // Writes an HTML table with process-wide information to |ss|.
+  //
+  // Left cell: start time and uptime, build time/user/host/directory/revision,
+  // debug/ASAN warnings, and the PID. Right cell: user, node name and port
+  // (plus the unix socket name from |request| when it is non-empty), the
+  // current directory, and links to the /logz pages.
+  void GetGlobalInfo(const HttpServerRequest& request, std::ostringstream* ss) {
+    static const char kBr[] = "<br>";
+
+    *ss << "<table width=100%>";
+    *ss << "<tr>";
+
+    *ss << "<td>";
+
+    char ctime_buf[30];
+    const time_t start_time = service_.start_time();
+#ifndef _WIN32
+    ctime_r(&start_time, ctime_buf);
+#else
+    ctime_s(ctime_buf, 30, &start_time);
+#endif
+    // Split the uptime (seconds) into hours, minutes and seconds.
+    // An hour is 60 * 60 seconds; hours are intentionally not wrapped into
+    // days, so the hour field keeps growing.
+    int uptime = static_cast<int>(time(nullptr) - start_time);
+    int upsec = uptime % 60;
+    int upmin = (uptime / 60) % 60;
+    int uphour = uptime / (60 * 60);
+    *ss << "Started: " << ctime_buf << " -- up "
+        << uphour << " hr " << upmin << " min "<< upsec << " sec" << kBr;
+
+    *ss << "Built on " << kBuiltTimeString << kBr;
+
+    *ss << "Built at " << kBuiltUserNameString << '@'
+        << kBuiltHostNameString << ':' << kBuiltDirectoryString << kBr;
+
+    *ss << "Built from changelist " << kBuiltRevisionString << kBr;
+#ifndef NDEBUG
+    *ss << "WARNING: DEBUG BINARY -- Performance may suffer" << kBr;
+#endif
+#ifdef ADDRESS_SANITIZER
+    *ss << "WARNING: ASAN BINARY -- Performance may suffer" << kBr;
+#endif
+
+    *ss << "PID is " << Getpid() << kBr;
+
+    *ss << "</td>";
+
+    *ss << "<td align=right valign=top>";
+
+    *ss << "Running on "
+        << service_.username() << "@" << service_.nodename() << ":"
+        << request.server().port();
+    if (!request.server().un_socket_name().empty()) {
+      *ss << " + " << request.server().un_socket_name();
+    }
+    *ss << kBr;
+
+    *ss << "Running at " << GetCurrentDirNameOrDie() << kBr;
+
+    // TODO: Process size from /proc/self/stat for linux.
+
+    // TODO: Links to /proc.
+
+    *ss << "Log files: "
+        << "<a href=\"/logz?INFO\">INFO</a> "
+        << "<a href=\"/logz?WARNING\">WARNING</a> "
+        << "<a href=\"/logz?ERROR\">ERROR</a>" << kBr;
+#ifndef _WIN32
+    // Subprocess logs are only linked on non-Windows builds.
+    *ss << "Log files(subproc): "
+        << "<a href=\"/logz?subproc-INFO\">INFO</a> "
+        << "<a href=\"/logz?subproc-WARNING\">WARNING</a> "
+        << "<a href=\"/logz?subproc-ERROR\">ERROR</a>" << kBr;
+#endif
+
+
+    *ss << "</td>";
+
+    *ss << "</tr>";
+    *ss << "</table>";
+  }
+
+  // Handles the task API. Only POST is accepted; other methods get a 405.
+  // Depending on flags and the query string it replies with:
+  //   - the contents of FLAGS_API_TASKZ_FILE_FOR_TEST, when that flag is set;
+  //   - "id=<n>&dump=req": calls service_.DumpTaskRequest() and replies with
+  //     a 200 header only (404 if the call fails);
+  //   - "id=<n>": the JSON produced by service_.DumpTask() (404 if it fails);
+  //   - otherwise: the JSON produced by service_.DumpToJson(), passing the
+  //     optional "after=<n>" value (0 when absent).
+  // Returns the HTTP status code that was written to |response|.
+  int HandleTaskRequest(const HttpServerRequest& request, string* response) {
+    std::ostringstream ss;
+    if (request.method() != "POST") {
+      // Reject anything but POST as a guard against cross-site script
+      // inclusion (XSSI).
+      const string content =
+          "unacceptable http method:" + request.method() + "\r\n";
+      ss << "HTTP/1.1 405 Method Not Allowed\r\n";
+      ss << "Content-Type: text/plain\r\n";
+      ss << "Content-Length: " << content.size() << "\r\n";
+      ss << "\r\n";
+      ss << content;
+      *response = ss.str();
+      return 405;
+    }
+    // Test hook: serve a canned JSON file instead of live task data.
+    if (!FLAGS_API_TASKZ_FILE_FOR_TEST.empty()) {
+      string content;
+      CHECK(ReadFileToString(FLAGS_API_TASKZ_FILE_FOR_TEST, &content))
+          << FLAGS_API_TASKZ_FILE_FOR_TEST;
+      OutputOkHeaderAndBody("application/json", content, &ss);
+      *response = ss.str();
+      return 200;
+    }
+    const string& query = request.query();
+    std::map<string, string> params = ParseQuery(query);
+    auto p = params.find("id");
+    if (p != params.end()) {
+      // Single-task request. atoi() yields 0 for a non-numeric id.
+      const string& task_id_str = p->second;
+      int task_id = atoi(task_id_str.c_str());
+
+      if (params["dump"] == "req") {
+        if (!service_.DumpTaskRequest(task_id)) {
+          ss << "HTTP/1.1 404 Not found\r\n";
+          ss << "\r\n";
+          *response = ss.str();
+          return 404;
+        }
+        OutputOkHeader("text/plain", &ss);
+        *response = ss.str();
+        return 200;
+      }
+
+      string json;
+      if (!service_.DumpTask(task_id, &json)) {
+        ss << "HTTP/1.1 404 Not found\r\n";
+        ss << "\r\n";
+        *response = ss.str();
+        return 404;
+      }
+      OutputOkHeaderAndBody("application/json", json, &ss);
+      *response = ss.str();
+      return 200;
+    }
+    // No id given: dump tasks, forwarding the optional "after" value.
+    long long after = 0;
+    p = params.find("after");
+    if (p != params.end()) {
+      const string& after_str = p->second;
+#ifndef _WIN32
+      after = strtoll(after_str.c_str(), nullptr, 10);
+#else
+      after = _atoi64(after_str.c_str());
+#endif
+    }
+    OutputOkHeader("application/json", &ss);
+    Json::Value json;
+    service_.DumpToJson(&json, after);
+    ss << json;
+    *response = ss.str();
+    return 200;
+  }
+
+  // Replies 200 with a JSON object describing the login state: "status" is
+  // always present, "account" when one is set, and a "text"/"href" pair
+  // pointing at /api/loginz when OAuth2 is enabled but no refresh token or
+  // no account is available.
+  int HandleAccountRequest(const HttpServerRequest& /* req */,
+                           string* response) {
+    std::ostringstream out;
+    OutputOkHeader("application/json", &out);
+    out << "{";
+    out << "\"status\": "
+        << EscapeString(service_.http_client()->GetHealthStatusMessage());
+
+    const string& account = service_.http_client()->GetAccount();
+    const bool has_account = !account.empty();
+    if (has_account) {
+      out << ", \"account\": " << EscapeString(account);
+    }
+
+    OAuth2Config config;
+    service_.http_client()->GetOAuth2Config(&config);
+    if (!config.enabled()) {
+      LOG(WARNING) << "oauth2 config disabled";
+    } else if (config.refresh_token.empty()) {
+      out << ", \"text\": \"login\""
+          << ", \"href\": \"/api/loginz\"";
+    } else if (!has_account) {
+      // A refresh token exists but no account was resolved;
+      // this may indicate a bad oauth2 setup.
+      out << ", \"text\": \"bad oauth2 config - login\""
+          << ", \"href\": \"/api/loginz\"";
+    }
+    // TODO: add logout.
+
+    out << "}";
+    *response = out.str();
+    return 200;
+  }
+
+  // Starts the OAuth2 login flow.
+  //
+  // Replies with BadRequest() when the stored config is valid and an account
+  // is already set. Otherwise it resets the config via DefaultOAuth2Config(),
+  // saves it to FLAGS_OAUTH2_CONFIG_FILE, installs it on the http client,
+  // records a new login state and redirect URI, and redirects the client to
+  // the config's auth_uri.
+  int HandleLoginRequest(const HttpServerRequest& request,
+                         string* response) {
+    OAuth2Config config;
+    service_.http_client()->GetOAuth2Config(&config);
+    if (config.valid()) {
+      const string& account = service_.http_client()->GetAccount();
+      if (!account.empty()) {
+        // already login.
+        return BadRequest(response);
+      }
+      // bad oauth2 config?
+    }
+    // TODO: limit access?
+    LOG(INFO) << "start login";
+    DefaultOAuth2Config(&config);
+    SaveOAuth2Config(FLAGS_OAUTH2_CONFIG_FILE, config);
+    service_.http_client()->SetOAuth2Config(config);
+    string login_state;
+    string redirect_uri;
+    NewLoginState(request.server().port(), &login_state, &redirect_uri);
+    // NOTE(review): the query values below are appended without URL-encoding;
+    // confirm that scope and redirect_uri never need escaping.
+    std::ostringstream url;
+    url << config.auth_uri << "?scope=" << config.scope
+        << "&redirect_uri=" << redirect_uri
+        << "&client_id=" << config.client_id
+        << "&state=" << login_state
+        << "&response_type=code";
+    return Redirect(url.str(), response);
+  }
+
+  // Handles the OAuth2 redirect back from the authorization server.
+  //
+  // Replies with BadRequest() when the "state" query parameter fails
+  // CheckLoginState(), when "code" is missing, when an account is already
+  // set on a valid config, or when the code cannot be exchanged for a refresh
+  // token. On success the refresh token is saved to FLAGS_OAUTH2_CONFIG_FILE
+  // and installed on the http client, InitialPing() is run, and the client
+  // is redirected to "/".
+  int HandleAuthRequest(const HttpServerRequest& request,
+                        string* response) {
+    const string& query = request.query();
+    std::map<string, string> params = ParseQuery(query);
+    if (!CheckLoginState(params["state"])) {
+      LOG(WARNING) << "login state mismatch:" << params["state"];
+      return BadRequest(response);
+    }
+    const string& code = params["code"];
+    if (code.empty()) {
+      LOG(WARNING) << "missing code:" << query;
+      return BadRequest(response);
+    }
+    LOG(INFO) << "got auth code";
+    OAuth2Config config;
+    service_.http_client()->GetOAuth2Config(&config);
+    if (config.valid()) {
+      const string& account = service_.http_client()->GetAccount();
+      if (!account.empty()) {
+        // already login.
+        return BadRequest(response);
+      }
+    }
+    // Exchange the authorization code, using the redirect URI recorded by
+    // NewLoginState().
+    config.refresh_token =
+        ExchangeOAuth2RefreshToken(service_.wm(),
+                                   http_options_,
+                                   config,
+                                   code, GetRedirectURI());
+    if (config.refresh_token.empty()) {
+      LOG(WARNING) << "failed to get refresh token";
+      return BadRequest(response);
+    }
+    LOG(INFO) << "got refresh token";
+    SaveOAuth2Config(FLAGS_OAUTH2_CONFIG_FILE, config);
+    service_.http_client()->SetOAuth2Config(config);
+    if (InitialPing()) {
+      LOG(INFO) << "Login as " << service_.http_client()->GetAccount();
+    }
+    return Redirect("/", response);
+  }
+
+  // Logout is not implemented: always replies "501 Not Implemented".
+  // The block under "#if 0" is a disabled sketch of the intended behavior
+  // and is never compiled.
+  int HandleLogoutRequest(const HttpServerRequest& /* req */,
+                          string* response) {
+    // TODO: limit access only for the authenticated user.
+    *response = "HTTP/1.1 501 Not Implemented\r\n\r\n";
+    return 501;
+#if 0
+    std::ostringstream ss;
+    OAuth2Config config;
+    if (!service_.http_client()->GetOAuth2Config(&config)) {
+      return BadRequest(response);
+    }
+    config.refresh_token = "";
+    service_.http_client()->SetOAuth2Config(config);
+    SaveOAuth2Config(FLAGS_OAUTH2_CONFIG_FILE, config);
+    LOG(INFO) << "logout";
+    return Redirect("/", response);
+#endif
+  }
+
+  // Replies 200 with the service statistics. The body is JSON when the query
+  // contains the exact parameter "format=json", plain text otherwise.
+  int HandleStatsRequest(const HttpServerRequest& request,
+                         string* response) {
+    bool want_json = false;
+    for (const auto& param : strings::Split(request.query(), "&")) {
+      want_json = (param == "format=json");
+      if (want_json) {
+        break;
+      }
+    }
+
+    std::ostringstream out;
+    if (!want_json) {
+      OutputOkHeader("text/plain", &out);
+      service_.DumpStats(&out);
+    } else {
+      // NOTE(review): other JSON handlers in this class send
+      // "application/json"; confirm whether "text/json" is intentional.
+      OutputOkHeader("text/json", &out);
+      string stats_json;
+      service_.DumpStatsJson(&stats_json, CompileService::kHumanReadable);
+      out << stats_json;
+    }
+
+    *response = out.str();
+    return 200;
+  }
+
+  // Replies 200 with the histogram dump as plain text. When the query
+  // contains the substring "reset", the histogram is reset after dumping and
+  // "Reset done" is appended to the body.
+  int HandleHistogramRequest(const HttpServerRequest& request,
+                             string* response) {
+    const string& query = request.query();
+    const bool wants_reset = strstr(query.c_str(), "reset") != nullptr;
+
+    std::ostringstream out;
+    OutputOkHeader("text/plain", &out);
+    service_.histogram()->DumpString(&out);
+    if (wants_reset) {
+      service_.histogram()->Reset();
+      out << "Reset done\n";
+    }
+
+    *response = out.str();
+    return 200;
+  }
+
+  // Replies 200 with plain-text debug strings for the http client, the http
+  // rpc layer, and the multi file store, in that order.
+  int HandleHttpRpcRequest(const HttpServerRequest& /* request */,
+                           string* response) {
+    std::ostringstream out;
+    OutputOkHeader("text/plain", &out);
+
+    out << "[http configuration]\n\n";
+    out << service_.http_client()->DebugString();
+    out << "\n\n";
+
+    out << "[http rpc]\n\n";
+    out << service_.http_rpc()->DebugString();
+    out << "\n\n";
+
+    out << "[multi store]\n\n";
+    out << service_.file_service()->multi_file_store()->DebugString();
+
+    *response = out.str();
+    return 200;
+  }
+
+  // Replies 200 with plain-text debug strings for the worker thread manager
+  // and the subprocess controller client.
+  int HandleThreadRequest(const HttpServerRequest& /* request */,
+                          string* response) {
+    std::ostringstream out;
+    OutputOkHeader("text/plain", &out);
+
+    out << "[worker threads]\n\n";
+    out << service_.wm()->DebugString();
+
+    out << "[subprocess]\n\n";
+    out << SubProcessControllerClient::Get()->DebugString();
+
+    *response = out.str();
+    return 200;
+  }
+
+  // Replies 200 with the lock contention report as HTML. Two lock names are
+  // skipped unless the query contains "detailed=1". When g_auto_lock_stats is
+  // not set, a plain-text "disabled" message is returned instead.
+  int HandleContentionRequest(const HttpServerRequest& request,
+                              string* response) {
+    std::ostringstream out;
+
+    if (!g_auto_lock_stats) {
+      OutputOkHeader("text/plain", &out);
+#ifdef NO_AUTOLOCK_STAT
+      out << "disabled (built with NO_AUTOLOCK_STAT)";
+#else
+      out << "disabled.  to turn on contentionz, GOMA_ENABLE_CONTENTIONZ=true";
+#endif
+      *response = out.str();
+      return 200;
+    }
+
+    // Lock names left out of the default report.
+    std::unordered_set<string> hidden_locks = {
+      "descriptor_poller::PollEvents",
+      "worker_thread::NextClosure",
+    };
+    for (const auto& param : strings::Split(request.query(), "&")) {
+      if (param == "detailed=1") {
+        hidden_locks.clear();
+        break;
+      }
+    }
+
+    OutputOkHeader("text/html", &out);
+    g_auto_lock_stats->Report(&out, hidden_locks);
+    *response = out.str();
+    return 200;
+  }
+
+  // Replies 200 with the file hash cache debug string as plain text.
+  int HandleFileCacheRequest(const HttpServerRequest& /* request */,
+                             string* response) {
+    std::ostringstream out;
+    OutputOkHeader("text/plain", &out);
+    out << "[file hash cache]\n\n";
+    out << service_.file_hash_cache()->DebugString();
+    *response = out.str();
+    return 200;
+  }
+
+  // Replies 200 with the output of service_.DumpCompilerInfo() as plain text.
+  int HandleCompilerInfoRequest(const HttpServerRequest& /* request */,
+                                string* response) {
+    std::ostringstream out;
+    OutputOkHeader("text/plain", &out);
+    service_.DumpCompilerInfo(&out);
+    response->assign(out.str());
+    return 200;
+  }
+
+  // Replies 200 with the styled JSON dump produced by
+  // CompilerInfoCache::DumpCompilersJSON().
+  int HandleCompilerJSONRequest(const HttpServerRequest& /* request */,
+                                string* response) {
+    Json::Value compilers;
+    std::ostringstream out;
+    OutputOkHeader("application/json", &out);
+    CompilerInfoCache::instance()->DumpCompilersJSON(&compilers);
+    out << compilers.toStyledString() << std::endl;
+
+    *response = out.str();
+    return 200;
+  }
+
+  // Replies 200 with the output of IncludeCache::DumpAll() as plain text.
+  int HandleIncludeCacheRequest(const HttpServerRequest& /* request */,
+                                string* response) {
+    std::ostringstream out;
+    OutputOkHeader("text/plain", &out);
+    IncludeCache::DumpAll(&out);
+    response->assign(out.str());
+    return 200;
+  }
+
+  // Replies 200 with the output of DumpEnvFlag() as plain text.
+  int HandleFlagRequest(const HttpServerRequest& /* request */,
+                        string* response) {
+    std::ostringstream out;
+    OutputOkHeader("text/plain", &out);
+    DumpEnvFlag(&out);
+    response->assign(out.str());
+    return 200;
+  }
+
+  // Replies 200 with kBuiltRevisionString as a plain-text body.
+  int HandleVersionRequest(const HttpServerRequest& /* request */,
+                           string* response) {
+    std::ostringstream out;
+    OutputOkHeader("text/plain", &out);
+    out << kBuiltRevisionString;
+    response->assign(out.str());
+    return 200;
+  }
+
+  // Answers a health check: replies "200 OK" with the http client's health
+  // status message as the body and logs the peer pid and query. When the
+  // request carries a peer pid or a non-empty query, service_.wm()->DebugLog()
+  // is called as well.
+  int HandleHealthRequest(const HttpServerRequest& request,
+                          string* response) {
+    const string& query = request.query();
+    const string health_status =
+        service_.http_client()->GetHealthStatusMessage();
+
+    response->assign("HTTP/1.1 200 OK\r\n\r\n");
+    response->append(health_status);
+
+    LOG(INFO) << "I am healthy:" << health_status
+              << " to pid:" << request.peer_pid()
+              << " query:" << query;
+
+    // gomacc checkhealth passes ?pid=<pid> as the query;
+    // build_nexe.py also checks /healthz.
+    const bool has_peer = request.peer_pid() != 0;
+    if (has_peer || !query.empty()) {
+      service_.wm()->DebugLog();
+    }
+    return 200;
+  }
+
+  // Replies 200 with a serialized HttpPortResponse carrying the port of the
+  // server that received |request|.
+  int HandlePortRequest(const HttpServerRequest& request,
+                        string* response) {
+    LOG(INFO) << "handle portz port=" << request.server().port();
+
+    HttpPortResponse port_message;
+    port_message.set_port(request.server().port());
+    string payload;
+    port_message.SerializeToString(&payload);
+
+    std::ostringstream out;
+    out << "HTTP/1.1 200 OK\r\n";
+    out << "Content-Type: binary/x-protocol-buffer\r\n";
+    out << "Content-Length: " << payload.size() << "\r\n\r\n";
+    out << payload;
+    *response = out.str();
+    return 200;
+  }
+
+  // Serves log files.
+  //
+  // With an empty query, replies with an HTML index linking to each log type.
+  // Otherwise the query names the log: INFO, WARNING, ERROR or FATAL,
+  // optionally prefixed with "subproc-". The file
+  // "<log_dirs[0]>/<myname_>[-subproc].<type>" is read and returned as plain
+  // text.
+  //
+  // Returns 404 without writing |response| when there is no logging
+  // directory, the log type is unknown, or the file cannot be read.
+  int HandleLogRequest(const HttpServerRequest& request,
+                       string* response) {
+    std::ostringstream oss;
+    const string& log_request = request.query();
+    if (log_request.empty()) {
+      // The subproc links are compiled in only on non-Windows builds.
+      string content = ("<a href=\"?INFO\">INFO</a> /"
+                        "<a href=\"?WARNING\">WARNING</a> /"
+                        "<a href=\"?ERROR\">ERROR</a>"
+#ifndef _WIN32
+                        "<br />"
+                        "<a href=\"?subproc-INFO\">subproc-INFO</a> /"
+                        "<a href=\"?subproc-WARNING\">subproc-WARNING</a> /"
+                        "<a href=\"?subproc-ERROR\">subproc-ERROR</a>"
+#endif
+                        "<br />");
+      oss << "HTTP/1.1 200 OK\r\n"
+          << "Content-Type: text/html\r\n"
+          << "Content-Length: " << content.size() << "\r\n\r\n"
+          << content;
+    } else {
+      const std::vector<string>& log_dirs = google::GetLoggingDirectories();
+      if (log_dirs.empty()) {
+        LOG(ERROR) << "No logging directories";
+        return 404;
+      }
+      // Split an optional "subproc-" prefix off the requested log type.
+      string log_suffix;
+      string log_type = log_request;
+      if (log_request.find("subproc-") == 0) {
+        log_suffix = "-subproc";
+        log_type = log_request.substr(strlen("subproc-"));
+      }
+      // Only the fixed set of severity names is accepted, so the query
+      // cannot name an arbitrary file.
+      if (log_type != "INFO" && log_type != "WARNING" &&
+          log_type != "ERROR" && log_type != "FATAL") {
+        LOG(WARNING) << "Unknown log type: " << log_type;
+        return 404;
+      }
+      string log_filename =
+          file::JoinPath(log_dirs[0], myname_ + log_suffix + "." + log_type);
+#ifdef _WIN32
+      const string& original_log = FindLogFile(log_dirs[0], myname_, log_type);
+      // Workaround GLOG not opening file in share read.
+      if (!CopyFileA(original_log.c_str(), log_filename.c_str(), FALSE)) {
+        log_filename = original_log;  // Can't copy, let's just try share read.
+      }
+#endif
+      string log;
+      if (!ReadFileToString(log_filename.c_str(), &log)) {
+        return 404;
+      }
+      oss << "HTTP/1.1 200 OK\r\n"
+          << "Content-Type: text/plain\r\n"
+          << "Content-Length: " << log.size() << "\r\n\r\n"
+          << log;
+    }
+
+    *response = oss.str();
+    return 200;
+  }
+
+  // Replies 200 with the output of service_.DumpErrorStatus(), sent as
+  // application/json.
+  int HandleErrorStatusRequest(const HttpServerRequest&, string* response) {
+    std::ostringstream out;
+    OutputOkHeader("application/json", &out);
+    service_.DumpErrorStatus(&out);
+    response->assign(out.str());
+    return 200;
+  }
+
+#ifdef HAVE_COUNTERZ
+  // Replies 200 with the Counterz data as styled JSON. When Counterz has no
+  // instance yet, an error is logged and the JSON body is an error string.
+  int HandleCounterRequest(const HttpServerRequest&, string* response) {
+    // TODO: implement better view using javascript if necessary.
+    std::ostringstream out;
+    OutputOkHeader("application/json", &out);
+
+    Json::Value counters;
+    if (Counterz::Instance() == nullptr) {
+      LOG(ERROR) << "counterz is used before Init().";
+      counters = "counterz is used before Init().";
+    } else {
+      Counterz::Instance()->DumpToJson(&counters);
+    }
+
+    out << counters.toStyledString() << std::endl;
+    *response = out.str();
+    return 200;
+  }
+#endif
+
+#ifdef _WIN32
+  // Called when the |i|-th call of a multi RPC finishes. Once
+  // rpc->ExecDone(i) returns true, the reply is sent and |rpc| is deleted.
+  void ExecDoneInMulti(CompileService::MultiRpcController* rpc, int i) {
+    if (!rpc->ExecDone(i)) {
+      return;
+    }
+    // Owns |rpc| from here on; it is deleted when this function returns.
+    std::unique_ptr<CompileService::MultiRpcController> owned_rpc(rpc);
+    owned_rpc->SendReply();
+  }
+#endif
+
+  // Sends |resp| as the reply of |rpc|, then deletes both objects.
+  void ExecDone(CompileService::RpcController* rpc, ExecResp* resp) {
+    // Take ownership up front so both are freed on every exit path.
+    std::unique_ptr<CompileService::RpcController> owned_rpc(rpc);
+    std::unique_ptr<ExecResp> owned_resp(resp);
+
+    owned_rpc->SendReply(*owned_resp);
+  }
+
+  // Sends a body-less HTTP/1.1 reply made of the status line
+  // "<response_code> <status_message>" on |http_server_request|.
+  void SendErrorMessage(
+      ThreadpoolHttpServer::HttpServerRequest* http_server_request,
+      int response_code, const string& status_message) {
+    std::ostringstream reply;
+    reply << "HTTP/1.1 " << response_code << " " << status_message;
+    reply << "\r\n\r\n";
+    http_server_request->SendReply(reply.str());
+  }
+
+  // Schedules CleanOldLogs() on a low-priority worker. Only logs a warning
+  // when FLAGS_LOG_CLEAN_INTERVAL is not positive.
+  void RunCleanOldLogs() {
+    if (FLAGS_LOG_CLEAN_INTERVAL > 0) {
+      // Switch from alarm worker to normal worker.
+      service_.wm()->RunClosure(
+          FROM_HERE,
+          NewCallback(this, &CompilerProxyHttpHandler::CleanOldLogs),
+          WorkerThreadManager::PRIORITY_LOW);
+      return;
+    }
+    LOG(WARNING) << "log clean interval <= 0, "
+                 << "but attempted cleaning old logs";
+  }
+
+  // Asks log_cleaner_ to clean old logs, passing the cutoff time
+  // "now - FLAGS_LOG_CLEAN_INTERVAL". Does nothing when the interval is not
+  // positive.
+  void CleanOldLogs() {
+    if (FLAGS_LOG_CLEAN_INTERVAL > 0) {
+      const time_t now = time(nullptr);
+      log_cleaner_.CleanOldLogs(now - FLAGS_LOG_CLEAN_INTERVAL);
+    }
+  }
+
+  // Schedules TrackMemory() on a low-priority worker. Only logs a warning
+  // when FLAGS_MEMORY_TRACK_INTERVAL is not positive.
+  void RunTrackMemory() {
+    if (FLAGS_MEMORY_TRACK_INTERVAL > 0) {
+      // Switch from alarm worker to normal worker.
+      service_.wm()->RunClosure(
+          FROM_HERE,
+          NewCallback(this, &CompilerProxyHttpHandler::TrackMemory),
+          WorkerThreadManager::PRIORITY_LOW);
+      return;
+    }
+    LOG(WARNING) << "memory track interval <= 0, "
+                 << "but attempted tracking memory";
+  }
+
+  // Logs the memory consumed by the current process, as a warning when it
+  // reaches FLAGS_MEMORY_WARNING_THRESHOLD_IN_MB. When a log service is
+  // available, a MemoryUsageLog record is saved to it as well.
+  void TrackMemory() {
+    const int64_t kBytesPerMb = 1024 * 1024;
+    const int64_t used_bytes = GetConsumingMemoryOfCurrentProcess();
+    const int64_t limit_bytes =
+        static_cast<int64_t>(FLAGS_MEMORY_WARNING_THRESHOLD_IN_MB) *
+        kBytesPerMb;
+
+    if (used_bytes < limit_bytes) {
+      LOG(INFO) << "memory tracking: consuming memory = "
+                << used_bytes << " bytes";
+    } else {
+      LOG(WARNING) << "memory tracking: consuming memory = "
+                   << used_bytes << " bytes, which is higher than "
+                   << "warning threshold "
+                   << limit_bytes << " bytes";
+    }
+
+    if (!service_.log_service()) {
+      return;
+    }
+
+    MemoryUsageLog usage;
+    usage.set_compiler_proxy_start_time(service_.start_time());
+    usage.set_compiler_proxy_user_agent(kUserAgentString);
+    usage.set_username(service_.username());
+    usage.set_nodename(service_.nodename());
+
+    const time_t sampled_at = time(nullptr);
+    usage.set_memory(used_bytes);
+    usage.set_time(sampled_at);
+
+    service_.log_service()->SaveMemoryUsageLog(usage);
+  }
+
+  // Writes the service statistics to the INFO log twice: first in the text
+  // format, then as JSON.
+  void DumpStatsToInfoLog() {
+    // TODO: Remove the text dump after diagnose_goma_log.py and
+    // diagnose_goma_log_server read json format stats.
+    std::ostringstream text_stats;
+    service_.DumpStats(&text_stats);
+    LOG(INFO) << "Dumping stats...\n" << text_stats.str();
+
+    // The JSON dump uses the compact kFastHumanUnreadable format.
+    std::string json_stats;
+    service_.DumpStatsJson(&json_stats, CompileService::kFastHumanUnreadable);
+    LOG(INFO) << "Dumping json stats...\n" << json_stats;
+  }
+
+  // Writes the histogram dump to the INFO log.
+  void DumpHistogramToInfoLog() {
+    std::ostringstream dump;
+    service_.histogram()->DumpString(&dump);
+    LOG(INFO) << "Dumping histogram...\n" << dump.str();
+  }
+
+  // Writes the output of IncludeCache::DumpAll() to the INFO log.
+  void DumpIncludeCacheLogToInfoLog() {
+    std::ostringstream dump;
+    IncludeCache::DumpAll(&dump);
+    LOG(INFO) << "Dumping include cache...\n" << dump.str();
+  }
+
+  // Writes the lock contention text report to the INFO log.
+  // Does nothing when g_auto_lock_stats is not set.
+  void DumpContentionLogToInfoLog() {
+    // HandleContentionRequest() treats a null g_auto_lock_stats as
+    // "contention stats disabled", so guard the dereference here as well.
+    if (!g_auto_lock_stats) {
+      return;
+    }
+    std::ostringstream ss;
+    g_auto_lock_stats->TextReport(&ss);
+    LOG(INFO) << "Dumping contention...\n"
+              << ss.str();
+  }
+
+  // Dumps the service statistics to FLAGS_DUMP_STATS_FILE; a no-op when that
+  // flag is empty.
+  void DumpStatsProto() {
+    if (!FLAGS_DUMP_STATS_FILE.empty()) {
+      service_.DumpStatsToFile(FLAGS_DUMP_STATS_FILE);
+    }
+  }
+
+#if HAVE_HEAP_PROFILER
+  // Toggles the heap profiler. If it is running, a dump is taken and the
+  // profiler is stopped; otherwise it is started with
+  // compiler_proxy_heap_profile_file_. Always replies 200 with a short
+  // message.
+  int HandleHeapRequest(const HttpServerRequest& request,
+                        string* response) {
+    *response = "HTTP/1.1 200 OK\r\n\r\n";
+    if (!IsHeapProfilerRunning()) {
+      HeapProfilerStart(compiler_proxy_heap_profile_file_.c_str());
+      response->append("heap profiler starts.");
+      return 200;
+    }
+    HeapProfilerDump("requested by /heapz");
+    HeapProfilerStop();
+    response->append("heap profiler stopped. see ");
+    response->append(compiler_proxy_heap_profile_file_);
+    response->append(".*.heap");
+    return 200;
+  }
+#endif
+#if HAVE_CPU_PROFILER
+  // Toggles the CPU profiler, tracking its state in cpu_profiling_. Always
+  // replies 200 with a short message.
+  int HandleProfileRequest(const HttpServerRequest& request,
+                           string* response) {
+    *response = "HTTP/1.1 200 OK\r\n\r\n";
+    if (!cpu_profiling_) {
+      ProfilerStart(compiler_proxy_cpu_profile_file_.c_str());
+      cpu_profiling_ = true;
+      response->append("cpu profiler starts.");
+      return 200;
+    }
+    ProfilerStop();
+    cpu_profiling_ = false;
+    response->append("cpu profiler stopped. see ");
+    response->append(compiler_proxy_cpu_profile_file_);
+    return 200;
+  }
+#endif
+
+  // Generates a new 32-character random login state and the local redirect
+  // URI "http://localhost:<port>/api/authz". Both are returned through the
+  // out parameters and stored in the members guarded by login_state_mu_.
+  void NewLoginState(int port, string* login_state, string* redirect_uri) {
+    // Build both values before taking the lock.
+    *login_state = GetRandomAlphanumeric(32);
+
+    std::ostringstream uri_builder;
+    uri_builder << "http://localhost:" << port << "/api/authz";
+    *redirect_uri = uri_builder.str();
+
+    AUTOLOCK(lock, &login_state_mu_);
+    oauth2_login_state_ = *login_state;
+    oauth2_redirect_uri_ = *redirect_uri;
+  }
+
+  // Returns true only if |state| is non-empty and equals the login state
+  // most recently stored by NewLoginState().
+  //
+  // In the code visible here only NewLoginState() assigns
+  // oauth2_login_state_, so it is presumably empty until a login flow has
+  // been started. An empty |state| is therefore rejected explicitly;
+  // otherwise a request without a "state" parameter would pass the check
+  // before any login was initiated.
+  bool CheckLoginState(const string& state) const {
+    if (state.empty()) {
+      return false;
+    }
+    AUTOLOCK(lock, &login_state_mu_);
+    return oauth2_login_state_ == state;
+  }
+
+  // Returns a copy of the redirect URI stored by NewLoginState(), read under
+  // login_state_mu_.
+  string GetRedirectURI() const {
+    AUTOLOCK(lock, &login_state_mu_);
+    string uri_copy = oauth2_redirect_uri_;
+    return uri_copy;
+  }
+
+  // Returns true for every FLAGS_RPC_TRACE_PERIOD-th call, counted with
+  // rpc_sent_count_ under rpc_sent_count_mu_. Always returns false, without
+  // counting, when the period is less than 1.
+  bool ShouldTrace() {
+    if (FLAGS_RPC_TRACE_PERIOD >= 1) {
+      AUTOLOCK(lock, &rpc_sent_count_mu_);
+      const uint64_t sequence = rpc_sent_count_++;
+      return sequence % FLAGS_RPC_TRACE_PERIOD == 0;
+    }
+    return false;
+  }
+
+  // Used by HandleLogRequest() as the base name of the log files it serves.
+  const string myname_;
+  // The compile service that the handlers above query and dump.
+  CompileService service_;
+  // Used by CleanOldLogs().
+  LogCleaner log_cleaner_;
+  // NOTE(review): presumably the ids of the periodic closures that call
+  // RunCleanOldLogs() / RunTrackMemory(); registration is outside this view.
+  PeriodicClosureId log_cleaner_closure_id_;
+  PeriodicClosureId memory_tracker_closure_id_;
+  // Call counter used by ShouldTrace() to pick every N-th call.
+  Lock rpc_sent_count_mu_;
+  uint64_t rpc_sent_count_ GUARDED_BY(rpc_sent_count_mu_);
+
+  // Handler methods keyed by string; GetEndpoints() treats the keys of
+  // http_handlers_ as URL paths.
+  std::map<string, HttpHandlerMethod> http_handlers_;
+  std::map<string, HttpHandlerMethod> internal_http_handlers_;
+
+  const string tmpdir_;
+
+#if HAVE_HEAP_PROFILER
+  // File name passed to HeapProfilerStart() by HandleHeapRequest().
+  const string compiler_proxy_heap_profile_file_;
+#endif
+#if HAVE_CPU_PROFILER
+  // File name passed to ProfilerStart() by HandleProfileRequest().
+  const string compiler_proxy_cpu_profile_file_;
+  // True while the CPU profiler started by HandleProfileRequest() is running.
+  bool cpu_profiling_;
+#endif
+
+  // Default http_options_ for any http clients in compiler_proxy
+  // such as oauth2 etc.
+  HttpClient::Options http_options_;
+
+  // Login state and redirect URI of the OAuth2 flow. They are written by
+  // NewLoginState() and read by CheckLoginState() / GetRedirectURI(), always
+  // under login_state_mu_.
+  Lock login_state_mu_;
+  string oauth2_login_state_;
+  string oauth2_redirect_uri_;
+
+  DISALLOW_COPY_AND_ASSIGN(CompilerProxyHttpHandler);
+};
+
+}  // namespace devtools_goma
+
+#ifndef _WIN32
+// Returns true when |filename| cannot be stat()ed (for example because it
+// does not exist) or when it is owned by |uid|. Otherwise prints a
+// diagnostic to stderr and returns false.
+bool CheckFileOwnedByMyself(const string& filename, uid_t uid) {
+  struct stat file_info;
+  const bool stat_failed = stat(filename.c_str(), &file_info) == -1;
+  if (stat_failed || file_info.st_uid == uid) {
+    return true;
+  }
+
+  std::cerr << "GOMA: compiler_proxy:"
+            << " other user (" << file_info.st_uid << ") owns " << filename
+            << ", so you (" << uid << ") can not run compiler_proxy. "
+            << std::endl;
+  std::cerr << "GOMA: remove " << filename << std::endl;
+  return false;
+}
+
+// Opens (creating it if needed) the lock file "<filename>.<port>" and takes
+// a non-blocking exclusive flock() on it. The returned descriptor must stay
+// open for as long as the lock is to be held.
+//
+// Exits the process with status 1 when the file is owned by another user,
+// cannot be opened, or is already locked. Any other flock() failure is not
+// treated as an error.
+ScopedFd LockMyself(const string& filename, int port) {
+  std::ostringstream name_builder;
+  name_builder << filename << "." << port;
+  const string lock_path = name_builder.str();
+
+  const bool owned_by_me = CheckFileOwnedByMyself(lock_path.c_str(), getuid());
+  if (!owned_by_me) {
+    exit(1);
+  }
+
+  ScopedFd fd(open(lock_path.c_str(), O_RDONLY|O_CREAT, S_IRUSR));
+  if (!fd.valid()) {
+    std::cerr << "GOMA: compiler_proxy: "
+              << "failed to open lock file:" << lock_path << std::endl;
+    exit(1);
+  }
+
+  const int lock_result = flock(fd.fd(), LOCK_EX | LOCK_NB);
+  const bool held_by_other = (lock_result == -1 && errno == EWOULDBLOCK);
+  if (held_by_other) {
+    std::cerr << "GOMA: compiler_proxy: "
+              << "there is already someone else with lock" << std::endl;
+    exit(1);
+  }
+  return fd;
+}
+#endif
+
+// Tries to raise the soft RLIMIT_NOFILE limit and stores the resulting limit
+// in |*nfile|.
+//
+// On non-Windows builds the target is the largest of sysconf(_SC_OPEN_MAX),
+// OPEN_MAX (when defined) and the current soft limit, capped by the hard
+// limit; on Mac it is also capped by the kern.maxfiles and
+// kern.maxfilesperproc sysctl values. If setrlimit() fails, the error is
+// logged and the previous soft limit is reported.
+// On Windows |*nfile| is simply FLAGS_COMPILER_PROXY_MAX_SOCKETS.
+void InitResourceLimits(int* nfile) {
+#ifndef _WIN32
+  struct rlimit lim;
+  PCHECK(getrlimit(RLIMIT_NOFILE, &lim) == 0);
+  *nfile = static_cast<int>(lim.rlim_cur);
+  const rlim_t prev = lim.rlim_cur;
+  rlim_t open_max = static_cast<rlim_t>(sysconf(_SC_OPEN_MAX));
+#ifdef OPEN_MAX
+  open_max = std::max(open_max, static_cast<rlim_t>(OPEN_MAX));
+#endif
+  open_max = std::max(open_max, lim.rlim_cur);
+#ifdef __MACH__
+  // Choose smaller size from sysctl.  (b/9548636)
+  int mib[2] = {CTL_KERN, -1};
+  static const int kSecondMibs[] = {KERN_MAXFILES, KERN_MAXFILESPERPROC};
+  for (const auto& it : kSecondMibs) {
+    rlim_t tmp;
+    size_t length = sizeof(tmp);
+    mib[1] = it;
+    PCHECK(sysctl(mib, 2, &tmp, &length, nullptr, 0) == 0) << it;
+    open_max = std::min(tmp, open_max);
+  }
+  // setrlimit(3) will fail with EINVAL if launchctl sets smaller limit,
+  // which default is 256.  b/11596636
+#endif
+  // Never ask for more than the hard limit.
+  lim.rlim_cur = std::min(open_max, lim.rlim_max);
+  if (setrlimit(RLIMIT_NOFILE, &lim) != 0) {
+    // we might get EPERM or EINVAL if we try to increase RLIMIT_NOFILE above
+    // the current kernel maximum.
+    PLOG(ERROR) << "setrlimit(RLIMIT_NOFILE, &lim) != 0"
+                << " rlim_cur:" << lim.rlim_cur
+                << " rlim_max:" << lim.rlim_max
+                << " rlim_cur would remain " << prev;
+    lim.rlim_cur = prev;
+  } else {
+    LOG(INFO) << "setrlimit RLIMIT_NOFILE " << prev << " -> " << lim.rlim_cur;
+  }
+  *nfile = static_cast<int>(lim.rlim_cur);
+#else
+  *nfile = FLAGS_COMPILER_PROXY_MAX_SOCKETS;
+#endif
+}
+
+// Splits FLAGS_COMPILER_PROXY_TRUSTED_IPS on "," and registers every entry
+// with |trustedipsmanager| through AddAllow().
+void InitTrustedIps(devtools_goma::TrustedIpsManager* trustedipsmanager) {
+  std::vector<string> allowed;
+  SplitStringUsing(FLAGS_COMPILER_PROXY_TRUSTED_IPS, ",", &allowed);
+  for (size_t i = 0; i < allowed.size(); ++i) {
+    trustedipsmanager->AddAllow(allowed[i]);
+  }
+}
+
+namespace devtools_goma {
+
+// Initializes DepsCache from the DEPS_CACHE_* flags. The cache file name is
+// FLAGS_DEPS_CACHE_FILE joined onto the cache directory, or empty when the
+// flag is empty.
+void DepsCacheInit() {
+  string deps_cache_path;
+  const bool has_cache_file = !FLAGS_DEPS_CACHE_FILE.empty();
+  if (has_cache_file) {
+    deps_cache_path = file::JoinPathRespectAbsolute(GetCacheDirectory(),
+                                                    FLAGS_DEPS_CACHE_FILE);
+  }
+
+  DepsCache::Init(deps_cache_path,
+                  FLAGS_DEPS_CACHE_IDENTIFIER_ALIVE_DURATION,
+                  FLAGS_DEPS_CACHE_TABLE_THRESHOLD,
+                  FLAGS_DEPS_CACHE_MAX_PROTO_SIZE_IN_MB);
+}
+
+// Initializes CompilerInfoCache with the cache directory and the
+// COMPILER_INFO_CACHE_* flags.
+void CompilerInfoCacheInit() {
+  CompilerInfoCache::Init(
+      GetCacheDirectory(),
+      FLAGS_COMPILER_INFO_CACHE_FILE,
+      FLAGS_COMPILER_INFO_CACHE_HOLDING_TIME_SEC);
+}
+
+}  // namespace devtools_goma
+
+int main(int argc, char* argv[], const char* envp[]) {
+  devtools_goma::Init(argc, argv, envp);
+
+#if HAVE_COUNTERZ
+  devtools_goma::Counterz::Init();
+#endif
+
+  if (FLAGS_ENABLE_GLOBAL_FILE_ID_CACHE) {
+    devtools_goma::GlobalFileIdCache::Init();
+  }
+
+  const string tmpdir = FLAGS_TMP_DIR;
+#ifndef _WIN32
+  const string compiler_proxy_addr = file::JoinPathRespectAbsolute(
+      tmpdir,
+      FLAGS_COMPILER_PROXY_SOCKET_NAME);
+
+  if (!CheckFileOwnedByMyself(compiler_proxy_addr, getuid())) {
+    exit(1);
+  }
+
+  const string lock_filename = file::JoinPathRespectAbsolute(
+      tmpdir,
+      FLAGS_COMPILER_PROXY_LOCK_FILENAME);
+  ScopedFd lockfd(
+      LockMyself(lock_filename,
+                 FLAGS_COMPILER_PROXY_PORT));
+  if (FLAGS_COMPILER_PROXY_DAEMON_MODE) {
+    int fd[2];  // Pipe used to report the daemon's pid.
+    PCHECK(pipe(fd) == 0);
+    pid_t pid;
+    if ((pid = fork())) {  // Parent, or fork() failure (checked below).
+      PCHECK(pid > 0);
+      // Read the daemonized process's pid from the pipe and print it.
+      close(fd[1]);
+      pid_t server_pid;
+      PCHECK(read(fd[0], &server_pid, sizeof(server_pid)) ==
+             sizeof(server_pid));
+      std::cout << server_pid << std::endl;
+      exit(0);
+    }
+    close(fd[0]);  // The child keeps only the write end.
+    std::set<int> preserve_fds;
+    preserve_fds.insert(lockfd.fd());
+    Daemonize(
+        file::JoinPathRespectAbsolute(tmpdir,
+                                      FLAGS_COMPILER_PROXY_DAEMON_STDERR),
+        fd[1],
+        preserve_fds);
+  }
+
+  // Initialize rand.
+  srand(static_cast<unsigned int>(time(nullptr)));
+
+  // Do not die on SIGHUP or SIGPIPE.
+  signal(SIGHUP, SIG_IGN);
+  signal(SIGPIPE, SIG_IGN);
+#else
+  const string compiler_proxy_addr = FLAGS_COMPILER_PROXY_SOCKET_NAME;
+  WinsockHelper wsa;
+  devtools_goma::ScopedFd lock_fd;
+  std::ostringstream filename_buf;
+  filename_buf << "Global\\" << FLAGS_COMPILER_PROXY_LOCK_FILENAME << "."
+               << FLAGS_COMPILER_PROXY_PORT;
+  string lock_filename = filename_buf.str();
+  // The named event acts as the single-instance lock on Windows.
+  lock_fd.reset(CreateEventA(nullptr, TRUE, FALSE, lock_filename.c_str()));
+  DWORD last_error = GetLastError();
+  if (last_error == ERROR_ALREADY_EXISTS) {
+    std::cerr << "GOMA: compiler proxy: already existed" << std::endl;
+    exit(1);
+  }
+  if (!lock_fd.valid()) {
+    LOG(ERROR) << "Cannot acquire global named object: " << last_error;
+    exit(1);
+  }
+
+#ifdef NDEBUG
+  // Sets error mode to SEM_FAILCRITICALERRORS and SEM_NOGPFAULTERRORBOX
+  // to prevent from popping up message box on error.
+  // We don't use CREATE_DEFAULT_ERROR_MODE for dwCreationFlags in
+  // CreateProcess function.
+  // http://msdn.microsoft.com/en-us/library/windows/desktop/ms680621(v=vs.85).aspx
+  UINT old_error_mode = SetErrorMode(
+      SEM_FAILCRITICALERRORS|SEM_NOGPFAULTERRORBOX);
+  LOG(INFO) << "Set error mode from " << old_error_mode
+            << " to " << GetErrorMode();
+#endif
+#endif
+  devtools_goma::SubProcessController::Options subproc_options;
+  subproc_options.max_subprocs = FLAGS_MAX_SUBPROCS;
+  subproc_options.max_subprocs_low_priority = FLAGS_MAX_SUBPROCS_LOW;
+  subproc_options.max_subprocs_heavy_weight = FLAGS_MAX_SUBPROCS_HEAVY;
+  subproc_options.dont_kill_subprocess = FLAGS_DONT_KILL_SUBPROCESS;
+  if (!FLAGS_DONT_KILL_COMMANDS.empty()) {
+    std::vector<string> dont_kill_commands;
+    SplitStringUsing(FLAGS_DONT_KILL_COMMANDS, ",", &dont_kill_commands);
+    for (string cmd : dont_kill_commands) {  // Copy: lowercased on Windows.
+#ifdef _WIN32
+      std::transform(cmd.begin(), cmd.end(), cmd.begin(), ::tolower);
+#endif
+      subproc_options.dont_kill_commands.insert(cmd);
+    }
+  }
+  devtools_goma::SubProcessController::Initialize(argv[0], subproc_options);
+
+  devtools_goma::InitLogging(argv[0]);
+  if (FLAGS_COMPILER_PROXY_ENABLE_CRASH_DUMP) {
+    devtools_goma::InitCrashReporter(devtools_goma::GetCrashDumpDirectory());
+    LOG(INFO) << "breakpad is enabled";
+  }
+  std::unique_ptr<devtools_goma::AutoUpdater> auto_updater(
+      new devtools_goma::AutoUpdater(FLAGS_CTL_SCRIPT_NAME));
+  if (auto_updater->my_version() > 0) {
+    LOG(INFO) << "goma version:" << auto_updater->my_version();
+  }
+  auto_updater->SetEnv(envp);
+
+  int max_nfile = 0;  // Filled in by InitResourceLimits().
+  InitResourceLimits(&max_nfile);
+  CHECK_GT(max_nfile, 0);
+#if defined(USE_EPOLL) || defined(USE_KQUEUE) || defined(_WIN32)
+  int max_num_sockets = max_nfile;
+#else
+  // UNIX select doesn't accept descriptors greater than FD_SETSIZE.
+  int max_num_sockets = std::min<int>(max_nfile, FD_SETSIZE);
+#endif
+  LOG(INFO) << "max_num_sockets=" << max_num_sockets
+            << " max_nfile=" << max_nfile;
+
+  devtools_goma::WorkerThreadManager wm;
+  wm.Start(FLAGS_COMPILER_PROXY_THREADS);
+
+  devtools_goma::SubProcessControllerClient::Initialize(&wm, tmpdir);
+
+  devtools_goma::InstallReadCommandOutputFunc(
+      &devtools_goma::SubProcessTask::ReadCommandOutput);
+
+  devtools_goma::IncludeFileFinder::Init(FLAGS_ENABLE_GCH_HACK);
+
+  devtools_goma::IncludeCache::Init(
+      FLAGS_MAX_INCLUDE_CACHE_SIZE, !FLAGS_DEPS_CACHE_FILE.empty());
+  if (FLAGS_ENABLE_MACRO_CACHE) {
+    devtools_goma::InitMacroEnvCache();
+  }
+
+  std::unique_ptr<devtools_goma::WorkerThreadRunner> init_deps_cache(
+      new devtools_goma::WorkerThreadRunner(
+          &wm, FROM_HERE,
+          devtools_goma::NewCallback(devtools_goma::DepsCacheInit)));
+  std::unique_ptr<devtools_goma::WorkerThreadRunner> init_compiler_info_cache(
+      new devtools_goma::WorkerThreadRunner(
+          &wm, FROM_HERE,
+          devtools_goma::NewCallback(devtools_goma::CompilerInfoCacheInit)));
+
+  devtools_goma::TrustedIpsManager trustedipsmanager;
+  InitTrustedIps(&trustedipsmanager);
+
+  if (!FLAGS_SETTINGS_SERVER.empty()) {
+    ApplySettings(FLAGS_SETTINGS_SERVER, FLAGS_ASSERT_SETTINGS, &wm);
+  }
+  std::unique_ptr<devtools_goma::CompilerProxyHttpHandler> handler(
+      new devtools_goma::CompilerProxyHttpHandler(
+          string(file::Basename(argv[0])), tmpdir, &wm));
+
+  ThreadpoolHttpServer server(FLAGS_COMPILER_PROXY_LISTEN_ADDR,
+                              FLAGS_COMPILER_PROXY_PORT,
+                              FLAGS_COMPILER_PROXY_NUM_FIND_PORTS,
+                              &wm,
+                              FLAGS_COMPILER_PROXY_HTTP_THREADS,
+                              handler.get(),
+                              max_num_sockets);
+  server.SetMonitor(handler.get());
+  server.SetTrustedIpsManager(&trustedipsmanager);
+  CHECK(!compiler_proxy_addr.empty())
+      << "broken compiler_proxy_addr configuration. "
+      << "set GOMA_COMPILER_PROXY_SOCKET_NAME"
+      << " for compiler_proxy ipc addr";
+  server.StartIPC(compiler_proxy_addr,
+                  FLAGS_COMPILER_PROXY_THREADS,
+                  FLAGS_MAX_OVERCOMMIT_INCOMING_SOCKETS);
+  // TCP serves only status pages, no limit.
+  auto_updater->Start(&server, FLAGS_AUTO_UPDATE_IDLE_COUNT);
+  handler->SetAutoUpdater(std::move(auto_updater));
+
+  if (FLAGS_WATCHDOG_TIMER > 0) {
+    std::unique_ptr<devtools_goma::Watchdog> watchdog(
+        new devtools_goma::Watchdog);
+    std::vector<string> env;
+    env.push_back("GOMA_COMPILER_PROXY_SOCKET_NAME=" + compiler_proxy_addr);
+    env.push_back("PATH=" + devtools_goma::GetEnv("PATH"));
+    env.push_back("PATHEXT=" + devtools_goma::GetEnv("PATHEXT"));
+    env.push_back("USER=" + devtools_goma::GetUsername());
+    env.push_back("GOMA_TMP_DIR=" + FLAGS_TMP_DIR);
+    handler->SetWatchdog(std::move(watchdog), env,
+                         &server, FLAGS_WATCHDOG_TIMER);
+  }
+
+  devtools_goma::LocalOutputCache::Init(
+      FLAGS_LOCAL_OUTPUT_CACHE_DIR,
+      &wm,
+      FLAGS_LOCAL_OUTPUT_CACHE_MAX_CACHE_AMOUNT_IN_MB,
+      FLAGS_LOCAL_OUTPUT_CACHE_THRESHOLD_CACHE_AMOUNT_IN_MB,
+      FLAGS_LOCAL_OUTPUT_CACHE_MAX_ITEMS,
+      FLAGS_LOCAL_OUTPUT_CACHE_THRESHOLD_ITEMS);
+
+  init_deps_cache.reset();
+  init_compiler_info_cache.reset();
+  // Show memory just before server loop to understand how much memory is
+  // used for initialization.
+  handler->TrackMemoryOneshot();
+
+  LOG(INFO) << "server loop start";
+  if (server.Loop() != 0) {
+    LOG(ERROR) << "Server failed";
+    exit(1);
+  }
+  LOG(INFO) << "server loop end";
+  devtools_goma::FlushLogFiles();
+  server.StopIPC();
+#ifndef _WIN32
+  flock(lockfd.fd(), LOCK_UN);
+  lockfd.reset(-1);
+#else
+  lock_fd.Close();
+#endif
+  LOG(INFO) << "unlock compiler_proxy";
+  devtools_goma::FlushLogFiles();
+  devtools_goma::SubProcessControllerClient::Get()->Quit();
+  devtools_goma::LocalOutputCache::Quit();
+  server.Wait();
+  handler->Wait();
+  devtools_goma::CompilerInfoCache::Quit();
+  devtools_goma::DepsCache::Quit();
+  if (FLAGS_ENABLE_MACRO_CACHE) {
+    devtools_goma::QuitMacroEnvCache();
+  }
+  devtools_goma::IncludeCache::Quit();
+  devtools_goma::SubProcessControllerClient::Get()->Shutdown();
+
+  handler.reset();
+  wm.Finish();
+#ifndef _WIN32
+  // compiler_proxy only creates the subprocess controller server as a child
+  // process, so wait for its exit status here.
+  int status;
+  PCHECK(wait(&status) > 0);
+  LOG(INFO) << "wait:" << status;
+#endif
+
+  if (FLAGS_ENABLE_GLOBAL_FILE_ID_CACHE) {
+    devtools_goma::GlobalFileIdCache::Quit();
+  }
+
+#if HAVE_COUNTERZ
+  devtools_goma::Counterz::Quit();
+#endif
+
+  return 0;
+}
diff --git a/client/compiler_proxy_histogram.cc b/client/compiler_proxy_histogram.cc
new file mode 100644
index 0000000..dc2f977
--- /dev/null
+++ b/client/compiler_proxy_histogram.cc
@@ -0,0 +1,283 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "compiler_proxy_histogram.h"
+
+#include <sstream>
+
+#include "autolock_timer.h"
+#include "compile_stats.h"
+#include "compiler_specific.h"
+#include "glog/logging.h"
+#include "histogram.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_stats.pb.h"
+MSVC_POP_WARNING()
+
+namespace devtools_goma {
+
+static const char* GetHistogramItemName(size_t i) {
+  // Entry |i| names HistogramItems value |i| (keep in sync); no range check.
+  static const char* kItemNames[] = {
+    "PendingTime",
+    "CompilerInfoProcessTime",
+    "IncludePreprocessTime",
+    "IncludeProcessorWaitTime",
+    "IncludeProcessorRunTime",
+    "IncludeFileloadTime",
+    "UploadingInputFile",
+    "MissingInputFile",
+    "RPCCallTime",
+    "FileResponseTime",
+    "CompilerProxyHandlerTime",
+    "GccReqSize",
+    "GccRespSize",
+    "ExecReqSize",
+    "ExecReqRawSize",
+    "ExecReqCompressionRatio",
+    "ExecReqBuildTime",
+    "ExecReqTime",
+    "ExecReqKbps",
+    "ExecWaitTime",
+    "ExecRespSize",
+    "ExecRespRawSize",
+    "ExecRespCompressionRatio",
+    "ExecRespTime",
+    "ExecRespKbps",
+    "ExecRespParseTime",
+    "InputFileTime",
+    "InputFileSize",
+    "InputFileKbps",
+    "InputFileReqRawSize",
+    "InputFileReqCompressionRatio",
+    "OutputFileTime",
+    "OutputFileSize",
+    "ChunkRespSize",
+    "OutputFileKbps",
+    "OutputFileRespRawSize",
+    "OutputFileRespCompressionRatio",
+    "LocalDelayTime",
+    "LocalPendingTime",
+    "LocalRunTime",
+    "LocalMemSize",
+    "LocalOutputFileTime",
+    "LocalOutputFileSize",
+    "ThreadpoolHttpServerRequestSize",
+    "ThreadpoolHttpServerResponseSize",
+    "ThreadpoolHttpServerWaitingTime",
+    "ThreadpoolHttpServerReadRequestTime",
+    "ThreadpoolHttpServerHandlerTime",
+    "ThreadpoolHttpServerWriteResponseTime",
+    "NumCols"
+  };
+  return kItemNames[i];
+}
+
+// Creates NumCols histograms, each named after its HistogramItems slot.
+CompilerProxyHistogram::CompilerProxyHistogram() : histogram_(NumCols) {
+  size_t index = 0;
+  for (Histogram& h : histogram_) h.SetName(GetHistogramItemName(index++));
+}
+
+// Members clean up after themselves; nothing else to release.
+CompilerProxyHistogram::~CompilerProxyHistogram() {}
+
+void CompilerProxyHistogram::UpdateThreadpoolHttpServerStat(
+    const ThreadpoolHttpServer::Stat& stat) {
+  AUTOLOCK(lock, &mu_);  // Each field of |stat| goes into its THS* slot.
+  histogram_[THSReqSize].Add(stat.req_size);
+  histogram_[THSRespSize].Add(stat.resp_size);
+  histogram_[THSWaitingTime].Add(stat.waiting_time_msec);
+  histogram_[THSReadReqTime].Add(stat.read_req_time_msec);
+  histogram_[THSHandlerTime].Add(stat.handler_time_msec);
+  histogram_[THSWriteRespTime].Add(stat.write_resp_time_msec);
+}
+
+void CompilerProxyHistogram::UpdateCompileStat(const CompileStats& stats) {
+  AUTOLOCK(lock, &mu_);  // Unset (zero or empty) stats are skipped below.
+  if (stats.pending_time())
+    histogram_[PendingTime].Add(stats.pending_time());
+  if (stats.compiler_info_process_time())
+    histogram_[CompilerInfoProcessTime].Add(stats.compiler_info_process_time());
+  if (stats.include_preprocess_time())
+    histogram_[IncludePreprocessTime].Add(stats.include_preprocess_time());
+  if (stats.include_processor_wait_time()) {
+    histogram_[IncludeProcessorWaitTime].Add(
+        stats.include_processor_wait_time());
+  }
+  if (stats.include_processor_run_time())
+    histogram_[IncludeProcessorRunTime].Add(stats.include_processor_run_time());
+  if (stats.include_fileload_time())
+    histogram_[IncludeFileloadTime].Add(stats.include_fileload_time());
+  if (stats.num_uploading_input_file_size() > 0) {
+    histogram_[UploadingInputFile].Add(
+        SumRepeatedInt32(stats.num_uploading_input_file()));
+  }
+  if (stats.num_missing_input_file_size() > 0) {
+    histogram_[MissingInputFile].Add(
+        SumRepeatedInt32(stats.num_missing_input_file()));
+  }
+
+  if (stats.rpc_call_time_size() > 0)
+    histogram_[RPCCallTime].Add(SumRepeatedInt32(stats.rpc_call_time()));
+  if (stats.file_response_time())
+    histogram_[FileResponseTime].Add(stats.file_response_time());
+  if (stats.handler_time())
+    histogram_[CompilerProxyHandlerTime].Add(stats.handler_time());
+  if (stats.gcc_req_size)
+    histogram_[GccReqSize].Add(stats.gcc_req_size);
+  if (stats.gcc_resp_size)
+    histogram_[GccRespSize].Add(stats.gcc_resp_size);
+
+  // Exec call.
+  int64_t rpc_req_size = 0;  // Stays 0 when no request size was recorded.
+  if (stats.rpc_req_size_size() > 0) {
+    rpc_req_size = SumRepeatedInt32(stats.rpc_req_size());
+    histogram_[ExecReqSize].Add(rpc_req_size);
+  }
+  if (stats.rpc_raw_req_size_size() > 0) {
+    int64_t rpc_raw_req_size = SumRepeatedInt32(stats.rpc_raw_req_size());
+    histogram_[ExecReqRawSize].Add(rpc_raw_req_size);
+    if (rpc_raw_req_size > 0) {
+      histogram_[ExecReqCompressionRatio].Add(
+          100 * rpc_req_size / rpc_raw_req_size);  // Sent size as % of raw.
+    }
+  }
+  if (stats.rpc_req_build_time_size() > 0) {
+    histogram_[ExecReqBuildTime].Add(
+        SumRepeatedInt32(stats.rpc_req_build_time()));
+  }
+  if (stats.rpc_req_send_time_size() > 0) {
+    int64_t rpc_req_send_time = SumRepeatedInt32(stats.rpc_req_send_time());
+    histogram_[ExecReqTime].Add(rpc_req_send_time);
+    if (rpc_req_send_time > 0) {
+      histogram_[ExecReqKbps].Add(rpc_req_size / rpc_req_send_time);
+    }
+  }
+  if (stats.rpc_wait_time_size() > 0)
+    histogram_[ExecWaitTime].Add(SumRepeatedInt32(stats.rpc_wait_time()));
+
+  int64_t rpc_resp_size = 0;  // Stays 0 when no response size was recorded.
+  if (stats.rpc_resp_size_size() > 0) {
+    rpc_resp_size = SumRepeatedInt32(stats.rpc_resp_size());
+    histogram_[ExecRespSize].Add(rpc_resp_size);
+  }
+  if (stats.rpc_raw_resp_size_size() > 0) {
+    int64_t rpc_raw_resp_size = SumRepeatedInt32(stats.rpc_raw_resp_size());
+    histogram_[ExecRespRawSize].Add(rpc_raw_resp_size);
+    if (rpc_raw_resp_size > 0) {
+      histogram_[ExecRespCompressionRatio].Add(
+          100 * rpc_resp_size / rpc_raw_resp_size);  // Size as % of raw.
+    }
+  }
+  if (stats.rpc_resp_recv_time_size() > 0) {
+    int64_t rpc_resp_recv_time = SumRepeatedInt32(stats.rpc_resp_recv_time());
+    histogram_[ExecRespTime].Add(rpc_resp_recv_time);
+    if (rpc_resp_recv_time > 0)
+      histogram_[ExecRespKbps].Add(rpc_resp_size / rpc_resp_recv_time);
+  }
+  if (stats.rpc_resp_parse_time_size() > 0) {
+    histogram_[ExecRespParseTime].Add(
+        SumRepeatedInt32(stats.rpc_resp_parse_time()));
+  }
+  // Look into protobuf response.
+
+  // FileService
+  int64_t input_file_time = 0;  // Stays 0 when no input time was recorded.
+  if (stats.input_file_time_size() > 0) {
+    input_file_time = SumRepeatedInt32(stats.input_file_time());
+    histogram_[InputFileTime].Add(input_file_time);
+  }
+  if (stats.input_file_size_size() > 0) {
+    int64_t input_file_size = SumRepeatedInt32(stats.input_file_size());
+    histogram_[InputFileSize].Add(input_file_size);
+    if (input_file_time > 0) {
+      histogram_[InputFileKbps].Add(input_file_size / input_file_time);
+    }
+  }
+  if (stats.input_file_rpc_raw_size > 0) {
+    histogram_[InputFileReqRawSize].Add(stats.input_file_rpc_raw_size);
+    histogram_[InputFileReqCompressionRatio].Add(
+        100 * stats.input_file_rpc_size / stats.input_file_rpc_raw_size);
+  }
+  int64_t output_file_time = 0;  // Stays 0 when no output time was recorded.
+  if (stats.output_file_time_size() > 0) {
+    output_file_time = SumRepeatedInt32(stats.output_file_time());
+    histogram_[OutputFileTime].Add(output_file_time);
+  }
+  if (stats.output_file_size_size() > 0) {
+    int64_t output_file_size = SumRepeatedInt32(stats.output_file_size());
+    histogram_[OutputFileSize].Add(output_file_size);
+    if (output_file_time > 0) {
+      histogram_[OutputFileKbps].Add(output_file_size / output_file_time);
+    }
+  }
+  if (stats.output_file_rpc_raw_size > 0) {
+    histogram_[OutputFileRespRawSize].Add(stats.output_file_rpc_raw_size);
+    histogram_[OutputFileRespCompressionRatio].Add(
+        100 * stats.output_file_rpc_size / stats.output_file_rpc_raw_size);
+  }
+  if (stats.chunk_resp_size_size() > 0)
+    histogram_[ChunkRespSize].Add(SumRepeatedInt32(stats.chunk_resp_size()));
+
+  if (stats.local_delay_time() > 0)
+    histogram_[LocalDelayTime].Add(stats.local_delay_time());
+  if (stats.local_pending_time() > 0)
+    histogram_[LocalPendingTime].Add(stats.local_pending_time());
+  if (stats.local_run_time() > 0)
+    histogram_[LocalRunTime].Add(stats.local_run_time());
+  if (stats.local_mem_kb() > 0)
+    histogram_[LocalMemSize].Add(stats.local_mem_kb());
+  if (stats.local_output_file_time_size() > 0) {
+    histogram_[LocalOutputFileTime].Add(SumRepeatedInt32(
+        stats.local_output_file_time()));
+  }
+  if (stats.local_output_file_size_size() > 0) {
+    histogram_[LocalOutputFileSize].Add(SumRepeatedInt32(
+        stats.local_output_file_size()));
+  }
+}
+
+// Returns the mean of |item|'s histogram, or 0 when it has no samples.
+int64_t CompilerProxyHistogram::GetStatMean(HistogramItems item) const {
+  DCHECK_GE(item, 0);
+  DCHECK_LT(item, NumCols);
+  AUTOLOCK(lock, &mu_);
+  const Histogram& selected = histogram_[item];
+  const bool empty = selected.count() == 0;
+  return empty ? 0 : selected.mean();
+}
+
+// Returns the standard deviation of |item|'s histogram; 0.0 if it is empty.
+double CompilerProxyHistogram::GetStatStandardDeviation(
+    HistogramItems item) const {
+  DCHECK_GE(item, 0);
+  DCHECK_LT(item, NumCols);
+  AUTOLOCK(lock, &mu_);
+  const Histogram& selected = histogram_[item];
+  const bool empty = selected.count() == 0;
+  return empty ? 0.0 : selected.standard_deviation();
+}
+
+// Appends DebugString() of every non-empty histogram to |ss|, one per line.
+void CompilerProxyHistogram::DumpString(std::ostringstream* ss) {
+  AUTOLOCK(lock, &mu_);
+  for (Histogram& h : histogram_) {
+    if (h.count() > 0) (*ss) << h.DebugString() << "\n";
+  }
+}
+
+void CompilerProxyHistogram::DumpToProto(GomaHistograms* hist) {
+  AUTOLOCK(lock, &mu_);  // Only the RPCCallTime histogram is exported.
+  histogram_[RPCCallTime].DumpToProto(hist->mutable_rpc_call_time());
+}
+
+// Resets every histogram in histogram_.
+void CompilerProxyHistogram::Reset() {
+  AUTOLOCK(lock, &mu_);
+  for (Histogram& h : histogram_) h.Reset();
+}
+
+}  // namespace devtools_goma
diff --git a/client/compiler_proxy_histogram.h b/client/compiler_proxy_histogram.h
new file mode 100644
index 0000000..18a3334
--- /dev/null
+++ b/client/compiler_proxy_histogram.h
@@ -0,0 +1,112 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_COMPILER_PROXY_HISTOGRAM_H_
+#define DEVTOOLS_GOMA_CLIENT_COMPILER_PROXY_HISTOGRAM_H_
+
+#include <sstream>
+#include <vector>
+
+#include "basictypes.h"
+#include "histogram.h"
+#include "lockhelper.h"
+#include "threadpool_http_server.h"
+
+namespace devtools_goma {
+
+class CompileStats;
+class GomaHistograms;
+
+class CompilerProxyHistogram {
+ public:
+  enum HistogramItems {
+    // Stats from compiler_proxy Task
+    PendingTime,
+    CompilerInfoProcessTime,
+    IncludePreprocessTime,
+    IncludeProcessorWaitTime,
+    IncludeProcessorRunTime,
+    IncludeFileloadTime,
+    UploadingInputFile,
+    MissingInputFile,
+    // Time taken for HTTP RPC compiler_proxy is sending to stubby_proxy / GFE.
+    RPCCallTime,
+    FileResponseTime,
+    // Time taken for compiler_proxy to handle request.
+    CompilerProxyHandlerTime,
+
+    // Stats from protocol buffer response
+    GccReqSize,
+    GccRespSize,
+
+    ExecReqSize,
+    ExecReqRawSize,
+    ExecReqCompressionRatio,
+    ExecReqBuildTime,
+    ExecReqTime,
+    ExecReqKbps,
+    ExecWaitTime,
+    ExecRespSize,
+    ExecRespRawSize,
+    ExecRespCompressionRatio,
+    ExecRespTime,
+    ExecRespKbps,
+    ExecRespParseTime,
+
+    // Stats for FileService
+    InputFileTime,
+    InputFileSize,
+    InputFileKbps,
+    InputFileReqRawSize,
+    InputFileReqCompressionRatio,
+    OutputFileTime,
+    OutputFileSize,
+    ChunkRespSize,
+    OutputFileKbps,
+    OutputFileRespRawSize,
+    OutputFileRespCompressionRatio,
+
+    // Stats for subprocess
+    LocalDelayTime,
+    LocalPendingTime,
+    LocalRunTime,
+    LocalMemSize,
+    LocalOutputFileTime,
+    LocalOutputFileSize,
+
+    // Stats for ThreadpoolHttpServer
+    THSReqSize,
+    THSRespSize,
+    THSWaitingTime,
+    THSReadReqTime,
+    THSHandlerTime,
+    THSWriteRespTime,
+    NumCols  // Number of items above; must stay last.
+  };
+
+  CompilerProxyHistogram();
+  ~CompilerProxyHistogram();
+
+  void UpdateThreadpoolHttpServerStat(
+      const ThreadpoolHttpServer::Stat& stat);
+  void UpdateCompileStat(const CompileStats& task);
+
+  int64_t GetStatMean(HistogramItems item) const;
+  double GetStatStandardDeviation(HistogramItems item) const;
+  void DumpString(std::ostringstream* ss);
+  void DumpToProto(GomaHistograms* hist);
+
+  void Reset();
+
+ private:
+  Lock mu_;
+  std::vector<Histogram> histogram_;  // Indexed by HistogramItems.
+
+  DISALLOW_COPY_AND_ASSIGN(CompilerProxyHistogram);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_COMPILER_PROXY_HISTOGRAM_H_
diff --git a/client/content.cc b/client/content.cc
new file mode 100644
index 0000000..62cfe87
--- /dev/null
+++ b/client/content.cc
@@ -0,0 +1,106 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "content.h"
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <memory>
+
+#ifndef _WIN32
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#else
+# include "config_win.h"
+#endif
+#include "scoped_fd.h"
+
+#include <glog/logging.h>
+
+namespace devtools_goma {
+
+// static
+std::unique_ptr<Content> Content::CreateFromFile(const string& filepath) {
+  // Open the file, query its size, then read it; any failure yields nullptr.
+  ScopedFd fd(ScopedFd::OpenForRead(filepath));
+  if (!fd.valid()) return nullptr;
+
+  size_t file_size = 0;
+  const bool got_size = fd.GetFileSize(&file_size);
+  if (got_size) return CreateFromFileDescriptor(filepath, fd, file_size);
+
+  PLOG(ERROR) << "fd:" << fd << " filepath:" << filepath;
+  return nullptr;
+}
+
+// static
+std::unique_ptr<Content> Content::CreateFromFileDescriptor(
+    const string& filepath, const ScopedFd& fd, size_t filesize) {
+  DCHECK(fd.valid());
+
+  // Reserve one extra byte so the data is followed by a NUL terminator.
+  std::unique_ptr<char[]> buf(new char[filesize + 1]);
+  CHECK(buf.get() != nullptr) << "filepath:" << filepath
+                              << "filesize:" << filesize;
+  buf[filesize] = '\0';
+
+  // Read until |filesize| bytes have arrived; a read that returns <= 0
+  // before that point fails the whole call.
+  size_t total = 0;
+  while (total < filesize) {
+    const ssize_t got = fd.Read(buf.get() + total, filesize - total);
+    if (got <= 0) {
+      PLOG(ERROR) << "fd: " << fd << " filepath:" << filepath
+                  << " offset: " << total
+                  << " actual_read: " << got;
+      return nullptr;
+    }
+    total += got;
+  }
+
+  if (total == filesize) return CreateFromUnique(std::move(buf), filesize);
+  LOG(ERROR) << "size mismatch filepath:" << filepath << " fd:" << fd
+             << " offset:" << total << " filesize:" << filesize;
+  return CreateFromUnique(std::move(buf), total);
+}
+
+// static
+std::unique_ptr<Content> Content::CreateFromString(const string& str) {
+  // The extra byte holds a NUL terminator; it used to be left uninitialized.
+  const size_t len = str.length();
+  std::unique_ptr<char[]> buf(new char[len + 1]);
+  memcpy(buf.get(), str.data(), len);
+  buf[len] = '\0';
+  return CreateFromUnique(std::move(buf), len);
+}
+
+// static
+std::unique_ptr<Content> Content::CreateFromContent(const Content& content) {
+  // Deep copy: CreateFromBuffer allocates a new buffer for the bytes.
+  return CreateFromBuffer(content.buf_.get(), content.size());
+}
+
+// static
+std::unique_ptr<Content> Content::CreateFromBuffer(const char* buffer,
+                                                   size_t len) {
+  std::unique_ptr<char[]> copy(new char[len + 1]);
+  CHECK(copy != nullptr);
+  copy[len] = '\0';  // Terminator; it is not counted in size().
+  memcpy(copy.get(), buffer, len);
+  return CreateFromUnique(std::move(copy), len);
+}
+
+// static
+std::unique_ptr<Content> Content::CreateFromUnique(
+    std::unique_ptr<const char[]> buffer, size_t len) {
+  const char* const end = buffer.get() + len;  // Taken before the move.
+  return std::unique_ptr<Content>(new Content(std::move(buffer), end));
+}
+
+}  // namespace devtools_goma
diff --git a/client/content.h b/client/content.h
new file mode 100644
index 0000000..51f2879
--- /dev/null
+++ b/client/content.h
@@ -0,0 +1,60 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_CONTENT_H_
+#define DEVTOOLS_GOMA_CLIENT_CONTENT_H_
+
+#include <memory>
+#include <string>
+
+#include "basictypes.h"
+#include "string_piece.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class ScopedFd;
+
+class Content final {
+ public:
+  // Creates content from a file. nullptr will be returned if an error
+  // occurred, e.g. the file does not exist.
+  static std::unique_ptr<Content> CreateFromFile(const string& filepath);
+  static std::unique_ptr<Content> CreateFromString(const string& str);
+  static std::unique_ptr<Content> CreateFromContent(const Content& content);
+
+  // The content of |buf| is copied, so Content won't own |buf|.
+  // CreateFromBuffer allocates new memory for the copy.
+  // Prefer CreateFromBuffer when |len| is smaller than the size of |buf|.
+  // CreateFromUnique takes ownership of |buf| without copying it.
+  static std::unique_ptr<Content> CreateFromBuffer(const char* buf, size_t len);
+  static std::unique_ptr<Content> CreateFromUnique(
+      std::unique_ptr<const char[]> buf, size_t len);
+
+  // Creates content from file descriptor. This method also takes |filesize|
+  // so that we can skip calling stat. The argument |filepath| is used for
+  // logging purpose.
+  static std::unique_ptr<Content> CreateFromFileDescriptor(
+      const string& filepath, const ScopedFd& fd, size_t filesize);
+
+  StringPiece ToStringPiece() const { return StringPiece(buf_.get(), size()); }
+  const char* buf() const { return buf_.get(); }
+  const char* buf_end() const { return buf_end_; }
+  size_t size() const { return buf_end() - buf(); }
+
+ private:
+  Content(std::unique_ptr<const char[]> buf, const char* buf_end)
+      : buf_(std::move(buf)), buf_end_(buf_end) {}
+
+  std::unique_ptr<const char[]> buf_;
+  const char* buf_end_;  // One past the last content byte.
+
+  DISALLOW_COPY_AND_ASSIGN(Content);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_CONTENT_H_
diff --git a/client/content_cursor.cc b/client/content_cursor.cc
new file mode 100644
index 0000000..f1ccb09
--- /dev/null
+++ b/client/content_cursor.cc
@@ -0,0 +1,33 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "content_cursor.h"
+
+#include <algorithm>
+
+namespace devtools_goma {
+
+int ContentCursor::GetChar() {
+  if (cur_ == buf_end()) return EOF;
+  // Go through unsigned char (as getc() does) so that bytes >= 0x80 come
+  // back as 128..255 and a 0xFF byte cannot be mistaken for EOF.
+  return static_cast<unsigned char>(*cur_++);
+}
+
+bool ContentCursor::Advance(size_t n) {
+  // Compare |n| with the bytes left instead of forming |cur_ + n|: a pointer
+  // past the end of the buffer is undefined behaviour and may wrap around
+  // for huge |n|, which would let the old bounds check pass.
+  const size_t remaining = static_cast<size_t>(buf_end() - cur_);
+  const bool fits = n <= remaining;
+  cur_ = fits ? cur_ + n : buf_end();
+  return fits;
+}
+
+bool ContentCursor::SkipUntil(char c) {
+  // Stop on the first |c| or at the end of the buffer, whichever is first.
+  while (cur_ != buf_end() && *cur_ != c) ++cur_;
+  return cur_ != buf_end();
+}
+
+}  // namespace devtools_goma
diff --git a/client/content_cursor.h b/client/content_cursor.h
new file mode 100644
index 0000000..a2314ab
--- /dev/null
+++ b/client/content_cursor.h
@@ -0,0 +1,47 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_CONTENT_CURSOR_H_
+#define DEVTOOLS_GOMA_CLIENT_CONTENT_CURSOR_H_
+
+#include <memory>
+
+#include "content.h"
+
+namespace devtools_goma {
+
+// ContentCursor is a cursor that runs on Content. It owns Content.
+class ContentCursor final {
+ public:
+  explicit ContentCursor(std::unique_ptr<Content> content)
+      : content_(std::move(content)),
+        cur_(content_->buf()) {
+  }
+
+  ContentCursor(const ContentCursor&) = delete;
+  void operator=(const ContentCursor&) = delete;
+
+  const char* buf() const { return content_->buf(); }
+  const char* buf_end() const { return content_->buf_end(); }
+  const char* cur() const { return cur_; }
+
+  int GetChar();  // Reads and consumes one character; EOF at buf_end().
+
+  // Advances the cursor by |n| characters.
+  // Returns true if that many characters are available.
+  // If |n| is too large, the cursor moves to the end of the buffer instead.
+  bool Advance(size_t n);
+
+  // Skip until |c| is found. Returns true if |c| is found.
+  // When true is returned, *cur() must be |c|.
+  bool SkipUntil(char c);
+
+ private:
+  std::unique_ptr<Content> content_;
+  const char* cur_;  // Current position inside content_'s buffer.
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_CONTENT_CURSOR_H_
diff --git a/client/content_cursor_unittest.cc b/client/content_cursor_unittest.cc
new file mode 100644
index 0000000..6bdafb7
--- /dev/null
+++ b/client/content_cursor_unittest.cc
@@ -0,0 +1,94 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "content_cursor.h"
+
+#include <gtest/gtest.h>
+
+#include <cstring>
+#include <memory>
+
+namespace {
+
+// Builds a Content over the first 5 bytes ("01234") of an 11-byte buffer, so
+// the byte at buf_end() is '5' rather than '\0'. CreateFromUnique is used
+// because it wraps the buffer as is, without adding a terminator.
+std::unique_ptr<devtools_goma::Content> MakeNonNulTerminatedContent() {
+  static const char kDigits[] = "0123456789";
+  std::unique_ptr<char[]> storage(new char[sizeof(kDigits)]);
+  memcpy(storage.get(), kDigits, sizeof(kDigits));
+  return devtools_goma::Content::CreateFromUnique(std::move(storage), 5);
+}
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+TEST(ContentCursorTest, Advance) {
+  ContentCursor c(Content::CreateFromString("0123456789"));  // 10 bytes.
+
+  EXPECT_TRUE(c.Advance(3));
+  EXPECT_EQ(c.cur() - c.buf(), 3);
+
+  // Jump exactly to the end. This is OK.
+  EXPECT_TRUE(c.Advance(7));
+  EXPECT_EQ(c.cur() - c.buf(), 10);
+
+  // Cannot overrun; the cursor stays at the end.
+  EXPECT_FALSE(c.Advance(1));
+  EXPECT_EQ(c.cur() - c.buf(), 10);
+
+  // Advancing 0 is allowed, even at the end.
+  EXPECT_TRUE(c.Advance(0));
+  EXPECT_EQ(c.cur() - c.buf(), 10);
+}
+
+TEST(ContentCursorTest, SkipUntil) {
+  {
+    ContentCursor c(MakeNonNulTerminatedContent());
+    ASSERT_FALSE(c.SkipUntil('\0'));  // No '\0' within the 5 bytes.
+  }
+
+  {
+    ContentCursor c(MakeNonNulTerminatedContent());
+    EXPECT_TRUE(c.SkipUntil('2'));
+    EXPECT_EQ(*c.cur(), '2');
+
+    // Check |cur| index to avoid string comparison.
+    EXPECT_EQ(c.cur() - c.buf(), 2);
+  }
+
+  {
+    ContentCursor c(MakeNonNulTerminatedContent());
+    EXPECT_TRUE(c.SkipUntil('4'));  // Last byte of the content.
+    EXPECT_EQ(*c.cur(), '4');
+    EXPECT_EQ(c.cur() - c.buf(), 4);
+  }
+
+  // '5' is in the underlying buffer but beyond buf_end(), so out of range.
+  {
+    ContentCursor c(MakeNonNulTerminatedContent());
+    EXPECT_FALSE(c.SkipUntil('5'));
+    EXPECT_EQ(c.cur() - c.buf(), c.buf_end() - c.buf());
+  }
+
+  // '7' should be out of range as well.
+  {
+    ContentCursor c(MakeNonNulTerminatedContent());
+    EXPECT_FALSE(c.SkipUntil('7'));
+    EXPECT_EQ(c.cur() - c.buf(), c.buf_end() - c.buf());
+  }
+}
+
+TEST(ContentCursorTest, SkipUntilEvil) {
+  std::unique_ptr<char[]> buf(new char[11]);
+  strcpy(buf.get(), "0123456789");
+  buf[3] = '\0';  // Embedded \0 inside the 5-byte Content.
+
+  ContentCursor c(Content::CreateFromUnique(std::move(buf), 5));
+  EXPECT_TRUE(c.SkipUntil('4'));  // The search must not stop at the \0.
+  EXPECT_EQ(c.cur() - c.buf(), 4);
+}
+
+}  // namespace devtools_goma
diff --git a/client/counterz.cc b/client/counterz.cc
new file mode 100644
index 0000000..8006ece
--- /dev/null
+++ b/client/counterz.cc
@@ -0,0 +1,57 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "counterz.h"
+
+#include <algorithm>
+
+#include <glog/logging.h>
+
+namespace devtools_goma {
+
+void CounterInfo::Dump(
+    std::string* name, int64_t* time_ns, int64_t* count) const {
+  *name = name_;
+  *count = counter_.value();
+  *time_ns = total_time_in_ns_.value();
+}
+
+// Replaces |*json| with an object that has one entry per registered counter.
+void Counterz::DumpToJson(Json::Value* json) const {
+  *json = Json::Value(Json::objectValue);
+
+  AUTOLOCK(lock, &mu_);
+  for (const auto& info : counters_) {
+    std::string name;
+    int64_t time_ns, count;
+    info->Dump(&name, &time_ns, &count);
+    // TODO: human readable representation.
+    Json::Value& entry = (*json)[name];
+    entry["count"] = Json::Int64(count);
+    entry["time(s)"] = Json::Value(time_ns / 1e9);
+    entry["avg(ms)"] = Json::Value(time_ns / 1e6 / std::max<int64_t>(count, 1));
+  }
+}
+
+Counterz* Counterz::instance_;
+
+/* static */
+void Counterz::Init() {
+  CHECK(instance_ == nullptr);
+  instance_ = new Counterz;
+}
+
+/* static */
+void Counterz::Quit() {
+  CHECK(instance_ != nullptr);
+  delete instance_;
+  instance_ = nullptr;
+}
+
+/* static */
+Counterz* Counterz::Instance() {
+  return instance_;
+}
+
+}  // namespace devtools_goma
diff --git a/client/counterz.h b/client/counterz.h
new file mode 100644
index 0000000..91bb14b
--- /dev/null
+++ b/client/counterz.h
@@ -0,0 +1,124 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_COUNTERZ_H_
+#define DEVTOOLS_GOMA_CLIENT_COUNTERZ_H_
+
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "atomic_stats_counter.h"
+#include "autolock_timer.h"
+#include "json/json.h"
+#include "lockhelper.h"
+
+namespace devtools_goma {
+
+// Call count and accumulated time for one instrumented location.
+class CounterInfo {
+ public:
+  // Builds the label "<location>(<funcname>:<name>)" with std::string appends,
+  // so this header does not need strlen(), whose header it never includes.
+  CounterInfo(const char* const location,
+              const char* const funcname,
+              const char* const name) {
+    name_.append(location).append("(").append(funcname);
+    name_.append(":").append(name).append(")");
+  }
+
+  // Counts one more call that took |time_ns| nanoseconds.
+  void Inc(int64_t time_ns) {
+    counter_.Add(1);
+    total_time_in_ns_.Add(time_ns);
+  }
+
+  // Copies out the label, the accumulated time and the call count.
+  void Dump(std::string* name, int64_t* time_ns, int64_t* count) const;
+
+ private:
+  CounterInfo(const CounterInfo&) = delete;
+  CounterInfo& operator=(const CounterInfo&) = delete;
+
+  std::string name_;
+  StatsCounter counter_;
+  StatsCounter total_time_in_ns_;
+};
+
+class Counterz {
+ public:
+  void DumpToJson(Json::Value* json) const;
+
+  CounterInfo* NewCounterInfo(const char* const location,
+                              const char* const funcname,
+                              const char* const name) {
+    AUTOLOCK(lock, &mu_);
+    counters_.emplace_back(new CounterInfo(location, funcname, name));
+    return counters_.back().get();
+  }
+
+  static void Init();
+  static void Quit();
+  static Counterz* Instance();
+
+ private:
+  Counterz() {}
+  Counterz(const Counterz&) = delete;
+  Counterz& operator=(const Counterz&) = delete;
+
+  mutable Lock mu_;
+  std::vector<std::unique_ptr<CounterInfo>> counters_;
+
+  static Counterz* instance_;
+};
+
+// Stores the scope's walltime in CounterInfo; a null CounterInfo is ignored.
+class ScopedCounter {
+ public:
+  explicit ScopedCounter(CounterInfo* counter_info)
+      : counter_info_(counter_info), timer_(SimpleTimer::START) {}
+
+  ~ScopedCounter() {
+    // GOMA_COUNTERZ passes nullptr when Counterz::Instance() is null.
+    if (counter_info_) counter_info_->Inc(timer_.GetInNanoSeconds());
+  }
+
+ private:
+  ScopedCounter(const ScopedCounter&) = delete;
+  ScopedCounter& operator=(const ScopedCounter&) = delete;
+
+  CounterInfo* counter_info_;
+  SimpleTimer timer_;
+};
+
+// If HAVE_COUNTERZ is defined, counterz is enabled.
+#ifdef HAVE_COUNTERZ
+
+#define GOMA_COUNTERZ_STRINGFY(i) #i
+#define GOMA_COUNTERZ_STR(i) GOMA_COUNTERZ_STRINGFY(i)
+
+#define GOMA_COUNTERZ_CAT(a, b) a ## b
+#define GOMA_COUNTERZ_CONCAT(a, b) GOMA_COUNTERZ_CAT(a, b)
+#define GOMA_COUNTERZ_INFO_VAR_NAME(name) GOMA_COUNTERZ_CONCAT(name, __LINE__)
+
+#define GOMA_COUNTERZ(name)                                                \
+  static CounterInfo* GOMA_COUNTERZ_INFO_VAR_NAME(counter_info) =          \
+      Counterz::Instance() == nullptr ?                                    \
+          nullptr : Counterz::Instance()->NewCounterInfo(                  \
+              __FILE__ ":" GOMA_COUNTERZ_STR(__LINE__), __func__, (name)); \
+  ScopedCounter GOMA_COUNTERZ_INFO_VAR_NAME(scoped_counter)                \
+      (GOMA_COUNTERZ_INFO_VAR_NAME(counter_info));
+
+#else
+
+#define GOMA_COUNTERZ_N(name, n)
+#define GOMA_COUNTERZ(name)
+
+#endif  // HAVE_COUNTERZ
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_COUNTERZ_H_
diff --git a/client/cpp_input.h b/client/cpp_input.h
new file mode 100644
index 0000000..84fa720
--- /dev/null
+++ b/client/cpp_input.h
@@ -0,0 +1,51 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_CPP_INPUT_H_
+#define DEVTOOLS_GOMA_CLIENT_CPP_INPUT_H_
+
+#include <memory>
+#include <string>
+
+#include "content.h"
+#include "cpp_input_stream.h"
+#include "include_guard_detector.h"
+
+namespace devtools_goma {
+
+class CppInput {
+ public:
+  CppInput(std::unique_ptr<Content> content, const FileId& fileid,
+           const string& filepath, const string& directory,
+           int include_dir_index)
+      : filepath_(filepath),
+        directory_(directory), include_dir_index_(include_dir_index),
+        stream_(std::move(content), fileid) {
+  }
+
+  const string& filepath() const { return filepath_; }
+  const string& directory() const { return directory_; }
+  const FileId& fileid() const { return stream_.fileid(); }
+  int include_dir_index() const { return include_dir_index_; }
+
+  CppInputStream* stream() { return &stream_; }
+
+  IncludeGuardDetector* include_guard_detector() {
+    return &include_guard_detector_;
+  }
+
+ private:
+  const string filepath_;
+  const string directory_;
+  const int include_dir_index_;
+
+  CppInputStream stream_;
+  IncludeGuardDetector include_guard_detector_;
+
+  DISALLOW_COPY_AND_ASSIGN(CppInput);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_CPP_INPUT_H_
diff --git a/client/cpp_input_stream.cc b/client/cpp_input_stream.cc
new file mode 100644
index 0000000..6ce6d1a
--- /dev/null
+++ b/client/cpp_input_stream.cc
@@ -0,0 +1,88 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "cpp_input_stream.h"
+
+#include <glog/logging.h>
+
+#include "string_util.h"
+
+namespace devtools_goma {
+
+// Steps over the current byte, counting a line on '\n'.  No end-of-buffer check.
+void CppInputStream::ConsumeChar() {
+  if (*cur_++ == '\n') ++line_;
+}
+
+size_t CppInputStream::GetLengthToCurrentFrom(
+    const char* from, int lastchar) const {
+  return cur_ - from - (lastchar == EOF ? 0 : 1);
+}
+
+void CppInputStream::Advance(int pos, int line) {
+  this->line_ += line;
+  cur_ += pos;
+}
+
+// NOTE(review): if char is signed, bytes >= 0x80 are returned negative.
+int CppInputStream::GetChar() {
+  DCHECK(cur_);
+  if (cur_ >= content_->buf_end()) return EOF;
+  if (*cur_ == '\n') ++line_;
+  return *cur_++;
+}
+
+// Like GetChar(), but a backslash that is followed by "\n", "\r\n" or a lone
+// '\r' is skipped together with that line ending, and reading resumes after
+// it.  Any other backslash is returned as an ordinary character.
+int CppInputStream::GetCharWithBackslashHandling() {
+  for (int c = GetChar();; c = GetChar()) {
+    if (c != '\\') return c;
+
+    const char* const after_backslash = cur_;
+    if (PeekChar() == '\r') Advance(1, 0);
+    if (PeekChar() == '\n') Advance(1, 1);
+    // Nothing was skipped, so this backslash is not a line splice.
+    if (cur_ == after_backslash) return c;
+  }
+}
+
+// Undoes the GetChar() that returned |c|.  EOF is ignored because GetChar()
+// does not advance the cursor when it reports the end of the buffer.
+void CppInputStream::UngetChar(int c) {
+  if (c == EOF) return;
+  --cur_;
+  if (c == '\n') --line_;
+}
+
+// Returns the byte under the cursor without consuming it, or EOF at the end.
+int CppInputStream::PeekChar() const {
+  DCHECK(cur_);
+  const bool at_end = cur_ >= content_->buf_end();
+  return at_end ? EOF : *cur_;
+}
+
+// Same as PeekChar() but looks |offset| bytes ahead; no lower-bound check.
+int CppInputStream::PeekChar(int offset) const {
+  DCHECK(cur_);
+  const char* const target = cur_ + offset;
+  return target >= content_->buf_end() ? EOF : *target;
+}
+
+void CppInputStream::SkipWhiteSpaces() {
+  int c = GetChar();
+  while (IsCppBlank(c)) {
+    c = GetChar();
+    if (c == '\\') {  // Possibly a line splice between the blanks.
+      c = GetChar();  // NOTE(review): drops '\\' even if no newline follows.
+      if (c == '\r')
+        c = GetChar();
+      if (c == '\n')
+        c = GetChar();
+    }
+  }
+  UngetChar(c);  // Put back the character that ended the scan.
+}
+
+}  // namespace devtools_goma
diff --git a/client/cpp_input_stream.h b/client/cpp_input_stream.h
new file mode 100644
index 0000000..61642ca
--- /dev/null
+++ b/client/cpp_input_stream.h
@@ -0,0 +1,51 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_CPP_INPUT_STREAM_H_
+#define DEVTOOLS_GOMA_CLIENT_CPP_INPUT_STREAM_H_
+
+#include <memory>
+
+#include "content.h"
+#include "file_id.h"
+
+namespace devtools_goma {
+
+class CppInputStream {
+ public:
+  explicit CppInputStream(
+      std::unique_ptr<Content> content, const FileId& fileid)
+      : content_(std::move(content)), cur_(content_->buf()), line_(1),
+        fileid_(fileid) {}
+
+  CppInputStream(const CppInputStream&) = delete;
+  void operator=(const CppInputStream&) = delete;
+
+  int line() const { return line_; }
+  const char* cur() const { return cur_; }
+  const char* begin() const { return content_->buf(); }
+  const char* end() const { return content_->buf_end(); }
+  size_t pos() const { return cur_ - content_->buf(); }
+  const FileId& fileid() const { return fileid_; }
+
+  void ConsumeChar();
+  size_t GetLengthToCurrentFrom(const char* from, int lastchar) const;
+  void Advance(int pos, int line);
+  int GetChar();
+  int GetCharWithBackslashHandling();
+  void UngetChar(int c);
+  int PeekChar() const;
+  int PeekChar(int offset) const;
+  void SkipWhiteSpaces();
+
+ private:
+  std::unique_ptr<Content> content_;
+  const char* cur_;
+  int line_;
+  const FileId fileid_;
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_CPP_INPUT_STREAM_H_
diff --git a/client/cpp_macro.cc b/client/cpp_macro.cc
new file mode 100644
index 0000000..37c20ba
--- /dev/null
+++ b/client/cpp_macro.cc
@@ -0,0 +1,89 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "cpp_macro.h"
+
+#include "autolock_timer.h"
+
+namespace devtools_goma {
+
+namespace {
+
+Lock mu_;
+std::vector<std::unique_ptr<MacroEnv>>* macro_env_cache_ GUARDED_BY(mu_);
+
+}  // anonymous namespace
+
+string Macro::DebugString(CppParser* parser, const string& name) const {
+  string str;
+  str.reserve(64);
+  str.append("Macro[");
+  str.append(name);
+  switch (type) {
+    case OBJ:
+      str.append("(OBJ)]");
+      break;
+    case FUNC:
+      str.append("(FUNC, args:");
+      str.append(std::to_string(num_args));
+      if (is_vararg) {
+        str.append(", vararg");
+      }
+      str.append(")]");
+      break;
+    case CBK:
+      str.append("(CALLBACK)]");
+      break;
+    case CBK_FUNC:
+      str.append("(CALLBACK_FUNC)]");
+      break;
+    case UNDEFINED:
+      str.append("(UNDEFINED)]");
+      break;
+    case UNUSED:
+      str.append("(UNUSED)]");
+      break;
+  }
+  str.append(" => ");
+  if (callback) {
+    str.append((parser->*callback)().DebugString());
+  } else {
+    for (const auto& iter : replacement) {
+      str.append(iter.DebugString());
+    }
+  }
+  return str;
+}
+
+void InitMacroEnvCache() {
+  AUTOLOCK(lock, &mu_);
+  CHECK(macro_env_cache_ == nullptr);
+  macro_env_cache_ = new std::vector<std::unique_ptr<MacroEnv>>();
+}
+
+void QuitMacroEnvCache() {
+  AUTOLOCK(lock, &mu_);
+  delete macro_env_cache_;
+  macro_env_cache_ = nullptr;
+}
+
+std::unique_ptr<MacroEnv> GetMacroEnvFromCache() {
+  AUTOLOCK(lock, &mu_);
+  if (macro_env_cache_ != nullptr && !macro_env_cache_->empty()) {
+    std::unique_ptr<MacroEnv> env = std::move(macro_env_cache_->back());
+    macro_env_cache_->pop_back();
+    return env;  // Reuse the most recently released environment.
+  }
+  return std::unique_ptr<MacroEnv>(new MacroEnv);  // Nothing cached.
+}
+
+// Hands |macro| back for reuse.  Without a cache it is not stored anywhere.
+void ReleaseMacroEnvToCache(std::unique_ptr<MacroEnv> macro) {
+  AUTOLOCK(lock, &mu_);
+  if (macro_env_cache_ != nullptr) {
+    macro_env_cache_->push_back(std::move(macro));
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/client/cpp_macro.h b/client/cpp_macro.h
new file mode 100644
index 0000000..235ff3f
--- /dev/null
+++ b/client/cpp_macro.h
@@ -0,0 +1,95 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_CPP_MACRO_H_
+#define DEVTOOLS_GOMA_CLIENT_CPP_MACRO_H_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "cpp_token.h"
+#include "file_id.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class CppParser;
+
+// There are two types of macros:
+// 1. Object-like macro (OBJ):
+//  # define identifier [space] replacement-list [NL]
+//
+// 2. Function-like macro (FUNC):
+//  # define identifier '(' [identifier-list] ')' replacement-list [NL]
+//  # define identifier '(' ... ')' replacement-list [NL]
+//  # define identifier '(' identifier-list, ... ')' replacement-list [NL]
+//
+// CALLBACK and CALLBACK_FUNC types are internal macro types that are used
+// for predefined macros (obj-like and func-like macros) that need to be
+// evaluated at macro expansion time.
+struct Macro {
+  using Token = CppToken;
+  using ArrayTokenList = std::vector<Token>;
+  // CppParser member functions that produce the value of a callback macro.
+  using CallbackObj = Token (CppParser::*)();
+  using CallbackFunc = Token (CppParser::*)(const ArrayTokenList&);
+  enum Type {
+    // UNDEFINED represents macro is referenced without define,
+    // or deleted by #undef
+    UNDEFINED,
+    OBJ,
+    FUNC,
+    CBK,
+    CBK_FUNC,
+    // UNUSED represents macro in macro cache is not referenced and defined in
+    // current preprocessing.
+    UNUSED,
+  };
+  // Creates an UNDEFINED macro with the given id.
+  explicit Macro(int id) : Macro(id, UNDEFINED) {}
+  Macro(int id, Type type)
+      : id(id), type(type), callback(nullptr), callback_func(nullptr),
+        num_args(0), is_vararg(false), has_identifier_in_replacement(false),
+        macro_pos(0) {}
+
+  // True if this macro was recorded at offset |pos| of the file |fid|.
+  bool IsMatch(const FileId& fid, size_t pos) const {
+    return fileid.IsValid() && fid == fileid && pos == macro_pos;
+  }
+  string DebugString(CppParser* parser, const string& name) const;
+
+  int id;
+  Type type;
+  ArrayTokenList replacement;
+  CallbackObj callback;
+  CallbackFunc callback_func;
+  size_t num_args;
+  bool is_vararg;
+  bool has_identifier_in_replacement;
+
+  // fileid and macro_pos represent position and fileid of file
+  // that macro is defined. This is used to check validness of cached macro.
+  FileId fileid;
+  size_t macro_pos;
+};
+
+
+// MacroEnv is a map from macro name to macro. It includes parsed macro set.
+// CppParser will take one instance of MacroEnv.
+// At first, type of each macro is UNUSED, but it's updated while parsing.
+// Before returning this to macro env pool,
+// every macro type is marked as UNUSED.
+using MacroEnv = std::unordered_map<string, Macro>;
+
+void InitMacroEnvCache();
+void QuitMacroEnvCache();
+
+std::unique_ptr<MacroEnv> GetMacroEnvFromCache();
+void ReleaseMacroEnvToCache(std::unique_ptr<MacroEnv> macro);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_CPP_MACRO_H_
diff --git a/client/cpp_parser.cc b/client/cpp_parser.cc
new file mode 100644
index 0000000..7773c05
--- /dev/null
+++ b/client/cpp_parser.cc
@@ -0,0 +1,2340 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "cpp_parser.h"
+
+#include <limits.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <memory>
+#include <set>
+
+#include "compiler_info.h"
+#include "compiler_specific.h"
+#include "content.h"
+#include "counterz.h"
+#include "cpp_input.h"
+#include "cpp_macro.h"
+#include "cpp_tokenizer.h"
+#include "file_id.h"
+#include "include_guard_detector.h"
+#include "ioutil.h"
+#include "lockhelper.h"
+#include "path.h"
+#include "path_resolver.h"
+#include "static_darray.h"
+#include "string_piece.h"
+#include "string_piece_utils.h"
+#include "string_util.h"
+#include "util.h"
+
+namespace {
+
+static const int kIncludeFileDepthLimit = 1024;
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+#include "cpp_parser_darray.h"
+
+
+// CppParser::PragmaOnceFileSet ----------------------------------------
+
+void CppParser::PragmaOnceFileSet::Insert(const std::string& file) {
+  files_.insert(PathResolver::ResolvePath(file));
+}
+
+bool CppParser::PragmaOnceFileSet::Has(const std::string& file) const {
+  return files_.find(PathResolver::ResolvePath(file)) != files_.end();
+}
+
+// CppParser::IntegerConstantEvaluator ---------------------------------
+
+// Evaluates the integer constant expression of a conditional directive.
+// Binary operators are handled by precedence climbing; all math is in int.
+class CppParser::IntegerConstantEvaluator {
+ public:
+  // |tokens| is held by reference and must outlive this object.
+  IntegerConstantEvaluator(const ArrayTokenList& tokens, CppParser* parser)
+      : tokens_(tokens), iter_(tokens.begin()), parser_(parser) {
+    CHECK(parser_);
+    VLOG(2) << parser_->DebugStringPrefix() << " Evaluating: "
+            << DebugString(TokenList(tokens.begin(), tokens.end()));
+  }
+
+  // Evaluates starting at the current token and returns the value.
+  int GetValue() { return Conditional(); }
+
+ private:
+  // Parses `expr` or `expr ? a : b`.  Both arms are always evaluated.
+  int Conditional() {
+    int v1 = Expression(Primary(), 0);
+    if (iter_ == tokens_.end() || !iter_->IsPuncChar('?')) {
+      return v1;
+    }
+    ++iter_;
+    int v2 = Conditional();
+    if (iter_ == tokens_.end() || !iter_->IsPuncChar(':')) {
+      parser_->Error("syntax error: missing ':' in ternary operation");
+      return 0;
+    }
+    ++iter_;
+    int v3 = Conditional();
+    return v1 ? v2 : v3;
+  }
+
+  // Folds binary operators of at least |min_precedence| into |v1|.
+  int Expression(int v1, int min_precedence) {
+    while (iter_ != tokens_.end() &&
+           iter_->IsOperator() &&
+           iter_->GetPrecedence() >= min_precedence) {
+      const Token& op = *iter_++;
+      int v2 = Primary();
+      while (iter_ != tokens_.end() &&
+             iter_->IsOperator() &&
+             iter_->GetPrecedence() > op.GetPrecedence()) {
+        v2 = Expression(v2, iter_->GetPrecedence());
+      }
+      v1 = op.ApplyOperator(v1, v2);
+    }
+    return v1;
+  }
+
+  // Parses one operand with its unary prefixes.  '!' and '~' store into
+  // |result| so that a preceding '-' negates them too ("-!0" is -1, not 1).
+  int Primary() {
+    int result = 0;
+    int sign = 1;
+    while (iter_ != tokens_.end()) {
+      const Token& token = *iter_++;
+      switch (token.type) {
+        case Token::IDENTIFIER:
+          // If it comes to here without expanded to number, it means
+          // identifier is not defined.  Such case should be 0 unless
+          // it is the C++ reserved keyword "true".
+          if (parser_->is_cplusplus() && token.string_value == "true") {
+            // Int value of C++ reserved keyword "true" is 1.
+            // See: ISO/IEC 14882:2011 (C++11) 4.5 Integral promotions.
+            result = 1;
+          }
+          break;
+        case Token::NUMBER:
+          result = token.v.int_value;
+          break;
+        case Token::SUB:
+          sign = 0 - sign;
+          continue;
+        case Token::ADD:
+          continue;
+        case Token::PUNCTUATOR:
+          switch (token.v.char_value.c) {
+            case '(':
+              result = GetValue();
+              if (iter_ != tokens_.end() && iter_->IsPuncChar(')')) {
+                ++iter_;
+              }
+              break;
+            case '!':
+              result = !Primary();
+              break;
+            case '~':
+              result = ~Primary();
+              break;
+            default:
+              parser_->Error("unknown unary operator: ", token.DebugString());
+              break;
+          }
+          break;
+        default:
+          break;
+      }
+      break;
+    }
+    return sign * result;
+  }
+
+  const ArrayTokenList& tokens_;
+  ArrayTokenList::const_iterator iter_;
+  CppParser* parser_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntegerConstantEvaluator);
+};
+
+// CppParser -----------------------------------------------------------
+
+// static
+#ifndef _WIN32
+pthread_once_t CppParser::key_once_ = PTHREAD_ONCE_INIT;
+#else
+INIT_ONCE CppParser::key_once_;
+#endif
+
+bool CppParser::global_initialized_ = false;
+CppParser::PredefinedObjMacroMap* CppParser::predefined_macros_ = nullptr;
+CppParser::PredefinedFuncMacroMap* CppParser::predefined_func_macros_ = nullptr;
+
+const CppParser::DirectiveHandler CppParser::kDirectiveTable[] = {
+  &CppParser::ProcessInclude,
+  &CppParser::ProcessImport,
+  &CppParser::ProcessIncludeNext,
+  &CppParser::ProcessDefine,
+  &CppParser::ProcessUndef,
+  &CppParser::ProcessIfdef,
+  &CppParser::ProcessIfndef,
+  &CppParser::ProcessIf,
+  &CppParser::ProcessElse,
+  &CppParser::ProcessEndif,
+  &CppParser::ProcessElif,
+  &CppParser::ProcessPragma,
+};
+
+const CppParser::DirectiveHandler CppParser::kFalseConditionDirectiveTable[] = {
+  nullptr, nullptr, nullptr,  // include, import, include_next
+  nullptr, nullptr,        // define, undef
+  &CppParser::ProcessConditionInFalse,
+  &CppParser::ProcessConditionInFalse,
+  &CppParser::ProcessConditionInFalse,
+  &CppParser::ProcessElse,
+  &CppParser::ProcessEndif,
+  &CppParser::ProcessElif,
+  nullptr,              // pragma
+};
+
+COMPILE_ASSERT(arraysize(CppParser::kDirectiveTable) ==
+               arraysize(kDirectiveKeywords),
+               directives_keywords_handler_mismatch);
+COMPILE_ASSERT(arraysize(CppParser::kDirectiveTable) ==
+               arraysize(CppParser::kFalseConditionDirectiveTable),
+               directives_array_size_mismatch);
+
+CppParser::CppParser()
+    : condition_in_false_depth_(0),
+      counter_(0),
+      is_cplusplus_(false),
+      next_macro_id_(0),
+      bracket_include_dir_index_(kIncludeDirIndexStarting),
+      include_observer_(nullptr),
+      error_observer_(nullptr),
+      compiler_info_(nullptr),
+      is_vc_(false),
+      disabled_(false),
+      skipped_files_(0),
+      total_files_(0),
+      obj_cache_hit_(0),
+      func_cache_hit_(0),
+      owner_thread_id_(GetCurrentThreadId()) {
+  char buf[26];
+  time_t tm;
+  time(&tm);
+#ifndef _WIN32
+  ctime_r(&tm, buf);
+#else
+  // All Windows CRT functions are thread-safe if use /MT or /MD in compile
+  // options.
+  ctime_s(buf, 26, &tm);
+#endif
+  current_time_ = string(&buf[11], 8);
+  current_date_ = string(&buf[4], 7) + string(&buf[20], 4);
+#ifndef _WIN32
+  pthread_once(&key_once_, InitializeStaticOnce);
+#else
+  InitOnceExecuteOnce(&key_once_, CppParser::InitializeWinOnce,
+                      nullptr, nullptr);
+#endif
+  // Push empty input as a sentinel.
+  last_input_.reset(
+      new Input(std::unique_ptr<Content>(Content::CreateFromString("")),
+                FileId(), "<empty>", "<empty>", -1));
+  macros_ = GetMacroEnvFromCache();
+}
+
+CppParser::~CppParser() {
+  DCHECK(THREAD_ID_IS_SELF(owner_thread_id_));
+  while (!inputs_.empty())
+    PopInput();
+  for (auto* p : used_macros_) {
+    p->type = Macro::UNUSED;
+  }
+  ReleaseMacroEnvToCache(std::move(macros_));
+}
+
+void CppParser::SetCompilerInfo(const CompilerInfo* compiler_info) {
+  compiler_info_ = compiler_info;
+  if (compiler_info_ == nullptr)
+    return;
+
+  AddStringInput(compiler_info_->predefined_macros(), "(predefined)");
+  ProcessDirectives();
+
+  enabled_predefined_macros_ = compiler_info->supported_predefined_macros();
+  set_is_cplusplus(compiler_info_->lang() == "c++");
+}
+
+// Runs every remaining directive through its handler and returns false if
+// the parser is, or becomes, disabled.
+// NOTE(review): kFalseConditionDirectiveTable has null slots; this relies on
+// NextDirective() never returning those while the condition is false.
+bool CppParser::ProcessDirectives() {
+  if (disabled_) return false;
+
+  int directive;
+  while ((directive = NextDirective()) >= 0) {
+    DCHECK(directive < static_cast<int>(arraysize(kDirectiveKeywords)));
+    VLOG(2) << DebugStringPrefix() << " Directive:"
+            << kDirectiveKeywords[directive];
+    const DirectiveHandler* const handlers =
+        CurrentCondition() ? kDirectiveTable : kFalseConditionDirectiveTable;
+    (this->*(handlers[directive]))();
+  }
+
+  return !disabled_;
+}
+
+void CppParser::UngetToken(const Token& token) {
+  last_token_ = token;
+}
+
+// Advances to the next directive that matters under the current condition
+// and returns its index into kDirectiveKeywords.  Inputs that run out of
+// directives are popped on the way; -1 means no input is left.
+int CppParser::NextDirective() {
+  while (HasMoreInput()) {
+    std::string error_reason;
+    if (!CppTokenizer::SkipUntilDirective(input()->stream(), &error_reason)) {
+      // When no directive was found, false is returned. It's not an error.
+      // In this case, |error_reason| is empty.
+      if (!error_reason.empty()) {
+        Error(error_reason);
+      }
+      // This input is finished; the loop condition decides whether an
+      // enclosing input is left to resume.
+      PopInput();
+      continue;
+    }
+    auto* const stream = input()->stream();
+    stream->SkipWhiteSpaces();
+    // A false condition selects kConditionalDirectiveArray for the lookup.
+    const StaticDoubleArray& darray =
+        CurrentCondition() ? kDirectiveArray : kConditionalDirectiveArray;
+    StaticDoubleArray::LookupHelper helper(&darray);
+
+    // Feed the directive name to the matcher one character at a time.
+    for (;;) {
+      const int c = stream->GetCharWithBackslashHandling();
+      const bool in_name = c != EOF && (IsAsciiAlphaDigit(c) || c == '_');
+      if (in_name) {
+        // No keyword starts like this: give up and scan for the next one.
+        if (!helper.Lookup(static_cast<char>(c))) break;
+        continue;
+      }
+      // End of the name.  UngetChar() ignores EOF, so it is safe to call.
+      stream->UngetChar(c);
+      const int value = helper.GetValue();
+      if (value >= 0) return value;
+      break;
+    }
+  }
+  return -1;
+}
+
+void CppParser::AddMacroByString(const string& name, const string& body) {
+  const size_t depth = inputs_.size();  // Detects a refused push.
+  AddStringInput(name + (body.empty() ? "" : " ") + body + '\n', "(macro)");
+  if (inputs_.size() > depth) ProcessDefine();  // Else we'd parse stale input.
+}
+
+// Implements #undef for |name|.  Predefined macros are rejected with an
+// error; a macro that is unknown, UNUSED or already UNDEFINED is left as is.
+void CppParser::DeleteMacro(const string& name) {
+  if (predefined_macros_->find(name) != predefined_macros_->end() ||
+      predefined_func_macros_->find(name) != predefined_func_macros_->end()) {
+    Error("predefined macro cannot be deleted:", name);
+    return;
+  }
+  VLOG(2) << "#UNDEF Macro " << name;
+
+  auto found = macros_->find(name);
+  if (found == macros_->end()) return;
+  if (found->second.type != Macro::UNUSED) {
+    found->second.type = Macro::UNDEFINED;
+  }
+}
+
+bool CppParser::HasMacro(const string& name) {
+  return GetMacro(name, false) != nullptr;
+}
+
+// Reports whether |name| currently counts as a defined macro.
+bool CppParser::IsMacroDefined(const string& name) {
+  const Macro* const m = GetMacro(name, false);
+  const bool active = m != nullptr && m->type != Macro::UNUSED &&
+                      m->type != Macro::UNDEFINED;
+  if (!active) {
+    return false;
+  }
+  // Hack for GCC 5.
+  // e.g. __has_include__ is not defined but callable.
+  return !(m->type == Macro::CBK_FUNC && IsHiddenPredefinedMacro(name));
+}
+
+void CppParser::AddStringInput(const string& content, const string& pathname) {
+  if (inputs_.size() >= kIncludeFileDepthLimit) {
+    LOG(ERROR) << "Exceed include depth limit: " << kIncludeFileDepthLimit
+               << " pathname: " << pathname;
+    disabled_ = true;
+    return;
+  }
+  inputs_.emplace_back(
+      new Input(std::unique_ptr<Content>(Content::CreateFromString(content)),
+                FileId(), pathname, "(string)",
+                kCurrentDirIncludeDirIndex));
+}
+
+void CppParser::AddFileInput(
+    std::unique_ptr<Content> fp, const FileId& fileid, const string& filepath,
+    const string& directory, int include_dir_index) {
+  if (inputs_.size() >= kIncludeFileDepthLimit) {
+    LOG(ERROR) << "Exceeds include depth limit: " << kIncludeFileDepthLimit
+               << " filepath: " << filepath;
+    disabled_ = true;
+    return;
+  }
+
+  DCHECK(fp);
+  DCHECK_GE(include_dir_index, kCurrentDirIncludeDirIndex);
+  if (base_file_.empty())
+    base_file_ = filepath;
+  inputs_.emplace_back(new Input(std::move(fp), fileid, filepath, directory,
+                                 include_dir_index));
+  VLOG(2) << "Including file: " << filepath;
+}
+
+// Returns one DebugString() line per entry of the macro environment.
+string CppParser::DumpMacros() {
+  string dump;
+  for (const auto& entry : *macros_)
+    dump += entry.second.DebugString(this, entry.first) + '\n';
+  return dump;
+}
+
+/* static */
+string CppParser::DebugString(const TokenList& tokens) {
+  return DebugString(tokens.begin(), tokens.end());
+}
+
+/* static */
+string CppParser::DebugString(TokenList::const_iterator begin,
+                              TokenList::const_iterator end) {
+  string str;
+  for (auto iter = begin; iter != end; ++iter) {
+    str.append(iter->DebugString());
+  }
+  return str;
+}
+
+// Returns "(<current file path>:<current line>)" for log and error messages.
+string CppParser::DebugStringPrefix() {
+  const string& path = input()->filepath();
+  const string line = std::to_string(input()->stream()->line());
+
+  string str;
+  str.reserve(path.size() + 32);
+  str.append("(").append(path).append(":").append(line).append(")");
+  return str;
+}
+
+// Reports |error| with no extra argument.  Does nothing when no error
+// observer is installed.
+void CppParser::Error(StringPiece error) {
+  if (error_observer_) Error(error, "");
+}
+
+// Sends "CppParser(<file>:<line>) <error><arg>" to the error observer, if
+// one is installed.
+void CppParser::Error(StringPiece error, StringPiece arg) {
+  if (!error_observer_) return;
+
+  string message;
+  message.reserve(error.size() + input()->filepath().size() + 100);
+  message.append("CppParser").append(DebugStringPrefix()).append(" ");
+  error.AppendToString(&message);
+  arg.AppendToString(&message);
+  error_observer_->HandleError(message);
+}
+
+void CppParser::ProcessInclude() {
+  GOMA_COUNTERZ("include");
+  input()->include_guard_detector()->OnProcessOther();
+  ProcessIncludeInternal(kTypeInclude);
+}
+
+void CppParser::ProcessImport() {
+  GOMA_COUNTERZ("import");
+  input()->include_guard_detector()->OnProcessOther();
+  if (!is_vc_) {
+    // For gcc, #import means include only-once.
+    // http://gcc.gnu.org/onlinedocs/gcc-3.2/cpp/Obsolete-once-only-headers.html
+
+    // For Objective-C, #import means include only-once.
+    // https://developer.apple.com/library/mac/documentation/MacOSX/Conceptual/BPFrameworks/Tasks/IncludingFrameworks.html
+    // > If you are working in Objective-C, you may use the #import directive
+    // instead of the #include directive. The two directives have the same
+    // basic results. but the #import directive guarantees that the same
+    // header file is never included more than once.
+    ProcessIncludeInternal(kTypeImport);
+    return;
+  }
+  // For VC++, #import is used to incorporate information from a type library.
+  // http://msdn.microsoft.com/en-us/library/8etzzkb6(v=vs.71).aspx
+  LOG(WARNING) << DebugStringPrefix() << " #import used, "
+               << "but goma couldn't handle it yet. "
+               << "See b/9286087";
+  disabled_ = true;
+}
+
+void CppParser::ProcessIncludeNext() {
+  GOMA_COUNTERZ("include_next");
+  input()->include_guard_detector()->OnProcessOther();
+  ProcessIncludeInternal(kTypeIncludeNext);
+}
+
+// Handles #define: reads the macro name and dispatches on the token after it.
+void CppParser::ProcessDefine() {
+  input()->include_guard_detector()->OnProcessOther();
+  Token name = NextToken(true);
+  if (name.type != Token::IDENTIFIER) {
+    Error("invalid preprocessing macro name token: ", name.DebugString());
+    return;
+  }
+  Token token = NextToken(false);
+  if (token.IsPuncChar('(')) {
+    ReadFunctionMacro(name.string_value);
+    return;
+  }
+  if (token.type == Token::NEWLINE || token.type == Token::END) {
+    Macro* macro = AddMacro(name.string_value, Macro::OBJ, input()->fileid(),
+                            input()->stream()->pos()).first;
+    VLOG(2) << DebugStringPrefix() << " #DEFINE "
+            << macro->DebugString(this, name.string_value);
+    return;
+  }
+  if (token.type != Token::SPACE) {
+    Error("missing whitespace after macro name");
+    UngetToken(token);
+  }
+  ReadObjectMacro(name.string_value);
+}
+
+// Handles the "#undef" directive: deletes the named macro, or reports an
+// error when the token after the directive is not an identifier.
+void CppParser::ProcessUndef() {
+  input()->include_guard_detector()->OnProcessOther();
+  Token name = NextToken(true);
+  if (name.type == Token::IDENTIFIER) {
+    DeleteMacro(name.string_value);
+    return;
+  }
+  Error("invalid preprocessing macro name token ", name.DebugString());
+}
+
+// Records a conditional directive without evaluating it: notifies the
+// include guard detector and increases |condition_in_false_depth_| by one
+// (ProcessEndif() decreases it again).
+void CppParser::ProcessConditionInFalse() {
+  auto&& guard_detector = input()->include_guard_detector();
+  guard_detector->OnProcessCondition();
+  ++condition_in_false_depth_;
+}
+
+// Handles the "#ifdef" directive: pushes a new condition that is true when
+// the named macro is currently defined.
+void CppParser::ProcessIfdef() {
+  input()->include_guard_detector()->OnProcessCondition();
+  const bool is_defined = IsMacroDefined(ReadDefined());
+  VLOG(2) << DebugStringPrefix() << " #IFDEF " << is_defined;
+  conditions_.push_back(Condition(is_defined));
+}
+
+// Handles the "#ifndef" directive: pushes a new condition that is true when
+// the named macro is NOT defined.  The macro name is also handed to the
+// include guard detector.
+void CppParser::ProcessIfndef() {
+  string macro_name = ReadDefined();
+  input()->include_guard_detector()->OnProcessIfndef(macro_name);
+  const bool is_undefined = !IsMacroDefined(macro_name);
+  VLOG(2) << DebugStringPrefix() << " #IFNDEF " << is_undefined;
+  conditions_.push_back(Condition(is_undefined));
+}
+
+// Handles the "#if" directive: evaluates the expression and pushes the
+// resulting condition.  |guard_ident| is non-empty only when the expression
+// had the form "!defined(X)"; it is reported to the include guard detector
+// either way.
+void CppParser::ProcessIf() {
+  string guard_ident;
+  const int value = ReadConditionWithCheckingIncludeGuard(&guard_ident);
+  input()->include_guard_detector()->OnProcessIf(guard_ident);
+  VLOG(2) << DebugStringPrefix() << " #IF " << value;
+  const bool holds = (value != 0);
+  conditions_.push_back(Condition(holds));
+}
+
+// Handles the "#else" directive.
+//
+// Ignored while inside a conditional recorded by ProcessConditionInFalse().
+// Otherwise the innermost condition becomes true only if it is currently
+// false and no earlier branch of the same conditional was taken.
+void CppParser::ProcessElse() {
+  input()->include_guard_detector()->OnProcessOther();
+  if (condition_in_false_depth_ > 0) {
+    return;
+  }
+  if (conditions_.empty()) {
+    Error("stray else");
+    return;
+  }
+  auto& innermost = conditions_.back();
+  innermost.cond = !(innermost.cond || innermost.taken);
+}
+
+// Handles the "#endif" directive: closes either a conditional recorded by
+// ProcessConditionInFalse() (by decreasing the depth counter) or the
+// innermost entry of |conditions_|.
+void CppParser::ProcessEndif() {
+  input()->include_guard_detector()->OnProcessEndif();
+  if (condition_in_false_depth_ != 0) {
+    --condition_in_false_depth_;
+    return;
+  }
+  if (!conditions_.empty()) {
+    conditions_.pop_back();
+    return;
+  }
+  Error("stray endif");
+}
+
+// Handles the "#elif" directive.
+//
+// Ignored while inside a conditional recorded by ProcessConditionInFalse().
+// If an earlier branch of the innermost conditional was already taken, the
+// branch is disabled without evaluating the expression; otherwise the
+// expression decides both |cond| and |taken|.
+void CppParser::ProcessElif() {
+  input()->include_guard_detector()->OnProcessOther();
+  if (condition_in_false_depth_ > 0) {
+    return;
+  }
+  if (conditions_.empty()) {
+    Error("stray elif");
+    return;
+  }
+
+  auto& innermost = conditions_.back();
+  if (innermost.taken) {
+    innermost.cond = false;
+    return;
+  }
+
+  const int value = ReadCondition();
+  VLOG(2) << DebugStringPrefix() << " #ELIF " << value;
+  const bool holds = (value != 0);
+  innermost.cond = holds;
+  innermost.taken |= holds;
+}
+
+// Handles the "#pragma" directive.  Only "#pragma once" has an effect: the
+// current file path is added to |pragma_once_fileset_|.
+void CppParser::ProcessPragma() {
+  input()->include_guard_detector()->OnProcessOther();
+  Token first(NextToken(true));
+  const bool is_once =
+      first.type == Token::IDENTIFIER && first.string_value == "once";
+  if (!is_once) {
+    return;
+  }
+  pragma_once_fileset_.Insert(input()->filepath());
+}
+
+// Returns the next token.
+//
+// A token held in |last_token_| (any type other than END) is returned first
+// and the slot is reset.  Otherwise tokens are read from the current input
+// stream; when a stream yields END the input is popped and reading continues
+// with the next one.  Returns an END token once no input is left.
+//
+// |skip_space| is forwarded to the tokenizer; it does not affect a token
+// taken from |last_token_|.
+CppParser::Token CppParser::NextToken(bool skip_space) {
+  if (last_token_.type != Token::END) {
+    Token pending = last_token_;
+    last_token_ = Token();
+    VLOG(3) << pending.DebugString();
+    return pending;
+  }
+
+  for (; HasMoreInput(); PopInput()) {
+    Token token;
+    std::string error_reason;
+    const bool ok = CppTokenizer::NextTokenFrom(
+        input()->stream(), skip_space, &token, &error_reason);
+    if (!ok && !error_reason.empty()) {
+      Error(error_reason);
+    }
+
+    if (token.type == Token::END) {
+      // This stream is exhausted; the loop increment pops it.
+      continue;
+    }
+    VLOG(3) << token.DebugString();
+    return token;
+  }
+  return Token(Token::END);
+}
+
+// Resolves the argument of #include, #import or #include_next and reports
+// it to |include_observer_|.
+//
+// Accepted argument forms:
+//   1. <filepath>     read verbatim up to '>'.
+//   2. "filepath"     read verbatim up to '"'.
+//   3. anything else  the rest of the line is macro-expanded; the result
+//                     must start with '<' or with a string literal.
+//
+// All forms share one dispatch step (see |dispatch| below), so an empty
+// path is ignored in every form and never handed to the observer.
+void CppParser::ProcessIncludeInternal(IncludeType include_type) {
+  input()->stream()->SkipWhiteSpaces();
+  int c;
+  if (!HasMoreInput() || (c = input()->stream()->GetChar()) == EOF) {
+    Error("missing include path");
+    return;
+  }
+  const char* directive = "";
+  switch (include_type) {
+    case kTypeInclude: directive = "include"; break;
+    case kTypeImport: directive = "import"; break;
+    case kTypeIncludeNext: directive = "include_next"; break;
+    default:
+      LOG(FATAL) << "unknown include_type=" << include_type;
+  }
+
+  // Hands |path| to the include observer.  |open_quote| is the delimiter the
+  // path was written with: '<' or '"'.
+  const auto dispatch = [this, include_type, directive](
+      const string& path, char open_quote) {
+    if (path.empty() || !include_observer_) {
+      return;
+    }
+    const bool is_bracket = (open_quote == '<');
+    const char close_quote = is_bracket ? '>' : '"';
+
+    // Bracket includes start searching at |bracket_include_dir_index_|,
+    // quote includes at the directory index of the current input.
+    // #include_next always searches as a bracket include, starting one
+    // directory after the current input's.
+    int quote_char = open_quote;
+    int next_index = is_bracket ? bracket_include_dir_index_
+                                : input()->include_dir_index();
+    if (include_type == kTypeIncludeNext) {
+      quote_char = '<';
+      next_index = input()->include_dir_index() + 1;
+    }
+    if (is_bracket) {
+      // We should not find the current directory (without specifying by -I).
+      DCHECK_GE(next_index, bracket_include_dir_index_)
+          << ' ' << input()->include_dir_index();
+    }
+
+    if (!include_observer_->HandleInclude(
+            path, input()->directory(), input()->filepath(), quote_char,
+            next_index)) {
+      LOG(WARNING) << "HandleInclude failed #" << directive
+                   << " " << open_quote << path << close_quote
+                   << " from " << input()->filepath()
+                   << " [dir:" << input()->directory()
+                   << " index:" << input()->include_dir_index() << "]";
+      return;
+    }
+    if (include_type == kTypeImport) {
+      // #import means include-only-once: remember the file that is on top
+      // of the input stack after the observer handled the include.
+      DCHECK(!inputs_.empty());
+      const string& filepath = inputs_.back()->filepath();
+      pragma_once_fileset_.Insert(filepath);
+      VLOG(1) << "HandleInclude #import " << filepath;
+    }
+  };
+
+  // Simple <filepath> and "filepath" cases.
+  if (c == '<' || c == '"') {
+    const char open_quote = static_cast<char>(c);
+    const char delimiter = (c == '<') ? '>' : '"';
+    string path;
+    string error_reason;
+    if (!CppTokenizer::ReadStringUntilDelimiter(input()->stream(), &path,
+                                                delimiter, &error_reason)) {
+      Error(error_reason);
+    }
+    dispatch(path, open_quote);
+    return;
+  }
+  input()->stream()->UngetChar(c);
+
+  // Include path is neither <filepath> nor "filepath".
+  // Try expanding macros if there are any.
+  ArrayTokenList tokens;
+  Token token = NextToken(true);
+  while (token.type != Token::END && token.type != Token::NEWLINE) {
+    tokens.push_back(token);
+    token = NextToken(false);
+  }
+
+  ArrayTokenList expanded;
+  Expand0(tokens, &expanded, false);
+
+  if (expanded.empty()) {
+    Error("#include expects \"filename\" or <filename>");
+    LOG(WARNING) << "HandleInclude empty arg for #" << directive;
+    return;
+  }
+
+  // See if the expanded token(s) is <filepath> or "filepath".
+  token = expanded.front();
+  if (token.type == Token::LT) {
+    // Concatenate everything between '<' and the first '>' (or the end of
+    // the expansion when there is no '>').
+    string path;
+    auto iter = expanded.begin();
+    ++iter;
+    for (; iter != expanded.end() && iter->type != Token::GT; ++iter) {
+      path.append(iter->GetCanonicalString());
+    }
+    dispatch(path, '<');
+    return;
+  }
+  if (token.type == Token::STRING) {
+    dispatch(token.string_value, '"');
+    return;
+  }
+  Error("#include expects \"filename\" or <filename>");
+}
+
+// Reads the replacement list of the object-like macro |name| from the
+// current line and stores it in the macro registered through AddMacro().
+//
+// If AddMacro() reports (via the pair's second member) that the definition
+// can be skipped, the line is left unread and only the cache-hit statistics
+// are updated.
+//
+// While reading, runs of spaces are collapsed to a single space, and spaces
+// on either side of "##" are dropped.
+void CppParser::ReadObjectMacro(const string& name) {
+  const auto pos = input()->stream()->pos();
+  const auto& fileid = input()->fileid();
+
+  auto optional_macro = AddMacro(name, Macro::OBJ, fileid, pos);
+  if (optional_macro.second) {
+    ++obj_cache_hit_;
+    GOMA_COUNTERZ("object skip");
+    return;
+  } else {
+    GOMA_COUNTERZ("object no skip");
+  }
+  Macro* macro = optional_macro.first;
+
+  CHECK(macro);
+  Token token = NextToken(true);
+  while (token.type != Token::NEWLINE && token.type != Token::END) {
+    // Remove contiguous spaces (i.e. '   ' => ' ')
+    // Remove preceding spaces for ## (i.e. ' ##' => '##')
+    if (token.type == Token::SPACE ||
+        token.type == Token::DOUBLESHARP) {
+      TrimTokenSpace(macro);
+    }
+
+    if (token.type == Token::IDENTIFIER) {
+      macro->has_identifier_in_replacement = true;
+    }
+    // Decide this before |token| is moved into the replacement list; a
+    // moved-from token must not be inspected afterwards.
+    const bool is_paste = (token.type == Token::DOUBLESHARP);
+    macro->replacement.push_back(std::move(token));
+    // Remove trailing spaces for ## (i.e. '## ' => '##')
+    token = NextToken(is_paste);
+  }
+
+  TrimTokenSpace(macro);
+
+  VLOG(2) << DebugStringPrefix() << " #DEFINE "
+          << macro->DebugString(this, name);
+}
+
+// Reads a function-like macro definition for |name|.  The opening '(' has
+// already been consumed by the caller.
+//
+// Step 1 parses the parameter list up to ')'.  Named parameters are mapped
+// to their position; "..." marks the macro as variadic and must be last.
+// Any malformed list is reported through Error() and nothing is registered.
+//
+// Step 2 registers the macro and reads its replacement list.  Identifiers
+// naming a parameter become macro-parameter tokens, "__VA_ARGS__" becomes
+// the variadic-parameter token, runs of spaces are collapsed and spaces
+// around "##" are dropped.  |num_args| counts named parameters only.
+void CppParser::ReadFunctionMacro(const string& name) {
+  const auto pos = input()->stream()->pos();
+  const auto& fileid = input()->fileid();
+
+  unordered_map<string, size_t> params;
+  size_t param_index = 0;
+  bool is_vararg = false;
+  for (;;) {
+    Token token = NextToken(true);
+    if (token.type == Token::NEWLINE || token.type == Token::END) {
+      Error("missing ')' in the macro parameter list");
+      return;
+    } else if (token.type == Token::IDENTIFIER) {
+      if (!params.insert(
+              std::make_pair(token.string_value, param_index)).second) {
+        Error("duplicate macro parameter ", token.string_value);
+        return;
+      }
+      param_index++;
+      token = NextToken(true);
+      if (token.IsPuncChar(',')) {
+        continue;
+      }
+      if (token.IsPuncChar(')')) {
+        break;
+      }
+      // Anything else after a parameter name falls through to the error
+      // below.
+    } else if (token.type == Token::TRIPLEDOT) {
+      is_vararg = true;
+      token = NextToken(true);
+      if (!token.IsPuncChar(')')) {
+        Error("vararg must be the last of the macro parameter list");
+        return;
+      }
+      break;
+    } else if (token.IsPuncChar(')')) {
+      break;
+    }
+    Error("invalid preprocessing macro arg token ", token.DebugString());
+    return;
+  }
+
+  auto optional_macro = AddMacro(name, Macro::FUNC, fileid, pos);
+  if (!optional_macro.first) {
+    return;
+  }
+  if (optional_macro.second) {
+    ++func_cache_hit_;
+    GOMA_COUNTERZ("function skip");
+    return;
+  } else {
+    GOMA_COUNTERZ("function no skip");
+  }
+
+  Macro* macro = optional_macro.first;
+  DCHECK(params.size() == param_index);
+  macro->num_args = params.size();
+  macro->is_vararg = is_vararg;
+
+  Token token = NextToken(true);
+  while (token.type != Token::NEWLINE && token.type != Token::END) {
+    if (token.type == Token::IDENTIFIER) {
+      auto iter = params.find(token.string_value);
+      if (iter != params.end()) {
+        token.MakeMacroParam(iter->second);
+      } else if (token.string_value == "__VA_ARGS__") {
+        token.MakeMacroParamVaArgs();
+      }
+    }
+
+    // Remove contiguous spaces (i.e. '   ' => ' ')
+    // Remove preceding spaces for ## (i.e. ' ##' => '##')
+    if (token.type == Token::SPACE ||
+        token.type == Token::DOUBLESHARP) {
+      TrimTokenSpace(macro);
+    }
+    if (token.type == Token::IDENTIFIER) {
+      macro->has_identifier_in_replacement = true;
+    }
+    // Decide this before |token| is moved into the replacement list; a
+    // moved-from token must not be inspected afterwards.
+    const bool is_paste = (token.type == Token::DOUBLESHARP);
+    macro->replacement.push_back(std::move(token));
+    // Remove trailing spaces for ## (i.e. '## ' => '##')
+    token = NextToken(is_paste);
+  }
+
+  TrimTokenSpace(macro);
+
+  VLOG(2) << DebugStringPrefix() << " #DEFINE "
+          << macro->DebugString(this, name);
+}
+
+// Reads the operand of "defined", "#ifdef" or "#ifndef": either IDENT or
+// (IDENT).  Returns the identifier, or an empty string (after reporting an
+// error) when no identifier is found.  A missing ')' is reported but the
+// identifier is still returned; the offending token is pushed back.
+string CppParser::ReadDefined() {
+  Token name(NextToken(true));
+  const bool parenthesized = name.IsPuncChar('(');
+  if (parenthesized) {
+    name = NextToken(true);
+  }
+
+  if (name.type != Token::IDENTIFIER) {
+    Error("macro names must be identifiers");
+    return string();
+  }
+
+  if (parenthesized) {
+    Token closing(NextToken(true));
+    if (!closing.IsPuncChar(')')) {
+      UngetToken(closing);
+      Error("missing terminating ')' character");
+    }
+  }
+  return name.string_value;
+}
+
+// Evaluates the expression of an "#if" line and, as a side product, detects
+// the include guard form "#if !defined(FOO)".
+//
+// On return |*ident| holds FOO when the whole expression consisted of exactly
+// '!' immediately followed by a "defined" expression; otherwise it is
+// cleared.  The return value is the evaluated integer constant expression.
+int CppParser::ReadConditionWithCheckingIncludeGuard(string* ident) {
+  // We use this state machine to detect include guard.
+  // TODO: If IntegerConstantEvaluator can process 'defined',
+  // this code should be simpler.
+  enum State {
+    START,
+    HAS_READ_BANG,
+    HAS_READ_COND,
+    NOT_INCLUDE_GUARD,
+  };
+  State state = START;
+
+  // 1. Reads tokens while replacing "defined" expression.
+  ArrayTokenList tokens;
+  for (Token token(NextToken(true));
+       token.type != Token::END && token.type != Token::NEWLINE;
+       token = NextToken(false)) {
+    // Name of the macro tested by "defined"; stays empty for other tokens.
+    string defined_name;
+    if (token.type == Token::IDENTIFIER && token.string_value == "defined") {
+      defined_name = ReadDefined();
+      token = Token(static_cast<int>(IsMacroDefined(defined_name)));
+    }
+
+    switch (state) {
+      case START:
+        if (token.type == Token::PUNCTUATOR && token.v.char_value.c == '!') {
+          state = HAS_READ_BANG;
+        } else {
+          state = NOT_INCLUDE_GUARD;
+        }
+        break;
+      case HAS_READ_BANG:
+        if (defined_name.empty()) {
+          state = NOT_INCLUDE_GUARD;
+        } else {
+          state = HAS_READ_COND;
+          *ident = defined_name;
+        }
+        break;
+      case HAS_READ_COND:
+      case NOT_INCLUDE_GUARD:
+        // When we read something when state is HAS_READ_COND, it's not
+        // an include guard, let alone NOT_INCLUDE_GUARD.
+        state = NOT_INCLUDE_GUARD;
+        break;
+    }
+
+    tokens.push_back(std::move(token));
+  }
+
+  // When state is HAS_READ_COND, it means we detected #if !defined(FOO).
+  if (state != HAS_READ_COND) {
+    ident->clear();
+  }
+
+  // 2. Expands macros.
+  ArrayTokenList expanded;
+  Expand0(tokens, &expanded, true);
+
+  // 3. Evaluates the expanded integer constant expression.
+  IntegerConstantEvaluator evaluator(expanded, this);
+  return evaluator.GetValue();
+}
+
+// Drops every SPACE token at the end of |macro|'s replacement list.
+void CppParser::TrimTokenSpace(Macro* macro) {
+  auto& body = macro->replacement;
+  for (;;) {
+    if (body.empty() || body.back().type != Token::SPACE) {
+      return;
+    }
+    body.pop_back();
+  }
+}
+
+// Reads the rest of the current directive line as an integer constant
+// expression and returns its value.  "defined X" / "defined(X)" is replaced
+// by 0 or 1 before macro expansion.
+int CppParser::ReadCondition() {
+  // 1. Reads tokens while replacing "defined" expression.
+  ArrayTokenList tokens;
+  for (Token token(NextToken(true));
+       token.type != Token::END && token.type != Token::NEWLINE;
+       token = NextToken(false)) {
+    const bool is_defined_operator =
+        token.type == Token::IDENTIFIER && token.string_value == "defined";
+    if (is_defined_operator) {
+      token = Token(static_cast<int>(IsMacroDefined(ReadDefined())));
+    }
+    tokens.push_back(std::move(token));
+  }
+
+  // 2. Expands macros.
+  ArrayTokenList expanded;
+  Expand0(tokens, &expanded, true);
+
+  // 3. Evaluates the expanded integer constant expression.
+  IntegerConstantEvaluator evaluator(expanded, this);
+  return evaluator.GetValue();
+}
+
+// Collects one macro call argument from |input_tokens| into |arg|.
+//
+// On entry |*iter| is just after '(' or ','.  Leading spaces are always
+// dropped; inner spaces are dropped only when |skip_space| is set.  Commas
+// and closing parens inside nested parentheses belong to the argument.
+//
+// Returns true when the scan stopped on a top-level ',' or ')' (which
+// |*iter| then points at), false when the input ended first.
+bool CppParser::FastGetMacroArgument(const ArrayTokenList& input_tokens,
+                                     bool skip_space,
+                                     ArrayTokenList::const_iterator* iter,
+                                     ArrayTokenList* arg) {
+  const auto last = input_tokens.end();
+
+  for (; *iter != last && (*iter)->type == Token::SPACE; ++(*iter)) {
+  }
+
+  int depth = 0;
+  for (; *iter != last; ++(*iter)) {
+    const auto& current = **iter;
+    const bool is_close = current.IsPuncChar(')');
+    if (depth == 0 && (current.IsPuncChar(',') || is_close)) {
+      break;
+    }
+    if (!(skip_space && current.type == Token::SPACE)) {
+      arg->push_back(current);
+    }
+    if (current.IsPuncChar('(')) {
+      ++depth;
+    } else if (is_close) {
+      --depth;
+    }
+  }
+
+  // |*iter| is just ',' or ')'.
+  return depth == 0 && *iter != last;
+}
+
+// Collects the arguments of a function-like macro call into |args|.
+//
+// On entry |*iter| points at the macro identifier.
+// On success returns true and leaves |*iter| ON the closing ')' of the call
+// (not after it): FastExpand() advances past it with its own loop increment.
+// On failure returns false and restores |*iter| to the macro identifier; this
+// also covers the valid case where the identifier is not followed by '('.
+// |args| may already hold arguments collected before a failure was detected.
+bool CppParser::FastGetMacroArguments(const ArrayTokenList& input_tokens,
+                                      bool skip_space,
+                                      ArrayTokenList::const_iterator* iter,
+                                      std::vector<ArrayTokenList>* args) {
+  auto iter_backup = *iter;
+  // |*iter| is  macro identifier.
+  DCHECK((*iter)->type == Token::IDENTIFIER) << (*iter)->DebugString();
+  ++*iter;
+
+  // skip space between macro identifier and '('.
+  while (*iter != input_tokens.end() && (*iter)->type == Token::SPACE) {
+    ++*iter;
+  }
+
+  if (*iter == input_tokens.end() || !(*iter)->IsPuncChar('(')) {
+    // This case happens in valid below input.
+    // #define f(x)
+    // f
+    *iter = iter_backup;
+    return false;
+  }
+
+  // consume '('.
+  ++*iter;
+
+  while (*iter != input_tokens.end() && !(*iter)->IsPuncChar(')')) {
+    if ((*iter)->IsPuncChar(',')) {
+      ++*iter;
+    }
+    ArrayTokenList arg;
+    if (!FastGetMacroArgument(input_tokens, skip_space, iter, &arg)) {
+      LOG(WARNING) << "Failed to get FastGetMacroArgument: "
+                   << DebugString(TokenList(input_tokens.begin(),
+                                            input_tokens.end()));
+      *iter = iter_backup;
+      return false;
+    }
+
+    args->push_back(std::move(arg));
+  }
+
+  if (*iter == input_tokens.end() ||
+      !(*iter)->IsPuncChar(')')) {
+    LOG(WARNING) << "Failed to find close paren of function macro call: "
+                 << DebugString(TokenList(input_tokens.begin(),
+                                          input_tokens.end()));
+    *iter = iter_backup;
+    return false;
+  }
+
+  // |*iter| stays on the function close paren.  Do not advance it here: the
+  // caller's loop steps over the ')' itself.
+  return true;
+}
+
+// FastExpand tries one step of macro expansion for macros
+// for which simple replacements are sufficient.
+// For example,
+// ```
+// 1: #define A B  <macro id 1>
+// 2: #define B C  <macro id 2>
+// 3: #define C 1  <macro id 3>
+// 4: #if A
+// 5: #endif
+// ```
+// In line 4, we pass [IDENT("A")] as |input_tokens| to FastExpand,
+// and obtain [BEGIN_HIDE(1), IDENT(B), END_HIDE(1)] as |output_tokens|.
+// We apply FastExpand to the previous |output_tokens| and obtain
+// [BEGIN_HIDE(1), BEGIN_HIDE(2), IDENT(C), END_HIDE(2), END_HIDE(1)]
+// and finally obtain
+// [BEGIN_HIDE(1), BEGIN_HIDE(2), NUMBER(1), END_HIDE(2), END_HIDE(1)].
+//
+// This function returns true if macro expansion happened.
+// This function fails to expand macros in the following cases.
+// * macro containing '#', '##' or '__VA_ARGS__' tokens.
+// * macro containing "defined".
+//   top level "defined" in #if directive is processed in
+//   ReadConditionWithCheckingIncludeGuard function.
+// In these cases |*need_fallback| is set to true and the caller needs to
+// fall back to normal expansion; |output_tokens| is then incomplete.
+//
+// If we have recursive macros like below
+// ```
+// 1: #define A B  <macro id 1>
+// 2: #define B A  <macro id 2>
+// 3: #if A
+// 4: #endif
+// ```
+// Macro `A` is expanded like
+// [IDENT("A")] -> [BEGIN_HIDE(1), IDENT("B"), END_HIDE(1)]
+//   ->  [BEGIN_HIDE(1), BEGIN_HIDE(2), IDENT("A"), END_HIDE(2), END_HIDE(1)]
+// Then we detect recursion by BEGIN_HIDE(1),
+// so IDENT("A") is not replaced here.
+// TODO: Migrate slowpath to this vector only fastpath.
+bool CppParser::FastExpand(const ArrayTokenList& input_tokens, bool skip_space,
+                           std::set<int>* hideset,
+                           ArrayTokenList* output_tokens,
+                           bool* need_fallback) {
+  // TODO: handle these fallback case
+  for (const auto& token : input_tokens) {
+    if (token.type == Token::SHARP ||
+        token.type == Token::DOUBLESHARP ||
+        token.type == Token::MACRO_PARAM_VA_ARGS) {
+      *need_fallback = true;
+      return false;
+    }
+  }
+
+  bool replaced = false;
+
+  for (auto iter = input_tokens.begin();
+       iter != input_tokens.end(); ++iter) {
+
+    const auto& token = *iter;
+
+    // Hide markers are copied through and keep |hideset| in sync with the
+    // position in the token stream.
+    if (token.type == Token::BEGIN_HIDE) {
+      hideset->insert(token.v.int_value);
+      output_tokens->push_back(token);
+      continue;
+    } else if (token.type == Token::END_HIDE) {
+      hideset->erase(token.v.int_value);
+      output_tokens->push_back(token);
+      continue;
+    }
+
+    if (skip_space && token.type == Token::SPACE) {
+      continue;
+    }
+
+    if (token.type != Token::IDENTIFIER) {
+      output_tokens->push_back(token);
+      continue;
+    }
+
+    if (token.string_value == "defined") {
+      // TODO: handle defined
+      *need_fallback = true;
+      return replaced;
+    }
+
+    const auto* macro = GetMacro(token.string_value, false);
+    if (macro == nullptr || hideset->find(macro->id) != hideset->end()) {
+      output_tokens->push_back(token);
+      continue;
+    }
+
+    if (macro->type != Macro::FUNC &&
+        macro->type != Macro::OBJ &&
+        macro->type != Macro::CBK_FUNC &&
+        macro->type != Macro::UNDEFINED &&
+        macro->type != Macro::UNUSED) {
+      // TODO: handle other macros if necessary.
+      *need_fallback = true;
+      return replaced;
+    }
+
+    if (macro->type == Macro::OBJ) {
+      replaced = true;
+      if (macro->has_identifier_in_replacement) {
+        output_tokens->push_back(Token(Token::BEGIN_HIDE, macro->id));
+      }
+      for (const auto& rep : macro->replacement) {
+        if (skip_space && rep.type == Token::SPACE) {
+          continue;
+        }
+        output_tokens->push_back(rep);
+      }
+      if (macro->has_identifier_in_replacement) {
+        output_tokens->push_back(Token(Token::END_HIDE, macro->id));
+      }
+      continue;
+    }
+
+    // On success |iter| is left on the closing ')' of the call; the loop
+    // increment then steps over it.
+    std::vector<ArrayTokenList> args;
+    if (!FastGetMacroArguments(input_tokens, skip_space, &iter, &args)) {
+      if (macro->type == Macro::CBK_FUNC) {
+        // CBK_FUNC should not be illegal form.
+        *need_fallback = true;
+        return replaced;
+      }
+      output_tokens->push_back(token);
+      continue;
+    }
+
+    if (macro->type == Macro::CBK_FUNC) {
+      if (args.size() != 1) {
+        // number of arguments of CBK_FUNC should be 1.
+        *need_fallback = true;
+        return replaced;
+      }
+      replaced = true;
+      output_tokens->push_back((this->*(macro->callback_func))(args[0]));
+      continue;
+    }
+
+    DCHECK(macro->type == Macro::FUNC)
+        << macro->DebugString(this, token.string_value);
+
+    if (!macro->is_vararg && args.size() != macro->num_args) {
+      *need_fallback = true;
+      return replaced;
+    }
+
+    // #define x(a, b, ...) is treated as macro->num_args = 3
+    // and '...' can be empty.
+    // NOTE(review): ReadFunctionMacro() assigns num_args from the named
+    // parameters only, so this check can let a call through that supplies
+    // one argument fewer than the named parameters; the bounds guard in the
+    // substitution loop below covers that case.
+    if (macro->is_vararg && args.size() + 1 < macro->num_args) {
+      *need_fallback = true;
+      return replaced;
+    }
+
+    replaced = true;
+    if (macro->has_identifier_in_replacement) {
+      output_tokens->push_back(Token(Token::BEGIN_HIDE, macro->id));
+    }
+
+    for (const auto& rep : macro->replacement) {
+
+      if (skip_space && rep.type == Token::SPACE) {
+        continue;
+      }
+
+      if (rep.type == Token::MACRO_PARAM_VA_ARGS) {
+        for (size_t i = macro->num_args; i < args.size(); ++i) {
+          if (i > macro->num_args) {
+            output_tokens->push_back(Token(Token::PUNCTUATOR, ','));
+          }
+          ArrayTokenList expanded_arg;
+          replaced |= FastExpand(args[i], skip_space, hideset,
+                                 &expanded_arg, need_fallback);
+          if (*need_fallback) {
+            return replaced;
+          }
+          output_tokens->insert(
+              output_tokens->end(), expanded_arg.begin(), expanded_arg.end());
+        }
+        continue;
+      }
+
+      if (rep.type != Token::MACRO_PARAM) {
+        output_tokens->push_back(rep);
+        continue;
+      }
+
+      // The call may have supplied fewer arguments than the replacement
+      // references (see the NOTE above).  Never index past |args|; let the
+      // normal expansion path deal with such a call.
+      if (static_cast<size_t>(rep.v.param_index) >= args.size()) {
+        *need_fallback = true;
+        return replaced;
+      }
+
+      // need to expand arg before inserting.
+      ArrayTokenList expanded_arg;
+      replaced |= FastExpand(args[rep.v.param_index], skip_space, hideset,
+                             &expanded_arg, need_fallback);
+      if (*need_fallback) {
+        return replaced;
+      }
+      output_tokens->insert(
+          output_tokens->end(), expanded_arg.begin(), expanded_arg.end());
+    }
+
+    if (macro->has_identifier_in_replacement) {
+      output_tokens->push_back(Token(Token::END_HIDE, macro->id));
+    }
+  }
+  return replaced;
+}
+
+// Expands |input_tokens| using only FastExpand().
+//
+// FastExpand() is applied repeatedly, each pass with a fresh hide set, until
+// a pass performs no replacement.  Returns false, leaving |output_tokens|
+// untouched, as soon as a pass asks for fallback.  Otherwise strips the
+// BEGIN_HIDE/END_HIDE markers, stores the result in |output_tokens| and
+// returns true.
+bool CppParser::Expand0Fastpath(const ArrayTokenList& input_tokens,
+                                bool skip_space,
+                                ArrayTokenList* output_tokens) {
+  ArrayTokenList current(input_tokens);
+  bool need_fallback = false;
+  bool changed = true;
+  while (changed) {
+    std::set<int> hide_set;
+    ArrayTokenList next;
+    changed = FastExpand(
+        current, skip_space, &hide_set, &next, &need_fallback);
+    if (need_fallback) {
+      break;
+    }
+    current.swap(next);
+  }
+
+  if (need_fallback) {
+    GOMA_COUNTERZ("fallback");
+    return false;
+  }
+
+  GOMA_COUNTERZ("simple replace");
+  const auto is_hide_marker = [](const Token& t) {
+    return t.type == Token::BEGIN_HIDE || t.type == Token::END_HIDE;
+  };
+  const auto new_end =
+      std::remove_if(current.begin(), current.end(), is_hide_marker);
+  current.erase(new_end, current.end());
+
+  output_tokens->swap(current);
+  return true;
+}
+
+// Macro expansion code.
+// Most of the code below is a naive implementation of the published algorithm
+// described in http://www.spinellis.gr/blog/20060626/.
+//
+// Expand0 is the entry point: it macro-expands |input_tokens| into
+// |output_tokens|.  The cheap Expand0Fastpath() is tried first; only when it
+// declines is the full Expand() run, starting with an empty hide set for
+// every input token.
+void CppParser::Expand0(const ArrayTokenList& input_tokens,
+                        ArrayTokenList* output_tokens,
+                        bool skip_space) {
+  // If simple replacement is sufficient for expansion,
+  // does not call heavy Expand function.
+  const bool done_by_fastpath =
+      Expand0Fastpath(input_tokens, skip_space, output_tokens);
+  if (done_by_fastpath) {
+    return;
+  }
+
+  TokenList source_list(input_tokens.begin(), input_tokens.end());
+  MacroSetList source_hs, result_hs;
+  source_hs.assign(source_list.size(), MacroSet());
+
+  TokenList result_list;
+  VLOG(2) << DebugStringPrefix() << " Expand: " << DebugString(source_list);
+  MacroExpandContext source_ctx(&source_list, &source_hs);
+  MacroExpandContext result_ctx(&result_list, &result_hs);
+  Expand(&source_ctx, source_ctx.Begin(), &result_ctx, result_ctx.Begin(),
+         skip_space, true);
+  VLOG(2) << DebugStringPrefix() << " Expanded: "
+          << DebugString(result_list);
+
+  output_tokens->assign(result_list.begin(), result_list.end());
+}
+
+// Expands the tokens of |input| starting at |input_iter| and inserts the
+// result into |output| at |output_iter|.
+//
+// Each identifier is handled by the first matching case:
+//   - "defined" operator: replaced by 0 or 1.
+//   - 1.  not a macro, or hidden by its own hide set: copied as is.
+//   - 2.  object-like macro: substituted back into |input| and rescanned.
+//   - 2'. callback macro: replaced by the callback's result.
+//   - 3.  function-like macro: arguments collected, substituted, rescanned.
+//   - 3'. function-like callback macro: replaced by the callback's result.
+//   - 4.  anything else (including a function-like macro whose arguments
+//         could not be read): copied as is.
+// Non-identifier tokens are copied, except SPACE when |skip_space| is set.
+// When |use_hideset| is false, tokens are treated as having an empty hide
+// set; expanding a variadic macro turns |use_hideset| off for the rest of
+// this call.
+void CppParser::Expand(
+    MacroExpandContext* input, MacroExpandIterator input_iter,
+    MacroExpandContext* output, const MacroExpandIterator output_iter,
+    bool skip_space,
+    bool use_hideset) {
+  DCHECK(output);
+
+  while (input_iter != input->End()) {
+    MacroExpandIterator cur_input_iter = input_iter;
+    const Token& token = input_iter.token();
+    const MacroSet& hide_set = use_hideset ? input_iter.hide_set() : MacroSet();
+    ++input_iter;
+
+    DCHECK_NE(token.type, Token::BEGIN_HIDE);
+    DCHECK_NE(token.type, Token::END_HIDE);
+
+    if (token.type == Token::END) {
+      return;
+    }
+    if (token.type != Token::IDENTIFIER) {
+      if (token.type != Token::SPACE || !skip_space)
+        output->Insert(output_iter, token, hide_set);
+      continue;
+    }
+
+    VLOG(3) << " Expanding:" << DebugString(cur_input_iter.iter(),
+                                            input->End().iter())
+            << " token:" << token.DebugString();
+
+    // Handle "defined" before expanding macros.
+    // |input_iter| may already be at the end here ("defined" was the last
+    // token), so it is checked before every dereference.
+    if (token.string_value == "defined" &&
+        (!is_vc_ || (input_iter != input->End() &&
+                     input_iter.token().type == Token::SPACE))) {
+      bool has_paren = false;
+      if (input_iter != input->End() &&
+          (input_iter.token().IsPuncChar('(') ||
+           input_iter.token().type == Token::SPACE)) {
+        has_paren = input_iter.token().IsPuncChar('(');
+        // For now, we only output this warning for "defined(foo)".
+        // This is because 1. for "defined foo", the behavior of gcc
+        // and vc++ is same and 2. WebKit is using "defined foo" in
+        // its core library, so this can be a bit too noisy.
+        if (has_paren) {
+          if (compiler_info_ == nullptr ||
+              !compiler_info_->IsSystemInclude(this->input()->filepath())) {
+            LOG(WARNING)
+                << DebugStringPrefix()
+                << " Using \"defined\" in macro causes undefined behavior. "
+                << "See b/6533195";
+          }
+        }
+        ++input_iter;
+      }
+      // A missing operand (end of input) is reported the same way as a
+      // non-identifier operand.
+      if (!(input_iter != input->End()) ||
+          input_iter.token().type != Token::IDENTIFIER) {
+        Error("macro names must be identifiers");
+        return;
+      }
+      int defined = (GetMacro(input_iter.token().string_value, true)
+                     != nullptr);
+      ++input_iter;
+      if (has_paren && input_iter != input->End() &&
+          input_iter.token().IsPuncChar(')')) {
+        ++input_iter;
+      }
+      output->Insert(output_iter, Token(defined), hide_set);
+      continue;
+    }
+
+    // Case 1. input[0] is not a macro or in input[0]'s hide_set.
+    const string& name = token.string_value;
+    Macro* macro = GetMacro(name, false);
+    if (macro == nullptr || hide_set.Get(macro->id)) {
+      VLOG(4) << "expanding 1:" << token.DebugString();
+      output->Insert(output_iter, token, hide_set);
+      continue;
+    }
+
+    // Case 2. input[0] is an object-like macro ("()-less macro").
+    if (macro->type == Macro::OBJ) {
+      VLOG(4) << "expanding 2:" << macro->DebugString(this, name);
+      MacroSet hs = hide_set;
+      if (use_hideset) {
+        hs.Set(macro->id);
+      }
+      input_iter = Substitute(macro->replacement, macro->num_args,
+                              ArrayArgList(), hs,
+                              input, input_iter, skip_space, use_hideset);
+      continue;
+    }
+
+    // Case 2'. input[0] is a callback macro.
+    if (macro->type == Macro::CBK) {
+      VLOG(4) << "expanding 2':" << macro->DebugString(this, name);
+      CHECK(macro->callback);
+      Token result = (this->*(macro->callback))();
+      output->Insert(output_iter, result, hide_set);
+      continue;
+    }
+
+    // Case 3. input[0] is a function-like macro ("()'d macro").
+    if (macro->type == Macro::FUNC) {
+      VLOG(4) << "expanding 3:" << macro->DebugString(this, name);
+      ArrayArgList args;
+      MacroSet rparen_hs;
+      if (GetMacroArguments(name, macro, &args, *input, &input_iter,
+                            &rparen_hs)) {
+        MacroSet hs = hide_set;
+        if (macro->is_vararg) {
+          use_hideset = false;
+        }
+        if (use_hideset) {
+          hs.Union(rparen_hs);
+          hs.Set(macro->id);
+        }
+        input_iter = Substitute(macro->replacement, macro->num_args,
+                                args, hs, input, input_iter,
+                                skip_space, use_hideset);
+        continue;
+      } else {
+        // Falls through to case 4: the identifier is copied unexpanded.
+        VLOG(3) << "failed to get macro argument:" << token.DebugString();
+      }
+    }
+
+    // Case 3'. input[0] is a function-like callback macro.
+    if (macro->type == Macro::CBK_FUNC) {
+      VLOG(4) << "expanding 3':" << macro->DebugString(this, name);
+      // Get callback macro arguments.
+      if (!SkipUntilBeginMacroArguments(name, *input, &input_iter)) {
+        continue;
+      }
+      // Collect every token up to the matching ')'; nested parentheses are
+      // kept as part of the argument.
+      ArrayTokenList args;
+      int nest = 0;
+      while (input_iter != input->End()) {
+        const Token& t = input_iter.token();
+        ++input_iter;
+        if (t.IsPuncChar(')')) {
+          if (nest-- == 0) {
+            break;
+          }
+        } else if (t.IsPuncChar('(')) {
+          nest++;
+        }
+        args.push_back(t);
+      }
+      Token result = (this->*(macro->callback_func))(args);
+      output->Insert(output_iter, result, hide_set);
+      continue;
+    }
+
+    // Case 4. Other cases.
+    VLOG(4) << "expanding 4:" << macro->DebugString(this, name);
+    output->Insert(output_iter, token, hide_set);
+  }
+}
+
+// Substitute macro args, handle stringize and paste.
+// subst() in http://www.spinellis.gr/blog/20060626/
+//
+// Walks |replacement| one token at a time (peeking at the token that follows)
+// and hands the resulting tokens to output->Insert() at |output_iter|,
+// together with |hide_set|.
+//
+// |args| holds one token list per macro parameter, indexed by the param_index
+// stored in MACRO_PARAM tokens; the slot at index |num_args| is read for
+// __VA_ARGS__.
+// |skip_space| and |use_hideset| are only forwarded to Expand().
+//
+// Returns an iterator obtained by stepping one position back from
+// |output_iter| before substitution and one position forward afterwards,
+// i.e. presumably the first token produced here (or |output_iter| itself when
+// nothing was produced) -- this assumes Insert() adds in front of
+// |output_iter|; TODO confirm.
+CppParser::MacroExpandIterator CppParser::Substitute(
+    const ArrayTokenList& replacement,
+    size_t num_args,
+    const ArrayArgList& args, const MacroSet& hide_set,
+    MacroExpandContext* output,
+    const MacroExpandIterator output_iter,
+    bool skip_space,
+    bool use_hideset) {
+  // Remember the position just before the insertion point.
+  // NOTE(review): stepping back assumes |output_iter| is not the first
+  // position of |output| -- confirm that callers guarantee this.
+  MacroExpandIterator saved_iter = output_iter;
+  --saved_iter;
+  for (ArrayTokenList::const_iterator iter = replacement.begin();
+       iter != replacement.end(); ) {
+    const Token& token = *iter++;
+    // |next| stays a default-constructed Token when |token| is the last one.
+    Token next;
+    if (iter != replacement.end())
+      next = *iter;
+
+    // Case 1. # param
+    // An empty argument produces no token at all.
+    if (token.type == Token::SHARP &&
+        next.type == Token::MACRO_PARAM) {
+      DCHECK(next.v.param_index < args.size());
+      if (!args[next.v.param_index].empty()) {
+        output->Insert(output_iter,
+                       Stringize(args[next.v.param_index]),
+                       hide_set);
+      }
+      // Consume the parameter token as well.
+      iter++;
+      continue;
+    }
+
+    // Case 2. ## param
+    // The first token of the argument is glued onto the token preceding
+    // |output_iter|; the rest of the argument is inserted unchanged.
+    if (token.type == Token::DOUBLESHARP &&
+        next.type == Token::MACRO_PARAM) {
+      const TokenList& arg = args[next.v.param_index];
+      if (!arg.empty()) {
+        TokenList::const_iterator arg_iter = arg.begin();
+        Glue(output_iter.iter(), *arg_iter++);
+        output->Insert(output_iter, arg_iter, arg.end(), hide_set);
+      }
+      iter++;
+      continue;
+    }
+
+    // Case 3. ## token <remainder>
+    if (token.type == Token::DOUBLESHARP &&
+        next.type == Token::IDENTIFIER) {
+      Glue(output_iter.iter(), next);
+      iter++;
+      continue;
+    }
+
+    // Case 4. param ## <remainder>
+    if (token.type == Token::MACRO_PARAM &&
+        next.type == Token::DOUBLESHARP) {
+      const TokenList& arg = args[token.v.param_index];
+      if (arg.empty()) {
+        // Empty left operand: skip the ## and, if the right operand is a
+        // parameter too, insert its argument as-is.
+        iter++;
+        if (iter != replacement.end() &&
+            iter->type == Token::MACRO_PARAM) {
+          const TokenList& arg2 = args[iter->v.param_index];
+          output->Insert(output_iter, arg2.begin(), arg2.end(), hide_set);
+          iter++;
+        }
+      } else {
+        // ## is processed in the next iteration.
+        output->Insert(output_iter, arg.begin(), arg.end(), hide_set);
+      }
+      continue;
+    }
+
+    // Case 5. param <remainder>
+    if (token.type == Token::MACRO_PARAM) {
+      // Work on a copy of the argument; every token starts with an empty
+      // hide set.
+      TokenList arg = args[token.v.param_index];
+      MacroSetList hs_input;
+      hs_input.assign(arg.size(), MacroSet());
+      MacroExpandContext input(&arg, &hs_input);
+      // Remember the hide-set entry just before the insertion point so the
+      // range added by Expand() can be located afterwards.
+      MacroSetList::iterator saved_hs_iter = output_iter.hs_iter();
+      --saved_hs_iter;
+      Expand(&input, input.Begin(), output, output_iter,
+             skip_space, use_hideset);
+      // Add hide set to the tokens added by the Expand.
+      for (MacroSetList::iterator hs_iter = ++saved_hs_iter;
+           hs_iter != output_iter.hs_iter(); ++hs_iter) {
+        hs_iter->Union(hide_set);
+      }
+      continue;
+    }
+
+    // Case 6. __VA_ARGS__ <remainder>
+    // Same as case 5, except that the slot at index |num_args| is read and
+    // Expand() is always called with use_hideset == false.
+    if (token.type == Token::MACRO_PARAM_VA_ARGS) {
+      TokenList arg = args[num_args];
+      MacroSetList hs_input;
+      hs_input.assign(arg.size(), MacroSet());
+      MacroExpandContext input(&arg, &hs_input);
+      MacroSetList::iterator saved_hs_iter = output_iter.hs_iter();
+      --saved_hs_iter;
+      Expand(&input, input.Begin(), output, output_iter, skip_space, false);
+      // Add hide set to the tokens added by the Expand.
+      for (MacroSetList::iterator hs_iter = ++saved_hs_iter;
+           hs_iter != output_iter.hs_iter(); ++hs_iter) {
+        hs_iter->Union(hide_set);
+      }
+      continue;
+    }
+
+    // Case 7. Other cases.
+    output->Insert(output_iter, token, hide_set);
+  }
+  // Step forward again from the remembered position.
+  ++saved_iter;
+  VLOG(3) << "substitute:=>"
+          << DebugString(saved_iter.iter(), output->End().iter());
+  return saved_iter;
+}
+
+// Token pasting: appends the canonical spelling of |right| to the token
+// located immediately before |left_pos|.
+// glue() in http://www.spinellis.gr/blog/20060626/
+void CppParser::Glue(TokenList::iterator left_pos, const Token& right) {
+  // TODO: Misc chars can also generate a new token (e.g. '|', '|'
+  // -> "||").
+  --left_pos;
+  left_pos->Append(right.GetCanonicalString());
+}
+
+// Stringize the given token list into a single STRING token.
+// STRING tokens are re-quoted with '"' and '\' escaped by a backslash; every
+// other token contributes its canonical spelling.
+// stringize() in http://www.spinellis.gr/blog/20060626/
+CppParser::Token CppParser::Stringize(const TokenList& list) {
+  Token result(Token::STRING);
+  for (const auto& piece : list) {
+    if (piece.type != Token::STRING) {
+      result.Append(piece.GetCanonicalString());
+      continue;
+    }
+    string quoted(1, '"');
+    for (char ch : piece.string_value) {
+      if (ch == '"' || ch == '\\') {
+        quoted.push_back('\\');
+      }
+      quoted.push_back(ch);
+    }
+    quoted.push_back('"');
+    result.Append(quoted);
+  }
+  return result;
+}
+
+// Advances |*iter| past at most one SPACE token and then past the opening
+// '(' of a macro invocation. Reports an error and returns false (leaving
+// |*iter| on the offending position) when no '(' is found there.
+bool CppParser::SkipUntilBeginMacroArguments(
+    const string& macro_name,
+    const MacroExpandContext& input,
+    MacroExpandIterator* iter) {
+  MacroExpandIterator& pos = *iter;
+  if (pos != input.End() && pos.token().type == Token::SPACE) {
+    ++pos;
+  }
+  if (pos != input.End() && pos.token().IsPuncChar('(')) {
+    ++pos;
+    return true;
+  }
+  // Macro invoked without arguments.
+  Error("macro is referred without any arguments:", macro_name);
+  return false;
+}
+
+// Get macro arguments using the comma tokens as delimiters.
+// Arguments in nested parenthesis pairs are parsed in nested token lists.
+// Returns a vector of token lists.
+// e.g. macro(a1, a2(b1, b2), a3, a4(c1(d)))
+//  --> [[a1], [a2, '(', b1, b2, ')'], [a3], [a4, '(', c1, '(', d, ')', ')']]
+//
+// |*iter| is advanced past every token consumed here. |rparen_hs| receives
+// the hide set of the closing ')' when one is found.
+// Returns false if no '(' is found at |*iter| (see
+// SkipUntilBeginMacroArguments), or if a non-variadic macro receives a
+// different number of arguments than macro->num_args. For variadic macros
+// |args| ends up with num_args entries followed by one extra entry holding
+// all remaining arguments joined by ',' tokens.
+bool CppParser::GetMacroArguments(
+    const std::string& macro_name, Macro* macro, ArrayArgList* args,
+    const MacroExpandContext& input, MacroExpandIterator* iter,
+    MacroSet* rparen_hs) {
+  DCHECK(macro);
+  if (!SkipUntilBeginMacroArguments(macro_name, input, iter)) {
+    return false;
+  }
+
+  // |nest| counts unmatched '(' seen inside the argument list; |list|
+  // accumulates the tokens of the argument currently being read.
+  int nest = 0;
+  bool ok = true;
+  TokenList list;
+  while (*iter != input.End()) {
+    // Grab the current token and hide set, then advance; both references are
+    // still used after the increment.
+    const Token& token = iter->token();
+    const MacroSet& hide_set = iter->hide_set();
+    ++(*iter);
+    if (token.IsPuncChar(',')) {
+      if (nest == 0) {
+        // Top-level comma: the current argument is complete.
+        args->push_back(list);
+        VLOG(3) << "macro:" << macro_name << " found ,"
+                << " nest=0" << " args=" << args->size();
+        list = TokenList();
+      } else {
+        list.push_back(token);
+      }
+      // A single SPACE token right after any comma (nested or not) is
+      // dropped.
+      if (*iter != input.End() && iter->token().type == Token::SPACE) {
+        ++(*iter);
+      }
+      continue;
+    }
+    if (token.IsPuncChar(')')) {
+      if (nest-- == 0) {
+        // Closing ')' of the invocation: finish the last argument.
+        args->push_back(list);
+        VLOG(3) << "macro:" << macro_name << " found )"
+                << " nest=0" << " args=" << args->size();
+        *rparen_hs = hide_set;
+        break;
+      }
+    } else if (token.IsPuncChar('(')) {
+      nest++;
+    }
+    list.push_back(token);
+  }
+  // FOO() case.
+  if (macro->num_args == 0U && args->size() == 1U && args->front().empty()) {
+    args->clear();
+  }
+  // FOO() is valid for macro FOO(x).
+  // NOTE(review): a ')' at nest 0 always pushes one list above, so
+  // args->size() == 0 seems reachable only when the input ended before any
+  // top-level ',' or ')' -- confirm this is the intended trigger.
+  if (macro->num_args == 1U && args->size() == 0U) {
+    // Push empty string token.
+    list.clear();
+    list.push_back(Token(Token::STRING));
+    args->push_back(list);
+  }
+  if (!macro->is_vararg && macro->num_args != args->size()) {
+    Error("macro argument number mismatching with the parameter list");
+    VLOG(3) << "macro:" << macro_name
+            << " want args:" << macro->num_args
+            << " got args:" << args->size();
+    ok = false;
+  }
+  if (macro->is_vararg) {
+    // Fold every argument from index num_args onwards into one list,
+    // re-inserting the ',' separators, and store it as the entry that
+    // follows the named arguments.
+    list.clear();
+    for (size_t i = macro->num_args; i < args->size(); ++i) {
+      list.insert(list.end(), args->at(i).begin(), args->at(i).end());
+      if (i != args->size() - 1) {
+        list.push_back(Token(Token::PUNCTUATOR, ','));
+      }
+    }
+    args->resize(macro->num_args);
+    args->push_back(list);
+  }
+  return ok;
+}
+
+std::pair<Macro*, bool> CppParser::AddMacro(
+    const string& name, Macro::Type type,
+    const FileId& fileid, size_t macro_pos) {
+  if (predefined_macros_->find(name) != predefined_macros_->end() ||
+      predefined_func_macros_->find(name) != predefined_func_macros_->end()) {
+    Error("redefining predefined macro ", name);
+  }
+  return AddMacroInternal(name, type, fileid, macro_pos);
+}
+
+// Creates or refreshes the entry for |name| in |macros_| and records it in
+// |used_macros_|. Every path assigns a fresh id from |next_macro_id_|.
+//
+// Returns (entry, true) when an existing entry matches (fileid, macro_pos)
+// and is reused with only its type and id updated; returns (entry, false)
+// when the entry was newly created or overwritten.
+std::pair<Macro*, bool> CppParser::AddMacroInternal(
+    const string& name, Macro::Type type,
+    const FileId& fileid, size_t macro_pos) {
+  DCHECK(!name.empty()) << "Adding a macro that does not have a name.";
+
+  auto found = macros_->find(name);
+  if (found == macros_->end()) {
+    // First time this name is seen: create a fresh entry.
+    Macro* created =
+        &macros_->emplace(name, Macro(next_macro_id_++, type)).first->second;
+    created->fileid = fileid;
+    created->macro_pos = macro_pos;
+    used_macros_.push_back(created);
+    return std::make_pair(created, false);
+  }
+
+  Macro* existing = &found->second;
+  if (existing->IsMatch(fileid, macro_pos)) {
+    // Same definition site as before: keep the entry, refresh type and id.
+    existing->type = type;
+    existing->id = next_macro_id_++;
+    used_macros_.push_back(existing);
+    return std::make_pair(existing, true);
+  }
+
+  // A different definition replaces the old one; complain unless the old
+  // entry was only a placeholder.
+  if (existing->type != Macro::UNDEFINED &&
+      existing->type != Macro::UNUSED) {
+    Error("macro is already defined:", name);
+  }
+  *existing = Macro(next_macro_id_++, type);
+  existing->fileid = fileid;
+  existing->macro_pos = macro_pos;
+  used_macros_.push_back(existing);
+  return std::make_pair(existing, false);
+}
+
+
+// Looks up macro |name|.
+//
+// An entry in |macros_| whose type is neither UNUSED nor UNDEFINED is
+// returned directly; an UNDEFINED entry yields nullptr. Otherwise enabled
+// predefined macros are instantiated on demand as callback macros. When
+// nothing matches, nullptr is returned and, if |add_undefined| is set, the
+// name is registered as UNDEFINED (except for "true"/"false" in C++).
+Macro* CppParser::GetMacro(const string& name, bool add_undefined) {
+  auto entry = macros_->find(name);
+  const bool is_known =
+      entry != macros_->end() && entry->second.type != Macro::UNUSED;
+  if (is_known) {
+    return entry->second.type == Macro::UNDEFINED ? nullptr : &entry->second;
+  }
+
+  // Check predefined object-like macros.
+  PredefinedObjMacroMap::const_iterator obj_macro =
+      predefined_macros_->find(name);
+  if (obj_macro != predefined_macros_->end() &&
+      IsEnabledPredefinedMacro(obj_macro->first)) {
+    Macro* macro = AddMacroInternal(name, Macro::CBK, FileId(), 0).first;
+    macro->callback = obj_macro->second;
+    return macro;
+  }
+
+  // Check predefined function-like macros.
+  PredefinedFuncMacroMap::const_iterator func_macro =
+      predefined_func_macros_->find(name);
+  if (func_macro != predefined_func_macros_->end() &&
+      IsEnabledPredefinedMacro(func_macro->first)) {
+    Macro* macro =
+        AddMacroInternal(name, Macro::CBK_FUNC, FileId(), 0).first;
+    macro->callback_func = func_macro->second;
+    return macro;
+  }
+
+  // "true" and "false" are C++ reserved keyword.
+  // No need to treat them as undefined if C++.
+  const bool is_bool_keyword = name == "true" || name == "false";
+  if (is_cplusplus_ && is_bool_keyword) {
+    return nullptr;
+  }
+
+  // No macros found for the given name.
+  if (add_undefined) {
+    AddMacro(name, Macro::UNDEFINED, FileId(), 0);
+  }
+  return nullptr;
+}
+
+// Removes the innermost input from |inputs_| and keeps it in |last_input_|.
+// If that input has a non-empty file path, its include guard detector
+// reports a guard, and the guard identifier is currently defined, the
+// identifier is recorded in |include_guard_ident_| under the file path.
+void CppParser::PopInput() {
+  DCHECK(HasMoreInput());
+
+  std::unique_ptr<Input> popped = std::move(inputs_.back());
+  inputs_.pop_back();
+
+  const auto& detector = popped->include_guard_detector();
+  detector->OnPop();
+
+  const bool has_usable_guard =
+      !popped->filepath().empty() &&
+      detector->IsGuardDetected() &&
+      IsMacroDefined(detector->detected_ident());
+  if (has_usable_guard) {
+    include_guard_ident_[popped->filepath()] = detector->detected_ident();
+  }
+
+  last_input_ = std::move(popped);
+}
+
+// Returns true when |path| does not need to be processed again: either it
+// is in the pragma-once set, or an include guard identifier was recorded
+// for it and that identifier is still defined.
+// |include_dir_index| is only logged.
+bool CppParser::IsProcessedFileInternal(const string& path,
+                                        int include_dir_index) {
+  VLOG(2) << "IsProcessedFileInternal:"
+          << " path=" << path
+          << " include_dir_index=" << include_dir_index;
+
+  // Check if this file is in the pragma_once history.
+  if (pragma_once_fileset_.Has(path)) {
+    VLOG(1) << "Skipping " << path << " for pragma once";
+    return true;
+  }
+
+  auto guard = include_guard_ident_.find(path);
+  if (guard != include_guard_ident_.end() && IsMacroDefined(guard->second)) {
+    VLOG(1) << "Skipping " << path << " for include guarded by "
+            << guard->second;
+    return true;
+  }
+  return false;
+}
+
+// Callback for __FILE__: a STRING token holding the current input's path.
+CppParser::Token CppParser::GetFileName() {
+  Token file_token(Token::STRING);
+  file_token.Append(input()->filepath());
+  return file_token;
+}
+
+// Callback for __LINE__: a NUMBER token carrying both the integer value and
+// its decimal spelling.
+CppParser::Token CppParser::GetLineNumber() {
+  Token line_token(Token::NUMBER);
+  // Macros are evaluated only after the whole line has been read, so the
+  // stream's line counter is already one past the line being evaluated.
+  line_token.v.int_value = input()->stream()->line() - 1;
+  line_token.Append(std::to_string(line_token.v.int_value));
+  return line_token;
+}
+
+// Callback for __DATE__: a STRING token holding |current_date_|.
+CppParser::Token CppParser::GetDate() {
+  Token date_token(Token::STRING);
+  date_token.Append(current_date_);
+  return date_token;
+}
+
+// Callback for __TIME__: a STRING token holding |current_time_|.
+CppParser::Token CppParser::GetTime() {
+  Token time_token(Token::STRING);
+  time_token.Append(current_time_);
+  return time_token;
+}
+
+// Callback for __COUNTER__: yields the current value of |counter_| and then
+// increments it.
+CppParser::Token CppParser::GetCounter() {
+  Token counter_token(counter_++);
+  return counter_token;
+}
+
+// Callback for __BASE_FILE__: a STRING token holding |base_file_|.
+CppParser::Token CppParser::GetBaseFile() {
+  Token base_file_token(Token::STRING);
+  base_file_token.Append(base_file_);
+  return base_file_token;
+}
+
+// Callback for __has_include(...): integer token 1 if found, 0 otherwise.
+CppParser::Token CppParser::ProcessHasInclude(const ArrayTokenList& tokens) {
+  const bool found = ProcessHasIncludeInternal(tokens, false);
+  return Token(static_cast<int>(found));
+}
+
+// Callback for __has_include_next(...): integer token 1 if found, 0
+// otherwise.
+CppParser::Token CppParser::ProcessHasIncludeNext(
+    const ArrayTokenList& tokens) {
+  const bool found = ProcessHasIncludeInternal(tokens, true);
+  return Token(static_cast<int>(found));
+}
+
+// Shared implementation of __has_include / __has_include_next.
+//
+// |tokens| is macro-expanded first. The expansion must start with either a
+// STRING token ("filename") or an LT token (<filename>, whose path is the
+// concatenation of the tokens up to the first GT); anything else is reported
+// as an error. The decision is delegated to |include_observer_|; without an
+// observer the result is false.
+bool CppParser::ProcessHasIncludeInternal(const ArrayTokenList& tokens,
+                                          bool is_include_next) {
+  static const char kExpectsFilename[] =
+      "__has_include expects \"filename\" or <filename>";
+
+  if (tokens.empty()) {
+    Error(kExpectsFilename);
+    return false;
+  }
+
+  ArrayTokenList to_expand(tokens.begin(), tokens.end());
+  ArrayTokenList expanded;
+  Expand0(to_expand, &expanded, false);
+  if (expanded.empty()) {
+    Error(kExpectsFilename);
+    return false;
+  }
+
+  const Token& head = expanded.front();
+  if (head.type != Token::LT && head.type != Token::STRING) {
+    Error(kExpectsFilename);
+    return false;
+  }
+
+  if (head.type == Token::STRING) {
+    VLOG(1) << DebugStringPrefix() << "HAS_INCLUDE(" << head.string_value
+            << ")";
+    if (!include_observer_) {
+      return false;
+    }
+    return include_observer_->HasInclude(
+        head.string_value, input()->directory(), input()->filepath(),
+        is_include_next ? '<' : '"',
+        is_include_next ? (input()->include_dir_index() + 1) :
+        input()->include_dir_index());
+  }
+
+  // <filename> form: rebuild the path from the tokens between '<' and '>'.
+  string path;
+  auto piece = expanded.begin();
+  for (++piece; piece != expanded.end() && piece->type != Token::GT;
+       ++piece) {
+    path.append(piece->GetCanonicalString());
+  }
+  VLOG(1) << DebugStringPrefix() << "HAS_INCLUDE(<" << path << ">)";
+  if (!include_observer_) {
+    return false;
+  }
+  return include_observer_->HasInclude(
+      path, input()->directory(), input()->filepath(),
+      '<',
+      is_include_next ? (input()->include_dir_index() + 1) :
+      bracket_include_dir_index_);
+}
+
+// Shared implementation of the __has_xxx(ident) style check macros.
+//
+// |tokens| is macro-expanded and turned into one identifier string, which is
+// then looked up in |has_check_macro|. Returns an integer token holding the
+// mapped value, or 0 when the identifier is unknown or malformed (the latter
+// is also reported through Error() using |name|).
+CppParser::Token CppParser::ProcessHasCheckMacro(
+    const string& name,
+    const ArrayTokenList& tokens,
+    const unordered_map<string, int>& has_check_macro) {
+  const string not_an_identifier = name + " expects an identifier";
+
+  if (tokens.empty()) {
+    Error(not_an_identifier);
+    return Token(0);
+  }
+
+  ArrayTokenList token_list(tokens.begin(), tokens.end());
+  ArrayTokenList expanded;
+  Expand0(token_list, &expanded, true);
+  if (expanded.empty()) {
+    Error(not_an_identifier);
+    return Token(0);
+  }
+
+  // Let's consider "__has_cpp_attribute(clang::fallthrough)".
+  // Here, token list is like "clang" ":" ":" "fallthrough".
+  //
+  // TODO: what happens
+  //   1. if space is inserted between tokens?
+  //   2. if clang or fallthrough is defined somewhere?
+  //
+  // b/71611716
+
+  string ident;
+  if (expanded.size() == 1) {
+    const Token& only = expanded.front();
+    if (only.type != Token::IDENTIFIER) {
+      Error(not_an_identifier);
+      return Token(0);
+    }
+    ident = only.string_value;
+  } else {
+    // Concat the expanded tokens. Allow only ident or ':'.
+    for (const auto& piece : expanded) {
+      if (piece.type == Token::IDENTIFIER) {
+        ident += piece.string_value;
+        continue;
+      }
+      if (piece.IsPuncChar(':')) {
+        ident += ':';
+        continue;
+      }
+      Error(not_an_identifier);
+      return Token(0);
+    }
+  }
+
+  // Normalize the extension identifier.
+  // '__feature__' is normalized to 'feature' in clang.
+  const bool is_wrapped = ident.size() >= 4 &&
+                          strings::StartsWith(ident, "__") &&
+                          strings::EndsWith(ident, "__");
+  if (is_wrapped) {
+    ident = ident.substr(2, ident.size() - 4);
+  }
+
+  const auto found = has_check_macro.find(ident);
+  return found == has_check_macro.end() ? Token(0) : Token(found->second);
+}
+
+// Adds |name| to |enabled_predefined_macros_| with the flag value false.
+void CppParser::EnablePredefinedMacro(const string& name) {
+  const bool flag = false;
+  enabled_predefined_macros_.insert(std::make_pair(name, flag));
+}
+
+// Returns the flag stored for |name| in |enabled_predefined_macros_|, or
+// false when |name| has no entry there.
+bool CppParser::IsHiddenPredefinedMacro(const string& name) const {
+  auto entry = enabled_predefined_macros_.find(name);
+  return entry != enabled_predefined_macros_.end() && entry->second;
+}
+
+#ifdef _WIN32
+// Windows one-time-init callback; the three parameters are unused. Runs
+// InitializeStaticOnce() and returns TRUE.
+BOOL WINAPI CppParser::InitializeWinOnce(PINIT_ONCE, PVOID, PVOID*) {
+  InitializeStaticOnce();
+  return TRUE;
+}
+#endif
+
+// Process-wide initialization: initializes the tokenizer, sanity-checks the
+// generated directive tables, builds the maps of predefined callback macros,
+// and finally sets |global_initialized_|.
+void CppParser::InitializeStaticOnce() {
+  DCHECK(!global_initialized_);
+
+  CppTokenizer::InitializeStaticOnce();
+
+  // One-time assertion checks to see the static values auto-generated by
+  // generate_static_darray.py are initialized as expected.
+  const DirectiveHandler* const active = kDirectiveTable;
+  DCHECK(active[kDirectiveInclude] == &CppParser::ProcessInclude);
+  DCHECK(active[kDirectiveImport] == &CppParser::ProcessImport);
+  DCHECK(active[kDirectiveIncludeNext] == &CppParser::ProcessIncludeNext);
+  DCHECK(active[kDirectiveDefine] == &CppParser::ProcessDefine);
+  DCHECK(active[kDirectiveUndef] == &CppParser::ProcessUndef);
+  DCHECK(active[kDirectiveIfdef] == &CppParser::ProcessIfdef);
+  DCHECK(active[kDirectiveIfndef] == &CppParser::ProcessIfndef);
+  DCHECK(active[kDirectiveIf] == &CppParser::ProcessIf);
+  DCHECK(active[kDirectiveElse] == &CppParser::ProcessElse);
+  DCHECK(active[kDirectiveEndif] == &CppParser::ProcessEndif);
+  DCHECK(active[kDirectiveElif] == &CppParser::ProcessElif);
+  DCHECK(active[kDirectivePragma] == &CppParser::ProcessPragma);
+
+  const DirectiveHandler* const inactive = kFalseConditionDirectiveTable;
+  DCHECK(inactive[kDirectiveIfdef] == &CppParser::ProcessConditionInFalse);
+  DCHECK(inactive[kDirectiveIfndef] == &CppParser::ProcessConditionInFalse);
+  DCHECK(inactive[kDirectiveIf] == &CppParser::ProcessConditionInFalse);
+  DCHECK(inactive[kDirectiveElse] == &CppParser::ProcessElse);
+  DCHECK(inactive[kDirectiveEndif] == &CppParser::ProcessEndif);
+  DCHECK(inactive[kDirectiveElif] == &CppParser::ProcessElif);
+
+  using Self = CppParser;
+
+  // Object-like predefined macros, each backed by a member callback.
+  static const struct {
+    const char* name;
+    Macro::CallbackObj callback;
+  } kPredefinedCallbackMacros[] = {
+    { "__FILE__", &Self::GetFileName },
+    { "__LINE__", &Self::GetLineNumber },
+    { "__DATE__", &Self::GetDate },
+    { "__TIME__", &Self::GetTime },
+    { "__COUNTER__", &Self::GetCounter },
+    { "__BASE_FILE__", &Self::GetBaseFile },
+  };
+  predefined_macros_ = new PredefinedObjMacroMap;
+  for (const auto& entry : kPredefinedCallbackMacros) {
+    predefined_macros_->insert(std::make_pair(entry.name, entry.callback));
+  }
+
+  // Function-like predefined macros, each backed by a member callback that
+  // receives the argument tokens.
+  static const struct {
+    const char* name;
+    Macro::CallbackFunc callback;
+  } kPredefinedCallbackFuncMacros[] = {
+    { "__has_include", &Self::ProcessHasInclude },
+    { "__has_include__", &Self::ProcessHasInclude },
+    { "__has_include_next", &Self::ProcessHasIncludeNext },
+    { "__has_include_next__", &Self::ProcessHasIncludeNext },
+    { "__has_feature", &Self::ProcessHasFeature },
+    { "__has_extension", &Self::ProcessHasExtension },
+    { "__has_attribute", &Self::ProcessHasAttribute },
+    { "__has_cpp_attribute", &Self::ProcessHasCppAttribute },
+    { "__has_declspec_attribute", &Self::ProcessHasDeclspecAttribute },
+    { "__has_builtin", &Self::ProcessHasBuiltin },
+  };
+  predefined_func_macros_ = new PredefinedFuncMacroMap;
+  for (const auto& entry : kPredefinedCallbackFuncMacros) {
+    predefined_func_macros_->insert(
+        std::make_pair(entry.name, entry.callback));
+  }
+
+  global_initialized_ = true;
+}
+
+}  // namespace devtools_goma
+
+#ifdef TEST
+
+using devtools_goma::Content;
+using devtools_goma::CppParser;
+using devtools_goma::GetBaseDir;
+using devtools_goma::GetCurrentDirNameOrDie;
+using devtools_goma::PathResolver;
+
+// Include observer for the stand-alone test driver: echoes every include
+// directive to stdout and answers true to every query.
+class TestIncludeObserver : public CppParser::IncludeObserver {
+ public:
+  // Prints "#INCLUDE_NEXT " or "#INCLUDE " followed by the quoted path.
+  // Always returns true.
+  bool HandleInclude(
+      const string& path,
+      const string& current_directory ALLOW_UNUSED,
+      const string& current_filepath ALLOW_UNUSED,
+      char quote_char ALLOW_UNUSED,  // '"' or '<'
+      int include_dir_index ALLOW_UNUSED) override {
+    const bool is_include_next =
+        quote_char == '<' &&
+        include_dir_index > CppParser::kIncludeDirIndexStarting;
+    const char closing_char = (quote_char == '<') ? '>' : quote_char;
+    std::cout << (is_include_next ? "#INCLUDE_NEXT " : "#INCLUDE ")
+              << quote_char << path << closing_char << std::endl;
+#ifdef _WIN32
+    UNREFERENCED_PARAMETER(current_directory);
+    UNREFERENCED_PARAMETER(current_filepath);
+#endif
+    return true;
+  }
+
+  // Always returns true; every argument is ignored.
+  bool HasInclude(
+      const string& path ALLOW_UNUSED,
+      const string& current_directory ALLOW_UNUSED,
+      const string& current_filepath ALLOW_UNUSED,
+      char quote_char ALLOW_UNUSED,  // '"' or '<'
+      int include_dir_index ALLOW_UNUSED) override {
+#ifdef _WIN32
+    UNREFERENCED_PARAMETER(path);
+    UNREFERENCED_PARAMETER(current_directory);
+    UNREFERENCED_PARAMETER(current_filepath);
+    UNREFERENCED_PARAMETER(quote_char);
+    UNREFERENCED_PARAMETER(include_dir_index);
+#endif
+    return true;
+  }
+};
+
+class TestErrorObserver : public CppParser::ErrorObserver {
+ public:
+  void HandleError(const string& error) override {
+    LOG(WARNING) << error;
+  }
+};
+
+// Loads |filepath| and pushes it onto |parser| as a file input.
+// Returns false, without touching |parser|, when Content::CreateFromFile
+// yields no content.
+static bool TryAddFileInput(CppParser* parser, const string& filepath,
+                            int include_dir_index) {
+  std::unique_ptr<Content> content(Content::CreateFromFile(filepath));
+  if (content == nullptr) {
+    return false;
+  }
+
+  devtools_goma::FileId file_id(filepath);
+  string base_dir;
+  GetBaseDir(filepath, &base_dir);
+  parser->AddFileInput(std::move(content), file_id, filepath, base_dir,
+                       include_dir_index);
+  return true;
+}
+
+// Splits a -D style argument "NAME=VALUE" at the first '='.
+// Returns (NAME, VALUE); VALUE is empty when |arg| contains no '='.
+static std::pair<string, string> GetMacroArg(const char* arg) {
+  const string definition(arg);
+  const size_t eq_pos = definition.find('=');
+  if (eq_pos == string::npos) {
+    return std::pair<string, string>(definition, string());
+  }
+  return std::pair<string, string>(definition.substr(0, eq_pos),
+                                   definition.substr(eq_pos + 1));
+}
+
+// Stand-alone driver (built only with -DTEST):
+//   cpp_parser [-D<macro>[=<value>] ...] path
+// Dumps the token stream of |path|, then processes its directives with the
+// given macros predefined, printing the includes and the resulting macros.
+// Returns 1 on a usage error or when |path| cannot be read.
+int main(int argc, char *argv[]) {
+  // Collect the leading -D<macro> / -D <macro> options.
+  int ac = 1;
+  std::vector<std::pair<string, string>> arg_macros;
+  for (; ac < argc; ++ac) {
+    if (strncmp(argv[ac], "-D", 2) == 0) {
+      if (strlen(argv[ac]) > 2) {
+        arg_macros.push_back(GetMacroArg(&argv[ac][2]));
+      } else if (ac + 1 < argc) {
+        arg_macros.push_back(GetMacroArg(argv[++ac]));
+      }
+      continue;
+    }
+    break;
+  }
+
+  // The first non-option argument is the file to parse.
+  if (ac >= argc) {
+    std::cerr << argv[0] << " [-D<macro> ...] path" << std::endl;
+    std::cerr << "e.g.: " << argv[0] << " -D'S(x)=<lib##x.h>' tmp.c"
+              << std::endl;
+    return 1;
+  }
+
+  const string cwd = GetCurrentDirNameOrDie();
+
+  PathResolver path_resolver;
+
+  string path = file::JoinPathRespectAbsolute(cwd, argv[ac]);
+  path = path_resolver.ResolvePath(path);
+
+  std::cout << std::endl << "===== Tokens =====" << std::endl;
+  {
+    CppParser parser;
+    // Previously a failure here was ignored, so an unreadable file silently
+    // produced empty output.
+    if (!TryAddFileInput(&parser, path,
+                         CppParser::kCurrentDirIncludeDirIndex)) {
+      std::cerr << "failed to read " << path << std::endl;
+      return 1;
+    }
+    for (;;) {
+      CppParser::Token token = parser.NextToken(false);
+      if (token.type == CppParser::Token::END) {
+        break;
+      }
+      std::cout << token.DebugString();
+    }
+  }
+
+  {
+    CppParser parser;
+    TestIncludeObserver include_observer;
+    TestErrorObserver error_observer;
+    if (!TryAddFileInput(&parser, path,
+                         CppParser::kCurrentDirIncludeDirIndex)) {
+      std::cerr << "failed to read " << path << std::endl;
+      return 1;
+    }
+    parser.set_include_observer(&include_observer);
+    parser.set_error_observer(&error_observer);
+
+    for (const auto& arg_macro : arg_macros) {
+      parser.AddMacroByString(arg_macro.first, arg_macro.second);
+    }
+
+    std::cout << std::endl << "===== Includes =====" << std::endl;
+    parser.ProcessDirectives();
+
+    std::cout << std::endl << "===== Macros =====" << std::endl;
+    std::cout << parser.DumpMacros();
+  }
+  return 0;
+}
+
+#endif  // TEST
diff --git a/client/cpp_parser.h b/client/cpp_parser.h
new file mode 100644
index 0000000..f95f3a1
--- /dev/null
+++ b/client/cpp_parser.h
@@ -0,0 +1,519 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_CPP_PARSER_H_
+#define DEVTOOLS_GOMA_CLIENT_CPP_PARSER_H_
+
+#include <bitset>
+#include <list>
+#include <memory>
+#include <set>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "autolock_timer.h"
+#include "basictypes.h"
+#include "compiler_info.h"
+#include "cpp_input.h"
+#include "cpp_macro.h"
+#include "cpp_token.h"
+#include "file_id.h"
+#include "glog/logging.h"
+#include "gtest/gtest_prod.h"
+#include "platform_thread.h"
+#include "predefined_macros.h"
+#include "string_piece.h"
+#include "unordered.h"
+
+#ifdef _WIN32
+# include "config_win.h"
+#endif
+
+using std::string;
+
+namespace devtools_goma {
+
+class Content;
+class CppInputStream;
+
+// Set of integer macro ids (e.g. the hide set attached to a token).
+class MacroSet {
+ public:
+  MacroSet() {}
+  // Adds |i|; a no-op when |i| is already present.
+  void Set(int i) { macros_.insert(i); }
+  // Returns true iff |i| is in the set.
+  bool Get(int i) const { return macros_.count(i) != 0; }
+  // Adds every id held by |other| to this set.
+  void Union(const MacroSet& other) {
+    for (const int id : other.macros_) macros_.insert(id);
+  }
+  // Returns true iff the set holds no id.
+  bool empty() const { return macros_.empty(); }
+
+ private:
+  std::set<int> macros_;
+};
+
+// CppParser is thread-unsafe.
+// TODO: Add unittest for this class.
+class CppParser {
+ public:
+  class IncludeObserver {
+   public:
+    virtual ~IncludeObserver() {}
+
+    // Handles include directive that CppParser processes.
+    // Returns true if the include file is found (or it was already processed).
+    // Returns false if the include file was not found and failed to process
+    // the include directive.
+    virtual bool HandleInclude(
+        const string& path,
+        const string& current_directory,
+        const string& current_filepath,
+        char quote_char,  // '"' or '<'
+        int include_dir_index) = 0;
+
+    // Handles __has_include() macro.
+    // Returns value of __has_include().
+    virtual bool HasInclude(
+        const string& path,
+        const string& current_directory,
+        const string& current_filepath,
+        char quote_char,  // '"' or '<'
+        int include_dir_index) = 0;
+  };
+  class ErrorObserver {
+   public:
+    virtual ~ErrorObserver() {}
+    virtual void HandleError(const string& error) = 0;
+  };
+
+  using Token = CppToken;
+  using Input = CppInput;
+
+  typedef std::list<Token> TokenList;
+  typedef std::vector<Token> ArrayTokenList;
+  typedef std::vector<TokenList> ArrayArgList;
+  typedef std::list<MacroSet> MacroSetList;
+
+  CppParser();
+  ~CppParser();
+
+  void set_bracket_include_dir_index(int index) {
+    bracket_include_dir_index_ = index;
+  }
+  void set_include_observer(IncludeObserver* obs) { include_observer_ = obs; }
+  void set_error_observer(ErrorObserver* obs) { error_observer_ = obs; }
+  void SetCompilerInfo(const CompilerInfo* compiler_info);
+
+  // Support predefined macro. This is expected to be used for tests.
+  // For usual cases, SetCompilerInfo() should be used.
+  void EnablePredefinedMacro(const string& name);
+  bool IsEnabledPredefinedMacro(const string& name) const {
+    return enabled_predefined_macros_.count(name) > 0;
+  }
+  bool IsHiddenPredefinedMacro(const string& name) const;
+
+  void set_is_vc() { is_vc_ = true; }
+  void set_is_cplusplus(bool is_cplusplus) { is_cplusplus_ = is_cplusplus; }
+  bool is_cplusplus() const { return is_cplusplus_; }
+
+  // Parses and processes directives only.
+  // Returns false if it failed to process and is pretty sure it missed some
+  // input files.
+  bool ProcessDirectives();
+
+  Token NextToken(bool skip_space);
+  void UngetToken(const Token& token);
+  int NextDirective();
+
+  void AddMacroByString(const string& name, const string& body);
+  void DeleteMacro(const string& name);
+  bool HasMacro(const string& name);
+  bool IsMacroDefined(const string& name);
+
+  void ClearBaseFile() { base_file_.clear(); }
+
+  void AddStringInput(const string& content, const string& pathname);
+
+  // Adds |content| of |path|, which exists in |directory|.
+  // |include_dir_index| is an index of a list of include dirs.
+  void AddFileInput(std::unique_ptr<Content> content, const FileId& fileid,
+                    const string& path, const string& directory,
+                    int include_dir_index);
+
+  // Returns true if the parser has already processed the |path|
+  // and the set of macros that the file depends on have not changed.
+  bool IsProcessedFile(const string& filepath, int include_dir_index) {
+    ++total_files_;  // Every query is counted, skipped or not.
+    const bool processed =
+        IsProcessedFileInternal(filepath, include_dir_index);
+    if (processed) ++skipped_files_;
+    return processed;
+  }
+
+  int total_files() const { return total_files_; }
+  int skipped_files() const { return skipped_files_; }
+
+  int obj_cache_hit() const { return obj_cache_hit_; }
+  int func_cache_hit() const { return func_cache_hit_; }
+
+  // For debug.
+  string DumpMacros();
+  static string DebugString(const TokenList& tokens);
+  static string DebugString(TokenList::const_iterator begin,
+                            TokenList::const_iterator end);
+
+  void Error(StringPiece error);
+  void Error(StringPiece error, StringPiece arg);
+  string DebugStringPrefix();
+
+  typedef void (CppParser::*DirectiveHandler)();
+  static const DirectiveHandler kDirectiveTable[];
+  static const DirectiveHandler kFalseConditionDirectiveTable[];
+
+  // include_dir_index for the current directory, which is not specified by -I.
+  // This is mainly used for the source file, or header files included by
+  // #include "somewhere.h"
+  static const int kCurrentDirIncludeDirIndex = 0;
+  // include_dir_index will start from this value
+  // for include directories specified by -iquote, -I, -isystem etc.
+  // -iquote range [kIncludeDirIndexStarting, bracket_include_dir_index_).
+  // others [bracket_include_dir_index_, ...).
+  // in other words,
+  //  #include "..." search starts from kIncludeDirIndexStarting.
+  //    kCurrentDirIncludeDirIndex is special for current dir.
+  //    directories specified by option are from kIncludeDirIndexStarting.
+  //  #include <...> search starts from bracket_include_dir_index_.
+  static const int kIncludeDirIndexStarting = 1;
+ private:
+
+  // Manage files having #pragma once.
+  class PragmaOnceFileSet {
+   public:
+    void Insert(const std::string& file);
+    bool Has(const std::string& file) const;
+
+   private:
+    std::unordered_set<std::string> files_;
+  };
+
+  struct Condition {
+    explicit Condition(bool cond) : cond(cond), taken(cond) {}
+    bool cond;
+    bool taken;
+  };
+
+  // Helper class for macro expansion.
+  // In macro expansion we associate each token with corresponding 'hide_set';
+  // This helper class and its Iterator are intended to help us manage
+  // two distinct lists (of tokens and hide_set) always together.
+  class MacroExpandContext {
+   public:
+    // Paired cursor over the token list and the hide-set list.
+    class Iterator {
+     public:
+      Iterator(TokenList::iterator iter, MacroSetList::iterator hs_iter)
+          : iter_(iter), hs_iter_(hs_iter) {}
+
+      // Both underlying iterators always step together.
+      Iterator& operator++() { ++iter_; ++hs_iter_; return *this; }
+      Iterator& operator--() { --iter_; --hs_iter_; return *this; }
+
+      TokenList::iterator iter() const { return iter_; }
+      MacroSetList::iterator hs_iter() const { return hs_iter_; }
+      const Token& token() const { return *iter_; }
+      const MacroSet& hide_set() const { return *hs_iter_; }
+
+      // Equality looks at the token iterator only.
+      bool operator==(const Iterator& rhs) const { return iter_ == rhs.iter_; }
+      bool operator!=(const Iterator& rhs) const { return !(*this == rhs); }
+
+     private:
+      TokenList::iterator iter_;
+      MacroSetList::iterator hs_iter_;
+    };
+
+    // Keeps the given pointers; the lists themselves are not copied.
+    MacroExpandContext(TokenList* tokens, MacroSetList* hide_sets)
+        : tokens_(tokens), hs_(hide_sets) {}
+
+    // Inserts |token| and |hide_set| before |pos| in their respective lists.
+    void Insert(const Iterator& pos,
+                const Token& token,
+                const MacroSet& hide_set) {
+      tokens_->insert(pos.iter(), token);
+      hs_->insert(pos.hs_iter(), hide_set);
+    }
+
+    // Inserts [begin, end) before |pos|, with one |hide_set| copy per token.
+    void Insert(const Iterator& pos,
+                TokenList::const_iterator begin,
+                TokenList::const_iterator end,
+                const MacroSet& hide_set) {
+      tokens_->insert(pos.iter(), begin, end);
+      hs_->insert(pos.hs_iter(), std::distance(begin, end), hide_set);
+    }
+
+    const TokenList& tokens() const { return *tokens_; }
+    const MacroSetList& hide_sets() const { return *hs_; }
+    Iterator Begin() const { return Iterator(tokens_->begin(), hs_->begin()); }
+    Iterator End() const { return Iterator(tokens_->end(), hs_->end()); }
+
+   private:
+    TokenList* tokens_;
+    MacroSetList* hs_;
+  };
+  typedef MacroExpandContext::Iterator MacroExpandIterator;
+  enum IncludeType {
+    kTypeInclude,
+    kTypeImport,  // include once.
+    kTypeIncludeNext,
+  };
+
+  class IntegerConstantEvaluator;
+
+  bool IsProcessedFileInternal(const string& filepath, int include_dir_index);
+
+  void ProcessInclude();
+  void ProcessImport();
+  void ProcessIncludeNext();
+  void ProcessDefine();
+  void ProcessUndef();
+  void ProcessConditionInFalse();
+  void ProcessIfdef();
+  void ProcessIfndef();
+  void ProcessIf();
+  void ProcessElse();
+  void ProcessEndif();
+  void ProcessElif();
+  void ProcessPragma();
+
+  void ProcessIncludeInternal(IncludeType include_type);
+
+  // Parser helpers.
+  void ReadObjectMacro(const string& name);
+  void ReadFunctionMacro(const string& name);
+  // Reads the identifier name to check #ifdef/#ifndef/defined(x)
+  // When the syntax is invalid, empty string will be returned.
+  string ReadDefined();
+  int ReadCondition();
+  // Same as ReadCondition except checking include guard form like
+  // #if !defined(FOO). When such condition is detected, |ident| is
+  // set to FOO.
+  int ReadConditionWithCheckingIncludeGuard(string* ident);
+
+  void TrimTokenSpace(Macro* macro);
+
+  bool FastGetMacroArgument(const ArrayTokenList& input_tokens,
+                            bool skip_space,
+                            ArrayTokenList::const_iterator* iter,
+                            ArrayTokenList* arg);
+  bool FastGetMacroArguments(const ArrayTokenList& input_tokens,
+                             bool skip_space,
+                             ArrayTokenList::const_iterator* iter,
+                             std::vector<ArrayTokenList>* args);
+  bool FastExpand(const ArrayTokenList& input_tokens, bool skip_space,
+                  std::set<int>* hideset, ArrayTokenList* output_tokens,
+                  bool* need_fallback);
+
+  bool Expand0Fastpath(const ArrayTokenList& input, bool skip_space,
+                       ArrayTokenList* output);
+
+  // Macro expansion routines.
+  // (skip_space parameter is passed around mainly for optimization;
+  // for integer expression evaluation in most cases we don't need to
+  // preserve spaces.)
+  void Expand0(const ArrayTokenList& input, ArrayTokenList* output,
+               bool skip_space);
+  void Expand(MacroExpandContext* input, MacroExpandIterator input_iter,
+              MacroExpandContext* output,
+              const MacroExpandIterator output_iter,
+              bool skip_space, bool use_hideset);
+  MacroExpandIterator Substitute(const ArrayTokenList& replacement,
+                                 size_t num_args,
+                                 const ArrayArgList& args,
+                                 const MacroSet& hide_set,
+                                 MacroExpandContext* output,
+                                 const MacroExpandIterator output_iter,
+                                 bool skip_space, bool use_hideset);
+  void Glue(TokenList::iterator left_pos, const Token& right);
+  Token Stringize(const TokenList& list);
+  bool SkipUntilBeginMacroArguments(const string& macro_name,
+                                    const MacroExpandContext& input,
+                                    MacroExpandIterator* iter);
+  bool GetMacroArguments(const std::string& macro_name, Macro* macro,
+                         ArrayArgList* args,
+                         const MacroExpandContext& input,
+                         MacroExpandIterator* iter,
+                         MacroSet* rparen_hs);
+
+  // Macro dictionary helpers.
+  // second element of returned value represents
+  // whether macro is taken from cache or not.
+  std::pair<Macro*, bool> AddMacro(const string& name, Macro::Type type,
+                                   const FileId& fileid, size_t macro_pos);
+  std::pair<Macro*, bool> AddMacroInternal(const string& name, Macro::Type type,
+                          const FileId& fileid, size_t macro_pos);
+  Macro* GetMacro(const string& name, bool add_undefined);
+
+  // Returns the input at the back of |inputs_|; once |inputs_| is empty,
+  // falls back to |last_input_|.
+  Input* input() const {
+    return HasMoreInput() ? inputs_.back().get()
+                          : last_input_.get();
+  }
+
+  bool HasMoreInput() const {
+    return !inputs_.empty();
+  }
+  void PopInput();
+
+  bool CurrentCondition() const {
+    return conditions_.empty() || conditions_.back().cond;
+  }
+
+  // Predefined macro callbacks.
+  Token GetFileName();
+  Token GetLineNumber();
+  Token GetDate();
+  Token GetTime();
+  Token GetCounter();
+  Token GetBaseFile();
+
+  Token ProcessHasInclude(const ArrayTokenList& tokens);
+  Token ProcessHasIncludeNext(const ArrayTokenList& tokens);
+  bool ProcessHasIncludeInternal(const ArrayTokenList& tokens,
+                                 bool is_include_next);
+
+  // __has_feature() handler; returns Token(0) if |compiler_info_| is null.
+  Token ProcessHasFeature(const ArrayTokenList& tokens) {
+    if (compiler_info_)
+      return ProcessHasCheckMacro("__has_feature", tokens,
+                                  compiler_info_->has_feature());
+    VLOG(1) << DebugStringPrefix() << " CompilerInfo is not set.";
+    return Token(0);
+  }
+  // __has_extension() handler; returns Token(0) if |compiler_info_| is null.
+  Token ProcessHasExtension(const ArrayTokenList& tokens) {
+    if (compiler_info_)
+      return ProcessHasCheckMacro("__has_extension", tokens,
+                                  compiler_info_->has_extension());
+    VLOG(1) << DebugStringPrefix() << " CompilerInfo is not set.";
+    return Token(0);
+  }
+  // __has_attribute() handler; returns Token(0) if |compiler_info_| is null.
+  Token ProcessHasAttribute(const ArrayTokenList& tokens) {
+    if (compiler_info_)
+      return ProcessHasCheckMacro("__has_attribute", tokens,
+                                  compiler_info_->has_attribute());
+    VLOG(1) << DebugStringPrefix() << " CompilerInfo is not set.";
+    return Token(0);
+  }
+  // __has_cpp_attribute() handler; Token(0) if |compiler_info_| is null.
+  Token ProcessHasCppAttribute(const ArrayTokenList& tokens) {
+    if (compiler_info_)
+      return ProcessHasCheckMacro("__has_cpp_attribute", tokens,
+                                  compiler_info_->has_cpp_attribute());
+    VLOG(1) << DebugStringPrefix() << " CompilerInfo is not set.";
+    return Token(0);
+  }
+  // __has_declspec_attribute() handler; Token(0) if |compiler_info_| is null.
+  Token ProcessHasDeclspecAttribute(const ArrayTokenList& tokens) {
+    if (compiler_info_)
+      return ProcessHasCheckMacro("__has_declspec_attribute", tokens,
+                                  compiler_info_->has_declspec_attribute());
+    VLOG(1) << DebugStringPrefix() << " CompilerInfo is not set.";
+    return Token(0);
+  }
+  // __has_builtin() handler; returns Token(0) if |compiler_info_| is null.
+  Token ProcessHasBuiltin(const ArrayTokenList& tokens) {
+    if (compiler_info_)
+      return ProcessHasCheckMacro("__has_builtin", tokens,
+                                  compiler_info_->has_builtin());
+    VLOG(1) << DebugStringPrefix() << " CompilerInfo is not set.";
+    return Token(0);
+  }
+
+  Token ProcessHasCheckMacro(
+      const string& name,
+      const ArrayTokenList& tokens,
+      const unordered_map<string, int>& has_check_macro);
+
+#ifdef _WIN32
+  static BOOL WINAPI InitializeWinOnce(PINIT_ONCE, PVOID, PVOID*);
+#endif
+  static void InitializeStaticOnce();
+
+  std::vector<std::unique_ptr<Input>> inputs_;
+  std::unique_ptr<Input> last_input_;
+
+  Token last_token_;
+  std::unique_ptr<MacroEnv> macros_;
+
+  std::vector<Condition> conditions_;
+  int condition_in_false_depth_;
+
+  PragmaOnceFileSet pragma_once_fileset_;
+
+  string current_date_;
+  string current_time_;
+  string base_file_;
+  int counter_;
+
+  unordered_map<string, bool> enabled_predefined_macros_;
+
+  bool is_cplusplus_;
+
+  int next_macro_id_;
+
+  int bracket_include_dir_index_;
+  IncludeObserver* include_observer_;
+  ErrorObserver* error_observer_;
+
+  // When include guard macro is detected, the token is preserved here.
+  unordered_map<string, string> include_guard_ident_;
+
+  const CompilerInfo* compiler_info_;
+  bool is_vc_;
+
+  // disabled_ becomes true if it detects unsupported features and is
+  // pretty sure it couldn't pass necessary files to IncludeObserver.
+  // b/9286087
+  bool disabled_;
+
+  // For statistics.
+  int skipped_files_;
+  int total_files_;
+
+  // List of pointers to Macro cached in |macros_| during include processing.
+  std::vector<Macro*> used_macros_;
+
+  int obj_cache_hit_;
+  int func_cache_hit_;
+
+  PlatformThreadId owner_thread_id_;
+
+  typedef unordered_map<string, Macro::CallbackObj> PredefinedObjMacroMap;
+  typedef unordered_map<string, Macro::CallbackFunc> PredefinedFuncMacroMap;
+
+  static PredefinedObjMacroMap* predefined_macros_;
+  static PredefinedFuncMacroMap* predefined_func_macros_;
+  static bool global_initialized_;
+#ifndef _WIN32
+  static pthread_once_t key_once_;
+#else
+  static INIT_ONCE key_once_;
+#endif
+
+  friend class CppParserTest;
+  DISALLOW_COPY_AND_ASSIGN(CppParser);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_CPP_PARSER_H_
diff --git a/client/cpp_parser_unittest.cc b/client/cpp_parser_unittest.cc
new file mode 100644
index 0000000..be9d0bb
--- /dev/null
+++ b/client/cpp_parser_unittest.cc
@@ -0,0 +1,1436 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include <list>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <glog/stl_logging.h>
+#include <gtest/gtest.h>
+
+#include "compiler_specific.h"
+#include "cpp_parser.h"
+#include "timestamp.h"
+#include "unittest_util.h"
+
+namespace devtools_goma {
+
+using Token = CppToken;
+using ArrayTokenList = std::vector<Token>;
+using TokenList = std::list<Token>;
+
+// Test fixture for CppParser: owns a per-test TmpdirUtil and provides
+// helpers to drain tokens and to check macro expansion results.
+class CppParserTest : public testing::Test {
+ protected:
+  // A fresh TmpdirUtil is created before and released after every test.
+  void SetUp() override { tmpdir_.reset(new TmpdirUtil("cpp_parser_test")); }
+  void TearDown() override { tmpdir_.reset(); }
+
+  // Reads tokens (spaces included) from |parser| up to, not including, END.
+  ArrayTokenList GetAllTokens(CppParser* parser) const {
+    ArrayTokenList result;
+    while (true) {
+      Token next = parser->NextToken(false);
+      if (next.type == Token::END) {
+        return result;
+      }
+      result.push_back(std::move(next));
+    }
+  }
+
+  // Defines macros from |defines|, then checks that |expand| expands to
+  // |expected| via both Expand0 and Expand0Fastpath (spaces skipped).
+  void CheckExpand(const std::string& defines, const std::string& expand,
+                   const ArrayTokenList& expected) {
+    CppParser cpp_parser;
+    cpp_parser.AddStringInput(defines, "(string)");
+    EXPECT_TRUE(cpp_parser.ProcessDirectives());
+
+    cpp_parser.AddStringInput(expand, "(string)");
+    const ArrayTokenList input_tokens = GetAllTokens(&cpp_parser);
+
+    // Regular expansion path.
+    ArrayTokenList actual;
+    cpp_parser.Expand0(input_tokens, &actual, true);
+    EXPECT_EQ(actual.size(), expected.size());
+    EXPECT_EQ(actual, expected)
+        << "defines: " << defines << '\n'
+        << "expand: " << expand << '\n'
+        << "expanded: "
+        << CppParser::DebugString(TokenList(actual.begin(), actual.end()));
+
+    // The fast path must succeed and produce the same expected tokens.
+    actual.clear();
+    EXPECT_TRUE(cpp_parser.Expand0Fastpath(input_tokens, true, &actual));
+    EXPECT_EQ(actual.size(), expected.size());
+    EXPECT_EQ(actual, expected)
+        << "defines: " << defines << '\n'
+        << "expand: " << expand << '\n'
+        << "expanded: "
+        << CppParser::DebugString(TokenList(actual.begin(), actual.end()));
+  }
+
+  // Per-test TmpdirUtil; created in SetUp() and released in TearDown().
+  std::unique_ptr<TmpdirUtil> tmpdir_;
+};
+
+// IncludeObserver backed by an in-memory map from path to file content.
+class CppIncludeObserver : public CppParser::IncludeObserver {
+ public:
+  explicit CppIncludeObserver(CppParser* parser) : parser_(parser) {}
+  ~CppIncludeObserver() override {}
+
+  // Feeds registered content of |path| to the parser, or counts a skip.
+  bool HandleInclude(
+      const string& path,
+      const string& current_directory ALLOW_UNUSED,
+      const string& current_filepath ALLOW_UNUSED,
+      char quote_char ALLOW_UNUSED,
+      int include_dir_index) override {
+    if (parser_->IsProcessedFile(path, include_dir_index)) {
+      ++skipped_[path];
+      return true;
+    }
+    const auto found = includes_.find(path);
+    if (found == includes_.end()) {
+      return false;  // No content registered for |path|.
+    }
+    ++included_[path];
+    parser_->AddStringInput(found->second, found->first);
+    return true;
+  }
+
+  // Returns true iff content was registered for |path|.
+  bool HasInclude(
+      const string& path,
+      const string& current_directory ALLOW_UNUSED,
+      const string& current_filepath ALLOW_UNUSED,
+      char quote_char ALLOW_UNUSED,
+      int include_dir_index ALLOW_UNUSED) override {
+    return includes_.count(path) > 0;
+  }
+
+  // Registers |content| for |filepath|; an existing entry is kept as is.
+  void SetInclude(const string& filepath, const string& content) {
+    includes_.insert(std::make_pair(filepath, content));
+  }
+
+  // Number of times |filepath| was skipped as already processed.
+  int SkipCount(const string& filepath) const {
+    const auto entry = skipped_.find(filepath);
+    return entry == skipped_.end() ? 0 : entry->second;
+  }
+
+  // Number of times the content of |filepath| was handed to the parser.
+  int IncludedCount(const string& filepath) const {
+    const auto entry = included_.find(filepath);
+    return entry == included_.end() ? 0 : entry->second;
+  }
+
+ private:
+  CppParser* parser_;
+  std::map<string, string> includes_;
+  std::map<string, int> skipped_;
+  std::map<string, int> included_;
+  DISALLOW_COPY_AND_ASSIGN(CppIncludeObserver);
+};
+
+// ErrorObserver that records every reported message, in order.
+class CppErrorObserver : public CppParser::ErrorObserver {
+ public:
+  CppErrorObserver() {}
+  ~CppErrorObserver() override {}
+
+  // Appends |error| to the recorded messages.
+  void HandleError(const string& error) override { errors_.push_back(error); }
+  // Messages received so far, oldest first.
+  const std::vector<string>& errors() const { return errors_; }
+
+ private:
+  std::vector<string> errors_;
+  DISALLOW_COPY_AND_ASSIGN(CppErrorObserver);
+};
+
+TEST_F(CppParserTest, MacroSet) {
+  MacroSet a, b, c;  // |c| is never used below.
+  EXPECT_TRUE(a.empty());
+  a.Set(4);
+  a.Set(10);
+  b.Set(80);
+  EXPECT_FALSE(a.empty());
+  EXPECT_FALSE(b.empty());
+  EXPECT_TRUE(a.Get(4));
+  EXPECT_FALSE(a.Get(80));
+  EXPECT_FALSE(b.Get(4));
+  EXPECT_TRUE(b.Get(80));
+  a.Union(b);  // |a| gains the ids of |b|.
+  EXPECT_FALSE(a.Get(0));
+  EXPECT_TRUE(a.Get(4));
+  EXPECT_TRUE(a.Get(80));
+}
+
+TEST_F(CppParserTest, TokenizeDefineString) {
+  CppParser cpp_parser;
+  cpp_parser.AddStringInput("#define KOTORI \"piyo\\\"piyo\"", "(string)");
+  // NextToken(true) skips spaces: expect '#', two identifiers, one string.
+  CppParser::Token t = cpp_parser.NextToken(true);
+  EXPECT_EQ(t.type, CppParser::Token::SHARP);
+
+  t = cpp_parser.NextToken(true);
+  EXPECT_EQ(t.type, CppParser::Token::IDENTIFIER);
+  EXPECT_EQ(t.string_value, "define");
+
+  t = cpp_parser.NextToken(true);
+  EXPECT_EQ(t.type, CppParser::Token::IDENTIFIER);
+  EXPECT_EQ(t.string_value, "KOTORI");
+
+  t = cpp_parser.NextToken(true);
+  EXPECT_EQ(t.type, CppParser::Token::STRING);
+  EXPECT_EQ(t.string_value, "piyo\\\"piyo");  // Escape kept, quotes dropped.
+
+  t = cpp_parser.NextToken(true);
+  EXPECT_EQ(t.type, CppParser::Token::END);
+}
+
+TEST_F(CppParserTest, DontCrashWithEmptyInclude) {
+  CppParser cpp_parser;
+  cpp_parser.AddStringInput("#include\n", "(string)");  // No file name.
+  CppErrorObserver err_observer;
+  cpp_parser.set_error_observer(&err_observer);
+  cpp_parser.ProcessDirectives();
+  ASSERT_EQ(1U, err_observer.errors().size());  // Exactly one error.
+  EXPECT_EQ("CppParser((string):2) "
+            "#include expects \"filename\" or <filename>",
+            err_observer.errors()[0]);
+}
+
+TEST_F(CppParserTest, DontCrashWithEmptyHasInclude) {
+  CppParser cpp_parser;
+  cpp_parser.EnablePredefinedMacro("__has_include");
+  cpp_parser.AddStringInput("#if __has_include()\n#endif\n"
+                            "#if __has_include(\n#endif\n"
+                            "#if __has_include",
+                            "(string)");
+  CppErrorObserver err_observer;
+  cpp_parser.set_error_observer(&err_observer);
+  cpp_parser.ProcessDirectives();
+  ASSERT_EQ(3U, err_observer.errors().size());  // One per malformed #if.
+  EXPECT_EQ("CppParser((string):2) "
+            "__has_include expects \"filename\" or <filename>",
+            err_observer.errors()[0]);
+  EXPECT_EQ("CppParser((string):4) "
+            "__has_include expects \"filename\" or <filename>",
+            err_observer.errors()[1]);
+  EXPECT_EQ("CppParser((string):5) "
+            "macro is referred without any arguments:__has_include",
+            err_observer.errors()[2]);
+}
+
+TEST_F(CppParserTest, HasFeatureResultValue) {
+  std::unique_ptr<CompilerInfoData> info_data(new CompilerInfoData);
+  info_data->add_supported_predefined_macros("__has_feature");
+  info_data->add_supported_predefined_macros("__has_extension");
+  info_data->add_supported_predefined_macros("__has_attribute");
+  info_data->add_supported_predefined_macros("__has_cpp_attribute");
+  info_data->add_supported_predefined_macros("__has_declspec_attribute");
+  info_data->add_supported_predefined_macros("__has_builtin");
+  CompilerInfoData::MacroValue* m;  // Reused for each entry added below.
+  m = info_data->add_has_feature();
+  m->set_key("feature");
+  m->set_value(2);
+  m = info_data->add_has_extension();
+  m->set_key("extension");
+  m->set_value(3);
+  m = info_data->add_has_attribute();
+  m->set_key("attribute");
+  m->set_value(4);
+  m = info_data->add_has_cpp_attribute();
+  m->set_key("cpp_attribute");
+  m->set_value(5);
+  m = info_data->add_has_declspec_attribute();
+  m->set_key("declspec_attribute");
+  m->set_value(6);
+  m = info_data->add_has_builtin();
+  m->set_key("builtin");
+  m->set_value(7);
+  // Each __has_*() category now holds exactly one key with a distinct value.
+  CompilerInfo info(std::move(info_data));
+
+  CppParser cpp_parser;
+  cpp_parser.SetCompilerInfo(&info);
+  // Each check must yield its own key's value and 0 for every other key.
+  cpp_parser.AddStringInput(
+    "#if __has_feature(feature) == 2\n"
+    "# define FEATURE_FEATURE_OK\n"
+    "#endif\n"
+    "#if __has_feature( feature ) == 2\n"
+    "# define FEATURE_FEATURE_SPACE_OK\n"
+    "#endif\n"
+    "#if __has_feature(extension) == 0\n"
+    "# define FEATURE_EXTENSION_OK\n"
+    "#endif\n"
+    "#if __has_feature(attribute) == 0\n"
+    "# define FEATURE_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_feature(cpp_attribute) == 0\n"
+    "# define FEATURE_CPP_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_feature(declspec_attribute) == 0\n"
+    "# define FEATURE_DECLSPEC_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_feature(builtin) == 0\n"
+    "# define FEATURE_BUILTIN_OK\n"
+    "#endif\n"
+    "#if __has_extension(feature) == 0\n"
+    "# define EXTENSION_FEATURE_OK\n"
+    "#endif\n"
+    "#if __has_extension(extension) == 3\n"
+    "# define EXTENSION_EXTENSION_OK\n"
+    "#endif\n"
+    "#if __has_extension( extension ) == 3\n"
+    "# define EXTENSION_EXTENSION_SPACE_OK\n"
+    "#endif\n"
+    "#if __has_extension(attribute) == 0\n"
+    "# define EXTENSION_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_extension(cpp_attribute) == 0\n"
+    "# define EXTENSION_CPP_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_extension(declspec_attribute) == 0\n"
+    "# define EXTENSION_DECLSPEC_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_extension(builtin) == 0\n"
+    "# define EXTENSION_BUILTIN_OK\n"
+    "#endif\n"
+    "#if __has_attribute(feature) == 0\n"
+    "# define ATTRIBUTE_FEATURE_OK\n"
+    "#endif\n"
+    "#if __has_attribute(extension) == 0\n"
+    "# define ATTRIBUTE_EXTENSION_OK\n"
+    "#endif\n"
+    "#if __has_attribute(attribute) == 4\n"
+    "# define ATTRIBUTE_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_attribute( attribute ) == 4\n"
+    "# define ATTRIBUTE_ATTRIBUTE_SPACE_OK\n"
+    "#endif\n"
+    "#if __has_attribute(cpp_attribute) == 0\n"
+    "# define ATTRIBUTE_CPP_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_attribute(declspec_attribute) == 0\n"
+    "# define ATTRIBUTE_DECLSPEC_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_attribute(builtin) == 0\n"
+    "# define ATTRIBUTE_BUILTIN_OK\n"
+    "#endif\n"
+    "#if __has_cpp_attribute(feature) == 0\n"
+    "# define CPP_ATTRIBUTE_FEATURE_OK\n"
+    "#endif\n"
+    "#if __has_cpp_attribute(extension) == 0\n"
+    "# define CPP_ATTRIBUTE_EXTENSION_OK\n"
+    "#endif\n"
+    "#if __has_cpp_attribute(attribute) == 0\n"
+    "# define CPP_ATTRIBUTE_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_cpp_attribute(cpp_attribute) == 5\n"
+    "# define CPP_ATTRIBUTE_CPP_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_cpp_attribute( cpp_attribute ) == 5\n"
+    "# define CPP_ATTRIBUTE_CPP_ATTRIBUTE_SPACE_OK\n"
+    "#endif\n"
+    "#if __has_cpp_attribute(declspec_attribute) == 0\n"
+    "# define CPP_ATTRIBUTE_DECLSPEC_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_cpp_attribute(builtin) == 0\n"
+    "# define CPP_ATTRIBUTE_BUILTIN_OK\n"
+    "#endif\n"
+    "#if __has_declspec_attribute(feature) == 0\n"
+    "# define DECLSPEC_ATTRIBUTE_FEATURE_OK\n"
+    "#endif\n"
+    "#if __has_declspec_attribute(extension) == 0\n"
+    "# define DECLSPEC_ATTRIBUTE_EXTENSION_OK\n"
+    "#endif\n"
+    "#if __has_declspec_attribute(attribute) == 0\n"
+    "# define DECLSPEC_ATTRIBUTE_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_declspec_attribute(cpp_attribute) == 0\n"
+    "# define DECLSPEC_ATTRIBUTE_CPP_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_declspec_attribute(declspec_attribute) == 6\n"
+    "# define DECLSPEC_ATTRIBUTE_DECLSPEC_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_declspec_attribute( declspec_attribute ) == 6\n"
+    "# define DECLSPEC_ATTRIBUTE_DECLSPEC_ATTRIBUTE_SPACE_OK\n"
+    "#endif\n"
+    "#if __has_declspec_attribute(builtin) == 0\n"
+    "# define DECLSPEC_ATTRIBUTE_BUILTIN_OK\n"
+    "#endif\n"
+    "#if __has_builtin(feature) == 0\n"
+    "# define BUILTIN_FEATURE_OK\n"
+    "#endif\n"
+    "#if __has_builtin(extension) == 0\n"
+    "# define BUILTIN_EXTENSION_OK\n"
+    "#endif\n"
+    "#if __has_builtin(attribute) == 0\n"
+    "# define BUILTIN_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_builtin(cpp_attribute) == 0\n"
+    "# define BUILTIN_CPP_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_builtin(declspec_attribute) == 0\n"
+    "# define BUILTIN_DECLSPEC_ATTRIBUTE_OK\n"
+    "#endif\n"
+    "#if __has_builtin(builtin) == 7\n"
+    "# define BUILTIN_BUILTIN_OK\n"
+    "#endif\n"
+    "#if __has_builtin( builtin ) == 7\n"
+    "# define BUILTIN_BUILTIN_SPACE_OK\n"
+    "#endif\n", "(string)");
+  cpp_parser.ProcessDirectives();
+  // A *_OK macro is defined only when its #if above evaluated to true.
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("FEATURE_FEATURE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("FEATURE_FEATURE_SPACE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("FEATURE_EXTENSION_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("FEATURE_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("FEATURE_CPP_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("FEATURE_DECLSPEC_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("FEATURE_BUILTIN_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("EXTENSION_FEATURE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("EXTENSION_EXTENSION_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("EXTENSION_EXTENSION_SPACE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("EXTENSION_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("EXTENSION_CPP_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("EXTENSION_DECLSPEC_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("EXTENSION_BUILTIN_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("ATTRIBUTE_FEATURE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("ATTRIBUTE_EXTENSION_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("ATTRIBUTE_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("ATTRIBUTE_ATTRIBUTE_SPACE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("ATTRIBUTE_CPP_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("ATTRIBUTE_DECLSPEC_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("ATTRIBUTE_BUILTIN_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("CPP_ATTRIBUTE_FEATURE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("CPP_ATTRIBUTE_EXTENSION_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("CPP_ATTRIBUTE_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("CPP_ATTRIBUTE_CPP_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined(
+      "CPP_ATTRIBUTE_CPP_ATTRIBUTE_SPACE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("CPP_ATTRIBUTE_DECLSPEC_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("CPP_ATTRIBUTE_BUILTIN_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("DECLSPEC_ATTRIBUTE_FEATURE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("DECLSPEC_ATTRIBUTE_EXTENSION_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("DECLSPEC_ATTRIBUTE_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("DECLSPEC_ATTRIBUTE_CPP_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined(
+                  "DECLSPEC_ATTRIBUTE_DECLSPEC_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined(
+                  "DECLSPEC_ATTRIBUTE_DECLSPEC_ATTRIBUTE_SPACE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("DECLSPEC_ATTRIBUTE_BUILTIN_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("BUILTIN_FEATURE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("BUILTIN_EXTENSION_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("BUILTIN_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("BUILTIN_CPP_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("BUILTIN_DECLSPEC_ATTRIBUTE_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("BUILTIN_BUILTIN_OK"));
+  EXPECT_TRUE(cpp_parser.IsMacroDefined("BUILTIN_BUILTIN_SPACE_OK"));
+}
+
+TEST_F(CppParserTest, PredefinedNoCache) {
+  // Two independent passes: "#ifdef __has_builtin" must pull in a.h each
+  // time, and neither cache-hit counter may move on either pass.
+  InitMacroEnvCache();
+  for (int pass = 0; pass < 2; ++pass) {
+    std::unique_ptr<CompilerInfoData> data(new CompilerInfoData);
+    data->add_supported_predefined_macros("__has_builtin");
+    CompilerInfo compiler_info(std::move(data));
+
+    CppParser parser;
+    parser.SetCompilerInfo(&compiler_info);
+
+    CppIncludeObserver observer(&parser);
+    observer.SetInclude("a.h", "");
+    parser.set_include_observer(&observer);
+
+    parser.AddStringInput("#ifdef __has_builtin\n"
+                          "# include \"a.h\"\n"
+                          "#endif\n", "");
+    parser.ProcessDirectives();
+
+    // The observer is rebuilt per pass, so the expected count stays at 1.
+    EXPECT_EQ(1, observer.IncludedCount("a.h"));
+    EXPECT_EQ(0, parser.obj_cache_hit());
+    EXPECT_EQ(0, parser.func_cache_hit());
+  }
+  QuitMacroEnvCache();
+}
+
+TEST_F(CppParserTest, ClangExtendedCheckMacro) {
+  // A namespaced attribute name is accepted by __has_cpp_attribute; a
+  // malformed name is reported as an error and its #if is not taken.
+  InitMacroEnvCache();
+
+  std::unique_ptr<CompilerInfoData> data(new CompilerInfoData);
+  data->add_supported_predefined_macros("__has_cpp_attribute");
+  CompilerInfoData::MacroValue* attr = data->add_has_cpp_attribute();
+  attr->set_key("clang::fallthrough");
+  attr->set_value(1);
+
+  CompilerInfo compiler_info(std::move(data));
+
+  CppParser parser;
+  parser.SetCompilerInfo(&compiler_info);
+
+  // "clang::fallthrough" is registered above, "clang@@fallthrough" is the
+  // malformed spelling. The third #if repeats the first one.
+  parser.AddStringInput("#if __has_cpp_attribute(clang::fallthrough)\n"
+                        "# define FOO\n"
+                        "#endif\n"
+                        "#if __has_cpp_attribute(clang@@fallthrough)\n"
+                        "# define BAR\n"
+                        "#endif\n"
+                        "#if __has_cpp_attribute(clang::fallthrough)\n"
+                        "# define BAZ\n"
+                        "#endif\n",
+                        "(string)");
+
+  CppErrorObserver error_log;
+  parser.set_error_observer(&error_log);
+  parser.ProcessDirectives();
+
+  EXPECT_TRUE(parser.IsMacroDefined("FOO"));
+  EXPECT_FALSE(parser.IsMacroDefined("BAR"));
+  EXPECT_TRUE(parser.IsMacroDefined("BAZ"));
+
+  // TODO: I feel this is a change detection test...
+  ASSERT_EQ(1U, error_log.errors().size()) << error_log.errors();
+  EXPECT_EQ("CppParser((string):5) "
+            "__has_cpp_attribute expects an identifier",
+            error_log.errors()[0]);
+  QuitMacroEnvCache();
+}
+
+TEST_F(CppParserTest, DontCrashWithEmptyTokenInCheckMacro) {
+  // Every __has_* check macro is misused three ways: "()", an unclosed "("
+  // and no parentheses at all. Each misuse must report exactly one error.
+  std::unique_ptr<CompilerInfoData> data(new CompilerInfoData);
+  data->add_supported_predefined_macros("__has_feature");
+  data->add_supported_predefined_macros("__has_extension");
+  data->add_supported_predefined_macros("__has_attribute");
+  data->add_supported_predefined_macros("__has_cpp_attribute");
+  data->add_supported_predefined_macros("__has_declspec_attribute");
+  data->add_supported_predefined_macros("__has_builtin");
+  CompilerInfoData::MacroValue* entry = data->add_has_feature();
+  entry->set_key("foo");
+  entry->set_value(1);
+  entry = data->add_has_extension();
+  entry->set_key("foo");
+  entry->set_value(1);
+  entry = data->add_has_attribute();
+  entry->set_key("foo");
+  entry->set_value(1);
+  entry = data->add_has_cpp_attribute();
+  entry->set_key("foo");
+  entry->set_value(1);
+  entry = data->add_has_declspec_attribute();
+  entry->set_key("foo");
+  entry->set_value(1);
+  entry = data->add_has_builtin();
+  entry->set_key("foo");
+  entry->set_value(1);
+
+  CompilerInfo compiler_info(std::move(data));
+  CppParser parser;
+  parser.SetCompilerInfo(&compiler_info);
+
+  parser.AddStringInput("#if __has_feature()\n#endif\n"
+                        "#if __has_feature(\n#endif\n"
+                        "#if __has_feature\n#endif\n"
+                        "#if __has_extension()\n#endif\n"
+                        "#if __has_extension(\n#endif\n"
+                        "#if __has_extension\n#endif\n"
+                        "#if __has_attribute()\n#endif\n"
+                        "#if __has_attribute(\n#endif\n"
+                        "#if __has_attribute\n#endif\n"
+                        "#if __has_cpp_attribute()\n#endif\n"
+                        "#if __has_cpp_attribute(\n#endif\n"
+                        "#if __has_cpp_attribute\n#endif\n"
+                        "#if __has_declspec_attribute()\n#endif\n"
+                        "#if __has_declspec_attribute(\n#endif\n"
+                        "#if __has_declspec_attribute\n#endif\n"
+                        "#if __has_builtin()\n#endif\n"
+                        "#if __has_builtin(\n#endif\n"
+                        "#if __has_builtin\n#endif\n",
+                        "(string)");
+  CppErrorObserver error_log;
+  parser.set_error_observer(&error_log);
+  parser.ProcessDirectives();
+  ASSERT_EQ(18U, error_log.errors().size()) << error_log.errors();
+  EXPECT_EQ("CppParser((string):2) "
+            "__has_feature expects an identifier",
+            error_log.errors()[0]);
+  EXPECT_EQ("CppParser((string):4) "
+            "__has_feature expects an identifier",
+            error_log.errors()[1]);
+  EXPECT_EQ("CppParser((string):6) "
+            "macro is referred without any arguments:__has_feature",
+            error_log.errors()[2]);
+  EXPECT_EQ("CppParser((string):8) "
+            "__has_extension expects an identifier",
+            error_log.errors()[3]);
+  EXPECT_EQ("CppParser((string):10) "
+            "__has_extension expects an identifier",
+            error_log.errors()[4]);
+  EXPECT_EQ("CppParser((string):12) "
+            "macro is referred without any arguments:__has_extension",
+            error_log.errors()[5]);
+  EXPECT_EQ("CppParser((string):14) "
+            "__has_attribute expects an identifier",
+            error_log.errors()[6]);
+  EXPECT_EQ("CppParser((string):16) "
+            "__has_attribute expects an identifier",
+            error_log.errors()[7]);
+  EXPECT_EQ("CppParser((string):18) "
+            "macro is referred without any arguments:__has_attribute",
+            error_log.errors()[8]);
+  EXPECT_EQ("CppParser((string):20) "
+            "__has_cpp_attribute expects an identifier",
+            error_log.errors()[9]);
+  EXPECT_EQ("CppParser((string):22) "
+            "__has_cpp_attribute expects an identifier",
+            error_log.errors()[10]);
+  EXPECT_EQ("CppParser((string):24) "
+            "macro is referred without any arguments:__has_cpp_attribute",
+            error_log.errors()[11]);
+  EXPECT_EQ("CppParser((string):26) "
+            "__has_declspec_attribute expects an identifier",
+            error_log.errors()[12]);
+  EXPECT_EQ("CppParser((string):28) "
+            "__has_declspec_attribute expects an identifier",
+            error_log.errors()[13]);
+  EXPECT_EQ("CppParser((string):30) "
+            "macro is referred without any arguments:__has_declspec_attribute",
+            error_log.errors()[14]);
+  EXPECT_EQ("CppParser((string):32) "
+            "__has_builtin expects an identifier",
+            error_log.errors()[15]);
+  EXPECT_EQ("CppParser((string):34) "
+            "__has_builtin expects an identifier",
+            error_log.errors()[16]);
+  EXPECT_EQ("CppParser((string):36) "
+            "macro is referred without any arguments:__has_builtin",
+            error_log.errors()[17]);
+}
+
+TEST_F(CppParserTest, ExpandMacro) {
+  // Calls with a wrong number of arguments -- M(x), M2(), M2(1) and
+  // M2(1,,1) -- must each report an error; the other calls report none.
+  CppParser cpp_parser;
+  cpp_parser.AddStringInput("#define M() 1\n"
+                            "#if M()\n"
+                            "#endif\n"
+                            "#if M(x)\n"
+                            "#endif\n"
+                            "#define M1(x) x\n"
+                            "#if M1()\n"
+                            "#endif\n"
+                            "#if M1(1)\n"
+                            "#endif\n"
+                            "#define M2(x,y) x+y\n"
+                            "#if M2(1,1)\n"
+                            "#endif\n"
+                            "#if M2(,1)\n"
+                            "#endif\n"
+                            "#if M2(1,)\n"
+                            "#endif\n"
+                            "#if M2()\n"
+                            "#endif\n"
+                            "#if M2(1)\n"
+                            "#endif\n"
+                            "#if M2(1,,1)\n"
+                            "#endif\n",
+                            "(string)");
+  CppErrorObserver err_observer;
+  cpp_parser.set_error_observer(&err_observer);
+  cpp_parser.ProcessDirectives();
+  ASSERT_EQ(4U, err_observer.errors().size()) << err_observer.errors();
+  // TODO: why is the reported line the #endif right after the failing #if?
+  EXPECT_EQ("CppParser((string):5) "  // M(x)
+            "macro argument number mismatching with the parameter list",
+            err_observer.errors()[0]);
+  EXPECT_EQ("CppParser((string):19) "  // M2()
+            "macro argument number mismatching with the parameter list",
+            err_observer.errors()[1]);
+  EXPECT_EQ("CppParser((string):21) "  // M2(1)
+            "macro argument number mismatching with the parameter list",
+            err_observer.errors()[2]);
+  EXPECT_EQ("CppParser((string):23) "  // M2(1,,1)
+            "macro argument number mismatching with the parameter list",
+            err_observer.errors()[3]);
+}
+
+TEST_F(CppParserTest, IncludeMoreThanOnce) {
+  // foo.h is #included twice, with "hoge" defined and then undefined;
+  // no skipped file may be reported.
+  CppParser parser;
+  CppIncludeObserver observer(&parser);
+  observer.SetInclude("foo.h", "#ifdef hoge\n#endif\n");
+  parser.set_include_observer(&observer);
+  parser.AddStringInput("#define hoge\n"
+                        "#include <foo.h>\n"
+                        "#undef hoge\n"
+                        "#include <foo.h>\n",
+                        "foo.cc");
+  parser.ProcessDirectives();
+  EXPECT_EQ(2, parser.total_files());
+  EXPECT_EQ(0, parser.skipped_files());
+}
+
+TEST_F(CppParserTest, ImportOnlyOnce) {
+  // The second #import of foo.h is expected to be reported as skipped,
+  // although "hoge" was undefined in between.
+  CppParser parser;
+  CppIncludeObserver observer(&parser);
+  observer.SetInclude("foo.h", "#ifdef hoge\n#endif\n");
+  parser.set_include_observer(&observer);
+  parser.AddStringInput("#define hoge\n"
+                        "#import <foo.h>\n"
+                        "#undef hoge\n"
+                        "#import <foo.h>\n",
+                        "foo.cc");
+  parser.ProcessDirectives();
+  EXPECT_EQ(2, parser.total_files());
+  EXPECT_EQ(1, parser.skipped_files());
+}
+
+TEST_F(CppParserTest, BoolShouldBeTreatedAsBoolOnCplusplus) {
+  // With C++ mode on, "#if true" is taken and "#if false" is not.
+  CppParser parser;
+  parser.set_is_cplusplus(true);
+  parser.AddStringInput("#if true\n"
+                        "#define foo\n"
+                        "#endif\n"
+                        "#if false\n"
+                        "#define bar\n"
+                        "#endif\n", "baz.cc");
+  parser.ProcessDirectives();
+  EXPECT_TRUE(parser.IsMacroDefined("foo"));
+  EXPECT_FALSE(parser.IsMacroDefined("bar"));
+}
+
+TEST_F(CppParserTest, BoolShouldNotBeTreatedAsBoolOnNonCplusplus) {
+  // C++ mode is left off here: neither "#if true" nor "#if false" is taken.
+  CppParser parser;
+  parser.AddStringInput("#if true\n"
+                        "#define foo\n"
+                        "#endif\n"
+                        "#if false\n"
+                        "#define bar\n"
+                        "#endif\n", "baz.cc");
+  parser.ProcessDirectives();
+  EXPECT_FALSE(parser.IsMacroDefined("foo"));
+  EXPECT_FALSE(parser.IsMacroDefined("bar"));
+}
+
+TEST_F(CppParserTest, BoolShouldNotBeTreatedAsDefined) {
+  // In C++ mode "true" enables #if, yet defined() holds for neither keyword.
+  CppParser parser;
+  parser.set_is_cplusplus(true);
+  parser.AddStringInput("#if true\n"
+                        "#define foo\n"
+                        "#endif\n"
+                        "#if defined(true)\n"
+                        "#define bar\n"
+                        "#endif\n"
+                        "#if false\n"
+                        "#define baz\n"
+                        "#endif\n"
+                        "#if defined(false)\n"
+                        "#define qux\n"
+                        "#endif\n", "baz.cc");
+  parser.ProcessDirectives();
+  EXPECT_TRUE(parser.IsMacroDefined("foo"));
+  EXPECT_FALSE(parser.IsMacroDefined("bar"));
+  EXPECT_FALSE(parser.IsMacroDefined("baz"));
+  EXPECT_FALSE(parser.IsMacroDefined("qux"));
+}
+
+TEST_F(CppParserTest, BoolShouldBeOverriddenByMacroInTrueToTrueCase) {
+  // "#define true true": #if true is still taken and defined(true) holds.
+  CppParser parser;
+  parser.set_is_cplusplus(true);
+  parser.AddStringInput("#define true true\n"
+                        "#if true\n"
+                        "#define foo\n"
+                        "#endif\n"
+                        "#if defined(true)\n"
+                        "#define bar\n"
+                        "#endif\n", "baz.cc");
+  parser.ProcessDirectives();
+  EXPECT_TRUE(parser.IsMacroDefined("foo"));
+  EXPECT_TRUE(parser.IsMacroDefined("bar"));
+}
+
+TEST_F(CppParserTest, BoolShouldBeOverriddenByMacroInTrueToFalseCase) {
+  // "#define true false": #if true is not taken, defined(true) holds.
+  CppParser parser;
+  parser.set_is_cplusplus(true);
+  parser.AddStringInput("#define true false\n"
+                        "#if true\n"
+                        "#define foo\n"
+                        "#endif\n"
+                        "#if defined(true)\n"
+                        "#define bar\n"
+                        "#endif\n", "baz.cc");
+  parser.ProcessDirectives();
+  EXPECT_FALSE(parser.IsMacroDefined("foo"));
+  EXPECT_TRUE(parser.IsMacroDefined("bar"));
+}
+
+TEST_F(CppParserTest, BoolShouldBeOverriddenByMacroInFalseToTrueCase) {
+  // "#define false true": #if false is taken, defined(false) holds.
+  CppParser parser;
+  parser.set_is_cplusplus(true);
+  parser.AddStringInput("#define false true\n"
+                        "#if false\n"
+                        "#define foo\n"
+                        "#endif\n"
+                        "#if defined(false)\n"
+                        "#define bar\n"
+                        "#endif\n", "baz.cc");
+  parser.ProcessDirectives();
+  EXPECT_TRUE(parser.IsMacroDefined("foo"));
+  EXPECT_TRUE(parser.IsMacroDefined("bar"));
+}
+
+TEST_F(CppParserTest, BoolShouldBeOverriddenByMacroInFalseToFalseCase) {
+  // "#define false false": #if false is not taken, defined(false) holds.
+  CppParser parser;
+  parser.set_is_cplusplus(true);
+  parser.AddStringInput("#define false false\n"
+                        "#if false\n"
+                        "#define foo\n"
+                        "#endif\n"
+                        "#if defined(false)\n"
+                        "#define bar\n"
+                        "#endif\n", "baz.cc");
+  parser.ProcessDirectives();
+  EXPECT_FALSE(parser.IsMacroDefined("foo"));
+  EXPECT_TRUE(parser.IsMacroDefined("bar"));
+}
+
+TEST_F(CppParserTest, BoolShouldBeOverriddenAndPossibleToUndefOnTrueCase) {
+  // The override "true -> false" applies until #undef; then #if true is taken.
+  CppParser parser;
+  parser.set_is_cplusplus(true);
+  parser.AddStringInput("#define true false\n"
+                        "#if true\n"
+                        "#define foo\n"
+                        "#endif\n"
+                        "#undef true\n"
+                        "#if true\n"
+                        "#define bar\n"
+                        "#endif\n", "baz.cc");
+  parser.ProcessDirectives();
+  EXPECT_FALSE(parser.IsMacroDefined("foo"));
+  EXPECT_TRUE(parser.IsMacroDefined("bar"));
+}
+
+TEST_F(CppParserTest, BoolShouldBeOverriddenAndPossibleToUndefOnFalseCase) {
+  // The override "false -> true" applies until #undef; then #if false fails.
+  CppParser parser;
+  parser.set_is_cplusplus(true);
+  parser.AddStringInput("#define false true\n"
+                        "#if false\n"
+                        "#define foo\n"
+                        "#endif\n"
+                        "#undef false\n"
+                        "#if false\n"
+                        "#define bar\n"
+                        "#endif\n", "baz.cc");
+  parser.ProcessDirectives();
+  EXPECT_TRUE(parser.IsMacroDefined("foo"));
+  EXPECT_FALSE(parser.IsMacroDefined("bar"));
+}
+
+TEST_F(CppParserTest, MacroSetChanged) {
+  // b.h defines X and then includes a.h, whose last line undefines X again.
+  CppParser parser;
+  CppIncludeObserver observer(&parser);
+  observer.SetInclude("a.h",
+                      "#ifndef A_H\n"
+                      "#define A_H\n"
+                      "#endif\n"
+                      "#undef X\n");
+  observer.SetInclude("b.h",
+                      "#ifndef B_H\n"
+                      "#define B_H\n"
+                      "#define X 1\n"
+                      "#include \"a.h\"\n"
+                      "#define Y 1\n"
+                      "#endif\n");
+
+  parser.set_include_observer(&observer);
+  parser.AddStringInput("#include \"a.h\"\n"
+                        "#include \"b.h\"\n",
+                        "a.cc");
+  parser.ProcessDirectives();
+
+  // The inclusion of a.h from b.h must not be skipped: its trailing #undef
+  // has to run, so X ends up undefined while Y (set afterwards) is defined.
+  EXPECT_FALSE(parser.IsMacroDefined("X"));
+  EXPECT_TRUE(parser.IsMacroDefined("Y"));
+}
+
+TEST_F(CppParserTest, TopFileMacroDefinitionUpdate) {
+  // a.h is reached twice: directly, and through c.h after the top-level
+  // file has defined INCLUDE_B.
+  CppParser parser;
+  CppIncludeObserver observer(&parser);
+  observer.SetInclude("a.h",
+                      "#ifdef INCLUDE_B\n"
+                      "#include \"b.h\"\n"
+                      "#endif\n");
+  observer.SetInclude("b.h", "#define B\n");
+  observer.SetInclude("c.h", "#include \"a.h\"\n");
+
+  parser.set_include_observer(&observer);
+  parser.AddStringInput("#include \"a.h\"\n"
+                        "#define INCLUDE_B\n"
+                        "#include \"c.h\"\n",
+                        "a.cc");
+  parser.ProcessDirectives();
+
+  // Once a.cc has defined INCLUDE_B, the #ifdef in a.h must evaluate
+  // differently on the second visit, which pulls in b.h and defines B.
+  EXPECT_TRUE(parser.IsMacroDefined("B"));
+}
+
+TEST_F(CppParserTest, SkippedByIncludeGuard) {
+  // c.h includes b.h, which includes a.h; all three use #ifndef guards.
+  // The later direct #include of b.h and of a.h must be skipped.
+  CppParser parser;
+  CppIncludeObserver observer(&parser);
+  observer.SetInclude("a.h",
+                      "#ifndef A_H\n"
+                      "#define A_H\n"
+                      "#endif");
+  observer.SetInclude("b.h",
+                      "#ifndef B_H\n"
+                      "#define B_H\n"
+                      "#include \"a.h\"\n"
+                      "#endif");
+  observer.SetInclude("c.h",
+                      "#ifndef C_H\n"
+                      "#define C_H\n"
+                      "#include \"b.h\"\n"
+                      "#endif");
+  parser.set_include_observer(&observer);
+
+  parser.AddStringInput("#include \"c.h\"\n"
+                        "#include \"b.h\"\n"
+                        "#include \"a.h\"\n",
+                        "(string)");
+  parser.ProcessDirectives();
+
+  EXPECT_EQ(1, observer.IncludedCount("a.h"));
+  EXPECT_EQ(1, observer.IncludedCount("b.h"));
+  EXPECT_EQ(1, observer.IncludedCount("c.h"));
+  // c.h is requested a single time, so it has nothing to skip.
+  EXPECT_EQ(1, observer.SkipCount("a.h"));
+  EXPECT_EQ(1, observer.SkipCount("b.h"));
+  EXPECT_EQ(0, observer.SkipCount("c.h"));
+}
+
+TEST_F(CppParserTest, SkippedByIncludeGuardIfDefinedCase) {
+  // Same chain as the #ifndef variant, but the guards are spelled
+  // "#if !defined(X)"; they must lead to the same skips.
+  CppParser parser;
+  CppIncludeObserver observer(&parser);
+  observer.SetInclude("a.h",
+                      "#if !defined(A_H)\n"
+                      "#define A_H\n"
+                      "#endif");
+  observer.SetInclude("b.h",
+                      "#if !defined(B_H)\n"
+                      "#define B_H\n"
+                      "#include \"a.h\"\n"
+                      "#endif");
+  observer.SetInclude("c.h",
+                      "#if !defined(C_H)\n"
+                      "#define C_H\n"
+                      "#include \"b.h\"\n"
+                      "#endif");
+  parser.set_include_observer(&observer);
+
+  parser.AddStringInput("#include \"c.h\"\n"
+                        "#include \"b.h\"\n"
+                        "#include \"a.h\"\n",
+                        "(string)");
+  parser.ProcessDirectives();
+
+  EXPECT_EQ(1, observer.IncludedCount("a.h"));
+  EXPECT_EQ(1, observer.IncludedCount("b.h"));
+  EXPECT_EQ(1, observer.IncludedCount("c.h"));
+  // c.h is requested a single time, so it has nothing to skip.
+  EXPECT_EQ(1, observer.SkipCount("a.h"));
+  EXPECT_EQ(1, observer.SkipCount("b.h"));
+  EXPECT_EQ(0, observer.SkipCount("c.h"));
+}
+
+TEST_F(CppParserTest, SkippedByIncludeGuardIfDefinedInvalidCase) {
+  CppParser parser;
+  CppIncludeObserver observer(&parser);
+
+  // Only a.h has a condition of the exact form "!defined(X)"; b.h to e.h
+  // must therefore be processed again on their second #include.
+
+  observer.SetInclude("a.h",
+                      "#if !defined(A_H)\n"
+                      "#define A_H\n"
+                      "#endif");
+  // b.h: the defined() test is followed by "|| 1".
+  observer.SetInclude("b.h",
+                      "#if !defined(B_H) || 1\n"
+                      "#define B_H\n"
+                      "#endif");
+  // c.h: the defined() test is preceded by "1 ||".
+  observer.SetInclude("c.h",
+                      "#if 1 || !defined(C_H)\n"
+                      "#define C_H\n"
+                      "#endif");
+  // d.h: the defined() test is wrapped in the ID() macro.
+  observer.SetInclude("d.h",
+                      "#if ID(!defined(D_H))\n"
+                      "#define D_H\n"
+                      "#endif");
+  // e.h: the defined() test is not negated.
+  observer.SetInclude("e.h",
+                      "#if defined(E_H)\n"
+                      "#define E_H\n"
+                      "#endif");
+
+  parser.set_include_observer(&observer);
+
+  parser.AddStringInput("#define ID(X) X\n"
+                        "#include \"a.h\"\n"
+                        "#include \"a.h\"\n"
+                        "#include \"b.h\"\n"
+                        "#include \"b.h\"\n"
+                        "#include \"c.h\"\n"
+                        "#include \"c.h\"\n"
+                        "#include \"d.h\"\n"
+                        "#include \"d.h\"\n"
+                        "#include \"e.h\"\n"
+                        "#include \"e.h\"\n",
+                        "(string)");
+  parser.ProcessDirectives();
+
+  EXPECT_EQ(1, observer.IncludedCount("a.h"));
+  EXPECT_EQ(2, observer.IncludedCount("b.h"));
+  EXPECT_EQ(2, observer.IncludedCount("c.h"));
+  EXPECT_EQ(2, observer.IncludedCount("d.h"));
+  EXPECT_EQ(2, observer.IncludedCount("e.h"));
+
+  EXPECT_EQ(1, observer.SkipCount("a.h"));
+  EXPECT_EQ(0, observer.SkipCount("b.h"));
+  EXPECT_EQ(0, observer.SkipCount("c.h"));
+  EXPECT_EQ(0, observer.SkipCount("d.h"));
+  EXPECT_EQ(0, observer.SkipCount("e.h"));
+}
+
+TEST_F(CppParserTest, DontSkipdByIncludeGuardIfndefButNotDefined) {
+  // a.h opens with "#ifndef FOO" but never defines FOO itself. Its second
+  // #include (FOO now defined by the top file) must take the #else branch.
+  CppParser parser;
+  CppIncludeObserver observer(&parser);
+
+  observer.SetInclude("a.h",
+                      "#ifndef FOO\n"
+                      "# include \"b.h\"\n"
+                      "#else\n"
+                      "# include \"c.h\"\n"
+                      "#endif\n");
+  observer.SetInclude("b.h", "#define B_H");
+  observer.SetInclude("c.h", "#define C_H");
+
+  parser.set_include_observer(&observer);
+
+  parser.AddStringInput("#include \"a.h\"\n"
+                        "#define FOO\n"
+                        "#include \"a.h\"\n",
+                        "(string)");
+  parser.ProcessDirectives();
+
+  EXPECT_EQ(2, observer.IncludedCount("a.h"));
+  EXPECT_EQ(1, observer.IncludedCount("b.h"));
+  EXPECT_EQ(1, observer.IncludedCount("c.h"));
+
+  EXPECT_EQ(0, observer.SkipCount("a.h"));
+  EXPECT_EQ(0, observer.SkipCount("b.h"));
+  EXPECT_EQ(0, observer.SkipCount("c.h"));
+}
+
+TEST_F(CppParserTest, DontSkipdIncludeGuardAndUndefined) {
+  // a.h is guarded by FOO, but FOO is #undef'ed before the second #include,
+  // so a.h must be processed again instead of being skipped.
+  CppParser parser;
+  CppIncludeObserver observer(&parser);
+  observer.SetInclude("a.h",
+                      "#ifndef FOO\n"
+                      "#define FOO\n"
+                      "#endif\n");
+
+  parser.set_include_observer(&observer);
+
+  parser.AddStringInput("#include \"a.h\"\n"
+                        "#undef FOO\n"
+                        "#include \"a.h\"\n",
+                        "(string)");
+  parser.ProcessDirectives();
+
+  EXPECT_EQ(2, observer.IncludedCount("a.h"));
+  EXPECT_EQ(0, observer.SkipCount("a.h"));
+}
+
+TEST_F(CppParserTest, ColonPercentShouldBeTreatedAsSharp) {
+  // "%:%:" must act like "##": "a" then expands to "bc", which is 1.
+  CppParser parser;
+  parser.AddStringInput("#define  a  b  %:%: c \n"
+                        "#define bc 1\n"
+                        "#if a == bc\n"
+                        "#define correct\n"
+                        "#else\n"
+                        "#define wrong\n"
+                        "#endif\n", "(string)");
+  parser.ProcessDirectives();
+  EXPECT_TRUE(parser.IsMacroDefined("a"));
+  EXPECT_TRUE(parser.IsMacroDefined("correct"));
+  EXPECT_FALSE(parser.IsMacroDefined("wrong"));
+}
+
+TEST_F(CppParserTest, SpaceInMacroShouldBeTreatedAsIs) {
+  CppParser parser;
+  CppIncludeObserver observer(&parser);
+
+  observer.SetInclude("foobar", "");
+  observer.SetInclude("foo bar", "");
+  observer.SetInclude("foo  bar", "");
+
+  parser.set_include_observer(&observer);
+
+  // FOO1 is written with one blank and FOO2 with two; both are expected to
+  // name the header "foo bar", i.e. FOO2 becomes <foo_bar>, not <foo__bar>
+  // (an underscore stands for a space here).
+  parser.AddStringInput("#define FOO1 <foo bar>\n"
+                        "#define FOO2 <foo  bar>\n"
+                        "#include FOO1\n"
+                        "#include FOO2\n",
+                        "foo.cc");
+  parser.ProcessDirectives();
+
+  // The run of blanks is neither dropped nor kept at length two.
+  EXPECT_EQ(0, observer.IncludedCount("foobar"));
+  EXPECT_EQ(2, observer.IncludedCount("foo bar"));
+  EXPECT_EQ(0, observer.IncludedCount("foo  bar"));
+}
+
+TEST_F(CppParserTest, SpaceNearDoubleSharpShouldBeTreatedCorrectly) {
+  // Blanks around "##" must not end up in the pasted token:
+  // cut(hoge, fuga) has to name the header "hogefuga".
+  CppParser parser;
+  CppIncludeObserver observer(&parser);
+  observer.SetInclude("hogefuga", "");
+
+  parser.set_include_observer(&observer);
+
+  parser.AddStringInput("#define cut(x, y) <x   ##   y>\n"
+                        "#include cut(hoge, fuga)\n",
+                        "foo.cc");
+  parser.ProcessDirectives();
+
+  EXPECT_EQ(1, observer.IncludedCount("hogefuga"));
+}
+
+TEST_F(CppParserTest, DirectiveWithSpaces) {
+  // Directives stay valid with blanks before '#' and between '#' and the
+  // directive name: both guarded #include lines must be taken.
+  CppParser cpp_parser;
+  CppIncludeObserver include_observer(&cpp_parser);
+  include_observer.SetInclude("x.h", "");
+  include_observer.SetInclude("y.h", "");
+  cpp_parser.set_include_observer(&include_observer);
+
+  cpp_parser.AddStringInput("\n"
+                            " # define foo\n"
+                            "  #   define bar\n"
+                            " # ifdef foo\n"
+                            "  #  include \"x.h\"\n"
+                            " # endif\n"
+                            "# ifdef bar\n"
+                            "# include \"y.h\"\n"
+                            "# endif\n",
+                            "foo.cc");
+  cpp_parser.ProcessDirectives();
+
+  EXPECT_EQ(1, include_observer.IncludedCount("x.h"));
+  EXPECT_EQ(1, include_observer.IncludedCount("y.h"));
+}
+
+TEST_F(CppParserTest, MultiAddMacroByString) {
+  // Two macros registered through AddMacroByString() must both satisfy #ifdef.
+  CppParser parser;
+  CppIncludeObserver observer(&parser);
+
+  observer.SetInclude("x.h", "");
+  observer.SetInclude("y.h", "");
+
+  parser.set_include_observer(&observer);
+
+  parser.AddMacroByString("macro1", "");
+  parser.AddMacroByString("macro2", "");
+  parser.AddStringInput("#ifdef macro1\n"
+                        "#include \"x.h\"\n"
+                        "#endif\n"
+                        "#ifdef macro2\n"
+                        "#include \"y.h\"\n"
+                        "#endif\n", "foo.cc");
+  parser.ProcessDirectives();
+
+  EXPECT_TRUE(parser.IsMacroDefined("macro1"));
+  EXPECT_TRUE(parser.IsMacroDefined("macro2"));
+
+  EXPECT_EQ(1, observer.IncludedCount("x.h"));
+  EXPECT_EQ(1, observer.IncludedCount("y.h"));
+}
+
+TEST_F(CppParserTest, Expand0) {
+  // Known issue: this test does not pass in the slow-path test.
+  // TODO: remove slow path or fix slow path.
+  CheckExpand("#define e(x) ee(x)\n"  // expected result is the tokens f ( y )
+              "#define ee(x) x(y)\n"
+              "#define f(x) f\n"
+              "#define foo e(f(x))\n",
+              "foo",
+              {Token(Token::IDENTIFIER, "f"),
+               Token(Token::PUNCTUATOR, '('),
+               Token(Token::IDENTIFIER, "y"),
+               Token(Token::PUNCTUATOR, ')')});
+
+  CheckExpand("#define f(x) f\n"  // same expectation without e/ee
+              "#define foo f(x)(y)\n",
+              "foo",
+              {Token(Token::IDENTIFIER, "f"), Token(Token::PUNCTUATOR, '('),
+               Token(Token::IDENTIFIER, "y"), Token(Token::PUNCTUATOR, ')')});
+
+  CheckExpand("#define a 1\n", "a", {Token(1)});  // object-like macro
+
+  CheckExpand("#define a b\n"  // chained object-like macros
+              "#define b 1\n",
+              "a", {Token(1)});
+
+  CheckExpand("#define a a\n", "a", {Token(Token::IDENTIFIER, "a")});
+
+  CheckExpand("#define a",  // empty replacement list
+              "a",
+              ArrayTokenList());
+
+  CheckExpand("#define a b\n"  // cycle a -> b -> c -> a ends at a
+              "#define b c\n"
+              "#define c a\n", "a", {Token(Token::IDENTIFIER, "a")});
+
+  CheckExpand("#define id(x) x\n",  // nested calls of the same macro
+              "id(id(a))", {Token(Token::IDENTIFIER, "a")});
+
+  CheckExpand("",  // no macro defined at all
+              "a",
+              {Token(Token::IDENTIFIER, "a")});
+
+  CheckExpand("#define f(x)",  // function-like name used without arguments
+              "f",
+              {Token(Token::IDENTIFIER, "f")});
+
+  CheckExpand("#define f",  // object-like f followed by (x)
+              "f(x)",
+              {Token(Token::PUNCTUATOR, '('),
+               Token(Token::IDENTIFIER, "x"),
+               Token(Token::PUNCTUATOR, ')'),});
+
+  CheckExpand("#define f(...) __VA_ARGS__",  // variadic, no argument
+              "f()",
+              ArrayTokenList());
+
+  CheckExpand("#define f(...) __VA_ARGS__",  // variadic, one argument
+              "f(x)",
+              {Token(Token::IDENTIFIER, "x")});
+
+  CheckExpand("#define f(...) __VA_ARGS__",  // variadic, two arguments
+              "f(x,y)",
+              {Token(Token::IDENTIFIER, "x"),
+               Token(Token::PUNCTUATOR, ','),
+               Token(Token::IDENTIFIER, "y")});
+
+  CheckExpand("#define f(...) __VA_ARGS__\n"  // the arguments are macros too
+              "#define x 1\n"
+              "#define y 2\n",
+              "f(x,y)",
+              {Token(1),
+               Token(Token::PUNCTUATOR, ','),
+               Token(2)});
+
+  CheckExpand("#define f(x, y, ...) __VA_ARGS__, y, x\n",
+              "f(1, 2)",  // nothing is passed for "..."
+              {Token(Token::PUNCTUATOR, ','),
+               Token(2),
+               Token(Token::PUNCTUATOR, ','),
+               Token(1)});
+
+  CheckExpand("#define f(x, y, ...) __VA_ARGS__, y, x\n",
+              "f(1, 2, 3, 4)",  // 3 and 4 are passed for "..."
+              {Token(3),
+               Token(Token::PUNCTUATOR, ','),
+               Token(4),
+               Token(Token::PUNCTUATOR, ','),
+               Token(2),
+               Token(Token::PUNCTUATOR, ','),
+               Token(1)});
+
+  CheckExpand("#define X(a, b, c, ...) c\n",  // string-literal arguments
+              "X(\"a\", \"b\", \"c\", \"d\", \"e\")",
+              {Token(Token::STRING, "c")});
+
+  CheckExpand("#define g(x, y, ...) f(x, y, __VA_ARGS__)\n"
+              "#define f(x, y, ...) g(0, x, y, __VA_ARGS__)\n",
+              "f(1, 2)",  // f and g refer to each other
+              {
+               Token(Token::IDENTIFIER, "f"),
+               Token(Token::PUNCTUATOR, '('),
+               Token(0),
+               Token(Token::PUNCTUATOR, ','),
+               Token(1),
+               Token(Token::PUNCTUATOR, ','),
+               Token(2),
+               Token(Token::PUNCTUATOR, ','),
+               Token(Token::PUNCTUATOR, ')'),
+              });
+
+  CheckExpand("#define two(...) __VA_ARGS__, __VA_ARGS__\n",
+              "two(two(1), two(2))",  // nested variadic calls
+              {
+               Token(1),
+               Token(Token::PUNCTUATOR, ','),
+               Token(1),
+               Token(Token::PUNCTUATOR, ','),
+               Token(2),
+               Token(Token::PUNCTUATOR, ','),
+               Token(2),
+               Token(Token::PUNCTUATOR, ','),
+               Token(1),
+               Token(Token::PUNCTUATOR, ','),
+               Token(1),
+               Token(Token::PUNCTUATOR, ','),
+               Token(2),
+               Token(Token::PUNCTUATOR, ','),
+               Token(2),
+              });
+}
+
+TEST_F(CppParserTest, LimitIncludeDepth) {
+  // bar.h includes itself, so only a depth limit can end the processing.
+  CppParser parser;
+  CppIncludeObserver observer(&parser);
+  observer.SetInclude("bar.h", "#include \"bar.h\"\n");
+  parser.set_include_observer(&observer);
+
+  parser.AddStringInput("#include \"bar.h\"\n", "foo.cc");
+
+  // Processing is expected to fail once bar.h has been included 1024 times.
+  const bool processed = parser.ProcessDirectives();
+  EXPECT_FALSE(processed);
+  EXPECT_EQ(1024, observer.IncludedCount("bar.h"));
+}
+
+TEST_F(CppParserTest, MacroCache) {
+  InitMacroEnvCache();  // paired with QuitMacroEnvCache() at the end
+
+  const string& ah = tmpdir_->FullPath("a.h");
+  tmpdir_->CreateTmpFile("a.h", R"(
+#define a 1
+#ifdef a
+# define b 2
+#endif
+#define two 1
+)");
+
+  for (int i = 0; i < 2; ++i) {  // parse a.h twice, each with a fresh parser
+    CppParser cpp_parser;
+    cpp_parser.AddFileInput(Content::CreateFromFile(ah), FileId(ah),
+                            "a.h", "a", 0);
+
+    EXPECT_TRUE(cpp_parser.ProcessDirectives());
+
+    EXPECT_TRUE(cpp_parser.IsMacroDefined("a"));
+    EXPECT_TRUE(cpp_parser.IsMacroDefined("b"));
+    EXPECT_TRUE(cpp_parser.IsMacroDefined("two"));
+
+    if (i == 0) {  // first pass: no cache hit is expected
+      EXPECT_EQ(0, cpp_parser.obj_cache_hit());
+    } else {
+      // cache hit for "a", "b" and "two".
+      EXPECT_EQ(3, cpp_parser.obj_cache_hit());
+    }
+  }
+
+  const string& bh = tmpdir_->FullPath("b.h");
+  tmpdir_->CreateTmpFile("b.h", R"(
+#ifdef a
+# define b 3
+#endif
+
+#ifdef two
+# define NOT_REACHABLE1 1
+#endif
+
+#if two == 1
+# define NOT_REACHABLE2 1
+#endif
+
+#define two 2
+
+#if two == 2
+# define OK 1
+#endif
+)");
+
+  for (int i = 0; i < 2; ++i) {  // parse b.h twice; a.h's macros must not leak
+    CppParser cpp_parser;
+    cpp_parser.AddFileInput(Content::CreateFromFile(bh), FileId(bh),
+                            "b.h", "b", 0);
+
+    EXPECT_TRUE(cpp_parser.ProcessDirectives());
+
+
+    EXPECT_FALSE(cpp_parser.IsMacroDefined("a"));  // b.h never defines a
+    EXPECT_FALSE(cpp_parser.IsMacroDefined("b"));  // guarded by #ifdef a
+
+    EXPECT_FALSE(cpp_parser.IsMacroDefined("NOT_REACHABLE1"));
+    EXPECT_FALSE(cpp_parser.IsMacroDefined("NOT_REACHABLE2"));
+
+    EXPECT_TRUE(cpp_parser.IsMacroDefined("OK"));
+    EXPECT_TRUE(cpp_parser.IsMacroDefined("two"));
+
+    if (i == 0) {  // first pass over b.h: no cache hit is expected
+      EXPECT_EQ(0, cpp_parser.obj_cache_hit());
+    } else {
+      // cache hit for "two" and "OK".
+      EXPECT_EQ(2, cpp_parser.obj_cache_hit());
+    }
+  }
+
+  QuitMacroEnvCache();
+}
+
+}  // namespace devtools_goma
diff --git a/client/cpp_token.cc b/client/cpp_token.cc
new file mode 100644
index 0000000..4245249
--- /dev/null
+++ b/client/cpp_token.cc
@@ -0,0 +1,120 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "cpp_token.h"
+
+namespace {
+// Div and Mod must not trap: x/0, x%0 give 0; INT_MIN/-1 wraps via Neg().
+int Neg(int v) { return static_cast<int>(0u - static_cast<unsigned>(v)); }
+int Mul(int v1, int v2) { return v1 * v2; }
+int Div(int v1, int v2) { return v2 == 0 ? 0 : v2 == -1 ? Neg(v1) : v1 / v2; }
+int Mod(int v1, int v2) { return (v2 == 0 || v2 == -1) ? 0 : v1 % v2; }
+int Add(int v1, int v2) { return v1 + v2; }
+int Sub(int v1, int v2) { return v1 - v2; }
+int RShift(int v1, int v2) { return v1 >> v2; }  // v2 is not range-checked
+int LShift(int v1, int v2) { return v1 << v2; }  // v2 is not range-checked
+int Gt(int v1, int v2) { return v1 > v2; }  // comparisons yield 1 or 0
+int Lt(int v1, int v2) { return v1 < v2; }
+int Ge(int v1, int v2) { return v1 >= v2; }
+int Le(int v1, int v2) { return v1 <= v2; }
+int Eq(int v1, int v2) { return v1 == v2; }
+int Ne(int v1, int v2) { return v1 != v2; }
+int And(int v1, int v2) { return v1 & v2; }  // bitwise
+int Xor(int v1, int v2) { return v1 ^ v2; }
+int Or(int v1, int v2) { return v1 | v2; }
+int LAnd(int v1, int v2) { return v1 && v2; }  // logical; yields 1 or 0
+int LOr(int v1, int v2) { return v1 || v2; }
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+const int CppToken::kPrecedenceTable[] = {  // indexed by (type - OP_BEGIN)
+  9, 9, 9,      // MUL, DIV, MOD,
+  8, 8,         // ADD, SUB,
+  7, 7,         // RSHIFT, LSHIFT,
+  6, 6, 6, 6,   // GT, LT, GE, LE,
+  5, 5,         // EQ, NE,
+  4,            // AND,
+  3,            // XOR,
+  2,            // OR,
+  1,            // LAND,
+  0,            // LOR,
+};
+
+const CppToken::OperatorFunction CppToken::kFunctionTable[] = {
+  Mul, Div, Mod, Add, Sub, RShift, LShift, Gt, Lt, Ge, Le, Eq, Ne,
+  And, Xor, Or, LAnd, LOr  // one entry per operator, indexed by type - OP_BEGIN
+};
+
+std::string CppToken::DebugString() const {
+  // Renders the token as a bracketed tag, e.g. "[IDENT(foo)]" or "[##]".
+  // CppToken::operator== compares these strings for non-NUMBER tokens, so
+  // the exact text produced here also decides token equality.
+
+  switch (type) {
+    // Tokens spelled out in string_value.
+    case IDENTIFIER:
+      return "[IDENT(" + string_value + ")]";
+    case STRING:
+      return "[STRING(\"" + string_value + "\")]";
+    case NUMBER: {
+      // Shows the spelling followed by the integer stored in the union.
+      const std::string stored = std::to_string(v.int_value);
+      return "[NUMBER(" + string_value + ", " + stored + ")]";
+    }
+
+    // Tokens with a fixed rendering.
+    case DOUBLESHARP:
+      return "[##]";
+    case TRIPLEDOT:
+      return "[...]";
+    case NEWLINE:
+      return "[NL]\n";
+    case END:
+      return "[END]";
+    case MACRO_PARAM_VA_ARGS:
+      return "[MACRO_PARAM_VA_ARGS]";
+
+    // Tokens rendered from a payload in the union |v|.
+    case ESCAPED: {
+      std::string escaped("[\\");
+      escaped.push_back(v.char_value.c);
+      escaped.push_back(']');
+      return escaped;
+    }
+    case MACRO_PARAM:
+      return "[MACRO_PARAM(arg" + std::to_string(v.param_index) + ")]";
+    case BEGIN_HIDE:
+      return "[BEGIN_HIDE(" + std::to_string(v.int_value) + ")]";
+    case END_HIDE:
+      return "[END_HIDE(" + std::to_string(v.int_value) + ")]";
+
+    // Every remaining type is handled after the switch.
+    default:
+      break;
+  }
+
+  // Remaining types: use string_value when it is set, otherwise the
+  // one-char form |c|, otherwise the NUL-terminated two-char form |c2|.
+  std::string body;
+  if (!string_value.empty()) {
+    body = string_value;
+  } else if (v.char_value.c != 0) {
+    body.assign(1, v.char_value.c);
+  } else {
+    body = v.char_value.c2;
+  }
+
+  return "[" + body + "]";
+}
+
+std::string CppToken::GetCanonicalString() const {
+  // Prefer the stored text, then the one-char form, then the two-char form.
+  if (!string_value.empty()) return string_value;
+  const char single = v.char_value.c;
+  if (single != 0) return std::string(1, single);
+  return v.char_value.c2;
+}
+
+}  // namespace devtools_goma
diff --git a/client/cpp_token.h b/client/cpp_token.h
new file mode 100644
index 0000000..43dd01c
--- /dev/null
+++ b/client/cpp_token.h
@@ -0,0 +1,147 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_CPP_TOKEN_H_
+#define DEVTOOLS_GOMA_CLIENT_CPP_TOKEN_H_
+
+#include <string>
+#include <type_traits>
+
+#include "glog/logging.h"
+
+#include "string_piece.h"
+
+namespace devtools_goma {
+
+struct CppToken {  // A preprocessor token: type tag plus text or payload.
+  enum Type {
+    IDENTIFIER, STRING, NUMBER, SHARP, DOUBLESHARP, TRIPLEDOT,
+    SPACE, NEWLINE, ESCAPED, PUNCTUATOR, END, MACRO_PARAM,
+    MACRO_PARAM_VA_ARGS,
+
+    // Operators; (type - OP_BEGIN) indexes kFunctionTable/kPrecedenceTable.
+    OP_BEGIN,
+    MUL = OP_BEGIN, DIV, MOD, ADD, SUB, RSHIFT, LSHIFT, GT, LT,
+    GE, LE, EQ, NE, AND, XOR, OR, LAND, LOR,
+
+    // Hideset annotations.
+    // These are used only for macro expansion.
+    // A token of either type below carries the macro id in v.int_value.
+    BEGIN_HIDE, END_HIDE
+  };
+
+  typedef int (*OperatorFunction)(int, int);  // binary operator on ints
+
+  CppToken() : type(END) {}  // a default token is END
+  explicit CppToken(Type type) : type(type) {}
+  explicit CppToken(int i) : type(NUMBER), v(i) {}  // NUMBER built from i
+  CppToken(Type type, char c) : type(type), v(c) {}  // one-char token
+  CppToken(Type type, char c1, char c2) : type(type), v(c1, c2) {}  // two-char
+  CppToken(Type type, int i) : type(type) {
+    v.int_value = i;
+  }
+  CppToken(Type type, StringPiece s) : type(type), string_value(s) {}
+
+  bool operator==(const CppToken& other) const {
+    if (type != other.type) {
+      return false;
+    }
+    if (type == NUMBER) {  // numbers compare by value only
+      return v.int_value == other.v.int_value;
+    }
+
+    return DebugString() == other.DebugString();  // others by rendered form
+  }
+
+  void Append(const char* str, size_t size);
+  void Append(const std::string& str);
+  bool IsPuncChar(int c) const;
+  bool IsOperator() const;
+  void MakeMacroParam(size_t param_index);
+  void MakeMacroParamVaArgs();
+
+  std::string DebugString() const;
+  std::string GetCanonicalString() const;
+
+  int ApplyOperator(int v1, int v2) const {  // runs this operator on v1, v2
+    DCHECK(IsOperator());
+    return kFunctionTable[type - OP_BEGIN](v1, v2);
+  }
+  OperatorFunction GetOperator() const {
+    DCHECK(IsOperator());
+    return kFunctionTable[type - OP_BEGIN];
+  }
+  int GetPrecedence() const {
+    DCHECK(IsOperator());
+    return kPrecedenceTable[type - OP_BEGIN];
+  }
+
+  static const OperatorFunction kFunctionTable[];  // defined in cpp_token.cc
+  static const int kPrecedenceTable[];  // defined in cpp_token.cc
+
+  Type type;
+  std::string string_value;
+
+  // A struct to hold char value(s) for operators and punctuators.
+  struct CharValue {
+    // For one-char tokens.
+    char c;
+    // For two-char tokens; c is always set to zero when c2 has a value.
+    char c2[3];
+  };
+
+  union value {
+    value() : param_index(0) {}
+    value(int i) : param_index(i) {}
+    value(char c) : param_index(0) {
+      char_value.c = c;
+    }
+    value(char c1, char c2) : param_index(0) {  // zero first, like value(char)
+      char_value.c = 0;
+      char_value.c2[0] = c1;
+      char_value.c2[1] = c2;
+      char_value.c2[2] = 0;
+    }
+    CharValue char_value;
+    long int_value;
+    size_t param_index;
+  } v;
+};
+
+inline void CppToken::Append(const char* str, size_t size) {
+  string_value.append(str, str + size);  // the |size| chars starting at |str|
+}
+
+inline void CppToken::Append(const std::string& str) {
+  string_value += str;
+}
+
+inline bool CppToken::IsPuncChar(int c) const {
+  return (type == PUNCTUATOR || IsOperator()) && v.int_value == c;
+}
+
+inline bool CppToken::IsOperator() const {
+  return OP_BEGIN <= type && type <= LOR;  // not BEGIN_HIDE / END_HIDE
+}
+
+inline void CppToken::MakeMacroParam(size_t param_index) {
+  DCHECK_EQ(IDENTIFIER, type);  // only an identifier may be converted
+  string_value.clear();
+  v.param_index = param_index;
+  type = MACRO_PARAM;
+}
+
+inline void CppToken::MakeMacroParamVaArgs() {
+  DCHECK_EQ(IDENTIFIER, type);  // must still be the identifier __VA_ARGS__
+  DCHECK_EQ("__VA_ARGS__", string_value);
+  string_value.clear();
+  type = MACRO_PARAM_VA_ARGS;
+}
+
+static_assert(std::is_nothrow_move_constructible<CppToken>::value,
+              "CppToken must be nothrow move constructible");
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_CPP_TOKEN_H_
diff --git a/client/cpp_tokenizer.cc b/client/cpp_tokenizer.cc
new file mode 100644
index 0000000..c6adf11
--- /dev/null
+++ b/client/cpp_tokenizer.cc
@@ -0,0 +1,605 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "cpp_tokenizer.h"
+
+#ifndef NO_SSE2
+#include <emmintrin.h>
+#endif  // NO_SSE2
+
+#ifdef _WIN32
+#include <intrin.h>
+#endif
+
+#include "compiler_specific.h"
+#include "string_util.h"
+
+namespace {
+
+// Returns the index of the lowest set bit of |v|, i.e. the number of trailing
+// zero bits. Every caller in this file passes a non-zero mask.
+static inline int CountZero(int v) {
+#ifdef _WIN32
+  unsigned long r;
+  _BitScanForward(&r, v);
+  return r;
+#else
+  return __builtin_ctz(v);
+#endif
+}
+
+// __popcnt (on MSVC) emits POPCNT. Some engineers are still using older
+// machines that do not have POPCNT, so we'd like to avoid __popcnt.
+// clang-cl.exe must have __builtin_popcount, so use it.
+// For cl.exe, use this somewhat fast algorithm.
+// See b/65465347
+static inline int PopCount(int v) {
+#if defined(_WIN32) && !defined(__clang__)
+  // cl.exe: add up neighbouring 1-, 2-, 4-, 8- and 16-bit fields in turn.
+  v = (v & 0x55555555) + (v >> 1 & 0x55555555);
+  v = (v & 0x33333333) + (v >> 2 & 0x33333333);
+  v = (v & 0x0f0f0f0f) + (v >> 4 & 0x0f0f0f0f);
+  v = (v & 0x00ff00ff) + (v >> 8 & 0x00ff00ff);
+  return (v & 0x0000ffff) + (v >> 16 & 0x0000ffff);
+#else
+  // Every other compiler, clang-cl included, has the builtin.
+  return __builtin_popcount(v);
+#endif
+}
+
+#ifndef NO_SSE2
+typedef ALIGNAS(16) char aligned_char16[16];  // presumably 16-byte aligned
+const aligned_char16 kNewlinePattern = {  // sixteen '\n' (0xA) bytes
+  0xA, 0xA, 0xA, 0xA, 0xA, 0xA, 0xA, 0xA,
+  0xA, 0xA, 0xA, 0xA, 0xA, 0xA, 0xA, 0xA,
+};
+const aligned_char16 kSlashPattern = {  // sixteen '/' bytes
+  '/', '/', '/', '/', '/', '/', '/', '/',
+  '/', '/', '/', '/', '/', '/', '/', '/',
+};
+const aligned_char16 kSharpPattern = {  // sixteen '#' bytes
+  '#', '#', '#', '#', '#', '#', '#', '#',
+  '#', '#', '#', '#', '#', '#', '#', '#',
+};
+#endif  // NO_SSE2
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+std::set<std::string>* CppTokenizer::integer_suffixes_ = nullptr;  // see below
+CppToken::Type kOpTokenTable[128][128];  // [first char][second char, or 0]
+
+static void InitializeTokenSwitchTables() {
+  for (auto& row : kOpTokenTable)  // default: every pair is a punctuator
+    for (auto& entry : row) entry = CppToken::PUNCTUATOR;
+  const auto mark = [](unsigned char c0, unsigned char c1, CppToken::Type t) {
+    kOpTokenTable[c0][c1] = t;
+  };
+  mark('=', '=', CppToken::EQ);  // two-char tokens: [first][second]
+  mark('!', '=', CppToken::NE);
+  mark('>', '=', CppToken::GE);
+  mark('<', '=', CppToken::LE);
+  mark('&', '&', CppToken::LAND);
+  mark('|', '|', CppToken::LOR);
+  mark('>', '>', CppToken::RSHIFT);
+  mark('<', '<', CppToken::LSHIFT);
+  mark('#', '#', CppToken::DOUBLESHARP);
+  mark('\r', '\n', CppToken::NEWLINE);
+  mark('*', 0, CppToken::MUL);  // one-char tokens: [char][0]
+  mark('+', 0, CppToken::ADD);
+  mark('-', 0, CppToken::SUB);
+  mark('>', 0, CppToken::GT);
+  mark('<', 0, CppToken::LT);
+  mark('&', 0, CppToken::AND);
+  mark('^', 0, CppToken::XOR);
+  mark('|', 0, CppToken::OR);
+  mark('#', 0, CppToken::SHARP);
+  mark('\n', 0, CppToken::NEWLINE);
+}
+
+// static
+void CppTokenizer::InitializeStaticOnce() {
+  // Integer-literal suffixes that ReadNumber() accepts, in lower case:
+  // "u" by itself, and "l" / "ll" either alone or combined with "u" on
+  // either side.
+  integer_suffixes_ = new std::set<std::string>{
+      "u",
+      "l", "ul", "lu",
+      "ll", "ull", "llu",
+  };
+
+  // Populate kOpTokenTable as well.
+  InitializeTokenSwitchTables();
+}
+
+// static. Reads the next token from |stream| into |token|; false on error.
+bool CppTokenizer::NextTokenFrom(CppInputStream* stream,
+                                 bool skip_space,
+                                 CppToken* token,
+                                 std::string* error_reason) {
+  for (;;) {
+    const char* cur = stream->cur();  // start of the token being read
+    int c = stream->GetChar();
+    if (c == EOF) {
+      *token = CppToken(CppToken::END);
+      return true;
+    }
+    if (c >= 128) {  // outside the 128x128 kOpTokenTable
+      *token = CppToken(CppToken::PUNCTUATOR, static_cast<char>(c));
+      return true;
+    }
+    if (IsCppBlank(c)) {
+      if (skip_space) {
+        stream->SkipWhiteSpaces();
+        continue;
+      }
+      *token = CppToken(CppToken::SPACE, static_cast<char>(c));
+      return true;
+    }
+    int c1 = stream->PeekChar();  // one-char lookahead, not consumed
+    switch (c) {
+      case '/':
+        if (c1 == '/') {  // line comment: reported as a NEWLINE token
+          SkipUntilLineBreakIgnoreComment(stream);
+          *token = CppToken(CppToken::NEWLINE);
+          return true;
+        }
+        if (c1 == '*') {  // block comment: reported as one SPACE token
+          stream->Advance(1, 0);
+          if (!SkipComment(stream, error_reason)) {
+            *token = CppToken(CppToken::END);
+            return false;
+          }
+          *token = CppToken(CppToken::SPACE, ' ');
+          return true;
+        }
+        *token = CppToken(CppToken::DIV, '/');
+        return true;
+      case '%':
+        if (c1 == ':') {  // digraph "%:" stands for '#'
+          stream->Advance(1, 0);
+          if (stream->PeekChar(0) == '%' &&
+              stream->PeekChar(1) == ':') {  // "%:%:" stands for "##"
+            stream->Advance(2, 0);
+            *token = CppToken(CppToken::DOUBLESHARP);
+            return true;
+          }
+          *token = CppToken(CppToken::SHARP, '#');
+          return true;
+        }
+        *token = CppToken(CppToken::MOD, '%');
+        return true;
+      case '.':
+        if (c1 >= '0' && c1 <= '9') {  // a number such as ".5"
+          *token = ReadNumber(stream, c, cur);
+          return true;
+        }
+        if (c1 == '.' && stream->PeekChar(1) == '.') {  // "..."
+          stream->Advance(2, 0);
+          *token = CppToken(CppToken::TRIPLEDOT);
+          return true;
+        }
+        *token = CppToken(CppToken::PUNCTUATOR, '.');
+        return true;
+      case '\\':
+        c = stream->GetChar();
+        if (c != '\r' && c != '\n') {  // backslash followed by another char
+          *token = CppToken(CppToken::ESCAPED, static_cast<char>(c));
+          return true;
+        }
+        if (c == '\r' && stream->PeekChar() == '\n')
+          stream->Advance(1, 1);
+        break;  // folded line break: go on with the next loop iteration
+      case '"': {
+        *token = CppToken(CppToken::STRING);
+        if (!ReadString(stream, token, error_reason)) {
+          return false;
+        }
+        return true;
+      }
+      default:
+        if (c == '_' || IsAsciiAlpha(c)) {
+          *token = ReadIdentifier(stream, cur);
+          return true;
+        }
+        if (c >= '0' && c <= '9') {
+          *token = ReadNumber(stream, c, cur);
+          return true;
+        }
+        if (c1 == EOF) {  // last char of the input: one-char lookup only
+          *token = CppToken(kOpTokenTable[c][0], static_cast<char>(c));
+          return true;
+        }
+        if ((c1 & ~0x7f) == 0 && kOpTokenTable[c][c1] != CppToken::PUNCTUATOR) {
+          stream->Advance(1, 0);  // consume the second char as well
+          *token = CppToken(kOpTokenTable[c][c1],
+                            static_cast<char>(c), static_cast<char>(c1));
+          return true;
+        }
+        *token = CppToken(kOpTokenTable[c][0], static_cast<char>(c));
+        return true;
+    }
+  }
+}
+
+// static. Appends the text up to (not including) |delimiter| to |result_str|.
+bool CppTokenizer::ReadStringUntilDelimiter(CppInputStream* stream,
+                                            std::string* result_str,
+                                            char delimiter,
+                                            std::string* error_reason) {
+  const char* begin = stream->cur();  // start of the not-yet-copied segment
+  for (;;) {
+    int c = stream->PeekChar();
+    if (c == EOF) {
+      return true;  // NOTE(review): the pending segment is not appended here
+    }
+    if (c == delimiter) {
+      const char* cur = stream->cur() - 1;  // char just before the delimiter
+      stream->Advance(1, 0);
+      if (*cur != '\\') {  // a delimiter right after '\\' does not terminate
+        result_str->append(begin, stream->cur() - begin - 1);
+        return true;
+      }
+    } else if (c == '\n') {
+      const char* cur = stream->cur() - 1;
+      stream->Advance(1, 1);
+      cur -= (*cur == '\r');  // step over the CR of a CRLF line ending
+      if (*cur != '\\') {  // a line break that is not folded is an error
+        *error_reason = "missing terminating character";
+        return false;
+      }
+      // |cur| is the folding backslash: copy what precedes it (LF and CRLF).
+      result_str->append(begin, cur < begin ? 0 : cur - begin);
+      begin = stream->cur();
+    } else stream->Advance(1, 0);
+  }
+}
+
+
+// static
+CppToken CppTokenizer::ReadIdentifier(CppInputStream* stream,
+                                      const char* begin) {
+  // |begin| marks the start of the text still to be appended; it is moved
+  // forward by HandleLineFoldingWithToken() whenever a fold is consumed.
+  CppToken ident(CppToken::IDENTIFIER);
+  int c = stream->GetChar();
+  while (c == '_' || IsAsciiAlphaDigit(c) ||
+         (c == '\\' && HandleLineFoldingWithToken(stream, &ident, &begin))) {
+    c = stream->GetChar();
+  }
+  ident.Append(begin, stream->GetLengthToCurrentFrom(begin, c));
+  stream->UngetChar(c);
+  return ident;
+}
+
+// (6.4.2) Preprocessing numbers
+// pp-number :
+//    digit
+//    .digit
+//    pp-number digit
+//    pp-number nondigit
+//    pp-number [eEpP] sign  ([pP] is new in C99)
+//    pp-number .
+//
+// static. |c0| is the already consumed first char, |begin| points at it.
+CppToken CppTokenizer::ReadNumber(CppInputStream* stream, int c0,
+                                  const char* begin) {
+  CppToken token(CppToken::NUMBER);
+
+  bool maybe_int_constant = (c0 != '.');
+  int base = 10;
+  unsigned int value = 0;  // unsigned: too many digits wrap instead of UB
+  std::string suffix;
+  int c;
+
+  // Handle base prefix.
+  if (c0 == '0') {
+    base = 8;
+    int c1 = stream->PeekChar();
+    if (c1 == 'x' || c1 == 'X') {
+      stream->Advance(1, 0);
+      base = 16;
+    }
+  } else {
+    value = c0 - '0';  // only meaningful when c0 is a digit
+  }
+
+  if (maybe_int_constant) {
+    // Read the digits part.
+    c = ToLowerASCII(stream->GetChar());
+    while ((c >= '0' && c <= ('0' + std::min(9, base - 1))) ||
+           (base == 16 && c >= 'a' && c <= 'f')) {
+      value = value * base + ((c >= 'a') ? (c - 'a' + 10) : (c - '0'));
+      c = ToLowerASCII(stream->GetChar());
+    }
+    stream->UngetChar(c);
+  }
+
+  // (digit | [a-zA-Z_] | . | [eEpP][+-])*
+  for (;;) {
+    c = stream->GetChar();
+    if (c == '\\' && HandleLineFoldingWithToken(stream, &token, &begin)) {
+      continue;
+    }
+    if ((c >= '0' && c <= '9') || c == '.' || c == '_') {
+      maybe_int_constant = false;  // e.g. "08", "1.5" or "1_000"
+      continue;
+    }
+    c = ToLowerASCII(c);
+    if (c >= 'a' && c <= 'z') {
+      if (maybe_int_constant) {
+        suffix += static_cast<char>(c);  // candidate integer suffix
+      }
+      if (c == 'e' || c == 'p') {
+        int c1 = stream->PeekChar();
+        if (c1 == '+' || c1 == '-') {  // exponent sign: not an integer
+          maybe_int_constant = false;
+          stream->Advance(1, 0);
+        }
+      }
+      continue;
+    }
+    break;
+  }
+
+  token.Append(begin, stream->GetLengthToCurrentFrom(begin, c));
+  stream->UngetChar(c);
+  if (maybe_int_constant &&
+      (suffix.empty() ||
+       integer_suffixes_->find(suffix) != integer_suffixes_->end())) {
+    token.v.int_value = static_cast<int>(value);  // same int range as before
+  }
+  return token;
+}
+
+// static
+bool CppTokenizer::ReadString(CppInputStream* stream,
+                              CppToken* result_token,
+                              std::string* error_reason) {
+  // Read into a scratch token so that |*result_token| is only replaced
+  // when the whole string was read successfully.
+  CppToken scratch(CppToken::STRING);
+  const bool ok = ReadStringUntilDelimiter(stream, &scratch.string_value,
+                                           '"', error_reason);
+  if (ok)
+    *result_token = std::move(scratch);
+  return ok;
+}
+
+// static. Callers invoke this right after reading a '\\' from |stream|.
+bool CppTokenizer::HandleLineFoldingWithToken(CppInputStream* stream,
+                                              CppToken* token,
+                                              const char** begin) {
+  int c = stream->PeekChar();
+  if (c != '\r' && c != '\n')
+    return false;  // the backslash is not followed by a line break
+  stream->ConsumeChar();
+  token->Append(*begin, stream->cur() - *begin - 2);  // presumably drops "\\"+c
+  if (c == '\r' && stream->PeekChar() == '\n')
+    stream->Advance(1, 1);  // also take the LF of a CRLF pair
+  *begin = stream->cur();  // the next segment starts after the line break
+  return true;
+}
+
+// static. Expects |stream| just after "/*"; advances it past the closing "*/".
+bool CppTokenizer::SkipComment(CppInputStream* stream,
+                               std::string* error_reason) {
+  const char* begin = stream->cur();  // first char of the comment body
+#ifndef NO_SSE2
+  __m128i slash_pattern = *(__m128i*)kSlashPattern;
+  __m128i newline_pattern = *(__m128i*)kNewlinePattern;
+  while (stream->cur() + 16 < stream->end()) {  // scan 16 bytes per step
+    __m128i s = _mm_loadu_si128((__m128i const*)stream->cur());
+    __m128i slash_test = _mm_cmpeq_epi8(s, slash_pattern);
+    __m128i newline_test = _mm_cmpeq_epi8(s, newline_pattern);
+    int result = _mm_movemask_epi8(slash_test);  // bit i set: byte i is '/'
+    int newline_result = _mm_movemask_epi8(newline_test);
+    while (result) {
+      int index = CountZero(result);  // offset of the next '/' in this block
+      unsigned int shift = (1 << index);
+      result &= ~shift;
+      const char* cur = stream->cur() + index - 1;  // char before that '/'
+      if (cur >= begin && *cur == '*') {  // the '*' of "/*" cannot close it
+        unsigned int mask = shift - 1;  // newlines located before the '/'
+        stream->Advance(index + 1, PopCount(newline_result & mask));
+        return true;
+      }
+    }
+    stream->Advance(16, PopCount(newline_result));
+  }
+#endif  // NO_SSE2
+  for (;;) {  // byte-wise scan; with SSE2 it only handles the last bytes
+    int c = stream->PeekChar();
+    if (c == EOF) {
+      *error_reason = "missing terminating '*/' for comment";
+      return false;
+    }
+    if (c == '/' && stream->cur() != begin &&
+        *(stream->cur() - 1) == '*') {
+      stream->Advance(1, 0);
+      return true;
+    }
+    stream->ConsumeChar();
+  }
+}
+
+// static. Advances |stream| just past the next '#' that starts a directive.
+bool CppTokenizer::SkipUntilDirective(CppInputStream* stream,
+                                      std::string* error_reason) {
+#ifndef NO_SSE2
+  // TODO: String index instruction (pcmpestri) would work better
+  // on sse4.2 enabled platforms.
+  __m128i slash_pattern = *(__m128i*)kSlashPattern;
+  __m128i sharp_pattern = *(__m128i*)kSharpPattern;
+  __m128i newline_pattern = *(__m128i*)kNewlinePattern;
+  while (stream->cur() + 16 < stream->end()) {  // scan 16 bytes per step
+    __m128i s = _mm_loadu_si128((__m128i const*)stream->cur());
+    __m128i slash_test = _mm_cmpeq_epi8(s, slash_pattern);
+    __m128i sharp_test = _mm_cmpeq_epi8(s, sharp_pattern);
+    __m128i newline_test = _mm_cmpeq_epi8(s, newline_pattern);
+    int slash_result = _mm_movemask_epi8(slash_test);
+    int sharp_result = _mm_movemask_epi8(sharp_test);
+    int newline_result = _mm_movemask_epi8(newline_test);
+    int result = slash_result | sharp_result;  // bit i set: '/' or '#'
+    while (result) {
+      int index = CountZero(result);
+      unsigned int shift = (1 << index);
+      result &= ~shift;
+      unsigned int mask = shift - 1;  // newlines located before |index|
+      const char* cur = stream->cur() + index;
+      if (*cur == '/') {
+        int c1 = *(cur + 1);  // in range: the loop keeps 16 bytes of slack
+        if (c1 == '/') {
+          stream->Advance(index + 2, PopCount(newline_result & mask));
+          SkipUntilLineBreakIgnoreComment(stream);
+          goto done;
+        } else if (c1 == '*') {
+          stream->Advance(index + 2, PopCount(newline_result & mask));
+          if (!SkipComment(stream, error_reason))
+            return false;
+          goto done;
+        }
+      } else if (*cur == '#') {
+        if (IsAfterEndOfLine(cur, stream->begin())) {
+          stream->Advance(index + 1, PopCount(newline_result & mask));
+          return true;
+        }
+      }
+    }
+    stream->Advance(16, PopCount(newline_result));
+  done:
+    continue;
+  }
+#endif  // NO_SSE2
+  const char* begin = stream->cur();  // the look-behind never goes before this
+  for (;;) {
+    int c = stream->PeekChar();
+    if (c == EOF)
+      return false;  // no directive found before the end of input
+    if (stream->cur() != begin) {
+      int c0 = *(stream->cur() - 1);
+      if (c0 == '/' && c == '/') {
+        stream->Advance(1, 0);
+        SkipUntilLineBreakIgnoreComment(stream);
+        continue;
+      } else if (c0 == '/' && c == '*') {
+        stream->Advance(1, 0);
+        if (!SkipComment(stream, error_reason))
+          return false;
+        begin = stream->cur();  // the closing '/' must not open a comment
+        continue;  // re-examine the char that follows the comment
+      }
+    }
+    if (c == '#') {
+      if (IsAfterEndOfLine(stream->cur(), stream->begin())) {
+        stream->Advance(1, 0);
+        return true;
+      }
+      stream->Advance(1, 0);
+      continue;
+    }
+    stream->ConsumeChar();
+  }
+  return false;
+}
+
+// static. Advances |stream| past the next line break that is not folded.
+void CppTokenizer::SkipUntilLineBreakIgnoreComment(CppInputStream* stream) {
+#ifndef NO_SSE2
+  __m128i newline_pattern = *(__m128i*)kNewlinePattern;
+  while (stream->cur() + 16 < stream->end()) {  // scan 16 bytes per step
+    __m128i s = _mm_loadu_si128((__m128i const*)stream->cur());
+    __m128i newline_test = _mm_cmpeq_epi8(s, newline_pattern);
+    int newline_result = _mm_movemask_epi8(newline_test);
+    int result = newline_result;  // bit i set: byte i is '\n'
+    while (result) {
+      int index = CountZero(result);  // offset of the next '\n'
+      unsigned int shift = (1 << index);
+      result &= ~shift;
+      unsigned int mask = shift - 1;  // newlines located before |index|
+      const char* cur = stream->cur() + index - 1;  // char before the '\n'
+      cur -= (*cur == '\r');  // step over the CR of a CRLF pair
+      if (*cur != '\\') {  // not folded: this line break ends the skip
+        stream->Advance(index + 1, PopCount(newline_result & mask));
+        return;
+      }
+    }
+    stream->Advance(16, PopCount(newline_result));
+  }
+#endif  // NO_SSE2
+  for (;;) {  // byte-wise scan; with SSE2 it only handles the last bytes
+    int c = stream->PeekChar();
+    if (c == EOF)
+      return;
+    if (c == '\n') {
+      const char* cur = stream->cur() - 1;  // char before the '\n'
+      stream->Advance(1, 1);
+      cur -= (*cur == '\r');  // step over the CR of a CRLF pair
+      if (*cur != '\\')
+        return;  // not folded: done
+    } else {
+      stream->Advance(1, 0);
+    }
+  }
+}
+
+// static. True if only blanks, block comments and folded line breaks lie
+bool CppTokenizer::IsAfterEndOfLine(const char* cur, const char* begin) {
+  for (;;) {  // walk backwards from |cur| over blanks
+    if (cur == begin)
+      return true;  // nothing but blanks back to |begin|
+    int c = *--cur;
+    if (!IsCppBlank(c))
+      break;  // |cur| now points at the nearest non-blank char
+  }
+
+  while (begin <= cur) {
+    int c = *cur;
+    if (c == '\n') {
+      if (--cur < begin)
+        return true;
+      cur -= (*cur == '\r');  // step over the CR of a CRLF pair
+      if (cur < begin || *cur != '\\')
+        return true;  // a line break that is not folded
+
+      --cur;  // folded line break: keep checking the previous line
+      continue;
+    }
+
+    if (c == '/') {  // only acceptable as the end of a block comment
+      if (--cur < begin || *cur != '*')
+        return false;
+
+      --cur;
+      bool block_comment_start_found = false;
+      // Move backward until "/*" is found.
+      while (cur - 1 >= begin) {
+        if (*(cur - 1) == '/' && *cur == '*') {
+          cur -= 2;  // continue before the comment opener
+          block_comment_start_found = true;
+          break;
+        }
+        --cur;
+      }
+
+      if (block_comment_start_found)
+        continue;
+
+      // When '/*' is not found, it's not after end of line.
+      return false;
+    }
+
+    if (IsCppBlank(c)) {
+      --cur;
+      continue;
+    }
+
+    return false;  // any other char means something precedes |cur| on the line
+  }
+
+  return true;  // reached |begin| without meeting other content
+}
+
+}  // namespace devtools_goma
diff --git a/client/cpp_tokenizer.h b/client/cpp_tokenizer.h
new file mode 100644
index 0000000..2fb258b
--- /dev/null
+++ b/client/cpp_tokenizer.h
@@ -0,0 +1,66 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_CPP_TOKENIZER_H_
+#define DEVTOOLS_GOMA_CLIENT_CPP_TOKENIZER_H_
+
+#include <set>
+#include <string>
+
+#include "cpp_input_stream.h"
+#include "cpp_token.h"
+#include "gtest/gtest_prod.h"
+
+namespace devtools_goma {
+
+// Static-only helper class whose functions operate on a CppInputStream and
+// produce CppTokens.  Construction and copying are deleted, so it is used
+// exclusively through its static member functions.
+class CppTokenizer {
+ public:
+  CppTokenizer() = delete;
+  CppTokenizer(const CppTokenizer&) = delete;
+  void operator=(const CppTokenizer&) = delete;
+
+  // NOTE(review): presumably performs the one-time setup of the static state
+  // below (integer_suffixes_); the definition is not visible from this
+  // header -- confirm in cpp_tokenizer.cc.
+  static void InitializeStaticOnce();
+
+  // NOTE(review): judging by the signature, reads the next token from
+  // |stream| into |token| and reports failures through |error_reason|.
+  // Confirm the exact meaning of |skip_space| and of the return value in
+  // cpp_tokenizer.cc.
+  static bool NextTokenFrom(CppInputStream* stream,
+                            bool skip_space,
+                            CppToken* token,
+                            std::string* error_reason);
+
+  // Reads string CppToken.
+  static bool ReadString(CppInputStream* stream,
+                         CppToken* result_token,
+                         std::string* error_reason);
+  // Reads string until |delimiter|.
+  // When error happened, error reason is set to |error_reason|, and false is
+  // returned.
+  static bool ReadStringUntilDelimiter(CppInputStream* stream,
+                                       std::string* result,
+                                       char delimiter,
+                                       std::string* error_reason);
+
+  // NOTE(review): these two presumably read an identifier / a number token
+  // whose text starts at |begin|; confirm the role of |c0| in the .cc.
+  static CppToken ReadIdentifier(CppInputStream* stream, const char* begin);
+  static CppToken ReadNumber(CppInputStream* stream, int c0, const char* begin);
+
+  // Handles line-folding with '\\', updates the token's string_value and
+  // advances the begin pointer.
+  // Returns true if it has consumed a line break.
+  static bool HandleLineFoldingWithToken(CppInputStream* stream,
+                                         CppToken* token, const char** begin);
+
+  // NOTE(review): the skip helpers below are not defined in this header.
+  // From their names they advance |stream| past a comment, up to the next
+  // directive, or up to the next line break -- confirm the details and the
+  // meaning of the bool results in cpp_tokenizer.cc.
+  static bool SkipComment(CppInputStream* stream,
+                          std::string* error_reason);
+  static bool SkipUntilDirective(CppInputStream* stream,
+                                 std::string* error_reason);
+  static void SkipUntilLineBreakIgnoreComment(CppInputStream* stream);
+  // Scans backward from |cur| toward |begin| and returns true if only
+  // blanks, complete "/* ... */" comments and backslash-newline
+  // continuations lie between |cur| and the previous unescaped '\n' (or
+  // |begin|); returns false if any other character is found first.
+  static bool IsAfterEndOfLine(const char* cur, const char* begin);
+
+ private:
+  // NOTE(review): set of integer-literal suffix strings, going by the name;
+  // where it is populated is not visible from this header.
+  static std::set<std::string>* integer_suffixes_;
+
+  FRIEND_TEST(CppTokenizerTest, IsAfterEndOfLine);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_CPP_TOKENIZER_H_
diff --git a/client/cpp_tokenizer_unittest.cc b/client/cpp_tokenizer_unittest.cc
new file mode 100644
index 0000000..ffda8a0
--- /dev/null
+++ b/client/cpp_tokenizer_unittest.cc
@@ -0,0 +1,45 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <cstring>
+
+#include <gtest/gtest.h>
+
+#include "cpp_tokenizer.h"
+
+namespace devtools_goma {
+
+TEST(CppTokenizerTest, IsAfterEndOfLine) {
+  // Every case asks whether the first '#' of |source| is at the start of a
+  // logical line, i.e. preceded only by blanks, block comments and
+  // backslash-newline continuations.
+  static const struct {
+    const char* source;
+    bool expected;
+  } kTestCases[] = {
+    {" #include <iostream>", true},
+    {" f(); #include <iostream>", false},
+    {" \n #include <iostream>", true},
+    {" f(); \n #include <iostream>", true},
+    {"  \\\n #include <iostream>", true},
+    {" f(); \\\n #include <iostream>", false},
+    {" /* foo */  \\\n #include <iostream>", true},
+    {" f(); /* foo */ \\\n #include <iostream>", false},
+    {" /* foo */ \\\r\n /* foo */  \\\n #include <iostream>", true},
+    {"f();/* foo */ \\\r\n /* foo */ \\\n #include <iostream>", false},
+  };
+
+  for (const auto& test_case : kTestCases) {
+    const char* sharp = strchr(test_case.source, '#');
+    EXPECT_EQ(test_case.expected,
+              CppTokenizer::IsAfterEndOfLine(sharp, test_case.source))
+        << "source=" << test_case.source;
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/client/cros_util.cc b/client/cros_util.cc
new file mode 100644
index 0000000..8b091ff
--- /dev/null
+++ b/client/cros_util.cc
@@ -0,0 +1,167 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef __linux__
+#error "We only expect this is used by Linux."
+#endif
+
+#include "cros_util.h"
+
+#include <sys/time.h>
+#include <time.h>
+
+#include <memory>
+
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+
+#include "basictypes.h"
+#include "file_helper.h"
+#include "ioutil.h"
+#include "scoped_fd.h"
+#include "split.h"
+#include "string_piece.h"
+
+namespace {
+
+// Built-in blacklist entries; GetBlacklist() copies this table into the
+// vector it returns when it falls back to the default list.
+constexpr const char* kDefaultBlacklist[] = {
+    // "make -j" fails for these packages.
+    "/dev-libs/nss",
+    "/app-crypt/nss",
+    "/dev-libs/m17n-lib",
+    "/sys-fs/mtools",
+    "/dev-java/icedtea",
+    // The Makefile forces -j1 for this package.
+    "/dev-libs/openssl",
+};
+
+}  // namespace
+
+namespace devtools_goma {
+
+// Splits |contents| with SplitStringUsing() on the delimiters "\r\n", applies
+// StringStrip() to every piece and returns the pieces that are non-empty
+// afterwards, in their original order.
+std::vector<string> ParseBlacklistContents(const string& contents) {
+  std::vector<string> raw_lines;
+  SplitStringUsing(contents, "\r\n", &raw_lines);
+
+  std::vector<string> entries;
+  for (const string& raw_line : raw_lines) {
+    StringPiece trimmed = StringStrip(raw_line);
+    if (trimmed.empty())
+      continue;  // Blank lines are ignored.
+    entries.push_back(string(trimmed));
+  }
+  return entries;
+}
+
+// Returns the blacklist to use.
+// When $GOMACC_BLACKLIST is unset or empty, a copy of kDefaultBlacklist is
+// returned.  Otherwise the file it names is read and parsed with
+// ParseBlacklistContents(); the process aborts (CHECK) if that file cannot
+// be read.
+std::vector<string> GetBlacklist() {
+  const char* blacklist_file = getenv("GOMACC_BLACKLIST");
+  // cros_util.h documents that an empty value selects the default list, so
+  // treat "" like an unset variable instead of trying to read a file named
+  // "" (which would fail the CHECK below).
+  if (blacklist_file == nullptr || blacklist_file[0] == '\0') {
+    return std::vector<string>(std::begin(kDefaultBlacklist),
+                               std::end(kDefaultBlacklist));
+  }
+  string contents;
+  CHECK(ReadFileToString(blacklist_file, &contents))
+    << "Failed to read GOMACC_BLACKLIST=" << blacklist_file;
+  return ParseBlacklistContents(contents);
+}
+
+// Returns true if at least one entry of |blacklist| occurs as a substring of
+// |path|; the match is logged at INFO level.  Returns false otherwise,
+// including when |blacklist| is empty.
+bool IsBlacklisted(const string& path, const std::vector<string>& blacklist) {
+  for (const string& entry : blacklist) {
+    if (path.find(entry) == string::npos)
+      continue;
+    LOG(INFO) << "The path is blacklisted. "
+              << " path=" << path;
+    return true;
+  }
+  return false;
+}
+
+// Reads /proc/loadavg and returns its first whitespace-separated field
+// parsed as a float.  Returns -1 if the file cannot be opened, fewer than 5
+// bytes are read, or the first field does not start with a number.
+float GetLoadAverage() {
+  ScopedFd fd(ScopedFd::OpenForRead("/proc/loadavg"));
+  if (!fd.valid()) {
+    PLOG(ERROR) << "failed to open /proc/loadavg";
+    return -1;
+  }
+  char buf[1024];
+  // Leave room for the terminating NUL written below.
+  int r = fd.Read(buf, sizeof(buf) - 1);
+  if (r < 5) {  // should read at least "x.yz "
+    PLOG(ERROR) << "failed to read /proc/loadavg";
+    return -1;
+  }
+  buf[r] = '\0';
+
+  std::vector<string> loadavgs;
+  SplitStringUsing(buf, " \t", &loadavgs);
+  if (loadavgs.empty()) {
+    LOG(ERROR) << "failed to get load average.";
+    return -1;
+  }
+  char* endptr;
+  float load = strtof(loadavgs[0].c_str(), &endptr);
+  // strtof leaves |endptr| at the start of the input when nothing could be
+  // converted.
+  if (loadavgs[0].c_str() == endptr) {
+    LOG(ERROR) << "failed to parse load average."
+        << " buf=" << buf
+        << " loadavgs[0]=" << loadavgs[0];
+    return -1;
+  }
+  return load;
+}
+
+// Returns a pseudo-random integer x with |a| <= x <= |b|.  Callers must pass
+// a <= b.  The generator is seeded from the current time on the first call
+// only; later calls just draw the next value from random().
+int RandInt(int a, int b) {
+  static bool initialized = false;
+  if (!initialized) {
+    // I chose gettimeofday because I believe it is more unlikely to cause the
+    // same random number pattern than srand(time(nullptr)).
+    struct timeval tv;
+    CHECK_EQ(gettimeofday(&tv, nullptr), 0);
+    srandom(tv.tv_usec);
+    // Without this the generator would be reseeded on every call, making
+    // results depend only on the current microsecond.
+    initialized = true;
+  }
+  // Compute the range width in long so that a wide [a, b] cannot overflow
+  // int arithmetic.
+  const long range = static_cast<long>(b) - a + 1;
+  return static_cast<int>(a + random() % range);
+}
+
+// Returns true if gomacc may handle requests from the current working
+// directory: the directory is not matched by the blacklist returned by
+// GetBlacklist() and the process is not running as root (uid 0).
+// Returns false if the current working directory cannot be determined.
+bool CanGomaccHandleCwd() {
+  const std::vector<string> blacklist = GetBlacklist();
+  // getcwd(nullptr, 0) returns a malloc()ed buffer that must be free()d.
+  std::unique_ptr<char, decltype(&free)> cwd(getcwd(nullptr, 0), free);
+  if (cwd == nullptr) {
+    // getcwd() fails e.g. when the directory has been removed.  Building a
+    // string from nullptr is undefined behavior, so give up explicitly.
+    PLOG(ERROR) << "failed to get current working directory";
+    return false;
+  }
+  if (IsBlacklisted(cwd.get(), blacklist) || getuid() == 0) {
+    return false;
+  }
+  return true;
+}
+
+// Blocks until GetLoadAverage() reports a value lower than |load|.
+// Between checks it sleeps for a random number of seconds in
+// [1, sleep_time], where sleep_time doubles on every round and is capped at
+// |max_sleep_time|.  A warning is logged whenever more than
+// |max_sleep_time| seconds have passed since the previous warning (or since
+// entry).  Aborts (CHECK) on non-positive arguments or if the load average
+// cannot be read.
+void WaitUntilLoadAvgLowerThan(float load, int max_sleep_time) {
+  CHECK_GT(load, 0.0)
+      << "load must be larger than 0.  Or, this function won't finish."
+      << " load=" << load;
+  CHECK_GT(max_sleep_time, 0)
+      << "Max sleep time should be larger than 0."
+      << " max_sleep_time=" << max_sleep_time;
+
+  time_t last_update = time(nullptr);
+  int sleep_time = 1;
+  while (true) {
+    const float current_loadavg = GetLoadAverage();
+    CHECK_GE(current_loadavg, 0.0)
+        << "load average < 0.  Possibly GetLoadAverage is broken."
+        << " current_loadavg=" << current_loadavg;
+    if (current_loadavg < load)
+      return;
+
+    const time_t now = time(nullptr);
+    if (now - last_update > max_sleep_time) {
+      LOG(WARNING) << "waiting."
+                   << " load=" << load
+                   << " current_loadavg=" << current_loadavg
+                   << " max_sleep_time=" << max_sleep_time;
+      last_update = now;
+    }
+
+    // Exponential back-off, capped at |max_sleep_time|.
+    sleep_time *= 2;
+    if (sleep_time > max_sleep_time)
+      sleep_time = max_sleep_time;
+    sleep(RandInt(1, sleep_time));
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/client/cros_util.h b/client/cros_util.h
new file mode 100644
index 0000000..c47086b
--- /dev/null
+++ b/client/cros_util.h
@@ -0,0 +1,63 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_CROS_UTIL_H_
+#define DEVTOOLS_GOMA_CLIENT_CROS_UTIL_H_
+
+#include <string>
+#include <vector>
+
+using std::string;
+
+namespace devtools_goma {
+
+#ifdef __linux__
+
+// Parses blacklist file contents and returns a list of blacklisted directories.
+std::vector<string> ParseBlacklistContents(const string& contents);
+
+// Returns list of portage package names to blacklist.
+//  If non-empty $GOMACC_BLACKLIST is set, we use the contents of the file
+//  specified by $GOMACC_BLACKLIST instead of the default one.
+//  If $GOMACC_BLACKLIST is unset or an empty string, we use the built-in
+//  default blacklist.
+//  The blacklist contents should be list of directories like:
+//  /dev-libs/nss
+//  /sys-fs/mtools
+//
+//  Note that empty line is just ignored.
+std::vector<string> GetBlacklist();
+
+// Returns true if |path| matches with one of path name in |blacklist|.
+bool IsBlacklisted(const string& path, const std::vector<string>& blacklist);
+
+// Returns load average in 1 min.  Returns negative value on error.
+float GetLoadAverage();
+
+// Returns a random integer x such that |a| <= x <= |b|.
+// Note: the returned random numbers are not uniformly distributed.
+//       That is considered good enough for randomizing a sleep time.
+int RandInt(int a, int b);
+
+// Returns true if current working directory is not in the black list.
+// If in the black list, gomacc won't send the request to compiler_proxy.
+bool CanGomaccHandleCwd();
+
+// Waits until the load average becomes less than |load|.
+// Each individual sleep between checks lasts at most |max_sleep_time|
+// seconds; the function keeps waiting until the load average drops.
+void WaitUntilLoadAvgLowerThan(float load, int max_sleep_time);
+
+#else
+
+// Provide dummy functions for non-linux.
+// They are defined in this header, so they must be inline; otherwise every
+// translation unit that includes the header emits its own definition and
+// the link fails with multiple-definition errors.
+inline bool CanGomaccHandle(const std::vector<std::string>& /* args */) {
+  return true;
+}
+// Counterpart of the Linux CanGomaccHandleCwd() so that callers can use the
+// same name on every platform; nothing is blacklisted here.
+inline bool CanGomaccHandleCwd() {
+  return true;
+}
+// Never waits on non-linux.
+inline void WaitUntilLoadAvgLowerThan(float /* load */,
+                                      int /* max_sleep_time */) {}
+
+#endif
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_CROS_UTIL_H_
diff --git a/client/cros_util_unittest.cc b/client/cros_util_unittest.cc
new file mode 100644
index 0000000..1deaf10
--- /dev/null
+++ b/client/cros_util_unittest.cc
@@ -0,0 +1,116 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "cros_util.h"
+
+#include<string>
+#include<vector>
+
+#include <gtest/gtest.h>
+
+using std::string;
+
+namespace devtools_goma {
+
+TEST(CrosUtil, ParseBlacklistContents) {
+  // Each case pairs raw blacklist file contents with the entries that are
+  // expected to come out of the parser.
+  const struct {
+    const char* contents;
+    std::vector<string> expected;
+  } kTestCases[] = {
+    // Empty input and input made only of line breaks / blanks.
+    {"", {}},
+    {"\n\r ", {}},
+    // A single directory, with and without surrounding white space.
+    {"/tmp", {"/tmp"}},
+    {"\r\n /tmp\r\n ", {"/tmp"}},
+    // Two directories, with and without blank lines in between.
+    {"\n/example\n/example2\n", {"/example", "/example2"}},
+    {"\n/example \r\n \r\n \r\n /example2\n", {"/example", "/example2"}},
+    // Directory names that contain spaces themselves.
+    {"\n/dirname with space \r\n", {"/dirname with space"}},
+    {"\n/dirname with  space \r\n /with space/part 2 \r\n",
+     {"/dirname with  space", "/with space/part 2"}},
+  };
+
+  for (const auto& test_case : kTestCases) {
+    EXPECT_EQ(test_case.expected, ParseBlacklistContents(test_case.contents));
+  }
+}
+
+TEST(CrosUtil, IsBlacklisted) {
+  // A path is blacklisted when any blacklist entry occurs anywhere in it.
+  const struct {
+    const char* path;
+    std::vector<string> blacklist;
+    bool expected;
+  } kTestCases[] = {
+    // Matching cases: exact match, and match at the beginning, in the
+    // middle and at the end of the path.
+    {"/tmp", {"/tmp"}, true},
+    {"/tmp", {"non-related", "/tmp"}, true},
+    {"/usr/local/etc", {"/usr", "/tmp"}, true},
+    {"/usr/local/etc", {"non-related", "/local"}, true},
+    {"/usr/local/etc", {"non-related", "/etc"}, true},
+    // Non-matching cases, including the empty blacklist.
+    {"/tmp", {}, false},
+    {"/tmp", {"non-related"}, false},
+    {"/usr/local/etc", {"/opt", "/tmp"}, false},
+  };
+
+  for (const auto& test_case : kTestCases) {
+    EXPECT_EQ(test_case.expected,
+              IsBlacklisted(test_case.path, test_case.blacklist));
+  }
+}
+
+TEST(CrosUtil, GetLoadAverage) {
+  // Smoke test: a negative value would signal a read or parse failure.
+  const float load_average = GetLoadAverage();
+  EXPECT_GE(load_average, 0.0);
+}
+
+TEST(CrosUtil, RandInt) {
+  // Smoke test: every draw has to stay inside the inclusive range [10, 20].
+  int remaining = 100;
+  while (remaining-- > 0) {
+    const int value = RandInt(10, 20);
+    EXPECT_GE(value, 10);
+    EXPECT_LE(value, 20);
+  }
+  // A range holding a single value must always yield that value.
+  EXPECT_EQ(128, RandInt(128, 128));
+}
+
+}  // namespace devtools_goma
diff --git a/client/data/fuzzer_dictionaries/net_url_request_fuzzer.dict b/client/data/fuzzer_dictionaries/net_url_request_fuzzer.dict
new file mode 100644
index 0000000..6cdfcb2
--- /dev/null
+++ b/client/data/fuzzer_dictionaries/net_url_request_fuzzer.dict
@@ -0,0 +1,1720 @@
+# Copied from chromium's net/data/fuzzer_dictionaries.
+#
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Fuzzer dictionary targeting HTTP/1.x responses.
+
+# Entries that are generally useful in headers
+":"
+"\x0A"
+"\x0D"
+"0"
+"50"
+"500"
+# Horizontal whitespace. Matters mostly in status line.
+" "
+"\x09"
+# Header continuation
+"\x0D\x0A\x09"
+# Used in a lot of individual headers
+";"
+"="
+","
+"\""
+"-"
+
+# Status line components
+"HTTP"
+"/1.1"
+"/1.0"
+# More interesting status codes.  Leading space so can be inserted into
+# other status lines.
+" 100"
+" 200"
+" 206"
+" 301"
+" 302"
+" 303"
+" 304"
+" 307"
+" 308"
+" 401"
+" 403"
+" 404"
+" 500"
+" 501"
+" 403"
+
+# Full status lines (Some with relevant following headers)
+"HTTP/1.1 200 OK\x0A\x0A"
+"HTTP/1.1 100 Continue\x0A\x0A"
+"HTTP/1.1 401 Unauthorized\x0AWWW-Authenticate: Basic realm=\"Middle-Earth\"\x0A\xA0"
+"HTTP/1.1 407 Proxy Authentication Required\x0AProxy-Authenticate: Digest realm=\"Middle-Earth\", nonce=\"aaaaaaaaaa\"\x0A\x0A"
+"HTTP/1.0 301 Moved Permanently\x0ALocation: /a\x0A\x0A"
+"HTTP/1.1 302 Found\x0ALocation: http://lost/\x0A\x0A"
+
+# Proxy authentication headers. Note that fuzzers don't support NTLM or
+# negotiate.
+"WWW-Authenticate:"
+"Proxy-Authenticate:"
+"Basic"
+"Digest"
+"realm"
+"nonce"
+
+"Connection:"
+"Proxy-Connection:"
+"Keep-Alive"
+"Close"
+"Upgrade"
+"\x0AConnection: Keep-Alive"
+"\x0AConnection: Close"
+"\x0AProxy-Connection: Keep-Alive"
+"\x0AProxy-Connection: Close"
+
+"Content-Length:"
+"Transfer-Encoding:"
+"chunked"
+"\x0AContent-Length: 0"
+"\x0AContent-Length: 500"
+"\x0ATransfer-Encoding: chunked\x0A\x0A5\x0A12345\x0A0\x0A\x0A"
+
+"Location:"
+"\x0ALocation: http://foo/"
+"\x0ALocation: http://bar/"
+"\x0ALocation: https://foo/"
+"\x0ALocation: https://bar/"
+
+"Accept-Ranges:"
+"bytes"
+"\x0AAccept-Ranges: bytes"
+
+"Content-Range:"
+
+"Age:"
+"\x0AAge: 0"
+"\x0AAge: 3153600000"
+
+"Cache-Control:"
+"max-age"
+"no-cache"
+"no-store"
+"must-revalidate"
+"\x0ACache-Control: max-age=3153600000"
+"\x0ACache-Control: max-age=0"
+"\x0ACache-Control: no-cache"
+"\x0ACache-Control: no-store"
+"\x0ACache-Control: must-revalidate"
+
+"Content-Disposition:"
+"attachment"
+"filename"
+
+"Content-Encoding:"
+"gzip"
+"deflate"
+"sdch"
+"br"
+"\x0AContent-Encoding: gzip"
+"\x0AContent-Encoding: deflate"
+"\x0AContent-Encoding: sdch"
+"\x0AContent-Encoding: br"
+
+"Date:"
+"Fri, 01 Apr, 2050 14:14:14 GMT"
+"Mon, 28 Mar, 2016 04:04:04 GMT"
+"\x0ADate: Fri, 01 Apr, 2050 14:14:14 GMT"
+"\x0ADate: Mon, 28 Mar, 2016 04:04:04 GMT"
+
+"Last-Modified:"
+"\x0ALast-Modified: Fri, 01 Apr, 2050 14:14:14 GMT"
+"\x0ALast-Modified: Mon, 28 Mar, 2016 04:04:04 GMT"
+
+"Expires:"
+"\x0AExpires: Fri, 01 Apr, 2050 14:14:14 GMT"
+"\x0AExpires: Mon, 28 Mar, 2016 04:04:04 GMT"
+
+"Set-Cookie:"
+"Expires"
+"Max-Age"
+"Domain"
+"Path"
+"Secure"
+"HttpOnly"
+"Priority"
+"Low"
+"Medium"
+"High"
+"SameSite"
+"Strict"
+"Lax"
+"\x0ASet-Cookie: foo=bar"
+"\x0ASet-Cookie: foo2=bar2;HttpOnly;Priority=Low;SameSite=Strict;Path=/"
+"\x0ASet-Cookie: foo=chicken;SameSite=Lax"
+
+"Strict-Transport-Security:"
+"includeSubDomains"
+
+"Vary:"
+"\x0AVary: Cookie"
+"\x0AVary: Age"
+
+"ETag:"
+"\x0AETag: jumboshrimp"
+
+
+# This part has been generated with testing/libfuzzer/dictionary_generator.py
+# using net_url_request_fuzzer binary and RFC 3986.
+"all"
+"consider"
+"Transfer-Encoding"
+"D.,"
+"prefix"
+"concept"
+"CR"
+"follow"
+"RFC-850"
+"(which"
+"ISDN"
+"\"TE\""
+"increase"
+"number"
+"calculate"
+"\"IETF"
+"fixed-length"
+"\"OPTIONAL\""
+"to"
+"Host"
+"program"
+"Western"
+"under"
+"Changing"
+"(STD"
+"digit"
+"returned"
+"returning"
+"very"
+"SP,"
+"SP."
+"Validation"
+"(URI):"
+"Incomplete"
+"Origin"
+"--"
+"cause"
+"EXPRESS"
+"list"
+"large"
+"expired."
+"small"
+"(URL)\","
+"range."
+"past"
+"second"
+"Version"
+"allowed."
+"tag."
+"implemented"
+"canonical"
+"even"
+"established"
+"errors"
+"incompatible"
+"section"
+"contributed"
+"while"
+"decoding"
+"version"
+"above"
+"TTL"
+"new"
+"increasing"
+"method"
+"WWW-"
+"never"
+"equals"
+"here"
+"ranges"
+"reported"
+"compressed"
+"active"
+"path"
+"strong"
+"Index"
+"changed"
+"DISCLAIMS"
+"prior"
+"amount"
+"published"
+"NOT"
+"error,"
+"options"
+"via"
+"followed"
+"secure"
+"family"
+"\"HTTP\""
+"Unspecified"
+"replace"
+"CERN/3.0"
+"CTE"
+"(CTE)"
+"TO"
+"Too"
+"CTL"
+"PUT,"
+"total"
+"PUT)"
+"Security"
+"select"
+"languages"
+"TASK"
+"exception."
+"would"
+"contains"
+"negative"
+"User-Agent"
+"call"
+"MUST,"
+"type"
+"until"
+"authorization"
+"more"
+"ISO-8859-9,"
+"initiated"
+"composite"
+"LF,"
+"line"
+"it"
+"warn"
+"American"
+"varying"
+"known"
+"Found"
+"MHTML"
+"must"
+"parse"
+"none"
+"1999"
+"work"
+"paragraph"
+"sent"
+"evolved"
+"root"
+"example"
+"requested,"
+"history"
+"type."
+"(HTCPCP/1.0)\","
+"accept"
+"currency"
+"minimum"
+"Compromise"
+"numbers"
+"want"
+"type:"
+"times"
+"simple"
+"LF"
+"information"
+"needs"
+"end"
+"goal"
+"verify"
+"far"
+"Pragma"
+"reject"
+"A"
+"badly"
+"HEAD"
+"description"
+"number."
+"insecure"
+"after"
+"variant"
+"confirmed"
+"reflect"
+"wrong"
+"law"
+"response"
+"types"
+"a"
+"All"
+"short"
+"attempt"
+"third"
+"menu."
+")"
+"algorithms"
+"cases."
+"File"
+"\"DEFLATE"
+"order"
+"\"SHOULD"
+"help"
+"don't"
+"over"
+"vary"
+"satisfied"
+"CD-ROM,"
+"held"
+"HTTP-WG."
+"through"
+"of,"
+"existence"
+"its"
+"digest"
+"before"
+"difference"
+"20"
+"termed"
+"MAY,"
+"fix"
+"ISO-3166"
+"actually"
+"407"
+"(GNU"
+"absence"
+"\"HTTP/1.1\","
+"Sun,"
+"MERCHANTABILITY"
+"408"
+"it."
+"them"
+"good"
+"return"
+"HTTP/2.4"
+"combination"
+"URL"
+"URI"
+"Due"
+"Bad"
+"they"
+"Control"
+"always"
+"decimal"
+"refresh"
+"expectation."
+"MAY"
+"token"
+"]URI,"
+"[CRLF]"
+"found"
+"Content-Type"
+"ports"
+"trailer"
+"referred"
+"status"
+"weight"
+"series"
+"reduce"
+"(URI)"
+"expect"
+"max-age=0"
+"combining"
+"operation"
+"beyond"
+"Type"
+"event"
+"is:"
+"by"
+"E."
+"network"
+"Server:"
+"open"
+"\"MUST/MAY/SHOULD\""
+"since"
+"request/response"
+"content"
+"message."
+"PATCH,"
+"7"
+"2DIGIT"
+"available."
+"K.,"
+"linear"
+"Extension"
+"University"
+"enclosing"
+"free"
+"reason"
+"base"
+"proxy"
+"POST"
+"beginning"
+"generate"
+"text/plain"
+"definition"
+"perform"
+"Partial"
+"created"
+"UPALPHA"
+"script"
+"\"GMT\""
+"filter"
+"SSL"
+"expecting"
+"If-Modified-Since"
+"HEAD."
+"HEAD,"
+"assign"
+"user"
+"major"
+"already"
+"Copyright"
+"encoding"
+"Cache"
+"Please"
+"token."
+"TCP"
+"content-range"
+"least"
+"another"
+"FITNESS"
+"invalid."
+"\""
+"service"
+"image/gif"
+"top"
+"header)"
+"construct"
+"2"
+"ignored."
+"listed"
+"passed"
+"Delta"
+"LOALPHA"
+"scheme"
+"store"
+"too"
+"M."
+"immediate"
+"direct"
+"tokens"
+"part"
+"WAIS"
+"F.,"
+"to:"
+"distance"
+"Code"
+"target"
+"Content-Type:"
+"zero,"
+"likely"
+"WWW-Authenticate"
+"matter"
+"idle"
+"determined"
+"stale"
+"ISO-8859-8,"
+"payload"
+"ANSI"
+"B"
+"seen"
+"HTTP/1.1.)"
+"null"
+"OPTIONS"
+"contents"
+"paths"
+"data."
+"data)"
+"zero"
+"depending"
+"Acceptable"
+"responsible"
+"(MIME"
+"also"
+"internal"
+"(C)"
+"build"
+"finding"
+"With"
+"UCI"
+"Names"
+"content-"
+"added"
+"headers."
+"Content-Disposition"
+"object"
+"\"MUST\","
+"most"
+"regular"
+"ensure"
+"letter"
+"2*N"
+"services"
+"The"
+"Responses"
+"payload."
+"clear"
+"sometimes"
+"flow"
+"Client"
+"ISO-8859-3,"
+"Its"
+"incomplete"
+"\"MIME"
+"Note:"
+"particularly"
+"labels"
+"\"C\""
+"session"
+"Unrecognized"
+"find"
+"]"
+"implementation"
+"[RFC"
+"ranges."
+"BNF,"
+"user-agent"
+"failed"
+"URL)."
+"LDAP)"
+"8"
+"US-ASCII"
+"do"
+"hit"
+"stop"
+"\"HTTP\"."
+"While"
+"Set"
+"rest"
+"report"
+"during"
+"body,"
+"PUT"
+"(via"
+"public"
+"twice"
+"bad"
+"common"
+"release"
+"require"
+"set"
+"mandatory"
+"reference"
+"\"F\""
+"MIME:"
+"depends"
+"individual"
+"result"
+"J."
+"close"
+"subject"
+"said"
+"headers"
+"WWW\","
+"See"
+"BUT"
+"unable"
+"various"
+"probably"
+"0)"
+"0."
+"0,"
+"discovery"
+"available"
+"we"
+"reasons."
+"terms"
+"missing"
+"Server"
+"(MIME)"
+"OPTIONAL;"
+"AND"
+"both"
+"protect"
+"Unexpected"
+"last"
+"reverse"
+"\"MAY\","
+"*TEXT"
+"against"
+"connection"
+"became"
+"context"
+"exceeds"
+"however,"
+"mean"
+"reached."
+"finds"
+"experimental"
+"load"
+"Redirect"
+"Content-Length"
+"alternate"
+"consume"
+"point"
+"reasons"
+"had"
+"header"
+"DNS)"
+"DNS."
+"B.,"
+"(O)."
+"1.0"
+"throughout"
+"BCP"
+"["
+"application/pdf"
+"\"REQUIRED\","
+"C."
+"basis"
+"\"POST\""
+"create"
+"acceptance"
+"(MHTML)\","
+"Reason"
+"been"
+"."
+"much"
+"\"PUT\""
+"basic"
+"expected"
+"text/html;"
+"empty"
+"HTTP/1.0"
+"concerning"
+"Flow"
+"N"
+"size,"
+"\"W/\""
+"reason."
+"MA"
+"\"DELETE\""
+"unnecessarily"
+"exception"
+"handling"
+"Group,"
+"particular,"
+"technical"
+"near"
+"\"GZIP"
+"error"
+"(IANA)"
+"\"TRACE\""
+"Accept-Language"
+"played"
+"is"
+"herein"
+"encountered"
+"E-mail"
+"MIME"
+"in"
+"accepted."
+"if"
+"containing"
+"\"A"
+"lengths"
+"make"
+"format"
+"\"I"
+"unrecognized"
+"widely"
+"9"
+"several"
+"higher"
+"\"%"
+"used"
+"temporary"
+"alert"
+"action"
+"purpose"
+"characters"
+"stack"
+"recent"
+"lower"
+"task"
+"database"
+"NNTP"
+"failing"
+"person"
+"client"
+"length."
+"entry"
+"the"
+"left"
+"protocol"
+"US-ASCII."
+"THAT"
+"bandwidth"
+"inactive"
+"(TE)"
+"Internet"
+"HTTP/1.0)"
+"HTTP/1.0."
+"previous"
+"tables"
+"unique"
+"case."
+"character"
+"Trailers"
+"source"
+"ISO-8859-2,"
+"subjects"
+"WILL"
+"location"
+"0*3DIGIT"
+"input"
+"save"
+"remaining"
+"URI."
+"URI,"
+"fact,"
+"transfer-encoding"
+"possible"
+"required."
+"Assigned"
+"Length"
+"URI;"
+"integer"
+"bit"
+"Sat,"
+"desire"
+"OK"
+"success"
+"ISO-8859-5,"
+"OF"
+"signal"
+"INFRINGE"
+"H.F.,"
+"specific"
+"X3.4-1986"
+"security"
+"OR"
+"S."
+"right"
+"old"
+"often"
+"deal"
+"people"
+"successfully"
+"some"
+"back"
+"HT"
+"Last-Modified"
+"headers)"
+"DEL"
+"examples"
+"unless"
+"(BNF)"
+"TCP/IP"
+"ignore"
+"PUT."
+"INDEX."
+"headers,"
+"for"
+"track"
+"CONNECT"
+"be"
+"replaced"
+"run"
+"deleted"
+"example,"
+"<URL:"
+"O"
+"last-modified"
+"become"
+"relating"
+"permitted"
+"ALPHA,"
+"First"
+"ENGINEERING"
+"anything"
+"tracing"
+"\"UTF-8,"
+"*<TEXT,"
+"range"
+"3ALPHA"
+"extensions"
+"positive"
+"block"
+"IRC/6.9,"
+"W3C/MIT"
+"into"
+"within"
+"ACM"
+"two"
+"down"
+"file."
+"compression"
+"IETF"
+"expired"
+"support"
+"initial"
+"question"
+"long"
+"User"
+"HT."
+"forward"
+"version."
+"sections"
+"disallowed"
+"lowest"
+"HT,"
+"an"
+"form"
+"attempted"
+"registered"
+"differences"
+"URL."
+"failure"
+"server."
+"link"
+"CRLF"
+"DNS"
+"encoded"
+"Non-Authoritative"
+"true"
+"GMT"
+"reset"
+"consist"
+"versions"
+"used,"
+"maximum"
+"us"
+"used."
+"If-None-Match"
+"HTML\","
+"similar"
+"called"
+"delete"
+"DELETE,"
+"storing"
+"associated"
+"Introduction"
+"request"
+"specified"
+"influence"
+"To"
+"single"
+"warning"
+"exist"
+"New"
+"NOT,"
+"check"
+"ISO-8859-1:1987."
+"encrypt"
+"Only"
+"no"
+"May"
+"when"
+"A,"
+"invalid"
+"A."
+"MHTML,"
+"name."
+"setting"
+"role"
+"Proxy"
+"test"
+"TE"
+"pseudonym"
+"negotiation."
+"exceeded"
+"update"
+"T."
+"variable"
+"NOT\","
+"R."
+"longer"
+"algorithm"
+"IANA."
+"age"
+"packets"
+"together"
+"An"
+"As"
+"time"
+"failures"
+"requires"
+"avoid"
+"code."
+"once"
+"code"
+"partial"
+"chain"
+"TEXT"
+"results"
+"existing"
+"go"
+"(LZW)."
+"provided."
+"CPU"
+"CREATE"
+"Notice"
+"LF>"
+"\"HEAD\""
+"zone"
+"UNIX,"
+"ARPA"
+"send"
+"Standard"
+"environment"
+"USENET"
+"Not"
+"Nov"
+"include"
+"resources"
+"string"
+"advantage"
+"outside"
+"Explicit"
+"ALL"
+"HTTP/1.1;"
+"entries"
+"HTTP/1.1,"
+"HTTP/1.1."
+"entire"
+"Protocol"
+"level"
+"did"
+"button"
+"HTTP/1.0\","
+"(RFC"
+"try"
+"ONLY"
+"Tag"
+"(LWS"
+"\"SHOULD\","
+"prevent"
+"\"A\"..\"Z\">"
+"unexpected"
+"INFORMATION"
+"Failed"
+"\"A\""
+"Satisfiable"
+"port"
+"append"
+"\"HTTP/1.0\""
+"formats"
+"ISO-8859-4,"
+"appear"
+"rate"
+"opaque"
+"current"
+"waiting"
+"HTML"
+"shared"
+"CRLF)"
+"302"
+"body"
+"FTP"
+"NNTP."
+"\"SHALL"
+"following"
+"objects"
+"address"
+"1*HEX"
+"Distribution"
+"entry."
+"HTTP."
+"change"
+"cache)"
+"incoming"
+"\"AS"
+"receive"
+"larger"
+"host"
+"descended"
+"here."
+"+"
+"{"
+"makes"
+"composed"
+"named"
+"useful"
+"addresses"
+"extra"
+"detected."
+"When"
+"private"
+"session."
+"gateway"
+"Status"
+"use"
+"from"
+"stream"
+"working"
+"value."
+"next"
+"few"
+"--THIS_STRING_SEPARATES"
+"POST,"
+"memory"
+"scope"
+"means"
+"HEX\""
+"(GMT),"
+"bytes:"
+"Default"
+"Require"
+"Required"
+"DIGIT"
+"validity"
+"bytes,"
+"Connection"
+"Time"
+"cases"
+"name:"
+"behalf"
+"MD5"
+"lowercase"
+"RIGHTS"
+"this"
+"NTP"
+"--THIS_STRING_SEPARATES--"
+"Syntax"
+"values"
+"can"
+"believed"
+"making"
+"closing"
+"modifier"
+"J.,"
+"control"
+"reserved"
+"links"
+"process"
+"attribute"
+"high"
+"tag"
+"allowed"
+"Policy"
+"input,"
+"native"
+"class,"
+"Missing"
+"HTTP-"
+"HTTP,"
+"charset"
+"delay"
+"located"
+"R.,"
+"instead"
+"1XX"
+"WARRANTIES,"
+"parameter"
+"FORCE"
+"STD"
+"may"
+"Request"
+"British"
+"HEREIN"
+"Roman"
+"client's"
+"[SP"
+"ANSI,"
+"date"
+"such"
+"data"
+"HTTP/1.1\","
+"Y."
+"UA"
+"revalidate"
+"element"
+"so"
+"allow"
+"(LWS)"
+"holds"
+"move"
+"years"
+"including"
+"LINK,"
+"still"
+"pointer"
+"non-zero"
+"1"
+"negotiated"
+"Multiple"
+"line."
+"using:"
+"forms"
+"Referer"
+"P.,"
+"PNG,\""
+"cache-control"
+"policy"
+"mail"
+"\"HTTP"
+"SIGCOMM"
+"greater"
+"matches"
+"lesser"
+"not"
+"parsing"
+"matched"
+"term"
+"name"
+"establishment"
+"A.,"
+"ISO-639"
+"entirely"
+"identifier"
+"elements"
+"|"
+"successful"
+"domain"
+"From"
+"Network"
+"related"
+"UNLINK"
+"trying"
+"(LZ77)"
+"year"
+"(MIC)"
+"Parameter"
+"special"
+"out"
+"ultimately"
+"space"
+"REQUIRED"
+"416"
+"WARRANTY"
+"4DIGIT"
+"time,"
+"L."
+"time."
+"supports"
+"(A,"
+"state"
+"This"
+"derived"
+"INTERNET"
+"possibility"
+"\"B\""
+"RFC"
+"IMPLIED,"
+"byte"
+"received."
+"log"
+"ISO-8859-7,"
+"\"HTTP/1.1\""
+"language"
+"could"
+"transition"
+"programming"
+"tries"
+"keep"
+"length"
+"place"
+"S.,"
+"first"
+"origin"
+"there"
+"sent."
+"3DIGIT"
+"K."
+"one"
+"CHAR"
+"list,"
+"ISI"
+"version:"
+"message"
+"CSS1,"
+"quality"
+"size"
+"doesn't"
+"given"
+"For"
+"enabled."
+"unknown"
+"system"
+"unspecified"
+"parallel"
+"priority"
+"their"
+"attack"
+"intermediate"
+"HTTP:"
+"Date"
+"x-gzip"
+"Data"
+"Response"
+"HTTP/2.0,"
+"gives"
+"Success"
+"that"
+"completed"
+"exactly"
+"R"
+"pragma"
+"(IANA"
+"copy"
+"than"
+"History"
+"wide"
+"12"
+"14"
+"16"
+"was"
+"Universal"
+"protected"
+"servers."
+"were"
+"1)"
+"IS\""
+"SHTTP/1.3,"
+"1*8ALPHA"
+"Location"
+"and"
+"Information"
+"false"
+"1.1"
+"1.2"
+"(2**N),"
+"turned"
+"Tue,"
+"Other"
+"SP"
+"(LWS)."
+"have"
+"MIME,"
+"need"
+"Mail"
+"any"
+"Requested"
+"conversion"
+"HTTP/2.13,"
+"database."
+"After"
+"able"
+"mechanism"
+"OPTIONAL"
+"take"
+"which"
+"HTTP/0.9,"
+"201"
+"200"
+"begin"
+"multiple"
+"Name"
+"trace"
+"206"
+"buffer"
+"who"
+"connected"
+"plus"
+"HTTP/12.3."
+"\"OPTIONS\""
+"segment"
+"class"
+"D."
+"considered"
+"GET"
+"Some"
+"TE:"
+"L.,"
+"(URL)"
+"}"
+"fact"
+"Web"
+"WA"
+"violation"
+"text"
+"supported"
+"synchronous"
+"Authentication"
+"inconsistent"
+"CRLF."
+"CRLF,"
+"label"
+"Public"
+"MUST"
+"true,"
+"cache."
+"upgrade"
+"based"
+"Posting"
+"("
+"cache"
+"3"
+"should"
+"only"
+"Proxy-Authorization"
+"Byte"
+"Strong"
+"local"
+"MIC"
+"WARRANTIES"
+"(UA)"
+"<US-ASCII"
+"handle"
+"get"
+"E.,"
+"Accept-Ranges"
+"expectation"
+"(See"
+"cannot"
+"128"
+"THE"
+"conjunction"
+"BNF"
+"DIGIT,"
+"closure"
+"resource"
+"ended"
+"cached"
+"W."
+"ISO-8859."
+"calling"
+"INCLUDING"
+"contain"
+"ISI/RR-98-463,"
+"\"CONNECT\""
+"where"
+"ignored"
+"IANA"
+"exists"
+"configured"
+"C"
+"packet"
+"up"
+"relative"
+"multipart"
+"end."
+"detect"
+"has"
+"stream."
+"1*DIGIT"
+"Oct"
+"written"
+"LIMITED"
+"closed"
+"between"
+"boundary"
+"reading"
+"across"
+"\"RECOMMENDED\","
+"Body"
+"ability"
+"FOR"
+"opening"
+"importance"
+"screen"
+"connection."
+"ISO-8859-1"
+"UNIX"
+"key"
+"group"
+"configuration"
+"P."
+"valid"
+"\"HTTP/1.1\"."
+"HTTP/1.0),"
+"WWW"
+"revoked"
+"TRACE"
+"many"
+"taking"
+"(OK)"
+"equal"
+"Rules"
+"(0)"
+"Unicode"
+"*OCTET"
+"(URN)"
+"safely"
+"can't"
+"among"
+"(OK)."
+"Log"
+"period"
+"colon"
+"adding"
+"spans"
+"article"
+"table"
+"allocated"
+"LWS:"
+"Identifier"
+"Wed,"
+"USA"
+"Proxy-Authenticate"
+"encryption"
+"Jun"
+"copies"
+"But"
+"mark"
+"defined"
+"combined"
+"LWS,"
+"LWS"
+"prototype"
+">"
+"enable"
+"401"
+"content-disposition"
+"received"
+"unsafe"
+"SMTP"
+"ANY"
+"World"
+"chain."
+"case"
+"disconnected"
+"(HTTP)"
+"these"
+"Number"
+"value"
+"will"
+"Fri,"
+"\"SHALL\","
+"Any"
+"Additional"
+"resident"
+"NOT\""
+"thus"
+"it,"
+"according"
+"Content"
+"Content-Range"
+"properties"
+"Unsupported"
+"malformed"
+"PARTICULAR"
+"You"
+"binary"
+"different"
+"perhaps"
+"generic"
+"pay"
+"set."
+"00:00:00"
+"1DIGIT"
+"same"
+"parts"
+"largest"
+"units"
+"document"
+"Types"
+"residing"
+"breakdown"
+"UTC"
+"extended"
+"http"
+"I"
+"IP"
+"effect"
+"allocation"
+"running"
+"*LWS"
+"infinite"
+"frequently"
+"tracking"
+"undefined"
+"CR."
+"well"
+"It"
+"If-Range"
+"patterns"
+"without"
+"M.,"
+"CR,"
+"In"
+"position"
+"model"
+"audio"
+"If"
+"negotiation"
+"Also,"
+"Service"
+"less"
+"being"
+"generally"
+"obtain"
+"actions"
+"Access"
+"stored"
+"CRC."
+"However,"
+"application"
+"capabilities"
+"appeared"
+"add"
+"Thu,"
+"4"
+"Although"
+"HTTP/1.1"
+"usage"
+"(A"
+"match"
+"details"
+"tests"
+"aspects"
+"read"
+"Many"
+"H.,"
+"early"
+"action,"
+"T"
+"address."
+"using"
+"password"
+"loss"
+"like"
+"text/html"
+"Content-Encoding"
+"B,"
+"B."
+"\"HTTP/1.1"
+"server"
+"discarded"
+"either"
+"BACK)"
+"output"
+"Operation"
+"page"
+"\"GET\""
+"exceed"
+"because"
+"sequence"
+"uppercase"
+"Since"
+"growth"
+"Authority"
+"respect"
+"International"
+"recognition"
+"happens"
+"provided"
+"trust"
+"lead"
+"MIT"
+"highest"
+"expectation,"
+"does"
+"Authorization"
+"assuming"
+"refer"
+"GET)"
+"GET,"
+"GET."
+"equivalent"
+"Official"
+"C)"
+"ISO-8859-1."
+"broken"
+"Range"
+"HTTP/1.0,"
+"LWS>"
+"X3.4-1986,"
+"Microsoft"
+"on"
+"about"
+"actual"
+"extension"
+"of"
+"C,"
+"accepted"
+"FTP,"
+"compatible"
+"addition"
+"unidirectional"
+"Message"
+"DELETE"
+"content-type"
+"or"
+"UC"
+"final"
+"No"
+"ISO-"
+"image"
+"Two"
+"Internal"
+"times,"
+"ISO-8859-6,"
+"determine"
+"T.,"
+"operator"
+"T/TCP"
+"additional"
+"area"
+"GET\""
+"transfer"
+"*"
+"decode"
+"start"
+"describes"
+"low"
+"strict"
+"context."
+"function"
+"complete"
+"N."
+"enough"
+"OCTET"
+"but"
+"IMPLIED"
+"Error"
+"Dec"
+"with"
+"Trailer"
+"count"
+"clients."
+"made"
+"compute"
+"default"
+"GMT,"
+"ISO-8859-1,"
+"Moved"
+"this,"
+"ISO-8859-1)"
+"SHOULD"
+"PURPOSE."
+"limit"
+"GMT."
+"site"
+"problem"
+"define"
+"USE"
+"image/jpeg"
+"\"E\""
+"URL,"
+"describe"
+"general"
+"as"
+"UST"
+"at"
+"file"
+"lifetime"
+"are"
+"Accept-Encoding"
+"incorrect"
+"variety"
+"\"D\""
+"virtual"
+"details."
+"field"
+"other"
+"5"
+"Purpose"
+"you"
+"CA"
+"requested"
+"repeat"
+"HEX"
+"symbol"
+"Cache-Control"
+"Remove"
+"March"
+"important"
+"H."
+"code)"
+"included"
+"SOCIETY"
+"\"MUST"
+"ISO-10646\","
+"\"ZLIB"
+"audio/basic"
+"\"ISO-8859-1\""
+"\"WAIS"
+"persistent"
+"having"
+"directory"
+"ALPHA"
+"validation"
+"original"
+
diff --git a/client/deps_cache.cc b/client/deps_cache.cc
new file mode 100644
index 0000000..cb1ee16
--- /dev/null
+++ b/client/deps_cache.cc
@@ -0,0 +1,614 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "deps_cache.h"
+
+#include <algorithm>
+#include <cmath>
+#include <ctime>
+#include <fstream>
+#include <functional>
+#include <iterator>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "autolock_timer.h"
+#include "compiler_flags.h"
+#include "compiler_info.h"
+#include "compiler_proxy_info.h"
+#include "compiler_specific.h"
+#include "content.h"
+#include "directive_filter.h"
+#include "file.h"
+#include "goma_hash.h"
+#include "include_cache.h"
+#include "include_processor.h"
+#include "join.h"
+#include "path.h"
+#include "path_resolver.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/deps_cache_data.pb.h"
+#include "prototmp/goma_stats.pb.h"
+MSVC_POP_WARNING()
+#include "split.h"
+#include "util.h"
+
+using std::string;
+
+namespace {
+
+// Appends the include directories, command-line macros and compiler-info
+// flags of |flags| to |ss|. Each group is introduced by a ":name=" label and
+// every item (and every macro name/value) is followed by a ','.
+template<typename Flags>
+void AppendCompilerFlagsInfo(const Flags& flags, std::stringstream* ss) {
+  std::stringstream& out = *ss;
+
+  out << ":include_dirs=";
+  for (const auto& dir : flags.include_dirs())
+    out << dir << ',';
+
+  out << ":commandline_macros=";
+  for (const auto& macro : flags.commandline_macros())
+    out << macro.first << ',' << macro.second << ',';
+
+  out << ":compiler_info_flags=";
+  for (const auto& info_flag : flags.compiler_info_flags())
+    out << info_flag << ',';
+}
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+DepsCache* DepsCache::instance_;
+
+// Remembers the cache file name and the expiry/size limits, and starts all
+// hit/miss counters at zero. Nothing is read from disk here; Init() calls
+// LoadGomaDeps() after constructing the instance.
+DepsCache::DepsCache(const string& cache_filename,
+                     int identifier_alive_duration,
+                     size_t deps_table_size_threshold,
+                     int max_proto_size_in_mega_bytes)
+    : cache_file_(cache_filename),
+      identifier_alive_duration_(identifier_alive_duration),
+      deps_table_size_threshold_(deps_table_size_threshold),
+      max_proto_size_in_mega_bytes_(max_proto_size_in_mega_bytes),
+      hit_count_(0),
+      missed_count_(0),
+      missed_by_updated_count_(0) {
+}
+
+// Does not save anything; Quit() calls SaveGomaDeps() before deleting.
+DepsCache::~DepsCache() {}
+
+// static
+// Creates the singleton and loads |cache_filename| into it.
+//
+// The cache stays disabled (instance_ is left untouched) when
+// |cache_filename| is empty, when IncludeCache is not enabled, or when
+// IncludeCache does not calculate directive hashes.
+// If the cache file cannot be loaded, the instance is kept but starts empty.
+void DepsCache::Init(const string& cache_filename,
+                     int identifier_alive_duration,
+                     size_t deps_table_size_threshold,
+                     int max_proto_size_in_mega_bytes) {
+  if (cache_filename.empty()) {
+    LOG(INFO) << "DepsCache is disabled.";
+    return;
+  }
+
+  if (!IncludeCache::IsEnabled()) {
+    LOG(WARNING) << "DepsCache is disabled since IncludeCache is not enabled.";
+    return;
+  }
+  if (!IncludeCache::instance()->calculates_directive_hash()) {
+    LOG(WARNING) << "DepsCache is disabled since IncludeCache does not "
+                 << "calculate directive hash. Enable IncludeCache with "
+                 << "directive hash calculation";
+    return;
+  }
+
+  LOG(INFO) << "DepsCache is enabled. cache_filename=" << cache_filename;
+  instance_ = new DepsCache(cache_filename, identifier_alive_duration,
+                            deps_table_size_threshold,
+                            max_proto_size_in_mega_bytes);
+
+  if (!instance_->LoadGomaDeps()) {
+    // If deps cache is broken (or does not exist), clear all cache.
+    LOG(INFO) << "couldn't load deps cache file. "
+              << "The cache file is broken or too large";
+    instance_->Clear();
+  }
+}
+
+// static
+// Saves the cache to disk and destroys the singleton. Does nothing when the
+// cache was never enabled.
+void DepsCache::Quit() {
+  if (IsEnabled()) {
+    instance_->SaveGomaDeps();
+    delete instance_;
+    instance_ = nullptr;
+  }
+}
+
+// Drops every cached dependency list and filename id, and resets the
+// hit/miss counters. Each lock is held only for the data it guards.
+void DepsCache::Clear() {
+  {
+    AUTOLOCK(lock, &mu_);
+    deps_table_.clear();
+  }
+
+  filename_id_table_.Clear();
+
+  {
+    AUTOLOCK(lock, &count_mu_);
+    hit_count_ = missed_count_ = missed_by_updated_count_ = 0;
+  }
+}
+
+// Records |dependencies| plus |input_file| itself as the dependency list of
+// |identifier|. Every file gets a filename id, a FileId and a directive
+// hash; if any of them cannot be obtained, the existing entry for
+// |identifier| is erased and false is returned.
+bool DepsCache::SetDependencies(const DepsCache::Identifier& identifier,
+                                const std::string& cwd,
+                                const string& input_file,
+                                const std::set<string>& dependencies,
+                                FileIdCache* file_id_cache) {
+  DCHECK(identifier.valid());
+  DCHECK(file::IsAbsolutePath(cwd)) << cwd;
+
+  // The input file is tracked as a dependency as well.
+  std::set<string> all_files(dependencies);
+  all_files.insert(input_file);
+
+  std::vector<DepsHashId> new_hash_ids;
+  bool succeeded = true;
+  for (const auto& name : all_files) {
+    DCHECK(!name.empty());
+    const string abs_path = file::JoinPathRespectAbsolute(cwd, name);
+
+    // The table stores the name as given (possibly relative), not abs_path.
+    const FilenameIdTable::Id name_id =
+        filename_id_table_.InsertFilename(name);
+    if (name_id == FilenameIdTable::kInvalidId) {
+      succeeded = false;
+      break;
+    }
+
+    FileId file_id(file_id_cache->Get(abs_path));
+    if (!file_id.IsValid()) {
+      LOG(WARNING) << "invalid file id: " << abs_path;
+      succeeded = false;
+      break;
+    }
+
+    OptionalSHA256HashValue directive_hash =
+        IncludeCache::instance()->GetDirectiveHash(abs_path, file_id);
+    if (!directive_hash.valid()) {
+      LOG(WARNING) << "invalid directive hash: " << abs_path;
+      succeeded = false;
+      break;
+    }
+
+    const DepsHashId hash_id(name_id, file_id, directive_hash.value());
+    DCHECK(hash_id.IsValid());
+    new_hash_ids.push_back(hash_id);
+  }
+
+  AUTOLOCK(lock, &mu_);
+  if (succeeded) {
+    DepsTableData& data = deps_table_[identifier.value()];
+    data.last_used_time = time(nullptr);
+    std::swap(data.deps_hash_ids, new_hash_ids);
+    return true;
+  }
+
+  // A partially collected list must not be served later.
+  deps_table_.erase(identifier.value());
+  return false;
+}
+
+// Looks up the dependency list cached for |identifier| and, if none of the
+// listed files has a modified directive hash, stores the list (without
+// |input_file|) into |dependencies| and returns true.
+// On any miss |dependencies| is left untouched and false is returned.
+bool DepsCache::GetDependencies(const DepsCache::Identifier& identifier,
+                                const std::string& cwd,
+                                const string& input_file,
+                                std::set<string>* dependencies,
+                                FileIdCache* file_id_cache) {
+  DCHECK(identifier.valid());
+  DCHECK(file::IsAbsolutePath(cwd)) << cwd;
+
+  // Copy the entry out so that the files are checked without holding |mu_|.
+  std::vector<DepsHashId> cached_ids;
+  {
+    AUTOLOCK(lock, &mu_);
+    auto found = deps_table_.find(identifier.value());
+    if (found == deps_table_.end()) {
+      IncrMissedCount();
+      return false;
+    }
+    DepsTableData& data = found->second;
+    data.last_used_time = time(nullptr);
+    cached_ids = data.deps_hash_ids;
+  }
+
+  std::set<string> files;
+  for (const auto& cached : cached_ids) {
+    const string& name = filename_id_table_.ToFilename(cached.id);
+    if (name.empty()) {
+      LOG(ERROR) << "Unexpected FilenameIdTable conversion failure: "
+                 << "id=" << cached.id;
+      IncrMissedCount();
+      return false;
+    }
+
+    const string abs_path = file::JoinPathRespectAbsolute(cwd, name);
+    if (IsDirectiveModified(abs_path, cached.file_id, cached.directive_hash,
+                            file_id_cache)) {
+      IncrMissedByUpdatedCount();
+      return false;
+    }
+
+    files.insert(name);
+  }
+
+  // The input file itself is not reported as a dependency.
+  files.erase(input_file);
+
+  dependencies->swap(files);
+  IncrHitCount();
+  return true;
+}
+
+// Forgets the dependency list cached for |identifier|, if there is one.
+void DepsCache::RemoveDependency(const DepsCache::Identifier& identifier) {
+  DCHECK(identifier.valid());
+  const Key& key = identifier.value();
+
+  AUTOLOCK(lock, &mu_);
+  deps_table_.erase(key);
+}
+
+// Counts a lookup that found no usable entry.
+void DepsCache::IncrMissedCount() {
+  AUTOLOCK(lock, &count_mu_);
+  missed_count_ += 1;
+}
+
+// Counts a lookup whose entry was rejected because a file's directives
+// were modified.
+void DepsCache::IncrMissedByUpdatedCount() {
+  AUTOLOCK(lock, &count_mu_);
+  missed_by_updated_count_ += 1;
+}
+
+// Counts a successful lookup.
+void DepsCache::IncrHitCount() {
+  AUTOLOCK(lock, &count_mu_);
+  hit_count_ += 1;
+}
+
+// Fills |stat| with the table sizes and hit/miss counters.
+// "max entries" is the longest single dependency list and "total entries"
+// is the sum of all list lengths.
+void DepsCache::DumpStatsToProto(DepsCacheStats* stat) const {
+  {
+    AUTOLOCK(lock, &mu_);
+    size_t longest = 0;
+    size_t sum = 0;
+    for (const auto& kv : deps_table_) {
+      const size_t n = kv.second.deps_hash_ids.size();
+      sum += n;
+      if (longest < n)
+        longest = n;
+    }
+    stat->set_deps_table_size(deps_table_.size());
+    stat->set_max_entries(longest);
+    stat->set_total_entries(sum);
+  }
+
+  stat->set_idtable_size(filename_id_table_.Size());
+
+  {
+    AUTOLOCK(lock, &count_mu_);
+    stat->set_hit(hit_count_);
+    stat->set_updated(missed_by_updated_count_);
+    stat->set_missed(missed_count_);
+  }
+}
+
+// static
+// Returns true when |filename| must be treated as changed since it was
+// cached with |old_file_id| / |old_directive_hash|.
+// An unchanged FileId short-cuts the check; otherwise the directive hash is
+// recomputed and compared. A missing or unreadable file counts as modified.
+bool DepsCache::IsDirectiveModified(const string& filename,
+                                    const FileId& old_file_id,
+                                    const SHA256HashValue& old_directive_hash,
+                                    FileIdCache* file_id_cache) {
+  FileId current_id(file_id_cache->Get(filename));
+
+  // When file doesn't exist, let's consider a directive is changed.
+  if (!current_id.IsValid())
+    return true;
+
+  if (current_id == old_file_id)
+    return false;
+
+  OptionalSHA256HashValue current_hash =
+      IncludeCache::instance()->GetDirectiveHash(filename, current_id);
+  if (current_hash.valid()) {
+    // The file was touched; only a different directive hash matters.
+    return !(current_hash.value() == old_directive_hash);
+  }
+
+  // The file couldn't be read or the file is removed during the build.
+  LOG(ERROR) << "couldn't read a file in deps: " << filename;
+  return true;
+}
+
+// Loads the cache file into |filename_id_table_| and |deps_table_|.
+//
+// Returns false when the file cannot be read within the size limit, and
+// false after Clear() when it was built by a different revision or contains
+// inconsistent data. Dependency records older than
+// |identifier_alive_duration_| are skipped unless the duration is negative.
+// No lock is taken here; Init() is the only caller in this file.
+bool DepsCache::LoadGomaDeps() {
+  const time_t time_threshold = time(nullptr) - identifier_alive_duration_;
+  GomaDeps goma_deps;
+
+  const int total_bytes_limit = max_proto_size_in_mega_bytes_ * 1024 * 1024;
+  const int warning_threshold = total_bytes_limit * 3 / 4;
+
+  if (!cache_file_.LoadWithMaxLimit(&goma_deps,
+                                    total_bytes_limit,
+                                    warning_threshold)) {
+    LOG(ERROR) << "failed to load cache file " << cache_file_.filename();
+    return false;
+  }
+
+  // Version check
+
+  // Version mismatch. Older deps won't be reused.
+  if (goma_deps.built_revision() != kBuiltRevisionString) {
+    LOG(INFO) << "Old deps cache was detected. This deps cache is ignored. "
+              << "Current version should be " << kBuiltRevisionString
+              << " but deps cache version is " << goma_deps.built_revision();
+    Clear();
+    return false;
+  }
+
+  LOG(INFO) << "Version matched.";
+
+  // Load FilenameIdTable
+  unordered_set<FilenameIdTable::Id> valid_ids;
+  if (!filename_id_table_.LoadFrom(goma_deps.filename_id_table(), &valid_ids)) {
+    Clear();
+    return false;
+  }
+
+  // Load DepsIdTable: filename_id -> (FileId, directive hash as hex string).
+  unordered_map<FilenameIdTable::Id,
+                std::pair<FileId, string>> deps_hash_id_map;
+  {
+    const GomaDepsIdTable& table = goma_deps.deps_id_table();
+    UnorderedMapReserve(table.record_size(), &deps_hash_id_map);
+    for (const auto& record : table.record()) {
+      if (!valid_ids.count(record.filename_id())) {
+        LOG(ERROR) << "DepsIdTable contains unexpected filename_id: "
+                   << record.filename_id();
+        Clear();
+        return false;
+      }
+
+      if (deps_hash_id_map.count(record.filename_id())) {
+        LOG(ERROR) << "DepsIdTable contains duplicated filename_id: "
+                   << record.filename_id();
+        Clear();
+        return false;
+      }
+
+      FileId file_id;
+#ifndef _WIN32
+      file_id.dev = record.dev();
+      file_id.inode = record.inode();
+#endif
+      file_id.mtime = record.mtime();
+      file_id.size = record.size();
+
+      deps_hash_id_map[record.filename_id()] =
+          std::make_pair(file_id, record.directive_hash());
+    }
+  }
+
+  LOG(INFO) << "Loading DepsTable OK.";
+
+  // Load Dependencies
+  {
+    const GomaDependencyTable& table = goma_deps.dependency_table();
+    UnorderedMapReserve(table.record_size(), &deps_table_);
+    for (const auto& record : table.record()) {
+      // Skip expired identifiers; a negative duration disables expiry.
+      if (identifier_alive_duration_ >= 0 &&
+          record.last_used_time() < time_threshold) {
+        continue;
+      }
+
+      Key key;
+      if (!SHA256HashValue::ConvertFromHexString(record.identifier(),
+                                                 &key)) {
+        LOG(ERROR) << "DependencyTable contains corrupted sha256 string: "
+                   << record.identifier();
+        Clear();
+        return false;
+      }
+
+      if (deps_table_.count(key)) {
+        LOG(ERROR) << "DependencyTable contains duplicated identifier: "
+                   << record.identifier();
+        Clear();
+        return false;
+      }
+
+      DepsTableData* deps_table_data = &deps_table_[key];
+      deps_table_data->last_used_time = record.last_used_time();
+      deps_table_data->deps_hash_ids.reserve(record.filename_id_size());
+      for (const auto& id : record.filename_id()) {
+        if (!valid_ids.count(id)) {
+          LOG(ERROR) << "DependencyTable contains unexpected filename_id: "
+                     << id;
+          Clear();
+          return false;
+        }
+
+        // Look the id up without inserting: a filename_id that has no
+        // DepsIdTable record is reported as such instead of being turned
+        // into a default-constructed (FileId, "") entry.
+        const auto found = deps_hash_id_map.find(id);
+        if (found == deps_hash_id_map.end()) {
+          LOG(ERROR) << "DependencyTable refers to filename_id that has no "
+                     << "DepsIdTable record: " << id;
+          Clear();
+          return false;
+        }
+
+        const auto& hashid = found->second;
+        SHA256HashValue hash_value;
+        if (!SHA256HashValue::ConvertFromHexString(hashid.second,
+                                                   &hash_value)) {
+          LOG(ERROR) << "DependencyTable contains corrupted sha256 string: "
+                     << hashid.second;
+          Clear();
+          return false;
+        }
+        deps_table_data->deps_hash_ids.push_back(
+            DepsHashId(id, hashid.first, hash_value));
+      }
+    }
+  }
+
+  LOG(INFO) << cache_file_.filename() << " has been successfully loaded.";
+
+  return true;
+}
+
+// Prunes |deps_table_| and writes it, together with the filename ids it
+// still references, to the cache file. Returns false if the file could not
+// be saved.
+//
+// Pruning modifies |deps_table_| in place: expired entries are erased first,
+// then the least recently used entries beyond |deps_table_size_threshold_|.
+bool DepsCache::SaveGomaDeps() {
+  // We don't take lock here since this should be called from Quit() only.
+  // It should be single-threaded.
+
+  const time_t time_threshold = time(nullptr) - identifier_alive_duration_;
+
+  GomaDeps goma_deps;
+  goma_deps.set_built_revision(kBuiltRevisionString);
+
+  // First, drop older DepsTable entries from deps_table_.
+  // A negative duration means entries never expire.
+  if (identifier_alive_duration_ >= 0) {
+    for (auto it = deps_table_.begin(); it != deps_table_.end(); ) {
+      if (it->second.last_used_time < time_threshold) {
+        // should be OK since all iterators but deleted one keep valid.
+        deps_table_.erase(it++);
+      } else {
+        ++it;
+      }
+    }
+  }
+
+  // Checks the size of DepsTable. If it exceeds threshold, we'd like to remove
+  // older identifiers.
+  if (deps_table_.size() > deps_table_size_threshold_) {
+    LOG(INFO) << "DepsTable size " << deps_table_.size()
+              << " exceeds the threshold " << deps_table_size_threshold_
+              << ". Older cache will be deleted";
+    std::vector<std::pair<time_t, Key>> keys_by_time;
+    keys_by_time.reserve(deps_table_.size());
+    for (const auto& entry : deps_table_) {
+      keys_by_time.push_back(
+          std::make_pair(entry.second.last_used_time, entry.first));
+    }
+    // Newest first, so everything from index |deps_table_size_threshold_|
+    // onwards is the oldest surplus.
+    std::sort(keys_by_time.begin(), keys_by_time.end(),
+         std::greater<std::pair<time_t, Key>>());
+    for (size_t i = deps_table_size_threshold_; i < keys_by_time.size(); ++i) {
+      deps_table_.erase(keys_by_time[i].second);
+    }
+  }
+
+  // We create a map:
+  //   FilenameIdTable::Id -> pair<FileId, directive-hash>.
+  // When we see multiple DepsHashIds for one FilenameIdTable::Id,
+  // we choose the one whose mtime is the latest.
+  unordered_map<FilenameIdTable::Id, std::pair<FileId, SHA256HashValue>> m;
+  for (const auto& deps_table_entry : deps_table_) {
+    for (const auto& deps_hash_id : deps_table_entry.second.deps_hash_ids) {
+      FilenameIdTable::Id id = deps_hash_id.id;
+      if (!m.count(id) || m[id].first.mtime < deps_hash_id.file_id.mtime) {
+        m[id] = std::make_pair(deps_hash_id.file_id,
+                               deps_hash_id.directive_hash);
+      }
+    }
+  }
+
+  // Store all ids which have been saved. We only save these ids.
+  std::set<FilenameIdTable::Id> used_ids;
+
+  // Save GomaDependencyTable. We skip entries that contain a directive_hash
+  // different from the one kept in |m|, because such an entry is stale: its
+  // dependencies have to be recalculated next time anyway, so it is not
+  // worth saving.
+  {
+    GomaDependencyTable* table = goma_deps.mutable_dependency_table();
+    for (const auto& deps_table_entry : deps_table_) {
+      // First, check that every DepsHashId of this entry is up to date.
+      bool ok = true;
+      for (const auto& deps_hash_id : deps_table_entry.second.deps_hash_ids) {
+        if (deps_hash_id.directive_hash != m[deps_hash_id.id].second) {
+          ok = false;
+          break;
+        }
+      }
+
+      if (!ok)
+        continue;
+
+      GomaDependencyTableRecord* record = table->add_record();
+      record->set_identifier(deps_table_entry.first.ToHexString());
+      record->set_last_used_time(deps_table_entry.second.last_used_time);
+      for (const auto& deps_hash_id : deps_table_entry.second.deps_hash_ids) {
+        used_ids.insert(deps_hash_id.id);
+        record->add_filename_id(deps_hash_id.id);
+      }
+    }
+  }
+
+  // Save GomaDepsIdTable, restricted to the ids referenced by a saved entry.
+  {
+    GomaDepsIdTable* table = goma_deps.mutable_deps_id_table();
+    for (const auto& entry : m) {
+      if (!used_ids.count(entry.first))
+        continue;
+      GomaDepsIdTableRecord* record = table->add_record();
+      record->set_filename_id(entry.first);
+#ifndef _WIN32
+      record->set_dev(entry.second.first.dev);
+      record->set_inode(entry.second.first.inode);
+#endif
+      record->set_mtime(entry.second.first.mtime);
+      record->set_size(entry.second.first.size);
+      record->set_directive_hash(entry.second.second.ToHexString());
+    }
+  }
+
+  // Save FilenameIdTable. We remove ids which do not appear in |used_ids|,
+  // because nothing refers to them.
+  filename_id_table_.SaveTo(used_ids, goma_deps.mutable_filename_id_table());
+
+  if (!cache_file_.Save(goma_deps)) {
+    LOG(ERROR) << "failed to save cache file " << cache_file_.filename();
+    return false;
+  }
+  LOG(INFO) << "saved to " << cache_file_.filename();
+  return true;
+}
+
+// static
+// Builds the identifier of a compile command by hashing a textual summary
+// of the compiler (name, path, hash, system include/framework paths,
+// predefined macros) and of the command (cwd, inputs, include dirs,
+// command-line macros, compiler-info flags).
+// Returns an empty (invalid) Identifier for flags that are neither gcc nor
+// VC flags.
+DepsCache::Identifier DepsCache::MakeDepsIdentifier(
+    const CompilerInfo& compiler_info,
+    const CompilerFlags& compiler_flags) {
+  const bool is_gcc = compiler_flags.is_gcc();
+  if (!is_gcc && !compiler_flags.is_vc()) {
+    // TODO: Support javac.
+    LOG(INFO) << "Cannot handle this CompilerFlags yet: "
+              << compiler_flags.compiler_name();
+    return DepsCache::Identifier();
+  }
+
+  // TODO: Maybe we need to merge some code with IncludeProcessor
+  // to enumerate what information is necessary for enumerating headers?
+  std::stringstream key;
+
+  // Some buildbot always copies nacl-gcc compiler to target directory.
+  // In that case, FileId is different per build. So, we'd like to use
+  // compiler hash.
+  key << "compiler_name=" << compiler_info.name()
+      << ":compiler_path=" << compiler_info.real_compiler_path()
+      << ":compiler_hash=" << compiler_info.real_compiler_hash()
+      << ":cwd=" << compiler_flags.cwd();
+
+  key << ":input=";
+  for (const auto& input : compiler_flags.input_filenames())
+    key << input << ',';
+
+  key << ":cxx_system_include_paths=";
+  for (const auto& dir : compiler_info.cxx_system_include_paths())
+    key << dir << ',';
+
+  key << ":system_include_paths=";
+  for (const auto& dir : compiler_info.system_include_paths())
+    key << dir << ',';
+
+  key << ":system_framework_paths=";
+  for (const auto& dir : compiler_info.system_framework_paths())
+    key << dir << ',';
+
+  key << ":predefined_macros=" << compiler_info.predefined_macros();
+
+  if (is_gcc) {
+    AppendCompilerFlagsInfo(static_cast<const GCCFlags&>(compiler_flags),
+                            &key);
+  } else {
+    AppendCompilerFlagsInfo(static_cast<const VCFlags&>(compiler_flags),
+                            &key);
+  }
+
+  SHA256HashValue digest;
+  ComputeDataHashKeyForSHA256HashValue(key.str(), &digest);
+  return DepsCache::Identifier(digest);
+}
+
+}  // namespace devtools_goma
diff --git a/client/deps_cache.h b/client/deps_cache.h
new file mode 100644
index 0000000..71a5429
--- /dev/null
+++ b/client/deps_cache.h
@@ -0,0 +1,177 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_DEPS_CACHE_H_
+#define DEVTOOLS_GOMA_CLIENT_DEPS_CACHE_H_
+
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <utility>
+
+#include "autolock_timer.h"
+#include "cache_file.h"
+#include "file_id_cache.h"
+#include "filename_id_table.h"
+#include "goma_hash.h"
+#include "sha256hash_hasher.h"
+
+namespace devtools_goma {
+
+class CompilerFlags;
+class CompilerInfo;
+class DepsCacheStats;
+
+// DepsCache is a cache for dependent files.
+// We make an 'identifier' which identifies a compile command,
+// and a map from 'identifier' to dependent files (and extra information).
+//
+// When we run the same command which has the same identifier,
+// we check whether we can reuse the dependent files list.
+// This is done by the following algorithm:
+//  For all dependent files:
+//   1. Check FileId. If it's the same, we think a file is not changed.
+//   2. Check directive_hash, which is a hash value created from file's
+//      directive lines. If it's the same, dependent files won't be changed.
+class DepsCache {
+ public:
+  typedef OptionalSHA256HashValue Identifier;
+
+  static DepsCache* instance() { return instance_; }
+  static bool IsEnabled() { return instance_ != nullptr; }
+
+  // Initializes the DepsCache.
+  // When |cache_filename| is empty, this won't be enabled.
+  // When |cache_filename| file exists, we load it.
+  static void Init(const std::string& cache_filename,
+                   int identifier_alive_duration,
+                   size_t deps_table_size_threshold,
+                   int max_proto_size_in_mega_bytes);
+
+  // Saves .goma_deps file if DepsCache is initialized.
+  static void Quit();
+
+  // Creates identifier to set/get dependencies.
+  static Identifier MakeDepsIdentifier(
+      const CompilerInfo& compiler_info,
+      const CompilerFlags& compiler_flags);
+
+  // Records a dependency: the compile command identified by |identifier|
+  // uses |input_file| as an input file (e.g. *.cc) and requires the
+  // |dependencies| files (e.g. *.h).
+  // |identifier| should not be empty.
+  // |cwd| should be absolute.
+  // |input_file| can be relative.
+  // Paths in |dependencies| can be relative.
+  // Returns false (and drops any entry for |identifier|) when a file's id
+  // or directive hash cannot be obtained.
+  bool SetDependencies(const Identifier& identifier,
+                       const std::string& cwd,
+                       const std::string& input_file,
+                       const std::set<std::string>& dependencies,
+                       FileIdCache* file_id_cache);
+
+  // Gets dependent files using |identifier|.
+  // We check the dependencies are not changed. If changed, false will be
+  // returned and |dependencies| won't be changed.
+  // |input_file| is removed from the result of |dependencies|.
+  // |input_file| can be relative.
+  // |cwd| should be absolute.
+  // path in |dependencies| can be relative.
+  bool GetDependencies(const Identifier& identifier,
+                       const std::string& cwd,
+                       const std::string& input_file,
+                       std::set<std::string>* dependencies,
+                       FileIdCache* file_id_cache);
+
+  // Removes the dependencies recorded for |identifier|, if any.
+  void RemoveDependency(const Identifier& identifier);
+
+  // Dump internal stats.
+  void DumpStatsToProto(DepsCacheStats* stats) const;
+
+ private:
+  friend class DepsCacheTest;
+
+  // DepsHashId is used to check whether an include file is updated.
+  // |directive_hash| is a hash value of the file's directive lines.
+  struct DepsHashId {
+    DepsHashId() {}
+    DepsHashId(FilenameIdTable::Id id, const FileId& file_id,
+               const SHA256HashValue& directive_hash) :
+        id(id), file_id(file_id), directive_hash(directive_hash) {
+    }
+
+    // True when both the filename id and the file id are valid.
+    // |directive_hash| is not examined.
+    bool IsValid() const {
+      return id != FilenameIdTable::kInvalidId && file_id.IsValid();
+    }
+
+    FilenameIdTable::Id id;
+    FileId file_id;
+    SHA256HashValue directive_hash;
+  };
+
+  // Value type of DepsTable: the dependency list of one identifier and the
+  // time it was last set or looked up.
+  struct DepsTableData {
+    DepsTableData() : last_used_time(0) {
+    }
+
+    time_t last_used_time;
+    std::vector<DepsHashId> deps_hash_ids;
+  };
+
+  typedef SHA256HashValue Key;
+  typedef unordered_map<Key, DepsTableData, SHA256HashValueHasher> DepsTable;
+
+  // Spelled std::string so that this header does not depend on a
+  // using-declaration provided by some other header.
+  DepsCache(const std::string& cache_filename,
+            int identifier_alive_duration,
+            size_t deps_table_size_threshold,
+            int max_proto_size_in_mega_bytes);
+  ~DepsCache();
+
+  // Empties the tables and resets the counters.
+  void Clear();
+
+  bool SaveGomaDeps();
+  bool LoadGomaDeps();
+
+  void IncrMissedCount();
+  void IncrMissedByUpdatedCount();
+  void IncrHitCount();
+
+  // Returns true when |filename| has to be treated as changed compared with
+  // |old_file_id| and |old_directive_hash|.
+  static bool IsDirectiveModified(const std::string& filename,
+                                  const FileId& old_file_id,
+                                  const SHA256HashValue& old_directive_hash,
+                                  FileIdCache* file_id_cache);
+
+  static DepsCache* instance_;
+
+  const CacheFile cache_file_;
+  // When an identifier has not been used for longer than this value (in
+  // seconds), it is dropped at save/load. If negative, we don't dispose old
+  // cache.
+  const int identifier_alive_duration_;
+  // When lots of DepsTable entries exist, we'd like to remove older DepsTable
+  // entries when saving.
+  const size_t deps_table_size_threshold_;
+  // If the proto for cache exceeds this size, loading will fail.
+  // In that case, cache is just ignored.
+  const int max_proto_size_in_mega_bytes_;
+
+  // protects deps_table_.
+  Lock mu_;
+  DepsTable deps_table_;
+
+  // Instead of using a filename, we alternatively use an id for
+  // performance and memory space. So, we manage this table to convert
+  // between filename and id.
+  FilenameIdTable filename_id_table_;
+
+  // protects the three counters below.
+  Lock count_mu_;
+  unsigned int hit_count_;
+  unsigned int missed_count_;
+  unsigned int missed_by_updated_count_;
+
+  DISALLOW_COPY_AND_ASSIGN(DepsCache);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_DEPS_CACHE_H_
diff --git a/client/deps_cache_data.proto b/client/deps_cache_data.proto
new file mode 100644
index 0000000..9dad774
--- /dev/null
+++ b/client/deps_cache_data.proto
@@ -0,0 +1,72 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+syntax = "proto2";
+
+package devtools_goma;
+
+// GomaDeps contains all information for DepsCache.
+// We will make the following map from this information.
+// <identifier> -> <input file>s
+// <input file> -> <file id>, <directive hash>
+//
+// - <identifier> is hash created from command line.
+// - <input file> is a file that will be sent to goma server
+//   in the <identifier> command line.
+// - <file id> is a FileId of <input file>
+// - <directive hash> is a hash value of <input file>'s directive lines.
+//
+// This information is saved to deps cache file.
+
+// Top-level message stored in the deps cache file.
+message GomaDeps {
+  // filename <-> filename_id mapping shared by the two tables below.
+  required GomaFilenameIdTable filename_id_table = 1;
+  // id 2, 3 are deleted.
+  // filename_id -> file id and directive hash.
+  required GomaDepsIdTable deps_id_table = 4;
+  // identifier -> filename_ids of its dependencies.
+  required GomaDependencyTable dependency_table = 5;
+  optional int32 DEPRECATED_version = 6 [deprecated=true];
+  // When the built revision does not match with the real kBuiltRevision,
+  // we dispose cache.
+  optional string built_revision = 7;
+}
+
+// GomaFilenameIdTable is a bimap (filename <-> int (filename_id)).
+// Since a proto larger than 64MB cannot be saved by default, we need to
+// reduce the proto size. The same filenames appear many times, so we use an
+// integer id instead of using the filename as is.
+message GomaFilenameIdTable {
+  optional int32 DEPRECATED_next_available_id = 1 [deprecated=true];
+  repeated GomaFilenameIdTableRecord record = 2;
+}
+
+// One (filename, filename_id) pair of GomaFilenameIdTable.
+message GomaFilenameIdTableRecord {
+  required string filename = 1;
+  required uint32 filename_id = 2;
+}
+
+// GomaDepsIdTable is a map (<filename_id> -> <file id>, <directive_hash>),
+// stored as one record per filename_id.
+message GomaDepsIdTable {
+  repeated GomaDepsIdTableRecord record = 1;
+}
+
+// File id (dev, inode, mtime, size) and directive hash of one filename_id.
+message GomaDepsIdTableRecord {
+  required uint32 filename_id = 1;
+  // dev and inode are not written by Windows builds of the client.
+  optional uint64 dev = 2;
+  optional uint64 inode = 3;
+  required int64 mtime = 4;
+  required int64 size = 5;
+  // Hex string of the SHA256 directive hash.
+  required string directive_hash = 6;
+}
+
+// GomaDependencyTable is a map
+// (<identifier> -> <input file's filename_id>s), stored as one record per
+// identifier.
+message GomaDependencyTable {
+  repeated GomaDependencyTableRecord record = 1;
+}
+
+// Dependencies of one compile command.
+message GomaDependencyTableRecord {
+  // Hex string of the SHA256 identifier of the command.
+  required string identifier = 1;
+  // NOTE(review): declared int32 while the id tables declare filename_id as
+  // uint32 -- confirm this difference is intended.
+  repeated int32 filename_id = 2;
+  // time() value recorded when the entry was last set or looked up.
+  optional int64 last_used_time = 3;
+}
diff --git a/client/deps_cache_unittest.cc b/client/deps_cache_unittest.cc
new file mode 100644
index 0000000..dc3eee4
--- /dev/null
+++ b/client/deps_cache_unittest.cc
@@ -0,0 +1,1254 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "deps_cache.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include <fstream>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "compiler_flags.h"
+#include "compiler_info.h"
+#include "file.h"
+#include "file_helper.h"
+#include "include_cache.h"
+#include "path.h"
+#include "path_resolver.h"
+#include "prototmp/deps_cache_data.pb.h"
+#include "subprocess.h"
+#include "unittest_util.h"
+
+using std::string;
+
+namespace {
+const int kDepsCacheAliveDuration = 3 * 24 * 3600;  // 3 days if in seconds.
+const int kDepsCacheThreshold = 10;  // Max identifiers kept over a restart.
+const int kDepsCacheMaxProtoSizeInMB = 64;
+}  // namespace
+
+namespace devtools_goma {
+
+// Fixture that runs DepsCache on a cache file inside a temporary directory.
+class DepsCacheTest : public testing::Test {
+ protected:
+  typedef DepsCache::DepsHashId DepsHashId;
+
+  // Creates the temporary directory and starts IncludeCache and DepsCache.
+  void SetUp() override {
+    tmpdir_.reset(new TmpdirUtil("deps_cache_test"));
+    IncludeCache::Init(32, true);
+    DepsCache::Init(file::JoinPath(tmpdir_->tmpdir(), ".goma_deps"),
+                    kDepsCacheAliveDuration, kDepsCacheThreshold,
+                    kDepsCacheMaxProtoSizeInMB);
+    dc_ = DepsCache::instance();
+    CHECK(dc_ != nullptr) << "dc_ == nullptr";
+    identifier_count_ = 0;
+  }
+
+  // Stops both caches, then releases the temporary directory helper.
+  void TearDown() override {
+    DepsCache::Quit();
+    IncludeCache::Quit();
+    tmpdir_.reset();
+  }
+
+  // Stores |file_id| for |filename| in |cache|, replacing any existing entry.
+  void SetFileId(FileIdCache* cache, const string& filename,
+                 const FileId& file_id) {
+    auto p = cache->file_ids_.insert(std::make_pair(filename, file_id));
+    if (!p.second)
+      p.first->second = file_id;
+  }
+
+  // Copies into |deps_hash_id| the DepsHashId recorded for |filename| under
+  // |identifier|. Returns false if the cache holds no such record.
+  bool GetDepsHashId(const DepsCache::Identifier& identifier,
+                     const string& filename,
+                     DepsCache::DepsHashId* deps_hash_id) const {
+    CHECK(identifier.valid());
+    FilenameIdTable::Id id = dc_->filename_id_table_.ToId(filename);
+    if (id == FilenameIdTable::kInvalidId)
+      return false;
+
+    auto it = dc_->deps_table_.find(identifier.value());
+    if (it == dc_->deps_table_.end())
+      return false;
+
+    for (const auto& dhi : it->second.deps_hash_ids) {
+      if (dhi.id == id) {
+        *deps_hash_id = dhi;
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Sets the in-memory last_used_time of |identifier|; false if not cached.
+  bool UpdateLastUsedTime(const DepsCache::Identifier& identifier,
+                          time_t last_used_time) {
+    CHECK(identifier.valid());
+    auto it = dc_->deps_table_.find(identifier.value());
+    if (it == dc_->deps_table_.end())
+      return false;
+    it->second.last_used_time = last_used_time;
+    return true;
+  }
+
+  // Returns the path of the cache file that SetUp() hands to DepsCache.
+  string DepsPath() const {
+    return file::JoinPath(tmpdir_->tmpdir(), ".goma_deps");
+  }
+
+  // Parses the cache file into |goma_deps|; a failure is reported as fatal.
+  void LoadGomaDeps(GomaDeps* goma_deps) {
+    std::ifstream dot_goma_deps(DepsPath().c_str(), std::ifstream::binary);
+    ASSERT_TRUE(dot_goma_deps.is_open());
+    ASSERT_TRUE(goma_deps->ParseFromIstream(&dot_goma_deps));
+  }
+
+  // Writes |goma_deps| to the cache file and regenerates .goma_deps.sha256.
+  // Without a matching .sha256 the integrity check revokes the whole cache.
+  void SaveGomaDeps(const GomaDeps& goma_deps) {
+    const string deps_path = DepsPath();
+    {
+      std::ofstream dot_goma_deps(deps_path.c_str(), std::ofstream::binary);
+      ASSERT_TRUE(dot_goma_deps.is_open());
+      ASSERT_TRUE(goma_deps.SerializeToOstream(&dot_goma_deps));
+    }  // The stream is closed here, before the file is hashed.
+    string sha256_str;
+    ASSERT_TRUE(GomaSha256FromFile(deps_path, &sha256_str));
+    ASSERT_TRUE(WriteStringToFile(sha256_str, deps_path + ".sha256"));
+  }
+
+  // Appends "-new" to the built_revision stored in the cache file. The tests
+  // call this between DepsCache::Quit() and DepsCache::Init().
+  void UpdateGomaBuiltRevision() {
+    GomaDeps goma_deps;
+    ASSERT_NO_FATAL_FAILURE(LoadGomaDeps(&goma_deps));
+    goma_deps.set_built_revision(goma_deps.built_revision() + "-new");
+    ASSERT_NO_FATAL_FAILURE(SaveGomaDeps(goma_deps));
+  }
+
+  // Sets last_used_time of |identifier|'s record in the cache file. The tests
+  // call this between DepsCache::Quit() and DepsCache::Init().
+  void UpdateIdentifierLastUsedTime(const DepsCache::Identifier& identifier,
+                                    time_t last_used_time) {
+    CHECK(identifier.valid());
+    GomaDeps goma_deps;
+    ASSERT_NO_FATAL_FAILURE(LoadGomaDeps(&goma_deps));
+
+    GomaDependencyTable* table = goma_deps.mutable_dependency_table();
+    for (int i = 0; i < table->record_size(); ++i) {
+      GomaDependencyTableRecord* record = table->mutable_record(i);
+      if (record->identifier() == identifier.value().ToHexString())
+        record->set_last_used_time(last_used_time);
+    }
+
+    // SaveGomaDeps() also refreshes .goma_deps.sha256: with a stale digest the
+    // whole cache is dropped on load and last_used_time GC goes untested.
+    ASSERT_NO_FATAL_FAILURE(SaveGomaDeps(goma_deps));
+  }
+
+  // Builds a minimal CompilerInfoData named |name| and marked as found.
+  std::unique_ptr<CompilerInfoData> CreateBarebornCompilerInfo(
+      const string& name) {
+    std::unique_ptr<CompilerInfoData> cid(new CompilerInfoData);
+    cid->set_found(true);
+    cid->set_name(name);
+    cid->set_hash(name + "1234567890");
+    return cid;
+  }
+
+  // Returns the id DepsCache's filename table holds for |filename|.
+  FilenameIdTable::Id GetFilenameTableId(const string& filename) {
+    return dc_->filename_id_table_.ToId(filename);
+  }
+
+  // Forwards to DepsCache::MakeDepsIdentifier().
+  DepsCache::Identifier MakeDepsIdentifier(
+      const CompilerInfo& compiler_info,
+      const CompilerFlags& compiler_flags) {
+    return DepsCache::MakeDepsIdentifier(compiler_info, compiler_flags);
+  }
+
+  // Forwards to DepsCache::SetDependencies(), passing tmpdir_->realcwd().
+  bool SetDependencies(const DepsCache::Identifier& identifier,
+                       const string& input_file,
+                       const std::set<string>& dependencies,
+                       FileIdCache* file_id_cache) {
+    return dc_->SetDependencies(identifier, tmpdir_->realcwd(), input_file,
+                                dependencies, file_id_cache);
+  }
+
+  // Forwards to DepsCache::GetDependencies(), passing tmpdir_->realcwd().
+  bool GetDependencies(const DepsCache::Identifier& identifier,
+                       const string& input_file,
+                       std::set<string>* dependencies,
+                       FileIdCache* file_id_cache) const {
+    return dc_->GetDependencies(identifier, tmpdir_->realcwd(), input_file,
+                                dependencies, file_id_cache);
+  }
+
+  // Forwards to DepsCache::RemoveDependency().
+  void RemoveDependency(const DepsCache::Identifier& identifier) {
+    dc_->RemoveDependency(identifier);
+  }
+
+  // Returns the number of identifiers in DepsCache's in-memory table.
+  int DepsCacheSize() const {
+    return static_cast<int>(dc_->deps_table_.size());
+  }
+
+  // Returns an identifier this fixture has not produced before: a fixed hash
+  // whose first sizeof(int) bytes are replaced by a running counter.
+  DepsCache::Identifier MakeFreshIdentifier() {
+    SHA256HashValue hash_value;
+    SHA256HashValue::ConvertFromHexString(
+        "1234567890123456789012345678901234567890123456789012345678901234",
+        &hash_value);
+    int* p = reinterpret_cast<int*>(&hash_value);
+    *p = identifier_count_++;
+    return DepsCache::Identifier(hash_value);
+  }
+
+  std::unique_ptr<TmpdirUtil> tmpdir_;
+  DepsCache* dc_;
+  int identifier_count_;
+};
+
+TEST_F(DepsCacheTest, SetGetDependencies) {
+  // Cached deps stay usable until the input file's directives change.
+  const DepsCache::Identifier id = MakeFreshIdentifier();
+  const string header = tmpdir_->FullPath("a.h");
+  const string source = tmpdir_->FullPath("a.cc");
+
+  tmpdir_->CreateTmpFile("a.h", "kotori");
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "piyo");
+
+  // Compile #1.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    // The identifier has not been registered, so the cache cannot answer
+    // and the output set stays empty.
+    EXPECT_FALSE(GetDependencies(id, source, &deps, &id_cache));
+    EXPECT_TRUE(deps.empty());
+
+    // Register a.h; the input file itself is not listed in the deps.
+    deps.insert(header);
+    EXPECT_TRUE(SetDependencies(id, source, deps, &id_cache));
+  }
+
+  // Compile #2: the dependency cache now answers.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_TRUE(GetDependencies(id, source, &deps, &id_cache));
+
+    // Exactly a.h must come back.
+    const std::set<string> expected = {header};
+    EXPECT_EQ(expected, deps);
+  }
+
+  // Rewrite a.cc without touching its directives.
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "piyopiyo");
+
+  // Compile #3: the directive hash is unchanged, so the cached deps are
+  // still served.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_TRUE(GetDependencies(id, source, &deps, &id_cache));
+
+    // Exactly a.h must come back.
+    const std::set<string> expected = {header};
+    EXPECT_EQ(expected, deps);
+  }
+
+  // Rewrite a.cc again, this time adding a directive.
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "#define A\n"
+      "piyopiyo");
+
+  // Compile #4: the directive hash of a.cc differs now, so
+  // GetDependencies must fail and return nothing.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_FALSE(GetDependencies(id, source, &deps, &id_cache));
+    EXPECT_TRUE(deps.empty());
+  }
+}
+
+TEST_F(DepsCacheTest, SetGetDependenciesRelative) {
+  // A dependency given as a relative path is cached and returned as given.
+  const DepsCache::Identifier id = MakeFreshIdentifier();
+  const string header = "a.h";
+  const string source = tmpdir_->FullPath("a.cc");
+
+  tmpdir_->CreateTmpFile(header, "kotori");
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "piyo");
+
+  // Compile #1.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    // The identifier has not been registered, so the cache cannot answer
+    // and the output set stays empty.
+    EXPECT_FALSE(GetDependencies(id, source, &deps, &id_cache));
+    EXPECT_TRUE(deps.empty());
+
+    // Register a.h; the input file itself is not listed in the deps.
+    deps.insert(header);
+    EXPECT_TRUE(SetDependencies(id, source, deps, &id_cache));
+  }
+
+  // Compile #2: the dependency cache now answers.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_TRUE(GetDependencies(id, source, &deps, &id_cache));
+
+    // Exactly the relative "a.h" must come back.
+    const std::set<string> expected = {header};
+    EXPECT_EQ(expected, deps);
+  }
+
+  // Rewrite a.cc without touching its directives.
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "piyopiyo");
+
+  // Compile #3: the directive hash is unchanged, so the cached deps are
+  // still served.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_TRUE(GetDependencies(id, source, &deps, &id_cache));
+
+    // Exactly the relative "a.h" must come back.
+    const std::set<string> expected = {header};
+    EXPECT_EQ(expected, deps);
+  }
+
+  // Rewrite a.cc again, this time adding a directive.
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "#define A\n"
+      "piyopiyo");
+
+  // Compile #4: the directive hash of a.cc differs now, so
+  // GetDependencies must fail and return nothing.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_FALSE(GetDependencies(id, source, &deps, &id_cache));
+    EXPECT_TRUE(deps.empty());
+  }
+}
+
+TEST_F(DepsCacheTest, RemoveDependencies) {
+  // RemoveDependency() makes a cached identifier unusable again.
+  const DepsCache::Identifier id = MakeFreshIdentifier();
+  const string header = tmpdir_->FullPath("a.h");
+  const string source = tmpdir_->FullPath("a.cc");
+
+  tmpdir_->CreateTmpFile("a.h", "kotori");
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "piyo");
+
+  // Compile #1.
+  {
+    // a.h is the only dependency of a.cc.
+    FileIdCache id_cache;
+    const std::set<string> deps = {header};
+    EXPECT_TRUE(SetDependencies(id, source, deps, &id_cache));
+  }
+
+  // Compile #2: the dependency cache answers.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_TRUE(GetDependencies(id, source, &deps, &id_cache));
+  }
+
+  RemoveDependency(id);
+
+  // Compile #3: the identifier was removed, so the cache cannot answer.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_FALSE(GetDependencies(id, source, &deps, &id_cache));
+  }
+}
+
+TEST_F(DepsCacheTest, RemoveFile) {
+  // A cached entry is rejected once one of its dependencies is deleted.
+  const DepsCache::Identifier id = MakeFreshIdentifier();
+  const string header_a = tmpdir_->FullPath("a.h");
+  const string header_b = tmpdir_->FullPath("b.h");
+  const string source = tmpdir_->FullPath("a.cc");
+
+  tmpdir_->CreateTmpFile("a.h", "kotori A");
+  tmpdir_->CreateTmpFile("b.h", "kotori B");
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "#include \"a.h\"\n"
+      "#include \"b.h\"\n"
+      "piyo");
+
+  // Compile #1.
+  {
+    // Record both headers as dependencies of a.cc; b.h is deleted from disk
+    // further down.
+    FileIdCache id_cache;
+    const std::set<string> deps = {header_a, header_b};
+    EXPECT_TRUE(SetDependencies(id, source, deps, &id_cache));
+  }
+
+  // Compile #2: the dependency cache answers.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_TRUE(GetDependencies(id, source, &deps, &id_cache));
+  }
+
+  // Remove b.h from disk.
+  tmpdir_->RemoveTmpFile("b.h");
+
+  // Compile #3: a dependency is gone, so the cache must not be used.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_FALSE(GetDependencies(id, source, &deps, &id_cache));
+  }
+}
+
+TEST_F(DepsCacheTest, Restart) {
+  // Dependencies registered before a restart are still served after it.
+  const DepsCache::Identifier id = MakeFreshIdentifier();
+  const string header = tmpdir_->FullPath("a.h");
+  const string source = tmpdir_->FullPath("a.cc");
+
+  tmpdir_->CreateTmpFile("a.h", "kotori");
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "piyo");
+
+  // Compile #1: nothing is cached yet, then a.h is registered.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_FALSE(GetDependencies(id, source, &deps, &id_cache));
+    EXPECT_TRUE(deps.empty());
+
+    deps.insert(header);
+    EXPECT_TRUE(SetDependencies(id, source, deps, &id_cache));
+  }
+
+  // Restart both caches; the registered entry has to survive through the
+  // cache file.
+  DepsCache::Quit();
+  IncludeCache::Quit();
+  IncludeCache::Init(32, true);
+  const string cache_path = file::JoinPath(tmpdir_->tmpdir(), ".goma_deps");
+  DepsCache::Init(cache_path, kDepsCacheAliveDuration, kDepsCacheThreshold,
+                  kDepsCacheMaxProtoSizeInMB);
+  dc_ = DepsCache::instance();
+
+  // Compile #2: the dependency cache answers.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_TRUE(GetDependencies(id, source, &deps, &id_cache));
+
+    // Exactly a.h must come back.
+    const std::set<string> expected = {header};
+    EXPECT_EQ(expected, deps);
+  }
+}
+
+TEST_F(DepsCacheTest, RestartWithFileIdUpdate) {
+  // After a restart, all identifiers share the latest FileId of a file.
+  const DepsCache::Identifier id1 = MakeFreshIdentifier();
+  const DepsCache::Identifier id2 = MakeFreshIdentifier();
+  const string header = tmpdir_->FullPath("a.h");
+  const string source = tmpdir_->FullPath("a.cc");
+
+  tmpdir_->CreateTmpFile("a.h", "kotori");
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "piyo");
+
+  // Compile #1 for id1.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    deps.insert(header);
+    ASSERT_TRUE(SetDependencies(id1, source, deps, &id_cache));
+  }
+
+  // Rewrite a.cc, keeping the same directives (same directive hash).
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "piyopiyo");
+
+  // Compile #1 for id2.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    deps.insert(header);
+    ASSERT_TRUE(SetDependencies(id2, source, deps, &id_cache));
+  }
+
+  // a.cc was rewritten after id1 was compiled, so the two FileIds differ
+  // while the directive hashes are the same.
+  {
+    DepsHashId hash_id1;
+    DepsHashId hash_id2;
+    ASSERT_TRUE(GetDepsHashId(id1, source, &hash_id1));
+    ASSERT_TRUE(GetDepsHashId(id2, source, &hash_id2));
+
+    ASSERT_EQ(hash_id1.directive_hash, hash_id2.directive_hash);
+    ASSERT_NE(hash_id1.file_id, hash_id2.file_id);
+  }
+
+  // Restart both caches so that the cache is written out and read back
+  // from the cache file.
+  DepsCache::Quit();
+  IncludeCache::Quit();
+  IncludeCache::Init(32, true);
+  const string cache_path = file::JoinPath(tmpdir_->tmpdir(), ".goma_deps");
+  DepsCache::Init(cache_path, kDepsCacheAliveDuration, kDepsCacheThreshold,
+                  kDepsCacheMaxProtoSizeInMB);
+  dc_ = DepsCache::instance();
+
+  // The DepsHashId of a.cc is updated to the latest one for both
+  // identifiers: the directive hashes were already equal, and now the
+  // FileIds are equal as well.
+  {
+    DepsHashId hash_id1;
+    DepsHashId hash_id2;
+    ASSERT_TRUE(GetDepsHashId(id1, source, &hash_id1));
+    ASSERT_TRUE(GetDepsHashId(id2, source, &hash_id2));
+
+    EXPECT_EQ(hash_id1.directive_hash, hash_id2.directive_hash);
+    EXPECT_EQ(hash_id1.file_id, hash_id2.file_id);
+  }
+}
+
+TEST_F(DepsCacheTest, RestartWithDirectiveHashUpdate) {
+  // An identifier recorded against outdated directives is dropped on restart.
+  // id1 -> a.h, b.h, a.cc
+  // id2 -> a.h, a.cc
+
+  const DepsCache::Identifier id1 = MakeFreshIdentifier();
+  const DepsCache::Identifier id2 = MakeFreshIdentifier();
+
+  const string header_a = tmpdir_->FullPath("a.h");
+  const string header_b = tmpdir_->FullPath("b.h");
+  const string source = tmpdir_->FullPath("a.cc");
+
+  tmpdir_->CreateTmpFile("a.h", "kotori");
+  tmpdir_->CreateTmpFile("b.h",
+      "#include <stdio.h>\n"
+      "piyo");
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <math.h>\n"
+      "piyo");
+
+  // Compile #1 for id1.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    deps.insert(header_a);
+    deps.insert(header_b);
+    ASSERT_TRUE(SetDependencies(id1, source, deps, &id_cache));
+  }
+
+  // Rewrite a.cc with other directives (different directive hash).
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <string.h>\n"
+      "piyopiyo");
+
+  // Compile #1 for id2.
+  {
+    FileIdCache id_cache;
+
+    // The machine may be fast enough that mtime did not change, so bump the
+    // cached mtime of a.cc by hand to keep the test stable.
+    FileId bumped = id_cache.Get(source);
+    bumped.mtime += 1;
+    SetFileId(&id_cache, source, bumped);
+
+    std::set<string> deps;
+    deps.insert(header_a);
+    ASSERT_TRUE(SetDependencies(id2, source, deps, &id_cache));
+  }
+
+  // a.cc was rewritten after id1 was compiled, so both the directive hash
+  // and the FileId differ between the two identifiers.
+  {
+    DepsHashId hash_id1;
+    DepsHashId hash_id2;
+    ASSERT_TRUE(GetDepsHashId(id1, source, &hash_id1));
+    ASSERT_TRUE(GetDepsHashId(id2, source, &hash_id2));
+
+    ASSERT_NE(hash_id1.directive_hash, hash_id2.directive_hash);
+    ASSERT_NE(hash_id1.file_id, hash_id2.file_id);
+  }
+
+  // Restart both caches so that the cache is written out and read back
+  // from the cache file.
+  DepsCache::Quit();
+  IncludeCache::Quit();
+  IncludeCache::Init(32, true);
+  const string cache_path = file::JoinPath(tmpdir_->tmpdir(), ".goma_deps");
+  DepsCache::Init(cache_path, kDepsCacheAliveDuration, kDepsCacheThreshold,
+                  kDepsCacheMaxProtoSizeInMB);
+  dc_ = DepsCache::instance();
+
+  // id1 was recorded for the old a.cc, so its entry is garbage-collected.
+  {
+    DepsHashId hash_id1;
+    EXPECT_FALSE(GetDepsHashId(id1, header_a, &hash_id1));
+    EXPECT_FALSE(GetDepsHashId(id1, header_b, &hash_id1));
+    EXPECT_FALSE(GetDepsHashId(id1, source, &hash_id1));
+  }
+
+  // Only id1 used b.h, so its FilenameIdTable::Id is garbage-collected too.
+  EXPECT_EQ(FilenameIdTable::kInvalidId, GetFilenameTableId(header_b));
+}
+
+TEST_F(DepsCacheTest, RestartWithOldIdentifier) {
+  // Identifiers whose last_used_time is too old are dropped on restart.
+  const DepsCache::Identifier id1 = MakeFreshIdentifier();
+  const DepsCache::Identifier id2 = MakeFreshIdentifier();
+  const string header = tmpdir_->FullPath("a.h");
+  const string source = tmpdir_->FullPath("a.cc");
+
+  tmpdir_->CreateTmpFile("a.h", "kotori");
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "piyo");
+
+  // Compile #1 for id1.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    deps.insert(header);
+    ASSERT_TRUE(SetDependencies(id1, source, deps, &id_cache));
+  }
+  // Compile #1 for id2.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    deps.insert(header);
+    ASSERT_TRUE(SetDependencies(id2, source, deps, &id_cache));
+  }
+
+  // Make id2 look very old in memory.
+  {
+    const time_t epoch = 0;
+    ASSERT_TRUE(UpdateLastUsedTime(id2, epoch));
+  }
+
+  // Restart both caches; id2 carries its old last_used_time through the
+  // cache file.
+  DepsCache::Quit();
+  IncludeCache::Quit();
+  IncludeCache::Init(32, true);
+  const string cache_path = file::JoinPath(tmpdir_->tmpdir(), ".goma_deps");
+  DepsCache::Init(cache_path, kDepsCacheAliveDuration, kDepsCacheThreshold,
+                  kDepsCacheMaxProtoSizeInMB);
+  dc_ = DepsCache::instance();
+
+  // id2 was last used too long ago, so it must have been
+  // garbage-collected.
+  // id1 must still be alive.
+  {
+    DepsHashId hash_id;
+    EXPECT_TRUE(GetDepsHashId(id1, header, &hash_id));
+    EXPECT_TRUE(GetDepsHashId(id1, source, &hash_id));
+    EXPECT_FALSE(GetDepsHashId(id2, header, &hash_id));
+    EXPECT_FALSE(GetDepsHashId(id2, source, &hash_id));
+  }
+
+  // Restart again, this time aging id1 directly in the cache file.
+  DepsCache::Quit();
+  IncludeCache::Quit();
+
+  // Set the last_used_time of id1 to a very old time.
+  {
+    const time_t epoch = 0;
+    UpdateIdentifierLastUsedTime(id1, epoch);
+  }
+
+  // NOTE(review): the edited file is only loaded if .goma_deps.sha256 still
+  // matches it; otherwise the whole cache is dropped for a different reason.
+  IncludeCache::Init(32, true);
+  DepsCache::Init(cache_path, kDepsCacheAliveDuration, kDepsCacheThreshold,
+                  kDepsCacheMaxProtoSizeInMB);
+  dc_ = DepsCache::instance();
+
+  // No identifier is left.
+  {
+    DepsHashId hash_id;
+    EXPECT_FALSE(GetDepsHashId(id1, header, &hash_id));
+    EXPECT_FALSE(GetDepsHashId(id1, source, &hash_id));
+    EXPECT_FALSE(GetDepsHashId(id2, header, &hash_id));
+    EXPECT_FALSE(GetDepsHashId(id2, source, &hash_id));
+  }
+}
+
+TEST_F(DepsCacheTest, RestartWithOldIdentifierWithNegativeAliveDuration) {
+  // A negative alive duration must keep even very old identifiers alive.
+  DepsCache::Quit();
+  IncludeCache::Quit();
+  IncludeCache::Init(32, true);
+  const string path = file::JoinPath(tmpdir_->tmpdir(), ".goma_deps");
+  DepsCache::Init(path, -1, kDepsCacheThreshold, kDepsCacheMaxProtoSizeInMB);
+  dc_ = DepsCache::instance();
+
+  const DepsCache::Identifier id1 = MakeFreshIdentifier();
+  const DepsCache::Identifier id2 = MakeFreshIdentifier();
+
+  const string header = tmpdir_->FullPath("a.h");
+  const string source = tmpdir_->FullPath("a.cc");
+
+  tmpdir_->CreateTmpFile("a.h", "kotori");
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "piyo");
+
+  // Register two identifiers and make both look very old.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    deps.insert(header);
+    ASSERT_TRUE(SetDependencies(id1, source, deps, &id_cache));
+    ASSERT_TRUE(SetDependencies(id2, source, deps, &id_cache));
+
+    const time_t epoch = 0;
+    ASSERT_TRUE(UpdateLastUsedTime(id1, epoch));
+    ASSERT_TRUE(UpdateLastUsedTime(id2, epoch));
+  }
+
+  // Restart both caches, again with a negative alive duration, so that the
+  // old entries go through the cache file.
+  DepsCache::Quit();
+  IncludeCache::Quit();
+  IncludeCache::Init(32, true);
+  DepsCache::Init(path, -1, kDepsCacheThreshold, kDepsCacheMaxProtoSizeInMB);
+  dc_ = DepsCache::instance();
+
+  // Every identifier must still be alive.
+  {
+    DepsHashId hash_id;
+    EXPECT_TRUE(GetDepsHashId(id1, header, &hash_id));
+    EXPECT_TRUE(GetDepsHashId(id1, source, &hash_id));
+    EXPECT_TRUE(GetDepsHashId(id2, header, &hash_id));
+    EXPECT_TRUE(GetDepsHashId(id2, source, &hash_id));
+  }
+}
+
+TEST_F(DepsCacheTest, RestartWithBuiltRevisionUpdate) {
+  // A cache file written by another built revision is thrown away.
+  const DepsCache::Identifier id = MakeFreshIdentifier();
+  const string header = tmpdir_->FullPath("a.h");
+  const string source = tmpdir_->FullPath("a.cc");
+
+  tmpdir_->CreateTmpFile("a.h", "kotori");
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "piyo");
+
+  // Compile #1: nothing is cached yet, then a.h is registered.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_FALSE(GetDependencies(id, source, &deps, &id_cache));
+    EXPECT_TRUE(deps.empty());
+
+    deps.insert(header);
+    EXPECT_TRUE(SetDependencies(id, source, deps, &id_cache));
+  }
+
+  // Stop both caches so that the cache file can be edited.
+  DepsCache::Quit();
+  IncludeCache::Quit();
+
+  // Change the built revision stored in the cache file.
+  UpdateGomaBuiltRevision();
+
+  // Start both caches again on the edited cache file.
+  IncludeCache::Init(32, true);
+  const string cache_path = file::JoinPath(tmpdir_->tmpdir(), ".goma_deps");
+  DepsCache::Init(cache_path, kDepsCacheAliveDuration, kDepsCacheThreshold,
+                  kDepsCacheMaxProtoSizeInMB);
+  dc_ = DepsCache::instance();
+
+  // The whole cache has been disposed.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_FALSE(GetDependencies(id, source, &deps, &id_cache));
+  }
+}
+
+TEST_F(DepsCacheTest, RestartWithMissingSha256) {
+  // A cache file without its .sha256 companion is thrown away.
+  const DepsCache::Identifier id = MakeFreshIdentifier();
+  const string header = tmpdir_->FullPath("a.h");
+  const string source = tmpdir_->FullPath("a.cc");
+
+  tmpdir_->CreateTmpFile("a.h", "kotori");
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "piyo");
+
+  // Compile #1: nothing is cached yet, then a.h is registered.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_FALSE(GetDependencies(id, source, &deps, &id_cache));
+    EXPECT_TRUE(deps.empty());
+
+    deps.insert(header);
+    EXPECT_TRUE(SetDependencies(id, source, deps, &id_cache));
+  }
+
+  // Stop both caches so that the files can be tampered with.
+  DepsCache::Quit();
+  IncludeCache::Quit();
+
+  // Delete .goma_deps.sha256.
+  {
+    const string digest_path =
+        file::JoinPath(tmpdir_->tmpdir(), ".goma_deps.sha256");
+    ASSERT_EQ(0, remove(digest_path.c_str()));
+  }
+
+  // Start both caches again; only the cache file itself is left.
+  IncludeCache::Init(32, true);
+  const string cache_path = file::JoinPath(tmpdir_->tmpdir(), ".goma_deps");
+  DepsCache::Init(cache_path, kDepsCacheAliveDuration, kDepsCacheThreshold,
+                  kDepsCacheMaxProtoSizeInMB);
+  dc_ = DepsCache::instance();
+
+  // The whole cache has been disposed.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_FALSE(GetDependencies(id, source, &deps, &id_cache));
+  }
+}
+
+TEST_F(DepsCacheTest, RestartWithInvalidSha256) {
+  // A cache file whose .sha256 companion does not match is thrown away.
+  const DepsCache::Identifier id = MakeFreshIdentifier();
+  const string header = tmpdir_->FullPath("a.h");
+  const string source = tmpdir_->FullPath("a.cc");
+
+  tmpdir_->CreateTmpFile("a.h", "kotori");
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "piyo");
+
+  // Compile #1: nothing is cached yet, then a.h is registered.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_FALSE(GetDependencies(id, source, &deps, &id_cache));
+    EXPECT_TRUE(deps.empty());
+
+    deps.insert(header);
+    EXPECT_TRUE(SetDependencies(id, source, deps, &id_cache));
+  }
+
+  // Stop both caches so that the files can be tampered with.
+  DepsCache::Quit();
+  IncludeCache::Quit();
+
+  // Overwrite .goma_deps.sha256 with a value that cannot match.
+  {
+    const string digest_path =
+        file::JoinPath(tmpdir_->tmpdir(), ".goma_deps.sha256");
+    ASSERT_TRUE(WriteStringToFile("invalid-sha256", digest_path));
+  }
+
+  // Start both caches again on the now inconsistent pair of files.
+  IncludeCache::Init(32, true);
+  const string cache_path = file::JoinPath(tmpdir_->tmpdir(), ".goma_deps");
+  DepsCache::Init(cache_path, kDepsCacheAliveDuration, kDepsCacheThreshold,
+                  kDepsCacheMaxProtoSizeInMB);
+  dc_ = DepsCache::instance();
+
+  // The whole cache has been disposed.
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_FALSE(GetDependencies(id, source, &deps, &id_cache));
+  }
+}
+
+TEST_F(DepsCacheTest, RestartWithUpdatedFilesInSomeIdentifier) {
+  // id1: a.h, b.h, a.cc
+  // id2: a.h, a.cc
+  // id2 records the latest, updated "a.h", while id1 records an older one,
+  // so id1 won't be saved.
+  // In this case, "b.h" won't be included in FilenameIdTable.
+
+  const DepsCache::Identifier id1 = MakeFreshIdentifier();
+  const DepsCache::Identifier id2 = MakeFreshIdentifier();
+
+  const string header_a = tmpdir_->FullPath("a.h");
+  const string header_b = tmpdir_->FullPath("b.h");
+  const string source = tmpdir_->FullPath("a.cc");
+
+  tmpdir_->CreateTmpFile("a.h", "kotori-a");
+  tmpdir_->CreateTmpFile("b.h", "kotori-kotori-b");
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "piyo");
+  {
+    FileIdCache id_cache;
+
+    std::set<string> deps;
+    deps.insert(header_a);
+    deps.insert(header_b);
+    EXPECT_TRUE(SetDependencies(id1, source, deps, &id_cache));
+  }
+
+  tmpdir_->CreateTmpFile("a.h", "#include <string.h>\n");
+
+  {
+    FileIdCache id_cache;
+    FileId bumped = id_cache.Get(header_a);
+    bumped.mtime += 1;  // Make sure it is newer than the previous one.
+    SetFileId(&id_cache, header_a, bumped);
+
+    std::set<string> deps;
+    deps.insert(header_a);
+    EXPECT_TRUE(SetDependencies(id2, source, deps, &id_cache));
+  }
+
+  // Restart both caches so that the cache is written out and read back.
+  DepsCache::Quit();
+  IncludeCache::Quit();
+
+  // Only id2 is expected to come back from the cache file.
+  IncludeCache::Init(32, true);
+  const string cache_path = file::JoinPath(tmpdir_->tmpdir(), ".goma_deps");
+  DepsCache::Init(cache_path, kDepsCacheAliveDuration, kDepsCacheThreshold,
+                  kDepsCacheMaxProtoSizeInMB);
+  dc_ = DepsCache::instance();
+
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+    EXPECT_FALSE(GetDependencies(id1, source, &deps, &id_cache));
+  }
+  {
+    std::set<string> deps;
+    FileIdCache id_cache;
+    EXPECT_TRUE(GetDependencies(id2, source, &deps, &id_cache));
+  }
+}
+
+TEST_F(DepsCacheTest, RestartWithLargeNumberIdentifiers) {
+  // Only kDepsCacheThreshold identifiers are left after a restart.
+  const int kNumIdentifiers = 30;
+  ASSERT_GT(kNumIdentifiers, kDepsCacheThreshold);
+  std::vector<DepsCache::Identifier> ids(kNumIdentifiers);
+  for (auto& id : ids) {
+    id = MakeFreshIdentifier();
+  }
+
+  const string header = tmpdir_->FullPath("a.h");
+  const string source = tmpdir_->FullPath("a.cc");
+
+  tmpdir_->CreateTmpFile("a.h", "kotori");
+  tmpdir_->CreateTmpFile("a.cc",
+      "#include <stdio.h>\n"
+      "piyo");
+
+  for (const auto& id : ids) {
+    std::set<string> deps;
+    FileIdCache id_cache;
+
+    EXPECT_FALSE(GetDependencies(id, source, &deps, &id_cache));
+    EXPECT_TRUE(deps.empty());
+
+    deps.insert(header);
+    EXPECT_TRUE(SetDependencies(id, source, deps, &id_cache));
+  }
+
+  // Restart both caches so that the cache is written out and read back.
+  DepsCache::Quit();
+  IncludeCache::Quit();
+
+  // More identifiers were registered than the threshold given here.
+  IncludeCache::Init(32, true);
+  const string cache_path = file::JoinPath(tmpdir_->tmpdir(), ".goma_deps");
+  DepsCache::Init(cache_path, kDepsCacheAliveDuration, kDepsCacheThreshold,
+                  kDepsCacheMaxProtoSizeInMB);
+  dc_ = DepsCache::instance();
+
+  EXPECT_EQ(kDepsCacheThreshold, DepsCacheSize());
+}
+
+TEST_F(DepsCacheTest, MakeDepsIdentifierGcc) {
+  // A baseline gcc command line is compared against variants that each
+  // change one thing (compiler, input file, -I, -isystem, -D, or cwd).
+  // Every identifier must be valid and must differ from the baseline.
+  const string gcc_path = "/usr/bin/gcc";
+  const string clang_path = "/usr/bin/clang";
+
+  // Computes the identifier for |args| run in |cwd|, using the compiler
+  // info created for |compiler_path|.
+  auto identifier_for = [&](const string& compiler_path,
+                            const std::vector<string>& args,
+                            const string& cwd) -> DepsCache::Identifier {
+    GCCFlags flags(args, cwd);
+    CompilerInfo info(CreateBarebornCompilerInfo(compiler_path));
+    return MakeDepsIdentifier(info, flags);
+  };
+
+  DepsCache::Identifier baseline =
+      identifier_for(gcc_path, {"gcc", "-c", "test.c"}, "/tmp");
+  EXPECT_TRUE(baseline.valid());
+
+  // The compiler differs.
+  DepsCache::Identifier with_clang =
+      identifier_for(clang_path, {"clang", "-c", "test.c"}, "/tmp");
+  EXPECT_TRUE(with_clang.valid());
+
+  // The input filename differs.
+  DepsCache::Identifier with_other_file =
+      identifier_for(gcc_path, {"gcc", "-c", "test2.c"}, "/tmp");
+  EXPECT_TRUE(with_other_file.valid());
+
+  // An include directory is added.
+  DepsCache::Identifier with_include = identifier_for(
+      gcc_path, {"gcc", "-I/include", "-c", "test.c"}, "/tmp");
+  EXPECT_TRUE(with_include.valid());
+
+  // A system include directory is added.
+  DepsCache::Identifier with_system_include = identifier_for(
+      gcc_path, {"gcc", "-isysteminclude", "-c", "test.c"}, "/tmp");
+  EXPECT_TRUE(with_system_include.valid());
+
+  // A macro definition is added.
+  DepsCache::Identifier with_macro = identifier_for(
+      gcc_path, {"gcc", "-DKOTORI", "-c", "test.c"}, "/tmp");
+  EXPECT_TRUE(with_macro.valid());
+
+  // The working directory differs.
+  DepsCache::Identifier with_other_cwd =
+      identifier_for(gcc_path, {"gcc", "-c", "test.c"}, "/tmp2");
+  EXPECT_TRUE(with_other_cwd.valid());
+
+  EXPECT_NE(baseline.value(), with_include.value());
+  EXPECT_NE(baseline.value(), with_clang.value());
+  EXPECT_NE(baseline.value(), with_other_file.value());
+  EXPECT_NE(baseline.value(), with_system_include.value());
+  EXPECT_NE(baseline.value(), with_macro.value());
+  EXPECT_NE(baseline.value(), with_other_cwd.value());
+}
+
+TEST_F(DepsCacheTest, MakeDepsIdentifierVC) {
+  // A baseline cl.exe command line is compared against variants that each
+  // change one thing (input file, include dir, compiler, macro, or cwd).
+  // Every identifier must be valid and must differ from the baseline.
+  const string cl_path = "cl.exe";
+  const string clang_cl_path = "C:\\clang-cl.exe";
+
+  // Computes the identifier for |args| run in |cwd|, using the compiler
+  // info created for |compiler_path|.
+  auto identifier_for = [&](const string& compiler_path,
+                            const std::vector<string>& args,
+                            const string& cwd) -> DepsCache::Identifier {
+    VCFlags flags(args, cwd);
+    CompilerInfo info(CreateBarebornCompilerInfo(compiler_path));
+    return MakeDepsIdentifier(info, flags);
+  };
+
+  DepsCache::Identifier baseline =
+      identifier_for(cl_path, {cl_path, "/c", "test.c"}, "C:\\tmp");
+  EXPECT_TRUE(baseline.valid());
+
+  // The input filename differs.
+  DepsCache::Identifier with_other_file =
+      identifier_for(cl_path, {cl_path, "/c", "test2.c"}, "C:\\tmp");
+  EXPECT_TRUE(with_other_file.valid());
+
+  // An include directory is added.
+  DepsCache::Identifier with_include = identifier_for(
+      cl_path, {cl_path, "-IC:\\include", "/c", "test.c"}, "C:\\tmp");
+  EXPECT_TRUE(with_include.valid());
+
+  // The compiler differs.
+  DepsCache::Identifier with_clang_cl = identifier_for(
+      clang_cl_path, {clang_cl_path, "/c", "test.c"}, "C:\\tmp");
+  EXPECT_TRUE(with_clang_cl.valid());
+
+  // A macro definition is added.
+  DepsCache::Identifier with_macro = identifier_for(
+      cl_path, {cl_path, "/DKOTORI", "/c", "test.c"}, "C:\\tmp");
+  EXPECT_TRUE(with_macro.valid());
+
+  // The working directory differs.
+  DepsCache::Identifier with_other_cwd =
+      identifier_for(cl_path, {cl_path, "/c", "test.c"}, "C:\\tmp2");
+  EXPECT_TRUE(with_other_cwd.valid());
+
+  EXPECT_NE(baseline.value(), with_other_file.value());
+  EXPECT_NE(baseline.value(), with_include.value());
+  EXPECT_NE(baseline.value(), with_clang_cl.value());
+  EXPECT_NE(baseline.value(), with_macro.value());
+  EXPECT_NE(baseline.value(), with_other_cwd.value());
+}
+
+TEST_F(DepsCacheTest, MakeDepsIdentifierJavac) {
+  // TODO: Currently DepsCache for java is disabled.
+  // Invalid RequiredFilesIdentifier is always returned.
+
+  std::vector<string> javac_args{"javac", "Test.java"};
+  JavacFlags flags(javac_args, "/tmp");
+
+  // Compiler info whose data is only marked as found.
+  std::unique_ptr<CompilerInfoData> found_data(new CompilerInfoData);
+  found_data->set_found(true);
+  CompilerInfo info(std::move(found_data));
+
+  DepsCache::Identifier javac_identifier = MakeDepsIdentifier(info, flags);
+  EXPECT_FALSE(javac_identifier.valid());
+}
+
+}  // namespace devtools_goma
diff --git a/client/descriptor.h b/client/descriptor.h
new file mode 100644
index 0000000..a7c8494
--- /dev/null
+++ b/client/descriptor.h
@@ -0,0 +1,75 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_DESCRIPTOR_H_
+#define DEVTOOLS_GOMA_CLIENT_DESCRIPTOR_H_
+
+#include <memory>
+#include <string>
+
+#ifndef _WIN32
+#include <unistd.h>
+#else
+#include "config_win.h"
+#endif
+
+using std::string;
+
+namespace devtools_goma {
+
+class OneshotClosure;
+class PermanentClosure;
+class SocketDescriptor;
+
+// Descriptor must be used on the same thread where it is created.
+// All notification closure will be called on the same thread.
+class Descriptor {
+ public:
+  // Registers |closure| as the notification closure for readability
+  // (NotifyWhenReadable) or writability (NotifyWhenWritable).
+  // closure must be created by NewPermanentCallback.
+  // it takes ownership of closure.
+  // must not call this in notification closure itself.
+  virtual void NotifyWhenReadable(
+      std::unique_ptr<PermanentClosure> closure) = 0;
+  virtual void NotifyWhenWritable(
+      std::unique_ptr<PermanentClosure> closure) = 0;
+  // NOTE(review): presumably cancels the writable notification set up by
+  // NotifyWhenWritable -- confirm against the implementations.
+  virtual void ClearWritable() = 0;
+  // Registers |closure| as the timeout notification closure.
+  // closure must be created by NewCallback, that is, one shot closure.
+  // must not call this in notification closure itself.
+  // NOTE(review): the unit of |timeout| is not visible in this header
+  // (presumably seconds) -- confirm against the implementations.
+  virtual void NotifyWhenTimedout(double timeout,
+                                  OneshotClosure* closure) = 0;
+  // NOTE(review): presumably replaces the timeout given to
+  // NotifyWhenTimedout -- confirm against the implementations.
+  virtual void ChangeTimeout(double timeout) = 0;
+
+  // Read/Write returns following values:
+  //  < 0: I/O error including retriable error.
+  //       (A caller should retry Read/Write if NeedRetry is true)
+  //  = 0: a connection is closed by a peer.
+  //  > 0: number of bytes read/written.
+  virtual ssize_t Read(void* ptr, size_t len) = 0;
+  virtual ssize_t Write(const void* ptr, size_t len) = 0;
+  // NeedRetry is true when previous Read or Write is failed but
+  // a caller should retry Read or Write.
+  virtual bool NeedRetry() const = 0;
+  // CanReuse returns true if underlying socket can be reused.
+  virtual bool CanReuse() const = 0;
+  // Returns a message describing the last error.
+  virtual string GetLastErrorMessage() const = 0;
+
+  // stop more notification.
+  // you can call this in notification closure.
+  virtual void StopRead() = 0;
+  virtual void StopWrite() = 0;
+
+  // Returns the SocketDescriptor associated with this descriptor.
+  virtual SocketDescriptor* socket_descriptor() = 0;
+
+ protected:
+  // Construction and destruction are restricted to subclasses.
+  Descriptor() {}
+  virtual ~Descriptor() {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(Descriptor);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_DESCRIPTOR_H_
diff --git a/client/descriptor_poller.cc b/client/descriptor_poller.cc
new file mode 100644
index 0000000..e389df2
--- /dev/null
+++ b/client/descriptor_poller.cc
@@ -0,0 +1,156 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "descriptor_poller.h"
+
+#include "autolock_timer.h"
+#include "socket_descriptor.h"
+#include "glog/logging.h"
+#include "simple_timer.h"
+
+namespace devtools_goma {
+
+// Takes ownership of |poll_breaker| and moves |poll_signaler| into this
+// object.  A null |poll_breaker| or an invalid |poll_signaler| fails a
+// CHECK.  poll_thread_ starts as 0; PollEvents() records the polling
+// thread on its first call.
+DescriptorPollerBase::DescriptorPollerBase(SocketDescriptor* poll_breaker,
+                                           ScopedSocket&& poll_signaler)
+    : poll_thread_(0) {
+  CHECK(poll_breaker);
+  CHECK(poll_signaler.valid());
+  poll_breaker_.reset(poll_breaker);
+  poll_signaler_ = std::move(poll_signaler);
+}
+
+// Polls once via PollEventsInternal() and appends the closures of
+// descriptors that need attention to |callbacks|, keyed by descriptor
+// priority.  Descriptors whose priority is <= |priority| are skipped.
+//
+// |lock| must be held on entry; it is released around the blocking poll
+// and held again on return.  If |*statp| is non-null, wait and hold times
+// are reported to it.
+//
+// Returns true when the poll timed out, when polling failed (-1), or when
+// the poll breaker became readable; returns false otherwise.
+bool DescriptorPollerBase::PollEvents(
+    const DescriptorMap& descriptors,
+    int timeout_millisec,
+    int priority,
+    CallbackQueue* callbacks,
+    Lock* lock, AutoLockStat** statp) EXCLUSIVE_LOCKS_REQUIRED(lock) {
+  CHECK(lock);
+  CHECK(statp);
+  // The first caller becomes the polling thread; every later call must
+  // come from that same thread.
+  if (!poll_thread_) {
+    poll_thread_ = GetCurrentThreadId();
+  }
+  CHECK(THREAD_ID_IS_SELF(poll_thread_));
+
+  PreparePollEvents(descriptors);
+  // Used for logging only.  The "+ 1" presumably counts the poll breaker.
+  int num_descriptors = descriptors.size() + 1;
+
+  SimpleTimer timer(SimpleTimer::NO_START);
+  if (*statp != nullptr) {
+    timer.Start();
+  }
+  // Do not hold the lock while blocked in the poll.
+  lock->Release();
+  if (*statp != nullptr) {
+    (*statp)->UpdateWaitTime(timer.GetInNanoSeconds());
+    timer.Start();
+  }
+  VLOG(3) << "poll on " << num_descriptors << " fds";
+  int r = PollEventsInternal(timeout_millisec);
+  VLOG(3) << "poll -> " << r;
+  lock->Acquire();
+  if (*statp != nullptr) {
+    (*statp)->UpdateHoldTime(timer.GetInNanoSeconds());
+  }
+  if (r == 0) {
+    // timed-out
+    // Queue the timeout closure of every enumerated descriptor that is
+    // still waiting for readability or writability.
+    VLOG(3) << "poll timed out";
+    std::unique_ptr<EventEnumerator> enumerator(
+        GetEventEnumerator(descriptors));
+    SocketDescriptor* d = nullptr;
+    while ((d = enumerator->Next()) != nullptr) {
+      CHECK(d);
+      if (d->fd() < 0) {
+        VLOG(1) << "closed? " << d;
+        continue;
+      }
+      if (d->fd() == poll_breaker_->fd()) {
+        continue;
+      }
+      if (d->priority() <= priority) {
+        continue;
+      }
+      if (d->wait_readable() || d->wait_writable()) {
+        OneshotClosure* closure = d->GetTimeoutClosure();
+        VLOG(2) << "fd " << d->fd() << " poll timeout "
+          << timeout_millisec << " msec"
+          << " " << closure;
+        if (closure) {
+          (*callbacks)[d->priority()].push_back(closure);
+        }
+      }
+    }
+    return true;
+  }
+  if (r == -1) {
+    // Poll failure; EINTR is not logged.
+    if (errno != EINTR)
+      PLOG(WARNING) << "poll failed with " << errno;
+    return true;
+  }
+
+  // At least one event fired: queue readable/writable closures, or the
+  // timeout closure for enumerated descriptors that yielded neither.
+  bool poll_break = false;
+  std::unique_ptr<EventEnumerator> enumerator(GetEventEnumerator(descriptors));
+  SocketDescriptor* d = nullptr;
+  while ((d = enumerator->Next()) != nullptr) {
+    CHECK(d);
+    if (d->fd() < 0) {
+      VLOG(1) << "closed? " << d;
+      continue;
+    }
+
+    if (d->fd() == poll_breaker_->fd()) {
+      if (enumerator->IsReadable()) {
+        // This is signalling from RunClosure() or sigchld.
+        // Read the pending bytes; the data itself is discarded.
+        char buf[256];
+        int n = poll_breaker_->Read(buf, sizeof(buf));
+        PLOG_IF(WARNING, n < 0) << "poll breaker n=" << n;
+        poll_break = true;
+      }
+      continue;
+    }
+    if (d->priority() <= priority) {
+      continue;
+    }
+
+    // Stays true unless a readable or writable closure gets queued.
+    bool idle = true;
+    if (enumerator->IsReadable()) {
+      OneshotClosure* closure = d->GetReadableClosure();
+      VLOG(2) << "fd " << d->fd() << " readable "
+        << WorkerThreadManager::Priority_Name(d->priority())
+        << " " << closure;
+      if (closure) {
+        (*callbacks)[d->priority()].push_back(closure);
+        idle = false;
+      }
+    }
+    if (enumerator->IsWritable()) {
+      OneshotClosure* closure = d->GetWritableClosure();
+      VLOG(2) << "fd " << d->fd() << " writable "
+        << WorkerThreadManager::Priority_Name(d->priority())
+        << " " << closure;
+      if (closure) {
+        (*callbacks)[d->priority()].push_back(closure);
+        idle = false;
+      }
+    }
+    if (idle) {
+      OneshotClosure* closure = d->GetTimeoutClosure();
+      VLOG(2) << "fd " << d->fd() << " idle "
+        << WorkerThreadManager::Priority_Name(d->priority())
+        << " " << closure;
+      if (closure)
+        (*callbacks)[d->priority()].push_back(closure);
+    }
+  }
+  return poll_break;
+}
+
+// Writes one byte to poll_signaler_ and logs a warning when the write
+// reports zero bytes or an error.
+void DescriptorPollerBase::Signal() {
+  const int written = poll_signaler_.Write("", 1);
+  if (written <= 0) {
+    LOG(WARNING)
+        << "poll signal r=" << written
+        << " msg="<< poll_signaler_.GetLastErrorMessage();
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/client/descriptor_poller.h b/client/descriptor_poller.h
new file mode 100644
index 0000000..7480701
--- /dev/null
+++ b/client/descriptor_poller.h
@@ -0,0 +1,122 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_DESCRIPTOR_POLLER_H_
+#define DEVTOOLS_GOMA_CLIENT_DESCRIPTOR_POLLER_H_
+
+#include <deque>
+#include <map>
+#include <memory>
+
+#include "scoped_fd.h"
+#include "worker_thread_manager.h"
+
+namespace devtools_goma {
+
+class AutoLockStat;
+class SocketDescriptor;
+
+// Abstract interface of a poller that watches SocketDescriptors and
+// collects the closures to run for them.
+class DescriptorPoller {
+ public:
+  enum EventType { kReadEvent, kWriteEvent };
+
+  // Closures to run, grouped by the priority of the descriptor they
+  // belong to.
+  typedef std::map<WorkerThreadManager::Priority, std::deque<OneshotClosure*>>
+      CallbackQueue;
+  // NOTE(review): the int key is presumably the descriptor's fd (the
+  // kqueue poller looks descriptors up by event ident) -- confirm.
+  typedef std::map<int, SocketDescriptor*> DescriptorMap;
+
+  // Creates a new DescriptorPoller instance.
+  // |poll_breaker| is a special Descriptor that has no callbacks and is
+  // only used to break the PollEvents.  Its ownership is transferred
+  // to the poller.
+  // poll_signaler should not be SocketDescriptor because it will be used
+  // on other thread than the thread for the DescriptorPoller.
+  static DescriptorPoller* NewDescriptorPoller(
+      SocketDescriptor* poll_breaker, ScopedSocket&& poll_signaler);
+  DescriptorPoller() {}
+  virtual ~DescriptorPoller() {}
+
+  // Registers and unregisters polling events for a given descriptor.
+  // They may be called on a different thread (with lock) from the one
+  // polling events.
+  virtual void RegisterPollEvent(SocketDescriptor* d, EventType) = 0;
+  virtual void UnregisterPollEvent(SocketDescriptor* d, EventType) = 0;
+  virtual void RegisterTimeoutEvent(SocketDescriptor* d) = 0;
+  virtual void UnregisterTimeoutEvent(SocketDescriptor* d) = 0;
+  virtual void UnregisterDescriptor(SocketDescriptor* d) = 0;
+
+  // Blocking; polls events over descriptors at most |timeout_millisec| and
+  // populates |callbacks| if any descriptors which has higher priority
+  // than |priority| had any events.
+  // This must be called with |lock| locked and on a single polling thread.
+  // Returns true if poll breakers broke poller.
+  virtual bool PollEvents(const DescriptorMap& descriptors,
+                          int timeout_millisec,
+                          int priority,
+                          CallbackQueue* callbacks,
+                          Lock* lock,
+                          AutoLockStat** statp) = 0;
+
+  // NOTE(review): presumably wakes up a PollEvents call blocked on another
+  // thread -- confirm against the implementations.
+  virtual void Signal() = 0;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(DescriptorPoller);
+};
+
+// Shared PollEvents()/Signal() implementation.  Subclasses supply the
+// polling mechanism through PreparePollEvents, PollEventsInternal and
+// GetEventEnumerator.
+class DescriptorPollerBase : public DescriptorPoller {
+ public:
+  // Takes ownership of |poll_breaker| and moves from |poll_signaler|.
+  DescriptorPollerBase(SocketDescriptor* poll_breaker,
+                       ScopedSocket&& poll_signaler);
+  ~DescriptorPollerBase() override {}
+
+  // Iterates over the descriptors PollEvents() has to examine after a
+  // poll.
+  class EventEnumerator {
+   public:
+    // Returns the next descriptor on which events have occurred.
+    // Returns NULL if there're no more descriptors.
+    virtual SocketDescriptor* Next() = 0;
+
+    // Returns the current descriptor's information.
+    virtual bool IsReadable() const = 0;
+    virtual bool IsWritable() const = 0;
+
+    virtual ~EventEnumerator() {}
+  };
+
+  // Returns true if the poll timed out, failed, or was interrupted by the
+  // poll breaker; returns false otherwise.
+  bool PollEvents(const DescriptorMap& descriptors,
+                  int timeout_millisec,
+                  int priority,
+                  CallbackQueue* callbacks,
+                  Lock* lock,
+                  AutoLockStat** statp) override;
+  // Writes one byte to the poll signaler.
+  void Signal() override;
+
+ protected:
+  // Called right before PollEventsInternal; with lock held.
+  // Scans descriptors or examines registered descriptors to determine
+  // which descriptors to be polled.
+  virtual void PreparePollEvents(const DescriptorMap& descriptors) = 0;
+
+  // Does actual polling.  Returns the number of file descriptors ready
+  // for the requested I/O, zero if it has timed out, or -1 on failure.
+  // PollEvents() calls this with the lock released.
+  virtual int PollEventsInternal(int timeout_millisec) = 0;
+
+  // Called right after PollEventsInternal; with lock held.
+  // Returns EventEnumerator with which caller can iterate over descriptors
+  // that have had any events.
+  // PollEvents() takes ownership of the returned enumerator.
+  virtual EventEnumerator* GetEventEnumerator(
+      const DescriptorMap& descriptors) = 0;
+
+  // Returns the poll breaker; ownership stays with this object.
+  SocketDescriptor* poll_breaker() const { return poll_breaker_.get(); }
+
+ private:
+  std::unique_ptr<SocketDescriptor> poll_breaker_;
+  ScopedSocket poll_signaler_;
+  // Thread that first called PollEvents(); 0 until then.
+  WorkerThreadManager::ThreadId poll_thread_;
+  DISALLOW_COPY_AND_ASSIGN(DescriptorPollerBase);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_DESCRIPTOR_POLLER_H_
diff --git a/client/descriptor_poller_epoll.cc b/client/descriptor_poller_epoll.cc
new file mode 100644
index 0000000..8eb0f85
--- /dev/null
+++ b/client/descriptor_poller_epoll.cc
@@ -0,0 +1,180 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifdef USE_EPOLL
+
+#include <memory>
+
+#include "descriptor_poller.h"
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19)
+# error kernel is too old to use epoll. Try "make USE_SELECT=1".
+#endif
+#include <sys/epoll.h>
+#define EPOLL_SIZE_HINT FD_SETSIZE  // Any value but not 0 should be ok.
+
+#include "compiler_specific.h"
+#include "socket_descriptor.h"
+#include "glog/logging.h"
+#include "scoped_fd.h"
+#include "unordered.h"
+
+namespace devtools_goma {
+
+// DescriptorPoller implementation on top of epoll.
+class EpollDescriptorPoller : public DescriptorPollerBase {
+ public:
+  // Creates the epoll instance and registers |poll_breaker| for EPOLLIN.
+  // CHECK-fails if the epoll instance cannot be created or the poll
+  // breaker cannot be added.
+  EpollDescriptorPoller(SocketDescriptor* poll_breaker,
+                        ScopedSocket&& poll_signaler)
+      : DescriptorPollerBase(poll_breaker, std::move(poll_signaler)),
+        epoll_fd_(-1),
+        nevents_(0),
+        last_nevents_(0),
+        nfds_(0) {
+    epoll_fd_.reset(epoll_create(EPOLL_SIZE_HINT));
+    CHECK(epoll_fd_.valid());
+    CHECK(poll_breaker);
+    struct epoll_event ev = {};
+    ev.events = EPOLLIN;
+    ev.data.ptr = poll_breaker;
+    PCHECK(epoll_ctl(
+        epoll_fd_.fd(), EPOLL_CTL_ADD, poll_breaker->fd(), &ev) != -1);
+  }
+
+  // Adds |d| to the epoll set, or modifies its entry if it is already
+  // there.  The event mask covers |type| plus whatever |d| is currently
+  // waiting for.
+  void RegisterPollEvent(SocketDescriptor* d, EventType type) override {
+    DCHECK(d->wait_writable() || d->wait_readable());
+    struct epoll_event ev = {};
+    ev.data.ptr = d;
+    if (type == kReadEvent || d->wait_readable()) {
+      DCHECK(d->wait_readable());
+      ev.events |= EPOLLIN;
+    }
+    if (type == kWriteEvent || d->wait_writable()) {
+      DCHECK(d->wait_writable());
+      ev.events |= EPOLLOUT;
+    }
+    int r = epoll_ctl(epoll_fd_.fd(), EPOLL_CTL_ADD, d->fd(), &ev);
+    if (r < 0 && errno == EEXIST) {
+      // Already registered: update the event mask instead.
+      r = epoll_ctl(epoll_fd_.fd(), EPOLL_CTL_MOD, d->fd(), &ev);
+    }
+    PCHECK(r != -1) << "Cannot add fd for epoll:" << d->fd();
+  }
+
+  // Rebuilds the event mask of |d| from what it is still waiting for.
+  // The entry is modified when something remains and deleted otherwise.
+  void UnregisterPollEvent(SocketDescriptor* d,
+                           EventType type ALLOW_UNUSED) override {
+    struct epoll_event ev = {};
+    ev.data.ptr = d;
+    int op = EPOLL_CTL_DEL;
+    if (d->wait_readable()) {
+      ev.events |= EPOLLIN;
+      op = EPOLL_CTL_MOD;
+    }
+    if (d->wait_writable()) {
+      ev.events |= EPOLLOUT;
+      op = EPOLL_CTL_MOD;
+    }
+    PCHECK(epoll_ctl(epoll_fd_.fd(), op, d->fd(), &ev) != -1)
+        << "Cannot delete fd for epoll:" << d->fd();
+  }
+
+  void RegisterTimeoutEvent(SocketDescriptor* d) override {
+    timeout_waiters_.insert(d);
+  }
+
+  void UnregisterTimeoutEvent(SocketDescriptor* d) override {
+    timeout_waiters_.erase(d);
+  }
+
+  // Drops |d| from the timeout waiters and from the epoll set.  A
+  // descriptor that is not in the epoll set (ENOENT) is tolerated.
+  void UnregisterDescriptor(SocketDescriptor* d) override {
+    CHECK(d);
+    timeout_waiters_.erase(d);
+    int r = epoll_ctl(epoll_fd_.fd(), EPOLL_CTL_DEL, d->fd(), nullptr);
+    PCHECK(r != -1 || errno == ENOENT)
+        << "Cannot delete fd for epoll:" << d->fd();
+  }
+
+ protected:
+  // Sizes the event buffer for all descriptors plus one extra slot.
+  // The buffer only grows; last_nevents_ records its capacity, so it is
+  // updated only when the buffer is reallocated.
+  void PreparePollEvents(const DescriptorMap& descriptors) override {
+    nevents_ = descriptors.size() + 1;
+    if (last_nevents_ < nevents_) {
+      events_.reset(new struct epoll_event[nevents_]);
+      last_nevents_ = nevents_;
+    }
+  }
+
+  // Waits for events and remembers the epoll_wait result in nfds_.
+  int PollEventsInternal(int timeout_millisec) override {
+    nfds_ = epoll_wait(epoll_fd_.fd(), events_.get(),
+                       nevents_, timeout_millisec);
+    return nfds_;
+  }
+
+  // Enumerates the descriptors with fired events first, then the timeout
+  // waiters that did not receive any event.
+  class EpollEventEnumerator : public DescriptorPollerBase::EventEnumerator {
+   public:
+    explicit EpollEventEnumerator(EpollDescriptorPoller* poller)
+        : poller_(poller), idx_(0), current_ev_(nullptr) {
+      CHECK(poller_);
+      timedout_iter_ = poller_->timeout_waiters_.begin();
+    }
+
+    SocketDescriptor* Next() override {
+      // Iterates over fired events.
+      if (idx_ < poller_->nfds_) {
+        current_ev_ = &poller_->events_.get()[idx_++];
+        SocketDescriptor* d = static_cast<SocketDescriptor*>(
+            current_ev_->data.ptr);
+        event_received_.insert(d);
+        return d;
+      }
+      // No current event any more: IsReadable/IsWritable return false.
+      current_ev_ = nullptr;
+      // Then iterates over timed out ones.
+      for (; timedout_iter_ != poller_->timeout_waiters_.end();
+           ++timedout_iter_) {
+        // The post-increment steps past the returned waiter so that the
+        // next call resumes after it.
+        if (event_received_.find(*timedout_iter_) == event_received_.end())
+          return *timedout_iter_++;
+      }
+      return nullptr;
+    }
+
+    bool IsReadable() const override {
+      return current_ev_ && (current_ev_->events & EPOLLIN);
+    }
+    bool IsWritable() const override {
+      return current_ev_ && (current_ev_->events & EPOLLOUT);
+    }
+
+   private:
+    EpollDescriptorPoller* poller_;
+    // Index of the next fired event in poller_->events_.
+    int idx_;
+    struct epoll_event* current_ev_;
+    unordered_set<SocketDescriptor*>::const_iterator timedout_iter_;
+    // Descriptors already returned from the fired-event phase.
+    unordered_set<SocketDescriptor*> event_received_;
+
+    DISALLOW_COPY_AND_ASSIGN(EpollEventEnumerator);
+  };
+
+  // Returns a new enumerator over the result of the latest poll.
+  EventEnumerator* GetEventEnumerator(
+      const DescriptorMap& descriptors ALLOW_UNUSED) override {
+    return new EpollEventEnumerator(this);
+  }
+
+ private:
+  friend class EpollEventEnumerator;
+  ScopedFd epoll_fd_;
+  std::unique_ptr<struct epoll_event[]> events_;
+  unordered_set<SocketDescriptor*> timeout_waiters_;
+  // Number of events requested from epoll_wait.
+  int nevents_;
+  // Capacity of events_.
+  int last_nevents_;
+  // Result of the latest epoll_wait call.
+  int nfds_;
+  DISALLOW_COPY_AND_ASSIGN(EpollDescriptorPoller);
+};
+
+// static
+DescriptorPoller* DescriptorPoller::NewDescriptorPoller(
+    SocketDescriptor* breaker, ScopedSocket&& signaler) {
+  // USE_EPOLL builds: hand |breaker| and |signaler| to a new
+  // EpollDescriptorPoller.
+  DescriptorPoller* poller =
+      new EpollDescriptorPoller(breaker, std::move(signaler));
+  return poller;
+}
+
+}  // namespace devtools_goma
+
+#endif  // USE_EPOLL
diff --git a/client/descriptor_poller_kqueue.cc b/client/descriptor_poller_kqueue.cc
new file mode 100644
index 0000000..a300ca0
--- /dev/null
+++ b/client/descriptor_poller_kqueue.cc
@@ -0,0 +1,181 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifdef USE_KQUEUE
+
+#include "descriptor_poller.h"
+
+#include <sys/event.h>
+#include <sys/time.h>
+
+#include "socket_descriptor.h"
+#include "glog/logging.h"
+#include "scoped_fd.h"
+#include "unordered.h"
+
+namespace devtools_goma {
+
+// DescriptorPoller implementation on top of kqueue.
+class KqueueDescriptorPoller : public DescriptorPollerBase {
+ public:
+  // Creates the kqueue and registers |poll_breaker| for EVFILT_READ.
+  // CHECK-fails if the kqueue cannot be created or the poll breaker
+  // cannot be added.
+  KqueueDescriptorPoller(SocketDescriptor* poll_breaker,
+                         ScopedSocket&& poll_signaler)
+      : DescriptorPollerBase(poll_breaker, std::move(poll_signaler)),
+        kqueue_fd_(-1),
+        nevents_(0) {
+    kqueue_fd_.reset(kqueue());
+    CHECK(kqueue_fd_.valid());
+    CHECK(poll_breaker);
+    struct kevent kev;
+    EV_SET(&kev, poll_breaker->fd(), EVFILT_READ, EV_ADD, 0, 0, nullptr);
+    PCHECK(kevent(kqueue_fd_.fd(), &kev, 1, nullptr, 0, nullptr) != -1)
+        << "Cannot add fd for kqueue:" << poll_breaker->fd();
+  }
+
+  // Adds the filter matching |type| (EVFILT_READ or EVFILT_WRITE) for
+  // |d| to the kqueue.
+  void RegisterPollEvent(SocketDescriptor* d, EventType type) override {
+    DCHECK(d->wait_writable() || d->wait_readable());
+    struct kevent kev;
+    short filter = 0;
+    if (type == kReadEvent) {
+      DCHECK(d->wait_readable());
+      filter = EVFILT_READ;
+    } else if (type == kWriteEvent) {
+      DCHECK(d->wait_writable());
+      filter = EVFILT_WRITE;
+    }
+    DCHECK(filter);
+    EV_SET(&kev, d->fd(), filter, EV_ADD, 0, 0, nullptr);
+    // Report the fd being registered, not the poll breaker's fd.
+    PCHECK(kevent(kqueue_fd_.fd(), &kev, 1, nullptr, 0, nullptr) != -1)
+        << "Cannot add fd for kqueue:" << d->fd();
+  }
+
+  // Deletes the filter matching |type| for |d|.  A filter that is not
+  // registered (ENOENT) is tolerated.
+  void UnregisterPollEvent(SocketDescriptor* d, EventType type) override {
+    struct kevent kev;
+    short filter = (type == kReadEvent) ? EVFILT_READ : EVFILT_WRITE;
+    EV_SET(&kev, d->fd(), filter, EV_DELETE, 0, 0, nullptr);
+    int r = kevent(kqueue_fd_.fd(), &kev, 1, nullptr, 0, nullptr);
+    PCHECK(r != -1 || errno == ENOENT)
+        << "Cannot delete fd from kqueue:" << d->fd();
+  }
+
+  void RegisterTimeoutEvent(SocketDescriptor* d) override {
+    timeout_waiters_.insert(d);
+  }
+
+  void UnregisterTimeoutEvent(SocketDescriptor* d) override {
+    timeout_waiters_.erase(d);
+  }
+
+  // Drops |d| from the timeout waiters and deletes both its read and
+  // write filters.  Filters that are not registered (ENOENT) are
+  // tolerated.
+  void UnregisterDescriptor(SocketDescriptor* d) override {
+    CHECK(d);
+    timeout_waiters_.erase(d);
+
+    struct kevent kev;
+    EV_SET(&kev, d->fd(), EVFILT_READ, EV_DELETE, 0, 0, nullptr);
+    int r = kevent(kqueue_fd_.fd(), &kev, 1, nullptr, 0, nullptr);
+    PCHECK(r != -1 || errno == ENOENT)
+        << "Cannot delete fd from kqueue:" << d->fd();
+
+    EV_SET(&kev, d->fd(), EVFILT_WRITE, EV_DELETE, 0, 0, nullptr);
+    r = kevent(kqueue_fd_.fd(), &kev, 1, nullptr, 0, nullptr);
+    PCHECK(r != -1 || errno == ENOENT)
+        << "Cannot delete fd from kqueue:" << d->fd();
+  }
+
+ protected:
+  // Sizes the event list for all descriptors plus one extra slot.
+  void PreparePollEvents(const DescriptorMap& descriptors) override {
+    eventlist_.resize(descriptors.size() + 1);
+  }
+
+  // Waits for events for at most |timeout_millisec| and remembers the
+  // kevent result in nevents_.
+  int PollEventsInternal(int timeout_millisec) override {
+    // Split the millisecond timeout into seconds and nanoseconds.
+    struct timespec tv;
+    tv.tv_sec = timeout_millisec / 1000;
+    tv.tv_nsec = (timeout_millisec - (tv.tv_sec * 1000)) * 1000000;
+    nevents_ = kevent(kqueue_fd_.fd(), nullptr, 0,
+        &eventlist_[0], eventlist_.size(), &tv);
+    return nevents_;
+  }
+
+  // Enumerates the descriptors with fired events first, then the timeout
+  // waiters that did not receive any event.
+  class KqueueEventEnumerator : public DescriptorPollerBase::EventEnumerator {
+   public:
+    KqueueEventEnumerator(KqueueDescriptorPoller* poller,
+                          const DescriptorMap& descriptors)
+      : poller_(poller),
+        descriptors_(descriptors),
+        idx_(0),
+        current_ev_(nullptr) {
+      CHECK(poller_);
+      timedout_iter_ = poller_->timeout_waiters_.begin();
+    }
+
+    SocketDescriptor* Next() override {
+      // Iterates over fired events.
+      if (idx_ < poller_->nevents_) {
+        current_ev_ = &poller_->eventlist_[idx_++];
+        PCHECK(!(current_ev_->flags & EV_ERROR));
+        // Map the event's ident back to its descriptor: either the poll
+        // breaker or an entry of |descriptors_|.
+        SocketDescriptor* d = nullptr;
+        if (static_cast<int>(current_ev_->ident) ==
+            poller_->poll_breaker()->fd()) {
+          d = poller_->poll_breaker();
+        } else {
+          DescriptorMap::const_iterator iter = descriptors_.find(
+              current_ev_->ident);
+          CHECK(iter != descriptors_.end());
+          d = iter->second;
+        }
+        event_received_.insert(d);
+        return d;
+      }
+      // No current event any more: IsReadable/IsWritable return false.
+      current_ev_ = nullptr;
+      // Then iterates over timed out ones.
+      for (; timedout_iter_ != poller_->timeout_waiters_.end();
+           ++timedout_iter_) {
+        // The post-increment steps past the returned waiter so that the
+        // next call resumes after it.
+        if (event_received_.find(*timedout_iter_) == event_received_.end())
+          return *timedout_iter_++;
+      }
+      return nullptr;
+    }
+
+    bool IsReadable() const override {
+      return current_ev_ && (current_ev_->filter == EVFILT_READ);
+    }
+    bool IsWritable() const override {
+      return current_ev_ && (current_ev_->filter == EVFILT_WRITE);
+    }
+
+   private:
+    KqueueDescriptorPoller* poller_;
+    const DescriptorMap& descriptors_;
+    // Index of the next fired event in poller_->eventlist_.
+    int idx_;
+    struct kevent* current_ev_;
+    unordered_set<SocketDescriptor*>::const_iterator timedout_iter_;
+    // Descriptors already returned from the fired-event phase.
+    unordered_set<SocketDescriptor*> event_received_;
+
+    DISALLOW_COPY_AND_ASSIGN(KqueueEventEnumerator);
+  };
+
+  // Returns a new enumerator over the result of the latest poll.
+  EventEnumerator* GetEventEnumerator(
+      const DescriptorMap& descriptors) override {
+    DCHECK(nevents_ <= static_cast<int>(eventlist_.size()));
+    return new KqueueEventEnumerator(this, descriptors);
+  }
+
+ private:
+  friend class KqueueEventEnumerator;
+  ScopedFd kqueue_fd_;
+  std::vector<struct kevent> eventlist_;
+  unordered_set<SocketDescriptor*> timeout_waiters_;
+  // Result of the latest kevent() wait.
+  int nevents_;
+  DISALLOW_COPY_AND_ASSIGN(KqueueDescriptorPoller);
+};
+
+// static
+DescriptorPoller* DescriptorPoller::NewDescriptorPoller(
+    SocketDescriptor* breaker, ScopedSocket&& signaler) {
+  // USE_KQUEUE builds: hand |breaker| and |signaler| to a new
+  // KqueueDescriptorPoller.
+  DescriptorPoller* poller =
+      new KqueueDescriptorPoller(breaker, std::move(signaler));
+  return poller;
+}
+
+}  // namespace devtools_goma
+
+#endif  // USE_KQUEUE
diff --git a/client/descriptor_poller_select.cc b/client/descriptor_poller_select.cc
new file mode 100644
index 0000000..984aa98
--- /dev/null
+++ b/client/descriptor_poller_select.cc
@@ -0,0 +1,189 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#if !USE_EPOLL && !USE_KQUEUE
+
+#include "descriptor_poller.h"
+
+#include <algorithm>
+#include <vector>
+
+#ifndef _WIN32
+# include <limits.h>
+# include <signal.h>
+# include <sys/ioctl.h>
+# include <sys/socket.h>
+# include <sys/wait.h>
+#else
+# include "socket_helper_win.h"
+#endif
+
+#include "socket_descriptor.h"
+#include "glog/logging.h"
+
+namespace devtools_goma {
+
+class SelectDescriptorPoller : public DescriptorPollerBase {  // uses select()
+ public:
+  SelectDescriptorPoller(SocketDescriptor* poll_breaker,
+                         ScopedSocket&& poll_signaler)
+      : DescriptorPollerBase(poll_breaker, std::move(poll_signaler)),
+        max_fd_(-1) {
+    // Socket number ranges from 1 to 32767 on Windows, where the FD_SETSIZE is
+    // 64. There's no guarantee on Windows that the value of socket fd is
+    // smaller than FD_SETSIZE.
+#ifndef _WIN32
+    CHECK_LT(poll_breaker->fd(), FD_SETSIZE);
+#endif
+  }
+
+  // No-op. We register polling descriptors in PreparePollEvents.
+  void RegisterPollEvent(SocketDescriptor*, EventType) override {}
+  void UnregisterPollEvent(SocketDescriptor*, EventType) override {}
+  void RegisterTimeoutEvent(SocketDescriptor*) override {}
+  void UnregisterTimeoutEvent(SocketDescriptor*) override {}
+  void UnregisterDescriptor(SocketDescriptor*) override {}
+
+ protected:
+  void PreparePollEvents(const DescriptorMap& descriptors) override {
+    FD_ZERO(&read_fd_);
+    FD_ZERO(&write_fd_);
+    // Always watch the poll breaker, using its own fd (not a loop variable).
+    const int breaker_fd = poll_breaker()->fd();
+    max_fd_ = breaker_fd;
+    MSVC_PUSH_DISABLE_WARNING_FOR_FD_SET();
+    FD_SET(breaker_fd, &read_fd_);
+    MSVC_POP_WARNING();
+
+    std::vector<SocketDescriptor*> waiting_descriptors;
+    for (const auto& iter : descriptors) {
+      SocketDescriptor* d = iter.second;
+      const int fd = d->fd();
+      if (fd < 0) {
+        VLOG(1) << "closed? " << d;
+        continue;
+      }
+      if (!d->wait_readable() && !d->wait_writable()) {
+        VLOG(1) << "not waiting? " << fd << " " << d;
+        continue;
+      }
+      waiting_descriptors.push_back(d);
+    }
+
+#ifdef _WIN32
+    // FD_SETSIZE is very small (64) on Windows.
+    // Following is a workaround. i.e. randomly drops descriptors.
+    int number_of_fd = 1;
+    if (waiting_descriptors.size() >= FD_SETSIZE) {
+      std::random_shuffle(waiting_descriptors.begin(),
+                          waiting_descriptors.end());
+      LOG(INFO) << "#waiting_descriptors is larger than FD_SETSIZE."
+                << " #descriptors=" << descriptors.size()
+                << " #waiting_descriptors=" << waiting_descriptors.size()
+                << " FD_SETSIZE=" << FD_SETSIZE;
+    }
+#endif
+
+    for (const auto* d : waiting_descriptors) {
+      const int fd = d->fd();
+      bool wait_readable = d->wait_readable();
+      bool wait_writable = d->wait_writable();
+      CHECK(wait_readable || wait_writable);
+#ifndef _WIN32
+      CHECK_LT(fd, FD_SETSIZE);
+#else
+      number_of_fd++;
+      if (number_of_fd >= FD_SETSIZE) {
+        break;
+      }
+#endif
+      if (wait_readable) {
+        if (fd > max_fd_) max_fd_ = fd;
+        MSVC_PUSH_DISABLE_WARNING_FOR_FD_SET();
+        FD_SET(fd, &read_fd_);
+        MSVC_POP_WARNING();
+      }
+      if (wait_writable) {
+        if (fd > max_fd_) max_fd_ = fd;
+        MSVC_PUSH_DISABLE_WARNING_FOR_FD_SET();
+        FD_SET(fd, &write_fd_);
+        MSVC_POP_WARNING();
+      }
+    }
+  }
+
+  int PollEventsInternal(int timeout_millisec) override {
+    struct timeval tv;  // |timeout_millisec| split into seconds + microseconds
+    tv.tv_sec = timeout_millisec / 1000;
+    tv.tv_usec = (timeout_millisec - (tv.tv_sec * 1000)) * 1000;
+    return select(max_fd_ + 1, &read_fd_, &write_fd_, nullptr, &tv);
+  }
+
+  class SelectEventEnumerator : public DescriptorPollerBase::EventEnumerator {
+   public:
+    SelectEventEnumerator(SelectDescriptorPoller* poller,
+                          const DescriptorMap& descriptors)
+        : poller_(poller),
+          descriptors_(descriptors),
+          iter_(descriptors_.begin()),
+          current_fd_(-1) {
+      DCHECK(poller);
+    }
+
+    SocketDescriptor* Next() override {
+      // Iterates over descriptors.
+      if (iter_ != descriptors_.end()) {
+        SocketDescriptor* d = iter_->second;
+        current_fd_ = d->fd();
+        ++iter_;
+        return d;
+      }
+      // Then returns poll_breaker.
+      if (current_fd_ != poller_->poll_breaker()->fd()) {
+        SocketDescriptor* d = poller_->poll_breaker();
+        current_fd_ = d->fd();
+        return d;
+      }
+      return nullptr;
+    }
+
+    bool IsReadable() const override {  // state of the fd returned by Next()
+      return FD_ISSET(current_fd_, &poller_->read_fd_) != 0;
+    }
+    bool IsWritable() const override {  // state of the fd returned by Next()
+      return FD_ISSET(current_fd_, &poller_->write_fd_) != 0;
+    }
+
+   private:
+    SelectDescriptorPoller* poller_;
+    const DescriptorMap& descriptors_;
+    DescriptorMap::const_iterator iter_;
+    int current_fd_;
+
+    DISALLOW_COPY_AND_ASSIGN(SelectEventEnumerator);
+  };
+
+  EventEnumerator* GetEventEnumerator(
+      const DescriptorMap& descriptors) override {
+    return new SelectEventEnumerator(this, descriptors);
+  }
+
+ private:
+  friend class SelectEventEnumerator;
+  fd_set read_fd_;
+  fd_set write_fd_;
+  int max_fd_;
+  DISALLOW_COPY_AND_ASSIGN(SelectDescriptorPoller);
+};
+
+// static -- factory of this file: returns a new SelectDescriptorPoller.
+DescriptorPoller* DescriptorPoller::NewDescriptorPoller(
+    SocketDescriptor* breaker, ScopedSocket&& signaler) {
+  return new SelectDescriptorPoller(breaker, std::move(signaler));
+}
+
+}  // namespace devtools_goma
+
+#endif
diff --git a/client/diagnose_goma_log.py b/client/diagnose_goma_log.py
new file mode 100755
index 0000000..7cc88d2
--- /dev/null
+++ b/client/diagnose_goma_log.py
@@ -0,0 +1,784 @@
+#!/usr/bin/env python
+# Copyright 2012 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""A tool to diagnose goma log file.
+
+Usage)
+  $ ./diagnose_goma_log.py
+    - diagnose the latest goma logs
+
+  $ ./diagnose_goma_log.py compiler_proxy.INFO
+    - diagnose "compiler_proxy.INFO" file as goma log.
+
+It shows
+ - initial logs such as
+      goma built revision
+      goma flags
+      goma version
+ - Counts of each ReplyResponse
+ - stats of duration of Task
+ - Task log of ReplyResponse fail fallback
+ - Error lines
+ - Warning lines
+"""
+
+
+
+
+import datetime
+import glob
+import gzip
+import json
+import logging
+import optparse
+import os
+import re
+import sys
+
+
+LOGLINE_RE = re.compile(
+    '^([IWEF])(\\d{4} \\d{2}:\\d{2}:\\d{2}).(\\d{6})  *(.*)')
+
+
+class TaskLog(object):
+  """Log instances of a compile task."""
+
+  def __init__(self, taskId, desc, start_time):
+    self.id = taskId
+    self.desc = desc
+    self.compile_type = 'compile'
+    if desc.startswith('precompiling '):
+      self.compile_type = 'precompile'
+    elif desc.startswith('linking '):
+      self.compile_type = 'link'
+    self.start_time = start_time
+    self.end_time = start_time
+    self.response = None
+    self.loglines = []
+
+  def Duration(self):
+    """Task's duration.
+
+    Returns:
+      Task's duration in datetime.timedelta
+    """
+    return self.end_time - self.start_time
+
+
+class LogLine(object):
+  """A log instance."""
+
+  def __init__(self, loglevel, time_str, micro_str, logtext):
+    self.loglevel = loglevel
+    self.logtext = logtext
+    self._time_str = time_str
+    self._micro_str = micro_str
+    self._logtime = None  # invalid.
+
+  def __str__(self):
+    return '%s %s.%s %s' % (self.loglevel, self._time_str, self._micro_str,
+                            self.logtext)
+  @property
+  def logtime(self):
+    """Returns an instance of datetime.datetime when the log line is created."""
+    if self._logtime:
+      return self._logtime
+
+    if not self._time_str or not self._micro_str:
+      return None
+
+    now = datetime.datetime.now()
+
+    # strptime won't accept "0229" if year is not provided.
+    # c.f. http://bugs.python.org/issue26460
+    lt = datetime.datetime.strptime(str(now.year) + self._time_str,
+                                    '%Y%m%d %H:%M:%S')
+    if lt.month > now.month:
+      lt.year -= 1
+    self._logtime = datetime.datetime(
+        lt.year, lt.month, lt.day, lt.hour, lt.minute, lt.second,
+        int(self._micro_str))
+    return self._logtime
+
+
+class OpenWrapper(object):
+  """A wrapper of open."""
+
+  def __init__(self, filename):
+    self._filename = filename
+    self._fh = None
+
+  def __enter__(self):
+    _, ext = os.path.splitext(self._filename)
+    if ext == '.gz':
+      self._fh = gzip.open(self._filename)
+    else:
+      self._fh = open(self._filename)
+    return self._fh
+
+  def __exit__(self, unused_exc_type, unused_exc_value, unused_traceback):
+    self._fh.close()
+
+
+def ParseLogline(logline):
+  """Parses a log line.
+
+  Args:
+    logline: a log line string
+  Returns:
+    a LogLine instance.  First line of log instance will have
+    loglevel and logtext.  Followings will have None for these.
+  """
+  m = LOGLINE_RE.match(logline)
+  if m:
+    loglevel = m.group(1)
+    logtext = m.group(4)
+    return LogLine(loglevel, m.group(2), m.group(3), logtext)
+  return LogLine(None, None, None, logline)
+
+
+def FindCompilerProxyInfos(logdir):
+  """Finds compiler_proxy INFO log files.
+
+  Args:
+    logdir: a log directory
+  Returns:
+    a list of full path names of compiler_proxy INFO log files of the last
+    compiler_proxy invocation.
+  """
+  all_compiler_proxy_logs = glob.glob(
+      os.path.join(logdir, 'compiler_proxy.*.INFO.*'))
+  all_compiler_proxy_logs.sort(reverse=True)
+  compiler_proxy_logs = []
+  for logfile in all_compiler_proxy_logs:
+    with OpenWrapper(logfile) as f:
+      # Check if the file starts with "goma built revision" log.
+      # It is the first log file of the compiler_proxy invocation.
+      for line in f.readlines(1024):
+        if line.find('goma built revision') > 0:
+          compiler_proxy_logs.append(logfile)
+          compiler_proxy_logs.reverse()
+          return compiler_proxy_logs
+      else:
+        compiler_proxy_logs.append(logfile)
+  all_compiler_proxy_logs.reverse()
+  return all_compiler_proxy_logs
+
+
+def IterLines(compiler_proxy_infos):
+  """Generates each line in compiler_proxy_infos.
+
+  Args:
+    compiler_proxy_infos: a list of file names, read in the given order.
+  Yields:
+    a line string, as produced by iterating over the opened file.
+  """
+  for logfile in compiler_proxy_infos:
+    with OpenWrapper(logfile) as f:
+      for line in f:
+        yield line
+
+
+def IterLoglines(lines):
+  """Generates each LogLine from lines.
+
+  Args:
+    lines: a line generator
+  Yields:
+    a LogLine instance.
+  """
+  last_logline = None
+  for line in lines:
+    # logging.debug('line:%s' % line)
+    logline = ParseLogline(line.rstrip())
+    if not last_logline:
+      last_logline = logline
+      continue
+    if logline.loglevel:
+      yield last_logline
+      last_logline = logline
+      continue
+    # drop "Log line format:" line.
+    if logline.logtext.startswith('Log line format:'):
+      continue
+    last_logline.logtext += '\n' + logline.logtext
+  yield last_logline
+
+
+class DurationPerParallelism(object):
+  """Duration per parallelism."""
+
+  def __init__(self):
+    self.durations = dict()
+    self.last_time = None
+    self.parallelism = 0
+
+  def Start(self, time):
+    """Start new task at the time."""
+    self._Update(time)
+    self.parallelism += 1
+
+  def Finish(self, time):
+    """Finish a task at the time."""
+    self._Update(time)
+    self.parallelism -= 1
+
+  def _Update(self, time):
+    duration = time - self.last_time
+    if duration < datetime.timedelta():
+      logging.debug('negative duration: %s - %s' % (time, self.last_time))
+      duration = datetime.timedelta()
+    self.durations.setdefault(self.parallelism, datetime.timedelta())
+    self.durations[self.parallelism] += duration
+    self.last_time = time
+
+
+def ParseJsonStats(logline):
+  """Parse json stats logged before compiler_proxy quitting.
+
+  Args:
+    logline: string (in json form)
+
+  Returns:
+    json object
+  """
+  try:
+    return json.loads(logline)
+  except ValueError as ex:
+    print('failed to parse stats as json. stats=%s error=%s' % (logline, ex))
+    return None
+
+
+def LongValueFromJsonStats(json_stats, keys):
+  """Get long integer value from stats with iterating keys.
+  For example: when keys = ['stats', 'timeStats', 'uptime'], this returns
+  long(json_stats['stats']['timeStats']['uptime']) if any.
+
+  Args:
+    stats: json
+    keys: iterable keys
+
+  Returns:
+    long value if any. None otherwise.
+  """
+  curr = json_stats
+  for k in keys:
+    if not curr or k not in curr:
+      return None
+    curr = curr[k]
+  if not curr:
+    return None
+  return long(curr)
+
+
+def ParseGomaFlags(logline):
+  """Parse goma flags
+
+  We assume each line has this kind of form
+    GOMA_AAA=BBB
+    GOMA_CCC=DDD (auto configured)
+
+  Returns:
+    dict like { GOMA_AAA: BBB, GOMA_CCC: DDD }.
+    last (auto configured) will be dropped from the value.
+  """
+
+  result = {}
+  for line in logline.splitlines():
+    line = line.strip()
+    if line.endswith("(auto configured)"):
+      line = line[:-len("(auto configured)")].strip()
+    pos = line.find('=')
+    if pos < 0:
+      continue
+    key, value = line[0:pos], line[pos + 1:]
+    result[key] = value
+
+  return result
+
+
+class SimpleStats(object):
+  """Simple Statistics."""
+
+  def __init__(self):
+    self.stats = {}
+
+  def Update(self, name, value):
+    """Update statistics.
+
+    Args:
+      name: a string name of entry.
+      value: a numeric value to add.
+    """
+    if not self.stats.get(name):
+      self.stats[name] = {}
+    self.stats[name]['num'] = self.stats[name].get('num', 0) + 1
+    self.stats[name]['min'] = min(
+        self.stats[name].get('min', sys.maxint), value)
+    self.stats[name]['max'] = max(
+        self.stats[name].get('max', -sys.maxint - 1), value)
+    self.stats[name]['sum'] = self.stats[name].get('sum', 0) + value
+    self.stats[name]['sum_x2'] = (
+        self.stats[name].get('sum_x2', 0) + value*value)
+
+
+def DiagnoseGomaLog(options, args):
+  """Diagnoses goma log files.
+
+  Args:
+    options: options
+    args: a list of log files to be processed.  If none, it will read log files
+          in options.logdir.
+  Returns:
+    0 if no critial error logs found.  1 when critial error logs found.
+  """
+  if args:
+    compiler_proxy_infos = args
+  else:
+    compiler_proxy_infos = FindCompilerProxyInfos(options.logdir)
+  if not compiler_proxy_infos:
+    logging.error('no compiler_proxy INFO file found')
+    return 1
+
+  print compiler_proxy_infos
+
+  log_created = None  # Initial LogLine.
+  goma_revision = None
+  goma_version = None
+  goma_flags = None
+  goma_limits = None
+
+  goma_flags_parsed = None
+
+  # TaskLog for each compile type.
+  # Each dict will have task id as key, TaskLog as value.
+  tasks = {'compile': dict(), 'precompile': dict(), 'link': dict()}
+  # Task's replies for each compile type.
+  # Each dict will have task response as key, and counts as value.
+  replies = {'compile': dict(), 'precompile': dict(), 'link': dict()}
+  # List of failed tasks for each compile type.
+  # Each list will be a list of TaskLogs.
+  fail_tasks = {'compile': [], 'precompile': [], 'link': []}
+
+  # Lists of LogLines for each loglevel.
+  fatals = []
+  errors = []
+  warnings = []
+
+  # Warnings that could be seen in normal cases.
+  # key: regexp for a log text.
+  # value: a format for key string of warnings_known. %s will be replaced
+  #     with $1 for the key regexp.
+  warnings_pattern = [
+      (re.compile(
+          r'.* \((.*)\) Using "defined" in macro causes undefined behavior.*'),
+       'Using "defined" in macro causes undefined behavior %s'),
+      (re.compile(r'.*Task:(.*) request didn\'t have full content.*'),
+       'request missing input in Task:%s'),
+  ]
+  warnings_known = dict()
+
+  uptime = 0
+  slow_tasks = []
+  slow_task_stats = SimpleStats()
+
+  messages = []
+
+  task_pendings = dict()
+
+  durations_per_parallelism = DurationPerParallelism()
+
+  crash_dump = ''
+
+  error_task_ids = set()
+  warning_task_ids = set()
+
+  statz_output = ''
+  json_statz_output = ''
+
+  for logline in IterLoglines(IterLines(compiler_proxy_infos)):
+    logging.debug('logline:%s', logline)
+    if not log_created:
+      log_created = logline
+      continue
+
+    if not durations_per_parallelism.last_time and logline.logtime:
+      durations_per_parallelism.last_time = logline.logtime
+
+    if logline.loglevel == 'F':
+      fatals.append(logline)
+    elif logline.loglevel == 'E':
+      errors.append(logline)
+    elif logline.loglevel == 'W':
+      for pat, w in warnings_pattern:
+        m = pat.match(logline.logtext)
+        if m:
+          warntype = w % m.group(1)
+          warnings_known.setdefault(warntype, 0)
+          warnings_known[warntype] += 1
+          break
+      else:
+        warnings.append(logline)
+
+    if not goma_revision:
+      m = re.match('.*goma built revision (.*)', logline.logtext)
+      if m:
+        goma_revision = m.group(1)
+        continue
+    if not goma_version:
+      m = re.match('.*goma version:(.*)', logline.logtext)
+      if m:
+        goma_version = m.group(1)
+        continue
+    if not goma_flags:
+      m = re.match('.*goma flags:(.*)', logline.logtext, flags=re.DOTALL)
+      if m:
+        goma_flags = m.group(1)
+        goma_flags_parsed = ParseGomaFlags(goma_flags)
+        continue
+    if not goma_limits:
+      m = re.match('.*(max incoming:.*)', logline.logtext)
+      if m:
+        goma_limits = m.group(1)
+        continue
+
+    m = re.match('.*Crash Dump (.*)', logline.logtext)
+    if m:
+      crash_dump = m.group(1)
+      continue
+
+    m = re.match('.*Task:(\\d+) (.*)', logline.logtext)
+    if m:
+      # Task's LogLine.
+      task_id = m.group(1)
+      task_log = m.group(2)
+      if logline.loglevel == 'E':
+        error_task_ids.add(task_id)
+      if logline.loglevel == 'W':
+        warning_task_ids.add(task_id)
+      m = re.match('Start (.*)', task_log)
+      if m:
+        # Task's start.
+        task = TaskLog(task_id, m.group(1), logline.logtime)
+        if task_pendings.get(task_id):
+          task.loglines.extend(task_pendings[task_id])
+          slow_tasks.append('Task:%s time to start: %s' % (
+              task_id, (task.start_time - task.loglines[0].logtime)))
+          slow_task_stats.Update(
+              'start task too slow',
+              (task.start_time - task.loglines[0].logtime).total_seconds())
+          task.start_time = task.loglines[0].logtime
+          del task_pendings[task_id]
+        else:
+          # just start now
+          durations_per_parallelism.Start(logline.logtime)
+        task.loglines.append(logline)
+        tasks[task.compile_type][task.id] = task
+        logging.info('task start: %s %s', task_id, task.compile_type)
+        continue
+
+      # Lookup the TaskLog by taskId.
+      for compile_type in tasks:
+        task = tasks[compile_type].get(task_id)
+        if task:
+          break
+      if not task:
+        # maybe, flag fail or pending.  e.g. b/6845420
+        task_pendings.setdefault(task_id, [])
+        task_pendings[task_id].append(logline)
+        if len(task_pendings[task_id]) == 1:
+          # start pending?
+          durations_per_parallelism.Start(logline.logtime)
+        logging.info('Task:%s log without Start: %s' % (
+            task_id, logline.logtext))
+        continue
+      task.loglines.append(logline)
+      m = re.match('ReplyResponse: (.*)', task_log)
+      if m:
+        # Task's response.
+        task.end_time = logline.logtime
+        task.response = m.group(1)
+        durations_per_parallelism.Finish(logline.logtime)
+        logging.info('task end: %s %s %s',
+                     task_id, task.response, task.Duration())
+        replies[task.compile_type].setdefault(task.response, 0)
+        replies[task.compile_type][task.response] += 1
+        if task.response == 'fail fallback':
+          fail_tasks[task.compile_type].append(task)
+        continue
+
+    m = re.match('.*Dumping stats...(.*)', logline.logtext,
+                 flags=re.DOTALL)
+    if m:
+      statz_output = m.group(1)
+
+    json_stats = None
+    m = re.match('.*Dumping json stats...(.*)', logline.logtext,
+                 flags=re.DOTALL)
+    if m:
+      json_statz_output = m.group(1)
+      json_stats = ParseJsonStats(json_statz_output)
+
+    if json_stats:
+      uptime = LongValueFromJsonStats(json_stats,
+          ['stats', 'timeStats', 'uptime'])
+      consuming_memory = LongValueFromJsonStats(json_stats,
+          ['stats', 'memoryStats', 'consuming'])
+      missed_files = LongValueFromJsonStats(json_stats,
+          ['stats', 'fileStats', 'missed'])
+
+      memory_threshold = options.memory_threshold
+      if memory_threshold < 0:
+        # Automatically configure memory threshold.
+        gibibyte = 1024 * 1024 * 1024L
+        if ('GOMA_MAX_POOLED_INCLUDE_DIR_CACHE' in goma_flags_parsed and
+            goma_flags_parsed['GOMA_MAX_POOLED_INCLUDE_DIR_CACHE'].isdigit()):
+          x = int(goma_flags_parsed['GOMA_MAX_POOLED_INCLUDE_DIR_CACHE'])
+          memory_threshold = 2 * gibibyte + x * (gibibyte / 256)
+        else:
+          memory_threshold = 3 * gibibyte
+
+      if consuming_memory and consuming_memory > memory_threshold:
+        messages.append('Consumed too much memory: %d > %d' % (
+                        consuming_memory, memory_threshold))
+
+      if missed_files and missed_files > options.filemiss_threshold:
+        messages.append('Too much missing files: %d > %d' % (
+                        missed_files, options.filemiss_threshold))
+
+  print log_created.logtext
+  print
+  print 'goma built revision %s' % goma_revision
+  print 'goma version %s' % goma_version
+  print 'goma flags %s' % goma_flags
+  print 'goma limits %s' % goma_limits
+
+  print
+  for compile_type in tasks:
+    print
+    print '%s: # of tasks: %d' % (compile_type, len(tasks[compile_type]))
+    if tasks[compile_type]:
+      print '   replies:'
+      for resp in replies[compile_type].keys():
+        print '     %s : %d' % (resp, replies[compile_type][resp])
+      unfinished = []
+      for task in tasks[compile_type].values():
+        if not task.response:
+          unfinished.append(task)
+      if len(unfinished) > 0:
+        messages.append('unfinished job %d' % len(unfinished))
+        print '   unfinished: %d' % len(unfinished)
+        for task in unfinished:
+          print '     Task:%s - unfinished' % task.id
+          for logline in task.loglines:
+            print '       %s %s' % (logline.logtime, logline.logtext)
+          print
+
+      print '   durations:'
+      durations = tasks[compile_type].values()
+      total_duration = datetime.timedelta()
+      durations.sort(cmp=lambda a, b: cmp(a.Duration(), b.Duration()))
+      for d in durations:
+        total_duration += d.Duration()
+      print '       ave : %s' % (total_duration / len(durations))
+      print '       max : %s' % durations[-1].Duration()
+      print '        98%%: %s' % durations[int(len(durations)*0.98)].Duration()
+      print '        91%%: %s' % durations[int(len(durations)*0.91)].Duration()
+      print '        75%%: %s' % durations[int(len(durations)*0.75)].Duration()
+      print '        50%%: %s' % durations[len(durations)/2].Duration()
+      print '        25%%: %s' % durations[int(len(durations)*0.25)].Duration()
+      print '         9%%: %s' % durations[int(len(durations)*0.09)].Duration()
+      print '         2%%: %s' % durations[int(len(durations)*0.02)].Duration()
+      print '       min : %s' % durations[0].Duration()
+      print '   long tasks:'
+      for i in range(min(3, len(durations))):
+        task = durations[-(i + 1)]
+        print '   #%d %s Task:%s' % (i + 1, task.Duration(), task.id)
+        print '       %s' % task.desc
+        print '       %s' % task.response
+      if fail_tasks[compile_type]:
+        if len(fail_tasks[compile_type]) > options.fail_tasks_threshold:
+          messages.append('Too many fail tasks in %s: %d > %d' % (
+              compile_type, len(fail_tasks[compile_type]),
+              options.fail_tasks_threshold))
+        print '   fail tasks:'
+        for i in range(min(3, len(fail_tasks[compile_type]))):
+          task = fail_tasks[compile_type][i]
+          print '    Task:%s' % task.id
+          print '      %s' % task.desc
+          for logline in task.loglines:
+            print '       %s %s' % (logline.logtime, logline.logtext)
+
+  if crash_dump:
+    messages.append('CRASH dump exists')
+    print
+    print 'Crash'
+    print crash_dump
+
+  if statz_output:
+    print
+    print 'Goma stats: ', statz_output
+
+  if json_statz_output:
+    print
+    print 'Goma json stats: ', json_statz_output
+
+  print
+  print 'Duration per num active tasks'
+  for p in durations_per_parallelism.durations:
+    print ' %d tasks: %s' % (p, durations_per_parallelism.durations[p])
+
+  if fatals:
+    messages.append('FATAL log exists: %s' % len(fatals))
+    print
+    print 'Fatal'
+    for fatal in fatals:
+      print fatal
+  if len(error_task_ids) > options.errors_threshold:
+    messages.append('Task having ERROR log exists: %s > %s' % (
+        len(error_task_ids), options.errors_threshold))
+  if options.show_errors and errors:
+    print
+    print 'Error'
+    for error in errors:
+      print error
+  if warnings_known:
+    print
+    warnings_known_out = []
+    for warntype, count in warnings_known.iteritems():
+      if count > options.show_known_warnings_threshold:
+        warnings_known_out.append('  %d: %s' % (count, warntype))
+    if warnings_known_out:
+      print 'Known warning'
+      for warning in warnings_known_out:
+        print warning
+  if len(warning_task_ids) > options.warnings_threshold:
+    messages.append('Task having WARNING log exists: %s > %s' % (
+        len(warning_task_ids), options.warnings_threshold))
+  if options.show_warnings and warnings:
+    print
+    print 'Warning'
+    for warning in warnings:
+      print warning
+
+  if (len(slow_tasks) > 0 and
+      uptime > options.show_slow_tasks_if_uptime_longer_than_sec):
+    options.show_slow_tasks = True
+    for key, value in slow_task_stats.stats.iteritems():
+      messages.append('%s: num=%d, longest=%s' % (
+          key, value['num'], value['max']))
+
+  if options.show_slow_tasks and slow_tasks:
+    print
+    print 'SLOW Tasks'
+    for slow_task in slow_tasks:
+      print slow_task
+
+  if options.output_json:
+    summary = {
+        'stats': {
+            'fatal': len(fatals),
+            'error': len(errors),
+            'warning': len(warnings),
+        },
+        'messages': messages,
+        'goma_revision': goma_revision,
+        'goma_version': goma_version,
+        'uptime': uptime,
+    }
+    with open(options.output_json, 'w') as f:
+      json.dump(summary, f)
+
+  if messages:
+    print
+    for msg in messages:
+      print msg
+    return 1
+  return 0
+
+
+def GetGlogDir():
+  """Get glog directory.
+
+  It should match the logic with GetTempDirectories in
+  third_party/glog/src/logging.cc
+  On Windows, GetTempPathA will be $TMP, $TEMP, $USERPROFILE and the Windows
+  directory.
+  http://msdn.microsoft.com/ja-jp/library/windows/desktop/aa364992(v=vs.85).aspx
+
+  Returns:
+    a directory name.
+  """
+  candidates = [os.environ.get('TEST_TMPDIR', ''),
+                os.environ.get('TMPDIR', ''),
+                os.environ.get('TMP', '')]
+  for tmpdir in candidates:
+    if os.path.isdir(tmpdir):
+      return tmpdir
+  return '/tmp'
+
+
+def main():
+  option_parser = optparse.OptionParser()
+  option_parser.add_option('', '--logdir', default=GetGlogDir(),
+                           help='directory in which compiler_proxy.INFO exits')
+  option_parser.add_option('', '--show-errors', action='store_true',
+                           default=True,
+                           help='show error log messages')
+  option_parser.add_option('', '--no-show-errors', action='store_false',
+                           dest='show_errors',
+                           help='do not show error log messages')
+  option_parser.add_option('', '--show-warnings', action='store_true',
+                           default=False,
+                           help='show warning log messages')
+  option_parser.add_option('', '--no-show-warnings', action='store_false',
+                           dest='show_warnings',
+                           help='do not show warning log messages')
+  option_parser.add_option('', '--show-known-warnings-threshold',
+                           default=5,
+                           help='show known warnings threshold')
+  option_parser.add_option('', '--fail-tasks-threshold',
+                           default=0,
+                           help='threshold for fail tasks')
+  option_parser.add_option('', '--errors-threshold',
+                           default=10,
+                           help='threshold for ERROR logs')
+  option_parser.add_option('', '--warnings-threshold',
+                           default=100,
+                           help='threshold for WARNING logs')
+  option_parser.add_option('', '--show-slow-tasks', action='store_true',
+                           default=False,
+                           help='show slow tasks')
+  option_parser.add_option('', '--no-show-slow-tasks', action='store_false',
+                           dest='show_slow_tasks',
+                           help='do not show slow tasks')
+  option_parser.add_option('', '--show-slow-tasks-if-uptime-longer-than-sec',
+                           default=2700,
+                           help='show slow tasks if compiler_proxy uptime is'
+                                'longer than this seconds')
+  option_parser.add_option('', '--memory-threshold',
+                           default=-1,
+                           help='threshold for memory comsuption. '
+                                'automatically configured if negative')
+  option_parser.add_option('', '--filemiss-threshold',
+                           default=30000,
+                           help="threshold for file missed")
+  option_parser.add_option('-v', '--verbose', action='count', default=0,
+                           help='verbose logging')
+  option_parser.add_option('-o', '--output-json',
+                           help='Output JSON information into a specified file')
+  options, args = option_parser.parse_args()
+  if options.verbose >= 2:
+    logging.basicConfig(level=logging.DEBUG)
+  elif options.verbose:
+    logging.basicConfig(level=logging.INFO)
+  else:
+    logging.basicConfig(level=logging.WARNING)
+
+  return DiagnoseGomaLog(options, args)
+
+
+if '__main__' == __name__:
+  sys.exit(main())
diff --git a/client/directive_filter.cc b/client/directive_filter.cc
new file mode 100644
index 0000000..23e441a
--- /dev/null
+++ b/client/directive_filter.cc
@@ -0,0 +1,332 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "directive_filter.h"
+
+#ifdef TEST
+#include <stdio.h>
+#endif
+#include <string.h>
+
+#include <memory>
+#include <vector>
+
+#include "content.h"
+#include "glog/logging.h"
+#include "string_piece.h"
+#include "string_piece_utils.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// static
+std::unique_ptr<Content> DirectiveFilter::MakeFilteredContent(
+    const Content& content) {
+  // Each pass below only removes or shortens text, so a single scratch
+  // buffer is filled once and then filtered in place by the later passes.
+  std::unique_ptr<char[]> scratch(new char[content.size() + 1]);
+  char* const work = scratch.get();
+
+  // Pass 1: copy the input into the scratch buffer without comments.
+  size_t used = RemoveComments(content.buf(), content.buf_end(), work);
+
+  // Pass 2: keep only the lines that start with '#'.
+  used = FilterOnlyDirectives(work, work + used, work);
+  // Pass 3: join lines that were split with backslash-newline.
+  used = RemoveEscapedNewLine(work, work + used, work);
+  // Pass 4: drop directives that cannot change the set of included files.
+  used = RemoveDeadDirectives(work, work + used, work);
+
+  return Content::CreateFromBuffer(work, used);
+}
+
+// static
+// Advances |pos| over blanks (' ' and '\t') and escaped newlines, and
+// returns the first position holding anything else, or |end| if none.
+const char* DirectiveFilter::SkipSpaces(const char* pos, const char* end) {
+  for (;;) {
+    if (pos == end)
+      return end;
+    const char c = *pos;
+    if (c == ' ' || c == '\t') {
+      pos += 1;
+      continue;
+    }
+
+    const int skipped = IsEscapedNewLine(pos, end);
+    if (skipped <= 0)
+      return pos;
+    pos += skipped;
+  }
+}
+
+// static
+// Returns the position just past the first unescaped '\n' at or after |pos|,
+// or |end| when the rest of the input has no such newline.
+const char* DirectiveFilter::NextLineHead(const char* pos, const char* end) {
+  const char* cur = pos;
+  while (cur != end) {
+    if (*cur == '\n')
+      return cur + 1;
+
+    // An escaped newline continues the logical line: step over all of it.
+    const int escaped = IsEscapedNewLine(cur, end);
+    cur += (escaped != 0) ? escaped : 1;
+  }
+  return end;
+}
+
+// static
+// Copies the string literal that starts at |pos| (a '"') to |dst|, up to and
+// including its closing '"' or an unescaped '\n'. Returns the bytes copied.
+int DirectiveFilter::CopyStringLiteral(const char* pos, const char* end,
+                                       char* dst) {
+  const char* initial_pos = pos;
+
+  DCHECK(pos != end);  // Must hold before |*pos| is read below.
+  DCHECK_EQ(*pos, '\"');
+
+  // Copy '\"'
+  *dst++ = *pos++;
+
+  while (pos != end) {
+    // String literal ends.
+    if (*pos == '\"') {
+      *dst++ = *pos++;
+      break;
+    }
+
+    // Corresponding " was not found. Keep this as is.
+    if (*pos == '\n') {
+      *dst++ = *pos++;
+      break;
+    }
+
+    int newline_byte = IsEscapedNewLine(pos, end);
+    if (newline_byte > 0) {
+      while (newline_byte--) {
+        *dst++ = *pos++;
+      }
+      continue;
+    }
+
+    // \" does not end the literal, and \\ is an escaped backslash that must
+    // not escape the byte after it ("\\" is a complete literal). Trigraphs
+    // ("??/" means "\") are deliberately not supported.
+    if (*pos == '\\' && pos + 1 != end &&
+        (*(pos + 1) == '\"' ||
+         (*(pos + 1) == '\\' && IsEscapedNewLine(pos + 1, end) == 0))) {
+      *dst++ = *pos++;  // The escaping backslash; the escaped byte follows.
+    }
+    *dst++ = *pos++;
+  }
+  return pos - initial_pos;
+}
+
+// static
+// Returns the byte length of the escaped newline at |pos| (2 for "\\\n",
+// 3 for "\\\r\n"), or 0 if there is none, including when |pos| == |end|.
+int DirectiveFilter::IsEscapedNewLine(const char* pos, const char* end) {
+  if (pos == end || *pos != '\\')  // Never read |*pos| in an empty range.
+    return 0;
+
+  if (pos + 1 < end && *(pos + 1) == '\n')
+    return 2;
+  if (pos + 2 < end && *(pos + 1) == '\r' && *(pos + 2) == '\n')
+    return 3;
+  return 0;
+}
+
+// Copies [|src|, |end|) to |dst| with comments removed; returns bytes written.
+// TODO: We assume '"' is not in include pathname.
+// When such pathname exists, this won't work well. e.g. #include <foo"bar>
+// static
+size_t DirectiveFilter::RemoveComments(const char* src, const char* end,
+                                       char* dst) {
+  const char* original_dst = dst;
+
+  while (src != end) {
+    // String literal starts: copy it whole so comment markers inside survive.
+    if (*src == '\"') {
+      int num_copied = CopyStringLiteral(src, end, dst);
+      src += num_copied;
+      dst += num_copied;
+      continue;
+    }
+
+    // Not a '/' that is followed by another byte, so no comment starts here.
+    if (*src != '/' || src + 1 == end) {
+      *dst++ = *src++;
+      continue;
+    }
+
+    // Block comment starts: look for the closing "*/" after the opening "/*".
+    if (*(src + 1) == '*') {
+      const char* end_comment = nullptr;
+      const char* pos = src + 2;
+      while (pos + 2 <= end) {
+        if (*pos == '*' && *(pos + 1) == '/') {
+          end_comment = pos;
+          break;
+        }
+        ++pos;
+      }
+
+      // When the block comment is never closed, copy the rest unchanged.
+      if (end_comment == nullptr) {
+        while (src < end)
+          *dst++ = *src++;
+        return dst - original_dst;
+      }
+
+      src = end_comment + 2;
+      *dst++ = ' ';
+      continue;
+    }
+
+    // One-line comment starts: replace it, newline included, with one '\n'.
+    if (*(src + 1) == '/') {
+      src = DirectiveFilter::NextLineHead(src + 2, end);
+      *dst++ = '\n';
+      continue;
+    }
+
+    *dst++ = *src++;
+  }
+
+  return dst - original_dst;
+}
+
+// static
+// Copies [|src|, |end|) to |dst| minus every escaped newline and returns
+// the number of bytes written.
+size_t DirectiveFilter::RemoveEscapedNewLine(
+    const char* src, const char* end, char* dst) {
+  char* out = dst;
+  while (src != end) {
+    const int escaped = IsEscapedNewLine(src, end);
+    if (escaped != 0) {
+      src += escaped;  // Drop the backslash and the line break after it.
+      continue;
+    }
+    *out++ = *src++;
+  }
+  return out - dst;
+}
+
+// static
+// Copies only the lines whose first non-blank byte is '#' to |dst|, without
+// the blanks around that '#'. Returns the number of bytes written.
+size_t DirectiveFilter::FilterOnlyDirectives(
+    const char* src, const char* end, char* dst) {
+  char* out = dst;
+  while (src != end) {
+    src = SkipSpaces(src, end);
+    if (src == end || *src != '#') {
+      // Not a directive: discard the whole logical line.
+      src = NextLineHead(src, end);
+      continue;
+    }
+    *out++ = *src++;
+    src = SkipSpaces(src, end);  // Omit spaces after '#' in directive.
+    const char* const line_end = NextLineHead(src, end);
+    const size_t body_len = line_end - src;
+    memmove(out, src, body_len);  // |src| and |dst| may overlap.
+    out += body_len;
+    src = line_end;
+  }
+  return out - dst;
+}
+
+// static
+size_t DirectiveFilter::RemoveDeadDirectives(
+    const char* src, const char* end, char* dst) {
+  const char* const original_dst = dst;
+  std::vector<StringPiece> directive_stack;
+
+  while (src != end) {
+    const char* next_line_head = DirectiveFilter::NextLineHead(src, end);
+    StringPiece current_directive_line(src, next_line_head - src);
+
+    src = next_line_head;
+
+    // Drop "#error" lines for performance.
+    // We assume "#error" is almost never reached, so we let the compiler
+    // report the #error failure instead of the goma preprocessor.
+    if (strings::StartsWith(current_directive_line, "#error")) {
+      continue;
+    }
+
+    // Drop every "#pragma" line that does not contain "once".
+    // "#pragma once" is the only pragma supported in goma preprocessor.
+    if (strings::StartsWith(current_directive_line, "#pragma") &&
+        current_directive_line.find("once") == StringPiece::npos) {
+      continue;
+    }
+
+    // On #endif, drop the #else and #elif lines directly before it because
+    // such #else or #elif does not change control flow.
+    // e.g. code like the following is removed because it has no effect
+    // on included files.
+    // #if USE_STDERR
+    //   std::cerr << "some error" << std::endl;
+    // #else
+    //   std::cout << "some error" << std::endl;
+    // #endif
+    if (strings::StartsWith(current_directive_line, "#endif")) {
+      while (!directive_stack.empty() &&
+             (strings::StartsWith(directive_stack.back(), "#else") ||
+              strings::StartsWith(directive_stack.back(), "#elif"))) {
+        directive_stack.pop_back();
+      }
+
+      if (!directive_stack.empty() &&
+          strings::StartsWith(directive_stack.back(), "#if")) {
+        directive_stack.pop_back();
+      } else {
+        directive_stack.push_back(current_directive_line);
+      }
+    } else {
+      directive_stack.push_back(current_directive_line);
+    }
+  }
+
+  for (const auto& directive : directive_stack) {
+    memmove(dst, directive.begin(), directive.size());
+    dst += directive.size();
+  }
+
+  return dst - original_dst;
+}
+
+}  // namespace devtools_goma
+
+#ifdef TEST
+
+using devtools_goma::Content;
+using devtools_goma::DirectiveFilter;
+
+// Stand-alone driver: prints the filtered form of the file named by argv[1].
+int main(int argc, char* argv[]) {
+  if (argc < 2) {
+    fprintf(stderr, "Usage: directive_filter <header or source>\n");
+    return 1;
+  }
+  const char* const path = argv[1];
+
+  std::unique_ptr<Content> input(Content::CreateFromFile(path));
+  if (input.get() == nullptr) {
+    fprintf(stderr, "Cannot read %s\n", path);
+    return 1;
+  }
+
+  std::unique_ptr<Content> output(
+      DirectiveFilter::MakeFilteredContent(*input));
+  fwrite(output->buf(), sizeof(char), output->size(), stdout);
+  fflush(stdout);
+  return 0;
+}
+#endif
diff --git a/client/directive_filter.h b/client/directive_filter.h
new file mode 100644
index 0000000..ff0c5f1
--- /dev/null
+++ b/client/directive_filter.h
@@ -0,0 +1,79 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_DIRECTIVE_FILTER_H_
+#define DEVTOOLS_GOMA_CLIENT_DIRECTIVE_FILTER_H_
+
+#include <memory>
+
+#include "content.h"
+#include "gtest/gtest_prod.h"
+
+namespace devtools_goma {
+
+// DirectiveFilter removes lines
+// that do not affect included files from Content.
+//
+// TODO: Currently we cannot handle #include <foo//bar> correctly.
+class DirectiveFilter {
+ public:
+  // Removes lines that do not affect included files from |content|.
+  // The resulting Content is newly generated.
+  static std::unique_ptr<Content> MakeFilteredContent(
+      const Content& content);
+
+ private:
+  // Returns the pointer to the first character at or after |pos| that is not
+  // a space, a tab or an escaped newline. If there is none, returns |end|.
+  static const char* SkipSpaces(const char* pos, const char* end);
+
+  // Returns the pointer to the head of the next logical line.
+  // An escaped newline (\\\n or \\\r\n) does not end the logical line.
+  static const char* NextLineHead(const char* pos, const char* end);
+
+  // Copies the string literal beginning at |pos| to |dst|.
+  // Returns how many bytes were copied.
+  static int CopyStringLiteral(const char* pos, const char* end, char* dst);
+
+  // If |pos| points to "\\\n" or "\\\r\n", the number of bytes for
+  // escaped newline is returned. Otherwise, 0 is returned.
+  // For example, "a" is 0, "\\\n" is 2, and "\\\r\n" is 3.
+  static int IsEscapedNewLine(const char* pos, const char* end);
+
+  // Removes comments from |src|. It's OK if |src| and |dst| are the same.
+  // |dst| should point to at least (end - src) bytes of memory.
+  // The number of bytes copied is returned.
+  static size_t RemoveComments(const char* src, const char* end,
+                               char* dst);
+
+  // Removes escaped newlines \\\n and \\\r\n from |src|.
+  // It's OK if |src| and |dst| are the same.
+  // |dst| should point to at least (end - src) bytes of memory.
+  // The number of bytes copied is returned.
+  static size_t RemoveEscapedNewLine(const char* src, const char* end,
+                                     char* dst);
+
+  // Removes non-directive lines and the spaces around '#' from |src|. It's OK
+  // if |src| and |dst| are the same. |dst| should point to at least
+  // (end - src) bytes of memory. The number of bytes copied is returned.
+  static size_t FilterOnlyDirectives(const char* src, const char* end,
+                                     char* dst);
+
+  // Removes if/ifdef/ifndef/elif/else/endif/error/pragma directive lines
+  // that do not affect included files from |src|.
+  // It's OK if |src| and |dst| are the same.
+  // |dst| should point to at least (end - src) bytes of memory.
+  // The number of bytes copied is returned.
+  static size_t RemoveDeadDirectives(const char* src, const char* end,
+                                     char* dst);
+
+  FRIEND_TEST(DirectiveFilterTest, SkipSpaces);
+  FRIEND_TEST(DirectiveFilterTest, NextLineHead);
+  DISALLOW_COPY_AND_ASSIGN(DirectiveFilter);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_DIRECTIVE_FILTER_H_
diff --git a/client/directive_filter_unittest.cc b/client/directive_filter_unittest.cc
new file mode 100644
index 0000000..d87daf1
--- /dev/null
+++ b/client/directive_filter_unittest.cc
@@ -0,0 +1,563 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "directive_filter.h"
+
+#include <memory>
+#include <string>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "content.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class DirectiveFilterTest : public testing::Test {
+};
+
+TEST_F(DirectiveFilterTest, SkipSpaces) {
+  string src = "    12   3 \\\n 4 \\\n\\\n   5  \\\r\n  6  \\\n";
+  const char* pos = src.c_str();
+  const char* end = src.c_str() + src.size();
+
+  pos = DirectiveFilter::SkipSpaces(pos, end);
+  EXPECT_EQ('1', *pos);
+
+  ++pos;
+  pos = DirectiveFilter::SkipSpaces(pos, end);
+  EXPECT_EQ('2', *pos);
+
+  ++pos;
+  pos = DirectiveFilter::SkipSpaces(pos, end);
+  EXPECT_EQ('3', *pos);
+
+  ++pos;
+  pos = DirectiveFilter::SkipSpaces(pos, end);
+  EXPECT_EQ('4', *pos);
+
+  ++pos;
+  pos = DirectiveFilter::SkipSpaces(pos, end);
+  EXPECT_EQ('5', *pos);
+
+  ++pos;
+  pos = DirectiveFilter::SkipSpaces(pos, end);
+  EXPECT_EQ('6', *pos);
+
+  ++pos;
+  pos = DirectiveFilter::SkipSpaces(pos, end);
+  EXPECT_EQ(end, pos);
+}
+
+TEST_F(DirectiveFilterTest, NextLineHead) {
+  string src = "\n1    \\\n  \n2  \\\n\\\n\\\r\n\n3   \\\r\n";
+  const char* pos = src.c_str();
+  const char* end = src.c_str() + src.size();
+
+  pos = DirectiveFilter::NextLineHead(pos, end);
+  EXPECT_EQ('1', *pos);
+
+  ++pos;
+  pos = DirectiveFilter::NextLineHead(pos, end);
+  EXPECT_EQ('2', *pos);
+
+  ++pos;
+  pos = DirectiveFilter::NextLineHead(pos, end);
+  EXPECT_EQ('3', *pos);
+
+  ++pos;
+  pos = DirectiveFilter::NextLineHead(pos, end);
+  EXPECT_EQ(end, pos);
+}
+
+TEST_F(DirectiveFilterTest, RemovesBlockComment) {
+  // All comments will be removed.
+  string src = "/* foo bar */";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  EXPECT_EQ("",
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, RemoveNonComment) {
+  // Text that is not a preprocessor directive will be removed.
+  string src = "foo bar";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  EXPECT_EQ("",
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, RemovesBlockCommentContainingOnelineComment) {
+  string src = "/* // */";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  EXPECT_EQ("",
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, RemovesOnelineComment) {
+  string src = "// foo bar";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  EXPECT_EQ("",
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, RemovesOnelineCommentContainingBlockCommentStart1) {
+  string src = "// /*";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  EXPECT_EQ("",
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, RemovesOnelineCommentContainingBlockCommentStart2) {
+  string src = "// /*\n*/";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  EXPECT_EQ("",
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, RemovesComplexBlockComment) {
+  string src = "/*/ #include <iostream> /*/";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  EXPECT_EQ("",
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, BlockCommentIsNotFinished) {
+  string src = "/* #include <iostream>";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  EXPECT_EQ("",
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, FilterDirectives) {
+  string src =
+    "#include <iostream>\n"
+    " f(); g(); h(); \n"
+    "#include <iomanip>\n";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected =
+    "#include <iostream>\n"
+    "#include <iomanip>\n";
+
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, DirectiveIsDividedWithBackslashAndLF) {
+  string src =
+    "#include \\\n"
+    "<iostream>";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected = "#include <iostream>";
+
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, DirectiveIsDividedWithBackslashAndLFLF) {
+  string src =
+    "#include \\\n\\\n"
+    "<iostream>";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected = "#include <iostream>";
+
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, DirectiveIsDividedWithBackslashAndCRLF) {
+  string src =
+    "#include \\\r\n"
+    "<iostream>";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected = "#include <iostream>";
+
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, EmptyLineAndBackslashLFBeforeDirective) {
+  string src =
+    "                \\\n"
+    "#include <iostream>";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  EXPECT_EQ("#include <iostream>",
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, EmptyLineAndBackslashLFLFBeforeDirective) {
+  string src =
+    "                \\\n\\\n"
+    "#include <iostream>";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  EXPECT_EQ("#include <iostream>",
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, EmptyLineAndBackslashCRLFBeforeDirective) {
+  string src =
+    "                \\\r\n"
+    "#include <iostream>";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  EXPECT_EQ("#include <iostream>",
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, DirectiveIsDividedWithComments) {
+  string src =
+    "#include /*\n"
+    " something */\\\n"
+    "<iostream>\n";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  EXPECT_EQ("#include  <iostream>\n",
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, FilterDirectivesWithContinuingLines4) {
+  string src =
+    "      #include <iostream>\n"
+    "  #endif\n"
+    " #include /* hoge */\n";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected =
+    "#include <iostream>\n"
+    "#endif\n"
+    "#include  \n";
+
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, DirectiveContainsComments) {
+  string src =
+    "      #include <iostream>  //\n"
+    "  #endif /* \n"
+    " #include /* hoge */\n";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected =
+    "#include <iostream>  \n"
+    "#endif  \n";
+
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, OneLineCommentContainsBlockComment) {
+  string src = "// /* \n#include <iostream>\n";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected = "#include <iostream>\n";
+
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, IncludePathContainsSlashSlash) {
+  string src = "#include \"foo//bar\"\n";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected = "#include \"foo//bar\"\n";
+
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+// When just keeping #include containing //, we might miss other comment start.
+TEST_F(DirectiveFilterTest, IncludePathContainsSlashSlash2) {
+  string src = "#include \"foo//bar\" /*\n hoge */\n";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected = "#include \"foo//bar\"  \n";
+
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+// When just keeping #include containing //, we might miss other comment start.
+TEST_F(DirectiveFilterTest, IncludePathContainsSlashSlash3) {
+  string src = "#include \"foo//bar\" // hoge */\n";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected = "#include \"foo//bar\" \n";
+
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, StrayDoubleQuotation) {
+  string src = "\"\n#include <iostream>\n";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected = "#include <iostream>\n";
+
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, StrayDoubleQuotation2) {
+  string src = "#include <iostream> \"\n";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected = "#include <iostream> \"\n";
+
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, BlockCommentStartInString) {
+  string src = "\"ho/*ge\"\n#include <iostream>\n\"fu*/ga\"";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected = "#include <iostream>\n";
+
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, LineCommentStartInString) {
+  string src = "#define HOGE \"HOGE\\FUGA\"\n";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected = "#define HOGE \"HOGE\\FUGA\"\n";
+
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, MultipleLineString) {
+  string src =
+    "#define HOGE \"HOGE\\\n"
+    "//\\\"hoge\\\"\\\n"
+    "FUGA\"\n";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected =
+      "#define HOGE \"HOGE//\\\"hoge\\\"FUGA\"\n";
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, StringContainingDoubleQuotation) {
+  string src =
+    "#define HOGE \"HOGE\\\"\\\n"
+    "//\\\"hoge\\\"\\\n"
+    "FUGA\"\n";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected =
+    "#define HOGE \"HOGE\\\"//\\\"hoge\\\"FUGA\"\n";
+
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, MultipleLineDirectiveAndIdentifier) {
+  string src =
+      "#de\\\n"
+      "fi\\\n"
+      "ne\\\n"
+      " \\\n"
+      "H\\\n"
+      "OG\\\n"
+      "E";
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected =
+    "#define HOGE";
+
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, NomeaningIfDirectives) {
+  string src =
+      "#if 1\n"
+      "# pragma once\n"
+      "#else\n"
+      "# error removed_error\n"
+      "#endif\n"
+      "#ifdef x\n"
+      "# pragma comment(lib, \"hoge\")\n"
+      "#endif\n"
+      "#ifndef x\n"
+      "#elif 2\n"
+      "#else\n"
+      "#endif\n";
+
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected =
+      "#if 1\n"
+      "#pragma once\n"
+      "#endif\n";
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, NomeaningIfDirectivesNested) {
+  string src =
+      "#if X\n"
+      "# if Y\n"
+      "# endif\n"
+      "#endif\n"
+      "#define hoge\n"
+      "#if X\n"
+      "# if Y\n"
+      "#  include \"something.h\"\n"
+      "# endif\n"
+      "#endif\n"
+      "#define fuga\n"
+      "#if X\n"
+      "# if Y\n"
+      "# elif Z\n"
+      "#  include \"something.h\"\n"
+      "# endif\n"
+      "#endif\n";
+
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected =
+      "#define hoge\n"
+      "#if X\n"
+      "#if Y\n"
+      "#include \"something.h\"\n"
+      "#endif\n"
+      "#endif\n"
+      "#define fuga\n"
+      "#if X\n"
+      "#if Y\n"
+      "#elif Z\n"
+      "#include \"something.h\"\n"
+      "#endif\n"
+      "#endif\n";
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, NomeaningIfDirectivesUnbalanced1) {
+  string src =
+      "#if A\n";
+
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected =
+      "#if A\n";
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, NomeaningIfDirectivesUnbalanced2) {
+  string src =
+      "#endif\n";
+
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected =
+      "#endif\n";
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+TEST_F(DirectiveFilterTest, NomeaningIfDirectivesUnbalanced3) {
+  string src =
+      "#elif A\n"
+      "#endif\n";
+
+  std::unique_ptr<Content> content(Content::CreateFromString(src));
+  std::unique_ptr<Content> filtered(
+      DirectiveFilter::MakeFilteredContent(*content));
+
+  string expected =
+      "#endif\n";
+  EXPECT_EQ(expected,
+            string(filtered->buf(), filtered->buf_end() - filtered->buf()));
+}
+
+}  // namespace devtools_goma
diff --git a/client/dump_env.c b/client/dump_env.c
new file mode 100644
index 0000000..54476ed
--- /dev/null
+++ b/client/dump_env.c
@@ -0,0 +1,19 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include <stdio.h>
+
+/* Writes each argument to stdout and each environment entry to stderr. */
+int main(int argc, char** argv, char** envp) {
+  int arg_index = 0;
+  char** entry = envp;
+  while (arg_index < argc) {
+    fprintf(stdout, "%s\n", argv[arg_index++]);
+  }
+  while (*entry != NULL) {
+    fprintf(stderr, "%s\n", *entry++);
+  }
+  return 0;
+}
diff --git a/client/dump_syms.py b/client/dump_syms.py
new file mode 100644
index 0000000..af01d54
--- /dev/null
+++ b/client/dump_syms.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+#
+# Copyright 2015 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Dump executable's breakpad symbols.
+
+Usage:
+  % dump_syms.py --dump_syms ../../dump_syms --input compiler_proxy \
+    --output compiler_proxy.sym
+"""
+
+import argparse
+import os
+import platform
+import re
+import subprocess
+import sys
+
+class Error(Exception):
+  """Raised by main() when dump_syms or the input binary does not exist."""
+
+
+class DumpSyms(object):
+  """Runs the dump_syms command on a binary and saves its stdout to a file."""
+
+  def __init__(self, dump_syms, src, dst):
+    """Remember the command and the file names used by Dump().
+
+    Args:
+      dump_syms: path to the dump_syms command.
+      src: name of the file to dump symbols from.
+      dst: name of the output file the symbols are written to.
+    """
+    self._dump_syms = dump_syms
+    self._src = src
+    self._dst = dst
+
+  def Dump(self):
+    """Dump breakpad symbols of src to dst; check_call raises on failure."""
+    with open(self._dst, 'w') as f:
+      subprocess.check_call([self._dump_syms, self._src], stdout=f)
+
+
+class MacDumpSyms(DumpSyms):
+  """Dump syms for mac."""
+
+  def Dump(self):
+    """Dump symbols for breakpad."""
+    dsym_file = self._src + '.dSYM'
+    subprocess.check_call(['dsymutil', self._src, '-o', dsym_file])
+    with open(self._dst, 'w') as f:
+      p = subprocess.Popen(
+        [self._dump_syms, '-g', dsym_file, self._src],
+        stdout=f, stderr=subprocess.PIPE)
+      _, stderr_data = p.communicate()
+
+      # Filtering noisy warnings.
+      # b/17405320
+      # https://crbug.com/392648
+      filter_re = re.compile(
+        r'^.*: warning: function at offset 0x[a-f\d]+ has no name$' +
+        r'|^.*: the DIE at offset 0x[a-f\d]+ has a DW_AT_.*$' +
+        r'|^.*: warning: failed to demangle [\.\w]+$' +
+        r'|^.*: in compilation unit .* \(offset 0x[a-f\d]+\):$')
+      for line in stderr_data.split('\n'):
+        if not filter_re.match(line):
+          print >> sys.stderr, line
+
+def GetDumpSyms(dump_syms, src, dst):
+  if platform.system() == 'Darwin':
+    return MacDumpSyms(dump_syms, src, dst)
+  return DumpSyms(dump_syms, src, dst)
+
+def main():
+  parser = argparse.ArgumentParser(description='dump breakpad symbols')
+  parser.add_argument('--dump_syms', help='path to dump_syms command',
+                      required=True)
+  parser.add_argument('--input',
+                      help=('input for dump_syms command. '
+                            'input should be a binary with debug symbols'),
+                      required=True)
+  parser.add_argument('--output', help='sym filename', required=True)
+  args = parser.parse_args()
+
+  if not os.path.exists(args.dump_syms):
+    raise Error('dump_syms %s does not exist.' % args.dump_syms)
+  if not os.path.exists(args.input):
+    raise Error('input binary %s does not exist.' % args.input)
+  ds = GetDumpSyms(args.dump_syms, args.input, args.output)
+  ds.Dump()
+
+
+if __name__ == '__main__':
+  main()
diff --git a/client/elf_parser.cc b/client/elf_parser.cc
new file mode 100644
index 0000000..e274c9d
--- /dev/null
+++ b/client/elf_parser.cc
@@ -0,0 +1,481 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "elf_parser.h"
+
+#ifdef __linux__
+#include <elf.h>
+#endif
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+
+#include "scoped_fd.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+template <typename Ehdr, typename Phdr, typename Shdr, typename Dyn>
+class ElfParserImpl : public ElfParser {
+ public:
+  ElfParserImpl(const string& filename, ScopedFd&& fd,
+                const char elfIdent[EI_NIDENT])
+      : ElfParser(),
+        filename_(filename),
+        fd_(std::move(fd)),
+        valid_(false),
+        use_program_header_(true),
+        dynamic_phdr_(nullptr),
+        strtab_shdr_(nullptr),
+        dynamic_shdr_(nullptr),
+        text_offset_(0) {
+    VLOG(1) << "Elf:" << filename;
+    memset(&ehdr_, 0, sizeof ehdr_);
+    memcpy(ehdr_.e_ident, elfIdent, EI_NIDENT);
+    int elf_class = elfIdent[EI_CLASS];
+    VLOG(1) << "elf_class=" << elf_class;
+    int elf_data = elfIdent[EI_DATA];
+    VLOG(1) << "elf_data=" << elf_data;
+
+    valid_ = (memcmp(elfIdent, ELFMAG, SELFMAG) == 0);
+    if (valid_) {
+      valid_ = elfIdent[EI_DATA] == ELFDATA2LSB;
+      LOG_IF(ERROR, !valid_) << "unsupported data encoding:"
+                             << elfIdent[EI_DATA];
+    }
+    CheckIdent();
+  }
+  ~ElfParserImpl() override {
+    for (size_t i = 0; i < phdrs_.size(); ++i)
+      delete phdrs_[i];
+    for (size_t i = 0; i < shdrs_.size(); ++i)
+      delete shdrs_[i];
+  }
+
+  bool valid() const override { return valid_; }
+  void UseProgramHeader(bool use_program_header) override {
+    use_program_header_ = use_program_header;
+  }
+
+  bool ReadDynamicNeeded(std::vector<string>* needed) override {
+    VLOG(1) << "ReadDynamicNeeded:" << filename_;
+    if (!valid_) {
+      LOG(ERROR) << "not valid:" << filename_;
+      return false;
+    }
+    if (!ReadEhdr()) {
+      return false;
+    }
+    if (use_program_header_) {
+      if (!ReadPhdrs()) {
+        return false;
+      }
+      if (!ReadDynamicSegment()) {
+        return false;
+      }
+    } else {
+      if (!ReadShdrs()) {
+        return false;
+      }
+      if (!ReadDynamicSection()) {
+        return false;
+      }
+    }
+    if (!ReadDtStrtab()) {
+      return false;
+    }
+    if (dyntab_.empty()) {
+      LOG(ERROR) << "empty dyntab? " << filename_;
+      return false;
+    }
+    if (dt_strtab_.empty()) {
+      LOG(ERROR) << "empty dt_strtab? " << filename_;
+      return false;
+    }
+    ReadStringEntryInDynamic(DT_NEEDED, needed);
+    return true;
+  }
+
+  bool ReadDynamicNeededAndRpath(std::vector<string>* needed,
+                                 std::vector<string>* rpath) override {
+    if (!ReadDynamicNeeded(needed))
+      return false;
+
+    ReadStringEntryInDynamic(DT_RUNPATH, rpath);
+    // A loader checks DT_RPATH if and only if there are no DT_RUNPATH.
+    if (rpath->empty()) {
+      ReadStringEntryInDynamic(DT_RPATH, rpath);
+    }
+    return true;
+  }
+
+ private:
+  void CheckIdent();
+  bool ReadEhdr() {
+    if (!valid_)
+      return false;
+    if (read(fd_.fd(), reinterpret_cast<char*>(&ehdr_) + EI_NIDENT,
+             sizeof(Ehdr) - EI_NIDENT) != (sizeof(Ehdr) - EI_NIDENT)) {
+      PLOG(ERROR) << "read ehdr:" << filename_;
+      valid_ = false;
+      return false;
+    }
+    VLOG(1) << DumpEhdr(ehdr_);
+    return true;
+  }
+  bool ReadPhdrs() {  // Loads every Phdr; records PT_DYNAMIC and text_offset_.
+    if (!valid_) return false;
+    if (lseek(fd_.fd(), ehdr_.e_phoff, SEEK_SET) == static_cast<off_t>(-1)) {
+      PLOG(ERROR) << "seek phoff:" << ehdr_.e_phoff << " " << filename_;
+      valid_ = false;
+      return false;
+    }
+    for (int i = 0; i < ehdr_.e_phnum; ++i) {
+      Phdr* phdr = new Phdr;
+      if (read(fd_.fd(), reinterpret_cast<char*>(phdr), sizeof(Phdr)) !=
+          sizeof(Phdr)) {
+        PLOG(ERROR) << "read phdr:" << i << " " << filename_;
+        delete phdr;  // not in phdrs_ yet, so the destructor would miss it
+        valid_ = false;
+        return false;
+      }
+      phdrs_.push_back(phdr);
+      VLOG(1) << i << ":" << DumpPhdr(*phdr);
+      switch (phdr->p_type) {
+        case PT_DYNAMIC:
+          LOG_IF(ERROR, dynamic_phdr_ != nullptr)
+              << filename_ << " PT_DYNAMIC "
+              << DumpPhdr(*dynamic_phdr_) << " " << DumpPhdr(*phdr);
+          dynamic_phdr_ = phdr;
+          break;
+        case PT_LOAD:
+          // The first segment, which contains dynstr, is being mapped
+          // in non-zero address. Update text_offset_ to adjust the
+          // offset of dynstr later.
+          if (phdr->p_offset == 0 && phdr->p_vaddr) {
+            LOG_IF(ERROR, ehdr_.e_type != ET_EXEC)
+                << "Non zero vaddr for non EXEC ELF (" << ehdr_.e_type
+                << "): " << DumpPhdr(*phdr);
+            text_offset_ = phdr->p_vaddr;
+          }
+          break;
+        default:
+          break;
+      }
+    }
+    return valid_;
+  }
+  bool ReadShdrs() {  // Loads every Shdr; records the strtab/dynamic sections.
+    if (!valid_) return false;
+    if (lseek(fd_.fd(), ehdr_.e_shoff, SEEK_SET) == static_cast<off_t>(-1)) {
+      PLOG(ERROR) << "seek shoff:" << ehdr_.e_shoff << " " << filename_;
+      valid_ = false;
+      return false;
+    }
+    for (int i = 0; i < ehdr_.e_shnum; ++i) {
+      Shdr* shdr = new Shdr;
+      if (read(fd_.fd(), reinterpret_cast<char*>(shdr), sizeof(Shdr)) !=
+          sizeof(Shdr)) {
+        PLOG(ERROR) << "read shdr:" << i << " " << filename_;
+        delete shdr;  // not in shdrs_ yet, so the destructor would miss it
+        valid_ = false;
+        return false;
+      }
+      shdrs_.push_back(shdr);
+      VLOG(1) << i << ":" << DumpShdr(*shdr);
+      // TODO: This cannot handle ET_EXEC as this doesn't
+      //               update text_offset_.
+      switch (shdr->sh_type) {
+        case SHT_STRTAB:
+          // May have several STRTAB. Last one is ok?
+          strtab_shdr_ = shdr;
+          break;
+        case SHT_DYNAMIC:
+          LOG_IF(ERROR, dynamic_shdr_ != nullptr)
+              << filename_ << " SHT_DYNAMIC "
+              << DumpShdr(*dynamic_shdr_) << " " << DumpShdr(*shdr);
+          dynamic_shdr_ = shdr;
+          break;
+        default: break;
+      }
+    }
+    if (strtab_shdr_ != nullptr)
+      ReadStrtab();
+    return valid_;
+  }
+
+  bool ReadStrtab() {
+    if (!valid_)
+      return false;
+    if (strtab_shdr_ == nullptr)
+      return false;
+    VLOG(1) << "strtab:" << DumpShdr(*strtab_shdr_);
+    return ReadSectionData(*strtab_shdr_, &strtab_);
+  }
+
+  bool ReadDynamicSegment() {
+    if (!valid_)
+      return false;
+    if (dynamic_phdr_ == nullptr)
+      return false;
+    VLOG(1) << "dynamic:" << DumpPhdr(*dynamic_phdr_);
+    return ReadSegmentData(*dynamic_phdr_, &dyntab_);
+  }
+
+  bool ReadDynamicSection() {
+    if (!valid_)
+      return false;
+    if (dynamic_shdr_ == nullptr)
+      return false;
+    VLOG(1) << "dynamic:" << DumpShdr(*dynamic_shdr_);
+    return ReadSectionData(*dynamic_shdr_, &dyntab_);
+  }
+
+  bool ReadSegmentData(const Phdr& phdr, string* data) {
+    VLOG(1) << "read:" << DumpPhdr(phdr);
+    return ReadFromFile(phdr.p_offset, phdr.p_filesz, data);
+  }
+  bool ReadSectionData(const Shdr& shdr, string* data) {
+    VLOG(1) << "read:" << DumpShdr(shdr);
+    return ReadFromFile(shdr.sh_offset, shdr.sh_size, data);
+  }
+
+  bool ReadFromFile(off_t offset, size_t size, string* data) {
+    if (!valid_)
+      return false;
+    if (lseek(fd_.fd(), offset, SEEK_SET) == static_cast<off_t>(-1)) {
+      PLOG(ERROR) << "seek:" << offset << " " << filename_;
+      valid_ = false;
+      return false;
+    }
+    data->resize(size);
+    if (read(fd_.fd(), const_cast<char*>(data->data()), size) !=
+        static_cast<ssize_t>(size)) {
+      PLOG(ERROR) << "read data:" << size << " " << filename_;
+      valid_ = false;
+      return false;
+    }
+    return true;
+  }
+
+  bool ReadDtStrtab() {
+    if (!valid_ || dyntab_.empty())
+      return false;
+    off_t off = 0;
+    size_t size = 0;
+    // Visit whole entries only; a truncated tail must not be dereferenced.
+    const size_t end = dyntab_.size() / sizeof(Dyn) * sizeof(Dyn);
+    for (size_t pos = 0; pos < end; pos += sizeof(Dyn)) {
+      const Dyn* dyn = reinterpret_cast<const Dyn*>(dyntab_.data() + pos);
+      VLOG(2) << DumpDyn(*dyn);
+      if (dyn->d_tag == DT_STRTAB)
+        off = dyn->d_un.d_ptr - text_offset_;  // address -> file offset
+      else if (dyn->d_tag == DT_STRSZ)
+        size = dyn->d_un.d_val;
+    }
+    VLOG(1) << "dt_strtab: off=" << off << " size=" << size;
+    return ReadFromFile(off, size, &dt_strtab_);
+  }
+
+  void ReadStringEntryInDynamic(int type, std::vector<string>* out) {
+    const size_t end = dyntab_.size() / sizeof(Dyn) * sizeof(Dyn);
+    for (size_t pos = 0; pos < end; pos += sizeof(Dyn)) {  // whole entries
+      const Dyn* dyn = reinterpret_cast<const Dyn*>(dyntab_.data() + pos);
+      if (dyn->d_tag != type) continue;
+      if (dyn->d_un.d_val >= dt_strtab_.size()) {  // must point inside table
+        LOG(ERROR) << "out of range dt_strtab:" << dyn->d_un.d_val
+                   << " dt_strtab.size=" << dt_strtab_.size();
+        continue;
+      }
+      out->push_back(dt_strtab_.data() + dyn->d_un.d_val);
+    }
+  }
+
+  string DumpEhdr(const Ehdr& ehdr) {
+    std::stringstream ss;
+    ss << "Elf:";
+    ss << " type:" << ehdr.e_type;
+    ss << " machine:" << ehdr.e_machine;
+    ss << " version:" << ehdr.e_version;
+    ss << " entry:" << ehdr.e_entry;
+    ss << " phoff:" << ehdr.e_phoff;
+    ss << " shoff:" << ehdr.e_shoff;
+    ss << " flags:" << ehdr.e_flags;
+    ss << " ehsize:" << ehdr.e_ehsize;
+    ss << " phentsize:" << ehdr.e_phentsize;
+    ss << " phnum:" << ehdr.e_phnum;
+    ss << " shentsize:" << ehdr.e_shentsize;
+    ss << " shnum:" << ehdr.e_shnum;
+    ss << " shstrndx:" << ehdr.e_shstrndx;
+    return ss.str();
+  }
+
+  string DumpPhdr(const Phdr& phdr) {
+    std::stringstream ss;
+    ss << "Program:";
+    ss << " type:" << phdr.p_type;
+    ss << " offset:" << phdr.p_offset;
+    ss << " vaddr:" << phdr.p_vaddr;
+    ss << " paddr:" << phdr.p_paddr;
+    ss << " filesz:" << phdr.p_filesz;
+    ss << " memsz:" << phdr.p_memsz;
+    ss << " flags:" << phdr.p_flags;
+    ss << " align:" << phdr.p_align;
+    return ss.str();
+  }
+
+  string DumpShdr(const Shdr& shdr) {
+    // One-line rendering of a section header, used in log output.
+    std::stringstream ss;
+    ss << "Section:";
+    ss << " name:" << shdr.sh_name;
+    if (shdr.sh_name < strtab_.size()) {
+      ss << "'" << (strtab_.data() + shdr.sh_name) << "'";
+    }
+    ss << " type:" << shdr.sh_type;
+    ss << " flag:" << shdr.sh_flags;
+    ss << " addr:" << shdr.sh_addr;
+    ss << " offset:" << shdr.sh_offset;
+    ss << " size:" << shdr.sh_size;
+    ss << " link:" << shdr.sh_link;
+    ss << " info:" << shdr.sh_info;
+    ss << " addralign:" << shdr.sh_addralign;
+    ss << " entsize:" << shdr.sh_entsize;
+    return ss.str();
+  }
+
+  string DumpDyn(const Dyn& dyn) {
+    std::stringstream ss;
+    ss << "Dyn:";
+    ss << " tag:" << dyn.d_tag;
+    ss << " val:" << dyn.d_un.d_val << " ptr:" << dyn.d_un.d_ptr;
+    return ss.str();
+  }
+
+  const string filename_;
+  ScopedFd fd_;
+  bool valid_;
+  bool use_program_header_;
+  Ehdr ehdr_;
+  std::vector<Phdr*> phdrs_;
+  Phdr* dynamic_phdr_;
+  std::vector<Shdr*> shdrs_;
+  Shdr* strtab_shdr_;
+  string strtab_;
+  Shdr* dynamic_shdr_;
+  string dyntab_;
+  string dt_strtab_;
+  size_t text_offset_;
+  DISALLOW_COPY_AND_ASSIGN(ElfParserImpl);
+};
+
+template<>
+void ElfParserImpl<Elf32_Ehdr,
+                   Elf32_Phdr, Elf32_Shdr, Elf32_Dyn>::CheckIdent() {
+  if (valid_) {
+    valid_ = (ehdr_.e_ident[EI_CLASS] == ELFCLASS32);
+    LOG_IF(ERROR, !valid_) << "not elf class32";
+  }
+}
+
+template<>
+void ElfParserImpl<Elf64_Ehdr,
+                   Elf64_Phdr, Elf64_Shdr, Elf64_Dyn>::CheckIdent() {
+  // Must compare with ==; "=" would overwrite EI_CLASS and always pass.
+  if (!valid_) return;
+  valid_ = (ehdr_.e_ident[EI_CLASS] == ELFCLASS64);
+  LOG_IF(ERROR, !valid_) << "not elf class64";
+}
+
+static ScopedFd OpenElf(const string& filename, char *elfIdent) {
+  ScopedFd fd(ScopedFd::OpenForRead(filename));
+  if (!fd.valid()) {
+    PLOG(WARNING) << "open:" << filename;
+    return ScopedFd();
+  }
+  if (read(fd.fd(), elfIdent, EI_NIDENT) != EI_NIDENT) {
+    PLOG(WARNING) << "read elf ident:" << filename;
+    return ScopedFd();
+  }
+  if (memcmp(elfIdent, ELFMAG, SELFMAG) != 0) {
+    LOG(WARNING) << "not elf: " << filename
+                 << " ident:" << string(elfIdent, SELFMAG);
+    return ScopedFd();
+  }
+  return fd;
+}
+
+/* static */
+std::unique_ptr<ElfParser> ElfParser::NewElfParser(const string& filename) {
+  char elfIdent[EI_NIDENT];
+  ScopedFd fd(OpenElf(filename, elfIdent));
+  if (!fd.valid()) {
+    LOG(ERROR) << "open elf:" << filename;  // OpenElf logged the errno cause
+    return nullptr;
+  }
+  const int elf_class = elfIdent[EI_CLASS];  // int, so logs print a number
+  if (elf_class == ELFCLASS32) {
+    return std::unique_ptr<ElfParser>(
+        new ElfParserImpl<Elf32_Ehdr, Elf32_Phdr, Elf32_Shdr, Elf32_Dyn>(
+            filename, std::move(fd), elfIdent));
+  }
+  if (elf_class == ELFCLASS64) {
+    return std::unique_ptr<ElfParser>(
+        new ElfParserImpl<Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr, Elf64_Dyn>(
+            filename, std::move(fd), elfIdent));
+  }
+  LOG(ERROR) << "Unknown elf class:" << elf_class;
+  return nullptr;
+}
+
+/* static */
+bool ElfParser::IsElf(const string& filename) {
+  // ELF iff OpenElf can open the file and finds the ELF magic in it.
+  char ident[EI_NIDENT];
+  return OpenElf(filename.c_str(), ident).valid();
+}
+
+}  // namespace devtools_goma
+
+#ifdef TEST
+
+#include <cstdlib>
+#include <iostream>
+
+using devtools_goma::ElfParser;
+
+int main(int argc, char* argv[]) {
+  if (argc != 2) {
+    std::cout << "Usage: " << argv[0] << " <filename>" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  google::InitGoogleLogging(argv[0]);
+
+  std::unique_ptr<ElfParser> elf = ElfParser::NewElfParser(argv[1]);
+  CHECK(elf != nullptr);
+  CHECK(elf->valid());
+  std::vector<string> needed, rpath;
+  if (!elf->ReadDynamicNeededAndRpath(&needed, &rpath)) {
+    LOG(FATAL) << "ReadDynamicNeededAndRpath";
+  }
+  for (const auto& it : needed) {
+    std::cout << "NEEDED:" << it << std::endl;
+  }
+  for (const auto& it : rpath) {
+    std::cout << "RPATH:" << it << std::endl;
+  }
+  exit(0);
+}
+
+#endif
diff --git a/client/elf_parser.h b/client/elf_parser.h
new file mode 100644
index 0000000..338b20d
--- /dev/null
+++ b/client/elf_parser.h
@@ -0,0 +1,39 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_ELF_PARSER_H_
+#define DEVTOOLS_GOMA_CLIENT_ELF_PARSER_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class ElfParser {
+ public:
+  static std::unique_ptr<ElfParser> NewElfParser(const string& filename);
+  virtual ~ElfParser() {}
+  virtual bool valid() const = 0;  // false once the file is found unusable
+  virtual void UseProgramHeader(bool use_program_header) = 0;
+  virtual bool ReadDynamicNeeded(std::vector<string>* needed) = 0;
+  virtual bool ReadDynamicNeededAndRpath(std::vector<string>* needed,
+                                         std::vector<string>* rpath) = 0;
+  // True if |filename| can be opened and begins with the ELF magic bytes.
+  static bool IsElf(const string& filename);
+ protected:
+  ElfParser() {}  // construct through NewElfParser()
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ElfParser);
+};
+
+}  // namespace devtools_goma
+
+
+#endif  // DEVTOOLS_GOMA_CLIENT_ELF_PARSER_H_
diff --git a/client/elf_parser_unittest.cc b/client/elf_parser_unittest.cc
new file mode 100644
index 0000000..7a96944
--- /dev/null
+++ b/client/elf_parser_unittest.cc
@@ -0,0 +1,176 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+#include <gtest/gtest.h>
+
+#include "elf_parser.h"
+#include "file_dir.h"
+#include "mypath.h"
+#include "path.h"
+#include "simple_timer.h"
+#include "subprocess.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class ElfParserTest : public testing::Test {
+ protected:
+  void SetUp() override {
+    data_dir_ = file::JoinPath(GetMyDirectory(), "../../test");
+  }
+  // Collects the NEEDED entries that "objdump -p |filename|" prints.
+  void GetObjdumpOutput(const string& filename, std::vector<string>* needed) {
+    std::vector<string> argv;
+    argv.push_back("objdump");
+    argv.push_back("-p");
+    argv.push_back(filename);
+    std::vector<string> env;
+    env.push_back("LC_ALL=C");
+    string output = ReadCommandOutputByPopen("objdump", argv, env, ".",
+                                             MERGE_STDOUT_STDERR, nullptr);
+    size_t pos = 0;
+    while ((pos = output.find("NEEDED", pos)) != string::npos) {
+      pos += strlen("NEEDED");
+      while (pos < output.size()) {
+        if (output[pos] != ' ')
+          break;
+        ++pos;
+      }
+      size_t spos = pos;
+      while (pos < output.size()) {
+        if (output[pos] == '\n')
+          break;
+        ++pos;
+      }
+      needed->push_back(output.substr(spos, pos - spos));
+      ++pos;
+      if (pos >= output.size() || output[pos] != ' ')  // may be past the end
+        break;
+      ++pos;
+    }
+  }
+
+  string data_dir_;
+};
+
+TEST_F(ElfParserTest, GetObjdumpOutput) {
+  std::vector<string> needed;
+  GetObjdumpOutput(file::JoinPath(data_dir_, "libdl.so"), &needed);
+  EXPECT_EQ(2U, needed.size());
+  EXPECT_EQ("libc.so.6", needed[0]);
+  EXPECT_EQ("ld-linux-x86-64.so.2", needed[1]);
+}
+
+TEST_F(ElfParserTest, ReadDynamicNeeded) {
+  std::unique_ptr<ElfParser> parser(ElfParser::NewElfParser(
+      file::JoinPath(data_dir_, "libdl.so")));
+  ASSERT_TRUE(parser != nullptr);
+  EXPECT_TRUE(parser->valid());
+  std::vector<string> needed;
+  EXPECT_TRUE(parser->ReadDynamicNeeded(&needed));
+  EXPECT_EQ(2U, needed.size());
+  EXPECT_EQ("libc.so.6", needed[0]);
+  EXPECT_EQ("ld-linux-x86-64.so.2", needed[1]);
+}
+
+TEST_F(ElfParserTest, IsElf) {
+  EXPECT_TRUE(ElfParser::IsElf(file::JoinPath(data_dir_, "libdl.so")));
+  EXPECT_FALSE(ElfParser::IsElf(file::JoinPath(data_dir_, "libc.so")));
+}
+
+TEST_F(ElfParserTest, UsrLib) {
+  std::vector<DirEntry> entries;
+  ASSERT_TRUE(ListDirectory("/usr/lib", &entries));
+  int num = 0;
+  SimpleTimer timer;
+  double elf_parser_p_time = 0;
+  double elf_parser_s_time = 0;
+  double objdump_time = 0;
+  for (size_t i = 0; i < entries.size(); ++i) {
+    string name = entries[i].name;
+    if (name == "." || name == "..")
+      continue;
+    string fullname = file::JoinPath("/usr/lib", name);
+    VLOG(1) << fullname;
+    if (fullname.find(".so") == string::npos)
+      continue;
+    struct stat st;
+    if (stat(fullname.c_str(), &st) < 0)
+      continue;
+    if (!S_ISREG(st.st_mode))
+      continue;
+    if (!ElfParser::IsElf(fullname))
+      continue;
+
+    std::vector<string> p_needed;
+    timer.Start();
+    std::unique_ptr<ElfParser> parser(ElfParser::NewElfParser(fullname));
+    ASSERT_TRUE(parser != nullptr) << fullname;
+    EXPECT_TRUE(parser->valid()) << fullname;
+    parser->UseProgramHeader(true);
+    EXPECT_TRUE(parser->ReadDynamicNeeded(&p_needed)) << fullname;
+    elf_parser_p_time += timer.Get();
+
+    std::vector<string> s_needed;
+    timer.Start();
+    parser = ElfParser::NewElfParser(fullname);
+    ASSERT_TRUE(parser != nullptr) << fullname;
+    EXPECT_TRUE(parser->valid()) << fullname;
+    parser->UseProgramHeader(false);
+    EXPECT_TRUE(parser->ReadDynamicNeeded(&s_needed)) << fullname;
+    elf_parser_s_time += timer.Get();
+
+    std::vector<string> expected_needed;
+    timer.Start();
+    GetObjdumpOutput(fullname, &expected_needed);
+    objdump_time += timer.Get();
+
+    EXPECT_EQ(expected_needed, p_needed) << fullname;
+    EXPECT_EQ(expected_needed, s_needed) << fullname;
+    ++num;
+  }
+  EXPECT_GT(num, 0);
+  LOG(INFO) << "check elf files:" << num;
+  LOG(INFO) << "time"
+            << " p:" << elf_parser_p_time
+            << " s:" << elf_parser_s_time
+            << " objdump:" << objdump_time;
+}
+
+TEST_F(ElfParserTest, ReadDynamicNeededAndRpath) {
+  std::vector<string> argv;
+  std::vector<string> env;
+  argv.push_back("gcc");
+  argv.push_back("-xc");
+  argv.push_back("/dev/null");
+  argv.push_back("-shared");
+  argv.push_back("-Wl,-rpath=/lib");
+  argv.push_back("-o");
+  argv.push_back("/tmp/null.so");
+  ReadCommandOutputByPopen("gcc", argv, env, ".", MERGE_STDOUT_STDERR, nullptr);
+
+  std::unique_ptr<ElfParser> parser(ElfParser::NewElfParser("/tmp/null.so"));
+  ASSERT_TRUE(parser != nullptr);
+  EXPECT_TRUE(parser->valid());
+  std::vector<string> needed, rpath;
+  EXPECT_TRUE(parser->ReadDynamicNeededAndRpath(&needed, &rpath));
+  EXPECT_EQ(1U, needed.size());
+  EXPECT_EQ("libc.so.6", needed[0]);
+  EXPECT_EQ(1U, rpath.size());
+  EXPECT_EQ("/lib", rpath[0]);
+}
+
+}  // namespace devtools_goma
diff --git a/client/env_flags.cc b/client/env_flags.cc
new file mode 100644
index 0000000..153c712
--- /dev/null
+++ b/client/env_flags.cc
@@ -0,0 +1,153 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "env_flags.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <map>
+#include <set>
+#include <string>
+#include <sstream>
+
+using std::string;
+
+struct GomaAutoConfigurer {
+  GomaAutoConfigurer(string (*GetConfiguredValue)(void),
+                     void (*SetConfiguredValue)(void))
+      : GetConfiguredValue(GetConfiguredValue),
+        SetConfiguredValue(SetConfiguredValue) {}
+
+  string (*GetConfiguredValue)(void);
+  void (*SetConfiguredValue)(void);
+};
+
+static std::set<string>* g_env_flag_names;
+typedef std::map<string, GomaAutoConfigurer> AutoConfigurerMap;
+static AutoConfigurerMap* g_autoconfigurers;
+
+void RegisterEnvFlag(const char* name) {
+  if (!g_env_flag_names) {
+    g_env_flag_names = new std::set<string>;
+  }
+  if (!g_env_flag_names->insert(name).second) {
+    fprintf(stderr, "%s has registered twice\n", name);
+    exit(1);
+  }
+}
+
+void RegisterEnvAutoConfFlag(const char* name,
+                             string (*GetConfiguredValue)(),
+                             void (*SetConfiguredValue)()) {
+  if (!g_autoconfigurers) {
+    g_autoconfigurers = new AutoConfigurerMap;
+  }
+
+  GomaAutoConfigurer configurer(GetConfiguredValue, SetConfiguredValue);
+
+  if (!g_autoconfigurers->insert(make_pair(string(name), configurer)).second) {
+    fprintf(stderr, "%s has registered twice for autoconf\n", name);
+    exit(1);
+  }
+}
+
+void CheckFlagNames(const char** envp) {
+  bool ok = true;
+  for (int i = 0; envp[i]; i++) {
+    if (strncmp(envp[i], "GOMA_", 5))
+      continue;
+    // No '=' means the whole entry is the name (assert vanishes with NDEBUG).
+    const char* name_end = strchr(envp[i], '=');
+    if (name_end == nullptr)
+      name_end = envp[i] + strlen(envp[i]);
+    const string name(envp[i] + 5, name_end - envp[i] - 5);
+    // The registry is created lazily, so it is null if nothing registered.
+    if (g_env_flag_names == nullptr || !g_env_flag_names->count(name)) {
+      fprintf(stderr, "%s: unknown GOMA_ parameter\n", envp[i]);
+      ok = false;
+    }
+  }
+  if (!ok) exit(1);  // report every unknown name before bailing out
+}
+
+void AutoConfigureFlags(const char** envp) {
+  // The map is created lazily; null means no autoconf flag was registered.
+  if (g_autoconfigurers == nullptr) return;
+  // Names (without the "GOMA_" prefix) that |envp| sets explicitly.
+  std::set<string> goma_set_params;
+  for (int i = 0; envp[i]; i++) {
+    if (strncmp(envp[i], "GOMA_", 5))
+      continue;
+    const char* name_end = strchr(envp[i], '=');
+    if (name_end == nullptr)  // no '=': treat the whole entry as the name
+      name_end = envp[i] + strlen(envp[i]);
+    goma_set_params.insert(string(envp[i] + 5, name_end - envp[i] - 5));
+  }
+  // Auto-configure only the flags the user left unset.
+  for (const auto& it : *g_autoconfigurers) {
+    if (!goma_set_params.count(it.first))
+      it.second.SetConfiguredValue();
+  }
+}
+
+void DumpEnvFlag(std::ostringstream* ss) {
+  if (g_env_flag_names == nullptr) return;
+  for (const auto& iter : *g_env_flag_names) {
+    const string name = "GOMA_" + iter;
+    char* v = nullptr;
+#ifdef _WIN32
+    _dupenv_s(&v, nullptr, name.c_str());  // heap copy, released below
+#else
+    v = getenv(name.c_str());
+#endif
+    if (v != nullptr) {
+      (*ss) << name << "=" << v << std::endl;
+    } else if (g_autoconfigurers != nullptr && g_autoconfigurers->count(iter)) {
+      (*ss) << name << "=" << g_autoconfigurers->at(iter).GetConfiguredValue()
+            << " (auto configured)" << std::endl;
+    }
+#ifdef _WIN32
+    free(v);  // _dupenv_s allocates; free(nullptr) is a no-op
+#endif
+  }
+}
+
+#ifdef _WIN32
+string GOMA_EnvToString(const char* envname, const char* dflt) {
+  char* env;
+  if (_dupenv_s(&env, nullptr, envname) == 0 && env != nullptr) {
+    string value = env;
+    free(env);
+    return value;
+  } else {
+    return dflt;
+  }
+}
+
+bool GOMA_EnvToBool(const char* envname, bool dflt) {
+  char* env;
+  if (_dupenv_s(&env, nullptr, envname) == 0 && env != nullptr) {
+    bool value = (memchr("tTyY1\0", env[0], 6) != nullptr);
+    free(env);
+    return value;
+  } else {
+    return dflt;
+  }
+}
+
+int GOMA_EnvToInt(const char* envname, int dflt) {
+  char* env;
+  if (_dupenv_s(&env, nullptr, envname) == 0 && env != nullptr) {
+    int value = strtol(env, nullptr, 10);
+    free(env);
+    return value;
+  } else {
+    return dflt;
+  }
+}
+
+#endif
diff --git a/client/env_flags.h b/client/env_flags.h
new file mode 100644
index 0000000..29f2eb5
--- /dev/null
+++ b/client/env_flags.h
@@ -0,0 +1,121 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_ENV_FLAGS_H_
+#define DEVTOOLS_GOMA_CLIENT_ENV_FLAGS_H_
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <string>
+#include <sstream>
+
+void RegisterEnvFlag(const char* name);
+void RegisterEnvAutoConfFlag(const char* name,
+                             std::string (*GetConfiguredValue)(),
+                             void (*SetConfiguredValue)());
+void CheckFlagNames(const char** envp);
+void AutoConfigureFlags(const char** envp);
+void DumpEnvFlag(std::ostringstream* ss);
+
+#ifdef _WIN32
+// MSVS warns the usage of 'getenv'.
+std::string GOMA_EnvToString(const char* envname, const char* dflt);
+bool GOMA_EnvToBool(const char* envname, bool dflt);
+int GOMA_EnvToInt(const char* envname, int dflt);
+
+#else
+// These macros (could be functions, but I don't want to bother with a .cc
+// file), make it easier to initialize flags from the environment.
+
+#define GOMA_EnvToString(envname, dflt)         \
+  (!getenv(envname) ? (dflt) : getenv(envname))
+
+#define GOMA_EnvToBool(envname, dflt)                                   \
+  (!getenv(envname) ? (dflt) : memchr("tTyY1\0", getenv(envname)[0], 6) != NULL)
+
+#define GOMA_EnvToInt(envname, dflt)                                    \
+  (!getenv(envname) ? (dflt) : strtol(getenv(envname), NULL, 10))
+#endif
+
+#define GOMA_REGISTER_FLAG_NAME(name)                                   \
+  struct RegisterEnvFlag##name {                                        \
+    explicit RegisterEnvFlag##name() {                                  \
+      RegisterEnvFlag(#name);                                           \
+    }                                                                   \
+  };                                                                    \
+  RegisterEnvFlag##name g_register_env_flag_##name
+
+#define GOMA_REGISTER_AUTOCONF_FLAG_NAME(name, func)                    \
+  struct RegisterEnvAutoConfFlagSetter##name {                          \
+    static void SetConfiguredValue() {                                  \
+      FLAGS_ ## name = func();                                          \
+    }                                                                   \
+    /* Since we would like to use this kind of method for all types */  \
+    /* (e.g. int, bool, etc.), we chose to return string */             \
+    static std::string GetConfiguredValue() {                           \
+      std::ostringstream ss;                                             \
+      ss << func();                                                     \
+      return ss.str();                                                  \
+    }                                                                   \
+  };                                                                    \
+  struct RegisterEnvAutoConfFlag##name {                                \
+    RegisterEnvAutoConfFlag##name() {                                   \
+      RegisterEnvAutoConfFlag(                                          \
+          #name,                                                        \
+          RegisterEnvAutoConfFlagSetter##name::GetConfiguredValue,      \
+          RegisterEnvAutoConfFlagSetter##name::SetConfiguredValue);     \
+    }                                                                   \
+  };                                                                    \
+  RegisterEnvAutoConfFlag##name g_register_autoconf_flag_##name;        \
+
+
+#define GOMA_DECLARE_VARIABLE(type, name, tn)                           \
+  namespace FLAG__namespace_do_not_use_directly_use_GOMA_DECLARE_##tn##_instead { \
+  extern type FLAGS_##name;                                             \
+  }                                                                     \
+  using FLAG__namespace_do_not_use_directly_use_GOMA_DECLARE_##tn##_instead::FLAGS_##name
+#define GOMA_DEFINE_VARIABLE(type, name, value, meaning, tn)            \
+  namespace FLAG__namespace_do_not_use_directly_use_GOMA_DECLARE_##tn##_instead { \
+  type FLAGS_##name(value);                                             \
+  }                                                                     \
+  using FLAG__namespace_do_not_use_directly_use_GOMA_DECLARE_##tn##_instead::FLAGS_##name;\
+  GOMA_REGISTER_FLAG_NAME(name)
+
+// bool specialization
+#define GOMA_DECLARE_bool(name)                 \
+  GOMA_DECLARE_VARIABLE(bool, name, bool)
+#define GOMA_DEFINE_bool(name, value, meaning)                          \
+  GOMA_DEFINE_VARIABLE(bool, name, GOMA_EnvToBool("GOMA_" #name, value), \
+                       meaning, bool)
+
+typedef int int32;
+
+// int32 specialization
+#define GOMA_DECLARE_int32(name)                \
+  GOMA_DECLARE_VARIABLE(int32, name, int32)
+#define GOMA_DEFINE_int32(name, value, meaning)                         \
+  GOMA_DEFINE_VARIABLE(int32, name, GOMA_EnvToInt("GOMA_" #name, value), \
+                       meaning, int32)
+#define GOMA_DEFINE_AUTOCONF_int32(name, func, meaning) \
+  GOMA_DEFINE_int32(name, 0, meaning); \
+  GOMA_REGISTER_AUTOCONF_FLAG_NAME(name, func)
+
+// Special case for string, because we have to specify the namespace
+// std::string, which doesn't play nicely with our FLAG__namespace hackery.
+#define GOMA_DECLARE_string(name)                                       \
+  namespace FLAG__namespace_do_not_use_directly_use_GOMA_DECLARE_string_instead { \
+    extern  std::string FLAGS_##name;                                   \
+  }                                                                     \
+  using FLAG__namespace_do_not_use_directly_use_GOMA_DECLARE_string_instead::FLAGS_##name
+
+#define GOMA_DEFINE_string(name, value, meaning)                        \
+  namespace FLAG__namespace_do_not_use_directly_use_GOMA_DECLARE_string_instead { \
+  std::string FLAGS_##name(GOMA_EnvToString("GOMA_" #name, value));     \
+  }                                                                     \
+  using FLAG__namespace_do_not_use_directly_use_GOMA_DECLARE_string_instead::FLAGS_##name; \
+  GOMA_REGISTER_FLAG_NAME(name)
+
+#endif  // DEVTOOLS_GOMA_CLIENT_ENV_FLAGS_H_
diff --git a/client/env_flags_unittest.cc b/client/env_flags_unittest.cc
new file mode 100644
index 0000000..1122e50
--- /dev/null
+++ b/client/env_flags_unittest.cc
@@ -0,0 +1,94 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "env_flags.h"
+
+#include <string>
+
+#include "gtest/gtest.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// Value the fixture writes into the flag before every test.
+static const int kInitialValue = 0;
+// Value returned by the auto-configuration callback below.
+static const int kAutoConfiguredValue = 72;
+
+// Auto-configuration callback registered for FLAGS_INTVAL_FOR_UNITTEST.
+static int DefaultIntValueForUnittest() {
+  return kAutoConfiguredValue;
+}
+
+// Defines FLAGS_INTVAL_FOR_UNITTEST and registers the callback above for it.
+GOMA_DEFINE_AUTOCONF_int32(INTVAL_FOR_UNITTEST,
+                           DefaultIntValueForUnittest,
+                           "For testing only.");
+
+// Fixture that resets FLAGS_INTVAL_FOR_UNITTEST before every test case.
+class EnvFlagsTest : public testing::Test {
+ protected:
+  // Kept protected, matching testing::Test::SetUp and the usual gtest
+  // fixture convention, instead of silently narrowing it to private.
+  void SetUp() override {
+    // When |envp| does not contain GOMA_INTVAL_FOR_UNITTEST,
+    // AutoConfigureFlags() will set the auto configured value to
+    // FLAGS_INTVAL_FOR_UNITTEST. However, when |envp| contains
+    // GOMA_INTVAL_FOR_UNITTEST, AutoConfigureFlags() does not parse |envp| to
+    // set FLAGS_INTVAL_FOR_UNITTEST, i.e. value in |envp| will be just ignored.
+    // So we have to set an initial value to FLAGS_INTVAL_FOR_UNITTEST here.
+    FLAGS_INTVAL_FOR_UNITTEST = kInitialValue;
+  }
+};
+
+// With no environment entries at all, the flag takes the auto-configured
+// value.
+TEST_F(EnvFlagsTest, EmptyEnv) {
+  const char* empty_env[] = {nullptr};
+
+  AutoConfigureFlags(empty_env);
+
+  EXPECT_EQ(kAutoConfiguredValue, FLAGS_INTVAL_FOR_UNITTEST);
+}
+
+// When the user supplies GOMA_INTVAL_FOR_UNITTEST, auto-configuration must
+// leave the flag at the value the fixture set.
+TEST_F(EnvFlagsTest, EnvGivenByUser1) {
+  const char* user_env[] = {"GOMA_INTVAL_FOR_UNITTEST=0", nullptr};
+
+  AutoConfigureFlags(user_env);
+
+  EXPECT_EQ(kInitialValue, FLAGS_INTVAL_FOR_UNITTEST);
+}
+
+// Same as EnvGivenByUser1, but with a non-default value already stored in
+// the flag.
+TEST_F(EnvFlagsTest, EnvGivenByUser2) {
+  FLAGS_INTVAL_FOR_UNITTEST = 1;
+
+  const char* user_env[] = {"GOMA_INTVAL_FOR_UNITTEST=1", nullptr};
+  AutoConfigureFlags(user_env);
+
+  // AutoConfigureFlags does not parse the values in the environment, so
+  // FLAGS_INTVAL_FOR_UNITTEST must still hold what it held before the call.
+  EXPECT_EQ(1, FLAGS_INTVAL_FOR_UNITTEST);
+}
+
+// An unrelated variable without the GOMA_ prefix does not stop
+// auto-configuration of the flag.
+TEST_F(EnvFlagsTest, NoGomaPrefix) {
+  const char* unrelated_env[] = {"TEST=0", nullptr};
+
+  AutoConfigureFlags(unrelated_env);
+
+  EXPECT_EQ(kAutoConfiguredValue, FLAGS_INTVAL_FOR_UNITTEST);
+}
+
+// The flag's variable is still recognised when it sits between other GOMA_
+// entries, so auto-configuration is skipped.
+TEST_F(EnvFlagsTest, VariousEnv) {
+  const char* mixed_env[] = {
+    "GOMA_PRE=test",
+    "GOMA_INTVAL_FOR_UNITTEST=0",
+    "GOMA_POST=test",
+    nullptr,
+  };
+
+  AutoConfigureFlags(mixed_env);
+
+  EXPECT_EQ(kInitialValue, FLAGS_INTVAL_FOR_UNITTEST);
+}
+
+}  // namespace devtools_goma
diff --git a/client/error_notice.proto b/client/error_notice.proto
new file mode 100644
index 0000000..ce6defe
--- /dev/null
+++ b/client/error_notice.proto
@@ -0,0 +1,67 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+syntax = "proto2";
+
+package devtools_goma;
+
+
+// Container for one or more ErrorNotice messages.
+message ErrorNotices {
+  // If the message format ever needs to change, the repeated field lets us
+  // return multiple versions side by side.
+  repeated ErrorNotice notice = 1;
+}
+
+// A single notice describing a compile error and the infrastructure status.
+message ErrorNotice {
+  // If gomacc returned a non-zero exit status, CompileError tells the caller
+  // whether the failure was caused by goma itself.
+  enum CompileError {
+    // Set this if gomacc cannot find compiler_proxy.
+    COMPILER_PROXY_UNREACHABLE = 1;
+    // Set this if compiler_proxy_failures > 0, or if we
+    // failed to find an exact-match executable under GOMA_HERMETIC=error.
+    COMPILER_PROXY_FAILURE = 2;
+  }
+  // Format version of this notice.
+  required int32 version = 1;
+  // The value is set only on a compile error caused by goma.
+  optional CompileError compile_error = 2;
+
+  // Infrastructure status lets the caller know the status of compiler_proxy
+  // and the backend.  The caller should judge whether it is safe to continue.
+  optional InfraStatus infra_status = 3;
+}
+
+// Message to store goma infrastructure status.
+// It includes the status inside compiler_proxy, that of the network to the
+// goma server, and that in the goma server.
+// Field numbers are not contiguous within each group below (12 and 13 belong
+// to the HttpClient group); always consult the next-id marker.
+// NEXT ID TO USE: 16
+message InfraStatus {
+  // CompilerInfo
+  optional int32 num_compiler_info_miss = 1;
+  optional int32 num_compiler_info_fail = 2;
+
+  // HttpClient
+  optional int32 ping_status_code = 3;
+  repeated HttpStatusCode http_status_code = 4;
+  optional int32 num_http_sent = 5;
+  optional int32 num_http_active = 6;
+  optional int32 num_http_retry = 7;
+  optional int32 num_http_timeout = 8;
+  optional int32 num_http_error = 9;
+  optional int32 num_network_error = 12;
+  optional int32 num_network_recovered = 13;
+
+  // CompileService
+  optional int32 num_exec_fail_fallback = 10;
+  optional int32 num_exec_compiler_proxy_failure = 11;
+
+  // User-visible errors. num_user_warning is deprecated.
+  optional int32 num_user_error = 14;
+  optional int32 num_user_warning = 15 [deprecated=true];
+}
+
+// Protobuf message pairing an HTTP status code with the number of times it
+// was observed.
+message HttpStatusCode {
+  // The HTTP status code.
+  required int32 status_code = 1;
+  // How many times |status_code| was seen.
+  required int32 count = 2;
+}
diff --git a/client/fake_tls_engine.cc b/client/fake_tls_engine.cc
new file mode 100644
index 0000000..41cafd7
--- /dev/null
+++ b/client/fake_tls_engine.cc
@@ -0,0 +1,103 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "fake_tls_engine.h"
+
+#include <gtest/gtest.h>
+
+namespace devtools_goma {
+
+// An engine that was configured to fail must have actually reached its
+// failure path before it is destroyed.
+FakeTLSEngine::~FakeTLSEngine() {
+  if (broken_ == FAKE_TLS_NO_BROKEN) {
+    return;
+  }
+  EXPECT_TRUE(execute_broken_);
+}
+
+// The fake engine never reports pending I/O.
+bool FakeTLSEngine::IsIOPending() const {
+  // Nothing is pending.
+  return false;
+}
+
+// Moves everything the application has written so far into |data| and
+// returns its size, or TLS_ERROR when the engine is set to fail here.
+int FakeTLSEngine::GetDataToSendTransport(string *data) {
+  const bool should_fail = (broken_ == FAKE_TLS_GET_BROKEN);
+  if (should_fail) {
+    execute_broken_ = true;
+    return TLSEngine::TLS_ERROR;
+  }
+
+  // Hand over the pending bytes and leave the internal buffer empty.
+  data->assign(buffer_app_to_sock_);
+  buffer_app_to_sock_.clear();
+  return data->size();
+}
+
+// Always returns a fixed size of 1024.
+size_t FakeTLSEngine::GetBufSizeFromTransport() {
+  return 1024;
+}
+
+// Queues |data| so that a later Read() can return it. Returns the number of
+// bytes accepted, or TLS_ERROR when the engine is set to fail here.
+int FakeTLSEngine::SetDataFromTransport(const StringPiece& data) {
+  const bool should_fail = (broken_ == FAKE_TLS_SET_BROKEN);
+  if (should_fail) {
+    execute_broken_ = true;
+    return TLSEngine::TLS_ERROR;
+  }
+
+  const string incoming(data);
+  buffer_sock_to_app_ += incoming;
+  return data.size();
+}
+
+// Copies up to |size| bytes of data previously queued by
+// SetDataFromTransport() into |data|.
+//
+// Returns the number of bytes copied, TLS_WANT_READ when nothing is queued,
+// or TLS_ERROR when the engine is configured with FAKE_TLS_READ_BROKEN.
+// When max_read_size_ is positive, a single call returns at most that many
+// bytes.
+int FakeTLSEngine::Read(void* data, int size) {
+  if (broken_ == FAKE_TLS_READ_BROKEN) {
+    execute_broken_ = true;
+    return TLSEngine::TLS_ERROR;
+  }
+  if (buffer_sock_to_app_.empty())
+    return TLSEngine::TLS_WANT_READ;
+
+  // Bytes still unread, then clamped by the optional per-call cap and by the
+  // caller's buffer size.
+  int copy_size = buffer_sock_to_app_.size() - offset_sock_to_app_;
+  if (max_read_size_ > 0 && copy_size > max_read_size_)
+    copy_size = max_read_size_;
+  if (size < copy_size)
+    copy_size = size;
+
+  // Braced and indented so the guarded statement is unmistakable.
+  if (copy_size > 0) {
+    memmove(data, buffer_sock_to_app_.c_str() + offset_sock_to_app_,
+            copy_size);
+  }
+  offset_sock_to_app_ += copy_size;
+
+  // Once everything has been consumed, recycle the buffer.
+  if (buffer_sock_to_app_.size() == offset_sock_to_app_) {
+    buffer_sock_to_app_.clear();
+    offset_sock_to_app_ = 0;
+  }
+  return copy_size;
+}
+
+// Queues |size| bytes from |data| for a later GetDataToSendTransport().
+// Returns |size|, or TLS_ERROR when the engine is set to fail here.
+int FakeTLSEngine::Write(const void* data, int size) {
+  const bool should_fail = (broken_ == FAKE_TLS_WRITE_BROKEN);
+  if (should_fail) {
+    execute_broken_ = true;
+    return TLSEngine::TLS_ERROR;
+  }
+
+  const char* bytes = static_cast<const char*>(data);
+  buffer_app_to_sock_.append(bytes, size);
+  return size;
+}
+
+// Verifies that the socket was closed (WillCloseSocket() resets both
+// members) before the factory goes away.
+FakeTLSEngineFactory::~FakeTLSEngineFactory() {
+  EXPECT_EQ(sock_, -1);
+  EXPECT_FALSE(tls_engine_);
+}
+
+// Returns the engine bound to |sock|, creating it on first use. Only one
+// socket at a time is supported.
+TLSEngine* FakeTLSEngineFactory::NewTLSEngine(int sock) {
+  const bool no_engine_yet = (sock_ == -1);
+  if (no_engine_yet) {
+    // First request: remember the socket and build a configured engine.
+    sock_ = sock;
+    tls_engine_ = new FakeTLSEngine;
+    tls_engine_->SetBroken(broken_);
+    tls_engine_->SetMaxReadSize(max_read_size_);
+  }
+
+  // We should implement more powerful mock if you use more than one socket.
+  EXPECT_EQ(sock, sock_);
+  return tls_engine_;
+}
+
+// Destroys the engine bound to |sock| and returns the factory to its
+// initial, unbound state.
+void FakeTLSEngineFactory::WillCloseSocket(int sock) {
+  EXPECT_NE(sock, -1);
+  EXPECT_EQ(sock, sock_);
+
+  delete tls_engine_;
+  tls_engine_ = nullptr;
+  sock_ = -1;
+}
+
+}  // namespace devtools_goma
diff --git a/client/fake_tls_engine.h b/client/fake_tls_engine.h
new file mode 100644
index 0000000..3e97a4b
--- /dev/null
+++ b/client/fake_tls_engine.h
@@ -0,0 +1,99 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_FAKE_TLS_ENGINE_H_
+#define DEVTOOLS_GOMA_CLIENT_FAKE_TLS_ENGINE_H_
+
+#include "compiler_specific.h"
+#include "tls_engine.h"
+
+namespace devtools_goma {
+
+// This just passes transport input through to the application, and
+// vice-versa. That is why this is called "fake".
+// Instances are created by FakeTLSEngineFactory (constructor and destructor
+// are protected).
+class FakeTLSEngine : public TLSEngine {
+ public:
+  // Selects which operation, if any, should fail with TLS_ERROR.
+  enum FakeTLSEngineBroken {
+    FAKE_TLS_NO_BROKEN = 0,
+    FAKE_TLS_GET_BROKEN = 1,
+    FAKE_TLS_SET_BROKEN = 2,
+    FAKE_TLS_READ_BROKEN = 3,
+    FAKE_TLS_WRITE_BROKEN = 4,
+  };
+  bool IsIOPending() const override;
+
+  int GetDataToSendTransport(string* data) override;
+  size_t GetBufSizeFromTransport() override;
+  int SetDataFromTransport(const StringPiece& data) override;
+
+  // Read and Write return number of read/write bytes if success.
+  // Otherwise, TLSErrorReason.
+  int Read(void* data, int size) override;
+  int Write(const void* data, int size) override;
+
+  // Always returns the same fixed message.
+  string GetLastErrorMessage() const override {
+    return "TLSEngine error message";
+  }
+
+  bool IsRecycled() const override { return is_recycled_; }
+
+ protected:
+  friend class FakeTLSEngineFactory;
+  // Starts with empty buffers, no simulated failure and no read-size cap.
+  FakeTLSEngine() :
+    offset_sock_to_app_(0),
+    is_recycled_(false),
+    broken_(FAKE_TLS_NO_BROKEN),
+    execute_broken_(false),
+    max_read_size_(-1) {}
+  ~FakeTLSEngine() override;
+  virtual void SetIsRecycled(bool value) { is_recycled_ = value; }
+  virtual void SetBroken(FakeTLSEngineBroken broken) { broken_ = broken; }
+  virtual void SetMaxReadSize(int size) { max_read_size_ = size; }
+
+ private:
+  // Bytes written by Write(), drained by GetDataToSendTransport().
+  string buffer_app_to_sock_;
+  // Bytes supplied by SetDataFromTransport(), consumed by Read().
+  string buffer_sock_to_app_;
+  // Number of bytes of buffer_sock_to_app_ already returned by Read().
+  size_t offset_sock_to_app_;
+  bool is_recycled_;
+  // Which operation is configured to fail.
+  enum FakeTLSEngineBroken broken_;
+  // Set once the configured failure has actually been triggered.
+  bool execute_broken_;
+  // When positive, caps the number of bytes a single Read() returns.
+  int max_read_size_;
+
+  DISALLOW_COPY_AND_ASSIGN(FakeTLSEngine);
+};
+
+// TLSEngineFactory is synchronized.
+// Fake factory that owns at most one FakeTLSEngine, bound to a single
+// socket. The broken mode and read-size cap set here are applied to the
+// engine when NewTLSEngine() creates it.
+class FakeTLSEngineFactory : public TLSEngineFactory {
+ public:
+  // Starts unbound (sock_ == -1) with no engine and no simulated failure.
+  FakeTLSEngineFactory() :
+    sock_(-1), tls_engine_(nullptr),
+    broken_(FakeTLSEngine::FAKE_TLS_NO_BROKEN),
+    max_read_size_(-1) {}
+  ~FakeTLSEngineFactory() override;
+  TLSEngine* NewTLSEngine(int sock) override;
+  void WillCloseSocket(int sock) override;
+
+  string GetCertsInfo() override { return certs_info_; }
+  // Chooses which operation of the next created engine should fail.
+  void SetBroken(FakeTLSEngine::FakeTLSEngineBroken broken) {
+    broken_ = broken;
+  }
+  // Sets the per-Read() byte cap passed to the next created engine.
+  void SetMaxReadSize(int size) {
+    max_read_size_ = size;
+  }
+  // Dummy.
+  void SetHostname(const string& hostname ALLOW_UNUSED) override {}
+
+ private:
+  // Socket the current engine is bound to, or -1 when unbound.
+  int sock_;
+  // Engine created by NewTLSEngine(); deleted in WillCloseSocket().
+  FakeTLSEngine* tls_engine_;
+  string certs_info_;
+  enum FakeTLSEngine::FakeTLSEngineBroken broken_;
+  int max_read_size_;
+
+  DISALLOW_COPY_AND_ASSIGN(FakeTLSEngineFactory);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_FAKE_TLS_ENGINE_H_
diff --git a/client/file_hash_cache.cc b/client/file_hash_cache.cc
new file mode 100644
index 0000000..d56c318
--- /dev/null
+++ b/client/file_hash_cache.cc
@@ -0,0 +1,159 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+// Hold file cache state for compiler_proxy
+//
+
+#include <fcntl.h>
+#include <sys/types.h>
+
+#include <sstream>
+#include <string>
+
+#include "atomic_stats_counter.h"
+#include "autolock_timer.h"
+#include "env_flags.h"
+#include "file_hash_cache.h"
+#include "glog/logging.h"
+#include "path.h"
+#include "unordered.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// Looks up the cache key recorded for |filename|.
+//
+// Returns true and fills |*cache_key| when the cached entry matches
+// |file_id| and is considered trustworthy. Returns false otherwise;
+// |*cache_key| is then either empty (nothing known, or the entry was
+// dropped) or holds a key that might be obsolete.
+//
+// |missed_timestamp_ms|, when non-zero, additionally requires that the
+// entry was uploaded at or after that time.
+bool FileHashCache::GetFileCacheKey(const string& filename,
+                                    millitime_t missed_timestamp_ms,
+                                    const FileId& file_id,
+                                    string* cache_key) {
+  DCHECK(file::IsAbsolutePath(filename)) << filename;
+  cache_key->clear();
+
+  if (!file_id.IsValid()) {
+    LOG(INFO) << "Clear cache: file_id is invalid: " << filename;
+    AUTO_EXCLUSIVE_LOCK(lock, &file_cache_mutex_);
+    file_cache_.erase(filename);
+    num_stat_error_.Add(1);
+    return false;
+  }
+
+  // Copy the entry out under the shared lock so the checks below run
+  // without holding it.
+  FileInfo info;
+  {
+    AUTO_SHARED_LOCK(lock, &file_cache_mutex_);
+    unordered_map<string, struct FileInfo>::iterator it =
+        file_cache_.find(filename);
+    if (it == file_cache_.end()) {
+      num_cache_miss_.Add(1);
+      return false;
+    }
+    info = it->second;
+    num_cache_hit_.Add(1);
+  }
+
+  // found in cache.  Verify (reasonably) that it is the one we
+  // are looking for, using lightweight information.
+  if (file_id == info.file_id) {
+    *cache_key = info.cache_key;
+    bool valid = true;
+    if (missed_timestamp_ms != 0) {
+      valid = missed_timestamp_ms <= info.last_uploaded_timestamp_ms;
+      VLOG_IF(2, valid) << "uploaded after missing input request? "
+                        << filename
+                        << " missed=" << missed_timestamp_ms
+                        << " uploaded=" << info.last_uploaded_timestamp_ms;
+    }
+    // mtime has one-second granularity, so the entry is only trusted when
+    // it was recorded strictly after the file's mtime.
+    if (valid && info.last_checked > info.file_id.mtime) {
+      // We are reasonably confident that this was the right
+      // information we found.
+      return true;
+    }
+    VLOG(1) << "might be obsolete cache: " << filename << " " << *cache_key;
+    return false;
+  }
+
+  // The file changed since the entry was stored. |*cache_key| is still empty
+  // on this path, so log the stale key from |info| instead.
+  AUTO_EXCLUSIVE_LOCK(lock, &file_cache_mutex_);
+  LOG(INFO) << "Clear obsolete cache: " << filename << " " << info.cache_key;
+  file_cache_.erase(filename);
+  num_clear_obsolete_.Add(1);
+  return false;
+}
+
+// TODO: there is a race condition that if file changed
+// between send and receive, it won't be detected correctly. Fix
+// that later if it's a problem..
+//
+// Records |cache_key| for |filename|. Returns true only when |cache_key| has
+// never been stored before; returns false when it was already known or when
+// |file_id| is invalid (in which case the entry for |filename| is dropped).
+bool FileHashCache::StoreFileCacheKey(
+    const string& filename, const string& cache_key,
+    millitime_t upload_timestamp_ms,
+    const FileId& file_id) {
+  if (!file_id.IsValid()) {
+    LOG(WARNING) << "Try to store, but clear cache: failed taking FileId: "
+                  << filename;
+    // |file_id| is invalid, so drop whatever entry we hold for the file.
+    AUTO_EXCLUSIVE_LOCK(lock, &file_cache_mutex_);
+    file_cache_.erase(filename);
+    num_clear_cache_.Add(1);
+    // we don't clear cache key from known_cache_keys_, because other file
+    // may have the same cache_key (copied content).
+    return false;
+  }
+
+  FileInfo fresh;
+  fresh.cache_key = cache_key;
+  fresh.file_id = file_id;
+  fresh.last_checked = time(nullptr);
+  fresh.last_uploaded_timestamp_ms = upload_timestamp_ms;
+  {
+    AUTO_EXCLUSIVE_LOCK(lock, &file_cache_mutex_);
+
+    auto inserted = file_cache_.insert(make_pair(filename, fresh));
+    if (!inserted.second) {
+      // An entry already exists: overwrite it, but keep its upload time when
+      // the caller passed 0 (meaning no upload or download happened).
+      FileInfo& existing = inserted.first->second;
+      if (fresh.last_uploaded_timestamp_ms == 0) {
+        fresh.last_uploaded_timestamp_ms = existing.last_uploaded_timestamp_ms;
+      }
+      existing = fresh;
+    }
+    num_store_cache_.Add(1);
+  }
+
+  AUTO_EXCLUSIVE_LOCK(lock, &known_cache_keys_mutex_);
+  return known_cache_keys_.insert(cache_key).second;
+}
+
+// Returns true when |cache_key| has been stored through StoreFileCacheKey().
+bool FileHashCache::IsKnownCacheKey(const string& cache_key) {
+  AUTO_SHARED_LOCK(lock, &known_cache_keys_mutex_);
+  const bool known =
+      known_cache_keys_.find(cache_key) != known_cache_keys_.end();
+  return known;
+}
+
+// All members are default-initialized.
+FileHashCache::FileHashCache() = default;
+
+// Renders the statistics counters followed by one line per cached file.
+string FileHashCache::DebugString() {
+  std::stringstream out;
+
+  // Counters updated by GetFileCacheKey().
+  out << "[GetFileCacheKey]" << std::endl;
+  out << "cache hit=" << num_cache_hit_.value() << std::endl;
+  out << "cache miss=" << num_cache_miss_.value() << std::endl;
+  out << "stat error=" << num_stat_error_.value() << std::endl;
+  out << "clear obsolete=" << num_clear_obsolete_.value() << std::endl;
+
+  // Counters updated by StoreFileCacheKey().
+  out << "[StoreFileCacheKey]" << std::endl;
+  out << "store cache=" << num_store_cache_.value() << std::endl;
+  out << "clear cache=" << num_clear_cache_.value() << std::endl;
+  out << std::endl;
+
+  // Dump the table itself under the shared lock.
+  AUTO_SHARED_LOCK(lock, &file_cache_mutex_);
+  out << "[file_cache] size=" << file_cache_.size() << std::endl;
+  for (const auto& entry : file_cache_) {
+    const FileInfo& cached = entry.second;
+    out << "filename:" << entry.first << " key:" << cached.cache_key
+        << " file_size:" << cached.file_id.size
+        << " mtime:" << cached.file_id.mtime << std::endl;
+  }
+  return out.str();
+}
+
+}  // namespace devtools_goma
diff --git a/client/file_hash_cache.h b/client/file_hash_cache.h
new file mode 100644
index 0000000..78c1341
--- /dev/null
+++ b/client/file_hash_cache.h
@@ -0,0 +1,106 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_FILE_HASH_CACHE_H_
+#define DEVTOOLS_GOMA_CLIENT_FILE_HASH_CACHE_H_
+
+#include <string>
+
+#include "atomic_stats_counter.h"
+#include "basictypes.h"
+#include "file_id.h"
+#include "lockhelper.h"
+#include "thread_annotations.h"
+#include "timestamp.h"
+#include "unordered.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// Remembers, per filename, the cache key (hash) last associated with the
+// file together with the FileId it had at that time, plus the set of all
+// cache keys ever stored. Both containers are guarded by ReadWriteLocks.
+class FileHashCache {
+ public:
+  FileHashCache();
+
+  // Gets hash code (cache key) of |filename|.
+  // Returns true if it has cache key.
+  // Returns false and *cache_key is not empty, if *cache_key was used for
+  // cache key of the file but is not sure in some race condition because
+  // mtime granularity is second.
+  //   X.xx sec: last checked, hash_key is H1.
+  //   X.yy sec: file is modified.
+  //   X.zz sec: check the cache. mtime is X, the same as X.xx sec,
+  //             but hash key might be H1 (not modified at X.yy)
+  //             or might not be H1 (modified at X.yy)
+  // If |filename| exists and |missed_timestamp_ms| is not 0, cache_key will
+  // be valid if |missed_timestamp_ms| <= |last_uploaded_timestamp_ms|.
+  // cache_key will be invalidated if |missed_timestamp_ms| >
+  // |last_uploaded_timestamp_ms|.
+  // FileId for |filename| is |file_id|; it is supplied by the caller and
+  // only read here.
+  // If |missed_timestamp_ms| is 0, this check won't be performed.
+  // Returns false and *cache_key is empty if it doesn't know cache key of the
+  // file at all.
+  bool GetFileCacheKey(const string& filename,
+                       millitime_t missed_timestamp_ms,
+                       const FileId& file_id,
+                       string* cache_key);
+
+  // Stores hash code (cache key) of |filename|.
+  // |upload_timestamp_ms| is upload time or download time of the file
+  // in milliseconds.
+  // Please set 0LL if you do not upload or download the file. It preserves
+  // last_uploaded_timestamp_ms.
+  // |file_id| is a FileId of |filename|.
+  // If |file_id| is invalid, it clears the cache_key of the filename,
+  // and returns false.
+  // Returns true if the cache_key is the first used in FileCacheKey.
+  // Returns false if the cache_key was used before or |file_id| is invalid.
+  bool StoreFileCacheKey(const string &filename, const string& cache_key,
+                         millitime_t upload_timestamp_ms,
+                         const FileId& file_id);
+
+  // Returns true if |cache_key| has been stored by StoreFileCacheKey().
+  bool IsKnownCacheKey(const string& cache_key);
+
+  // Returns the statistics counters and the contents of the file cache as
+  // human-readable text.
+  string DebugString();
+
+ private:
+  // What is remembered about one file.
+  struct FileInfo {
+    string cache_key;
+    FileId file_id;
+    // time when hash key was stored in cache.
+    // FileInfo represents valid hash key of local file if mtime < last_checked.
+    time_t last_checked;
+
+    // time when file content was uploaded to backend, or downloaded from
+    // backend.
+    // we could assume the file has been in remote cache and use hash_key
+    // at time t if last_uploaded_timestamp_ms != 0 &&
+    // t > last_uploaded_timestamp_ms.
+    millitime_t last_uploaded_timestamp_ms;
+  };
+
+  // A map from filename to file cache info.
+  ReadWriteLock file_cache_mutex_;
+  unordered_map<string, struct FileInfo> file_cache_
+    GUARDED_BY(file_cache_mutex_);
+
+  // A set of cache keys that have been stored, so we could believe a cache_key
+  // in this set is in goma cache.
+  ReadWriteLock known_cache_keys_mutex_;
+  unordered_set<string> known_cache_keys_ GUARDED_BY(known_cache_keys_mutex_);
+
+  // Statistics reported by DebugString().
+  StatsCounter num_cache_hit_;
+  StatsCounter num_cache_miss_;
+  StatsCounter num_stat_error_;
+  StatsCounter num_clear_obsolete_;
+  StatsCounter num_store_cache_;
+  StatsCounter num_clear_cache_;
+
+  DISALLOW_COPY_AND_ASSIGN(FileHashCache);
+};
+
+}  // namespace devtools_goma
+#endif  // DEVTOOLS_GOMA_CLIENT_FILE_HASH_CACHE_H_
diff --git a/client/file_id.cc b/client/file_id.cc
new file mode 100644
index 0000000..fc5fd85
--- /dev/null
+++ b/client/file_id.cc
@@ -0,0 +1,119 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "file_id.h"
+
+#include <sys/stat.h>
+#include <sstream>
+
+#ifndef _WIN32
+#include <sys/types.h>
+#include <unistd.h>
+#else
+#include "filetime_win.h"
+#endif
+#include "counterz.h"
+#include "glog/logging.h"
+#include "scoped_fd.h"
+
+#ifdef _WIN32
+namespace {
+
+// Fills |file_id| from the Win32 file information |info|.
+// Returns false, leaving |file_id| untouched, when the file size does not
+// fit in the low 32 bits.
+bool InitFromInfo(const BY_HANDLE_FILE_INFORMATION& info,
+                  devtools_goma::FileId* file_id) {
+  const bool size_fits_low_dword = (info.nFileSizeHigh == 0);
+  if (!size_fits_low_dword) {
+    LOG(ERROR) << "Goma won't handle a file whose size is larger than 4 GB.";
+    return false;
+  }
+
+  const bool is_dir =
+      (info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
+  if (is_dir) {
+    file_id->is_directory = true;
+  }
+
+  // Identity of the file on its volume.
+  file_id->volume_serial_number = info.dwVolumeSerialNumber;
+  file_id->file_index_high = info.nFileIndexHigh;
+  file_id->file_index_low = info.nFileIndexLow;
+
+  // Attributes used to detect modification.
+  file_id->size = static_cast<off_t>(info.nFileSizeLow);
+  file_id->mtime =
+      devtools_goma::ConvertFiletimeToUnixTime(info.ftLastWriteTime);
+  return true;
+}
+
+}  // namespace
+#endif
+
+namespace devtools_goma {
+
+// Sentinel stored in |size| while a FileId is invalid; see IsValid().
+const off_t FileId::kInvalidFileSize = -1;
+
+// Builds a FileId for |filename|. All fields start at their "invalid"
+// defaults and are filled in only when the file can be queried, so a
+// failure leaves the object with IsValid() == false.
+FileId::FileId(const string& filename)
+  :
+#ifdef _WIN32
+    volume_serial_number(0), file_index_high(0), file_index_low(0),
+#else
+    dev(0), inode(0),
+#endif
+    mtime(0), size(kInvalidFileSize),
+    is_directory(false) {
+  GOMA_COUNTERZ("FileId");
+#ifndef _WIN32
+  // POSIX: a failing stat() is silently treated as "invalid".
+  struct stat stat_buf;
+  if (stat(filename.c_str(), &stat_buf) == 0) {
+    InitFromStat(stat_buf);
+  }
+#else
+  // See: https://msdn.microsoft.com/en-us/library/aa363788(v=vs.85).aspx
+  BY_HANDLE_FILE_INFORMATION fileinfo;
+  ScopedFd fd(ScopedFd::OpenForStat(filename));
+  if (fd.valid() && GetFileInformationByHandle(fd.handle(), &fileinfo)) {
+    // InitFromInfo() rejects files whose size needs more than 32 bits.
+    if (!InitFromInfo(fileinfo, this)) {
+      LOG(WARNING) << "Error in init file id."
+                   << " filename=" << filename;
+    }
+  }
+#endif
+}
+
+#ifndef _WIN32
+// Copies the fields of interest out of |stat_buf|.
+void FileId::InitFromStat(const struct stat& stat_buf) {
+  // Identity of the file.
+  inode = stat_buf.st_ino;
+  dev = stat_buf.st_dev;
+
+  // Attributes used to detect modification.
+  size = stat_buf.st_size;
+  mtime = stat_buf.st_mtime;
+  is_directory = S_ISDIR(stat_buf.st_mode);
+}
+#endif
+
+// A FileId is valid once |size| has been set to something other than the
+// kInvalidFileSize sentinel.
+bool FileId::IsValid() const {
+  return size != kInvalidFileSize;
+}
+
+// Returns true when the file described by *this may have changed relative
+// to |old|, which was observed at |last_checked|.
+bool FileId::CanBeNewerThan(const FileId& old, time_t last_checked) const {
+  // Any difference in the recorded fields means the file changed.
+  if (*this != old) {
+    return true;
+  }
+  // If mtime >= last_checked - 1, the file might be updated within
+  // the same second. We need to re-check the file for this case, too.
+  // The minus one is for VMs, where mtime can delay 1 second.
+  const time_t earliest_suspicious_mtime = last_checked - 1;
+  return mtime >= earliest_suspicious_mtime;
+}
+
+// Formats every field as "{name=value ...}" for logging.
+std::string FileId::DebugString() const {
+  std::stringstream out;
+  out << "{";
+#ifdef _WIN32
+  out << "volume_serial_number=" << volume_serial_number
+      << " file_index_high=" << file_index_high
+      << " file_index_low=" << file_index_low;
+#else
+  out << "dev=" << dev
+      << " inode=" << inode;
+#endif
+
+  out << " mtime=" << mtime
+      << " size=" << size
+      << " is_directory=" << is_directory
+      << "}";
+  return out.str();
+}
+
+}  // namespace devtools_goma
diff --git a/client/file_id.h b/client/file_id.h
new file mode 100644
index 0000000..ce6c727
--- /dev/null
+++ b/client/file_id.h
@@ -0,0 +1,83 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_FILE_ID_H_
+#define DEVTOOLS_GOMA_CLIENT_FILE_ID_H_
+
+#include <time.h>
+#ifndef _WIN32
+#include <sys/stat.h>
+#else
+#include "config_win.h"
+#endif
+
+#include <string>
+
+using std::string;
+
+namespace devtools_goma {
+
+// A helper class to check if a file is updated.
+//
+// Note: please also update compiler_info_data protobuf.
+// FileId is used for detecting update of compilers/subprograms.
+struct FileId {
+  // Sentinel value of |size| for an invalid FileId.
+  static const off_t kInvalidFileSize;
+  // Creates an invalid FileId (IsValid() returns false).
+  FileId() :
+#ifdef _WIN32
+      volume_serial_number(0), file_index_high(0), file_index_low(0),
+#else
+      dev(0), inode(0),
+#endif
+      mtime(0), size(kInvalidFileSize),
+      is_directory(false) {}
+  // Queries the file system for |filename|; the result is invalid when the
+  // query fails.
+  explicit FileId(const string& filename);
+
+  // True when |size| is not kInvalidFileSize.
+  bool IsValid() const;
+  // True when the file may have changed relative to |old|, which was
+  // observed at |last_checked|.
+  bool CanBeNewerThan(const FileId& old, time_t last_checked) const;
+
+  std::string DebugString() const;
+
+  // Field-by-field comparison of the platform identity, mtime, size and
+  // directory flag.
+  bool operator==(const FileId& other) const {
+    return
+#ifdef _WIN32
+        volume_serial_number == other.volume_serial_number &&
+        file_index_high == other.file_index_high &&
+        file_index_low == other.file_index_low &&
+#else
+        dev == other.dev && inode == other.inode &&
+#endif
+        mtime == other.mtime && size == other.size &&
+        is_directory == other.is_directory;
+  }
+
+  bool operator!=(const FileId& other) const {
+    return !(*this == other);
+  }
+
+#ifdef _WIN32
+  DWORD volume_serial_number;
+
+  // 64bit FileIndex is not guaranteed to be unique in ReFS file system
+  // introduced with Windows Server 2012.
+  DWORD file_index_high;
+  DWORD file_index_low;
+#else
+  // Device and inode numbers as reported by stat().
+  dev_t dev;
+  ino_t inode;
+#endif
+  // Last modification time.
+  time_t mtime;
+  // File size, or kInvalidFileSize when invalid.
+  off_t size;
+  bool is_directory;
+
+ private:
+#ifndef _WIN32
+  // Copies the relevant fields from |stat_buf|.
+  void InitFromStat(const struct stat& stat_buf);
+#endif
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_FILE_ID_H_
diff --git a/client/file_id_cache.cc b/client/file_id_cache.cc
new file mode 100644
index 0000000..9944af0
--- /dev/null
+++ b/client/file_id_cache.cc
@@ -0,0 +1,108 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "file_id_cache.h"
+
+#include <string>
+
+#include <glog/logging.h>
+
+#include "autolock_timer.h"
+#include "path.h"
+#include "unordered.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// TODO: Add stats.
+
+// Returns the FileId for |path|, consulting the shared cache first. Only
+// valid, non-directory results are added to the cache.
+FileId GlobalFileIdCache::Get(const string& path) {
+  // Fast path: look the entry up under the shared lock.
+  {
+    AUTO_SHARED_LOCK(lock, &mu_);
+    const auto found = file_ids_.find(path);
+    if (found != file_ids_.end()) {
+      return found->second;
+    }
+  }
+
+  // Slow path: query the file system outside of any lock.
+  const FileId fresh(path);
+  const bool cacheable = fresh.IsValid() && !fresh.is_directory;
+  if (cacheable) {
+    AUTO_EXCLUSIVE_LOCK(lock, &mu_);
+    file_ids_.emplace(path, fresh);
+  }
+  return fresh;
+}
+
+// Process-wide instance; created by Init() and destroyed by Quit().
+GlobalFileIdCache* GlobalFileIdCache::instance_ = nullptr;
+
+// Creates the global instance. Must not be called while one already exists.
+/* static */
+void GlobalFileIdCache::Init() {
+  CHECK(instance_ == nullptr);
+  instance_ = new GlobalFileIdCache;
+}
+
+// Destroys the global instance. Requires a prior Init().
+/* static */
+void GlobalFileIdCache::Quit() {
+  CHECK(instance_ != nullptr);
+  delete instance_;
+  instance_ = nullptr;
+}
+
+// Returns the global instance, or nullptr when Init() has not been called
+// (or Quit() already ran).
+/* static */
+GlobalFileIdCache* GlobalFileIdCache::Instance() {
+  return instance_;
+}
+
+// The constructing thread starts out as the owner of the cache.
+FileIdCache::FileIdCache()
+    : is_acquired_(true), owner_thread_id_(GetCurrentThreadId()) {
+}
+
+// In debug builds, checks that the cache is destroyed either unowned or by
+// its owner thread.
+FileIdCache::~FileIdCache() {
+  DCHECK(!is_acquired_ || THREAD_ID_IS_SELF(owner_thread_id_));
+}
+
+// Returns the FileId for |filename|, which must be an absolute path. The
+// result, valid or not, is remembered in this cache; a miss is resolved
+// through GlobalFileIdCache when it exists, otherwise by querying the file
+// directly.
+FileId FileIdCache::Get(const string& filename) {
+  DCHECK(is_acquired_ && THREAD_ID_IS_SELF(owner_thread_id_));
+  DCHECK(file::IsAbsolutePath(filename)) << filename;
+
+  const FileIdMap::const_iterator cached = file_ids_.find(filename);
+  if (cached != file_ids_.end()) {
+    return cached->second;
+  }
+
+  GlobalFileIdCache* const global = GlobalFileIdCache::Instance();
+  const FileId id =
+      (global != nullptr) ? global->Get(filename) : FileId(filename);
+
+  file_ids_.insert(std::make_pair(filename, id));
+  return id;
+}
+
+// Drops every cached FileId. Must be called by the owner thread.
+void FileIdCache::Clear() {
+  DCHECK(is_acquired_ && THREAD_ID_IS_SELF(owner_thread_id_));
+  file_ids_.clear();
+}
+
+// Makes the calling thread the owner. The cache must currently be unowned.
+void FileIdCache::AcquireOwner() {
+  DCHECK(!is_acquired_);
+  owner_thread_id_ = GetCurrentThreadId();
+  is_acquired_ = true;
+}
+
+// Gives up ownership. Must be called by the current owner thread.
+void FileIdCache::ReleaseOwner() {
+  DCHECK(is_acquired_ && THREAD_ID_IS_SELF(owner_thread_id_));
+  is_acquired_ = false;
+}
+
+}  // namespace devtools_goma
diff --git a/client/file_id_cache.h b/client/file_id_cache.h
new file mode 100644
index 0000000..2bf0a04
--- /dev/null
+++ b/client/file_id_cache.h
@@ -0,0 +1,76 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_FILE_ID_CACHE_H_
+#define DEVTOOLS_GOMA_CLIENT_FILE_ID_CACHE_H_
+
+#include <string>
+#include <sstream>
+#include <vector>
+
+#include "basictypes.h"
+#include "file_id.h"
+#include "lockhelper.h"
+#include "platform_thread.h"
+#include "thread_annotations.h"
+#include "unordered.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// GlobalFileIdCache caches FileIds globally.
+// This only holds valid and non-directory FileIds.
+// The instance of this class is thread-safe.
+class GlobalFileIdCache {
+ public:
+  // Returns the FileId of |path|, from the cache when present.
+  FileId Get(const string& path);
+
+  // Lifecycle of the process-wide instance. Instance() returns nullptr
+  // outside of an Init()/Quit() pair.
+  static void Init();
+  static void Quit();
+  static GlobalFileIdCache* Instance();
+
+ private:
+  ReadWriteLock mu_;
+  // Path -> FileId, for valid non-directory files only.
+  std::unordered_map<string, FileId> file_ids_ GUARDED_BY(mu_);
+
+  static GlobalFileIdCache* instance_;
+};
+
+// FileIdCache caches FileIds.
+// Instance of this class is thread-unsafe.
+// Instead of locking, it tracks a single owner thread; debug builds DCHECK
+// that Get(), Clear() and ReleaseOwner() are called by that owner.
+class FileIdCache {
+ public:
+  // The constructing thread becomes the initial owner.
+  FileIdCache();
+  ~FileIdCache();
+
+  // Returns FileId cache if any. If not, we create FileId for |filename|.
+  FileId Get(const string& filename);
+
+  // Clears all caches.
+  void Clear();
+
+  // Caller thread takes ownership of the instance of FileIdCache.
+  void AcquireOwner();
+
+  // Caller thread releases ownership of the instance of FileIdCache.
+  void ReleaseOwner();
+
+  friend class DepsCacheTest;
+
+ private:
+  typedef unordered_map<string, FileId> FileIdMap;
+
+  // True while some thread owns the cache.
+  bool is_acquired_;
+  // Thread recorded as owner by the constructor or AcquireOwner().
+  PlatformThreadId owner_thread_id_;
+
+  // Filename -> FileId; unlike the global cache this also keeps invalid
+  // and directory entries.
+  FileIdMap file_ids_;
+
+  DISALLOW_COPY_AND_ASSIGN(FileIdCache);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_FILE_ID_CACHE_H_
diff --git a/client/filename_id_table.cc b/client/filename_id_table.cc
new file mode 100644
index 0000000..81d6288
--- /dev/null
+++ b/client/filename_id_table.cc
@@ -0,0 +1,142 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "filename_id_table.h"
+
+#include <algorithm>
+#include <limits>
+
+#include "prototmp/deps_cache_data.pb.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+const FilenameIdTable::Id FilenameIdTable::kInvalidId = -1;
+
+FilenameIdTable::FilenameIdTable()
+    : next_available_id_(0) {  // The first id handed out is 0.
+}
+
+size_t FilenameIdTable::Size() const {
+  AUTO_SHARED_LOCK(lock, &mu_);  // Read-only access: a shared lock suffices.
+  return map_to_filename_.size();  // Number of registered ids.
+}
+
+void FilenameIdTable::Clear() {
+  AUTO_EXCLUSIVE_LOCK(lock, &mu_);  // Mutates the maps: exclusive lock.
+  ClearUnlocked();
+}
+
+void FilenameIdTable::ClearUnlocked() {
+  map_to_filename_.clear();  // Both directions of the mapping are dropped.
+  map_to_id_.clear();
+  next_available_id_ = 0;  // Id assignment restarts from 0.
+}
+
+bool FilenameIdTable::LoadFrom(const GomaFilenameIdTable& table,
+                               unordered_set<FilenameIdTable::Id>* valid_ids) {
+  AUTO_EXCLUSIVE_LOCK(lock, &mu_);
+
+  for (const auto& record : table.record()) {
+    if (InsertEntryUnlocked(record.filename(), record.filename_id())) {
+      if (valid_ids)
+        valid_ids->insert(record.filename_id());
+      continue;
+    }
+    // A single bad record invalidates the whole load: drop everything.
+    LOG(WARNING) << "Invalid filename_id entry detected: "
+                 << record.filename() << " " << record.filename_id();
+    ClearUnlocked();
+    if (valid_ids)
+      valid_ids->clear();
+    return false;
+  }
+
+  return true;
+}
+
+void FilenameIdTable::SaveTo(const std::set<FilenameIdTable::Id>& ids,
+                             GomaFilenameIdTable* table) const {
+  AUTO_SHARED_LOCK(lock, &mu_);
+
+  // Append one record per registered (id, filename) pair whose id is
+  // listed in |ids|; every other entry is left out of |table|.
+  for (const auto& id_and_name : map_to_filename_) {
+    const FilenameIdTable::Id id = id_and_name.first;
+    if (ids.find(id) == ids.end())
+      continue;
+
+    GomaFilenameIdTableRecord* saved = table->add_record();
+    saved->set_filename_id(id);
+    saved->set_filename(id_and_name.second);
+  }
+}
+
+bool FilenameIdTable::InsertEntryUnlocked(const string& filename,
+                                           FilenameIdTable::Id id) {
+  // The largest Id is rejected too: |id + 1| below would overflow (UB).
+  if (id < 0 || id == std::numeric_limits<Id>::max() || filename.empty())
+    return false;
+  // |id| must not already be bound to a different filename.
+  auto it_to_filename = map_to_filename_.find(id);
+  if (it_to_filename != map_to_filename_.end() &&
+      it_to_filename->second != filename) {
+    return false;
+  }
+  // |filename| must not already be bound to a different id.
+  auto it_to_id = map_to_id_.find(filename);
+  if (it_to_id != map_to_id_.end() && it_to_id->second != id)
+    return false;
+  map_to_filename_[id] = filename;
+  map_to_id_[filename] = id;
+  next_available_id_ = std::max(next_available_id_, id + 1);
+  return true;
+}
+
+FilenameIdTable::Id FilenameIdTable::InsertFilename(const string& filename) {
+  if (filename.empty())
+    return kInvalidId;
+  // First try a lookup under the shared lock only.
+  {
+    AUTO_SHARED_LOCK(lock, &mu_);
+    Id id = LookupIdUnlocked(filename);
+    if (id != kInvalidId) {
+      return id;
+    }
+  }
+  // Look up again under the exclusive lock before inserting.
+  AUTO_EXCLUSIVE_LOCK(lock, &mu_);
+  Id id = LookupIdUnlocked(filename);
+  if (id != kInvalidId) {
+    return id;
+  }
+  // Not registered yet: bind |filename| to the next unused id.
+  map_to_id_[filename] = next_available_id_;
+  map_to_filename_[next_available_id_] = filename;
+  return next_available_id_++;
+}
+
+FilenameIdTable::Id FilenameIdTable::LookupIdUnlocked(
+    const string& filename) const {
+  // Does not lock by itself; callers in this file hold |mu_| already.
+  // Names that were never registered map to kInvalidId.
+  const auto found = map_to_id_.find(filename);
+  return found != map_to_id_.end() ? found->second : kInvalidId;
+}
+
+string FilenameIdTable::ToFilename(FilenameIdTable::Id id) const {
+  AUTO_SHARED_LOCK(lock, &mu_);
+  const auto found = map_to_filename_.find(id);
+  if (found != map_to_filename_.end())
+    return found->second;
+  return string();  // Unknown id: empty string.
+}
+
+FilenameIdTable::Id FilenameIdTable::ToId(const string& filename) const {
+  AUTO_SHARED_LOCK(lock, &mu_);  // Read-only lookup.
+  return LookupIdUnlocked(filename);  // kInvalidId if not registered.
+}
+
+}  // namespace devtools_goma
diff --git a/client/filename_id_table.h b/client/filename_id_table.h
new file mode 100644
index 0000000..6eb9c1c
--- /dev/null
+++ b/client/filename_id_table.h
@@ -0,0 +1,76 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_FILENAME_ID_TABLE_H_
+#define DEVTOOLS_GOMA_CLIENT_FILENAME_ID_TABLE_H_
+
+#include <set>
+#include <string>
+
+#include "autolock_timer.h"
+#include "unordered.h"
+
+namespace devtools_goma {
+
+class GomaFilenameIdTable;
+
+// FilenameIdTable converts filepath <-> integer id.
+// The instance of this class is thread-safe (all state is guarded by |mu_|).
+class FilenameIdTable {
+ public:
+  typedef int Id;
+
+  static const Id kInvalidId;  // Negative; never bound to a filename.
+
+  FilenameIdTable();
+
+  size_t Size() const;  // Number of registered filenames.
+
+  // Clears all data.
+  void Clear();
+
+  // Loads the data from |table|. If loading failed (because of duplicated
+  // entry etc.), the table and |valid_ids| are cleared and false is returned.
+  // On success |valid_ids| (if not null) receives every loaded id.
+  bool LoadFrom(const GomaFilenameIdTable& table,
+                unordered_set<FilenameIdTable::Id>* valid_ids);
+  // Saves the data to |table|. Only entries whose id is in |ids| are saved.
+  void SaveTo(const std::set<Id>& ids, GomaFilenameIdTable* table) const;
+
+  // Inserts |filename|.
+  // If |filename| is a new one, a new Id will be returned.
+  // If |filename| is already inserted, the corresponding Id is returned.
+  // If |filename| is empty, kInvalidId is returned.
+  Id InsertFilename(const std::string& filename);
+
+  // Converts |id| to filename. If |id| is not registered, empty string will
+  // be returned.
+  std::string ToFilename(Id id) const;
+
+  // Converts |filename| to Id. If |filename| is not registered,
+  // kInvalidId is returned.
+  Id ToId(const std::string& filename) const;
+
+ private:
+  bool InsertEntryUnlocked(const std::string& filename, Id id)
+      EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+  void ClearUnlocked()
+      EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+  Id LookupIdUnlocked(const std::string& filename) const
+      SHARED_LOCKS_REQUIRED(mu_);
+
+  ReadWriteLock mu_;
+  Id next_available_id_ GUARDED_BY(mu_);
+  unordered_map<Id, std::string> map_to_filename_ GUARDED_BY(mu_);
+  unordered_map<std::string, Id> map_to_id_ GUARDED_BY(mu_);
+
+  DISALLOW_COPY_AND_ASSIGN(FilenameIdTable);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_FILENAME_ID_TABLE_H_
diff --git a/client/filename_id_table_unittest.cc b/client/filename_id_table_unittest.cc
new file mode 100644
index 0000000..8b812fc
--- /dev/null
+++ b/client/filename_id_table_unittest.cc
@@ -0,0 +1,154 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "filename_id_table.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "prototmp/deps_cache_data.pb.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+TEST(FilenameIdTableTest, SaveLoad) {  // Round-trips a subset of the ids.
+  FilenameIdTable table;
+  FilenameIdTable::Id id_a = table.InsertFilename("a");
+  FilenameIdTable::Id id_b = table.InsertFilename("b");
+  FilenameIdTable::Id id_c = table.InsertFilename("c");
+
+  // Saves only 'a' and 'b'.
+  std::set<FilenameIdTable::Id> ids;
+  ids.insert(id_a);
+  ids.insert(id_b);
+
+  GomaFilenameIdTable goma_table;
+  table.SaveTo(ids, &goma_table);
+  table.Clear();
+
+  EXPECT_EQ(FilenameIdTable::kInvalidId, table.ToId("a"));
+
+  unordered_set<FilenameIdTable::Id> valid_ids;
+  EXPECT_TRUE(table.LoadFrom(goma_table, &valid_ids));
+
+  EXPECT_EQ(id_a, table.ToId("a"));
+  EXPECT_EQ(id_b, table.ToId("b"));
+  // 'c' is not saved, so kInvalidId should be returned.
+  EXPECT_EQ(FilenameIdTable::kInvalidId, table.ToId("c"));
+
+  // id_a, and id_b should be valid. However, since id_c was removed,
+  // it shouldn't be valid.
+  EXPECT_GT(valid_ids.count(id_a), 0U);
+  EXPECT_GT(valid_ids.count(id_b), 0U);
+  EXPECT_EQ(valid_ids.count(id_c), 0U);
+}
+
+TEST(FilenameIdTableTest, LoadFailedDuplicateId) {  // a and b share id 0.
+  FilenameIdTable table;
+
+  GomaFilenameIdTable goma_table;
+  GomaFilenameIdTableRecord* record = goma_table.add_record();
+  record->set_filename("a");
+  record->set_filename_id(0);
+
+  record = goma_table.add_record();
+  record->set_filename("b");
+  record->set_filename_id(0);
+
+  unordered_set<FilenameIdTable::Id> valid_ids;
+  EXPECT_FALSE(table.LoadFrom(goma_table, &valid_ids));
+
+  EXPECT_TRUE(valid_ids.empty());  // A failed load leaves no valid ids.
+}
+
+TEST(FilenameIdTableTest, LoadFailedDuplicateFilename) {  // a has two ids.
+  FilenameIdTable table;
+
+  GomaFilenameIdTable goma_table;
+  GomaFilenameIdTableRecord* record = goma_table.add_record();
+  record->set_filename("a");
+  record->set_filename_id(0);
+
+  record = goma_table.add_record();
+  record->set_filename("a");
+  record->set_filename_id(1);
+
+  unordered_set<FilenameIdTable::Id> valid_ids;
+  EXPECT_FALSE(table.LoadFrom(goma_table, &valid_ids));
+
+  EXPECT_TRUE(valid_ids.empty());  // A failed load leaves no valid ids.
+}
+
+TEST(FilenameIdTableTest, Clear) {  // Clear() forgets both directions.
+  FilenameIdTable table;
+  FilenameIdTable::Id id_a = table.InsertFilename("a");
+
+  table.Clear();
+
+  EXPECT_EQ("", table.ToFilename(id_a));
+  EXPECT_EQ(FilenameIdTable::kInvalidId, table.ToId("a"));
+}
+
+TEST(FilenameIdTableTest, InsertFilename) {  // Ids are 0, 1, 2, ... in order.
+  FilenameIdTable table;
+  EXPECT_EQ(0, table.InsertFilename("a.cc"));
+  EXPECT_EQ(1, table.InsertFilename("b.cc"));
+  EXPECT_EQ(2, table.InsertFilename("c.cc"));
+
+  // OK to insert the same filename again; the existing id is returned.
+  EXPECT_EQ(0, table.InsertFilename("a.cc"));
+  EXPECT_EQ(1, table.InsertFilename("b.cc"));
+  EXPECT_EQ(2, table.InsertFilename("c.cc"));
+
+  // empty string cannot be inserted.
+  EXPECT_EQ(FilenameIdTable::kInvalidId, table.InsertFilename(""));
+}
+
+TEST(FilenameIdTableTest, DontNormalize) {  // Paths are compared verbatim.
+  FilenameIdTable table;
+  FilenameIdTable::Id a = table.InsertFilename("/tmp/a");
+  FilenameIdTable::Id b = table.InsertFilename("/tmp/a/../a");
+  FilenameIdTable::Id c = table.InsertFilename("/tmp/a/../../tmp/a");
+
+  EXPECT_NE(a, b);
+  EXPECT_NE(b, c);
+  EXPECT_NE(c, a);
+
+  EXPECT_EQ("/tmp/a", table.ToFilename(a));
+  EXPECT_EQ("/tmp/a/../a", table.ToFilename(b));
+  EXPECT_EQ("/tmp/a/../../tmp/a", table.ToFilename(c));
+}
+
+TEST(FilenameIdTableTest, ToFilename) {  // id -> filename lookups.
+  FilenameIdTable table;
+  FilenameIdTable::Id id_a = table.InsertFilename("a.cc");
+  FilenameIdTable::Id id_b = table.InsertFilename("b.cc");
+  FilenameIdTable::Id id_c = table.InsertFilename("c.cc");
+
+  EXPECT_EQ("a.cc", table.ToFilename(id_a));
+  EXPECT_EQ("b.cc", table.ToFilename(id_b));
+  EXPECT_EQ("c.cc", table.ToFilename(id_c));
+
+  EXPECT_EQ("", table.ToFilename(100));  // Unregistered ids give "".
+  EXPECT_EQ("", table.ToFilename(200));
+  EXPECT_EQ("", table.ToFilename(FilenameIdTable::kInvalidId));
+}
+
+TEST(FilenameIdTableTest, ToId) {  // filename -> id lookups.
+  FilenameIdTable table;
+  FilenameIdTable::Id id_a = table.InsertFilename("a.cc");
+  FilenameIdTable::Id id_b = table.InsertFilename("b.cc");
+  FilenameIdTable::Id id_c = table.InsertFilename("c.cc");
+
+  EXPECT_EQ(id_a, table.ToId("a.cc"));
+  EXPECT_EQ(id_b, table.ToId("b.cc"));
+  EXPECT_EQ(id_c, table.ToId("c.cc"));
+
+  EXPECT_EQ(FilenameIdTable::kInvalidId, table.ToId("d.cc"));  // Unknown.
+  EXPECT_EQ(FilenameIdTable::kInvalidId, table.ToId(""));
+}
+
+}  // namespace devtools_goma
diff --git a/client/filetime_win.cc b/client/filetime_win.cc
new file mode 100644
index 0000000..8b1fb8d
--- /dev/null
+++ b/client/filetime_win.cc
@@ -0,0 +1,18 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "filetime_win.h"
+
+namespace devtools_goma {
+
+time_t ConvertFiletimeToUnixTime(const FILETIME& filetime) {
+  ULARGE_INTEGER ticks;  // 64-bit view of the two 32-bit FILETIME halves.
+  ticks.LowPart = filetime.dwLowDateTime;
+  ticks.HighPart = filetime.dwHighDateTime;
+  return (ticks.QuadPart / PRECISION_DIVIDER)
+      - (DELTA_EPOCH_IN_MICROSECS / 1000000);  // time_t is in seconds.
+}
+
+}  // namespace devtools_goma
diff --git a/client/filetime_win.h b/client/filetime_win.h
new file mode 100644
index 0000000..e077a60
--- /dev/null
+++ b/client/filetime_win.h
@@ -0,0 +1,26 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_FILETIME_WIN_H_
+#define DEVTOOLS_GOMA_CLIENT_FILETIME_WIN_H_
+
+#ifndef _WIN32
+#error Win32 only
+#endif
+
+#include <ctime>
+
+#include "config_win.h"
+
+namespace devtools_goma {
+
+// FILETIME contains a 64-bit value representing the number of 100-nanosecond
+// intervals since January 1, 1601 (UTC).
+// time_t is the number of seconds since January 1, 1970 (UTC).
+time_t ConvertFiletimeToUnixTime(const FILETIME& filetime);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_FILETIME_WIN_H_
diff --git a/client/framework_path_resolver.cc b/client/framework_path_resolver.cc
new file mode 100644
index 0000000..c152a58
--- /dev/null
+++ b/client/framework_path_resolver.cc
@@ -0,0 +1,101 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "framework_path_resolver.h"
+
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+#include <iterator>
+#include <string>
+#include <vector>
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+
+#include "path.h"
+
+#ifdef _WIN32
+# include "posix_helper_win.h"
+#endif
+
+using std::string;
+
+namespace {
+const char kFrameworkSuffix[] = ".framework";  // Bundle directory suffix.
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+FrameworkPathResolver::FrameworkPathResolver(const string& cwd)
+    : cwd_(cwd) {
+#ifdef __MACH__  // Default framework directories exist only when defined.
+  default_searchpaths_.push_back("/Library/Frameworks");
+  default_searchpaths_.push_back("/System/Library/Frameworks");
+#endif
+}
+
+string FrameworkPathResolver::FrameworkFile(
+    const string& syslibroot,
+    const string& dirname,
+    const string& name,
+    const std::vector<string>& candidates) const {
+  // <syslibroot> joined with <dirname resolved against cwd>/<name>.framework
+  const string search_dir = file::JoinPathRespectAbsolute(cwd_, dirname);
+  const string bundle_dir =
+      file::JoinPathRespectAbsolute(search_dir, name + kFrameworkSuffix);
+  const string framework_root = file::JoinPath(syslibroot, bundle_dir);
+
+  // The first candidate that access() reports as readable wins.
+  for (const string& leaf : candidates) {
+    const string candidate_path = file::JoinPath(framework_root, leaf);
+    VLOG(2) << "check:" << candidate_path;
+    if (access(candidate_path.c_str(), R_OK) == 0)
+      return candidate_path;
+  }
+  return "";
+}
+
+// Resolves "-framework name[,suffix]" to the filename of the framework file.
+// Returns an empty string when no readable candidate is found.
+string FrameworkPathResolver::ExpandFrameworkPath(
+    const string& framework) const {
+  std::vector<string> candidates;
+  string name = framework;
+  const size_t comma = framework.find_first_of(',');
+  if (comma == string::npos) {
+    candidates.push_back(framework);
+  } else {
+    // With a suffix, try name.framework/name_suffix first,
+    // then fall back to name.framework/name.
+    name = framework.substr(0, comma);
+    const string suffix = framework.substr(comma + 1);
+    candidates.push_back(name + "_" + suffix);
+    candidates.push_back(name);
+  }
+
+  // Explicitly appended search paths are used without the syslibroot.
+  for (const auto& dir : searchpaths_) {
+    const string hit = FrameworkFile("", dir, name, candidates);
+    if (!hit.empty())
+      return hit;
+  }
+  // Default search paths are looked up under the syslibroot.
+  for (const auto& dir : default_searchpaths_) {
+    const string hit = FrameworkFile(syslibroot_, dir, name, candidates);
+    if (!hit.empty())
+      return hit;
+  }
+
+  return "";
+}
+
+void FrameworkPathResolver::AppendSearchpaths(
+    const std::vector<string>& searchpaths) {
+  for (const string& dir : searchpaths) searchpaths_.push_back(dir);
+}
+
+}  // namespace devtools_goma
diff --git a/client/framework_path_resolver.h b/client/framework_path_resolver.h
new file mode 100644
index 0000000..74ee68c
--- /dev/null
+++ b/client/framework_path_resolver.h
@@ -0,0 +1,45 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_FRAMEWORK_PATH_RESOLVER_H_
+#define DEVTOOLS_GOMA_CLIENT_FRAMEWORK_PATH_RESOLVER_H_
+
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class FrameworkPathResolver {
+ public:
+  explicit FrameworkPathResolver(const string& cwd);
+  ~FrameworkPathResolver() {}
+
+  // Returns the path of the first readable framework file, or "" if none.
+  string ExpandFrameworkPath(const string& framework) const;
+  void SetSyslibroot(const string& syslibroot) {
+    syslibroot_ = syslibroot;
+  }
+  void AppendSearchpaths(const std::vector<string>& searchpaths);
+
+ private:
+  string FrameworkFile(const string& syslibroot, const string& dirname,
+                       const string& name,
+                       const std::vector<string>& candidates) const;
+
+  const string cwd_;  // Base directory for relative search paths.
+  string syslibroot_;  // Prefix applied to the default search paths only.
+  std::vector<string> searchpaths_;  // Added via AppendSearchpaths().
+  std::vector<string> default_searchpaths_;  // Filled in by the constructor.
+
+  DISALLOW_COPY_AND_ASSIGN(FrameworkPathResolver);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_FRAMEWORK_PATH_RESOLVER_H_
diff --git a/client/genc.py b/client/genc.py
new file mode 100755
index 0000000..29a9030
--- /dev/null
+++ b/client/genc.py
@@ -0,0 +1,89 @@
+#!/usr/bin/python
+#
+# Copyright 2010 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Generate C source file to embed given file.
+
+Usage:
+ % genc.py file.txt # generates file.c and file.h
+"""
+
+
+
+import optparse
+import os
+import os.path
+import re
+import sys
+
+def writeToEscaping(dst, src):
+  count = 0
+  for c in src.read():
+    v = ord(c)
+    if v > 127:
+      v = -(256 - v)
+    dst.write('%3d, ' % v)
+    count += 1
+    if count >= 16:
+      dst.write('\n')
+      count = 0
+
+def main():
+  parser = optparse.OptionParser()
+  parser.add_option('-o', '--out-dir', default='.',
+                    help='Output directory')
+  parser.add_option('-p', '--prefix', default='',
+                    help=('A structure name given by prefix + basename.'
+                          'Without this flag, given pathame becomes structure '
+                          'name.'))
+  options, args = parser.parse_args()
+  filename = args[0]
+  size = os.stat(filename).st_size
+  symbol = re.sub('[^0-9a-zA-Z]', '_', os.path.basename(filename))
+  symbol = '%s%s' % (options.prefix, symbol)
+  name = os.path.splitext(filename)[0]
+  if options.out_dir:
+    name = os.path.join(options.out_dir, os.path.basename(name))
+  header_file = name + '.h'
+  try:
+    out = open(header_file, 'w')
+    out.write("""
+// This is auto-generated file from %(filename)s. DO NOT EDIT.
+//
+extern "C" {
+const int %(symbol)s_size = %(size)d;
+extern const char %(symbol)s_start[];
+};
+""" % {'filename': filename,
+       'size': size,
+       'symbol': symbol})
+    out.close()
+  except Exception, ex:
+    os.remove(header_file)
+    print 'Failed to generate %s: %s' % (header_file, ex)
+    sys.exit(1)
+
+  c_file = name + '.c'
+  try:
+    dst = open(c_file, 'wb')
+    dst.write("""
+// This is auto-generated file from %(filename)s. DO NOT EDIT.
+//
+const char %(symbol)s_start[] = {
+""" % {'filename': filename,
+       'symbol': symbol})
+    src = open(filename, 'rb')
+    writeToEscaping(dst, src)
+    dst.write('};\n')
+    src.close()
+    dst.close()
+  except Exception, ex:
+    os.remove(c_file)
+    print 'Failed to generate %s: %s' % (c_file, ex)
+    sys.exit(1)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/client/generate_compiler_proxy_info.py b/client/generate_compiler_proxy_info.py
new file mode 100755
index 0000000..467d9db
--- /dev/null
+++ b/client/generate_compiler_proxy_info.py
@@ -0,0 +1,129 @@
+#!/usr/bin/python
+#
+# Copyright 2011 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Generates compiler_proxy info string for GOMA.
+
+This will collect user name, build date, changelist number, and
+etc. for use in HTTP RPC for user-agent and compiler_proxy's console.
+User-agents will help going through the server logs when debugging
+and the console will help users to make a better bug report.
+"""
+
+
+
+import datetime
+import getpass
+import optparse
+import os
+import os.path
+import re
+import socket
+import sys
+
+
+def GetUserName():
+  """Returns the current user name, or 'unknown' if it cannot be found."""
+  try:
+    return getpass.getuser()
+  except Exception:
+    # TODO: Do we have better solution here?
+    # When using ninja on Windows, several envvars are dropped.
+    # So getpass.getuser() cannot estimate username correctly.
+    try:
+      import win32api  # pylint: disable=F0401
+      return win32api.GetUserName()
+    except ImportError:
+      return 'unknown'
+
+
+def GetHostName():
+  """Returns the host name string reported by socket.gethostname()."""
+  return socket.gethostname()
+
+
+def GetRevisionNumber():
+  """Obtain a number to represent revision of source code.
+
+  Returns:
+    a revision number string whose format is:
+      <commit hash>@<committer date unix timestamp>
+  """
+  rev_number_file = os.environ.get('COMPILER_PROXY_REVISION_NUMBER_FILE')
+  if rev_number_file:
+    try:
+      with open(rev_number_file) as f:
+        rev = f.read().strip()
+        if re.match(r'[0-9a-f]+@\d+', rev):
+          return rev
+        print 'revision seems not match the pattern: %s' % rev
+    except IOError, ex:
+      print 'cannot open revision number file: %s' % ex
+  # <commit hash>@<committer date unix timestamp>
+  git_hash_output = os.popen('git log -1 --pretty=format:%H@%ct', 'r')
+  git_hash = git_hash_output.read().strip()
+  if git_hash != "":
+    return git_hash
+  print 'Could not get CL information, falling back to unknown.'
+  return 'unknown'
+
+
+def GetDateAndTime():
+  """Returns the current UTC time in ISO format with a 'Z' suffix."""
+  return datetime.datetime.utcnow().isoformat() + "Z"
+
+
+def GetGomaDirectory():
+  """Obtain goma directory: the parent of the current working directory."""
+  return os.path.dirname(os.getcwd())
+
+
+def UserAgentString():
+  # Fields: user name, revision, build time. TODO: add platform string.
+  return 'compiler-proxy built by %s at %s on %s ' % (GetUserName(),
+                                                      GetRevisionNumber(),
+                                                      GetDateAndTime())
+
+
+def GenerateSourceCode(out_dir):
+  info_file = os.path.join(out_dir, 'compiler_proxy_info.h')
+  try:
+    fp = open(info_file, 'w')
+    fp.write("""
+// File autogenerated by generate_compiler_proxy_info.py, do not modify.
+#ifndef COMPILER_PROXY_INFO_H_
+#define COMPILER_PROXY_INFO_H_
+static const char kUserAgentString[] = "%(user_agent)s";
+static const char kBuiltTimeString[] = "%(built_time)s";
+static const char kBuiltDirectoryString[] = "%(built_directory)s";
+static const char kBuiltUserNameString[] = "%(user_name)s";
+static const char kBuiltHostNameString[] = "%(host_name)s";
+static const char kBuiltRevisionString[] = "%(revision)s";
+#endif // COMPILER_PROXY_INFO_H_
+""" % {
+      'user_agent': UserAgentString(),
+      'built_time': GetDateAndTime(),
+      'built_directory': repr(GetGomaDirectory())[1:-1],
+      'user_name': GetUserName(),
+      'host_name': GetHostName(),
+      'revision': GetRevisionNumber(),
+      })
+    fp.close()
+  except Exception, ex:
+    os.remove(info_file)
+    print 'Failed to generate %s: %s' % (info_file, ex)
+    sys.exit(1)
+
+
+def main():
+  parser = optparse.OptionParser()
+  parser.add_option('-o', '--out-dir', default='.',
+                    help='Output directory')
+  options, _ = parser.parse_args()  # Positional arguments are ignored.
+  GenerateSourceCode(options.out_dir)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/client/generate_feature_lists.py b/client/generate_feature_lists.py
new file mode 100755
index 0000000..d01746b
--- /dev/null
+++ b/client/generate_feature_lists.py
@@ -0,0 +1,158 @@
+#!/usr/bin/python
+#
+# Copyright 2012 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Generates the lists of clangs features.
+
+See: http://clang.llvm.org/docs/LanguageExtensions.html#feature-checking-macros
+"""
+
+
+
+import re
+import urllib2
+
+BASE_URL = 'http://llvm.org/svn/llvm-project/cfe/trunk'
+ATTR_URL = BASE_URL + '/include/clang/Basic/Attr.td'
+PPMACRO_EXPANSION_URL = BASE_URL + '/lib/Lex/PPMacroExpansion.cpp'
+BUILTINS_URL = BASE_URL + '/include/clang/Basic/Builtins.def'
+CASE_NAME_PATTERN = re.compile(r'Case\("(.*?)"')
+ATTR_NAME_IN_BRACKETS_PATTERN = re.compile(r'(.*?)<"(.*?)">')
+CPP_ATTR_NAME_IN_BRACKETS_PATTERN = re.compile(
+    r'CXX11<"(.*?)",\s*"(.*?)"(.*?)>')
+DECLSPEC_NAME_IN_BRACKETS_PATTERN = re.compile(r'Declspec<"(.*?)">')
+SPELLINGS_PATTERN = re.compile(r'let Spellings = \[(.*?)\];',
+                               re.MULTILINE | re.DOTALL)
+BUILTINS_PATTERN = re.compile(r'BUILTIN\((\w+),')
+
+class Error(Exception):  # Raised when scraped text cannot be parsed.
+  pass
+
+
+def GetRevision():
+  trunk = urllib2.urlopen(BASE_URL).read()
+  matched = re.search('Revision (\d+):', trunk)
+  if matched:
+    return matched.group(1)
+  raise Error('Failed to parse revision.')
+
+
+def ScrapeFunction(source, function_name):
+  m = re.search(function_name + r'.*{', source)
+  if not m:
+    raise Error(function_name + ' not found')
+
+  s = source[m.end():]
+  m = re.search('\n}', s)
+  if not m:
+    raise Error(function_name + " doesn't end")
+
+  return s[:m.start()]
+
+
+# Fetch all required data.
+revision = GetRevision()
+ppmacro_expansion = urllib2.urlopen(PPMACRO_EXPANSION_URL).read()
+attr_td = urllib2.urlopen(ATTR_URL).read()
+builtins_def = urllib2.urlopen(BUILTINS_URL).read()
+
+print '// This is auto-generated file from generate_feature_list.py.'
+print '// Clang revision: %s.' % revision
+print '// *** DO NOT EDIT ***'
+
+# __has_feature: the Case("...") names found in the HasFeature function.
+featureFunc = ScrapeFunction(ppmacro_expansion, 'HasFeature')
+features = CASE_NAME_PATTERN.findall(featureFunc)
+features.sort()
+print
+print 'static const char* KNOWN_FEATURES[] = {'
+for feature in features:
+  print '  "%s",' % feature
+print '};'
+print 'static const unsigned long NUM_KNOWN_FEATURES ='
+print '    sizeof(KNOWN_FEATURES) / sizeof(KNOWN_FEATURES[0]);'
+
+# __has_extension: the Case("...") names found in the HasExtension function.
+extensionFunc = ScrapeFunction(ppmacro_expansion, 'HasExtension')
+extensions = CASE_NAME_PATTERN.findall(extensionFunc)
+extensions.sort()
+print
+print 'static const char* KNOWN_EXTENSIONS[] = {'
+for extension in extensions:
+  print '  "%s",' % extension
+print '};'
+print 'static const unsigned long NUM_KNOWN_EXTENSIONS ='
+print '    sizeof(KNOWN_EXTENSIONS) / sizeof(KNOWN_EXTENSIONS[0]);'
+
+# __has_attribute
+attributes = set()
+for spellings in re.findall(SPELLINGS_PATTERN, attr_td):
+  for entry in ATTR_NAME_IN_BRACKETS_PATTERN.findall(spellings):
+    if entry[0] == "Pragma":
+      # Ignore attribute used with pragma.
+      # Pragma seems to be only used for #pragma.
+      # It also caused the issue. (b/63365915)
+      continue
+    attr = entry[1]
+    if '"' in attr:
+      l = attr.split('"')
+      attr = l[len(l) - 1]
+    if attr and not attr in attributes:
+      attributes.add(attr)
+attributes = list(attributes)
+attributes.sort()
+print
+print 'static const char* KNOWN_ATTRIBUTES[] = {'
+print '\n'.join(['  "%s",' % attr for attr in attributes])
+print '};'
+print 'static const unsigned long NUM_KNOWN_ATTRIBUTES ='
+print '    sizeof(KNOWN_ATTRIBUTES) / sizeof(KNOWN_ATTRIBUTES[0]);'
+
+# __has_cpp_attribute: CXX11<"namespace", "name", ...> spellings.
+# CXX11<"clang", "fallthrough", 1>  --> clang::fallthrough
+# CXX11<"", "noreturn">             --> noreturn
+cpp_attributes = set()
+for spellings in re.findall(SPELLINGS_PATTERN, attr_td):
+  for attr in CPP_ATTR_NAME_IN_BRACKETS_PATTERN.findall(spellings):
+    namespace = attr[0]  # First quoted string; may be empty.
+    name = attr[1]  # Second quoted string.
+    if namespace:
+      cpp_attributes.add(namespace + '::' + name)
+    else:
+      cpp_attributes.add(name)
+cpp_attributes = list(cpp_attributes)
+cpp_attributes.sort()
+print
+print 'static const char* KNOWN_CPP_ATTRIBUTES[] = {'
+print '\n'.join(['  "%s",' % attr for attr in cpp_attributes])
+print '};'
+print 'static const unsigned long NUM_KNOWN_CPP_ATTRIBUTES ='
+print '    sizeof(KNOWN_CPP_ATTRIBUTES) / sizeof(KNOWN_CPP_ATTRIBUTES[0]);'
+
+# __has_declspec_attribute
+declspec_attributes = set()
+for spellings in re.findall(SPELLINGS_PATTERN, attr_td):
+  for attr in DECLSPEC_NAME_IN_BRACKETS_PATTERN.findall(spellings):
+    declspec_attributes.add(attr)
+declspec_attributes = list(declspec_attributes)
+declspec_attributes.sort()
+print
+print 'static const char* KNOWN_DECLSPEC_ATTRIBUTES[] = {'
+print '\n'.join(['  "%s",' % attr for attr in attributes])
+print '};'
+print 'static const unsigned long NUM_KNOWN_DECLSPEC_ATTRIBUTES ='
+print '    sizeof(KNOWN_DECLSPEC_ATTRIBUTES) /'
+print '    sizeof(KNOWN_DECLSPEC_ATTRIBUTES[0]);'
+
+# __has_builtin: first argument of every BUILTIN(...) entry, de-duplicated.
+builtins = list(set(re.findall(BUILTINS_PATTERN, builtins_def)))
+builtins.sort()
+print
+print 'static const char* KNOWN_BUILTINS[] = {'
+print '\n'.join(['  "%s",' % builtin for builtin in builtins])
+print '};'
+print 'static const unsigned long NUM_KNOWN_BUILTINS ='
+print '    sizeof(KNOWN_BUILTINS) /'
+print '    sizeof(KNOWN_BUILTINS[0]);'
diff --git a/client/generate_static_darray.py b/client/generate_static_darray.py
new file mode 100755
index 0000000..cc2b09c
--- /dev/null
+++ b/client/generate_static_darray.py
@@ -0,0 +1,248 @@
+#!/usr/bin/python
+#
+# Copyright 2011 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Generates c++ static double array data for given symbols.
+
+This only works well for small number (~dozens) of keywords set.
+
+Usage:
+  To generate enum and static double-array data:
+  % generate_static_darray.py \
+      --keywords=comma,separated,keys \
+      --outfile=<outfile>
+
+"""
+
+
+
+from collections import defaultdict
+import optparse
+import os
+import re
+import random
+import sys
+
+
+# Constants for goma cpp.
+CPP_OUTFILE = 'cpp_parser_darray.h'
+CPP_DIRECTIVES = [
+  'include', 'import', 'include_next', 'define', 'undef',
+  'ifdef', 'ifndef', 'if', 'else', 'endif', 'elif', 'pragma' ]
+CPP_COND_DIRECTIVES = [
+  'ifdef', 'ifndef', 'if', 'else', 'endif', 'elif' ]
+
+# Constants for testing data.
+TEST_OUTFILE = 'static_darray_test_array.h'
+TEST_FALLBACK_DEFAULT_KEYWORDS = ['bird', 'bison', 'cat', 'category']
+
+
+class Trie(object):
+  """A minimal insert-only trie; each edge is one character of a word."""
+  class Node:
+    def __init__(self):
+      self.value = 0  # Replaced by Add() on the node that ends a word.
+      self.children = defaultdict(Trie.Node)  # char -> Node, made on access.
+
+  def __init__(self):
+    self.root = Trie.Node()
+
+  def Add(self, word, value):  # Walks/creates word's path, stores value there.
+    cursor = self.root
+    for ch in word:
+      cursor = cursor.children[ch]
+    cursor.value = value
+
+
+class DoubleArray(object):
+  """A double-array (base/check) form of a Trie, dumpable as a C array."""
+  class Node(object):
+    def __init__(self):
+      self.base = 1  # Offset to the children; a terminal node holds -value.
+      self.check = -1  # Index of the parent node; negative means unused.
+
+  class List(list):
+    def __getitem__(self, index):  # Grows with default Nodes on demand.
+      if index >= len(self):
+        self.extend([DoubleArray.Node() for _ in xrange(index - len(self) + 1)])
+      return list.__getitem__(self, index)
+
+  def __init__(self, encode=None, end_char=None):
+    self.nodes = DoubleArray.List()
+    self.encode = encode  # char -> int code; derived in Build() if missing.
+    self.base_char = None  # Smallest key char; set only by Build().
+    self.end_char = end_char  # Terminator appended to every word.
+
+  def FindBase(self, base, children):  # First base >= |base| with free slots.
+    while True:
+      if all((self.nodes[base + self.encode(c)].check < 0 for c in children)):
+        return base
+      base += 1
+
+  # Transition x -> (c) -> y is stored as:
+  #   nodes[x].base + encode(c) = y
+  #   nodes[y].check = x
+  def AddTrieNode(self, trienode, from_base):
+    base_start = self.FindBase(0, trienode.children)
+    self.nodes[from_base].base = base_start
+    for (c, n) in sorted(trienode.children.iteritems()):  # Claim all slots.
+      base = base_start + self.encode(c)
+      self.nodes[base].check = from_base
+    for (c, n) in sorted(trienode.children.iteritems()):  # Then fill them in.
+      base = base_start + self.encode(c)
+      if n.value:
+        self.nodes[base].base = -n.value  # Truthy value: store it negated.
+      else:
+        self.AddTrieNode(n, base)  # Otherwise lay out this child's subtree.
+
+  def Build(self, dictionary):  # |dictionary| maps word -> value.
+    if not self.encode or not self.end_char:  # Derive both from the keys.
+      min_char = min(map(min, dictionary.iterkeys()))
+      max_char = max(map(max, dictionary.iterkeys()))
+      self.base_char = min_char
+      self.end_char = chr(ord(max_char) + 1)
+      self.encode = lambda c: ord(c) - ord(min_char) + 1  # min_char -> 1.
+
+    trie = Trie()
+    for (word, value) in dictionary.iteritems():
+      trie.Add(word + self.end_char, value)
+    self.AddTrieNode(trie.root, 0)
+
+  def Lookup(self, word):  # Returns -base of the end node, or -1 on a miss.
+    index = 0
+    for c in word + self.end_char:
+      next_index = self.nodes[index].base + self.encode(c)
+      if next_index >= len(self.nodes) or index != self.nodes[next_index].check:
+        return -1
+      index = next_index
+    return -self.nodes[index].base
+
+  def DumpAsCArray(self, name, c_type, out):  # Five "{ base, check }" per row.
+    out.write('const %s %s[] = {\n' % (c_type, name))
+    for i in xrange(0, len(self.nodes), 5):
+      out.write('  ')
+      out.write(', '.join(['{ %d, %d }' % (n.base, n.check)
+                           for n in self.nodes[i:i+5]]))
+      out.write(',\n')
+    out.write('};\n')
+
+
+def PrintKeywordEnumAndArray(keywords, prefix, out, values=None,
+    print_enum=True, print_keywords=True, perform_check=True):
+  """Writes a C++ enum, keyword list and StaticDoubleArray data to |out|.
+  |values| default to keyword positions; the flags switch each part on/off."""
+  if print_enum:
+    out.write('enum %sValue {\n  ' % prefix)
+    camel = lambda word: ''.join([w.title() for w in word.split('_')])
+    out.write(',\n  '.join(('k%s%s' % (prefix, camel(k)) for k in keywords)))
+    out.write('\n};\n')
+
+  if print_keywords:
+    out.write('const char* k%sKeywords[] = {\n  ' % prefix)
+    out.write(',\n  '.join(('"%s"' % k for k in keywords)))
+    out.write('\n};\n')
+
+  nodes_name = 'k%sNodes' % prefix
+  values = values or xrange(len(keywords))
+  da = DoubleArray()
+  da.Build(dict(zip(keywords, values)))
+
+  if perform_check:
+    for (word, value) in zip(keywords, values):
+      assert value == da.Lookup(word)
+      # A keyword minus its last char must miss, unless it is a keyword too.
+      assert word[:-1] in keywords or -1 == da.Lookup(word[:-1])
+
+  da.DumpAsCArray(nodes_name, 'StaticDoubleArray::Node', out)
+  out.write('const StaticDoubleArray k%sArray(' % prefix)
+  out.write('%s, %d, \'%c\', %d);\n' %
+            (nodes_name,               # nodes
+             len(da.nodes),            # nodes_len
+             da.base_char,             # encode_base
+             da.encode(da.end_char)))  # terminate_code
+
+
+def GetRandomKeywords(dictfile, maxwords):
+  """Returns up to |maxwords| random words from |dictfile| (or the defaults)."""
+  file_size = os.path.getsize(dictfile) if os.path.exists(dictfile) else 0
+  if file_size <= 0:  # Missing or empty file: randint(0, -1) would raise.
+    return TEST_FALLBACK_DEFAULT_KEYWORDS
+  keywords = set()
+  with open(dictfile) as d:
+    # Try at most 2 * maxwords to prevent eternal loop.
+    for _ in xrange(2 * maxwords):
+      # Jump to a random byte, discard the (likely partial) line found
+      # there, and use the next full line with non-word chars removed.
+      d.seek(random.randint(0, file_size - 1))
+      d.readline()
+      word = re.sub(r'\W', '', d.readline())
+      if word:
+        keywords.add(word)
+      if len(keywords) >= maxwords:
+        break
+  return keywords or TEST_FALLBACK_DEFAULT_KEYWORDS  # Never return nothing.
+
+
+def main():
+  """Parses the command line and writes the requested double-array data."""
+  option_parser = optparse.OptionParser()
+  option_parser.add_option('', '--keywords', default=None,
+                           help='Comma-separated keywords to encode. '
+                                'If none is given pre-defined keywords for '
+                                'goma will be used.')
+  option_parser.add_option('', '--outfile', default=None,
+                           help='Output file name.  Will output to stdout '
+                                'if none is given.')
+  option_parser.add_option('', '--prefix', default='DArray',
+                           help='Prefix string used to make up an enum '
+                                'and array name.  Used only if --keywords '
+                                'is given.')
+  option_parser.add_option('-o', '--out-dir', default='.',
+                           help='Output directory')
+  option_parser.add_option('', '--test', action='store_true', default=False,
+                           help='Generate test array data.  Other parameter '
+                                'is ignored if this is given.')
+  option_parser.add_option('', '--verbose', action='store_true', default=False,
+                           help='Be verbose.')
+  options, _ = option_parser.parse_args()
+  random.seed()
+  keywords = []
+  if options.test:
+    options.outfile = TEST_OUTFILE
+    keywords = GetRandomKeywords('/usr/share/dict/words', 30)
+  elif options.keywords:
+    keywords = options.keywords.split(',')
+  elif not options.outfile:
+    options.outfile = CPP_OUTFILE
+
+  out = sys.stdout
+  outfile = None
+  try:
+    if options.outfile:
+      outfile = os.path.join(options.out_dir, options.outfile)
+      out = open(outfile, 'w')
+    if keywords:
+      PrintKeywordEnumAndArray(keywords, options.prefix, out)
+    else:
+      PrintKeywordEnumAndArray(CPP_DIRECTIVES, 'Directive', out)
+      PrintKeywordEnumAndArray(
+          CPP_COND_DIRECTIVES, 'ConditionalDirective', out,
+          [CPP_DIRECTIVES.index(v) for v in CPP_COND_DIRECTIVES],
+          print_enum=False, print_keywords=False)
+    if outfile:
+      out.close()
+      if options.verbose:
+        print 'Generated enum and double-array data into "%s".' % outfile
+  except Exception, ex:
+    if out is not sys.stdout:
+      out.close()  # Release the handle before deleting the partial file.
+    if outfile and os.path.exists(outfile):
+      os.remove(outfile)
+    print >>sys.stderr, 'Failed to generate %s: %s' % (outfile or 'output', ex)
+    sys.exit(1)
+
+
+if __name__ == "__main__":
+  sys.exit(main())
diff --git a/client/gettimeofday_helper_win.cc b/client/gettimeofday_helper_win.cc
new file mode 100644
index 0000000..42816e9
--- /dev/null
+++ b/client/gettimeofday_helper_win.cc
@@ -0,0 +1,62 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include <time.h>
+
+#include "gettimeofday_helper_win.h"
+
+namespace devtools_goma {
+
+// TODO: Modify following function to be test-able.
+int gettimeofday(struct timeval* tv, struct timezone* tz) {
+  // Non-zero once _tzset() has been called by an earlier request for |tz|.
+  static int tzflag = 0;
+
+  if (tv != nullptr) {
+    // GetSystemTimeAsFileTime returns the number of 100 nanosecond
+    // intervals since Jan 1, 1601, split into two 32 bit halves.
+    FILETIME now;
+    GetSystemTimeAsFileTime(&now);
+
+    // Join the halves: take the high bits first, shift them left by 32,
+    // then or in the low 32 bits.
+    unsigned __int64 usec = now.dwHighDateTime;
+    usec <<= 32;
+    usec |= now.dwLowDateTime;
+
+    // Convert to microseconds by dividing by 10
+    usec /= 10;
+
+    // The Unix epoch starts on Jan 1 1970.  Subtract the distance from
+    // Jan 1 1601, which the constant expresses in microseconds.
+    usec -= DELTA_EPOCH_IN_MICROSECS;
+
+    // Whole seconds go to tv_sec; the remaining microseconds to tv_usec.
+    const unsigned __int64 kUsecPerSec = 1000000UL;
+    tv->tv_sec = (long)(usec / kUsecPerSec);
+    tv->tv_usec = (long)(usec % kUsecPerSec);
+  }
+
+  if (tz != nullptr) {
+    if (tzflag == 0) {
+      _tzset();
+      ++tzflag;
+    }
+
+    // The offset from _get_timezone is divided by 60 to get the minutes
+    // west of Greenwich; on failure the field is left untouched.
+    long utc_offset;
+    if (_get_timezone(&utc_offset) == 0)
+      tz->tz_minuteswest = utc_offset / 60;
+    // Likewise tz_dsttime is written only when _get_daylight succeeds.
+    int daylight_hours;
+    if (_get_daylight(&daylight_hours) == 0)
+      tz->tz_dsttime = daylight_hours;
+  }
+
+  return 0;
+}
+
+}  // namespace devtools_goma
diff --git a/client/gettimeofday_helper_win.h b/client/gettimeofday_helper_win.h
new file mode 100644
index 0000000..6ade891
--- /dev/null
+++ b/client/gettimeofday_helper_win.h
@@ -0,0 +1,23 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_GETTIMEOFDAY_HELPER_WIN_H_
+#define DEVTOOLS_GOMA_CLIENT_GETTIMEOFDAY_HELPER_WIN_H_
+
+#include "config_win.h"
+#include <winsock2.h>  // for timeval
+
+namespace devtools_goma {
+
+struct timezone {
+  int tz_minuteswest;  // minutes west of Greenwich
+  int tz_dsttime;      // filled from _get_daylight() by gettimeofday()
+};
+
+int gettimeofday(struct timeval* tv, struct timezone* tz);  // Null args are skipped.
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_GETTIMEOFDAY_HELPER_WIN_H_
diff --git a/client/goma-make b/client/goma-make
new file mode 100755
index 0000000..1746d6f
--- /dev/null
+++ b/client/goma-make
@@ -0,0 +1,146 @@
+#!/usr/bin/python
+#
+# Copyright 2012 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""A thin wrapper of make command for Chromium OS build.
+
+Unfortunately, there are several packages with which we cannot specify
+-j option. For such packages, we modify PATH environment variable not
+to use goma.
+"""
+
+import os
+import sys
+
+
+_DEFAULT_BLACKLIST = [
+    '/dev-libs/nss',  # make -j fails
+    '/app-crypt/nss',  # make -j fails
+    '/dev-libs/m17n-lib',  # make -j fails
+    '/sys-fs/mtools',  # make -j fails
+    '/dev-java/icedtea',  # make -j fails
+    '/dev-libs/openssl',  # Makefile force -j1
+    ]
+
+
+def ParseBlacklistContents(contents):
+  """Splits blacklist file contents into a list of blacklisted directories.
+
+  Args:
+    contents: contents in a blacklist file, one directory per line.
+
+  Returns:
+    a list of the lines, stripped of surrounding whitespace, minus empty ones.
+  """
+  stripped = (line.strip() for line in contents.splitlines())
+  return [entry for entry in stripped if entry]
+
+
+def GetBlacklist():
+  """Returns list of portage package names to blacklist.
+
+  If $GOMACC_BLACKLIST is set to a non-empty value, it names a file whose
+  contents are used instead of the default list.
+  If $GOMACC_BLACKLIST is unset or empty, we use _DEFAULT_BLACKLIST.
+  The file lists one directory per line (empty lines are ignored), like:
+  /dev-libs/nss
+  /sys-fs/mtools
+
+  Returns:
+    a list of blacklisted directories.
+  """
+  blacklist_file = os.getenv('GOMACC_BLACKLIST')
+  if not blacklist_file:
+    return _DEFAULT_BLACKLIST
+  # Use a context manager so the file handle is closed right after reading.
+  with open(blacklist_file) as f:
+    return ParseBlacklistContents(f.read())
+
+
+def IsBlacklisted(path, blacklist):
+  """Tells whether a path belongs to some blacklisted package.
+
+  Args:
+    path: path of a portage package
+    blacklist: list of directory strings; an entry matches when it occurs
+      anywhere inside |path|.
+
+  Returns:
+    True if the portage package has been blacklisted.
+  """
+  # Substring match, so '/local' also matches '/use/local/etc'.
+  return any(entry in path for entry in blacklist)
+
+
+def RemoveGomaFromPath():
+  """Remove the goma directory from the search path (no-op without $GOMA_DIR).
+  This will force a non-goma build, as needed for blacklisted packages.
+  """
+  goma_dir = os.environ.get('GOMA_DIR')
+  search_path = os.environ.get('PATH')
+  if not goma_dir or search_path is None:
+    return
+  kept = [path for path in search_path.split(os.pathsep)
+          if not path.startswith(goma_dir)]
+  os.environ['PATH'] = os.pathsep.join(kept)  # Keeps os.environ in sync.
+
+
+def RemoveParallelBuildArguments(args):
+  """Remove parallel-build related arguments.
+  This avoids passing unreasonably-high values into non-goma builds.
+
+  Args:
+    args: the arguments list to modify in place.
+
+  Returns:
+    the same list object, with '-j', '-j ###' and '-j###' entries removed.
+  """
+  # This processing assumes simple '-j ###' or '-j###' formatted arguments.
+  # More complex combinations (e.g. '-vj10') will be ignored entirely.
+  kept = []
+  skip_count = False
+  for arg in args:
+    if skip_count and arg.isdigit():
+      skip_count = False  # The job count that belonged to a bare '-j'.
+      continue
+    # A bare '-j' may stand alone; only a numeric successor is its count.
+    skip_count = (arg == '-j')
+    if not arg.startswith('-j'):
+      kept.append(arg)
+  args[:] = kept
+  return args
+
+
+def main():
+  """Runs /usr/bin/make with flags adjusted for goma or non-goma builds."""
+  blacklist = GetBlacklist()
+  # Regardless of with/without goma, make is limited to under certain
+  # load average.
+  make_cmd = ['/usr/bin/make', '-l10'] + sys.argv[1:]
+  # In a sub-make, do not modify the flags.
+  in_sub_make = os.getenv('MAKELEVEL')
+  if not in_sub_make:
+    # Blacklisted packages and root builds bypass goma entirely.
+    # A root user cannot talk to goma.
+    if IsBlacklisted(os.getcwd(), blacklist) or os.getuid() == 0:
+      RemoveGomaFromPath()
+      RemoveParallelBuildArguments(make_cmd)
+      for goma_var in ('GOMA_DIR', 'GOMACC_PATH'):
+        if os.environ.get(goma_var):
+          del os.environ[goma_var]
+    else:
+      # Default flags go at the beginning of the argument list so that
+      # any user-supplied options override them.
+      RemoveGomaFromPath()
+      goma_dir = os.environ.get('GOMA_DIR')
+      if goma_dir:
+        os.environ['GOMACC_PATH'] = os.path.join(goma_dir, 'gomacc')
+        make_cmd.insert(1, '-j100')
+
+  os.execv(make_cmd[0], make_cmd)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/client/goma-make_unittest b/client/goma-make_unittest
new file mode 100755
index 0000000..c253563
--- /dev/null
+++ b/client/goma-make_unittest
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+
+# Copyright 2014 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Tests for goma-make."""
+
+import imp
+import os
+import sys
+import tempfile
+import unittest
+
+
+_TEST_DIR = os.path.abspath(os.path.dirname(__file__))
+_GOMA_MAKE_PATH = os.path.abspath(os.path.join(_TEST_DIR, 'goma-make'))
+
+class GomaMakeTest(unittest.TestCase):
+  """Unit tests for the helper functions defined in goma-make."""
+  def setUp(self):
+    # Re-execute goma-make as a module and clear the env before every test.
+    mod_name, _ = os.path.splitext(os.path.basename(_GOMA_MAKE_PATH))
+    self._module = imp.load_source(mod_name, _GOMA_MAKE_PATH)
+    os.environ.pop('GOMACC_BLACKLIST', None)
+    self._tmpfile = ''
+
+  def tearDown(self):
+    os.environ.pop('GOMACC_BLACKLIST', None)  # Its file is deleted below.
+    if self._tmpfile:
+      os.unlink(self._tmpfile)
+
+  def SetBlacklistFile(self, contents):
+    """Create blacklist file with contents, and set it to $GOMACC_BLACKLIST.
+
+    Args:
+      contents: a string to be stored to the blacklist file.
+    """
+    handle, self._tmpfile = tempfile.mkstemp()
+    os.write(handle, contents)
+    os.close(handle)
+    os.environ['GOMACC_BLACKLIST'] = self._tmpfile
+
+  def testParseBlacklistContents(self):
+    self.assertEqual(self._module.ParseBlacklistContents(''), [])
+    self.assertEqual(self._module.ParseBlacklistContents('\n\r '), [])
+    self.assertEqual(self._module.ParseBlacklistContents('/tmp'), ['/tmp'])
+    self.assertEqual(
+        self._module.ParseBlacklistContents('\r\n /tmp\r\n '), ['/tmp'])
+    self.assertEqual(
+        self._module.ParseBlacklistContents('\n/example\n/example2\n'),
+        ['/example', '/example2'])
+    self.assertEqual(
+        self._module.ParseBlacklistContents(
+            '\r\n /example \r\n \r\n \r\n /example2\r\n '),
+        ['/example', '/example2'])
+    self.assertEqual(
+        self._module.ParseBlacklistContents(
+            '\r\n /dirname with space\r\n '),
+        ['/dirname with space'])
+    self.assertEqual(
+        self._module.ParseBlacklistContents(
+            '\r\n /dirname with  space\r\n /with space/part 2 \r\n '),
+        ['/dirname with  space', '/with space/part 2'])
+
+  def testIsBlacklistedShouldReturnTrueIfPathIsInBlacklist(self):
+    self.assertTrue(self._module.IsBlacklisted('/tmp', ['/tmp']))
+    self.assertTrue(self._module.IsBlacklisted('/tmp', ['non-related', '/tmp']))
+    self.assertTrue(self._module.IsBlacklisted('/usr/local/etc',
+                                               ['/usr', '/tmp']))
+    self.assertTrue(self._module.IsBlacklisted('/use/local/etc',
+                                               ['non-related', '/local']))
+    self.assertTrue(self._module.IsBlacklisted('/use/local/etc',
+                                               ['non-related', '/etc']))
+
+  def testIsBlacklistedShouldReturnFalseIfPathIsNotInBlacklist(self):
+    self.assertFalse(self._module.IsBlacklisted('/tmp', []))
+    self.assertFalse(self._module.IsBlacklisted('/tmp', ['non-related']))
+    self.assertFalse(self._module.IsBlacklisted('/usr/local/etc',
+                                                ['/opt', '/tmp']))
+
+  def testRemoveParallelBuildArgumentsShouldWork(self):
+    args = [
+        'make',
+        'KEY=VALUE',
+        '-j', '65536',
+        '-d',
+        '-f', 'makefile',
+        '-i',
+        '-l', '1',
+        '-k',
+        'KEY2=VALUE2',
+        'all']
+    expected = [
+        'make',
+        'KEY=VALUE',
+        '-d',
+        '-f', 'makefile',
+        '-i',
+        '-l', '1',
+        '-k',
+        'KEY2=VALUE2',
+        'all']
+    actual = self._module.RemoveParallelBuildArguments(args)
+    self.assertEqual(actual, expected)
+
+  def testGetBlacklistShouldReturnDefaultIfEnvIsNotSet(self):
+    self.assertIsNone(os.getenv('GOMACC_BLACKLIST'))
+    blacklist = self._module.GetBlacklist()
+    self.assertEqual(blacklist, self._module._DEFAULT_BLACKLIST)
+
+  def testGetBlacklistShouldUseBlacklistInEnv(self):
+    # also edit testParseBlacklistContents when you modify parsing rule.
+    self.assertIsNone(os.getenv('GOMACC_BLACKLIST'))
+    self.SetBlacklistFile('/tmp')
+    self.assertIsNotNone(os.getenv('GOMACC_BLACKLIST'))
+    blacklist = self._module.GetBlacklist()
+    self.assertNotEqual(blacklist, self._module._DEFAULT_BLACKLIST)
+    self.assertEqual(blacklist, ['/tmp'])
+
+  def testGetBlacklistShouldUseParseBlacklistContentsForParsing(self):
+    class Spy:
+      def __init__(self):
+        self.contents = None
+
+      def ParseBlacklistContents(self, contents):
+        self.contents = contents
+        return ['dummy']
+
+    spy = Spy()
+    self._module.ParseBlacklistContents = spy.ParseBlacklistContents
+    self.assertIsNone(os.getenv('GOMACC_BLACKLIST'))
+    self.SetBlacklistFile('/tmp')
+    self.assertIsNotNone(os.getenv('GOMACC_BLACKLIST'))
+    blacklist = self._module.GetBlacklist()
+    self.assertEqual(spy.contents, '/tmp')
+    self.assertEqual(blacklist, ['dummy'])
+
+# TODO: add more test using environment variables.
+
+
+def main():
+  """Runs GomaMakeTest; exits with status 1 on any error or failure."""
+  loader = unittest.TestLoader()
+  runner = unittest.TextTestRunner()
+  outcome = runner.run(loader.loadTestsFromTestCase(GomaMakeTest))
+
+  # Errors and failures both map to the same exit status (bit 0x01);
+  # a clean run returns normally without calling sys.exit().
+  exit_code = 0x01 if (outcome.errors or outcome.failures) else 0
+  if exit_code:
+    sys.exit(exit_code)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/client/goma-wrapper b/client/goma-wrapper
new file mode 100755
index 0000000..169bb3a
--- /dev/null
+++ b/client/goma-wrapper
@@ -0,0 +1,76 @@
+#!/bin/sh
+#
+# Copyright 2012 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+#
+# A simple wrapper script to setup goma for chromeos build.
+#
+# Example usage:
+#
+# % /path/to/goma/goma-wrapper emerge-x86-generic chromeos-chrome
+# % /path/to/goma/goma-wrapper -j50 -l5 emerge-x86-generic chromeos-chrome
+
+FLAGS_jobs=
+FLAGS_load=
+while getopts 'j:l:' opt; do
+  case $opt in
+    'j') FLAGS_jobs=$OPTARG ;;
+    'l') FLAGS_load=$OPTARG ;;
+  esac
+done
+shift $(($OPTIND - 1))
+
+makeopts="-j${FLAGS_jobs:=100}"
+
+# scons also uses MAKEOPTS, but does not support the -l option.
+# We add it here only if explicitly set by the user, otherwise
+# let goma-make use its own default value.
+if [ -n "${FLAGS_load}" ]; then
+  makeopts="${makeopts} -l${FLAGS_load}"
+fi
+# Values are quoted so that spaces in them survive word splitting in sh.
+export GOMA_DIR="$(cd "$(dirname "$0")"; pwd)"
+export GOMACC_PATH="$GOMA_DIR/gomacc"
+export MAKE="$GOMA_DIR/goma-make"
+export MAKEOPTS="${makeopts}"
+# These file names are used by ./configure.  We cannot get any benefit
+# from goma for ./configure because it is a serial process and each
+# source file is small.
+export GOMA_FALLBACK_INPUT_FILES=conftest.c,conftest.cc,conftest.cpp,_configtest.c
+
+# The logic should be the same as GetTempDirectory in mypath.cc.
+get_temp_directory() {
+  # POSIX sh only (the shebang is /bin/sh): no 'function', '[[' or arrays.
+  # If GOMA_TMP_DIR exists, use it.
+  if [ -n "${GOMA_TMP_DIR:-}" ]; then
+    echo "${GOMA_TMP_DIR}"
+    return
+  fi
+
+  # First of SUDO_USER, USERNAME, USER, LOGNAME that is set and not root.
+  gtd_user="unknown"
+  for gtd_uc in ${SUDO_USER:-} ${USERNAME:-} ${USER:-} ${LOGNAME:-}; do
+    # user 'root' must be skipped. See GetUsernameEnv() in client/mypath.cc.
+    if [ "${gtd_uc}" = "root" ]; then
+      continue
+    fi
+    gtd_user="${gtd_uc}"
+    break
+  done
+
+  # When GOMA_TMP_DIR is not defined, check ${TMP}/goma_${user} etc.
+  for gtd_tmpdir in ${TEST_TMPDIR:-} ${TMPDIR:-} ${TMP:-}; do
+    if [ -d "${gtd_tmpdir}" ]; then
+      echo "${gtd_tmpdir%/}/goma_${gtd_user}"
+      return
+    fi
+  done
+
+  echo "/tmp/goma_${gtd_user}"
+}
+
+export GOMA_TMP_DIR="$(get_temp_directory)"
+# Replace this shell with the wrapped command, using the environment above.
+exec "$@"
diff --git a/client/goma_auth.py b/client/goma_auth.py
new file mode 100755
index 0000000..4816f43
--- /dev/null
+++ b/client/goma_auth.py
@@ -0,0 +1,323 @@
+#!/usr/bin/env python
+
+# Copyright 2015 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""A Script to set goma_oauth2_config."""
+
+import argparse
+import BaseHTTPServer
+import copy
+import json
+import os
+import string
+import subprocess
+import sys
+import urllib
+import urlparse
+import webbrowser
+import random
+
+
+GOOGLE_AUTH_URI = 'https://accounts.google.com/o/oauth2/auth'
+OAUTH_SCOPES = 'https://www.googleapis.com/auth/userinfo.email'
+OAUTH_TOKEN_ENDPOINT = 'https://www.googleapis.com/oauth2/v3/token'
+OOB_CALLBACK_URN = 'urn:ietf:wg:oauth:2.0:oob'
+GOMA_OAUTH_SCOPE='https://www.googleapis.com/auth/goma'
+
+CHROME_INFRA_CONFIG_URL = ('https://chrome-infra-auth.appspot.com/'
+                           'auth/api/v1/server/oauth_config')
+CHROME_INFRA_CONFIG_DEV_URL = ('https://chrome-infra-auth-dev.appspot.com/'
+                               'auth/api/v1/server/oauth_config')
+DEFAULT_GOMA_OAUTH2_CONFIG_FILE_NAME = '.goma_oauth2_config'
+
+OAUTH_STATE_LENGTH = 64
+
+
+class Error(Exception):
+  """Script-specific failure (missing home dir, bad state, invalid token)."""
+
+
+class GomaOAuth2Config(dict):
+  """File-backed OAuth2 configuration (a dict persisted as JSON)."""
+
+  def __init__(self):
+    dict.__init__(self)
+    self._path = self._GetLocation()
+    self._backup = None  # Copy of the contents last loaded or saved.
+
+  @staticmethod
+  def _GetLocation():
+    """Returns $GOMA_OAUTH2_CONFIG_FILE if set, else the file under ~."""
+    env = os.environ.get('GOMA_OAUTH2_CONFIG_FILE')
+    if env:
+      return env
+    homedir = os.path.expanduser('~')
+    if homedir == '~':
+      raise Error('Cannot find user\'s home directory.')
+    return os.path.join(homedir, DEFAULT_GOMA_OAUTH2_CONFIG_FILE_NAME)
+
+  def Load(self):
+    """Loads config from a file; returns False if missing or not JSON."""
+    if not os.path.exists(self._path):
+      return False
+    try:
+      with open(self._path) as f:
+        self.update(json.load(f))
+    except ValueError:
+      return False
+    self._backup = copy.copy(self)
+    return True
+
+  def Save(self):
+    """Saves config to a file created with owner-only (0600) permissions."""
+    # TODO: not save unnecessary data.
+    if self == self._backup:
+      return
+    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, 'O_BINARY', 0)
+    with os.fdopen(os.open(self._path, flags, 0600), 'wb') as f:
+      if os.name == 'posix':
+        os.fchmod(f.fileno(), 0600)  # Also tighten a pre-existing file.
+      json.dump(self, f)
+    self._backup = copy.copy(self)
+
+  def Delete(self):
+    """Deletes a config file."""
+    if not os.path.exists(self._path):
+      return
+    os.remove(self._path)
+    self._backup = None
+
+
+def HttpGetRequest(url):
+  """Fetches |url| with an HTTP GET via curl and returns the response body.
+
+  Note: using curl instead of urllib2.urlopen because python < 2.7.9 does
+  not verify certificates. See http://lwn.net/Articles/611243/
+
+  Args:
+    url: a URL string of an HTTP server.
+
+  Returns:
+    whatever curl wrote to its standard output.
+  """
+  curl_argv = ['curl', url] + ['--silent', '-o', '-']
+  return subprocess.check_output(curl_argv)
+
+
+def HttpPostRequest(url, post_dict):
+  """Sends an HTTP POST via curl and returns the HTTP response body.
+
+  curl is used because python < 2.7.9 does not verify certificates
+  (http://lwn.net/Articles/611243/).  post_dict is urlencoded and piped to
+  curl's stdin so that secrets are not exposed on its command line.  Raises
+  subprocess.CalledProcessError when curl exits with a non-zero status.
+  """
+  body = urllib.urlencode(post_dict)
+  # '-d @-' makes curl read the POST data from standard input.
+  proc = subprocess.Popen(['curl', '-d', '@-', url, '--silent', '-o', '-'],
+                          stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+  output, _ = proc.communicate(body)
+  if proc.returncode:
+    raise subprocess.CalledProcessError(proc.returncode, 'curl', output)
+  return output
+
+
+def OAuth2BasicConfig():
+  """Builds a fresh OAuth2 config dict with empty client credentials."""
+  basic = dict(client_id='', client_secret='')
+  basic.update(
+      redirect_uri=OOB_CALLBACK_URN,
+      auth_uri=GOOGLE_AUTH_URI,
+      scope=OAUTH_SCOPES,
+      token_uri=OAUTH_TOKEN_ENDPOINT,
+  )
+  return basic
+
+
+def ReadOAuth2ConfigFromSite(is_dev):
+  """Builds an OAuth2 config from the chrome-infra-auth oauth_config API.
+
+  Args:
+    is_dev: use dev site on True.
+
+  Returns:
+    the basic OAuth2 config with the site's client id and secret filled in.
+  """
+  url = CHROME_INFRA_CONFIG_URL
+  if is_dev:
+    url = CHROME_INFRA_CONFIG_DEV_URL
+  site_config = json.loads(HttpGetRequest(url))
+  oauth2_config = OAuth2BasicConfig()
+  oauth2_config['client_id'] = site_config['client_id']
+  oauth2_config['client_secret'] = site_config['client_not_so_secret']
+  return oauth2_config
+
+
+class AuthorizationCodeHandler(BaseHTTPServer.BaseHTTPRequestHandler):
+  """HTTP handler that records the OAuth2 authorization code redirect."""
+  code = None  # Authorization code taken from the accepted request.
+  state = None  # Expected 'state' value; set by the caller beforehand.
+
+  @classmethod
+  def _SetCode(cls, code):
+    """Internal function to set code to class variable."""
+    if not code:
+      raise Error('code is None')
+    cls.code = code[0]
+
+  def do_GET(self):
+    """Stores the code of a loopback request (peer IP, not reverse DNS)."""
+    if not self.client_address[0].startswith('127.'):
+      raise Error('should be localhost but %s' % (self.client_address,))
+    form = urlparse.parse_qs(urlparse.urlparse(self.path).query)
+    server_state = form.get('state', [''])[0]
+    if server_state != self.state:
+      raise Error('possibly XSRF: state from server (%s) is not %s' % (
+          server_state, self.state))
+    self._SetCode(form.get('code'))
+    self.send_response(200, "OK")
+    self.end_headers()  # Terminate the header block so the reply is complete.
+
+
+def _RandomString(length):
+  """Builds a random alphanumeric string using random.SystemRandom.
+
+  Args:
+    length: length of the string.
+
+  Returns:
+    a string of |length| characters drawn from letters and digits.
+  """
+  alphabet = string.letters + string.digits
+  pick = random.SystemRandom().choice
+  return ''.join([pick(alphabet) for _ in xrange(length)])
+
+
+def GetAuthorizationCodeViaBrowser(config):
+  """Gets authorization code using browser.
+
+  This way is useful for users with desktop machines.
+
+  Args:
+    config: a dictionary of config; its 'redirect_uri' is overwritten.
+  Returns:
+    authorization code.
+  """
+  AuthorizationCodeHandler.state = _RandomString(OAUTH_STATE_LENGTH)
+  # Listen on the loopback interface only; the redirect comes from this host.
+  httpd = BaseHTTPServer.HTTPServer(('localhost', 0), AuthorizationCodeHandler)
+  config['redirect_uri'] = 'http://localhost:%d' % httpd.server_port
+  body = urllib.urlencode({
+      'scope': config['scope'],
+      'redirect_uri': config['redirect_uri'],
+      'client_id': config['client_id'],
+      'state': AuthorizationCodeHandler.state,
+      'response_type': 'code'})
+  google_auth_url = '%s?%s' % (config['auth_uri'], body)
+  webbrowser.open(google_auth_url)
+  httpd.handle_request()
+  httpd.server_close()
+  return AuthorizationCodeHandler.code
+
+
+def GetAuthorizationCodeViaCommandLine(config):
+  """Asks the user to open the consent URL and paste the resulting code.
+
+  This way is useful anywhere without a browser.
+
+  Args:
+    config: a dictionary of config.
+
+  Returns:
+    the text the user typed at the prompt.
+  """
+  query = {
+      'scope': config['scope'],
+      'redirect_uri': config['redirect_uri'],
+      'client_id': config['client_id'],
+      'response_type': 'code'}
+  consent_url = '%s?%s' % (config['auth_uri'], urllib.urlencode(query))
+  print 'Please visit following URL with your browser, and approve access:'
+  print consent_url
+  return raw_input('Please input the code:')
+
+
+def GetRefreshToken(get_code_func, config):
+  """Exchanges an authorization code for a refresh token.
+
+  Args:
+    get_code_func: called with |config|; must return the code as a str.
+    config: a dictionary of config.
+
+  Returns:
+    the 'refresh_token' field of the token endpoint's JSON response.
+  """
+  auth_code = get_code_func(config)
+  assert auth_code and type(auth_code) == str
+  token_request = {
+      'code': auth_code,
+      'client_id': config['client_id'],
+      'client_secret': config['client_secret'],
+      'redirect_uri': config['redirect_uri'],
+      'grant_type': 'authorization_code'
+  }
+  token_reply = HttpPostRequest(config['token_uri'], token_request)
+  return json.loads(token_reply)['refresh_token']
+
+
+def VerifyRefreshToken(config):
+  """Returns True if the token endpoint accepts config's refresh token.
+
+  A config without 'refresh_token' is rejected without any request.
+  """
+  if 'refresh_token' not in config:
+    return False
+  refresh_request = {
+      'client_id': config['client_id'],
+      'client_secret': config['client_secret'],
+      'refresh_token': config['refresh_token'],
+      'grant_type': 'refresh_token'
+  }
+  reply = json.loads(HttpPostRequest(config['token_uri'], refresh_request))
+  return 'error' not in reply and 'access_token' in reply
+
+def Login(options):
+  """Ensures a verified refresh token is stored in the config file.
+
+  A loadable config is only re-verified (and raises Error when invalid).
+  Otherwise, or after --delete, a new refresh token is requested from the
+  user via the command line or, with --browser, via a web browser.
+  """
+  oauth2_config = GomaOAuth2Config()
+  if options.delete:
+    oauth2_config.Delete()
+  if not oauth2_config.Load():
+    oauth2_config.update(
+        ReadOAuth2ConfigFromSite(options.dev_chrome_infra_site))
+    if options.browser:
+      get_code = GetAuthorizationCodeViaBrowser
+    else:
+      get_code = GetAuthorizationCodeViaCommandLine
+    oauth2_config['refresh_token'] = GetRefreshToken(get_code, oauth2_config)
+  if not VerifyRefreshToken(oauth2_config):
+    raise Error('invalid refresh token')
+  oauth2_config.Save()
+
+
+def main():
+  """Prints the experimental notice, parses the flags and runs Login()."""
+  print '!!!EXPERIMENTAL!!!  WE MAY CHANGE THIS WITHOUT ANNOUNCEMENT.'
+  parser = argparse.ArgumentParser()
+  for flag, description in (
+      ('--dev_chrome_infra_site', 'Use dev chrome infra site.'),
+      ('--delete', 'Delete the stored goma OAuth2 config file.'),
+      ('--browser', 'Use browser to get goma OAuth2 token.')):
+    parser.add_argument(flag, action='store_true', help=description)
+  options = parser.parse_args()
+  Login(options)
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/client/goma_ctl.bat b/client/goma_ctl.bat
new file mode 100644
index 0000000..b223ee5
--- /dev/null
+++ b/client/goma_ctl.bat
@@ -0,0 +1,7 @@
+@echo off

+

+REM Copyright 2012 The Goma Authors. All rights reserved.

+REM Use of this source code is governed by a BSD-style license that can be

+REM found in the LICENSE file.

+

+python "%~dp0goma_ctl.py" %*

diff --git a/client/goma_ctl.py b/client/goma_ctl.py
new file mode 100755
index 0000000..ecd0fff
--- /dev/null
+++ b/client/goma_ctl.py
@@ -0,0 +1,2373 @@
+#!/usr/bin/env python
+
+# Copyright 2012 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# TODO: remove GOMA_COMPILER_PROXY_PORT from code.
+#                    it could be 8089, 8090, ... actually.
+
+"""A Script to manage compiler_proxy.
+
+It starts/stops compiler_proxy.exe or compiler_proxy like goma_ctl.sh.
+"""
+
+
+
+import collections
+import copy
+import glob
+import hashlib
+import json
+import os
+import re
+import shutil
+import socket
+import subprocess
+import sys
+import tarfile
+import tempfile
+import time
+import urllib
+import urllib2
+import urlparse
+import zipfile
+
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+_TRUE_PATTERN = re.compile(r'^([tTyY]|1)')
+_DEFAULT_ENV = [
+    ('USE_SSL', 'true'),
+    ('PING_TIMEOUT_SEC', '60'),
+    ('LOG_CLEAN_INTERVAL', str(24 * 60 * 60)),
+    ]
+_DEFAULT_NO_SSL_ENV = [
+    ('STUBBY_PROXY_PORT', '80'),
+    ]
+_MAX_COOLDOWN_WAIT = 5  # seconds to wait for compiler_proxy to shutdown.
+_COOLDOWN_SLEEP = 1  # seconds to each wait for compiler_proxy to shutdown.
+_CURL_RETRY = 5  # times to retry for transient failures on curl.
+_TMP_DIR_PREFIX = 'goma_'
+_TMP_DIR_WIN = 'goma'
+_CRASH_DUMP_DIR = 'goma_crash'
+_CACHE_DIR = 'goma_cache'
+_PRODUCT_NAME = 'Goma'  # product name used for crash report.
+_DUMP_FILE_SUFFIX = '.dmp'
+_CHECKSUM_FILE = 'sha256.json'
+
+
+def _IsGomaFlagTrue(flag_name, default=False):
+  """Tells whether a GOMA_* environment flag is switched on.
+
+  Implementation is based on client/env_flags.h: a non-empty value is true
+  only when it matches _TRUE_PATTERN; any other value is False.
+
+  Args:
+    flag_name: name of a GOMA flag without GOMA_ prefix.
+    default: result to use when the flag is unset or empty.
+
+  Returns:
+    True if the flag is true.  Otherwise False.
+  """
+  raw = os.environ.get('GOMA_%s' % flag_name)
+  # An empty value is treated the same as an unset flag.
+  if raw:
+    return _TRUE_PATTERN.search(raw) is not None
+  return default
+
+
+def _SetGomaFlagDefaultValueIfEmpty(flag_name, default_value):
+  """Set default value to the given flag if it is absent from os.environ.
+
+  Args:
+    flag_name: name of a GOMA flag without GOMA_ prefix.
+    default_value: value to set; an existing empty value is left untouched.
+  """
+  full_flag_name = 'GOMA_%s' % flag_name
+  if full_flag_name not in os.environ:
+    os.environ[full_flag_name] = default_value
+
+
+def _ParseManifestContents(manifest):
+  """Turns MANIFEST text into a dictionary of its key=value lines.
+
+  Lines without '=' are ignored; only the first '=' splits key from value.
+
+  Args:
+    manifest: a string of manifest to be parsed.
+
+  Returns:
+    The dictionary of key and values in string.
+  """
+  fields = (line.strip().partition('=') for line in manifest.splitlines())
+  # A later duplicate of a key overwrites the earlier one.
+  return dict((key.strip(), value.strip())
+              for key, sep, value in fields if sep)
+
+
+def _IsBadVersion(cur_ver, bad_vers):
+  """Tells whether cur_ver is one of the versions listed in bad_vers.
+
+  Entries are compared as exact strings, so ' 2' does not match version 2.
+
+  Args:
+    cur_ver: current version number.
+    bad_vers: a string for bad version, '|' separated.
+  Returns:
+    True if cur_ver is in bad_vers.
+  """
+  listed = bad_vers.split('|')
+  return str(cur_ver) in listed
+
+
+def _ShouldUpdate(cur_ver, next_ver, bad_vers):
+  """Decides whether the client should move from cur_ver to next_ver.
+
+  The rules are:
+    - cur_ver == next_ver: already up-to-date, so no update.
+    - cur_ver < next_ver: a newer version is available, so update.
+    - cur_ver > next_ver: update only when cur_ver is listed in bad_vers,
+      i.e. a bad current version is replaced even by an older one.
+
+  Args:
+    cur_ver: current version number
+    next_ver: next version number
+    bad_vers: a string for bad versions, '|' separated.
+
+  Returns:
+    True to update cur_ver to next_ver. False otherwise.
+  """
+  if cur_ver == next_ver:
+    return False
+  is_newer = cur_ver < next_ver
+  return is_newer or _IsBadVersion(cur_ver, bad_vers)
+
+
+def _GetProxyEnv():
+  """Reads the HTTP proxy setting that GOMA should go through.
+
+  The https proxy setting is preferred over the http one.
+
+  Returns:
+    a dictionary of proxy host and port if found.  Otherwise, None.
+
+  Raises:
+    ConfigError: if GOMA cannot handle HTTP proxy environmental variables.
+  """
+  proxies = urllib.getproxies()
+  if 'https' in proxies:
+    proxy_url = proxies['https']
+  elif 'http' in proxies:
+    proxy_url = proxies['http']
+  else:  # No acceptable HTTP proxy environmental variables configured.
+    return None
+
+  # A bare <host>[:<port>] is taken as http://<host>[:<port>].
+  if '://' not in proxy_url:
+    proxy_url = 'http://%s' % proxy_url
+  parsed = urlparse.urlparse(proxy_url)
+  # ParseResult does have the attributes used below.
+  # See Also: http://docs.python.org/library/urlparse.html
+  # pylint: disable=E1101
+  problem = None
+  if parsed.scheme == 'https':
+    problem = 'Sorry, GOMA do not support proxy with HTTPS.'
+  elif parsed.username or parsed.password:
+    problem = 'Sorry, GOMA do not support proxy with user/password.'
+  elif not parsed.hostname:
+    problem = 'You should set HTTP proxy host.'
+  elif not parsed.port:
+    problem = 'You cannot omit HTTP proxy port to use GOMA.'
+  if problem:
+    raise ConfigError(problem)
+  return {'host': parsed.hostname, 'port': str(parsed.port)}
+
+
+def _ParseSpaceSeparatedValues(data):
+  """Parses a whitespace separated table whose 1st line holds the labels.
+
+  Empty lines are skipped, and all values are kept as strings.  A row with
+  more or fewer entries than the label line is cut to the shorter of the two.
+
+  e.g. If data is like this:
+  | COMMAND   PID
+  | bash        1
+  | tcsh        2
+  This function returns:
+  | [{'COMMAND': 'bash', 'PID': '1'}, {'COMMAND': 'tcsh', 'PID': '2'}]
+
+  Args:
+    data: space separated values to be parsed.
+
+  Returns:
+    a list of dictionaries parsed from data.
+  """
+  # TODO: remove this if I will not use this on Windows.
+  non_empty_rows = []
+  for line in data.splitlines():
+    fields = line.split()
+    if fields:  # skip empty line.
+      non_empty_rows.append(fields)
+
+  # The first non-empty line is the label line; the rest are records.
+  if not non_empty_rows:
+    return []
+  labels = non_empty_rows[0]
+  return [dict(zip(labels, fields)) for fields in non_empty_rows[1:]]
+
+
+def _ParseLsof(data):
+  """Parse lsof -F pu <filename>.
+
+  Although this function might only be used on Posix environment, I put this
+  here for ease of testing.
+
+  Each line is a one letter field symbol followed by a number ('p' for pid,
+  'u' for uid).  Lines with other symbols are ignored.  A new record begins
+  whenever a field that the current record already has shows up again.
+
+  e.g. If data is like this:
+  | u1
+  | p2
+  | u3
+  | p4
+  This function returns:
+  | [{'uid': 1L, 'pid': 2L}, {'uid': 3L, 'pid': 4L}]
+
+  Args:
+    data: result of lsof -F pu <filename>.
+
+  Returns:
+    a list of dictionaries parsed from data.
+  """
+  labels = {'p': 'pid', 'u': 'uid'}
+  records = []
+  current = {}
+
+  for line in data.splitlines():
+    label = labels.get(line[:1])
+    if label is None:  # empty line or unknown symbol.
+      continue
+    if label in current:
+      # A repeated field starts the next record.
+      records.append(current)
+      current = {}
+    current[label] = long(line[1:])
+  # Flush the last record.
+  if current:
+    records.append(current)
+  return records
+
+
+def _GetEnvMatchedCondition(candidates, condition, default_value):
+  """Picks the first non-empty environment variable accepted by condition.
+
+  Args:
+    candidates: names of environment variables, in order of preference.
+    condition: a callable taking a value; true means the value is accepted.
+    default_value: a string to be returned if no candidates matched.
+  Returns:
+    the value of the first matching variable.  Otherwise, default_value.
+  """
+  for name in candidates:
+    value = os.environ.get(name)
+    if not value:  # unset or empty.
+      continue
+    if condition(value):
+      return value
+  return default_value
+
+
+def _GetTempDirectory():
+  """Picks the temp directory from the environment.
+
+  The first of $TEST_TMPDIR, $TMPDIR and $TMP that names an existing
+  directory is returned, or /tmp if there is none.
+
+  It should match the logic with GetTempDirectory in client/mypath.cc.
+  """
+  env_names = ('TEST_TMPDIR', 'TMPDIR', 'TMP')
+  return _GetEnvMatchedCondition(env_names, os.path.isdir, '/tmp')
+
+
+def _GetLogDirectory():
+  """Picks the directory where logs are expected to be.
+
+  The first of $GLOG_log_dir, $TEST_TMPDIR, $TMPDIR and $TMP that names an
+  existing directory is returned, or /tmp if there is none.
+  """
+  env_names = ('GLOG_log_dir', 'TEST_TMPDIR', 'TMPDIR', 'TMP')
+  return _GetEnvMatchedCondition(env_names, os.path.isdir, '/tmp')
+
+
+def _GetUsername():
+  """Guesses the name of the user who runs this script.
+
+  The first non-empty of $SUDO_USER, $USERNAME, $USER and $LOGNAME that is
+  not 'root' is returned, or 'unknown' if there is none.
+  """
+  def _IsNotRoot(name):
+    return name != 'root'
+  env_names = ('SUDO_USER', 'USERNAME', 'USER', 'LOGNAME')
+  return _GetEnvMatchedCondition(env_names, _IsNotRoot, 'unknown')
+
+
+def _GetHostname():
+  """Gets hostname as reported by socket.gethostname().
+
+  Returns:
+    a hostname of the machine running this script.
+  """
+  return socket.gethostname()
+
+
+def _FindCommandInPath(command, find_subdir_rule=os.path.join):
+  """Find command in the PATH.  Nothing is found if PATH is not set.
+
+  Args:
+    command: a string of command name to find out.
+    find_subdir_rule: a function to explore sub directory.
+  Returns:
+    a string of a full path name if the command is found. Otherwise, None.
+  """
+  path_env = os.environ.get('PATH')  # None, not KeyError, if PATH is unset.
+  dirs = [] if path_env is None else path_env.split(os.path.pathsep)
+  for fullpath in (find_subdir_rule(d, command) for d in dirs):
+    if fullpath and os.path.isfile(fullpath) and os.access(fullpath, os.X_OK):
+      return fullpath
+  return None
+
+
+def _ParseFlagz(flagz):
+  """Extracts the flags a user configured from compiler_proxy's /flagz.
+
+  Each useful line looks like NAME=VALUE.  Lines ending with
+  '(auto configured)' are left out, so the dictionary will not contain auto
+  configured flags.  Lines without '=' are ignored as well.
+
+  Args:
+    flagz: a string returned by compiler_proxy's /flagz.
+
+  Returns:
+    a dictionary of user-configured flags.
+  """
+  stripped = (line.strip() for line in flagz.splitlines())
+  pairs = (line.partition('=') for line in stripped
+           if not line.endswith('(auto configured)'))
+  # Only the first '=' separates name and value; a later duplicate of a name
+  # overwrites the earlier one.
+  return dict((name.strip(), value.strip())
+              for name, sep, value in pairs if sep)
+
+
+def _IsGomaFlagUpdated(envs):
+  """Returns true if environment is updated from the argument.
+
+  It is updated if a key of envs has another value (or is missing) in the
+  current environment, or if the current environment has a GOMA_ variable
+  whose value differs from (or is missing in) envs.
+
+  Note: caller MUST NOT set environ after call of this method.
+  Otherwise, this function may always return true.
+
+  Args:
+    envs: a dictionary of environment to check.
+      e.g. {'GOMA_USE_SSL': 'true'}
+
+  Returns:
+    True if one of values is different from given one.
+  """
+  if any(os.environ.get(key) != original
+         for key, original in envs.iteritems()):
+    return True
+  return any(value != envs.get(key)
+             for key, value in os.environ.iteritems()
+             if key.startswith('GOMA_'))
+
+
+def _CalculateChecksum(filename):
+  """Returns hex SHA256 of |filename|, read in chunks to bound memory use.
+
+  Args:
+    filename: a string filename to calculate checksum.
+  """
+  digest = hashlib.sha256()
+  with open(filename, 'rb') as f:
+    for chunk in iter(lambda: f.read(1 << 20), b''):
+      digest.update(chunk)
+  return digest.hexdigest()
+
+
+def _GetUserRuntimeDirectory():
+  """Returns /run/user/<uid> if it exists on Linux.  Otherwise, None."""
+  # pylint: disable=E1101
+  # The sysname in uname tells the OS; only Linux is handled here.
+  if os.uname()[0] == 'Linux':
+    # Prefer to use the user runtime directory on Linux.
+    candidate = os.path.join('/run', 'user', '%d' % os.getuid())
+    if os.path.isdir(candidate):
+      return candidate
+  return None
+
+
+class ConfigError(Exception):
+  """Raised when an error is found in configurations."""
+
+
+class Error(Exception):
+  """Raised when an error is found in the system."""
+
+
+class PopenWithCheck(subprocess.Popen):
+  """subprocess.Popen whose communicate() raises Error on a failed exit."""
+
+  def communicate(self, input=None):
+    """Like Popen.communicate, but raises Error unless the exit status is 0."""
+    # The argument name |input| shadows a builtin, but it has to match the
+    # original communicate method.
+    # pylint: disable=W0622
+    (out, err) = super(PopenWithCheck, self).communicate(input)
+    if self.returncode == 0:
+      return (out, err)
+    if out or err:
+      raise Error('Error(%s): %s%s' % (self.returncode, out, err))
+    raise Error('failed to execute subprocess return=%s' % self.returncode)
+
+
+class GomaDriver(object):
+  """Driver of Goma control."""
+
+  def __init__(self, env, backend):
+    """Initialize GomaDriver and read MANIFEST through env.
+
+    Args:
+      env: an instance of GomaEnv subclass.
+      backend: an instance of GomaBackend subclass.
+    """
+    self._env = env
+    self._backend = backend
+    self._latest_package_dir = 'latest'  # where _Pull puts downloaded files.
+    self._action_mappings = {  # action name -> method that performs it.
+        'pull': self._Pull,
+        'start': self._StartCompilerProxy,
+        'status': self._GetStatus,
+        'stop': self._ShutdownCompilerProxy,
+        'latest_version': self._PrintLatestVersion,
+        'update': self._Update,
+        'restart': self._RestartCompilerProxy,
+        'ensure_start': self._EnsureStartCompilerProxy,
+        'fetch': self._Fetch,
+        'stat': self._PrintStatistics,
+        'histogram': self._PrintHistogram,
+        'jsonstatus': self._PrintJsonStatus,
+        'report': self._Report,
+        'audit': self._Audit,
+    }
+    self._version = 0  # replaced by _ReadManifest if MANIFEST has VERSION.
+    self._manifest = {}
+    self._args = []
+    self._ReadManifest()
+    self._compiler_proxy_running = None  # None means "not checked yet".
+
+  def _ReadManifest(self):
+    """Reads MANIFEST via env into self._manifest and self._version."""
+    self._manifest = self._env.ReadManifest()
+    if 'VERSION' in self._manifest:
+      self._version = int(self._manifest['VERSION'])
+    # The key is only warned about here; its value is not used.
+    if 'GOMA_API_KEY_FILE' in self._manifest:
+      sys.stderr.write('WARNING: GOMA_API_KEY_FILE is deprecated\n')
+
+  def _UpdateManifest(self):
+    """Writes self._manifest to MANIFEST via env."""
+    self._env.WriteManifest(self._manifest)
+
+  def _ValidFiles(self, files):
+    """Checks the magic of each file under the latest package directory."""
+    for path in (os.path.join(self._latest_package_dir, f) for f in files):
+      if self._env.IsValidMagic(path):
+        continue
+      print '%s is broken.' % path  # only the first broken file is reported.
+      return False
+    return True
+
+  def _Pull(self):
+    """Download the latest package to goma_dir/latest unless up to date."""
+    latest_version, bad_version = self._GetLatestVersion()
+    files_to_download = ['MANIFEST', self._env.GetPackageName()]
+    if ((_ShouldUpdate(self._DownloadedVersion(), latest_version,
+                       bad_version) or  # downloaded copy needs an update
+         not self._ValidFiles(files_to_download)) and  # or is broken, and
+        _ShouldUpdate(self._version, latest_version, bad_version)):
+      self._env.RemoveDirectory(self._latest_package_dir)  # start clean.
+      self._env.MakeDirectory(self._latest_package_dir)
+      base_url = self._backend.GetDownloadBaseUrl()
+      for f in files_to_download:
+        url = '%s/%s' % (base_url, f)
+        destination = os.path.join(self._latest_package_dir, f)
+        print 'Downloading %s' % url
+        self._env.HttpDownload(url,
+                               rewrite_url=self._backend.RewriteRequest,
+                               headers=self._backend.GetHeaders(),
+                               destination_file=destination)
+      manifest = self._env.ReadManifest(self._latest_package_dir)
+      manifest['PLATFORM'] = self._env.GetPlatform()  # record the platform.
+      self._env.WriteManifest(manifest, self._latest_package_dir)
+    else:
+      print 'Downloaded package is already the latest version.'
+
+      # update the timestamp of MANIFEST in self._latest_package_dir
+      # to skip unnecessary download in ensure_start if the file is valid.
+      if self._env.IsValidManifest(self._latest_package_dir):
+        manifest = self._env.ReadManifest(directory=self._latest_package_dir)
+        self._env.WriteManifest(manifest, directory=self._latest_package_dir)
+
+  def _GetRunningCompilerProxyVersion(self):
+    """Returns the stripped /versionz message, or None if the request failed."""
+    reply = self._env.ControlCompilerProxy('/versionz', fast=True)
+    succeeded = reply['status']
+    return reply['message'].strip() if succeeded else None
+
+  def _GetDiskCompilerProxyVersion(self):  # version from env, label removed.
+    banner = self._env.GetCompilerProxyVersion()
+    return banner.replace('GOMA version', '').strip()
+
+  def _GetCompilerProxyHealthz(self):
+    """Returns /healthz message, or 'unavailable /healthz' on failure."""
+    reply = self._env.ControlCompilerProxy('/healthz', fast=True)
+    if not reply['status']:
+      return 'unavailable /healthz'
+    return reply['message'].strip()
+
+  def _IsCompilerProxySilentlyUpdated(self):
+    """True if a running compiler_proxy reports another version than disk."""
+    on_disk = self._GetDiskCompilerProxyVersion()
+    running = self._GetRunningCompilerProxyVersion()
+    # Without a running version there is nothing to compare, which counts as
+    # not updated.
+    return bool(running) and running != on_disk
+
+  def _IsGomaFlagUpdated(self):
+    """True if flags differ from /flagz of the running compiler_proxy."""
+    reply = self._env.ControlCompilerProxy('/flagz', fast=True)
+    return bool(reply['status']) and _IsGomaFlagUpdated(
+        _ParseFlagz(reply['message'].strip()))
+
+  def _GenericStartCompilerProxy(self, ensure=False):  # start / ensure_start.
+    self._env.CheckConfig()
+    if self._compiler_proxy_running is None:
+      self._compiler_proxy_running = self._env.CompilerProxyRunning()
+    if (not ensure and self._env.MayUsingDefaultIPCPort() and
+        self._compiler_proxy_running):
+      self._KillStakeholders()  # a plain start replaces what is running.
+      self._compiler_proxy_running = False
+    # Auto update first, if env allows it.
+    can_auto_update = self._env.CanAutoUpdate()
+    if can_auto_update:
+      bad_version = ''
+      # Do not ask the server again if latest/MANIFEST is recent.
+      if (self._env.ReadManifest(self._latest_package_dir) and
+          self._env.IsManifestModifiedRecently(self._latest_package_dir)):
+        print ('Auto update is skipped'
+               ' because %s/MANIFEST was updated recently.' %
+               self._latest_package_dir)
+        latest_version = self._version
+      else:
+        latest_version, bad_version = self._GetLatestVersion()
+      do_update = False
+      if self._version < latest_version:
+        print 'new goma client found (VERSION=%d).' % latest_version
+        do_update = True
+      if _IsBadVersion(self._version, bad_version):
+        print 'your version (VERSION=%d) is marked as bad version (%s)' % (
+            self._version, bad_version)
+        do_update = True
+      if do_update:
+        print 'Updating...'
+        self._env.AutoUpdate()
+        # AutoUpdate may change running status.
+        self._compiler_proxy_running = self._env.CompilerProxyRunning()
+        self._ReadManifest()
+    # Report the versions in use.
+    if 'VERSION' in self._manifest:
+      print 'Using goma VERSION=%s (%s)' % (
+          self._manifest['VERSION'],
+          'latest' if can_auto_update else 'no_auto_update')
+    disk_version = self._GetDiskCompilerProxyVersion()
+    print 'GOMA version %s' % disk_version
+    if ensure and self._compiler_proxy_running:
+      healthz = self._GetCompilerProxyHealthz()
+      if healthz != 'ok':
+        print 'goma is not in healthy state: %s' % healthz
+      updated = self._IsCompilerProxySilentlyUpdated()
+      flag_updated = self._IsGomaFlagUpdated()
+      if flag_updated:
+        print 'flagz is updated from the previous time.'
+      if healthz != 'ok' or updated or flag_updated:
+        self._ShutdownCompilerProxy()
+        if not self._WaitCooldown():
+          self._KillStakeholders()
+        self._compiler_proxy_running = False
+    # With ensure, a proxy that is still running at this point is kept.
+    if ensure and self._compiler_proxy_running:
+      print
+      print 'goma is already running.'
+      print
+      return
+
+    # AutoUpdate may restart compiler proxy.
+    if not self._compiler_proxy_running:
+      self._env.ExecCompilerProxy()
+      self._compiler_proxy_running = True
+    # Confirm the result via _GetStatus, which asks /healthz.
+    if self._GetStatus():
+      running_version = self._GetRunningCompilerProxyVersion()
+      if running_version != disk_version:
+        print 'Updated GOMA version %s' % running_version
+      print
+      print 'Now goma is ready!'
+      print
+      return
+    else:
+      raise Error('Failed to start compiler_proxy successfully.')
+
+  def _StartCompilerProxy(self):  # handler of the 'start' action.
+    self._GenericStartCompilerProxy(ensure=False)
+
+  def _EnsureStartCompilerProxy(self):  # handler of 'ensure_start'.
+    self._GenericStartCompilerProxy(ensure=True)
+
+  def _GetStatus(self):
+    """Prints /healthz of compiler_proxy and returns its status."""
+    reply = self._env.ControlCompilerProxy('/healthz')
+    reply.setdefault('pid', 'unknown')
+    print 'compiler proxy (pid=%(pid)s) status: %(url)s %(message)s' % reply
+    if reply['message'].startswith('error:'):
+      reply['status'] = False  # an 'error:' message overrides the status.
+    return reply['status']
+
+  def _ShutdownCompilerProxy(self):  # handler of the 'stop' action.
+    print 'Killing compiler proxy.'
+    reply = self._env.ControlCompilerProxy('/quitquitquit')  # request only.
+    print 'compiler proxy status: %(url)s %(message)s' % reply
+
+  def _GetLatestVersion(self):
+    """Get latest version of goma.
+
+    Returns:
+      A tuple of the version number and bad_version string from MANIFEST, or
+      (0, '') if the OAuth2 config file contains 'not_initialized'.
+    Raises:
+      Error: if failed to determine the latest version.
+    """
+    try:
+      url = self._backend.GetDownloadBaseUrl() + '/MANIFEST'
+    except Error:  # no download URL could be obtained.
+      oauth2_config_file = os.environ.get('GOMA_OAUTH2_CONFIG_FILE')
+      if oauth2_config_file:
+        with open(oauth2_config_file) as f:  # closes the file right away.
+          if 'not_initialized' in f.read():
+            return 0, ''
+      raise  # bare raise keeps the traceback of the original Error.
+    contents = self._env.HttpDownload(
+        url, rewrite_url=self._backend.RewriteRequest,
+        headers=self._backend.GetHeaders())
+    manifest = _ParseManifestContents(contents)
+    if 'VERSION' in manifest:
+      return (int(manifest['VERSION']), manifest.get('bad_version', ''))
+    raise Error('Unable to determine the latest version. '
+                'Failed to download the latest valid MANIFEST '
+                'from the server.\n'
+                'Response from server: %s' % contents)
+
+  def _DownloadedVersion(self):
+    """Reads VERSION of the package in the latest package directory.
+
+    Returns:
+      The version as integer.  0 if VERSION is missing or not a number.
+    """
+    try:
+      manifest = self._env.ReadManifest(self._latest_package_dir)
+      return int(manifest['VERSION'])
+    except (KeyError, ValueError):
+      # No usable VERSION is treated as version 0.
+      return 0
+
+  def _WaitCooldown(self):
+    """Wait until compiler_proxy process has finished.
+
+    It gives up after _MAX_COOLDOWN_WAIT checks, _COOLDOWN_SLEEP sec apart.
+    It would return False, if other compiler_proxy is running on other IPC port.
+
+    Returns:
+      True if compiler_proxy successfully cool down.  Otherwise, False.
+    """
+    if not self._env.CompilerProxyRunning():
+      return True
+    print 'Waiting for cool down...',  # trailing comma: stay on this line.
+    for cnt in range(_MAX_COOLDOWN_WAIT):
+      if not self._env.CompilerProxyRunning():
+        break
+      print (_MAX_COOLDOWN_WAIT - cnt),  # count down.
+      sys.stdout.flush()
+      time.sleep(_COOLDOWN_SLEEP)
+    else:  # never broke out; it is not checked again after the last sleep.
+      print 'give up'
+      return False
+    print  # ends the count down line.
+    return True
+
+  def _KillStakeholders(self):
+    """Asks env to kill stakeholders, then waits for compiler_proxy to exit."""
+    self._env.KillStakeholders()
+    if not self._WaitCooldown():  # failure is reported here, not raised.
+      print 'Could not kill compiler_proxy.'
+      print 'Probably, somebody else also runs compiler_proxy.'
+
+  def _UpdatePackage(self):
+    """Update or install latest package.
+
+    We raise error immediately when there is anything wrong instead of
+    trying to do something smart. When things go wrong it can be disk
+    issues and it's better to have human intervention instead.
+
+    Raises:
+      Error: if failed to install the package.
+    """
+    update_dir = 'update'  # scratch directory to extract the package into.
+    self._env.RemoveDirectory(update_dir)
+    self._env.MakeDirectory(update_dir)
+    manifest = self._env.ReadManifest(self._latest_package_dir)
+    if not manifest or 'VERSION' not in manifest:
+      manifest_file = os.path.join(self._latest_package_dir, 'MANIFEST')
+      print 'MANIFEST (%s) seems to be broken.' % manifest_file
+      print 'Going to remove MANIFEST.'
+      self._env.RemoveFile(manifest_file)
+      print 'Please execute update again.'
+      raise Error('MANIFEST in downloaded version is broken.')
+    latest_version = int(manifest['VERSION'])
+    package_file = os.path.join(self._latest_package_dir,
+                                self._env.GetPackageName())
+    if not self._env.ExtractPackage(package_file, update_dir):
+      print 'Package file (%s) seems to be broken.' % package_file
+      print 'Going to remove package_file.'
+      self._env.RemoveFile(package_file)
+      print 'Please execute update again.'
+      raise Error('Failed to extract downloaded package')
+    if not self._Audit(update_dir=update_dir):
+      print 'Failed to verify a file in package.'
+      print 'Going to remove package_file and update_dir'
+      self._env.RemoveFile(package_file)
+      self._env.RemoveDirectory(update_dir)
+      raise Error('downloaded package is broken')
+    if self._env.IsGomaInstalledBefore():
+      # This is an update rather than fresh install.
+      print 'Stopping compiler_proxy ...'
+      self._ShutdownCompilerProxy()
+      if not self._WaitCooldown():
+        self._KillStakeholders()
+      self._compiler_proxy_running = False
+    print 'Updating package to %s ...' % self._env.GetScriptDir()
+    if not self._env.InstallPackage(update_dir):
+      raise Error('Failed to install package')
+    self._version = latest_version
+    self._manifest.update(manifest)
+    self._UpdateManifest()
+    self._env.RemoveDirectory(update_dir)
+
+  def _Update(self):
+    """Update goma binary to latest version, rolling back on failure."""
+    latest_version, bad_version = self._GetLatestVersion()
+    if _ShouldUpdate(self._version, latest_version, bad_version):
+      self._Pull()
+      self._env.BackupCurrentPackage()
+      rollback = True  # cleared only when _UpdatePackage succeeds.
+      if self._compiler_proxy_running is None:
+        self._compiler_proxy_running = self._env.CompilerProxyRunning()
+      is_goma_running = self._compiler_proxy_running  # before the update.
+      try:
+        self._UpdatePackage()
+        rollback = False
+      finally:
+        if rollback:
+          print 'Failed to update. Rollback...'
+          try:
+            self._env.RollbackUpdate()
+          except Error as inst:  # a failed rollback is printed, not raised.
+            print inst
+        if is_goma_running and not self._env.CompilerProxyRunning():
+          print self._env.GetCompilerProxyVersion()
+          self._env.ExecCompilerProxy()  # restart what was running before.
+          self._compiler_proxy_running = True
+    else:
+      print 'Goma is already up-to-date.'
+
+  def _RestartCompilerProxy(self):  # handler of the 'restart' action.
+    if self._compiler_proxy_running is None:
+      self._compiler_proxy_running = self._env.CompilerProxyRunning()
+    if self._compiler_proxy_running:
+      self._ShutdownCompilerProxy()
+      if not self._WaitCooldown():  # still running after the quit request.
+        self._KillStakeholders()
+      self._compiler_proxy_running = False
+    self._StartCompilerProxy()
+
+  def _Fetch(self):
+    """Fetch the goma package for the platform given in self._args[1].
+
+    It is saved as self._args[2] if given, or under the package name,
+    relative to the current directory.
+    """
+    if len(self._args) < 2:
+      raise ConfigError('At least platform should be specified to fetch.')
+    pkg_name = _GetPackageName(self._args[1])
+    out_name = self._args[2] if len(self._args) > 2 else pkg_name
+    outfile = os.path.join(os.getcwd(), out_name)
+    url = '%s/%s' % (self._backend.GetDownloadBaseUrl(), pkg_name)
+    print 'Downloading %s' % url
+    self._env.HttpDownload(
+        url, rewrite_url=self._backend.RewriteRequest,
+        headers=self._backend.GetHeaders(), destination_file=outfile)
+
+  def _PrintLatestVersion(self):
+    """Prints the latest version on stdout as VERSION=<number>."""
+    version = self._GetLatestVersion()[0]
+    print 'VERSION=%d' % version
+
+  def _PrintStatistics(self):  # handler of 'stat': prints /statz.
+    print self._env.ControlCompilerProxy('/statz')['message']
+
+  def _PrintHistogram(self):  # handler of 'histogram': prints /histogramz.
+    print self._env.ControlCompilerProxy('/histogramz')['message']
+
+  def _PrintJsonStatus(self):  # handler of the 'jsonstatus' action.
+    status = self._GetJsonStatus()
+    if len(self._args) > 1:  # self._args[1] is used as the output file.
+      with open(self._args[1], 'w') as f:
+        f.write(status)
+    else:
+      print status
+
+  def _FindLatestInfoFile(self, command_name):
+    """Finds latest *.INFO.* file.
+
+    "Latest" means the greatest file name in string order among
+    <command_name>.*.INFO.* in the log directory; mtime is not consulted.
+
+    Args:
+      command_name: command name of *.INFO.* file. e.g. compiler_proxy.
+
+    Returns:
+      The latest *.INFO.* file path. None if not found.
+    """
+    pattern = os.path.join(_GetLogDirectory(), command_name + '.*.INFO.*')
+    found = glob.glob(pattern)
+    return max(found) if found else None
+
+  def _CopyLatestInfoFile(self, command_name, dst):
+    """Copies latest *.INFO.* file to destination.
+
+    Nothing is copied, and a notice is printed, if there is no such file.
+
+    Args:
+      command_name: command_name of *.INFO.* file to copy.
+                    e.g. compiler_proxy.
+      dst: destination directory name.
+    """
+    source = self._FindLatestInfoFile(command_name)
+    if not source:
+      print '%s log was not found' % command_name
+      return
+    self._env.CopyFile(source, os.path.join(dst, os.path.basename(source)))
+
+  def _InferBuildDirectory(self):
+    """Infer latest build directory from compiler_proxy.INFO.
+
+    Among the build_dir of the last 10 task start lines in the latest
+    compiler_proxy log, the most frequent one that has .ninja_log is chosen.
+    This would work for chromium build. Not sure for android etc.
+
+    Returns:
+      build directory if inferred. None otherwise.
+    """
+    infolog_path = self._FindLatestInfoFile('compiler_proxy')
+    if not infolog_path:
+      print 'compiler_proxy log was not found'
+      return None
+
+    build_re = re.compile('.*Task:.*Start.* build_dir=(.*)')
+
+    # List build_dir from compiler_proxy, and take only last 10 build dirs.
+    # deque(maxlen=10) drops the oldest entry by itself, and iterating over
+    # the file avoids loading the whole (possibly huge) log into memory.
+    build_dirs = collections.deque(maxlen=10)
+    with open(infolog_path) as f:
+      for line in f:
+        m = build_re.match(line)
+        if m:
+          build_dirs.append(m.group(1))
+
+    if not build_dirs:
+      return None
+    counter = collections.Counter(build_dirs)
+    for candidate, _ in counter.most_common():
+      if os.path.exists(os.path.join(candidate, '.ninja_log')):
+        return candidate
+    return None
+
+  def _Report(self):
+    """Packs compiler_proxy stats, logs and ninja_log into goma-report.tgz."""
+    tempdir = tempfile.mkdtemp()
+    try:
+      # Check compiler_proxy is working.
+      ret = self._env.ControlCompilerProxy('/healthz')
+      compiler_proxy_is_working = ret.get('status', False)
+      if compiler_proxy_is_working:
+        print 'compiler_proxy is working:'
+      else:
+        print 'compiler_proxy is not working:'
+        print '  omit compiler_proxy stats'
+
+      if compiler_proxy_is_working:
+        keys = ['compilerinfoz', 'histogramz', 'serverz', 'statz']
+        for key in keys:
+          ret = self._env.ControlCompilerProxy('/' + key)
+          if not ret.get('status', False):
+            # Failed to get the message. compiler_proxy has died?
+            print ('  failed to get %s: %s' % (key, ret['message']))
+            continue
+          print ('  include /%s' % key)
+          self._env.WriteFile(os.path.join(tempdir, key + '-output'),
+                              ret['message'])
+
+      self._CopyLatestInfoFile('compiler_proxy', tempdir)
+      self._CopyLatestInfoFile('compiler_proxy-subproc', tempdir)
+
+      build_dir = self._InferBuildDirectory()
+      if build_dir:
+        print 'build directory is inferred as', build_dir
+        src_ninja_log = os.path.join(build_dir, '.ninja_log')
+        # .ninja_log may be gone by now; report it only when really copied.
+        if os.path.exists(src_ninja_log):
+          dst_ninja_log = os.path.join(tempdir, 'ninja_log')
+          self._env.CopyFile(src_ninja_log, dst_ninja_log)
+          print '  include ninja_log'
+        else:
+          print '  omit ninja_log'
+      else:
+        print 'build directory cannot be inferred:'
+        print '  omit ninja_log'
+
+      output_filename = os.path.join(_GetTempDirectory(), 'goma-report.tgz')
+      self._env.MakeTgzFromDirectory(tempdir, output_filename)
+
+      print ''
+      print 'A report file is successfully created:'
+      print ' ', output_filename
+    finally:
+      shutil.rmtree(tempdir, ignore_errors=True)
+
+  def _GetJsonStatus(self):
+    """Returns /errorz of compiler_proxy, or a json notice if it failed."""
+    reply = self._env.ControlCompilerProxy('/errorz')
+    if reply['status']:
+      return reply['message']
+    # Stand-in for the status of a compiler_proxy that did not answer.
+    unreachable = {
+        'version': 1,
+        'compile_error': 'COMPILER_PROXY_UNREACHABLE',
+    }
+    return json.dumps({'notice': [unreachable]})
+
+  def _Audit(self, update_dir=''):
+    """Verifies goma client files against their recorded checksums.
+
+    If update_dir is an empty string, the current goma files are audited.
+
+    Args:
+      update_dir: directory containing goma files to verify.
+
+    Returns:
+      False if a file's checksum differs from the recorded one.
+      True if all files are verified or no checksum could be loaded.
+    """
+    expected = self._env.LoadChecksum(update_dir=update_dir)
+    if not expected:
+      print 'No checksum could be loaded.'
+      return True
+    for name, recorded in expected.iteritems():
+      # TODO: remove the .pdb exception after the next release.
+      # Windows checksum file has non-existing .pdb files.
+      _, extension = os.path.splitext(name)
+      if extension != '.pdb':
+        actual = self._env.CalculateChecksum(name, update_dir=update_dir)
+        if recorded != actual:
+          print '%s differs: %s != %s' % (name, recorded, actual)
+          return False
+    print 'All files verified.'
+    return True
+
+
+  def _CreateDirectory(self, dir_name, purpose):
+    """Creates dir_name if it is missing; otherwise checks who owns it.
+
+    Args:
+      dir_name: a name of the directory to create or check.
+      purpose: a short description of the directory used in messages.
+
+    Raises:
+      Error: if the directory exists but is not owned by the user.
+    """
+    info = {'purpose': purpose, 'dir': dir_name}
+    if not self._env.IsDirectoryExist(dir_name):
+      sys.stderr.write('creating %(purpose)s dir (%(dir)s).\n' % info)
+      self._env.MakeDirectory(dir_name)
+      return
+    if self._env.EnsureDirectoryOwnedByUser(dir_name):
+      return
+    sys.stderr.write(
+        'Error: %(purpose)s dir (%(dir)s) is not owned by you.\n' % info)
+    raise Error('%(purpose)s dir (%(dir)s) is not owned by you.' % info)
+
+  def _CreateGomaTmpDirectory(self):
+    """Prepares the goma temp directory and exports it as GOMA_TMP_DIR."""
+    goma_tmp = self._env.GetGomaTmpDir()
+    self._CreateDirectory(goma_tmp, 'temp')
+    sys.stdout.write('using %s as tmpdir\n' % goma_tmp)
+    os.environ['GOMA_TMP_DIR'] = goma_tmp
+
+  def _CreateCrashDumpDirectory(self):
+    """Prepares the directory that may contain crash dumps."""
+    crash_dump_dir = self._env.GetCrashDumpDirectory()
+    self._CreateDirectory(crash_dump_dir, 'crash dump')
+
+  def _CreateCacheDirectory(self):
+    """Prepares the directory that may contain cache."""
+    cache_dir = self._env.GetCacheDirectory()
+    self._CreateDirectory(cache_dir, 'cache')
+
+  def _Usage(self):
+    """Prints the usage line followed by one line per subcommand."""
+    subcommand_help = (
+        '  start                 start compiler proxy',
+        '  stop                  stop compiler proxy',
+        '  restart               restart compiler proxy',
+        '  ensure_start          start compiler proxy if it is not running',
+        '  pull                  just download the latest goma pkg for update',
+        '  update                update or install goma package',
+        '  status                get compiler proxy status',
+        '  stat                  show statistics',
+        '  histogram             show histogram',
+        '  jsonstatus [outfile]  show status report in JSON',
+        '  latest_version        show the available latest release version',
+        '  fetch <platform> [outfile]  download the latest goma package',
+        '  report                create a report file.',
+        '  audit                 audit goma client.',
+    )
+    script_name = self._env.GetGomaCtlScriptName()
+    print 'Usage: %s <subcommand>, available subcommands are:' % script_name
+    for help_line in subcommand_help:
+      print help_line
+
+  def _DefaultAction(self):
+    """Prints usage, preceded by a complaint unless help was requested."""
+    help_requested = (not self._args or
+                      self._args[0] in ('-h', '--help', 'help'))
+    if not help_requested:
+      print 'unknown command: %s' % ' '.join(self._args)
+      print
+    self._Usage()
+
+  def Dispatch(self, args):
+    """Prepares goma directories and runs the requested subcommand.
+
+    Args:
+      args: a list of command line arguments.  args[0] selects the action;
+            the status is shown if the list is empty.
+    """
+    for prepare in (self._CreateGomaTmpDirectory,
+                    self._CreateCrashDumpDirectory,
+                    self._CreateCacheDirectory):
+      prepare()
+    self._args = args
+    if args:
+      handler = self._action_mappings.get(args[0], self._DefaultAction)
+      handler()
+    else:
+      self._GetStatus()
+
+
+class GomaEnv(object):
+  """Goma running environment."""
+  # Subclasses must define the following protected variables.
+  # File name of gomacc; it is joined with the script directory.
+  _GOMACC = ''
+  # File name of compiler_proxy, used unless $GOMA_COMPILER_PROXY_BINARY
+  # is set.
+  _COMPILER_PROXY = ''
+  # File name of goma_fetch; it is used only if it exists in the script dir.
+  _GOMA_FETCH = ''
+  # Command name of curl to look for.
+  _CURL = ''
+  # Name of the environment variable that identifies the IPC socket/port.
+  _COMPILER_PROXY_IDENTIFIER_ENV_NAME = ''
+  # List of (label shown to the user, platform name) pairs for GetPlatform.
+  PLATFORM_CANDIDATES = []
+  # List of (flag name, default value) pairs applied by _SetupEnviron.
+  _DEFAULT_ENV = []
+  # Same as _DEFAULT_ENV, but applied only if the USE_SSL flag is true.
+  _DEFAULT_SSL_ENV = []
+
+  def __init__(self, script_dir=SCRIPT_DIR):
+    """Initializes paths, manifest data and default environment variables.
+
+    Args:
+      script_dir: a directory that holds goma binaries and MANIFEST.
+    """
+    self._dir = os.path.abspath(script_dir)
+    # $GOMA_COMPILER_PROXY_BINARY overrides the binary under script_dir.
+    self._compiler_proxy_binary = os.environ.get(
+        'GOMA_COMPILER_PROXY_BINARY',
+        os.path.join(self._dir, self._COMPILER_PROXY))
+    # goma_fetch is optional; it stays None if the binary does not exist.
+    self._goma_fetch = None
+    if os.path.exists(os.path.join(self._dir, self._GOMA_FETCH)):
+      self._goma_fetch = os.path.join(self._dir, self._GOMA_FETCH)
+    # Filled lazily by _FindCurlPath.
+    self._curl_path = None
+    self._is_daemon_mode = False
+    self._gomacc_binary = os.path.join(self._dir, self._GOMACC)
+    # ReadManifest needs self._dir, so it must come after it is set.
+    self._manifest = self.ReadManifest(self._dir)
+    self._platform = self._manifest.get('PLATFORM', '')
+    # If manifest does not have PLATFORM, goma_ctl.py tries to get it from env.
+    # See: b/16274764
+    if not self._platform:
+      self._platform = os.environ.get('PLATFORM', '')
+    self._version = self._manifest.get('VERSION', '')
+    # Reference time used by IsOldFile.
+    self._time = time.time()
+    self._goma_params = None
+    # Socket name and port cache; CheckConfig invalidates the port if the
+    # configured socket name changes.
+    self._gomacc_socket = None
+    self._gomacc_port = None
+    # _SetupEnviron below may replace this None with "host:port".
+    self._https_proxy = None
+    # Set by BackupCurrentPackage and consumed by RollbackUpdate.
+    self._backup = None
+    self._SetupEnviron()
+
+  # Methods that may interfere with the external environment.
+  def MayUsingDefaultIPCPort(self):
+    """Returns True if IPC port is not configured in environmental variables.
+
+    If os.environ has self._COMPILER_PROXY_IDENTIFIER_ENV_NAME, it may use
+    non-default IPC port.  Otherwise, it would use default port.
+
+    Returns:
+      True if os.environ does not have
+      self._COMPILER_PROXY_IDENTIFIER_ENV_NAME.
+    """
+    # "in" replaces the deprecated dict.has_key(); the result is the same.
+    return self._COMPILER_PROXY_IDENTIFIER_ENV_NAME not in os.environ
+
+  def GetCompilerProxyVersion(self):
+    """Returns the output of "compiler_proxy --version".
+
+    Returns:
+      a string of what the command wrote to stdout and stderr, without
+      trailing whitespace.
+    """
+    version_cmd = PopenWithCheck([self._compiler_proxy_binary, '--version'],
+                                 stdout=subprocess.PIPE,
+                                 stderr=subprocess.STDOUT)
+    output, _ = version_cmd.communicate()
+    return output.rstrip()
+
+  def GetScriptDir(self):
+    """Returns the absolute path of the directory this environment uses."""
+    script_dir = self._dir
+    return script_dir
+
+  def IsManifestModifiedRecently(self, directory='', threshold=4*60*60):
+    """Returns True if MANIFEST was modified less than threshold sec ago.
+
+    Args:
+      directory: a string of directory name that has the MANIFEST file.
+      threshold: a number of seconds; the default is 4 hours.
+
+    Returns:
+      True if the age of the MANIFEST file is smaller than threshold.
+    """
+    manifest_path = os.path.join(self._dir, directory, 'MANIFEST')
+    now = time.time()
+    modified_at = os.stat(manifest_path).st_mtime
+    return now - modified_at < threshold
+
+  def ReadManifest(self, directory=''):
+    """Read manifest from MANIFEST file in the directory.
+
+    Args:
+      directory: a string of directory name to read the manifest file.
+
+    Returns:
+      A dictionary of manifest if the manifest file exist.
+      Otherwise, an empty dictionary.
+    """
+    manifest_path = os.path.join(self._dir, directory, 'MANIFEST')
+    if not os.path.isfile(manifest_path):
+      return {}
+    # Close the file explicitly instead of leaving an open handle behind.
+    with open(manifest_path, 'r') as manifest_file:
+      contents = manifest_file.read()
+    return _ParseManifestContents(contents)
+
+  def WriteManifest(self, manifest, directory=''):
+    """Stores a manifest dictionary as "key=value" lines in MANIFEST.
+
+    Args:
+      manifest: a dictionary of the manifest.
+      directory: a string of directory name to write the manifest file.
+    """
+    target = os.path.join(self._dir, directory, 'MANIFEST')
+    with open(target, 'w') as out:
+      out.writelines('%s=%s\n' % (name, content)
+                     for name, content in manifest.items())
+
+  def CheckConfig(self):
+    """Checks GomaEnv configurations.
+
+    The cached gomacc port is dropped if the configured socket name has
+    changed since the last check.
+
+    Raises:
+      ConfigError: if the script directory, compiler_proxy or gomacc is
+                   missing.
+    """
+    configured_socket = os.environ.get(
+        self._COMPILER_PROXY_IDENTIFIER_ENV_NAME, '')
+    if configured_socket != self._gomacc_socket:
+      self._gomacc_socket = configured_socket
+      self._gomacc_port = None  # invalidate
+
+    if not os.path.isdir(self._dir):
+      raise ConfigError('%s is not directory' % (self._dir))
+
+    compiler_proxy = self._compiler_proxy_binary
+    if not os.path.isfile(compiler_proxy):
+      raise ConfigError('compiler_proxy(%s) not exist' % (compiler_proxy))
+
+    gomacc = self._gomacc_binary
+    if not os.path.isfile(gomacc):
+      raise ConfigError('gomacc(%s) not exist' % gomacc)
+
+    self._CheckPlatformConfig()
+
+  def _GetCompilerProxyPort(self, proc=None):
+    """Gets compiler_proxy's port by "gomacc port".
+
+    "gomacc port" is retried until it reports a non-zero port or the
+    deadline (20 seconds plus $GOMA_PING_TIMEOUT_SEC) passes.  A port that
+    was found is cached in self._gomacc_port.
+
+    Args:
+      proc: an instance of subprocess.Popen to poll.
+
+    Returns:
+      a string of compiler proxy port number.
+
+    Raises:
+      Error: if it cannot get compiler proxy port.
+    """
+    if self._gomacc_port:
+      return self._gomacc_port
+
+    port_error = ''
+    stderr = ''
+
+    ping_start_time = time.time()
+    ping_timeout_sec = int(os.environ.get('GOMA_PING_TIMEOUT_SEC', '0')) + 20
+    ping_deadline = ping_start_time + ping_timeout_sec
+    ping_print_time = ping_start_time
+    while True:
+      current_time = time.time()
+      if current_time > ping_deadline:
+        break
+
+      # Report progress at most once per second.
+      if current_time - ping_print_time > 1:
+        print 'waiting for compiler_proxy...'
+        ping_print_time = current_time
+
+      # output glog output to stderr but ignore it because it is usually about
+      # failure of connecting IPC port.
+      env = os.environ.copy()
+      env['GLOG_logtostderr'] = 'true'
+      with tempfile.TemporaryFile() as tf:
+        # "gomacc port" command may fail until compiler_proxy gets ready.
+        # We know gomacc port only output port number to stdout, whose size
+        # should be within pipe buffer.
+        portcmd = subprocess.Popen([self._gomacc_binary, 'port'],
+                                   stdout=subprocess.PIPE,
+                                   stderr=tf,
+                                   env=env)
+        self._WaitWithTimeout(portcmd, 1)
+        if portcmd.poll() is None:
+          # port takes long time
+          portcmd.kill()
+          # Reap the killed child and release its pipe so that retries do
+          # not accumulate zombie processes and open descriptors.
+          portcmd.wait()
+          portcmd.stdout.close()
+          port_error = 'port timedout'
+          tf.seek(0)
+          stderr = tf.read()
+          continue
+        portcmd.wait()
+        port = portcmd.stdout.read()
+        portcmd.stdout.close()
+        tf.seek(0)
+        stderr = tf.read()
+      if port and int(port) != 0:
+        self._gomacc_port = str(int(port))
+        return self._gomacc_port
+      if proc and not self._is_daemon_mode:
+        proc.poll()
+        if proc.returncode is not None:
+          raise Error('compiler_proxy is not running %d' % proc.returncode)
+    # The deadline passed: show what was collected and give up.
+    if port_error:
+      print port_error
+    if stderr:
+      sys.stderr.write(stderr)
+    e = Error('compiler_proxy is not ready?')
+    self._GetDetailedFailureReason()
+    if proc:
+      e = Error('compiler_proxy is not ready? pid=%d' % proc.pid)
+      if proc.returncode is not None:
+        # The exit status was already collected, so there is nothing left
+        # to kill; signalling the stale pid could fail or hit another
+        # process.
+        e = Error('compiler_proxy is not running %d' % proc.returncode)
+      else:
+        proc.kill()
+    raise e
+
+  def ControlCompilerProxy(self, command, fast=False):
+    """Send command to compiler proxy.
+
+    Args:
+      command: a string of command to send to the compiler proxy.
+      fast: True if it doesn't need to check compiler_proxy is running
+            and stakeholder pids.
+
+    Returns:
+      Dict of boolean status, message string, and url prefix.
+      A successful reply also has 'pid': comma separated stakeholder pids
+      if fast is False, or an empty string if fast is True.
+      A failed reply does not have 'pid'.
+    """
+    self.CheckConfig()
+    if not fast and not self.CompilerProxyRunning():
+      return {'status': False, 'message': 'goma is not running.', 'url': ''}
+    url_prefix = 'http://127.0.0.1:0'
+    try:
+      url_prefix = 'http://127.0.0.1:%s' % self._GetCompilerProxyPort()
+      url = '%s%s' % (url_prefix, command)
+      resp = urllib2.urlopen(url)
+      try:
+        reply = resp.read()
+      finally:
+        # Release the connection instead of waiting for garbage collection.
+        resp.close()
+      pids = ''
+      if not fast:
+        pids = ','.join(self._GetStakeholderPids())
+      return {'status': True, 'message': reply, 'url': url_prefix,
+              'pid': pids}
+    except (urllib2.URLError, Error, socket.error) as ex:
+      # urllib2.urlopen(url) may raise socket.error, such as [Errno 10054]
+      # An existing connection was forcibly closed by the remote host.
+      # socket.error uses IOError as base class in python 2.6.
+      # note: socket.error changed to an alias of OSError in python 3.3.
+      msg = repr(ex)
+    return {'status': False, 'message': msg, 'url': url_prefix}
+
+  def _FindCurlPath(self):
+    """Returns the path of curl, looking it up only once.
+
+    Returns:
+      a string of the curl path.
+
+    Raises:
+      Error: if curl cannot be found.
+    """
+    if not self._curl_path:
+      self._curl_path = _FindCommandInPath(
+          self._CURL, find_subdir_rule=self._FindCurlUnderPath)
+      if not self._curl_path:
+        raise Error('Unable to find curl')
+    return self._curl_path
+
+  def HttpDownload(self, source_url,
+                   rewrite_url=None, headers=None, destination_file=None):
+    """Download data from the given URL to the file.
+
+    If self._goma_fetch defined, prefer goma_fetch to curl.
+    Using curl instead of urllib2.urlopen because of python limitations.  For
+    the minimum python version we targeted at (2.6), urllib2 does not validate
+    certificates in SSL connection.
+    TODO: kill curl supports.
+
+    Args:
+      source_url: URL to retrieve data.
+      rewrite_url: rewrite source_url for curl.
+      headers: a dictionary to be used in the HTTP header.
+      destination_file: file name to store data, if specified None, return
+                        contents as string.
+
+    Returns:
+      None if provided destination_file, downloaded contents otherwise.
+
+    Raises:
+      Error if fetch failed.
+    """
+    if self._goma_fetch:
+      # for proxy, goma_fetch uses $GOMA_PROXY_HOST, $GOMA_PROXY_PORT.
+      # headers is used to set Authorization header, but goma_fetch will
+      # set appropriate authorization headers from goma env flags.
+      # increase timeout.
+      # Work on a copy: the longer timeout is meant for this fetch only and
+      # must not leak into os.environ, which later child processes inherit.
+      env = os.environ.copy()
+      env['GOMA_HTTP_SOCKET_READ_TIMEOUT_SECS'] = '300.0'
+      if destination_file:
+        destination_file = os.path.join(self._dir, destination_file)
+        with open(destination_file, 'wb') as f:
+          retcode = subprocess.call([self._goma_fetch, source_url],
+                                    env=env,
+                                    stdout=f)
+          if retcode:
+            raise Error('failed to fetch %s: %d' % (source_url, retcode))
+        return
+      return PopenWithCheck([self._goma_fetch, source_url],
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE,
+                            env=env).communicate()[0]
+
+    # Fall back to curl; rewrite_url and headers only matter on this path.
+    if rewrite_url:
+      source_url = rewrite_url(source_url)
+
+    curl_command = [self._FindCurlPath(), '--silent',
+                    '--retry', str(_CURL_RETRY)]
+    if self._https_proxy:
+      curl_command.extend(['--proxy', self._https_proxy])
+    if headers:
+      for name, value in headers.items():
+        curl_command.extend(['-H', '%s: %s' % (name, value)])
+
+    if destination_file:
+      destination_file = os.path.join(self._dir, destination_file)
+      retcode = subprocess.call(curl_command + ['-o', destination_file,
+                                                source_url])
+      if retcode:
+        raise Error('failed to fetch %s: %d' % (source_url, retcode))
+      return
+
+    return PopenWithCheck(curl_command + [source_url],
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE).communicate()[0]
+
+  def GetGomaTmpDir(self):
+    """Returns the directory goma uses for its temporary files.
+
+    Subclasses must override this method.
+
+    Returns:
+      a directory name.
+
+    Raises:
+      NotImplementedError: always, in this base class.
+    """
+    raise NotImplementedError
+
+  def GetCrashDumpDirectory(self):
+    """Returns the crash dump directory under the goma temp directory.
+
+    Returns:
+      a directory name.
+    """
+    goma_tmp = self.GetGomaTmpDir()
+    return os.path.join(goma_tmp, _CRASH_DUMP_DIR)
+
+  def GetCacheDirectory(self):
+    """Returns the cache directory.
+
+    $GOMA_CACHE_DIR wins if it is set to a non-empty value.  Otherwise, the
+    cache directory under the goma temp directory is used.
+
+    Returns:
+      a directory name.
+    """
+    return (os.environ.get('GOMA_CACHE_DIR') or
+            os.path.join(self.GetGomaTmpDir(), _CACHE_DIR))
+
+  def GetCrashDumps(self):
+    """Lists crash dump files in the crash dump directory.
+
+    Returns:
+      a list of full qualified crash dump file names.
+      If no crash dump, empty list is returned.
+    """
+    dump_pattern = os.path.join(self.GetCrashDumpDirectory(),
+                                '*' + _DUMP_FILE_SUFFIX)
+    return glob.glob(dump_pattern)
+
+
+  def WriteFile(self, filename, content):
+    """Writes content to filename, replacing what the file had.
+
+    Args:
+      filename: a name of the file to write.
+      content: a string to be written.
+    """
+    with open(filename, 'w') as out:
+      out.write(content)
+
+  def CopyFile(self, from_file, to_file):
+    """Copies from_file to to_file with shutil.copy.
+
+    Args:
+      from_file: a name of the file to copy.
+      to_file: a name of the destination.
+    """
+    shutil.copy(from_file, to_file)
+
+  def MakeTgzFromDirectory(self, dir_name, output_filename):
+    """Archives dir_name into a gzip compressed tar file.
+
+    Args:
+      dir_name: a name of the directory to archive.
+      output_filename: a name of the tgz file to create.
+    """
+    # TarFile became a context manager only in python 2.7, so close it
+    # explicitly to keep this working on python 2.6.
+    tf = tarfile.open(output_filename, 'w:gz')
+    try:
+      tf.add(dir_name)
+    finally:
+      tf.close()
+
+  def RemoveFile(self, filename):
+    """Removes a file.
+
+    Args:
+      filename: a name of the file; it is joined with the script directory.
+    """
+    os.remove(os.path.join(self._dir, filename))
+
+  def _ReadBytesFromFile(self, filename, length):
+    """Reads the first bytes of a file.
+
+    Args:
+      filename: a name of the file; it is joined with the script directory.
+      length: the maximum number of bytes to read.
+
+    Returns:
+      a string of at most length bytes from the beginning of the file.
+    """
+    filename = os.path.join(self._dir, filename)
+    # Binary mode: the result is compared with magic numbers, so newline
+    # translation of text mode must not touch the data.
+    with open(filename, 'rb') as f:
+      return f.read(length)
+
+  def IsValidManifest(self, directory=''):
+    """Returns True if MANIFEST has both PLATFORM and VERSION.
+
+    Args:
+      directory: a string of directory name to read the manifest file.
+    """
+    manifest = self.ReadManifest(directory=directory)
+    return 'PLATFORM' in manifest and 'VERSION' in manifest
+
+  def IsValidMagic(self, filename):
+    """Checks that a file starts with the magic number of its extension.
+
+    Args:
+      filename: a name of the file; it is joined with the script directory.
+
+    Returns:
+      For MANIFEST, the result of IsValidManifest for its directory.
+      False if the file does not exist.
+      True if the extension is not .tgz, .txz or .zip.
+      Otherwise, True if the file begins with the expected magic number.
+    """
+    # MANIFEST is special case.
+    if os.path.basename(filename) == 'MANIFEST':
+      return self.IsValidManifest(os.path.dirname(filename))
+
+    path = os.path.join(self._dir, filename)
+    if not os.path.exists(path):
+      return False
+
+    _, extension = os.path.splitext(path)
+    expected = {
+        '.tgz': '\x1F\x8B',
+        '.txz': '\xFD7zXZ\x00',
+        '.zip': 'PK',
+    }.get(extension)
+    if expected:
+      return self._ReadBytesFromFile(path, len(expected)) == expected
+    return True
+
+  def RemoveDirectory(self, directory):
+    """Removes a directory tree, ignoring errors.
+
+    Args:
+      directory: a name of the directory; it is joined with the script
+                 directory.
+    """
+    shutil.rmtree(os.path.join(self._dir, directory), ignore_errors=True)
+
+  def MakeDirectory(self, directory):
+    """Creates a directory with mode 0700.
+
+    Args:
+      directory: a name of the directory; it is joined with the script
+                 directory.
+
+    Raises:
+      Error: if the directory does not exist after it was created.
+    """
+    target = os.path.join(self._dir, directory)
+    os.mkdir(target, 0700)
+    if os.path.exists(target):
+      return
+    raise Error('Unable to create directory: %s.' % target)
+
+  def IsDirectoryExist(self, directory):
+    """Returns True if directory is a directory and not a symlink.
+
+    Args:
+      directory: a name of the directory; it is joined with the script
+                 directory.
+    """
+    path = os.path.join(self._dir, directory)
+    if not os.path.isdir(path):
+      return False
+    # To avoid symlink attack, the directory should not be symlink.
+    return not os.path.islink(path)
+
+  def IsGomaInstalledBefore(self):
+    """Returns True if the compiler_proxy binary exists."""
+    compiler_proxy = self._compiler_proxy_binary
+    return os.path.exists(compiler_proxy)
+
+  def IsOldFile(self, filename):
+    """Returns True if the file is older than $GOMA_LOG_CLEAN_INTERVAL.
+
+    Args:
+      filename: a name of the file to check.
+
+    Returns:
+      False if the interval is not set or negative.  Otherwise, True if
+      the file was modified more than the interval (in seconds) before
+      this object was created.
+    """
+    interval = int(os.environ.get('GOMA_LOG_CLEAN_INTERVAL', '-1'))
+    if interval >= 0:
+      return os.path.getmtime(filename) < self._time - interval
+    return False
+
+  def _SetupEnviron(self):
+    """Fills in default environment variables that are not configured yet.
+
+    It also sets GLOG_logfile_mode, applies the automatic proxy
+    configuration and remembers the HTTPS proxy for curl.
+    """
+    # 0600 is an octal literal, so its decimal form "384" is exported.
+    os.environ['GLOG_logfile_mode'] = str(0600)
+    for defaults in (_DEFAULT_ENV, self._DEFAULT_ENV):
+      for flag_name, default_value in defaults:
+        _SetGomaFlagDefaultValueIfEmpty(flag_name, default_value)
+
+    if not _IsGomaFlagTrue('USE_SSL'):
+      for flag_name, default_value in _DEFAULT_NO_SSL_ENV:
+        _SetGomaFlagDefaultValueIfEmpty(flag_name, default_value)
+
+    # USE_SSL is checked again after the non-SSL defaults are applied.
+    if _IsGomaFlagTrue('USE_SSL'):
+      for flag_name, default_value in self._DEFAULT_SSL_ENV:
+        _SetGomaFlagDefaultValueIfEmpty(flag_name, default_value)
+
+    # Automatic Proxy configuration.
+    detected_proxy = _GetProxyEnv()
+    if detected_proxy:
+      os.environ['GOMA_PROXY_HOST'] = detected_proxy['host']
+      os.environ['GOMA_PROXY_PORT'] = detected_proxy['port']
+    if 'GOMA_PROXY_HOST' in os.environ and 'GOMA_PROXY_PORT' in os.environ:
+      # Set HTTPS proxy for curl.
+      self._https_proxy = '%s:%s' % (os.environ['GOMA_PROXY_HOST'],
+                                     os.environ['GOMA_PROXY_PORT'])
+
+  def ExecCompilerProxy(self):
+    """Starts compiler proxy and waits until its port is known.
+
+    Returns:
+      a string of the port number of the started compiler proxy.
+    """
+    # Forget the cached port; it belonged to the previous compiler proxy.
+    self._gomacc_port = None
+    started = self._ExecCompilerProxy()
+    # This also stores the new port in the cache.
+    return self._GetCompilerProxyPort(proc=started)
+
+  def GetPlatform(self):
+    """Get platform.
+
+    If the script does not know the platform, it asks the user to choose
+    one of PLATFORM_CANDIDATES and sets the platform member variable
+    automatically.
+
+    Returns:
+      a string of platform.
+
+    Raises:
+      Error: if the answer is not one of the numbers shown in the prompt.
+    """
+    if self._platform:
+      return self._platform
+
+    idx = 1
+    to_show = []
+    for candidate in self.PLATFORM_CANDIDATES:
+      to_show.append('%d. %s' % (idx, candidate[0]))
+      idx += 1
+    print 'What is your platform?'
+    selected = raw_input('%s ? --> ' % '  '.join(to_show))
+    try:
+      choice = int(selected)
+    except ValueError:
+      raise Error('Invalid selection')
+    # Reject 0 and negative numbers explicitly; as list indices they would
+    # silently wrap around to a candidate counted from the end.
+    if not 1 <= choice <= len(self.PLATFORM_CANDIDATES):
+      raise Error('Invalid selection')
+    self._platform = self.PLATFORM_CANDIDATES[choice - 1][1]
+    return self._platform
+
+  def CanAutoUpdate(self):
+    """Checks auto update is allowed or not.
+
+    Auto update needs a known version, and the script directory must not
+    have a file named no_auto_update.
+
+    Returns:
+      True if auto-update is allowed.  Otherwise, False.
+    """
+    if not self._version:
+      return False
+    opt_out_marker = os.path.join(self._dir, 'no_auto_update')
+    return not os.path.isfile(opt_out_marker)
+
+  def AutoUpdate(self):
+    """Updates the client by running this script with "update"."""
+    # The script is looked up by its own file name inside the script dir.
+    script_name = os.path.basename(os.path.realpath(__file__))
+    update_cmd = ['python', os.path.join(self._dir, script_name), 'update']
+    subprocess.check_call(update_cmd)
+
+  def BackupCurrentPackage(self, backup_dir='backup'):
+    """Copies the current package into backup_dir under the script dir.
+
+    Each directory that is copied and the names in it are recorded in
+    self._backup for RollbackUpdate.
+
+    Args:
+      backup_dir: a string of back up directory name.
+    """
+    self._backup = []
+
+    def _Record(path, names):
+      # shutil.copytree calls its "ignore" hook for each directory.  It is
+      # used here only to remember what is copied, so nothing is ignored.
+      # See Also: http://docs.python.org/2/library/shutil.html
+      self._backup.append((path, names))
+      return []
+
+    self.RemoveDirectory(backup_dir)
+    destination = os.path.join(self._dir, backup_dir)
+    shutil.copytree(self._dir, destination, symlinks=True, ignore=_Record)
+
+  def RollbackUpdate(self, backup_dir='backup'):
+    """Best-effort-rollback from the backup.
+
+    Files recorded by BackupCurrentPackage are copied back from backup_dir
+    unless the installed one has the same size, mode and mtime.
+
+    Args:
+      backup_dir: a string of back up directory name.
+
+    Raises:
+      Error: if the caller did not execute BackupCurrentPackage before.  Or,
+             rollback failed.
+    """
+    if not self._backup:
+      raise Error('You should backup files before calling rollback.')
+    # Each entry is (original directory, names in it) recorded at back up.
+    for entry in self._backup:
+      # Map the original directory to its copy under the backup directory.
+      backup_dir_path = entry[0].replace(self._dir,
+                                         os.path.join(self._dir, backup_dir))
+      # Note:
+      # Somebody may ask "Why not shutil.copytree?"
+      # It is good for back up but not good for rollback.
+      # Since shutil.copytree try to create directories even if it exist,
+      # it will try to make existing directory and cause OSError if we use it
+      # in rollback process.
+      for filename in entry[1]:
+        from_name = os.path.join(backup_dir_path, filename)
+        to_name = os.path.join(entry[0], filename)
+        from_stat = os.stat(from_name)
+        to_stat = os.stat(to_name) if os.path.exists(to_name) else None
+        # Skip unchanged file / dir.
+        # I expect this also skips running processes since we cannot update it
+        # on Windows.
+        if (to_stat and
+            from_stat.st_size == to_stat.st_size and
+            from_stat.st_mode == to_stat.st_mode and
+            from_stat.st_mtime == to_stat.st_mtime):
+          continue
+
+        # Restore files (overwriting or re-creating them) and re-create
+        # missing directories; any other combination is an error.
+        if os.path.isfile(from_name) and os.path.isfile(to_name):
+          shutil.copy2(from_name, to_name)
+        elif os.path.isfile(from_name) and not os.path.exists(to_name):
+          shutil.copy2(from_name, to_name)
+        elif os.path.isdir(from_name) and os.path.isdir(to_name):
+          continue  # do nothing if directory exist.
+        elif os.path.isdir(from_name) and not os.path.exists(to_name):
+          self.MakeDirectory(to_name)
+        else:
+          raise Error('Rollback failed.  We cannot rollback %s to %s' %
+                      (from_name, to_name))
+
+  def GetPackageName(self):
+    """Returns the package name for the platform of this environment."""
+    platform = self.GetPlatform()
+    return _GetPackageName(platform)
+
+  def IsProductionBinary(self):
+    """Returns True if compiler_proxy is release version.
+
+    All release binaries are compiled by chrome-bot, so a binary whose
+    "--build-info" output says it was built by chrome-bot is assumed to be
+    a release or a release candidate.
+
+    Returns:
+      True if compiler_proxy is built by chrome-bot.
+      Otherwise, False.
+    """
+    build_info_cmd = PopenWithCheck(
+        [self._compiler_proxy_binary, '--build-info'],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT)
+    output, _ = build_info_cmd.communicate()
+    return 'built by chrome-bot' in output.rstrip()
+
+  def _GetExtractedDir(self, update_dir):
+    """Returns the full path of the directory that holds goma files.
+
+    Args:
+      update_dir: a name of update_dir.  This option should be specified when
+                  this method is used in update process.
+
+    Returns:
+      the script directory if update_dir is empty.  Otherwise, the
+      "goma-<platform>" directory under update_dir in the script directory.
+    """
+    if update_dir:
+      package_dir = 'goma-%s' % self.GetPlatform()
+      return os.path.join(self._dir, update_dir, package_dir)
+    return self._dir
+
+  def LoadChecksum(self, update_dir=''):
+    """Loads the checksum JSON file of a goma client.
+
+    For backward compatibility, it returns an empty dictionary if the JSON
+    file does not exist.
+
+    Args:
+      update_dir: directory containing latest goma files.
+                  if empty, load checksum from current goma client directory.
+
+    Returns:
+      a dictionary of filename and checksum.
+      e.g. {'compiler_proxy': 'abcdef...', ...}
+    """
+    checksum_path = os.path.join(self._GetExtractedDir(update_dir),
+                                 _CHECKSUM_FILE)
+    if os.path.exists(checksum_path):
+      with open(checksum_path) as checksum_file:
+        return json.load(checksum_file)
+
+    print '%s not exist' % checksum_path
+    return {}
+
+  def CalculateChecksum(self, filename, update_dir=''):
+    """Calculates the checksum of a goma client file.
+
+    Args:
+      filename: a string filename under script dir.
+      update_dir: directory containing latest goma files
+                  if empty, calculate checksum of files in current goma client
+                  directory.
+
+    Returns:
+      a checksum of a file.
+    """
+    base_dir = self._GetExtractedDir(update_dir)
+    return _CalculateChecksum(os.path.join(base_dir, filename))
+
+  # Methods that need to be implemented in subclasses.
+  def _ProcessRunning(self, image_name):
+    """Tells whether a process of the given executable image is running.
+
+    Subclasses must override this method.
+
+    Args:
+      image_name: executable image file name
+
+    Returns:
+      boolean value indicating the result.
+
+    Raises:
+      NotImplementedError: always, in this base class.
+    """
+    raise NotImplementedError('_ProcessRunning should be implemented.')
+
+  def _CheckPlatformConfig(self):
+    """Checks platform dependent GomaEnv configurations.
+
+    The base class has nothing to check.
+    """
+
+  def _ExecCompilerProxy(self):
+    """Starts compiler proxy in a platform dependent way.
+
+    Subclasses must override this method.
+
+    Raises:
+      NotImplementedError: always, in this base class.
+    """
+    raise NotImplementedError('_ExecCompilerProxy should be implemented.')
+
+  def _GetDetailedFailureReason(self, proc=None):
+    """Gets detailed failure reason if possible.
+
+    The base class does nothing.
+
+    Args:
+      proc: not used in the base class.
+    """
+
+  def ExtractPackage(self, package_file, update_dir):
+    """Extracts a platform dependent package.
+
+    Subclasses must override this method.
+
+    Args:
+      package_file: a filename of package to extract.
+      update_dir: where to extract
+
+    Returns:
+      boolean indicating success or failure.
+
+    Raises:
+      NotImplementedError: always, in this base class.
+    """
+    raise NotImplementedError('ExtractPackage should be implemented.')
+
+  def InstallPackage(self, update_dir):
+    """Overwrites self._dir with files in update_dir.
+
+    Subclasses must override this method.
+
+    Args:
+      update_dir: directory containing latest goma files
+
+    Returns:
+      boolean indicating success or failure.
+
+    Raises:
+      NotImplementedError: always, in this base class.
+    """
+    raise NotImplementedError('InstallPackage should be implemented.')
+
+  def GetGomaCtlScriptName(self):
+    """Returns the goma_ctl script name to show as the command name.
+
+    Returns:
+      $GOMA_CTL_SCRIPT_NAME if it is set.  Otherwise, the file name of this
+      script.
+    """
+    # Subclass may uses its specific variable.
+    # pylint: disable=R0201
+    this_script = os.path.basename(os.path.realpath(__file__))
+    return os.environ.get('GOMA_CTL_SCRIPT_NAME', this_script)
+
+  @staticmethod
+  def GetPackageExtension(platform):
+    """Returns the file extension of the package for the platform.
+
+    Subclasses must override this method.
+
+    Args:
+      platform: a string of platform.
+
+    Raises:
+      NotImplementedError: always, in this base class.
+    """
+    raise NotImplementedError('GetPackageExtension should be implemented.')
+
+  def CompilerProxyRunning(self):
+    """Tells whether compiler proxy is running.
+
+    Subclasses must override this method.
+
+    Returns:
+      True if compiler_proxy is running.  Otherwise, False.
+
+    Raises:
+      NotImplementedError: always, in this base class.
+    """
+    raise NotImplementedError('CompilerProxyRunning should be implemented.')
+
+  def KillStakeholders(self):
+    """Kills stakeholder processes.
+
+    This will kill all processes having locks compiler_proxy needs.
+    Subclasses must override this method.
+
+    Raises:
+      NotImplementedError: always, in this base class.
+    """
+    raise NotImplementedError('KillStakeholders should be implemented.')
+
+  def WarnNonProtectedFile(self, filename):
+    """Warns if access to the file is not limited.
+
+    Subclasses must override this method.
+
+    Args:
+      filename: filename to check.
+
+    Raises:
+      NotImplementedError: always, in this base class.
+    """
+    raise NotImplementedError('WarnNonProtectedFile should be implemented.')
+
+  def EnsureDirectoryOwnedByUser(self, directory):
+    """Ensures the directory is owned by the user.
+
+    Subclasses must override this method.
+
+    Args:
+      directory: a name of a directory to be checked.
+
+    Returns:
+      True if the directory is owned by the user.
+
+    Raises:
+      NotImplementedError: always, in this base class.
+    """
+    raise NotImplementedError(
+        'EnsureDirectoryOwnedByUser should be implemented.')
+
+  def _FindCurlUnderPath(self, directory, command):
+    """Returns the full path of curl if it exists under the directory.
+
+    Subclasses must override this method.
+
+    Args:
+      directory: a string of directory name to find curl.
+      command: a string of command name of curl.
+
+    Returns:
+      a string of a curl full path if exist.  Otherwise, None.
+
+    Raises:
+      NotImplementedError: always, in this base class.
+    """
+    raise NotImplementedError('_FindCurlUnderPath should be implemented.')
+
+  def _WaitWithTimeout(self, proc, timeout_sec):
+    """Waits until proc finishes or timeout_sec passes.
+
+    Subclasses must override this method.
+
+    Args:
+      proc: an instance of subprocess.Popen
+      timeout_sec: an integer number to represent timeout in sec.
+
+    Raises:
+      NotImplementedError: always, in this base class.
+    """
+    raise NotImplementedError
+
+
+class GomaEnvWin(GomaEnv):
+  """Goma running environment for Windows."""
+
+  # File names of the binaries used on Windows.
+  _GOMACC = 'gomacc.exe'
+  _COMPILER_PROXY = 'compiler_proxy.exe'
+  _GOMA_FETCH = 'goma_fetch.exe'
+  _CURL = 'curl.exe'
+  # TODO: could be in GomaEnv if env name is the same between
+  # posix and win.
+  _COMPILER_PROXY_IDENTIFIER_ENV_NAME = 'GOMA_COMPILER_PROXY_SOCKET_NAME'
+  # (flag name, default value) pairs.
+  _DEFAULT_ENV = [
+      ('RPC_EXTRA_PARAMS', '?win'),
+      ('COMPILER_PROXY_SOCKET_NAME', 'goma.ipc'),
+      ]
+  _DEFAULT_SSL_ENV = [
+      # Longer read timeout seems to be required on Windows.
+      ('HTTP_SOCKET_READ_TIMEOUT_SECS', '90.0'),
+      ]
+  # (label shown to the user, platform name) pairs.
+  PLATFORM_CANDIDATES = [
+      ('Win64', 'win64'),
+      ]
+  # Script name reported by GetGomaCtlScriptName unless
+  # $GOMA_CTL_SCRIPT_NAME is set.
+  _GOMA_CTL_SCRIPT_NAME = 'goma_ctl.bat'
+  # Matches a path whose last component is depot_tools, with either kind of
+  # path separator and an optional trailing separator.
+  _DEPOT_TOOLS_DIR_PATTERN = re.compile(r'.*[/\\]depot_tools[/\\]?$')
+
+  def __init__(self):
+    """Loads win32process and initializes the Windows environment."""
+    # Imported by name at run time and kept for _ExecCompilerProxy.
+    self._win32process = __import__('win32process')
+    GomaEnv.__init__(self)
+    # Overrides whatever GomaEnv.__init__ set as the platform.
+    self._platform = 'win64'
+
+  def GetGomaTmpDir(self):
+    """Get a directory path for goma.
+
+    In chromium/win, we couldn't access %USERNAME%, so _GetUsername
+    is not available.  We could assume %TEMP% would be in %USERPROFILE%,
+    or Windows is a single user machine, so there may be no need to care
+    so much about other users.
+
+    Returns:
+      $GOMA_TMP_DIR if it is set to a non-empty value.  Otherwise, the goma
+      directory under the temp directory.
+    """
+    return (os.environ.get('GOMA_TMP_DIR') or
+            os.path.join(_GetTempDirectory(), _TMP_DIR_WIN))
+
+  @staticmethod
+  def GetPackageExtension(platform):
+    """Returns 'zip' whatever the platform is.
+
+    Args:
+      platform: a string of platform; it is not used.
+    """
+    extension = 'zip'
+    return extension
+
+  def _ProcessRunning(self, image_name):
+    """Tells whether tasklist shows a process of the given image name.
+
+    Args:
+      image_name: executable image file name
+
+    Returns:
+      True if image_name appears in what tasklist wrote to stdout.
+    """
+    image_filter = 'IMAGENAME eq %s' % image_name
+    tasklist = PopenWithCheck(['tasklist', '/FI', image_filter],
+                              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    listed, _ = tasklist.communicate()
+    return image_name in listed
+
+  def _CheckPlatformConfig(self):
+    """Checks that vcflags.exe exists in the script directory.
+
+    Raises:
+      ConfigError: if vcflags.exe is not found.
+    """
+    vcflags = os.path.join(self._dir, 'vcflags.exe')
+    if os.path.isfile(vcflags):
+      return
+    raise ConfigError('vcflags.exe not found')
+
+  def _ExecCompilerProxy(self):
+    """Starts compiler_proxy as a detached process.
+
+    Returns:
+      what PopenWithCheck returns for the started compiler_proxy.
+    """
+    detached = self._win32process.DETACHED_PROCESS
+    return PopenWithCheck([self._compiler_proxy_binary],
+                          creationflags=detached)
+
+  def _GetDetailedFailureReason(self, proc=None):
+    pids = self._GetStakeholderPids()
+    print 'ports are owned by following processes:'
+    for pid in pids:
+      print PopenWithCheck(['tasklist', '/FI', 'PID eq %s' % pid],
+                           stdout=subprocess.PIPE,
+                           stderr=subprocess.STDOUT).communicate()[0]
+
+  def ExtractPackage(self, package_file, update_dir):
+    """Extract a platform dependent package.
+
+    Args:
+      package_file: a filename of package to extract.
+      update_dir: where to extract
+
+    Returns:
+      boolean indicating success or failure.
+
+    Raises:
+      Error: if package does not exist.
+    """
+    package_file = os.path.join(self._dir, package_file)
+    if not os.path.exists(package_file):
+      raise Error('Expected package file %s does not exist' % package_file)
+    update_dir = os.path.join(self._dir, update_dir)
+    print 'Extracting package %s to %s ...' % (package_file, update_dir)
+    archive = zipfile.ZipFile(package_file)
+    archive.extractall(update_dir)
+    return True
+
+  def InstallPackage(self, update_dir):
+    """Overwrite self._dir with files in update_dir.
+
+    Args:
+      update_dir: directory containing latest goma files
+
+    Returns:
+      boolean indicating success or failure.
+    """
+    assert update_dir != ''
+    source_dir = self._GetExtractedDir(update_dir)
+    # return code may return non zero even if success.
+    return_code = subprocess.call(['robocopy', source_dir, self._dir,
+                                   '/ns', '/nc', '/nfl', '/ndl', '/np',
+                                   '/njh', '/njs'])
+    # ROBOCOPY has very, very interesting error codes.
+    # see http://ss64.com/nt/robocopy-exit.html
+    return return_code < 8
+
+  def GetGomaCtlScriptName(self):
+    """Returns GOMA_CTL_SCRIPT_NAME from the environment, or the default.
+
+    The default is the class attribute _GOMA_CTL_SCRIPT_NAME.
+    """
+    return os.environ.get('GOMA_CTL_SCRIPT_NAME', self._GOMA_CTL_SCRIPT_NAME)
+
+  def CompilerProxyRunning(self):
+    """Returns True if a process named self._COMPILER_PROXY is listed."""
+    return self._ProcessRunning(self._COMPILER_PROXY)
+
+  def _GetStakeholderPids(self):
+    ports = []
+    ports.append(os.environ.get('GOMA_COMPILER_PROXY_PORT', '8088'))
+    ns = PopenWithCheck(['netstat', '-a', '-n', '-o'],
+                        stdout=subprocess.PIPE,
+                        stderr=subprocess.STDOUT).communicate()[0]
+    listenline = re.compile('.*TCP.*(?:%s).*LISTENING *([0-9]*).*' %
+                            '|'.join(ports))
+    pids = set()
+    for line in ns.splitlines():
+      m = listenline.match(line)
+      if m:
+        pids.add(m.group(1))
+    return pids
+
+  def KillStakeholders(self):
+    pids = self._GetStakeholderPids()
+    if pids:
+      args = []
+      for pid in pids:
+        args.extend(['/PID', pid])
+      subprocess.check_call(['taskkill'] + args)
+
+  def WarnNonProtectedFile(self, protocol):
+    """Does nothing on Windows; the permission warning is not implemented."""
+    # TODO: warn for Win.
+    pass
+
+  def EnsureDirectoryOwnedByUser(self, directory):
+    """Always returns True on Windows; no ownership check is performed."""
+    # TODO: implement for Win.
+    return True
+
+  def _FindCurlUnderPath(self, directory, curl):
+    if not self._DEPOT_TOOLS_DIR_PATTERN.match(directory.lower()):
+      return None
+
+    for root, _, files in os.walk(directory):
+      if curl in files:
+        return os.path.join(root, curl)
+    return None
+
+  def _WaitWithTimeout(self, proc, timeout_sec):
+    import win32api
+    import win32con
+    import win32event
+    try:
+      handle = win32api.OpenProcess(
+          win32con.PROCESS_QUERY_INFORMATION | win32con.SYNCHRONIZE,
+          False, proc.pid)
+      ret = win32event.WaitForSingleObject(handle, timeout_sec * 10**3)
+      if ret in (win32event.WAIT_TIMEOUT, win32event.WAIT_OBJECT_0):
+        return
+      raise Error('WaitForSingleObject returned expected value %s' % ret)
+    finally:
+      if handle:
+        win32api.CloseHandle(handle)
+
+
+class GomaEnvPosix(GomaEnv):
+  """Goma running environment for POSIX."""
+
+  _GOMACC = 'gomacc'
+  _COMPILER_PROXY = 'compiler_proxy'
+  _GOMA_FETCH = 'goma_fetch'
+  _CURL = 'curl'
+  _COMPILER_PROXY_IDENTIFIER_ENV_NAME = 'GOMA_COMPILER_PROXY_SOCKET_NAME'
+  _DEFAULT_ENV = [
+      # goma_ctl.py runs compiler_proxy in daemon mode by default.
+      ('COMPILER_PROXY_DAEMON_MODE', 'true'),
+      ('COMPILER_PROXY_SOCKET_NAME', 'goma.ipc'),
+      ('COMPILER_PROXY_LOCK_FILENAME', 'goma_compiler_proxy.lock'),
+      ('COMPILER_PROXY_PORT', '8088'),
+      ]
+  PLATFORM_CANDIDATES = [
+      # (Shown name, platform)
+      ('Goobuntu', 'goobuntu'),
+      ('Chrome OS', 'chromeos'),
+      ('MacOS', 'mac'),
+      ]
+  _LSOF = 'lsof'
+  _FUSER = 'fuser'
+  _FUSER_PID_PATTERN = re.compile(r'(\d+)')
+  _FUSER_USERNAME_PATTERN = re.compile(r'\((\w+)\)')
+
+  def __init__(self):
+    GomaEnv.__init__(self)
+    # pylint: disable=E1101
+    # Configure from sysname in uname.
+    if os.uname()[0] == 'Darwin':
+      self._platform = 'mac'
+    self._fuser_path = None
+    self._pwd = __import__('pwd')
+
+  def GetGomaTmpDir(self):
+    """Get a directory path for goma.
+
+    Returns:
+      a directory name.
+    """
+    tmp_dir = os.environ.get('GOMA_TMP_DIR')
+    if tmp_dir:
+      return tmp_dir
+    tmp_dir = _GetUserRuntimeDirectory()
+    if not tmp_dir:
+      tmp_dir = _GetTempDirectory()
+    return os.path.join(tmp_dir, _TMP_DIR_PREFIX + _GetUsername())
+
+  @staticmethod
+  def GetPackageExtension(platform):
+    return 'tgz' if platform == 'mac' else 'txz'
+
+  def _ProcessRunning(self, image_name):
+    process = PopenWithCheck(['ps', '-Af'], stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+    output = process.communicate()[0]
+    return image_name in output
+
+  def _ExecCompilerProxy(self):
+    if _IsGomaFlagTrue('COMPILER_PROXY_DAEMON_MODE'):
+      self._is_daemon_mode = True
+    return PopenWithCheck([self._compiler_proxy_binary],
+                          stderr=subprocess.STDOUT)
+
+  def ExtractPackage(self, package_file, update_dir):
+    """Extract a platform dependent package.
+
+    Args:
+      package_file: a filename of package to extract.
+      update_dir: where to extract
+
+    Returns:
+      boolean indicating success or failure.
+
+    Raises:
+      Error: if package does not exist.
+    """
+    package_file = os.path.join(self._dir, package_file)
+    if not os.path.exists(package_file):
+      raise Error('Expected package file %s does not exist' % package_file)
+    update_dir = os.path.join(self._dir, update_dir)
+    print 'Extracting package to %s ...' % update_dir
+    if os.path.splitext(package_file)[1] == '.tgz':
+      tar_options = '-zxf'
+    else:
+      tar_options = '-Jxf'
+    return subprocess.call(['tar', tar_options, package_file, '-C',
+                            update_dir]) == 0
+
+  def InstallPackage(self, update_dir):
+    """Overwrite self._dir with files in update_dir.
+
+    Args:
+      update_dir: directory containing latest goma files
+
+    Returns:
+      boolean indicating success or failure.
+    """
+    assert update_dir != ''
+    # TODO: implement a better version for POSIX
+    source_files = os.path.join(self._GetExtractedDir(update_dir), '*')
+    return subprocess.call(['cp -aRf %s %s' % (source_files, self._dir)],
+                           shell=True) == 0
+
+  def _GetOwners(self, name, network=False):
+    """Get owner pid/uid of file or listen port.
+
+    Args:
+      name: name to check owner. e.g. <tmpdir>/goma.ipc or TCP:8088
+      network: True if the request is for network socket.
+
+    Returns:
+      a list of dictionaries containing owner info.
+    """
+    # os.path.isfile is not feasible to check an unix domain socket.
+    if not network and not os.path.exists(name):
+      return []
+
+    if not network and self._GetFuserPath():
+      (out, err) = subprocess.Popen([self._GetFuserPath(), '-u', name],
+                                    stdout=subprocess.PIPE,
+                                    stderr=subprocess.PIPE).communicate()
+      if out:  # Found at least one owner.
+        pids = self._FUSER_PID_PATTERN.findall(out)
+        usernames = self._FUSER_USERNAME_PATTERN.findall(err)
+        if pids and usernames:
+          uids = [long(self._pwd.getpwnam(x).pw_uid) for x in usernames]
+          return [{'pid': x[0], 'uid': x[1], 'resource': name}
+                  for x in zip(pids, uids)]
+
+    lsof_command = [self._LSOF, '-F', 'pu']
+    if network:
+      lsof_command.append('-i')
+    # Lsof returns 1 for WARNING even if the result is good enough.
+    # It also returns 1 if an owner process is not found.
+    ret = subprocess.Popen(lsof_command + [name],
+                           stdout=subprocess.PIPE,
+                           stderr=subprocess.STDOUT).communicate()[0]
+    if ret:
+      result_list = _ParseLsof(ret)
+      for entry in result_list:
+        entry['resource'] = name
+      return result_list
+    return []
+
+  def _GetStakeholderPids(self, quick=False):
+    """Get PID of stake holders.
+
+    Args:
+      quick: if True, quickly returns result if one of pids is found.
+
+    Returns:
+      a list of pids holding compiler_proxy locks and a port.
+
+    Raises:
+      Error: if compiler_proxy's lock is onwed by others.
+    """
+    # os.getuid does not exist in Windows.
+    # pylint: disable=E1101
+    tmpdir = self.GetGomaTmpDir()
+    socket_file = os.path.join(
+        tmpdir, os.environ['GOMA_COMPILER_PROXY_SOCKET_NAME'])
+    lock_prefix = os.path.join(
+        tmpdir, os.environ['GOMA_COMPILER_PROXY_LOCK_FILENAME'])
+    port = os.environ['GOMA_COMPILER_PROXY_PORT']
+    lock_filename = '%s.%s' % (lock_prefix, port)
+
+    results = []
+    results.extend(self._GetOwners(socket_file))
+    if quick:
+      return results
+    results.extend(self._GetOwners(lock_filename))
+    results.extend(self._GetOwners('TCP:%s' % port, network=True))
+
+    uid = os.getuid()
+    if uid != 0:  # root can handle any processes.
+      owned_by_others = [x for x in results if x['uid'] != uid]
+      if owned_by_others:
+        raise Error('compiler_proxy lock and/or socket is owned by others.'
+                    ' details=%s' % owned_by_others)
+
+    return set([str(x['pid']) for x in results])
+
+  def KillStakeholders(self):
+    pids = self._GetStakeholderPids()
+    if pids:
+      subprocess.check_call(['kill'] + list(pids))
+
+  def _GetFuserPath(self):
+    if self._fuser_path is None:
+      self._fuser_path = _FindCommandInPath(self._FUSER)
+      if not self._fuser_path:
+        self._fuser_path = ''
+    return self._fuser_path
+
+  def CompilerProxyRunning(self):
+    """Returns True if compiler proxy is running."""
+    return bool(self._GetStakeholderPids(quick=True))
+
+  def WarnNonProtectedFile(self, filename):
+    # This is platform dependent part.
+    # pylint: disable=R0201
+    if os.path.exists(filename) and os.stat(filename).st_mode & 077:
+      sys.stderr.write(
+          'We recommend to limit access to the file: %(path)s\n'
+          'e.g. chmod go-rwx %(path)s\n' % {'path': filename})
+
+  def EnsureDirectoryOwnedByUser(self, directory):
+    # This is platform dependent part.
+    # pylint: disable=R0201
+    # We must use lstat instead of stat to avoid symlink attack (b/69717657).
+    st = os.lstat(directory)
+    if st.st_uid != os.geteuid():
+      return False
+    try:
+      os.chmod(directory, 0700)
+    except OSError as err:
+      sys.stderr.write('chmod failure: %s\n' % err)
+      return False
+    return True
+
+  def _FindCurlUnderPath(self, directory, curl):
+    return os.path.join(directory, curl)
+
+  def _WaitWithTimeout(self, proc, timeout_sec):
+    import signal
+    class TimeoutError(Exception):
+      """Raised on timeout."""
+
+    def handle_timeout(_signum, _frame):
+      raise TimeoutError('timed out')
+
+    signal.signal(signal.SIGALRM, handle_timeout)
+    try:
+      signal.alarm(timeout_sec)
+      proc.wait()
+    except TimeoutError:
+      pass
+    finally:
+      signal.alarm(0)
+      signal.signal(signal.SIGALRM, signal.SIG_DFL)
+
+
+
+# Selects the GomaEnv implementation for the running platform; used by
+# GetGomaDriver (lookup by os.name) and _GetPackageName (iteration).
+_GOMA_ENVS = {
+    # os.name: GomaEnv subclass
+    'nt': GomaEnvWin,
+    'posix': GomaEnvPosix,
+    }
+
+
+def _GetPackageName(platform):
+  """Get name of package.
+
+  Args:
+    platform: a string of platform name.
+
+  Returns:
+    a string of package name of the given platform.
+
+  Raises:
+    ConfigError: when given platform is invalid.
+  """
+  for goma_env in _GOMA_ENVS.values():
+    supported = [x[1] for x in goma_env.PLATFORM_CANDIDATES]
+    if platform in supported:
+      return 'goma-%s.%s' % (platform, goma_env.GetPackageExtension(platform))
+  raise ConfigError('Unknown platform %s specified to get package name.' %
+                    platform)
+
+
+class GomaBackend(object):
+  """Backend specific configs."""
+
+  def __init__(self, env):
+    self._env = env
+    self._download_base_url = None
+    self._path_prefix = None
+    self._stubby_host = None
+    self._SetupEnviron()
+
+  def _SetupEnviron(self):
+    """Set up backend specific environmental variables."""
+    pass
+
+  def _NormalizeBaseUrl(self, resp):
+    """Check the URL is valid, and normalize it if needed.
+
+    Args:
+      resp: response to the download URL request.
+
+    Returns:
+      a string of the download base URL.
+
+    Raises:
+      Error: if the given resp is invalid.
+    """
+    raise NotImplementedError('Please implement _NormalizeBaseUrl')
+
+  def GetDownloadBaseUrl(self):
+    """Orchestrate download base url for retrieving manifest file.
+
+    Returns:
+      The URL string.
+
+    Raises:
+      Error: if failed to obtain download base URL.
+    """
+    if self._download_base_url:
+      return self._download_base_url
+
+    downloadurl_path = '%s/downloadurl' % self._path_prefix
+    downloadurl = 'https://%s%s' % (self._stubby_host, downloadurl_path)
+    url = self._NormalizeBaseUrl(
+        self._env.HttpDownload(downloadurl,
+                               rewrite_url=self.RewriteRequest,
+                               headers=self.GetHeaders()))
+    if os.environ.has_key('GOMACHANNEL'):
+      url += '/%s' % os.environ.get('GOMACHANNEL')
+    if url.startswith('http:'):
+      url = 'https:' + url[5:]
+    self._download_base_url = url
+    return url
+
+  def RewriteRequest(self, request):
+    """Rewrite request based on backend needs."""
+    # This usually do not rewrite but subclass may rewrite.
+    # pylint: disable=R0201
+    return request
+
+  def GetHeaders(self):
+    """Return headers if there are backend specific headers."""
+    # This usually returns nothing but subclass may return.
+    # pylint: disable=R0201
+    return {}
+
+
+class Clients5Backend(GomaBackend):
+  """Backend specific config for Clients5."""
+
+  def _SetupEnviron(self):
+    """Set up clients5 backend specific environmental variables."""
+    # Set member variables for _GetDownloadBaseUrl.
+    self._path_prefix = '/cxx-compiler-service'
+    self._stubby_host = 'clients5.google.com'
+
+    # TODO: provide better way to make server know Windows client.
+    # Fool proof until we provide the way.
+    if (isinstance(self._env, GomaEnvWin) and
+        not os.environ.get('GOMA_RPC_EXTRA_PARAMS', '')):
+      sys.stderr.write('Please set GOMA_RPC_EXTRA_PARAMS=?win\n')
+
+  def _NormalizeBaseUrl(self, resp):
+    """Check the URL is valid, and normalize it if needed.
+
+    Args:
+      resp: response to the download URL request.
+
+    Returns:
+      a string of the download base URL.
+
+    Raises:
+      Error: if the given resp is invalid.
+    """
+    if resp.startswith('http'):
+      return resp
+    msg = 'Could not obtain the download base URL.\n'
+    msg += ('Server response: %s' % resp)
+    raise Error(msg)
+
+  def GetHeaders(self):
+    """Return headers if there are backend specific headers."""
+    return {}
+
+
+def GetGomaDriver():
+  """Returns a proper instance of GomaEnv subclass based on os.name."""
+  if os.name not in _GOMA_ENVS:
+    raise Error('Could not find proper GomaEnv for "%s"' % os.name)
+  env = _GOMA_ENVS[os.name]()
+  backend = Clients5Backend(env)
+  return GomaDriver(env, backend)
+
+
+def main():
+  """Dispatches the command line arguments to the goma driver.
+
+  Returns:
+    0, which the caller passes to sys.exit().
+  """
+  goma = GetGomaDriver()
+  goma.Dispatch(sys.argv[1:])
+  return 0
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/client/goma_ctl.sh b/client/goma_ctl.sh
new file mode 100755
index 0000000..ad6c909
--- /dev/null
+++ b/client/goma_ctl.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Copyright 2011 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Deprecated: prints an error to stderr pointing to the replacements, exits 1.
+
+cat <<EOM 1>&2
+****************************** ERROR ******************************
+
+DO NOT USE goma_ctl.sh anymore. This is just rotten.
+
+We suggest you to use goma_ctl.py or goma_stubby.sh when using stubby_proxy.
+
+*******************************************************************
+EOM
+
+exit 1
diff --git a/client/goma_fetch.cc b/client/goma_fetch.cc
new file mode 100644
index 0000000..73de39e
--- /dev/null
+++ b/client/goma_fetch.cc
@@ -0,0 +1,168 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// goma_fetch is a tool to fetch from goma API endpoints.
+
+#include <string.h>
+
+#include <iostream>
+#include <memory>
+#include <sstream>
+
+#include "autolock_timer.h"
+#include "callback.h"
+#include "compiler_specific.h"
+#include "env_flags.h"
+#include "goma_init.h"
+#include "http.h"
+#include "http_init.h"
+#include "ioutil.h"
+#include "oauth2.h"
+#include "platform_thread.h"
+#include "socket_factory.h"
+#include "string_piece.h"
+#include "worker_thread_manager.h"
+
+#include "goma_flags.cc"
+
+using std::string;
+using devtools_goma::HttpClient;
+using devtools_goma::PlatformThread;
+using devtools_goma::WorkerThreadManager;
+using devtools_goma::WorkerThreadRunner;
+
+namespace {
+
+// Fetcher fetches data by using HttpClient.
+class Fetcher {
+ public:
+  // Takes ownership of HttpClient.
+  explicit Fetcher(std::unique_ptr<HttpClient> client)
+      : client_(std::move(client)) {
+  }
+  ~Fetcher() {
+  }
+
+  // Sends a GET request, making up to FLAGS_FETCH_RETRY attempts.
+  // The result is available from status() and resp() afterwards.
+  // The HttpClient is destroyed before this returns, so Run() can only be
+  // called once.
+  void Run() {
+    client_->InitHttpRequest(&req_, "GET", "");
+    req_.AddHeader("Connection", "close");
+
+    int backoff_ms = client_->options().min_retry_backoff_ms;
+
+    // Error messages of the earlier attempts, separated by spaces.
+    std::string err_messages;
+    for (int i = 0; i < FLAGS_FETCH_RETRY; ++i) {
+      err_messages += status_.err_message + " ";
+      // Start every attempt from a fresh status.
+      status_ = HttpClient::Status();
+      client_->Do(&req_, &resp_, &status_);
+      if (!status_.err) {
+        // A 4xx response is not retried.
+        if (status_.http_return_code >= 400 && status_.http_return_code < 500) {
+          break;
+        }
+        // Success.
+        if (status_.http_return_code == 200) {
+          break;
+        }
+      }
+      // Anything else is retried after a backoff, unless this was the last
+      // attempt.
+      if (i + 1 < FLAGS_FETCH_RETRY) {
+        LOG(WARNING) << "fetch fail try=" << i
+                     << " err=" << status_.err
+                     << " http code:" << status_.http_return_code
+                     << " " << status_.err_message;
+        backoff_ms = HttpClient::BackoffMsec(client_->options(),
+                                             backoff_ms, true);
+        LOG(INFO) << "backoff " << backoff_ms << "msec";
+        PlatformThread::Sleep(backoff_ms);
+      }
+    }
+    // Report the messages of all attempts, oldest first.
+    status_.err_message = err_messages + status_.err_message;
+    LOG(INFO) << "get done " << status_.DebugString();
+    client_->WaitNoActive();
+    client_.reset();
+  }
+
+  // Status of the last attempt; err_message covers all attempts.
+  const HttpClient::Status& status() const {
+    return status_;
+  }
+
+  // Response of the last attempt.
+  const devtools_goma::HttpResponse& resp() const {
+    return resp_;
+  }
+
+ private:
+  std::unique_ptr<HttpClient> client_;
+  devtools_goma::HttpRequest req_;
+  devtools_goma::HttpResponse resp_;
+  HttpClient::Status status_;
+
+  DISALLOW_COPY_AND_ASSIGN(Fetcher);
+};
+
+}  // anonymous namespace
+
+// Fetches the URL given as argv[1] and writes the response body to stdout.
+// Returns 0 on an HTTP 200 response and 1 on an HTTP client error or any
+// other HTTP status.  Exits with 1 when no URL is given.
+int main(int argc, char* argv[], const char* envp[]) {
+  devtools_goma::Init(argc, argv, envp);
+  if (argc < 2) {
+    std::cerr << "usage: " << argv[0] << " url" << std::endl;
+    exit(1);
+  }
+  // Initialize rand.
+  srand(static_cast<unsigned int>(time(nullptr)));
+  devtools_goma::InitLogging(argv[0]);
+#ifdef _WIN32
+  WinsockHelper wsa;
+#endif
+
+  WorkerThreadManager wm;
+  wm.Start(2);
+
+  HttpClient::Options http_options;
+  devtools_goma::InitHttpClientOptions(&http_options);
+  // clear extra params, like "?win".
+  // request paths should be passed via argv[1].
+  http_options.extra_params = "";
+  if (!http_options.InitFromURL(argv[1])) {
+    LOG(FATAL) << "Failed to initialize HttpClient::Options from URL:"
+               << argv[1];
+  }
+  LOG(INFO) << "fetch " << argv[1];
+
+  std::unique_ptr<HttpClient> client(new HttpClient(
+      HttpClient::NewSocketFactoryFromOptions(http_options),
+      HttpClient::NewTLSEngineFactoryFromOptions(http_options),
+      http_options, &wm));
+
+  std::unique_ptr<Fetcher> fetcher(new Fetcher(std::move(client)));
+
+  // Fetcher::Run is executed through a WorkerThreadRunner; this thread
+  // only waits for it to finish.
+  std::unique_ptr<WorkerThreadRunner> fetch(
+      new WorkerThreadRunner(
+          &wm, FROM_HERE,
+          devtools_goma::NewCallback(
+              fetcher.get(),
+              &Fetcher::Run)));
+  devtools_goma::FlushLogFiles();
+  fetch->Wait();
+  LOG(INFO) << "fetch done";
+  devtools_goma::FlushLogFiles();
+  // Release the runner before the worker thread manager is finished.
+  fetch.reset();
+  wm.Finish();
+  devtools_goma::FlushLogFiles();
+  const HttpClient::Status& status = fetcher->status();
+  if (status.err) {
+    LOG(ERROR) << "fetch " << argv[1]
+               << " err=" << status.err
+               << " " << status.err_message
+               << " " << http_options.DebugString();
+    return 1;
+  }
+  LOG(INFO) << status.DebugString();
+  StringPiece body = fetcher->resp().Body();
+  if (status.http_return_code != 200) {
+    // The body of a failed response goes to the log, not to stdout.
+    LOG(ERROR) << "fetch " << argv[1]
+               << " http code:" << status.http_return_code
+               << " " << status.err_message;
+    LOG(INFO) << body;
+    return 1;
+  }
+  devtools_goma::WriteStdout(body);
+  return 0;
+}
diff --git a/client/goma_file_dump.cc b/client/goma_file_dump.cc
new file mode 100644
index 0000000..43231dd
--- /dev/null
+++ b/client/goma_file_dump.cc
@@ -0,0 +1,43 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "goma_file_dump.h"
+
+#include "compiler_specific.h"
+#include "file.h"
+#include "file_helper.h"
+#include "glog/logging.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_data.pb.h"
+MSVC_POP_WARNING()
+
+namespace devtools_goma {
+
+// Starts with an empty StoreFileReq that collects every stored blob.
+FileServiceDumpClient::FileServiceDumpClient()
+    : req_(new StoreFileReq) {
+}
+
+FileServiceDumpClient::~FileServiceDumpClient() {
+}
+
+// Copies every blob of |req| into the recorded request and adds the hash
+// key of each blob to |resp|.  Nothing is sent anywhere.  Always returns
+// true.
+bool FileServiceDumpClient::StoreFile(
+    const StoreFileReq* req, StoreFileResp* resp) {
+  for (const auto& b : req->blob()) {
+    FileBlob* blob = req_->add_blob();
+    *blob = b;
+    resp->add_hash_key(FileServiceClient::ComputeHashKey(*blob));
+  }
+  return true;
+}
+
+// Writes the recorded blobs to |filename| as a serialized StoreFileReq.
+// Returns true without touching the file when nothing has been recorded.
+// Returns false when serialization or the write fails.
+bool FileServiceDumpClient::Dump(const string& filename) const {
+  if (req_->blob_size() == 0)
+    return true;
+  string s;
+  // Do not write a file from a request that could not be serialized.
+  if (!req_->SerializeToString(&s)) {
+    LOG(ERROR) << "failed to serialize StoreFileReq for " << filename;
+    return false;
+  }
+  return WriteStringToFile(s, filename);
+}
+
+}  // namespace devtools_goma
diff --git a/client/goma_file_dump.h b/client/goma_file_dump.h
new file mode 100644
index 0000000..41a90e8
--- /dev/null
+++ b/client/goma_file_dump.h
@@ -0,0 +1,50 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_GOMA_FILE_DUMP_H_
+#define DEVTOOLS_GOMA_CLIENT_GOMA_FILE_DUMP_H_
+
+#include <memory>
+#include <string>
+
+#include "goma_file.h"
+
+namespace devtools_goma {
+
+// FileServiceClient that records the blobs passed to StoreFile() in memory
+// and can write them to a file with Dump().
+class FileServiceDumpClient : public FileServiceClient {
+ public:
+  FileServiceDumpClient();
+  ~FileServiceDumpClient() override;
+
+  // No async support.  Both factories return nullptr.
+  std::unique_ptr<AsyncTask<StoreFileReq, StoreFileResp>>
+      NewAsyncStoreFileTask() override {
+    return nullptr;
+  }
+  std::unique_ptr<AsyncTask<LookupFileReq, LookupFileResp>>
+      NewAsyncLookupFileTask() override {
+    return nullptr;
+  }
+
+  // Records all StoreFileReqs.  Always success.
+  bool StoreFile(const StoreFileReq* req, StoreFileResp* resp) override;
+  // No lookup support.  Always returns false.
+  bool LookupFile(const LookupFileReq* /* req */,
+                  LookupFileResp* /* resp */) override {
+    return false;
+  }
+
+  // Dump recorded StoreFileReqs into filename.
+  bool Dump(const string& filename) const;
+
+ private:
+  // Accumulates the blobs of every StoreFile() call.
+  std::unique_ptr<StoreFileReq> req_;
+
+  DISALLOW_COPY_AND_ASSIGN(FileServiceDumpClient);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_GOMA_FILE_DUMP_H_
diff --git a/client/goma_file_http.cc b/client/goma_file_http.cc
new file mode 100644
index 0000000..132d185
--- /dev/null
+++ b/client/goma_file_http.cc
@@ -0,0 +1,158 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "goma_file_http.h"
+
+#include <sstream>
+
+#include "compiler_specific.h"
+#include "glog/logging.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_data.pb.h"
+MSVC_POP_WARNING()
+#include "goma_file.h"
+#include "http_rpc.h"
+#include "lockhelper.h"
+
+namespace {
+
+// AsyncTask that sends its request to |path| through the HttpRPC of a
+// FileServiceHttpClient.  Run() starts the call and Wait() blocks until it
+// has finished.
+template<typename Req, typename Resp>
+class HttpTask : public devtools_goma::FileServiceClient::AsyncTask<Req, Resp> {
+ public:
+  // |file_service| is kept as a raw pointer.
+  // The trace id of the call is "<trace_id> AsyncFileTask", or just
+  // "AsyncFileTask" when |trace_id| is empty.
+  HttpTask(devtools_goma::FileServiceHttpClient* file_service,
+           const string& path, const string& trace_id)
+      : file_service_(file_service),
+        http_(file_service->http()),
+        path_(path) {
+    std::ostringstream ss;
+    if (!trace_id.empty()) {
+      ss << trace_id << " ";
+    }
+    ss << "AsyncFileTask";
+    status_.trace_id = ss.str();
+    status_.finished = true;  // allow to destruct this without Run().
+  }
+  // Waits on the status before the task goes away.
+  ~HttpTask() override {
+    http_->Wait(&status_);
+  }
+
+  // Starts the call; the request and response objects are the ones owned by
+  // the AsyncTask base class.
+  void Run() override {
+    status_.finished = false;
+    Req* req =
+        devtools_goma::FileServiceClient::AsyncTask<Req, Resp>::mutable_req();
+    Resp* resp =
+        devtools_goma::FileServiceClient::AsyncTask<Req, Resp>::mutable_resp();
+    http_->CallWithCallback(path_, req, resp, &status_, nullptr);
+  }
+
+  // Waits for the call and adds its status to the file service stats.
+  void Wait() override {
+    http_->Wait(&status_);
+    file_service_->AddHttpRPCStatus(status_);
+  }
+  bool IsSuccess() const override { return status_.err == 0; }  // OK
+
+ private:
+  devtools_goma::FileServiceHttpClient* file_service_;
+  devtools_goma::HttpRPC* http_;
+  string path_;
+  devtools_goma::HttpRPC::Status status_;
+
+  // disallow copy and assign
+  HttpTask(const HttpTask&);
+  void operator=(const HttpTask&);
+};
+
+}  // namespace
+
+namespace devtools_goma {
+
+// |http| and |multi_file_store| are stored as raw pointers.
+FileServiceHttpClient::FileServiceHttpClient(
+    HttpRPC* http,
+    const string& store_path,
+    const string& lookup_path,
+    MultiFileStore* multi_file_store)
+    : http_(http),
+      store_path_(store_path),
+      lookup_path_(lookup_path),
+      num_rpc_(0),
+      multi_file_store_(multi_file_store) {
+}
+
+FileServiceHttpClient::~FileServiceHttpClient() {
+}
+
+// Returns a new client that shares the HttpRPC, the paths and the
+// MultiFileStore of this one and carries a copy of |requester_info| and
+// |trace_id|.  The stats start from zero.
+std::unique_ptr<FileServiceHttpClient>
+FileServiceHttpClient::WithRequesterInfoAndTraceId(
+    const RequesterInfo& requester_info,
+    const string& trace_id) const {
+  std::unique_ptr<FileServiceHttpClient> client(
+      new FileServiceHttpClient(http_, store_path_, lookup_path_,
+                                multi_file_store_));
+  client->trace_id_ = trace_id;
+  client->requester_info_.reset(new RequesterInfo(requester_info));
+  return client;
+}
+
+// Creates an async task that sends to |store_path_|.
+std::unique_ptr<FileServiceClient::AsyncTask<StoreFileReq, StoreFileResp>>
+FileServiceHttpClient::NewAsyncStoreFileTask() {
+  using Task = FileServiceClient::AsyncTask<StoreFileReq, StoreFileResp>;
+  return std::unique_ptr<Task>(
+      new HttpTask<StoreFileReq, StoreFileResp>(this, store_path_, trace_id_));
+}
+
+// Creates an async task that sends to |lookup_path_|.
+std::unique_ptr<FileServiceClient::AsyncTask<LookupFileReq, LookupFileResp>>
+FileServiceHttpClient::NewAsyncLookupFileTask() {
+  using Task = FileServiceClient::AsyncTask<LookupFileReq, LookupFileResp>;
+  return std::unique_ptr<Task>(
+      new HttpTask<LookupFileReq, LookupFileResp>(
+          this, lookup_path_, trace_id_));
+}
+
+// Stores |req| through the MultiFileStore and waits for completion.
+// Returns true when the resulting status has no error.
+bool FileServiceHttpClient::StoreFile(
+    const StoreFileReq* req, StoreFileResp* resp) {
+  std::ostringstream trace;
+  if (!trace_id_.empty()) {
+    trace << trace_id_ << " ";
+  }
+  trace << "StoreFile " << req->blob_size() << "blobs";
+
+  HttpRPC::Status status;
+  status.trace_id = trace.str();
+  multi_file_store_->StoreFile(&status, req, resp, nullptr);
+  http_->Wait(&status);
+  AddHttpRPCStatus(status);
+  return status.err == 0;
+}
+
+// Sends |req| to |lookup_path_| with a blocking call.
+// Returns the negation of the value returned by HttpRPC::Call().
+bool FileServiceHttpClient::LookupFile(
+    const LookupFileReq* req, LookupFileResp* resp) {
+  std::ostringstream trace;
+  if (!trace_id_.empty()) {
+    trace << trace_id_ << " ";
+  }
+  trace << "LookupFile " << req->hash_key_size() << "keys";
+
+  HttpRPC::Status status;
+  status.timeout_should_be_http_error = false;
+  status.trace_id = trace.str();
+  const bool result = !http_->Call(lookup_path_, req, resp, &status);
+  AddHttpRPCStatus(status);
+  return result;
+}
+
+// Counts one more RPC and adds the sizes and times of |status| to the
+// accumulated stats.
+void FileServiceHttpClient::AddHttpRPCStatus(const HttpRPC::Status& status) {
+  num_rpc_ += 1;
+
+  // Sizes.
+  status_.req_size += status.req_size;
+  status_.resp_size += status.resp_size;
+  status_.raw_req_size += status.raw_req_size;
+  status_.raw_resp_size += status.raw_resp_size;
+
+  // Times.
+  status_.req_build_time += status.req_build_time;
+  status_.req_send_time += status.req_send_time;
+  status_.wait_time += status.wait_time;
+  status_.resp_recv_time += status.resp_recv_time;
+  status_.resp_parse_time += status.resp_parse_time;
+}
+
+}  // namespace devtools_goma
diff --git a/client/goma_file_http.h b/client/goma_file_http.h
new file mode 100644
index 0000000..4798b51
--- /dev/null
+++ b/client/goma_file_http.h
@@ -0,0 +1,69 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_GOMA_FILE_HTTP_H_
+#define DEVTOOLS_GOMA_CLIENT_GOMA_FILE_HTTP_H_
+
+#include <memory>
+#include <string>
+
+#include "goma_file.h"
+#include "http_rpc.h"
+#include "multi_http_rpc.h"
+
+namespace devtools_goma {
+
+class Closure;
+class RequesterInfo;
+
+// FileServiceClient that sends store and lookup requests through HttpRPC
+// and accumulates statistics of the RPCs it made.
+class FileServiceHttpClient : public FileServiceClient {
+ public:
+  // It doesn't take ownership of http and multi_file_store.
+  FileServiceHttpClient(HttpRPC* http,
+                        const string& store_path,
+                        const string& lookup_path,
+                        MultiFileStore* multi_file_store);
+  ~FileServiceHttpClient() override;
+
+  // Returns a new client with the same http, paths and multi_file_store,
+  // and with the given requester info and trace id.
+  // This function doesn't clone |status_|.
+  std::unique_ptr<FileServiceHttpClient> WithRequesterInfoAndTraceId(
+      const RequesterInfo& requester_info, const string& trace_id) const;
+
+  std::unique_ptr<AsyncTask<StoreFileReq, StoreFileResp>>
+      NewAsyncStoreFileTask() override;
+  std::unique_ptr<AsyncTask<LookupFileReq, LookupFileResp>>
+      NewAsyncLookupFileTask() override;
+
+  // Blocking calls.
+  bool StoreFile(const StoreFileReq* req, StoreFileResp* resp) override;
+  bool LookupFile(const LookupFileReq* req, LookupFileResp* resp) override;
+
+  HttpRPC* http() { return http_; }
+
+  // Adds |status| to the accumulated stats and counts one more RPC.
+  void AddHttpRPCStatus(const HttpRPC::Status& status);
+  int num_rpc() const { return num_rpc_; }
+  const HttpRPC::Status& http_rpc_status() const { return status_; }
+
+  const MultiFileStore* multi_file_store() const {
+    return multi_file_store_;
+  }
+
+ private:
+  // Not owned.
+  HttpRPC* http_;
+  const string store_path_;
+  const string lookup_path_;
+
+  // For stats.
+  int num_rpc_;
+  HttpRPC::Status status_;
+
+  // for multi store; not owned.
+  MultiFileStore* multi_file_store_;
+
+  DISALLOW_COPY_AND_ASSIGN(FileServiceHttpClient);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_GOMA_FILE_HTTP_H_
diff --git a/client/goma_flags.cc b/client/goma_flags.cc
new file mode 100644
index 0000000..48dbc6c
--- /dev/null
+++ b/client/goma_flags.cc
@@ -0,0 +1,638 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+// We share these flags among gomacc and compiler_proxy.
+// As gomacc may start compiler_proxy, gomacc should accept flags for
+// compiler_proxy and vice versa.
+
+#include <algorithm>
+
+#include "env_flags.h"
+#include "machine_info.h"
+
+// Declaration-only mode: when GOMA_DECLARE_FLAGS_ONLY is defined, the
+// GOMA_DEFINE_VARIABLE / GOMA_DEFINE_string macros are re-pointed at their
+// GOMA_DECLARE_* counterparts and GOMA_REGISTER_AUTOCONF_FLAG_NAME expands to
+// nothing, so the flag list in this file produces declarations instead of
+// definitions.
+// NOTE(review): presumably the typed GOMA_DEFINE_bool/int32 macros expand
+// through GOMA_DEFINE_VARIABLE -- confirm in env_flags.h.
+#ifdef GOMA_DECLARE_FLAGS_ONLY
+# undef GOMA_DEFINE_VARIABLE
+# define GOMA_DEFINE_VARIABLE(type, name, value, meaning, tn)  \
+    GOMA_DECLARE_VARIABLE(type, name, tn)
+# undef GOMA_DEFINE_string
+# define GOMA_DEFINE_string(name, value, meaning) GOMA_DECLARE_string(name)
+# undef GOMA_REGISTER_AUTOCONF_FLAG_NAME
+# define GOMA_REGISTER_AUTOCONF_FLAG_NAME(name, func)
+#endif
+
+#ifndef GOMA_DECLARE_FLAGS_ONLY
+// Default thread count used by the COMPILER_PROXY_THREADS and
+// INCLUDE_PROCESSOR_THREADS autoconf flags: one thread per detected CPU but
+// never fewer than 2.  If the CPU count is not positive, 16 is used instead.
+// TODO: We would like to know what the best number is?
+static int NumDefaultProxyThreads() {
+  const int cpu_count = devtools_goma::GetNumCPUs();
+  if (cpu_count <= 0) {
+    // CPU detection did not yield a usable number.
+    return 16;
+  }
+  return std::max(cpu_count, 2);
+}
+
+// Default thread count for the COMPILER_PROXY_HTTP_THREADS autoconf flag:
+// the detected CPU count divided by a per-platform divider, with a floor of
+// 1.  If the CPU count is not positive, 4 is used instead.
+static int NumDefaultProxyHttpThreads() {
+#ifdef _WIN32
+  // Windows uses select for accepting sockets and FD_SETSIZE=64.
+  // Limiting the number of threads limits not only memory and cpu usage
+  // but also the number of sockets available to handle IPC.
+  // Since that caused connection issues from gomacc under heavy gomacc
+  // usage, the limit on the number of IPC threads is relaxed on Windows.
+  // See: https://code.google.com/p/chromium/issues/detail?id=390764
+  const int kCpusPerHttpThread = 1;
+#else
+  const int kCpusPerHttpThread = 4;
+#endif
+  const int cpu_count = devtools_goma::GetNumCPUs();
+  if (cpu_count <= 0) {
+    // CPU detection did not yield a usable number.
+    return 4;
+  }
+  return std::max(cpu_count / kCpusPerHttpThread, 1);
+}
+
+// Default for the MAX_INCLUDE_CACHE_SIZE autoconf flag, in MB.
+// On Win or Mac, the include cache improves compile performance.
+// On Linux, IncludeCache itself does not help much, but it is required to
+// enable DepsCache.
+// As of 2017, 32MB is not enough to cache all include headers when a clobber
+// build happens, so 64MB is returned on 64bit builds (__LP64__ or _WIN64)
+// whose total system memory is at least 15GB.  Everything else gets 32MB.
+static int MaxIncludeCacheSize() {
+#if defined(__LP64__) || defined(_WIN64)
+  const int64_t kBytesPerGB = 1024 * 1024 * 1024;
+  const int64_t total_bytes = devtools_goma::GetSystemTotalMemory();
+  if (total_bytes / kBytesPerGB >= 15) {
+    return 64;
+  }
+#endif
+  return 32;
+}
+
+// Default for the BURST_MAX_SUBPROCS and BURST_MAX_SUBPROCS_LOW autoconf
+// flags: twice the detected CPU count, or 6 if the CPU count is not positive.
+static int MaxBurstSubProcs() {
+  const int cpu_count = devtools_goma::GetNumCPUs();
+  return cpu_count > 0 ? 2 * cpu_count : 6;
+}
+
+// Default for the BURST_MAX_SUBPROCS_HEAVY autoconf flag: half the detected
+// CPU count, or 1 when fewer than two CPUs are reported.
+static int MaxBurstSubProcsHeavy() {
+  const int cpu_count = devtools_goma::GetNumCPUs();
+  if (cpu_count < 2) {
+    return 1;
+  }
+  return cpu_count / 2;
+}
+
+#endif  // GOMA_DECLARE_FLAGS_ONLY
+
+// For gomacc
+
+GOMA_DEFINE_bool(DUMP, false, "Dump bunch of info");
+GOMA_DEFINE_bool(DUMP_REQUEST, false, "Dump request protocol buffer");
+GOMA_DEFINE_bool(DUMP_RESPONSE, false, "Dump response protocol buffer");
+GOMA_DEFINE_bool(DUMP_TIME, false, "Dump time info");
+GOMA_DEFINE_bool(DUMP_ARGV, false, "Dump arguments");
+GOMA_DEFINE_bool(DUMP_APPENDLOG, false,
+                 "Dump arguments to /tmp/fallback_command");
+GOMA_DEFINE_bool(STORE_ONLY, false, "Don't use the shared cache in cloud");
+GOMA_DEFINE_bool(USE_SUCCESS, false,
+                 "Lookup the shared cache and store it only if it succeeded");
+GOMA_DEFINE_bool(RETRY, true, "Retry when something failed");
+// Whether to fall back to a local run when remote execution fails.
+// Adjacent string literals are concatenated verbatim, so every sentence but
+// the last ends with an explicit trailing space.
+GOMA_DEFINE_bool(FALLBACK, true, "Fallback when remote execution failed. "
+                 "Even if it is false, compiler proxy will run local process "
+                 "for non-compile command. "
+                 "If false, implies GOMA_USE_LOCAL=false.");
+GOMA_DEFINE_bool(USE_LOCAL, true, "Use local process when idle.");
+// Selects how the local command is verified against the backend.  Each
+// literal ends with a space so the concatenated help text keeps its sentence
+// breaks.
+GOMA_DEFINE_string(VERIFY_COMMAND, "",
+                   "Verify command matches with backend. "
+                   "\"version\" will check by version of command. "
+                   "\"checksum\" will check by checksums of command and "
+                   "subprograms. "
+                   "\"all\" will check all of above.");
+GOMA_DEFINE_bool(VERIFY_OUTPUT, false,
+                 "Verify output file with local compiler.");
+GOMA_DEFINE_bool(VERIFY_ASSEMBLER_CODE, false,
+                 "Verify assembler code with local compiler.");
+GOMA_DEFINE_bool(VERIFY_PREPROCESS_CODE, false,
+                 "Verify preprocessed code with local compiler.");
+GOMA_DEFINE_string(COMPILER_PROXY_SOCKET_NAME, "goma.ipc",
+                   "The unix domain socket name of the compiler proxy. "
+                   "On Windows, this is named pipe's name.");
+GOMA_DEFINE_int32(EXCLUSIVE_NUM_PROCS, 4,
+                  "Max number of process to run simultaneously in fallback.");
+GOMA_DEFINE_string(COMPILER_PROXY_BINARY, "compiler_proxy",
+                   "Path to compiler_proxy binary");
+GOMA_DEFINE_bool(OUTPUT_EXEC_RESP, false,
+                 "Always outputs ExecResp");
+GOMA_DEFINE_string(FALLBACK_INPUT_FILES, "",
+                   "Comma separated list of files for which we use "
+                   "local compilers (e.g., conftest.c,_configtest.c).");
+GOMA_DEFINE_bool(FALLBACK_CONFTEST, true,
+                 "Force local fallback for conftest source.");
+GOMA_DEFINE_string(IMPLICIT_INPUT_FILES, "",
+                   "Comma separated list of files to send to goma.");
+#ifdef _WIN32
+// devenv or msbuild would run cl.exe with multiple inputs.
+// gomacc emits ExecReq per input file.
+GOMA_DEFINE_bool(FAN_OUT_EXEC_REQ,  true,
+                 "If true, gomacc do the fan-out compile request per "
+                 "input filenames. "
+                 "In this mode, verify flags are disabled.");
+#endif
+
+GOMA_DEFINE_bool(START_COMPILER_PROXY, false,
+                 "If true, start compiler proxy when gomacc cannot find it.");
+#ifndef _WIN32
+GOMA_DEFINE_string(GOMACC_LOCK_FILENAME, "gomacc.lock",
+                   "Filename to lock only single instance of compiler proxy "
+                   "can startup.");
+#else
+GOMA_DEFINE_string(GOMACC_LOCK_GLOBALNAME,
+                   "Global\\goma_cc_lock_compiler_proxy",
+                   "Global mutex so that only one instance of compiler proxy "
+                   "can startup.");
+#endif
+GOMA_DEFINE_int32(GOMACC_COMPILER_PROXY_RESTART_DELAY, 60,
+                  "How long gomacc should wait before retrying to start the "
+                  "compiler proxy.  This must be specified in sec.");
+GOMA_DEFINE_bool(EXTERNAL_USER, false, "Send as an external user.");
+GOMA_DEFINE_bool(DISABLED, false,
+                 "Execute any commands locally without goma.  No throttling.  "
+                 "Using with large -j option of ninja or make may be harmful.");
+#ifdef __linux__
+GOMA_DEFINE_string(LOAD_AVERAGE_LIMIT, "10",
+                  "gomacc invokes a child process only when the load average "
+                  "is below this value.  Will not wait if the value < 1.0");
+// Upper bound (seconds) on the interval between load-average checks.  The
+// literals are concatenated verbatim, so only one side of each boundary
+// carries the separating space.
+GOMA_DEFINE_int32(MAX_SLEEP_TIME, 60,
+                  "gomacc checks load average less than this time interval "
+                  "(in sec) and sleeps between checks if load average is "
+                  "higher than GOMA_LOAD_AVERAGE_LIMIT. "
+                  "Will not wait if the value <= 0.");
+#endif
+
+GOMA_DEFINE_bool(GOMACC_ENABLE_CRASH_DUMP, false,
+                 "True to store breakpad crash dump on gomacc.");
+GOMA_DEFINE_bool(GOMACC_WRITE_LOG_FOR_TESTING, false,
+                 "True to write log via glog.  Only for testing.");
+
+// For compiler_proxy
+
+GOMA_DEFINE_string(PROXY_HOST, "",
+                   "The hostname of an HTTP proxy.");
+GOMA_DEFINE_int32(PROXY_PORT, 0,
+                  "The port of an HTTP proxy.");
+GOMA_DEFINE_string(SETTINGS_SERVER,
+                   "",
+                   "Settings server URL");
+GOMA_DEFINE_string(ASSERT_SETTINGS,
+                   "",
+                   "Assert settings name matches with this value, "
+                   "if specified.");
+GOMA_DEFINE_string(STUBBY_PROXY_IP_ADDRESS, "clients5.google.com",
+                   "The IP address or hostname of the stubby proxy, or GFE.");
+GOMA_DEFINE_int32(STUBBY_PROXY_PORT, 443,
+                  "The port of the stubby proxy, or GFE.");
+GOMA_DEFINE_string(URL_PATH_PREFIX, "/cxx-compiler-service",
+                   "The HTTP RPC URL path prefix.");
+// Address the compiler proxy listens on for http.  Separators are spelled
+// out inside the literals because adjacent literals are joined verbatim.
+GOMA_DEFINE_string(COMPILER_PROXY_LISTEN_ADDR, "localhost",
+                   "The address that compiler proxy listens for http. "
+                   "INADDR_LOOPBACK(127.0.0.1) for 'localhost', "
+                   "INADDR_ANY for ''.");
+GOMA_DEFINE_int32(COMPILER_PROXY_PORT, 8088,
+                  "The port of the compiler proxy.");
+
+GOMA_DEFINE_bool(COMPILER_PROXY_REUSE_CONNECTION, true,
+                 "Connection is reused for multiple rpcs.");
+
+GOMA_DEFINE_bool(COMPILER_PROXY_FORCE_CONNECT_ERRORNEOUS_ADDRESS, false,
+                 "Retry a connection establishment"
+                 " if failed to get new socket.");
+
+// See  http://smallvoid.com/article/winnt-tcpip-max-limit.html
+// Remember to read the comments by the author.  For Vista/Win7 (where goma is
+// targeted at), the max number of sockets is the number of ports available for
+// establishing connections, which is, 65535 - 1.
+GOMA_DEFINE_int32(COMPILER_PROXY_MAX_SOCKETS, 65534,
+                  "Maximum connections supported on Windows.");
+GOMA_DEFINE_int32(COMPILER_PROXY_NUM_FIND_PORTS, 10,
+                  "Compiler proxy searches a free port by incrementing "
+                  "the port number at most this value when the port "
+                  "specified by COMPILER_PROXY_PORT is in use.");
+GOMA_DEFINE_AUTOCONF_int32(COMPILER_PROXY_THREADS, NumDefaultProxyThreads,
+                           "Number of threads compiler proxy will run in.");
+GOMA_DEFINE_AUTOCONF_int32(COMPILER_PROXY_HTTP_THREADS,
+                           NumDefaultProxyHttpThreads,
+                           "Number of threads compiler proxy will handle "
+                           "http/ipc request.");
+GOMA_DEFINE_AUTOCONF_int32(INCLUDE_PROCESSOR_THREADS, NumDefaultProxyThreads,
+                           "Number of threads for include processor.");
+#ifdef _WIN32
+#define DEFAULT_MAX_OVERCOMIT_INCOMING_SOCKETS 64
+#else
+#define DEFAULT_MAX_OVERCOMIT_INCOMING_SOCKETS 0
+#endif
+GOMA_DEFINE_int32(MAX_OVERCOMMIT_INCOMING_SOCKETS,
+                  DEFAULT_MAX_OVERCOMIT_INCOMING_SOCKETS,
+                  "Number of overcommitted incoming sockets per threads on "
+                  "select.");
+#if defined(__LP64__) || defined(_WIN64)
+# define DEFAULT_MAX_ACTIVE_TASKS 1024
+#else  // 32bit system would not have enough memory
+# define DEFAULT_MAX_ACTIVE_TASKS 200
+#endif
+GOMA_DEFINE_int32(MAX_ACTIVE_TASKS, DEFAULT_MAX_ACTIVE_TASKS,
+                  "Number of active tasks.");
+GOMA_DEFINE_int32(MAX_FINISHED_TASKS, 1024,
+                  "Number of task information to keep for monitoring.");
+GOMA_DEFINE_int32(MAX_FAILED_TASKS, 1024,
+                  "Number of failed task information to keep for monitoring.");
+// How many long-running task records are kept for monitoring.
+GOMA_DEFINE_int32(MAX_LONG_TASKS, 50,
+                  "Number of long task information to keep for monitoring.");
+GOMA_DEFINE_bool(COMPILER_PROXY_STORE_FILE, false,
+                 "True to store files first.  False to believe FileService "
+                 "already has files and not send new file content.");
+GOMA_DEFINE_int32(COMPILER_PROXY_NEW_FILE_THRESHOLD, 60 * 60,
+                  "Time(sec) to consider new file if the file is modified "
+                  "in that time.");
+GOMA_DEFINE_int32(PING_TIMEOUT_SEC, 60,
+                  "Time(sec) for initial ping timeout.");
+GOMA_DEFINE_int32(PING_RETRY_INTERVAL, 10,
+                  "Time(sec) interval for retrying initial ping.");
+GOMA_DEFINE_string(COMPILER_PROXY_RPC_TIMEOUT_SECS, "610",
+                   "Time(sec) for HttpRPC timeouts.");
+// Level of command equivalence required between local and remote compilers.
+// The help text spells the value as "checksum", matching the spelling used
+// in the VERIFY_COMMAND help text.
+// NOTE(review): the code that compares this flag's value is not visible
+// here -- confirm "checksum" is the accepted spelling.
+GOMA_DEFINE_string(COMMAND_CHECK_LEVEL, "",
+                   "Level of command equivalence. "
+                   "Default (\"\") will check by command name and target "
+                   "architecture. "
+                   "\"version\" will check by version of command. "
+                   "\"checksum\" will check by checksum of command.");
+GOMA_DEFINE_string(HERMETIC, "fallback",
+                   "Hermetic mode: one of \"off\", \"fallback\" or \"error\". "
+                   "If it is not \"off\", use the compiler with the same "
+                   "version string and binary hash in backend.  If no such "
+                   "compiler is found, run local compiler (for \"fallback\") "
+                   "or response error (for \"error\") and never try "
+                   "sending request again for the same compiler. "
+                   "This flag will override GOMA_FALLBACK when hermetic "
+                   "compiler is not found.");
+GOMA_DEFINE_int32(LOCAL_RUN_PREFERENCE, 3,
+                  "Local run preference. "
+                  "If local process has started before this stage of goma's "
+                  "process (e.g. CompileTask::State), stop racing and "
+                  "ignore goma. ");
+GOMA_DEFINE_bool(LOCAL_RUN_FOR_FAILED_INPUT, true,
+                 "Prefer local run for previous failed input filename. ");
+GOMA_DEFINE_int32(LOCAL_RUN_DELAY_MSEC, 0,
+                  "msec to delay for idle fallback.");
+GOMA_DEFINE_int32(MAX_SUBPROCS, 3,
+                  "Maximum number of subprocesses that run at the same time.");
+GOMA_DEFINE_int32(MAX_SUBPROCS_LOW, 1,
+                  "Maximum number of subprocesses with low priority "
+                  "(e.g. compile locally while requesting to goma). "
+                  "fallback process gets high priority.");
+GOMA_DEFINE_int32(MAX_SUBPROCS_HEAVY, 1,
+                  "Maximum number of subprocesses with heavy weight "
+                  "(such as link).");
+GOMA_DEFINE_AUTOCONF_int32(BURST_MAX_SUBPROCS, MaxBurstSubProcs,
+                           "Maximum number of subprocesses when remote server "
+                           "is not available. When remote server is not "
+                           "available, goma tries to use local cpu more.");
+GOMA_DEFINE_AUTOCONF_int32(BURST_MAX_SUBPROCS_LOW, MaxBurstSubProcs,
+                           "Maximum number of subprocesses with low priority "
+                           "when remote server is not available. In most cases "
+                           "local compile is low priority. So it is "
+                           "recommended to set this the same number as "
+                           "BURST_MAX_SUBPROCS.");
+GOMA_DEFINE_AUTOCONF_int32(BURST_MAX_SUBPROCS_HEAVY, MaxBurstSubProcsHeavy,
+                           "Maximum number of subprocesses with heavy weight "
+                           "when remote server is not available.");
+GOMA_DEFINE_int32(MAX_SUBPROCS_PENDING, 3,
+                  "Threshold to prefer local run to remote goma.");
+// TODO: autoconf
+GOMA_DEFINE_int32(MAX_SUM_OUTPUT_SIZE_IN_MB, 64,
+                  "The max size for output buffer in MB.");
+GOMA_DEFINE_bool(STORE_LOCAL_RUN_OUTPUT, false,
+                 "Store local run output in goma cache.");
+GOMA_DEFINE_bool(ENABLE_REMOTE_LINK, false, "Enable remote link.");
+GOMA_DEFINE_bool(USE_RELATIVE_PATHS_IN_ARGV, false,
+                 "Use relative paths in argv, except system directories.");
+GOMA_DEFINE_string(TMP_DIR, "",
+                   "Temporary Directory.  Ignored on Windows.");
+GOMA_DEFINE_string(CACHE_DIR, "",
+                   "A directory to store goma's cache data. e.g. CRLs");
+GOMA_DEFINE_string(COMPILER_PROXY_LOCK_FILENAME, "goma_compiler_proxy.lock",
+                   "Filename to lock only single instance of compiler proxy "
+                   "can startup.");
+GOMA_DEFINE_string(RPC_EXTRA_PARAMS, "",
+                   "Extra parameter to append to RPC path.");
+
+GOMA_DEFINE_int32(MULTI_STORE_IN_CALL, 128,
+                  "Number of FileBlob in StoreFileReq");
+GOMA_DEFINE_int32(MULTI_STORE_THRESHOLD_SIZE_IN_CALL, 12 * 1024 * 1024,
+                  "Threshold size to issue StoreFileReq");
+GOMA_DEFINE_int32(MULTI_STORE_PENDING_MS, 100,
+                  "Pending time in ms to issue StoreFileReq.");
+GOMA_DEFINE_int32(NUM_LOG_IN_SAVE_LOG, 512,
+                  "Number of ExecLog in SaveLogReq");
+GOMA_DEFINE_int32(LOG_PENDING_MS, 30 * 1000,
+                  "Pending time in ms to save log.");
+// See RFC1918 for private address space.
+GOMA_DEFINE_string(COMPILER_PROXY_TRUSTED_IPS,
+                   "127.0.0.1,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16",
+                   "Trusted IP networks that are allowed to access "
+                   "compiler proxy status page. "
+                   "By default, localhost and private address spaces are "
+                   "considered as trusted.");
+GOMA_DEFINE_bool(ENABLE_GCH_HACK, false,
+                 "Enable *.gch hack");
+GOMA_DEFINE_AUTOCONF_int32(MAX_INCLUDE_CACHE_SIZE,
+                           MaxIncludeCacheSize,
+                           "The size of include cache in MB.");
+GOMA_DEFINE_string(CONTENT_TYPE_FOR_PROTOBUF, "binary/x-protocol-buffer",
+                   "Content-Type for goma's HttpRPC requests.");
+GOMA_DEFINE_bool(BACKEND_SOFT_STICKINESS, false,
+                 "Enable backend soft stickiness, i.e. set cookie header.");
+GOMA_DEFINE_bool(BACKEND_SOFT_STICKINESS_REFRESH, true,
+                 "Use randomly created cookie for backend soft stickiness.");
+GOMA_DEFINE_string(HTTP_AUTHORIZATION_FILE, "",
+                   "Debug only. File that stores Authorization header, "
+                   "if it is not empty.");
+// Path to the OAuth2 config file.  The help text names a per-platform
+// default location, selected with the preprocessor inside the literal list.
+// Each sentence ends with a space because adjacent literals are joined
+// verbatim.
+GOMA_DEFINE_string(OAUTH2_CONFIG_FILE, "",
+                   "File that stores configs on OAuth2. "
+                   "The file is JSON-like format, and client_id, client_secret,"
+                   " and refresh_token should be set. "
+#ifndef _WIN32
+                   "$HOME/.goma_oauth2_config"
+#else
+                   "%USERPROFILE%\\.goma_oauth2_config"
+#endif
+                   " will be used if no other auth config set.");
+GOMA_DEFINE_string(GCE_SERVICE_ACCOUNT, "",
+                   "service account name in Google Compute Engine.");
+// Path to a service account JSON key file.  Sentences are separated by an
+// explicit space inside the literals.
+GOMA_DEFINE_string(SERVICE_ACCOUNT_JSON_FILE, "",
+                   "File that stores service account json, downloaded from "
+                   "google cloud console. "
+                   "It will be read every time the access token needs to be "
+                   "refreshed. It should be absolute path.");
+GOMA_DEFINE_string(HTTP_HOST, "",
+                   "Alternative host name shown in HTTP Host field. "
+                   "If you use SSL tunnel, compiler proxy connects localhost "
+                   "but the expected Host field might not be localhost.");
+GOMA_DEFINE_bool(USE_SSL, true,
+                 "Communicate server with SSL.");
+// Path to an extra PEM certificate file.  The trailing space after
+// "exchanges." keeps the two sentences apart once the literals are joined.
+GOMA_DEFINE_string(SSL_EXTRA_CERT, "",
+                   "Path to an additional SSL certificate file (PEM) for "
+                   "communication to goma server, not used for oauth2 token "
+                   "exchanges. "
+                   "We automatically load our default certificate.");
+GOMA_DEFINE_string(SSL_EXTRA_CERT_DATA, "",
+                   "An additional SSL certificate (PEM) for "
+                   "communication to goma server, not used for oauth2 token "
+                   "exchanges.");
+// Caps how long a downloaded CRL is cached, measured in seconds from the
+// CRL's lastUpdate field.  A negative value defers to the CRL's nextUpdate.
+GOMA_DEFINE_int32(SSL_CRL_MAX_VALID_DURATION, -1,
+                  "Max valid duration of CRL from CRL's lastUpdate field in "
+                  "seconds. "
+                  "We cache downloaded CRLs no more than this duration. "
+                  "If negative, compiler_proxy follows nextUpdate in CRL.");
+GOMA_DEFINE_bool(PROVIDE_INFO, true,
+                 "Provide info. to Google for improving the service. "
+                 "If enabled, compiler proxy sends timing stats and parameters "
+                 "for both remote tasks and local tasks. It also sends "
+                 "username and nodename if GOMA_SEND_USER_INFO is true.");
+// Whether username and nodename accompany each request.  The first literal
+// ends with a space so the joined help text keeps its sentence break.
+GOMA_DEFINE_bool(SEND_USER_INFO, true,
+                 "Send username and nodename with each request. "
+                 "If false, it will use anonymized user info for "
+                 "compiler_proxy_id.");
+GOMA_DEFINE_string(USE_CASE, "",
+                   "goma use case name. It is used for choosing GCE goma "
+                   "backend settings.");
+GOMA_DEFINE_string(DEPS_CACHE_FILE, "",
+                   "Path to the DepsCache cache file. It eliminates "
+                   "unnecessary preprocess to improve the goma performance. "
+                   "If empty, deps cache won't be used. "
+                   "If not absolute path, it will be in GOMA_CACHE_DIR.");
+GOMA_DEFINE_int32(DEPS_CACHE_IDENTIFIER_ALIVE_DURATION, 3 * 24 * 3600,
+                  "Deps cache older than this value (in second) will be "
+                  "removed in saving/loading. If negative, any cache won't be "
+                  "removed.");
+GOMA_DEFINE_int32(DEPS_CACHE_TABLE_THRESHOLD, 35000,
+                  "The max size of DepsCache table threshold. If the number of "
+                  "DepsCache table exceeds this value, older DepsCache entry "
+                  "will be removed in saving.");
+GOMA_DEFINE_int32(DEPS_CACHE_MAX_PROTO_SIZE_IN_MB, 128,
+                  "The max size of DepsCache file. If the file size exceeds "
+                  "this limit, loading will fail. Unit is MB.");
+GOMA_DEFINE_bool(ENABLE_MACRO_CACHE, false,
+                 "Enable cache for parsed define macro.");
+GOMA_DEFINE_string(COMPILER_INFO_CACHE_FILE, "compiler_info_cache",
+                   "Filename of compiler_info's cache. "
+                   "If empty, compiler_info cache file is not used. "
+                   "If not absolute path, it will be in GOMA_CACHE_DIR.");
+GOMA_DEFINE_bool(ENABLE_GLOBAL_FILE_ID_CACHE, false,
+                 "Enable global file id cache. "
+                 "Do not enable this flag when any source file would be "
+                 "changed between compilations.");
+GOMA_DEFINE_int32(COMPILER_INFO_CACHE_HOLDING_TIME_SEC, 60 * 60 * 24 * 30,
+                  "CompilerInfo is not evicted if it is used within "
+                  "COMPILER_INFO_CACHE_HOLDING_TIME_SEC. "
+                  "Otherwise it is evicted when it is loaded from file.");
+// File that receives stats when compiler_proxy ends; empty disables dumping.
+// The first literal ends with a space so the joined sentences stay apart.
+GOMA_DEFINE_string(DUMP_STATS_FILE, "",
+                   "Filename to dump stats at the end of compiler_proxy. "
+                   "If empty, nothing will be dumped.");
+GOMA_DEFINE_string(HASH_REWRITE_RULE_FILE, "",
+                   "Filename to represent rewrite rule for sha256 hashes "
+                   "of subprograms. Each line of a file should be "
+                   "<from sha256>:<to sha256>");
+GOMA_DEFINE_string(LOCAL_OUTPUT_CACHE_DIR, "",
+                   "Directory that LocalOutputCache uses");
+GOMA_DEFINE_int32(LOCAL_OUTPUT_CACHE_MAX_CACHE_AMOUNT_IN_MB, 1024*30,
+                  "The max size of local output cache. If the total amount "
+                  "exceeds this, older cache will be removed.");
+GOMA_DEFINE_int32(LOCAL_OUTPUT_CACHE_THRESHOLD_CACHE_AMOUNT_IN_MB, 1024*20,
+                  "When LocalOutputCache garbage collection run, entries will "
+                  "be removed until total size is below this value.");
+GOMA_DEFINE_int32(LOCAL_OUTPUT_CACHE_MAX_ITEMS, 100000,
+                  "The max number of cache items. If exceeds this item, older "
+                  "cache will be removed");
+GOMA_DEFINE_int32(LOCAL_OUTPUT_CACHE_THRESHOLD_ITEMS, 80000,
+                  "When LocalOutputCache garbage collection run, entries will "
+                  "be removed until the number of entries are below of this "
+                  "value");
+
+#ifdef _WIN32
+#define DEFAULT_CTL_SCRIPT_NAME "goma_ctl.bat"
+#else
+#define DEFAULT_CTL_SCRIPT_NAME "goma_ctl.py"
+#endif
+GOMA_DEFINE_string(CTL_SCRIPT_NAME,
+                   DEFAULT_CTL_SCRIPT_NAME,
+                   "File name of goma control script. This is used for pulling "
+                   "latest updates in idle time and usually automatically "
+                   "set by the script itself. You SHOULD NOT set this value "
+                   "manually unless you know what you are doing.");
+GOMA_DEFINE_bool(COMPILER_PROXY_ENABLE_CRASH_DUMP, false,
+                 "True to store breakpad crash dump on compiler_proxy.");
+
+// We keep this flag to provide future workarounds for subproc bugs.
+//
+// clang left crash report scripts and data if killed.
+// https://bugs.chromium.org/p/chromium/issues/detail?id=668548
+// goma failed to kill nacl compiler process? goma killed wrapper script,
+// but it failed to wait for nacl compiler?
+// https://bugs.chromium.org/p/chromium/issues/detail?id=668497
+//
+// On old MacOSX, a process happens to enter uninterruptible sleep state ('U'),
+// and it hangs to kill such process. b/5266411
+// jam@ confirmed that recent MacOSX is not affected by this bug.
+// https://code.google.com/p/chromium/issues/detail?id=387934
+GOMA_DEFINE_bool(DONT_KILL_SUBPROCESS,
+                 true,
+                 "Don't kill subprocess.");
+
+#ifdef _WIN32
+# define DEFAULT_DONT_KILL_COMMANDS \
+    "x86_64-nacl-gcc,x86_64-nacl-g++,i686-nacl-gcc,i686-nacl-g++," \
+    "pnacl-clang,pnacl-clang++"
+#else
+# define DEFAULT_DONT_KILL_COMMANDS ""
+#endif
+GOMA_DEFINE_string(DONT_KILL_COMMANDS,
+                   DEFAULT_DONT_KILL_COMMANDS,
+                   "Don't kill commands. "
+                   "On Windows, nacl-gcc sometimes remains its child process "
+                   "with suspended state.  In that situation, \"goma_ctl.bat "
+                   "start\" cannot start compiler_proxy. "
+                   "b/13198323 b/12533849");
+
+#ifndef _WIN32
+GOMA_DEFINE_bool(COMPILER_PROXY_DAEMON_MODE, false,
+                 "True to run as a daemon process.");
+GOMA_DEFINE_string(COMPILER_PROXY_DAEMON_STDERR, "goma_compiler_proxy.stderr",
+                   "Where to write stderr output when running in daemon mode. "
+                   "Used only when COMPILER_PROXY_DAEMON_MODE is true.");
+#endif
+GOMA_DEFINE_int32(AUTO_UPDATE_IDLE_COUNT, 4 * 60 * 60,
+                  "Try to update to the latest version if compiler_proxy "
+                  "has been idle for approx this number of seconds.");
+
+// Watchdog period in seconds (default 4 hours); non-positive disables it.
+// The first literal ends with a space so the joined sentences stay apart.
+GOMA_DEFINE_int32(WATCHDOG_TIMER, 4 * 60 * 60,
+                  "Watchdog timer in seconds. "
+                  "Watchdog is disabled if this value is not positive.");
+
+GOMA_DEFINE_int32(LOG_CLEAN_INTERVAL, 24 * 60 * 60,
+                  "Interval seconds to clean old logs.");
+
+GOMA_DEFINE_int32(MEMORY_TRACK_INTERVAL, 60,
+                  "Interval seconds to track compiler_proxy memory. "
+                  "Periodical memory tracking is disabled if this value is not "
+                  "positive.");
+#if defined(__LP64__) || defined(_WIN64)
+// 4GB on 64bit
+# define DEFAULT_MEMORY_WARNING_THRESHOLD_IN_MB (1024 * 4)
+#else
+// 1.5GB on 32bit
+# define DEFAULT_MEMORY_WARNING_THRESHOLD_IN_MB (1024 + 512)
+#endif
+GOMA_DEFINE_int32(MEMORY_WARNING_THRESHOLD_IN_MB,
+                  DEFAULT_MEMORY_WARNING_THRESHOLD_IN_MB,
+                  "If consuming memory exceeds this value, warning log will be"
+                  " shown.");
+
+GOMA_DEFINE_bool(ENABLE_CONTENTIONZ, true, "Enable contentionz");
+
+GOMA_DEFINE_int32(ALLOWED_NETWORK_ERROR_DURATION, -1,
+                  "Compiler_proxy will make compile error after this duration "
+                  "(in seconds) from when network error has been started. "
+                  "This feature is disabled if negative value is set.");
+// Percentage of non-200 HTTP responses above which the network is treated as
+// unhealthy; a negative value selects the built-in default.
+GOMA_DEFINE_int32(NETWORK_ERROR_THRESHOLD_PERCENT, -1,
+                  "HTTP client in compiler_proxy considers network "
+                  "unhealthy if non-200 HTTP response comes more than this "
+                  "percentage. "
+                  "Use the default value if negative value is given.");
+
+GOMA_DEFINE_bool(FAIL_FAST, false,
+                 "fail fast mode of compiler proxy.");
+
+GOMA_DEFINE_int32(MAX_ACTIVE_FAIL_FALLBACK_TASKS, -1,
+                  "Compiler_proxy will make compile error without trying local "
+                  "fallback if the number of local fallbacks by remote compile "
+                  "failure gets larger than this value and go over the allowed "
+                  "duration set by ALLOWED_MAX_ACTIVE_FAIL_FALLBACK_DURATION. "
+                  "This feature is disabled if negative value is set.");
+GOMA_DEFINE_int32(ALLOWED_MAX_ACTIVE_FAIL_FALLBACK_DURATION, -1,
+                  "Compiler_proxy will make compile error if the number of "
+                  "local fallbacks by remote compile failure gets larger than "
+                  "MAX_ACTIVE_FAIL_FALLBACK_TASKS and reaches this duration "
+                  "(in seconds). value <= 0 means duration is set to 0.");
+
+GOMA_DEFINE_int32(MAX_COMPILER_DISABLED_TASKS, -1,
+                  "Compiler_proxy will enter burst mode if the number of "
+                  "setup failure caused by compiler disabled gets larger than "
+                  "this value.  This feature is disabled if negative value "
+                  "is set.");
+
+#if HAVE_HEAP_PROFILER
+GOMA_DEFINE_string(COMPILER_PROXY_HEAP_PROFILE_FILE, "goma_compiler_proxy_heapz",
+                   "heap profile filename.");
+#endif
+#if HAVE_CPU_PROFILER
+GOMA_DEFINE_string(COMPILER_PROXY_CPU_PROFILE_FILE,
+                   "goma_compiler_proxy_profilez",
+                   "cpu profile filename.");
+GOMA_DEFINE_string(INCLUDE_PROCESSOR_CPU_PROFILE_FILE,
+                   "goma_include_processor_profilez",
+                   "cpu profile filename.");
+#endif
+
+// HTTP RPC
+// Data for a big (>6MB) real data:
+//
+// 1: 6358280->1406854 in 101ms
+// 2: 6358280->1331872 in 108ms
+// 3: 6358280->1278628 in 118ms
+// 4: 6358280->1171559 in 152ms
+// 5: 6358280->1116972 in 186ms
+// 6: 6358280->1090649 in 236ms
+// 7: 6358280->1086307 in 261ms
+// 8: 6358280->1082820 in 366ms
+// 9: 6358280->1082162 in 459ms
+//
+// It seems somewhere from 3 to 6 would be a nice value.
+// Compression level for HttpRPC in the range 0..9; 0 turns compression off.
+// The first literal ends with a space so the joined sentences stay apart.
+GOMA_DEFINE_int32(HTTP_RPC_COMPRESSION_LEVEL, 3,
+                  "Compression level in HttpRPC [0..9]. "
+                  "0 forces to disable compression.");
+GOMA_DEFINE_string(HTTP_ACCEPT_ENCODING, "deflate",
+                   "Accept-Encoding of goma's requests (e.g., lzma2)");
+GOMA_DEFINE_bool(HTTP_RPC_START_COMPRESSION, true,
+                 "Starts with compressed request. "
+                 "Compression will be enabled/disabled by Accept-Encoding "
+                 "in server's response.");
+// When true every response header is captured; otherwise only those of http
+// errors.  The first literal ends with a space so the joined sentences stay
+// apart.
+GOMA_DEFINE_bool(HTTP_RPC_CAPTURE_RESPONSE_HEADER, false,
+                 "Capture every response header. "
+                 "By default, it only captures response header of "
+                 "http error.");
+GOMA_DEFINE_string(HTTP_SOCKET_READ_TIMEOUT_SECS, "1.0",
+                   "Time(sec) for once the socket receives response header.");
+
+// Lower and upper bounds, in milliseconds, of the HttpRPC retry backoff.
+// The MAX flag's help text says "Maximum" so that it describes the upper
+// bound rather than repeating the MIN flag's wording.
+GOMA_DEFINE_int32(HTTP_RPC_MIN_RETRY_BACKOFF, 500,
+                  "Minimum Time(millisec) for retry backoff for HttpRPC. "
+                  "Backoff time is randomized by subtracting 40%, so actual "
+                  "minimum backoff time would be 60% of this value.");
+
+GOMA_DEFINE_int32(HTTP_RPC_MAX_RETRY_BACKOFF, 5000,
+                  "Maximum Time(millisec) for retry backoff for HttpRPC.");
+
+GOMA_DEFINE_int32(RPC_TRACE_PERIOD, 0,
+                  "How often to request RPC traces on the server. Traces will "
+                  "be requested every nth request (i.e. 0 means never, 1 means "
+                  "always, 10 means every 10th request)");
+
+GOMA_DEFINE_string(API_TASKZ_FILE_FOR_TEST, "",
+                   "Show the content of this file in /api/taskz. "
+                   "For testing only.");
+
+// For goma_fetch
+GOMA_DEFINE_int32(FETCH_RETRY, 5,
+                  "Times to retry for 50x error in http get");
+
+// script or wrapper
+GOMA_DEFINE_string(
+    DIR, "",
+    "Not used by this program, but may be set by wrapper scripts.");
diff --git a/client/goma_init.cc b/client/goma_init.cc
new file mode 100644
index 0000000..42b510e
--- /dev/null
+++ b/client/goma_init.cc
@@ -0,0 +1,100 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "goma_init.h"
+
+#include <iostream>
+
+#include "autolock_timer.h"
+#include "compiler_proxy_info.h"
+#include "env_flags.h"
+#include "glog/logging.h"
+#include "google/protobuf/stubs/logging.h"
+#include "mypath.h"
+#include "ioutil.h"
+
+using std::string;
+
+namespace {
+
+// Log handler for protobuf (installed by InitLogging): forwards each
+// protobuf log message to glog under the original file name and line, so
+// that both libraries write to the same place.
+void ProtobufLogHandler(google::protobuf::LogLevel level,
+                        const char* filename,
+                        int line,
+                        const string& message) {
+  // Pick the glog severity that corresponds to the protobuf level.  Levels
+  // not matched below are reported as errors.
+  int glog_severity;
+  if (level == google::protobuf::LOGLEVEL_INFO) {
+    glog_severity = google::GLOG_INFO;
+  } else if (level == google::protobuf::LOGLEVEL_WARNING) {
+    glog_severity = google::GLOG_WARNING;
+  } else if (level == google::protobuf::LOGLEVEL_FATAL) {
+    glog_severity = google::GLOG_FATAL;
+  } else {
+    glog_severity = google::GLOG_ERROR;
+  }
+
+  // Emit through glog.
+  google::LogMessage(filename, line, glog_severity).stream() << message;
+}
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+// Common start-up: checks and auto-configures flags from |envp|, answers a
+// lone --version / --build-info argument, then sets up FLAGS_TMP_DIR.
+void Init(int argc, char* argv[], const char* envp[]) {
+  CheckFlagNames(envp);
+  AutoConfigureFlags(envp);
+  // Informational queries: print the requested string and exit right away.
+  const bool single_arg = (argc == 2);
+  if (single_arg && strcmp(argv[1], "--version") == 0) {
+    std::cout << "GOMA version " << kBuiltRevisionString << std::endl;
+    exit(0);
+  }
+  if (single_arg && strcmp(argv[1], "--build-info") == 0) {
+    std::cout << kUserAgentString << std::endl;
+    exit(0);
+  }
+#ifndef NO_AUTOLOCK_STAT
+  if (FLAGS_ENABLE_CONTENTIONZ) g_auto_lock_stats = new AutoLockStats;
+#endif
+
+  const string username = GetUsernameNoEnv();
+  if (username != GetUsernameEnv()) {
+    LOG(ERROR) << "username mismatch: " << username
+               << " env:" << GetUsernameEnv();
+  }
+  FLAGS_TMP_DIR = GetGomaTmpDir();
+  CheckTempDirectory(FLAGS_TMP_DIR);
+}
+
+// Starts glog for |argv0|, routes protobuf's log output through glog, and
+// logs the build revision plus the output of DumpEnvFlag.
+void InitLogging(const char* argv0) {
+  google::InitGoogleLogging(argv0);
+  // Route protobuf's own log messages to wherever glog is writing.
+  google::protobuf::SetLogHandler(ProtobufLogHandler);
+#ifndef _WIN32
+  google::InstallFailureSignalHandler();
+#endif
+  LOG(INFO) << "goma built revision " << kBuiltRevisionString;
+#if !defined(NDEBUG)
+  LOG(ERROR) << "WARNING: DEBUG BINARY -- Performance may suffer";
+#endif
+#if defined(ADDRESS_SANITIZER)
+  LOG(ERROR) << "WARNING: ASAN BINARY -- Performance may suffer";
+#endif
+  // Record the flag dump once at start-up.
+  std::ostringstream flag_dump;
+  DumpEnvFlag(&flag_dump);
+  LOG(INFO) << "goma flags:" << flag_dump.str();
+  FlushLogFiles();
+}
+
+}  // namespace devtools_goma
diff --git a/client/goma_init.h b/client/goma_init.h
new file mode 100644
index 0000000..40c0535
--- /dev/null
+++ b/client/goma_init.h
@@ -0,0 +1,18 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_GOMA_INIT_H_
+#define DEVTOOLS_GOMA_CLIENT_GOMA_INIT_H_
+
+#define GOMA_DECLARE_FLAGS_ONLY
+#include "goma_flags.cc"
+
+namespace devtools_goma {
+// Process start-up helpers; the definitions are in goma_init.cc.
+void Init(int argc, char* argv[], const char* envp[]);
+void InitLogging(const char* argv0);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_GOMA_INIT_H_
diff --git a/client/goma_ipc.cc b/client/goma_ipc.cc
new file mode 100644
index 0000000..53b7fa4
--- /dev/null
+++ b/client/goma_ipc.cc
@@ -0,0 +1,360 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "goma_ipc.h"
+
+#ifndef _WIN32
+#include <fcntl.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#endif
+
+#include <iostream>
+#include <set>
+#include <sstream>
+#include <string>
+
+#include "compiler_proxy_info.h"
+#include "compiler_specific.h"
+#include "env_flags.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "google/protobuf/message.h"
+MSVC_POP_WARNING()
+#include "ioutil.h"
+#include "glog/logging.h"
+#include "goma_ipc_peer.h"
+#include "scoped_fd.h"
+#include "simple_timer.h"
+#include "util.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+static void SetError(int err, const string error_message,
+                     GomaIPC::Status* status) {
+  VLOG(1) << error_message;
+  if (status->err == OK)
+    status->err = err;
+  if (status->error_message.empty())
+    status->error_message = error_message;
+  else
+    status->error_message += "\n" + error_message;
+}
+
+GomaIPC::GomaIPC(std::unique_ptr<ChanFactory> chan_factory)
+    : chan_factory_(std::move(chan_factory)) {
+}
+// The destructor body is empty: nothing is released by hand here.
+GomaIPC::~GomaIPC() {
+}
+
+// Synchronous RPC: sends |req| to |path| with CallAsync, then uses Wait to
+// read the reply into |resp|.  Returns OK or an error code.
+int GomaIPC::Call(const string& path,
+                  const google::protobuf::Message* req,
+                  google::protobuf::Message* resp,
+                  Status* status) {
+  DCHECK(status);
+
+  std::unique_ptr<IOChannel> chan = CallAsync(path, req, status);
+  if (chan != nullptr) return Wait(std::move(chan), resp, status);
+  LOG(ERROR) << "call failed: " << status->error_message;
+  return status->err;
+}
+
+// Connects to the peer, checks it, and sends |req| to |path|.  Returns the
+// open channel for Wait(), or nullptr with the failure recorded in |status|.
+std::unique_ptr<IOChannel> GomaIPC::CallAsync(
+    const string& path,
+    const google::protobuf::Message* req,
+    Status* status) {
+  DCHECK(status);
+  status->connect_success = false;
+  std::unique_ptr<IOChannel> chan = chan_factory_->New();
+  if (chan == nullptr) {
+    SetError(FAIL, "Failed to connect to " + chan_factory_->DestName(),
+             status);
+    return nullptr;
+  }
+  if (!CheckGomaIPCPeer(chan.get(), nullptr)) {
+    SetError(FAIL, "Peer is serving by other user?", status);
+    return nullptr;
+  }
+  status->connect_success = true;
+
+  // Serialize and send; the timer below also covers the serialization.
+  string send_string;
+  SimpleTimer req_send_timer;
+  req->SerializeToString(&send_string);
+  status->req_size = send_string.size();
+  VLOG(1) << "sending " << send_string.size() << " bytes to server.";
+  const int err = SendRequest(chan.get(), path, send_string, status);
+  if (err < 0) {
+    std::ostringstream ss;
+    ss << "Failed to send err=" << err
+       << " duration=" << req_send_timer.GetInMs() << "ms";
+    SetError(err, ss.str(), status);
+    return nullptr;
+  }
+  status->req_send_time = req_send_timer.Get();
+  return chan;
+}
+
+// Reads the reply for a request started by CallAsync from |chan| and parses
+// it into |resp|.  Returns OK, or an error code with details in |status|.
+int GomaIPC::Wait(std::unique_ptr<IOChannel> chan,
+                  google::protobuf::Message* resp,
+                  Status* status) {
+  DCHECK(status);
+  if (chan == nullptr) {
+    // No channel to read from: report the stored error if there is one.
+    return status->err != OK ? status->err : FAIL;
+  }
+
+  string header;
+  string body;
+  status->http_return_code = 0;
+  SimpleTimer resp_recv_timer;
+
+  const int err = ReadResponse(chan.get(), &header, &body,
+                               &status->http_return_code, status);
+  if (err < 0) {
+    std::ostringstream ss;
+    ss << "Failed to read response err=" << err
+       << " duration=" << resp_recv_timer.GetInMs() << "ms";
+    SetError(err, ss.str(), status);
+    return err;
+  }
+
+  // Only a 200 reply carrying a non-empty body is acceptable.
+  if (status->http_return_code != 200) {
+    std::ostringstream ss;
+    ss << "Invalid HTTP response code: " << status->http_return_code;
+    SetError(FAIL, ss.str(), status);
+    VLOG(2) << header;
+    VLOG(2) << body;
+    return FAIL;
+  }
+  if (body.empty()) {
+    SetError(FAIL, "Empty message", status);
+    return FAIL;
+  }
+
+  status->resp_recv_time = resp_recv_timer.Get();
+  status->resp_size = body.size();
+
+  // Finally decode the protobuf payload.
+  if (resp->ParseFromString(body)) return OK;
+  SetError(FAIL, "Failed to parse response body", status);
+  return FAIL;
+}
+
+// Writes an HTTP/1.1 POST for |path| with body |s| to |chan|, using
+// status->initial_timeout_sec as the write timeout.  Returns 0 on success,
+// or the negative write error after recording it in |status|.
+int GomaIPC::SendRequest(const IOChannel* chan,
+                         const string& path, const string& s,
+                         Status* status) {
+  // Using "Host: 0.0.0.0" is hack not to create goma ipc request
+  // on browser.  Host field could not be modified on Browser.
+  // Note: browser will have "Host: localhost:18088" or so on windows.
+  // Also note that it doesn't need to have Origin header, although
+  // XMLHttpRequest will add this one automatically, and couldn't be
+  // modified.
+  // e.g. request generated by sample code in b/33103449
+  // POST /e HTTP/1.1
+  // Host: localhost:18088
+  // User-Agent: ....
+  // Content-Length: 381
+  // Accept: */*
+  // Accept-Encoding: gzip, deflate, br
+  // Accept-Language: en-US,en;q=0.8,ja;q=0.6
+  // Cache-Control: no-cache
+  // Connection: keep-alive
+  // Origin: null
+  // Pragma: no-cache
+  //
+  // see also "forbidden header name" in
+  // https://fetch.spec.whatwg.org/#terminology-headers
+  //
+  // This hack is not enough to protect from attack using Network Communication
+  // API in chrome app.
+  // https://developer.chrome.com/apps/app_network
+  std::ostringstream request;
+  request << "POST " << path << " HTTP/1.1\r\n"
+          << "Host: 0.0.0.0\r\n"
+          << "User-Agent: " << kUserAgentString << "\r\n"
+          << "Content-Type: binary/x-protocol-buffer\r\n"
+          << "Content-Length: " << s.size() << "\r\n"
+          << "\r\n" << s;
+  const int err = chan->WriteString(request.str(),
+                                    status->initial_timeout_sec);
+  if (err >= 0) return 0;
+  LOG(ERROR) << "GOMA: sending request failed: "
+             << chan->GetLastErrorMessage();
+  SetError(err, "Failed to send request", status);
+  return err;
+}
+
+// Reads one HTTP response from |chan|.  On success stores the header part in
+// |*header|, the body in |*body| and the status code in |*http_return_code|.
+// Returns OK, FAIL, or the negative error from the channel read.
+int GomaIPC::ReadResponse(const IOChannel* chan,
+                          string* header,
+                          string* body,
+                          int* http_return_code,
+                          Status* status) {
+  int timeout_sec = status->initial_timeout_sec;
+  string response;
+  size_t response_len = 0;  // Bytes of |response| actually filled so far.
+  size_t offset = 0;
+  size_t content_length = 0;
+
+  for (;;) {
+    bool found_header = offset > 0 && content_length > 0;
+    if (!found_header) {
+      response.resize(response.size() + kBufSize);
+    } else if (response.size() < offset + content_length) {
+      response.resize(offset + content_length);
+    }
+    char* buf = &response[0] + response_len;
+    int buf_size = response.size() - response_len;
+    DCHECK_GT(buf_size, 0);
+    int len = chan->ReadWithTimeout(buf, buf_size, timeout_sec);
+    if (len == 0) {
+      LOG(ERROR) << "GOMA: Unexpected end-of-file at " << response_len
+                 << "+" << buf_size
+                 << ": " << chan->GetLastErrorMessage();
+      SetError(FAIL, "Unexpected end-of-file", status);
+      break;
+    }
+    if (len > 0) {
+      response_len += len;
+      // Now we've got the first response. The next response
+      // should come soon. Let's make the timeout shorter.
+      timeout_sec = status->read_timeout_sec;
+      StringPiece resp(response.data(), response_len);
+      if ((found_header || ParseHttpResponse(resp, http_return_code,
+                                             &offset, &content_length,
+                                             nullptr)) &&
+          response_len >= offset + content_length) {
+        break;
+      }
+      continue;
+    }
+    LOG(WARNING)
+        << "GOMA: http response read error:" << len
+        << " after " << response_len << " bytes."
+        << " http=" << *http_return_code
+        << " offset=" << offset
+        << " content_length=" << content_length;
+    if (len == ERR_TIMEOUT && response_len == 0 &&
+        status->health_check_on_timeout) {
+      // long compile/link task and still running?  (waits on either result)
+      len = CheckHealthz(status);
+      if (len == OK) {
+        LOG(INFO) << "healthy. wait more in pid:" << Getpid();
+      }
+      timeout_sec = status->check_timeout_sec;
+      continue;
+    }
+    return len;
+  }
+
+  // sanity checking the data
+  if (response_len < offset + content_length) {
+    // if response size is too small, there was some network error.
+    std::ostringstream ss;
+    ss << "broken response string from server, it was cut short."
+       << " response_len=" << response_len
+       << " offset=" << offset
+       << " content_length=" << content_length;
+    SetError(FAIL, ss.str(), status);
+    LOG(ERROR) << "GOMA: " << ss.str();
+    return FAIL;
+  }
+
+  if (offset == 0) {
+    // No header was parsed: return the received bytes without padding.
+    header->assign(response, 0, response_len);
+    body->clear();
+  } else {
+    header->assign(response, 0, offset);  // [0, offset) is the header.
+    body->assign(response, offset, content_length);
+  }
+  return OK;
+}
+
+// Asks the peer's /healthz endpoint over a fresh channel whether it is alive.
+// Returns OK for an HTTP 200 reply; otherwise records the problem in
+// |status| and returns FAIL (or the error from sending the request).
+int GomaIPC::CheckHealthz(Status* status) {
+  const pid_t pid = Getpid();
+  std::unique_ptr<IOChannel> healthz_chan = chan_factory_->New();
+  if (healthz_chan == nullptr) {
+    std::ostringstream ss;
+    ss << "Failed to connect to " << chan_factory_->DestName()
+       << " from pid:" << pid;
+    LOG(ERROR) << "GOMA: " << ss.str();
+    SetError(FAIL, ss.str(), status);
+    return FAIL;
+  }
+  std::ostringstream healthz_path;
+  healthz_path << "/healthz?pid=" << pid;
+  const int send_err =
+      SendRequest(healthz_chan.get(), healthz_path.str(), "", status);
+  if (send_err < 0) {
+    LOG(ERROR) << "GOMA: Failed to send to /healthz err=" << send_err
+               << " " << status->error_message
+               << " from pid:" << pid;
+    return send_err;
+  }
+  // Read the reply with a single ReadWithTimeout call into a fixed buffer.
+  string healthz_response;
+  healthz_response.resize(kBufSize);
+  SimpleTimer timer;
+  const int len = healthz_chan->ReadWithTimeout(
+      const_cast<char*>(healthz_response.data()), kBufSize,
+      kReadSelectTimeoutSec);
+  if (len <= 0) {
+    std::ostringstream ss;
+    ss << "Error /healthz err=" << len
+       << " duration=" << timer.GetInMs() << "ms"
+       << " in pid:" << pid
+       << " error=" << healthz_chan->GetLastErrorMessage();
+    LOG(ERROR) << "GOMA: " << ss.str();
+    SetError(FAIL, ss.str(), status);
+    return FAIL;
+  }
+  int healthz_status = 0;
+  size_t healthz_offset = 0;
+  size_t healthz_content_length = 0;
+  bool is_chunked = false;
+  if (!ParseHttpResponse(healthz_response, &healthz_status,
+                         &healthz_offset, &healthz_content_length,
+                         &is_chunked)) {
+    LOG(ERROR) << "GOMA: Bad response /healthz in pid:" << pid;
+    SetError(FAIL, "Bad response /healthz", status);
+    return FAIL;
+  }
+  if (healthz_status == 200) return OK;
+  std::ostringstream ss;
+  ss << "not healthy? " << healthz_status << " in pid:" << pid;
+  LOG(ERROR) << "GOMA: " << ss.str();
+  SetError(FAIL, ss.str(), status);
+  return FAIL;
+}
+
+// Returns a one-line description of the destination used by this object.
+string GomaIPC::DebugString() const {
+  const string dest = chan_factory_->DestName();
+  return "Socket path: " + dest + "\n";
+}
+
+}  // namespace devtools_goma
diff --git a/client/goma_ipc.h b/client/goma_ipc.h
new file mode 100644
index 0000000..fd1cc5b
--- /dev/null
+++ b/client/goma_ipc.h
@@ -0,0 +1,127 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_GOMA_IPC_H_
+#define DEVTOOLS_GOMA_CLIENT_GOMA_IPC_H_
+
+#ifndef _WIN32
+#include <arpa/inet.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#else
+#include "socket_helper_win.h"
+#endif
+
+#include <memory>
+#include <set>
+#include <string>
+
+#include "basictypes.h"
+
+using std::string;
+
+namespace google {
+namespace protobuf {
+class Message;
+}  // namespace protobuf
+}  // namespace google
+
+namespace devtools_goma {
+
+class Closure;
+class IOChannel;
+class ScopedSocket;
+
+class GomaIPC {
+ public:
+  static const int kDefaultTimeoutSec = 180;  // 3 min.
+  static const int kReadSelectTimeoutSec = 20;
+  static const int kCheckTimeoutSec = 30;
+  // Per-call timeouts (inputs) and result details (outputs) of one RPC.
+  struct Status {
+    Status() : initial_timeout_sec(kDefaultTimeoutSec),
+               read_timeout_sec(kReadSelectTimeoutSec),
+               check_timeout_sec(kCheckTimeoutSec),
+               health_check_on_timeout(true),
+               connect_success(false), err(0), http_return_code(0),
+               req_size(0), resp_size(0), req_send_time(0), resp_recv_time(0) {}
+    // Timeouts, in seconds (see the k*TimeoutSec defaults above).
+    int initial_timeout_sec;
+    int read_timeout_sec;
+    int check_timeout_sec;
+    bool health_check_on_timeout;
+
+    // Whether connect() was successful for this request.
+    bool connect_success;
+
+    // Result of the RPC (Call/CallAsync/Wait). OK=success, or error code.
+    int err;
+    string error_message;
+
+    // The return code of HTTP.
+    int http_return_code;
+
+    // size of (maybe compressed) message.
+    size_t req_size;
+    size_t resp_size;
+    double req_send_time;
+    double resp_recv_time;
+  };
+  // Creates the channels used by GomaIPC and names their destination.
+  class ChanFactory {
+   public:
+    virtual ~ChanFactory() {}
+
+    virtual std::unique_ptr<IOChannel> New() = 0;
+    virtual std::string DestName() const = 0;
+  };
+
+  // Takes ownership of chan_factory.
+  explicit GomaIPC(std::unique_ptr<ChanFactory> chan_factory);
+  ~GomaIPC();
+
+  // Returns OK on success, negative (Errno) on failure.
+  int Call(const string& path,
+           const google::protobuf::Message* req,
+           google::protobuf::Message* resp,
+           Status* status);
+
+  // Return debug information.
+  string DebugString() const;
+
+  // Returns io channel.
+  std::unique_ptr<IOChannel> CallAsync(
+      const string& path, const google::protobuf::Message* req,
+      Status* status);
+
+  // Takes ownership of io channel.
+  // Returns OK or Errno.
+  int Wait(std::unique_ptr<IOChannel> chan,
+           google::protobuf::Message* resp, Status* status);
+
+ private:
+  // OK on success, negative (Errno) on failure.
+  int SendRequest(const IOChannel* chan,
+                  const string& path, const string& s,
+                  Status* status);
+  // OK on success, negative (Errno) on failure.
+  // If read timed-out after status->initial_timeout_sec, it will check /healthz
+  // by status->check_timeout_sec intervals if status->health_check_on_timeout
+  // is true.
+  int ReadResponse(const IOChannel* chan,
+                   string* header, string* body, int* http_return_code,
+                   Status* status);
+  // Sends a /healthz request on a new channel; OK only for an HTTP 200 reply.
+  int CheckHealthz(Status* status);
+
+  std::unique_ptr<ChanFactory> chan_factory_;
+
+  DISALLOW_COPY_AND_ASSIGN(GomaIPC);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_GOMA_IPC_H_
diff --git a/client/goma_ipc_addr.cc b/client/goma_ipc_addr.cc
new file mode 100644
index 0000000..0838464
--- /dev/null
+++ b/client/goma_ipc_addr.cc
@@ -0,0 +1,55 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "goma_ipc_addr.h"
+
+#include <string.h>
+
+#include "glog/logging.h"
+
+#ifndef UNIX_PATH_MAX
+#define UNIX_PATH_MAX 108
+#endif
+
+namespace devtools_goma {
+
+// Fills |addr| with the goma IPC address for |path| and returns its length.
+// On POSIX |path| is an absolute unix domain socket path (truncated to fit
+// sun_path); on Windows it is a decimal TCP port on 127.0.0.1.
+socklen_t InitializeGomaIPCAddress(const string& path, GomaIPCAddr* addr) {
+  memset(addr, 0, sizeof(GomaIPCAddr));
+#ifndef _WIN32
+  // Don't make unix domain socket invisible (i.e. use abstract socket address)
+  // from file system as we need to run different compiler proxies both inside
+  // and outside chroot.  gomacc and compiler_proxy must run on the same
+  // file system.
+  // See b/5673736 for detail.
+  CHECK_EQ(path[0], '/') << "bad socket path: " << path;
+  // Bound the copy by the real size of sun_path (which can be smaller than
+  // UNIX_PATH_MAX, e.g. on BSD-derived systems), keeping room for the NUL.
+  const size_t max_len = sizeof(addr->sun_path) - 1;
+  const size_t name_len = path.size() < max_len ? path.size() : max_len;
+  addr->sun_family = AF_UNIX;
+  memcpy(addr->sun_path, path.data(), name_len);
+  addr->sun_path[name_len] = '\0';
+#if defined(__MACH__) || defined(__FreeBSD__)
+  addr->sun_len = SUN_LEN(addr);
+  return sizeof(struct sockaddr_un);
+#else
+  return sizeof(addr->sun_family) + name_len;
+#endif
+#else  // _WIN32
+  // TODO: Should use named pipe for IPC on Windows or use Chromium base IPC
+  //       instead.  For security reason, requester shall be the same user.
+  //       Either named pipe or Chromium base solves this concern.
+  u_short server_port = static_cast<u_short>(atoi(path.c_str()));
+  addr->sin_family = AF_INET;
+  CHECK_GT(inet_pton(AF_INET, "127.0.0.1", &addr->sin_addr.s_addr), 0);
+  addr->sin_port = htons(server_port);
+  return sizeof(sockaddr_in);
+#endif
+}
+
+}  // namespace devtools_goma
diff --git a/client/goma_ipc_addr.h b/client/goma_ipc_addr.h
new file mode 100644
index 0000000..ca2e112
--- /dev/null
+++ b/client/goma_ipc_addr.h
@@ -0,0 +1,53 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_GOMA_IPC_ADDR_H_
+#define DEVTOOLS_GOMA_CLIENT_GOMA_IPC_ADDR_H_
+
+#ifndef _WIN32
+#include <sys/socket.h>
+#include <sys/un.h>
+#else
+#include "socket_helper_win.h"
+#endif
+
+#include <string>
+
+using std::string;
+
+namespace devtools_goma {
+
+#ifndef _WIN32
+typedef struct sockaddr_un GomaIPCAddr;
+static const int AF_GOMA_IPC = AF_UNIX;
+#else
+// Note on Windows design:
+//
+// Use Named pipe to restrict on the same computer.
+//
+// Initially, we used a separate port 18088 that is opened and listening to
+// incoming request.  We believed this would be ok, since
+// for Google Windows workstations, only one user at a time can log in.  When
+// the user logged out, compiler_proxy will be forced to terminate since it is
+// a user-launched process.  The listener is bound to localhost, therefore it
+// accepts the traffic from within the machine only.  As a result,
+// compiler_proxy will not be relaying requests from a different user.
+//
+// User fast switching can be a legitimate scenario and it will break goma one
+// way or the other.  For a user to launch VC 2008, [s]he must be an admin.
+//
+// A possible attack vector is a web page that issues requests with XHR, since
+// the request will be sent regardless of cross origin.
+// Another attack vector would be the network API for chrome apps.
+// Note: b/33103449
+//
+typedef struct sockaddr_in GomaIPCAddr;
+static const int AF_GOMA_IPC = AF_INET;
+#endif
+socklen_t InitializeGomaIPCAddress(const string& path, GomaIPCAddr* addr);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_GOMA_IPC_ADDR_H_
diff --git a/client/goma_ipc_peer.cc b/client/goma_ipc_peer.cc
new file mode 100644
index 0000000..d0c3804
--- /dev/null
+++ b/client/goma_ipc_peer.cc
@@ -0,0 +1,96 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "goma_ipc_peer.h"
+
+#ifndef _WIN32
+#include <sys/socket.h>
+#if defined(__MACH__) || defined(__FreeBSD__)
+#include <sys/param.h>
+#include <sys/ucred.h>
+#endif
+#include <sys/un.h>
+#ifdef __linux__
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <unistd.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
+#endif
+#endif  // _WIN32
+
+#include "glog/logging.h"
+
+namespace devtools_goma {
+
+#ifndef _WIN32
+#ifdef __linux__
+// hack for fakeroot: issue the geteuid system call via syscall() directly.
+uid_t real_geteuid() {
+  return syscall(SYS_geteuid);
+}
+#else
+uid_t real_geteuid() {
+  return geteuid();
+}
+#endif  // __linux__
+#endif  // _WIN32
+
+// Returns true when the peer of |chan| is trusted: a secure channel on
+// Windows, otherwise a socket whose peer credentials carry our effective
+// uid.  |*peer_pid| is written only in the SO_PEERCRED branch.
+bool CheckGomaIPCPeer(const IOChannel* chan, pid_t* peer_pid) {
+#ifdef _WIN32
+  // We only trust named pipe, and don't trust socket.
+  // see goma_ipc_addr.h.
+  return chan->is_secure();
+#elif defined(__MACH__) || defined(__FreeBSD__)
+  // ScopedSocket's fd is valid socket descriptor.
+  // TODO: better interface on IOChannel?
+  const int fd = static_cast<const ScopedSocket*>(chan)->get();
+  struct xucred cred;
+  socklen_t cred_len = sizeof(cred);
+  if (getsockopt(fd, 0, LOCAL_PEERCRED, &cred, &cred_len) < 0) {
+    LOG(WARNING) << "cannot get peer credential. Not a unix socket?";
+    return false;
+  }
+  if (cred.cr_version != XUCRED_VERSION) {
+    LOG(WARNING) << "credential version mismatch:"
+                 << " cr_version=" << cred.cr_version
+                 << " XUCRED_VERSION=" << XUCRED_VERSION;
+    return false;
+  }
+  // darwin doesn't have pid in cred structure.
+  // TODO: find another way to get peer pid.
+  if (cred.cr_uid == geteuid()) return true;
+  LOG(WARNING) << "uid mismatch peer=" << cred.cr_uid
+               << " self=" << geteuid();
+  return false;
+#else
+  // ScopedSocket's fd is valid socket descriptor.
+  // TODO: better interface on IOChannel?
+  const int fd = static_cast<const ScopedSocket*>(chan)->get();
+  struct ucred cred;
+  socklen_t cred_len = sizeof(cred);
+  if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED,
+                 reinterpret_cast<void*>(&cred),
+                 &cred_len) < 0) {
+    LOG(WARNING) << "cannot get peer credential. Not a unix socket?";
+    return false;
+  }
+  VLOG(3) << "peer_cred pid=" << cred.pid << " uid=" << cred.uid;
+  if (peer_pid != nullptr)
+    *peer_pid = cred.pid;
+  // Accept a peer whose uid matches either real_geteuid() or geteuid().
+  const uid_t real_euid = real_geteuid();
+  const uid_t euid = geteuid();
+  if (cred.uid == real_euid || cred.uid == euid) return true;
+  LOG(WARNING) << "uid mismatch peer=" << cred.uid
+               << " self=" << euid << "/real=" << real_euid;
+  return false;
+#endif
+}
+
+}  // namespace devtools_goma
diff --git a/client/goma_ipc_peer.h b/client/goma_ipc_peer.h
new file mode 100644
index 0000000..bc9bec9
--- /dev/null
+++ b/client/goma_ipc_peer.h
@@ -0,0 +1,28 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_GOMA_IPC_PEER_H_
+#define DEVTOOLS_GOMA_CLIENT_GOMA_IPC_PEER_H_
+
+#ifndef _WIN32
+#include <unistd.h>
+#include <sys/types.h>
+#else
+#include "config_win.h"
+#endif
+
+#include "scoped_fd.h"
+
+namespace devtools_goma {
+
+// Checks that chan's peer is run by the same effective user as this process.
+// Returns true if so.  On Windows this only checks chan->is_secure().
+// If peer_pid is not NULL and the platform can tell the peer's pid, it is
+// stored in *peer_pid; otherwise *peer_pid is left untouched.
+bool CheckGomaIPCPeer(const IOChannel* chan, pid_t* peer_pid);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_GOMA_IPC_PEER_H_
diff --git a/client/goma_ipc_unittest.cc b/client/goma_ipc_unittest.cc
new file mode 100644
index 0000000..048ba16
--- /dev/null
+++ b/client/goma_ipc_unittest.cc
@@ -0,0 +1,243 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "goma_ipc.h"
+
+#include <string>
+#include <sstream>
+
+#include "compiler_proxy_info.h"
+#include "compiler_specific.h"
+#include "ioutil.h"
+#include "lockhelper.h"
+#include "mock_socket_factory.h"
+#ifdef _WIN32
+#include "named_pipe_client_win.h"
+#include "named_pipe_server_win.h"
+#include "named_pipe_win.h"
+#endif
+#include "platform_thread.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_data.pb.h"
+MSVC_POP_WARNING()
+#include "scoped_fd.h"
+#include "socket_factory.h"
+#include "worker_thread_manager.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+using std::string;
+
+namespace devtools_goma {
+
+#ifdef _WIN32
+static const char kNamedPipeName[] = "goma-ipc-unittest";
+#endif
+
+class GomaIPCTest : public ::testing::Test {
+ protected:
+  class MockChanFactory : public GomaIPC::ChanFactory {
+   public:
+    explicit MockChanFactory(int sock)
+        : factory_(new MockSocketFactory(sock)) {
+    }
+    ~MockChanFactory() override {}
+    // Wraps the mock factory's socket in a ScopedSocket; nullptr if invalid.
+    std::unique_ptr<IOChannel> New() override {
+      ScopedSocket s(factory_->NewSocket());
+      if (!s.valid()) {
+        return nullptr;
+      }
+      return std::unique_ptr<IOChannel>(new ScopedSocket(std::move(s)));
+    }
+
+    std::string DestName() const override {
+      return factory_->DestName();
+    }
+
+   private:
+    std::unique_ptr<MockSocketFactory> factory_;
+  };
+
+#ifdef _WIN32
+  class NamedPipeChanFactory : public GomaIPC::ChanFactory {
+   public:
+    NamedPipeChanFactory() : factory_(kNamedPipeName) {
+    }
+    ~NamedPipeChanFactory() override {}
+
+    std::unique_ptr<IOChannel> New() override {
+      ScopedNamedPipe pipe = factory_.New();
+      if (!pipe.valid()) {
+        return nullptr;
+      }
+      return std::unique_ptr<IOChannel>(new ScopedNamedPipe(std::move(pipe)));
+    }
+
+    string DestName() const override {
+      return factory_.DestName();
+    }
+
+   private:
+    NamedPipeFactory factory_;
+  };
+  // Named pipe handler that checks the request and sends a canned reply.
+  class MockNamedPipeHandler : public NamedPipeServer::Handler {
+   public:
+    ~MockNamedPipeHandler() override {}
+    void HandleIncoming(NamedPipeServer::Request* req) override {
+      LOG(INFO) << "Handle incoming: msg=" << req->request_message();
+      EXPECT_EQ(expect_request_, req->request_message());
+      req->SendReply(reply_);
+    }
+    // Sets the request to expect and the reply to send for it.
+    void Transaction(const std::string& expect_req,
+                     const std::string& reply) {
+      expect_request_ = expect_req;
+      reply_ = reply;
+    }
+
+   private:
+    std::string expect_request_;
+    std::string reply_;
+  };
+#endif
+  // Creates the worker thread manager and the mock server(s) for a test.
+  void SetUp() override {
+    wm_.reset(new WorkerThreadManager);
+    wm_->Start(1);
+    mock_server_.reset(new MockSocketServer(wm_.get()));
+#ifdef _WIN32
+    mock_handler_.reset(new MockNamedPipeHandler);
+    named_pipe_server_.reset(
+        new NamedPipeServer(wm_.get(), mock_handler_.get()));
+    named_pipe_server_->Start(kNamedPipeName);
+#endif
+  }
+  void TearDown() override {
+#ifdef _WIN32
+    named_pipe_server_->Stop();
+    named_pipe_server_.reset();
+    mock_handler_.reset();
+#endif
+    mock_server_.reset();
+    wm_->Finish();
+    wm_.reset();
+  }
+  std::unique_ptr<WorkerThreadManager> wm_;
+  std::unique_ptr<MockSocketServer> mock_server_;
+#ifdef _WIN32
+  std::unique_ptr<MockNamedPipeHandler> mock_handler_;
+  std::unique_ptr<NamedPipeServer> named_pipe_server_;
+#endif
+};
+
+// A factory built on an invalid socket (-1) must make Call() fail to connect.
+TEST_F(GomaIPCTest, ConnectFail) {
+  GomaIPC goma_ipc(
+      std::unique_ptr<GomaIPC::ChanFactory>(new MockChanFactory(-1)));
+  EmptyMessage req;
+  HttpPortResponse resp;
+  GomaIPC::Status status;
+  const int result = goma_ipc.Call("/portz", &req, &resp, &status);
+  EXPECT_EQ(FAIL, result);
+  EXPECT_FALSE(status.connect_success);
+  EXPECT_EQ(FAIL, status.err);
+  EXPECT_EQ("Failed to connect to mock:80", status.error_message);
+  EXPECT_EQ(0, status.http_return_code);
+}
+
+// Round trip over a socket pair: the mock server reads the request and
+// answers with a serialized HttpPortResponse for port 8088.
+TEST_F(GomaIPCTest, CallPortz) {
+  int socks[2];
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  EmptyMessage req;
+  string req_body;
+  req.SerializeToString(&req_body);
+  std::ostringstream expected_req;
+  expected_req << "POST /portz HTTP/1.1\r\n"
+               << "Host: 0.0.0.0\r\n"
+               << "User-Agent: " << kUserAgentString << "\r\n"
+               << "Content-Type: binary/x-protocol-buffer\r\n"
+               << "Content-Length: " << req_body.size() << "\r\n\r\n"
+               << req_body;
+  const string req_expected = expected_req.str();
+  // Server side: read the request, write the reply, close.
+  string req_buf(req_expected.size(), '\0');
+  mock_server_->ServerRead(socks[0], &req_buf);
+  HttpPortResponse resp;
+  resp.set_port(8088);
+  string resp_body;
+  resp.SerializeToString(&resp_body);
+  std::ostringstream reply;
+  reply << "HTTP/1.1 200 OK\r\n"
+        << "Content-Type: binary/x-protocol-buffer\r\n"
+        << "Content-Length: " << resp_body.size() << "\r\n\r\n"
+        << resp_body;
+  mock_server_->ServerWrite(socks[0], reply.str());
+  resp.Clear();
+  mock_server_->ServerClose(socks[0]);
+
+  GomaIPC goma_ipc(
+      std::unique_ptr<GomaIPC::ChanFactory>(new MockChanFactory(socks[1])));
+  GomaIPC::Status status;
+  const int result = goma_ipc.Call("/portz", &req, &resp, &status);
+#ifdef _WIN32
+  // it should fail on Windows, since peer is not named pipe.
+  EXPECT_EQ(FAIL, result);
+#else
+  EXPECT_EQ(0, result);
+  EXPECT_TRUE(status.connect_success);
+  EXPECT_EQ(0, status.err);
+  EXPECT_EQ("", status.error_message);
+  EXPECT_EQ(200, status.http_return_code);
+  EXPECT_EQ(req_expected, req_buf);
+  EXPECT_TRUE(resp.IsInitialized());
+  EXPECT_EQ(8088, resp.port());
+#endif
+}
+
+#ifdef _WIN32
+// Same exchange as CallPortz, but served through the named pipe server.
+TEST_F(GomaIPCTest, CallPortzNamedPipewin) {
+  EmptyMessage req;
+  string req_body;
+  req.SerializeToString(&req_body);
+  std::ostringstream expected_req;
+  expected_req << "POST /portz HTTP/1.1\r\n"
+               << "Host: 0.0.0.0\r\n"
+               << "User-Agent: " << kUserAgentString << "\r\n"
+               << "Content-Type: binary/x-protocol-buffer\r\n"
+               << "Content-Length: " << req_body.size() << "\r\n\r\n"
+               << req_body;
+  HttpPortResponse resp;
+  resp.set_port(8088);
+  string resp_body;
+  resp.SerializeToString(&resp_body);
+  std::ostringstream reply;
+  reply << "HTTP/1.1 200 OK\r\n"
+        << "Content-Type: binary/x-protocol-buffer\r\n"
+        << "Content-Length: " << resp_body.size() << "\r\n\r\n"
+        << resp_body;
+  mock_handler_->Transaction(expected_req.str(), reply.str());
+  resp.Clear();
+
+  GomaIPC goma_ipc(
+      std::unique_ptr<GomaIPC::ChanFactory>(new NamedPipeChanFactory));
+  GomaIPC::Status status;
+  const int result = goma_ipc.Call("/portz", &req, &resp, &status);
+  EXPECT_EQ(0, result);
+  EXPECT_TRUE(status.connect_success);
+  EXPECT_EQ(0, status.err);
+  EXPECT_EQ("", status.error_message);
+  EXPECT_EQ(200, status.http_return_code);
+  EXPECT_TRUE(resp.IsInitialized());
+  EXPECT_EQ(8088, resp.port());
+}
+#endif
+
+}  // namespace devtools_goma
diff --git a/client/gomacc.cc b/client/gomacc.cc
new file mode 100644
index 0000000..33c3d24
--- /dev/null
+++ b/client/gomacc.cc
@@ -0,0 +1,376 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#ifndef _WIN32
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#endif
+
+#include <algorithm>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+#ifdef _WIN32
+#include "config_win.h"
+#endif
+
+#include "basictypes.h"
+#include "breakpad.h"
+#include "compiler_flags.h"
+#include "compiler_specific.h"
+#include "cros_util.h"
+#include "env_flags.h"
+#include "file_id.h"
+#include "glog/logging.h"
+#include "goma_ipc.h"
+#include "gomacc_argv.h"
+#include "gomacc_common.h"
+#include "ioutil.h"
+#include "mypath.h"
+#include "path.h"  // file::JoinPath
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_data.pb.h"
+MSVC_POP_WARNING()
+#include "scoped_fd.h"
+#include "simple_timer.h"
+#include "strutil.h"
+#include "subprocess.h"
+#include "util.h"
+
+GOMA_DECLARE_bool(DUMP);
+GOMA_DECLARE_bool(DUMP_ARGV);
+GOMA_DECLARE_bool(FALLBACK);
+GOMA_DECLARE_bool(GOMACC_ENABLE_CRASH_DUMP);
+GOMA_DECLARE_bool(GOMACC_WRITE_LOG_FOR_TESTING);
+GOMA_DECLARE_bool(RETRY);
+GOMA_DECLARE_bool(STORE_ONLY);
+GOMA_DECLARE_string(TMP_DIR);
+GOMA_DECLARE_bool(USE_LOCAL);
+GOMA_DECLARE_bool(DISABLED);
+GOMA_DECLARE_bool(VERIFY_ASSEMBLER_CODE);
+GOMA_DECLARE_bool(VERIFY_PREPROCESS_CODE);
+GOMA_DECLARE_string(VERIFY_COMMAND);
+#ifdef __linux__
+GOMA_DECLARE_string(LOAD_AVERAGE_LIMIT);
+GOMA_DECLARE_int32(MAX_SLEEP_TIME);
+#endif
+#ifdef _WIN32
+GOMA_DECLARE_bool(FAN_OUT_EXEC_REQ);
+#endif
+
+using devtools_goma::CompilerFlags;
+using devtools_goma::FileId;
+using devtools_goma::GomaIPC;
+using devtools_goma::GetMyDirectory;
+using devtools_goma::GetMyPathname;
+using devtools_goma::Getpid;
+using devtools_goma::GomaClient;
+using std::string;
+
+#ifndef _WIN32
+using devtools_goma::ReadCommandOutputByPopen;
+using devtools_goma::Execvpe;
+using devtools_goma::ExecvpeNonGomacc;
+#else
+using devtools_goma::SpawnAndWait;
+using devtools_goma::SpawnAndWaitNonGomacc;
+
+#endif
+
+namespace {
+
+// Dump for debugging.
+// Renders argv[0..argc) as a single "DEBUG: <message>:  <arg> <arg>...\n"
+// line.  Null entries are shown as "(null)".
+string DumpArgvString(
+    size_t argc, const char *argv[], const char *message) {
+  std::ostringstream line;
+  line << "DEBUG: " << message << ": ";
+  for (const char* const* arg = argv; arg != argv + argc; ++arg) {
+    line << " ";
+    if (*arg != nullptr) {
+      line << *arg;
+    } else {
+      line << "(null)";
+    }
+  }
+  line << "\n";
+  return line.str();
+}
+
+#ifndef _WIN32
+// Writes the DumpArgvString() rendering of |argv| to stderr.
+static void DumpArgv(size_t argc, const char *argv[], const char *message) {
+  const string dump = DumpArgvString(argc, argv, message);
+  std::cerr << dump;
+}
+#endif
+
+// Handles "gomacc port": asks GetCompilerProxyPort() for the port and prints
+// it to stdout, or prints the failure status to stderr.
+// Returns true when the invocation was recognized as a port request (even if
+// the request itself failed); returns false otherwise.
+bool HandleHttpPortRequest(int argc, char* argv[]) {
+  const bool is_port_command = argc >= 2 && strcmp(argv[1], "port") == 0;
+  if (!is_port_command) {
+    return false;
+  }
+
+  // The sub-command is honored only when invoked under gomacc's own name.
+  const StringPiece program = file::Basename(argv[0]);
+  bool is_gomacc = (program == "gomacc");
+#ifdef _WIN32
+  is_gomacc = is_gomacc || (program == "gomacc.exe");
+#endif
+  if (!is_gomacc) {
+    return false;
+  }
+
+  GomaIPC::Status status;
+  status.health_check_on_timeout = false;
+  const int port = GetCompilerProxyPort(&status);
+  if (port >= 0) {
+    std::cout << port << std::endl;
+    return true;
+  }
+
+  std::cerr << "GOMA: port request failed. "
+            << "connect_success: " << status.connect_success
+            << ", err: " << status.err
+            << " " << status.error_message
+            << ", http_return_code: " << status.http_return_code
+            << std::endl;
+  return true;
+}
+
+// Runs gomacc again with modification to get preprocessed code (-E) or
+// assembler code (-S) instead of object code (-c).
+//
+// args0_is_argv0: true if args[0] corresponds to argv[0] (it is then replaced
+//                 by gomacc's own path); false if args[0] must be passed on.
+// args:           the compiler command line.
+// new_option:     option substituted for "-c" (e.g. "-E" or "-S").
+// new_ext:        extension given to the output file (e.g. ".i" or ".s").
+//
+// Does nothing if |args| has no "-c", already has "-S"/"-E", or has "-o"
+// without an argument.  The function body is compiled out on Windows.
+void VerifyIntermediateStageOutput(bool args0_is_argv0,
+                                   const std::vector<string>& args,
+                                   const char* new_option,
+                                   const char* new_ext) {
+#ifndef _WIN32
+  // Unset GOMA_VERIFY_*_CODE not to run the same thing again.
+  unsetenv("GOMA_VERIFY_PREPROCESS_CODE");
+  unsetenv("GOMA_VERIFY_ASSEMBLER_CODE");
+
+  const string mypath = GetMyPathname();
+  std::vector<const char*> new_argv;
+  std::vector<string> outputs;
+  // new_argv keeps c_str() pointers into |outputs|.  Reserve up front so a
+  // later push_back never reallocates and leaves those pointers dangling.
+  outputs.reserve(args.size());
+  new_argv.push_back(mypath.c_str());
+  bool run_verify_output = false;
+  // TODO: refactor CompilerFlags and reuse here.
+  // args[0] represents real gcc/g++/javac command.
+  // mypath is realpath of argv[0].
+  // So, if args[0] is argv[0], we already set it as new_argv[0] (mypath), so
+  // we need to skip args[0].
+  // Otherwise, if args[0] is not argv[0], it would be invoked via gomacc
+  // (e.g. "gomacc gcc .."), so we need to set args[0] in new_argv.
+  for (size_t i = (args0_is_argv0 ? 1 : 0); i < args.size(); i++) {
+    if (args[i] == "-S" || args[i] == "-E") {
+      return;
+    } else if (args[i] == "-c") {
+      new_argv.push_back(new_option);
+      run_verify_output = true;
+      continue;
+    } else if (strncmp(args[i].c_str(), "-M", 2) == 0) {
+      // Dependency options are dropped.  -MF, -MT and -MQ each take a
+      // separate argument, which must be dropped with them.
+      if (args[i] == "-MF" || args[i] == "-MT" || args[i] == "-MQ")
+        ++i;
+      continue;
+    } else if (args[i] == "-o") {
+      if (i + 1 == args.size()) {
+        // argument to '-o' is missing.
+        return;
+      }
+      new_argv.push_back("-o");
+      ++i;
+    } else if (strncmp(args[i].c_str(), "-o", 2) != 0) {
+      new_argv.push_back(args[i].c_str());
+      continue;
+    }
+    // args[i] is filename or -ofilename.
+    // Replace its extension with |new_ext|.  Only a '.' in the last path
+    // component counts as an extension; if there is none, just append.
+    string output = args[i];
+    const size_t sep = output.find_last_of('/');
+    const size_t ext = output.find_last_of('.');
+    if (ext != string::npos && (sep == string::npos || ext > sep)) {
+      output.erase(ext);
+    }
+    output += new_ext;
+    outputs.push_back(output);
+    new_argv.push_back(outputs.back().c_str());
+  }
+  if (!run_verify_output)
+    return;
+
+  int argc = new_argv.size();
+  new_argv.push_back(nullptr);
+  if (FLAGS_DUMP_ARGV)
+    DumpArgv(argc, &new_argv[0], "verify intermediate");
+  pid_t pid = fork();
+  if (pid < 0) {
+    perror("fork");
+    return;
+  }
+  if (pid == 0) {
+    // Child process.
+    setenv("GOMA_VERIFY_OUTPUT", "true", 1);
+    execvp(mypath.c_str(), const_cast<char**>(&new_argv[0]));
+    perror("execvp");
+    // Never return into the caller from the child: it would carry on as a
+    // second copy of the parent.  _exit() also avoids flushing stdio buffers
+    // inherited from the parent.
+    _exit(EXIT_FAILURE);
+  }
+  int status = 0;
+  while (waitpid(pid, &status, 0) < 0) {
+    if (errno != EINTR) {
+      perror("waitpid");
+      return;
+    }
+  }
+  if (!WIFEXITED(status)) {
+    std::cerr << "VerifyAssemblerCode: exit_status=" << status << std::endl;
+  }
+#else
+  UNREFERENCED_PARAMETER(args0_is_argv0);
+  UNREFERENCED_PARAMETER(args);
+  UNREFERENCED_PARAMETER(new_option);
+  UNREFERENCED_PARAMETER(new_ext);
+#endif
+}
+
+}  // anonymous namespace
+
+// gomacc entry point.
+//
+// In order: checks flag names, initializes logging, handles the "port"
+// sub-command, builds the compiler command line from argv, and then either
+//  - runs the local command directly when goma is disabled or the command is
+//    not one CompilerFlags recognizes, or
+//  - issues the request via GomaClient::CallIPC() and outputs its response.
+// Returns the client's exit status, normalized to EXIT_FAILURE when it is
+// outside 0..0xff or when the IPC failed.
+int main(int argc, char* argv[], const char* envp[]) {
+  CheckFlagNames(envp);
+
+  GOOGLE_PROTOBUF_VERIFY_VERSION;
+  // TODO: Check their overhead if they are acceptable.
+  //               We might want to eliminate them in release version?
+  google::InitGoogleLogging(argv[0]);
+#ifndef _WIN32
+  google::InstallFailureSignalHandler();
+#else
+  WinsockHelper wsa;
+#endif
+  FLAGS_TMP_DIR = devtools_goma::GetGomaTmpDir();
+  devtools_goma::CheckTempDirectory(FLAGS_TMP_DIR);
+  if (FLAGS_GOMACC_ENABLE_CRASH_DUMP) {
+    devtools_goma::InitCrashReporter(devtools_goma::GetCrashDumpDirectory());
+  }
+  if (FLAGS_GOMACC_WRITE_LOG_FOR_TESTING) {
+    LOG(INFO) << "This is a log used by a test that need gomacc.INFO.";
+    fprintf(stderr, "log has been written. exiting...\n");
+    return 0;
+  }
+
+  if (HandleHttpPortRequest(argc, argv)) {
+    return 0;
+  }
+
+  std::vector<string> args;
+  bool masquerade_mode = false;
+  string verify_command;
+  string local_command_path;
+  if (!devtools_goma::BuildGomaccArgv(
+          argc, (const char**)argv,
+          &args, &masquerade_mode,
+          &verify_command, &local_command_path)) {
+    // no gcc or g++ in argv.
+    fprintf(stderr, "usage: %s [gcc|g++|cl] [options]\n", argv[0]);
+#ifndef _WIN32
+    const string& goma_ctl = file::JoinPath(GetMyDirectory(), "goma_ctl.py");
+    if (system(goma_ctl.c_str())) {
+      fprintf(stderr, "Failed to check compiler_proxy's status\n");
+    }
+#endif
+    exit(1);
+  }
+
+  // --goma-verify-command overrides the flags below.
+  if (!verify_command.empty()) {
+    FLAGS_VERIFY_COMMAND = verify_command;
+    FLAGS_USE_LOCAL = false;
+    FLAGS_FALLBACK = false;
+    FLAGS_STORE_ONLY = true;
+    FLAGS_RETRY = false;
+  }
+
+#ifdef __linux__
+  // For ChromiumOS.
+  if (!devtools_goma::CanGomaccHandleCwd()) {
+    FLAGS_DISABLED = true;
+  }
+#endif
+
+  if (FLAGS_DISABLED) {
+    if (masquerade_mode) {
+      local_command_path = argv[0];
+    }
+    // Non absolute path gcc won't be set to local_command_path but it should
+    // be set for this time.
+    // args[0] is the command BuildGomaccArgv found.  argv[1] is not always
+    // that command (e.g. "gomacc --goma-verify-command gcc ...").
+    if (local_command_path.empty()) {
+      local_command_path = args[0];
+    }
+
+    std::vector<string> envs;
+    envs.push_back("GOMA_WILL_FAIL_WITH_UKNOWN_FLAG=true");
+    for (int i = 0; envp[i]; ++i)
+      envs.push_back(envp[i]);
+
+    FileId gomacc_fileid(GetMyPathname());
+    CHECK(gomacc_fileid.IsValid());
+
+#ifdef __linux__
+    // For ChromiumOS.
+    // TODO: support other platforms?
+    float load = strtof(FLAGS_LOAD_AVERAGE_LIMIT.c_str(), nullptr);
+    if (load >= 1.0 && FLAGS_MAX_SLEEP_TIME > 0) {
+      devtools_goma::WaitUntilLoadAvgLowerThan(load, FLAGS_MAX_SLEEP_TIME);
+    } else {
+      LOG(WARNING) << "Will not wait for the low load average because of "
+                   << "wrong value."
+                   << " FLAGS_LOAD_AVERAGE_LIMIT=" << FLAGS_LOAD_AVERAGE_LIMIT
+                   << " FLAGS_MAX_SLEEP_TIME=" << FLAGS_MAX_SLEEP_TIME;
+    }
+#endif
+
+#ifdef _WIN32
+    // Not sure why, but using execve causes accessing of
+    // invalid memory address.
+    // b/69231578
+    // NOTE: SpawnAndWaitNonGomacc is not execve equivalent for windows.
+    exit(SpawnAndWaitNonGomacc(&gomacc_fileid, local_command_path, args, envs));
+#else
+    exit(ExecvpeNonGomacc(&gomacc_fileid, local_command_path, args, envs));
+#endif
+  }
+
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::New(args, "."));
+  if (flags.get() == nullptr) {
+    // TODO: handle all commands in compiler_proxy
+    if (local_command_path.empty()) {
+      // masquerade mode with unsupported command name
+      // or prepend mode with command basename.
+      fprintf(stderr, "usage: %s [gcc|g++|cl] [options]\n", argv[0]);
+      exit(1);
+    }
+    std::vector<string> envs;
+    for (int i = 0; envp[i]; ++i)
+      envs.push_back(envp[i]);
+    // prepend mode with command path.
+#ifdef _WIN32
+    // Not sure why, but using execve causes accessing of
+    // invalid memory address.
+    // b/69231578
+    // NOTE: SpawnAndWait is not execve equivalent for windows.
+    exit(SpawnAndWait(local_command_path, args, envs));
+#else
+    exit(Execvpe(local_command_path, args, envs));
+#endif
+  }
+  GomaClient client(Getpid(), std::move(flags), envp, local_command_path);
+
+  if (FLAGS_VERIFY_PREPROCESS_CODE) {
+    VerifyIntermediateStageOutput(masquerade_mode, args, "-E", ".i");
+  }
+  if (FLAGS_VERIFY_ASSEMBLER_CODE) {
+    VerifyIntermediateStageOutput(masquerade_mode, args, "-S", ".s");
+  }
+
+  GomaClient::Result r = client.CallIPC();
+  if (r != GomaClient::IPC_OK)
+    LOG(ERROR) << "GOMA: compiler proxy not working?";
+  int retval = (r != GomaClient::IPC_OK) ? EXIT_FAILURE : client.retval();
+
+  client.OutputResp();
+  // normalize exit status code to what could be handled by caller.
+  if (retval < 0 || retval > 0xff) {
+    return EXIT_FAILURE;
+  }
+  return retval;
+}
diff --git a/client/gomacc_argv.cc b/client/gomacc_argv.cc
new file mode 100644
index 0000000..3ab525f
--- /dev/null
+++ b/client/gomacc_argv.cc
@@ -0,0 +1,146 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "gomacc_argv.h"
+
+#include <string.h>
+
+#include <fstream>
+#include <sstream>
+
+#include "compiler_flags.h"
+#include "path.h"
+#include "string_piece_utils.h"
+
+namespace devtools_goma {
+
+static const char* kGomaVerifyCommandFlag = "--goma-verify-command";
+
+// Splits gomacc's own command line into the compiler command line |args|.
+// See gomacc_argv.h for the masquerade/prepend rules and the out-parameters.
+bool BuildGomaccArgv(int orig_argc, const char* orig_argv[],
+                     std::vector<string>* args,
+                     bool* masquerade_mode,
+                     string* verify_command,
+                     string* local_command_path) {
+  const size_t flag_len = strlen(kGomaVerifyCommandFlag);
+  const string progname = string(file::Basename(orig_argv[0]));
+
+  // Index in orig_argv of the compiler command; stays -1 until one is found.
+  int command_index = -1;
+  for (int i = 0; i < orig_argc; ++i) {
+    const char* const arg = orig_argv[i];
+    if (strings::StartsWith(arg, kGomaVerifyCommandFlag)) {
+      // --goma-verify-command is useful for end-to-end test.
+      // It always sends a compile request from compiler_proxy to remote server,
+      // ignores cache, and checks compiler version between local and remote.
+      // It also takes a parameter:
+      //  "none": doesn't check compiler version.
+      //  "version": check version string only
+      //  "checksum": check binary hash only
+      //  "all": check "version" and "checksum".
+      switch (arg[flag_len]) {
+        case '\0':
+          // Bare flag without a value.
+          *verify_command = "all";
+          break;
+        case '=':
+          *verify_command = arg + flag_len + 1;
+          break;
+        default:
+          // Something else follows the flag name; |verify_command| is left
+          // as is and validated below.
+          break;
+      }
+      const bool known_value =
+          *verify_command == "version" || *verify_command == "checksum" ||
+          *verify_command == "all" || *verify_command == "none";
+      if (!known_value) {
+        fprintf(stderr, "Wrong --goma-verify-command: %s\n",
+                verify_command->c_str());
+        fprintf(stderr,
+                " use \"version\", \"checksum\", \"all\" or \"none\".\n");
+        return false;
+      }
+      continue;
+    }
+
+    // option found without having gcc or g++ as command name.
+    bool is_option = (*arg == '-');
+#ifdef _WIN32
+    is_option = is_option || (*arg == '/');
+#endif
+    if (is_option)
+      break;
+
+    // found command name.
+    const StringPiece base = file::Basename(arg);
+    bool is_gomacc = (base == "gomacc");
+#ifdef _WIN32
+    is_gomacc = is_gomacc || (base == "gomacc.exe");
+#endif
+    if (is_gomacc)
+      continue;
+
+    command_index = i;
+    if (i != 0 && base != arg) {
+      // If this was not the first argument (i.e. symlinked name),
+      // and argv[i] is not basename, then we'll see this as local command path.
+      *local_command_path = arg;
+    }
+    break;
+  }
+  if (command_index < 0)
+    return false;
+
+  *masquerade_mode = (command_index == 0);
+  // if masqueraded mode, use basename of argv[0].
+  args->push_back(*masquerade_mode ? progname
+                                   : string(orig_argv[command_index]));
+  for (int i = command_index + 1; i < orig_argc; ++i) {
+    args->push_back(orig_argv[i]);
+  }
+  return true;
+}
+
+#ifdef _WIN32
+
+// Appends to |args_no_input| every element of |args| after args[0] that is
+// not listed in |input_filenames|.
+void FanOutArgsByInput(
+  const std::vector<string>& args,
+  const std::set<string>& input_filenames,
+  std::vector<string>* args_no_input) {
+  if (args.empty())
+    return;
+  // args[0] (argv0) is never copied.
+  for (auto it = args.begin() + 1; it != args.end(); ++it) {
+    const bool is_input = input_filenames.find(*it) != input_filenames.end();
+    if (!is_input) {
+      args_no_input->push_back(*it);
+    }
+  }
+}
+
+// Builds one command line of the form
+//   <escaped args_no_input...> <escaped input_filename>
+// Every element, including |input_filename|, is quoted with EscapeWinArg so
+// that a quote or trailing backslash in the filename cannot break the quoting.
+string BuildArgsForInput(
+    const std::vector<string>& args_no_input,
+    const string& input_filename) {
+  std::ostringstream rsp;
+  for (const auto& arg : args_no_input) {
+    rsp << EscapeWinArg(arg) << " ";
+  }
+  rsp << EscapeWinArg(input_filename);
+  return rsp.str();
+}
+
+// Wraps |arg| in double quotes and escapes it for the Windows command-line
+// parsing rules:
+//  - backslashes are literal unless they immediately precede a double quote;
+//  - 2n backslashes followed by '"' yield n backslashes and end the quoting;
+//  - 2n+1 backslashes followed by '"' yield n backslashes and a literal '"'.
+// A whole run of backslashes before an embedded '"' or before the closing
+// quote must therefore be doubled, not only the last one.
+string EscapeWinArg(const string& arg) {
+  string escaped;
+  escaped.reserve(arg.size() + 2);
+  escaped.push_back('"');
+  // Length of the backslash run seen so far and not yet emitted.
+  size_t pending_backslashes = 0;
+  for (const char c : arg) {
+    if (c == '\\') {
+      ++pending_backslashes;
+      continue;
+    }
+    if (c == '"') {
+      // Double the run, then add one backslash to escape the quote itself.
+      escaped.append(pending_backslashes * 2 + 1, '\\');
+    } else {
+      // Backslashes not followed by a quote are interpreted literally.
+      escaped.append(pending_backslashes, '\\');
+    }
+    escaped.push_back(c);
+    pending_backslashes = 0;
+  }
+  // A trailing run precedes the closing quote; double it so that the quote
+  // still terminates the argument.
+  escaped.append(pending_backslashes * 2, '\\');
+  escaped.push_back('"');
+  return escaped;
+}
+
+#endif  // _WIN32
+
+}  // namespace devtools_goma
diff --git a/client/gomacc_argv.h b/client/gomacc_argv.h
new file mode 100644
index 0000000..129b517
--- /dev/null
+++ b/client/gomacc_argv.h
@@ -0,0 +1,109 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_GOMACC_ARGV_H_
+#define DEVTOOLS_GOMA_CLIENT_GOMACC_ARGV_H_
+
+#include <set>
+#include <string>
+#include <vector>
+
+using std::string;
+
+namespace devtools_goma {
+
+// Builds "args" for ExecReq from gomacc argv.
+// If masqueraded, "masquerade_mode" set to true.
+// If --goma-verify-command is specified, set the value to "verify_command".
+// Possible value would be "none", "version", "checksum" and "all".
+// If prepend mode with path/to/compiler, set the path to "local_command_path".
+// Returns true if successful, returns false otherwise.
+//
+// - masqueraded: (e.g. ln -sf gomacc gcc, or copy gomacc.exe cl.exe)
+//  - masquerade in path. e.g. argv[0] = "gcc" or argv[0] = "cl.exe"
+//    for example
+//      "gcc", "-c", "hello.c"
+//      "cl.exe", "/c", "hello.c"
+//    send original argv to compiler_proxy
+//    =>
+//       "gcc", "-c", "hello.c",
+//            local_path=$PATH, gomacc_path=/gomadir/gcc
+//       "cl.exe", "/c", "hello.c"
+//            local_path=%PATH%, gomacc_path=c:\gomadir\cl.exe
+//    local_compiler_path from command name and local_path.
+//    (after a dir where gomacc masquerade exists).
+//  - masquerade full path. e.g. argv[0] = "/gomadir/gcc" or so.
+//    for example
+//      "/gomadir/gcc", "-c", "hello.c"
+//      "c:\gomadir\cl.exe", "/c", "hello.c"
+//    use basename of argv[0] to send compiler_proxy.
+//    =>
+//       "gcc", "-c", "hello.c"
+//            local_path=$PATH, gomacc_path=/gomadir/gcc
+//       "cl.exe", "/c", "hello.c"
+//            local_path=%PATH%, gomacc_path=c:\gomadir\cl.exe
+//    local_compiler_path from command name and local_path.
+//    maybe, local_path should not contain gomadir.
+//
+// - prepended: (e.g. gomacc gcc or gomacc.exe cl.exe)
+//  - prepended to no full path of compiler
+//    (gomacc may or may not be full path)
+//    for example
+//       "gomacc", "gcc", "-c", "hello.c"
+//       "gomacc.exe", "cl.exe", "/c", "hello.c"
+//    =>
+//       "gcc", "-c", "hello.c"
+//           local_path=$PATH, gomacc_path=/gomadir/gomacc
+//       "cl.exe", "/c", "hello.c"
+//           local_path=%PATH%, gomacc_path=c:\gomadir\gomacc.exe
+//    local_compiler_path from command name and local_path.
+//    local_path should not contain gomadir.
+//  - prepended to full path or current relative path of compiler
+//    (gomacc may or may not be full path)
+//    for example
+//       "gomacc", "/usr/bin/gcc", "-c", "hello.c"
+//       "gomacc.exe", "c:\vc\bin\cl.exe", "/c", "hello.c"
+//    =>
+//       "/usr/bin/gcc", "-c", "hello.c"
+//          local_path=$PATH, gomacc_path=/gomadir/gomacc
+//          local_compiler_path=/usr/bin/gcc
+//       "c:\vc\bin\cl.exe", "/c", "hello.c"
+//          local_path=%PATH%, gomacc_path=c:\gomadir\gomacc.exe
+//          local_compiler_path=c:\vc\bin\cl.exe
+//    local_compiler_path is the full path of compiler.
+bool BuildGomaccArgv(int orig_argc, const char* orig_argv[],
+                     std::vector<string>* args,
+                     bool* masquerade_mode,
+                     string* verify_command,
+                     string* local_command_path);
+
+#ifdef _WIN32
+// Used for GOMA_FAN_OUT_EXEC_REQ=true (under devenv or msbuild).
+
+// Fans out "args" for each input filename, and sets command line args
+// for each input filename in "args_no_input".
+// Note that "args_no_input" doesn't have argv0.
+void FanOutArgsByInput(
+    const std::vector<string>& args,
+    const std::set<string>& input_filenames,
+    std::vector<string>* args_no_input);
+
+// Creates command line per input file as
+//   args_no_input...  input_filename
+// The returned value is expected to be written in rsp_file, and
+// used as "cl @rsp_file".
+string BuildArgsForInput(
+    const std::vector<string>& args_no_input,
+    const string& input_filename);
+
+// Escape arg string for Windows.
+// http://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx
+string EscapeWinArg(const string& arg);
+
+#endif
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_GOMACC_ARGV_H_
diff --git a/client/gomacc_argv_unittest.cc b/client/gomacc_argv_unittest.cc
new file mode 100644
index 0000000..dc0543c
--- /dev/null
+++ b/client/gomacc_argv_unittest.cc
@@ -0,0 +1,565 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "gomacc_argv.h"
+
+#include <limits.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#ifndef _WIN32
+#include <unistd.h>
+#else
+#include <windows.h>
+#endif
+
+#include <string>
+#include <vector>
+
+#include "file.h"
+#include "ioutil.h"
+#include "mypath.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+using std::string;
+
+namespace devtools_goma {
+
+#ifndef _WIN32
+// Masquerade mode: argv[0] is the bare compiler name.
+TEST(GomaccArgvTest, BuildGomaccArgvMasqueradeGcc) {
+  std::vector<string> args;
+  // Opposite of the expected value, so the check proves it was written.
+  bool masquerade_mode = false;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"gcc", "-c", "hello.c"};
+
+  ASSERT_TRUE(BuildGomaccArgv(3, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: the checks below index into |args|.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("gcc", args[0]);
+  EXPECT_EQ("-c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_TRUE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_TRUE(local_command_path.empty());
+}
+
+// Masquerade mode with a full path in argv[0]: only the basename is kept.
+TEST(GomaccArgvTest, BuildGomaccArgvMasqueradeClang) {
+  std::vector<string> args;
+  // Opposite of the expected value, so the check proves it was written.
+  bool masquerade_mode = false;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"/gomadir/clang", "-c", "hello.c"};
+
+  ASSERT_TRUE(BuildGomaccArgv(3, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: the checks below index into |args|.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("clang", args[0]);
+  EXPECT_EQ("-c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_TRUE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_TRUE(local_command_path.empty());
+}
+
+// Prepend mode with a bare compiler name: no local command path is set.
+TEST(GomaccArgvTest, BuildGomaccArgvPrependBaseGcc) {
+  std::vector<string> args;
+  // Opposite of the expected value, so the check proves it was written.
+  bool masquerade_mode = true;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"gomacc", "gcc", "-c", "hello.c"};
+
+  ASSERT_TRUE(BuildGomaccArgv(4, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: the checks below index into |args|.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("gcc", args[0]);
+  EXPECT_EQ("-c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_TRUE(local_command_path.empty());
+}
+
+// Prepend mode, gomacc given by full path, bare compiler name.
+TEST(GomaccArgvTest, BuildGomaccArgvFullPathPrependBaseGcc) {
+  std::vector<string> args;
+  // Opposite of the expected value, so the check proves it was written.
+  bool masquerade_mode = true;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"/gomadir/gomacc", "gcc", "-c", "hello.c"};
+
+  ASSERT_TRUE(BuildGomaccArgv(4, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: the checks below index into |args|.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("gcc", args[0]);
+  EXPECT_EQ("-c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_TRUE(local_command_path.empty());
+}
+
+// Prepend mode with a relative compiler path: it becomes the local command.
+TEST(GomaccArgvTest, BuildGomaccArgvPrependPathGcc) {
+  std::vector<string> args;
+  // Opposite of the expected value, so the check proves it was written.
+  bool masquerade_mode = true;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"gomacc", "path/gcc", "-c", "hello.c"};
+
+  ASSERT_TRUE(BuildGomaccArgv(4, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: the checks below index into |args|.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("path/gcc", args[0]);
+  EXPECT_EQ("-c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_EQ("path/gcc", local_command_path);
+}
+
+// Prepend mode with an absolute compiler path: it becomes the local command.
+TEST(GomaccArgvTest, BuildGomaccArgvPrependFullPathGcc) {
+  std::vector<string> args;
+  // Opposite of the expected value, so the check proves it was written.
+  bool masquerade_mode = true;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"gomacc", "/usr/bin/gcc", "-c", "hello.c"};
+
+  ASSERT_TRUE(BuildGomaccArgv(4, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: the checks below index into |args|.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("/usr/bin/gcc", args[0]);
+  EXPECT_EQ("-c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_EQ("/usr/bin/gcc", local_command_path);
+}
+
+// Prepend mode, gomacc given by full path, relative compiler path.
+TEST(GomaccArgvTest, BuildGomaccArgvFullPathPrependPathGcc) {
+  std::vector<string> args;
+  // Opposite of the expected value, so the check proves it was written.
+  bool masquerade_mode = true;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"/gomadir/gomacc", "path/gcc", "-c", "hello.c"};
+
+  ASSERT_TRUE(BuildGomaccArgv(4, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: the checks below index into |args|.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("path/gcc", args[0]);
+  EXPECT_EQ("-c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_EQ("path/gcc", local_command_path);
+}
+
+// Prepend mode, both gomacc and the compiler given by absolute path.
+TEST(GomaccArgvTest, BuildGomaccArgvFullPathPrependFullPathGcc) {
+  std::vector<string> args;
+  // Opposite of the expected value, so the check proves it was written.
+  bool masquerade_mode = true;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"/gomadir/gomacc", "/usr/bin/gcc", "-c", "hello.c"};
+
+  ASSERT_TRUE(BuildGomaccArgv(4, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: the checks below index into |args|.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("/usr/bin/gcc", args[0]);
+  EXPECT_EQ("-c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_EQ("/usr/bin/gcc", local_command_path);
+}
+
+// Masquerade mode: --goma-verify-command after the command name is passed
+// through as an ordinary argument and does not set |verify_command|.
+TEST(GomaccArgvTest, BuildGomaccArgvMasqueradeVerifyCommandGcc) {
+  std::vector<string> args;
+  // Opposite of the expected value, so the check proves it was written.
+  bool masquerade_mode = false;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"gcc", "--goma-verify-command", "-c", "hello.c"};
+
+  ASSERT_TRUE(BuildGomaccArgv(4, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: the checks below index into |args|.
+  ASSERT_EQ(4U, args.size());
+  EXPECT_EQ("gcc", args[0]);
+  EXPECT_EQ("--goma-verify-command", args[1]);
+  EXPECT_EQ("-c", args[2]);
+  EXPECT_EQ("hello.c", args[3]);
+  EXPECT_TRUE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_TRUE(local_command_path.empty());
+}
+
+// Prepend mode: a bare --goma-verify-command before the compiler means "all"
+// and is removed from |args|.
+TEST(GomaccArgvTest, BuildGomaccArgvPrependVerifyCommandGcc) {
+  std::vector<string> args;
+  // Opposite of the expected value, so the check proves it was written.
+  bool masquerade_mode = true;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"gomacc", "--goma-verify-command",
+                        "gcc", "-c", "hello.c"};
+
+  ASSERT_TRUE(BuildGomaccArgv(5, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: the checks below index into |args|.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("gcc", args[0]);
+  EXPECT_EQ("-c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_EQ("all", verify_command);
+  EXPECT_TRUE(local_command_path.empty());
+}
+
+// Prepend mode: --goma-verify-command=version sets |verify_command|.
+TEST(GomaccArgvTest, BuildGomaccArgvPrependVerifyCommandVersionGcc) {
+  std::vector<string> args;
+  // Opposite of the expected value, so the check proves it was written.
+  bool masquerade_mode = true;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"gomacc", "--goma-verify-command=version",
+                        "gcc", "-c", "hello.c"};
+
+  ASSERT_TRUE(BuildGomaccArgv(5, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: the checks below index into |args|.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("gcc", args[0]);
+  EXPECT_EQ("-c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_EQ("version", verify_command);
+  EXPECT_TRUE(local_command_path.empty());
+}
+
+// Prepend mode: --goma-verify-command=checksum with an absolute compiler path.
+TEST(GomaccArgvTest, BuildGomaccArgvPrependVerifyCommandChecksumFullPathGcc) {
+  std::vector<string> args;
+  // Opposite of the expected value, so the check proves it was written.
+  bool masquerade_mode = true;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"gomacc", "--goma-verify-command=checksum",
+                        "/usr/bin/gcc", "-c", "hello.c"};
+
+  ASSERT_TRUE(BuildGomaccArgv(5, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: the checks below index into |args|.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("/usr/bin/gcc", args[0]);
+  EXPECT_EQ("-c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_EQ("checksum", verify_command);
+  EXPECT_EQ("/usr/bin/gcc", local_command_path);
+}
+
+// Prepend mode with an option where the command name should be: rejected.
+TEST(GomaccArgvTest, BuildGomaccArgvPrependFlag) {
+  std::vector<string> args;
+  // Initialized so that no indeterminate value is handed to the callee.
+  bool masquerade_mode = false;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"gomacc", "-c", "hello.c"};
+
+  EXPECT_FALSE(BuildGomaccArgv(3, argv,
+                               &args, &masquerade_mode,
+                               &verify_command, &local_command_path));
+}
+
+// Masquerade mode with a non-compiler name: still accepted at this level.
+TEST(GomaccArgvTest, BuildGomaccArgvMasqueradeNoCompiler) {
+  std::vector<string> args;
+  // Opposite of the expected value, so the check proves it was written.
+  bool masquerade_mode = false;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"echo", "test"};
+
+  ASSERT_TRUE(BuildGomaccArgv(2, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: the checks below index into |args|.
+  ASSERT_EQ(2U, args.size());
+  EXPECT_EQ("echo", args[0]);
+  EXPECT_EQ("test", args[1]);
+  EXPECT_TRUE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_TRUE(local_command_path.empty());
+}
+
+// Masquerade mode, full path to a non-compiler name: basename is kept.
+TEST(GomaccArgvTest, BuildGomaccArgvMasqueradeFullPathNoCompiler) {
+  std::vector<string> args;
+  // Opposite of the expected value, so the check proves it was written.
+  bool masquerade_mode = false;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"/gomadir/echo", "test"};
+
+  ASSERT_TRUE(BuildGomaccArgv(2, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: the checks below index into |args|.
+  ASSERT_EQ(2U, args.size());
+  EXPECT_EQ("echo", args[0]);
+  EXPECT_EQ("test", args[1]);
+  EXPECT_TRUE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_TRUE(local_command_path.empty());
+}
+
+// Prepend mode with a bare non-compiler name.
+TEST(GomaccArgvTest, BuildGomaccArgvPrependBaseNoCompiler) {
+  std::vector<string> args;
+  // Opposite of the expected value, so the check proves it was written.
+  bool masquerade_mode = true;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"gomacc", "echo", "test"};
+
+  ASSERT_TRUE(BuildGomaccArgv(3, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: the checks below index into |args|.
+  ASSERT_EQ(2U, args.size());
+  EXPECT_EQ("echo", args[0]);
+  EXPECT_EQ("test", args[1]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_TRUE(local_command_path.empty());
+}
+
+// Prepend mode with an absolute path to a non-compiler command.
+TEST(GomaccArgvTest, BuildGomaccArgvPrependoCompiler) {
+  std::vector<string> args;
+  // Opposite of the expected value, so the check proves it was written.
+  bool masquerade_mode = true;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"gomacc", "/bin/echo", "test"};
+
+  ASSERT_TRUE(BuildGomaccArgv(3, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: the checks below index into |args|.
+  ASSERT_EQ(2U, args.size());
+  EXPECT_EQ("/bin/echo", args[0]);
+  EXPECT_EQ("test", args[1]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_EQ("/bin/echo", local_command_path);
+}
+
+#else  // _WIN32
+// Invoked as "c:\gomadir\cl.exe /c hello.c": expects masquerade mode,
+// argv[0] reduced to "cl.exe" and no local command path.
+TEST(GomaccArgvTest, BuildGomaccArgvMasqueradeCl) {
+  std::vector<string> args;
+  // Initialized so the check below never reads an indeterminate value.
+  bool masquerade_mode = false;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"c:\\gomadir\\cl.exe", "/c", "hello.c"};
+
+  EXPECT_TRUE(BuildGomaccArgv(3, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: indexing |args| below would be out of bounds.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("cl.exe", args[0]);
+  EXPECT_EQ("/c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_TRUE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_TRUE(local_command_path.empty());
+}
+
+// Invoked as "gomacc.exe cl /c hello.c": expects prepend (non-masquerade)
+// mode, "gomacc.exe" stripped and no local command path for the bare "cl".
+TEST(GomaccArgvTest, BuildGomaccArgvPrependBaseCl) {
+  std::vector<string> args;
+  // Initialized so the check below never reads an indeterminate value.
+  bool masquerade_mode = false;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"gomacc.exe", "cl", "/c", "hello.c"};
+
+  EXPECT_TRUE(BuildGomaccArgv(4, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: indexing |args| below would be out of bounds.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("cl", args[0]);
+  EXPECT_EQ("/c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_TRUE(local_command_path.empty());
+}
+
+// Invoked as "c:\gomadir\gomacc.exe cl /c hello.c": a full path to gomacc
+// is still expected to be treated as prepend (non-masquerade) mode, with
+// no local command path for the bare "cl".
+TEST(GomaccArgvTest, BuildGomaccArgvFullPathPrependBaseCl) {
+  std::vector<string> args;
+  // Initialized so the check below never reads an indeterminate value.
+  bool masquerade_mode = false;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"c:\\gomadir\\gomacc.exe", "cl", "/c", "hello.c"};
+
+  EXPECT_TRUE(BuildGomaccArgv(4, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: indexing |args| below would be out of bounds.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("cl", args[0]);
+  EXPECT_EQ("/c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_TRUE(local_command_path.empty());
+}
+
+// Invoked as "gomacc path\cl /c hello.c": expects prepend (non-masquerade)
+// mode, the relative compiler path kept in args[0] and also reported as
+// the local command path.
+TEST(GomaccArgvTest, BuildGomaccArgvPrependPathCl) {
+  std::vector<string> args;
+  // Initialized so the check below never reads an indeterminate value.
+  bool masquerade_mode = false;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"gomacc", "path\\cl", "/c", "hello.c"};
+
+  EXPECT_TRUE(BuildGomaccArgv(4, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: indexing |args| below would be out of bounds.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("path\\cl", args[0]);
+  EXPECT_EQ("/c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_EQ("path\\cl", local_command_path);
+}
+
+// Invoked as "gomacc c:\vc\bin\cl /c hello.c": expects prepend
+// (non-masquerade) mode, the absolute compiler path kept in args[0] and
+// also reported as the local command path.
+TEST(GomaccArgvTest, BuildGomaccArgvPrependFullPathCl) {
+  std::vector<string> args;
+  // Initialized so the check below never reads an indeterminate value.
+  bool masquerade_mode = false;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"gomacc", "c:\\vc\\bin\\cl", "/c", "hello.c"};
+
+  EXPECT_TRUE(BuildGomaccArgv(4, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: indexing |args| below would be out of bounds.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("c:\\vc\\bin\\cl", args[0]);
+  EXPECT_EQ("/c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_EQ("c:\\vc\\bin\\cl", local_command_path);
+}
+
+// Invoked as "c:\gomadir\gomacc path\cl /c hello.c": full path to gomacc
+// plus a relative compiler path.  Expects prepend (non-masquerade) mode
+// with the compiler path kept in args[0] and as the local command path.
+TEST(GomaccArgvTest, BuildGomaccArgvFullPathPrependPathCl) {
+  std::vector<string> args;
+  // Initialized so the check below never reads an indeterminate value.
+  bool masquerade_mode = false;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"c:\\gomadir\\gomacc", "path\\cl", "/c", "hello.c"};
+
+  EXPECT_TRUE(BuildGomaccArgv(4, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: indexing |args| below would be out of bounds.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("path\\cl", args[0]);
+  EXPECT_EQ("/c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_EQ("path\\cl", local_command_path);
+}
+
+// Invoked as "c:\gomadir\gomacc c:\vc\bin\cl /c hello.c": full paths for
+// both gomacc and the compiler.  Expects prepend (non-masquerade) mode
+// with the compiler path kept in args[0] and as the local command path.
+TEST(GomaccArgvTest, BuildGomaccArgvFullPathPrependFullPathCl) {
+  std::vector<string> args;
+  // Initialized so the check below never reads an indeterminate value.
+  bool masquerade_mode = false;
+  string verify_command;
+  string local_command_path;
+  const char* argv[] = {"c:\\gomadir\\gomacc",
+                        "c:\\vc\\bin\\cl", "/c", "hello.c"};
+
+  EXPECT_TRUE(BuildGomaccArgv(4, argv,
+                              &args, &masquerade_mode,
+                              &verify_command, &local_command_path));
+  // Fatal on mismatch: indexing |args| below would be out of bounds.
+  ASSERT_EQ(3U, args.size());
+  EXPECT_EQ("c:\\vc\\bin\\cl", args[0]);
+  EXPECT_EQ("/c", args[1]);
+  EXPECT_EQ("hello.c", args[2]);
+  EXPECT_FALSE(masquerade_mode);
+  EXPECT_TRUE(verify_command.empty());
+  EXPECT_EQ("c:\\vc\\bin\\cl", local_command_path);
+}
+
+// Invoked as "gomacc /c hello.c": the argument right after gomacc is a
+// flag, and building the argv is expected to fail.
+TEST(GomaccArgvTest, BuildGomaccArgvPrependNoCl) {
+  const char* argv[] = {"gomacc", "/c", "hello.c"};
+  std::vector<string> args;
+  string verify_command;
+  string local_command_path;
+  bool masquerade_mode;
+
+  const bool built = BuildGomaccArgv(3, argv, &args, &masquerade_mode,
+                                     &verify_command, &local_command_path);
+  EXPECT_FALSE(built);
+}
+#endif  // _WIN32
+
+// Invoked as plain "gomacc" with nothing after it: building the argv is
+// expected to fail.
+TEST(GomaccArgvTest, BuildGomaccArgvNoCompiler) {
+  const char* argv[] = {"gomacc"};
+  std::vector<string> args;
+  string verify_command;
+  string local_command_path;
+  bool masquerade_mode;
+
+  const bool built = BuildGomaccArgv(1, argv, &args, &masquerade_mode,
+                                     &verify_command, &local_command_path);
+  EXPECT_FALSE(built);
+}
+
+#ifdef _WIN32
+
+// FanOutArgsByInput() on a cl command line with three inputs: the output
+// is expected to hold every argument except the compiler name (args[0])
+// and the input filenames, in their original order.
+TEST(GomaccArgvTest, FanOutArgsByInput) {
+  std::vector<string> args = {
+    "cl",
+    "/c",
+    "/DFOO",
+    "/Ic:\\vc\\include",
+    "/Fo..\\obj\\",
+    "/Fdfoo.pdb",
+    "foo.cpp",
+    "bar.cpp",
+    "baz.cpp",
+    "/MP",
+  };
+  std::set<string> input_filenames = {"foo.cpp", "bar.cpp", "baz.cpp"};
+
+  std::vector<string> args_no_input;
+  FanOutArgsByInput(args, input_filenames, &args_no_input);
+  // Fatal on mismatch: indexing |args_no_input| below would be out of
+  // bounds.
+  ASSERT_EQ(6U, args_no_input.size());
+  EXPECT_EQ("/c", args_no_input[0]);
+  EXPECT_EQ("/DFOO", args_no_input[1]);
+  EXPECT_EQ("/Ic:\\vc\\include", args_no_input[2]);
+  EXPECT_EQ("/Fo..\\obj\\", args_no_input[3]);
+  EXPECT_EQ("/Fdfoo.pdb", args_no_input[4]);
+  EXPECT_EQ("/MP", args_no_input[5]);
+}
+
+// BuildArgsForInput() is expected to quote/escape every shared argument,
+// join them with spaces and append the (quoted) input filename last.
+TEST(GomaccArgvTest, BuildArgsForInput) {
+  std::vector<string> args_no_input = {
+    "/c",
+    "/DFOO=\"foo.h\"",
+    "/Ic:\\vc\\include",
+    "/Fo..\\obj\\",
+    "/Fdfoo.pdb",
+    "/MP",
+  };
+  const string expected_cmdline =
+      "\"/c\" \"/DFOO=\\\"foo.h\\\"\" \"/Ic:\\vc\\include\" "
+      "\"/Fo..\\obj\\\\\" \"/Fdfoo.pdb\" \"/MP\" \"foo.cpp\"";
+
+  EXPECT_EQ(expected_cmdline, BuildArgsForInput(args_no_input, "foo.cpp"));
+}
+
+// Table-driven check of EscapeWinArg(): each row pairs a raw argument with
+// the escaped form the function is expected to return.
+TEST(GomaccArgvTest, EscapeWinArg) {
+  static const struct {
+    const char* raw;
+    const char* escaped;
+  } kCases[] = {
+    {"foo", "\"foo\""},
+    {"foo\\bar", "\"foo\\bar\""},
+    {"foo bar", "\"foo bar\""},
+    {"foo=\"bar\"", "\"foo=\\\"bar\\\"\""},
+    {"foo\\", "\"foo\\\\\""},
+    {"foo\\\\", "\"foo\\\\\\\""},
+  };
+
+  for (const auto& test_case : kCases) {
+    EXPECT_EQ(test_case.escaped, EscapeWinArg(test_case.raw));
+  }
+}
+
+#endif
+
+}  // namespace devtools_goma
diff --git a/client/gomacc_common.cc b/client/gomacc_common.cc
new file mode 100644
index 0000000..12f81e3
--- /dev/null
+++ b/client/gomacc_common.cc
@@ -0,0 +1,781 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "gomacc_common.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#ifndef _WIN32
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#endif
+
+#include <algorithm>
+#include <deque>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <set>
+#include <sstream>
+
+#include "compiler_flags.h"
+#include "compiler_proxy_info.h"
+#include "compiler_specific.h"
+#include "env_flags.h"
+#include "file_helper.h"
+#include "file_id.h"
+#include "glog/logging.h"
+#include "goma_ipc_addr.h"
+#include "gomacc_argv.h"
+#include "ioutil.h"
+#include "join.h"
+#include "mypath.h"
+#ifdef _WIN32
+#include "named_pipe_client_win.h"
+#endif
+#include "path.h"  // file::JoinPath
+#include "platform_thread.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_data.pb.h"
+MSVC_POP_WARNING()
+#include "scoped_fd.h"
+#include "simple_timer.h"
+#include "socket_factory.h"
+#include "split.h"
+#include "strutil.h"
+#include "subprocess.h"
+#include "util.h"
+
+#define GOMA_DECLARE_FLAGS_ONLY
+#include "goma_flags.cc"
+
+#ifdef _WIN32
+#define isatty(x) false
+#endif
+
+namespace devtools_goma {
+
+#ifdef _WIN32
+// TODO: move it into goma_ipc, and use it in goma_ipc_unittest.cc?
+//
+// GomaIPC::ChanFactory that hands out channels created by a
+// NamedPipeFactory for the pipe called |name|.
+class GomaIPCNamedPipeFactory : public GomaIPC::ChanFactory {
+ public:
+  explicit GomaIPCNamedPipeFactory(const string& name) : factory_(name) {}
+  ~GomaIPCNamedPipeFactory() override = default;
+
+  // Not copyable.
+  GomaIPCNamedPipeFactory(const GomaIPCNamedPipeFactory&) = delete;
+  GomaIPCNamedPipeFactory& operator=(const GomaIPCNamedPipeFactory&) = delete;
+
+  // Returns a new channel wrapping a freshly created pipe, or nullptr when
+  // the underlying factory did not produce a valid pipe.
+  std::unique_ptr<IOChannel> New() override {
+    ScopedNamedPipe named_pipe = factory_.New();
+    if (named_pipe.valid()) {
+      return std::unique_ptr<IOChannel>(
+          new ScopedNamedPipe(std::move(named_pipe)));
+    }
+    return nullptr;
+  }
+
+  // Destination name as reported by the underlying factory.
+  string DestName() const override { return factory_.DestName(); }
+
+ private:
+  NamedPipeFactory factory_;
+};
+
+#else
+// GomaIPC::ChanFactory that connects an AF_GOMA_IPC stream socket to the
+// address derived from |socket_path|.
+class GomaIPCSocketFactory : public GomaIPC::ChanFactory {
+ public:
+  explicit GomaIPCSocketFactory(const string& socket_path)
+      : socket_path_(socket_path), addr_(nullptr), addr_len_(0) {
+    // |addr_| points into this object, at |un_addr_|.
+    addr_ = reinterpret_cast<const sockaddr*>(&un_addr_);
+    addr_len_ = InitializeGomaIPCAddress(socket_path_, &un_addr_);
+  }
+  ~GomaIPCSocketFactory() override = default;
+
+  // Returns a connected, non-blocking socket channel, or nullptr when the
+  // socket cannot be created, connected or switched to non-blocking mode.
+  std::unique_ptr<IOChannel> New() override {
+    ScopedSocket sock(socket(AF_GOMA_IPC, SOCK_STREAM, 0));
+    if (!sock.valid()) {
+      return nullptr;
+    }
+    if (connect(sock.get(), addr_, addr_len_) == -1) {
+      return nullptr;
+    }
+    if (!sock.SetNonBlocking()) {
+      LOG(ERROR) << "GOMA: failed to set nonblocking: " << sock.get();
+      return nullptr;
+    }
+    return std::unique_ptr<IOChannel>(new ScopedSocket(std::move(sock)));
+  }
+
+  // The socket path this factory connects to.
+  string DestName() const override { return socket_path_; }
+
+ private:
+  const string socket_path_;
+  GomaIPCAddr un_addr_;
+  const sockaddr* addr_;
+  socklen_t addr_len_;
+  DISALLOW_COPY_AND_ASSIGN(GomaIPCSocketFactory);
+};
+#endif
+
+// Asks compiler_proxy for its HTTP port through the "/portz" IPC call.
+// Returns the port from the response, or -1 when the call fails.  When
+// |status| is nullptr a local Status (with health_check_on_timeout
+// disabled) is used instead.
+int GetCompilerProxyPort(GomaIPC::Status* status) {
+#ifndef _WIN32
+  std::unique_ptr<GomaIPC::ChanFactory> chan_factory(
+      new GomaIPCSocketFactory(file::JoinPathRespectAbsolute(
+          GetGomaTmpDir(), FLAGS_COMPILER_PROXY_SOCKET_NAME)));
+#else
+  std::unique_ptr<GomaIPC::ChanFactory> chan_factory(
+      new GomaIPCNamedPipeFactory(FLAGS_COMPILER_PROXY_SOCKET_NAME));
+#endif
+  GomaIPC goma_ipc(std::move(chan_factory));
+
+  // Fallback used only when the caller did not supply a Status.
+  GomaIPC::Status fallback_status;
+  fallback_status.health_check_on_timeout = false;
+  GomaIPC::Status* effective_status =
+      (status != nullptr) ? status : &fallback_status;
+
+  devtools_goma::EmptyMessage request;
+  devtools_goma::HttpPortResponse response;
+  const bool call_failed =
+      goma_ipc.Call("/portz", &request, &response, effective_status) < 0;
+  if (call_failed) {
+    return -1;
+  }
+  return response.port();
+}
+
+// Starts compiler_proxy if it is not already answering on its IPC channel
+// and waits until it does.
+//
+// Exits the process with status 1 when FLAGS_START_COMPILER_PROXY is
+// false.  Returns false when FLAGS_COMPILER_PROXY_BINARY is empty, when
+// the startup lock cannot be taken (or, on Windows, somebody else already
+// holds it), when a recent start attempt appears to have failed
+// (non-Windows only), or when the proxy cannot be forked or dies before it
+// becomes ready.  Returns true once GetCompilerProxyPort() succeeds.
+bool StartCompilerProxy() {
+  if (!FLAGS_START_COMPILER_PROXY) {
+    fprintf(stderr,
+            "compiler proxy isn't running. Exiting.");
+    exit(1);
+  }
+
+  if (FLAGS_COMPILER_PROXY_BINARY.empty()) {
+    return false;
+  }
+
+  // Try to start up an instance of compiler proxy if it's not
+  // already started.
+  std::cerr << "GOMA: GOMA_START_COMPILER_PROXY=true."
+            << " Starting compiler proxy" << std::endl;
+#ifndef _WIN32
+  // Serialize concurrent gomacc processes with an exclusive flock() on the
+  // lock file; the lock is held through |lock_fd| for the rest of this
+  // function.
+  devtools_goma::ScopedFd lock_fd(open(FLAGS_GOMACC_LOCK_FILENAME.c_str(),
+                                       O_RDONLY|O_CREAT,
+                                       0644));
+  if (!lock_fd.valid()) {
+    perror("open");
+    std::cerr << "GOMA: Cannot open " << FLAGS_GOMACC_LOCK_FILENAME
+              << std::endl;
+    return false;
+  }
+
+  if (flock(lock_fd.fd(), LOCK_EX) == -1) {
+    perror("flock failed");
+    // Some weird error happened when trying to lock.
+    return false;
+  }
+#else
+  // On Windows a named event plays the role of the lock: if it already
+  // exists, another process is in the middle of starting the proxy.
+  devtools_goma::ScopedFd lock_fd;
+  lock_fd.reset(CreateEventA(nullptr, TRUE, FALSE,
+                             FLAGS_GOMACC_LOCK_GLOBALNAME.c_str()));
+  DWORD last_error = GetLastError();
+  if (last_error == ERROR_ALREADY_EXISTS) {
+    std::cerr << "GOMA: Someone already starting compiler proxy.";
+    return false;
+  }
+  if (!lock_fd.valid()) {
+    std::cerr << "GOMA: Cannot acquire global named object: " << last_error;
+  }
+#endif
+
+  // Re-check now that the lock is taken: the proxy may have been started
+  // in the meantime.
+  if (GetCompilerProxyPort(nullptr) >= 0) {
+    if (FLAGS_DUMP) {
+      std::cerr << "GOMA: Someone else already ran compiler proxy.";
+    }
+    return true;
+  }
+
+#ifndef _WIN32
+  const string daemon_stderr = file::JoinPathRespectAbsolute(
+      GetGomaTmpDir(), FLAGS_COMPILER_PROXY_DAEMON_STDERR);
+  if (!FLAGS_COMPILER_PROXY_DAEMON_STDERR.empty() &&
+      FLAGS_GOMACC_COMPILER_PROXY_RESTART_DELAY > 0) {
+    struct stat st;
+    if (stat(daemon_stderr.c_str(), &st) != -1) {
+      struct timeval tv;
+      PCHECK(gettimeofday(&tv, nullptr) == 0);
+      // A non-empty, recently modified stderr file is taken as the sign of
+      // a recent failed start.
+      if (st.st_size > 0 &&
+          tv.tv_sec - st.st_mtime <
+          FLAGS_GOMACC_COMPILER_PROXY_RESTART_DELAY) {
+        // Don't retry starting proxy too soon if the last attempt seems
+        // to have failed.
+        return false;
+      }
+    }
+  }
+#endif
+
+  const string& compiler_proxy_binary = file::JoinPath(
+      GetMyDirectory(), FLAGS_COMPILER_PROXY_BINARY);
+
+  if (FLAGS_DUMP) {
+    std::cerr << "GOMA: " << "Invoke " << compiler_proxy_binary << std::endl;
+  }
+
+#ifndef _WIN32
+  // The child reports the daemonized proxy's pid back through this pipe.
+  int pipe_fd[2];
+  PCHECK(pipe(pipe_fd) == 0);
+
+  pid_t pid;
+  if (!(pid = fork())) {
+    // child process, run compiler_proxy with default arguments.
+
+    lock_fd.Close();
+    close(pipe_fd[0]);
+
+    std::set<int> preserve_fds;
+    Daemonize(daemon_stderr, pipe_fd[1], preserve_fds);
+
+    unsetenv("GOMA_COMPILER_PROXY_DAEMON_MODE");
+    if (execlp(compiler_proxy_binary.c_str(),
+                compiler_proxy_binary.c_str(),
+                nullptr) == -1) {
+      perror(("execlp compiler_proxy (" +
+              compiler_proxy_binary +  ")").c_str());
+    }
+    exit(1);
+  } else if (pid < 0) {
+    // did not succeed in fork()
+    perror("fork");
+    std::cerr << "GOMA: fork failed." << std::endl;
+    return false;
+  }
+
+  // Read out the proxy's actual pid.
+  close(pipe_fd[1]);
+  if (read(pipe_fd[0], &pid, sizeof(pid)) != sizeof(pid)) {
+    char buf[1024];
+    // Meaning of returned value of strerror_r is different between
+    // XSI and GNU. Need to ignore.
+    (void)strerror_r(errno, buf, sizeof buf);
+    std::cerr << "GOMA: Could not get the proxy's pid.  Something went wrong:"
+              << buf << std::endl;
+    close(pipe_fd[0]);
+    return false;
+  }
+  close(pipe_fd[0]);
+#else
+  PROCESS_INFORMATION pi;
+  STARTUPINFOA si;
+
+  ZeroMemory(&pi, sizeof(PROCESS_INFORMATION));
+  ZeroMemory(&si, sizeof(STARTUPINFO));
+  si.cb = sizeof(STARTUPINFO);
+
+  string path_env = GetEnv("PATH");
+  CHECK(!path_env.empty()) << "No PATH env. found.";
+  string command_path;
+  // Note: "" to use the Windows default pathext.
+  if (!GetRealExecutablePath(
+      nullptr, "cmd.exe", "", path_env, "", &command_path, nullptr, nullptr)) {
+    std::cerr << "GOMA: failed to find cmd.exe: "
+              << " path_env=" << path_env
+              << std::endl;
+  }
+  // Run the proxy through "cmd.exe /k" as a detached process.
+  string command_line = command_path + " /k \"";
+  command_line = command_line + compiler_proxy_binary;
+  command_line = command_line + "\"";
+  if (CreateProcessA(command_path.c_str(), &command_line[0], nullptr, nullptr,
+                     FALSE, DETACHED_PROCESS, nullptr, nullptr, &si, &pi)) {
+    CloseHandle(pi.hThread);
+  } else {
+    DWORD error = GetLastError();
+    std::cerr << "GOMA: failed to start compiler_proxy: " << error
+              << std::endl;
+  }
+#endif
+
+  int num_retries = 0;
+  // Wait until compiler proxy becomes ready.
+  while (GetCompilerProxyPort(nullptr) < 0) {
+    // Make sure the proxy is running.
+#ifndef _WIN32
+    if (kill(pid, 0) == -1) {
+      std::cerr << "GOMA: Failed to start compiler proxy." << std::endl;
+      return false;
+    }
+#else
+    DWORD exit_code = 0;
+    GetExitCodeProcess(pi.hProcess, &exit_code);
+    if (exit_code != STILL_ACTIVE) {
+      std::cerr << "GOMA: compiler proxy died with exit code "
+                << exit_code << std::endl;
+      return false;
+    }
+#endif
+
+    // Count each 100ms wait exactly once.  Once the loop has slept for
+    // 3 seconds (30 waits), warn, and then warn again every 10 waits.
+    ++num_retries;
+    if (num_retries >= 30 && num_retries % 10 == 0) {
+      std::cerr << "GOMA: Compiler proxy is taking too much time to start. "
+                << "Something might go wrong." << std::endl;
+    }
+    // Wait 100ms.
+    PlatformThread::Sleep(100);
+  }
+
+  return true;
+}
+
+// Creates a client for one compile request.
+//
+// |id| is kept as the client id, |flags| (ownership taken) describes the
+// command line, the client-important entries of |envp| are extracted into
+// |envs_|, and |local_compiler_path| is kept for the request's command
+// spec.  Also builds |name_|, a short description used in log messages.
+GomaClient::GomaClient(int id, std::unique_ptr<CompilerFlags> flags,
+                       const char** envp, const string& local_compiler_path)
+#ifndef _WIN32
+    : goma_ipc_(std::unique_ptr<GomaIPC::ChanFactory>(new GomaIPCSocketFactory(
+          file::JoinPathRespectAbsolute(GetGomaTmpDir(),
+                                        FLAGS_COMPILER_PROXY_SOCKET_NAME)))),
+#else
+    : goma_ipc_(std::unique_ptr<GomaIPC::ChanFactory>(
+            new GomaIPCNamedPipeFactory(FLAGS_COMPILER_PROXY_SOCKET_NAME))),
+#endif
+      id_(id),
+      flags_(std::move(flags)),
+      local_compiler_path_(local_compiler_path) {
+  flags_->GetClientImportantEnvs(envp, &envs_);
+
+#ifdef _WIN32
+  if (flags_->is_vc()) {
+    for (const auto& file : flags_->optional_input_filenames()) {
+      // Open the file while gomacc running to prevent from removal.
+      // The ScopedFd objects are deleted in the destructor.
+      optional_files_.push_back(new ScopedFd(ScopedFd::OpenForRead(file)));
+    }
+  }
+#endif
+  // used for logging: "<compiler> [<action>] [<first input or output>]".
+  std::vector<string> info;
+  info.push_back(flags_->compiler_name());
+  if (flags_->is_gcc()) {
+    const GCCFlags& gcc_flags = static_cast<const GCCFlags&>(*flags_);
+    switch (gcc_flags.mode()) {
+      case GCCFlags::PREPROCESS:
+        info.push_back("preprocessing");
+        if (!flags_->input_filenames().empty())
+          info.push_back(flags_->input_filenames()[0]);
+        break;
+      case GCCFlags::COMPILE:
+        info.push_back("compiling");
+        if (!flags_->input_filenames().empty())
+          info.push_back(flags_->input_filenames()[0]);
+        break;
+      case GCCFlags::LINK:
+        // For links the first output file is named instead of an input.
+        info.push_back("linking");
+        if (!flags_->output_files().empty())
+          info.push_back(flags_->output_files()[0]);
+        break;
+    }
+  } else {
+    if (!flags_->input_filenames().empty())
+      info.push_back(flags_->input_filenames()[0]);
+  }
+  JoinStrings(info, " ", &name_);
+}
+
+// Removes the temporary stdin file (if one was created) and, on Windows,
+// releases the handles kept open on rsp/optional files and deletes the
+// generated rsp files.
+GomaClient::~GomaClient() {
+  if (stdin_file_.valid()) {
+    remove(stdin_filename_.c_str());
+  }
+#ifdef _WIN32
+  for (const auto& rsp_entry : rsp_files_) {
+    // Close the handle first, then delete the file it kept open.
+    delete rsp_entry.second;
+    DeleteFileA(rsp_entry.first.c_str());
+  }
+  for (const auto* optional_fd : optional_files_) {
+    delete optional_fd;
+  }
+#endif
+}
+
+// Writes out the received response: the multi-exec response when one
+// exists (Windows only), otherwise the single exec response, which must
+// exist.
+void GomaClient::OutputResp() {
+#ifdef _WIN32
+  auto* multi_resp = multi_exec_resp_.get();
+  if (multi_resp != nullptr) {
+    OutputMultiExecResp(multi_resp);
+    return;
+  }
+#endif
+  CHECK(exec_resp_.get() != nullptr);
+  OutputExecResp(exec_resp_.get());
+}
+
+// Returns the exit status of the response.  For a multi-exec response
+// (Windows only) this is the first non-zero exit status among the
+// individual responses, or 0 when all of them are zero.
+int GomaClient::retval() const {
+#ifdef _WIN32
+  if (multi_exec_resp_.get()) {
+    for (const auto& entry : multi_exec_resp_->response()) {
+      const auto exit_status = entry.resp().result().exit_status();
+      if (exit_status != 0) {
+        return exit_status;
+      }
+    }
+    return 0;
+  }
+#endif
+  CHECK(exec_resp_.get());
+  return exec_resp_->result().exit_status();
+}
+
+// Call IPC Request. Return IPC_OK if successful.
+//
+// Builds the request ("/me" multi-exec on Windows when fan-out is enabled
+// and there is more than one input, "/e" otherwise) and dispatches it
+// without waiting for the response.  Returns IPC_REJECTED when the
+// connection succeeded but the call was refused, and IPC_FAIL when the
+// request could not be built or when the proxy could not be reached even
+// after trying to start it.
+GomaClient::Result GomaClient::CallIPCAsync() {
+  string request_path;
+  std::unique_ptr<google::protobuf::Message> req;
+
+#ifdef _WIN32
+  if (FLAGS_FAN_OUT_EXEC_REQ && flags_->input_filenames().size() > 1) {
+    std::unique_ptr<MultiExecReq> multi_exec_req(new MultiExecReq);
+    request_path = "/me";
+    if (!PrepareMultiExecRequest(multi_exec_req.get())) {
+      // Never send a partially built fan-out request: the inputs that
+      // failed to be prepared would silently be left uncompiled.
+      return IPC_FAIL;
+    }
+    req = std::move(multi_exec_req);
+    multi_exec_resp_.reset(new MultiExecResp);
+  } else {
+#endif
+    std::unique_ptr<ExecReq> exec_req(new ExecReq);
+    request_path = "/e";
+    if (!PrepareExecRequest(*flags_, exec_req.get())) {
+      return IPC_FAIL;
+    }
+    req = std::move(exec_req);
+    exec_resp_.reset(new ExecResp);
+#ifdef _WIN32
+  }
+#endif
+  if (FLAGS_DUMP_REQUEST) {
+    std::cerr << "GOMA:" << name_ << ": " << req->DebugString();
+  }
+  status_ = GomaIPC::Status();
+  ipc_chan_ = goma_ipc_.CallAsync(request_path, req.get(), &status_);
+  if (ipc_chan_ == nullptr) {
+    if (status_.connect_success == true) {
+      // Connected, but the call itself was refused.
+      if (status_.http_return_code == 401) {
+        std::cerr << "GOMA: Authentication failed (401)" << std::endl;
+      } else if (status_.http_return_code == 400) {
+        std::cerr << "GOMA: Bad request (400)" << std::endl;
+      } else if (FLAGS_DUMP) {
+        std::cerr << "GOMA: IPC Connection was successful but RPC failed"
+                  << std::endl;
+      }
+
+      return IPC_REJECTED;
+    } else {
+      // If the failure reason was failure to connect, try starting
+      // compiler proxy and retry the request.
+      if (StartCompilerProxy()) {
+        status_ = GomaIPC::Status();
+        ipc_chan_ = goma_ipc_.CallAsync(request_path, req.get(), &status_);
+        if (ipc_chan_ != nullptr) {
+          // retry after starting compiler_proxy was successful
+          if (FLAGS_DUMP) {
+            std::cerr << "GOMA: Retry after starting compiler_proxy success"
+                      << std::endl;
+          }
+        } else {
+          // Even if we retried, we weren't successful, give up.
+          if (FLAGS_DUMP) {
+            std::cerr << "GOMA: Retry after starting compiler_proxy was "
+                "unsuccessful" << std::endl;
+          }
+          return IPC_FAIL;
+        }
+      } else {
+        // Starting compiler proxy was unsuccessful
+        if (FLAGS_DUMP) {
+          std::cerr << "GOMA: Could not connect to compiler_proxy and "
+              "starting it failed." << std::endl;
+        }
+        return IPC_FAIL;
+      }
+    }
+  }
+  return IPC_OK;
+}
+
+// Waits for the request dispatched by CallIPCAsync() to finish, records
+// the send/receive times from |status_| and, depending on flags, dumps
+// and/or outputs the response.  Returns IPC_FAIL when the wait does not
+// report OK, IPC_OK otherwise.
+GomaClient::Result GomaClient::WaitIPC() {
+  DCHECK(ipc_chan_ != nullptr);
+
+  // Pick the response message to fill in; the single exec response wins
+  // when both are present.
+  google::protobuf::Message* response = nullptr;
+#ifdef _WIN32
+  if (multi_exec_resp_.get()) {
+    response = multi_exec_resp_.get();
+  }
+#endif
+  if (exec_resp_.get()) {
+    response = exec_resp_.get();
+  }
+
+  const auto wait_result =
+      goma_ipc_.Wait(std::move(ipc_chan_), response, &status_);
+  if (wait_result != OK) {
+    return IPC_FAIL;
+  }
+
+  req_send_time_ = status_.req_send_time;
+  resp_recv_time_ = status_.resp_recv_time;
+
+  // Measures the time spent dumping/writing the response below.
+  SimpleTimer timer;
+  if (FLAGS_DUMP_RESPONSE) {
+    std::cerr << "GOMA:" << name_ << ": " << response->DebugString();
+  }
+  if (FLAGS_OUTPUT_EXEC_RESP) {
+    OutputResp();
+  }
+  // TODO: check output files are written?
+
+  if (FLAGS_DUMP_TIME) {
+    resp_write_time_ = timer.Get();
+    std::cerr << "GOMA:" << name_
+              << " send/recv/write="
+              << req_send_time_ << "/"
+              << resp_recv_time_ << "/"
+              << resp_write_time_ << std::endl;
+    // TODO: show more time metrics.
+  }
+  return IPC_OK;
+}
+
+// Copies everything readable from standard input into a newly created
+// temporary file under the goma tmp dir and returns that file's name.
+// The name and descriptor are remembered in |stdin_filename_| and
+// |stdin_file_|.
+string GomaClient::CreateStdinFile() {
+#ifndef _WIN32
+  stdin_filename_ = file::JoinPath(GetGomaTmpDir(), "gomacc.stdin.XXXXXX");
+  // mkstemp() rewrites the XXXXXX suffix in place.
+  stdin_file_.reset(mkstemp(&stdin_filename_[0]));
+  char buf[4096];
+  while (true) {
+    const int r = read(STDIN_FILENO, buf, sizeof buf);
+    if (r == 0) {
+      // End of input.
+      break;
+    }
+    if (r < 0) {
+      if (errno == EINTR) {
+        // Interrupted by a signal: just retry the read.
+        continue;
+      }
+      PLOG(ERROR) << "read";
+      break;
+    }
+    PCHECK(write(stdin_file_.fd(), buf, r) == r);
+  }
+#else
+  char temp_path[MAX_PATH] = {0};
+  GetTempFileNameA(GetGomaTmpDir().c_str(), "gomacc.stdin", 0, temp_path);
+  stdin_filename_ = temp_path;
+  stdin_file_.reset(ScopedFd::Create(stdin_filename_, 0600));
+  char chunk[4096];
+  for (;;) {
+    const size_t bytes_read = fread(chunk, 1, 4096, stdin);
+    if (bytes_read == 0) {
+      break;
+    }
+    stdin_file_.Write(chunk, bytes_read);
+  }
+#endif
+  return stdin_filename_;
+}
+
+// Blocking call: dispatches the request and, if that succeeded, waits for
+// the response.  A dispatch failure is returned unchanged.
+GomaClient::Result GomaClient::CallIPC() {
+  const Result dispatch_result = CallIPCAsync();
+  if (dispatch_result == IPC_OK) {
+    return WaitIPC();
+  }
+  return dispatch_result;
+}
+
+#ifdef _WIN32
+// Fans the command line out into one ExecReq per input file.
+//
+// For every distinct input filename a response (.rsp) file holding the
+// shared arguments plus that input is written to the goma tmp dir, and an
+// ExecReq for "<compiler> @<rsp file>" is appended to |req|.  Returns
+// false as soon as an rsp file cannot be written or an ExecReq cannot be
+// prepared; |req| may then hold only part of the requests.
+bool GomaClient::PrepareMultiExecRequest(MultiExecReq* req) {
+  const string tmpdir = devtools_goma::GetGomaTmpDir();
+  const pid_t pid = Getpid();
+
+  // A set: inputs are de-duplicated and visited in sorted order.
+  std::set<string> input_filenames(flags_->input_filenames().begin(),
+                                   flags_->input_filenames().end());
+  // Input filenames may be in @rsp file, so scan expanded_args here.
+  const bool has_expanded_args = !flags_->expanded_args().empty();
+  const std::vector<string>& expanded_args =
+      has_expanded_args ? flags_->expanded_args() : flags_->args();
+  std::vector<string> args_no_input;  // args other than input filenames.
+  FanOutArgsByInput(expanded_args, input_filenames, &args_no_input);
+
+  int nth = 0;
+  for (const string& input_filename : input_filenames) {
+    // <basename>.<pid>.<nth>.rsp under the goma tmp dir.
+    std::stringstream rsp_basename;
+    rsp_basename << file::Basename(input_filename)
+                 << "." << pid << "." << nth << ".rsp";
+    const string rsp_filename = file::JoinPath(tmpdir, rsp_basename.str());
+
+    const string cmdline = BuildArgsForInput(args_no_input, input_filename);
+    if (!WriteStringToFile(cmdline, rsp_filename)) {
+      LOG(ERROR) << "GOMA: Failed to create " << rsp_filename;
+      return false;
+    }
+    // Keeps handle open, so that the rsp_file are not removed by tmp cleaner
+    // while gomacc is running.
+    rsp_files_.emplace_back(rsp_filename,
+                            new ScopedFd(ScopedFd::OpenForRead(rsp_filename)));
+
+    std::vector<string> args_of_input;
+    args_of_input.push_back(flags_->args()[0]);
+    args_of_input.push_back("@" + rsp_filename);
+    std::unique_ptr<CompilerFlags> flags_of_input(
+        CompilerFlags::MustNew(args_of_input, "."));
+    if (!PrepareExecRequest(*flags_of_input, req->add_req())) {
+      LOG(ERROR) << "GOMA: failed to create ExecReq for " << input_filename;
+      return false;
+    }
+    ++nth;
+  }
+
+  return true;
+}
+
+// Outputs every individual exec response contained in |resp|, in order.
+void GomaClient::OutputMultiExecResp(MultiExecResp* resp) {
+  auto* responses = resp->mutable_response();
+  for (auto it = responses->begin(); it != responses->end(); ++it) {
+    OutputExecResp(it->mutable_resp());
+  }
+}
+#endif
+
+// Fills |req| from |flags|, this client's state and the GOMA_* flags:
+// command spec, arguments, cwd, requester info, cache policy, environment,
+// requester env (PATH, verify/fallback settings, umask) and any implicit
+// or stdin input files.
+//
+// Side effects: may create a temporary stdin file (and then disables
+// FLAGS_RETRY), caches the current directory in |cwd_| when it is empty,
+// and exits the process with status 1 when both FLAGS_STORE_ONLY and
+// FLAGS_USE_SUCCESS are set.  Otherwise always returns true.
+bool GomaClient::PrepareExecRequest(const CompilerFlags& flags, ExecReq* req) {
+  req->mutable_command_spec()->set_name(
+      flags.compiler_name());
+
+  bool use_color_diagnostics = false;
+#ifndef _WIN32
+  // Ask clang for colored diagnostics only when stderr is a terminal that
+  // is not the capability-less "dumb" terminal type.
+  if (CompilerFlags::IsClangCommand(flags.compiler_name()) &&
+      isatty(STDERR_FILENO)) {
+    const char* term = getenv("TERM");
+    if (term != nullptr && strcmp(term, "dumb") != 0)
+      use_color_diagnostics = true;
+  }
+#endif
+
+  if (flags.is_gcc()) {
+    const GCCFlags& gcc_flags = static_cast<const GCCFlags&>(flags);
+    if (gcc_flags.is_stdin_input()) {
+      CHECK(!isatty(STDIN_FILENO)) << "goma doesn't support tty input."
+                                   << flags.DebugString();
+      // Spool stdin into a temporary file and send that as the input.
+      ExecReq_Input* input = req->add_input();
+      string tempfilename = CreateStdinFile();
+      input->set_filename(tempfilename);
+      input->set_hash_key("");
+      DCHECK_EQ(req->input_size(), 1);
+      FLAGS_RETRY = false;
+    }
+    if (FLAGS_FALLBACK_CONFTEST) {
+      devtools_goma::RequesterEnv* requester_env = req->mutable_requester_env();
+      for (const auto& input : gcc_flags.input_filenames()) {
+        if (file::Stem(input) == "conftest") {
+          FileId fid(input);
+          time_t now = time(nullptr);
+          // Missing, or modified within the last 10 seconds.
+          if (!fid.IsValid() || fid.mtime + 10 > now) {
+            // probably conftest.c, force fallback.
+            requester_env->add_fallback_input_file(input);
+          }
+        }
+      }
+    }
+  }
+
+  req->set_experimental_is_external_user(FLAGS_EXTERNAL_USER);
+
+  // If local_compiler_path_ is empty, compiler proxy will find out
+  // local compiler from requester_env's PATH and gomacc_path.
+  if (gomacc_path_.empty()) {
+    req->mutable_requester_env()->set_gomacc_path(GetMyPathname());
+  } else {
+    req->mutable_requester_env()->set_gomacc_path(gomacc_path_);
+  }
+  for (size_t i = 0; i < flags.args().size(); ++i) {
+    req->add_arg(flags.args()[i]);
+    // The color flag goes right after the compiler name (args()[0]).
+    if (i == 0 && use_color_diagnostics)
+      req->add_arg("-fcolor-diagnostics");
+  }
+  if (cwd_.empty()) {
+    cwd_ = GetCurrentDirNameOrDie();
+  }
+  req->set_cwd(cwd_);
+
+  if (!local_compiler_path_.empty()) {
+    req->mutable_command_spec()->set_local_compiler_path(local_compiler_path_);
+  }
+
+  req->mutable_requester_info()->set_api_version(
+      RequesterInfo::CURRENT_VERSION);
+  req->mutable_requester_info()->set_pid(Getpid());
+  req->mutable_requester_info()->set_goma_revision(kBuiltRevisionString);
+
+  if (FLAGS_STORE_ONLY) {
+    if (FLAGS_USE_SUCCESS) {
+      fprintf(stderr,
+              "You cannot use both GOMA_STORE_ONLY and GOMA_USE_SUCCESS\n");
+      exit(1);
+    }
+    req->set_cache_policy(ExecReq::STORE_ONLY);
+  } else if (FLAGS_USE_SUCCESS) {
+    req->set_cache_policy(ExecReq::LOOKUP_AND_STORE_SUCCESS);
+  }
+
+  for (size_t i = 0; i < envs_.size(); i++) {
+    req->add_env(envs_[i]);
+  }
+
+  devtools_goma::RequesterEnv* requester_env = req->mutable_requester_env();
+  const string path_env = GetEnv("PATH");
+  if (!path_env.empty())
+    requester_env->set_local_path(path_env);
+  // Verify-command takes precedence over verify-output, which in turn
+  // takes precedence over the plain use_local/fallback flags.
+  if (!FLAGS_VERIFY_COMMAND.empty()) {
+    requester_env->set_verify_command(FLAGS_VERIFY_COMMAND);
+    requester_env->set_use_local(false);
+    requester_env->set_fallback(false);
+  } else if (FLAGS_VERIFY_OUTPUT) {
+    requester_env->set_verify_output(true);
+    requester_env->set_use_local(true);
+    requester_env->set_fallback(true);
+  } else {
+    if (FLAGS_USE_LOCAL)
+      requester_env->set_use_local(true);
+    if (FLAGS_FALLBACK)
+      requester_env->set_fallback(true);
+  }
+  if (!FLAGS_FALLBACK_INPUT_FILES.empty()) {
+    // Comma-separated list of files.
+    std::vector<string> files;
+    SplitStringUsing(FLAGS_FALLBACK_INPUT_FILES, ",", &files);
+    for (size_t i = 0; i < files.size(); ++i) {
+      requester_env->add_fallback_input_file(files[i]);
+    }
+  }
+
+  if (!FLAGS_IMPLICIT_INPUT_FILES.empty()) {
+    // Set these file in ExecReq.
+    // We don't need hash_key for these files here.
+    // Compiler proxy picks them as required_files and computes hash_key.
+    std::vector<string> files;
+    SplitStringUsing(FLAGS_IMPLICIT_INPUT_FILES, ",", &files);
+    for (size_t i = 0; i < files.size(); ++i) {
+      ExecReq_Input* input = req->add_input();
+      input->set_filename(file::JoinPathRespectAbsolute(cwd_, files[i]));
+      input->set_hash_key("");
+    }
+  }
+#ifndef _WIN32
+  // umask() can only be read by setting it, so set a dummy value and
+  // immediately restore the original.
+  mode_t mask = umask(0000);
+  umask(mask);
+  requester_env->set_umask(mask);
+#endif
+  return true;
+}
+
+// Writes the response's stdout/stderr buffers to the corresponding
+// streams, prints each error message to stderr, and then clears those
+// fields from |resp|.
+void GomaClient::OutputExecResp(ExecResp* resp) {
+  WriteStdout(resp->result().stdout_buffer());
+  WriteStderr(resp->result().stderr_buffer());
+  for (const auto& message : resp->error_message()) {
+    std::cerr << "GOMA:" << name_
+              << ":*ERROR*: " << message << std::endl;
+  }
+
+  auto* result = resp->mutable_result();
+  result->clear_stdout_buffer();
+  result->clear_stderr_buffer();
+  resp->clear_error_message();
+}
+
+}  // namespace devtools_goma
diff --git a/client/gomacc_common.h b/client/gomacc_common.h
new file mode 100644
index 0000000..6a7cbe8
--- /dev/null
+++ b/client/gomacc_common.h
@@ -0,0 +1,114 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_GOMACC_COMMON_H_
+#define DEVTOOLS_GOMA_CLIENT_GOMACC_COMMON_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+#include "goma_ipc.h"
+#include "scoped_fd.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class CompilerFlags;
+class ExecReq;
+class ExecResp;
+class MultiExecReq;
+class MultiExecResp;
+
+// Returns the port where http server is running.
+// Returns -1 when compiler proxy is not ready.
+// |status| will be modified if |status| is non-NULL.
+int GetCompilerProxyPort(GomaIPC::Status* status);
+
+bool StartCompilerProxy();
+
+class GomaClient {
+ public:
+  enum Result {
+    IPC_OK = 0,
+    IPC_FAIL = -1,
+    IPC_REJECTED = -2,
+  };
+
+  GomaClient(int pid, std::unique_ptr<CompilerFlags> flags, const char** envp,
+             const string& local_compiler_path);
+  ~GomaClient();
+  void OutputResp();
+
+  int retval() const;
+  int id() const { return id_; }
+  const IOChannel* chan() const { return ipc_chan_.get(); }
+
+  // Call IPC Request. Return IPC_OK if successful.
+  Result CallIPCAsync();
+
+  // Waits for an already dispatched IPC request to finish.  This must be
+  // called after CallIPCAsync().
+  Result WaitIPC();
+
+  string CreateStdinFile();
+
+  // Blocking version of IPC call which calls CallIPCAsync and WaitIPC
+  // internally.
+  Result CallIPC();
+
+  // Sets overriding gomacc_path.
+  // The caller's executable path will be used by default when it is not set.
+  void set_gomacc_path(const string& path) { gomacc_path_ = path; }
+
+  void set_cwd(const string& cwd) { cwd_ = cwd; }
+
+  void set_local_compiler_path(const string& local_compiler_path) {
+    local_compiler_path_ = local_compiler_path;
+  }
+
+ private:
+#ifdef _WIN32
+  bool PrepareMultiExecRequest(MultiExecReq* req);
+  void OutputMultiExecResp(MultiExecResp* resp);
+#endif
+
+  bool PrepareExecRequest(const CompilerFlags& flags, ExecReq* req);
+  void OutputExecResp(ExecResp* resp);
+#ifndef _WIN32
+  void OutputProfInfo(const ExecResp& resp);
+#endif
+
+  GomaIPC goma_ipc_;
+  std::unique_ptr<IOChannel> ipc_chan_;
+  GomaIPC::Status status_;
+
+  int id_;
+  std::unique_ptr<CompilerFlags> flags_;
+  string name_;
+  std::vector<string> envs_;
+#ifdef _WIN32
+  std::vector<ScopedFd*> optional_files_;
+  std::unique_ptr<MultiExecResp> multi_exec_resp_;
+  std::vector<std::pair<string, ScopedFd*>> rsp_files_;
+#endif
+  std::unique_ptr<ExecResp> exec_resp_;
+  double req_send_time_;
+  double resp_recv_time_;
+  double resp_write_time_;
+  ScopedFd stdin_file_;
+  string stdin_filename_;
+  string gomacc_path_;
+  string cwd_;
+  string local_compiler_path_;
+
+  DISALLOW_COPY_AND_ASSIGN(GomaClient);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_GOMACC_COMMON_H_
diff --git a/client/gomacc_ipc.cc b/client/gomacc_ipc.cc
new file mode 100644
index 0000000..7940f9c
--- /dev/null
+++ b/client/gomacc_ipc.cc
@@ -0,0 +1,104 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "gomacc_ipc.h"
+
+#include <sys/errno.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "compiler_specific.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/gomacc_message.pb.h"
+MSVC_POP_WARNING()
+
+using std::string;
+
+namespace {
+
+#define HANDLE_EINTR(x) ({ \
+  typeof(x) __eintr_result__; \
+  do { \
+    __eintr_result__ = x; \
+  } while (__eintr_result__ == -1 && errno == EINTR); \
+  __eintr_result__;\
+})
+
+// Reads until |bufsize| bytes are stored in |buf| or EOF is hit, retrying on
+// EINTR.  Returns the number of bytes read, or -1 on a read error.
+int ReadAll(int fd, char* buf, int bufsize) {
+  int total = 0;
+  while (total < bufsize) {
+    const int got = HANDLE_EINTR(read(fd, buf + total, bufsize - total));
+    if (got < 0)
+      return -1;
+    if (got == 0)
+      break;  // EOF: report the short count.
+    total += got;
+  }
+  return total;
+}
+
+// Writes all |bufsize| bytes of |buf| to |fd|, retrying on EINTR.  Returns
+// the number of bytes written (short if write() returns 0), or -1 on error.
+int WriteAll(int fd, const char* buf, int bufsize) {
+  int total = 0;
+  while (total < bufsize) {
+    const int put = HANDLE_EINTR(write(fd, buf + total, bufsize - total));
+    if (put < 0)
+      return -1;
+    if (put == 0)
+      break;  // No progress: report the short count.
+    total += put;
+  }
+  return total;
+}
+
+}  // namespace
+
+// Sends |cmd| over |fd| as a raw native int; true iff it was fully written.
+bool SendCommand(int fd, GomaCCCommand cmd) {
+  const int wire = static_cast<int>(cmd);
+  const char* bytes = reinterpret_cast<const char*>(&wire);
+  return WriteAll(fd, bytes, sizeof(wire)) == static_cast<int>(sizeof(wire));
+}
+
+bool ReceiveCommand(int fd, GomaCCCommand* cmd) {
+  int value;
+  if (ReadAll(fd, (char*)&value, sizeof(int)) != sizeof(int))
+    return false;
+  *cmd = static_cast<GomaCCCommand>(value);
+  return true;
+}
+
+// Sends |message| as a native int length followed by its serialized bytes.
+bool SendMessage(int sock, const google::protobuf::Message& message) {
+  string msg;
+  if (!message.SerializeToString(&msg))
+    return false;
+  const int size = static_cast<int>(msg.size());
+  // WriteAll returns -1 or a short count on failure; both are non-zero.
+  return WriteAll(sock, (const char*)&size, sizeof(size)) == sizeof(size) &&
+         WriteAll(sock, msg.data(), size) == size;
+}
+
+// Reads a native int length and then that many bytes from |sock| into
+// |message|.  False on I/O error, short read, bad length or parse failure.
+bool ReceiveMessage(int sock, google::protobuf::Message* message) {
+  int length = 0;
+  if (ReadAll(sock, (char*)(&length), sizeof(length)) != sizeof(length))
+    return false;
+  if (length < 0)
+    return false;  // A negative size would otherwise become a huge allocation.
+  // Not thread safe: small messages share this static buffer.
+  static char scratch[2048];
+  std::unique_ptr<char[]> heap;
+  if (static_cast<size_t>(length) > sizeof(scratch))
+    heap.reset(new char[length]);
+  char* buf = heap ? heap.get() : scratch;
+  return ReadAll(sock, buf, length) == length &&  // -1/short read are truthy
+         message->ParseFromArray(buf, length);
+}
diff --git a/client/gomacc_ipc.h b/client/gomacc_ipc.h
new file mode 100644
index 0000000..6dcfce9
--- /dev/null
+++ b/client/gomacc_ipc.h
@@ -0,0 +1,27 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_GOMACC_IPC_H_
+#define DEVTOOLS_GOMA_CLIENT_GOMACC_IPC_H_
+
+namespace google {
+namespace protobuf {
+class Message;
+}  // namespace protobuf
+}  // namespace google
+
+enum GomaCCCommand {
+  GOMACC_CMD_COMPILE,
+  GOMACC_CMD_WAIT,
+  GOMACC_CMD_TERMINATE,
+};
+
+bool SendCommand(int fd, GomaCCCommand cmd);
+bool ReceiveCommand(int fd, GomaCCCommand* cmd);
+
+bool SendMessage(int sock, const google::protobuf::Message& message);
+bool ReceiveMessage(int sock, google::protobuf::Message* message);
+
+#endif  // DEVTOOLS_GOMA_CLIENT_GOMACC_IPC_H_
diff --git a/client/hash_rewrite_parser.cc b/client/hash_rewrite_parser.cc
new file mode 100644
index 0000000..069f4e8
--- /dev/null
+++ b/client/hash_rewrite_parser.cc
@@ -0,0 +1,64 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "hash_rewrite_parser.h"
+
+#include <utility>
+
+#include "glog/logging.h"
+#include "split.h"
+
+namespace {
+
+// True iff |str| is exactly 64 lowercase hexadecimal digits; logs otherwise.
+bool IsSha256Hexadecimal(const string& str) {
+  static const char kHexDigits[] = "0123456789abcdef";
+  if (str.size() != 256 / 8 * 2) {  // 256 bits, two digits per byte
+    LOG(WARNING) << "wrong length:" << str;
+    return false;
+  }
+  const bool all_hex = str.find_first_not_of(kHexDigits) == string::npos;
+  if (!all_hex)
+    LOG(WARNING) << "wrong char:" << str;
+  return all_hex;
+}
+
+}  // namespace
+
+namespace devtools_goma {
+
+bool ParseRewriteRule(const std::string& contents,
+                      std::map<std::string, std::string>* mapping) {
+  std::vector<string> lines;
+  SplitStringUsing(contents, "\n", &lines);
+  for (const auto& line : lines) {
+    if (line.empty())
+      continue;
+    size_t pos = line.find(":");
+    if (pos == string::npos) {
+      LOG(WARNING) << "wrong rule file.";
+      return false;
+    }
+    const string& key = line.substr(0, pos);
+    if (!IsSha256Hexadecimal(key)) {
+      LOG(WARNING) << "The key seems not SHA256 hexadecimal."
+                   << " key=" << key;
+      return false;
+    }
+    const string& value = line.substr(pos + 1);
+    if (!IsSha256Hexadecimal(value)) {
+      LOG(WARNING) << "The value seems not SHA256 hexadecimal."
+                   << " value=" << value;
+      return false;
+    }
+    if (!mapping->insert(std::make_pair(key, value)).second) {
+      LOG(WARNING) << "found the same key twice."
+                   << " key=" << key;
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace devtools_goma
diff --git a/client/hash_rewrite_parser.h b/client/hash_rewrite_parser.h
new file mode 100644
index 0000000..bd7b339
--- /dev/null
+++ b/client/hash_rewrite_parser.h
@@ -0,0 +1,28 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_HASH_REWRITE_PARSER_H_
+#define DEVTOOLS_GOMA_CLIENT_HASH_REWRITE_PARSER_H_
+
+#include <map>
+#include <string>
+
+namespace devtools_goma {
+
+// Parses a subprogram's hash rewrite rule.
+// The rule format is like:
+// <src SHA256 01>:<to SHA256 01>\n
+// <src SHA256 02>:<to SHA256 02>\n
+// <src SHA256 03>:<to SHA256 02>\n
+//
+// Returns true if parsing succeeded, false otherwise.
+// The function updates |mapping| based on |contents|.  If it returns false,
+// the caller should not use |mapping|.
+// Note that duplicate src hash is considered as error.
+bool ParseRewriteRule(const std::string& contents,
+                      std::map<std::string, std::string>* mapping);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_HASH_REWRITE_PARSER_H_
diff --git a/client/hash_rewrite_parser_unittest.cc b/client/hash_rewrite_parser_unittest.cc
new file mode 100644
index 0000000..e59f4d4
--- /dev/null
+++ b/client/hash_rewrite_parser_unittest.cc
@@ -0,0 +1,131 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "hash_rewrite_parser.h"
+
+#include <map>
+#include <string>
+#include <utility>
+
+#include <gtest/gtest.h>
+
+namespace devtools_goma {
+
+TEST(ParseRewriteRuleTest, ShouldParseEmptyFile) {
+  std::map<std::string,std::string> mapping;
+  EXPECT_TRUE(ParseRewriteRule("", &mapping));
+  EXPECT_TRUE(mapping.empty());
+}
+
+TEST(ParseRewriteRuleTest, ShouldParseEmptyLines) {
+  std::map<std::string,std::string> mapping;
+  EXPECT_TRUE(ParseRewriteRule("\n\n\n", &mapping));
+  EXPECT_TRUE(mapping.empty());
+}
+
+TEST(ParseRewriteRuleTest, ShouldParseOnelineFile) {
+  std::map<std::string,std::string> mapping;
+  std::map<std::string,std::string> expected;
+  ASSERT_TRUE(expected.insert(std::make_pair(
+      "b5a3dadbdcafc7902f9502de7f037ec95f6340de8aa0a6b4d9ee74a47379063f",
+      "b8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678"
+      )).second);
+  EXPECT_TRUE(ParseRewriteRule(
+      "b5a3dadbdcafc7902f9502de7f037ec95f6340de8aa0a6b4d9ee74a47379063f:"
+      "b8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678",
+      &mapping));
+  EXPECT_FALSE(mapping.empty());
+  EXPECT_EQ(expected, mapping);
+}
+
+TEST(ParseRewriteRuleTest, ShouldParseTwolineFile) {
+  std::map<std::string,std::string> mapping;
+  std::map<std::string,std::string> expected;
+  ASSERT_TRUE(expected.insert(std::make_pair(
+      "a5a3dadbdcafc7902f9502de7f037ec95f6340de8aa0a6b4d9ee74a47379063f",
+      "a8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678"
+      )).second);
+  ASSERT_TRUE(expected.insert(std::make_pair(
+      "b5a3dadbdcafc7902f9502de7f037ec95f6340de8aa0a6b4d9ee74a47379063f",
+      "b8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678"
+      )).second);
+  EXPECT_TRUE(ParseRewriteRule(
+      "a5a3dadbdcafc7902f9502de7f037ec95f6340de8aa0a6b4d9ee74a47379063f:"
+      "a8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678\n"
+      "b5a3dadbdcafc7902f9502de7f037ec95f6340de8aa0a6b4d9ee74a47379063f:"
+      "b8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678\n",
+      &mapping));
+  EXPECT_FALSE(mapping.empty());
+  EXPECT_EQ(expected, mapping);
+}
+
+TEST(ParseRewriteRuleTest, ShouldReturnFalseIfNoDelimiter) {
+  std::map<std::string,std::string> mapping;
+  EXPECT_FALSE(ParseRewriteRule(
+      "a5a3dadbdcafc7902f9502de7f037ec95f6340de8aa0a6b4d9ee74a47379063f",
+      &mapping));
+}
+
+TEST(ParseRewriteRuleTest, ShouldBeErrorIfNotSha256) {
+  std::map<std::string,std::string> mapping;
+  // Too short or long.  Key and value always share one "<key>:<value>" line.
+  EXPECT_FALSE(ParseRewriteRule(
+      "a:"
+      "a8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678\n",
+      &mapping));
+  EXPECT_FALSE(ParseRewriteRule(
+      "a5a3dadbdcafc7902f9502de7f037ec95f6340de8aa0a6b4d9ee74a47379063f:"
+      "a\n",
+      &mapping));
+  EXPECT_FALSE(ParseRewriteRule(
+      "a8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678abc:"
+      "a8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678\n",
+      &mapping));
+  EXPECT_FALSE(ParseRewriteRule(
+      "a8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678:"
+      "a8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678abc\n",
+      &mapping));
+  // Not hexadecimal.
+  EXPECT_FALSE(ParseRewriteRule(
+      "ghi3dadbdcafc7902f9502de7f037ec95f6340de8aa0a6b4d9ee74a47379063f:"
+      "a8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678\n",
+      &mapping));
+  EXPECT_FALSE(ParseRewriteRule(
+      "a8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678:"
+      "g8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678\n",
+      &mapping));
+}
+
+TEST(ParseRewriteRuleTest, ShouldBeErrorForDuplicatedSourceEntry) {
+  std::map<std::string,std::string> mapping;
+  EXPECT_FALSE(ParseRewriteRule(
+      "a5a3dadbdcafc7902f9502de7f037ec95f6340de8aa0a6b4d9ee74a47379063f:"
+      "a8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678\n"
+      "a5a3dadbdcafc7902f9502de7f037ec95f6340de8aa0a6b4d9ee74a47379063f:"
+      "b8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678\n",
+      &mapping));
+}
+
+TEST(ParseRewriteRuleTest, ShouldAcceptDuplicatedDestEntry) {
+  std::map<std::string,std::string> mapping;
+  std::map<std::string,std::string> expected;
+  ASSERT_TRUE(expected.insert(std::make_pair(
+      "a5a3dadbdcafc7902f9502de7f037ec95f6340de8aa0a6b4d9ee74a47379063f",
+      "a8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678"
+      )).second);
+  ASSERT_TRUE(expected.insert(std::make_pair(
+      "b5a3dadbdcafc7902f9502de7f037ec95f6340de8aa0a6b4d9ee74a47379063f",
+      "a8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678"
+      )).second);
+  EXPECT_TRUE(ParseRewriteRule(
+      "a5a3dadbdcafc7902f9502de7f037ec95f6340de8aa0a6b4d9ee74a47379063f:"
+      "a8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678\n"
+      "b5a3dadbdcafc7902f9502de7f037ec95f6340de8aa0a6b4d9ee74a47379063f:"
+      "a8a38778b7c56de92f5f14c185104285f62c0dec8aed6e2f552cc73a8e9ac678\n",
+      &mapping));
+  EXPECT_FALSE(mapping.empty());
+  EXPECT_EQ(expected, mapping);
+}
+
+}  // namespace devtools_goma
diff --git a/client/histogram.cc b/client/histogram.cc
new file mode 100644
index 0000000..17973a3
--- /dev/null
+++ b/client/histogram.cc
@@ -0,0 +1,188 @@
+/*BINFMTCXX: -DTEST -L ./glog-0.3.1/.libs/ -lglog
+ */
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include <algorithm>
+#include <iomanip>
+#include <string>
+#include <sstream>
+#include <vector>
+
+#include "compiler_specific.h"
+#include "glog/logging.h"
+#include "histogram.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_stats.pb.h"
+MSVC_POP_WARNING()
+
+using std::string;
+
+namespace devtools_goma {
+
+const int64_t kGraphWidth = 50;
+
+void Histogram::SetName(const string& name) {
+  name_ = name;
+}
+
+void Histogram::SetLogBase(float logbase) {
+  CHECK_EQ(count_, 0) << name_ << ": SetLogBase must be called before Add";
+  logbase_ = logbase;
+}
+
+void Histogram::Reset() {
+  buckets_.clear();
+  min_max_is_set_ = false;
+  count_ = 0;
+  sum_ = 0;
+  sum_of_squares_ = 0;
+}
+
+void Histogram::Add(int64_t value) {  // records one sample
+  ++buckets_[DetermineBucket(value)];
+  if (min_max_is_set_) {
+    min_ = std::min(min_, value);
+    max_ = std::max(max_, value);
+  } else {
+    min_ = max_ = value;  // first sample seeds both extremes
+    min_max_is_set_ = true;
+  }
+  ++count_;
+  sum_ += value;
+  sum_of_squares_ += static_cast<double>(value) * value;
+}
+
+// Returns a bar of |n| '#' characters; |n| must be within [0, kGraphWidth].
+string Histogram::ManySharps(int64_t n) const {
+  CHECK_GE(n, 0) << name_;
+  CHECK_LE(n, kGraphWidth) << name_;
+  const string::size_type width = static_cast<string::size_type>(n);
+  return string(width, '#');
+}
+
+// Renders the histogram as text: a line of summary statistics, then one
+// "[low-high]: ###count" row per bucket from min_'s bucket to max_'s bucket.
+// CHECK-fails when no value has been added.
+string Histogram::DebugString() const {
+  CHECK_GT(count_, 0)
+      << name_
+      << ": Histogram cannot be output unless there is at least one value";
+
+  std::stringstream ss;
+  ss << name_ << ": "
+     << " Basic stats: count: " << count_
+     << " sum: " << sum_
+     << " min: " << min_
+     << " max: " << max_
+     << " mean: " << mean()
+     << " stddev: " << standard_deviation()
+     << "\n";
+
+  // The fullest bucket gets a bar of kGraphWidth sharps; others are scaled.
+  int64_t largest = buckets_.begin()->second;
+  for (const auto& bucket : buckets_)
+    largest = std::max(largest, bucket.second);
+
+  // One row per bucket index, including buckets that hold no sample.
+  struct Row {
+    string low, high, bar;
+  };
+  std::vector<Row> rows;
+  // DetermineBucket() is loop-invariant, so evaluate each bound only once.
+  const int first_bucket = DetermineBucket(min_);
+  const int last_bucket = DetermineBucket(max_);
+  for (int i = first_bucket; i <= last_bucket; ++i) {
+    std::stringstream low_ss, high_ss, bar_ss;
+    low_ss << BucketValue(i);
+    high_ss << BucketValue(i + 1);
+
+    const auto found = buckets_.find(i);  // looked up once, reused below
+    if (found != buckets_.end()) {
+      const double width = static_cast<double>(kGraphWidth) *
+                           static_cast<double>(found->second) /
+                           static_cast<double>(largest);
+      bar_ss << ManySharps(static_cast<int64_t>(width)) << found->second;
+    }
+    rows.push_back({low_ss.str(), high_ss.str(), bar_ss.str()});
+  }
+
+  // Sized from the last row (widest while BucketValue() grows with the index).
+  size_t low_width = 0;
+  size_t high_width = 0;
+  if (!rows.empty()) {
+    low_width = rows.back().low.size();
+    high_width = rows.back().high.size();
+  }
+
+  for (const Row& row : rows) {
+    ss << "["
+       << std::setw(low_width) << std::right << row.low
+       << "-"
+       << std::setw(high_width) << std::right << row.high
+       << "]: "
+       << std::left << row.bar << '\n';
+  }
+
+  return ss.str();
+}
+
+// Maps |value| to its bucket index: 0 for values below 1 (negative values are
+// logged and treated as 0), otherwise the truncated base-|logbase_| logarithm
+// of |value| plus 1, never below 0.
+int Histogram::DetermineBucket(int64_t value) const {
+  if (value < 0)
+    LOG(WARNING) << "value is negative:" << value << " for " << name_;
+  if (value < 1)
+    return 0;
+
+  const double exponent =
+      log(static_cast<double>(value)) / log(static_cast<double>(logbase_));
+  // static_cast truncates toward zero before the +1 shift past the
+  // zero bucket.
+  const int bucket = static_cast<int>(exponent) + 1;
+  return std::max(bucket, 0);
+}
+
+// Returns the value labelling bucket |n|: 0 for bucket 0 (negative |n| is
+// logged and treated as 0), otherwise logbase_^(n - 1) truncated to int64_t.
+int64_t Histogram::BucketValue(int n) const {
+  if (n < 0)
+    LOG(WARNING) << "value is negative:" << n << " for " << name_;
+  if (n <= 0)
+    return 0;
+
+  const double lower_bound = pow(logbase_, n - 1);
+  return static_cast<int64_t>(lower_bound);
+}
+
+int64_t Histogram::standard_deviation() const {  // population stddev, truncated
+  double var = sum_of_squares_ / count_ - (double)sum_ * sum_ / count_ / count_;
+  return var > 0 ? static_cast<int64_t>(sqrt(var)) : 0;  // NaN/negative -> 0
+}
+
+// Copies the summary statistics and the per-bucket counts into |dist|.
+// Buckets 0..DetermineBucket(max_) are emitted in order; buckets that hold no
+// sample are written as 0.
+void Histogram::DumpToProto(DistributionProto* dist) {
+  dist->set_count(count_);
+  dist->set_sum(sum_);
+  dist->set_sum_of_squares(sum_of_squares_);
+  dist->set_min(min_);
+  dist->set_max(max_);
+  dist->set_logbase(logbase_);
+
+  // Computed once: DetermineBucket() takes logarithms and logs a warning for
+  // negative values, so calling it on every iteration is wasteful and noisy.
+  const int first_bucket = DetermineBucket(min_);
+  const int last_bucket = DetermineBucket(max_);
+  for (int i = 0; i <= last_bucket; ++i) {
+    const auto pos = i < first_bucket ? buckets_.end() : buckets_.find(i);
+    // A single lookup decides between the stored count and an empty bucket.
+    dist->add_bucket_value(pos == buckets_.end() ? 0 : pos->second);
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/client/histogram.h b/client/histogram.h
new file mode 100644
index 0000000..41654b2
--- /dev/null
+++ b/client/histogram.h
@@ -0,0 +1,71 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_HISTOGRAM_H_
+#define DEVTOOLS_GOMA_CLIENT_HISTOGRAM_H_
+
+#include <assert.h>
+#include <math.h>
+#include <stdint.h>
+
+#include <map>
+#include <string>
+
+using std::string;
+
+namespace devtools_goma {
+
+class DistributionProto;
+
+class Histogram {
+ public:
+  // Constructs an object which holds occurrence frequency information
+  // in interval buckets of log(logbase). Default value for log base is 2.
+  Histogram() : logbase_(2),
+        min_max_is_set_(false), min_(0), max_(0),
+        count_(0), sum_(0), sum_of_squares_(0) {}
+  ~Histogram() {}
+
+  void SetName(const string& name);
+
+  // Resets statistics values.
+  // It preserves logbase_.
+  void Reset();
+
+  void Add(int64_t value);
+
+  // Log base can be modified before adding the first value.
+  void SetLogBase(float logbase);
+
+  int DetermineBucket(int64_t value) const;
+  int64_t BucketValue(int n) const;
+  int64_t min() const { return min_; }  // meaningful only after Add()
+  int64_t max() const { return max_; }  // meaningful only after Add()
+  int64_t sum() const { return sum_; }
+  double sum_of_squares() const { return sum_of_squares_; }
+  int64_t standard_deviation() const;
+  int64_t mean() const { return count_ > 0 ? sum_ / count_ : 0; }  // 0 if empty
+  int64_t count() const { return count_; }
+  const string& name() const { return name_; }
+  string DebugString() const;
+  void DumpToProto(DistributionProto* dist);
+
+ private:
+  string ManySharps(int64_t n) const;
+
+  string name_;
+  float logbase_;
+  std::map<int, int64_t> buckets_;  // bucket index -> number of samples
+  bool min_max_is_set_;  // false until the first Add() after construct/Reset
+  int64_t min_;
+  int64_t max_;
+  int64_t count_;
+  int64_t sum_;
+  double sum_of_squares_;
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_HISTOGRAM_H_
diff --git a/client/histogram_unittest.cc b/client/histogram_unittest.cc
new file mode 100644
index 0000000..27c29ba
--- /dev/null
+++ b/client/histogram_unittest.cc
@@ -0,0 +1,47 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "histogram.h"
+
+#include <gtest/gtest.h>
+
+namespace devtools_goma {
+
+TEST(HistogramTest, DetermineBucket)
+{
+  Histogram histogram;
+
+  EXPECT_EQ(0, histogram.DetermineBucket(0));
+  EXPECT_EQ(1, histogram.DetermineBucket(1));
+  EXPECT_EQ(2, histogram.DetermineBucket(2));
+  EXPECT_EQ(2, histogram.DetermineBucket(3));
+  EXPECT_EQ(3, histogram.DetermineBucket(4));
+  EXPECT_EQ(3, histogram.DetermineBucket(5));
+  EXPECT_EQ(3, histogram.DetermineBucket(6));
+  EXPECT_EQ(3, histogram.DetermineBucket(7));
+  EXPECT_EQ(4, histogram.DetermineBucket(8));
+  EXPECT_EQ(4, histogram.DetermineBucket(9));
+
+  // Negative value will be treated as 0.
+  EXPECT_EQ(0, histogram.DetermineBucket(-1));
+  EXPECT_EQ(0, histogram.DetermineBucket(-100));
+}
+
+TEST(HistogramTest, BucketValue)
+{
+  Histogram histogram;
+
+  EXPECT_EQ(0, histogram.BucketValue(0));
+  EXPECT_EQ(1, histogram.BucketValue(1));
+  EXPECT_EQ(2, histogram.BucketValue(2));
+  EXPECT_EQ(4, histogram.BucketValue(3));
+  EXPECT_EQ(8, histogram.BucketValue(4));
+  EXPECT_EQ(16, histogram.BucketValue(5));
+
+  EXPECT_EQ(0, histogram.BucketValue(-1));
+  EXPECT_EQ(0, histogram.BucketValue(-100));
+}
+
+} // namespace devtools_goma
diff --git a/client/http.cc b/client/http.cc
new file mode 100644
index 0000000..2c7518b
--- /dev/null
+++ b/client/http.cc
@@ -0,0 +1,2115 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "http.h"
+
+#ifndef _WIN32
+#include <fcntl.h>
+#ifdef ENABLE_LZMA
+#include <lzma.h>
+#endif
+#include <netdb.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#endif
+#include <time.h>
+#include <zlib.h>
+
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <set>
+#include <sstream>
+#include <string>
+
+#include "autolock_timer.h"
+#include "callback.h"
+#include "compiler_proxy_info.h"
+#include "compiler_specific.h"
+#include "compress_util.h"
+#include "descriptor.h"
+#include "env_flags.h"
+#include "fileflag.h"
+#include "glog/logging.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "google/protobuf/message.h"
+#include "google/protobuf/io/gzip_stream.h"
+#include "google/protobuf/io/zero_copy_stream.h"
+#include "google/protobuf/io/zero_copy_stream_impl.h"
+#include "google/protobuf/io/zero_copy_stream_impl_lite.h"
+MSVC_POP_WARNING()
+#include "histogram.h"
+#include "ioutil.h"
+#include "oauth2.h"
+#include "oauth2_token.h"
+#include "openssl_engine.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_stats.pb.h"
+MSVC_POP_WARNING()
+#include "scoped_fd.h"
+#include "simple_timer.h"
+#include "socket_descriptor.h"
+#include "socket_factory.h"
+#include "socket_pool.h"
+#include "string_piece.h"
+#include "tls_descriptor.h"
+#include "worker_thread_manager.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// Note: we can't use X-Goma-Content-Length, because file.cc's
+// FindContentLengthAndBodyOffset would confuse it with Content-Length.
+const char HttpClient::kGomaLength[] = "X-Goma-Length: ";
+
+const int kDefaultThrottleTimeoutMilliSec = 600 * 1000;
+const int kDefaultTimeoutSec = 900;
+
+const size_t kMaxTrafficHistory = 120U;
+
+const int kMaxQPS = 700;
+
+const int kRampUpDurationSec = 600; // 10 min
+
+const int kMaxConnectionFailure = 5;
+
+const int kDefaultErrorThresholdPercent = 30;
+
+static bool IsFatalNetworkErrorCode(int status_code) {  // true for 302/401/403
+  return status_code == 302 || status_code == 401 || status_code == 403;
+}
+
+// Returns the updated "enabled from" time for the latest HTTP status.  Fatal
+// codes push it 10 to 20 minutes ahead, a 2xx response starts or finishes
+// the ramp-up period, and every other code leaves it unchanged.
+static time_t CalculateEnabledFrom(int status_code, time_t enabled_from) {
+  const int kMinDisableDurationSec = 600;  // 10 min
+  const int kMaxDisableDurationSec = 1200; // 20 min
+
+  if (IsFatalNetworkErrorCode(status_code)) {
+    // Status code for blocking by the DoS server: back off for a while.
+    const int jitter_sec =
+        rand() % (kMaxDisableDurationSec - kMinDisableDurationSec);
+    const time_t until = time(nullptr) + kMinDisableDurationSec + jitter_sec;
+    if (until <= enabled_from)
+      return enabled_from;  // Already disabled for at least that long.
+    LOG(INFO) << "status=" << status_code
+              << " extend enabled from: " << enabled_from
+              << " to " << until;
+    return until;
+  }
+  // Non-fatal status codes seen here:
+  //   200: success
+  //   204: no response
+  //   400: bad request (app error)
+  //   408: timeout
+  //   415: unsupported media type (disable compression)
+  //   5xx: server error
+  // Only 2xx responses can move |enabled_from|; the rest leave it as is.
+  const bool is_success = (status_code / 100) == 2;
+  if (!is_success || enabled_from == 0)
+    return enabled_from;
+
+  const time_t now = time(nullptr);
+  if (now < enabled_from) {
+    // Still disabled: ramp up from now to now + kRampUpDurationSec.
+    LOG(INFO) << "got 200 respose in enabled_from=" << enabled_from
+              << " start ramp up from " << now;
+    return now;
+  }
+  if (now < enabled_from + kRampUpDurationSec)
+    return enabled_from;  // Inside the ramp-up period: nothing to do.
+  LOG(INFO) << "got 200 response. finish ramp up period";
+  return 0;
+}
+
+HttpClient::Options::Options()
+    : dest_port(0), proxy_port(0),
+      capture_response_header(false),
+      use_ssl(false), ssl_crl_max_valid_duration(-1),
+      socket_read_timeout_sec(1.0),
+      min_retry_backoff_ms(500), max_retry_backoff_ms(5000),
+      fail_fast(false), network_error_margin(0),
+      network_error_threshold_percent(kDefaultErrorThresholdPercent),
+      allow_throttle(true), reuse_connection(true),
+      force_connect_errorneous_address(false) {
+}
+
+// Fills the destination fields from |url| ("http[s]://host[:port][/path]").
+// Returns false, changing nothing, for a missing "://" or a non-http scheme.
+bool HttpClient::Options::InitFromURL(StringPiece url) {
+  const size_t scheme_end = url.find("://");
+  if (scheme_end == string::npos)
+    return false;
+  const StringPiece scheme = url.substr(0, scheme_end);
+  const bool is_https = (scheme == "https");
+  if (!is_https && !(scheme == "http"))
+    return false;
+  use_ssl = is_https;
+  dest_port = is_https ? 443 : 80;  // scheme default, may be overridden below
+
+  // Everything from the first '/' after the host part is the path prefix.
+  StringPiece hostport = url.substr(scheme_end + 3);
+  const size_t path_start = hostport.find("/");
+  if (path_start == string::npos) {
+    url_path_prefix = "/";
+  } else {
+    url_path_prefix = string(hostport.substr(path_start));
+    hostport = hostport.substr(0, path_start);
+  }
+
+  const size_t colon = hostport.find(":");
+  if (colon == string::npos) {
+    dest_host_name = string(hostport);
+    return true;
+  }
+  dest_host_name = string(hostport.substr(0, colon));
+  dest_port = atoi(string(hostport.substr(colon + 1)).c_str());
+  return true;
+}
+
+// Host to open the socket to: the proxy host when one is configured,
+// otherwise the destination host.
+string HttpClient::Options::SocketHost() const {
+  const bool via_proxy = !proxy_host_name.empty();
+  return via_proxy ? proxy_host_name : dest_host_name;
+}
+
+// Port to open the socket to: the proxy port when a proxy host is
+// configured, otherwise the destination port.
+int HttpClient::Options::SocketPort() const {
+  const bool via_proxy = !proxy_host_name.empty();
+  return via_proxy ? proxy_port : dest_port;
+}
+
+// Builds the request target for |path|: absolute-form when a non-SSL
+// request goes through a proxy, otherwise just prefix + path + extra params.
+string HttpClient::Options::RequestURL(StringPiece path) const {
+  const bool via_proxy =
+      dest_host_name != SocketHost() || dest_port != SocketPort();
+  std::ostringstream target;
+  // Without SSL and with proxy, send the request in absolute-form.
+  if (via_proxy && !use_ssl)
+    target << "http://" << dest_host_name << ':' << dest_port;
+  target << url_path_prefix << path << extra_params;
+  return target.str();
+}
+
+// Host name for the request (presumably the HTTP Host header): an explicit
+// http_host_name wins; with SSL via a proxy it is the destination host.
+string HttpClient::Options::Host() const {
+  if (!http_host_name.empty())
+    return http_host_name;
+
+  const string socket_host = SocketHost();
+  const bool via_proxy =
+      dest_host_name != socket_host || dest_port != SocketPort();
+  return (via_proxy && use_ssl) ? dest_host_name : socket_host;
+}
+
+string HttpClient::Options::DebugString() const {  // summary of the set options
+  std::ostringstream ss;
+  ss << "dest=" << dest_host_name << ":" << dest_port;  // always printed
+  if (!http_host_name.empty())
+    ss << " http_host=" << http_host_name;
+  if (!url_path_prefix.empty())
+    ss << " url_path_prefix=" << url_path_prefix;
+  if (!proxy_host_name.empty())
+    ss << " proxy=" << proxy_host_name << ":" << proxy_port;
+  if (!extra_params.empty())
+    ss << " extra=" << extra_params;
+  if (!authorization.empty())
+    ss << " authorization:enabled";  // the value itself is not printed
+  if (!cookie.empty())
+    ss << " cookie=" << cookie;  // NOTE(review): prints the raw cookie value
+  if (oauth2_config.enabled())
+    ss << " oauth2:enabled";
+  if (!service_account_json_filename.empty())
+    ss << " service_account:" << service_account_json_filename;
+  if (!gce_service_account.empty())
+    ss << " gce_service_account:" << gce_service_account;
+  if (capture_response_header)
+    ss << " capture_response_header";
+  if (use_ssl)
+    ss << " use_ssl";
+  if (!ssl_extra_cert.empty())
+    ss << " ssl_extra_cert=" << ssl_extra_cert;
+  if (!ssl_extra_cert_data.empty())
+    ss << " ssl_extra_cert_data:set";  // the data itself is not printed
+  ss << " socket_read_timeout_sec=" << socket_read_timeout_sec;
+  ss << " retry_backoff_ms="
+     << min_retry_backoff_ms << " .. " << max_retry_backoff_ms;
+  if (fail_fast) {
+    ss << " fail_fast";
+  }
+  return ss.str();
+}
+
+void HttpClient::Options::ClearAuthConfig() {
+  gce_service_account.clear();
+  service_account_json_filename.clear();
+  oauth2_config.clear();
+  luci_context_auth.clear();
+}
+
+// This object is created when asynchronously waiting for
+// HttpClient. The object is deleted by RunCallback,DoCallback.
+//
+// A Task drives one request/response exchange: Start() obtains a
+// descriptor and registers DoWrite/DoTimeout, DoWrite sends the request,
+// DoRequestDone switches to reading, DoRead receives and parses the
+// response, and RunCallback schedules DoCallback, which runs the user
+// callback and deletes the task.
+class HttpClient::Task {
+ public:
+  // Remembers the id of the thread that constructs the task (later
+  // closures are posted to that thread), makes sure |status| has at least
+  // one timeout entry (kDefaultTimeoutSec), increments the client's active
+  // count, and copies the request path and trace id into |resp|.
+  // A request is treated as a ping when its trace id is exactly "ping".
+  Task(HttpClient* client,
+       const HttpClient::Request* req,
+       HttpClient::Response* resp,
+       Status* status,
+       WorkerThreadManager* wm,
+       OneshotClosure* callback)
+      : client_(client),
+        req_(req),
+        resp_(resp),
+        status_(status),
+        wm_(wm),
+        thread_id_(wm_->GetCurrentThreadId()),
+        d_(nullptr),
+        active_(false),
+        close_state_(HttpClient::ERROR_CLOSE),
+        auth_status_(OK),
+        request_message_written_(0),
+        is_ping_(status_->trace_id == "ping"),
+        callback_(callback) {
+    if (status_->timeout_secs.empty())
+      status_->timeout_secs.push_back(kDefaultTimeoutSec);
+    client_->IncNumActive();
+    resp_->SetRequestPath(req_->request_path());
+    resp_->SetTraceId(status_->trace_id);
+  }
+
+  // Begins (or re-begins, after throttling, a connect failure, an OAuth2
+  // refresh, or a retry) one attempt at sending the request.
+  // Every early exit either finishes the task through RunCallback or
+  // re-schedules Start itself to run later.
+  void Start() {
+    CHECK(!status_->finished);
+    CHECK(!active_);
+    if (client_->failnow()) {
+      status_->enabled = false;
+      RunCallback(FAIL, "http fail now");
+      return;
+    }
+    // TODO: rethink the way refreshing OAuth2 access token.
+    // Refreshing OAuth2 access token is a bit complex operation, and
+    // difficult to track the behavior.  Refactoring must be needed.
+    // NEED_REFRESH is set below when this task had to wait for a token
+    // refresh; on re-entry the request is cloned so that the new
+    // authorization can be set on it (req_ itself is const).
+    if (auth_status_ == NEED_REFRESH) {
+      const string& authorization = client_->GetOAuth2Authorization();
+      if (authorization.empty()) {
+        RunCallback(FAIL, "authorization not available");
+        return;
+      }
+      cloned_req_ = req_->Clone();
+      cloned_req_->SetAuthorization(authorization);
+      auth_status_ = OK;
+      req_ = cloned_req_.get();
+      LOG(INFO) << status_->trace_id
+                << " cloned HttpClient::Request to set authorization.";
+    }
+    if (client_->ShouldRefreshOAuth2AccessToken()) {
+      LOG(INFO) << status_->trace_id
+                << " authorization is not ready, going to run after refresh.";
+      auth_status_ = NEED_REFRESH;
+      client_->RunAfterOAuth2AccessTokenGetReady(
+          wm_->GetCurrentThreadId(),
+          NewCallback(this, &HttpClient::Task::Start));
+      return;
+    }
+    // timer_ was restarted when the previous attempt was deferred, so this
+    // is the time spent waiting since then; accumulate it.
+    int throttle_time = timer_.GetInMs();
+    status_->throttle_time += throttle_time;
+    // A positive value from TryStart() means the client wants this request
+    // to back off for that many milliseconds.
+    int backoff = client_->TryStart();
+    if (backoff > 0) {
+      if (status_->num_throttled == 0) {  // only increment first time.
+        DCHECK_EQ(Status::INIT, status_->state);
+        status_->state = Status::PENDING;
+        client_->IncNumPending();
+      }
+      ++status_->num_throttled;
+      if (status_->throttle_time > kDefaultThrottleTimeoutMilliSec) {
+        LOG(WARNING) << status_->trace_id
+                     << " Timeout in throttled. throttle_time="
+                     << status_->throttle_time;
+        RunCallback(ERR_TIMEOUT, "Time-out in throttled");
+        return;
+      }
+      LOG(WARNING) << status_->trace_id
+                   << " Throttled backoff=" << backoff << "msec"
+                   << " remaining="
+                   << (kDefaultThrottleTimeoutMilliSec - status_->throttle_time)
+                   << "ms";
+      // TODO: might need to cancel this on shutdown?
+      wm_->RunDelayedClosureInThread(
+          FROM_HERE,
+          wm_->GetCurrentThreadId(),
+          backoff,
+          NewCallback(this, &HttpClient::Task::Start));
+      timer_.Start();
+      return;
+    }
+    LOG_IF(INFO, status_->num_throttled > 0)
+        << status_->trace_id << " http: Start throttled req. "
+        << status_->num_throttled
+        << " time=" << status_->throttle_time
+        << " [last throttle=" << throttle_time << "]";
+    // Each attempt consumes one entry of timeout_secs (popped below); no
+    // entries left means there is no time budget for another attempt.
+    if (status_->timeout_secs.empty()) {
+      LOG(WARNING) << status_->trace_id
+                   << " Time-out in connect";
+      RunCallback(ERR_TIMEOUT, "Time-out in connect");
+      return;
+    }
+
+    // TODO: make connect async.
+    d_ = client_->NewDescriptor(is_ping_);
+    if (d_ == nullptr) {
+      ++status_->num_connect_failed;
+      // Note we do not retry if handling ping because its scenario
+      // does not match what we expect.
+      //
+      // As written below, this code's goal is mitigating temporary
+      // network failure while several requests are on-flight concurrently.
+      // Since we usually run only one ping request, it does not meet
+      // the scenario below.
+      if (is_ping_ || status_->num_connect_failed > kMaxConnectionFailure) {
+        RunCallback(FAIL, "Can't establish connection to server");
+        return;
+      }
+      // Note that goal of this backoff and retry is mitigating a temporary
+      // network failure suggested in: b/36575944#comment6
+      // The scenario like:
+      //   1. send request A
+      //   2. send request B
+      //   3. got error as response A or B
+      //   4. send request C, need to connect -> fail. no address available
+      //   5. got success as response A or B
+      // (Considered elapsed time from Step 3 to Step 5 is expected to be small,
+      //  say less than 1 second)
+      //
+      // Since we expect the address is marked as success again in Step 5.
+      // we do not retry for long time. (e.g. 60 seconds to error address
+      // become available in socket_pool.)
+      int start_backoff = client_->GetRandomizeBackoffTimeInMs();
+      LOG(WARNING) << status_->trace_id
+                   << " Can't establish connection to server"
+                   << " retry after backoff=" << start_backoff;
+      // TODO: might need to cancel this on shutdown?
+      wm_->RunDelayedClosureInThread(
+          FROM_HERE,
+          wm_->GetCurrentThreadId(),
+          start_backoff,
+          NewCallback(this, &HttpClient::Task::Start));
+      timer_.Start();
+      return;
+    }
+    // Connected: the request is no longer pending, so undo the pending
+    // count taken when it was first throttled.
+    if (status_->state == Status::PENDING) {
+      client_->DecNumPending();
+    }
+    DCHECK(status_->state == Status::INIT || status_->state == Status::PENDING)
+        << status_->trace_id << " state=" << status_->state;
+    status_->state = Status::SENDING_REQUEST;
+
+    resp_->Reset();
+    active_ = true;
+    status_->connect_success = true;
+    // Consume the timeout for this attempt.
+    double t = static_cast<double>(status_->timeout_secs.front());
+    status_->timeout_secs.pop_front();
+    // Measure how long building the request message takes.
+    timer_.Start();
+    request_message_ = req_->CreateMessage();
+    status_->req_build_time = timer_.GetInMs();
+    status_->req_size = request_message_.size();
+    VLOG(1) << status_->trace_id << " request\n"
+            << request_message_;
+
+    d_->NotifyWhenWritable(
+        NewPermanentCallback(this, &HttpClient::Task::DoWrite));
+    d_->NotifyWhenTimedout(
+        t, NewCallback(this, &HttpClient::Task::DoTimeout));
+    // Restart the timer; DoRequestDone reads it as the request send time.
+    timer_.Start();
+  }
+
+ private:
+  // Whether the request's authorization is usable (OK) or the task is
+  // waiting for an OAuth2 access token refresh (NEED_REFRESH).
+  enum AuthorizationStatus {
+    OK,
+    NEED_REFRESH,
+  };
+  // Private: the task deletes itself in DoCallback.
+  ~Task() {
+    CHECK(!active_);
+  }
+
+  // Writable callback: writes the not-yet-sent part of request_message_.
+  // Once everything has been written, stops write notifications and posts
+  // DoRequestDone to the task's thread.
+  void DoWrite() {
+    if (!active_) {
+      LOG(WARNING) << "Already finished?";
+      RunCallback(FAIL, "Writable, but already inactive");
+      return;
+    }
+    if (client_->failnow()) {
+      status_->enabled = false;
+      RunCallback(FAIL, "http fail now");
+      return;
+    }
+    CHECK(d_);
+    VLOG(7) << "DoWrite " << d_;
+    int n = d_->Write(
+        request_message_.data() + request_message_written_,
+        request_message_.size() - request_message_written_);
+    VLOG(3) << status_->trace_id << " DoWrite "
+            << (request_message_.size() - request_message_written_)
+            << " -> " << n;
+    // Retryable failure: just wait for the next writable notification.
+    if (n < 0 && d_->NeedRetry())
+      return;
+    if (n <= 0) {
+      LOG(WARNING) << status_->trace_id
+                   << " Write failed " << n
+                   << " err=" << d_->GetLastErrorMessage();
+      std::ostringstream err_message;
+      err_message << status_->trace_id
+                  << " Write failed ret=" << n
+                  << " @" << request_message_written_
+                  << " of " << request_message_.size()
+                  << " : " << d_->GetLastErrorMessage();
+      RunCallback(FAIL, err_message.str());
+      return;
+    }
+    request_message_written_ += n;
+    client_->IncWriteByte(n);
+    if (request_message_written_ == request_message_.size()) {
+      // Request has been sent.
+      DCHECK_EQ(Status::SENDING_REQUEST, status_->state);
+      status_->state = Status::REQUEST_SENT;
+      d_->StopWrite();
+      wm_->RunClosureInThread(
+          FROM_HERE,
+          thread_id_,
+          NewCallback(this, &HttpClient::Task::DoRequestDone),
+          WorkerThreadManager::PRIORITY_IMMEDIATE);
+    }
+  }
+
+  // Readable callback: reads into the response buffer.  When the response
+  // reports that it is complete (Recv() returns true) it is parsed and the
+  // task finishes through RunCallback; otherwise the read timeout is
+  // extended and the task keeps waiting for more data.
+  void DoRead() {
+    if (!active_) {
+      LOG(WARNING) << "Already finished?";
+      RunCallback(FAIL, "Readable, but already inactive");
+      return;
+    }
+    if (client_->failnow()) {
+      status_->enabled = false;
+      RunCallback(FAIL, "http fail now");
+      return;
+    }
+    if (status_->state != Status::RECEIVING_RESPONSE) {
+      DCHECK_EQ(Status::REQUEST_SENT, status_->state);
+      status_->state = Status::RECEIVING_RESPONSE;
+    }
+    CHECK(d_);
+    char* buf;
+    int buf_size;
+    resp_->Buffer(&buf, &buf_size);
+    int r = d_->Read(buf, buf_size);
+    VLOG(7) << "DoRead " << d_ << " buf_size=" << buf_size << " r=" << r;
+    // Retryable failure: just wait for the next readable notification.
+    if (r < 0 && d_->NeedRetry()) {
+      return;
+    }
+
+    if (r < 0) {  // error
+      LOG(WARNING) << status_->trace_id
+                   << " Read failed " << r
+                   << " err=" << d_->GetLastErrorMessage();
+      std::ostringstream err_message;
+      err_message << status_->trace_id
+                  << " Read failed ret=" << r
+                  << " @" << resp_->len()
+                  << " of " << resp_->buffer_size()
+                  << " : " << d_->GetLastErrorMessage();
+      err_message << " : received=" << resp_->Header();
+      RunCallback(FAIL, err_message.str());
+      return;
+    }
+    // First data of the response: record how long we waited for it and
+    // switch to the socket read timeout.
+    if (status_->wait_time == 0 && resp_->len() == 0) {
+      status_->wait_time = timer_.GetInMs();
+      timer_.Start();
+      d_->ChangeTimeout(client_->options().socket_read_timeout_sec);
+    }
+    client_->IncReadByte(r);
+    if (resp_->Recv(r)) {
+      VLOG(1) << status_->trace_id << " response\n"
+              << resp_->Header();
+      status_->resp_recv_time = timer_.GetInMs();
+      timer_.Start();
+      resp_->Parse();
+      status_->resp_parse_time = timer_.GetInMs();
+      status_->resp_size = resp_->len();
+      if (resp_->status_code() != 200 || resp_->result() == FAIL) {
+        // Failed response: close_state_ stays ERROR_CLOSE and the header
+        // is kept for diagnosis.
+        DCHECK_EQ(close_state_, HttpClient::ERROR_CLOSE);
+        CaptureResponseHeader();
+      } else {
+        DCHECK_EQ(resp_->result(), OK);
+        DCHECK_EQ(resp_->status_code(), 200);
+
+        // Keep the connection only when the response does not ask to close
+        // it and connection reuse is enabled in the options.
+        if (resp_->HasConnectionClose() ||
+            !client_->options().reuse_connection) {
+          close_state_ = HttpClient::NORMAL_CLOSE;
+        } else {
+          close_state_ = HttpClient::NO_CLOSE;
+        }
+      }
+      status_->http_return_code = resp_->status_code();
+      DCHECK_EQ(Status::RECEIVING_RESPONSE, status_->state);
+      status_->state = Status::RESPONSE_RECEIVED;
+      RunCallback(resp_->result(), resp_->err_message());
+      return;
+    }
+    if (client_->options().capture_response_header &&
+        resp_->HasHeader()) {
+      CaptureResponseHeader();
+    }
+    // More data expected: allow the socket read timeout plus the client's
+    // estimate of the time needed to receive the remaining bytes.
+    d_->ChangeTimeout(
+        client_->options().socket_read_timeout_sec +
+        client_->EstimatedRecvTime(resp_->remaining()));
+  }
+
+  // Timeout callback.  If no further timeout entries remain, finishes the
+  // task with ERR_TIMEOUT and a message describing which phase timed out;
+  // otherwise stops I/O notifications and posts DoRetry.
+  void DoTimeout() {
+    if (!active_) {
+      LOG(WARNING) << "Already finished?";
+      return;
+    }
+    if (client_->failnow()) {
+      status_->enabled = false;
+      RunCallback(FAIL, "http fail now");
+      return;
+    }
+    if (status_->timeout_secs.empty()) {
+      std::ostringstream err_message;
+      err_message << "Timed out: ";
+      // request_message_ is cleared in DoRequestDone, so a non-empty
+      // message with no send time means the request is still being sent.
+      if (status_->req_send_time == 0 && !request_message_.empty()) {
+        err_message << "sending request "
+                    << request_message_written_
+                    << " of " << request_message_.size()
+                    << " " << timer_.GetInMs() << "ms";
+      } else if (resp_->len() == 0) {
+        err_message << "waiting response "
+                    << " " << timer_.GetInMs() << "ms";
+      } else {
+        err_message << "receiving response "
+                    << resp_->len()
+                    << " of " << resp_->buffer_size()
+                    << " " << timer_.GetInMs() << "ms";
+      }
+      LOG(WARNING) << status_->trace_id << " " << err_message.str();
+      RunCallback(ERR_TIMEOUT, err_message.str());
+      return;
+    }
+    d_->StopRead();
+    d_->StopWrite();
+    wm_->RunClosureInThread(
+        FROM_HERE,
+        thread_id_,
+        NewCallback(this, &HttpClient::Task::DoRetry),
+        WorkerThreadManager::PRIORITY_MED);
+  }
+
+  // Finishes the task with |err| / |err_message|: stops I/O notifications,
+  // marks the task inactive, records the error in status_, and posts
+  // DoCallback to the task's thread.
+  void RunCallback(int err, const string& err_message) {
+    VLOG(2) << status_->trace_id
+            << " RunCallback"
+            << " err=" << err
+            << " msg=" << err_message;
+    if (d_) {
+      d_->StopRead();
+      d_->StopWrite();
+    }
+    active_ = false;
+    status_->err = err;
+    status_->err_message = err_message;
+
+    // The task may finish while still throttled; balance IncNumPending().
+    if (status_->state == Status::PENDING) {
+      client_->DecNumPending();
+    }
+
+    // We MUST use lower priority than Descriptor to ensure the TLS write
+    // closure stopped.
+    wm_->RunClosureInThread(
+        FROM_HERE,
+        thread_id_,
+        NewCallback(this, &HttpClient::Task::DoCallback),
+        WorkerThreadManager::PRIORITY_MED);
+  }
+
+  // Posted by DoTimeout: releases the current descriptor as ERROR_CLOSE,
+  // resets the request/response progress, counts the retry, and calls
+  // Start() again.
+  void DoRetry() {
+    LOG(INFO) << status_->trace_id << " DoRetry ";
+    if (!active_)
+      return;
+    Descriptor* d = d_;
+    d_ = nullptr;
+    client_->ReleaseDescriptor(d, HttpClient::ERROR_CLOSE);
+    active_ = false;
+    request_message_.clear();
+    request_message_written_ = 0;
+    resp_->Reset();
+    ++status_->num_retry;
+    Start();
+  }
+
+  // Posted by DoWrite once the whole request has been written: records the
+  // send time, drops the request message, and starts waiting for the
+  // response.
+  void DoRequestDone() {
+    VLOG(3) << status_->trace_id << " DoWrite " << " done";
+    if (!active_)
+      return;
+    status_->req_send_time = timer_.GetInMs();
+    request_message_.clear();
+    d_->ClearWritable();
+    d_->NotifyWhenReadable(
+        NewPermanentCallback(this, &HttpClient::Task::DoRead));
+    // Restart the timer; DoRead reads it as the wait time for the response.
+    timer_.Start();
+  }
+
+  // Posted by RunCallback: the final step of the task.  Marks the status
+  // finished, updates client statistics (except for ping), runs the user
+  // callback if any, releases the descriptor, and deletes this task.
+  void DoCallback() {
+    VLOG(3) << status_->trace_id << " DoCallback"
+            << " close_state=" << close_state_;
+    CHECK(!active_);
+    Descriptor* d = d_;
+    d_ = nullptr;
+    // once callback_ is called, it is not safe to touch status_.
+    status_->finished = true;
+    // Since status for ping would be updated in
+    // UpdateHealthStatusMessageForPing, we do not need to update it here.
+    // (b/26701852)
+    if (!is_ping_) {
+      client_->UpdateStats(*status_);
+    } else {
+      LOG(INFO) << "We will not update status for ping.";
+    }
+    OneshotClosure* callback = callback_;
+    callback_ = nullptr;
+    if (callback)
+      callback->Run();
+    client_->ReleaseDescriptor(d, close_state_);
+    client_->DecNumActive();
+    delete this;
+  }
+
+  // Stores the response header in status_->response_header, unless one
+  // has already been stored.
+  void CaptureResponseHeader() {
+    if (!status_->response_header.empty())
+      return;
+    status_->response_header = string(resp_->Header());
+  }
+
+  HttpClient* client_;
+  // The request to send.  Points at cloned_req_ after the request has been
+  // cloned in Start() to set a refreshed authorization.
+  const HttpClient::Request* req_;
+  std::unique_ptr<HttpClient::Request> cloned_req_;
+  HttpClient::Response* resp_;
+  Status* status_;
+  WorkerThreadManager* wm_;
+  // Thread on which the task was constructed; closures are posted to it.
+  WorkerThreadManager::ThreadId thread_id_;
+  // Descriptor of the current attempt; nullptr while not connected.
+  Descriptor* d_;
+
+  // True between a successful connect in Start() and the end of the
+  // attempt (RunCallback or DoRetry).
+  bool active_;
+  // How the descriptor is released in DoCallback.
+  HttpClient::ConnectionCloseState close_state_;
+  AuthorizationStatus auth_status_;
+
+  string request_message_;
+  // Number of bytes of request_message_ already written.
+  size_t request_message_written_;
+
+  const bool is_ping_;
+
+  // Restarted at each phase boundary to measure throttle, build, send,
+  // wait, receive and parse times.
+  SimpleTimer timer_;
+
+  // Callback that is called when RPC is received and has completed.
+  OneshotClosure* callback_;
+
+  DISALLOW_COPY_AND_ASSIGN(Task);
+};
+
+// Creates a status for a request that has not started yet: state is INIT,
+// all sizes, timings and counters are zero, and the flags are false except
+// |timeout_should_be_http_error| and |enabled|, which start as true.
+HttpClient::Status::Status()
+    : state(Status::INIT),
+      timeout_should_be_http_error(true),
+      connect_success(false),
+      finished(false),
+      err(0),
+      enabled(true),
+      http_return_code(0),
+      req_size(0),
+      resp_size(0),
+      raw_req_size(0),
+      raw_resp_size(0),
+      throttle_time(0),
+      pending_time(0),
+      req_build_time(0),
+      req_send_time(0),
+      wait_time(0),
+      resp_recv_time(0),
+      resp_parse_time(0),
+      num_retry(0),
+      num_throttled(0),
+      num_connect_failed(0) {
+}
+
+// Returns all scalar fields of this status as space separated
+// "name=value" pairs on a single line, in a fixed order.
+string HttpClient::Status::DebugString() const {
+  std::ostringstream out;
+  // Progress and result.
+  out << "state=" << state;
+  out << " timeout_should_be_http_error=" << timeout_should_be_http_error;
+  out << " connect_success=" << connect_success;
+  out << " finished=" << finished;
+  out << " err=" << err;
+  out << " http_return_code=" << http_return_code;
+  // Sizes.
+  out << " req_size=" << req_size;
+  out << " resp_size=" << resp_size;
+  out << " raw_req_size=" << raw_req_size;
+  out << " raw_resp_size=" << raw_resp_size;
+  // Timings.
+  out << " throttle_time=" << throttle_time;
+  out << " pending_time=" << pending_time;
+  out << " req_build_time=" << req_build_time;
+  out << " req_send_time=" << req_send_time;
+  out << " wait_time=" << wait_time;
+  out << " resp_recv_time=" << resp_recv_time;
+  out << " resp_parse_time=" << resp_parse_time;
+  // Counters.
+  out << " num_retry=" << num_retry;
+  out << " num_throttled=" << num_throttled;
+  out << " num_connect_failed=" << num_connect_failed;
+  return out.str();
+}
+
+// Creates an empty traffic sample: all byte, query and error counts zero.
+HttpClient::TrafficStat::TrafficStat()
+    : read_byte(0), write_byte(0), query(0), http_err(0) {
+}
+
+/* static */
+// Creates a SocketPool for the host and port that |options| says the
+// socket should connect to, and returns it as a SocketFactory.
+std::unique_ptr<SocketFactory> HttpClient::NewSocketFactoryFromOptions(
+    const Options& options) {
+  std::unique_ptr<SocketFactory> factory(
+      new SocketPool(options.SocketHost(), options.SocketPort()));
+  return factory;
+}
+
+// Creates the TLS engine factory described by |options|.
+// Returns nullptr when SSL is not requested.  Otherwise returns an
+// OpenSSLEngineCache configured with the extra certificates (file and/or
+// in-memory data), the destination host name, the proxy (if any) and the
+// maximum CRL validity duration.
+std::unique_ptr<TLSEngineFactory> HttpClient::NewTLSEngineFactoryFromOptions(
+    const Options& options) {
+  if (!options.use_ssl) {
+    return nullptr;
+  }
+
+  std::unique_ptr<OpenSSLEngineCache> engine_cache(new OpenSSLEngineCache);
+  if (!options.ssl_extra_cert.empty()) {
+    engine_cache->AddCertificateFromFile(options.ssl_extra_cert);
+  }
+  if (!options.ssl_extra_cert_data.empty()) {
+    engine_cache->AddCertificateFromString(options.ssl_extra_cert_data);
+  }
+  engine_cache->SetHostname(options.dest_host_name);
+  if (!options.proxy_host_name.empty()) {
+    engine_cache->SetProxy(options.proxy_host_name, options.proxy_port);
+  }
+  engine_cache->SetCRLMaxValidDurationInSeconds(
+      options.ssl_crl_max_valid_duration);
+  return std::unique_ptr<TLSEngineFactory>(std::move(engine_cache));
+}
+
+// Takes ownership of |socket_factory| and |tls_engine_factory|, copies
+// |options|, and uses |wm| for periodic closures and socket descriptors.
+// The health status starts as "initializing".
+// CHECK-fails if the minimum retry backoff is not positive, if it is not
+// smaller than the maximum retry backoff, or if the authorization or
+// cookie option contains CR or LF.
+HttpClient::HttpClient(std::unique_ptr<SocketFactory> socket_factory,
+                       std::unique_ptr<TLSEngineFactory> tls_engine_factory,
+                       const Options& options,
+                       WorkerThreadManager* wm)
+    : options_(options),
+      tls_engine_factory_(std::move(tls_engine_factory)),
+      socket_pool_(std::move(socket_factory)),
+      wm_(wm),
+      cond_(&mu_),
+      health_status_("initializing"),
+      shutting_down_(false),
+      bad_status_num_in_recent_http_(0),
+      network_error_status_(options.network_error_margin),
+      num_query_(0),
+      num_active_(0),
+      total_pending_(0),
+      peak_pending_(0),
+      num_pending_(0),
+      num_http_retry_(0),
+      num_http_timeout_(0),
+      num_http_error_(0),
+      total_write_byte_(0),
+      total_read_byte_(0),
+      num_writable_(0),
+      num_readable_(0),
+      read_size_(new Histogram),
+      write_size_(new Histogram),
+      total_resp_byte_(0),
+      total_resp_time_(0),
+      ping_http_return_code_(-1),
+      ping_round_trip_time_ms_(-1),
+      traffic_history_closure_id_(kInvalidPeriodicClosureId),
+      retry_backoff_ms_(options.min_retry_backoff_ms),
+      enabled_from_(0),
+      num_network_error_(0),
+      num_network_recovered_(0) {
+  LOG(INFO) << options_.DebugString();
+  CHECK_GT(retry_backoff_ms_, 0);
+  CHECK_LT(options.min_retry_backoff_ms, options.max_retry_backoff_ms);
+  read_size_->SetName("read size distribution");
+  write_size_->SetName("write size distribution");
+  // Reject CR/LF in values that are configured for use in HTTP requests.
+  if (!options_.authorization.empty()) {
+    CHECK(options_.authorization.find_first_of("\r\n") == string::npos)
+        << "authorization must not contain CR LF:" << options_.authorization;
+  }
+  if (!options_.cookie.empty()) {
+    CHECK(options_.cookie.find_first_of("\r\n") == string::npos)
+        << "cookie must not contain CR LF:" << options_.cookie;
+  }
+  LOG_IF(ERROR, !socket_pool_->IsInitialized())
+      << "socket pool is not initialized yet.";
+  // Start the traffic history with one empty sample.
+  traffic_history_.push_back(TrafficStat());
+
+  // Run UpdateTrafficHistory periodically (period argument: 1000).
+  traffic_history_closure_id_ = wm_->RegisterPeriodicClosure(
+      FROM_HERE, 1000, NewPermanentCallback(
+          this, &HttpClient::UpdateTrafficHistory));
+
+  if (options_.use_ssl) {
+    DCHECK(tls_engine_factory_.get() != nullptr);
+    socket_pool_->SetObserver(tls_engine_factory_.get());
+  }
+  // The OAuth2 refresh task gets its own options carrying only the proxy
+  // and the authentication related settings of |options|.
+  HttpClient::Options oauth2_options;
+  oauth2_options.proxy_host_name = options.proxy_host_name;
+  oauth2_options.proxy_port = options.proxy_port;
+  oauth2_options.gce_service_account = options.gce_service_account;
+  oauth2_options.service_account_json_filename =
+      options.service_account_json_filename;
+  oauth2_options.oauth2_config = options.oauth2_config;
+  oauth2_options.luci_context_auth = options.luci_context_auth;
+  oauth_refresh_task_ = OAuth2AccessTokenRefreshTask::New(
+      wm_, oauth2_options);
+}
+
+// Marks the client as shutting down and blocks until no task is active,
+// then shuts down and waits for the OAuth2 refresh task (if any) and
+// unregisters the periodic traffic history closure.
+HttpClient::~HttpClient() {
+  {
+    AUTOLOCK(lock, &mu_);
+    shutting_down_ = true;
+    LOG(INFO) << "wait all tasks num_active=" << num_active_;
+    // Wait on cond_ until the active task count drops to zero.
+    while (num_active_ > 0)
+      cond_.Wait();
+  }
+  if (oauth_refresh_task_.get()) {
+    oauth_refresh_task_->Shutdown();
+    oauth_refresh_task_->Wait();
+  }
+  if (traffic_history_closure_id_ != kInvalidPeriodicClosureId) {
+    wm_->UnregisterPeriodicClosure(traffic_history_closure_id_);
+    traffic_history_closure_id_ = kInvalidPeriodicClosureId;
+  }
+  LOG(INFO) << "HttpClient terminated.";
+}
+
+// Initializes |req| for |method| and |path| using this client's options.
+// When an OAuth2 authorization is available it is set on the request; a
+// warning is logged if the authorization option was also given, since the
+// option is then ignored.
+void HttpClient::InitHttpRequest(
+    Request* req, const string& method, const string& path) const {
+  req->Init(method, path, options_);
+  const string& oauth2_authorization = GetOAuth2Authorization();
+  if (oauth2_authorization.empty()) {
+    return;
+  }
+  req->SetAuthorization(oauth2_authorization);
+  LOG_IF(WARNING, !options_.authorization.empty())
+      << "authorization option is given but ignored.";
+}
+
+// Synchronous request: starts the request without a completion callback
+// and then dispatches worker-thread events until |status| is finished.
+void HttpClient::Do(const Request* req, Response* resp, Status* status) {
+  DCHECK(status);
+  DCHECK(wm_);
+  DoAsync(req, resp, status, nullptr);
+  Wait(status);
+}
+
+// Starts an asynchronous request.  |callback| (may be null) is run when
+// the request has finished.
+// While the client is failing fast (see failnow()), no task is created:
+// |status| is completed immediately as a FAIL with http_return_code 403
+// and |callback| is run right away.
+void HttpClient::DoAsync(
+    const Request* req, Response* resp,
+    Status* status, OneshotClosure* callback) {
+  if (!failnow()) {
+    DCHECK(wm_) << "There isn't any worker thread to send to";
+    // The task deletes itself once it has completed.
+    Task* task = new Task(this, req, resp, status, wm_, callback);
+    task->Start();
+    return;
+  }
+
+  status->enabled = false;
+  status->connect_success = false;
+  status->finished = true;
+  status->err = FAIL;
+  status->err_message = "http disabled";
+  status->http_return_code = 403;
+  // once callback_ is called, it is not safe to touch status.
+  if (callback) {
+    callback->Run();
+  }
+}
+
+// Dispatches worker-thread events until |status| is marked finished.
+// CHECK-fails if Dispatch() returns false.
+void HttpClient::Wait(Status* status) {
+  while (!status->finished) {
+    CHECK(wm_->Dispatch());
+  }
+}
+
+// Puts the client into the shutting down state (health status becomes
+// "shutting down") and shuts down the OAuth2 refresh task, if there is one.
+void HttpClient::Shutdown() {
+  {
+    AUTOLOCK(lock, &mu_);
+    LOG(INFO) << "shutdown";
+    shutting_down_ = true;
+    health_status_ = "shutting down";
+  }
+  if (oauth_refresh_task_.get() == nullptr) {
+    return;
+  }
+  oauth_refresh_task_->Shutdown();
+}
+
+// Returns whether the client has been asked to shut down.  Reads the flag
+// while holding mu_.
+bool HttpClient::shutting_down() const {
+  AUTOLOCK(lock, &mu_);
+  const bool is_shutting_down = shutting_down_;
+  return is_shutting_down;
+}
+
+// Obtains a socket from the pool and wraps it in a descriptor registered
+// with the worker thread manager.
+// If no socket is available and either |may_retry| or the
+// force_connect_errorneous_address option is set, the pool's errors are
+// cleared and the socket is requested once more.
+// Returns nullptr (after recording a network error) when no socket could
+// be obtained.  With SSL enabled the result is an initialized
+// TLSDescriptor; otherwise it is the plain socket descriptor.
+Descriptor* HttpClient::NewDescriptor(bool may_retry) {
+  ScopedSocket sock(socket_pool_->NewSocket());
+  const bool retry_allowed =
+      may_retry || options_.force_connect_errorneous_address;
+  if (!sock.valid() && retry_allowed) {
+    socket_pool_->ClearErrors();
+    sock = socket_pool_->NewSocket();
+    if (!sock.valid()) {
+      LOG(WARNING) << "connection retry failed after clearing errors";
+    } else {
+      LOG(INFO) << "connection retry success after clearing errors";
+    }
+  }
+  if (!sock.valid()) {
+    AUTOLOCK(lock, &mu_);
+    NetworkErrorDetectedUnlocked();
+    return nullptr;
+  }
+
+  if (!options_.use_ssl) {
+    return wm_->RegisterSocketDescriptor(std::move(sock),
+                                         WorkerThreadManager::PRIORITY_MED);
+  }
+
+  TLSEngine* engine = tls_engine_factory_->NewTLSEngine(sock.get());
+  TLSDescriptor::Options tls_desc_options;
+  if (!options_.proxy_host_name.empty()) {
+    // Going through a proxy: the TLS descriptor needs the real destination.
+    tls_desc_options.use_proxy = true;
+    tls_desc_options.dest_host_name = options_.dest_host_name;
+    tls_desc_options.dest_port = options_.dest_port;
+  }
+  TLSDescriptor* tls_descriptor = new TLSDescriptor(
+      wm_->RegisterSocketDescriptor(std::move(sock),
+                                    WorkerThreadManager::PRIORITY_MED),
+      engine, tls_desc_options, wm_);
+  tls_descriptor->Init();
+  return tls_descriptor;
+}
+
+// Gives back a descriptor obtained from NewDescriptor().  |d| may be null,
+// in which case nothing happens.
+// The underlying socket is returned to the pool for reuse only when
+// |close_state| is NO_CLOSE and the descriptor reports it can be reused;
+// otherwise the socket is closed, flagged as an error close when
+// |close_state| is ERROR_CLOSE.
+void HttpClient::ReleaseDescriptor(
+    Descriptor* d, ConnectionCloseState close_state) {
+  if (d == nullptr) {
+    return;
+  }
+
+  const bool reuse_socket = (close_state == NO_CLOSE) && d->CanReuse();
+  SocketDescriptor* socket_desc = d->socket_descriptor();
+  DCHECK(!reuse_socket || !socket_desc->IsClosed())
+      << "should not reuse the socket if it has already been closed."
+      << " fd=" << socket_desc->fd()
+      << " reuse_socket=" << reuse_socket
+      << " close_state=" << close_state
+      << " is_closed=" << socket_desc->IsClosed()
+      << " can_reuse=" << d->CanReuse();
+  if (options_.use_ssl) {
+    // With SSL, |d| is the TLSDescriptor created in NewDescriptor().
+    delete static_cast<TLSDescriptor*>(d);
+  }
+  ScopedSocket released(wm_->DeleteSocketDescriptor(socket_desc));
+  VLOG(3) << "Release fd=" << released.get()
+          << " reuse_socket=" << reuse_socket
+          << " close_state=" << close_state;
+  if (!released.valid()) {
+    return;
+  }
+  if (reuse_socket) {
+    socket_pool_->ReleaseSocket(std::move(released));
+    return;
+  }
+  socket_pool_->CloseSocket(std::move(released), close_state == ERROR_CLOSE);
+}
+
+// Returns true when requests must fail immediately: either the client is
+// shutting down, or |enabled_from_| is set and that time has not been
+// reached yet.
+bool HttpClient::failnow() const {
+  AUTOLOCK(lock, &mu_);
+  if (shutting_down_) {
+    return true;
+  }
+  // enabled_from_ == 0 means the client is not disabled.
+  return enabled_from_ != 0 && time(nullptr) < enabled_from_;
+}
+
+// Returns the ramp-up percentage in [0, 100].
+//  - 100 when |enabled_from_| is not set (0).
+//  - 0 while the current time is still before |enabled_from_|.
+//  - Otherwise it grows linearly with the time elapsed since
+//    |enabled_from_| and reaches 100 after kRampUpDurationSec seconds.
+int HttpClient::ramp_up() const {
+  AUTOLOCK(lock, &mu_);
+  if (enabled_from_ == 0) {
+    return 100;
+  }
+  const time_t now = time(nullptr);
+  if (now < enabled_from_) {
+    return 0;
+  }
+  const time_t elapsed_sec = now - enabled_from_;
+  // Clamp before scaling.  Scaling first and then narrowing the time_t
+  // result to int could overflow or wrap for a large elapsed time and
+  // produce a value outside [0, 100].
+  if (elapsed_sec >= kRampUpDurationSec) {
+    return 100;
+  }
+  // Here elapsed_sec < kRampUpDurationSec, so the result is below 100.
+  return static_cast<int>(elapsed_sec * 100 / kRampUpDurationSec);
+}
+
+// Returns a copy of the current health status message, taken while
+// holding mu_.
+string HttpClient::GetHealthStatusMessage() const {
+  AUTOLOCK(lock, &mu_);
+  string message(health_status_);
+  return message;
+}
+
+// Drops entries older than kHTTPStatusCodeHistoryHoldingSec seconds from
+// the front of the recent status code history, decrementing the bad
+// status count for every dropped entry whose code is not 200.
+void HttpClient::UpdateStatusCodeHistoryUnlocked() {
+  const int kHTTPStatusCodeHistoryHoldingSec = 3;
+  const time_t now = time(nullptr);
+  const time_t oldest_kept = now - kHTTPStatusCodeHistoryHoldingSec;
+
+  while (!recent_http_status_code_.empty()) {
+    const auto& oldest = recent_http_status_code_.front();
+    if (oldest.first >= oldest_kept) {
+      break;
+    }
+    if (oldest.second != 200) {
+      --bad_status_num_in_recent_http_;
+    }
+    recent_http_status_code_.pop_front();
+  }
+}
+
+// Expires old history entries, then appends |status_code| stamped with
+// the current time.  Any code other than 200 counts as a bad status.
+void HttpClient::AddStatusCodeHistoryUnlocked(int status_code) {
+  UpdateStatusCodeHistoryUnlocked();
+
+  const time_t recorded_at = time(nullptr);
+  recent_http_status_code_.emplace_back(recorded_at, status_code);
+  if (status_code != 200) {
+    ++bad_status_num_in_recent_http_;
+  }
+}
+
+// Returns true when the number of non-200 responses in the recent history
+// does not exceed network_error_threshold_percent percent of the number
+// of recent responses.  Expired history entries are dropped first.
+bool HttpClient::IsHealthyRecently() {
+  AUTOLOCK(lock, &mu_);
+
+  UpdateStatusCodeHistoryUnlocked();
+
+  const auto allowed_bad_num =
+      recent_http_status_code_.size() *
+      options_.network_error_threshold_percent / 100;
+  return bad_status_num_in_recent_http_ <= allowed_bad_num;
+}
+
+// Returns true only when the health status message is exactly "ok".
+bool HttpClient::IsHealthy() const {
+  AUTOLOCK(lock, &mu_);
+  const bool healthy = (health_status_ == "ok");
+  return healthy;
+}
+
+// Returns the account reported by the OAuth2 refresh task, or an empty
+// string when there is no refresh task.
+string HttpClient::GetAccount() {
+  if (oauth_refresh_task_.get() != nullptr) {
+    return oauth_refresh_task_->GetAccount();
+  }
+  return "";
+}
+
+// Asks the OAuth2 refresh task to fill |config| and returns its result.
+// Returns false when there is no refresh task.
+bool HttpClient::GetOAuth2Config(OAuth2Config* config) const {
+  if (oauth_refresh_task_.get() != nullptr) {
+    return oauth_refresh_task_->GetOAuth2Config(config);
+  }
+  return false;
+}
+
+// Hands |config| to the OAuth2 refresh task.  On success the client is
+// re-enabled by resetting |enabled_from_| to 0, and true is returned.
+// Returns false when there is no refresh task or it rejects |config|.
+bool HttpClient::SetOAuth2Config(const OAuth2Config& config) {
+  if (oauth_refresh_task_.get() == nullptr) {
+    return false;
+  }
+  if (!oauth_refresh_task_->SetOAuth2Config(config)) {
+    return false;
+  }
+
+  AUTOLOCK(lock, &mu_);
+  // if disabled by 401 error, could try now with new oauth2 config.
+  LOG(INFO) << "new oauth2 config: reset enabled_from_=" << enabled_from_
+            << " to 0";
+  enabled_from_ = 0;
+  return true;
+}
+
+// Returns a multi-line, human readable report of this client: health
+// status, destination, request related options, per status code counts,
+// retry/timeout/error/pending counters, backoff, traffic totals and size
+// histograms, SSL information and network error counters.
+// mu_ is held while the report is built.
+string HttpClient::DebugString() const {
+  AUTOLOCK(lock, &mu_);
+
+  std::ostringstream ss;
+  ss << "Status:" << health_status_ << std::endl;
+  ss << "Remote host: " << socket_pool_->DestName();
+  if (!options_.url_path_prefix.empty()) {
+    ss << " " << options_.url_path_prefix;
+  }
+  if (!options_.extra_params.empty()) {
+    ss << ": " << options_.extra_params;
+  }
+  if (!options_.proxy_host_name.empty()) {
+    ss << " to "
+       << "http://" << options_.dest_host_name << ":" << options_.dest_port;
+  }
+  ss << std::endl;
+  ss << "User-Agent: " << kUserAgentString << std::endl;
+  ss << "SocketPool: " << socket_pool_->DebugString() << std::endl;
+  if (!options_.http_host_name.empty())
+    ss << "Host: " << options_.http_host_name << std::endl;
+  // The authorization value itself is never printed.
+  if (!options_.authorization.empty())
+    ss << "Authorization: enabled" << std::endl;
+  if (!options_.cookie.empty())
+    ss << "Cookie: " << options_.cookie << std::endl;
+  if (options_.oauth2_config.enabled()) {
+    ss << "OAuth2: enabled";
+    if (!options_.service_account_json_filename.empty())
+      ss << " service_account:" << options_.service_account_json_filename;
+    if (!options_.gce_service_account.empty())
+      ss << " gce service_account:" << options_.gce_service_account;
+    ss << std::endl;
+  }
+  ss << std::endl;
+  if (options_.capture_response_header)
+    ss << "Capture response header: enabled" << std::endl;
+
+  ss << std::endl;
+
+  ss << "http status:" << std::endl;
+  for (const auto& iter : num_http_status_code_) {
+    ss << " " << iter.first << ": " << iter.second;
+    // Only print a percentage when there is something to divide by, as is
+    // done for the other counters below; this avoids printing inf/nan.
+    if (num_query_ > 0)
+      ss << " (" << (iter.second * 100.0 / num_query_) << "%)";
+    ss << std::endl;
+  }
+  ss << " Retry: " << num_http_retry_;
+  if (num_query_ > 0)
+    ss << " (" << (num_http_retry_ * 100.0 / num_query_) << "%)";
+  ss << std::endl;
+  ss << " Timeout: " << num_http_timeout_;
+  if (num_query_ > 0)
+    ss << " (" << (num_http_timeout_ * 100.0 / num_query_) << "%)";
+  ss << std::endl;
+  ss << " Error: " << num_http_error_;
+  if (num_query_ > 0)
+    ss << " (" << (num_http_error_ * 100.0 / num_query_) << "%)";
+  ss << std::endl;
+  ss << " Pending: " << total_pending_;
+  if (num_query_ > 0)
+    ss << " (" << (total_pending_ * 100.0 / num_query_) << "%)";
+  ss << " peak " << peak_pending_;
+  ss << std::endl;
+
+  ss << std::endl;
+  ss << "Backoff: " << retry_backoff_ms_ << "msec" << std::endl;
+  if (enabled_from_ > 0) {
+    ss << "Disabled for " << (enabled_from_ - time(nullptr)) << " sec"
+       << std::endl;
+  }
+
+  ss << std::endl;
+  ss << "Write: " << total_write_byte_ << "bytes "
+     << num_writable_ << "calls" << std::endl;
+  ss << "Read: " << total_read_byte_ << "bytes "
+     << num_readable_ << "calls "
+     << "(" << total_resp_byte_ << "bytes in " << total_resp_time_ << "msec)";
+  ss << std::endl;
+  ss << std::endl;
+  ss << write_size_->DebugString() << std::endl;
+  ss << read_size_->DebugString() << std::endl;
+
+  ss << std::endl;
+  if (options_.use_ssl) {
+    ss << "SSL enabled" << std::endl;
+    ss << "Certificate(s) and CRLs:" << std::endl;
+    ss << tls_engine_factory_->GetCertsInfo();
+  } else {
+    ss << "SSL disabled" << std::endl;
+  }
+  ss << std::endl;
+
+  ss << "Network: " << std::endl
+     << " Error Count: " << num_network_error_ << std::endl
+     << " Recovered Count: " << num_network_recovered_ << std::endl;
+
+  return ss.str();
+}
+
+// Dumps the client's configuration and cumulative counters into |json|.
+// Authorization, OAuth2 and the extra certificate data are reported only as
+// "none"/"enabled"/"set" markers.
+// NOTE(review): the cookie is emitted verbatim -- confirm that is intended
+// before this JSON is exposed anywhere it could leak.
+void HttpClient::DumpToJson(Json::Value* json) const {
+  AUTOLOCK(lock, &mu_);
+  (*json)["health_status"] = health_status_;
+  if (!options_.http_host_name.empty()) {
+    (*json)["http_host_name"] = options_.http_host_name;
+  }
+  if (!options_.url_path_prefix.empty()) {
+    (*json)["url_path_prefix"] = options_.url_path_prefix;
+  }
+  if (!options_.extra_params.empty()) {
+    (*json)["extra_params"] = options_.extra_params;
+  }
+  (*json)["user_agent"] = kUserAgentString;
+  (*json)["socket_pool"] = socket_pool_->DebugString();
+  (*json)["authorization"] = (
+      options_.authorization.empty() ? "none" : "enabled");
+  (*json)["cookie"] = options_.cookie;
+  (*json)["oauth2"] = (!options_.oauth2_config.enabled() ? "none" : "enabled");
+  (*json)["capture_response_header"] = (
+      options_.capture_response_header ? "enabled" : "disabled");
+  (*json)["ssl"] = (options_.use_ssl ? "enabled" : "disabled");
+  if (!options_.ssl_extra_cert.empty()) {
+    (*json)["ssl_extra_cert"] = options_.ssl_extra_cert;
+  }
+  if (!options_.ssl_extra_cert_data.empty()) {
+    (*json)["ssl_extra_cert_data"] = "set";
+  }
+  (*json)["socket_read_timeout_sec"] = options_.socket_read_timeout_sec;
+  (*json)["num_query"] = num_query_;
+  (*json)["num_active"] = num_active_;
+  (*json)["num_http_retry"] = num_http_retry_;
+  (*json)["num_http_timeout"] = num_http_timeout_;
+  (*json)["num_http_error"] = num_http_error_;
+  (*json)["write_byte"] = Json::Int64(total_write_byte_);
+  (*json)["read_byte"] = Json::Int64(total_read_byte_);
+  (*json)["num_writable"] = Json::Int64(num_writable_);
+  (*json)["num_readable"] = Json::Int64(num_readable_);
+  (*json)["resp_byte"] = Json::Int64(total_resp_byte_);
+  (*json)["resp_time"] = Json::Int64(total_resp_time_);
+  {
+    // back() is the entry the traffic counters are still updating, so report
+    // the entry before it.  Only step the reverse iterator when the history
+    // is non-empty: incrementing rend() is undefined behavior.
+    TrafficHistory::const_reverse_iterator iter = traffic_history_.rbegin();
+    if (iter != traffic_history_.rend()) {
+      ++iter;
+    }
+    if (iter != traffic_history_.rend()) {
+      (*json)["read_bps"] = iter->read_byte;
+      (*json)["write_bps"] = iter->write_byte;
+    } else {
+      (*json)["read_bps"] = 0;
+      (*json)["write_bps"] = 0;
+    }
+  }
+  // The per-entry read/write/qps/error vectors that used to be built here
+  // were never written to |json| (or anywhere else), so they were dropped.
+}
+
+// Copies the ping result and the cumulative request counters into |stats|,
+// adding one status_code entry per HTTP status code seen so far.
+void HttpClient::DumpStatsToProto(HttpRPCStats* stats) const {
+  AUTOLOCK(lock, &mu_);
+
+  // Latest ping result.
+  stats->set_ping_status_code(ping_http_return_code_);
+  stats->set_ping_round_trip_time_ms(ping_round_trip_time_ms_);
+
+  // Request counters.
+  stats->set_query(num_query_);
+  stats->set_retry(num_http_retry_);
+  stats->set_timeout(num_http_timeout_);
+  stats->set_error(num_http_error_);
+
+  // Network error tracking.
+  stats->set_network_error(num_network_error_);
+  stats->set_network_recovered(num_network_recovered_);
+
+  // Pending request counters.
+  stats->set_current_pending(num_pending_);
+  stats->set_peak_pending(peak_pending_);
+  stats->set_total_pending(total_pending_);
+
+  for (const auto& code_and_count : num_http_status_code_) {
+    HttpRPCStats_HttpStatus* entry = stats->add_status_code();
+    entry->set_status_code(code_and_count.first);
+    entry->set_count(code_and_count.second);
+  }
+}
+
+// Records the outcome of a ping in health_status_ / ping_http_return_code_
+// and returns the status code to report for it.  A previously good (200)
+// ping result is never replaced by a bad one.
+int HttpClient::UpdateHealthStatusMessageForPing(const Status& status,
+                                                 int round_trip_time) {
+  LOG(INFO) << "Ping status:"
+            << " http_return_code=" << status.http_return_code
+            << " throttle_time=" << status.throttle_time
+            << " pending_time=" << status.pending_time
+            << " req_build_time=" << status.req_build_time
+            << " req_send_time=" << status.req_send_time
+            << " wait_time=" << status.wait_time
+            << " resp_recv_time=" << status.resp_recv_time
+            << " resp_parse_time=" << status.resp_parse_time
+            << " round_trip_time=" << round_trip_time;
+
+  AUTOLOCK(lock, &mu_);
+  const int http_code = status.http_return_code;
+  AddStatusCodeHistoryUnlocked(http_code);
+
+  if (shutting_down_) {
+    health_status_ = "shutting down";
+    ping_http_return_code_ = 0;
+    return ping_http_return_code_;
+  }
+
+  // Under race condition of initial ping, good ping status could be
+  // overridden by bad ping status.  (b/26701852)
+  if (ping_http_return_code_ == 200 && http_code != 200) {
+    LOG(INFO) << "We do not update status with bad status."
+              << " ping_http_return_code_=" << ping_http_return_code_
+              << " status.http_return_code=" << http_code;
+    return ping_http_return_code_;
+  }
+
+  // Failures that never produced a usable HTTP response.
+  if (!status.finished) {
+    health_status_ = "error: ping no response";
+    ping_http_return_code_ = 408;  // status timeout.
+    return ping_http_return_code_;
+  }
+  if (!status.connect_success) {
+    health_status_ = "error: failed to connect to backend servers";
+    ping_http_return_code_ = 0;
+    return ping_http_return_code_;
+  }
+  if (status.err == ERR_TIMEOUT) {
+    health_status_ = "error: timed out to send request to backend servers";
+    ping_http_return_code_ = 408;
+    return ping_http_return_code_;
+  }
+
+  ping_http_return_code_ = http_code;
+  if (round_trip_time > 0) {
+    ping_round_trip_time_ms_ = round_trip_time;
+  }
+
+  if (http_code == 200) {
+    health_status_ = "ok";
+    return http_code;
+  }
+
+  // Non-200 response: update throttling / network error state first, then
+  // describe the failure.
+  enabled_from_ = CalculateEnabledFrom(http_code, enabled_from_);
+  if (IsFatalNetworkErrorCode(http_code)) {
+    NetworkErrorDetectedUnlocked();
+  }
+
+  const string running = options_.fail_fast ? "error:" : "running:";
+  if (http_code == 401) {
+    // TODO: make it error, so goma_ctl abort "start"?
+    health_status_ = running + " access to backend servers was rejected.";
+    return http_code;
+  }
+  if (http_code == 302 || http_code == 403) {
+    std::ostringstream msg;
+    msg << running << " access to backend servers was blocked:" << http_code;
+    health_status_ = msg.str();
+    return http_code;
+  }
+  if (http_code == 0 && status.err < 0) {
+    health_status_ = running + " failed to send request to backend servers";
+    return 500;
+  }
+
+  std::ostringstream msg;
+  msg << running << " access to backend servers was failed:" << http_code;
+  health_status_ = msg.str();
+  return http_code;
+}
+
+// Estimates how long receiving |bytes| would take, in seconds, from the
+// response bytes/time observed so far.  Returns 0 when no response bytes
+// have been recorded yet.
+double HttpClient::EstimatedRecvTime(size_t bytes) {
+  AUTOLOCK(lock, &mu_);
+  double estimate = 0.0;
+  if (total_resp_byte_ > 0) {
+    // total_resp_time_ is millisec, hence the factor of 1000.
+    const double sec_per_byte =
+        static_cast<double>(total_resp_time_) / (1000.0 * total_resp_byte_);
+    estimate += bytes * sec_per_byte;
+  }
+  return estimate;
+}
+
+/* static */
+// Computes the next retry backoff from |prev_backoff_msec|: multiplied by
+// the backoff base while |in_error| (capped at options.max_retry_backoff_ms),
+// divided by it otherwise (floored at options.min_retry_backoff_ms).
+int HttpClient::BackoffMsec(
+    const Options& options, int prev_backoff_msec, bool in_error) {
+  // Multiply factor used in chromium.
+  // URLRequestThrottlerEntry::kDefaultMultiplyFactor
+  // in net/url_request/url_request_throttler_entry.cc
+  const double kBackoffBase = 1.4;
+  CHECK_GT(prev_backoff_msec, 0);
+  const double prev = static_cast<double>(prev_backoff_msec);
+  if (!in_error) {
+    const double shrunk = prev / kBackoffBase;
+    return static_cast<int>(
+        std::max<double>(shrunk, options.min_retry_backoff_ms));
+  }
+  const double grown = prev * kBackoffBase;
+  return static_cast<int>(
+      std::min<double>(grown, options.max_retry_backoff_ms));
+}
+
+// Advances retry_backoff_ms_ via BackoffMsec() and logs the transition
+// (INFO when |in_error|, VLOG(2) otherwise).
+void HttpClient::UpdateBackoffUnlocked(bool in_error) {
+  CHECK_GT(retry_backoff_ms_, 0);
+  const int orig_backoff = retry_backoff_ms_;
+  retry_backoff_ms_ = BackoffMsec(options_, orig_backoff, in_error);
+  if (!in_error) {
+    VLOG(2) << "UpdateBackoff ok "
+            << orig_backoff << " -> " << retry_backoff_ms_;
+    return;
+  }
+  LOG(INFO) << "UpdateBackoff error "
+            << orig_backoff << " -> " << retry_backoff_ms_;
+}
+
+// Returns the authorization value from the OAuth2 refresh task, or an empty
+// string when there is no refresh task.
+string HttpClient::GetOAuth2Authorization() const {
+  // TODO: disable http on error.
+  if (oauth_refresh_task_.get()) {
+    return oauth_refresh_task_->GetAuthorization();
+  }
+  return "";
+}
+
+// Returns whether the OAuth2 refresh task asks for a refresh; always false
+// when there is no refresh task.
+bool HttpClient::ShouldRefreshOAuth2AccessToken() const {
+  if (oauth_refresh_task_.get()) {
+    return oauth_refresh_task_->ShouldRefresh();
+  }
+  return false;
+}
+
+// Hands |closure| to the OAuth2 refresh task, to be run on |thread_id| after
+// the refresh.  A refresh task must exist.
+void HttpClient::RunAfterOAuth2AccessTokenGetReady(
+    WorkerThreadManager::ThreadId thread_id, OneshotClosure* closure) {
+  CHECK(oauth_refresh_task_.get());
+  oauth_refresh_task_->RunAfterRefresh(thread_id, closure);
+}
+
+// Randomizes backoff by subtracting up to 40%, so it returns a value in
+// [backoff_ms*0.6, backoff_ms].  The result is never less than 1.
+int RandomizeBackoff(int backoff_ms) {
+  const double kRandomizedRatio = 0.4;
+  const int scaled = static_cast<int>(
+      static_cast<double>(backoff_ms) * kRandomizedRatio);
+  // Keep at least one unit of jitter even for tiny backoffs.
+  const int max_jitter = (scaled == 0) ? 1 : scaled;
+  const int jitter = rand() % (max_jitter + 1);
+  return std::max(1, backoff_ms - jitter);
+}
+
+// Returns the current retry backoff with random jitter applied.
+int HttpClient::GetRandomizeBackoffTimeInMs() {
+  const int randomized = RandomizeBackoff(retry_backoff_ms_);
+  return randomized;
+}
+
+// Admits a new query.  Returns 0 when the query may start now; when
+// throttling is allowed and the latest traffic entry has HTTP errors or has
+// reached kMaxQPS queries, returns a randomized backoff time in msec instead.
+int HttpClient::TryStart() {
+  AUTOLOCK(lock, &mu_);
+  auto& current = traffic_history_.back();
+  const bool overloaded = current.http_err > 0 || current.query >= kMaxQPS;
+  if (options_.allow_throttle && overloaded) {
+    LOG(WARNING) << "Throttled. queries=" << current.query
+                 << " err=" << current.http_err
+                 << " retry_backoff_ms=" << retry_backoff_ms_;
+    return GetRandomizeBackoffTimeInMs();
+  }
+  ++current.query;
+  ++num_query_;
+  return 0;
+}
+
+// Increments the count of active users of this client.
+void HttpClient::IncNumActive() {
+  AUTOLOCK(lock, &mu_);
+  num_active_ += 1;
+}
+
+// Decrements the active count and signals cond_ once it reaches zero, which
+// is what WaitNoActive() waits on.
+void HttpClient::DecNumActive() {
+  AUTOLOCK(lock, &mu_);
+  num_active_ -= 1;
+  DCHECK_GE(num_active_, 0);
+  if (num_active_ != 0) {
+    return;
+  }
+  cond_.Signal();
+}
+
+// Blocks on cond_ until the active count is no longer positive.
+void HttpClient::WaitNoActive() {
+  AUTOLOCK(lock, &mu_);
+  for (;;) {
+    if (!(num_active_ > 0)) {
+      break;
+    }
+    cond_.Wait();
+  }
+}
+
+// Counts one more pending request and keeps the running total and the peak
+// up to date.
+void HttpClient::IncNumPending() {
+  AUTOLOCK(lock, &mu_);
+  ++total_pending_;
+  ++num_pending_;
+  if (peak_pending_ < num_pending_) {
+    peak_pending_ = num_pending_;
+  }
+}
+
+// Counts one fewer pending request.
+void HttpClient::DecNumPending() {
+  AUTOLOCK(lock, &mu_);
+  num_pending_ -= 1;
+  DCHECK_GE(num_pending_, 0);
+}
+
+// Accounts for one read of |n| bytes: the cumulative totals, the read-size
+// histogram and the latest traffic history entry.
+void HttpClient::IncReadByte(int n) {
+  AUTOLOCK(lock, &mu_);
+  ++num_readable_;
+  total_read_byte_ += n;
+  traffic_history_.back().read_byte += n;
+  read_size_->Add(n);
+}
+
+// Accounts for one write of |n| bytes: the cumulative totals, the write-size
+// histogram and the latest traffic history entry.
+void HttpClient::IncWriteByte(int n) {
+  AUTOLOCK(lock, &mu_);
+  ++num_writable_;
+  total_write_byte_ += n;
+  traffic_history_.back().write_byte += n;
+  write_size_->Add(n);
+}
+
+// Folds the outcome of one finished request into the counters, the retry
+// backoff, the throttling state and the network error tracking.
+void HttpClient::UpdateStats(const Status& status) {
+  AUTOLOCK(lock, &mu_);
+
+  AddStatusCodeHistoryUnlocked(status.http_return_code);
+  ++num_http_status_code_[status.http_return_code];
+
+  const bool failed = status.err != OK;
+  UpdateBackoffUnlocked(failed);
+  if (failed) {
+    const bool timed_out = status.err == ERR_TIMEOUT;
+    const bool http_408 =
+        status.err == FAIL && status.http_return_code == 408;
+    if (timed_out) {
+      ++num_http_timeout_;
+    } else {
+      ++num_http_error_;
+    }
+    // Timeouts (ERR_TIMEOUT, or FAIL with HTTP 408) count as an http error
+    // in the traffic history only when the request asked for it; every other
+    // failure always counts.
+    const bool is_timeout_like = timed_out || http_408;
+    if (!is_timeout_like || status.timeout_should_be_http_error) {
+      ++traffic_history_.back().http_err;
+    }
+  }
+
+  enabled_from_ = CalculateEnabledFrom(status.http_return_code, enabled_from_);
+  if (IsFatalNetworkErrorCode(status.http_return_code)) {
+    NetworkErrorDetectedUnlocked();
+  }
+
+  num_http_retry_ += status.num_retry;
+  total_resp_byte_ += status.resp_size;
+  total_resp_time_ += status.resp_recv_time;
+
+  // clear network_error_started_time_ in 2xx response.
+  const bool is_2xx = (status.http_return_code / 100 == 2);
+  if (is_2xx) {
+    NetworkRecoveredUnlocked();
+  }
+}
+
+// Refreshes health_status_ from the latest traffic history entry (unless
+// shutting down or nothing was measured), then appends a fresh entry and
+// drops the oldest one once the history reaches kMaxTrafficHistory.
+void HttpClient::UpdateTrafficHistory() {
+  AUTOLOCK(lock, &mu_);
+
+  const bool has_measurement = !shutting_down_ &&
+                               traffic_history_.back().query > 0 &&
+                               total_resp_time_ > 0;
+  if (has_measurement) {
+    const bool was_ok = (health_status_ == "ok");
+    if (traffic_history_.back().http_err == 0) {
+      if (!was_ok) {
+        LOG(INFO) << "Update health status:" << health_status_ << " to ok";
+      }
+      health_status_ = "ok";
+    } else {
+      const string running = options_.fail_fast ? "error:" : "running:";
+      if (was_ok) {
+        LOG(WARNING) << "Update health status: ok to "
+                     << running
+                     << " had some http errors from backend servers";
+      }
+      health_status_ = running + " had some http errors from backend servers";
+    }
+  }
+
+  traffic_history_.push_back(TrafficStat());
+  if (traffic_history_.size() >= kMaxTrafficHistory) {
+    traffic_history_.pop_front();
+  }
+}
+
+// Reports a network error to network_error_status_.  Only when that starts a
+// new error period is the error counted and the monitor (if any) notified.
+void HttpClient::NetworkErrorDetectedUnlocked() {
+  const time_t now = time(nullptr);
+
+  // set network error started time if it is not set.
+  const bool newly_started = network_error_status_.OnNetworkErrorDetected(now);
+  if (!newly_started) {
+    LOG(INFO) << "Network error continues from "
+              << network_error_status_.NetworkErrorStartedTime();
+    return;
+  }
+
+  LOG(INFO) << "Network error started: time=" << now;
+  ++num_network_error_;
+
+  if (monitor_.get()) {
+    monitor_->OnNetworkErrorDetected();
+  }
+}
+
+// Reports a successful response to network_error_status_.  Only when that
+// ends an error period is the recovery counted and the monitor (if any)
+// notified.
+void HttpClient::NetworkRecoveredUnlocked() {
+  const time_t now = time(nullptr);
+
+  // Read the start time first: OnNetworkRecovered() clears it on recovery.
+  const time_t network_error_started_time =
+      network_error_status_.NetworkErrorStartedTime();
+
+  const bool recovered = network_error_status_.OnNetworkRecovered(now);
+  if (!recovered) {
+    LOG_IF(INFO, network_error_started_time > 0)
+        << "Waiting network recover until "
+        << network_error_status_.NetworkErrorUntil();
+    return;
+  }
+
+  LOG(INFO) << "Network recovered"
+            << " started=" << network_error_started_time
+            << " recovered=" << now
+            << " duration=" << (now - network_error_started_time);
+  ++num_network_recovered_;
+  if (monitor_.get()) {
+    monitor_->OnNetworkRecovered();
+  }
+}
+
+// Takes ownership of |monitor|; it is notified when a network error period
+// starts or ends.
+void HttpClient::SetMonitor(
+    std::unique_ptr<HttpClient::NetworkErrorMonitor> monitor) {
+  AUTOLOCK(lock, &mu_);
+  monitor_ = std::move(monitor);
+}
+
+// Returns the start time recorded by network_error_status_, read under mu_.
+time_t HttpClient::NetworkErrorStartedTime() const {
+  AUTOLOCK(lock, &mu_);
+  const time_t started = network_error_status_.NetworkErrorStartedTime();
+  return started;
+}
+
+// A new request defaults to Content-Type application/octet-stream.
+HttpClient::Request::Request() : content_type_("application/octet-stream") {}
+
+// Nothing to release explicitly.
+HttpClient::Request::~Request() = default;
+
+// Initializes the request line and host from |method|, |path| and |options|,
+// and copies the authorization and cookie when |options| provides them.
+void HttpClient::Request::Init(
+    const string& method, const string& path,
+    const HttpClient::Options& options) {
+  SetMethod(method);
+  SetRequestPath(options.RequestURL(path));
+  SetHost(options.Host());
+
+  const bool has_authorization = !options.authorization.empty();
+  if (has_authorization) {
+    SetAuthorization(options.authorization);
+  }
+  const bool has_cookie = !options.cookie.empty();
+  if (has_cookie) {
+    SetCookie(options.cookie);
+  }
+}
+
+// Sets the HTTP method written on the request line.
+void HttpClient::Request::SetMethod(const string& method) {
+  method_.assign(method);
+}
+
+// Sets the request target written on the request line.
+void HttpClient::Request::SetRequestPath(const string& path) {
+  request_path_.assign(path);
+}
+
+// Sets the value of the Host: header (omitted from the message when empty).
+void HttpClient::Request::SetHost(const string& host) {
+  host_.assign(host);
+}
+
+// Sets the value of the Content-Type: header.
+void HttpClient::Request::SetContentType(const string& content_type) {
+  content_type_.assign(content_type);
+}
+
+// Sets the value of the Authorization: header (omitted when empty).
+void HttpClient::Request::SetAuthorization(const string& authorization) {
+  authorization_.assign(authorization);
+}
+
+// Sets the value of the Cookie: header (omitted when empty).
+void HttpClient::Request::SetCookie(const string& cookie) {
+  cookie_.assign(cookie);
+}
+
+// Appends an extra "key: value" header line to this request.
+void HttpClient::Request::AddHeader(const string& key, const string& value) {
+  const string header_line = CreateHeader(key, value);
+  headers_.push_back(header_line);
+}
+
+/* static */
+// Formats one header line as "key: value", without the trailing CRLF.
+string HttpClient::Request::CreateHeader(
+    const string& key, const string& value) {
+  string line(key);
+  line += ": ";
+  line += value;
+  return line;
+}
+
+// Serializes the request as an HTTP/1.1 message: request line, the standard
+// headers (optional ones only when set), the headers added through
+// AddHeader(), then |headers|, a blank line and |body|.
+string HttpClient::Request::BuildMessage(
+    const std::vector<string>& headers,
+    StringPiece body) const {
+  static const char kCrLf[] = "\r\n";
+  std::ostringstream out;
+  out << method_ << " " << request_path_ << " HTTP/1.1" << kCrLf;
+  if (!host_.empty()) {
+    out << "Host: " << host_ << kCrLf;
+  }
+  out << "User-Agent: " << kUserAgentString << kCrLf;
+  out << "Content-Type: " << content_type_ << kCrLf;
+  out << "Content-Length: " << body.size() << kCrLf;
+  if (!authorization_.empty()) {
+    out << "Authorization: " << authorization_ << kCrLf;
+  }
+  if (!cookie_.empty()) {
+    out << "Cookie: " << cookie_ << kCrLf;
+  }
+  // Headers stored on the request come before the per-call ones.
+  for (size_t i = 0; i < headers_.size(); ++i) {
+    out << headers_[i] << kCrLf;
+  }
+  for (size_t i = 0; i < headers.size(); ++i) {
+    out << headers[i] << kCrLf;
+  }
+  out << kCrLf;
+  out << body;
+  return out.str();
+}
+
+// Creates a request with an empty body.
+HttpRequest::HttpRequest() = default;
+
+// Nothing to release explicitly.
+HttpRequest::~HttpRequest() = default;
+
+// Stores a copy of |body| as the message body.
+void HttpRequest::SetBody(const string& body) {
+  body_.assign(body);
+}
+
+// Serializes the request with the stored body and no extra per-call headers.
+string HttpRequest::CreateMessage() const {
+  const std::vector<string> no_extra_headers;
+  return BuildMessage(no_extra_headers, body_);
+}
+
+// GetContentEncoding reports the EncodingType specified in |header|.
+// If the body is encoded and |header| has an X-Goma-Length: header with a
+// valid decimal value, that number is stored in |*dest_size|.  In every other
+// case (no supported encoding, header missing, value malformed or too large
+// for size_t) |*dest_size| is left untouched.
+// NOTE(review): the Content-Encoding probes require a trailing CRLF, so they
+// miss the header if it is the last line of a |header| that has no
+// terminating CRLF -- confirm whether callers can pass such input.
+static EncodingType GetContentEncoding(StringPiece header, size_t* dest_size) {
+  EncodingType encoding = NO_ENCODING;
+  // TODO: Might be better to migrate to lib/compress_util
+  StringPiece::size_type content_encoding_header =
+      header.find("Content-Encoding: deflate\r\n");
+  if (content_encoding_header != StringPiece::npos) {
+    encoding = ENCODING_DEFLATE;
+  } else {
+#ifdef ENABLE_LZMA
+    content_encoding_header = header.find("Content-Encoding: lzma2\r\n");
+    if (content_encoding_header != StringPiece::npos) {
+      encoding = ENCODING_LZMA2;
+    } else {
+      return NO_ENCODING;
+    }
+#else
+    return NO_ENCODING;
+#endif
+  }
+  StringPiece::size_type goma_content_length_header =
+      header.find(HttpClient::kGomaLength);
+  if (goma_content_length_header == StringPiece::npos) {
+    return encoding;
+  }
+
+  // Parse the value by hand rather than with atoi(): the scan never leaves
+  // |header| (a StringPiece is not NUL-terminated), and a negative or
+  // overflowing value from the server cannot turn into a huge size_t.
+  const char* p = header.data() + goma_content_length_header +
+                  strlen(HttpClient::kGomaLength);
+  const char* const end = header.data() + header.size();
+  while (p < end && (*p == ' ' || *p == '\t')) {
+    ++p;
+  }
+  const size_t kMaxSize = static_cast<size_t>(-1);
+  size_t value = 0;
+  bool has_digit = false;
+  for (; p < end && *p >= '0' && *p <= '9'; ++p) {
+    const size_t digit = static_cast<size_t>(*p - '0');
+    if (value > (kMaxSize - digit) / 10) {
+      // Would overflow size_t: ignore the header.
+      return encoding;
+    }
+    value = value * 10 + digit;
+    has_digit = true;
+  }
+  if (has_digit) {
+    *dest_size = value;
+  }
+  return encoding;
+}
+
+// Starts in the same state that Reset() restores: result FAIL, nothing
+// received, no header located, content length unknown.
+HttpClient::Response::Response()
+    : result_(FAIL),
+      len_(0),
+      body_offset_(0),
+      content_length_(string::npos),
+      is_chunked_(false),
+      remaining_(0),
+      status_code_(0) {}
+
+// Nothing to release explicitly.
+HttpClient::Response::~Response() = default;
+
+// Remembers the request path; it is only used in log messages here.
+void HttpClient::Response::SetRequestPath(const string& path) {
+  request_path_.assign(path);
+}
+
+// Remembers the trace id that prefixes this response's log messages.
+void HttpClient::Response::SetTraceId(const string& trace_id) {
+  trace_id_.assign(trace_id);
+}
+
+// Returns the parsing state to its initial values so the object can receive
+// another response.  The receive buffer itself is not cleared or shrunk.
+void HttpClient::Response::Reset() {
+  // Outcome.
+  result_ = FAIL;
+  status_code_ = 0;
+  // Receive progress.
+  len_ = 0;
+  remaining_ = 0;
+  // Header parse results.
+  body_offset_ = 0;
+  content_length_ = string::npos;
+  is_chunked_ = false;
+}
+
+// True once the body offset is known and the body length can be determined
+// (chunked transfer, or an explicit content length).
+bool HttpClient::Response::HasHeader() const {
+  if (!(body_offset_ > 0)) {
+    return false;
+  }
+  return is_chunked_ || content_length_ != string::npos;
+}
+
+// Returns the header part of the data received so far.  When the body
+// offset is known, that is everything up to 4 bytes before the body
+// (presumably the CRLFCRLF terminator).  Otherwise it is the text before the
+// first CRLFCRLF, or all received bytes if there is none yet.
+StringPiece HttpClient::Response::Header() const {
+  if (body_offset_ > 4) {
+    return StringPiece(buffer_.data(), body_offset_ - 4);
+  }
+  // Only the first len_ bytes of buffer_ belong to this response: Buffer()
+  // grows buffer_ ahead of the data and Reset() does not clear it, so the
+  // search must not run into the bytes beyond len_.
+  const StringPiece received(buffer_.data(), len_);
+  StringPiece::size_type header_size = received.find("\r\n\r\n");
+  if (header_size == StringPiece::npos) {
+    header_size = len_;
+  }
+  return StringPiece(buffer_.data(), header_size);
+}
+
+// Hands out the free tail of the receive buffer in |*buf| / |*buf_size|,
+// growing the buffer first: to the full message size when the content length
+// is known, otherwise by kBufSize once less than half of kBufSize is free.
+void HttpClient::Response::Buffer(char** buf, int* buf_size) {
+  const int free_before = buffer_.size() - len_;
+  if (HasHeader() && content_length_ != string::npos) {
+    const auto message_size = body_offset_ + content_length_;
+    if (buffer_.size() < message_size) {
+      buffer_.resize(message_size);
+    }
+  } else if (free_before < kBufSize / 2) {
+    buffer_.resize(buffer_.size() + kBufSize);
+  }
+  *buf_size = buffer_.size() - len_;
+  *buf = &buffer_[len_];
+  CHECK_GT(*buf_size, 0)
+      << " response len=" << len_
+      << " size=" << buffer_.size()
+      << " body_offset=" << body_offset_
+      << " content_length=" << content_length_
+      << " is_chunked=" << is_chunked_;
+}
+
+// Accounts for |r| newly received bytes (r == 0 is handled as the peer
+// closing the connection) and advances parsing of the response.
+// Returns true when receiving is finished -- either the whole response is
+// available or a failure was recorded in result_/err_message_ -- and false
+// when more data is needed.
+bool HttpClient::Response::Recv(int r) {
+  // Sample the header state before len_ grows so the header is parsed once.
+  bool has_header = HasHeader();
+  len_ += r;
+  StringPiece resp(buffer_.data(), len_);
+  if (!has_header && !ParseHttpResponse(resp, &status_code_, &body_offset_,
+                                        &content_length_,
+                                        &is_chunked_)) {
+    // still reading header.
+    if (r == 0) {
+      LOG(WARNING) << trace_id_ <<
+        " not received a header but connection closed by a peer.";
+      err_message_ = "connection closed before receiving a header.";
+      result_ = FAIL;
+      body_offset_ = len_;
+      return true;
+    }
+    return false;
+  }
+  VLOG(2) << "header ready " << status_code_
+          << " offset=" << body_offset_
+          << " content_length=" << content_length_;
+  // Apiary returns 204 No Content for SaveLog.
+  if (status_code_ == 204 && body_offset_ == len_) {
+    // Go to next step quickly since Status 204 has nothing to parse.
+    result_ = OK;
+    return true;
+  } else if (status_code_ != 200) {
+    // header found and error code.
+    LOG(WARNING) << trace_id_ << " read "
+                 << " http=" << status_code_
+                 << " path=" << request_path_
+                 << " Details:" << resp;
+    std::ostringstream err;
+    err << "Got HTTP error:" << status_code_;
+    err_message_ = err.str();
+    result_ = FAIL;
+    return true;
+  }
+  if (!is_chunked_ && content_length_ == string::npos) {
+    // no content-length
+    VLOG(3) << trace_id_ << " no content-length."
+            << " We should read until EOF."
+            << " r=" << r;
+    if (r == 0) {
+      VLOG(2) << trace_id_ << " ok r == 0, can finish.";
+      // Everything after the header is the body.
+      chunks_.clear();
+      chunks_.push_back(
+          StringPiece(buffer_.data() + body_offset_, len_ - body_offset_));
+      return true;
+    }
+    return false;
+  }
+  DCHECK_GT(body_offset_, 0U);
+  if (is_chunked_) {
+    if (!ParseChunkedBody(resp, body_offset_,
+                          &remaining_,
+                          &chunks_)) {
+      // not fully received yet.
+      VLOG(2) << "at least remaining " << remaining_;
+      if (r == 0) {
+        LOG(WARNING) << trace_id_ <<
+                     " connection closed before receiving all chunks.";
+        err_message_ = "connection closed before receiving all chunks.";
+        result_ = FAIL;
+        return true;
+      }
+      return false;
+    }
+    // Parsing succeeded; a non-zero remainder is reported as a broken body.
+    if (remaining_ != 0) {
+      LOG(WARNING) << trace_id_ << " broken chunk tranfer coding";
+      err_message_ = "broken chunk tranfer coding";
+      result_ = FAIL;
+      return true;
+    }
+    return true;
+  }
+
+  // Fixed-length body: wait until body_offset_ + content_length_ bytes are in.
+  DCHECK_NE(content_length_, string::npos);
+  if (len_ < body_offset_ + content_length_) {
+    // not fully received yet.
+    remaining_ = (body_offset_ + content_length_ - len_);
+    VLOG(2) << "remaining " << remaining_;
+    if (r == 0) {
+      LOG(WARNING) << trace_id_ <<
+        " connection closed before receiving all data.";
+      err_message_ = "connection closed before receiving all data.";
+      result_ = FAIL;
+      return true;
+    }
+    return false;
+  }
+
+  LOG_IF(ERROR, r == 0) << trace_id_
+                        << " not expect to see r==0 for this."
+                        << " r=" << r
+                        << " len=" << len_
+                        << " body_offset_=" << body_offset_
+                        << " content_length_=" << content_length_;
+  // The body is a single piece of exactly content_length_ bytes.
+  chunks_.clear();
+  chunks_.push_back(
+      StringPiece(buffer_.data() + body_offset_, content_length_));
+  return true;
+}
+
+// Returns true if the response header contains "Connection: close".
+bool HttpClient::Response::HasConnectionClose() const {
+  static const char kConnectionClose[] = "Connection: close";
+  const size_t kLen = sizeof(kConnectionClose) - 1;
+
+  const StringPiece header = Header();
+  if (header.find("Connection: close\r\n") != StringPiece::npos) {
+    return true;
+  }
+  // Header() stops before the bytes that terminate the header block, so the
+  // last header line carries no trailing CRLF and the search above cannot
+  // match it.  Accept "Connection: close" as the final line as well.
+  if (header.size() < kLen) {
+    return false;
+  }
+  const char* const tail = header.data() + (header.size() - kLen);
+  if (!std::equal(kConnectionClose, kConnectionClose + kLen, tail)) {
+    return false;
+  }
+  // It must be a whole line, not the tail of another header.
+  return header.size() == kLen || *(tail - 1) == '\n';
+}
+
+// Presents a list of body chunks as one protobuf input stream.
+// The chunk data is referenced, not copied: the memory behind |chunks| must
+// stay valid while this object and any stream obtained from stream() are in
+// use, and those streams must not outlive this object.
+class ChunkedInputStream {
+ public:
+  explicit ChunkedInputStream(const std::vector<StringPiece>& chunks)
+      : size_(0) {
+    inputs_.reserve(chunks.size());
+    for (const auto& chunk : chunks) {
+      inputs_.push_back(
+          new google::protobuf::io::ArrayInputStream(
+              chunk.data(), chunk.size()));
+      size_ += chunk.size();
+    }
+  }
+  ~ChunkedInputStream() {
+    for (auto* input : inputs_) {
+      delete input;
+    }
+  }
+
+  // Returns a stream that reads all chunks back to back.
+  std::unique_ptr<google::protobuf::io::ZeroCopyInputStream> stream() const {
+    // data() rather than &inputs_[0]: indexing an empty vector is undefined,
+    // and a response may carry no chunks at all.
+    return std::unique_ptr<google::protobuf::io::ZeroCopyInputStream>(
+        new google::protobuf::io::ConcatenatingInputStream(
+            inputs_.data(), static_cast<int>(inputs_.size())));
+  }
+
+  // Total number of bytes in all chunks.
+  size_t size() const { return size_; }
+
+ private:
+  // Owned; deleted in the destructor.
+  std::vector<google::protobuf::io::ZeroCopyInputStream*> inputs_;
+  size_t size_;
+  DISALLOW_COPY_AND_ASSIGN(ChunkedInputStream);
+};
+
+// Decodes the received body according to its Content-Encoding and passes the
+// decoded stream to ParseBody().  Does nothing when the result is already OK
+// or an error message has already been recorded.  On decode failure sets
+// err_message_ and leaves result_ as FAIL.
+void HttpClient::Response::Parse() {
+  if (result_ == OK) {
+    return;
+  }
+  if (!err_message_.empty()) {
+    return;
+  }
+
+  // These objects are referenced by |input| below, so they are declared
+  // first and stay alive until ParseBody() has returned.
+  string lzma_buf;
+  ChunkedInputStream chunk_stream(chunks_);
+  std::unique_ptr<google::protobuf::io::ZeroCopyInputStream> input(
+      chunk_stream.stream());
+  std::unique_ptr<google::protobuf::io::ZeroCopyInputStream> zlib_header;
+  std::unique_ptr<google::protobuf::io::ZeroCopyInputStream> zlib_content;
+  google::protobuf::io::ZeroCopyInputStream* zlib_streams[2] =
+      {nullptr, nullptr};
+  std::unique_ptr<google::protobuf::io::ZeroCopyInputStream> sub_stream;
+  size_t content_size = chunk_stream.size();
+  EncodingType encoding = GetContentEncoding(Header(), &content_size);
+  if (encoding == ENCODING_DEFLATE) {
+    // see chrome/src/net/base/gzip_filter.cc Insert ZlibHeader.
+    // The body is a raw deflate stream, so a zlib header is prepended to let
+    // GzipInputStream decode it in ZLIB mode.
+    static const char kZlibHeader[2] = {0x78, 0x01};
+    zlib_header.reset(
+        new google::protobuf::io::ArrayInputStream(kZlibHeader, 2));
+    zlib_content = std::move(input);
+    zlib_streams[0] = zlib_header.get();
+    zlib_streams[1] = zlib_content.get();
+    sub_stream.reset(
+        new google::protobuf::io::ConcatenatingInputStream(zlib_streams, 2));
+    input.reset(
+        new google::protobuf::io::GzipInputStream(
+            sub_stream.get(),
+            google::protobuf::io::GzipInputStream::ZLIB));
+  } else if (encoding == ENCODING_LZMA2) {
+#ifdef ENABLE_LZMA
+    // TODO: We might want Lzma2InputStream
+    lzma_stream lzma = LZMA_STREAM_INIT;
+    lzma_ret r =
+        lzma_stream_decoder(&lzma, lzma_easy_decoder_memusage(9), 0);
+    if (r == LZMA_OK) {
+      const string parsed_raw_body = CombineChunks(chunks_);
+      lzma_buf.reserve(content_size);
+      if (ReadAllLZMAStream(parsed_raw_body, &lzma, &lzma_buf)) {
+        input.reset(
+            new google::protobuf::io::ArrayInputStream(
+                &lzma_buf[0], lzma_buf.size()));
+      } else {
+        LOG(WARNING) << trace_id_ << " Failed to uncompress lzma2 stream";
+        input.reset();
+      }
+    } else {
+      LOG(WARNING) << trace_id_
+                   << " Failed to initialize lzma2 decoder r=" << r;
+      input.reset();
+    }
+    lzma_end(&lzma);
+#else
+    // Without lzma support the body cannot be decoded; fail instead of
+    // handing the still-compressed bytes to ParseBody().
+    LOG(ERROR) << trace_id_ << " lzma is not supported";
+    input.reset();
+#endif
+  }
+  if (input.get() == nullptr) {
+    LOG(WARNING) << trace_id_ << " Decode response failed";
+    err_message_ = "Decode response failed";
+    result_ = FAIL;
+    return;
+  }
+  ParseBody(input.get());
+}
+
+// Creates a response with an empty parsed body.
+HttpResponse::HttpResponse() = default;
+
+// Nothing to release explicitly.
+HttpResponse::~HttpResponse() = default;
+
+// Reads |input| to the end, stores the bytes as the parsed body and marks
+// the response OK.
+void HttpResponse::ParseBody(google::protobuf::io::ZeroCopyInputStream* input) {
+  string body;
+  const void* piece = nullptr;
+  int piece_size = 0;
+  while (input->Next(&piece, &piece_size)) {
+    body.append(static_cast<const char*>(piece), piece_size);
+  }
+  parsed_body_ = body;
+  result_ = OK;
+}
+
+// Returns a view of the body stored by ParseBody().
+StringPiece HttpResponse::Body() const {
+  return StringPiece(parsed_body_);
+}
+
+// Records a network error seen at |now| and extends the recovery deadline.
+// Returns true only when this error starts a new error period.
+bool HttpClient::NetworkErrorStatus::OnNetworkErrorDetected(
+    time_t now) {
+  const bool already_in_error = error_started_time_ > 0;
+
+  // Every error pushes the deadline out, whether or not it is the first.
+  error_until_ = now + error_recover_margin_;
+  if (already_in_error) {
+    return false;
+  }
+
+  error_started_time_ = now;
+  return true;
+}
+
+// Reports a sign of recovery at |now|.  Returns true, and clears the error
+// period, only when an error period is open and its deadline has passed.
+bool HttpClient::NetworkErrorStatus::OnNetworkRecovered(
+    time_t now) {
+  const bool in_error = (error_started_time_ != 0);
+  // We don't consider the network is recovered until error_until_.
+  if (!in_error || now < error_until_) {
+    return false;
+  }
+
+  // Here, we consider the network error is really recovered.
+  error_until_ = 0;
+  error_started_time_ = 0;
+  return true;
+}
+
+}  // namespace devtools_goma
diff --git a/client/http.h b/client/http.h
new file mode 100644
index 0000000..ff92e7c
--- /dev/null
+++ b/client/http.h
@@ -0,0 +1,610 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_HTTP_H_
+#define DEVTOOLS_GOMA_CLIENT_HTTP_H_
+
+#include <deque>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#ifndef _WIN32
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#else
+#include "socket_helper_win.h"
+#endif
+
+#include <json/json.h>
+
+#include "basictypes.h"
+#include "gtest/gtest_prod.h"
+#include "lockhelper.h"
+#include "luci_context.h"
+#include "oauth2.h"
+#include "string_piece.h"
+#include "thread_annotations.h"
+#include "tls_engine.h"
+#include "worker_thread_manager.h"
+
+using std::string;
+
+namespace google {
+namespace protobuf {
+namespace io {
+class ZeroCopyInputStream;
+}  // namespace io
+}  // namespace protobuf
+}  // namespace google
+
+namespace devtools_goma {
+
+class Descriptor;
+class Histogram;
+class HttpRequest;
+class HttpResponse;
+class HttpRPCStats;
+class OAuth2AccessTokenRefreshTask;
+class OneshotClosure;
+class SocketFactory;
+
+// HttpClient is a HTTP client.  It sends HttpRequest on Descriptor
+// generated by SocketFactory and TLSEngineFactory, and receives
+// the response in HttpResponse.
+class HttpClient {
+ public:
+  struct Options {
+    Options();
+    string dest_host_name;
+    int dest_port;
+    string proxy_host_name;
+    int proxy_port;
+    string extra_params;
+    string authorization;
+    string cookie;
+    bool capture_response_header;
+    string url_path_prefix;
+    string http_host_name;
+    bool use_ssl;
+    string ssl_extra_cert;
+    string ssl_extra_cert_data;
+    int ssl_crl_max_valid_duration;
+    double socket_read_timeout_sec;
+    int min_retry_backoff_ms;
+    int max_retry_backoff_ms;
+
+    OAuth2Config oauth2_config;
+    string gce_service_account;
+    string service_account_json_filename;
+    LuciContextAuth luci_context_auth;
+
+    bool fail_fast;
+    int network_error_margin;
+    int network_error_threshold_percent;
+
+    // Allows throttling if this is true.
+    bool allow_throttle;
+
+    bool reuse_connection;
+
+    // Clear the error record of socket_pool when failed to get socket from
+    // socket_pool, and retry connection.
+    bool force_connect_errorneous_address;
+
+    bool InitFromURL(StringPiece url);
+
+    string SocketHost() const;
+    int SocketPort() const;
+    string RequestURL(StringPiece path) const;
+    string Host() const;
+
+    string DebugString() const;
+    void ClearAuthConfig();
+  };
+
+  // Status is used for each HTTP transaction.
+  // Caller can specify
+  //  - timeout_should_be_http_error
+  //  - timeouts.
+  // The other fields are filled by HttpClient.
+  struct Status {
+    enum State {
+      // Running state. If failed in some step, State would be kept as-is.
+      // Then, caller of HttpClient can know where HttpClient failed.
+      INIT,
+      PENDING,
+      SENDING_REQUEST,
+      REQUEST_SENT,
+      RECEIVING_RESPONSE,
+      RESPONSE_RECEIVED,
+    };
+    Status();
+
+    State state;
+
+    // If true, timeout is treated as http error (default).
+    bool timeout_should_be_http_error;
+    std::deque<int> timeout_secs;
+
+    // Whether connect() was successful for this request.
+    bool connect_success;
+
+    // Whether RPC was finished or not.
+    bool finished;
+
+    // Result of RPC for CallWithAsync. OK=success, or error code.
+    int err;
+    string err_message;
+
+    // Become false if http is disabled with failnow().
+    bool enabled;
+
+    int http_return_code;
+    string response_header;
+
+    // size of message on http (maybe compressed).
+    size_t req_size;
+    size_t resp_size;
+
+    // size of serialized message (not compressed).
+    // for now, it is only set for proto messages on HttpRPC.
+    // TODO: set this for compressed test message or so.
+    size_t raw_req_size;
+    size_t raw_resp_size;
+
+    // in milliseconds.
+    int throttle_time;
+    int pending_time;
+    int req_build_time;
+    int req_send_time;
+    int wait_time;
+    int resp_recv_time;
+    int resp_parse_time;
+
+    int num_retry;
+    int num_throttled;
+    int num_connect_failed;
+
+    string trace_id;
+    string master_trace_id;  // master request in multi http rpc.
+
+    string DebugString() const;
+  };
+
+  enum ConnectionCloseState {
+    NO_CLOSE,
+    NORMAL_CLOSE,
+    ERROR_CLOSE,
+  };
+
+  // NetworkErrorMonitor can be attached to HttpClient (see SetMonitor).
+  // When a network error is detected, or the network has recovered,
+  // the corresponding method is called.
+  // The methods are called while mu_ is held, so that the calls are
+  // serialized.
+  class NetworkErrorMonitor {
+   public:
+    virtual ~NetworkErrorMonitor() {}
+    // Called when an http request did not succeed.
+    virtual void OnNetworkErrorDetected() = 0;
+    // Called when an http request succeeded after a network error started.
+    virtual void OnNetworkRecovered() = 0;
+  };
+
+  // Request is a request of HTTP transaction.
+  class Request {
+   public:
+    Request();
+    virtual ~Request();
+
+    void Init(const string& method, const string& path,
+              const Options& options);
+
+    void SetMethod(const string& method);
+    void SetRequestPath(const string& path);
+    const string& request_path() const { return request_path_; }
+    void SetHost(const string& host);
+    void SetContentType(const string& content_type);
+    void SetAuthorization(const string& authorization);
+    void SetCookie(const string& cookie);
+    void AddHeader(const string& key, const string& value);
+
+    // CreateMessage returns HTTP request message.
+    virtual string CreateMessage() const = 0;
+
+    // Clone returns clone of this Request.
+    virtual std::unique_ptr<Request> Clone() const = 0;
+
+   protected:
+    // CreateHeader creates a header line.
+    static string CreateHeader(const string& key, const string& value);
+
+    // BuildMessage creates HTTP request message with additional headers
+    // and body.
+    string BuildMessage(const std::vector<string>& headers,
+                        StringPiece body) const;
+
+   private:
+    string method_;
+    string request_path_;
+    string host_;
+    string content_type_;
+    string authorization_;
+    string cookie_;
+    std::vector<string> headers_;
+
+    DISALLOW_ASSIGN(Request);
+  };
+
+  // Response is a response of HTTP transaction.
+  class Response {
+   public:
+    Response();
+    virtual ~Response();
+
+    bool HasHeader() const;
+    StringPiece Header() const;
+
+    // HttpClient will use the following methods to receive HTTP response.
+    void SetRequestPath(const string& path);
+    void SetTraceId(const string& trace_id);
+    void Reset();
+
+    // Buffer returns a buffer pointer and buffer's size.
+    // Received data should be filled in buf[0..buf_size), and call
+    // Recv with number data received in the buffer.
+    void Buffer(char** buf, int* buf_size);
+
+    // Recv receives r bytes in the buffer specified by Buffer().
+    // Returns true if all HTTP response is received so ready to parse.
+    // Returns false if more data is needed to parse response.
+    bool Recv(int r);
+
+    // Parse parses a HTTP response message.
+    void Parse();
+
+    // Number of bytes already received.
+    size_t len() const { return len_; }
+
+    // Maximum buffer size at the moment.
+    // HttpResponse grows buffer size in Buffer if necessary.
+    size_t buffer_size() const { return buffer_.size(); }
+
+    // Remaining bytes for complete responses.
+    // remaining might be zero, if it is not yet known.
+    // Use Recv to check complete response has been received or not.
+    size_t remaining() const { return remaining_; }
+
+    // status_code reports HTTP status code.
+    int status_code() const { return status_code_; }
+
+    // result reports transaction results. OK or FAIL.
+    int result() const { return result_; }
+    const string& err_message() const { return err_message_; }
+
+    // represents whether response has 'Connection: close' header.
+    bool HasConnectionClose() const;
+
+   protected:
+    // ParseBody parses body.
+    // if an error occurred, updates result_, err_message_.
+    virtual void ParseBody(
+        google::protobuf::io::ZeroCopyInputStream* input) = 0;
+
+    int result_;
+    string err_message_;
+    string trace_id_;
+
+   private:
+    string request_path_;
+
+    string buffer_;  // whole buffer
+    size_t len_;     // received length in buffer_
+    size_t body_offset_;  // position to start response body in buffer_
+    size_t content_length_;  // content length specified in http response header
+    bool is_chunked_;  // chunked transfer encoding?
+    size_t remaining_;  // remaining bytes for full response.
+    std::vector<StringPiece> chunks_;
+
+    int status_code_;
+
+    DISALLOW_COPY_AND_ASSIGN(Response);
+  };
+
+
+  static std::unique_ptr<SocketFactory> NewSocketFactoryFromOptions(
+      const Options& options);
+  static std::unique_ptr<TLSEngineFactory> NewTLSEngineFactoryFromOptions(
+      const Options& options);
+
+  // HttpClient is a http client to a specific server.
+  // Takes ownership of socket_factory and tls_engine_factory.
+  // It doesn't take ownership of wm.
+  HttpClient(std::unique_ptr<SocketFactory> socket_factory,
+             std::unique_ptr<TLSEngineFactory> tls_engine_factory,
+             const Options& options,
+             WorkerThreadManager* wm);
+  ~HttpClient();
+
+  // Initializes Request for method and path.
+  void InitHttpRequest(
+      Request* req, const string& method, const string& path) const;
+
+  // Do performs a HTTP transaction.
+  // Caller has ownership of req, resp and status.
+  // This is synchronous call.
+  void Do(const Request* req, Response* resp, Status* status);
+
+  // DoAsync initiates a HTTP transaction.
+  // Caller has ownership of req, resp and status until callback is called
+  // (if callback is not NULL) or status->finished becomes true (if callback
+  // is NULL).
+  void DoAsync(const Request* req, Response* resp,
+               Status* status,
+               OneshotClosure* callback);
+
+  // Wait waits for a HTTP transaction initiated by DoAsync with callback=NULL.
+  void Wait(Status* status);
+
+  // Shutdown the client. all on-the-fly requests will fail.
+  void Shutdown();
+  bool shutting_down() const;
+
+  // ramp_up returns a value in [0, 100].
+  // ramp_up == 0 means 0% of requests could be sent.
+  // ramp_up == 100 means 100% of requests could be sent.
+  // when !enabled(), it returns 0.
+  // when enabled_from_ == 0, it returns 100.
+  int ramp_up() const;
+
+  // IsHealthyRecently returns false if more than given percentage
+  // (via options_.network_error_threshold_percent) of http requests in
+  // last 3 seconds having status code other than 200.
+  bool IsHealthyRecently();
+  string GetHealthStatusMessage() const;
+  bool IsHealthy() const;
+
+  // Get email address to login with oauth2.
+  string GetAccount();
+  bool GetOAuth2Config(OAuth2Config* config) const;
+  bool SetOAuth2Config(const OAuth2Config& config);
+
+  string DebugString() const;
+
+  void DumpToJson(Json::Value* json) const;
+  void DumpStatsToProto(HttpRPCStats* stats) const;
+
+  // options used to construct this client.
+  // Note that oauth2_config might have been updated and differ from this one.
+  // Use GetOAuth2Config above.
+  const Options& options() const { return options_; }
+
+  // Calculate next backoff msec.
+  // prev_backoff_msec must be positive.
+  static int BackoffMsec(
+      const Options& option, int prev_backoff_msec, bool in_error);
+
+  // public for HttpRPC ping.
+  void IncNumActive();
+  void DecNumActive();
+  // Provided for test that checks socket_pool status.
+  // A test should wait all in-flight tasks land.
+  void WaitNoActive();
+
+  int UpdateHealthStatusMessageForPing(
+      const Status& status, int round_trip_time);
+
+  // NetworkErrorStartedTime returns the time the network error started.
+  // Returns 0 if no error occurred recently.
+  // The time will be set on fatal http error (302, 401, 403) and when
+  // no socket in socket pool is available to connect to the host.
+  // The time will be cleared when HttpClient get 2xx response.
+  time_t NetworkErrorStartedTime() const;
+
+  // Takes the ownership.
+  void SetMonitor(std::unique_ptr<NetworkErrorMonitor> monitor);
+
+  static const char kGomaLength[];
+
+ private:
+  class Task;
+  friend class Task;
+
+  struct TrafficStat {
+    TrafficStat();
+    int read_byte;
+    int write_byte;
+    int query;
+    int http_err;
+  };
+  typedef std::deque<TrafficStat> TrafficHistory;
+
+  // NetworkErrorStatus checks whether a network error is a continuation
+  // of the previous error or a new one.
+  // Thread-unsafe, must be guarded by mutex.
+  class NetworkErrorStatus {
+   public:
+    explicit NetworkErrorStatus(int margin)
+        : error_recover_margin_(margin),
+          error_started_time_(0),
+          error_until_(0) {}
+
+    // Returns the time the network error started.
+    // 0 if network is not in the error state.
+    time_t NetworkErrorStartedTime() const { return error_started_time_; }
+    time_t NetworkErrorUntil() const { return error_until_; }
+
+    // Call this when a network access failed.
+    // Returns true if a new network error is detected.
+    // This will convert level trigger to edge trigger.
+    bool OnNetworkErrorDetected(time_t now);
+
+    // Call this when a network access succeeded.
+    // Even when this is called, the error state is kept until |error_until_|.
+    // Returns true if the network is really recovered.
+    // This will convert level trigger to edge trigger.
+    bool OnNetworkRecovered(time_t now);
+
+   private:
+    const int error_recover_margin_;
+    // 0 if network is not in the error state. Otherwise, time when the network
+    // error has started.
+    time_t error_started_time_;
+    // Even if we get a 2xx http status, we consider the network to be still
+    // in the error state until this time.
+    time_t error_until_;
+  };
+
+  // |may_retry| is provided for initial ping.
+  // If |may_retry| is true, NewDescriptor may return a descriptor of
+  // an address that caused an error in a previous connection.
+  Descriptor* NewDescriptor(bool may_retry);
+  void ReleaseDescriptor(Descriptor* d, ConnectionCloseState close_state);
+
+  double EstimatedRecvTime(size_t bytes);
+
+  string GetOAuth2Authorization() const;
+  bool ShouldRefreshOAuth2AccessToken() const;
+  void RunAfterOAuth2AccessTokenGetReady(
+      WorkerThreadManager::ThreadId thread_id,
+      OneshotClosure* callback);
+
+  void UpdateBackoffUnlocked(bool in_error) EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+  // Returns time to wait in the queue. If returns 0, no need to wait.
+  int TryStart();
+
+  void IncNumPending();
+  void DecNumPending();
+
+  // Returns milliseconds time to wait in the queue on error.
+  int GetRandomizeBackoffTimeInMs();
+
+  // return true if shutting_down or disabled.
+  bool failnow() const;
+
+  void IncReadByte(int n);
+  void IncWriteByte(int n);
+
+  void UpdateStats(const Status& status);
+
+  void UpdateTrafficHistory();
+
+  void NetworkErrorDetectedUnlocked() EXCLUSIVE_LOCKS_REQUIRED(mu_);
+  void NetworkRecoveredUnlocked() EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+  void UpdateStatusCodeHistoryUnlocked() EXCLUSIVE_LOCKS_REQUIRED(mu_);
+  void AddStatusCodeHistoryUnlocked(int status_code)
+      EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+  const Options options_;
+  std::unique_ptr<TLSEngineFactory> tls_engine_factory_;
+  std::unique_ptr<SocketFactory> socket_pool_;
+  std::unique_ptr<OAuth2AccessTokenRefreshTask> oauth_refresh_task_;
+
+  WorkerThreadManager* wm_;
+
+  Lock mu_;
+  ConditionVariable cond_;  // signaled when num_active_ is 0.
+  string health_status_ GUARDED_BY(mu_);
+  bool shutting_down_ GUARDED_BY(mu_);
+  std::deque<std::pair<time_t, int>> recent_http_status_code_ GUARDED_BY(mu_);
+  int bad_status_num_in_recent_http_ GUARDED_BY(mu_);
+
+  std::unique_ptr<NetworkErrorMonitor> monitor_ GUARDED_BY(mu_);
+  // Checking network error state. When we get fatal http error
+  // defined in IsFatalNetworkErrorCode(), or when no socket in socket pool is
+  // available to connect to the host, we consider the network error.
+  // When we get 2xx HTTP responses for specified duration, we consider
+  // the network is recovered.
+  // For the other error, this does not care.
+  // guarded by mu_.
+  NetworkErrorStatus network_error_status_ GUARDED_BY(mu_);;
+
+  int num_query_ GUARDED_BY(mu_);
+  int num_active_ GUARDED_BY(mu_);
+  int total_pending_ GUARDED_BY(mu_);
+  int peak_pending_  GUARDED_BY(mu_);
+  int num_pending_ GUARDED_BY(mu_);
+  int num_http_retry_ GUARDED_BY(mu_);
+  int num_http_timeout_ GUARDED_BY(mu_);
+  int num_http_error_ GUARDED_BY(mu_);
+
+  size_t total_write_byte_ GUARDED_BY(mu_);
+  size_t total_read_byte_ GUARDED_BY(mu_);
+  size_t num_writable_ GUARDED_BY(mu_);
+  size_t num_readable_ GUARDED_BY(mu_);
+  std::unique_ptr<Histogram> read_size_ GUARDED_BY(mu_);
+  std::unique_ptr<Histogram> write_size_ GUARDED_BY(mu_);
+
+  size_t total_resp_byte_ GUARDED_BY(mu_);
+  long total_resp_time_ GUARDED_BY(mu_);  // msec.
+
+  int ping_http_return_code_ GUARDED_BY(mu_);
+  int ping_round_trip_time_ms_ GUARDED_BY(mu_);
+
+  std::map<int, int> num_http_status_code_ GUARDED_BY(mu_);
+  TrafficHistory traffic_history_ GUARDED_BY(mu_);
+  PeriodicClosureId traffic_history_closure_id_ GUARDED_BY(mu_);
+  int retry_backoff_ms_;
+  // if enabled_from_ > 0,
+  //   t < enabled_from, then it will be disabled,
+  //   enabled_from <= t, then it is in ramp up period
+  // where t=time().
+  // if enabled_from_ == 0, it is enabled (without checking time()).
+  time_t enabled_from_ GUARDED_BY(mu_);
+
+  int num_network_error_ GUARDED_BY(mu_);
+  int num_network_recovered_ GUARDED_BY(mu_);
+
+  FRIEND_TEST(NetworkErrorStatus, BasicTest);
+  DISALLOW_COPY_AND_ASSIGN(HttpClient);
+};
+
+// HttpRequest is a request of HTTP transaction.
+class HttpRequest : public HttpClient::Request {
+ public:
+  HttpRequest();
+  ~HttpRequest() override;
+
+  void SetBody(const string& body);
+
+  string CreateMessage() const override;
+
+  std::unique_ptr<HttpClient::Request> Clone() const override {
+    return std::unique_ptr<HttpClient::Request>(new HttpRequest(*this));
+  }
+
+ private:
+  string body_;
+
+  DISALLOW_ASSIGN(HttpRequest);
+};
+
+// HttpResponse is an HTTP response whose parsed body is kept as a string.
+class HttpResponse : public HttpClient::Response {
+ public:
+  HttpResponse();
+  ~HttpResponse() override;
+
+  StringPiece Body() const;
+
+ protected:
+  // ParseBody reads all of |input| into parsed_body_ (returned by Body()).
+  // If an error occurred, updates result_, err_message_.
+  void ParseBody(google::protobuf::io::ZeroCopyInputStream* input) override;
+
+ private:
+  string parsed_body_;  // dechunked and uncompressed
+
+  DISALLOW_COPY_AND_ASSIGN(HttpResponse);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_HTTP_H_
diff --git a/client/http_init.cc b/client/http_init.cc
new file mode 100644
index 0000000..8bec4da
--- /dev/null
+++ b/client/http_init.cc
@@ -0,0 +1,189 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "http_init.h"
+
+#include "file_helper.h"
+#include "glog/logging.h"
+#include "http.h"
+#include "ioutil.h"
+#include "oauth2.h"
+#include "path.h"
+#include "util.h"
+
+#define GOMA_DECLARE_FLAGS_ONLY
+#include "goma_flags.cc"
+
+using std::string;
+
+namespace devtools_goma {
+
+namespace {
+
+// Reads and parses |filename| into |config|; logs and returns false on failure.
+template<typename T>
+static bool LoadConfig(const string& filename,
+                       bool (*config_parser)(const string&, T*), T* config) {
+  string config_string;
+  if (!ReadFileToString(filename.c_str(), &config_string)) {
+    LOG(WARNING) << "Failed to read " << filename;
+    return false;
+  }
+  if (!config_parser(config_string, config)) {
+    // Do not log the content itself: config files may hold credentials.
+    LOG(WARNING) << "Failed to parse config in " << filename
+                 << " size=" << config_string.size();
+    return false;
+  }
+  return true;
+}
+
+}  // namespace
+
+// Loads the OAuth2 config named by FLAGS_OAUTH2_CONFIG_FILE into
+// |http_options|, falling back to (and saving) the default config.
+static void InitOAuth2(HttpClient::Options* http_options) {
+  OAuth2Config* config = &http_options->oauth2_config;
+  const bool loaded =
+      LoadConfig(FLAGS_OAUTH2_CONFIG_FILE, ParseOAuth2Config, config);
+  if (!loaded) {
+    // A missing or invalid config is allowed: start logged out; the user
+    // can then log in on the status page (/api/loginz).
+    DefaultOAuth2Config(config);
+    LOG(INFO) << "Using default OAuth2 config.";
+    SaveOAuth2Config(FLAGS_OAUTH2_CONFIG_FILE, *config);
+  }
+  CHECK(config->enabled()) << "Invalid OAuth2Config in "
+                           << FLAGS_OAUTH2_CONFIG_FILE;
+}
+
+void InitHttpClientOptions(HttpClient::Options* http_options) {
+  http_options->proxy_host_name = FLAGS_PROXY_HOST;
+  http_options->proxy_port = FLAGS_PROXY_PORT;
+
+  // fields that would be updated by InitFromURL.
+  http_options->dest_host_name = FLAGS_STUBBY_PROXY_IP_ADDRESS;
+  http_options->dest_port = FLAGS_STUBBY_PROXY_PORT;
+  http_options->use_ssl = FLAGS_USE_SSL;
+  http_options->url_path_prefix = FLAGS_URL_PATH_PREFIX;
+
+  http_options->extra_params = FLAGS_RPC_EXTRA_PARAMS;
+  http_options->fail_fast = FLAGS_FAIL_FAST;
+
+  http_options->reuse_connection = FLAGS_COMPILER_PROXY_REUSE_CONNECTION;
+
+  http_options->force_connect_errorneous_address =
+      FLAGS_COMPILER_PROXY_FORCE_CONNECT_ERRORNEOUS_ADDRESS;
+
+  // Attempt to load and interpret LUCI_CONTEXT. It may define options for an
+  // ambient authentication in LUCI environment. We'll decide whether we will
+  // use them few lines below. Note that LUCI_CONTEXT environment variable may
+  // be defined even if ambient auth is not enabled.
+  const string& luci_context_file = GetEnv("LUCI_CONTEXT");
+  LuciContextAuth luci_context_auth;
+  if (!luci_context_file.empty()) {
+    LuciContext luci_context;
+    CHECK(LoadConfig(luci_context_file,
+                     ParseLuciContext,
+                     &luci_context))
+        << "LUCI_CONTEXT is set but cannot load it."
+        << " filename=" << luci_context_file;
+    luci_context_auth = luci_context.local_auth;
+    LOG_IF(INFO, !luci_context_auth.enabled())
+        << "Running under LUCI, but LUCI_CONTEXT auth is not enabled.";
+  }
+
+  // Preference order
+  // 1. GOMA_HTTP_AUTHORIZATION_FILE
+  //     - probably debug purpose only, overrides other settings.
+  // 2. GOMA_OAUTH2_CONFIG_FILE
+  //    - overrides service account setting for run buildbot locally to test.
+  // 3. GOMA_SERVICE_ACCOUNT_JSON_FILE (maybe used in buildbots)
+  // 4. GOMA_USE_GCE_SERVICE_ACCOUNT (maybe used in buildbots)
+  // 5. LUCI_CONTEXT (ambient in luci environment, if it is enabled)
+  //
+  // Note: having OAuth2 config and LUCI_CONTEXT at once is valid.
+  //       (crbug.com/684735#c14).
+  if (!FLAGS_HTTP_AUTHORIZATION_FILE.empty()) {
+    string auth_header;
+    CHECK(ReadFileToString(FLAGS_HTTP_AUTHORIZATION_FILE.c_str(),
+                           &auth_header))
+        << FLAGS_HTTP_AUTHORIZATION_FILE
+        << " : you need http Authorization header in "
+        << FLAGS_HTTP_AUTHORIZATION_FILE
+        << " or unset GOMA_HTTP_AUTHORIZATION_FILE";
+    auth_header = string(StringRstrip(auth_header));
+    http_options->authorization = auth_header;
+
+    LOG_IF(WARNING, !FLAGS_OAUTH2_CONFIG_FILE.empty())
+        << "GOMA_OAUTH2_CONFIG_FILE is set but ignored. "
+        << FLAGS_OAUTH2_CONFIG_FILE;
+    LOG_IF(WARNING, !FLAGS_SERVICE_ACCOUNT_JSON_FILE.empty())
+        << "GOMA_SERVICE_ACCOUNT_JSON_FILE is set but ignored. "
+        << FLAGS_SERVICE_ACCOUNT_JSON_FILE;
+    LOG_IF(WARNING, !FLAGS_GCE_SERVICE_ACCOUNT.empty())
+        << "GOMA_GCE_SERVICE_ACCOUNT is set but ignored. "
+        << FLAGS_GCE_SERVICE_ACCOUNT;
+    LOG_IF(WARNING, luci_context_auth.enabled())
+        << "LUCI_CONTEXT auth is configured in the environment but ignored.";
+  } else if (!FLAGS_OAUTH2_CONFIG_FILE.empty()) {
+    InitOAuth2(http_options);
+
+    LOG_IF(WARNING, !FLAGS_SERVICE_ACCOUNT_JSON_FILE.empty())
+        << "GOMA_SERVICE_ACCOUNT_JSON_FILE is set but ignored. "
+        << FLAGS_SERVICE_ACCOUNT_JSON_FILE;
+    LOG_IF(WARNING, !FLAGS_GCE_SERVICE_ACCOUNT.empty())
+        << "GOMA_GCE_SERVICE_ACCOUNT is set but ignored. "
+        << FLAGS_GCE_SERVICE_ACCOUNT;
+    LOG_IF(WARNING, luci_context_auth.enabled())
+        << "LUCI_CONTEXT auth is configured in the environment but ignored.";
+
+  } else if (!FLAGS_SERVICE_ACCOUNT_JSON_FILE.empty()) {
+    // TODO: fallback if the file doesn't exist?
+    http_options->service_account_json_filename =
+        FLAGS_SERVICE_ACCOUNT_JSON_FILE;
+
+    LOG_IF(WARNING, !FLAGS_GCE_SERVICE_ACCOUNT.empty())
+        << "GOMA_GCE_SERVICE_ACCOUNT is set but ignored. "
+        << FLAGS_GCE_SERVICE_ACCOUNT;
+    LOG_IF(WARNING, luci_context_auth.enabled())
+        << "LUCI_CONTEXT auth is configured in the environment but ignored.";
+
+  } else if (!FLAGS_GCE_SERVICE_ACCOUNT.empty()) {
+    http_options->gce_service_account = FLAGS_GCE_SERVICE_ACCOUNT;
+
+    LOG_IF(WARNING, luci_context_auth.enabled())
+        << "LUCI_CONTEXT auth is configured in the environment but ignored.";
+
+  } else if (luci_context_auth.enabled()) {
+    LOG(INFO) << "Using LUCI ambient authentication"
+              << "  default_account_id="
+              << luci_context_auth.default_account_id;
+    http_options->luci_context_auth = luci_context_auth;
+  } else {
+#ifndef _WIN32
+    static const char *kHomeEnv = "HOME";
+#else
+    static const char *kHomeEnv = "USERPROFILE";
+#endif
+    const string homedir = GetEnv(kHomeEnv);
+    if (!homedir.empty()) {
+      FLAGS_OAUTH2_CONFIG_FILE =
+          file::JoinPath(homedir, ".goma_oauth2_config");
+      LOG(INFO) << "Use OAUTH2_CONFIG_FILE=" << FLAGS_OAUTH2_CONFIG_FILE;
+      InitOAuth2(http_options);
+    }
+  }
+  http_options->capture_response_header =
+      FLAGS_HTTP_RPC_CAPTURE_RESPONSE_HEADER;
+  http_options->ssl_extra_cert = FLAGS_SSL_EXTRA_CERT;
+  http_options->ssl_extra_cert_data = FLAGS_SSL_EXTRA_CERT_DATA;
+  http_options->ssl_crl_max_valid_duration = FLAGS_SSL_CRL_MAX_VALID_DURATION;
+  http_options->socket_read_timeout_sec =
+      atof(FLAGS_HTTP_SOCKET_READ_TIMEOUT_SECS.c_str());
+  http_options->min_retry_backoff_ms = FLAGS_HTTP_RPC_MIN_RETRY_BACKOFF;
+  http_options->max_retry_backoff_ms = FLAGS_HTTP_RPC_MAX_RETRY_BACKOFF;
+}
+
+}  // namespace devtools_goma
diff --git a/client/http_init.h b/client/http_init.h
new file mode 100644
index 0000000..307ed20
--- /dev/null
+++ b/client/http_init.h
@@ -0,0 +1,16 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_HTTP_INIT_H_
+#define DEVTOOLS_GOMA_CLIENT_HTTP_INIT_H_
+
+#include "http.h"
+
+namespace devtools_goma {
+
+void InitHttpClientOptions(HttpClient::Options* http_options);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_HTTP_INIT_H_
diff --git a/client/http_rpc.cc b/client/http_rpc.cc
new file mode 100644
index 0000000..f025ba6
--- /dev/null
+++ b/client/http_rpc.cc
@@ -0,0 +1,456 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "http_rpc.h"
+
+#include <memory>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "autolock_timer.h"
+#include "callback.h"
+#include "compiler_specific.h"
+#include "glog/logging.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "google/protobuf/message.h"
+#include "google/protobuf/io/gzip_stream.h"
+#include "google/protobuf/io/zero_copy_stream.h"
+#include "google/protobuf/io/zero_copy_stream_impl.h"
+#include "google/protobuf/io/zero_copy_stream_impl_lite.h"
+MSVC_POP_WARNING()
+#include "ioutil.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_data.pb.h"
+MSVC_POP_WARNING()
+#include "scoped_fd.h"
+#include "simple_timer.h"
+#include "string_piece.h"
+#include "worker_thread_manager.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class HttpRPC::Request : public HttpClient::Request {
+ public:
+  Request(const google::protobuf::Message* req,
+          HttpRPC::Status* status) :
+      req_(req),
+      status_(status) {
+  }
+  ~Request() override {}  // req_ and status_ are not deleted here
+
+  string CreateMessage() const override = 0;  // supplied by subclasses
+
+  std::unique_ptr<HttpClient::Request> Clone() const override = 0;
+
+ protected:
+  const google::protobuf::Message* req_;  // may be null (see DoPing)
+  HttpRPC::Status* status_;  // CreateMessage records raw_req_size in it
+
+ private:
+  DISALLOW_ASSIGN(Request);  // subclasses still copy-construct in Clone()
+};
+
+class HttpRPC::CallRequest : public HttpRPC::Request {
+ public:
+  CallRequest(const google::protobuf::Message* req, HttpRPC::Status* status);
+  ~CallRequest() override {}
+  void EnableCompression(int level, const string& accept_encoding) {
+    compression_level_ = level;
+    accept_encoding_ = accept_encoding;
+  }
+  string CreateMessage() const override;  // headers + serialized req_
+
+  std::unique_ptr<HttpClient::Request> Clone() const override {
+    return std::unique_ptr<HttpClient::Request>(
+        new HttpRPC::CallRequest(*this));  // copy-constructed duplicate
+  }
+
+ private:
+  int compression_level_;  // 0 (set by the constructor): no compression
+  string accept_encoding_;  // value sent in the Accept-Encoding header
+  DISALLOW_ASSIGN(CallRequest);
+};
+
+class HttpRPC::Response : public HttpClient::Response {
+ public:
+  Response(google::protobuf::Message* resp,
+           HttpRPC::Status* status)
+      : resp_(resp),
+        status_(status) {
+  }
+  ~Response() override {}  // resp_ and status_ are not deleted here
+
+ protected:
+  void ParseBody(google::protobuf::io::ZeroCopyInputStream* input) override = 0;
+
+  google::protobuf::Message* resp_;  // may be null (see DoPing)
+  HttpRPC::Status* status_;  // ParseBody records raw_resp_size in it
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(Response);
+};
+
+class HttpRPC::CallResponse : public HttpRPC::Response {
+ public:  // ParseBody is protected in HttpRPC::Response but public here
+  CallResponse(google::protobuf::Message* resp,
+               HttpRPC::Status* status)
+      : Response(resp, status) {}  // no state beyond the base class
+  ~CallResponse() override {}
+  void ParseBody(google::protobuf::io::ZeroCopyInputStream* input) override;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(CallResponse);
+};
+
+class HttpRPC::CallData {
+ public:
+  CallData(std::unique_ptr<HttpRPC::Request> req,
+           std::unique_ptr<HttpRPC::Response> resp,
+           OneshotClosure* callback)
+      : req_(std::move(req)),
+        resp_(std::move(resp)),
+        callback_(callback) {
+  }
+  ~CallData() {  // destroying the call data is what fires the callback
+    if (callback_) {  // optional: Call() and DoPing() pass nullptr
+      callback_->Run();
+    }
+  }
+
+  HttpRPC::Request* req() const { return req_.get(); }
+  HttpRPC::Response* resp() const { return resp_.get(); }
+
+ private:
+  std::unique_ptr<HttpRPC::Request> req_;
+  std::unique_ptr<HttpRPC::Response> resp_;
+  OneshotClosure* callback_;  // may be null; run from the destructor
+  DISALLOW_COPY_AND_ASSIGN(CallData);
+};
+
+HttpRPC::Options::Options()
+    : compression_level(0),  // 0: IsCompressionEnabled() stays false
+      start_compression(false) {
+}
+
+string HttpRPC::Options::DebugString() const {
+  // One space-prefixed token per option, all on a single line.
+  std::ostringstream out;
+  out << " compression_level=" << compression_level
+      << (start_compression ? " start_compression" : "")
+      << " accept_encoding=" << accept_encoding
+      << " content_type_for_protobuf=" << content_type_for_protobuf;
+  return out.str();
+}
+
+HttpRPC::HttpRPC(HttpClient* client,
+                 const Options& options)
+    : client_(client),  // not owned
+      options_(options),
+      compression_enabled_(options.start_compression) {  // initial state
+  LOG(INFO) << options_.DebugString();
+  CHECK(!options_.content_type_for_protobuf.empty());  // mandatory option
+  CHECK(options_.content_type_for_protobuf.find_first_of("\r\n")
+        == string::npos)
+        << "content_type_for_protobuf must not contain CR LF:"
+        << options_.content_type_for_protobuf;
+}
+
+HttpRPC::~HttpRPC() {
+  LOG(INFO) << "HttpRPC terminated.";  // client_ is not deleted here
+}
+
+int HttpRPC::Ping(WorkerThreadManager* wm,
+                  const string& path,
+                  Status* status) {
+  std::unique_ptr<Status> ping_status(new Status);  // private working copy
+  DCHECK(status);
+  *ping_status = *status;
+  if (ping_status->trace_id.empty()) {
+    ping_status->trace_id = "ping";
+  }
+  long long timeout_secs = -1;  // <= 0: poll below without a deadline
+  if (!ping_status->timeout_secs.empty()) {
+    timeout_secs = ping_status->timeout_secs.front();  // first entry only
+    LOG(INFO) << "ping " << path << " timeout=" << timeout_secs;
+  } else {
+    LOG(INFO) << "ping " << path << " no timeout";
+  }
+  DCHECK(wm) << "There isn't any worker thread to send to";
+  // Keep the client active until PingDone has run.
+  // Without this, the client could shut down after the ping RPC finishes
+  // but before PingDone calls Wait.
+  client_->IncNumActive();
+  std::unique_ptr<SimpleTimer> timer(new SimpleTimer);
+  // Ping may be called on a thread outside the worker thread manager.
+  wm->RunClosure(
+      FROM_HERE,
+      NewCallback(
+          this, &HttpRPC::DoPing, path, ping_status.get()),
+      WorkerThreadManager::PRIORITY_LOW);
+  // We can't use Wait() since wm->Dispatch() can be called
+  // only on a thread in the worker thread manager.
+  // TODO: use a condition variable to wait?
+  while (!ping_status->finished) {
+    PlatformThread::Sleep(100);  // NOTE(review): presumably milliseconds
+    if (timeout_secs > 0 &&
+        timer->GetInNanoSeconds() > timeout_secs * 1000000000) {
+      // TODO: fix HttpRPC's timeout.
+      LOG(ERROR) << "ping timed out, but not finished yet."
+                 << "timer=" << timer->GetInMilliSeconds() << " [ms]";
+      break;
+    }
+  }
+  *status = *ping_status;  // may be an unfinished snapshot after a timeout
+  wm->RunClosure(
+      FROM_HERE,
+      NewCallback(this, &HttpRPC::PingDone,
+                  std::move(ping_status), std::move(timer)),
+      WorkerThreadManager::PRIORITY_LOW);
+  int status_code = client_->UpdateHealthStatusMessageForPing(
+      static_cast<const HttpClient::Status&>(*status), -1);  // no RTT yet
+  const string& health_status = client_->GetHealthStatusMessage();
+  if (health_status != "ok") {
+    LOG(WARNING) << "Update health status:" << health_status;
+  }
+  return status_code;
+}
+
+void HttpRPC::DoPing(string path, Status* status) {
+  CallWithCallback(path, /*req=*/nullptr, /*resp=*/nullptr, status, nullptr);
+}
+
+void HttpRPC::PingDone(std::unique_ptr<Status> status,
+                       std::unique_ptr<SimpleTimer> timer) {
+  // Scheduled by Ping(): waits for the ping RPC, logs whatever went wrong
+  // and refreshes the client's health status message.
+  Status* const ping = status.get();
+  LOG(INFO) << "Wait ping status " << ping;
+  Wait(ping);
+  const int elapsed = timer->GetInMs();
+  LOG_IF(WARNING, !ping->connect_success)
+      << "failed to connect to backend servers";
+  LOG_IF(WARNING, ping->err == ERR_TIMEOUT)
+      << "timed out to send request to backend servers";
+  LOG_IF(WARNING, ping->http_return_code != 200)
+      << "http=" << ping->http_return_code;
+  LOG_IF(WARNING, !ping->err_message.empty())
+      << "http err_message=" << ping->err_message;
+  LOG_IF(WARNING, !ping->response_header.empty())
+      << "http response header=" << ping->response_header;
+  LOG_IF(WARNING, ping->err != OK)
+      << "http status err=" << ping->err;
+  const string before = client_->GetHealthStatusMessage();
+  client_->UpdateHealthStatusMessageForPing(
+      static_cast<const HttpClient::Status&>(*ping), elapsed);
+  const string after = client_->GetHealthStatusMessage();
+  // A change to "ok" is informational; any other change is a warning.
+  const bool changed = before != after;
+  LOG_IF(INFO, changed && after == "ok")
+      << "Update health status:" << before << " to " << after;
+  LOG_IF(WARNING, changed && after != "ok")
+      << "Update health status:" << before << " to " << after;
+  LOG(INFO) << "Release ping status " << ping;
+  client_->DecNumActive();
+}
+
+int HttpRPC::Call(const string& path,
+                  const google::protobuf::Message* req,
+                  google::protobuf::Message* resp,
+                  Status* status) {
+  DCHECK(status);
+  CallWithCallback(path, req, resp, status, nullptr);  // no callback
+  Wait(status);  // waits for the RPC started on the line above
+  return status->err;
+}
+
+void HttpRPC::Wait(Status* status) {  // Status is HttpClient::Status
+  client_->Wait(static_cast<HttpClient::Status*>(status));
+}
+
+void HttpRPC::CallWithCallback(
+    const string& path,
+    const google::protobuf::Message* req,
+    google::protobuf::Message* resp,
+    Status* status,
+    OneshotClosure* callback) {
+  std::unique_ptr<CallRequest> call_req(new CallRequest(req, status));
+  if (IsCompressionEnabled()) {  // otherwise the request stays plain
+    call_req->EnableCompression(
+        options_.compression_level, options_.accept_encoding);
+  }
+  std::unique_ptr<Request> http_req = std::move(call_req);
+  client_->InitHttpRequest(http_req.get(), "POST", path);
+  std::unique_ptr<Response> http_resp(new CallResponse(resp, status));
+  http_req->SetContentType(options_.content_type_for_protobuf);
+  std::unique_ptr<CallData> call(
+      new CallData(std::move(http_req), std::move(http_resp), callback));
+
+  // Take raw pointers for DoAsync now, before |call| is moved into |done|.
+  const auto* ptr_req = call->req();
+  auto* ptr_resp = call->resp();
+
+  VLOG(3) << "Call async " << call.get();
+  OneshotClosure* done =
+      NewCallback(this, &HttpRPC::CallDone, std::move(call));
+
+  client_->DoAsync(ptr_req, ptr_resp,
+                   static_cast<HttpClient::Status*>(status),
+                   done);  // CallDone receives ownership of |call|
+  return;
+}
+
+void HttpRPC::CallDone(std::unique_ptr<CallData> call) {
+  VLOG(3) << "CallDone " << call.get();
+  HttpRPC::Response* const response = call->resp();
+  if (response->status_code() == 200) {
+    EnableCompression(response->Header());
+  } else if (response->status_code() == 400 ||
+             response->status_code() == 415 ||
+             response->result() == FAIL) {
+    // Apiary returns 415 to reject Content-Encoding.
+    DisableCompression();
+  }
+  // |call| goes out of scope here; its destructor runs the callback
+  // that was handed to CallWithCallback (if any).
+}
+
+string HttpRPC::DebugString() const {
+  // Human-readable report: one "Name:value" line for the compression
+  // state, the accept encoding and the content type, followed by an
+  // empty line.
+  AUTOLOCK(lock, &mu_);
+  const char* const compression_state =
+      compression_enabled_ ? "enabled" : "disabled";
+  std::ostringstream report;
+  report << "Compression:" << compression_state << std::endl
+         << "Accept-Encoding:" << options_.accept_encoding << std::endl
+         << "Content-Type:" << options_.content_type_for_protobuf
+         << std::endl
+         << std::endl;
+  return report.str();
+}
+
+void HttpRPC::DumpToJson(Json::Value* json) const {
+  client_->DumpToJson(json);  // HttpClient gets the same object first
+  AUTOLOCK(lock, &mu_);  // held while reading compression_enabled_
+  (*json)["compression"] = (compression_enabled_ ? "enabled" : "disabled");
+  (*json)["accept_encoding"] = options_.accept_encoding;
+  (*json)["content_type"] = options_.content_type_for_protobuf;
+}
+
+void HttpRPC::DumpStatsToProto(HttpRPCStats* stats) const {
+  client_->DumpStatsToProto(stats);  // HttpRPC adds nothing of its own
+}
+
+void HttpRPC::DisableCompression() {
+  AUTOLOCK(lock, &mu_);
+  // Warn only on the enabled -> disabled transition.
+  LOG_IF(WARNING, compression_enabled_) << "Compression disabled";
+  compression_enabled_ = false;
+}
+
+void HttpRPC::EnableCompression(StringPiece header) {
+  // Compression stays on only while the given header text contains the
+  // literal "Accept-Encoding: deflate".
+  AUTOLOCK(lock, &mu_);
+  const bool server_accepts_deflate =
+      header.find("Accept-Encoding: deflate") != StringPiece::npos;
+  // Log only the disabled -> enabled transition.
+  if (server_accepts_deflate && !compression_enabled_) {
+    LOG(INFO) << "Compression enabled";
+  }
+  compression_enabled_ = server_accepts_deflate;
+}
+
+bool HttpRPC::IsCompressionEnabled() const {
+  // Compression applies only while it is switched on at runtime and a
+  // non-zero compression level is configured.
+  AUTOLOCK(lock, &mu_);
+  const bool switched_on = compression_enabled_;
+  const bool level_configured = options_.compression_level != 0;
+  return switched_on && level_configured;
+}
+
+HttpRPC::CallRequest::CallRequest(
+    const google::protobuf::Message* req,
+    HttpRPC::Status* status)
+    : Request(req, status),
+      compression_level_(0) {  // uncompressed until EnableCompression()
+}
+
+string HttpRPC::CallRequest::CreateMessage() const {
+  // Builds the message via BuildMessage: deflated when enabled, else raw.
+  std::vector<string> headers;
+  if (compression_level_ > 0 && accept_encoding_ != "" && req_) {
+    string compressed;
+    headers.push_back(HttpClient::Request::CreateHeader(
+        "Accept-Encoding", accept_encoding_));
+    google::protobuf::io::StringOutputStream stream(&compressed);
+    google::protobuf::io::GzipOutputStream::Options options;
+    options.format = google::protobuf::io::GzipOutputStream::ZLIB;
+    options.compression_level = compression_level_;
+    google::protobuf::io::GzipOutputStream gzip_stream(&stream, options);
+    req_->SerializeToZeroCopyStream(&gzip_stream);
+    if (!gzip_stream.Close()) {
+      LOG(ERROR) << "GzipOutputStream error:"
+                 << gzip_stream.ZlibErrorMessage();
+    } else if (compressed.size() < 2) {  // too short for remove_prefix(2)
+      LOG(ERROR) << "compressed request is shorter than the zlib header";
+    } else if (compressed[1] >> 5 & 1) {
+      LOG(WARNING) << "response has FDICT, which should not be supported";
+    } else {
+      headers.push_back(
+          HttpClient::Request::CreateHeader("Content-Encoding", "deflate"));
+      status_->raw_req_size = gzip_stream.ByteCount();
+      StringPiece body(compressed);
+      // Omit the 2-byte zlib header (the server assumes there is none).
+      body.remove_prefix(2);
+      return BuildMessage(headers, body);
+    }
+  } else {
+    VLOG(1) << "compression unavailable.";
+  }
+  // Uncompressed fallback: compression is off, unsupported or failed.
+  string raw_body;
+  if (req_)
+    req_->SerializeToString(&raw_body);
+  status_->raw_req_size = raw_body.size();
+  return BuildMessage(headers, raw_body);
+}
+
+void HttpRPC::CallResponse::ParseBody(
+    google::protobuf::io::ZeroCopyInputStream* input) {
+  if (resp_) {
+    if (!resp_->ParseFromZeroCopyStream(input)) {
+      LOG(WARNING) << trace_id_ << " Parse response failed";
+      err_message_ = "Parse response failed";
+      result_ = FAIL;
+      return;
+    }
+    // Record the size only after parsing has filled in resp_.
+    status_->raw_resp_size = resp_->ByteSize();
+  }
+  result_ = OK;  // also reached when resp_ is null (nothing to parse)
+}
+
+ExecServiceClient::ExecServiceClient(HttpRPC* http_rpc, const string& path)
+    : http_rpc_(http_rpc), path_(path) {  // http_rpc is not owned
+}
+
+void ExecServiceClient::ExecAsync(const ExecReq* req, ExecResp* resp,
+                                  HttpClient::Status* status,
+                                  OneshotClosure* callback) {  // async
+  http_rpc_->CallWithCallback(path_, req, resp, status, callback);
+}
+
+void ExecServiceClient::Exec(const ExecReq* req, ExecResp* resp,
+                             HttpClient::Status* status) {
+  http_rpc_->Call(path_, req, resp, status);  // result is in status->err
+}
+
+}  // namespace devtools_goma
diff --git a/client/http_rpc.h b/client/http_rpc.h
new file mode 100644
index 0000000..f6f7247
--- /dev/null
+++ b/client/http_rpc.h
@@ -0,0 +1,132 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_HTTP_RPC_H_
+#define DEVTOOLS_GOMA_CLIENT_HTTP_RPC_H_
+
+#include <memory>
+#include <string>
+
+#include <json/json.h>
+
+#include "basictypes.h"
+#include "lockhelper.h"
+#include "http.h"
+
+using std::string;
+
+namespace google {
+namespace protobuf {
+class Message;
+}  // namespace protobuf
+}  // namespace google
+
+namespace devtools_goma {
+
+class ExecReq;
+class ExecResp;
+class HttpRPCStats;
+class OneshotClosure;
+class SimpleTimer;
+class WorkerThreadManager;
+
+// HttpRPC is a RPC system that uses protobuf over HttpClient.
+class HttpRPC {
+ public:
+  struct Options {
+    Options();
+    int compression_level;  // 0 disables compression
+    bool start_compression;  // initial compression state of a new HttpRPC
+    string accept_encoding;
+    string content_type_for_protobuf;  // required; must not contain CR/LF
+
+    string DebugString() const;
+  };
+  // TODO: HttpRPC specific status?
+  typedef HttpClient::Status Status;
+
+  // It doesn't take ownership of client.
+  HttpRPC(HttpClient* client, const Options& options);
+  ~HttpRPC();
+
+  // Ping sends a ping message to |path|.
+  // This is in HttpRPC, not in HttpClient, because we might need to
+  // call via RPC for the Apiary case.
+  int Ping(WorkerThreadManager* wm, const string& path,
+           Status *status);
+
+  // Call performs an RPC synchronously and returns status->err.
+  int Call(const string& path,
+           const google::protobuf::Message* req,
+           google::protobuf::Message* resp,
+           Status* status);
+
+  // CallWithCallback initiates an RPC asynchronously.
+  // The caller keeps ownership of req, resp and status until the RPC ends.
+  // Once the RPC is finished, callback is called (if callback != NULL), or
+  // status->finished becomes true (if callback == NULL).
+  void CallWithCallback(
+      const string& path,
+      const google::protobuf::Message* req,
+      google::protobuf::Message* resp,
+      Status* status,
+      OneshotClosure* callback);
+
+  // Wait waits for an RPC initiated by CallWithCallback with callback=NULL.
+  void Wait(Status* status);
+
+  string DebugString() const;
+
+  void DumpToJson(Json::Value* json) const;
+  void DumpStatsToProto(HttpRPCStats* stats) const;
+
+  HttpClient* client() const { return client_; }
+  const Options& options() const { return options_; }
+
+ private:
+  class Request;
+  class CallRequest;
+  class Response;
+  class CallResponse;
+  class CallData;
+
+  void DoPing(string path, Status* status);
+  void PingDone(std::unique_ptr<Status> status,
+                std::unique_ptr<SimpleTimer> timer);
+
+  void CallDone(std::unique_ptr<CallData> call);
+
+  void DisableCompression();
+  void EnableCompression(StringPiece header);
+  bool IsCompressionEnabled() const;
+
+  HttpClient* client_;  // not owned
+  const Options options_;
+  Lock mu_;  // held around accesses to compression_enabled_
+  bool compression_enabled_;
+
+  DISALLOW_COPY_AND_ASSIGN(HttpRPC);
+};
+
+class ExecServiceClient {
+ public:
+  ExecServiceClient(HttpRPC* http_rpc, const string& path);
+
+  ExecServiceClient(const ExecServiceClient&) = delete;
+  ExecServiceClient& operator=(const ExecServiceClient&) = delete;
+
+  void ExecAsync(const ExecReq* req, ExecResp* resp,
+                 HttpClient::Status* status, OneshotClosure* callback);
+
+  void Exec(const ExecReq* req, ExecResp* resp, HttpClient::Status* status);
+
+ private:
+  HttpRPC* http_rpc_;  // not owned
+  const string path_;  // RPC path passed to every call
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_HTTP_RPC_H_
diff --git a/client/http_rpc_init.cc b/client/http_rpc_init.cc
new file mode 100644
index 0000000..9ab59ba
--- /dev/null
+++ b/client/http_rpc_init.cc
@@ -0,0 +1,20 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "http_rpc_init.h"
+
+#define GOMA_DECLARE_FLAGS_ONLY
+#include "goma_flags.cc"
+
+namespace devtools_goma {
+
+void InitHttpRPCOptions(HttpRPC::Options* options) {
+  // Copy each HTTP RPC related flag into the matching Options field.
+  options->content_type_for_protobuf = FLAGS_CONTENT_TYPE_FOR_PROTOBUF;
+  options->accept_encoding = FLAGS_HTTP_ACCEPT_ENCODING;
+  options->start_compression = FLAGS_HTTP_RPC_START_COMPRESSION;
+  options->compression_level = FLAGS_HTTP_RPC_COMPRESSION_LEVEL;
+}
+
+}  // namespace devtools_goma
diff --git a/client/http_rpc_init.h b/client/http_rpc_init.h
new file mode 100644
index 0000000..262612f
--- /dev/null
+++ b/client/http_rpc_init.h
@@ -0,0 +1,16 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_HTTP_RPC_INIT_H_
+#define DEVTOOLS_GOMA_CLIENT_HTTP_RPC_INIT_H_
+
+#include "http_rpc.h"
+
+namespace devtools_goma {
+
+void InitHttpRPCOptions(HttpRPC::Options* options);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_HTTP_RPC_INIT_H_
diff --git a/client/http_rpc_unittest.cc b/client/http_rpc_unittest.cc
new file mode 100644
index 0000000..4430d94
--- /dev/null
+++ b/client/http_rpc_unittest.cc
@@ -0,0 +1,1174 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "http_rpc.h"
+
+#include <string>
+#include <sstream>
+
+#include "callback.h"
+#include "compiler_proxy_info.h"
+#include "compiler_specific.h"
+#include "fake_tls_engine.h"
+#include "ioutil.h"
+#include "lockhelper.h"
+#include "mock_socket_factory.h"
+#include "platform_thread.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_data.pb.h"
+MSVC_POP_WARNING()
+#include "scoped_fd.h"
+#include "socket_factory.h"
+#include "worker_thread_manager.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+using std::string;
+
+namespace devtools_goma {
+
+class HttpRPCTest : public ::testing::Test {
+ protected:
+  class TestLookupFileContext {
+   public:
+    enum State { INIT, CALL, DONE };  // CALL: issued, DONE: completed
+    TestLookupFileContext(HttpRPC* http_rpc,
+                          OneshotClosure* callback)
+        : http_rpc_(http_rpc),
+          callback_(callback),
+          r_(0),
+          state_(INIT) {
+    }
+
+    HttpRPC* http_rpc_;
+    OneshotClosure* callback_;  // nullptr selects the synchronous Call()
+    LookupFileReq req_;
+    LookupFileResp resp_;
+    HttpRPC::Status status_;
+    int r_;  // return value of the synchronous Call()
+    int state_;  // one of State; updated under mu_
+  };
+
+  HttpRPCTest() : pool_(-1), cond_(&mu_) {}
+
+  void SetUp() override {
+    wm_.reset(new WorkerThreadManager);
+    wm_->Start(1);
+    pool_ = wm_->StartPool(1, "test");  // pool that runs the test closures
+    mock_server_.reset(new MockSocketServer(wm_.get()));
+  }
+  void TearDown() override {
+    mock_server_.reset();
+    wm_->Finish();
+    wm_.reset();
+    pool_ = -1;
+  }
+
+  void RunTestLookupFile(TestLookupFileContext* tc) {
+    wm_->RunClosureInPool(
+        FROM_HERE,
+        pool_,
+        NewCallback(
+            this, &HttpRPCTest::DoTestLookupFile, tc),
+        WorkerThreadManager::PRIORITY_LOW);
+  }
+
+  void DoTestLookupFile(TestLookupFileContext* tc) {
+    if (tc->callback_ != nullptr) {  // asynchronous variant
+      tc->http_rpc_->CallWithCallback(
+          "/l", &tc->req_, &tc->resp_, &tc->status_, tc->callback_);
+      AutoLock lock(&mu_);
+      tc->state_ = TestLookupFileContext::CALL;
+      cond_.Signal();
+    } else {  // synchronous variant
+      int r = tc->http_rpc_->Call(
+          "/l", &tc->req_, &tc->resp_, &tc->status_);
+      AutoLock lock(&mu_);
+      tc->r_ = r;
+      tc->state_ = TestLookupFileContext::DONE;
+      cond_.Signal();
+    }
+  }
+
+  void WaitTestLookupFile(TestLookupFileContext* tc) {
+    wm_->RunClosureInPool(
+        FROM_HERE,
+        pool_,
+        NewCallback(
+            this, &HttpRPCTest::DoWaitTestLookupFile, tc),
+        WorkerThreadManager::PRIORITY_LOW);
+  }
+
+  void DoWaitTestLookupFile(TestLookupFileContext* tc) {
+    tc->http_rpc_->Wait(&tc->status_);
+    AutoLock lock(&mu_);
+    tc->state_ = TestLookupFileContext::DONE;
+    cond_.Signal();
+  }
+
+  OneshotClosure* NewDoneCallback(bool* done) {
+    {
+      AutoLock lock(&mu_);
+      *done = false;
+    }
+    return NewCallback(
+        this, &HttpRPCTest::DoneCallback, done);
+  }
+
+  void DoneCallback(bool* done) {
+    AutoLock lock(&mu_);
+    *done = true;
+    cond_.Signal();
+  }
+
+  std::unique_ptr<WorkerThreadManager> wm_;
+  int pool_;  // -1 outside SetUp()/TearDown()
+  std::unique_ptr<MockSocketServer> mock_server_;
+  Lock mu_;  // guards state_/r_ updates and the done flags
+  ConditionVariable cond_;  // signalled after each such update
+};
+
+TEST_F(HttpRPCTest, PingFail) {
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(-1));  // NOTE(review): -1 looks like "no socket"
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 80;
+  HttpClient http_client(
+      std::move(socket_factory), nullptr, options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  HttpRPC http_rpc(&http_client, rpc_options);
+  HttpRPC::Status status;
+  int r = http_rpc.Ping(wm_.get(), "/pingz", &status);
+  EXPECT_EQ(0, r);  // no HTTP status code was obtained
+  EXPECT_EQ("error: failed to connect to backend servers",
+            http_client.GetHealthStatusMessage());
+  http_client.WaitNoActive();  // let the queued PingDone finish first
+}
+
+TEST_F(HttpRPCTest, PingRejected) {
+  int socks[2];
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  std::ostringstream req_ss;  // the exact request the client should send
+  req_ss << "POST /pingz HTTP/1.1\r\n"
+         << "Host: clients5.google.com\r\n"
+         << "User-Agent: " << kUserAgentString << "\r\n"
+         << "Content-Type: binary/x-protocol-buffer\r\n"
+         << "Content-Length: 0\r\n\r\n";
+
+  const string req_expected = req_ss.str();
+  string req_buf;
+  req_buf.resize(req_expected.size());
+  mock_server_->ServerRead(socks[0], &req_buf);
+  std::ostringstream resp_ss;  // canned 401 reply
+  resp_ss << "HTTP/1.1 401 Unauthorized\r\n"
+          << "Content-Type: text/plain\r\n"
+          << "Content-Length: 5\r\n\r\n"
+          << "error";
+  mock_server_->ServerWrite(socks[0], resp_ss.str());
+  mock_server_->ServerClose(socks[0]);
+
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+  socket_factory->set_dest("clients5.google.com:80");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(80);
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 80;
+  HttpClient http_client(
+      std::move(socket_factory), nullptr, options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  HttpRPC http_rpc(&http_client, rpc_options);
+  HttpRPC::Status status;
+  int r = http_rpc.Ping(wm_.get(), "/pingz", &status);
+  EXPECT_EQ(req_expected, req_buf);
+  EXPECT_EQ(401, r);  // Ping's result matches the HTTP status sent above
+  EXPECT_EQ("running: access to backend servers was rejected.",
+            http_client.GetHealthStatusMessage());
+  http_client.WaitNoActive();  // let the queued PingDone finish first
+  EXPECT_FALSE(socket_status.is_owned());
+  EXPECT_TRUE(socket_status.is_closed());
+  EXPECT_TRUE(socket_status.is_err());
+  EXPECT_FALSE(socket_status.is_released());
+}
+
+TEST_F(HttpRPCTest, PingOk) {
+  int socks[2];
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  std::ostringstream req_ss;  // the exact request the client should send
+  req_ss << "POST /pingz HTTP/1.1\r\n"
+         << "Host: clients5.google.com\r\n"
+         << "User-Agent: " << kUserAgentString << "\r\n"
+         << "Content-Type: binary/x-protocol-buffer\r\n"
+         << "Content-Length: 0\r\n\r\n";
+
+  const string req_expected = req_ss.str();
+  string req_buf;
+  req_buf.resize(req_expected.size());
+  mock_server_->ServerRead(socks[0], &req_buf);
+  std::ostringstream resp_ss;  // canned 200 reply
+  resp_ss << "HTTP/1.1 200 OK\r\n"
+          << "Content-Type: text/plain\r\n"
+          << "Content-Length: 2\r\n\r\n"
+          << "ok";
+  mock_server_->ServerWrite(socks[0], resp_ss.str());
+
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:80");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(80);
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 80;
+  HttpClient http_client(
+      std::move(socket_factory), nullptr, options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  HttpRPC http_rpc(&http_client, rpc_options);
+  HttpRPC::Status status;
+  int r = http_rpc.Ping(wm_.get(), "/pingz", &status);
+  EXPECT_EQ(req_expected, req_buf);
+  EXPECT_EQ(200, r);  // Ping's result matches the HTTP status sent above
+  EXPECT_EQ("ok", http_client.GetHealthStatusMessage());
+  http_client.WaitNoActive();  // let the queued PingDone finish first
+  EXPECT_TRUE(socket_status.is_owned());
+  EXPECT_FALSE(socket_status.is_closed());
+  EXPECT_TRUE(socket_status.is_released());
+}
+
+TEST_F(HttpRPCTest, CallLookupFile) {
+  int socks[2];
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  LookupFileReq req;
+  string serialized_req;
+  req.SerializeToString(&serialized_req);
+  std::ostringstream req_ss;  // the exact request the client should send
+  req_ss << "POST /l HTTP/1.1\r\n"
+         << "Host: clients5.google.com\r\n"
+         << "User-Agent: " << kUserAgentString << "\r\n"
+         << "Content-Type: binary/x-protocol-buffer\r\n"
+         << "Content-Length: " << serialized_req.size() << "\r\n\r\n"
+         << serialized_req;
+
+  const string req_expected = req_ss.str();
+  string req_buf;
+  req_buf.resize(req_expected.size());
+  mock_server_->ServerRead(socks[0], &req_buf);
+  LookupFileResp resp;
+  string serialized_resp;
+  resp.SerializeToString(&serialized_resp);
+  std::ostringstream resp_ss;  // canned 200 reply with a default response
+  resp_ss << "HTTP/1.1 200 OK\r\n"
+          << "Content-Type: text/x-protocol-buffer\r\n"
+          << "Content-Length: " << serialized_resp.size() << "\r\n\r\n"
+          << serialized_resp;
+  mock_server_->ServerWrite(socks[0], resp_ss.str());
+
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:80");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(80);
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 80;
+  HttpClient http_client(
+      std::move(socket_factory), nullptr, options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  rpc_options.start_compression = false;
+  HttpRPC http_rpc(&http_client, rpc_options);
+  TestLookupFileContext tc(&http_rpc, nullptr);  // nullptr: use Call()
+  RunTestLookupFile(&tc);
+  {
+    AutoLock lock(&mu_);
+    while (tc.state_ != TestLookupFileContext::DONE) {
+      cond_.Wait();
+    }
+
+    EXPECT_EQ(req_expected, req_buf);
+    EXPECT_EQ(0, tc.r_);
+    EXPECT_TRUE(tc.status_.connect_success);
+    EXPECT_TRUE(tc.status_.finished);
+    EXPECT_EQ(0, tc.status_.err);
+    EXPECT_EQ("", tc.status_.err_message);
+    EXPECT_EQ(200, tc.status_.http_return_code);
+  }
+  http_client.WaitNoActive();
+  EXPECT_TRUE(socket_status.is_owned());
+  EXPECT_FALSE(socket_status.is_closed());
+  EXPECT_TRUE(socket_status.is_released());
+}
+
+TEST_F(HttpRPCTest, CallAsyncLookupFile) {
+  int socks[2];
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  LookupFileReq req;
+  string serialized_req;
+  req.SerializeToString(&serialized_req);
+  std::ostringstream req_ss;  // the exact request the client should send
+  req_ss << "POST /l HTTP/1.1\r\n"
+         << "Host: clients5.google.com\r\n"
+         << "User-Agent: " << kUserAgentString << "\r\n"
+         << "Content-Type: binary/x-protocol-buffer\r\n"
+         << "Content-Length: " << serialized_req.size() << "\r\n\r\n"
+         << serialized_req;
+
+  const string req_expected = req_ss.str();
+  string req_buf;
+  req_buf.resize(req_expected.size());
+  mock_server_->ServerRead(socks[0], &req_buf);
+  LookupFileResp resp;
+  string serialized_resp;
+  resp.SerializeToString(&serialized_resp);
+  std::ostringstream resp_ss;  // reply text; written only further below
+  resp_ss << "HTTP/1.1 200 OK\r\n"
+          << "Content-Type: text/x-protocol-buffer\r\n"
+          << "Content-Length: " << serialized_resp.size() << "\r\n\r\n"
+          << serialized_resp;
+
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:80");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(80);
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 80;
+  HttpClient http_client(
+      std::move(socket_factory), nullptr, options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  rpc_options.start_compression = false;
+  HttpRPC http_rpc(&http_client, rpc_options);
+  bool done = false;
+  TestLookupFileContext tc(&http_rpc, NewDoneCallback(&done));
+  RunTestLookupFile(&tc);
+  {
+    AutoLock lock(&mu_);
+    while (tc.state_ != TestLookupFileContext::CALL) {
+      cond_.Wait();
+    }
+
+    EXPECT_TRUE(tc.status_.connect_success);
+    EXPECT_FALSE(tc.status_.finished);  // no reply has been written yet
+  }
+
+  mock_server_->ServerWrite(socks[0], resp_ss.str());
+  WaitTestLookupFile(&tc);
+
+  {
+    AutoLock lock(&mu_);
+    while (!done) {  // set by DoneCallback
+      cond_.Wait();
+    }
+    while (tc.state_ != TestLookupFileContext::DONE) {
+      cond_.Wait();
+    }
+    EXPECT_EQ(req_expected, req_buf);
+    EXPECT_EQ(0, tc.r_);
+    EXPECT_TRUE(tc.status_.connect_success);
+    EXPECT_TRUE(tc.status_.finished);
+    EXPECT_EQ(0, tc.status_.err);
+    EXPECT_EQ("", tc.status_.err_message);
+    EXPECT_EQ(200, tc.status_.http_return_code);
+  }
+  http_client.WaitNoActive();
+  EXPECT_TRUE(socket_status.is_owned());
+  EXPECT_FALSE(socket_status.is_closed());
+  EXPECT_TRUE(socket_status.is_released());
+}
+
+TEST_F(HttpRPCTest, TLSEnginePingFail) {  // Ping when connecting fails.
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(-1));  // -1: presumably an invalid socket; confirm.
+  std::unique_ptr<FakeTLSEngineFactory> tls_engine_factory(
+      new FakeTLSEngineFactory);
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 443;
+  options.use_ssl = true;
+  HttpClient http_client(
+      std::move(socket_factory),
+      std::move(tls_engine_factory),
+      options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  HttpRPC http_rpc(&http_client, rpc_options);
+  HttpRPC::Status status;
+  int r = http_rpc.Ping(wm_.get(), "/pingz", &status);
+  EXPECT_EQ(0, r);  // no HTTP status code is expected from a failed connect.
+  EXPECT_EQ("error: failed to connect to backend servers",
+            http_client.GetHealthStatusMessage());
+  http_client.WaitNoActive();
+}
+
+TEST_F(HttpRPCTest, TLSEnginePingRejected) {  // Ping answered with HTTP 401.
+  int socks[2];  // socks[0]: mock server end; socks[1]: client end.
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  std::ostringstream req_ss;
+  req_ss << "POST /pingz HTTP/1.1\r\n"
+         << "Host: clients5.google.com\r\n"
+         << "User-Agent: " << kUserAgentString << "\r\n"
+         << "Content-Type: binary/x-protocol-buffer\r\n"
+         << "Content-Length: 0\r\n\r\n";
+  // req_buf is pre-sized to the expected request; the mock server fills it.
+  const string req_expected = req_ss.str();
+  string req_buf;
+  req_buf.resize(req_expected.size());
+  mock_server_->ServerRead(socks[0], &req_buf);
+  std::ostringstream resp_ss;
+  resp_ss << "HTTP/1.1 401 Unauthorized\r\n"
+          << "Content-Type: text/plain\r\n"
+          << "Content-Length: 5\r\n\r\n"
+          << "error";
+  mock_server_->ServerWrite(socks[0], resp_ss.str());
+  mock_server_->ServerClose(socks[0]);
+  // Client side: HttpClient over socks[1] using the fake TLS engine factory.
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:443");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(443);
+  std::unique_ptr<FakeTLSEngineFactory> tls_engine_factory(
+      new FakeTLSEngineFactory);
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 443;
+  options.use_ssl = true;
+  HttpClient http_client(std::move(socket_factory),
+                         std::move(tls_engine_factory),
+                         options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  HttpRPC http_rpc(&http_client, rpc_options);
+  HttpRPC::Status status;
+  int r = http_rpc.Ping(wm_.get(), "/pingz", &status);
+  EXPECT_EQ(req_expected, req_buf);
+  EXPECT_EQ(401, r);  // Ping is expected to return the HTTP status code.
+  EXPECT_EQ("running: access to backend servers was rejected.",
+            http_client.GetHealthStatusMessage());
+  http_client.WaitNoActive();  // socket must end up closed with an error.
+  EXPECT_FALSE(socket_status.is_owned());
+  EXPECT_TRUE(socket_status.is_closed());
+  EXPECT_TRUE(socket_status.is_err());
+  EXPECT_FALSE(socket_status.is_released());
+}
+
+TEST_F(HttpRPCTest, TLSEnginePingOk) {  // Ping answered with HTTP 200.
+  int socks[2];  // socks[0]: mock server end; socks[1]: client end.
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  std::ostringstream req_ss;
+  req_ss << "POST /pingz HTTP/1.1\r\n"
+         << "Host: clients5.google.com\r\n"
+         << "User-Agent: " << kUserAgentString << "\r\n"
+         << "Content-Type: binary/x-protocol-buffer\r\n"
+         << "Content-Length: 0\r\n\r\n";
+  // req_buf is pre-sized to the expected request; the mock server fills it.
+  const string req_expected = req_ss.str();
+  string req_buf;
+  req_buf.resize(req_expected.size());
+  mock_server_->ServerRead(socks[0], &req_buf);
+  std::ostringstream resp_ss;
+  resp_ss << "HTTP/1.1 200 OK\r\n"
+          << "Content-Type: text/plain\r\n"
+          << "Content-Length: 2\r\n\r\n"
+          << "ok";
+  mock_server_->ServerWrite(socks[0], resp_ss.str());
+  // Client side: HttpClient over socks[1] using the fake TLS engine factory.
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:443");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(443);
+  std::unique_ptr<FakeTLSEngineFactory> tls_engine_factory(
+      new FakeTLSEngineFactory);
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 443;
+  options.use_ssl = true;
+  HttpClient http_client(std::move(socket_factory),
+                         std::move(tls_engine_factory),
+                         options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  HttpRPC http_rpc(&http_client, rpc_options);
+  HttpRPC::Status status;
+  int r = http_rpc.Ping(wm_.get(), "/pingz", &status);
+  EXPECT_EQ(req_expected, req_buf);
+  EXPECT_EQ(200, r);  // Ping is expected to return the HTTP status code.
+  EXPECT_EQ("ok", http_client.GetHealthStatusMessage());
+  http_client.WaitNoActive();  // socket must end up released, not closed.
+  EXPECT_TRUE(socket_status.is_owned());
+  EXPECT_FALSE(socket_status.is_closed());
+  EXPECT_TRUE(socket_status.is_released());
+}
+
+TEST_F(HttpRPCTest, TLSEngineCallLookupFile) {  // LookupFile RPC via fake TLS.
+  int socks[2];  // socks[0]: mock server end; socks[1]: client end.
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  LookupFileReq req;
+  string serialized_req;
+  req.SerializeToString(&serialized_req);
+  std::ostringstream req_ss;
+  req_ss << "POST /l HTTP/1.1\r\n"
+         << "Host: clients5.google.com\r\n"
+         << "User-Agent: " << kUserAgentString << "\r\n"
+         << "Content-Type: binary/x-protocol-buffer\r\n"
+         << "Content-Length: " << serialized_req.size() << "\r\n\r\n"
+         << serialized_req;
+  // req_buf is pre-sized to the expected request; the mock server fills it.
+  const string req_expected = req_ss.str();
+  string req_buf;
+  req_buf.resize(req_expected.size());
+  mock_server_->ServerRead(socks[0], &req_buf);
+  LookupFileResp resp;
+  string serialized_resp;
+  resp.SerializeToString(&serialized_resp);
+  std::ostringstream resp_ss;
+  resp_ss << "HTTP/1.1 200 OK\r\n"
+          << "Content-Type: text/x-protocol-buffer\r\n"
+          << "Content-Length: " << serialized_resp.size() << "\r\n\r\n"
+          << serialized_resp;
+  mock_server_->ServerWrite(socks[0], resp_ss.str());
+  // Client side: HttpClient over socks[1] using the fake TLS engine factory.
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:443");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(443);
+  std::unique_ptr<FakeTLSEngineFactory> tls_engine_factory(
+      new FakeTLSEngineFactory);
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 443;
+  options.use_ssl = true;
+  HttpClient http_client(std::move(socket_factory),
+                         std::move(tls_engine_factory),
+                         options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  rpc_options.start_compression = false;
+  HttpRPC http_rpc(&http_client, rpc_options);
+  TestLookupFileContext tc(&http_rpc, nullptr);  // nullptr: no done callback.
+  RunTestLookupFile(&tc);
+  {
+    AutoLock lock(&mu_);
+    while (tc.state_ != TestLookupFileContext::DONE) {
+      cond_.Wait();
+    }
+    // The response was written up front, so the call should simply finish.
+    EXPECT_EQ(req_expected, req_buf);
+    EXPECT_EQ(0, tc.r_);
+    EXPECT_TRUE(tc.status_.connect_success);
+    EXPECT_TRUE(tc.status_.finished);
+    EXPECT_EQ(0, tc.status_.err);
+    EXPECT_EQ("", tc.status_.err_message);
+    EXPECT_EQ(200, tc.status_.http_return_code);
+  }
+  http_client.WaitNoActive();  // socket must end up released, not closed.
+  EXPECT_TRUE(socket_status.is_owned());
+  EXPECT_FALSE(socket_status.is_closed());
+  EXPECT_TRUE(socket_status.is_released());
+}
+
+TEST_F(HttpRPCTest, TLSEngineCallAsyncLookupFile) {  // Async, via fake TLS.
+  int socks[2];  // socks[0]: mock server end; socks[1]: client end.
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  LookupFileReq req;
+  string serialized_req;
+  req.SerializeToString(&serialized_req);
+  std::ostringstream req_ss;
+  req_ss << "POST /l HTTP/1.1\r\n"
+         << "Host: clients5.google.com\r\n"
+         << "User-Agent: " << kUserAgentString << "\r\n"
+         << "Content-Type: binary/x-protocol-buffer\r\n"
+         << "Content-Length: " << serialized_req.size() << "\r\n\r\n"
+         << serialized_req;
+  // req_buf is pre-sized to the expected request; the mock server fills it.
+  const string req_expected = req_ss.str();
+  string req_buf;
+  req_buf.resize(req_expected.size());
+  mock_server_->ServerRead(socks[0], &req_buf);
+  LookupFileResp resp;
+  string serialized_resp;
+  resp.SerializeToString(&serialized_resp);
+  std::ostringstream resp_ss;
+  resp_ss << "HTTP/1.1 200 OK\r\n"
+          << "Content-Type: text/x-protocol-buffer\r\n"
+          << "Content-Length: " << serialized_resp.size() << "\r\n\r\n"
+          << serialized_resp;
+  // Client side: HttpClient over socks[1] using the fake TLS engine factory.
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:443");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(443);
+  std::unique_ptr<FakeTLSEngineFactory> tls_engine_factory(
+      new FakeTLSEngineFactory);
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 443;
+  options.use_ssl = true;
+  HttpClient http_client(std::move(socket_factory),
+                         std::move(tls_engine_factory),
+                         options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  rpc_options.start_compression = false;
+  HttpRPC http_rpc(&http_client, rpc_options);
+  bool done = false;
+  TestLookupFileContext tc(&http_rpc, NewDoneCallback(&done));
+  RunTestLookupFile(&tc);
+  {
+    AutoLock lock(&mu_);
+    while (tc.state_ != TestLookupFileContext::CALL) {
+      cond_.Wait();
+    }
+    // In flight: connected, but not finished since no response was sent yet.
+    EXPECT_TRUE(tc.status_.connect_success);
+    EXPECT_FALSE(tc.status_.finished);
+  }
+  // Only now send the response, after the in-flight state was verified.
+  mock_server_->ServerWrite(socks[0], resp_ss.str());
+  WaitTestLookupFile(&tc);
+
+  {
+    AutoLock lock(&mu_);
+    while (!done) {  // presumably set via NewDoneCallback(&done); confirm.
+      cond_.Wait();
+    }
+    while (tc.state_ != TestLookupFileContext::DONE) {
+      cond_.Wait();
+    }
+    EXPECT_EQ(req_expected, req_buf);
+    EXPECT_EQ(0, tc.r_);
+    EXPECT_TRUE(tc.status_.connect_success);
+    EXPECT_TRUE(tc.status_.finished);
+    EXPECT_EQ(0, tc.status_.err);
+    EXPECT_EQ("", tc.status_.err_message);
+    EXPECT_EQ(200, tc.status_.http_return_code);
+  }
+  http_client.WaitNoActive();  // socket must end up released, not closed.
+  EXPECT_TRUE(socket_status.is_owned());
+  EXPECT_FALSE(socket_status.is_closed());
+  EXPECT_TRUE(socket_status.is_released());
+}
+
+TEST_F(HttpRPCTest, TLSEngineFailWithTLSErrorAtSetData) {
+  int socks[2];  // socks[0]: mock server end; socks[1]: client end.
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  std::ostringstream req_ss;
+  req_ss << "POST /pingz HTTP/1.1\r\n"
+         << "Host: clients5.google.com\r\n"
+         << "User-Agent: " << kUserAgentString << "\r\n"
+         << "Content-Type: binary/x-protocol-buffer\r\n"
+         << "Content-Length: 0\r\n\r\n";
+  // Scenario: fake TLS engine in FAKE_TLS_SET_BROKEN mode; server replies 200.
+  const string req_expected = req_ss.str();
+  string req_buf;
+  req_buf.resize(req_expected.size());  // pre-sized for the mock server read.
+  mock_server_->ServerRead(socks[0], &req_buf);
+  std::ostringstream resp_ss;
+  resp_ss << "HTTP/1.1 200 OK\r\n"
+          << "Content-Type: text/plain\r\n"
+          << "Content-Length: 2\r\n\r\n"
+          << "ok";
+  mock_server_->ServerWrite(socks[0], resp_ss.str());
+  mock_server_->ServerClose(socks[0]);
+  // Client side: HttpClient over socks[1] using the fake TLS engine factory.
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:443");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(443);
+  std::unique_ptr<FakeTLSEngineFactory> tls_engine_factory(
+      new FakeTLSEngineFactory);
+  tls_engine_factory->SetBroken(FakeTLSEngine::FAKE_TLS_SET_BROKEN);
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 443;
+  options.use_ssl = true;
+  HttpClient http_client(std::move(socket_factory),
+                         std::move(tls_engine_factory),
+                         options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  HttpRPC http_rpc(&http_client, rpc_options);
+  HttpRPC::Status status;
+  int r = http_rpc.Ping(wm_.get(), "/pingz", &status);
+  EXPECT_EQ(req_expected, req_buf);  // the request still reached the server.
+  EXPECT_EQ(500, r);  // 500 is expected although the server replied 200.
+  EXPECT_EQ("running: failed to send request to backend servers",
+            http_client.GetHealthStatusMessage());
+  http_client.WaitNoActive();  // socket must end up closed with an error.
+  EXPECT_FALSE(socket_status.is_owned());
+  EXPECT_TRUE(socket_status.is_closed());
+  EXPECT_TRUE(socket_status.is_err());
+  EXPECT_FALSE(socket_status.is_released());
+}
+
+TEST_F(HttpRPCTest, TLSEngineFailWithTLSErrorAtRead) {
+  int socks[2];  // socks[0]: mock server end; socks[1]: client end.
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  std::ostringstream req_ss;
+  req_ss << "POST /pingz HTTP/1.1\r\n"
+         << "Host: clients5.google.com\r\n"
+         << "User-Agent: " << kUserAgentString << "\r\n"
+         << "Content-Type: binary/x-protocol-buffer\r\n"
+         << "Content-Length: 0\r\n\r\n";
+  // Scenario: fake TLS engine in FAKE_TLS_READ_BROKEN mode; no server reply.
+  const string req_expected = req_ss.str();
+  string req_buf;
+  req_buf.resize(req_expected.size());  // pre-sized for the mock server read.
+  mock_server_->ServerRead(socks[0], &req_buf);
+  mock_server_->ServerClose(socks[0]);
+  // Client side: HttpClient over socks[1] using the fake TLS engine factory.
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:443");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(443);
+  std::unique_ptr<FakeTLSEngineFactory> tls_engine_factory(
+      new FakeTLSEngineFactory);
+  tls_engine_factory->SetBroken(FakeTLSEngine::FAKE_TLS_READ_BROKEN);
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 443;
+  options.use_ssl = true;
+  HttpClient http_client(std::move(socket_factory),
+                         std::move(tls_engine_factory),
+                         options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  HttpRPC http_rpc(&http_client, rpc_options);
+  HttpRPC::Status status;
+  int r = http_rpc.Ping(wm_.get(), "/pingz", &status);
+  string expected_buf;
+  expected_buf.resize(req_expected.size());  // all '\0', like a fresh req_buf.
+  EXPECT_EQ(expected_buf, req_buf);  // nothing received at server.
+  EXPECT_EQ(500, r);
+  EXPECT_EQ("running: failed to send request to backend servers",
+            http_client.GetHealthStatusMessage());
+  http_client.WaitNoActive();  // socket must end up closed with an error.
+  EXPECT_FALSE(socket_status.is_owned());
+  EXPECT_TRUE(socket_status.is_closed());
+  EXPECT_TRUE(socket_status.is_err());
+  EXPECT_FALSE(socket_status.is_released());
+}
+
+TEST_F(HttpRPCTest, TLSEngineFailWithTLSErrorAtWrite) {
+  int socks[2];  // socks[0]: mock server end; socks[1]: client end.
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  string req_buf;  // left empty: no request is expected to arrive.
+  mock_server_->ServerRead(socks[0], &req_buf);
+  // Client side: HttpClient over socks[1] with a write-broken fake TLS engine.
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:443");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(443);
+  std::unique_ptr<FakeTLSEngineFactory> tls_engine_factory(
+      new FakeTLSEngineFactory);
+  tls_engine_factory->SetBroken(FakeTLSEngine::FAKE_TLS_WRITE_BROKEN);
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 443;
+  options.use_ssl = true;
+  HttpClient http_client(std::move(socket_factory),
+                         std::move(tls_engine_factory),
+                         options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  HttpRPC http_rpc(&http_client, rpc_options);
+  HttpRPC::Status status;
+  int r = http_rpc.Ping(wm_.get(), "/pingz", &status);
+  EXPECT_EQ(500, r);
+  EXPECT_EQ("running: failed to send request to backend servers",
+            http_client.GetHealthStatusMessage());
+  // Nothing should have been sent to the server.
+  EXPECT_EQ("", req_buf);
+  mock_server_->ServerClose(socks[0]);
+  http_client.WaitNoActive();  // socket must end up closed with an error.
+  EXPECT_FALSE(socket_status.is_owned());
+  EXPECT_TRUE(socket_status.is_closed());
+  EXPECT_TRUE(socket_status.is_err());
+  EXPECT_FALSE(socket_status.is_released());
+}
+
+TEST_F(HttpRPCTest, TLSEngineServerCloseWithoutContentLengthShouldBeOk) {
+  int socks[2];  // socks[0]: mock server end; socks[1]: client end.
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  std::ostringstream req_ss;
+  req_ss << "POST /pingz HTTP/1.1\r\n"
+         << "Host: clients5.google.com\r\n"
+         << "User-Agent: " << kUserAgentString << "\r\n"
+         << "Content-Type: binary/x-protocol-buffer\r\n"
+         << "Content-Length: 0\r\n\r\n";
+  // Scenario: the 200 reply has no Content-Length and the server then closes.
+  const string req_expected = req_ss.str();
+  string req_buf;
+  req_buf.resize(req_expected.size());  // pre-sized for the mock server read.
+  mock_server_->ServerRead(socks[0], &req_buf);
+  std::ostringstream resp_ss;
+  resp_ss << "HTTP/1.1 200 OK\r\n"
+          << "Content-Type: text/plain\r\n\r\n"
+          << "ok";
+  mock_server_->ServerWrite(socks[0], resp_ss.str());
+  mock_server_->ServerClose(socks[0]);
+  // Client side: HttpClient over socks[1] using the fake TLS engine factory.
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:443");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(443);
+  std::unique_ptr<FakeTLSEngineFactory> tls_engine_factory(
+      new FakeTLSEngineFactory);
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 443;
+  options.use_ssl = true;
+  HttpClient http_client(std::move(socket_factory),
+                         std::move(tls_engine_factory),
+                         options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  HttpRPC http_rpc(&http_client, rpc_options);
+  HttpRPC::Status status;
+  int r = http_rpc.Ping(wm_.get(), "/pingz", &status);
+  EXPECT_EQ(req_expected, req_buf);
+  EXPECT_EQ(200, r);
+  EXPECT_EQ("ok", http_client.GetHealthStatusMessage());
+  http_client.WaitNoActive();  // socket must be closed, but without an error.
+  EXPECT_FALSE(socket_status.is_owned());
+  EXPECT_TRUE(socket_status.is_closed());
+  EXPECT_FALSE(socket_status.is_err());
+  EXPECT_FALSE(socket_status.is_released());
+}
+
+TEST_F(HttpRPCTest, TLSEngineServerCloseBeforeSendingHeaderShouldBeError) {
+  int socks[2];  // socks[0]: mock server end; socks[1]: client end.
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  std::ostringstream req_ss;
+  req_ss << "POST /pingz HTTP/1.1\r\n"
+         << "Host: clients5.google.com\r\n"
+         << "User-Agent: " << kUserAgentString << "\r\n"
+         << "Content-Type: binary/x-protocol-buffer\r\n"
+         << "Content-Length: 0\r\n\r\n";
+  // Scenario: the server sends only the status line and then closes.
+  const string req_expected = req_ss.str();
+  string req_buf;
+  req_buf.resize(req_expected.size());  // pre-sized for the mock server read.
+  mock_server_->ServerRead(socks[0], &req_buf);
+  std::ostringstream resp_ss;
+  resp_ss << "HTTP/1.1 200 OK\r\n";
+  mock_server_->ServerWrite(socks[0], resp_ss.str());
+  mock_server_->ServerClose(socks[0]);
+  // Client side: HttpClient over socks[1] using the fake TLS engine factory.
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:443");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(443);
+  std::unique_ptr<FakeTLSEngineFactory> tls_engine_factory(
+      new FakeTLSEngineFactory);
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 443;
+  options.use_ssl = true;
+  HttpClient http_client(std::move(socket_factory),
+                         std::move(tls_engine_factory),
+                         options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  HttpRPC http_rpc(&http_client, rpc_options);
+  HttpRPC::Status status;
+  http_rpc.Ping(wm_.get(), "/pingz", &status);  // only status.err is checked.
+  EXPECT_EQ(req_expected, req_buf);
+  EXPECT_EQ(FAIL, status.err);
+  http_client.WaitNoActive();  // socket must end up closed with an error.
+  EXPECT_FALSE(socket_status.is_owned());
+  EXPECT_TRUE(socket_status.is_closed());
+  EXPECT_TRUE(socket_status.is_err());
+  EXPECT_FALSE(socket_status.is_released());
+}
+
+TEST_F(HttpRPCTest, TLSEngineServerCloseBeforeReadingAnythingShouldBeError) {
+  int socks[2];  // socks[0]: mock server end; socks[1]: client end.
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  mock_server_->ServerClose(socks[0]);
+  // Scenario: the server end is closed before the client is even created.
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:443");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(443);
+  std::unique_ptr<FakeTLSEngineFactory> tls_engine_factory(
+      new FakeTLSEngineFactory);
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 443;
+  options.use_ssl = true;
+  HttpClient http_client(std::move(socket_factory),
+                         std::move(tls_engine_factory),
+                         options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  HttpRPC http_rpc(&http_client, rpc_options);
+  HttpRPC::Status status;
+  int r = http_rpc.Ping(wm_.get(), "/pingz", &status);
+  EXPECT_EQ(500, r);  // 500 is expected although no response was ever sent.
+  EXPECT_EQ(FAIL, status.err);
+  http_client.WaitNoActive();  // socket must end up closed with an error.
+  EXPECT_FALSE(socket_status.is_owned());
+  EXPECT_TRUE(socket_status.is_closed());
+  EXPECT_TRUE(socket_status.is_err());
+  EXPECT_FALSE(socket_status.is_released());
+}
+
+TEST_F(HttpRPCTest, TLSEngineServerCloseBeforeSendingEnoughDataShouldBeError) {
+  int socks[2];  // socks[0]: mock server end; socks[1]: client end.
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  std::ostringstream req_ss;
+  req_ss << "POST /pingz HTTP/1.1\r\n"
+         << "Host: clients5.google.com\r\n"
+         << "User-Agent: " << kUserAgentString << "\r\n"
+         << "Content-Type: binary/x-protocol-buffer\r\n"
+         << "Content-Length: 0\r\n\r\n";
+  // Scenario: reply promises Content-Length: 128 but only 2 body bytes arrive.
+  const string req_expected = req_ss.str();
+  string req_buf;
+  req_buf.resize(req_expected.size());  // pre-sized for the mock server read.
+  mock_server_->ServerRead(socks[0], &req_buf);
+  std::ostringstream resp_ss;
+  resp_ss << "HTTP/1.1 200 OK\r\n"
+          << "Content-Type: text/plain\r\n"
+          << "Content-Length: 128\r\n\r\n"
+          << "ok";
+  mock_server_->ServerWrite(socks[0], resp_ss.str());
+  mock_server_->ServerClose(socks[0]);
+  // Client side: HttpClient over socks[1] using the fake TLS engine factory.
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:443");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(443);
+  std::unique_ptr<FakeTLSEngineFactory> tls_engine_factory(
+      new FakeTLSEngineFactory);
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 443;
+  options.use_ssl = true;
+  HttpClient http_client(std::move(socket_factory),
+                         std::move(tls_engine_factory),
+                         options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  HttpRPC http_rpc(&http_client, rpc_options);
+  HttpRPC::Status status;
+  http_rpc.Ping(wm_.get(), "/pingz", &status);  // only status.err is checked.
+  EXPECT_EQ(req_expected, req_buf);
+  EXPECT_EQ(FAIL, status.err);
+  http_client.WaitNoActive();  // socket must end up closed with an error.
+  EXPECT_FALSE(socket_status.is_owned());
+  EXPECT_TRUE(socket_status.is_closed());
+  EXPECT_TRUE(socket_status.is_err());
+  EXPECT_FALSE(socket_status.is_released());
+}
+
+TEST_F(HttpRPCTest, TLSEngineServerCloseWithoutContentLengthShouldNotHangUp) {
+  int socks[2];  // socks[0]: mock server end; socks[1]: client end.
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  std::ostringstream req_ss;
+  req_ss << "POST /pingz HTTP/1.1\r\n"
+         << "Host: clients5.google.com\r\n"
+         << "User-Agent: " << kUserAgentString << "\r\n"
+         << "Content-Type: binary/x-protocol-buffer\r\n"
+         << "Content-Length: 0\r\n\r\n";
+  // Scenario: reply without Content-Length, server closes, max read size 10.
+  const string req_expected = req_ss.str();
+  string req_buf;
+  req_buf.resize(req_expected.size());  // pre-sized for the mock server read.
+  mock_server_->ServerRead(socks[0], &req_buf);
+  std::ostringstream resp_ss;
+  resp_ss << "HTTP/1.1 200 OK\r\n"
+          << "Content-Type: text/plain\r\n\r\n"
+          << "dummydata";
+  mock_server_->ServerWrite(socks[0], resp_ss.str());
+  mock_server_->ServerClose(socks[0]);
+  // Client side: HttpClient over socks[1] using the fake TLS engine factory.
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:443");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(443);
+  std::unique_ptr<FakeTLSEngineFactory> tls_engine_factory(
+      new FakeTLSEngineFactory);
+  tls_engine_factory->SetMaxReadSize(10);  // presumably caps each read; confirm.
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 443;
+  options.use_ssl = true;
+  HttpClient http_client(std::move(socket_factory),
+                         std::move(tls_engine_factory),
+                         options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  HttpRPC http_rpc(&http_client, rpc_options);
+  HttpRPC::Status status;
+  http_rpc.Ping(wm_.get(), "/pingz", &status);  // only status.err is checked.
+  EXPECT_EQ(req_expected, req_buf);
+  EXPECT_EQ(OK, status.err);
+  http_client.WaitNoActive();  // socket must be closed, but without an error.
+  EXPECT_FALSE(socket_status.is_owned());
+  EXPECT_TRUE(socket_status.is_closed());
+  EXPECT_FALSE(socket_status.is_err());
+  EXPECT_FALSE(socket_status.is_released());
+}
+
+TEST_F(HttpRPCTest, TLSEngineServerCloseWithoutEndOfChunkShouldNotHangUp) {
+  int socks[2];  // socks[0]: mock server end; socks[1]: client end.
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  std::ostringstream req_ss;
+  req_ss << "POST /pingz HTTP/1.1\r\n"
+         << "Host: clients5.google.com\r\n"
+         << "User-Agent: " << kUserAgentString << "\r\n"
+         << "Content-Type: binary/x-protocol-buffer\r\n"
+         << "Content-Length: 0\r\n\r\n";
+  // Scenario: chunked reply is cut off: one 1-byte chunk, no terminating chunk.
+  const string req_expected = req_ss.str();
+  string req_buf;
+  req_buf.resize(req_expected.size());  // pre-sized for the mock server read.
+  mock_server_->ServerRead(socks[0], &req_buf);
+  std::ostringstream resp_ss;
+  resp_ss << "HTTP/1.1 200 OK\r\n"
+          << "Transfer-Encoding: chunked\r\n"
+          << "Content-Type: text/plain\r\n\r\n"
+          << "1\r\na";  // not sending all data but closed.
+  mock_server_->ServerWrite(socks[0], resp_ss.str());
+  mock_server_->ServerClose(socks[0]);
+  // Client side: HttpClient over socks[1] using the fake TLS engine factory.
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:443");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(443);
+  std::unique_ptr<FakeTLSEngineFactory> tls_engine_factory(
+      new FakeTLSEngineFactory);
+  tls_engine_factory->SetMaxReadSize(10);  // presumably caps each read; confirm.
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 443;
+  options.use_ssl = true;
+  HttpClient http_client(std::move(socket_factory),
+                         std::move(tls_engine_factory),
+                         options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  HttpRPC http_rpc(&http_client, rpc_options);
+  HttpRPC::Status status;
+  http_rpc.Ping(wm_.get(), "/pingz", &status);  // only status.err is checked.
+  EXPECT_EQ(req_expected, req_buf);
+  EXPECT_EQ(FAIL, status.err);
+  http_client.WaitNoActive();  // socket must end up closed with an error.
+  EXPECT_FALSE(socket_status.is_owned());
+  EXPECT_TRUE(socket_status.is_closed());
+  EXPECT_TRUE(socket_status.is_err());
+  EXPECT_FALSE(socket_status.is_released());
+}
+
+
+TEST_F(HttpRPCTest, TLSEngineServerCloseWithoutAllChunksShouldNotHangUp) {
+  int socks[2];  // socks[0]: mock server end; socks[1]: client end.
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  std::ostringstream req_ss;
+  req_ss << "POST /pingz HTTP/1.1\r\n"
+         << "Host: clients5.google.com\r\n"
+         << "User-Agent: " << kUserAgentString << "\r\n"
+         << "Content-Type: binary/x-protocol-buffer\r\n"
+         << "Content-Length: 0\r\n\r\n";
+  // Scenario: chunked reply is truncated; the server closes mid-stream.
+  const string req_expected = req_ss.str();
+  string req_buf;
+  req_buf.resize(req_expected.size());  // pre-sized for the mock server read.
+  mock_server_->ServerRead(socks[0], &req_buf);
+  std::ostringstream resp_ss;
+  resp_ss << "HTTP/1.1 200 OK\r\n"
+          << "Transfer-Encoding: chunked\r\n"
+          << "Content-Type: text/plain\r\n\r\n"
+          << "1\r\na123\r\nbcd";  // not sending all data but closed.
+  mock_server_->ServerWrite(socks[0], resp_ss.str());
+  mock_server_->ServerClose(socks[0]);
+  // NOTE(review): the payload has no CRLF after chunk data 'a'; intended?
+  MockSocketFactory::SocketStatus socket_status;
+  std::unique_ptr<MockSocketFactory> socket_factory(
+      new MockSocketFactory(socks[1], &socket_status));
+
+  socket_factory->set_dest("clients5.google.com:443");
+  socket_factory->set_host_name("clients5.google.com");
+  socket_factory->set_port(443);
+  std::unique_ptr<FakeTLSEngineFactory> tls_engine_factory(
+      new FakeTLSEngineFactory);
+  tls_engine_factory->SetMaxReadSize(10);  // presumably caps each read; confirm.
+  HttpClient::Options options;
+  options.dest_host_name = "clients5.google.com";
+  options.dest_port = 443;
+  options.use_ssl = true;
+  HttpClient http_client(std::move(socket_factory),
+                         std::move(tls_engine_factory),
+                         options, wm_.get());
+  HttpRPC::Options rpc_options;
+  rpc_options.content_type_for_protobuf = "binary/x-protocol-buffer";
+  HttpRPC http_rpc(&http_client, rpc_options);
+  HttpRPC::Status status;
+  http_rpc.Ping(wm_.get(), "/pingz", &status);  // only status.err is checked.
+  EXPECT_EQ(req_expected, req_buf);
+  EXPECT_EQ(FAIL, status.err);
+  http_client.WaitNoActive();  // socket must end up closed with an error.
+  EXPECT_FALSE(socket_status.is_owned());
+  EXPECT_TRUE(socket_status.is_closed());
+  EXPECT_TRUE(socket_status.is_err());
+  EXPECT_FALSE(socket_status.is_released());
+}
+
+}  // namespace devtools_goma
diff --git a/client/http_unittest.cc b/client/http_unittest.cc
new file mode 100644
index 0000000..9f5bd08
--- /dev/null
+++ b/client/http_unittest.cc
@@ -0,0 +1,90 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "http.h"
+
+#include <gtest/gtest.h>
+
+namespace devtools_goma {
+
+TEST(NetworkErrorStatus, BasicTest) {
+  HttpClient::NetworkErrorStatus status(30);  // presumably seconds; see below
+
+  EXPECT_EQ(0, status.NetworkErrorStartedTime());  // no error recorded yet
+
+  EXPECT_TRUE(status.OnNetworkErrorDetected(100));  // first error, time=100
+  EXPECT_EQ(100, status.NetworkErrorStartedTime());
+
+  // Don't recover for 30 seconds after the error; the start time stays 100.
+  EXPECT_FALSE(status.OnNetworkRecovered(110));
+  EXPECT_EQ(100, status.NetworkErrorStartedTime());
+  EXPECT_FALSE(status.OnNetworkRecovered(120));
+  EXPECT_EQ(100, status.NetworkErrorStartedTime());
+  EXPECT_FALSE(status.OnNetworkRecovered(129));
+  EXPECT_EQ(100, status.NetworkErrorStartedTime());
+  // Now recovered: the start time is expected to reset to 0.
+  EXPECT_TRUE(status.OnNetworkRecovered(131));
+  EXPECT_EQ(0, status.NetworkErrorStartedTime());
+
+  // Another network issue. (time=200)
+  EXPECT_TRUE(status.OnNetworkErrorDetected(200));
+  EXPECT_EQ(200, status.NetworkErrorStartedTime());
+
+  EXPECT_FALSE(status.OnNetworkRecovered(210));
+  EXPECT_EQ(200, status.NetworkErrorStartedTime());
+  // Network error on time=220, so postpone to recover until time=250.
+  EXPECT_FALSE(status.OnNetworkErrorDetected(220));  // false: error ongoing
+  EXPECT_EQ(200, status.NetworkErrorStartedTime());
+
+  EXPECT_FALSE(status.OnNetworkRecovered(249));
+  EXPECT_EQ(200, status.NetworkErrorStartedTime());
+
+  // Now we consider the network is recovered.
+  EXPECT_TRUE(status.OnNetworkRecovered(251));
+  EXPECT_EQ(0, status.NetworkErrorStartedTime());
+}
+
+TEST(HttpClientOptions, InitFromURLChromeInfraAuth) {  // https, implicit port
+  HttpClient::Options opts;
+  EXPECT_TRUE(opts.InitFromURL(
+      "https://chrome-infra-auth.appspot.com/auth/api/v1/server/oauth_config"));
+  EXPECT_TRUE(opts.use_ssl);
+  EXPECT_EQ(443, opts.dest_port);
+  EXPECT_EQ("chrome-infra-auth.appspot.com", opts.dest_host_name);
+  EXPECT_EQ("/auth/api/v1/server/oauth_config", opts.url_path_prefix);
+}
+
+TEST(HttpClientOptions, InitFromURLGCEMetadata) {  // http, implicit port
+  HttpClient::Options opts;
+  EXPECT_TRUE(opts.InitFromURL(
+      "http://metadata/computeMetadata/v1/instance/service-accounts/"));
+  EXPECT_FALSE(opts.use_ssl);
+  EXPECT_EQ(80, opts.dest_port);
+  EXPECT_EQ("metadata", opts.dest_host_name);
+  EXPECT_EQ("/computeMetadata/v1/instance/service-accounts/",
+            opts.url_path_prefix);
+}
+
+TEST(HttpClientOptions, InitFromURLGoogleOAuth2TokenURI) {
+  // https URL without a port: expect SSL on the default port 443.
+  HttpClient::Options opts;
+  EXPECT_TRUE(opts.InitFromURL("https://www.googleapis.com/oauth2/v3/token"));
+  EXPECT_TRUE(opts.use_ssl);
+  EXPECT_EQ(443, opts.dest_port);
+  EXPECT_EQ("www.googleapis.com", opts.dest_host_name);
+  EXPECT_EQ("/oauth2/v3/token", opts.url_path_prefix);
+}
+
+TEST(HttpClientOptions, InitFromURLWithExplicitPort) {
+  // The port written in the URL is expected instead of the scheme default.
+  HttpClient::Options opts;
+  EXPECT_TRUE(opts.InitFromURL("http://example.com:8080/foo/bar"));
+  EXPECT_FALSE(opts.use_ssl);
+  EXPECT_EQ(8080, opts.dest_port);
+  EXPECT_EQ("example.com", opts.dest_host_name);
+  EXPECT_EQ("/foo/bar", opts.url_path_prefix);
+}
+
+}  // namespace devtools_goma
diff --git a/client/include_cache.cc b/client/include_cache.cc
new file mode 100644
index 0000000..fc2feee
--- /dev/null
+++ b/client/include_cache.cc
@@ -0,0 +1,343 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "include_cache.h"
+
+#include "compiler_specific.h"
+#include "content.h"
+#include "directive_filter.h"
+#include "file_id.h"
+#include "goma_hash.h"
+#include "histogram.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_stats.pb.h"
+MSVC_POP_WARNING()
+#include "strutil.h"
+
+namespace devtools_goma {
+
+// IncludeCache::Item owns |content|.
+class IncludeCache::Item {
+ public:
+  // Takes ownership of |content|. |original_content_size| is the size of the
+  // content before filtering and |updated_count| is how many times the entry
+  // for this key has been replaced (see IncludeCache::InsertInternal).
+  Item(std::unique_ptr<Content> content, const FileId& content_file_id,
+       const SHA256HashValue& directive_hash,
+       size_t original_content_size, size_t updated_count)
+      : content_(std::move(content)),
+        content_file_id_(content_file_id),
+        directive_hash_(directive_hash),
+        original_content_size_(original_content_size),
+        updated_count_(updated_count) {}
+
+  ~Item() = default;
+
+  // The stored content; owned by this item.
+  const Content* content() const { return content_.get(); }
+
+  // FileId the stored content was inserted with.
+  const FileId& content_file_id() const { return content_file_id_; }
+
+  // Hash handed to the constructor, or the one set afterwards.
+  const SHA256HashValue& directive_hash() const { return directive_hash_; }
+  void set_directive_hash(const SHA256HashValue& hash) {
+    directive_hash_ = hash;
+  }
+
+  // Size of the content before it was filtered.
+  size_t original_content_size() const { return original_content_size_; }
+
+  // Number of times this key's entry has been replaced.
+  size_t updated_count() const { return updated_count_; }
+
+ private:
+  const std::unique_ptr<Content> content_;
+  const FileId content_file_id_;
+  // Not const: can be replaced through set_directive_hash().
+  SHA256HashValue directive_hash_;
+  const size_t original_content_size_;
+  const size_t updated_count_;
+
+  DISALLOW_COPY_AND_ASSIGN(Item);
+};
+
+IncludeCache* IncludeCache::instance_;
+
+// static
+void IncludeCache::Init(int max_cache_size_in_mb,
+                        bool calculates_directive_hash) {
+  // Non-positive sizes leave the cache disabled. A negative value used to
+  // wrap around to an enormous size_t limit, i.e. an unbounded cache.
+  if (max_cache_size_in_mb <= 0) return;
+  size_t max_cache_size = max_cache_size_in_mb * 1024LL * 1024LL;
+  instance_ = new IncludeCache(max_cache_size, calculates_directive_hash);
+}
+
+// static
+void IncludeCache::Quit() {
+  delete instance_;  // no-op when |instance_| is still null
+  instance_ = nullptr;  // IsEnabled() reports false from now on
+}
+
+IncludeCache::IncludeCache(size_t max_cache_size,
+                           bool calculates_directive_hash)
+    : calculates_directive_hash_(calculates_directive_hash),
+      count_item_updated_(0),
+      count_item_evicted_(0),
+      current_cache_size_(0),  // nothing cached yet
+      max_cache_size_(max_cache_size) {  // in bytes; Init() converts from MB
+}
+
+IncludeCache::~IncludeCache() {  // empty: nothing is released by hand
+}
+
+const IncludeCache::Item* IncludeCache::GetItemIfNotModifiedUnlocked(
+    const string& key, const FileId& file_id) const {
+  // Takes no lock itself; the callers in this file hold |rwlock_|.
+  const auto found = cache_items_.find(key);
+  if (found != cache_items_.end()) {
+    const Item* cached = found->second.get();
+    // Hand the entry out only when it was stored with the same FileId.
+    if (!(file_id != cached->content_file_id()))
+      return cached;
+  }
+  return nullptr;
+}
+
+std::unique_ptr<Content> IncludeCache::GetCopyIfNotModified(
+    const string& filepath, const FileId& file_id) {
+  // Returns a copy of the cached content for |filepath| when it was stored
+  // with the same |file_id|, or null otherwise. Counts the hit or the miss.
+  std::unique_ptr<Content> copied;
+  {
+    // CreateFromContent might be heavy, so the copy is made under the
+    // shared lock rather than an exclusive one.
+    AUTO_SHARED_LOCK(lock, &rwlock_);
+    if (const Item* cached = GetItemIfNotModifiedUnlocked(filepath, file_id))
+      copied = Content::CreateFromContent(*cached->content());
+  }
+
+  // The lock is already released when the counters are touched.
+  if (copied == nullptr) {
+    missed_count_.Add(1);
+  } else {
+    hit_count_.Add(1);
+  }
+
+  return copied;
+}
+
+OptionalSHA256HashValue IncludeCache::GetDirectiveHash(const string& filepath,
+                                                       const FileId& file_id) {
+  DCHECK(calculates_directive_hash_);
+
+  // Fast path: an entry stored with the same |file_id| already has the hash.
+  {
+    AUTO_SHARED_LOCK(lock, &rwlock_);
+    if (const Item* cached = GetItemIfNotModifiedUnlocked(filepath, file_id))
+      return OptionalSHA256HashValue(cached->directive_hash());
+  }
+
+  // Slow path: load the file and insert it; the insertion fills in the hash.
+  const std::unique_ptr<Content> loaded(Content::CreateFromFile(filepath));
+  if (!loaded) {
+    return OptionalSHA256HashValue();  // no content could be created
+  }
+
+  SHA256HashValue computed;
+  InsertInternal(filepath, *loaded, file_id, &computed);
+  return OptionalSHA256HashValue(computed);
+}
+
+std::unique_ptr<Content> IncludeCache::Insert(
+    const string& key, const Content& content, const FileId& content_file_id) {
+  SHA256HashValue unused_hash;  // output slot only; not returned to the caller
+  return InsertInternal(key, content, content_file_id, &unused_hash);
+}
+
+std::unique_ptr<Content> IncludeCache::InsertInternal(
+    const string& key, const Content& content, const FileId& content_file_id,
+    SHA256HashValue* directive_hash) {
+  std::unique_ptr<Content> filtered_content =
+      DirectiveFilter::MakeFilteredContent(content);
+  std::unique_ptr<Content> returned_content(  // separate copy for the caller
+      Content::CreateFromContent(*filtered_content));
+  size_t original_size = content.size();
+
+  if (calculates_directive_hash_) {
+    DCHECK(directive_hash);
+    ComputeDataHashKeyForSHA256HashValue(filtered_content->ToStringPiece(),
+                                         directive_hash);
+  }
+
+  AUTO_EXCLUSIVE_LOCK(lock, &rwlock_);  // filtering and hashing ran unlocked
+  auto it = cache_items_.find(key);
+
+  const size_t filtered_content_size = filtered_content->size();  // pre-move
+  if (it == cache_items_.end()) {
+    std::unique_ptr<Item> item(
+        new Item(std::move(filtered_content), content_file_id,
+                 *directive_hash, original_size, 0));  // stored even if unset
+    cache_items_.emplace_back(key, std::move(item));
+  } else {
+    size_t original_updated_count = it->second->updated_count();
+    ++count_item_updated_;
+    current_cache_size_ -= it->second->content()->size();  // drop old size
+    it->second.reset(new Item(std::move(filtered_content), content_file_id,
+                              *directive_hash,
+                              original_size, original_updated_count + 1));
+  }
+
+  current_cache_size_ += filtered_content_size;
+
+  // Evicts entries from the front of |cache_items_| until the size fits.
+  CHECK_GT(max_cache_size_, 0U);
+  while (max_cache_size_ < current_cache_size_) {
+    DCHECK(!cache_items_.empty());
+
+    current_cache_size_ -= cache_items_.front().second->content()->size();
+    cache_items_.pop_front();
+
+    count_item_evicted_++;
+  }
+
+  return returned_content;
+}
+
+void IncludeCache::Dump(std::ostringstream* ss) {
+  AUTO_SHARED_LOCK(lock, &rwlock_);  // held for the whole dump
+
+  size_t num_cache_item = cache_items_.size();
+
+  Histogram compaction_ratio_histogram;
+  compaction_ratio_histogram.SetName("Compaction Ratio Histogram [%]");
+
+  Histogram item_update_count_histogram;
+  item_update_count_histogram.SetName("Item Update Count Histogram");
+
+  size_t total_original_size_in_bytes = 0;
+  size_t total_filtered_size_in_bytes = 0;
+  size_t max_original_size_in_bytes = 0;
+  size_t max_filtered_size_in_bytes = 0;
+  for (const auto& it : cache_items_) {
+    const Item* item = it.second.get();
+    total_original_size_in_bytes += item->original_content_size();
+    max_original_size_in_bytes = std::max(max_original_size_in_bytes,
+                                          item->original_content_size());
+
+    total_filtered_size_in_bytes += item->content()->size();
+    max_filtered_size_in_bytes = std::max(max_filtered_size_in_bytes,
+                                          item->content()->size());
+
+    double compaction_ratio = 0;  // stays 0 when the original was empty
+    if (item->original_content_size() > 0) {
+      compaction_ratio =
+          static_cast<double>(item->content()->size()) /
+          item->original_content_size();
+    }
+    compaction_ratio_histogram.Add(compaction_ratio * 100);  // as percentage
+
+    item_update_count_histogram.Add(item->updated_count());
+  }
+
+  (*ss) << "IncludeCaches summary" << std::endl;
+
+  (*ss) << std::endl;
+  (*ss) << "max cache size = "
+        << max_cache_size_ << " bytes" << std::endl;
+  (*ss) << "current cache size = "
+        << current_cache_size_ << " bytes" << std::endl;
+
+  (*ss) << std::endl;
+  (*ss) << " Hit    = " << hit_count_.value() << std::endl;
+  (*ss) << " Missed = " << missed_count_.value() << std::endl;
+
+  (*ss) << std::endl;
+  (*ss) << "Header num = " << num_cache_item << std::endl;
+
+  if (num_cache_item > 0) {  // the averages below divide by this count
+    (*ss) << std::endl;
+    (*ss) << "Original Headers: " << std::endl;
+    (*ss) << "  Total   size = "
+          << total_original_size_in_bytes << " bytes" << std::endl;
+    (*ss) << "  Max     size = "
+          << max_original_size_in_bytes << " bytes" << std::endl;
+    (*ss) << "  Average size = "
+          << (total_original_size_in_bytes / num_cache_item)
+          << " bytes" << std::endl;
+
+    (*ss) << "Filtered Headers: " << std::endl;
+    (*ss) << "  Total   size = "
+          << total_filtered_size_in_bytes << " bytes" << std::endl;
+    (*ss) << "  Max     size = "
+          << max_filtered_size_in_bytes << " bytes" << std::endl;
+    (*ss) << "  Average size = "
+          << (total_filtered_size_in_bytes / num_cache_item)
+          << " bytes" << std::endl;
+
+    (*ss) << std::endl;
+    (*ss) << compaction_ratio_histogram.DebugString() << std::endl;
+
+    (*ss) << std::endl;
+    (*ss) << "Item updated count = " << count_item_updated_ << std::endl;
+    (*ss) << "Item evicted count = " << count_item_evicted_ << std::endl;
+
+    (*ss) << std::endl;
+    (*ss) << item_update_count_histogram.DebugString() << std::endl;
+  }
+
+  (*ss) << std::endl;
+}
+
+// static
+void IncludeCache::DumpAll(std::ostringstream* ss) {
+  if (IncludeCache::IsEnabled()) {
+    instance()->Dump(ss);
+    return;
+  }
+  // No instance exists, so only explain how to turn the cache on.
+  (*ss) << "IncludeCache is not enabled." << std::endl;
+  (*ss) << "To enable it, set environment variable "
+        << "GOMA_MAX_INCLUDE_CACHE_SIZE more than 0." << std::endl;
+}
+
+void IncludeCache::DumpStatsToProto(IncludeCacheStats* stats) {
+  // The hit/miss counters are read before |rwlock_| is taken.
+  stats->set_hit(hit_count_.value());
+  stats->set_missed(missed_count_.value());
+
+  // Everything from here on is read under the shared lock, which is held
+  // until the function returns.
+  AUTO_SHARED_LOCK(lock, &rwlock_);
+
+  stats->set_total_entries(cache_items_.size());
+  stats->set_total_cache_size(current_cache_size_);
+  stats->set_updated(count_item_updated_);
+  stats->set_evicted(count_item_evicted_);
+
+  // Totals and maxima of the original and the filtered content sizes.
+  size_t original_total = 0;
+  size_t original_max = 0;
+  size_t filtered_total = 0;
+  size_t filtered_max = 0;
+  for (const auto& entry : cache_items_) {
+    const size_t original_size = entry.second->original_content_size();
+    const size_t filtered_size = entry.second->content()->size();
+
+    original_total += original_size;
+    original_max = std::max(original_max, original_size);
+
+    filtered_total += filtered_size;
+    filtered_max = std::max(filtered_max, filtered_size);
+  }
+
+  stats->set_original_total_size(original_total);
+  stats->set_original_max_size(original_max);
+  stats->set_filtered_total_size(filtered_total);
+  stats->set_filtered_max_size(filtered_max);
+}
+
+}  // namespace devtools_goma
diff --git a/client/include_cache.h b/client/include_cache.h
new file mode 100644
index 0000000..c5a39ca
--- /dev/null
+++ b/client/include_cache.h
@@ -0,0 +1,105 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_INCLUDE_CACHE_H_
+#define DEVTOOLS_GOMA_CLIENT_INCLUDE_CACHE_H_
+
+#include <list>
+#include <memory>
+#include <string>
+
+#include "atomic_stats_counter.h"
+#include "autolock_timer.h"
+#include "goma_hash.h"
+#include "linked_unordered_map.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class Content;
+struct FileId;
+class IncludeCacheStats;
+
+// IncludeCache stores include file contents reduced to their directive lines.
+class IncludeCache {
+ public:
+  static IncludeCache* instance() {
+    return instance_;
+  }
+
+  static bool IsEnabled() { return instance_ != NULL; }  // true once created
+  // Initializes IncludeCache.
+  // |max_cache_size_in_mb| specifies the maximum amount of cache size. If cache
+  // size exceeds this value, the oldest cache will be evicted.
+  // When |calculates_directive_hash| is true, we also calculate the hash value
+  // of cache item. This value will be used from DepsCache.
+  static void Init(int max_cache_size_in_mb, bool calculates_directive_hash);
+  static void Quit();
+
+  // Inserts content to cache. We store the content where non-directive lines
+  // are removed. Since |content| is copied, it's safe to remove |content| after
+  // insertion. Returned value is directive filtered |content|.
+  std::unique_ptr<Content> Insert(const string& key, const Content& content,
+                                  const FileId& content_file_id);
+
+  // Gets a copy of the inserted content, only when the file_id of the stored
+  // content is the same as |file_id|.
+  std::unique_ptr<Content> GetCopyIfNotModified(const string& filepath,
+                                                const FileId& file_id);
+
+  // Get directive hash. If we have a cache and its FileId is the same as
+  // |file_id|, we return the cached one. Otherwise, we calculate the directive
+  // hash, and save it.
+  // If |filepath| is not found, invalid hash value is returned.
+  OptionalSHA256HashValue GetDirectiveHash(const string& filepath,
+                                           const FileId& file_id);
+
+  void Dump(std::ostringstream* ss);
+  static void DumpAll(std::ostringstream* ss);
+
+  void DumpStatsToProto(IncludeCacheStats* stats);
+
+  bool calculates_directive_hash() const { return calculates_directive_hash_; }
+
+ private:
+  class Item;
+  friend class IncludeCacheTest;
+
+  IncludeCache(size_t max_cache_size, bool calculates_directive_hash);
+  ~IncludeCache();
+
+  std::unique_ptr<Content> InsertInternal(
+      const string& key, const Content& content, const FileId& content_file_id,
+      SHA256HashValue* directive_hash);
+  const Item* GetItemIfNotModifiedUnlocked(const string& key,
+                                           const FileId& file_id) const;
+
+  static IncludeCache* instance_;
+
+  const bool calculates_directive_hash_;
+
+  ReadWriteLock rwlock_;
+  // A map from filepath to unique_ptr<Item>.
+  // The oldest item comes first.
+  // TODO: We might want to use LRU instead of just queue.
+  // Currently we're not updating |cache_items_| after referring.
+  LinkedUnorderedMap<std::string, std::unique_ptr<Item>> cache_items_;
+
+  size_t count_item_updated_;
+  size_t count_item_evicted_;
+  // The total content size of cached items.
+  size_t current_cache_size_;
+  size_t max_cache_size_;  // in bytes
+
+  StatsCounter hit_count_;
+  StatsCounter missed_count_;
+
+  DISALLOW_COPY_AND_ASSIGN(IncludeCache);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_INCLUDE_CACHE_H_
diff --git a/client/include_cache_unittest.cc b/client/include_cache_unittest.cc
new file mode 100644
index 0000000..904b018
--- /dev/null
+++ b/client/include_cache_unittest.cc
@@ -0,0 +1,184 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "include_cache.h"
+
+#include <algorithm>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "content.h"
+#include "file_id.h"
+#include "file_id_cache.h"
+#include "goma_hash.h"
+#include "unittest_util.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class IncludeCacheTest : public testing::Test {
+ protected:
+  // Every test runs against a fresh 1MB cache with directive hashing on.
+  void SetUp() override { IncludeCache::Init(1, true); }
+  void TearDown() override { IncludeCache::Quit(); }
+
+  // Returns |size| bytes of '#' with a newline as every 1024th byte.
+  std::unique_ptr<Content> MakeDirectiveOnlyContent(int size) {
+    string text(size, '#');
+    int newline_pos = 1023;
+    while (newline_pos < size) {
+      text[newline_pos] = '\n';
+      newline_pos += 1024;
+    }
+    return Content::CreateFromString(text);
+  }
+
+  // Number of entries currently held by |include_cache|.
+  int Size(IncludeCache* include_cache) const {
+    return include_cache->cache_items_.size();
+  }
+
+  // Value of the cache's running size counter.
+  size_t CacheSize(IncludeCache* include_cache) const {
+    return include_cache->current_cache_size_;
+  }
+};
+
+TEST_F(IncludeCacheTest, SetGet) {
+  IncludeCache* ic = IncludeCache::instance();
+
+  std::unique_ptr<Content> original(Content::CreateFromString(
+      "#include <stdio.h>\n"
+      "non-directive-line\n"));
+
+  FileId file_id;
+  file_id.size = original->size();
+  file_id.mtime = 100;
+
+  ic->Insert("kotori", *original, file_id);
+
+  {
+    std::unique_ptr<Content> content(
+        ic->GetCopyIfNotModified("kotori", file_id));
+    ASSERT_TRUE(content.get() != nullptr);  // fatal: dereferenced just below
+
+    string actual(content->buf(), content->buf_end());
+    EXPECT_EQ("#include <stdio.h>\n", actual);  // only the directive is kept
+  }
+
+  // When mtime is newer, we cannot take Content.
+  file_id.mtime = 105;
+  {
+    std::unique_ptr<Content> content(
+        ic->GetCopyIfNotModified("kotori", file_id));
+    EXPECT_TRUE(content.get() == nullptr);
+  }
+}
+
+TEST_F(IncludeCacheTest, ExceedMemory) {
+  const int kFileSize = 256 * 1024;  // a quarter of the 1MB set in SetUp()
+
+  IncludeCache* ic = IncludeCache::instance();
+
+  std::unique_ptr<Content> content(MakeDirectiveOnlyContent(kFileSize));
+
+  FileId file_id;
+  file_id.size = kFileSize;
+  file_id.mtime = 100;
+
+  ic->Insert("key0", *content, file_id);
+  ic->Insert("key1", *content, file_id);
+  ic->Insert("key2", *content, file_id);
+  ic->Insert("key3", *content, file_id);
+
+  EXPECT_EQ(4, Size(ic));  // four entries fill the cache exactly
+  EXPECT_EQ(1024 * 1024UL, CacheSize(ic));
+
+  ic->Insert("key5", *content, file_id);  // fifth entry exceeds the limit
+
+  // key0 has been evicted, since it is inserted first.
+  EXPECT_EQ(4, Size(ic));
+  EXPECT_EQ(1024 * 1024UL, CacheSize(ic));
+  EXPECT_EQ(nullptr, ic->GetCopyIfNotModified("key0", file_id));
+
+  // key1 is not evicted yet. Reading it does not protect it from eviction.
+  std::unique_ptr<Content> key1_content(
+      ic->GetCopyIfNotModified("key1", file_id));
+  EXPECT_NE(nullptr, key1_content.get());
+
+  // Insert key0 again.
+  ic->Insert("key0", *content, file_id);
+
+  // Then, key1 should be evicted.
+  EXPECT_EQ(nullptr, ic->GetCopyIfNotModified("key1", file_id));
+}
+
+TEST_F(IncludeCacheTest, GetDirectiveHash)
+{
+  IncludeCache* ic = IncludeCache::instance();
+
+  TmpdirUtil tmpdir("includecache");
+  const string& ah = tmpdir.FullPath("a.h");
+  const string& bh = tmpdir.FullPath("b.h");
+  tmpdir.CreateTmpFile("a.h",
+                       "#include <stdio.h>\n");
+  tmpdir.CreateTmpFile("b.h",
+                       "#include <math.h>\n");
+
+  {
+    SHA256HashValue hash_expected;
+    ComputeDataHashKeyForSHA256HashValue("#include <stdio.h>\n",
+                                         &hash_expected);
+
+    FileIdCache file_id_cache;
+    FileId file_id(file_id_cache.Get(ah));
+    ASSERT_TRUE(file_id.IsValid());
+
+    OptionalSHA256HashValue hash_actual = ic->GetDirectiveHash(ah, file_id);
+    EXPECT_TRUE(hash_actual.valid());
+    EXPECT_EQ(hash_expected, hash_actual.value());
+  }
+
+  // Update file content; the hash is expected to follow the new content.
+  tmpdir.CreateTmpFile("a.h",
+                       "#include <string.h>\n");
+  {
+    SHA256HashValue hash_expected;
+    ComputeDataHashKeyForSHA256HashValue("#include <string.h>\n",
+                                         &hash_expected);
+
+    FileIdCache file_id_cache;  // fresh cache, so a.h is looked up again
+    FileId file_id(file_id_cache.Get(ah));
+    ASSERT_TRUE(file_id.IsValid());
+
+    OptionalSHA256HashValue hash_actual = ic->GetDirectiveHash(ah, file_id);
+    EXPECT_TRUE(hash_actual.valid());
+    EXPECT_EQ(hash_expected, hash_actual.value());
+  }
+
+  // Currently IncludeCache does not have a cache for b.h.
+  // However, GetDirectiveHash will succeed, and the cached result
+  // will be stored.
+  {
+    SHA256HashValue hash_expected;
+    ComputeDataHashKeyForSHA256HashValue("#include <math.h>\n", &hash_expected);
+
+    FileIdCache file_id_cache;
+    FileId file_id(file_id_cache.Get(bh));
+    ASSERT_TRUE(file_id.IsValid());
+
+    OptionalSHA256HashValue hash_actual = ic->GetDirectiveHash(bh, file_id);
+    EXPECT_TRUE(hash_actual.valid());
+    EXPECT_EQ(hash_expected, hash_actual.value());
+
+    std::unique_ptr<Content> content(ic->GetCopyIfNotModified(bh, file_id));
+    ASSERT_TRUE(content.get() != nullptr);  // entry stored by the call above
+    EXPECT_EQ("#include <math.h>\n", content->ToStringPiece());
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/client/include_file_finder.cc b/client/include_file_finder.cc
new file mode 100644
index 0000000..21ef26c
--- /dev/null
+++ b/client/include_file_finder.cc
@@ -0,0 +1,267 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "include_file_finder.h"
+
+#include "cpp_parser.h"
+#include "file_dir.h"
+#include "file_id_cache.h"
+#include "include_file_utils.h"
+#include "path.h"
+#include "path_resolver.h"
+#include "string_piece_utils.h"
+
+namespace devtools_goma {
+
+namespace {
+
+// TODO: Merge with CleanPathSep(..) in include_dir_cache.cc.
+string RemoveDuplicateSlash(const string& path) {
+  // Collapses each run of consecutive '/' characters into a single '/'.
+  string collapsed;
+  collapsed.reserve(path.size());
+  bool last_was_slash = false;
+  for (const char ch : path) {
+    if (ch != '/' || !last_was_slash) collapsed += ch;
+    last_was_slash = (ch == '/');
+  }
+  return collapsed;
+}
+
+}  // anonymous namespace
+
+bool IncludeFileFinder::gch_hack_ = false;
+
+/* static */
+void IncludeFileFinder::Init(bool gch_hack) {
+  gch_hack_ = gch_hack;  // static flag shared by every IncludeFileFinder
+}
+
+IncludeFileFinder::IncludeFileFinder(
+    const std::string& cwd,
+    bool ignore_case,
+    const std::vector<std::string>* include_dirs,
+    const std::vector<std::string>* framework_dirs,
+    FileIdCache* file_id_cache)
+    : cwd_(cwd), ignore_case_(ignore_case), include_dirs_(include_dirs),
+      framework_dirs_(framework_dirs), file_id_cache_(file_id_cache) {
+  files_in_include_dirs_.resize(include_dirs_->size());
+
+  // Enumerate all files and directories in each of |include_dirs|.
+  // Files and directories are used to skip unnecessary file checks.
+  for (size_t i = CppParser::kIncludeDirIndexStarting;
+       i < include_dirs_->size(); ++i) {
+    const std::string& abs_include_dir = file::JoinPathRespectAbsolute(
+        cwd_, (*include_dirs)[i]);
+    if (strings::EndsWith(abs_include_dir, ".hmap")) {
+      // Header map: register the top component of every key for dir |i|.
+      std::vector<std::pair<std::string, std::string>> entries;
+      if (!ReadHeaderMapContent(abs_include_dir, &entries)) {
+        LOG(WARNING) << "failed to load header map:" << abs_include_dir;
+        continue;
+      }
+
+      for (const auto& entry : entries) {
+        const string& key = entry.first;
+        const string& filename = entry.second;
+        const string top = TopPathComponent(key, ignore_case_);
+
+        files_in_include_dirs_[i].insert(top);
+        // Only the first (lowest) dir index seen for a name is recorded.
+        if (include_dir_index_lowerbound_.find(top) ==
+            include_dir_index_lowerbound_.end()) {
+          include_dir_index_lowerbound_.insert(std::make_pair(top, i));
+        }
+
+        hmap_map_.insert(std::make_pair(
+            std::make_pair(i, key), filename));
+      }
+      continue;
+    }
+
+    std::vector<DirEntry> entries;
+    if (!ListDirectory(abs_include_dir, &entries)) {
+      continue;
+    }
+
+    for (const auto& entry : entries) {
+      string name = entry.name;
+      if (ignore_case_) {
+        // tolower() on a negative char is undefined; go via unsigned char.
+        std::transform(name.begin(), name.end(), name.begin(),
+                       [](unsigned char c) { return ::tolower(c); });
+      }
+
+      files_in_include_dirs_[i].insert(name);
+      if (include_dir_index_lowerbound_.find(name) ==
+          include_dir_index_lowerbound_.end()) {
+        include_dir_index_lowerbound_.insert(std::make_pair(name, i));
+      }
+    }
+  }
+}
+
+/* static */
+string IncludeFileFinder::TopPathComponent(string path_in_directive,
+                                           bool ignore_case) {
+  // Returns the text before the first path separator in |path_in_directive|,
+  // lower-cased when |ignore_case| is set.
+  const char* separators = "/";
+  if (ignore_case) {
+    // tolower() is undefined for negative char values (non-ASCII bytes where
+    // char is signed), so pass each character as unsigned char.
+    std::transform(path_in_directive.begin(), path_in_directive.end(),
+                   path_in_directive.begin(),
+                   [](unsigned char c) { return ::tolower(c); });
+    // Some Windows SDK headers use includes like "foo\\bar", so a backslash
+    // is accepted as a separator as well.
+    separators = "\\/";
+  }
+  const auto slash_pos = path_in_directive.find_first_of(separators);
+  if (slash_pos != string::npos) path_in_directive.resize(slash_pos);
+  return path_in_directive;
+}
+
+bool IncludeFileFinder::Lookup(
+    const string& path_in_directive,
+    string* filepath,
+    int* include_dir_index) {
+
+  {
+    // Check cache.
+    auto iter = include_path_cache_.find(
+        std::make_pair(path_in_directive, *include_dir_index));
+    if (iter != include_path_cache_.end()) {
+      *filepath = iter->second.first;
+      *include_dir_index = iter->second.second;
+      return true;
+    }
+  }
+
+  // |top| is used to reduce the number of searched include directories
+  // by checking precalculated direct children of include dirs.
+  // e.g. if #include <foo/bar.h> comes, include directories not having
+  // foo directory are not searched.
+  string top = TopPathComponent(path_in_directive, ignore_case_);
+
+  size_t search_start_index = *include_dir_index;  // in/out: start on entry
+
+  {
+    // Include dirs with less than search_start_index should not have
+    // |path_in_directive|.
+    // e.g. if |top| is "base" and 1,2,3-th include directories do not
+    // have "base" entry, then search_start_index becomes 4.
+    auto iter = include_dir_index_lowerbound_.find(top);
+    if (iter != include_dir_index_lowerbound_.end()) {
+      search_start_index = std::max(search_start_index, iter->second);
+    } else if (!gch_hack_enabled() &&
+               !strings::StartsWith(path_in_directive, ".")) {
+      // Do not search entry that is not in include_dirs.
+      // If |top| is not in |include_dir_index_lowerbound_|,
+      // it means that |path_in_directive| is not in include directories.
+      // This happens for Mac framework headers.
+      // If |path_in_directive| starts with ".",
+      // we need to search all include_dirs.
+      return LookupFramework(path_in_directive, filepath);
+    }
+  }
+
+  for (size_t i = search_start_index; i < include_dirs_->size(); ++i) {
+    // If |top| entry is not in i-th include dirs, check is skipped.
+    //
+    // |files_in_include_dirs_| only holds file/directory name
+    // in each include directory.
+    // If |top| starts from "." or "..", cannot skip include directory check
+    // because it may point to some sibling directory
+    // that not in |files_in_include_dirs_|.
+    if (!strings::StartsWith(top, ".") &&
+        files_in_include_dirs_[i].find(top) ==
+        files_in_include_dirs_[i].end()) {
+      continue;
+    }
+
+    string join_path;
+    {
+      auto iter = hmap_map_.find(std::make_pair(i, path_in_directive));
+      if (iter != hmap_map_.end()) {
+        join_path = iter->second;  // header map entry for dir |i|
+      } else {
+        join_path = file::JoinPath((*include_dirs_)[i], path_in_directive);
+      }
+    }
+    string try_path;
+    PathResolver::PlatformConvertToString(join_path, &try_path);
+    try_path = RemoveDuplicateSlash(try_path);
+
+    if (gch_hack_enabled()) {
+      const string& gch_path = try_path + GOMA_GCH_SUFFIX;
+      FileId fileid = file_id_cache_->Get(
+          file::JoinPathRespectAbsolute(cwd_, gch_path));
+      if (!fileid.is_directory && fileid.IsValid()) {
+        *filepath = gch_path;  // not added to |include_path_cache_|
+        *include_dir_index = i;
+        return true;
+      }
+    }
+
+    FileId fileid = file_id_cache_->Get(
+        file::JoinPathRespectAbsolute(cwd_,try_path));
+    if (fileid.is_directory || !fileid.IsValid()) {
+      continue;
+    }
+
+    include_path_cache_.insert(  // keyed by the caller's start index, not |i|
+        std::make_pair(
+            std::make_pair(path_in_directive, *include_dir_index),
+            std::make_pair(try_path, i)));
+    *filepath = try_path;
+    *include_dir_index = i;
+    return true;
+  }
+
+  return LookupFramework(path_in_directive, filepath);
+}
+
+bool IncludeFileFinder::LookupFramework(const std::string& path_in_directive,
+                                        std::string* filepath) {
+  // "Foo/bar.h" => <dir>/Foo.framework/{Headers,PrivateHeaders}/bar.h
+  const std::string::size_type slash = path_in_directive.find('/');
+  if (slash == std::string::npos)
+    return false;  // no framework name in front of the header name
+
+  const std::string header = path_in_directive.substr(slash + 1);
+  const std::string framework =
+      path_in_directive.substr(0, slash) + ".framework";
+
+  for (const auto& dir : *framework_dirs_) {
+    for (const char* subdir : {"Headers", "PrivateHeaders"}) {
+      const std::string candidate =
+          file::JoinPath(dir, framework, subdir, header);
+      const FileId id =
+          file_id_cache_->Get(file::JoinPathRespectAbsolute(cwd_, candidate));
+      if (id.is_directory || !id.IsValid())
+        continue;
+      *filepath = candidate;
+      return true;
+    }
+  }
+  return false;
+}
+
+bool IncludeFileFinder::LookupSubframework(const std::string& path_in_directive,
+                                           const std::string& current_directory,
+                                           std::string* filepath) {
+  // Paths are made absolute against |cwd_| before the filename is built.
+  const std::string abs_current =
+      file::JoinPathRespectAbsolute(cwd_, current_directory);
+  for (const auto& framework_dir : *framework_dirs_) {
+    const auto abs_dir = file::JoinPathRespectAbsolute(cwd_, framework_dir);
+    if (CreateSubframeworkIncludeFilename(abs_dir, abs_current,
+                                          path_in_directive, filepath))
+      return true;
+  }
+  return false;
+}
+
+}  // namespace devtools_goma
diff --git a/client/include_file_finder.h b/client/include_file_finder.h
new file mode 100644
index 0000000..1757f63
--- /dev/null
+++ b/client/include_file_finder.h
@@ -0,0 +1,92 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_INCLUDE_FILE_FINDER_H_
+#define DEVTOOLS_GOMA_CLIENT_INCLUDE_FILE_FINDER_H_
+
+#include <map>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+namespace devtools_goma {
+
+class FileIdCache;
+
+class IncludeFileFinder {  // Resolves include directives to file paths.
+ public:
+  static void Init(bool gch_hack);
+  static bool gch_hack_enabled() {
+    return gch_hack_;
+  }
+
+  IncludeFileFinder(const IncludeFileFinder&) = delete;
+  IncludeFileFinder& operator=(const IncludeFileFinder&) = delete;
+
+  IncludeFileFinder(const std::string& cwd,
+                    bool ignore_case,
+                    const std::vector<std::string>* include_dirs,
+                    const std::vector<std::string>* framework_dirs,
+                    FileIdCache* file_id_cache);
+
+  // Searches for |path_in_directive| starting at |*include_dir_index|. On a
+  // hit in an include directory, sets |filepath| and |*include_dir_index| and
+  // returns true; otherwise falls back to the framework directories.
+  bool Lookup(const std::string& path_in_directive, std::string* filepath,
+              int* include_dir_index);
+
+  // Returns the first path component (|top|) of an include directive.
+  // e.g.
+  // #include <foo/bar.h> -> |top| is "foo"
+  // #include "bar.h" -> |top| is "bar.h"
+  // #include <hoge\\fuga.h> -> |top| is "hoge"
+  // #include <foo/bar/baz.h> -> |top| is "foo"
+  // #include "../bar.h" -> |top| is ".."
+  // #include <foo\\bar\\baz.h> -> |top| is "foo"
+  // #include <WinBase.h> -> |top| is "winbase.h" in Windows
+  static std::string TopPathComponent(std::string path_in_directive,
+                                      bool ignore_case);
+
+  // TODO: Make this function private
+  // when we can stop falling back to IncludeDirCache.
+  bool LookupSubframework(const std::string& path_in_directive,
+                          const std::string& current_directory,
+                          std::string* filepath);
+
+ private:
+  bool LookupFramework(const std::string& path_in_directive,
+                       std::string* filepath);
+
+  static bool gch_hack_;
+
+  const std::string cwd_;
+  const bool ignore_case_;
+  const std::vector<std::string>* const include_dirs_;
+  const std::vector<std::string>* const framework_dirs_;
+  FileIdCache* file_id_cache_;
+
+  // Holds the entries of the i-th include directory.
+  // |files_in_include_dirs_[i]| is the set of file/directory names in the
+  // i-th include directory.
+  std::vector<std::unordered_set<std::string>> files_in_include_dirs_;
+
+  // Holds, for each entry name found in the include directories, the minimum
+  // index of an include directory that contains it.
+  // e.g. |include_dir_index_lowerbound_["stdio.h"]| represents minimum index
+  // of include directory containing "stdio.h".
+  std::unordered_map<std::string, size_t> include_dir_index_lowerbound_;
+
+  // Cache for (path_in_directive, include_dir_index_start) ->
+  //           (filepath, used_include_dir_index).
+  std::map<std::pair<std::string, int>,
+           std::pair<std::string, int>> include_path_cache_;
+
+  // Map for "include_dir idx + (key in .hmap file)" -> filename in .hmap file.
+  std::map<std::pair<int, std::string>, std::string> hmap_map_;
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_INCLUDE_FILE_FINDER_H_
diff --git a/client/include_file_utils.cc b/client/include_file_utils.cc
new file mode 100644
index 0000000..90986d9
--- /dev/null
+++ b/client/include_file_utils.cc
@@ -0,0 +1,146 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "include_file_utils.h"
+
+#include <memory>
+
+#include <glog/logging.h>
+
+#include "content.h"
+#include "path.h"
+#include "path_util.h"
+
+namespace devtools_goma {
+
+const char* GOMA_GCH_SUFFIX = ".gch.goma";
+
+// For an include "<Name>/<rest>" issued from below <fwdir>/<X>/, produces
+// <fwdir>/<X>/Frameworks/<Name>.framework/Headers/<rest> (existence unchecked).
+bool CreateSubframeworkIncludeFilename(
+    const std::string& fwdir, const std::string& current_directory,
+    const std::string& include_name, std::string* filename) {
+  if (!HasPrefixDir(current_directory, fwdir))
+    return false;
+  // End of the path component that follows |fwdir| in |current_directory|.
+  const size_t parent_end = current_directory.find('/', fwdir.size() + 1);
+  const size_t name_end = include_name.find('/');
+  if (parent_end == std::string::npos || name_end == std::string::npos)
+    return false;
+
+  const std::string subframeworks_dir =
+      current_directory.substr(0, parent_end + 1) + "Frameworks/";
+  const std::string headers_dir =
+      include_name.substr(0, name_end) + ".framework/Headers";
+  *filename = file::JoinPath(subframeworks_dir, headers_dir,
+                             include_name.substr(name_end + 1));
+  return true;
+}
+
+bool ReadHeaderMapContent(
+    const std::string& hmap_filename,
+    std::vector<std::pair<std::string, std::string>>* entries) {
+  DCHECK(entries);
+  // Parses the binary .hmap file; appends (key, prefix + suffix) to |entries|.
+  struct HeaderMapBucket {
+    uint32_t key;
+    uint32_t prefix;
+    uint32_t suffix;
+  };
+
+  struct HeaderMap {
+    char magic[4];
+    uint16_t version;
+    uint16_t reserved;
+    uint32_t string_offset;
+    uint32_t string_count;
+    uint32_t hash_capacity;
+    uint32_t max_value_length;
+    HeaderMapBucket buckets[1];  // First of |hash_capacity| buckets.
+  };
+
+  std::unique_ptr<Content> file(Content::CreateFromFile(hmap_filename));
+
+  if (!file) {
+    LOG(WARNING) << "hmap file not existed: " << hmap_filename;
+    return false;
+  }
+
+  if (file->size() < sizeof(HeaderMap) - sizeof(HeaderMapBucket)) {
+    LOG(WARNING) << "hmap file size is less than expected"
+                 << " expected: " << sizeof(HeaderMap) - sizeof(HeaderMapBucket)
+                 << " actual: " << file->size()
+                 << " file: " << hmap_filename;
+    return false;
+  }
+
+  const HeaderMap* hmap = reinterpret_cast<const HeaderMap*>(file->buf());
+  if (strncmp(hmap->magic, "pamh", 4)) {  // Non-zero: magic bytes differ.
+    LOG(WARNING) << "Invalid hmap file: " << hmap_filename;
+    return false;
+  }
+
+  if (hmap->version != 1) {
+    LOG(WARNING) << "Unknown hmap version (" << hmap->version
+              << "): " << hmap_filename;
+    return false;
+  }
+
+  const char* buf_end = file->buf_end();
+
+  const char* strings =
+      reinterpret_cast<const char*>(hmap) + hmap->string_offset;
+
+  if (strings < file->buf() || buf_end <= strings) {
+    LOG(WARNING) << "Invalid string_offset: " << hmap_filename;
+    return false;
+  }
+
+  // The fixed header plus all |hash_capacity| buckets must fit in the file.
+  if (sizeof(HeaderMap) - sizeof(HeaderMapBucket) +
+      static_cast<int64_t>(hmap->hash_capacity) * sizeof(HeaderMapBucket) >
+      file->size()) {
+    LOG(WARNING) << "hmap file size is less than header map's capacity"
+                 << " hash_capacity: " << hmap->hash_capacity
+                 << " expected size: "
+                 << sizeof(HeaderMap) - sizeof(HeaderMapBucket) +
+        static_cast<int64_t>(hmap->hash_capacity) * sizeof(HeaderMapBucket)
+                 << " actual size: " << file->size()
+                 << " file:" << hmap_filename;
+    return false;
+  }
+  // Strings are read as C strings, so each must start at or before the last NUL.
+  const auto last_nullpos = file->ToStringPiece().rfind('\0');
+
+  if (last_nullpos == StringPiece::npos &&
+      hmap->hash_capacity != 0) {
+    LOG(WARNING) << "hmap file does not contain null character"
+                 << " in expected place:" << hmap_filename;
+    return false;
+  }
+
+  for (size_t i = 0; i < hmap->hash_capacity; i++) {
+    const HeaderMapBucket& bucket = hmap->buckets[i];
+    if (!bucket.key) {  // A zero key is skipped as an unused bucket.
+      continue;
+    }
+    // Bucket fields are byte offsets relative to |strings|.
+    const char* key = strings + bucket.key;
+    const char* prefix = strings + bucket.prefix;
+    const char* suffix = strings + bucket.suffix;
+    if (key >= buf_end || prefix >= buf_end || suffix >= buf_end ||
+        key < file->buf() || prefix < file->buf() || suffix < file->buf() ||
+        std::max({key, prefix, suffix}) > last_nullpos + file->buf()) {
+      LOG(WARNING) << "Invalid hmap file: " << hmap_filename;
+      return false;
+    }
+    std::string filename(prefix);
+    filename += suffix;
+    entries->emplace_back(key, filename);
+  }
+
+  return true;
+}
+
+}  // namespace devtools_goma
diff --git a/client/include_file_utils.h b/client/include_file_utils.h
new file mode 100644
index 0000000..7f7fe94
--- /dev/null
+++ b/client/include_file_utils.h
@@ -0,0 +1,26 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_INCLUDE_FILE_UTILS_H_
+#define DEVTOOLS_GOMA_CLIENT_INCLUDE_FILE_UTILS_H_
+
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace devtools_goma {
+
+extern const char* GOMA_GCH_SUFFIX;
+
+bool CreateSubframeworkIncludeFilename(
+    const std::string& fwdir, const std::string& current_directory,
+    const std::string& include_name, std::string* filename);
+
+bool ReadHeaderMapContent(
+    const std::string& hmap_filename,
+    std::vector<std::pair<std::string, std::string>>* entries);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_INCLUDE_FILE_UTILS_H_
diff --git a/client/include_file_utils_unittest.cc b/client/include_file_utils_unittest.cc
new file mode 100644
index 0000000..1ca5402
--- /dev/null
+++ b/client/include_file_utils_unittest.cc
@@ -0,0 +1,17 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <gtest/gtest.h>
+
+#include "include_file_utils.h"
+#include "path.h"
+
+namespace devtools_goma {
+
+#ifndef _WIN32
+
+
+#endif  // _WIN32
+
+}  // namespace devtools_goma
diff --git a/client/include_guard_detector.cc b/client/include_guard_detector.cc
new file mode 100644
index 0000000..6068936
--- /dev/null
+++ b/client/include_guard_detector.cc
@@ -0,0 +1,76 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "include_guard_detector.h"
+
+namespace devtools_goma {
+
+// Handles a conditional that cannot serve as an include guard.
+void IncludeGuardDetector::OnProcessCondition() {
+  const bool nested = (++if_depth_ > 1);
+  if (!nested) {
+    // A non-ifndef condition opened at toplevel rules out an include guard.
+    ok_ = false;
+  }
+}
+
+// An #if carrying a guard identifier is handled exactly like #ifndef.
+void IncludeGuardDetector::OnProcessIf(const std::string& ident) {
+  if (ident.empty())
+    OnProcessCondition();
+  else
+    OnProcessIfndef(ident);
+}
+
+// Records |ident| as the include guard candidate if this is the first
+// toplevel #ifndef; a repeated or invalid toplevel #ifndef fails detection.
+void IncludeGuardDetector::OnProcessIfndef(const std::string& ident) {
+  const bool is_toplevel = (++if_depth_ <= 1);
+
+  // Nested conditionals are skipped, and nothing more can be learned once
+  // detection has already failed.
+  if (!is_toplevel || !ok_)
+    return;
+
+  const bool seen_ifndef_before = !detected_ident_.empty();
+  if (seen_ifndef_before) {
+    // Multiple ifndef/endif pairs at toplevel: forget the earlier candidate.
+    detected_ident_.clear();
+  }
+
+  if (seen_ifndef_before || ident.empty()) {
+    // Either a repeated toplevel ifndef, or the identifier of this ifndef
+    // is invalid.
+    ok_ = false;
+    return;
+  }
+
+  // First toplevel ifndef with a usable identifier.
+  detected_ident_ = ident;
+}
+
+// Leaves one conditional level; an unmatched #endif fails detection.
+void IncludeGuardDetector::OnProcessEndif() {
+  if (--if_depth_ >= 0)
+    return;
+  // The number of endif is larger than the number of if.
+  ok_ = false;
+}
+
+// A directive outside every conditional means the file is not fully guarded.
+void IncludeGuardDetector::OnProcessOther() {
+  const bool at_toplevel = (if_depth_ <= 0);
+  if (at_toplevel) {
+    ok_ = false;
+  }
+}
+
+// Called when popping: fails detection if if/endif are not balanced.
+void IncludeGuardDetector::OnPop() {
+  if (if_depth_ == 0)
+    return;
+  ok_ = false;
+}
+
+}  // namespace devtools_goma
diff --git a/client/include_guard_detector.h b/client/include_guard_detector.h
new file mode 100644
index 0000000..cf36ac2
--- /dev/null
+++ b/client/include_guard_detector.h
@@ -0,0 +1,63 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_INCLUDE_GUARD_DETECTOR_H_
+#define DEVTOOLS_GOMA_CLIENT_INCLUDE_GUARD_DETECTOR_H_
+
+#include <string>
+
+namespace devtools_goma {
+
+class IncludeGuardDetector {  // Tracks directives to spot an include guard.
+ public:
+  IncludeGuardDetector() : ok_(true), if_depth_(0) {}
+
+  IncludeGuardDetector(const IncludeGuardDetector&) = delete;
+  void operator=(const IncludeGuardDetector&) = delete;
+
+  const std::string& detected_ident() const {
+    return detected_ident_;
+  }
+
+  bool IsGuardDetected() const {
+    return ok_ && !detected_ident_.empty();
+  }
+
+  // Called when #ifdef (or an #if without a guard identifier) is found.
+  void OnProcessCondition();
+  // Called when #if is found.
+  // |ident| is include guard identifier; e.g. in `#if !defined(FOO)`,
+  // FOO is |ident|. When such identifier cannot be found, ident should
+  // be empty.
+  // TODO: Consider renaming this method so that the definition of
+  // |ident| is clearer.
+  void OnProcessIf(const std::string& ident);
+  // Called when #ifndef is found.
+  void OnProcessIfndef(const std::string& ident);
+  // Called when #endif is found.
+  void OnProcessEndif();
+  // Called when other directive is found.
+  void OnProcessOther();
+  // Called when popping.
+  void OnPop();
+
+ private:
+  // |ok_| becomes false once include guard detection has failed.
+  // For example:
+  // 1. Detected any directive other than the pair of ifndef/endif in toplevel.
+  // 2. Detected more than one ifndef/endif pair in toplevel.
+  // 3. Detected invalid ifndef in toplevel.
+  // 4. if/endif is not balanced (more #if than #endif or vice versa.)
+  // Even if ok_ is true, it does not mean we detected an include
+  // guard. We also need to check detected_ident_ is not empty.
+  bool ok_;
+  // The current depth of if/endif. We say it is toplevel when if_depth_ == 0.
+  int if_depth_;
+  // Detected include guard identifier.
+  std::string detected_ident_;
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_INCLUDE_GUARD_DETECTOR_H_
diff --git a/client/include_processor.cc b/client/include_processor.cc
new file mode 100644
index 0000000..1f52f5c
--- /dev/null
+++ b/client/include_processor.cc
@@ -0,0 +1,1048 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef _WIN32
+#include <dirent.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#else
+#include "config_win.h"
+#endif
+#ifdef __FreeBSD__
+#include <sys/param.h>
+#endif
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "autolock_timer.h"
+#include "compiler_flags.h"
+#include "compiler_info.h"
+#include "content.h"
+#include "counterz.h"
+#include "cpp_parser.h"
+#include "directive_filter.h"
+#include "env_flags.h"
+#include "file.h"
+#include "file_dir.h"
+#include "flag_parser.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+#include "glog/vlog_is_on.h"
+#include "goma_init.h"
+#include "include_cache.h"
+#include "include_file_finder.h"
+#include "include_file_utils.h"
+#include "include_processor.h"
+#include "ioutil.h"
+#include "lockhelper.h"
+#include "path.h"
+#include "path_resolver.h"
+#include "scoped_fd.h"
+#include "string_piece.h"
+#include "string_piece_utils.h"
+#include "util.h"
+
+#ifdef _WIN32
+#include "path_resolver.h"
+#include "posix_helper_win.h"
+#endif
+
+#if HAVE_CPU_PROFILER
+#include <gperftools/profiler.h>
+#endif
+
+using std::string;
+
+namespace devtools_goma {
+
+namespace {
+
+// Reads content from |filepath| and set |next_current_directory|.
+// If |file_id_cache| has a FileId for |filepath|, we use it.
+// Otherwise we take FileId for |filepath| and stores it to |file_id_cache|.
+// We don't take |file_id_cache| ownership.
+std::pair<std::unique_ptr<Content>, FileId> TryInclude(
+    const string& cwd, const string& filepath, string* next_current_directory,
+    FileIdCache* file_id_cache) {
+  const string abs_filepath = file::JoinPathRespectAbsolute(cwd, filepath);
+  FileId file_id(file_id_cache->Get(abs_filepath));
+  if (!file_id.IsValid()) {
+    return {nullptr, FileId()};
+  }
+
+  if (file_id.is_directory) {
+    VLOG(2) << "TryInclude but dir:" << abs_filepath;
+    return {nullptr, FileId()};
+  }
+
+  // When IncludeCache is enabled and the file is not updated, a minified
+  // header is served from memory and the file itself is not read.
+  const bool use_cache = IncludeCache::IsEnabled();
+  std::unique_ptr<Content> fp;
+  if (use_cache) {
+    fp = IncludeCache::instance()->GetCopyIfNotModified(abs_filepath, file_id);
+  }
+
+  if (!fp) {
+    // Nothing usable came from the cache (or the cache is disabled), so the
+    // file is read from disk.
+    ScopedFd fd(ScopedFd::OpenForRead(abs_filepath));
+    if (!fd.valid())
+      return {nullptr, FileId()};
+
+    std::unique_ptr<Content> raw =
+        Content::CreateFromFileDescriptor(abs_filepath, fd, file_id.size);
+    if (!raw)
+      return {nullptr, FileId()};
+
+    if (use_cache) {
+      // TODO: If we can use shared_ptr for Content, we would be able
+      // to omit copying from IncludeCache. In that case, we should return
+      // something like shared_ptr<const Content> from this function.
+      fp = IncludeCache::instance()->Insert(abs_filepath, *raw, file_id);
+    } else {
+      fp = DirectiveFilter::MakeFilteredContent(*raw);
+    }
+  }
+
+  // Fill |next_current_directory| from |filepath| (see GetBaseDir).
+  GetBaseDir(filepath, next_current_directory);
+  return {std::move(fp), file_id};
+}
+
+}  // anonymous namespace
+
+// Include observer that resolves include directives and feeds the parser.
+class IncludePathsObserver : public CppParser::IncludeObserver {
+ public:
+  IncludePathsObserver(
+      const std::string& cwd,
+      bool ignore_case,
+      CppParser* parser,
+      std::set<string>* shared_include_files,
+      FileIdCache* file_id_cache,
+      IncludeFileFinder* include_file_finder)
+      : cwd_(cwd), ignore_case_(ignore_case), parser_(parser),
+        shared_include_files_(shared_include_files),
+        file_id_cache_(file_id_cache),
+        include_file_finder_(include_file_finder) {
+    CHECK(parser_);
+    CHECK(shared_include_files_);
+    CHECK(file_id_cache_);
+    CHECK(include_file_finder_);
+  }
+
+  bool HandleInclude(
+      const string& path,
+      const string& current_directory,
+      const string& current_filepath,
+      char quote_char,  // '"' or '<'
+      int include_dir_index) override {
+    // shared_include_files_ contains a set of include files for compilers.
+    // It's output variables of IncludePathsObserver.
+
+    // parser_->IsProcessedFile(filepath) indicates filepath was already parsed
+    // and no need to parse it again.
+    // So, if it returns true, shared_include_files_ must have filepath.
+    // In other words, there is a case shared_include_files_ have the filepath,
+    // but parser_->IsProcessedFile(filepath) returns false.  It means
+    // the filepath once parsed, but it needs to parse it again (for example
+    // macro changed).
+
+    // parser_->AddFileInput should be called to let parser_ parse the file.
+
+    // include_dir_index is an index to start searching from.
+    //
+    // for #include "...", include_dir_index is current dir index of
+    // the file that is including the path.  note that include_dir_index
+    // would not be kCurrentDirIncludeDirIndex, since CppParser needs
+    // to keep dir index for include file. i.e. an included file will have
+    // the same include dir index as file that includes the file.
+    //
+    // for #include <...>, it is bracket_include_dir_index.
+    //
+    // for #include_next, it will be next include dir index of file
+    // that is including the path. (always quote_char=='<').
+
+    CHECK(!path.empty()) << current_filepath;
+
+    VLOG(2) << current_filepath << ": including "
+            << quote_char << path
+            << " dir:" << current_directory
+            << " include_dir_index:" << include_dir_index;
+
+    string next_current_directory;
+    string filepath;
+
+    if (quote_char == '"') {
+      // Look for current directory.
+      if (HandleIncludeInDir(current_directory, path,
+                             include_dir_index,
+                             &next_current_directory)) {
+        return true;
+      }
+
+      // If not found in current directory, try all include paths.
+      include_dir_index = CppParser::kIncludeDirIndexStarting;
+    }
+
+    // Look for include dirs from |include_dir_index|.
+    int dir_index = include_dir_index;
+    if (!include_file_finder_->Lookup(path, &filepath, &dir_index) &&
+        !include_file_finder_->LookupSubframework(
+            path, current_directory, &filepath)) {
+      VLOG(2) << "Not found: " << path;
+      return false;
+    }
+
+    VLOG(3) << "Lookup => " << filepath << " dir_index=" << dir_index;
+
+    if (parser_->IsProcessedFile(filepath, include_dir_index)) {
+      VLOG(2) << "Already processed:" << quote_char << filepath;
+      return true;
+    }
+
+    auto file_content = TryInclude(
+        cwd_, filepath, &next_current_directory, file_id_cache_);
+    std::unique_ptr<Content> next_fp = std::move(file_content.first);
+
+    if (next_fp.get()) {
+      if (IncludeFileFinder::gch_hack_enabled() &&
+          strings::EndsWith(filepath, GOMA_GCH_SUFFIX) &&
+          !strings::EndsWith(path, GOMA_GCH_SUFFIX)) {
+        VLOG(2) << "Found a precompiled header: " << filepath;
+        shared_include_files_->insert(filepath);
+        return true;
+      }
+
+      VLOG(2) << "Looking into " << filepath << " index=" << dir_index;
+      shared_include_files_->insert(filepath);
+      parser_->AddFileInput(std::move(next_fp), file_content.second, filepath,
+                            next_current_directory, dir_index);
+      return true;
+    }
+    VLOG(2) << "include file not found in dir_cache?";
+    return false;
+  }
+
+  bool HasInclude(
+      const string& path,
+      const string& current_directory,
+      const string& current_filepath,
+      char quote_char,  // '"' or '<'
+      int include_dir_index) override {
+    CHECK(!path.empty()) << current_filepath;
+
+    string filepath;
+
+    if (quote_char == '"') {
+      if (HasIncludeInDir(current_directory, path, current_filepath)) {
+        return true;
+      }
+      include_dir_index = CppParser::kIncludeDirIndexStarting;
+    }
+
+    int dir_index = include_dir_index;
+    if (!include_file_finder_->Lookup(path, &filepath, &dir_index)) {
+      VLOG(2) << "Not found: " << path;
+      return false;
+    }
+    const std::string abs_filepath = file::JoinPathRespectAbsolute(
+        cwd_, filepath);
+    if (shared_include_files_->count(filepath) ||
+        access(abs_filepath.c_str(), R_OK) == 0) {
+      DCHECK(!File::IsDirectory(abs_filepath.c_str())) << abs_filepath;
+      return true;
+    }
+    return false;
+  }
+
+ private:
+  bool CanPruneWithTopPathComponent(const string& dir, const string& path) {
+    const std::string& dir_with_top_path_component = file::JoinPath(
+        dir, IncludeFileFinder::TopPathComponent(path, ignore_case_));
+    return !file_id_cache_->Get(dir_with_top_path_component).IsValid();
+  }
+
+  // Tries |path| relative to |dir|; true if found or already processed.
+  bool HandleIncludeInDir(const string& dir, const string& path,
+                          int include_dir_index,
+                          string* next_current_directory) {
+    GOMA_COUNTERZ("handle include try");
+    if (CanPruneWithTopPathComponent(
+            file::JoinPathRespectAbsolute(cwd_, dir), path)) {
+      GOMA_COUNTERZ("handle include pruned");
+      return false;
+    }
+
+    string filepath = PathResolver::PlatformConvert(
+        file::JoinPathRespectAbsolute(dir, path));
+
+    if (IncludeFileFinder::gch_hack_enabled()) {
+      const string& gchpath = filepath + GOMA_GCH_SUFFIX;
+      std::unique_ptr<Content> fp =
+          TryInclude(cwd_, gchpath, next_current_directory,
+                     file_id_cache_).first;
+      if (fp) {
+        VLOG(2) << "Found a pre-compiled header: " << gchpath;
+        shared_include_files_->insert(gchpath);
+        // We should not check the content of pre-compiled headers.
+        return true;
+      }
+    }
+
+    if (parser_->IsProcessedFile(filepath, include_dir_index)) {
+      VLOG(2) << "Already processed: \"" << filepath << "\"";
+      return true;
+    }
+    auto file_content = TryInclude(cwd_, filepath, next_current_directory,
+                                   file_id_cache_);
+    std::unique_ptr<Content> fp = std::move(file_content.first);
+    if (fp) {
+      shared_include_files_->insert(filepath);
+      parser_->AddFileInput(std::move(fp), file_content.second, filepath,
+                            *next_current_directory, include_dir_index);
+      return true;
+    }
+    VLOG(2) << "include file not found in current directory? filepath="
+            << filepath;
+    return false;
+  }
+
+  bool HasIncludeInDir(const string& dir, const string& path,
+                       const string& current_filepath) {
+    const std::string& filepath = file::JoinPathRespectAbsolute(dir, path);
+    string abs_filepath = file::JoinPathRespectAbsolute(cwd_, filepath);
+    string abs_current_filepath = file::JoinPathRespectAbsolute(
+        cwd_, current_filepath);
+    abs_filepath = PathResolver::ResolvePath(abs_filepath);
+    // The file currently being processed always counts as present.
+    if (abs_filepath == abs_current_filepath)
+      return true;
+    if (!File::IsDirectory(abs_filepath.c_str()) &&
+        (shared_include_files_->count(filepath) ||
+         access(abs_filepath.c_str(), R_OK) == 0 ||
+         (IncludeFileFinder::gch_hack_enabled() &&
+          access((abs_filepath + GOMA_GCH_SUFFIX).c_str(), R_OK) == 0))) {
+      return true;
+    }
+    return false;
+  }
+
+  const std::string cwd_;
+  const bool ignore_case_;
+  CppParser* parser_;
+  std::set<string>* shared_include_files_;
+  FileIdCache* file_id_cache_;
+  IncludeFileFinder* include_file_finder_;
+
+  DISALLOW_COPY_AND_ASSIGN(IncludePathsObserver);
+};
+
+// Forwards CppParser error messages to the verbose log.
+class IncludeErrorObserver : public CppParser::ErrorObserver {
+ public:
+  IncludeErrorObserver() = default;
+
+  // NOTE: this error observer is not set if VLOG_IS_ON(1) is false. When
+  // changing this code, keep the set_error_observer call in
+  // IncludeProcessor::GetIncludeFiles() consistent with the level used here.
+  void HandleError(const string& error) override {
+    VLOG(1) << error;
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(IncludeErrorObserver);
+};
+
+// Appends each platform-converted input dir, joined onto |toolchain_root|.
+static void CopyIncludeDirs(const std::vector<string>& input_dirs,
+                            const string& toolchain_root,
+                            std::vector<string>* output_dirs) {
+  for (const string& original : input_dirs) {
+    output_dirs->push_back(file::JoinPath(
+        toolchain_root, PathResolver::PlatformConvert(original)));
+  }
+}
+
+#ifndef _WIN32
+// Best-effort: if |filepath| is unreadable, recreates it from the file named
+// on the first line of "|filepath|.gch.hash-criteria".
+static void CopyOriginalFileFromHashCriteria(const string& filepath) {
+  static Lock mu;
+
+  const auto is_readable = [&filepath]() {
+    return access(filepath.c_str(), R_OK) == 0;
+  };
+  if (is_readable())
+    return;
+
+  // Only one thread can copy the GCH, so check again while holding the lock.
+  AUTOLOCK(lock, &mu);
+  if (is_readable())
+    return;
+
+  std::ifstream criteria((filepath + ".gch.hash-criteria").c_str());
+  if (!criteria)
+    return;
+
+  const char* const kPrefix = "Contents of ";
+  string first_line;
+  getline(criteria, first_line);
+  if (!strings::StartsWith(first_line, kPrefix))
+    return;
+
+  const string source_path = first_line.substr(strlen(kPrefix));
+  VLOG(1) << "hash criteria file found. original filepath: "
+          << source_path;
+
+  const string staging_path = filepath + ".tmp";
+  File::Copy(source_path.c_str(), staging_path.c_str(), true);
+  rename(staging_path.c_str(), filepath.c_str());
+}
+#endif
+
+static bool NormalizePath(const string& path_to_normalize,
+                          string* normalized_path) {
+  // Stores the resolved form of |path_to_normalize| in |normalized_path|;
+  // returns false if realpath() fails (POSIX) or the result is empty or has
+  // no file attributes (Windows). TODO: unify branches; symlinks on Unix?
+#ifndef _WIN32
+  std::unique_ptr<char[], decltype(&free)> path_buf(
+      realpath(path_to_normalize.c_str(), nullptr), free);
+  if (path_buf.get() == nullptr)
+    return false;
+  normalized_path->assign(path_buf.get());
+#else
+  *normalized_path = PathResolver::ResolvePath(
+      PathResolver::PlatformConvert(path_to_normalize));
+  if (normalized_path->empty() ||
+      (GetFileAttributesA(normalized_path->c_str()) ==
+       INVALID_FILE_ATTRIBUTES)) {
+    return false;
+  }
+#endif
+  return true;
+}
+
+// Appends to |include_dirs| each entry of |dirs| whose normalized absolute
+// path is not yet in |seen_include_dir_set|; unresolvable dirs are skipped.
+static void MergeDirs(
+    const std::string& cwd,
+    const std::vector<string>& dirs,
+    std::vector<string>* include_dirs,
+    std::set<string>* seen_include_dir_set) {
+  for (const auto& dir : dirs) {
+    string normalized_dir;
+    if (!NormalizePath(file::JoinPathRespectAbsolute(cwd, dir),
+                       &normalized_dir)) {
+      continue;
+    }
+    // Dedup on the normalized path, but keep |dir| as originally spelled.
+    if (seen_include_dir_set->insert(normalized_dir).second)
+      include_dirs->push_back(dir);
+  }
+}
+
+// Appends de-duplicated non-system dirs followed by de-duplicated system
+// dirs to |include_dirs|.
+static void MergeIncludeDirs(
+    const std::string& cwd,
+    const std::vector<string>& nonsystem_include_dirs,
+    const std::vector<string>& system_include_dirs,
+    std::vector<string>* include_dirs) {
+  std::set<string> seen;
+
+  // System dirs are registered in |seen| first, so a directory listed as
+  // both system and non-system is kept only as a system dir. They are
+  // staged in a separate vector because the resulting order must still be
+  // non-system dirs first, system dirs last.
+  std::vector<string> system_dirs;
+  MergeDirs(cwd, system_include_dirs, &system_dirs, &seen);
+
+  MergeDirs(cwd, nonsystem_include_dirs, include_dirs, &seen);
+
+  // Append the staged system dirs after the non-system ones.
+  include_dirs->insert(include_dirs->end(),
+                       system_dirs.begin(), system_dirs.end());
+}
+
+// Adds the .hmap include dirs and the root includes (-include or /FI files, or
+// their precompiled headers) to |include_files|, then preprocesses the kept
+// root includes and |filename|. Returns false if one cannot be read or parsed.
+bool IncludeProcessor::GetIncludeFiles(
+    const string& filename,
+    const string& current_directory,
+    const CompilerFlags& compiler_flags,
+    const CompilerInfo& compiler_info,
+    std::set<string>* include_files,
+    FileIdCache* file_id_cache) {
+  DCHECK(!current_directory.empty());
+  DCHECK(file::IsAbsolutePath(current_directory)) << current_directory;
+  delayed_macro_includes_.clear();
+
+  std::vector<string> non_system_include_dirs;
+  std::vector<string> root_includes;
+  std::vector<string> user_framework_dirs;
+  std::vector<std::pair<string, bool>> commandline_macros;
+#if _WIN32
+  bool ignore_case = true;
+#else
+  bool ignore_case = false;
+#endif
+
+  if (compiler_flags.is_gcc()) {
+    const GCCFlags& flags = static_cast<const GCCFlags&>(compiler_flags);
+    non_system_include_dirs = flags.non_system_include_dirs();
+    root_includes = flags.root_includes();
+    user_framework_dirs = flags.framework_dirs();
+    commandline_macros = flags.commandline_macros();
+  } else if (compiler_flags.is_vc()) {
+    const VCFlags& flags = static_cast<const VCFlags&>(compiler_flags);
+    non_system_include_dirs = flags.include_dirs();
+    root_includes = flags.root_includes();
+    commandline_macros = flags.commandline_macros();
+    ignore_case = true;  // Always case-insensitive for VC flags.
+  } else if (compiler_flags.is_clang_tidy()) {
+    const ClangTidyFlags& flags =
+        static_cast<const ClangTidyFlags&>(compiler_flags);
+    non_system_include_dirs = flags.non_system_include_dirs();
+    root_includes = flags.root_includes();
+    user_framework_dirs = flags.framework_dirs();
+    commandline_macros = flags.commandline_macros();
+  } else {
+    LOG(FATAL) << "Bad compiler_flags for IncludeProcessor: "
+               << compiler_flags.DebugString();
+  }
+  VLOG(3) << "non_system_include_dirs=" << non_system_include_dirs;
+  VLOG(3) << "root_includes=" << root_includes;
+  VLOG(3) << "user_framework_dirs=" << user_framework_dirs;
+  VLOG(3) << "commandline_macros=" << commandline_macros;
+
+  for (const auto& include_dir : non_system_include_dirs) {
+    // TODO: Ideally, we should not add a .hmap file that doesn't exist.
+    if (strings::EndsWith(include_dir, ".hmap")) {
+      include_files->insert(include_dir);
+    }
+  }
+
+  std::vector<string> quote_dirs;
+  CopyIncludeDirs(
+      compiler_info.quote_include_paths(),
+      "",
+      &quote_dirs);
+
+  std::vector<string> all_system_include_dirs;
+  if (compiler_info.lang().find("c++") != string::npos) {
+    CopyIncludeDirs(
+        compiler_info.cxx_system_include_paths(),
+        compiler_info.toolchain_root(),
+        &all_system_include_dirs);
+  } else {
+    CopyIncludeDirs(
+        compiler_info.system_include_paths(),
+        compiler_info.toolchain_root(),
+        &all_system_include_dirs);
+  }
+
+  // include_dirs starts with |current_directory|, which is not specified by
+  // -I but is needed for #include "", followed by the quote dirs. The index
+  // just past them is given to the parser as the bracket include dir index.
+  std::vector<std::string> include_dirs;
+  std::vector<std::string> framework_dirs;
+  include_dirs.push_back(current_directory);
+  copy(quote_dirs.begin(), quote_dirs.end(),
+       back_inserter(include_dirs));
+
+  cpp_parser_.set_bracket_include_dir_index(
+      include_dirs.size());
+  VLOG(2) << "bracket include dir index=" <<
+      include_dirs.size();
+  MergeIncludeDirs(current_directory,
+                   non_system_include_dirs,
+                   all_system_include_dirs,
+                   &include_dirs);
+
+#ifndef _WIN32
+  std::vector<string> abs_user_framework_dirs;
+  CopyIncludeDirs(
+      user_framework_dirs,
+      "",
+      &abs_user_framework_dirs);
+  std::vector<string> system_framework_dirs;
+  CopyIncludeDirs(
+      compiler_info.system_framework_paths(),
+      compiler_info.toolchain_root(),
+      &system_framework_dirs);
+  MergeIncludeDirs(current_directory,
+                   abs_user_framework_dirs,
+                   system_framework_dirs,
+                   &framework_dirs);
+#else
+  CHECK(compiler_info.system_framework_paths().empty());
+#endif
+
+  // TODO: cleanup paths (// -> /, /./ -> /) in include_dirs
+  // Note that we should not use ResolvePath for these dirs.
+  IncludeFileFinder include_file_finder(
+      current_directory, ignore_case, &include_dirs, &framework_dirs,
+      file_id_cache);
+  // Root includes that have a precompiled header or are duplicates get erased.
+  for (size_t i = 0; i < root_includes.size();) {
+    string abs_filepath =
+        PathResolver::PlatformConvert(
+            file::JoinPathRespectAbsolute(current_directory, root_includes[i]));
+
+    // TODO: this does not seem to apply to Windows. Need to verify.
+#ifndef _WIN32
+    if (IncludeFileFinder::gch_hack_enabled()) {
+      // If there is the precompiled header for this header, we'll send
+      // the precompiled header. Note that we don't need to check its content.
+      const string& gch_filepath = abs_filepath + GOMA_GCH_SUFFIX;
+      {
+        ScopedFd fd(ScopedFd::OpenForRead(gch_filepath));
+        if (fd.valid()) {
+          fd.Close();
+          VLOG(1) << "precompiled header found: " << gch_filepath;
+          include_files->insert(root_includes[i] + GOMA_GCH_SUFFIX);
+          root_includes.erase(root_includes.begin() + i);
+          continue;
+        }
+      }
+    }
+#endif
+
+    if (access(abs_filepath.c_str(), R_OK) == 0) {
+#ifndef _WIN32
+      // we don't support *.gch on Win32.
+      CopyOriginalFileFromHashCriteria(abs_filepath);
+#endif
+
+      if (include_files->insert(root_includes[i]).second) {
+        i++;
+      } else {
+        root_includes.erase(root_includes.begin() + i);
+      }
+      continue;
+    }
+
+    std::string filepath;
+    {
+      int dir_index = CppParser::kIncludeDirIndexStarting;
+      if (!include_file_finder.Lookup(root_includes[i],
+                                      &filepath,
+                                      &dir_index)) {
+        LOG(INFO) << (compiler_flags.is_vc() ? "/FI" : "-include")
+                  << " not found: " << root_includes[i];
+        i++;
+        continue;
+      }
+    }
+
+    if (IncludeFileFinder::gch_hack_enabled() &&
+        strings::EndsWith(filepath, GOMA_GCH_SUFFIX)) {
+      VLOG(1) << "precompiled header found: " << filepath;
+      include_files->insert(filepath);
+      root_includes.erase(root_includes.begin() + i);
+      continue;
+    }
+
+    if (include_files->insert(filepath).second) {
+      root_includes[i] = filepath;
+      i++;
+    } else {
+      root_includes.erase(root_includes.begin() + i);
+    }
+  }
+
+  root_includes.push_back(PathResolver::PlatformConvert(filename));
+
+  IncludePathsObserver include_observer(
+      current_directory,
+      ignore_case,
+      &cpp_parser_,
+      include_files, file_id_cache,
+      &include_file_finder);
+  IncludeErrorObserver error_observer;
+  cpp_parser_.set_include_observer(&include_observer);
+  if (VLOG_IS_ON(1))
+    cpp_parser_.set_error_observer(&error_observer);
+  cpp_parser_.SetCompilerInfo(&compiler_info);
+  if (compiler_flags.is_vc()) {
+    cpp_parser_.set_is_vc();
+  }
+
+  for (const auto& commandline_macro : commandline_macros) {
+    const string& macro = commandline_macro.first;
+    if (commandline_macro.second) {
+      size_t found = macro.find('=');
+      if (found == string::npos) {
+        // https://gcc.gnu.org/onlinedocs/gcc/Preprocessor-Options.html
+        // "-D name" predefines name as a macro, with definition 1.
+        cpp_parser_.AddMacroByString(macro, "1");
+        continue;
+      }
+      const string& key = macro.substr(0, found);
+      const string& value = macro.substr(found + 1, macro.size() - (found + 1));
+      cpp_parser_.AddMacroByString(key, value);
+    } else {
+      cpp_parser_.DeleteMacro(macro);
+    }
+  }
+
+  // From GCC 4.8, stdc-predef.h is automatically included without
+  // -ffreestanding. Also, -fno-hosted is equivalent to -ffreestanding.
+  // See also: https://gcc.gnu.org/gcc-4.8/porting_to.html
+  if (compiler_flags.is_gcc() &&
+      compiler_info.name().find("clang") == string::npos) {
+    const GCCFlags& flags = static_cast<const GCCFlags&>(compiler_flags);
+    if (!(flags.has_ffreestanding() || flags.has_fno_hosted())) {
+      // TODO: Some environments might not have stdc-predef.h (e.g. android).
+      // In that case, IncludeProcessor currently emits a WARNING, but it's
+      // ignorable. It would be better to suppress such a warning.
+      const string stdc_predef_input(
+          "#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)\n"
+          "#include <stdc-predef.h>\n"
+          "#endif\n");
+      cpp_parser_.AddStringInput(stdc_predef_input, "(stdc-predef)");
+      if (!cpp_parser_.ProcessDirectives()) {
+        LOG(ERROR) << "failed to handle stdc-predef";
+      }
+      // Since base_file_ will be updated in the last AddStringInput, we need
+      // to clear it. Otherwise, test will fail.
+      cpp_parser_.ClearBaseFile();
+    }
+  }
+
+  for (const auto& input : root_includes) {
+    const std::string& abs_input = file::JoinPathRespectAbsolute(
+        current_directory, input);
+    std::unique_ptr<Content> fp(Content::CreateFromFile(abs_input));
+    if (!fp) {
+      LOG(ERROR) << "root include:" << abs_input << " not found";
+      return false;
+    }
+    VLOG(2) << "Looking into " << abs_input;
+
+    string input_basedir;
+    GetBaseDir(input, &input_basedir);
+
+    cpp_parser_.AddFileInput(std::move(fp), file_id_cache->Get(abs_input),
+                             input, input_basedir,
+                             CppParser::kCurrentDirIncludeDirIndex);
+    if (!cpp_parser_.ProcessDirectives()) {
+      LOG(ERROR) << "cpp parser fatal error in " << abs_input;
+      return false;
+    }
+  }
+  return true;
+}
+
+int IncludeProcessor::total_files() const {
+  return cpp_parser()->total_files();
+}
+
+int IncludeProcessor::skipped_files() const {
+  return cpp_parser()->skipped_files();
+}
+
+}  // namespace devtools_goma
+
+#ifdef TEST
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+#include <time.h>
+#include "file_helper.h"
+#include "split.h"
+#include "scoped_tmp_file.h"
+#include "subprocess.h"
+
+// TODO: share this code with include_processor_unittest.
+// Runs the compiler given by |args| with "-M -MF <tmpfile>" and returns the
+// files listed in the tmpfile, resolved against |cwd|. Always returns an
+// empty set on Windows.
+std::set<string> GetExpectedFiles(const std::vector<string>& args,
+                                  const std::vector<string>& env,
+                                  const string& cwd) {
+  std::set<string> expected_files;
+#ifndef _WIN32
+  // TODO: ReadCommandOutputByPopen couldn't read large outputs
+  // and causes exit=512, so use tmpfile.
+  devtools_goma::ScopedTmpFile tmpfile("include_processor_verify");
+  tmpfile.Close();
+
+  // Copy |args| without the -M* and -o* options; "-MF", "-MT", "-MQ" and
+  // "-o" also take the following argument with them.
+  std::vector<string> run_args;
+  for (size_t i = 0; i < args.size(); ++i) {
+    const string& arg = args[i];
+    const bool dep_option = arg.compare(0, 2, "-M") == 0;
+    const bool out_option = arg.compare(0, 2, "-o") == 0;
+    if (!dep_option && !out_option) {
+      run_args.push_back(arg);
+    } else if (arg == "-MF" || arg == "-MT" || arg == "-MQ" || arg == "-o") {
+      ++i;
+    }
+  }
+  run_args.push_back("-M");
+  run_args.push_back("-MF");
+  run_args.push_back(tmpfile.filename());
+
+  std::vector<string> run_env(env);
+  run_env.push_back("LC_ALL=C");
+
+  // The -M output written to the tmpfile looks like
+  //
+  // stdio: /usr/include/stdio.h /usr/include/features.h \\\n
+  //   /usr/include/sys/cdefs.h /usr/include/bits/wordsize.h \\\n
+  //   ...
+  int status;
+  devtools_goma::ReadCommandOutputByPopen(
+      run_args[0], run_args, run_env,
+      cwd, devtools_goma::MERGE_STDOUT_STDERR, &status);
+  if (status != 0) {
+    LOG(INFO) << "args:" << run_args;
+    LOG(INFO) << "env:" << run_env;
+    LOG(FATAL) << "status:" << status;
+  }
+  string output;
+  CHECK(devtools_goma::ReadFileToString(tmpfile.filename(), &output));
+  std::vector<string> files;
+  SplitStringUsing(output, " \n\r\\", &files);
+
+  devtools_goma::PathResolver pr;
+  // Skip the first element as it's the make target.
+  for (size_t i = 1; i < files.size(); i++) {
+    if (files[i].empty()) {
+      continue;
+    }
+    // Need normalization as GCC may output a same file in different way.
+    // TODO: don't use ResolvePath.
+    const string abs_file = file::JoinPathRespectAbsolute(cwd, files[i]);
+    expected_files.insert(pr.ResolvePath(abs_file));
+  }
+#endif
+  return expected_files;
+}
+
+// Joins each of |paths| onto |cwd| and resolves it with PathResolver.
+std::set<string> NormalizePaths(
+    const string& cwd, const std::set<string>& paths) {
+  std::set<string> resolved;
+  for (const auto& path : paths)
+    resolved.insert(devtools_goma::PathResolver::ResolvePath(
+        file::JoinPathRespectAbsolute(cwd, path)));
+  return resolved;
+}
+
+// Logs the files found in only one of |expected_files| and |actual_files|,
+// then returns how many expected files are missing from |actual_files|.
+int CompareFiles(const std::set<string>& expected_files,
+                 const std::set<string>& actual_files) {
+  std::vector<string> matched;
+  std::vector<string> extra;
+  std::vector<string> missing;
+
+  // Both sets iterate in sorted order, so every vector stays sorted as well.
+  for (const auto& expected : expected_files) {
+    const bool found = actual_files.count(expected) > 0;
+    (found ? matched : missing).push_back(expected);
+  }
+  for (const auto& actual : actual_files) {
+    if (expected_files.count(actual) == 0) {
+      extra.push_back(actual);
+    }
+  }
+
+  for (const auto& path : extra) {
+    LOG(INFO) << "Extra include:" << path;
+  }
+  for (const auto& path : missing) {
+    LOG(ERROR) << "Missing include:" << path;
+  }
+  LOG(INFO) << "matched:" << matched.size()
+            << " extra:" << extra.size()
+            << " missing:" << missing.size();
+  return missing.size();
+}
+
+// Appends the first "<name>=..." entry of |envp| to |envs|, if there is one.
+void GetAdditionalEnv(
+    const char** envp, const char* name, std::vector<string>* envs) {
+  const size_t namelen = strlen(name);  // size_t: no narrowing to int.
+  for (const char** e = envp; *e; e++) {
+#ifdef _WIN32  // The name is compared case-insensitively on Windows.
+    const bool name_matches = _strnicmp(*e, name, namelen) == 0;
+#else
+    const bool name_matches = strncmp(*e, name, namelen) == 0;
+#endif
+    if (name_matches && (*e)[namelen] == '=') {
+      envs->push_back(*e);
+      return;
+    }
+  }
+}
+
+// Command-line driver, compiled only when TEST is defined. Usage:
+//   [--verify] [--count=N] <absolute path of local compiler> [args]
+// Prints the include files found for the inputs and the time each run took.
+int main(int argc, char *argv[], const char** envp) {
+  devtools_goma::Init(argc, argv, envp);
+  devtools_goma::InitLogging(argv[0]);
+  // Options are positional: "--verify" first, then "--count=N"; both optional.
+  bool verify_mode = false;
+  if (argc >= 2 && !strcmp(argv[1], "--verify")) {
+    verify_mode = true;
+    argc--;
+    argv++;
+#ifdef _WIN32
+    std::cerr << "--verify is not yet supported on win32" << std::endl;
+    exit(1);
+#endif
+  }
+
+  int loop_count = 1;
+  if (argc >= 2 && strings::StartsWith(argv[1], "--count=")) {
+    loop_count = atoi(argv[1] + 8);  // 8 == strlen("--count=")
+    argc--;
+    argv++;
+    std::cerr << "Run IncludeProcessor::GetIncludeFiles "
+              << loop_count << " times." << std::endl;
+  }
+
+#ifndef _WIN32
+  if (argc == 1) {
+    std::cerr << argv[0] << " [full path of local compiler [args]]"
+              << std::endl;
+    std::cerr << "e.g.: " << argv[0] << " /usr/bin/gcc -c tmp.c" << std::endl;
+    exit(1);
+  }
+  if (argv[1][0] != '/') {
+    std::cerr << "argv[1] is not absolute path for local compiler."
+              << std::endl;
+    exit(1);
+  }
+  devtools_goma::InstallReadCommandOutputFunc(
+      devtools_goma::ReadCommandOutputByPopen);
+#else
+  if (argc == 1) {
+    std::cerr << argv[0] << " [full path of local compiler [args]]"
+              << std::endl;
+    std::cerr << "e.g.: " << argv[0] << " C:\\vs\\vc\\bin\\cl.exe /c c1.c"
+              << std::endl;
+    std::cerr << "Compiler path must be absolute path." << std::endl;
+    exit(1);
+  }
+  devtools_goma::InstallReadCommandOutputFunc(
+      devtools_goma::ReadCommandOutputByRedirector);
+#endif
+
+  devtools_goma::IncludeFileFinder::Init(false);
+
+  const string cwd = devtools_goma::GetCurrentDirNameOrDie();
+  std::vector<string> args;
+  for (int i = 1; i < argc; i++)
+    args.push_back(argv[i]);
+
+  std::unique_ptr<devtools_goma::CompilerFlags> flags(
+      devtools_goma::CompilerFlags::MustNew(args, cwd));
+  std::vector<string> compiler_info_envs;
+  flags->GetClientImportantEnvs(envp, &compiler_info_envs);
+
+  // These env variables are needed to run cl.exe
+  GetAdditionalEnv(envp, "PATH", &compiler_info_envs);
+  GetAdditionalEnv(envp, "TMP", &compiler_info_envs);
+  GetAdditionalEnv(envp, "TEMP", &compiler_info_envs);
+
+  devtools_goma::CompilerInfoBuilder cib;
+  std::unique_ptr<devtools_goma::CompilerInfoData> cid(
+      cib.FillFromCompilerOutputs(
+          *flags, args[0], compiler_info_envs));
+  devtools_goma::CompilerInfo compiler_info(std::move(cid));
+  if (compiler_info.HasError()) {
+    std::cerr << compiler_info.error_message() << std::endl;
+    exit(1);
+  }
+
+  std::set<string> include_files;
+
+#if HAVE_CPU_PROFILER
+  ProfilerStart(file::JoinPathRespectAbsolute(
+      FLAGS_TMP_DIR, FLAGS_INCLUDE_PROCESSOR_CPU_PROFILE_FILE).c_str());
+#endif
+
+  for (int i = 0; i < loop_count; ++i) {
+    devtools_goma::IncludeProcessor include_processor;
+    devtools_goma::FileIdCache file_id_cache;
+    include_files.clear();
+
+    clock_t start_time = clock();
+    for (const auto& iter : flags->input_filenames()) {
+      bool ok = include_processor.GetIncludeFiles(
+          iter,
+          cwd,
+          *flags,
+          compiler_info,
+          &include_files,
+          &file_id_cache);
+      if (!ok) {
+        std::cerr << "GetIncludeFiles failed" << std::endl;
+        exit(1);
+      }
+    }
+    clock_t end_time = clock();
+
+    // Show the result only for the first iteration.
+    if (i == 0) {
+      for (const auto& iter : include_files) {
+        std::cout << iter << std::endl;
+      }
+      std::cerr << "listed/skipped/total files: "
+                << include_files.size() << " / "
+                << include_processor.cpp_parser()->skipped_files() << " / "
+                << include_processor.cpp_parser()->total_files() << std::endl;
+    }
+
+    if (loop_count != 1) {
+      std::cerr << "Run " << i << ": ";
+    }
+    std::cerr << (end_time - start_time) * 1000.0 / CLOCKS_PER_SEC << "msec"
+              << std::endl;
+  }
+
+#if HAVE_CPU_PROFILER
+  ProfilerStop();
+#endif
+  // Verify mode: fail if a file reported by the compiler's -M is missing.
+  if (verify_mode) {
+    for (const auto& iter : flags->input_filenames()) {
+      include_files.insert(file::JoinPathRespectAbsolute(cwd, iter));
+    }
+    std::set<string> actual = NormalizePaths(cwd, include_files);
+    std::set<string> expected = GetExpectedFiles(args, compiler_info_envs, cwd);
+    std::cout << "expected" << std::endl;
+    for (const auto& iter : expected) {
+      std::cout << iter << std::endl;
+    }
+    std::cout << "compare" << std::endl;
+    int missings = CompareFiles(expected, actual);
+    if (missings > 0) {
+      LOG(ERROR) << "missing files:" << missings;
+      exit(1);
+    }
+  }
+}
+#endif
diff --git a/client/include_processor.h b/client/include_processor.h
new file mode 100644
index 0000000..6685cd9
--- /dev/null
+++ b/client/include_processor.h
@@ -0,0 +1,58 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_INCLUDE_PROCESSOR_H_
+#define DEVTOOLS_GOMA_CLIENT_INCLUDE_PROCESSOR_H_
+
+#include <map>
+#include <set>
+#include <string>
+
+#include "basictypes.h"
+#include "cpp_parser.h"
+#include "file_id_cache.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class CompilerFlags;
+class CompilerInfo;
+class Content;
+
+// Enumerates the files a source file includes, using an internal CppParser.
+class IncludeProcessor {
+ public:
+  IncludeProcessor() {}
+  ~IncludeProcessor() {}
+
+  // Enumerates all include files of |filename| into |include_files|; returns
+  // false on failure. FileIds created on the way are cached in
+  // |file_id_cache| for later reuse, as creating them is slow, esp. on Windows.
+  bool GetIncludeFiles(const string& filename,
+                       const string& current_directory,
+                       const CompilerFlags& compiler_flags,
+                       const CompilerInfo& compiler_info,
+                       std::set<string>* include_files,
+                       FileIdCache* file_id_cache);
+
+  const CppParser* cpp_parser() const { return &cpp_parser_; }
+
+  // File counters forwarded from the parser.
+  int total_files() const;
+  int skipped_files() const;
+
+ private:
+  CppParser cpp_parser_;
+  // [macro, cwd] -> is_include_next
+  std::map<std::pair<string, string>, bool> delayed_macro_includes_;
+
+  friend class IncludeProcessorTest;
+  DISALLOW_COPY_AND_ASSIGN(IncludeProcessor);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_INCLUDE_PROCESSOR_H_
diff --git a/client/include_processor_unittest.cc b/client/include_processor_unittest.cc
new file mode 100644
index 0000000..5218c3b
--- /dev/null
+++ b/client/include_processor_unittest.cc
@@ -0,0 +1,3171 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+// You can specify the clang binary for this test by
+//
+// GOMATEST_CLANG_PATH=/somewhere/bin/clang ./include_processor_unittest
+
+#include <limits.h>
+#include <stddef.h>
+#include <string.h>
+#ifndef _WIN32
+#include <unistd.h>
+#else
+#include <windows.h>
+#include <winbase.h>
+#include <winreg.h>
+#endif
+
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+#include <gtest/gtest.h>
+
+#include "compiler_flags.h"
+#include "compiler_info.h"
+#include "compiler_info_cache.h"
+#include "file.h"
+#include "file_helper.h"
+#include "file_id_cache.h"
+#include "include_file_finder.h"
+#include "include_processor.h"
+#include "ioutil.h"
+#include "join.h"
+#include "mypath.h"
+#include "path.h"
+#include "path_resolver.h"
+#include "split.h"
+#include "string_piece.h"
+#include "string_piece_utils.h"
+#include "subprocess.h"
+#include "unittest_util.h"
+#include "util.h"
+
+using std::string;
+
+namespace {
+
+#ifdef _WIN32
+// Wrapper batch file for cl.exe. Its output is "where cl" followed by the
+// full paths of cl.exe (one per line), "env" followed by the environment for
+// cl.exe, and "run cl" followed by the output of cl.exe. Arguments are passed
+// with %* because %1..%9 would drop any beyond the ninth.
+static const char* kClWrapperBat =
+                  "@echo off\r\n"
+                  "echo where cl\r\n"
+                  "where cl\r\n"
+                  "echo env\r\n"
+                  "set\r\n"
+                  "echo run cl\r\n"
+                  "cl %*\r\n";
+
+#endif
+}  // namespace
+
+namespace devtools_goma {
+
+class IncludeProcessorTest : public testing::Test {
+ public:
+  IncludeProcessorTest() {
+#ifndef _WIN32
+    char* clang = getenv("GOMATEST_CLANG_PATH");
+    clang_path_ = clang ? clang : "/usr/bin/clang";
+    if (access(clang_path_.c_str(), X_OK) != 0) {
+      PCHECK(!clang) << "The clang you specified doesn't work: " << clang;
+      LOG(ERROR) << "We'll skip clang tests.";
+      clang_path_ = "";
+    }
+#else
+    // This is out\Release\include_processor_unittest.exe or so.
+    const string my_dir = GetMyDirectory();
+
+    top_dir_ = file::JoinPath(my_dir, "..", "..");
+
+    // Read environment.x86 and parse it to env_.
+    // environment.x86 contains \0 separated strings.
+    const string envfile_path = file::JoinPath(my_dir, "environment.x86");
+    string content;
+    CHECK(ReadFileToString(envfile_path, &content))
+        << "failed to read environment.x86: " << envfile_path;
+    env_ = strings::Split(content, '\0');
+    // Remove empty string.
+    env_.erase(std::remove_if(env_.begin(),
+                              env_.end(),
+                              [](const string& s) { return s.empty(); }),
+               env_.end());
+#endif
+  }
+
+  void SetUp() override {
+    tmpdir_util_.reset(new TmpdirUtil("include_processor_unittest"));
+    tmpdir_util_->SetCwd("");
+#ifndef _WIN32
+    InstallReadCommandOutputFunc(ReadCommandOutputByPopen);
+#else
+    cl_wrapper_path_ = CreateTmpFile(kClWrapperBat, "clwrapper.bat");
+
+    InstallReadCommandOutputFunc(ReadCommandOutputByRedirector);
+#endif
+    IncludeFileFinder::Init(true);
+  }
+
+  std::unique_ptr<CompilerInfoData> CreateCompilerInfoWithArgs(
+      const CompilerFlags& flags,
+      const string& bare_gcc,
+      const std::vector<string>& compiler_envs) {
+    CompilerInfoBuilder cib;
+    return cib.FillFromCompilerOutputs(flags, bare_gcc, compiler_envs);
+  }
+
+  ScopedCompilerInfoState GetCompilerInfoFromCacheOrCreate(
+      const CompilerFlags& flags,
+      const string& bare_gcc,
+      const std::vector<string>& compiler_envs) {
+    auto key = CompilerInfoCache::CreateKey(flags, bare_gcc, compiler_envs);
+    ScopedCompilerInfoState cis(CompilerInfoCache::instance()->Lookup(key));
+    if (cis.get() != nullptr) {
+      return cis;
+    }
+
+    return ScopedCompilerInfoState(CompilerInfoCache::instance()->Store(
+        key, CreateCompilerInfoWithArgs(flags, bare_gcc, compiler_envs)));
+  };
+
+  std::set<string> RunIncludeProcessor(const string& source_file,
+                                       const std::vector<string>& args) {
+    std::unique_ptr<CompilerFlags> flags(
+        CompilerFlags::MustNew(args, tmpdir_util_->tmpdir()));
+    std::unique_ptr<CompilerInfoData> data(new CompilerInfoData);
+    data->set_found(true);
+    CompilerInfo compiler_info(std::move(data));
+
+    IncludeProcessor processor;
+    std::set<string> files;
+    FileIdCache file_id_cache;
+    // ASSERT_TRUE cannot be used here, I don't know why.
+    EXPECT_TRUE(processor.GetIncludeFiles(source_file,
+                                          tmpdir_util_->tmpdir(),
+                                          *flags,
+                                          compiler_info,
+                                          &files,
+                                          &file_id_cache));
+    return files;
+  }
+
+  void RunIncludeProcessorToEmptySource(const string& compiler,
+                                        std::set<string>* files) {
+    const string& source_file = CreateTmpFile("", "for_stdcpredef.cc");
+
+    std::vector<string> args;
+    args.push_back(compiler);
+    args.push_back("-c");
+    args.push_back(source_file);
+
+    std::unique_ptr<CompilerFlags> flags(
+        CompilerFlags::MustNew(args, tmpdir_util_->tmpdir()));
+    ScopedCompilerInfoState cis(
+        GetCompilerInfoFromCacheOrCreate(*flags, compiler, env_));
+
+    IncludeProcessor processor;
+    FileIdCache file_id_cache;
+    ASSERT_TRUE(processor.GetIncludeFiles(source_file,
+                                          tmpdir_util_->tmpdir(),
+                                          *flags,
+                                          cis.get()->info(),
+                                          files,
+                                          &file_id_cache));
+  }
+
+  void RemoveAndCheckEmptySourceIncludeHeaders(
+      const string& compiler, std::set<string>* files) {
+    std::set<string> emptysource_files;
+    RunIncludeProcessorToEmptySource(compiler, &emptysource_files);
+    for (const auto& it : emptysource_files) {
+      EXPECT_GT(files->count(it), 0U);
+      files->erase(it);
+    }
+  }
+
+  // Runs test by comparing include_processor with cpp's output.
+#ifndef _WIN32
+  struct GccLikeCompiler {
+    GccLikeCompiler(const string& path, std::vector<string> additional_args) :
+      path(path), additional_args(additional_args) {}
+
+    string path;
+    std::vector<string> additional_args;
+  };
+
+  std::vector<GccLikeCompiler> GccLikeCompilers() const {
+    std::vector<GccLikeCompiler> compilers;
+
+    std::vector<string> c_args;
+    std::vector<string> cpp_args;
+    cpp_args.push_back("-x");
+    cpp_args.push_back("c++");
+
+    compilers.push_back(GccLikeCompiler("/usr/bin/gcc", c_args));
+    compilers.push_back(GccLikeCompiler("/usr/bin/gcc", cpp_args));
+#ifndef __MACH__
+    // TODO: fix this.
+    // On Mac, non system clang seems not know where system libraries exists.
+    if (!clang_path_.empty()) {
+      compilers.push_back(GccLikeCompiler(clang_path_, c_args));
+      compilers.push_back(GccLikeCompiler(clang_path_, cpp_args));
+    }
+#endif
+
+    return compilers;
+  }
+
+  std::set<string> GetExpectedFiles(std::vector<string> args) {
+    args.push_back("-M");
+
+    std::vector<string> env(env_);
+    env.push_back("LC_ALL=C");
+
+    // The output format of -M will be
+    //
+    // stdio: /usr/include/stdio.h /usr/include/features.h \\\n
+    //   /usr/include/sys/cdefs.h /usr/include/bits/wordsize.h \\\n
+    //   ...
+    std::vector<string> files;
+    int exit_status;
+    SplitStringUsing(ReadCommandOutputByPopen(args[0], args, env,
+                                              tmpdir_util_->tmpdir(),
+                                              STDOUT_ONLY, &exit_status),
+                     " \n\r\\", &files);
+    LOG_IF(INFO, exit_status != 0)
+        << "non-zero exit status."
+        << " args=" << args
+        << " exit_status=" << exit_status;
+    std::set<string> expected_files;
+    PathResolver pr;
+    // Skip the first element as it's the make target.
+    for (size_t i = 1; i < files.size(); i++) {
+      if (files[i].empty())
+        continue;
+
+      // For the include files in the current directory, gcc or clang returns
+      // it with relative path. we need to normalize it to absolute path.
+      string file = file::JoinPathRespectAbsolute(tmpdir_util_->tmpdir(),
+                                                  files[i]);
+      // Need normalization as GCC may output a same file in different way.
+      // TODO: don't use ResolvePath.
+      expected_files.insert(pr.ResolvePath(file));
+    }
+
+    return expected_files;
+  }
+
+  void RunTest(const string& bare_gcc,
+               const string& include_file,
+               const std::vector<string>& additional_args) {
+    std::vector<string> args;
+    args.push_back(bare_gcc);
+    copy(additional_args.begin(), additional_args.end(), back_inserter(args));
+    args.push_back(include_file);
+
+    std::set<string> expected_files(GetExpectedFiles(args));
+    ASSERT_FALSE(expected_files.empty());
+    std::unique_ptr<CompilerFlags> flags(
+        CompilerFlags::MustNew(args, tmpdir_util_->tmpdir()));
+
+    ScopedCompilerInfoState cis(
+        GetCompilerInfoFromCacheOrCreate(*flags, bare_gcc, env_));
+    VLOG(1) << cis.get()->info().DebugString();
+
+    IncludeProcessor processor;
+    std::set<string> files;
+    FileIdCache file_id_cache;
+    ASSERT_TRUE(processor.GetIncludeFiles(include_file,
+                                          tmpdir_util_->tmpdir(),
+                                          *flags,
+                                          cis.get()->info(),
+                                          &files,
+                                          &file_id_cache));
+    // TODO: don't use ResolvePath.
+    //  for now, it fails without ResolvePath
+    //    recursive: /dir/../dir/foo.c not found, /dir/./foo.c not found
+    //    include_twice_with_macro: dir/./tmp.h not found
+    PathResolver pr;
+    std::set<string> actual_files;
+    for (const auto& iter : files) {
+      actual_files.insert(pr.ResolvePath(
+          file::JoinPathRespectAbsolute(tmpdir_util_->tmpdir(), iter)));
+    }
+    actual_files.insert(pr.ResolvePath(include_file));
+
+    VLOG(1) << "expected_files: " << expected_files
+            << " actual_files: " << actual_files;
+
+    CompareFiles(expected_files, actual_files);
+  }
+#else
+  void RunClTest(const string& include_file,
+                 const std::vector<string>& additional_args) {
+    std::vector<string> args;
+    args.push_back(cl_wrapper_path_);
+    args.push_back("/nologo");
+    args.push_back("/showIncludes");
+    args.push_back("/c");
+    args.push_back(include_file);
+    copy(additional_args.begin(), additional_args.end(), back_inserter(args));
+    LOG(INFO) << args;
+    static const char kNoteIncluding[] = "Note: including file: ";
+
+    std::vector<string> lines;
+    VLOG(1) << cl_wrapper_path_;
+    VLOG(1) << "args:" << args;
+    VLOG(1) << "env:" << env_;
+    int32_t status;
+    SplitStringUsing(ReadCommandOutputByRedirector(
+                         cl_wrapper_path_, args, env_, tmpdir_util_->tmpdir(),
+                         MERGE_STDOUT_STDERR, &status),
+                     "\n\r", &lines);
+
+    if (status != 0) {
+      LOG(INFO) << "status: " << status;
+      for (size_t i = 0; i < lines.size(); ++i) {
+        LOG(INFO) << "line " << i << ":" << lines[i];
+      }
+      FAIL();
+    }
+    VLOG(1) << "ReadCommand finished " << lines.size() << " lines.";
+    VLOG(2) << lines;
+
+    std::set<string> expected_files;
+    PathResolver pr;
+
+    size_t lineno = 0;
+    for (; lineno < lines.size(); ++lineno) {
+      if (strings::StartsWith(lines[lineno], "where cl")) {
+        ++lineno;
+        break;
+      }
+    }
+
+    string bare_cl = lines[lineno];
+    LOG(INFO) << "bare_cl=" << bare_cl;
+    ++lineno;
+    for (; lineno < lines.size(); ++lineno) {
+      if (strings::StartsWith(lines[lineno], "env")) {
+        ++lineno;
+        break;
+      }
+    }
+
+    std::vector<string> compiler_env;
+    for (; lineno < lines.size(); ++lineno) {
+      if (strings::StartsWith(lines[lineno], "run cl")) {
+        ++lineno;
+        break;
+      }
+      compiler_env.push_back(lines[lineno]);
+    }
+    VLOG(1) << "compiler_env=" << compiler_env;
+
+    // The output format of /showIncludes will be
+    //
+    // Note: including file: c:\Program Files (x86)
+    //       \Microsoft Visual Studio 9.0\VC\INCLUDE\stdio.h\r\n
+    // ...
+    //
+    // Note: some filenames output by /showIncludes are normalized to lower
+    // case characters. Since that does not cause a compile request to fail,
+    // compare the expected and actual results after converting both to
+    // lower case.
+    for (; lineno < lines.size(); ++lineno) {
+      const string& line = lines[lineno];
+      if (strings::StartsWith(line, kNoteIncluding)) {
+        string path = line.substr(sizeof(kNoteIncluding) - 1);
+        size_t pos = path.find_first_not_of(' ');
+        if (pos != string::npos) {
+          path = path.substr(pos);
+          std::transform(path.begin(), path.end(), path.begin(), ::tolower);
+          expected_files.insert(pr.ResolvePath(path));
+        }
+      }
+    }
+    LOG(INFO) << "# of expected_files=" << expected_files.size();
+    VLOG(1) << "expected_files=" << expected_files;
+    ASSERT_FALSE(expected_files.empty());
+    args[0] = bare_cl;
+    std::unique_ptr<CompilerFlags> flags(
+        CompilerFlags::MustNew(args, tmpdir_util_->tmpdir()));
+
+    ScopedCompilerInfoState cis(
+        GetCompilerInfoFromCacheOrCreate(*flags, bare_cl, env_));
+
+    IncludeProcessor processor;
+    std::set<string> files;
+    FileIdCache file_id_cache;
+    ASSERT_TRUE(processor.GetIncludeFiles(include_file,
+                                          tmpdir_util_->tmpdir(),
+                                          *flags,
+                                          cis.get()->info(),
+                                          &files,
+                                          &file_id_cache));
+    // TODO: don't use ResolvePath.
+    std::set<string> actual_files;
+    for (string path : files) {
+      std::transform(path.begin(), path.end(), path.begin(), ::tolower);
+      actual_files.insert(pr.ResolvePath(path));
+    }
+
+    LOG(INFO) << "# of actual_files=" << actual_files.size();
+    VLOG(1) << "expected_files: " << expected_files
+            << " actual_files: " << actual_files;
+    CompareFiles(expected_files, actual_files);
+  }
+#endif
+
+  // Verifies that |actual_files| holds exactly the paths in |expected_files|.
+  //
+  // A path found only in |expected_files| ("missing") is always reported as
+  // a test failure. A path found only in |actual_files| ("extra") is also a
+  // failure, except on Mac where it is just logged as a warning
+  // (see b/26573474). Counts and both lists are written to the INFO log.
+  static void CompareFiles(const std::set<string>& expected_files,
+                           const std::set<string>& actual_files) {
+    // std::set iterates in sorted order, which the set algorithms require.
+    std::vector<string> in_both;
+    std::set_intersection(expected_files.begin(), expected_files.end(),
+                          actual_files.begin(), actual_files.end(),
+                          std::back_inserter(in_both));
+
+    // expected - actual: paths that were expected but not produced.
+    std::vector<string> missing_files;
+    std::set_difference(expected_files.begin(), expected_files.end(),
+                        actual_files.begin(), actual_files.end(),
+                        std::back_inserter(missing_files));
+
+    // actual - expected: paths that were produced but not expected.
+    std::vector<string> extra_files;
+    std::set_difference(actual_files.begin(), actual_files.end(),
+                        expected_files.begin(), expected_files.end(),
+                        std::back_inserter(extra_files));
+
+    LOG(INFO) << "matched:" << in_both.size()
+              << " extra:" << extra_files.size()
+              << " missing:" << missing_files.size();
+    if (!extra_files.empty()) {
+      LOG(INFO) << "extra files: " << strings::Join(extra_files, ", ");
+    }
+    if (!missing_files.empty()) {
+      LOG(INFO) << "missing files: " << strings::Join(missing_files, ", ");
+    }
+
+    EXPECT_EQ(0U, missing_files.size()) << missing_files;
+#ifdef __MACH__
+    // See: b/26573474
+    if (0U != extra_files.size()) {
+      LOG(WARNING) << extra_files;
+    }
+#else
+    EXPECT_EQ(0U, extra_files.size()) << extra_files;
+#endif
+  }
+
+  // Creates a file called |name| holding |content| through |tmpdir_util_|
+  // and returns tmpdir_util_->FullPath(name) for it.
+  string CreateTmpFile(const string& content, const string& name) {
+    tmpdir_util_->CreateTmpFile(name, content);
+    const string full_path = tmpdir_util_->FullPath(name);
+    return full_path;
+  }
+
+#ifndef _WIN32
+  // Creates directory |dirname| through |tmpdir_util_| and returns
+  // tmpdir_util_->FullPath(dirname) for it.
+  string CreateTmpDir(const string& dirname) {
+    // NOTE(review): the meaning of the `true` flag is defined by
+    // TmpdirUtil::MkdirForPath, which is not visible here - confirm.
+    tmpdir_util_->MkdirForPath(dirname, true);
+    const string full_path = tmpdir_util_->FullPath(dirname);
+    return full_path;
+  }
+
+  // Writes a header-map image with a single bucket to the tmp file |name|
+  // and returns the path reported by CreateTmpFile.
+  //
+  // The one bucket refers to the strings |key|, |prefix| and |suffix|, which
+  // are stored NUL-terminated, one after another, in the trailing string
+  // table.
+  string CreateTmpHmapWithOneEntry(const string& key,
+                                   const string& prefix,
+                                   const string& suffix,
+                                   const string& name) {
+    // In-memory image of the file: fixed header fields, one bucket
+    // (key/prefix/suffix offsets), then the start of the string table.
+    struct HeaderMapWithOneEntry {
+      char magic[4];
+      uint16_t version;
+      uint16_t reserved;
+      uint32_t string_offset;
+      uint32_t string_count;
+      uint32_t hash_capacity;
+      uint32_t max_value_length;
+
+      uint32_t key;
+      uint32_t prefix;
+      uint32_t suffix;
+
+      char strings[1];
+    };
+
+    // Every string is stored together with its terminating NUL.
+    const size_t key_bytes = key.size() + 1;
+    const size_t prefix_bytes = prefix.size() + 1;
+    const size_t suffix_bytes = suffix.size() + 1;
+    const size_t hmap_len = sizeof(HeaderMapWithOneEntry) + key_bytes +
+                            prefix_bytes + suffix_bytes;
+
+    // calloc zero-fills the buffer, so any field not assigned below is 0.
+    std::unique_ptr<char[], decltype(&free)> hmap_entity(
+        reinterpret_cast<char*>(calloc(1, hmap_len)), free);
+    HeaderMapWithOneEntry* hmap =
+        reinterpret_cast<HeaderMapWithOneEntry*>(hmap_entity.get());
+
+    const char kMagic[4] = {'p', 'a', 'm', 'h'};
+    for (size_t i = 0; i < sizeof(kMagic); ++i) {
+      hmap->magic[i] = kMagic[i];
+    }
+    hmap->version = 1;
+    hmap->string_offset = offsetof(HeaderMapWithOneEntry, strings);
+    hmap->hash_capacity = 1;
+
+    // Offsets are relative to |strings|; the first string starts at 1, so
+    // offset 0 is never used for a string.
+    hmap->key = 1;
+    hmap->prefix = hmap->key + key_bytes;
+    hmap->suffix = hmap->prefix + prefix_bytes;
+
+    char* const table = hmap->strings;
+    strcpy(table + hmap->key, key.c_str());
+    strcpy(table + hmap->prefix, prefix.c_str());
+    strcpy(table + hmap->suffix, suffix.c_str());
+
+    const string image(hmap_entity.get(), hmap_len);
+    return CreateTmpFile(image, name);
+  }
+#endif
+
+ protected:
+  // Runs once before the first test of this fixture: initializes
+  // CompilerInfoCache.
+  static void SetUpTestCase() {
+    // Does not load cache from file.
+    // NOTE(review): the two empty strings and 3600 are interpreted by
+    // CompilerInfoCache::Init, which is not visible here - confirm their
+    // meaning against its declaration.
+    CompilerInfoCache::Init("", "", 3600);
+  }
+
+  // Runs once after the last test of this fixture: shuts down the
+  // CompilerInfoCache that SetUpTestCase initialized.
+  static void TearDownTestCase() {
+    CompilerInfoCache::Quit();
+  }
+
+  std::unique_ptr<TmpdirUtil> tmpdir_util_;
+  std::vector<string> env_;
+#ifndef _WIN32
+  string clang_path_;
+#else
+  string cl_wrapper_path_;
+  string top_dir_;
+#endif
+};
+
+#ifndef _WIN32
+// C source that includes one system header, checked with /usr/bin/gcc.
+TEST_F(IncludeProcessorTest, stdio) {
+  const string source = CreateTmpFile("#include <stdio.h>", "foo.c");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/gcc", source, no_extra_args);
+}
+
+// C++ source (.cc) that includes <iostream>, checked with /usr/bin/g++.
+TEST_F(IncludeProcessorTest, iostream) {
+  const string source = CreateTmpFile("#include <iostream>", "foo.cc");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/g++", source, no_extra_args);
+}
+
+// Same as the iostream test, but the .cpp source is handed to the gcc
+// driver instead of g++.
+TEST_F(IncludeProcessorTest, iostream_with_gcc) {
+  const string source = CreateTmpFile("#include <iostream>", "foo.cpp");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/gcc", source, no_extra_args);
+}
+
+// The #include operand is an object-like macro defined in the same file.
+TEST_F(IncludeProcessorTest, macro) {
+  static const char kSource[] = "#define ios <iostream>\n#include ios\n";
+  const string source = CreateTmpFile(kSource, "foo.cc");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/g++", source, no_extra_args);
+}
+
+// The #include operand is a macro supplied on the command line with -D.
+TEST_F(IncludeProcessorTest, commandline_macro) {
+  std::vector<string> args{"-Dios=<iostream>"};
+  const string source = CreateTmpFile("#include ios\n", "foo.cc");
+  RunTest("/usr/bin/g++", source, args);
+}
+
+TEST_F(IncludeProcessorTest, commandline_macro_undef) {
+  std::vector<string> args;
+  // Undefnie predefined macro.
+  args.push_back("-U__ELF__");
+  args.push_back("-D__ELF__=<stdio.h>");
+  RunTest("/usr/bin/g++",
+          CreateTmpFile("#include __ELF__\n", "foo.cc"),
+          args);
+}
+
+// The macro body is a string literal whose closing quote is missing; the
+// source contains no #include at all.
+TEST_F(IncludeProcessorTest, unclosed_macro) {
+  const string source = CreateTmpFile("#define wrong_macro \"foo", "foo.cc");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/g++", source, no_extra_args);
+}
+
+// Empty source; stdio.h is pulled in only through the -include option.
+TEST_F(IncludeProcessorTest, opt_include_in_system_path) {
+  std::vector<string> args{"-include", "stdio.h"};
+  const string source = CreateTmpFile("", "foo.cc");
+  RunTest("/usr/bin/gcc", source, args);
+}
+
+// With an empty stdc-predef.h in the tmp dir (on the include path via -I.)
+// and compiler info pinned to g++ 4.8, processing an empty source must
+// report exactly one file.
+TEST_F(IncludeProcessorTest, stdcpredef) {
+  const string compiler("/usr/bin/g++");
+  const string source_file(CreateTmpFile("", "foo.cc"));
+  CreateTmpFile("", "stdc-predef.h");
+
+  std::vector<string> args{compiler, "-I.", "-c", source_file};
+
+  std::unique_ptr<CompilerFlags> flags(
+      CompilerFlags::MustNew(args, tmpdir_util_->tmpdir()));
+
+  // Override name, version and predefined macros with fixed g++ 4.8 values
+  // so the result does not depend on the installed compiler's version.
+  std::unique_ptr<CompilerInfoData> data(
+      CreateCompilerInfoWithArgs(*flags, compiler, env_));
+  data->set_name("g++");
+  data->set_version("g++ (Ubuntu 4.8.2-19ubuntu1) 4.8.2");
+  data->set_predefined_macros(
+      "#define __GNUC__ 4\n"
+      "#define __GNUC_MINOR__ 8\n");
+  CompilerInfo compiler_info(std::move(data));
+
+  std::set<string> files;
+  FileIdCache file_id_cache;
+  IncludeProcessor processor;
+  ASSERT_TRUE(processor.GetIncludeFiles(source_file,
+                                        tmpdir_util_->tmpdir(),
+                                        *flags,
+                                        compiler_info,
+                                        &files,
+                                        &file_id_cache));
+
+  // stdc-predef.h should be included.
+  EXPECT_EQ(1U, files.size());
+}
+
+// With -ffreestanding, processing an empty source must report no files.
+TEST_F(IncludeProcessorTest, ffreestanding) {
+  const string compiler("/usr/bin/g++");
+  const string source_file(CreateTmpFile("", "foo.cc"));
+
+  std::vector<string> args{compiler, "-ffreestanding", "-c", source_file};
+
+  std::unique_ptr<CompilerFlags> flags(
+      CompilerFlags::MustNew(args, tmpdir_util_->tmpdir()));
+  CompilerInfo compiler_info(
+      CreateCompilerInfoWithArgs(*flags, compiler, env_));
+
+  std::set<string> files;
+  FileIdCache file_id_cache;
+  IncludeProcessor processor;
+  ASSERT_TRUE(processor.GetIncludeFiles(source_file,
+                                        tmpdir_util_->tmpdir(),
+                                        *flags,
+                                        compiler_info,
+                                        &files,
+                                        &file_id_cache));
+
+  // stdc-predef.h should not be included.
+  EXPECT_EQ(0U, files.size());
+}
+
+#ifndef __MACH__
+// Mac's /usr/bin/gcc is actually clang, and it does not know '-fno-hosted'.
+// So, skip this test on Mac.
+// With -fno-hosted, processing an empty C source must report no files.
+TEST_F(IncludeProcessorTest, fnohosted) {
+  // -fno-hosted only applies to C, so this test drives gcc rather than g++:
+  //
+  // $ g++ -fno-hosted -c ./test.cc
+  // cc1plus: warning: command line option '-fno-hosted' is valid for
+  // C/ObjC but not for C++ [enabled by default]
+
+  const string compiler("/usr/bin/gcc");
+  const string source_file(CreateTmpFile("", "foo.c"));
+
+  std::vector<string> args{compiler, "-fno-hosted", "-c", source_file};
+
+  std::unique_ptr<CompilerFlags> flags(
+      CompilerFlags::MustNew(args, tmpdir_util_->tmpdir()));
+  CompilerInfo compiler_info(
+      CreateCompilerInfoWithArgs(*flags, compiler, env_));
+  ASSERT_FALSE(compiler_info.HasError());
+
+  std::set<string> files;
+  FileIdCache file_id_cache;
+  IncludeProcessor processor;
+  ASSERT_TRUE(processor.GetIncludeFiles(source_file,
+                                        tmpdir_util_->tmpdir(),
+                                        *flags,
+                                        compiler_info,
+                                        &files,
+                                        &file_id_cache));
+
+  // stdc-predef.h should not be included.
+  EXPECT_EQ(0U, files.size());
+}
+#endif  // !__MACH__
+
+// TODO: Move some tests out from ifndef _WIN32 to share test cases.
+
+// foo.c includes itself under two different spellings ("../<tmpdir>/foo.c"
+// and "./foo.c"), protected by an include guard.
+TEST_F(IncludeProcessorTest, recursive) {
+  StringPiece dir_name = file::Basename(tmpdir_util_->tmpdir());
+  CHECK(!dir_name.empty());
+
+  // If we don't normalize .. and ., this will take exponential time.
+  std::ostringstream body;
+  body << "#ifndef FOO_C_\n";
+  body << "#define FOO_C_\n";
+  body << "#include \"../" << dir_name << "/foo.c\"\n";
+  body << "#include \"./foo.c\"\n";
+  body << "#endif\n";
+
+  const string source = CreateTmpFile(body.str(), "foo.c");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/gcc", source, no_extra_args);
+}
+
+// When "-include foo.h" is given and foo.h.gch.goma exists next to foo.h,
+// the only reported file (after the helper has removed the headers it
+// handles) must be foo.h.gch.goma.
+TEST_F(IncludeProcessorTest, opt_include_gch) {
+  const string compiler("/usr/bin/g++");
+
+  // Both files contain an #include; the expected result below only holds
+  // if neither of them is parsed.
+  const string orig_header(CreateTmpFile(
+      "#include <stdio.h> // This file must not be parsed", "foo.h"));
+  const string gch_header(CreateTmpFile(
+      "#include <stdio.h> // This file must not be parsed", "foo.h.gch.goma"));
+  const string source_file(CreateTmpFile("", "foo.cc"));
+
+  std::vector<string> args{
+      compiler, "-c", source_file, "-include", orig_header,
+  };
+
+  std::unique_ptr<CompilerFlags> flags(
+      CompilerFlags::MustNew(args, tmpdir_util_->tmpdir()));
+  CompilerInfo compiler_info(
+      CreateCompilerInfoWithArgs(*flags, compiler, env_));
+
+  std::set<string> files;
+  FileIdCache file_id_cache;
+  IncludeProcessor processor;
+  ASSERT_TRUE(processor.GetIncludeFiles(source_file,
+                                        tmpdir_util_->tmpdir(),
+                                        *flags,
+                                        compiler_info,
+                                        &files,
+                                        &file_id_cache));
+
+  RemoveAndCheckEmptySourceIncludeHeaders(compiler, &files);
+  ASSERT_EQ(1, static_cast<int>(files.size()));
+  EXPECT_EQ(gch_header, *files.begin());
+}
+
+// Checks which *.gch.goma file is reported for #include "foo.h" /
+// <foo.h> / <a/foo.h> under several -I orderings, and that an explicitly
+// included foo.h.gch.goma has its own content examined.
+TEST_F(IncludeProcessorTest, gch) {
+  const string& bare_gcc = "/usr/bin/g++";
+
+  std::vector<string> args;
+  args.push_back(bare_gcc);
+  // We have foo.h, foo.h.gch.goma, a/foo.h, and a/foo.h.gch.goma in this test.
+  CreateTmpDir("a");
+  const string& content = "#include <stdio.h> // This file must not be parsed";
+  // The order in which these files are created matters for coverage,
+  // because readdir tends to return newer files later.
+  // We want to check both of the following cases:
+  //
+  // 1. Normal header is found first, then pre-compiled one is found.
+  // 2. Pre-compiled header is found first, then normal one is found.
+  CreateTmpFile(content, "foo.h");
+  CreateTmpFile(content, "foo.h.gch.goma");
+  CreateTmpFile(content, "a/foo.h.gch.goma");
+  CreateTmpFile(content, "a/foo.h");
+
+  // Including "foo.h" should fetch foo.h.gch.goma.
+  // NOTE(review): this re-creates foo.h.gch.goma with the same text that
+  // was already written above; it looks redundant - confirm before removing.
+  CreateTmpFile("#include <stdio.h> // This file must not be parsed",
+                "foo.h.gch.goma");
+
+  string source_file = CreateTmpFile("#include \"foo.h\"", "foo.cc");
+
+  args.push_back("-c");
+  args.push_back(source_file);
+
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(
+      args, tmpdir_util_->tmpdir()));
+  // |compiler_info| is built once from the first flag set and reused for
+  // every case below.
+  CompilerInfo compiler_info(
+      CreateCompilerInfoWithArgs(*flags, bare_gcc, env_));
+
+  std::unique_ptr<IncludeProcessor> processor(new IncludeProcessor());
+
+  std::set<string> files;
+  FileIdCache file_id_cache;
+  ASSERT_TRUE(processor->GetIncludeFiles(source_file,
+                                         tmpdir_util_->tmpdir(),
+                                         *flags,
+                                         compiler_info,
+                                         &files,
+                                         &file_id_cache));
+
+  RemoveAndCheckEmptySourceIncludeHeaders(bare_gcc, &files);
+  ASSERT_EQ(1, static_cast<int>(files.size()));
+  EXPECT_EQ(tmpdir_util_->FullPath("foo.h.gch.goma"), *files.begin());
+
+  // Get foo.h.gch.goma by including <foo.h> with -I. option.
+  source_file = CreateTmpFile("#include <foo.h>", "foo.cc");
+
+  args.clear();
+  args.push_back(bare_gcc);
+  args.push_back("-I.");
+  args.push_back("-c");
+  args.push_back(source_file);
+
+  // Each case starts from fresh flags, a fresh processor, and cleared
+  // result set / file id cache.
+  flags = CompilerFlags::MustNew(args, tmpdir_util_->tmpdir());
+  processor.reset(new IncludeProcessor());
+
+  files.clear();
+  file_id_cache.Clear();
+  ASSERT_TRUE(processor->GetIncludeFiles(source_file,
+                                         tmpdir_util_->tmpdir(),
+                                         *flags,
+                                         compiler_info,
+                                         &files,
+                                         &file_id_cache));
+
+  RemoveAndCheckEmptySourceIncludeHeaders(bare_gcc, &files);
+  ASSERT_EQ(1, static_cast<int>(files.size()));
+  EXPECT_EQ("./foo.h.gch.goma", *files.begin());
+
+  // We should get a/foo.h.gch.goma by including <a/foo.h> with -I. option.
+  source_file = CreateTmpFile("#include <a/foo.h>", "foo.cc");
+
+  args.clear();
+  args.push_back(bare_gcc);
+  args.push_back("-I.");
+  args.push_back("-c");
+  args.push_back(source_file);
+  flags = CompilerFlags::MustNew(args, tmpdir_util_->tmpdir());
+  processor.reset(new IncludeProcessor());
+
+  files.clear();
+  file_id_cache.Clear();
+  ASSERT_TRUE(processor->GetIncludeFiles(source_file,
+                                         tmpdir_util_->tmpdir(),
+                                         *flags,
+                                         compiler_info,
+                                         &files,
+                                         &file_id_cache));
+
+  RemoveAndCheckEmptySourceIncludeHeaders(bare_gcc, &files);
+  ASSERT_EQ(1, static_cast<int>(files.size()));
+  EXPECT_EQ("./a/foo.h.gch.goma", *files.begin());
+
+  // We should get a/foo.h.gch.goma by including <foo.h> with -Ia -I. option.
+  source_file = CreateTmpFile("#include <foo.h>", "foo.cc");
+
+  args.clear();
+  args.push_back(bare_gcc);
+  args.push_back("-Ia");
+  args.push_back("-I.");
+  args.push_back("-c");
+  args.push_back(source_file);
+  flags = CompilerFlags::MustNew(args, tmpdir_util_->tmpdir());
+  processor.reset(new IncludeProcessor());
+
+  files.clear();
+  file_id_cache.Clear();
+  ASSERT_TRUE(processor->GetIncludeFiles(source_file,
+                                         tmpdir_util_->tmpdir(),
+                                         *flags,
+                                         compiler_info,
+                                         &files,
+                                         &file_id_cache));
+
+  RemoveAndCheckEmptySourceIncludeHeaders(bare_gcc, &files);
+  ASSERT_EQ(1, static_cast<int>(files.size()));
+  EXPECT_EQ("a/foo.h.gch.goma", *files.begin());
+  // We should get foo.h.gch.goma by including <foo.h> with -I. -Ia option.
+  source_file = CreateTmpFile("#include <foo.h>", "foo.cc");
+
+  args.clear();
+  args.push_back(bare_gcc);
+  args.push_back("-I.");
+  args.push_back("-Ia");
+  args.push_back("-c");
+  args.push_back(source_file);
+  flags = CompilerFlags::MustNew(args, tmpdir_util_->tmpdir());
+  processor.reset(new IncludeProcessor());
+
+  files.clear();
+  file_id_cache.Clear();
+  ASSERT_TRUE(processor->GetIncludeFiles(source_file,
+                                         tmpdir_util_->tmpdir(),
+                                         *flags,
+                                         compiler_info,
+                                         &files,
+                                         &file_id_cache));
+
+  RemoveAndCheckEmptySourceIncludeHeaders(bare_gcc, &files);
+  ASSERT_EQ(1, static_cast<int>(files.size()));
+  EXPECT_EQ("./foo.h.gch.goma", *files.begin());
+
+  // A crazy case: when foo.h.gch.goma is explicitly included, we should
+  // examine its content.
+  source_file = CreateTmpFile("#include <foo.h.gch.goma>", "foo.cc");
+  CreateTmpFile("#include <a/foo.h>", "foo.h.gch.goma");
+
+  args.clear();
+  args.push_back(bare_gcc);
+  args.push_back("-I.");
+  args.push_back("-c");
+  args.push_back(source_file);
+  flags = CompilerFlags::MustNew(args, tmpdir_util_->tmpdir());
+  processor.reset(new IncludeProcessor());
+
+  files.clear();
+  file_id_cache.Clear();
+  ASSERT_TRUE(processor->GetIncludeFiles(source_file,
+                                         tmpdir_util_->tmpdir(),
+                                         *flags,
+                                         compiler_info,
+                                         &files,
+                                         &file_id_cache));
+
+  RemoveAndCheckEmptySourceIncludeHeaders(bare_gcc, &files);
+  ASSERT_EQ(2, static_cast<int>(files.size()));
+  // std::set iterates in sorted order, so "./a/..." comes before "./foo...".
+  std::set<string>::const_iterator iter = files.begin();
+  EXPECT_EQ("./a/foo.h.gch.goma", *iter);
+  ++iter;
+  EXPECT_EQ("./foo.h.gch.goma", *iter);
+}
+
+// Runs twice against the same -I directory, adding a new header between
+// the runs, so the second run has to see a file created after the first.
+TEST_F(IncludeProcessorTest, dir_cache) {
+  std::vector<string> args{"-I" + tmpdir_util_->tmpdir()};
+
+  CreateTmpFile("", "bar.h");
+  // The cache will be constructed here.
+  const string first_source = CreateTmpFile("#include <bar.h>\n", "foo.cc");
+  RunTest("/usr/bin/g++", first_source, args);
+
+  // As another file is added, the cache must be discarded.
+  CreateTmpFile("", "baz.h");
+  const string second_source = CreateTmpFile("#include <baz.h>\n", "foo.cc");
+  RunTest("/usr/bin/g++", second_source, args);
+}
+
+// /usr/include is passed with -I (once plainly, once with redundant
+// slashes) ahead of the tmp dir, which holds its own empty stdio.h.
+TEST_F(IncludeProcessorTest, I_system_path) {
+  // Though /usr/include is specified before tmpdir_util_->tmpdir(),
+  // we don't use this because system path has this path.
+  std::vector<string> args{
+      "-I/usr/include",
+      "-I//////usr///include///",
+      "-I" + tmpdir_util_->tmpdir(),
+  };
+
+  CreateTmpFile("", "stdio.h");
+  // The cache will be constructed here.
+  const string first_source = CreateTmpFile("#include <stdio.h>\n", "foo.cc");
+  RunTest("/usr/bin/g++", first_source, args);
+
+  // As another file is added, the cache must be discarded.
+  CreateTmpFile("", "baz.h");
+  const string second_source = CreateTmpFile("#include <stdio.h>\n", "foo.cc");
+  RunTest("/usr/bin/g++", second_source, args);
+}
+
+// A quoted include of a header that lives only in the directory given
+// with -iquote.
+TEST_F(IncludeProcessorTest, iquote) {
+  std::vector<string> args;
+  args.push_back("-iquote");
+  args.push_back("include");
+
+  CreateTmpFile("", "include/foo.h");
+  const string source = CreateTmpFile("#include \"foo.h\"\n", "foo.cc");
+  RunTest("/usr/bin/g++", source, args);
+}
+
+// A header map passed with -I maps "foo.h" to another file. Both the map
+// and its target must be reported, and rewriting the map to point at a
+// different header must change the result.
+TEST_F(IncludeProcessorTest, hmap) {
+  const string compiler("/usr/bin/g++");
+
+  const string include_foo(CreateTmpFile("#include <foo.h>", "foo.cc"));
+  const string bar_header(CreateTmpFile("", "bar.h"));
+  const string hmap_file("hmap.hmap");
+  CreateTmpHmapWithOneEntry("foo.h", "", bar_header, hmap_file);
+
+  std::vector<string> args{compiler, "-Ihmap.hmap", include_foo};
+
+  std::unique_ptr<CompilerFlags> flags(
+      CompilerFlags::MustNew(args, tmpdir_util_->tmpdir()));
+  CompilerInfo compiler_info(
+      CreateCompilerInfoWithArgs(*flags, compiler, env_));
+
+  // Round 1: foo.h -> bar.h.
+  std::set<string> files;
+  FileIdCache file_id_cache;
+  std::unique_ptr<IncludeProcessor> processor(new IncludeProcessor());
+  ASSERT_TRUE(processor->GetIncludeFiles(include_foo,
+                                         tmpdir_util_->tmpdir(),
+                                         *flags,
+                                         compiler_info,
+                                         &files,
+                                         &file_id_cache));
+
+  RemoveAndCheckEmptySourceIncludeHeaders(compiler, &files);
+  EXPECT_EQ(2, static_cast<int>(files.size()));
+  EXPECT_EQ(1, static_cast<int>(files.count(hmap_file)));
+  EXPECT_EQ(1, static_cast<int>(files.count(bar_header)));
+
+  // Round 2: overwrite the map so that foo.h -> baz.h, then start over
+  // with fresh flags, processor, result set and file id cache.
+  const string baz_header(CreateTmpFile("", "baz.h"));
+  // Now we should fetch baz.h for #include <foo.h>.
+  CreateTmpHmapWithOneEntry("foo.h", "", baz_header, hmap_file);
+  flags = CompilerFlags::MustNew(args, tmpdir_util_->tmpdir());
+  processor.reset(new IncludeProcessor());
+  files.clear();
+  file_id_cache.Clear();
+  ASSERT_TRUE(processor->GetIncludeFiles(include_foo,
+                                         tmpdir_util_->tmpdir(),
+                                         *flags,
+                                         compiler_info,
+                                         &files,
+                                         &file_id_cache));
+
+  RemoveAndCheckEmptySourceIncludeHeaders(compiler, &files);
+  EXPECT_EQ(2, static_cast<int>(files.size()));
+  EXPECT_EQ(1, static_cast<int>(files.count(hmap_file)));
+  EXPECT_EQ(1, static_cast<int>(files.count(baz_header)));
+}
+
+// Header-map keys that contain directory components ("dir1/foo.h",
+// "dir1/dir2/bar.h"), one mapped to a full path and one to a bare name.
+// Both maps and both targets must be reported.
+TEST_F(IncludeProcessorTest, hmap_with_dir) {
+  const string compiler("/usr/bin/g++");
+
+  const string include_foo(CreateTmpFile(
+      "#include <dir1/foo.h>\n"
+      "#include <dir1/dir2/bar.h>\n",
+      "foo.cc"));
+  const string foo_header(CreateTmpFile("", "foo.h"));
+  CreateTmpFile("", "bar.h");
+  CreateTmpHmapWithOneEntry("dir1/foo.h", "", foo_header, "foo.hmap");
+  CreateTmpHmapWithOneEntry("dir1/dir2/bar.h", "", "bar.h", "bar.hmap");
+
+  std::vector<string> args{
+      compiler, "-Ifoo.hmap", "-Ibar.hmap", include_foo,
+  };
+
+  std::unique_ptr<CompilerFlags> flags(
+      CompilerFlags::MustNew(args, tmpdir_util_->tmpdir()));
+  CompilerInfo compiler_info(
+      CreateCompilerInfoWithArgs(*flags, compiler, env_));
+
+  std::set<string> files;
+  FileIdCache file_id_cache;
+  std::unique_ptr<IncludeProcessor> processor(new IncludeProcessor());
+  ASSERT_TRUE(processor->GetIncludeFiles(include_foo,
+                                         tmpdir_util_->tmpdir(),
+                                         *flags,
+                                         compiler_info,
+                                         &files,
+                                         &file_id_cache));
+
+  RemoveAndCheckEmptySourceIncludeHeaders(compiler, &files);
+  EXPECT_EQ(4, static_cast<int>(files.size()));
+  EXPECT_EQ(1, static_cast<int>(files.count("foo.hmap")));
+  EXPECT_EQ(1, static_cast<int>(files.count(foo_header)));
+  EXPECT_EQ(1, static_cast<int>(files.count("bar.hmap")));
+  EXPECT_EQ(1, static_cast<int>(files.count("bar.h")));
+}
+
+
+// The tmp dir, passed with -isystem, holds an empty file named like the
+// standard C++ header <typeinfo>.
+TEST_F(IncludeProcessorTest, cpp_and_isystem) {
+  CreateTmpFile("", "typeinfo");
+  std::vector<string> args{"-isystem", tmpdir_util_->tmpdir()};
+  const string source = CreateTmpFile("#include <typeinfo>\n", "foo.cc");
+  RunTest("/usr/bin/g++", source, args);
+}
+
+// The #include operand is produced by a function-like macro that
+// stringifies its argument.
+TEST_F(IncludeProcessorTest, funclike_macro1) {
+  static const char kSource[] =
+      "#define s(x) #x\n"
+      "#include s(stdio.h)\n";
+  const string source = CreateTmpFile(kSource, "foo.cc");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/g++", source, no_extra_args);
+}
+
+// The #include operand is an angle-bracket form built by a function-like
+// macro that uses ## inside the brackets.
+TEST_F(IncludeProcessorTest, funclike_macro2) {
+  static const char kSource[] =
+      "#define X(name) <std##name.h>\n"
+      "#include X(io)\n";
+  const string source = CreateTmpFile(kSource, "foo.c");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/gcc", source, no_extra_args);
+}
+
+// C(X, Y) pastes its arguments into XY, which is itself a macro for the
+// quoted header name.
+TEST_F(IncludeProcessorTest, funclike_macro3) {
+  static const char kSource[] =
+      "#define XY \"stdio.h\"\n"
+      "#define C(x, y) x ## y\n"
+      "#include C(X, Y)\n";
+  const string source = CreateTmpFile(kSource, "foo.c");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/gcc", source, no_extra_args);
+}
+
+// X(__STDC__) goes through two macro levels before being stringified, so
+// with __STDC__ == 1 the operand names the foo1.h created here.
+TEST_F(IncludeProcessorTest, include_nested_macros) {
+  CreateTmpFile("#include <stdio.h>\n", "foo1.h");
+  static const char kSource[] =
+      "#define S(a) #a\n"
+      "#define _X(x) S(foo##x.h)\n"
+      "#define X(x) _X(x)\n"
+      "#include X(__STDC__)\n";
+  const string source = CreateTmpFile(kSource, "foo.c");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/gcc", source, no_extra_args);
+}
+// The stringifying function-like macro is defined on the command line
+// with -D instead of in the source.
+TEST_F(IncludeProcessorTest, commandline_funclike_macro) {
+  std::vector<string> args{"-DS(a)=#a"};
+  const string source = CreateTmpFile("#include S(iostream)\n", "foo.cc");
+  RunTest("/usr/bin/g++", source, args);
+}
+
+// Backslash-newline splits a header name in the first directive and the
+// word "include" itself in the second.
+TEST_F(IncludeProcessorTest, escaped_newline) {
+  static const char kSource[] =
+      "#include <io\\\nstream>\n"
+      "#inc\\\nlude <string>\n";
+  const string source = CreateTmpFile(kSource, "foo.cc");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/g++", source, no_extra_args);
+}
+
+// Macro X is passed as an argument to other macros (including to itself)
+// before the result is stringified. Files named "99" and "X(99)" are
+// created as the candidate include targets.
+TEST_F(IncludeProcessorTest, macro_false_recursion) {
+  CreateTmpFile("#include <string>\n", "99");
+  CreateTmpFile("#include <vector>\n", "X(99)");
+
+  static const char kSource[] =
+      "#define X(x) x\n"
+      "#define Y99(x) x(99)\n"
+      "#define _S(x) #x\n"
+      "#define S(x) _S(x)\n"
+      "#include S(Y99(X))\n"
+      "#include S(Y99(X(X)))\n";
+  const string source = CreateTmpFile(kSource, "foo.cc");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/g++", source, no_extra_args);
+}
+
+// Macro invocations appear as arguments of other macro invocations:
+// C(E2(1, 2), E1(.h, .c)) pastes "2" and ".h", naming the 2.h created here.
+TEST_F(IncludeProcessorTest, macro_nested_args) {
+  static const char kUtilHeader[] =
+      "#define _S(x) #x\n"
+      "#define S(x) _S(x)\n"
+      "#define _C(x, y) x ## y\n"
+      "#define C(x, y) _C(x, y)\n";
+  CreateTmpFile(kUtilHeader, "util.h");
+  CreateTmpFile("#include <vector>\n", "2.h");
+
+  static const char kSource[] =
+      "#include \"util.h\"\n"
+      "#define E1(a, b) a\n"
+      "#define E2(a, b) b\n"
+      "#include S(C(E2(1, 2), E1(.h, .c)))\n";
+  const string source = CreateTmpFile(kSource, "foo.cc");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/g++", source, no_extra_args);
+}
+
+// A variadic macro that expands to its third argument, called with five
+// arguments; the third one names the file "c" created here.
+TEST_F(IncludeProcessorTest, macro_varargs) {
+  CreateTmpFile("#include <vector>\n", "c");
+
+  static const char kSource[] =
+      "#define X(a, b, c, ...) c\n"
+      "#include X(\"a\", \"b\", \"c\", \"d\", \"e\")\n";
+  const string source = CreateTmpFile(kSource, "foo.cc");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/g++", source, no_extra_args);
+}
+
+// The #if conditions call a macro whose expansion contains defined(...).
+// USE_X is 1 and USE_Y is !USE_X, so the two conditions differ.
+TEST_F(IncludeProcessorTest, macro_with_defined) {
+  CreateTmpFile("#include <map>\n", "x.h");
+  CreateTmpFile("#include <set>\n", "y.h");
+
+  static const char kSource[] =
+      "#define USE(a) (defined(USE_ ## a) && USE_ ## a)\n"
+      "#define USE_X 1\n"
+      "#define USE_Y !USE_X\n"
+      "#if USE(X)\n"
+      " #include \"x.h\"\n"
+      "#endif\n"
+      "#if USE(Y)\n"
+      "# include \"y.h\"\n"
+      "#endif\n";
+  const string source = CreateTmpFile(kSource, "foo.cc");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/g++", source, no_extra_args);
+}
+
+// The second #include sits inside a block comment that is opened on the
+// first directive's line.
+TEST_F(IncludeProcessorTest, include_in_comment) {
+  static const char kSource[] =
+      "#include <string> /* \n"
+      "#include <iostream> */\n";
+  const string source = CreateTmpFile(kSource, "foo.cc");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/g++", source, no_extra_args);
+}
+
+// The line comment after the first #include ends with a backslash, so the
+// following line is a continuation of that comment.
+TEST_F(IncludeProcessorTest, include_in_linecomment) {
+  static const char kSource[] =
+      "#include <string> // comment \\\n"
+      "#include <iostream>\n";
+  const string source = CreateTmpFile(kSource, "foo.cc");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/g++", source, no_extra_args);
+}
+
+// The #include operand is built from the predefined macro __GNUC__, so it
+// names foo<N>.h where N is the compiler's __GNUC__ value.
+TEST_F(IncludeProcessorTest, include_with_predefined) {
+  // Name the header after the __GNUC__ value of the compiler building this
+  // test. The helper macros use names outside the reserved identifier
+  // space and are both undefined again so nothing leaks into later code.
+  // NOTE(review): this presumes /usr/bin/gcc reports the same __GNUC__ as
+  // the compiler that builds this test - confirm for clang-built binaries.
+# define GOMA_TEST_STRINGIFY_(x) #x
+# define GOMA_TEST_STRINGIFY(x) GOMA_TEST_STRINGIFY_(x)
+  CreateTmpFile("#include <stdio.h>\n",
+                "foo" GOMA_TEST_STRINGIFY(__GNUC__) ".h");
+# undef GOMA_TEST_STRINGIFY
+# undef GOMA_TEST_STRINGIFY_
+  std::vector<string> args;
+  RunTest("/usr/bin/gcc",
+          CreateTmpFile("#define S(x) #x\n"
+                        "#define _X(x) S(foo##x.h)\n"
+                        "#define X(x) _X(x)\n"
+                        "#include X(__GNUC__)\n", "foo.c"),
+          args);
+}
+
+// The #include operand is built from __LINE__. The two directives are on
+// lines 4 and 6 of the source, matching the foo4.h and foo6.h created here.
+TEST_F(IncludeProcessorTest, include_with_cpp_predefined) {
+  CreateTmpFile("#include <stdio.h>\n", "foo4.h");
+  CreateTmpFile("#include <vector>\n", "foo6.h");
+
+  static const char kSource[] =
+      "#define S(x) #x\n"
+      "#define _X(x) S(foo##x.h)\n"
+      "#define X(x) _X(x)\n"
+      "#include X(__LINE__)\n"
+      "\n"
+      "#include X(__LINE__)\n";
+  const string source = CreateTmpFile(kSource, "foo.cc");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/g++", source, no_extra_args);
+}
+
+// once.h starts with #pragma once and is included twice. It would include
+// <stdio.h> only if ONCE were already defined, which it defines itself on
+// the first pass.
+TEST_F(IncludeProcessorTest, include_with_pragma_once) {
+  static const char kOnceHeader[] =
+      "#pragma once\n"
+      "#ifdef ONCE\n"
+      "#include <stdio.h>\n"
+      "#endif\n"
+      "#ifndef ONCE\n"
+      "#define ONCE\n"
+      "#endif\n";
+  CreateTmpFile(kOnceHeader, "once.h");
+
+  static const char kSource[] =
+      "#include \"once.h\"\n"
+      "#include \"once.h\"\n";
+  const string source = CreateTmpFile(kSource, "foo.c");
+  std::vector<string> no_extra_args;
+  RunTest("/usr/bin/gcc", source, no_extra_args);
+}
+
+// An #ifndef / #elif / #elif / #else chain where only the
+// "#elif defined(__DATE__)" branch, which includes foo.h, should be taken.
+//
+// The third branch was previously written "elif defined(__DATE__)" without
+// the leading '#'. That made it plain text inside a skipped group, so the
+// chain fell through to #else and included dummy3.h instead of foo.h.
+TEST_F(IncludeProcessorTest, include_with_ifdefs) {
+  CreateTmpFile("#include <string>\n", "foo.h");
+  CreateTmpFile("#include <vector>\n", "dummy1.h");
+  CreateTmpFile("#include <set>\n", "dummy2.h");
+  CreateTmpFile("#include <map>\n", "dummy3.h");
+  std::vector<string> args;
+  RunTest("/usr/bin/g++",
+          CreateTmpFile("#define T 1\n"
+                        "#ifndef T\n"
+                        "#include \"dummy1.h\"\n"
+                        "#elif !__STDC__\n"
+                        "#include \"dummy2.h\"\n"
+                        "#elif defined(__DATE__)\n"
+                        "#include \"foo.h\"\n"
+                        "#else\n"
+                        "#include \"dummy3.h\"\n"
+                        "#endif\n",
+                        "foo.cc"),
+          args);
+}
+
+TEST_F(IncludeProcessorTest, include_with_if_elif_else) {
+  std::vector<string> args;
+  RunTest("/usr/bin/g++",
+          CreateTmpFile("#define A 1\n"
+                        "#define B 0\n"
+                        "#if A\n"
+                        "# define A_DEFINED 1\n"
+                        "#elif B\n"
+                        "# define B_DEFINED 1\n"
+                        "#else\n"
+                        "# define A_DEFINED 0\n"
+                        "# define B_DEFINED 0\n"
+                        "#endif\n"
+                        "\n"
+                        "#if A_DEFINED\n"
+                        "# include <vector>\n"
+                        "#endif\n", "foo.cc"),
+          args);
+}
+
+TEST_F(IncludeProcessorTest, include_with_cond_expr_1) {
+  CreateTmpFile("#define A(a, b) a + b\n"
+                "#define B(x) 4\n"
+                "#define C(x) -(x)\n", "util.h");
+  CreateTmpFile("#include <string>\n", "foo.h");
+  std::vector<string> args;
+  RunTest("/usr/bin/g++",
+          CreateTmpFile("#include \"util.h\"\n"
+                        "#if A(1, 2) * B() == 9\n"
+                        "#include \"foo.h\"\n"
+                        "#endif\n",
+                        "foo.cc"),
+          args);
+  RunTest("/usr/bin/g++",
+          CreateTmpFile("#include \"util.h\"\n"
+                        "#if C(A(1, 2)) * B() == -12\n"
+                        "#include \"foo.h\"\n"
+                        "#endif\n",
+                        "foo.cc"),
+          args);
+  RunTest("/usr/bin/g++",
+          CreateTmpFile("#include \"util.h\"\n"
+                        "#if A(1, 2) < 4\n"
+                        "#include \"foo.h\"\n"
+                        "#endif\n",
+                        "foo.cc"),
+          args);
+  RunTest("/usr/bin/g++",
+          CreateTmpFile("#include \"util.h\"\n"
+                        "#if 0\n"
+                        "#if A(1, 2) < 4\n"
+                        "#include \"dummy.h\"\n"
+                        "#endif\n"
+                        "#endif\n",
+                        "foo.cc"),
+          args);
+  RunTest("/usr/bin/g++",
+          CreateTmpFile("#include \"util.h\"\n"
+                        "#if defined(A) || defined AB\n"
+                        "#include \"foo.h\"\n"
+                        "#endif\n",
+                        "foo.cc"),
+          args);
+  RunTest("/usr/bin/g++",
+          CreateTmpFile("#include \"util.h\"\n"
+                        "#if defined(A) && defined(AB)\n"
+                        "#include \"dummy.h\"\n"
+                        "#endif\n",
+                        "foo.cc"),
+          args);
+}
+
+TEST_F(IncludeProcessorTest, include_nested) {
+  std::vector<string> args;
+  CreateTmpFile("#ifdef A\n"
+                "# include <stdio.h>\n"
+                "#else\n"
+                "# define A\n"
+                "# include \"foo.h\"\n"
+                "#endif\n",
+                "foo.h");
+  RunTest("/usr/bin/gcc",
+          CreateTmpFile("#include \"foo.h\"\n", "foo.c"),
+          args);
+}
+
+TEST_F(IncludeProcessorTest, include_with_macro) {
+  const string& bare_gcc = "/usr/bin/g++";
+
+  const string& source_file = CreateTmpFile(
+      "#define INCLUDE <a.h>\n"
+      "#include INCLUDE\n",
+      "a.cc");
+
+  CreateTmpFile(
+      "#define FOO 100\n",
+      "a.h");
+  CreateTmpFile(
+      "#define FOO 200\n",
+      file::JoinPath("a", "a.h"));
+
+  std::vector<string> args;
+  args.push_back(bare_gcc);
+  args.push_back("-Ia");
+  args.push_back("-c");
+  args.push_back(source_file);
+
+  std::set<string> expected;
+  expected.insert(file::JoinPath("a", "a.h"));
+
+  std::set<string> files = RunIncludeProcessor(source_file, args);
+  ASSERT_EQ(expected.size(), files.size());
+  EXPECT_EQ(expected, files);
+}
+
+TEST_F(IncludeProcessorTest, include_twice_with_macro) {
+  std::vector<string> args;
+  CreateTmpFile("#include A\n", "foo.h");
+  CreateTmpFile("#include <string>\n", "tmp.h");
+  RunTest("/usr/bin/g++",
+          CreateTmpFile("#define A <vector>\n"
+                        "#include \"foo.h\"\n"
+                        "#undef A\n"
+                        "#define A \"./tmp.h\"\n"
+                        "#include \"foo.h\"\n", "foo.cc"),
+          args);
+}
+
+TEST_F(IncludeProcessorTest, include_time_h) {
+  std::vector<string> args;
+  RunTest("/usr/bin/gcc",
+          CreateTmpFile("#include <sys/types.h>\n"
+                        "#include <time.h>\n", "time.c"),
+          args);
+}
+
+TEST_F(IncludeProcessorTest, base_file) {
+  std::vector<string> args;
+  RunTest("/usr/bin/gcc",
+          CreateTmpFile("#ifdef X\n"
+                        "# include <stdio.h>\n"
+                        "#else\n"
+                        "# define X\n"
+                        "# include __BASE_FILE__\n"
+                        "#endif", "foo.c"),
+          args);
+}
+
+TEST_F(IncludeProcessorTest, has_include) {
+  const string define_has_include =
+    "#ifndef __has_include\n"
+    "# define __has_include(x) 0\n"
+    "#endif\n";
+
+  for (const auto& compiler : GccLikeCompilers()) {
+    std::vector<string> args(compiler.additional_args);
+
+    // Check __has_include is defined.
+    RunTest(compiler.path,
+            CreateTmpFile("#ifdef __has_include\n"
+                          "# include <stddef.h>\n"
+                          "#endif", "foo.c"),
+            args);
+
+    // Check __has_include__ is hidden. (for GCC 5 hack)
+    RunTest(compiler.path,
+            CreateTmpFile("#ifdef __has_include__\n"
+                          "# include <stdint.h>\n"
+                          "#endif\n"
+                          "#if defined(__has_include) && !defined(__clang__)\n"
+                          "# if __has_include__(<stddef.h>)\n"
+                          "#  include <stddef.h>\n"
+                          "# endif\n"
+                          "#endif\n", "foo.c"),
+            args);
+
+    // '<' include check.
+    RunTest(compiler.path,
+            CreateTmpFile(define_has_include +
+                          "#if __has_include(<stddef.h>)\n"
+                          "# include <stddef.h>\n"
+                          "#endif\n", "foo.c"),
+            args);
+
+    // '<' include check with expansion.
+    RunTest(compiler.path,
+            CreateTmpFile(define_has_include +
+                          "#define X(name) <std##name.h>\n"
+                          "#if __has_include(X(int))\n"
+                          "# include X(int)\n"
+                          "#endif\n", "foo.c"),
+            args);
+
+    // Nonexistent '<' include check.
+    RunTest(compiler.path,
+            CreateTmpFile(define_has_include +
+                          "#if __has_include(<foo.h>)\n"
+                          "# include <foo.h>\n"
+                          "#else\n"
+                          "# include <string.h>\n"
+                          "#endif\n", "foo.c"),
+            args);
+
+    // '<' include check with whitespaces.
+    CreateTmpFile("", "white  space.h");
+    RunTest(compiler.path,
+            CreateTmpFile(define_has_include +
+                          "#if __has_include(<white  space.h>)\n"
+                          "# include <white  space.h>\n"
+                          "#endif\n", "foo.c"),
+            args);
+
+    // Nonexistent '"' include check.
+    RunTest(compiler.path,
+            CreateTmpFile(define_has_include +
+                          "#if __has_include(\"bar.h\")\n"
+                          "# include \"bar.h\"\n"
+                          "#else\n"
+                          "# include <string.h>\n"
+                          "#endif\n", "bar.c"),
+            args);
+
+    // '"' include check.
+    CreateTmpFile("#include <stdio.h>\n", "baz.h");
+    RunTest(compiler.path,
+            CreateTmpFile(define_has_include +
+                          "#if __has_include(\"baz.h\")\n"
+                          "# include \"baz.h\"\n"
+                          "#else\n"
+                          "# include <string.h>\n"
+                          "#endif\n", "baz.c"),
+            args);
+
+    CreateTmpFile("#define FOOBAR 100\n", "a.h");
+    CreateTmpFile("#define FOOBAR 100\n", file::JoinPath("a", "c.h"));
+    args.push_back("-Ia");
+
+    RunTest(compiler.path,
+            CreateTmpFile(define_has_include +
+                          "#if __has_include(<a.h>)\n"
+                          "# include <a.h>\n"
+                          "#else\n"
+                          "# include <string.h>\n"
+                          "#endif\n", "a.c"),
+            args);
+    RunTest(compiler.path,
+            CreateTmpFile(define_has_include +
+                          "#if __has_include(<b.h>)\n"
+                          "# include <b.h>\n"
+                          "#else\n"
+                          "# include <string.h>\n"
+                          "#endif\n", "b.c"),
+            args);
+    RunTest(compiler.path,
+            CreateTmpFile(define_has_include +
+                          "#if __has_include(<c.h>)\n"
+                          "# include <c.h>\n"
+                          "#else\n"
+                          "# include <string.h>\n"
+                          "#endif\n", "c.c"),
+            args);
+  }
+}
+
+TEST_F(IncludeProcessorTest, has_include_next) {
+  const string define_has_include_next =
+      "#ifndef __has_include_next\n"
+      "# define __has_include_next(x) 0\n"
+      "#endif\n";
+
+  for (const auto& compiler : GccLikeCompilers()) {
+    std::vector<string> args(compiler.additional_args);
+
+    // Check __has_include_next existence (no fallback definition is added).
+    RunTest(compiler.path,
+            CreateTmpFile("#ifdef __has_include_next\n"
+                          "# include <stdio.h>\n"
+                          "#endif", "foo.c"),
+            args);
+
+    // include_next check.
+    args = compiler.additional_args;
+    args.push_back("-I" + tmpdir_util_->tmpdir());
+    CreateTmpFile(
+        define_has_include_next +
+        "#if __has_include_next(<stdio.h>)\n"
+        "# include_next <stdio.h>\n"
+        "#else\n"
+        "# include <stddef.h>\n"
+        "#endif\n", "stdio.h");
+    RunTest(compiler.path,
+            CreateTmpFile("#include <stdio.h>\n", "foo.c"),
+            args);
+
+    // Nonexistent include_next check.
+    CreateTmpFile(
+        define_has_include_next +
+        "#if __has_include_next(<foo.h>)\n"
+        "# include_next <foo.h>\n"
+        "#endif\n", "foo.h");
+    RunTest(compiler.path,
+            CreateTmpFile("#include <foo.h>\n", "foo.c"),
+            args);
+
+    CreateTmpFile(
+        define_has_include_next +
+        "#if __has_include_next(<a.h>)\n"
+        "# include_next <a.h>\n"
+        "#else\n"
+        "# include <stddef.h>\n"
+        "#endif\n", "a.h");
+    args.push_back("-I.");
+    args.push_back("-Ia");
+
+    RunTest(compiler.path,
+            CreateTmpFile("#include <a.h>\n", "foo.c"),
+            args);
+
+    const string& ah =
+        CreateTmpFile("#define FOOBAR 100\n", file::JoinPath("a", "a.h"));
+    RunTest(compiler.path,
+            CreateTmpFile("#include <a.h>\n", "foo.c"),
+            args);
+
+    // Remove ah because it should not exist in next loop.
+    remove(ah.c_str());
+  }
+}
+
+TEST_F(IncludeProcessorTest, has_feature) {
+  const string define_has_feature =
+    "#ifndef __has_feature\n"
+    "# define __has_feature(x) 0\n"
+    "#endif\n";
+
+  for (const auto& compiler : GccLikeCompilers()) {
+    std::vector<string> args(compiler.additional_args);
+
+    // Check the pre-defined macro itself.
+    RunTest(compiler.path,
+            CreateTmpFile(
+                "#ifdef __has_feature\n"
+                "# include <stdio.h>\n"
+                "#else\n"
+                "# include <stddef.h>\n"
+                "#endif\n", "foo.c"),
+            args);
+
+    RunTest(compiler.path,
+            CreateTmpFile(
+                define_has_feature +
+                "#if __has_feature(attribute_cf_returns_retained)\n"
+                "# include <stdio.h>\n"
+                "#else\n"
+                "# include <stddef.h>\n"
+                "#endif\n", "foo.c"),
+            args);
+
+    RunTest(compiler.path,
+            CreateTmpFile(
+                define_has_feature +
+                "#if __has_feature(no_such_feature)\n"
+                "# include <stdio.h>\n"
+                "#else\n"
+                "# include <stddef.h>\n"
+                "#endif\n", "foo.c"),
+            args);
+
+    // When feature name has both leading and trailing __,
+    // they should be ignored. __feature__ is normalized to feature.
+    RunTest(compiler.path,
+            CreateTmpFile(
+                define_has_feature +
+                "#if __has_feature(__attribute_cf_returns_retained__)\n"
+                "# include <stdio.h>\n"
+                "#else\n"
+                "# include <stddef.h>\n"
+                "#endif\n", "foo.c"),
+            args);
+
+    // When feature name has one of leading or trailing __,
+    // they cannot be ignored.
+    RunTest(compiler.path,
+            CreateTmpFile(
+                define_has_feature +
+                "#if __has_feature(attribute_cf_returns_retained__)\n"
+                "# include <stdio.h>\n"
+                "#else\n"
+                "# include <stddef.h>\n"
+                "#endif\n", "foo.c"),
+            args);
+    RunTest(compiler.path,
+            CreateTmpFile(
+                define_has_feature +
+                "#if __has_feature(__attribute_cf_returns_retained)\n"
+                "# include <stdio.h>\n"
+                "#else\n"
+                "# include <stddef.h>\n"
+                "#endif\n", "foo.c"),
+            args);
+  }
+}
+
+TEST_F(IncludeProcessorTest, has_extension) {
+  const string define_has_extension =
+    "#ifndef __has_extension\n"
+    "# define __has_extension(x) 0\n"
+    "#endif\n";
+
+  for (const auto& compiler : GccLikeCompilers()) {
+    std::vector<string> args(compiler.additional_args);
+
+    // Check the pre-defined macro itself.
+    RunTest(compiler.path,
+            CreateTmpFile(
+                "#ifdef __has_extension\n"
+                "# include <stdio.h>\n"
+                "#else\n"
+                "# include <stddef.h>\n"
+                "#endif\n", "foo.c"),
+            args);
+
+    RunTest(compiler.path,
+            CreateTmpFile(
+                define_has_extension +
+                "#if __has_extension(c_static_assert)\n"
+                "# include <stdio.h>\n"
+                "#else\n"
+                "# include <stddef.h>\n"
+                "#endif\n", "foo.c"),
+            args);
+  }
+}
+
+TEST_F(IncludeProcessorTest, has_cpp_attribute) {
+  const string define_has_cpp_attribute =
+    "#ifndef __has_cpp_attribute\n"
+    "# define __has_cpp_attribute(x) 0\n"
+    "#endif\n";
+
+  for (const auto& compiler : GccLikeCompilers()) {
+    std::vector<string> args(compiler.additional_args);
+
+    // Check __has_cpp_attribute existence.
+    // Don't add define_has_cpp_attribute.
+    RunTest(compiler.path,
+            CreateTmpFile("#ifdef __has_cpp_attribute\n"
+                          "# include <stdio.h>\n"
+                          "#else\n"
+                          "# include <stddef.h>\n"
+                          "#endif\n", "foo.c"),
+            args);
+
+    // This example is taken from
+    // http://clang.llvm.org/docs/LanguageExtensions.html
+    // Note: __has_cpp_attribute(clang::fallthrough) does not work in C mode,
+    // so the check is wrapped in #ifdef __cplusplus.
+    RunTest(compiler.path,
+            CreateTmpFile(define_has_cpp_attribute +
+                          "#ifdef __cplusplus\n"
+                          "#if __has_cpp_attribute(clang::fallthrough)\n"
+                          "# include <stdio.h>\n"
+                          "#else\n"
+                          "# include <stddef.h>\n"
+                          "#endif\n"
+                          "#endif\n", "foo.c"),
+            args);
+
+    // This example is taken from
+    // http://isocpp.org/std/standing-documents/sd-6-sg10-feature-test-recommendations
+    RunTest(compiler.path,
+            CreateTmpFile(define_has_cpp_attribute +
+                          "#if __has_cpp_attribute(deprecated)\n"
+                          "# include <stdio.h>\n"
+                          "#else\n"
+                          "# include <stddef.h>\n"
+                          "#endif\n", "foo.c"),
+            args);
+  }
+}
+
+TEST_F(IncludeProcessorTest, has_declspec_attribute) {
+  const string define_has_declspec_attribute =
+    "#ifndef __has_declspec_attribute\n"
+    "# define __has_declspec_attribute(x) 0\n"
+    "#endif\n";
+
+  for (const auto& compiler : GccLikeCompilers()) {
+    std::vector<string> args(compiler.additional_args);
+
+    // Check __has_declspec_attribute existence.
+    // Don't add define_has_declspec_attribute.
+    RunTest(compiler.path,
+            CreateTmpFile("#ifdef __has_declspec_attribute\n"
+                          "# include <stdio.h>\n"
+                          "#else\n"
+                          "# include <stddef.h>\n"
+                          "#endif\n", "foo.c"),
+            args);
+
+    RunTest(compiler.path,
+            CreateTmpFile(define_has_declspec_attribute +
+                          "#if __has_declspec_attribute(__stdcall)\n"
+                          "# include <stdio.h>\n"
+                          "#else\n"
+                          "# include <stddef.h>\n"
+                          "#endif\n", "foo.c"),
+            args);
+  }
+}
+
+TEST_F(IncludeProcessorTest, has_builtin) {
+  const string define_has_builtin =
+    "#ifndef __has_builtin\n"
+    "# define __has_builtin(x) 0\n"
+    "#endif\n";
+
+  for (const auto& compiler : GccLikeCompilers()) {
+    std::vector<string> args(compiler.additional_args);
+
+    // Check __has_builtin existence.
+    // Don't add define_has_builtin.
+    RunTest(compiler.path,
+            CreateTmpFile("#ifdef __has_builtin\n"
+                          "# include <stdio.h>\n"
+                          "#else\n"
+                          "# include <stddef.h>\n"
+                          "#endif\n", "foo.c"),
+            args);
+
+    RunTest(compiler.path,
+            CreateTmpFile(define_has_builtin +
+                          "#if __has_builtin(_InterlockedExchange)\n"
+                          "# include <stdio.h>\n"
+                          "#else\n"
+                          "# include <stddef.h>\n"
+                          "#endif\n", "foo.c"),
+            args);
+
+    RunTest(compiler.path,
+            CreateTmpFile(define_has_builtin +
+                          "#if __has_builtin(__atomic_exchange)\n"
+                          "# include <stdio.h>\n"
+                          "#else\n"
+                          "# include <stddef.h>\n"
+                          "#endif\n", "foo.c"),
+            args);
+  }
+}
+
+TEST_F(IncludeProcessorTest, dont_include_directory) {
+  CreateTmpDir("iostream");
+
+  std::vector<string> args;
+  args.push_back("-I" + tmpdir_util_->tmpdir());
+  RunTest("/usr/bin/gcc",
+          CreateTmpFile("#include <iostream>", "foo.cpp"),
+          args);
+}
+
+#else
+
+// TODO: Add more IncludeProcessorTest for VC
+TEST_F(IncludeProcessorTest, stdio) {
+  std::vector<string> args;
+  RunClTest(CreateTmpFile("#include <stdio.h>", "foo.c"), args);
+}
+
+TEST_F(IncludeProcessorTest, iostream) {
+  std::vector<string> args;
+  RunClTest(CreateTmpFile("#include <iostream>", "foo.cpp"), args);
+}
+
+TEST_F(IncludeProcessorTest, commandline_define) {
+  std::vector<string> args;
+  args.push_back("/DDEBUG");
+  RunClTest(CreateTmpFile("#ifdef DEBUG\r\n#include <iostream>\r\n#endif\r\n",
+                          "foo.cpp"), args);
+}
+
+
+TEST_F(IncludeProcessorTest, AtFile) {
+  string at_file = CreateTmpFile("/DDEBUG", "at_file.rsp");
+  at_file = "@" + at_file;
+  std::vector<string> args;
+  args.push_back(at_file.c_str());
+  RunClTest(CreateTmpFile("#ifdef DEBUG\r\n#include <iostream>\r\n#endif\r\n",
+                          "foo.cpp"), args);
+}
+
+TEST_F(IncludeProcessorTest, dont_include_directory) {
+  const string& iostream_dir = file::JoinPath(
+      tmpdir_util_->tmpdir(), "iostream");
+  CreateDirectoryA(iostream_dir.c_str(), nullptr);
+
+  std::vector<string> args;
+  args.push_back("/I" + tmpdir_util_->tmpdir());
+  RunClTest(CreateTmpFile("#include <iostream>", "foo.cpp"), args);
+}
+
+#endif  // !_WIN32
+
+TEST_F(IncludeProcessorTest, define_defined_with_paren) {
+  // "defined(FOO)" produced by expanding an object-like macro inside #if must
+  // be evaluated, so bar.h is expected for both the gcc and cl command lines.
+  const string& bare_gcc = "/usr/bin/g++";
+  const string& bare_cl = "cl.exe";
+  const string& source_file = CreateTmpFile(
+      "#define FOO\n"
+      "#define DEFINED defined(FOO)\n"
+      "#if DEFINED\n"
+      "# include \"bar.h\"\n"
+      "#endif\n",
+      "foo.cc");
+  string included = CreateTmpFile("", "bar.h");
+
+  {
+    // gcc-style command line; the source file is passed like in sibling tests.
+    std::vector<string> args {bare_gcc, "-c", source_file};
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(1U, files.size());
+    EXPECT_EQ(included, *files.begin());
+  }
+
+  {
+    // cl-style command line; same expectation as define_defined_without_paren.
+    std::vector<string> args {bare_cl, "/c", source_file};
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(1U, files.size());
+    EXPECT_EQ(included, *files.begin());
+  }
+}
+
+TEST_F(IncludeProcessorTest, define_defined_without_paren) {
+  const string& bare_gcc = "/usr/bin/g++";
+  const string& bare_cl = "cl.exe";
+  const string& source_file = CreateTmpFile(
+      "#define FOO\n"
+      "#define DEFINED defined FOO\n"
+      "#if DEFINED\n"
+      "# include \"bar.h\"\n"
+      "#endif\n"
+      ,
+      "foo.cc");
+  string included = CreateTmpFile("", "bar.h");
+
+  {
+    std::vector<string> args;
+    args.push_back(bare_gcc);
+    args.push_back("-c");
+    args.push_back(source_file);
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(1U, files.size());
+    EXPECT_EQ(included, *files.begin());
+  }
+
+  {
+    std::vector<string> args;
+    args.push_back(bare_cl);
+    args.push_back("/c");
+    args.push_back(source_file);
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(1U, files.size());
+    EXPECT_EQ(included, *files.begin());
+  }
+}
+
+TEST_F(IncludeProcessorTest, comment_in_macro) {
+  const string& bare_gcc = "/usr/bin/g++";
+  const string& bare_cl = "cl.exe";
+  const string& source_file = CreateTmpFile(
+      "#define BAR bar.h /**/\n"
+      "#define STR_I(x) #x\n"
+      "#define STR(x) STR_I(x)\n"
+      "#include STR(BAR)\n"
+      ,
+      "foo.cc");
+  string included = CreateTmpFile("", "bar.h");
+
+  {
+    std::vector<string> args;
+    args.push_back(bare_gcc);
+    args.push_back("-c");
+    args.push_back(source_file);
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(1U, files.size());
+    EXPECT_EQ(included, *files.begin());
+  }
+
+  {
+    std::vector<string> args;
+    args.push_back(bare_cl);
+    args.push_back("/c");
+    args.push_back(source_file);
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(1U, files.size());
+    EXPECT_EQ(included, *files.begin());
+  }
+}
+
+TEST_F(IncludeProcessorTest, comment_in_func_macro) {
+  const string& bare_gcc = "/usr/bin/g++";
+  const string& bare_cl = "cl.exe";
+  const string& source_file = CreateTmpFile(
+      "#define BAR(x) bar.h /**/\n"
+      "#define STR_I(x) #x\n"
+      "#define STR(x) STR_I(x)\n"
+      "#include STR(BAR(hoge))\n"
+      ,
+      "foo.cc");
+  string included = CreateTmpFile("", "bar.h");
+
+  {
+    std::vector<string> args;
+    args.push_back(bare_gcc);
+    args.push_back("-c");
+    args.push_back(source_file);
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(1U, files.size());
+    EXPECT_EQ(included, *files.begin());
+  }
+
+  {
+    std::vector<string> args;
+    args.push_back(bare_cl);
+    args.push_back("/c");
+    args.push_back(source_file);
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(1U, files.size());
+    EXPECT_EQ(included, *files.begin());
+  }
+}
+
+TEST_F(IncludeProcessorTest, opt_include) {
+  const string& header = CreateTmpFile("", "foo.h");
+  std::vector<string> args;
+  args.push_back("gcc");
+  args.push_back("-include");
+  args.push_back(header);
+
+  std::set<string> files = RunIncludeProcessor(CreateTmpFile("", "foo.c"),
+                                               args);
+  ASSERT_EQ(1U, files.size());
+  EXPECT_EQ(header, *files.begin());
+}
+
+TEST_F(IncludeProcessorTest, opt_include_in_cwd) {
+  CreateTmpFile("", "foo.h");
+  std::vector<string> args;
+  args.push_back("gcc");
+  args.push_back("-include");
+  args.push_back("foo.h");
+
+  std::set<string> files = RunIncludeProcessor(CreateTmpFile("", "foo.c"),
+                                               args);
+  ASSERT_EQ(1U, files.size());
+  EXPECT_EQ("foo.h", *files.begin());
+}
+
+TEST_F(IncludeProcessorTest, vc_opt_fi) {
+  const string& header = CreateTmpFile("", "foo.h");
+  std::vector<string> args;
+  args.push_back("cl.exe");
+  args.push_back("/c");
+  args.push_back("/FI" + header);
+
+  std::set<string> files = RunIncludeProcessor(CreateTmpFile("", "foo.c"),
+                                               args);
+  ASSERT_EQ(1U, files.size());
+  ASSERT_EQ(header, *files.begin());
+}
+
+TEST_F(IncludeProcessorTest, no_newline_at_eof) {
+  const string& bare_gcc = "/usr/bin/g++";
+  const string& bare_cl = "cl.exe";
+  const string& source_file = CreateTmpFile(
+      "#if 1\n"
+      "#include \"bar.h\"\n"
+      "#include \"baz.h\"\n"
+      "#endif\n",
+      "foo.cc");
+  string bar_h = CreateTmpFile(
+      "#if 0\n"
+      "#include \"hoge.h\"\n"
+      "#endif",
+      "bar.h");
+  string baz_h = CreateTmpFile("", "baz.h");
+  string hoge_h = CreateTmpFile("", "hoge.h");
+
+  std::set<string> expected;
+  expected.insert(bar_h);
+  expected.insert(baz_h);
+
+  {
+    std::vector<string> args;
+    args.push_back(bare_gcc);
+    args.push_back("-c");
+    args.push_back(source_file);
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(2U, files.size());
+    EXPECT_EQ(expected, files);
+  }
+
+  {
+    std::vector<string> args;
+    args.push_back(bare_cl);
+    args.push_back("/c");
+    args.push_back(source_file);
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(2U, files.size());
+    EXPECT_EQ(expected, files);
+  }
+}
+
+TEST_F(IncludeProcessorTest, no_newline_at_eof_identifier) {
+  const string& bare_gcc = "/usr/bin/gcc";
+  const string& bare_cl = "cl.exe";
+  const string& source_file = CreateTmpFile(
+      "#include \"foo.h\"\n"
+      "#include \"bar.h\"\n"
+      "#\n",
+      "foo.cc");
+  const string& foo_h = CreateTmpFile(
+      "#define foo",  // No newline at the end after an identifier.
+      "foo.h");
+  const string& bar_h = CreateTmpFile(
+      "#ifdef foo\n"
+      "#include \"baz.h\"\n"
+      "#endif\n",
+      "bar.h");
+  const string& baz_h = CreateTmpFile("", "baz.h");
+
+  std::set<string> expected;
+  expected.insert(foo_h);
+  expected.insert(bar_h);
+  expected.insert(baz_h);
+
+  {
+    std::vector<string> args;
+    args.push_back(bare_gcc);
+    args.push_back("-c");
+    args.push_back(source_file);
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+
+  {
+    std::vector<string> args;
+    args.push_back(bare_cl);
+    args.push_back("/c");
+    args.push_back(source_file);
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+}
+
+TEST_F(IncludeProcessorTest, no_newline_at_eof_number) {
+  const string& bare_gcc = "/usr/bin/gcc";
+  const string& bare_cl = "cl.exe";
+  const string& source_file = CreateTmpFile(
+      "#include \"foo.h\"\n"
+      "#define S(a) #a\n"
+      "#define X(a) S(a.h)\n"
+      "#include X(FOO)\n"
+      "#\n",
+      "foo.cc");
+  const string& foo_h = CreateTmpFile(
+      "#define FOO 999",  // No newline at the end after a pp-number.
+      "foo.h");
+  const string& nine_h = CreateTmpFile("", "999.h");
+
+  std::set<string> expected;
+  expected.insert(foo_h);
+  expected.insert(nine_h);
+
+  {
+    std::vector<string> args;
+    args.push_back(bare_gcc);
+    args.push_back("-c");
+    args.push_back(source_file);
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+
+  {
+    std::vector<string> args;
+    args.push_back(bare_cl);
+    args.push_back("/c");
+    args.push_back(source_file);
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+}
+
+TEST_F(IncludeProcessorTest, condition_lines_lf) {
+  const string& bare_gcc = "/usr/bin/g++";
+  const string& source_file = CreateTmpFile(
+      "#define A 1\n"
+      "#define B 1\n"
+      "#if defined(A) && \\\n"
+      "    defined(B)\n"
+      "#include \"bar.h\"\n"
+      "#endif\n",
+      "foo.cc");
+  string bar_h = CreateTmpFile("", "bar.h");
+
+  std::set<string> expected;
+  expected.insert(bar_h);
+
+  {
+    std::vector<string> args;
+    args.push_back(bare_gcc);
+    args.push_back("-c");
+    args.push_back(source_file);
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(1U, files.size());
+    EXPECT_EQ(expected, files);
+  }
+}
+
+TEST_F(IncludeProcessorTest, condition_lines_crlf) {
+  // An #if condition continued with backslash + CRLF; every line ends in CRLF.
+  const string& bare_cl = "cl.exe";
+  const string& source_file = CreateTmpFile(
+      "#define A 1\r\n"
+      "#define B 1\r\n"
+      "#if defined(A) && \\\r\n"
+      "    defined(B)\r\n"
+      "#include \"bar.h\"\r\n"
+      "#endif\r\n",
+      "foo.cc");
+  string bar_h = CreateTmpFile("", "bar.h");
+
+  std::set<string> expected {bar_h};
+  {
+    std::vector<string> args;
+    args.push_back(bare_cl);
+    args.push_back("/c");
+    args.push_back(source_file);
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(1U, files.size());
+    EXPECT_EQ(expected, files);
+  }
+}
+
+TEST_F(IncludeProcessorTest, include_cur_from_include_paths) {
+  // b/7626343
+  //
+  // foo.cc includes "primpl.h" (created in dir2), which in turn includes
+  // "nspr.h"; that header only exists in dir1, the earlier include path.
+
+  const string& gcc = "/usr/bin/g++";
+  const string& cl = "cl.exe";
+  const string& dir1 = "dir1";
+  const string& dir2 = "dir2";
+
+  const string& source_file = CreateTmpFile(
+      "#include \"primpl.h\"\n",
+      "foo.cc");
+
+  // dir1/nspr.h: empty.
+  const string& nspr_h = file::JoinPath(dir1, "nspr.h");
+  CreateTmpFile("", nspr_h);
+
+  // dir2/primpl.h: pulls in nspr.h.
+  const string& primpl_h = file::JoinPath(dir2, "primpl.h");
+  CreateTmpFile("#include \"nspr.h\"\n", primpl_h);
+
+  std::set<string> expected {nspr_h, primpl_h};
+
+  {
+    // gcc-style command line.
+    std::vector<string> args {
+      gcc, "-I" + dir1, "-I" + dir2, "-c", source_file,
+    };
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+
+  {
+    // cl.exe-style command line.
+    std::vector<string> args {
+      cl, "/I" + dir1, "/I" + dir2, "/c", source_file,
+    };
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+}
+
+TEST_F(IncludeProcessorTest, include_next_multiple_file) {
+  // b/7461986
+  //
+  // Chain under test:
+  //   foo.cc -> limits.h (current dir) -> #include_next -> dir1/limits.h
+  //   -> dir1/syslimits.h
+  const string& gcc = "/usr/bin/g++";
+  const string& dir1 = "dir1";
+  const string& dir2 = "dir2";
+
+  const string& source_file = CreateTmpFile(
+      "#include \"limits.h\"\n",  // limits_h_0
+      "foo.cc");
+  const string& limits_h_0 = CreateTmpFile(
+      "#include_next \"limits.h\"\n",  // limits_h_1
+      "limits.h");
+
+  const string& limits_h_1 = file::JoinPath(dir1, "limits.h");
+  CreateTmpFile(
+      "#ifndef _LIBC_LIMITS_H\n"  // not defined yet
+      "#include \"syslimits.h\"\n"  // so it should be included
+      "#endif\n",
+      limits_h_1);
+  const string& syslimits_h = file::JoinPath(dir1, "syslimits.h");
+  CreateTmpFile("", syslimits_h);
+
+  // dir2/limits.h must not be picked: had it been included before
+  // limits_h_1, _LIBC_LIMITS_H would keep syslimits.h from being included.
+  const string& limits_h_2 = CreateTmpFile(
+      "#define _LIBC_LIMITS_H\n",
+      file::JoinPath(dir2, "limits.h"));
+
+  ASSERT_NE(limits_h_1, limits_h_2);
+
+  std::set<string> expected {limits_h_0, limits_h_1, syslimits_h};
+
+  {
+    std::vector<string> args {
+      gcc, "-I" + dir1, "-I" + dir2, "-c", source_file,
+    };
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+}
+
+TEST_F(IncludeProcessorTest, include_next_from_include_current_dir) {
+  // b/7461986
+  //
+  // Chain under test:
+  //   foo.cc -> limits.h (current dir) -> dir1/limits.h
+  //   -> dir1/syslimits.h -> dir2/limits.h
+  const string& gcc = "/usr/bin/g++";
+  const string& dir1 = "dir1";
+  const string& dir2 = "dir2";
+
+  const string& source_file = CreateTmpFile(
+      "#include \"limits.h\"\n",  // include limits_h_0 (curdir)
+      "foo.cc");
+  const string& limits_h_0 = CreateTmpFile(
+      "#include_next <limits.h>\n",  // include limits_h_1 (first inc dir)
+      "limits.h");
+
+  const string& limits_h_1 = file::JoinPath(dir1, "limits.h");
+  CreateTmpFile(
+      "#ifndef _LIBC_LIMITS_H\n"  // not defined yet
+      "#include \"syslimits.h\"\n"  // so it should be included
+      "#endif\n",
+      limits_h_1);
+  const string& syslimits_h = file::JoinPath(dir1, "syslimits.h");
+  CreateTmpFile(
+      "#include_next <limits.h>\n",  // include limits_h_2 (second inc dir)
+      syslimits_h);
+
+  // limits_h_2 is only reached through syslimits.h's #include_next.
+  const string& limits_h_2 = file::JoinPath(dir2, "limits.h");
+  CreateTmpFile("#define _LIBC_LIMITS_H\n", limits_h_2);
+
+  ASSERT_NE(limits_h_1, limits_h_2);
+
+  std::set<string> expected {limits_h_0, limits_h_1, syslimits_h, limits_h_2};
+
+  {
+    std::vector<string> args {
+      gcc, "-I" + dir1, "-I" + dir2, "-c", source_file,
+    };
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+}
+
+TEST_F(IncludeProcessorTest, include_next_from_next_dir) {
+  // b/7462563
+  //
+  // dir1/_clocale.h uses #include_next <clocale>; since it lives in dir1,
+  // the lookup is expected to skip dir1/clocale and pick dir2/clocale.
+  const string& bare_gcc = "/usr/bin/g++";
+  const string& source_file = CreateTmpFile(
+      "#include <_clocale.h>\n",  // clocale_h
+      "foo.cc");
+  const string& dir1 = "dir1";
+  const string& clocale_h = file::JoinPath(dir1, "_clocale.h");
+  CreateTmpFile(
+      "#include_next <clocale>\n",  // include clocale_2
+      clocale_h);
+  const string& clocale_1 = CreateTmpFile("", file::JoinPath(dir1, "clocale"));
+  const string& dir2 = "dir2";
+  const string& clocale_2 = file::JoinPath(dir2, "clocale");
+  CreateTmpFile("", clocale_2);
+
+  ASSERT_NE(clocale_1, clocale_2);
+
+  // clocale_1 must not be reported.
+  std::set<string> expected {clocale_h, clocale_2};
+
+  {
+    std::vector<string> args {
+      bare_gcc, "-I" + dir1, "-I" + dir2, "-c", source_file,
+    };
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+}
+
+TEST_F(IncludeProcessorTest, invalidated_macro_in_offspring) {
+  // foo.cc defines var1 and includes step1.h twice.  step1.h includes
+  // step2.h and then undefines var1, so step2.h sees var1 defined on the
+  // first pass only; on the second pass it defines var2 and pulls in step3.h.
+  const string& gcc = "/usr/bin/gcc";
+  const string& cl = "cl.exe";
+
+  const string& source_file = CreateTmpFile(
+      "#define var1\n"
+      "#include \"step1.h\"\n"
+      "#include \"step1.h\"\n"
+      "#\n",
+      "foo.cc");
+
+  const string& step1_h = CreateTmpFile(
+      "#include \"step2.h\"\n"
+      "#undef var1\n",
+      "step1.h");
+
+  const string& step2_h = CreateTmpFile(
+      "#if !defined var1\n"
+      "#define var2\n"
+      "#endif\n"
+      "\n"
+      "#ifdef var2\n"
+      "#include \"step3.h\"\n"
+      "#endif\n",
+      "step2.h");
+
+  const string& step3_h = CreateTmpFile("\n", "step3.h");
+
+  std::set<string> expected {step1_h, step2_h, step3_h};
+
+  {
+    // gcc-style command line.
+    std::vector<string> args {gcc, "-c", source_file};
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+
+  {
+    // cl.exe-style command line.
+    std::vector<string> args {cl, "/c", source_file};
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+}
+
+TEST_F(IncludeProcessorTest, include_ignore_dir) {
+  // A directory named "string" is created via tmpdir_util_;
+  // #include "string" is expected to ignore it and resolve to the regular
+  // file dir1/string found through the include path.
+  const string& gcc = "/usr/bin/gcc";
+  const string& cl = "cl.exe";
+  const string& source_file = CreateTmpFile(
+      "#include \"string\"\n",
+      "foo.cc");
+
+  const string& string_dir = "string";
+  CHECK(File::CreateDir(tmpdir_util_->FullPath(string_dir).c_str(), 0777));
+
+  const string& dir1 = "dir1";
+  const string& string_h = file::JoinPath(dir1, "string");
+  CreateTmpFile("", string_h);
+
+  // Only the regular file is expected in the result.
+  std::set<string> expected {string_h};
+
+  {
+    // gcc-style command line.
+    std::vector<string> args {gcc, "-I" + dir1, "-c", source_file};
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+
+  {
+    // cl.exe-style command line.
+    std::vector<string> args {cl, "/I" + dir1, "/c", source_file};
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+}
+
+TEST_F(IncludeProcessorTest, include_next_ignore_dir) {
+  // dir1/string does #include_next <string>.  Nothing is created under
+  // dir2, so the lookup has to continue past it and find dir3/string.
+  const string& gcc = "/usr/bin/gcc";
+  const string& cl = "cl.exe";
+  const string& dir1 = "dir1";
+  const string& dir2 = "dir2";
+  const string& dir3 = "dir3";
+
+  const string& source_file = CreateTmpFile(
+      "#include <foo.h>\n",
+      "foo.cc");
+
+  const string& foo_h = file::JoinPath(dir1, "foo.h");
+  CreateTmpFile("#include <string>\n", foo_h);
+
+  const string& string1 = file::JoinPath(dir1, "string");
+  CreateTmpFile("#include_next <string>\n", string1);
+
+  const string& string3 = file::JoinPath(dir3, "string");
+  CreateTmpFile("", string3);
+
+  std::set<string> expected {foo_h, string1, string3};
+
+  {
+    // gcc-style command line.
+    std::vector<string> args {
+      gcc, "-I" + dir1, "-I" + dir2, "-I" + dir3, "-c", source_file,
+    };
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+
+  {
+    // cl.exe-style command line.
+    std::vector<string> args {
+      cl, "/I" + dir1, "/I" + dir2, "/I" + dir3, "/c", source_file,
+    };
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+}
+
+TEST_F(IncludeProcessorTest, include_path_two_slashes_in_dir_cache) {
+  // b/7618390
+  //
+  // Two of the three includes spell their path with doubled slashes; all
+  // three are expected to resolve to the files created below.
+
+  const string& gcc = "/usr/bin/g++";
+  const string& cl = "cl.exe";
+  const string& source_file = CreateTmpFile(
+      "#include \"dir2//foo.h\"\n"  // foo_h
+      "#include \"dir3//dir4//bar.h\"\n"  // bar_h
+      "#include \"dir3/dir4/baz.h\"\n",  // baz_h
+      "foo.cc");
+
+  // Layout: dir1/dir2/foo.h, dir1/dir3/dir4/{bar.h,baz.h}; all empty.
+  const string& dir1 = "dir1";
+  const string& dir2 = file::JoinPath(dir1, "dir2");
+  const string& dir3 = file::JoinPath(dir1, "dir3");
+  const string& dir4 = file::JoinPath(dir3, "dir4");
+  const string& foo_h = file::JoinPath(dir2, "foo.h");
+  const string& bar_h = file::JoinPath(dir4, "bar.h");
+  const string& baz_h = file::JoinPath(dir4, "baz.h");
+  CreateTmpFile("", foo_h);
+  CreateTmpFile("", bar_h);
+  CreateTmpFile("", baz_h);
+
+  // Expected names are the JoinPath results built above.
+  std::set<string> expected {foo_h, bar_h, baz_h};
+
+  {
+    // gcc-style command line.
+    std::vector<string> args {gcc, "-I" + dir1, "-c", source_file};
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+
+  {
+    // cl.exe-style command line.
+    std::vector<string> args {cl, "/I" + dir1, "/c", source_file};
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+}
+
+TEST_F(IncludeProcessorTest, include_unresolved_path) {
+  // Includes that go through "..", optionally with doubled slashes, are
+  // expected to be reported with the ".." component kept (unresolved),
+  // not under the name returned when the file was created.
+  const string& gcc = "/usr/bin/g++";
+  const string& cl = "cl.exe";
+  const string& source_file = CreateTmpFile(
+      "#include \"dir2/../foo.h\"\n"  // foo_h
+      "#include \"dir2//../hoge.h\"\n"  // hoge_h
+      "#include \"dir3/../dir4/bar.h\"\n"  // bar_h
+      "#include \"dir3/..//dir4/baz.h\"\n",  // baz_h
+      "foo.cc");
+
+  // dir1 holds foo.h and hoge.h.
+  const string& dir1 = "dir1";
+  const string& full_dir1 = file::JoinPath(tmpdir_util_->tmpdir(), dir1);
+  CHECK(File::CreateDir(full_dir1.c_str(), 0777));
+  const string& foo_h = CreateTmpFile(
+      "",
+      file::JoinPath(dir1, "foo.h"));
+  const string& hoge_h = CreateTmpFile(
+      "",
+      file::JoinPath(dir1, "hoge.h"));
+
+  // dir1/dir2 gets no files; it is only traversed via "dir2/..".
+  const string& dir2 = file::JoinPath(dir1, "dir2");
+  const string& full_dir2 = file::JoinPath(tmpdir_util_->tmpdir(), dir2);
+  CHECK(File::CreateDir(full_dir2.c_str(), 0777));
+  const string& unresolved_foo_h =
+      file::JoinPath(file::JoinPath(dir2, ".."), "foo.h");
+  ASSERT_NE(unresolved_foo_h, foo_h);
+  const string& unresolved_hoge_h =
+      file::JoinPath(file::JoinPath(dir2, ".."), "hoge.h");
+  ASSERT_NE(unresolved_hoge_h, hoge_h);
+
+  // dir1/dir3 gets no files either; dir1/dir4 holds bar.h and baz.h.
+  const string& dir3 = file::JoinPath(dir1, "dir3");
+  const string& full_dir3 = file::JoinPath(tmpdir_util_->tmpdir(), dir3);
+  CHECK(File::CreateDir(full_dir3.c_str(), 0777));
+  const string& dir4 = file::JoinPath(dir1, "dir4");
+  const string& full_dir4 = file::JoinPath(tmpdir_util_->tmpdir(), dir4);
+  CHECK(File::CreateDir(full_dir4.c_str(), 0777));
+  const string& bar_h = CreateTmpFile(
+      "",
+      file::JoinPath(dir4, "bar.h"));
+  const string& baz_h = CreateTmpFile(
+      "",
+      file::JoinPath(dir4, "baz.h"));
+  const string& dir3_up_dir4 =
+      file::JoinPath(file::JoinPath(dir3, ".."), "dir4");
+  const string& unresolved_bar_h = file::JoinPath(dir3_up_dir4, "bar.h");
+  ASSERT_NE(unresolved_bar_h, bar_h);
+  const string& unresolved_baz_h = file::JoinPath(dir3_up_dir4, "baz.h");
+  ASSERT_NE(unresolved_baz_h, baz_h);
+
+  // Every expected name is one of the ".."-containing paths built above.
+  std::set<string> expected {
+    unresolved_foo_h, unresolved_hoge_h, unresolved_bar_h, unresolved_baz_h,
+  };
+
+  {
+    // gcc-style command line.
+    std::vector<string> args {gcc, "-I" + dir1, "-c", source_file};
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+
+  {
+    // cl.exe-style command line.
+    std::vector<string> args {cl, "/I" + dir1, "/c", source_file};
+
+    std::set<string> files = RunIncludeProcessor(source_file, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+}
+
+TEST_F(IncludeProcessorTest, newline_before_include) {
+  // foo.cc starts with an empty line, and the #include directive that
+  // follows has no trailing newline.
+  const string& dir1 = "dir1";
+
+  const string& foo_h = CreateTmpFile(
+      "", file::JoinPath(dir1, "foo.h"));
+  const string& source_file = CreateTmpFile(
+      "\n#include \"foo.h\"",
+      file::JoinPath(dir1, "foo.cc"));
+
+  std::vector<string> args {
+    "/usr/bin/g++", "-c", "-I" + dir1, source_file,
+  };
+
+  // The directive is expected to be recognized.
+  std::set<string> expected {foo_h};
+
+  std::set<string> files = RunIncludeProcessor(source_file, args);
+  ASSERT_EQ(expected.size(), files.size());
+  EXPECT_EQ(expected, files);
+}
+
+TEST_F(IncludeProcessorTest, newline_and_spaces_before_include) {
+  // The first line holds code plus trailing spaces; the #include sits on
+  // the second line, preceded only by spaces.
+  const string& dir1 = "dir1";
+
+  const string& foo_h = CreateTmpFile(
+      "", file::JoinPath(dir1, "foo.h"));
+  const string& source_file = CreateTmpFile(
+      "f();   \n   #include \"foo.h\"",
+      file::JoinPath(dir1, "foo.cc"));
+
+  std::vector<string> args {
+    "/usr/bin/g++", "-c", "-I" + dir1, source_file,
+  };
+
+  // The directive is expected to be recognized.
+  std::set<string> expected {foo_h};
+
+  std::set<string> files = RunIncludeProcessor(source_file, args);
+  ASSERT_EQ(expected.size(), files.size());
+  EXPECT_EQ(expected, files);
+}
+
+TEST_F(IncludeProcessorTest, noncomment_token_before_include) {
+  // "#include" follows real tokens on the same line, so it is not a
+  // directive and foo.h is expected not to be reported.
+  const string& dir1 = "dir1";
+
+  CreateTmpFile("", file::JoinPath(dir1, "foo.h"));
+  const string& source_file = CreateTmpFile(
+      "f(); \t   #include \"foo.h\"",
+      file::JoinPath(dir1, "foo.cc"));
+
+  std::vector<string> args {
+    "/usr/bin/g++", "-c", "-I" + dir1, source_file,
+  };
+
+  std::set<string> expected;
+
+  std::set<string> files = RunIncludeProcessor(source_file, args);
+  ASSERT_EQ(expected.size(), files.size());
+  EXPECT_EQ(expected, files);
+}
+
+TEST_F(IncludeProcessorTest, comment_slash_followed_by_include_simple) {
+  // Each #include is preceded by a line that contains only whitespace
+  // and/or a block comment and that ends with a backslash-newline, so the
+  // directive is joined onto that line.
+  const string& dir1 = "dir1";
+
+  const string& foo1_h = CreateTmpFile(
+      "", file::JoinPath(dir1, "foo1.h"));
+  const string& foo2_h = CreateTmpFile(
+      "", file::JoinPath(dir1, "foo2.h"));
+  const string& source_file = CreateTmpFile(
+      "   \\\n#include \"foo1.h\"\n  /* test */ \\\n#include \"foo2.h\"",
+      file::JoinPath(dir1, "foo.cc"));
+
+  std::vector<string> args {
+    "/usr/bin/g++", "-c", "-I" + dir1, source_file,
+  };
+
+  // Both directives are expected to be recognized.
+  std::set<string> expected {foo1_h, foo2_h};
+
+  std::set<string> files = RunIncludeProcessor(source_file, args);
+  ASSERT_EQ(expected.size(), files.size());
+  EXPECT_EQ(expected, files);
+}
+
+TEST_F(IncludeProcessorTest, comment_slash_followed_by_include_complex1) {
+  // Several backslash-continued lines (one of them ending in CRLF) made
+  // only of block comments and whitespace precede the #include.
+  const string& dir1 = "dir1";
+
+  const string& foo_h = CreateTmpFile(
+      "", file::JoinPath(dir1, "foo.h"));
+  const string& source_file = CreateTmpFile(
+      "  /* test */ \\\r\n /* test 2 */ /* */ \\\n"
+      "\\\n /* foo bar */ \\\n#include \"foo.h\"",
+      file::JoinPath(dir1, "foo.cc"));
+
+  std::vector<string> args {
+    "/usr/bin/g++", "-c", "-I" + dir1, source_file,
+  };
+
+  // The directive is expected to be recognized.
+  std::set<string> expected {foo_h};
+
+  std::set<string> files = RunIncludeProcessor(source_file, args);
+  ASSERT_EQ(expected.size(), files.size());
+  EXPECT_EQ(expected, files);
+}
+
+TEST_F(IncludeProcessorTest, comment_slash_followed_by_include_complex2) {
+  // Same comment/continuation prefix as the complex1 case, but the
+  // include target is given through the macro FOO.
+  const string& dir1 = "dir1";
+
+  const string& foo_h = CreateTmpFile(
+      "", file::JoinPath(dir1, "foo.h"));
+  const string& source_file = CreateTmpFile(
+      "#define FOO \"foo.h\"\n"
+      "  /* test */ \\\r\n /* test 2 */ /* */ \\\n"
+      "\\\n /* foo bar */ \\\n#include FOO",
+      file::JoinPath(dir1, "foo.cc"));
+
+  std::vector<string> args {
+    "/usr/bin/g++", "-c", "-I" + dir1, source_file,
+  };
+
+  // The directive is expected to be recognized.
+  std::set<string> expected {foo_h};
+
+  std::set<string> files = RunIncludeProcessor(source_file, args);
+  ASSERT_EQ(expected.size(), files.size());
+  EXPECT_EQ(expected, files);
+}
+
+TEST_F(IncludeProcessorTest, include_boost_pp_iterate) {
+  // The include target is produced by nested function-like macros:
+  // ITERATE() -> CAT(ITERATE_, INC(DEPTH())) -> ITERATE_1 -> <bar1.h>.
+  // Both bar1.h and bar2.h exist, but only bar1.h is expected.
+  const string& foo_cc = CreateTmpFile(
+      // simplified case for BOOST_PP_ITERATE
+      // cf. b/14593802
+      // <boost/preprocessor/cat.hpp>
+      "#define CAT(a, b) CAT_I(a, b)\n"
+      "#define CAT_I(a, b) CAT_II(~, a ## b)\n"
+      "#define CAT_II(p, res) res\n"
+      // <boost/preprocessor/arithmetic/inc.cpp>
+      "#define INC(x) INC_I(x)\n"
+      "#define INC_I(x) INC_ ## x\n"
+      "#define INC_0 1\n"
+      "#define INC_1 2\n"
+      // <boost/preprocessor/iteration/iterate.hpp>
+      "#define DEPTH() 0\n"
+      "\n"
+      "#define ITERATE() CAT(ITERATE_, INC(DEPTH()))\n"
+      "#define ITERATE_1 <bar1.h>\n"
+      "#define ITERATE_2 <bar2.h>\n"
+      // use ITERATE
+      "#include ITERATE()\n",
+      "foo.cc");
+
+  CreateTmpFile("", "bar1.h");
+  CreateTmpFile("", "bar2.h");
+  // bar1.h is found through the "." include dir, hence the "./" form of
+  // the expected name.
+  std::set<string> expected {file::JoinPath(".", "bar1.h")};
+
+  {
+    // gcc-style command line.
+    std::vector<string> args {"/usr/bin/g++", "-c", "-I.", foo_cc};
+
+    std::set<string> files = RunIncludeProcessor(foo_cc, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+
+  {
+    // cl.exe-style command line.
+    std::vector<string> args {"cl.exe", "/c", "/I.", foo_cc};
+
+    std::set<string> files = RunIncludeProcessor(foo_cc, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+}
+
+TEST_F(IncludeProcessorTest, include_boost_pp_iterate_va_args) {
+  // The include target BOOST_PP_FILENAME_1 is computed through variadic
+  // macros (__VA_ARGS__); it is expected to end up as <bar1.h>.
+  const string& foo_cc = CreateTmpFile(
+      // simplified case for BOOST_PP_ITERATE
+      // cf.
+      // boost v1.49.0
+      // TODO: MSVC has slightly different semantics in __VA_ARGS__,
+      // one more BOOST_PP_CAT needed?
+      // e.g. #define BOOST_PP_VARIADIC_SIZE(...) \
+      //  BOOST_PP_CAT(BOOST_PP_VARIADIC_SIZE_I(<same>),)
+      // <boost/preprocessor/cat.hpp>
+      "#define BOOST_PP_CAT(a, b) BOOST_PP_CAT_I(a, b)\n"
+      "#define BOOST_PP_CAT_I(a, b) a ## b\n"
+      // <boost/preprocessor/tuple/rem.hpp>
+      "#define BOOST_PP_REM(...) __VA_ARGS__\n"
+      // <boost/preprocessor/variadic/size.hpp>
+      "#define BOOST_PP_VARIADIC_SIZE(...) "
+      " BOOST_PP_VARIADIC_SIZE_I(__VA_ARGS__, 64, 63, 62, 61, 60, 59, 58, 57,"
+      " 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41,40,"
+      " 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23,"
+      " 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5,"
+      " 4, 3, 2, 1,)\n"
+      "#define BOOST_PP_VARIADIC_SIZE_I(e0, e1, e2, e3, e4, e5, e6, e7, e8,"
+      " e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, e20, e21, e22,"
+      " e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35, e36,"
+      " e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47,e48, e49, e50,"
+      " e51, e52, e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63,"
+      " size, ...) size\n"
+      // <boost/preprocessor/facilities/overload.hpp>
+      "#define BOOST_PP_OVERLOAD(prefix, ...) "
+      " BOOST_PP_CAT(prefix, BOOST_PP_VARIADIC_SIZE(__VA_ARGS__))\n"
+      // <boost/preprocessor/variadic/elem.hpp>
+      "#define BOOST_PP_VARIADIC_ELEM(n, ...) "
+      " BOOST_PP_CAT(BOOST_PP_VARIADIC_ELEM_, n)(__VA_ARGS__,)\n"
+      "#define BOOST_PP_VARIADIC_ELEM_0(e0, ...) e0\n"
+      "#define BOOST_PP_VARIADIC_ELEM_1(e0, e1, ...) e1\n"
+      "#define BOOST_PP_VARIADIC_ELEM_2(e0, e1, e2, ...) e2\n"
+      // <boost/preprocessor/tuple/elem.hpp>
+      "#define BOOST_PP_TUPLE_ELEM(...) "
+      "  BOOST_PP_OVERLOAD(BOOST_PP_TUPLE_ELEM_O_, __VA_ARGS__)(__VA_ARGS__)\n"
+      "#define BOOST_PP_TUPLE_ELEM_O_2(n, tuple) "
+      " BOOST_PP_VARIADIC_ELEM(n, BOOST_PP_REM tuple)\n"
+      "#define BOOST_PP_TUPLE_ELEM_O_3(size, n, tuple) "
+      " BOOST_PP_TUPLE_ELEM_O_2(n, tuple)\n"
+      // <boost/preprocessor/array/size.hpp>
+      "#define BOOST_PP_ARRAY_SIZE(array) BOOST_PP_TUPLE_ELEM(2, 0, array)\n"
+      // <boost/preprocessor/array/data.hpp>
+      "#define BOOST_PP_ARRAY_DATA(array) BOOST_PP_TUPLE_ELEM(2, 1, array)\n"
+      // <boost/preprocessor/array/elem.hpp>
+      "#define BOOST_PP_ARRAY_ELEM(i, array) "
+      " BOOST_PP_TUPLE_ELEM(BOOST_PP_ARRAY_SIZE(array), i,"
+      " BOOST_PP_ARRAY_DATA(array))\n"
+      // <boost/utility/result_of.hpp>
+      "#define BOOST_RESULT_OF_NUM_ARGS 10\n"
+      "#define BOOST_PP_ITERATION_PARAMS_1 "
+      " (3,(0,BOOST_RESULT_OF_NUM_ARGS,<bar1.h>))\n"
+      // <boost/preprocessor/iteration/detail/iter/forward1.hpp>
+      "#define BOOST_PP_FILENAME_1 "
+      "  BOOST_PP_ARRAY_ELEM(2, BOOST_PP_ITERATION_PARAMS_1)\n"
+      "#define BOOST_PP_ITERATION_1 0\n"
+      "#include BOOST_PP_FILENAME_1\n",
+      "foo.cc");
+  CreateTmpFile("", "bar1.h");
+  // bar1.h is found through the "." include dir.
+  std::set<string> expected {file::JoinPath(".", "bar1.h")};
+  {
+    // Only the gcc-style command line is exercised here.
+    std::vector<string> args {"/usr/bin/g++", "-c", "-I.", foo_cc};
+
+    std::set<string> files = RunIncludeProcessor(foo_cc, args);
+    ASSERT_EQ(expected.size(), files.size());
+    EXPECT_EQ(expected, files);
+  }
+}
+
+TEST_F(IncludeProcessorTest, include_next_self) {
+  // a.h in the current directory does #include_next <a.h>.  With "-I."
+  // listed ahead of "-Ia", the same file is expected once more as ./a.h
+  // before the lookup reaches a/a.h.
+  const string& gcc = "/usr/bin/g++";
+
+  const string& source_file = CreateTmpFile(
+      "#include \"a.h\"\n",
+      "a.cc");
+  const string& ah = CreateTmpFile(
+      "#include_next <a.h>\n",
+      "a.h");
+
+  const string& aah = file::JoinPath("a", "a.h");
+  CreateTmpFile("", aah);
+
+  std::vector<string> args {
+    gcc, "-I.", "-Ia", "-c", source_file,
+  };
+
+  // a.h appears under both names it was reached by, followed by a/a.h.
+  // Only the set contents are compared; there is no separate size check.
+  std::set<string> expected {
+    ah, file::JoinPath(".", "a.h"), aah,
+  };
+
+  std::set<string> files = RunIncludeProcessor(source_file, args);
+  EXPECT_EQ(expected, files);
+}
+
+TEST_F(IncludeProcessorTest, include_quote_from_current) {
+  // a/a.cc includes "a.h"; no -I option is given on the command line.
+  const string& gcc = "/usr/bin/g++";
+
+  const string& source_file = CreateTmpFile(
+      "#include \"a.h\"\n",
+      file::JoinPath("a", "a.cc"));
+  const string& aah = CreateTmpFile(
+      "",
+      file::JoinPath("a", "a.h"));
+
+  std::vector<string> args {gcc, "-c", source_file};
+
+  // The header next to the source file is expected, under the name
+  // CreateTmpFile returned for it.
+  std::set<string> expected {aah};
+
+  std::set<string> files = RunIncludeProcessor(source_file, args);
+  ASSERT_EQ(expected.size(), files.size());
+  EXPECT_EQ(expected, files);
+}
+
+TEST_F(IncludeProcessorTest, include_sibling) {
+  // a/a.cc includes "../b/b.h"; no -I option is given on the command line.
+  const string& gcc = "/usr/bin/g++";
+
+  const string& source_file = CreateTmpFile(
+      "#include \"../b/b.h\"\n",
+      file::JoinPath("a", "a.cc"));
+  const string& bbh = CreateTmpFile(
+      "",
+      file::JoinPath("a", "..", "b", "b.h"));
+
+  std::vector<string> args {gcc, "-c", source_file};
+
+  // The header is expected under the name CreateTmpFile returned for the
+  // "a/../b/b.h" spelling.
+  std::set<string> expected {bbh};
+
+  std::set<string> files = RunIncludeProcessor(source_file, args);
+  ASSERT_EQ(expected.size(), files.size());
+  EXPECT_EQ(expected, files);
+}
+
+#ifdef __MACH__
+
+TEST_F(IncludeProcessorTest, curdir_framework) {
+  // b/31843347
+  //
+  // A framework created in the test directory, searched via "-F .".
+  CreateTmpDir("EarlGrey.framework");
+  CreateTmpDir("EarlGrey.framework/Headers");
+  CreateTmpFile("", "EarlGrey.framework/Headers/EarlGrey.h");
+
+  std::vector<string> args {"-F", "."};
+  const string& source_file =
+      CreateTmpFile("#import <EarlGrey/EarlGrey.h>\n", "foo.mm");
+  RunTest("/usr/bin/gcc", source_file, args);
+}
+
+TEST_F(IncludeProcessorTest, sub_framework) {
+  // b/23128924
+  std::vector<string> no_extra_args;
+  const string& source_file =
+      CreateTmpFile("#include <Accelerate/Accelerate.h>", "foo.cc");
+  RunTest("/usr/bin/gcc", source_file, no_extra_args);
+}
+#endif
+
+TEST_F(IncludeProcessorTest, include_from_dir) {
+  // test/a.c includes "a.h", which is created in the same directory.
+  const string& source = file::JoinPath("test", "a.c");
+  const string& header = file::JoinPath("test", "a.h");
+  CreateTmpFile("#include \"a.h\"\n", source);
+  CreateTmpFile("", header);
+
+  std::set<string> expected {header};
+  std::vector<string> args {"/usr/bin/gcc", "-c", source};
+
+  std::set<string> files = RunIncludeProcessor(source, args);
+  ASSERT_EQ(expected.size(), files.size());
+  EXPECT_EQ(expected, files);
+}
+
+TEST_F(IncludeProcessorTest, include_from_dir_in_include_dir) {
+  // a.c includes <test/a.h> through "-I."; a.h then includes "b.h",
+  // which is created in a.h's own directory.
+  const string& source = "a.c";
+  const string& a_h = file::JoinPath(".", "test", "a.h");
+  const string& b_h = file::JoinPath(".", "test", "b.h");
+  CreateTmpFile("#include <test/a.h>\n", source);
+  CreateTmpFile("#include \"b.h\"", a_h);
+  CreateTmpFile("", b_h);
+
+  std::set<string> expected {a_h, b_h};
+  std::vector<string> args {"/usr/bin/gcc", "-I.", "-c", source};
+
+  std::set<string> files = RunIncludeProcessor(source, args);
+  ASSERT_EQ(expected.size(), files.size());
+  EXPECT_EQ(expected, files);
+}
+
+TEST_F(IncludeProcessorTest, include_from_abs_rel_include_dir) {
+  // Mixes a relative include dir ("rel") with a FullPath() one ("abs").
+  const string& source = "a.c";
+  CreateTmpFile("#include <abs.h>\n#include <rel.h>\n", source);
+
+  const string& rel_h = file::JoinPath("rel", "rel.h");
+  CreateTmpFile("", rel_h);
+  const string& abs_h = CreateTmpFile("", file::JoinPath("abs", "abs.h"));
+  const string& abs_dir = tmpdir_util_->FullPath("abs");
+
+  std::vector<string> args {
+    "/usr/bin/gcc", "-Irel", "-I" + abs_dir, "-c", source,
+  };
+  std::set<string> expected {rel_h, abs_h};
+
+  std::set<string> files = RunIncludeProcessor(source, args);
+  ASSERT_EQ(expected.size(), files.size());
+  EXPECT_EQ(expected, files);
+}
+
+TEST_F(IncludeProcessorTest, include_guard_once_alias) {
+  // b.h has #pragma once and includes "../b/b.h" itself; expected once.
+  const string& source = file::JoinPath("a", "a.c");
+  const string& b_h = file::JoinPath("a", "..", "b", "b.h");
+  CreateTmpFile("#include \"../b/b.h\"\n", source);
+  CreateTmpFile("#pragma once\n"
+                "#include \"../b/b.h\"\n",
+                b_h);
+
+  std::set<string> expected {b_h};
+  std::vector<string> args {"/usr/bin/gcc", "-c", source};
+
+  std::set<string> files = RunIncludeProcessor(source, args);
+  EXPECT_EQ(expected, files);
+}
+
+TEST_F(IncludeProcessorTest, undef_content) {
+  // inc.h includes whatever FILE currently expands to; a.c includes it
+  // twice with FILE redefined in between, so both a.h and b.h are expected.
+  const string& inc_h = file::JoinPath(".", "inc.h");
+  const string& source = file::JoinPath(".", "a.c");
+  const string& a_h = file::JoinPath(".", "a.h");
+  const string& b_h = file::JoinPath(".", "b.h");
+  CreateTmpFile(R"(
+#define THIS FILE
+#include THIS
+#undef THIS
+)", inc_h);
+  CreateTmpFile(R"(
+#define FILE "a.h"
+#include "inc.h"
+#undef FILE
+#define FILE "b.h"
+#include "inc.h"
+)", source);
+  CreateTmpFile("", a_h);
+  CreateTmpFile("", b_h);
+
+  std::set<string> expected {inc_h, a_h, b_h};
+  std::vector<string> args {"/usr/bin/gcc", "-c", source};
+
+  std::set<string> files = RunIncludeProcessor(source, args);
+  EXPECT_EQ(expected, files);
+}
+
+}  // namespace devtools_goma
diff --git a/client/ioutil.cc b/client/ioutil.cc
new file mode 100644
index 0000000..d3c0977
--- /dev/null
+++ b/client/ioutil.cc
@@ -0,0 +1,541 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "ioutil.h"
+
+#ifndef _WIN32
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/select.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#else
+# include "config_win.h"
+#endif
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+#include "file.h"
+#include "file_dir.h"
+#include "file_id.h"
+#include "glog/logging.h"
+#include "path_util.h"
+#include "scoped_fd.h"
+#include "split.h"
+#include "string_piece_utils.h"
+
+using std::string;
+
+namespace {
+
+// Characters stripped by StringRstrip() and StringStrip().
+// Same set as Python 2.7's string.whitespace.
+constexpr char kWhitespaces[] = "\t\n\x0b\x0c\r ";
+
+}  // namespace
+
+namespace devtools_goma {
+
+// Parse HTTP request and response headers and return offset into body
+// and content-length.
+//
+// Returns false, leaving every output untouched, when |data| does not contain
+// the CRLFCRLF that terminates the header.  Otherwise returns true and sets:
+//   *body_offset    - index of the first byte after CRLFCRLF.
+//   *content_length - value of the "Content-Length: " header, or string::npos
+//                     when the header is absent, appears only in the body, or
+//                     carries a negative value.
+//   *is_chunked     - (only if |is_chunked| is not nullptr) true iff a
+//                     "Transfer-Encoding: " header inside the header block
+//                     has the value "chunked" after stripping whitespace.
+// Header names are matched case-sensitively.
+// TODO: to be more conformant to the http standard
+bool FindContentLengthAndBodyOffset(
+    StringPiece data, size_t *content_length, size_t *body_offset,
+    bool *is_chunked) {
+  const char kContentLength[] = "Content-Length: ";
+  const char kTransferEncoding[] = "Transfer-Encoding: ";
+  const char kChunked[] = "chunked";
+  const char kCrlf[] = "\r\n";
+
+  const StringPiece::size_type header_end = data.find("\r\n\r\n");
+  if (header_end == StringPiece::npos) {
+    LOG(ERROR) << "GOMA: Invalid, missing CRLFCRLF";
+    return false;
+  }
+  *body_offset = header_end + 4;
+
+  const StringPiece::size_type content_length_pos = data.find(kContentLength);
+  if (content_length_pos == StringPiece::npos ||
+      content_length_pos >= header_end) {
+    // Either there is no Content-Length at all (e.g. a GET request), or the
+    // string only shows up in the payload.  In both cases we do not know
+    // how much further we should read.
+    *content_length = string::npos;
+  } else {
+    // Only look at the rest of this header line.  This keeps the number
+    // parser from skipping over the CRLF into the next line and avoids
+    // copying the whole body just to read a few digits.
+    StringPiece lenstr =
+        data.substr(content_length_pos + strlen(kContentLength));
+    lenstr = lenstr.substr(0, lenstr.find(kCrlf));
+    // strtoll instead of atoi: values above INT_MAX are well defined, and a
+    // negative value can be detected instead of wrapping to a huge size_t.
+    const long long length = strtoll(string(lenstr).c_str(), nullptr, 10);
+    *content_length =
+        length < 0 ? string::npos : static_cast<size_t>(length);
+  }
+
+  if (is_chunked != nullptr) {
+    *is_chunked = false;
+    const StringPiece::size_type transfer_encoding_pos =
+        data.find(kTransferEncoding);
+    if (transfer_encoding_pos != StringPiece::npos &&
+        transfer_encoding_pos < header_end) {
+      // The Transfer-Encoding string is in the header.
+      // We should check its value is "chunked" or not.
+      StringPiece value =
+          data.substr(transfer_encoding_pos + strlen(kTransferEncoding));
+      value = StringStrip(value.substr(0, value.find(kCrlf)));
+      *is_chunked = (value == kChunked);
+    }
+  }
+
+  return true;
+}
+
+// Returns |str| without its trailing kWhitespaces characters.  When |str|
+// holds nothing but whitespace, an empty piece positioned at the end of
+// |str| is returned.
+StringPiece StringRstrip(StringPiece str) {
+  const size_t last_kept = str.find_last_not_of(kWhitespaces);
+  if (last_kept == string::npos) {
+    return str.substr(str.size(), 0);  // empty string piece.
+  }
+  return str.substr(0, last_kept + 1);
+}
+
+// Returns |str| without leading and trailing kWhitespaces characters.  When
+// |str| holds nothing but whitespace, an empty piece positioned at the end
+// of |str| is returned.
+StringPiece StringStrip(StringPiece str) {
+  const StringPiece::size_type last_kept = str.find_last_not_of(kWhitespaces);
+  if (last_kept == StringPiece::npos) {
+    return str.substr(str.size(), 0);  // empty string piece.
+  }
+  // A non-whitespace character exists, so |first_kept| is valid and not
+  // greater than |last_kept|.
+  const StringPiece::size_type first_kept =
+      str.find_first_not_of(kWhitespaces);
+  return str.substr(first_kept, last_kept - first_kept + 1);
+}
+
+// Writes |data| to the file obtained from ScopedFd::Create(filename,
+// permission).  Logs a fatal error (with errno text) if the file cannot be
+// opened or if the write does not report exactly data.size() bytes.
+void WriteStringToFileOrDie(const string &data, const string &filename,
+                            int permission) {
+  ScopedFd fd(ScopedFd::Create(filename, permission));
+  if (!fd.valid()) {
+    PLOG(FATAL) << "GOMA: failed to open " << filename;
+  }
+  const ssize_t expected_bytes = static_cast<ssize_t>(data.size());
+  if (fd.Write(data.c_str(), data.size()) != expected_bytes) {
+    PLOG(FATAL) << "GOMA: Cannot write to file " << filename;
+  }
+}
+
+// Writes |data| to the file obtained from ScopedFd::OpenForAppend(filename,
+// permission).  Logs a fatal error (with errno text) if the file cannot be
+// opened or if the write does not report exactly data.size() bytes.
+void AppendStringToFileOrDie(const string &data, const string &filename,
+                             int permission) {
+  ScopedFd fd(ScopedFd::OpenForAppend(filename, permission));
+  if (!fd.valid()) {
+    PLOG(FATAL) << "GOMA: failed to open " << filename;
+  }
+  const ssize_t expected_bytes = static_cast<ssize_t>(data.size());
+  if (fd.Write(data.c_str(), data.size()) != expected_bytes) {
+    PLOG(FATAL) << "GOMA: Cannot write to file " << filename;
+  }
+}
+
+// Writes |data| to standard output.  On Windows the bytes go straight to the
+// STD_OUTPUT_HANDLE via WriteFile(); elsewhere std::cout is used and flushed.
+void WriteStdout(StringPiece data) {
+#ifndef _WIN32
+  std::cout << data << std::flush;
+#else
+  DWORD written = 0;
+  const BOOL ok = WriteFile(GetStdHandle(STD_OUTPUT_HANDLE),
+                            data.data(), data.size(),
+                            &written, nullptr);
+  if (!ok) {
+    LOG_SYSRESULT(GetLastError());
+  }
+#endif
+}
+
+// Writes |data| to standard error.  On Windows the bytes go straight to the
+// STD_ERROR_HANDLE via WriteFile(); elsewhere std::cerr is used.
+void WriteStderr(StringPiece data) {
+#ifndef _WIN32
+  std::cerr << data;
+#else
+  DWORD written = 0;
+  const BOOL ok = WriteFile(GetStdHandle(STD_ERROR_HANDLE),
+                            data.data(), data.size(),
+                            &written, nullptr);
+  if (!ok) {
+    LOG_SYSRESULT(GetLastError());
+  }
+#endif
+}
+
+// Calls google::FlushLogFiles() with the INFO severity.  The severity
+// constant is spelled GLOG_INFO when GLOG_NO_ABBREVIATED_SEVERITIES is
+// defined.
+void FlushLogFiles() {
+#ifdef GLOG_NO_ABBREVIATED_SEVERITIES
+  google::FlushLogFiles(google::GLOG_INFO);
+#else
+  google::FlushLogFiles(google::INFO);
+#endif
+}
+
+// Stores in |*base_dir| the part of |filepath| up to and including the
+// separator that precedes the last path component.  A run of consecutive
+// separators before the last component is reduced to a single one.  When
+// |filepath| contains no separator, "." is stored.
+// The separator is '\\' on Windows and '/' elsewhere.
+void GetBaseDir(const string& filepath, string* base_dir) {
+#ifdef _WIN32
+  const char kSeparator = '\\';
+#else
+  const char kSeparator = '/';
+#endif
+  const string::size_type last_sep = filepath.rfind(kSeparator);
+  if (last_sep == string::npos) {
+    *base_dir = ".";
+    return;
+  }
+  // Locate the first separator of the run that ends at |last_sep|.
+  const string::size_type before_run =
+      filepath.find_last_not_of(kSeparator, last_sep);
+  const string::size_type run_start =
+      (before_run == string::npos) ? 0 : before_run + 1;
+  *base_dir = filepath.substr(0, run_start + 1);
+}
+
+// Returns the current working directory.  The process is aborted through
+// CHECK when the directory cannot be obtained.
+string GetCurrentDirNameOrDie(void) {
+#ifndef _WIN32
+  // getcwd() returns the current resolved directory. However, a compiler is
+  // taking PWD as current working directory. PWD might contain unresolved
+  // directory.
+  // We don't return /proc/self/cwd if it is set in PWD, since the corresponding
+  // directory is different among gomacc and compiler_proxy.
+  // See also: b/37259278
+
+  const char* pwd = getenv("PWD");
+  if (pwd != nullptr && IsPosixAbsolutePath(pwd) &&
+      !HasPrefixDir(pwd, "/proc/self/cwd")) {
+    // Align with llvm current_path().
+    // llvm checks that the file ids of PWD and "." are the same.
+    FileId pwd_id(pwd);
+    FileId dot_id(".");
+    if (pwd_id.IsValid() && dot_id.IsValid() &&
+        pwd_id.is_directory && pwd_id == dot_id) {
+      return pwd;
+    }
+  }
+
+  // getcwd(nullptr, 0) hands back a buffer that must be released with free().
+  char *dir = getcwd(nullptr, 0);
+  CHECK(dir) << "GOMA: Cannot find current directory ";
+  string dir_str(dir);
+  free(dir);
+  return dir_str;
+#else
+  char dir[PATH_MAX];
+  const DWORD len = GetCurrentDirectoryA(PATH_MAX, dir);
+  CHECK_NE(len, (DWORD)0) <<
+      "GOMA: Cannot find current directory: " << GetLastError();
+  // When the buffer is too small, GetCurrentDirectoryA() returns the required
+  // size (which is not smaller than the buffer size) and does not fill |dir|.
+  // Do not read the buffer in that case.
+  CHECK_LT(len, (DWORD)PATH_MAX) <<
+      "GOMA: Current directory path is too long: " << len;
+  return string(dir, len);
+#endif
+}
+
+// Parse the HTTP response header.
+// Return true if it got whole header, or error response.
+// Return false if it needs more data.
+//
+// All outputs are reset first: *http_status_code = 0, *offset = 0,
+// *content_length = string::npos and, when |is_chunked| is not nullptr,
+// *is_chunked = false.  |offset|, |content_length| and |is_chunked| are only
+// filled in for status 200 and 204.
+bool ParseHttpResponse(StringPiece response,
+                       int* http_status_code,
+                       size_t* offset,
+                       size_t* content_length,
+                       bool* is_chunked) {
+  *http_status_code = 0;
+  *offset = 0;
+  *content_length = string::npos;
+  if (is_chunked != nullptr)
+    *is_chunked = false;
+
+  // Check the return code from server. It should be "HTTP/1.? 200 OK\r\n"
+  const char kHttpHeader[] = "HTTP/1.";
+  const size_t http_header_len = strlen(kHttpHeader);
+  // + 2 for the minor version and + 4 for status code.
+  if (response.size() < http_header_len + 2 + 4)
+    return false;
+
+  if (strncmp(response.data(), kHttpHeader, http_header_len) != 0) {
+    LOG(ERROR) << kHttpHeader << " expected, but got "
+               << string(response.data(), http_header_len);
+    return true;
+  }
+
+  // Parse the status code from the status line only; there is no need to
+  // copy the rest of the response (possibly a large body) for that.
+  StringPiece codestr = response.substr(http_header_len + 2);
+  codestr = codestr.substr(0, codestr.find("\r\n"));
+  *http_status_code = atoi(string(codestr).c_str());
+  if (*http_status_code != 200 && *http_status_code != 204)
+    return true;
+
+  if (!FindContentLengthAndBodyOffset(response, content_length, offset,
+                                      is_chunked)) {
+    return false;
+  }
+
+  VLOG(3) << "HTTP header=" << response.substr(0, *offset);
+  if (is_chunked != nullptr && *is_chunked) {
+    return true;
+  }
+
+  if (*content_length == string::npos) {
+    return true;
+  }
+
+  // *offset points just past the CRLFCRLF found inside |response|, so it is
+  // never larger than response.size().  Comparing against the remaining size
+  // (instead of computing *offset + *content_length) cannot wrap around when
+  // the server announces a huge Content-Length.
+  if (response.size() - *offset < *content_length) {
+    // if response size is too small, there was some network error.
+    return false;
+  }
+  return true;
+}
+
+// Calls RecursivelyDelete(dirname).  When that call returns a false value
+// the process is aborted through CHECK, with |dirname| added to the message.
+void DeleteRecursivelyOrDie(const string& dirname) {
+  CHECK(RecursivelyDelete(dirname)) << dirname;
+}
+
+// Returns |str| enclosed in double quotes, with special characters escaped:
+//   - '"' and '\\' are prefixed with a backslash,
+//   - backspace, form feed, newline, carriage return and tab become
+//     \b, \f, \n, \r and \t,
+//   - any other byte below 0x20 becomes \u00XX (lowercase hex),
+//   - every other byte, including bytes >= 0x80, is copied unchanged.
+// Terminal escape sequences of the form ESC '[' <digits and ';'> 'm' are
+// dropped from the output.
+string EscapeString(const string& str) {
+  std::stringstream escaped_str;
+  escaped_str << "\"";
+  for (size_t i = 0; i < str.size(); ++i) {
+    // handle escape sequence.
+    // ESC[1m  -> bold
+    // ESC[0m  -> reset
+    // ESC[0;<bold><fgbg><color>m -> foreground
+    //  <bold> "1;" or ""
+    //  <fgbg> "3" foreground or "4" background
+    //  <color> 0 black / 1 red / 2 green / 4 blue
+    // For now, just ignore these escape sequence.
+    if (str[i] == '\033' && i + 2 < str.size() && str[i + 1] == '[') {
+      size_t j = i + 2;
+      // isdigit() requires a value representable as unsigned char.
+      while (j < str.size() &&
+             (str[j] == ';' || isdigit(static_cast<unsigned char>(str[j])))) {
+        ++j;
+      }
+      if (j < str.size() && str[j] == 'm') {
+        i = j;  // skip the whole sequence, including the final 'm'.
+        continue;
+      }
+      // Not a complete sequence: the ESC byte is escaped like any other
+      // control character below.
+    }
+    switch (str[i]) {
+      case '"': escaped_str << "\\\""; break;
+      case '\\': escaped_str << "\\\\"; break;
+      case '\b': escaped_str << "\\b"; break;
+      case '\f': escaped_str << "\\f"; break;
+      case '\n': escaped_str << "\\n"; break;
+      case '\r': escaped_str << "\\r"; break;
+      case '\t': escaped_str << "\\t"; break;
+      default: {
+        // Compare as unsigned: with a signed char, bytes >= 0x80 (e.g. UTF-8
+        // sequences) are negative and would be mistaken for control
+        // characters and printed as a sign-extended number.
+        const unsigned char byte = static_cast<unsigned char>(str[i]);
+        if (byte < 0x20) {
+          escaped_str << "\\u" << std::hex << std::setw(4)
+                      << std::setfill('0') << static_cast<int>(byte);
+        } else {
+          escaped_str << str[i];
+        }
+      }
+    }
+  }
+  escaped_str << "\"";
+  return escaped_str.str();
+}
+
+// Encodes each element of |value| as one character.  An element is scaled to
+// static_cast<int>(62 * element / max); a negative result becomes '_',
+// 0..61 index into "A-Za-z0-9", and anything larger becomes '9'.
+string SimpleEncodeChartData(const std::vector<double>& value, double max) {
+  static const char kAlphabet[] =
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+      "abcdefghijklmnopqrstuvwxyz"
+      "0123456789";
+  string encoded;
+  for (size_t i = 0; i < value.size(); ++i) {
+    const int scaled = static_cast<int>(62 * value[i] / max);
+    char symbol = '9';
+    if (scaled < 0) {
+      symbol = '_';
+    } else if (scaled < 62) {
+      symbol = kAlphabet[scaled];
+    }
+    encoded.push_back(symbol);
+  }
+  return encoded;
+}
+
+// Returns the numeric value of hex digit |c|, or -1 if |c| is not one of
+// 0-9, a-f, A-F.
+static int HexDigitValue(char c) {
+  if (c >= '0' && c <= '9') return c - '0';
+  if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+  if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+  return -1;
+}
+
+// Parse chunked transfer coding.
+// Callers SHOULD NOT ask for trailers in the TE header of a request, since we
+// do not expect important headers in the trailers.  In other words, we just
+// discard trailers.
+//
+// |response| is the data received so far and |offset| is where the chunked
+// body starts in it.  Returns true when the last chunk and the final CRLF
+// have been seen (*remaining_chunk_length == 0) or when the data is broken
+// (*remaining_chunk_length == string::npos).  Returns false when more data
+// is needed; *remaining_chunk_length is then an estimate of how many more
+// bytes are required.  The pieces pushed to |chunks| point into |response|.
+//
+// Reference: RFC2616 3.6.1 Chunked Transfer Coding.
+bool ParseChunkedBody(StringPiece response,
+                      size_t offset,
+                      size_t* remaining_chunk_length,
+                      std::vector<StringPiece>* chunks) {
+  // Bytes a chunk needs besides its data, counted from the CRLF that ends
+  // the chunk-size line: \r\n<chunk>\r\n.
+  const size_t kChunkOverhead = 4;
+
+  size_t head = offset;
+  *remaining_chunk_length = string::npos;
+  chunks->clear();
+
+  if (head > response.size()) {
+    LOG(ERROR) << "Given offset is beyond the end of response."
+               << " response_len=" << response.size()
+               << " offset=" << offset;
+    return true;
+  }
+
+  while (head < response.size()) {
+    if (HexDigitValue(response[head]) < 0) {
+      LOG(ERROR) << "Expected hexdigit but got:" << (int)response[head];
+      LOG(ERROR) << " response_len=" << response.size()
+                 << " head=" << head;
+      LOG(ERROR) << "broken chunk:" << response;
+      return true;
+    }
+    // Read chunk-size by hand instead of using strtoul(): |response| is not
+    // guaranteed to be NUL-terminated, strtoul() would also accept a "0x"
+    // prefix, and it hides overflow.
+    size_t chunk_length = 0;
+    size_t size_end = head;
+    for (; size_end < response.size(); ++size_end) {
+      const int digit = HexDigitValue(response[size_end]);
+      if (digit < 0)
+        break;
+      if (chunk_length > (string::npos >> 4)) {
+        LOG(ERROR) << "Chunk size is too large.";
+        return true;
+      }
+      chunk_length = (chunk_length << 4) | static_cast<size_t>(digit);
+    }
+    // Keep chunk_length + kChunkOverhead from wrapping around or colliding
+    // with string::npos, which is reserved for errors.
+    if (chunk_length >= string::npos - kChunkOverhead) {
+      LOG(ERROR) << "Chunk size is too large.";
+      return true;
+    }
+    if (size_end >= response.size()) {
+      // reached the end of response.
+      *remaining_chunk_length = chunk_length + kChunkOverhead;
+      return false;
+    } else if (response[size_end] != '\r' && response[size_end] != ';') {
+      LOG(ERROR) << "Unexpected character after length:"
+                 << response[size_end];
+      return true;
+    }
+
+    if (chunk_length == 0) {  // last chunk.
+      VLOG(2) << "Found last-chunk.";
+      // Confirm the remaining of resp should be like:
+      // 0; chunk-extension CRLF
+      // trailer
+      // CRLF
+
+      // skip chunk-extension.
+      StringPiece::size_type crlf_pos = response.find("\r\n", head);
+      if (crlf_pos == StringPiece::npos) {
+        // need more data.
+        // 4 comes from \r\n<trailer (which can be omitted)>\r\n.
+        *remaining_chunk_length = 4;
+        return false;
+      }
+
+      head = crlf_pos + 2;
+
+      // skip trailer.
+      while (head < response.size()) {
+        // incomplete CR after trailer headers
+        if (response.substr(head) == "\r") {
+          *remaining_chunk_length = 1;
+          return false;
+        }
+
+        // CRLF after trailer headers
+        if (response.substr(head) == "\r\n") {
+          *remaining_chunk_length = 0;
+          return true;
+        }
+
+        crlf_pos = response.find("\r\n", head);
+
+        if (crlf_pos == StringPiece::npos) {
+          // incomplete trailer header ends with CR
+          if (strings::EndsWith(response, "\r")) {
+            *remaining_chunk_length = 3;
+            return false;
+          }
+
+          // incomplete trailer header not include CRLF
+          *remaining_chunk_length = 4;
+          return false;
+        }
+
+        LOG(WARNING) << "Ignoring Chunked Transfer Coding trailer: "
+                     << response.substr(head, crlf_pos - head);
+        head = crlf_pos + 2;
+      }
+
+      // need one more CRLF after trailer headers
+      *remaining_chunk_length = 2;
+      return false;
+    }
+
+    VLOG(2) << "resp len:" << response.size()
+            << ", head:" << head
+            << ", chunk_len:" << chunk_length;
+    // skip chunk-extension.
+    StringPiece::size_type crlf_pos = response.find("\r\n", head);
+    if (crlf_pos == StringPiece::npos) {
+      // need more data.
+      // 4 comes from \r\n<chunk>\r\n.
+      *remaining_chunk_length = chunk_length + kChunkOverhead;
+      return false;
+    }
+    // Compare sizes without adding |chunk_length| to a position, so that a
+    // huge announced chunk size cannot wrap around and defeat this check.
+    const size_t available = response.size() - crlf_pos;
+    const size_t needed = chunk_length + kChunkOverhead;
+    if (available < needed) {
+      // need more data.
+      *remaining_chunk_length = needed - available;
+      return false;
+    }
+
+    head = crlf_pos + 2;
+    chunks->push_back(response.substr(head, chunk_length));
+    // The check above guarantees the two bytes after the chunk exist.
+    const StringPiece chunk_terminator =
+        response.substr(head + chunk_length, 2);
+    if (!(chunk_terminator == "\r\n")) {
+      LOG(ERROR) << "chunk does not end with expected CRLF.:"
+                 << "Actual: " << chunk_terminator;
+      return true;
+    }
+    head += chunk_length + 2;
+  }
+  // Need more data.  However, I do not know how much remains.
+  // All chunks has read but last chunk's size is not 0.
+  // This means at least one chunk will come.
+  // 0;<chunk-extension>\r\n<trailers>\r\n.
+  *remaining_chunk_length = 5;
+  return false;
+}
+
+// Returns the bytes of all |chunks| concatenated in order.
+string CombineChunks(const std::vector<StringPiece>& chunks) {
+  string joined;
+  for (size_t i = 0; i < chunks.size(); ++i) {
+    const StringPiece& piece = chunks[i];
+    joined.append(piece.data(), piece.size());
+  }
+  return joined;
+}
+
+// Splits |query| into a key -> value map.  Everything from the first '#' on
+// is ignored, parameters are separated by '&', and each parameter is split
+// at its first '='.  A parameter without '=' maps to "".  Empty parameters
+// are skipped, and when a key repeats the first occurrence is kept.
+std::map<string, string> ParseQuery(const string& query) {
+  std::map<string, string> params;
+  if (query.empty()) {
+    return params;
+  }
+  // substr(0, npos) keeps the whole string when there is no fragment.
+  const string without_fragment = query.substr(0, query.find('#'));
+  const std::vector<string> pairs = strings::Split(without_fragment, "&");
+  for (const string& pair : pairs) {
+    if (pair.empty()) {
+      continue;
+    }
+    const size_t eq = pair.find('=');
+    if (eq == string::npos) {
+      params.emplace(pair, "");
+    } else {
+      // TODO: url decode?
+      params.emplace(pair.substr(0, eq), pair.substr(eq + 1));
+    }
+  }
+  return params;
+}
+
+}  // namespace devtools_goma
diff --git a/client/ioutil.h b/client/ioutil.h
new file mode 100644
index 0000000..2d8f107
--- /dev/null
+++ b/client/ioutil.h
@@ -0,0 +1,114 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_IOUTIL_H_
+#define DEVTOOLS_GOMA_CLIENT_IOUTIL_H_
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "string_piece.h"
+#ifdef _WIN32
+#include "socket_helper_win.h"
+#endif
+
+using std::string;
+
+namespace devtools_goma {
+
+// 32 KiB.
+// NOTE(review): presumably a shared I/O buffer size; users are not visible
+// from this header -- confirm.
+const int kBufSize = 1024 * 32;
+// NOTE(review): presumably the timeout, in seconds, for select() on reads;
+// users are not visible from this header -- confirm.
+const int kReadSelectTimeoutSec = 20;
+
+class ScopedSocket;
+
+// Removes trailing whitespace from |str|.
+StringPiece StringRstrip(StringPiece str);
+
+// Removes leading and trailing whitespace from |str|.
+StringPiece StringStrip(StringPiece str);
+
+// Writes |data| to |filename|, which is opened with |permission|.
+// Dies (fatal log) if the file cannot be opened or fully written.
+void WriteStringToFileOrDie(const string &data, const string &filename,
+                            int permission);
+
+// Same as WriteStringToFileOrDie, but the file is opened for appending.
+void AppendStringToFileOrDie(const string &data, const string &filename,
+                             int permission);
+
+// On Win32, std::cout and std::cerr are opened in text mode, so
+// cout << "foo\r\n" emits "foo\r\r\n".  That is not ninja friendly, so these
+// functions write the bytes of |data| as they are.
+// b/6617503
+void WriteStdout(StringPiece data);
+void WriteStderr(StringPiece data);
+
+// Flushes the glog log files (google::FlushLogFiles with INFO severity).
+void FlushLogFiles();
+
+// Gets the current directory.  Dies if it cannot be obtained.
+string GetCurrentDirNameOrDie(void);
+
+// Gets the base directory path of the given |filepath|: everything up to and
+// including the separator before the last path component, or "." when
+// |filepath| contains no separator.
+void GetBaseDir(const string& filepath, string* base_dir);
+
+// Parse the HTTP response header.
+// Return true if it got the whole header, or an error response.
+// Return false if it needs more data.
+//
+// In case of returning true with error, |http_status_code| will not be
+// 200 or 204.  You must not use other fields in such a case.
+//
+// If returning true without error, the following may be set:
+// |http_status_code| represents HTTP status code.
+// |offset| represents offset where HTTP body starts.
+// |content_length| represents value of Content-Length header if exists.
+// If no Content-Length header found in the header, |content_length| is set to
+// string::npos.
+// |is_chunked| becomes true if the HTTP response is sent with chunked
+// transfer encoding. Note that the function will not check chunked transfer
+// coding if |is_chunked| == NULL.
+bool ParseHttpResponse(StringPiece response,
+                       int* http_status_code,
+                       size_t* offset,
+                       size_t* content_length,
+                       bool* is_chunked);
+
+// Parse HTTP request and response headers and return offset into body
+// and content-length. Content-Length may be missing, and in that case
+// content_length will be set to string::npos.
+// If data is encoded with chunked transfer encoding, is_chunked will be
+// set to true.
+// Returns false if the end of the header (CRLFCRLF) is not found in |data|.
+//
+// Do not check chunked transfer coding if is_chunked == NULL.
+bool FindContentLengthAndBodyOffset(
+    StringPiece data, size_t *content_length, size_t *body_offset,
+    bool *is_chunked);
+
+// Deletes |dirname| with RecursivelyDelete().  Dies if that fails.
+void DeleteRecursivelyOrDie(const string& dirname);
+
+// Returns |str| enclosed in double quotes with '"', '\\' and control
+// characters escaped.  Terminal escape sequences of the form
+// ESC '[' <digits and ';'> 'm' are removed.
+string EscapeString(const string& str);
+
+// Encodes |value| scaled by |max|, one character per element.
+// http://code.google.com/apis/chart/docs/data_formats.html#simple
+string SimpleEncodeChartData(const std::vector<double>& value, double max);
+
+// Parse body encoded with chunked transfer coding.
+// Return true if whole chunks parsed, or error.
+// Return false if it needs more data.
+//
+// remaining_chunk_length:
+// - 0: success (returns true).
+// - string::npos: error (returns true).
+// - otherwise, need more data (returns false).
+//
+// chunks is set only when ParseChunkedBody returns true and
+// *remaining_chunk_length == 0.
+bool ParseChunkedBody(StringPiece response,
+                      size_t offset, size_t* remaining_chunk_length,
+                      std::vector<StringPiece>* chunks);
+
+// Concatenates |chunks| in order into one string.
+string CombineChunks(const std::vector<StringPiece>& chunks);
+
+// Parses |query| ("k1=v1&k2=v2...") into a key -> value map.  Anything from
+// the first '#' on is ignored, a parameter without '=' maps to "", and values
+// are not URL-decoded.
+std::map<string, string> ParseQuery(const string& query);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_IOUTIL_H_
diff --git a/client/ioutil_parse_http_response_fuzzer.cc b/client/ioutil_parse_http_response_fuzzer.cc
new file mode 100644
index 0000000..06954ae
--- /dev/null
+++ b/client/ioutil_parse_http_response_fuzzer.cc
@@ -0,0 +1,20 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <string>
+
+#include "ioutil.h"
+
+// libFuzzer entry point: hands the raw input bytes to ParseHttpResponse().
+// The parse results are discarded and 0 is always returned.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  const std::string response(reinterpret_cast<const char*>(data), size);
+
+  int status_code = 0;
+  size_t body_offset = 0;
+  size_t body_length = 0;
+  bool chunked = false;
+  (void)devtools_goma::ParseHttpResponse(
+      response, &status_code, &body_offset, &body_length, &chunked);
+
+  return 0;
+}
diff --git a/client/ioutil_unittest.cc b/client/ioutil_unittest.cc
new file mode 100644
index 0000000..a9b7ebb
--- /dev/null
+++ b/client/ioutil_unittest.cc
@@ -0,0 +1,767 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "ioutil.h"
+
+#include <memory>
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include "unittest_util.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+#if GTEST_HAS_DEATH_TEST
+// WriteStringToFileOrDie() must kill the process when the target file lives
+// in a directory that does not exist.
+TEST(IoutilTest, WriteStringToFileOrDieCrash) {
+#ifdef _WIN32
+  const string missing_path =
+      "K:\\tmp\\you_may_not_have_this_dir\\foo\\bar\\baz";
+  EXPECT_DEATH(WriteStringToFileOrDie("fuga", missing_path, 0666), "");
+#else
+  const string missing_path = "/tmp/you_may_not_have_this_dir/foo/bar/baz";
+  EXPECT_DEATH(WriteStringToFileOrDie("fuga", missing_path, 0666),
+               "No such file");
+#endif
+}
+#endif  // GTEST_HAS_DEATH_TEST
+
+#ifdef _WIN32
+// Creates <temp>\ioutils_unittest_<pid>\foo\something.txt, deletes the tree
+// once (must succeed) and then a second time (must die).
+TEST(IoutilTest, DeleteRecursivelyOrDieCrash) {
+  char temp_root[PATH_MAX];
+  char top_dir[PATH_MAX];
+  GetTempPathA(PATH_MAX, temp_root);
+  // Drop a trailing backslash so that the joined path has a single one.
+  if (temp_root[strlen(temp_root) - 1] == '\\') {
+    temp_root[strlen(temp_root) - 1] = 0;
+  }
+  sprintf_s(top_dir, PATH_MAX, "%s\\ioutils_unittest_%d",
+            temp_root, GetCurrentProcessId());
+  CreateDirectoryA(top_dir, nullptr);
+
+  const string nested_dir = string(top_dir) + "\\foo";
+  CreateDirectoryA(nested_dir.c_str(), nullptr);
+
+  const string file_path = nested_dir + "\\something.txt";
+  FILE* fp = nullptr;
+  EXPECT_EQ(0, fopen_s(&fp, file_path.c_str(), "w"));
+  EXPECT_TRUE(fp != nullptr);
+  fputs("bar", fp);
+  fflush(fp);
+  fclose(fp);
+
+  // Shall not die here
+  DeleteRecursivelyOrDie(top_dir);
+  // Shall die here
+  EXPECT_DEATH(DeleteRecursivelyOrDie(top_dir), "");
+}
+#endif
+
+#ifndef _WIN32
+// Checks how GetCurrentDirNameOrDie() treats the PWD environment variable:
+// an invalid PWD and /proc/self/cwd are ignored in favor of getcwd(), while
+// a PWD that is a symlink to the real cwd is returned as is.
+TEST(IoutilTest, GetCurrentDirNameOrDie) {
+  // NOTE: '1' in setenv mean overwrite.
+
+  std::unique_ptr<char, decltype(&free)> original_env_pwd(nullptr, free);
+  std::unique_ptr<char, decltype(&free)> original_cwd(nullptr, free);
+  {
+    const char* pwd = getenv("PWD");
+    if (pwd != nullptr) {
+      original_env_pwd.reset(strdup(pwd));
+    }
+
+    // Assuming we can obtain the resolved absolute cwd.
+    original_cwd.reset(getcwd(nullptr, 0));
+    ASSERT_NE(original_cwd.get(), nullptr);
+  }
+
+  // When PWD is invalid place, it should not be used.
+  {
+    ASSERT_EQ(setenv("PWD", "/somewhere/invalid/place", 1), 0);
+    std::string cwd = GetCurrentDirNameOrDie();
+    EXPECT_NE("/somewhere/invalid/place", cwd);
+    // should be the same as getcwd.
+    EXPECT_EQ(original_cwd.get(), cwd);
+  }
+
+  // When PWD is /proc/self/cwd, it should not be used.
+  // Since the meaning of /proc/self/cwd is different among gomacc and
+  // compiler_proxy, we should not use /proc/self/cwd.
+  {
+    ASSERT_EQ(setenv("PWD", "/proc/self/cwd", 1), 0);
+    std::string cwd = GetCurrentDirNameOrDie();
+    EXPECT_NE("/proc/self/cwd", cwd);
+    // should be the same as getcwd.
+    EXPECT_EQ(original_cwd.get(), cwd);
+  }
+
+  {
+    TmpdirUtil tmpdir("ioutil_tmpdir");
+    // TODO: TmpdirUtil does not make cwd. why?
+    tmpdir.MkdirForPath(tmpdir.cwd(), true);
+
+    // Make a symlink $tmpdir_cwd/cwd --> real cwd.
+    std::string newpath = tmpdir.FullPath("cwd");
+    ASSERT_EQ(symlink(original_cwd.get(), newpath.c_str()), 0)
+        << "from=" << newpath << " to=" << original_cwd.get();
+    ASSERT_NE(original_cwd.get(), newpath);
+
+    // set PWD as new path. Then the new path should be taken.
+    setenv("PWD", newpath.c_str(), 1);
+    std::string cwd = GetCurrentDirNameOrDie();
+    EXPECT_EQ(cwd, newpath);
+
+    // Need to unlink symlink. Otherwise. TmpdirUtil will recursively delete
+    // the current working directory. Awful (>x<).
+    ASSERT_EQ(unlink(newpath.c_str()), 0);
+  }
+
+  // ----- tear down: put PWD back exactly as it was before the test.
+  // Restore the saved PWD value, not the resolved cwd; the two differ when
+  // the test was started from a symlinked directory.
+  if (original_env_pwd) {
+    setenv("PWD", original_env_pwd.get(), 1);
+  } else {
+    unsetenv("PWD");
+  }
+}
+#endif
+
+// A Content-Length header is found both in a response and in a request.
+TEST(IoutilTest, FindContentLengthAndBodyOffset) {
+  const string kInputs[] = {
+    "HTTP/1.1 200 OK\r\nContent-Length: 5\r\n\r\nH",
+    "GET / HTTP/1.1\r\nContent-Length: 5\r\n\r\nH",
+  };
+  for (const string& data : kInputs) {
+    SCOPED_TRACE(data);
+    size_t body_offset = string::npos;
+    size_t content_length = string::npos;
+    bool is_chunked = false;
+    EXPECT_TRUE(FindContentLengthAndBodyOffset(
+        data, &content_length, &body_offset, &is_chunked));
+    // The body is the single trailing "H".
+    EXPECT_EQ(data.size() - 1, body_offset);
+    EXPECT_EQ(5UL, content_length);
+    EXPECT_FALSE(is_chunked);
+  }
+}
+
+// Without the CRLFCRLF terminator the call fails and leaves the outputs at
+// their initial values.
+TEST(IoutilTest, FindContentLengthAndBodyOffsetInHeader) {
+  const string kInputs[] = {
+    "HTTP/1.1 200 OK\r\nContent-Length: 5\r\nH",
+    "GET / HTTP/1.1\r\nContent-Length: 5\r\nH",
+  };
+  for (const string& data : kInputs) {
+    SCOPED_TRACE(data);
+    size_t body_offset = string::npos;
+    size_t content_length = string::npos;
+    bool is_chunked = false;
+    EXPECT_FALSE(FindContentLengthAndBodyOffset(
+        data, &content_length, &body_offset, &is_chunked));
+    EXPECT_EQ(string::npos, body_offset);
+    EXPECT_EQ(string::npos, content_length);
+    EXPECT_FALSE(is_chunked);
+  }
+}
+
+// No Content-Length header in the header block: content_length stays
+// string::npos, even when the text "Content-Length: 10" shows up in the body.
+TEST(IoutilTest, FindContentLengthAndBodyOffsetNoLength) {
+  const struct {
+    string data;
+    size_t body_size;  // number of bytes after the header terminator.
+  } kCases[] = {
+    {"HTTP/1.1 200 OK\r\nHost: example.com\r\n\r\nH", 1},
+    {"HTTP/1.1 200 Ok\r\nHost: example.com\r\n\r\n"
+     "Content-Length: 10", strlen("Content-Length: 10")},
+    {"GET / HTTP/1.1\r\nHost: example.com\r\n\r\nH", 1},
+    {"GET / HTTP/1.1\r\nHost: example.com\r\n\r\n"
+     "Content-Length: 10", strlen("Content-Length: 10")},
+  };
+  for (const auto& test_case : kCases) {
+    SCOPED_TRACE(test_case.data);
+    size_t body_offset = string::npos;
+    size_t content_length = string::npos;
+    bool is_chunked = false;
+    EXPECT_TRUE(FindContentLengthAndBodyOffset(
+        test_case.data, &content_length, &body_offset, &is_chunked));
+    EXPECT_EQ(test_case.data.size() - test_case.body_size, body_offset);
+    EXPECT_EQ(string::npos, content_length);
+    EXPECT_FALSE(is_chunked);
+  }
+}
+
+// "Transfer-Encoding: chunked" in the header sets is_chunked and leaves
+// content_length at string::npos.
+TEST(IoutilTest, FindContentLengthAndBodyOffsetChunked) {
+  const string kInputs[] = {
+    "HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n1",
+    "GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n1",
+  };
+  for (const string& data : kInputs) {
+    SCOPED_TRACE(data);
+    size_t body_offset = string::npos;
+    size_t content_length = string::npos;
+    bool is_chunked = false;
+    EXPECT_TRUE(FindContentLengthAndBodyOffset(
+        data, &content_length, &body_offset, &is_chunked));
+    EXPECT_EQ(data.size() - 1, body_offset);
+    EXPECT_EQ(string::npos, content_length);
+    EXPECT_TRUE(is_chunked);
+  }
+}
+
+// A complete 200 response with Content-Length and the full body.
+TEST(IoutilTest, ParseHttpResponse) {
+  const string kBody = "Hello";
+  const string response =
+      "HTTP/1.1 200 OK\r\nContent-Length: 5\r\n\r\n" + kBody;
+
+  int status = 0;
+  size_t body_offset = string::npos;
+  size_t body_length = string::npos;
+  bool chunked = false;
+  EXPECT_TRUE(ParseHttpResponse(response, &status,
+                                &body_offset, &body_length, &chunked));
+  EXPECT_EQ(200, status);
+  EXPECT_EQ(response.size() - kBody.size(), body_offset);
+  EXPECT_EQ(5UL, body_length);
+  EXPECT_FALSE(chunked);
+}
+
+// Responses that end inside or right after the status line need more data.
+// The status code is reported as soon as it can be read.
+TEST(IoutilTest, ParseHttpResponseInStatusLine) {
+  const struct {
+    const char* response;
+    int expected_status;
+  } kCases[] = {
+    {"H", 0},
+    {"HTTP/1.1 ", 0},
+    {"HTTP/1.1 200 Ok\r\n", 200},
+    {"HTTP/1.1 204 Ok\r\n", 204},
+  };
+  for (const auto& test_case : kCases) {
+    SCOPED_TRACE(test_case.response);
+    const string response = test_case.response;
+    int http_status_code = 0;
+    size_t offset = string::npos;
+    size_t content_length = string::npos;
+    bool is_chunked = false;
+    EXPECT_FALSE(ParseHttpResponse(response, &http_status_code,
+                                   &offset, &content_length, &is_chunked));
+    EXPECT_EQ(test_case.expected_status, http_status_code);
+  }
+}
+
+// Anything that is not an HTTP 200/204 response is reported as finished
+// (true) with the parsed status code and with the other outputs reset.
+TEST(IoutilTest, ParseHttpResponseBadStatus) {
+  const struct {
+    const char* response;
+    int expected_status;
+  } kCases[] = {
+    {"220 localhost ESMTP", 0},
+    {"HTTP/1.1 301 Moved Parmenently\r\n", 301},
+    {"HTTP/1.1 403 Forbidden\r\n", 403},
+    {"HTTP/1.1 502 Bad Gateway\r\n", 502},
+  };
+  for (const auto& test_case : kCases) {
+    SCOPED_TRACE(test_case.response);
+    const string response = test_case.response;
+    int http_status_code = 0;
+    size_t offset = string::npos;
+    size_t content_length = string::npos;
+    bool is_chunked = false;
+    EXPECT_TRUE(ParseHttpResponse(response, &http_status_code,
+                                  &offset, &content_length, &is_chunked));
+    EXPECT_EQ(test_case.expected_status, http_status_code);
+    EXPECT_EQ(0UL, offset);
+    EXPECT_EQ(string::npos, content_length);
+    EXPECT_FALSE(is_chunked);
+  }
+}
+
+TEST(IoutilTest, ParseHttpResponseInHeader) {
+  string response = "HTTP/1.1 200 Ok\r\nHost: example.com";
+  int http_status_code = 0;
+  size_t offset = string::npos;
+  size_t content_length = string::npos;
+  bool is_chunked = false;
+  EXPECT_FALSE(ParseHttpResponse(response, &http_status_code,
+                                 &offset, &content_length, &is_chunked));
+  EXPECT_EQ(200, http_status_code);
+  EXPECT_EQ(0UL, offset);
+  EXPECT_EQ(string::npos, content_length);
+  EXPECT_FALSE(is_chunked);
+  // Header block still unterminated: Content-Length is not reported yet.
+  response = "HTTP/1.1 200 Ok\r\nHost: example.com\r\nContent-Length: 5\r\n";
+  EXPECT_FALSE(ParseHttpResponse(response, &http_status_code,
+                                 &offset, &content_length, &is_chunked));
+  EXPECT_EQ(200, http_status_code);
+  EXPECT_EQ(string::npos, content_length);
+  EXPECT_FALSE(is_chunked);
+  // Header block terminated by an empty line, but no body bytes are present.
+  response = "HTTP/1.1 200 Ok\r\nHost: example.com\r\n"
+      "Content-Length: 5\r\n\r\n";
+  EXPECT_FALSE(ParseHttpResponse(response, &http_status_code,
+                                 &offset, &content_length, &is_chunked));
+  EXPECT_EQ(200, http_status_code);
+  EXPECT_EQ(response.size(), offset);
+  EXPECT_EQ(5UL, content_length);
+  EXPECT_FALSE(is_chunked);
+}
+
+TEST(IoutilTest, ParseHttpResponseShortBody) {
+  // Only 1 of the 5 body bytes announced by Content-Length is present.
+  string data = "HTTP/1.1 200 Ok\r\nHost: example.com\r\n"
+      "Content-Length: 5\r\n\r\nH";
+  int status = 0;
+  size_t body_pos = string::npos;
+  size_t length = string::npos;
+  bool chunked = false;
+  EXPECT_FALSE(ParseHttpResponse(data, &status, &body_pos, &length, &chunked));
+  EXPECT_EQ(200, status);
+  EXPECT_EQ(data.size() - 1, body_pos);
+  EXPECT_EQ(5UL, length);
+  EXPECT_FALSE(chunked);
+}
+
+TEST(IoutilTest, ParseHttpResponseChunked) {
+  // Chunked response: the reported offset must point at the first chunk size.
+  string data = "HTTP/1.1 200 Ok\r\nHost: example.com\r\n"
+      "Transfer-Encoding: chunked\r\n\r\n5\r\nhello";
+  int status = 0;
+  size_t body_pos = string::npos;
+  size_t length = string::npos;
+  bool chunked = false;
+  EXPECT_TRUE(ParseHttpResponse(data, &status, &body_pos, &length, &chunked));
+  EXPECT_EQ(200, status);
+  EXPECT_EQ(data.size() - strlen("5\r\nhello"), body_pos);
+  EXPECT_EQ(string::npos, length);
+  EXPECT_TRUE(chunked);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldParse) {
+  // Placeholder header, then chunks "con" + "sequence" and the last chunk.
+  static const char* kResponse =
+      "Dummy\r\n\r\n3\r\ncon\r\n8\r\nsequence\r\n0\r\n\r\n";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = string::npos;
+
+  EXPECT_TRUE(ParseChunkedBody(kResponse,
+                               body_offset, &remaining, &chunks));
+  EXPECT_EQ(0U, remaining);
+  EXPECT_EQ(2U, chunks.size());
+  const string dechunked = CombineChunks(chunks);
+  EXPECT_EQ(11U, dechunked.size());
+  EXPECT_EQ("consequence", dechunked);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldSkipChunkExtension) {
+  // Placeholder header; the first chunk size carries the extension ";n=v".
+  static const char* kResponse =
+      "Dummy\r\n\r\n3;n=v\r\ncon\r\n8\r\nsequence\r\n0\r\n\r\n";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = string::npos;
+
+  EXPECT_TRUE(ParseChunkedBody(kResponse,
+                               body_offset, &remaining, &chunks));
+  EXPECT_EQ(0U, remaining);
+  EXPECT_EQ(2U, chunks.size());
+  const string dechunked = CombineChunks(chunks);
+  EXPECT_EQ(11U, dechunked.size());
+  EXPECT_EQ("consequence", dechunked);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldIgnoreOriginalDechunkedData) {
+  // Placeholder header; |chunks| starts non-empty and must not keep old data.
+  static const char* kResponse =
+      "Dummy\r\n\r\n3;n=v\r\ncon\r\n8\r\nsequence\r\n0\r\n\r\n";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  chunks.push_back("con");
+  size_t remaining = string::npos;
+
+  EXPECT_TRUE(ParseChunkedBody(kResponse,
+                               body_offset, &remaining, &chunks));
+  EXPECT_EQ(0U, remaining);
+  EXPECT_EQ(2U, chunks.size());
+  const string dechunked = CombineChunks(chunks);
+  EXPECT_EQ(11U, dechunked.size());
+  EXPECT_EQ("consequence", dechunked);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldReturnFalseWithShortChunk) {
+  // Placeholder header; the second chunk announces 8 bytes but holds only 3.
+  static const char* kResponse = "Dummy\r\n\r\n3\r\ncon\r\n8\r\nseq";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = string::npos;
+
+  EXPECT_FALSE(ParseChunkedBody(kResponse,
+                                body_offset, &remaining, &chunks));
+  EXPECT_GT(remaining, 0U);
+  EXPECT_NE(string::npos, remaining);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldReturnFalseIfLengthNotReady) {
+  // Placeholder header with no body bytes after it at all.
+  static const char* kResponse = "Dummy\r\n\r\n";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = string::npos;
+
+  EXPECT_FALSE(ParseChunkedBody(kResponse,
+                               body_offset, &remaining, &chunks));
+  EXPECT_GT(remaining, 0U);
+  EXPECT_NE(string::npos, remaining);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldReturnTrueWithIllInput) {
+  // Placeholder header; the body is a bare CRLF with no chunk size.
+  static const char* kResponse = "Dummy\r\n\r\n\r\n";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = 0;  // Differs from npos so the check below is meaningful.
+
+  EXPECT_TRUE(ParseChunkedBody(kResponse,
+                               body_offset, &remaining, &chunks));
+  EXPECT_EQ(string::npos, remaining);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldReturnFalseEvenIfSizeIsMuchLarger) {
+  // Placeholder header; the chunk announces 3 bytes but only 1 is present.
+  string response = "Dummy\r\n\r\n3\r\na";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = string::npos;  // Rejected below unless overwritten.
+  size_t orig_len = response.size();
+
+  response.resize(1000);
+  StringPiece resp(response.data(), orig_len);
+  EXPECT_FALSE(ParseChunkedBody(resp,
+                                body_offset, &remaining, &chunks));
+  EXPECT_GT(remaining, 0U);
+  EXPECT_NE(string::npos, remaining);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldReturnFalseIfEndWithChunkLength) {
+  // Placeholder header; the data ends right after the chunk size digit.
+  string response = "Dummy\r\n\r\n3";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = string::npos;  // Rejected below unless overwritten.
+  size_t orig_len = response.size();
+
+  response.resize(1000);
+  StringPiece resp(response.data(), orig_len);
+  EXPECT_FALSE(ParseChunkedBody(resp,
+                                body_offset, &remaining, &chunks));
+  EXPECT_GT(remaining, 0U);
+  EXPECT_NE(string::npos, remaining);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldReturnTrueIfChunkIsBroken) {
+  // Placeholder header; the chunk data "con" is not followed by CRLF.
+  string response = "Dummy\r\n\r\n3\r\ncon128\r\nseq";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = 0;  // Differs from npos so the check below is meaningful.
+  size_t orig_len = response.size();
+
+  StringPiece resp(response.data(), orig_len);
+  EXPECT_TRUE(ParseChunkedBody(resp,
+                               body_offset, &remaining, &chunks));
+  EXPECT_EQ(string::npos, remaining);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldReturnTrueIfChunkLengthIsBroken) {
+  // Placeholder header; junk follows the chunk size without a ';'.
+  string response = "Dummy\r\n\r\n3omg_broken_extension\r\nfoo\r\n";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = 0;  // Differs from npos so the check below is meaningful.
+  size_t orig_len = response.size();
+
+  StringPiece resp(response.data(), orig_len);
+  EXPECT_TRUE(ParseChunkedBody(resp,
+                               body_offset, &remaining, &chunks));
+  EXPECT_EQ(string::npos, remaining);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldReturnFalseIfLengthNotComplete) {
+  // Placeholder header; the data ends right after the last-chunk size digit.
+  string response = "Dummy\r\n\r\n3\r\nfoo\r\n0";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = string::npos;  // Rejected below unless overwritten.
+  size_t orig_len = response.size();
+
+  response.resize(1000);
+  StringPiece resp(response.data(), orig_len);
+  EXPECT_FALSE(ParseChunkedBody(resp,
+                                body_offset, &remaining, &chunks));
+  EXPECT_GT(remaining, 0U);
+  EXPECT_NE(string::npos, remaining);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldReturnTrueIfOffsetIsWrong) {
+  // The data is only 3 bytes long, so body_offset points past its end.
+  string response = "foo";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = 0;  // Differs from npos so the check below is meaningful.
+  size_t orig_len = response.size();
+
+  response.resize(1000);
+  StringPiece resp(response.data(), orig_len);
+  EXPECT_TRUE(ParseChunkedBody(resp,
+                               body_offset, &remaining, &chunks));
+  EXPECT_EQ(string::npos, remaining);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldReturnTrueIfLengthIsNegativeNumber) {
+  // Placeholder header; the chunk size field is "-1".
+  string response = "Dummy\r\n\r\n-1\r\n";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = 0;  // Differs from npos so the check below is meaningful.
+  size_t orig_len = response.size();
+
+  response.resize(1000);
+  StringPiece resp(response.data(), orig_len);
+  EXPECT_TRUE(ParseChunkedBody(resp,
+                               body_offset, &remaining, &chunks));
+  EXPECT_EQ(string::npos, remaining);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldReturnFalseIfNoBody) {
+  // Header only; the offset passed in equals the data length.
+  string response = "dummy\r\n";
+  std::vector<StringPiece> chunks;
+  size_t remaining = string::npos;  // Rejected below unless overwritten.
+  size_t orig_len = response.size();
+
+  response.resize(1000);
+  StringPiece resp(response.data(), orig_len);
+  EXPECT_FALSE(ParseChunkedBody(resp,
+                                orig_len, &remaining, &chunks));
+  EXPECT_GT(remaining, 0U);
+  EXPECT_NE(string::npos, remaining);
+}
+
+TEST(IoutilTest, ShouldParseCrimeMitigation) {
+  // CRIME mitigation does the following to obfuscate the record length:
+  // 1. Add a particular number of leading zeros to the size string
+  // 2. Sub-chunk the body into even smaller chunks
+  //
+  // See:
+  // - go/crime-mitigation-at-gfe-faq
+  // - go/crime-mitigation-at-gfe
+  static const char* kResponse =
+      "HTTP/1.1 200 OK\r\n"
+      "Transfer-Encoding: chunked\r\n"
+      "Content-Type: text/plain\r\n"
+      "\r\n"
+      "000004\r\n"
+      "abcd\r\n"
+      "0016\r\n"
+      "efghijklmnopqrstuvwxyz\r\n"
+      "0\r\n"
+      "\r\n";
+  int http_status_code = 0;
+  size_t offset = string::npos;
+  size_t content_length = string::npos;
+  bool is_chunked = false;
+  EXPECT_TRUE(ParseHttpResponse(kResponse, &http_status_code,
+                                &offset, &content_length, &is_chunked));
+  EXPECT_EQ(200, http_status_code);
+  EXPECT_EQ(string::npos, content_length);
+  EXPECT_TRUE(is_chunked);
+
+  std::vector<StringPiece> chunks;
+  size_t remaining = string::npos;
+
+  EXPECT_TRUE(ParseChunkedBody(kResponse,
+                               offset, &remaining, &chunks));
+  EXPECT_EQ(0U, remaining);
+  EXPECT_EQ(2U, chunks.size());
+  const string dechunked = CombineChunks(chunks);
+  EXPECT_EQ(26U, dechunked.size());
+  EXPECT_EQ("abcdefghijklmnopqrstuvwxyz", dechunked);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldRequireCrlfAfterLastChunk) {
+  // Placeholder header; the last chunk lacks the final empty line.
+  string response = "dummy\r\n\r\n0\r\n";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = string::npos;  // Rejected below unless overwritten.
+  size_t orig_len = response.size();
+
+  response.resize(1000);
+  StringPiece resp(response.data(), orig_len);
+  EXPECT_FALSE(ParseChunkedBody(resp,
+                                body_offset, &remaining, &chunks));
+  EXPECT_GT(remaining, 0U);
+  EXPECT_NE(string::npos, remaining);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldRequireCrlfAfterTrailer) {
+  // Placeholder header; the trailer lacks the final empty line.
+  string response = "dummy\r\n\r\n0\r\nX-header: x\r\n";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = string::npos;  // Rejected below unless overwritten.
+  size_t orig_len = response.size();
+
+  response.resize(1000);
+  StringPiece resp(response.data(), orig_len);
+  EXPECT_FALSE(ParseChunkedBody(resp,
+                                body_offset, &remaining, &chunks));
+  EXPECT_GT(remaining, 0U);
+  EXPECT_NE(string::npos, remaining);
+}
+
+TEST(IoutilTest, ParseChunkedBodyTrailerNotHavingCRLF) {
+  // Placeholder header; the trailer line is cut before its CRLF.
+  string response = "dummy\r\n\r\n0\r\nX-header: x";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = string::npos;  // Must be overwritten by the parser.
+
+  EXPECT_FALSE(ParseChunkedBody(response, body_offset, &remaining, &chunks));
+  EXPECT_EQ(remaining, 4U);
+}
+
+TEST(IoutilTest, ParseChunkedBodyTrailerEndsWithCR) {
+  // Placeholder header; the trailer line is cut right after its CR.
+  string response = "dummy\r\n\r\n0\r\nX-header: x\r";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = string::npos;  // Must be overwritten by the parser.
+
+  EXPECT_FALSE(ParseChunkedBody(response, body_offset, &remaining, &chunks));
+  EXPECT_EQ(remaining, 3U);
+}
+
+TEST(IoutilTest, ParseChunkedBodyTrailerEndsWithCRLF) {
+  // Placeholder header; the trailer line is complete, the final CRLF missing.
+  string response = "dummy\r\n\r\n0\r\nX-header: x\r\n";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = string::npos;  // Must be overwritten by the parser.
+
+  EXPECT_FALSE(ParseChunkedBody(response, body_offset, &remaining, &chunks));
+  EXPECT_EQ(remaining, 2U);
+}
+
+TEST(IoutilTest, ParseChunkedBodyTrailerEndsWithCRLFCR) {
+  // Placeholder header; only the very last LF is missing.
+  string response = "dummy\r\n\r\n0\r\nX-header: x\r\n\r";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining = string::npos;  // Must be overwritten by the parser.
+
+  EXPECT_FALSE(ParseChunkedBody(response, body_offset, &remaining, &chunks));
+  EXPECT_EQ(remaining, 1U);
+}
+
+TEST(IoutilTest, ParseChunkedBodyShouldIgnoreTrailer) {
+  // Placeholder header; a last chunk with one trailer line, fully terminated.
+  string response = "dummy\r\n\r\n0\r\nX-header: x\r\n\r\n";
+  const size_t body_offset = 9;  // Index to start HTTP body.
+  std::vector<StringPiece> chunks;
+  size_t remaining;
+  size_t orig_len = response.size();
+
+  response.resize(1000);
+  StringPiece resp(response.data(), orig_len);
+  EXPECT_TRUE(ParseChunkedBody(resp,
+                               body_offset, &remaining, &chunks));
+}
+
+TEST(IoutilTest, StringRstrip) {
+  EXPECT_EQ("abc", StringRstrip("abc"));
+  EXPECT_EQ("", StringRstrip(""));
+  EXPECT_EQ("abc", StringRstrip("abc\n"));
+  EXPECT_EQ("abc", StringRstrip("abc\r\n"));
+  EXPECT_EQ("abc", StringRstrip("abc\r"));
+  EXPECT_EQ("abc", StringRstrip("abc \r\n"));
+  EXPECT_EQ("abc", StringRstrip("abc \r\n\v\f"));
+  EXPECT_EQ("ab c", StringRstrip("ab c\r\n"));
+  EXPECT_EQ("ab\nc", StringRstrip("ab\nc\r\n"));
+  EXPECT_EQ(" abc", StringRstrip(" abc\r\n"));
+  EXPECT_EQ("", StringRstrip("\r\n "));
+}
+
+TEST(IoutilTest, StringStrip) {
+  // Each row is {input, expected result of StringStrip(input)}.
+  static const char* const kCases[][2] = {
+      {"abc", "abc"}, {"", ""}, {"\nabc\n", "abc"}, {"\r\nabc\r\n", "abc"},
+      {"\rabc\r", "abc"}, {" \r\n abc \r\n", "abc"},
+      {"\v\f \r\n abc \r\n\v\f", "abc"}, {"\r\n ab c\r\n", "ab c"},
+      {"\r\n ab\nc\r\n", "ab\nc"}, {"\r\n ", ""},
+  };
+  for (const auto& row : kCases) {
+    EXPECT_EQ(row[1], StringStrip(row[0]));
+  }
+}
+
+TEST(IoutilTest, ChunkedTransferEncodingWithTwoSpace) {
+  static const char* kResponse =
+      "HTTP/1.1 200 OK\r\n"
+      "Server: Apache\r\n"
+      "ETag: \"1d62405a828ad0e52bf86a946ec2113f:1407205214\"\r\n"
+      "Last-Modified: Tue, 05 Aug 2014 02:20:14 GMT\r\n"
+      "Date: Tue, 05 Aug 2014 02:38:45 GMT\r\n"
+      "Transfer-Encoding:  chunked\r\n"
+      "Connection: keep-alive\r\n"
+      "Connection: Transfer-Encoding\r\n"
+      "Content-Type: application/pkix-crl\r\n"
+      "\r\n";
+  // Note the two spaces after "Transfer-Encoding:" in the header above.
+  int http_status_code = 0;
+  size_t offset = string::npos;
+  size_t content_length = string::npos;
+  bool is_chunked = false;
+  EXPECT_TRUE(ParseHttpResponse(kResponse, &http_status_code,
+                                &offset, &content_length, &is_chunked));
+  EXPECT_EQ(200, http_status_code);
+  EXPECT_EQ(string::npos, content_length);
+  EXPECT_TRUE(is_chunked);
+}
+
+TEST(IoutilTest, ParseQuery) {
+  std::map<string, string> params = ParseQuery("");
+  EXPECT_TRUE(params.empty());
+  // A trailing '&' must not produce an extra parameter.
+  static const char* kQuery = "a=b&";
+  params = ParseQuery(kQuery);
+  EXPECT_EQ(1U, params.size());
+  EXPECT_EQ("b", params["a"]);
+  // OAuth2-style query ending in '#': the '#' must not end up in |code|.
+  static const char* kQueryOAuth2 =
+      "state=11882510b1cfd97f015760171d03ec70235880b224fecd15ea1fe490263911d1"
+      "&code=4/bfLfMrXvbZ30pYyjloOqCorPiowNEy6Uqeh_oECiGQ8#";
+  params = ParseQuery(kQueryOAuth2);
+  EXPECT_EQ(2U, params.size());
+  EXPECT_EQ("4/bfLfMrXvbZ30pYyjloOqCorPiowNEy6Uqeh_oECiGQ8", params["code"]);
+  EXPECT_EQ("11882510b1cfd97f015760171d03ec70235880b224fecd15ea1fe490263911d1",
+            params["state"]);
+}
+
+}  // namespace devtools_goma
diff --git a/client/jar_parser.cc b/client/jar_parser.cc
new file mode 100644
index 0000000..d09cb40
--- /dev/null
+++ b/client/jar_parser.cc
@@ -0,0 +1,229 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "jar_parser.h"
+
+#include <limits.h>
+#include <string.h>
+
+#include <memory>
+
+#include "basictypes.h"
+#include "glog/logging.h"
+#include "minizip/unzip.h"
+#include "path.h"
+#include "split.h"
+#include "string_piece_utils.h"
+#ifdef _WIN32
+# include "config_win.h"
+#endif
+
+namespace devtools_goma {
+
+// JarParser carries no state, so there is nothing to set up.
+JarParser::JarParser() {}
+
+static void AddJarFile(const string& jar_file, const string& cwd,
+                       std::set<string>* jar_files);
+
+// Extracts the Class-Path entries ending in ".jar" from the NUL-terminated
+// manifest text |content| (modified in place) and adds each via AddJarFile().
+static void ReadManifest(char* content, const string& cwd,
+                         std::set<string>* jar_files) {
+  // Manifest lines look like HTTP headers ("key: value<newline>"); only the
+  // Class-Path value is used.
+  static const char kClassPathHeader[] = "Class-Path: ";
+  const size_t kClassPathHeaderSize = strlen(kClassPathHeader);
+  char* p = content;
+  // Advance line by line until one starts with the Class-Path header.
+  while (strncmp(p, kClassPathHeader, kClassPathHeaderSize) != 0) {
+    p = strchr(p, '\n');
+    if (!p) {
+      return;
+    }
+    p++;
+  }
+  p += kClassPathHeaderSize;
+
+  // The JAR spec allows CRLF, LF or CR line endings, so cut the value at
+  // whichever comes first.  Continuation lines are not joined here.
+  char* end = strpbrk(p, "\r\n");
+  if (end) {
+    *end = '\0';
+  }
+
+  std::vector<string> class_pathes;
+  SplitStringUsing(p, " ", &class_pathes);
+  for (const auto& path : class_pathes) {
+    if (strings::EndsWith(path, ".jar")) {
+      AddJarFile(path, cwd, jar_files);
+    }
+  }
+}
+
+// Owns the minizip handle opened for |path|: on destruction it closes any
+// entry that is still open and then the archive itself.
+class ScopedUnzFile {
+ public:
+  explicit ScopedUnzFile(const char* path)
+      : path_(path), unz_file_(unzOpen64(path)), open_current_(false) {}
+  ~ScopedUnzFile() {
+    if (open_current_) {
+      unzCloseCurrentFile(unz_file_);
+      open_current_ = false;
+    }
+    if (IsValid()) {
+      int err = unzClose(unz_file_);
+      LOG_IF(WARNING, err != UNZ_OK) << "unzClose path=" << path_
+                                     << " err=" << err;
+    }
+  }
+
+  // True if unzOpen64() returned a handle.
+  bool IsValid() const {
+    return unz_file_ != nullptr;
+  }
+
+  int GetGlobalInfo64(unz_global_info64* info) {
+    return unzGetGlobalInfo64(unz_file_, info);
+  }
+
+  int GetCurrentFileInfo64(unz_file_info64* fileinfo,
+                           char* filename, unsigned long filename_bufsize,
+                           void* extra, unsigned long extra_bufsize,
+                           char* comment, unsigned long comment_bufsize) {
+    return unzGetCurrentFileInfo64(unz_file_, fileinfo,
+                                   filename, filename_bufsize,
+                                   extra, extra_bufsize,
+                                   comment, comment_bufsize);
+  }
+
+  int OpenCurrentFile() {
+    DCHECK(!open_current_) << path_;
+    int err = unzOpenCurrentFile(unz_file_);
+    open_current_ = (err == UNZ_OK);
+    return err;
+  }
+
+  int ReadCurrentFile(void* buf, unsigned len) {
+    DCHECK(open_current_) << path_;
+    return unzReadCurrentFile(unz_file_, buf, len);
+  }
+
+  int CloseCurrentFile() {
+    open_current_ = false;
+    return unzCloseCurrentFile(unz_file_);
+  }
+
+  int GoToNextFile() {
+    DCHECK(!open_current_) << path_;
+    return unzGoToNextFile(unz_file_);
+  }
+
+ private:
+  const string path_;
+  unzFile unz_file_;
+  bool open_current_;
+  DISALLOW_COPY_AND_ASSIGN(ScopedUnzFile);
+};
+
+// Adds |jar_file| (resolved against |cwd|) to |jar_files| and, on first
+// sight, follows the Class-Path entries of its META-INF/MANIFEST.MF.
+static void AddJarFile(const string& jar_file, const string& cwd,
+                       std::set<string>* jar_files) {
+  const string& jar_path = file::JoinPathRespectAbsolute(cwd, jar_file);
+  if (!jar_files->insert(jar_path).second) {
+    return;  // Already recorded; also ends Class-Path cycles.
+  }
+
+  LOG(INFO) << "Reading jar file: " << jar_path;
+
+  string basedir;
+#ifndef _WIN32
+  char SEP = '/';
+#else
+  char SEP = '\\';
+#endif
+  size_t last_sep_pos = jar_path.rfind(SEP);
+  if (last_sep_pos != string::npos) {
+    basedir = jar_path.substr(0, last_sep_pos);
+  }
+
+  ScopedUnzFile scoped_jar(jar_path.c_str());
+  if (!scoped_jar.IsValid()) {
+    LOG(WARNING) << "Not jar archive? (unzOpen64):" << jar_path;
+    return;
+  }
+
+  unz_global_info64 jar_info;
+  int err = scoped_jar.GetGlobalInfo64(&jar_info);
+  if (err) {
+    LOG(WARNING) << "Broken jar archive? (unzGetGlobalInfo64): " << jar_path
+                 << " err=" << err;
+    return;
+  }
+
+  for (ZPOS64_T i = 0; i < jar_info.number_entry; i++) {
+    unz_file_info64 fileinfo;
+    char filename[PATH_MAX];
+    // minizip omits the NUL when the name fills the buffer; reserve one byte.
+    err = scoped_jar.GetCurrentFileInfo64(
+        &fileinfo, filename, sizeof(filename) - 1, nullptr, 0, nullptr, 0);
+    filename[sizeof(filename) - 1] = '\0';
+    if (err) {
+      LOG(WARNING) << "Broken jar archive? (unzGetCurrentFileInfo64): "
+                   << jar_path << " err=" << err;
+      return;
+    }
+
+    if (!strcmp(filename, "META-INF/MANIFEST.MF")) {
+      err = scoped_jar.OpenCurrentFile();
+      if (err) {
+        LOG(WARNING) << "Broken jar archive? (unzOpenCurrentFile): "
+                     << jar_path << " err=" << err;
+        return;
+      }
+      size_t sz = static_cast<size_t>(fileinfo.uncompressed_size);
+      std::unique_ptr<char[]> buf(new char[sz + 1]);
+      err = scoped_jar.ReadCurrentFile(buf.get(), sz);
+      if (err < 0) {
+        LOG(WARNING) << "Broken jar archive? (unzReadCurrentFile): "
+                     << jar_path << " err=" << err;
+        return;
+      }
+      // Terminate at the bytes actually read, which may be fewer than |sz|.
+      buf[err] = '\0';
+      ReadManifest(buf.get(), basedir, jar_files);
+      err = scoped_jar.CloseCurrentFile();
+      LOG_IF(WARNING, err != UNZ_OK) << "CloseCurrentFile: " << jar_path
+                                     << " err=" << err;
+      return;
+    }
+
+    err = scoped_jar.GoToNextFile();
+    if (err == UNZ_END_OF_LIST_OF_FILE) {
+      break;
+    }
+    if (err) {
+      LOG(WARNING) << "Broken jar archive? (unzGoToNextFile): " << jar_path
+                   << " err=" << err;
+      return;
+    }
+  }
+
+  if (!strings::EndsWith(jar_file, ".zip")) {
+    LOG(WARNING) << jar_file << " doesn't contain manifest";
+  }
+}
+
+// Passes each of |input_jar_files| to AddJarFile() together with |cwd|.
+void JarParser::GetJarFiles(const std::vector<string>& input_jar_files,
+                            const string& cwd, std::set<string>* jar_files) {
+  for (const string& jar : input_jar_files) {
+    AddJarFile(jar, cwd, jar_files);
+  }
+}
+
+}  //  namespace devtools_goma
diff --git a/client/jar_parser.h b/client/jar_parser.h
new file mode 100644
index 0000000..03cbc86
--- /dev/null
+++ b/client/jar_parser.h
@@ -0,0 +1,30 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_JAR_PARSER_H_
+#define DEVTOOLS_GOMA_CLIENT_JAR_PARSER_H_
+
+#include <set>
+#include <string>
+#include <vector>
+
+using std::string;
+
+namespace devtools_goma {
+
+class JarParser {
+ public:
+  JarParser();
+
+  // Adds |input_jar_files| (resolved against |cwd|) plus the jars they
+  // require to |jar_files|.  TODO: return additional class paths as well?
+  void GetJarFiles(const std::vector<string>& input_jar_files,
+                   const string& cwd,
+                   std::set<string>* jar_files);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_JAR_PARSER_H_
diff --git a/client/jar_parser_unittest.cc b/client/jar_parser_unittest.cc
new file mode 100644
index 0000000..a45bcd8
--- /dev/null
+++ b/client/jar_parser_unittest.cc
@@ -0,0 +1,89 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include <limits.h>
+
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "file.h"
+#include "ioutil.h"
+#include "jar_parser.h"
+#include "mypath.h"
+#include "path.h"
+#include "unittest_util.h"
+#include "util.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// Note: Do not assume JDK is installed on Windows.  For Windows build, we use
+//       prebuilt JAR files for testing.
+
+class JarParserTest : public testing::Test {
+ public:
+  void SetUp() override {
+    tmpdir_util_.reset(new TmpdirUtil("jar_parser_unittest"));
+    tmpdir_util_->SetCwd("");
+  }
+
+ protected:
+  string CopyArchiveIntoTestDir(const string& test_name,
+                                const string& archive) {
+    // This binary is build\Release\jar_parser_unittest.exe (msvs) or
+    // out\Release\jar_parser_unittest.exe (ninja), so test/ is two levels up.
+    const string parent_dir = file::JoinPath(GetMyDirectory(), "..");
+    const string top_dir = file::JoinPath(parent_dir, "..");
+    const string test_dir = file::JoinPath(top_dir, "test");
+    const string source_file = file::JoinPath(test_dir, test_name + ".jar");
+    const string output_file = tmpdir_util_->FullPath(archive);
+    CHECK(File::Copy(source_file.c_str(), output_file.c_str(), false));
+    return output_file;
+  }
+
+  std::unique_ptr<TmpdirUtil> tmpdir_util_;
+};
+
+TEST_F(JarParserTest, Basic) {
+  std::vector<string> input_jar_files;
+  // The copied archive is expected to be the only jar reported.
+  const string& jar = CopyArchiveIntoTestDir("Basic", "foo.jar");
+  input_jar_files.push_back(jar);
+
+  JarParser parser;
+  std::set<string> jar_files;
+  parser.GetJarFiles(input_jar_files, tmpdir_util_->tmpdir(), &jar_files);
+  ASSERT_EQ(1U, jar_files.size());
+  EXPECT_EQ(jar, *jar_files.begin());
+}
+
+TEST_F(JarParserTest, ReadManifest) {
+  std::vector<string> input_jar_files;
+
+  // Only bar.jar is handed to the parser; foo.jar is placed next to it and is
+  // expected to be picked up as well (presumably via bar.jar's Class-Path).
+  const string& foo_jar = CopyArchiveIntoTestDir("Basic", "foo.jar");
+  const string& bar_jar = CopyArchiveIntoTestDir("ReadManifest", "bar.jar");
+
+  // Dup should be ignored.
+  input_jar_files.push_back(bar_jar);
+  input_jar_files.push_back(bar_jar);
+
+  JarParser parser;
+  std::set<string> jar_files_set;
+  parser.GetJarFiles(input_jar_files, tmpdir_util_->tmpdir(), &jar_files_set);
+  std::vector<string> jar_files(jar_files_set.begin(), jar_files_set.end());
+  ASSERT_EQ(2U, jar_files.size());
+  EXPECT_EQ(bar_jar, jar_files[0]);
+  EXPECT_EQ(foo_jar, jar_files[1]);
+}
+
+}  // namespace devtools_goma
diff --git a/client/jarfile_normalizer.cc b/client/jarfile_normalizer.cc
new file mode 100644
index 0000000..4aec701
--- /dev/null
+++ b/client/jarfile_normalizer.cc
@@ -0,0 +1,77 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <cstdlib>
+#include <iostream>
+#include <string>
+
+#include "glog/logging.h"
+#include "goma_init.h"
+#include "jarfile_reader.h"
+#include "scoped_fd.h"
+
+namespace devtools_goma {
+
+// Copies a jar through JarFileReader into a newly created output file.
+class JarFileNormalizer {
+ public:
+  JarFileNormalizer(char* input, char* output)
+      : input_(input), output_(output) {}
+
+  // Returns false (after printing to stderr) if the input is not usable, the
+  // output cannot be created exclusively, or a read fails.
+  bool DoNormalize() {
+    JarFileReader reader(input_);
+    if (!reader.valid()) {
+      std::cerr << "input file: " << input_ << " is invalid. "
+                << "not exist or not a valid jar file." << std::endl;
+      return false;
+    }
+    ScopedFd out(ScopedFd::CreateExclusive(output_, 0644));
+    if (!out.valid()) {
+      std::cerr << "output file: " << output_ << " cannot be opened."
+                << " file exists or permission denied." << std::endl;
+      return false;
+    }
+
+    char buf[4096];
+    ssize_t read_bytes;
+    do {
+      read_bytes = reader.Read(buf, sizeof(buf));
+      if (read_bytes < 0) {
+        std::cerr << "failed to read." << std::endl;
+        return false;
+      }
+      CHECK(out.Write(buf, read_bytes) == read_bytes);
+      // A short read is taken to mean the input is exhausted.
+    } while (static_cast<size_t>(read_bytes) >= sizeof(buf));
+    return out.Close();
+  }
+
+ private:
+  const std::string input_;
+  const std::string output_;
+};
+
+}  // namespace devtools_goma
+
+int main(int argc, char *argv[], const char** envp) {
+  devtools_goma::Init(argc, argv, envp);
+  devtools_goma::InitLogging(argv[0]);
+  // Exactly two operands are required: the source and the destination jar.
+  if (argc != 3) {
+    std::cerr << argv[0] << " [source jar file] [destination jar file]"
+              << std::endl;
+    std::cerr << "e.g.: " << argv[0] << " test/Basic.jar /tmp/normalized.jar"
+              << std::endl;
+    std::exit(EXIT_FAILURE);
+  }
+  devtools_goma::JarFileNormalizer normalizer(argv[1], argv[2]);
+  if (!normalizer.DoNormalize()) {
+    std::cerr << "Failed to normalize." << std::endl;
+    std::exit(EXIT_FAILURE);
+  }
+
+  return EXIT_SUCCESS;
+}
diff --git a/client/jarfile_reader.cc b/client/jarfile_reader.cc
new file mode 100644
index 0000000..4f13903
--- /dev/null
+++ b/client/jarfile_reader.cc
@@ -0,0 +1,281 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "jarfile_reader.h"
+
+#include <cstring>
+
+#include "basictypes.h"
+#include "glog/logging.h"
+#include "string_piece_utils.h"
+
+namespace {
+
+static uint16_t ToUInt16(const char* ptr) {  // little-endian: ptr[0] is low
+  return (static_cast<uint8_t>(ptr[1]) << 8) | static_cast<uint8_t>(ptr[0]);
+}
+
+static uint32_t ToUInt32(const char* ptr) {
+  // Widen before shifting; an int-promoted byte would overflow on << 24.
+  const uint8_t* b = reinterpret_cast<const uint8_t*>(ptr);
+  return uint32_t{b[0]} | (uint32_t{b[1]} << 8) | (uint32_t{b[2]} << 16) |
+         (uint32_t{b[3]} << 24);
+}
+
+}  // namespace
+
+namespace devtools_goma {
+
+/* static */
+std::unique_ptr<FileReader> JarFileReader::Create(const std::string& filename) {
+  // Returns nullptr unless |filename| is a valid jar that needs normalizing.
+  std::unique_ptr<FileReader> result;
+  if (CanHandle(filename)) {
+    std::unique_ptr<JarFileReader> jar(new JarFileReader(filename));
+    if (jar->valid() && jar->is_recommended()) {
+      // TODO: vlog if too chatty.  Jar files are few and this is logged once
+      // per file read by compiler_proxy, so INFO should be quiet enough.
+      LOG(INFO) << "JarFileReader is used. filename=" << filename;
+      result = std::move(jar);
+    }
+  }
+  return result;
+}
+
+/* static */  // Decides by the ".jar" file name suffix only; no file access.
+bool JarFileReader::CanHandle(const std::string& filename) {
+  return strings::EndsWith(filename, ".jar");
+}
+
+JarFileReader::JarFileReader(const std::string& filename)
+    : FileReader(filename),
+      buffer_head_pos_(0),
+      last_normalized_absolute_pos_(0),
+      is_buffer_normalized_(false),
+      is_central_directory_started_(false),
+      is_valid_(false), is_recommended_(true), offset_(0),
+      input_filename_(filename) {
+  buffer_.resize(0x30);
+  const ssize_t head_bytes = FileReader::Read(&buffer_[0], buffer_.size());
+  if (head_bytes < 0 || static_cast<size_t>(head_bytes) != buffer_.size()) {
+    return;  // Too short (or unreadable) to inspect; is_valid_ stays false.
+  }
+  // If the file looks like a ZIP archive, it might be ok to normalize.
+  // Some jar files used by the Android build seem not to be valid jar files,
+  // but we allow the jarfile reader to normalize them if they look like zip
+  // files. (b/38329025)
+  if (strings::StartsWith(buffer_, "PK\x03\x04")) {
+    is_valid_ = true;
+  }
+  // Checks for the jar file magic (0xcafe) at offset 0x27.  It is unclear
+  // whether a jar without it normalizes correctly, so log such files to make
+  // them easy to find.
+  LOG_IF(WARNING, ToUInt16(buffer_.data() + 0x27) != 0xcafe)
+      << "JarFileReader: the file seems not have jar file magic:"
+      << "expect 0xcafe (little endian) but "
+      << std::hex << ToUInt16(buffer_.data() + 0x27)
+      << " input_filename=" << input_filename_;
+
+  // If ziptime has already been applied, we do not need to normalize.
+  // See also:
+  // https://android.googlesource.com/platform/build/+/master/tools/ziptime/ZipEntry.cpp
+  // kZipTimeStaticDate comes from the ziptime code above: 2008-01-01, encoded
+  // as (year - 1980) << 9 | month << 5 | day.
+  static const uint16_t kZipTimeStaticDate = (2008 - 1980) << 9 | 1 << 5 | 1;
+  static const uint16_t kZipTimeStaticTime = 0;
+  if (ToUInt16(buffer_.data() + 0x0a) == kZipTimeStaticTime &&
+      ToUInt16(buffer_.data() + 0x0c) == kZipTimeStaticDate) {
+    LOG(INFO) << "JarFileReader won't normalize jar file that has already been"
+              << " normalized with ziptime."
+              << " input_filename=" << input_filename_;
+    is_recommended_ = false;
+    return;
+  }
+  // TODO: skip normalize prebuilt jar files.
+  // Currently, we also normalize prebuilt library jar files.
+  // Since such files are also stored in output directory, it is difficult to
+  // distinguish.
+
+  NormalizeBuffer();
+}
+
+ssize_t JarFileReader::ReadDataToBuffer(size_t size) {
+  // Appends up to |size| bytes to buffer_; a failed read appends nothing.
+  const size_t orig_size = buffer_.size();
+  buffer_.resize(orig_size + size);
+  const ssize_t read_bytes = FileReader::Read(&buffer_[orig_size], size);
+  const size_t appended = read_bytes > 0 ? static_cast<size_t>(read_bytes) : 0;
+  buffer_.resize(orig_size + appended);
+  VLOG(2) << "input_filename=" << input_filename_
+          << " read buffer_.size()=" << buffer_.size()
+          << " size=" << size
+          << " read_bytes=" << read_bytes;
+  return read_bytes;
+}
+
+// NormalizeBuffer normalizes the timestamps of ZIP headers found in buffer_.
+//
+// How does it work?
+// 1. Find "PK".
+// 2. The 4-byte signature starting at "PK" tells which header is there, and
+//    the timestamp inside that header is overwritten.
+//
+// A serious way of parsing a .jar file is uncompressing each ZIP entry until
+// the end of compressed data.  This is what the original jar command does.
+// However, as far as I am inspired by zlib/contrib/minizip/unzip.c,
+// just skipping to the signature seems to usually work.
+//
+// See also: https://en.wikipedia.org/wiki/Zip_(file_format)#File_headers
+// Note that the header structure is the same between ZIP and ZIP64.
+void JarFileReader::NormalizeBuffer() {
+  DCHECK_LE(buffer_head_pos_, last_normalized_absolute_pos_)
+      << "buffer_head_pos must be smaller than or equals to "
+      << "last_normalized_absolute_pos_"
+      << " input_filename=" << input_filename_
+      << " buffer_head_pos=" << buffer_head_pos_
+      << " last_normalized_absolute_pos=" << last_normalized_absolute_pos_;
+  // Resume scanning where the previous call stopped (index within buffer_).
+  size_t cur = last_normalized_absolute_pos_ - buffer_head_pos_;
+  is_buffer_normalized_ = true;  // Cleared below if a header may be cut off.
+  for (;;) {
+    cur = buffer_.find("PK", cur);
+    if (cur == string::npos) {
+      // A trailing 'P' may become "PK" with the next read: not normalized yet.
+      if (!buffer_.empty() && buffer_[buffer_.size() - 1] == 'P') {
+        is_buffer_normalized_ = false;
+      } else {
+        last_normalized_absolute_pos_ = buffer_head_pos_ + buffer_.size();
+      }
+      return;
+    }
+    if (cur + 4 > buffer_.size()) {
+      // The 4-byte signature is not completely in buffer_ yet.
+      VLOG(1) << "would cause buffer overrun."
+              << " input_filename=" << input_filename_
+              << " cur=" << cur
+              << " buffer_head_pos=" << buffer_head_pos_
+              << " buffer_.size()=" << buffer_.size();
+      is_buffer_normalized_ = false;
+      return;
+    }
+    ssize_t offset = GetTimestampOffset(&buffer_[cur]);
+    VLOG(3) << "offset:" << offset;
+    if (offset < 0) {
+      cur += 4;  // Not a header with a timestamp; step over these 4 bytes.
+      continue;
+    }
+    if (cur + offset + 4 > buffer_.size()) {
+      // The 4-byte timestamp field is not completely in buffer_ yet.
+      VLOG(1) << "would cause buffer overrun."
+              << " input_filename=" << input_filename_
+              << " cur=" << cur
+              << " buffer_head_pos=" << buffer_head_pos_
+              << " offset=" << offset
+              << " buffer_.size()=" << buffer_.size();
+      is_buffer_normalized_ = false;
+      return;
+    }
+    // Set the timestamp to the epoch time, 1980-01-01T00:00:00.
+    // Note that all 0 represents 1980-00-00T00:00:00, which could be invalid.
+    buffer_[cur + offset + 0] = 0;
+    buffer_[cur + offset + 1] = 0;
+    buffer_[cur + offset + 2] = 0x21;
+    buffer_[cur + offset + 3] = 0;
+    // Advance by the timestamp's offset in the header plus the timestamp
+    // itself (4 bytes), i.e. to the byte just after the timestamp.
+    cur += offset + 4;
+    last_normalized_absolute_pos_ = buffer_head_pos_ + cur;
+  }
+}
+
+ssize_t JarFileReader::GetTimestampOffset(const char* signature) {
+  // Signature values come from the PKWARE APPNOTE:
+  // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
+  static const uint32_t kLocalFileHeaderSignature = 0x04034b50;
+  static const uint32_t kCentralFileHeaderSignature = 0x02014b50;
+
+  const uint32_t u32_signature = ToUInt32(signature);
+  VLOG(3) << "signature:" << std::hex << u32_signature
+          << " input_filename=" << input_filename_
+          << " buffer_head_pos=" << buffer_head_pos_
+          << " last_normalized_absolute_pos=" << last_normalized_absolute_pos_
+          << " offset=" << offset_
+          << " buffer_.size()=" << buffer_.size();
+  switch (u32_signature) {
+    case kLocalFileHeaderSignature:
+      DCHECK(!is_central_directory_started_)
+          << "Local file descriptor signature comes after central directory "
+          << "entry."
+          << " input_filename=" << input_filename_
+          << " buffer_head_pos=" << buffer_head_pos_
+          << " last_normalized_absolute_pos=" << last_normalized_absolute_pos_
+          << " offset_=" << offset_;
+      return 10;  // Offset of the timestamp within a local file header.
+    case kCentralFileHeaderSignature:
+      is_central_directory_started_ = true;
+      return 12;  // Offset of the timestamp within a central file header.
+    default:
+      return -1;  // Unknown signature: nothing to normalize here.
+  }
+}
+
+ssize_t JarFileReader::Read(void* ptr, size_t len) {
+  // TODO: increase kBufSize when it works fine.
+  // A small buffer is good for exercising the code but not for real use.
+  static const size_t kBufSize = 128;
+  // https://en.wikipedia.org/wiki/Zip_(file_format)
+  // The central directory file header should be the largest header.
+  static const size_t kMaxHeaderSize = 46;
+  COMPILE_ASSERT(kBufSize > kMaxHeaderSize,
+                 "Buffer size should be larger than ZIP header size.");
+
+  off_t buffer_head_pos_at_beginning = buffer_head_pos_;
+  if (is_buffer_normalized_) {  // Leftover data is already safe to flush.
+    buffer_head_pos_ += FileReader::FlushDataInBuffer(&buffer_, &ptr, &len);
+  }
+  while (len > 0) {
+    ssize_t read_bytes = ReadDataToBuffer(kBufSize);
+    if (read_bytes < 0) {  // Propagate the read error immediately.
+      return read_bytes;
+    }
+    if (read_bytes != kBufSize) {  // Short read: should be end of the file.
+      VLOG(1) << "input_filename=" << input_filename_
+              << " buffer_head_pos=" << buffer_head_pos_
+              << " buffer_.size()=" << buffer_.size();
+      NormalizeBuffer();
+      // No more data, i.e. no later buffer can complete a header here, so
+      // flush regardless of is_buffer_normalized_.
+      buffer_head_pos_ += FileReader::FlushDataInBuffer(&buffer_, &ptr, &len);
+      break;
+    }
+
+    VLOG(1) << "input_filename=" << input_filename_
+            << " buffer_head_pos=" << buffer_head_pos_
+            << " buffer_.size()=" << buffer_.size();
+    NormalizeBuffer();
+    if (is_buffer_normalized_) {  // Otherwise keep buffering and retry.
+      buffer_head_pos_ += FileReader::FlushDataInBuffer(&buffer_, &ptr, &len);
+    }
+  }
+
+  size_t read_bytes = buffer_head_pos_ - buffer_head_pos_at_beginning;
+  offset_ += read_bytes;  // Seek() checks callers against this position.
+  VLOG(1) << "input_filename=" << input_filename_
+          << " read_bytes=" << read_bytes
+          << " offset_=" << offset_
+          << " buffer_head_pos=" << buffer_head_pos_
+          << " buffer_.size()=" << buffer_.size()
+          << " is_buffer_normalized=" << is_buffer_normalized_;
+  return read_bytes;
+}
+
+off_t JarFileReader::Seek(off_t offset, ScopedFd::Whence whence) const {
+  CHECK_EQ(whence, ScopedFd::SeekAbsolute)
+      << "Sorry, this function only support seek absolute";
+  CHECK_EQ(offset, offset_)
+      << "Sorry, this function expects the user set just next position.";
+  return offset;  // Nothing is moved; the request is only validated.
+}
+
+}  // namespace devtools_goma
diff --git a/client/jarfile_reader.h b/client/jarfile_reader.h
new file mode 100644
index 0000000..937e72c
--- /dev/null
+++ b/client/jarfile_reader.h
@@ -0,0 +1,66 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_JARFILE_READER_H_
+#define DEVTOOLS_GOMA_CLIENT_JARFILE_READER_H_
+
+#include <memory>
+#include <string>
+
+#include "file_reader.h"
+#include "gtest/gtest_prod.h"
+
+namespace devtools_goma {
+
+// A subclass of FileReader that normalizes a Java jar file while reading it.
+//
+// TODO: consider a serious implementation if needed.
+//
+// Limitation:
+// The normalization relies on a heuristic that may fail with a probability
+// of 2/2**32.  If that becomes a real issue, we need to fix it.
+// Also, |len| passed to Read is assumed to usually be about 2MB.  Performance
+// may suffer if |len| is usually smaller than the internal buffer length.
+class JarFileReader : public FileReader {
+ public:
+  ~JarFileReader() override {}
+
+  ssize_t Read(void* ptr, size_t len) override;
+  off_t Seek(off_t offset, ScopedFd::Whence whence) const override;
+  bool valid() const override { return is_valid_; }
+  bool is_recommended() const { return is_recommended_; }
+  static void Register() {  // Installs Create into FileReaderFactory.
+    FileReaderFactory::Register(&Create);
+  }
+
+ private:
+  static std::unique_ptr<FileReader> Create(const std::string& filename);
+  static bool CanHandle(const std::string& filename);
+  explicit JarFileReader(const std::string& filename);
+
+  ssize_t ReadDataToBuffer(size_t size);
+  void NormalizeBuffer();
+  ssize_t GetTimestampOffset(const char *signature);
+
+  friend class JarFileReaderTest;
+  friend class JarFileNormalizer;
+  FRIEND_TEST(JarFileReaderTest, valid);
+
+  // Fields for buffer management.
+  std::string buffer_;
+  off_t buffer_head_pos_;  // Position in the stream of buffer_[0].
+  off_t last_normalized_absolute_pos_;  // Stream position scanned so far.
+  bool is_buffer_normalized_;  // True when buffer_ may be flushed as is.
+  bool is_central_directory_started_;  // Set at the first central header.
+
+  // Fields for the user-facing part.
+  bool is_valid_;  // The file starts with the ZIP local header signature.
+  bool is_recommended_;  // False when ziptime was already applied.
+  off_t offset_;  // Total number of bytes handed out by Read so far.
+  const std::string input_filename_;
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_JARFILE_READER_H_
diff --git a/client/jarfile_reader_unittest.cc b/client/jarfile_reader_unittest.cc
new file mode 100644
index 0000000..b635e6c
--- /dev/null
+++ b/client/jarfile_reader_unittest.cc
@@ -0,0 +1,192 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "jarfile_reader.h"
+
+#include <cmath>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "file.h"
+#include "file_helper.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+#include "gtest/gtest.h"
+#include "mypath.h"
+#include "path.h"
+#include "string_piece.h"
+#include "unittest_util.h"
+
+// How to make jar file to be used as expected.jar.
+// 1. create .jar file.
+// 2. execute jarfile_normalizer to create the normalized jar file.
+// 3. use test/verify_normalized_jar.py to verify normalized jar file.
+
+// How to make jar file to be used as ziptime.jar.
+// 1. create .jar file.
+// 2. execute ziptime in android prebuilts.
+// e.g. prebuilts/build-tools/linux-x86/bin/ziptime
+
+namespace devtools_goma {
+
+class JarFileReaderTest : public testing::Test {
+ public:
+  void SetUp() override {  // Each test works inside a fresh TmpdirUtil.
+    tmpdir_util_.reset(new TmpdirUtil("jar_parser_unittest"));
+    tmpdir_util_->SetCwd("");
+  }
+
+ protected:
+  std::string CopyArchiveIntoTestDir(const std::string& test_name,
+                                     const std::string& archive) {
+    // This binary is in e.g. out\Release, so the jars are in ..\..\test.
+    const std::string parent_dir = file::JoinPath(GetMyDirectory(), "..");
+    const std::string top_dir = file::JoinPath(parent_dir, "..");
+    const std::string test_dir = file::JoinPath(top_dir, "test");
+    const std::string source_file = file::JoinPath(
+        test_dir, test_name + ".jar");
+    const std::string output_file = tmpdir_util_->FullPath(archive);
+    CHECK(File::Copy(source_file.c_str(), output_file.c_str(), false));
+    return output_file;  // Path of the copy named |archive| in the tmpdir.
+  }
+
+  void EnsureDifferentFiles(const std::string& file1,
+                            const std::string& file2) {
+    std::string content1, content2;
+    ASSERT_TRUE(ReadFileToString(file1, &content1));
+    ASSERT_TRUE(ReadFileToString(file2, &content2));
+    ASSERT_NE(content1, content2);
+  }
+
+  void RunTest(const std::string& expected_file,
+               const std::string& orig_file,
+               size_t buf_size) {  // |buf_size|: bytes requested per Read.
+    EnsureDifferentFiles(expected_file, orig_file);
+
+    ScopedFd fd(ScopedFd::OpenForRead(expected_file));
+    ASSERT_TRUE(fd.valid());
+
+    JarFileReader reader(orig_file);
+    ASSERT_TRUE(reader.valid());
+
+    off_t offset = 0;
+    for (int cnt = 0;; ++cnt) {
+      VLOG(1) << "reading: " << cnt * buf_size
+              << " to " << (cnt + 1) * buf_size;
+      std::unique_ptr<char[]> jar_buf(new char[buf_size]);
+      std::unique_ptr<char[]> expected_buf(new char[buf_size]);
+      EXPECT_EQ(offset, reader.Seek(offset, ScopedFd::SeekAbsolute));
+      ssize_t read_bytes = reader.Read(jar_buf.get(), buf_size);
+      if (read_bytes <= 0) {  // The expected file must be exhausted as well.
+        EXPECT_TRUE(fd.Read(expected_buf.get(), buf_size) <= 0);
+        break;
+      }
+      offset += read_bytes;
+
+      ASSERT_EQ(read_bytes, fd.Read(expected_buf.get(), read_bytes));
+      EXPECT_EQ(StringPiece(expected_buf.get(), read_bytes),
+                StringPiece(jar_buf.get(), read_bytes));
+    }
+  }
+
+  void ReadFile(const std::string& jar_file) {  // Reads to the end, no checks.
+    JarFileReader reader(jar_file);
+    ASSERT_TRUE(reader.valid());
+    for (;;) {
+      char buf[4096];
+      ssize_t read_bytes = reader.Read(buf, sizeof(buf));
+      if (read_bytes <= 0) {
+        break;
+      }
+    }
+  }
+
+  bool CanHandle(const std::string& filename) const {
+    return JarFileReader::CanHandle(filename);  // Private; reached as friend.
+  }
+
+  bool IsValid(const std::string& filename) const {
+    JarFileReader reader(filename);
+    return reader.valid();
+  }
+
+  bool IsRecommended(const std::string& filename) const {
+    JarFileReader reader(filename);
+    return reader.is_recommended();
+  }
+
+  std::unique_ptr<TmpdirUtil> tmpdir_util_;
+};
+
+TEST_F(JarFileReaderTest, valid) {
+  // A copy of test/Basic.jar is expected to be accepted by the reader.
+  const std::string basic_jar = CopyArchiveIntoTestDir("Basic", "foo.jar");
+  JarFileReader basic_reader(basic_jar);
+  EXPECT_TRUE(basic_reader.valid());
+}
+
+TEST_F(JarFileReaderTest, ConfirmItNormalizedBasic) {
+  // Reads Basic.jar in 32-byte chunks and compares with Basic_expected.jar.
+  const std::string original_path =
+      CopyArchiveIntoTestDir("Basic", "original.jar");
+  const std::string expected_path =
+      CopyArchiveIntoTestDir("Basic_expected", "expected.jar");
+  RunTest(expected_path, original_path, 32);
+}
+
+TEST_F(JarFileReaderTest, ConfirmItNormalizedComplicated) {
+  const std::string jar_original = CopyArchiveIntoTestDir(
+      "signapk", "original.jar");
+  const std::string jar_expected = CopyArchiveIntoTestDir(
+      "signapk_expected", "expected.jar");
+  // Sweep power-of-two read sizes from 256 bytes (2^8) to 2 MiB (2^21).
+  for (size_t i = 8; i < 22; ++i) {
+    const size_t buf_size = static_cast<size_t>(1) << i;  // exact, no float
+    LOG(INFO) << "buf_size=" << buf_size;
+    RunTest(jar_expected, jar_original, buf_size);
+  }
+}
+
+#if GTEST_HAS_DEATH_TEST && DCHECK_IS_ON()
+// Only this death test depends on DCHECK; the tests below always run.
+TEST_F(JarFileReaderTest, ShouldDieIfLocalFileComesAfterCentralDirectory) {
+  const std::string jar_broken = CopyArchiveIntoTestDir(
+      "Broken", "broken.jar");
+
+  EXPECT_DEATH(ReadFile(jar_broken), "");
+}
+
+#endif  // GTEST_HAS_DEATH_TEST && DCHECK_IS_ON()
+
+TEST_F(JarFileReaderTest, CanHandle) {
+  EXPECT_TRUE(CanHandle("/home/foo/test.jar"));
+  EXPECT_FALSE(CanHandle("/home/foo/test.txt"));
+}
+
+TEST_F(JarFileReaderTest, Valid) {
+  const std::string jar_original = CopyArchiveIntoTestDir(
+      "signapk", "original.jar");
+  const std::string jar_ziptime = CopyArchiveIntoTestDir(
+      "signapk_ziptime", "ziptime.jar");
+  // Note: asm.jar does not have valid jar file magic.
+  const std::string jar_asm = CopyArchiveIntoTestDir(
+      "asm", "asm.jar");
+
+  EXPECT_TRUE(IsValid(jar_original));
+  EXPECT_TRUE(IsValid(jar_ziptime));
+  EXPECT_TRUE(IsValid(jar_asm));
+}
+
+TEST_F(JarFileReaderTest, IsRecommended) {
+  const std::string jar_original = CopyArchiveIntoTestDir(
+      "signapk", "original.jar");
+  const std::string jar_ziptime = CopyArchiveIntoTestDir(
+      "signapk_ziptime", "ziptime.jar");
+
+  EXPECT_TRUE(IsRecommended(jar_original));
+  EXPECT_FALSE(IsRecommended(jar_ziptime));
+}
+
+}  // namespace devtools_goma
diff --git a/client/json_util.cc b/client/json_util.cc
new file mode 100644
index 0000000..46ee1b6
--- /dev/null
+++ b/client/json_util.cc
@@ -0,0 +1,74 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "json_util.h"
+
+namespace devtools_goma {
+
+bool GetStringFromJson(const Json::Value& json, const std::string& key,
+                       std::string* value, std::string* error_message) {
+  // Succeeds only when |key| exists in |json| and holds a string; otherwise
+  // |error_message| explains the failure and |value| is left untouched.
+  if (json.isMember(key)) {
+    const Json::Value& member = json[key];
+    if (member.isString()) {
+      *value = member.asString();
+      return true;
+    }
+    *error_message = key + " is not string";
+    return false;
+  }
+  *error_message = "missing " + key;
+  return false;
+}
+
+bool GetNonEmptyStringFromJson(const Json::Value& json, const std::string& key,
+                               std::string* value, std::string* error_message) {
+  // Same as GetStringFromJson, except that an empty string is also rejected.
+  // |value| has already been assigned when the emptiness check fails.
+  const bool is_string = GetStringFromJson(json, key, value, error_message);
+  if (is_string && value->empty()) {
+    *error_message = key + " is empty";
+    return false;
+  }
+
+  // Either the lookup failed (message already set) or the string is usable.
+  return is_string;
+}
+
+bool GetIntFromJson(const Json::Value& json, const std::string& key,
+                    int* value, std::string* error_message) {
+  // Succeeds only when |key| exists in |json| and holds an int; otherwise
+  // |error_message| explains the failure and |value| is left untouched.
+  if (json.isMember(key)) {
+    const Json::Value& member = json[key];
+    if (member.isInt()) {
+      *value = member.asInt();
+      return true;
+    }
+    *error_message = key + " is not int";
+    return false;
+  }
+  *error_message = "missing " + key;
+  return false;
+}
+
+bool GetInt64FromJson(const Json::Value& json, const std::string& key,
+                      int64_t* value, std::string* error_message) {
+  // Reads |key| from |json| as a 64-bit integer into |value|.
+  // On failure |error_message| says whether the key was missing or had the
+  // wrong type, and |value| is left untouched.
+  const bool present = json.isMember(key);
+  if (!present || !json[key].isInt64()) {
+    *error_message = present ? key + " is not int64" : "missing " + key;
+    return false;
+  }
+
+  *value = json[key].asInt64();
+  return true;
+}
+
+} // namespace devtools_goma
+
+} // namespace devtools_goma
diff --git a/client/json_util.h b/client/json_util.h
new file mode 100644
index 0000000..aed4b13
--- /dev/null
+++ b/client/json_util.h
@@ -0,0 +1,33 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_JSON_UTIL_H_
+#define DEVTOOLS_GOMA_CLIENT_JSON_UTIL_H_
+
+#include <string>
+
+#include "json/json.h"
+
+namespace devtools_goma {
+
+// Stores the string value of |key| in |json| into |value|.
+// Returns true on success.
+// Returns false if the key is missing or the value is not a string.
+bool GetStringFromJson(const Json::Value& json, const std::string& key,
+                       std::string* value, std::string* error_message);
+
+// Same as GetStringFromJson. Additionally check the value is not empty.
+// If the value is empty, false is returned, and |error_message| is set.
+bool GetNonEmptyStringFromJson(const Json::Value& json, const std::string& key,
+                               std::string* value, std::string* error_message);
+
+bool GetIntFromJson(const Json::Value& json, const std::string& key,
+                    int* value, std::string* error_message);
+
+bool GetInt64FromJson(const Json::Value& json, const std::string& key,
+                      int64_t* value, std::string* error_message);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_JSON_UTIL_H_
diff --git a/client/jwt.cc b/client/jwt.cc
new file mode 100644
index 0000000..9284cf8
--- /dev/null
+++ b/client/jwt.cc
@@ -0,0 +1,148 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "jwt.h"
+
+#include <openssl/err.h>
+#include <openssl/bio.h>
+#include <openssl/digest.h>
+#include <openssl/pem.h>
+#include <openssl/evp.h>
+
+#include <memory>
+#include <string>
+#include <sstream>
+#include <vector>
+
+#include "base64.h"
+#include "glog/logging.h"
+#include "ioutil.h"
+#include "join.h"
+
+namespace devtools_goma {
+
+// A descriptor of the intended target of the assertion.
+// When making an access token request this value is always
+// https://www.googleapis.com/oauth2/v4/token.
+// https://developers.google.com/identity/protocols/OAuth2ServiceAccount#authorizingrequests
+static const char* kAssertionTarget =
+    "https://www.googleapis.com/oauth2/v4/token";
+
+static std::string OpenSSLErrorString(uint32_t err) {
+  char buf[1024];
+  ERR_error_string_n(err, buf, sizeof(buf));  // Text form of |err| in |buf|.
+  return buf;  // Converted to std::string on return.
+}
+
+/* static */  // Reads the key through an in-memory BIO over |pem_key|.
+std::unique_ptr<JsonWebToken::Key> JsonWebToken::Key::Load(
+    const std::string& pem_key) {
+  ScopedBIO pem_bio(BIO_new_mem_buf(pem_key.data(), pem_key.size()));
+  ScopedEVP_PKEY private_key(
+      PEM_read_bio_PrivateKey(pem_bio.get(), nullptr, nullptr, nullptr));
+  if (!private_key) {
+    return nullptr;  // |pem_key| could not be read as a private key.
+  }
+  LOG_IF(WARNING, EVP_PKEY_id(private_key.get()) != EVP_PKEY_RSA)
+      << "load non RSA key. id=" << EVP_PKEY_id(private_key.get());
+  return std::unique_ptr<JsonWebToken::Key>(new Key(std::move(private_key)));
+}
+
+std::string JsonWebToken::Key::Sign(const std::string& input) const {
+  // Returns the SHA-256 signature of |input|, or "" (after logging) on error.
+  auto fail = [](const char* what) {
+    LOG(ERROR) << what << OpenSSLErrorString(ERR_get_error());
+    return std::string();
+  };
+  ScopedMDCTX mctx(EVP_MD_CTX_create());
+  const EVP_MD* md = EVP_sha256();
+  if (!EVP_DigestSignInit(mctx.get(), nullptr, md, nullptr, pkey_.get())) {
+    return fail("Failed to DigestSignInit:");
+  }
+  if (!EVP_DigestSignUpdate(mctx.get(), input.data(), input.size())) {
+    return fail("Failed to DigestSignUpdate:");
+  }
+  size_t siglen = 0;  // First Final call (null buffer) only reports the size.
+  if (!EVP_DigestSignFinal(mctx.get(), nullptr, &siglen)) {
+    return fail("Failed to get siglen:");
+  }
+  std::string sig(siglen, '\0');
+  if (!EVP_DigestSignFinal(mctx.get(),
+                           reinterpret_cast<uint8_t*>(&sig[0]), &siglen)) {
+    return fail("Failed to get sig:");
+  }
+  sig.resize(siglen);  // The actual length may be smaller than reported.
+  return sig;
+}
+
+JsonWebToken::JsonWebToken(const ClaimSet& claim_set)
+    : claim_set_(claim_set) {  // Keeps its own copy of |claim_set|.
+}
+
+JsonWebToken::~JsonWebToken() {  // Nothing to release explicitly.
+}
+
+std::string JsonWebToken::Token(const Key& key, time_t now) const {
+  // Builds "<base string>.<signature>"; yields "" when signing fails.
+  const std::string base_string = CreateTokenBaseString(
+      CreateHeaderJson(), CreateClaimSetJson(claim_set_, now));
+  const std::string signature = Sign(base_string, key);
+  if (signature.empty()) {
+    return "";
+  }
+  return CreateToken(base_string, signature);
+}
+
+/* static */
+std::string JsonWebToken::CreateHeaderJson() {
+  // The header is constant: service accounts rely on the RSA SHA-256
+  // algorithm ("RS256") and the JWT token format.
+  return "{\"alg\":\"RS256\",\"typ\":\"JWT\"}";
+}
+
+/* static */
+std::string JsonWebToken::CreateClaimSetJson(const ClaimSet& cs, time_t now) {
+  // Emits iss, optional sub, scope (space-joined), aud, exp and iat, in that
+  // order; "sub" is left out entirely when |cs.sub| is empty.
+  std::ostringstream json;
+  json << "{\"iss\":" << EscapeString(cs.iss);
+  if (!cs.sub.empty()) {
+    json << ",\"sub\":" << EscapeString(cs.sub);
+  }
+  json << ",\"scope\":" << EscapeString(strings::Join(cs.scopes, " "))
+       << ",\"aud\":" << EscapeString(kAssertionTarget)
+       << ",\"exp\":" << now + cs.expires_in_sec
+       << ",\"iat\":" << now << "}";
+  return json.str();
+}
+
+/* static */
+std::string JsonWebToken::CreateTokenBaseString(
+    const std::string& header, const std::string& claim_set) {
+  // Result: encoded |header|, a dot, then encoded |claim_set|.
+  std::ostringstream encoded;
+  encoded << Base64UrlEncode(header, false) << "."
+          << Base64UrlEncode(claim_set, false);
+  return encoded.str();
+}
+
+/* static */  // Thin wrapper: delegates to Key::Sign on |key|.
+std::string JsonWebToken::Sign(const std::string& base_string, const Key& key) {
+  return key.Sign(base_string);
+}
+
+/* static */
+std::string JsonWebToken::CreateToken(const std::string& base_string,
+                                      const std::string& signature_bytes) {
+  // Final JWT: the base string, a dot, then the encoded signature bytes.
+  std::ostringstream token;
+  token << base_string << "."
+        << Base64UrlEncode(signature_bytes, false);
+  return token.str();
+}
+
+const char JsonWebToken::kGrantTypeEncoded[] =
+        "urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer";
+
+}  // namespace devtools_goma
diff --git a/client/jwt.h b/client/jwt.h
new file mode 100644
index 0000000..ed0aff4
--- /dev/null
+++ b/client/jwt.h
@@ -0,0 +1,126 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_JWT_H_
+#define DEVTOOLS_GOMA_CLIENT_JWT_H_
+
+#include <openssl/bio.h>
+#include <openssl/digest.h>
+#include <openssl/pem.h>
+#include <openssl/evp.h>
+#include <time.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+#include "glog/logging.h"
+
+namespace devtools_goma {
+
+// JsonWebToken creates a signed JWT from a claim set and a private key.
+// https://developers.google.com/identity/protocols/OAuth2ServiceAccount#authorizingrequests
+class JsonWebToken {
+ public:
+  struct ClaimSet {
+    ClaimSet() : expires_in_sec(3600) {}
+
+    // The email address of the service account.
+    std::string iss;
+    // The email address of the user for which the application is
+    // requesting delegated access (if any).
+    std::string sub;
+
+    // The permissions that the application requests.
+    std::vector<std::string> scopes;
+    // Seconds until the access token expires (at most 1 hour).
+    int expires_in_sec;
+  };
+  // Key is the private key used to sign.
+  class Key {
+   public:
+    ~Key() {}
+    // Load parses a PEM formatted text representation of a key.
+    // Returns nullptr if failed.
+    static std::unique_ptr<Key> Load(const std::string& pem_key);
+
+    // Sign signs input, and returns raw signature bytes.
+    std::string Sign(const std::string& input) const;
+
+   private:
+    // TODO: create openssl_util?
+    template<typename T, void (*func)(T*)>
+    struct Deleter {  // Calls the free function |func| on the owned object.
+      void operator()(T* obj) {
+        func(obj);
+      }
+    };
+    struct BIODeleter {  // Separate because BIO_free's result is checked.
+      void operator()(BIO* obj) {
+        int r = BIO_free(obj);
+        LOG_IF(ERROR, r != 1) << "Failed to BIO_free " << obj;
+      }
+    };
+    typedef std::unique_ptr<BIO, BIODeleter> ScopedBIO;
+    typedef std::unique_ptr<EVP_PKEY, Deleter<EVP_PKEY, EVP_PKEY_free>>
+        ScopedEVP_PKEY;
+    typedef std::unique_ptr<EVP_MD_CTX,
+                            Deleter<EVP_MD_CTX, EVP_MD_CTX_destroy>>
+        ScopedMDCTX;
+
+    explicit Key(ScopedEVP_PKEY pkey) : pkey_(std::move(pkey)) {}
+
+    const ScopedEVP_PKEY pkey_;
+
+    DISALLOW_COPY_AND_ASSIGN(Key);
+  };
+
+  explicit JsonWebToken(const ClaimSet& claim_set);
+  ~JsonWebToken();
+
+  // LoadKey returns a Key from pem_key string.
+  // Returns nullptr if failed.
+  static std::unique_ptr<Key> LoadKey(const std::string& pem_key) {
+    return Key::Load(pem_key);
+  }
+
+  // Token generates JWT, including signature, signed by key.
+  std::string Token(const Key& key, time_t now) const;
+
+  static const char kGrantTypeEncoded[];  // Percent-encoded grant type URN.
+ private:
+  friend class JsonWebTokenTest;
+
+  // Helper functions.
+
+  // CreateHeaderJson returns JSON representation of JWT header.
+  static std::string CreateHeaderJson();
+
+  // CreateClaimSetJson returns JSON representation of JWT claim set.
+  static std::string CreateClaimSetJson(const ClaimSet& cs, time_t now);
+
+  // CreateTokenBaseString returns the JWT base string, i.e. the input for
+  // Sign:
+  // {Base64url encoded header}.{Base64url encoded claim set}
+  static std::string CreateTokenBaseString(
+      const std::string& header,
+      const std::string& claim_set);
+
+  // Sign returns signature bytes for base_string.
+  static std::string Sign(const std::string& base_string, const Key& key);
+
+  // CreateToken returns JWT token, from base_string and its signature bytes.
+  static std::string CreateToken(const std::string& base_string,
+                                 const std::string& signature_bytes);
+
+  const ClaimSet claim_set_;
+
+  DISALLOW_COPY_AND_ASSIGN(JsonWebToken);
+};
+
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_JWT_H_
diff --git a/client/jwt_unittest.cc b/client/jwt_unittest.cc
new file mode 100644
index 0000000..b66022c
--- /dev/null
+++ b/client/jwt_unittest.cc
@@ -0,0 +1,122 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "jwt.h"
+
+#include <string>
+
+#include <gtest/gtest.h>
+
+namespace devtools_goma {
+
+class JsonWebTokenTest : public testing::Test {
+ protected:
+  static std::string CreateHeaderJson() {
+    return JsonWebToken::CreateHeaderJson();
+  }
+
+  static std::string CreateClaimSetJson(
+      const JsonWebToken::ClaimSet& cs, time_t now) {
+    return JsonWebToken::CreateClaimSetJson(cs, now);
+  }
+
+  static std::string CreateTokenBaseString(const std::string& header,
+                                           const std::string& claim_set) {
+    return JsonWebToken::CreateTokenBaseString(header, claim_set);
+  }
+};
+
+TEST_F(JsonWebTokenTest, CreateClaimSetJson) {
+  JsonWebToken::ClaimSet cs;
+  cs.iss = "test@developer.gserviceaccount.com";
+  cs.scopes.push_back("https://www.googleapis.com/auth/userinfo.email");
+  cs.expires_in_sec = 3600;
+  time_t now = 1459931576;
+  // "iat" is |now| and "exp" is |now| + expires_in_sec (1459931576 + 3600).
+  EXPECT_EQ("{\"iss\":\"test@developer.gserviceaccount.com\""
+            ",\"scope\":\"https://www.googleapis.com/auth/userinfo.email\""
+            ",\"aud\":\"https://www.googleapis.com/oauth2/v4/token\""
+            ",\"exp\":1459935176"
+            ",\"iat\":1459931576"
+            "}", CreateClaimSetJson(cs, now));
+}
+
+TEST_F(JsonWebTokenTest, CreateTokenBaseString) {
+  const std::string claim_set_json =
+      "{\"iss\":\"test@developer.gserviceaccount.com\""
+      ",\"scope\":\"https://www.googleapis.com/auth/userinfo.email\""
+      ",\"aud\":\"https://www.googleapis.com/oauth2/v4/token\""
+      ",\"exp\":1459935176"
+      ",\"iat\":1459931576"
+      "}";
+  EXPECT_EQ("eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9."
+            "eyJpc3MiOiJ0ZXN0QGRldmVsb3Blci5nc2VydmljZWFjY291bn"
+            "QuY29tIiwic2NvcGUiOiJodHRwczovL3d3dy5nb29nbGVhcGlz"
+            "LmNvbS9hdXRoL3VzZXJpbmZvLmVtYWlsIiwiYXVkIjoiaHR0cH"
+            "M6Ly93d3cuZ29vZ2xlYXBpcy5jb20vb2F1dGgyL3Y0L3Rva2Vu"
+            "IiwiZXhwIjoxNDU5OTM1MTc2LCJpYXQiOjE0NTk5MzE1NzZ9",
+            CreateTokenBaseString(CreateHeaderJson(), claim_set_json));
+}
+
+TEST_F(JsonWebTokenTest, Token) {
+  const char* kPemKey = "-----BEGIN PRIVATE KEY-----\n"
+      "MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCJ2ljEsJpoZmrZ\n"
+      "AHTcs5HiFg9PkXUQJF4aK8jVacBl6C2U0YJGwnCCPYQHyju0++eZRWlAqds4Jn5O\n"
+      "8JclnLs5JFD6Qzlqosqwn4qu8QI7dy4PybjwxRZMQtWm5vY6gHmvID4WEvWjxjL2\n"
+      "mqVOdThYy2YV/3PsCyjf6Z2XYtAZZJoK94w4OpF30IF1wuEZHllh6VJ4wpRiqpT8\n"
+      "bHxSiMlH2CTaoKJowrgAoYENj5eSbnPP0dsSftdA3Ckeu5/A4OjhyrOCsjwZag6J\n"
+      "Ipw5oRRDm9iiRt7dHdtrjEkGsiaFZvqY4sW++8x8MGkPpO+Mc1IvJVjj7khOTHEH\n"
+      "mWORcjbTAgMBAAECggEAHmP0jeghIkLx60UefklYL++NEI2QsS5TUJG2hNX7hHvb\n"
+      "EKPfhJn5E71cDhuXbh7av/99ZLQNkCNsVRrVN4WGAOLwtzt6vPeGl8mUWVzokROF\n"
+      "JBXkn6/TapyRXWotflg0e1cwWM11OdXIBnWxW8qb0XeF2fOnKrKLIFHwXB98oRwn\n"
+      "G6jg3A3F+//PCvTNk+oTJUyNVIrF6MsLN2/a9CJwfQA4bDShnPlQj8ToXRf1mEqv\n"
+      "6i6NqgkXZX9q3jqU3/h66shUGR/ltc7aqsocHt1PJN0SCKPqxSJoGaZl/T7fCgVl\n"
+      "yvGoLrsyfX4WIW0BgICcfyyLwK5h48Gv1nq2kHiiAQKBgQDx6IYXbT4LhmHfJJ9d\n"
+      "3r6sxvBZ4h+0/HYVK/4rG4dvjSF/vVZvhXwKRbNybdRZoZiDp5QZBSN7TkPE8q97\n"
+      "8IQ91MggUqGSroVpU/PmGHIdUXMiU9qfq0F+KAXc5lNAunF4vqybWu16U4RFtpRq\n"
+      "joZKanb0Z0ChivQcI0YVDWNKcQKBgQCR4hbMTo3sHP0J4xKiisCBhkhN5wGo53bE\n"
+      "YIk1E+XE5u1Dp2gBPzhDilrG3PYphjwi0TvrAeWueJHdRJ2FJpe6BLsnJhJiKHkw\n"
+      "zVZHZ+Qn8+1WqnRobODzBXceqqHejDoeDfXBfTo94F6ttEu4EOIG6+1rVxOqaSD8\n"
+      "S52izO6PgwKBgDk4dS9pabm0KcZslT3RCG06CXRZZoKbDRto8pAjzN94FKpwkNeE\n"
+      "TZjob8/rZsVk0fyiUQeyDXiHRMR7W0MH21/8yvHKWemmWmxVrWWJ9sQ0lfVSvG30\n"
+      "RmOe9/QOjzbKYzjacV22HmJHCwyqaWTjHaTQlh6tpb4QbjmRpmwoZIohAoGAcos1\n"
+      "H2ImqVfxjsvOm/WaRZksOI7DjN2BMZwi35wp8zrm3RIa5a+/+7gsoqxoVB5kJWpo\n"
+      "Q5QPxbhBv5zameu9gn+oe4q3MH9a+OihcBuw13X9yui30i57ShXmfBu6UUWFdIe9\n"
+      "iRlMm70KWhWQxovrDUg9+OQ8OrelALRWp7eFMQUCgYEA4fz76VwkMrA8XzY326l5\n"
+      "36qU9oo4AVGN3Xtzh90C3cMYP3IpPTCdfxHvmyte2qC3uYb5EUtB15bX4UXR70bp\n"
+      "FypWqG6mgZ7Mdoh+PvInHDEuf8JdvwbhXlnhzHnfWi7+HjzWUUpS8Il0QuuIbE6q\n"
+      "pDh/d+sLfYP3TWpGOQ1yv6k=\n"
+      "-----END PRIVATE KEY-----\n";
+  // Load via the public JsonWebToken::LoadKey, as the failure message says.
+  std::unique_ptr<JsonWebToken::Key> key(JsonWebToken::LoadKey(kPemKey));
+  ASSERT_TRUE(key.get() != nullptr) << "LoadKey failed";
+
+  JsonWebToken::ClaimSet cs;
+  cs.iss = "test@developer.gserviceaccount.com";
+  cs.scopes.push_back("https://www.googleapis.com/auth/userinfo.email");
+  cs.expires_in_sec = 3600;
+
+  JsonWebToken jwt(cs);
+  time_t now = 1459931576;
+
+  // signature generated by
+  // echo '<plain token>' | openssl dgst -sha256 -sign <pem_key> |
+  //   base64 -w 0 | base64 -w 0 | tr '+/=' '-_'
+  EXPECT_EQ("eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9."
+            "eyJpc3MiOiJ0ZXN0QGRldmVsb3Blci5nc2VydmljZWFjY291bn"
+            "QuY29tIiwic2NvcGUiOiJodHRwczovL3d3dy5nb29nbGVhcGlz"
+            "LmNvbS9hdXRoL3VzZXJpbmZvLmVtYWlsIiwiYXVkIjoiaHR0cH"
+            "M6Ly93d3cuZ29vZ2xlYXBpcy5jb20vb2F1dGgyL3Y0L3Rva2Vu"
+            "IiwiZXhwIjoxNDU5OTM1MTc2LCJpYXQiOjE0NTk5MzE1NzZ9.Q"
+            "eIk0WMVjF7_e2LrgFwGyHIDXOt6TA3ErjXEkMCNna8AALdm4Jn"
+            "b-fLyWEqig8u2eDhriDf8-SBHFPMAqjPtqlgiOPhkfno3g2y8G"
+            "W8lC2VEHMLzkOBsEQx9bp4j6NinpyfX7nuiItaPG42IPJ6ElaI"
+            "FdtNO8z6jZ3hDqmyklrpX6AhBDBr0DSpunHQSPIcsGEh_IveaG"
+            "-s-hj36rdYpj2qMz8ikc2zydC-h8eW6DJcKtHAEGG0AGMkykC9"
+            "SRhrqZzcHARgI-GcZC33LnXNHyjF8ChUz9DTCzhwnmqAUm6ldZ"
+            "8AWVd9AyXdwJA5_ApTJHFFDOhaah7PENLfGHCgZzw",
+            jwt.Token(*key, now));
+}
+
+}  // namespace devtools_goma
diff --git a/client/library_path_resolver.cc b/client/library_path_resolver.cc
new file mode 100644
index 0000000..0fbd400
--- /dev/null
+++ b/client/library_path_resolver.cc
@@ -0,0 +1,183 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "library_path_resolver.h"
+
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+#include <iterator>
+#include <string>
+#include <vector>
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+
+#include "path.h"
+#include "string_piece_utils.h"
+
+#ifdef _WIN32
+#include "posix_helper_win.h"
+#endif
+
+using std::string;
+
+namespace devtools_goma {
+
+const char* LibraryPathResolver::fakeroot_ = "";
+
+LibraryPathResolver::LibraryPathResolver(const string& cwd)
+    : cwd_(cwd), static_link_(false) {
+#ifdef __MACH__
+  // Default directories, searched after searchdirs_ (see FindByName).
+  fallback_searchdirs_.emplace_back("/usr/lib");
+  fallback_searchdirs_.emplace_back("/usr/local/lib");
+#endif
+}
+
+LibraryPathResolver::~LibraryPathResolver() {
+}
+
+string LibraryPathResolver::ExpandLibraryPath(const string& value) const {
+  string lib_name = "lib";
+#ifdef __MACH__
+  string so_name = lib_name + value + ".dylib";
+  string ar_name = lib_name + value + ".a";
+  // Mac ld (-lx): a name ending in ".o" is searched for as is.
+  if (strings::EndsWith(value, ".o")) {
+    so_name = value;
+    ar_name = value;
+  }
+#elif defined(_WIN32)
+  StringPiece ext = file::Extension(value);
+  string so_name = value;
+  if (ext != "tlb") {
+    so_name = value + ".tlb";
+  }
+
+  string ar_name = value;
+  if (ext != "lib") {
+    ar_name = value + ".lib";
+  }
+#else
+  string so_name = lib_name + value + ".so";
+  string ar_name = lib_name + value + ".a";
+  // GNU ld (-l namespec): ":filename" is searched for as is.
+  if (strings::StartsWith(value, ":")) {
+    so_name = ar_name = value.substr(1);
+  }
+#endif
+  string pathname = FindByName(so_name, ar_name);
+  if (pathname.empty()) {
+    LOG(INFO) << "-l" << value << " not found in " << searchdirs_;
+  }
+  return pathname;
+}
+
+string LibraryPathResolver::FindBySoname(const string& soname) const {
+  return FindByName(soname, "");
+}
+
+string LibraryPathResolver::ResolveLibraryFilePath(
+    const string& syslibroot, const string& dirname,
+    const string& so_name, const string& ar_name) const {
+  // Tries so_name (unless static linking was requested) and then ar_name in
+  // |dirname|; returns the first readable one without the fakeroot_ prefix.
+  const string dir = file::JoinPathRespectAbsolute(cwd_, dirname);
+
+  // Stores the resolved path in |*found| and returns true if |name| in |dir|
+  // is readable.
+  auto probe = [&](const string& name, string* found) -> bool {
+    const string filename = fakeroot_ +
+        file::JoinPath(syslibroot, file::JoinPathRespectAbsolute(dir, name));
+    VLOG(2) << "check:" << filename;
+    if (access(filename.c_str(), R_OK) != 0)
+      return false;
+    *found = filename.substr(strlen(fakeroot_));
+    return true;
+  };
+
+  string path;
+  if (!static_link_ && probe(so_name, &path))
+    return path;
+  if (ar_name.empty() || !probe(ar_name, &path))
+    return "";
+  return path;
+}
+
+string LibraryPathResolver::FindByName(const string& so_name,
+                                       const string& ar_name) const {
+  for (const auto& dir : searchdirs_) {
+    // In spite of the ld(1) manual, ld won't prepend syslibroot to -L options.
+    // I have checked it with dtruss(1).
+    const string filename = ResolveLibraryFilePath(
+        "", dir, so_name, ar_name);
+    if (!filename.empty())
+      return filename;
+  }
+  // Then try the fallback directories, resolved under syslibroot_.
+  for (const auto& dir : fallback_searchdirs_) {
+    const string filename = ResolveLibraryFilePath(
+        syslibroot_, dir, so_name, ar_name);
+    if (!filename.empty())
+      return filename;
+  }
+
+  return "";
+}
+
+string LibraryPathResolver::ResolveFilePath(
+    const string& syslibroot, const string& dirname,
+    const string& basename) const {
+  // Probes {fakeroot_}{syslibroot}/{dirname resolved against cwd_}/{basename}
+  // and returns it without the fakeroot_ prefix, or "" if it is not readable.
+  const string dir = file::JoinPathRespectAbsolute(cwd_, dirname);
+  const string filename =
+      fakeroot_ + file::JoinPath(syslibroot, file::JoinPath(dir, basename));
+  VLOG(2) << "check:" << filename;
+
+  if (access(filename.c_str(), R_OK) != 0)
+    return "";
+  return filename.substr(strlen(fakeroot_));
+}
+
+string LibraryPathResolver::FindByFullname(const string& name) const {
+  {
+    string filename = fakeroot_ + file::JoinPathRespectAbsolute(cwd_, name);
+    VLOG(2) << "check:" << filename;
+    if (access(filename.c_str(), R_OK) == 0)
+      return filename.substr(strlen(fakeroot_));
+  }
+  // Otherwise look for the basename of |name| in the search directories.
+  const string search_name = string(file::Basename(name));
+  for (const auto& dir : searchdirs_) {
+    // In spite of the ld(1) manual, ld won't prepend syslibroot to -L options.
+    const string filename = ResolveFilePath("", dir, search_name);
+    if (!filename.empty())
+      return filename;
+  }
+
+  for (const auto& dir : fallback_searchdirs_) {
+    const string filename = ResolveFilePath(
+        syslibroot_, dir, search_name);
+    if (!filename.empty())
+      return filename;
+  }
+
+  return "";
+}
+
+// Appends |paths| to searchdirs_, keeping their order.
+void LibraryPathResolver::AppendSearchdirs(const std::vector<string>& paths) {
+  searchdirs_.insert(searchdirs_.end(), paths.begin(), paths.end());
+}
+
+void LibraryPathResolver::AddSearchdir(const string& searchdir) {
+  searchdirs_.push_back(searchdir);
+}
+
+
+}  // namespace devtools_goma
diff --git a/client/library_path_resolver.h b/client/library_path_resolver.h
new file mode 100644
index 0000000..32d6ef5
--- /dev/null
+++ b/client/library_path_resolver.h
@@ -0,0 +1,69 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_LIBRARY_PATH_RESOLVER_H_
+#define DEVTOOLS_GOMA_CLIENT_LIBRARY_PATH_RESOLVER_H_
+
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class LibraryPathResolverTest;
+class LinkerInputProcessorTest;
+
+// Expands library name to full path name (e.g. -lfoo => /usr/lib/libfoo.so).
+class LibraryPathResolver {
+ public:
+  explicit LibraryPathResolver(const string& cwd);
+  ~LibraryPathResolver();
+
+  // for -lfoo flag, value is "foo".
+  string ExpandLibraryPath(const string& value) const;
+  // e.g. soname = "libc.so.6"
+  string FindBySoname(const string& soname) const;
+  string FindByFullname(const string& fullname) const;
+  void PreventSharedLibrary() { static_link_ = true; }
+  void SetSyslibroot(const string& path) { syslibroot_ = path; }
+  void SetSysroot(const string& path) { sysroot_ = path; }
+  void AppendSearchdirs(const std::vector<string>& paths);
+  void AddSearchdir(const string& path);
+
+  const std::vector<string>& searchdirs() const { return searchdirs_; }
+  const string& cwd() const { return cwd_; }
+  const string& sysroot() const { return sysroot_; }
+  const string& syslibroot() const { return syslibroot_; }
+
+ private:
+  friend class LibraryPathResolverTest;
+  friend class LinkerInputProcessorTest;
+
+  string FindByName(const string& so_name, const string& ar_name) const;
+  string ResolveLibraryFilePath(
+      const string& syslibroot, const string& dirname,
+      const string& so_name, const string& ar_name) const;
+  string ResolveFilePath(const string& syslibroot, const string& dirname,
+                         const string& filename) const;
+  // Searched in order; fallback_searchdirs_ is tried only after these.
+  std::vector<string> searchdirs_;
+  std::vector<string> fallback_searchdirs_;
+  const string cwd_;
+  bool static_link_;
+  // For mac -syslibroot option.
+  string syslibroot_;
+  string sysroot_;
+  // Prepended to every path probed on disk and stripped from the results.
+  static const char* fakeroot_;
+
+  DISALLOW_COPY_AND_ASSIGN(LibraryPathResolver);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_LIBRARY_PATH_RESOLVER_H_
diff --git a/client/library_path_resolver_unittest.cc b/client/library_path_resolver_unittest.cc
new file mode 100644
index 0000000..81daee4
--- /dev/null
+++ b/client/library_path_resolver_unittest.cc
@@ -0,0 +1,201 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "library_path_resolver.h"
+
+#ifdef _WIN32
+# include "config_win.h"
+# include <shlobj.h>
+#else
+# include <limits.h>
+#endif
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+#include <gtest/gtest.h>
+
+#include "path.h"
+#include "unittest_util.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class LibraryPathResolverTest : public testing::Test {
+ public:
+  void SetUp() override {
+    tmpdir_util_.reset(new TmpdirUtil("library_path_resolver_test"));
+    // LibraryPathResolver::fakeroot_ will point into |tmpdir_|'s buffer, so
+    // |tmpdir_| must be kept alive until TearDown resets fakeroot_.
+    tmpdir_ = tmpdir_util_->tmpdir();
+    cwd_ = tmpdir_util_->cwd();
+    LibraryPathResolver::fakeroot_ = tmpdir_.c_str();
+  }
+
+  void TearDown() override {
+    LibraryPathResolver::fakeroot_ = "";
+    tmpdir_util_.reset();
+  }
+
+ protected:
+  std::unique_ptr<TmpdirUtil> tmpdir_util_;
+
+  std::string tmpdir_;
+  std::string cwd_;
+};
+
+TEST_F(LibraryPathResolverTest, SimpleTest) {
+#ifdef __linux__
+  tmpdir_util_->CreateEmptyFile("/usr/lib/libX11.a");
+  tmpdir_util_->CreateEmptyFile("/usr/lib/libX11.so");
+  tmpdir_util_->CreateEmptyFile("/usr/lib/libc.so");
+  tmpdir_util_->CreateEmptyFile("/usr/lib/libc.a");
+  tmpdir_util_->CreateEmptyFile("/lib/libc.so.6");
+  tmpdir_util_->CreateEmptyFile("/usr/lib/libgcc_s.a");
+  tmpdir_util_->CreateEmptyFile("/usr/lib/libgcc.so");
+  tmpdir_util_->CreateEmptyFile("/usr/local/lib/libX11.so");
+  tmpdir_util_->CreateEmptyFile("/usr/local/lib/libglib.so");
+
+  std::vector<string> search_dirs;
+  search_dirs.push_back("/lib");
+  search_dirs.push_back("/usr/lib");
+  LibraryPathResolver library_path_resolver(cwd_);
+  library_path_resolver.AppendSearchdirs(search_dirs);
+  EXPECT_EQ("/usr/lib/libX11.so",
+            library_path_resolver.ExpandLibraryPath("X11"));
+  EXPECT_EQ("/usr/lib/libc.so",
+            library_path_resolver.ExpandLibraryPath("c"));
+  EXPECT_EQ("/usr/lib/libgcc_s.a",
+            library_path_resolver.ExpandLibraryPath("gcc_s"));
+  EXPECT_EQ("/usr/lib/libgcc.so",
+            library_path_resolver.ExpandLibraryPath("gcc"));
+  EXPECT_EQ("", library_path_resolver.ExpandLibraryPath("glib"));
+
+  library_path_resolver.AddSearchdir("/usr/local/lib");
+  EXPECT_EQ("/usr/lib/libX11.so",
+            library_path_resolver.ExpandLibraryPath("X11"));
+  EXPECT_EQ("/usr/local/lib/libglib.so",
+            library_path_resolver.ExpandLibraryPath("glib"));
+
+  EXPECT_EQ("/lib/libc.so.6",
+            library_path_resolver.FindBySoname("libc.so.6"));
+#elif defined(__MACH__)
+  tmpdir_util_->CreateEmptyFile("/usr/lib/libSystem.dylib");
+  tmpdir_util_->CreateEmptyFile("/usr/lib/liby.a");
+  tmpdir_util_->CreateEmptyFile("/usr/lib/crt1.10.6.o");
+  tmpdir_util_->CreateEmptyFile("/usr/local/lib/libdummy.dylib");
+  tmpdir_util_->CreateEmptyFile("/this/is/test/dir/libdummy2.dylib");
+  tmpdir_util_->CreateEmptyFile("/yet/another/dir/libdummy3.dylib");
+
+  // /usr/lib and /usr/local/lib are the default search paths.
+  std::vector<string> search_dirs;
+  search_dirs.push_back("/this/is/test/dir");
+  LibraryPathResolver library_path_resolver(cwd_);
+  library_path_resolver.AppendSearchdirs(search_dirs);
+
+  EXPECT_EQ("/usr/lib/libSystem.dylib",
+            library_path_resolver.ExpandLibraryPath("System"));
+  EXPECT_EQ("/usr/lib/liby.a",
+            library_path_resolver.ExpandLibraryPath("y"));
+  EXPECT_EQ("/usr/local/lib/libdummy.dylib",
+            library_path_resolver.ExpandLibraryPath("dummy"));
+  EXPECT_EQ("/this/is/test/dir/libdummy2.dylib",
+            library_path_resolver.ExpandLibraryPath("dummy2"));
+
+  library_path_resolver.AddSearchdir("/yet/another/dir");
+  EXPECT_EQ("/usr/lib/libSystem.dylib",
+            library_path_resolver.ExpandLibraryPath("System"));
+  EXPECT_EQ("/yet/another/dir/libdummy3.dylib",
+            library_path_resolver.ExpandLibraryPath("dummy3"));
+
+  EXPECT_EQ("/usr/lib/crt1.10.6.o",
+            library_path_resolver.FindBySoname("crt1.10.6.o"));
+#elif defined(_WIN32)
+  tmpdir_util_->CreateEmptyFile("\\vs9\\vc\\lib\\libcmtd.lib");
+  tmpdir_util_->CreateEmptyFile("\\vs9\\vc\\lib\\msvcprt.lib");
+  tmpdir_util_->CreateEmptyFile(
+      "\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Lib\\msxml2.lib");
+  tmpdir_util_->CreateEmptyFile("\\vs10\\vc\\lib\\libcmtd.lib");
+  std::vector<string> search_dirs;
+  search_dirs.push_back("\\vs9\\vc\\lib");
+  search_dirs.push_back("\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Lib");
+  LibraryPathResolver library_path_resolver(cwd_);
+  library_path_resolver.AppendSearchdirs(search_dirs);
+  EXPECT_EQ("\\vs9\\vc\\lib\\libcmtd.lib",
+            library_path_resolver.ExpandLibraryPath("libcmtd.lib"));
+  EXPECT_EQ("\\vs9\\vc\\lib\\msvcprt.lib",
+            library_path_resolver.ExpandLibraryPath("msvcprt.lib"));
+  EXPECT_EQ("\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Lib\\msxml2.lib",
+            library_path_resolver.ExpandLibraryPath("msxml2.lib"));
+  library_path_resolver.AddSearchdir("\\vs10\\vc\\lib");
+  EXPECT_EQ("\\vs9\\vc\\lib\\libcmtd.lib",
+            library_path_resolver.ExpandLibraryPath("libcmtd.lib"));
+#endif
+}
+
+#ifdef __linux__
+TEST_F(LibraryPathResolverTest, SimpleTestStatic) {
+  tmpdir_util_->CreateEmptyFile("/usr/lib/libX11.a");
+  tmpdir_util_->CreateEmptyFile("/usr/lib/libX11.so");
+  tmpdir_util_->CreateEmptyFile("/usr/lib/libc.so");
+  tmpdir_util_->CreateEmptyFile("/usr/lib/libc.a");
+  tmpdir_util_->CreateEmptyFile("/lib/libc.so.6");
+  tmpdir_util_->CreateEmptyFile("/usr/lib/libgcc_s.a");
+  tmpdir_util_->CreateEmptyFile("/usr/local/lib/libX11.so");
+  tmpdir_util_->CreateEmptyFile("/usr/local/lib/libX11.a");
+  tmpdir_util_->CreateEmptyFile("/usr/local/lib/libglib.so");
+  tmpdir_util_->CreateEmptyFile("/usr/local/lib/libglib.a");
+  // After PreventSharedLibrary(), only the .a files are expected to match.
+  std::vector<string> search_dirs;
+  search_dirs.push_back("/lib");
+  search_dirs.push_back("/usr/lib");
+  LibraryPathResolver library_path_resolver(cwd_);
+  library_path_resolver.AppendSearchdirs(search_dirs);
+  library_path_resolver.PreventSharedLibrary();
+  EXPECT_EQ("/usr/lib/libX11.a",
+            library_path_resolver.ExpandLibraryPath("X11"));
+  EXPECT_EQ("/usr/lib/libc.a",
+            library_path_resolver.ExpandLibraryPath("c"));
+  EXPECT_EQ("/usr/lib/libgcc_s.a",
+            library_path_resolver.ExpandLibraryPath("gcc_s"));
+  EXPECT_EQ("", library_path_resolver.ExpandLibraryPath("glib"));
+  library_path_resolver.AddSearchdir("/usr/local/lib");
+  EXPECT_EQ("/usr/lib/libX11.a",
+            library_path_resolver.ExpandLibraryPath("X11"));
+  EXPECT_EQ("/usr/local/lib/libglib.a",
+            library_path_resolver.ExpandLibraryPath("glib"));
+}
+#endif
+
+#ifdef __MACH__
+TEST_F(LibraryPathResolverTest, SimpleTestSyslibroot) {
+  tmpdir_util_->CreateEmptyFile("/usr/lib/libSystem.dylib");
+  tmpdir_util_->CreateEmptyFile("/Developer/SDKs/MacOSX10.6.sdk"
+                                "/usr/lib/libSystem.dylib");
+  tmpdir_util_->CreateEmptyFile("lib/libtest.dylib");
+  LibraryPathResolver library_path_resolver(cwd_);
+  EXPECT_EQ("/usr/lib/libSystem.dylib",
+            library_path_resolver.ExpandLibraryPath("System"));
+  EXPECT_EQ("", library_path_resolver.ExpandLibraryPath("test"));
+
+  library_path_resolver.AddSearchdir("lib");
+  EXPECT_EQ("/usr/lib/libSystem.dylib",
+            library_path_resolver.ExpandLibraryPath("System"));
+  EXPECT_EQ(file::JoinPath(cwd_, "lib/libtest.dylib"),
+            library_path_resolver.ExpandLibraryPath("test"));
+  // syslibroot applies to the default dirs but not to dirs added explicitly.
+  library_path_resolver.SetSyslibroot("/Developer/SDKs/MacOSX10.6.sdk");
+  EXPECT_EQ("/Developer/SDKs/MacOSX10.6.sdk/usr/lib/libSystem.dylib",
+            library_path_resolver.ExpandLibraryPath("System"));
+  EXPECT_EQ(file::JoinPath(cwd_, "lib/libtest.dylib"),
+            library_path_resolver.ExpandLibraryPath("test"));
+}
+#endif
+
+}  // namespace devtools_goma
diff --git a/client/linked_unordered_map.h b/client/linked_unordered_map.h
new file mode 100644
index 0000000..1ce44af
--- /dev/null
+++ b/client/linked_unordered_map.h
@@ -0,0 +1,121 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_LINKED_UNORDERED_MAP_H_
+#define DEVTOOLS_GOMA_CLIENT_LINKED_UNORDERED_MAP_H_
+
+#include <list>
+#include <unordered_map>
+
+#include "glog/logging.h"
+
+namespace devtools_goma {
+
+// LinkedUnorderedMap is an unordered map, which keeps insertion order.
+//
+// Note: Unfortunately, this implementation cannot be used with a move-only
+// key type, since the key is copied into both the list and the map.
+//
+// This is not thread-safe.
+template<typename K, typename V, typename H = std::hash<K>>
+class LinkedUnorderedMap {
+ public:
+  using ListType = std::list<std::pair<K, V>>;
+  using MapType = std::unordered_map<K, typename ListType::iterator, H>;
+  using const_iterator = typename ListType::const_iterator;
+  using iterator = typename ListType::iterator;
+
+  size_t size() const {
+    DCHECK_EQ(map_.size(), list_.size());
+    return list_.size();
+  }
+  bool empty() const { return list_.empty(); }
+
+  const std::pair<K, V>& front() const {
+    DCHECK(!empty());
+    return list_.front();
+  }
+
+  void pop_front();
+  // Overwrites the previous entry if the key is already registered.
+  // Note: this method is a template so that it can take universal references.
+  // TODO: Align interface with std container.
+  template<typename KK, typename VV>
+  void emplace_back(KK&& k, VV&& v);
+
+  // Moves the entry that |it| points to to the back.
+  void MoveToBack(iterator it);
+
+  iterator begin() { return list_.begin(); }
+  const_iterator begin() const { return list_.begin(); }
+  iterator end() { return list_.end(); }
+  const_iterator end() const { return list_.end(); }
+
+  iterator find(const K& key);
+  const_iterator find(const K& key) const;
+
+ private:
+  // Implementation Note: std::list iterators stay valid when other entries
+  // are inserted or erased, so it is safe to keep list iterators in |map_|.
+  ListType list_;
+  MapType map_;
+};
+
+template<typename K, typename V, typename H>
+void LinkedUnorderedMap<K, V, H>::pop_front() {
+  DCHECK(!empty());
+  map_.erase(list_.front().first);
+  list_.pop_front();
+}
+
+template<typename K, typename V, typename H>
+template<typename KK, typename VV>
+void LinkedUnorderedMap<K, V, H>::emplace_back(KK&& key, VV&& value) {
+  auto map_it = map_.find(key);
+  if (map_it == map_.end()) {
+    // key cannot be moved here. used later.
+    auto back_it = list_.emplace(list_.end(), key, std::forward<VV>(value));
+    map_.insert(std::make_pair(std::forward<KK>(key), back_it));
+  } else {
+    // Add the new node before erasing the old one: |key| or |value| may
+    // refer to the old node, and a throwing constructor must not leave an
+    // erased iterator in |map_|.
+    auto back_it = list_.emplace(
+        list_.end(), std::forward<KK>(key), std::forward<VV>(value));
+    list_.erase(map_it->second);
+    map_it->second = back_it;
+  }
+}
+
+template<typename K, typename V, typename H>
+void LinkedUnorderedMap<K, V, H>::MoveToBack(
+    typename LinkedUnorderedMap<K, V, H>::iterator it) {
+  list_.splice(list_.end(), list_, it);
+}
+
+template<typename K, typename V, typename H>
+typename LinkedUnorderedMap<K, V, H>::iterator
+LinkedUnorderedMap<K, V, H>::find(const K& key) {
+  // Returns end() when |key| is not registered.
+  const auto found = map_.find(key);
+  if (found != map_.end()) {
+    return found->second;
+  }
+  return list_.end();
+}
+
+template<typename K, typename V, typename H>
+typename LinkedUnorderedMap<K, V, H>::const_iterator
+LinkedUnorderedMap<K, V, H>::find(const K& key) const {
+  // Returns end() when |key| is not registered.
+  const auto found = map_.find(key);
+  if (found != map_.end()) {
+    return found->second;
+  }
+  return list_.end();
+}
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_LINKED_UNORDERED_MAP_H_
diff --git a/client/linked_unordered_map_unittest.cc b/client/linked_unordered_map_unittest.cc
new file mode 100644
index 0000000..8acad1c
--- /dev/null
+++ b/client/linked_unordered_map_unittest.cc
@@ -0,0 +1,138 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "linked_unordered_map.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <glog/stl_logging.h>
+#include <gtest/gtest.h>
+
+#include "goma_hash.h"
+#include "sha256hash_hasher.h"
+
+namespace devtools_goma {
+
+namespace {
+template<typename K, typename V, typename H>
+std::vector<K> ListKeys(const LinkedUnorderedMap<K, V, H>& m) {
+  // Collects the keys of |m| in iteration order.
+  std::vector<K> keys;
+  for (auto it = m.begin(); it != m.end(); ++it) {
+    keys.push_back(it->first);
+  }
+  return keys;
+}
+}  // anonymous namespace
+
+TEST(LinkedUnorderedMap, Empty) {
+  LinkedUnorderedMap<int, int> m;
+
+  EXPECT_EQ(m.size(), 0U);
+  EXPECT_TRUE(m.empty());
+}
+
+TEST(LinkedUnorderedMap, Basic) {
+  LinkedUnorderedMap<int, int> m;
+
+  m.emplace_back(1, 100);
+  m.emplace_back(4, 400);
+  m.emplace_back(2, 200);
+  m.emplace_back(3, 300);
+  m.emplace_back(5, 500);
+
+  EXPECT_EQ(m.size(), 5U);
+  EXPECT_FALSE(m.empty());
+
+  EXPECT_EQ(m.find(1)->second, 100);
+  EXPECT_EQ(m.find(2)->second, 200);
+  EXPECT_EQ(m.find(3)->second, 300);
+  EXPECT_EQ(m.find(4)->second, 400);
+  EXPECT_EQ(m.find(5)->second, 500);
+
+  // insertion order must be preserved.
+  EXPECT_EQ((std::vector<int> { 1, 4, 2, 3, 5 }), ListKeys(m));
+
+  m.emplace_back(1, 1000);  // should override the previous '1'.
+  EXPECT_EQ((std::vector<int> { 4, 2, 3, 5, 1 }), ListKeys(m));
+
+  m.pop_front();
+  EXPECT_EQ(m.size(), 4U);
+  EXPECT_FALSE(m.empty());
+  EXPECT_EQ((std::vector<int> { 2, 3, 5, 1 }), ListKeys(m));
+  EXPECT_EQ(2, m.front().first);
+  EXPECT_EQ(200, m.front().second);
+}
+
+TEST(LinkedUnorderedMap, NonCopyableType) {
+  LinkedUnorderedMap<int, std::unique_ptr<int>> m;
+  m.emplace_back(1, std::unique_ptr<int>(new int(100)));
+  m.emplace_back(2, std::unique_ptr<int>(new int(200)));
+
+  EXPECT_EQ(m.size(), 2U);
+  EXPECT_EQ(100, *m.find(1)->second);
+  EXPECT_EQ(200, *m.find(2)->second);
+
+  m.pop_front();
+
+  EXPECT_EQ(m.size(), 1U);
+  EXPECT_TRUE(m.find(1) == m.end());
+  EXPECT_EQ(200, *m.find(2)->second);
+}
+
+TEST(LinkedUnorderedMap, MoveToBack) {
+  // Intentionally use move-only type in value to prove it works.
+  LinkedUnorderedMap<int, std::unique_ptr<int>> m;
+  m.emplace_back(1, std::unique_ptr<int>(new int(100)));
+  m.emplace_back(2, std::unique_ptr<int>(new int(200)));
+  m.emplace_back(3, std::unique_ptr<int>(new int(300)));
+
+  {
+    auto it = m.find(2);
+    m.MoveToBack(it);
+
+    EXPECT_EQ((std::vector<int> { 1, 3, 2 }), ListKeys(m));
+    // |it| should stay valid even after the move.
+    EXPECT_EQ(200, *it->second);
+    EXPECT_EQ(200, *m.find(2)->second);
+  }
+
+  {
+    auto it = m.find(1);
+    auto jt = m.find(3);
+
+    m.MoveToBack(jt);
+    EXPECT_EQ((std::vector<int> { 1, 2, 3 }), ListKeys(m));
+    m.MoveToBack(it);
+    EXPECT_EQ((std::vector<int> { 2, 3, 1 }), ListKeys(m));
+
+    // Both entries can still be found.
+    EXPECT_EQ(100, *m.find(1)->second);
+    EXPECT_EQ(300, *m.find(3)->second);
+
+    // |it| and |jt| should still be valid.
+    EXPECT_EQ(100, *it->second);
+    EXPECT_EQ(300, *jt->second);
+  }
+}
+
+TEST(LinkedUnorderedMap, CustomHashFunction) {
+  LinkedUnorderedMap<SHA256HashValue, std::string, SHA256HashValueHasher> m;
+
+  SHA256HashValue h1, h2;
+  ASSERT_TRUE(SHA256HashValue::ConvertFromHexString(
+      "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", &h1));
+  ASSERT_TRUE(SHA256HashValue::ConvertFromHexString(
+      "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", &h2));
+
+  m.emplace_back(h1, "h1");
+  m.emplace_back(h2, "h2");
+
+  EXPECT_EQ(m.find(h1)->second, "h1");
+  EXPECT_EQ(m.find(h2)->second, "h2");
+}
+
+}  // namespace devtools_goma
diff --git a/client/linker_input_processor.cc b/client/linker_input_processor.cc
new file mode 100644
index 0000000..8747c6a
--- /dev/null
+++ b/client/linker_input_processor.cc
@@ -0,0 +1,579 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "linker_input_processor.h"
+
+#ifndef _WIN32
+#include <ar.h>
+#ifdef __linux__
+#include <elf.h>
+#endif
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#else
+#include "config_win.h"
+#endif
+
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+
+#include "arfile.h"
+#include "cmdline_parser.h"
+#ifdef __linux
+// TODO: port elf.h in MacOSX and eliminate this ifdef.
+// we want to run android cross compile (which uses ELF) on MacOSX.
+#include "elf_parser.h"
+#endif
+#include "compiler_flags.h"
+#include "compiler_info.h"
+#include "compiler_specific.h"
+#include "content.h"
+#include "elf_parser.h"
+#include "framework_path_resolver.h"
+#include "ioutil.h"
+#include "library_path_resolver.h"
+#include "linker_script_parser.h"
+#ifdef __MACH__
+#include "mach_o_parser.h"
+#include <mach-o/fat.h>
+#include <mach-o/loader.h>
+#endif
+#include "path.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_data.pb.h"
+MSVC_POP_WARNING()
+#include "string_piece.h"
+#include "string_piece_utils.h"
+#include "util.h"
+
+#ifndef ELFMAG
+# define ELFMAG "\177ELF"
+# define SELFMAG 4
+#endif
+
+#define TARMAG "!<thin>\n"  // String that begins a thin archive file.
+#define STARMAG 8           // Size of that string.
+
+#ifdef _WIN32
+// Copied from GNU C ar.h
+#define ARMAG   "!<arch>\n"     /* String that begins an archive file.  */
+#define SARMAG  8               /* Size of that string.  */
+#define SEP '\\'
+#else
+#define SEP '/'
+#endif
+
+namespace {
+#ifdef __MACH__
+const int kMaxRecursion = 10;  // Depth limit passed to TryParseMachONeeded.
+#endif
+}  // namespace
+
+namespace devtools_goma {
+
+LinkerInputProcessor::LinkerInputProcessor(
+    const std::vector<string>& args,  // command line to analyze
+    const string& current_directory)  // cwd for flags and both resolvers
+    : flags_(CompilerFlags::New(args, current_directory)),
+      library_path_resolver_(new LibraryPathResolver(current_directory)),
+      framework_path_resolver_(new FrameworkPathResolver(current_directory)) {
+}
+
+LinkerInputProcessor::LinkerInputProcessor(
+    const string& current_directory)  // Test-only: flags_ stays null.
+    : library_path_resolver_(new LibraryPathResolver(current_directory)),
+      framework_path_resolver_(new FrameworkPathResolver(current_directory)) {
+}
+
+
+LinkerInputProcessor::~LinkerInputProcessor() {
+}  // Owned objects are released by their std::unique_ptr members.
+
+bool LinkerInputProcessor::GetInputFilesAndLibraryPath(
+    const CompilerInfo& /* compiler_info */,
+    const CommandSpec& command_spec,
+    std::set<string>* input_files,
+    std::vector<string>* library_paths) {
+  if (flags_.get() == nullptr) {
+    return false;  // Nothing to analyze without parsed compiler flags.
+  }
+  std::vector<string> driver_args;
+  std::vector<string> driver_envs;
+  if (!CaptureDriverCommandLine(command_spec, &driver_args, &driver_envs)) {
+    return false;  // The "-###" dump failed or could not be parsed.
+  }
+  VLOG(1) << "driver command line:" << driver_args;
+  std::vector<string> input_paths;
+  ParseDriverCommandLine(driver_args, &input_paths);
+  VLOG(2) << "input paths:" << input_paths;
+  VLOG(1) << "driver environment:" << driver_envs;
+  // TODO: make sure ld does not need to see LIBRARY_PATH.
+  GetLibraryPath(driver_envs, library_paths);
+  VLOG(1) << "my library path is: " << *library_paths;
+
+  // Note: |input_paths| can grow inside this loop (linker scripts and ELF
+  // files append to it), so iterate by index; a ranged-for would be unsafe.
+  for (size_t i = 0; i < input_paths.size(); ++i) {
+    if (input_paths[i].empty())
+      continue;
+    const string filename =
+        file::JoinPathRespectAbsolute(flags_->cwd(), input_paths[i]);
+    VLOG(1) << "Input: " << filename;
+    if (!input_files->insert(filename).second) {
+      VLOG(2) << "already checked:" << filename;
+      continue;  // Each file is classified at most once.
+    }
+    switch (CheckFileType(filename)) {
+      case THIN_ARCHIVE_FILE:  // Also adds the files the archive lists.
+        ParseThinArchive(filename, input_files);
+        break;
+      case OTHER_FILE:  // Possibly a linker script.
+        TryParseLinkerScript(filename, &input_paths);
+        break;
+      case ELF_BINARY_FILE:  // Queues its needed libraries (Linux only).
+        TryParseElfNeeded(filename, &input_paths);
+        break;
+      case MACHO_FAT_FILE:  // Followed only when built under __MACH__.
+#ifdef __MACH__
+        TryParseMachONeeded(filename, kMaxRecursion, input_files);
+#endif
+        break;
+      case MACHO_OBJECT_FILE:
+        FALLTHROUGH_INTENDED;
+      case ARCHIVE_FILE:
+        FALLTHROUGH_INTENDED;
+      case BAD_FILE:
+        break;  // No further inputs are derived from these.
+    }
+  }
+  VLOG(2) << "input files:" << *input_files;
+  return true;
+}
+
+bool LinkerInputProcessor::CaptureDriverCommandLine(
+    const CommandSpec& command_spec,
+    std::vector<string>* driver_args,
+    std::vector<string>* driver_envs) {
+  CHECK(flags_.get());
+  // Build "<local compiler> -### <original args minus args[0]>", which makes
+  // the driver dump the commands it would run.
+  const std::vector<string>& orig_args = flags_->args();
+  std::vector<string> cmd;
+  cmd.push_back(command_spec.local_compiler_path());
+  cmd.push_back("-###");
+  for (size_t n = 1; n < orig_args.size(); ++n)
+    cmd.push_back(orig_args[n]);
+  // Run it with LC_ALL=C in the compile's cwd, merging stdout and stderr.
+  std::vector<string> envs(1, "LC_ALL=C");
+  int32_t exit_status = -1;
+  const string output = ReadCommandOutput(
+      cmd[0], cmd, envs, flags_->cwd(), MERGE_STDOUT_STDERR, &exit_status);
+  if (exit_status == 0)
+    return ParseDumpOutput(output, driver_args, driver_envs);
+
+  LOG(ERROR) << "command failed with exit=" << exit_status
+             << " args=" << cmd
+             << " env=" << envs
+             << " cwd=" << flags_->cwd();
+  return false;
+}
+
+/* static */
+bool LinkerInputProcessor::ParseDumpOutput(
+    const string& dump_output,
+    std::vector<string>* driver_args,
+    std::vector<string>* driver_envs) {
+  // |dump_output| ("gcc -###" output) holds gcc's specs, important envs
+  // (COMPILER_PATH, LIBRARY_PATH, etc) and the commands to be executed; a
+  // command line starts with a SPACE and quotes each argument.  The last
+  // one wins; returns false if one cannot be split or |driver_args| is empty.
+  StringPiece buf(dump_output);
+  size_t pos;
+
+  do {
+    pos = buf.find_first_of("\n");
+    StringPiece line = buf.substr(0, pos);
+    VLOG(3) << "ParseDumpOutput: " << line;
+    // npos + 1 wraps to 0, so the final (unterminated) line removes nothing.
+    buf.remove_prefix(pos + 1);
+
+    if (strings::StartsWith(line, "LIBRARY_PATH=") ||
+        strings::StartsWith(line, "COMPILER_PATH=")) {
+      driver_envs->push_back(string(line));
+    }
+    // Blank lines (including the empty tail after the last '\n') must not be
+    // indexed: line[0] on an empty StringPiece reads out of bounds.
+    if (!line.empty() && line[0] == ' ') {
+      driver_args->clear();
+      if (!ParsePosixCommandLineToArgv(string(line), driver_args))
+        return false;
+    }
+  } while (pos != StringPiece::npos);
+
+  return !driver_args->empty();
+}
+
+void LinkerInputProcessor::ParseDriverCommandLine(
+    const std::vector<string>& args,
+    std::vector<string>* input_paths) {
+  // Splits the driver command line into |input_paths| and resolver settings.
+  // TODO: make sure that changing file order is acceptable.
+  // Before: as-is except -l options resolved by later -L options.
+  // Now: non-flag files -> -dynamic-linker files -> -l -> -framework options.
+  FlagParser driver_flag;
+  driver_flag.mutable_options()->flag_prefix = '-';
+  driver_flag.mutable_options()->allows_equal_arg = true;
+  driver_flag.mutable_options()->allows_nonspace_arg = true;
+  driver_flag.mutable_options()->has_command_name = true;
+
+  // Flags registered only so their values are not taken as input files.
+  driver_flag.AddFlag("z");
+  driver_flag.AddFlag("m");
+  driver_flag.AddFlag("o");  // we need this for incremental link?
+  // For Mac.
+  driver_flag.AddFlag("macosx_version_min");
+  driver_flag.AddFlag("exported_symbol");
+  driver_flag.AddFlag("install_name");
+  driver_flag.AddFlag("dylib_install_name");
+
+  // Flags that contribute input files or search locations.
+  bool static_link = false;
+  bool no_default_searchpath = false;
+  std::vector<string> searchdirs;
+  std::vector<string> lvalues;
+  std::vector<string> frameworkpaths;
+  std::vector<string> frameworks;
+  std::vector<string> files_to_find;
+  driver_flag.AddBoolFlag("static")->SetSeenOutput(&static_link);
+  driver_flag.AddFlag("L")->SetValueOutputWithCallback(nullptr, &searchdirs);
+  driver_flag.AddFlag("l")->SetValueOutputWithCallback(nullptr, &lvalues);
+  driver_flag.AddFlag("dynamic-linker")->SetValueOutputWithCallback(
+      nullptr, &files_to_find);
+  driver_flag.AddFlag("F")->SetValueOutputWithCallback(
+      nullptr, &frameworkpaths);
+  driver_flag.AddFlag("framework")->SetValueOutputWithCallback(
+      nullptr, &frameworks);
+  driver_flag.AddBoolFlag("Z")->SetSeenOutput(&no_default_searchpath);
+  // --sysroot: its value replaces '=' in search paths. (Linux)
+  FlagParser::Flag* flag_sysroot = driver_flag.AddFlag("-sysroot");
+  // syslibroot: prefix for all search paths. (Mac)
+  FlagParser::Flag* flag_syslibroot = driver_flag.AddFlag("syslibroot");
+  FlagParser::Flag* flag_arch = driver_flag.AddFlag("arch");
+  driver_flag.AddNonFlag()->SetOutput(input_paths);
+  // Don't count soname's value as an input file.
+  driver_flag.AddFlag("soname");
+  // TODO: -T (--script) support?
+
+  driver_flag.Parse(args);
+  library_path_resolver_->SetSysroot(flag_sysroot->GetLastValue());
+  library_path_resolver_->SetSyslibroot(flag_syslibroot->GetLastValue());
+  framework_path_resolver_->SetSyslibroot(flag_syslibroot->GetLastValue());
+  library_path_resolver_->AppendSearchdirs(searchdirs);
+  framework_path_resolver_->AppendSearchpaths(frameworkpaths);
+  arch_ = flag_arch->GetLastValue();  // Read later by TryParseMachONeeded.
+  if (no_default_searchpath)
+    LOG(WARNING) << "sorry -Z is not supported yet.";
+
+  // Start finding -lx from -L dir.
+  if (static_link)
+    library_path_resolver_->PreventSharedLibrary();
+
+  for (const auto& file : files_to_find) {  // values of -dynamic-linker
+    string path = library_path_resolver_->FindByFullname(file);
+    if (path.empty()) {
+      LOG(WARNING) << "file not found:" << file;
+      continue;  // Unresolved entries are logged and skipped.
+    }
+    input_paths->push_back(path);
+  }
+
+  for (const auto& lvalue : lvalues) {  // values of -l
+    string path = library_path_resolver_->ExpandLibraryPath(lvalue);
+    if (path.empty()) {
+      LOG(WARNING) << "library not found -l" << lvalue;
+      continue;
+    }
+    input_paths->push_back(path);
+  }
+  for (const auto& framework : frameworks) {  // values of -framework
+    string path = framework_path_resolver_->ExpandFrameworkPath(framework);
+    if (path.empty()) {
+      LOG(WARNING) << "framework not found -framework " << framework;
+      continue;
+    }
+    input_paths->push_back(path);
+  }
+}
+
+void LinkerInputProcessor::GetLibraryPath(
+    const std::vector<string>& envs,
+    std::vector<string>* library_paths) {
+  static const char* kPathPrefix = "LIBRARY_PATH=";
+  // Pick up the value of the first LIBRARY_PATH= entry, if any.
+  StringPiece remaining;
+  for (const auto& env : envs) {
+    if (!strings::StartsWith(env, kPathPrefix))
+      continue;
+    remaining.set(env.c_str(), env.size());
+    remaining.remove_prefix(strlen(kPathPrefix));
+    break;
+  }
+
+  // Without a (non-empty) LIBRARY_PATH, report the -L search dirs instead;
+  // this is the case for clang.
+  if (remaining.empty()) {
+    const std::vector<string>& dirs = library_path_resolver_->searchdirs();
+    library_paths->assign(dirs.begin(), dirs.end());
+    return;
+  }
+
+  // Otherwise split the value on ':' and append every entry, joined with the
+  // resolver's cwd via JoinPathRespectAbsolute.
+  const string& cwd = library_path_resolver_->cwd();
+  for (;;) {
+    const size_t colon = remaining.find_first_of(":");
+    StringPiece dir = remaining.substr(0, colon);
+    // Drop one trailing '/' so some/thing/ and some/thing compare equal.
+    if (strings::EndsWith(dir, "/"))
+      dir.remove_suffix(1);
+    library_paths->push_back(file::JoinPathRespectAbsolute(cwd, string(dir)));
+    if (colon == StringPiece::npos)
+      break;
+    remaining.remove_prefix(colon + 1);
+  }
+}
+
+/* static */
+LinkerInputProcessor::FileType LinkerInputProcessor::CheckFileType(
+    const string& path) {
+  // Classifies |path| by the magic in its first 8 bytes: BAD_FILE if it cannot
+  // be opened/read, OTHER_FILE if shorter than 8 bytes or unrecognized.
+  ScopedFd fd(ScopedFd::OpenForRead(path));
+  if (!fd.valid())
+    return BAD_FILE;
+  char buf[8];
+  for (int r, len = 0; len < 8;) {
+    r = fd.Read(buf + len, sizeof(buf) - len);
+    if (r < 0) {
+      PLOG(ERROR) << "read " << path;
+      return BAD_FILE;
+    }
+    if (r == 0)  // EOF before a full header was read.
+      return OTHER_FILE;
+    len += r;
+  }
+  if (memcmp(buf, ELFMAG, SELFMAG) == 0)
+    return ELF_BINARY_FILE;
+  if (memcmp(buf, TARMAG, STARMAG) == 0)
+    return THIN_ARCHIVE_FILE;
+  if (memcmp(buf, ARMAG, SARMAG) == 0)
+    return ARCHIVE_FILE;
+#ifdef __MACH__
+  // memcpy, not a cast: |buf| is not uint32_t-aligned and aliasing forbids it.
+  uint32_t magic;
+  memcpy(&magic, buf, sizeof(magic));
+  if (magic == FAT_MAGIC || magic == FAT_CIGAM)
+    return strings::EndsWith(path, ".a") ? ARCHIVE_FILE : MACHO_FAT_FILE;
+  if (magic == MH_MAGIC || magic == MH_CIGAM ||
+      magic == MH_MAGIC_64 || magic == MH_CIGAM_64)
+    return MACHO_OBJECT_FILE;
+#endif
+
+  return OTHER_FILE;
+}
+
+/* static */
+void LinkerInputProcessor::ParseThinArchive(
+    const string& filename, std::set<string>* input_files) {
+  VLOG(1) << "thin archive:" << filename;
+  ArFile ar(filename);
+  DCHECK(ar.Exists()) << filename;
+  DCHECK(ar.IsThinArchive()) << filename;
+  const size_t sep_pos = filename.rfind(SEP);
+  DCHECK_NE(string::npos, sep_pos) << filename;
+  const string ar_dir = filename.substr(0, sep_pos);  // archive's directory
+  VLOG(1) << "ar_dir:" << ar_dir;
+  std::vector<ArFile::EntryHeader> headers;
+  ar.GetEntries(&headers);
+  for (size_t n = 0; n != headers.size(); ++n) {
+    const ArFile::EntryHeader& header = headers[n];
+    const string member = file::JoinPath(ar_dir, header.ar_name);
+    VLOG(1) << "entry[" << n << "] " << header.ar_name << " " << member;
+    input_files->insert(member);  // every listed member becomes an input
+  }
+}
+
+void LinkerInputProcessor::TryParseLinkerScript(
+    const string& filename, std::vector<string>* input_paths) {
+  // A file the parser rejects is not a linker script and adds nothing.
+  VLOG(1) << "Try linker script:" << filename;
+  LinkerScriptParser script(
+      Content::CreateFromFile(filename),
+      library_path_resolver_->cwd(),
+      library_path_resolver_->searchdirs(),
+      library_path_resolver_->sysroot());
+  if (!script.Parse()) {
+    VLOG(1) << "not linker script:" << filename;
+    return;
+  }
+  VLOG(1) << "linker script:" << filename;
+  // Queue the startup file first (if any), then the inputs in script order.
+  if (!script.startup().empty())
+    input_paths->push_back(script.startup());
+  for (const auto& input : script.inputs())
+    input_paths->push_back(input);
+  // Also adopt the search directories the script declared.
+  library_path_resolver_->AppendSearchdirs(script.searchdirs());
+}
+
+void LinkerInputProcessor::TryParseElfNeeded(
+    const string& filename,
+    std::vector<string>* input_paths) {
+#ifdef __linux__
+  std::unique_ptr<ElfParser> elf(ElfParser::NewElfParser(filename));
+  if (elf == nullptr || !elf->valid())
+    return;  // Not parseable as ELF; nothing to add.
+  std::vector<string> needed;
+  if (!elf->ReadDynamicNeeded(&needed))
+    return;
+  for (const auto& path : needed) {  // |path| is looked up as a soname.
+    string pathname = library_path_resolver_->FindBySoname(path);
+    if (pathname.empty()) {
+      LOG(WARNING) << "so not found:" << path << " needed by " << filename;
+      continue;  // Unresolved libraries are logged and skipped.
+    }
+    input_paths->push_back(pathname);
+  }
+#elif defined(_WIN32)
+  UNREFERENCED_PARAMETER(filename);  // Nothing is parsed on Windows.
+  UNREFERENCED_PARAMETER(input_paths);
+#endif  // On any other platform this function is a no-op.
+}
+
+#ifdef __MACH__
+// Adds the dylibs |filename| depends on to |input_files|, recursing into fat
+// dylibs up to |max_recursion| levels.  Unlike ELF (see TryParseElfNeeded),
+// Mach-O dylibs need to be investigated recursively.
+void LinkerInputProcessor::TryParseMachONeeded(
+    const string& filename,
+    const int max_recursion,
+    std::set<string>* input_files) {
+  MachO macho(filename);
+  if (!macho.valid())
+    return;
+
+  std::vector<MachO::DylibEntry> needed;
+  if (!macho.GetDylibs(arch_, &needed))
+    return;
+
+  for (size_t i = 0; i < needed.size(); ++i) {
+    string dylib_name = needed[i].name;
+
+    if (dylib_name[0] == '/')  // Absolute names are rooted at syslibroot.
+      dylib_name = file::JoinPath(library_path_resolver_->syslibroot(),
+                                  dylib_name);
+
+    // If not found with the absolute path, should be searched. (unlikely)
+    if (dylib_name[0] != '/' || (access(dylib_name.c_str(), R_OK) != 0)) {
+      const string path_name = library_path_resolver_->FindBySoname(
+          string(file::Basename(dylib_name)));
+      if (path_name.empty()) {
+        LOG(WARNING) << "dylib not found:" << dylib_name
+                     << " needed by " << filename;
+        continue;
+      }
+      dylib_name = path_name;
+    }
+
+    if (!input_files->insert(dylib_name).second) {
+      VLOG(2) << "already checked:" << dylib_name;
+      continue;  // Also keeps dependency cycles from being re-walked.
+    }
+    // TODO: consider to parse MACHO_OBJECT_FILE if needed.
+    if (CheckFileType(dylib_name) != MACHO_FAT_FILE)
+      continue;
+
+    if (max_recursion > 0)
+      TryParseMachONeeded(dylib_name, max_recursion - 1, input_files);
+    else
+      LOG(WARNING) << "Hit max dylib recursion depth: "
+                   << " input_files=" << *input_files
+                   << " filename=" << filename
+                   << " kMaxRecursion=" << kMaxRecursion;
+  }
+}
+#endif
+
+}  // namespace devtools_goma
+
+#ifdef TEST
+
+#include <iostream>
+
+int main(int argc, char* argv[], const char** envp) {
+  google::InitGoogleLogging(argv[0]);
+
+  const string cwd = devtools_goma::GetCurrentDirNameOrDie();
+  // The usage below needs a compiler path AND a compiler command line
+  // (argv[2..]), so anything shorter than three arguments is rejected.
+  if (argc < 3) {
+    std::cerr << "Usage: " << argv[0] << " local_compiler_path gcc ..."
+              << std::endl;
+    exit(1);
+  }
+  string local_compiler_path = argv[1];
+  std::vector<string> args(argv + 2, argv + argc);
+
+  // Only gcc-style command lines are accepted.
+  std::unique_ptr<devtools_goma::CompilerFlags> flags(
+      devtools_goma::CompilerFlags::MustNew(args, cwd));
+  if (!flags->is_gcc()) {
+    std::cerr << "only gcc/g++ is supported" << std::endl;
+    exit(1);
+  }
+  const devtools_goma::GCCFlags& gcc_flags =
+      static_cast<const devtools_goma::GCCFlags&>(*flags);
+  // Build the CompilerInfo that GetInputFilesAndLibraryPath takes.
+  std::vector<string> compiler_info_envs;
+  flags->GetClientImportantEnvs(envp, &compiler_info_envs);
+  devtools_goma::CompilerInfoBuilder cib;
+  std::unique_ptr<devtools_goma::CompilerInfoData> compiler_info_data(
+      cib.FillFromCompilerOutputs(
+          gcc_flags, local_compiler_path, compiler_info_envs));
+  devtools_goma::CompilerInfo compiler_info(std::move(compiler_info_data));
+  if (compiler_info.HasError()) {
+    std::cerr << compiler_info.error_message() << std::endl;
+    exit(1);
+  }
+  // The local compiler path is what gets run with "-###".
+  devtools_goma::CommandSpec command_spec;
+  command_spec.set_name(flags->compiler_name());
+  command_spec.set_local_compiler_path(local_compiler_path);
+
+  devtools_goma::LinkerInputProcessor linker_input_processor(args, cwd);
+
+  std::set<string> input_files;
+  std::vector<string> library_paths;
+  if (!linker_input_processor.GetInputFilesAndLibraryPath(
+          compiler_info, command_spec, &input_files, &library_paths)) {
+    std::cerr << "GetInputFilesAndLibraryPath failed" << std::endl;
+    exit(1);
+  }
+  // Print both result sets, one path per line.
+  std::cout << "#Input files" << std::endl;
+  for (const auto& input_file : input_files)
+    std::cout << input_file << std::endl;
+  std::cout << "#library path" << std::endl;
+  for (const auto& library_path : library_paths)
+    std::cout << library_path << std::endl;
+  exit(0);
+}
+#endif
diff --git a/client/linker_input_processor.h b/client/linker_input_processor.h
new file mode 100644
index 0000000..20f851c
--- /dev/null
+++ b/client/linker_input_processor.h
@@ -0,0 +1,98 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_LINKER_INPUT_PROCESSOR_H_
+#define DEVTOOLS_GOMA_CLIENT_LINKER_INPUT_PROCESSOR_H_
+
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class CommandSpec;
+class CompilerFlags;
+class LinkerInputProcessorTest;
+class CompilerInfo;
+class LibraryPathResolver;
+class FrameworkPathResolver;
+
+class LinkerInputProcessor {
+ public:
+  enum FileType {
+    ARCHIVE_FILE,       // starts with ARMAG ("!<arch>\n")
+    THIN_ARCHIVE_FILE,  // starts with "!<thin>\n"
+    ELF_BINARY_FILE,    // starts with the ELF magic
+    OTHER_FILE,         // unrecognized, or shorter than 8 bytes
+    BAD_FILE,           // could not be opened or read
+    MACHO_FAT_FILE,     // fat magic (".a" paths count as ARCHIVE_FILE)
+    MACHO_OBJECT_FILE,  // MH_MAGIC / MH_CIGAM, 32- or 64-bit
+  };
+  LinkerInputProcessor(const std::vector<string>& args,
+                       const string& current_directory);
+  ~LinkerInputProcessor();
+
+  // Gets the input files and library paths for the command given by args.
+  // It runs the command with the -### flag, which dumps the command line
+  // arguments of collect2 or ld, and collects input files and library paths.
+  // It also checks libraries specified by -L and -l.
+  // If a library is a thin archive, it also includes the files listed in the
+  // thin archive as input files.
+  // It also tries parsing a file as a linker script, and gets the input files
+  // described in the linker script.
+  // If the dumped output contains LIBRARY_PATH=, it sets those paths to
+  // library_paths. Otherwise, it sets the paths parsed from -L options in the
+  // dumped command line to library_paths.
+  bool GetInputFilesAndLibraryPath(const CompilerInfo& compiler_info,
+                                   const CommandSpec& command_spec,
+                                   std::set<string>* input_files,
+                                   std::vector<string>* library_paths);
+
+ private:
+  friend class LinkerInputProcessorTest;
+  // Provided for test.
+  explicit LinkerInputProcessor(const string& current_directory);
+  bool CaptureDriverCommandLine(const CommandSpec& command_spec,
+                                std::vector<string>* driver_args,
+                                std::vector<string>* driver_envs);
+
+  // Parses the output of "gcc -### ...".
+  static bool ParseDumpOutput(const string& dump_output,
+                              std::vector<string>* driver_args,
+                              std::vector<string>* driver_envs);
+
+  void ParseDriverCommandLine(const std::vector<string>& driver_args,
+                              std::vector<string>* input_paths);
+  void GetLibraryPath(const std::vector<string>& driver_envs,
+                      std::vector<string>* library_paths);
+  static FileType CheckFileType(const string& path);
+  static void ParseThinArchive(const string& filename,
+                               std::set<string>* input_files);
+  void TryParseLinkerScript(const string& filename,
+                            std::vector<string>* input_paths);
+  void TryParseElfNeeded(const string& filename,
+                         std::vector<string>* input_paths);
+#ifdef __MACH__
+  void TryParseMachONeeded(const string& filename,
+                           const int max_recursion,
+                           std::set<string>* input_files);
+#endif
+
+  std::unique_ptr<CompilerFlags> flags_;  // null when built by the test ctor
+  std::unique_ptr<LibraryPathResolver> library_path_resolver_;
+  std::unique_ptr<FrameworkPathResolver> framework_path_resolver_;
+  string arch_;  // last -arch value seen by ParseDriverCommandLine
+
+  DISALLOW_COPY_AND_ASSIGN(LinkerInputProcessor);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_LINKER_INPUT_PROCESSOR_H_
diff --git a/client/linker_input_processor_unittest.cc b/client/linker_input_processor_unittest.cc
new file mode 100644
index 0000000..5ba9242
--- /dev/null
+++ b/client/linker_input_processor_unittest.cc
@@ -0,0 +1,1071 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifdef _WIN32
+# include "config_win.h"
+# include <shlobj.h>
+#endif
+
+#include <limits.h>
+
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+#include <gtest/gtest.h>
+
+#include "compiler_info.h"
+#include "ioutil.h"
+#include "library_path_resolver.h"
+#include "linker_input_processor.h"
+#include "path.h"
+#include "path_util.h"
+#include "unittest_util.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// Fake file contents. Each one starts with the magic bytes of the format its
+// name refers to; the text after the magic is filler.
+
+// "\177ELF" followed by four more header bytes.
+static const char* kElfBinary = "\177ELF\002\001\001\001blahblahblah";
+// Regular ar archive signature.
+static const char* kArFile = "!<arch>\n/        ";
+// Thin ar archive signature.
+static const char* kThinArFile = "!<thin>\n/        ";
+#ifdef __MACH__
+// Mach-O fat (multi-architecture) header bytes.
+static const char* kMachOFatFile = "\xca\xfe\xba\xbe blahblahblah";
+// Mach-O header bytes; each "Cigam" value is the matching "Magic" value with
+// its four bytes reversed.
+static const char* kMachMagic = "\xfe\xed\xfa\xce blahblahblah";
+static const char* kMachCigam = "\xce\xfa\xed\xfe blahblahblah";
+static const char* kMachMagic64 = "\xfe\xed\xfa\xcf blahblahblah";
+static const char* kMachCigam64 = "\xcf\xfa\xed\xfe blahblahblah";
+#endif
+
+// Test fixture for LinkerInputProcessor.
+//
+// LinkerInputProcessor declares this class as a friend, so the helpers below
+// forward to its private members. Every test gets a fresh temporary directory
+// (managed by TmpdirUtil) whose path is also installed as
+// LibraryPathResolver::fakeroot_ for the duration of the test.
+class LinkerInputProcessorTest : public testing::Test {
+ public:
+  void SetUp() override {
+    tmpdir_util_.reset(new TmpdirUtil("linker_input_processor_test"));
+
+    // To be used by LibraryPathResolver::fakeroot_.
+    // fakeroot_ is set from tmpdir_.c_str(), so tmpdir_ must not be modified
+    // until TearDown() has reset fakeroot_.
+    tmpdir_ = tmpdir_util_->tmpdir();
+    LibraryPathResolver::fakeroot_ = tmpdir_.c_str();
+  }
+
+  void TearDown() override {
+    // Reset fakeroot_ first so that it never refers to a directory that
+    // TmpdirUtil has already released.
+    LibraryPathResolver::fakeroot_ = "";
+    tmpdir_util_.reset();
+  }
+
+  // Forwards to the private static LinkerInputProcessor::ParseDumpOutput().
+  // Returns its result; |driver_args| and |driver_envs| receive its outputs.
+  bool ParseDumpOutput(const string& dump_output,
+                       std::vector<string>* driver_args,
+                       std::vector<string>* driver_envs) {
+    return LinkerInputProcessor::ParseDumpOutput(dump_output, driver_args,
+                                                 driver_envs);
+  }
+
+  // Runs LinkerInputProcessor::ParseDriverCommandLine() on |driver_args| with
+  // a processor constructed for |cwd|, then reports the state it left behind:
+  // the resolver's sysroot, the processor's arch, and the resolver's search
+  // directories (appended to |searchdirs|). |input_paths| is filled by the
+  // processor itself.
+  void ParseDriverCommandLine(
+      const std::vector<string>& driver_args,
+      const string& cwd,
+      string* sysroot,
+      string* arch,
+      std::vector<string>* searchdirs,
+      std::vector<string>* input_paths) {
+    LinkerInputProcessor linker_input_processor(cwd);
+    linker_input_processor.ParseDriverCommandLine(driver_args, input_paths);
+
+    *sysroot = linker_input_processor.library_path_resolver_->sysroot();
+    *arch = linker_input_processor.arch_;
+    const std::vector<string>& parsed_searchdirs =
+        linker_input_processor.library_path_resolver_->searchdirs();
+    // vector::insert appends without depending on <algorithm>/<iterator>,
+    // which this file does not include.
+    searchdirs->insert(searchdirs->end(), parsed_searchdirs.begin(),
+                       parsed_searchdirs.end());
+  }
+
+  // Calls LinkerInputProcessor::CheckFileType() on |path| after converting it
+  // with TmpdirUtil::FullPath().
+  LinkerInputProcessor::FileType CheckFileType(const string& path) {
+    return LinkerInputProcessor::CheckFileType(
+        tmpdir_util_->FullPath(path));
+  }
+
+  // Runs LinkerInputProcessor::GetLibraryPath() with |envs| on a processor
+  // constructed for |cwd| whose resolver has been given |searchdirs|.
+  void GetLibraryPath(
+      const std::vector<string>& envs,
+      const string& cwd,
+      const std::vector<string>& searchdirs,
+      std::vector<string>* library_paths) {
+    LinkerInputProcessor linker_input_processor(cwd);
+    linker_input_processor.library_path_resolver_->AppendSearchdirs(
+        searchdirs);
+    linker_input_processor.GetLibraryPath(envs, library_paths);
+  }
+
+  // Parses the thin archive at |tmpdir_| + |filename| and stores each member
+  // path in |input_files| with the |tmpdir_| prefix removed. A member path
+  // that is not under |tmpdir_| is reported as a test failure and skipped.
+  void ParseThinArchive(const string& filename, std::set<string>* input_files) {
+    std::set<string> raw_input_files;
+    LinkerInputProcessor::ParseThinArchive(tmpdir_ + filename,
+                                           &raw_input_files);
+    for (const auto& iter : raw_input_files) {
+      VLOG(1) << "input_files:" << iter;
+      if (!HasPrefixDir(iter, tmpdir_)) {
+        // substr() below would throw std::out_of_range for a path shorter
+        // than tmpdir_, and would produce a meaningless entry otherwise.
+        ADD_FAILURE() << "not under tmpdir: path=" << iter
+                      << " tmpdir=" << tmpdir_;
+        continue;
+      }
+      input_files->insert(iter.substr(tmpdir_.size()));
+    }
+  }
+
+#ifndef _WIN32
+  // Runs "ar |op| |archive| |files|..." through the shell from the directory
+  // |tmpdir_| + |cwd|, which is created first. Aborts the test binary
+  // (PCHECK) when the command does not exit with status 0.
+  void Archive(const string& cwd, const string& op, const string& archive,
+               const std::vector<string>& files) {
+    tmpdir_util_->MkdirForPath(cwd, true);
+    string command = "cd " + tmpdir_ + cwd + " && ar " + op + " " + archive;
+    for (const auto& file : files) {
+      command += " " + file;
+    }
+    PCHECK(system(command.c_str()) == 0) << command;
+  }
+#endif
+
+ protected:
+  std::unique_ptr<TmpdirUtil> tmpdir_util_;
+
+  // Path of the temporary directory owned by |tmpdir_util_|.
+  std::string tmpdir_;
+};
+
+// Feeds ParseDumpOutput() a gcc 4.4.3 dump in which every token of the
+// collect2 line is double-quoted, and checks the extracted linker arguments
+// and environment lines.
+TEST_F(LinkerInputProcessorTest, ParseGccDumpOutput) {
+  const string dump_output =
+      "Using built-in specs.\n"
+      "Target: x86_64-linux-gnu\n"
+      "Configured with: ../src/configure -v "
+      "--with-pkgversion='Ubuntu 4.4.3-4ubuntu5' "
+      "--with-bugurl=file:///usr/share/doc/gcc-4.4/README.Bugs "
+      "--enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr\n"
+      "Thread model: posix\n"
+      "gcc version 4.4.3 (Ubuntu 4.4.3-4ubuntu5) \n"
+      "COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:/usr/lib/gcc/x86_64-linux-gnu\n"
+      "LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/\n"
+      "COLLECT_GCC_OPTIONS='-pthread' '-Lout/Release' '-L/lib' '-o' "
+      "'out/Release/chrome' '-shared-libgcc' '-mtune=generic'\n"
+      " \"/usr/lib/gcc/x86_64-linux-gnu/4.4.3/collect2\" \"--build-id\" "
+      "\"--eh-frame-hdr\" \"-m\" \"elf_x86_64\" \"--hash-style=both\" "
+      "\"-dynamic-linker\" \"/lib64/ld-linux-x86-64.so.2\" "
+      "\"-o\" \"out/Release/chrome\" \"-z\" \"relro\" "
+      "\"/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crt1.o\" "
+      "\"/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crti.o\" "
+      "\"/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtbegin.o\" "
+      "\"-Lout/Release\" \"-L/lib\" \"-L/usr/lib/gcc/x86_64-linux-gnu/4.4.3\" "
+      "\"-O1\" \"--as-needed\" \"--gc-sections\" \"--icf=safe\" "
+      "\"--start-group\" "
+      "\"out/Release/obj.target/chrome/chrome/app/chrome_main.o\" "
+      "\"out/Release/obj.target/seccompsandbox/libseccomp_sandbox.a\" "
+      "\"--end-group\" \"-lX11\" \"-ldl\" \"-lXrender\" \"-lXss\" "
+      "\"-lstdc++\" \"-lm\" \"-lgcc_s\" \"-lgcc\" \"-lpthread\" \"-lc\" "
+      "\"-lgcc_s\" \"-lgcc\" \"/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtend.o\" "
+      "\"/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crtn.o\"\n";
+
+  std::vector<string> driver_args;
+  std::vector<string> driver_envs;
+  EXPECT_TRUE(ParseDumpOutput(dump_output, &driver_args, &driver_envs));
+
+  // Tokens of the collect2 line, in order, without the surrounding quotes.
+  const std::vector<string> expected_args = {
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/collect2",
+      "--build-id",
+      "--eh-frame-hdr",
+      "-m",
+      "elf_x86_64",
+      "--hash-style=both",
+      "-dynamic-linker",
+      "/lib64/ld-linux-x86-64.so.2",
+      "-o",
+      "out/Release/chrome",
+      "-z",
+      "relro",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crt1.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crti.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtbegin.o",
+      "-Lout/Release",
+      "-L/lib",
+      "-L/usr/lib/gcc/x86_64-linux-gnu/4.4.3",
+      "-O1",
+      "--as-needed",
+      "--gc-sections",
+      "--icf=safe",
+      "--start-group",
+      "out/Release/obj.target/chrome/chrome/app/chrome_main.o",
+      "out/Release/obj.target/seccompsandbox/libseccomp_sandbox.a",
+      "--end-group",
+      "-lX11",
+      "-ldl",
+      "-lXrender",
+      "-lXss",
+      "-lstdc++",
+      "-lm",
+      "-lgcc_s",
+      "-lgcc",
+      "-lpthread",
+      "-lc",
+      "-lgcc_s",
+      "-lgcc",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtend.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crtn.o",
+  };
+  EXPECT_EQ(expected_args, driver_args);
+
+  // Only the COMPILER_PATH and LIBRARY_PATH lines are expected here; the
+  // COLLECT_GCC_OPTIONS line is not.
+  const std::vector<string> expected_envs = {
+      "COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:/usr/lib/gcc/x86_64-linux-gnu",
+      "LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/4.4.3/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/",
+  };
+  EXPECT_EQ(expected_envs, driver_envs);
+}
+
+// Feeds ParseDumpOutput() a gcc 4.6.3 dump. In this format most tokens of the
+// collect2 line are unquoted and only some (e.g. "--sysroot=/") are quoted;
+// the expectations below cover both forms.
+TEST_F(LinkerInputProcessorTest, ParseGcc46DumpOutput) {
+  const string dump_output =
+      "Using built-in specs.\n"
+      "COLLECT_GCC=/usr/bin/g++\n"
+      "COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/4.6/lto-wrapper\n"
+      "Target: x86_64-linux-gnu\n"
+      "Configured with: ../src/configure -v --with-pkgversion='Ubuntu/Linaro"
+      " 4.6.3-1ubuntu5' "
+      "--with-bugurl=file:///usr/share/doc/gcc-4.6/README.Bugs "
+      "--enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr "
+      "--program-suffix=-4.6 --enable-shared --enable-linker-build-id "
+      "--with-system-zlib --libexecdir=/usr/lib --without-included-gettext "
+      "--enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.6 "
+      "--libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu "
+      "--enable-libstdcxx-debug --enable-libstdcxx-time=yes "
+      "--enable-gnu-unique-object --enable-plugin --enable-objc-gc "
+      "--disable-werror --with-arch-32=i686 --with-tune=generic "
+      "--enable-checking=release --build=x86_64-linux-gnu "
+      "--host=x86_64-linux-gnu --target=x86_64-linux-gnu\n"
+      "Thread model: posix\n"
+      "gcc version 4.6.3 (Ubuntu/Linaro 4.6.3-1ubuntu5) \n"
+      "COMPILER_PATH=../../third_party/gold/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/:/usr/lib/gcc/x86_64-linux-gnu/4.6/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/4.6/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/\n"
+      "LIBRARY_PATH=../../third_party/gold/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../x86_64-linux-gnu/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../lib/:"
+      "/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:"
+      "/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../:/lib/:"
+      "/usr/lib/\n"
+      "COLLECT_GCC_OPTIONS='-pthread' '-fPIC' '-B' '../../third_party/gold' "
+      "'-o' 'codesighs' '-shared-libgcc' '-mtune=generic' '-march=x86-64'\n"
+      " /usr/lib/gcc/x86_64-linux-gnu/4.6/collect2 \"--sysroot=/\" "
+      "--build-id --no-add-needed --as-needed --eh-frame-hdr -m elf_x86_64 "
+      "\"--hash-style=gnu\" -dynamic-linker /lib64/ld-linux-x86-64.so.2 "
+      "-z relro -o codesighs "
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../x86_64-linux-gnu/crt1.o "
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../x86_64-linux-gnu/crti.o "
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/crtbegin.o -L../../third_party/gold "
+      "-L/usr/lib/gcc/x86_64-linux-gnu/4.6 "
+      "-L/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../x86_64-linux-gnu "
+      "-L/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../lib "
+      "-L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu "
+      "-L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/4.6/../../.. "
+      "-z noexecstack --threads \"--thread-count=4\" \"--icf=none\" "
+      "\"-rpath=$ORIGIN/lib\" "
+      "--start-group obj/third_party/codesighs/codesighs.codesighs.o "
+      "--end-group \"-lstdc++\" -lm -lgcc_s -lgcc -lpthread -lc -lgcc_s "
+      "-lgcc /usr/lib/gcc/x86_64-linux-gnu/4.6/crtend.o "
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../x86_64-linux-gnu/crtn.o\n";
+
+  std::vector<string> driver_args;
+  std::vector<string> driver_envs;
+  EXPECT_TRUE(ParseDumpOutput(dump_output, &driver_args, &driver_envs));
+
+  // Tokens of the collect2 line, in order; quoted tokens lose their quotes.
+  const std::vector<string> expected_args = {
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/collect2",
+      "--sysroot=/",
+      "--build-id",
+      "--no-add-needed",
+      "--as-needed",
+      "--eh-frame-hdr",
+      "-m",
+      "elf_x86_64",
+      "--hash-style=gnu",
+      "-dynamic-linker",
+      "/lib64/ld-linux-x86-64.so.2",
+      "-z",
+      "relro",
+      "-o",
+      "codesighs",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../x86_64-linux-gnu/crt1.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../x86_64-linux-gnu/crti.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/crtbegin.o",
+      "-L../../third_party/gold",
+      "-L/usr/lib/gcc/x86_64-linux-gnu/4.6",
+      "-L/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../x86_64-linux-gnu",
+      "-L/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../lib",
+      "-L/lib/x86_64-linux-gnu",
+      "-L/lib/../lib",
+      "-L/usr/lib/x86_64-linux-gnu",
+      "-L/usr/lib/../lib",
+      "-L/usr/lib/gcc/x86_64-linux-gnu/4.6/../../..",
+      "-z",
+      "noexecstack",
+      "--threads",
+      "--thread-count=4",
+      "--icf=none",
+      "-rpath=$ORIGIN/lib",
+      "--start-group",
+      "obj/third_party/codesighs/codesighs.codesighs.o",
+      "--end-group",
+      "-lstdc++",
+      "-lm",
+      "-lgcc_s",
+      "-lgcc",
+      "-lpthread",
+      "-lc",
+      "-lgcc_s",
+      "-lgcc",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/crtend.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../x86_64-linux-gnu/crtn.o",
+  };
+  EXPECT_EQ(expected_args, driver_args);
+
+  // Only COMPILER_PATH and LIBRARY_PATH are expected; the COLLECT_* lines of
+  // the dump are not.
+  const std::vector<string> expected_envs = {
+      "COMPILER_PATH=../../third_party/gold/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/:/usr/lib/gcc/x86_64-linux-gnu/4.6/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/4.6/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/",
+      "LIBRARY_PATH=../../third_party/gold/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../x86_64-linux-gnu/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../lib/:"
+      "/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:"
+      "/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../:/lib/:"
+      "/usr/lib/",
+  };
+  EXPECT_EQ(expected_envs, driver_envs);
+}
+
+TEST_F(LinkerInputProcessorTest, ParseGccErrorDumpOutput) {
+  std::vector<string> driver_args;
+  std::vector<string> driver_envs;
+  EXPECT_FALSE(ParseDumpOutput(
+      "g++: out/Release/obj.target/memory_test/"
+      "chrome/test/memory_test/memory_test.o: No such file or directory\n"
+      "g++: out/Release/obj.target/chrome/libtest_support_common.a: "
+      "No such file or directory\n"
+      "\n"
+      "Using built-in specs.\n"
+      "Target: x86_64-linux-gnu\n"
+      "Configured with: ../src/configure -v "
+      "--with-pkgversion='Ubuntu 4.4.3-4ubuntu5' "
+      "--with-bugurl=file:///usr/share/doc/gcc-4.4/README.Bugs "
+      "--enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr\n"
+      "Thread model: posix\n"
+      "gcc version 4.4.3 (Ubuntu 4.4.3-4ubuntu5)\n",
+      &driver_args, &driver_envs));
+}
+
+// Feeds ParseDumpOutput() a clang 3.0 dump. The dump invokes /usr/bin/ld
+// directly and has no COMPILER_PATH/LIBRARY_PATH lines, so the environment
+// list is expected to stay empty.
+TEST_F(LinkerInputProcessorTest, ParseClangDumpOutput) {
+  const string dump_output =
+      "clang version 3.0 (trunk 131935)\n"
+      "Target: x86_64-unknown-linux-gnu\n"
+      "Thread model: posix\n"
+      " \"/usr/bin/ld\" \"-z\" \"relro\" \"--hash-style=both\" "
+      "\"--build-id\" \"--eh-frame-hdr\" \"-m\" \"elf_x86_64\" "
+      "\"-dynamic-linker\" \"/lib64/ld-linux-x86-64.so.2\" "
+      "\"-o\" \"out/Release/chrome\" "
+      "\"/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib64/crt1.o\" "
+      "\"/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib64/crti.o\" "
+      "\"/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtbegin.o\" "
+      "\"-Lout/Release\" \"-L/lib\" \"-L/usr/lib/gcc/x86_64-linux-gnu/4.4.3\" "
+      "\"-L/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib64\" "
+      "\"-L/lib/../lib64\" \"-L/usr/lib/../lib64\" "
+      "\"-L/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../..\" "
+      "\"-L/usr/lib/x86_64-linux-gnu\" \"-z\" \"noexecstack\" "
+      "\"-O1\" \"--as-needed\" \"--gc-sections\" \"--icf=safe\" "
+      "\"--start-group\" "
+      "\"out/Release/obj.target/chrome/chrome/app/chrome_main.o\" "
+      "\"out/Release/obj.target/seccompsandbox/libseccomp_sandbox.a\" "
+      "\"--end-group\" \"-lX11\" \"-ldl\" \"-lXrender\" \"-lXss\" "
+      "\"-lstdc++\" \"-lm\" \"-lgcc_s\" \"-lgcc\" \"-lpthread\" "
+      "\"-lc\" \"-lgcc_s\" \"-lgcc\" "
+      "\"/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtend.o\" "
+      "\"/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib64/crtn.o\"\n";
+
+  std::vector<string> driver_args;
+  std::vector<string> driver_envs;
+  EXPECT_TRUE(ParseDumpOutput(dump_output, &driver_args, &driver_envs));
+
+  // Tokens of the ld line, in order, without the surrounding quotes.
+  const std::vector<string> expected_args = {
+      "/usr/bin/ld",
+      "-z",
+      "relro",
+      "--hash-style=both",
+      "--build-id",
+      "--eh-frame-hdr",
+      "-m",
+      "elf_x86_64",
+      "-dynamic-linker",
+      "/lib64/ld-linux-x86-64.so.2",
+      "-o",
+      "out/Release/chrome",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib64/crt1.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib64/crti.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtbegin.o",
+      "-Lout/Release",
+      "-L/lib",
+      "-L/usr/lib/gcc/x86_64-linux-gnu/4.4.3",
+      "-L/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib64",
+      "-L/lib/../lib64",
+      "-L/usr/lib/../lib64",
+      "-L/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../..",
+      "-L/usr/lib/x86_64-linux-gnu",
+      "-z",
+      "noexecstack",
+      "-O1",
+      "--as-needed",
+      "--gc-sections",
+      "--icf=safe",
+      "--start-group",
+      "out/Release/obj.target/chrome/chrome/app/chrome_main.o",
+      "out/Release/obj.target/seccompsandbox/libseccomp_sandbox.a",
+      "--end-group",
+      "-lX11",
+      "-ldl",
+      "-lXrender",
+      "-lXss",
+      "-lstdc++",
+      "-lm",
+      "-lgcc_s",
+      "-lgcc",
+      "-lpthread",
+      "-lc",
+      "-lgcc_s",
+      "-lgcc",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtend.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib64/crtn.o",
+  };
+  EXPECT_EQ(expected_args, driver_args);
+
+  // No environment lines appear in the clang dump.
+  const std::vector<string> expected_envs;
+  EXPECT_EQ(expected_envs, driver_envs);
+}
+
+#ifdef __linux__
+// Runs ParseDriverCommandLine() on a dynamically linked collect2 command line
+// against files created under the temporary directory, and checks the
+// reported input paths, search directories, sysroot and arch.
+TEST_F(LinkerInputProcessorTest, ParseGccDriverCommandLine) {
+  const string cwd = "/src";
+  tmpdir_util_->SetCwd(cwd);
+
+  // Shorthand for creating one file in the temporary tree.
+  auto create_file = [this](const string& path, const char* contents) {
+    tmpdir_util_->CreateTmpFile(path, contents);
+  };
+
+  create_file("/lib64/ld-linux-x86-64.so.2", kElfBinary);
+  create_file(file::JoinPath(cwd, "out/Release/chrome"), "");
+  create_file("/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crt1.o",
+              kElfBinary);
+  create_file("/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crti.o",
+              kElfBinary);
+  create_file("/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtbegin.o", kElfBinary);
+  create_file(file::JoinPath(cwd, "out/Release/.tmp"), "");
+  create_file(
+      file::JoinPath(cwd,
+                     "out/Release/obj.target/chrome/chrome/app/chrome_main.o"),
+      kElfBinary);
+  create_file(
+      file::JoinPath(
+          cwd, "out/Release/obj.target/seccompsandbox/libseccomp_sandbox.a"),
+      kThinArFile);
+  create_file("/usr/lib/libX11.so", kElfBinary);
+  create_file("/usr/lib/libdl.so", kElfBinary);
+  create_file("/usr/lib/libXrender.so", kElfBinary);
+  create_file("/usr/lib/libXss.so", kElfBinary);
+  create_file("/usr/lib/gcc/x86_64-linux-gnu/4.4.3/libstdc++.so", kElfBinary);
+  create_file("/usr/lib/libm.so", kElfBinary);
+  create_file("/usr/lib/gcc/x86_64-linux-gnu/4.4.3/libgcc_s.so", kElfBinary);
+  create_file("/usr/lib/gcc/x86_64-linux-gnu/4.4.3/libgcc.a", kArFile);
+  create_file("/usr/lib/libpthread.so", kElfBinary);
+  // libc.so is given linker-script-like text rather than ELF contents.
+  create_file("/usr/lib/libc.so", "OUTPUT_FORMAT(elf64-x86-64)");
+  create_file("/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtend.o", kElfBinary);
+  create_file("/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crtn.o",
+              kElfBinary);
+
+  const std::vector<string> args = {
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/collect2",
+      "--build-id",
+      "--eh-frame-hdr",
+      "-m",
+      "elf_x86_64",
+      "--hash-style=both",
+      "-dynamic-linker",
+      "/lib64/ld-linux-x86-64.so.2",
+      "-o",
+      "out/Release/chrome",
+      "-z",
+      "relro",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crt1.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crti.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtbegin.o",
+      "-Lout/Release",
+      "-L/lib",
+      "-L/usr/lib/gcc/x86_64-linux-gnu/4.4.3",
+      "-L/lib/../lib",
+      "-L/usr/lib/../lib",
+      "-O1",
+      "--as-needed",
+      "--gc-sections",
+      "--icf=safe",
+      "--start-group",
+      "out/Release/obj.target/chrome/chrome/app/chrome_main.o",
+      "out/Release/obj.target/seccompsandbox/libseccomp_sandbox.a",
+      "--end-group",
+      "-lX11",
+      "-ldl",
+      "-lXrender",
+      "-lXss",
+      "-lstdc++",
+      "-lm",
+      "-lgcc_s",
+      "-lgcc",
+      "-lpthread",
+      "-lc",
+      "-lgcc_s",
+      "-lgcc",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtend.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crtn.o",
+  };
+
+  std::vector<string> input_paths;
+  std::vector<string> searchdirs;
+  string sysroot;
+  string arch;
+  ParseDriverCommandLine(
+      args, cwd, &sysroot, &arch, &searchdirs, &input_paths);
+
+  // Expected order: files named directly on the command line, then the
+  // dynamic linker, then one entry per -l option (repeated options yield
+  // repeated entries).
+  const std::vector<string> expected_paths = {
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crt1.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crti.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtbegin.o",
+      "out/Release/obj.target/chrome/chrome/app/chrome_main.o",
+      "out/Release/obj.target/seccompsandbox/libseccomp_sandbox.a",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtend.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crtn.o",
+      "/lib64/ld-linux-x86-64.so.2",
+      "/usr/lib/../lib/libX11.so",
+      "/usr/lib/../lib/libdl.so",
+      "/usr/lib/../lib/libXrender.so",
+      "/usr/lib/../lib/libXss.so",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/libstdc++.so",
+      "/usr/lib/../lib/libm.so",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/libgcc_s.so",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/libgcc.a",
+      "/usr/lib/../lib/libpthread.so",
+      "/usr/lib/../lib/libc.so",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/libgcc_s.so",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/libgcc.a",
+  };
+  EXPECT_EQ(expected_paths, input_paths);
+
+  // One entry per -L option, in command-line order.
+  const std::vector<string> expected_searchdirs = {
+      "out/Release",
+      "/lib",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3",
+      "/lib/../lib",
+      "/usr/lib/../lib",
+  };
+  EXPECT_EQ(expected_searchdirs, searchdirs);
+
+  EXPECT_EQ("", sysroot);
+  EXPECT_EQ("", arch);
+}
+
+// Runs ParseDriverCommandLine() on a "-static" collect2 command line. Both
+// libc.so and libc.a exist in the temporary tree; the expectation is that
+// -lc resolves to libc.a.
+TEST_F(LinkerInputProcessorTest, ParseGccDriverCommandLineStaticLink) {
+  const string cwd = "/src";
+  tmpdir_util_->SetCwd(cwd);
+
+  // Shorthand for creating one file in the temporary tree.
+  auto create_file = [this](const string& path, const char* contents) {
+    tmpdir_util_->CreateTmpFile(path, contents);
+  };
+
+  create_file("/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crt1.o",
+              kElfBinary);
+  create_file("/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crti.o",
+              kElfBinary);
+  create_file("/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtbeginT.o", kElfBinary);
+  create_file(file::JoinPath(cwd, "hello.o"), kElfBinary);
+  create_file("/usr/lib/gcc/x86_64-linux-gnu/4.4.3/libgcc.a", kArFile);
+  create_file("/usr/lib/gcc/x86_64-linux-gnu/4.4.3/libgcc_eh.a", kArFile);
+  create_file("/usr/lib/libc.so", "OUTPUT_FORMAT(elf64-x86-64)");
+  create_file("/usr/lib/libc.a", kArFile);
+  create_file("/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtend.o", kElfBinary);
+  create_file("/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crtn.o",
+              kElfBinary);
+
+  // gcc -### -static -o hello hello.o
+  const std::vector<string> args = {
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/collect2",
+      "--build-id",
+      "-m",
+      "elf_x86_64",
+      "--hash-style=both",
+      "-static",
+      "-o",
+      "hello",
+      "-z",
+      "relro",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crt1.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crti.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtbeginT.o",
+      "-L/usr/lib/gcc/x86_64-linux-gnu/4.4.3",
+      "-L/usr/lib/gcc/x86_64-linux-gnu/4.4.3",
+      "-L/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib",
+      "-L/lib/../lib",
+      "-L/usr/lib/../lib",
+      "-L/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../..",
+      "-L/usr/lib/x86_64-linux-gnu",
+      "hello.o",
+      "--start-group",
+      "-lgcc",
+      "-lgcc_eh",
+      "-lc",
+      "--end-group",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtend.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crtn.o",
+  };
+
+  std::vector<string> input_paths;
+  std::vector<string> searchdirs;
+  string sysroot;
+  string arch;
+  ParseDriverCommandLine(
+      args, cwd, &sysroot, &arch, &searchdirs, &input_paths);
+
+  // Files named directly come first, followed by the archives found for the
+  // -l options.
+  const std::vector<string> expected_paths = {
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crt1.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crti.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtbeginT.o",
+      "hello.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/crtend.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/crtn.o",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/libgcc.a",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/libgcc_eh.a",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib/libc.a",
+  };
+  EXPECT_EQ(expected_paths, input_paths);
+
+  // One entry per -L option, duplicates included.
+  const std::vector<string> expected_searchdirs = {
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../../../lib",
+      "/lib/../lib",
+      "/usr/lib/../lib",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/../../..",
+      "/usr/lib/x86_64-linux-gnu",
+  };
+  EXPECT_EQ(expected_searchdirs, searchdirs);
+
+  EXPECT_EQ("", sysroot);
+  EXPECT_EQ("", arch);
+}
+#endif
+#ifdef __MACH__
+// Runs ParseDriverCommandLine() on a Mac clang ld command line and checks
+// that "-arch x86_64" is recorded as the arch, that -lSystem resolves to
+// /usr/lib/libSystem.dylib, and that no search directories are reported.
+TEST_F(LinkerInputProcessorTest, ParseMacClangDriverCommandLine) {
+  const string cwd = "/src";
+  tmpdir_util_->SetCwd(cwd);
+  tmpdir_util_->CreateTmpFile("/usr/lib/libSystem.dylib", kMachOFatFile);
+  tmpdir_util_->CreateTmpFile("hello.o", kMachMagic);
+
+  // clang -### -o hello hello.o
+  const std::vector<string> args = {
+      "/usr/bin/ld",
+      "-demangle",
+      "-dynamic",
+      "-arch",
+      "x86_64",
+      "-macosx_version_min",
+      "10.8.0",
+      "-o",
+      "hello",
+      "hello.o",
+      "-lSystem",
+      "/usr/bin/../lib/clang/4.1/lib/darwin/libclang_rt.osx.a",
+  };
+
+  std::vector<string> input_paths;
+  std::vector<string> searchdirs;
+  string sysroot;
+  string arch;
+  ParseDriverCommandLine(
+      args, cwd, &sysroot, &arch, &searchdirs, &input_paths);
+
+  const std::vector<string> expected_paths = {
+      "hello.o",
+      "/usr/bin/../lib/clang/4.1/lib/darwin/"
+      "libclang_rt.osx.a",
+      "/usr/lib/libSystem.dylib",
+  };
+  EXPECT_EQ(expected_paths, input_paths);
+
+  // searchdir should not have default ones.
+  const std::vector<string> expected_searchdirs;
+  EXPECT_EQ(expected_searchdirs, searchdirs);
+
+  EXPECT_EQ("", sysroot);
+  EXPECT_EQ("x86_64", arch);
+}
+#endif
+
+#ifdef __linux__
+// TODO: fix library_path_resolver on Mac so that it can handle *.so for NaCl.
+TEST_F(LinkerInputProcessorTest, ParseNaclGccSolinkDriverCommandLine) {
+  string cwd = "/src/chromium1/native_client/src/untrusted/nacl";
+  tmpdir_util_->SetCwd(cwd);
+  tmpdir_util_->CreateTmpFile(
+      "/src/chromium1/native_client/src/untrusted/nacl/"
+      "../../../../out/Release/gen/tc_glibc/lib32/libimc_syscalls.so",
+      kElfBinary);
+  tmpdir_util_->CreateTmpFile(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32/crti.o", kElfBinary);
+  tmpdir_util_->CreateTmpFile(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/32/crtbeginS.o", kElfBinary);
+  tmpdir_util_->CreateTmpFile(
+      "/src/chromium1/native_client/src/untrusted/nacl/"
+      "../../../../out/Release/obj/native_client/src/untrusted/nacl"
+      "/imc_syscalls_lib.gen/glibc-x86-32-so/imc_syscalls_lib/imc_accept.o",
+      kElfBinary);
+  tmpdir_util_->CreateTmpFile(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32/libstdc++.so", kElfBinary);
+  tmpdir_util_->CreateTmpFile(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32/libm.so", kElfBinary);
+  tmpdir_util_->CreateTmpFile(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32/libc.so", kElfBinary);
+  tmpdir_util_->CreateTmpFile(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32/libgcc_s.so", kElfBinary);
+  tmpdir_util_->CreateTmpFile(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/32/crtendS.o", kElfBinary);
+  tmpdir_util_->CreateTmpFile(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32/crtn.o", kElfBinary);
+
+  std::vector<string> args;
+  args.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../libexec/gcc/x86_64-nacl/4.4.3/collect2");
+  args.push_back("--no-add-needed");
+  args.push_back("--eh-frame-hdr");
+  args.push_back("--m");
+  args.push_back("--elf_nacl");
+  args.push_back("-shared");
+  args.push_back("-o");
+  args.push_back(
+      "../../../../out/Release/gen/tc_glibc/lib32/libimc_syscalls.so");
+  args.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32/crti.o");
+  args.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/32/crtbeginS.o");
+  args.push_back(
+      "-L/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/32");
+  args.push_back(
+      "-L/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32");
+  args.push_back("-L../../../../out/Release/gen/tc_glibc/lib32");
+  args.push_back(
+      "-L/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3");
+  args.push_back(
+      "-L/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc");
+  args.push_back(
+      "-L/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl/lib");
+  args.push_back("--as-needed");
+  args.push_back(
+      "../../../../out/Release/obj/native_client/src/untrusted/nacl"
+      "/imc_syscalls_lib.gen/glibc-x86-32-so/imc_syscalls_lib/imc_accept.o");
+  args.push_back("-soname");
+  args.push_back("libimc_syscall.so");
+  args.push_back("-lstdc++");
+  args.push_back("-lm");
+  args.push_back("-lgcc_s");
+  args.push_back("-lc");
+  args.push_back("-lgcc_s");
+  args.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/32/crtendS.o");
+  args.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32/crtn.o");
+
+  std::vector<string> input_paths;
+  std::vector<string> searchdirs;
+  string sysroot;
+  string arch;
+  ParseDriverCommandLine(
+      args, cwd, &sysroot, &arch, &searchdirs, &input_paths);
+  std::vector<string> expected_paths;
+  expected_paths.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32/crti.o");
+  expected_paths.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/32/crtbeginS.o");
+  expected_paths.push_back(
+      "../../../../out/Release/obj/native_client/src/untrusted/nacl"
+      "/imc_syscalls_lib.gen/glibc-x86-32-so/imc_syscalls_lib/imc_accept.o");
+  expected_paths.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/32/crtendS.o");
+  expected_paths.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32/crtn.o");
+  expected_paths.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32/libstdc++.so");
+  expected_paths.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32/libm.so");
+  expected_paths.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32/libgcc_s.so");
+  expected_paths.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32/libc.so");
+  expected_paths.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32/libgcc_s.so");
+  EXPECT_EQ(expected_paths, input_paths);
+
+  std::vector<string> expected_searchdirs;
+  expected_searchdirs.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/32");
+  expected_searchdirs.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl"
+      "/lib/../lib32");
+  expected_searchdirs.push_back(
+      "../../../../out/Release/gen/tc_glibc/lib32");
+  expected_searchdirs.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3");
+  expected_searchdirs.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc");
+  expected_searchdirs.push_back(
+      "/src/chromium1/src/out/Release/gen/sdk/toolchain/linux_x86_glibc"
+      "/bin/../lib/gcc/x86_64-nacl/4.4.3/../../../../x86_64-nacl/lib");
+  EXPECT_EQ(expected_searchdirs, searchdirs);
+
+  EXPECT_EQ("", sysroot);
+  EXPECT_EQ("", arch);
+}
+#endif
+
+TEST_F(LinkerInputProcessorTest, GetLibraryPath) {
+  string cwd = "/dummy";
+  tmpdir_util_->SetCwd(cwd);
+
+  std::vector<string> searchdirs = {
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6",
+      "/usr/lib/x86_64-linux-gnu",
+  };
+  // COMPILER_PATH is present but does not show up in the expected result.
+  std::vector<string> envs = {
+      "COMPILER_PATH=../../third_party/gold/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/:/usr/lib/gcc/x86_64-linux-gnu/4.6/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/4.6/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/",
+      "LIBRARY_PATH="
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../x86_64-linux-gnu/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../lib/:"
+      "/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:"
+      "/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../:/lib/:"
+      "/usr/lib/",
+  };
+  std::vector<string> library_paths;
+  GetLibraryPath(envs, cwd, searchdirs, &library_paths);
+
+  // One entry per LIBRARY_PATH element, in order, minus the trailing '/'.
+  std::vector<string> expected_library_paths = {
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../x86_64-linux-gnu",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../lib",
+      "/lib/x86_64-linux-gnu",
+      "/lib/../lib",
+      "/usr/lib/x86_64-linux-gnu",
+      "/usr/lib/../lib",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../..",
+      "/lib",
+      "/usr/lib",
+  };
+  EXPECT_EQ(expected_library_paths, library_paths);
+}
+
+TEST_F(LinkerInputProcessorTest, GetLibraryPathNoLibraryPathEnv) {
+  string cwd = "/dummy";
+  tmpdir_util_->SetCwd(cwd);
+
+  std::vector<string> searchdirs = {
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6",
+      "/usr/lib/x86_64-linux-gnu",
+  };
+  // The environment deliberately carries no LIBRARY_PATH entry.
+  std::vector<string> envs = {
+      "COMPILER_PATH=../../third_party/gold/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6/:/usr/lib/gcc/x86_64-linux-gnu/4.6/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/4.6/:"
+      "/usr/lib/gcc/x86_64-linux-gnu/",
+  };
+  const std::vector<string> expected_library_paths(searchdirs);
+  std::vector<string> library_paths;
+  GetLibraryPath(envs, cwd, searchdirs, &library_paths);
+  // Without LIBRARY_PATH the result is exactly the given search dirs.
+  EXPECT_EQ(expected_library_paths, library_paths);
+}
+
+TEST_F(LinkerInputProcessorTest, GetLibraryPathRelativePath) {
+  // A relative LIBRARY_PATH entry is unusual; it is expected under cwd.
+  string cwd = "/dummy";
+  tmpdir_util_->SetCwd(cwd);
+  std::vector<string> envs(
+      1, "LIBRARY_PATH=../../third_party/gold/:"
+         "/usr/lib/gcc/x86_64-linux-gnu/4.6/");
+  std::vector<string> searchdirs;
+  std::vector<string> library_paths;
+  GetLibraryPath(envs, cwd, searchdirs, &library_paths);
+
+  std::vector<string> expected_library_paths = {
+      "/dummy/../../third_party/gold",
+      "/usr/lib/gcc/x86_64-linux-gnu/4.6",
+  };
+  EXPECT_EQ(expected_library_paths, library_paths);
+}
+
+TEST_F(LinkerInputProcessorTest, CheckFileType) {
+#ifndef _WIN32
+  tmpdir_util_->CreateTmpFile("/lib64/ld-linux-x86-64.so.2", kElfBinary);
+  EXPECT_EQ(LinkerInputProcessor::ELF_BINARY_FILE,
+            CheckFileType("/lib64/ld-linux-x86-64.so.2"));
+  tmpdir_util_->CreateTmpFile(
+      "/src/out/Release/obj.target/chrome/chrome/app/chrome_main.o",
+      kElfBinary);
+  EXPECT_EQ(LinkerInputProcessor::ELF_BINARY_FILE,
+            CheckFileType(
+                "/src/out/Release/obj.target/chrome/chrome/app/chrome_main.o"));
+  tmpdir_util_->CreateTmpFile(
+      "/src/out/Release/obj.target/seccompsandbox/libseccomp_sandbox.a",
+      kThinArFile);
+  EXPECT_EQ(LinkerInputProcessor::THIN_ARCHIVE_FILE,
+            CheckFileType("/src/out/Release/obj.target/"
+                          "seccompsandbox/libseccomp_sandbox.a"));
+  tmpdir_util_->CreateTmpFile(
+      "/usr/lib/gcc/x86_64-linux-gnu/4.4.3/libgcc.a", kArFile);
+  EXPECT_EQ(LinkerInputProcessor::ARCHIVE_FILE,
+            CheckFileType("/usr/lib/gcc/x86_64-linux-gnu/4.4.3/libgcc.a"));
+  tmpdir_util_->CreateTmpFile(
+      "/usr/lib/libc.so", "OUTPUT_FORMAT(elf64-x86-64)");
+  EXPECT_EQ(LinkerInputProcessor::OTHER_FILE,  // Text linker script.
+            CheckFileType("/usr/lib/libc.so"));
+#else  // _WIN32: same checks with backslash-separated paths.
+  tmpdir_util_->CreateTmpFile("\\lib64\\elf.o", kElfBinary);
+  EXPECT_EQ(LinkerInputProcessor::ELF_BINARY_FILE,
+            CheckFileType("\\lib64\\elf.o"));
+  tmpdir_util_->CreateTmpFile("\\out\\Debug\\thinar.a", kThinArFile);
+  EXPECT_EQ(LinkerInputProcessor::THIN_ARCHIVE_FILE,
+            CheckFileType("\\out\\Debug\\thinar.a"));
+  tmpdir_util_->CreateTmpFile("\\out\\Debug\\ar.a", kArFile);
+  EXPECT_EQ(LinkerInputProcessor::ARCHIVE_FILE,
+            CheckFileType("\\out\\Debug\\ar.a"));
+  tmpdir_util_->CreateTmpFile("\\lib\\libc.so", "OUTPUT_FORMAT(elf64-x86-64)");
+  EXPECT_EQ(LinkerInputProcessor::OTHER_FILE,
+            CheckFileType("\\lib\\libc.so"));
+#endif  // _WIN32
+#ifdef __MACH__
+  tmpdir_util_->CreateTmpFile("/usr/lib/libSystem.dylib", kMachOFatFile);
+  EXPECT_EQ(LinkerInputProcessor::MACHO_FAT_FILE,
+            CheckFileType("/usr/lib/libSystem.dylib"));
+  tmpdir_util_->CreateTmpFile("magic.o", kMachMagic);
+  EXPECT_EQ(LinkerInputProcessor::MACHO_OBJECT_FILE,
+            CheckFileType("magic.o"));
+  tmpdir_util_->CreateTmpFile("cigam.o", kMachCigam);
+  EXPECT_EQ(LinkerInputProcessor::MACHO_OBJECT_FILE,
+            CheckFileType("cigam.o"));
+  tmpdir_util_->CreateTmpFile("magic64.o", kMachMagic64);
+  EXPECT_EQ(LinkerInputProcessor::MACHO_OBJECT_FILE,
+            CheckFileType("magic64.o"));
+  tmpdir_util_->CreateTmpFile("cigam64.o", kMachCigam64);
+  EXPECT_EQ(LinkerInputProcessor::MACHO_OBJECT_FILE,
+            CheckFileType("cigam64.o"));
+#endif  // __MACH__
+}
+
+#ifdef __linux__
+// TODO: investigate why this fails elsewhere (enabled on Linux only).
+TEST_F(LinkerInputProcessorTest, ParseThinArchive) {
+  // The archive in bar/ refers to the two objects in foo/ by relative path.
+  tmpdir_util_->CreateTmpFile(
+      "/src/out/Release/obj.target/foo/foo.o", kElfBinary);
+  tmpdir_util_->CreateTmpFile(
+      "/src/out/Release/obj.target/foo/bar.o", kElfBinary);
+  std::vector<string> files = {"../foo/foo.o", "../foo/bar.o"};
+  Archive("/src/out/Release/obj.target/bar", "rcuT", "libfoo.a", files);
+
+  std::set<string> input_files;
+  ParseThinArchive("/src/out/Release/obj.target/bar/libfoo.a", &input_files);
+  std::set<string> expected_files = {
+      "/src/out/Release/obj.target/bar/../foo/foo.o",
+      "/src/out/Release/obj.target/bar/../foo/bar.o",
+  };
+  EXPECT_EQ(expected_files, input_files);
+}
+#endif
+
+}  // namespace devtools_goma
diff --git a/client/linker_script_parser.cc b/client/linker_script_parser.cc
new file mode 100644
index 0000000..45e4245
--- /dev/null
+++ b/client/linker_script_parser.cc
@@ -0,0 +1,356 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "linker_script_parser.h"
+
+#include <stdio.h>
+
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+
+#include <iterator>
+#include <string>
+#include <vector>
+
+#include "content.h"
+#include "path.h"
+#include "path_util.h"
+
+#ifdef _WIN32
+#include "posix_helper_win.h"
+#endif
+
+namespace devtools_goma {
+
+const char* LinkerScriptParser::fakeroot_ = "";  // Empty unless a test sets it.
+
+// Takes ownership of |content| (wrapped in a cursor); the rest is copied.
+LinkerScriptParser::LinkerScriptParser(
+    std::unique_ptr<Content> content,
+    const string& current_directory,
+    const std::vector<string>& searchdirs,
+    const string& sysroot)
+    : content_(new ContentCursor(std::move(content))),
+      current_directory_(current_directory),
+      searchdirs_(searchdirs),
+      sysroot_(sysroot) {}
+
+// Nothing to release by hand; the members clean up after themselves.
+LinkerScriptParser::~LinkerScriptParser() = default;
+
+bool LinkerScriptParser::Parse() {
+  return ParseUntil("");  // Empty terminator: consume tokens until EOF.
+}
+
+// Consumes tokens until |term_token| shows up. File commands are dispatched
+// to their handlers, bracketed regions recurse, everything else is ignored.
+bool LinkerScriptParser::ParseUntil(const string& term_token) {
+  // An empty |term_token| means there is no closer to wait for.
+  const bool no_closer = term_token.empty();
+  string tok;
+  while (NextToken(&tok)) {
+    if (!no_closer && tok == term_token)
+      return true;
+    bool ok = true;
+    if (tok == "INCLUDE") {
+      ok = ProcessInclude();
+    } else if (tok == "INPUT") {
+      ok = ProcessInput();
+    } else if (tok == "GROUP") {
+      ok = ProcessGroup();
+    } else if (tok == "OUTPUT") {
+      ok = ProcessOutput();
+    } else if (tok == "SEARCH_DIR") {
+      ok = ProcessSearchDir();
+    } else if (tok == "STARTUP") {
+      ok = ProcessStartup();
+    } else if (tok == "(") {
+      // A nested region is parsed recursively up to its matching closer.
+      VLOG(1) << "Open (";
+      ok = ParseUntil(")");
+      LOG_IF(WARNING, !ok) << "Unbalanced ()?";
+      VLOG_IF(1, ok) << "Close )";
+    } else if (tok == "{") {
+      VLOG(1) << "Open {";
+      ok = ParseUntil("}");
+      LOG_IF(WARNING, !ok) << "Unbalanced {}?";
+      VLOG_IF(1, ok) << "Close }";
+    } else {
+      VLOG(1) << "Ignore token:" << tok;
+    }
+    // Any failed command or unbalanced region aborts the whole parse.
+    if (!ok)
+      return false;
+  }
+  // Running out of tokens is fine only when no closer was awaited.
+  return no_closer;
+}
+
+bool LinkerScriptParser::NextToken(string* token) {
+  const char* p = nullptr;
+  int ch = EOF;
+  bool is_token_start = false;
+  while (!is_token_start) {  // Skip separators/comments; emit short tokens.
+    p = content_->cur();
+    ch = content_->GetChar();
+    VLOG(3) << "token? at " << p - content_->buf()
+            << " '" << static_cast<char>(*p) << "'";
+    switch (ch) {
+      case EOF:
+        VLOG(1) << "EOF";
+        return false;
+      case '/':  // Comment, "/=" operator, or start of a path-like word.
+        if (*content_->cur() == '*') {
+          while ((ch = content_->GetChar()) != EOF) {
+            if (!content_->SkipUntil('*'))
+              return false;
+            content_->Advance(1);
+            if (*content_->cur() == '/') {
+              ch = content_->GetChar();
+              break;
+            }
+          }
+          VLOG(2) << "Skip comment:" << string(p, content_->cur() - p);
+          continue;
+        } else if (*content_->cur() == '=') {
+          ch = content_->GetChar();
+          *token = string(p, content_->cur() - p);
+          VLOG(2) << "Token(op) '" << *token << "'";
+          return true;
+        }
+        is_token_start = true;
+        break;
+      case ' ': case '\t': case '\n': case '\r': case ',': case ';':
+        VLOG(2) << "Skip '" << static_cast<char>(ch) << "'";
+        continue;
+      case '(': case ')': case '{': case '}': case ':': case '?':
+      case '~': case '%':
+        *token = string(1, static_cast<char>(ch));
+        VLOG(2) << "Token(char) '" << *token << "'";
+        return true;
+      // Operators that may be followed by '=' (e.g. "==", "+=").
+      case '=': case '!': case '+': case '-': case '*':
+        if (*content_->cur() == '=') {
+          ch = content_->GetChar();
+          *token = string(p, content_->cur() - p);
+          VLOG(2) << "Token(op) '" << *token << "'";
+          return true;
+        }
+        *token = string(1, static_cast<char>(ch));
+        VLOG(2) << "Token(char) '" << *token << "'";
+        return true;
+      // '&' and '|' may be doubled or followed by '='.
+      case '&': case '|':
+        if (*content_->cur() == '=' || *content_->cur() == ch) {
+          ch = content_->GetChar();
+          *token = string(p, content_->cur() - p);
+          VLOG(2) << "Token(op) '" << *token << "'";
+          return true;
+        }
+        *token = string(1, static_cast<char>(ch));
+        VLOG(2) << "Token(char) '" << *token << "'";
+        return true;
+      // '<' / '>' optionally doubled, then optionally followed by '=' or itself.
+      case '<': case '>':
+        if (*content_->cur() == ch)
+          ch = content_->GetChar();
+        if (*content_->cur() == '=' || *content_->cur() == ch) {
+          ch = content_->GetChar();
+          *token = string(p, content_->cur() - p);
+          VLOG(2) << "Token(op) '" << *token << "'";
+          return true;
+        }
+        *token = string(p, content_->cur() - p);
+        VLOG(1) << "Token(op) '" << *token << "'";
+        return true;
+      // Quoted string; presumably SkipUntil() stops at the closing quote.
+      case '"':
+        p = content_->cur();
+        if (!content_->SkipUntil('"'))
+          return false;
+        content_->Advance(1);
+        *token = string(p, content_->cur() - p - 1);
+        VLOG(2) << "Token(quoted-string) " << *token;
+        return true;
+      // Anything else starts a word token, which is scanned below.
+      default:
+        is_token_start = true;
+        break;
+    }
+  }
+  VLOG(3) << "token_start at " << p - content_->buf()
+          << " '" << static_cast<char>(*p) << "'";
+  const char* token_start = p;
+  for (;;) {  // Extend the word up to a delimiter or the end of the buffer.
+    p = content_->cur();
+    if (p == content_->buf_end()) {  // NOTE(review): is "- 1" below right?
+      *token = string(token_start, p - token_start - 1);
+      VLOG(2) << "Token(EOF) " << *token;
+      return true;
+    }
+    switch (*p) {
+      case ' ': case '\t': case '\n': case '\r': case ',': case ';':
+      case '(': case ')': case '{': case '}':
+      case '"':
+        // End of token; the delimiter itself is left unconsumed.
+        *token = string(token_start, p - token_start);
+        VLOG(2) << "Token '" << *token << "'";
+        return true;
+      default:
+        // '/' or other char might be used in filename.
+        ch = content_->GetChar();
+    }
+  }
+}
+
+bool LinkerScriptParser::GetToken(const string& token) {
+  VLOG(1) << "Expect token " << token;
+  string next_token;  // Stays empty when the input ends before a token.
+  if (!NextToken(&next_token) || token != next_token) {
+    LOG(WARNING) << "Expected " << token << ", but got " << next_token;
+    return false;
+  }
+  return true;
+}
+
+bool LinkerScriptParser::ProcessFileList(bool accept_as_needed) {
+  VLOG(1) << "FileList as_needed=" << accept_as_needed;
+  if (!GetToken("("))
+    return false;
+  string token;
+  while (NextToken(&token)) {
+    if (token == ")")
+      return true;
+    if (token == "AS_NEEDED") {
+      // Rejected where not accepted, i.e. inside another AS_NEEDED list.
+      if (!accept_as_needed || !ProcessAsNeeded())
+        return false;
+      continue;
+    }
+    if (token == "(" || token == "{" || token == "}") {
+      LOG(WARNING) << "Unexpected token " << token << " in file list.";
+      continue;
+    }
+    VLOG(1) << "Add to input:" << token;
+    // Re-root absolute names under sysroot when cwd is inside the sysroot.
+    if (token[0] == '/' && !sysroot_.empty() &&
+        HasPrefixDir(current_directory_, sysroot_)) {
+      token = file::JoinPath(sysroot_, token.substr(1));
+    }
+    string input_file;
+    if (FindFile(token, &input_file)) {
+      inputs_.push_back(input_file);
+    } else {
+      LOG(WARNING) << "cannot find full path of the file: " << token;
+    }
+  }
+  return false;  // Input ended before the closing ")".
+}
+
+// Parses a parenthesized single-name argument: "(" name ")".
+// |filename| receives the name as soon as it is read, even if ")" is missing.
+bool LinkerScriptParser::ProcessFile(string* filename) {
+  VLOG(1) << "File";
+  // && short-circuits, so tokens are consumed strictly left to right and
+  // nothing more is read after the first mismatch.
+  return GetToken("(") && NextToken(filename) && GetToken(")");
+}
+
+// INCLUDE filename: parse the file and merge its results into this parser.
+bool LinkerScriptParser::ProcessInclude() {
+  string filename;
+  if (!NextToken(&filename))
+    return false;
+  string include_file;
+  if (!FindFile(filename, &include_file)) {
+    LOG(ERROR) << "file:" << filename << " not found in searchdirs:"
+               << searchdirs_;
+    return false;
+  }
+  LinkerScriptParser parser(Content::CreateFromFile(include_file),
+                            current_directory_, searchdirs_, sysroot_);
+  if (!parser.Parse()) {
+    LOG(ERROR) << "INCLUDE " << filename << "(" << include_file << ") "
+               << " parse error";
+    return false;
+  }
+  // The nested parser was seeded with our search dirs, so its list is ours
+  // plus any SEARCH_DIR found in the included script; keep those additions.
+  searchdirs_ = parser.searchdirs();
+  if (!parser.startup().empty())
+    startup_ = parser.startup();
+  if (!parser.output().empty())
+    output_ = parser.output();
+  inputs_.insert(inputs_.end(),
+                 parser.inputs().begin(), parser.inputs().end());
+  return true;
+}
+
+// INPUT(file file ...): found files go to inputs(); AS_NEEDED is accepted.
+bool LinkerScriptParser::ProcessInput() {
+  VLOG(1) << "Process INPUT";
+  return ProcessFileList(true);
+}
+
+// GROUP(file file ...): parsed exactly like INPUT by this parser.
+bool LinkerScriptParser::ProcessGroup() {
+  VLOG(1) << "Process GROUP";
+  return ProcessFileList(true);
+}
+
+// AS_NEEDED(file ...): only inside INPUT or GROUP; a nested one is rejected.
+bool LinkerScriptParser::ProcessAsNeeded() {
+  VLOG(1) << "Process AS_NEEDED";
+  return ProcessFileList(false);
+}
+
+// OUTPUT(filename): stores the name that output() returns.
+bool LinkerScriptParser::ProcessOutput() {
+  VLOG(1) << "Process OUTPUT";
+  return ProcessFile(&output_);
+}
+
+// SEARCH_DIR(path) => -Lpath; appended only if the command is well formed.
+bool LinkerScriptParser::ProcessSearchDir() {
+  VLOG(1) << "Process SEARCH_DIR";
+  string dir;
+  const bool parsed = ProcessFile(&dir);
+  if (parsed)
+    searchdirs_.push_back(dir);
+  return parsed;
+}
+
+// STARTUP(filename): stores the name that startup() returns.
+bool LinkerScriptParser::ProcessStartup() {
+  VLOG(1) << "Process STARTUP";
+  return ProcessFile(&startup_);
+}
+
+bool LinkerScriptParser::FindFile(const string& filename,
+                                  string* include_file) {
+  const string root(fakeroot_);
+  // Probes |filename| relative to |dir|; on a hit reports it without |root|.
+  auto probe = [&](const string& dir) -> bool {
+    const string candidate =
+        root + file::JoinPathRespectAbsolute(dir, filename);
+    if (access(candidate.c_str(), R_OK) != 0)
+      return false;
+    *include_file = candidate.substr(root.size());
+    return true;
+  };
+  if (probe(current_directory_))
+    return true;
+  for (const auto& dir : searchdirs_) {
+    if (probe(file::JoinPathRespectAbsolute(current_directory_, dir)))
+      return true;
+  }
+  return false;
+}
+
+}  // namespace devtools_goma
diff --git a/client/linker_script_parser.h b/client/linker_script_parser.h
new file mode 100644
index 0000000..864cea0
--- /dev/null
+++ b/client/linker_script_parser.h
@@ -0,0 +1,88 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_LINKER_SCRIPT_PARSER_H_
+#define DEVTOOLS_GOMA_CLIENT_LINKER_SCRIPT_PARSER_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+#include "content_cursor.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// Linker script parser for Goma.
+// It only supports commands dealing with files.
+// http://sourceware.org/binutils/docs-2.17/ld/File-Commands.html#File-Commands
+// Once Parse() has succeeded, the results are available through:
+//  searchdirs(): search directories.
+//  startup(): startup object filename, if specified.
+//  inputs(): input files in INPUT, GROUP or AS_NEEDED. may be "-lfile".
+//  output(): output file, if specified.
+class LinkerScriptParser {
+ public:
+  // Constructs a parser to read content.
+  // It takes ownership of |content|.
+  LinkerScriptParser(std::unique_ptr<Content> content,
+                     const string& current_directory,
+                     const std::vector<string>& searchdirs,
+                     const string& sysroot);
+  ~LinkerScriptParser();
+
+  const std::vector<string>& searchdirs() const {
+    return searchdirs_;
+  }
+
+  bool Parse();
+
+  const string& startup() const {
+    return startup_;
+  }
+  const std::vector<string>& inputs() const {
+    return inputs_;
+  }
+  const string& output() const {
+    return output_;
+  }
+
+ private:
+  bool ParseUntil(const string& term_token);
+  bool NextToken(string* token);
+  bool GetToken(const string& token);
+  bool ProcessFileList(bool accept_as_needed);
+  bool ProcessFile(string* filename);
+  bool ProcessInclude();
+  bool ProcessInput();
+  bool ProcessGroup();
+  bool ProcessAsNeeded();
+  bool ProcessOutput();
+  bool ProcessSearchDir();
+  bool ProcessStartup();
+  bool FindFile(const string& filename, string* include_file);
+
+  std::unique_ptr<ContentCursor> content_;
+  const string current_directory_;
+  std::vector<string> searchdirs_;
+  const string sysroot_;
+
+  string startup_;
+  std::vector<string> inputs_;
+  string output_;
+
+  // Prefix put in front of every path probed on disk; provided for testing.
+  static const char* fakeroot_;
+
+  friend class LinkerScriptParserTest;
+
+  DISALLOW_COPY_AND_ASSIGN(LinkerScriptParser);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_LINKER_SCRIPT_PARSER_H_
diff --git a/client/linker_script_parser_unittest.cc b/client/linker_script_parser_unittest.cc
new file mode 100644
index 0000000..065c9a8
--- /dev/null
+++ b/client/linker_script_parser_unittest.cc
@@ -0,0 +1,136 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+#include <gtest/gtest.h>
+
+#include "content.h"
+#include "linker_script_parser.h"
+#include "unittest_util.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class LinkerScriptParserTest : public testing::Test {
+ public:
+  void SetUp() override {
+    tmpdir_util_.reset(new TmpdirUtil("linker_script_test"));
+    // fakeroot_ is a raw pointer, so the string it points into is kept here.
+    tmpdir_ = tmpdir_util_->tmpdir();
+    LinkerScriptParser::fakeroot_ = tmpdir_.c_str();
+  }
+
+  void TearDown() override {
+    LinkerScriptParser::fakeroot_ = "";  // Stop pointing into |tmpdir_|.
+    tmpdir_util_.reset();
+  }
+
+ protected:
+  std::unique_ptr<TmpdirUtil> tmpdir_util_;
+  std::string tmpdir_;
+};
+
+#ifndef _WIN32
+TEST_F(LinkerScriptParserTest, ParseLibcSo) {
+  std::vector<string> searchdirs;
+  // The parser only checks that these files exist, so empty files suffice.
+  tmpdir_util_->CreateTmpFile("/lib/libc.so.6", "");
+  tmpdir_util_->CreateTmpFile("/usr/lib/libc_nonshared.a", "");
+  tmpdir_util_->CreateTmpFile("/lib/ld-linux-x86-64.so.2", "");
+  LinkerScriptParser parser(Content::CreateFromString(
+      "/* GNU ld script\n"
+      "   Use the shared library, but some functions are only in\n"
+      "   the static library, so try that secondarily. */\n"
+      "OUTPUT_FORMAT(elf64-x86-64)\n"
+      "GROUP ( /lib/libc.so.6 /usr/lib/libc_nonshared.a "
+      " AS_NEEDED ( /lib/ld-linux-x86-64.so.2 ) )\n"),
+                            "/tmp",
+                            searchdirs,
+                            "");
+  EXPECT_TRUE(parser.Parse());
+  // The AS_NEEDED member is reported as an input too, after the GROUP files.
+  EXPECT_EQ("", parser.startup());
+  std::vector<string> expected_inputs;
+  expected_inputs.push_back("/lib/libc.so.6");
+  expected_inputs.push_back("/usr/lib/libc_nonshared.a");
+  expected_inputs.push_back("/lib/ld-linux-x86-64.so.2");
+  EXPECT_EQ(expected_inputs, parser.inputs());
+  EXPECT_EQ("", parser.output());
+}
+#endif
+
+TEST_F(LinkerScriptParserTest, ParseSample) {
+  std::vector<string> searchdirs;
+  LinkerScriptParser parser(Content::CreateFromString(
+      "SECTIONS\n"
+      "{\n"
+      "  . = 0x10000;\n"
+      "  .text : { *(.text) }\n"
+      "  . = 0x8000000;\n"
+      "  .data : { *(.data) }\n"
+      "  .bss : { *(.bss) }\n"
+      "}\n"),
+                            "/tmp",
+                            searchdirs,
+                            "");
+  EXPECT_TRUE(parser.Parse());
+  // SECTIONS contains no file commands, so nothing is collected.
+  EXPECT_EQ("", parser.startup());
+  std::vector<string> expected_inputs;
+  EXPECT_EQ(expected_inputs, parser.inputs());
+  EXPECT_EQ("", parser.output());
+}
+
+TEST_F(LinkerScriptParserTest, ParseSample2) {
+  std::vector<string> searchdirs;
+  LinkerScriptParser parser(Content::CreateFromString(
+      "floating_point = 0;\n"
+      "SECTIONS\n"
+      "{\n"
+      "  .text :\n"
+      "   {\n"
+      "     *(.text)\n"
+      "     _etext = .;\n"
+      "   }\n"
+      "  _bdata = (. + 3) & 3;\n"
+      "  .data : { *(.data) }\n"
+      "}\n"),
+                            "/tmp",
+                            searchdirs,
+                            "");
+  EXPECT_TRUE(parser.Parse());
+  // Assignments and expressions are skipped; nothing is collected.
+  EXPECT_EQ("", parser.startup());
+  std::vector<string> expected_inputs;
+  EXPECT_EQ(expected_inputs, parser.inputs());
+  EXPECT_EQ("", parser.output());
+}
+
+TEST_F(LinkerScriptParserTest, ParseSample3) {
+  std::vector<string> searchdirs;
+  LinkerScriptParser parser(Content::CreateFromString(
+      "OVERLAY 0x1000 : AT (0x4000)\n"
+      " {\n"
+      "  .text0 { o1/*.o(.text) }\n"
+      "  .text1 { o2/*.o(.text) }\n"
+      " }\n"),
+                            "/tmp",
+                            searchdirs,
+                            "");
+  EXPECT_TRUE(parser.Parse());
+  // File patterns inside OVERLAY are not reported as inputs.
+  EXPECT_EQ("", parser.startup());
+  std::vector<string> expected_inputs;
+  EXPECT_EQ(expected_inputs, parser.inputs());
+  EXPECT_EQ("", parser.output());
+}
+
+}  // namespace devtools_goma
diff --git a/client/local_output_cache.cc b/client/local_output_cache.cc
new file mode 100644
index 0000000..e12d34c
--- /dev/null
+++ b/client/local_output_cache.cc
@@ -0,0 +1,693 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+// How garbage collection works:
+// 1. When LocalOutputCache starts, StartLoadCacheEntries() is called.
+//    In that function, it reads all cache entries, sorts them by mtime,
+//    and inserts to |entries_|. When all done, |ready_| becomes true.
+//    All methods like Lookup()/SaveOutput() will be blocked until
+//    |ready_| becomes true.
+//
+//    TODO: We have design choice here. When we block until all load is
+//    done, compile does not start until load is done. When we don't block
+//    but return false until ready, a user might commit duplicated results.
+//    It makes states complex. Currently we're choosing a safer option,
+//    but this should be reconsidered.
+//
+// 2. When loading thread starts, we also start garbage collection thread.
+//
+// 3. During builds, when the total cache size or the number of entries
+//    exceeds its limit, the GC thread is woken up (by notifying |gc_cond_|).
+//
+// 4. When the GC thread is awake and |entries_total_cache_amount_| exceeds
+//    |max_cache_amount_byte_|, GC happens. It removes older entries until
+//    |entries_total_cache_amount_| is no greater than
+//    |threshold_cache_amount_byte_|.
+//
+// * Cache Directory Structure
+//
+// proto_file = <cache dir>/<first 2 chars of key>/<key>
+//   <key> is always hex notation of SHA256.
+
+#include "local_output_cache.h"
+
+#include <stdio.h>  // For rename
+
+#include <algorithm>
+#include <fstream>
+#include <memory>
+#include <vector>
+
+#include "callback.h"
+#include "execreq_normalizer.h"
+#include "file.h"
+#include "file_dir.h"
+#include "file_helper.h"
+#include "file_id.h"
+#include "glog/logging.h"
+#include "goma_hash.h"
+#include "histogram.h"
+#include "path.h"
+#include "simple_timer.h"
+#include "strutil.h"
+
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_stats.pb.h"
+MSVC_POP_WARNING()
+
+#ifndef _WIN32
+# include <sys/stat.h>
+# include <sys/types.h>
+# include <unistd.h>
+#else
+# include "config_win.h"
+# include "posix_helper_win.h"
+#endif
+
+using std::string;
+
+namespace {
+
+bool DeleteFile(const char* path) {
+#ifdef _WIN32
+  return DeleteFileA(path) != FALSE;  // Win32: nonzero BOOL means success.
+#else
+  return unlink(path) == 0;  // POSIX: 0 means success.
+#endif  // _WIN32
+}
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+LocalOutputCache* LocalOutputCache::instance_;
+
+LocalOutputCache::LocalOutputCache(string cache_dir,
+                                   std::int64_t max_cache_amount_byte,
+                                   std::int64_t threshold_cache_amount_byte,
+                                   size_t max_cache_items,
+                                   size_t threshold_cache_items)
+    : cache_dir_(std::move(cache_dir)),
+      max_cache_amount_byte_(max_cache_amount_byte),
+      threshold_cache_amount_byte_(threshold_cache_amount_byte),
+      max_cache_items_(max_cache_items),
+      threshold_cache_items_(threshold_cache_items),
+      ready_cond_(&ready_mu_),
+      ready_(false),
+      entries_total_cache_amount_(0),
+      gc_cond_(&gc_mu_),
+      gc_should_done_(false),
+      gc_working_(false) {
+}
+
+LocalOutputCache::~LocalOutputCache() {
+}
+
+// static
+void LocalOutputCache::Init(string cache_dir,
+                            WorkerThreadManager* wm,
+                            int max_cache_amount_in_mb,
+                            int threshold_cache_amount_in_mb,
+                            size_t max_cache_items,
+                            size_t threshold_cache_items) {
+  CHECK(instance_ == nullptr);
+  if (cache_dir.empty()) {
+    return;
+  }
+
+  if (!EnsureDirectory(cache_dir, 0700)) {
+    LOG(ERROR) << "failed to make cache directory: " << cache_dir
+               << " LocalOutputCache is not enabled";
+    return;
+  }
+
+  std::int64_t max_cache_amount_byte =
+      max_cache_amount_in_mb * std::int64_t(1000000);
+  std::int64_t threshold_cache_amount_byte =
+      threshold_cache_amount_in_mb * std::int64_t(1000000);
+
+  instance_ = new LocalOutputCache(std::move(cache_dir),
+                                   max_cache_amount_byte,
+                                   threshold_cache_amount_byte,
+                                   max_cache_items,
+                                   threshold_cache_items);
+  if (wm != nullptr) {
+    // Loading cache entries can take long time. Don't block here.
+    // When blocked, compiler_proxy start might be failed due to timeout.
+    instance_->StartLoadCacheEntries(wm);
+    instance_->StartGarbageCollection(wm);
+  } else {
+    // wm is nullptr in test. Just make ready_ = true.
+    instance_->SetReady(true);
+  }
+}
+
+// static
+void LocalOutputCache::Quit() {
+  if (instance_ == nullptr) {
+    return;
+  }
+
+  LOG(INFO) << "LocalOutputCache quiting...";
+  // Might be still loading. Wait for that case.
+  instance_->WaitUntilReady();
+  // Stop garbage collection thread.
+  instance_->StopGarbageCollection();
+  instance_->WaitUntilGarbageCollectionThreadDone();
+  LOG(INFO) << "LocalOutputCache GC thread has been terminated.";
+
+  delete instance_;
+  instance_ = nullptr;
+}
+
+void LocalOutputCache::StartLoadCacheEntries(WorkerThreadManager* wm) {
+  wm->RunClosure(FROM_HERE,
+                 NewCallback(this, &LocalOutputCache::LoadCacheEntries),
+                 WorkerThreadManager::PRIORITY_LOW);
+}
+
+void LocalOutputCache::LoadCacheEntries() {
+  // For fine load time measurement.
+  Histogram list_directory_histogram;
+  Histogram file_id_histogram;
+
+  list_directory_histogram.SetName("LocalOutputCache ListDirectory");
+  file_id_histogram.SetName("LocalOutputCache FileId");
+
+  SimpleTimer walk_timer(SimpleTimer::START);
+  size_t total_file_size = 0;
+
+  std::vector<std::pair<SHA256HashValue, CacheEntry>> cache_entries;
+
+  std::vector<DirEntry> key_prefix_entries;
+  {
+    SimpleTimer timer(SimpleTimer::START);
+    if (!ListDirectory(cache_dir_, &key_prefix_entries)) {
+      LOG(ERROR) << "failed to load LocalOutputCache entries:"
+                 << " cache_dir=" << cache_dir_;
+      LoadCacheEntriesDone();
+      return;
+    }
+    list_directory_histogram.Add(timer.GetInNanoSeconds());
+    if (timer.Get() >= 1.0) {
+      LOG(WARNING) << "SLOW ListDirectory: " << cache_dir_;
+    }
+  }
+
+  for (const auto& key_prefix_entry : key_prefix_entries) {
+    if (!key_prefix_entry.is_dir ||
+        key_prefix_entry.name == "." ||
+        key_prefix_entry.name == "..") {
+      continue;
+    }
+
+    string cache_dir_with_key_prefix =
+        file::JoinPath(cache_dir_, key_prefix_entry.name);
+    std::vector<DirEntry> key_entries;
+
+    {
+      SimpleTimer timer(SimpleTimer::START);
+      if (!ListDirectory(cache_dir_with_key_prefix, &key_entries)) {
+        // Might be better to remove this directory contents.
+        continue;
+      }
+      list_directory_histogram.Add(timer.GetInNanoSeconds());
+      if (timer.Get() >= 1.0) {
+        LOG(WARNING) << "SLOW ListDirectory: " << cache_dir_with_key_prefix;
+      }
+    }
+
+    for (const auto& key_entry : key_entries) {
+      if (key_entry.name == "." || key_entry.name == "..") {
+        continue;
+      }
+
+      string cache_file_path =
+          file::JoinPath(cache_dir_with_key_prefix, key_entry.name);
+
+      if (key_entry.is_dir) {
+        // Probably old style cache. remove this.
+        LOG(INFO) << "directory found. remove: " << cache_file_path;
+        if (!RecursivelyDelete(cache_file_path)) {
+          LOG(ERROR) << "failed to remove: " << cache_file_path;
+        }
+        continue;
+      }
+
+      SHA256HashValue key;
+      if (!SHA256HashValue::ConvertFromHexString(key_entry.name, &key)) {
+        LOG(WARNING) << "Invalid filename found. remove: filename="
+                     << cache_file_path;
+        if (!DeleteFile(cache_file_path.c_str())) {
+          LOG(ERROR) << "failed to remove: " << cache_file_path;
+        }
+        continue;
+      }
+
+      FileId id;
+      {
+        SimpleTimer timer(SimpleTimer::START);
+        id = FileId(cache_file_path);
+        file_id_histogram.Add(timer.GetInNanoSeconds());
+        if (timer.Get() >= 1.0) {
+          LOG(WARNING) << "SLOW FileId: " << cache_file_path;
+        }
+      }
+
+      if (!id.IsValid()) {
+        LOG(ERROR) << "unexpectedly file is removed? "
+                   << "path=" << cache_file_path;
+        continue;
+      }
+
+      total_file_size += id.size;
+      cache_entries.emplace_back(key, CacheEntry(id.mtime, id.size));
+    }
+  }
+
+  LOG(INFO) << "walk_time_in_seconds=" << walk_timer.Get() << " "
+            << "total_cache_count=" << cache_entries.size() << " "
+            << "total_size_in_byte=" << total_file_size;
+
+  // DebugString() triggers CHECK if count() == 0.
+  if (list_directory_histogram.count() > 0) {
+    LOG(INFO) << list_directory_histogram.DebugString();
+  }
+  if (file_id_histogram.count() > 0) {
+    LOG(INFO) << file_id_histogram.DebugString();
+  }
+
+  // Sort by mtime. Older cache entry comes first for GC.
+  std::sort(cache_entries.begin(), cache_entries.end(),
+            [](const std::pair<SHA256HashValue, CacheEntry>& lhs,
+               const std::pair<SHA256HashValue, CacheEntry>& rhs) {
+                return lhs.second.mtime < rhs.second.mtime;
+            });
+
+  {
+    AUTO_EXCLUSIVE_LOCK(lock, &entries_mu_);
+    for (auto&& entry : cache_entries) {
+      entries_.emplace_back(std::move(entry.first), std::move(entry.second));
+    }
+    entries_total_cache_amount_ = total_file_size;
+  }
+
+  LoadCacheEntriesDone();
+}
+
+void LocalOutputCache::LoadCacheEntriesDone() {
+  AUTOLOCK(lock, &ready_mu_);
+  ready_ = true;
+  ready_cond_.Broadcast();
+}
+
+void LocalOutputCache::WaitUntilReady() {
+  AUTOLOCK(lock, &ready_mu_);
+  while (!ready_) {
+    ready_cond_.Wait();
+  }
+}
+
+void LocalOutputCache::AddCacheEntry(const SHA256HashValue& key,
+                                     std::int64_t cache_size) {
+  // Appends |key| to the back of |entries_| stamped with the current time,
+  // then wakes the GC thread if the cache is now over one of its limits.
+  const time_t now = time(nullptr);
+  bool over_limit;
+  {
+    AUTO_EXCLUSIVE_LOCK(lock, &entries_mu_);
+    entries_.emplace_back(key, CacheEntry(now, cache_size));
+    entries_total_cache_amount_ += cache_size;
+    over_limit = ShouldInvokeGarbageCollectionUnlocked();
+  }
+
+  // WakeGCThread() takes gc_mu_; call it only after entries_mu_ is released.
+  if (!over_limit) {
+    return;
+  }
+  WakeGCThread();
+}
+
+void LocalOutputCache::UpdateCacheEntry(const SHA256HashValue& key) {
+  AUTO_EXCLUSIVE_LOCK(lock, &entries_mu_);
+
+  // Because of GC, key might be removed here.
+  auto it = entries_.find(key);
+  if (it != entries_.end()) {
+    entries_.MoveToBack(it);
+  }
+}
+
+void LocalOutputCache::StartGarbageCollection(WorkerThreadManager* wm) {
+  {
+    AUTOLOCK(lock, &gc_mu_);
+    gc_should_done_ = false;
+    gc_working_ = true;
+  }
+  wm->NewThread(NewCallback(this,
+                            &LocalOutputCache::GarbageCollectionThread),
+                "local-output-cache-gc");
+}
+
+void LocalOutputCache::StopGarbageCollection() {
+  LOG(INFO) << "try to stop gc thread";
+
+  AUTOLOCK(lock, &gc_mu_);
+  gc_should_done_ = true;
+  gc_cond_.Broadcast();
+}
+
+void LocalOutputCache::WakeGCThread() {
+  LOG(INFO) << "try to wake gc thread";
+
+  AUTOLOCK(lock, &gc_mu_);
+  gc_cond_.Broadcast();
+}
+
+void LocalOutputCache::GarbageCollectionThread() {
+  // GC should not start until ready.
+  WaitUntilReady();
+
+  while (true) {
+    while (true) {
+      AUTOLOCK(lock, &gc_mu_);
+
+      // Return if gc done.
+      if (gc_should_done_) {
+        LOG(INFO) << "gc has done. gc thread will be done.";
+        gc_working_ = false;
+        gc_cond_.Signal();
+        return;
+      }
+
+      // With this condition, start GC.
+      if (ShouldInvokeGarbageCollection()) {
+        break;
+      }
+
+      // Wait until gc-wakeup signal comes.
+      gc_cond_.Wait();
+    }
+
+    LOG(INFO) << "LocalOutputCache GC thread awaken";
+    GarbageCollectionStat stat;
+    RunGarbageCollection(&stat);
+
+    LOG(INFO) << "LocalOutputCache GC Done:"
+              << " removed_count=" << stat.num_removed
+              << " removed_bytes=" << stat.removed_bytes
+              << " failed=" << stat.num_failed;
+
+    stats_gc_removed_items_.Add(stat.num_removed);
+    stats_gc_removed_bytes_.Add(stat.removed_bytes);
+    stats_gc_failed_items_.Add(stat.num_failed);
+  }
+}
+
+bool LocalOutputCache::ShouldInvokeGarbageCollection() const {
+  AUTO_SHARED_LOCK(lock, &entries_mu_);
+  return ShouldInvokeGarbageCollectionUnlocked();
+}
+
+bool LocalOutputCache::ShouldInvokeGarbageCollectionUnlocked() const {
+  if (max_cache_amount_byte_ < entries_total_cache_amount_) {
+    LOG(INFO) << "GC will be invoked:"
+              << " max_cache_amount_byte=" << max_cache_amount_byte_
+              << " entries_total_cache_amount=" << entries_total_cache_amount_;
+    return true;
+  }
+  if (max_cache_items_ < entries_.size()) {
+    LOG(INFO) << "GC will be invoked:"
+              << " max_cache_items=" << max_cache_items_
+              << " entries_size=" << entries_.size();
+    return true;
+  }
+
+  return false;
+}
+
+bool LocalOutputCache::ShouldContinueGarbageCollectionUnlocked() const {
+  // GC keeps evicting while either threshold (total bytes or item count)
+  // is still exceeded. The caller must hold |entries_mu_|.
+  const bool too_many_bytes =
+      entries_total_cache_amount_ > threshold_cache_amount_byte_;
+  if (too_many_bytes) {
+    return true;
+  }
+  return entries_.size() > threshold_cache_items_;
+}
+
+void LocalOutputCache::RunGarbageCollection(GarbageCollectionStat* stat) {
+  // Cache exceeded the max size. Evict the oldest entries (front of
+  // |entries_|) until both thresholds are satisfied; results go to |stat|.
+  stats_gc_count_.Add(1);
+  SimpleTimer timer(SimpleTimer::START);
+  while (true) {
+    // |entries_mu_| is re-acquired for every entry.
+    AUTO_EXCLUSIVE_LOCK(lock, &entries_mu_);
+    if (entries_.size() == 0 || !ShouldContinueGarbageCollectionUnlocked()) {
+      break;
+    }
+    const std::int64_t amount_byte = entries_.front().second.amount_byte;
+    const string cache_file_path =
+        CacheFilePath(entries_.front().first.ToHexString());
+    if (DeleteFile(cache_file_path.c_str())) {
+      stat->num_removed += 1;
+      stat->removed_bytes += amount_byte;
+    } else {
+      // Forget the entry anyway (below). Leaving it at the front would make
+      // the GC thread retry the same file in a tight loop.
+      LOG(ERROR) << "failed to remove cache: path=" << cache_file_path;
+      stat->num_failed += 1;
+    }
+    entries_total_cache_amount_ -= amount_byte;
+    entries_.pop_front();
+  }
+
+  stats_gc_total_time_ms_.Add(timer.GetInMs());
+}
+
+void LocalOutputCache::WaitUntilGarbageCollectionThreadDone() {
+  AUTOLOCK(lock, &gc_mu_);
+  while (gc_working_) {
+    LOG(INFO) << "LocalOutputCache: waiting GC finished";
+    gc_cond_.Wait();
+  }
+}
+
+void LocalOutputCache::SetReady(bool ready) {
+  AUTOLOCK(lock, &ready_mu_);
+  ready_ = ready;
+}
+
+// Reads the output files listed in |resp| and caches them under |key|.
+bool LocalOutputCache::SaveOutput(const string& key,
+                                  const ExecReq* req,
+                                  const ExecResp* resp,
+                                  const string& trace_id) {
+  WaitUntilReady();
+  SimpleTimer timer(SimpleTimer::START);
+
+  if (!resp->has_result()) {
+    return false;
+  }
+
+  SHA256HashValue key_hash;
+  if (!SHA256HashValue::ConvertFromHexString(key, &key_hash)) {
+    LOG(ERROR) << "key is invalid format: key=" << key;
+    return false;
+  }
+
+  // --- Ensure cache directory exists.
+  string cache_dir_with_key_prefix = CacheDirWithKeyPrefix(key);
+  if (!EnsureDirectory(cache_dir_with_key_prefix, 0755)) {
+    LOG(ERROR) << trace_id << " failed to create " << cache_dir_with_key_prefix;
+    return false;
+  }
+
+  // --- Make cache_entry.
+  LocalOutputCacheEntry cache_entry;
+  const ExecResult& result = resp->result();
+  for (const auto& output : result.output()) {
+    string src_path = file::JoinPathRespectAbsolute(
+        req->cwd(), output.filename());
+
+    std::string output_file_content;
+    if (!ReadFileToString(src_path, &output_file_content)) {
+      LOG(ERROR) << trace_id << " failed to read file: " << src_path;
+      return false;
+    }
+
+    LocalOutputCacheFile* cache_file = cache_entry.add_files();
+    cache_file->set_filename(output.filename());
+    cache_file->set_content(std::move(output_file_content));
+    cache_file->set_is_executable(output.is_executable());
+  }
+
+  // --- Serialize LocalOutputCacheEntry to a file.
+  // If compiler_proxy is killed while writing a file, the file will be invalid
+  // but might still be a valid proto (when we're unlucky). So we serialize
+  // the data to a tmp file and rename it, which we expect to be atomic.
+  std::int64_t cache_amount_in_byte = 0;
+  {
+    string cache_file_path = CacheFilePath(key);
+    string cache_file_tmp_path = cache_file_path + ".tmp";
+
+    string serialized;
+    if (!cache_entry.SerializeToString(&serialized)) {
+      LOG(ERROR) << trace_id << " failed to serialize LocalOutputCacheEntry: "
+                 << " path=" << cache_file_path;
+      return false;
+    }
+    if (!WriteStringToFile(serialized, cache_file_tmp_path)) {
+      stats_save_failure_.Add(1);
+      LOG(ERROR) << trace_id << " failed to write LocalOutputCacheEntry:"
+                 << " path=" << cache_file_path;
+      return false;
+    }
+
+    int r = rename(cache_file_tmp_path.c_str(), cache_file_path.c_str());
+    if (r < 0) {
+      LOG(ERROR) << trace_id << " failed to rename LocalOutputCacheEntry:"
+                 << " path=" << cache_file_path << " result=" << r;
+      // rename() failed, so the tmp file is the leftover to clean up.
+      (void)DeleteFile(cache_file_tmp_path.c_str());
+      return false;
+    }
+
+    cache_amount_in_byte = serialized.size();
+  }
+
+  AddCacheEntry(key_hash, cache_amount_in_byte);
+
+  stats_save_success_.Add(1);
+  stats_save_success_time_ms_.Add(timer.GetInMs());
+  return true;
+}
+
+// Looks up |key| in the in-memory index, then loads the entry from disk.
+bool LocalOutputCache::Lookup(const string& key, ExecResp* resp,
+                              const string& trace_id) {
+  WaitUntilReady();
+  SimpleTimer timer(SimpleTimer::START);
+
+  SHA256HashValue key_hash;
+  if (!SHA256HashValue::ConvertFromHexString(key, &key_hash)) {
+    LOG(DFATAL) << "unexpected key format: key=" << key;
+    return false;
+  }
+
+  // Check cache entry first.
+  {
+    AUTO_SHARED_LOCK(lock, &entries_mu_);
+    auto it = entries_.find(key_hash);
+    if (it == entries_.end()) {
+      stats_lookup_miss_.Add(1);
+      return false;
+    }
+  }
+
+  // Read file. The lock is not held here, so if GC ran after entries_.find()
+  // the file might already be gone; that is counted as a miss.
+  const string cache_file_path = CacheFilePath(key);
+  std::ifstream ifs(cache_file_path, std::ifstream::binary);
+  if (!ifs.is_open()) {
+    stats_lookup_miss_.Add(1);
+    return false;
+  }
+
+  LocalOutputCacheEntry cache_entry;
+  if (!cache_entry.ParseFromIstream(&ifs)) {
+    LOG(ERROR) << trace_id << " LocalOutputCache: failed to parse:"
+               << " path=" << cache_file_path;
+    stats_lookup_failure_.Add(1);
+    return false;
+  }
+
+  UpdateCacheEntry(key_hash);
+
+  // Create dummy ExecResp from LocalOutputCacheEntry.
+  resp->set_cache_hit(ExecResp::MEM_CACHE);  // TODO: Make LOCAL_CACHE.
+  ExecResult* result = resp->mutable_result();
+  result->set_exit_status(0);
+  for (const auto& file : cache_entry.files()) {
+    ExecResult_Output* output = result->add_output();
+    output->set_filename(file.filename());
+    output->set_is_executable(file.is_executable());
+    FileBlob* blob = output->mutable_blob();
+    blob->set_blob_type(FileBlob::FILE);  // Always FILE.
+    blob->set_file_size(file.content().size());
+    blob->set_content(file.content());  // |file| is const: this is a copy.
+  }
+
+  stats_lookup_success_.Add(1);
+  stats_lookup_success_time_ms_.Add(timer.GetInMs());
+  return true;
+}
+
+std::string LocalOutputCache::CacheDirWithKeyPrefix(StringPiece key) const {
+  return file::JoinPath(cache_dir_, key.substr(0, 2));
+}
+
+std::string LocalOutputCache::CacheFilePath(StringPiece key) const {
+  return file::JoinPath(cache_dir_, key.substr(0, 2), key);
+}
+
+void LocalOutputCache::DumpStatsToProto(LocalOutputCacheStats* stats) {
+  stats->set_save_success(stats_save_success_.value());
+  stats->set_save_success_time_ms(stats_save_success_time_ms_.value());
+  stats->set_save_failure(stats_save_failure_.value());
+
+  stats->set_lookup_success(stats_lookup_success_.value());
+  stats->set_lookup_success_time_ms(stats_lookup_success_time_ms_.value());
+  stats->set_lookup_miss(stats_lookup_miss_.value());
+  stats->set_lookup_failure(stats_lookup_failure_.value());
+
+  stats->set_commit_success(stats_commit_success_.value());
+  stats->set_commit_success_time_ms(stats_commit_success_time_ms_.value());
+  stats->set_commit_failure(stats_commit_failure_.value());
+
+  stats->set_gc_count(stats_gc_count_.value());
+  stats->set_gc_total_time_ms(stats_gc_total_time_ms_.value());
+}
+
+size_t LocalOutputCache::TotalCacheCount() {
+  AUTO_SHARED_LOCK(lock, &entries_mu_);
+  return entries_.size();
+}
+
+std::int64_t LocalOutputCache::TotalCacheAmountInByte() {
+  AUTO_SHARED_LOCK(lock, &entries_mu_);
+  return entries_total_cache_amount_;
+}
+
+// static
+string LocalOutputCache::MakeCacheKey(const ExecReq& req) {
+  ExecReq normalized(req);
+
+  // Use the goma server default.
+  const std::vector<string> flags {
+    "Xclang", "B", "gcc-toolchain", "-sysroot", "resource-dir"
+  };
+
+  // TODO: Set debug_prefix_map, too?
+  NormalizeExecReqForCacheKey(0, true, false,
+                              flags,
+                              std::map<string, string>(),
+                              &normalized);
+
+  string serialized;
+  if (!normalized.SerializeToString(&serialized)) {
+    LOG(ERROR) << "failed to make cache key: "
+               << normalized.DebugString();
+    return string();
+  }
+
+  string digest;
+  ComputeDataHashKey(serialized, &digest);
+  return digest;
+}
+
+} // namespace devtools_goma
diff --git a/client/local_output_cache.h b/client/local_output_cache.h
new file mode 100644
index 0000000..51bda94
--- /dev/null
+++ b/client/local_output_cache.h
@@ -0,0 +1,195 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_LOCAL_OUTPUT_CACHE_H_
+#define DEVTOOLS_GOMA_CLIENT_LOCAL_OUTPUT_CACHE_H_
+
+#include <cstdint>
+#include <string>
+
+#include "atomic_stats_counter.h"
+#include "autolock_timer.h"
+#include "compiler_specific.h"
+#include "goma_hash.h"
+#include "linked_unordered_map.h"
+#include "sha256hash_hasher.h"
+#include "string_piece.h"
+#include "worker_thread_manager.h"
+
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_data.pb.h"
+#include "prototmp/local_output_cache_data.pb.h"
+MSVC_POP_WARNING()
+
+namespace devtools_goma {
+
+class LocalOutputCacheStats;
+
+// LocalOutputCache is a cache that ExecReq -> output files.
+class LocalOutputCache {
+ public:
+  struct GarbageCollectionStat {
+    size_t num_removed = 0;          // # of garbage collected entries
+    size_t num_failed = 0;           // failed to remove
+    std::int64_t removed_bytes = 0;  // total removed bytes
+  };
+
+  static bool IsEnabled() { return instance_ != nullptr; }
+  static LocalOutputCache* instance() { return instance_; }
+
+  // When |wm| is nullptr, neither cache loading nor GC is started and the
+  // cache is immediately marked ready. This is useful for testing.
+  static void Init(std::string cache_dir,
+                   WorkerThreadManager* wm,
+                   int max_cache_amount_in_mb,
+                   int threshold_cache_amount_in_mb,
+                   size_t max_cache_items,
+                   size_t threshold_cache_items);
+  static void Quit();
+
+  LocalOutputCache(const LocalOutputCache&) = delete;
+  LocalOutputCache(LocalOutputCache&&) = delete;
+  LocalOutputCache& operator=(const LocalOutputCache&) = delete;
+  LocalOutputCache& operator=(LocalOutputCache&&) = delete;
+
+  // Creates cache key from |req|.
+  static std::string MakeCacheKey(const ExecReq& req);
+
+  // SaveOutput copies output files to cache.
+  // |trace_id| is just used for logging.
+  bool SaveOutput(const std::string& key,
+                  const ExecReq* req,
+                  const ExecResp* resp,
+                  const std::string& trace_id);
+
+  // Finds cache with |key|.
+  // Returns true when a cache is found and read correctly. In this case,
+  // |resp| will be filled with output data.
+  // Otherwise, false is returned.
+  // |trace_id| is just used for logging.
+  bool Lookup(const std::string& key,
+              ExecResp* resp,
+              const std::string& trace_id);
+
+  // Dumps stats.
+  void DumpStatsToProto(LocalOutputCacheStats* stats);
+
+  // For stats. These will be removed after merged to GomaStats.
+  size_t TotalCacheCount();
+  std::int64_t TotalCacheAmountInByte();
+  size_t TotalGCRemovedItems() const { return stats_gc_removed_items_.value(); }
+  std::int64_t TotalGCRemovedBytes() const {
+    return stats_gc_removed_bytes_.value();
+  }
+
+ private:
+  struct CacheEntry {
+    CacheEntry() : mtime(0), amount_byte(0) {}
+    CacheEntry(time_t mtime, std::int64_t amount_byte)
+        : mtime(mtime), amount_byte(amount_byte) {
+    }
+    ~CacheEntry() {
+    }
+
+    time_t mtime;
+    std::int64_t amount_byte;
+  };
+
+  LocalOutputCache(std::string cache_dir,
+                   std::int64_t max_cache_amount_byte,
+                   std::int64_t threashold_cache_amount_byte,
+                   size_t max_cache_items,
+                   size_t threshold_cache_items);
+  ~LocalOutputCache();
+
+  // load cache entries.
+  void StartLoadCacheEntries(WorkerThreadManager* wm);
+  void LoadCacheEntries();
+  void LoadCacheEntriesDone();
+  // Wait until all cache entries are loaded from the file.
+  void WaitUntilReady();
+
+  void AddCacheEntry(const SHA256HashValue& key,
+                     std::int64_t cache_amount_in_byte);
+  // A cache entry is updated, so move it to last.
+  void UpdateCacheEntry(const SHA256HashValue& key);
+
+  void StartGarbageCollection(WorkerThreadManager* wm)
+      LOCKS_EXCLUDED(entries_mu_);
+  void StopGarbageCollection() LOCKS_EXCLUDED(entries_mu_);
+  void GarbageCollectionThread() LOCKS_EXCLUDED(entries_mu_);
+  bool ShouldInvokeGarbageCollection() const
+      LOCKS_EXCLUDED(entries_mu_);
+  bool ShouldInvokeGarbageCollectionUnlocked() const
+      SHARED_LOCKS_REQUIRED(entries_mu_);
+  bool ShouldContinueGarbageCollectionUnlocked() const
+      SHARED_LOCKS_REQUIRED(entries_mu_);
+  void RunGarbageCollection(GarbageCollectionStat* stat)
+      LOCKS_EXCLUDED(entries_mu_);
+  void WakeGCThread() LOCKS_EXCLUDED(entries_mu_);
+  void WaitUntilGarbageCollectionThreadDone() LOCKS_EXCLUDED(entries_mu_);
+
+  // Used only for test.
+  void SetReady(bool ready);
+
+  // Full path of cache directory + key prefix.
+  std::string CacheDirWithKeyPrefix(StringPiece key) const;
+  // Full path of cache directory + key prefix + key.
+  std::string CacheFilePath(StringPiece key) const;
+
+  static LocalOutputCache* instance_;
+
+  // LocalOutputCache configurations
+  const std::string cache_dir_;
+  const std::int64_t max_cache_amount_byte_;
+  const std::int64_t threshold_cache_amount_byte_;
+  const size_t max_cache_items_;
+  const size_t threshold_cache_items_;
+
+  // Using in initial load of cache entries.
+  // After loading all cache entries, |ready_| will become true.
+  Lock ready_mu_;
+  ConditionVariable ready_cond_;
+  bool ready_ GUARDED_BY(ready_mu_);
+
+  // cache entries. Older cache is first.
+  using CacheEntryMap =
+      LinkedUnorderedMap<SHA256HashValue, CacheEntry, SHA256HashValueHasher>;
+  ReadWriteLock entries_mu_ ACQUIRED_AFTER(gc_mu_);
+  CacheEntryMap entries_ GUARDED_BY(entries_mu_);
+  // total cache amount in bytes.
+  std::int64_t entries_total_cache_amount_ GUARDED_BY(entries_mu_);
+
+  Lock gc_mu_;
+  ConditionVariable gc_cond_;
+  bool gc_should_done_ GUARDED_BY(gc_mu_);
+  bool gc_working_ GUARDED_BY(gc_mu_);
+
+  StatsCounter stats_save_success_;
+  StatsCounter stats_save_success_time_ms_;
+  StatsCounter stats_save_failure_;
+
+  StatsCounter stats_lookup_success_;
+  StatsCounter stats_lookup_success_time_ms_;
+  StatsCounter stats_lookup_miss_;
+  StatsCounter stats_lookup_failure_;
+
+  StatsCounter stats_commit_success_;
+  StatsCounter stats_commit_success_time_ms_;
+  StatsCounter stats_commit_failure_;
+
+  StatsCounter stats_gc_count_;
+  StatsCounter stats_gc_total_time_ms_;
+
+  StatsCounter stats_gc_removed_items_;
+  StatsCounter stats_gc_removed_bytes_;
+  StatsCounter stats_gc_failed_items_;
+
+  friend class LocalOutputCacheTest;
+};
+
+} // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_LOCAL_OUTPUT_CACHE_H_
diff --git a/client/local_output_cache_data.proto b/client/local_output_cache_data.proto
new file mode 100644
index 0000000..4eef93b
--- /dev/null
+++ b/client/local_output_cache_data.proto
@@ -0,0 +1,17 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+syntax = "proto3";
+
+package devtools_goma;
+
+message LocalOutputCacheFile {
+  string filename = 1;
+  bytes content = 2;
+  bool is_executable = 3;
+}
+
+message LocalOutputCacheEntry {
+  repeated LocalOutputCacheFile files = 1;
+}
diff --git a/client/local_output_cache_unittest.cc b/client/local_output_cache_unittest.cc
new file mode 100644
index 0000000..bebdc0c
--- /dev/null
+++ b/client/local_output_cache_unittest.cc
@@ -0,0 +1,288 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "local_output_cache.h"
+
+#include <memory>
+#include <unordered_set>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "content.h"
+#include "file.h"
+#include "path.h"
+#include "string_piece.h"
+#include "unittest_util.h"
+
+#ifdef _WIN32
+# include "posix_helper_win.h"
+#endif
+
+namespace devtools_goma {
+
+//
+// <tmpdir>/cache -- LocalOutputCache
+//          build -- build directory
+//
+
+class LocalOutputCacheTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    tmpdir_.reset(new TmpdirUtil("localoutputcache-test"));
+    tmpdir_->MkdirForPath("build", true);
+    tmpdir_->MkdirForPath("cache", true);
+  }
+
+  void TearDown() override {
+    LocalOutputCache::Quit();
+  }
+
+  void InitLocalOutputCache() {
+    const std::int64_t max_cache_amount = 1000000;
+    const std::int64_t threshold_cache_amount = 10000000;
+    const size_t max_items = 1000;
+    const size_t threshold_items = 1000;
+    InitLocalOutputCacheWithParams(max_cache_amount,
+                                   threshold_cache_amount,
+                                   max_items,
+                                   threshold_items);
+  }
+
+  void InitLocalOutputCacheWithParams(std::int64_t max_cache_amount,
+                                      std::int64_t threshold_cache_amount,
+                                      size_t max_items,
+                                      size_t threshold_items) {
+    LocalOutputCache::Init(tmpdir_->FullPath("cache"),
+                           nullptr,
+                           max_cache_amount,
+                           threshold_cache_amount,
+                           max_items,
+                           threshold_items);
+  }
+
+  ExecReq MakeFakeExecReq() {
+    ExecReq req;
+    req.mutable_command_spec()->set_name("clang");
+    req.mutable_command_spec()->set_version("4.2.1");
+    req.mutable_command_spec()->set_target("x86_64-unknown-linux-gnu");
+    req.set_cwd(tmpdir_->FullPath("build"));
+    return req;
+  }
+
+  ExecReq MakeFakeExecReqWithArgs(const std::vector<std::string>& args) {
+    ExecReq req = MakeFakeExecReq();
+    for (const auto& arg : args) {
+      req.add_arg(arg);
+    }
+    return req;
+  }
+
+  ExecResp MakeFakeExecResp() {
+    ExecResp resp;
+    resp.mutable_result()->set_exit_status(0);
+    ExecResult_Output* output = resp.mutable_result()->add_output();
+    output->set_filename("output.o");
+    return resp;
+  }
+
+  std::string CacheFilePath(StringPiece key) {
+    return LocalOutputCache::instance()->CacheFilePath(key);
+  }
+
+  bool ShouldInvokeGarbageCollection() {
+    return LocalOutputCache::instance()->ShouldInvokeGarbageCollection();
+  }
+
+  void RunGarbageCollection(LocalOutputCache::GarbageCollectionStat* stat) {
+    LocalOutputCache::instance()->RunGarbageCollection(stat);
+  }
+
+  std::unique_ptr<TmpdirUtil> tmpdir_;
+};
+
+TEST_F(LocalOutputCacheTest, Match) {
+  InitLocalOutputCache();
+
+  const std::string trace_id = "(test-match)";
+
+  // 1. Make ExecReq and ExecResp for fake compile
+  ExecReq req = MakeFakeExecReq();
+  ExecResp resp = MakeFakeExecResp();
+
+  // 2. Try to Save output.
+  tmpdir_->CreateTmpFile("build/output.o", "(output)");
+  std::string key = LocalOutputCache::MakeCacheKey(req);
+
+  EXPECT_TRUE(LocalOutputCache::instance()->SaveOutput(
+                  key, &req, &resp, trace_id));
+
+  // 3. Clean build directory
+  tmpdir_->RemoveTmpFile("build/output.o");
+
+  // 4. Lookup
+  ExecResp looked_up_resp;
+  EXPECT_TRUE(LocalOutputCache::instance()->Lookup(key,
+                                                   &looked_up_resp,
+                                                   trace_id));
+
+  // 5. Check ExecResp content
+  EXPECT_EQ(1, looked_up_resp.result().output_size());
+  EXPECT_EQ("output.o",
+            looked_up_resp.result().output(0).filename());
+}
+
+TEST_F(LocalOutputCacheTest, NoMatch) {
+  InitLocalOutputCache();
+
+  const std::string trace_id = "(test-nomatch)";
+
+  // 1. Make ExecReq and ExecResp for fake compile
+  ExecReq req = MakeFakeExecReq();
+  ExecResp resp = MakeFakeExecResp();
+
+  // 2. Try to Save output.
+  tmpdir_->CreateTmpFile("build/output.o", "(output)");
+  std::string key = LocalOutputCache::MakeCacheKey(req);
+
+  EXPECT_TRUE(LocalOutputCache::instance()->SaveOutput(
+                  key, &req, &resp, trace_id));
+
+  // 3. Clean build directory
+  tmpdir_->RemoveTmpFile("build/output.o");
+
+  // 4. Lookup (should fail here)
+  ExecResp looked_up_resp;
+  std::string fake_key =
+      "000000000000000000000000000000000000000000000000000000000000fa6e";
+  EXPECT_FALSE(LocalOutputCache::instance()->Lookup(fake_key,
+                                                    &looked_up_resp,
+                                                    trace_id));
+}
+
+TEST_F(LocalOutputCacheTest, CollectGarbage) {
+  InitLocalOutputCacheWithParams(0, 0, 100, 100);
+
+  const std::string trace_id = "(garbage)";
+
+  // Make Item.
+  ExecReq req = MakeFakeExecReq();
+  ExecResp resp = MakeFakeExecResp();
+  tmpdir_->CreateTmpFile("build/output.o", "(output)");
+  std::string key = LocalOutputCache::instance()->MakeCacheKey(req);
+  EXPECT_TRUE(LocalOutputCache::instance()->SaveOutput(
+                  key, &req, &resp, trace_id));
+
+  // Check key exists.
+  std::string path = CacheFilePath(key);
+  EXPECT_EQ(0, access(path.c_str(), F_OK));
+
+  // The item should be removed here, since max cache amount is small enough.
+  {
+    LocalOutputCache::GarbageCollectionStat stat;
+    RunGarbageCollection(&stat);
+    EXPECT_NE(0, access(path.c_str(), F_OK));
+    EXPECT_EQ(1U, stat.num_removed);
+    EXPECT_EQ(0U, stat.num_failed);
+  }
+}
+
+TEST_F(LocalOutputCacheTest, WontCollectGarbage) {
+  InitLocalOutputCacheWithParams(1000000, 1000000, 100, 100);
+
+  const std::string trace_id = "(garbage)";
+
+  // Save one cache entry for a fake compile.
+  ExecReq req = MakeFakeExecReq();
+  ExecResp resp = MakeFakeExecResp();
+  tmpdir_->CreateTmpFile("build/output.o", "(output)");
+  std::string key = LocalOutputCache::instance()->MakeCacheKey(req);
+  EXPECT_TRUE(LocalOutputCache::instance()->SaveOutput(
+                  key, &req, &resp, trace_id));
+
+  // The cache file for the key must exist on disk.
+  std::string path = CacheFilePath(key);
+  EXPECT_EQ(0, access(path.c_str(), F_OK));
+
+  // Run garbage collection. Nothing should be removed here, since the
+  // max cache amount is large enough.
+  {
+    LocalOutputCache::GarbageCollectionStat stat;
+    RunGarbageCollection(&stat);
+    EXPECT_EQ(0, access(path.c_str(), F_OK));
+    EXPECT_EQ(0U, stat.num_removed);
+    EXPECT_EQ(0U, stat.num_failed);
+  }
+}
+
+TEST_F(LocalOutputCacheTest, CollectGarbageByNumItems) {
+  // Allow max 99 items.
+  InitLocalOutputCacheWithParams(10000000, 10000000, 99, 60);
+
+  const std::string trace_id = "(garbage)";
+
+  std::vector<std::string> keys;
+  std::unordered_set<std::string> key_set;
+
+  // Make 99 items.
+  for (int i = 0; i < 99; ++i) {
+    ExecReq req = MakeFakeExecReqWithArgs(std::vector<std::string> {
+        "clang",
+        "-DFOO=" + std::to_string(i),
+    });
+
+    ExecResp resp = MakeFakeExecResp();
+    tmpdir_->CreateTmpFile("build/output.o", "(output)");
+    std::string key = LocalOutputCache::instance()->MakeCacheKey(req);
+    keys.push_back(key);
+    key_set.insert(key);
+    EXPECT_TRUE(LocalOutputCache::instance()->SaveOutput(
+                    key, &req, &resp, trace_id));
+  }
+
+  // All keys must be different.
+  EXPECT_EQ(99UL, key_set.size());
+
+  // Check key exists.
+  for (const auto& key : keys) {
+    std::string path = CacheFilePath(key);
+    EXPECT_EQ(0, access(path.c_str(), F_OK));
+  }
+
+  // GC won't run yet.
+  EXPECT_FALSE(ShouldInvokeGarbageCollection());
+
+  // Add last one.
+  {
+    ExecReq req = MakeFakeExecReqWithArgs(std::vector<std::string> {
+        "clang",
+        "-DFOO=" + std::to_string(99),
+    });
+
+    ExecResp resp = MakeFakeExecResp();
+    tmpdir_->CreateTmpFile("build/output.o", "(output)");
+    std::string key = LocalOutputCache::instance()->MakeCacheKey(req);
+    keys.push_back(key);
+    key_set.insert(key);
+    EXPECT_TRUE(LocalOutputCache::instance()->SaveOutput(
+                    key, &req, &resp, trace_id));
+  }
+
+  // All keys must be different.
+  EXPECT_EQ(100UL, key_set.size());
+
+  // GC should run now.
+  EXPECT_TRUE(ShouldInvokeGarbageCollection());
+
+  // Run garbage collection.
+  // Since threshold is 60, 40 items must be removed.
+  {
+    LocalOutputCache::GarbageCollectionStat stat;
+    RunGarbageCollection(&stat);
+    EXPECT_EQ(40U, stat.num_removed);
+    EXPECT_EQ(0U, stat.num_failed);
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/client/log_cleaner.cc b/client/log_cleaner.cc
new file mode 100644
index 0000000..b79c815
--- /dev/null
+++ b/client/log_cleaner.cc
@@ -0,0 +1,118 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "log_cleaner.h"
+
+#include <stdlib.h>
+
+#ifndef _WIN32
+#include <limits.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#else
+#include "config_win.h"
+#include <stack>
+#include "filetime_win.h"
+#endif
+
+#include "file.h"
+#include "file_id.h"
+#include "file_dir.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+#include "path.h"
+#include "scoped_fd.h"
+#include "string_piece_utils.h"
+
+namespace devtools_goma {
+
+LogCleaner::LogCleaner() {
+}
+
+LogCleaner::~LogCleaner() {
+}
+
+void LogCleaner::AddLogBasename(const string& basename) {
+  LOG(INFO) << "log basename:" << basename;
+  basenames_.push_back(basename);
+}
+
+// Removes our log files whose mtime is older than |t| from glog's log dirs.
+void LogCleaner::CleanOldLogs(time_t t) {
+  const std::vector<string>& dirs = google::GetLoggingDirectories();
+  LOG(INFO) << "clean old logs in " << dirs;
+  // Gather candidates from every directory first; the set drops duplicates.
+  std::set<string> stale_logs;
+  for (const auto& dir : dirs)
+    FindOldLogsInDir(dir, t, &stale_logs);
+  if (stale_logs.empty()) {
+    LOG(INFO) << "no old logs found.";
+    return;
+  }
+  for (const auto& path : stale_logs) {
+    LOG(INFO) << "remove old log:" << path;
+    if (remove(path.c_str()) == 0)
+      continue;
+    PLOG(WARNING) << "delete:" << path;
+  }
+}
+
+void LogCleaner::FindOldLogsInDir(const string& log_dir, time_t t,
+                                  std::set<string>* old_logs) {
+  VLOG(1) << "log_dir:" << log_dir;
+  std::vector<DirEntry> entries;
+  if (!ListDirectory(log_dir, &entries))
+    return;
+
+  for (const auto& entry : entries) {
+    if (entry.is_dir)
+      continue;
+    if (!IsMyLogFile(entry.name))
+      continue;
+
+    string fullname = file::JoinPath(log_dir, entry.name);
+#ifndef _WIN32
+    char real_fullname[PATH_MAX];
+    if (realpath(fullname.c_str(), real_fullname) == nullptr) {
+      VLOG(1) << "realpath:" << fullname;
+      continue;
+    }
+    string log_filename = real_fullname;
+#else
+    string log_filename = fullname;
+#endif
+
+    FileId fi(log_filename);
+    if (!fi.IsValid()) {
+      LOG(ERROR) << "Failed to get file id:" << log_filename;
+    } else if (fi.mtime < t) {
+      VLOG(1) << "old log:" << log_filename;
+      old_logs->insert(log_filename);
+    } else {
+      VLOG(1) << "new log:" << log_filename;
+    }
+  }
+}
+
+// Returns true if |name| is "<basename>.<rest>" for a registered basename
+// and <rest> contains a glog severity name.
+bool LogCleaner::IsMyLogFile(const string& name) const {
+  static const char* const kLogLevel[] = {"INFO", "WARNING", "ERROR", "FATAL"};
+  for (const auto& basename : basenames_) {
+    if (name.size() <= basename.size() || name[basename.size()] != '.' ||
+        !strings::StartsWith(name, basename)) {
+      continue;
+    }
+    for (const auto& level : kLogLevel) {
+      // Start past the basename: a level word inside it must not count.
+      if (name.find(level, basename.size()) != string::npos)
+        return true;
+    }
+  }
+  return false;
+}
+
+}  // namespace devtools_goma
diff --git a/client/log_cleaner.h b/client/log_cleaner.h
new file mode 100644
index 0000000..b560405
--- /dev/null
+++ b/client/log_cleaner.h
@@ -0,0 +1,43 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_LOG_CLEANER_H_
+#define DEVTOOLS_GOMA_CLIENT_LOG_CLEANER_H_
+
+#include <ctime>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class LogCleaner {
+ public:
+  LogCleaner();
+  ~LogCleaner();
+
+  // Adds log's basename to be cleaned.
+  void AddLogBasename(const string& basename);
+
+  // Cleans log files older than t.
+  void CleanOldLogs(time_t t);
+
+ private:
+  friend class LogCleanerTest;
+  void FindOldLogsInDir(const string& log_dir, time_t t,
+                        std::set<string>* old_logs);
+  bool IsMyLogFile(const string& name) const;
+
+  std::vector<string> basenames_;
+
+  DISALLOW_COPY_AND_ASSIGN(LogCleaner);
+};
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_LOG_CLEANER_H_
diff --git a/client/log_cleaner_unittest.cc b/client/log_cleaner_unittest.cc
new file mode 100644
index 0000000..53d8f1b
--- /dev/null
+++ b/client/log_cleaner_unittest.cc
@@ -0,0 +1,51 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "log_cleaner.h"
+
+#include <gtest/gtest.h>
+
+namespace devtools_goma {
+
+class LogCleanerTest : public testing::Test {
+ public:
+  LogCleanerTest() {
+    log_cleaner_.AddLogBasename("compiler_proxy");
+    log_cleaner_.AddLogBasename("compiler_proxy-subproc");
+    log_cleaner_.AddLogBasename("gcc");
+    log_cleaner_.AddLogBasename("g++");
+  }
+
+  bool IsMyLogFile(const string& name) {
+    return log_cleaner_.IsMyLogFile(name);
+  }
+
+ private:
+  LogCleaner log_cleaner_;
+};
+
+TEST_F(LogCleanerTest, IsMyLogFile) {
+  EXPECT_TRUE(IsMyLogFile(
+      "compiler_proxy.example.com.goma.log.INFO."
+      "20111017-165526.12857"));
+  EXPECT_TRUE(IsMyLogFile(
+      "compiler_proxy.example.com.goma.log.WARNING."
+      "20111017-165526.12857"));
+  EXPECT_TRUE(IsMyLogFile(
+      "compiler_proxy.example.com.goma.log.ERROR."
+      "20111017-165526.12857"));
+  EXPECT_TRUE(IsMyLogFile(
+      "compiler_proxy-subproc.example.com.goma.log.INFO."
+      "20111017-165526.12857"));
+  EXPECT_TRUE(IsMyLogFile(
+      "gcc.example.com.goma.log.INFO."
+      "20111017-165526.12857"));
+  EXPECT_TRUE(IsMyLogFile(
+      "g++.example.com.goma.log.INFO."
+      "20111017-165526.12857"));
+  EXPECT_FALSE(IsMyLogFile("g++.log"));
+}
+
+}  // namespace devtools_goma
diff --git a/client/log_service_client.cc b/client/log_service_client.cc
new file mode 100644
index 0000000..c01ccc4
--- /dev/null
+++ b/client/log_service_client.cc
@@ -0,0 +1,331 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "log_service_client.h"
+
+#include "autolock_timer.h"
+#include "callback.h"
+#include "compiler_specific.h"
+#include "cpu.h"
+#include "glog/logging.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_log.pb.h"
+MSVC_POP_WARNING()
+#include "string_piece_utils.h"
+#include "http_rpc.h"
+#include "worker_thread_manager.h"
+
+#ifdef __MACH__
+#include "mac_version.h"
+#elif defined(__linux__)
+#include <gnu/libc-version.h>
+#endif
+
+namespace {
+
+static devtools_goma::CpuFeatures GetCpuFeatures() {
+  devtools_goma::CPU cpu;
+  devtools_goma::CpuFeatures features;
+
+  features.set_mmx(cpu.has_mmx());
+  features.set_sse(cpu.has_sse());
+  features.set_sse2(cpu.has_sse2());
+  features.set_sse3(cpu.has_sse3());
+  features.set_sse41(cpu.has_sse41());
+  features.set_sse42(cpu.has_sse42());
+  features.set_popcnt(cpu.has_popcnt());
+  features.set_avx(cpu.has_avx());
+  features.set_avx2(cpu.has_avx2());
+  features.set_aesni(cpu.has_aesni());
+  features.set_non_stop_time_stamp_counter(
+      cpu.has_non_stop_time_stamp_counter());
+
+  return features;
+}
+
+static devtools_goma::OSInfo GetOsInfo() {
+  devtools_goma::OSInfo os_info;
+
+#if defined(_WIN32)
+  // TODO: set windows version
+  os_info.mutable_win_info();
+#elif defined(__MACH__)
+  os_info.mutable_mac_info()->set_mac_osx_minor_version(
+      devtools_goma::MacOSXMinorVersion());
+#elif defined(__linux__)
+  // TODO: set linux (ubuntu) version (libc version is better?)
+  os_info.mutable_linux_info()->set_gnu_libc_version(gnu_get_libc_version());
+#endif
+
+  return os_info;
+}
+
+}  // namespace
+
+namespace devtools_goma {
+
+class LogServiceClient::SaveLogJob {
+ public:
+  explicit SaveLogJob(LogServiceClient* log_service)
+      : log_service_(log_service), cpu_features_(GetCpuFeatures()),
+        os_info_(GetOsInfo()) {
+  }
+
+  void AddExecLog(const ExecLog& exec_log) {
+    ExecLog* log = req_.add_exec_log();
+    *log = exec_log;
+
+    const HttpClient::Options& options =
+        log_service_->http_rpc_->client()->options();
+    log->set_use_ssl(options.use_ssl);
+
+    log->set_auth_type(ExecLog_AuthenticationType_NONE);
+    if (options.oauth2_config.enabled()) {
+      log->set_auth_type(ExecLog_AuthenticationType_OAUTH2_APPLICATION);
+      if (!options.gce_service_account.empty()) {
+        log->set_auth_type(
+            ExecLog_AuthenticationType_OAUTH2_GCE_SERVICE_ACCOUNT);
+      } else if (!options.service_account_json_filename.empty()) {
+        log->set_auth_type(ExecLog_AuthenticationType_OAUTH2_SERVICE_ACCOUNT);
+      }
+    } else if (options.luci_context_auth.enabled()) {
+      log->set_auth_type(ExecLog_AuthenticationType_OAUTH2_LUCI_LOCAL_AUTH);
+    } else if (!options.authorization.empty()) {
+      if (strings::StartsWith(options.authorization, "Bearer ")) {
+        log->set_auth_type(ExecLog_AuthenticationType_OAUTH2_UNSPEC);
+      } else {
+        log->set_auth_type(ExecLog_AuthenticationType_UNKNOWN);
+      }
+    }
+    *log->mutable_cpu_features() = cpu_features_;
+    *log->mutable_os_info() = os_info_;
+  }
+
+  void AddMemoryLog(const MemoryUsageLog& memory_usage_log) {
+    MemoryUsageLog* log = req_.add_memory_usage_log();
+    *log = memory_usage_log;
+  }
+
+  bool HasReachedMaxLogSize() const {
+    return num_log() >= log_service_->max_log_in_req_;
+  }
+
+  size_t num_exec_log() const {
+    return req_.exec_log_size();
+  }
+
+  size_t num_memory_usage_log() const {
+    return req_.memory_usage_log_size();
+  }
+
+  size_t num_log() const {
+    return num_exec_log() + num_memory_usage_log();
+  }
+
+  void Call() {
+    LOG(INFO) << "SaveLog"
+              << " exec_log=" << num_exec_log()
+              << " memory_usage_log=" << num_memory_usage_log()
+              << " size=" << req_.ByteSize();
+    log_service_->http_rpc_->CallWithCallback(
+        log_service_->save_log_path_, &req_, &resp_, &http_rpc_stat_,
+        NewCallback(this, &LogServiceClient::SaveLogJob::Done));
+  }
+
+  void Delete() {
+    VLOG(1) << "Delete";
+    delete this;
+  }
+
+ private:
+  ~SaveLogJob() {
+  }
+  void Done() {
+    VLOG(1) << "SaveLog Done";
+    LOG_IF(INFO, !http_rpc_stat_.response_header.empty())
+        << "SaveLog done: http response=" << http_rpc_stat_.response_header;
+    if (http_rpc_stat_.err) {
+      LOG(WARNING) << http_rpc_stat_.err_message;
+    }
+    log_service_->FinishSaveLogJob();
+    delete this;
+  }
+
+  LogServiceClient* log_service_;
+
+  SaveLogReq req_;
+  SaveLogResp resp_;
+  HttpRPC::Status http_rpc_stat_;
+
+  devtools_goma::CpuFeatures cpu_features_;
+  const devtools_goma::OSInfo os_info_;
+
+  DISALLOW_COPY_AND_ASSIGN(SaveLogJob);
+};
+
+LogServiceClient::LogServiceClient(
+    HttpRPC* http_rpc,
+    const string& save_log_path,
+    size_t max_log_in_req,
+    int max_pending_ms,
+    WorkerThreadManager* wm)
+    : wm_(wm),
+      http_rpc_(http_rpc),
+      save_log_path_(save_log_path),
+      max_log_in_req_(max_log_in_req),
+      max_pending_ms_(max_pending_ms),
+      periodic_callback_id_(kInvalidPeriodicClosureId),
+      cond_(&mu_),
+      save_log_job_(nullptr),
+      num_save_log_job_(0),
+      last_timestamp_ms_(0) {
+  CHECK_GT(max_log_in_req_, 0U);
+  timer_.Start();
+  last_timestamp_ms_ = timer_.GetInMilliSeconds();
+}
+
+LogServiceClient::~LogServiceClient() {
+  CHECK_EQ(periodic_callback_id_, kInvalidPeriodicClosureId);
+  CHECK(save_log_job_ == nullptr);
+  CHECK_EQ(0, num_save_log_job_);
+}
+
+struct ExecLogSaveFunc {
+  explicit ExecLogSaveFunc(const ExecLog& exec_log) : log(exec_log) {}
+  void operator()(LogServiceClient::SaveLogJob* job) const {
+    job->AddExecLog(log);
+  }
+  const ExecLog& log;
+};
+
+struct MemoryUsageLogSaveFunc {
+  explicit MemoryUsageLogSaveFunc(const MemoryUsageLog& memory_usage_log) :
+      log(memory_usage_log) {}
+  void operator()(LogServiceClient::SaveLogJob* job) const {
+    job->AddMemoryLog(log);
+  }
+  const MemoryUsageLog& log;
+};
+
+void LogServiceClient::SaveExecLog(const ExecLog& exec_log) {
+  VLOG(2) << "SaveExecLog";
+  SaveLogImpl(ExecLogSaveFunc(exec_log));
+}
+
+void LogServiceClient::SaveMemoryUsageLog(const MemoryUsageLog& mem_usage_log) {
+  VLOG(2) << "SaveMemoryUsageLog";
+  SaveLogImpl(MemoryUsageLogSaveFunc(mem_usage_log));
+}
+
+template<typename SaveLogFunc>
+void LogServiceClient::SaveLogImpl(const SaveLogFunc& func) {
+  SaveLogJob* job = nullptr;
+  {
+    AUTOLOCK(lock, &mu_);
+    last_timestamp_ms_ = timer_.GetInMilliSeconds();
+    if (!http_rpc_->client()->shutting_down() &&
+        periodic_callback_id_ == kInvalidPeriodicClosureId) {
+      periodic_callback_id_ = wm_->RegisterPeriodicClosure(
+          FROM_HERE,
+          std::min(max_pending_ms_ / 10, 1000),
+          NewPermanentCallback(this, &LogServiceClient::CheckPending));
+    }
+    if (save_log_job_ == nullptr)
+      save_log_job_ = new SaveLogJob(this);
+
+    func(save_log_job_);
+
+    if (http_rpc_->client()->shutting_down()
+        || save_log_job_->HasReachedMaxLogSize()) {
+      job = save_log_job_;
+      save_log_job_ = nullptr;
+    }
+    if (job != nullptr) {
+      ++num_save_log_job_;
+    }
+  }
+  if (job != nullptr)
+    job->Call();
+}
+
+// Hands the pending SaveLogJob, if any, to the worker thread manager.
+// A pending job that holds no logs is deleted instead of being sent.
+void LogServiceClient::Flush() {
+  VLOG(1) << "Flush";
+  SaveLogJob* pending = nullptr;
+  {
+    AUTOLOCK(lock, &mu_);
+    last_timestamp_ms_ = timer_.GetInMilliSeconds();
+    pending = save_log_job_;
+    if (pending == nullptr)
+      return;
+    // Either way the job stops being the accumulating one.
+    save_log_job_ = nullptr;
+    if (pending->num_log() == 0) {
+      pending->Delete();
+      return;
+    }
+    // Counted as in flight until FinishSaveLogJob() runs.
+    ++num_save_log_job_;
+  }
+  // Run Call() via the worker thread manager rather than inline.
+  wm_->RunClosure(
+      FROM_HERE,
+      NewCallback(pending, &LogServiceClient::SaveLogJob::Call),
+      WorkerThreadManager::PRIORITY_MED);
+}
+
+void LogServiceClient::Wait() {
+  LOG(INFO) << "Wait";
+  AUTOLOCK(lock, &mu_);
+  DCHECK(http_rpc_->client()->shutting_down());
+  if (periodic_callback_id_ != kInvalidPeriodicClosureId) {
+    wm_->UnregisterPeriodicClosure(periodic_callback_id_);
+    periodic_callback_id_ = kInvalidPeriodicClosureId;
+  }
+  if (save_log_job_ != nullptr) {
+    save_log_job_->Delete();
+    save_log_job_ = nullptr;
+  }
+  while (save_log_job_ != nullptr || num_save_log_job_ > 0) {
+    LOG(INFO) << "num_save_log_job=" << num_save_log_job_;
+    cond_.Wait();
+  }
+}
+
+// Periodic closure: sends the accumulated logs once max_pending_ms_ has
+// elapsed since the last Save*Log()/Flush() call.
+void LogServiceClient::CheckPending() {
+  VLOG(1) << "CheckPending";
+  SaveLogJob* expired = nullptr;
+  {
+    AUTOLOCK(lock, &mu_);
+    // Nothing accumulated yet, or the accumulating job is still empty.
+    if (save_log_job_ == nullptr || save_log_job_->num_log() == 0)
+      return;
+    const long long deadline_ms = last_timestamp_ms_ + max_pending_ms_;
+    if (timer_.GetInMilliSeconds() < deadline_ms)
+      return;
+    // Detach the job; it counts as in flight from here on.
+    expired = save_log_job_;
+    save_log_job_ = nullptr;
+    ++num_save_log_job_;
+  }
+  // Run Call() via the worker thread manager rather than inline.
+  wm_->RunClosure(
+      FROM_HERE,
+      NewCallback(expired, &LogServiceClient::SaveLogJob::Call),
+      WorkerThreadManager::PRIORITY_MED);
+}
+
+void LogServiceClient::FinishSaveLogJob() {
+  AUTOLOCK(lock, &mu_);
+  --num_save_log_job_;
+  CHECK_GE(num_save_log_job_, 0);
+  if (num_save_log_job_ == 0)
+    cond_.Signal();
+}
+
+}  // namespace devtools_goma
diff --git a/client/log_service_client.h b/client/log_service_client.h
new file mode 100644
index 0000000..170361b
--- /dev/null
+++ b/client/log_service_client.h
@@ -0,0 +1,84 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_LOG_SERVICE_CLIENT_H_
+#define DEVTOOLS_GOMA_CLIENT_LOG_SERVICE_CLIENT_H_
+
+#include <string>
+
+#include "basictypes.h"
+#include "lockhelper.h"
+#include "simple_timer.h"
+#include "worker_thread_manager.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class ExecLog;
+class HttpRPC;
+class MemoryUsageLog;
+class PermanentClosure;
+class WorkerThreadManager;
+
+class LogServiceClient {
+ public:
+  LogServiceClient(HttpRPC* http_rpc,
+                   const string& save_log_path,
+                   size_t max_log_in_req,
+                   int max_pending_ms,
+                   WorkerThreadManager* wm);
+  ~LogServiceClient();
+
+  // Saves exec_log in goma backends.
+  // Should be called on a WorkerThread.
+  void SaveExecLog(const ExecLog& exec_log);
+
+  void SaveMemoryUsageLog(const MemoryUsageLog& memory_usage_log);
+
+  // Flushes pending logs.
+  // Could be called on main thread (non WorkerThread).
+  void Flush();
+
+  // Waits for all active requests.
+  // Could be called on main thread (non WorkerThread).
+  void Wait();
+
+ private:
+  class SaveLogJob;
+  friend class SaveLogJob;
+  friend struct ExecLogSaveFunc;
+  friend struct MemoryUsageLogSaveFunc;
+  void CheckPending();
+  void FinishSaveLogJob();
+  template<typename SaveLogFunc>
+  void SaveLogImpl(const SaveLogFunc& func);
+
+  WorkerThreadManager* wm_;
+  HttpRPC* http_rpc_;
+  const string save_log_path_;
+  const size_t max_log_in_req_;
+  const int max_pending_ms_;
+
+  PeriodicClosureId periodic_callback_id_;
+
+  // mu_ protects save_log_job_, num_save_log_job_ and last_timestamp_ms_.
+  Lock mu_;
+  // Condition to check num_save_log_job_ becomes 0.
+  ConditionVariable cond_;
+  // Current SaveLogJob accumulating logs.
+  SaveLogJob* save_log_job_;
+  // Number of SaveLogJobs sending to the server.
+  int num_save_log_job_;
+  SimpleTimer timer_;
+  // Time when Save*Log was called.
+  long long last_timestamp_ms_;
+
+  DISALLOW_COPY_AND_ASSIGN(LogServiceClient);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_LOG_SERVICE_CLIENT_H_
diff --git a/client/luci_context.cc b/client/luci_context.cc
new file mode 100644
index 0000000..24f34f8
--- /dev/null
+++ b/client/luci_context.cc
@@ -0,0 +1,181 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "luci_context.h"
+
+#include <string>
+
+#include "glog/logging.h"
+#include "json/json.h"
+#include "json_util.h"
+
+namespace devtools_goma {
+
+namespace {
+
+static bool ParseLocalAuth(const Json::Value& local_auth,
+                           LuciContextAuth* luci_context_auth) {
+  static const char kRpcPort[] = "rpc_port";
+  static const char kSecret[] = "secret";
+  static const char kAccounts[] = "accounts";
+  static const char kDefaultAccountId[] = "default_account_id";
+  static const char kId[] = "id";
+
+  luci_context_auth->clear();
+
+  if (!local_auth.isObject()) {
+    LOG(WARNING) << "local_auth is not object";
+    return false;
+  }
+
+  std::string err;
+  if (!GetIntFromJson(local_auth, kRpcPort,
+                      &luci_context_auth->rpc_port, &err)) {
+    LOG(WARNING) << err;
+    return false;
+  }
+
+  if (!GetStringFromJson(local_auth, kSecret,
+                         &luci_context_auth->secret, &err)) {
+    LOG(WARNING) << err;
+    return false;
+  }
+
+  if (local_auth.isMember(kAccounts)) {
+    const auto& accounts = local_auth[kAccounts];
+    if (!accounts.isArray()) {
+      LOG(WARNING) << "local_auth['accounts'] is not a list";
+      return false;
+    }
+    for (const auto& account : accounts) {
+      if (!account.isObject()) {
+        LOG(WARNING) << "not an object in local_auth['accounts']";
+        return false;
+      }
+      LuciContextAuthAccount luci_account;
+      if (!GetStringFromJson(account, kId, &luci_account.id, &err)) {
+        LOG(WARNING) << "error when reading account:" << err;
+        return false;
+      }
+      luci_context_auth->accounts.push_back(luci_account);
+    }
+  }
+
+  // Note: it can be missing or be null. In this case, LUCI authentication
+  // should not be used by default. It is still valid LuciContextAuth object
+  // though.
+  if (local_auth.isMember(kDefaultAccountId) &&
+      !local_auth[kDefaultAccountId].isNull()) {
+    if (!GetStringFromJson(local_auth, kDefaultAccountId,
+                           &luci_context_auth->default_account_id, &err)) {
+      LOG(WARNING) << err;
+      return false;
+    }
+  }
+
+  return true;
+}
+
+}  // namespace
+
+// Parses LUCI_CONTEXT JSON |json_body|; returns false if it is malformed.
+bool ParseLuciContext(
+    const std::string& json_body, LuciContext* luci_context) {
+  DCHECK(luci_context);
+  static const char kLocalAuth[] = "local_auth";
+  Json::Reader reader;
+  Json::Value root;
+  if (!reader.parse(json_body, root, false)) {
+    LOG(WARNING) << "invalid json";
+    return false;
+  }
+  // Json::Value::isMember() asserts unless the value is an object or null.
+  if (!root.isObject() && !root.isNull()) {
+    LOG(WARNING) << "luci context is not object";
+    return false;
+  }
+  if (!root.isMember(kLocalAuth)) {
+    LOG(INFO) << "missing " << kLocalAuth;
+  } else if (!ParseLocalAuth(root[kLocalAuth], &luci_context->local_auth)) {
+    return false;
+  }
+  // TODO: implement swarming?
+  // https://github.com/luci/luci-py/blob/master/client/LUCI_CONTEXT.md
+  return true;
+}
+
+// Serializes the request as a JSON object. Returns an empty string when
+// |scopes| or |secret| is empty, since such a request is invalid.
+std::string LuciOAuthTokenRequest::ToString() const {
+  static const char kScopes[] = "scopes";
+  static const char kSecret[] = "secret";
+  static const char kAccountId[] = "account_id";
+
+  const bool valid = !scopes.empty() && !secret.empty();
+  if (!valid) {
+    LOG(WARNING) << "trying to make string from invalid LuciOAuthTokenRequest";
+    return std::string();
+  }
+
+  Json::Value request;
+  Json::Value& scope_list = request[kScopes];
+  for (const auto& scope : scopes)
+    scope_list.append(Json::Value(scope));
+  request[kSecret] = secret;
+  // The old protocol has no notion of accounts, so 'account_id' is only
+  // emitted when set. See LuciContextAuth::enabled().
+  if (!account_id.empty())
+    request[kAccountId] = account_id;
+
+  return Json::FastWriter().write(request);
+}
+
+// Parses |json_body|, the LUCI local auth token response, into |resp|.
+// Returns false if the body is not a well-formed response object.
+bool ParseLuciOAuthTokenResponse(
+    const std::string& json_body, LuciOAuthTokenResponse* resp) {
+  DCHECK(resp);
+  static const char kErrorCode[] = "error_code";
+  static const char kErrorMessage[] = "error_message";
+  static const char kAccessToken[] = "access_token";
+  static const char kExpiry[] = "expiry";
+
+  Json::Reader reader;
+  Json::Value root;
+  if (!reader.parse(json_body, root, false)) {
+    LOG(WARNING) << "invalid json";
+    return false;
+  }
+  // Json::Value::get() asserts on non-object values such as arrays.
+  if (!root.isObject()) {
+    LOG(WARNING) << "response is not object";
+    return false;
+  }
+
+  // A missing error_code means success (0).
+  const Json::Value error_code = root.get(kErrorCode, Json::Value(0));
+  if (!error_code.isInt()) {
+    LOG(WARNING) << kErrorCode << " is not int";
+    return false;
+  }
+  resp->error_code = error_code.asInt();
+  std::string err;
+  if (resp->error_code != 0) {
+    // Error responses carry a message instead of a token.
+    if (!GetStringFromJson(root, kErrorMessage, &resp->error_message, &err)) {
+      LOG(WARNING) << err << " error_code=" << resp->error_code;
+      return false;
+    }
+    return true;
+  }
+  if (!GetStringFromJson(root, kAccessToken, &resp->access_token, &err) ||
+      !GetInt64FromJson(root, kExpiry, &resp->expiry, &err)) {
+    LOG(WARNING) << err;
+    return false;
+  }
+  return true;
+}
+
+}  // namespace devtools_goma
diff --git a/client/luci_context.h b/client/luci_context.h
new file mode 100644
index 0000000..5f8d97d
--- /dev/null
+++ b/client/luci_context.h
@@ -0,0 +1,105 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// https://github.com/luci/luci-py/blob/master/client/LUCI_CONTEXT.md
+
+#ifndef DEVTOOLS_GOMA_CLIENT_LUCI_CONTEXT_H_
+#define DEVTOOLS_GOMA_CLIENT_LUCI_CONTEXT_H_
+
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+namespace devtools_goma {
+
+struct LuciContextAuthAccount {
+  // Logical identifier of the account (e.g "task" or "system").
+  std::string id;
+};
+
+struct LuciContextAuth {
+  // RPC port of LuciLocalAuthService; -1 when unset.
+  int rpc_port;
+  // Secret used for OAuthTokenRequest.
+  std::string secret;
+  // List of accounts available through LUCI context.
+  std::vector<LuciContextAuthAccount> accounts;
+  // The account to use by default, see enabled().
+  std::string default_account_id;
+
+  LuciContextAuth() : rpc_port(-1) {}
+
+  // Returns true if LUCI local auth should be used by default in this process.
+  bool enabled() const {
+    // There are two flavors of the protocol:
+    //  1. One doesn't use 'accounts' or 'default_account_id', and has local
+    //     auth always enabled. This is deprecated.
+    //  2. Another always uses 'accounts', and has local auth enabled only if
+    //     'default_account_id' is set.
+    return rpc_port > 0 && !secret.empty()
+           && (accounts.empty() || !default_account_id.empty());
+  }
+
+  void clear() {
+    rpc_port = -1;
+    secret.clear();
+    accounts.clear();
+    default_account_id.clear();
+  }
+};
+
+struct LuciContext {
+  LuciContextAuth local_auth;
+  // There may be more stuff here in the future.
+
+  void clear() {
+    local_auth.clear();
+  }
+};
+
+struct LuciOAuthTokenRequest {
+  std::vector<std::string> scopes;
+  std::string secret;
+  std::string account_id;
+
+  std::string ToString() const;
+};
+
+struct LuciOAuthTokenResponse {
+  // an error code (or 0 if success)
+  int error_code;
+  // optional error message
+  std::string error_message;
+
+  // the actual access token
+  std::string access_token;
+  // its expiration time, as unix timestamp
+  int64_t expiry;
+
+  LuciOAuthTokenResponse() : error_code(-1), expiry(-1) {}
+
+  void clear() {
+    error_code = -1;
+    error_message.clear();
+    access_token.clear();
+    expiry = -1;
+  }
+};
+
+// Parses LUCI_CONTEXT file contents.
+// Returns false on invalid JSON, or if some required fields in
+// LuciContextAuth are missing.
+//
+// Note that this function returns true even if local_auth is missing in the
+// JSON; check local_auth.enabled() before using what is in local_auth.
+bool ParseLuciContext(
+    const std::string& json_body, LuciContext* luci_context);
+
+bool ParseLuciOAuthTokenResponse(
+    const std::string& json_body, LuciOAuthTokenResponse* resp);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_LUCI_CONTEXT_H_
diff --git a/client/luci_context_unittest.cc b/client/luci_context_unittest.cc
new file mode 100644
index 0000000..b44efd8
--- /dev/null
+++ b/client/luci_context_unittest.cc
@@ -0,0 +1,118 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "luci_context.h"
+
+#include <gtest/gtest.h>
+
+#include "json/json.h"
+
+namespace devtools_goma {
+
+TEST(LuciContextTest, ParseLuciContextAuthSuccess) {
+  static const char kLuciContext[] =
+      "{\"local_auth\":{\"rpc_port\":54140,"
+      "\"secret\":\"this_is_secret_string\","
+      "\"accounts\":[{\"id\":\"acc_a\"},{\"id\":\"acc_b\"}],"
+      "\"default_account_id\":\"acc_a\"}}";
+  // Accounts plus a default account: local auth is expected to be enabled.
+  LuciContext luci_context;
+  EXPECT_TRUE(ParseLuciContext(kLuciContext, &luci_context));
+  EXPECT_TRUE(luci_context.local_auth.enabled());
+  EXPECT_EQ(54140, luci_context.local_auth.rpc_port);
+  EXPECT_EQ("this_is_secret_string", luci_context.local_auth.secret);
+  EXPECT_EQ(2U, luci_context.local_auth.accounts.size());
+  EXPECT_EQ("acc_a", luci_context.local_auth.accounts[0].id);
+  EXPECT_EQ("acc_b", luci_context.local_auth.accounts[1].id);
+  EXPECT_EQ("acc_a", luci_context.local_auth.default_account_id);
+}
+
+TEST(LuciContextTest, ParseLuciContextAuthOldProtocol) {
+  static const char kLuciContext[] =
+      "{\"local_auth\":{\"rpc_port\":54140,"
+      "\"secret\":\"this_is_secret_string\"}}";
+
+  LuciContext luci_context;
+  EXPECT_TRUE(ParseLuciContext(kLuciContext, &luci_context));
+  EXPECT_TRUE(luci_context.local_auth.enabled());
+}
+
+TEST(LuciContextTest, ParseLuciContextAuthDisabled) {
+  static const char kLuciContext[] =
+      "{\"local_auth\":{\"rpc_port\":54140,"
+      "\"secret\":\"this_is_secret_string\","
+      "\"accounts\":[{\"id\":\"acc_a\"},{\"id\":\"acc_b\"}]}}";
+
+  LuciContext luci_context;
+  EXPECT_TRUE(ParseLuciContext(kLuciContext, &luci_context));
+  EXPECT_FALSE(luci_context.local_auth.enabled());
+}
+
+TEST(LuciContextTest, ParseLuciContextAuthDisabledNull) {
+  static const char kLuciContext[] =
+      "{\"local_auth\":{\"rpc_port\":54140,"
+      "\"secret\":\"this_is_secret_string\","
+      "\"accounts\":[{\"id\":\"acc_a\"},{\"id\":\"acc_b\"}],"
+      "\"default_account_id\":null}}";
+
+  LuciContext luci_context;
+  EXPECT_TRUE(ParseLuciContext(kLuciContext, &luci_context));
+  EXPECT_FALSE(luci_context.local_auth.enabled());
+}
+
+TEST(LuciContextTest, ParseLuciContextAuthBadAccounts) {
+  static const char kLuciContext[] =
+      "{\"local_auth\":{\"rpc_port\":54140,"
+      "\"secret\":\"this_is_secret_string\","
+      "\"accounts\":[\"not an object\"],"
+      "\"default_account_id\":\"acc_a\"}}";
+
+  LuciContext luci_context;
+  EXPECT_FALSE(ParseLuciContext(kLuciContext, &luci_context));
+}
+
+TEST(LuciContextTest, LuciOAuthTokenRequestToString) {
+  LuciOAuthTokenRequest req;
+  req.scopes.push_back("https://www.googleapis.com/auth/userinfo.email");
+  req.scopes.push_back("https://www.googleapis.com/auth/plus.me");
+  req.secret = "this_is_secret";
+  req.account_id = "account_id";
+  std::string request = req.ToString();
+
+  Json::Reader reader;
+  Json::Value root;
+  EXPECT_TRUE(reader.parse(request, root, false));
+  EXPECT_TRUE(root["scopes"].isArray());
+  EXPECT_EQ(2U, root["scopes"].size());
+  EXPECT_EQ("https://www.googleapis.com/auth/userinfo.email",
+            root["scopes"][0].asString());
+  EXPECT_EQ("https://www.googleapis.com/auth/plus.me",
+            root["scopes"][1].asString());
+  EXPECT_EQ("this_is_secret", root["secret"].asString());
+  EXPECT_EQ("account_id", root["account_id"].asString());
+}
+
+TEST(LuciContextTest, ParseLuciOAuthTokenResponse) {
+  static const char kResponse[] =
+      "{\"access_token\":\"ya29.token\",\"expiry\":1487915944}";
+
+  LuciOAuthTokenResponse resp;
+  EXPECT_TRUE(ParseLuciOAuthTokenResponse(kResponse, &resp));
+  EXPECT_EQ(0, resp.error_code);
+  EXPECT_EQ("ya29.token", resp.access_token);
+  EXPECT_EQ(1487915944, resp.expiry);
+}
+
+TEST(LuciContextTest, ParseLuciOAuthTokenResponseErrorCase) {
+  static const char kResponse[] =
+      "{\"error_code\": 123, \"error_message\": \"omg, error\"}";
+
+  LuciOAuthTokenResponse resp;
+  EXPECT_TRUE(ParseLuciOAuthTokenResponse(kResponse, &resp));
+  EXPECT_EQ(123, resp.error_code);
+  EXPECT_EQ("omg, error", resp.error_message);
+  EXPECT_EQ("", resp.access_token);
+}
+
+}  // namespace devtools_goma
diff --git a/client/mach_o_parser.cc b/client/mach_o_parser.cc
new file mode 100644
index 0000000..dc9fe3d
--- /dev/null
+++ b/client/mach_o_parser.cc
@@ -0,0 +1,249 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "mach_o_parser.h"
+
+#include <sys/mman.h>
+
+// The following headers exist on Mac OS X; provide them on other platforms.
+#include <mach-o/fat.h>
+#include <mach-o/loader.h>
+
+#include <glog/logging.h>
+
+#include "scoped_fd.h"
+
+namespace {
+
+static void SwapFatArchByteOrder(fat_arch* arch) {
+  arch->cputype = OSSwapInt32(arch->cputype);
+  arch->cpusubtype = OSSwapInt32(arch->cpusubtype);
+  arch->offset = OSSwapInt32(arch->offset);
+  arch->size = OSSwapInt32(arch->size);
+  arch->align = OSSwapInt32(arch->align);
+}
+
+static const string GetArchName(cpu_type_t type, cpu_subtype_t subtype) {
+  // The Intel types are matched only with their generic ("ALL") subtype;
+  // PowerPC is matched on the CPU type alone. Anything else yields "".
+  if (type == CPU_TYPE_I386 && subtype == CPU_SUBTYPE_I386_ALL)
+    return "i386";
+  if (type == CPU_TYPE_X86_64 && subtype == CPU_SUBTYPE_X86_64_ALL)
+    return "x86_64";
+  if (type == CPU_TYPE_POWERPC)
+    return "powerpc";
+  LOG(ERROR) << "unknown CPU type or subtype found:"
+             << " cpu_type=" << type
+             << " cpu_subtype=" << subtype;
+  return "";
+}
+
+bool GetFatArchs(
+    const devtools_goma::ScopedFd& fd,
+    std::vector<fat_arch>* archs,
+    string* raw) {
+  DCHECK(archs);
+  // Parse fat header, always starting from the beginning of the file.
+  if (fd.Seek(0, devtools_goma::ScopedFd::SeekAbsolute)
+      == static_cast<off_t>(-1)) {
+    PLOG(WARNING) << "seek 0: fd=" << fd;
+    return false;
+  }
+
+  fat_header header;
+  if (fd.Read(&header, sizeof(header)) != sizeof(header)) {
+    PLOG(WARNING) << "read fat header:" << fd;
+    return false;
+  }
+  if (raw != nullptr)
+    raw->assign(reinterpret_cast<char*>(&header), sizeof(header));
+  bool reversed;
+  if (header.magic == FAT_MAGIC) {
+    reversed = false;
+  } else if (header.magic == FAT_CIGAM) {
+    reversed = true;
+  } else {
+    // GetFatArchs may be asked to read non-FAT files, so this is not an error.
+    VLOG(1) << "not a FAT file magic: "
+            << " fd=" << fd
+            << " magic=" << std::hex << header.magic;
+    return false;
+  }
+
+  // Parse the fat_arch entries; |raw| keeps them in on-disk byte order.
+  if (reversed) {
+    header.nfat_arch = OSSwapInt32(header.nfat_arch);
+  }
+  for (uint32_t i = 0; i < header.nfat_arch; ++i) {
+    fat_arch arch;
+    if (fd.Read(&arch, sizeof(arch)) != sizeof(arch)) {
+      PLOG(WARNING) << "read fat arch:"
+                    << " entry_id=" << i
+                    << " fd=" << fd;
+      return false;
+    }
+    if (raw != nullptr)
+      raw->append(reinterpret_cast<char*>(&arch), sizeof(arch));
+    if (reversed) {
+      SwapFatArchByteOrder(&arch);
+    }
+    archs->push_back(arch);
+  }
+  return true;
+}
+
+}  // namespace
+
+namespace devtools_goma {
+
+bool GetFatHeader(const ScopedFd& fd, MacFatHeader* fheader) {
+  // Reads the raw header bytes into fheader->raw and the arch table below.
+  std::vector<fat_arch> fat_archs;
+  if (!GetFatArchs(fd, &fat_archs, &fheader->raw)) {
+    return false;
+  }
+  // Convert each fat_arch entry into a MacFatArch.
+  for (const fat_arch& fat : fat_archs) {
+    MacFatArch entry;
+    entry.arch_name = GetArchName(fat.cputype, fat.cpusubtype);
+    entry.offset = fat.offset;
+    entry.size = fat.size;
+    VLOG(1) << "fat:"
+            << " arch=" << entry.arch_name
+            << " offset=" << entry.offset
+            << " size=" << entry.size;
+    fheader->archs.push_back(entry);
+  }
+  return true;
+}
+
+MachO::MachO(const string& filename)
+    : filename_(filename) {
+  fd_.reset(ScopedFd::OpenForRead(filename));
+  // TODO: support non-fat mach object if needed.
+  std::vector<fat_arch> fat_archs;
+  if (!GetFatArchs(fd_, &fat_archs, nullptr)) {
+    LOG(WARNING) << "Cannot read FAT header:"
+                 << " filename=" << filename
+                 << " fd=" << fd_;
+  }
+  // Index the entries by arch name. Entries read before a failure are still
+  // registered, and insert() keeps the first entry when a name repeats.
+  for (const fat_arch& arch : fat_archs) {
+    archs_.insert(
+        make_pair(GetArchName(arch.cputype, arch.cpusubtype), arch));
+  }
+}
+
+MachO::~MachO() {
+}
+
+bool MachO::GetDylibs(const string& cpu_type, std::vector<DylibEntry>* dylibs) {
+  std::map<string, fat_arch>::const_iterator found = archs_.find(cpu_type);
+  if (found == archs_.end()) {
+    LOG(WARNING) << "unknown cpu type: " << cpu_type;
+    return false;
+  }
+
+  const size_t offset = found->second.offset;
+  const size_t len = found->second.size;
+  VLOG(1) << "mmap "
+          << " len=" << len
+          << " offset=" << offset;
+  char* mmapped = reinterpret_cast<char*>(
+      mmap(nullptr, len, PROT_READ, MAP_PRIVATE, fd_.fd(), offset));
+  if (mmapped == MAP_FAILED) {
+    LOG(ERROR) << "mmap failed:"
+               << " filename=" << filename_
+               << " fd=" << fd_.fd()
+               << " len=" << len
+               << " offset=" << offset;
+    return false;
+  }
+  const mach_header* header = reinterpret_cast<const mach_header*>(mmapped);
+  load_command* command;
+  if (header->magic == MH_MAGIC) {
+    command = reinterpret_cast<load_command*>(mmapped + sizeof(mach_header));
+  } else if (header->magic == MH_MAGIC_64) {
+    command = reinterpret_cast<load_command*>(mmapped + sizeof(mach_header_64));
+  } else {
+    // We might not see the different endian mach object.
+    if (header->magic == MH_CIGAM || header->magic == MH_CIGAM_64) {
+      LOG(WARNING) << "Mach object with non-supported endian.";
+    }
+    LOG(WARNING) << "strange magic: "
+                 << " filename=" << filename_
+                 << " magic=" << std::hex << header->magic;
+    munmap(mmapped, len);
+    return false;
+  }
+  VLOG(1) << "mach header info:"
+          << " magic=" << header->magic
+          << " cputype=" << header->cputype
+          << " cpusubtype=" << header->cpusubtype
+          << " filetype=" << header->filetype
+          << " ncmds=" << header->ncmds
+          << " sizeofcmds=" << header->sizeofcmds
+          << " flags=" << header->flags;
+  CHECK_EQ(header->cputype, found->second.cputype);
+  CHECK_EQ(header->cpusubtype, found->second.cpusubtype);
+
+  for (uint32_t i = 0; i < header->ncmds; ++i) {
+    // Since we do not support different endian, we do not convert |command|.
+    // If we support different endian, we should also convert data structures
+    // used in this loop.
+    VLOG(2) << "cmd:"
+            << " type=" << std::hex << command->cmd
+            << " size=" << command->cmdsize;
+    switch (command->cmd) {
+      case LC_IDFVMLIB:
+        FALLTHROUGH_INTENDED;
+      case LC_LOADFVMLIB:
+        LOG(ERROR) << "Sorry, FVMLIB support is not implemented yet.";
+        break;
+      case LC_LOAD_DYLIB:
+        FALLTHROUGH_INTENDED;
+      case LC_LOAD_WEAK_DYLIB:
+        FALLTHROUGH_INTENDED;
+      case LC_REEXPORT_DYLIB:
+        {
+          dylib_command* dycom = reinterpret_cast<dylib_command*>(command);
+          if (dycom->dylib.name.offset < command->cmdsize) {
+            DylibEntry entry;
+            entry.name = string(reinterpret_cast<char*>(command) +
+                dycom->dylib.name.offset);
+            entry.timestamp = dycom->dylib.timestamp;
+            entry.current_version = dycom->dylib.current_version;
+            entry.compatibility_version = dycom->dylib.compatibility_version;
+            dylibs->push_back(entry);
+          } else {
+            LOG(WARNING) << "dylib command broken:"
+                         << " cmd=" << command->cmd
+                         << " cmdsize=" << command->cmdsize
+                         << " dylib.name.offset=" << dycom->dylib.name.offset;
+          }
+        }
+        break;
+      default:
+        VLOG(2) << "command is skipped:"
+                << " type=" << std::hex << command->cmd
+                << " size=" << command->cmdsize;
+        break;
+    }
+    command = reinterpret_cast<load_command*>(
+        reinterpret_cast<char*>(command) + command->cmdsize);
+    CHECK_GT(reinterpret_cast<char*>(command), mmapped);
+    CHECK_LT(reinterpret_cast<char*>(command), mmapped + len);
+  }
+
+  munmap(mmapped, len);
+  return true;
+}
+
+bool MachO::valid() const {
+  return fd_.valid();
+}
+
+}  // namespace devtools_goma
diff --git a/client/mach_o_parser.h b/client/mach_o_parser.h
new file mode 100644
index 0000000..7f8668a
--- /dev/null
+++ b/client/mach_o_parser.h
@@ -0,0 +1,62 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_MACH_O_PARSER_H_
+#define DEVTOOLS_GOMA_CLIENT_MACH_O_PARSER_H_
+
+#include <sys/types.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+#include "scoped_fd.h"
+
+using std::string;
+struct fat_arch;
+
+namespace devtools_goma {
+
+struct MacFatArch {
+  string arch_name;
+  off_t offset;
+  size_t size;
+};
+
+struct MacFatHeader {
+  string raw;
+  std::vector<MacFatArch> archs;
+};
+
+// Gets Fat header from the file.
+// Returns true if it is FAT file and succeeded to get the header.
+// Otherwise, false.
+bool GetFatHeader(const ScopedFd& fd, MacFatHeader* fheader);
+
+class MachO {
+ public:
+  struct DylibEntry {
+    string name;
+    uint32_t timestamp;
+    uint32_t current_version;
+    uint32_t compatibility_version;
+  };
+  explicit MachO(const string& filename);
+  ~MachO();
+  bool GetDylibs(const string& cpu_type, std::vector<DylibEntry>* dylibs);
+  bool valid() const;
+
+ private:
+  std::map<string, fat_arch> archs_;
+  string filename_;
+  ScopedFd fd_;
+
+  DISALLOW_COPY_AND_ASSIGN(MachO);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_MACH_O_PARSER_H_
diff --git a/client/machine_info.cc b/client/machine_info.cc
new file mode 100644
index 0000000..f3b984e
--- /dev/null
+++ b/client/machine_info.cc
@@ -0,0 +1,188 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "machine_info.h"
+
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "basictypes.h"
+#include "glog/logging.h"
+#include "scoped_fd.h"
+#include "util.h"
+
+#if defined(_WIN32)
+#include <psapi.h>
+#include <windows.h>
+#endif
+
+#if defined(__linux__)
+#include <unistd.h>
+#endif
+
+#if defined(__MACH__)
+#include <libproc.h>
+#include <sys/sysctl.h>
+#include <sys/proc_info.h>
+#endif
+
+namespace devtools_goma {
+
+#if defined(_WIN32)
+
+int GetNumCPUs() {
+  SYSTEM_INFO sysinfo;
+  GetSystemInfo(&sysinfo);
+  return sysinfo.dwNumberOfProcessors;
+}
+
+int64_t GetSystemTotalMemory() {
+  MEMORYSTATUSEX status;
+  // dwLength must be set before calling GlobalMemoryStatusEx.
+  status.dwLength = sizeof(status);
+  if (!GlobalMemoryStatusEx(&status)) {
+    LOG(ERROR) << "GlobalMemoryStatusEx failed: " << GetLastError();
+    return 0;
+  }
+  return status.ullTotalPhys;
+}
+
+int64_t GetConsumingMemoryOfCurrentProcess() {
+  DWORD process_id = GetCurrentProcessId();
+
+  ScopedFd process(OpenProcess(PROCESS_QUERY_INFORMATION, FALSE, process_id));
+  if (!process.valid()) {
+    PLOG(ERROR) << "OpenProcess failed";
+    return 0;
+  }
+
+  PROCESS_MEMORY_COUNTERS pmc;
+  if (!GetProcessMemoryInfo(process.handle(), &pmc, sizeof(pmc))) {
+    PLOG(ERROR) << "GetProcessMemoryInfo failed";
+    return 0;
+  }
+
+  return pmc.WorkingSetSize;
+}
+
+#elif defined(__linux__)
+
+int GetNumCPUs() {
+  int cpus = sysconf(_SC_NPROCESSORS_ONLN);
+  if (cpus < 0) {
+    PLOG(ERROR) << "sysconf(_SC_NPROCESSORS_ONLN) failed";
+    return 0;
+  }
+
+  return cpus;
+}
+
+int64_t GetSystemTotalMemory() {
+  const int64_t page_size = sysconf(_SC_PAGESIZE);
+  if (page_size < 0) {
+    PLOG(ERROR) << "sysconf(_SC_PAGESIZE) failed";
+    return 0;
+  }
+
+  const int64_t num_pages = sysconf(_SC_PHYS_PAGES);
+  if (num_pages < 0) {
+    PLOG(ERROR) << "sysconf(_SC_PHYS_PAGES) failed";
+    return 0;
+  }
+
+  return page_size * num_pages;
+}
+
+int64_t GetConsumingMemoryOfCurrentProcess() {
+  // Reads /proc/self/statm; its second column is the number of resident
+  // pages, which is scaled by the page size. Returns 0 on any failure.
+  const int64_t page_size = sysconf(_SC_PAGESIZE);
+  if (page_size < 0) {
+    PLOG(ERROR) << "sysconf(_SC_PAGESIZE) failed";
+    return 0;
+  }
+
+  ScopedFd fd(ScopedFd::OpenForRead("/proc/self/statm"));
+  if (!fd.valid()) {
+    PLOG(ERROR) << "Opening /proc/self/statm failed";
+    return 0;
+  }
+
+  // Zero-filled and read one byte short, so sscanf always sees a NUL.
+  char buf[1024] = {};
+  if (fd.Read(buf, sizeof(buf) - 1) < 0) {
+    PLOG(ERROR) << "Reading /proc/self/statm failed";
+    return 0;
+  }
+
+  int num_pages;
+  if (sscanf(buf, "%*d %d", &num_pages) != 1) {
+    LOG(ERROR) << "Data from /proc/self/statm is not in expected form";
+    return 0;
+  }
+
+  return num_pages * page_size;
+}
+
+#elif defined(__MACH__)
+int GetNumCPUs() {
+  static const char* kCandidates[] = {
+    "hw.logicalcpu_max", "hw.ncpu"
+  };
+
+  int size = 0;
+  size_t len = sizeof(size);
+  for (const auto& candidate : kCandidates) {
+    if (sysctlbyname(candidate, &size, &len, nullptr, 0) == 0) {
+      return size;
+    }
+  }
+
+  // Failed for all candidates.
+  LOG(ERROR) << "sysctlbyname for GetNumCPUs failed";
+  return 0;
+}
+
+int64_t GetSystemTotalMemory() {
+  int64_t size;
+  size_t len = sizeof(size);
+
+  if (sysctlbyname("hw.memsize", &size, &len, nullptr, 0) < 0) {
+    PLOG(ERROR) << "sysctlbyname(hw.memsize) failed";
+    return 0;
+  }
+
+  return size;
+}
+
+int64_t GetConsumingMemoryOfCurrentProcess() {
+  struct proc_taskinfo taskinfo;
+  const int infosize = proc_pidinfo(Getpid(), PROC_PIDTASKINFO, 0,
+                                    &taskinfo, sizeof(taskinfo));
+  // libproc reports failure as 0 (with errno set), so treat <= 0 as an error.
+  if (infosize <= 0) {
+    PLOG(ERROR) << "proc_pidinfo failed";
+    return 0;
+  }
+
+  // According to this blog,
+  // http://vinceyuan.blogspot.jp/2011/12/wrong-info-from-procpidinfo.html
+  // proc_pidinfo sometimes returns too few bytes, so the returned size has
+  // to be checked before |taskinfo| is used.
+  // The cast is safe (infosize > 0 here) and avoids a signed/unsigned compare.
+  if (static_cast<size_t>(infosize) < sizeof(taskinfo)) {
+    LOG(ERROR) << "proc_pidinfo returned too few bytes " << infosize
+               << " (expected " << sizeof(taskinfo) << ")";
+    return 0;
+  }
+
+  return taskinfo.pti_resident_size;
+}
+
+#else
+#  error "Unknown architecture"
+#endif
+
+}  // namespace devtools_goma
diff --git a/client/machine_info.h b/client/machine_info.h
new file mode 100644
index 0000000..aa17609
--- /dev/null
+++ b/client/machine_info.h
@@ -0,0 +1,29 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_MACHINE_INFO_H_
+#define DEVTOOLS_GOMA_CLIENT_MACHINE_INFO_H_
+
+#include <stdint.h>
+
+namespace devtools_goma {
+
+// Gets the number of CPUs. If failed obtaining, 0 will be returned.
+int GetNumCPUs();
+
+// Gets the total size of memory in bytes.
+// If failed obtaining, 0 will be returned.
+int64_t GetSystemTotalMemory();
+
+// Gets consumed memory of the current process in bytes.
+//   On Linux, this is equal to "RES" in top.
+//   On Windows, this is equal to "Working Set" in Task Manager.
+//   On Mac, this is equal to "Real Memory" in Activity Monitor.
+// If failed obtaining, 0 will be returned.
+int64_t GetConsumingMemoryOfCurrentProcess();
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_MACHINE_INFO_H_
diff --git a/client/machine_info_unittest.cc b/client/machine_info_unittest.cc
new file mode 100644
index 0000000..87c40f6
--- /dev/null
+++ b/client/machine_info_unittest.cc
@@ -0,0 +1,18 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "machine_info.h"
+
+#include <gtest/gtest.h>
+
+namespace devtools_goma {
+
+TEST(MachineInfoTest, Smoke) {
+  EXPECT_NE(0, GetNumCPUs());
+  EXPECT_NE(0, GetSystemTotalMemory());
+  EXPECT_NE(0, GetConsumingMemoryOfCurrentProcess());
+}
+
+}  // namespace devtools_goma
diff --git a/client/mock_socket_factory.cc b/client/mock_socket_factory.cc
new file mode 100644
index 0000000..300c3ae
--- /dev/null
+++ b/client/mock_socket_factory.cc
@@ -0,0 +1,177 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "mock_socket_factory.h"
+
+#ifndef _WIN32
+#include <sys/socket.h>
+#include <sys/types.h>
+#else
+#include "socket_helper_win.h"
+#endif
+
+#include "callback.h"
+#include "compiler_specific.h"
+#include "platform_thread.h"
+#include "worker_thread_manager.h"
+
+#include "glog/logging.h"
+
+namespace devtools_goma {
+
+int OpenSocketPairForTest(int socks[2]) {
+#ifdef _WIN32
+  // On Win32, no AF_UNIX (nor AF_LOCAL).
+  sa_family_t af = AF_INET;
+#else
+  // On linux or so, socketpair only accepts AF_UNIX (or AF_LOCAL).
+  int af = AF_UNIX;
+#endif
+  int r = socketpair(af, SOCK_STREAM, 0, socks);
+  LOG(INFO) << "socketpair r=" << r << " 0=" << socks[0] << " 1=" << socks[1];
+  return r;
+}
+
+MockSocketFactory::~MockSocketFactory() {
+  if (observer_ && is_owned_ && sock_ > 0) {
+    observer_->WillCloseSocket(sock_);
+  }
+#ifndef _WIN32
+  close(sock_);
+#else
+  closesocket(sock_);
+#endif
+  if (socket_status_ != nullptr) {
+    socket_status_->is_closed_ = true;
+  }
+  LOG(INFO) << "close sock=" << sock_;
+}
+
+ScopedSocket MockSocketFactory::NewSocket() {
+  CHECK(is_owned_);
+  if (sock_ > 0) {
+    set_is_owned(false);
+  }
+  LOG(INFO) << "new sock=" << sock_;
+  return ScopedSocket(sock_);
+}
+
+void MockSocketFactory::ReleaseSocket(ScopedSocket&& sock) {
+  LOG(INFO) << "release sock=" << sock;
+  if (socket_status_ != nullptr) {
+    socket_status_->is_released_ = true;
+  }
+  sock.release();
+  set_is_owned(true);
+}
+
+void MockSocketFactory::CloseSocket(ScopedSocket&& sock, bool err) {
+  if (observer_ && sock.get() == sock_) {
+    observer_->WillCloseSocket(sock_);
+  }
+#ifndef _WIN32
+  close(sock_);
+#else
+  closesocket(sock_);
+#endif
+  LOG(INFO) << "close sock=" << sock_;
+  if (socket_status_ != nullptr) {
+    socket_status_->is_err_ = err;
+    socket_status_->is_closed_ = true;
+  }
+  sock_ = -1;
+  CHECK(!is_owned_);
+}
+
+MockSocketServer::MockSocketServer(WorkerThreadManager* wm)
+    : wm_(wm) {
+  int n = wm_->num_threads();
+  pool_ = wm_->StartPool(1, "mock_socket_server");
+  while (wm_->num_threads() < n + 1U)
+    PlatformThread::Sleep(1000);
+}
+
+MockSocketServer::~MockSocketServer() {
+}
+
+void MockSocketServer::ServerRead(int sock, string* buf) {
+  wm_->RunClosureInPool(
+      FROM_HERE,
+      pool_,
+      NewCallback(
+          this, &MockSocketServer::DoServerRead, sock, buf),
+      WorkerThreadManager::PRIORITY_LOW);
+}
+
+void MockSocketServer::DoServerRead(int sock, string* buf) {
+  const size_t read_size = buf->size();
+  size_t nread = 0;
+  LOG(INFO) << "DoServerRead sock=" << sock << " size=" << read_size;
+  while (nread < read_size) {
+#ifndef _WIN32
+    int n = read(sock, &(*buf)[nread], read_size - nread);
+#else
+    int n = recv(sock, &(*buf)[nread], read_size - nread, 0);
+#endif
+    // Handle errors first: a negative |n| must never be used as a length.
+    if (n < 0) {
+      PLOG(ERROR) << "read";
+      break;
+    }
+    LOG(INFO) << "DoServerRead sock=" << sock << " " << (read_size - nread)
+              << " => " << n
+              << " data=" << string(buf->data() + nread, n);
+    if (n == 0) break;  // EOF
+    nread += n;
+  }
+}
+
+void MockSocketServer::ServerWrite(int sock, string buf) {
+  wm_->RunClosureInPool(
+      FROM_HERE,
+      pool_,
+      NewCallback(
+          this, &MockSocketServer::DoServerWrite, sock, buf),
+      WorkerThreadManager::PRIORITY_LOW);
+}
+
+void MockSocketServer::DoServerWrite(int sock, string buf) {
+  size_t written = 0;
+  LOG(INFO) << "DoServerWrite sock=" << sock << " size=" << buf.size();
+  while (written < buf.size()) {
+#ifndef _WIN32
+    int n = write(sock, &buf[written], buf.size() - written);
+#else
+    int n = send(sock, &buf[written], buf.size() - written, 0);
+#endif
+    LOG(INFO) << "DoServerWrite sock=" << sock << " " << (buf.size() - written)
+              << " => " << n;
+    if (n <= 0) {
+      PLOG(ERROR) << "write";
+      break;
+    }
+    written += n;
+  }
+}
+
+void MockSocketServer::ServerClose(int sock) {
+  wm_->RunClosureInPool(
+      FROM_HERE,
+      pool_,
+      NewCallback(
+          this, &MockSocketServer::DoServerClose, sock),
+      WorkerThreadManager::PRIORITY_LOW);
+}
+
+void MockSocketServer::DoServerClose(int sock) {
+  LOG(INFO) << "DoServerClose sock=" << sock;
+#ifndef _WIN32
+  close(sock);
+#else
+  closesocket(sock);
+#endif
+}
+
+}  // namespace devtools_goma
diff --git a/client/mock_socket_factory.h b/client/mock_socket_factory.h
new file mode 100644
index 0000000..b924261
--- /dev/null
+++ b/client/mock_socket_factory.h
@@ -0,0 +1,133 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_MOCK_SOCKET_FACTORY_H_
+#define DEVTOOLS_GOMA_CLIENT_MOCK_SOCKET_FACTORY_H_
+
+#include <string>
+
+#include "basictypes.h"
+#include "scoped_fd.h"
+#include "socket_factory.h"
+
+#ifdef _WIN32
+# include "socket_helper_win.h"
+#else
+# include <unistd.h>
+#endif
+
+using std::string;
+
+namespace devtools_goma {
+
+class WorkerThreadManager;
+
+int OpenSocketPairForTest(int socks[2]);
+
+// SocketFactory for test.
+class MockSocketFactory : public SocketFactory {
+ public:
+
+  class SocketStatus {
+   public:
+    SocketStatus()
+        : is_owned_(true),
+          is_closed_(false),
+          is_released_(false),
+          is_err_(false) {
+    }
+
+    bool is_closed() { return is_closed_; }
+    bool is_owned() { return is_owned_; }
+    bool is_released() { return is_released_; }
+    bool is_err() { return is_err_; }
+
+   private:
+    friend class MockSocketFactory;
+
+    bool is_owned_; // true if the socket is owned by MockSocketFactory.
+    bool is_closed_; // true if the socket is closed.
+    bool is_released_;// true if the socket has been obtained once and released.
+    bool is_err_; // true if the socket is closed with error.
+  };
+
+  // Does not take ownership of |socket_status|
+  explicit MockSocketFactory(int sock, SocketStatus* socket_status = nullptr)
+      : sock_(sock),
+        dest_("mock:80"),
+        host_name_("mock"),
+        port_(80),
+        is_owned_(true),
+        socket_status_(socket_status) {
+  }
+  ~MockSocketFactory() override;
+  bool IsInitialized() const override { return true; }
+
+  ScopedSocket NewSocket() override;
+
+  void ReleaseSocket(ScopedSocket&& sock) override;
+  void CloseSocket(ScopedSocket&& sock, bool err) override;
+
+  string DestName() const override { return dest_; }
+  string host_name() const override { return host_name_; }
+  int port() const override { return port_; }
+  string DebugString() const override { return "MockSocketFactory"; }
+
+  void set_dest(const string& dest) { dest_ = dest; }
+  void set_host_name(const string& host_name) { host_name_ = host_name; }
+  void set_port(int port) { port_ = port; }
+
+  void set_is_owned(bool b) {
+    is_owned_ = b;
+    if (socket_status_ != nullptr) {
+      socket_status_->is_owned_ = b;
+    }
+  }
+
+ private:
+  int sock_;
+  string dest_;
+  string host_name_;
+  int port_;
+
+  // |is_owned_| is used to hold the state for ~MockSocketFactory().
+  // This value should be the same as socket_status_->is_owned_.
+  bool is_owned_;
+  SocketStatus* socket_status_;
+  DISALLOW_COPY_AND_ASSIGN(MockSocketFactory);
+};
+
+class MockSocketServer {
+ public:
+  // MockSocketServer creates a new pool in wm and runs the following actions
+  // on a thread in the pool.
+  explicit MockSocketServer(WorkerThreadManager* wm);
+  ~MockSocketServer();
+
+  // Test server will read from sock and store received data in buf.
+  // Caller should set expected size to buf by buf->resize(N).
+  // Once N bytes are read in buf, this action will finish.
+  void ServerRead(int sock, string* buf);
+
+  // Test server will write buf to sock.
+  void ServerWrite(int sock, string buf);
+
+  // Test server will close the sock.
+  void ServerClose(int sock);
+
+ private:
+  void DoServerRead(int sock, string* buf);
+  void DoServerWrite(int sock, string buf);
+  void DoServerClose(int sock);
+
+  WorkerThreadManager* wm_;
+  int pool_;
+
+  DISALLOW_COPY_AND_ASSIGN(MockSocketServer);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_MOCK_SOCKET_FACTORY_H_
diff --git a/client/multi_http_rpc.cc b/client/multi_http_rpc.cc
new file mode 100644
index 0000000..1cdaab2
--- /dev/null
+++ b/client/multi_http_rpc.cc
@@ -0,0 +1,526 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "multi_http_rpc.h"
+
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "autolock_timer.h"
+#include "callback.h"
+#include "compiler_specific.h"
+#include "glog/logging.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_data.pb.h"
+MSVC_POP_WARNING()
+#include "lockhelper.h"
+#include "scoped_fd.h"  // for FAIL
+#include "simple_timer.h"
+#include "worker_thread_manager.h"
+
+namespace devtools_goma {
+
+// Zero-initializes every option.  Note that MultiHttpRPC's constructor
+// requires max_req_in_call > 0, so callers must fill the options in.
+MultiHttpRPC::Options::Options()
+    : max_req_in_call(0), req_size_threshold_in_call(0), check_interval_ms(0) {}
+
+// A batch of single calls that is sent to the server as one HTTP RPC.
+// Calls are accumulated with AddCall() and then either issued with Call()
+// or aborted with Cancel().  A MultiJob deletes itself when it finishes
+// (in Done(), SingleDone() or Cancel()), so it must be allocated with new.
+class MultiHttpRPC::MultiJob {
+ public:
+  // Job is a single call.
+  // done callback will be called on the same thread that call is requested.
+  class Job {
+   public:
+    // Remembers the calling thread (|callback| is later run on it), caches
+    // the serialized size of |req| and starts the pending-time timer.
+    Job(WorkerThreadManager* wm,
+        HttpRPC::Status* http_rpc_stat,
+        const google::protobuf::Message* req,
+        google::protobuf::Message* resp,
+        OneshotClosure* callback)
+        : wm_(wm),
+          thread_id_(wm_->GetCurrentThreadId()),
+          http_rpc_stat_(http_rpc_stat),
+          req_(req), resp_(resp), callback_(callback) {
+      req_size_ = req_->ByteSize();
+      timer_.Start();
+    }
+
+    HttpRPC::Status* http_rpc_stat() const { return http_rpc_stat_; }
+    const google::protobuf::Message* req() const { return req_; }
+    int req_size() const { return req_size_; }
+    google::protobuf::Message* mutable_resp() { return resp_; }
+
+    // Marks the moment this call leaves the pending queue: records how long
+    // it was pending and, when it is part of a multi call, the trace id of
+    // the master (first) job.  |master_job| is nullptr for a single call.
+    void StartCall(Job* master_job) {
+      DCHECK(http_rpc_stat_ != nullptr);
+      DCHECK(!http_rpc_stat_->finished);
+      if (master_job != nullptr) {
+        http_rpc_stat_->master_trace_id = master_job->http_rpc_stat()->trace_id;
+      }
+      http_rpc_stat_->pending_time = timer_.GetInMs();
+    }
+
+    // Finishes this call: marks the status as finished, schedules the done
+    // callback (if any) on the requesting thread and deletes this Job.
+    // The status must not be touched after |finished| is set, hence the
+    // pointer is dropped immediately.
+    void Done() {
+      DCHECK(!http_rpc_stat_->finished);
+      http_rpc_stat_->finished = true;  // will wake up HttpRPC::Wait
+      http_rpc_stat_ = nullptr;
+      if (callback_ != nullptr) {
+        wm_->RunClosureInThread(FROM_HERE,
+                                thread_id_, callback_,
+                                WorkerThreadManager::PRIORITY_MED);
+        callback_ = nullptr;
+      }
+      delete this;
+    }
+
+   private:
+    // Private: a Job is destroyed only through Done().
+    ~Job() {
+      CHECK(callback_ == nullptr);
+    }
+    WorkerThreadManager* wm_;
+    WorkerThreadManager::ThreadId thread_id_;
+    HttpRPC::Status* http_rpc_stat_;
+    const google::protobuf::Message* req_;
+    int req_size_;
+    google::protobuf::Message* resp_;
+    OneshotClosure* callback_;
+    SimpleTimer timer_;
+    DISALLOW_COPY_AND_ASSIGN(Job);
+  };
+
+  MultiJob(WorkerThreadManager* wm, MultiHttpRPC* multi_rpc)
+      : wm_(wm),
+        multi_rpc_(multi_rpc),
+        req_size_(0) {
+  }
+
+  // Adds single call to this Multi call.
+  // It must be called before calling Setup().
+  void AddCall(HttpRPC::Status* http_rpc_stat,
+               const google::protobuf::Message* req,
+               google::protobuf::Message* resp,
+               OneshotClosure* callback) {
+    Job* job = new Job(wm_, http_rpc_stat, req, resp, callback);
+    jobs_.push_back(job);
+    req_size_ += job->req_size();
+  }
+  // Number of single calls added so far.
+  size_t num_call() const { return jobs_.size(); }
+  // Sum of the request sizes of all added calls.
+  size_t req_size() const { return req_size_; }
+
+  // Calls requests added by AddCall.
+  // This MultiJob will be deleted once responses are handled.
+  // A batch holding exactly one call is sent to the single-call path as is;
+  // otherwise the owner's Setup() builds the merged request/response and the
+  // batch is sent to the multi-call path.
+  void Call() {
+    DCHECK_GT(jobs_.size(), 0U);
+    VLOG(1) << "multi rpc " << multi_rpc_->multi_path_
+            << " Call num_call=" << num_call();
+    if (num_call() == 1) {
+      jobs_[0]->StartCall(nullptr);
+      // Uses other HttpRPC::Status for underlying http rpc call.
+      http_rpc_stat_ = *jobs_[0]->http_rpc_stat();
+      DCHECK(!http_rpc_stat_.finished);
+      LOG(INFO) << http_rpc_stat_.trace_id << " rpc single";
+      multi_rpc_->http_rpc_->CallWithCallback(
+          multi_rpc_->path_, jobs_[0]->req(), jobs_[0]->mutable_resp(),
+          mutable_status(),
+          NewCallback(
+              this, &MultiHttpRPC::MultiJob::SingleDone));
+      return;
+    }
+
+    CHECK_GT(jobs_.size(), 0U);
+    multi_rpc_->Setup(this);
+    // Initializes with the first ExecReq's status (authorization,
+    // timeout_secs, etc.)
+    http_rpc_stat_ = *jobs_[0]->http_rpc_stat();
+    DCHECK(!http_rpc_stat_.finished);
+    LOG(INFO) << http_rpc_stat_.master_trace_id << " rpc multi:"
+              << TraceIdList();
+    multi_rpc_->http_rpc_->CallWithCallback(
+        multi_rpc_->multi_path_, req(), mutable_resp(), mutable_status(),
+        NewCallback(
+            this, &MultiHttpRPC::MultiJob::Done));
+  }
+
+  // Takes ownership of the merged request / response built by Setup().
+  void SetReq(std::unique_ptr<google::protobuf::Message> req) {
+    req_ = std::move(req);
+  }
+  void SetResp(std::unique_ptr<google::protobuf::Message> resp) {
+    resp_ = std::move(resp);
+  }
+
+  // Returns the single calls in this batch.  Returned by const reference so
+  // that callers which index into it repeatedly do not copy the vector on
+  // every call; the reference is valid only while this MultiJob is alive.
+  const std::vector<Job*>& jobs() const { return jobs_; }
+
+  const google::protobuf::Message* req() const { return req_.get(); }
+  google::protobuf::Message* mutable_resp() { return resp_.get(); }
+  HttpRPC::Status* mutable_status() { return &http_rpc_stat_; }
+
+  // Cancels pending jobs. Must be called before calling Call.
+  // Every job is finished with err = FAIL, then this MultiJob is deleted.
+  void Cancel() {
+    VLOG(1) << "multi rpc " << multi_rpc_->multi_path_
+            << " Cancel num_call=" << num_call();
+    for (size_t i = 0; i < jobs_.size(); ++i) {
+      Job* job = jobs_[i];
+      HttpRPC::Status* stat = job->http_rpc_stat();
+      stat->connect_success = false;
+      stat->err = FAIL;
+      stat->err_message = "multi_rpc canceled";
+      job->Done();  // job will be deleted.
+      jobs_[i] = nullptr;
+    }
+    delete this;
+  }
+
+ private:
+  // Private: a MultiJob deletes itself (see Done, SingleDone and Cancel).
+  ~MultiJob() {}
+
+  // Returns the trace ids of all jobs, each preceded by a space, for logging.
+  string TraceIdList() const {
+    std::ostringstream ss;
+    for (const auto* job : jobs_) {
+      ss << " " << job->http_rpc_stat()->trace_id;
+    }
+    return ss.str();
+  }
+
+  // Multi call done callback.
+  // Distributes the result of the merged call to every job, finishes the
+  // jobs, tells the owner that one on-the-fly job is done and deletes this.
+  // A 404 on the multi call disables multi calls on the owner.
+  void Done() {
+    VLOG(1) << "multi rpc " << multi_rpc_->multi_path_
+            << " Done num_call=" << num_call();
+    LOG(INFO) << http_rpc_stat_.master_trace_id << " rpc multi done:"
+              << TraceIdList();
+    LOG_IF(INFO, !http_rpc_stat_.response_header.empty())
+        << "MultiHttpRPC done: http response="
+        << http_rpc_stat_.response_header;
+    if (http_rpc_stat_.err) {
+      LOG(WARNING) << http_rpc_stat_.err_message;
+      if (http_rpc_stat_.http_return_code == 404)
+        multi_rpc_->Disable();
+    }
+    for (size_t i = 0; i < jobs_.size(); ++i) {
+      Job* job = jobs_[i];
+      HttpRPC::Status* stat = job->http_rpc_stat();
+      DCHECK(!stat->finished);
+      if (i == 0) {
+        // size and time stat stored only in the first call.
+        stat->req_size = http_rpc_stat_.req_size;
+        stat->resp_size = http_rpc_stat_.resp_size;
+        stat->raw_req_size = http_rpc_stat_.raw_req_size;
+        stat->raw_resp_size = http_rpc_stat_.raw_resp_size;
+        stat->req_build_time = http_rpc_stat_.req_build_time;
+        stat->req_send_time = http_rpc_stat_.req_send_time;
+        stat->wait_time = http_rpc_stat_.wait_time;
+        stat->resp_recv_time = http_rpc_stat_.resp_recv_time;
+        stat->resp_parse_time = http_rpc_stat_.resp_parse_time;
+        stat->num_retry = http_rpc_stat_.num_retry;
+      }
+      // Lets the subclass extract the i-th response and set the per-call
+      // http_return_code, which is checked below.
+      multi_rpc_->Done(this, i, stat, job->mutable_resp());
+      stat->connect_success = true;
+      stat->err = http_rpc_stat_.err;
+      stat->err_message = http_rpc_stat_.err_message;
+      if (stat->err == OK && stat->http_return_code != 200) {
+        stat->err = FAIL;
+        std::ostringstream ss;
+        ss << "MultiCall ok:" << stat->err_message
+           << " but SingleCall error:" << stat->http_return_code;
+        stat->err_message = ss.str();
+      }
+      stat->response_header = http_rpc_stat_.response_header;
+      job->Done();  // job will be deleted.
+      jobs_[i] = nullptr;
+    }
+    multi_rpc_->JobDone();
+    delete this;
+  }
+
+  // Single call done callback.
+  void SingleDone() {
+    LOG(INFO) << http_rpc_stat_.trace_id << " rpc single done";
+    VLOG(1) << "multi rpc " << multi_rpc_->multi_path_
+            << " SingleDone num_call=" << num_call();
+    CHECK_EQ(jobs_.size(), 1U);
+    CHECK(http_rpc_stat_.finished);
+    // Copy http_rpc_stat_ except finished.
+    // If finished becomes true, waiting thread would destruct HttpRPC::Status.
+    // job's http_rpc_stat finished would become true in Job::Done().
+    HttpRPC::Status status = http_rpc_stat_;
+    status.finished = false;
+    *jobs_[0]->http_rpc_stat() = status;
+    jobs_[0]->Done();  // job will be deleted.
+    jobs_[0] = nullptr;
+    multi_rpc_->JobDone();
+    delete this;
+  }
+
+  WorkerThreadManager* wm_;
+  MultiHttpRPC* multi_rpc_;
+
+  // Merged request / response for the multi call; set by Setup().
+  std::unique_ptr<google::protobuf::Message> req_;
+  std::unique_ptr<google::protobuf::Message> resp_;
+  // Status of the underlying http rpc call (single or multi).
+  HttpRPC::Status http_rpc_stat_;
+  std::vector<Job*> jobs_;
+  size_t req_size_;
+
+  DISALLOW_COPY_AND_ASSIGN(MultiJob);
+};
+
+// |path| is used for single calls and |multi_path| for batched calls.
+// options.max_req_in_call must be greater than zero.
+MultiHttpRPC::MultiHttpRPC(
+    HttpRPC* http_rpc,
+    const string& path,
+    const string& multi_path,
+    const Options& options,
+    WorkerThreadManager* wm)
+    : wm_(wm),
+      http_rpc_(http_rpc),
+      path_(path),
+      multi_path_(multi_path),
+      options_(options),
+      periodic_callback_id_(kInvalidPeriodicClosureId),
+      cond_(&mu_),
+      num_multi_job_(0),
+      available_(true),
+      num_call_by_req_num_(0),
+      num_call_by_req_size_(0),
+      num_call_by_latency_(0) {
+  CHECK_GT(options_.max_req_in_call, 0U);
+  // Indexed by the number of requests in a call (1..max_req_in_call);
+  // index 0 is unused.
+  num_call_by_multi_.resize(options_.max_req_in_call + 1);
+}
+
+// The periodic CheckPending closure must already have been unregistered
+// (Wait() does this) before the object is destroyed.
+MultiHttpRPC::~MultiHttpRPC() {
+  CHECK_EQ(periodic_callback_id_, kInvalidPeriodicClosureId);
+}
+
+// Issues |req|, either directly as a single call or as part of a batch.
+//
+// When multi calls are disabled or max_req_in_call is 1, the request is sent
+// immediately to |path_|.  Otherwise it is appended to the pending MultiJob
+// selected by MultiJobKey(req); that batch is sent right away when it
+// reaches max_req_in_call requests, when its accumulated request size reaches
+// req_size_threshold_in_call, or when the client is shutting down.  Batches
+// that are not sent here are flushed later by CheckPending().
+//
+// |callback| is run once the call has finished.
+void MultiHttpRPC::Call(
+    HttpRPC::Status* http_rpc_stat,
+    const google::protobuf::Message* req,
+    google::protobuf::Message* resp,
+    OneshotClosure* callback) {
+  // |available_| is written by Disable() under |mu_|, so it has to be read
+  // under |mu_| as well (as available() does) to avoid a data race.
+  bool single_call = false;
+  {
+    AUTOLOCK(lock, &mu_);
+    single_call = !available_ || options_.max_req_in_call == 1;
+    if (single_call) {
+      ++num_call_by_multi_[1];
+    }
+  }
+  if (single_call) {
+    // Issue the rpc outside of the lock.
+    http_rpc_->CallWithCallback(
+        path_, req, resp, http_rpc_stat, callback);
+    return;
+  }
+
+  MultiJob* multi_job = nullptr;
+  {
+    AUTOLOCK(lock, &mu_);
+
+    // If it is the first call, register periodic checker.
+    if (!http_rpc_->client()->shutting_down() &&
+        periodic_callback_id_ == kInvalidPeriodicClosureId) {
+      periodic_callback_id_ = wm_->RegisterPeriodicClosure(
+          FROM_HERE, options_.check_interval_ms,
+          NewPermanentCallback(this, &MultiHttpRPC::CheckPending));
+    }
+
+    const string& key = MultiJobKey(req);
+    MultiJob* pending_multi_job = pending_multi_jobs_[key];
+    if (pending_multi_job == nullptr) {
+      pending_multi_job = pending_multi_jobs_[key] = new MultiJob(wm_, this);
+    }
+    pending_multi_job->AddCall(http_rpc_stat, req, resp, callback);
+    // While shutting down no periodic checker is registered, so nothing
+    // would flush a pending batch later; send it immediately.
+    bool call_now = http_rpc_->client()->shutting_down();
+    if (pending_multi_job->num_call() == options_.max_req_in_call) {
+      ++num_call_by_req_num_;
+      call_now = true;
+    } else if (pending_multi_job->req_size() >=
+               options_.req_size_threshold_in_call) {
+      ++num_call_by_req_size_;
+      call_now = true;
+    }
+    if (call_now) {
+      // Detach the batch from the pending map; it is now on-the-fly.
+      multi_job = pending_multi_job;
+      ++num_multi_job_;
+      pending_multi_jobs_[key] = nullptr;
+      DCHECK_LE(multi_job->num_call(), options_.max_req_in_call);
+      ++num_call_by_multi_[multi_job->num_call()];
+    }
+  }
+  // Issue the rpc outside of the lock.
+  if (multi_job != nullptr)
+    multi_job->Call();
+}
+
+// Shuts batching down: unregisters the periodic checker, cancels every batch
+// that is still pending and then blocks until no multi job is on-the-fly.
+// Must be called only while the client is shutting down.
+void MultiHttpRPC::Wait() {
+  LOG(INFO) << "Wait";
+  AUTOLOCK(lock, &mu_);
+  DCHECK(http_rpc_->client()->shutting_down());
+  if (periodic_callback_id_ != kInvalidPeriodicClosureId) {
+    wm_->UnregisterPeriodicClosure(periodic_callback_id_);
+    periodic_callback_id_ = kInvalidPeriodicClosureId;
+  }
+  // Pending batches have not been issued yet, so they can simply be canceled.
+  // Cancel() deletes the MultiJob, hence the entry is reset to nullptr.
+  for (auto& entry : pending_multi_jobs_) {
+    if (entry.second != nullptr) {
+      entry.second->Cancel();
+      entry.second = nullptr;
+    }
+  }
+  for (;;) {
+    // Busy while a job is on-the-fly or a new pending batch has appeared.
+    bool busy = num_multi_job_ > 0;
+    if (!busy) {
+      for (const auto& entry : pending_multi_jobs_) {
+        if (entry.second != nullptr) {
+          busy = true;
+          break;
+        }
+      }
+    }
+    if (!busy) {
+      break;
+    }
+    LOG(INFO) << "num_multi_job=" << num_multi_job_;
+    // NOTE(review): presumably cond_.Wait() releases |mu_| while blocked so
+    // that JobDone() can update num_multi_job_ -- confirm against
+    // ConditionVariable in lockhelper.h.
+    cond_.Wait();
+  }
+}
+
+// Reports whether multi calls are still enabled.  |available_| is read under
+// |mu_|.
+bool MultiHttpRPC::available() {
+  AUTOLOCK(lock, &mu_);
+  const bool is_available = available_;
+  return is_available;
+}
+
+// Default implementation: every request maps to the same (empty) key, so all
+// requests share one pending batch.
+string MultiHttpRPC::MultiJobKey(const google::protobuf::Message* req) {
+  return string();
+}
+
+// Periodic closure that flushes pending batches.
+//
+// Every pending MultiJob holding at least one call is detached from
+// |pending_multi_jobs_| and scheduled to run MultiJob::Call() on a worker, so
+// requests are issued even when a batch never fills up.  When multi calls
+// have been disabled, the periodic closure also unregisters itself.
+void MultiHttpRPC::CheckPending() {
+  std::vector<MultiJob*> multi_jobs;
+  PeriodicClosureId periodic_callback_to_delete = kInvalidPeriodicClosureId;
+  {
+    AUTOLOCK(lock, &mu_);
+    for (auto& entry : pending_multi_jobs_) {
+      MultiJob* pending_multi_job = entry.second;
+      if (pending_multi_job != nullptr &&
+          pending_multi_job->num_call() > 0) {
+        multi_jobs.push_back(pending_multi_job);
+        entry.second = nullptr;
+        // The job is on-the-fly from now on.  Its completion callback calls
+        // JobDone(), which decrements this counter, and Wait() relies on the
+        // counter to know when every issued job has finished; without this
+        // increment the counter would go negative.
+        ++num_multi_job_;
+        DCHECK_LE(pending_multi_job->num_call(), options_.max_req_in_call);
+        ++num_call_by_latency_;
+        ++num_call_by_multi_[pending_multi_job->num_call()];
+      }
+    }
+    if (periodic_callback_id_ != kInvalidPeriodicClosureId && !available_) {
+      periodic_callback_to_delete = periodic_callback_id_;
+      periodic_callback_id_ = kInvalidPeriodicClosureId;
+    }
+  }
+  // Issue the calls outside of the lock.
+  for (const auto& multi_job : multi_jobs) {
+    wm_->RunClosure(
+        FROM_HERE,
+        NewCallback(
+            multi_job, &MultiHttpRPC::MultiJob::Call),
+        WorkerThreadManager::PRIORITY_MED);
+  }
+
+  if (periodic_callback_to_delete != kInvalidPeriodicClosureId) {
+    LOG(INFO) << "Unregister periodic callback for MultiHttpRPC "
+              << multi_path_;
+    // This runs on the alarm worker. Unregister the closure on another worker.
+    wm_->RunClosure(
+        FROM_HERE,
+        NewCallback(
+            this, &MultiHttpRPC::UnregisterCheckPending,
+            periodic_callback_to_delete),
+        WorkerThreadManager::PRIORITY_IMMEDIATE);
+  }
+}
+
+// Unregisters the periodic closure |id|.  Scheduled by CheckPending() so that
+// the unregistration does not run on the worker executing the closure.
+void MultiHttpRPC::UnregisterCheckPending(PeriodicClosureId id) {
+  wm_->UnregisterPeriodicClosure(id);
+}
+
+// Turns multi calls off.  A warning is logged only on the transition from
+// enabled to disabled.
+void MultiHttpRPC::Disable() {
+  AUTOLOCK(lock, &mu_);
+  if (available_) {
+    LOG(WARNING) << "Disable MultiHttpRPC call " << multi_path_;
+  }
+  available_ = false;
+}
+
+// Called by a MultiJob when it has finished.  Decrements the number of
+// on-the-fly jobs and, once none is left, wakes up Wait(), which blocks on
+// |cond_| until the count is no longer positive.  Without the signal Wait()
+// could block forever.
+void MultiHttpRPC::JobDone() {
+  AUTOLOCK(lock, &mu_);
+  --num_multi_job_;
+  // Wait() treats any non-positive count as "not busy".
+  if (num_multi_job_ <= 0) {
+    cond_.Signal();
+  }
+}
+
+// Returns a multi-line, human readable summary of the configuration and of
+// the call counters.
+string MultiHttpRPC::DebugString() const {
+  AUTOLOCK(lock, &mu_);
+
+  std::ostringstream out;
+  out << "path=" << path_ << '\n';
+  if (!available_) {
+    out << "multi_call disabled" << '\n';
+  } else {
+    out << "multi_path=" << multi_path_ << '\n';
+    // Each option is followed by the number of calls it triggered.
+    out << " max req in call=" << options_.max_req_in_call
+        << " : call=" << num_call_by_req_num_ << '\n';
+    out << " req size threshold in call="
+        << options_.req_size_threshold_in_call
+        << " : call=" << num_call_by_req_size_ << '\n';
+    out << " check interval ms=" << options_.check_interval_ms
+        << " : call=" << num_call_by_latency_ << '\n';
+  }
+  out << "num call by multi:" << '\n';
+  // Index 0 is skipped: a call always carries at least one request.
+  const size_t num_buckets = num_call_by_multi_.size();
+  for (size_t num_req = 1; num_req < num_buckets; ++num_req) {
+    out << num_req << " reqs in call=" << num_call_by_multi_[num_req] << '\n';
+  }
+  return out.str();
+}
+
+// The same |path| is used for both single and multi calls.
+MultiFileStore::MultiFileStore(
+    HttpRPC* http_rpc,
+    const string& path,
+    const MultiHttpRPC::Options& options,
+    WorkerThreadManager* wm)
+    : MultiHttpRPC(http_rpc, path, path, options, wm) {
+}
+
+// Nothing to release beyond what the base class handles.
+MultiFileStore::~MultiFileStore() {}
+
+// Typed entry point for StoreFile requests; forwards to MultiHttpRPC::Call().
+void MultiFileStore::StoreFile(
+    HttpRPC::Status* http_rpc_stat,
+    const StoreFileReq* req, StoreFileResp* resp,
+    OneshotClosure* callback) {
+  Call(http_rpc_stat, req, resp, callback);
+}
+
+// Builds the merged StoreFileReq for |job|: the single blob of every
+// individual request is swapped into the merged request (so it is not
+// copied), and requester_info is taken from the first request.  An empty
+// StoreFileResp is installed to receive the merged response.
+void MultiFileStore::Setup(MultiHttpRPC::MultiJob* job) {
+  std::unique_ptr<StoreFileReq> merged_req(new StoreFileReq);
+  for (auto* single_job : job->jobs()) {
+    const StoreFileReq* one_req =
+        static_cast<const StoreFileReq*>(single_job->req());
+    DCHECK_EQ(1, one_req->blob_size());
+    // The request is const for callers, but its blob is temporarily moved
+    // out here; MultiFileStore::Done() swaps it back.
+    merged_req->add_blob()->Swap(
+        const_cast<StoreFileReq*>(one_req)->mutable_blob(0));
+  }
+  const StoreFileReq* first_req =
+      static_cast<const StoreFileReq*>(job->jobs()[0]->req());
+  *merged_req->mutable_requester_info() = first_req->requester_info();
+  job->SetReq(std::move(merged_req));
+  job->SetResp(std::unique_ptr<google::protobuf::Message>(new StoreFileResp));
+}
+
+// Hands the result of the merged call back to the |i|-th individual call:
+// the blob that Setup() moved into the merged request is swapped back into
+// the original request, and the i-th hash key (if the merged response has
+// one) is added to |resp|.  |stat|->http_return_code is set to 200 when a
+// hash key was available and to 500 otherwise.
+void MultiFileStore::Done(MultiHttpRPC::MultiJob* multi_job,
+                          int i, HttpRPC::Status* stat,
+                          google::protobuf::Message* resp) {
+  if (i < static_cast<int>(multi_job->jobs().size())) {
+    StoreFileReq* single_req = const_cast<StoreFileReq*>(
+        static_cast<const StoreFileReq*>(multi_job->jobs()[i]->req()));
+    StoreFileReq* merged_req = const_cast<StoreFileReq*>(
+        static_cast<const StoreFileReq*>(multi_job->req()));
+    single_req->mutable_blob(0)->Swap(merged_req->mutable_blob(i));
+  }
+
+  StoreFileResp* merged_resp =
+      static_cast<StoreFileResp*>(multi_job->mutable_resp());
+  if (i >= merged_resp->hash_key_size()) {
+    stat->http_return_code = 500;
+    return;
+  }
+  stat->http_return_code = 200;
+  static_cast<StoreFileResp*>(resp)->add_hash_key(merged_resp->hash_key(i));
+}
+
+}  // namespace devtools_goma
diff --git a/client/multi_http_rpc.h b/client/multi_http_rpc.h
new file mode 100644
index 0000000..fa74968
--- /dev/null
+++ b/client/multi_http_rpc.h
@@ -0,0 +1,134 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_MULTI_HTTP_RPC_H_
+#define DEVTOOLS_GOMA_CLIENT_MULTI_HTTP_RPC_H_
+
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+#include "http_rpc.h"
+#include "lockhelper.h"
+#include "unordered.h"
+
+using std::string;
+
+namespace google {
+namespace protobuf {
+class Message;
+}  // namespace protobuf
+}  // namespace google
+
+namespace devtools_goma {
+
+class ExecReq;
+class ExecResp;
+class OneshotClosure;
+class StoreFileReq;
+class StoreFileResp;
+class WorkerThreadManager;
+
+// MultiHttpRPC batches single RPC calls into "multi" calls on top of HttpRPC.
+// Clients issue one request at a time with Call(); MultiHttpRPC packs at most
+// max_req_in_call pending requests into a single call to multi_path over
+// http_rpc.  A batch is also sent once its accumulated request size reaches
+// req_size_threshold_in_call, and pending requests are checked every
+// check_interval_ms so that they are issued even if a batch never fills up.
+// Subclasses define how requests are merged (Setup) and how the merged
+// response is split back into the individual responses (Done).
+class MultiHttpRPC {
+ public:
+  struct Options {
+    Options();
+    // Maximum number of requests packed into one multi call.
+    size_t max_req_in_call;
+    // A batch is sent as soon as its total request size reaches this value.
+    size_t req_size_threshold_in_call;
+    // Interval, in milliseconds, of the periodic check for pending requests.
+    int check_interval_ms;
+  };
+
+  virtual ~MultiHttpRPC();
+
+  // Issues |req|, possibly batched with other requests.  |callback| is run
+  // when the call has finished.
+  virtual void Call(HttpRPC::Status* http_rpc_stat,
+                    const google::protobuf::Message* req,
+                    google::protobuf::Message* resp,
+                    OneshotClosure* callback);
+
+  // Cancels pending batches and blocks until on-the-fly multi jobs finish.
+  void Wait();
+
+  const Options& options() { return options_; }
+  // Returns false once multi calls have been disabled (see Disable()).
+  bool available();
+
+  string DebugString() const;
+
+ protected:
+  class MultiJob;
+  friend class MultiJob;
+
+  MultiHttpRPC(HttpRPC* http_rpc,
+               const string& path, const string& multi_path,
+               const Options& options,
+               WorkerThreadManager* wm);
+
+  // Returns a key for pending multi job for the given req.
+  // req will be batched in same multi job if the key is the same.
+  // Returns "" by default (so no affinity).
+  virtual string MultiJobKey(const google::protobuf::Message* req);
+
+  // Builds the merged request and response for |job| (via SetReq/SetResp).
+  virtual void Setup(MultiJob* job) = 0;
+  // Extracts the result of the |i|-th call of |job| into |resp| and sets
+  // stat->http_return_code for that call.
+  virtual void Done(MultiJob* job, int i, HttpRPC::Status* stat,
+                    google::protobuf::Message* resp) = 0;
+
+  // Periodic closure that issues pending batches.
+  void CheckPending();
+  void UnregisterCheckPending(PeriodicClosureId id);
+  // Disables multi calls; subsequent requests are sent as single calls.
+  void Disable();
+
+  // Called by a MultiJob when it has finished.
+  void JobDone();
+
+  WorkerThreadManager* wm_;
+  HttpRPC* http_rpc_;
+  // Path for single calls.
+  const string path_;
+  // Path for multi calls.
+  const string multi_path_;
+  const Options options_;
+
+  PeriodicClosureId periodic_callback_id_;
+
+  Lock mu_;
+  // Condition to check num_multi_job_ becomes 0.
+  ConditionVariable cond_;
+  int num_multi_job_;  // number of jobs on-the-fly.
+
+  // Batches not issued yet, keyed by MultiJobKey(); nullptr when no batch is
+  // pending for a key.
+  unordered_map<string, MultiJob*> pending_multi_jobs_;
+  bool available_;
+  // num_call_by_multi_[n] counts the calls that carried n requests.
+  std::vector<int> num_call_by_multi_;
+  // Number of multi calls triggered by, respectively, reaching
+  // max_req_in_call, reaching req_size_threshold_in_call, and the periodic
+  // check.
+  int num_call_by_req_num_;
+  int num_call_by_req_size_;
+  int num_call_by_latency_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MultiHttpRPC);
+};
+
+// MultiHttpRPC specialization for StoreFile requests: the blobs of several
+// StoreFileReq are sent in one request, and the hash keys of the merged
+// response are handed back to the individual responses.
+class MultiFileStore : public MultiHttpRPC {
+ public:
+  // |path| is used for both single and multi calls.
+  MultiFileStore(HttpRPC* http_rpc,
+                 const string& path,
+                 const MultiHttpRPC::Options& options,
+                 WorkerThreadManager* wm);
+  ~MultiFileStore() override;
+
+  // Issues a StoreFile request, possibly batched with other requests.
+  void StoreFile(HttpRPC::Status* http_rpc_stat,
+                 const StoreFileReq* req, StoreFileResp* resp,
+                 OneshotClosure* callback);
+
+  void Setup(MultiHttpRPC::MultiJob* job) override;
+  void Done(MultiHttpRPC::MultiJob* job,
+            int i, HttpRPC::Status* stat,
+            google::protobuf::Message* resp) override;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MultiFileStore);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_MULTI_HTTP_RPC_H_
diff --git a/client/mypath.cc b/client/mypath.cc
new file mode 100644
index 0000000..3f26fd3
--- /dev/null
+++ b/client/mypath.cc
@@ -0,0 +1,268 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "mypath.h"
+
+#include <limits.h>
+#ifndef _WIN32
+#include <pwd.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#ifdef __MACH__
+# include <mach-o/dyld.h>
+#endif
+#ifdef __FreeBSD__
+# include <sys/types.h>
+# include <sys/sysctl.h>
+#endif
+#endif
+
+#include <vector>
+
+#include "glog/logging.h"
+#include "basictypes.h"
+#ifdef _WIN32
+# include "config_win.h"
+# include <psapi.h>
+# pragma comment(lib, "psapi.lib")
+# include <lmcons.h>  // for UNLEN
+#endif
+#include "env_flags.h"
+#include "file.h"
+#include "file_dir.h"
+#include "path.h"
+#include "util.h"
+
+GOMA_DECLARE_string(CACHE_DIR);
+GOMA_DECLARE_string(TMP_DIR);
+
+namespace {
+
+#ifndef _WIN32
+const char kGomaTmpDirPrefix[] = "goma_";
+#else
+const char kGomaTmpDir[] = "goma";
+#endif
+const char kGomaCrashDumpDir[] = "goma_crash";
+const char kGomaCacheDir[] = "goma_cache";
+
+// Returns the value of the first environment variable in |candidates| that
+// is non-empty and satisfies |condition|, or |default_value| if there is
+// none.  |condition| is only evaluated for non-empty values.
+template<typename UnaryFunction>
+static string GetEnvMatchedCondition(
+    const std::vector<const char*>& candidates,
+    UnaryFunction condition,
+    const char* default_value) {
+  for (size_t i = 0; i < candidates.size(); ++i) {
+    const string value = devtools_goma::GetEnv(candidates[i]);
+    if (value.empty() || !condition(value)) {
+      continue;
+    }
+    return value;
+  }
+  return default_value;
+}
+
+// Returns the first of $TEST_TMPDIR, $TMPDIR and $TMP that names an existing
+// directory, or "/tmp" if none does.
+static string GetTempDirectoryEnv() {
+  static const char* kTmpdirEnvs[] = {
+    "TEST_TMPDIR",
+    "TMPDIR",
+    "TMP",
+  };
+  const std::vector<const char*> env_names(
+      kTmpdirEnvs, kTmpdirEnvs + arraysize(kTmpdirEnvs));
+  const auto is_directory = [](const string& path) {
+    return File::IsDirectory(path.c_str());
+  };
+  return GetEnvMatchedCondition(env_names, is_directory, "/tmp");
+}
+
+#ifdef __linux__
+// Returns the per-user runtime directory "/run/user/<uid>" if it exists as a
+// directory, or an empty string otherwise.
+static string GetUserRuntimeDirectory() {
+  // uid_t is an unsigned type, so it is formatted with std::to_string rather
+  // than printf's "%d", which would print large uids as negative numbers.
+  const string dir = "/run/user/" + std::to_string(getuid());
+  if (File::IsDirectory(dir.c_str())) {
+    return dir;
+  }
+  return string();
+}
+#endif
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+// Looks the user name up in $SUDO_USER, $USERNAME, $USER and $LOGNAME (in
+// that order), skipping empty values and "root".  Returns "" if none matches.
+string GetUsernameEnv() {
+  static const char* kRoot = "root";
+  static const char* kUserEnvs[] = {
+    "SUDO_USER",
+    "USERNAME",
+    "USER",
+    "LOGNAME",
+  };
+
+  const std::vector<const char*> env_names(
+      kUserEnvs, kUserEnvs + arraysize(kUserEnvs));
+  const auto is_not_root = [](const string& user) {
+    return user != kRoot;
+  };
+  return GetEnvMatchedCondition(env_names, is_not_root, "");
+}
+
+// Returns the name of the current user as reported by the system, without
+// consulting environment variables.  Returns "" if the name cannot be
+// determined or, on POSIX, if the user is root (uid 0).
+string GetUsernameNoEnv() {
+#ifndef _WIN32
+  uid_t uid = getuid();
+  struct passwd* pw = getpwuid(uid);
+  if (uid == 0 || pw == nullptr || pw->pw_name == nullptr ||
+      *pw->pw_name == '\0') {
+    return "";
+  }
+  return pw->pw_name;
+#else
+  // GetUserNameA expects the buffer size in characters *including* the
+  // terminating NUL, so the full buffer size is passed; passing UNLEN would
+  // make the call fail for a user name of maximum length.
+  char buf[UNLEN + 1] = {0};
+  DWORD len = UNLEN + 1;
+  if (!::GetUserNameA(buf, &len)) {
+    // Buffer contents are not reliable on failure.
+    return "";
+  }
+  return buf;
+#endif
+}
+
+// Returns the user name from the environment if available; otherwise asks
+// the system and, on success, stores the result in $USER.  Returns "unknown"
+// when neither source yields a name.
+string GetUsername() {
+  const string from_env = GetUsernameEnv();
+  if (!from_env.empty()) {
+    return from_env;
+  }
+  const string from_system = GetUsernameNoEnv();
+  if (from_system.empty()) {
+    return "unknown";
+  }
+  SetEnv("USER", from_system);
+  return from_system;
+}
+
+// Returns the name of this machine, or "localhost" if it cannot be obtained.
+string GetNodename() {
+#ifndef _WIN32
+  // Gets nodename, which is a good enough approximation to a
+  // hostname, for debugging purposes, for now.
+  struct utsname u;
+  if (uname(&u) != 0) {
+    PLOG(ERROR) << "uname failed";
+    return "localhost";
+  }
+  return u.nodename;
+#else
+  // Get NetBIOS name for now to avoid network queries.
+  char buffer[MAX_COMPUTERNAME_LENGTH + 1] = {0};
+  DWORD len = MAX_COMPUTERNAME_LENGTH + 1;
+  if (!GetComputerNameA(buffer, &len) || !len) {
+    LOG(ERROR) << "GetComputerName " << GetLastError();
+    return "localhost";
+  }
+  return string(buffer, len);
+#endif
+}
+
+// Returns the path name of the running executable.  The lookup is platform
+// specific; failures of the underlying system call abort via PCHECK/CHECK.
+string GetMyPathname() {
+  string myself_fullpath;
+#ifdef _WIN32
+  char path[PATH_MAX] = {0};
+  HANDLE process = GetCurrentProcess();
+  PCHECK(GetModuleFileNameExA(process, nullptr, path, PATH_MAX));
+  myself_fullpath = path;
+#elif defined(__MACH__)
+  // Image 0 is presumably the main executable -- TODO confirm against the
+  // dyld documentation.
+  myself_fullpath = _dyld_get_image_name(0);
+#elif defined(__FreeBSD__)
+  char buf[PATH_MAX + 1];
+  const int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
+  size_t length = sizeof(buf);
+  PCHECK(sysctl(mib, 4, buf, &length, nullptr, 0) >= 0);
+  CHECK_GT(length, 1U);
+  // One byte is dropped from the reported length; presumably the
+  // terminating NUL -- TODO confirm.
+  myself_fullpath.assign(buf, length - 1);
+#else
+  char buf[PATH_MAX + 1];
+  ssize_t len;
+  // readlink does not NUL-terminate; at most PATH_MAX bytes are read so that
+  // there is always room for the terminator added below.
+  PCHECK((len = readlink("/proc/self/exe", buf, PATH_MAX)) >= 0);
+  CHECK_LT(static_cast<size_t>(len), sizeof buf);
+  buf[len] = '\0';
+  myself_fullpath = buf;
+#endif
+  return myself_fullpath;
+}
+
+// Returns the directory part of GetMyPathname(), i.e. everything before the
+// last path separator.  Aborts if the path contains no separator.
+string GetMyDirectory() {
+#ifdef _WIN32
+  const char kSeparator = '\\';
+#else
+  const char kSeparator = '/';
+#endif
+  const string exe_path = GetMyPathname();
+  const size_t last_slash = exe_path.rfind(kSeparator);
+  CHECK(last_slash != string::npos);
+  return exe_path.substr(0, last_slash);
+}
+
+// NOTE: When updating this, you also need to update get_temp_directory() in
+// client/goma-wrapper and GetGomaTmpDir in goma_ctl.py.
+//
+// Returns FLAGS_TMP_DIR if it is set.  Otherwise the result is
+// <base>/goma_<user> (or <base>/goma on Windows), where <base> is the user
+// runtime directory on Linux when it exists and the temp directory taken
+// from the environment otherwise.
+string GetGomaTmpDir() {
+  if (FLAGS_TMP_DIR != "") {
+    return FLAGS_TMP_DIR;
+  }
+
+  string tmpdir;
+#ifdef __linux__
+  tmpdir = GetUserRuntimeDirectory();
+#endif
+  if (tmpdir.empty()) {
+    tmpdir = GetTempDirectoryEnv();
+  }
+  CHECK(!tmpdir.empty()) << "Could not determine temp directory. "
+                         << "Make sure TMPDIR or TMP are not empty.";
+
+  // Assume goma_ctl.py creates /tmp/goma_<user> or %TEMP%\goma.
+#ifdef _WIN32
+  const string private_name(kGomaTmpDir);
+#else
+  const string username = GetUsername();
+  if (username.empty() || username == "unknown") {
+    LOG(ERROR) << "bad username:" << username;
+  }
+  const string private_name = kGomaTmpDirPrefix + username;
+#endif
+  return file::JoinPath(tmpdir, private_name);
+}
+
+// Makes sure |tmpdir| exists as a directory (creating it with mode 0700 if
+// needed) and, on POSIX, that it grants no permission to group or others.
+// Any violation is fatal.
+void CheckTempDirectory(const string& tmpdir) {
+  if (!EnsureDirectory(tmpdir, 0700)) {
+    LOG(FATAL) << "failed to create goma tmp dir or "
+               << "private goma tmp dir is not dir: " << tmpdir;
+  }
+
+#ifndef _WIN32
+  struct stat st;
+  // We must use lstat instead of stat to avoid symlink attack (b/69717657).
+  PCHECK(lstat(tmpdir.c_str(), &st) == 0) << "lstat " << tmpdir;
+  // Only the permission bits for group/other are inspected here.
+  // NOTE(review): st_uid is not compared with the current user, although the
+  // message below talks about ownership -- confirm this is intended.
+  if ((st.st_mode & 077) != 0) {
+    LOG(FATAL) << "private goma tmp dir is not owned only by you. "
+               << "please check owner/permission of " << tmpdir
+               << ".  It must not be readable/writable by group/other. "
+               << "e.g.  $ chmod go-rwx " << tmpdir;
+  }
+#endif
+}
+
+// Returns the "goma_crash" directory under the goma temp directory.
+string GetCrashDumpDirectory() {
+  const string goma_tmp_dir = GetGomaTmpDir();
+  return file::JoinPath(goma_tmp_dir, kGomaCrashDumpDir);
+}
+
+// Returns FLAGS_CACHE_DIR if it is set, otherwise the "goma_cache" directory
+// under the goma temp directory.
+string GetCacheDirectory() {
+  if (FLAGS_CACHE_DIR != "") {
+    return FLAGS_CACHE_DIR;
+  }
+
+  const string goma_tmp_dir = GetGomaTmpDir();
+  return file::JoinPath(goma_tmp_dir, kGomaCacheDir);
+}
+
+}  // namespace devtools_goma
diff --git a/client/mypath.h b/client/mypath.h
new file mode 100644
index 0000000..b566c43
--- /dev/null
+++ b/client/mypath.h
@@ -0,0 +1,61 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_MYPATH_H_
+#define DEVTOOLS_GOMA_CLIENT_MYPATH_H_
+
+#include <string>
+
+using std::string;
+
+namespace devtools_goma {
+
+// Gets the name of the user who executes this program from environment
+// variables.
+// Note: it won't return the correct username in gomacc/win, or gomacc
+// under scons, etc.
+// Returns an empty string if not found.
+string GetUsernameEnv();
+
+// Gets the name of the user who executes this program without using
+// environment variables.
+// Returns an empty string if not found.
+string GetUsernameNoEnv();
+
+// Gets the name of the user who executes this program from environment
+// variables, falling back to a system call.  When the name is obtained from
+// the system call, it is also stored in $USER.
+// Returns "unknown" if not found.
+string GetUsername();
+
+// Gets nodename/hostname which this program runs on.
+string GetNodename();
+
+// Gets this executable's path name.
+string GetMyPathname();
+
+// Gets directory in which this executable is.
+string GetMyDirectory();
+
+// Get temporary directory to be used by gomacc and compiler_proxy.
+// Temporary files, cache and an ipc socket file should be made under this
+// directory for security.  Note that since an ipc socket file is created
+// under this directory, the result of this function must be the same
+// between gomacc and compiler_proxy.
+//
+// Note that we must ensure the directory is owned by the user who runs
+// gomacc or compiler_proxy by CheckTempDirectory.
+// (Once at the beginning of a program should be enough.)
+string GetGomaTmpDir();
+
+// Check temp directory is directory, owned only by self.
+void CheckTempDirectory(const string& tmpdir);
+
+// Get a directory name to store a crash dump.
+string GetCrashDumpDirectory();
+
+// Get a directory name to store a cache.
+string GetCacheDirectory();
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_MYPATH_H_
diff --git a/client/mypath_unittest.cc b/client/mypath_unittest.cc
new file mode 100644
index 0000000..0670766
--- /dev/null
+++ b/client/mypath_unittest.cc
@@ -0,0 +1,69 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "mypath.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "file.h"
+#include "file_dir.h"
+#include "ioutil.h"
+#include "path.h"
+#include "util.h"
+
+// Smoke test: GetUsername() yields a non-empty, non-root, known user name.
+TEST(Util, GetUsername) {
+  const string username = devtools_goma::GetUsername();
+  EXPECT_FALSE(username.empty());
+  EXPECT_NE(username, "root");
+  EXPECT_NE(username, "unknown");
+}
+
+// With every user-related variable blanked, the name must come from the
+// non-environment lookup, and GetUsername() must repopulate the environment.
+TEST(Util, GetUsernameWithoutEnv) {
+  for (const char* name : {"SUDO_USER", "USERNAME", "USER", "LOGNAME"}) {
+    devtools_goma::SetEnv(name, "");
+  }
+  EXPECT_EQ(devtools_goma::GetEnv("USER"), "");
+  EXPECT_TRUE(devtools_goma::GetUsernameEnv().empty());
+
+  const string no_env_name = devtools_goma::GetUsernameNoEnv();
+  EXPECT_FALSE(no_env_name.empty());
+  EXPECT_NE(no_env_name, "root");
+  EXPECT_NE(no_env_name, "unknown");
+  EXPECT_EQ(no_env_name, devtools_goma::GetUsername());
+  EXPECT_EQ(no_env_name, devtools_goma::GetUsernameEnv());
+}
+
+#ifndef _WIN32
+// TODO: enable CheckTempDiretoryNotDirectory on win.
+// EXPECT_DEATH doesn't work well on windows?
+// it failed to capture fatal message, but got
+// *** Check failure stack trace: ***.
+TEST(Util, CheckTempDiretoryNotDirectory) {
+  const string goma_tmp = devtools_goma::GetGomaTmpDir();
+  devtools_goma::RecursivelyDelete(goma_tmp);
+  CHECK(File::CreateDir(goma_tmp.c_str(), 0700)) << goma_tmp;
+  const string plain_file = file::JoinPath(goma_tmp, "tmpdir_is_not_dir");
+  devtools_goma::WriteStringToFileOrDie("", plain_file, 0700);
+  // A regular file is passed where a directory is expected: must be fatal.
+  EXPECT_DEATH(devtools_goma::CheckTempDirectory(plain_file),
+               "private goma tmp dir is not dir");
+  devtools_goma::RecursivelyDelete(goma_tmp);
+}
+
+TEST(Util, CheckTempDiretoryBadPermission) {
+  const string goma_tmp = devtools_goma::GetGomaTmpDir();
+  devtools_goma::RecursivelyDelete(goma_tmp);
+  const mode_t saved_umask = umask(022);  // 022 keeps the read bits of 0744
+  PCHECK(mkdir(goma_tmp.c_str(), 0744) == 0) << goma_tmp;
+  umask(saved_umask);
+  EXPECT_DEATH(devtools_goma::CheckTempDirectory(goma_tmp),
+               "private goma tmp dir is not owned only by you.");
+  devtools_goma::RecursivelyDelete(goma_tmp);
+}
+
+#endif
diff --git a/client/named_pipe_client_win.cc b/client/named_pipe_client_win.cc
new file mode 100644
index 0000000..8b3f15b
--- /dev/null
+++ b/client/named_pipe_client_win.cc
@@ -0,0 +1,66 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "named_pipe_client_win.h"
+
+#include <cstdlib>
+
+#include <glog/logging.h>
+
+#include "simple_timer.h"
+#include "platform_thread.h"
+
+namespace devtools_goma {
+
+// Stores |name|; New() builds the full pipe path from it when connecting.
+NamedPipeFactory::NamedPipeFactory(const std::string& name)
+    : name_(name) {}
+
+// Nothing to release here: no resources are acquired by the constructor.
+NamedPipeFactory::~NamedPipeFactory() {}
+
+// Opens the client end of the local named pipe identified by name_.
+// Retries while the pipe is reported busy, for at most kTimeoutMillisec
+// in total.  Returns an invalid ScopedNamedPipe on error or timeout.
+ScopedNamedPipe NamedPipeFactory::New() {
+  std::string pipename = "\\\\.\\pipe\\" + name_;
+  // TODO: This is mitigation for b/36493466
+  const int kTimeoutMillisec = 13 * 1000;
+  SimpleTimer t;
+
+  for (;;) {
+    int left_time = kTimeoutMillisec - t.GetInMs();
+    if (left_time <= 0) {
+      break;
+    }
+
+    if (!WaitNamedPipeA(pipename.c_str(), left_time)) {
+      DWORD last_error = GetLastError();
+      if (last_error == ERROR_SEM_TIMEOUT) {
+        LOG(ERROR) << "Timed-out to WaitNamedPipe " << pipename
+                   << " with timeout_ms=" << kTimeoutMillisec;
+      }
+      LOG_SYSRESULT(last_error);
+      return ScopedNamedPipe();
+    }
+
+    ScopedNamedPipe pipe(CreateFileA(
+        pipename.c_str(), GENERIC_READ | GENERIC_WRITE, 0, nullptr,
+        OPEN_EXISTING, FILE_FLAG_OVERLAPPED, nullptr));
+    if (!pipe.valid()) {
+      // Read the error once, right after the failing call, and reuse it.
+      DWORD last_error = GetLastError();
+      if (last_error == ERROR_PIPE_BUSY) {
+        continue;  // pipe became busy again; go back to waiting.
+      }
+      LOG_SYSRESULT(last_error);
+      return ScopedNamedPipe();
+    }
+    return pipe;
+  }
+  LOG(ERROR) << "Timed-out to create new pipe:" << pipename;
+  return ScopedNamedPipe();
+}
+
+}  // namespace devtools_goma
diff --git a/client/named_pipe_client_win.h b/client/named_pipe_client_win.h
new file mode 100644
index 0000000..e3cf525
--- /dev/null
+++ b/client/named_pipe_client_win.h
@@ -0,0 +1,38 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_NAMED_PIPE_CLIENT_WIN_H_
+#define DEVTOOLS_GOMA_CLIENT_NAMED_PIPE_CLIENT_WIN_H_
+
+#ifdef _WIN32
+
+#include <string>
+
+#include "named_pipe_win.h"
+
+namespace devtools_goma {
+
+// Creates client-side connections to the named pipe called |name|.
+class NamedPipeFactory {
+ public:
+  explicit NamedPipeFactory(const std::string& name);
+  ~NamedPipeFactory();
+
+  NamedPipeFactory(const NamedPipeFactory&) = delete;
+  NamedPipeFactory& operator=(const NamedPipeFactory&) = delete;
+
+  // Returns a connected pipe, or an invalid one on failure or timeout.
+  ScopedNamedPipe New();
+
+  const std::string& DestName() const { return name_; }
+
+ private:
+  const std::string name_;
+};
+
+}  // namespace devtools_goma
+
+#endif  // _WIN32
+
+#endif  // DEVTOOLS_GOMA_CLIENT_NAMED_PIPE_CLIENT_WIN_H_
diff --git a/client/named_pipe_client_win_unittest.cc b/client/named_pipe_client_win_unittest.cc
new file mode 100644
index 0000000..91ea4ec
--- /dev/null
+++ b/client/named_pipe_client_win_unittest.cc
@@ -0,0 +1,286 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "named_pipe_client_win.h"
+
+#include <string>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "named_pipe_win.h"
+#include "named_pipe_server_win.h"
+#include "worker_thread_manager.h"
+
+namespace devtools_goma {
+
+class NamedPipeClientTest : public ::testing::Test {
+ public:
+  class MockHandler : public NamedPipeServer::Handler {
+   public:
+    MockHandler() : wait_sec_(0) {}
+    ~MockHandler() override {}
+    void HandleIncoming(NamedPipeServer::Request* req) override {
+      LOG(INFO) << "Handle incoming: msg=" << req->request_message();
+      EXPECT_EQ(expect_request_, req->request_message());
+      PlatformThread::Sleep(wait_sec_ * 1000);
+      LOG(INFO) << "reply response: msg=" << reply_;
+      req->SendReply(reply_);
+    }
+
+    void Transaction(const std::string& expect_req,
+                     const std::string& reply) {
+      expect_request_ = expect_req;
+      reply_ = reply;
+    }
+
+    void SetWaitSec(int wait_sec) {
+      wait_sec_ = wait_sec;
+    }
+
+   private:
+    std::string expect_request_;
+    std::string reply_;
+    int wait_sec_;
+  };
+};
+
+TEST(NamedPipeClientTest, Simple) {
+  WorkerThreadManager wm;
+  wm.Start(1);
+
+  std::unique_ptr<NamedPipeClientTest::MockHandler> handler(
+      new NamedPipeClientTest::MockHandler);
+  static const char kReq[] = "POST /e HTTP/1.1\r\n";
+  static const char kResp[] = "HTTP/1.1 200 OK\r\n";
+  handler->Transaction(kReq, kResp);
+
+  LOG(INFO) << "pipe server starts";
+  NamedPipeServer server(&wm, handler.get());
+  static const char kName[] = "named-pipe-client-win-unittest";
+  server.Start(kName);
+
+  LOG(INFO) << "pipe clients starts";
+  NamedPipeFactory factory(kName);
+  ScopedNamedPipe pipe = factory.New();
+  if (!pipe.valid()) {
+    LOG_SYSRESULT(GetLastError());
+  }
+  ASSERT_TRUE(pipe.valid());
+
+  LOG(INFO) << "send message " << kReq;
+  ssize_t num_written = pipe.WriteWithTimeout(kReq, strlen(kReq), 5);
+  EXPECT_EQ(strlen(kReq), num_written);
+
+  LOG(INFO) << "wait for response...";
+  std::string buf;
+  buf.resize(1024);
+  ssize_t num_read = pipe.ReadWithTimeout(&buf[0], buf.size(), 5);
+  EXPECT_EQ(strlen(kResp), num_read);
+  buf.resize(num_read);
+  LOG(INFO) << "response=" << buf;
+  EXPECT_EQ(kResp, buf);
+
+  LOG(INFO) << "pipe server stopping...";
+  server.Stop();
+
+  wm.Finish();
+}
+
+TEST(NamedPipeClientTest, LargeResponse) {
+  WorkerThreadManager wm;
+  wm.Start(1);
+
+  std::unique_ptr<NamedPipeClientTest::MockHandler> handler(
+      new NamedPipeClientTest::MockHandler);
+  static const char kReq[] = "POST /e HTTP/1.1\r\n";
+  std::string resp = "HTTP/1.1 200 OK\r\n";
+  const int kBufsize = 1024;
+  // response is more than kBufsize
+  // but less than kOutputBufSize in named_pipe_server_win.cc (128 * 1024).
+  resp.resize(2 * 1024 + 512);
+  handler->Transaction(kReq, resp);
+
+  LOG(INFO) << "pipe server starts";
+  NamedPipeServer server(&wm, handler.get());
+  static const char kName[] = "named-pipe-client-win-unittest";
+  server.Start(kName);
+
+  LOG(INFO) << "pipe clients starts";
+  NamedPipeFactory factory(kName);
+  ScopedNamedPipe pipe = factory.New();
+  if (!pipe.valid()) {
+    LOG_SYSRESULT(GetLastError());
+  }
+  ASSERT_TRUE(pipe.valid());
+
+  LOG(INFO) << "send message " << kReq;
+  ssize_t num_written = pipe.WriteWithTimeout(kReq, strlen(kReq), 5);
+  EXPECT_EQ(strlen(kReq), num_written);
+
+  LOG(INFO) << "wait for response...";
+  std::string received;
+  for (;;) {
+    LOG(INFO) << "received=" << received.size()
+              << " try read=" << kBufsize;
+    std::string buf;
+    buf.resize(kBufsize);
+    ssize_t num_read = pipe.ReadWithTimeout(&buf[0], buf.size(), 5);
+    if (num_read == 0) {
+      break;
+    }
+    EXPECT_GT(num_read, 0)
+        << "received=" << received.size()
+        << " err=" << num_read;
+    EXPECT_LE(num_read, kBufsize)
+        << "received=" << received.size()
+        << " read=" << num_read;
+    buf.resize(num_read);
+    received += buf;
+    if (received.size() == resp.size()) {
+      break;
+    }
+  }
+  EXPECT_EQ(resp, received);
+
+  LOG(INFO) << "pipe server stopping...";
+  server.Stop();
+
+  wm.Finish();
+}
+
+TEST(NamedPipeClientTest, LargeResponseThanOutputBuffer) {
+  WorkerThreadManager wm;
+  wm.Start(1);
+
+  std::unique_ptr<NamedPipeClientTest::MockHandler> handler(
+      new NamedPipeClientTest::MockHandler);
+  static const char kReq[] = "POST /e HTTP/1.1\r\n";
+  std::string resp = "HTTP/1.1 200 OK\r\n";
+  // response is more than kOutputBufSize
+  // in named_pipe_server_win.cc (128 * 1024).
+  const int kRespBufsize = 130 * 1024;
+  resp.resize(kRespBufsize);
+  handler->Transaction(kReq, resp);
+
+  LOG(INFO) << "pipe server starts";
+  NamedPipeServer server(&wm, handler.get());
+  static const char kName[] = "named-pipe-client-win-unittest";
+  server.Start(kName);
+
+  LOG(INFO) << "pipe clients starts";
+  NamedPipeFactory factory(kName);
+  ScopedNamedPipe pipe = factory.New();
+  if (!pipe.valid()) {
+    LOG_SYSRESULT(GetLastError());
+  }
+  ASSERT_TRUE(pipe.valid());
+
+  LOG(INFO) << "send message " << kReq;
+  ssize_t num_written = pipe.WriteWithTimeout(kReq, strlen(kReq), 5);
+  EXPECT_EQ(strlen(kReq), num_written);
+
+  LOG(INFO) << "wait for response...";
+  std::string received;
+  size_t bufsize = 1024;
+  for (;;) {
+    std::string buf;
+    if (!received.empty()) {
+      bufsize = kRespBufsize - received.size();
+    }
+    buf.resize(bufsize);
+    LOG(INFO) << "received=" << received.size()
+              << " try read=" << bufsize;
+    ssize_t num_read = pipe.ReadWithTimeout(&buf[0], buf.size(), 5);
+    if (num_read == 0) {
+      break;
+    }
+    EXPECT_GT(num_read, 0)
+        << "received=" << received.size()
+        << " err=" << num_read;
+    EXPECT_LE(num_read, bufsize)
+        << "received=" << received.size()
+        << " read=" << num_read;
+    buf.resize(num_read);
+    received += buf;
+    if (received.size() == resp.size()) {
+      break;
+    }
+  }
+  EXPECT_EQ(resp, received);
+
+  LOG(INFO) << "pipe server stopping...";
+  server.Stop();
+
+  wm.Finish();
+}
+
+TEST(NamedPipeClientTest, Timeout) {
+  WorkerThreadManager wm;
+  wm.Start(1);
+
+  std::unique_ptr<NamedPipeClientTest::MockHandler> handler(
+      new NamedPipeClientTest::MockHandler);
+  static const char kReq[] = "POST /e HTTP/1.1\r\n";
+  static const char kResp[] = "HTTP/1.1 200 OK\r\n";
+  handler->Transaction(kReq, kResp);
+  handler->SetWaitSec(5);
+
+  LOG(INFO) << "pipe server starts";
+  NamedPipeServer server(&wm, handler.get());
+  static const char kName[] = "named-pipe-client-win-unittest";
+  server.Start(kName);
+
+  LOG(INFO) << "pipe clients starts";
+  NamedPipeFactory factory(kName);
+  ScopedNamedPipe pipe = factory.New();
+  if (!pipe.valid()) {
+    LOG_SYSRESULT(GetLastError());
+  }
+  ASSERT_TRUE(pipe.valid());
+
+  LOG(INFO) << "send message " << kReq;
+  ssize_t num_written = pipe.WriteWithTimeout(kReq, strlen(kReq), 5);
+  EXPECT_EQ(strlen(kReq), num_written);
+
+  LOG(INFO) << "wait for response...";
+  std::string received;
+  const int kBufsize = 1024;
+  for (;;) {
+    std::string buf;
+    buf.resize(kBufsize);
+    LOG(INFO) << "received=" << received.size()
+              << " try read=" << buf.size();
+    ssize_t num_read = pipe.ReadWithTimeout(&buf[0], buf.size(), 1);
+    if (num_read == 0) {
+      break;
+    }
+    if (num_read == ERR_TIMEOUT) {
+      LOG(INFO) << "error timeout";
+      PlatformThread::Sleep(2 * 1000);
+      continue;
+    }
+    EXPECT_GT(num_read, 0)
+        << "received=" << received.size()
+        << " err=" << num_read;
+    EXPECT_LE(num_read, buf.size())
+        << "received=" << received.size()
+        << " read=" << num_read;
+    buf.resize(num_read);
+    LOG(INFO) << "receive: " << buf;
+    received += buf;
+    if (received.size() == strlen(kResp)) {
+      break;
+    }
+  }
+  EXPECT_EQ(kResp, received);
+
+  LOG(INFO) << "pipe server stopping...";
+  server.Stop();
+
+  wm.Finish();
+}
+
+
+}  // namespace devtools_goma
diff --git a/client/named_pipe_server_win.cc b/client/named_pipe_server_win.cc
new file mode 100644
index 0000000..ce0b7c2
--- /dev/null
+++ b/client/named_pipe_server_win.cc
@@ -0,0 +1,759 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+#include "named_pipe_server_win.h"
+
+#include <deque>
+#include <set>
+#include <string>
+
+#include "autolock_timer.h"
+#include "callback.h"
+#include "config_win.h"
+#include "string_piece.h"
+#include "worker_thread_manager.h"
+
+namespace devtools_goma {
+
+static const int kInputBufSize = 64 * 1024;  // bytes
+static const int kOutputBufSize = 128 * 1024;  // bytes
+static const int kTimeoutMillisec = 50;
+
+class NamedPipeServer::Conn {
+ public:
+  Conn(NamedPipeServer* server, ScopedNamedPipe&& pipe)
+      : server_(server),
+        pipe_(std::move(pipe)),
+        thread_id_(server->wm_->GetCurrentThreadId()),
+        err_(0),
+        written_(0),
+        closed_thread_id_(0) {
+    memset(&overlapped_, 0, sizeof overlapped_);
+    buf_.resize(kInputBufSize);
+    req_.reset(new Req(this));
+    close_watcher_.reset(new CloseWatcher(this));
+  }
+  ~Conn() {
+    // Cancel all pending I/O before delete of this instance.
+    // It is meaningless to proceed pending I/O after the delete,
+    // and also cause use-after-free to execute completion routine.
+    if (pipe_.get()) {
+      if (CancelIo(pipe_.get()) == 0) {
+        LOG_SYSRESULT(GetLastError());
+        LOG(ERROR) << "cancel io failed: " << this;
+      }
+    }
+  }
+
+  Conn(const Conn&) = delete;
+  Conn& operator=(const Conn&) = delete;
+
+  NamedPipeServer::Request* req() const {
+    return req_.get();
+  }
+
+  bool BelongsToCurrentThread() const {
+    return THREAD_ID_IS_SELF(thread_id_);
+  }
+
+  bool Start() {
+    VLOG(1) << "conn start " << this;
+    DCHECK(BelongsToCurrentThread());
+    memset(&overlapped_, 0, sizeof overlapped_);
+    CHECK_EQ(reinterpret_cast<LPOVERLAPPED>(this), &overlapped_);
+    return ReadFileEx(pipe_.get(), &buf_[0], kInputBufSize,
+                      &overlapped_,
+                      &NamedPipeServer::Conn::ReadCompleted) != 0;
+  }
+
+  bool Reply() {
+    VLOG(1) << "conn reply " << this;
+    DCHECK(BelongsToCurrentThread());
+
+    // stop Read detecting EOF.
+    // no need to detect EOF once it starts replying.
+    if (CancelIo(pipe_.get()) == 0) {
+      LOG_SYSRESULT(GetLastError());
+      LOG(ERROR) << "cancel EOF detector " << this;
+    }
+    {
+      AUTOLOCK(lock, &mu_);
+      closed_callback_.reset();
+    }
+    VLOG_IF(1, buf_.size() > kOutputBufSize)
+        << "conn reply too large: size=" << buf_.size();
+    CHECK_EQ(written_, 0) << "conn reply";
+    memset(&overlapped_, 0, sizeof overlapped_);
+    CHECK_EQ(reinterpret_cast<LPOVERLAPPED>(this), &overlapped_);
+    return WriteFileEx(pipe_.get(), &buf_[written_], buf_.size() - written_,
+                       &overlapped_,
+                       &NamedPipeServer::Conn::WriteCompleted) != 0;
+  }
+
+  void WatchClosed() {
+    DCHECK(BelongsToCurrentThread());
+    {
+      AUTOLOCK(lock, &mu_);
+      if (closed_callback_ == nullptr) {
+        // WatchClosed might be called after Reply.
+        // no need to start close_watcher_.
+        return;
+      }
+    }
+    close_watcher_->Run();
+  }
+
+  void Flush() {
+    if (FlushFileBuffers(pipe_.get()) == 0) {
+      LOG_SYSRESULT(GetLastError());
+      LOG(ERROR) << "conn failed to flush " << this;
+    }
+  }
+
+  DWORD error() const { return err_; }
+
+ private:
+  class Req : public NamedPipeServer::Request {
+   public:
+    explicit Req(Conn* conn) : conn_(conn) {}
+    ~Req() override {}
+
+    Req(const Req&) = delete;
+    Req& operator=(const Req&) = delete;
+
+    StringPiece request_message() const override {
+      return conn_->request_message_;
+    }
+    void SendReply(StringPiece reply) override {
+      conn_->SendReply(reply);
+    }
+    void NotifyWhenClosed(OneshotClosure* callback) override {
+      conn_->NotifyWhenClosed(callback);
+    }
+
+   private:
+    Conn* conn_;
+  };
+
+  class CloseWatcher {
+   public:
+    explicit CloseWatcher(Conn* conn) : conn_(conn) {}
+    ~CloseWatcher() {}
+
+    CloseWatcher(const CloseWatcher&) = delete;
+    CloseWatcher& operator=(const CloseWatcher&) = delete;
+
+    void Run() {
+      memset(&overlapped_, 0, sizeof overlapped_);
+      // start Read and if it got error, fire close notifier.
+      CHECK_EQ(reinterpret_cast<LPOVERLAPPED>(this), &overlapped_);
+      if (ReadFileEx(conn_->pipe_.get(), eofBuf_, sizeof eofBuf_,
+                     &overlapped_,
+                     &NamedPipeServer::Conn::CloseWatcher::EOFDetected) == 0) {
+        DWORD err = GetLastError();
+        if (err == ERROR_HANDLE_EOF) {
+          NotifyClosed(err, 0);
+          return;
+        }
+        LOG_SYSRESULT(err);
+        LOG(ERROR) << "conn failed to setup eof detector " << this;
+      }
+    }
+
+   private:
+    static void EOFDetected(
+        DWORD err, DWORD num_bytes, LPOVERLAPPED overlapped) {
+      VLOG(1) << "EOFDetected err=" << err
+              << " num_bytes=" << num_bytes;
+      CloseWatcher* cw = reinterpret_cast<CloseWatcher*>(overlapped);
+      cw->NotifyClosed(err, num_bytes);
+    }
+
+    // Forwards the outcome of the EOF-detecting read to the owning Conn.
+    void NotifyClosed(DWORD err, DWORD num_bytes) {
+      if (err == 0 && GetOverlappedResult(conn_->pipe_.get(), &overlapped_,
+                                          &num_bytes, FALSE) == 0) {
+        // Only a failed call sets a meaningful last-error; read it before
+        // logging, since a later call could overwrite it.
+        err = GetLastError();
+        LOG_SYSRESULT(err);
+        LOG(ERROR) << "conn close watcher error";
+      }
+      conn_->NotifyClosed(err, num_bytes);
+    }
+
+    OVERLAPPED overlapped_;
+    Conn* conn_;
+    char eofBuf_[1];
+  };
+
+  void SendReply(StringPiece reply) {
+    buf_ = string(reply);
+    server_->ReadyToReply(this);
+  }
+
+  void NotifyWhenClosed(OneshotClosure* callback) {
+    CHECK(callback != nullptr);
+    {
+      AUTOLOCK(lock, &mu_);
+      CHECK(closed_callback_ == nullptr);
+      closed_callback_.reset(callback);
+      closed_thread_id_ = server_->wm_->GetCurrentThreadId();
+    }
+    server_->NotifyWhenClosed(this);
+  }
+
+  static void ReadCompleted(
+      DWORD err, DWORD num_bytes, LPOVERLAPPED overlapped) {
+    VLOG(1) << "ReadCompleted err=" << err
+            << " num_bytes=" << num_bytes;
+    Conn* conn = reinterpret_cast<Conn*>(overlapped);
+    conn->ReadDone(err, num_bytes);
+  }
+
+  static void WriteCompleted(
+      DWORD err, DWORD num_bytes, LPOVERLAPPED overlapped) {
+    VLOG(1) << "WriteCompleted err=" << err
+            << " num_bytes=" << num_bytes;
+    Conn* conn = reinterpret_cast<Conn*>(overlapped);
+    conn->WriteDone(err, num_bytes);
+  }
+
+  // Completion of the request read: records |err|, exposes the first
+  // |num_bytes| of buf_ as the request message and notifies the server.
+  void ReadDone(DWORD err, DWORD num_bytes) {
+    DCHECK(BelongsToCurrentThread());
+    err_ = err;
+    request_message_ = StringPiece(buf_.data(), num_bytes);
+    server_->ReadDone(this);
+  }
+
+  void NotifyClosed(DWORD err, DWORD num_bytes) {
+    DCHECK(BelongsToCurrentThread());
+    if (err == ERROR_OPERATION_ABORTED) {
+      // I/O operation were canceled.  No need to notify.
+      return;
+    }
+    LOG(INFO) << "named pipe closed. err=" << err;
+    err_ = err;
+    server_->Closed(this);
+    OneshotClosure* callback = nullptr;
+    WorkerThreadManager::ThreadId thread_id;
+    {
+      AUTOLOCK(lock, &mu_);
+      callback = closed_callback_.release();
+      thread_id = closed_thread_id_;
+    }
+    if (callback != nullptr) {
+      CHECK_NE(thread_id, 0U);
+      server_->wm_->RunClosureInThread(
+          FROM_HERE,
+          thread_id,
+          NewCallback(static_cast<Closure*>(callback), &Closure::Run),
+          WorkerThreadManager::PRIORITY_HIGH);
+    }
+  }
+
+  void WriteDone(DWORD err, DWORD num_bytes) {
+    DCHECK(BelongsToCurrentThread());
+    err_ = err;
+    if (err == 0) {
+      BOOL r = false;
+      if (GetOverlappedResult(pipe_.get(), &overlapped_,
+                              &num_bytes, FALSE)) {
+        if (num_bytes > 0) {
+          written_ += num_bytes;
+          if (written_ == buf_.size()) {
+            server_->WriteDone(this);
+            return;
+          }
+          CHECK_LT(written_, buf_.size()) << "conn write overrun?";
+          memset(&overlapped_, 0, sizeof overlapped_);
+          CHECK_EQ(reinterpret_cast<LPOVERLAPPED>(this), &overlapped_);
+          r = WriteFileEx(pipe_.get(),
+                          &buf_[written_], buf_.size() - written_,
+                          &overlapped_,
+                          &NamedPipeServer::Conn::WriteCompleted);
+          if (r != 0) {
+            return;
+          }
+        }
+        LOG(ERROR) << "conn write num_bytes=" << num_bytes
+                   << " written=" << written_
+                   << " WriteFileEx=" << r;
+      }
+      err = GetLastError();
+      if (err == ERROR_IO_PENDING) {
+        // never happens?
+        return;
+      }
+      err_ = err;
+    }
+    LOG_SYSRESULT(err);
+    LOG(ERROR) << "conn write done error err=" << err
+               << " num_bytes=" << num_bytes
+               << " buf_size=" << buf_.size()
+               << " written=" << written_;
+    server_->WriteDone(this);
+  }
+
+  OVERLAPPED overlapped_;  // should be initial member at offset 0.
+  NamedPipeServer* server_;
+  ScopedNamedPipe pipe_;
+  WorkerThreadManager::ThreadId thread_id_;
+  DWORD err_;
+  std::string buf_;
+  StringPiece request_message_;
+  size_t written_;
+
+  Lock mu_;  // protect closed_thread_id_ and closed_callback_
+  WorkerThreadManager::ThreadId closed_thread_id_;
+  std::unique_ptr<OneshotClosure> closed_callback_;
+
+  std::unique_ptr<Req> req_;
+  std::unique_ptr<CloseWatcher> close_watcher_;
+};
+
+NamedPipeServer::~NamedPipeServer() {
+  CHECK(!ready_.valid());
+  CHECK(!watch_closed_.valid());
+  CHECK(!reply_.valid());
+  CHECK(!shutdown_.valid());
+  CHECK(!done_.valid());
+  CHECK(!flush_.valid());
+  CHECK(!flusher_done_.valid());
+  CHECK(actives_.empty());
+  CHECK(replies_.empty());
+  CHECK(finished_.empty());
+  CHECK(flushes_.empty());
+}
+
+void NamedPipeServer::Start(const std::string& name) {
+  LOG(INFO) << "Start for " << name;
+  ready_.reset(CreateEvent(nullptr, TRUE, FALSE, nullptr));
+  if (!ready_.valid()) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(FATAL) << "Failed to create event for ready";
+  }
+  watch_closed_.reset(CreateEvent(nullptr, FALSE, FALSE, nullptr));
+  if (!watch_closed_.valid()) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(FATAL) << "Failed to create event for watch_closed";
+  }
+  reply_.reset(CreateEvent(nullptr, FALSE, FALSE, nullptr));
+  if (!reply_.valid()) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(FATAL) << "Failed to create event for reply";
+  }
+  shutdown_.reset(CreateEvent(nullptr, TRUE, FALSE, nullptr));
+  if (!shutdown_.valid()) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(FATAL) << "Failed to create event for shutdown";
+  }
+  done_.reset(CreateEvent(nullptr, TRUE, FALSE, nullptr));
+  if (!done_.valid()) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(FATAL) << "Failed to create event for done";
+  }
+
+  flush_.reset(CreateEvent(nullptr, FALSE, FALSE, nullptr));
+  if (!flush_.valid()) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(FATAL) << "Failed to create event for flush";
+  }
+
+  flusher_done_.reset(CreateEvent(nullptr, FALSE, FALSE, nullptr));
+  if (!flusher_done_.valid()) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(FATAL) << "Failed to create event for flusher";
+  }
+  wm_->NewThread(NewCallback(this, &NamedPipeServer::Flusher),
+                 "pipe_flusher");
+
+  wm_->NewThread(NewCallback(this, &NamedPipeServer::Run, name),
+                 "pipe_server");
+
+  DWORD w = WaitForSingleObject(ready_.handle(), 10*1000); // 10 secs timeout
+  if (w != WAIT_OBJECT_0) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(FATAL) << "Failed to wait for ready: w=" << w;
+  }
+}
+
+// Signals shutdown_, waits until both done_ and flusher_done_ are signaled,
+// then closes every event handle and deletes the connections still tracked.
+void NamedPipeServer::Stop() {
+  LOG(INFO) << "Stop";
+  if (!shutdown_.valid() || !done_.valid()) {
+    LOG(INFO) << "not running?";
+    return;
+  }
+  {
+    AUTOLOCK(lock, &mu_);
+    shutting_down_ = true;
+  }
+  if (!SetEvent(shutdown_.handle())) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(FATAL) << "Failed to signal shutdown";
+  }
+  bool finished = false;
+  HANDLE events[2];
+  events[0] = done_.handle();
+  events[1] = flusher_done_.handle();
+  while (!finished) {
+    DWORD w = WaitForMultipleObjectsEx(
+        2, events, TRUE /* wait all */, INFINITE, TRUE /* alertable */);
+    switch (w) {
+      case WAIT_OBJECT_0:  FALLTHROUGH_INTENDED;
+      case WAIT_OBJECT_0 + 1:
+        finished = true;
+        break;
+      case WAIT_IO_COMPLETION:
+        continue;
+      default:
+        LOG_SYSRESULT(GetLastError());
+        LOG(FATAL) << "Failed to wait for done: w=" << w;
+    }
+  }
+  LOG(INFO) << "done";
+  ready_.Close();
+  watch_closed_.Close();
+  reply_.Close();
+  shutdown_.Close();
+  done_.Close();
+  flush_.Close();
+  flusher_done_.Close();
+
+  std::set<Conn*> conns;
+  {
+    AUTOLOCK(lock, &mu_);
+    conns.insert(actives_.begin(), actives_.end());
+    actives_.clear();
+    conns.insert(replies_.begin(), replies_.end());
+    replies_.clear();
+    conns.insert(finished_.begin(), finished_.end());
+    finished_.clear();
+    conns.insert(flushes_.begin(), flushes_.end());
+    flushes_.clear();
+    watches_.clear();  // would otherwise keep pointers to Conns deleted below
+  }
+  for (const auto* conn : conns) {
+    delete conn;
+  }
+}
+
+// Moves |conn| from the active/watched sets to the reply queue and signals
+// reply_; during shutdown the connection is deleted instead.
+void NamedPipeServer::ReadyToReply(Conn* conn) {
+  {
+    AUTOLOCK(lock, &mu_);
+    actives_.erase(conn);
+    watches_.erase(conn);
+    if (shutting_down_) {
+      LOG(WARNING) << "will not update replies_ because shutting down.";
+      delete conn;
+      return;
+    }
+    replies_.push_back(conn);
+  }
+  if (SetEvent(reply_.handle())) return;
+  LOG_SYSRESULT(GetLastError());
+  LOG(ERROR) << "Failed to signal reply";
+}
+
+// Queues |conn| for close-watching and signals watch_closed_.
+void NamedPipeServer::NotifyWhenClosed(Conn* conn) {
+  {
+    AUTOLOCK(lock, &mu_);
+    watches_.insert(conn);
+  }
+  if (SetEvent(watch_closed_.handle())) return;
+  LOG_SYSRESULT(GetLastError());
+  LOG(ERROR) << "Failed to signal watch_closed";
+}
+
+// Body of the "pipe_server" thread: accepts clients on the pipe |name| and
+// serves watch-closed / reply / shutdown events until shutdown_ is signaled.
+// Signals ready_ after the first NewPipe() attempt.
+void NamedPipeServer::Run(std::string name) {
+  thread_id_ = wm_->GetCurrentThreadId();
+  std::string pipename = "\\\\.\\pipe\\" + name;
+  LOG(INFO) << "Run pipe=" << pipename;
+
+  ScopedFd connected(CreateEvent(
+      nullptr,  // default security attribute
+      TRUE,  // manual reset event
+      TRUE,  // initial state = signaled
+      nullptr));
+  if (!connected.valid()) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(FATAL) << "Failed to create event for connect";
+  }
+
+  // Every OVERLAPPED member other than hEvent must be zero before use.
+  OVERLAPPED o_connect = {};
+  o_connect.hEvent = connected.handle();
+  bool is_pending = NewPipe(pipename, &o_connect);
+
+  if (!SetEvent(ready_.handle())) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(FATAL) << "Failed to signal ready";
+  }
+  LOG(INFO) << "pipe=" << pipename << " ready";
+  HANDLE events[4];
+  events[0] = connected.handle();
+  events[1] = watch_closed_.handle();
+  events[2] = reply_.handle();
+  events[3] = shutdown_.handle();
+  for (;;) {
+    DWORD w = WaitForMultipleObjectsEx(
+        4, events,
+        FALSE,  // wake on any one event, not all of them
+        INFINITE, TRUE);  // no timeout; alertable wait
+    switch (w) {
+      case WAIT_OBJECT_0:  // connected
+        if (is_pending) {
+          DWORD num_bytes = 0;
+          BOOL ok = GetOverlappedResult(pipe_.get(), &o_connect,
+                                        &num_bytes, FALSE);
+          if (!ok) {
+            LOG_SYSRESULT(GetLastError());
+            LOG(ERROR) << "Failed to GetOverlappedResult for connect";
+            return;
+          }
+        }
+        if (pipe_.valid()) {
+          VLOG(1) << "connected";
+          Conn* conn = new Conn(this, std::move(pipe_));
+          {
+            AUTOLOCK(lock, &mu_);
+            actives_.insert(conn);
+          }
+          if (!conn->Start()) {
+            LOG(ERROR) << "conn start failed";
+            {
+              AUTOLOCK(lock, &mu_);
+              actives_.erase(conn);
+            }
+            delete conn;
+          }
+        }
+        is_pending = NewPipe(pipename, &o_connect);
+        VLOG(1) << "new pipe is_pending=" << is_pending;
+        break;
+
+      case WAIT_OBJECT_0 + 1:  // watch closed
+        VLOG(1) << "watch closed";
+        ProcessWatchClosed();
+        break;
+
+      case WAIT_OBJECT_0 + 2:  // ready to reply
+        VLOG(1) << "ready to reply";
+        ProcessReplies();
+        break;
+
+      case WAIT_OBJECT_0 + 3:
+        LOG(INFO) << "shutting down";
+        if (CancelIo(pipe_.get()) == 0) {
+          LOG_SYSRESULT(GetLastError());
+          LOG(ERROR) << "cancel connect named pipe";
+        }
+        if (!SetEvent(done_.handle())) {
+          LOG_SYSRESULT(GetLastError());
+          LOG(FATAL) << "Failed to signal done";
+        }
+        return;
+
+      case WAIT_IO_COMPLETION:
+        VLOG(2) << "io completion";
+        // The wait is satisfied by a completed read or write operation.
+        // This allows the system to execute the completion routine.
+        break;
+
+      default:
+        LOG_SYSRESULT(GetLastError());
+        LOG(FATAL) << "WaitForMultipleObjectsEx";
+        return;
+    }
+  }
+}
+
+// Creates a fresh pipe instance in pipe_ and starts an overlapped connect on
+// it.  Returns true only while the connect is still pending; a false return
+// covers both "client already connected" (event signaled) and failure.
+bool NamedPipeServer::NewPipe(
+    const std::string& pipename, OVERLAPPED* overlapped) {
+  DCHECK(THREAD_ID_IS_SELF(thread_id_));
+
+  pipe_ = ScopedNamedPipe(
+      CreateNamedPipeA(pipename.c_str(),
+                       PIPE_ACCESS_DUPLEX | FILE_FLAG_OVERLAPPED,
+                       PIPE_TYPE_BYTE | PIPE_READMODE_BYTE | PIPE_WAIT |
+                         PIPE_REJECT_REMOTE_CLIENTS,
+                       PIPE_UNLIMITED_INSTANCES,
+                       kOutputBufSize,
+                       kInputBufSize,
+                       kTimeoutMillisec,
+                       nullptr));  // TODO: set security attributes.
+  if (!pipe_.valid()) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Failed to open pipe " << pipename;
+    return false;
+  }
+
+  if (ConnectNamedPipe(pipe_.get(), overlapped)) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Failed to ConnectNamedPipe";
+    return false;
+  }
+  switch (GetLastError()) {
+    case ERROR_IO_PENDING:
+      // overlapped connection in progress.
+      return true;
+
+    case ERROR_PIPE_CONNECTED:
+      // client is already connected, signal.
+      if (SetEvent(overlapped->hEvent)) {
+        break;
+      }
+      FALLTHROUGH_INTENDED;
+    default:
+      LOG_SYSRESULT(GetLastError());
+      LOG(ERROR) << "Failed to ConnectNamedPipe";
+  }
+  return false;
+}
+
+// Called when a read on |conn| has finished.
+// On success the request is passed to handler_->HandleIncoming through
+// wm_->RunClosure with high priority.  On error |conn| is removed from
+// actives_ and deleted.
+void NamedPipeServer::ReadDone(Conn* conn) {
+  VLOG(1) << "ReadDone err=" << conn->error();
+  DCHECK(THREAD_ID_IS_SELF(thread_id_));
+  if (conn->error() == 0) {
+    wm_->RunClosure(FROM_HERE,
+                    NewCallback(handler_,
+                                &NamedPipeServer::Handler::HandleIncoming,
+                                conn->req()),
+                    WorkerThreadManager::PRIORITY_HIGH);
+    return;
+  }
+
+  LOG(ERROR) << "Read error:" << conn->error();
+  {
+    AUTOLOCK(lock, &mu_);
+    actives_.erase(conn);
+  }
+  delete conn;
+}
+
+// Drains watches_ and calls WatchClosed() on every connection that was in it.
+void NamedPipeServer::ProcessWatchClosed() {
+  VLOG(1) << "ProcessWatchClosed";
+  DCHECK(THREAD_ID_IS_SELF(thread_id_));
+  // Grab the whole set under the lock, then call out without holding mu_.
+  std::set<Conn*> pending;
+  {
+    AUTOLOCK(lock, &mu_);
+    pending.swap(watches_);
+  }
+  for (std::set<Conn*>::const_iterator it = pending.begin();
+       it != pending.end(); ++it) {
+    Conn* const conn = *it;
+    VLOG(1) << "process watch conn=" << conn;
+    conn->WatchClosed();
+  }
+}
+
+// Drains replies_ and calls Reply() on each queued connection.
+// A connection whose Reply() succeeds is recorded in finished_; one whose
+// Reply() fails is deleted (it must not be in watches_ at that point).
+void NamedPipeServer::ProcessReplies() {
+  VLOG(1) << "ProcessReplies";
+  DCHECK(THREAD_ID_IS_SELF(thread_id_));
+  // Grab the whole queue under the lock, then call out without holding mu_.
+  std::deque<Conn*> pending;
+  {
+    AUTOLOCK(lock, &mu_);
+    pending.swap(replies_);
+  }
+  for (Conn* conn : pending) {
+    VLOG(1) << "process reply conn=" << conn;
+    if (conn->Reply()) {
+      AUTOLOCK(lock, &mu_);
+      finished_.insert(conn);
+      continue;
+    }
+
+    LOG_SYSRESULT(GetLastError());
+    LOG(WARNING) << "Reply error";
+    {
+      AUTOLOCK(lock, &mu_);
+      CHECK_EQ(watches_.count(conn), 0U);
+    }
+    delete conn;
+  }
+}
+
+// Removes |conn| from actives_ under mu_.
+// Must be called on the thread recorded in thread_id_ (DCHECKed).
+void NamedPipeServer::Closed(Conn* conn) {
+  DCHECK(THREAD_ID_IS_SELF(thread_id_));
+  VLOG(1) << "Closed";
+  AUTOLOCK(lock, &mu_);
+  actives_.erase(conn);
+}
+
+// Called when the reply for |conn| has been written.
+// Moves |conn| from finished_ to flushes_ and signals flush_, the event that
+// Flusher() waits on, so that the connection gets flushed and deleted by
+// ProcessFlushes().
+void NamedPipeServer::WriteDone(Conn* conn) {
+  DCHECK(THREAD_ID_IS_SELF(thread_id_));
+  VLOG(1) << "WriteDone";
+  {
+    AUTOLOCK(lock, &mu_);
+    // A connection that has been replied to must no longer be watched.
+    CHECK_EQ(watches_.count(conn), 0U);
+    finished_.erase(conn);
+    flushes_.insert(conn);
+  }
+  // Signal outside the lock; a failure here is only logged.
+  if (!SetEvent(flush_.handle())) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Failed to signal flush";
+  }
+}
+
+// Flusher loop: waits on flush_ and shutdown_.
+// Each flush_ signal runs ProcessFlushes(); a shutdown_ signal sets
+// flusher_done_ and ends the loop.  Any unexpected wait result is fatal.
+void NamedPipeServer::Flusher() {
+  LOG(INFO) << "Run flusher";
+
+  HANDLE handles[2];
+  handles[0] = flush_.handle();
+  handles[1] = shutdown_.handle();
+  while (true) {
+    const DWORD signaled = WaitForMultipleObjectsEx(
+        2, handles,
+        FALSE,  // wait all
+        INFINITE,
+        TRUE);
+    if (signaled == WAIT_OBJECT_0) {  // flush
+      ProcessFlushes();
+      continue;
+    }
+    if (signaled == WAIT_IO_COMPLETION) {
+      continue;
+    }
+    if (signaled == WAIT_OBJECT_0 + 1) {  // shutdown
+      LOG(INFO) << "shutting down";
+      if (!SetEvent(flusher_done_.handle())) {
+        LOG_SYSRESULT(GetLastError());
+        LOG(FATAL) << "Failed to signal done";
+      }
+      return;
+    }
+    LOG_SYSRESULT(GetLastError());
+    LOG(FATAL) << "WaitForMultipleObjectsEx";
+    return;
+  }
+}
+
+// Drains flushes_, then flushes and deletes every connection that was in it.
+void NamedPipeServer::ProcessFlushes() {
+  VLOG(1) << "ProcessFlushes";
+  // Grab the whole set under the lock, then call out without holding mu_.
+  std::set<Conn*> pending;
+  {
+    AUTOLOCK(lock, &mu_);
+    pending.swap(flushes_);
+  }
+  for (std::set<Conn*>::const_iterator it = pending.begin();
+       it != pending.end(); ++it) {
+    Conn* const conn = *it;
+    VLOG(1) << "process flush conn=" << conn;
+    conn->Flush();
+    {
+      // The connection must not be referenced by any other set when deleted.
+      AUTOLOCK(lock, &mu_);
+      CHECK_EQ(watches_.count(conn), 0U);
+      CHECK_EQ(finished_.count(conn), 0U);
+    }
+    delete conn;
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/client/named_pipe_server_win.h b/client/named_pipe_server_win.h
new file mode 100644
index 0000000..d027d1c
--- /dev/null
+++ b/client/named_pipe_server_win.h
@@ -0,0 +1,106 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_NAMED_PIPE_SERVER_WIN_H_
+#define DEVTOOLS_GOMA_CLIENT_NAMED_PIPE_SERVER_WIN_H_
+
+#ifdef _WIN32
+
+#include <deque>
+#include <memory>
+#include <set>
+#include <string>
+
+#include <AccCtrl.h>
+#include <Aclapi.h>
+
+#include "lockhelper.h"
+#include "named_pipe_win.h"
+#include "string_piece.h"
+#include "worker_thread_manager.h"
+
+namespace devtools_goma {
+
+class OneshotClosure;
+
+// NamedPipe server that handles request-response communication like HTTP.
+// Each message can't exceed 64KB.
+class NamedPipeServer {
+ public:
+  // One incoming request, as seen by a Handler.
+  class Request {
+   public:
+    virtual ~Request() {}
+    // The message received from the client.
+    virtual StringPiece request_message() const = 0;
+    // Sends |reply| back as the response to this request.
+    virtual void SendReply(StringPiece reply) = 0;
+    // Registers |callback| for the connection being closed.
+    // NOTE(review): presumably run once when the client goes away before a
+    // reply is sent -- confirm against the Conn implementation.
+    virtual void NotifyWhenClosed(OneshotClosure* callback) = 0;
+  };
+  // Receives requests; HandleIncoming is scheduled through
+  // WorkerThreadManager::RunClosure (see ReadDone).
+  class Handler {
+   public:
+    virtual ~Handler() {}
+    virtual void HandleIncoming(Request* req) = 0;
+  };
+  // |wm| and |handler| are stored as raw pointers and are not deleted by
+  // the code visible here.
+  NamedPipeServer(WorkerThreadManager* wm,
+                  Handler* handler)
+      : wm_(wm),
+        thread_id_(0),
+        handler_(handler),
+        shutting_down_(false) {
+  }
+  ~NamedPipeServer();
+
+  // Neither copyable nor movable.
+  NamedPipeServer(const NamedPipeServer&) = delete;
+  NamedPipeServer(NamedPipeServer&&) = delete;
+  NamedPipeServer& operator=(const NamedPipeServer&) = delete;
+  NamedPipeServer& operator=(NamedPipeServer&&) = delete;
+
+  void Start(const std::string& name);
+
+  void Stop();
+
+ private:
+  class Conn;
+  friend class Conn;
+
+  void NotifyWhenClosed(Conn* conn);
+  void ReadyToReply(Conn* conn);
+  void Run(std::string name);
+
+  // Creates a new pipe instance and starts an overlapped connect on it.
+  // Returns true only if the connect is still pending.
+  bool NewPipe(const std::string& name, OVERLAPPED* overlapped);
+  void ReadDone(Conn* conn);
+  void ProcessWatchClosed();
+  void ProcessReplies();
+  void Closed(Conn* conn);
+  void WriteDone(Conn* conn);
+
+  // Loop that waits on flush_/shutdown_ and runs ProcessFlushes().
+  void Flusher();
+  void ProcessFlushes();
+
+  WorkerThreadManager* wm_;
+  WorkerThreadManager::ThreadId thread_id_;  // for Run
+  Handler* handler_;
+
+  // The pipe instance most recently created by NewPipe().
+  ScopedNamedPipe pipe_;
+
+  // Event handles.
+  ScopedFd ready_;
+  ScopedFd watch_closed_;
+  ScopedFd reply_;
+  ScopedFd shutdown_;      // waited on by Flusher(); ends its loop.
+  ScopedFd done_;
+  ScopedFd flush_;         // signaled by WriteDone(); waited on by Flusher().
+  ScopedFd flusher_done_;  // signaled by Flusher() when it exits.
+
+  // mu_ guards the connection containers below.
+  Lock mu_;
+  std::set<Conn*> actives_;
+  std::set<Conn*> watches_;    // drained by ProcessWatchClosed().
+  std::deque<Conn*> replies_;  // drained by ProcessReplies().
+  std::set<Conn*> finished_;   // Reply() succeeded; erased in WriteDone().
+  std::set<Conn*> flushes_;    // filled by WriteDone(); drained by
+                               // ProcessFlushes(), which deletes the conns.
+  bool shutting_down_;
+};
+
+}  // namespace devtools_goma
+
+#endif  // _WIN32
+
+#endif  // DEVTOOLS_GOMA_CLIENT_NAMED_PIPE_SERVER_WIN_H_
diff --git a/client/named_pipe_server_win_unittest.cc b/client/named_pipe_server_win_unittest.cc
new file mode 100644
index 0000000..a6da7c2
--- /dev/null
+++ b/client/named_pipe_server_win_unittest.cc
@@ -0,0 +1,99 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "named_pipe_server_win.h"
+
+#include <string>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "named_pipe_win.h"
+#include "worker_thread_manager.h"
+
+namespace devtools_goma {
+
+// Holder for the handler used by the NamedPipeServer tests.
+class NamedPipeServerTest : public ::testing::Test {
+ public:
+  // Handler that expects one fixed request message and answers it with one
+  // fixed reply, both configured through Transaction().
+  class MockHandler : public NamedPipeServer::Handler {
+   public:
+    ~MockHandler() override {}
+    // Checks the incoming message against the expected one and replies.
+    void HandleIncoming(NamedPipeServer::Request* req) override {
+      LOG(INFO) << "Handle incoming: msg=" << req->request_message();
+      EXPECT_EQ(expect_request_, req->request_message());
+      req->SendReply(reply_);
+    }
+
+    // Sets the request to expect and the reply to send for it.
+    void Transaction(const std::string& expect_req,
+                     const std::string& reply) {
+      expect_request_ = expect_req;
+      reply_ = reply;
+    }
+
+   private:
+    std::string expect_request_;
+    std::string reply_;
+  };
+};
+
+// End-to-end round trip: start a server, connect as a client with plain
+// (blocking) file I/O, send one request and verify the reply.
+TEST(NamedPipeServerTest, Simple) {
+  WorkerThreadManager wm;
+  wm.Start(1);
+
+  std::unique_ptr<NamedPipeServerTest::MockHandler> handler(
+      new NamedPipeServerTest::MockHandler);
+  static const char kReq[] = "POST /e HTTP/1.1\r\n";
+  static const char kResp[] = "HTTP/1.1 200 OK\r\n";
+  handler->Transaction(kReq, kResp);
+
+  LOG(INFO) << "pipe server starts";
+  NamedPipeServer server(&wm, handler.get());
+  server.Start("named-pipe-server-win-unittest");
+
+  // Client side: open the pipe under the \\.\pipe\ namespace with the same
+  // name that was passed to server.Start().
+  LOG(INFO) << "pipe clients starts";
+  ScopedNamedPipe pipe(
+      CreateFileA("\\\\.\\pipe\\named-pipe-server-win-unittest",
+                  GENERIC_READ | GENERIC_WRITE,
+                  0,
+                  nullptr,
+                  OPEN_EXISTING,
+                  0,
+                  nullptr));
+  if (!pipe.valid()) {
+    LOG_SYSRESULT(GetLastError());
+  }
+  ASSERT_TRUE(pipe.valid());
+
+  LOG(INFO) << "pipe opened";
+
+  // Send the request; MockHandler verifies it on the server side.
+  LOG(INFO) << "send message " << kReq;
+  DWORD num_bytes = 0;
+  if (!WriteFile(pipe.get(), kReq, strlen(kReq), &num_bytes, nullptr)) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Failed to WriteFile to pipe";
+    GTEST_FAIL();
+  }
+  EXPECT_EQ(strlen(kReq), num_bytes);
+
+  // Read the reply and compare it with what the handler was told to send.
+  LOG(INFO) << "wait for response...";
+  num_bytes = 0;
+  std::string buf;
+  buf.resize(1024);
+  if (!ReadFile(pipe.get(), &buf[0], buf.size(), &num_bytes, nullptr)) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Failed to ReadFile from pipe";
+    GTEST_FAIL();
+  }
+  EXPECT_EQ(strlen(kResp), num_bytes);
+  buf.resize(num_bytes);
+  LOG(INFO) << "response=" << buf;
+  EXPECT_EQ(kResp, buf);
+
+  LOG(INFO) << "pipe server stopping...";
+  server.Stop();
+
+  wm.Finish();
+}
+
+}  // namespace devtools_goma
diff --git a/client/named_pipe_win.cc b/client/named_pipe_win.cc
new file mode 100644
index 0000000..6e553e8
--- /dev/null
+++ b/client/named_pipe_win.cc
@@ -0,0 +1,184 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "named_pipe_win.h"
+
+#include <glog/logging.h>
+
+#include "simple_timer.h"
+
+namespace devtools_goma {
+
+// Closes the owned handle, if any.
+ScopedNamedPipe::~ScopedNamedPipe() {
+  Close();
+}
+
+// Writes the raw handle value to |os| (for logging).
+void ScopedNamedPipe::StreamWrite(std::ostream& os) const {
+  os << handle_;
+}
+
+// Synchronously reads up to |len| bytes into |ptr|.
+// Returns the number of bytes read, or -1 if ReadFile fails.
+ssize_t ScopedNamedPipe::Read(void* ptr, size_t len) const {
+  DWORD transferred = 0;
+  const BOOL ok = ReadFile(handle_, ptr, len, &transferred, nullptr);
+  if (ok) {
+    return transferred;
+  }
+  LOG_SYSRESULT(GetLastError());
+  return -1;
+}
+
+// Synchronously writes |len| bytes from |ptr|.
+// Returns the number of bytes written, or -1 if WriteFile fails.
+ssize_t ScopedNamedPipe::Write(const void* ptr, size_t len) const {
+  DWORD transferred = 0;
+  const BOOL ok = WriteFile(handle_, ptr, len, &transferred, nullptr);
+  if (ok) {
+    return transferred;
+  }
+  LOG_SYSRESULT(GetLastError());
+  return -1;
+}
+
+namespace {
+
+// Intentionally empty completion routine passed to ReadFileEx/WriteFileEx
+// below.  The result of the operation is collected with
+// GetOverlappedResult in WaitAsyncOp instead.
+void IOCompletionRoutine(
+    DWORD error_code, DWORD num_bytes, LPOVERLAPPED overlapped) {
+}
+
+// Waits up to |timeout_sec| seconds for the overlapped operation |op| issued
+// on |handle| to complete.
+//
+// Returns
+//  - the number of bytes transferred when the operation completed,
+//  - |bufsize| when it completed with ERROR_MORE_DATA,
+//  - ERR_TIMEOUT when the time budget ran out,
+//  - FAIL on any other error.
+//
+// If the operation is still outstanding when the wait ends, it is cancelled
+// with CancelIo and this function blocks in GetOverlappedResult until the
+// operation is no longer in flight, so |op| may be destroyed on return.
+ssize_t WaitAsyncOp(HANDLE handle, ssize_t bufsize,
+                    LPOVERLAPPED op, int timeout_sec) {
+  int timeout_millisec = timeout_sec*1000;
+  SimpleTimer t;
+  // WAIT_TIMEOUT (not ERROR_TIMEOUT) is the value tested below.
+  DWORD w = WAIT_TIMEOUT;
+  while (timeout_millisec >= 0) {
+    t.Start();
+    w = WaitForSingleObjectEx(handle, timeout_millisec, TRUE);
+    switch (w) {
+      case WAIT_OBJECT_0:
+        timeout_millisec -= t.GetInMs();
+        {
+          DWORD num_bytes = 0;
+          if (GetOverlappedResult(handle, op, &num_bytes, FALSE)) {
+            return num_bytes;
+          }
+          DWORD err = GetLastError();
+          if (err == ERROR_IO_INCOMPLETE) {
+            // Signaled, but the operation is not done yet; wait again with
+            // whatever budget is left.
+            continue;
+          }
+          if (err == ERROR_MORE_DATA) {
+            return bufsize;
+          }
+          LOG_SYSRESULT(err);  // async op's error.
+          return FAIL;
+        }
+
+      case WAIT_IO_COMPLETION:
+        // Woken up by a queued completion routine; wait again with the
+        // remaining budget.
+        timeout_millisec -= t.GetInMs();
+        continue;
+
+      case WAIT_TIMEOUT:
+        break;
+
+      default:
+        LOG_SYSRESULT(GetLastError());
+        LOG(ERROR) << "wait AsyncOp w=" << w;
+        break;
+
+    }
+    break;
+  }
+  // The budget can also be used up through the "continue" paths above (or be
+  // negative from the start).  In that case the loop ends with |w| still
+  // holding WAIT_OBJECT_0 / WAIT_IO_COMPLETION, but it is a timeout as well.
+  const bool timed_out = (w == WAIT_TIMEOUT) || (timeout_millisec < 0);
+
+  if (CancelIo(handle) == 0) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "cancel by timeout";
+  }
+  DWORD num_bytes = 0;
+  if (GetOverlappedResult(handle, op, &num_bytes, TRUE) != 0) {
+    // partially completed?
+    return num_bytes;
+  }
+  DWORD err = GetLastError();
+  CHECK_NE(err, static_cast<DWORD>(ERROR_IO_INCOMPLETE))
+      << "GetOverlappedResult with bWait=TRUE should not result "
+      << "in ERROR_IO_INCOMPLETE";
+  switch (err) {
+    case ERROR_MORE_DATA:
+      // io completed before CancelIo?
+      return bufsize;
+    case ERROR_OPERATION_ABORTED:
+      // io cancelled by CancelIo.
+      break;
+    default:
+      // unexpected error?
+      LOG_SYSRESULT(err);
+      LOG(ERROR) << "cancel result error=" << err;
+  }
+  if (timed_out) {
+    return ERR_TIMEOUT;
+  }
+  return FAIL;
+}
+
+}  // anonymous namespace
+
+// Starts an overlapped read of up to |bufsize| bytes into |buf| and waits at
+// most |timeout_sec| seconds for it.  Returns FAIL if the read cannot be
+// started; otherwise returns what WaitAsyncOp reports.
+ssize_t ScopedNamedPipe::ReadWithTimeout(
+    char* buf, size_t bufsize, int timeout_sec) const {
+  OVERLAPPED op;
+  memset(&op, 0, sizeof(op));
+  if (ReadFileEx(handle_, buf, bufsize, &op, &IOCompletionRoutine)) {
+    return WaitAsyncOp(handle_, bufsize, &op, timeout_sec);
+  }
+  LOG_SYSRESULT(GetLastError());
+  return FAIL;
+}
+
+// Starts an overlapped write of |bufsize| bytes from |buf| and waits at most
+// |timeout_sec| seconds for it.  Returns FAIL if the write cannot be
+// started; otherwise returns what WaitAsyncOp reports.
+ssize_t ScopedNamedPipe::WriteWithTimeout(
+    const char* buf, size_t bufsize, int timeout_sec) const {
+  OVERLAPPED op;
+  memset(&op, 0, sizeof(op));
+  if (WriteFileEx(handle_, buf, bufsize, &op, &IOCompletionRoutine)) {
+    return WaitAsyncOp(handle_, bufsize, &op, timeout_sec);
+  }
+  LOG_SYSRESULT(GetLastError());
+  return FAIL;
+}
+
+// Writes all of |message|, calling WriteWithTimeout repeatedly until every
+// byte has been sent.  |timeout| applies to each individual write.
+// Returns OK on success, or the negative result of the failing write.
+int ScopedNamedPipe::WriteString(StringPiece message, int timeout) const {
+  const char* cursor = message.data();
+  for (int remaining = message.size(); remaining > 0; ) {
+    const int written = WriteWithTimeout(cursor, remaining, timeout);
+    if (written < 0) {
+      LOG(ERROR) << "write failure: " << written
+                 << " writen=" << (message.size() - remaining)
+                 << " size=" << remaining
+                 << " out of " << message.size();
+      return written;
+    }
+    cursor += written;
+    remaining -= written;
+  }
+  return OK;
+}
+
+std::string ScopedNamedPipe::GetLastErrorMessage() const {
+  char message[1024];
+  FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, nullptr,
+                 GetLastError(), 0,
+                 message, sizeof(message), nullptr);
+  return message;
+}
+
+// Closes the currently owned handle (if any) and takes ownership of
+// |handle|.
+void ScopedNamedPipe::reset(HANDLE handle) {
+  Close();
+  handle_ = handle;
+}
+
+// Closes the owned handle and leaves this object without a handle.
+// Returns true if there was nothing to close or CloseHandle succeeded.
+bool ScopedNamedPipe::Close() {
+  if (!valid()) {
+    return true;
+  }
+  // release() hands the handle over and marks this object invalid.
+  return CloseHandle(release()) == TRUE;
+}
+
+}  // namespace devtools_goma
diff --git a/client/named_pipe_win.h b/client/named_pipe_win.h
new file mode 100644
index 0000000..ab0b218
--- /dev/null
+++ b/client/named_pipe_win.h
@@ -0,0 +1,71 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_NAMED_PIPE_WIN_H_
+#define DEVTOOLS_GOMA_CLIENT_NAMED_PIPE_WIN_H_
+
+#ifdef _WIN32
+
+#include <ostream>
+#include <string>
+
+#include "config_win.h"
+#include "scoped_fd.h"
+#include "string_piece.h"
+
+namespace devtools_goma {
+
+// Move-only owner of a Windows named pipe HANDLE that implements IOChannel.
+// The handle is closed when the object is destroyed, reset or assigned to.
+class ScopedNamedPipe : public IOChannel {
+ public:
+  // Constructs an object that owns no handle.
+  ScopedNamedPipe() : handle_(INVALID_HANDLE_VALUE) {}
+  // Takes over the handle of |other|, which is left without one.
+  ScopedNamedPipe(ScopedNamedPipe&& other) : handle_(other.release()) {}
+  // Takes ownership of |handle|.
+  explicit ScopedNamedPipe(HANDLE handle) : handle_(handle) {}
+  ~ScopedNamedPipe() override;
+
+  // Closes the current handle and takes over the one owned by |other|.
+  ScopedNamedPipe& operator=(ScopedNamedPipe&& other) {
+    if (this == &other) {
+      return *this;
+    }
+    reset(other.release());
+    return *this;
+  }
+
+  ScopedNamedPipe(const ScopedNamedPipe&) = delete;
+  ScopedNamedPipe& operator=(const ScopedNamedPipe&) = delete;
+
+  void StreamWrite(std::ostream& os) const override;
+
+  // Synchronous I/O; return the byte count, or -1 on failure.
+  ssize_t Read(void* ptr, size_t len) const override;
+  ssize_t Write(const void* ptr, size_t len) const override;
+  // Overlapped I/O bounded by |timeout_sec| seconds.
+  ssize_t ReadWithTimeout(char* buf,
+                          size_t bufsize,
+                          int timeout_sec) const override;
+  ssize_t WriteWithTimeout(const char* buf,
+                           size_t bufsize,
+                           int timeout_sec) const override;
+  // Writes the whole |message|; |timeout| applies to each underlying write.
+  int WriteString(StringPiece message, int timeout) const override;
+
+  bool is_secure() const override { return true; }
+
+  std::string GetLastErrorMessage() const override;
+
+  // True if a handle is currently owned.
+  bool valid() const { return handle_ != INVALID_HANDLE_VALUE; }
+  // Returns the handle without giving up ownership.
+  HANDLE get() { return handle_; }
+  // Gives up ownership: returns the handle and leaves this object invalid.
+  HANDLE release() {
+    HANDLE handle = handle_;
+    handle_ = INVALID_HANDLE_VALUE;
+    return handle;
+  }
+  // Closes the current handle (if any) and takes ownership of |handle|.
+  void reset(HANDLE handle);
+  // Closes the handle; returns true if nothing was owned or the close
+  // succeeded.
+  bool Close();
+
+ private:
+  HANDLE handle_;
+};
+
+}  // namespace devtools_goma
+
+#endif  // _WIN32
+
+#endif  // DEVTOOLS_GOMA_CLIENT_NAMED_PIPE_WIN_H_
diff --git a/client/oauth2.cc b/client/oauth2.cc
new file mode 100644
index 0000000..8db3cb6
--- /dev/null
+++ b/client/oauth2.cc
@@ -0,0 +1,235 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "oauth2.h"
+
+#include <sstream>
+
+#include "file_helper.h"
+#include "glog/logging.h"
+#include "ioutil.h"
+#include "json/json.h"
+#include "json_util.h"
+#include "strutil.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// Extracts access_token, token_type and expires_in from the JSON text
+// |json|.  Returns false if the text is not valid JSON, if any of the three
+// fields cannot be read, or if expires_in is 0.
+bool ParseOAuth2AccessToken(const string& json,
+                            string* token_type,
+                            string* access_token,
+                            int* expires_in) {
+  static const char kAccessToken[] = "access_token";
+  static const char kTokenType[] = "token_type";
+  static const char kExpiresIn[] = "expires_in";
+
+  Json::Value root;
+  Json::Reader reader;
+  const bool parsed = reader.parse(json, root, false);
+  if (!parsed) {
+    LOG(WARNING) << "invalid json";
+    return false;
+  }
+
+  // Fields are read in this order; the first failure stops the chain and
+  // its error text is logged.
+  string err;
+  if (!GetStringFromJson(root, kAccessToken, access_token, &err) ||
+      !GetStringFromJson(root, kTokenType, token_type, &err) ||
+      !GetIntFromJson(root, kExpiresIn, expires_in, &err)) {
+    LOG(WARNING) << err;
+    return false;
+  }
+
+  return *expires_in != 0;
+}
+
+// Field names of the OAuth2 config JSON read by ParseOAuth2Config() and
+// written by FormatOAuth2Config().
+static const char kAuthURI[] = "auth_uri";
+static const char kTokenURI[] = "token_uri";
+static const char kScope[] = "scope";
+static const char kClientId[] = "client_id";
+static const char kClientSecret[] = "client_secret";
+// chrome-infra-auth.appspot oauth_config replies with client_not_so_secret.
+static const char kClientNotSoSecret[] = "client_not_so_secret";
+static const char kRefreshToken[] = "refresh_token";
+static const char kType[] = "type";
+
+// Scope used when the config does not specify one.
+static const char kGoogleScope[] =
+    "https://www.googleapis.com/auth/userinfo.email";
+
+// Google OAuth2 clients always have a secret, even if the client is an
+// installed application/utility such as this.
+// Please see following URL to understand why it is ok to do:
+// https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/master/auth.py
+static const char kDefaultClientId[] =
+    "687418631491-r6m1c3pr0lth5atp4ie07f03ae8omefc.apps.googleusercontent.com";
+static const char kDefaultSecret[] = "R7e-JO3L5sKVczuR-dKQrijF";
+
+// Fills |config| with the Google auth/token endpoints, the default scope and
+// the default client id and secret.  refresh_token and type are not
+// modified.  The resulting config must be enabled() (CHECKed).
+void DefaultOAuth2Config(OAuth2Config* config) {
+  config->auth_uri = kGoogleAuthURI;
+  config->token_uri = kGoogleTokenURI;
+  config->scope = kGoogleScope;
+  config->client_id = kDefaultClientId;
+  config->client_secret = kDefaultSecret;
+  CHECK(config->enabled());
+}
+
+// Parses the OAuth2 config JSON |str| into |config|.
+//  - auth_uri, token_uri, scope: fall back to the Google defaults (with a
+//    warning) when missing or empty.
+//  - client_id: required.
+//  - client_secret: required; client_not_so_secret is accepted instead.
+//  - type: optional, a warning is logged when missing or empty.
+//  - refresh_token: optional.
+// Returns false without touching |config| on failure.
+bool ParseOAuth2Config(const string& str, OAuth2Config* config) {
+  Json::Value root;
+  Json::Reader reader;
+  if (!reader.parse(str, root, false)) {
+    LOG(WARNING) << "invalid json";
+    return false;
+  }
+
+  string err;
+
+  // Reads |key|; warns and substitutes |fallback| if it is missing or empty.
+  const auto get_or_default =
+      [&root, &err](const char* key, const char* fallback) -> string {
+    string value;
+    if (!GetNonEmptyStringFromJson(root, key, &value, &err)) {
+      LOG(WARNING) << err;
+      value = fallback;
+    }
+    return value;
+  };
+
+  const string auth_uri = get_or_default(kAuthURI, kGoogleAuthURI);
+  const string token_uri = get_or_default(kTokenURI, kGoogleTokenURI);
+  const string scope = get_or_default(kScope, kGoogleScope);
+
+  string client_id;
+  if (!GetNonEmptyStringFromJson(root, kClientId, &client_id, &err)) {
+    LOG(WARNING) << err;
+    return false;
+  }
+
+  string client_secret;
+  if (!GetNonEmptyStringFromJson(root, kClientSecret, &client_secret, &err) &&
+      !GetNonEmptyStringFromJson(root, kClientNotSoSecret, &client_secret,
+                                 &err)) {
+    LOG(WARNING) << err;
+    return false;
+  }
+
+  string type;
+  if (!GetNonEmptyStringFromJson(root, kType, &type, &err)) {
+    LOG(WARNING) << err;
+  }
+
+  string refresh_token;
+  (void)GetStringFromJson(root, kRefreshToken, &refresh_token, &err);
+
+  // Everything needed is available; publish the result.
+  config->auth_uri = auth_uri;
+  config->token_uri = token_uri;
+  config->scope = scope;
+  config->client_id = client_id;
+  config->client_secret = client_secret;
+  config->refresh_token = refresh_token;
+  config->type = type;
+  return true;
+}
+
+// Serializes |config| to JSON text using the same field names that
+// ParseOAuth2Config() reads.
+string FormatOAuth2Config(const OAuth2Config& config) {
+  Json::Value json;
+  json[kAuthURI] = config.auth_uri;
+  json[kTokenURI] = config.token_uri;
+  json[kScope] = config.scope;
+  json[kClientId] = config.client_id;
+  json[kClientSecret] = config.client_secret;
+  json[kRefreshToken] = config.refresh_token;
+  json[kType] = config.type;
+  return Json::FastWriter().write(json);
+}
+
+// Writes |config|, formatted by FormatOAuth2Config(), to |filename|.
+// Returns false (after logging) if the file cannot be written.
+bool SaveOAuth2Config(const string& filename, const OAuth2Config& config) {
+  const string serialized = FormatOAuth2Config(config);
+  const bool written = WriteStringToFile(serialized, filename.c_str());
+  if (written) {
+    return true;
+  }
+  LOG(ERROR) << "Failed to write " << filename;
+  return false;
+}
+
+// Parses a service account JSON |str| into |config|.
+// "type" must be "service_account", and "private_key" and "client_email"
+// must be non-empty; all other fields are optional.
+// Returns false without touching |config| on failure.
+bool ParseServiceAccountJson(const string& str, ServiceAccountConfig* config) {
+  // chrome-infra's /creds/service_accounts doesn't have
+  // project_id, auth_uri, token_uri, auth_provider_x509_cert_url,
+  // client_x509_cert_url, different from service account json
+  // downloaded from google cloud console.
+
+  Json::Value root;
+  Json::Reader reader;
+  if (!reader.parse(str, root, false)) {
+    LOG(WARNING) << "invalid json";
+    return false;
+  }
+
+  string err;
+
+  string type_str;
+  if (!GetStringFromJson(root, "type", &type_str, &err)) {
+    LOG(WARNING) << err;
+    return false;
+  }
+  if (type_str != "service_account") {
+    LOG(WARNING) << "unexpected type: " << type_str;
+    return false;
+  }
+
+  // Collect everything in a scratch object so that |config| is replaced as
+  // a whole, and only once the required fields have been validated.
+  ServiceAccountConfig parsed;
+
+  if (!GetNonEmptyStringFromJson(root, "private_key", &parsed.private_key,
+                                 &err)) {
+    LOG(WARNING) << err;
+    return false;
+  }
+  if (!GetNonEmptyStringFromJson(root, "client_email", &parsed.client_email,
+                                 &err)) {
+    LOG(WARNING) << err;
+    return false;
+  }
+
+  // Optional fields: a missing one simply stays empty.
+  (void)GetStringFromJson(root, "project_id", &parsed.project_id, &err);
+  (void)GetStringFromJson(root, "private_key_id", &parsed.private_key_id,
+                          &err);
+  (void)GetStringFromJson(root, "client_id", &parsed.client_id, &err);
+  (void)GetStringFromJson(root, "auth_uri", &parsed.auth_uri, &err);
+  (void)GetStringFromJson(root, "token_uri", &parsed.token_uri, &err);
+  (void)GetStringFromJson(root, "auth_provider_x509_cert_url",
+                          &parsed.auth_provider_x509_cert_url, &err);
+  (void)GetStringFromJson(root, "client_x509_cert_url",
+                          &parsed.client_x509_cert_url, &err);
+
+  *config = parsed;
+  return true;
+}
+
+}  // namespace devtools_goma
diff --git a/client/oauth2.h b/client/oauth2.h
new file mode 100644
index 0000000..68dce91
--- /dev/null
+++ b/client/oauth2.h
@@ -0,0 +1,102 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_OAUTH2_H_
+#define DEVTOOLS_GOMA_CLIENT_OAUTH2_H_
+
+#include <string>
+
+#include "string_piece.h"
+
+namespace devtools_goma {
+
+// Settings needed to obtain OAuth2 access tokens with a refresh token.
+struct OAuth2Config {
+  std::string auth_uri;
+  std::string token_uri;
+  std::string scope;
+
+  std::string client_id;
+  std::string client_secret;
+  std::string refresh_token;
+
+  // "type": "authorized_user" is used in gRPC GoogleDefaultCredentials.
+  // TODO: Stop to use OAuth2Config with ServiceAccountConfig.
+  std::string type;
+
+  // True when the endpoints, the scope and the client credentials are all
+  // set.
+  bool enabled() const {
+    const bool something_missing =
+        auth_uri.empty() || token_uri.empty() || scope.empty() ||
+        client_id.empty() || client_secret.empty();
+    return !something_missing;
+  }
+  // True when enabled() and a refresh token is available as well.
+  bool valid() const {
+    if (!enabled()) {
+      return false;
+    }
+    return !refresh_token.empty();
+  }
+  // Empties every field.
+  void clear() {
+    auth_uri.clear();
+    token_uri.clear();
+    scope.clear();
+    client_id.clear();
+    client_secret.clear();
+    refresh_token.clear();
+    type.clear();
+  }
+};
+
+// ServiceAccountConfig has fields in service account json
+// generated by google cloud console.
+// Json file should have type field, which value must be "service_account".
+struct ServiceAccountConfig {
+  // required for GoogleOAuth2AccessTokenRefreshTask
+  // (ParseServiceAccountJson rejects input where either one is empty.)
+  std::string private_key;  // to sign JWT.
+  std::string client_email;  // claim set iss.
+
+  // optional. goma client doesn't use these fields.
+  // (some of them are used only for logging.)
+  // ParseServiceAccountJson leaves a field empty when it is not present.
+  std::string project_id;
+  std::string private_key_id;
+  std::string client_id;
+  std::string auth_uri;
+  std::string token_uri;
+  std::string auth_provider_x509_cert_url;
+  std::string client_x509_cert_url;
+};
+
+// Parses the OAuth2 access token in a refresh token response.
+// Returns true on success; in that case the token_type field in the json is
+// stored in |token_type|, the access_token field in |access_token|, and the
+// expires_in field in |expires_in|.
+bool ParseOAuth2AccessToken(const std::string& json,
+                            std::string* token_type,
+                            std::string* access_token,
+                            int* expires_in);
+
+// Returns default OAuth2 config.
+void DefaultOAuth2Config(OAuth2Config* config);
+
+// Parse OAuth2 config file.
+// Returns true on success and all OAuth2Config fields are ready to use.
+bool ParseOAuth2Config(const std::string& str, OAuth2Config* config);
+
+// Format OAuth2 config for OAuth2 config file.
+std::string FormatOAuth2Config(const OAuth2Config& config);
+
+bool SaveOAuth2Config(const std::string& filename, const OAuth2Config& config);
+
+// Parse ServiceAccount JSON file.
+bool ParseServiceAccountJson(const std::string& str,
+                             ServiceAccountConfig* config);
+
+// Google OAuth2 endpoints.  kGoogleAuthURI and kGoogleTokenURI are the
+// values DefaultOAuth2Config() sets, and the fallbacks ParseOAuth2Config()
+// uses when the config file does not provide auth_uri / token_uri.
+const char kGoogleAuthURI[] =
+    "https://accounts.google.com/o/oauth2/auth";
+const char kGoogleTokenURI[] =
+    "https://www.googleapis.com/oauth2/v3/token";
+const char kGoogleTokenInfoURI[] =
+    "https://www.googleapis.com/oauth2/v3/tokeninfo";
+const char kGoogleTokenAudienceURI[] =
+    "https://www.googleapis.com/oauth2/v4/token";
+
+// OAuth2 scope string for goma authentication.
+const char kGomaAuthScope[] = "https://www.googleapis.com/auth/userinfo.email";
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_OAUTH2_H_
diff --git a/client/oauth2_token.cc b/client/oauth2_token.cc
new file mode 100644
index 0000000..40d5330
--- /dev/null
+++ b/client/oauth2_token.cc
@@ -0,0 +1,1002 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "oauth2_token.h"
+
+#include <memory>
+#include <sstream>
+#include <vector>
+
+#include "autolock_timer.h"
+#include "callback.h"
+#include "compiler_specific.h"
+#include "file_helper.h"
+#include "glog/logging.h"
+#include "http.h"
+#include "json/json.h"
+#include "json_util.h"
+#include "jwt.h"
+#include "scoped_fd.h"
+#include "socket_factory.h"
+#include "string_piece.h"
+#include "string_piece_utils.h"
+
+namespace devtools_goma {
+
+namespace {
+
+using std::string;
+
+const char kGCERefreshToken[] = "gce-metadata-service-account";
+const char kServiceAccountRefreshTokenPrefix[] =
+    "google-cloud-service-account:";
+const char kgRPCType[] = "authorized_user";
+
+// If an error happens during the refresh, the refresh task keeps retrying
+// the refresh for this time period.
+static const int kRefreshTimeoutSec = 10;
+// If an error happens in the refresh of the access token, the refresh task
+// will not fetch an access token again for this period.
+static const int kErrorRefreshPendingSec = 60;
+
+class AuthRefreshConfig {
+ public:
+  virtual ~AuthRefreshConfig() {}
+  virtual bool enabled() const = 0;
+  virtual bool valid() const = 0;
+  virtual bool GetOAuth2Config(OAuth2Config* config) const = 0;
+  virtual bool SetOAuth2Config(const OAuth2Config& config) = 0;
+  virtual bool CanRefresh() const = 0;
+  virtual bool InitRequest(HttpRequest* req) const = 0;
+  // TODO: use StringPiece for resp_body instead?
+  virtual bool ParseResponseBody(const string& resp_body,
+                                 string* token_type,
+                                 string* access_token,
+                                 int* expires_in) const = 0;
+};
+
+class GoogleOAuth2AccessTokenRefreshTask : public OAuth2AccessTokenRefreshTask {
+ public:
+  // Doesn't take ownership of wm.
+  // Takes ownership of client and req.
+  GoogleOAuth2AccessTokenRefreshTask(
+      WorkerThreadManager* wm,
+      std::unique_ptr<AuthRefreshConfig> config,
+      std::unique_ptr<HttpClient> client,
+      std::unique_ptr<HttpRequest> req)
+      : wm_(wm),
+        config_(std::move(config)),
+        client_(std::move(client)),
+        req_(std::move(req)),
+        cond_(&mu_) {
+    LOG_IF(ERROR, !config_->enabled() || !config_->valid())
+        << "config enabled=" << config_->enabled()
+        << " valid=" << config_->valid();
+  }
+
+  ~GoogleOAuth2AccessTokenRefreshTask() override {
+    CHECK(!cancel_refresh_now_);
+    CHECK(!cancel_refresh_);
+    CHECK(shutting_down_);
+  }
+
+  string GetAccount() override {
+    string access_token;
+    {
+      AUTOLOCK(lock, &mu_);
+      access_token = access_token_;
+      if (access_token.empty()) {
+        return "";
+      }
+      if (!account_email_.empty()) {
+        return account_email_;
+      }
+    }
+
+    HttpClient::Options options = client_->options();
+    options.InitFromURL(kGoogleTokenInfoURI);
+    HttpClient client(
+        HttpClient::NewSocketFactoryFromOptions(options),
+        HttpClient::NewTLSEngineFactoryFromOptions(options),
+        options, wm_);
+
+    HttpRequest req;
+    std::ostringstream param;
+    param << "?access_token=" << access_token;
+    client.InitHttpRequest(&req, "GET", param.str());
+    req.AddHeader("Connection", "close");
+
+    HttpResponse resp;
+    HttpClient::Status status;
+    LOG(INFO) << "get tokeninfo for access_token";
+    client.Do(&req, &resp, &status);
+    if (status.err) {
+      LOG(WARNING) << "tokeninfo err=" << status.err
+                   << " " << status.err_message;
+      return "";
+    }
+    if (status.http_return_code != 200) {
+      LOG(WARNING) << "tokeninfo status=" << status.http_return_code;
+      return "";
+    }
+
+    string email;
+    {
+      string err;
+      Json::Reader reader;
+      Json::Value root;
+      if (reader.parse(string(resp.Body()), root, false)) {
+        if (!GetNonEmptyStringFromJson(root, "email", &email, &err)) {
+          LOG(WARNING) << "parse tokeninfo: " << err;
+        }
+      } else {
+        LOG(WARNING) << "invalid json";
+      }
+    }
+    {
+      AUTOLOCK(lock, &mu_);
+      account_email_ = email;
+    }
+    return email;
+  }
+
+  bool GetOAuth2Config(OAuth2Config* config) const override {
+    return config_->GetOAuth2Config(config);
+  }
+
+  bool SetOAuth2Config(const OAuth2Config& config) override {
+    AUTOLOCK(lock, &mu_);  // readers use config_ and the token under mu_.
+    const bool accepted = config_->SetOAuth2Config(config);
+    LOG_IF(WARNING, !accepted) << "failed to set oauth2 config.";
+    if (!accepted) return false;
+    token_expires_at_ = time(nullptr);  // expire the cached token now.
+    token_type_.clear();
+    access_token_.clear();
+    account_email_.clear();
+    return true;
+  }
+
+  // Returns "<token_type> <access_token>" while the cached token has not
+  // expired; returns an empty string otherwise.
+  string GetAuthorization() const override {
+    const time_t now = time(nullptr);
+    AUTOLOCK(lock, &mu_);
+    const bool usable = now < token_expires_at_ && !token_type_.empty() &&
+                        !access_token_.empty();
+    return usable ? token_type_ + " " + access_token_ : string();
+  }
+
+  // Returns true when the token is missing or expired and a refresh may be
+  // attempted: the config can refresh and no error back-off is pending.
+  bool ShouldRefresh() const override {
+    const time_t now = time(nullptr);
+    AUTOLOCK(lock, &mu_);
+    if (!config_->CanRefresh()) return false;
+    const time_t retry_after = last_network_error_ + kErrorRefreshPendingSec;
+    if (last_network_error_ > 0 && now < retry_after) {
+      LOG(WARNING)
+          << "prohibit to refresh OAuth2 access token for certain duration."
+          << " last_network_error=" << last_network_error_
+          << " pending=" << kErrorRefreshPendingSec;
+      return false;
+    }
+    const bool has_token = !token_type_.empty() && !access_token_.empty();
+    return !has_token || now >= token_expires_at_;
+  }
+
+  void RunAfterRefresh(WorkerThreadManager::ThreadId thread_id,
+                       OneshotClosure* closure) override {
+    time_t now = time(nullptr);
+    {
+      AUTOLOCK(lock, &mu_);
+      if (now < token_expires_at_ || shutting_down_) {
+        DCHECK(shutting_down_ || !access_token_.empty());
+        // access token is valid or oauth2 not available, go ahead.
+        wm_->RunClosureInThread(FROM_HERE,
+                                thread_id, closure,
+                                WorkerThreadManager::PRIORITY_MED);
+        return;
+      }
+      if (last_network_error_ > 0 &&
+          now < last_network_error_ + kErrorRefreshPendingSec) {
+        LOG(WARNING) << "will not refresh token."
+                     << " last_network_error=" << last_network_error_
+                     << " pending=" << kErrorRefreshPendingSec;
+        wm_->RunClosureInThread(FROM_HERE,
+                                thread_id, closure,
+                                WorkerThreadManager::PRIORITY_MED);
+        return;
+      }
+      // should refresh access token.
+      pending_tasks_.push_back(std::make_pair(thread_id, closure));
+      switch (state_) {
+        case NOT_STARTED: // first run.
+          state_ = RUN;
+          refresh_deadline_ = now + kRefreshTimeoutSec;
+          refresh_backoff_ms_ = client_->options().min_retry_backoff_ms;
+          break;
+        case RUN:
+          return;
+      }
+      if (!has_set_thread_id_) {
+        refresh_task_thread_id_ = wm_->GetCurrentThreadId();
+        has_set_thread_id_ = true;
+      }
+      wm_->RunClosureInThread(
+          FROM_HERE,
+          refresh_task_thread_id_,
+          NewCallback(
+              this, &GoogleOAuth2AccessTokenRefreshTask::RunRefresh),
+          WorkerThreadManager::PRIORITY_IMMEDIATE);
+    }
+  }
+
+  // Marks the task as shutting down, cancels the pending refresh closures
+  // (on the refresh thread) and shuts down client_.  Repeated calls no-op.
+  void Shutdown() override {
+    {
+      AUTOLOCK(lock, &mu_);
+      if (shutting_down_) {
+        return;
+      }
+      shutting_down_ = true;
+      if (cancel_refresh_now_ || cancel_refresh_) {
+        if (THREAD_ID_IS_SELF(refresh_task_thread_id_)) {
+          // Already on the refresh thread (e.g. goma_fetch.cc runs both on
+          // one thread), so cancel directly.  Posting Cancel() to this
+          // thread would not help: the caller goes on to block in Wait() on
+          // the same thread, so the posted closure could never run and
+          // cond_ would never be signalled.
+          if (cancel_refresh_now_) {
+            LOG(INFO) << "cancel now " << cancel_refresh_now_;
+            cancel_refresh_now_->Cancel();
+            cancel_refresh_now_ = nullptr;
+            cond_.Signal();
+          }
+          if (cancel_refresh_) {
+            LOG(INFO) << "cancel " << cancel_refresh_;
+            cancel_refresh_->Cancel();
+            cancel_refresh_ = nullptr;
+            cond_.Signal();
+          }
+        } else {
+          LOG(INFO) << "cancelling now..." << cancel_refresh_now_;
+          LOG(INFO) << "cancelling..." << cancel_refresh_;
+          wm_->RunClosureInThread(
+              FROM_HERE, refresh_task_thread_id_,
+              NewCallback(this, &GoogleOAuth2AccessTokenRefreshTask::Cancel),
+              WorkerThreadManager::PRIORITY_IMMEDIATE);
+        }
+      }
+    }
+    client_->Shutdown();
+  }
+
+  void Wait() override {
+    {
+      AUTOLOCK(lock, &mu_);
+      CHECK(shutting_down_) << "You must call Shutdown() beforehand.";
+      LOG(INFO) << "Wait cancel_refresh_now=" << cancel_refresh_now_;
+      LOG(INFO) << "Wait cancel_refresh_=" << cancel_refresh_;
+      while (cancel_refresh_now_ != nullptr || cancel_refresh_ != nullptr) {
+        cond_.Wait();
+      }
+    }
+    client_.reset();
+  }
+
+ private:
+  enum State {
+    NOT_STARTED,
+    RUN,
+  };
+
+  // Asks config_ to fill in req_; does nothing when config_ is not enabled.
+  void InitRequest() {
+    if (!config_->enabled()) {
+      LOG(INFO) << "not enabled.";
+      return;
+    }
+    LOG_IF(WARNING, !config_->InitRequest(req_.get()))
+        << "failed to init request.";
+  }
+
+  void ParseOAuth2AccessTokenUnlocked(int* next_update_in) {
+    static const int kOAuthExpireTimeMarginInSec = 60;
+    if (status_->err != OK) {
+      LOG(ERROR) << "HTTP communication failed to refresh OAuth2 access token."
+                 << " err_message=" << status_->err_message;
+      return;
+    }
+    int expires_in = 0;
+    if (!config_->ParseResponseBody(string(resp_.Body()),
+                                    &token_type_,
+                                    &access_token_,
+                                    &expires_in)) {
+      LOG(ERROR) << "Failed to parse OAuth2 access token:"
+                 << resp_.Body();
+      token_type_.clear();
+      access_token_.clear();
+      account_email_.clear();
+      return;
+    }
+    time_t now = time(nullptr);
+    token_expires_at_ = now + expires_in - kOAuthExpireTimeMarginInSec;
+    LOG(INFO) << "Got new OAuth2 access token."
+              << " now=" << now
+              << " expires_in=" << expires_in
+              << " token_expires_at=" << token_expires_at_;
+    VLOG(1) << "access_token=" << access_token_;
+    // expires_in is usually large enough. e.g. 3600.
+    // If it is small, auto update of access token will not work.
+    *next_update_in = expires_in - kOAuthExpireTimeMarginInSec * 2;
+    LOG_IF(WARNING, *next_update_in <= 0)
+        << "expires_in is too small.  auto update will not work."
+        << " next_update_in=" << *next_update_in
+        << " expires_in=" << expires_in
+        << " kOAuthExpireTimeMarginInSec=" << kOAuthExpireTimeMarginInSec;
+  }
+
+  void Done() {
+    DCHECK(THREAD_ID_IS_SELF(refresh_task_thread_id_));
+    bool http_ok = true;
+    if (status_->err != OK &&
+        (status_->http_return_code == 0 ||
+         status_->http_return_code / 100 == 5)) {
+      time_t now = time(nullptr);
+      http_ok = false;
+      {
+        AUTOLOCK(lock, &mu_);
+        if (now < refresh_deadline_) {
+          LOG(WARNING) << "refresh failed http=" << status_->http_return_code
+                       << " retry until deadline=" << refresh_deadline_
+                       << " refresh_backoff_ms_=" << refresh_backoff_ms_;
+
+          refresh_backoff_ms_ = HttpClient::BackoffMsec(
+              client_->options(), refresh_backoff_ms_, true);
+          LOG(INFO) << "backoff"
+                    << " refresh_backoff_ms=" << refresh_backoff_ms_;
+          CHECK(cancel_refresh_ == nullptr)
+              << "Somebody else seems to run refresh task and failing?";
+          cancel_refresh_ = wm_->RunDelayedClosureInThread(
+              FROM_HERE,
+              wm_->GetCurrentThreadId(),
+              refresh_backoff_ms_,
+              NewCallback(
+                  this, &GoogleOAuth2AccessTokenRefreshTask::RunRefresh));
+          return;
+        }
+        LOG(WARNING) << "refresh failed http=" << status_->http_return_code
+                     << " deadline_exceeded now=" << now
+                     << " deadline=" << refresh_deadline_;
+
+        // If last_network_error_ is set, ShouldRefresh() starts returning
+        // false to make task local fallback.  Let me make it postponed
+        // until refresh attempts reaches refresh_deadline_.
+        last_network_error_ = now;
+      }
+    }
+    LOG_IF(ERROR, status_->err != OK)
+        << "refresh failed."
+        << " err=" << status_->err
+        << " err_message=" << status_->err_message
+        << " http=" << status_->http_return_code;
+    VLOG(1) << "Get access token done.";
+    std::vector<std::pair<WorkerThreadManager::ThreadId,
+                          OneshotClosure*>> callbacks;
+    int next_update_in = 0;
+    {
+      AUTOLOCK(lock, &mu_);
+      DCHECK_EQ(state_, RUN);
+      state_ = NOT_STARTED;
+      refresh_deadline_ = 0;
+      ParseOAuth2AccessTokenUnlocked(&next_update_in);
+      if (http_ok && !access_token_.empty()) {
+        last_network_error_ = 0;
+        refresh_backoff_ms_ = 0;
+      }
+      callbacks.swap(pending_tasks_);
+    }
+    for (const auto& callback : callbacks) {
+      wm_->RunClosureInThread(FROM_HERE,
+                              callback.first, callback.second,
+                              WorkerThreadManager::PRIORITY_MED);
+    }
+    if (next_update_in > 0) {
+      {
+        AUTOLOCK(lock, &mu_);
+        if (shutting_down_) {
+          return;
+        }
+        if (cancel_refresh_now_) {
+          // The other RunRefreshNow task seems to be running.
+          // We will not add new delayed task.
+          LOG(INFO) << "The other OAuth2 RunRefreshNow task has already been "
+                    << "registred.  We will not override with newone.";
+          return;
+        }
+
+        DCHECK(THREAD_ID_IS_SELF(refresh_task_thread_id_));
+        cancel_refresh_now_ = wm_->RunDelayedClosureInThread(
+            FROM_HERE, refresh_task_thread_id_,
+            next_update_in * 1000,
+            NewCallback(
+                this, &GoogleOAuth2AccessTokenRefreshTask::RunRefreshNow));
+      }
+      LOG(INFO) << "Registered the OAuth2 refresh task to be executed later."
+                << " next_update_in=" << next_update_in;
+    }
+  }
+
+  void RunRefreshUnlocked() {
+    DCHECK_EQ(RUN, state_);
+    DCHECK(THREAD_ID_IS_SELF(refresh_task_thread_id_));
+    InitRequest();
+    // Make HttpClient get access token.
+    LOG(INFO) << "Going to refresh OAuth2 access token.";
+    resp_.Reset();
+    status_.reset(new HttpClient::Status);
+    status_->trace_id = "oauth2Refresh";
+    client_->DoAsync(
+        req_.get(), &resp_, status_.get(),
+        NewCallback(
+            this, &GoogleOAuth2AccessTokenRefreshTask::Done));
+  }
+
+  // Starts a refresh attempt unless shutting down.  Also used as the delayed
+  // retry closure scheduled by Done(), which is tracked by cancel_refresh_.
+  void RunRefresh() {
+    LOG(INFO) << "Run refresh.";
+    AUTOLOCK(lock, &mu_);
+    DCHECK(THREAD_ID_IS_SELF(refresh_task_thread_id_));
+    // This closure is running now, so clear cancel_refresh_ to keep Cancel()
+    // from calling Cancel() on it, then wake up a waiter in Wait().
+    cancel_refresh_ = nullptr;
+    cond_.Signal();
+    if (shutting_down_) {
+      return;
+    }
+    RunRefreshUnlocked();
+  }
+
+  // Delayed closure registered by Done() (tracked by cancel_refresh_now_) to
+  // renew the access token before the current one expires.
+  void RunRefreshNow() {
+    LOG(INFO) << "Run refresh now.";
+    AUTOLOCK(lock, &mu_);
+    DCHECK(THREAD_ID_IS_SELF(refresh_task_thread_id_));
+    CHECK(cancel_refresh_now_)
+        << "RunRefreshNow has been cancelled, but called?";
+    // This closure is running now, so clear cancel_refresh_now_ to keep
+    // Cancel() from calling Cancel() on it, then wake up a waiter in Wait().
+    cancel_refresh_now_ = nullptr;
+    cond_.Signal();
+    if (shutting_down_) {
+      return;
+    }
+    switch (state_) {
+      case NOT_STARTED:  // no refresh in flight; start one.
+        state_ = RUN;
+        refresh_deadline_ = time(nullptr) + kRefreshTimeoutSec;
+        refresh_backoff_ms_ = client_->options().min_retry_backoff_ms;
+        break;
+      case RUN:  // a refresh is already in flight.
+        return;
+    }
+    RunRefreshUnlocked();
+  }
+
+  void Cancel() {
+    AUTOLOCK(lock, &mu_);
+    DCHECK(THREAD_ID_IS_SELF(refresh_task_thread_id_));
+    if (cancel_refresh_now_) {
+      cancel_refresh_now_->Cancel();
+      cancel_refresh_now_ = nullptr;
+      cond_.Signal();
+      LOG(INFO) << "cancelled";
+    }
+    if (cancel_refresh_) {
+      cancel_refresh_->Cancel();
+      cancel_refresh_ = nullptr;
+      cond_.Signal();
+      LOG(INFO) << "cancelled";
+    }
+  }
+
+  WorkerThreadManager* wm_;
+  std::unique_ptr<AuthRefreshConfig> config_;
+  std::unique_ptr<HttpClient> client_;
+  std::unique_ptr<HttpRequest> req_;
+  HttpResponse resp_;
+  std::unique_ptr<HttpClient::Status> status_;
+
+  Lock mu_;  // protecting following members.
+  // signaled when cancel_refresh_now_ or cancel_refresh_ become nullptr.
+  ConditionVariable cond_;
+  State state_ = NOT_STARTED;
+  time_t refresh_deadline_ = 0;
+  string token_type_;
+  string access_token_;
+  string account_email_;
+  time_t token_expires_at_ = 0;
+  time_t last_network_error_ = 0;
+  int refresh_backoff_ms_ = 0;
+  std::vector<std::pair<WorkerThreadManager::ThreadId,
+                        OneshotClosure*>> pending_tasks_;
+
+  // This class does not own the CancelableClosures below.  Each pointer is
+  // valid only until Cancel() is called on it or its closure is executed,
+  // so a non-null pointer doubles as the flag that the closure is still
+  // pending (i.e. that calling Cancel() on it is allowed).
+  //
+  // cancel_refresh_now_ must be set to nullptr when it becomes invalid.
+  // cancel_refresh_ must also be set to nullptr when it becomes invalid.
+  WorkerThreadManager::CancelableClosure* cancel_refresh_now_ = nullptr;
+  WorkerThreadManager::CancelableClosure* cancel_refresh_ = nullptr;
+  WorkerThreadManager::ThreadId refresh_task_thread_id_;
+  bool has_set_thread_id_ = false;
+  bool shutting_down_ = false;
+
+  DISALLOW_COPY_AND_ASSIGN(GoogleOAuth2AccessTokenRefreshTask);
+};
+
+class OAuth2RefreshConfig : public AuthRefreshConfig {
+ public:
+  OAuth2RefreshConfig(const OAuth2RefreshConfig&) = delete;
+  OAuth2RefreshConfig& operator=(const OAuth2RefreshConfig&) = delete;
+
+  bool enabled() const override {
+    return config_.enabled();
+  }
+
+  bool valid() const override {
+    return config_.valid();
+  }
+
+  bool GetOAuth2Config(OAuth2Config* config) const override {
+    if (!config_.enabled() && config_.refresh_token != kGCERefreshToken) {
+      return false;
+    }
+    *config = config_;
+    return true;
+  }
+
+  bool SetOAuth2Config(const OAuth2Config& config) override {
+    if (config_.token_uri != config.token_uri) {
+      LOG(ERROR) << "unacceptable token_uri change:" << config.token_uri;
+      return false;
+    }
+    if (config_.refresh_token.empty() && !config.refresh_token.empty()) {
+      LOG(INFO) << "set refresh token";
+    } else if (config.refresh_token.empty()) {
+      LOG(WARNING) << "clear refresh token";
+    } else if (config_.refresh_token != config.refresh_token) {
+      LOG(INFO) << "update refresh token";
+    }
+    config_ = config;
+    return true;
+  }
+
+  bool CanRefresh() const override {
+    // if refresh token is not given, couldn't get access token and
+    // no need to refresh.
+    // go with logout state (i.e. no Authorization header).
+    return !config_.refresh_token.empty();
+  }
+
+  bool ParseResponseBody(const string& resp_body,
+                         string* token_type,
+                         string* access_token,
+                         int* expires_in) const override {
+    return ParseOAuth2AccessToken(
+        resp_body, token_type, access_token, expires_in);
+  }
+
+ protected:
+  explicit OAuth2RefreshConfig(const OAuth2Config& config)
+      : config_(config) {}
+
+  OAuth2Config config_;
+};
+
+class GCEServiceAccountRefreshConfig : public OAuth2RefreshConfig {
+ public:
+  GCEServiceAccountRefreshConfig(const GCEServiceAccountRefreshConfig&)
+      = delete;
+  GCEServiceAccountRefreshConfig&
+      operator=(const GCEServiceAccountRefreshConfig&) = delete;
+
+  static std::unique_ptr<OAuth2AccessTokenRefreshTask> New(
+      WorkerThreadManager* wm, const HttpClient::Options& http_options) {
+    HttpClient::Options options = http_options;
+    options.ClearAuthConfig();
+    options.allow_throttle = false;
+
+    LOG(INFO) << "gce service account:"
+              << http_options.gce_service_account;
+    // https://cloud.google.com/compute/docs/authentication#applications
+    const char kMetadataURI[] =
+        "http://metadata/computeMetadata/v1/instance/service-accounts/";
+    std::ostringstream url;
+    url << kMetadataURI << http_options.gce_service_account << "/token";
+    options.InitFromURL(url.str());
+    std::unique_ptr<HttpClient> client(new HttpClient(
+        HttpClient::NewSocketFactoryFromOptions(options),
+        HttpClient::NewTLSEngineFactoryFromOptions(options),
+        options, wm));
+
+    // HTTP setup.
+    std::unique_ptr<HttpRequest> req(new HttpRequest);
+    client->InitHttpRequest(req.get(), "GET", "");
+    req->AddHeader("Connection", "close");
+    req->AddHeader("Metadata-Flavor", "Google");
+
+    OAuth2Config config = http_options.oauth2_config;
+    config.auth_uri = kGoogleAuthURI;
+    config.token_uri = kGoogleTokenURI;
+    config.scope = "scope_is_configured_when_instance_created";
+    config.client_id = "client_is_not_needed";
+    config.client_secret = "client_secret_is_not_needed";
+    config.refresh_token = kGCERefreshToken;
+
+    std::unique_ptr<AuthRefreshConfig> refresh_config(
+        new GCEServiceAccountRefreshConfig(config));
+
+    return std::unique_ptr<OAuth2AccessTokenRefreshTask>(
+        new GoogleOAuth2AccessTokenRefreshTask(
+            wm, std::move(refresh_config), std::move(client), std::move(req)));
+  }
+
+  bool InitRequest(HttpRequest* req) const override {
+    // on GCE, just get service account token from metadata server.
+    LOG(INFO) << "init request:GCE service account";
+    return true;
+  }
+
+ private:
+  explicit GCEServiceAccountRefreshConfig(const OAuth2Config& config)
+      : OAuth2RefreshConfig(config) {}
+};
+
+class ServiceAccountRefreshConfig : public OAuth2RefreshConfig {
+ public:
+  ServiceAccountRefreshConfig(const ServiceAccountRefreshConfig&) = delete;
+  ServiceAccountRefreshConfig&
+      operator=(const ServiceAccountRefreshConfig&) = delete;
+
+  static std::unique_ptr<OAuth2AccessTokenRefreshTask> New(
+      WorkerThreadManager* wm, const HttpClient::Options& http_options) {
+    HttpClient::Options options = http_options;
+    options.ClearAuthConfig();
+    options.allow_throttle = false;
+
+    LOG(INFO) << "service account:"
+              << http_options.service_account_json_filename;
+    // https://developers.google.com/identity/protocols/OAuth2ServiceAccount#authorizingrequests
+    options.InitFromURL(kGoogleTokenAudienceURI);
+    string path = options.url_path_prefix;
+    options.url_path_prefix = "/";
+    std::unique_ptr<HttpClient> client(new HttpClient(
+        HttpClient::NewSocketFactoryFromOptions(options),
+        HttpClient::NewTLSEngineFactoryFromOptions(options),
+        options, wm));
+
+    // HTTP setup.
+    std::unique_ptr<HttpRequest> req(new HttpRequest);
+    client->InitHttpRequest(req.get(), "POST", path);
+    req->SetContentType("application/x-www-form-urlencoded");
+    req->AddHeader("Connection", "close");
+    OAuth2Config config = http_options.oauth2_config;
+    config.auth_uri = kGoogleAuthURI;
+    config.token_uri = kGoogleTokenURI;
+    config.scope = kGomaAuthScope;
+    config.client_id = "client_is_not_needed";
+    config.client_secret = "client_secret_is_not_needed";
+    config.refresh_token = kServiceAccountRefreshTokenPrefix +
+        http_options.service_account_json_filename;
+    LOG(INFO) << config.refresh_token;
+
+    std::unique_ptr<AuthRefreshConfig> refresh_config(
+        new ServiceAccountRefreshConfig(config));
+
+    return std::unique_ptr<OAuth2AccessTokenRefreshTask>(
+        new GoogleOAuth2AccessTokenRefreshTask(
+            wm, std::move(refresh_config), std::move(client), std::move(req)));
+  }
+
+  // Fills |req| with a JsonWebToken assertion signed by the service account.
+  bool InitRequest(HttpRequest* req) const override {
+    // refresh_token can be replaced via SetOAuth2Config(); check the prefix
+    // instead of letting substr() fail on a string shorter than the prefix.
+    const size_t prefix_len = sizeof(kServiceAccountRefreshTokenPrefix) - 1;
+    if (config_.refresh_token.compare(
+            0, prefix_len, kServiceAccountRefreshTokenPrefix) != 0) {
+      LOG(ERROR) << "not a service account refresh token";
+      return false;
+    }
+    const string json_path = config_.refresh_token.substr(prefix_len);
+    LOG(INFO) << json_path;
+    string saj;
+    if (!ReadFileToString(json_path, &saj)) {
+      LOG(ERROR) << "Failed to read " << json_path;
+      return false;
+    }
+    ServiceAccountConfig sa;
+    if (!ParseServiceAccountJson(saj, &sa)) {
+      LOG(ERROR) << "Failed to parse service account json in " << json_path;
+      return false;
+    }
+    std::unique_ptr<JsonWebToken::Key> key(
+        JsonWebToken::LoadKey(sa.private_key));
+    if (key == nullptr) {
+      LOG(ERROR) << "Invalid private key in " << json_path;
+      return false;
+    }
+    LOG(INFO) << "service account:" << sa.client_email
+              << " client_id=" << sa.client_id << " project_id="
+              << sa.project_id << " private_key_id=" << sa.private_key_id;
+    JsonWebToken::ClaimSet cs;
+    cs.iss = sa.client_email;
+    cs.scopes.emplace_back(kGomaAuthScope);
+    cs.expires_in_sec = 3600;  // lifetime requested for the assertion.
+    JsonWebToken jwt(cs);
+    const string req_body = strings::StrCat(
+        "grant_type=", JsonWebToken::kGrantTypeEncoded,
+        "&assertion=", jwt.Token(*key, time(nullptr)));
+    VLOG(1) << req_body;
+    req->SetBody(req_body);
+    return true;
+  }
+
+ private:
+  explicit ServiceAccountRefreshConfig(const OAuth2Config& config)
+      : OAuth2RefreshConfig(config) {}
+};
+
+class RefreshTokenRefreshConfig : public OAuth2RefreshConfig {
+ public:
+  RefreshTokenRefreshConfig(const RefreshTokenRefreshConfig&) = delete;
+  RefreshTokenRefreshConfig&
+      operator=(const RefreshTokenRefreshConfig&) = delete;
+
+  static std::unique_ptr<OAuth2AccessTokenRefreshTask> New(
+      WorkerThreadManager* wm, const HttpClient::Options& http_options) {
+    HttpClient::Options options = http_options;
+    options.ClearAuthConfig();
+    options.allow_throttle = false;
+
+    LOG(INFO) << "oauth2 enabled";
+
+    OAuth2Config config = http_options.oauth2_config;
+    if (config.token_uri != kGoogleTokenURI) {
+      LOG(ERROR) << "unsupported token_uri=" << config.token_uri;
+      return nullptr;
+    }
+    options.InitFromURL(kGoogleTokenURI);
+    string path = options.url_path_prefix;
+    // client will be used for tokeninfo path too
+    // clear path prefix in options and put path in request.
+    options.url_path_prefix = "/";
+    std::unique_ptr<HttpClient> client(new HttpClient(
+        HttpClient::NewSocketFactoryFromOptions(options),
+        HttpClient::NewTLSEngineFactoryFromOptions(options),
+        options, wm));
+
+    // HTTP setup.
+    std::unique_ptr<HttpRequest> req(new HttpRequest);
+    client->InitHttpRequest(req.get(), "POST", path);
+    req->SetContentType("application/x-www-form-urlencoded");
+    req->AddHeader("Connection", "close");
+    config.type = kgRPCType;
+
+    std::unique_ptr<AuthRefreshConfig> refresh_config(
+        new RefreshTokenRefreshConfig(config));
+
+    return std::unique_ptr<OAuth2AccessTokenRefreshTask>(
+        new GoogleOAuth2AccessTokenRefreshTask(
+            wm, std::move(refresh_config), std::move(client), std::move(req)));
+  }
+
+  bool InitRequest(HttpRequest* req) const override {
+    LOG(INFO) << "init request:refresh token";
+
+    // TODO: reconstruct client if config_.token_uri has been changed?
+    const string req_body = strings::StrCat(
+        "client_id=", config_.client_id,
+        "&client_secret=", config_.client_secret,
+        "&refresh_token=", config_.refresh_token,
+        "&grant_type=refresh_token");
+    VLOG(1) << req_body;
+    req->SetBody(req_body);
+    return true;
+  }
+
+ private:
+  explicit RefreshTokenRefreshConfig(const OAuth2Config& config)
+      : OAuth2RefreshConfig(config) {}
+};
+
+class LuciAuthRefreshConfig : public AuthRefreshConfig {
+ public:
+  static std::unique_ptr<OAuth2AccessTokenRefreshTask> New(
+      WorkerThreadManager* wm, const HttpClient::Options& http_options) {
+    static const char kLuciLocalAuthServiceHost[] = "127.0.0.1";
+    static const char kLuciLocalAuthServicePath[] =
+        "/rpc/LuciLocalAuthService.GetOAuthToken";
+
+    HttpClient::Options options = http_options;
+    options.ClearAuthConfig();
+    options.allow_throttle = false;
+
+    const LuciContextAuth& local_auth = http_options.luci_context_auth;
+    options.use_ssl = false;
+    options.dest_host_name = kLuciLocalAuthServiceHost;
+    options.dest_port = local_auth.rpc_port;
+    options.url_path_prefix = kLuciLocalAuthServicePath;
+
+    LOG(INFO) << "LUCI_CONTEXT local_auth is used with account: "
+              << local_auth.default_account_id;
+
+    std::unique_ptr<HttpClient> client(new HttpClient(
+        HttpClient::NewSocketFactoryFromOptions(options),
+        nullptr, options, wm));
+
+    std::unique_ptr<HttpRequest> req(new HttpRequest);
+    client->InitHttpRequest(req.get(), "POST", "");
+    req->SetContentType("application/json");
+    req->AddHeader("Connection", "close");
+
+    std::unique_ptr<AuthRefreshConfig> refresh_config(
+        new LuciAuthRefreshConfig(local_auth));
+
+    return std::unique_ptr<OAuth2AccessTokenRefreshTask>(
+        new GoogleOAuth2AccessTokenRefreshTask(
+            wm, std::move(refresh_config), std::move(client), std::move(req)));
+  }
+
+  bool enabled() const override {
+    return true;
+  }
+
+  bool valid() const override {
+    return local_auth_.enabled();
+  }
+
+  bool GetOAuth2Config(OAuth2Config* config) const override {
+    LOG(WARNING) << "GetOAuth2Config won't work for LUCI_CONTEXT.";
+    return false;
+  }
+
+  bool SetOAuth2Config(const OAuth2Config& config) override {
+    LOG(WARNING) << "SetOAuth2Config won't work for LUCI_CONTEXT.";
+    return false;
+  }
+
+  bool CanRefresh() const override {
+    return valid();
+  }
+
+  bool InitRequest(HttpRequest* req) const override {
+    LuciOAuthTokenRequest treq;
+    treq.scopes.push_back(kGomaAuthScope);
+    treq.secret = local_auth_.secret;
+    treq.account_id = local_auth_.default_account_id;
+
+    VLOG(1) << treq.ToString();
+    req->SetBody(treq.ToString());
+    return true;
+  }
+
+  bool ParseResponseBody(const string& resp_body,
+                         string* token_type,
+                         string* access_token,
+                         int* expires_in) const override {
+    static const char kTokenType[] = "Bearer";
+    LuciOAuthTokenResponse resp;
+    if (!ParseLuciOAuthTokenResponse(resp_body, &resp)) {
+      LOG(WARNING) << "Failed to parse luci auth token response."
+                   << " body=" << resp_body;
+      return false;
+    }
+    time_t now = time(nullptr);
+    *token_type = kTokenType;
+    *access_token = resp.access_token;
+    *expires_in = resp.expiry - now;
+    return true;
+  }
+
+ private:
+  explicit LuciAuthRefreshConfig(const LuciContextAuth& local_auth)
+      : local_auth_(local_auth) {}
+
+  LuciContextAuth local_auth_;
+};
+
+}  // namespace
+
+/* static */
+std::unique_ptr<OAuth2AccessTokenRefreshTask>
+OAuth2AccessTokenRefreshTask::New(
+    WorkerThreadManager* wm,
+    const HttpClient::Options& http_options) {
+  // The first configured source wins.  1) GCE metadata service account.
+  if (!http_options.gce_service_account.empty()) {
+    return GCEServiceAccountRefreshConfig::New(wm, http_options);
+  }
+  // 2) Service account JSON key file.
+  if (!http_options.service_account_json_filename.empty()) {
+    return ServiceAccountRefreshConfig::New(wm, http_options);
+  }
+  // 3) OAuth2 refresh token.
+  if (http_options.oauth2_config.enabled()) {
+    return RefreshTokenRefreshConfig::New(wm, http_options);
+  }
+  // 4) LUCI_CONTEXT local auth.  Otherwise there is nothing to refresh.
+  if (http_options.luci_context_auth.enabled()) {
+    return LuciAuthRefreshConfig::New(wm, http_options);
+  }
+  return nullptr;
+}
+
+// Exchanges the authorization |code| for a refresh token at kGoogleTokenURI
+// with a synchronous POST.  Returns the refresh token, or "" on any failure.
+string ExchangeOAuth2RefreshToken(
+    WorkerThreadManager* wm, const HttpClient::Options& http_options,
+    const OAuth2Config& config, const string& code,
+    const string& redirect_uri) {
+  if (config.token_uri != kGoogleTokenURI) {
+    LOG(ERROR) << "unsupported token_uri=" << config.token_uri;
+    return "";
+  }
+  HttpClient::Options options = http_options;
+  options.InitFromURL(kGoogleTokenURI);
+  HttpClient client(
+      HttpClient::NewSocketFactoryFromOptions(options),
+      HttpClient::NewTLSEngineFactoryFromOptions(options),
+      options, wm);
+
+  HttpRequest req;
+  client.InitHttpRequest(&req, "POST", "");
+  req.SetContentType("application/x-www-form-urlencoded");
+  req.AddHeader("Connection", "close");
+
+  std::ostringstream req_body;
+  req_body << "code=" << code
+           << "&client_id=" << config.client_id
+           << "&client_secret=" << config.client_secret
+           << "&redirect_uri=" << redirect_uri
+           << "&grant_type=authorization_code";
+  VLOG(1) << req_body.str();
+  req.SetBody(req_body.str());
+
+  HttpResponse resp;
+  HttpClient::Status status;
+  LOG(INFO) << "exchange code to refresh_token";
+  client.Do(&req, &resp, &status);
+  if (status.err) {
+    LOG(WARNING) << "exchange refresh token err=" << status.err
+                 << " " << status.err_message;
+    return "";
+  }
+  if (status.http_return_code != 200) {
+    LOG(WARNING) << "exchange refresh status=" << status.http_return_code;
+    return "";
+  }
+  string token;
+  {
+    string err;
+    Json::Reader reader;
+    Json::Value root;
+    if (reader.parse(string(resp.Body()), root, false)) {
+      if (!GetNonEmptyStringFromJson(root, "refresh_token", &token, &err)) {
+        LOG(WARNING) << "parse exchange result: " << err;
+      }
+    } else {
+      LOG(WARNING) << "invalid json";
+    }
+  }
+  return token;
+}
+
+}  // namespace devtools_goma
diff --git a/client/oauth2_token.h b/client/oauth2_token.h
new file mode 100644
index 0000000..78549fc
--- /dev/null
+++ b/client/oauth2_token.h
@@ -0,0 +1,53 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_OAUTH2_TOKEN_H_
+#define DEVTOOLS_GOMA_CLIENT_OAUTH2_TOKEN_H_
+
+#include <memory>
+#include <string>
+
+#include "http.h"
+#include "oauth2.h"
+#include "worker_thread_manager.h"
+
+namespace devtools_goma {
+
+class OneshotClosure;
+
+class OAuth2AccessTokenRefreshTask {  // Pure-virtual interface.
+ public:
+  // Factory: creates an OAuth2AccessTokenRefreshTask for |http_options|.
+  // Ownership of the returned object is passed to the caller (unique_ptr).
+  static std::unique_ptr<OAuth2AccessTokenRefreshTask> New(
+      WorkerThreadManager* em,
+      const HttpClient::Options& http_options);
+
+  OAuth2AccessTokenRefreshTask() {}
+  virtual ~OAuth2AccessTokenRefreshTask() {}
+  // All methods below are pure virtual; see the implementation for semantics.
+  virtual std::string GetAccount() = 0;
+
+  virtual bool GetOAuth2Config(OAuth2Config* config) const = 0;
+  virtual bool SetOAuth2Config(const OAuth2Config& config) = 0;
+
+  virtual std::string GetAuthorization() const = 0;
+  virtual bool ShouldRefresh() const = 0;
+  virtual void RunAfterRefresh(WorkerThreadManager::ThreadId thread_id,
+                               OneshotClosure* closure) = 0;
+  virtual void Shutdown() = 0;
+  virtual void Wait() = 0;
+ private:
+  DISALLOW_COPY_AND_ASSIGN(OAuth2AccessTokenRefreshTask);
+};
+
+std::string ExchangeOAuth2RefreshToken(WorkerThreadManager* wm,
+                                       const HttpClient::Options& options,
+                                       const OAuth2Config& config,
+                                       const std::string& code,
+                                       const std::string& redirect_uri);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_OAUTH2_TOKEN_H_
diff --git a/client/oauth2_unittest.cc b/client/oauth2_unittest.cc
new file mode 100644
index 0000000..4ef6a78
--- /dev/null
+++ b/client/oauth2_unittest.cc
@@ -0,0 +1,280 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "oauth2.h"
+
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include "glog/logging.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+TEST(OAuth2Test, ParseOAuth2AccessToken) {
+  // A well-formed token response with a space after each colon.
+  static const char* const kTokenJson =
+      "{\r\n"
+      " \"access_token\": \"ya12.this_is_token\",\r\n"
+      " \"token_type\": \"Bearer\",\r\n"
+      " \"expires_in\": 3600\r\n"
+      "}\r\n";
+
+  string type, token;
+  int expires;
+  EXPECT_TRUE(ParseOAuth2AccessToken(
+      kTokenJson, &type, &token, &expires));
+  EXPECT_EQ(3600, expires);
+  EXPECT_EQ("ya12.this_is_token", token);
+  EXPECT_EQ("Bearer", type);
+}
+
+TEST(OAuth2Test, ParseOAuth2AccessTokenNoSpaces) {
+  // A token response that has no space after each colon.
+  static const char* const kTokenJson =
+      "{\r\n"
+      " \"access_token\":\"1/fFBGRNJru1FQd44AzqT3Zg\",\r\n"
+      " \"token_type\":\"Bearer\",\r\n"
+      " \"expires_in\":3920\r\n"
+      "}\r\n";
+
+  string type, token;
+  int expires;
+  EXPECT_TRUE(ParseOAuth2AccessToken(
+      kTokenJson, &type, &token, &expires));
+  EXPECT_EQ(3920, expires);
+  EXPECT_EQ("1/fFBGRNJru1FQd44AzqT3Zg", token);
+  EXPECT_EQ("Bearer", type);
+}
+
+TEST(OAuth2Test, ParseOAuth2AccessTokenError) {
+  // An error response carries none of the token fields; parsing must fail.
+  static const char* const kErrorJson =
+      "{\r\n"
+      " \"error\" : \"authorization_pending\""
+      "}\r\n";
+  string type, token;
+  int expires;
+  EXPECT_FALSE(
+      ParseOAuth2AccessToken(kErrorJson, &type, &token, &expires));
+}
+
+TEST(OAuth2Test, ParseOAuth2Config) {
+  // Config JSON with every key set, including "type".
+  static const char* const kJson =
+      "{\"auth_uri\": \"https://accounts.google.com/o/oauth2/auth\""
+      ", \"redirect_uri\": \"http://localhost:57003\""
+      ", \"client_id\": \"575346572923.apps.googleusercontent.com\""
+      ", \"scope\": \"https://www.googleapis.com/auth/userinfo.email\""
+      ", \"token_uri\": \"https://www.googleapis.com/oauth2/v3/token\""
+      ", \"client_secret\": \"xxx_client_secret_xxx\""
+      ", \"refresh_token\": \"xxx_refresh_token_xxx\""
+      ", \"type\": \"authorized_user\"}";
+  OAuth2Config parsed;
+  EXPECT_TRUE(ParseOAuth2Config(kJson, &parsed));
+  EXPECT_TRUE(parsed.valid());
+  EXPECT_EQ("authorized_user", parsed.type);
+  EXPECT_EQ("575346572923.apps.googleusercontent.com", parsed.client_id);
+  EXPECT_EQ("xxx_client_secret_xxx", parsed.client_secret);
+  EXPECT_EQ("xxx_refresh_token_xxx", parsed.refresh_token);
+  EXPECT_EQ("https://accounts.google.com/o/oauth2/auth", parsed.auth_uri);
+  EXPECT_EQ("https://www.googleapis.com/oauth2/v3/token", parsed.token_uri);
+  EXPECT_EQ("https://www.googleapis.com/auth/userinfo.email", parsed.scope);
+}
+
+TEST(OAuth2Test, ParseOAuth2ConfigWithoutType) {
+  // No "type" key: the config still parses and |type| stays empty.
+  static const char* const kJson =
+      "{\"auth_uri\": \"https://accounts.google.com/o/oauth2/auth\""
+      ", \"redirect_uri\": \"http://localhost:57003\""
+      ", \"client_id\": \"575346572923.apps.googleusercontent.com\""
+      ", \"scope\": \"https://www.googleapis.com/auth/userinfo.email\""
+      ", \"token_uri\": \"https://www.googleapis.com/oauth2/v3/token\""
+      ", \"client_secret\": \"xxx_client_secret_xxx\""
+      ", \"refresh_token\": \"xxx_refresh_token_xxx\"}";
+  OAuth2Config parsed;
+  EXPECT_TRUE(ParseOAuth2Config(kJson, &parsed));
+  EXPECT_TRUE(parsed.valid());
+  EXPECT_EQ("", parsed.type);
+  EXPECT_EQ("575346572923.apps.googleusercontent.com", parsed.client_id);
+  EXPECT_EQ("xxx_client_secret_xxx", parsed.client_secret);
+  EXPECT_EQ("xxx_refresh_token_xxx", parsed.refresh_token);
+  EXPECT_EQ("https://accounts.google.com/o/oauth2/auth", parsed.auth_uri);
+  EXPECT_EQ("https://www.googleapis.com/oauth2/v3/token", parsed.token_uri);
+  EXPECT_EQ("https://www.googleapis.com/auth/userinfo.email", parsed.scope);
+}
+
+TEST(OAuth2Test, ParseOAuth2ConfigForChromeInfraAuth) {
+  // https://chrome-infra-auth.appspot.com/auth/api/v1/server/oauth_config
+  // with secret modification.  Note the key "client_not_so_secret".
+  static const char* kConfigStr =
+      "{\"client_not_so_secret\": \"xxx_client_secret_xxx\""
+      ", \"additional_client_ids\": "
+      "[\"1037249634491-mvrb78t4pov1kcq626e4ipcemtfvv31k.apps."
+      "googleusercontent.com\""
+      ", \"174799409470-4nitjq4rqk8brkdl6nb8l2gagui5inuk.apps."
+      "googleusercontent.com\""
+      ", \"174799409470-8k3b89iov4racu9jrf7if3k4591voig3.apps."
+      "googleusercontent.com\""
+      ", \"174799409470-gbrk5dsauquu72522f8qpg4qo7oim2b5.apps."
+      "googleusercontent.com\""
+      ", \"446450136466-2hr92jrq8e6i4tnsa56b52vacp7t3936.apps."
+      "googleusercontent.com\""
+      ", \"5071639625-1lppvbtck1morgivc6sq4dul7klu27sd.apps."
+      "googleusercontent.com\""
+      ", \"553957813421-p7tl669udlpng6i0uqin89irf9uuuhqa.apps."
+      "googleusercontent.com\""
+      ", \"31977622648-utchjftf485h6q7fih17jdl7pusqabc4.apps."
+      "googleusercontent.com\""
+      ", \"32555940559.apps.googleusercontent.com\"]"
+      ", \"client_id\": \"575346572923.apps.googleusercontent.com\""
+      ", \"primary_url\": null"
+      ", \"type\": \"authorized_user\"}";
+  OAuth2Config config;  // auth_uri/token_uri/scope are not in kConfigStr.
+  EXPECT_TRUE(ParseOAuth2Config(kConfigStr, &config));
+  EXPECT_EQ("https://accounts.google.com/o/oauth2/auth", config.auth_uri);
+  EXPECT_EQ("https://www.googleapis.com/oauth2/v3/token", config.token_uri);
+  EXPECT_EQ("https://www.googleapis.com/auth/userinfo.email", config.scope);
+  EXPECT_EQ("575346572923.apps.googleusercontent.com", config.client_id);
+  EXPECT_EQ("xxx_client_secret_xxx", config.client_secret);
+  EXPECT_EQ("", config.refresh_token);
+  EXPECT_EQ("authorized_user", config.type);
+}
+
+TEST(OAuth2Test, ParseOAuth2ConfigError) {
+  // client_secret and refresh_token are empty: parsing is expected to fail.
+  static const char* const kJson =
+      "{\"auth_uri\": \"https://accounts.google.com/o/oauth2/auth\""
+      ", \"redirect_uri\": \"http://localhost:57003\""
+      ", \"client_id\": \"575346572923.apps.googleusercontent.com\""
+      ", \"scope\": \"https://www.googleapis.com/auth/userinfo.email\""
+      ", \"token_uri\": \"https://www.googleapis.com/oauth2/v3/token\""
+      ", \"client_secret\": \"\""
+      ", \"refresh_token\": \"\"}";
+  OAuth2Config parsed;
+  EXPECT_FALSE(ParseOAuth2Config(kJson, &parsed));
+  EXPECT_FALSE(parsed.valid());
+}
+
+TEST(OAuth2Test, FormatOAuth2Config) {
+  // Round trip: FormatOAuth2Config() output must parse back to equal fields.
+  OAuth2Config original;
+  original.type = "authorized_user";
+  original.client_id = "575346572923.apps.googleusercontent.com";
+  original.client_secret = "xxx_client_secret_xxx";
+  original.refresh_token = "xxx_refresh_token_xxx";
+  original.auth_uri = "https://accounts.google.com/o/oauth2/auth";
+  original.token_uri = "https://www.googleapis.com/oauth2/v3/token";
+  original.scope = "https://www.googleapis.com/auth/userinfo.email";
+  EXPECT_TRUE(original.valid());
+  const string formatted = FormatOAuth2Config(original);
+  LOG(INFO) << formatted;
+  OAuth2Config reparsed;
+  EXPECT_TRUE(ParseOAuth2Config(formatted, &reparsed));
+  EXPECT_TRUE(reparsed.valid());
+  EXPECT_EQ(original.type, reparsed.type);
+  EXPECT_EQ(original.client_id, reparsed.client_id);
+  EXPECT_EQ(original.client_secret, reparsed.client_secret);
+  EXPECT_EQ(original.refresh_token, reparsed.refresh_token);
+  EXPECT_EQ(original.auth_uri, reparsed.auth_uri);
+  EXPECT_EQ(original.token_uri, reparsed.token_uri);
+  EXPECT_EQ(original.scope, reparsed.scope);
+}
+
+TEST(OAuth2Test, ParseServiceAccountJson) {
+  // This private key is the same as the one in jwt_unittest.cc.
+  // Each "\\n" below is a JSON escape that should parse to a newline.
+  static const char* kServiceAccountJson = "{\n"
+      "  \"type\": \"service_account\",\n"
+      "  \"project_id\": \"google.com:cxx-compiler-service\",\n"
+      "  \"private_key_id\": \"c8c64bdffb032ad014993d4509521cbb4d64c388\",\n"
+      "  \"private_key\": \"-----BEGIN PRIVATE KEY-----\\n"
+      "MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCJ2ljEsJpoZmrZ\\n"
+      "AHTcs5HiFg9PkXUQJF4aK8jVacBl6C2U0YJGwnCCPYQHyju0++eZRWlAqds4Jn5O\\n"
+      "8JclnLs5JFD6Qzlqosqwn4qu8QI7dy4PybjwxRZMQtWm5vY6gHmvID4WEvWjxjL2\\n"
+      "mqVOdThYy2YV/3PsCyjf6Z2XYtAZZJoK94w4OpF30IF1wuEZHllh6VJ4wpRiqpT8\\n"
+      "bHxSiMlH2CTaoKJowrgAoYENj5eSbnPP0dsSftdA3Ckeu5/A4OjhyrOCsjwZag6J\\n"
+      "Ipw5oRRDm9iiRt7dHdtrjEkGsiaFZvqY4sW++8x8MGkPpO+Mc1IvJVjj7khOTHEH\\n"
+      "mWORcjbTAgMBAAECggEAHmP0jeghIkLx60UefklYL++NEI2QsS5TUJG2hNX7hHvb\\n"
+      "EKPfhJn5E71cDhuXbh7av/99ZLQNkCNsVRrVN4WGAOLwtzt6vPeGl8mUWVzokROF\\n"
+      "JBXkn6/TapyRXWotflg0e1cwWM11OdXIBnWxW8qb0XeF2fOnKrKLIFHwXB98oRwn\\n"
+      "G6jg3A3F+//PCvTNk+oTJUyNVIrF6MsLN2/a9CJwfQA4bDShnPlQj8ToXRf1mEqv\\n"
+      "6i6NqgkXZX9q3jqU3/h66shUGR/ltc7aqsocHt1PJN0SCKPqxSJoGaZl/T7fCgVl\\n"
+      "yvGoLrsyfX4WIW0BgICcfyyLwK5h48Gv1nq2kHiiAQKBgQDx6IYXbT4LhmHfJJ9d\\n"
+      "3r6sxvBZ4h+0/HYVK/4rG4dvjSF/vVZvhXwKRbNybdRZoZiDp5QZBSN7TkPE8q97\\n"
+      "8IQ91MggUqGSroVpU/PmGHIdUXMiU9qfq0F+KAXc5lNAunF4vqybWu16U4RFtpRq\\n"
+      "joZKanb0Z0ChivQcI0YVDWNKcQKBgQCR4hbMTo3sHP0J4xKiisCBhkhN5wGo53bE\\n"
+      "YIk1E+XE5u1Dp2gBPzhDilrG3PYphjwi0TvrAeWueJHdRJ2FJpe6BLsnJhJiKHkw\\n"
+      "zVZHZ+Qn8+1WqnRobODzBXceqqHejDoeDfXBfTo94F6ttEu4EOIG6+1rVxOqaSD8\\n"
+      "S52izO6PgwKBgDk4dS9pabm0KcZslT3RCG06CXRZZoKbDRto8pAjzN94FKpwkNeE\\n"
+      "TZjob8/rZsVk0fyiUQeyDXiHRMR7W0MH21/8yvHKWemmWmxVrWWJ9sQ0lfVSvG30\\n"
+      "RmOe9/QOjzbKYzjacV22HmJHCwyqaWTjHaTQlh6tpb4QbjmRpmwoZIohAoGAcos1\\n"
+      "H2ImqVfxjsvOm/WaRZksOI7DjN2BMZwi35wp8zrm3RIa5a+/+7gsoqxoVB5kJWpo\\n"
+      "Q5QPxbhBv5zameu9gn+oe4q3MH9a+OihcBuw13X9yui30i57ShXmfBu6UUWFdIe9\\n"
+      "iRlMm70KWhWQxovrDUg9+OQ8OrelALRWp7eFMQUCgYEA4fz76VwkMrA8XzY326l5\\n"
+      "36qU9oo4AVGN3Xtzh90C3cMYP3IpPTCdfxHvmyte2qC3uYb5EUtB15bX4UXR70bp\\n"
+      "FypWqG6mgZ7Mdoh+PvInHDEuf8JdvwbhXlnhzHnfWi7+HjzWUUpS8Il0QuuIbE6q\\n"
+      "pDh/d+sLfYP3TWpGOQ1yv6k=\\n"
+      "-----END PRIVATE KEY-----\\n\",\n"
+      "  \"client_email\": \"test@"
+      "developer.gserviceaccount.com\", \n"
+      "  \"client_id\": \"test.apps.googleusercontent.com\",\n"
+      "  \"auth_uri\": \"https://accounts.google.com/o/oauth2/auth\",\n"
+      "  \"token_uri\": \"https://accounts.google.com/o/oauth2/token\",\n"
+      "  \"auth_provider_x509_cert_url\": "
+      "\"https://www.googleapis.com/oauth2/v1/certs\",\n"
+      "  \"client_x509_cert_url\": \"https://www.googleapis.com/"
+      "robot/v1/metadata/x509/test%40developer.gserviceaccount.com\"\n"
+      "}";
+  ServiceAccountConfig saconfig;
+  EXPECT_TRUE(ParseServiceAccountJson(kServiceAccountJson, &saconfig));
+  EXPECT_EQ("google.com:cxx-compiler-service", saconfig.project_id);
+  EXPECT_EQ("c8c64bdffb032ad014993d4509521cbb4d64c388",
+            saconfig.private_key_id);
+
+  EXPECT_EQ(
+      "-----BEGIN PRIVATE KEY-----\n"
+      "MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCJ2ljEsJpoZmrZ\n"
+      "AHTcs5HiFg9PkXUQJF4aK8jVacBl6C2U0YJGwnCCPYQHyju0++eZRWlAqds4Jn5O\n"
+      "8JclnLs5JFD6Qzlqosqwn4qu8QI7dy4PybjwxRZMQtWm5vY6gHmvID4WEvWjxjL2\n"
+      "mqVOdThYy2YV/3PsCyjf6Z2XYtAZZJoK94w4OpF30IF1wuEZHllh6VJ4wpRiqpT8\n"
+      "bHxSiMlH2CTaoKJowrgAoYENj5eSbnPP0dsSftdA3Ckeu5/A4OjhyrOCsjwZag6J\n"
+      "Ipw5oRRDm9iiRt7dHdtrjEkGsiaFZvqY4sW++8x8MGkPpO+Mc1IvJVjj7khOTHEH\n"
+      "mWORcjbTAgMBAAECggEAHmP0jeghIkLx60UefklYL++NEI2QsS5TUJG2hNX7hHvb\n"
+      "EKPfhJn5E71cDhuXbh7av/99ZLQNkCNsVRrVN4WGAOLwtzt6vPeGl8mUWVzokROF\n"
+      "JBXkn6/TapyRXWotflg0e1cwWM11OdXIBnWxW8qb0XeF2fOnKrKLIFHwXB98oRwn\n"
+      "G6jg3A3F+//PCvTNk+oTJUyNVIrF6MsLN2/a9CJwfQA4bDShnPlQj8ToXRf1mEqv\n"
+      "6i6NqgkXZX9q3jqU3/h66shUGR/ltc7aqsocHt1PJN0SCKPqxSJoGaZl/T7fCgVl\n"
+      "yvGoLrsyfX4WIW0BgICcfyyLwK5h48Gv1nq2kHiiAQKBgQDx6IYXbT4LhmHfJJ9d\n"
+      "3r6sxvBZ4h+0/HYVK/4rG4dvjSF/vVZvhXwKRbNybdRZoZiDp5QZBSN7TkPE8q97\n"
+      "8IQ91MggUqGSroVpU/PmGHIdUXMiU9qfq0F+KAXc5lNAunF4vqybWu16U4RFtpRq\n"
+      "joZKanb0Z0ChivQcI0YVDWNKcQKBgQCR4hbMTo3sHP0J4xKiisCBhkhN5wGo53bE\n"
+      "YIk1E+XE5u1Dp2gBPzhDilrG3PYphjwi0TvrAeWueJHdRJ2FJpe6BLsnJhJiKHkw\n"
+      "zVZHZ+Qn8+1WqnRobODzBXceqqHejDoeDfXBfTo94F6ttEu4EOIG6+1rVxOqaSD8\n"
+      "S52izO6PgwKBgDk4dS9pabm0KcZslT3RCG06CXRZZoKbDRto8pAjzN94FKpwkNeE\n"
+      "TZjob8/rZsVk0fyiUQeyDXiHRMR7W0MH21/8yvHKWemmWmxVrWWJ9sQ0lfVSvG30\n"
+      "RmOe9/QOjzbKYzjacV22HmJHCwyqaWTjHaTQlh6tpb4QbjmRpmwoZIohAoGAcos1\n"
+      "H2ImqVfxjsvOm/WaRZksOI7DjN2BMZwi35wp8zrm3RIa5a+/+7gsoqxoVB5kJWpo\n"
+      "Q5QPxbhBv5zameu9gn+oe4q3MH9a+OihcBuw13X9yui30i57ShXmfBu6UUWFdIe9\n"
+      "iRlMm70KWhWQxovrDUg9+OQ8OrelALRWp7eFMQUCgYEA4fz76VwkMrA8XzY326l5\n"
+      "36qU9oo4AVGN3Xtzh90C3cMYP3IpPTCdfxHvmyte2qC3uYb5EUtB15bX4UXR70bp\n"
+      "FypWqG6mgZ7Mdoh+PvInHDEuf8JdvwbhXlnhzHnfWi7+HjzWUUpS8Il0QuuIbE6q\n"
+      "pDh/d+sLfYP3TWpGOQ1yv6k=\n"
+      "-----END PRIVATE KEY-----\n", saconfig.private_key);
+  EXPECT_EQ("test@"
+            "developer.gserviceaccount.com", saconfig.client_email);
+  EXPECT_EQ("test.apps.googleusercontent.com", saconfig.client_id);
+  EXPECT_EQ("https://accounts.google.com/o/oauth2/auth", saconfig.auth_uri);
+  EXPECT_EQ("https://accounts.google.com/o/oauth2/token", saconfig.token_uri);
+
+  EXPECT_EQ("https://www.googleapis.com/oauth2/v1/certs",
+            saconfig.auth_provider_x509_cert_url);
+  EXPECT_EQ("https://www.googleapis.com/robot/v1/metadata/x509/test"
+            "%40developer.gserviceaccount.com",
+            saconfig.client_x509_cert_url);
+}
+
+}  // namespace devtools_goma
diff --git a/client/openssl_engine.cc b/client/openssl_engine.cc
new file mode 100644
index 0000000..e1988e4
--- /dev/null
+++ b/client/openssl_engine.cc
@@ -0,0 +1,1605 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "openssl_engine.h"
+
+#include <openssl/asn1.h>
+#include <openssl/crypto.h>
+#include <openssl/err.h>
+#include <openssl/ssl.h>
+#include <openssl/x509v3.h>
+#include <algorithm>
+#include <cctype>
+#include <iomanip>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <vector>
+
+#include "autolock_timer.h"
+#include "callback.h"
+#include "compiler_specific.h"
+#include "file.h"
+#include "file_dir.h"
+#include "file_helper.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+#include "http.h"
+#include "ioutil.h"
+#include "mypath.h"
+#include "openssl_engine_helper.h"
+#include "path.h"
+#include "platform_thread.h"
+#include "scoped_fd.h"
+#include "socket_pool.h"
+#include "string_piece_utils.h"
+
+#ifndef OPENSSL_IS_BORINGSSL
+#error "This code is written for BoringSSL"
+#endif
+
+namespace devtools_goma {
+
+namespace {
+
+// Prevent use of SSL on error for this period.
+static const time_t kErrorTimeoutSecs = 60;
+
+// Wait for this period if no more sockets are in the pool.
+static const unsigned int kWaitForThingsGetsBetterInMs = 1000;
+
+#ifndef _WIN32
+pthread_once_t g_openssl_init_once = PTHREAD_ONCE_INIT;
+#else
+INIT_ONCE g_openssl_init_once;
+#endif
+
+// Deleter functor: frees a non-null BIO and CHECKs that BIO_free succeeded.
+struct ScopedBIOFree {
+  void operator()(BIO* bio) const { if (bio) CHECK(BIO_free(bio)); }
+};
+
+// Deleter functor: releases a non-null X509 with X509_free.
+struct ScopedX509Free {
+  void operator()(X509* cert) const { if (cert) X509_free(cert); }
+};
+
+// Deleter functor: releases a non-null X509_STORE_CTX.
+struct ScopedX509StoreCtxFree {
+  void operator()(X509_STORE_CTX* store_ctx) const {
+    if (store_ctx != nullptr) X509_STORE_CTX_free(store_ctx);
+  }
+};
+
+// Deleter functor: releases a non-null X509_STORE with X509_STORE_free.
+struct ScopedX509StoreFree {
+  void operator()(X509_STORE* store) const { if (store) X509_STORE_free(store); }
+};
+
+// Prints |data| via |func| into a memory BIO; returns "" if nothing is there.
+template<typename T>
+string GetHumanReadableInfo(T* data, int (*func)(BIO*, T*)) {
+  std::unique_ptr<BIO, ScopedBIOFree> bio(BIO_new(BIO_s_mem()));
+  if (!bio) return string();  // BIO_new failed; never hand null to |func|.
+  func(bio.get(), data);  // best effort: return whatever was printed.
+  char* text = nullptr;  // stays null if BIO_get_mem_data does not set it.
+  const long text_len = BIO_get_mem_data(bio.get(), &text);
+  return (text && text_len > 0) ? string(text, text_len) : string();
+}
+
+string GetHumanReadableCert(X509* x509) {  // X509_print() text of |x509|.
+  return GetHumanReadableInfo<X509>(x509, X509_print);
+}
+
+string GetHumanReadableCRL(X509_CRL* x509_crl) {  // X509_CRL_print() text.
+  return GetHumanReadableInfo<X509_CRL>(x509_crl, X509_CRL_print);
+}
+
+// Concatenates the printable form of every certificate in |x509s|.
+string GetHumanReadableCerts(STACK_OF(X509)* x509s) {
+  string all_certs;
+  for (size_t idx = 0; idx < sk_X509_num(x509s); ++idx)
+    all_certs += GetHumanReadableCert(sk_X509_value(x509s, idx));
+  return all_certs;
+}
+
+// Formats protocol version, hex session id, time and timeout of |s|.
+string GetHumanReadableSessionInfo(const SSL_SESSION* s) {
+  unsigned int id_len;
+  const uint8_t* id = SSL_SESSION_get_id(s, &id_len);
+  std::ostringstream hex_id;
+  hex_id << std::hex << std::setfill('0');
+  for (unsigned int pos = 0; pos < id_len; ++pos)
+    hex_id << std::setw(2) << static_cast<int>(id[pos]);
+  std::ostringstream out;
+  out << "SSL Session info:"
+      << " protocol=" << SSL_SESSION_get_version(s)
+      << " session_id=" << hex_id.str()
+      << " time=" << SSL_SESSION_get_time(s)
+      << " timeout=" << SSL_SESSION_get_timeout(s);
+  return out.str();
+}
+
+// Formats the current cipher of |ssl| and, when SSL_get_curve_id() is
+// non-zero, the curve name.
+string GetHumanReadableSSLInfo(const SSL* ssl) {
+  const SSL_CIPHER* const negotiated = SSL_get_current_cipher(ssl);
+  std::ostringstream out;
+  out << "SSL info: cipher:";
+  out << " name=" << SSL_CIPHER_get_name(negotiated);
+  out << " bits=" << SSL_CIPHER_get_bits(negotiated, nullptr);
+  out << " version=" << SSL_CIPHER_get_version(negotiated);
+  const uint16_t curve = SSL_get_curve_id(ssl);
+  if (curve == 0) return out.str();
+  out << " curve=" << SSL_get_curve_name(curve);
+  return out.str();
+}
+
+// A class that controls lifetime of the SSL session.
+class OpenSSLSessionCache {
+ public:
+  // Creates the singleton cache.  Safe to call more than once.
+  static void Init() {
+    InitOpenSSLSessionCache();
+  }
+
+  // Set configs for the SSL session to the SSL context.
+  static void Setup(SSL_CTX* ctx) {
+    InitOpenSSLSessionCache();  // no-op if the cache already exists.
+
+    DCHECK(cache_);
+    SSL_CTX_set_session_cache_mode(ctx, SSL_SESS_CACHE_CLIENT);
+    SSL_CTX_sess_set_remove_cb(ctx, RemoveSessionCallBack);
+  }
+
+  // Set a session to a SSL structure instance if we have a cache.
+  static bool SetCachedSession(SSL_CTX* ctx, SSL* ssl) {
+    DCHECK(cache_);
+    return cache_->SetCachedSessionInternal(ctx, ssl);
+  }
+
+  // Stores the current session of |ssl| for its SSL_CTX unless one is stored.
+  static void RecordSession(SSL* ssl) {
+    DCHECK(cache_);
+    DCHECK(ssl);
+    SSL_SESSION* sess = SSL_get1_session(ssl);
+    SSL_CTX* ctx = SSL_get_SSL_CTX(ssl);
+    if (sess == nullptr) {  // SSL_get1_session returns NULL without a session.
+      LOG(WARNING) << "No SSL session to store. ctx=" << ctx;
+      return;
+    }
+    LOG(INFO) << "Storing SSL session."
+              << " ctx=" << ctx
+              << " session_info=" << GetHumanReadableSessionInfo(sess)
+              << " secure_renegotiation_support="
+              << SSL_get_secure_renegotiation_support(ssl);
+    if (!cache_->RecordSessionInternal(ctx, sess)) {
+      LOG(INFO) << "Tried to store already stored session.";
+      // SSL_get1_session took a reference on |sess|.  Drop it here; on
+      // success the reference is kept so that the session stays valid
+      // while we have it in our session cache store.
+      SSL_SESSION_free(sess);
+    }
+  }
+
+ private:
+  OpenSSLSessionCache() {}
+  ~OpenSSLSessionCache() {
+    // Destructor deletes all cached sessions.
+    for (auto& it : session_map_) {
+      SSL_SESSION_free(it.second);
+    }
+  }
+
+  static void InitOpenSSLSessionCache() {
+    if (cache_) return;  // keep the existing cache; register atexit once.
+    cache_ = new OpenSSLSessionCache();
+    atexit(FinalizeOpenSSLSessionCache);
+  }
+
+  static void FinalizeOpenSSLSessionCache() {
+    delete cache_;
+    cache_ = nullptr;
+  }
+
+  static void RemoveSessionCallBack(SSL_CTX* ctx, SSL_SESSION* sess) {
+    DCHECK(cache_);
+
+    LOG(INFO) << "Released stored SSL session."
+              << " session_info=" << GetHumanReadableSessionInfo(sess);
+    cache_->RemoveSessionInternal(ctx);
+  }
+
+  // To avoid a race condition, SSL_set_session MUST be called while |mu_|
+  // is held; otherwise a use-after-free is possible.
+  //
+  // The lifetime of an SSL_SESSION is controlled by reference counting:
+  // SSL_set_session increments the count, and SSL_SESSION_free (or SSL_free
+  // of an SSL that holds the session) decrements it.  When a session is
+  // revoked, RemoveSessionInternal frees it and erases it from session_map_,
+  // so calling SSL_set_session outside of |mu_| may touch a freed session.
+  //
+  // Reference count updates themselves are done under a lock held by
+  // BoringSSL, so we do not need exclusive access for them.  That is why
+  // a ReadWriteLock (taken shared here) is used.
+  // TODO: use mutex lock if it is much faster than shared lock.
+  bool SetCachedSessionInternal(SSL_CTX* ctx, SSL* ssl) {
+    AUTO_SHARED_LOCK(lock, &mu_);
+    SSL_SESSION* sess = GetInternalUnlocked(ctx);
+    if (sess == nullptr)
+      return false;
+
+    VLOG(3) << "Reused session."
+            << " ctx=" << ctx
+            << " session_info=" << GetHumanReadableSessionInfo(sess);
+    SSL_set_session(ssl, sess);
+    return true;
+  }
+
+  // Returns true if the session is added.
+  bool RecordSessionInternal(SSL_CTX* ctx, SSL_SESSION* session) {
+    AUTO_EXCLUSIVE_LOCK(lock, &mu_);
+    if (GetInternalUnlocked(ctx) != nullptr)
+      return false;
+
+    CHECK(session_map_.insert(std::make_pair(ctx, session)).second);
+    return true;
+  }
+
+  // Returns true if the session is removed.
+  bool RemoveSessionInternal(SSL_CTX* ctx) {
+    AUTO_EXCLUSIVE_LOCK(lock, &mu_);
+    unordered_map<SSL_CTX*, SSL_SESSION*>::iterator found =
+        session_map_.find(ctx);
+    if (found == session_map_.end()) {
+      return false;
+    }
+
+    // Decrement reference count to revoke the session when nobody use it.
+    // See: https://www.openssl.org/docs/ssl/SSL_SESSION_free.html
+    SSL_SESSION_free(found->second);
+    session_map_.erase(found);
+    return true;
+  }
+
+  SSL_SESSION* GetInternalUnlocked(SSL_CTX* ctx) {
+    unordered_map<SSL_CTX*, SSL_SESSION*>::iterator found =
+        session_map_.find(ctx);
+    if (found != session_map_.end()) {
+      return found->second;
+    }
+    return nullptr;
+  }
+
+  ReadWriteLock mu_;
+  // Won't take ownership of SSL_CTX*.
+  // Ownership of SSL_SESSION* is kept by the OpenSSL library, but
+  // we decrement a reference count to notify it an obsolete session.
+  unordered_map<SSL_CTX*, SSL_SESSION*> session_map_;
+
+  static OpenSSLSessionCache* cache_;
+
+  DISALLOW_COPY_AND_ASSIGN(OpenSSLSessionCache);
+};
+
+/* static */
+OpenSSLSessionCache* OpenSSLSessionCache::cache_ = nullptr;
+
+// A class that controls socket_pool used in OpenSSL engine.
+class OpenSSLSocketPoolCache {
+ public:
+  // Creates the singleton on first call and schedules its deletion at exit.
+  static void Init() {
+    if (cache_ != nullptr) return;
+    cache_ = new OpenSSLSocketPoolCache;
+    atexit(FinalizeOpenSSLSocketPoolCache);
+  }
+
+  // Returns the pool for |host|:|port|, creating it on first request.
+  static SocketPool* GetSocketPool(const string& host, int port) {
+    DCHECK(cache_);
+    return cache_->GetSocketPoolInternal(host, port);
+  }
+
+ private:
+  OpenSSLSocketPoolCache() {}
+  ~OpenSSLSocketPoolCache() {
+    // The map holds raw owning pointers, so delete every pool.
+    for (const auto& entry : socket_pools_) delete entry.second;
+  }
+
+  static void FinalizeOpenSSLSocketPoolCache() {
+    delete cache_;
+    cache_ = nullptr;
+  }
+
+  SocketPool* GetSocketPoolInternal(const string& host, int port) {
+    // Pools are keyed by "host:port".
+    std::ostringstream key_stream;
+    key_stream << host << ":" << port;
+    const string pool_key = key_stream.str();
+
+    AUTOLOCK(lock, &socket_pool_mu_);
+    auto inserted = socket_pools_.insert(
+        std::make_pair(pool_key, static_cast<SocketPool*>(nullptr)));
+    SocketPool*& slot = inserted.first->second;
+    if (inserted.second) slot = new SocketPool(host, port);
+    return slot;
+  }
+
+  // Held while GetSocketPoolInternal() looks up or inserts a pool.
+  Lock socket_pool_mu_;
+  unordered_map<string, SocketPool*> socket_pools_;
+
+  static OpenSSLSocketPoolCache* cache_;
+  DISALLOW_COPY_AND_ASSIGN(OpenSSLSocketPoolCache);
+};
+
+/* static */
+OpenSSLSocketPoolCache* OpenSSLSocketPoolCache::cache_ = nullptr;
+
+class OpenSSLCertificateStore {
+ public:
+  static void Init() {  // Creates the singleton and loads system root certs.
+    if (!store_) {
+      store_ = new OpenSSLCertificateStore;
+      store_->InitInternal();
+      atexit(FinalizeOpenSSLCertificateStore);
+    }
+  }
+
+  static bool AddCertificateFromFile(const string& filename) {
+    DCHECK(store_);
+    if (store_->IsKnownCertfileInternal(filename)) {
+      LOG(INFO) << "Known cerficiate:" << filename;
+      return false;  // a source name is registered at most once.
+    }
+
+    string user_cert;
+    if (!ReadFileToString(filename.c_str(), &user_cert)) {
+      LOG(ERROR) << "Failed to read:" << filename;
+      return false;
+    }
+    return store_->AddCertificateFromStringInternal(filename, user_cert);
+  }
+
+  static bool AddCertificateFromString(
+      const string& source, const string& cert) {
+    DCHECK(store_);
+    return store_->AddCertificateFromStringInternal(source, cert);
+  }
+
+  static void SetCertsToCTX(SSL_CTX* ctx) {  // Adds all certs to |ctx|'s store.
+    DCHECK(store_);
+    store_->SetCertsToCTXInternal(ctx);
+  }
+
+  static bool IsReady() {
+    DCHECK(store_);
+    return store_->IsReadyInternal();
+  }
+
+  static string GetTrustedCertificates() {
+    DCHECK(store_);
+    return store_->GetTrustedCertificatesInternal();
+  }
+
+ private:
+  OpenSSLCertificateStore() {}
+  ~OpenSSLCertificateStore() {}
+
+  static void FinalizeOpenSSLCertificateStore() {
+    delete store_;
+    store_ = nullptr;
+  }
+
+  void InitInternal() {
+    string root_certs;
+    CHECK(GetTrustedRootCerts(&root_certs))
+        << "Failed to read trusted root certificates from the system.";
+    AddCertificateFromStringInternal("system", root_certs);
+    LOG(INFO) << "Loaded root certificates.";
+  }
+
+  bool IsReadyInternal() const {
+    AUTO_SHARED_LOCK(lock, &mu_);
+    return certs_.size() != 0;  // counts sources, even ones without certs.
+  }
+
+  // Note: the value must be returned by copy, not by const reference.
+  // trusted_certificates_ is a member of the class that is protected by
+  // the mutex (mu_); another thread could update it once this function
+  // has returned and released the lock.
+  string GetTrustedCertificatesInternal() const {
+    AUTO_SHARED_LOCK(lock, &mu_);
+    return trusted_certificates_;
+  }
+
+  void SetCertsToCTXInternal(SSL_CTX* ctx) const {
+    AUTO_SHARED_LOCK(lock, &mu_);
+    for (const auto& it : certs_) {
+      LOG(INFO) << "setting certs from: " << it.first
+                << " size=" << it.second->size();
+      for (const auto& x509 : *it.second) {
+        X509_STORE_add_cert(SSL_CTX_get_cert_store(ctx), x509.get());
+      }
+    }
+  }
+
+  bool IsKnownCertfileInternal(const string& filename) const {
+    AUTO_SHARED_LOCK(lock, &mu_);
+    return certs_.find(filename) != certs_.end();
+  }
+
+  bool AddCertificateFromStringInternal(const string& source,
+                                        const string& cert) {
+    // Create BIO instance to be used by PEM_read_bio_X509_AUX.
+    std::unique_ptr<BIO, ScopedBIOFree> bio(
+        BIO_new_mem_buf(cert.data(), cert.size()));
+
+    AUTO_EXCLUSIVE_LOCK(lock, &mu_);
+    auto it = certs_.insert(std::make_pair(source, nullptr));
+    if (!it.second) {
+      LOG(WARNING) << "cert store already has certificate for "
+                   << source;
+      return false;
+    }
+    it.first->second.reset(
+        new std::vector<std::unique_ptr<X509, ScopedX509Free>>());
+    for (;;) {  // read PEM blocks until PEM_read_bio_X509_AUX returns null.
+      std::unique_ptr<X509, ScopedX509Free> x509(
+          PEM_read_bio_X509_AUX(bio.get(), nullptr, nullptr, nullptr));
+      if (x509.get() == nullptr)
+        break;
+
+      const string readable_cert = GetHumanReadableCert(x509.get());
+      LOG(INFO) << "Certificate loaded from " << source << ": "
+                << readable_cert;
+      trusted_certificates_.append(readable_cert);
+      it.first->second->emplace_back(std::move(x509));
+    }
+    if (ERR_GET_REASON(ERR_peek_last_error()) == PEM_R_NO_START_LINE)
+      ERR_clear_error();  // expected at end of input: no further PEM block.
+    else
+      LOG(ERROR) << "Unexpected error occured during reading SSL certificate."
+                 << " source:" << source;
+    // TODO: log error with source info when no certificate found.
+    LOG_IF(ERROR, it.first->second->size() == 0)
+        << "No certificate found in " << source;
+    return it.first->second->size() > 0;
+  }
+
+  ReadWriteLock mu_;  // taken by every method that touches the members below.
+  std::map<string,
+           std::unique_ptr<
+               std::vector<std::unique_ptr<X509, ScopedX509Free>>>> certs_;
+
+  string trusted_certificates_;  // printable dump of every loaded cert.
+
+  static OpenSSLCertificateStore* store_;
+  DISALLOW_COPY_AND_ASSIGN(OpenSSLCertificateStore);
+};
+
+/* static */
+OpenSSLCertificateStore* OpenSSLCertificateStore::store_ = nullptr;
+
+class OpenSSLCRLCache {
+ public:
+  static void Init() {
+    if (!cache_) {
+      cache_ = new OpenSSLCRLCache;
+      atexit(FinalizeOpenSSLCRLCache);
+    }
+  }
+
+  // Returns a duplicate of the CRL cached for |url|, or null if there is
+  // none.  The duplicate is handed to the caller inside the ScopedX509CRL.
+  static ScopedX509CRL LookupCRL(const string& url) {
+    DCHECK(cache_);
+    return cache_->LookupCRLInternal(url);
+  }
+
+  // Returns true if url exists in internal database and successfully removed.
+  // Otherwise, e.g. not registered, returns false.
+  static bool DeleteCRL(const string& url) {
+    DCHECK(cache_);
+    return cache_->DeleteCRLInternal(url);
+  }
+
+  // Won't take ownership of |crl|.  This function duplicates it internally.
+  static void SetCRL(const string& url, X509_CRL* crl) {
+    DCHECK(cache_);
+    return cache_->SetCRLInternal(url, crl);
+  }
+
+ private:
+  OpenSSLCRLCache() {}
+  ~OpenSSLCRLCache() {
+    crls_.clear();
+  }
+  static void FinalizeOpenSSLCRLCache() {
+    delete cache_;
+    cache_ = nullptr;
+  }
+
+  // Returns a copy made with X509_CRL_dup while |mu_| is held shared.
+  ScopedX509CRL LookupCRLInternal(const string& url) {
+    AUTO_SHARED_LOCK(lock, &mu_);
+    const auto& it = crls_.find(url);
+    if (it == crls_.end())
+      return nullptr;
+    return ScopedX509CRL(X509_CRL_dup(it->second.get()));
+  }
+
+  bool DeleteCRLInternal(const string& url) {
+    AUTO_EXCLUSIVE_LOCK(lock, &mu_);
+    const auto& it = crls_.find(url);
+    if (it == crls_.end())
+      return false;
+    crls_.erase(it);
+    return true;
+  }
+
+  void SetCRLInternal(const string& url, X509_CRL* crl) {
+    AUTO_EXCLUSIVE_LOCK(lock, &mu_);
+    // Drop any existing entry directly.  DeleteCRLInternal() must not be
+    // called here: it takes |mu_| again and would self-deadlock.
+    crls_.erase(url);
+    CHECK(crls_.insert(
+            std::make_pair(url, ScopedX509CRL(X509_CRL_dup(crl)))).second)
+        << "We already have the same URL in CRL store."
+        << " url=" << url;
+  }
+
+  ReadWriteLock mu_;
+  std::map<string, ScopedX509CRL> crls_;
+
+  static OpenSSLCRLCache* cache_;
+  DISALLOW_COPY_AND_ASSIGN(OpenSSLCRLCache);
+};
+
+/* static */
+OpenSSLCRLCache* OpenSSLCRLCache::cache_ = nullptr;
+
+// Goma client also uses BoringSSL.
+// Let's follow chromium's net/socket/ssl_client_socket_impl.cc.
+// It uses the BoringSSL defaults but avoids selecting CBC ciphers.
+const char* kCipherList = "ALL:!SHA256:!SHA384:!aPSK:!ECDSA+SHA1";
+// Timeout applied to each socket write/read while downloading a CRL.
+const int kCrlIoTimeout = 1000;  // milliseconds.
+// Lower bound on the number of CRL download attempts; the retry loop uses
+// the larger of this value and the number of addresses in the socket pool.
+const size_t kMaxDownloadCrlRetry = 5;  // times.
+
+// Initializes the crypto library and then calls Init() on each of the
+// file-local caches/stores (session cache, socket pool cache, certificate
+// store, CRL cache).  It is run once, through pthread_once() or
+// InitOnceExecuteOnce(), from the OpenSSLEngineCache constructor.
+void InitOpenSSL() {
+  CRYPTO_library_init();
+  OpenSSLSessionCache::Init();
+  OpenSSLSocketPoolCache::Init();
+  OpenSSLCertificateStore::Init();
+  OpenSSLCRLCache::Init();
+  LOG(INFO) << "OpenSSL is initialized.";
+}
+
+#ifdef _WIN32
+// Adapter giving InitOpenSSL() the callback signature expected by
+// InitOnceExecuteOnce().  The arguments are unused and it always returns TRUE.
+BOOL WINAPI InitOpenSSLWin(PINIT_ONCE, PVOID, PVOID*) {
+  InitOpenSSL();
+  return TRUE;
+}
+#endif
+
+// Returns |input| unchanged when it is an alphanumeric character and '_'
+// otherwise.
+int NormalizeChar(int input) {
+  // isalnum() is only defined for EOF and values representable as unsigned
+  // char.  A plain char holding a non-ASCII byte is promoted to a negative
+  // int on platforms where char is signed, so convert it first.
+  if (!isalnum(static_cast<unsigned char>(input))) {
+    return '_';
+  }
+  return input;
+}
+
+// Returns a copy of |input| in which every character that is not
+// alphanumeric has been replaced with '_', so the result can be used as part
+// of a filename.
+string NormalizeToUseFilename(const string& input) {
+  string normalized;
+  normalized.reserve(input.size());
+  for (char c : input) {
+    normalized.push_back(static_cast<char>(NormalizeChar(c)));
+  }
+  return normalized;
+}
+
+// Parses |crl_str| as a CRL.  Input containing the PEM "BEGIN X509 CRL"
+// marker is read as PEM; anything else is treated as DER.  The result is
+// whatever the underlying parser returns (null on parse failure).
+ScopedX509CRL ParseCrl(const string& crl_str) {
+  // See: http://www.openssl.org/docs/apps/crl.html
+  const bool is_pem =
+      crl_str.find("-----BEGIN X509 CRL-----") != string::npos;
+  if (!is_pem) {
+    // DER: d2i advances the pointer it is given, so hand it a local copy.
+    const unsigned char* der =
+        reinterpret_cast<const unsigned char*>(crl_str.data());
+    return ScopedX509CRL(d2i_X509_CRL(nullptr, &der, crl_str.size()));
+  }
+
+  // PEM: read through a memory BIO over the string's buffer.
+  std::unique_ptr<BIO, ScopedBIOFree> pem_bio(
+      BIO_new_mem_buf(crl_str.data(), crl_str.size()));
+  return ScopedX509CRL(
+      PEM_read_bio_X509_CRL(pem_bio.get(), nullptr, nullptr, nullptr));
+}
+
+// Returns the subject common name (CN) of |x509|, or an empty string when
+// no CN can be obtained.
+string GetSubjectCommonName(X509* x509) {
+  static const size_t kMaxHostname = 1024;
+
+  X509_NAME* subject = X509_get_subject_name(x509);
+  char buf[kMaxHostname];
+  const int len =
+      X509_NAME_get_text_by_NID(subject, NID_commonName, buf, sizeof(buf));
+  if (len < 0) {
+    return "";
+  }
+  // Build the string from the reported length rather than stopping at the
+  // first NUL.  Otherwise a CN with an embedded NUL (e.g.
+  // "good.example\0.evil.example") would be truncated to its prefix and could
+  // then match a hostname it must not match.
+  const size_t copied =
+      std::min(static_cast<size_t>(len), sizeof(buf) - 1);
+  return string(buf, copied);
+}
+
+// Returns every dNSName entry found in the subjectAltName extension of
+// |x509|.  The result is empty when the extension is missing, cannot be
+// decoded, or holds no dNSName entry.  iPAddress entries are skipped here.
+std::vector<string> GetAltDNSNames(X509* x509) {
+  std::vector<string> dns_names;
+
+  const int ext_index = X509_get_ext_by_NID(x509, NID_subject_alt_name, -1);
+  if (ext_index < 0) {
+    LOG(INFO) << "cert has no subject alt name";
+    return dns_names;
+  }
+  X509_EXTENSION* san_ext = X509_get_ext(x509, ext_index);
+  if (san_ext == nullptr) {
+    LOG(INFO) << "cert has no subject alt name extension";
+    return dns_names;
+  }
+
+  GENERAL_NAMES* general_names =
+      reinterpret_cast<GENERAL_NAMES*>(X509V3_EXT_d2i(san_ext));
+  if (general_names == nullptr) {
+    LOG(INFO) << "unable to get subject alt name extension";
+    return dns_names;
+  }
+  VLOG(1) << "subject alt names=" << sk_GENERAL_NAME_num(general_names);
+
+  for (size_t i = 0; i < sk_GENERAL_NAME_num(general_names); ++i) {
+    GENERAL_NAME* entry = sk_GENERAL_NAME_value(general_names, i);
+    if (entry->type == GEN_DNS) {
+      unsigned char* raw = ASN1_STRING_data(entry->d.dNSName);
+      if (raw != nullptr) {
+        // Use the explicit length so embedded NULs are kept in the name.
+        const int len = ASN1_STRING_length(entry->d.dNSName);
+        string name = string(reinterpret_cast<char*>(raw), len);
+        VLOG(1) << "subject alt name[" << i << "]=" << name;
+        dns_names.push_back(name);
+      }
+    } else if (entry->type == GEN_IPADD) {
+      VLOG(1) << "ignore ip address";
+    } else {
+      LOG(INFO) << "unsupported alt name type:" << entry->type;
+    }
+  }
+  // X509V3_EXT_d2i returned a decoded copy that this function must free.
+  sk_GENERAL_NAME_pop_free(general_names, GENERAL_NAME_free);
+  return dns_names;
+}
+
+// Returns true if the subjectAltName extension of |x509| has an iPAddress
+// entry equal to the address at |ap|.  |af| is AF_INET or AF_INET6 and
+// selects whether 4-byte or 16-byte entries are compared; the callers in
+// this file pass a pointer to an in_addr or in6_addr respectively.
+// dNSName entries are ignored.
+bool MatchAltIPAddress(X509* x509, int af, void* ap) {
+  const int ext_index = X509_get_ext_by_NID(x509, NID_subject_alt_name, -1);
+  if (ext_index < 0) {
+    LOG(INFO) << "cert has no subject alt name";
+    return false;
+  }
+  X509_EXTENSION* san_ext = X509_get_ext(x509, ext_index);
+  if (san_ext == nullptr) {
+    LOG(INFO) << "cert has no subject alt name extension";
+    return false;
+  }
+
+  GENERAL_NAMES* general_names =
+      reinterpret_cast<GENERAL_NAMES*>(X509V3_EXT_d2i(san_ext));
+  if (general_names == nullptr) {
+    LOG(INFO) << "unable to get subject alt name extension";
+    return false;
+  }
+  VLOG(1) << "subject alt names=" << sk_GENERAL_NAME_num(general_names);
+
+  bool matched = false;
+  // Stop scanning as soon as one entry matches.
+  for (size_t i = 0; i < sk_GENERAL_NAME_num(general_names) && !matched;
+       ++i) {
+    GENERAL_NAME* entry = sk_GENERAL_NAME_value(general_names, i);
+    if (entry->type == GEN_DNS) {
+      VLOG(1) << "ignore dns name";
+    } else if (entry->type == GEN_IPADD) {
+      // ASN1_OCTET_STRING *iPAddress;
+      unsigned char* ipaddr = ASN1_STRING_data(entry->d.iPAddress);
+      if (ipaddr == nullptr) {
+        continue;
+      }
+      const int len = ASN1_STRING_length(entry->d.iPAddress);
+      if (len == 4) {
+        // IPv4 entry: only comparable with an IPv4 query.
+        matched = (af == AF_INET) && (memcmp(ipaddr, ap, len) == 0);
+      } else if (len == 16) {
+        // IPv6 entry: only comparable with an IPv6 query.
+        matched = (af == AF_INET6) && (memcmp(ipaddr, ap, len) == 0);
+      } else {
+        LOG(WARNING) << "invalid IP address: length=" << len;
+      }
+    } else {
+      LOG(INFO) << "unsupported alt name type:" << entry->type;
+    }
+  }
+  sk_GENERAL_NAME_pop_free(general_names, GENERAL_NAME_free);
+  return matched;
+}
+
+
+// Sends |req| over |sock| and reads the reply into |resp|.
+// URL should be http (not https).
+// |resp| is reset first.  On any failure the function logs and returns
+// early, so callers detect failure by inspecting |resp| (e.g. its status
+// code).
+void DownloadCrl(
+    ScopedSocket* sock,
+    const HttpRequest& req,
+    HttpResponse* resp) {
+  resp->Reset();
+
+  // Send request.
+  if (!sock->valid()) {
+    LOG(ERROR) << "connection failure:" << *sock;
+    return;
+  }
+
+  const string& request = req.CreateMessage();
+  if (sock->WriteString(request, kCrlIoTimeout) != OK) {
+    LOG(ERROR) << "write failure:"
+               << " fd=" << *sock;
+    return;
+  }
+
+  // Receive until |resp| reports that it is complete or a read fails.
+  while (true) {
+    char* buf;
+    int buf_size;
+    resp->Buffer(&buf, &buf_size);
+    const ssize_t len = sock->ReadWithTimeout(buf, buf_size, kCrlIoTimeout);
+    if (len < 0) {
+      LOG(ERROR) << "read failure:"
+                 << " fd=" << *sock
+                 << " len=" << len
+                 << " resp has_header=" << resp->HasHeader()
+                 << " resp status_code=" << resp->status_code()
+                 << " resp len=" << resp->len();
+      return;
+    }
+    if (!resp->Recv(len)) {
+      continue;
+    }
+    resp->Parse();
+    return;
+  }
+  // UNREACHABLE.
+}
+
+// Returns the first "http://" URI listed in the CRL distribution points
+// extension of |x509|, or an empty string when the certificate has no such
+// extension, it cannot be decoded, or no http URI is present.
+string GetCrlUrl(X509* x509) {
+  int loc = X509_get_ext_by_NID(x509, NID_crl_distribution_points, -1);
+  if (loc < 0)
+    return "";
+  X509_EXTENSION* ext = X509_get_ext(x509, loc);
+  if (ext == nullptr)
+    return "";
+  ASN1_OCTET_STRING* asn1_os = X509_EXTENSION_get_data(ext);
+  if (asn1_os == nullptr)
+    return "";
+  const unsigned char* data = ASN1_STRING_data(asn1_os);
+  const long data_len = ASN1_STRING_length(asn1_os);
+  STACK_OF(DIST_POINT)* dps = d2i_CRL_DIST_POINTS(nullptr, &data, data_len);
+  if (dps == nullptr) {
+    LOG(ERROR) << "could not find distpoints in CRL.";
+    return "";
+  }
+  string url;
+  for (size_t i = 0; i < sk_DIST_POINT_num(dps) && url.empty(); i++) {
+    DIST_POINT* dp = sk_DIST_POINT_value(dps, i);
+    // Only distribution points given as a full name (type 0) are examined.
+    if (!dp->distpoint || dp->distpoint->type != 0)
+      continue;
+    STACK_OF(GENERAL_NAME)* general_names = dp->distpoint->name.fullname;
+    for (size_t j = 0; j < sk_GENERAL_NAME_num(general_names) && url.empty();
+         j++) {
+      GENERAL_NAME* general_name = sk_GENERAL_NAME_value(general_names, j);
+      if (general_name->type != GEN_URI) {
+        LOG(INFO) << "Unsupported distribution point type:"
+                  << general_name->type;
+        continue;
+      }
+      // Take the URI with its explicit ASN.1 length; the buffer is not
+      // guaranteed to be NUL-terminated.
+      const unsigned char* uri_data = ASN1_STRING_data(general_name->d.ia5);
+      const int uri_len = ASN1_STRING_length(general_name->d.ia5);
+      if (uri_data == nullptr || uri_len <= 0)
+        continue;
+      url.assign(reinterpret_cast<const char*>(uri_data), uri_len);
+      // Reject non-http URIs and URIs with embedded NULs.
+      if (url.compare(0, 7, "http://") != 0 ||
+          url.find('\0') != string::npos) {
+        LOG(INFO) << "Unsupported distribution point URI:" << url;
+        url.clear();
+      }
+    }
+  }
+  sk_DIST_POINT_pop_free(dps, DIST_POINT_free);
+  return url;
+}
+
+// Checks the signature of |crl| against the public key of every certificate
+// that X509_STORE_get1_certs() returns for the CRL's issuer name.  Returns
+// false as soon as one of them fails to verify the CRL.
+// NOTE: when no issuer certificate is found the loop body never runs and the
+// function returns true, i.e. the CRL is accepted without any signature
+// check.
+bool VerifyCrl(X509_CRL* crl, X509_STORE_CTX* store_ctx) {
+  bool ok = true;
+  STACK_OF(X509)* x509s = X509_STORE_get1_certs(store_ctx,
+                                                X509_CRL_get_issuer(crl));
+  for (size_t j = 0; j < sk_X509_num(x509s); j++) {
+    // X509_get_pubkey() returns a new reference that must be released.
+    EVP_PKEY* pkey = X509_get_pubkey(sk_X509_value(x509s, j));
+    // Only an explicit 1 means "verified"; a missing key, 0 or a negative
+    // error return are all failures.
+    const bool verified =
+        pkey != nullptr && X509_CRL_verify(crl, pkey) == 1;
+    EVP_PKEY_free(pkey);
+    if (!verified) {
+      ok = false;
+      break;
+    }
+  }
+  sk_X509_pop_free(x509s, X509_free);
+  return ok;
+}
+
+// Returns true when |crl| must not be used any more: it has no nextUpdate
+// field, its nextUpdate is not in the future, or (when
+// |crl_max_valid_duration| is non-negative) its lastUpdate is more than
+// |crl_max_valid_duration| seconds old.  |label| is only used in log output.
+bool IsCrlExpired(const string& label, X509_CRL* crl,
+                  int crl_max_valid_duration) {
+  // Is the CRL expired?
+  const ASN1_TIME* next_update = X509_CRL_get_nextUpdate(crl);
+  const bool past_next_update =
+      next_update == nullptr || X509_cmp_current_time(next_update) <= 0;
+  if (past_next_update) {
+    LOG(INFO) << "CRL is expired: label=" << label
+              << " info=" << GetHumanReadableCRL(crl);
+    return true;
+  }
+
+  // A negative duration means the user did not set a maximum age.
+  if (crl_max_valid_duration < 0) {
+    return false;
+  }
+
+  // Does the CRL hit max valid duration set by the user?
+  ASN1_TIME* crl_last_update = X509_CRL_get_lastUpdate(crl);
+  time_t oldest_allowed = time(nullptr) - crl_max_valid_duration;
+  if (X509_cmp_time(crl_last_update, &oldest_allowed) >= 0) {
+    return false;
+  }
+  LOG(INFO) << "CRL is too old to use.  We need to refresh: "
+            << " label=" << label
+            << " crl_max_valid_duration_=" << crl_max_valid_duration
+            << " info=" << GetHumanReadableCRL(crl);
+  return true;
+}
+
+}  // anonymous namespace
+
+//
+// OpenSSLContext
+//
+// Creates and configures the SSL_CTX for connections to |hostname|.
+//   hostname: server name, later used for SNI and identity checks.
+//   crl_max_valid_duration: maximum accepted CRL age, passed to
+//       IsCrlExpired(); a negative value disables that age check.
+//   invalidate_closure: stored and run (at most once) by Invalidate(); if it
+//       was never run, the destructor deletes it.
+void OpenSSLContext::Init(
+    const string& hostname,
+    int crl_max_valid_duration,
+    OneshotClosure* invalidate_closure) {
+  AUTOLOCK(lock, &mu_);
+  // To keep room to support higher version, let's allow to understand all
+  // TLS protocols here, and limit min supported version below.
+  // Note: if TLSv1_method is used, it won't understand TLS 1.1 or TLS 1.2.
+  // See: http://www.openssl.org/docs/ssl/SSL_CTX_new.html
+  ctx_ = SSL_CTX_new(TLS_method());
+  CHECK(ctx_);
+
+  // Disable legacy protocols: nothing older than TLS 1.0 is accepted.
+  SSL_CTX_set_min_proto_version(ctx_, TLS1_VERSION);
+
+  OpenSSLSessionCache::Setup(ctx_);
+
+  // Require the peer to present a certificate; no custom verify callback.
+  SSL_CTX_set_verify(ctx_, SSL_VERIFY_PEER, nullptr);
+  CHECK(SSL_CTX_set_cipher_list(ctx_, kCipherList));
+  // TODO: write more config to ctx_.
+
+  OpenSSLCertificateStore::SetCertsToCTX(ctx_);
+  certs_info_ = OpenSSLCertificateStore::GetTrustedCertificates();
+  hostname_ = hostname;
+  crl_max_valid_duration_ = crl_max_valid_duration;
+  notify_invalidate_closure_ = invalidate_closure;
+}
+
+// Initializes only the CRL-ready flag, the last error time and the reference
+// count.  ctx_, hostname_, crl_max_valid_duration_ and
+// notify_invalidate_closure_ are assigned later by Init().
+// NOTE(review): the destructor reads ctx_ and notify_invalidate_closure_, so
+// Init() presumably has to run before destruction unless the header gives
+// those members default initializers -- confirm.
+OpenSSLContext::OpenSSLContext()
+    : is_crl_ready_(false), last_error_time_(0), ref_cnt_(0) {
+}
+
+// Releases the SSL_CTX.  Every SSL created through NewSSL() must already
+// have been returned via DeleteSSL().
+OpenSSLContext::~OpenSSLContext() {
+  CHECK_EQ(ref_cnt_, 0UL);
+
+  // SSL_CTX_free invokes the session remove callback.
+  // See: http://www.openssl.org/docs/ssl/SSL_CTX_sess_set_get_cb.html
+  SSL_CTX_free(ctx_);
+
+  // The closure is still owned here if Invalidate() never ran it.
+  // Deleting a null pointer is a no-op, so no explicit check is needed.
+  delete notify_invalidate_closure_;
+}
+
+// Downloads the CRL at |url| over plain HTTP (through the configured proxy
+// when one is set) and parses it.
+//   url: CRL distribution point URL.
+//   crl_str: receives the raw response body of the last attempt that got an
+//       HTTP 200 (even if that body then failed to parse).
+// Returns the parsed CRL, or nullptr when no socket pool is available or
+// every attempt failed.
+ScopedX509CRL OpenSSLContext::GetX509CrlsFromUrl(
+    const string& url, string* crl_str) {
+  LOG(INFO) << "DownloadCrl:" << url;
+
+  HttpClient::Options options;
+  if (!proxy_host_.empty()) {
+    options.proxy_host_name = proxy_host_;
+    options.proxy_port = proxy_port_;
+  }
+  options.InitFromURL(url);
+
+  HttpRequest req;
+  req.Init("GET", "", options);
+  req.AddHeader("Connection", "close");
+  HttpResponse resp;
+  resp.SetRequestPath(url);
+  resp.SetTraceId("downloadCrl");
+
+  SocketPool* socket_pool(OpenSSLSocketPoolCache::GetSocketPool(
+      options.SocketHost(), options.SocketPort()));
+  if (socket_pool == nullptr) {
+    LOG(ERROR) << "Socket Pool is nullptr:"
+               << " host=" << options.SocketHost()
+               << " port=" << options.SocketPort();
+    return nullptr;
+  }
+
+  // Try at least kMaxDownloadCrlRetry times, and at least once per address
+  // known to the pool.
+  for (size_t retry = 0;
+       retry < std::max(kMaxDownloadCrlRetry, socket_pool->NumAddresses());
+       ++retry) {
+    ScopedSocket sock(socket_pool->NewSocket());
+    if (!sock.valid()) {
+      // We might have used up all candidate addresses in the pool.
+      // It might be better to wait a while.
+      LOG(WARNING) << "It seems to fail to connect to all available addresses."
+                   << " Going to wait for a while."
+                   << " kWaitForThingsGetsBetterInMs="
+                   << kWaitForThingsGetsBetterInMs;
+      PlatformThread::Sleep(kWaitForThingsGetsBetterInMs);
+      continue;
+    }
+    DownloadCrl(&sock, req, &resp);
+    if (resp.status_code() != 200) {
+      LOG(WARNING) << "download CRL retrying:"
+                   << " retry=" << retry
+                   << " url=" << url
+                   << " http=" << resp.status_code();
+      // Close the socket and report the failure to the pool.
+      socket_pool->CloseSocket(std::move(sock), true);
+      continue;
+    }
+    crl_str->assign(string(resp.Body()));
+    ScopedX509CRL x509_crl(ParseCrl(*crl_str));
+    if (x509_crl == nullptr) {
+      LOG(WARNING) << "failed to parse CRL data:"
+                   << " url=" << url
+                   << " contents length=" << crl_str->length()
+                   << " resp header=" << resp.Header();
+      socket_pool->CloseSocket(std::move(sock), true);
+      continue;
+    }
+    // we requested "Connection: close", so close the socket, but no error.
+    socket_pool->CloseSocket(std::move(sock), false);
+    return x509_crl;
+  }
+
+  LOG(ERROR) << "failed to download CRL from " << url;
+  return nullptr;
+}
+
+// Loads a CRL for every certificate in |x509s| that names an http CRL
+// distribution point, adds each one to the SSL_CTX certificate store and to
+// |crls_|, and enables CRL checking on the context if at least one CRL was
+// loaded.  Certificates without a usable CRL URL are skipped.
+// Returns false (recording |last_error_| and |last_error_time_|) as soon as
+// a certificate with a CRL URL has no usable CRL; returns true otherwise
+// and marks the CRLs as ready.
+// The "Unlocked" suffix means the caller is expected to hold |mu_|;
+// IsRevoked() calls this while holding it.
+bool OpenSSLContext::SetupCrlsUnlocked(STACK_OF(X509)* x509s) {
+  CHECK(!is_crl_ready_);
+  crls_.clear();
+  // NOTE(review): |store| is created empty and nothing is added to it in
+  // this function; confirm that VerifyCrl() can actually find issuer
+  // certificates through |store_ctx|.
+  std::unique_ptr<X509_STORE, ScopedX509StoreFree> store(X509_STORE_new());
+  std::unique_ptr<X509_STORE_CTX, ScopedX509StoreCtxFree>
+      store_ctx(X509_STORE_CTX_new());
+  X509_STORE_CTX_init(store_ctx.get(), store.get(), nullptr, x509s);
+  const int num_x509s = sk_X509_num(x509s);
+  for (int i = 0; i < num_x509s; i++) {
+    X509* x509 = sk_X509_value(x509s, i);
+    string url = GetCrlUrl(x509);
+    if (url.empty())
+      continue;
+    ScopedX509CRL crl;
+    string crl_str;
+
+    // CRL is loaded in following steps:
+    // 1. try memory cache.
+    // 2. try disk cache.
+    // 3. download from URL.
+
+    // Read from memory cache.
+    bool is_mem_cache_used = false;
+    crl = OpenSSLCRLCache::LookupCRL(url);
+    if (crl) {
+      if (IsCrlExpired("memory", crl.get(), crl_max_valid_duration_)) {
+        OpenSSLCRLCache::DeleteCRL(url);
+        crl.reset();
+      }
+      // Is the CRL valid?
+      if (crl.get() && !VerifyCrl(crl.get(), store_ctx.get())) {
+        LOG(WARNING) << "Failed to verify memory cached CRL."
+                     << " url=" << url;
+        OpenSSLCRLCache::DeleteCRL(url);
+        crl.reset();
+      }
+
+      is_mem_cache_used = (crl.get() != nullptr);
+    }
+
+    // Read from disk cache.
+    const string& cache_file =
+        file::JoinPath(GetCacheDirectory(),
+                       "CRL-" + NormalizeToUseFilename(url));
+    bool is_disk_cache_used = false;
+    if (!is_mem_cache_used && ReadFileToString(cache_file.c_str(), &crl_str)) {
+      crl = ParseCrl(crl_str);
+      if (crl &&
+          IsCrlExpired(cache_file, crl.get(), crl_max_valid_duration_)) {
+        remove(cache_file.c_str());
+        crl.reset();
+      }
+
+      // Is the CRL valid?
+      if (crl.get() && !VerifyCrl(crl.get(), store_ctx.get())) {
+        LOG(WARNING) << "Failed to verify disk cached CRL: " << cache_file;
+        remove(cache_file.c_str());
+        crl.reset();
+      }
+
+      is_disk_cache_used = (crl.get() != nullptr);
+    }
+
+    // Download from URL.
+    if (!is_mem_cache_used && !is_disk_cache_used) {
+      crl = GetX509CrlsFromUrl(url, &crl_str);
+      if (crl &&
+          IsCrlExpired(url, crl.get(), crl_max_valid_duration_)) {
+        crl.reset();
+      }
+
+      // Is the CRL valid?
+      if (crl.get() && !VerifyCrl(crl.get(), store_ctx.get())) {
+        LOG(WARNING) << "Failed to verify CRL: " << url;
+        crl.reset();
+      }
+    }
+
+    // Without CRL, TLS is not safe.
+    if (!crl.get()) {
+      std::ostringstream ss;
+      ss << "CRL is not available";
+      // |last_error_| keeps only the short message; the certificate details
+      // appended below go to the log only.
+      last_error_ = ss.str();
+      last_error_time_ = time(nullptr);
+      ss << ":" << GetHumanReadableCert(x509);
+      // This error may occur if the network is broken, unstable,
+      // or untrustworthy.
+      // We believe that not running compiler_proxy is better than hiding
+      // the strange situation. However, at the same time, sudden death
+      // usually makes it difficult for users to understand what is wrong.
+      // Decision: die at start-time, won't die after that, but it seems
+      // too late to die here.
+      LOG(ERROR) << ss.str();
+      return false;
+    }
+
+    X509_STORE_add_crl(SSL_CTX_get_cert_store(ctx_), crl.get());
+    certs_info_.append(GetHumanReadableCRL(crl.get()));
+    // A freshly downloaded CRL is written to the disk cache.
+    if (!is_mem_cache_used && !is_disk_cache_used) {
+      LOG(INFO) << "CRL loaded from: " << url;
+      const string& cache_dir = string(file::Dirname(cache_file));
+      if (!EnsureDirectory(cache_dir, 0700)) {
+        LOG(WARNING) << "Failed to create cache dir: " << cache_dir;
+      }
+      if (WriteStringToFile(crl_str, cache_file)) {
+        LOG(INFO) << "CRL is cached to: " << cache_file;
+      } else {
+        LOG(WARNING) << "Failed to write CRL cache to: " << cache_file;
+      }
+    }
+    if (is_disk_cache_used) {
+      LOG(INFO) << "Read CRL from cache:"
+                << " url=" << url
+                << " cache_file=" << cache_file;
+    }
+    if (is_mem_cache_used) {
+      LOG(INFO) << "loaded CRL in memory: " << url;
+    } else {
+      // Anything that did not come from the memory cache is stored there.
+      OpenSSLCRLCache::SetCRL(url, crl.get());
+      // If loaded from memory, we can assume we have already shown CRL info
+      // to log, and we do not show it again.
+      LOG(INFO) << GetHumanReadableCRL(crl.get());
+    }
+    crls_.emplace_back(std::move(crl));
+  }
+
+  LOG_IF(WARNING, crls_.empty())
+      << "A certificate should usually have its CRL."
+      << " If we cannot not load any CRLs, something should be broken."
+      << " certificates=" << GetHumanReadableCerts(x509s);
+
+  if (!crls_.empty()) {
+    VLOG(1) << "CRL is loaded.  We will check it during verification.";
+    // Turn on CRL checking for connections created from |ctx_|.
+    X509_VERIFY_PARAM *verify_param = X509_VERIFY_PARAM_new();
+    X509_VERIFY_PARAM_set_flags(verify_param, X509_V_FLAG_CRL_CHECK);
+    SSL_CTX_set1_param(ctx_, verify_param);
+    X509_VERIFY_PARAM_free(verify_param);
+    LOG(INFO) << "We may reject if the domain is not listed in loaded CRLs.";
+  }
+
+  is_crl_ready_ = true;
+  return true;
+}
+
+// Returns true when the connection must be refused: a recent CRL loading
+// error is still within its timeout, the CRLs cannot be loaded, or one of
+// the certificates in |x509s| is listed in a loaded CRL.  Loads the CRLs on
+// first use.
+bool OpenSSLContext::IsRevoked(STACK_OF(X509)* x509s) {
+  AUTOLOCK(lock, &mu_);
+
+  // Keep failing for kErrorTimeoutSecs after the last recorded error.
+  const time_t now = time(nullptr);
+  const bool recent_error =
+      !last_error_.empty() && now < last_error_time_ + kErrorTimeoutSecs;
+  if (recent_error) {
+    LOG(ERROR) << "Preventing using SSL because of:" << last_error_
+               << " last_error_time_=" << last_error_time_;
+    return true;
+  }
+
+  if (!is_crl_ready_) {
+    if (!SetupCrlsUnlocked(x509s)) {
+      LOG(ERROR) << "Failed to load CRLs:"
+                 << GetHumanReadableCerts(x509s);
+      return true;
+    }
+  }
+
+  // Check CRLs.
+  for (size_t i = 0; i < sk_X509_num(x509s); i++) {
+    X509* candidate = sk_X509_value(x509s, i);
+    for (const auto& crl : crls_) {
+      X509_REVOKED* revoked_entry;
+      if (X509_CRL_get0_by_cert(crl.get(), &revoked_entry, candidate)) {
+        LOG(ERROR) << "Certificate is already revoked:"
+                   << GetHumanReadableCert(candidate);
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+/* static */
+// Returns true if |hostname| matches |pattern|.
+// A pattern without "*" must equal the hostname exactly.  Otherwise the
+// text before and after the first "*" must be a prefix and a suffix of the
+// hostname, they must not overlap, and the part of the hostname covered by
+// the "*" must not contain ".".
+bool OpenSSLContext::IsHostnameMatched(
+    StringPiece hostname, StringPiece pattern) {
+  const StringPiece::size_type pos = pattern.find("*");
+  if (pos == StringPiece::npos) {
+    // No wildcard: only an exact match is acceptable.  Falling through would
+    // compute |pos + 1| == 0, make prefix and suffix both the whole pattern,
+    // and accept e.g. hostname "aaa" for pattern "aa".
+    return pattern == hostname;
+  }
+
+  StringPiece prefix = pattern.substr(0, pos);
+  StringPiece suffix = pattern.substr(pos + 1);  // skip "*".
+  VLOG(1) << "prefix=" << prefix;
+  VLOG(1) << "suffix=" << suffix;
+  // The prefix and the suffix must not overlap inside |hostname|.  Without
+  // this check the length subtraction below underflows, and pattern "a*a"
+  // would match hostname "a".
+  if (hostname.length() < prefix.length() + suffix.length()) {
+    return false;
+  }
+  if (!prefix.empty() && !strings::StartsWith(hostname, prefix)) {
+    return false;
+  }
+  if (!suffix.empty() && !strings::EndsWith(hostname, suffix)) {
+    return false;
+  }
+  // The wildcard covers at most one label: it must not span a ".".
+  StringPiece wildcard_part = hostname.substr(
+      prefix.length(),
+      hostname.length() - prefix.length() - suffix.length());
+  if (wildcard_part.find(".") != StringPiece::npos) {
+    return false;
+  }
+  return true;
+}
+
+// Returns true if |cert| identifies the server this context was initialized
+// for (|hostname_|).  An IP-literal hostname is compared with the
+// certificate's iPAddress alternative names only.  Any other hostname is
+// compared with the dNSName alternative names, falling back to the subject
+// common name only when the certificate has no dNSName at all.
+bool OpenSSLContext::IsValidServerIdentity(X509* cert) {
+  AUTOLOCK(lock, &mu_);
+
+  struct in_addr in4;
+  if (inet_pton(AF_INET, hostname_.c_str(), &in4) == 1) {
+    // hostname is IPv4 addr.
+    const bool ip4_matched = MatchAltIPAddress(cert, AF_INET, &in4);
+    if (ip4_matched) {
+      LOG(INFO) << "Hostname matches with IPv4 address:"
+                << " hostname=" << hostname_;
+    } else {
+      LOG(INFO) << "Hostname(IPv4) didn't match with certificate:"
+                << " hostname=" << hostname_;
+    }
+    return ip4_matched;
+  }
+
+  struct in6_addr in6;
+  if (inet_pton(AF_INET6, hostname_.c_str(), &in6) == 1) {
+    // hostname is IPv6 addr.
+    const bool ip6_matched = MatchAltIPAddress(cert, AF_INET6, &in6);
+    if (ip6_matched) {
+      LOG(INFO) << "Hostname matches with IPv6 address:"
+                << " hostname=" << hostname_;
+    } else {
+      LOG(INFO) << "Hostname(IPv6) didn't match with certificate:"
+                << " hostname=" << hostname_;
+    }
+    return ip6_matched;
+  }
+
+  const std::vector<string>& sans = GetAltDNSNames(cert);
+  if (!sans.empty()) {
+    for (const auto& san : sans) {
+      if (OpenSSLContext::IsHostnameMatched(hostname_, san)) {
+        LOG(INFO) << "Hostname matches with subject alternative names:"
+                  << " hostname=" << hostname_
+                  << " san=" << san;
+        return true;
+      }
+    }
+    LOG(ERROR) << "Hostname did not match with certificate:"
+               << " hostname=" << hostname_;
+    return false;
+  }
+
+  // Subject common name is used only when dNSName is not available.
+  //
+  // See: http://tools.ietf.org/html/rfc2818#section-3.1
+  // > If a subjectAltName extension of type dNSName is present, that MUST
+  // > be used as the identity. Otherwise, the (most specific) Common Name
+  // > field in the Subject field of the certificate MUST be used.
+  const string& cn = GetSubjectCommonName(cert);
+  const bool cn_matched = OpenSSLContext::IsHostnameMatched(hostname_, cn);
+  if (cn_matched) {
+    LOG(INFO) << "Hostname matches with common name:"
+              << " hostname=" << hostname_
+              << " cn=" << cn;
+  } else {
+    LOG(INFO) << "Hostname didn't match with common name:"
+              << " hostname=" << hostname_
+              << " cn=" << cn;
+  }
+  return cn_matched;
+}
+
+// Records the HTTP proxy that GetX509CrlsFromUrl() uses when downloading
+// CRLs.
+void OpenSSLContext::SetProxy(const string& proxy_host, const int proxy_port) {
+  proxy_port_ = proxy_port;
+  proxy_host_ = proxy_host;
+}
+
+// Runs the invalidate closure given to Init(), at most once.  The closure
+// is detached under |mu_| and run after the lock has been released.
+void OpenSSLContext::Invalidate() {
+  OneshotClosure* pending = nullptr;
+  {
+    AUTOLOCK(lock, &mu_);
+    // Take the closure and leave nullptr behind so it cannot run twice.
+    std::swap(pending, notify_invalidate_closure_);
+  }
+
+  if (pending != nullptr) {
+    pending->Run();
+  }
+}
+
+// Creates an SSL object bound to this context with SNI set to |hostname_|,
+// and tries to attach a cached session to it.
+//   session_reused: must be non-null; set to true when a cached session was
+//       attached and to false otherwise.
+// Each returned SSL increments the reference count and must be released
+// with DeleteSSL().
+SSL* OpenSSLContext::NewSSL(bool* session_reused) {
+  CHECK(session_reused);
+  SSL* ssl = SSL_new(ctx_);
+  CHECK(ssl) << "Failed on SSL_new.";
+
+  // TLS Server Name Indication (SNI).
+  DCHECK(!hostname_.empty());
+  CHECK(SSL_set_tlsext_host_name(ssl, hostname_.c_str()))
+      << "TLS Server Name Indication (SNI) failed:" << hostname_;
+
+  if (OpenSSLSessionCache::SetCachedSession(ctx_, ssl)) {
+    *session_reused = true;
+  } else {
+    LOG(INFO) << "No session is cached. We need to start from handshake."
+              << " ctx=" << ctx_
+              << " hostname=" << hostname_;
+    *session_reused = false;
+  }
+
+  ++ref_cnt_;
+  return ssl;
+}
+
+// Frees an SSL object obtained from NewSSL() and decrements the reference
+// count.
+void OpenSSLContext::DeleteSSL(SSL* ssl) {
+  DCHECK(ssl);
+  DCHECK_GT(ref_cnt_, 0UL);
+  --ref_cnt_;
+  SSL_free(ssl);
+}
+
+// Forwards |ssl| to OpenSSLSessionCache::RecordSession().
+void OpenSSLContext::RecordSession(SSL* ssl) {
+  OpenSSLSessionCache::RecordSession(ssl);
+}
+
+//
+// TLS Engine
+//
+// Creates an engine in the BEFORE_INIT state; Init() must be called before
+// it is used.  ctx_ is not set here but in Init().
+OpenSSLEngine::OpenSSLEngine()
+  : ssl_(nullptr), network_bio_(nullptr),
+    want_read_(false), want_write_(false),
+    recycled_(false), need_self_verify_(false),
+    need_to_store_session_(false), state_(BEFORE_INIT) {}
+
+// Shuts down and releases the SSL object through its owning context, then
+// frees the network side of the BIO pair.
+OpenSSLEngine::~OpenSSLEngine() {
+  if (ssl_ != nullptr) {
+    // TODO: actually send shutdown to server not BIO.
+    SSL_shutdown(ssl_);
+    // Goes through the context so that its reference count is decremented.
+    ctx_->DeleteSSL(ssl_);
+  }
+  if (network_bio_ != nullptr) {
+    BIO_free_all(network_bio_);
+  }
+}
+
+// Binds this engine to |ctx|: creates the SSL object, connects it to a BIO
+// pair whose network side is |network_bio_|, and starts the handshake.
+// Must be called exactly once, while the engine is in BEFORE_INIT.
+void OpenSSLEngine::Init(OpenSSLContext* ctx) {
+  DCHECK(ctx);
+  DCHECK(!ssl_);
+  DCHECK_EQ(state_, BEFORE_INIT);
+
+  // If the CRLs become ready only after the SSL* has been created, ssl_ may
+  // not check CRLs even though it should.  Since loaded CRLs are cached in
+  // OpenSSLContext, the cost of checking them ourselves should be small.
+  need_self_verify_ = !ctx->IsCrlReady();
+  bool session_reused = false;
+  ssl_ = ctx->NewSSL(&session_reused);
+  DCHECK(ssl_);
+  if (!session_reused) {
+    LOG(INFO) << "Need to register session by myself."
+              << " hostname=" << ctx->hostname();
+    need_to_store_session_ = true;
+  }
+  DCHECK(ssl_);
+
+  // Since internal_bio is free'd by SSL_free, we do not need to keep this
+  // separately.
+  BIO* internal_bio;
+  CHECK(BIO_new_bio_pair(&internal_bio, kBufSize, &network_bio_, kBufSize))
+      << "BIO_new_bio_pair failed.";
+  SSL_set_bio(ssl_, internal_bio, internal_bio);
+
+  // Connect() uses ctx_, so it must be assigned first.
+  ctx_ = ctx;
+  Connect();  // Do not check anything since nothing has started here.
+  state_ = IN_CONNECT;
+}
+
+// Returns true while the handshake is still in progress or the last SSL
+// operation asked for more transport input or output.
+bool OpenSSLEngine::IsIOPending() const {
+  if (state_ == IN_CONNECT) {
+    return true;
+  }
+  return want_read_ || want_write_;
+}
+
+// Moves everything currently pending on the network BIO into |data| so the
+// caller can send it over the transport.
+// Returns the number of bytes placed in |data| (0 when nothing was pending,
+// in which case |data| is left untouched), or TLS_VERIFY_ERROR when the
+// handshake step run afterwards fails with anything other than
+// want-read/want-write.
+int OpenSSLEngine::GetDataToSendTransport(string* data) {
+  DCHECK_NE(state_, BEFORE_INIT);
+  size_t max_read = BIO_ctrl(network_bio_, BIO_CTRL_PENDING, 0, nullptr);
+  if (max_read > 0) {
+    data->resize(max_read);
+    char* buf = &((*data)[0]);
+    int read_bytes = BIO_read(network_bio_, buf, max_read);
+    DCHECK_GT(read_bytes, 0);
+    CHECK_EQ(static_cast<int>(max_read), read_bytes);
+  }
+  want_write_ = false;
+  // While handshaking, give SSL_connect a chance to make progress.
+  if (state_ == IN_CONNECT) {
+    int status = Connect();
+    if (status < 0 && status != TLSEngine::TLS_WANT_READ &&
+        status != TLSEngine::TLS_WANT_WRITE)
+      return TLSEngine::TLS_VERIFY_ERROR;
+  }
+  return max_read;
+}
+
+// Returns how many bytes the network BIO currently guarantees it can accept,
+// i.e. the upper bound for the next SetDataFromTransport() call.
+size_t OpenSSLEngine::GetBufSizeFromTransport() {
+  return BIO_ctrl_get_write_guarantee(network_bio_);
+}
+
+// Feeds bytes received from the transport into the network BIO.
+// |data| must not be larger than GetBufSizeFromTransport(); this is
+// enforced with CHECK.
+// Returns the number of bytes written, or TLS_VERIFY_ERROR when the
+// handshake step run afterwards fails with anything other than
+// want-read/want-write.
+int OpenSSLEngine::SetDataFromTransport(const StringPiece& data) {
+  DCHECK_NE(state_, BEFORE_INIT);
+  size_t max_write = BIO_ctrl_get_write_guarantee(network_bio_);
+  CHECK_LE(data.size(), max_write);
+  int ret = BIO_write(network_bio_, data.data(), data.size());
+  CHECK_EQ(ret, static_cast<int>(data.size()));
+  want_read_ = false;
+  // While handshaking, give SSL_connect a chance to make progress.
+  if (state_ == IN_CONNECT) {
+    int status = Connect();
+    if (status < 0 && status != TLSEngine::TLS_WANT_READ &&
+        status != TLSEngine::TLS_WANT_WRITE)
+      return TLSEngine::TLS_VERIFY_ERROR;
+  }
+  return ret;
+}
+
+// Reads up to |size| decrypted bytes into |data|.  Returns the byte count
+// when positive, otherwise the TLSEngine status produced by UpdateStatus().
+// Only valid once the handshake has completed.
+int OpenSSLEngine::Read(void* data, int size) {
+  DCHECK_EQ(state_, READY);
+  return UpdateStatus(SSL_read(ssl_, data, size));
+}
+
+// Encrypts up to |size| bytes from |data|.  Returns the byte count when
+// positive, otherwise the TLSEngine status produced by UpdateStatus().
+// Only valid once the handshake has completed.
+int OpenSSLEngine::Write(const void* data, int size) {
+  DCHECK_EQ(state_, READY);
+  return UpdateStatus(SSL_write(ssl_, data, size));
+}
+
+// Translates the return value of an SSL_* call into the engine's result
+// and refreshes |want_read_| / |want_write_|.
+//   return_value > 0: returned unchanged.
+//   otherwise: TLS_WANT_READ, TLS_WANT_WRITE or TLS_ERROR depending on
+//       SSL_get_error().
+int OpenSSLEngine::UpdateStatus(int return_value) {
+  want_read_ = false;
+  want_write_ = false;
+  if (return_value > 0)
+    return return_value;
+
+  int ssl_err = SSL_get_error(ssl_, return_value);
+  switch (ssl_err) {
+    case SSL_ERROR_WANT_READ:
+      want_read_ = true;
+      return TLSEngine::TLS_WANT_READ;
+    case SSL_ERROR_WANT_WRITE:
+      want_write_ = true;
+      return TLSEngine::TLS_WANT_WRITE;
+    case SSL_ERROR_SSL:
+      if (SSL_get_verify_result(ssl_) != X509_V_OK) {
+        // Renew CRLs in the next connection but fails for this time.
+        LOG(WARNING) << "Resetting CRLs because of verify error."
+                     << " details=" << X509_verify_cert_error_string(
+                         SSL_get_verify_result(ssl_));
+        // Runs the context's invalidate closure (at most once per context).
+        ctx_->Invalidate();
+      }
+      // A verify failure is still reported as a generic error below.
+      FALLTHROUGH_INTENDED;
+    default:
+      LOG(ERROR) << "OpenSSL error"
+                 << " ret=" << return_value
+                 << " ssl_err=" << ssl_err
+                 << " err_msg=" << GetLastErrorMessage();
+      return TLSEngine::TLS_ERROR;
+  }
+}
+
+// Runs one step of the TLS handshake.
+// When SSL_connect() reports completion, the engine becomes READY and, if
+// the context had no CRLs ready when this engine was initialized, the
+// server identity and revocation status are checked here.
+// Returns TLS_VERIFY_ERROR when one of those checks fails, otherwise the
+// result of UpdateStatus() for the SSL_connect() return value.
+// NOTE(review): |state_| is already READY when a check below returns
+// TLS_VERIFY_ERROR; callers presumably must stop using the engine after
+// that error -- confirm.
+int OpenSSLEngine::Connect() {
+  int ret = SSL_connect(ssl_);
+  if (ret > 0) {
+    VLOG(3) << "session reused=" << SSL_session_reused(ssl_);
+    state_ = READY;
+    if (need_self_verify_) {
+      LOG(INFO) << GetHumanReadableSSLInfo(ssl_);
+
+      STACK_OF(X509)* x509s = SSL_get_peer_cert_chain(ssl_);
+      if (!x509s) {
+        LOG(ERROR) << "No x509 stored in SSL structure.";
+        return TLSEngine::TLS_VERIFY_ERROR;
+      }
+      LOG(INFO) << GetHumanReadableCerts(x509s)
+                << " session_info="
+                << GetHumanReadableSessionInfo(SSL_get_session(ssl_));
+
+      // Get server certificate to verify.
+      // To keep the code simple, the certificate is not taken from the
+      // certificate chain obtained above.
+      std::unique_ptr<X509, ScopedX509Free> cert(
+          SSL_get_peer_certificate(ssl_));
+      if (cert.get() == nullptr) {
+        LOG(ERROR) << "Cannot obtain the server's certificate";
+        return TLSEngine::TLS_VERIFY_ERROR;
+      }
+
+      LOG(INFO) << "Checking server's identity.";
+      // The OpenSSL library does not check that a name written in the
+      // certificate matches the host we are connecting to.
+      // We MUST do it ourselves; otherwise we would allow spoofing.
+      if (!ctx_->IsValidServerIdentity(cert.get())) {
+        return TLSEngine::TLS_VERIFY_ERROR;
+      }
+
+      // Since the CRLs were not set when the SSL started, CRL verification
+      // has to be done here.  Note that this is usually handled by the
+      // OpenSSL library.
+      LOG(INFO) << "need to verify revoked certificate by myself.";
+      if (ctx_->IsRevoked(x509s)) {
+        return TLSEngine::TLS_VERIFY_ERROR;
+      }
+    }
+    if (need_to_store_session_) {
+      ctx_->RecordSession(ssl_);
+    }
+  }
+  return UpdateStatus(ret);
+}
+
+string OpenSSLEngine::GetErrorString() const {
+  char error_message[1024];
+  ERR_error_string_n(ERR_peek_last_error(),
+                     error_message, sizeof error_message);
+  return error_message;
+}
+
+string OpenSSLEngine::GetLastErrorMessage() const {
+  std::ostringstream oss;
+  oss << GetErrorString();
+  const string& ctx_err = ctx_->GetLastErrorMessage();
+  if (!ctx_err.empty()) {
+    oss << " ctx_error=" << ctx_err;
+  }
+  if (ERR_GET_REASON(ERR_peek_last_error()) ==
+      SSL_R_CERTIFICATE_VERIFY_FAILED) {
+    oss << " verify_error="
+        << X509_verify_cert_error_string(SSL_get_verify_result(ssl_));
+  }
+  return oss.str();
+}
+
+// Starts without a context and with the CRL age limit disabled (-1), and
+// makes sure InitOpenSSL() has run exactly once in this process.
+OpenSSLEngineCache::OpenSSLEngineCache() :
+    ctx_(nullptr), crl_max_valid_duration_(-1) {
+#ifndef _WIN32
+  pthread_once(&g_openssl_init_once, InitOpenSSL);
+#else
+  InitOnceExecuteOnce(&g_openssl_init_once, InitOpenSSLWin, nullptr, nullptr);
+#endif
+}
+
+// Verifies that nothing still depends on this cache when it is destroyed.
+OpenSSLEngineCache::~OpenSSLEngineCache() {
+  // If OpenSSLEngineCache is deleted correctly, we can expect:
+  // 1. all outgoing sockets are closed.
+  // 2. all counterpart OpenSSLEngine instances are free'd.
+  // 3. contexts_to_delete_ should be empty.
+  // 4. reference count of ctx_ should be zero.
+  CHECK(contexts_to_delete_.empty());
+  CHECK(!ctx_.get() || ctx_->ref_cnt() == 0UL);
+}
+
+// Returns a new, initialized engine that uses the current context, creating
+// the context first when there is none.  The caller owns the returned
+// engine.  NewTLSEngine() calls this while holding |mu_|.
+OpenSSLEngine* OpenSSLEngineCache::GetOpenSSLEngineUnlocked() {
+  if (!ctx_) {
+    CHECK(OpenSSLCertificateStore::IsReady())
+        << "OpenSSLCertificateStore does not have any certificates.";
+    ctx_.reset(new OpenSSLContext);
+    ctx_->Init(hostname_, crl_max_valid_duration_,
+               NewCallback(this, &OpenSSLEngineCache::InvalidateContext));
+    if (!proxy_host_.empty()) {
+      ctx_->SetProxy(proxy_host_, proxy_port_);
+    }
+  }
+
+  std::unique_ptr<OpenSSLEngine> engine(new OpenSSLEngine());
+  engine->Init(ctx_.get());
+  return engine.release();
+}
+
+// Forwards |ssl_cert_filename| to
+// OpenSSLCertificateStore::AddCertificateFromFile().
+void OpenSSLEngineCache::AddCertificateFromFile(
+    const string& ssl_cert_filename) {
+  OpenSSLCertificateStore::AddCertificateFromFile(ssl_cert_filename);
+}
+
+// Forwards |ssl_cert| to OpenSSLCertificateStore::AddCertificateFromString()
+// with "user" as the first argument.
+void OpenSSLEngineCache::AddCertificateFromString(
+    const string& ssl_cert) {
+  OpenSSLCertificateStore::AddCertificateFromString("user", ssl_cert);
+}
+
+// Returns the engine associated with |sock|.  An engine already registered
+// for the socket is marked as recycled and returned; otherwise a new one is
+// created and registered.  The cache keeps ownership; the engine is deleted
+// in WillCloseSocket().
+TLSEngine* OpenSSLEngineCache::NewTLSEngine(int sock) {
+  AUTOLOCK(lock, &mu_);
+  auto existing = ssl_map_.find(sock);
+  if (existing == ssl_map_.end()) {
+    OpenSSLEngine* created = GetOpenSSLEngineUnlocked();
+    CHECK(ssl_map_.insert(std::make_pair(sock, created)).second)
+        << "ssl_map_ should not have the same key:" << sock;
+    VLOG(1) << "SSL engine allocated. sock=" << sock;
+    return created;
+  }
+  existing->second->SetRecycled();
+  return existing->second;
+}
+
+// Deletes the engine registered for |sock| (if any) and then drops every
+// invalidated context that is no longer referenced by an engine.
+void OpenSSLEngineCache::WillCloseSocket(int sock) {
+  AUTOLOCK(lock, &mu_);
+  VLOG(1) << "SSL engine release. sock=" << sock;
+  auto entry = ssl_map_.find(sock);
+  if (entry != ssl_map_.end()) {
+    delete entry->second;
+    ssl_map_.erase(entry);
+  }
+
+  if (contexts_to_delete_.empty()) {
+    return;
+  }
+
+  // Keep only the contexts that still have live SSL objects; the others are
+  // destroyed when the old vector's contents are replaced below.
+  std::vector<std::unique_ptr<OpenSSLContext>> still_referenced;
+  for (auto& retired : contexts_to_delete_) {
+    if (retired->ref_cnt() != 0) {
+      still_referenced.emplace_back(std::move(retired));
+    } else {
+      CHECK(retired.get() != ctx_.get());
+    }
+  }
+  contexts_to_delete_ = std::move(still_referenced);
+}
+
+// Callback handed to OpenSSLContext::Init().  Retires the current context:
+// it is moved to |contexts_to_delete_| and |ctx_| becomes empty, so the next
+// GetOpenSSLEngineUnlocked() call creates a fresh context.
+void OpenSSLEngineCache::InvalidateContext() {
+  AUTOLOCK(lock, &mu_);
+  // The OpenSSLContext instance must be kept until its ref_cnt becomes zero,
+  // i.e. until no OpenSSLEngine instance uses it.
+  LOG_IF(ERROR, ctx_->hostname() != hostname_)
+      << "OpenSSLContext hostname is different from OpenSSLEngineFactory one. "
+      << " It might be changed after ctx_ is created?"
+      << " ctx=" << ctx_->hostname()
+      << " factory=" << hostname_;
+  contexts_to_delete_.emplace_back(std::move(ctx_));
+}
+
+}  // namespace devtools_goma
diff --git a/client/openssl_engine.h b/client/openssl_engine.h
new file mode 100644
index 0000000..907d163
--- /dev/null
+++ b/client/openssl_engine.h
@@ -0,0 +1,222 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_OPENSSL_ENGINE_H_
+#define DEVTOOLS_GOMA_CLIENT_OPENSSL_ENGINE_H_
+
+#ifdef _WIN32
+#include "socket_helper_win.h"
+#endif
+
+#include <openssl/ssl.h>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "autolock_timer.h"
+#include "tls_engine.h"
+#include "unordered.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class HttpRequest;
+class HttpResponse;
+class OneshotClosure;
+class ScopedSocket;
+
+// Deleter for std::unique_ptr that releases an X509_CRL via X509_CRL_free().
+class ScopedX509CRLFree {
+ public:
+  void operator()(X509_CRL* crl) const {
+    if (crl != nullptr) {
+      X509_CRL_free(crl);
+    }
+  }
+};
+
+typedef std::unique_ptr<X509_CRL, ScopedX509CRLFree> ScopedX509CRL;
+
+// Holds an SSL_CTX together with the CRLs, certificate description and
+// error state that belong to it, and counts how many SSL objects created
+// from it are still alive.
+class OpenSSLContext {
+ public:
+  OpenSSLContext();
+  ~OpenSSLContext();
+
+  // |invalidate_closure| should be a one-shot closure.
+  // It will be deleted after running.
+  // |invalidate_closure| MUST NOT call any OpenSSLContext methods
+  // to avoid deadlock.
+  void Init(const string& hostname,
+            int crl_max_valid_duration,
+            OneshotClosure* invalidate_closure);
+  // Sets the proxy to be used to download CRLs.
+  void SetProxy(const string& proxy_host, const int proxy_port);
+
+  // Returns true if the server's identity is valid.
+  bool IsValidServerIdentity(X509* cert);
+
+  // Returns true if one of the X509 certificates has been revoked.
+  bool IsRevoked(STACK_OF(X509)* x509s);
+
+  // The three getters below take |mu_| and return a copy of the guarded
+  // value.
+  string GetCertsInfo() const {
+    AUTOLOCK(lock, &mu_);
+    return certs_info_;
+  }
+  bool IsCrlReady() const {
+    AUTOLOCK(lock, &mu_);
+    return is_crl_ready_;
+  }
+  const string GetLastErrorMessage() const {
+    AUTOLOCK(lock, &mu_);
+    return last_error_;
+  }
+  // Current value of |ref_cnt_|.  Does not take |mu_|; see the note on
+  // |ref_cnt_| for the locking the caller must provide.
+  size_t ref_cnt() const { return ref_cnt_; }
+  SSL* NewSSL(bool* session_reused);
+  void DeleteSSL(SSL* ssl);
+  void RecordSession(SSL* ssl);
+  void Invalidate();
+
+  // Returns true if |hostname| matches |pattern|.
+  // |pattern| may have a wildcard explained in RFC2818 Section 3.1.
+  // See: http://tools.ietf.org/html/rfc2818#section-3.1
+  //
+  // Limitation: it does not support the case with multiple wildcards.
+  static bool IsHostnameMatched(StringPiece hostname, StringPiece pattern);
+
+  // Returns |hostname_| without taking |mu_|.
+  const string& hostname() const { return hostname_; }
+
+ private:
+  ScopedX509CRL GetX509CrlsFromUrl(const string& url, string* crl_str);
+
+  // Loads CRLs based on X509v3 CRL distribution point.
+  // |mu_| must be held by the caller.
+  bool SetupCrlsUnlocked(STACK_OF(X509)* x509s);
+
+  // Lock for OpenSSL context (ctx_, crls_, is_crl_ready_, certs_info_
+  // and last_error_).
+  Lock mu_;
+  SSL_CTX* ctx_;
+  // Since we do not know a good way to get CRLs from SSL_CTX, we use crls_
+  // to check for revoked certificates.
+  std::vector<ScopedX509CRL> crls_;
+  string proxy_host_;
+  int proxy_port_;
+  string certs_info_;
+  string hostname_;
+  bool is_crl_ready_;
+  string last_error_;
+  time_t last_error_time_;
+  int crl_max_valid_duration_;
+
+  // ref_cnt_ represents the number of OpenSSLEngine using the class instance.
+  // It is increased by NewSSL, and decreased by DeleteSSL.
+  // If ref_cnt_ becomes 0, OpenSSLEngineCache can delete the instance.
+  //
+  // Note: NewSSL, DeleteSSL, ref_cnt() MUST be called under
+  // OpenSSLEngineCache lock. Or, OpenSSLEngineCache may delete OpenSSLContext
+  // in use:
+  // e.g. th1: checks ref_cnt() == 0 -> th2: NewTLSEngine -> th1: delete.
+  size_t ref_cnt_;
+
+  OneshotClosure* notify_invalidate_closure_;
+
+  DISALLOW_COPY_AND_ASSIGN(OpenSSLContext);
+};
+
+// TLSEngine implementation backed by an OpenSSL SSL object.
+// OpenSSLEngine is not synchronized.
+// The constructor and destructor are protected: instances are created and
+// destroyed through the friend class OpenSSLEngineCache.
+class OpenSSLEngine : public TLSEngine {
+ public:
+  bool IsIOPending() const override;
+
+  // Transport-side interface (see TLSEngine for the contract of each call).
+  int GetDataToSendTransport(string* data) override;
+  size_t GetBufSizeFromTransport() override;
+  int SetDataFromTransport(const StringPiece& data) override;
+
+  // Application-side interface.
+  int Read(void* data, int size) override;
+  int Write(const void* data, int size) override;
+
+  string GetLastErrorMessage() const override;
+
+  // Returns true once SetRecycled() has been called, i.e. the engine has
+  // been handed out before.
+  bool IsRecycled() const override { return recycled_; }
+
+ protected:
+  friend class OpenSSLEngineCache;
+  OpenSSLEngine();
+  ~OpenSSLEngine() override;
+  // Will not take ownership of ctx.
+  void Init(OpenSSLContext* ctx);
+  // Marks this engine as having been handed out before.
+  void SetRecycled() { recycled_ = true; }
+
+ private:
+  // Returns |return_value| if |return_value| > 0.
+  // Note that positive |return_value| usually means the number of data
+  // read / written.
+  // Otherwise, returns TLSEngine::TLSErrorReason to make a caller know
+  // error reason.
+  int UpdateStatus(int return_value);
+
+  // Returns 1 if TLS handshake was successfully completed, and a TLS connection
+  // has been established.
+  // Otherwise, returns TLSEngine::TLSErrorReason to make a caller know
+  // error reason.
+  int Connect();
+  string GetErrorString() const;
+
+  SSL* ssl_;
+  BIO* network_bio_;
+  // NOTE(review): presumably mirror SSL_ERROR_WANT_READ / WANT_WRITE from the
+  // last OpenSSL call -- confirm against UpdateStatus() in the .cc file.
+  bool want_read_;
+  bool want_write_;
+  // Set by SetRecycled(); reported by IsRecycled().
+  bool recycled_;
+  bool need_self_verify_;
+  bool need_to_store_session_;
+  OpenSSLContext* ctx_;  // OpenSSLEngineCache has ownership.
+
+  enum SSL_ENGINE_STATE { BEFORE_INIT, IN_CONNECT, READY } state_;
+
+  DISALLOW_COPY_AND_ASSIGN(OpenSSLEngine);
+};
+
+// TLSEngineFactory that hands out OpenSSLEngine instances keyed by socket
+// descriptor and owns the OpenSSLContext they are created from.
+class OpenSSLEngineCache : public TLSEngineFactory {
+ public:
+  OpenSSLEngineCache();
+  ~OpenSSLEngineCache() override;
+  // Returns the engine for |sock|, creating and registering one if needed.
+  TLSEngine* NewTLSEngine(int sock) override;
+  // Destroys the engine registered for |sock|, if any.
+  void WillCloseSocket(int sock) override;
+  void AddCertificateFromFile(const string& ssl_cert_filename);
+  void AddCertificateFromString(const string& ssl_cert);
+  // Returns the certificate description of the current context, or an empty
+  // string when there is no current context.
+  //
+  // |ctx_| is guarded by |mu_| and is left empty by InvalidateContext(), so
+  // it must be read under the lock and checked before being dereferenced.
+  string GetCertsInfo() override {
+    AUTOLOCK(lock, &mu_);
+    if (ctx_ == nullptr) {
+      return string();
+    }
+    return ctx_->GetCertsInfo();
+  }
+  void SetHostname(const string& hostname) override {
+    AUTOLOCK(lock, &mu_);
+    hostname_ = hostname;
+  }
+  // Sets the proxy used to download CRLs.
+  void SetProxy(const string& proxy_host, const int proxy_port) {
+    AUTOLOCK(lock, &mu_);
+    proxy_host_ = proxy_host;
+    proxy_port_ = proxy_port;
+  }
+  // Stores |duration| (seconds) in |crl_max_valid_duration_|.  Note that this
+  // setter does not take |mu_|.
+  void SetCRLMaxValidDurationInSeconds(int duration) {
+    crl_max_valid_duration_ = duration;
+  }
+
+ private:
+  // This function should be called with mu_ lock held.
+  OpenSSLEngine* GetOpenSSLEngineUnlocked();
+  // Moves |ctx_| into |contexts_to_delete_|; takes |mu_| itself.
+  void InvalidateContext();
+
+  // Lock for ctx_, contexts_to_delete_, ssl_map_, certs_ and proxy configs.
+  Lock mu_;
+  std::unique_ptr<OpenSSLContext> ctx_;
+  // Retired contexts kept alive until their ref_cnt() drops to zero.
+  std::vector<std::unique_ptr<OpenSSLContext>> contexts_to_delete_;
+  // Socket descriptor -> engine.  The engines are owned by this map and are
+  // deleted in WillCloseSocket().
+  unordered_map<int, OpenSSLEngine*> ssl_map_;
+  // Proxy configs to download CRLs.
+  string hostname_;
+  string proxy_host_;
+  int proxy_port_;
+  int crl_max_valid_duration_;
+
+  DISALLOW_COPY_AND_ASSIGN(OpenSSLEngineCache);
+};
+
+}  // namespace devtools_goma
+#endif  // DEVTOOLS_GOMA_CLIENT_OPENSSL_ENGINE_H_
diff --git a/client/openssl_engine_helper.h b/client/openssl_engine_helper.h
new file mode 100644
index 0000000..98d93c7
--- /dev/null
+++ b/client/openssl_engine_helper.h
@@ -0,0 +1,21 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_OPENSSL_ENGINE_HELPER_H_
+#define DEVTOOLS_GOMA_CLIENT_OPENSSL_ENGINE_HELPER_H_
+
+#include <string>
+
+using std::string;
+
+namespace devtools_goma {
+
+// Gets trusted root certificates (PEM).
+// Returns true if succeeded.  Otherwise, false.
+bool GetTrustedRootCerts(string* certs);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_OPENSSL_ENGINE_HELPER_H_
diff --git a/client/openssl_engine_helper_generic.cc b/client/openssl_engine_helper_generic.cc
new file mode 100644
index 0000000..ea7e7be
--- /dev/null
+++ b/client/openssl_engine_helper_generic.cc
@@ -0,0 +1,16 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "openssl_engine_helper.h"
+#include "roots.h"
+
+namespace devtools_goma {
+
+// Copies the built-in PEM bundle (|certs_roots_pem_start|,
+// |certs_roots_pem_size|; presumably declared in roots.h) into |certs|.
+// Always returns true.
+bool GetTrustedRootCerts(string* certs) {
+  *certs = string(certs_roots_pem_start, certs_roots_pem_size);
+  return true;
+}
+
+}  // namespace devtools_goma
diff --git a/client/openssl_engine_helper_mac.cc b/client/openssl_engine_helper_mac.cc
new file mode 100644
index 0000000..1979d1b
--- /dev/null
+++ b/client/openssl_engine_helper_mac.cc
@@ -0,0 +1,127 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "openssl_engine_helper.h"
+
+#include <string>
+
+#include <Security/SecImportExport.h>
+#include <Security/SecKeychainSearch.h>
+#include <Security/SecKey.h>
+#include <Security/SecCertificate.h>
+#include <CoreFoundation/CoreFoundation.h>
+
+#include "glog/logging.h"
+
+using std::string;
+
+namespace {
+
+// http://www.apple.com/certificateauthority/ca_program.html
+const char* kRootKeychainStore = "/System/Library/Keychains/X509Anchors";
+
+// Appends every certificate item found in |chain_ref| to |out_array| and
+// increments |*num_items| once per appended item.  Returns noErr when the
+// search simply runs out of items, or the first other error encountered.
+// TODO: replace several deprecated functions, which I could not
+//                    find alternative functions to use.
+OSStatus ReadCertsFromKeychain(SecKeychainRef chain_ref,
+                               CFMutableArrayRef out_array,
+                               int *num_items) {
+  SecKeychainSearchRef search_ref;
+  OSStatus status = SecKeychainSearchCreateFromAttributes(
+      chain_ref, kSecCertificateItemClass, nullptr, &search_ref);
+  if (status != noErr)
+    return status;
+
+  SecKeychainItemRef item_ref;
+  while ((status = SecKeychainSearchCopyNext(search_ref, &item_ref))
+         == noErr) {
+    CFArrayAppendValue(out_array, item_ref);
+    // Drop the reference obtained from the search right after appending.
+    CFRelease(item_ref);
+    ++(*num_items);
+  }
+  CFRelease(search_ref);
+
+  // errSecItemNotFound only marks the end of the search; it is not reported
+  // as a failure.
+  return status == errSecItemNotFound ? noErr : status;
+}
+
+// Exports |export_items| as a PEM sequence and stores the bytes in |certs|.
+// Returns the status of SecKeychainItemExport(); |certs| is left untouched
+// on failure.
+OSStatus WriteCertsToMemory(CFMutableArrayRef export_items, string* certs) {
+  CFDataRef pem_data;
+  const OSStatus status = SecKeychainItemExport(
+      export_items, kSecFormatPEMSequence, kSecItemPemArmour, nullptr,
+      &pem_data);
+  if (status != noErr)
+    return status;
+
+  const char* bytes =
+      reinterpret_cast<const char*>(CFDataGetBytePtr(pem_data));
+  certs->assign(bytes, CFDataGetLength(pem_data));
+  CFRelease(pem_data);
+
+  return status;
+}
+
+// Dumps the certificates stored in |keychain_ref| to |out_certs| in PEM
+// format.  Returns true on success; on failure the error is logged and
+// false is returned.
+bool DumpCertificates(SecKeychainRef keychain_ref, string* out_certs) {
+  OSStatus ret;
+  // ReadCertsFromKeychain() increments this counter in place, so it has to
+  // start from a defined value.
+  int num_certs = 0;
+  CFMutableArrayRef export_items = CFArrayCreateMutable(
+      nullptr, 0, &kCFTypeArrayCallBacks);
+  if (export_items == nullptr) {
+    LOG(ERROR) << "Failed to allocate memory for certificates.";
+    return false;
+  }
+
+  ret = ReadCertsFromKeychain(keychain_ref, export_items, &num_certs);
+  if (ret) {
+    LOG(ERROR) << "Failed to read root certificates keychain:"
+               << " ret=" << ret;
+    CFRelease(export_items);
+    return false;
+  }
+
+  ret = WriteCertsToMemory(export_items, out_certs);
+  if (ret) {
+    LOG(ERROR) << "Failed to copy root certificates keychain:"
+               << " ret=" << ret;
+    CFRelease(export_items);
+    return false;
+  }
+
+  CFRelease(export_items);
+  return true;
+}
+
+}  // namespace
+
+namespace devtools_goma {
+
+// Opens the keychain at |kRootKeychainStore| and dumps its certificates into
+// |certs| as PEM.  Returns false if the keychain cannot be opened or the
+// dump fails.
+bool GetTrustedRootCerts(string* certs) {
+  SecKeychainRef keychain_ref;
+  const OSStatus status = SecKeychainOpen(kRootKeychainStore, &keychain_ref);
+  if (status != noErr) {
+    LOG(ERROR) << "Failed to open root certificates keychain:"
+               << kRootKeychainStore
+               << " ret=" << status;
+    return false;
+  }
+
+  const bool dumped = DumpCertificates(keychain_ref, certs);
+  CFRelease(keychain_ref);
+  return dumped;
+}
+
+}  // namespace devtools_goma
diff --git a/client/openssl_engine_helper_win.cc b/client/openssl_engine_helper_win.cc
new file mode 100644
index 0000000..50db5c9
--- /dev/null
+++ b/client/openssl_engine_helper_win.cc
@@ -0,0 +1,59 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "openssl_engine_helper.h"
+
+#include <windows.h>
+
+#include <string>
+
+#include "certs_resource.h"
+#include "glog/logging.h"
+
+using std::string;
+
+namespace {
+
+// Copies the RT_RCDATA resource identified by ROOT_CA_NAME from the current
+// process image into |certs|.  Each failing step logs the system error and
+// makes the function return false.
+bool LoadTrustedRootCertsInResource(string* certs) {
+  // A null HMODULE selects the resources of the current process.
+  const HMODULE kCurrentProcess = nullptr;
+
+  HRSRC found = FindResource(kCurrentProcess, MAKEINTRESOURCE(ROOT_CA_NAME),
+                             RT_RCDATA);
+  if (found == nullptr) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Cannot find the root certificate resource.";
+    return false;
+  }
+
+  HGLOBAL loaded = LoadResource(kCurrentProcess, found);
+  if (loaded == nullptr) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Cannot load the root certificate resource.";
+    return false;
+  }
+
+  LPVOID bytes = LockResource(loaded);
+  if (bytes == nullptr) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Cannot obtain pointer to the root certificate resource.";
+    return false;
+  }
+
+  const DWORD num_bytes = SizeofResource(kCurrentProcess, found);
+  if (num_bytes == 0) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Cannot get size of root certificate resource.";
+    return false;
+  }
+
+  certs->assign(static_cast<const char*>(bytes), num_bytes);
+  return true;
+}
+
+}  // anonymous namespace.
+
+namespace devtools_goma {
+
+// Windows implementation: forwards to LoadTrustedRootCertsInResource() and
+// returns its result.
+bool GetTrustedRootCerts(string* certs) {
+  return LoadTrustedRootCertsInResource(certs);
+}
+
+}  // namespace devtools_goma
diff --git a/client/openssl_engine_unittest.cc b/client/openssl_engine_unittest.cc
new file mode 100644
index 0000000..c4bbd65
--- /dev/null
+++ b/client/openssl_engine_unittest.cc
@@ -0,0 +1,472 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "openssl_engine.h"
+
+#include <openssl/dh.h>
+#include <openssl/err.h>
+#include <openssl/ssl.h>
+#include <openssl/bio.h>
+#include <memory>
+#include <string>
+
+#include "glog/logging.h"
+#include "gtest/gtest.h"
+#include "string_piece.h"
+#include "unittest_util.h"
+
+using std::string;
+
+namespace {
+
+static const size_t kBufsize = 4096;
+/*
+ * How to generate following PEM?
+ *
+ * 1. generate private key.
+ * $ openssl genpkey -out k.pem -algorithm rsa
+ * (or, EC key like google.com)
+ * $ openssl ecparam -genkey -out k.pem -name prime256v1
+ * $ openssl pkcs8 -topk8 -in k.pem -out key.pem -nocrypt
+ *
+ * 2. generate self-signed certificate. (with SHA256 signature)
+ * $ openssl req -new -x509 -key key.pem -out cert.pem -days 36500 \
+ *   -sha256 -config test/openssl.cnf
+ *
+ * 3. copy generated key.pem and cert.pem to tests directory.
+ * $ cp {key,cert}.pem /where/you/have/tests
+ */
+static const char* kCert = "cert.pem";
+static const char* kKey = "key.pem";
+static const char* kAnotherCert = "another_cert.pem";
+static const char* kAnotherKey = "another_key.pem";
+
+/*
+ * How to generate following PEM?
+ * 1. go run $GOROOT/src/crypto/tls/generate_cert.go -ca \
+ *    -duration $((100 * 365 * 24))h \
+ *    -host 127.0.0.1,::1
+ * 2. cp {key,cert}.pem /where/you/have/tests
+ */
+static const char* kCertIPAddr = "cert_127.0.0.1.pem";
+static const char* kKeyIPAddr = "key_127.0.0.1.pem";
+
+/*
+ * To generate DHParam PEM:
+ * $ openssl dhparam 1024 > dhparam.pem
+ */
+static const char* kDHParam = "dhparam.pem";
+static const int kDummyFd = 123;
+
+// Deleter for std::unique_ptr that releases an SSL_CTX via SSL_CTX_free().
+class ScopedSSLCtxFree {
+ public:
+  void operator()(SSL_CTX* ctx) const {
+    if (ctx != nullptr) {
+      SSL_CTX_free(ctx);
+    }
+  }
+};
+
+// Reads the DH parameters stored in the |kDHParam| test file and installs
+// them on |ctx| with SSL_CTX_set_tmp_dh().  Any failure aborts through CHECK.
+void SetupEphemeralDH(SSL_CTX* ctx) {
+  // For DHE.
+  // Since creating a DH param takes certain long time, let us read a
+  // pre-computed param from a file.
+  // Note that creating ECDH param is quick.
+  BIO* bio =
+      BIO_new_file(devtools_goma::GetTestFilePath(kDHParam).c_str(), "r");
+  CHECK(bio);
+  DH* dh = PEM_read_bio_DHparams(bio, nullptr, nullptr, nullptr);
+  CHECK(dh);
+  CHECK(SSL_CTX_set_tmp_dh(ctx, dh));
+  // |dh| and |bio| are released here once the parameters have been
+  // installed on |ctx|.
+  DH_free(dh);
+  CHECK(BIO_free(bio));
+}
+
+// Builds a server-side SSL_CTX that uses the certificate in |cert_file| and
+// the private key in |key_file| (both PEM, resolved with GetTestFilePath()),
+// the default verify paths, and the pre-computed DH parameters.
+// Any failure aborts through CHECK.  The caller is responsible for freeing
+// the returned context (the tests wrap it with ScopedSSLCtxFree).
+SSL_CTX* SetupServerContext(const char* cert_file, const char* key_file) {
+  SSL_CTX* s_ctx = SSL_CTX_new(SSLv23_method());
+  CHECK(s_ctx);
+  CHECK(SSL_CTX_use_certificate_file(
+      s_ctx, devtools_goma::GetTestFilePath(cert_file).c_str(),
+      SSL_FILETYPE_PEM));
+  CHECK(SSL_CTX_use_PrivateKey_file(
+      s_ctx, devtools_goma::GetTestFilePath(key_file).c_str(),
+      SSL_FILETYPE_PEM));
+  CHECK(SSL_CTX_set_default_verify_paths(s_ctx));
+  SetupEphemeralDH(s_ctx);
+  return s_ctx;
+}
+
+// Minimal in-memory TLS server used to exercise the client engine.
+// The SSL object performs its I/O on one half of a BIO pair
+// (|internal_bio_|); the test moves bytes in and out of the other half
+// (|network_bio_|) instead of using a real socket.
+class OpenSSLServerEngine {
+ public:
+  // Creates the SSL object from |ctx|, wires up the BIO pair and makes the
+  // first SSL_accept() attempt.
+  explicit OpenSSLServerEngine(SSL_CTX* ctx) : need_retry_(false) {
+    ssl_ = SSL_new(ctx);
+    CHECK(ssl_);
+
+    // initialize BIOs.
+    CHECK(BIO_new_bio_pair(&internal_bio_, kBufsize, &network_bio_, kBufsize));
+
+    SSL_set_bio(ssl_, internal_bio_, internal_bio_);
+    state_ = IN_ACCEPT;
+    Accept();
+  }
+
+  ~OpenSSLServerEngine() {
+    // |internal_bio_| was handed to |ssl_| through SSL_set_bio(), so only
+    // |network_bio_| is freed explicitly.
+    SSL_free(ssl_);
+    CHECK(BIO_free(network_bio_));
+  }
+
+  // Returns the text for the most recent entry of the OpenSSL error queue
+  // (the entry is peeked, not removed).
+  string GetErrorMessage() const {
+    char errmsg[1024];
+    ERR_error_string_n(ERR_peek_last_error(), errmsg, sizeof(errmsg));
+    return errmsg;
+  }
+
+  // Classifies the result |return_value| of the last SSL_* call:
+  // - success: nothing to retry, nothing to log;
+  // - WANT_READ / WANT_WRITE: CanRetry() becomes true;
+  // - anything else: logged, CanRetry() stays false.
+  void UpdateStatus(int return_value) {
+    need_retry_ = false;
+    const int ssl_err = SSL_get_error(ssl_, return_value);
+    switch (ssl_err) {
+      case SSL_ERROR_NONE:
+        // The operation completed, so it must not be reported as an error.
+        return;
+      case SSL_ERROR_WANT_READ:
+      case SSL_ERROR_WANT_WRITE:
+        need_retry_ = true;
+        return;
+      default:
+        LOG(INFO) << "OpenSSL error"
+                  << " ret=" << return_value
+                  << " ssl_err=" << ssl_err
+                  << " error_message=" << GetErrorMessage();
+    }
+  }
+
+  // Advances the handshake; switches to READY once SSL_accept() succeeds.
+  void Accept() {
+    int ret = SSL_accept(ssl_);
+    if (ret > 0)
+      state_ = READY;
+    UpdateStatus(ret);
+  }
+
+  // Number of bytes waiting in |network_bio_| to be sent to the peer.
+  int GetSizeToSend() {
+    return BIO_ctrl(network_bio_, BIO_CTRL_PENDING, 0, nullptr);
+  }
+
+  // Reads at most |acceptable_size| bytes (capped at kBufsize) of outgoing
+  // data into |data| and returns the BIO_read() result.  |data| is only
+  // modified when something was read.  While the handshake is in progress
+  // another SSL_accept() attempt is made afterwards.
+  int GetDataToSendTransport(string* data, size_t acceptable_size) {
+    char buf[kBufsize];
+    if (acceptable_size > sizeof(buf))
+      acceptable_size = sizeof(buf);
+    int r = BIO_read(network_bio_, buf, acceptable_size);
+    if (r > 0) {
+      data->assign(buf, r);
+    }
+    if (state_ == IN_ACCEPT) {
+      Accept();
+    }
+    return r;
+  }
+
+  // Result of BIO_ctrl_get_read_request() on |network_bio_|.
+  int GetSpaceForDataFromTransport() {
+    return BIO_ctrl_get_read_request(network_bio_);
+  }
+
+  // Feeds |data| received from the peer into |network_bio_| and returns the
+  // BIO_write() result.  While the handshake is in progress another
+  // SSL_accept() attempt is made afterwards.
+  int SetDataFromTransport(StringPiece data) {
+    int r = BIO_write(network_bio_, data.data(), data.size());
+    if (state_ == IN_ACCEPT) {
+      Accept();
+    }
+    return r;
+  }
+
+  // Reads up to kBufsize bytes of application data into |data| and returns
+  // the SSL_read() result.  |data| is only modified when something was read.
+  int Read(string* data) {
+    char tmp[kBufsize];
+    int r = SSL_read(ssl_, tmp, sizeof(tmp));
+    if (r > 0) {
+      data->assign(tmp, r);
+    }
+    UpdateStatus(r);
+    return r;
+  }
+
+  // Writes |message| as application data and returns the SSL_write() result.
+  int Write(StringPiece message) {
+    int r = SSL_write(ssl_, message.data(), message.size());
+    UpdateStatus(r);
+    return r;
+  }
+
+  // True if the last operation stopped with WANT_READ or WANT_WRITE.
+  bool CanRetry() {
+    return need_retry_;
+  }
+
+  bool InInit() {
+    return SSL_in_init(ssl_) != 0;
+  }
+
+  string StateString() {
+    return SSL_state_string_long(ssl_);
+  }
+
+ private:
+  SSL* ssl_;
+  BIO* internal_bio_;
+  BIO* network_bio_;
+
+  enum ServerEngineStatus { IN_ACCEPT, READY } state_;
+  bool need_retry_;
+
+  DISALLOW_COPY_AND_ASSIGN(OpenSSLServerEngine);
+};
+
+}  // namespace
+
+namespace devtools_goma {
+
+// Fixture that owns an OpenSSLEngineCache trusting |kCert| for the host
+// "clients5.google.com" and drives a client engine against an in-memory
+// OpenSSLServerEngine.
+class OpenSSLEngineTest : public ::testing::Test {
+ protected:
+  // |engine_| is read by TearDownEngine(), so it must never be left
+  // uninitialized.
+  OpenSSLEngineTest() : engine_(nullptr) {}
+
+  void SetUp() override {
+    OpenSSLEngineCache* openssl_engine_cache = new OpenSSLEngineCache;
+    openssl_engine_cache->AddCertificateFromFile(GetTestFilePath(kCert));
+    openssl_engine_cache->SetHostname("clients5.google.com");
+    factory_.reset(openssl_engine_cache);
+  }
+
+  void TearDown() override {
+    engine_ = nullptr;
+    if (factory_.get()) {
+      factory_->WillCloseSocket(kDummyFd);
+      factory_.reset();
+    }
+  }
+
+  // Adds the test certificate file |filename| to the client's factory.
+  void AddCertificateFromFile(const string& filename) {
+    static_cast<OpenSSLEngineCache*>(factory_.get())->AddCertificateFromFile(
+        GetTestFilePath(filename));
+  }
+
+  void SetHostname(const string& hostname) {
+    factory_->SetHostname(hostname);
+  }
+
+  // Obtains the client engine registered for |kDummyFd|.
+  void SetupEngine() {
+    engine_ = factory_->NewTLSEngine(kDummyFd);
+  }
+
+  // Releases the client engine registered for |kDummyFd|, if one was set up.
+  void TearDownEngine() {
+    if (engine_ != nullptr) {
+      factory_->WillCloseSocket(kDummyFd);
+      engine_ = nullptr;
+    }
+  }
+
+  // Runs a handshake plus one message in each direction between a server
+  // created from |server_ctx| and the client engine, shuttling bytes between
+  // them by hand.  Returns true once both sides have received a message,
+  // false on a client-side transport error, a non-retryable server write
+  // error, or when kMaxIterate rounds were not enough.
+  bool Communicate(SSL_CTX* server_ctx) {
+    static const size_t kMaxIterate = 64;
+
+    OpenSSLServerEngine server_engine(server_ctx);
+    SetupEngine();
+    bool s_sent = false;
+    bool c_sent = false;
+    bool s_recv = false;
+    bool c_recv = false;
+
+    // TODO: simulate HTTP communication.
+    // To do that, we need buffering I/O.
+    // Also, |server_engine| will not send any data to transport layer without
+    // writing something to it.  Writing "" seems to be fine, though.
+    for (size_t i = 0; i < kMaxIterate; ++i) {
+      // Server: Send to client.
+      if (!s_sent) {
+        const string msg = "Hello From Server";
+        int r = server_engine.Write(msg);
+        if (r < 0 && !server_engine.CanRetry()) {
+          LOG(ERROR) << "Did not send server data but could not retry.";
+          return false;
+        }
+        if (r > 0) {
+          s_sent = true;
+          VLOG(1) << "server sent.";
+        }
+      }
+
+      // Server: receive from client.
+      if (!s_recv) {
+        string data;
+        int r = server_engine.Read(&data);
+        if (r > 0) {
+          VLOG(1) << "Sever received: " << data;
+          s_recv = true;
+        }
+      }
+
+      // Client: Send to server.
+      if (!c_sent && !engine_->IsIOPending()) {
+        const string msg = "Hello From Client";
+        int r = engine_->Write(msg.c_str(), msg.length());
+        if (r > 0) {
+          c_sent = true;
+          VLOG(1) << "client sent.";
+        }
+      }
+
+      // Client: receive from server.
+      if (!c_recv && !engine_->IsIOPending()) {
+        char tmp[kBufsize];
+        int r = engine_->Read(tmp, sizeof(tmp));
+        if (r > 0) {
+          c_recv = true;
+          VLOG(1) << "Client received:" << string(tmp, r);
+        }
+      }
+
+      // Both ways communication succeeded.
+      if (s_recv && c_recv)
+        return true;
+
+      VLOG_IF(1, server_engine.InInit())
+          << "server waiting in SSL_accept: "
+          << server_engine.StateString();
+
+      /* server to client */
+      size_t r1, r2;
+      do {
+        r1 = server_engine.GetSizeToSend();
+        r2 = engine_->GetBufSizeFromTransport();
+        VLOG(2) << " r1=" << r1 << " r2=" << r2;
+        // Transfer no more than the server has pending and the client is
+        // willing to take.
+        size_t num = r1;
+        if (r2 < num)
+          num = r2;
+        if (num) {
+          string data;
+          int r = server_engine.GetDataToSendTransport(&data, num);
+          CHECK_GT(r, 0);
+          CHECK_LE(r, static_cast<int>(num));
+          CHECK_EQ(r, static_cast<int>(data.size()));
+          /* possibly r < num (non-contiguous data) */
+          VLOG(3) << "data=" << data;
+          r = engine_->SetDataFromTransport(data);
+          if (r < 0)
+            return false;
+          if (static_cast<int>(num) != r) {
+            LOG(ERROR) << "SetDataFromTransport should accept the data size "
+                       << "that is mentioned with GetBufSizeFromTransport."
+                       << " num=" << num
+                       << " r=" << r;
+            return false;
+          }
+        }
+      } while (r1 && r2);
+
+      /* client to server */
+      {
+        string data;
+        size_t r3 = engine_->GetDataToSendTransport(&data);
+        if (r3) {
+          CHECK_EQ(r3, data.size());
+          int r = server_engine.SetDataFromTransport(data);
+          VLOG(3) << "r=" << r << " data.size=" << data.size();
+          CHECK_EQ(static_cast<int>(r3), r)
+              << "For ease of implementation, we expect |server_engine| accept "
+              << " all data from the client."
+              << " If you find the test won't pass here, please "
+              << " revise the code to accept it."
+              << " r3=" << r3
+              << " r=" << r;
+        }
+      }
+    }
+    LOG(ERROR) << "Hit max iterate."
+               << " kMaxIterate=" << kMaxIterate;
+    return false;
+  }
+
+ private:
+  // Not owned: the engine belongs to |factory_| and is released through
+  // WillCloseSocket().
+  TLSEngine* engine_;
+  std::unique_ptr<TLSEngineFactory> factory_;
+};
+
+// The server presents the very certificate the client was set up to trust,
+// so the exchange is expected to succeed.
+TEST_F(OpenSSLEngineTest, SuccessfulCommunication) {
+  std::unique_ptr<SSL_CTX, ScopedSSLCtxFree> server_ctx(
+      SetupServerContext(kCert, kKey));
+  EXPECT_TRUE(Communicate(server_ctx.get()));
+}
+
+// The client connects by IPv4 address; the address is expected to match the
+// certificate's subjectAltName IP address entry.
+TEST_F(OpenSSLEngineTest, VerifyByIPAddr) {
+  std::unique_ptr<SSL_CTX, ScopedSSLCtxFree> server_ctx(
+      SetupServerContext(kCertIPAddr, kKeyIPAddr));
+
+  // Make the client trust the server certificate and use the matching
+  // address as its hostname.
+  AddCertificateFromFile(kCertIPAddr);
+  SetHostname("127.0.0.1");
+
+  EXPECT_TRUE(Communicate(server_ctx.get()));
+}
+
+// The client connects by IPv6 address; the address is expected to match the
+// certificate's subjectAltName IP address entry.
+TEST_F(OpenSSLEngineTest, VerifyByIPv6Addr) {
+  std::unique_ptr<SSL_CTX, ScopedSSLCtxFree> server_ctx(
+      SetupServerContext(kCertIPAddr, kKeyIPAddr));
+
+  // Make the client trust the server certificate and use the matching
+  // address as its hostname.
+  AddCertificateFromFile(kCertIPAddr);
+  SetHostname("::1");
+
+  EXPECT_TRUE(Communicate(server_ctx.get()));
+}
+
+// The certificate is trusted, but the IPv4 address used as hostname does not
+// match its subjectAltName IP address, so the exchange is expected to fail.
+TEST_F(OpenSSLEngineTest, VerifyErrorByIPAddrMismatch) {
+  std::unique_ptr<SSL_CTX, ScopedSSLCtxFree> server_ctx(
+      SetupServerContext(kCertIPAddr, kKeyIPAddr));
+
+  AddCertificateFromFile(kCertIPAddr);
+  SetHostname("192.168.0.1");
+
+  EXPECT_FALSE(Communicate(server_ctx.get()));
+}
+
+// The certificate is trusted, but the IPv6 address used as hostname does not
+// match its subjectAltName IP address, so the exchange is expected to fail.
+TEST_F(OpenSSLEngineTest, VerifyErrorByIPv6AddrMismatch) {
+  std::unique_ptr<SSL_CTX, ScopedSSLCtxFree> server_ctx(
+      SetupServerContext(kCertIPAddr, kKeyIPAddr));
+
+  AddCertificateFromFile(kCertIPAddr);
+  SetHostname("fe80::42a8:f0ff:fe44:ffe6");
+
+  EXPECT_FALSE(Communicate(server_ctx.get()));
+}
+
+// The server certificate is trusted, but the hostname does not match its
+// subjectAltName, so the exchange is expected to fail on every attempt.
+TEST_F(OpenSSLEngineTest, VerifyErrorByHostMismatch) {
+  std::unique_ptr<SSL_CTX, ScopedSSLCtxFree> server_ctx(
+      SetupServerContext(kCert, kKey));
+
+  SetHostname("www.googleusercontent.com");
+
+  // First attempt.
+  EXPECT_FALSE(Communicate(server_ctx.get()));
+
+  // Second attempt with a fresh engine: the OpenSSL engine should not bypass
+  // the check for the same context the second time either.
+  TearDownEngine();
+  EXPECT_FALSE(Communicate(server_ctx.get()));
+}
+
+// The server presents a certificate the client was never told to trust, so
+// the exchange is expected to fail.
+TEST_F(OpenSSLEngineTest, VerifyError) {
+  std::unique_ptr<SSL_CTX, ScopedSSLCtxFree> server_ctx(
+      SetupServerContext(kAnotherCert, kAnotherKey));
+
+  EXPECT_FALSE(Communicate(server_ctx.get()));
+}
+
+TEST(OpenSSLContext, IsHostnameMatched) {
+  // Identical names.
+  EXPECT_TRUE(OpenSSLContext::IsHostnameMatched("clients5.google.com",
+                                                "clients5.google.com"));
+
+  // A wildcard label is expected to match one label, not several.
+  EXPECT_TRUE(OpenSSLContext::IsHostnameMatched("foo.a.com", "*.a.com"));
+  EXPECT_FALSE(OpenSSLContext::IsHostnameMatched("bar.foo.a.com", "*.a.com"));
+
+  // A wildcard inside a label still requires the literal part to match.
+  EXPECT_TRUE(OpenSSLContext::IsHostnameMatched("foo.com", "f*.com"));
+  EXPECT_FALSE(OpenSSLContext::IsHostnameMatched("bar.com", "f*.com"));
+}
+
+}  // namespace devtools_goma
diff --git a/client/posix_helper_win.cc b/client/posix_helper_win.cc
new file mode 100644
index 0000000..c136fa2
--- /dev/null
+++ b/client/posix_helper_win.cc
@@ -0,0 +1,85 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "posix_helper_win.h"
+
+#include <cstring>
+#include <string>
+
+#include "path.h"
+#include "rand_util.h"
+#include "string_piece.h"
+
+namespace {
+
+const int kMaxRetry = 5;
+
+}  // namespace
+
+namespace devtools_goma {
+
+// Approximation of POSIX access() based on GetFileAttributesA().
+// |amode| must be exactly one of R_OK, W_OK, X_OK or F_OK; any other value
+// (including OR-ed combinations) yields -1.  Returns 0 when the check
+// passes and -1 otherwise.
+int access(const char* path, int amode) {
+  const DWORD attr = GetFileAttributesA(path);
+  const bool missing = (attr == INVALID_FILE_ATTRIBUTES);
+  const bool is_dir = (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
+
+  if (amode == F_OK) {
+    return missing ? -1 : 0;
+  }
+
+  if (amode == R_OK) {
+    // Readable: exists and is not a directory.
+    return (missing || is_dir) ? -1 : 0;
+  }
+
+  if (amode == W_OK) {
+    // Writable: none of read-only / system / hidden / directory is set.
+    // NOTE(review): there is no explicit "missing" test here; per the
+    // Windows SDK INVALID_FILE_ATTRIBUTES is (DWORD)-1, so a missing path
+    // should fail this mask test as well -- confirm.
+    const DWORD not_writable_mask =
+        FILE_ATTRIBUTE_READONLY | FILE_ATTRIBUTE_SYSTEM |
+        FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_DIRECTORY;
+    return (attr & not_writable_mask) ? -1 : 0;
+  }
+
+  if (amode == X_OK) {
+    if (missing || is_dir) {
+      return -1;
+    }
+    // Executable: decided purely by the file extension.
+    // TODO: use PATHEXT env. instead.
+    const StringPiece extension = file::Extension(path);
+    const bool executable =
+        extension == "exe" || extension == "cmd" || extension == "bat";
+    return executable ? 0 : -1;
+  }
+
+  return -1;
+}
+
+// Emulation of POSIX mkdtemp(): replaces the trailing run of 'X' characters
+// in |tmpl| (at least six are required) with random alphanumeric characters
+// and creates a directory of that name.
+//
+// Returns |tmpl|, updated in place with the created directory name, on
+// success.  Returns nullptr, leaving |tmpl| untouched, when the template has
+// fewer than six trailing 'X's or when no directory could be created within
+// kMaxRetry attempts.
+char *mkdtemp(char *tmpl) {
+  StringPiece t(tmpl);
+  // Locate the beginning of the trailing Xs.  A template made only of 'X's
+  // (e.g. "XXXXXX") has no non-'X' character at all; the run then starts at
+  // index 0 and the prefix is empty.
+  const StringPiece::size_type last_non_x = t.find_last_not_of('X');
+  const StringPiece::size_type pos =
+      (last_non_x == StringPiece::npos) ? 0 : last_non_x + 1;
+  const size_t x_length = t.length() - pos;
+  if (x_length < 6) {
+    return nullptr;
+  }
+
+  const std::string prefix = std::string(t.substr(0, pos));
+  for (int retry = 0; retry < kMaxRetry; ++retry) {
+    std::string dirname = prefix + GetRandomAlphanumeric(x_length);
+    if (CreateDirectoryA(dirname.c_str(), nullptr)) {
+      // Only the characters are copied; the terminating NUL already present
+      // in |tmpl| is kept.  Assumes GetRandomAlphanumeric(n) returns exactly
+      // n characters, so that |dirname| is as long as the template.
+      std::memcpy(tmpl, dirname.c_str(), t.length());
+      return tmpl;
+    }
+  }
+  return nullptr;
+}
+
+}  // namespace devtools_goma
diff --git a/client/posix_helper_win.h b/client/posix_helper_win.h
new file mode 100644
index 0000000..992ebbf
--- /dev/null
+++ b/client/posix_helper_win.h
@@ -0,0 +1,30 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_POSIX_HELPER_WIN_H_
+#define DEVTOOLS_GOMA_CLIENT_POSIX_HELPER_WIN_H_
+
+#ifndef _WIN32
+#error Win32 only
+#endif
+
+// Windows POSIX emulation layer
+
+#include "config_win.h"
+
+#define R_OK    4               /* Test for read permission.  */
+#define W_OK    2               /* Test for write permission.  */
+#define X_OK    1               /* Test for execute permission.  */
+#define F_OK    0               /* Test for existence.  */
+
+namespace devtools_goma {
+
+int access(const char* path, int amode);
+
+char *mkdtemp(char *tmpl);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_POSIX_HELPER_WIN_H_
diff --git a/client/posix_helper_win_unittest.cc b/client/posix_helper_win_unittest.cc
new file mode 100644
index 0000000..8d5ecb8
--- /dev/null
+++ b/client/posix_helper_win_unittest.cc
@@ -0,0 +1,42 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "posix_helper_win.h"
+
+#include "glog/logging.h"
+#include "gtest/gtest.h"
+#include "ioutil.h"
+#include "mypath.h"
+#include "path.h"
+
+namespace devtools_goma {
+
+// A template with six trailing Xs must produce a directory whose name
+// differs from the template itself.
+TEST(PosixHelperWin, mkdtemp) {
+  const char kTemplate[] = "abc_XXXXXX";
+  const string original = file::JoinPath(GetGomaTmpDir(), kTemplate);
+  string to_change = original;
+  // Use a gtest assertion rather than glog CHECK: a failure is then reported
+  // as a failed test instead of aborting the whole test binary, and the
+  // cleanup below is skipped because nothing was created.
+  ASSERT_TRUE(mkdtemp(&to_change[0]) != nullptr);
+  EXPECT_NE(original, to_change);
+  DeleteRecursivelyOrDie(to_change);
+}
+
+// Five trailing Xs are one short of the six that mkdtemp requires: the call
+// must fail and leave the template unchanged.  The final EXPECT_DEATH
+// expects deleting that path to die, presumably because it was never
+// created.
+TEST(PosixHelperWin, mkdtemp_insufficient_Xs) {
+  const string original =
+      file::JoinPath(GetGomaTmpDir(), "abc_XXXXX");  // 5 Xs, 6 needed.
+  string to_change(original);
+  char* const result = mkdtemp(&to_change[0]);
+  EXPECT_EQ(nullptr, result);
+  EXPECT_EQ(original, to_change);
+  EXPECT_DEATH(DeleteRecursivelyOrDie(to_change), "");
+}
+
+// A template without any trailing X must be rejected and left unchanged.
+// The final EXPECT_DEATH expects deleting that path to die, presumably
+// because it was never created.
+TEST(PosixHelperWin, mkdtemp_no_Xs) {
+  const string original = file::JoinPath(GetGomaTmpDir(), "abcdefg");
+  string to_change(original);
+  char* const result = mkdtemp(&to_change[0]);
+  EXPECT_EQ(nullptr, result);
+  EXPECT_EQ(original, to_change);
+  EXPECT_DEATH(DeleteRecursivelyOrDie(to_change), "");
+}
+
+}  // namespace devtools_goma
diff --git a/client/predefined_macros.h b/client/predefined_macros.h
new file mode 100644
index 0000000..104ae5f
--- /dev/null
+++ b/client/predefined_macros.h
@@ -0,0 +1,41 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_PREDEFINED_MACROS_H_
+#define DEVTOOLS_GOMA_CLIENT_PREDEFINED_MACROS_H_
+
+#include "basictypes.h"
+
+namespace devtools_goma {
+
+// Names of the predefined macros that are used without an argument list
+// (object-like macros such as __FILE__ or __LINE__).
+// NOTE(review): how the consumer of this table treats these names is not
+// visible here -- see the code that includes this header.
+const char* const kPredefinedObjectMacros[] = {
+  "__FILE__",
+  "__LINE__",
+  "__DATE__",
+  "__TIME__",
+  "__COUNTER__",
+  "__BASE_FILE__",
+};
+// Number of entries in kPredefinedObjectMacros.
+const int kPredefinedObjectMacroSize =
+    arraysize(kPredefinedObjectMacros);
+
+// Names of the predefined macros that are used with an argument list
+// (function-like macros such as __has_include(...) or __has_feature(...)).
+const char* const kPredefinedFunctionMacros[] = {
+  "__has_include",
+  "__has_include__",
+  "__has_include_next",
+  "__has_include_next__",
+  "__has_feature",
+  "__has_extension",
+  "__has_attribute",
+  "__has_cpp_attribute",
+  "__has_declspec_attribute",
+  "__has_builtin",
+};
+// Number of entries in kPredefinedFunctionMacros.
+const int kPredefinedFunctionMacroSize =
+    arraysize(kPredefinedFunctionMacros);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_PREDEFINED_MACROS_H_
diff --git a/client/rand_util.cc b/client/rand_util.cc
new file mode 100644
index 0000000..7d519bf
--- /dev/null
+++ b/client/rand_util.cc
@@ -0,0 +1,70 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "rand_util.h"
+
+#include <openssl/err.h>
+#include <openssl/rand.h>
+#include <limits>
+#include <random>
+
+#include "glog/logging.h"
+
+namespace {
+
+// Random number generator backed by BoringSSL's RAND_bytes.
+//
+// It provides result_type, min(), max() and operator(), so it can be handed
+// to <random> distributions such as std::uniform_int_distribution.  It
+// exists because Chromium C++11 does not allow us to use the
+// cryptographically secure random numbers in <random>:
+// https://groups.google.com/a/chromium.org/d/msg/chromium-dev/t7vf5etS7cw/kZIeZUokAAAJ
+class MyCryptographicSecureRNG {
+ public:
+  using result_type = unsigned int;
+
+  MyCryptographicSecureRNG() = default;
+  ~MyCryptographicSecureRNG() = default;
+
+  // Not copyable.
+  MyCryptographicSecureRNG(const MyCryptographicSecureRNG&) = delete;
+  void operator=(const MyCryptographicSecureRNG&) = delete;
+
+  // Smallest value operator() can return.
+  static constexpr result_type min() {
+    return std::numeric_limits<result_type>::min();
+  }
+
+  // Largest value operator() can return.
+  static constexpr result_type max() {
+    return std::numeric_limits<result_type>::max();
+  }
+
+  // Returns sizeof(result_type) bytes obtained from RAND_bytes.
+  // CHECK-fails if RAND_bytes does not report success.
+  result_type operator()() {
+    result_type value;
+    const int rc =
+        RAND_bytes(reinterpret_cast<uint8_t*>(&value), sizeof value);
+    CHECK(rc == 1)
+        << "BoringSSL's RAND_bytes must not fail to get random. "
+        << ERR_get_error();
+    return value;
+  }
+};
+
+}  // namespace
+
+namespace devtools_goma {
+
+// Returns a string of |length| characters, each picked with
+// std::uniform_int_distribution from the 62 characters [0-9a-zA-Z], using
+// MyCryptographicSecureRNG as the source of randomness.
+std::string GetRandomAlphanumeric(size_t length) {
+  static const char kAlphanumericTable[] =
+      "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+  // Number of usable characters: the literal's size minus its trailing NUL.
+  constexpr size_t kTableSize = sizeof(kAlphanumericTable) - 1;
+  // Checked at compile time (and therefore in every build type), and
+  // without the int vs. size_t comparison a DCHECK_EQ(62, ...) performs.
+  static_assert(kTableSize == 62,
+                "table must contain 10 digits and 2 * 26 letters");
+
+  MyCryptographicSecureRNG gen;
+  // Both bounds are inclusive: valid table indices are [0, kTableSize - 1].
+  std::uniform_int_distribution<> dis(0, static_cast<int>(kTableSize) - 1);
+
+  std::string buf;
+  buf.reserve(length);
+  for (size_t i = 0; i < length; ++i) {
+    buf += kAlphanumericTable[dis(gen)];
+  }
+  return buf;
+}
+
+}  // namespace devtools_goma
diff --git a/client/rand_util.h b/client/rand_util.h
new file mode 100644
index 0000000..0ce7784
--- /dev/null
+++ b/client/rand_util.h
@@ -0,0 +1,16 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_RAND_UTIL_H_
+#define DEVTOOLS_GOMA_CLIENT_RAND_UTIL_H_
+
+#include <string>
+
+namespace devtools_goma {
+
+// Returns a random string of exactly |length| characters, each one taken
+// from the digits and ASCII letters [0-9a-zA-Z].
+std::string GetRandomAlphanumeric(size_t length);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_RAND_UTIL_H_
diff --git a/client/rand_util_unittest.cc b/client/rand_util_unittest.cc
new file mode 100644
index 0000000..b843e49
--- /dev/null
+++ b/client/rand_util_unittest.cc
@@ -0,0 +1,23 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "rand_util.h"
+
+#include <cctype>
+
+#include "gtest/gtest.h"
+
+namespace devtools_goma {
+
+// The result must have exactly the requested length and consist of
+// alphanumeric characters only.
+TEST(RandUtil, GetRandomAlphanumeric) {
+  const size_t kSize = 128;
+  const std::string rnd = GetRandomAlphanumeric(kSize);
+
+  EXPECT_EQ(kSize, rnd.size());
+  for (const char c : rnd) {
+    // std::isalnum is undefined for values that are neither EOF nor
+    // representable as unsigned char, so never pass a plain (possibly
+    // negative) char to it.
+    EXPECT_TRUE(std::isalnum(static_cast<unsigned char>(c)))
+        << "unexpected character code " << static_cast<int>(c);
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/client/report_env.sh b/client/report_env.sh
new file mode 100755
index 0000000..9ebe0de
--- /dev/null
+++ b/client/report_env.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+#
+# Copyright 2012 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+#
+# A simple script which reports the information of this machine.
+# This would help users to report their environment.
+#
+
+uname=$(uname)
+kernel=$(uname -sr)
+
+if [ "x$uname" = "x" ]; then
+  echo 'Failed to run uname'
+  exit 1
+fi
+
+net=$(ping -c 3 apidata.googleusercontent.com | sed '
+/^64 bytes from .*ttl=/!d
+s///
+s/time=//
+' | awk '
+{
+  ttl += $1;
+  time += $2;
+}
+END {
+  printf "ttl=%d %.1fms", ttl / 3, time / 3;
+}
+')
+
+if [ $uname = "Linux" ]; then
+  if [ -f /etc/issue.net ]; then
+    dist="$(head -n 1 /etc/issue.net) "
+  elif [ -f /etc/issue ]; then
+    dist="$(head -n 1 /etc/issue) "
+  fi
+  cpu=$(sed -n '/^model name\s*:\s*/{s///;p;q}' /proc/cpuinfo)
+  ncores=$(grep -E '^processor\s*:' /proc/cpuinfo | wc -l)
+  ram=$(awk '/^MemTotal: *[0-9]/ {printf "%.1f", $2/1024/1024}' /proc/meminfo)
+elif [ $uname = "Darwin" ]; then
+  cpu=$(sysctl -n machdep.cpu.brand_string)
+  ncores=$(sysctl -i -n hw.availcpu)
+  # Yosemite does not have hw.availcpu.
+  if [ -z "$ncores" ]; then
+    ncores=$(sysctl -n hw.logicalcpu)
+  fi
+  ram=$(sysctl -n hw.memsize | awk '{printf "%.1f", $1 / 1024/1024/1024}')
+elif [ $uname = "FreeBSD" ]; then
+  cpu=$(sysctl -n hw.model)
+  ncores=$(sysctl -n hw.ncpu)
+  ram=$(sysctl -n hw.physmem | awk '{printf "%.1f", $1 / 1024/1024/1024}')
+else
+  echo "$kernel (unknown OS)"
+  exit 0
+fi
+echo "${dist}${kernel} ${cpu} x${ncores} ${ram}GB ${net}"
diff --git a/client/resources/BUILD.gn b/client/resources/BUILD.gn
new file mode 100644
index 0000000..efd8901
--- /dev/null
+++ b/client/resources/BUILD.gn
@@ -0,0 +1,129 @@
+# Copyright 2017 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+genc = "../genc.py"
+
+# Lets dependents #include the generated headers from $target_gen_dir.
+config("include_target_gen_dir") {
+  include_dirs = [ "$target_gen_dir" ]
+}
+
+# Runs genc.py on one resource file.
+#
+# Variables:
+#   source: (required) the resource file, relative to this directory.
+#
+# The action declares "<name>.c" and "<name>.h" in $target_gen_dir as its
+# outputs, where <name> is the file name of |source| without its extension,
+# and exports include_target_gen_dir to its dependents.
+template("genc_resource") {
+  assert(defined(invoker.source), "genc_resource requires a source")
+  resource = invoker.source
+  stem = get_path_info(resource, "name")
+
+  action(target_name) {
+    script = genc
+    sources = [
+      resource,
+    ]
+    outputs = [
+      "$target_gen_dir/$stem.c",
+      "$target_gen_dir/$stem.h",
+    ]
+    args = [
+      "--out-dir",
+      rebase_path("$target_gen_dir"),
+      rebase_path(resource),
+    ]
+    public_configs = [ ":include_target_gen_dir" ]
+  }
+}
+
+genc_resource("gen_compiler_proxy_status_html5") {
+  source = "compiler_proxy_status_html5.html"
+}
+
+genc_resource("gen_compiler_proxy_status_script") {
+  source = "compiler_proxy_status_script.js"
+}
+
+genc_resource("gen_compiler_proxy_contentionz_script") {
+  source = "compiler_proxy_contentionz_script.js"
+}
+
+genc_resource("gen_compiler_proxy_status_style") {
+  source = "compiler_proxy_status_style.css"
+}
+
+genc_resource("gen_compilerz_html") {
+  source = "compilerz_html.html"
+}
+
+genc_resource("gen_compilerz_script") {
+  source = "compilerz_script.js"
+}
+
+genc_resource("gen_compilerz_style") {
+  source = "compilerz_style.css"
+}
+
diff --git a/client/resources/compiler_proxy_contentionz_script.js b/client/resources/compiler_proxy_contentionz_script.js
new file mode 100644
index 0000000..b3f5b56
--- /dev/null
+++ b/client/resources/compiler_proxy_contentionz_script.js
@@ -0,0 +1,32 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+'use strict';
+
+function addSortFunction(key) {
+  var sorter = function(a, b) {
+    var avalue = parseFloat($(a).find(key).text());
+    var bvalue = parseFloat($(b).find(key).text());
+    return avalue > bvalue ? -1 : 1;
+  }
+
+  $('th' + key).click(
+      function() {
+        var tbody = $('body > table > tbody > tr')
+        var sorted_table = tbody.sort(sorter);
+        $('tbody').html(sorted_table);
+      });
+}
+
+function init() {
+  addSortFunction('.count');
+  addSortFunction('.total-wait');
+  addSortFunction('.max-wait');
+  addSortFunction('.ave-wait');
+  addSortFunction('.total-hold');
+  addSortFunction('.max-hold');
+  addSortFunction('.ave-hold');
+
+  $('th.total-wait').trigger("click");
+}
diff --git a/client/resources/compiler_proxy_status_html5.html b/client/resources/compiler_proxy_status_html5.html
new file mode 100644
index 0000000..3b1f607
--- /dev/null
+++ b/client/resources/compiler_proxy_status_html5.html
@@ -0,0 +1,209 @@
+<!DOCTYPE html>
+
+<!-- Copyright 2016 The Goma Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+-->
+
+<html>
+<head>
+
+<title>GOMA: CompilerProxy status page</title>
+<link rel="stylesheet" href="/static/compiler_proxy_status_style.css">
+<script src="/static/jquery.min.js"></script>
+<script src="/static/compiler_proxy_status_script.js"></script>
+</head>
+
+<body onload="init()">
+
+<header class="clearfix">
+  <h1><span>Goma</span></h1>
+  <div id="account">status:<span id="http_status"></span>&nbsp;
+                    <span id="email"></span>&nbsp;
+                    <a id="login" href=""></a></div>
+  <div id="error" class="warning"></div>
+  <div id="mismatch" class="warning"></div>
+</header>
+
+<div class="main">
+  <div class="side-menu">
+    <p><a href="#active">Active Tasks (<span id="count-active-tasks">0</span>)</a></p>
+    <p><a href="#finished">Finished Tasks (<span id="count-finished-tasks">0</span>)</a></p>
+    <p><a href="#failed">Failed Tasks (<span id="count-failed-tasks">0</span>)</a></p>
+    <p><a href="#long">Long Tasks (<span id="count-long-tasks">0</span>)</a></p>
+
+    <p><a href="#task-stats">Task stats</a></p>
+    <p><a href="#network-stats">Network stats</a></p>
+    <p><a href="#settings">Settings</a></p>
+  </div>
+
+  <div class="content">
+    <div id="task-summary-page" class="page">
+      <p id="task-summary-page-menu">
+        <span id="task-summary-first" class="menu-icon icon icon-first"></span>
+        <span id="task-summary-prev" class="menu-icon icon icon-prev"></span>
+        <span id="task-summary-figures"><span id="task-summary-offset-begin"></span>-<span id="task-summary-offset-end"></span> of <span id="task-summary-total"></span></span>
+        <span id="task-summary-next" class="menu-icon icon icon-next"></span>
+        <span id="task-summary-last" class="menu-icon icon icon-last"></span>
+      </p>
+      <table id="task-summary-table">
+        <thead><tr>
+          <th id="task-summary-head-id" class="task-summary-head task-summary-id">id</th>
+          <th id="task-summary-head-time" class="task-summary-head task-summary-time">time</th>
+          <th id="task-summary-head-pid" class="task-summary-head task-summary-pid">pid</th>
+          <th id="task-summary-head-state" class="task-summary-head task-summary-state">task state</th>
+          <th id="task-summary-head-status" class="task-summary-head task-summary-status">status</th>
+          <th id="task-summary-head-subproc-pid" class="task-summary-head task-summary-subproc-pid">subproc pid</th>
+          <th id="task-summary-head-subproc-state" class="task-summary-head task-summary-subproc-state">subproc state</th>
+          <th id="task-summary-head-flag" class="task-summary-head task-summary-flag">flag</th>
+          <th id="task-summary-head-major-factor" class="task-summary-head task-summary-major-factor">major factor</th>
+        </tr></thead>
+        <tbody id="task-summary-list"></tbody>
+      </table>
+
+      <h3>Legend</h3>
+      <p><form id="task-filter">
+        <input type="checkbox" name="task-status-running" checked> <span class="task-status-running">Running</span>
+        <input type="checkbox" name="task-status-success" checked> <span class="task-status-success">Succeeded</span>
+        <input type="checkbox" name="task-status-cachehit" checked> <span class="task-status-cachehit">Cache hit</span>
+        <input type="checkbox" name="task-status-local-cachehit" checked> <span class="task-status-local-cachehit">Local Cache hit</span>
+        <input type="checkbox" name="task-status-local-fallback" checked> <span class="task-status-local-fallback">Local fallback</span>
+        <input type="checkbox" name="task-status-retry" checked> <span class="task-status-retry">Retry</span>
+        <input type="checkbox" name="task-status-mismatch" checked> <span class="task-status-mismatch">Mismatch</span>
+        <input type="checkbox" name="task-status-gomaerror" checked> <span class="task-status-gomaerror">Goma Error</span>
+        <input type="checkbox" name="task-status-failure" checked> <span class="task-status-failure">Failed</span>
+        <input type="checkbox" name="task-status-conftestfailure" checked> <span class="task-status-conftestfailure">Failed but maybe during ./configure</span>
+        <input type="checkbox" name="task-status-cancel" checked> <span class="task-status-cancel">Canceled</span>
+        <button id="btn-task-check-all">Check all</button>
+        <button id="btn-task-uncheck-all">Uncheck all</button>
+      </form></p>
+    </div>
+
+    <div id="task-detail-page" class="page">
+      <p><a href="#" id="task-show-prev">&lt;&lt; Prev Task</a> <a href="#" id="task-show-next">Next Task &gt;&gt;</a></p>
+      <table id="task-detail-table"></table>
+    </div>
+
+    <div id="task-stats-page" class="page">
+      <h2>Task stats</h2>
+      <table id="task-stats-info" class="stats-info-table">
+        <colgroup>
+          <col style="width: 15em;">
+          <col style="width: 5em;">
+          <col>
+        </colgroup>
+
+        <tr class="stats-info-table-heading"><td></td><th colspan="2">Task Counts (limit: <span id="task-stats-num-limit"></span>)</th></tr>
+        <tr id="task-stats-num-max"><td>max</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-num-actives"><td>actives</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-num-pendings"><td>pendings</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+
+        <tr class="stats-info-table-heading"><td></td><th colspan="2">Request</th></tr>
+        <tr id="task-stats-request-total"><td>total</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-request-success"><td>success</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-request-failure"><td>failure</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-request-success-finished"><td>finished</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-request-success-aborted"><td>aborted</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-request-success-cache-hit"><td>cache hit</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-request-success-retry"><td>retry</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-request-local-run"><td>local run</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-request-local-finished"><td>local finished</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-request-local-killed"><td>local killed</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-request-fail-fallback"><td>fail fallback</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-request-compiler-proxy-fail"><td>compiler proxy fail</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+
+        <tr class="stats-info-table-heading"><td></td><th colspan="2">Remote VS Local</th></tr>
+        <tr id="task-stats-compiler-race-total"><td>both run</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-compiler-race-goma-win"><td>goma win</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-compiler-race-local-win"><td>local win</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+
+        <tr class="stats-info-table-heading"><td></td><th colspan="2">Compiler Info</th></tr>
+        <tr id="task-stats-compiler-info-stores"><td>compiler info stores</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-compiler-info-store-dups"><td>compiler info stores dups</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-compiler-info-miss"><td>compiler info miss</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-compiler-info-fail"><td>compiler info fail</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+
+        <tr class="stats-info-table-heading"><td></td><th colspan="2">File</th></tr>
+        <tr id="task-stats-file-total"><td>total</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-file-uploaded"><td>upload</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+        <tr id="task-stats-file-missed"><td>missed</td><td><meter value="0"></meter></td><td class="text"></td></tr>
+      </table>
+    </div>
+
+    <div id="network-stats-page" class="page">
+      <h2>Network stats</h2>
+      <h3>Network</h3>
+      <table>
+        <tr><th>QPS</th><th>Traffic</th></tr>
+        <tr><td><img id="http-rpc-qps"></td><td><img id="http-rpc-traffic"></td></tr>
+      </table>
+      <h3>Http RPC Info</h3>
+      <table id="http-rpc-info" class="stats-info-table">
+        <colgroup>
+          <col style="width: 15em;">
+          <col>
+        </colgroup>
+
+        <tr class="stats-info-table-heading"><td></td><th>Health</th></tr>
+        <tr><td>status</td><td id="rpc-health-status"></td></tr>
+        <tr class="stats-info-table-heading"><td></td><th>RPC</th></tr>
+        <tr><td>num_active</td><td id="rpc-num-active"></td></tr>
+        <tr><td>num_query</td><td id="rpc-num-query"></td></tr>
+        <tr><td>num_retry</td><td id="rpc-num-retry"></td></tr>
+        <tr><td>num_timeout</td><td id="rpc-num-timeout"></td></tr>
+        <tr><td>num_error</td><td id="rpc-num-error"></td></tr>
+        <tr class="stats-info-table-heading"><td></td><th>Socket</th></tr>
+        <tr><td>socket_pool</td><td id="rpc-socket-pool"></td></tr>
+        <tr><td>read bps</td><td id="rpc-read-bps">/s</td></tr>
+        <tr><td>total read</td><td id="rpc-read-byte"></td></tr>
+        <tr><td>write bps</td><td id="rpc-write-bps">/s</td></tr>
+        <tr><td>total write</td><td id="rpc-write-byte"></td></tr>
+        <tr class="stats-info-table-heading"><td></td><th>Request header</th></tr>
+        <tr><td>compression</td><td id="rpc-compression"></td></tr>
+        <tr><td>accept_encoding</td><td id="rpc-accept-encoding"></td></tr>
+        <tr><td>content_type</td><td id="rpc-content-type"></td></tr>
+        <tr class="stats-info-table-heading"><td></td><th>Authorization</th></tr>
+        <tr><td>authorization</td><td id="rpc-authorization"></td></tr>
+        <tr><td>oauth2</td><td id="rpc-oauth2"></td></tr>
+        <tr class="stats-info-table-heading"><td></td><th>User Agent</th></tr>
+        <tr><td>user_agent</td><td id="rpc-user-agent"></td></tr>
+        <tr class="stats-info-table-heading"><td></td><th>Cookie</th></tr>
+        <tr><td>cookie</td><td id="rpc-cookie"></td></tr>
+        <tr class="stats-info-table-heading"><td></td><th>Extra params</th></tr>
+        <tr><td>url_path_prefix</td><td id="rpc-url-path-prefix"></td></tr>
+        <tr><td>extra_params</td><td id="rpc-extra-params"></td></tr>
+        <tr class="stats-info-table-heading"><td></td><th>SSL</th></tr>
+        <tr><td>ssl</td><td id="rpc-ssl"></td></tr>
+        <tr><td>ssl_extra_cert</td><td id="rpc-ssl-extra-cert"></td></tr>
+        <tr class="stats-info-table-heading"><td></td><th>Others</th></tr>
+        <tr><td>capture_response_header</td><td id="rpc-capture-response-header"></td></tr>
+        <tr><td>socket_read_timeout_sec</td><td id="rpc-socket-read-timeout-sec"></td></tr>
+      </table>
+    </div>
+
+    <div id="settings-page" class="page">
+      <h2>Task Update</h2>
+      <p><form>
+        Update:
+        <input type="radio" name="update-task" value="on" checked>On
+        frequency <input id="update-freq" value="1000" size="4">ms
+        <input type="radio" name="update-task" value="off">Off
+      </form></p>
+      <p>Current status: <span id="update-status"></span></p>
+      <h2>Task View</h2>
+      <p><form>Page Size:
+        <input type="radio" name="pagesize" value="25" checked>25
+        <input type="radio" name="pagesize" value="50">50
+        <input type="radio" name="pagesize" value="100">100
+      </form></p>
+    </div>
+
+  </div>
+</div>
+
+<hr style="clear: both">
+<p id="endpoints">{{ENDPOINTS}}</p>
+<p id="goma_version"></p>
+<p id="global_info">{{GLOBAL_INFO}}</p>
+</body>
+</html>
diff --git a/client/resources/compiler_proxy_status_script.js b/client/resources/compiler_proxy_status_script.js
new file mode 100644
index 0000000..e950c28
--- /dev/null
+++ b/client/resources/compiler_proxy_status_script.js
@@ -0,0 +1,1146 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+'use strict';
+
+// Add starts with.
+if (!String.prototype.startsWith) {
+  /**
+   * Returns true if the string starts with searchString.
+   * @param {string} searchString The characters to be searched for at the start
+   * of this string.
+   * @param {number} position Optional. The position in this string at which to
+   * begin searching for searchString; defaults to 0.
+   * @return {bool} True if the string starts with searchString. Otherwise false.
+   */
+  String.prototype.startsWith = function(searchString, position) {
+    position = position || 0;
+    return this.lastIndexOf(searchString, position) === position;
+  };
+}
+
+/**
+ * Human readable bytes.
+ * @param{Number} bytes.
+ * @return{string} human readable bytes.
+ */
+function humanReadableBytes(num) {
+  var sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
+  if (num == 0) {
+    return '0';
+  }
+  var i = parseInt(Math.floor(Math.log(num) / Math.log(1024)));
+  if (i >= sizes.length) {
+    i = sizes.length - 1;
+  }
+  return (Math.round(num / Math.pow(1024, i) * 100) / 100) + sizes[i];
+};
+
+/* Returns a basename of path. For example:
+ * C:\Program Files (x86)\VC\bin\cl.exe --> cl.exe
+ * /usr/bin/clang++                     --> clang++
+ *
+ * @param {string} path path.
+ * @return {string} basename of path.
+ */
+function basename(path) {
+  var splitted = path.split(/[\/\\\\]/)
+  return splitted[splitted.length - 1];
+}
+
+function makeFlagSummary(flag) {
+  if (!flag)
+    return '';
+
+  var args = flag.split(' ');
+
+  var compiler = basename(args[0]);
+
+  var target = '';
+  for (var i = 0; i < args.length; ++i) {
+    // Catches '-o <filename>', '/Fe <filename>', or '/Fo <filename>' cases.
+    if (args[i] == '-o' || args[i] == '/Fe' || args[i] == '/Fo') {
+      if (i + 1 < args.length) {
+        target = basename(args[i + 1]);
+        break;
+      }
+    }
+
+    // Catches '-o<filename>' case.
+    if (args[i].startsWith('-o')) {
+      target = basename(args[i].substring(2));
+      break;
+    }
+
+    // Catches '/Fe<filename>', or '/Fo<filename>' cases.
+    if (args[i].startsWith('/Fe') || args[i].startsWith('/Fo')) {
+      target = basename(args[i].substring(3));
+      break;
+    }
+  }
+
+  return compiler + ' ... ' + target;
+}
+
+/**
+ * Gets "task" status.
+ * @param {Object} task task object.
+ * @return {string} task status
+ */
+function taskStatus(task) {
+  var success = true;
+  if ('exit' in task) {
+    if (task['exit'] != 0) {
+      success = false;
+    }
+  } else if ('http' in task) {
+    if (task['http'] != 200) {
+      success = false;
+    }
+  } else {
+    // if no http, it would be running.
+    return 'running';
+  }
+  if (task['canceled']) {
+    return 'cancel';
+  }
+  if (task['goma_error'] || task['compiler_proxy_error']) {
+    return 'gomaerror';
+  }
+  if (!success) {
+    if (task['flag'] && task['flag'].match(/ conftest\./)) {
+      return 'conftestfailure';
+    }
+    return 'failure';
+  }
+  if (task['fail_fallback']) {
+    return 'failure';
+  }
+
+  if (task['command_version_mismatch']
+      || task['command_binary_hash_mismatch']
+      || task['command_subprograms_mismatch']) {
+    return 'mismatch';
+  }
+
+  if (task['state'] != 'FINISHED') {
+    return 'local-fallback';
+  }
+  if (task['retry'] > 0) {
+    return 'retry';
+  }
+  if (task['cache'] == 'local hit') {
+    return 'local-cachehit';
+  }
+  if (task['cache'] == 'hit') {
+    return 'cachehit';
+  }
+  return 'success';
+}
+
+// Returns true if task has compiler mismatch.
+function taskHasMismatch(task) {
+  var keys = ['command_version_mismatch',
+              'command_binary_hash_mismatch',
+              'command_subprograms_mismatch'];
+  for (var i = 0; i < keys.length; ++i) {
+    var key = keys[i];
+    if ((key in task) && task[key] != "") {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+function taskHasGomaccRevisionMismatch(task) {
+  if ('gomacc_revision_mismatch' in task &&
+      task['gomacc_revision_mismatch'] != "") {
+    return true;
+  }
+
+  return false;
+}
+// ----------------------------------------------------------------------
+
+function GomaTaskView() {
+  this.currentTaskView = 'active';
+  this.taskStartingOffset = 0;
+  this.taskMaxSize = 25;
+  this.tasks = {
+    active: [],
+    finished: [],
+    failed: [],
+    long: []
+  };
+  // id -> Task
+  this.finishedTasks = {},
+  this.failedTasks = {},
+  // id -> Task
+  this.taskDetailCache = {},
+  this.currentDetailTaskId = 0;
+  this.currentTaskOrderKey = null;
+  this.currentTaskOrderAscending = true;
+
+  // For Task stats
+  this.maxActives = 0;
+}
+
+GomaTaskView.prototype = {
+  setPageSize: function(size) {
+    this.taskMaxSize = size;
+  },
+
+  updateTaskView: function() {
+    var currentTasks = this._filteredTasks();
+    this._updateTaskViewWith(currentTasks);
+  },
+
+  // Update compiler mismatch view.
+  // We don't filter tasks here.
+  updateMismatchView: function() {
+    var compilerMismatchDetected = false;
+    var gomaccRevisionMismatchDetected = false;
+    var mismatchedTaskIdSet = {};
+    for (var i = 0; i < this.tasks.finished.length; ++i) {
+      var task = this.tasks.finished[i];
+      if (taskHasMismatch(task)) {
+        compilerMismatchDetected = true;
+        mismatchedTaskIdSet[task.id] = true;
+      }
+      if (taskHasGomaccRevisionMismatch(task)) {
+        gomaccRevisionMismatchDetected = true;
+      }
+    }
+    for (var i = 0; i < this.tasks.failed.length; ++i) {
+      var task = this.tasks.failed[i];
+      if (taskHasMismatch(task)) {
+        compilerMismatchDetected = true;
+        mismatchedTaskIdSet[task.id] = true;
+      }
+      if (taskHasGomaccRevisionMismatch(task)) {
+        gomaccRevisionMismatchDetected = true;
+      }
+    }
+
+    var mismatchArea = $('#mismatch');
+    if (!(compilerMismatchDetected || gomaccRevisionMismatchDetected)) {
+      mismatchArea.hide();
+      return;
+    }
+
+    mismatchArea.empty();
+
+    if (gomaccRevisionMismatchDetected) {
+      $('<div>revision between gomacc and compiler_proxy ' +
+          'is mismatched. Probably goma has been updated. ' +
+          'Restart compiler_proxy.</div>').appendTo(mismatchArea);
+    }
+
+    if (compilerMismatchDetected) {
+      var mismatchedTaskIds = [];
+      for (var id in mismatchedTaskIdSet) {
+        mismatchedTaskIds.push(id);
+      }
+      mismatchedTaskIds.sort(function(lhs, rhs) { return lhs - rhs; });
+
+      $('<div>Compiler Mismatch</div>').appendTo(mismatchArea);
+      for (var i = 0; i < mismatchedTaskIds.length; ++i) {
+        var id = mismatchedTaskIds[i];
+        $('<a>').attr('href', '#task' + id).text(id + "")
+            .appendTo(mismatchArea);
+        mismatchArea.append(' ');
+      }
+    }
+
+    mismatchArea.show();
+  },
+
+  updateTaskStats: function() {
+    var resp = this.resp;
+    var numActives = gomaTaskView.tasks['active'].length;
+
+    if (numActives > this.maxActives) {
+      this.maxActives = numActives;
+    }
+
+    function setTextAndMeter(id, value, maxValue) {
+      if (maxValue < 1) {
+        maxValue = 1;
+      }
+
+      $(id + ' .text').text(value);
+      $(id + ' meter').attr('value', value)
+          .attr('min', 0)
+          .attr('max', maxValue)
+          .text((100 * value / maxValue) + '%');
+    }
+
+    var taskMeterMax = this.maxActives < 100 ? 100 : this.maxActives;
+    $('#task-stats-num-limit').text(resp['num_exec']['max_active_tasks']);
+    setTextAndMeter('#task-stats-num-max', this.maxActives, taskMeterMax);
+    setTextAndMeter('#task-stats-num-actives', numActives, taskMeterMax);
+    setTextAndMeter('#task-stats-num-pendings', resp['num_exec']['pending'], taskMeterMax);
+
+    var requestMeterMax = resp['num_exec']['request'] < 1 ? 1 : resp['num_exec']['request'];
+    setTextAndMeter('#task-stats-request-total', resp['num_exec']['request'], requestMeterMax);
+    setTextAndMeter('#task-stats-request-success', resp['num_exec']['success'], requestMeterMax);
+    setTextAndMeter('#task-stats-request-failure', resp['num_exec']['failure'], requestMeterMax);
+    setTextAndMeter('#task-stats-request-success-finished', resp['num_exec']['goma_finished'], requestMeterMax);
+    setTextAndMeter('#task-stats-request-success-cache-hit', resp['num_exec']['goma_cache_hit'], requestMeterMax);
+    setTextAndMeter('#task-stats-request-success-aborted', resp['num_exec']['goma_aborted'], requestMeterMax);
+    setTextAndMeter('#task-stats-request-success-retry', resp['num_exec']['goma_retry'], requestMeterMax);
+    setTextAndMeter('#task-stats-request-local-run', resp['num_exec']['local_run'], requestMeterMax);
+    setTextAndMeter('#task-stats-request-local-finished', resp['num_exec']['local_finished'], requestMeterMax);
+    setTextAndMeter('#task-stats-request-local-killed', resp['num_exec']['local_killed'], requestMeterMax);
+    setTextAndMeter('#task-stats-request-fail-fallback', resp['num_exec']['fail_fallback'], requestMeterMax);
+    setTextAndMeter('#task-stats-request-compiler-proxy-fail', resp['num_exec']['compiler_proxy_fail'], requestMeterMax);
+
+    var gomaMeterMax = resp['num_exec']['goma_finished'] + resp['num_exec']['goma_aborted'];
+    setTextAndMeter('#task-stats-goma-killed', resp['num_exec']['local_killed'], gomaMeterMax);
+    setTextAndMeter('#task-stats-goma-aborted', resp['num_exec']['goma_aborted'], gomaMeterMax);
+
+    var localMeterMax = resp['num_exec']['local_run'];
+    setTextAndMeter('#task-stats-local-killed', resp['num_exec']['local_killed'], localMeterMax);
+    setTextAndMeter('#task-stats-local-aborted', resp['num_exec']['goma_aborted'], localMeterMax);
+
+    var raceMeterMax = resp['num_exec']['local_killed'] + resp['num_exec']['goma_aborted'];
+    setTextAndMeter('#task-stats-compiler-race-total', raceMeterMax, raceMeterMax);
+    setTextAndMeter('#task-stats-compiler-race-goma-win', resp['num_exec']['local_killed'], raceMeterMax);
+    setTextAndMeter('#task-stats-compiler-race-local-win', resp['num_exec']['goma_aborted'], raceMeterMax);
+
+    var compilerInfoMeterMax = resp['num_exec']['compiler_info_stores'];
+    setTextAndMeter('#task-stats-compiler-info-stores', resp['num_exec']['compiler_info_stores'], compilerInfoMeterMax);
+    setTextAndMeter('#task-stats-compiler-info-store-dups', resp['num_exec']['compiler_info_store_dups'], compilerInfoMeterMax);
+    setTextAndMeter('#task-stats-compiler-info-miss', resp['num_exec']['compiler_info_miss'], compilerInfoMeterMax);
+    setTextAndMeter('#task-stats-compiler-info-fail', resp['num_exec']['compiler_info_fail'], compilerInfoMeterMax);
+
+    var fileInfoMeterMax = resp['num_file']['requested'];
+    setTextAndMeter('#task-stats-file-total', resp['num_file']['requested'], fileInfoMeterMax);
+    setTextAndMeter('#task-stats-file-uploaded', resp['num_file']['uploaded'], fileInfoMeterMax);
+    setTextAndMeter('#task-stats-file-missed', resp['num_file']['missed'], fileInfoMeterMax);
+  },
+
+  updateNetworkStats: function() {
+    var httprpc = this.resp['http_rpc'];
+
+    $('#http-rpc-qps').attr('src', httprpc['qps_chart']);
+    $('#http-rpc-traffic').attr('src', httprpc['traffic_chart']);
+
+    $('#rpc-health-status').text(httprpc['health_status']);
+
+    $('#rpc-num-active').text(httprpc['num_active']);
+    $('#rpc-num-query').text(httprpc['num_query']);
+    $('#rpc-num-retry').text(httprpc['num_http_retry']);
+    $('#rpc-num-timeout').text(httprpc['num_http_timeout']);
+    $('#rpc-num-error').text(httprpc['num_http_error']);
+
+    $('#rpc-socket-pool').text(httprpc['socket_pool']);
+    $('#rpc-read-bps').text(humanReadableBytes(httprpc['read_bps']));
+    $('#rpc-read-byte').text(humanReadableBytes(httprpc['read_byte']));
+    $('#rpc-write-bps').text(humanReadableBytes(httprpc['write_bps']));
+    $('#rpc-write-byte').text(humanReadableBytes(httprpc['write_byte']));
+
+    $('#rpc-compression').text(httprpc['compression']);
+    $('#rpc-accept-encoding').text(httprpc['accept_encoding']);
+    $('#rpc-authorization').text(httprpc['authorization']);
+    $('#rpc-cookie').text(httprpc['cookie']);
+    $('#rpc-content-type').text(httprpc['content_type']);
+    $('#rpc-oauth2').text(httprpc['oauth2']);
+    $('#rpc-capture-response-header').text(httprpc['capture_response_header']);
+    $('#rpc-ssl').text(httprpc['ssl']);
+
+    $('#rpc-url-path-prefix').text(httprpc['url_path_prefix']);
+    $('#rpc-extra-params').text(httprpc['extra_params']);
+    $('#rpc-ssl-extra-cert').text(httprpc['ssl_extra_cert']);
+
+    $('#rpc-user-agent').text(httprpc['user_agent']);
+
+    if (httprpc['health_status'] != 'ok') {
+      $('#http-rpc-info').addClass('warning');
+    } else {
+      $('#http-rpc-info').removeClass('warning');
+    }
+  },
+
+  setTaskPositionFirst: function() {
+    var currentTasks = this._filteredTasks();
+    this.taskStartingOffset = 0;
+    this._updateTaskViewWith(currentTasks);
+  },
+
+  setTaskPositionPrev: function() {
+    var currentTasks = this._filteredTasks();
+    this.taskStartingOffset -= this.taskMaxSize;
+    if (currentTasks.length < this.taskStartingOffset)
+      this.taskStartingOffset = 0;
+    if (this.taskStartingOffset < 0)
+      this.taskStartingOffset = 0;
+    this._updateTaskViewWith(currentTasks);
+  },
+
+  setTaskPositionNext: function() {
+    var currentTasks = this._filteredTasks();
+    this.taskStartingOffset += this.taskMaxSize;
+    if (this.taskStartingOffset >= currentTasks.length) {
+      this.taskStartingOffset = currentTasks.length - this.taskMaxSize;
+    }
+    if (this.taskStartingOffset < 0) {
+      this.taskStartingOffset = 0;
+    }
+    this._updateTaskViewWith(currentTasks);
+  },
+
+  setTaskPositionLast: function() {
+    var currentTasks = this._filteredTasks();
+    this.taskStartingOffset = currentTasks.length - this.taskMaxSize;
+    if (this.taskStartingOffst < 0)
+      this.taskStartingOffset = 0;
+    this._updateTaskViewWith(currentTasks);
+  },
+
+  // Changes task order.
+  changeTaskOrder: function(key) {
+    if (this.currentTaskOrderKey != key) {
+      // Setting New key
+      this.currentTaskOrderKey = key;
+      this.currentTaskOrderAscending = true;
+    } else if (this.currentTaskOrderAscending) {
+      // Setting the same key. Changing the order.
+      this.currentTaskOrderAscending = false;
+    } else {
+      // Setting the same key twice. Remove the key.
+      this.currentTaskOrderKey = null;
+    }
+
+    this._updateTaskOrderView();
+  },
+
+  showTaskDetail: function(taskId) {
+    console.log('showTaskDetail: ' + taskId);
+
+    if (taskId < 0)
+      return;
+
+    this.currentDetailTaskId = taskId;
+
+    var xhr = new XMLHttpRequest();
+    xhr.onreadystatechange = (function(that) { return function() {
+      if (this.readyState == 4) {
+        var responseText = this.responseText;
+        this.onreadystatechange = null;
+        if (this.status != 200) {
+          if (taskId in that.taskDetailCache)
+            that._showTaskDetailWith(that.taskDetailCache[taskId]);
+          else
+            that._showEmptyTaskDetailWith(taskId);
+          return;
+        }
+
+        var detail = JSON.parse(responseText);
+        that._showTaskDetailWith(detail);
+
+        // Cache the detail only when it's finished.
+        if (that.finishedTasks[taskId])
+          that.taskDetailCache[taskId] = detail;
+      }
+    };})(this);
+
+    var url = taskUpdater.url + '?id=' + taskId;
+    xhr.open('POST', url);
+    xhr.send();
+  },
+
+  // Returns the list of tasks that are filtered.
+  _filteredTasks: function() {
+    var currentTasks = this.tasks[this.currentTaskView];
+    var result = [];
+
+    var checker = {};
+    $('#task-filter input').each(function() {
+      if (this.checked)
+        checker[this.name] = this.checked;
+    });
+
+    for (var i = 0; i < currentTasks.length; ++i) {
+      var task = currentTasks[i];
+      var className = 'task-status-' + taskStatus(task);
+      if (className in checker) {
+        result.push(task);
+      }
+    }
+
+    // Sorts here.
+    if (this.currentTaskOrderKey != null) {
+      var sortKey = this.currentTaskOrderKey.replace('-', '_');
+
+      var compare = function(v1, v2, ascending) {
+        if (v1 == v2) {
+          return 0;
+        }
+
+        if (v1 && v2) {
+          if (v1 < v2) {
+            return -ascending;
+          }
+          return ascending;
+        }
+        if (!v2) {
+          return ascending;
+        }
+        return -ascending;
+      }
+
+      var sortFunc = (function(ascending) {
+        return function(a, b) {
+          var v1 = a[sortKey];
+          var v2 = b[sortKey];
+          return compare(v1, v2, ascending);
+        };
+      })(this.currentTaskOrderAscending ? 1 : -1);
+
+      // When key is 'time', if 'time' is missing, we should use 'elapsed'
+      // instead.
+      if (sortKey == 'time') {
+        sortFunc = (function(ascending) {
+          return function(a, b) {
+            var v1 = a['time'] || a['elapsed'];
+            var v2 = b['time'] || b['elapsed'];
+            return compare(v1, v2, ascending);
+          };
+        })(this.currentTaskOrderAscending ? 1 : -1);
+      }
+
+      result.sort(sortFunc);
+    }
+
+    return result;
+  },
+
+  _updateTaskOrderView: function() {
+    $('.task-summary-head').removeClass('with-icon-ascending');
+    $('.task-summary-head').removeClass('with-icon-descending');
+    if (this.currentTaskOrderKey == null)
+      return;
+
+    var id = 'task-summary-head-' + this.currentTaskOrderKey;
+    var elem = $('#' + id);
+    if (this.currentTaskOrderAscending) {
+      elem.addClass('with-icon-ascending');
+    } else {
+      elem.addClass('with-icon-descending');
+    }
+  },
+
+  _updateTaskViewWith: function(currentTasks) {
+    var taskSummaryList = $('#task-summary-list');
+    taskSummaryList.empty();
+
+    var startPos = this.taskStartingOffset;
+    var endPos = startPos + this.taskMaxSize;
+    if (startPos < 0)
+      startPos = 0;
+    if (endPos >= currentTasks.length)
+      endPos = currentTasks.length;
+    if (endPos < startPos)
+      endPos = startPos;
+
+    for (var i = startPos; i < endPos; ++i) {
+      var task = currentTasks[i];
+
+      var tr = $('<tr>');
+      var taskStatusName = taskStatus(task);
+      tr.addClass('task-status-' + taskStatusName);
+      $('<td class="task-summary-id">').text(task.id).appendTo(tr);
+      if ('time' in task) {
+        $('<td class="task-summary-time">').text(task.time).appendTo(tr);
+      } else if ('elapsed' in task) {
+        $('<td class="task-summary-time">').text(task.elapsed).appendTo(tr);
+      } else {
+        $('<td class="task-sumamry-time">').appendTo(tr);
+      }
+      $('<td class="task-summary-pid">').text(task.pid).appendTo(tr);
+      $('<td class="task-summary-state">').text(task.state).appendTo(tr);
+      $('<td class="task-summary-status">').text(taskStatusName.toUpperCase().replace('-', ' ')).appendTo(tr);
+      $('<td class="task-summary-subproc-pid">').text(task.subproc_pid).appendTo(tr);
+      $('<td class="task-summary-subproc-state">').text(task.subproc_state).appendTo(tr);
+      // TODO: Show full flag when a cursor is hovered?
+      // It will take a big area, though... b/24883527
+      $('<td class="task-summary-flag">').text(makeFlagSummary(task.flag)).appendTo(tr);
+      $('<td class="task-summary-major-factor">').text(task.major_factor).appendTo(tr);
+
+      tr.click((function(taskId) {
+        return function() {
+          location.hash = '#task' + taskId;
+        };
+      })(task.id));
+      tr.css({'cursor': 'pointer'});
+      taskSummaryList.append(tr);
+    }
+
+    $('#task-summary-total').text(currentTasks.length);
+    $('#task-summary-offset-begin').text(startPos + 1);
+    $('#task-summary-offset-end').text(endPos);
+
+    if (startPos == 0) {
+      $('#task-summary-first').addClass('disabled');
+      $('#task-summary-prev').addClass('disabled');
+    } else {
+      $('#task-summary-first').removeClass('disabled');
+      $('#task-summary-prev').removeClass('disabled');
+    }
+
+    if (endPos >= currentTasks.length) {
+      $('#task-summary-next').addClass('disabled');
+      $('#task-summary-last').addClass('disabled');
+    } else {
+      $('#task-summary-next').removeClass('disabled');
+      $('#task-summary-last').removeClass('disabled');
+    }
+  },
+
+  // Just show task id if task is empty.
+  _showEmptyTaskDetailWith: function(taskId) {
+    var task = {
+      id: taskId,
+    };
+
+    this._showTaskDetailWith(task);
+  },
+
+  _showTaskDetailWith: function(task) {
+    var table = $('#task-detail-table');
+    table.empty();
+
+    function addTextItem(key, value) {
+      var tr = $('<tr>');
+      $('<td>').text(key).appendTo(tr);
+      $('<td>').text(value).appendTo(tr);
+      table.append(tr);
+      return true;
+    }
+
+    function addHTMLItem(key, value) {
+      var tr = $('<tr>');
+      $('<td>').text(key).appendTo(tr);
+      $('<td>').html(value).appendTo(tr);
+      table.append(tr);
+
+      return true;
+    }
+
+    function addLink(key) {
+      if (!(key in task))
+        return false;
+
+      var value = task[key];
+      var tr = $('<tr>');
+      $('<td>').text(key).appendTo(tr);
+      $('<td>').html($('<a>').attr('href', value).text(value)).appendTo(tr);
+
+      table.append(tr);
+      return true;
+    }
+
+    function addLineBreak() {
+      var tr = $('<tr>');
+      tr.addClass('task-linebreak');
+      $('<td>').text('');
+      $('<td>').text('');
+      table.append(tr);
+    }
+
+    function add(key, opt_suffix) {
+      if (!(key in task))
+        return false;
+
+      var value = task[key];
+      if (opt_suffix)
+        value += opt_suffix;
+
+      return addTextItem(key, value);
+    }
+
+    function addLongString(key, opt_usingPre) {
+      if (!(key in task))
+        return false;
+
+      var value = task[key];
+
+      var details = $('<details>');
+      $('<summary>see more ...</summary>').appendTo(details);
+      if (opt_usingPre) {
+        $('<pre>').text(value).appendTo(details);
+      } else {
+        $('<div>').text(value).appendTo(details);
+      }
+
+      return addHTMLItem(key, details);
+    }
+
+    function addArrayItem(key) {
+      if (!(key in task))
+        return false;
+
+      var values = task[key];
+      var value = values.join('<br>');
+
+      var details = $('<details>');
+      $('<summary>see more...</summary>').appendTo(details);
+      $('<div>').html(value).appendTo(details);
+
+      return addHTMLItem(key, details);
+    }
+
+    add('id');
+    add('elapsed');
+    add('time');
+    add('pid');
+    add('state');
+    add('subproc_state');
+    add('subproc_pid');
+    addLongString('flag');
+    add('major_factor');
+    add('command_version_mismatch');
+    add('command_binary_hash_mismatch');
+    add('command_subprograms_mismatch');
+    addLineBreak();
+
+    add('start_time');
+    addLineBreak();
+
+    if (addArrayItem('error_message')) {
+      addLineBreak();
+    }
+
+    add('cache_key');
+    add('http');
+    add('exit');
+    add('retry');
+    addLineBreak();
+    if (addArrayItem('exec_request_retry_reason')) {
+      addLineBreak();
+    }
+
+    // # of files
+    add('total_input');
+    add('uploading_input');
+    add('missing_input');
+    addLineBreak();
+
+    // size
+    add('gcc_req_size');
+    add('gcc_resp_size');
+    add('exec_req_size');
+    add('exec_resp_size');
+    add('output_file_size');
+    add('chunk_resp_size');
+    addLineBreak();
+
+    /* time */
+    add('compiler_info_process_time');
+    if (('depscache_used' in task) && task['depscache_used'] == 'true') {
+      add('include_preprocess_time', ' (cached)');
+    } else {
+      add('include_preprocess_time');
+    }
+    add('include_fileload_time');
+    add('include_fileload_pending_time');
+    add('include_fileload_run_time');
+    add('rpc_call_time');
+    add('file_response_time');
+    addLineBreak();
+
+
+    /* file_response_time break down */
+    add('output_file_rpc');
+    add('output_file_rpc_req_build_time');
+    add('output_file_rpc_req_send_time');
+    add('output_file_rpc_wait_time');
+    add('output_file_rpc_resp_recv_time');
+    add('output_file_rpc_resp_parse_time');
+    addLineBreak();
+
+    /* local run */
+    if (('local_pending_ms' in task) || 'local_run_ms' in task) {
+      add('local_pending_ms');
+      add('local_run_ms');
+      add('local_mem_kb');
+      add('local_output_file_time');
+      add('local_output_file_size');
+      add('local_run_reason');
+      addLineBreak();
+    }
+
+    /* command detail */
+    add('cwd');
+    addLineBreak();
+    if (add('orig_flag')) {
+      addLineBreak();
+    }
+    if (addArrayItem('env')) {
+      addLineBreak();
+    }
+    addArrayItem('exec_output_file');
+    addLineBreak();
+    if (addLongString('stdout', true)) {
+      addLineBreak();
+    }
+    if (addLongString('stderr', true)) {
+      addLineBreak();
+    }
+    addArrayItem('inputs');
+    addArrayItem('system_library_paths');
+    add('response_header');
+
+    if (('state' in task) && task['state'] === 'FINISHED') {
+      var tr = $('<tr>');
+      var exec_req_dump_url =
+          './api/taskz?id=' + encodeURIComponent(task['id']) + '&dump=req';
+      $('<td>').text('dump_exec_req').appendTo(tr);
+
+      var form = $('<form>')
+          .attr('method', 'POST')
+          .attr('action', exec_req_dump_url)
+          .html($('<button>').text('dump exec req'));
+
+      $('<td>').html(form).appendTo(tr);
+
+      table.append(tr);
+    }
+
+    var prevTaskId = task.id - 1;
+    if (prevTaskId >= 0) {
+      $('#task-show-prev').attr('href', '#task' + prevTaskId);
+    } else {
+      $('#task-show-prev').removeAttr('href');
+    }
+    var nextTaskId = task.id + 1;
+    $('#task-show-next').attr('href', '#task' + nextTaskId);
+
+    showPage('task-detail');
+  }
+};
+
+// The task view instance shared by the top-level functions in this file.
+var gomaTaskView = new GomaTaskView();
+
+// showPage shows only one page.
+function showPage(pageName) {
+  $('.page').hide();
+  $('#' + pageName + '-page').show();
+
+  gomaTaskView.updateTaskView();
+}
+
+// ----------------------------------------------------------------------
+
+/**
+ * Current task updater. Null until startTaskUpdater() creates it.
+ * @type {Loader}
+ */
+var taskUpdater = null;
+
+/**
+ * Constructs a loader that requests 'url' and passes each response text to
+ * 'updater'. A new loader is suspended until start() is called.
+ * @param {string} url url to request XHR.
+ * @param {Function} updater closure to update by response text.
+ * @constructor
+ */
+function Loader(url, updater) {
+  this.url = url;
+  this.updater = updater;
+  // True while polling is stopped; cleared by start(), set by stop().
+  this.suspended = true;
+  // The XMLHttpRequest created by load(); reset to null once it completes.
+  this.xhr = null;
+  // Request parameters set through setParameter(); load() sends them in the
+  // URL query string.
+  this.params = {};
+}
+
+/**
+ * Starts loading.
+ */
+Loader.prototype.start = function() {
+   this.suspended = false;
+   this.load();
+};
+
+/**
+ * Stops loading. Only the suspended flag is set here; load() checks it
+ * after a response has arrived and then neither calls the updater nor
+ * schedules the next request.
+ */
+Loader.prototype.stop = function() {
+  this.suspended = true;
+};
+
+/**
+ * Updates load status shown in id='update-status'.
+ * @param {string} msg loader status message.
+ */
+Loader.prototype.updateLoaderStatus = function(msg) {
+  document.getElementById('update-status').innerText = msg;
+};
+
+/**
+ * Sets a request parameter. load() serializes all parameters into the query
+ * string of the request URL; an existing value for 'key' is overwritten.
+ * @param {string} key key for the request parameter
+ * @param {string} value value for the request parameter
+ */
+Loader.prototype.setParameter = function(key, value) {
+  this.params[key] = value;
+};
+
+/**
+ * Sends a POST request to 'url' (with 'params' in the query string) and, on
+ * a 200 response, passes the response text to 'updater'. While 'suspended'
+ * is false the next load is then scheduled with setTimeout, using the value
+ * of the 'update-freq' element as the delay in milliseconds.
+ * On any other status the error is shown in '#error' and no further load is
+ * scheduled.
+ */
+Loader.prototype.load = function() {
+  var self = this;
+
+  this.xhr = new XMLHttpRequest();
+  this.xhr.onreadystatechange = function() {
+    // 4 is the DONE state; 'this' is the XMLHttpRequest here.
+    if (this.readyState == 4) {
+      // Grab the text and drop the references before handling the result.
+      var responseText = self.xhr.responseText;
+      self.xhr.onreadystatechange = null;
+      self.xhr = null;
+      if (this.status == 200) {
+        self.updateLoaderStatus('parsing');
+        // Deferred by 1 ms, presumably so the 'parsing' status can be
+        // painted before the updater runs -- TODO confirm.
+        setTimeout(function() {
+          if (self.suspended) {
+            // stop() was called in the meantime: discard this response.
+            self.updateLoaderStatus('suspended');
+          } else {
+            self.updater(responseText);
+            // TODO: We may want validation here?
+            // '| 0' converts the input to an integer; a non-numeric value
+            // becomes 0.
+            var updateFreq =
+                document.getElementById('update-freq').value | 0;
+            setTimeout(function() { self.load(); }, updateFreq);
+            self.updateLoaderStatus('waiting');
+          }
+        }, 1);
+      } else {
+        // NOTE(review): nothing reschedules load() on this path, so polling
+        // stops until start() is called again -- confirm this is intended.
+        var errorArea = $('#error');
+        errorArea.show();
+        errorArea.text(
+            'request to ' + self.url + ' got error ' + this.status);
+      }
+    }
+  };
+
+  var url = self.url;
+  var param = $.param(self.params);
+  if (param != '') {
+    url += '?' + param;
+  }
+
+  this.xhr.open('POST', url);
+  this.xhr.send();
+  this.updateLoaderStatus('loading');
+};
+
+// ----------------------------------------------------------------------
+
+/**
+ * Updates goma version.
+ * @param {Array} resp json response data [my_version, pulled_version].
+ */
+function updateGomaVersion(resp) {
+  var my_version = resp[0];
+  var pulled_version = resp[1];
+  var d = document.getElementById('goma_version');
+  if (my_version >= pulled_version) {
+    d.innerText = 'goma_version: ' + my_version;
+  } else {
+    d.innerText = 'goma_version: ' + my_version +
+                  ' [' + pulled_version + ' available]';
+    d.setAttribute('class', 'warning');
+  }
+}
+
+/**
+ * Update taskz response in 'domNode'.
+ * @param {Element} domNode dom node.
+ * @param {string} response JSON text.
+ */
+function updateTaskView(domNode, response) {
+  var resp = JSON.parse(response);
+
+  if (resp['goma_version']) {
+    updateGomaVersion(resp['goma_version']);
+  }
+
+  if (resp['active']) {
+    gomaTaskView.tasks['active'] = resp['active'];
+    $('#count-active-tasks').text('' + gomaTaskView.tasks['active'].length);
+  }
+  if (resp['finished']) {
+    var finished = resp['finished'];
+    for (var i = finished.length - 1; i >= 0; --i) {
+      var task = finished[i];
+      if (!(task.id in gomaTaskView.finishedTasks)) {
+        gomaTaskView.tasks['finished'].unshift(task);
+        gomaTaskView.finishedTasks[task.id] = task;
+      }
+    }
+    $('#count-finished-tasks').text('' + gomaTaskView.tasks['finished'].length);
+  }
+  if (resp['failed']) {
+    var failed = resp['failed'];
+    for (var i = failed.length - 1; i >= 0; --i) {
+      var task = failed[i];
+      if (!(task.id in gomaTaskView.failedTasks)) {
+        gomaTaskView.tasks['failed'].unshift(task)
+        gomaTaskView.failedTasks[task.id] = task;
+      }
+    }
+    $('#count-failed-tasks').text('' + gomaTaskView.tasks['failed'].length);
+  }
+  if (resp['long']) {
+    gomaTaskView.tasks['long'] = resp['long'];
+    $('#count-long-tasks').text('' + gomaTaskView.tasks['long'].length);
+  }
+
+  if (resp['last_update_ms']) {
+    taskUpdater.setParameter('after', resp['last_update_ms']);
+  }
+
+  gomaTaskView.resp = resp;
+
+  gomaTaskView.updateTaskView();
+  gomaTaskView.updateMismatchView();
+  gomaTaskView.updateTaskStats();
+  gomaTaskView.updateNetworkStats();
+}
+
+/**
+ * Request to update 'taskview' by /api/taskz.
+ */
+function startTaskUpdater() {
+  taskUpdater = new Loader('/api/taskz',
+    function(response) {
+      updateTaskView(document.getElementById('taskview'), response);
+    });
+  taskUpdater.start();
+}
+
+function onHashChange() {
+  if (location.hash == '#active') {
+    gomaTaskView.currentTaskView = 'active';
+    gomaTaskView.setTaskPositionFirst();
+    showPage('task-summary');
+  } else if (location.hash == '#finished') {
+    gomaTaskView.currentTaskView = 'finished';
+    gomaTaskView.setTaskPositionFirst();
+    showPage('task-summary');
+  } else if (location.hash == '#failed') {
+    gomaTaskView.currentTaskView = 'failed';
+    gomaTaskView.setTaskPositionFirst();
+    showPage('task-summary');
+  } else if (location.hash == '#long') {
+    gomaTaskView.currentTaskView = 'long';
+    gomaTaskView.setTaskPositionFirst();
+    showPage('task-summary');
+  } else if (location.hash == '#task-stats') {
+    showPage('task-stats');
+  } else if (location.hash == '#network-stats') {
+    showPage('network-stats');
+  } else if (location.hash == '#settings') {
+    showPage('settings');
+  } else if (location.hash.startsWith('#task')) {
+    var taskId = parseInt(location.hash.substr(5));
+    if (!isNaN(taskId))
+      gomaTaskView.showTaskDetail(taskId);
+  }
+}
+
+function accountUpdate() {
+  $.get('/api/accountz',
+    function(data) {
+      $('#http_status').text(data.status);
+      $('#email').text(data.account);
+      var login = $('#login');
+      login.text(data.text);
+      login.attr("href", data.href);
+    })
+   .always(function() {
+     setTimeout(accountUpdate, 10*60*1000);
+   });
+}
+
+function init() {
+  startTaskUpdater();
+  accountUpdate();
+
+  // Prevents text selection when clicking icons many times.
+  $('.menu-icon').mousedown(function(e) { e.preventDefault(); });
+
+  $('#task-summary-first').click(function() {
+    if ($(this).hasClass('disabled'))
+      return;
+    gomaTaskView.setTaskPositionFirst();
+  });
+  $('#task-summary-prev').click(function() {
+    if ($(this).hasClass('disabled'))
+      return;
+    gomaTaskView.setTaskPositionPrev();
+  });
+  $('#task-summary-next').click(function() {
+    if ($(this).hasClass('disabled'))
+      return;
+    gomaTaskView.setTaskPositionNext();
+  });
+  $('#task-summary-last').click(function() {
+    if ($(this).hasClass('disabled'))
+      return;
+    gomaTaskView.setTaskPositionLast();
+  });
+
+  $('.task-summary-head').click(function() {
+    var id = $(this).attr('id');
+    // remove 'task-summary-head-'
+    if (!id.startsWith('task-summary-head-')) {
+      console.error('task-summary-head class element should'
+          + ' have id starting with task-summary-head');
+      return;
+    }
+
+    var key = id.slice('task-summary-head-'.length);
+    gomaTaskView.changeTaskOrder(key);
+  });
+
+  $('input[name="update-task"]:radio').change(function() {
+    var value = $(this).val();
+    if (taskUpdater == null)
+      return;
+
+    if (value == 'on')
+      taskUpdater.start();
+    else
+      taskUpdater.stop();
+  });
+
+  $('input[name="pagesize"]:radio').change(function() {
+    var pagesize = parseInt($(this).val(), 10);
+    if (isNaN(pagesize))
+      pagesize = 25;
+    gomaTaskView.setPageSize(pagesize);
+  });
+
+  $('#btn-task-check-all').click(function(event) {
+    $('#task-filter input').each(function() {
+      this.checked = true;
+    });
+    event.preventDefault();
+  });
+
+  $('#btn-task-uncheck-all').click(function(event) {
+    $('#task-filter input').each(function() {
+      this.checked = false;
+    });
+    event.preventDefault();
+  });
+
+  window.onhashchange = onHashChange;
+
+  // The default view is 'active'. But we can check the hash value
+  // to determine what page should be shown first.
+  gomaTaskView.currentTaskView = 'active';
+  showPage('task-summary');
+  onHashChange();
+}
diff --git a/client/resources/compiler_proxy_status_style.css b/client/resources/compiler_proxy_status_style.css
new file mode 100644
index 0000000..e10f067
--- /dev/null
+++ b/client/resources/compiler_proxy_status_style.css
@@ -0,0 +1,297 @@
+/* Copyright 2015 The Goma Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+/* Base font for the whole page. */
+body {
+  font-family: "Arial Unicode MS", Arial, sans-serif;
+  font-size: 13px;
+}
+
+/* Tables use the same font size as the surrounding text. */
+table {
+  font-size: 100%;
+}
+
+/* Pink highlight; the status script adds this class to elements that need
+ * attention. */
+.warning {
+  background-color: pink;
+}
+
+/* Generated block after the element that clears floats on both sides. */
+.clearfix:after {
+  display: block;
+  clear: both;
+  content: "";
+}
+
+/* --- HEADER ----------------------------------------------------------- */
+
+/* Fixed-height header; the title and the message boxes float left and the
+ * account box floats right. */
+header {
+  height: 120px;
+}
+
+header h1 {
+  float: left;
+  height: 120px;
+  width: 400px;
+  margin: 10px 0 0 0;
+}
+
+#account {
+  float: right;
+}
+
+/* Scrollable message boxes; hidden until a script shows them. */
+#error, #mismatch {
+  display: none; /* Don't show this by default */
+  float: left;
+  height: 100px;
+  width: 256px;
+  padding: 10px;
+  margin: 0 10px;
+  overflow: scroll;
+}
+
+/* --- SIDE MENU -------------------------------------------------------- */
+
+/* The menu floats left with a fixed width; .content uses a left margin of
+ * the same width. */
+.side-menu {
+  width: 160px;
+  float: left;
+}
+
+.content {
+  margin-left: 160px;
+}
+
+/* --- FOOTER ----------------------------------------------------------- */
+
+#endpoints {
+  float: left;
+}
+
+#goma_version {
+  float: right;
+}
+
+/* Clears the two floats above. */
+#global_info {
+  clear: both;
+}
+
+/* --- TASK VIEW -------------------------------------------------------- */
+
+#task-summary-page-menu span {
+  vertical-align: middle;
+}
+
+/* Fixed layout: column widths come from the width declarations on the
+ * cells, not from their content. */
+#task-summary-table {
+  border-collapse: collapse;
+  table-layout: fixed;
+  width: 100%;
+  min-width: 1024px;
+}
+
+#task-summary-table th {
+  text-shadow: #ddf 1px 1px 0;
+
+  border-bottom: 2px solid #6f7277;
+  padding: 3px;
+  text-align: center;
+  color: #4b4a4a;
+  overflow: hidden;
+}
+
+/* The white top border separates the coloured rows. */
+#task-summary-table td {
+  padding: 2px 1ex;
+  border-top: 2px solid white;
+  white-space: nowrap;
+}
+
+#task-summary-figures {
+  display: inline-block;
+  min-width: 8em;
+  text-align: center;
+}
+
+/* Separator rows added by the task detail script. */
+#task-detail-table .task-linebreak {
+  height: 1em;
+}
+
+/* Column headers are clickable, so keep repeated clicks from selecting
+ * their text. Vendor-prefixed forms first, the standard property last. */
+.task-summary-head {
+  -webkit-user-select: none;
+  -moz-user-select: none;
+  -khtml-user-select: none;
+  -ms-user-select: none;
+  user-select: none;
+}
+
+/* Column widths for the task summary cells. */
+
+/* Narrow, right-aligned numeric columns. */
+.task-summary-id,
+.task-summary-time,
+.task-summary-pid {
+  width: 7ex;
+  text-align: right;
+}
+
+/* 16ex is needed so that LOCAL_FINISHED fits in the state column. */
+.task-summary-state,
+.task-summary-status,
+.task-summary-subproc-state {
+  width: 16ex;
+}
+
+.task-summary-subproc-pid {
+  width: 14ex;
+  text-align: right;
+}
+
+.task-summary-flag {
+  width: 60%;
+  overflow: hidden;
+}
+
+.task-summary-major-factor {
+  width: 40ex;
+}
+
+#task-filter input[type="checkbox"] {
+  vertical-align: middle;
+}
+
+/* Tables on the statistics pages. */
+.stats-info-table {
+  width: 100%;
+}
+
+.stats-info-table-heading td, .stats-info-table-heading th {
+  border-bottom: solid 2px #eee;
+}
+
+.stats-info-table th {
+  text-align: left;
+  padding-top: 1em;
+}
+
+/* The first cell of each row is right-aligned with a gap after it. */
+.stats-info-table td:first-child {
+  text-align: right;
+  padding-right: 1em;
+}
+
+/* --- TASKS ------------------------------------------------------------ */
+
+/* Vertical spacing shared by every task status class. */
+.task-status-success,
+.task-status-cachehit,
+.task-status-local-cachehit,
+.task-status-local-fallback,
+.task-status-retry,
+.task-status-gomaerror,
+.task-status-failure,
+.task-status-conftestfailure,
+.task-status-mismatch,
+.task-status-cancel {
+  margin-top: 2px;
+  margin-bottom: 2px;
+}
+
+/* One background colour per task status. */
+.task-status-success {
+  background-color: lightgreen;
+}
+
+.task-status-cachehit,
+.task-status-local-cachehit {
+  background-color: lime;
+}
+
+.task-status-local-fallback {
+  background-color: khaki;
+}
+
+.task-status-retry {
+  background-color: greenyellow;
+}
+
+.task-status-gomaerror {
+  background-color: pink;
+}
+
+.task-status-failure {
+  background-color: mistyrose;
+}
+
+.task-status-conftestfailure {
+  background-color: orange;
+}
+
+.task-status-mismatch {
+  background-color: peru;
+}
+
+.task-status-cancel {
+  background-color: lightgrey;
+}
+
+/* --- IMAGES ----------------------------------------------------------- */
+
+/* NOTE(review): every background-image below has an empty url(); the image
+ * data appears to be missing from this copy -- confirm against the original
+ * stylesheet. */
+
+/* Common box for the 19x19 paging icons. */
+.icon {
+  display: inline-block;
+  width: 19px;
+  height: 19px;
+}
+
+.icon-first {
+  cursor: pointer;
+  background-image: url();
+}
+
+.icon-prev {
+  cursor: pointer;
+  background-image: url();
+}
+
+.icon-next {
+  cursor: pointer;
+  background-image: url();
+}
+
+.icon-last {
+  cursor: pointer;
+  background-image: url();
+}
+
+/* Variants used while a paging icon carries the 'disabled' class. */
+.icon-first.disabled {
+  background-image: url();
+}
+
+.icon-prev.disabled {
+  background-image: url();
+}
+
+.icon-next.disabled {
+  background-image: url();
+}
+
+.icon-last.disabled {
+  background-image: url();
+}
+
+/* Sort direction markers placed at the left of a column header. */
+.with-icon-ascending {
+  background-image: url();
+  background-repeat: no-repeat;
+  background-position: left center;
+  padding-left: 17px !important;
+}
+
+.with-icon-descending {
+  background-image: url();
+  background-repeat: no-repeat;
+  background-position: left center;
+  padding-left: 17px !important;
+}
diff --git a/client/resources/compilerz_html.html b/client/resources/compilerz_html.html
new file mode 100644
index 0000000..e70f032
--- /dev/null
+++ b/client/resources/compilerz_html.html
@@ -0,0 +1,27 @@
+<!DOCTYPE html>
+
+<!-- Copyright 2016 The Goma Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+-->
+
+<html>
+<head>
+<title>GOMA: Compiler Information</title>
+<link rel="stylesheet" href="/static/compilerz.css">
+<script src="/static/jquery.min.js"></script>
+<script src="/static/compilerz.js"></script>
+</head>
+
+<body onload="init()">
+<h1>Compilers</h1>
+
+<table border="1">
+  <thead>
+    <tr><th>name</th><th>local compiler path</th><th>version</th><th>hash</th></tr>
+  </thead>
+  <tbody id="compilers-body"></tbody>
+</table>
+
+</body>
+</html>
diff --git a/client/resources/compilerz_script.js b/client/resources/compilerz_script.js
new file mode 100644
index 0000000..14c4875
--- /dev/null
+++ b/client/resources/compilerz_script.js
@@ -0,0 +1,51 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+function showCompilers(json) {
+  if (!('compilers' in json)) {
+    console.error('No compilers found in json', json);
+    return;
+  }
+
+  var compilers = json['compilers'];
+  var $body = $('#compilers-body');
+  for (var i = 0; i < compilers.length; ++i) {
+    var c = compilers[i];
+
+    var $name_td = $('<td>');
+    if ('name' in c) {
+      $name_td.text(c['name']);
+    }
+
+    var $path_td = $('<td>');
+    if ('local_compiler_path' in c) {
+      $path_td.text(c['local_compiler_path']);
+    }
+
+    var $version_td = $('<td>');
+    if ('version' in c) {
+      $version_td.text(c['version']);
+    }
+
+    var $hash_td = $('<td>');
+    if ('real_compiler_hash' in c) {
+      $hash_td.text(c['real_compiler_hash']);
+    }
+
+    var $tr = $('<tr>');
+    $tr.append($name_td)
+        .append($path_td)
+        .append($version_td)
+        .append($hash_td)
+        .appendTo($body);
+  }
+}
+
+function loadCompilers() {
+  $.get('/api/compilerz', showCompilers);
+}
+
+function init() {
+  loadCompilers()
+}
diff --git a/client/resources/compilerz_style.css b/client/resources/compilerz_style.css
new file mode 100644
index 0000000..a612f37
--- /dev/null
+++ b/client/resources/compilerz_style.css
@@ -0,0 +1,9 @@
+/* Copyright 2017 The Goma Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+body {
+  font-family: "Arial Unicode MS", Arial, sans-serif;
+  font-size: 13px;
+}
diff --git a/client/scoped_tmp_file.cc b/client/scoped_tmp_file.cc
new file mode 100644
index 0000000..50d9a74
--- /dev/null
+++ b/client/scoped_tmp_file.cc
@@ -0,0 +1,111 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "scoped_tmp_file.h"
+
+#include <limits.h>
+
+#include <sstream>
+
+#include "file.h"
+#include "file_dir.h"
+#include "glog/logging.h"
+#include "mypath.h"
+#include "path.h"
+#include "scoped_fd.h"
+
+#ifdef _WIN32
+# include "posix_helper_win.h"
+#endif
+
+namespace devtools_goma {
+
+// Creates and opens a uniquely named temporary file for |prefix| under
+// GetGomaTmpDir().  If no valid descriptor is obtained, the failure is
+// logged together with the current value of filename_.
+ScopedTmpFile::ScopedTmpFile(const string& prefix) {
+#ifndef _WIN32
+  static const char kMkstempMarker[] = "XXXXXX";
+  filename_.assign(file::JoinPath(GetGomaTmpDir(), prefix));
+  filename_.append(kMkstempMarker);
+  // mkstemp rewrites the trailing XXXXXX of its argument in place, so
+  // filename_ holds the name of the created file afterwards.
+  fd_.reset(mkstemp(&filename_[0]));
+#else
+  char temp_file[MAX_PATH] = {0};
+  // GetTempFileNameA returns 0 on failure; in that case filename_ is left
+  // untouched (empty) and fd_ is never assigned.
+  if (GetTempFileNameA(GetGomaTmpDir().c_str(),
+                       prefix.c_str(), 0, temp_file) != 0) {
+    filename_ = temp_file;
+    fd_.reset(ScopedFd::Create(filename_, 0600));
+  }
+#endif
+  if (!fd_.valid()) {
+    PLOG(ERROR) << "failed to create temp file:"
+                << " filename=" << filename_;
+  }
+}
+
+// Creates a temporary file named <goma tmp dir>/<prefix><rand()><extension>.
+// |extension| must be non-empty and start with '.'.
+//
+// The file is created with ScopedFd::CreateExclusive; creation is attempted
+// up to kNumRetries times with a fresh random number each time.  If every
+// attempt fails, fd_ is invalid and filename_ is cleared: the last attempted
+// name was not created by this object (it most likely belongs to an
+// already existing file), so the destructor must not remove it.
+ScopedTmpFile::ScopedTmpFile(const string& prefix, const string& extension) {
+  DCHECK(!extension.empty());
+  DCHECK_EQ(extension[0], '.');
+  static const int kNumRetries = 5;
+  for (int retry = 0; retry < kNumRetries; ++retry) {
+    std::ostringstream ss;
+    ss << prefix;
+    ss << rand();
+    ss << extension;
+    filename_ = file::JoinPath(GetGomaTmpDir(), ss.str());
+    fd_.reset(ScopedFd::CreateExclusive(filename_, 0600));
+    if (fd_.valid())
+      break;
+    LOG(INFO) << "failed to make a unique file: " << filename_;
+  }
+  if (!fd_.valid()) {
+    // Do not keep a name we do not own; ~ScopedTmpFile() would delete it.
+    filename_.clear();
+    LOG(ERROR) << "Could not have a valid tmp file."
+               << " prefix=" << prefix
+               << " extension=" << extension;
+  }
+}
+
+// Closes the descriptor (if still open) and deletes the temporary file.
+ScopedTmpFile::~ScopedTmpFile() {
+  Close();
+  // filename_ is empty when no file name was ever assigned (e.g. the
+  // Windows constructor path where GetTempFileNameA failed); there is
+  // nothing to delete then.
+  if (!filename_.empty()) {
+    remove(filename_.c_str());
+  }
+}
+
+bool ScopedTmpFile::valid() const {
+  return fd_.valid();
+}
+
+ssize_t ScopedTmpFile::Write(const void* ptr, size_t len) const {
+  return fd_.Write(ptr, len);
+}
+
+bool ScopedTmpFile::Close() {
+  return fd_.Close();
+}
+
+// Creates a uniquely named directory "<goma tmp dir>/<prefix>_XXXXXXXX",
+// where mkdtemp replaces the X's.  On any failure dirname_ is left empty,
+// which makes valid() return false, and the reason is logged.
+ScopedTmpDir::ScopedTmpDir(const string& prefix) {
+  char tmpdir[PATH_MAX];
+  const string goma_tmp_dir = GetGomaTmpDir();
+  CheckTempDirectory(goma_tmp_dir);
+  static const char kTmpdirTemplate[] = "%s/%s_XXXXXXXX";
+  DCHECK_LT(prefix.size() + sizeof(kTmpdirTemplate),
+            static_cast<size_t>(PATH_MAX));
+#ifdef _WIN32
+  const int written = sprintf_s(tmpdir, sizeof(tmpdir), kTmpdirTemplate,
+                                goma_tmp_dir.c_str(), prefix.c_str());
+#else
+  const int written = snprintf(tmpdir, sizeof(tmpdir), kTmpdirTemplate,
+                               goma_tmp_dir.c_str(), prefix.c_str());
+#endif
+  // The DCHECK above does not account for the length of the tmp directory
+  // itself.  A truncated path would have lost (part of) the XXXXXXXX
+  // marker, so refuse to continue instead of handing it to mkdtemp.
+  if (written < 0 || static_cast<size_t>(written) >= sizeof(tmpdir)) {
+    LOG(ERROR) << "Temporary directory path is too long:"
+               << " tmpdir=" << goma_tmp_dir
+               << " prefix=" << prefix;
+    dirname_.clear();
+    return;
+  }
+  if (mkdtemp(tmpdir) == nullptr) {
+    LOG(ERROR) << "Failed to create temporary directory:"
+               << " tmpdir=" << goma_tmp_dir
+               << " prefix=" << prefix;
+    dirname_.clear();
+  } else {
+    dirname_ = tmpdir;
+  }
+}
+
+// Removes the directory and its contents, if one was created.  A failed
+// removal is only logged.
+ScopedTmpDir::~ScopedTmpDir() {
+  if (valid() && !RecursivelyDelete(dirname_)) {
+    LOG(ERROR) << "Failed to delete temporary directory: " << dirname_;
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/client/scoped_tmp_file.h b/client/scoped_tmp_file.h
new file mode 100644
index 0000000..d3588be
--- /dev/null
+++ b/client/scoped_tmp_file.h
@@ -0,0 +1,60 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SCOPED_TMP_FILE_H_
+#define DEVTOOLS_GOMA_CLIENT_SCOPED_TMP_FILE_H_
+
+#include <string>
+
+#include "scoped_fd.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// A class to provide a temporary file available within the scope.
+// The temporary file is created in the constructor and deleted in the
+// destructor.  The file descriptor is opened in the constructor and closed
+// when the user calls Close.  If Close is not called, the descriptor is
+// closed automatically in the destructor.
+class ScopedTmpFile {
+ public:
+  // Creates a temporary file whose name is derived from |prefix|.
+  explicit ScopedTmpFile(const string& prefix);
+  // Same as above, but the file name ends with |extension|.
+  // |extension| must start with '.', e.g. ".cc".
+  ScopedTmpFile(const string& prefix, const string& extension);
+  ~ScopedTmpFile();
+
+  // Name of the temporary file.
+  const string& filename() const { return filename_; }
+  // True while the underlying file descriptor is valid.
+  bool valid() const;
+  // Writes |len| bytes from |ptr| through the underlying descriptor and
+  // returns that descriptor's Write result.
+  ssize_t Write(const void* ptr, size_t len) const;
+  // Closes the underlying descriptor; the file itself is kept until
+  // destruction.
+  bool Close();
+
+ private:
+  ScopedFd fd_;
+  string filename_;
+
+  DISALLOW_COPY_AND_ASSIGN(ScopedTmpFile);
+};
+
+// A class to provide a temporary directory available within the scope.
+// The directory is created in the constructor and, if creation succeeded,
+// recursively deleted in the destructor.  The class is neither copyable
+// nor movable.
+class ScopedTmpDir {
+ public:
+  // Creates a temporary directory whose name is derived from |prefix|.
+  explicit ScopedTmpDir(const string& prefix);
+  ~ScopedTmpDir();
+
+  ScopedTmpDir(ScopedTmpDir&&) = delete;
+  ScopedTmpDir(const ScopedTmpDir&) = delete;
+  ScopedTmpDir& operator=(const ScopedTmpDir&) = delete;
+  ScopedTmpDir& operator=(ScopedTmpDir&&) = delete;
+
+  // Path of the created directory; empty if creation failed.
+  const string& dirname() const { return dirname_; }
+  // True if the directory was created successfully.
+  bool valid() const { return !dirname_.empty(); }
+
+ private:
+  string dirname_;
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SCOPED_TMP_FILE_H_
diff --git a/client/settings.cc b/client/settings.cc
new file mode 100644
index 0000000..09e8955
--- /dev/null
+++ b/client/settings.cc
@@ -0,0 +1,108 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "settings.h"
+
+#include <string>
+
+#include "callback.h"
+#include "compiler_specific.h"
+#include "env_flags.h"
+#include "glog/logging.h"
+#include "http.h"
+#include "http_init.h"
+#include "http_rpc.h"
+#include "http_rpc_init.h"
+#include "mypath.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/settings.pb.h"
+MSVC_POP_WARNING()
+#include "util.h"
+#include "worker_thread_manager.h"
+
+#define GOMA_DECLARE_FLAGS_ONLY
+#include "goma_flags.cc"
+
+namespace devtools_goma {
+
+// Issues the settings RPC: sends |req| through |http_rpc| with an empty
+// path and stores the reply in |resp| and the outcome in |status|.
+// ApplySettings wraps this in a callback that is run via
+// WorkerThreadRunner.
+void SettingsGetCall(HttpRPC* http_rpc,
+                     SettingsReq* req, SettingsResp* resp,
+                     HttpRPC::Status* status) {
+  http_rpc->Call("", req, resp, status);
+}
+
+// Fetches settings from |settings_server| and applies them to the process
+// flags (FLAGS_STUBBY_PROXY_IP_ADDRESS, FLAGS_STUBBY_PROXY_PORT,
+// FLAGS_USE_SSL, FLAGS_URL_PATH_PREFIX, FLAGS_SSL_EXTRA_CERT_DATA).
+//
+// If |expect_settings| is non-empty, the call is fatal (LOG(FATAL) /
+// CHECK) when the RPC fails, when the response carries no settings, or
+// when the returned settings name differs from |expect_settings|.
+// If it is empty, those conditions are only logged.
+void ApplySettings(const string& settings_server,
+                   const string& expect_settings,
+                   WorkerThreadManager* wm) {
+  const bool settings_required = !expect_settings.empty();
+
+  HttpClient::Options http_options;
+  InitHttpClientOptions(&http_options);
+  http_options.InitFromURL(settings_server);
+  HttpClient client(
+      HttpClient::NewSocketFactoryFromOptions(http_options),
+      HttpClient::NewTLSEngineFactoryFromOptions(http_options),
+      http_options, wm);
+
+  HttpRPC::Options http_rpc_options;
+  InitHttpRPCOptions(&http_rpc_options);
+  HttpRPC http_rpc(&client, http_rpc_options);
+
+  HttpRPC::Status status;
+  SettingsReq req;
+  req.set_hostname(ToShortNodename(GetNodename()));
+  if (!FLAGS_USE_CASE.empty()) {
+    req.set_use_case(FLAGS_USE_CASE);
+  }
+  SettingsResp resp;
+
+  LOG(INFO) << "Settings get from " << settings_server
+            << " req=" << req.DebugString();
+  // The runner is destroyed right after construction; |status| and |resp|
+  // are only inspected afterwards.
+  // NOTE(review): presumably ~WorkerThreadRunner waits for the callback to
+  // finish -- confirm against worker_thread_manager.h.
+  std::unique_ptr<WorkerThreadRunner> runner(
+      new WorkerThreadRunner(
+          wm, FROM_HERE,
+          NewCallback(SettingsGetCall, &http_rpc, &req, &resp, &status)));
+  runner.reset();
+
+  if (status.err) {
+    LOG(ERROR) << "Settings.Get error: " << status.DebugString();
+    LOG_IF(FATAL, settings_required)
+        << "expect settings:" << expect_settings
+        << " but failed to get settings";
+    return;
+  }
+
+  if (!resp.has_settings()) {
+    LOG(WARNING) << "no settings";
+    LOG_IF(FATAL, settings_required)
+        << "expect settings:" << expect_settings
+        << " but no settings";
+    return;
+  }
+
+  const auto& settings = resp.settings();
+  LOG(INFO) << "Settings name=" << settings.name();
+
+  if (!settings.endpoint_url().empty()) {
+    // Let HttpClient::Options split the URL into host/port/ssl/path.
+    HttpClient::Options endpoint;
+    endpoint.InitFromURL(settings.endpoint_url());
+    LOG(INFO) << "endpoint url=" << settings.endpoint_url()
+              << " STUBBY_PROXY_IP_ADDRESS=" << endpoint.dest_host_name
+              << " STUBBY_PROXY_PORT=" << endpoint.dest_port
+              << " USE_SSL=" << endpoint.use_ssl
+              << " URL_PATH_PREFIX=" << endpoint.url_path_prefix;
+    FLAGS_STUBBY_PROXY_IP_ADDRESS = endpoint.dest_host_name;
+    FLAGS_STUBBY_PROXY_PORT = endpoint.dest_port;
+    FLAGS_USE_SSL = endpoint.use_ssl;
+    FLAGS_URL_PATH_PREFIX = endpoint.url_path_prefix;
+  }
+
+  if (!settings.certificate().empty()) {
+    LOG(INFO) << "certificate=" << settings.certificate();
+    FLAGS_SSL_EXTRA_CERT_DATA = settings.certificate();
+  }
+
+  LOG(INFO) << "Settings updated";
+  if (settings_required) {
+    CHECK_EQ(settings.name(), expect_settings)
+        << ": unexpected settings";
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/client/settings.h b/client/settings.h
new file mode 100644
index 0000000..417f5eb
--- /dev/null
+++ b/client/settings.h
@@ -0,0 +1,20 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SETTINGS_H_
+#define DEVTOOLS_GOMA_CLIENT_SETTINGS_H_
+
+#include <string>
+
+namespace devtools_goma {
+
+class WorkerThreadManager;
+
+void ApplySettings(const std::string& settings_server,
+                   const std::string& expect_settings,
+                   WorkerThreadManager* wm);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SETTINGS_H_
diff --git a/client/settings.proto b/client/settings.proto
new file mode 100644
index 0000000..ee5d75a
--- /dev/null
+++ b/client/settings.proto
@@ -0,0 +1,23 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+syntax = "proto3";
+
+package devtools_goma;
+
+// Settings handed out by the settings server.
+message Settings {
+  // Name of this settings entry; the client compares it with the expected
+  // settings name when one is configured.
+  string name = 3;
+
+  // URL of the endpoint the client should talk to.  The client derives
+  // host, port, SSL usage and URL path prefix from it.
+  string endpoint_url = 1;
+  // Extra certificate data; the client stores it in
+  // FLAGS_SSL_EXTRA_CERT_DATA.
+  string certificate = 2;
+}
+
+// Request sent by the client to fetch its settings.
+message SettingsReq {
+  // Short node name of the requesting machine.
+  string hostname = 1;
+  // Value of the client's USE_CASE flag; left unset when that flag is
+  // empty.
+  string use_case = 2;
+}
+
+// Response to SettingsReq.
+message SettingsResp {
+  // Settings for the requester.  The client treats an absent field as
+  // "no settings".
+  Settings settings = 1;
+}
diff --git a/client/sha256hash_hasher.h b/client/sha256hash_hasher.h
new file mode 100644
index 0000000..0feeb82
--- /dev/null
+++ b/client/sha256hash_hasher.h
@@ -0,0 +1,21 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SHA256HASH_HASHER_H_
+#define DEVTOOLS_GOMA_CLIENT_SHA256HASH_HASHER_H_
+
+#include "goma_hash.h"
+
+namespace devtools_goma {
+
+// SHA256HashValueHasher can be used as the hash function of an
+// unordered_map keyed by SHA256HashValue.  It simply forwards to
+// SHA256HashValue::Hash().
+struct SHA256HashValueHasher {
+  size_t operator()(const SHA256HashValue& hash_value) const {
+    return hash_value.Hash();
+  }
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SHA256HASH_HASHER_H_
diff --git a/client/simple_timer.cc b/client/simple_timer.cc
new file mode 100644
index 0000000..2d7a81a
--- /dev/null
+++ b/client/simple_timer.cc
@@ -0,0 +1,38 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "simple_timer.h"
+
+// SimpleTimer::Start() and SimpleTimer::GetInNanoSeconds() are
+// platform specific. See simple_timer_*.cc.
+
+namespace devtools_goma {
+
+SimpleTimer::SimpleTimer(CtorFlag cf) {
+  if (cf == START) {
+    Start();
+  }
+}
+
+SimpleTimer::SimpleTimer() {
+  Start();
+}
+
+SimpleTimer::~SimpleTimer() {}
+
+// Return elapsed time in seconds.
+double SimpleTimer::Get() const {
+  return GetInNanoSeconds() / 1000000000.0;
+}
+
+// Return elapsed time in milliseconds.
+int SimpleTimer::GetInMs() const {
+  return static_cast<int>(GetInMilliSeconds());
+}
+
+long long SimpleTimer::GetInMilliSeconds() const {
+  return GetInNanoSeconds() / 1000000;
+}
+
+}  // namespace devtools_goma
diff --git a/client/simple_timer.h b/client/simple_timer.h
new file mode 100644
index 0000000..7032d98
--- /dev/null
+++ b/client/simple_timer.h
@@ -0,0 +1,55 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SIMPLE_TIMER_H_
+#define DEVTOOLS_GOMA_CLIENT_SIMPLE_TIMER_H_
+
+#ifdef _WIN32
+# include <Windows.h>
+#elif defined(__linux__)
+# include <time.h>
+#elif defined(__MACH__)
+# include <cstdint>
+#else
+# error "unknown platform"
+#endif
+
+namespace devtools_goma {
+
+// A stopwatch measuring elapsed time since the last call to Start().
+// Start() and GetInNanoSeconds() are implemented per platform in
+// simple_timer_*.cc.
+class SimpleTimer {
+ public:
+  // START: the constructor calls Start().
+  // NO_START: the timer is not started; call Start() before reading it.
+  enum CtorFlag { NO_START, START };
+  explicit SimpleTimer(CtorFlag cf);
+  // Equivalent to SimpleTimer(START).
+  SimpleTimer();
+  ~SimpleTimer();
+  // (Re)starts the measurement from now.
+  void Start();
+
+  // Return elapsed time in seconds.
+  double Get() const;
+
+  // Returns elapsed time in milliseconds, for short intervals only.
+  // The value is narrowed to int: 2^31 / 1000 / 3600 / 24 ~= 24.8, so a
+  // 32-bit int can no longer represent it after about 24.8 days.
+  int GetInMs() const;
+
+  // Return elapsed time in milliseconds.
+  long long GetInMilliSeconds() const;
+
+  // Return elapsed time in nanoseconds.
+  long long GetInNanoSeconds() const;
+
+ private:
+#ifdef _WIN32
+  LARGE_INTEGER start_time_;
+  LARGE_INTEGER frequency_;
+#elif defined(__linux__)
+  struct timespec start_time_;
+#elif defined(__MACH__)
+  uint64_t start_time_;
+#endif
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SIMPLE_TIMER_H_
diff --git a/client/simple_timer_linux.cc b/client/simple_timer_linux.cc
new file mode 100644
index 0000000..6ed33f9
--- /dev/null
+++ b/client/simple_timer_linux.cc
@@ -0,0 +1,39 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "simple_timer.h"
+
+#include <cstdint>
+
+#include "glog/logging.h"
+
+namespace devtools_goma {
+
+void SimpleTimer::Start() {
+  clock_gettime(CLOCK_MONOTONIC, &start_time_);
+}
+
+// Returns the nanoseconds elapsed since Start(), measured with
+// CLOCK_MONOTONIC.  Returns 0 (and logs an error) if the clock appears to
+// have gone backwards.
+long long SimpleTimer::GetInNanoSeconds() const {
+  struct timespec now;
+  clock_gettime(CLOCK_MONOTONIC, &now);
+
+  const uint64_t now_ns = now.tv_sec * 1000000000LL + now.tv_nsec;
+  const uint64_t start_ns =
+      start_time_.tv_sec * 1000000000LL + start_time_.tv_nsec;
+
+  DCHECK_LE(start_ns, now_ns);
+  if (now_ns >= start_ns) {
+    return static_cast<long long>(now_ns - start_ns);
+  }
+
+  // This shouldn't happen, but check.
+  LOG(ERROR) << "SimpleTimer is not monotonic:"
+             << " start_time=" << start_ns
+             << " end_time=" << now_ns;
+  return 0;
+}
+
+}  // namespace devtools_goma
diff --git a/client/simple_timer_mac.cc b/client/simple_timer_mac.cc
new file mode 100644
index 0000000..7bdbe3d
--- /dev/null
+++ b/client/simple_timer_mac.cc
@@ -0,0 +1,60 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "simple_timer.h"
+
+#include <mach/mach_time.h>
+
+#include <type_traits>
+
+#include "glog/logging.h"
+
+namespace {
+// Converts a difference of mach_absolute_time() values into nanoseconds
+// by scaling it with the numer/denom ratio reported by
+// mach_timebase_info().  The ratio is fetched on the first call and kept
+// in a function-local static.
+long long MachToNanoSec(uint64_t mach_diff_time) {
+  // code from chromium/src/base/time/time_mac.cc
+
+  static mach_timebase_info_data_t timebase_info;
+  static_assert(std::is_trivially_destructible<mach_timebase_info_data_t>::value,
+                "mach_timebase_info_data_t must be trivially destructible");
+  if (timebase_info.denom == 0) {
+    // Zero-initialization of statics guarantees that denom will be 0 before
+    // calling mach_timebase_info.  mach_timebase_info will never set denom to
+    // 0 as that would be invalid, so the zero-check can be used to determine
+    // whether mach_timebase_info has already been called.  This is
+    // recommended by Apple's QA1398.
+    kern_return_t kr = mach_timebase_info(&timebase_info);
+    CHECK_EQ(kr, KERN_SUCCESS);
+    CHECK_NE(0UL, timebase_info.denom);
+  }
+
+  // numer and denom are both expected to be 1.
+  // The multiplication below is done in uint64_t and is not checked for
+  // overflow.
+  uint64_t result = mach_diff_time;
+  result *= timebase_info.numer;
+  result /= timebase_info.denom;
+  return static_cast<long long>(result);
+}
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+void SimpleTimer::Start() {
+  // mach_absolute_time is monotonic.
+  start_time_ = mach_absolute_time();
+}
+
+// Returns the nanoseconds elapsed since Start(), based on
+// mach_absolute_time().  Returns 0 (and logs an error) if the clock
+// appears to have gone backwards.
+long long SimpleTimer::GetInNanoSeconds() const {
+  const uint64_t now = mach_absolute_time();
+
+  DCHECK_LE(start_time_, now);
+  if (now >= start_time_) {
+    return MachToNanoSec(now - start_time_);
+  }
+
+  // This shouldn't happen, but check.
+  LOG(ERROR) << "SimpleTimer is not monotonic:"
+             << " start_time=" << start_time_
+             << " end_time=" << now;
+  return 0;
+}
+
+}  // namespace devtools_goma
diff --git a/client/simple_timer_unittest.cc b/client/simple_timer_unittest.cc
new file mode 100644
index 0000000..2223098
--- /dev/null
+++ b/client/simple_timer_unittest.cc
@@ -0,0 +1,26 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "simple_timer.h"
+
+#include <gtest/gtest.h>
+
+namespace devtools_goma {
+
+// Smoke test to ensure SimpleTimer never returns a negative value and
+// that consecutive readings never decrease.
+TEST(SimpleTimer, smoke) {
+  SimpleTimer st(SimpleTimer::START);
+
+  for (int i = 0; i < 1000; ++i) {
+    EXPECT_GE(st.GetInNanoSeconds(), 0);
+    EXPECT_GE(st.Get(), 0.0);
+
+    // The second reading must not be smaller than the first one.
+    long long t1 = st.GetInNanoSeconds();
+    long long t2 = st.GetInNanoSeconds();
+    EXPECT_GE(t2, t1);
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/client/simple_timer_win.cc b/client/simple_timer_win.cc
new file mode 100644
index 0000000..f892515
--- /dev/null
+++ b/client/simple_timer_win.cc
@@ -0,0 +1,38 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "simple_timer.h"
+
+#include "glog/logging.h"
+
+namespace devtools_goma {
+
+void SimpleTimer::Start() {
+  ::QueryPerformanceCounter(&start_time_);
+  ::QueryPerformanceFrequency(&frequency_);
+}
+
+// Returns the nanoseconds elapsed since Start(), based on
+// QueryPerformanceCounter (QPC).  Returns 0 (and logs an error) if the
+// counter appears to have gone backwards.
+//
+// In chromium's base/time/time_win.cc, QPC is not used in some conditions.
+// But we assume that goma users use goma on machines QPC works correctly.
+long long SimpleTimer::GetInNanoSeconds() const {
+  LARGE_INTEGER now;
+  ::QueryPerformanceCounter(&now);
+
+  DCHECK_LE(start_time_.QuadPart, now.QuadPart);
+  if (now.QuadPart < start_time_.QuadPart) {
+    // This shouldn't happen, but check.
+    LOG(ERROR) << "SimpleTimer is not monotonic: "
+               << " start_time=" << start_time_.QuadPart
+               << " end_time=" << now.QuadPart;
+    return 0;
+  }
+
+  // ticks * 1e9 / (ticks per second), computed in double.
+  // https://msdn.microsoft.com/en-us/library/windows/desktop/dn553408(v=vs.85).aspx
+  const double ticks =
+      static_cast<double>(now.QuadPart - start_time_.QuadPart);
+  const double nanos = ticks * 1000000000 / frequency_.QuadPart;
+  return static_cast<long long>(nanos);
+}
+
+}  // namespace devtools_goma
diff --git a/client/socket_descriptor.cc b/client/socket_descriptor.cc
new file mode 100644
index 0000000..2218590
--- /dev/null
+++ b/client/socket_descriptor.cc
@@ -0,0 +1,319 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "socket_descriptor.h"
+
+#ifndef _WIN32
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#else
+#include <Winsock2.h>
+#endif
+
+#include <memory>
+
+#include "callback.h"
+#include "compiler_specific.h"
+#include "glog/logging.h"
+#include "worker_thread.h"
+
+namespace devtools_goma {
+
+SocketDescriptor::SocketDescriptor(ScopedSocket&& fd,
+                                   WorkerThreadManager::Priority priority,
+                                   WorkerThreadManager::WorkerThread* worker)
+    : fd_(std::move(fd)),
+      priority_(priority),
+      worker_(worker),
+      readable_closure_(nullptr),
+      writable_closure_(nullptr),
+      timeout_(0),
+      last_time_(worker->Now()),
+      ALLOW_THIS_IN_INITIALIZER_LIST(
+          timeout_run_closure_(NewPermanentCallback(
+              this, &SocketDescriptor::TimeoutClosure))),
+      timeout_closure_(nullptr),
+      read_in_queue_(false),
+      write_in_queue_(false),
+      timeout_in_queue_(false),
+      active_read_(false),
+      active_write_(false),
+      write_poll_registered_(false),
+      is_closed_(false),
+      need_retry_(false) {
+  thread_ = GetCurrentThreadId();
+  CHECK(fd_.valid());
+}
+
+SocketDescriptor::~SocketDescriptor() {
+  CHECK(!read_in_queue_);
+  CHECK(!write_in_queue_);
+  CHECK(!timeout_in_queue_);
+  // Note that WorkerThreadManager::DeleteSocketDescriptor will take care of
+  // unregistering closures from polling loop.
+  // We do not need either to call UnregisterPollEvent or
+  // CHECK(!write_poll_registered_).
+}
+
+// Installs |closure| as the readable callback, marks reading active,
+// refreshes the last-activity time and registers the descriptor for read
+// events on the worker.  Must be called on the owning thread.
+void SocketDescriptor::NotifyWhenReadable(
+    std::unique_ptr<PermanentClosure> closure) {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  readable_closure_ = std::move(closure);
+  last_time_ = worker_->Now();
+  active_read_ = true;
+  worker_->RegisterPollEvent(this, DescriptorPoller::kReadEvent);
+  // Keep a space before the pointer, matching the log in ClearReadable().
+  VLOG(1) << "Notify when " << fd_.get()
+          << " readable " << readable_closure_.get();
+}
+
+// Installs |closure| as the writable callback, marks writing active,
+// refreshes the last-activity time and registers the descriptor for write
+// events on the worker.  Must be called on the owning thread.
+void SocketDescriptor::NotifyWhenWritable(
+    std::unique_ptr<PermanentClosure> closure) {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  writable_closure_ = std::move(closure);
+  last_time_ = worker_->Now();
+  active_write_ = true;
+  worker_->RegisterPollEvent(this, DescriptorPoller::kWriteEvent);
+  write_poll_registered_ = true;
+  // Keep a space before the pointer, matching the log in ClearWritable().
+  VLOG(1) << "Notify when " << fd_.get()
+          << " writable " << writable_closure_.get();
+}
+
+void SocketDescriptor::ClearReadable() {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  VLOG(1) << "Clear " << fd_.get() << " readable " << readable_closure_.get();
+  readable_closure_.reset();
+  active_read_ = false;
+  worker_->UnregisterPollEvent(this, DescriptorPoller::kReadEvent);
+}
+
+void SocketDescriptor::ClearWritable() {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  VLOG(1) << "Clear " << fd_.get() << " writable " << writable_closure_.get();
+  writable_closure_.reset();
+  active_write_ = false;
+  if (write_poll_registered_) {
+    worker_->UnregisterPollEvent(this, DescriptorPoller::kWriteEvent);
+    write_poll_registered_ = false;
+  }
+}
+
+void SocketDescriptor::NotifyWhenTimedout(double timeout,
+                                          OneshotClosure* closure) {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  DCHECK(!timeout_closure_);
+  timeout_ = timeout;
+  timeout_closure_.reset(closure);
+  last_time_ = worker_->Now();
+  worker_->RegisterTimeoutEvent(this);
+}
+
+void SocketDescriptor::ChangeTimeout(double timeout) {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  DCHECK(timeout_closure_);
+  timeout_ = timeout;
+  last_time_ = worker_->Now();
+}
+
+// Disables the timeout: resets the timeout value, drops any pending
+// timeout closure and unregisters the timeout event from the worker.
+void SocketDescriptor::ClearTimeout() {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  timeout_ = 0;
+  // reset() on an empty pointer is a no-op, so no emptiness check is needed.
+  timeout_closure_.reset();
+  worker_->UnregisterTimeoutEvent(this);
+}
+
+// Reads up to |len| bytes into |ptr| and returns the result of the
+// underlying read.  A return of 0 marks the descriptor as closed; a
+// negative return updates the retry flag / last error message.
+ssize_t SocketDescriptor::Read(void* ptr, size_t len) {
+  need_retry_ = false;
+  last_time_ = worker_->Now();
+  const ssize_t nread = fd_.Read(ptr, len);
+  if (nread == 0) {
+    is_closed_ = true;
+  } else if (nread < 0) {
+    UpdateLastErrorStatus();
+  }
+  return nread;
+}
+
+// Writes up to |len| bytes from |ptr| and returns the result of the
+// underlying write.  A negative return updates the retry flag / last
+// error message.
+ssize_t SocketDescriptor::Write(const void* ptr, size_t len) {
+  need_retry_ = false;
+  last_time_ = worker_->Now();
+  const ssize_t nwritten = fd_.Write(ptr, len);
+  if (nwritten < 0) {
+    UpdateLastErrorStatus();
+  }
+  return nwritten;
+}
+
+bool SocketDescriptor::NeedRetry() const {
+  return need_retry_;
+}
+
+// Shuts down the sending half of the socket and returns shutdown()'s
+// result.  A negative result updates the retry flag / last error message.
+int SocketDescriptor::ShutdownForSend() {
+  need_retry_ = false;
+  last_time_ = worker_->Now();
+  // Only the name of the "stop sending" constant differs per platform.
+#ifndef _WIN32
+  const int how = SHUT_WR;
+#else
+  const int how = SD_SEND;
+#endif
+  const int result = shutdown(fd_.get(), how);
+  if (result < 0) {
+    UpdateLastErrorStatus();
+  }
+  return result;
+}
+
+// Returns true if the FIONREAD query succeeds and reports at least one
+// byte available for reading; returns false if the query fails or reports
+// zero bytes.
+bool SocketDescriptor::IsReadable() const {
+  // Initialized so that the value is well-defined even if the query fails.
+  int n = 0;
+#ifndef _WIN32
+  const bool ioctl_failed = ioctl(fd_.get(), FIONREAD, &n) == -1;
+#else
+  DWORD byte_returned = 0;
+  // WSAIoctl returns 0 on success and SOCKET_ERROR on failure, so failure
+  // is "== SOCKET_ERROR" (the previous "!=" treated success as failure).
+  const bool ioctl_failed =
+      WSAIoctl(fd_.get(), FIONREAD, nullptr, 0, &n, sizeof(n),
+               &byte_returned, nullptr, nullptr) == SOCKET_ERROR;
+#endif
+  return !ioctl_failed && n != 0;
+}
+
+void SocketDescriptor::StopRead() {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  active_read_ = false;
+}
+
+void SocketDescriptor::StopWrite() {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  active_write_ = false;
+}
+
+void SocketDescriptor::RestartRead() {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  active_read_ = true;
+}
+
+void SocketDescriptor::RestartWrite() {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  active_write_ = true;
+  if (!write_poll_registered_) {
+    VLOG(2) << "Register write again: fd=" << fd();
+    worker_->RegisterPollEvent(this, DescriptorPoller::kWriteEvent);
+    write_poll_registered_ = true;
+  }
+}
+
+bool SocketDescriptor::wait_readable() const {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  return active_read_ && readable_closure_ != nullptr && !read_in_queue_;
+}
+
+bool SocketDescriptor::wait_writable() const {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  return active_write_ && writable_closure_ != nullptr && !write_in_queue_;
+}
+
+// Returns a one-shot closure that runs the readable callback, or nullptr
+// if none should be queued now (see GetClosure).  Handing out a closure
+// refreshes the last-activity time.
+OneshotClosure* SocketDescriptor::GetReadableClosure() {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  OneshotClosure* const closure =
+      GetClosure(&read_in_queue_, &active_read_, readable_closure_.get());
+  if (closure == nullptr) {
+    return nullptr;
+  }
+  last_time_ = worker_->Now();
+  return closure;
+}
+
+// Returns a one-shot closure that runs the writable callback, or nullptr
+// if none should be queued now (see GetClosure).  Handing out a closure
+// refreshes the last-activity time.
+OneshotClosure* SocketDescriptor::GetWritableClosure() {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  OneshotClosure* const closure =
+      GetClosure(&write_in_queue_, &active_write_, writable_closure_.get());
+  if (closure == nullptr) {
+    return nullptr;
+  }
+  last_time_ = worker_->Now();
+  return closure;
+}
+
+// Returns a one-shot closure that runs the timeout handler if a timeout
+// is set, more than timeout_ has passed since the last activity, and no
+// read/write/timeout closure is already queued.  Otherwise returns
+// nullptr.
+OneshotClosure* SocketDescriptor::GetTimeoutClosure() {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  if (!(timeout_ > 0)) {
+    return nullptr;
+  }
+  if (!((worker_->Now() - last_time_) > timeout_)) {
+    return nullptr;
+  }
+  if (read_in_queue_ || write_in_queue_ || timeout_in_queue_) {
+    return nullptr;
+  }
+  return GetClosure(&timeout_in_queue_, nullptr, timeout_run_closure_.get());
+}
+
+// Wraps |closure| into a one-shot callback (RunCallback) and marks it as
+// queued through |*in_queue|.
+//
+// |active| points at the activity flag guarding the closure; when it is
+// nullptr (timeout case), the closure counts as active if either reading
+// or writing is active.  Returns nullptr when inactive, when |closure| is
+// nullptr, or when a closure is already queued.
+OneshotClosure* SocketDescriptor::GetClosure(
+    bool* in_queue, bool* active, PermanentClosure* closure) {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  const bool is_active =
+      (active != nullptr) ? *active : (active_read_ || active_write_);
+  if (!is_active || closure == nullptr)
+    return nullptr;
+  DCHECK(in_queue != nullptr);
+  if (*in_queue)
+    return nullptr;
+
+  *in_queue = true;
+  return NewCallback(this, &SocketDescriptor::RunCallback,
+                     closure, in_queue, active);
+}
+
+// Body of the one-shot callback created by GetClosure: clears the queued
+// flag and runs |closure| if it is still active.  |active| has the same
+// meaning as in GetClosure (nullptr: active if reading or writing is
+// active).
+void SocketDescriptor::RunCallback(
+    PermanentClosure* closure, bool* in_queue, bool* active) {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  DCHECK(closure != nullptr);
+  DCHECK(in_queue != nullptr);
+  DCHECK(*in_queue);
+  *in_queue = false;
+
+  const bool is_active =
+      (active != nullptr) ? *active : (active_read_ || active_write_);
+  if (is_active) {
+    closure->Run();
+  }
+  // Otherwise there is no need to delete closure; it must be a permanent
+  // closure.
+}
+
+// Timeout handler: runs the user's timeout closure once, provided that no
+// read/write closure is queued, reading or writing is still active, and
+// more than timeout_ has passed since the last activity.
+void SocketDescriptor::TimeoutClosure() {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  const bool io_queued = read_in_queue_ || write_in_queue_;
+  const bool io_active = active_read_ || active_write_;
+  if (io_queued || !io_active)
+    return;
+  if (!(timeout_ > 0) || !((worker_->Now() - last_time_) > timeout_))
+    return;
+
+  // Ownership is released here: no need to delete the closure, it deletes
+  // itself.
+  OneshotClosure* closure = timeout_closure_.release();
+  if (closure == nullptr)
+    return;
+  LOG(INFO) << "socket timeout fd=" << fd_.get()
+            << " timeout=" << timeout_;
+  closure->Run();
+}
+
+// Records the outcome of a failed socket call.
+//
+// On POSIX, EINTR/EAGAIN only set need_retry_ and leave the last error
+// message untouched.  Any other error stores its textual description in
+// last_error_message_.
+void SocketDescriptor::UpdateLastErrorStatus() {
+#ifndef _WIN32
+  if (errno == EINTR || errno == EAGAIN) {
+    need_retry_ = true;
+    return;
+  }
+#endif
+
+  char error_message[1024] = {0};
+#ifndef _WIN32
+#if defined(__GLIBC__) && defined(_GNU_SOURCE)
+  // GNU strerror_r returns the message and may leave the buffer unused,
+  // so the return value (not the buffer) has to be used.
+  const char* message =
+      strerror_r(errno, error_message, sizeof error_message);
+  last_error_message_ = (message != nullptr) ? message : error_message;
+#else
+  // XSI strerror_r returns an int status and writes the message into the
+  // buffer.
+  (void)strerror_r(errno, error_message, sizeof error_message);
+  last_error_message_ = error_message;
+#endif
+#else
+  FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, 0, WSAGetLastError(), 0,
+                 error_message, sizeof error_message, 0);
+  last_error_message_ = error_message;
+#endif
+}
+
+void SocketDescriptor::UnregisterWritable() {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  if (!active_write_ && write_poll_registered_) {
+    worker_->UnregisterPollEvent(this, DescriptorPoller::kWriteEvent);
+    write_poll_registered_ = false;
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/client/socket_descriptor.h b/client/socket_descriptor.h
new file mode 100644
index 0000000..5265414
--- /dev/null
+++ b/client/socket_descriptor.h
@@ -0,0 +1,117 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SOCKET_DESCRIPTOR_H_
+#define DEVTOOLS_GOMA_CLIENT_SOCKET_DESCRIPTOR_H_
+
+#include <memory>
+
+#include "basictypes.h"
+#include "scoped_fd.h"
+#include "worker_thread_manager.h"
+#include "descriptor.h"
+
+namespace devtools_goma {
+
+// Descriptor implementation backed by a ScopedSocket.
+// Methods that DCHECK THREAD_ID_IS_SELF(thread_) in the implementation must
+// be called on the thread recorded in |thread_|.
+class SocketDescriptor : public Descriptor {
+ public:
+  SocketDescriptor(ScopedSocket&& fd,
+                   WorkerThreadManager::Priority priority,
+                   WorkerThreadManager::WorkerThread* worker);
+  ~SocketDescriptor() override;
+
+  // Raw descriptor value of the owned socket.
+  virtual int fd() const { return fd_.get(); }
+  // The owned socket viewed as an IOChannel.
+  virtual const IOChannel* wrapper() const { return &fd_; }
+  // Moves the owned socket out of this descriptor.
+  ScopedSocket ReleaseFd() { return std::move(fd_); }
+  virtual WorkerThreadManager::Priority priority() const { return priority_; }
+  SocketDescriptor* socket_descriptor() override { return this; }
+  // closure must be permanent closure.
+  void NotifyWhenReadable(
+      std::unique_ptr<PermanentClosure> closure) override;
+  // closure must be permanent closure.
+  void NotifyWhenWritable(
+      std::unique_ptr<PermanentClosure> closure) override;
+  virtual void ClearReadable();
+  void ClearWritable() override;
+  // closure must be one-shot closure.
+  void NotifyWhenTimedout(double timeout,
+                          OneshotClosure* closure) override;
+  void ChangeTimeout(double timeout) override;
+  virtual void ClearTimeout();
+  ssize_t Read(void* ptr, size_t len) override;
+  ssize_t Write(const void* ptr, size_t len) override;
+
+  bool NeedRetry() const override;
+  virtual int ShutdownForSend();
+  string GetLastErrorMessage() const override { return last_error_message_; }
+  virtual bool IsReadable() const;
+  bool IsClosed() const { return is_closed_; }
+  // Reusable only if not closed and no error message has been recorded.
+  bool CanReuse() const override {
+    return !IsClosed() && last_error_message_.empty();
+  }
+  void StopRead() override;
+  void StopWrite() override;
+  virtual void RestartRead();
+  virtual void RestartWrite();
+  // Unregisters the write poll event if writing is not active and the write
+  // poll event is currently registered.
+  void UnregisterWritable();
+
+  bool wait_readable() const;
+  bool wait_writable() const;
+
+  OneshotClosure* GetReadableClosure();
+  OneshotClosure* GetWritableClosure();
+  OneshotClosure* GetTimeoutClosure();
+
+ private:
+  // Gets a one-shot closure to run permanent "closure" and
+  // mark a closure in run queue,  so that it won't add new closure in run
+  // queue while the closure is waiting to run.
+  // If the closure is not permanent closure, make sure delete closure when
+  // GetClosure() returns nullptr, or check *in_queue is false before calling
+  // GetClosure().
+  OneshotClosure* GetClosure(bool* in_queue, bool* active,
+                             PermanentClosure* closure);
+
+  // Marks no closure in run queue and runs permanent "closure".
+  // The closure is skipped when inactive: if "active" is nullptr that means
+  // neither active_read_ nor active_write_ is set; otherwise it means
+  // *active is false.
+  void RunCallback(PermanentClosure* closure, bool* in_queue, bool* active);
+
+  // Fires timeout.
+  // If read or write closure in queue while this closure has been pending
+  // in queue, cancel timeout.
+  void TimeoutClosure();
+
+  // Sets need_retry_ for EINTR/EAGAIN (non-Windows); otherwise stores the
+  // system error text in last_error_message_.
+  void UpdateLastErrorStatus();
+
+  ScopedSocket fd_;
+  const WorkerThreadManager::Priority priority_;
+  WorkerThreadManager::WorkerThread* worker_;
+  // permanent closure.
+  std::unique_ptr<PermanentClosure> readable_closure_;
+  // permanent closure.
+  std::unique_ptr<PermanentClosure> writable_closure_;
+  // Timeout period; TimeoutClosure() ignores it unless it is > 0.
+  double timeout_;
+  // Reference time compared against worker_->Now() to detect a timeout.
+  double last_time_;
+
+  // permanent to TimeoutClosure()
+  std::unique_ptr<PermanentClosure> timeout_run_closure_;
+  // single shot specified by NotifyWhenTimedout.
+  std::unique_ptr<OneshotClosure> timeout_closure_;
+  WorkerThreadManager::ThreadId thread_;
+  bool read_in_queue_;
+  bool write_in_queue_;
+  bool timeout_in_queue_;
+  bool active_read_;
+  bool active_write_;
+  string last_error_message_;
+  bool write_poll_registered_;
+  bool is_closed_;
+  bool need_retry_;
+  DISALLOW_COPY_AND_ASSIGN(SocketDescriptor);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SOCKET_DESCRIPTOR_H_
diff --git a/client/socket_factory.h b/client/socket_factory.h
new file mode 100644
index 0000000..c5a53ba
--- /dev/null
+++ b/client/socket_factory.h
@@ -0,0 +1,96 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SOCKET_FACTORY_H_
+#define DEVTOOLS_GOMA_CLIENT_SOCKET_FACTORY_H_
+
+#include <deque>
+#include <string>
+
+#include "basictypes.h"
+
+#ifdef _WIN32
+# include "socket_helper_win.h"
+#endif
+
+using std::string;
+
+namespace devtools_goma {
+
+class ScopedSocket;
+
+// Interface for objects that want to be told before a socket factory closes
+// a socket.
+class SocketFactoryObserver {
+ public:
+  virtual ~SocketFactoryObserver() {}
+  // Called with the descriptor value of the socket that is about to be closed.
+  virtual void WillCloseSocket(int sock) = 0;
+};
+
+// TODO: template for ScopedSocket and ScopedNamedPipe.
+// Abstract source of sockets to a single destination.
+// Sockets obtained from NewSocket() are handed back with either
+// ReleaseSocket() (reusable) or CloseSocket() (must be closed).
+class SocketFactory {
+ public:
+  virtual ~SocketFactory() {}
+
+  // Doesn't take ownership of observer.
+  void SetObserver(SocketFactoryObserver* observer) {
+    observer_ = observer;
+  }
+
+  // Returns true if socket factory is initialized.
+  // Note:
+  // Once the socket factory has a network issue, IsInitialized() becomes
+  // false and keeps returning false until NewSocket() is called.
+  // TODO: remove this method if feasible.
+  virtual bool IsInitialized() const = 0;
+
+  // Returns new available socket.
+  // Caller should return the socket to the socket factory by ReleaseSocket()
+  // if socket could be reused or CloseSocket() if socket should be closed.
+  // e.g.
+  //   ScopedSocket s(socket_factory.NewSocket());
+  //   // use s
+  //   if (err) {
+  //     socket_factory.CloseSocket(std::move(s), err);
+  //   } else {
+  //     socket_factory.ReleaseSocket(std::move(s));
+  //     // or socket_factory.CloseSocket(std::move(s), false);
+  //   }
+  // Note: NewSocket() can be called even if IsInitialized() is false.
+  virtual ScopedSocket NewSocket() = 0;
+
+  // Releases used socket to socket factory.
+  // The returned socket may be reused for NewSocket().
+  // When it is about to close the socket, it will notify observer if the
+  // observer is set.  Actual timing to close the socket is implementation
+  // dependent.
+  // Don't release a socket on which an error happened, or that won't be
+  // reused.  Use CloseSocket() instead.
+  virtual void ReleaseSocket(ScopedSocket&& sock) = 0;
+
+  // Closes used socket. It will notify observer if the observer is set.
+  // |err| tells the factory whether the socket is closed because of an error.
+  virtual void CloseSocket(ScopedSocket&& sock, bool err) = 0;
+  // Clear errors associated with an address.
+  // SocketPool (subclass of this class) remembers to avoid an address whose
+  // socket was closed by CloseSocket() with an error, and this method clears
+  // such information.
+  virtual void ClearErrors() {}
+
+  // Destination name in form of "host:port".
+  virtual string DestName() const = 0;
+  // Defaults for factories that have no host/port: "" and -1.
+  virtual string host_name() const { return ""; }
+  virtual int port() const { return -1; }
+
+  virtual string DebugString() const = 0;
+
+ protected:
+  SocketFactory() : observer_(nullptr) {}
+
+  // Not owned.  May be nullptr when no observer is set.
+  SocketFactoryObserver* observer_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(SocketFactory);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SOCKET_FACTORY_H_
diff --git a/client/socket_pool.cc b/client/socket_pool.cc
new file mode 100644
index 0000000..5bdb13b
--- /dev/null
+++ b/client/socket_pool.cc
@@ -0,0 +1,733 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "socket_pool.h"
+
+#ifndef _WIN32
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#include <poll.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#endif
+
+#include <map>
+#include <sstream>
+
+#include "autolock_timer.h"
+#include "basictypes.h"
+#include "fileflag.h"
+#include "glog/logging.h"
+#include "lockhelper.h"
+#include "platform_thread.h"
+#include "scoped_fd.h"
+#include "simple_timer.h"
+
+namespace devtools_goma {
+
+// Do not reuse a pooled socket that has been idle longer than this, for HTTP
+// Keep-Alive. It can be longer, but be on the safer side and do not bother
+// with long timeouts.
+const long long kIdleSocketTimeoutNanoseconds = 5LL * 1000 * 1000 * 1000;
+
+// Do not use an address for this period after we got an error on it.
+// Note that if we have a keep-alive socket to that address, it will be used.
+// If we get a success after an error from the address, we clear the error
+// status.
+const int kErrorAddressTimeoutSecs = 60;
+
+// Retry creation of socket pool for this period (milliseconds).
+const int kSocketPoolSetupTimeoutInMs = 10 * 1000;
+// Wait for connection success for this period (milliseconds).
+// Also used as the upper bound of the retry backoff in the constructor.
+const int kConnTimeoutInMs = 3 * 1000;
+
+// Resolves |host_name| and tries to establish the first connection, retrying
+// for up to kSocketPoolSetupTimeoutInMs.  A FAIL result is retried after an
+// exponentially growing sleep (capped at kConnTimeoutInMs); any other non-OK
+// result is retried immediately.
+SocketPool::SocketPool(const string& host_name, int port)
+    : host_name_(host_name),
+      port_(port),
+      current_addr_(nullptr) {
+  SimpleTimer timer;
+  int32_t retry_backoff_ms = 50;
+  while (timer.GetInMs() < kSocketPoolSetupTimeoutInMs) {
+    Errno status;
+    {
+      AUTOLOCK(lock, &mu_);
+      status = InitializeUnlocked();
+    }
+    if (status == OK)
+      break;
+    if (status != FAIL)
+      continue;
+    // Sleep without holding the lock, then double the backoff up to the cap.
+    PlatformThread::Sleep(retry_backoff_ms);
+    const int32_t doubled_ms = retry_backoff_ms * 2;
+    retry_backoff_ms =
+        (doubled_ms > kConnTimeoutInMs) ? kConnTimeoutInMs : doubled_ms;
+  }
+  LOG_IF(WARNING, !IsInitialized()) << "failed to initialize socket pool in "
+                                    << timer.GetInMs() << " msec."
+                                    << " host_name=" << host_name
+                                    << " port=" << port;
+}
+
+// Closes every socket still kept in the pool, notifying the observer (if any)
+// before each close.
+SocketPool::~SocketPool() {
+  for (const auto& pooled : socket_pool_) {
+    const int pooled_fd = pooled.first;
+    if (observer_ != nullptr)
+      observer_->WillCloseSocket(pooled_fd);
+    // Wrap the raw descriptor so it is closed through ScopedSocket.
+    ScopedSocket closer(pooled_fd);
+    closer.Close();
+  }
+}
+
+// Returns a connected socket to the destination, or an invalid ScopedSocket
+// on failure.
+//  1. A pooled socket idle for less than kIdleSocketTimeoutNanoseconds is
+//     reused; older pooled sockets are closed (observer notified).
+//  2. Otherwise a new blocking connect() to |current_addr_| is attempted.
+//     If the current address is missing or marked with an error, the pool is
+//     re-initialized and the socket created by InitializeUnlocked() is
+//     returned.
+// A failed connect() is recorded against the address immediately, and the
+// fd -> address mapping is registered only for sockets actually handed out,
+// so no mapping is left behind for a descriptor that has been closed.
+ScopedSocket SocketPool::NewSocket() {
+  int new_fd = -1;
+  std::vector<int> close_sockets;
+  {
+    // See if something from socket pool is re-usable.
+    AUTOLOCK(lock, &mu_);
+    while (!socket_pool_.empty()) {
+      // If the socket has been idle for less than X seconds, use it.
+      if (socket_pool_.front().second.GetInNanoSeconds() <
+          kIdleSocketTimeoutNanoseconds) {
+        new_fd = socket_pool_.front().first;
+        VLOG(1) << "Reusing socket: " << new_fd
+                << ", socket pool size: " << socket_pool_.size();
+        socket_pool_.pop_front();
+        break;
+      } else {
+        const int fd = socket_pool_.front().first;
+        VLOG(1) << "Expiring too old socket: " << fd
+                << ", socket pool size: " << socket_pool_.size();
+        close_sockets.push_back(fd);
+        fd_addrs_.erase(fd);
+        socket_pool_.pop_front();
+      }
+    }
+  }
+  // Close expired sockets without holding the lock.
+  for (const auto& fd : close_sockets) {
+    if (observer_ != nullptr) {
+      observer_->WillCloseSocket(fd);
+    }
+    ScopedSocket s(fd);
+    s.Close();
+    // fd was removed fd_addrs_ above.
+  }
+  if (new_fd >= 0)
+    return ScopedSocket(new_fd);
+
+  int addrs_size;
+  {
+    AUTOLOCK(lock, &mu_);
+    addrs_size = static_cast<int>(addrs_.size());
+  }
+  for (int retry = 0; retry < std::max(1, addrs_size); ++retry) {
+    AddrData addr;
+    {
+      AUTOLOCK(lock, &mu_);
+      if (current_addr_ == nullptr || current_addr_->error_timestamp > 0) {
+        LOG(INFO) << "need to retry with other address for " << host_name_;
+        if (InitializeUnlocked() != OK) {
+          DCHECK(current_addr_ == nullptr);
+          LOG(ERROR) << "no other address available";
+          return ScopedSocket();
+        }
+        // InitializeUnlocked() left a freshly connected socket in the pool.
+        DCHECK(!socket_pool_.empty());
+        DCHECK_LT(socket_pool_.front().second.GetInNanoSeconds(),
+                  kIdleSocketTimeoutNanoseconds);
+        new_fd = socket_pool_.front().first;
+        socket_pool_.pop_front();
+        DCHECK_GE(new_fd, 0);
+        return ScopedSocket(new_fd);
+      }
+      DCHECK(current_addr_ != nullptr);
+      // Copy so that connect() can run without holding the lock.
+      addr = *current_addr_;
+    }
+
+    ScopedSocket socket_fd(socket(addr.storage.ss_family, SOCK_STREAM, 0));
+    if (!socket_fd.valid()) {
+#ifndef _WIN32
+      PLOG(WARNING) << "socket";
+#else
+      LOG(WARNING) << "socket error=" << WSAGetLastError();
+#endif
+      return socket_fd;
+    }
+
+    int r;
+    // TODO: use nonblocking connect with timeout.
+    while ((r = connect(socket_fd.get(), addr.addr_ptr(), addr.len)) < 0) {
+      if (errno == EINTR) {
+        continue;
+      }
+#ifndef _WIN32
+      PLOG(WARNING) << "connect " << addr.name;
+#else
+      LOG(WARNING) << "connect " << addr.name
+                   << " error=" << WSAGetLastError();
+#endif
+      break;
+    }
+    if (r < 0) {
+      // Mark the address as failed right away.  The mapping is registered
+      // only for the duration of the lookup because |socket_fd| is closed
+      // when this iteration ends and its descriptor number may be reused.
+      AUTOLOCK(lock, &mu_);
+      const int failed_fd = socket_fd.get();
+      fd_addrs_[failed_fd] = addr.name;
+      SetErrorTimestampUnlocked(failed_fd, time(nullptr));
+      fd_addrs_.erase(failed_fd);
+      continue;  // try other address.
+    }
+    if (!socket_fd.SetCloseOnExec()) {
+      LOG(ERROR) << "failed to set FD_CLOEXEC";
+      return ScopedSocket();
+    }
+    if (!socket_fd.SetNonBlocking()) {
+      LOG(ERROR) << "failed to set O_NONBLOCK";
+      return ScopedSocket();
+    }
+    {
+      // Overwrite rather than insert so that a leftover entry for a reused
+      // descriptor number can never shadow the real address.
+      AUTOLOCK(lock, &mu_);
+      fd_addrs_[socket_fd.get()] = addr.name;
+    }
+    return socket_fd;
+  }
+  LOG(ERROR) << "Too many retries in NewSocket";
+  return ScopedSocket();
+}
+
+void SocketPool::ReleaseSocket(ScopedSocket&& sock) {
+  AUTOLOCK(lock, &mu_);
+  VLOG(1) << "pushing socket for recycling " << sock.get();
+  int sock_fd = sock.get();
+  socket_pool_.emplace_back(sock.release(), SimpleTimer());
+  SetErrorTimestampUnlocked(sock_fd, 0);
+}
+
+// Closes |sock| after notifying the observer (if any).  The address the
+// socket was connected to gets the current time as its error timestamp when
+// |err| is true, or has its error timestamp cleared otherwise.
+void SocketPool::CloseSocket(ScopedSocket&& sock, bool err) {
+  VLOG(1) << "close socket " << sock.get();
+  if (observer_ != nullptr)
+    observer_->WillCloseSocket(sock.get());
+
+  AUTOLOCK(lock, &mu_);
+  const int closed_fd = sock.get();
+  sock.Close();
+  const time_t stamp = err ? time(nullptr) : 0;
+  // The mapping is still needed for the lookup, so erase it afterwards.
+  SetErrorTimestampUnlocked(closed_fd, stamp);
+  fd_addrs_.erase(closed_fd);
+}
+
+void SocketPool::ClearErrors() {
+  LOG(INFO) << "Clear all errors associated to addresses.";
+  AUTOLOCK(lock, &mu_);
+  for (auto& addr : addrs_) {
+    addr.error_timestamp = 0;
+  }
+}
+
+void SocketPool::SetErrorTimestampUnlocked(int sock, time_t t) {
+  const unordered_map<int, string>::const_iterator p = fd_addrs_.find(sock);
+  if (p == fd_addrs_.end()) {
+    LOG(ERROR) << "sock " << sock << " not found in fd_addrs";
+    return;
+  }
+  const string& addr_name = p->second;
+  // fast path. most case, current_addr_ is the addr for the sock.
+  if (current_addr_ != nullptr && current_addr_->name == addr_name) {
+    current_addr_->error_timestamp = t;
+    return;
+  }
+  // slow path.
+  for (auto& addr : addrs_) {
+    if (addr.name == addr_name) {
+      addr.error_timestamp = t;
+      return;
+    }
+  }
+  LOG(WARNING) << "sock " << sock << " addr:" << addr_name << " not found";
+}
+
+// Creates an empty address: zeroed storage, zero length (so IsValid() is
+// false) and no recorded error.
+SocketPool::AddrData::AddrData()
+    : len(0),
+      ai_socktype(0),
+      ai_protocol(0),
+      error_timestamp(0) {
+  memset(&storage, 0, sizeof storage);
+}
+
+// Views the stored sockaddr_storage as a generic sockaddr, as taken by
+// connect().
+const struct sockaddr* SocketPool::AddrData::addr_ptr() const {
+  const void* raw = &storage;
+  return static_cast<const struct sockaddr*>(raw);
+}
+
+// Marks this address as unusable; IsValid() returns false afterwards.
+void SocketPool::AddrData::Invalidate() {
+  len = 0;
+}
+
+// An address is valid once it has a non-zero length.
+bool SocketPool::AddrData::IsValid() const {
+  return len != 0;
+}
+
+// Fills this entry from a dotted-quad IPv4 string and |port|.
+// Returns false and invalidates the entry when |ipv4| cannot be parsed by
+// inet_pton().
+bool SocketPool::AddrData::InitFromIPv4Addr(const string& ipv4, int port) {
+  len = sizeof(struct sockaddr_in);
+  ai_socktype = SOCK_STREAM;
+  ai_protocol = 0;
+  name = ipv4;
+
+  struct sockaddr_in* const v4 =
+      reinterpret_cast<struct sockaddr_in*>(&storage);
+  v4->sin_family = AF_INET;
+  v4->sin_port = htons(static_cast<u_short>(port));
+
+  const bool parsed =
+      inet_pton(AF_INET, ipv4.c_str(), &v4->sin_addr.s_addr) > 0;
+  if (!parsed)
+    Invalidate();
+  return parsed;
+}
+
+// Fills this entry from one getaddrinfo() result.
+// |name| becomes the numeric text form of the address.  The entry is
+// invalidated (IsValid() == false) when the address does not fit into
+// |storage|, when its family is neither AF_INET nor AF_INET6, or when it
+// cannot be converted to text.
+void SocketPool::AddrData::InitFromAddrInfo(const struct addrinfo* ai) {
+  char buf[128];
+  COMPILE_ASSERT(sizeof buf >= INET_ADDRSTRLEN, buf_too_small_inet);
+  COMPILE_ASSERT(sizeof buf >= INET6_ADDRSTRLEN, buf_too_small_inet6);
+
+  // Never copy more than |storage| can hold.
+  if (static_cast<size_t>(ai->ai_addrlen) > sizeof this->storage) {
+    LOG(ERROR) << "Too large address. len=" << ai->ai_addrlen
+               << " family:" << ai->ai_family;
+    Invalidate();
+    return;
+  }
+  this->len = ai->ai_addrlen;
+  memcpy(&this->storage, ai->ai_addr, this->len);
+  this->ai_socktype = ai->ai_socktype;
+  this->ai_protocol = ai->ai_protocol;
+
+  const char* text = nullptr;
+  switch (ai->ai_family) {
+    case AF_INET:
+      {
+        struct sockaddr_in* in =
+            reinterpret_cast<struct sockaddr_in*>(&this->storage);
+        text = inet_ntop(AF_INET, &in->sin_addr, buf, sizeof buf);
+      }
+      break;
+    case AF_INET6:
+      {
+        struct sockaddr_in6* in6 =
+            reinterpret_cast<struct sockaddr_in6*>(&this->storage);
+        text = inet_ntop(AF_INET6, &in6->sin6_addr, buf, sizeof buf);
+      }
+      break;
+    default:
+      LOG(ERROR) << "Unknown address family:" << ai->ai_family;
+      Invalidate();
+      return;
+  }
+  if (text == nullptr) {
+    // inet_ntop() returns NULL on failure; a std::string must not be
+    // assigned from a null pointer.
+    LOG(ERROR) << "inet_ntop failed. family:" << ai->ai_family;
+    Invalidate();
+    return;
+  }
+  this->name = text;
+}
+
+/* static */
+// Appends the addresses of |hostname|:|port| to |addrs|.
+// A hostname starting with a digit is first tried as an IPv4 literal; if that
+// succeeds no name lookup is performed.  Otherwise getaddrinfo() is queried
+// for AF_INET and then AF_INET6.  |addrs| is left untouched when nothing can
+// be resolved.
+void SocketPool::ResolveAddress(
+    const string& hostname, int port,
+    std::vector<SocketPool::AddrData>* addrs) {
+  if (hostname.empty()) {
+    LOG(ERROR) << "hostname is empty";
+    return;
+  }
+  // isdigit() requires a value representable as unsigned char; a plain char
+  // may be negative.
+  if (isdigit(static_cast<unsigned char>(hostname[0]))) {
+    // Try using it as IP address
+    AddrData addr;
+    if (addr.InitFromIPv4Addr(hostname, port)) {
+      addrs->push_back(addr);
+      return;
+    }
+  }
+  sa_family_t afs[2] = { AF_INET, AF_INET6 };
+  std::ostringstream port_oss;
+  port_oss << port;
+  const string port_string = port_oss.str();
+  for (const auto& af : afs) {
+    struct addrinfo hints;
+    struct addrinfo* result = nullptr;
+    memset(&hints, 0, sizeof(struct addrinfo));
+    hints.ai_family = af;
+    hints.ai_socktype = SOCK_STREAM;
+    hints.ai_flags = 0;
+    hints.ai_protocol = 0;
+    int gai_error_code = getaddrinfo(
+        hostname.c_str(), port_string.c_str(), &hints, &result);
+    if (gai_error_code != 0) {
+      if (af == AF_INET) {
+        LOG(ERROR) << "getaddrinfo failed: " << gai_strerror(gai_error_code)
+                   << " host:" << hostname
+                   << " port:" << port_string
+                   << " af:" << hints.ai_family;
+      } else {
+        // ok with no IPv6 addr.
+        LOG(INFO) << "getaddrinfo failed: " << gai_strerror(gai_error_code)
+                  << " host:" << hostname
+                  << " port:" << port_string
+                  << " af:" << hints.ai_family;
+      }
+      continue;
+    }
+
+    for (struct addrinfo* rp = result; rp != nullptr; rp = rp->ai_next) {
+      AddrData addr;
+      addr.InitFromAddrInfo(rp);
+      addrs->push_back(addr);
+    }
+    freeaddrinfo(result);
+  }
+  LOG_IF(ERROR, addrs->empty()) << "Failed to resolve " << hostname;
+}
+
+// Holds one (possibly invalid) nonblocking socket per entry of |addrs|, used
+// to connect to several addresses at once and pick the first that succeeds.
+// socks_[i] always corresponds to (*addrs_)[i].
+// NOTE(review): this class and its Poll() implementations test WIN32 while
+// the rest of this file tests _WIN32 -- confirm both are always defined
+// together by the build.
+class SocketPool::ScopedSocketList {
+ public:
+  // Doesn't take ownership of addrs.
+  explicit ScopedSocketList(std::vector<AddrData>* addrs)
+      : addrs_(addrs) {
+    socks_.resize(addrs->size());
+  }
+
+  // Connect to initiate connection to all addrs with nonblocking socket.
+  // Returns socket if connection is established immediately; *addr then
+  // points to the matching entry of addrs.
+  // Returns an invalid ScopedSocket otherwise, with *addr == nullptr.
+  // *nfds will be the number of connection initiated.  It is incremented
+  // before connect() is called, so it also counts sockets whose connect()
+  // failed immediately and were closed.
+  ScopedSocket Connect(int* nfds, AddrData** addr) {
+    *nfds = 0;
+    *addr = nullptr;
+    time_t now = time(nullptr);
+    // Smallest error timestamp among all addresses (0 if any has no error).
+    time_t min_error_timestamp = now;
+    for (const auto& address : *addrs_) {
+      if (address.error_timestamp < min_error_timestamp) {
+        min_error_timestamp = address.error_timestamp;
+      }
+    }
+
+    for (size_t i = 0; i < addrs_->size(); ++i) {
+      if ((*addrs_)[i].error_timestamp == min_error_timestamp) {
+        // Use this addr even if it marked as error recently.
+        // Most case, min_error_timestamp is 0 (some ip wasn't marked as error).
+        // or this addr had error most long time ago in addrs.
+        // Note that if len(addrs_)==1, the addr is used regardless of
+        // error_timestamp to avoid "no other address available" by just
+        // one error on the addr.
+        // The addr, however, might not be used if connect fails.
+        LOG_IF(WARNING, min_error_timestamp > 0)
+            << "addrs[" << i << "] " << (*addrs_)[i].name
+            << " min_error_timestamp=" << min_error_timestamp;
+      } else {
+        CHECK_GT((*addrs_)[i].error_timestamp, min_error_timestamp);
+        // Skip addresses whose error is more recent than
+        // kErrorAddressTimeoutSecs.
+        if (now < (*addrs_)[i].error_timestamp + kErrorAddressTimeoutSecs) {
+          LOG(WARNING) << "addrs[" << i << "] " << (*addrs_)[i].name
+                       << " don't use until "
+                       << ((*addrs_)[i].error_timestamp
+                           + kErrorAddressTimeoutSecs)
+                       << " error_timestamp=" << (*addrs_)[i].error_timestamp
+                       << " now=" << now;
+          continue;
+        }
+        // else error happened long time ago, so try again.
+      }
+
+      socks_[i] = ScopedSocket(
+          socket((*addrs_)[i].storage.ss_family, SOCK_STREAM, 0));
+      if (!socks_[i].valid()) {
+#ifndef _WIN32
+        PLOG(WARNING) << "socket:" << (*addrs_)[i].name;
+#else
+        LOG(WARNING) << "socket:" << (*addrs_)[i].name
+                     << " error=" << WSAGetLastError();
+#endif
+        continue;
+      }
+      if (!socks_[i].SetCloseOnExec()) {
+        LOG(WARNING) << "failed to set FD_CLOEXEC";
+        socks_[i].Close();
+        continue;
+      }
+      if (!socks_[i].SetNonBlocking()) {
+        LOG(WARNING) << "failed to set O_NONBLOCK";
+        socks_[i].Close();
+        continue;
+      }
+
+      ++*nfds;
+      // connect with nonblocking socket.
+      if (connect(socks_[i].get(),
+                  (*addrs_)[i].addr_ptr(),
+                  (*addrs_)[i].len) == 0) {
+        // If connect returns immediately on nonblocking socket,
+        // it's fast enough so use it.
+        *addr = &(*addrs_)[i];
+        return std::move(socks_[i]);
+      }
+      // Anything other than "connection in progress" is a failure; close
+      // the socket so Poll() will not wait on it.
+#ifdef WIN32
+      if (WSAGetLastError() != WSAEWOULDBLOCK) {
+        LOG(WARNING) << "connect to " << (*addrs_)[i].name
+                     << " WSA:" << WSAGetLastError();
+        socks_[i].Close();
+        continue;
+      }
+#else
+      if (errno != EINPROGRESS) {
+        PLOG(WARNING) << "connect to " << (*addrs_)[i].name;
+        socks_[i].Close();
+        continue;
+      }
+#endif
+    }
+    return ScopedSocket();
+  }
+
+  // Poll nonblocking connect at most timeout_ms milliseconds.
+  // Returns a connected socket, if connection has been established,
+  // Returns an invalid ScopedSocket if poll has not yet finished.
+  // nfds will be number of socket that is connecting.
+  // if *nfds <= 0, no need to call Poll again.
+  // TODO: reuse DescriptorPoller?
+  ScopedSocket Poll(int timeout_ms, int* nfds, AddrData** addr);
+
+ private:
+  // Not owned.
+  std::vector<AddrData>* addrs_;
+  // Same size and order as *addrs_; invalid where no connect is pending.
+  std::vector<ScopedSocket> socks_;
+
+  // Scratch state for Poll().
+#ifdef WIN32
+  fd_set fdset_;
+#else
+  std::vector<struct pollfd> pfds_;
+#endif
+  DISALLOW_COPY_AND_ASSIGN(ScopedSocketList);
+};
+
+#ifdef WIN32
+// Windows implementation based on select().
+// Waits up to |timeout_ms| for any pending nonblocking connect to finish.
+// Returns the first socket reported writable (connected) and sets |*addr| to
+// its address.  Sockets reported in the exception set (connect failed) are
+// logged and closed so that the next call does not wake up on them again.
+// |*nfds| is the number of sockets still connecting; if it is <= 0 there is
+// no need to call Poll() again.
+ScopedSocket SocketPool::ScopedSocketList::Poll(
+    int timeout_ms, int* nfds, AddrData** addr) {
+  *nfds = 0;
+  *addr = nullptr;
+  fd_set exceptfds;
+  FD_ZERO(&fdset_);
+  FD_ZERO(&exceptfds);
+  for (const auto& sock : socks_) {
+    if (!sock.valid())
+      continue;
+    MSVC_PUSH_DISABLE_WARNING_FOR_FD_SET();
+    FD_SET(sock.get(), &fdset_);
+    FD_SET(sock.get(), &exceptfds);
+    MSVC_POP_WARNING();
+    ++*nfds;
+  }
+  if (*nfds == 0) {
+    return ScopedSocket();
+  }
+  TIMEVAL timeout;
+  timeout.tv_sec = timeout_ms / 1000;
+  timeout.tv_usec = (timeout_ms % 1000) * 1000;
+  int r = select(*nfds, nullptr, &fdset_, &exceptfds, &timeout);
+  if (r == SOCKET_ERROR) {
+    LOG(ERROR) << "connect select error="
+               << WSAGetLastError();
+    return ScopedSocket();
+  }
+  if (r == 0) {
+    LOG(ERROR) << "connect timeout:" << timeout_ms << " msec";
+    return ScopedSocket();
+  }
+  for (size_t i = 0; i < socks_.size(); ++i) {
+    if (!socks_[i].valid())
+      continue;
+    if (FD_ISSET(socks_[i].get(), &fdset_)) {
+      *addr = &(*addrs_)[i];
+      return std::move(socks_[i]);
+    }
+    if (FD_ISSET(socks_[i].get(), &exceptfds)) {
+      int val = 0;
+      int val_size = sizeof(val);
+      if (getsockopt(socks_[i].get(), SOL_SOCKET, SO_ERROR,
+                     reinterpret_cast<char*>(&val), &val_size) != 0) {
+        LOG(ERROR) << "getsockopt failed."
+                   << " name=" << (*addrs_)[i].name
+                   << " sock=" << socks_[i].get()
+                   << " WSA:" << WSAGetLastError();
+      } else if (val_size != sizeof(val)) {
+        LOG(ERROR) << "getsockopt failed."
+                   << " name=" << (*addrs_)[i].name
+                   << " sock=" << socks_[i].get()
+                   << " val_size=" << val_size;
+      } else {
+        LOG(ERROR) << "getsockopt(SO_ERROR)."
+                   << " name=" << (*addrs_)[i].name
+                   << " sock=" << socks_[i].get()
+                   << " val=" << val;
+      }
+      // The connection attempt failed.  Drop the socket; otherwise every
+      // following select() would return immediately for it until the
+      // caller's deadline.
+      socks_[i].Close();
+      --*nfds;
+    }
+  }
+  return ScopedSocket();
+}
+#else
+// POSIX implementation based on poll().
+// Waits up to |timeout_ms| for any pending nonblocking connect to finish.
+// A socket is returned (with |*addr| set to its address) only when it is
+// writable AND getsockopt(SO_ERROR) reports no error: a failed nonblocking
+// connect can also be reported as writable.  Sockets whose connect failed are
+// logged and closed so that the next call does not wake up on them again.
+// |*nfds| is the number of sockets still connecting; if it is <= 0 there is
+// no need to call Poll() again.
+ScopedSocket SocketPool::ScopedSocketList::Poll(
+    int timeout_ms, int* nfds, AddrData** addr) {
+  *nfds = 0;
+  *addr = nullptr;
+  pfds_.resize(socks_.size());
+  for (const auto& sock : socks_) {
+    if (!sock.valid())
+      continue;
+    pfds_[*nfds].fd = sock.get();
+    pfds_[*nfds].events = POLLOUT;
+    pfds_[*nfds].revents = 0;
+    ++*nfds;
+  }
+  if (*nfds == 0) {
+    return ScopedSocket();
+  }
+  // |*nfds| is decremented below as failed sockets are closed, so keep the
+  // number of polled entries separately.
+  const int num_polled = *nfds;
+  int r = poll(&pfds_[0], num_polled, timeout_ms);
+  if (r == -1) {
+    PLOG_IF(ERROR, errno != EINTR) << "connect poll error";
+    return ScopedSocket();
+  }
+  if (r == 0) {
+    // poll() does not set errno on timeout, so do not use PLOG here.
+    LOG(ERROR) << "connect timeout:" << timeout_ms << " msec";
+    return ScopedSocket();
+  }
+  const short kDoneEvents = POLLOUT | POLLERR | POLLHUP | POLLNVAL;
+  for (int i = 0; i < num_polled; ++i) {
+    const short revents = pfds_[i].revents;
+    if ((revents & kDoneEvents) == 0)
+      continue;
+    const int fd = pfds_[i].fd;
+    for (size_t j = 0; j < socks_.size(); ++j) {
+      if (!socks_[j].valid() || socks_[j].get() != fd)
+        continue;
+
+      bool connected = false;
+      int so_error = 0;
+      socklen_t so_error_len = sizeof(so_error);
+      if (getsockopt(fd, SOL_SOCKET, SO_ERROR,
+                     &so_error, &so_error_len) != 0) {
+        PLOG(WARNING) << "getsockopt(SO_ERROR) failed."
+                      << " name=" << (*addrs_)[j].name
+                      << " sock=" << fd;
+      } else if (so_error != 0) {
+        LOG(WARNING) << "connect to " << (*addrs_)[j].name << " failed."
+                     << " sock=" << fd
+                     << " so_error=" << so_error;
+      } else if ((revents & POLLOUT) != 0) {
+        connected = true;
+      } else {
+        LOG(WARNING) << "connect to " << (*addrs_)[j].name << " failed."
+                     << " sock=" << fd
+                     << " revents=" << revents;
+      }
+
+      if (connected) {
+        *addr = &(*addrs_)[j];
+        return std::move(socks_[j]);
+      }
+      // Drop the failed socket; otherwise every following poll() would
+      // return immediately for it until the caller's deadline.
+      socks_[j].Close();
+      --*nfds;
+      break;
+    }
+  }
+  return ScopedSocket();
+}
+#endif
+
+// Re-resolves |host_name_| and connects to one of its addresses.
+// On success (OK), |current_addr_| points to the chosen entry of |addrs_| and
+// the connected socket is appended to |socket_pool_| (and registered in
+// |fd_addrs_|).  On failure |current_addr_| is nullptr and the result is
+// ERR_TIMEOUT if kConnTimeoutInMs elapsed while polling, FAIL otherwise.
+// Error timestamps of previously known addresses are carried over by name.
+Errno SocketPool::InitializeUnlocked() {
+  // lock held.
+  // Reset first: |current_addr_| points into |addrs_|, which is rebuilt below.
+  current_addr_ = nullptr;
+  // Remember which addresses had errors so the marks survive re-resolution.
+  std::map<string, time_t> last_errors;
+  for (const auto& addr : addrs_) {
+    if (addr.error_timestamp > 0) {
+      last_errors.insert(std::make_pair(addr.name, addr.error_timestamp));
+    }
+  }
+  addrs_.clear();
+  SimpleTimer timer;
+  // TODO: avoid calling ResolveAddress if Initialize called immediately
+  // again?
+  ResolveAddress(host_name_, port_, &addrs_);
+  for (auto& addr : addrs_) {
+    const std::map<string, time_t>::const_iterator found =
+        last_errors.find(addr.name);
+    if (found != last_errors.end()) {
+      addr.error_timestamp = found->second;
+    }
+    LOG(INFO) << host_name_ << " resolved as " << addr.name
+              << " error_timestamp:" << addr.error_timestamp;
+  }
+  int resolve_ms = timer.GetInMs();
+  if (resolve_ms > 1000) {
+    LOG(ERROR) << "SLOW resolve " << host_name_ << " " << addrs_.size()
+               << " in " << resolve_ms << " msec";
+  } else {
+    LOG(INFO) << "resolve " << host_name_ << " " << addrs_.size()
+              << " in " << resolve_ms << " msec";
+  }
+
+  // Restart the timer to measure the connect phase.
+  timer.Start();
+  ScopedSocketList socks(&addrs_);
+
+  int nfds;
+  // Start nonblocking connects; a socket is returned only if one of them
+  // completed immediately.
+  ScopedSocket s(socks.Connect(&nfds, &current_addr_));
+  if (s.valid()) {
+    DCHECK(current_addr_ != nullptr);
+    DCHECK(current_addr_->IsValid());
+    int connect_ms = timer.GetInMs();
+    if (connect_ms > 1000) {
+      LOG(ERROR) << "SLOW connected"
+                 << ": use addr:" << current_addr_->name
+                 << " for " << host_name_
+                 << " in " << connect_ms << " msec";
+    } else {
+      LOG(INFO) << "connected"
+                << ": use addr:" << current_addr_->name
+                << " for " << host_name_
+                << " in " << connect_ms << " msec";
+    }
+    fd_addrs_.insert(std::make_pair(s.get(), current_addr_->name));
+    socket_pool_.emplace_back(s.release(), SimpleTimer());
+    return OK;
+  }
+  if (nfds <= 0) {
+    // No connection attempt could even be started.
+    LOG(ERROR) << "Server at "
+               << host_name_ << ":" << port_ << " not reachable.";
+    DCHECK(current_addr_ == nullptr);
+    return FAIL;
+  }
+  // Wait for the pending connects until one succeeds, none is left, or
+  // kConnTimeoutInMs has elapsed since the connect phase started.
+  int remaining;
+  while ((remaining = kConnTimeoutInMs - timer.GetInMs()) > 0) {
+    s = socks.Poll(remaining, &nfds, &current_addr_);
+    if (s.valid()) {
+      break;
+    }
+    if (nfds <= 0) {
+      break;
+    }
+  }
+  LOG(INFO) << "connect done in " << timer.GetInMs() << " msec";
+  if (!s.valid()) {
+    DCHECK(current_addr_ == nullptr);
+    LOG(ERROR) << "Server at "
+               << host_name_ << ":" << port_ << " not reachable.";
+    // remaining <= 0 means the loop ended because the deadline passed.
+    if (remaining <= 0)
+      return ERR_TIMEOUT;
+    return FAIL;
+  }
+  DCHECK(current_addr_ != nullptr);
+  DCHECK(current_addr_->IsValid());
+  LOG(INFO) << "use addr:" << current_addr_->name << " for " << host_name_;
+  fd_addrs_.insert(std::make_pair(s.get(), current_addr_->name));
+  socket_pool_.emplace_back(s.release(), SimpleTimer());
+  return OK;
+}
+
+// True when an address has been selected and that address is valid.
+bool SocketPool::IsInitialized() const {
+  AUTOLOCK(lock, &mu_);
+  if (current_addr_ == nullptr)
+    return false;
+  return current_addr_->IsValid();
+}
+
+// Destination in the form "host:port".
+string SocketPool::DestName() const {
+  std::ostringstream dest;
+  dest << host_name_;
+  dest << ":";
+  dest << port_;
+  return dest.str();
+}
+
+// Number of addresses currently held in |addrs_|, read under |mu_|.
+size_t SocketPool::NumAddresses() const {
+  AUTOLOCK(lock, &mu_);
+  return addrs_.size();
+}
+
+// One-line summary: destination, current address ("0.0.0.0" when none is
+// selected), number of pooled sockets and number of tracked open sockets.
+string SocketPool::DebugString() const {
+  string addr_name("0.0.0.0");
+  size_t pooled = 0;
+  size_t tracked = 0;
+  {
+    // Snapshot the state under the lock; format it afterwards.
+    AUTOLOCK(lock, &mu_);
+    if (current_addr_ != nullptr)
+      addr_name = current_addr_->name;
+    pooled = socket_pool_.size();
+    tracked = fd_addrs_.size();
+  }
+
+  std::ostringstream out;
+  out << "dest:" << DestName()
+      << " addr:" << addr_name
+      << " pool_size:" << pooled
+      << " open_sockets:" << tracked;
+  return out.str();
+}
+
+}  // namespace devtools_goma
diff --git a/client/socket_pool.h b/client/socket_pool.h
new file mode 100644
index 0000000..6439ba4
--- /dev/null
+++ b/client/socket_pool.h
@@ -0,0 +1,113 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SOCKET_POOL_H_
+#define DEVTOOLS_GOMA_CLIENT_SOCKET_POOL_H_
+
+#ifndef _WIN32
+#include <sys/socket.h>
+#include <sys/types.h>
+#endif
+#include <time.h>
+
+#include <deque>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "basictypes.h"
+#include "lockhelper.h"
+#include "simple_timer.h"
+#include "socket_factory.h"
+#include "scoped_fd.h"
+#include "unordered.h"
+
+using std::string;
+
+struct addrinfo;
+
+namespace devtools_goma {
+
+class SimpleTimer;
+
+// TODO: template for ScopedSocket and ScopedNamedPipe.
+class SocketPool : public SocketFactory {
+ public:
+  SocketPool(const string& host_name, int port);
+  ~SocketPool() override;
+  bool IsInitialized() const override;  // true if a valid address is selected.
+  ScopedSocket NewSocket() override;
+
+  // Releases the socket. The socket will be reused if NewSocket is called
+  // within some period of time.
+  void ReleaseSocket(ScopedSocket&& sock) override;
+
+  // Closes the socket.
+  // Marks the current address as having had an error if err is true, so
+  // it won't use the current address for some period of time when
+  // it needs to open a new connection.
+  void CloseSocket(ScopedSocket&& sock, bool err) override;
+
+  // Clears errors associated with addresses.
+  void ClearErrors() override;
+
+  string DestName() const override;  // "<host_name>:<port>".
+  string host_name() const override { return host_name_; }
+  int port() const override { return port_; }
+
+  size_t NumAddresses() const;  // number of entries in addrs_.
+
+  string DebugString() const override;
+
+ private:
+  struct AddrData {
+    AddrData();
+    struct sockaddr_storage storage;
+    size_t len;
+    int ai_socktype;
+    int ai_protocol;
+    string name;
+    time_t error_timestamp;
+
+    const struct sockaddr* addr_ptr() const;
+    void Invalidate();
+    bool IsValid() const;
+    bool InitFromIPv4Addr(const string& ipv4, int port);
+    void InitFromAddrInfo(const struct addrinfo* ai);
+  };
+  class ScopedSocketList;
+
+  // Resolves hostname:port and stores the results in addrs.
+  static void ResolveAddress(const string& hostname, int port,
+                             std::vector<AddrData>* addrs);
+
+  // Initializes socket_pool_.
+  // Returns OK if at least one address is available, and puts a
+  // connected socket into socket_pool_.
+  // Returns FAIL if no address is available.
+  // Returns ERR_TIMEOUT if connecting timed out.
+  Errno InitializeUnlocked();
+
+  // Sets error_timestamp in the AddrData for sock to t.
+  void SetErrorTimestampUnlocked(int sock, time_t t);
+
+  // This host:port is the address we will connect to directly.
+  // So, this can be either a destination address or a proxy address.
+  string host_name_;
+  int port_;
+
+  Lock mu_;
+  std::vector<AddrData> addrs_;
+  AddrData* current_addr_;  // points into addrs_, or nullptr.
+  unordered_map<int, string> fd_addrs_;  // socket fd -> name of its address.
+  // TODO: use ScopedSocket. std::pair doesn't support movable yet?
+  std::deque<std::pair<int, SimpleTimer>> socket_pool_;
+
+  DISALLOW_COPY_AND_ASSIGN(SocketPool);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SOCKET_POOL_H_
diff --git a/client/spawner.h b/client/spawner.h
new file mode 100644
index 0000000..024964d
--- /dev/null
+++ b/client/spawner.h
@@ -0,0 +1,146 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SPAWNER_H_
+#define DEVTOOLS_GOMA_CLIENT_SPAWNER_H_
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+#ifdef _WIN32
+#include "config_win.h"
+#endif
+
+using std::string;
+
+namespace devtools_goma {
+
+// A subclass of Spawner spawns a child process.  It takes a file as stdin
+// for the child process, and redirects the child process' stdout to another
+// file.
+//
+// Spawning a process may cause strange behavior under multi-threaded
+// environment especially in posix, and prohibited in our code base.
+// You might only need to use a subclass of this class in:
+// gomacc (not dispatcher) or subprocess_impl.
+class Spawner {
+ public:
+  enum WaitPolicy {
+    NO_HANG = 0,
+    WAIT_INFINITE = 1,
+    NEED_KILL = 2,
+  };
+  enum ConsoleOutputOption {
+    MERGE_STDOUT_STDERR = 0,
+    STDOUT_ONLY = 1,
+  };
+#ifdef _WIN32
+  // On Windows, the common convention of invalid PID is 0 (see
+  // http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx for
+  // discussions, another common invalid pid value is DWORD(-1), which is
+  // 0xffffffff and not 64-bit friendly).
+  static const int kInvalidPid = 0;
+#else
+  static const int kInvalidPid = -1;
+#endif
+  virtual ~Spawner() {}
+
+  // Set files for redirection.
+  // If any of |stdin_filename|, |stdout_filename|, or |stderr_filename| is
+  // not empty, it is used as stdin, stdout, or stderr of the child process.
+  // |option| specifies which console outputs are stored to |stdout_filename|
+  // when |stderr_filename| is empty.
+  // Note: this must be called BEFORE the Run method.
+  // Note: you cannot use this method with SetConsoleOutputBuffer.
+  void SetFileRedirection(const string& stdin_filename,
+                          const string& stdout_filename,
+                          const string& stderr_filename,
+                          ConsoleOutputOption option) {
+    stdin_filename_ = stdin_filename;
+    stdout_filename_ = stdout_filename;
+    stderr_filename_ = stderr_filename;
+    console_output_option_ = option;
+  }
+
+  // Set buffer to redirect stdout and stderr.
+  // |option| specifies which console outputs are stored to |console_output|.
+  // Note: this must be called BEFORE the Run method.
+  // Note: if |stdout_filename| or |stderr_filename| are set by
+  //       SetFileRedirection, you cannot use this method.
+  void SetConsoleOutputBuffer(string* console_output,
+                              ConsoleOutputOption option) {
+    console_output_ = console_output;
+    console_output_option_ = option;
+  }
+
+  // If |detach| is true, the Spawner detaches the process.
+  // Note: this must be called BEFORE the Run method.
+  void SetDetach(bool detach) { detach_ = detach; }
+
+  // If |umask| is non-negative, it is used as umask of the process.
+  // Note: this feature only works on SpawnerPosix.
+  void SetUmask(int32_t umask) { umask_ = umask; }
+
+  // Spawns a child process.
+  // Returns a child process id on success.
+  // Returns kInvalidPid on non fatal error, and dies with fatal error.
+  // |cmd| is a program name, |args| is its arguments, |envs| is its
+  // environment, and |cwd| is a current working directory.
+  virtual int Run(const string& cmd, const std::vector<string>& args,
+                  const std::vector<string>& envs, const string& cwd) = 0;
+
+  // Kills the process.
+  // Returns true if the process is still running.
+  // Returns false if the process has been terminated.
+  // TODO: flip the return value meaning. True sounds success.
+  virtual bool Kill() = 0;
+
+  // Waits for process termination.
+  // If |wait_policy| is NO_HANG, it just returns current status.
+  // If |wait_policy| is WAIT_INFINITE, it waits until the process finishes.
+  // If |wait_policy| is NEED_KILL, it kills process if the process is running.
+  // Returns true if the process has been terminated.
+  // Returns false if the process is still running.
+  virtual bool Wait(WaitPolicy wait_policy) = 0;
+
+  // Returns true if the process is running.
+  virtual bool IsChildRunning() const = 0;
+
+  // Returns true if the process is signaled.
+  virtual bool IsSignaled() const = 0;
+
+  // Marks the process as signaled.
+  virtual void SetSignaled() = 0;
+
+  // Returns the exit code of the process.
+  virtual int ChildStatus() const = 0;
+
+  // Returns the memory used during the execution.
+  // Returns -1 if this info is not available.
+  virtual int64_t ChildMemKb() const = 0;
+
+  // Returns the signal that caused the child process to terminate.
+  // (Only meaningful for SpawnerPosix).
+  virtual int ChildTermSignal() const = 0;
+
+ protected:
+  Spawner() :
+      console_output_(NULL), detach_(false), umask_(-1),
+      console_output_option_(MERGE_STDOUT_STDERR) {}
+
+  string stdin_filename_, stdout_filename_, stderr_filename_;
+  string* console_output_;  // not owned; set by SetConsoleOutputBuffer.
+  bool detach_;
+  int32_t umask_;  // -1 (the default) means "leave the umask alone".
+  ConsoleOutputOption console_output_option_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(Spawner);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SPAWNER_H_
diff --git a/client/spawner_posix.cc b/client/spawner_posix.cc
new file mode 100644
index 0000000..0058bc6
--- /dev/null
+++ b/client/spawner_posix.cc
@@ -0,0 +1,485 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "spawner_posix.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <spawn.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <vector>
+#include <sstream>
+
+#include "file.h"
+#include "file_helper.h"
+#include "fileflag.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+#include "timestamp.h"
+#include "mypath.h"
+#include "path.h"
+
+namespace {
+
+static const int kInvalidProcessStatus = -256;
+
+struct SubprocExit {
+  SubprocExit() : lineno(0), last_errno(0), signal(0) {
+    memset(&ru, 0, sizeof(ru));
+  }
+  int lineno;      // source line of the step that failed in the child, or 0.
+  int last_errno;  // error number recorded for that failure, or 0.
+  int signal;      // signal that terminated the spawned program, or 0.
+  struct rusage ru;  // getrusage(RUSAGE_CHILDREN) after the program ends.
+};
+
+void __attribute__((__noreturn__)) SubprocExitReport(
+    int fd, const SubprocExit& se, int exit_value) {
+  // Sends |se| over |fd| and exits the calling process with |exit_value|.
+  const bool reported = (write(fd, &se, sizeof(se)) == sizeof(se));
+  if (!reported) close(fd);
+  // A failed report must not look like success: force a non-zero exit code.
+  _exit((reported || exit_value != 0) ? exit_value : 1);
+}
+
+}  // namespace
+
+namespace devtools_goma {
+
+SpawnerPosix::SpawnerPosix()
+    : pid_(Spawner::kInvalidPid),  // mediator process; set by Run().
+      prog_pid_(Spawner::kInvalidPid),  // spawned program; set by Run().
+      subprocess_dying_(false), is_signaled_(false),
+      sent_sig_(0), status_(kInvalidProcessStatus), process_mem_kb_(-1),
+      signal_(0) {
+}
+
+SpawnerPosix::~SpawnerPosix() {
+  if (!console_out_file_.empty())  // temp file created by Run(), if any.
+    remove(console_out_file_.c_str());
+}
+
+int SpawnerPosix::Run(const string& cmd, const std::vector<string>& args,
+                      const std::vector<string>& envs, const string& cwd) {
+  if (console_output_) {  // output goes to a temp file; Wait() reads it back.
+    std::ostringstream filenamebuf;
+    filenamebuf << "goma_tmp." << rand() << "." << GetCurrentTimestampMs()
+                << ".out";
+    console_out_file_ = file::JoinPath(GetGomaTmpDir(), filenamebuf.str());
+    stdout_filename_ = console_out_file_;
+  }
+
+  const bool need_redirect = detach_ || !stdin_filename_.empty() ||
+      !stdout_filename_.empty() || !stderr_filename_.empty();
+  ScopedFd stdin_fd;
+  ScopedFd stdout_fd;
+  ScopedFd stderr_fd;
+  if (need_redirect) {
+    ScopedFd devnullfd(ScopedFd::OpenNull());
+    stdin_fd.reset(dup(devnullfd.fd()));
+    if (!stdin_filename_.empty())
+      stdin_fd.reset(ScopedFd::OpenForRead(stdin_filename_));
+    stdout_fd.reset(dup(devnullfd.fd()));
+    if (!stdout_filename_.empty())
+      stdout_fd.reset(ScopedFd::Create(stdout_filename_, 0600));
+    stderr_fd.reset(dup(devnullfd.fd()));
+    if (!stderr_filename_.empty()) {
+      stderr_fd.reset(ScopedFd::Create(stderr_filename_, 0600));
+    } else if (!stdout_filename_.empty() &&
+               console_output_option_ == MERGE_STDOUT_STDERR) {
+      // stdout is not empty, but stderr is empty.
+      stderr_fd.reset(dup(stdout_fd.fd()));
+    }
+  }
+
+  // Pipe for passing the program's pid and SubprocExit information.
+  // pipe(7) says write(2) of less than PIPE_BUF bytes must be atomic.
+  int pipe_fd[2];
+  PCHECK(pipe(pipe_fd) == 0);
+  exit_fd_.reset(pipe_fd[0]);
+  ScopedFd child_exit_fd(pipe_fd[1]);
+
+  // Fork a mediator that chdir()s and then posix_spawn()s |cmd|: posix_spawn
+  // alone can't control the current directory of each subprocess.
+  const char* dir = cwd.c_str();
+  const char* prog = cmd.c_str();
+  std::vector<const char*> argvp;
+  for (const auto& arg : args)
+    argvp.push_back(arg.c_str());
+  argvp.push_back(nullptr);
+  std::vector<const char*> envp;
+  for (const auto& env : envs)
+    envp.push_back(env.c_str());
+  envp.push_back(nullptr);
+
+  // SubprocessImpl will try to send SIGINT or SIGTERM to kill the subprocess
+  // but ignore them in this process. This process will wait for child process
+  // termination (child process will be killed by SIGINT or SIGTERM to
+  // the process group).
+  // Also block SIGCHLD until it resets SIGCHLD in child process.
+  sigset_t sigset;
+  sigemptyset(&sigset);
+  sigaddset(&sigset, SIGINT);
+  sigaddset(&sigset, SIGTERM);
+  sigaddset(&sigset, SIGCHLD);
+  PCHECK(sigprocmask(SIG_BLOCK, &sigset, nullptr) == 0);
+  pid_t pid = fork();
+  if (pid < 0) {
+    PLOG(ERROR) << "fork failed. pid=" << pid;
+    // Don't leave SIGINT, SIGTERM and SIGCHLD blocked in the caller.
+    PCHECK(sigprocmask(SIG_UNBLOCK, &sigset, nullptr) == 0);
+    pid_ = Spawner::kInvalidPid;
+    return pid_;
+  }
+  if (pid == 0) {
+    // child process.
+    // You can use only async-signal safe functions here.
+    SubprocExit se;
+
+    if (stdin_fd.valid() && dup2(stdin_fd.fd(), STDIN_FILENO) < 0) {
+      se.lineno = __LINE__ - 1;
+      se.last_errno = errno;
+      SubprocExitReport(child_exit_fd.fd(), se, 1);
+    }
+    if (stdout_fd.valid() && dup2(stdout_fd.fd(), STDOUT_FILENO) < 0) {
+      se.lineno = __LINE__ - 1;
+      se.last_errno = errno;
+      SubprocExitReport(child_exit_fd.fd(), se, 1);
+    }
+    if (stderr_fd.valid() && dup2(stderr_fd.fd(), STDERR_FILENO) < 0) {
+      se.lineno = __LINE__ - 1;
+      se.last_errno = errno;
+      SubprocExitReport(child_exit_fd.fd(), se, 1);
+    }
+    for (int i = STDERR_FILENO + 1; i < 256; ++i) {
+      if (i == child_exit_fd.fd()) continue;
+      close(i);
+    }
+
+    if (detach_) {
+      // Create own session.
+      if (setsid() < 0) {
+        se.lineno = __LINE__ - 1;
+        se.last_errno = errno;
+        SubprocExitReport(child_exit_fd.fd(), se, 1);
+      }
+      if (pid_t detached_pid = fork()) {
+        if (detached_pid < 0) {
+          se.lineno = __LINE__ - 2;
+          se.last_errno = errno;
+          SubprocExitReport(child_exit_fd.fd(), se, 1);
+        }
+        _exit(0);  // exit() is not async-signal safe.
+      }
+    } else {
+      // Create own process group.
+      if (setpgid(0, 0) != 0) {
+        se.lineno = __LINE__ - 1;
+        se.last_errno = errno;
+        SubprocExitReport(child_exit_fd.fd(), se, 1);
+      }
+    }
+
+    // Reset SIGCHLD handler.  we'll get exit status of prog_pid
+    // by blocking waitpid() later.
+    struct sigaction sa;
+    memset(&sa, 0, sizeof sa);
+    sa.sa_handler = SIG_DFL;
+    if (sigaction(SIGCHLD, &sa, nullptr) < 0) {
+      se.lineno = __LINE__ - 1;
+      se.last_errno = errno;
+      SubprocExitReport(child_exit_fd.fd(), se, 1);
+    }
+    // Ignore SIGINT and SIGTERM handler and unblock them.
+    sa.sa_handler = SIG_IGN;
+    if (sigaction(SIGINT, &sa, nullptr) < 0) {
+      se.lineno = __LINE__ - 1;
+      se.last_errno = errno;
+      SubprocExitReport(child_exit_fd.fd(), se, 1);
+    }
+    if (sigaction(SIGTERM, &sa, nullptr) < 0) {
+      se.lineno = __LINE__ - 1;
+      se.last_errno = errno;
+      SubprocExitReport(child_exit_fd.fd(), se, 1);
+    }
+
+    // |sigset| still holds exactly the signals blocked before fork().
+    if (sigprocmask(SIG_UNBLOCK, &sigset, nullptr) != 0) {
+      se.lineno = __LINE__ - 1;
+      se.last_errno = errno;
+      SubprocExitReport(child_exit_fd.fd(), se, 1);
+    }
+
+    if (chdir(dir) < 0) {
+      se.lineno = __LINE__ - 1;
+      se.last_errno = errno;
+      SubprocExitReport(child_exit_fd.fd(), se, 1);
+    }
+
+    posix_spawnattr_t spawnattr;
+    posix_spawnattr_init(&spawnattr);
+    // Reset SIGINT and SIGTERM signal handlers in child process.
+    sigset_t default_sigset;
+    sigemptyset(&default_sigset);
+    sigaddset(&default_sigset, SIGINT);
+    sigaddset(&default_sigset, SIGTERM);
+    if (int err = posix_spawnattr_setsigdefault(&spawnattr, &default_sigset)) {
+      se.lineno = __LINE__ - 1;
+      se.last_errno = err;  // posix_spawn* return the error; errno is unset.
+      SubprocExitReport(child_exit_fd.fd(), se, 1);
+    }
+    // Don't mask any signals in child process.
+    sigset_t sigmask;
+    sigemptyset(&sigmask);
+    if (int err = posix_spawnattr_setsigmask(&spawnattr, &sigmask)) {
+      se.lineno = __LINE__ - 1;
+      se.last_errno = err;
+      SubprocExitReport(child_exit_fd.fd(), se, 1);
+    }
+    // posix_spawn() ignores the sigdefault/sigmask attributes set above
+    // unless the corresponding flags are set.
+    if (int err = posix_spawnattr_setflags(
+            &spawnattr, POSIX_SPAWN_SETSIGDEF | POSIX_SPAWN_SETSIGMASK)) {
+      se.lineno = __LINE__ - 1;
+      se.last_errno = err;
+      SubprocExitReport(child_exit_fd.fd(), se, 1);
+    }
+    if (umask_ >= 0) {
+      umask(umask_);
+    }
+    pid_t prog_pid;
+    // TODO: use POSIX_SPAWN_USEVFORK (_GNU_SOURCE).
+    if (int err = posix_spawn(
+            &prog_pid, prog, nullptr, &spawnattr,
+            const_cast<char**>(&argvp[0]), const_cast<char**>(&envp[0]))) {
+      se.lineno = __LINE__ - 1;
+      se.last_errno = err;
+      SubprocExitReport(child_exit_fd.fd(), se, 1);
+    }
+    // report prog_pid to parent.
+    if (write(child_exit_fd.fd(), &prog_pid, sizeof(prog_pid))
+        != sizeof(prog_pid)) {
+      se.lineno = __LINE__ - 2;
+      se.last_errno = errno;
+      SubprocExitReport(child_exit_fd.fd(), se, 1);
+    }
+
+    int status = -1;
+    while (waitpid(prog_pid, &status, 0) == -1) {
+      if (errno != EINTR) break;
+    }
+    if (getrusage(RUSAGE_CHILDREN, &se.ru) != 0) {
+      se.lineno = __LINE__ - 1;
+      se.last_errno = errno;
+      SubprocExitReport(child_exit_fd.fd(), se, 1);
+    }
+    if (WIFSIGNALED(status)) {
+      se.signal = WTERMSIG(status);
+    }
+    SubprocExitReport(child_exit_fd.fd(), se, WEXITSTATUS(status));
+  }
+  pid_ = pid;
+  int r = read(exit_fd_.fd(), &prog_pid_, sizeof(prog_pid_));
+  if (r != sizeof(prog_pid_)) {
+    PLOG(ERROR) << "failed to get prog_pid for pid=" << pid_;
+    prog_pid_ = Spawner::kInvalidPid;
+  }
+  PCHECK(sigprocmask(SIG_UNBLOCK, &sigset, nullptr) == 0);
+
+  return pid_;
+}
+
+bool SpawnerPosix::Kill() {
+  // Signals the spawned program and the mediator.  Returns true if the
+  // process is still considered running, false otherwise.
+  if (pid_ == Spawner::kInvalidPid) {
+    // means not started yet.
+    return false;
+  }
+
+  // SIGINT unless a kill was already requested; escalate to SIGTERM then.
+  const int signo = is_signaled_ ? SIGTERM : SIGINT;
+  is_signaled_ = true;  // try to kill in Wait()
+  sent_sig_ = signo;
+  sig_timer_.Start();
+  bool still_running = (status_ == kInvalidProcessStatus);
+
+  // The spawned program: try its process group, then fall back to the pid.
+  if (prog_pid_ != Spawner::kInvalidPid && kill(-prog_pid_, signo) != 0) {
+    PLOG(WARNING) << " kill " << " prog_pgrp=" << prog_pid_;
+    if (kill(prog_pid_, signo) != 0) {
+      PLOG(WARNING) << " kill " << " prog_pid=" << prog_pid_;
+    }
+  }
+
+  // The mediator: same order, process group first.
+  if (kill(-pid_, signo) != 0) {
+    PLOG(WARNING) << " kill " << " pgrp=" << pid_;
+    if (kill(pid_, signo) != 0) {
+      PLOG(WARNING) << " kill " << " pid=" << pid_;
+      // Neither the group nor the process accepted the signal.
+      still_running = false;
+    }
+  }
+
+  return still_running;
+}
+
+bool SpawnerPosix::Wait(WaitPolicy wait_policy) {  // true once finished.
+  int status = -1;  // raw wait status of the mediator, if it gets reaped.
+  if (pid_ != Spawner::kInvalidPid) {
+    const bool need_kill = (wait_policy == NEED_KILL);
+    const int waitpid_options = (wait_policy == WAIT_INFINITE) ? 0 : WNOHANG;
+    int r;
+    bool pgrp_dead = false;  // process group pid_ can't be waited/signaled.
+    bool process_dead = false;  // process pid_ can't be waited/signaled.
+    if ((r = waitpid(-pid_, &status, waitpid_options)) == -1) {
+      if (errno == ECHILD) {
+        pgrp_dead = true;
+      } else {
+        PLOG(ERROR) << "waitpid " << " pgrp=" << pid_;
+      }
+    }
+    if (r == -1) {
+      // the process might have been killed before setting its process group.
+      if ((r = waitpid(pid_, &status, waitpid_options)) == -1) {
+        if (errno == ECHILD) {
+          process_dead = true;
+        } else {
+          PLOG(ERROR) << "waitpid " << " pid=" << pid_;
+        }
+      }
+    }
+    if (r == 0) {
+      // one or more children in the process group exist, but have not yet
+      // changed state.
+      if (!need_kill) {
+        // process is still running.
+        DCHECK(!pgrp_dead || !process_dead);
+        return false;
+      }
+    } else if (r == pid_) {
+      status_ = WEXITSTATUS(status);  // NOTE(review): WIFEXITED not checked.
+    }
+    // Check whether the subprocess itself and its process group still exist.
+    // Note that the subprocess might not have set its own process group yet,
+    // so we need to check pid_ too.
+    if (need_kill) {
+      int sig = SIGINT;
+      if (!pgrp_dead) {
+        if (sent_sig_ == 0) {
+          sent_sig_ = sig;
+          sig_timer_.Start();
+        }
+        if (kill(-pid_, sig) == -1) {
+          pgrp_dead = true;
+        }
+      }
+      if (!process_dead) {
+        if (sent_sig_ == 0) {
+          sent_sig_ = sig;
+          sig_timer_.Start();
+        }
+        if (kill(pid_, sig) == -1) {
+          process_dead = true;
+        }
+      }
+    }
+    if (!pgrp_dead && kill(-pid_, 0) == -1) {  // signal 0: existence probe.
+      pgrp_dead = true;
+    }
+    if (!process_dead && kill(pid_, 0) == -1) {
+      process_dead = true;
+    }
+    if (pgrp_dead && process_dead) {
+      if (subprocess_dying_) {
+        LOG(INFO) << "all process were finished. pid=" << pid_;
+      } else {
+        VLOG(2) << "all processes were finished. pid=" << pid_;
+      }
+    } else {
+      if (is_signaled_) {
+        LOG_IF(INFO, !subprocess_dying_)
+            << "process may still exist "
+            << " need_kill=" << need_kill
+            << " pgrp_dead=" << pgrp_dead
+            << " process_dead=" << process_dead
+            << " pid=" << pid_;
+        subprocess_dying_ = true;  // log the message above only once.
+      } else {
+        LOG_EVERY_N(INFO, 100)
+            << "process is running "
+            << " pid=" << pid_
+            << " need_kill=" << need_kill
+            << " pgrp_dead=" << pgrp_dead
+            << " process_dead=" << process_dead
+            << " is_signaled=" << is_signaled_;
+      }
+      return false;  // not finished yet.
+    }
+  }
+  string sig_source;  // where signal_ came from; only used for logging.
+  if (exit_fd_.valid()) {
+    SubprocExit se;
+    int r = read(exit_fd_.fd(), &se, sizeof(se));
+    if (r == sizeof(se)) {
+      if (se.lineno > 0 || se.last_errno > 0) {
+        LOG(WARNING) << "subproc abort: pid=" << pid_
+                     << " at " << __FILE__ << ":" << se.lineno
+                     << " err=" << strerror(se.last_errno)
+                     << "[" << se.last_errno << "]";
+      }
+      process_mem_kb_ = se.ru.ru_maxrss;  // NOTE(review): unit varies by OS.
+      signal_ = se.signal;
+      sig_source = "subproc_exit";
+      if (signal_ != 0 && signal_ != SIGINT && signal_ != SIGTERM) {
+        LOG(WARNING) << "subproc was terminated unexpectedly."
+                     << " pid=" << pid_
+                     << " signal=" << signal_;
+      } else if (signal_ == 0 && WIFSIGNALED(status)) {
+        signal_ = WTERMSIG(status);
+        sig_source = "wtermsig";
+        if (signal_ != 0 && signal_ != SIGINT && signal_ != SIGTERM) {
+          LOG(WARNING) << "mediator process was terminated unexpectedly."
+                       << " pid=" << pid_
+                       << " signal=" << signal_;
+        }
+      }
+    } else {
+      sig_source = "exit_fd_read_err";
+      PLOG(WARNING) << "read SubprocExit:"
+                    << " pid=" << pid_
+                    << " ret=" << r;
+    }
+  } else {
+    sig_source = "exit_fd_invalid";
+  }
+  if (console_output_) {
+    DCHECK(!console_out_file_.empty());
+    ReadFileToString(console_out_file_, console_output_);
+  }
+  LOG_IF(INFO, sent_sig_ != 0)
+      << "signal=" << sent_sig_ << " sent to pid=" << pid_
+      << " prog_pid=" << prog_pid_
+      << " " << sig_timer_.GetInMs() << "msec ago,"
+      << " terminated by signal=" << signal_
+      << " from " << sig_source
+      << " exit=" << status_;
+  return true;  // finished; exit information collected above.
+}
+
+bool SpawnerPosix::IsChildRunning() const {  // started, no exit status yet.
+  return pid_ != Spawner::kInvalidPid && status_ == kInvalidProcessStatus;
+}
+
+}  // namespace devtools_goma
diff --git a/client/spawner_posix.h b/client/spawner_posix.h
new file mode 100644
index 0000000..d319365
--- /dev/null
+++ b/client/spawner_posix.h
@@ -0,0 +1,62 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SPAWNER_POSIX_H_
+#define DEVTOOLS_GOMA_CLIENT_SPAWNER_POSIX_H_
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+#include "scoped_fd.h"
+#include "simple_timer.h"
+#include "spawner.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// A subclass of Spawner for POSIX.
+// It spawns a process internally to capture child process' output.
+class SpawnerPosix : public Spawner {
+ public:
+  SpawnerPosix();
+  ~SpawnerPosix() override;
+
+  int Run(const string& cmd, const std::vector<string>& args,
+          const std::vector<string>& envs, const string& cwd) override;
+  bool Kill() override;
+  bool Wait(WaitPolicy wait_policy) override;
+  bool IsChildRunning() const override;
+  bool IsSignaled() const override { return is_signaled_; }
+  void SetSignaled() override { is_signaled_ = true; }
+  int ChildStatus() const override { return status_; }
+  int64_t ChildMemKb() const override { return process_mem_kb_; }
+  int ChildTermSignal() const override { return signal_; }
+
+ private:
+  pid_t pid_;       // mediator process forked by Run().
+  pid_t prog_pid_;  // program spawned by the mediator, reported via pipe.
+  ScopedFd exit_fd_;  // read end of the pipe written by the mediator.
+  // true once it emits log message of killing the subprocess.
+  bool subprocess_dying_;
+  bool is_signaled_;  // set by Kill() or SetSignaled().
+  int sent_sig_;      // signal recorded as sent by Kill()/Wait(), or 0.
+  SimpleTimer sig_timer_;  // started when sent_sig_ is recorded.
+
+  int status_;  // WEXITSTATUS of the mediator once it has been reaped.
+  int64_t process_mem_kb_;  // ru_maxrss reported by the mediator, or -1.
+  int signal_;  // signal that terminated the program or the mediator, or 0.
+
+  string console_out_file_;  // temp file backing console_output_, if any.
+
+  DISALLOW_COPY_AND_ASSIGN(SpawnerPosix);
+};
+
+typedef SpawnerPosix PlatformSpawner;
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SPAWNER_POSIX_H_
diff --git a/client/spawner_win.cc b/client/spawner_win.cc
new file mode 100644
index 0000000..e9b534a
--- /dev/null
+++ b/client/spawner_win.cc
@@ -0,0 +1,918 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "spawner_win.h"
+
+#include <psapi.h>
+#pragma comment(lib, "psapi.lib")
+
+#include <algorithm>
+#include <deque>
+#include <memory>
+#include <sstream>
+
+#include <glog/logging.h>
+
+#include "compiler_specific.h"
+#include "file.h"
+#include "file_dir.h"
+#include "mypath.h"
+#include "path.h"
+#include "string_piece.h"
+#include "strutil.h"
+#include "util.h"
+
+#define MAX_ENV_BLOCK 32767
+
+namespace {
+
+// Timeout handed to WaitForSingleObject()/WaitForMultipleObjects() when
+// polling (the Win32 wait functions take it in milliseconds).
+const DWORD kWaitTimeout = 10;
+// Exit code passed to TerminateJobObject()/TerminateProcess(); also recorded
+// as the child's status when its real exit code cannot be obtained.
+const DWORD kTerminateExitCode = 1;
+
+// Returns the path of the per-process temporary directory: the goma tmp dir
+// joined with "goma_temp.<current process id>".
+string GetSubprocTempDirectory() {
+  std::ostringstream dir_name;
+  dir_name << "goma_temp." << GetCurrentProcessId();
+  const string goma_tmp_dir = devtools_goma::GetGomaTmpDir();
+  return file::JoinPath(goma_tmp_dir, dir_name.str());
+}
+
+// Returns true when var_strcaseprefix() yields a non-null result for
+// |env_line| and |env_prefix| (e.g. "PATH="), i.e. |env_line| starts with
+// the prefix -- presumably compared case-insensitively, judging by the
+// helper's name.
+bool IsEnvVar(const string& env_line, const string& env_prefix) {
+  const auto* rest = var_strcaseprefix(env_line.c_str(), env_prefix.c_str());
+  return rest != nullptr;
+}
+
+// Escapes |arg| so that a child using the standard Windows argv parsing
+// rules (MSVCRT / CommandLineToArgvW) receives exactly |arg|:
+//  - a run of N backslashes followed by '"' becomes 2N+1 backslashes + '"',
+//  - the argument is wrapped in double quotes when it is empty or contains
+//    whitespace, and backslashes right before the closing quote are doubled
+//    so they do not escape it,
+//  - all other backslashes are left untouched.
+// Arguments that need neither quoting nor escaping are returned unchanged.
+string EscapeCommandlineArg(const string& arg) {
+  // An empty argument must be spelled "" or it vanishes from argv; an
+  // argument with whitespace must be quoted or it is split.
+  const bool need_quote =
+      arg.empty() || arg.find_first_of(" \t\n\v") != string::npos;
+
+  string escaped_arg;
+  escaped_arg.reserve(arg.size() + 2);
+  if (need_quote) {
+    escaped_arg += '"';
+  }
+
+  // Number of consecutive backslashes seen immediately before the current
+  // position (they have already been copied once).
+  size_t num_backslashes = 0;
+  for (char ch : arg) {
+    if (ch == '\\') {
+      ++num_backslashes;
+    } else {
+      if (ch == '"') {
+        // Double the preceding backslashes and add one to escape the quote.
+        escaped_arg.append(num_backslashes + 1, '\\');
+      }
+      num_backslashes = 0;
+    }
+    escaped_arg += ch;
+  }
+
+  if (need_quote) {
+    // Trailing backslashes would otherwise escape the closing quote.
+    escaped_arg.append(num_backslashes, '\\');
+    escaped_arg += '"';
+  }
+  return escaped_arg;
+}
+
+// Builds the command line string that is handed to CreateProcess.
+//
+// PATH and PATHEXT are looked up in [env_begin, env_end) and passed to
+// GetRealExecutablePath() together with |prog| and |cwd|; the program it
+// reports becomes the first token.  Every argument after argv[0] is then
+// appended, escaped and separated by a single space.
+// Returns an empty string when GetRealExecutablePath() fails.
+//
+// Iter should be an iterator of string containers.
+template <typename Iter>
+string PrepareCommandLine(const char* cwd, const char* prog,
+                          Iter env_begin, Iter env_end,
+                          Iter argv_begin, Iter argv_end) {
+  const string path_prefix("PATH=");
+  const string pathext_prefix("PATHEXT=");
+
+  // Check if we have PATH spec and/or PATHEXT spec; the last match wins.
+  string path_spec;
+  string pathext_spec;
+  for (Iter env = env_begin; env != env_end; ++env) {
+    if (IsEnvVar(*env, path_prefix)) {
+      path_spec = env->substr(path_prefix.size());
+    }
+    if (IsEnvVar(*env, pathext_prefix)) {
+      pathext_spec = env->substr(pathext_prefix.size());
+    }
+  }
+
+  // TODO: remove this when |prog| become full path.
+  CHECK(!path_spec.empty()) << "PATH env. should be set.";
+  CHECK(!pathext_spec.empty()) << "PATHEXT env. should be set.";
+
+  string command_line;
+  const bool found = devtools_goma::GetRealExecutablePath(
+      nullptr, prog, cwd, path_spec, pathext_spec, &command_line,
+      nullptr, nullptr);
+  if (!found) {
+    return string();
+  }
+
+  // Escape the program unless it already starts with a quote.
+  if (command_line[0] != '\"') {
+    command_line = EscapeCommandlineArg(command_line);
+  }
+
+  // argv[0] should be prog, so only argv[1..] are appended.
+  Iter arg = argv_begin;
+  if (arg != argv_end) {
+    ++arg;
+  }
+  for (; arg != argv_end; ++arg) {
+    command_line.append(" ");
+    command_line.append(EscapeCommandlineArg(*arg));
+  }
+
+  return command_line;
+}
+
+// Packs the environment strings in [begin, end) into |env| as a Win32
+// environment block: every entry is NUL-terminated and the block is closed
+// by one additional NUL.  |env| is resized to the maximum block size.
+//
+// An entry that does not fit (together with the final terminator) is
+// dropped, along with all following entries, after logging a warning.  The
+// capacity check is done BEFORE copying: strcpy_s() invokes the invalid
+// parameter handler when the destination is too small, and an entry that
+// exactly filled the buffer used to leave the block without its final NUL.
+//
+// Iter should be an iterator of string containers.
+template <typename Iter>
+void PrepareEnvBlock(Iter begin, Iter end, std::vector<char>* env) {
+  const size_t kMaxEnv = 32767;
+  env->resize(kMaxEnv);  // max env size
+  size_t index = 0;
+  for (Iter i = begin; i != end; ++i) {
+    const string& e = *i;
+    const size_t len = e.size();
+    // Room is needed for the entry, its NUL and the block's final NUL.
+    // |index| never exceeds kMaxEnv - 1, so the subtraction cannot wrap.
+    if (len + 1 >= kMaxEnv - index) {
+      LOG(WARNING) << "env block exceeds capacity";
+      break;
+    }
+    strcpy_s(&((*env)[index]), kMaxEnv - index, e.c_str());
+    index += len + 1;
+  }
+  // Final terminator; for an empty range this leaves (*env)[0] == 0, which
+  // callers use to detect "no environment given".
+  env->at(index) = 0;
+}
+
+// Builds the job object name from the child's |pid| and the basename of
+// the program at the head of |command| (a command line as produced by
+// PrepareCommandLine).  The result is capped at MAX_PATH characters.
+string CreateJobName(DWORD pid, StringPiece command) {
+  // Isolate the program token.  A quoted program ("<prog>") ends at its
+  // closing quote; an unquoted one ends at the first space.  Only strip a
+  // leading character when it really is a quote, and never treat a quote
+  // that belongs to a later argument as the end of the program.
+  if (!command.empty() && command[0] == '"') {
+    size_t pos = command.find_first_of('"', 1);
+    if (pos != StringPiece::npos) {
+      command = command.substr(1, pos - 1);
+    }
+  } else {
+    size_t pos = command.find_first_of(' ');
+    if (pos != StringPiece::npos) {
+      command = command.substr(0, pos);
+    }
+  }
+
+  std::ostringstream ss;
+  ss << "goma job:"
+     << " pid=" << pid
+     << " exe=" << file::Basename(command);
+  string job_name(ss.str());
+  if (job_name.length() > MAX_PATH)
+    job_name.erase(MAX_PATH);
+  return job_name;
+}
+
+// Stores the peak working set size of |child_handle| into |*mem_bytes|.
+// If the memory counters cannot be queried the error is logged and
+// |*mem_bytes| is left untouched.
+void SetProcessMemoryUsage(HANDLE child_handle, SIZE_T* mem_bytes) {
+  PROCESS_MEMORY_COUNTERS counters;
+  if (!GetProcessMemoryInfo(child_handle, &counters, sizeof(counters))) {
+    LOG_SYSRESULT(GetLastError());
+    return;
+  }
+  *mem_bytes = counters.PeakWorkingSetSize;
+}
+
+// Waits up to |timeout| for the thread held by |thread| to finish.
+// Returns false only when the wait timed out.  When the thread has finished
+// its handle is released; an invalid |thread| and unexpected wait results
+// (which are logged) also return true.
+bool WaitThread(devtools_goma::ScopedFd* thread, DWORD timeout) {
+  if (!thread->valid()) {
+    return true;
+  }
+
+  const DWORD r = WaitForSingleObject(thread->handle(), timeout);
+  if (r == WAIT_TIMEOUT) {
+    VLOG(1) << "wait timeout=" << timeout;
+    return false;
+  }
+
+  if (r == WAIT_OBJECT_0) {
+    thread->reset(nullptr);
+  } else if (r == WAIT_ABANDONED) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Wait: join Thread error?"
+               << " thread=" << thread->handle();
+  } else {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Unexpected return value for WaitForSingleObject."
+               << " r=" << r;
+  }
+  return true;
+}
+
+}  // namespace
+
+namespace devtools_goma {
+
+// Value of process_status_ while no child is tracked: set by the
+// constructor and restored by CleanUp().
+static const DWORD kInvalidProcessStatus = 0xffffffff;
+
+// Temporary directory substituted for TEMP/TMP in children's environment.
+// Allocated by Setup() and released by TearDown(); null otherwise.
+string* SpawnerWin::temp_dir_;
+
+// Prepares a fresh, empty temporary directory for subprocesses and
+// remembers its path in temp_dir_.  Any previously remembered path is
+// discarded.  Dies (CHECK) if the directory cannot be created.
+/* static */
+void SpawnerWin::Setup() {
+  // Deleting a null pointer is a no-op, so no explicit check is needed.
+  delete temp_dir_;
+  temp_dir_ = new string(GetSubprocTempDirectory());
+
+  const string& dir = *temp_dir_;
+  // Remove leftovers from an earlier run before re-creating the directory.
+  RecursivelyDelete(dir);
+  CHECK(File::CreateDir(dir.c_str(), 0755)) << dir.c_str();
+  LOG(INFO) << "Create temp dir: " << dir;
+}
+
+// Removes the temporary directory created by Setup() and forgets its path.
+// Does nothing when Setup() has not been called (temp_dir_ is null).
+/* static */
+void SpawnerWin::TearDown() {
+  if (temp_dir_ != nullptr) {
+    const bool removed = RecursivelyDelete(*temp_dir_);
+    if (removed) {
+      LOG(INFO) << "Remove temp dir: " << *temp_dir_;
+    } else {
+      LOG(ERROR) << "Remove temp dir failed?: " << *temp_dir_;
+    }
+    delete temp_dir_;
+    temp_dir_ = nullptr;
+  }
+}
+
+// Starts with no threads, no child and an invalid process status.
+SpawnerWin::SpawnerWin()
+    : input_thread_(nullptr),
+      input_thread_id_(0),
+      stop_input_thread_(false),
+      output_thread_(nullptr),
+      output_thread_id_(0),
+      stop_output_thread_(nullptr),
+      process_status_(kInvalidProcessStatus),
+      process_mem_bytes_(0),
+      is_signaled_(false) {
+}
+
+// Stops any helper threads still running and releases all handles via
+// CleanUp().
+SpawnerWin::~SpawnerWin() {
+  CleanUp();
+}
+
+// Starts |cmd| with arguments |args| (args[0] should be the program itself),
+// environment |envs| and working directory |cwd|.
+//
+// TEMP/TMP entries of |envs| are replaced with the directory prepared by
+// Setup() when there is one.  If redirection files or a console output
+// buffer have been set and the process is not detached, the launch is
+// delegated to RunRedirected(); otherwise the child is created suspended,
+// placed in a new job object and then resumed.
+//
+// Returns the child's process id, or Spawner::kInvalidPid on any failure
+// (executable not found, or CreateProcess failed).
+int SpawnerWin::Run(const string& cmd, const std::vector<string>& args,
+                    const std::vector<string>& envs, const string& cwd) {
+  DCHECK(!child_process_.valid());
+
+  std::vector<string> environs;
+  for (const auto& e : envs) {
+    if (temp_dir_ != nullptr) {
+      if (IsEnvVar(e, "TEMP=")) {
+        environs.push_back("TEMP=" + *temp_dir_);
+        continue;
+      }
+      if (IsEnvVar(e, "TMP=")) {
+        environs.push_back("TMP=" + *temp_dir_);
+        continue;
+      }
+    }
+    environs.push_back(e);
+  }
+
+  // Both launch paths need the same command line and environment block.
+  string command_line =
+      PrepareCommandLine(cwd.c_str(), cmd.c_str(),
+                         environs.cbegin(), environs.cend(),
+                         args.begin(), args.end());
+  if (command_line.empty()) {
+    return Spawner::kInvalidPid;
+  }
+  std::vector<char> envp;
+  PrepareEnvBlock(environs.cbegin(), environs.cend(), &envp);
+
+  // Having files to redirect or console output should be gathered.
+  // And do not detach.
+  bool need_redirect =
+      (!(stdin_filename_.empty() &&
+         stdout_filename_.empty() &&
+         stderr_filename_.empty()) ||
+       console_output_) && !detach_;
+  if (need_redirect) {
+    DCHECK(!console_output_ ||
+           (stdout_filename_.empty() && stderr_filename_.empty()))
+        << "You cannot use SetFileRedirection with SetConsoleOutputBuffer"
+        << " console_output_=" << console_output_
+        << " stdout_filename_=" << stdout_filename_
+        << " stderr_filename_=" << stderr_filename_;
+
+    return RunRedirected(command_line, &envp, cwd, stdout_filename_,
+                         stdin_filename_);
+  }
+
+  PROCESS_INFORMATION pi;
+  STARTUPINFOA si;
+
+  ZeroMemory(&pi, sizeof(pi));
+  ZeroMemory(&si, sizeof(si));
+  si.cb = sizeof(si);
+
+  DWORD create_flag = 0;
+  if (detach_) {
+    create_flag |= DETACHED_PROCESS;
+  }
+
+  VLOG(1) << "Run: command_line:" << command_line
+          << " cwd:" << cwd;
+
+  // If environment is empty, use parent process's environment.
+  LPVOID env_ptr = envp[0] ? &(envp[0]) : nullptr;
+  // Created suspended so the child cannot run (or spawn grandchildren)
+  // before it has been assigned to its job object.
+  const DWORD process_create_flag =
+      create_flag | CREATE_SUSPENDED | CREATE_BREAKAWAY_FROM_JOB;
+  if (!CreateProcessA(nullptr, &(command_line[0]), nullptr, nullptr, FALSE,
+                      process_create_flag, env_ptr, cwd.c_str(), &si, &pi)) {
+    // Log the system error first so that nothing overwrites GetLastError().
+    LOG_SYSRESULT(GetLastError());
+    // job_name_ has not been set for this launch yet, so report the command
+    // itself, and return kInvalidPid like every other failure path does
+    // (|pi| is still zeroed here).
+    LOG(ERROR) << "failed to CreateProcess"
+               << " cmd: " << command_line
+               << " cwd: " << cwd;
+    return Spawner::kInvalidPid;
+  }
+
+  child_process_.reset(pi.hProcess);
+  job_name_ = CreateJobName(pi.dwProcessId, command_line);
+  VLOG(1) << "Job name:" << job_name_;
+  child_job_ = AssignProcessToNewJobObject(
+      child_process_.handle(), job_name_);
+
+  process_status_ = STILL_ACTIVE;
+  ResumeThread(pi.hThread);
+  CloseHandle(pi.hThread);
+
+  VLOG(1) << "Run: pid=" << pi.dwProcessId;
+  return pi.dwProcessId;
+}
+
+// Waits up to |timeout| for the child and refreshes process_status_:
+//  - STILL_ACTIVE when the wait timed out,
+//  - the child's exit code when it has exited,
+//  - kTerminateExitCode when the wait failed or the exit code cannot be
+//    read (the process is then assumed dead).
+void SpawnerWin::UpdateProcessStatus(DWORD timeout) {
+  const DWORD res = WaitForSingleObject(child_process_.handle(), timeout);
+  switch (res) {
+    case WAIT_TIMEOUT:
+      process_status_ = STILL_ACTIVE;
+      return;
+    case WAIT_FAILED:
+      LOG_SYSRESULT(GetLastError());
+      LOG(ERROR) << "Failed to update child process status. job_name="
+                 << job_name_;
+      process_status_ = kTerminateExitCode;
+      return;
+    default:
+      break;
+  }
+
+  DCHECK_EQ(res, WAIT_OBJECT_0);
+
+  if (GetExitCodeProcess(child_process_.handle(), &process_status_)) {
+    return;
+  }
+  // TODO: come up with good way to handle this.
+  // The failure is expected to be temporary, so it is only logged.
+  LOG_SYSRESULT(GetLastError());
+  LOG(ERROR) << "Process should be signaled but we cannot get exit code."
+             << " job_name=" << job_name_;
+  // Assume the process is dead, and set kTerminateExitCode.
+  process_status_ = kTerminateExitCode;
+}
+
+// Terminates the child (only once) and waits up to |timeout| for it to go
+// away.
+// On the first call (is_signaled_ not yet set) the input thread is asked to
+// stop and the job object -- or the bare process when there is no valid
+// job -- is terminated with kTerminateExitCode.
+// Returns true if the child is still considered active (including when the
+// wait timed out); returns false once it has exited or when the wait itself
+// failed.
+bool SpawnerWin::KillAndWait(DWORD timeout) {
+  if (!is_signaled_) {
+    if (input_thread_.valid()) {
+      stop_input_thread_ = true;
+    }
+    if (child_job_.valid()) {
+      if (!TerminateJobObject(child_job_.handle(), kTerminateExitCode))
+        LOG_SYSRESULT(GetLastError());
+    } else {
+      if (!TerminateProcess(child_process_.handle(), kTerminateExitCode))
+        LOG_SYSRESULT(GetLastError());
+    }
+    is_signaled_ = true;
+  }
+
+  std::vector<HANDLE> handles;
+  if (child_job_.valid())
+    handles.push_back(child_job_.handle());
+  handles.push_back(child_process_.handle());
+  // Wait the process is terminated.
+  // Since WaitForSingleObject(child_job_.handle()) seems not wait termination
+  // of |child_process_|, we need to wait it.
+  VLOG(1) << "Wait: child timeout=" << timeout;
+  // bWaitAll is TRUE: the call succeeds only when every handle is signaled.
+  DWORD ret = WaitForMultipleObjects(
+      handles.size(), &(handles[0]), TRUE, timeout);
+  if (ret == WAIT_TIMEOUT) {
+    VLOG(1) << "wait timeout=" << timeout;
+    return true;
+  } else if (ret < WAIT_OBJECT_0 || ret > WAIT_OBJECT_0 + handles.size() - 1) {
+    // Some handlers are abandoned or WAIT_FAILED.
+    // See: http://msdn.microsoft.com/en-us/library/windows/desktop/ms687025(v=vs.85).aspx
+    // TODO: come up with good way to handle this.
+    // I expect it temporary error, and return false to make a SpawnerWin user
+    // ignore this error.
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Wait: termiante error? Process seems not signaled."
+               << " WaitForMultipleObjects returned:" << ret
+               << " nCount=" << handles.size()
+               << " timeout=" << timeout
+               << " job_name=" << job_name_;
+    return false;
+  }
+  // All handles are signaled; pick up the child's exit code.
+  UpdateProcessStatus(timeout);
+  return process_status_ == STILL_ACTIVE;
+}
+
+// Releases everything tied to a finished child: joins the input thread
+// (waiting up to |timeout|), records the child's memory usage, closes the
+// process and job handles, and joins the output thread.
+// Always returns true.
+bool SpawnerWin::FinalizeProcess(DWORD timeout) {
+  VLOG(1) << "Wait: child_process finished " << process_status_;
+  if (!WaitThread(&input_thread_, timeout)) {
+    LOG(WARNING) << "input thread timed out=" << timeout
+                 << " job_name=" << job_name_;
+  }
+  CHECK(child_process_.valid());
+  // Must be queried before the process handle is released below.
+  SetProcessMemoryUsage(child_process_.handle(), &process_mem_bytes_);
+  child_process_.reset(nullptr);
+  if (!child_job_.Close()) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Failed to release child job handle. "
+               << "job_name=" << job_name_;
+  }
+  // OutputThread should terminate with failing to read from child_stdout and
+  // child_stderr from child process, which should happen when child process
+  // has been terminated.
+  // If the OutputThread doesn't finish with some error, we'll trigger
+  // stop_output_thread_ in CleanUp to terminate OutputThread.
+  // NOTE(review): this wait uses INFINITE, so the timeout branch below
+  // (which prints |timeout|, not INFINITE) looks unreachable -- confirm.
+  if (!WaitThread(&output_thread_, INFINITE)) {
+    LOG(INFO) << "output thread timed out=" << timeout
+              << " job_name=" << job_name_;
+  }
+  // The output thread calls Flush() before exiting, which releases both
+  // files; a still-valid handle here is therefore reported as an error.
+  LOG_IF(ERROR, stdout_file_.valid())
+      << "stdout_file is still valid. job_name=" << job_name_;
+  LOG_IF(ERROR, stderr_file_.valid())
+      << "stderr_file is still valid. job_name=" << job_name_;
+  return true;
+}
+
+// Terminates the child and waits for it for at most kWaitTimeout.
+// Returns KillAndWait()'s result: true if the child is still active.
+bool SpawnerWin::Kill() {
+  return KillAndWait(kWaitTimeout);
+}
+
+// Waits for the child according to |wait_policy|:
+//  - WAIT_INFINITE blocks until the child exits,
+//  - NEED_KILL polls once and kills the child if it is still running,
+//  - any other policy just polls once (kWaitTimeout).
+// Returns true if the child is still running afterwards, false once it has
+// finished (in which case it has been finalized) or was already gone.
+bool SpawnerWin::Wait(Spawner::WaitPolicy wait_policy) {
+  DWORD timeout = kWaitTimeout;
+  if (wait_policy == Spawner::WAIT_INFINITE) {
+    timeout = INFINITE;
+  }
+
+  // child_process_ is valid while subprocess is running.
+  if (!child_process_.valid()) {
+    VLOG(1) << "Wait: child_process already invalid";
+    CHECK_NE(STILL_ACTIVE, process_status_);
+    LOG_IF(ERROR, stdout_file_.valid())
+        << "stdout_file is still valid. job_name=" << job_name_;
+    LOG_IF(ERROR, stderr_file_.valid())
+        << "stderr_file is still valid. job_name=" << job_name_;
+    return false;
+  }
+
+  UpdateProcessStatus(timeout);
+  if (process_status_ == STILL_ACTIVE) {
+    // Process is still active; leave it alone unless asked to kill it.
+    if (wait_policy != Spawner::NEED_KILL) {
+      return true;
+    }
+    VLOG(1) << "Wait: need kill";
+    if (KillAndWait(timeout)) {
+      return true;
+    }
+  }
+  // Process is not active any more.
+  return !FinalizeProcess(timeout);
+}
+
+// Starts |command_line| in |cwd| with the environment block |env|, wiring
+// the child's standard handles to pipes owned by this object:
+//  - stdout goes to a pipe read through child_stdout_; when |out_file| is
+//    given, stdout_file_ is opened on it so the output can be copied there,
+//  - stderr is sent either to the handle from ScopedFd::OpenNull()
+//    (STDOUT_ONLY) or to a duplicate of the stdout pipe
+//    (MERGE_STDOUT_STDERR),
+//  - stdin comes from a pipe written through child_stdin_; when |in_file|
+//    is given an InputThread is started to feed it.
+// The child is created suspended, assigned to a new job object, and resumed
+// after the output thread has been created.
+// Returns the child's process id, or kInvalidPid on failure.
+// TODO: make stderr stored to the specified file.
+int SpawnerWin::RunRedirected(const string& command_line,
+                              std::vector<char>* env,
+                              const string& cwd,
+                              const string& out_file,
+                              const string& in_file) {
+  VLOG(1) << "RunRedirect: command_line:" << command_line
+          << " cwd:" << cwd
+          << " out_file:" << out_file
+          << " in_file:" << in_file;
+  CHECK_GT(command_line.length(), 0U);
+  // Manual-reset event, initially non-signaled; CleanUp() sets it to stop
+  // the output thread.
+  stop_output_thread_.reset(CreateEvent(nullptr, TRUE, FALSE, nullptr));
+  PCHECK(stop_output_thread_.valid());
+
+  PROCESS_INFORMATION pi;
+  STARTUPINFOA si;
+
+  ZeroMemory(&pi, sizeof(PROCESS_INFORMATION));
+  ZeroMemory(&si, sizeof(STARTUPINFO));
+  si.cb = sizeof(STARTUPINFO);
+
+  SECURITY_ATTRIBUTES sa;
+
+  // Pipe handles created with |sa| are inheritable by the child.
+  sa.nLength = sizeof(SECURITY_ATTRIBUTES);
+  sa.lpSecurityDescriptor = nullptr;
+  sa.bInheritHandle = TRUE;
+
+  ScopedFd stdout_read_tmp, stderr_read_tmp;  // parent stdout/err read handle
+  ScopedFd stdout_write, stderr_write;  // child stdout/err write handle
+  ScopedFd stdin_write_tmp;  // parent stdin write handle
+  ScopedFd stdin_read;  // child stdin read handle
+
+  // Create child stdout pipe
+  if (!CreatePipe(stdout_read_tmp.ptr(), stdout_write.ptr(), &sa, 0)) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Failed to create pipe for stdout. "
+               << " cmd: " << command_line
+               << " cwd: " << cwd;
+    return kInvalidPid;
+  }
+
+  switch (console_output_option_) {
+    case STDOUT_ONLY:
+      stderr_write.reset(ScopedFd::OpenNull());
+      if (!stderr_write.valid()) {
+        LOG(ERROR) << "Failed to open NUL."
+                   << " cmd: " << command_line
+                   << " cwd: " << cwd;
+        return kInvalidPid;
+      }
+      break;
+    case MERGE_STDOUT_STDERR:
+      // TODO: During development, I found that stderr output are
+      //                  not redirected to the pipe as stdout.  Both MSDN and
+      //                  CodeProject examples redirect out/err to same file.
+      //                  I'm not sure if that's a bug on Windows side or my
+      //                  end.  Due to schedule, I'll just output both to same
+      //                  file for now.
+      if (!DuplicateHandle(GetCurrentProcess(), stdout_write.handle(),
+                           GetCurrentProcess(), stderr_write.ptr(),
+                           0, TRUE, DUPLICATE_SAME_ACCESS)) {
+        LOG_SYSRESULT(GetLastError());
+        LOG(ERROR) << "Failed to duplicate stderr handle."
+                   << " cmd: " << command_line
+                   << " cwd: " << cwd;
+        return kInvalidPid;
+      }
+      break;
+    default:
+      LOG(ERROR) << "Unknown console_output_option is set:"
+                 << console_output_option_;
+      return kInvalidPid;
+  }
+
+  // Create child stdin pipe
+  if (!CreatePipe(stdin_read.ptr(), stdin_write_tmp.ptr(), &sa, 0)) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Failed to create pipe for stdin. "
+               << " cmd: " << command_line
+               << " cwd: " << cwd;
+    return kInvalidPid;
+  }
+
+  // The parent-side pipe ends are duplicated as NON-inheritable handles
+  // (bInheritHandle == FALSE) and the inheritable originals are closed
+  // below, so the child does not inherit the parent's ends of the pipes.
+  if (!DuplicateHandle(GetCurrentProcess(), stdout_read_tmp.handle(),
+                       GetCurrentProcess(), child_stdout_.ptr(),
+                       0, FALSE, DUPLICATE_SAME_ACCESS)) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Failed to duplicate stdout handle."
+               << " cmd: " << command_line
+               << " cwd: " << cwd;
+    return kInvalidPid;
+  }
+
+  if (!DuplicateHandle(GetCurrentProcess(), stdin_write_tmp.handle(),
+                       GetCurrentProcess(), child_stdin_.ptr(),
+                       0, FALSE, DUPLICATE_SAME_ACCESS)) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Failed to duplicate stdin handle."
+               << " cmd: " << command_line
+               << " cwd: " << cwd;
+    return kInvalidPid;
+  }
+
+  stdout_read_tmp.reset(nullptr);
+  stderr_read_tmp.reset(nullptr);
+  stdin_write_tmp.reset(nullptr);
+
+  // NOTE(review): the CreateFileA result is not checked here; ReadFromPipe()
+  // skips writing when the handle is INVALID_HANDLE_VALUE or 0.
+  if (!out_file.empty()) {
+    string file_path = file::JoinPathRespectAbsolute(cwd, out_file);
+    stdout_file_.reset(CreateFileA(file_path.c_str(), GENERIC_WRITE,
+                                   FILE_SHARE_WRITE, nullptr, CREATE_ALWAYS,
+                                   FILE_ATTRIBUTE_NORMAL, nullptr));
+  }
+
+  si.hStdOutput = stdout_write.handle();
+  si.hStdInput = stdin_read.handle();
+  si.hStdError = stderr_write.handle();
+  si.wShowWindow = SW_HIDE;
+  si.dwFlags = STARTF_USESTDHANDLES | STARTF_USESHOWWINDOW;
+
+  // If environment is empty, use parent process's environment.
+  LPVOID env_ptr = (*env)[0] ? &((*env)[0]) : nullptr;
+  // CreateProcessA takes a mutable command line buffer, hence the copy.
+  string cmd = command_line;
+  // TODO: Code around here looks like Run().
+  // Can we share some code?
+  const DWORD process_create_flag =
+      CREATE_NEW_CONSOLE | CREATE_SUSPENDED | CREATE_BREAKAWAY_FROM_JOB;
+  BOOL result = CreateProcessA(nullptr, &(cmd[0]), nullptr, nullptr, TRUE,
+                               process_create_flag,
+                               env_ptr, cwd.c_str(), &si, &pi);
+
+  if (!result) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Failed to create process."
+               << " cmd: " << command_line
+               << " cwd: " << cwd;
+    return kInvalidPid;
+  }
+  // Child launched, close parent copy of pipe handles.
+  stdout_write.reset(nullptr);
+  stderr_write.reset(nullptr);
+  stdin_read.reset(nullptr);
+
+  process_status_ = STILL_ACTIVE;
+  child_process_.reset(pi.hProcess);
+  job_name_ = CreateJobName(pi.dwProcessId, command_line);
+  VLOG(1) << "Job name:" << job_name_;
+  child_job_ = AssignProcessToNewJobObject(child_process_.handle(), job_name_);
+
+  // The output thread is created before the (suspended) child is resumed.
+  output_thread_.reset(
+      CreateThread(nullptr, 0, OutputThread, this, 0, &output_thread_id_));
+  ResumeThread(pi.hThread);
+  CloseHandle(pi.hThread);
+
+  if (!in_file.empty()) {
+    input_file_ = in_file;
+    input_thread_.reset(
+        CreateThread(nullptr, 0, InputThread, this, 0, &input_thread_id_));
+  }
+
+  VLOG(1) << "Run: pid=" << pi.dwProcessId;
+  return pi.dwProcessId;
+}
+
+// Creates a job object named |job_name|, configures it so that all of its
+// processes are killed when the job handle is closed and so that processes
+// may not break away from it, then assigns |child_process| to it.
+// Returns the job handle, or a default-constructed ScopedFd on any failure
+// (including when an object named |job_name| already exists).
+// static
+ScopedFd SpawnerWin::AssignProcessToNewJobObject(
+    ScopedFd::FileDescriptor child_process, const string& job_name) {
+  ScopedFd job_fd(CreateJobObjectA(nullptr, job_name.c_str()));
+  if (!job_fd.handle()) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "failed to CreateJobObject"
+               << " job_name=" << job_name;
+    return ScopedFd();
+  }
+
+  // CreateJobObject succeeded but reports an existing object of that name
+  // through the last-error value; that case is treated as a failure.
+  if (GetLastError() == ERROR_ALREADY_EXISTS) {
+    LOG(ERROR) << "Object already exist."
+               << " job_name=" << job_name;
+    return ScopedFd();
+  }
+
+  // We kill all processes associated with the job when the handle is closed.
+  // To force it, we prevent child processes from breaking away the job.
+  // Note that we need to use JOBOBJECT_EXTENDED_LIMIT_INFORMATION to set them.
+  // See:
+  // http://msdn.microsoft.com/en-us/library/windows/desktop/ms684161(v=vs.85).aspx#managing_job_objects
+  // http://msdn.microsoft.com/en-us/library/windows/desktop/ms684147(v=vs.85).aspx
+  // http://msdn.microsoft.com/en-us/library/windows/desktop/ms684925(v=vs.85).aspx
+  // http://msdn.microsoft.com/en-us/library/windows/desktop/ms686216(v=vs.85).aspx
+  // Read the current limits first so that only the flags below are changed.
+  JOBOBJECT_EXTENDED_LIMIT_INFORMATION info;
+  if (!QueryInformationJobObject(job_fd.handle(),
+                                 JobObjectExtendedLimitInformation,
+                                 &info,
+                                 sizeof(info),
+                                 nullptr)) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "failed to get job extended limit info"
+               << " job name=" << job_name;
+    return ScopedFd();
+  }
+  info.BasicLimitInformation.LimitFlags |= JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE;
+  info.BasicLimitInformation.LimitFlags &= ~JOB_OBJECT_LIMIT_BREAKAWAY_OK;
+  info.BasicLimitInformation.LimitFlags &=
+      ~JOB_OBJECT_LIMIT_SILENT_BREAKAWAY_OK;
+  if (!SetInformationJobObject(job_fd.handle(),
+                               JobObjectExtendedLimitInformation,
+                               &info, sizeof(info))) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "failed to set job extended limit info"
+               << " job name=" << job_name;
+    return ScopedFd();
+  }
+
+  if (!AssignProcessToJobObject(job_fd.handle(), child_process)) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "failed to AssignProcessToJobObject"
+               << " job_name=" << job_name;
+    return ScopedFd();
+  }
+
+  return job_fd;
+}
+
+// Stops and joins any helper thread that is still running and releases every
+// handle owned by this object, returning process_status_ to
+// kInvalidProcessStatus.  Called from the destructor.
+// A helper thread that is still valid at this point is reported as an error.
+void SpawnerWin::CleanUp() {
+  VLOG(1) << "CleanUp";
+  if (input_thread_.valid()) {
+    LOG(ERROR) << "input_thread still valid. job_name=" << job_name_;
+    // Joining from the input thread itself would never return.
+    CHECK_NE(::GetCurrentThreadId(), input_thread_id_);
+    stop_input_thread_ = true;
+    WaitForSingleObject(input_thread_.handle(), INFINITE);
+    input_thread_.reset(nullptr);
+  }
+  process_status_ = kInvalidProcessStatus;
+  child_process_.reset(nullptr);
+  if (!child_job_.Close()) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Failed to release child job handle."
+               << " job_name=" << job_name_;
+  }
+  if (output_thread_.valid()) {
+    LOG(ERROR) << "output_thread still valid. job_name=" << job_name_;
+    // Joining from the output thread itself would never return.
+    CHECK_NE(::GetCurrentThreadId(), output_thread_id_);
+    DCHECK(stop_output_thread_.handle());
+    // Signal the event OutputThread waits on so that its loop exits.
+    SetEvent(stop_output_thread_.handle());
+    VLOG(2) << "Join OutputThread";
+    WaitForSingleObject(output_thread_.handle(), INFINITE);
+    output_thread_.reset(nullptr);
+  }
+  stop_output_thread_.reset(nullptr);
+  stdout_file_.reset(nullptr);
+  stderr_file_.reset(nullptr);
+
+  child_stdin_.reset(nullptr);
+  child_stdout_.reset(nullptr);
+  child_stderr_.reset(nullptr);
+  output_thread_id_ = 0;
+}
+
+// Copies the contents of |input_file_| to the child's stdin pipe
+// (|child_stdin_|) in 4 KiB chunks, then closes the pipe so that the child
+// sees end-of-file.  The copy stops early when stop_input_thread_ is set.
+// Returns true if the loop ended (end of file or stop request) and the pipe
+// was closed successfully; returns false on any open/read/write/close
+// error, in which case the pipe is left as it is.
+bool SpawnerWin::WriteToPipe() {
+  const char* filepath = input_file_.c_str();
+  VLOG(1) << "WriteToPipe from " << filepath;
+  ScopedFd input(CreateFileA(filepath, GENERIC_READ, 0, nullptr, OPEN_EXISTING,
+                             FILE_ATTRIBUTE_READONLY, nullptr));
+  if (!input.valid()) {
+    // Report the real cause instead of a later ReadFile failure on an
+    // invalid handle.
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Failed to open input file:"
+               << " filepath=" << filepath
+               << " job_name=" << job_name_;
+    return false;
+  }
+
+  // Initialized so that the error logs below never print garbage.
+  DWORD read = 0, written = 0;
+  CHAR buf[4096];
+  BOOL success = FALSE;
+
+  for (;;) {
+    if (stop_input_thread_)
+      break;
+    success = ReadFile(input.handle(), buf, sizeof(buf), &read, nullptr);
+    // End of file under synchronous read operation.
+    // See: http://msdn.microsoft.com/en-us/library/windows/desktop/aa365690(v=vs.85).aspx
+    if (success && read == 0) {
+      break;
+    }
+    if (!success) {
+      DWORD error = GetLastError();
+      LOG_SYSRESULT(error);
+      LOG(ERROR) << "ReadFile failed:"
+                 << " filepath=" << filepath
+                 << " read=" << read
+                 << " job_name=" << job_name_;
+      return false;
+    }
+
+    if (stop_input_thread_)
+      break;
+    success = WriteFile(child_stdin_.handle(), buf, read, &written, nullptr);
+    // Since this is an anonymous pipe, WriteFile blocks until |read| bytes has
+    // been written.
+    // See "Remarks" section:
+    // http://msdn.microsoft.com/en-us/library/windows/desktop/aa365152(v=vs.85).aspx
+    if (!success) {
+      DWORD error = GetLastError();
+      // When the child is killed, WriteFile would fail with ERROR_BROKEN_PIPE.
+      if (stop_input_thread_ && error == ERROR_BROKEN_PIPE) {
+        VLOG(1) << "broken pipe caused by process termination."
+                << " filepath=" << filepath
+                << " read=" << read
+                << " written=" << written;
+        return false;
+      }
+      LOG_SYSRESULT(error);
+      LOG(ERROR) << "WriteFile failed:"
+                 << " filepath=" << filepath
+                 << " read=" << read
+                 << " written=" << written
+                 << " job_name=" << job_name_;
+      return false;
+    }
+    if (read != written) {
+      LOG(ERROR) << "Failed to WriteFile |read| length."
+                 << " The execution result may strange."
+                 << " filepath=" << filepath
+                 << " read=" << read
+                 << " written=" << written
+                 << " success=" << success
+                 << " job_name=" << job_name_;
+      return false;
+    }
+    VLOG(2) << "WriteToPipe read=" << read << " written=" << written;
+  }
+
+  // close the pipe handle so the child process stops reading.
+  // Close() returns true on success (see the child_job_.Close() checks in
+  // this file), so only a false result is an error.
+  if (!child_stdin_.Close()) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "close stdin handler failed."
+               << " job_name=" << job_name_;
+    return false;
+  }
+  VLOG(1) << "WriteToPipe finished";
+  return true;
+}
+
+// Moves pending child output from the stdout/stderr pipes to their files.
+// Both pipes are always serviced.  Returns true while at least one valid
+// pipe could still be read, false once neither can.
+bool SpawnerWin::Redirect() {
+  VLOG(1) << "Redirect";
+  bool any_open = false;
+  if (child_stdout_.valid()) {
+    VLOG(2) << "ReadFromStdout";
+    if (ReadFromPipe(child_stdout_.handle(), stdout_file_.handle())) {
+      any_open = true;
+    }
+  }
+  if (child_stderr_.valid()) {
+    VLOG(2) << "ReadFromStderr";
+    if (ReadFromPipe(child_stderr_.handle(), stderr_file_.handle())) {
+      any_open = true;
+    }
+  }
+  return any_open;
+}
+
+// Reads the bytes that PeekNamedPipe() reports as pending on |pipe|.
+// The data is written to |file| when it is a usable handle and appended to
+// console_output_ when one is set.
+// Returns false when the pipe can no longer be used (PeekNamedPipe failed,
+// e.g. EOF / broken pipe) or on a read/write error; returns true otherwise,
+// including when no data was pending.
+bool SpawnerWin::ReadFromPipe(HANDLE pipe, HANDLE file) {
+  DWORD avail = 0;
+  if (!PeekNamedPipe(pipe, nullptr, 0, nullptr, &avail, nullptr)) {
+    DWORD err = GetLastError();
+    // EOF and broken pipe are the expected end of the child's output, so
+    // they are not logged.
+    if (err != ERROR_HANDLE_EOF && err != ERROR_BROKEN_PIPE) {
+      LOG_SYSRESULT(err);
+      LOG(ERROR) << "PeekNamedPipe error:" << err
+                 << " job_name=" << job_name_;
+    }
+    return false;
+  }
+  if (avail) {
+    VLOG(2) << "ReadFromPipe avail=" << avail;
+    // One extra zeroed byte keeps the buffer NUL-terminated.
+    std::unique_ptr<char[]> buffer(new char[avail + 1]);
+    memset(buffer.get(), 0, avail + 1);
+    DWORD read = 0, written = 0;
+    DWORD r = ReadFile(pipe, buffer.get(), avail, &read, nullptr);
+    if (!r) {
+      LOG_SYSRESULT(GetLastError());
+      LOG(ERROR) << "ReadFile err avail=" << avail
+                 << " job_name=" << job_name_;
+      return false;
+    } else if (read == 0) {
+      // reached EOF, but avail > 0 ?
+      LOG(ERROR) << "ReadFile read 0 avail=" << avail
+                 << " job_name=" << job_name_;
+      return false;
+    }
+    if (file != INVALID_HANDLE_VALUE && file != 0) {
+      r = WriteFile(file, buffer.get(), read, &written, nullptr);
+      if (!r) {
+        LOG_SYSRESULT(GetLastError());
+        LOG(ERROR) << "WriteFile err size=" << read << " written=" << written
+                   << " job_name=" << job_name_;
+        return false;
+      }
+      // A short write is only logged; the data is not retried.
+      LOG_IF(ERROR, read != written)
+          << "WriteFile size=" << read << " written=" << written
+          << " job_name=" << job_name_;
+    } else {
+      VLOG(1) << "ignored to output to log file";
+    }
+    VLOG(2) << "ReadFromPipe read=" << read << " written=" << written;
+    if (console_output_) {
+      console_output_->append(buffer.get(), read);
+    }
+  }
+  return true;
+}
+
+// Releases the stdout/stderr output files.  Called by OutputThread right
+// before it exits.
+void SpawnerWin::Flush() {
+  VLOG(1) << "Flush";
+  stdout_file_.reset(nullptr);
+  stderr_file_.reset(nullptr);
+}
+
+// Thread entry point that feeds the child's stdin.  |thread_params| is the
+// owning SpawnerWin.  Always returns 0.
+/* static */
+DWORD WINAPI SpawnerWin::InputThread(LPVOID thread_params) {
+  SpawnerWin* const spawner = static_cast<SpawnerWin*>(thread_params);
+  DCHECK(spawner);
+
+  // TODO: handles WriteToPipe error.
+  spawner->WriteToPipe();
+  return 0;
+}
+
+// Thread entry point that drains the child's output.  |thread_params| is
+// the owning SpawnerWin.
+// It calls Redirect() repeatedly, waiting kWaitTimeout on the stop event
+// between rounds, until Redirect() reports that nothing is readable any
+// more or the wait ends with anything other than a timeout.  In the latter
+// case one more Redirect() is done to pick up remaining output.  The output
+// files are released with Flush() before returning.  Always returns 0.
+/* static */
+DWORD WINAPI SpawnerWin::OutputThread(LPVOID thread_params) {
+  SpawnerWin* self = reinterpret_cast<SpawnerWin*>(thread_params);
+  DCHECK(self);
+
+  const HANDLE stop = self->stop_output_thread_.handle();
+
+  for (;;) {
+    if (!self->Redirect()) {
+      VLOG(1) << "OutputThread: redirect closed";
+      break;
+    }
+
+    VLOG(2) << "OutputThread: Wait";
+    const DWORD r = WaitForSingleObject(stop, kWaitTimeout);
+    if (r == WAIT_TIMEOUT) {
+      // Nobody asked us to stop; go and drain the pipes again.
+      continue;
+    }
+
+    if (r == WAIT_OBJECT_0) {
+      LOG(WARNING) << "OutputThread: Stop before child process ended "
+                   << "job_name=" << self->job_name_;
+    } else if (r == WAIT_ABANDONED) {
+      LOG_SYSRESULT(GetLastError());
+      LOG(ERROR) << "Wait: stop_output_thread error? "
+                 << " job_name=" << self->job_name_;
+    } else {
+      LOG_SYSRESULT(GetLastError());
+      LOG(ERROR) << "Unexpected return value from WaitForSingleObject."
+                 << " r=" << r
+                 << " job_name=" << self->job_name_;
+    }
+    // Final drain before leaving the loop.
+    self->Redirect();
+    break;
+  }
+  self->Flush();
+  return 0;
+}
+
+}  // namespace devtools_goma
diff --git a/client/spawner_win.h b/client/spawner_win.h
new file mode 100644
index 0000000..3d27d3f
--- /dev/null
+++ b/client/spawner_win.h
@@ -0,0 +1,122 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SPAWNER_WIN_H_
+#define DEVTOOLS_GOMA_CLIENT_SPAWNER_WIN_H_
+
+#include "config_win.h"
+
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+#include "scoped_fd.h"
+#include "spawner.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// A subclass of Spawner for Windows.
+// It spawns a thread internally to capture child process' output.
+class SpawnerWin : public Spawner {
+ public:
+  SpawnerWin();
+  ~SpawnerWin() override;
+
+  int Run(const string& prog,
+                  const std::vector<string>& argv,
+                  const std::vector<string>& env,
+                  const string& cwd) override;
+  bool Kill() override;
+  bool Wait(WaitPolicy wait_policy) override;
+  bool IsChildRunning() const override {
+    return process_status_ == STILL_ACTIVE;
+  }
+  bool IsSignaled() const override { return is_signaled_; }
+  void SetSignaled() override { is_signaled_ = true; }
+  int ChildStatus() const override {
+    return static_cast<int>(process_status_);
+  }
+  int ChildTermSignal() const override {
+    return 0;
+  }
+  // Returns process_mem_bytes_ converted to KB, or -1 while it is still 0.
+  int64_t ChildMemKb() const override {
+    if (process_mem_bytes_ == 0)
+      return -1;
+    return static_cast<int64_t>(process_mem_bytes_) / 1024;
+  }
+
+  static void Setup();
+  static void TearDown();
+
+ private:
+  int RunRedirected(const string& command_line,
+                    std::vector<char>* env,
+                    const string& cwd,
+                    const string& out_file,
+                    const string& in_file);
+  void UpdateProcessStatus(DWORD timeout);
+  // Returns true if the process is still active.
+  bool KillAndWait(DWORD timeout);
+  // Returns true when it has been finished.
+  bool FinalizeProcess(DWORD timeout);
+
+  // Returns true if it finishes writing |input_file_| to |child_stdin_|.
+  bool WriteToPipe();
+
+  // Redirect stdout/stderr to file.
+  // Returns true while still redirecting.
+  // Returns false if both stdout/stderr pipes are closed.
+  bool Redirect();
+  // Returns true if pipe is still alive.
+  bool ReadFromPipe(HANDLE pipe, HANDLE file);
+  // Flush stdout/stderr files.
+  void Flush();
+
+  // CleanUp cleans up all threads and handles.
+  void CleanUp();
+
+  // Creates a new JobObject and assigns |child_process| to it.
+  // Returns a ScopedFd for the JobObject. On failure, an invalid ScopedFd is
+  // returned.
+  static ScopedFd AssignProcessToNewJobObject(
+      ScopedFd::FileDescriptor child_process,
+      const string& job_name);
+
+  static DWORD WINAPI OutputThread(LPVOID thread_params);
+  static DWORD WINAPI InputThread(LPVOID thread_params);
+
+  ScopedFd input_thread_;  // thread to send input of the child process
+  DWORD input_thread_id_;
+  bool stop_input_thread_;  // Asks InputThread to finish itself when true.
+
+  ScopedFd output_thread_;  // thread to receive output of the child process
+  DWORD output_thread_id_;
+  ScopedFd stop_output_thread_;  // event telling OutputThread to exit
+
+  DWORD process_status_;
+  SIZE_T process_mem_bytes_;
+
+  string job_name_;
+  ScopedFd child_job_;
+  ScopedFd child_process_;
+  ScopedFd child_stdin_, child_stdout_, child_stderr_;
+  ScopedFd stdout_file_, stderr_file_;
+  string input_file_;
+
+  bool is_signaled_;
+
+  static string* temp_dir_;
+
+  DISALLOW_COPY_AND_ASSIGN(SpawnerWin);
+};
+
+typedef SpawnerWin PlatformSpawner;
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SPAWNER_WIN_H_
diff --git a/client/spawner_win_unittest.cc b/client/spawner_win_unittest.cc
new file mode 100644
index 0000000..8ccdda9
--- /dev/null
+++ b/client/spawner_win_unittest.cc
@@ -0,0 +1,195 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+// This is a Windows-only unit test
+#ifdef _WIN32
+#include "spawner_win.h"
+
+#include "compiler_specific.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/subprocess.pb.h"
+MSVC_POP_WARNING()
+#include "util.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+TEST(SpawnerWin, SpawnerAndLogToFile) {
+  // Runs dump_env.exe with its output redirected to a log file next to the
+  // test binary, then checks the argument and environment lines in that file.
+  char buffer[PATH_MAX] = {0};
+  GetModuleFileNameA(nullptr, buffer, PATH_MAX);
+  *strrchr(buffer, '\\') = 0;
+
+  const string cwd(buffer);
+  const string prog(".\\dump_env.exe");
+  std::vector<string> argv;
+  argv.push_back("dump_env.exe");
+  argv.push_back("arg1");
+  argv.push_back("arg2");
+  std::vector<string> envs;
+  envs.push_back("TEST_STRING1=goma");
+  envs.push_back("TEST_STRING2=win");
+  // TODO: remove these when spawn_win do not find command.
+  envs.push_back("PATH=" + devtools_goma::GetEnv("PATH"));
+  envs.push_back("PATHEXT=" + devtools_goma::GetEnv("PATHEXT"));
+
+  const string stdout_filename("dump_env.stdout.log");
+  const string stderr_filename("dump_env.stderr.log");
+
+  // priority not supported yet
+  // req.set_priority(devtools_goma::SubProcessReq_Priority_HIGH_PRIORITY);
+
+  strcat_s(buffer, PATH_MAX, "\\");
+  strcat_s(buffer, PATH_MAX, stdout_filename.c_str());
+  _unlink(buffer);  // remove any log left over from a previous run
+
+  devtools_goma::SpawnerWin spawner;
+  spawner.SetFileRedirection("", stdout_filename, stderr_filename,
+                             devtools_goma::Spawner::MERGE_STDOUT_STDERR);
+  spawner.SetDetach(false);
+  int pid = spawner.Run(prog, argv, envs, cwd);
+  EXPECT_NE(0, pid);
+  while (spawner.IsChildRunning())
+    spawner.Wait(devtools_goma::Spawner::WAIT_INFINITE);
+
+  FILE* fp = nullptr;
+  fopen_s(&fp, buffer, "r");
+  ASSERT_TRUE(fp != nullptr);
+  char temp[PATH_MAX] = {0};  // zeroed so a failed fgets never leaves garbage
+  EXPECT_TRUE(fgets(temp, PATH_MAX, fp) != nullptr);  // exe name line
+  EXPECT_TRUE(fgets(temp, PATH_MAX, fp) != nullptr);
+  EXPECT_STREQ("arg1\n", temp);
+  EXPECT_TRUE(fgets(temp, PATH_MAX, fp) != nullptr);
+  EXPECT_STREQ("arg2\n", temp);
+  EXPECT_TRUE(fgets(temp, PATH_MAX, fp) != nullptr);
+  EXPECT_STREQ("TEST_STRING1=goma\n", temp);
+  EXPECT_TRUE(fgets(temp, PATH_MAX, fp) != nullptr);
+  EXPECT_STREQ("TEST_STRING2=win\n", temp);
+
+  fclose(fp);
+}
+
+TEST(SpawnerWin, SpawnerAndLogToString) {
+  // Runs dump_env.exe and checks the lines captured in the output string.
+  char module_path[PATH_MAX] = {0};
+  GetModuleFileNameA(nullptr, module_path, PATH_MAX);
+  *strrchr(module_path, '\\') = 0;  // keep only the directory part
+
+  const std::string cwd(module_path);
+  const std::string prog(".\\dump_env.exe");
+  const std::vector<std::string> argv = {"dump_env.exe", "arg1", "arg2"};
+  const std::vector<std::string> env = {
+      "TEST_STRING1=goma",
+      "TEST_STRING2=win",
+      // TODO: remove these when spawn_win do not find command.
+      "PATH=" + devtools_goma::GetEnv("PATH"),
+      "PATHEXT=" + devtools_goma::GetEnv("PATHEXT"),
+  };
+
+  // priority not supported yet
+  // req.set_priority(devtools_goma::SubProcessReq_Priority_HIGH_PRIORITY);
+
+  devtools_goma::SpawnerWin spawner;
+  std::string output;
+  spawner.SetConsoleOutputBuffer(&output,
+                                 devtools_goma::Spawner::MERGE_STDOUT_STDERR);
+  const int pid = spawner.Run(prog, argv, env, cwd);
+  EXPECT_NE(0, pid);
+  while (spawner.IsChildRunning()) {
+    spawner.Wait(devtools_goma::Spawner::WAIT_INFINITE);
+  }
+
+  // Only the presence of the first output line is checked.
+  char* next_token;
+  char* token = strtok_s(&output[0], "\r\n", &next_token);
+  EXPECT_TRUE(token != nullptr);
+
+  // The following lines must match these strings, in this order.
+  const char* const kExpectedLines[] = {
+      "arg1", "arg2", "TEST_STRING1=goma", "TEST_STRING2=win",
+  };
+  for (const char* expected : kExpectedLines) {
+    token = strtok_s(nullptr, "\r\n", &next_token);
+    EXPECT_TRUE(token != nullptr);
+    EXPECT_STREQ(expected, token);
+  }
+}
+
+TEST(SpawnerWin, SpawnerEscapeArgs) {
+  // Arguments containing spaces, parentheses and double quotes must come
+  // back from dump_env.exe exactly as they were passed in.
+  char module_path[PATH_MAX] = {0};
+  GetModuleFileNameA(nullptr, module_path, PATH_MAX);
+  *strrchr(module_path, '\\') = 0;
+
+  const std::string cwd(module_path);
+  const std::string prog(".\\dump_env.exe");
+
+  // Used both as the arguments passed and as the expected output lines.
+  const char* const kTrickyArgs[] = {
+      "-imsvcC:\\Program Files (x86)\\Microsoft Visual Studio 14.0"
+      "\\VC\\INCLUDE",
+      "-imsvcC:\\Program Files (x86)\\Windows Kits"
+      "\\10\\include\\10.0.14393.0\\um",
+      "-DSTR=\"str\"",
+  };
+  std::vector<std::string> argv, env;
+  argv.push_back("dump_env.exe");
+  for (const char* arg : kTrickyArgs) {
+    argv.push_back(arg);
+  }
+
+  // TODO: remove these when spawn_win do not find command.
+  env.push_back("PATH=" + devtools_goma::GetEnv("PATH"));
+  env.push_back("PATHEXT=" + devtools_goma::GetEnv("PATHEXT"));
+
+  // priority not supported yet
+  // req.set_priority(devtools_goma::SubProcessReq_Priority_HIGH_PRIORITY);
+
+  devtools_goma::SpawnerWin spawner;
+  std::string output;
+  spawner.SetConsoleOutputBuffer(&output,
+                                 devtools_goma::Spawner::MERGE_STDOUT_STDERR);
+  const int pid = spawner.Run(prog, argv, env, cwd);
+  EXPECT_NE(0, pid);
+  while (spawner.IsChildRunning()) {
+    spawner.Wait(devtools_goma::Spawner::WAIT_INFINITE);
+  }
+
+  // Skip the first output line, then expect one line per argument.
+  char* next_token;
+  char* token = strtok_s(&output[0], "\r\n", &next_token);
+  EXPECT_TRUE(token != nullptr);
+
+  for (const char* expected : kTrickyArgs) {
+    token = strtok_s(nullptr, "\r\n", &next_token);
+    EXPECT_TRUE(token != nullptr);
+    EXPECT_STREQ(expected, token);
+  }
+}
+
+TEST(SpawnerWin, SpawnerFailed) {
+  // PATH lists only nonexistent folders, so Run() is expected to return 0.
+  const std::string cwd = "c:\\";
+  const std::string prog("dump_env.exe");
+  const std::vector<std::string> argv = {"dump_env.exe", "arg1", "arg2"};
+  const std::vector<std::string> env = {
+      "TEST_STRING1=goma",
+      "TEST_STRING2=win",
+      // TODO: remove these when spawn_win do not find command.
+      "PATH=C:\\non_exist_folder;C:\\non_exist_folder2",
+      "PATHEXT=" + devtools_goma::GetEnv("PATHEXT"),
+  };
+
+  devtools_goma::SpawnerWin spawner;
+  std::string output;
+  spawner.SetConsoleOutputBuffer(&output,
+                                 devtools_goma::Spawner::MERGE_STDOUT_STDERR);
+  const int pid = spawner.Run(prog, argv, env, cwd);
+  EXPECT_EQ(0, pid);
+}
+
+#endif  // _WIN32
diff --git a/client/static_darray.cc b/client/static_darray.cc
new file mode 100644
index 0000000..4c19cd7
--- /dev/null
+++ b/client/static_darray.cc
@@ -0,0 +1,47 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "static_darray.h"
+
+#include <glog/logging.h>
+
+namespace devtools_goma {
+
+int StaticDoubleArray::Lookup(const string& word) const {
+  // Feed |word| one character at a time; a rejected character means -1.
+  LookupHelper walker(this);
+  for (const char ch : word) {
+    if (!walker.Lookup(ch)) return -1;
+  }
+  return walker.GetValue();
+}
+
+StaticDoubleArray::LookupHelper::LookupHelper(const StaticDoubleArray* array)
+    : array_(array), index_(0) {  // every lookup starts at node 0
+  DCHECK(array_);
+}
+
+bool StaticDoubleArray::LookupHelper::Lookup(char c) {
+  DCHECK(0 <= index_ && index_ < array_->nodes_len);
+  // Candidate slot: the current node's base plus the code for |c|.
+  const int candidate = array_->nodes[index_].base + array_->Encode(c);
+  const bool in_range = 0 <= candidate && candidate < array_->nodes_len;
+  // Accept the move only if the slot's check names the current node.
+  if (!in_range || array_->nodes[candidate].check != index_) return false;
+  index_ = candidate;
+  return true;
+}
+
+int StaticDoubleArray::LookupHelper::GetValue() {
+  DCHECK(0 <= index_ && index_ < array_->nodes_len);
+  // The value lives in the terminator slot, stored negated in its base.
+  const int leaf = array_->nodes[index_].base + array_->terminate_code;
+  if (leaf < 0 || leaf >= array_->nodes_len) return -1;
+  const Node& slot = array_->nodes[leaf];
+  if (slot.check != index_ || slot.base > 0) return -1;
+  return -slot.base;
+}
+
+}  // namespace devtools_goma
diff --git a/client/static_darray.h b/client/static_darray.h
new file mode 100644
index 0000000..eaa0d4d
--- /dev/null
+++ b/client/static_darray.h
@@ -0,0 +1,51 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_STATIC_DARRAY_H_
+#define DEVTOOLS_GOMA_CLIENT_STATIC_DARRAY_H_
+
+#include <string>
+
+#include "basictypes.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+struct StaticDoubleArray {
+  struct Node { short base; short check; };  // one slot of the node table
+  StaticDoubleArray(const Node* n, int len, char base, int tcode)
+      : nodes(n), nodes_len(len), encode_base(base), terminate_code(tcode) {}
+  const Node* nodes;         // node table; the pointer is stored, not copied
+  const int nodes_len;       // number of entries in |nodes|
+  const char encode_base;    // subtracted from each character by Encode()
+  const int terminate_code;  // offset of the terminator slot (see GetValue)
+
+  // Returns the value for the given word, or -1 if the word is not found.
+  int Lookup(const string& word) const;
+
+  // Incremental lookup helper.
+  class LookupHelper {
+   public:
+    explicit LookupHelper(const StaticDoubleArray* array);
+    bool Lookup(char c);  // false, with state unchanged, if |c| is rejected
+
+    // Finishes the lookup and returns the value (-1 if there is none).
+    int GetValue();
+
+   private:
+    const StaticDoubleArray* array_;
+    int index_;  // index of the current node in array_->nodes
+  };
+
+ private:
+  int Encode(char c) const { return c - encode_base + 1; }
+
+  DISALLOW_COPY_AND_ASSIGN(StaticDoubleArray);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_STATIC_DARRAY_H_
diff --git a/client/static_darray_unittest.cc b/client/static_darray_unittest.cc
new file mode 100644
index 0000000..2173b1b
--- /dev/null
+++ b/client/static_darray_unittest.cc
@@ -0,0 +1,68 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include <cstdlib>
+#include <set>
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+#include <gtest/gtest.h>
+
+#include "basictypes.h"
+#include "static_darray.h"
+
+namespace devtools_goma {
+#include "static_darray_test_array.h"
+
+namespace {
+const int kMaxAppendChars = 20;
+}
+
+class StaticDoubleArrayTest : public testing::Test {
+ public:
+  void SetUp() override {
+    srand(static_cast<unsigned int>(time(nullptr)));  // clock-seeded rand()
+    for (const auto& keyword : kDArrayKeywords)
+      keywords_.insert(string(keyword));
+  }
+
+ protected:
+  // A keyword must map to its own index in kDArrayKeywords; others to -1.
+  void LookupWord(const string& word) {
+    const int value = kDArrayArray.Lookup(word);
+    if (keywords_.count(word) == 0) {
+      EXPECT_EQ(-1, value);
+      return;
+    }
+    ASSERT_LT(static_cast<size_t>(value), arraysize(kDArrayKeywords));
+    EXPECT_EQ(word, string(kDArrayKeywords[value]));
+  }
+
+  std::set<string> keywords_;
+};
+
+TEST_F(StaticDoubleArrayTest, Lookup) {
+  // Probe each keyword itself, a random proper prefix of it, and the keyword
+  // followed by fewer than kMaxAppendChars random characters.
+  for (const string& keyword : keywords_) {
+    LookupWord(keyword);
+
+    if (keyword.length() > 1) {
+      const int prefix_len = rand() % (keyword.length() - 1) + 1;
+      LookupWord(keyword.substr(0, prefix_len));
+    }
+
+    // rand() % 127 + 1 is in [1, 127], so no NUL byte is ever appended.
+    string extended(keyword);
+    const int extra_chars = rand() % kMaxAppendChars;
+    for (int i = 0; i < extra_chars; ++i) {
+      extended += static_cast<char>(rand() % 127 + 1);
+    }
+
+    LookupWord(extended);
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/client/subprocess.cc b/client/subprocess.cc
new file mode 100644
index 0000000..3ee981c
--- /dev/null
+++ b/client/subprocess.cc
@@ -0,0 +1,288 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "subprocess.h"
+
+#include <fcntl.h>
+#include <stdio.h>
+
+#ifndef _WIN32
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#endif
+
+#include <algorithm>
+#include <deque>
+#include <iostream>
+#include <memory>
+
+#include "env_flags.h"
+#include "compiler_flags.h"
+#include "compiler_specific.h"
+#include "file_id.h"
+#include "glog/logging.h"
+#include "ioutil.h"
+#include "join.h"
+#include "path.h"
+#include "scoped_fd.h"
+#ifndef _WIN32
+#include "spawner_posix.h"
+#else
+#include "spawner_win.h"
+#endif
+#include "string_piece.h"
+
+using std::string;
+
+namespace {
+
+#ifdef _WIN32
+string GetPathExt(const std::vector<string>& envs) {  // PATHEXT from |envs|
+  return devtools_goma::GetEnvFromEnvIter(envs.begin(), envs.end(), "PATHEXT");
+}
+#else
+string GetPathExt(const std::vector<string>& envs ALLOW_UNUSED) {
+  return "";  // non-Windows builds always report an empty PATHEXT
+}
+#endif
+
+bool GetRealPrognameAndEnvs(const devtools_goma::FileId* gomacc_fileid,
+                            const string& prog,
+                            const std::vector<string>& args,
+                            std::vector<string>* envs,
+                            string* real_progname) {
+  // Resolves |prog| into |*real_progname| and replaces PATH in |*envs| with
+  // the reported path; logs and returns false if either step fails.
+  static const char kPath[] = "PATH";
+  *real_progname = prog;
+  if (gomacc_fileid != nullptr) {
+    // We should set ReadCommand to avoid gomacc in GetRealExecutablePath.
+#ifndef _WIN32
+    InstallReadCommandOutputFunc(devtools_goma::ReadCommandOutputByPopen);
+#else
+    InstallReadCommandOutputFunc(devtools_goma::ReadCommandOutputByRedirector);
+#endif
+  }
+
+  const auto search_path =
+      devtools_goma::GetEnvFromEnvIter(envs->begin(), envs->end(), kPath);
+  const string path_ext = GetPathExt(*envs);
+  string no_goma_env_path;
+  if (!GetRealExecutablePath(gomacc_fileid, prog, ".", search_path, path_ext,
+                             real_progname, &no_goma_env_path, nullptr)) {
+    LOG(ERROR) << "failed to get executable path."
+               << " prog=" << prog
+               << " path=" << search_path
+               << " pathext=" << path_ext;
+    return false;
+  }
+  if (devtools_goma::ReplaceEnvInEnvIter(envs->begin(), envs->end(),
+                                         kPath, no_goma_env_path)) {
+    return true;
+  }
+  LOG(ERROR) << "failed to replace path env."
+             << " kPath=" << kPath
+             << " path=" << devtools_goma::GetEnvFromEnvIter(
+                 envs->begin(), envs->end(), kPath)
+             << " no_goma_env_path=" << no_goma_env_path;
+  return false;
+}
+
+} // namespace
+
+namespace devtools_goma {
+
+#ifdef _WIN32
+
+int SpawnAndWait(const string& prog, const std::vector<string>& args,
+                 const std::vector<string>& envs) {
+  return SpawnAndWaitNonGomacc(nullptr, prog, args, envs);  // no FileId given
+}
+
+int SpawnAndWaitNonGomacc(const FileId* gomacc_fileid, const string& prog,
+                          const std::vector<string>& args,
+                          std::vector<string> envs) {
+  // The helper's result is ignored; it logs its own failures.
+  string real_progname;
+  GetRealPrognameAndEnvs(gomacc_fileid, prog, args, &envs, &real_progname);
+
+  SpawnerWin spawner;
+  const int status =
+      spawner.Run(real_progname, args, envs, GetCurrentDirNameOrDie());
+  if (status == Spawner::kInvalidPid) {
+    return -1;
+  }
+  while (spawner.IsChildRunning()) spawner.Wait(Spawner::WAIT_INFINITE);
+  return spawner.ChildStatus();
+}
+
+#else
+
+int Execvpe(const string& prog, const std::vector<string>& args,
+            const std::vector<string>& envs) {
+  return ExecvpeNonGomacc(nullptr, prog, args, envs);  // no FileId given
+}
+
+int ExecvpeNonGomacc(const FileId* gomacc_fileid,
+                     const string& prog, const std::vector<string>& args,
+                     std::vector<string> envs) {
+  // The helper's result is ignored; it logs its own failures.
+  string real_progname;
+  GetRealPrognameAndEnvs(gomacc_fileid, prog, args, &envs, &real_progname);
+
+  // execve() takes null-terminated arrays of C strings. These vectors only
+  // borrow the pointers owned by |args| and |envs|.
+  std::vector<const char*> argv_ptrs;
+  argv_ptrs.reserve(args.size() + 1);
+  for (const string& arg : args) argv_ptrs.push_back(arg.c_str());
+  argv_ptrs.push_back(nullptr);
+
+  std::vector<const char*> env_ptrs;
+  env_ptrs.reserve(envs.size() + 1);
+  for (const string& env : envs) env_ptrs.push_back(env.c_str());
+  env_ptrs.push_back(nullptr);
+
+  return execve(real_progname.c_str(), const_cast<char**>(argv_ptrs.data()),
+                const_cast<char**>(env_ptrs.data()));
+}
+
+#endif
+
+#ifndef _WIN32
+string ReadCommandOutputByPopen(
+    const string& prog, const std::vector<string>& argv,
+    const std::vector<string>& envs,
+    const string& cwd, CommandOutputOption option, int32_t* status) {
+  // Builds one shell command line (wrapped in "sh -c 'cd |cwd| && ...'" when
+  // |cwd| is set), runs it with popen and returns everything it printed.
+  // NOTE(review): |cwd|, |envs| and |argv| are pasted in with minimal quoting.
+  string commandline;
+  if (!cwd.empty()) commandline = "sh -c 'cd " + cwd + " && ";
+  for (const auto& env : envs) commandline += env + " ";
+  for (const auto& arg : argv) {
+    // Escaping only <, >, ( and ) is OK for now.
+    if (arg.find_first_of(" <>();&'#") == string::npos) {
+      CHECK(arg.find_first_of("\\\"") == string::npos) << arg;
+      commandline += arg + " ";
+    } else {
+      commandline += "\"" + arg + "\" ";
+    }
+  }
+  if (!cwd.empty()) commandline += "'";
+  if (option == MERGE_STDOUT_STDERR) commandline += " 2>&1";
+
+  FILE* p = popen(commandline.c_str(), "r");
+  CHECK(p) << "popen for " << prog << " (" << commandline << ") failed";
+  std::ostringstream strbuf;
+  while (true) {
+    const size_t kBufSize = 64 * 1024;
+    char buf[kBufSize];
+    size_t len = fread(buf, 1, kBufSize, p);
+    if (len == 0) {
+      // Retry only if this very read was interrupted. errno alone may be a
+      // stale EINTR from an earlier call, which would spin forever at EOF.
+      if (ferror(p) && errno == EINTR) {
+        clearerr(p);
+        continue;
+      }
+      CHECK(feof(p)) << "could not read output for: " << commandline;
+      break;
+    }
+    strbuf.write(buf, len);
+  }
+
+  int exit_status = pclose(p);
+  if (status) {
+    *status = exit_status;
+  } else {
+    LOG_IF(FATAL, exit_status != 0)
+        << "If the caller expects the non-zero exit status, "
+        << "the caller must set non-nullptr status in the argument."
+        << " prog=" << prog
+        << " args=" << strings::Join(argv, " ")
+        << " cwd=" << cwd
+        << " exit_status=" << exit_status
+        << " output=" << strbuf.str();
+  }
+
+  return strbuf.str();
+}
+
+void Daemonize(const string& stderr_filename, int pid_record_fd,
+               const std::set<int>& preserve_fds) {
+  PCHECK(setsid() >= 0);  // start a new session
+  PCHECK(chdir("/") == 0);
+  umask(0);
+
+  // Fork again, so we'll never get tty.
+  pid_t pid;
+  if ((pid = fork())) {  // nonzero: we are the parent, or fork() failed (-1)
+    PCHECK(pid > 0);
+    exit(0);  // the parent exits; only the new child continues below
+  }
+
+  pid = Getpid();
+  if (pid_record_fd >= 0) {  // report our pid as raw pid_t bytes
+    PCHECK(write(pid_record_fd, &pid, sizeof(pid)) == sizeof(pid));
+  } else {  // no record fd: print the pid on stdout instead
+    std::cout << pid << std::endl;
+  }
+
+  int devnullfd = ScopedFd::OpenNull();  // presumably the null device — confirm
+  CHECK_GE(devnullfd, 0);
+  PCHECK(dup2(devnullfd, STDIN_FILENO) >= 0);
+  PCHECK(dup2(devnullfd, STDOUT_FILENO) >= 0);
+
+  int stderrfd = -1;
+  if (!stderr_filename.empty())
+    stderrfd = open(stderr_filename.c_str(), O_WRONLY|O_CREAT, 0660);
+
+  if (stderrfd >= 0) {
+    PCHECK(dup2(stderrfd, STDERR_FILENO) >= 0);
+  } else {  // no file name given, or open() failed: use devnullfd for stderr
+    PCHECK(dup2(devnullfd, STDERR_FILENO) >= 0);
+  }
+
+  // Close all file descriptors except stdin/stdout/stderr and in preserve_fds.
+  int maxfd = sysconf(_SC_OPEN_MAX);
+  for (int fd = STDERR_FILENO + 1; fd < maxfd; ++fd) {
+    if (preserve_fds.count(fd) == 0)
+      close(fd);  // every fd above stderr that is not in preserve_fds
+  }
+}
+
+#else
+
+string ReadCommandOutputByRedirector(const string& prog,
+    const std::vector<string>& argv, const std::vector<string>& env,
+    const string& cwd, CommandOutputOption option, int32_t* status) {
+  // Any |option| other than STDOUT_ONLY merges stdout and stderr.
+  SpawnerWin spawner;
+  const Spawner::ConsoleOutputOption output_option =
+      (option == STDOUT_ONLY) ? Spawner::STDOUT_ONLY
+                              : Spawner::MERGE_STDOUT_STDERR;
+  string output;
+  spawner.SetConsoleOutputBuffer(&output, output_option);
+  spawner.Run(prog, argv, env, cwd);
+  while (spawner.IsChildRunning()) spawner.Wait(Spawner::WAIT_INFINITE);
+
+  const int exit_status = spawner.ChildStatus();
+  if (status != nullptr) {
+    *status = exit_status;
+  } else {
+    LOG_IF(FATAL, exit_status != 0)
+        << "If the caller expects the non-zero exit status, "
+        << "the caller must set non-nullptr status in the argument."
+        << " prog=" << prog
+        << " cwd=" << cwd
+        << " exit_status=" << exit_status;
+  }
+  return output;
+}
+
+#endif
+
+}  // namespace devtools_goma
diff --git a/client/subprocess.h b/client/subprocess.h
new file mode 100644
index 0000000..1694e58
--- /dev/null
+++ b/client/subprocess.h
@@ -0,0 +1,94 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+// Note:
+// You SHOULD NOT use functions here in multi-threaded env.  (See spawner.h)
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SUBPROCESS_H_
+#define DEVTOOLS_GOMA_CLIENT_SUBPROCESS_H_
+
+#include <set>
+#include <string>
+#include <vector>
+
+#include "util.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+struct FileId;
+
+#ifdef _WIN32
+
+// execute program.
+// returns -1 on start failure.
+// return child process exit status from SpawnerWin.
+int SpawnAndWait(const string& prog, const std::vector<string>& args,
+                 const std::vector<string>& envs);
+
+// execute program but automatically avoids executing gomacc.
+// If |gomacc_fileid| == nullptr, this does not avoid executing gomacc.
+// returns -1 on start failure.
+// return child process exit status from SpawnerWin.
+int SpawnAndWaitNonGomacc(const FileId* gomacc_fileid, const string& prog,
+                          const std::vector<string>& args,
+                          std::vector<string> envs);
+
+#else
+
+// execute program.
+// If success, this function won't return.
+// returns -1 on failure like execve system call.
+//
+// Don't use this in multi threaded env.  (See spawner.h)
+int Execvpe(const string& prog, const std::vector<string>& args,
+            const std::vector<string>& envs);
+
+// execute program but automatically avoids executing gomacc.
+// If |gomacc_fileid| == nullptr, this does not avoid executing gomacc.
+//
+// Don't use this in multi threaded env.  (See spawner.h)
+int ExecvpeNonGomacc(const FileId* gomacc_fileid,
+                     const string& prog, const std::vector<string>& args,
+                     std::vector<string> envs);
+
+#endif
+
+#ifndef _WIN32
+// Execute commandline by popen and read all of its output into string.
+// The value returned by pclose will be stored to |status|.
+// If exit code is not zero and |status| == NULL, fatal error.
+//
+// Don't use this in multi threaded env.
+string ReadCommandOutputByPopen(
+    const string& prog, const std::vector<string>& argv,
+    const std::vector<string>& env,
+    const string& cwd, CommandOutputOption option, int32_t* status);
+
+// The caller must fork before calling this.
+// If |stderr_filename| is not empty, it redirects the spawning child's
+// stderr output to the file.
+// If |pid_record_fd| is >= 0, it writes out the spawning child's pid (i.e.
+// grandchild's pid to the caller's parent) to the fd and closes it.
+// All fds except stdin/stdout/stderr and in |preserve_fds| will be closed.
+//
+// Don't use this in multi threaded env.
+void Daemonize(const string& stderr_filename, int pid_record_fd,
+               const std::set<int>& preserve_fds);
+#else
+// Execute commandline by spawner_win and read output into string.
+// Exit code will be stored to |status|.
+// If exit code is not zero and |status| == NULL, fatal error.
+//
+// Don't use this in multi threaded env.  (See spawner.h)
+string ReadCommandOutputByRedirector(const string& prog,
+    const std::vector<string>& argv, const std::vector<string>& env,
+    const string& cwd, CommandOutputOption option, int32_t* status);
+#endif
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SUBPROCESS_H_
diff --git a/client/subprocess.protodevel b/client/subprocess.protodevel
new file mode 100644
index 0000000..8254b62
--- /dev/null
+++ b/client/subprocess.protodevel
@@ -0,0 +1,130 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+// Message used between compiler proxy and subprocess controller.
+
+syntax = "proto2";
+
+package devtools_goma;
+
+message SubProcessState {
+  // process state transitions.
+  // From           To             Event(s)
+  // SETUP          PENDING        register in subprocess controller
+  // PENDING        RUN            subprocess controller starts the proc
+  // PENDING        SIGNALED       Kill()
+  // RUN            SIGNALED       Kill(), kill proc
+  // *              FINISHED       subprocess is terminated.
+  //
+  // Invariants:
+  // SETUP          pid_ == -1
+  // PENDING        pid_ == -1
+  // RUN            pid_ > 0
+  // FINISHED       status_ is valid
+  enum State {
+    SETUP = 0;
+    PENDING = 1;
+    RUN = 2;
+    SIGNALED = 3;
+    FINISHED = 4;
+    NUM_STATE = 5;
+  };
+
+  enum Pid {
+    kInvalidPid = -1;
+  };
+}
+
+message SubProcessReq {
+  enum Priority {
+    HIGHEST_PRIORITY = 0;
+    HIGH_PRIORITY = 1;
+    LOW_PRIORITY = 2;
+  };
+  enum Weight {
+    HEAVY_WEIGHT = 0;
+    LIGHT_WEIGHT = 1;
+  };
+  enum OutputOption {
+    MERGE_STDOUT_STDERR = 0;
+    STDOUT_ONLY = 1;
+  }
+
+  // Unique id of subprocess. Assigned by SubProcessControllerClient.
+  optional int32 id = 1;
+  optional string trace_id = 2;
+
+  optional string prog = 10;
+  repeated string argv = 11;
+  repeated string env = 12;
+  optional string cwd = 13;
+  optional string stdin_filename = 14;
+  optional string stdout_filename = 15;
+  optional string stderr_filename = 16;
+  optional OutputOption output_option = 17 [default=MERGE_STDOUT_STDERR];
+  optional int32 umask = 18 [default = -1];
+
+  optional Priority priority = 20;
+  optional Weight weight = 21;
+
+  // If detach is true, no feedback comes from subprocess controller server.
+  optional bool detach = 30;
+};
+
+message SubProcessRun {
+  optional int32 id = 1;
+};
+
+message SubProcessKill {
+  optional int32 id = 1;
+};
+
+message SubProcessSetOption {
+  // The new limit of max subprocs.
+  optional int32 max_subprocs = 1;
+  // The new limit of max subprocs for low priority process.
+  optional int32 max_subprocs_low_priority = 2;
+  // The new limit of max subprocs for heavy weight process.
+  optional int32 max_subprocs_heavy_weight = 3;
+};
+
+message SubProcessStarted {
+  optional int32 id = 1;
+  optional int32 pid = 2 [default = -1];  // kInvalidPid
+  optional int32 pending_ms = 10;
+};
+
+message SubProcessTerminated {
+  // Since a program's exit status is usually a non-negative value on Posix
+  // and Windows, negative values are used here.
+  enum TerminateStatus {
+    // This means the task has not been started yet.
+    // This is the default value of status.
+    kNotStarted = -256;
+    // This means task is not started because of subprocess_controller's error.
+    kInternalError = -257;
+  }
+
+  // If status == kInternalError, error should be set to one of the following.
+  enum ErrorTerminate {
+    // If kill of the process failed.
+    kFailedToKill = 1;
+    // If the given id is not registered in subprocess_controller_server.
+    kFailedToLookup = 2;
+    // If subprocess_controller_server failed to spawn a subprocess.
+    kFailedToSpawn = 3;
+  };
+
+  optional int32 id = 1;
+  // status should be either the process's exit status or enum TerminateStatus.
+  optional int32 status = 2 [default = -256];  // kNotStarted.
+  // If signaled, this value should be set.
+  optional int32 term_signal = 3;
+  // error should be either unset (default) or a value of enum ErrorTerminate.
+  optional ErrorTerminate error = 4;
+
+  optional int32 run_ms = 10;
+  optional int64 mem_kb = 11;
+};
diff --git a/client/subprocess_controller.cc b/client/subprocess_controller.cc
new file mode 100644
index 0000000..7a1514f
--- /dev/null
+++ b/client/subprocess_controller.cc
@@ -0,0 +1,241 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "subprocess_controller.h"
+
+#include <string.h>
+
+#ifndef _WIN32
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#endif
+
+#include <memory>
+#include <string>
+#include <sstream>
+
+#include "breakpad.h"
+#include "compiler_specific.h"
+#include "compiler_proxy_info.h"
+#include "env_flags.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "google/protobuf/message.h"
+MSVC_POP_WARNING()
+#include "mypath.h"
+#include "platform_thread.h"
+#include "scoped_fd.h"
+#include "subprocess_controller_client.h"
+#include "subprocess_controller_server.h"
+
+GOMA_DECLARE_bool(COMPILER_PROXY_ENABLE_CRASH_DUMP);
+
+namespace devtools_goma {
+
+// Layout of the message header that precedes every payload on the socket:
+// two ints, the op at kOpOffset followed by the payload size at kSizeOffset.
+const size_t SubProcessController::kMessageHeaderLen = sizeof(int) * 2;
+const size_t SubProcessController::kOpOffset = 0;
+const size_t SubProcessController::kSizeOffset = sizeof(int);
+
+// Default limits used by SubProcessController::Options.
+static const int kMaxSubProcs = 3;
+static const int kMaxSubProcsForLowPriority = 1;
+static const int kMaxSubProcsForHeavyWeight = 1;
+
+// Initializes the limits to the file-level defaults and leaves subprocess
+// killing enabled (dont_kill_subprocess = false). dont_kill_commands starts
+// out empty.
+SubProcessController::Options::Options()
+    : max_subprocs(kMaxSubProcs),
+      max_subprocs_low_priority(kMaxSubProcsForLowPriority),
+      max_subprocs_heavy_weight(kMaxSubProcsForHeavyWeight),
+      dont_kill_subprocess(false) {
+}
+
+// Renders the three limits and the dont_kill_subprocess flag as a sequence
+// of " name=value" pairs. dont_kill_commands is not included.
+string SubProcessController::Options::DebugString() const {
+  std::ostringstream out;
+  out << " max_subprocs=" << max_subprocs;
+  out << " max_subprocs_low_priority=" << max_subprocs_low_priority;
+  out << " max_subprocs_heavy_weight=" << max_subprocs_heavy_weight;
+  out << " dont_kill_subprocess=" << dont_kill_subprocess;
+  return out.str();
+}
+
+/* static */
+#ifndef _WIN32
+// POSIX implementation: forks a child process that runs a
+// SubProcessControllerServer on one end of a socketpair(2), and creates the
+// SubProcessControllerClient on the other end in the parent.
+// Aborts via PLOG(FATAL) if socketpair() or fork() fails.
+//   arg0:    program name; "-subproc" is appended to form the name passed
+//            to InitGoogleLogging in the child.
+//   options: passed to both the server and the client.
+void SubProcessController::Initialize(
+    const char* arg0, const Options& options) {
+  int sockfd[2];
+  if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockfd) != 0) {
+    PLOG(FATAL) << "socketpair";
+  }
+  pid_t pid = fork();
+  if (pid < 0) {
+    PLOG(FATAL) << "fork";
+  }
+  if (pid == 0) {
+    // child.
+    string argv0(arg0);
+    argv0 += "-subproc";
+
+    // Point stdin/stdout/stderr at the descriptor from ScopedFd::OpenNull().
+    ScopedFd devnullfd(ScopedFd::OpenNull());
+    PCHECK(dup2(devnullfd.fd(), STDIN_FILENO) >= 0);
+    PCHECK(dup2(devnullfd.fd(), STDOUT_FILENO) >= 0);
+    PCHECK(dup2(devnullfd.fd(), STDERR_FILENO) >= 0);
+    devnullfd.reset(-1);
+    // The child only talks over sockfd[0]; close the parent's end and every
+    // other descriptor below 256 that was inherited across fork().
+    close(sockfd[1]);
+    for (int i = STDERR_FILENO + 1; i < 256; ++i) {
+      if (i == sockfd[0]) continue;
+      close(i);
+    }
+
+    google::InitGoogleLogging(argv0.c_str());
+    google::InstallFailureSignalHandler();
+    if (FLAGS_COMPILER_PROXY_ENABLE_CRASH_DUMP) {
+      // sleep 1s to get other GUID for minidump than main compiler_proxy's.
+      // need this only for linux?
+      PlatformThread::Sleep(1000);
+      devtools_goma::InitCrashReporter(devtools_goma::GetCrashDumpDirectory());
+    }
+    LOG(INFO) << "goma built revision " << kBuiltRevisionString;
+    {
+      std::ostringstream ss;
+      DumpEnvFlag(&ss);
+      LOG(INFO) << "goma flags:" << ss.str();
+    }
+    LOG(INFO) << "SubProcessControllerServer launched";
+    SubProcessControllerServer* server =
+        new SubProcessControllerServer(sockfd[0], options);
+    // Serve until Loop() returns, then end the child process here; the child
+    // never returns to the caller of Initialize().
+    server->Loop();
+    delete server;
+    LOG(INFO) << "SubProcessControllerServer terminated";
+    exit(0);
+  }
+  // parent: keep only the client end of the socket pair.
+  close(sockfd[0]);
+  SubProcessControllerClient::Create(sockfd[1], pid, options);
+}
+#else
+
+// Arguments handed to the Windows server thread (see StartServer), which
+// takes ownership of the heap-allocated instance.
+struct ServerParam {
+  // Server end of the socket pair.
+  int sockfd_;
+  // Options passed to the SubProcessControllerServer constructor.
+  SubProcessController::Options options_;
+};
+
+// Thread entry point of the Windows server. |param| must point to a
+// heap-allocated ServerParam; this function takes ownership of it and frees
+// it on return. Runs the server loop to completion and returns 0.
+DWORD WINAPI SubProcessController::StartServer(LPVOID param) {
+  std::unique_ptr<ServerParam> owned_param(static_cast<ServerParam*>(param));
+  std::unique_ptr<SubProcessControllerServer> controller(
+      new SubProcessControllerServer(owned_param->sockfd_,
+                                     owned_param->options_));
+  controller->Loop();
+  LOG(INFO) << "SubProcessControllerServer terminated";
+  return 0;
+}
+
+// Windows implementation: runs a SubProcessControllerServer on a new thread
+// of this process, connected to the SubProcessControllerClient through an
+// async socket pair. The first parameter (arg0) is unused on Windows.
+// Aborts if the socket pair cannot be created. If the server thread cannot
+// be created, the error is logged and no client is created.
+void SubProcessController::Initialize(
+    const char*, const Options& options) {
+  int sockfd[2];
+  CHECK_EQ(async_socketpair(sockfd),  0);
+
+  LOG(INFO) << "SubProcessControllerServer launching ...";
+  // Keep ownership here until the thread really exists, so that a failed
+  // CreateThread does not leak the parameter block.
+  std::unique_ptr<ServerParam> args(new ServerParam);
+  args->sockfd_ = sockfd[0];
+  args->options_ = options;
+  DWORD server_thread_id = 0;
+  ScopedFd server_thread(CreateThread(nullptr, 0, StartServer, args.get(), 0,
+                                      &server_thread_id));
+  if (!server_thread.valid()) {
+    // Read the error code before logging, which may itself change it.
+    const DWORD last_error = GetLastError();
+    LOG(ERROR) << "Failed to create SubProcessControllerServer thread."
+               << " GetLastError=" << last_error;
+    return;
+  }
+  // StartServer owns (and frees) the parameter block from now on. release()
+  // only drops our pointer, so it is safe even if the thread already ran.
+  args.release();
+  SubProcessControllerClient::Create(sockfd[1], server_thread_id, options);
+}
+#endif
+
+// Starts with empty read/write buffers and no bytes read.
+SubProcessController::SubProcessController()
+    : read_len_(0) {
+}
+
+// Nothing to release explicitly; the buffers are plain strings.
+SubProcessController::~SubProcessController() {
+}
+
+// Serializes |message| and queues it, prefixed by a header holding |op| and
+// the payload size, at the end of pending_write_.
+// Returns true if pending_write_ was empty before this call, i.e. this is
+// the first queued message.
+bool SubProcessController::AddMessage(
+    int op, const google::protobuf::Message& message) {
+  const bool was_empty = pending_write_.empty();
+
+  string payload;
+  message.SerializeToString(&payload);
+  const int payload_size = payload.size();
+
+  // Build the fixed-size header separately, then append header and payload.
+  string header(kMessageHeaderLen, '\0');
+  memcpy(&header[kOpOffset], &op, sizeof(int));
+  memcpy(&header[kSizeOffset], &payload_size, sizeof(int));
+
+  pending_write_.append(header);
+  pending_write_.append(payload);
+  return was_empty;
+}
+
+// True while there are queued bytes that have not been written yet.
+bool SubProcessController::has_pending_write() const {
+  return pending_write_.size() > 0;
+}
+
+// Writes as much of pending_write_ as |fd| accepts in a single Write call
+// and drops the written prefix from the buffer.
+// Returns true if data remains to be sent (including when the write should
+// be retried after EINTR/EAGAIN), false if nothing is left.
+// Any other write failure is fatal.
+bool SubProcessController::WriteMessage(const IOChannel* fd) {
+  VLOG(2) << "WriteMessage fd=" << *fd
+          << " pending_write=" << pending_write_.size();
+  if (pending_write_.empty())
+    return false;
+
+  const int written = fd->Write(&pending_write_[0], pending_write_.size());
+  if (written <= 0) {
+    if (errno != EINTR && errno != EAGAIN) {
+      PLOG(FATAL) << "write " << *fd << " failed " << written;
+    } else {
+      // Transient condition: keep the data and try again later.
+      return true;
+    }
+  }
+  pending_write_.erase(0, written);
+  return !pending_write_.empty();
+}
+
+// Reads the next chunk of a message from |fd| into pending_read_.
+//
+// A message is read in steps: first the fixed-size header (op and payload
+// size), then, after the buffer has been grown to fit, the payload. Each
+// call performs one Read.
+//
+// Returns true when a complete message is available (payload readable
+// through payload_data() until ReadDone() is called), or when the peer
+// closed the connection, in which case *op is set to CLOSED.
+// Returns false when more data is needed, or (non-Windows only) when the
+// read was interrupted with EINTR/EAGAIN.
+// *op and *len are written as soon as the header is complete, even if the
+// function still returns false because the payload is outstanding.
+// Any other read failure, or a negative payload size in the header, is
+// fatal.
+bool SubProcessController::ReadMessage(const IOChannel* fd,
+                                       int* op, int* len) {
+  VLOG(2) << "ReadMessage fd=" << *fd;
+  if (pending_read_.empty()) {
+    // Start of a new message: make room for the header only.
+    pending_read_.resize(kMessageHeaderLen);
+    read_len_ = 0;
+  }
+
+  char* buf = &pending_read_[read_len_];
+  int buf_size = pending_read_.size() - read_len_;
+  int r = fd->Read(buf, buf_size);
+  if (r == 0) {
+    *op = CLOSED;
+    return true;
+  } else if (r < 0) {
+#ifndef _WIN32
+    if (errno == EINTR || errno == EAGAIN)
+      return false;
+#endif
+    PLOG(FATAL) << "read " << *fd << " failed " << r;
+  }
+  read_len_ += r;
+  if (read_len_ < kMessageHeaderLen)
+    return false;
+
+  // Decode the header with memcpy, mirroring how AddMessage encodes it,
+  // instead of reading the char buffer through an int pointer.
+  memcpy(op, &pending_read_[kOpOffset], sizeof(int));
+  memcpy(len, &pending_read_[kSizeOffset], sizeof(int));
+  // A negative size would otherwise wrap around in the size_t arithmetic
+  // below and leave the reader in an inconsistent state.
+  CHECK_GE(*len, 0) << "broken message header op=" << *op;
+  const size_t message_len = kMessageHeaderLen + static_cast<size_t>(*len);
+  if (message_len > pending_read_.size()) {
+    // Header just completed: grow the buffer so later reads fill in the
+    // payload.
+    pending_read_.resize(message_len);
+    return false;
+  }
+  VLOG(2) << "ReadMessage op=" << *op
+          << " len=" << *len
+          << " read_len=" << read_len_;
+  return message_len == read_len_;
+}
+
+// Pointer to the first payload byte, i.e. just past the message header in
+// pending_read_.
+const char* SubProcessController::payload_data() const {
+  return pending_read_.data() + kMessageHeaderLen;
+}
+
+// Discards the message held in pending_read_ so that the next ReadMessage()
+// starts with a fresh header.
+void SubProcessController::ReadDone() {
+  VLOG(2) << "ReadDone";
+  read_len_ = 0;
+  pending_read_.clear();
+}
+
+}  // namespace devtools_goma
diff --git a/client/subprocess_controller.h b/client/subprocess_controller.h
new file mode 100644
index 0000000..c4c4980
--- /dev/null
+++ b/client/subprocess_controller.h
@@ -0,0 +1,159 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SUBPROCESS_CONTROLLER_H_
+#define DEVTOOLS_GOMA_CLIENT_SUBPROCESS_CONTROLLER_H_
+
+#include <memory>
+#include <set>
+#include <string>
+
+#include "basictypes.h"
+#include "scoped_fd.h"
+
+#ifdef _WIN32
+#include "socket_helper_win.h"
+#endif
+
+namespace google {
+namespace protobuf {
+class Message;
+}  // namespace protobuf
+}  // namespace google
+
+namespace devtools_goma {
+
+class ScopedSocket;
+class SubProcessKill;
+class SubProcessReq;
+class SubProcessRun;
+class SubProcessSetOption;
+class SubProcessStarted;
+class SubProcessTerminated;
+
+// SubProcessController consists of server and client.
+// A SubProcessController server runs in single threaded process and manages
+// SubProcessImpl and actual subprocesses.
+// A SubProcessController client runs with worker thread manager and serves
+// for SubProcessTask.
+// These server and client communicate with a socket created by socketpair(2).
+
+// message format:
+//   op: first int
+//   len: second int
+//   payload: len bytes
+//    client->server
+//      op=REGISTER;    payload: serialized SubProcessReq
+//      op=REQUEST_RUN; payload: serialized SubProcessRun
+//      op=KILL;        payload: serialized SubProcessKill
+//      op=SET_OPTION;  payload: serialized SubProcessSetOption
+//    server->client
+//      op=STARTED;     payload: serialized SubProcessStarted
+//      op=TERMINATED;  payload: serialized SubProcessTerminated
+class SubProcessController {
+ public:
+  // Limits and kill policy shared by the server and the client.
+  struct Options {
+    Options();
+    int max_subprocs;
+    int max_subprocs_low_priority;
+    int max_subprocs_heavy_weight;
+    bool dont_kill_subprocess;
+    // dont_kill_commands should be normalized to lower case on Windows.
+    std::set<std::string> dont_kill_commands;
+
+    std::string DebugString() const;
+  };
+  // Operation code carried in the first int of every message header.
+  enum Op {
+    // Not sent on the wire; reported by ReadMessage() when the peer closed
+    // the connection.
+    CLOSED = -1,
+    NOP = 0,
+
+    // client -> server
+    REGISTER = 1,
+    REQUEST_RUN = 2,
+    KILL = 3,
+    SET_OPTION = 4,
+
+    // server -> client
+    STARTED = 10,
+    TERMINATED = 11,
+  };
+
+  // Initializes SubProcessController subsystem.
+  // Must be called before creating threads.
+  static void Initialize(const char* arg0, const Options& options);
+  virtual ~SubProcessController();
+
+  // Register subproc.  Takes ownership of req.
+  // Client -> server
+  virtual void Register(std::unique_ptr<SubProcessReq> req) = 0;
+
+  // Request to run the subproc.  Takes ownership of run.
+  // Client -> server
+  virtual void RequestRun(std::unique_ptr<SubProcessRun> run) = 0;
+
+  // Kills the subproc.  Takes ownership of kill.
+  // Client -> server
+  virtual void Kill(std::unique_ptr<SubProcessKill> kill) = 0;
+
+  // Sets option. Takes the ownership of |option|.
+  // Client -> server.
+  virtual void SetOption(std::unique_ptr<SubProcessSetOption> option) = 0;
+
+  // Notifies the subproc is started.  Takes ownership of started.
+  // Used as the target of NewCallback in
+  // SubProcessControllerClient::DoRead().
+  // Server -> client.
+  virtual void Started(std::unique_ptr<SubProcessStarted> started) = 0;
+
+  // Notifies the subproc is terminated.  Takes ownership of terminated.
+  // Used as the target of NewCallback in
+  // SubProcessControllerClient::DoRead().
+  // Server -> client.
+  virtual void Terminated(std::unique_ptr<SubProcessTerminated> terminated) = 0;
+
+ protected:
+  SubProcessController();
+
+  // Adds message of op, with message to pending_write_.
+  // Returns true if it is the initial request in pending_write_.
+  // Must use on the same thread everytime.
+  bool AddMessage(int op, const google::protobuf::Message& message);
+
+  // Returns true if pending_write_ still holds unsent data.
+  bool has_pending_write() const;
+
+  // Writes pending_write_ message through fd.
+  // Returns true if it has still more data to send.
+  // Returns false if there is no more data to send.
+  bool WriteMessage(const IOChannel* fd);
+
+  // Reads message through fd.
+  // If it returns true, you can read payload data in payload_data().
+  // Once you processed data, you need to call ReadDone().
+  // Must use on the same thread everytime.
+  bool ReadMessage(const IOChannel* fd, int* op, int* len);
+
+  // Access payload data read by ReadMessage.
+  // Valid only between ReadMessage() and ReadDone().
+  const char* payload_data() const;
+
+  // Discards read message.
+  void ReadDone();
+
+#ifdef _WIN32
+  // Thread entry point that runs the server on Windows.
+  static DWORD WINAPI StartServer(LPVOID thread_params);
+#endif
+
+  // Size of the message header (op and len) and the byte offsets of its two
+  // fields.
+  static const size_t kMessageHeaderLen;
+  static const size_t kOpOffset;
+  static const size_t kSizeOffset;
+
+ private:
+  // Serialized messages queued by AddMessage() and not yet written.
+  std::string pending_write_;
+  // Buffer for the message currently being read (header plus payload).
+  std::string pending_read_;
+  // Number of bytes of pending_read_ that have been filled in so far.
+  size_t read_len_;
+  DISALLOW_COPY_AND_ASSIGN(SubProcessController);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SUBPROCESS_CONTROLLER_H_
diff --git a/client/subprocess_controller_client.cc b/client/subprocess_controller_client.cc
new file mode 100644
index 0000000..b5bc234
--- /dev/null
+++ b/client/subprocess_controller_client.cc
@@ -0,0 +1,587 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "subprocess_controller_client.h"
+
+#ifndef _WIN32
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#else
+#include "config_win.h"
+#include "socket_helper_win.h"
+#endif
+
+#include <memory>
+#include <sstream>
+#include <vector>
+
+#include "autolock_timer.h"
+#include "callback.h"
+#include "compiler_specific.h"
+#include "socket_descriptor.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/subprocess.pb.h"
+MSVC_POP_WARNING()
+#include "subprocess_task.h"
+#include "worker_thread_manager.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// The single client instance of this process. Set by
+// SubProcessControllerClient::Create() and reset to nullptr by the
+// destructor.
+SubProcessControllerClient *gSubProcessController;
+
+// Creates the client for the server identified by |pid| on socket |fd| and
+// publishes it through gSubProcessController.
+/* static */
+SubProcessControllerClient* SubProcessControllerClient::Create(
+    int fd, pid_t pid, const Options& options) {
+  // Must be called before starting threads.
+  SubProcessControllerClient* const client =
+      new SubProcessControllerClient(fd, pid, options);
+  gSubProcessController = client;
+  CHECK(gSubProcessController != nullptr);
+  return gSubProcessController;
+}
+
+// True while a client instance exists.
+/* static */
+bool SubProcessControllerClient::IsRunning() {
+  const bool has_client = (gSubProcessController != nullptr);
+  return has_client;
+}
+
+// Returns the client instance. Aborts (CHECK) if none has been created or
+// it has already been deleted.
+/* static */
+SubProcessControllerClient* SubProcessControllerClient::Get() {
+  CHECK(gSubProcessController != nullptr);
+  return gSubProcessController;
+}
+
+// Starts a new thread named "subprocess_controller_client" on |wm| that
+// runs Setup(wm, tmp_dir) on the client instance.
+/* static */
+void SubProcessControllerClient::Initialize(
+    WorkerThreadManager* wm, const string& tmp_dir) {
+  SubProcessControllerClient* const client = Get();
+  wm->NewThread(
+      NewCallback(client, &SubProcessControllerClient::Setup, wm, tmp_dir),
+      "subprocess_controller_client");
+}
+
+// Records the socket |fd|, the server's |pid| and the initial |options|.
+// wm_, thread_id_ and d_ stay unset until Setup() runs on the client thread.
+SubProcessControllerClient::SubProcessControllerClient(int fd,
+                                                       pid_t pid,
+                                                       const Options& options)
+    : wm_(nullptr),
+      thread_id_(0),
+      d_(nullptr),
+      fd_(fd),
+      server_pid_(pid),
+      cond_(&mu_),
+      next_id_(0),
+      current_options_(options),
+      periodic_closure_id_(kInvalidPeriodicClosureId),
+      quit_(false),
+      initialized_(false) {
+}
+
+// Requires that Quit() has run (quit_ set, periodic closure unregistered)
+// and that no task is left in subproc_tasks_.
+// Removes the socket descriptor from the worker thread manager, closes the
+// socket and clears gSubProcessController.
+SubProcessControllerClient::~SubProcessControllerClient() {
+  CHECK(quit_);
+  CHECK(subproc_tasks_.empty());
+  CHECK_EQ(periodic_closure_id_, kInvalidPeriodicClosureId);
+  // DeleteSocketDescriptor hands the socket back; close it here.
+  ScopedSocket fd(wm_->DeleteSocketDescriptor(d_));
+  fd.Close();
+  d_ = nullptr;
+  thread_id_ = 0;
+  wm_ = nullptr;
+  gSubProcessController = nullptr;
+}
+
+// Binds the client to the calling thread of |wm|: registers the socket with
+// the worker thread manager, starts listening for readable events (DoRead),
+// stores |tmp_dir| and registers a periodic closure that runs
+// RunCheckSignaled every 10 * 1000 (presumably milliseconds -- confirm
+// against WorkerThreadManager::RegisterPeriodicClosure).
+void SubProcessControllerClient::Setup(
+    WorkerThreadManager* wm, string tmp_dir) {
+  wm_ = wm;
+  // All socket I/O of this client happens on this thread from now on.
+  thread_id_ = wm_->GetCurrentThreadId();
+  d_ = wm_->RegisterSocketDescriptor(std::move(fd_),
+                                     WorkerThreadManager::PRIORITY_MED);
+  SetInitialized();
+  d_->NotifyWhenReadable(
+      NewPermanentCallback(this, &SubProcessControllerClient::DoRead));
+  tmp_dir_ = tmp_dir;
+  {
+    AUTOLOCK(lock, &mu_);
+    CHECK_EQ(periodic_closure_id_, kInvalidPeriodicClosureId);
+    periodic_closure_id_ = wm_->RegisterPeriodicClosure(
+        FROM_HERE, 10 * 1000, NewPermanentCallback(
+            this, &SubProcessControllerClient::RunCheckSignaled));
+  }
+  LOG(INFO) << "SubProcessControllerClient Initialized fd=" << d_->fd();
+}
+
+// Marks the client as initialized, under initialized_mu_.
+void SubProcessControllerClient::SetInitialized() {
+  AUTOLOCK(lock, &initialized_mu_);
+  initialized_ = true;
+}
+
+// Returns whether SetInitialized() has been called, under initialized_mu_.
+bool SubProcessControllerClient::Initialized() const {
+  AUTOLOCK(lock, &initialized_mu_);
+  return initialized_;
+}
+
+// Stops accepting new work: sets quit_, sends a kill request for every task
+// currently in subproc_tasks_, then unregisters the periodic closure.
+// The kill requests must go out before the periodic closure id is
+// invalidated, because Kill() drops requests once it is invalid.
+void SubProcessControllerClient::Quit() {
+  LOG(INFO) << "SubProcessControllerClient Quit";
+
+  std::vector<std::unique_ptr<SubProcessKill>> pending_kills;
+  {
+    AUTOLOCK(lock, &mu_);
+    quit_ = true;
+    for (const auto& entry : subproc_tasks_) {
+      std::unique_ptr<SubProcessKill> request(new SubProcessKill);
+      request->set_id(entry.first);
+      pending_kills.push_back(std::move(request));
+    }
+  }
+  // Kill() takes mu_ itself, so send the requests outside the lock.
+  for (auto& request : pending_kills) {
+    Kill(std::move(request));
+  }
+  {
+    AUTOLOCK(lock, &mu_);
+    if (periodic_closure_id_ != kInvalidPeriodicClosureId) {
+      wm_->UnregisterPeriodicClosure(periodic_closure_id_);
+      periodic_closure_id_ = kInvalidPeriodicClosureId;
+    }
+  }
+}
+
+// Waits until every registered task has terminated, then schedules Delete()
+// on the client thread. Quit() must have been called before (checked).
+void SubProcessControllerClient::Shutdown() {
+  LOG(INFO) << "SubProcessControllerClient shutdown";
+  {
+    AUTOLOCK(lock, &mu_);
+    CHECK(quit_);
+    CHECK_EQ(periodic_closure_id_, kInvalidPeriodicClosureId);
+    // Terminated() signals cond_ once subproc_tasks_ becomes empty after
+    // quit_ is set.
+    while (!subproc_tasks_.empty()) {
+      LOG(INFO) << "wait for subproc_tasks_ become empty";
+      cond_.Wait();
+    }
+  }
+  // Not to pass SubProcessControllerClient::SendRequest to send Kill,
+  // this should be executed with PRIORITY_MED.
+  wm_->RunClosureInThread(
+      FROM_HERE,
+      thread_id_,
+      NewCallback(
+          this, &SubProcessControllerClient::Delete),
+      WorkerThreadManager::PRIORITY_MED);
+}
+
+// Assigns a fresh id to |task| and registers its request with the server.
+// |task| must be PENDING and must not have an id yet (both checked).
+// Non-detached tasks are remembered in subproc_tasks_ so that later
+// Started/Terminated notifications can be routed to them.
+// If the client is already quitting, nothing is registered; instead
+// task->Terminated() is scheduled on the client thread with status
+// kNotStarted.
+void SubProcessControllerClient::RegisterTask(SubProcessTask* task) {
+  CHECK_EQ(-1, task->req().id()) << task->req().DebugString();
+  CHECK_EQ(SubProcessState::PENDING, task->state())
+      << task->req().DebugString();
+  int id = 0;
+  bool accepted = false;
+  {
+    AUTOLOCK(lock, &mu_);
+    if (!quit_) {
+      accepted = true;
+      id = ++next_id_;
+      // detach task would not notify back, so no need to set it
+      // in subproc_tasks_.
+      if (!task->req().detach()) {
+        subproc_tasks_.insert(std::make_pair(id, task));
+      }
+    }
+    // When quitting, the task is deliberately not put in subproc_tasks_.
+  }
+  if (accepted) {
+    VLOG(1) << task->req().trace_id() << ": RegisterTask id=" << id;
+    task->mutable_req()->set_id(id);
+    std::unique_ptr<SubProcessReq> req(new SubProcessReq);
+    *req = task->req();
+    Register(std::move(req));
+    return;
+  }
+
+  LOG(INFO) << task->req().trace_id() << ": RegisterTask in quit";
+  std::unique_ptr<SubProcessTerminated> terminated(new SubProcessTerminated);
+  terminated->set_id(id);
+  terminated->set_status(SubProcessTerminated::kNotStarted);
+  wm_->RunClosureInThread(
+      FROM_HERE,
+      thread_id_,
+      devtools_goma::NewCallback(
+          task, &SubProcessTask::Terminated, std::move(terminated)),
+      WorkerThreadManager::PRIORITY_MED);
+}
+
+// Schedules sending of a REGISTER message carrying |req| on the client
+// thread. Dropped silently once the client is quitting.
+void SubProcessControllerClient::Register(std::unique_ptr<SubProcessReq> req) {
+  bool quitting = false;
+  {
+    AUTOLOCK(lock, &mu_);
+    quitting = quit_;
+  }
+  if (quitting)
+    return;
+
+  VLOG(1) << "Register id=" << req->id() << " " << req->trace_id();
+  std::unique_ptr<google::protobuf::Message> message(std::move(req));
+  wm_->RunClosureInThread(
+      FROM_HERE,
+      thread_id_,
+      devtools_goma::NewCallback(
+          this, &SubProcessControllerClient::SendRequest,
+          SubProcessController::REGISTER,
+          std::move(message)),
+      WorkerThreadManager::PRIORITY_MED);
+}
+
+// Schedules sending of a REQUEST_RUN message carrying |run| on the client
+// thread. Dropped silently once the client is quitting.
+void SubProcessControllerClient::RequestRun(
+    std::unique_ptr<SubProcessRun> run) {
+  VLOG(1) << "Run id=" << run->id();
+  bool quitting = false;
+  {
+    AUTOLOCK(lock, &mu_);
+    quitting = quit_;
+  }
+  if (quitting)
+    return;
+
+  std::unique_ptr<google::protobuf::Message> message(std::move(run));
+  wm_->RunClosureInThread(
+      FROM_HERE,
+      thread_id_,
+      devtools_goma::NewCallback(
+          this, &SubProcessControllerClient::SendRequest,
+          SubProcessController::REQUEST_RUN,
+          std::move(message)),
+      WorkerThreadManager::PRIORITY_MED);
+}
+
+// Schedules sending of a KILL message carrying |kill| on the client thread.
+// Dropped silently when no periodic closure is registered, i.e. before
+// Setup() or after Quit() has unregistered it.
+void SubProcessControllerClient::Kill(std::unique_ptr<SubProcessKill> kill) {
+  bool active = false;
+  {
+    AUTOLOCK(lock, &mu_);
+    active = (periodic_closure_id_ != kInvalidPeriodicClosureId);
+  }
+  if (!active) {
+    return;
+  }
+
+  LOG(INFO) << "Kill id=" << kill->id();
+  std::unique_ptr<google::protobuf::Message> message(std::move(kill));
+  wm_->RunClosureInThread(
+      FROM_HERE,
+      thread_id_,
+      devtools_goma::NewCallback(
+          this, &SubProcessControllerClient::SendRequest,
+          SubProcessController::KILL,
+          std::move(message)),
+      WorkerThreadManager::PRIORITY_MED);
+}
+
+// Records the limits from |option| in current_options_ and schedules
+// sending of a SET_OPTION message on the client thread.
+// Dropped silently when no periodic closure is registered, i.e. before
+// Setup() or after Quit() has unregistered it.
+void SubProcessControllerClient::SetOption(
+    std::unique_ptr<SubProcessSetOption> option) {
+  {
+    AUTOLOCK(lock, &mu_);
+    if (periodic_closure_id_ == kInvalidPeriodicClosureId) {
+      return;
+    }
+
+    // All three limits are copied from the accessors, whether or not the
+    // corresponding field was explicitly set in |option|.
+    current_options_.max_subprocs = option->max_subprocs();
+    current_options_.max_subprocs_low_priority =
+        option->max_subprocs_low_priority();
+    current_options_.max_subprocs_heavy_weight =
+        option->max_subprocs_heavy_weight();
+  }
+  LOG(INFO) << "SetOption"
+            << " max_subprocs=" << option->max_subprocs()
+            << " max_subprocs_heavy_weight="
+            << option->max_subprocs_heavy_weight()
+            << " max_subprocs_low_priority="
+            << option->max_subprocs_low_priority();
+  wm_->RunClosureInThread(
+      FROM_HERE,
+      thread_id_,
+      devtools_goma::NewCallback(
+          this, &SubProcessControllerClient::SendRequest,
+          SubProcessController::SET_OPTION,
+          std::unique_ptr<google::protobuf::Message>(std::move(option))),
+      WorkerThreadManager::PRIORITY_MED);
+}
+
+// Handles a STARTED notification on the client thread: forwards |started|
+// to the task registered under its id. If no such task is known, a kill
+// request is issued for that id instead.
+void SubProcessControllerClient::Started(
+    std::unique_ptr<SubProcessStarted> started) {
+  VLOG(1) << "Started " << started->id() << " pid=" << started->pid();
+  DCHECK(BelongsToCurrentThread());
+  const int id = started->id();
+  SubProcessTask* task = nullptr;
+  {
+    AUTOLOCK(lock, &mu_);
+    auto it = subproc_tasks_.find(id);
+    if (it != subproc_tasks_.end())
+      task = it->second;
+  }
+  if (task != nullptr) {
+    task->Started(std::move(started));
+    return;
+  }
+
+  LOG(WARNING) << "No task for id=" << id;
+  std::unique_ptr<SubProcessKill> kill(new SubProcessKill);
+  kill->set_id(id);
+  Kill(std::move(kill));
+}
+
+// Handles a TERMINATED notification on the client thread: removes the task
+// registered under the id from subproc_tasks_ and forwards |terminated| to
+// it. For tasks with an async callback, SubProcessTask::Done is then
+// scheduled on the task's own thread.
+// If the client is quitting and this was the last task, reading/writing on
+// the socket is stopped and a waiter on cond_ (see Shutdown()) is signaled.
+void SubProcessControllerClient::Terminated(
+    std::unique_ptr<SubProcessTerminated> terminated) {
+  DCHECK(BelongsToCurrentThread());
+  VLOG(1) << "Terminated " << terminated->id()
+          << " status=" << terminated->status();
+  int id = terminated->id();
+  SubProcessTask* task = nullptr;
+  {
+    AUTOLOCK(lock, &mu_);
+    std::map<int, SubProcessTask*>::iterator found =
+        subproc_tasks_.find(id);
+    if (found != subproc_tasks_.end()) {
+      task = found->second;
+      subproc_tasks_.erase(found);
+    }
+  }
+  if (task != nullptr) {
+    // Read async_callback() before calling Terminated(): afterwards |task|
+    // may no longer exist (see below).
+    bool async = task->async_callback();
+    task->Terminated(std::move(terminated));
+    // If task is synchronous (!async), task may already be deleted here.
+    if (async) {
+      wm_->RunClosureInThread(
+          FROM_HERE,
+          task->thread_id(),
+          NewCallback(
+              task, &SubProcessTask::Done),
+          WorkerThreadManager::PRIORITY_MED);
+    }
+  } else {
+    // Unknown id (for example a detached task, which is never put in
+    // subproc_tasks_): only log. A failed lookup on the server side is
+    // logged at INFO, anything else at WARNING.
+    std::ostringstream ss;
+    ss << "no task found for id=" << id
+       << " status=" << terminated->status()
+       << " error=" << SubProcessTerminated_ErrorTerminate_Name(
+           terminated->error());
+    if (terminated->error() == SubProcessTerminated::kFailedToLookup) {
+      LOG(INFO) << ss.str();
+    } else {
+      LOG(WARNING) << ss.str();
+    }
+  }
+
+  {
+    AUTOLOCK(lock, &mu_);
+    if (quit_ && subproc_tasks_.empty()) {
+      LOG(INFO) << "all subproc_tasks done";
+      d_->StopRead();
+      d_->StopWrite();
+      CHECK(subproc_tasks_.empty());
+      cond_.Signal();
+    }
+  }
+}
+
+// Counts the registered tasks whose state is SETUP or PENDING.
+int SubProcessControllerClient::NumPending() const {
+  AUTOLOCK(lock, &mu_);
+  int num_pending = 0;
+  for (const auto& entry : subproc_tasks_) {
+    const auto state = entry.second->state();
+    if (state == SubProcessState::SETUP ||
+        state == SubProcessState::PENDING) {
+      ++num_pending;
+    }
+  }
+  return num_pending;
+}
+
+// True if the caller runs on the client thread recorded by Setup().
+bool SubProcessControllerClient::BelongsToCurrentThread() const {
+  return THREAD_ID_IS_SELF(thread_id_);
+}
+
+// Runs on the client thread (scheduled by Shutdown()): clears the readable
+// notification on the socket descriptor and destroys this object.
+// |this| must not be used after the call.
+void SubProcessControllerClient::Delete() {
+  DCHECK(BelongsToCurrentThread());
+  d_->ClearReadable();
+  delete this;
+}
+
+// Runs on the client thread: queues |message| with |op| for writing. If it
+// is the first queued message, asks to be notified (DoWrite) when the
+// socket becomes writable.
+void SubProcessControllerClient::SendRequest(
+    SubProcessController::Op op,
+    std::unique_ptr<google::protobuf::Message> message) {
+  DCHECK(BelongsToCurrentThread());
+  const bool first_in_queue = AddMessage(op, *message);
+  if (!first_in_queue)
+    return;
+
+  VLOG(3) << "SendRequest has pending write";
+  d_->NotifyWhenWritable(
+      NewPermanentCallback(this, &SubProcessControllerClient::DoWrite));
+}
+
+// Writable callback of the socket: writes queued data and, once nothing is
+// left to send, schedules WriteDone() on the client thread.
+void SubProcessControllerClient::DoWrite() {
+  VLOG(2) << "DoWrite";
+  DCHECK(BelongsToCurrentThread());
+  const bool more_to_send = WriteMessage(d_->wrapper());
+  if (more_to_send)
+    return;
+
+  VLOG(3) << "DoWrite no pending";
+  wm_->RunClosureInThread(
+      FROM_HERE,
+      thread_id_,
+      NewCallback(
+          this, &SubProcessControllerClient::WriteDone),
+      WorkerThreadManager::PRIORITY_IMMEDIATE);
+}
+
+// Stops the writable notification, unless new data has been queued since
+// DoWrite() scheduled this call.
+void SubProcessControllerClient::WriteDone() {
+  VLOG(2) << "WriteDone";
+  DCHECK(BelongsToCurrentThread());
+  if (!has_pending_write()) {
+    d_->ClearWritable();
+  }
+}
+
+// Readable callback of the socket: reads from the server and, once a
+// complete message is available, dispatches on its op.
+//   CLOSED:     the server went away. On non-Windows the server process is
+//               reaped with waitpid() and this process exits; on Windows
+//               this is fatal.
+//   STARTED /
+//   TERMINATED: the payload is parsed and Started()/Terminated() is
+//               scheduled on the client thread; an unparsable payload is
+//               logged and dropped.
+//   otherwise:  fatal.
+void SubProcessControllerClient::DoRead() {
+  VLOG(2) << "DoRead";
+  DCHECK(BelongsToCurrentThread());
+  int op = 0;
+  int len = 0;
+  if (!ReadMessage(d_->wrapper(), &op, &len)) {
+    // Message is still incomplete; wait for the next readable event.
+    VLOG(2) << "pending read op=" << op << " len=" << len;
+    return;
+  }
+  VLOG(2) << "DoRead op=" << op << " len=" << len;
+  switch (op) {
+    case SubProcessController::CLOSED:
+#ifndef _WIN32
+      LOG(ERROR) << "SubProcessControllerServer died unexpectedly."
+                 << " pid=" << server_pid_;
+      {
+        // subprocess controller server process was killed or crashed?
+        int status = 0;
+        if (waitpid(server_pid_, &status, 0) == -1) {
+          PLOG(FATAL) << "SubProcessControllerServer wait failed pid="
+                      << server_pid_;
+        }
+        // WEXITSTATUS is read here without first checking WIFEXITED.
+        int exit_status = WEXITSTATUS(status);
+        int signaled = 0;
+        if (WIFSIGNALED(status)) {
+          signaled = WTERMSIG(status);
+        }
+        LOG(INFO) << "SubProcessControllerServer exited "
+                  << " status=" << exit_status
+                  << " signal=" << signaled;
+        // NOTE(review): this requires BOTH a non-zero exit status and a
+        // terminating signal, which looks like it can rarely if ever hold
+        // for a single wait status. Confirm whether `||` was intended;
+        // changing it would turn a quiet exit(0) into a crash.
+        if (exit_status != 0 && signaled != 0) {
+          LOG(FATAL) << "unexpected SubProcessControllerServer exit";
+        }
+      }
+      exit(0);
+#else
+      // subprocess controller server is a thread, not a process on Windows.
+      LOG(FATAL) << "SubProcessControllerServer died unexpectedly.";
+#endif
+
+    // Note: STARTED and TERMINATED should run closure with the same priority
+    // Otherwise, they may not be executed in order.
+    case SubProcessController::STARTED: {
+        std::unique_ptr<SubProcessStarted> started(new SubProcessStarted);
+        if (started->ParseFromArray(payload_data(), len)) {
+          wm_->RunClosureInThread(
+              FROM_HERE,
+              thread_id_,
+              devtools_goma::NewCallback(
+                  this, &SubProcessControllerClient::Started,
+                  std::move(started)),
+              WorkerThreadManager::PRIORITY_MED);
+        } else {
+          LOG(ERROR) << "broken SubProcessStarted";
+        }
+      }
+      break;
+
+    case SubProcessController::TERMINATED: {
+        std::unique_ptr<SubProcessTerminated> terminated(
+            new SubProcessTerminated);
+        if (terminated->ParseFromArray(payload_data(), len)) {
+          wm_->RunClosureInThread(
+              FROM_HERE,
+              thread_id_,
+              devtools_goma::NewCallback(
+                  this, &SubProcessControllerClient::Terminated,
+                  std::move(terminated)),
+              WorkerThreadManager::PRIORITY_MED);
+        } else {
+          LOG(ERROR) << "broken SubProcessTerminated";
+        }
+      }
+      break;
+
+    default:
+      LOG(FATAL) << "Unknown SubProcessController::Op " << op;
+  }
+  // The payload has been consumed; make room for the next message.
+  ReadDone();
+  return;
+}
+
+// Periodic closure registered in Setup(): hands the actual work over to
+// CheckSignaled() on the client thread.
+void SubProcessControllerClient::RunCheckSignaled() {
+  if (gSubProcessController == nullptr) {
+    // RunCheckSignaled is periodic closure managed by gSubProcessController,
+    // it should never be called when gSubProcessController == nullptr.
+    LOG(FATAL) << "gSubProcessController is nullptr";
+    return;
+  }
+  // Switch from alarm worker to client thread.
+  wm_->RunClosureInThread(
+      FROM_HERE,
+      thread_id_,
+      NewCallback(
+          this, &SubProcessControllerClient::CheckSignaled),
+      WorkerThreadManager::PRIORITY_MED);
+}
+
+// Runs on the client thread: issues a kill request for every registered
+// task whose state is SIGNALED.
+void SubProcessControllerClient::CheckSignaled() {
+  if (gSubProcessController == nullptr) {
+    // gSubProcessController (and this pointer) may be nullptr because Delete is
+    // higher priority (put in WorkerThreadManager in Shutdown).
+    // Should not access any member fields here.
+    return;
+  }
+  DCHECK(BelongsToCurrentThread());
+  std::vector<std::unique_ptr<SubProcessKill>> pending_kills;
+  {
+    AUTOLOCK(lock, &mu_);
+    for (const auto& entry : subproc_tasks_) {
+      if (entry.second->state() != SubProcessState::SIGNALED)
+        continue;
+      std::unique_ptr<SubProcessKill> request(new SubProcessKill);
+      request->set_id(entry.first);
+      pending_kills.push_back(std::move(request));
+    }
+  }
+  // Kill() takes mu_ itself, so send the requests outside the lock.
+  for (auto& request : pending_kills) {
+    Kill(std::move(request));
+  }
+}
+
+// Returns a human-readable dump: one line with the current options,
+// followed by one line per registered task (id, trace id, state, priority,
+// weight and pid).
+string SubProcessControllerClient::DebugString() const {
+  AUTOLOCK(lock, &mu_);
+  std::ostringstream out;
+
+  out << "options: " << current_options_.DebugString() << '\n';
+
+  for (const auto& entry : subproc_tasks_) {
+    SubProcessTask* const task = entry.second;
+    out << entry.first << " ";
+    out << task->req().trace_id() << " ";
+    out << SubProcessState::State_Name(task->state()) << " ";
+    out << SubProcessReq::Priority_Name(task->req().priority()) << " ";
+    out << SubProcessReq::Weight_Name(task->req().weight()) << " ";
+    out << "pid=" << task->started().pid() << "\n";
+  }
+  return out.str();
+}
+
+}  // namespace devtools_goma
diff --git a/client/subprocess_controller_client.h b/client/subprocess_controller_client.h
new file mode 100644
index 0000000..8535407
--- /dev/null
+++ b/client/subprocess_controller_client.h
@@ -0,0 +1,111 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SUBPROCESS_CONTROLLER_CLIENT_H_
+#define DEVTOOLS_GOMA_CLIENT_SUBPROCESS_CONTROLLER_CLIENT_H_
+
+#include <map>
+#include <memory>
+#include <string>
+
+#include "basictypes.h"
+#include "lockhelper.h"
+#include "scoped_fd.h"
+#include "subprocess_controller.h"
+#include "worker_thread_manager.h"
+
+namespace devtools_goma {
+
+class SubProcessTask;
+
+// SubProcessControllerClient runs in multi-thread mode, and communicates
+// with SubProcessControllerServer via fd.
+// The communication runs in the thread where Setup() is called.
+class SubProcessControllerClient: public SubProcessController {
+ public:
+  static bool IsRunning();
+  static SubProcessControllerClient* Get();
+  static void Initialize(WorkerThreadManager* wm, const std::string& tmp_dir);
+
+  WorkerThreadManager* wm() const { return wm_; }
+  const std::string& tmp_dir() const { return tmp_dir_; }
+
+  void SetInitialized();
+  bool Initialized() const;
+
+  // Quit stops serving new SubProcessTask, and kills running subprocesses.
+  void Quit();
+  // Shutdown cleans up SubProcessControllerClient.
+  // Quit must be called before Shutdown.
+  void Shutdown();
+
+  void RegisterTask(SubProcessTask* task);
+
+  // Sends request to server.
+  void RequestRun(std::unique_ptr<SubProcessRun> run) override;
+  void Kill(std::unique_ptr<SubProcessKill> kill) override;
+  void SetOption(std::unique_ptr<SubProcessSetOption> option) override;
+
+  int NumPending() const;
+
+  bool BelongsToCurrentThread() const;
+
+  std::string DebugString() const;
+
+ private:
+  friend class SubProcessController;
+
+  // Takes ownership of fd.
+  // pid is process id of subprocess controller server.
+  static SubProcessControllerClient* Create(
+      int fd, pid_t pid, const Options& options);
+
+  SubProcessControllerClient(int fd, pid_t pid, const Options& options);
+  ~SubProcessControllerClient() override;
+  void Setup(WorkerThreadManager* wm, std::string tmp_dir);
+  void Delete();
+
+  // Sends request to server.
+  void Register(std::unique_ptr<SubProcessReq> req) override;
+
+  // Handles server notification.
+  void Started(std::unique_ptr<SubProcessStarted> started) override;
+  void Terminated(std::unique_ptr<SubProcessTerminated> terminated) override;
+
+  void SendRequest(SubProcessController::Op op,
+                   std::unique_ptr<google::protobuf::Message> message);
+  void DoWrite();
+  void WriteDone();
+
+  void DoRead();
+
+  void RunCheckSignaled();
+  void CheckSignaled();
+
+  WorkerThreadManager* wm_;
+  WorkerThreadManager::ThreadId thread_id_;
+  SocketDescriptor* d_;
+  // Ownership is transferred to d_ at Setup().
+  ScopedSocket fd_;
+  const pid_t server_pid_;
+  std::string tmp_dir_;
+
+  Lock mu_;
+  ConditionVariable cond_;  // condition to wait for all subproc_tasks_ done.
+  int next_id_ GUARDED_BY(mu_);
+  std::map<int, SubProcessTask*> subproc_tasks_ GUARDED_BY(mu_);
+  Options current_options_ GUARDED_BY(mu_);
+  PeriodicClosureId periodic_closure_id_ GUARDED_BY(mu_);
+  bool quit_ GUARDED_BY(mu_);
+
+  Lock initialized_mu_;
+  bool initialized_ GUARDED_BY(initialized_mu_);
+
+  DISALLOW_COPY_AND_ASSIGN(SubProcessControllerClient);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SUBPROCESS_CONTROLLER_CLIENT_H_
diff --git a/client/subprocess_controller_server.cc b/client/subprocess_controller_server.cc
new file mode 100644
index 0000000..5de5bc7
--- /dev/null
+++ b/client/subprocess_controller_server.cc
@@ -0,0 +1,539 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "subprocess_controller_server.h"
+
+#include <memory>
+#include <set>
+#include <string.h>
+
+#ifndef _WIN32
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/select.h>
+#include <unistd.h>
+#endif
+
+#include "compiler_specific.h"
+#include "fileflag.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+#include "ioutil.h"
+#include "path.h"
+#include "platform_thread.h"
+#include "subprocess_impl.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/subprocess.pb.h"
+MSVC_POP_WARNING()
+
+#ifdef _WIN32
+#include "spawner_win.h"
+#endif
+
+namespace {
+// True unless file::Stem(|command|) (lower-cased on Windows) is in the set.
+static bool CanKillCommand(StringPiece command,
+                           const std::set<string>& dont_kill_commands) {
+  string prog = string(file::Stem(command));
+#ifdef _WIN32  // tolower() needs values representable as unsigned char.
+  for (char& c : prog)
+    c = static_cast<char>(::tolower(static_cast<unsigned char>(c)));
+#endif
+  return dont_kill_commands.find(prog) == dont_kill_commands.end();
+}
+}  // namespace
+
+namespace devtools_goma {
+
+static const int kIdleIntervalMilliSec = 500;
+static const int kWaitIntervalMilliSec = 5;
+
+#ifndef _WIN32
+// siginfo is passed from signal handler to SubProcessControllerServer loop.
+static int g_signal_fd;
+
+void SigChldAction(int signo ALLOW_UNUSED,
+                   siginfo_t* siginfo,
+                   void* context ALLOW_UNUSED) {
+  if (write(g_signal_fd, siginfo, sizeof(siginfo_t)) != sizeof(siginfo_t))
+    abort();
+}
+#endif
+
+SubProcessControllerServer::SubProcessControllerServer(
+    int sock_fd,
+    const SubProcessController::Options& options)
+    : sock_fd_(sock_fd),
+#ifndef _WIN32
+      signal_fd_(-1),
+#endif
+      timeout_millisec_(kIdleIntervalMilliSec),
+      options_(options) {
+  LOG(INFO) << "SubProcessControllerServer started fd=" << sock_fd
+            << " " << options_.DebugString();
+#ifdef _WIN32
+  SpawnerWin::Setup();
+#endif
+}
+
+SubProcessControllerServer::~SubProcessControllerServer() {
+#ifdef _WIN32
+  SpawnerWin::TearDown();
+#endif
+  LOG(INFO) << "SubProcessControllerServer deleted.";
+}
+
+// Serves the client until the socket is closed: select()s on the socket (and,
+// on POSIX, the SIGCHLD pipe) and dispatches to DoWrite/DoRead/DoSignal/
+// DoTimeout.  On exit, non-detached subprocesses are killed and waited for.
+void SubProcessControllerServer::Loop() {
+  VLOG(1) << "Loop";
+#ifndef _WIN32
+  SetupSigchldHandler();
+#endif
+  DCHECK(sock_fd_.valid());
+#ifndef _WIN32
+  DCHECK(signal_fd_.valid());
+#endif
+  while (sock_fd_.valid()) {
+    fd_set read_fd;
+    fd_set write_fd;
+    FD_ZERO(&read_fd);
+    FD_ZERO(&write_fd);
+    MSVC_PUSH_DISABLE_WARNING_FOR_FD_SET();
+    FD_SET(sock_fd_.get(), &read_fd);
+    MSVC_POP_WARNING();
+    if (has_pending_write()) {
+      MSVC_PUSH_DISABLE_WARNING_FOR_FD_SET();
+      FD_SET(sock_fd_.get(), &write_fd);
+      MSVC_POP_WARNING();
+    }
+    int max_fd = std::max(-1, sock_fd_.get());
+#ifndef _WIN32
+    FD_SET(signal_fd_.fd(), &read_fd);
+    max_fd = std::max(max_fd, signal_fd_.fd());
+#endif
+    struct timeval tv;
+    tv.tv_sec = timeout_millisec_ / 1000;
+    tv.tv_usec = (timeout_millisec_ % 1000) * 1000;
+    const int ready = select(max_fd + 1, &read_fd, &write_fd, nullptr, &tv);
+    if (ready < 0) {
+      if (errno == EINTR || errno == EAGAIN)
+        continue;
+      PLOG(FATAL) << "select";
+    }
+    VLOG(2) << "r=" << ready
+            << " sock_fd=" << FD_ISSET(sock_fd_.get(), &read_fd)
+#ifndef _WIN32
+            << " signal_fd=" << FD_ISSET(signal_fd_.fd(), &read_fd)
+#endif
+            << " t=" << tv.tv_sec << "," << tv.tv_usec;
+    if (ready == 0) {
+      DoTimeout();
+      continue;
+    }
+    if (FD_ISSET(sock_fd_.get(), &write_fd)) {
+      DoWrite();
+      if (!has_pending_write()) {
+        FlushLogFiles();
+      }
+    }
+    if (FD_ISSET(sock_fd_.get(), &read_fd)) {
+      DoRead();
+    }
+#ifndef _WIN32
+    if (FD_ISSET(signal_fd_.fd(), &read_fd)) {
+      DoSignal();
+    }
+#endif
+  }
+  // The loop above only exits once the socket is no longer valid.
+  VLOG(1) << "sock_fd closed";
+  LOG(INFO) << "Terminating...";
+  FlushLogFiles();
+  // Tear down every subprocess that is still registered.
+  for (const auto& entry : subprocs_) {
+    SubProcessImpl* s = entry.second;
+    if (s->req().detach()) {
+      delete s;
+      continue;
+    }
+    const SubProcessTerminated* terminated = nullptr;
+    s->Kill();
+    // Wait for the running subprocess termination.
+    // Because Wait() would emit log message and it would take some time to
+    // terminate the subprocess, it will sleep for a while.
+    // b/5370450
+    while ((terminated = s->Wait(true)) == nullptr) {
+      devtools_goma::PlatformThread::Sleep(10000);
+    }
+    delete terminated;
+    delete s;
+  }
+  FlushLogFiles();
+  subprocs_.clear();
+}
+
+// Creates the SubProcessImpl for |req|, records it in subprocs_ under
+// req->id() (which must be unused), then tries to spawn a pending subprocess.
+void SubProcessControllerServer::Register(std::unique_ptr<SubProcessReq> req) {
+  LOG(INFO) << "id=" << req->id() << " Register " << req->trace_id();
+  const bool dont_kill =
+      options_.dont_kill_subprocess ||
+      !CanKillCommand(req->prog(), options_.dont_kill_commands);
+  VLOG(1) << "id=" << req->id() << " Kill? " << req->trace_id()
+          << " prog=" << req->prog()
+          << " dont_kill=" << dont_kill;
+  SubProcessImpl* subproc = new SubProcessImpl(*req, dont_kill);
+  CHECK(subprocs_.insert(std::make_pair(req->id(), subproc)).second);
+  TrySpawnSubProcess();
+}
+
+void SubProcessControllerServer::RequestRun(
+    std::unique_ptr<SubProcessRun> run) {
+  VLOG(1) << "id=" << run->id() << " Run";
+  SubProcessImpl* s = LookupSubProcess(run->id());
+  if (s == nullptr) {
+    LOG(WARNING) << "id=" << run->id() << " request run unknown id "
+                 << "(maybe already killed?)";
+    return;
+  }
+  s->RaisePriority();
+  TrySpawnSubProcess();
+}
+
+void SubProcessControllerServer::Kill(std::unique_ptr<SubProcessKill> kill) {
+  VLOG(1) << "id=" << kill->id() << " Kill";
+  SubProcessImpl* s = LookupSubProcess(kill->id());
+  if (s == nullptr) {
+    LOG(WARNING) << "id=" << kill->id() << " kill unknown id "
+                 << "(maybe already killed?)";
+    return;
+  }
+  if (s->Kill())
+    return;  // Kill() reports the subprocess as still running.
+  std::unique_ptr<SubProcessTerminated> terminated(s->Wait(false));
+  if (terminated != nullptr) {
+    Terminated(std::move(terminated));
+  } else {
+    ErrorTerminate(kill->id(), SubProcessTerminated::kFailedToKill);
+  }
+}
+
+void SubProcessControllerServer::SetOption(
+    std::unique_ptr<SubProcessSetOption> opt) {
+  if (opt->has_max_subprocs() &&
+      options_.max_subprocs != opt->max_subprocs()) {
+    if (opt->max_subprocs() > 0) {
+      options_.max_subprocs = opt->max_subprocs();
+      LOG(INFO) << "option changed: max_subprocs="
+                << opt->max_subprocs();
+    } else {
+      LOG(WARNING) << "option max_subprocs is not changed: "
+                   << "max_subprocs should be positive. value="
+                   << opt->max_subprocs();
+    }
+  }
+
+  if (opt->has_max_subprocs_low_priority() &&
+      options_.max_subprocs_low_priority != opt->max_subprocs_low_priority()) {
+    if (opt->max_subprocs_low_priority() > 0) {
+      options_.max_subprocs_low_priority = opt->max_subprocs_low_priority();
+      LOG(INFO) << "option changed: max_subprocs_low_priority="
+                << opt->max_subprocs_low_priority();
+    } else {
+      LOG(WARNING) << "option max_subprocs_low_priority is not changed: "
+                   << "max_subprocs_low_priority should be positive. value="
+                   << opt->max_subprocs_low_priority();
+    }
+  }
+
+  if (opt->has_max_subprocs_heavy_weight() &&
+      options_.max_subprocs_heavy_weight != opt->max_subprocs_heavy_weight()) {
+    if (opt->max_subprocs_heavy_weight() > 0) {
+      options_.max_subprocs_heavy_weight = opt->max_subprocs_heavy_weight();
+      LOG(INFO) << "option changed: max_subprocs_heavy_weight="
+                << opt->max_subprocs_heavy_weight();
+    } else {
+      LOG(WARNING) << "option max_subprocs_heavy_weight is not changed: "
+                   << "max_subprocs_heavy_weight should be positive. value="
+                   << opt->max_subprocs_heavy_weight();
+    }
+  }
+}
+
+void SubProcessControllerServer::Started(
+    std::unique_ptr<SubProcessStarted> started) {
+  LOG(INFO) << "id=" << started->id() << " Started pid=" << started->pid();
+  SendNotify(SubProcessController::STARTED, *started);
+}
+
+// Forgets the subprocess named by |terminated| (if it is still registered),
+// queues a TERMINATED notification and tries to spawn a pending subprocess.
+void SubProcessControllerServer::Terminated(
+    std::unique_ptr<SubProcessTerminated> terminated) {
+  LOG_IF(INFO, terminated->status() != SubProcessTerminated::kInternalError)
+      << "id=" << terminated->id() << " Terminated"
+      << " status=" << terminated->status();
+
+  const auto found = subprocs_.find(terminated->id());
+  if (found != subprocs_.end()) {
+    delete found->second;
+    subprocs_.erase(found);
+  }
+  SendNotify(SubProcessController::TERMINATED, *terminated);
+  TrySpawnSubProcess();
+}
+
+// Returns the subprocess registered under |id|.  For an unknown id, reports
+// an error termination (kFailedToLookup) for that id and returns nullptr.
+SubProcessImpl* SubProcessControllerServer::LookupSubProcess(int id) {
+  const auto found = subprocs_.find(id);
+  if (found != subprocs_.end()) {
+    return found->second;
+  }
+  // There is an information gap between server and client: the server may
+  // finish a subprocess and send SubProcessTerminated at any time, which
+  // removes the id from subprocs_.  While that message is in flight the
+  // client still believes the id is alive and may send requests for it.
+  // A correct client always sends REGISTER first, so an unknown id here MUST
+  // NOT be treated as a client error.
+  LOG(INFO) << "id=" << id << " failed to LookupSubProcess "
+            << "(maybe already killed?)";
+  // In case subprocess_controller_client leaks id,
+  // we will send ErrorTerminate.
+  ErrorTerminate(id, SubProcessTerminated::kFailedToLookup);
+  return nullptr;
+}
+
+// Picks the next PENDING subprocess (if any) and spawns it, subject to the
+// concurrency limits in options_.
+void SubProcessControllerServer::TrySpawnSubProcess() {
+  VLOG(1) << "TrySpawnSubProcess";
+
+  int running = 0;
+  int num_heavy_weight = 0;
+  SubProcessImpl* candidate = nullptr;
+  // Find the next candidate in subprocs_.
+  // A higher priority wins.  Among subprocs of the same priority the oldest
+  // one (smallest id) wins, i.e. a later subproc is never started before an
+  // earlier one of the same priority.
+  // The weight of a subproc plays no role in the selection.
+  for (const auto& entry : subprocs_) {
+    SubProcessImpl* s = entry.second;
+    const bool pending = s->state() == SubProcessState::PENDING;
+    VLOG(2) << s->req().id() << " " << s->req().trace_id()
+            << " " << SubProcessState::State_Name(s->state());
+    if (pending && s->req().priority() == SubProcessReq::HIGHEST_PRIORITY) {
+      // Highest priority is used in SubProcessTask::ReadCommandOutput.
+      // Such a request is picked immediately.
+      DCHECK_EQ(SubProcessReq::LIGHT_WEIGHT, s->req().weight());
+      candidate = s;
+      break;
+    }
+    if (s->state() == SubProcessState::RUN) {
+      ++running;
+      if (running >= options_.max_subprocs) {
+        VLOG(1) << "Too many subprocesses already running";
+        return;
+      }
+      if (s->req().weight() == SubProcessReq::HEAVY_WEIGHT) {
+        ++num_heavy_weight;
+      }
+    }
+    if (!pending)
+      continue;
+    // The first pending subproc is kept unless it is LOW_PRIORITY and a
+    // HIGH_PRIORITY one follows.
+    if (candidate == nullptr ||
+        (candidate->req().priority() == SubProcessReq::LOW_PRIORITY &&
+         s->req().priority() == SubProcessReq::HIGH_PRIORITY)) {
+      candidate = s;
+    }
+  }
+  if (candidate == nullptr) {
+    VLOG(2) << "no candidate";
+    return;
+  }
+
+  VLOG(2) << "candiate:" << candidate->req().id()
+          << " " << candidate->req().trace_id();
+  // Once a candidate is selected, check max_subprocs_heavy_weight
+  // and max_subprocs_low_priority.
+  if (candidate->req().weight() == SubProcessReq::HEAVY_WEIGHT &&
+      num_heavy_weight >= options_.max_subprocs_heavy_weight) {
+    VLOG(1) << "Heavy weight subprocess already running "
+            << num_heavy_weight;
+    return;
+  }
+
+  if (candidate->req().priority() == SubProcessReq::LOW_PRIORITY &&
+      running >= options_.max_subprocs_low_priority) {
+    VLOG(1) << "candidate priority is low";
+    return;
+  }
+  std::unique_ptr<SubProcessStarted> started(candidate->Spawn());
+  if (started != nullptr) {
+    Started(std::move(started));
+    return;
+  }
+  // Spawn() never returns a started message for detached requests.
+  if (candidate->req().detach()) {
+    return;
+  }
+  ErrorTerminate(candidate->req().id(), SubProcessTerminated::kFailedToSpawn);
+}
+
+void SubProcessControllerServer::ErrorTerminate(
+    int id, SubProcessTerminated_ErrorTerminate reason) {
+  VLOG(1) << "id=" << id << " ErrorTerminate";
+  std::unique_ptr<SubProcessTerminated> terminated(new SubProcessTerminated);
+  terminated->set_id(id);
+  terminated->set_status(SubProcessTerminated::kInternalError);
+  terminated->set_error(reason);
+  Terminated(std::move(terminated));
+}
+
+void SubProcessControllerServer::SendNotify(
+    int op, const google::protobuf::Message& message) {
+  VLOG(2) << "SendNotify op=" << op << " message=" << message.DebugString();
+  AddMessage(op, message);
+}
+
+void SubProcessControllerServer::DoWrite() {
+  VLOG(2) << "DoWrite";
+  WriteMessage(&sock_fd_);
+}
+
+// Dispatches the message delivered by ReadMessage() according to its op.
+void SubProcessControllerServer::DoRead() {
+  VLOG(2) << "DoRead";
+  int op = 0;
+  int len = 0;
+  if (!ReadMessage(&sock_fd_, &op, &len)) {
+    return;
+  }
+  VLOG(2) << "op=" << op << " len=" << len;
+  switch (op) {
+    case SubProcessController::CLOSED:
+      sock_fd_.reset(-1);
+      break;
+
+    case SubProcessController::REGISTER: {
+      std::unique_ptr<SubProcessReq> req(new SubProcessReq);
+      if (!req->ParseFromArray(payload_data(), len)) {
+        LOG(ERROR) << "broken SubProcessReq";
+        break;
+      }
+      Register(std::move(req));
+      break;
+    }
+
+    case SubProcessController::REQUEST_RUN: {
+      std::unique_ptr<SubProcessRun> run(new SubProcessRun);
+      if (!run->ParseFromArray(payload_data(), len)) {
+        LOG(ERROR) << "broken SubProcessRun";
+        break;
+      }
+      RequestRun(std::move(run));
+      break;
+    }
+
+    case SubProcessController::KILL: {
+      std::unique_ptr<SubProcessKill> kill(new SubProcessKill);
+      if (!kill->ParseFromArray(payload_data(), len)) {
+        LOG(ERROR) << "broken SubProcessKill";
+        break;
+      }
+      Kill(std::move(kill));
+      break;
+    }
+
+    case SubProcessController::SET_OPTION: {
+      std::unique_ptr<SubProcessSetOption> option(new SubProcessSetOption);
+      if (!option->ParseFromArray(payload_data(), len)) {
+        LOG(ERROR) << "broken SubProcessSetOption";
+        break;
+      }
+      SetOption(std::move(option));
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unknown SubProcessController::Op " << op;
+  }
+  ReadDone();
+}
+
+#ifndef _WIN32
+void SubProcessControllerServer::SetupSigchldHandler() {
+  int fds[2];
+  PCHECK(pipe(fds) == 0);
+  signal_fd_.reset(fds[0]);
+  g_signal_fd = fds[1];
+  SetFileDescriptorFlag(g_signal_fd, FD_CLOEXEC);
+
+  struct sigaction sa;
+  memset(&sa, 0, sizeof(struct sigaction));
+  sa.sa_sigaction = SigChldAction;
+  sa.sa_flags = SA_NOCLDSTOP | SA_SIGINFO | SA_RESTART;
+  PCHECK(sigaction(SIGCHLD, &sa, nullptr) == 0);
+}
+
+// Consumes one siginfo_t from the signal pipe and marks the subprocess with
+// the matching pid as signaled.
+void SubProcessControllerServer::DoSignal() {
+  VLOG(1) << "DoSignal";
+  siginfo_t si;
+  int r = read(signal_fd_.fd(), &si, sizeof(si));
+  if (r <= 0) {
+    PLOG(FATAL) << "signal_fd " << r;
+  }
+  LOG(INFO) << "signal pid=" << si.si_pid << " status=" << si.si_status;
+  for (const auto& entry : subprocs_) {
+    SubProcessImpl* s = entry.second;
+    if (s->started().pid() == si.si_pid) {
+      s->Signaled(si.si_status);
+      timeout_millisec_ = kWaitIntervalMilliSec;
+      return;
+    }
+  }
+  LOG(WARNING) << "no subprocess found for pid:" << si.si_pid;
+  timeout_millisec_ = kIdleIntervalMilliSec;
+}
+#endif
+
+// Polls every started subprocess for termination and reports the ones that
+// finished.  Subprocesses in SIGNALED state are waited on with kill requested.
+void SubProcessControllerServer::DoTimeout() {
+  VLOG(1) << "DoTimeout";
+  bool restart = false;
+  bool in_signaled = false;
+  do {
+    restart = false;
+    in_signaled = false;
+    for (const auto& entry : subprocs_) {
+      SubProcessImpl* s = entry.second;
+      if (s->started().pid() == SubProcessState::kInvalidPid)
+        continue;
+      const bool need_kill = s->state() == SubProcessState::SIGNALED;
+      if (need_kill)
+        in_signaled = true;
+      std::unique_ptr<SubProcessTerminated> terminated(s->Wait(need_kill));
+      if (terminated != nullptr) {
+        Terminated(std::move(terminated));
+        // subprocs_ was modified, so the iteration has to start over.
+        restart = true;
+        break;
+      }
+    }
+  } while (restart);
+  // If no subprocess is in SIGNALED, we don't need to wait for terminated
+  // task in kWaitIntervalMilliSec.
+  if (!in_signaled)
+    timeout_millisec_ = kIdleIntervalMilliSec;
+}
+
+}  // namespace devtools_goma
diff --git a/client/subprocess_controller_server.h b/client/subprocess_controller_server.h
new file mode 100644
index 0000000..95024cb
--- /dev/null
+++ b/client/subprocess_controller_server.h
@@ -0,0 +1,80 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+// SubProcessController server side.
+// Runs in single threaded process.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SUBPROCESS_CONTROLLER_SERVER_H_
+#define DEVTOOLS_GOMA_CLIENT_SUBPROCESS_CONTROLLER_SERVER_H_
+
+#include <map>
+#include <memory>
+
+#ifndef _WIN32  // The order of including these files matters in _WIN32
+#include "basictypes.h"
+#include "scoped_fd.h"
+#endif
+#include "compiler_specific.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/subprocess.pb.h"
+MSVC_POP_WARNING()
+#include "subprocess_controller.h"
+
+namespace devtools_goma {
+
+class SubProcessImpl;
+
+// SubProcessControllerServer runs in a single thread process, and
+// communicates with SubProcessControllerClient via pipe_fd.
+//
+class SubProcessControllerServer: public SubProcessController {
+ public:
+  // Takes ownership of sock_fd.
+  SubProcessControllerServer(int sock_fd,
+                             const SubProcessController::Options& options);
+  ~SubProcessControllerServer() override;
+
+  void Loop();
+
+ private:
+  SubProcessImpl* LookupSubProcess(int id);
+
+  void Register(std::unique_ptr<SubProcessReq> req) override;
+  void RequestRun(std::unique_ptr<SubProcessRun> run) override;
+  void Kill(std::unique_ptr<SubProcessKill> kill) override;
+  void SetOption(std::unique_ptr<SubProcessSetOption> option) override;
+
+  void Started(std::unique_ptr<SubProcessStarted> started) override;
+  void Terminated(std::unique_ptr<SubProcessTerminated> terminated) override;
+
+  void TrySpawnSubProcess();
+
+  void ErrorTerminate(int id, SubProcessTerminated_ErrorTerminate reason);
+
+  void SendNotify(int op, const google::protobuf::Message& message);
+  void DoWrite();
+  void DoRead();
+
+#ifndef _WIN32
+  void SetupSigchldHandler();
+
+  void DoSignal();
+#endif
+  void DoTimeout();
+
+  std::map<int, SubProcessImpl*> subprocs_;
+  ScopedSocket sock_fd_;
+#ifndef _WIN32
+  ScopedFd signal_fd_;
+#endif
+  int timeout_millisec_;
+  SubProcessController::Options options_;
+
+  DISALLOW_COPY_AND_ASSIGN(SubProcessControllerServer);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SUBPROCESS_CONTROLLER_SERVER_H_
diff --git a/client/subprocess_impl.cc b/client/subprocess_impl.cc
new file mode 100644
index 0000000..54ae7f0
--- /dev/null
+++ b/client/subprocess_impl.cc
@@ -0,0 +1,149 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "subprocess_impl.h"
+
+#include <vector>
+
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+#ifndef _WIN32
+#include "spawner_posix.h"
+#else
+#include "spawner_win.h"
+#endif
+
+namespace devtools_goma {
+
+// Creates a PENDING subprocess entry for |req|; nothing is spawned yet.
+SubProcessImpl::SubProcessImpl(const SubProcessReq& req,
+                               bool dont_kill_subprocess)
+    : state_(SubProcessState::PENDING),
+      req_(req),
+      spawner_(new PlatformSpawner),
+      kill_subprocess_(!dont_kill_subprocess) {
+  VLOG(1) << "new SubProcessImpl " << req.id() << " " << req.trace_id();
+  VLOG(2) << "new " << req_.DebugString();
+}
+
+SubProcessImpl::~SubProcessImpl() {
+  VLOG(1) << "delete SubProcessImpl " << req_.id()
+          << " " << req_.trace_id();
+  VLOG(2) << "delete " << req_.DebugString();
+}
+
+// Spawns the subprocess described by req_.  Returns a heap-allocated copy of
+// the started message, or nullptr on spawn failure or for detached requests.
+SubProcessStarted* SubProcessImpl::Spawn() {
+  LOG(INFO) << "id=" << req_.id() << " spawn " << req_.trace_id();
+  DCHECK_EQ(SubProcessState::PENDING, state_);
+  DCHECK_EQ(SubProcessState::kInvalidPid, started_.pid());
+
+  started_.set_pending_ms(timer_.GetInMs());
+
+  std::vector<string> args(req_.argv().begin(), req_.argv().end());
+  std::vector<string> envs;
+  for (const auto& env : req_.env())
+    envs.push_back(env.c_str());
+
+  const Spawner::ConsoleOutputOption output_option =
+      req_.output_option() == SubProcessReq::STDOUT_ONLY
+          ? Spawner::STDOUT_ONLY
+          : Spawner::MERGE_STDOUT_STDERR;
+  spawner_->SetFileRedirection(req_.stdin_filename(),
+                               req_.stdout_filename(),
+                               req_.stderr_filename(),
+                               output_option);
+  spawner_->SetDetach(req_.detach());
+  if (req_.has_umask()) {
+    spawner_->SetUmask(req_.umask());
+  }
+  VLOG(1) << "id=" << req_.id()
+          << " to_spawn " << req_.trace_id()
+          << " prog=" << req_.prog()
+          << " args=" << args
+          << " envs=" << envs
+          << " cwd=" << req_.cwd();
+  const int pid = spawner_->Run(req_.prog(), args, envs, req_.cwd());
+  if (pid == Spawner::kInvalidPid) {
+    LOG(ERROR) << "id=" << req_.id() << " spawn " << req_.trace_id()
+               << " failed";
+    return nullptr;
+  }
+  started_.set_pid(pid);
+  timer_.Start();
+  state_ = SubProcessState::RUN;
+  started_.set_id(req_.id());
+  if (req_.detach())
+    return nullptr;
+  return new SubProcessStarted(started_);
+}
+
+void SubProcessImpl::RaisePriority() {
+  LOG(INFO) << "id=" << req_.id() << " Run " << req_.trace_id();
+  req_.set_priority(SubProcessReq::HIGH_PRIORITY);
+}
+
+bool SubProcessImpl::Kill() {
+  if (started_.pid() == SubProcessState::kInvalidPid) {
+    LOG(INFO) << "id=" << req_.id() << " Kill before run "
+              << req_.trace_id();
+    return false;
+  }
+
+  const bool running = spawner_->IsChildRunning();
+  if (!kill_subprocess_) {
+    // Killing is disabled for this subprocess: only report whether it runs.
+    LOG(INFO) << "id=" << req_.id() << " ignore kill " << req_.trace_id()
+              << " pid=" << started_.pid() << " running=" << running;
+    return running;
+  }
+  LOG(INFO) << "id=" << req_.id() << " kill " << req_.trace_id()
+            << " pid=" << started_.pid()
+            << " child_signaled=" << spawner_->IsSignaled()
+            << " running=" << running;
+  return spawner_->Kill();
+}
+
+void SubProcessImpl::Signaled(int status) {
+  LOG(INFO) << "id=" << req_.id() << " Signaled " << req_.trace_id()
+            << " pid=" << started_.pid()
+            << " status=" << status;
+  spawner_->SetSignaled();
+  terminated_.set_status(status);
+  state_ = SubProcessState::SIGNALED;
+}
+
+// Polls the spawner for termination of the subprocess, passing NEED_KILL
+// instead of NO_HANG when |need_kill| is true.
+// Returns nullptr while the child is still running; otherwise marks this
+// subprocess FINISHED and returns a heap-allocated copy of the terminated
+// message filled with status, memory usage, term signal and run time.
+SubProcessTerminated* SubProcessImpl::Wait(bool need_kill) {
+  VLOG(1) << "Wait " << req_.id() << " " << req_.trace_id()
+          << " pid=" << started_.pid()
+          << " state=" << SubProcessState::State_Name(state_);
+
+  spawner_->Wait(need_kill ? Spawner::NEED_KILL : Spawner::NO_HANG);
+  if (spawner_->IsChildRunning()) {
+    // still running.
+    return nullptr;
+  }
+  terminated_.set_status(spawner_->ChildStatus());
+  // Memory usage and terminating signal are recorded only when reported.
+  if (spawner_->ChildMemKb() > 0)
+    terminated_.set_mem_kb(spawner_->ChildMemKb());
+  if (spawner_->ChildTermSignal() != 0)
+    terminated_.set_term_signal(spawner_->ChildTermSignal());
+  terminated_.set_run_ms(timer_.GetInMs());
+
+  state_ = SubProcessState::FINISHED;
+  VLOG(1) << "Terminated " << req_.id() << " " << req_.trace_id()
+          << " pid=" << started_.pid();
+  terminated_.set_id(req_.id());
+  return new SubProcessTerminated(terminated_);
+}
+
+}  // namespace devtools_goma
diff --git a/client/subprocess_impl.h b/client/subprocess_impl.h
new file mode 100644
index 0000000..721d26b
--- /dev/null
+++ b/client/subprocess_impl.h
@@ -0,0 +1,64 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SUBPROCESS_IMPL_H_
+#define DEVTOOLS_GOMA_CLIENT_SUBPROCESS_IMPL_H_
+
+#include <memory>
+
+#include "basictypes.h"
+#include "compiler_specific.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/subprocess.pb.h"
+MSVC_POP_WARNING()
+#include "spawner.h"
+#include "simple_timer.h"
+#include "scoped_fd.h"
+
+namespace devtools_goma {
+
+// A SubProcessImpl is associated with a single subprocess.
+// It is created and owned by SubProcessControllerServer.
+class SubProcessImpl {
+ public:
+  SubProcessImpl(const SubProcessReq& req, bool dont_kill_subprocess);
+  ~SubProcessImpl();
+
+  SubProcessState::State state() const { return state_; }
+  const SubProcessReq& req() const { return req_; }
+  const SubProcessStarted& started() const { return started_; }
+
+  SubProcessStarted* Spawn();
+
+  void RaisePriority();
+
+  // Kills the subprocess.
+  // Returns true if the subprocess is still running.
+  // Returns false if the subprocess has been terminated.
+  bool Kill();
+
+  void Signaled(int status);
+
+  // Waits for the subprocess termination.
+  // If |need_kill| is true, it will kill the subprocess.
+  // Returns SubProcessTerminated object if the subprocess has been terminated.
+  // Returns NULL if the subprocess is still running.
+  SubProcessTerminated* Wait(bool need_kill);
+
+ private:
+  SubProcessState::State state_;
+  SubProcessReq req_;
+  SubProcessStarted started_;
+  SubProcessTerminated terminated_;
+  std::unique_ptr<Spawner> spawner_;
+  SimpleTimer timer_;
+  bool kill_subprocess_;
+
+  DISALLOW_COPY_AND_ASSIGN(SubProcessImpl);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SUBPROCESS_IMPL_H_
diff --git a/client/subprocess_option_setter.cc b/client/subprocess_option_setter.cc
new file mode 100644
index 0000000..6f12800
--- /dev/null
+++ b/client/subprocess_option_setter.cc
@@ -0,0 +1,86 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include <memory>
+
+#include "subprocess_option_setter.h"
+
+#include "compiler_specific.h"
+#include "glog/logging.h"
+#include "subprocess_controller_client.h"
+
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_stats.pb.h"
+#include "prototmp/subprocess.pb.h"
+MSVC_POP_WARNING()
+
+namespace devtools_goma {
+
+// Stores the regular and burst-mode subprocess limits.  Inconsistent limits
+// (a sub-limit larger than its total) are only logged, not rejected.
+SubProcessOptionSetter::SubProcessOptionSetter(
+    int max_subprocs, int max_subprocs_low_priority,
+    int max_subprocs_heavy_weight,
+    int burst_max_subprocs, int burst_max_subprocs_low_priority,
+    int burst_max_subprocs_heavy_weight)
+    : max_subprocs_(max_subprocs),
+      max_subprocs_low_priority_(max_subprocs_low_priority),
+      max_subprocs_heavy_weight_(max_subprocs_heavy_weight),
+      burst_max_subprocs_(burst_max_subprocs),
+      burst_max_subprocs_low_priority_(burst_max_subprocs_low_priority),
+      burst_max_subprocs_heavy_weight_(burst_max_subprocs_heavy_weight) {
+  LOG_IF(ERROR, max_subprocs < max_subprocs_low_priority)
+      << "should be max_subprocs >= max_subprocs_low_priority.";
+  LOG_IF(ERROR, max_subprocs < max_subprocs_heavy_weight)
+      << "should be max_subprocs >= max_subprocs_heavy_weight.";
+  LOG_IF(ERROR, burst_max_subprocs < burst_max_subprocs_low_priority)
+      << "should be burst_max_subprocs >= burst_max_subprocs_low_priority.";
+  LOG_IF(ERROR, burst_max_subprocs < burst_max_subprocs_heavy_weight)
+      << "should be burst_max_subprocs >= burst_max_subprocs_heavy_weight.";
+}
+
+// Switches the controller to the burst-mode limits and counts the trigger.
+// Does nothing when SubProcessControllerClient is not running.
+void SubProcessOptionSetter::TurnOnBurstMode(BurstModeReason reason) {
+  if (!SubProcessControllerClient::IsRunning())
+    return;
+
+  // Count why burst mode was requested.
+  if (reason == BurstModeReason::NETWORK_ERROR) {
+    stats_count_burst_by_network_error_.Add(1);
+  } else if (reason == BurstModeReason::COMPILER_DISABLED) {
+    stats_count_burst_by_compiler_disabled_.Add(1);
+  } else {
+    LOG(ERROR) << "unknown burst mode reason: "
+               << static_cast<int>(reason);
+  }
+
+  // Hand the burst limits over to the subprocess controller client.
+  std::unique_ptr<SubProcessSetOption> burst(new SubProcessSetOption);
+  burst->set_max_subprocs(burst_max_subprocs_);
+  burst->set_max_subprocs_low_priority(burst_max_subprocs_low_priority_);
+  burst->set_max_subprocs_heavy_weight(burst_max_subprocs_heavy_weight_);
+  SubProcessControllerClient::Get()->SetOption(std::move(burst));
+}
+
+// Restores the regular limits; a no-op when the client is not running.
+void SubProcessOptionSetter::TurnOffBurstMode() {
+  if (!SubProcessControllerClient::IsRunning()) return;
+
+  std::unique_ptr<SubProcessSetOption> regular(new SubProcessSetOption);
+  regular->set_max_subprocs(max_subprocs_);
+  regular->set_max_subprocs_low_priority(max_subprocs_low_priority_);
+  regular->set_max_subprocs_heavy_weight(max_subprocs_heavy_weight_);
+  SubProcessControllerClient::Get()->SetOption(std::move(regular));
+}
+
+void SubProcessOptionSetter::DumpStatsToProto(SubProcessStats* stats) {
+  const auto by_network_error = stats_count_burst_by_network_error_.value();
+  const auto by_compiler = stats_count_burst_by_compiler_disabled_.value();
+  stats->set_count_burst_by_network_error(by_network_error);  // burst counts
+  stats->set_count_burst_by_compiler_disabled(by_compiler);
+}
+
+}  // namespace devtools_goma
diff --git a/client/subprocess_option_setter.h b/client/subprocess_option_setter.h
new file mode 100644
index 0000000..f70ea3b
--- /dev/null
+++ b/client/subprocess_option_setter.h
@@ -0,0 +1,76 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SUBPROCESS_OPTION_SETTER_H_
+#define DEVTOOLS_GOMA_CLIENT_SUBPROCESS_OPTION_SETTER_H_
+
+#include "atomic_stats_counter.h"
+#include "http.h"
+
+namespace devtools_goma {
+
+class SubProcessStats;
+
+enum class BurstModeReason {  // Why TurnOnBurstMode() was called (for stats).
+  NETWORK_ERROR,  // Passed by NetworkErrorMonitor on a network error.
+  COMPILER_DISABLED,
+};
+
+class SubProcessOptionSetter {  // Toggles regular/burst subprocess limits.
+ public:
+  SubProcessOptionSetter() = delete;
+  SubProcessOptionSetter(const SubProcessOptionSetter&) = delete;
+  SubProcessOptionSetter& operator=(const SubProcessOptionSetter&) = delete;
+  // Regular limits come first, then the limits used while in burst mode.
+  SubProcessOptionSetter(int max_subprocs,
+                         int max_subprocs_low_priority,
+                         int max_subprocs_heavy_weight,
+                         int burst_max_subprocs,
+                         int burst_max_subprocs_low_priority,
+                         int burst_max_subprocs_heavy_weight);
+  ~SubProcessOptionSetter() {}
+
+  void TurnOnBurstMode(BurstModeReason reason);  // Applies burst_* limits.
+  void TurnOffBurstMode();  // Restores the regular limits.
+  // Copies the burst-mode trigger counters into |stats|.
+  void DumpStatsToProto(SubProcessStats* stats);
+
+ private:
+  const int max_subprocs_;
+  const int max_subprocs_low_priority_;
+  const int max_subprocs_heavy_weight_;
+  const int burst_max_subprocs_;
+  const int burst_max_subprocs_low_priority_;
+  const int burst_max_subprocs_heavy_weight_;
+  // Incremented in TurnOnBurstMode(), one counter per BurstModeReason.
+  StatsCounter stats_count_burst_by_network_error_;
+  StatsCounter stats_count_burst_by_compiler_disabled_;
+};
+
+class NetworkErrorMonitor : public HttpClient::NetworkErrorMonitor {
+ public:
+  NetworkErrorMonitor() = delete;
+  NetworkErrorMonitor(const NetworkErrorMonitor&) = delete;
+  NetworkErrorMonitor& operator=(const NetworkErrorMonitor&) = delete;
+  // |option_setter| is kept as a raw pointer and is not deleted here.
+  explicit NetworkErrorMonitor(SubProcessOptionSetter* option_setter) :
+      option_setter_(option_setter) {}
+  ~NetworkErrorMonitor() override {}
+  // Network error => burst mode on; recovery => burst mode off.
+  void OnNetworkErrorDetected() override {
+    option_setter_->TurnOnBurstMode(BurstModeReason::NETWORK_ERROR);
+  }
+
+  void OnNetworkRecovered() override {
+    option_setter_->TurnOffBurstMode();
+  }
+
+ private:
+  SubProcessOptionSetter* option_setter_;
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SUBPROCESS_OPTION_SETTER_H_
diff --git a/client/subprocess_task.cc b/client/subprocess_task.cc
new file mode 100644
index 0000000..66f1269
--- /dev/null
+++ b/client/subprocess_task.cc
@@ -0,0 +1,278 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "subprocess_task.h"
+
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "autolock_timer.h"
+#include "callback.h"
+#include "compiler_specific.h"
+#include "file.h"
+#include "file_helper.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/subprocess.pb.h"
+MSVC_POP_WARNING()
+#include "scoped_tmp_file.h"
+#include "subprocess_controller_client.h"
+
+namespace devtools_goma {
+
+// static.  Runs |prog| in blocking mode and returns the redirected output.
+string SubProcessTask::ReadCommandOutput(
+    const string& prog,
+    const std::vector<string>& argv, const std::vector<string>& envs,
+    const string& cwd, CommandOutputOption option, int32_t* status) {
+  CHECK(!SubProcessControllerClient::Get()->BelongsToCurrentThread());
+  std::vector<const char*> args;
+  for (const auto& arg : argv)
+    args.push_back(arg.c_str());
+  args.push_back(nullptr);
+
+  SubProcessTask s(prog, prog.c_str(), const_cast<char**>(&args[0]));
+  SubProcessReq* req = s.mutable_req();
+  for (const auto& env : envs)
+    req->add_env(env);
+  if (cwd.empty()) {
+    req->set_cwd(SubProcessControllerClient::Get()->tmp_dir());
+  } else {
+    req->set_cwd(cwd);
+  }
+  ScopedTmpFile tmpfile("goma_compiler_proxy.subproc");
+  if (!tmpfile.valid()) {
+    PLOG(ERROR) << "Failed to create tempfile to store stdout.";
+    if (status) *status = SubProcessTerminated::kInternalError;
+    return "";
+  }
+  tmpfile.Close();
+  const string& tempfilename_stdout = tmpfile.filename();
+  req->set_stdout_filename(tempfilename_stdout);
+  if (option == STDOUT_ONLY)
+    req->set_output_option(SubProcessReq::STDOUT_ONLY);
+  // Request the highest priority and the light weight class.
+  req->set_priority(SubProcessReq::HIGHEST_PRIORITY);
+  req->set_weight(SubProcessReq::LIGHT_WEIGHT);
+  // A null callback makes StartInternal() wait until the task is FINISHED.
+  s.StartInternal(nullptr);  // blocking.
+  string output;
+  if (!ReadFileToString(tempfilename_stdout, &output)) {
+    LOG(ERROR) << "Failed to read tempfile for storing stdout."
+               << " tempfilename_stdout=" << tempfilename_stdout;
+    if (status) *status = SubProcessTerminated::kInternalError;
+    return "";
+  }
+  VLOG(3) << "output=" << output;
+  int32_t exit_status = s.terminated().status();
+  if (status) {
+    *status = exit_status;
+  } else {
+    LOG_IF(FATAL, exit_status != 0)
+        << "If the caller expects the non-zero exit status, "
+        << "the caller must set non-nullptr status in the argument."
+        << " prog=" << prog
+        << " cwd=" << cwd
+        << " exit_status=" << exit_status
+        << " argv=" << argv;
+  }
+  return output;
+}
+
+// Binds the task to the creating thread and fills |req_| with defaults
+// (id -1, LOW_PRIORITY, LIGHT_WEIGHT).  |argv| must end with a nullptr entry.
+SubProcessTask::SubProcessTask(
+    const string& trace_id, const char* prog, char* const argv[])
+    : thread_id_(0), callback_(nullptr), cond_(&mu_),
+      state_(SubProcessState::SETUP) {
+  DCHECK(SubProcessControllerClient::IsRunning());
+  DCHECK(SubProcessControllerClient::Get()->Initialized());
+  thread_id_ = SubProcessControllerClient::Get()->wm()->GetCurrentThreadId();
+  VLOG(1) << trace_id << " new SubProcessTask";
+  req_.set_id(-1);
+  req_.set_trace_id(trace_id);
+  req_.set_prog(prog);
+  for (int i = 0; argv[i] != nullptr; ++i) {
+    req_.add_argv(argv[i]);
+  }
+  req_.set_priority(SubProcessReq::LOW_PRIORITY);
+  req_.set_weight(SubProcessReq::LIGHT_WEIGHT);
+}
+
+SubProcessTask::~SubProcessTask() {
+  VLOG(1) << req_.trace_id() << " delete";
+  DCHECK(callback_ == nullptr);  // Done() clears callback_ before deleting.
+  if (!req_.detach())
+    DCHECK_EQ(SubProcessState::FINISHED, state_);  // Detached: may be PENDING.
+  if (SubProcessControllerClient::IsRunning())
+    DCHECK(BelongsToCurrentThread());
+}
+
+bool SubProcessTask::BelongsToCurrentThread() const {
+  return THREAD_ID_IS_SELF(thread_id_);  // thread_id_ is set in the ctor.
+}
+
+void SubProcessTask::Start(OneshotClosure* callback) {
+  VLOG(1) << req_.trace_id() << " start";
+  DCHECK(BelongsToCurrentThread());
+  DCHECK_EQ(SubProcessState::SETUP, state_);
+  DCHECK(!callback_);
+  if (req_.detach())
+    CHECK(callback == nullptr);  // Detached tasks must not have a callback.
+  else
+    CHECK(callback != nullptr);  // Non-detached tasks need a callback.
+  StartInternal(callback);
+}
+
+void SubProcessTask::StartInternal(OneshotClosure* callback) {
+  DCHECK(BelongsToCurrentThread());
+  DCHECK_EQ(SubProcessState::SETUP, state_);
+  DCHECK(!callback_);
+  callback_ = callback;
+  // Mark the task PENDING before handing it to the controller client.
+  {
+    AUTOLOCK(lock, &mu_);
+    state_ = SubProcessState::PENDING;
+  }
+  SubProcessControllerClient::Get()->RegisterTask(this);
+  if (req_.detach()) {
+    CHECK(callback == nullptr);
+    delete this;  // Detached tasks are deleted before returning.
+    return;
+  }
+  if (callback == nullptr) {
+    // blocking mode: wait until Terminated() sets FINISHED and signals cond_.
+    AUTOLOCK(lock, &mu_);
+    while (state_ != SubProcessState::FINISHED) {
+      cond_.Wait();
+    }
+  }
+}
+
+void SubProcessTask::RequestRun() {
+  VLOG(1) << req_.trace_id() << " request run ";
+  DCHECK(BelongsToCurrentThread());
+  std::unique_ptr<SubProcessRun> run;
+  {
+    AUTOLOCK(lock, &mu_);
+    if (state_ == SubProcessState::SETUP) {
+      LOG(FATAL) << req_.trace_id()
+                 << " run in SETUP:" << req_.DebugString();
+    }
+    if (state_ != SubProcessState::PENDING) {
+      VLOG(1) << req_.trace_id()
+              << " run in not PENDING:" << req_.DebugString();
+      return;  // Only a PENDING task sends a SubProcessRun message.
+    }
+    req_.set_priority(SubProcessReq::HIGH_PRIORITY);
+    run.reset(new SubProcessRun);
+    run->set_id(req_.id());
+  }
+  SubProcessControllerClient::Get()->RequestRun(std::move(run));  // Unlocked.
+}
+
+// Requests termination of the subprocess.  Returns true only if the task
+// was in RUN state; a PENDING task is also signaled but yields false, and
+// every other state is left untouched and yields false.
+bool SubProcessTask::Kill() {
+  VLOG(1) << req_.trace_id() << " kill";
+  DCHECK(BelongsToCurrentThread());
+
+  std::unique_ptr<SubProcessKill> kill_msg;
+  bool was_running = false;
+  // mu_ protects state_.
+  {
+    AUTOLOCK(lock, &mu_);
+    switch (state_) {
+      case SubProcessState::SETUP:
+        // The task has not been started yet; only log.
+        LOG(INFO) << req_.trace_id()
+                  << " killed in SETUP:" << req_.DebugString();
+        break;
+      case SubProcessState::PENDING:
+      case SubProcessState::RUN:
+        // Note the old state before overwriting it with SIGNALED.
+        was_running = (state_ == SubProcessState::RUN);
+        state_ = SubProcessState::SIGNALED;
+        kill_msg.reset(new SubProcessKill);
+        kill_msg->set_id(req_.id());
+        break;
+      case SubProcessState::SIGNALED:
+      case SubProcessState::FINISHED:
+      default:
+        // Already signaled or finished: no kill message is sent.
+        break;
+    }
+  }
+
+  // The kill message is sent after mu_ has been released.
+  if (kill_msg)
+    SubProcessControllerClient::Get()->Kill(std::move(kill_msg));
+  return was_running;
+}
+
+// static.  Returns SubProcessControllerClient::Get()->NumPending().
+int SubProcessTask::NumPending() {
+  return SubProcessControllerClient::Get()->NumPending();
+}
+
+// Records |started| and moves PENDING -> RUN (a SIGNALED task stays SIGNALED).
+void SubProcessTask::Started(std::unique_ptr<SubProcessStarted> started) {
+  VLOG(1) << req_.trace_id() << " started " << started->pid();
+  DCHECK(!BelongsToCurrentThread());
+  AUTOLOCK(lock, &mu_);
+  if (state_ != SubProcessState::PENDING) {
+    CHECK_EQ(SubProcessState::SIGNALED, state_)
+        << req_.trace_id()
+        << " state=" << SubProcessState::State_Name(state_)
+        << started->DebugString();
+  } else {
+    state_ = SubProcessState::RUN;
+  }
+  started_ = *started;
+  // Logged under mu_: Kill() may change state_ on the owner thread.
+  LOG(INFO) << req_.trace_id() << " started pid=" << started_.pid()
+            << " state=" << SubProcessState::State_Name(state_);
+}
+
+// Stores the termination message, marks the task FINISHED and wakes up a
+// StartInternal() call waiting in blocking mode.
+void SubProcessTask::Terminated(
+    std::unique_ptr<SubProcessTerminated> terminated) {
+  VLOG(1) << req_.trace_id() << " terminated " << terminated->status();
+  DCHECK(!BelongsToCurrentThread());
+  AUTOLOCK(lock, &mu_);
+  if (started_.pid() == SubProcessState::kInvalidPid) {
+    VLOG(1) << req_.trace_id() << " subproc terminated";
+  } else {
+    LOG(INFO) << req_.trace_id() << " terminated pid=" << started_.pid()
+              << " status=" << terminated->status();
+  }
+  terminated_ = *terminated;
+  state_ = SubProcessState::FINISHED;
+  cond_.Signal();
+}
+
+// Runs callback_ if one is set, then deletes this task.
+void SubProcessTask::Done() {
+  VLOG(1) << req_.trace_id() << " done";
+  // SubProcessControllerClient might have been finished before calling
+  // this method.
+  if (SubProcessControllerClient::IsRunning())
+    DCHECK(BelongsToCurrentThread());
+  OneshotClosure* pending = callback_;
+  callback_ = nullptr;  // Cleared first; the destructor DCHECKs it is null.
+  if (pending != nullptr)
+    pending->Run();
+  delete this;
+}
+
+}  // namespace devtools_goma
diff --git a/client/subprocess_task.h b/client/subprocess_task.h
new file mode 100644
index 0000000..b79e66f
--- /dev/null
+++ b/client/subprocess_task.h
@@ -0,0 +1,141 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_SUBPROCESS_TASK_H_
+#define DEVTOOLS_GOMA_CLIENT_SUBPROCESS_TASK_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+#include "compiler_specific.h"
+#include "lockhelper.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/subprocess.pb.h"
+MSVC_POP_WARNING()
+#include "util.h"
+#include "worker_thread_manager.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class Closure;
+class SubProcessControllerClient;
+
+// A SubProcessTask is managed by SubProcessControllerClient and is
+// a peer of SubProcessImpl that is managed by SubProcessControllerServer.
+// Typical usage is:
+//     SubProcessTask* task = new SubProcessTask(trace_id, prog, argv);
+//     SubProcessReq* req = task->mutable_req();
+//     // setup request in req.
+//     task->Start(callback);
+//     // SubProcessControllerClient takes ownership of task.
+//     // you can access task until callback is called.
+//     // task->state(), task->started().pid(), ...
+//  Once callback is called, the subprocess is terminated, and
+//  the SubProcessTask will be deleted after returning the callback.
+class SubProcessTask {
+ public:
+  // Provides ReadCommandOutput interface.
+  // It uses SubProcessTask blocking mode internally.
+  // |status| receives the exit status of the program.  Exit statuses are
+  // usually positive on Posix and Windows, so internal errors are reported
+  // as SubProcessTerminated::kInternalError.  If |status| is nullptr, a
+  // non-zero exit status is treated as fatal (LOG(FATAL)).
+  static string ReadCommandOutput(
+      const string& prog,
+      const std::vector<string>& argv, const std::vector<string>& env,
+      const string& cwd, CommandOutputOption option, int32_t* status);
+
+  // Creates new sub process task.
+  // The created instance will be used on the thread where it was created.
+  SubProcessTask(const string& trace_id,
+                 const char* prog, char* const argv[]);
+
+  WorkerThreadManager::ThreadId thread_id() { return thread_id_; }
+
+  SubProcessState::State state() const {
+    AUTOLOCK(lock, &mu_);
+    return state_;
+  }
+
+  // Client can set subprocess configuration via mutable_req().
+  // Must be called before Start() call.
+  SubProcessReq* mutable_req() { return &req_; }
+
+  const SubProcessReq& req() const { return req_; }
+  const SubProcessStarted& started() const { return started_; }
+  const SubProcessTerminated& terminated() const { return terminated_; }
+
+  // Starts subprocess.
+  // It returns immediately and callback will be called when the process is
+  // finished; the task then deletes itself.
+  // If req().detach() is true, callback must be NULL.  The process will run
+  // in detached mode.  SubProcessTask will be deleted before returning from
+  // Start().
+  // state(): SETUP -> PENDING.
+  void Start(OneshotClosure* callback);
+
+  // Requests to run the subprocess in high priority.
+  void RequestRun();
+
+  // Kills the subprocess. callback will be called when the process is killed.
+  // state(): RUN -> SIGNALED: returns true.  PENDING -> SIGNALED: false.
+  // state(): SETUP, SIGNALED, FINISHED: unchanged, returns false.
+  bool Kill();
+
+  static int NumPending();
+
+ private:
+  friend class SubProcessControllerClient;
+  ~SubProcessTask();
+  bool BelongsToCurrentThread() const;
+  bool async_callback() const { return callback_ != NULL; }
+
+  // Starts subprocess.
+  // If callback is not NULL, it returns immediately and callback will be
+  // called when the process is finished; the task then deletes itself.
+  // state(): SETUP -> PENDING.
+  //
+  // If callback is NULL, it waits for subprocess termination.
+  // state(): SETUP -> .. -> FINISHED.
+  // Caller should delete it.
+  void StartInternal(OneshotClosure* callback);
+
+  // Feedback from subprocess controller.
+
+  // The subprocess is started with pid.
+  // Runs in subprocess controller's context.
+  // Takes ownership of started.
+  void Started(std::unique_ptr<SubProcessStarted> started);
+
+  // The subprocess is terminated.
+  // Runs in subprocess controller's context.
+  // Takes ownership of terminated.
+  void Terminated(std::unique_ptr<SubProcessTerminated> terminated);
+
+  // Calls callback_ and deletes itself.
+  void Done();
+
+  SubProcessReq req_;
+  SubProcessStarted started_;
+  SubProcessTerminated terminated_;
+
+  WorkerThreadManager::ThreadId thread_id_;
+
+  OneshotClosure* callback_;
+
+  Lock mu_;  // protect state_.
+  ConditionVariable cond_;
+  SubProcessState::State state_;
+
+  DISALLOW_COPY_AND_ASSIGN(SubProcessTask);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_SUBPROCESS_TASK_H_
diff --git a/client/subprocess_task_unittest.cc b/client/subprocess_task_unittest.cc
new file mode 100644
index 0000000..f0197c0
--- /dev/null
+++ b/client/subprocess_task_unittest.cc
@@ -0,0 +1,279 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "subprocess_task.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+#include "callback.h"
+#include "lockhelper.h"
+#include "mypath.h"
+#include "platform_thread.h"
+#include "subprocess_controller.h"
+#include "subprocess_controller_client.h"
+#include "util.h"
+#include "worker_thread_manager.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+using std::string;
+
+namespace {
+// Passed to SubProcessController::Options::dont_kill_subprocess in SetUp().
+const bool kDontKillSubProcess =
+#ifdef __MACH__
+    true;
+#else
+    false;
+#endif
+}  // namespace
+
+namespace devtools_goma {
+
+class SubProcessTaskTest : public ::testing::Test {
+ public:
+  SubProcessTaskTest() : cond_(&mu_) {}
+
+ protected:
+  class SubProcessContext {  // State shared with the worker-thread closures.
+   public:
+    SubProcessContext(const string& trace_id,
+                      const char* prog,
+                      const char* const *argv)
+        : trace_id_(trace_id),
+          prog_(prog),
+          argv_(argv),
+          s_(nullptr),
+          status_(-256),  // Sentinel; overwritten in TestSubProcessDone.
+          done_(false) {
+    }
+    ~SubProcessContext() {
+    }
+
+    const string trace_id_;
+    const char* prog_;
+    const char* const * argv_;
+    SubProcessTask* s_;
+    int status_;
+    bool done_;
+
+   private:
+    DISALLOW_COPY_AND_ASSIGN(SubProcessContext);
+  };
+
+  void SetUp() override {
+    CheckTempDirectory(GetGomaTmpDir());
+    SubProcessController::Options options;
+    options.dont_kill_subprocess = kDontKillSubProcess;
+    SubProcessController::Initialize(
+        "subprocess_task_unittest", options);
+    wm_.reset(new WorkerThreadManager);
+    wm_->Start(2);
+    SubProcessControllerClient::Initialize(wm_.get(), GetGomaTmpDir());
+    int max_wait = 100;  // Give up (LOG(FATAL)) after this many polls.
+    while (!SubProcessControllerClient::IsRunning() ||
+           !SubProcessControllerClient::Get()->Initialized()) {
+      PlatformThread::Sleep(1000);  // NOTE(review): time unit not visible here.
+      if (--max_wait <= 0) {
+        LOG(FATAL) << "SubProcessControllerClient not running.";
+      }
+    }
+  }
+
+  void TearDown() override {
+    SubProcessControllerClient::Get()->Quit();
+    SubProcessControllerClient::Get()->Shutdown();
+    wm_->Finish();
+    wm_.reset();
+  }
+
+  void WaitDone(bool* done) {  // Blocks until SignalDone sets *done.
+    AutoLock lock(&mu_);
+    while (!*done) {
+      cond_.Wait();
+    }
+  }
+
+  void SignalDone(bool* done) {  // Sets *done under mu_ and signals cond_.
+    EXPECT_FALSE(*done);
+    AutoLock lock(&mu_);
+    *done = true;
+    cond_.Signal();
+  }
+
+  void RunTestReadCommandOutput() {
+    bool done = false;
+    wm_->RunClosure(
+        FROM_HERE,
+        NewCallback(
+            this, &SubProcessTaskTest::TestReadCommandOutput, &done),
+        WorkerThreadManager::PRIORITY_LOW);
+    WaitDone(&done);
+  }
+
+  void TestReadCommandOutput(bool* done) {
+    EXPECT_FALSE(*done);
+    std::vector<string> argv;
+#ifdef _WIN32
+    argv.push_back("cmd");
+    argv.push_back("/c");
+#endif
+    argv.push_back("echo");
+    argv.push_back("hello");
+    std::vector<string> env;
+#ifndef _WIN32
+    EXPECT_EQ("hello\n",
+              SubProcessTask::ReadCommandOutput("/bin/echo", argv, env, "",
+                                                MERGE_STDOUT_STDERR, nullptr));
+#else
+    // TODO: remove env after I revise redirector_win.cc.
+    env.push_back("PATHEXT=" + GetEnv("PATHEXT"));
+    env.push_back("PATH=" + GetEnv("PATH"));
+    EXPECT_EQ("hello\r\n",
+              SubProcessTask::ReadCommandOutput("cmd", argv, env, "",
+                                                MERGE_STDOUT_STDERR, nullptr));
+#endif
+    SignalDone(done);
+  }
+
+  void RunTestSubProcessTrue() {
+#ifndef _WIN32
+    const char* const argv[] = {"true", nullptr};
+#else
+    const char* const argv[] = {"cmd", "/c", "exit", "0", nullptr};
+#endif
+    static const char* kTruePath =
+#ifdef __MACH__
+      "/usr/bin/true";
+#elif !_WIN32
+      "/bin/true";
+#else
+      "cmd";
+#endif
+    SubProcessContext c("true", kTruePath, argv);
+    EXPECT_NE(0, c.status_);
+    wm_->RunClosure(
+        FROM_HERE,
+        NewCallback(
+            this, &SubProcessTaskTest::TestSubProcess, &c),
+        WorkerThreadManager::PRIORITY_LOW);
+    WaitDone(&c.done_);
+    EXPECT_EQ(0, c.status_);
+  }
+
+  void RunTestSubProcessFalse() {
+#ifndef _WIN32
+    const char* const argv[] = {"false", nullptr};
+#else
+    const char* const argv[] = {"cmd", "/c", "exit", "1", nullptr};
+#endif
+    static const char* kFalsePath =
+#ifdef __MACH__
+      "/usr/bin/false";
+#elif !_WIN32
+      "/bin/false";
+#else
+      "cmd";
+#endif
+    SubProcessContext c("false", kFalsePath, argv);
+    EXPECT_NE(0, c.status_);
+    wm_->RunClosure(
+        FROM_HERE,
+        NewCallback(
+            this, &SubProcessTaskTest::TestSubProcess, &c),
+        WorkerThreadManager::PRIORITY_LOW);
+    WaitDone(&c.done_);
+    EXPECT_EQ(1, c.status_);
+  }
+
+  void RunTestSubProcessKill() {  // NOTE(review): no TEST_F calls this.
+#ifndef _WIN32
+    const char* const argv[] = {"sleep", "100", nullptr};
+    SubProcessContext c("sleep", "/bin/sleep", argv);
+#else
+    const char* const argv[] = {"cmd", "/c", "timeout", "/t", "1", "/nobreak",
+                                ">NUL", nullptr};
+    SubProcessContext c("sleep", "cmd", argv);
+#endif
+    EXPECT_NE(0, c.status_);
+    wm_->RunClosure(
+        FROM_HERE,
+        NewCallback(
+            this, &SubProcessTaskTest::TestSubProcess, &c),
+        WorkerThreadManager::PRIORITY_LOW);
+    PlatformThread::Sleep(10000);
+    EXPECT_NE(SubProcessState::RUN, c.s_->state());
+    wm_->RunClosure(
+        FROM_HERE,
+        NewCallback(
+            this, &SubProcessTaskTest::TestSubProcessKill, &c),
+        WorkerThreadManager::PRIORITY_IMMEDIATE);
+    WaitDone(&c.done_);
+    EXPECT_EQ(1, c.status_);
+  }
+
+  void TestSubProcess(SubProcessContext* c) {
+    EXPECT_TRUE(c->s_ == nullptr);
+    EXPECT_FALSE(c->done_);
+    c->s_ = new SubProcessTask(c->trace_id_, c->prog_,
+                               const_cast<char* const *>(c->argv_));
+    c->s_->mutable_req()->set_cwd(
+        SubProcessControllerClient::Get()->tmp_dir());
+    EXPECT_EQ(SubProcessState::SETUP, c->s_->state());
+#ifdef _WIN32
+    // TODO: remove env after I revise redirector_win.cc.
+    c->s_->mutable_req()->add_env("PATH=" + GetEnv("PATH"));
+    c->s_->mutable_req()->add_env("PATHEXT=" + GetEnv("PATHEXT"));
+#endif
+    c->s_->Start(
+        NewCallback(
+            this, &SubProcessTaskTest::TestSubProcessDone, c));
+    EXPECT_NE(SubProcessState::SETUP, c->s_->state());
+  }
+
+  void TestSubProcessDone(SubProcessContext* c) {
+    EXPECT_TRUE(c->s_ != nullptr);
+    EXPECT_FALSE(c->done_);
+    EXPECT_EQ(SubProcessState::FINISHED, c->s_->state());
+    EXPECT_EQ(c->s_->req().id(), c->s_->started().id());
+    EXPECT_NE(-1, c->s_->started().pid());
+    EXPECT_EQ(c->s_->req().id(), c->s_->terminated().id());
+    c->status_ = c->s_->terminated().status();
+    c->s_ = nullptr;  // The task deletes itself after this callback returns.
+    SignalDone(&c->done_);
+  }
+
+  void TestSubProcessKill(SubProcessContext* c) {
+    EXPECT_TRUE(c->s_ != nullptr);
+    EXPECT_FALSE(c->done_);
+    EXPECT_NE(-1, c->s_->started().pid());
+    EXPECT_EQ(SubProcessState::RUN, c->s_->state());
+    EXPECT_TRUE(c->s_->Kill());
+    EXPECT_EQ(SubProcessState::SIGNALED, c->s_->state());
+    EXPECT_FALSE(c->s_->Kill());  // A second Kill() on SIGNALED returns false.
+  }
+
+  std::unique_ptr<WorkerThreadManager> wm_;
+  Lock mu_;
+  ConditionVariable cond_;
+};
+
+TEST_F(SubProcessTaskTest, ReadCommandOutput) {  // Blocking-mode helper.
+  RunTestReadCommandOutput();
+}
+
+TEST_F(SubProcessTaskTest, RunTrue) {  // Expects exit status 0.
+  RunTestSubProcessTrue();
+}
+
+TEST_F(SubProcessTaskTest, RunFalse) {  // Expects exit status 1.
+  RunTestSubProcessFalse();
+}
+
+}  // namespace devtools_goma
diff --git a/client/symlink.py b/client/symlink.py
new file mode 100755
index 0000000..298c4a9
--- /dev/null
+++ b/client/symlink.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+#
+# Copyright 2015 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Generate symlinks from one to the others.
+
+Usage:
+  % symlink.py --target gomacc "gcc,g++,javac"
+"""
+
+import argparse
+import errno
+import os
+
+
+def main():
+  parser = argparse.ArgumentParser(description='create symlink')
+  parser.add_argument('--force', action='store_true',
+                      help='remove symlink if exist')
+  parser.add_argument('--target', help='symlink target name', required=True)
+  parser.add_argument('links', metavar='files', nargs='+', help='link names')
+  args = parser.parse_args()
+
+  for f in args.links:
+    if args.force:
+      # this is "rm -f", it is ok to fail.
+      try:
+        os.remove(f)
+      except OSError, err:
+        if err.errno != errno.ENOENT:
+          raise
+    os.symlink(args.target, f)
+
+if __name__ == '__main__':  # Run main() only when executed as a script.
+  main()
diff --git a/client/threadpool_http_server.cc b/client/threadpool_http_server.cc
new file mode 100644
index 0000000..088cc53
--- /dev/null
+++ b/client/threadpool_http_server.cc
@@ -0,0 +1,1135 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+// A threadpool HTTP server implementation.
+
+#include "threadpool_http_server.h"
+
+#ifndef _WIN32
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#include <pthread.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#else
+#include "socket_helper_win.h"
+#endif
+
+#include <algorithm>
+#include <cstdio>
+#include <cstring>
+#include <iostream>
+#include <iterator>
+#include <memory>
+#include <sstream>
+#include <vector>
+
+#include "autolock_timer.h"
+#include "callback.h"
+#include "compiler_proxy_info.h"
+#include "compiler_specific.h"
+#include "socket_descriptor.h"
+#include "file.h"
+#include "fileflag.h"
+#include "glog/logging.h"
+#include "goma_ipc_addr.h"
+#include "goma_ipc_peer.h"
+#include "ioutil.h"
+#ifdef _WIN32
+#include "named_pipe_server_win.h"
+#endif
+#include "platform_thread.h"
+#include "scoped_fd.h"
+#include "simple_timer.h"
+#include "split.h"
+#include "strutil.h"
+#include "trustedipsmanager.h"
+#include "worker_thread_manager.h"
+
+#define BACKLOG 128
+
+namespace devtools_goma {
+
+// TODO: make it flag?
+const int kDefaultTimeoutSec = 900;
+
+// Stores the listen address/port and handler; nothing is bound or listened on
+// here (Loop() does that). Every per-socket-type accept limit starts at
+// |max_num_sockets| with zeroed socket and idle counters. When
+// |num_threads| > 0 a worker pool named "threadpool_http_server" is started
+// on |wm|; otherwise pool_ stays WorkerThreadManager::kFreePool.
+ThreadpoolHttpServer::ThreadpoolHttpServer(const string& listen_addr,
+                                           int port,
+                                           int num_find_ports,
+                                           WorkerThreadManager* wm,
+                                           int num_threads,
+                                           HttpHandler *http_handler,
+                                           int max_num_sockets)
+    : listen_addr_(listen_addr),
+      port_(port), port_ready_(false), num_find_ports_(num_find_ports),
+      wm_(wm), pool_(WorkerThreadManager::kFreePool),
+      num_http_threads_(num_threads),
+      http_handler_(http_handler), monitor_(nullptr),
+      trustedipsmanager_(nullptr),
+      max_num_sockets_(max_num_sockets), cond_(&mu_),
+      idle_counting_(true),
+      last_closure_id_(kInvalidClosureId) {
+  for (int i = 0; i < NUM_SOCKET_TYPES; ++i) {
+    max_sockets_[i] = max_num_sockets_;
+    num_sockets_[i] = 0;
+    idle_counter_[i] = 0;
+  }
+  if (num_threads > 0) {
+    pool_ = wm->StartPool(num_threads, "threadpool_http_server");
+    DCHECK_NE(WorkerThreadManager::kFreePool, pool_);
+  }
+}
+
+// Does no explicit cleanup.
+// NOTE(review): IdleClosure objects allocated by RegisterIdleClosure() are
+// only deleted by UnregisterIdleClosure(), so any that are still registered
+// at destruction appear to be leaked - confirm callers always unregister.
+ThreadpoolHttpServer::~ThreadpoolHttpServer() {
+}
+
+// Sets the monitor that is handed to every request created afterwards; a
+// request calls monitor->FinishHandle() with its Stat when it finishes.
+// The pointer is stored as-is and is not deleted by the code in this file.
+void ThreadpoolHttpServer::SetMonitor(Monitor* monitor) {
+  monitor_ = monitor;
+}
+
+// Sets the manager that socket requests created afterwards consult in
+// IsTrusted(). While it is nullptr every socket request is treated as
+// trusted. The pointer is stored as-is.
+void ThreadpoolHttpServer::SetTrustedIpsManager(
+    TrustedIpsManager* trustedipsmanager) {
+  trustedipsmanager_ = trustedipsmanager;
+}
+
+#ifdef _WIN32
+// Adapter that forwards every request delivered by the NamedPipeServer to
+// ThreadpoolHttpServer::SendNamedPipeJobToWorkerThread(). It keeps a raw
+// pointer to the server and never deletes it.
+class ThreadpoolHttpServer::PipeHandler : public NamedPipeServer::Handler {
+ public:
+  explicit PipeHandler(ThreadpoolHttpServer* server) : server_(server) {}
+  ~PipeHandler() override {}
+
+  PipeHandler(const PipeHandler&) = delete;
+  PipeHandler& operator=(const PipeHandler&) = delete;
+
+  // Hands |req| over to the owning server.
+  void HandleIncoming(NamedPipeServer::Request* req) override {
+    server_->SendNamedPipeJobToWorkerThread(req);
+  }
+
+ private:
+  ThreadpoolHttpServer* server_;
+};
+#endif
+
+// Starts the IPC endpoint at |addr| and sets the SOCKET_IPC accept limit.
+// On Windows a NamedPipeServer is started. Elsewhere a unix domain socket is
+// opened, trying up to kNumRetry times with a sleep in between, and the
+// process CHECK-fails if it never opens. The computed limit is logged and
+// must be positive (CHECK).
+void ThreadpoolHttpServer::StartIPC(
+    const string& addr, int num_threads,
+    int max_overcommit_incoming_sockets) {
+#ifdef _WIN32
+  pipe_handler_.reset(new PipeHandler(this));
+  pipe_server_.reset(new NamedPipeServer(wm_, pipe_handler_.get()));
+  pipe_server_->Start(addr);
+
+  // Each thread has a select() for at most FD_SETSIZE of sockets.
+  // 1 for event pipe fd.
+  // Note that NamedPipeServer doesn't use select(). It only waits for
+  // connection for a named pipe, creates new named pipe once
+  // the connection is established, and read/write pipes with asynchronous
+  // overlapped I/O.
+  int max_incoming = std::min(
+      max_num_sockets_,
+      num_threads * (FD_SETSIZE + max_overcommit_incoming_sockets - 1));
+  max_incoming = std::min(
+      max_incoming,
+      num_http_threads_ * (FD_SETSIZE + max_overcommit_incoming_sockets - 1));
+#else
+  // compiler_proxy would consume almost 3 fds per request, so it would be
+  // safe to limit active accepting sockets by max_num_sockets / 3.
+  // Each worker thread has pipe (2 fds) and we use 2 sockets to accept
+  // requests, so we count them too.
+  int max_incoming = max_num_sockets_ / 3 - num_threads * 2 - 2;
+  const int kNumRetry = 10;
+  bool socket_ok = false;
+  for (int i = 0; i < kNumRetry; ++i) {
+    if (OpenUnixDomainSocket(addr)) {
+      socket_ok = true;
+      break;
+    }
+    devtools_goma::PlatformThread::Sleep(1);
+  }
+  CHECK(socket_ok) << "Failed to open " << addr;
+  LOG(INFO) << "unix domain:" << addr;
+
+#endif
+  // The macro tests below only check whether the macro is defined, so a
+  // macro defined without a value still preprocesses cleanly.
+  LOG(INFO) << "max incoming: " << max_incoming
+            << " FD_SETSIZE=" << FD_SETSIZE
+            << " max_num_sockets=" << max_num_sockets_
+#ifdef USE_EPOLL
+            << " USE_EPOLL=1"
+#elif defined(USE_KQUEUE)
+            << " USE_KQUEUE=1"
+#endif
+            << " threads=" << num_threads
+            << "+" << num_http_threads_;
+  CHECK_GT(max_incoming, 0);
+  SetAcceptLimit(max_incoming, ThreadpoolHttpServer::SOCKET_IPC);
+}
+
+// Stops the IPC endpoint: stops the named pipe server on Windows, closes the
+// unix domain socket elsewhere.
+// NOTE(review): the Windows branch dereferences pipe_server_, which is only
+// set in StartIPC() - presumably StopIPC() is never called without it.
+void ThreadpoolHttpServer::StopIPC() {
+#ifdef _WIN32
+  pipe_server_->Stop();
+#else
+  CloseUnixDomainSocket();
+#endif
+}
+
+#ifndef _WIN32
+// Creates, binds and listens on the IPC socket at |path|, replacing any file
+// already at that path and restricting the socket file to owner read/write.
+// Returns true on success. On any failure un_socket_ is left invalid and
+// false is returned so that the caller can retry.
+bool ThreadpoolHttpServer::OpenUnixDomainSocket(const string& path) {
+  GomaIPCAddr addr;
+  socklen_t addr_len = InitializeGomaIPCAddress(path, &addr);
+  // Remove a stale socket file; failure (e.g. no such file) is ignored.
+  remove(path.c_str());
+  // TODO: use named pipe.
+  un_socket_.reset(socket(AF_GOMA_IPC, SOCK_STREAM, 0));
+  if (!un_socket_.valid())
+    return false;
+  CHECK_EQ(0, SetFileDescriptorFlag(un_socket_.get(), FD_CLOEXEC));
+  if (!un_socket_.SetNonBlocking()) {
+    PLOG(ERROR) << "set non blocking";
+    un_socket_.reset(-1);
+    return false;
+  }
+  if (!un_socket_.SetReuseAddr()) {
+    PLOG(ERROR) << "setsockopt SO_REUSEADDR";
+    un_socket_.reset(-1);
+    return false;
+  }
+  if (bind(un_socket_.get(), (struct sockaddr*)&addr, addr_len) < 0) {
+    PLOG(ERROR) << "bind";
+    un_socket_.reset(-1);
+    return false;
+  }
+  // drop permission to others.
+  if (chmod(path.c_str(), S_IRUSR|S_IWUSR) != 0) {
+    PLOG(ERROR) << "chmod";
+    un_socket_.reset(-1);
+    return false;
+  }
+  un_socket_name_ = path;
+  // A socket that is not listening would never accept a connection, so a
+  // listen() failure is reported like every other setup failure above.
+  if (listen(un_socket_.get(), BACKLOG) < 0) {
+    PLOG(ERROR) << "listen";
+    un_socket_.reset(-1);
+    return false;
+  }
+  return true;
+}
+
+// Closes the IPC socket if it is open and then removes its socket file,
+// provided a path was recorded. Does nothing when no socket is open.
+void ThreadpoolHttpServer::CloseUnixDomainSocket() {
+  if (!un_socket_.valid())
+    return;
+  un_socket_.Close();
+  if (un_socket_name_.empty())
+    return;
+  remove(un_socket_name_.c_str());
+}
+#endif
+
+// Sets the maximum number of concurrently accepted sockets of |socket_type|
+// to |n|. CHECK-fails unless |socket_type| is a valid SocketType and
+// 0 <= n <= max_num_sockets_.
+// NOTE(review): max_sockets_ is written here without holding mu_, while
+// AddAccept() reads it under mu_ - confirm this is only called before
+// requests are accepted.
+void ThreadpoolHttpServer::SetAcceptLimit(int n, SocketType socket_type) {
+  CHECK_GE(socket_type, 0);
+  CHECK_LT(socket_type, NUM_SOCKET_TYPES);
+  CHECK_GE(n, 0);
+  CHECK_LE(n, max_num_sockets_);
+  max_sockets_[socket_type] = n;
+}
+
+/* static */
+// Splits the first line of |request| (everything up to the first CRLF, e.g.
+// 'GET / HTTP/1.1') on " " into method, request URI and protocol. Stores
+// the method in |method|, the URI up to its first '?' in |req_path| and the
+// rest after the '?' in |query| (cleared when there is no '?').
+// Returns false, leaving the outputs untouched, when |request| contains no
+// CRLF or the line does not split into exactly three fields.
+bool ThreadpoolHttpServer::ParseRequestLine(
+    StringPiece request, string* method, string* req_path, string* query) {
+  const StringPiece::size_type eol = request.find("\r\n");
+  if (eol == StringPiece::npos)
+    return false;
+
+  const string request_line = string(request.substr(0, eol));
+  std::vector<string> fields;
+  SplitStringUsing(request_line, " ", &fields);
+  if (fields.size() != 3)
+    return false;
+
+  *method = fields[0];
+  const string& uri = fields[1];
+  const size_t query_mark = uri.find("?");
+  if (query_mark == string::npos) {
+    *req_path = uri;
+    query->clear();
+    return true;
+  }
+  *req_path = uri.substr(0, query_mark);
+  *query = uri.substr(query_mark + 1);
+  return true;
+}
+
+// Initializes the state shared by all request kinds: no bytes read yet, no
+// valid HTTP request parsed, unknown peer pid, and a copy of |stat|.
+// |wm|, |server| and |monitor| are stored as raw pointers.
+ThreadpoolHttpServer::HttpServerRequest::HttpServerRequest(
+    WorkerThreadManager* wm,
+    ThreadpoolHttpServer* server,
+    const Stat& stat,
+    Monitor* monitor)
+    : wm_(wm), thread_id_(0),
+      server_(server),
+      monitor_(monitor),
+      request_offset_(0),
+      request_content_length_(0),
+      request_len_(0),
+      parsed_valid_http_request_(false),
+      peer_pid_(0),
+      stat_(stat) {
+}
+
+#ifdef _WIN32
+// A request that arrived through the Windows named pipe server. The whole
+// request message is already available from |req|, so Start() parses it in
+// one step. The object deletes itself in SendReply(); the destructor is
+// private so it cannot be destroyed from outside.
+class ThreadpoolHttpServer::RequestFromNamedPipe : public HttpServerRequest {
+ public:
+  RequestFromNamedPipe(
+      WorkerThreadManager* wm,
+      ThreadpoolHttpServer* server,
+      const Stat& stat,
+      Monitor* monitor,
+      NamedPipeServer::Request* req)
+      : HttpServerRequest(wm, server, stat, monitor),
+        req_(req) {
+  }
+  RequestFromNamedPipe(const RequestFromNamedPipe&) = delete;
+  RequestFromNamedPipe& operator=(const RequestFromNamedPipe&) = delete;
+
+  // Trust is decided solely by CheckCredential().
+  bool IsTrusted() override {
+    return CheckCredential();
+  }
+  bool CheckCredential() override;
+
+  // Parses the request message and passes this request to the server.
+  void Start();
+  void SendReply(const string& response) override;
+  void NotifyWhenClosed(OneshotClosure* callback) override;
+
+ private:
+  ~RequestFromNamedPipe() override {}
+
+  // Underlying pipe request; stored as a raw pointer, not deleted here.
+  NamedPipeServer::Request* req_;
+};
+
+// Always accepts the peer; no credential is inspected and peer_pid_ is not
+// filled in.
+bool ThreadpoolHttpServer::RequestFromNamedPipe::CheckCredential() {
+  // TODO: get peer_pid_ ?
+  return true;
+}
+
+// Validates and parses the already-received request message, then hands this
+// request to the server. On every failure path the request is still passed
+// to HandleIncoming() with parsed_valid_http_request_ left false, which makes
+// the server send its error reply.
+// Nothing may touch |this| after HandleIncoming(): the reply path ends in
+// SendReply(), which deletes this object.
+void ThreadpoolHttpServer::RequestFromNamedPipe::Start() {
+  // Time spent queued before a worker picked the request up.
+  stat_.waiting_time_msec = stat_.timer.GetInMs();
+  stat_.timer.Start();
+  thread_id_ = wm_->GetCurrentThreadId();
+
+  request_ = string(req_->request_message());
+  request_len_ = request_.size();
+  bool request_is_chunked = false;
+  if (!FindContentLengthAndBodyOffset(
+          request_,
+          &request_content_length_,
+          &request_offset_,
+          &request_is_chunked)) {
+    LOG(ERROR) << "failed to find content length and body offset:"
+               << request_;
+    server_->HandleIncoming(this);
+    return;
+  }
+  // We do not support request encoded with chunked transfer coding.
+  if (request_is_chunked) {
+    LOG(ERROR) << "request is encoded with chunked transfer coding:"
+               << request_;
+    server_->HandleIncoming(this);
+    return;
+  }
+  // The message must contain the whole body announced by the header.
+  if (request_len_ < request_offset_ + request_content_length_) {
+    LOG(ERROR) << "request not fully received? "
+               << " len=" << request_len_
+               << " offset=" << request_offset_
+               << " content_length=" << request_content_length_;
+    server_->HandleIncoming(this);
+    return;
+  }
+  stat_.read_req_time_msec = stat_.timer.GetInMs();
+  if (!ParseRequestLine(request_,
+                        &method_, &req_path_, &query_)) {
+    LOG(ERROR) << "parse request line failed";
+    server_->HandleIncoming(this);
+    return;
+  }
+  stat_.req_size = request_len_;
+  parsed_valid_http_request_ = true;
+  server_->HandleIncoming(this);
+  return;
+}
+
+// Records handler time and response size, forwards |response| to the pipe
+// request, reports the stats to the monitor (if any) and then destroys this
+// object. The caller must not use the request after this returns.
+void ThreadpoolHttpServer::RequestFromNamedPipe::SendReply(
+    const string& response) {
+  stat_.handler_time_msec = stat_.timer.GetInMs();
+  stat_.resp_size = response.size();
+  stat_.timer.Start();
+  req_->SendReply(response);
+  if (monitor_)
+    monitor_->FinishHandle(stat_);
+  delete this;
+}
+
+
+// Delegates close notification to the underlying pipe request.
+void ThreadpoolHttpServer::RequestFromNamedPipe::NotifyWhenClosed(
+    OneshotClosure* callback) {
+  req_->NotifyWhenClosed(callback);
+}
+#endif
+
+// A request read from an accepted socket (TCP or IPC). Start() registers the
+// socket with the WorkerThreadManager; the Do*() methods are socket
+// callbacks and the *Finished()/Finish() methods are the follow-up steps
+// they schedule on thread_id_. The object deletes itself in Finish(); the
+// destructor is private so it cannot be destroyed from outside.
+class ThreadpoolHttpServer::RequestFromSocket : public HttpServerRequest {
+ public:
+  RequestFromSocket(
+      WorkerThreadManager* wm,
+      ScopedSocket&& sock, SocketType sock_type, const Stat& stat,
+      Monitor* monitor,
+      TrustedIpsManager* trustedipsmanager,
+      ThreadpoolHttpServer* server);
+  RequestFromSocket() = delete;
+  RequestFromSocket(const RequestFromSocket&) = delete;
+  RequestFromSocket& operator=(const RequestFromSocket&) = delete;
+
+  bool CheckCredential() override;
+  bool IsTrusted() override;
+
+  void Start();
+  void SendReply(const string& response) override;
+  void NotifyWhenClosed(OneshotClosure* callback) override;
+
+ private:
+  ~RequestFromSocket() override;
+
+  void NotifyWhenClosedInternal(
+      WorkerThreadManager::ThreadId thread_id,
+      OneshotClosure* callback);
+  void DoRead();
+  void DoWrite();
+  void DoTimeout();
+  void ReadFinished();
+  void WriteFinished();
+  void DoReadEOF();
+  void DoCheckClosed();
+  void DoClosed();
+  void Finish();
+
+  // Accepted socket; moved into the socket descriptor by Start().
+  ScopedSocket sock_;
+  SocketType socket_type_;
+  // Descriptor returned by RegisterSocketDescriptor(); nullptr until Start().
+  SocketDescriptor* d_;
+  bool request_is_chunked_;
+  // Number of bytes of response_ already written to the socket.
+  size_t response_written_;
+  TrustedIpsManager* trustedipsmanager_;
+
+  // true if it finished read request, and waiting for ReadFinished()
+  // called back.  In other words, callback to ReadFinished on the fly in
+  // worker thread manager.
+  bool read_finished_;
+  // true if it got timed out, and waiting for Finish() called back.
+  // In other words, callback to Finish on the fly in worker thread manager.
+  bool timed_out_;
+
+  // Thread that called NotifyWhenClosed() and the callback to run on it.
+  WorkerThreadManager::ThreadId closed_thread_id_;
+  OneshotClosure* closed_callback_;
+};
+
+// Takes ownership of |sock| and records the collaborators. No socket
+// descriptor is registered until Start() runs, so d_ starts as nullptr.
+// request_is_chunked_ is explicitly initialized so that it never holds an
+// indeterminate value when DoRead() inspects it.
+ThreadpoolHttpServer::RequestFromSocket::RequestFromSocket(
+    WorkerThreadManager* wm,
+    ScopedSocket&& sock, SocketType socket_type,
+    const ThreadpoolHttpServer::Stat& stat,
+    ThreadpoolHttpServer::Monitor* monitor,
+    TrustedIpsManager* trustedipsmanager,
+    ThreadpoolHttpServer* server)
+    : HttpServerRequest(wm, server, stat, monitor),
+      sock_(std::move(sock)),
+      socket_type_(socket_type),
+      d_(nullptr),
+      request_is_chunked_(false),
+      response_written_(0),
+      trustedipsmanager_(trustedipsmanager),
+      read_finished_(false),
+      timed_out_(false),
+      closed_thread_id_(0),
+      closed_callback_(nullptr) {
+}
+
+// Deletes a close callback that was never run, unregisters the socket
+// descriptor (the returned socket is released when |fd| goes out of scope)
+// and tells the server that one accepted socket of this type is gone.
+ThreadpoolHttpServer::RequestFromSocket::~RequestFromSocket() {
+  delete closed_callback_;
+  ScopedSocket fd(wm_->DeleteSocketDescriptor(d_));
+  d_ = nullptr;
+  server_->RemoveAccept(socket_type_);
+}
+
+// Verifies the IPC peer via CheckGomaIPCPeer(), which also receives
+// &peer_pid_. Returns false right away for non-IPC sockets and before the
+// socket descriptor has been registered.
+bool ThreadpoolHttpServer::RequestFromSocket::CheckCredential() {
+  const bool checkable = (socket_type_ == SOCKET_IPC) && (d_ != nullptr);
+  if (!checkable)
+    return false;
+  return CheckGomaIPCPeer(d_->wrapper(), &peer_pid_);
+}
+
+// Decides whether the peer of this socket may be served.
+// - No TrustedIpsManager configured: always trusted.
+// - Socket descriptor not registered yet, or getpeername() fails: untrusted.
+// - AF_UNIX peer: decided by CheckCredential().
+// - AF_INET peer: decided by TrustedIpsManager::IsTrustedClient().
+// - Any other address family: untrusted.
+bool ThreadpoolHttpServer::RequestFromSocket::IsTrusted() {
+  if (trustedipsmanager_ == nullptr)
+    return true;
+  if (d_ == nullptr)
+    return false;
+
+  union {
+    struct sockaddr_storage storage;
+    struct sockaddr_in in;
+  } peer;
+  socklen_t peer_len = sizeof(peer);
+  if (getpeername(d_->fd(), reinterpret_cast<sockaddr*>(&peer),
+                  &peer_len) != 0) {
+    PLOG(WARNING) << "getpeername";
+    return false;
+  }
+
+  switch (peer.storage.ss_family) {
+    case AF_UNIX:
+      VLOG(1) << "Access from unix domain socket";
+      return CheckCredential();
+    case AF_INET:
+      break;
+    default:
+      LOG(WARNING) << "Access from no-INET:" << peer.storage.ss_family;
+      return false;
+  }
+
+  // Scratch space for the textual form of the peer address in the logs.
+  char text[128];
+  if (!trustedipsmanager_->IsTrustedClient(peer.in.sin_addr)) {
+    LOG(WARNING) << "Access from "
+                 << inet_ntop(AF_INET, &peer.in.sin_addr, text, sizeof text)
+                 << " untrusted";
+    return false;
+  }
+  VLOG(1) << "Access from "
+          << inet_ntop(AF_INET, &peer.in.sin_addr, text, sizeof text)
+          << " trusted";
+  return true;
+}
+
+// First step of a socket request, run on a worker thread: records the
+// queueing time, remembers the current thread as thread_id_, registers the
+// socket (moving it out of sock_) and arms DoRead() for readability and
+// DoTimeout() after kDefaultTimeoutSec seconds.
+void ThreadpoolHttpServer::RequestFromSocket::Start() {
+  stat_.waiting_time_msec = stat_.timer.GetInMs();
+  stat_.timer.Start();
+  thread_id_ = wm_->GetCurrentThreadId();
+  d_ = wm_->RegisterSocketDescriptor(std::move(sock_),
+                                     WorkerThreadManager::PRIORITY_HIGH);
+
+  d_->NotifyWhenReadable(NewPermanentCallback(
+      this, &ThreadpoolHttpServer::RequestFromSocket::DoRead));
+  d_->NotifyWhenTimedout(
+      kDefaultTimeoutSec,
+      NewCallback(
+          this, &ThreadpoolHttpServer::RequestFromSocket::DoTimeout));
+}
+
+// Asks for |callback| to be run when the peer closes the connection.
+// CHECK-fails if a callback is already installed, if |callback| is null, or
+// if the request has not been fully read yet. The installation itself is
+// done on thread_id_ by NotifyWhenClosedInternal(); the calling thread's id
+// is passed along so the callback can later be run on the caller's thread.
+void ThreadpoolHttpServer::RequestFromSocket::NotifyWhenClosed(
+    OneshotClosure* callback) {
+  CHECK(closed_callback_ == nullptr);
+  CHECK(callback != nullptr);
+  CHECK(read_finished_);
+  wm_->RunClosureInThread(
+      FROM_HERE,
+      thread_id_,
+      NewCallback(
+          this,
+          &ThreadpoolHttpServer::RequestFromSocket::NotifyWhenClosedInternal,
+          wm_->GetCurrentThreadId(),
+          callback),
+      WorkerThreadManager::PRIORITY_HIGH);
+}
+
+// Scheduled by NotifyWhenClosed() on thread_id_. Stores |callback| together
+// with the thread it must run on (|thread_id|) and arms DoCheckClosed() for
+// the next readable event on the socket.
+void ThreadpoolHttpServer::RequestFromSocket::NotifyWhenClosedInternal(
+    WorkerThreadManager::ThreadId thread_id,
+    OneshotClosure* callback) {
+  CHECK(closed_callback_ == nullptr);
+  CHECK(callback != nullptr);
+  CHECK(read_finished_);
+  closed_thread_id_ = thread_id;
+  closed_callback_ = callback;
+  d_->NotifyWhenReadable(NewPermanentCallback(
+      this, &ThreadpoolHttpServer::RequestFromSocket::DoCheckClosed));
+}
+
+// Readable callback: appends newly available bytes to request_ and, once
+// the request is complete or cannot be completed, stops reading and
+// schedules ReadFinished() on thread_id_.
+// ReadFinished() is scheduled when:
+//   - the read hits EOF or a non-retryable error,
+//   - the request uses chunked transfer coding (unsupported),
+//   - header and body are fully received; parsed_valid_http_request_ is set
+//     only if the request line also parses, otherwise the request is
+//     answered as invalid.
+void ThreadpoolHttpServer::RequestFromSocket::DoRead() {
+  CHECK(d_);
+  // If it already got timed out, do nothing.  Eventually, Finish() will be
+  // called.
+  if (timed_out_)
+    return;
+  // The header counts as found only when both the body offset and a non-zero
+  // content length are known; otherwise the header is searched again below.
+  bool found_header = request_offset_ > 0 && request_content_length_ > 0;
+  int buf_size = request_.size() - request_len_;
+  if (found_header) {
+    // Make room for exactly header + body.
+    // NOTE(review): the size derives from the request's content length with
+    // no upper bound visible here - confirm it is limited elsewhere.
+    if (request_.size() < request_offset_ + request_content_length_) {
+      request_.resize(request_offset_ + request_content_length_);
+    }
+  } else if (buf_size < kBufSize / 2) {
+    request_.resize(request_.size() + kBufSize);
+  }
+  char* buf = &request_[request_len_];
+  buf_size = request_.size() - request_len_;
+  CHECK_GT(buf_size, 0)
+      << " request_len=" << request_len_
+      << " request_.size=" << request_.size()
+      << " offset=" << request_offset_
+      << " content_length=" << request_content_length_;
+  int r = d_->Read(buf, buf_size);
+  if (r <= 0) {  // EOF or error
+    if (d_->NeedRetry())
+      return;
+    d_->StopRead();
+    read_finished_ = true;
+    wm_->RunClosureInThread(
+        FROM_HERE,
+        thread_id_,
+        NewCallback(
+            this, &ThreadpoolHttpServer::RequestFromSocket::ReadFinished),
+        WorkerThreadManager::PRIORITY_IMMEDIATE);
+    return;
+  }
+  request_len_ += r;
+  StringPiece req(request_.data(), request_len_);
+  if (found_header ||
+      FindContentLengthAndBodyOffset(
+          req, &request_content_length_, &request_offset_,
+          &request_is_chunked_)) {
+    // We do not support request encoded with chunked transfer coding.
+    if (request_is_chunked_) {  // treat this as error.
+      LOG(ERROR) << "request is encoded with chunked transfer coding:"
+                 << req;
+      d_->StopRead();
+      read_finished_ = true;
+      wm_->RunClosureInThread(
+          FROM_HERE,
+          thread_id_,
+          NewCallback(
+              this, &ThreadpoolHttpServer::RequestFromSocket::ReadFinished),
+          WorkerThreadManager::PRIORITY_IMMEDIATE);
+      return;
+    }
+    if (request_len_ < request_offset_ + request_content_length_) {
+      // not fully received yet.
+      return;
+    }
+    stat_.read_req_time_msec = stat_.timer.GetInMs();
+    if (ParseRequestLine(req, &method_, &req_path_, &query_)) {
+      d_->StopRead();
+      stat_.req_size = request_len_;
+      read_finished_ = true;
+      parsed_valid_http_request_ = true;
+      wm_->RunClosureInThread(
+          FROM_HERE,
+          thread_id_,
+          NewCallback(
+              this, &ThreadpoolHttpServer::RequestFromSocket::ReadFinished),
+          WorkerThreadManager::PRIORITY_IMMEDIATE);
+    } else {
+      // Everything announced by the header has arrived but the request line
+      // is malformed. Finish now as an invalid request. If reading went on,
+      // a buffer that is already exactly full would leave buf_size == 0 on
+      // the next readable event and trip the CHECK_GT above.
+      LOG(ERROR) << "parse request line failed";
+      d_->StopRead();
+      read_finished_ = true;
+      wm_->RunClosureInThread(
+          FROM_HERE,
+          thread_id_,
+          NewCallback(
+              this, &ThreadpoolHttpServer::RequestFromSocket::ReadFinished),
+          WorkerThreadManager::PRIORITY_IMMEDIATE);
+    }
+  }
+}
+
+// Writable callback: writes the not-yet-sent tail of response_.
+// A failed write that does not need a retry ends the request via Finish();
+// once the whole response is written, WriteFinished() is scheduled. Both
+// follow-ups run on thread_id_.
+void ThreadpoolHttpServer::RequestFromSocket::DoWrite() {
+  DCHECK(d_);
+  int n = d_->Write(
+      response_.data() + response_written_,
+      response_.size() - response_written_);
+  if (n <= 0) {
+    // Retryable: stay registered and wait for the next writable event.
+    if (d_->NeedRetry())
+      return;
+    d_->StopWrite();
+    wm_->RunClosureInThread(
+        FROM_HERE,
+        thread_id_,
+        NewCallback(
+            this, &ThreadpoolHttpServer::RequestFromSocket::Finish),
+        WorkerThreadManager::PRIORITY_HIGH);
+    return;
+  }
+  response_written_ += n;
+  if (response_written_ == response_.size()) {
+    d_->StopWrite();
+    stat_.write_resp_time_msec = stat_.timer.GetInMs();
+    wm_->RunClosureInThread(
+        FROM_HERE,
+        thread_id_,
+        NewCallback(
+            this, &ThreadpoolHttpServer::RequestFromSocket::WriteFinished),
+        WorkerThreadManager::PRIORITY_IMMEDIATE);
+  }
+}
+
+// Timeout callback armed in Start(). Unless the request has already been
+// read, stops all socket activity, marks the request as timed out (so a
+// late DoRead() becomes a no-op) and schedules Finish() on thread_id_.
+void ThreadpoolHttpServer::RequestFromSocket::DoTimeout() {
+  // If it already finished reading, do nothing.  Eventually, ReadFinished()
+  // will be called.
+  if (read_finished_)
+    return;
+  d_->StopRead();
+  d_->StopWrite();
+  timed_out_ = true;
+  wm_->RunClosureInThread(
+      FROM_HERE,
+      thread_id_,
+      NewCallback(
+          this, &ThreadpoolHttpServer::RequestFromSocket::Finish),
+      WorkerThreadManager::PRIORITY_HIGH);
+}
+
+// Readable callback armed by NotifyWhenClosedInternal(). Stops all socket
+// activity, logs whether the socket looks closed or unexpectedly readable,
+// and in either case schedules DoClosed() on thread_id_.
+void ThreadpoolHttpServer::RequestFromSocket::DoCheckClosed() {
+  d_->StopRead();
+  d_->StopWrite();
+  if (!d_->IsReadable() && closed_callback_ != nullptr) {
+    VLOG(1) << "closed=" << d_->fd();
+  } else {
+    // NOTE(review): PLOG appends the current errno text, but no failing
+    // call is visible right before it - the errno part may be unrelated.
+    PLOG(WARNING) << "readable after request? fd=" << d_->fd();
+  }
+  wm_->RunClosureInThread(
+      FROM_HERE,
+      thread_id_,
+      NewCallback(
+          this, &ThreadpoolHttpServer::RequestFromSocket::DoClosed),
+      WorkerThreadManager::PRIORITY_IMMEDIATE);
+}
+
+// Follow-up of DoCheckClosed(): drops the readable notification and, if a
+// close callback is installed, detaches it from this request and schedules
+// it on the thread that asked for it (closed_thread_id_).
+void ThreadpoolHttpServer::RequestFromSocket::DoClosed() {
+  d_->ClearReadable();
+
+  // Detach first so that the destructor will not delete a callback that has
+  // been handed to the worker thread manager.
+  OneshotClosure* const pending = closed_callback_;
+  closed_callback_ = nullptr;
+  if (pending == nullptr)
+    return;
+
+  wm_->RunClosureInThread(
+      FROM_HERE,
+      closed_thread_id_,
+      NewCallback(static_cast<Closure*>(pending), &Closure::Run),
+      WorkerThreadManager::PRIORITY_HIGH);
+}
+
+// Scheduled by DoRead() once reading is over. Restarts the stat timer (so it
+// now measures handler time), removes the readable and timeout
+// notifications and hands the request to the server.
+void ThreadpoolHttpServer::RequestFromSocket::ReadFinished() {
+  CHECK(read_finished_);
+  stat_.timer.Start();
+  d_->ClearReadable();
+  d_->ClearTimeout();
+
+  server_->HandleIncoming(this);
+}
+
+// Scheduled by DoWrite() once the whole response is written. Shuts down the
+// sending side and waits for the peer's EOF via DoReadEOF().
+void ThreadpoolHttpServer::RequestFromSocket::WriteFinished() {
+  CHECK(d_);
+  d_->ClearWritable();
+
+  d_->ShutdownForSend();
+  // Wait for readable, and expecting Read()==0 (EOF).
+  d_->NotifyWhenReadable(NewPermanentCallback(
+      this, &ThreadpoolHttpServer::RequestFromSocket::DoReadEOF));
+}
+
+// Readable callback armed by WriteFinished(). Reads one byte to learn how
+// the peer reacted to our shutdown, logs the outcome, and then always stops
+// reading and schedules Finish() on thread_id_.
+void ThreadpoolHttpServer::RequestFromSocket::DoReadEOF() {
+  CHECK(d_);
+  char probe[1];
+  const int nread = d_->Read(probe, sizeof probe);
+  if (nread > 0) {
+    // unexpected receiving data?
+    LOG(WARNING) << "unexpected data after shutdown fd=" << d_->fd();
+  } else if (nread < 0) {
+    // client may have closed once it had received all response message,
+    // before server ack EOF.
+    const string err = d_->GetLastErrorMessage();
+    VLOG(1) << "shutdown error? fd=" << d_->fd() << ":" << err;
+  } else {
+    // EOF
+    VLOG(1) << d_->fd() << " EOF";
+  }
+  d_->StopRead();
+  wm_->RunClosureInThread(
+      FROM_HERE,
+      thread_id_,
+      NewCallback(
+          this, &ThreadpoolHttpServer::RequestFromSocket::Finish),
+      WorkerThreadManager::PRIORITY_HIGH);
+}
+
+// Last step of a socket request: reports the collected stats to the monitor
+// (if any) and destroys this object.
+void ThreadpoolHttpServer::RequestFromSocket::Finish() {
+  if (monitor_)
+    monitor_->FinishHandle(stat_);
+  delete this;
+}
+
+// Copies |response| into response_, records handler time and response size,
+// restarts the stat timer and arms DoWrite() to send the response when the
+// socket becomes writable.
+void ThreadpoolHttpServer::RequestFromSocket::SendReply(
+    const string& response) {
+  response_ = response;
+  stat_.handler_time_msec = stat_.timer.GetInMs();
+  stat_.resp_size = response.size();
+  stat_.timer.Start();
+  d_->NotifyWhenWritable(
+      NewPermanentCallback(
+          this, &ThreadpoolHttpServer::RequestFromSocket::DoWrite));
+}
+
+// Dispatches a request whose reading has finished: a request that did not
+// parse as valid HTTP gets a fixed error reply, everything else goes to the
+// HTTP handler.
+void ThreadpoolHttpServer::HandleIncoming(HttpServerRequest* request) {
+  if (!request->ParsedValidHttpRequest()) {
+    request->SendReply("500 Unexpected Server Error\r\n\r\n");
+    return;
+  }
+  http_handler_->HandleHttpRequest(request);
+}
+
+// Binds |fd| to |sa| with sin_port set from |*port|. Each time bind fails
+// the next higher port is tried, for at most |num_find_ports| extra
+// attempts. Returns true as soon as one bind succeeds, false when all
+// attempts failed. |sa->sin_port| and |*port| are left at the last port
+// that was tried.
+static bool BindPortWithRetries(int fd, int num_find_ports,
+                                struct sockaddr_in* sa, int* port) {
+  const socklen_t addr_len = sizeof(*sa);
+  const int first_port = *port;
+  for (int retries = 0; ; ++retries, ++*port) {
+    sa->sin_port = htons(static_cast<u_short>(*port));
+    if (bind(fd, (struct sockaddr*)sa, addr_len) >= 0)
+      return true;
+
+    if (retries >= num_find_ports) {
+      PLOG(ERROR) << "bind failed with " << retries << " retries. "
+                  << "We checked ports from " << first_port
+                  << " to " << *port << " inclusive.";
+      return false;
+    }
+    PLOG(WARNING) << "bind failed for port " << *port
+                  << ". We will check the next port...";
+  }
+}
+
+// One registered idle callback: |closure| is run whenever the idle counter
+// of |socket_type| reaches a multiple of |count| (see
+// UpdateSocketIdleUnlocked()). Owns the closure; |id| is the handle that
+// RegisterIdleClosure() returned for it.
+class ThreadpoolHttpServer::IdleClosure {
+ public:
+  // closure must be a permanent callback.
+  IdleClosure(SocketType socket_type,
+              int count,
+              ThreadpoolHttpServer::RegisteredClosureID id,
+              std::unique_ptr<PermanentClosure> closure)
+      : socket_type_(socket_type),
+        count_(count),
+        id_(id),
+        closure_(std::move(closure)) {
+  }
+  ~IdleClosure() {
+  }
+
+  SocketType socket_type() const { return socket_type_; }
+  int count() const { return count_; }
+  ThreadpoolHttpServer::RegisteredClosureID id() const { return id_; }
+  // Non-owning pointer; the closure stays owned by this object.
+  PermanentClosure* closure() const { return closure_.get(); }
+
+ private:
+  const SocketType socket_type_;
+  const int count_;
+  const ThreadpoolHttpServer::RegisteredClosureID id_;
+  std::unique_ptr<PermanentClosure> closure_;
+  DISALLOW_COPY_AND_ASSIGN(IdleClosure);
+};
+
+// Registers |closure| to be run each time the idle counter of |socket_type|
+// reaches a multiple of |count|, and returns the id to pass to
+// UnregisterIdleClosure(). Ids are handed out in increasing order under mu_.
+// NOTE: |count| is used as a modulo divisor in UpdateSocketIdleUnlocked()
+// but is only guarded by a DCHECK here, so it must be positive.
+ThreadpoolHttpServer::RegisteredClosureID
+ThreadpoolHttpServer::RegisterIdleClosure(
+    SocketType socket_type, int count,
+    std::unique_ptr<PermanentClosure> closure) {
+  DCHECK_GT(count, 0);
+  AUTOLOCK(lock, &mu_);
+  ++last_closure_id_;
+  // Guards against the id counter wrapping around.
+  CHECK_GT(last_closure_id_, kInvalidClosureId);
+
+  idle_closures_.push_back(
+      new IdleClosure(socket_type, count,
+                      last_closure_id_, std::move(closure)));
+  return last_closure_id_;
+}
+
+// Removes and destroys the idle closure registered under |id|. Logs an
+// error and changes nothing when no such closure is registered.
+void ThreadpoolHttpServer::UnregisterIdleClosure(
+    RegisteredClosureID id) {
+  AUTOLOCK(lock, &mu_);
+  for (size_t i = 0; i < idle_closures_.size(); ++i) {
+    if (idle_closures_[i]->id() != id)
+      continue;
+    delete idle_closures_[i];
+    idle_closures_.erase(idle_closures_.begin() + i);
+    return;
+  }
+
+  LOG(ERROR) << "try to unregister invalid closure"
+             << " id=" << id;
+}
+
+// Records one idle tick for |socket_type|. Only counts while idle counting
+// is enabled and no socket of that type is currently accepted. After the
+// increment, every idle closure registered for the type whose count divides
+// the new counter value is queued on the worker thread manager.
+// Does no locking itself; the callers in this file hold mu_ around it.
+void ThreadpoolHttpServer::UpdateSocketIdleUnlocked(SocketType socket_type) {
+  if (!idle_counting_) {
+    LOG(INFO) << "update socket type:" << socket_type
+              << " while suspending idle counting";
+    return;
+  }
+  if (num_sockets_[socket_type] != 0)
+    return;
+
+  ++idle_counter_[socket_type];
+  for (IdleClosure* entry : idle_closures_) {
+    if (entry->socket_type() != socket_type)
+      continue;
+    if ((idle_counter_[socket_type] % entry->count()) != 0)
+      continue;
+    LOG(INFO) << "idle closure socket_type:" << socket_type
+              << " idle_counter=" << idle_counter_[socket_type];
+    wm_->RunClosure(FROM_HERE,
+                    entry->closure(),
+                    WorkerThreadManager::PRIORITY_MIN);
+  }
+}
+
+// Returns true when a failed accept() on a non-blocking listening socket is
+// only a transient condition: the connection that made select() report the
+// socket readable is already gone. The caller should go back to select().
+static bool IsTransientAcceptError(int err) {
+  return err == EAGAIN || err == EWOULDBLOCK || err == ECONNABORTED;
+}
+
+// Main accept loop. Opens the TCP listening socket on listen_addr_/port_
+// (trying following ports via BindPortWithRetries()), announces readiness,
+// then waits with a one-second select() timeout on the TCP socket and, when
+// open, the IPC socket. Every accepted connection is counted with
+// AddAccept() and handed to a worker thread; a socket type that produced no
+// connection in a round gets an idle tick.
+// Returns 0 once http_handler_ reports shutting down, 1 on a setup failure
+// or an unrecoverable accept() error.
+int ThreadpoolHttpServer::Loop() {
+  struct sockaddr_in sa;
+  ScopedSocket incoming_socket;  // the main waiting socket
+  socklen_t sa_size = sizeof(sa);
+
+  // TODO: listen IPv6 if any.  Need to fix BindPortWithRetries().
+  incoming_socket.reset(socket(AF_INET, SOCK_STREAM, 0));
+  if (!incoming_socket.valid()) {
+    PLOG(ERROR) << "socket";
+    return 1;
+  }
+  CHECK(incoming_socket.SetCloseOnExec());
+  CHECK(incoming_socket.SetNonBlocking());
+
+  if (!incoming_socket.SetReuseAddr()) {
+    PLOG(ERROR) << "setsockopt SO_REUSEADDR";
+    return 1;
+  }
+
+  memset(&sa, 0, sizeof(sa));
+  if (listen_addr_ == "localhost") {
+    sa.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+  } else if (listen_addr_ == "") {
+    sa.sin_addr.s_addr = htonl(INADDR_ANY);
+  } else {
+    LOG(FATAL) << "Unsupported listen_addr:" << listen_addr_;
+  }
+  sa.sin_family = AF_INET;
+
+  if (!BindPortWithRetries(incoming_socket.get(), num_find_ports_,
+                           &sa, &port_)) {
+    return 1;
+  }
+
+  // Without a successful listen() no connection can ever be accepted, so
+  // fail like the other setup steps instead of announcing a dead port.
+  if (listen(incoming_socket.get(), BACKLOG) < 0) {
+    PLOG(ERROR) << "listen";
+    return 1;
+  }
+
+  if (getsockname(incoming_socket.get(),
+                  (struct sockaddr*)&sa, &sa_size) == 0) {
+    std::cout << "GOMA version " << kBuiltRevisionString << " is ready."
+              << std::endl;
+    std::cout << "HTTP server now listening to port " << ntohs(sa.sin_port)
+              << ", access with http://localhost:" << ntohs(sa.sin_port)
+              << std::endl;
+  }
+  {
+    // Wake up everybody blocked in WaitPortReady().
+    AUTOLOCK(lock, &mu_);
+    port_ready_ = true;
+    cond_.Broadcast();
+  }
+  LOG(INFO) << "listen on port " << ntohs(sa.sin_port);
+  for (;;) {
+    if (http_handler_->shutting_down()) {
+      LOG(INFO) << "Shutting down...";
+      un_socket_.reset(-1);
+      incoming_socket.reset(-1);
+      return 0;
+    }
+    fd_set read_fd;
+    int max_fd = incoming_socket.get();
+    FD_ZERO(&read_fd);
+    MSVC_PUSH_DISABLE_WARNING_FOR_FD_SET();
+    FD_SET(incoming_socket.get(), &read_fd);
+    MSVC_POP_WARNING();
+    if (un_socket_.valid()) {
+      MSVC_PUSH_DISABLE_WARNING_FOR_FD_SET();
+      FD_SET(un_socket_.get(), &read_fd);
+      MSVC_POP_WARNING();
+      max_fd = std::max(max_fd, un_socket_.get());
+    }
+    struct timeval tv;
+    tv.tv_sec = 1;
+    tv.tv_usec = 0;
+    int r = select(max_fd + 1, &read_fd, nullptr, nullptr, &tv);
+    if (r == 0) {
+      // timeout?
+      AUTOLOCK(lock, &mu_);
+      // 1 sec idle on both socket.
+      for (int i = 0; i < NUM_SOCKET_TYPES; ++i) {
+        UpdateSocketIdleUnlocked(static_cast<SocketType>(i));
+      }
+      continue;
+    } else if (r == -1) {
+      PLOG(WARNING) << "select";
+      continue;
+    }
+    if (FD_ISSET(incoming_socket.get(), &read_fd)) {
+      struct sockaddr_in tmpisa;
+      socklen_t addrlen = sizeof(tmpisa);
+      ScopedSocket accepted_socket(accept(incoming_socket.get(),
+                                          (struct sockaddr*)&tmpisa,
+                                          &addrlen));
+      if (!accepted_socket.valid()) {
+        // Capture errno before anything else can overwrite it.
+        const int accept_errno = errno;
+        if (accept_errno == EINTR)
+          continue;
+        if (IsTransientAcceptError(accept_errno)) {
+          // The pending connection vanished between select() and accept();
+          // keep serving instead of leaving the loop.
+          PLOG(WARNING) << "accept incoming_socket";
+          continue;
+        }
+        PLOG(ERROR) << "accept incoming_socket";
+        return 1;
+      }
+      AddAccept(SOCKET_TCP);
+      if (!accepted_socket.SetCloseOnExec()) {
+        LOG(ERROR) << "failed to set FD_CLOEXEC";
+        RemoveAccept(SOCKET_TCP);
+        accepted_socket.Close();
+        return 1;
+      }
+      // send the new incoming socket to a worker thread.
+      SendJobToWorkerThread(std::move(accepted_socket), SOCKET_TCP);
+    } else {
+      AUTOLOCK(lock, &mu_);
+      // tcp was idle, but unix would have some event in 1 sec.
+      UpdateSocketIdleUnlocked(SOCKET_TCP);
+    }
+    if (un_socket_.valid() && FD_ISSET(un_socket_.get(), &read_fd)) {
+      GomaIPCAddr tmpaddr;
+      socklen_t addrlen = sizeof(tmpaddr);
+      ScopedSocket accepted_socket(accept(un_socket_.get(),
+                                          (struct sockaddr*)&tmpaddr,
+                                          &addrlen));
+      if (!accepted_socket.valid()) {
+        // Capture errno before logging can overwrite it.
+        const int accept_errno = errno;
+        if (accept_errno == EINTR)
+          continue;
+        if (IsTransientAcceptError(accept_errno)) {
+          PLOG(WARNING) << "accept unix domain socket";
+          continue;
+        }
+        PLOG(ERROR) << "accept unix domain socket";
+        if (accept_errno == EMFILE) {
+          // Out of file descriptors: back off and try again.
+          PlatformThread::Sleep(100000);
+          continue;
+        }
+        return 1;
+      }
+      AddAccept(SOCKET_IPC);
+      if (!accepted_socket.SetCloseOnExec()) {
+        LOG(ERROR) << "failed to set FD_CLOEXEC";
+        RemoveAccept(SOCKET_IPC);
+        accepted_socket.Close();
+        return 1;
+      }
+      VLOG(1) << "un_socket=" << un_socket_.get()
+              << "=>" << accepted_socket;
+      SendJobToWorkerThread(std::move(accepted_socket), SOCKET_IPC);
+    } else if (un_socket_.valid()) {
+      AUTOLOCK(lock, &mu_);
+      // unix was idle, but tcp would have some event in 1 sec.
+      UpdateSocketIdleUnlocked(SOCKET_IPC);
+    }
+  }
+  // Unreachable
+}
+
+// Blocks until no accepted socket of any type is outstanding. While some
+// are, the first busy socket type is logged and the caller sleeps on cond_
+// until woken, then checks again.
+void ThreadpoolHttpServer::Wait() {
+  AUTOLOCK(lock, &mu_);
+  LOG(INFO) << "Wait for http requests...";
+  for (;;) {
+    // Index of the first socket type that still has open sockets, or -1.
+    int busy_type = -1;
+    for (int i = 0; i < NUM_SOCKET_TYPES && busy_type < 0; ++i) {
+      if (num_sockets_[i] > 0)
+        busy_type = i;
+    }
+    if (busy_type < 0)
+      break;
+    LOG(INFO) << "socket[" << busy_type << "]=" << num_sockets_[busy_type];
+    cond_.Wait();
+  }
+  LOG(INFO) << "All http requests done.";
+}
+
+// Returns the current idle counter of |socket_type|, read under mu_.
+int ThreadpoolHttpServer::idle_counter(SocketType socket_type) const {
+  AUTOLOCK(lock, &mu_);
+  return idle_counter_[socket_type];
+}
+
+// Turns idle counting off: until ResumeIdleCounter() the idle counters are
+// neither incremented by UpdateSocketIdleUnlocked() nor reset by AddAccept().
+void ThreadpoolHttpServer::SuspendIdleCounter() {
+  AUTOLOCK(lock, &mu_);
+  LOG(INFO) << "suspend idle counter";
+  idle_counting_ = false;
+}
+
+// Turns idle counting back on after SuspendIdleCounter(). The counters keep
+// the values they had; nothing is reset here.
+void ThreadpoolHttpServer::ResumeIdleCounter() {
+  AUTOLOCK(lock, &mu_);
+  LOG(INFO) << "resume idle counter";
+  idle_counting_ = true;
+}
+
+// Accounts for one newly accepted socket of |socket_type| and resets that
+// type's idle counter (unless idle counting is suspended). The counter is
+// incremented first, so the new socket itself is part of the limit check.
+// Blocks the caller while the type is over its own limit or the total
+// number of sockets has reached max_num_sockets_, until RemoveAccept()
+// makes room.
+void ThreadpoolHttpServer::AddAccept(SocketType socket_type) {
+  AUTOLOCK(lock, &mu_);
+  // WorkerThreadManager is using select(2) to handle sockets I/O
+  // (for compatibility reason), so it couldn't handle fd >= max_num_sockets_.
+  ++num_sockets_[socket_type];
+  if (idle_counting_) {
+    idle_counter_[socket_type] = 0;
+  } else {
+    LOG(INFO) << "accept socket type:" << socket_type
+              << " while suspending idle counting";
+  }
+  while ((num_sockets_[socket_type] > max_sockets_[socket_type]) ||
+         (num_sockets_[SOCKET_TCP] + num_sockets_[SOCKET_IPC] >=
+          max_num_sockets_)) {
+    LOG(WARNING) << "Too many accepting socket: "
+                 << " tcp:" << num_sockets_[SOCKET_TCP]
+                 << " ipc:" << num_sockets_[SOCKET_IPC];
+    // Wait some request finishes and release socket by RemoveAccept().
+    cond_.Wait();
+  }
+}
+
+// Accounts for one finished socket of |socket_type| and wakes the waiters
+// on cond_.
+// cond_ is shared by AddAccept() (waiting for capacity), Wait() (waiting
+// for all sockets to finish) and WaitPortReady(). Waking a single waiter
+// could pick one whose condition is still false and leave the one that can
+// make progress asleep, so all of them are woken. Each waiter re-checks its
+// own condition in a loop, which makes the extra wake-ups harmless.
+void ThreadpoolHttpServer::RemoveAccept(SocketType socket_type) {
+  AUTOLOCK(lock, &mu_);
+  --num_sockets_[socket_type];
+  // Notify requests waiting in AddAccept() and callers blocked in Wait().
+  cond_.Broadcast();
+}
+
+// Blocks until Loop() has set port_ready_, which it does after binding the
+// TCP port, and woken the waiters.
+void ThreadpoolHttpServer::WaitPortReady() {
+  AUTOLOCK(lock, &mu_);
+  while (!port_ready_) {
+    LOG(INFO) << "http server is not yet ready";
+    cond_.Wait();
+  }
+}
+
+#ifdef _WIN32
+// Waits until the TCP port is ready, wraps |req| in a heap-allocated
+// RequestFromNamedPipe and queues its Start() on the server's worker pool.
+// The request object deletes itself after replying.
+void ThreadpoolHttpServer::SendNamedPipeJobToWorkerThread(
+    NamedPipeServer::Request* req) {
+  WaitPortReady();
+  RequestFromNamedPipe* http_server_request =
+      new RequestFromNamedPipe(wm_, this, Stat(), monitor_, req);
+
+  wm_->RunClosureInPool(
+      FROM_HERE,
+      pool_,
+      NewCallback(
+          http_server_request,
+          &ThreadpoolHttpServer::RequestFromNamedPipe::Start),
+      WorkerThreadManager::PRIORITY_HIGH);
+}
+#endif
+// Waits until the TCP port is ready, moves |socket| into a heap-allocated
+// RequestFromSocket and queues its Start() on the server's worker pool.
+// The request object deletes itself in Finish().
+void ThreadpoolHttpServer::SendJobToWorkerThread(
+    ScopedSocket&& socket, SocketType socket_type) {
+  WaitPortReady();
+  RequestFromSocket* http_server_request =
+      new RequestFromSocket(wm_, std::move(socket), socket_type, Stat(),
+                            monitor_, trustedipsmanager_, this);
+  wm_->RunClosureInPool(
+      FROM_HERE,
+      pool_,
+      NewCallback(
+          http_server_request,
+          &ThreadpoolHttpServer::RequestFromSocket::Start),
+      WorkerThreadManager::PRIORITY_HIGH);
+}
+
+}  // namespace devtools_goma
diff --git a/client/threadpool_http_server.h b/client/threadpool_http_server.h
new file mode 100644
index 0000000..5a033df
--- /dev/null
+++ b/client/threadpool_http_server.h
@@ -0,0 +1,277 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_THREADPOOL_HTTP_SERVER_H_
+#define DEVTOOLS_GOMA_CLIENT_THREADPOOL_HTTP_SERVER_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+#include "lockhelper.h"
+#ifdef _WIN32
+#include "named_pipe_server_win.h"
+#endif
+#include "scoped_fd.h"
+#include "simple_timer.h"
+#include "string_piece.h"
+#include "worker_thread_manager.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class TrustedIpsManager;
+
+class ThreadpoolHttpServer {
+ public:
+  typedef int RegisteredClosureID;
+
+  enum SocketType {
+    SOCKET_TCP,  // for http of status page etc.
+    SOCKET_IPC,  // for IPC between gomacc and compiler_proxy.
+    NUM_SOCKET_TYPES
+  };
+  class Stat {
+   public:
+    Stat()
+        : req_size(0),
+          resp_size(0),
+          waiting_time_msec(0),
+          read_req_time_msec(0),
+          handler_time_msec(0),
+          write_resp_time_msec(0) {}
+    ~Stat() {}
+
+    SimpleTimer timer;
+    size_t req_size;
+    size_t resp_size;
+    // Time (in ms).
+    int waiting_time_msec;
+    int read_req_time_msec;
+    int handler_time_msec;
+    int write_resp_time_msec;
+  };
+  class Monitor {
+   public:
+    Monitor() {}
+    virtual ~Monitor() {}
+
+    virtual void FinishHandle(const Stat& stat) = 0;
+
+   private:
+    DISALLOW_COPY_AND_ASSIGN(Monitor);
+  };
+
+  class HttpServerRequest;
+  class HttpHandler {
+   public:
+    HttpHandler() {}
+    virtual ~HttpHandler() {}
+
+    // HandleHttpRequest is responsible for freeing http_server_request by
+    // calling http_server_request->SendReply()
+    virtual void HandleHttpRequest(HttpServerRequest* http_server_request) = 0;
+
+    virtual bool shutting_down() = 0;
+
+   private:
+    DISALLOW_COPY_AND_ASSIGN(HttpHandler);
+  };
+
+  class HttpServerRequest {
+   public:
+    HttpServerRequest(WorkerThreadManager* wm,
+                      ThreadpoolHttpServer* server,
+                      const Stat& stat,
+                      Monitor* monitor);
+
+    // Checks credential of peer.
+    virtual bool CheckCredential() = 0;
+
+    virtual bool IsTrusted() = 0;
+
+    // Send response and delete this object.
+    virtual void SendReply(const string& response) = 0;
+
+    // Full request string with all the headers and body.
+    const string& request() const { return request_; }
+
+    StringPiece header() const {
+      StringPiece h(request_.data(), request_offset_);
+      return h;
+    }
+    size_t header_size() const { return request_offset_; }
+
+    // Request body data.
+    const char* request_content() const {
+      return request_.data() + request_offset_;
+    }
+    size_t request_content_length() const {
+      return request_content_length_;
+    }
+
+    // "GET", "POST", etc.
+    const string& method() const { return method_; }
+    const string& req_path() const { return req_path_; }
+
+    // The string after ?
+    const string& query() const { return query_; }
+
+    pid_t peer_pid() const { return peer_pid_; }
+
+    // if the HTTP Request was valid.
+    bool ParsedValidHttpRequest() const { return parsed_valid_http_request_; }
+
+    const ThreadpoolHttpServer& server() const { return *server_; }
+
+    // Sets callback for request close.
+    // It may be called on other thread than request's thread.
+    // callback will be called on the thread where this method was called.
+    virtual void NotifyWhenClosed(OneshotClosure* callback) = 0;
+
+   protected:
+    virtual ~HttpServerRequest() {}
+
+    WorkerThreadManager* wm_;
+    WorkerThreadManager::ThreadId thread_id_;
+    ThreadpoolHttpServer* server_;
+    Monitor* monitor_;
+
+    size_t request_offset_;
+    size_t request_content_length_;
+    size_t request_len_;
+    string request_;
+    string method_;
+    string req_path_;
+    string query_;
+    string response_;
+    // true if it got valid http request.
+    bool parsed_valid_http_request_;
+
+    pid_t peer_pid_;
+    Stat stat_;
+
+   private:
+    DISALLOW_COPY_AND_ASSIGN(HttpServerRequest);
+  };
+  static const RegisteredClosureID kInvalidClosureId = 0;
+
+  ThreadpoolHttpServer(const string& listen_addr,
+                       int port, int num_find_ports,
+                       WorkerThreadManager* wm, int num_threads,
+                       HttpHandler* http_handler, int max_num_sockets);
+  ~ThreadpoolHttpServer();
+
+  void HandleIncoming(HttpServerRequest* request);
+
+  // Starts the main loop waiting for HTTP connections.
+  int Loop();
+
+  // Waits until all http requests have been processed.
+  void Wait();
+
+  // Sets monitor.  Doesn't take ownership.
+  void SetMonitor(Monitor* monitor);
+
+  // Sets TrustedIpsManager.  Doesn't take ownership.
+  void SetTrustedIpsManager(TrustedIpsManager* trustedipsmanager);
+
+  // Starts IPC handlers on addr.  Must call before Loop.
+  // num_threads and max_overcommit_incoming_sockets are used
+  // to calculate max num incoming requests for IPC handlers.
+  void StartIPC(const string& addr, int num_threads,
+                int max_overcommit_incoming_sockets);
+
+  // Stops IPC handlers.
+  void StopIPC();
+
+  // Utility function: Parse HTTP request string and extract method,
+  // path, and query string.
+  static bool ParseRequestLine(
+      StringPiece request, string* method, string* path, string* query);
+
+  int port() const { return port_; }
+
+  const string& un_socket_name() const { return un_socket_name_; }
+
+  // Registers idle closure.  closure must be permanent callback.
+  // closure will be called after idle counter reaches "count".
+  // Takes ownership of closure.
+  RegisteredClosureID RegisterIdleClosure(
+      SocketType socket_type, int count,
+      std::unique_ptr<PermanentClosure> closure);
+  // Unregisters idle closure.
+  void UnregisterIdleClosure(RegisteredClosureID id);
+
+  // Idle counter for socket_type.
+  int idle_counter(SocketType socket_type) const;
+
+  void SuspendIdleCounter();
+  void ResumeIdleCounter();
+
+ private:
+  class RequestFromSocket;
+  class IdleClosure;
+#ifdef _WIN32
+  class PipeHandler;
+  class RequestFromNamedPipe;
+#else
+
+  // Opens unix domain socket to serve.  Must call before Loop().
+  // Returns true if unix domain socket is successfully opened.
+  // On Windows, the path is actually the port number for socket IPC.
+  bool OpenUnixDomainSocket(const string& path);
+  void CloseUnixDomainSocket();
+#endif
+
+  // Sets limits of accepting sockets for "socket_type".
+  void SetAcceptLimit(int n, SocketType socket_type);
+
+  void AddAccept(SocketType socket_type);
+  void RemoveAccept(SocketType socket_type);
+
+  void WaitPortReady();
+#ifdef _WIN32
+  void SendNamedPipeJobToWorkerThread(NamedPipeServer::Request* req);
+#endif
+  void SendJobToWorkerThread(ScopedSocket&& socket, SocketType socket_type);
+  void UpdateSocketIdleUnlocked(SocketType socket_type);
+
+  const string listen_addr_;
+  int port_;
+  int port_ready_;  // guarded by mu_
+  int num_find_ports_;
+  WorkerThreadManager* wm_;
+  int pool_;
+  int num_http_threads_;
+  HttpHandler* http_handler_;
+  Monitor* monitor_;
+  TrustedIpsManager* trustedipsmanager_;
+  ScopedSocket un_socket_;
+  string un_socket_name_;
+
+  const int max_num_sockets_;
+
+  Lock mu_;
+  ConditionVariable cond_;
+  int max_sockets_[NUM_SOCKET_TYPES];
+  int num_sockets_[NUM_SOCKET_TYPES];
+  int idle_counter_[NUM_SOCKET_TYPES];
+  bool idle_counting_;
+  std::vector<IdleClosure*> idle_closures_;
+  RegisteredClosureID last_closure_id_;
+
+#ifdef _WIN32
+  std::unique_ptr<PipeHandler> pipe_handler_;
+  std::unique_ptr<NamedPipeServer> pipe_server_;
+#endif
+
+  DISALLOW_COPY_AND_ASSIGN(ThreadpoolHttpServer);
+};
+
+}  // namespace devtools_goma
+#endif  // DEVTOOLS_GOMA_CLIENT_THREADPOOL_HTTP_SERVER_H_
diff --git a/client/threadpool_http_server_unittest.cc b/client/threadpool_http_server_unittest.cc
new file mode 100644
index 0000000..40018b8
--- /dev/null
+++ b/client/threadpool_http_server_unittest.cc
@@ -0,0 +1,67 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "threadpool_http_server.h"
+
+#include <string>
+#include <gtest/gtest.h>
+
+using devtools_goma::ThreadpoolHttpServer;
+using std::string;
+
+namespace {
+
+TEST(ThreadpoolHttpServerTest, TestParseRequestLineWithoutQuery) {
+  static const string kRequest(
+      "GET /hoge HTTP/1.1\r\n"
+      "Host: hogehoge.com\r\n"
+      "\r\n");
+  string method, path, query;
+  EXPECT_TRUE(
+      ThreadpoolHttpServer::ParseRequestLine(
+          kRequest, &method, &path, &query));
+  EXPECT_EQ("GET", method);
+  EXPECT_EQ("/hoge", path);
+  EXPECT_EQ("", query);
+}
+
+TEST(ThreadpoolHttpServerTest, TestParseRequestLineWithQuery) {
+  static const string kRequest(
+      "GET /hoge.cgi?hoge=fugafuga HTTP/1.1\r\n"
+      "Host: hogehoge.com\r\n"
+      "\r\n");
+  string method, path, query;
+  EXPECT_TRUE(
+      ThreadpoolHttpServer::ParseRequestLine(
+          kRequest, &method, &path, &query));
+  EXPECT_EQ("GET", method);
+  EXPECT_EQ("/hoge.cgi", path);
+  EXPECT_EQ("hoge=fugafuga", query);
+}
+
+TEST(ThreadpoolHttpServerTest, BrokenRequest) {
+  static const string kRequest(
+      "GET /hoge.cgi?hoge=fugafuga\r\n"
+      "Host: hogehoge.com\r\n"
+      "\r\n");
+  string method, path, query;
+  EXPECT_FALSE(
+      ThreadpoolHttpServer::ParseRequestLine(
+          kRequest, &method, &path, &query));
+}
+
+TEST(ThreadpoolHttpServerTest, BrokenRequest2) {
+  // Try some request without CRLF
+  static const string kRequest(
+        "GET /hoge.cgi?hoge=fugafuga\n"
+        "Host: hogehoge.com\n"
+        "\n");
+  string method, path, query;
+  EXPECT_FALSE(
+      ThreadpoolHttpServer::ParseRequestLine(
+          kRequest, &method, &path, &query));
+}
+
+}  // namespace
diff --git a/client/timestamp.cc b/client/timestamp.cc
new file mode 100644
index 0000000..470de1b
--- /dev/null
+++ b/client/timestamp.cc
@@ -0,0 +1,28 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_GET_TIMESTAMP_H_
+#define DEVTOOLS_GOMA_CLIENT_GET_TIMESTAMP_H_
+
+#ifndef _WIN32
+#include <sys/time.h>
+#else
+#include "gettimeofday_helper_win.h"
+#endif
+#include <time.h>
+
+#include "timestamp.h"
+
+namespace devtools_goma {
+
+millitime_t GetCurrentTimestampMs() {
+  struct timeval now;  // wall-clock seconds plus microseconds.
+  gettimeofday(&now, nullptr);
+  return now.tv_usec / 1000 + static_cast<millitime_t>(now.tv_sec) * 1000;
+}
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_GET_TIMESTAMP_H_
diff --git a/client/timestamp.h b/client/timestamp.h
new file mode 100644
index 0000000..6388178
--- /dev/null
+++ b/client/timestamp.h
@@ -0,0 +1,16 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_TIMESTAMP_H_
+#define DEVTOOLS_GOMA_CLIENT_TIMESTAMP_H_
+
+namespace devtools_goma {
+
+typedef long long millitime_t;
+millitime_t GetCurrentTimestampMs();
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_TIMESTAMP_H_
diff --git a/client/tls_descriptor.cc b/client/tls_descriptor.cc
new file mode 100644
index 0000000..46ed75c
--- /dev/null
+++ b/client/tls_descriptor.cc
@@ -0,0 +1,353 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "tls_descriptor.h"
+
+#include <sstream>
+
+#include "callback.h"
+#include "compiler_proxy_info.h"
+#include "compiler_specific.h"
+#include "socket_descriptor.h"
+#include "glog/logging.h"
+#include "ioutil.h"
+
+namespace devtools_goma {
+
+TLSDescriptor::TLSDescriptor(
+    SocketDescriptor* desc, TLSEngine* e,
+    const Options& options,
+    WorkerThreadManager* wm)
+    : d_(desc), engine_(e), wm_(wm),
+      readable_closure_(nullptr), writable_closure_(nullptr),
+      network_write_offset_(0), ssl_pending_(false),
+      active_read_(false), active_write_(false),
+      io_failed_(false),
+      options_(options), connect_status_(READY), is_closed_(false),
+      cancel_readable_closure_(nullptr) {
+  thread_ = GetCurrentThreadId();
+}
+
+TLSDescriptor::~TLSDescriptor() {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  if (cancel_readable_closure_) {
+    cancel_readable_closure_->Cancel();
+    cancel_readable_closure_ = nullptr;
+  }
+}
+
+void TLSDescriptor::Init() {
+  if (options_.use_proxy && !engine_->IsRecycled())
+    connect_status_ = NEED_WRITE;
+
+  d_->NotifyWhenReadable(
+      NewPermanentCallback(this, &TLSDescriptor::TransportLayerReadable));
+  d_->NotifyWhenWritable(
+      NewPermanentCallback(this, &TLSDescriptor::TransportLayerWritable));
+}
+
+void TLSDescriptor::NotifyWhenReadable(
+    std::unique_ptr<PermanentClosure> closure) {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  readable_closure_ = std::move(closure);
+  active_read_ = true;
+  RestartTransportLayer();
+  VLOG(1) << "Notify when " << d_->fd()
+          << " readable " << readable_closure_.get();
+}
+
+void TLSDescriptor::NotifyWhenWritable(
+    std::unique_ptr<PermanentClosure> closure) {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  writable_closure_ = std::move(closure);
+  active_write_ = true;
+  RestartTransportLayer();
+  VLOG(1) << "Notify when " << d_->fd()
+          << " writable " << writable_closure_.get();
+}
+
+void TLSDescriptor::ClearWritable() {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  VLOG(1) << "Clear " << d_->fd() << " writable " << writable_closure_.get();
+  active_write_ = false;
+  writable_closure_.reset();
+}
+
+void TLSDescriptor::NotifyWhenTimedout(double timeout,
+                                       OneshotClosure* closure) {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  d_->NotifyWhenTimedout(timeout, closure);
+}
+
+void TLSDescriptor::ChangeTimeout(double timeout) {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  // once is_closed_, timeout closure is cleared (in StopTransportLayer)
+  if (is_closed_)
+    return;
+  d_->ChangeTimeout(timeout);
+}
+
+ssize_t TLSDescriptor::Read(void* ptr, size_t len) {
+  cancel_readable_closure_ = nullptr;  // drop (not cancel) the old handle.
+  if (io_failed_)
+    return -1;
+  // It seems to get stuck if we do not restart transport layer
+  // communications.  We guess TLS may need to send something like an ACK.
+  ResumeTransportWritable();
+
+  const int ret = engine_->Read(ptr, len);  // byte count or TLSErrorReason.
+  if (ret == TLSEngine::TLS_WANT_READ || ret == TLSEngine::TLS_WANT_WRITE) {
+    if (is_closed_) {
+      LOG(INFO) << "socket has already been closed by peer: fd=" << d_->fd();
+      return 0;  // nothing more can arrive; presumably read as EOF by caller.
+    }
+    ssl_pending_ = true;  // lets NeedRetry() report true.
+  } else if (ret < 0) {  // TLSEngine error except want read/write.
+    LOG(ERROR) << "Error occured during application read.";
+  } else {
+    ssl_pending_ = false;
+  }
+  if (is_closed_ && ret > 0) {
+    // No transport events come after close; rerun readable_closure_ to drain.
+    DCHECK(readable_closure_.get());
+    cancel_readable_closure_ = wm_->RunDelayedClosureInThread(
+        FROM_HERE, thread_, 0,
+        NewCallback(static_cast<Closure*>(readable_closure_.get()),
+                    &Closure::Run));
+  }
+  return ret;
+}
+
+ssize_t TLSDescriptor::Write(const void* ptr, size_t len) {
+  if (io_failed_ || is_closed_)
+    return -1;
+  ResumeTransportWritable();
+  const int ret = engine_->Write(ptr, len);
+  if (ret == TLSEngine::TLS_WANT_READ || ret == TLSEngine::TLS_WANT_WRITE) {
+    ssl_pending_ = true;
+  } else if (ret < 0) {  // TLSEngine error except want read/write.
+    LOG(ERROR) << "Error occured during application write.";
+  } else {
+    ssl_pending_ = false;
+  }
+  return ret;
+}
+
+bool TLSDescriptor::NeedRetry() const {
+  // TLS engine will not get interrupted but view from application side
+  // should be similar.
+  return ssl_pending_ && !io_failed_ && !is_closed_;
+}
+
+string TLSDescriptor::GetLastErrorMessage() const {
+  string err_msg = d_->GetLastErrorMessage();
+  if (!err_msg.empty())
+    err_msg.append(" ,");
+  return err_msg + "TLS engine:" + engine_->GetLastErrorMessage();
+}
+
+void TLSDescriptor::StopRead() {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  active_read_ = false;
+  if (!active_write_ && !ssl_pending_) {
+    StopTransportLayer();
+  }
+  if (cancel_readable_closure_) {
+    cancel_readable_closure_->Cancel();
+    cancel_readable_closure_ = nullptr;
+  }
+}
+
+void TLSDescriptor::StopWrite() {
+  DCHECK(THREAD_ID_IS_SELF(thread_));
+  active_write_ = false;
+  if (!active_read_ && !ssl_pending_) {
+    StopTransportLayer();
+  }
+}
+
+void TLSDescriptor::TransportLayerReadable() {
+  size_t read_size = std::min(engine_->GetBufSizeFromTransport(),
+                              sizeof(network_read_buffer_));
+  const ssize_t read_bytes = d_->Read(network_read_buffer_, read_size);
+  if (read_bytes < 0 && d_->NeedRetry())
+      return;
+
+  if (read_bytes == 0) {  // EOF.
+    LOG(INFO) << "Remote closed. "
+              << " fd=" << d_->fd()
+              << " ret=" << read_bytes
+              << " err=" << d_->GetLastErrorMessage();
+    is_closed_ = true;
+    StopTransportLayer();
+    PutClosuresInRunQueue();
+    return;
+  }
+  if (read_bytes < 0) {  // error.
+    LOG(WARNING) << "Transport layer read " << d_->fd() << " failed."
+                 << " ret=" << read_bytes
+                 << " err=" << d_->GetLastErrorMessage();
+    StopTransportLayer();
+    io_failed_ = true;
+    PutClosuresInRunQueue();
+    return;
+  } else if (connect_status_ == READY) {
+    int ret = engine_->SetDataFromTransport(
+        StringPiece(network_read_buffer_, read_bytes));
+    if (ret < 0) {  // Error in TLS engine.
+      StopTransportLayer();
+      io_failed_ = true;
+      PutClosuresInRunQueue();
+      return;
+    }
+    CHECK_EQ(ret, static_cast<int>(read_bytes));
+
+    ResumeTransportWritable();
+    if (!engine_->IsIOPending()) {
+      PutClosuresInRunQueue();
+      return;
+    }
+  } else if (connect_status_ == NEED_READ) {
+    int status_code = 0;
+    size_t offset;
+    size_t content_length;
+    proxy_response_.append(network_read_buffer_, read_bytes);
+    if (ParseHttpResponse(proxy_response_, &status_code, &offset,
+                          &content_length, nullptr)) {
+      if (status_code / 100 == 2) {
+        connect_status_ = READY;
+        ResumeTransportWritable();
+      } else {
+        LOG(ERROR) << "Proxy's status code != 2xx."
+                   << " Details:" << proxy_response_;
+        StopTransportLayer();
+        io_failed_ = true;
+        PutClosuresInRunQueue();
+      }
+    }
+  } else if (connect_status_ == NEED_WRITE) {
+    LOG(ERROR) << "Unexpected read occured when waiting writable."
+               << "buf:" << StringPiece(network_read_buffer_, read_bytes);
+  }
+}
+
+void TLSDescriptor::TransportLayerWritable() {
+
+  if (network_write_buffer_.empty()) {
+    if (connect_status_ == READY)
+      CHECK_GE(engine_->GetDataToSendTransport(&network_write_buffer_), 0);
+    else if (connect_status_ == NEED_WRITE)
+      network_write_buffer_ = CreateProxyRequestMessage();
+    network_write_offset_ = 0;
+    if (network_write_buffer_.size() == 0)
+      SuspendTransportWritable();
+    if (!engine_->IsIOPending()) {
+      PutClosuresInRunQueue();
+      return;
+    }
+  }
+
+  if (network_write_buffer_.size() - network_write_offset_ > 0) {
+    int ret = d_->Write(network_write_buffer_.c_str() + network_write_offset_,
+                        network_write_buffer_.size() - network_write_offset_);
+    if (ret < 0 && d_->NeedRetry())
+      return;
+    if (ret <= 0) {
+      LOG(WARNING) << "Transport layer write " << d_->fd() << " failed."
+                   << " ret=" << ret
+                   << " err=" << d_->GetLastErrorMessage();
+      StopTransportLayer();
+      io_failed_ = true;
+      PutClosuresInRunQueue();
+      return;
+    } else {
+      network_write_offset_ += ret;
+      if (network_write_buffer_.size() == network_write_offset_) {
+        network_write_buffer_.clear();
+        network_write_offset_ = 0;
+        if (connect_status_ == NEED_WRITE)
+          connect_status_ = NEED_READ;
+      }
+    }
+  }
+}
+
+void TLSDescriptor::PutClosuresInRunQueue() const {
+  // Queues the active readable/writable closures to run on thread_.
+  // TODO: check readable/writable of data if possible.  SSL_pending seems
+  // not to work well with BIO pair, and no writable check was found.
+  bool set_callback = false;
+  if (active_write_ && writable_closure_.get() != nullptr) {
+    wm_->RunClosureInThread(
+        FROM_HERE,
+        thread_, writable_closure_.get(),
+        WorkerThreadManager::PRIORITY_IMMEDIATE);
+    set_callback = true;
+  }
+
+  if (active_read_ && readable_closure_.get() != nullptr) {
+    wm_->RunClosureInThread(
+        FROM_HERE,
+        thread_, readable_closure_.get(),
+        WorkerThreadManager::PRIORITY_IMMEDIATE);
+    set_callback = true;
+  }
+  LOG_IF(ERROR, !set_callback)
+    << "PutClosuresInRunQueue actually did nothing. "
+    << "We expect control goes back to the user of this libary."
+    << " active_write=" << active_write_
+    << " writable_closure=" << (writable_closure_ != nullptr)
+    << " active_read=" << active_read_
+    << " readable_closure=" << (readable_closure_ != nullptr)
+    << " is_closed=" << is_closed_
+    << " io_failed=" << io_failed_;
+}
+
+void TLSDescriptor::SuspendTransportWritable() {
+  d_->StopWrite();
+  d_->UnregisterWritable();
+}
+
+void TLSDescriptor::ResumeTransportWritable() {
+  if (is_closed_) {
+    LOG(INFO) << "socket has already been closed: fd=" << d_->fd();
+    return;
+  }
+  d_->RestartWrite();
+}
+
+void TLSDescriptor::StopTransportLayer() {
+  d_->StopRead();
+  d_->StopWrite();
+  if (is_closed_) {
+    d_->ClearTimeout();
+  }
+}
+
+void TLSDescriptor::RestartTransportLayer() {
+  if (is_closed_) {
+    LOG(INFO) << "socket has already been closed: fd=" << d_->fd();
+    return;
+  }
+  d_->RestartRead();
+  d_->RestartWrite();
+}
+
+string TLSDescriptor::CreateProxyRequestMessage() {
+  // CONNECT request for the proxy; note the header name is "User-Agent".
+  std::ostringstream dest_host_port;
+  dest_host_port << options_.dest_host_name << ":" << options_.dest_port;
+  std::ostringstream http_send_message;
+  http_send_message << "CONNECT " << dest_host_port.str() << " HTTP/1.1\r\n"
+                    << "Host: " << dest_host_port.str() << "\r\n"
+                    << "User-Agent: " << kUserAgentString << "\r\n\r\n";
+  return http_send_message.str();
+}
+
+bool TLSDescriptor::CanReuse() const {
+  return !is_closed_ && !io_failed_ && d_->CanReuse();
+}
+
+}  // namespace devtools_goma
diff --git a/client/tls_descriptor.h b/client/tls_descriptor.h
new file mode 100644
index 0000000..d1e0f73
--- /dev/null
+++ b/client/tls_descriptor.h
@@ -0,0 +1,113 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_TLS_DESCRIPTOR_H_
+#define DEVTOOLS_GOMA_CLIENT_TLS_DESCRIPTOR_H_
+
+#include <memory>
+
+#include "basictypes.h"
+#include "descriptor.h"
+#include "ioutil.h"
+#include "tls_engine.h"
+#include "worker_thread_manager.h"
+
+namespace devtools_goma {
+
+class Closure;
+class SocketDescriptor;
+
+class TLSDescriptor : public Descriptor {
+ public:
+  struct Options {
+    Options() : dest_port(0), use_proxy(false) {}
+    string dest_host_name;  // with dest_port: target of the proxy CONNECT.
+    int dest_port;  // 0 until set by the caller.
+    bool use_proxy;  // true: reach the destination via an HTTP proxy.
+  };
+  // It doesn't take ownership of e and wm.
+  // It keeps desc inside, but desc should be deleted by WorkerThreadManager.
+  TLSDescriptor(
+      SocketDescriptor* desc, TLSEngine* e,
+      const Options& options,
+      WorkerThreadManager* wm);
+  ~TLSDescriptor() override;
+
+  // To be deleted by WorkerThreadManager.
+  SocketDescriptor* socket_descriptor() override { return d_; }
+
+  void NotifyWhenReadable(std::unique_ptr<PermanentClosure> closure) override;
+  void NotifyWhenWritable(std::unique_ptr<PermanentClosure> closure) override;
+  void ClearWritable() override;
+  void NotifyWhenTimedout(double timeout, OneshotClosure* closure) override;
+  void ChangeTimeout(double timeout) override;
+  ssize_t Read(void* ptr, size_t len) override;
+  ssize_t Write(const void* ptr, size_t len) override;
+
+  bool NeedRetry() const override;
+  bool CanReuse() const override;
+  // TODO: implement the same feature with shutdown(2) for SSL.
+  string GetLastErrorMessage() const override;
+  void StopRead() override;
+  void StopWrite() override;
+
+  void Init();
+
+ private:
+  void PutClosuresInRunQueue() const;
+
+  // TransportLayerReadable/Writable are called back when a socket gets ready.
+  void TransportLayerReadable();
+  void TransportLayerWritable();
+
+  // Suspend to wait writable.
+  void SuspendTransportWritable();
+  void ResumeTransportWritable();
+
+  // Stop / restart notification from transport layer.
+  void StopTransportLayer();
+  void RestartTransportLayer();
+
+  // HTTP request to ask proxy to connect the server.
+  string CreateProxyRequestMessage();
+
+  SocketDescriptor* d_;
+  TLSEngine* engine_;
+
+  WorkerThreadManager* wm_;
+  WorkerThreadManager::ThreadId thread_;
+  std::unique_ptr<PermanentClosure> readable_closure_;
+  std::unique_ptr<PermanentClosure> writable_closure_;
+  char network_read_buffer_[kBufSize];
+  string network_write_buffer_;
+  size_t network_write_offset_;
+  // Shows application read/write failed because TLS engine needs more work.
+  bool ssl_pending_;
+  // Shows readable_closure_ can be callable.
+  bool active_read_;
+  // Shows writable_closure_ can be callable.
+  bool active_write_;
+  // Shows transport layer communication failed.
+  bool io_failed_;
+
+  // HTTP proxy related parameters.
+  const Options options_;
+  enum ConnectStatus { NEED_WRITE, NEED_READ, READY} connect_status_;
+  // Shows underlying SocketDescriptor closed.
+  bool is_closed_;
+  string proxy_response_;
+  // Only used if transport layer socket is closed but we need to keep
+  // http.cc read TLSDescriptor.  (b/22515030)
+  // In such situation we need to let HttpClient::Task::DoRead to read
+  // TLSDescriptor but at the same time, we need to allow it to stop
+  // TLSDescriptor.  If TLSDescriptor is stopped, this wrapper is disabled
+  // not to run readable closure.
+  WorkerThreadManager::CancelableClosure* cancel_readable_closure_;
+  DISALLOW_COPY_AND_ASSIGN(TLSDescriptor);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_TLS_DESCRIPTOR_H_
diff --git a/client/tls_engine.h b/client/tls_engine.h
new file mode 100644
index 0000000..a7da4f5
--- /dev/null
+++ b/client/tls_engine.h
@@ -0,0 +1,89 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+// TLSEngine is an interface of Transport Layer Security (TLS) engine class.
+// This is a middle man between application and socket.
+// See: Example in http://www.openssl.org/docs/crypto/BIO_s_bio.html
+//
+// TLSEngineFactory is an interface of TLSEngine factory class.
+// Returns the TLSEngine that matches a socket descriptor.
+// If the socket descriptor is new, TLSEngine instance is created.
+
+#ifndef DEVTOOLS_GOMA_CLIENT_TLS_ENGINE_H_
+#define DEVTOOLS_GOMA_CLIENT_TLS_ENGINE_H_
+
+#include <string>
+
+#include "socket_factory.h"
+#include "string_piece.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// TLSEngine may not be synchronized.  It must be synchronized externally.
+class TLSEngine {
+ public:
+  // Error type returned by TLS engine.
+  enum TLSErrorReason {
+    TLS_NO_ERROR = 0,
+    TLS_ERROR = -1,
+    TLS_WANT_READ = -2,
+    TLS_WANT_WRITE = -3,
+    TLS_VERIFY_ERROR = -4,
+  };
+
+  // Returns true if the transport layer is not ready.
+  virtual bool IsIOPending() const = 0;
+
+  // An interface to the transport layer:
+  // Sets |data| to be sent to the transport layer.
+  // Returns |data| size (>=0) to send or TLSErrorReason if error.
+  virtual int GetDataToSendTransport(string* data) = 0;
+  // Returns size to be written to the engine.
+  virtual size_t GetBufSizeFromTransport() = 0;
+  // Sets |data| come from the transport layer.
+  // Returns size (>=0) written to the engine or TLSErrorReason if error.
+  virtual int SetDataFromTransport(const StringPiece& data) = 0;
+
+  // An interface to an application:
+  // Read and Write return number of read/write bytes if success.
+  // Otherwise, TLSErrorReason.
+  virtual int Read(void* data, int size) = 0;
+  virtual int Write(const void* data, int size) = 0;
+
+  // Returns a human readable last error message.
+  virtual string GetLastErrorMessage() const = 0;
+
+  // Returns true if the instance is recycled.
+  // This is usually used for skipping initialize process.
+  virtual bool IsRecycled() const = 0;
+
+ protected:
+  virtual ~TLSEngine() {}
+};
+
+// TLSEngineFactory is synchronized.
+class TLSEngineFactory : public SocketFactoryObserver {
+ public:
+  virtual ~TLSEngineFactory() {}
+  // Returns new TLSEngine instance used for |sock|.
+  // If |sock| is already known, its TLSEngine is returned from a pool,
+  // i.e. the caller does not have ownership of the returned value.
+  virtual TLSEngine* NewTLSEngine(int sock) = 0;
+  // A SocketFactoryObserver interface.
+  // Releases TLSEngine associated with the |sock|.
+  virtual void WillCloseSocket(int sock) = 0;
+  // Returns a human readable string of certificates and CRLs TLSEngines use.
+  virtual string GetCertsInfo() = 0;
+  // Set a hostname to connect.
+  // A subjectAltName of type dNSName in a server certificate should
+  // match with |hostname|, or TLSEngine returns TLS_VERIFY_ERROR.
+  virtual void SetHostname(const string& hostname) = 0;
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_TLS_ENGINE_H_
diff --git a/client/trustedipsmanager.cc b/client/trustedipsmanager.cc
new file mode 100644
index 0000000..1ad491b
--- /dev/null
+++ b/client/trustedipsmanager.cc
@@ -0,0 +1,94 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "trustedipsmanager.h"
+
+#include <stdlib.h>
+#ifndef _WIN32
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#endif
+
+#include <sstream>
+
+#include "glog/logging.h"
+#include "join.h"
+#include "split.h"
+
+namespace devtools_goma {
+
+TrustedIpsManager::TrustedIpsManager() {
+  // Always allow from localhost.
+  AddAllow("127.0.0.1");
+}
+
+TrustedIpsManager::~TrustedIpsManager() {
+}
+
+void TrustedIpsManager::AddAllow(const string& netspec) {
+  trusted_.push_back(NetSpec(netspec));
+}
+
+bool TrustedIpsManager::IsTrustedClient(const struct in_addr& addr) const {
+  // A client is trusted as soon as one registered network matches it.
+  for (const NetSpec& allowed : trusted_) {
+    if (allowed.Match(addr)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+string TrustedIpsManager::DebugString() const {
+  // Renders as "TrustedClients[<netspec>,<netspec>,...]".
+  std::vector<string> rendered;
+  rendered.reserve(trusted_.size());
+  for (const NetSpec& spec : trusted_) {
+    rendered.push_back(spec.DebugString());
+  }
+
+  string joined;
+  JoinStrings(rendered, ",", &joined);
+
+  std::ostringstream out;
+  out << "TrustedClients[" << joined << "]";
+  return out.str();
+}
+
+TrustedIpsManager::NetSpec::NetSpec(const string& netspec)
+    : netmask_(0xffffffff) {
+  // Parses "a.b.c.d" or "a.b.c.d/masklen" into in_addr_ and netmask_.
+  std::vector<string> res;
+  SplitStringUsing(netspec, "/", &res);
+  CHECK_GT(res.size(), 0U) << "Wrong format of netspec:" << netspec;
+  // inet_aton() returns 0 for an invalid address; in_addr_ is unusable then.
+  CHECK_NE(inet_aton(res[0].c_str(), &in_addr_), 0)
+      << "Wrong address in netspec:" << netspec;
+  if (res.size() == 2) {
+    const int masklen = atoi(res[1].c_str());
+    // A negative length would make the shift below undefined.
+    CHECK(masklen >= 0 && masklen <= 32) << "Wrong mask length:" << netspec;
+    netmask_ = (masklen == 0) ? 0 : (0xffffffff << (32 - masklen));
+  }
+  in_addr_.s_addr = htonl(ntohl(in_addr_.s_addr) & netmask_);
+}
+
+TrustedIpsManager::NetSpec::~NetSpec() {
+}
+
+bool TrustedIpsManager::NetSpec::Match(const struct in_addr& addr) const {
+  return (ntohl(addr.s_addr) & netmask_) == ntohl(in_addr_.s_addr);
+}
+
+string TrustedIpsManager::NetSpec::DebugString() const {
+  std::ostringstream out;
+  char buf[128];
+  out << inet_ntop(AF_INET, const_cast<in_addr*>(&in_addr_), buf, sizeof buf)
+      << "/" << std::hex << netmask_;
+  return out.str();
+}
+
+}  // namespace devtools_goma
diff --git a/client/trustedipsmanager.h b/client/trustedipsmanager.h
new file mode 100644
index 0000000..bf8e845
--- /dev/null
+++ b/client/trustedipsmanager.h
@@ -0,0 +1,59 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_TRUSTEDIPSMANAGER_H_
+#define DEVTOOLS_GOMA_CLIENT_TRUSTEDIPSMANAGER_H_
+
+#ifndef _WIN32
+#include <netinet/in.h>
+#else
+#include "socket_helper_win.h"
+#endif
+
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+// Allow list of IPv4 networks, used to decide whether a client address is
+// trusted.
+class TrustedIpsManager {
+ public:
+  TrustedIpsManager();
+  ~TrustedIpsManager();
+
+  // Adds "netspec" as trusted network.
+  // "netspec" is dotted-decimal IPv4 address with or without netmask length.
+  // e.g. "127.0.0.1", "192.168.1.1/24".
+  void AddAllow(const string& netspec);
+
+  // Returns true if |addr| matches any of the trusted networks.
+  bool IsTrustedClient(const struct in_addr& addr) const;
+
+  // Returns "TrustedClients[...]" listing every trusted network.
+  string DebugString() const;
+
+ private:
+  // One trusted network: an address and the netmask applied before
+  // comparing a client address against it.
+  class NetSpec {
+   public:
+    // Parses "a.b.c.d" or "a.b.c.d/masklen".
+    explicit NetSpec(const string& netspec);
+    ~NetSpec();
+
+    // Returns true if |addr| belongs to this network.
+    bool Match(const struct in_addr& addr) const;
+
+    // Returns "<address>/<netmask in hex>".
+    string DebugString() const;
+
+   private:
+    // Network address; the constructor clears the bits outside the netmask.
+    struct in_addr in_addr_;
+    // Netmask in host byte order; 0xffffffff selects a single host.
+    int netmask_;
+  };
+  // Trusted networks, in the order they were added.
+  std::vector<NetSpec> trusted_;
+
+  DISALLOW_COPY_AND_ASSIGN(TrustedIpsManager);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_TRUSTEDIPSMANAGER_H_
diff --git a/client/trustedipsmanager_unittest.cc b/client/trustedipsmanager_unittest.cc
new file mode 100644
index 0000000..e7ebeee
--- /dev/null
+++ b/client/trustedipsmanager_unittest.cc
@@ -0,0 +1,62 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "trustedipsmanager.h"
+
+#ifndef _WIN32
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#endif
+
+#include <string>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+using std::string;
+
+namespace {
+
+// Fixture owning a TrustedIpsManager, plus a helper that takes addresses as
+// dotted-decimal strings.
+class TrustedipsmanagerTest : public ::testing::Test {
+ protected:
+  // Converts |ip| to an in_addr and asks |trusted_| whether it is trusted.
+  // |ip| must be a valid dotted-decimal IPv4 address: inet_aton() returns 0
+  // otherwise, and |in| would then be passed on uninitialized, so a bad
+  // literal aborts the test instead.
+  bool IsTrustedClient(const string& ip) {
+    struct in_addr in;
+    CHECK_NE(inet_aton(ip.c_str(), &in), 0) << "Invalid IPv4 address: " << ip;
+    return trusted_.IsTrustedClient(in);
+  }
+
+  devtools_goma::TrustedIpsManager trusted_;
+};
+
+// Grows the allow list step by step (default state, a single host, a /24
+// network, then 0.0.0.0/0) and checks DebugString() and IsTrustedClient()
+// after each addition.  The steps build on each other, so order matters.
+TEST_F(TrustedipsmanagerTest, Basic) {
+  // A freshly constructed manager is expected to trust only 127.0.0.1.
+  EXPECT_EQ("TrustedClients[127.0.0.1/ffffffff]", trusted_.DebugString());
+  EXPECT_TRUE(IsTrustedClient("127.0.0.1"));
+  EXPECT_FALSE(IsTrustedClient("192.168.1.1"));
+  EXPECT_FALSE(IsTrustedClient("192.168.1.2"));
+  EXPECT_FALSE(IsTrustedClient("192.168.2.1"));
+  EXPECT_FALSE(IsTrustedClient("10.0.0.1"));
+
+  // A spec without a mask length trusts exactly that one host.
+  trusted_.AddAllow("192.168.1.1");
+  EXPECT_EQ("TrustedClients[127.0.0.1/ffffffff,192.168.1.1/ffffffff]",
+            trusted_.DebugString());
+  EXPECT_TRUE(IsTrustedClient("192.168.1.1"));
+  EXPECT_FALSE(IsTrustedClient("192.168.1.2"));
+
+  // A /24 trusts the whole 192.168.1.x range but not 192.168.2.x.
+  trusted_.AddAllow("192.168.1.0/24");
+  EXPECT_EQ("TrustedClients[127.0.0.1/ffffffff,192.168.1.1/ffffffff,"
+            "192.168.1.0/ffffff00]", trusted_.DebugString());
+  EXPECT_TRUE(IsTrustedClient("192.168.1.1"));
+  EXPECT_TRUE(IsTrustedClient("192.168.1.2"));
+  EXPECT_FALSE(IsTrustedClient("192.168.2.1"));
+
+  // A /0 mask trusts every address.
+  trusted_.AddAllow("0.0.0.0/0");
+  EXPECT_EQ("TrustedClients[127.0.0.1/ffffffff,192.168.1.1/ffffffff,"
+            "192.168.1.0/ffffff00,0.0.0.0/0]", trusted_.DebugString());
+  EXPECT_TRUE(IsTrustedClient("192.168.2.1"));
+  EXPECT_TRUE(IsTrustedClient("10.0.0.1"));
+}
+
+}  // namespace
diff --git a/client/unittest_util.cc b/client/unittest_util.cc
new file mode 100644
index 0000000..71b515f
--- /dev/null
+++ b/client/unittest_util.cc
@@ -0,0 +1,141 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "unittest_util.h"
+
+#ifdef _WIN32
+# include "config_win.h"
+# include <shlobj.h>
+#else
+# include <sys/stat.h>
+# include <sys/types.h>
+#endif
+#include <cstdio>
+#include <limits.h>
+#include <string>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "file.h"
+#include "ioutil.h"
+#include "mypath.h"
+#include "path.h"
+
+#ifdef _WIN32
+# include "posix_helper_win.h"
+#endif
+
+using std::string;
+
+namespace devtools_goma {
+
+// Creates a fresh temporary directory whose name contains |id| and stores
+// its path in |tmpdir_|.  The virtual working directory starts as "/cwd".
+// Dies if the directory cannot be created.
+// NOTE(review): the non-Windows branch creates the directory under "/tmp"
+// even though GetGomaTmpDir() is what gets checked -- confirm this is
+// intended.
+TmpdirUtil::TmpdirUtil(const string& id) : cwd_("/cwd") {
+  char tmpdir[PATH_MAX];
+  const string goma_tmpdir = GetGomaTmpDir();
+  CheckTempDirectory(goma_tmpdir);
+#ifndef _WIN32
+  static const char kTmpdirTemplate[] = "/tmp/%s_XXXXXXXX";
+  DCHECK_LT(id.size() + sizeof(kTmpdirTemplate),
+            static_cast<size_t>(PATH_MAX));
+  snprintf(tmpdir, sizeof(tmpdir), kTmpdirTemplate, id.c_str());
+  PCHECK(mkdtemp(tmpdir) != nullptr) << tmpdir;
+#else
+  static const char kTmpdirTemplate[] = "%s\\%s_XXXXXXXX";
+  // The directory prefix is part of the formatted path as well, so it has to
+  // be counted when checking that the result fits into |tmpdir|.
+  DCHECK_LT(goma_tmpdir.size() + id.size() + sizeof(kTmpdirTemplate),
+            static_cast<size_t>(PATH_MAX));
+  sprintf_s(tmpdir, sizeof(tmpdir), kTmpdirTemplate,
+            goma_tmpdir.c_str(), id.c_str());
+  CHECK(mkdtemp(tmpdir)) << "failed to make" << tmpdir
+                         << " error code=" << GetLastError();
+#endif
+  tmpdir_ = tmpdir;
+}
+
+// Hands |tmpdir_| to DeleteRecursivelyOrDie().
+// NOTE(review): judging by its name that helper removes the whole tree and
+// aborts on failure; it is not defined in this file -- confirm.
+TmpdirUtil::~TmpdirUtil() {
+  DeleteRecursivelyOrDie(tmpdir_);
+}
+
+// Returns the on-disk location of the virtual working directory, i.e.
+// |cwd_| joined onto |tmpdir_|.
+string TmpdirUtil::realcwd() const {
+  return file::JoinPath(tmpdir_, cwd_);
+}
+
+// Maps |path| to its on-disk location: |path| is first combined with |cwd_|
+// via JoinPathRespectAbsolute(), then the result is placed under |tmpdir_|.
+string TmpdirUtil::FullPath(const string& path) const {
+  const string virtual_path = file::JoinPathRespectAbsolute(cwd_, path);
+  return file::JoinPath(tmpdir_, virtual_path);
+}
+
+// Writes |data| to FullPath(|path|) with mode 0666, creating the parent
+// directories first.
+void TmpdirUtil::CreateTmpFile(const string& path, const string& data) {
+  // is_dir=false: only the directories leading to |path| are created.
+  MkdirForPath(path, false);
+  WriteStringToFileOrDie(data, FullPath(path), 0666);
+}
+
+// Creates |path| as a file with empty contents (see CreateTmpFile()).
+void TmpdirUtil::CreateEmptyFile(const string& path) {
+  CreateTmpFile(path, "");
+}
+
+// Deletes the file at FullPath(|path|).  The result of the delete call is
+// not checked, so a missing file is not reported.
+void TmpdirUtil::RemoveTmpFile(const string& path) {
+#ifndef _WIN32
+  unlink(FullPath(path).c_str());
+#else
+  DeleteFileA(FullPath(path).c_str());
+#endif
+}
+
+// Creates the directories needed for |path| (mapped through FullPath())
+// below the temporary directory.  When |is_dir| is true |path| itself is
+// created as a directory; otherwise only its parent directories are.
+void TmpdirUtil::MkdirForPath(const string& path, bool is_dir) {
+  const string fullpath = FullPath(path);
+#ifndef _WIN32
+  // Visit every '/' after the |tmpdir_| prefix and create each intermediate
+  // directory that is not accessible yet.
+  for (size_t slash = fullpath.find('/', tmpdir_.size() + 1);
+       slash != string::npos;
+       slash = fullpath.find('/', slash + 1)) {
+    const string parent = fullpath.substr(0, slash);
+    VLOG(1) << "dir:" << parent;
+    if (access(parent.c_str(), R_OK) == 0)
+      continue;
+    PCHECK(mkdir(parent.c_str(), 0777) == 0) << slash << ": " << parent;
+  }
+  if (is_dir) {
+    VLOG(1) << "dir:" << fullpath;
+    PCHECK(mkdir(fullpath.c_str(), 0777) == 0) << fullpath;
+  }
+#else
+  string dirname = fullpath;
+  if (!is_dir) {
+    // Only the parent of the file is needed; nothing to do if it exists.
+    dirname = fullpath.substr(0, fullpath.rfind('\\'));
+    if (File::IsDirectory(dirname.c_str()))
+      return;
+  }
+  const int result = SHCreateDirectoryExA(nullptr, dirname.c_str(), nullptr);
+  EXPECT_EQ(ERROR_SUCCESS, result);
+  const DWORD attr = GetFileAttributesA(dirname.c_str());
+  // TODO: revise after write patch to glog to support PLOG on Win.
+  CHECK_NE(attr, INVALID_FILE_ATTRIBUTES)
+      << dirname
+      << " error code=" << GetLastError();
+  CHECK(attr & FILE_ATTRIBUTE_DIRECTORY)
+      << dirname
+      << " attr=" << attr
+      << " error code=" << GetLastError();
+#endif
+}
+
+// Returns the path of |test_name| inside the "test" directory located two
+// levels above GetMyDirectory().  Dies if that file is not readable.
+string GetTestFilePath(const string& test_name) {
+  // This module is out/Release/ar_unittest (Linux & Mac),
+  // build\Release\ar_unittest.exe (Windows msvs),
+  // or out\Release\ar_unittest.exe (Windows ninja).
+  // Test files should be stored under test directory.
+  const string fullpath =
+      file::JoinPath(GetMyDirectory(), "..", "..", "test", test_name);
+
+  CHECK_EQ(access(fullpath.c_str(), R_OK), 0)
+    << "Cannot read test file:"
+    << " filename=" << fullpath;
+  return fullpath;
+}
+
+}  // namespace devtools_goma
diff --git a/client/unittest_util.h b/client/unittest_util.h
new file mode 100644
index 0000000..7dcc237
--- /dev/null
+++ b/client/unittest_util.h
@@ -0,0 +1,41 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_UNITTEST_UTIL_H_
+#define DEVTOOLS_GOMA_CLIENT_UNITTEST_UTIL_H_
+
+#include <string>
+
+using std::string;
+
+namespace devtools_goma {
+
+// Test helper that owns a temporary directory on disk and maps paths, taken
+// relative to a virtual current directory (|cwd_|), into it.
+class TmpdirUtil {
+ public:
+  // Creates the temporary directory; |id| becomes part of its name.
+  explicit TmpdirUtil(const string& id);
+  // Deletes the temporary directory.
+  virtual ~TmpdirUtil();
+  // Writes |data| to |path|, creating parent directories as needed.
+  // Note: avoid CreateFile not to see "CreateFileW not found" on Win.
+  virtual void CreateTmpFile(const string& path, const string& data);
+  // Same as CreateTmpFile() with empty contents.
+  virtual void CreateEmptyFile(const string& path);
+  // Creates the parent directories of |path|, and |path| itself as a
+  // directory when |is_dir| is true.
+  virtual void MkdirForPath(const string& path, bool is_dir);
+
+  // Deletes the file at |path|; failures are not reported.
+  virtual void RemoveTmpFile(const string& path);
+
+  // On-disk path of the temporary directory.
+  const string& tmpdir() const { return tmpdir_; }
+  // Virtual current directory ("/cwd" unless changed with SetCwd()).
+  const string& cwd() const { return cwd_; }
+  // On-disk location of the virtual current directory.
+  string realcwd() const;
+  // Taken by const reference so that the argument is copied only once.
+  void SetCwd(const string& cwd) { cwd_ = cwd; }
+  // On-disk location of |path|, resolved against the virtual current
+  // directory.
+  string FullPath(const string& path) const;
+
+ private:
+  string cwd_;
+  string tmpdir_;
+};
+
+string GetTestFilePath(const string& test_name);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_UNITTEST_UTIL_H_
diff --git a/client/util.cc b/client/util.cc
new file mode 100644
index 0000000..babe920
--- /dev/null
+++ b/client/util.cc
@@ -0,0 +1,321 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "util.h"
+
+#include <algorithm>
+#include <deque>
+
+#include "compiler_flags.h"
+#include "env_flags.h"
+#include "file_id.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+#include "path.h"
+#include "path_resolver.h"
+#include "split.h"
+#include "string_piece.h"
+#include "string_util.h"
+
+using std::string;
+
+namespace {
+// Separator between entries of PATH-like lists; it is platform dependent.
+// The pointer itself is const as well, so the constant cannot be re-seated.
+#ifndef _WIN32
+const char* const kPathListSep = ":";
+#else
+const char* const kPathListSep = ";";
+#endif
+
+#ifdef _WIN32
+
+// Splits a PATHEXT-style list |pathext_spec| (entries separated by
+// kPathListSep) into lower-cased extensions, keeping their order.
+std::deque<string> ParsePathExts(const string& pathext_spec) {
+  std::vector<string> pathexts;
+  if (!pathext_spec.empty()) {
+    SplitStringUsing(pathext_spec, kPathListSep, &pathexts);
+  } else {
+    // If |pathext_spec| is empty, we should use the default PATHEXT.
+    // See:
+    // http://technet.microsoft.com/en-us/library/cc723564.aspx#XSLTsection127121120120
+    static const char* kDefaultPathext = ".COM;.EXE;.BAT;.CMD";
+    SplitStringUsing(kDefaultPathext, kPathListSep, &pathexts);
+  }
+
+  for (auto& pathext : pathexts) {
+    // tolower() is undefined for negative arguments other than EOF, which a
+    // plain char holding a non-ASCII byte may be; go through unsigned char.
+    std::transform(pathext.begin(), pathext.end(), pathext.begin(),
+                   [](unsigned char c) {
+                     return static_cast<char>(::tolower(c));
+                   });
+  }
+  return std::deque<string>(pathexts.begin(), pathexts.end());
+}
+
+// Returns true if the extension of |filename| is listed in |pathexts|.
+// The part of |filename| from its last '.' onwards is lower-cased before
+// the comparison, so |pathexts| is expected to hold lower-case entries such
+// as ".exe".  A |filename| without any '.' never matches.
+bool HasExecutableExtension(const std::deque<string>& pathexts,
+                            const string& filename) {
+  const size_t pos = filename.rfind(".");
+  if (pos == string::npos)
+    return false;
+
+  string ext = filename.substr(pos);
+  // tolower() is undefined for negative arguments other than EOF, which a
+  // plain char holding a non-ASCII byte may be; go through unsigned char.
+  std::transform(ext.begin(), ext.end(), ext.begin(),
+                 [](unsigned char c) {
+                   return static_cast<char>(::tolower(c));
+                 });
+  return std::find(pathexts.begin(), pathexts.end(), ext) != pathexts.end();
+}
+
+// Tries |prefix| + extension for every entry of |pathexts|, in order, and
+// returns the first combination that names an existing non-directory.
+// Returns "" when none of them does.
+string GetExecutableWithExtension(const std::deque<string>& pathexts,
+                                  const string& prefix) {
+  for (const string& ext : pathexts) {
+    const string candidate = prefix + ext;
+    const DWORD attr = GetFileAttributesA(candidate.c_str());
+    if (attr == INVALID_FILE_ATTRIBUTES)
+      continue;
+    if ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0)
+      continue;
+    return candidate;
+  }
+  return string();
+}
+
+#endif
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+// Function that ReadCommandOutput() forwards to.  It stays nullptr until
+// InstallReadCommandOutputFunc() is called.
+static ReadCommandOutputFunc gReadCommandOutput = nullptr;
+
+// Makes |func| the implementation used by ReadCommandOutput(), replacing
+// any previously installed one.
+void InstallReadCommandOutputFunc(ReadCommandOutputFunc func) {
+  gReadCommandOutput = func;
+}
+
+// Forwards to the function installed with InstallReadCommandOutputFunc()
+// and returns its result.  Calling this before a function has been
+// installed is a fatal error; the arguments are logged to help find the
+// offending caller.
+string ReadCommandOutput(
+    const string& prog, const std::vector<string>& argv,
+    const std::vector<string>& env,
+    const string& cwd, CommandOutputOption option, int32_t* status) {
+  LOG_IF(FATAL, gReadCommandOutput == nullptr)
+      << "gReadCommandOutput should be set before calling."
+      << " prog=" << prog
+      << " cwd=" << cwd
+      << " argv=" << argv
+      << " env=" << env;
+  return gReadCommandOutput(prog, argv, env, cwd, option, status);
+}
+
+#ifdef _WIN32
+
+// Resolves |cmd| (made absolute against |cwd| when it is relative) to an
+// existing file by trying the extensions listed in |pathext_env|.
+// Returns "" when no candidate exists.
+string ResolveExtension(const string& cmd, const string& pathext_env,
+                        const string& cwd) {
+  std::deque<string> candidates = ParsePathExts(pathext_env);
+  // When |cmd| already carries a known extension, try it unmodified first.
+  if (HasExecutableExtension(candidates, cmd))
+    candidates.push_front("");
+
+  const string absolute_cmd = file::JoinPathRespectAbsolute(cwd, cmd);
+  return GetExecutableWithExtension(candidates, absolute_cmd);
+}
+
+#endif
+
+// Returns true if |candidate_path| is gomacc, determined by running it with
+// an invalid GOMA_ environment flag.  It is usually used to confirm that
+// |candidate_path| is NOT gomacc.
+// If |candidate_path| is (a copy of or a symlink to) gomacc, it will die
+// with "unknown GOMA_ parameter".
+// It assumes that a real compiler does not emit "GOMA" in its output.
+// On Windows, |path| must include the directory holding mspdb*.dll;
+// otherwise the real cl.exe pops up a dialog:
+//  This application has failed to start because mspdb100.dll was not found.
+// Error mode SEM_FAILCRITICALERRORS and SEM_NOGPFAULTERRORBOX
+// prevent a message box from popping up on error, which we set in
+// compiler_proxy.cc:main()
+bool IsGomacc(
+    const string& candidate_path,
+    const string& path,
+    const string& pathext,
+    const string& cwd) {
+  // TODO: fix workaround.
+  // Workaround not to pause with dialog when cl.exe is executed.
+  if (CompilerFlags::IsVCCommand(candidate_path))
+    return false;
+
+  const std::vector<string> argv(1, candidate_path);
+
+  std::vector<string> env;
+  env.push_back("GOMA_WILL_FAIL_WITH_UKNOWN_FLAG=true");
+  env.push_back("PATH=" + path);
+  if (!pathext.empty())
+    env.push_back("PATHEXT=" + pathext);
+
+  int32_t status = 0;
+  const string out = ReadCommandOutput(candidate_path, argv, env, cwd,
+                                       MERGE_STDOUT_STDERR, &status);
+  // gomacc is recognised by exit status 1 plus "GOMA" somewhere in the
+  // output.
+  if (status != 1)
+    return false;
+  return out.find("GOMA") != string::npos;
+}
+
+// Locates the executable that |cmd| refers to and stores its path in
+// |local_executable_path|, skipping anything that turns out to be gomacc.
+//
+// |cmd| containing a path separator is resolved against |cwd| directly;
+// otherwise every entry of |path_env| is tried in order.  On Windows the
+// extensions from |pathext_env| are tried for each candidate; elsewhere
+// |pathext_env| must be empty.
+//
+// |no_goma_path_env| (optional) starts out as |path_env| and is shortened
+// to the part after a PATH entry in which gomacc was found.
+// |is_in_relative_path| (optional) is set to whether the last examined
+// location (|cmd| itself or a PATH entry) was not an absolute path.
+// When |gomacc_fileid| is null no gomacc detection is performed.
+//
+// Returns true if an executable was found, false otherwise.
+bool GetRealExecutablePath(
+    const FileId* gomacc_fileid,
+    const string& cmd, const string& cwd,
+    const string& path_env,
+    const string& pathext_env,
+    string* local_executable_path,
+    string* no_goma_path_env,
+    bool* is_in_relative_path) {
+  CHECK(local_executable_path);
+#ifndef _WIN32
+  DCHECK(pathext_env.empty());
+#else
+  std::deque<string> pathexts = ParsePathExts(pathext_env);
+  // If |cmd| already has a known extension, try it unmodified first.
+  if (HasExecutableExtension(pathexts, cmd)) {
+    pathexts.push_front("");
+  }
+#endif
+
+  if (no_goma_path_env)
+    *no_goma_path_env = path_env;
+
+  // Fast path.
+  // If cmd contains '/', it is just cwd/cmd.
+  if (cmd.find_first_of(PathResolver::kPathSep) != string::npos) {
+    string candidate_path = file::JoinPathRespectAbsolute(cwd, cmd);
+#ifndef _WIN32
+    if (access(candidate_path.c_str(), X_OK) != 0)
+      return false;
+#else
+    candidate_path = GetExecutableWithExtension(pathexts, candidate_path);
+    if (candidate_path.empty())
+      return false;
+#endif
+    const FileId candidate_fileid(candidate_path);
+    if (is_in_relative_path)
+      *is_in_relative_path = !file::IsAbsolutePath(cmd);
+
+    if (!candidate_fileid.IsValid())
+      return false;
+
+    // Same file as gomacc itself.
+    if (gomacc_fileid && candidate_fileid == *gomacc_fileid)
+      return false;
+
+    // Different file, but it still behaves like gomacc when executed.
+    if (gomacc_fileid &&
+        IsGomacc(candidate_path, path_env, pathext_env, cwd))
+      return false;
+
+    *local_executable_path = candidate_path;
+    return true;
+  }
+
+  // Slow path: walk the entries of |path_env|.  |pos| is the start of the
+  // current entry, |next_pos| the start of the following one (or npos when
+  // the current entry is the last).
+  for (size_t pos = 0, next_pos; pos != string::npos; pos = next_pos) {
+    next_pos = path_env.find(kPathListSep, pos);
+    StringPiece dir;
+    if (next_pos == StringPiece::npos) {
+      dir.set(path_env.c_str() + pos, path_env.size() - pos);
+    } else {
+      dir.set(path_env.c_str() + pos, next_pos - pos);
+      // Step over the separator.
+      ++next_pos;
+    }
+
+    // Evaluated before the empty entry is replaced by |cwd| below.
+    if (is_in_relative_path)
+      *is_in_relative_path = !file::IsAbsolutePath(dir);
+
+    // Empty paths should be treated as the current directory.
+    if (dir.empty()) {
+      dir = cwd;
+    }
+    VLOG(2) << "dir:" << dir;
+
+    string candidate_path(PathResolver::ResolvePath(file::JoinPath(
+        file::JoinPathRespectAbsolute(cwd, dir),
+        cmd)));
+    VLOG(2) << "candidate:" << candidate_path;
+
+#ifndef _WIN32
+    if (access(candidate_path.c_str(), X_OK) != 0)
+      continue;
+#else
+    candidate_path = GetExecutableWithExtension(pathexts, candidate_path);
+    if (candidate_path.empty())
+      continue;
+#endif
+
+    FileId candidate_fileid(candidate_path);
+    if (candidate_fileid.IsValid()) {
+      if (gomacc_fileid && candidate_fileid == *gomacc_fileid &&
+          next_pos != string::npos) {
+        // file is the same as gomacc.
+        // Update local path.
+        // TODO: drop a path of gomacc only. preserve other paths
+        // For example,
+        // PATH=c:\P\MVS10.0\Common7\Tools;c:\goma;c:\P\MVS10.0\VC\bin
+        // we should not drop c:\P\MVS10.0\Common7\Tools.
+        if (no_goma_path_env)
+          *no_goma_path_env = path_env.substr(next_pos);
+      } else {
+        // file is executable, and from file id, it is different
+        // from gomacc.
+        // (This branch is also taken when the candidate has gomacc's file id
+        // but sits in the last PATH entry.)
+        if (gomacc_fileid &&
+            IsGomacc(candidate_path, path_env.substr(pos), pathext_env, cwd)) {
+          LOG(ERROR) << "You have 2 goma directories in your path? "
+                     << candidate_path << " seems gomacc";
+          if (next_pos != string::npos && no_goma_path_env)
+            *no_goma_path_env = path_env.substr(next_pos);
+          continue;
+        }
+        *local_executable_path = candidate_path;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// Platform independent getenv.
+// Returns the value of the environment variable |name|, or "" when it is
+// not set.
+string GetEnv(const string& name) {
+#ifndef _WIN32
+  const char* value = getenv(name.c_str());
+  return value != nullptr ? string(value) : string();
+#else
+  // Ask for the required buffer size first; the second call below is
+  // expected to write one character less than that (the terminator).
+  const DWORD needed = GetEnvironmentVariableA(name.c_str(), nullptr, 0);
+  if (needed == 0) {
+    CHECK(GetLastError() == ERROR_ENVVAR_NOT_FOUND);
+    return "";
+  }
+  string buffer(needed, '\0');
+  const DWORD copied =
+      GetEnvironmentVariableA(name.c_str(), &buffer[0], needed);
+  CHECK_EQ(copied, needed - 1)
+      << "GetEnvironmentVariableA failed but should not:" << name
+      << " ret=" << copied << " size=" << needed;
+  CHECK_EQ(buffer[copied], '\0');
+  // cut off the null-terminating character.
+  buffer.resize(copied);
+  return buffer;
+#endif
+}
+
+// Platform independent setenv: sets the environment variable |name| to
+// |value|, overwriting any existing value.  Failures are logged, not
+// returned.
+void SetEnv(const string& name, const string& value) {
+#ifndef _WIN32
+  const int rc = setenv(name.c_str(), value.c_str(), 1);
+  if (rc == 0)
+    return;
+  PLOG(ERROR) << "setenv name=" << name << " value=" << value;
+#else
+  if (SetEnvironmentVariableA(name.c_str(), value.c_str()))
+    return;
+  LOG_SYSRESULT(GetLastError());
+  LOG(ERROR) << "setenv name=" << name << " value=" << value;
+#endif
+}
+
+// Returns the id of the current process: GetCurrentProcessId() on Windows
+// (cast to pid_t), getpid() elsewhere.
+pid_t Getpid() {
+#ifdef _WIN32
+  return static_cast<pid_t>(::GetCurrentProcessId());
+#else
+  return getpid();
+#endif
+}
+
+// Returns the part of |nodename| before its first ".", lower-cased.
+// Returns "" if splitting |nodename| yields no pieces at all.
+string ToShortNodename(const string& nodename) {
+  std::vector<string> entries = strings::Split(nodename, ".");
+  // Never index into an empty result.  NOTE(review): whether Split() can
+  // return no pieces (e.g. for an empty |nodename|) depends on its
+  // implementation, which is not part of this file.
+  if (entries.empty())
+    return "";
+  return ToLower(entries[0]);
+}
+
+}  // namespace devtools_goma
diff --git a/client/util.h b/client/util.h
new file mode 100644
index 0000000..bc1fe47
--- /dev/null
+++ b/client/util.h
@@ -0,0 +1,189 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_UTIL_H_
+#define DEVTOOLS_GOMA_CLIENT_UTIL_H_
+
+#ifndef _WIN32
+# include <unistd.h>
+#else
+# include <direct.h>
+#endif
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#ifdef _WIN32
+#include "config_win.h"
+#endif
+
+using std::string;
+
+namespace devtools_goma {
+
+struct FileId;
+
+// Options to be used with ReadCommandOutput to specify which command output
+// will be returned.
+enum CommandOutputOption {
+  MERGE_STDOUT_STDERR,
+  STDOUT_ONLY,
+};
+
+typedef string (*ReadCommandOutputFunc)(const string& prog,
+                                        const std::vector<string>& argv,
+                                        const std::vector<string>& env,
+                                        const string& cwd,
+                                        CommandOutputOption option,
+                                        int32_t* status);
+
+// Installs new ReadCommandOutput function.
+// ReadCommandOutput function should be installed before calling it.
+void InstallReadCommandOutputFunc(ReadCommandOutputFunc func);
+
+// Calls current ReadCommandOutput function.
+// If exit status of the command is not zero and |status| == NULL,
+// then fatal error.
+// Note: You MUST call InstallReadCommandOutputFunc beforehand.
+string ReadCommandOutput(
+    const string& prog, const std::vector<string>& argv,
+    const std::vector<string>& env,
+    const string& cwd, CommandOutputOption option, int32_t* status);
+
+// Returns true if |candidate_path| (at |cwd| with PATH=|path|) is gomacc.
+// Note: this is usually used to confirm the |candidate_path| is not gomacc.
+// Note: You MUST call InstallReadCommandOutputFunc beforehand.
+bool IsGomacc(
+    const string& candidate_path, const string& path, const string& pathext,
+    const string& cwd);
+
+// Finds the real path name of |cmd| by searching |path_env|.
+// It avoids choosing a file that has the same FileId as |gomacc_fileid|.
+// It returns true on success, and |local_compiler_path| (real compiler path)
+// and |no_goma_path_env| (PATH env. without gomacc) are set.
+// On Windows, |pathext_env| is used as PATHEXT parameter.
+// Other platforms should pass an empty |pathext_env|, or it is a fatal error.
+// |cwd| represents the current working directory.
+// If the returned |local_compiler_path| depends on the current working
+// directory, |is_in_relative_path| becomes true.
+// Note: you can pass NULL for |no_goma_path_env| and |is_in_relative_path|
+// if you do not need them.
+// Note: You MUST call InstallReadCommandOutputFunc beforehand if you
+//       use gomacc_fileid.
+bool GetRealExecutablePath(
+    const FileId* gomacc_fileid,
+    const string& cmd, const string& cwd,
+    const string& path_env, const string& pathext_env,
+    string* local_compiler_path, string* no_goma_path_env,
+    bool* is_in_relative_path);
+
+#ifdef _WIN32
+// Resolves path extension of |cmd| using PATHEXT environment given with
+// |pathext_env|.  If |cmd| is not an absolute path, it is automatically
+// converted to an absolute path using |cwd|.
+string ResolveExtension(const string& cmd, const string& pathext_env,
+                        const string& cwd);
+#endif
+
+// Platform independent getenv.
+// Note: in chromium/win, gomacc can only get environments that was
+// extracted by build/toolchain/win/setup_toolchain.py.
+string GetEnv(const string& name);
+
+// Platform independent setenv.
+void SetEnv(const string& name, const string& value);
+
+// Gets iterator to the environment variable entry.
+// Searches [env_begin, env_end) for the first entry that starts with
+// "<name>=" and returns an iterator to it, or |env_end| when there is none.
+// When |ignore_case| is true the name is compared case-insensitively.
+template <typename Iter>
+Iter GetEnvIterFromEnvIter(Iter env_begin, Iter env_end,
+                           const string& name, bool ignore_case) {
+  // tolower() is undefined for negative arguments other than EOF, which a
+  // plain char holding a non-ASCII byte may be; go through unsigned char.
+  const auto to_lower = [](unsigned char c) {
+    return static_cast<char>(::tolower(c));
+  };
+
+  string key = name + "=";
+  if (ignore_case)
+    std::transform(key.begin(), key.end(), key.begin(), to_lower);
+
+  for (Iter i = env_begin; i != env_end; ++i) {
+    // Only the leading "<name>=" part of the entry takes part in the
+    // comparison.
+    string token = i->substr(0, key.length());
+    if (ignore_case)
+      std::transform(token.begin(), token.end(), token.begin(), to_lower);
+    if (token == key) {
+      return i;
+    }
+  }
+  return env_end;
+}
+
+// Gets the value of environment variable |name| from the "NAME=value"
+// entries in [env_begin, env_end), or "" when there is no such entry.
+// The case of |name| is ignored if |ignore_case| is true.
+template <typename Iter>
+string GetEnvFromEnvIter(Iter env_begin, Iter env_end,
+                         const string& name, bool ignore_case) {
+  const Iter entry =
+      GetEnvIterFromEnvIter(env_begin, env_end, name, ignore_case);
+  // Skip "NAME=" and hand back whatever follows.
+  return entry == env_end ? string() : entry->substr(name.length() + 1);
+}
+
+// Gets the value of environment variable |name| from the entries in
+// [env_begin, env_end).  Names are compared case-insensitively on Windows
+// and case-sensitively on every other platform.
+template <typename Iter>
+string GetEnvFromEnvIter(Iter env_begin, Iter env_end, const string& name) {
+#ifdef _WIN32
+  const bool ignore_case = true;
+#else
+  const bool ignore_case = false;
+#endif
+  return GetEnvFromEnvIter(env_begin, env_end, name, ignore_case);
+}
+
+// Replaces the value of environment variable |name| with |to_replace| in
+// the "NAME=value" entries in [env_begin, env_end).  The name part of the
+// entry is left as it was.  Names are compared case-insensitively on
+// Windows and case-sensitively elsewhere.
+// Returns true if an entry was found and updated, false otherwise.
+template <typename Iter>
+bool ReplaceEnvInEnvIter(Iter env_begin, Iter env_end,
+                         const string& name, const string& to_replace) {
+#ifdef _WIN32
+  const bool ignore_case = true;
+#else
+  const bool ignore_case = false;
+#endif
+  Iter entry = GetEnvIterFromEnvIter(env_begin, env_end, name, ignore_case);
+  if (entry == env_end)
+    return false;
+
+  // Everything after "NAME=" is the old value.
+  const size_t value_pos = name.size() + 1;
+  entry->replace(value_pos, entry->size() - value_pos, to_replace);
+  return true;
+}
+
+// Platform independent getpid function.
+pid_t Getpid();
+
+// Wrapper for chdir(). VS2015 warns using chdir().
+// Changes the current working directory to |path|, using chdir() or, on
+// Windows, _chdir().
+// Returns true if succeeded.
+inline bool Chdir(const char* path) {
+#ifndef _WIN32
+  return chdir(path) == 0;
+#else
+  return _chdir(path) == 0;
+#endif
+}
+
+// Convert node name to short and lower case nodename.
+// e.g.
+// slave123 -> slave123 (Linux CCompute)
+// vm123-m1.golo.chromium.org -> vm123-m1 (Mac golo)
+// BUILD123-M1 -> build123-m1 (Windows golo)
+string ToShortNodename(const string& nodename);
+
+}  // namespace devtools_goma
+
+// Use unordered_map<K, V>::reserve() if we can use C++11 library.
+// Rehashes |m| so that it has at least ceil(size / max_load_factor())
+// buckets.
+template<typename UnorderedMap>
+void UnorderedMapReserve(size_t size, UnorderedMap* m) {
+  const auto min_buckets = std::ceil(size / m->max_load_factor());
+  m->rehash(min_buckets);
+}
+
+#endif  // DEVTOOLS_GOMA_CLIENT_UTIL_H_
diff --git a/client/util_unittest.cc b/client/util_unittest.cc
new file mode 100644
index 0000000..036137e
--- /dev/null
+++ b/client/util_unittest.cc
@@ -0,0 +1,179 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "util.h"
+
+#include <algorithm>
+#include <iterator>
+#include <string>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "unittest_util.h"
+
+using std::string;
+
+namespace {
+
+#ifdef _WIN32
+// Test helper around GetRealExecutablePath() without gomacc detection.
+// A null |path_in| or |pathext_in| means "use the PATH / PATHEXT of the
+// current process", which then must not be empty.
+// Returns the located executable, or "" when nothing was found.
+string LocateExecutable(
+    const char* cwd_in, const char* path_in, const char* pathext_in,
+    const char* cmd_in) {
+  string path;
+  if (path_in != nullptr) {
+    path = path_in;
+  } else {
+    path = devtools_goma::GetEnv("PATH");
+    CHECK(!path.empty());
+  }
+
+  string pathext;
+  if (pathext_in != nullptr) {
+    pathext = pathext_in;
+  } else {
+    pathext = devtools_goma::GetEnv("PATHEXT");
+    CHECK(!pathext.empty());
+  }
+
+  string exec_path;
+  const bool found = devtools_goma::GetRealExecutablePath(
+      nullptr, cmd_in, cwd_in, path, pathext, &exec_path, nullptr, nullptr);
+  return found ? exec_path : string();
+}
+#endif
+
+}  // namespace
+
+// Exercises GetRealExecutablePath() on Windows by locating "cmd" with
+// various PATHEXT settings.  On other platforms the test body is empty.
+TEST(Util, GetRealExecutablePath) {
+  // TODO: write test for POSIX.
+#ifdef _WIN32
+  string located = LocateExecutable("", nullptr, nullptr, "cmd");
+  EXPECT_GT(located.size(), 3UL);
+
+  // Should accept command with an extension.
+  located = LocateExecutable("", nullptr, nullptr, "cmd.exe");
+  EXPECT_GT(located.size(), 7UL);
+
+  // Should ignore case.
+  located = LocateExecutable("", nullptr, nullptr, "cmd.ExE");
+  EXPECT_GT(located.size(), 7UL);
+
+  // Not existing file.
+  located = LocateExecutable("", nullptr, nullptr, "shall_not_have_this_file");
+  EXPECT_TRUE(located.empty());
+
+  // Empty PATHEXT.  Default pathext is used. i.e. it should not be empty.
+  located = LocateExecutable("", nullptr, "", "cmd");
+  EXPECT_GT(located.size(), 3UL);
+
+  // Strange PATHEXT.  Nothing should match.
+  located = LocateExecutable("", nullptr, ".non_exist_pathext", "cmd");
+  EXPECT_TRUE(located.empty());
+
+  // Expected PATHEXT.
+  located = LocateExecutable("", nullptr, ".exe", "cmd");
+  EXPECT_GT(located.size(), 3UL);
+
+  // Expected PATHEXT with upper case letters.
+  located = LocateExecutable("", nullptr, ".EXE", "cmd");
+  EXPECT_GT(located.size(), 3UL);
+
+  // Unexpected PATHEXT.
+  located = LocateExecutable("", nullptr, ".com", "cmd");
+  EXPECT_TRUE(located.empty());
+
+  // Extension is not listed in PATHEXT. Nothing should match.
+  located = LocateExecutable("", nullptr, ".com", "cmd.exe");
+  EXPECT_TRUE(located.empty());
+
+  // Expected PATHEXT comes after unexpected PATHEXT.
+  located = LocateExecutable("", nullptr, ".com;.exe", "cmd");
+  EXPECT_GT(located.size(), 3UL);
+
+  // Expected PATHEXT comes after unexpected PATHEXT (upper case letters).
+  located = LocateExecutable("", nullptr, ".COM;.EXE", "cmd");
+  EXPECT_GT(located.size(), 3UL);
+
+  // Expected PATHEXT should be automatically added even if full-path given.
+  // |input| is the full path found above with its last four characters (the
+  // extension) removed; an empty PATH is passed so only |input| can match.
+  string expected = located;
+  string input = located.substr(0, located.length() - 4);
+  EXPECT_FALSE(input.empty());
+  located = LocateExecutable("", "", nullptr, input.c_str());
+  EXPECT_EQ(expected, located);
+#endif
+  // TODO: revise this using TmpdirUtil.
+}
+
+// Checks the four-argument GetEnvFromEnvIter() for unknown names and for
+// both settings of |ignore_case|.
+TEST(Util, GetEnvFromEnvIter) {
+  using devtools_goma::GetEnvFromEnvIter;
+  std::vector<string> envs;
+  envs.push_back("PATH=/usr/bin");
+  envs.push_back("pAtHeXt=.EXE");
+
+  // Should return "" for unknown name.
+  EXPECT_EQ(GetEnvFromEnvIter(envs.begin(), envs.end(), "not_exist", true),
+            "");
+  EXPECT_EQ(GetEnvFromEnvIter(envs.begin(), envs.end(), "not_exist", false),
+            "");
+
+  // Should return "" if case is different and ignore_case=false.
+  EXPECT_EQ(GetEnvFromEnvIter(envs.begin(), envs.end(), "pAtH", false), "");
+  EXPECT_EQ(GetEnvFromEnvIter(envs.begin(), envs.end(), "pathext", false), "");
+
+  // Should return value if case is different and ignore_case=true.
+  EXPECT_EQ(GetEnvFromEnvIter(envs.begin(), envs.end(), "pAtH", true),
+            "/usr/bin");
+  EXPECT_EQ(GetEnvFromEnvIter(envs.begin(), envs.end(), "pathext", true),
+            ".EXE");
+}
+
+// Checks that ReplaceEnvInEnvIter() rewrites only the value of the matching
+// entry and reports whether an entry was found.
+TEST(Util, ReplaceEnvInEnvIter) {
+  using devtools_goma::ReplaceEnvInEnvIter;
+  std::vector<string> envs;
+  envs.push_back("dummy1=dummy");
+  envs.push_back("PATH=/usr/bin");
+  envs.push_back("dummy2=dummy");
+
+  std::vector<string> expected_envs;
+
+  // Should return false if env not found and envs should be kept as is.
+  std::copy(envs.begin(), envs.end(), std::back_inserter(expected_envs));
+  EXPECT_FALSE(ReplaceEnvInEnvIter(envs.begin(), envs.end(), "not_exist",
+                                   "should not change"));
+  EXPECT_EQ(expected_envs, envs);
+
+  // Should return true if env is replaced.
+  EXPECT_TRUE(ReplaceEnvInEnvIter(envs.begin(), envs.end(), "PATH", "/sbin"));
+  expected_envs[1] = "PATH=/sbin";
+  EXPECT_EQ(expected_envs, envs);
+
+#ifdef _WIN32
+  // Should not change the original env name.
+  // (On Windows the lookup ignores case, so "path" finds the "PATH" entry.)
+  EXPECT_TRUE(ReplaceEnvInEnvIter(envs.begin(), envs.end(), "path",
+                                  "c:\\"));
+  expected_envs[1] = "PATH=c:\\";
+  EXPECT_EQ(expected_envs, envs);
+#endif
+}
+
+// Checks that the string returned by GetEnv("PATH") has no embedded or
+// trailing NUL: rebuilding it from c_str() stops at the first NUL, so the
+// two strings are equal only if the value contains none.
+TEST(Util, GetEnvShouldReturnValueContainingNul) {
+  const string& env = devtools_goma::GetEnv("PATH");
+  EXPECT_EQ(string(env.c_str()), env);
+}
+
+// Table-driven check of ToShortNodename(): each pair is {input, expected}.
+TEST(Util, ToShortNodename) {
+  std::vector<std::pair<string, string>> testcases = {
+    // Already short and lower case: unchanged.
+    {"slave123-m1", "slave123-m1"},
+    // Everything from the first "." onwards is dropped.
+    {"build123-m1.golo.chromium.org", "build123-m1"},
+    // Upper case is converted to lower case.
+    {"BUILD123-M1", "build123-m1"},
+  };
+
+  for (const auto& tc : testcases) {
+    EXPECT_EQ(tc.second, devtools_goma::ToShortNodename(tc.first));
+  }
+}
diff --git a/client/vcflags.c b/client/vcflags.c
new file mode 100644
index 0000000..4b6de0c
--- /dev/null
+++ b/client/vcflags.c
@@ -0,0 +1,31 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+// Running "cl /nologo /Bxvcflags.exe none_exist_file.cc"
+// dumps all VC++ predefined macros.
+// This hack works for VC++ 2008 and 2010.
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(void) {
+  /* Variables whose values are echoed, one per line, in this order. */
+  static const char* const kFlagVars[] = { "MSC_CMD_FLAGS", "MSC_IDE_FLAGS" };
+  size_t i;
+
+  for (i = 0; i < sizeof(kFlagVars) / sizeof(kFlagVars[0]); ++i) {
+    char* value = NULL;
+    if (_dupenv_s(&value, NULL, kFlagVars[i]) != 0 || value == NULL)
+      continue;  /* Lookup failed or variable unset: print nothing. */
+    printf("%s\n", value);
+    free(value);
+  }
+
+  /* We must return EXIT_FAILURE here to stop cl.exe.
+     Our goal for vcflags is to dump preprocessor definitions and stop,
+     not to actually compile the code.
+   */
+  return EXIT_FAILURE;
+}
diff --git a/client/vstestrun.cc b/client/vstestrun.cc
new file mode 100644
index 0000000..db74e72
--- /dev/null
+++ b/client/vstestrun.cc
@@ -0,0 +1,112 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+// Usage:
+//  vstestrun --vsver=9.0 command line
+
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+
+#ifndef _WIN32
+#error This module is Windows only
+#endif
+#include "config_win.h"
+#include "file.h"
+#include "file_dir.h"
+#include "mypath.h"
+#include "path.h"
+#include "string_piece_utils.h"
+#include "vsvars.h"
+
+
+// Runs argv[argv0..argc) via a cmd.exe batch file that first calls
+// |vsvars_path|.  Returns the exit code, -1 if not started, 1 if unknown.
+int RunWithVSVars(std::string vsvars_path, int argv0, int argc, char** argv) {
+  std::string tmpdir = devtools_goma::GetGomaTmpDir();
+  devtools_goma::RecursivelyDelete(tmpdir);
+  std::string batchfile = file::JoinPath(tmpdir, "vsrun.bat");
+  std::ofstream batch(batchfile.c_str());
+  batch << "call \"" << vsvars_path << "\"" << std::endl;
+  for (int i = argv0; i < argc; ++i) {
+    bool need_quote = strchr(argv[i], ' ') != nullptr;
+    if (need_quote)
+      batch << "\"";
+    batch << argv[i];
+    if (need_quote)
+      batch << "\"";
+    batch << " ";
+  }
+  batch << std::endl;
+  batch.close();
+
+  PROCESS_INFORMATION pi;
+  STARTUPINFOA si;
+  ZeroMemory(&pi, sizeof(pi));
+  ZeroMemory(&si, sizeof(si));
+  si.cb = sizeof(si);
+
+  std::string cmdline = "cmd /c \"" + batchfile + "\"";
+  if (!CreateProcessA(nullptr, &(cmdline[0]), nullptr, nullptr, FALSE, 0,
+                      nullptr, ".", &si, &pi)) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Failed to run " << batchfile;
+    return -1;
+  }
+  CloseHandle(pi.hThread);
+  WaitForSingleObject(pi.hProcess, INFINITE);
+  DWORD exit_status = 1;
+  // GetExitCodeProcess() returns zero on failure, whatever |exit_status| holds.
+  if (!GetExitCodeProcess(pi.hProcess, &exit_status)) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Failed to get exit code";
+    exit_status = 1;
+  }
+  CloseHandle(pi.hProcess);
+  LOG(INFO) << "exit_status:" << exit_status;
+  return exit_status;
+}
+
+int main(int argc, char** argv) {
+  std::vector<std::string> vsvers;  // Values of the leading --vsver= options.
+  int argv0 = -1;  // Index of the first non --vsver= argument; -1 if none.
+  for (int i = 1; i < argc; i++) {
+    if (strings::StartsWith(argv[i], "--vsver=")) {
+      vsvers.push_back(argv[i] + strlen("--vsver="));
+      continue;
+    }
+    argv0 = i;
+    break;
+  }
+  if (argv0 < 0) {
+    std::cerr << "Usage:" << argv[0] << " [--vsver=version] command line..."
+              << std::endl;
+    exit(1);
+  }
+  LOG(INFO) << "argv0=" << argv0;
+  if (vsvers.empty()) {
+    vsvers.push_back("12.0");  // Default when no --vsver= is given.
+  }
+  // Collect the vsvars paths found for every requested version.
+  std::set<std::string> vsvars;
+  for (const auto& vsver : vsvers) {
+    LOG(INFO) << "vsver:" << vsver;
+    devtools_goma::GetVSVarsPath(vsver, &vsvars);
+  }
+  CHECK(!vsvars.empty()) << vsvers;
+  // Run the command once per path; exit with the first non-zero result.
+  for (const auto& iter : vsvars) {
+    int r = RunWithVSVars(iter, argv0, argc, argv);
+    if (r != 0) {
+      LOG(ERROR) << "Failed to run with " << iter;
+      exit(r);
+    }
+  }
+  exit(0);
+}
diff --git a/client/vsvars.cc b/client/vsvars.cc
new file mode 100644
index 0000000..48ae08d
--- /dev/null
+++ b/client/vsvars.cc
@@ -0,0 +1,91 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+#ifndef _WIN32
+#error This module is Windows only
+#endif
+
+#include <memory>
+
+#include "vsvars.h"
+
+#include <limits.h>
+#include <windows.h>
+#include <winbase.h>
+#include <winreg.h>
+
+#include <glog/logging.h>
+#include <glog/stl_logging.h>
+
+#include "config_win.h"
+#include "path.h"
+#include "posix_helper_win.h"
+
+namespace {
+
+const char* kVCRegPath[] = {  // HKLM key prefixes; a VS version is appended.
+  "SOFTWARE\\Microsoft\\VisualStudio\\",
+  "SOFTWARE\\Wow6432Node\\Microsoft\\VisualStudio\\",
+  "SOFTWARE\\Wow6432Node\\Microsoft\\VCExpress\\",
+};
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+// Reads the "InstallDir" string value of HKEY_LOCAL_MACHINE\|reg_path|.
+// Logs the registry error and returns "" when it cannot be read.
+string GetVCInstallDir(const string& reg_path) {
+  HKEY regKey;
+  LONG ret = RegOpenKeyExA(HKEY_LOCAL_MACHINE, reg_path.c_str(), 0, KEY_READ,
+                           &regKey);
+  if (ret != ERROR_SUCCESS) {
+    // Registry functions return their error code; GetLastError() is not set.
+    LOG_SYSRESULT(ret);
+    LOG(ERROR) << "Failed to find regkey for " << reg_path;
+    return "";
+  }
+  string install_dir;
+  DWORD reg_type;
+  DWORD data_size = PATH_MAX;
+  std::unique_ptr<char[]> data(new char[data_size + 1]);
+  ret = RegQueryValueExA(regKey, "InstallDir", nullptr, &reg_type,
+                         reinterpret_cast<LPBYTE>(data.get()), &data_size);
+  if (ret == ERROR_MORE_DATA) {
+    CHECK_GT(data_size, 0U);
+    data.reset(new char[data_size + 1]);
+    ret = RegQueryValueExA(regKey, "InstallDir", nullptr, &reg_type,
+                           reinterpret_cast<LPBYTE>(data.get()), &data_size);
+  }
+  if (ret == ERROR_SUCCESS) {
+    data[data_size] = '\0';  // REG_SZ data may be stored without a terminator.
+    install_dir = string(data.get());
+  } else {
+    LOG_SYSRESULT(ret);
+    LOG(ERROR) << "Failed to get InstallDir for " << reg_path;
+  }
+  RegCloseKey(regKey);
+  return install_dir;
+}
+
+void GetVSVarsPath(string vs_version, std::set<string>* vsvars) {
+  // Probe every known registry root; each hit may add one path to |vsvars|.
+  for (const auto* reg_root : kVCRegPath) {
+    string ide_dir = GetVCInstallDir(reg_root + vs_version);
+    VLOG(1) << "VC " << reg_root << vs_version << " " << ide_dir;
+    if (ide_dir.empty())
+      continue;
+    // TODO: check vsvars64.bat for x64 support?
+    string vsvar_path = file::JoinPath(
+        file::JoinPath(file::JoinPath(ide_dir, ".."), "Tools"), "vsvars32.bat");
+    if (access(vsvar_path.c_str(), R_OK) != 0) {
+      LOG(ERROR) << "vsvars32.bat not found:" << vsvar_path;
+      continue;
+    }
+    vsvars->insert(vsvar_path);
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/client/vsvars.h b/client/vsvars.h
new file mode 100644
index 0000000..c3a9d51
--- /dev/null
+++ b/client/vsvars.h
@@ -0,0 +1,36 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_VSVARS_H_
+#define DEVTOOLS_GOMA_CLIENT_VSVARS_H_
+
+#ifndef _WIN32
+#error This module is Windows only
+#endif
+
+#include <set>
+#include <string>
+
+using std::string;
+
+namespace devtools_goma {
+
+// Gets VC InstallDir from |reg_path| in HKEY_LOCAL_MACHINE.
+// Returns the path stored in the InstallDir registry value, e.g.
+// c:\Program Files (x86)\Microsoft Visual Studio 12.0\Common7\IDE
+// Returns empty string if not found.
+string GetVCInstallDir(const string& reg_path);
+
+// Gets vsvars32.bat path for |vs_version|.
+// |vs_version| is something like "12.0", "11.0", etc.
+// For example:
+//   "12.0" -> Visual Studio 2013
+//   "11.0" -> Visual Studio 2012
+//   "10.0" -> Visual Studio 2010
+void GetVSVarsPath(string vs_version, std::set<string>* vsvars);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_VSVARS_H_
diff --git a/client/watchdog.cc b/client/watchdog.cc
new file mode 100644
index 0000000..74f6eb5
--- /dev/null
+++ b/client/watchdog.cc
@@ -0,0 +1,122 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "watchdog.h"
+
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+#include "callback.h"
+#include "compiler_specific.h"
+#include "compile_service.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+#include "ioutil.h"
+#include "mypath.h"
+#include "path.h"
+#include "threadpool_http_server.h"
+#include "util.h"
+
+#ifdef _WIN32
+#include "posix_helper_win.h"
+#endif
+
+namespace devtools_goma {
+
+#ifndef _WIN32
+static const char *kGomaccName = "gomacc";
+#else
+static const char *kGomaccName = "gomacc.exe";
+#endif
+
+Watchdog::Watchdog()
+    : dir_(GetMyDirectory()),
+      gomacc_path_(file::JoinPath(dir_, kGomaccName)),
+      server_(nullptr),
+      idle_counter_(0),
+      service_(nullptr),
+      closure_id_(ThreadpoolHttpServer::kInvalidClosureId) {
+}
+
+Watchdog::~Watchdog() {
+  LOG(INFO) << "stop watchdog";
+  if (server_ && closure_id_ != ThreadpoolHttpServer::kInvalidClosureId) {
+    server_->UnregisterIdleClosure(closure_id_);
+    closure_id_ = ThreadpoolHttpServer::kInvalidClosureId;
+  }
+  server_ = nullptr;
+  service_ = nullptr;
+}
+
+void Watchdog::Start(ThreadpoolHttpServer* server, int count) {
+  LOG(INFO) << "start watchdog in " << count << " idle count.";
+  server_ = server;
+  idle_counter_ = count;
+  std::unique_ptr<PermanentClosure> closure(
+      NewPermanentCallback(this, &Watchdog::Check));
+  closure_id_ = server_->RegisterIdleClosure(
+      ThreadpoolHttpServer::SOCKET_IPC, count,
+      std::move(closure));
+}
+
+void Watchdog::SetTarget(CompileService* service,
+                         const std::vector<string>& goma_ipc_env) {
+  service_ = service;
+  goma_ipc_env_ = goma_ipc_env;
+  LOG(INFO) << "watchdog target:" << goma_ipc_env;
+}
+
+void Watchdog::Check() {
+  if (server_ == nullptr || service_ == nullptr) {
+    LOG(ERROR) << "watchdog: no server or service.";
+    return;
+  }
+  int last_idle_counter =
+      server_->idle_counter(ThreadpoolHttpServer::SOCKET_IPC);
+  if (last_idle_counter < idle_counter_) {
+    LOG(WARNING) << "not idle:" << last_idle_counter << " < " << idle_counter_;
+    return;
+  }
+  // Watchdog runs "gomacc port", which will call /portz, but we don't want
+  // this request to mark the server as active.
+  // Keep idle while it's checking port via goma ipc.
+  server_->SuspendIdleCounter();
+  // NOTE: every Quit() path below returns without ResumeIdleCounter().
+  if (access(gomacc_path_.c_str(), X_OK) != 0) {
+    LOG(INFO) << "gomacc:" << gomacc_path_ << " not found";
+    service_->Quit();
+    return;
+  }
+  // Run "gomacc port" in |dir_| with the environment given to SetTarget().
+  std::vector<string> argv;
+  argv.push_back(gomacc_path_);
+  argv.push_back("port");
+  std::vector<string> env(goma_ipc_env_);
+  int32_t status = 0;
+  const string out = ReadCommandOutput(gomacc_path_, argv, env, dir_,
+                                       MERGE_STDOUT_STDERR, &status);
+  if (status != 0) {
+    LOG(ERROR) << "ReadCommandOutput gets non-zero exit code. Going to quit."
+               << " gomacc_path=" << gomacc_path_
+               << " status=" << status
+               << " cwd=" << dir_;
+    service_->Quit();
+    return;
+  }
+  int port = atoi(out.c_str());  // Output that is not a number parses as 0.
+  if (port != server_->port()) {
+    LOG(INFO) << "gomacc port:" << port << " not match with"
+              << " my port:" << server_->port()
+              << " gomacc-out:" << out;
+    service_->Quit();
+    return;
+  }
+  LOG(INFO) << "gomacc port match with my port:" << port;
+  server_->ResumeIdleCounter();
+  FlushLogFiles();
+}
+
+}  // namespace devtools_goma
diff --git a/client/watchdog.h b/client/watchdog.h
new file mode 100644
index 0000000..2ffc2fc
--- /dev/null
+++ b/client/watchdog.h
@@ -0,0 +1,54 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_WATCHDOG_H_
+#define DEVTOOLS_GOMA_CLIENT_WATCHDOG_H_
+
+#include <string>
+#include <vector>
+
+#include "basictypes.h"
+#include "threadpool_http_server.h"
+
+using std::string;
+
+namespace devtools_goma {
+
+class Closure;
+class CompileService;
+
+// compiler proxy watchdog.
+// It periodically runs "gomacc port" and checks that the port is the same as
+// this process's port.  If it doesn't match, the compiler proxy quits.
+class Watchdog {
+ public:
+  Watchdog();
+  ~Watchdog();
+
+  // Starts watchdog with server's idle timer.
+  // Doesn't take ownership of server.
+  void Start(ThreadpoolHttpServer* server, int count);
+
+  // Sets watchdog target.
+  // Doesn't take ownership of service.
+  void SetTarget(CompileService* service,
+                 const std::vector<string>& goma_ipc_env);
+ private:
+  void Check();  // Idle-closure body; quits |service_| when the check fails.
+
+  const string dir_;  // Directory used as cwd when running gomacc.
+  const string gomacc_path_;  // |dir_| joined with the gomacc binary name.
+  ThreadpoolHttpServer* server_;  // Not owned.
+  int idle_counter_;  // Idle count given to Start().
+  CompileService* service_;  // Not owned.
+  std::vector<string> goma_ipc_env_;  // Environment passed to gomacc.
+  ThreadpoolHttpServer::RegisteredClosureID closure_id_;  // From Start().
+
+  DISALLOW_COPY_AND_ASSIGN(Watchdog);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_WATCHDOG_H_
diff --git a/client/worker_thread.cc b/client/worker_thread.cc
new file mode 100644
index 0000000..9b3f484
--- /dev/null
+++ b/client/worker_thread.cc
@@ -0,0 +1,645 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "worker_thread.h"
+
+#include <algorithm>
+#include <sstream>
+
+#include "autolock_timer.h"
+#include "callback.h"
+#include "compiler_specific.h"
+#include "socket_descriptor.h"
+#include "glog/logging.h"
+#include "ioutil.h"
+
+#ifdef _WIN32
+# include "socket_helper_win.h"
+#endif
+
+namespace {
+
+const long long kNanoSecondsPerSecond = 1000000000LL;
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+WorkerThreadManager::WorkerThread::ClosureData::ClosureData(
+    const char* const location,
+    Closure* closure,
+    int queuelen,
+    int tick,
+    long long timestamp_ns)
+    : location_(location),
+      closure_(closure),
+      queuelen_(queuelen),
+      tick_(tick),
+      timestamp_ns_(timestamp_ns) {
+}
+
+WorkerThreadManager::WorkerThread::ClosureData::ClosureData() :
+    location_("idle"),
+    closure_(nullptr),
+    queuelen_(0),
+    tick_(0),
+    timestamp_ns_(0) {
+}
+
+void WorkerThreadManager::WorkerThread::DelayedClosureImpl::Run() {
+  Closure* const to_run = GetClosure();
+  if (to_run == nullptr) {
+    VLOG(1) << "closure " << location() << " has been cancelled";
+  } else {
+    VLOG(3) << "delayed=" << to_run;
+    to_run->Run();
+  }
+  // This object is self-owned: it is destroyed once it has been run.
+  delete this;
+}
+
+class WorkerThreadManager::WorkerThread::PeriodicClosure {
+ public:
+  PeriodicClosure(PeriodicClosureId id, const char* const location,
+                  double now, int ms, std::unique_ptr<PermanentClosure> closure)
+      : id_(id),
+        location_(location),
+        last_time_(now),
+        periodic_ms_(ms),
+        closure_(std::move(closure)) {
+  }
+
+  PeriodicClosureId id() const { return id_; }
+  const char* location() const { return location_; }
+  // Returns the closure, restarting the period, once |periodic_ms_| has elapsed.
+  PermanentClosure* GetClosure(double now) {
+    CHECK_GE(now, last_time_);
+    if (now >= last_time_ + (periodic_ms_ / 1000.0)) {
+      last_time_ = now;
+      return closure_.get();
+    }
+    return nullptr;
+  }
+
+  PermanentClosure* closure() const { return closure_.get(); }
+  std::unique_ptr<PermanentClosure> ReleaseClosure() {
+    return std::move(closure_);  // |closure_| is left empty afterwards.
+  }
+
+ private:
+  const PeriodicClosureId id_;
+  const char* const location_;
+  double last_time_;  // Seconds; creation time or last time GetClosure() fired.
+  const int periodic_ms_;  // Period in milliseconds.
+  std::unique_ptr<PermanentClosure> closure_;
+  DISALLOW_COPY_AND_ASSIGN(PeriodicClosure);
+};
+
+WorkerThreadManager::WorkerThread::WorkerThread(
+    WorkerThreadManager* wm, int pool, const std::string& name)
+    : wm_(wm),
+      pool_(pool),
+      handle_(kNullThreadHandle),
+      tick_(0),
+      now_ns_(0),
+      shutting_down_(false),
+      quit_(false),
+      name_(name),
+      cond_handle_(&mu_),
+      cond_id_(&mu_),
+      auto_lock_stat_next_closure_(nullptr),
+      auto_lock_stat_poll_events_(nullptr) {
+  int pipe_fd[2];
+#ifndef _WIN32
+  PCHECK(pipe(pipe_fd) == 0);
+#else
+  CHECK_EQ(async_socketpair(pipe_fd), 0);
+#endif
+  ScopedSocket pr(pipe_fd[0]);
+  PCHECK(pr.SetCloseOnExec());
+  PCHECK(pr.SetNonBlocking());
+  ScopedSocket pw(pipe_fd[1]);
+  PCHECK(pw.SetCloseOnExec());
+  PCHECK(pw.SetNonBlocking());
+  id_ = 0;
+  // poller takes ownership of both pipe fds.
+  poller_.reset(DescriptorPoller::NewDescriptorPoller(
+      new SocketDescriptor(std::move(pr), PRIORITY_HIGH, this),
+      std::move(pw)));
+  timer_.Start();
+  if (g_auto_lock_stats) {
+    // TODO: Split stats per pool.
+    auto_lock_stat_next_closure_ = g_auto_lock_stats->NewStat(
+        "worker_thread::NextClosure");
+
+    auto_lock_stat_poll_events_ = g_auto_lock_stats->NewStat(
+        "descriptor_poller::PollEvents");
+  }
+  for (int priority = PRIORITY_MIN; priority < NUM_PRIORITIES; ++priority) {
+    max_queuelen_[priority] = 0;
+    max_wait_time_ns_[priority] = 0;
+  }
+}
+
+WorkerThreadManager::WorkerThread::~WorkerThread() {
+  CHECK_EQ(kNullThreadHandle, handle_);
+  CHECK(!id_);
+}
+
+/* static */
+void WorkerThreadManager::WorkerThread::Initialize() {
+#ifndef _WIN32
+  pthread_once(&key_worker_once_,
+      &WorkerThreadManager::WorkerThread::InitializeWorkerKey);
+#else
+  InitOnceExecuteOnce(&key_worker_once_,
+      &WorkerThreadManager::WorkerThread::InitializeWorkerKey,
+      nullptr, nullptr);
+#endif
+}
+
+/* static */
+WorkerThreadManager::WorkerThread*
+WorkerThreadManager::WorkerThread::GetCurrentWorker() {
+#ifndef _WIN32
+  return static_cast<WorkerThread*>(pthread_getspecific(key_worker_));
+#else
+  return static_cast<WorkerThread*>(TlsGetValue(key_worker_));
+#endif
+}
+
+long long WorkerThreadManager::WorkerThread::NowInNs() {
+  if (now_ns_ == 0)  // 0 means "not sampled yet"; Dispatch() resets it to 0.
+    now_ns_ = timer_.GetInNanoSeconds();
+  return now_ns_;
+}
+
+double WorkerThreadManager::WorkerThread::Now() {
+  return static_cast<double>(NowInNs()) / kNanoSecondsPerSecond;
+}
+
+void WorkerThreadManager::WorkerThread::Shutdown() {
+  AUTOLOCK(lock, &mu_);
+  shutting_down_ = true;
+}
+
+void WorkerThreadManager::WorkerThread::Quit() {
+  AUTOLOCK(lock, &mu_);
+  shutting_down_ = true;
+  quit_ = true;
+}
+
+void WorkerThreadManager::WorkerThread::ThreadMain() {
+#ifndef _WIN32
+  pthread_setspecific(key_worker_, this);  // Read back by GetCurrentWorker().
+#else
+  TlsSetValue(key_worker_, this);  // Read back by GetCurrentWorker().
+#endif
+  {
+    AUTOLOCK(lock, &mu_);
+    while (handle_ == kNullThreadHandle)  // Start() signals |cond_handle_|.
+      cond_handle_.Wait();
+  }
+  CHECK_NE(handle_, kNullThreadHandle);
+  {
+    AUTOLOCK(lock, &mu_);
+    id_ = GetThreadId(handle_);
+    VLOG(1) << "Start thread:" << id_;
+    cond_id_.Signal();  // Wakes Start(), which waits for a non-zero |id_|.
+  }
+  while (Dispatch()) { }  // Loops until Dispatch() returns false.
+  LOG(INFO) << id_ << " Dispatch loop finished";
+  {
+    AUTOLOCK(lock, &mu_);
+    for (int priority = PRIORITY_MIN; priority < NUM_PRIORITIES; ++priority) {
+      CHECK(pendings_[priority].empty());
+    }
+    CHECK(descriptors_.empty());
+    CHECK(periodic_closures_.empty());
+    CHECK(quit_);
+  }
+}
+
+bool WorkerThreadManager::WorkerThread::Dispatch() {
+  now_ns_ = 0;  // Forget the cached time so NowInNs() samples again.
+  if (!NextClosure())
+    return false;
+  if (current_.closure_ == nullptr)  // NextClosure() found nothing to run.
+    return true;
+  VLOG(2) << "Loop closure=" << current_.closure_;
+  long long start_ns = timer_.GetInNanoSeconds();
+  current_.closure_->Run();
+  long long duration_ns = timer_.GetInNanoSeconds() - start_ns;
+  static const double kLongClosureSec = 60.0;  // Warn above this run time.
+  if (duration_ns > kLongClosureSec * kNanoSecondsPerSecond) {
+    LOG(WARNING) << id_ << " closure run too long:"
+                 << static_cast<double>(duration_ns) / kNanoSecondsPerSecond
+                 << " sec"
+                 << " " << current_.location_
+                 << " " << current_.closure_;
+  }
+  return true;
+}
+
+#ifndef _WIN32
+pthread_once_t WorkerThreadManager::WorkerThread::key_worker_once_ =
+                   PTHREAD_ONCE_INIT;
+pthread_key_t WorkerThreadManager::WorkerThread::key_worker_;
+#else
+INIT_ONCE WorkerThreadManager::WorkerThread::key_worker_once_;
+DWORD WorkerThreadManager::WorkerThread::key_worker_ = TLS_OUT_OF_INDEXES;
+#endif
+
+SocketDescriptor*
+WorkerThreadManager::WorkerThread::RegisterSocketDescriptor(
+    ScopedSocket&& fd, WorkerThreadManager::Priority priority) {
+  AUTOLOCK(lock, &mu_);
+  DCHECK_LT(priority, WorkerThreadManager::PRIORITY_IMMEDIATE);
+  SocketDescriptor* d = new SocketDescriptor(std::move(fd), priority, this);
+  CHECK(descriptors_.insert(std::make_pair(d->fd(), d)).second);
+  return d;
+}
+
+ScopedSocket WorkerThreadManager::WorkerThread::DeleteSocketDescriptor(
+    SocketDescriptor* d) {
+  AUTOLOCK(lock, &mu_);
+  poller_->UnregisterDescriptor(d);
+  ScopedSocket fd(d->ReleaseFd());  // The fd is handed back to the caller.
+  if (fd.valid()) {
+    std::map<int, SocketDescriptor*>::iterator found =
+        descriptors_.find(fd.get());
+    if (found != descriptors_.end()) {
+      delete found->second;  // Presumably |d| itself; it was keyed by d->fd().
+      descriptors_.erase(found);
+    }
+  }
+  return fd;
+}
+
+void WorkerThreadManager::WorkerThread::RegisterPeriodicClosure(
+    PeriodicClosureId id, const char* const location,
+    int ms, std::unique_ptr<PermanentClosure> closure) {
+  AUTOLOCK(lock, &mu_);
+  periodic_closures_.emplace_back(
+     new PeriodicClosure(id, location, Now(), ms, std::move(closure)));
+}
+
+void WorkerThreadManager::WorkerThread::UnregisterPeriodicClosure(
+    PeriodicClosureId id, UnregisteredClosureData* data) {
+  DCHECK(data);
+  AUTOLOCK(lock, &mu_);
+  CHECK_NE(id, kInvalidPeriodicClosureId);
+
+  {
+    std::unique_ptr<PermanentClosure> closure;
+
+    auto it = std::find_if(periodic_closures_.begin(), periodic_closures_.end(),
+                           [id](const std::unique_ptr<PeriodicClosure>& it) {
+                             return it->id() == id;
+                           });
+    if (it != periodic_closures_.end()) {
+      closure = (*it)->ReleaseClosure();
+      // Since location is used when this function
+      // takes long time, this should be set when it's available.
+      data->SetLocation((*it)->location());
+      periodic_closures_.erase(it);
+    }
+
+    DCHECK(closure) << "Removing unregistered closure id=" << id;
+
+    std::deque<ClosureData> pendings;
+    while (!pendings_[PRIORITY_IMMEDIATE].empty()) {
+      ClosureData pending_closure =
+        pendings_[PRIORITY_IMMEDIATE].front();
+      pendings_[PRIORITY_IMMEDIATE].pop_front();
+      if (pending_closure.closure_ == closure.get())
+        continue;
+      pendings.push_back(pending_closure);
+    }
+    pendings_[PRIORITY_IMMEDIATE].swap(pendings);
+  }
+
+  // Notify that |closure| is removed from the queues.
+  // SetDone(true) after |closure| has been deleted.
+  data->SetDone(true);
+}
+
+void WorkerThreadManager::WorkerThread::RunClosure(
+    const char* const location,
+    Closure* closure, Priority priority) {
+  DCHECK_GE(priority, PRIORITY_MIN);
+  DCHECK_LT(priority, NUM_PRIORITIES);
+  {
+    AUTOLOCK(lock, &mu_);
+    AddClosure(location, priority, closure);
+    // If this is the same thread, or this worker is running some closure
+    // (or in other words, this worker is not in select wait),
+    // next Dispatch could pick a closure from pendings_, so we don't need
+    // to signal via pipe.
+    if (THREAD_ID_IS_SELF(id_) || current_.closure_ != nullptr)
+      return;
+  }
+  // send select loop something to read about, so new pendings will be
+  // processed soon.
+  poller_->Signal();
+}
+
+WorkerThreadManager::CancelableClosure*
+WorkerThreadManager::WorkerThread::RunDelayedClosure(
+    const char* const location,
+    int msec, Closure* closure) {
+  AUTOLOCK(lock, &mu_);
+  DelayedClosureImpl* delayed_closure =
+      new DelayedClosureImpl(location, Now() + msec/1000.0, closure);
+  delayed_pendings_.push(delayed_closure);
+  return delayed_closure;
+}
+
+size_t WorkerThreadManager::WorkerThread::load() const {
+  AUTOLOCK(lock, &mu_);
+  // One unit for a running closure and one per registered descriptor.
+  size_t total = descriptors_.size() + (current_.closure_ != nullptr ? 1 : 0);
+  // Every pending closure adds a weight that doubles with each priority
+  // level (1 << priority).
+  for (int p = PRIORITY_MIN; p < NUM_PRIORITIES; ++p) {
+    const int weight = 1 << p;
+    total += pendings_[p].size() * weight;
+  }
+  return total;
+}
+
+size_t WorkerThreadManager::WorkerThread::pendings() const {
+  AUTOLOCK(lock, &mu_);
+  // Sum of the pending-queue lengths over every priority level.
+  size_t total = 0;
+  for (int p = NUM_PRIORITIES - 1; p >= PRIORITY_MIN; --p)
+    total += pendings_[p].size();
+  return total;
+}
+
+bool WorkerThreadManager::WorkerThread::IsIdle() const {
+  AUTOLOCK(lock, &mu_);  // Idle: nothing running, no descriptors registered.
+  return descriptors_.empty() && current_.closure_ == nullptr;
+}
+
+string WorkerThreadManager::WorkerThread::DebugString() const {
+  AUTOLOCK(lock, &mu_);
+  std::ostringstream s;
+  s << "thread[" << id_ << "/" << name_ << "] ";
+  s << " tick=" << tick_;
+  s << " " << current_.location_;
+  if (current_.closure_) {
+    s << " " << current_.closure_;
+  }
+  s << ": " << descriptors_.size() << " descriptors";
+  s << ": poll_interval=" << poll_interval_;
+  s << ": ";
+  for (int priority = PRIORITY_MIN; priority < NUM_PRIORITIES; ++priority) {
+    s << WorkerThreadManager::Priority_Name(priority)
+      << "[" << pendings_[priority].size() << " pendings "
+      << " q=" << max_queuelen_[priority]
+      << " w=" << static_cast<double>(max_wait_time_ns_[priority]) /
+        kNanoSecondsPerSecond << "] ";
+  }
+  s << ": delayed=" << delayed_pendings_.size();
+  s << ": periodic=" << periodic_closures_.size();
+  if (pool_ != 0)
+    s << ": pool=" << pool_;
+  return s.str();
+}
+
+// Picks the closure to run next into |current_| (left null if none is ready).
+// Returns false only when the thread has quit and nothing is left to wait on.
+bool WorkerThreadManager::WorkerThread::NextClosure() {
+  AUTOLOCK_WITH_STAT(lock, &mu_, auto_lock_stat_next_closure_);
+  VLOG(5) << "NextClosure";
+  DCHECK_EQ(0, now_ns_);  // Now() and NowInNs() will get new time
+  ++tick_;
+  current_ = ClosureData();
+
+  // Default descriptor polling timeout.
+  // If there are pending closures, it will check descriptors without timeout.
+  // If there are delayed closures, it will reduce intervals to the nearest
+  // delayed closure.
+  static const int kPollIntervalMilliSec = 500;
+  static const long long kPollIntervalNanoSec =
+      static_cast<long long>(kPollIntervalMilliSec) * 1000000;
+  poll_interval_ = kPollIntervalMilliSec;
+  int priority = PRIORITY_IMMEDIATE;
+  for (priority = PRIORITY_IMMEDIATE; priority >= PRIORITY_MIN; --priority) {
+    if (!pendings_[priority].empty()) {
+      // PRIORITY_IMMEDIATE has higher priority than descriptors.
+      if (priority == PRIORITY_IMMEDIATE) {
+        current_ = GetClosure(
+            static_cast<WorkerThreadManager::Priority>(priority));
+        return true;
+      }
+      // For lower priorities, descriptor availability is checked before
+      // running the closures.
+      poll_interval_ = 0;
+      break;
+    }
+  }
+
+  if (poll_interval_ > 0 && !delayed_pendings_.empty()) {
+    // Adjust poll_interval for delayed closure.
+    int next_delay = static_cast<int>(
+      (delayed_pendings_.top()->time() - Now()) * 1000);
+    if (next_delay < 0)
+      next_delay = 0;
+    poll_interval_ = std::min(poll_interval_, next_delay);
+  }
+  DescriptorPoller::CallbackQueue io_pendings;
+  VLOG(2) << "poll_interval=" << poll_interval_;
+  CHECK_GE(poll_interval_, 0);
+
+  long long poll_start_time_ns = timer_.GetInNanoSeconds();
+  poller_->PollEvents(descriptors_, poll_interval_,
+                      priority, &io_pendings,
+                      &mu_, &auto_lock_stat_poll_events_);
+  // update NowInNs().
+  now_ns_ = timer_.GetInNanoSeconds();
+  CHECK_GE(now_ns_, poll_start_time_ns);
+  // on Windows, poll time would be 0.51481 or so when no event happened.
+  // multiply 1.1 (i.e. 0.55) would be good.
+  if (NowInNs() - poll_start_time_ns > 1.1 * kPollIntervalNanoSec) {
+    LOG(WARNING) << id_ << " poll too slow:"
+                 << (NowInNs() - poll_start_time_ns) << " nsec"
+                 << " interval=" << poll_interval_ << " msec"
+                 << " #descriptors=" << descriptors_.size()
+                 << " priority=" << priority;
+    if (NowInNs() - poll_start_time_ns > 1 * kNanoSecondsPerSecond) {
+      for (const auto& desc : descriptors_) {
+        LOG(WARNING) << id_ << " list of sockets on slow poll:"
+                     << " fd=" << desc.first
+                     << " sd=" << desc.second
+                     << " sd.fd=" << desc.second->fd()
+                     << " readable=" << desc.second->IsReadable()
+                     << " closed=" << desc.second->IsClosed()
+                     << " canreuse=" << desc.second->CanReuse()
+                     << " err=" << desc.second->GetLastErrorMessage();
+      }
+    }
+  }
+
+  // Check delayed closures.
+  while (!delayed_pendings_.empty() &&
+         (delayed_pendings_.top()->time() < Now() || shutting_down_)) {
+    DelayedClosureImpl* delayed_closure = delayed_pendings_.top();
+    delayed_pendings_.pop();
+    AddClosure(delayed_closure->location(), PRIORITY_IMMEDIATE,
+               NewCallback(delayed_closure, &DelayedClosureImpl::Run));
+  }
+
+  // Check periodic closures.
+  for (const auto& periodic_closure : periodic_closures_) {
+    PermanentClosure* closure = periodic_closure->GetClosure(Now());
+    if (closure != nullptr) {
+      VLOG(3) << "periodic=" << closure;
+      AddClosure(periodic_closure->location(),
+                 PRIORITY_IMMEDIATE, closure);
+    }
+  }
+
+  // Check descriptors I/O.
+  for (auto& iter : io_pendings) {
+    Priority io_priority = iter.first;
+    std::deque<OneshotClosure*>& pendings = iter.second;
+    while (!pendings.empty()) {
+      // TODO: use original location
+      AddClosure(FROM_HERE, io_priority, pendings.front());
+      pendings.pop_front();
+    }
+  }
+
+  // Check pendings again.
+  for (priority = PRIORITY_IMMEDIATE; priority >= PRIORITY_MIN; --priority) {
+    if (!pendings_[priority].empty()) {
+      VLOG(2) << "pendings " << WorkerThreadManager::Priority_Name(priority);
+      current_ = GetClosure(
+          static_cast<WorkerThreadManager::Priority>(priority));
+      return true;
+    }
+  }
+
+  // No pendings.
+  DCHECK_LT(priority, PRIORITY_MIN);
+  if (quit_) {
+    VLOG(3) << "NextClosure: terminating";
+    if (delayed_pendings_.empty() &&
+        periodic_closures_.empty() &&
+        descriptors_.empty()) {
+      pool_ = WorkerThreadManager::kDeadPool;
+      return false;
+    } else {
+      // Report counts for all three containers, not an emptiness flag.
+      LOG(INFO) << "NextClosure: terminating but still active "
+                << " delayed_pendings=" << delayed_pendings_.size()
+                << " periodic_closures=" << periodic_closures_.size()
+                << " descriptors=" << descriptors_.size();
+    }
+  }
+  VLOG(4) << "NextClosure: no closure to run";
+  return true;
+}
+
+void WorkerThreadManager::WorkerThread::AddClosure(
+    const char* const location,
+    WorkerThreadManager::Priority priority,
+    Closure* closure) {
+  // mu_ held.  Queues |closure| and updates the per-priority max queue length.
+  ClosureData closure_data(location, closure,
+                           pendings_[priority].size(),  // Length before push.
+                           tick_,
+                           timer_.GetInNanoSeconds());  // Enqueue timestamp.
+  if (closure_data.queuelen_ > max_queuelen_[priority]) {
+    max_queuelen_[priority] = closure_data.queuelen_;
+  }
+  pendings_[priority].push_back(closure_data);
+}
+
+WorkerThreadManager::WorkerThread::ClosureData
+WorkerThreadManager::WorkerThread::GetClosure(
+    WorkerThreadManager::Priority priority) {
+  // mu_ held.  Pops the front of the |priority| queue; tracks its wait time.
+  CHECK(!pendings_[priority].empty());
+  ClosureData closure_data = pendings_[priority].front();
+  pendings_[priority].pop_front();
+  long long wait_time_ns =  // Time elapsed since AddClosure() stamped it.
+      timer_.GetInNanoSeconds() - closure_data.timestamp_ns_;
+  static const long long kLongWaitTimeNanoSec = 60 * kNanoSecondsPerSecond;
+  if (wait_time_ns > max_wait_time_ns_[priority]) {
+    max_wait_time_ns_[priority] = wait_time_ns;
+  }
+  if (wait_time_ns > kLongWaitTimeNanoSec) {
+    LOG(WARNING) << id_ << " too long in pending queue "
+                 << WorkerThreadManager::Priority_Name(priority)
+                 << " "
+                 << static_cast<double>(wait_time_ns) / kNanoSecondsPerSecond
+                 << " [sec] queuelen=" << closure_data.queuelen_
+                 << " tick=" << (tick_ - closure_data.tick_);
+  }
+  return closure_data;
+}
+
+#ifndef _WIN32
+void WorkerThreadManager::WorkerThread::InitializeWorkerKey() {
+  pthread_key_create(&key_worker_, nullptr);
+}
+#else
+BOOL WINAPI WorkerThreadManager::WorkerThread::InitializeWorkerKey(
+    PINIT_ONCE, PVOID, PVOID*) {
+  key_worker_ = TlsAlloc();
+  return TRUE;
+}
+#endif
+
+void WorkerThreadManager::WorkerThread::RegisterPollEvent(
+    SocketDescriptor* d, DescriptorPoller::EventType type) {
+  AUTOLOCK(lock, &mu_);
+  poller_->RegisterPollEvent(d, type);
+}
+
+void WorkerThreadManager::WorkerThread::UnregisterPollEvent(
+    SocketDescriptor* d, DescriptorPoller::EventType type) {
+  AUTOLOCK(lock, &mu_);
+  poller_->UnregisterPollEvent(d, type);
+}
+
+void WorkerThreadManager::WorkerThread::RegisterTimeoutEvent(
+    SocketDescriptor* d) {
+  AUTOLOCK(lock, &mu_);
+  poller_->RegisterTimeoutEvent(d);
+}
+
+void WorkerThreadManager::WorkerThread::UnregisterTimeoutEvent(
+    SocketDescriptor* d) {
+  AUTOLOCK(lock, &mu_);
+  poller_->UnregisterTimeoutEvent(d);
+}
+
+void WorkerThreadManager::WorkerThread::Start() {
+  CHECK(PlatformThread::Create(this, &handle_));
+  AUTOLOCK(lock, &mu_);
+  CHECK_NE(handle_, kNullThreadHandle);
+  cond_handle_.Signal();  // Releases ThreadMain(): |handle_| is now set.
+  while (id_ == 0)  // ThreadMain() sets |id_| and signals |cond_id_|.
+    cond_id_.Wait();
+}
+
+void WorkerThreadManager::WorkerThread::Join() {
+  if (handle_ != kNullThreadHandle) {
+    LOG(INFO) << "Join thread:" << DebugString();
+    {
+      AUTOLOCK(lock, &mu_);
+      CHECK(quit_);  // Quit() must have been called before Join().
+    }
+    FlushLogFiles();
+    PlatformThread::Join(handle_);
+  }
+  handle_ = kNullThreadHandle;  // Reset so that ~WorkerThread()'s CHECKs pass.
+  id_ = 0;
+}
+
+}  // namespace devtools_goma
diff --git a/client/worker_thread.h b/client/worker_thread.h
new file mode 100644
index 0000000..c56dbff
--- /dev/null
+++ b/client/worker_thread.h
@@ -0,0 +1,205 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_WORKER_THREAD_H_
+#define DEVTOOLS_GOMA_CLIENT_WORKER_THREAD_H_
+
+#include <deque>
+#include <map>
+#include <queue>
+#include <vector>
+
+#include "basictypes.h"
+#include "callback.h"
+#include "descriptor_poller.h"
+#include "lockhelper.h"
+#include "platform_thread.h"
+#include "scoped_fd.h"
+#include "simple_timer.h"
+#include "worker_thread_manager.h"
+
+namespace devtools_goma {
+
+class AutoLockStat;
+class SocketDescriptor;
+
+class WorkerThreadManager::WorkerThread : public PlatformThread::Delegate {
+ public:
+  class DelayedClosureImpl : public CancelableClosure {
+   public:
+    DelayedClosureImpl(const char* const location,
+                       double t, Closure* closure)
+        : CancelableClosure(location, closure), time_(t) {}
+    double time() const { return time_; }
+    Closure* GetClosure() {  // Hands closure_ over and forgets it.
+      Closure* closure = closure_;
+      closure_ = nullptr;
+      return closure;
+    }
+
+   private:
+    friend class WorkerThread;
+    friend class WorkerThreadTest;
+    ~DelayedClosureImpl() override {}
+
+    // Run closure if it is still set, and destroy itself.
+    void Run();
+
+    double time_;
+    DISALLOW_COPY_AND_ASSIGN(DelayedClosureImpl);
+  };
+
+  static void Initialize();
+  static WorkerThread* GetCurrentWorker();
+
+  WorkerThread(WorkerThreadManager* wm, int pool, const std::string& name);
+  ~WorkerThread() override;
+
+  int pool() const { return pool_; }
+  ThreadId id() const { return id_; }
+  long long NowInNs();
+  double Now();
+  void Start();
+
+  // Runs delayed closures as soon as possible.
+  void Shutdown();
+
+  // Requests to quit dispatch loop of the WorkerThread's thread, and terminate
+  // the thread.
+  void Quit();
+
+  // Joins the WorkerThread's thread.  You must call Quit() before Join(), and
+  // call Join() before destructing the WorkerThread.
+  void Join();
+
+  void ThreadMain() override;
+  bool Dispatch();
+
+  // Registers file descriptor fd in priority.
+  SocketDescriptor* RegisterSocketDescriptor(
+      ScopedSocket&& fd, WorkerThreadManager::Priority priority);
+  ScopedSocket DeleteSocketDescriptor(SocketDescriptor* d);
+
+  void RegisterPollEvent(SocketDescriptor* d, DescriptorPoller::EventType);
+  void UnregisterPollEvent(SocketDescriptor* d, DescriptorPoller::EventType);
+  void RegisterTimeoutEvent(SocketDescriptor* d);
+  void UnregisterTimeoutEvent(SocketDescriptor* d);
+
+  void RegisterPeriodicClosure(PeriodicClosureId id,
+                               const char* const location,
+                               int ms,
+                               std::unique_ptr<PermanentClosure> closure);
+  void UnregisterPeriodicClosure(PeriodicClosureId id,
+                                 UnregisteredClosureData* data);
+
+  void RunClosure(const char* const location,
+                  Closure* closure, Priority priority);
+  CancelableClosure* RunDelayedClosure(
+      const char* const location,
+      int msec, Closure* closure);
+
+  size_t load() const;
+  size_t pendings() const;
+
+  bool IsIdle() const;
+  string DebugString() const;
+
+ private:
+  struct ClosureData {
+    ClosureData(const char* const location_,
+                Closure* closure_,
+                int queuelen,
+                int tick,
+                long long timestamp_ns);
+    ClosureData();
+    const char* location_;
+    Closure* closure_;
+    int queuelen_;
+    int tick_;
+    long long timestamp_ns_;
+  };
+
+  class CompareDelayedClosureImpl {
+   public:
+    bool operator()(DelayedClosureImpl* a, DelayedClosureImpl* b) const {
+      return a->time() > b->time();  // Smallest time() ends up on top.
+    }
+  };
+  typedef std::priority_queue<DelayedClosureImpl*,
+                              std::vector<DelayedClosureImpl*>,
+                              CompareDelayedClosureImpl> DelayedClosureQueue;
+
+  // Forward declaration; the class itself is defined in worker_thread.cc.
+  class PeriodicClosure;
+
+  friend class WorkerThreadTest;
+
+  // Updates current_closure_ to run if any.
+  // Returns false if no closure to run now (no pending, no network I/O and
+  // no timeout).
+  bool NextClosure();
+
+  // Adds closure in priority.
+  // Assert mu_ held.
+  void AddClosure(const char* const location,
+                  WorkerThreadManager::Priority priority,
+                  Closure* closure);
+
+  // Gets closure in priority.
+  // Assert mu_ held.
+  ClosureData GetClosure(WorkerThreadManager::Priority priority);
+
+#ifndef _WIN32
+  static void InitializeWorkerKey();
+#else
+  static BOOL WINAPI InitializeWorkerKey(PINIT_ONCE, PVOID, PVOID*);
+#endif
+
+  WorkerThreadManager* wm_;
+  int pool_;
+  ThreadHandle handle_;
+  ThreadId id_;
+  ClosureData current_;
+  SimpleTimer timer_;
+  int tick_;
+  long long now_ns_;
+  bool shutting_down_;
+  bool quit_;
+
+  const std::string name_;
+
+  Lock mu_;
+  ConditionVariable cond_handle_;  // signaled when handle_ is ready.
+  ConditionVariable cond_id_;      // signaled when id_ is ready.
+  // These auto_lock_stat_* are owned by g_auto_lock_stats.
+  AutoLockStat* auto_lock_stat_next_closure_;
+  AutoLockStat* auto_lock_stat_poll_events_;
+
+  std::deque<ClosureData> pendings_[NUM_PRIORITIES];
+  int max_queuelen_[NUM_PRIORITIES];
+  long long max_wait_time_ns_[NUM_PRIORITIES];
+
+  // delayed_pendings_ and periodic_closures_ are handled in PRIORITY_IMMEDIATE
+  DelayedClosureQueue delayed_pendings_;
+  std::vector<std::unique_ptr<PeriodicClosure>> periodic_closures_;
+
+  std::map<int, SocketDescriptor*> descriptors_;
+  std::unique_ptr<DescriptorPoller> poller_;
+  int poll_interval_;
+
+#ifndef _WIN32
+  static pthread_once_t key_worker_once_;
+  static pthread_key_t key_worker_;
+#else
+  static INIT_ONCE key_worker_once_;
+  static DWORD key_worker_;
+#endif
+  DISALLOW_COPY_AND_ASSIGN(WorkerThread);
+};
+
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_WORKER_THREAD_H_
diff --git a/client/worker_thread_manager.cc b/client/worker_thread_manager.cc
new file mode 100644
index 0000000..3b1911c
--- /dev/null
+++ b/client/worker_thread_manager.cc
@@ -0,0 +1,424 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "worker_thread_manager.h"
+
+#ifndef _WIN32
+#include <limits.h>
+#endif  // _WIN32
+
+#include <queue>
+#include <sstream>
+
+#include "autolock_timer.h"
+#include "callback.h"
+#include "compiler_specific.h"
+#include "socket_descriptor.h"
+#include "descriptor_poller.h"
+#include "glog/logging.h"
+#include "simple_timer.h"
+#include "worker_thread.h"
+
+#ifdef _WIN32
+# include "socket_helper_win.h"
+#endif
+
+namespace devtools_goma {
+
+/* static */
+const int WorkerThreadManager::kDeadPool = -2;
+const int WorkerThreadManager::kAlarmPool = -1;
+const int WorkerThreadManager::kFreePool = 0;
+
+// An atfork handler can't be unregistered once registered, but we'd like
+// to fork at SetUp in each unit test of subprocess_task_unittest.
+// g_initialize_atfork makes sure pthread_atfork is called only once.
+// g_enable_fork starts out false, is set by ~WorkerThreadManager() and is
+// cleared again by the next WorkerThreadManager() (non-Windows only).
+static bool g_initialize_atfork = false;
+static bool g_enable_fork = false;
+
+// Keeps |closure| (deleted by Cancel()) together with its |location| label.
+WorkerThreadManager::CancelableClosure::CancelableClosure(
+    const char* const location, Closure* closure)
+    : closure_(closure), location_(location) {
+}
+
+WorkerThreadManager::CancelableClosure::~CancelableClosure() {
+  CHECK(closure_ == nullptr);  // closure_ must already be released.
+}
+
+void WorkerThreadManager::CancelableClosure::Cancel() {
+  delete closure_;  // The closure is dropped without being run.
+  closure_ = nullptr;  // Satisfies the CHECK in the destructor.
+}
+
+const char* WorkerThreadManager::CancelableClosure::location() const {
+  return location_;  // As passed to the constructor.
+}
+
+#ifndef _WIN32
+// Registered as the pthread_atfork prepare handler by the constructor.
+static void DontCallForkInWorkerThreadManager() {
+  DLOG_IF(FATAL, !g_enable_fork) << "fork called";
+}
+#endif
+
+WorkerThreadManager::WorkerThreadManager()
+    : alarm_worker_(nullptr),
+      next_worker_index_(0),
+      next_pool_(kFreePool + 1),  // First id handed out for a new pool.
+      next_periodic_closure_id_(1) {
+  WorkerThread::Initialize();
+#ifndef _WIN32
+  g_enable_fork = false;  // From now on fork() trips the atfork handler.
+  if (!g_initialize_atfork) {  // The handler is installed only once.
+    pthread_atfork(&DontCallForkInWorkerThreadManager, nullptr, nullptr);
+    g_initialize_atfork = true;
+  }
+#endif
+}
+
+WorkerThreadManager::~WorkerThreadManager() {
+  CHECK(alarm_worker_ == nullptr);  // Finish() resets it.
+  for (const auto* worker : workers_) {
+    CHECK(worker == nullptr);  // Finish() resets every slot.
+  }
+  g_enable_fork = true;  // The atfork handler stays quiet from here on.
+}
+
+void WorkerThreadManager::Start(int num_threads) {
+  AUTO_EXCLUSIVE_LOCK(lock, &mu_);
+  CHECK(workers_.empty());  // No worker may exist yet.
+  CHECK(GetCurrentWorker() == nullptr);  // Not callable on a worker thread.
+  alarm_worker_ = new WorkerThread(this, kAlarmPool, "alarm_worker");
+  alarm_worker_->Start();  // Kept apart from workers_.
+  next_worker_index_ = 0;
+  for (int i = 0; i < num_threads; ++i) {
+    WorkerThread* worker = new WorkerThread(this, kFreePool, "worker");
+    worker->Start();
+    workers_.push_back(worker);  // Deleted in Finish().
+  }
+}
+
+int WorkerThreadManager::StartPool(int num_threads, const std::string& name) {
+  AUTO_EXCLUSIVE_LOCK(lock, &mu_);
+  CHECK(GetCurrentWorker() == nullptr);  // Not callable on a worker thread.
+  int pool = next_pool_++;  // Fresh pool id; next_pool_ only grows.
+  for (int i = 0; i < num_threads; ++i) {
+    WorkerThread* worker = new WorkerThread(this, pool, name);
+    worker->Start();
+    workers_.push_back(worker);  // Pool members live in workers_ as well.
+  }
+  return pool;  // To be passed to RunClosureInPool().
+}
+
+void WorkerThreadManager::NewThread(OneshotClosure* callback,
+                                    const std::string& name) {
+  AUTO_EXCLUSIVE_LOCK(lock, &mu_);
+  int pool = next_pool_++;  // This pool id is not returned to the caller.
+  WorkerThread* worker = new WorkerThread(this, pool, name);
+  worker->Start();
+  workers_.push_back(worker);  // Deleted in Finish() like any other worker.
+  worker->RunClosure(FROM_HERE, callback, PRIORITY_IMMEDIATE);
+}
+
+size_t WorkerThreadManager::num_threads() const {
+  AUTO_SHARED_LOCK(lock, &mu_);
+  return workers_.size();  // alarm_worker_ is not part of workers_.
+}
+
+// Calls Shutdown() on the alarm worker and on every remaining worker.
+void WorkerThreadManager::Shutdown() {
+  LOG(INFO) << "Shutdown";
+  AUTO_SHARED_LOCK(lock, &mu_);
+  CHECK(GetCurrentWorker() == nullptr);  // Not callable on a worker thread.
+  if (alarm_worker_)
+    alarm_worker_->Shutdown();
+  for (WorkerThread* w : workers_) {
+    if (w != nullptr) w->Shutdown();
+  }
+}
+
+// Stops all workers.  Every worker is asked to Quit() before any of them is
+// joined.  Joined workers are deleted and their slots reset to nullptr, so
+// workers_ keeps its size.
+void WorkerThreadManager::Finish() {
+  LOG(INFO) << "Finish";
+  AUTO_EXCLUSIVE_LOCK(lock, &mu_);
+  CHECK(GetCurrentWorker() == nullptr);
+  if (alarm_worker_)
+    alarm_worker_->Quit();
+  for (WorkerThread* w : workers_) {
+    if (w != nullptr)
+      w->Quit();
+  }
+  // join threads
+  if (alarm_worker_ != nullptr) {
+    alarm_worker_->Join();
+    delete alarm_worker_;
+    alarm_worker_ = nullptr;
+  }
+  for (WorkerThread*& slot : workers_) {
+    if (slot == nullptr)
+      continue;
+    slot->Join();
+    delete slot;
+    slot = nullptr;
+  }
+}
+
+WorkerThreadManager::ThreadId WorkerThreadManager::GetCurrentThreadId() {
+  return devtools_goma::GetCurrentThreadId();  // Namespace-level function.
+}
+
+bool WorkerThreadManager::Dispatch() {
+  WorkerThread* worker = GetCurrentWorker();
+  DCHECK(worker) << "thread " << GetCurrentThreadId();  // Worker threads only.
+  return worker->Dispatch();
+}
+
+SocketDescriptor* WorkerThreadManager::RegisterSocketDescriptor(
+    ScopedSocket&& fd, WorkerThreadManager::Priority priority) {
+  WorkerThread* worker = GetCurrentWorker();  // Registers with this worker.
+  DCHECK(worker) << "thread " << GetCurrentThreadId();
+  return worker->RegisterSocketDescriptor(std::move(fd), priority);
+}
+
+ScopedSocket WorkerThreadManager::DeleteSocketDescriptor(
+    SocketDescriptor* d) {
+  WorkerThread* worker = GetCurrentWorker();  // Deletes from this worker.
+  DCHECK(worker) << "thread " << GetCurrentThreadId();
+  return worker->DeleteSocketDescriptor(d);
+}
+
+PeriodicClosureId WorkerThreadManager::NextPeriodicClosureId() {
+  AUTOLOCK(lock, &periodic_closure_id_mu_);
+  return next_periodic_closure_id_++;  // Ids start at 1 (see constructor).
+}
+
+PeriodicClosureId WorkerThreadManager::RegisterPeriodicClosure(
+    const char* const location,
+    int ms, std::unique_ptr<PermanentClosure> closure) {
+  DCHECK(alarm_worker_);
+  PeriodicClosureId id = NextPeriodicClosureId();
+
+  alarm_worker_->RunClosure(  // The registration is handed to the alarmer.
+      FROM_HERE,
+      NewCallback(
+          &WorkerThreadManager::RegisterPeriodicClosureOnAlarmer,
+          alarm_worker_, id, location, ms, std::move(closure)),
+      PRIORITY_IMMEDIATE);
+
+  return id;  // No explicit wait for that callback here.
+}
+
+/* static; handed to the alarm worker by RegisterPeriodicClosure(). */
+void WorkerThreadManager::RegisterPeriodicClosureOnAlarmer(
+    WorkerThread* alarmer, PeriodicClosureId id, const char* location,
+    int ms, std::unique_ptr<PermanentClosure> closure) {
+  alarmer->RegisterPeriodicClosure(id, location, ms, std::move(closure));
+}
+
+void WorkerThreadManager::UnregisterPeriodicClosure(PeriodicClosureId id) {
+  CHECK(GetCurrentWorker() != alarm_worker_);  // We block on it below.
+  DCHECK(alarm_worker_);
+
+  UnregisteredClosureData unregistered_data;  // Shared with the callback.
+  alarm_worker_->RunClosure(
+      FROM_HERE,
+      NewCallback(
+          alarm_worker_,
+          &WorkerThreadManager::WorkerThread::UnregisterPeriodicClosure,
+          id, &unregistered_data),
+      PRIORITY_IMMEDIATE);
+
+  SimpleTimer timer;
+  timer.Start();
+  // Poll until Done(), so that the periodic closure was destructed (and the
+  // stack-allocated |unregistered_data| is no longer needed) before we return.
+  while (!unregistered_data.Done()) {
+    const char* location = unregistered_data.Location();
+    LOG_EVERY_N(INFO, 100)
+        << "UnregisterPeriodicClosure id=" << id
+        << " location="
+        << (location ? location : "")  // Location() is null until set.
+        << " timer=" << timer.GetInMilliSeconds() << " [ms]";
+    CHECK_LT(timer.GetInMilliSeconds(), 60 * 1000)
+        << "UnregisterPeriodicClosure didn't finish in 60 seconds";
+    PlatformThread::Sleep(10);
+  }
+}
+
+void WorkerThreadManager::RunClosure(
+    const char* const location,
+    Closure* closure, Priority priority) {
+  RunClosureInPool(location, kFreePool, closure, priority);  // Default pool.
+}
+
+void WorkerThreadManager::RunClosureInPool(
+    const char* const location,
+    int pool, Closure* closure, Priority priority) {
+  // Note: would a global pendings queue be slower than this implementation?
+  WorkerThread* candidate_worker = nullptr;
+  {
+    AUTO_EXCLUSIVE_LOCK(lock, &mu_);  // updates |next_worker_index_|.
+    size_t min_load = INT_MAX;
+    size_t i;  // Declared outside the loop: it is read after the loop.
+    for (i = next_worker_index_;  // One pass over workers_, wrapping around.
+         i < next_worker_index_ + workers_.size();
+         ++i) {
+      WorkerThread* worker = workers_[i % workers_.size()];
+      if (!worker) continue;  // Slot already reset by Finish().
+      if (worker->pool() != pool) continue;
+      if (worker == GetCurrentWorker() && worker->pendings() == 0) {
+        candidate_worker = worker;  // Stay on the calling worker.
+        break;
+      }
+      size_t load = worker->load();
+      if (load == 0) {  // Can't beat zero; stop searching.
+        candidate_worker = worker;
+        break;
+      }
+      if (load < min_load) {  // Least loaded worker seen so far.
+        min_load = load;
+        candidate_worker = worker;
+      }
+    }
+    CHECK(candidate_worker);  // |pool| must contain a usable worker.
+    next_worker_index_ = (i + 1) % workers_.size();  // Resume after |i|.
+  }
+  return candidate_worker->RunClosure(location, closure, priority);
+}
+
+void WorkerThreadManager::RunClosureInThread(
+    const char* const location,
+    ThreadId id,
+    Closure* closure, Priority priority) {
+  WorkerThread* worker = GetWorker(id);  // LOG(FATAL)s on an unknown id.
+  DCHECK(worker);
+  worker->RunClosure(location, closure, priority);
+}
+
+WorkerThreadManager::CancelableClosure*
+WorkerThreadManager::RunDelayedClosureInThread(
+    const char* const location,
+    ThreadId id, int msec,
+    Closure* closure) {
+  WorkerThread* worker = GetWorker(id);  // LOG(FATAL)s on an unknown id.
+  DCHECK(worker);
+  return worker->RunDelayedClosure(location, msec, closure);
+}
+
+// Returns workers_.size(), then the DebugString() of every non-null worker
+// (alarm_worker_ is not included), each followed by a newline.
+string WorkerThreadManager::DebugString() const {
+  AUTO_SHARED_LOCK(lock, &mu_);
+  std::ostringstream out;
+  out << workers_.size() << " workers\n";
+  for (const WorkerThread* w : workers_) {
+    if (w == nullptr) continue;
+    out << w->DebugString() << "\n";
+  }
+  out << "\n";
+  return out.str();
+}
+
+// Logs DebugString() of every non-idle worker, then the idle-worker count.
+void WorkerThreadManager::DebugLog() const {
+  AUTO_SHARED_LOCK(lock, &mu_);
+  int idle_count = 0;
+  for (const WorkerThread* w : workers_) {
+    if (w == nullptr) continue;
+    if (w->IsIdle())
+      ++idle_count;
+    else
+      LOG(INFO) << w->DebugString();
+  }
+  LOG(INFO) << "idle workers:" << idle_count;
+}
+
+/* static */
+string WorkerThreadManager::Priority_Name(int priority) {
+  switch (priority) {
+    case PRIORITY_LOW: return "PriLow";
+    case PRIORITY_MED: return "PriMed";
+    case PRIORITY_HIGH: return "PriHigh";
+    case PRIORITY_IMMEDIATE: return "PriImmediate";
+    default: break;  // No fixed label; build one below.
+  }
+  // Any other value is reported with its number, e.g. "PriUnknown[7]".
+  std::ostringstream unknown;
+  unknown << "PriUnknown[" << priority << "]";
+  return unknown.str();
+}
+
+// Returns the worker whose thread id is |id|; LOG(FATAL)s if there is none.
+WorkerThreadManager::WorkerThread* WorkerThreadManager::GetWorker(ThreadId id) {
+  WorkerThread* found = nullptr;
+  {
+    AUTO_SHARED_LOCK(lock, &mu_);
+    found = GetWorkerUnlocked(id);
+  }  // mu_ is released here; DebugString() below locks it on its own.
+  if (found != nullptr) return found;
+  LOG(FATAL) << "No worker for id=" << id
+             << " current=" << GetCurrentThreadId() << " " << DebugString();
+  return nullptr;
+}
+
+// Linear search of workers_ by thread id; takes no lock itself, GetWorker()
+// calls it with mu_ held.  alarm_worker_ is not searched.
+WorkerThreadManager::WorkerThread* WorkerThreadManager::GetWorkerUnlocked(
+    ThreadId id) {
+  for (WorkerThread* w : workers_) {
+    if (w != nullptr && id == w->id()) return w;
+  }
+  return nullptr;
+}
+
+WorkerThreadManager::WorkerThread* WorkerThreadManager::GetCurrentWorker() {
+  return WorkerThread::GetCurrentWorker();  // Delegates to WorkerThread.
+}
+
+// Schedules Run(closure) through wm->RunClosure() at PRIORITY_MED.
+// |this| has to stay alive until Run() is over; the destructor waits for it.
+WorkerThreadRunner::WorkerThreadRunner(
+    WorkerThreadManager* wm,
+    const char* const location, OneshotClosure* closure)
+    : cond_(&mu_),
+      done_(false) {
+  LOG(INFO) << "run closure=" << closure << " from " << location;
+  // Run() sets done_ and signals cond_ once |closure| has returned.
+  wm->RunClosure(
+      location,
+      NewCallback(this, &WorkerThreadRunner::Run, closure),
+      WorkerThreadManager::PRIORITY_MED);
+}
+
+WorkerThreadRunner::~WorkerThreadRunner() {
+  Wait();  // Run() still uses this object until it has set done_.
+}
+
+// Blocks until Run() has set done_.  done_ is re-checked after every wakeup
+// of cond_.
+void WorkerThreadRunner::Wait() {
+  AUTOLOCK(lock, &mu_);
+  while (!done_) cond_.Wait();
+}
+
+bool WorkerThreadRunner::Done() const {
+  AUTOLOCK(lock, &mu_);  // done_ is written by Run() under the same lock.
+  return done_;
+}
+
+void WorkerThreadRunner::Run(OneshotClosure* closure) {
+  closure->Run();
+  LOG(INFO) << "done closure=" << closure;  // Only the address is logged.
+  AUTOLOCK(lock, &mu_);
+  done_ = true;
+  cond_.Signal();  // Wakes up Wait().
+}
+
+}  // namespace devtools_goma
diff --git a/client/worker_thread_manager.h b/client/worker_thread_manager.h
new file mode 100644
index 0000000..300778b
--- /dev/null
+++ b/client/worker_thread_manager.h
@@ -0,0 +1,239 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_CLIENT_WORKER_THREAD_MANAGER_H_
+#define DEVTOOLS_GOMA_CLIENT_WORKER_THREAD_MANAGER_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "autolock_timer.h"
+#include "basictypes.h"
+#include "lockhelper.h"
+#include "platform_thread.h"
+
+#define GOMA_WORKER_THREAD_STRINGFY(i) #i
+#define GOMA_WORKER_THREAD_STR(i) GOMA_WORKER_THREAD_STRINGFY(i)  // Expands i.
+#define FROM_HERE __FILE__ ":" GOMA_WORKER_THREAD_STR(__LINE__)  // "file:line"
+
+using std::string;
+
+namespace devtools_goma {
+
+class Closure;
+class IOChannel;
+class OneshotClosure;
+class PermanentClosure;
+class ScopedSocket;
+class SocketDescriptor;
+class WorkerThreadManagerTest;
+
+using PeriodicClosureId = int;  // See WorkerThreadManager below.
+const PeriodicClosureId kInvalidPeriodicClosureId = -1;  // Real ids count up.
+
+class WorkerThreadManager {
+ public:
+  // Windows often passes back 0xfffffffe (pseudo handle) as thread handle.
+  // Therefore the reliable way of selecting a thread is to use the thread id.
+  // ThreadHandle is used for Join().
+  typedef PlatformThreadHandle ThreadHandle;
+  typedef PlatformThreadId ThreadId;
+
+  // Default pool ids.
+  static const int kDeadPool;  // for terminated workers.
+  static const int kAlarmPool;  // for periodic closures.
+  static const int kFreePool;  // for RunClosure().
+
+  class WorkerThread;
+  // Priority of closures and descriptors.
+  enum Priority {
+    PRIORITY_MIN = 0,
+    PRIORITY_LOW = 0,    // Used in compile_task.
+    PRIORITY_MED,        // Used in http rpc and subprocess ipc.
+    PRIORITY_HIGH,       // Used in http server (http and goma ipc serving)
+    PRIORITY_IMMEDIATE,  // Called without descriptor polling.
+                         // Used to clear notification closures of descriptor,
+                         // delayed closures, or periodic closures.
+    NUM_PRIORITIES
+  };
+
+  // Thread unsafe.  See RunDelayedClosureInThread.
+  class CancelableClosure {
+   public:
+    CancelableClosure(const char* const location, Closure* closure);
+    const char* location() const;
+    void Cancel();
+   protected:
+    virtual ~CancelableClosure();
+    Closure* closure_;
+   private:
+    const char* const location_;
+    DISALLOW_COPY_AND_ASSIGN(CancelableClosure);
+  };
+
+  // See UnregisterPeriodicClosure
+  class UnregisteredClosureData {
+   public:
+    UnregisteredClosureData() : done_(false), location_(nullptr) {}
+
+    bool Done() const {
+      AUTOLOCK(lock, &mu_);
+      return done_;
+    }
+    void SetDone(bool b) {
+      AUTOLOCK(lock, &mu_);
+      done_ = b;
+    }
+
+    const char* Location() const {
+      AUTOLOCK(lock, &mu_);
+      return location_;
+    }
+    void SetLocation(const char* location) {
+      AUTOLOCK(lock, &mu_);
+      location_ = location;
+    }
+
+   private:
+    Lock mu_;
+    bool done_;
+    const char* location_;
+
+    DISALLOW_COPY_AND_ASSIGN(UnregisteredClosureData);
+  };
+
+  WorkerThreadManager();
+  ~WorkerThreadManager();
+
+  // Starts worker threads.
+  void Start(int num_threads);
+
+  // Starts pool of num_threads.  Returns pool id that can be used for
+  // RunClosureInPool().
+  // Can't be called on a worker thread.
+  int StartPool(int num_threads, const std::string& name);
+
+  // Starts new dedicated worker thread.
+  void NewThread(OneshotClosure* closure, const std::string& name);
+
+  size_t num_threads() const;
+
+  // Shutdown. runs delayed closures as soon as possible.
+  // Can't be called on a worker thread.
+  void Shutdown();
+
+  // Finishes all workers.
+  // Can't be called on a worker thread.
+  void Finish();
+
+  ThreadId GetCurrentThreadId();
+
+  // Run one step in current worker thread.
+  // Returns true if the worker thread is active.
+  // Returns false if the worker thread is terminating.
+  bool Dispatch();
+
+  // Registers file descriptor in current worker thread.
+  SocketDescriptor* RegisterSocketDescriptor(
+      ScopedSocket&& fd, Priority priority);
+  ScopedSocket DeleteSocketDescriptor(SocketDescriptor* d);
+
+  // Registers periodic closure.
+  PeriodicClosureId RegisterPeriodicClosure(
+      const char* const location,
+      int ms, std::unique_ptr<PermanentClosure> closure);
+
+  // Unregisters periodic closure.
+  void UnregisterPeriodicClosure(PeriodicClosureId id);
+
+  // Runs closure on least loaded worker thread in kFreePool.
+  void RunClosure(const char* const location,
+                  Closure* closure, Priority priority);
+
+  // Runs closure in pool, which was created by StartPool().
+  void RunClosureInPool(const char* const location,
+                        int pool,
+                        Closure* closure,
+                        Priority priority);
+
+  // Runs closure on specified worker thread.
+  void RunClosureInThread(const char* const location,
+                          ThreadId id, Closure* closure,
+                          Priority priority);
+
+  // Runs closure after msec on specified worker thread.
+  // It takes ownership of closure. It will be deleted if it is canceled.
+  // Normal closure will be deleted when it runs, so just pass ownership
+  // of the closure.
+  // Permanent closure won't be deleted when it runs, so it would be
+  // difficult to tell who is the owner of the closure; thus, don't pass
+  // permanent closure to this.
+  // CancelableClosure will be valid until closure returns, or
+  // Cancel is called.
+  // CancelableClosure is thread unsafe.  Access it only in the specified
+  // worker thread.
+  CancelableClosure* RunDelayedClosureInThread(
+      const char* const location,
+      ThreadId id, int msec, Closure* closure);
+
+  string DebugString() const;
+  void DebugLog() const;
+
+  static string Priority_Name(int priority);
+
+ private:
+  friend class WorkerThreadManagerTest;
+  struct Periodic;
+
+  static void RegisterPeriodicClosureOnAlarmer(
+      WorkerThread* alarmer, PeriodicClosureId id, const char* location,
+      int ms, std::unique_ptr<PermanentClosure> closure);
+
+  WorkerThread* GetWorker(ThreadId id);
+  WorkerThread* GetWorkerUnlocked(ThreadId id);
+  WorkerThread* GetCurrentWorker();
+
+  PeriodicClosureId NextPeriodicClosureId();
+
+  // |mu_| protects |workers_|, |next_worker_index_| and |next_pool_|.
+  ReadWriteLock mu_;
+  std::vector<WorkerThread*> workers_;
+  WorkerThread* alarm_worker_;
+  size_t next_worker_index_;
+  int next_pool_;
+
+  Lock periodic_closure_id_mu_;
+  PeriodicClosureId next_periodic_closure_id_;
+
+  DISALLOW_COPY_AND_ASSIGN(WorkerThreadManager);
+};
+
+// WorkerThreadRunner runs a closure through a WorkerThreadManager.
+// Its destructor waits for the closure to complete.
+class WorkerThreadRunner {
+ public:
+  WorkerThreadRunner(WorkerThreadManager* wm,
+                     const char* const location,
+                     OneshotClosure* closure);
+  ~WorkerThreadRunner();
+
+  void Wait();  // Blocks until the closure has run.
+  bool Done() const;  // Non-blocking check of the same condition.
+
+ private:
+  void Run(OneshotClosure* closure);  // Scheduled by the constructor.
+
+  Lock mu_;
+  ConditionVariable cond_;  // Signaled by Run().
+  bool done_;  // Read and written with mu_ held.
+
+  DISALLOW_COPY_AND_ASSIGN(WorkerThreadRunner);
+};
+
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_CLIENT_WORKER_THREAD_MANAGER_H_
diff --git a/client/worker_thread_manager_unittest.cc b/client/worker_thread_manager_unittest.cc
new file mode 100644
index 0000000..213e389
--- /dev/null
+++ b/client/worker_thread_manager_unittest.cc
@@ -0,0 +1,624 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include <memory>
+
+#include "worker_thread_manager.h"
+
+#ifndef _WIN32
+#include <pthread.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#else
+#include "socket_helper_win.h"
+#endif
+
+#include "callback.h"
+#include "compiler_specific.h"
+#include "socket_descriptor.h"
+#include "lockhelper.h"
+#include "mock_socket_factory.h"
+#include "platform_thread.h"
+#include "scoped_fd.h"
+#include "simple_timer.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+namespace devtools_goma {
+
+// Test fixture for WorkerThreadManager.
+//
+// The helpers below are handed to |wm_| as closures and report back to the
+// test body through members guarded by |mu_|.  After updating that state they
+// call cond_.Signal(); the Wait*() helpers block on |cond_| until the state
+// they are interested in is observed.
+class WorkerThreadManagerTest : public ::testing::Test {
+ public:
+  WorkerThreadManagerTest()
+      : cond_(&mu_),
+        test_threadid_(0),
+        num_test_threadid_(0),
+        periodic_counter_(0) {
+  }
+  ~WorkerThreadManagerTest() override {
+  }
+
+ protected:
+  // Shared state for the socket read/timeout tests.
+  // The mutable fields are accessed under the fixture's |mu_|.
+  class TestReadContext {
+   public:
+    TestReadContext(int fd, double timeout)
+        : fd_(fd), timeout_(timeout), num_read_(-1), d_(nullptr),
+          timeout_called_(false) {
+    }
+    ~TestReadContext() {
+    }
+    // Socket to read from.
+    const int fd_;
+    // Passed to NotifyWhenTimedout() when > 0; no timeout is set otherwise.
+    const double timeout_;
+    // -1 until TestDescriptorRead() has registered the descriptor, then the
+    // number of completed DoRead() calls.
+    int num_read_;
+    // Registered descriptor; nullptr before registration and again after
+    // DoStopRead() has deleted it.
+    SocketDescriptor* d_;
+    // Set by DoTimeout().
+    bool timeout_called_;
+
+   private:
+    DISALLOW_COPY_AND_ASSIGN(TestReadContext);
+  };
+
+  // Shared state for the socket write test.
+  // The mutable fields are accessed under the fixture's |mu_|.
+  class TestWriteContext {
+   public:
+    TestWriteContext(int fd, int total_write)
+        : fd_(fd), total_write_(total_write), num_write_(-1), d_(nullptr) {
+    }
+    ~TestWriteContext() {
+    }
+    // Socket to write to.
+    const int fd_;
+    // Number of one-byte writes DoWrite() performs before it stops.
+    const int total_write_;
+    // -1 until TestDescriptorWrite() has registered the descriptor, then the
+    // number of successful one-byte writes.
+    int num_write_;
+    // Registered descriptor; nullptr before registration and again after
+    // DoStopWrite() has deleted it.
+    SocketDescriptor* d_;
+
+   private:
+    DISALLOW_COPY_AND_ASSIGN(TestWriteContext);
+  };
+
+  // Creates a fresh manager and clears the counters for every test.
+  void SetUp() override {
+    wm_.reset(new WorkerThreadManager);
+    test_threadid_ = 0;
+    num_test_threadid_ = 0;
+    periodic_counter_ = 0;
+  }
+  void TearDown() override {
+    wm_.reset(nullptr);
+  }
+
+  // Clears the thread id / counter recorded by TestRun() and TestThreadId()
+  // so that a test can run a second phase.
+  void Reset() {
+    AutoLock lock(&mu_);
+    test_threadid_ = 0;
+    num_test_threadid_ = 0;
+  }
+
+  // Returns a closure bound to TestRun().  Expects that no thread id has
+  // been recorded yet.
+  OneshotClosure* NewTestRun() {
+    {
+      AutoLock lock(&mu_);
+      EXPECT_TRUE(!test_threadid_);
+    }
+    return NewCallback(
+        this, &WorkerThreadManagerTest::TestRun);
+  }
+
+  // Records the id of the thread it runs on and wakes up WaitTestRun().
+  void TestRun() {
+    AutoLock lock(&mu_);
+    test_threadid_ = wm_->GetCurrentThreadId();
+    cond_.Signal();
+  }
+
+  // Blocks until TestRun() has recorded a thread id.
+  void WaitTestRun() {
+    AutoLock lock(&mu_);
+    while (test_threadid_ == 0) {
+      cond_.Wait();
+    }
+  }
+
+  // Returns a closure bound to TestDispatch().  Expects that no thread id
+  // has been recorded yet.
+  OneshotClosure* NewTestDispatch() {
+    {
+      AutoLock lock(&mu_);
+      EXPECT_TRUE(!test_threadid_);
+    }
+    return NewCallback(
+        this, &WorkerThreadManagerTest::TestDispatch);
+  }
+
+  // Keeps calling wm_->Dispatch() until |test_threadid_| has been set, then
+  // checks that it was set from this very thread.  Dispatch() returning
+  // false before that is treated as a fatal error.
+  void TestDispatch() {
+    while (wm_->Dispatch()) {
+      AutoLock lock(&mu_);
+      if (test_threadid_ == 0)
+        continue;
+      EXPECT_EQ(test_threadid_, wm_->GetCurrentThreadId());
+      cond_.Signal();
+      return;
+    }
+    LOG(FATAL) << "Dispatch unexpectedly finished";
+  }
+
+  // Returns a closure bound to TestThreadId(id).
+  OneshotClosure* NewTestThreadId(
+      WorkerThreadManager::ThreadId id) {
+    return NewCallback(
+        this, &WorkerThreadManagerTest::TestThreadId, id);
+  }
+
+  // Checks that it runs on thread |id| and counts the invocation.
+  void TestThreadId(WorkerThreadManager::ThreadId id) {
+    EXPECT_EQ(id, wm_->GetCurrentThreadId());
+    AutoLock lock(&mu_);
+    ++num_test_threadid_;
+    cond_.Signal();
+  }
+
+  // Blocks until TestThreadId() has run at least |num| times.
+  void WaitTestThreadHandle(int num) {
+    AutoLock lock(&mu_);
+    while (num_test_threadid_ < num) {
+      cond_.Wait();
+    }
+  }
+
+  // Resets |periodic_counter_| and returns a permanent closure bound to
+  // TestPeriodicRun().
+  std::unique_ptr<PermanentClosure> NewPeriodicRun() {
+    {
+      AutoLock lock(&mu_);
+      periodic_counter_ = 0;
+    }
+    return NewPermanentCallback(
+        this, &WorkerThreadManagerTest::TestPeriodicRun);
+  }
+
+  // Counts one periodic invocation.
+  void TestPeriodicRun() {
+    AutoLock lock(&mu_);
+    ++periodic_counter_;
+    cond_.Signal();
+  }
+
+  // Blocks until TestPeriodicRun() has run at least |n| times.
+  void WaitTestPeriodicRun(int n) {
+    AutoLock lock(&mu_);
+    while (periodic_counter_ < n) {
+      cond_.Wait();
+    }
+  }
+
+  // Returns a closure bound to TestDescriptorRead(tc).  Expects |tc| to be
+  // in its initial, unregistered state.
+  OneshotClosure* NewTestDescriptorRead(TestReadContext* tc) {
+    {
+      AutoLock lock(&mu_);
+      EXPECT_GT(tc->fd_, 0);
+      EXPECT_LT(tc->num_read_, 0);
+      EXPECT_TRUE(tc->d_ == nullptr);
+    }
+    return NewCallback(
+        this, &WorkerThreadManagerTest::TestDescriptorRead, tc);
+  }
+
+  // Wraps tc->fd_ in a ScopedSocket, registers it with |wm_|, asks to be
+  // notified when it is readable (DoRead) and, if tc->timeout_ > 0, when it
+  // times out (DoTimeout).  Finally publishes the descriptor in |tc| and
+  // sets num_read_ to 0, which is what WaitTestRead(tc, 0) waits for.
+  void TestDescriptorRead(TestReadContext* tc) {
+    ScopedSocket sock;
+    double timeout = 0;
+    {
+      AutoLock lock(&mu_);
+      EXPECT_LT(tc->num_read_, 0);
+      EXPECT_TRUE(tc->d_ == nullptr);
+      timeout = tc->timeout_;
+      EXPECT_FALSE(tc->timeout_called_);
+      sock.reset(tc->fd_);
+    }
+    SocketDescriptor* d =
+        wm_->RegisterSocketDescriptor(
+            std::move(sock), WorkerThreadManager::PRIORITY_HIGH);
+    d->NotifyWhenReadable(
+        NewPermanentCallback(this, &WorkerThreadManagerTest::DoRead, tc));
+    if (timeout > 0) {
+      d->NotifyWhenTimedout(
+          timeout,
+          NewCallback(
+              this, &WorkerThreadManagerTest::DoTimeout, tc));
+    }
+    AutoLock lock(&mu_);
+    tc->num_read_ = 0;
+    tc->d_ = d;
+    cond_.Signal();
+  }
+
+  // Readable callback: reads one byte.  If the read returns <= 0 it stops
+  // reading and schedules DoStopRead() on the current thread.
+  // Note that num_read_ is incremented in both cases, i.e. the failed read
+  // is counted as well (DescriptorReadable expects 2 after one byte + close).
+  void DoRead(TestReadContext* tc) {
+    SocketDescriptor* d = nullptr;
+    {
+      AutoLock lock(&mu_);
+      EXPECT_GE(tc->num_read_, 0);
+      EXPECT_EQ(tc->fd_, tc->d_->fd());
+      EXPECT_EQ(WorkerThreadManager::PRIORITY_HIGH, tc->d_->priority());
+      d = tc->d_;
+    }
+    char buf[1] = { 42 };
+    int n = d->Read(buf, 1);
+    if (n > 0) {
+      EXPECT_EQ(1, n);
+    } else {
+      d->StopRead();
+      wm_->RunClosureInThread(
+          FROM_HERE,
+          wm_->GetCurrentThreadId(),
+          NewCallback(
+              this, &WorkerThreadManagerTest::DoStopRead, tc),
+          WorkerThreadManager::PRIORITY_IMMEDIATE);
+    }
+    AutoLock lock(&mu_);
+    ++tc->num_read_;
+    cond_.Signal();
+  }
+
+  // Blocks until tc->num_read_ is exactly |n|.
+  void WaitTestRead(TestReadContext* tc, int n) {
+    AutoLock lock(&mu_);
+    while (tc->num_read_ != n) {
+      cond_.Wait();
+    }
+  }
+
+  // Timeout callback: stops reading, schedules DoStopRead() on the current
+  // thread and records that the timeout fired.
+  void DoTimeout(TestReadContext* tc) {
+    SocketDescriptor* d = nullptr;
+    {
+      AutoLock lock(&mu_);
+      EXPECT_EQ(tc->fd_, tc->d_->fd());
+      EXPECT_EQ(WorkerThreadManager::PRIORITY_HIGH, tc->d_->priority());
+      EXPECT_GT(tc->timeout_, 0.0);
+      EXPECT_FALSE(tc->timeout_called_);
+      d = tc->d_;
+    }
+    d->StopRead();
+    wm_->RunClosureInThread(
+        FROM_HERE,
+        wm_->GetCurrentThreadId(),
+        NewCallback(
+            this, &WorkerThreadManagerTest::DoStopRead, tc),
+        WorkerThreadManager::PRIORITY_IMMEDIATE);
+    AutoLock lock(&mu_);
+    tc->timeout_called_ = true;
+    cond_.Signal();
+  }
+
+  // Clears the read/timeout notifications, deletes the descriptor, closes
+  // the socket it returns and resets tc->d_ to nullptr, which is what
+  // WaitTestReadFinish() waits for.
+  void DoStopRead(TestReadContext* tc) {
+    int fd;
+    SocketDescriptor* d = nullptr;
+    {
+      AutoLock lock(&mu_);
+      EXPECT_EQ(tc->fd_, tc->d_->fd());
+      EXPECT_EQ(WorkerThreadManager::PRIORITY_HIGH, tc->d_->priority());
+      fd = tc->fd_;
+      d = tc->d_;
+    }
+    d->ClearReadable();
+    d->ClearTimeout();
+    ScopedSocket sock(wm_->DeleteSocketDescriptor(d));
+    EXPECT_EQ(fd, sock.get());
+    sock.Close();
+    AutoLock lock(&mu_);
+    tc->d_ = nullptr;
+    cond_.Signal();
+  }
+
+  // Blocks until DoStopRead() has released the descriptor.
+  void WaitTestReadFinish(TestReadContext* tc) {
+    AutoLock lock(&mu_);
+    while (tc->d_ != nullptr) {
+      cond_.Wait();
+    }
+  }
+
+  // Returns a closure bound to TestDescriptorWrite(tc).  Expects |tc| to be
+  // in its initial, unregistered state.
+  OneshotClosure* NewTestDescriptorWrite(TestWriteContext* tc) {
+    {
+      AutoLock lock(&mu_);
+      EXPECT_GT(tc->fd_, 0);
+      EXPECT_LT(tc->num_write_, 0);
+      EXPECT_TRUE(tc->d_ == nullptr);
+    }
+    return NewCallback(
+        this, &WorkerThreadManagerTest::TestDescriptorWrite, tc);
+  }
+
+  // Wraps tc->fd_ in a ScopedSocket, registers it with |wm_| and asks to be
+  // notified when it is writable (DoWrite).  Finally publishes the
+  // descriptor in |tc| and sets num_write_ to 0.
+  void TestDescriptorWrite(TestWriteContext* tc) {
+    ScopedSocket sock;
+    {
+      AutoLock lock(&mu_);
+      EXPECT_LT(tc->num_write_, 0);
+      EXPECT_TRUE(tc->d_ == nullptr);
+      sock.reset(tc->fd_);
+    }
+    SocketDescriptor* d =
+        wm_->RegisterSocketDescriptor(
+            std::move(sock), WorkerThreadManager::PRIORITY_HIGH);
+    d->NotifyWhenWritable(
+        NewPermanentCallback(this, &WorkerThreadManagerTest::DoWrite, tc));
+    AutoLock lock(&mu_);
+    tc->num_write_ = 0;
+    tc->d_ = d;
+    cond_.Signal();
+  }
+
+  // Writable callback: writes one byte while fewer than total_write_ bytes
+  // have been written.  Once the quota is reached (or the write returns
+  // <= 0) it stops writing, schedules DoStopWrite() on the current thread
+  // and returns without touching num_write_ -- unlike DoRead(), the final
+  // call is not counted.
+  void DoWrite(TestWriteContext* tc) {
+    int num_write = 0;
+    int total_write = 0;
+    SocketDescriptor* d = nullptr;
+    {
+      AutoLock lock(&mu_);
+      EXPECT_GE(tc->num_write_, 0);
+      EXPECT_EQ(tc->fd_, tc->d_->fd());
+      EXPECT_EQ(WorkerThreadManager::PRIORITY_HIGH, tc->d_->priority());
+      num_write = tc->num_write_;
+      total_write = tc->total_write_;
+      d = tc->d_;
+    }
+    char buf[1] = { 42 };
+    int n = 0;
+    if (num_write < total_write) {
+      n = d->Write(buf, 1);
+    }
+    if (n > 0) {
+      EXPECT_EQ(1, n);
+    } else {
+      d->StopWrite();
+      wm_->RunClosureInThread(
+          FROM_HERE,
+          wm_->GetCurrentThreadId(),
+          NewCallback(
+              this, &WorkerThreadManagerTest::DoStopWrite, tc),
+          WorkerThreadManager::PRIORITY_IMMEDIATE);
+      return;
+    }
+    AutoLock lock(&mu_);
+    ++tc->num_write_;
+    cond_.Signal();
+  }
+
+  // Blocks until at least |n| bytes have been written.
+  void WaitTestWrite(TestWriteContext* tc, int n) {
+    AutoLock lock(&mu_);
+    while (tc->num_write_ < n) {
+      cond_.Wait();
+    }
+  }
+
+  // Clears the writable notification, deletes the descriptor, closes the
+  // socket it returns and resets tc->d_ to nullptr, which is what
+  // WaitTestWriteFinish() waits for.
+  void DoStopWrite(TestWriteContext* tc) {
+    int fd;
+    SocketDescriptor* d = nullptr;
+    {
+      AutoLock lock(&mu_);
+      EXPECT_EQ(tc->fd_, tc->d_->fd());
+      EXPECT_EQ(WorkerThreadManager::PRIORITY_HIGH, tc->d_->priority());
+      fd = tc->fd_;
+      d = tc->d_;
+    }
+    d->ClearWritable();
+    ScopedSocket sock(wm_->DeleteSocketDescriptor(d));
+    EXPECT_EQ(fd, sock.get());
+    sock.Close();
+    AutoLock lock(&mu_);
+    tc->d_ = nullptr;
+    cond_.Signal();
+  }
+
+  // Blocks until DoStopWrite() has released the descriptor.
+  void WaitTestWriteFinish(TestWriteContext* tc) {
+    AutoLock lock(&mu_);
+    while (tc->d_ != nullptr) {
+      cond_.Wait();
+    }
+  }
+
+  // Locked accessors for the state recorded by the worker-side helpers.
+  WorkerThreadManager::ThreadId test_threadid() const {
+    AutoLock lock(&mu_);
+    return test_threadid_;
+  }
+
+  int num_test_threadid() const {
+    AutoLock lock(&mu_);
+    return num_test_threadid_;
+  }
+
+  int periodic_counter() const {
+    AutoLock lock(&mu_);
+    return periodic_counter_;
+  }
+
+  // Manager under test; recreated for every test in SetUp().
+  std::unique_ptr<WorkerThreadManager> wm_;
+  // Guards the private members below and the mutable fields of the
+  // Test*Context objects.  Exposed so test bodies can inspect the contexts.
+  Lock mu_;
+
+ private:
+  ConditionVariable cond_;
+  // Thread id recorded by TestRun(); 0 means "not recorded yet".
+  WorkerThreadManager::ThreadId test_threadid_;
+  // Number of TestThreadId() invocations.
+  int num_test_threadid_;
+  // Number of TestPeriodicRun() invocations.
+  int periodic_counter_;
+  DISALLOW_COPY_AND_ASSIGN(WorkerThreadManagerTest);
+};
+
+// Starting with two threads and finishing right away, without scheduling
+// any closure, must work; num_threads() reports the two threads.
+TEST_F(WorkerThreadManagerTest, NoRun) {
+  wm_->Start(2);
+  EXPECT_EQ(2U, wm_->num_threads());
+  wm_->Finish();
+}
+
+// A closure passed to RunClosure() gets executed, and the thread id it
+// observes is non-zero and differs from the one reported for the thread
+// running the test body.
+TEST_F(WorkerThreadManagerTest, RunClosure) {
+  wm_->Start(2);
+  wm_->RunClosure(FROM_HERE, NewTestRun(),
+                  WorkerThreadManager::PRIORITY_LOW);
+  WaitTestRun();
+  wm_->Finish();
+  EXPECT_NE(test_threadid(), static_cast<WorkerThreadManager::ThreadId>(0));
+  EXPECT_NE(test_threadid(), wm_->GetCurrentThreadId());
+}
+
+// Schedules TestDispatch(), which loops on Dispatch(), followed by
+// TestRun().  TestDispatch() only returns once TestRun() has recorded a
+// thread id equal to its own, so the test passes only if TestRun() is
+// executed on the thread that is sitting inside TestDispatch().
+// NOTE(review): presumably Dispatch() is what runs the pending TestRun()
+// closure on that thread -- confirm against WorkerThreadManager::Dispatch.
+TEST_F(WorkerThreadManagerTest, Dispatch) {
+  wm_->Start(1);
+  wm_->RunClosure(FROM_HERE, NewTestDispatch(),
+                  WorkerThreadManager::PRIORITY_LOW);
+  wm_->RunClosure(FROM_HERE, NewTestRun(),
+                  WorkerThreadManager::PRIORITY_LOW);
+  WaitTestRun();
+  wm_->Finish();
+  EXPECT_NE(test_threadid(), static_cast<WorkerThreadManager::ThreadId>(0));
+  EXPECT_NE(test_threadid(), wm_->GetCurrentThreadId());
+}
+
+// Phase 1 learns the id of some worker thread by running TestRun() on it.
+// Phase 2 posts 100 closures to exactly that thread with
+// RunClosureInThread(); each of them checks that it runs on that thread.
+TEST_F(WorkerThreadManagerTest, RunClosureInThread) {
+  wm_->Start(2);
+  wm_->RunClosure(FROM_HERE, NewTestRun(),
+                  WorkerThreadManager::PRIORITY_LOW);
+  WaitTestRun();
+  EXPECT_NE(test_threadid(), static_cast<WorkerThreadManager::ThreadId>(0));
+  EXPECT_NE(test_threadid(), wm_->GetCurrentThreadId());
+  WorkerThreadManager::ThreadId id = test_threadid();
+  // Forget the recorded id/counter before starting phase 2.
+  Reset();
+  EXPECT_TRUE(!test_threadid());
+  EXPECT_EQ(num_test_threadid(), 0);
+  const int kNumTestThreadHandle = 100;
+  for (int i = 0; i < kNumTestThreadHandle; ++i) {
+    wm_->RunClosureInThread(FROM_HERE, id, NewTestThreadId(id),
+                          WorkerThreadManager::PRIORITY_LOW);
+  }
+  WaitTestThreadHandle(kNumTestThreadHandle);
+  wm_->Finish();
+}
+
+// Starts one regular worker plus an extra pool with one thread and checks
+// that closures posted with RunClosureInPool() run on the pool's thread,
+// not on the thread that serves RunClosure().
+TEST_F(WorkerThreadManagerTest, RunClosureInPool) {
+  wm_->Start(1);
+  int pool = wm_->StartPool(1, "test");
+  EXPECT_NE(pool, WorkerThreadManager::kAlarmPool);
+  EXPECT_NE(pool, WorkerThreadManager::kFreePool);
+  EXPECT_EQ(2U, wm_->num_threads());
+
+  // Learn the id of the thread serving RunClosure().
+  wm_->RunClosure(FROM_HERE, NewTestRun(),
+                  WorkerThreadManager::PRIORITY_LOW);
+  WaitTestRun();
+  EXPECT_NE(test_threadid(), static_cast<WorkerThreadManager::ThreadId>(0));
+  EXPECT_NE(test_threadid(), wm_->GetCurrentThreadId());
+  WorkerThreadManager::ThreadId free_id = test_threadid();
+  Reset();
+  EXPECT_TRUE(!test_threadid());
+
+  // Learn the id of the pool's thread; it must be a different one.
+  wm_->RunClosureInPool(FROM_HERE, pool, NewTestRun(),
+                        WorkerThreadManager::PRIORITY_LOW);
+  WaitTestRun();
+  EXPECT_NE(test_threadid(), static_cast<WorkerThreadManager::ThreadId>(0));
+  EXPECT_NE(test_threadid(), wm_->GetCurrentThreadId());
+  EXPECT_NE(test_threadid(), free_id);
+  WorkerThreadManager::ThreadId pool_id = test_threadid();
+  Reset();
+  EXPECT_TRUE(!test_threadid());
+  EXPECT_TRUE(!num_test_threadid());
+  // Every further closure posted to the pool must land on |pool_id|.
+  const int kNumTestThreadHandle = 100;
+  for (int i = 0; i < kNumTestThreadHandle; ++i) {
+    wm_->RunClosureInPool(FROM_HERE, pool, NewTestThreadId(pool_id),
+                          WorkerThreadManager::PRIORITY_LOW);
+  }
+  WaitTestThreadHandle(kNumTestThreadHandle);
+  wm_->Finish();
+}
+
+// Registers a periodic closure with the value 100 (NOTE(review): presumably
+// a period in milliseconds -- confirm against RegisterPeriodicClosure),
+// waits for two invocations and expects at least 200 ms to have elapsed
+// since just before the registration.
+TEST_F(WorkerThreadManagerTest, PeriodicClosure) {
+  wm_->Start(1);
+  SimpleTimer timer;
+  PeriodicClosureId id = wm_->RegisterPeriodicClosure(
+      FROM_HERE, 100, NewPeriodicRun());
+  WaitTestPeriodicRun(2);
+  wm_->UnregisterPeriodicClosure(id);
+  wm_->Finish();
+  EXPECT_GE(timer.GetInMs(), 200);
+}
+
+// Read notification on a socket pair: socks[0] is handed to the worker
+// side (via |tc|), socks[1] stays with the test as |s|.
+//  1. wait until the descriptor is registered (num_read_ == 0),
+//  2. write one byte and wait until DoRead() has consumed it,
+//  3. close |s| and wait until the worker side has released its descriptor;
+//     the failed read after the close is counted too, hence num_read_ == 2.
+TEST_F(WorkerThreadManagerTest, DescriptorReadable) {
+  wm_->Start(1);
+  int socks[2];
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  TestReadContext tc(socks[0], 0.0);
+  ScopedSocket s(socks[1]);
+  wm_->RunClosure(FROM_HERE, NewTestDescriptorRead(&tc),
+                  WorkerThreadManager::PRIORITY_LOW);
+  WaitTestRead(&tc, 0);
+  {
+    AutoLock lock(&mu_);
+    EXPECT_EQ(0, tc.num_read_);
+    EXPECT_TRUE(tc.d_ != nullptr);
+  }
+  char buf[1] = { 42 };
+  EXPECT_EQ(1, s.Write(buf, 1));
+  WaitTestRead(&tc, 1);
+  {
+    AutoLock lock(&mu_);
+    EXPECT_EQ(1, tc.num_read_);
+    EXPECT_TRUE(tc.d_ != nullptr);
+  }
+  s.Close();
+  WaitTestReadFinish(&tc);
+  {
+    AutoLock lock(&mu_);
+    EXPECT_EQ(2, tc.num_read_);
+    EXPECT_TRUE(tc.d_ == nullptr);
+  }
+  wm_->Finish();
+}
+
+// Write notification on a socket pair: socks[1] is handed to the worker
+// side (via |tc|), socks[0] stays with the test as |s0|.
+//  1. wait until the worker side has written at least one byte,
+//  2. drain |s0| one byte at a time until the peer end is closed,
+//  3. wait until the worker side has released its descriptor and check
+//     that exactly kTotalWrite bytes were written and read.
+TEST_F(WorkerThreadManagerTest, DescriptorWritable) {
+  wm_->Start(1);
+  int socks[2];
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  const int kTotalWrite = 8192;
+  // socks[1] is owned by the worker side: TestDescriptorWrite() wraps it in
+  // a ScopedSocket and DoStopWrite() closes it.  It must not be wrapped in a
+  // second ScopedSocket here, otherwise the same descriptor would be closed
+  // twice.  The read tests likewise only wrap the peer end.
+  TestWriteContext tc(socks[1], kTotalWrite);
+  ScopedSocket s0(socks[0]);
+  wm_->RunClosure(FROM_HERE, NewTestDescriptorWrite(&tc),
+                  WorkerThreadManager::PRIORITY_LOW);
+  WaitTestWrite(&tc, 1);
+  {
+    AutoLock lock(&mu_);
+    EXPECT_GE(tc.num_write_, 1);
+    EXPECT_TRUE(tc.d_ != nullptr);
+  }
+  char buf[1] = { 42 };
+  int total_read = 0;
+  for (;;) {
+    int n = s0.Read(buf, 1);
+    if (n == 0) {
+      // The worker side closed its end in DoStopWrite().
+      break;
+    } else if (n < 0) {
+      PLOG(ERROR) << "read " << n;
+      break;
+    }
+    EXPECT_EQ(1, n);
+    total_read += n;
+  }
+  WaitTestWriteFinish(&tc);
+  {
+    AutoLock lock(&mu_);
+    EXPECT_TRUE(tc.d_ == nullptr);
+    EXPECT_EQ(kTotalWrite, tc.num_write_);
+    EXPECT_EQ(kTotalWrite, total_read);
+  }
+  wm_->Finish();
+}
+
+// Timeout notification: like DescriptorReadable, but the descriptor is
+// registered with a timeout value of 0.5 and the peer is never closed.
+// After one byte has been read no more data arrives, so the teardown must
+// come from DoTimeout(): num_read_ stays at 1 and timeout_called_ is set.
+// NOTE(review): the EXPECT_FALSE(timeout_called_) check after the first
+// read assumes the byte is delivered before the timeout expires.
+TEST_F(WorkerThreadManagerTest, DescriptorTimeout) {
+  wm_->Start(1);
+  int socks[2];
+  PCHECK(OpenSocketPairForTest(socks) == 0);
+  TestReadContext tc(socks[0], 0.5);
+  ScopedSocket s(socks[1]);
+  wm_->RunClosure(FROM_HERE, NewTestDescriptorRead(&tc),
+                  WorkerThreadManager::PRIORITY_LOW);
+  WaitTestRead(&tc, 0);
+  {
+    AutoLock lock(&mu_);
+    EXPECT_EQ(0, tc.num_read_);
+    EXPECT_TRUE(tc.d_ != nullptr);
+  }
+  char buf[1] = { 42 };
+  EXPECT_EQ(1, s.Write(buf, 1));
+  WaitTestRead(&tc, 1);
+  {
+    AutoLock lock(&mu_);
+    EXPECT_EQ(1, tc.num_read_);
+    EXPECT_FALSE(tc.timeout_called_);
+    EXPECT_TRUE(tc.d_ != nullptr);
+  }
+  WaitTestReadFinish(&tc);
+  {
+    AutoLock lock(&mu_);
+    EXPECT_EQ(1, tc.num_read_);
+    EXPECT_TRUE(tc.timeout_called_);
+    EXPECT_TRUE(tc.d_ == nullptr);
+  }
+  wm_->Finish();
+}
+
+}  // namespace devtools_goma
diff --git a/client/worker_thread_unittest.cc b/client/worker_thread_unittest.cc
new file mode 100644
index 0000000..5c0ce3f
--- /dev/null
+++ b/client/worker_thread_unittest.cc
@@ -0,0 +1,62 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "worker_thread.h"
+
+#include "compiler_specific.h"
+#include "socket_descriptor.h"
+#include "glog/logging.h"
+
+#include <gtest/gtest.h>
+
+namespace devtools_goma {
+
+class WorkerThreadTest : public ::testing::Test {
+ protected:
+  // Pushes three delayed closures out of order and checks that the queue
+  // hands them back as "first", "second", "third".
+  void TestDelayedClosureQueue() {
+    using DelayedClosureImpl =
+        WorkerThreadManager::WorkerThread::DelayedClosureImpl;
+    using DelayedClosureQueue =
+        WorkerThreadManager::WorkerThread::DelayedClosureQueue;
+
+    DelayedClosureQueue queue;
+    DelayedClosureImpl* const first =
+        new DelayedClosureImpl("first", 1, nullptr);
+    DelayedClosureImpl* const second =
+        new DelayedClosureImpl("second", 2, nullptr);
+    DelayedClosureImpl* const third =
+        new DelayedClosureImpl("third", 3, nullptr);
+
+    ASSERT_TRUE(queue.empty());
+    // Deliberately not inserted in the expected pop order.
+    queue.push(first);
+    queue.push(third);
+    queue.push(second);
+    EXPECT_EQ(3U, queue.size());
+
+    DelayedClosureImpl* const expected_order[] = { first, second, third };
+    for (DelayedClosureImpl* expected : expected_order) {
+      ASSERT_TRUE(!queue.empty());
+      DelayedClosureImpl* const head = queue.top();
+      EXPECT_EQ(expected, head);
+      queue.pop();
+      head->Run();
+    }
+
+    ASSERT_TRUE(queue.empty());
+  }
+};
+
+// The actual checks live in the fixture method.
+// NOTE(review): presumably because only the fixture class has access to
+// WorkerThread's nested types -- confirm against worker_thread.h.
+TEST_F(WorkerThreadTest, DelayedClosureQueue) {
+  TestDelayedClosureQueue();
+}
+
+}  // namespace devtools_goma
diff --git a/codereview.settings b/codereview.settings
new file mode 100644
index 0000000..2e94883
--- /dev/null
+++ b/codereview.settings
@@ -0,0 +1,7 @@
+# This file is used by git-cl to get repository specific information.
+CC_LIST: goma+review@google.com
+GERRIT_HOST: True
+GERRIT_SQUASH_UPLOADS: True
+# Without this, git cl won't work with corpsso.
+# cf. http://crbug.com/603378
+GERRIT_SKIP_ENSURE_AUTHENTICATED: true
diff --git a/lib/BUILD.gn b/lib/BUILD.gn
new file mode 100644
index 0000000..6a2d6de
--- /dev/null
+++ b/lib/BUILD.gn
@@ -0,0 +1,287 @@
+# Copyright 2014 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+proto_out_dir = "prototmp"
+
+import("//third_party/protobuf/proto_library.gni")
+
+proto_library("goma_proto") {
+  sources = [
+    "goma_data.proto",
+    "goma_log.proto",
+  ]
+}
+
+config("lib_config") {
+  include_dirs = [ "." ]
+}
+# Common library: command line and flag parsing, compiler flag handling,
+# compression, ExecReq normalizer/verifier, and file/path helpers.
+static_library("lib") {
+  sources = [
+    "cmdline_parser.cc",
+    "cmdline_parser.h",
+    "compiler_flags.cc",
+    "compiler_flags.h",
+    "compress_util.cc",
+    "compress_util.h",
+    "execreq_normalizer.cc",
+    "execreq_normalizer.h",
+    "execreq_verifier.cc",
+    "execreq_verifier.h",
+    "file_helper.cc",
+    "file_helper.h",
+    "file_reader.cc",
+    "file_reader.h",
+    "fileflag.cc",
+    "fileflag.h",
+    "flag_parser.cc",
+    "flag_parser.h",
+    "goma_data_util.cc",
+    "goma_data_util.h",
+    "known_warning_options.h",
+    "path_resolver.cc",
+    "path_resolver.h",
+    "path_util.cc",
+    "path_util.h",
+    "scoped_fd.cc",
+    "scoped_fd.h",
+    "unordered.h",
+  ]
+
+  # Exports this directory as an include dir to every dependent target.
+  public_configs = [ ":lib_config" ]
+
+  # Listed as public_deps so that targets depending on ":lib" also get
+  # these dependencies.
+  public_deps = [
+    ":goma_proto",
+    "//base",
+    "//third_party:glog",
+    "//third_party/protobuf:protobuf_full",
+  ]
+  # liblzma is only pulled in when the build enables lzma support.
+  if (enable_lzma) {
+    public_deps += [ "//third_party:liblzma" ]
+  }
+}
+
+# Hash helpers (goma_hash.cc / goma_hash.h).
+static_library("goma_hash") {
+  sources = [
+    "goma_hash.cc",
+    "goma_hash.h",
+  ]
+  public_deps = [
+    ":lib",
+  ]
+  deps = [
+    "//base",
+  ]
+  # Platform specific dependencies.
+  # NOTE(review): boringssl is only linked on linux; presumably the other
+  # platforms use a hash implementation provided by the OS -- confirm in
+  # goma_hash.cc.
+  if (os == "linux") {
+    deps += [ "//third_party/boringssl:boringssl" ]
+  }
+  if (os == "win") {
+    deps += [ "//third_party:glog" ]
+  }
+}
+
+# File helpers (goma_file.cc / goma_file.h).  Depends on ":goma_hash"
+# privately, while ":lib" and "//base" are re-exported to dependents.
+static_library("goma_file") {
+  sources = [
+    "goma_file.cc",
+    "goma_file.h",
+  ]
+  public_deps = [
+    ":lib",
+    "//base",
+  ]
+  deps = [
+    ":goma_hash",
+    "//third_party:glog",
+  ]
+}
+
+executable("cmdline_parser_unittest") {
+  testonly = true
+  sources = [
+    "cmdline_parser_unittest.cc",
+  ]
+  deps = [
+    ":lib",
+    "//base:goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:glog",
+    "//third_party:gtest",
+  ]
+}
+
+executable("compiler_flags_test") {
+  testonly = true
+  sources = [
+    "compiler_flags_test.cc",
+  ]
+  deps = [
+    ":lib",
+    "//base:goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:glog",
+    "//third_party:gtest",
+  ]
+}
+
+executable("compress_util_unittest") {
+  testonly = true
+  sources = [
+    "compress_util_unittest.cc",
+  ]
+  deps = [
+    ":lib",
+    "//base:goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:glog",
+    "//third_party:gtest",
+  ]
+}
+
+executable("execreq_normalizer_unittest") {
+  testonly = true
+  sources = [
+    "execreq_normalizer_test.cc",
+  ]
+  deps = [
+    ":lib",
+    "//base:goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:glog",
+    "//third_party:gtest",
+  ]
+}
+
+executable("execreq_verifier_unittest") {
+  testonly = true
+  sources = [
+    "execreq_verifier_test.cc",
+  ]
+  deps = [
+    ":lib",
+    "//base:goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:glog",
+    "//third_party:gtest",
+  ]
+}
+
+executable("file_reader_unittest") {
+  testonly = true
+  sources = [
+    "file_reader_unittest.cc",
+  ]
+  deps = [
+    ":lib",
+    "//base:goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:glog",
+    "//third_party:gtest",
+  ]
+}
+
+executable("flag_parser_unittest") {
+  testonly = true
+  sources = [
+    "flag_parser_unittest.cc",
+  ]
+  deps = [
+    ":lib",
+    "//base:goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:gtest",
+  ]
+}
+
+executable("goma_data_util_unittest") {
+  testonly = true
+  sources = [
+    "goma_data_util_unittest.cc",
+  ]
+  deps = [
+    ":lib",
+    "//base:goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:gtest",
+  ]
+}
+
+executable("goma_hash_unittest") {
+  testonly = true
+  sources = [
+    "goma_hash_unittest.cc",
+  ]
+  deps = [
+    ":goma_hash",
+    "//base:goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:gtest",
+  ]
+}
+
+executable("goma_file_unittest") {
+  testonly = true
+  sources = [
+    "goma_file_unittest.cc",
+  ]
+  deps = [
+    ":goma_file",
+    "//base:goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:gtest",
+  ]
+}
+
+executable("path_resolver_unittest") {
+  testonly = true
+  sources = [
+    "path_resolver_unittest.cc",
+  ]
+  deps = [
+    ":lib",
+    "//base:goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:gtest",
+  ]
+}
+
+executable("path_unittest") {
+  testonly = true
+  sources = [
+    "path_unittest.cc",
+  ]
+  deps = [
+    ":lib",
+    "//base:goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:gtest",
+  ]
+}
+
+executable("path_util_unittest") {
+  testonly = true
+  sources = [
+    "path_util_unittest.cc",
+  ]
+  deps = [
+    ":lib",
+    "//base:goma_unittest",
+    "//build/config/sanitizers:deps",
+    "//third_party:gtest",
+  ]
+}
+
+proto_library("goma_stats_proto") {
+  sources = [
+    "goma_stats.proto",
+  ]
+}
+
+proto_library("goma_statz_stats_proto") {
+  sources = [
+    "goma_statz_stats.proto",
+  ]
+
+  deps = [
+    ":goma_stats_proto",
+  ]
+}
diff --git a/lib/cmdline_parser.cc b/lib/cmdline_parser.cc
new file mode 100644
index 0000000..bf26d09
--- /dev/null
+++ b/lib/cmdline_parser.cc
@@ -0,0 +1,176 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "cmdline_parser.h"
+
+#include <ctype.h>
+
+#include "basictypes.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+using std::string;
+
+namespace devtools_goma {
+
+// Parsing Command-Line Arguments (on posix for gcc, javac)
+//
+// Splits |cmdline| into arguments and appends them to |argv| (existing
+// elements of |argv| are kept).  Rules implemented below:
+//  - unquoted, unescaped whitespace separates arguments.
+//  - inside single quotes every character is taken as-is.
+//  - inside double quotes a backslash escapes only '\\' and '"'; in front
+//    of any other character the backslash itself is kept.
+//  - outside quotes a backslash makes the following character literal.
+//  - backslash + newline is dropped (except inside single quotes).
+//
+// Returns false if |cmdline| ends with a dangling backslash or inside an
+// unclosed quote.  Note that the arguments parsed so far, including the
+// unfinished last one, have already been appended to |argv| in that case.
+bool ParsePosixCommandLineToArgv(const string& cmdline,
+                                 std::vector<string>* argv) {
+  bool dquote = false;
+  bool squote = false;
+  bool backslash = false;
+  bool in_arg = false;
+  string arg;
+
+  for (size_t i = 0; i < cmdline.size(); ++i) {
+    const char ch = cmdline[i];
+    // isspace() has undefined behavior for negative arguments other than
+    // EOF, and a plain char is negative for non-ASCII bytes (e.g. UTF-8
+    // file names) where char is signed.  Convert via unsigned char first.
+    const bool is_space = isspace(static_cast<unsigned char>(ch)) != 0;
+    if (!in_arg) {
+      if (is_space) continue;
+      in_arg = true;
+    }
+    DCHECK(in_arg);
+    if (is_space && !squote && !dquote && !backslash) {
+      in_arg = false;
+      argv->push_back(arg);
+      arg.clear();
+      continue;
+    }
+    if (squote) {  // in single quote, anything will be saved as-is.
+      if (ch == '\'') {
+        squote = false;
+        continue;
+      }
+      arg += ch;
+      continue;
+    }
+    DCHECK(!squote);
+    if (backslash) {
+      backslash = false;
+      if (ch == '\n')
+        continue;
+      // "a\b" -> a\b, "a\\b" -> a\b, "a\"b" -> a"b, "a\bc" -> abc
+      if (dquote && ch != '\\' && ch != '"')
+        arg += '\\';
+      arg += ch;
+      continue;
+    }
+    DCHECK(!backslash);
+    if (ch == '\\') {  // backslash is available inside quote.
+      backslash = true;
+      continue;
+    }
+    if (dquote) {
+      if (ch == '\"') {
+        dquote = false;
+        continue;
+      }
+      arg += ch;
+      continue;
+    }
+    DCHECK(!dquote);
+    if (ch == '\'') {
+      squote = true;
+      continue;
+    } else if (ch == '\"') {
+      dquote = true;
+      continue;
+    }
+    arg += ch;
+  }
+  // Flush the argument that was still being built when the input ended.
+  if (in_arg) {
+    argv->push_back(arg);
+  }
+  if (backslash) {
+    LOG(ERROR) << "no next char for backslash: " << cmdline;
+    return false;
+  }
+  if (squote || dquote) {
+    LOG(ERROR) << "no closing quote: " << cmdline;
+    return false;
+  }
+  return true;
+}
+
+// Parsing Command-Line Arguments (on Windows)
+// http://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx
+//
+// Splits |cmdline| into arguments and appends every non-empty one to
+// |argv|.  Returns false if |cmdline| ends inside a quoted string; the
+// arguments parsed so far have already been appended in that case.
+bool ParseWinCommandLineToArgv(const string& cmdline,
+                               std::vector<string>* argv) {
+  // Backslashes that have been read but not emitted yet: their meaning
+  // depends on whether the run is followed by a double quotation mark.
+  size_t pending_backslashes = 0;
+  bool quoted = false;
+  string current;
+
+  for (const char c : cmdline) {
+    if (c == '\\') {
+      ++pending_backslashes;
+      continue;
+    }
+
+    if (c == '"') {
+      // One backslash is placed in the argument for every pair of
+      // backslashes preceding the double quotation mark.
+      current.append(pending_backslashes / 2, '\\');
+      if (pending_backslashes % 2 == 1) {
+        // Odd count: the remaining backslash escapes the quotation mark,
+        // which becomes a literal (").
+        current += '"';
+      } else {
+        // Even count (including none): the quotation mark is a string
+        // delimiter.
+        quoted = !quoted;
+      }
+      pending_backslashes = 0;
+      continue;
+    }
+
+    // Backslashes are interpreted literally, unless they immediately
+    // precede a double quotation mark.
+    current.append(pending_backslashes, '\\');
+    pending_backslashes = 0;
+
+    const bool is_blank = (c == ' ' || c == '\t' || c == '\r' || c == '\n');
+    if (is_blank && !quoted) {
+      // We cannot handle "" as an empty argument, but it might
+      // never be a problem.
+      if (!current.empty())
+        argv->push_back(current);
+      current.clear();
+    } else {
+      current += c;
+    }
+  }
+
+  // Last argument: trailing backslashes are not followed by a double
+  // quotation mark, so they are literal as well.
+  current.append(pending_backslashes, '\\');
+  if (!current.empty())
+    argv->push_back(current);
+
+  if (quoted) {
+    LOG(ERROR) << "no closing quote: " << cmdline;
+    return false;
+  }
+
+  return true;
+}
+
+}  // namespace devtools_goma
diff --git a/lib/cmdline_parser.h b/lib/cmdline_parser.h
new file mode 100644
index 0000000..2850454
--- /dev/null
+++ b/lib/cmdline_parser.h
@@ -0,0 +1,28 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_LIB_CMDLINE_PARSER_H_
+#define DEVTOOLS_GOMA_LIB_CMDLINE_PARSER_H_
+
+#include <string>
+#include <vector>
+
+using std::string;
+
+namespace devtools_goma {
+
+// Parsing Command-Line Arguments (on posix for gcc, javac)
+// Handles single quotes, double quotes and backslash escapes.
+// Note: parsed |cmdline| will be appended to argv.
+// Returns false if |cmdline| ends with a dangling backslash or an unclosed
+// quote; whatever was parsed up to that point is still appended to |argv|.
+bool ParsePosixCommandLineToArgv(const string& cmdline,
+                                 std::vector<string>* argv);
+
+// Parsing Command-Line Arguments (on Windows)
+// Handles double quotes and backslashes preceding a double quote.
+// Note: parsed |cmdline| will be appended to argv.  Empty arguments
+// (e.g. "") are not appended.
+// Returns false if |cmdline| ends inside a quoted string; whatever was
+// parsed up to that point is still appended to |argv|.
+bool ParseWinCommandLineToArgv(const string& cmdline,
+                               std::vector<string>* argv);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_LIB_CMDLINE_PARSER_H_
diff --git a/lib/cmdline_parser_unittest.cc b/lib/cmdline_parser_unittest.cc
new file mode 100644
index 0000000..8fadb55
--- /dev/null
+++ b/lib/cmdline_parser_unittest.cc
@@ -0,0 +1,346 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "cmdline_parser.h"
+
+#include "glog/logging.h"
+#include "gtest/gtest.h"
+using std::string;
+
+TEST(CmdlineParserTest, ParsePosixCommandLineToArgvSimple) {
+  // Unquoted words separated by single spaces become one argument each.
+  std::vector<string> parsed;
+  const string cmdline = "a b c";
+  EXPECT_TRUE(devtools_goma::ParsePosixCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a",
+      "b",
+      "c",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParsePosixCommandLineToArgvWithQuote) {
+  // Double- and single-quoted sections keep their embedded spaces.
+  std::vector<string> parsed;
+  const string cmdline = "a \"b \" \'c \'";
+  EXPECT_TRUE(devtools_goma::ParsePosixCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a",
+      "b ",
+      "c ",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParsePosixCommandLineToArgvWithQuoteInDifferentQuote) {
+  // A quote character inside the other kind of quotes is kept literally.
+  std::vector<string> parsed;
+  const string cmdline = "a \"b \' \" \'c \" \'";
+  EXPECT_TRUE(devtools_goma::ParsePosixCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a",
+      "b \' ",
+      "c \" ",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParsePosixCommandLineToArgvNoCloseQuoteAfterBackslash) {
+  // Backslash escapes a quote inside "..." but is literal inside '...'.
+  std::vector<string> parsed;
+  const string cmdline = "a \"b \\\" \" \'c \\\'";
+  EXPECT_TRUE(devtools_goma::ParsePosixCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a",
+      "b \" ",
+      "c \\",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParsePosixCommandLineToArgvKeepNonEscapeInDQuote) {
+  // Inside "...", a backslash before an ordinary character is preserved.
+  std::vector<string> parsed;
+  const string cmdline = "a \"b \\c \" \"d\\?e\" f";
+  EXPECT_TRUE(devtools_goma::ParsePosixCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a",
+      "b \\c ",
+      "d\\?e",
+      "f",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParsePosixCommandLineToArgvConjunctSpaceWithBackslash) {
+  // An escaped space outside quotes does not split the argument.
+  std::vector<string> parsed;
+  const string cmdline = "a b\\ c d";
+  EXPECT_TRUE(devtools_goma::ParsePosixCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a",
+      "b c",
+      "d",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParsePosixCommandLineToArgvKeepCharAfterBackslashAsIs) {
+  // Outside quotes, backslash + ordinary character yields just the character.
+  std::vector<string> parsed;
+  const string cmdline = "a b\\c d";
+  EXPECT_TRUE(devtools_goma::ParsePosixCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a",
+      "bc",
+      "d",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParsePosixCommandLineToArgvBackslashAfterBackslash) {
+  // Outside quotes, an escaped backslash yields a single backslash.
+  std::vector<string> parsed;
+  const string cmdline = "a b\\\\c d";
+  EXPECT_TRUE(devtools_goma::ParsePosixCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a",
+      "b\\c",
+      "d",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParsePosixCommandLineToArgvIgnoreEndlAfterBackslash) {
+  // Backslash-newline outside quotes is dropped entirely.
+  std::vector<string> parsed;
+  const string cmdline = "a b\\\nc d";
+  EXPECT_TRUE(devtools_goma::ParsePosixCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a",
+      "bc",
+      "d",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParsePosixCommandLineToArgvConjunctCharAfterBackslash) {
+  // Escaped spaces glue quoted and unquoted pieces into a single argument.
+  std::vector<string> parsed;
+  const string cmdline = "a b\\ \"c \" \"d \"\\ e f\\ \' g \'\\ h i";
+  EXPECT_TRUE(devtools_goma::ParsePosixCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a",
+      "b c ",
+      "d  e",
+      "f  g  h",
+      "i",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParsePosixCommandLineToArgvBackslashEndlInQuote) {
+  // Backslash-newline vanishes inside "..." but is kept verbatim inside '...'.
+  std::vector<string> parsed;
+  const string cmdline = "a \"b\\\nc\" \'d\\\ne\' f";
+  EXPECT_TRUE(devtools_goma::ParsePosixCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a",
+      "bc",
+      "d\\\ne",
+      "f",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParsePosixCommandLineToArgvSingleBackslashInQuote) {
+  // A lone backslash before a letter survives in both kinds of quotes.
+  std::vector<string> parsed;
+  const string cmdline = "a \"b\\c\" \'d\\e\' f";
+  EXPECT_TRUE(devtools_goma::ParsePosixCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a",
+      "b\\c",
+      "d\\e",
+      "f",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParsePosixCommandLineToArgvDoubleBackslashesInQuote) {
+  // Two backslashes collapse to one inside "..." and stay two inside '...'.
+  std::vector<string> parsed;
+  const string cmdline = "a \"b\\\\c\" \'d\\\\e\' f";
+  EXPECT_TRUE(devtools_goma::ParsePosixCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a",
+      "b\\c",
+      "d\\\\e",
+      "f",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParsePosixCommandLineToArgvTripleBackslashesInQuote) {
+  // Three backslashes become two inside "..." and stay three inside '...'.
+  std::vector<string> parsed;
+  const string cmdline = "a \"b\\\\\\c\" \'d\\\\\\e\' f";
+  EXPECT_TRUE(devtools_goma::ParsePosixCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a",
+      "b\\\\c",
+      "d\\\\\\e",
+      "f",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParsePosixCommandLineToArgvReturnFalseForUnfinished) {
+  // Each input ends in the middle of a quoted section or of an escape.
+  using devtools_goma::ParsePosixCommandLineToArgv;
+  const string unclosed_double_quote = "\"";
+  const string unclosed_single_quote = "\'";
+  const string dangling_backslash = "\\";
+
+  std::vector<string> parsed;
+  EXPECT_FALSE(ParsePosixCommandLineToArgv(unclosed_double_quote, &parsed));
+  EXPECT_FALSE(ParsePosixCommandLineToArgv(unclosed_single_quote, &parsed));
+  EXPECT_FALSE(ParsePosixCommandLineToArgv(dangling_backslash, &parsed));
+}
+
+TEST(CmdlineParserTest, ParsePosixCommandLineShouldKeepOriginalArgv) {
+  // Parsed arguments are appended; pre-existing entries must be untouched.
+  std::vector<string> parsed;
+  parsed.push_back("0");
+  parsed.push_back("1");
+
+  const string cmdline = "a b";
+  EXPECT_TRUE(devtools_goma::ParsePosixCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "0",
+      "1",
+      "a",
+      "b",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+// All test vectors for ParseWinCommandLineToArgv come from:
+// Results of Parsing Command Lines in
+// http://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx
+//
+// Note:
+// In the document argv[3] is always capitalized, but I assume that is a typo.
+TEST(CmdlineParserTest, ParseWinCommandLineToArgvRule1) {
+  // Surrounding double quotes are stripped from an argument.
+  std::vector<string> parsed;
+  const string cmdline = "\"abc\" d e";
+  EXPECT_TRUE(devtools_goma::ParseWinCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "abc",
+      "d",
+      "e",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParseWinCommandLineToArgvRule2) {
+  // Backslashes not before a quote are literal; quotes may sit mid-argument.
+  std::vector<string> parsed;
+  const string cmdline = "a\\\\\\b d\"e f\"g h";
+  EXPECT_TRUE(devtools_goma::ParseWinCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a\\\\\\b",
+      "de fg",
+      "h",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+
+TEST(CmdlineParserTest, ParseWinCommandLineToArgvRule3) {
+  // Three backslashes before a quote: one backslash plus a literal quote.
+  std::vector<string> parsed;
+  const string cmdline = "a\\\\\\\"b c d";
+  EXPECT_TRUE(devtools_goma::ParseWinCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a\\\"b",
+      "c",
+      "d",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParseWinCommandLineToArgvRule4) {
+  // Four backslashes before a quote: two backslashes, then a quoted section.
+  std::vector<string> parsed;
+  const string cmdline = "a\\\\\\\\\"b c\" d e";
+  EXPECT_TRUE(devtools_goma::ParseWinCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a\\\\b c",
+      "d",
+      "e",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParseWinCommandLineToArgvRule5) {
+  // A command line made only of whitespace yields no arguments.
+  std::vector<string> parsed;
+  EXPECT_TRUE(devtools_goma::ParseWinCommandLineToArgv(" \t\n\r", &parsed));
+  EXPECT_EQ(0U, parsed.size());
+}
+
+TEST(CmdlineParserTest, ParseWinCommandLineToArgvRule6) {
+  // Runs of spaces, tabs, CR and LF all act as argument separators.
+  std::vector<string> parsed;
+  const string cmdline = "  \n a \r  b \t  c  ";
+  EXPECT_TRUE(devtools_goma::ParseWinCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "a",
+      "b",
+      "c",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParseWinCommandLineToArgvRule7) {
+  // Whitespace is preserved inside quotes and skipped outside of them.
+  std::vector<string> parsed;
+  const string cmdline = " \n \" a \" b\t\n\t \"c \"\n\t\" d\t\" ";
+  EXPECT_TRUE(devtools_goma::ParseWinCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      " a ",
+      "b",
+      "c ",
+      " d\t",
+  };
+  EXPECT_EQ(want, parsed);
+}
+
+TEST(CmdlineParserTest, ParseWinCommandLineToArgvReturnFalseWithoutEndQuote) {
+  // A lone opening double quote is never closed.
+  std::vector<string> parsed;
+  EXPECT_FALSE(devtools_goma::ParseWinCommandLineToArgv("\"", &parsed));
+}
+
+TEST(CmdlineParserTest, ParseWinCommandLineToArgvShouldKeepOriginalArgv) {
+  // Parsed arguments are appended; pre-existing entries must be untouched.
+  std::vector<string> parsed;
+  parsed.push_back("0");
+  parsed.push_back("1");
+
+  const string cmdline = "a b";
+  EXPECT_TRUE(devtools_goma::ParseWinCommandLineToArgv(cmdline, &parsed));
+  const std::vector<string> want = {
+      "0",
+      "1",
+      "a",
+      "b",
+  };
+  EXPECT_EQ(want, parsed);
+}
diff --git a/lib/compiler_flags.cc b/lib/compiler_flags.cc
new file mode 100644
index 0000000..43d7438
--- /dev/null
+++ b/lib/compiler_flags.cc
@@ -0,0 +1,2068 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "compiler_flags.h"
+
+#include <ctype.h>
+#include <algorithm>
+#include <cstdio>
+#include <cstdlib>
+#include <iterator>
+#include <sstream>
+
+#include "cmdline_parser.h"
+#include "file_helper.h"
+#include "flag_parser.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+#include "join.h"
+#include "known_warning_options.h"
+#include "path.h"
+#include "path_resolver.h"
+#include "path_util.h"
+#include "split.h"
+#include "string_piece_utils.h"
+#include "strutil.h"
+using std::string;
+
+namespace devtools_goma {
+
+namespace {
+
+// Normalize paths surrounded by '"' to paths without it.
+// e.g. "c:\Windows\Program Files" -> c:\Windows\Program Files.
+string NormalizeWin32Path(absl::string_view path) {
+  // TODO: omit orphan '"' at the end of path?
+  static const char kQuote[] = "\"";
+  if (!strings::StartsWith(path, kQuote)) {
+    return string(path);
+  }
+  // Drop the opening quote, then the closing one when it is present.
+  path.remove_prefix(1);
+  if (strings::EndsWith(path, kQuote)) path.remove_suffix(1);
+  return string(path);
+}
+
+string ToNormalizedBasename(absl::string_view in) {
+  // Hand-rolled basename: file::Basename does not understand "\\" as a
+  // path delimiter on non-Windows, so both separators are handled here.
+  const absl::string_view::size_type sep_pos = in.find_last_of("/\\");
+  const absl::string_view leaf =
+      sep_pos == absl::string_view::npos ? in : in.substr(sep_pos + 1);
+  string lowered = string(leaf);
+  for (char& ch : lowered) {
+    ch = ::tolower(ch);
+  }
+  return lowered;
+}
+
+}  // namespace
+
+/* static */
+std::unique_ptr<CompilerFlags> CompilerFlags::New(
+    const std::vector<string>& args, const string& cwd) {
+  if (args.empty()) {
+    LOG(ERROR) << "Empty args";
+    return nullptr;
+  }
+
+  // args[0] selects the flags class; the first matching predicate wins.
+  using FlagsPtr = std::unique_ptr<CompilerFlags>;
+  const string& command = args[0];
+  if (IsGCCCommand(command)) return FlagsPtr(new GCCFlags(args, cwd));
+  // clang-cl gets compatible options with cl.exe.
+  // See Also: http://clang.llvm.org/docs/UsersManual.html#clang-cl
+  if (IsVCCommand(command) || IsClangClCommand(command))
+    return FlagsPtr(new VCFlags(args, cwd));
+  if (IsJavacCommand(command)) return FlagsPtr(new JavacFlags(args, cwd));
+  if (IsClangTidyCommand(command))
+    return FlagsPtr(new ClangTidyFlags(args, cwd));
+  if (IsJavaCommand(command)) return FlagsPtr(new JavaFlags(args, cwd));
+
+  LOG(WARNING) << "Unknown command: " << command;
+  return nullptr;
+}
+
+/* static */
+std::unique_ptr<CompilerFlags> CompilerFlags::MustNew(
+    const std::vector<string>& args, const string& cwd) {
+  auto result = New(args, cwd);  // fatal below when the command is unsupported
+  LOG_IF(FATAL, result == nullptr) << "unsupported command line:" << args;
+  return result;
+}
+
+CompilerFlags::CompilerFlags(const std::vector<string>& args, const string& cwd)
+    : args_(args), cwd_(cwd), is_successful_(false) {
+  CHECK(!args.empty());  // the first argument is the compiler command
+  compiler_name_ = args.front();
+}
+
+// TODO: wtf
+void CompilerFlags::Fail(const string& msg, const std::vector<string>& args) {
+  // Store "<reason>\nARGS:\n <arg> <arg>...\n" and mark the parse as failed.
+  string report = "Flag parsing failed: " + msg + "\n" + "ARGS:\n";
+  for (size_t i = 0; i < args.size(); ++i) {
+    report.append(" ").append(args[i]);
+  }
+  fail_message_ = report + "\n";
+  is_successful_ = false;
+}
+
+// static
+bool CompilerFlags::ExpandPosixArgs(
+    const string& cwd, const std::vector<string>& args,
+    std::vector<string>* expanded_args,
+    std::vector<string>* optional_input_filenames) {
+  // Copies |args| to |expanded_args|, replacing each "@file" argument with the
+  // arguments parsed from that file (resolved against |cwd|). The name of
+  // every file read is also appended to |optional_input_filenames| if given.
+  // Returns false if a file cannot be read or its contents cannot be parsed.
+  for (size_t i = 0; i < args.size(); ++i) {
+    const string& arg = args[i];
+    bool is_at_file = strings::StartsWith(arg, "@");
+
+    // MacOSX uses @executable_path, @loader_path or @rpath as prefix
+    // of install_name (b/6845420).
+    // It could also be a linker rpath (b/31920050).
+    const bool is_linker_magic_token =
+        is_at_file && (strings::StartsWith(arg, "@executable_path/") ||
+                       strings::StartsWith(arg, "@loader_path/") ||
+                       strings::StartsWith(arg, "@rpath/"));
+    if (is_linker_magic_token) {
+      // Such a token is kept as is when it is the value of -rpath or
+      // -install_name, passed either directly ...
+      const bool follows_flag =
+          i > 0 &&
+          (args[i - 1] == "-rpath" || args[i - 1] == "-install_name");
+      // ... or through the "-Xlinker <flag> -Xlinker <token>" form.
+      const bool follows_xlinker_flag =
+          i > 2 && args[i - 3] == "-Xlinker" &&
+          (args[i - 2] == "-rpath" || args[i - 2] == "-install_name") &&
+          args[i - 1] == "-Xlinker";
+      is_at_file = !(follows_flag || follows_xlinker_flag);
+    }
+    if (!is_at_file) {
+      expanded_args->push_back(arg);
+      continue;
+    }
+
+    const string list_filename = PathResolver::PlatformConvert(arg.substr(1));
+    string list_contents;
+    const bool was_read = ReadFileToString(
+        file::JoinPathRespectAbsolute(cwd, list_filename), &list_contents);
+    if (!was_read) {
+      LOG(WARNING) << "failed to read: " << list_filename
+                   << " at " << cwd;
+      return false;
+    }
+    if (optional_input_filenames != nullptr) {
+      optional_input_filenames->push_back(list_filename);
+    }
+
+    if (!ParsePosixCommandLineToArgv(list_contents, expanded_args)) {
+      LOG(WARNING) << "failed to parse command line: " << list_contents;
+      return false;
+    }
+    VLOG(1) << "expanded_args:" << *expanded_args;
+  }
+  return true;
+}
+
+// Return the base name of compiler, such as 'x86_64-linux-gcc-4.3',
+// 'g++', derived from compiler_name.
+string CompilerFlags::compiler_base_name() const {
+  // Keep only what follows the last '/' of compiler_name_, if there is one.
+  const size_t last_slash = compiler_name_.rfind('/');
+  if (last_slash == string::npos) {
+    return compiler_name_;
+  }
+  return compiler_name_.substr(last_slash + 1);
+}
+
+/* static */
+bool CompilerFlags::IsGCCCommand(absl::string_view arg) {
+  const absl::string_view stem = file::Stem(arg);
+  const bool names_gcc = stem.find("gcc") != absl::string_view::npos ||
+                         stem.find("g++") != absl::string_view::npos;
+  if (names_gcc) {
+    return true;
+  }
+  // A bare "cc" substring would also match other commands such as "distcc",
+  // so only the exact names "cc"/"c++" and "*-cc" (e.g. "i586-mingw32msvc-cc")
+  // are accepted here.
+  // NOTE(review): the "-cc" suffix is tested on |arg|, not |stem| -- confirm.
+  const bool names_cc =
+      stem == "c++" || stem == "cc" || strings::EndsWith(arg, "-cc");
+  return names_cc || IsClangCommand(arg);
+}
+
+/* static */
+bool CompilerFlags::IsClangCommand(absl::string_view arg) {
+  const absl::string_view stem = file::Stem(arg);
+  // allow pnacl-clang etc.
+  // However, don't allow clang-tidy.
+  const bool plain_or_prefixed =
+      stem == "clang" || strings::EndsWith(stem, "-clang") ||
+      stem == "clang++" || strings::EndsWith(stem, "-clang++");
+  if (plain_or_prefixed) return true;
+  // For b/25937763: accept "clang-<version>" and "clang++-<version>", where
+  // <version> consists only of digits and '.'. This keeps out clang-cl,
+  // clang-check, clang-tblgen, clang-format, clang-tidy-diff, etc.
+  static const char kClang[] = "clang-";
+  static const char kClangxx[] = "clang++-";
+  size_t prefix_len = 0;
+  if (strings::StartsWith(stem, kClang)) {
+    prefix_len = sizeof(kClang) - 1;
+  } else if (strings::StartsWith(stem, kClangxx)) {
+    prefix_len = sizeof(kClangxx) - 1;
+  }
+  if (prefix_len == 0) return false;
+  const absl::string_view version = stem.substr(prefix_len);
+  return version.find_first_not_of("0123456789.") == absl::string_view::npos;
+}
+
+/* static */
+bool CompilerFlags::IsNaClGCCCommand(absl::string_view arg) {
+  const auto npos = absl::string_view::npos;
+  const absl::string_view name = file::Basename(arg);
+  return name.find("nacl-gcc") != npos || name.find("nacl-g++") != npos;
+}
+
+/* static */
+bool CompilerFlags::IsVCCommand(absl::string_view arg) {
+  // A substring search for "cl" would also hit commands like "clang" or
+  // "nacl-gcc", so compare the whole lower-cased basename instead; "cl" is
+  // case-insensitive on Windows and may carry an ".exe" suffix.
+  const string basename = ToNormalizedBasename(arg);
+  return basename == "cl" || basename == "cl.exe";
+}
+
+/* static */
+bool CompilerFlags::IsClangClCommand(absl::string_view arg) {
+  const string basename = ToNormalizedBasename(arg);
+  return basename == "clang-cl" || basename == "clang-cl.exe";
+}
+
+/* static */
+bool CompilerFlags::IsPNaClClangCommand(absl::string_view arg) {
+  const absl::string_view name = file::Stem(arg);
+  return name == "pnacl-clang++" || name == "pnacl-clang";
+}
+
+/* static */
+bool CompilerFlags::IsJavacCommand(absl::string_view arg) {
+  // Any basename that contains "javac" counts.
+  return file::Basename(arg).find("javac") != absl::string_view::npos;
+}
+
+/* static */
+bool CompilerFlags::IsClangTidyCommand(absl::string_view arg) {
+  // Compared against the lower-cased basename of |arg|.
+  return ToNormalizedBasename(arg) == "clang-tidy";
+}
+
+/* static */
+bool CompilerFlags::IsJavaCommand(absl::string_view arg) {
+  // Only the exact stem "java" matches.
+  return file::Stem(arg) == "java";
+}
+
+/* static */
+string CompilerFlags::GetCompilerName(absl::string_view arg) {
+  // Forwards to the GetCompilerName() of the flags class matching |arg|; the
+  // first matching predicate wins. An unrecognized command yields "".
+  if (IsGCCCommand(arg)) return GCCFlags::GetCompilerName(arg);
+  // clang-cl is handled like cl.exe.
+  if (IsVCCommand(arg) || IsClangClCommand(arg)) {
+    return VCFlags::GetCompilerName(arg);
+  }
+  if (IsJavacCommand(arg)) return JavacFlags::GetCompilerName(arg);
+  if (IsClangTidyCommand(arg)) return ClangTidyFlags::GetCompilerName(arg);
+  if (IsJavaCommand(arg)) return JavaFlags::GetCompilerName(arg);
+
+  return "";
+}
+
+string CompilerFlags::DebugString() const {
+  // Lists args_, followed by " -> " and expanded_args_ when expansion
+  // produced something different. Every argument is followed by a space.
+  std::stringstream out;
+  std::ostream_iterator<string> sink(out, " ");
+  std::copy(args_.begin(), args_.end(), sink);
+  const bool differs = !expanded_args_.empty() && args_ != expanded_args_;
+  if (differs) {
+    out << " -> ";
+    std::copy(expanded_args_.begin(), expanded_args_.end(), sink);
+  }
+  return out.str();
+}
+
+void CompilerFlags::GetClientImportantEnvs(
+    const char** envp, std::vector<string>* out_envs) const {
+  // |envp| is walked until its terminating null entry.
+  for (size_t i = 0; envp[i] != nullptr; ++i) {
+    if (!IsClientImportantEnv(envp[i])) continue;
+    out_envs->push_back(envp[i]);
+  }
+}
+
+void CompilerFlags::GetServerImportantEnvs(
+    const char** envp, std::vector<string>* out_envs) const {
+  // |envp| is walked until its terminating null entry.
+  for (size_t i = 0; envp[i] != nullptr; ++i) {
+    if (!IsServerImportantEnv(envp[i])) continue;
+    out_envs->push_back(envp[i]);
+  }
+}
+
+template <bool is_defined>
+class MacroStore : public FlagParser::Callback {
+ public:
+  explicit MacroStore(std::vector<std::pair<string, bool> >* macros)
+      : macros_(macros) {}
+
+  // Appends (value, is_defined) to the macro list; returns |value| unchanged.
+  string ParseFlagValue(const FlagParser::Flag& /* flag */,
+                        const string& value) override {
+    macros_->emplace_back(value, is_defined);
+    return value;
+  }
+
+ private:
+  std::vector<std::pair<string, bool> >* macros_;
+};
+
+/* static */
+string GCCFlags::GetCompilerName(absl::string_view arg) {
+  // Maps the basename of |arg| to one of "clang++", "clang", "g++" or "gcc".
+  // "clang++" is tested before "clang" because the latter is a substring of
+  // the former.
+  const absl::string_view basename = file::Basename(arg);
+  const absl::string_view::size_type npos = absl::string_view::npos;
+  if (basename.find("clang++") != npos) return "clang++";
+  if (basename.find("clang") != npos) return "clang";
+  if (basename == "c++" || basename.find("g++") != npos) return "g++";
+  // Everything else is reported as gcc.
+  return "gcc";
+}
+
+// Return the key 'gcc' or 'g++' with architecture and version
+// stripped from compiler_name.
+string GCCFlags::compiler_name() const {
+  return GCCFlags::GetCompilerName(compiler_name_);
+}
+
+GCCFlags::GCCFlags(const std::vector<string>& args, const string& cwd)
+    : CompilerFlags(args, cwd),
+      is_cplusplus_(false),
+      has_nostdinc_(false),
+      has_no_integrated_as_(false),
+      has_pipe_(false),
+      has_ffreestanding_(false),
+      has_fno_hosted_(false),
+      has_fno_sanitize_blacklist_(false),
+      has_fsyntax_only_(false),
+      has_wrapper_(false),
+      is_precompiling_header_(false),
+      is_stdin_input_(false) {
+  if (!CompilerFlags::ExpandPosixArgs(cwd, args,
+                                      &expanded_args_,
+                                      &optional_input_filenames_)) {
+    Fail("Unable to expand args", args);
+    return;
+  }
+  bool has_at_file = !optional_input_filenames_.empty();
+  bool no_integrated_as = false;
+  bool fno_integrated_as = false;
+  bool ffreestanding = false;
+  bool fno_hosted = false;
+  bool fsyntax_only = false;
+  bool print_file_name = false;
+
+  FlagParser parser;
+  DefineFlags(&parser);
+
+  FlagParser::Flag* flag_c = parser.AddBoolFlag("c");
+  FlagParser::Flag* flag_S = parser.AddBoolFlag("S");
+  FlagParser::Flag* flag_E = parser.AddBoolFlag("E");
+  FlagParser::Flag* flag_M = parser.AddBoolFlag("M");
+  FlagParser::Flag* flag_MD = parser.AddBoolFlag("MD");
+  FlagParser::Flag* flag_MMD = parser.AddBoolFlag("MMD");
+  FlagParser::Flag* flag_g = parser.AddPrefixFlag("g");
+  parser.AddBoolFlag("nostdinc")->SetSeenOutput(&has_nostdinc_);
+  parser.AddBoolFlag("nostdinc++")->SetOutput(&compiler_info_flags_);
+  parser.AddBoolFlag("nostdlibinc")->SetOutput(&compiler_info_flags_);
+  parser.AddBoolFlag("integrated-as")->SetOutput(&compiler_info_flags_);
+  parser.AddBoolFlag("no-integrated-as")->SetSeenOutput(
+      &no_integrated_as);
+  parser.AddBoolFlag("fno-integrated-as")->SetSeenOutput(
+      &fno_integrated_as);
+  parser.AddBoolFlag("pipe")->SetSeenOutput(&has_pipe_);
+  parser.AddBoolFlag("-pipe")->SetSeenOutput(&has_pipe_);
+  parser.AddBoolFlag("ffreestanding")->SetSeenOutput(&ffreestanding);
+  parser.AddBoolFlag("fno-hosted")->SetSeenOutput(&fno_hosted);
+  parser.AddBoolFlag("fsyntax-only")->SetSeenOutput(&fsyntax_only);
+  parser.AddBoolFlag("print-file-name")->SetSeenOutput(&print_file_name);
+  parser.AddBoolFlag("-print-file-name")->SetSeenOutput(&print_file_name);
+  FlagParser::Flag* flag_x = parser.AddFlag("x");
+  FlagParser::Flag* flag_o = parser.AddFlag("o");
+  FlagParser::Flag* flag_MF = parser.AddFlag("MF");
+  FlagParser::Flag* flag_isysroot = parser.AddFlag("isysroot");
+  // TODO: Consider splitting -fprofile-* flags? Some options take an extra
+  // argument, others do not. Merging such kinds of flags does not look good.
+  FlagParser::Flag* flag_fprofile = parser.AddPrefixFlag("fprofile-");
+  FlagParser::Flag* flag_fprofile_sample_use =
+      parser.AddFlag("fprofile-sample-use");
+
+  parser.AddFlag("wrapper")->SetSeenOutput(&has_wrapper_);
+
+  // -mllvm takes extra arg.
+  // ASAN uses -mllvm -asan-blacklist=$FILE
+  // TSAN uses -mllvm -tsan-blacklist=$FILE
+  std::vector<string> llvm_options;
+  parser.AddFlag("mllvm")->SetOutput(&llvm_options);
+  FlagParser::Flag* flag_fsanitize_blacklist =
+      parser.AddFlag("fsanitize-blacklist");
+  FlagParser::Flag* flag_fsanitize = parser.AddFlag("fsanitize");
+  flag_fsanitize->SetOutput(&compiler_info_flags_);
+
+  // TODO: follow -fno-sanitize-blacklist spec.
+  // http://clang.llvm.org/docs/UsersManual.html:
+  // > -fno-sanitize-blacklist: don't use blacklist file,
+  // > if it was specified *earlier in the command line*.
+  parser.AddFlag("fno_sanitize_blacklist")->SetSeenOutput(
+      &has_fno_sanitize_blacklist_);
+
+  FlagParser::Flag* flag_resource_dir = parser.AddFlag("resource-dir");
+  flag_resource_dir->SetOutput(&compiler_info_flags_);
+
+  FlagParser::Flag* flag_fdebug_prefix_map =
+      parser.AddFlag("fdebug-prefix-map");
+  FlagParser::Flag* flag_gsplit_dwarf = parser.AddBoolFlag("gsplit-dwarf");
+  flag_gsplit_dwarf->SetOutput(&compiler_info_flags_);
+
+  parser.AddFlag("m")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("arch")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("target")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("-target")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("gcc-toolchain")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("-gcc-toolchain")->SetOutput(&compiler_info_flags_);
+  // TODO: Uncomment this and remove isysroot_ once we stop
+  //               supporting API version 0.
+  // parser.AddFlag("isysroot")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("imultilib")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("isystem")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("iquote")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("idirafter")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("-sysroot")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("B")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("iframework")->SetOutput(&compiler_info_flags_);
+  parser.AddPrefixFlag("O")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("b")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("V")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("specs")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("-specs")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("std")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("-std")->SetOutput(&compiler_info_flags_);
+  parser.AddPrefixFlag("f")->SetOutput(&compiler_info_flags_);
+  parser.AddBoolFlag("pthread")->SetOutput(&compiler_info_flags_);
+  parser.AddBoolFlag("undef")->SetOutput(&compiler_info_flags_);
+  // If pnacl-clang, it need to support --pnacl-bias and --pnacl-*-bias.
+  // See: b/17982273
+  if (IsPNaClClangCommand(compiler_base_name())) {
+    parser.AddPrefixFlag("-pnacl-bias=")->SetOutput(&compiler_info_flags_);
+    parser.AddBoolFlag("-pnacl-arm-bias")->SetOutput(&compiler_info_flags_);
+    parser.AddBoolFlag("-pnacl-mips-bias")->SetOutput(&compiler_info_flags_);
+    parser.AddBoolFlag("-pnacl-i686-bias")->SetOutput(&compiler_info_flags_);
+    parser.AddBoolFlag("-pnacl-x86_64-bias")->SetOutput(&compiler_info_flags_);
+    parser.AddBoolFlag("-pnacl-allow-translate")->SetOutput(
+        &compiler_info_flags_);
+  }
+  parser.AddBoolFlag("no-canonical-prefixes")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("Xclang")->SetOutput(&compiler_info_flags_);
+  parser.AddFlag("I")->SetValueOutputWithCallback(
+      nullptr, &non_system_include_dirs_);
+  // We should allow both -imacro and --imacro, -include and --include.
+  // See: b/10020850.
+  std::vector<string> includes, imacros;
+  parser.AddFlag("imacros")->SetValueOutputWithCallback(nullptr, &imacros);
+  parser.AddFlag("-imacros")->SetValueOutputWithCallback(nullptr, &imacros);
+  parser.AddFlag("include")->SetValueOutputWithCallback(nullptr, &includes);
+  parser.AddFlag("-include")->SetValueOutputWithCallback(nullptr, &includes);
+  // TODO: We need to consider the order of -I and -F.
+  parser.AddFlag("F")->SetValueOutputWithCallback(nullptr, &framework_dirs_);
+  // TODO: Support -iprefix, -I-, and etc.
+  MacroStore<true> defined_macro_store(&commandline_macros_);
+  MacroStore<false> undefined_macro_store(&commandline_macros_);
+  parser.AddFlag("D")->SetCallbackForParsedArgs(&defined_macro_store);
+  parser.AddFlag("U")->SetCallbackForParsedArgs(&undefined_macro_store);
+
+  // Special handle for "-W", "-Wa,", "-Wl,", "-Wp,".
+  // We want to parse "-Wa,", "-Wp,"
+  // We want to mark "-Wl," unknown.
+  // However, we want to parse -Wsomething.
+  FlagParser::Flag* flag_W = parser.AddPrefixFlag("W");
+  FlagParser::Flag* flag_Wa = parser.AddPrefixFlag("Wa,");
+  FlagParser::Flag* flag_Wl = parser.AddPrefixFlag("Wl,");
+  FlagParser::Flag* flag_Wp = parser.AddPrefixFlag("Wp,");
+  std::vector<string> assembler_flags;
+  std::vector<string> preprocessor_flags;
+  flag_Wa->SetValueOutputWithCallback(nullptr, &assembler_flags);
+  flag_Wp->SetValueOutputWithCallback(nullptr, &preprocessor_flags);
+
+  parser.AddNonFlag()->SetOutput(&input_filenames_);
+
+  parser.Parse(expanded_args_);
+  unknown_flags_ = parser.unknown_flag_args();
+
+  // -Wa, is a flag for assembler.
+  // -Wa,--noexecstack is often used.
+  if (!assembler_flags.empty()) {
+    std::vector<string> subflags;
+    for (const auto& fs : assembler_flags) {
+      for (auto&& f : strings::Split(fs, ',')) {
+        subflags.emplace_back(f);
+      }
+    }
+
+    FlagParser pp;
+    FlagParser::Options* opts = pp.mutable_options();
+    opts->flag_prefix = '-';
+    opts->allows_equal_arg = true;
+    opts->allows_nonspace_arg = true;
+    opts->has_command_name = false;
+
+    pp.AddBoolFlag("-noexecstack");  // --noexecstack to make stack unexecutable
+    pp.AddFlag("-defsym");  // --defsym,SYM=VALUE to defin symbol SYM.
+    pp.AddPrefixFlag("I");  // -Iout/somewhere; add include path
+    pp.AddBoolFlag("gdwarf-2");  // -gdwarf-2; debug info
+    pp.AddFlag("march");  // -march=foo; set architecture
+    pp.AddFlag("mfpu");  // -mfpu=foo; set cpu
+
+    pp.Parse(subflags);
+    for (const auto& unknown : pp.unknown_flag_args()) {
+      unknown_flags_.push_back("-Wa," + unknown);
+    }
+  }
+
+  if (flag_Wl->seen()) {
+    // For "-Wl,", Mark the whole flag as unknown.
+    // We won't support linker flags.
+    for (const auto& v : flag_Wl->values()) {
+      unknown_flags_.push_back("-Wl," + v);
+    }
+  }
+
+  // Note: -Wp,-D -Wp,FOOBAR can be considered as -Wp,-D,FOOBAR
+  if (!preprocessor_flags.empty()) {
+    std::vector<string> subflags;
+    for (const auto& fs : preprocessor_flags) {
+      for (auto&& f : strings::Split(fs, ',')) {
+        subflags.emplace_back(f);
+      }
+    }
+
+    FlagParser pp;
+    FlagParser::Options* opts = pp.mutable_options();
+    opts->flag_prefix = '-';
+    opts->allows_equal_arg = true;
+    opts->allows_nonspace_arg = true;
+    opts->has_command_name = false;
+
+    pp.AddFlag("D")->SetCallbackForParsedArgs(&defined_macro_store);
+    pp.AddFlag("U")->SetCallbackForParsedArgs(&undefined_macro_store);
+    FlagParser::Flag* flag_MD_pp = pp.AddFlag("MD");
+
+    pp.Parse(subflags);
+
+    if (flag_MD_pp->seen()) {
+      output_files_.push_back(flag_MD_pp->GetLastValue());
+    }
+    for (const auto& unknown : pp.unknown_flag_args()) {
+      unknown_flags_.push_back("-Wp," + unknown);
+    }
+  }
+
+  // Check -W flag.
+  for (const auto& value : flag_W->values()) {
+    if (!IsKnownWarningOption(value)) {
+      unknown_flags_.push_back("-W" + value);
+    }
+  }
+
+  // Check debug flags. We match -g with prefix flag. It covers too much.
+  // If the value is not known, we'd like to mark it as unknown option.
+  for (const auto& value : flag_g->values()) {
+    if (!IsKnownDebugOption(value)) {
+      unknown_flags_.push_back("-g" + value);
+    }
+  }
+
+  if (!has_at_file) {
+    // no @file in args.
+    CHECK_EQ(args_, expanded_args_);
+    expanded_args_.clear();
+  }
+
+  if (flag_isysroot->seen())
+    isysroot_ = flag_isysroot->GetLastValue();
+  if (flag_resource_dir->seen())
+    resource_dir_ = flag_resource_dir->GetLastValue();
+  if (flag_fsanitize->seen()) {
+    for (const auto& value : flag_fsanitize->values()) {
+      std::vector<string> vs = strings::Split(value, ',');
+      for (const auto& v : vs) {
+        fsanitize_.insert(v);
+      }
+    }
+  }
+  if (flag_fdebug_prefix_map->seen()) {
+    for (const auto& value : flag_fdebug_prefix_map->values()) {
+      size_t pos = value.find("=");
+      if (pos == string::npos) {
+        LOG(ERROR) << "invalid argument is given to -fdebug-prefix-map:"
+                   << value;
+        return;
+      }
+      bool inserted = fdebug_prefix_map_.insert(
+          std::make_pair(value.substr(0, pos), value.substr(pos + 1))).second;
+      LOG_IF(INFO, !inserted) << "-fdebug-prefix-map has duplicated entry."
+                              << " ignored: " << value;
+    }
+    // -fdebug-prefix-map does not affect system include dirs or
+    // predefined macros.  We do not include it in compiler_info_flags_.
+    // Especially for clang, it is only used in lib/CodeGen/CGDebugInfo.cpp,
+    // which is code to generate debug info.
+  }
+
+  string output = "a.out";
+  is_successful_ = true;
+
+  mode_ = COMPILE;
+  if (flag_E->seen() || flag_M->seen()) {
+    mode_ = PREPROCESS;
+    output = "";
+  } else if (!flag_c->seen() && !flag_S->seen()) {
+    mode_ = LINK;
+  }
+
+  if (input_filenames_.size() == 1) {
+    if (input_filenames_[0] == "-" || input_filenames_[0] == "/dev/stdin") {
+      is_stdin_input_ = true;
+    }
+  } else if (mode_ != LINK && input_filenames_.size() > 1) {
+    string buf = absl::StrJoin(input_filenames_, ", ");
+    Fail("multiple input file names: " + buf, args);
+  }
+
+  if (!llvm_options.empty()) {
+    // TODO: no need to set -*-blacklist options in compiler_info_flags_?
+    std::copy(llvm_options.begin(), llvm_options.end(),
+              back_inserter(compiler_info_flags_));
+
+    FlagParser llvm_parser;
+    FlagParser::Options* opts = llvm_parser.mutable_options();
+    opts->flag_prefix = '-';
+    opts->allows_equal_arg = true;
+    opts->has_command_name = false;
+
+    llvm_parser.AddFlag("asan-blacklist")->SetValueOutputWithCallback(
+        nullptr, &optional_input_filenames_);
+    llvm_parser.AddFlag("tsan-blacklist")->SetValueOutputWithCallback(
+        nullptr, &optional_input_filenames_);
+    llvm_parser.Parse(llvm_options);
+  }
+  if (flag_fsanitize_blacklist->seen()) {
+    const std::vector<string>& values = flag_fsanitize_blacklist->values();
+    for (const auto& value : values) {
+      // -fsanitize-blacklist doesn't affect system include dirs or
+      // predefined macros, so don't include it in compiler_info_flags_.
+      optional_input_filenames_.push_back(value);
+    }
+  }
+
+  if (flag_x->seen()) {
+    compiler_info_flags_.push_back("-x");
+    compiler_info_flags_.push_back(flag_x->GetLastValue());
+  }
+  if (has_nostdinc_) {
+    compiler_info_flags_.push_back("-nostdinc");
+  }
+  if (no_integrated_as) {
+    compiler_info_flags_.push_back("-no-integrated-as");
+    has_no_integrated_as_ = true;
+  }
+  if (fno_integrated_as) {
+    compiler_info_flags_.push_back("-fno-integrated-as");
+    has_no_integrated_as_ = true;
+  }
+  if (ffreestanding) {
+    compiler_info_flags_.push_back("-ffreestanding");
+    has_ffreestanding_ = true;
+  }
+  if (fno_hosted) {
+    compiler_info_flags_.push_back("-fno-hosted");
+    has_fno_hosted_ = true;
+  }
+  if (fsyntax_only) {
+    compiler_info_flags_.push_back("-fsyntax-only");
+    has_fsyntax_only_ = true;
+  }
+
+  if (!isysroot_.empty()) {
+    compiler_info_flags_.push_back("-isysroot");
+    compiler_info_flags_.push_back(isysroot_);
+  }
+
+  // Workaround for ChromeOS.
+  // https://code.google.com/p/chromium/issues/detail?id=338646
+  //
+  // TODO: remove this when we drop chromeos wrapper support.
+  // In https://code.google.com/p/chromium/issues/detail?id=316963,
+  // we are discussing about the drop of chromeos wrapper support.
+  // In other words, goma is called by the wrapper, and we do not have
+  // the wrapper installed in the goma server.
+  for (const auto& it : commandline_macros_) {
+    if (it.first == "__KERNEL__" && it.second) {
+      compiler_info_flags_.push_back("-D__KERNEL__");
+      break;
+    }
+  }
+
+  // All files specified by -imacros are processed before all files
+  // specified by -include.
+  std::copy(imacros.begin(), imacros.end(), back_inserter(root_includes_));
+  std::copy(includes.begin(), includes.end(), back_inserter(root_includes_));
+
+  if (flag_o->seen()) {
+    output_files_.push_back(flag_o->GetLastValue());
+    output = flag_o->GetLastValue();
+  }
+
+  if (flag_MF->seen()) {
+    output_files_.push_back(flag_MF->GetLastValue());
+  }
+
+  if (print_file_name) {
+    Fail("not supported on remote", args);
+  }
+
+  if (flag_x->seen()) {
+    lang_ = flag_x->GetLastValue();
+  } else {
+    lang_ = GetLanguage(compiler_name_,
+                        (!input_filenames_.empty() ? input_filenames_[0] : ""));
+  }
+  is_cplusplus_ = (lang_.find("c++") != string::npos);
+  if (mode_ == COMPILE)
+    is_precompiling_header_ = strings::EndsWith(lang_, "-header");
+
+  // Create a default output flag. FIXME: is this necessary?
+  if (output_files_.empty() && !input_filenames_.empty()) {
+    size_t ext_start = input_filenames_[0].rfind('.');
+    if (flag_E->seen() || flag_M->seen()) {
+      // output will be stdout.
+      return;
+    } else if (flag_S->seen()) {
+      if (string::npos != ext_start)
+        output = input_filenames_[0].substr(0, ext_start) + ".s";
+      else
+        return;
+    } else if (is_precompiling_header_) {
+      output =  input_filenames_[0] + ".gch";
+    } else if (flag_c->seen()) {
+      if (string::npos != ext_start)
+        output = input_filenames_[0].substr(0, ext_start) + ".o";
+      else
+        return;
+    }
+    output_files_.push_back(output);
+  }
+
+  // if -MD or -MMD flag was specified, and -MF flag was not specified, assume
+  // .d file output.
+  if ((flag_MD->seen() || flag_MMD->seen()) && !flag_MF->seen()) {
+    size_t ext_start = output.rfind('.');
+    if (string::npos != ext_start) {
+      output_files_.push_back(output.substr(0, ext_start) + ".d");
+    }
+  }
+
+  if (flag_gsplit_dwarf->seen()) {
+    if (mode_ == COMPILE) {
+      output_files_.push_back(
+          file::JoinPath(file::Dirname(output), file::Stem(output)) + ".dwo");
+    }
+
+    const string& input0 = input_filenames_[0];
+    if (mode_ == LINK && file::Extension(input0) != "o") {
+      output_files_.push_back(
+          file::JoinPath(file::Dirname(input0), file::Stem(input0)) + ".dwo");
+    }
+  }
+
+  bool use_profile_input = false;
+  string profile_input_dir;
+  for (const auto& flag : flag_fprofile->values()) {
+    use_profile_input |= strings::StartsWith(flag, "use");
+
+    // Pick the last profile dir, this is how GCC works.
+    size_t profile_dir_start = string::npos;
+    if (strings::StartsWith(flag, "use=") || strings::StartsWith(flag, "dir="))
+      profile_dir_start = 4;
+    else if (strings::StartsWith(flag, "generate="))
+      profile_dir_start = 9;
+    if (profile_dir_start != string::npos) {
+      profile_input_dir = flag.substr(profile_dir_start);
+    }
+    compiler_info_flags_.emplace_back("-fprofile-" + flag);
+  }
+  if (use_profile_input && !is_precompiling_header_) {
+    if (profile_input_dir.empty())
+      profile_input_dir = cwd;
+    for (const auto& filename : input_filenames_) {
+      size_t ext_start = filename.rfind('.');
+      if (ext_start == string::npos)
+        continue;
+      size_t last_dir = filename.rfind('/');
+      if (last_dir == string::npos)
+        last_dir = 0;
+      else
+        last_dir++;
+      optional_input_filenames_.push_back(file::JoinPath(
+          profile_input_dir,
+          filename.substr(last_dir, ext_start - last_dir) + ".gcda"));
+    }
+  }
+  if (flag_fprofile_sample_use->seen()) {
+    optional_input_filenames_.push_back(
+        flag_fprofile_sample_use->GetLastValue());
+  }
+}
+
+const std::vector<string> GCCFlags::include_dirs() const {
+  // Non-system include dirs come first, followed by the framework dirs.
+  std::vector<string> merged = non_system_include_dirs_;
+  merged.insert(merged.end(), framework_dirs_.begin(), framework_dirs_.end());
+  return merged;
+}
+
+bool GCCFlags::IsClientImportantEnv(const char* env) const {
+  // Returns true when |env| matters on the client side, i.e. when it is
+  //
+  //  1. important for the server as well (see IsServerImportantEnv), or
+  //  2. a WINEDEBUG= entry, which is allowed only in the client, or
+  //  3. a PATHEXT= or SystemRoot= entry.  These are used for nacl on Win
+  //     and must not be sent to the server.
+  //
+  // WINEDEBUG= is matched with strings::StartsWith; the Windows variables
+  // are matched with var_strcaseprefix instead.
+  if (IsServerImportantEnv(env) || strings::StartsWith(env, "WINEDEBUG=")) {
+    return true;
+  }
+
+  const bool is_windows_only_env =
+      var_strcaseprefix(env, "PATHEXT=") != nullptr ||
+      var_strcaseprefix(env, "SystemRoot=") != nullptr;
+  return is_windows_only_env;
+}
+
+bool GCCFlags::IsServerImportantEnv(const char* env) const {
+  // Returns true if |env| starts with one of the prefixes listed below.
+  //
+  // http://gcc.gnu.org/onlinedocs/gcc/Environment-Variables.html
+  // SYSROOT is not mentioned in the above but it seems to change the
+  // behavior of GCC.
+  //
+  // Although the ld(1) manual mentions the following variables, they are not
+  // added without actual needs.  They may lead to security risks, and gold
+  // (the linker used by chromium) does not seem to use them.
+  // - LD_RUN_PATH
+  // - LD_LIBRARY_PATH
+  //
+  // PWD is used for current working directory. b/27487704
+  static const char* kServerEnvPrefixes[] = {
+    "SYSROOT=",
+    "LIBRARY_PATH=",
+    "CPATH=",
+    "C_INCLUDE_PATH=",
+    "CPLUS_INCLUDE_PATH=",
+    "OBJC_INCLUDE_PATH=",
+    "DEPENDENCIES_OUTPUT=",
+    "SUNPRO_DEPENDENCIES=",
+    "MACOSX_DEPLOYMENT_TARGET=",
+    "SDKROOT=",
+    "PWD=",
+    "DEVELOPER_DIR=",
+  };
+
+  const auto matches_env = [env](const char* prefix) {
+    return strings::StartsWith(env, prefix);
+  };
+  return std::any_of(std::begin(kServerEnvPrefixes),
+                     std::end(kServerEnvPrefixes),
+                     matches_env);
+}
+
+// static. Registers the gcc/clang driver flags listed below on |parser|.
+void GCCFlags::DefineFlags(FlagParser* parser) {
+  FlagParser::Options* opts = parser->mutable_options();
+  opts->flag_prefix = '-';
+  opts->allows_equal_arg = true;
+  opts->allows_nonspace_arg = true;
+
+  // clang options can be taken from:
+  // https://github.com/llvm-mirror/clang/blob/master/include/clang/Driver/Options.td
+  // gcc options
+  // https://gcc.gnu.org/onlinedocs/gcc-6.4.0/gcc/Option-Summary.html#Option-Summary
+
+  enum FlagType {
+    kNormal, kPrefix, kBool,
+  };
+  static const struct {
+    const char* name;
+    FlagType flag_type;
+  } kFlags[] = {
+    // gcc/clang flags
+    { "-C", kBool },  // preprocessor option; don't remove comment
+    { "-P", kBool },  // preprocessor option; Disable linemarker output in -E mode
+    { "-include", kNormal },  // preprocess <file> first
+    { "-macros", kNormal }, // preprocess <file> first
+    { "-param", kNormal },
+    { "-sysroot", kNormal },
+    { "-version", kBool },  // --version
+    { "B", kNormal },  // add dir to compiler's search paths
+    { "D", kNormal },  // preprocessor defines
+    { "F", kNormal },
+    { "I", kNormal },  // add dir to header search paths
+    { "L", kNormal },  // add dir to linker search paths
+    { "MF", kNormal },  // specify dependency output
+    { "MP", kBool },  // Create phony target for each dependency (other than main file)
+    { "MQ", kNormal },  // -MQ <target>; like -MT, but the target is quoted for make
+    { "MT", kNormal },
+    { "Qunused-arguments", kBool },  // Don't emit warning for unused driver arguments
+    { "V", kNormal },  // specify target version
+    { "W", kPrefix },  // -Wsomething; enable/disable warnings
+    { "Wa,", kPrefix }, // Options to assembler
+    { "Wl,", kPrefix }, // Options to linker
+    { "Wp,", kPrefix }, // Options to preprocessor
+    { "Xassembler", kNormal },
+    { "Xlinker", kNormal },
+    { "Xpreprocessor", kNormal },
+    { "ansi", kBool },  // -ansi. choose c dialect
+    { "arch", kNormal },  // processor type
+    { "b", kNormal },  // specify target machine
+    { "dA", kBool },  // Annotate the assembler output with miscellaneous debugging information.
+    { "dD", kBool },  // Like '-dM', without predefined macros etc.
+    { "dM", kBool },  // Generate a list of '#define' directives.
+    { "g", kPrefix },  // debug information. NOTE: Needs special treatment.
+    { "gsplit-dwarf", kBool },  // to enable the generation of split DWARF.
+    { "idirafter", kNormal },
+    { "iframework", kNormal },
+    { "imacros", kNormal },   // preprocess <file> first
+    { "imultilib", kNormal },
+    { "include", kNormal },   // preprocess <file> first
+    { "iquote", kNormal },
+    { "isysroot", kNormal },
+    { "isystem", kNormal },
+    { "m", kNormal },  // machine dependent options
+    { "o", kNormal },  // specify output
+    { "pedantic", kBool },  // old form of -Wpedantic (older gcc has this)
+    { "pg", kBool },  // Generate extra code for gprof
+    { "specs", kNormal },
+    { "std", kNormal },
+    { "target", kNormal },
+    { "v", kBool },  // Show commands to run and use verbose output
+    { "w", kBool },  // Inhibit all warning messages.
+    { "x", kNormal },  // specify language
+
+    // darwin options
+    { "-serialize-diagnostics", kNormal },
+    { "allowable_client", kNormal },
+    { "client_name", kNormal },
+    { "compatibility_version", kNormal },
+    { "current_version", kNormal },
+    { "dylib_file", kNormal },
+    { "dylinker_install_name", kNormal },
+    { "exported_symbols_list", kNormal },
+    { "filelist", kNormal },
+    { "framework", kNormal },
+    { "image_base", kNormal },
+    { "init", kNormal },
+    { "install_name", kNormal },
+    { "multiply_defined", kNormal },
+    { "multiply_defined_unused", kNormal },
+    { "no-canonical-prefixes", kBool },
+    { "pagezero_size", kNormal },
+    { "read_only_relocs", kNormal },
+    { "seg_addr_table", kNormal },
+    { "seg_addr_table_filename", kNormal },
+    { "segs_read_only_addr", kNormal },
+    { "segs_read_write_addr", kNormal },
+    { "sub_library", kNormal },
+    { "sub_umbrella", kNormal },
+    { "umbrella", kNormal },
+    { "undefined", kNormal },
+    { "unexported_symbols_list", kNormal },
+    { "weak_reference_mismatches", kNormal },
+    // TODO: -segproto takes 3 arguments (segname, max_prot and init_prot)
+    // TODO: -segaddr takes 2 arguments (name and address)
+    // TODO: -sectobjectsymbols takes 2 arguments (segname and sectname)
+    // TODO: -sectorder takes 3 arguments (segname, sectname and orderfile)
+
+    // for clang
+    { "-coverage", kBool },  // take code coverage
+    { "Xanalyzer", kNormal },
+    { "Xclang", kNormal },
+    { "gcc-toolchain", kNormal },
+    { "nostdlibinc", kBool },  // Do not search the standard system directories for include files, but do search compiler builtin include directories.
+    { "print-libgcc-file-name", kBool },  // Print the library path for the currently used compiler runtime library
+    { "print-prog-name=", kPrefix },  // Print the full program path of <name> -print-prog-name=<name>
+
+    // linker flags
+    // https://gcc.gnu.org/onlinedocs/gcc/Link-Options.html
+    { "nodefaultlibs", kBool  }, // Do not use the standard system libraries
+    { "nostdlib", kBool },  // Do not use the standard system startup files or libraries
+    { "nostdlib++", kBool },  // Don't use the ld_stdlib++ section
+    { "pie", kBool },  // Produce a dynamically linked position independent executable
+    { "rdynamic", kBool },  // Pass the flag -export-dynamic to the ELF linker
+    { "static", kBool },  // this overrides -pie and prevents linking with the shared libraries.
+  };
+
+  for (const auto& f : kFlags) {
+    switch (f.flag_type) {
+    case kNormal:
+      parser->AddFlag(f.name);
+      break;
+    case kPrefix:
+      parser->AddPrefixFlag(f.name);
+      break;
+    case kBool:
+      parser->AddBoolFlag(f.name);
+      break;
+    }
+  }
+}
+
+// static
+bool GCCFlags::IsKnownWarningOption(absl::string_view option) {
+  // |option| is a -W flag with the leading "-W" removed.  The lookup below
+  // is a binary search, so kKnownWarningOptions has to be sorted.
+  // TODO: If we can have constexpr version of is_sorted,
+  // we can check this in compile time.
+  const auto table_begin = std::begin(kKnownWarningOptions);
+  const auto table_end = std::end(kKnownWarningOptions);
+  DCHECK(std::is_sorted(table_begin, table_end,
+                        [](absl::string_view a, absl::string_view b) {
+                          return a < b;
+                        }))
+      << "kKnownWarningOptions must be sorted";
+
+  // An option written as "foo=x" is looked up as "foo=" (the '=' is kept).
+  const absl::string_view::size_type eq = option.find('=');
+  if (eq != absl::string_view::npos) {
+    option = option.substr(0, eq + 1);
+  }
+  return std::binary_search(table_begin, table_end, option);
+}
+
+// static
+bool GCCFlags::IsKnownDebugOption(absl::string_view v) {
+  // |v| is a -g flag with the leading "-g" removed ("" means plain -g).
+  // See https://gcc.gnu.org/onlinedocs/gcc/Debugging-Options.html
+  // -gz is not listed: it is used like -gz=<type>, which does not fit an
+  // exact-match table.  Entries must stay sorted for the binary search.
+  static const char* const kKnownDebugOptions[] {
+    "",
+    "0", "1", "2", "3",
+    "column-info",
+    "dw",
+    "dwarf", "dwarf-2", "dwarf-3", "dwarf-4", "dwarf-5",
+    "gdb", "gdb1", "gdb2", "gdb3",
+    "gnu-pubnames",
+    "line-tables-only",
+    "no-column-info",
+    "no-record-gcc-switches",
+    "no-strict-dwarf",
+    "pubnames",
+    "record-gcc-switches",
+    "split-dwarf",
+    "stabs", "stabs+", "stabs0", "stabs1", "stabs2", "stabs3",
+    "strict-dwarf",
+    "vms", "vms0", "vms1", "vms2", "vms3",
+    "xcoff", "xcoff+", "xcoff0", "xcoff1", "xcoff2", "xcoff3",
+  };
+  const auto first = std::begin(kKnownDebugOptions);
+  const auto last = std::end(kKnownDebugOptions);
+
+  DCHECK(std::is_sorted(first, last,
+                        [](absl::string_view a, absl::string_view b) {
+                          return a < b;
+                        }))
+      << "kKnownDebugOptions must be sorted";
+
+  return std::binary_search(first, last, v);
+}
+
+/* static */
+string GCCFlags::GetLanguage(const string& compiler_name,
+                             const string& input_filename) {
+  // Returns the language name for |input_filename| compiled by
+  // |compiler_name|: "c" or "c++", optionally prefixed with "objective-" or
+  // suffixed with "-header".
+  //
+  // Suffixes which make GCC switch to C++ even if the compiler name does not
+  // say so.  See gcc/gcc.c and gcc/cp/lang-specs.h .
+  static const char* const kCxxSuffixes[] = {
+    "cc", "cxx", "cpp", "cp", "c++", "C", "CPP", "ii",
+    "H", "hpp", "hp", "hxx", "h++", "HPP", "tcc", "hh",
+    "mm", "M", "mii",
+  };
+
+  // Decision based on a compiler name.
+  const bool named_cxx = compiler_name.find("g++") != string::npos;
+  if (input_filename.empty()) {
+    return named_cxx ? "c++" : "c";
+  }
+
+  // Decision based on a file extension.
+  const string ext = GetFileNameExtension(input_filename);
+  bool cxx;
+  if (named_cxx) {
+    // g++ and clang++ think .m as objc not objc++. (b/11521718)
+    cxx = (ext != "m");
+  } else {
+    // A C compiler name: the suffix may still select C++.
+    cxx = std::find(std::begin(kCxxSuffixes), std::end(kCxxSuffixes), ext) !=
+          std::end(kCxxSuffixes);
+  }
+
+  const string base = cxx ? "c++" : "c";
+  if (ext.empty()) {
+    return base;
+  }
+  switch (ext[0]) {
+  case 'm':
+  case 'M':
+    // Any suffix starting with m/M is treated as Objective-C(++).
+    return "objective-" + base;
+  case 'h':
+  case 'H':
+    // Any suffix starting with h/H is treated as a header.
+    return base + "-header";
+  default:
+    break;
+  }
+  // "tcc" is a header suffix as well.
+  if (ext == "tcc") {
+    return base + "-header";
+  }
+  return base;
+}
+
+// static. Forwards to ::devtools_goma::GetFileNameExtension; returns a string.
+string GCCFlags::GetFileNameExtension(const string& filepath) {
+  return string(::devtools_goma::GetFileNameExtension(filepath));
+}
+
+void ParseJavaClassPaths(
+    const std::vector<string>& class_paths,
+    std::vector<string>* jar_files) {
+  // Appends every ':'-separated entry with a "jar" or "zip" extension.
+  // TODO: We need to handle directories.
+  for (const string& joined : class_paths) {
+    const std::vector<string> entries = strings::Split(joined, ':');
+    for (const string& entry : entries) {
+      const auto ext = ::devtools_goma::GetFileNameExtension(entry);
+      if (ext != "jar" && ext != "zip") continue;
+      jar_files->push_back(entry);
+    }
+  }
+}
+
+JavacFlags::JavacFlags(const std::vector<string>& args, const string& cwd)
+    : CompilerFlags(args, cwd) {
+  // Parses a javac command line; calls Fail() if args cannot be expanded.
+  const bool expanded = CompilerFlags::ExpandPosixArgs(
+      cwd, args, &expanded_args_, &optional_input_filenames_);
+  if (!expanded) {
+    Fail("Unable to expand args", args);
+    return;
+  }
+  // A non-empty list here is taken to mean that args contained an @file.
+  const bool args_had_at_file = !optional_input_filenames_.empty();
+
+  is_successful_ = true;
+  lang_ = "java";
+
+  FlagParser parser;
+  DefineFlags(&parser);
+  std::vector<string> boot_class_paths;
+  std::vector<string> class_paths;
+  std::vector<string> non_flag_args;
+  // -d: the destination directory for class files.
+  // -s: the directory to place generated source files.
+  FlagParser::Flag* const class_dir_flag = parser.AddFlag("d");
+  class_dir_flag->SetValueOutputWithCallback(nullptr, &output_dirs_);
+  parser.AddFlag("s")->SetValueOutputWithCallback(nullptr, &output_dirs_);
+  // Maybe classpaths are loaded in following way:
+  // 1. bootstrap classes
+  // 2. extension classes
+  // 3. user classes.
+  // and we might need to search bootclasspath first, extdirs, and classpath
+  // in this order.
+  // https://docs.oracle.com/javase/8/docs/technotes/tools/findingclasses.html
+  // TODO: Support -Xbootclasspath if needed.
+  // TODO: Handle CLASSPATH environment variables.
+  // TODO: Handle -extdirs option.
+  // TODO: Support -sourcepath.
+  parser.AddFlag("bootclasspath")
+      ->SetValueOutputWithCallback(nullptr, &boot_class_paths);
+  parser.AddFlag("cp")->SetValueOutputWithCallback(nullptr, &class_paths);
+  parser.AddFlag("classpath")
+      ->SetValueOutputWithCallback(nullptr, &class_paths);
+  FlagParser::Flag* const processor_flag = parser.AddFlag("processor");
+  parser.AddNonFlag()->SetOutput(&non_flag_args);
+
+  parser.Parse(expanded_args_);
+  unknown_flags_ = parser.unknown_flag_args();
+
+  if (!args_had_at_file) {
+    // no @file in args.
+    CHECK_EQ(args_, expanded_args_);
+    expanded_args_.clear();
+  }
+
+  // Every non-flag argument ending in ".java" is an input.  Unless -d was
+  // given, the matching ".class" file is recorded as an output.
+  const bool has_class_dir = class_dir_flag->seen();
+  for (const string& arg : non_flag_args) {
+    if (!strings::EndsWith(arg, ".java")) continue;
+    input_filenames_.push_back(arg);
+    if (has_class_dir) continue;
+    // 5 == strlen(".java")
+    output_files_.push_back(arg.substr(0, arg.size() - 5) + ".class");
+  }
+
+  ParseJavaClassPaths(boot_class_paths, &jar_files_);
+  ParseJavaClassPaths(class_paths, &jar_files_);
+
+  if (processor_flag->seen()) {
+    for (const string& value : processor_flag->values()) {
+      const std::vector<string> names = strings::Split(value, ',');
+      std::copy(names.begin(), names.end(), back_inserter(processors_));
+    }
+  }
+}
+
+/* static */
+void JavacFlags::DefineFlags(FlagParser* parser) {
+  // Flags start with '-'; all of them are registered through AddFlag().
+  static const char* const kFlagNames[] = {
+    "d", "s", "cp", "classpath", "processor",
+  };
+  parser->mutable_options()->flag_prefix = '-';
+  for (const char* name : kFlagNames) {
+    parser->AddFlag(name);
+  }
+}
+
+/* static */
+string JavacFlags::GetCompilerName(absl::string_view /*arg*/) {
+  return "javac";  // Always "javac"; the argument is ignored.
+}
+
+class Win32PathNormalizer : public FlagParser::Callback {
+ public:
+  // Returns |value| passed through NormalizeWin32Path(); |flag| is not used.
+  string ParseFlagValue(const FlagParser::Flag& flag,
+                        const string& value) override;
+};
+
+string Win32PathNormalizer::ParseFlagValue(
+    const FlagParser::Flag& /* flag */, const string& value) {
+  return NormalizeWin32Path(value);  // The flag itself is not consulted.
+}
+
+/* static */
+string VCFlags::GetCompilerName(absl::string_view arg) {
+  // The compiler is "clang-cl" when |arg| is recognized as a clang-cl
+  // command by IsClangClCommand(), and "cl.exe" in every other case.
+  const bool is_clang_cl = IsClangClCommand(arg);
+  return is_clang_cl ? "clang-cl" : "cl.exe";
+}
+
+string VCFlags::compiler_name() const {
+  return GetCompilerName(compiler_name_);  // "clang-cl" or "cl.exe"
+}
+
+// Parses a cl.exe / clang-cl command line; calls Fail() if expansion fails.
+VCFlags::VCFlags(const std::vector<string>& args, const string& cwd)
+    : CompilerFlags(args, cwd),
+      is_cplusplus_(true),
+      ignore_stdinc_(false),
+      require_mspdbserv_(false) {
+  bool result = ExpandArgs(cwd, args, &expanded_args_,
+                           &optional_input_filenames_);
+  if (!result) {
+    Fail("Unable to expand args", args);
+    return;
+  }
+
+  FlagParser parser;
+  DefineFlags(&parser);
+  Win32PathNormalizer normalizer;
+
+  // Compile only, no link
+  FlagParser::Flag* flag_c = parser.AddBoolFlag("c");
+
+  // Preprocess only, do not compile
+  FlagParser::Flag* flag_E = parser.AddBoolFlag("E");
+  FlagParser::Flag* flag_EP = parser.AddBoolFlag("EP");
+  FlagParser::Flag* flag_P = parser.AddBoolFlag("P");
+
+  // Ignore "standard places".
+  FlagParser::Flag* flag_X = parser.AddBoolFlag("X");
+
+  // Compile file as .c
+  FlagParser::Flag* flag_Tc = parser.AddFlag("Tc");
+
+  // Compile all files as .c
+  FlagParser::Flag* flag_TC = parser.AddBoolFlag("TC");
+
+  // Compile file as .cpp
+  FlagParser::Flag* flag_Tp = parser.AddFlag("Tp");
+
+  // Compile all files as .cpp
+  FlagParser::Flag* flag_TP = parser.AddBoolFlag("TP");
+
+  // Specify output.
+  FlagParser::Flag* flag_o = parser.AddFlag("o");  // obsoleted but always there
+  FlagParser::Flag* flag_Fo = parser.AddPrefixFlag("Fo");  // obj file path
+  FlagParser::Flag* flag_Fe = parser.AddPrefixFlag("Fe");  // exe file path
+
+  // Optimization prefix
+  parser.AddPrefixFlag("O")->SetOutput(&compiler_info_flags_);
+
+  // M[DT]d? define _DEBUG, _MT, and _DLL.
+  parser.AddPrefixFlag("MD")->SetOutput(&compiler_info_flags_);
+  parser.AddPrefixFlag("MT")->SetOutput(&compiler_info_flags_);
+
+  // standard
+  parser.AddBoolFlag("permissive-")->SetOutput(&compiler_info_flags_);
+  parser.AddPrefixFlag("std:")->SetOutput(&compiler_info_flags_);
+
+  // Additional include path.
+  parser.AddFlag("I")->SetValueOutputWithCallback(&normalizer, &include_dirs_);
+
+  MacroStore<true> defined_macro_store(&commandline_macros_);
+  MacroStore<false> undefined_macro_store(&commandline_macros_);
+  parser.AddFlag("D")->SetCallbackForParsedArgs(&defined_macro_store);
+  parser.AddFlag("U")->SetCallbackForParsedArgs(&undefined_macro_store);
+
+  // specifies the architecture for code generation.
+  // It is passed to compiler_info_flags_ to get macros.
+  parser.AddFlag("arch")->SetOutput(&compiler_info_flags_);
+
+  // Flags that affect predefined macros
+  FlagParser::Flag* flag_ZI = parser.AddBoolFlag("ZI");
+  FlagParser::Flag* flag_RTC = parser.AddPrefixFlag("RTC");
+  FlagParser::Flag* flag_Zc_wchar_t = parser.AddBoolFlag("Zc:wchar_t");
+
+  FlagParser::Flag* flag_Zi = parser.AddBoolFlag("Zi");
+
+  parser.AddFlag("FI")->SetValueOutputWithCallback(nullptr, &root_includes_);
+
+  FlagParser::Flag* flag_Yc = parser.AddPrefixFlag("Yc");
+  FlagParser::Flag* flag_Yu = parser.AddPrefixFlag("Yu");
+  FlagParser::Flag* flag_Fp = parser.AddPrefixFlag("Fp");
+
+  // Machine options used by clang-cl.
+  FlagParser::Flag* flag_m = parser.AddFlag("m");
+  FlagParser::Flag* flag_fmsc_version = parser.AddFlag("fmsc-version");
+  FlagParser::Flag* flag_fsanitize = parser.AddFlag("fsanitize");
+  FlagParser::Flag* flag_fno_sanitize_blacklist = nullptr;
+  FlagParser::Flag* flag_fsanitize_blacklist = nullptr;
+  FlagParser::Flag* flag_mllvm = parser.AddFlag("mllvm");
+  FlagParser::Flag* flag_isystem = parser.AddFlag("isystem");
+  // TODO: check -iquote?
+  // http://clang.llvm.org/docs/UsersManual.html#id8
+  FlagParser::Flag* flag_imsvc = parser.AddFlag("imsvc");
+  if (compiler_name() == "clang-cl") {
+    flag_m->SetOutput(&compiler_info_flags_);
+    flag_fmsc_version->SetOutput(&compiler_info_flags_);
+    flag_fsanitize->SetOutput(&compiler_info_flags_);
+    // TODO: do we need to support more sanitize options?
+    flag_fno_sanitize_blacklist =
+        parser.AddBoolFlag("fno-sanitize-blacklist");
+    flag_fsanitize_blacklist = parser.AddFlag("fsanitize-blacklist=");
+    flag_mllvm->SetOutput(&compiler_info_flags_);
+    flag_isystem->SetOutput(&compiler_info_flags_);
+    flag_imsvc->SetOutput(&compiler_info_flags_);
+    // Make it understand Xclang.
+    parser.AddFlag("Xclang")->SetOutput(&compiler_info_flags_);
+  }
+
+  parser.AddNonFlag()->SetOutput(&input_filenames_);
+
+  parser.Parse(expanded_args_);
+  unknown_flags_ = parser.unknown_flag_args();
+
+  is_successful_ = true;
+
+  lang_ = "c++";
+  // CL.exe defaults to C++ unless /Tc or /TC is specified,
+  // or the file is named .c and /Tp /TP are not specified.
+  if (flag_Tc->seen() || flag_TC->seen() ||
+      ((!input_filenames_.empty() &&
+        GetFileNameExtension(input_filenames_[0]) == "c") &&
+        !flag_TP->seen() && !flag_Tp->seen())) {
+    is_cplusplus_ = false;
+    lang_ = "c";
+  }
+
+  // Handle implicit macros, lang_ must not change after this.
+  // See http://msdn.microsoft.com/en-us/library/b0084kay(v=vs.90).aspx
+  if (lang_ == "c++") {
+    implicit_macros_.append("#define __cplusplus\n");
+  }
+  if (flag_ZI->seen()) {
+    implicit_macros_.append("#define _VC_NODEFAULTLIB\n");
+  }
+  if (flag_RTC->seen()) {
+    implicit_macros_.append("#define __MSVC_RUNTIME_CHECKS\n");
+  }
+  if (flag_Zc_wchar_t->seen()) {
+    implicit_macros_.append("#define _NATIVE_WCHAR_T_DEFINED\n");
+    implicit_macros_.append("#define _WCHAR_T_DEFINED\n");
+  }
+
+  // Debug information format.
+  // For VC, /Zi and /ZI generate a PDB.
+  // For clang-cl, /Zi is an alias of /Z7; /ZI is not supported.
+  // Probably OK to deal them as the same?
+  // See https://msdn.microsoft.com/en-us/library/958x11bc.aspx,
+  // and http://clang.llvm.org/docs/UsersManual.html
+  if (compiler_name() != "clang-cl" && (flag_Zi->seen() || flag_ZI->seen())) {
+    require_mspdbserv_ = true;
+  }
+
+  if (flag_fsanitize_blacklist && flag_fsanitize_blacklist->seen() &&
+      !flag_fno_sanitize_blacklist->seen()) {
+    // TODO: follow -fno-sanitize-blacklist spec.
+    // http://clang.llvm.org/docs/UsersManual.html:
+    // > -fno-sanitize-blacklist: don't use blacklist file,
+    // > if it was specified *earlier in the command line*.
+    const std::vector<string>& values = flag_fsanitize_blacklist->values();
+    std::copy(values.begin(), values.end(),
+              back_inserter(optional_input_filenames_));
+  }
+
+  if (flag_X->seen()) {
+    ignore_stdinc_ = true;
+    compiler_info_flags_.push_back("/X");
+  }
+
+  if (flag_EP->seen() || flag_E->seen()) {
+    return;  // output to stdout
+  }
+
+  if (flag_Yc->seen()) {
+    creating_pch_ = flag_Yc->GetLastValue();
+  }
+  if (flag_Yu->seen()) {
+    using_pch_ = flag_Yu->GetLastValue();
+  }
+  if (flag_Fp->seen()) {
+    using_pch_filename_ = flag_Fp->GetLastValue();
+  }
+
+  string new_extension = ".obj";  // kept when /c is given without /P
+  string force_output;
+  if (flag_Fo->seen())
+    force_output = flag_Fo->GetLastValue();
+
+  if (flag_P->seen()) {
+    new_extension = ".i";
+    // any option to control output filename?
+    force_output = "";
+  } else if (!flag_c->seen()) {
+    new_extension = ".exe";
+    if (flag_Fe->seen()) {
+      force_output = flag_Fe->GetLastValue();
+    } else {
+      force_output = "";
+    }
+  }
+
+  // Single file with designated destination
+  if (input_filenames_.size() == 1) {
+    if (force_output.empty() && flag_o->seen()) {
+      force_output = flag_o->GetLastValue();
+    }
+
+    if (!force_output.empty()) {
+      output_files_.push_back(ComposeOutputFilePath(input_filenames_[0],
+          force_output, new_extension));
+    }
+    if (!output_files_.empty()) {
+      return;
+    }
+  }
+
+  for (const auto& input_filename : input_filenames_) {
+    output_files_.push_back(
+        ComposeOutputFilePath(input_filename, force_output, new_extension));
+  }
+}
+
+// Returns true if |env| is server important, or if var_strcaseprefix()
+// matches it against one of the client-only prefixes listed below.
+bool VCFlags::IsClientImportantEnv(const char* env) const {
+  // The goma server never overrides these variables, so they are client
+  // important only and are not sent to the server.
+  static const char* kClientOnlyEnvs[] = {
+    "PATHEXT=",
+    "SystemDrive=",
+    "SystemRoot=",
+  };
+
+  bool important = IsServerImportantEnv(env);
+  for (const char* prefix : kClientOnlyEnvs) {
+    if (important) {
+      break;
+    }
+    important = var_strcaseprefix(env, prefix) != nullptr;
+  }
+
+  return important;
+}
+
+// Returns true if var_strcaseprefix() matches |env| against one of the
+// prefixes below, which name the variables treated as server important.
+bool VCFlags::IsServerImportantEnv(const char* env) const {
+  static const char* kServerEnvs[] = {
+    "INCLUDE=",
+    "LIB=",
+    "MSC_CMD_FLAGS=",
+    "VCINSTALLDIR=",
+    "VSINSTALLDIR=",
+    "WindowsSdkDir=",
+  };
+
+  bool matched = false;
+  for (const char* prefix : kServerEnvs) {
+    matched = matched || var_strcaseprefix(env, prefix) != nullptr;
+  }
+  return matched;
+}
+
+// static. Registers on |parser| every cl.exe / clang-cl flag known here.
+void VCFlags::DefineFlags(FlagParser* parser) {
+  FlagParser::Options* opts = parser->mutable_options();
+  // Define all known flags of cl.exe here.
+  // A flag that is not defined here would be treated as a non-flag argument
+  // if the argument begins with alt_flag_prefix.
+  // b/18063824
+  // https://code.google.com/p/chromium/issues/detail?id=427942
+  opts->flag_prefix = '-';
+  opts->alt_flag_prefix = '/';
+  opts->allows_nonspace_arg = true;
+
+  // http://msdn.microsoft.com//library/fwkeyyhe.aspx
+  // note: some bool flag may take - as suffix even if it is documented
+  // on the above URL? clang-cl defines such flag.
+  parser->AddBoolFlag("?");  // alias of help
+  parser->AddPrefixFlag("AI");  // specifies a directory to search for #using
+  parser->AddPrefixFlag("analyze");  // enable code analysis
+  parser->AddPrefixFlag("arch");  // specifies the architecture for code gen
+  parser->AddBoolFlag("await");  // enable resumable functions extension
+
+  parser->AddBoolFlag("bigobj");  // increases the num of addressable sections
+
+  parser->AddBoolFlag("C");  // preserves comments during preprocessing
+  parser->AddBoolFlag("c");  // compile only
+  parser->AddPrefixFlag("cgthreads");  // specify num of cl.exe threads
+  parser->AddPrefixFlag("clr");
+  parser->AddPrefixFlag("constexpr");  // constexpr options
+
+  parser->AddFlag("D");  // define macro
+  parser->AddPrefixFlag("doc");  // process documentation comments
+  // /diagnostics:<args,...> controls the format of diagnostic messages
+  parser->AddPrefixFlag("diagnostics:");
+
+  parser->AddBoolFlag("E");  // preprocess to stdout
+  parser->AddPrefixFlag("EH");  // exception handling model
+  parser->AddBoolFlag("EP");  // disable linemarker output and preprocess
+  parser->AddPrefixFlag("errorReport");
+
+  parser->AddFlag("F");  // set stack size
+  parser->AddPrefixFlag("favor");  // optimize for architecture specifics
+  parser->AddPrefixFlag("FA");  // output assembly code file
+  parser->AddPrefixFlag("Fa");  // output assembly code to this file
+  parser->AddBoolFlag("FC");  // full path of source code in diagnostic text
+  parser->AddPrefixFlag("Fd");  // set pdb file name
+  parser->AddPrefixFlag("Fe");  // set output executable file or directory
+  parser->AddFlag("FI");  // include file before parsing
+  parser->AddPrefixFlag("Fi");  // set preprocess output file name
+  parser->AddPrefixFlag("Fm");  // set map file name
+  parser->AddPrefixFlag("Fo");  // set output object file or directory
+  parser->AddPrefixFlag("fp");  // specify floating point behavior
+  parser->AddPrefixFlag("Fp");  // set pch file name
+  parser->AddPrefixFlag("FR");  // .sbr file
+  parser->AddPrefixFlag("Fr");  // .sbr file without info on local var
+  parser->AddBoolFlag("FS");  // force synchronous PDB writes
+  parser->AddFlag("FU");  // #using
+  parser->AddBoolFlag("Fx");  // merges injected code
+
+  parser->AddBoolFlag("GA");  // optimize for win app
+  parser->AddBoolFlag("Gd");  // calling convention
+  parser->AddBoolFlag("Ge");  // enable stack probes
+  parser->AddBoolFlag("GF");  // enable string pool
+  parser->AddBoolFlag("GF-");  // disable string pooling
+  parser->AddBoolFlag("GH");  // call hook function _pexit
+  parser->AddBoolFlag("Gh");  // call hook function _penter
+  parser->AddBoolFlag("GL");  // enables whole program optimization
+  parser->AddBoolFlag("GL-");
+  parser->AddBoolFlag("Gm");  // enables minimal rebuild
+  parser->AddBoolFlag("Gm-");
+  parser->AddBoolFlag("GR");  // enable emission of RTTI data
+  parser->AddBoolFlag("GR-");  // disable emission of RTTI data
+  parser->AddBoolFlag("Gr");  // calling convention
+  parser->AddBoolFlag("GS");  // buffer security check
+  parser->AddBoolFlag("GS-");
+  parser->AddPrefixFlag("Gs");  // controls stack probes
+  parser->AddBoolFlag("GT");  // fibre safety thread-local storage
+  parser->AddBoolFlag("guard:cf");  // enable control flow guard
+  parser->AddBoolFlag("guard:cf-");  // disable control flow guard
+  parser->AddBoolFlag("Gv");  // calling convention
+  parser->AddBoolFlag("Gw");  // put each data item in its own section
+  parser->AddBoolFlag("Gw-");  // don't put each data item in its own section
+  parser->AddBoolFlag("GX");  // enable exception handling
+  parser->AddBoolFlag("Gy");   // put each function in its own section
+  parser->AddBoolFlag("Gy-");  // don't put each function in its own section
+  parser->AddBoolFlag("GZ");  // same as /RTC
+  parser->AddBoolFlag("Gz");  // calling convention
+
+  parser->AddPrefixFlag("H");  // restricts the length of external names
+  parser->AddBoolFlag("HELP");  // alias of help
+  parser->AddBoolFlag("help");  // display available options
+  parser->AddBoolFlag("homeparams");  // copy register parameters to stack
+  parser->AddBoolFlag("hotpatch");  // create hotpatchable image
+
+  parser->AddFlag("I");  // add directory to include search path
+
+  parser->AddBoolFlag("J");  // make char type unsigned
+
+  parser->AddBoolFlag("kernel");  // create kernel mode binary
+  parser->AddBoolFlag("kernel-");
+
+  parser->AddBoolFlag("LD");  // create DLL
+  parser->AddBoolFlag("LDd");  // create debug DLL
+  parser->AddFlag("link");  // forward options to the linker
+  parser->AddBoolFlag("LN");
+
+  parser->AddPrefixFlag("MD");  // use DLL run time
+  // MD, MDd
+  parser->AddPrefixFlag("MP");  // build with multiple process
+  parser->AddPrefixFlag("MT");  // use static run time
+  // MT, MTd
+
+  parser->AddBoolFlag("nologo");
+
+  parser->AddPrefixFlag("O");  // optimization level
+  // O1, O2
+  // Ob[012], Od, Oi, Oi-, Os, Ot, Ox, Oy, Oy-
+  parser->AddBoolFlag("openmp");
+
+  parser->AddBoolFlag("P");  // preprocess to file
+  // set standard-conformance mode (feature set subject to change)
+  parser->AddBoolFlag("permissive-");
+
+  parser->AddPrefixFlag("Q");
+  // Qfast_transcendentals, QIfirst, Qimprecise_fwaits, Qpar
+  // Qsafe_fp_loads, Qrev-report:n
+
+  parser->AddPrefixFlag("RTC");  // run time error check
+
+  parser->AddBoolFlag("sdl");  // additional security check
+  parser->AddBoolFlag("sdl-");
+  parser->AddBoolFlag("showIncludes");  // print info about included files
+  parser->AddPrefixFlag("std:");  // C++ standard version
+
+  parser->AddFlag("Tc");  // specify a C source file
+  parser->AddBoolFlag("TC");  // treat all source files as C
+  parser->AddFlag("Tp");  // specify a C++ source file
+  parser->AddBoolFlag("TP");  // treat all source files as C++
+
+  parser->AddFlag("U");  // undefine macro
+  parser->AddBoolFlag("u");  // remove all predefined macros
+
+  parser->AddPrefixFlag("V");  // Sets the version string
+  parser->AddPrefixFlag("vd");  // control vtordisp placement
+  // for member pointers.
+  parser->AddBoolFlag("vmb");  // use a best-case representation method
+  parser->AddBoolFlag("vmg");  // use a most-general representation
+  // set the default most-general representation
+  parser->AddBoolFlag("vmm");  // to multiple inheritance
+  parser->AddBoolFlag("vms");  // to single inheritance
+  parser->AddBoolFlag("vmv");  // to virtual inheritance
+  parser->AddBoolFlag("volatile");
+
+  parser->AddPrefixFlag("W");  // warning
+  // W0, W1, W2, W3, W4, Wall, WX, WX-, WL, Wp64
+  parser->AddPrefixFlag("w");  // disable warning
+  // wd4005, ...
+
+  parser->AddBoolFlag("X");  // ignore standard include paths
+
+  parser->AddBoolFlag("Y-");  // ignore precompiled header
+  parser->AddPrefixFlag("Yc");  // create precompiled header
+  parser->AddBoolFlag("Yd");  // place debug information
+  parser->AddPrefixFlag("Yl");  // inject PCH reference for debug library
+  parser->AddPrefixFlag("Yu");  // use precompiled header
+
+  parser->AddBoolFlag("Z7");  // debug information format
+  parser->AddBoolFlag("Za");  // disable language extensions
+  parser->AddPrefixFlag("Zc");  // conformance
+  // line number only debug information; b/30077868
+  parser->AddBoolFlag("Zd");
+  parser->AddBoolFlag("Ze");  // enable microsoft extensions
+  parser->AddBoolFlag("ZH:SHA_256");  // use SHA256 for file checksum
+  parser->AddBoolFlag("Zg");  // generate function prototype
+  parser->AddBoolFlag("ZI");  // produce pdb
+  parser->AddBoolFlag("Zi");  // enable debug information
+  parser->AddBoolFlag("Zl");  // omit default library name
+  parser->AddPrefixFlag("Zm");  // specify precompiled header memory limit
+  parser->AddBoolFlag("Zo");  // enhance optimized debugging
+  parser->AddBoolFlag("Zo-");
+  parser->AddPrefixFlag("Zp");  // default maximum struct packing alignment
+  // Zp1, Zp2, Zp4, Zp8, Zp16
+  parser->AddFlag("Zs");  // syntax check only. NOTE(review): bool flag?
+  parser->AddPrefixFlag("ZW");  // windows runtime compilation
+
+  // New flags from VS2015 Update 2
+  parser->AddPrefixFlag("source-charset:");  // set source character set.
+  parser->AddPrefixFlag("execution-charset:");  // set execution character set.
+  parser->AddBoolFlag("utf-8");  // set both character set to utf-8.
+  parser->AddBoolFlag("validate-charset");  // validate utf-8 files.
+  parser->AddBoolFlag("validate-charset-");
+
+  // /d2XXX is an undocumented flag for debugging.
+  // See b/27777598, b/68147091
+  parser->AddPrefixFlag("d2");
+
+  // also see clang-cl
+  // http://llvm.org/klaus/clang/blob/master/include/clang/Driver/CLCompatOptions.td
+  parser->AddFlag("o");  // set output file or directory
+  parser->AddBoolFlag("fallback");
+  parser->AddBoolFlag("G1");
+  parser->AddBoolFlag("G2");
+  parser->AddFlag("imsvc");  // both -imsvc, /imsvc.
+
+  // clang-cl flags: accepted only with '-', so alt_flag_prefix is cleared.
+  opts->flag_prefix = '-';
+  opts->alt_flag_prefix = '\0';
+  parser->AddFlag("m");
+  parser->AddFlag("fmsc-version");
+  parser->AddFlag("fsanitize");
+  parser->AddBoolFlag("fcolor-diagnostics");  // Use color for diagnostics
+  parser->AddBoolFlag("fno-standalone-debug");  // turn on the vtable-based optimization
+  parser->AddBoolFlag("fstandalone-debug");  // turn off the vtable-based optimization
+  parser->AddBoolFlag("gcolumn-info");  // debug information (-g)
+  parser->AddBoolFlag("gline-tables-only");  // debug information (-g)
+  parser->AddFlag("Xclang");
+  parser->AddFlag("isystem");
+  parser->AddPrefixFlag("-analyze");  // enable code analysis (--analyze)
+
+  opts->flag_prefix = '-';
+  opts->alt_flag_prefix = '/';  // put back the '/' alternative prefix
+}
+
+// static
+// Copies |args| to |expanded_args|, replacing each "@file" argument with the
+// arguments parsed from that response file, and appends the file name to
+// |optional_input_filenames| if it is non-null.  Returns false on failure.
+bool VCFlags::ExpandArgs(const string& cwd, const std::vector<string>& args,
+                         std::vector<string>* expanded_args,
+                         std::vector<string>* optional_input_filenames) {
+  for (const auto& arg : args) {
+    if (!strings::StartsWith(arg, "@")) {
+      expanded_args->push_back(arg);
+      continue;
+    }
+    const string source_list_filename =
+        PathResolver::PlatformConvert(arg.substr(1));
+    string source_list;
+    if (!ReadFileToString(
+             file::JoinPathRespectAbsolute(cwd, source_list_filename),
+             &source_list)) {
+      LOG(ERROR) << "failed to read: " << source_list_filename;
+      return false;
+    }
+    if (optional_input_filenames) {
+      optional_input_filenames->push_back(source_list_filename);
+    }
+    if (source_list.size() >= 2 && source_list[0] == '\xff' &&
+        source_list[1] == '\xfe') {
+      // UTF-16LE (BOM FF FE); only characters with a zero high byte convert.
+      // TODO: handle real wide characters (WideCharToMultiByte / iconv?).
+      VLOG(1) << "Convert WC to MB in @" << source_list_filename;
+      string source_list_mb;
+      source_list_mb.reserve(source_list.size() / 2 - 1);  // BOM is dropped.
+      for (size_t i = 2; i < source_list.size(); i += 2) {
+        // A dangling last byte (odd size) or a non-zero high byte is an error.
+        if (i + 1 >= source_list.size() || source_list[i + 1] != 0) {
+          LOG(ERROR) << "failed to convert:" << source_list_filename;
+          return false;
+        }
+        source_list_mb.push_back(source_list[i]);
+      }
+      source_list.swap(source_list_mb);
+      VLOG(1) << "source_list:" << source_list;
+    }
+    if (!ParseWinCommandLineToArgv(source_list, expanded_args)) {
+      LOG(WARNING) << "failed to parse command line: " << source_list;
+      return false;
+    }
+    VLOG(1) << "expanded_args:" << *expanded_args;
+  }
+  return true;
+}
+
+// static
+// Returns the extension reported by devtools_goma::GetFileNameExtension for
+// |orig_filepath| after it has been converted with PlatformConvert using
+// kWin32PathSep and kPreserveCase.
+string VCFlags::GetFileNameExtension(const string& orig_filepath) {
+  const string converted = PathResolver::PlatformConvert(
+      orig_filepath, PathResolver::kWin32PathSep, PathResolver::kPreserveCase);
+  return string(::devtools_goma::GetFileNameExtension(converted));
+}
+
+// static
+// Returns |output_file_or_dir| itself when it names a file.  Otherwise
+// returns the base name of |input_file_name| with |output_file_ext| appended,
+// prefixed by |output_file_or_dir| when that is a directory (ends with '\\').
+string VCFlags::ComposeOutputFilePath(const string& input_file_name,
+                                      const string& output_file_or_dir,
+                                      const string& output_file_ext) {
+  const string input_file = NormalizeWin32Path(input_file_name);
+  const string output_target = NormalizeWin32Path(output_file_or_dir);
+
+  const bool output_is_dir =
+      !output_target.empty() && output_target.back() == '\\';
+  if (!output_target.empty() && !output_is_dir) {
+    return output_target;
+  }
+
+  // Find the base name [begin, end) inside the normalized input path.
+  const size_t sep = input_file.find_last_of("/\\");
+  const size_t begin = (sep == string::npos) ? 0 : sep + 1;
+  size_t end = input_file.rfind('.');
+  // No extension if there is no '.', or the last '.' is in a directory name.
+  // Measure |input_file| (not |input_file_name|): it is the string sliced.
+  if (end == string::npos || end < begin) {
+    end = input_file.size();
+  }
+  if (end <= begin) {
+    // Empty base name (e.g. ".c" or a trailing separator).
+    return output_target;
+  }
+  // Here |output_target| is either empty or a directory ending with '\\'.
+  return output_target + input_file.substr(begin, end - begin) +
+         output_file_ext;
+}
+
+// ----------------------------------------------------------------------
+// ClangTidyFlags
+
+// Parses a clang-tidy command line: records -p as |build_path_|, the
+// -export-fixes file as an output, and non-flag arguments as input files.
+ClangTidyFlags::ClangTidyFlags(const std::vector<string>& args,
+                               const string& cwd)
+    : CompilerFlags(args, cwd), seen_hyphen_hyphen_(false) {
+  const bool expanded = CompilerFlags::ExpandPosixArgs(
+      cwd, args, &expanded_args_, &optional_input_filenames_);
+  if (!expanded) {
+    Fail("Unable to expand args", args);
+    return;
+  }
+
+  FlagParser parser;
+  DefineFlags(&parser);
+
+  // Flags whose values are read back below, registered in addition to the
+  // ones DefineFlags() adds.
+  FlagParser::Flag* fixes_flag = parser.AddFlag("export-fixes");
+  FlagParser::Flag* extra_flag = parser.AddFlag("extra-arg");
+  extra_flag->SetValueOutputWithCallback(nullptr, &extra_arg_);
+  FlagParser::Flag* extra_before_flag = parser.AddFlag("extra-arg-before");
+  extra_before_flag->SetValueOutputWithCallback(nullptr, &extra_arg_before_);
+  FlagParser::Flag* build_path_flag = parser.AddFlag("p");
+
+  parser.Parse(expanded_args_);
+  unknown_flags_ = parser.unknown_flag_args();
+
+  if (build_path_flag->seen()) {
+    build_path_ = build_path_flag->GetLastValue();
+  }
+
+  // The file given to -export-fixes receives the suggested fixes, so it is
+  // treated as an output file.
+  if (fixes_flag->seen()) {
+    output_files_.push_back(fixes_flag->GetLastValue());
+  }
+
+  // Source files are stored as paths joined with |cwd|.  clang-tidy has two
+  // kinds of current working directory: one for clang-tidy itself and one
+  // for the include processor, which is specified in the compilation
+  // database.  Converting between them is hard, so absolute paths are used.
+  // The scan below walks the original |args| and skips args[0].
+  std::vector<string> source_files;
+  for (size_t i = 1; i < args.size(); ++i) {
+    const string& arg = args[i];
+    if (seen_hyphen_hyphen_) {
+      // Everything after "--" is collected verbatim.
+      args_after_hyphen_hyphen_.push_back(arg);
+    } else if (arg == "--") {
+      seen_hyphen_hyphen_ = true;
+    } else if (arg.empty() || arg[0] != '-') {
+      // Arguments starting with '-' are clang-tidy options and are skipped;
+      // anything else is taken as a source file.
+      source_files.push_back(file::JoinPath(cwd, arg));
+    }
+  }
+
+  input_filenames_ = std::move(source_files);
+  is_successful_ = true;
+}
+
+void ClangTidyFlags::SetClangArgs(const std::vector<string>& clang_args,
+                                  const string& dir) {
+  gcc_flags_.reset(new GCCFlags(clang_args, dir));  // parse the clang command
+  lang_ = gcc_flags_->lang();
+  if (!gcc_flags_->is_successful()) is_successful_ = false;  // propagate
+}
+
+void ClangTidyFlags::SetCompilationDatabasePath(const string& compdb_path) {
+  optional_input_filenames_.emplace_back(compdb_path);  // an optional input
+}
+
+string ClangTidyFlags::compiler_name() const {
+  return string("clang-tidy");  // fixed name, independent of the arguments
+}
+
+// static. Registers clang-tidy's own command line flags on |parser|.
+void ClangTidyFlags::DefineFlags(FlagParser* parser) {
+  FlagParser::Options* opts = parser->mutable_options();
+  opts->flag_prefix = '-';
+  opts->allows_equal_arg = true;
+  opts->allows_nonspace_arg = true;
+
+  parser->AddBoolFlag("analyze-temporary-dtors");
+  parser->AddFlag("checks");
+  parser->AddFlag("config");
+  parser->AddBoolFlag("dump-config");  // clang-tidy spells it with a hyphen
+  parser->AddBoolFlag("enable-check-profile");
+  parser->AddBoolFlag("explain-config");
+  parser->AddBoolFlag("fix");
+  parser->AddBoolFlag("fix-errors");
+  parser->AddFlag("header-filter");
+  parser->AddFlag("line-filter");
+  parser->AddFlag("p");
+  parser->AddBoolFlag("list-checks");
+  parser->AddBoolFlag("system-headers");
+  parser->AddFlag("warnings-as-errors");  // -warnings-as-errors=<globs>
+}
+
+// static. |arg| is ignored: the result is always "clang-tidy".
+string ClangTidyFlags::GetCompilerName(absl::string_view /*arg*/) {
+  return string("clang-tidy");
+}
+
+// Parses |args_| for JavaFlags.  "-jar" values are collected as input files
+// and class path values are passed to ParseJavaClassPaths to fill jar_files_.
+JavaFlags::JavaFlags(const std::vector<string>& args, const string& cwd)
+    : CompilerFlags(args, cwd) {
+  lang_ = "java bytecode";
+  is_successful_ = true;
+
+  FlagParser parser;
+  DefineFlags(&parser);
+  // Value sinks; only |class_paths| is read after parsing.
+  std::vector<string> class_paths, system_properties, remained_flags;
+  auto collect = [&parser](const char* name, std::vector<string>* sink) {
+    parser.AddFlag(name)->SetValueOutputWithCallback(nullptr, sink);
+  };
+  collect("cp", &class_paths);
+  collect("classpath", &class_paths);
+  collect("D", &system_properties);
+  collect("jar", &input_filenames_);
+  parser.AddNonFlag()->SetOutput(&remained_flags);
+  parser.Parse(args_);
+  unknown_flags_ = parser.unknown_flag_args();
+  ParseJavaClassPaths(class_paths, &jar_files_);
+}
+
+// static
+// Sets '-' as the flag prefix and registers the flags JavaFlags knows about.
+void JavaFlags::DefineFlags(FlagParser* parser) {
+  parser->mutable_options()->flag_prefix = '-';
+
+  static const char* const kValueFlags[] = {"D", "cp", "classpath", "jar"};
+  for (const char* name : kValueFlags) {
+    parser->AddFlag(name);
+  }
+}
+
+// ----------------------------------------------------------------------
+
+// Builds "<first line of dumpversion>[<normalized first line of version>]".
+string GetCxxCompilerVersionFromCommandOutputs(const string& /* command */,
+                                               const string& dumpversion,
+                                               const string& version) {
+  // GCC and clang both put their full version info on the first line of
+  // their --version output, e.g. "clang version 2.9 (trunk 127176)" or
+  // "gcc (Ubuntu 4.4.3-4ubuntu5) 4.4.3".
+  const string full_version = NormalizeGccVersion(GetFirstLine(version));
+  return GetFirstLine(dumpversion) + "[" + full_version + "]";
+}
+
+// Returns |buf| up to (not including) its first '\r' or '\n', or the whole
+// string if it contains neither.
+string GetFirstLine(const string& buf) {
+  const size_t eol = buf.find_first_of("\r\n");
+  // substr clamps the count, so npos yields the entire string.
+  return buf.substr(0, eol);
+}
+
+// Strips the program name from a gcc-style --version first line, leaving the
+// part starting at '('.  Input without '(' or from another program (clang
+// included) is returned unchanged.
+string NormalizeGccVersion(const string& version) {
+  // gcc version string format:
+  //   <program name> <package version string> <version string>
+  // <package version string> is "(<something>)" by default, so the text
+  // before the first '(' is expected to be <program name>.
+  const size_t paren = version.find('(');
+  if (paren == string::npos) {
+    return version;
+  }
+  const string program_name = version.substr(0, paren);
+
+  // clang needs no normalization.  Only cc/c++/gcc/g++ and their
+  // <arch>-<os>-gcc / <arch>-<os>-g++ variants are normalized.
+  // TODO: should we handle <arch>-<os>-cc or so?
+  const bool is_clang = program_name.find("clang") != string::npos;
+  const bool is_gcc_like = program_name.find("g++") != string::npos ||
+                           program_name.find("gcc") != string::npos ||
+                           program_name == "c++ " || program_name == "cc ";
+
+  return (!is_clang && is_gcc_like) ? version.substr(paren) : version;
+}
+
+}  // namespace devtools_goma
diff --git a/lib/compiler_flags.h b/lib/compiler_flags.h
new file mode 100644
index 0000000..188fa1b
--- /dev/null
+++ b/lib/compiler_flags.h
@@ -0,0 +1,432 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_LIB_COMPILER_FLAGS_H_
+#define DEVTOOLS_GOMA_LIB_COMPILER_FLAGS_H_
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+
+#include "flag_parser.h"
+#include "string_piece.h"
+using std::string;
+
+namespace devtools_goma {
+// Abstract base for the parsed form of one compiler command line.
+class CompilerFlags {
+ public:
+  // Returns new instance of subclass of CompilerFlags based on |args|.
+  // Returns NULL if args is empty or args[0] is unsupported command.
+  static std::unique_ptr<CompilerFlags> New(const std::vector<string>& args,
+                                            const string& cwd);
+
+  // MustNew is like New but causes FATAL crash if New returns NULL.
+  static std::unique_ptr<CompilerFlags> MustNew(const std::vector<string>& args,
+                                                const string& cwd);
+
+  virtual ~CompilerFlags() {}
+
+  const std::vector<string>& args() const { return args_; }
+  const std::vector<string>& expanded_args() const { return expanded_args_; }
+
+  const std::vector<string>& output_files() const { return output_files_; }
+  const std::vector<string>& output_dirs() const { return output_dirs_; }
+
+  const std::vector<string>& input_filenames() const {
+    return input_filenames_;
+  }
+  const std::vector<string>& optional_input_filenames() const {
+    return optional_input_filenames_;
+  }
+
+  string compiler_base_name() const;
+  string implicit_macros() const { return implicit_macros_; }  // by value
+
+  bool is_successful() const { return is_successful_; }
+  const string& fail_message() const { return fail_message_; }
+
+  virtual string compiler_name() const = 0;
+
+  virtual string lang() const { return lang_; }
+  virtual bool is_gcc() const { return false; }
+  virtual bool is_javac() const { return false; }
+  virtual bool is_vc() const { return false; }
+  virtual bool is_clang_tidy() const { return false; }
+  virtual bool is_java() const { return false; }
+
+  // Returns true if the |env| is important for compiler_proxy running env.
+  // This will be sent from gomacc to compiler_proxy.
+  virtual bool IsClientImportantEnv(const char* env) const = 0;
+  // Returns true if the |env| is important for goma backend.
+  // This will be sent from compiler_proxy to goma backend.
+  // All of server important envs must be client important, too.
+  virtual bool IsServerImportantEnv(const char* env) const = 0;
+
+  // Finds client important environment variables, which change the behavior
+  // of this compiler, and stores them into out_envs.
+  void GetClientImportantEnvs(const char** envp,
+                              std::vector<string>* out_envs) const;
+  void GetServerImportantEnvs(const char** envp,
+                              std::vector<string>* out_envs) const;
+
+
+  const string& cwd() const { return cwd_; }
+  // In clang-tidy case, the directory in which IncludeProcessor will run
+  // is not necessarily the same as cwd().
+  virtual const string& cwd_for_include_processor() const { return cwd_; }
+
+  // The flags which change the result of gcc -v (e.g., system include paths).
+  const std::vector<string>& compiler_info_flags() const {
+    return compiler_info_flags_;
+  }
+  // The arguments which look like flags, but that we don't know.
+  const std::vector<string>& unknown_flags() const {
+    return unknown_flags_;
+  }
+
+  string DebugString() const;
+
+  // True if arg is gcc command name. Note that clang is considered as
+  // gcc variant, so IsGCCCommand("clang") returns true.  However, since
+  // clang-cl is not compatible with gcc, IsGCCCommand("clang-cl") returns
+  // false.
+  static bool IsGCCCommand(absl::string_view arg);
+  static bool IsClangCommand(absl::string_view arg);
+  static bool IsClangClCommand(absl::string_view arg);
+  static bool IsVCCommand(absl::string_view arg);
+  static bool IsNaClGCCCommand(absl::string_view arg);
+  static bool IsPNaClClangCommand(absl::string_view arg);
+  static bool IsJavacCommand(absl::string_view arg);
+  static bool IsClangTidyCommand(absl::string_view arg);
+  static bool IsJavaCommand(absl::string_view arg);
+  static string GetCompilerName(absl::string_view arg);
+
+  // Expands @response_file in |args| and sets in |expand_args| and
+  // |optional_input_filenames| on posix environments (for gcc/javac).
+  // TODO: refactor to support windows platform.
+  // Returns true if successful.  Note that it also returns true if |args|
+  // doesn't contain @response_file.
+  // Returns false on error.
+  static bool ExpandPosixArgs(const string& cwd,
+                              const std::vector<string>& args,
+                              std::vector<string>* expand_args,
+                              std::vector<string>* optional_input_filenames);
+
+ protected:
+  CompilerFlags(const std::vector<string>& args, const string& cwd);
+  void Fail(const string& msg, const std::vector<string>& args);
+
+  std::vector<string> args_;
+  std::vector<string> expanded_args_;
+  // Storing target filename for output related flags.
+  std::vector<string> output_files_;
+  // Storing directory names specified as output directory.
+  // e.g. javac's -d option and -s option.
+  std::vector<string> output_dirs_;
+  string compiler_name_;
+  std::vector<string> input_filenames_;
+  std::vector<string> optional_input_filenames_;
+  string cwd_;
+  std::vector<string> compiler_info_flags_;
+  string lang_;  // e.g. "c", "c++" or "java bytecode"
+  std::vector<string> unknown_flags_;  // see unknown_flags()
+
+  bool is_successful_;
+  string fail_message_;
+  string implicit_macros_;  // "#define ...\n" lines implied by the flags
+};
+// CompilerFlags subclass for gcc-style command lines (is_gcc() is true).
+class GCCFlags : public CompilerFlags {
+ public:
+  enum Mode {
+    PREPROCESS, COMPILE, LINK
+  };
+
+  GCCFlags(const std::vector<string>& args, const string& cwd);
+
+  const std::vector<string> include_dirs() const;  // returned by value
+  const std::vector<string>& non_system_include_dirs() const {
+    return non_system_include_dirs_;
+  }
+  const std::vector<string>& root_includes() const { return root_includes_; }
+  const std::vector<string>& framework_dirs() const { return framework_dirs_; }
+
+  const std::vector<std::pair<string, bool>>& commandline_macros() const {
+    return commandline_macros_;
+  }
+
+  string compiler_name() const override;
+
+  Mode mode() const { return mode_; }
+
+  string isysroot() const { return isysroot_; }  // returned by value
+  const string& resource_dir() const { return resource_dir_; }
+  const std::set<string>& fsanitize() const { return fsanitize_; }
+  const std::map<string, string>& fdebug_prefix_map() const {
+    return fdebug_prefix_map_;
+  }
+
+  bool is_cplusplus() const { return is_cplusplus_; }
+  bool has_nostdinc() const { return has_nostdinc_; }
+  bool has_no_integrated_as() const { return has_no_integrated_as_; }
+  bool has_pipe() const { return has_pipe_; }
+  bool has_ffreestanding() const { return has_ffreestanding_; }
+  bool has_fno_hosted() const { return has_fno_hosted_; }
+  bool has_fno_sanitize_blacklist() const {
+    return has_fno_sanitize_blacklist_;
+  }
+  bool has_fsyntax_only() const { return has_fsyntax_only_; }
+  bool has_resource_dir() const { return !resource_dir_.empty(); }
+  bool has_wrapper() const { return has_wrapper_; }
+  bool is_precompiling_header() const { return is_precompiling_header_; }
+  bool is_stdin_input() const { return is_stdin_input_; }
+
+  bool is_gcc() const override { return true; }
+
+  bool IsClientImportantEnv(const char* env) const override;
+  bool IsServerImportantEnv(const char* env) const override;
+
+  static void DefineFlags(FlagParser* parser);
+
+  static string GetCompilerName(absl::string_view arg);
+
+  // If we know -Wfoo, returns true for "foo".
+  static bool IsKnownWarningOption(absl::string_view option);
+  static bool IsKnownDebugOption(absl::string_view v);
+
+ private:
+  friend class GCCFlagsTest;
+  static string GetLanguage(const string& compiler_name,
+                            const string& input_filename);
+  // Get file extension of the given |filepath|.
+  static string GetFileNameExtension(const string& filepath);
+
+  std::vector<string> remote_flags_;
+  std::vector<string> non_system_include_dirs_;
+  std::vector<string> root_includes_;
+  std::vector<string> framework_dirs_;
+  // The second value is true if the macro is defined and false if undefined.
+  std::vector<std::pair<string, bool>> commandline_macros_;
+  Mode mode_;
+  string isysroot_;
+  string resource_dir_;  // has_resource_dir() is true when non-empty
+  // -fsanitize can be specified multiple times, and can be comma separated
+  // values.
+  std::set<string> fsanitize_;
+  std::map<string, string> fdebug_prefix_map_;
+  bool is_cplusplus_;
+  bool has_nostdinc_;
+  bool has_no_integrated_as_;
+  bool has_pipe_;
+  bool has_ffreestanding_;
+  bool has_fno_hosted_;
+  bool has_fno_sanitize_blacklist_;
+  bool has_fsyntax_only_;
+  bool has_wrapper_;
+  bool is_precompiling_header_;
+  bool is_stdin_input_;
+};
+// CompilerFlags subclass for javac; no environment variable is important.
+class JavacFlags : public CompilerFlags {
+ public:
+  JavacFlags(const std::vector<string>& args, const string& cwd);
+
+  string compiler_name() const override {
+    return "javac";
+  }
+
+  bool is_javac() const override { return true; }
+
+  bool IsClientImportantEnv(const char* env) const override { return false; }
+  bool IsServerImportantEnv(const char* env) const override { return false; }
+
+  static void DefineFlags(FlagParser* parser);
+  static string GetCompilerName(absl::string_view arg);
+
+  const std::vector<string>& jar_files() const { return jar_files_; }
+
+  const std::vector<string>& processors() const { return processors_; }
+
+ private:
+  friend class JavacFlagsTest;
+
+  std::vector<string> jar_files_;  // see jar_files()
+  std::vector<string> processors_;  // see processors()
+};
+
+class VCFlags : public CompilerFlags {  // Flags of a Visual C++ command line.
+ public:
+  VCFlags(const std::vector<string>& args, const string& cwd);
+
+  const std::vector<string>& include_dirs() const { return include_dirs_; }
+  const std::vector<string>& root_includes() const { return root_includes_; }
+  const std::vector<std::pair<string, bool>>& commandline_macros() const {
+    return commandline_macros_;  // (name, true=defined / false=undefined).
+  }
+
+  bool is_cplusplus() const { return is_cplusplus_; }
+  bool ignore_stdinc() const { return ignore_stdinc_; }
+  bool require_mspdbserv() const { return require_mspdbserv_; }
+
+  string compiler_name() const override;
+
+  bool is_vc() const override { return true; }
+
+  bool IsClientImportantEnv(const char* env) const override;
+  bool IsServerImportantEnv(const char* env) const override;
+
+  static void DefineFlags(FlagParser* parser);
+  static bool ExpandArgs(const string& cwd, const std::vector<string>& args,
+                         std::vector<string>* expanded_args,
+                         std::vector<string>* optional_input_filenames);
+  // Precompiled-header accessors; see the matching members below.
+  const string& creating_pch() const { return creating_pch_; }
+  const string& using_pch() const { return using_pch_; }
+  const string& using_pch_filename() const { return using_pch_filename_; }
+
+  static string GetCompilerName(absl::string_view arg);
+
+ private:
+  friend class VCFlagsTest;  // Lets the test fixture reach private helpers.
+  // Get file extension of the given |filepath|.
+  static string GetFileNameExtension(const string& filepath);
+  // Compose the output file path from the input name, output file/dir and ext.
+  static string ComposeOutputFilePath(const string& input_file_name,
+                                      const string& output_file_or_dir,
+                                      const string& output_file_ext);
+
+  std::vector<string> include_dirs_;
+  std::vector<string> root_includes_;
+  // The second value is true if the macro is defined and false if undefined.
+  std::vector<std::pair<string, bool>> commandline_macros_;
+  bool is_cplusplus_;
+  bool ignore_stdinc_;
+  string creating_pch_;  // Presumably the /Yc argument; TODO confirm.
+  string using_pch_;     // Presumably the /Yu argument; TODO confirm.
+  // The filename of .pch, if specified.
+  string using_pch_filename_;
+  bool require_mspdbserv_;
+};
+
+// clang-tidy is invoked like this:
+// $ clang-tidy -checks='*' foo.cc -- -I. -std=c++11
+// This command line contains options for clang-tidy and options for clang.
+// The clang options are parsed by the internal |gcc_flags_|.
+// When '--' is not given in the command line, the compilation database
+// (compile_commands.json) is read. Otherwise, the compilation database
+// won't be used.
+class ClangTidyFlags : public CompilerFlags {
+ public:
+  ClangTidyFlags(const std::vector<string>& args, const string& cwd);
+
+  string compiler_name() const override;
+  bool is_clang_tidy() const override { return true; }
+  // Forwards to |gcc_flags_|, so it needs a non-null |gcc_flags_|.
+  const string& cwd_for_include_processor() const override {
+    return gcc_flags_->cwd();
+  }
+
+  // Sets the corresponding clang args for IncludeProcessor.
+  // These are set in CompilerTask::InitCompilerFlags.
+  void SetClangArgs(const std::vector<string>& clang_args, const string& dir);
+  void SetCompilationDatabasePath(const string& compdb_path);
+  void set_is_successful(bool flag) { is_successful_ = flag; }
+
+  // NOTE: These methods are valid only after SetClangArgs() is called.
+  // Calling these before SetClangArgs() will cause undefined behavior.
+  const std::vector<string>& non_system_include_dirs() const {
+    return gcc_flags_->non_system_include_dirs();
+  }
+  const std::vector<string>& root_includes() const {
+    return gcc_flags_->root_includes();
+  }
+  const std::vector<string>& framework_dirs() const {
+    return gcc_flags_->framework_dirs();
+  }
+  const std::vector<std::pair<string, bool>>& commandline_macros() const {
+    return gcc_flags_->commandline_macros();
+  }
+  bool is_cplusplus() const { return gcc_flags_->is_cplusplus(); }
+  bool has_nostdinc() const { return gcc_flags_->has_nostdinc(); }
+
+  const string& build_path() const { return build_path_; }
+  const std::vector<string>& extra_arg() const { return extra_arg_; }
+  const std::vector<string>& extra_arg_before() const {
+    return extra_arg_before_;
+  }
+  // Whether "--" was seen, and the arguments that followed it.
+  bool seen_hyphen_hyphen() const { return seen_hyphen_hyphen_; }
+  const std::vector<string>& args_after_hyphen_hyphen() const {
+    return args_after_hyphen_hyphen_;
+  }
+  // No environment variable is reported as important for clang-tidy.
+  bool IsClientImportantEnv(const char* env) const override { return false; }
+  bool IsServerImportantEnv(const char* env) const override { return false; }
+
+  static void DefineFlags(FlagParser* parser);
+  static string GetCompilerName(absl::string_view arg);
+
+ private:
+  string build_path_;  // the value of option "-p".
+  std::vector<string> extra_arg_;
+  std::vector<string> extra_arg_before_;
+
+  bool seen_hyphen_hyphen_;
+  std::vector<string> args_after_hyphen_hyphen_;
+  // Converted clang flags. NOTE(review): originally documented as "made in the
+  // constructor", yet the accessors above require SetClangArgs() first.
+  std::unique_ptr<GCCFlags> gcc_flags_;
+};
+
+class JavaFlags : public CompilerFlags {  // Flags of a java command line.
+ public:
+  JavaFlags(const std::vector<string>& args, const string& cwd);
+
+  string compiler_name() const override {  // Always the fixed name "java".
+    return "java";
+  }
+
+  bool is_java() const override { return true; }
+  // No environment variable is reported as important for java.
+  bool IsClientImportantEnv(const char* env) const override { return false; }
+  bool IsServerImportantEnv(const char* env) const override { return false; }
+
+  static void DefineFlags(FlagParser* parser);
+  static string GetCompilerName(absl::string_view arg) {  // |arg| is unused.
+    return "java";
+  }
+  const std::vector<string>& jar_files() const { return jar_files_; }
+
+ private:
+  std::vector<string> jar_files_;  // Presumably class path archives; TODO confirm.
+};
+
+// Gets the version of gcc/clang used to fill CommandSpec.
+// |dumpversion| is the output of `gcc/clang -dumpversion`.
+// |version| is the output of `gcc/clang --version`.
+string GetCxxCompilerVersionFromCommandOutputs(const string& command,
+                                               const string& dumpversion,
+                                               const string& version);
+
+// Truncates |buf| at \r\n.
+string GetFirstLine(const string& buf);
+
+// Removes a program name from |version| if it comes from gcc/g++.
+string NormalizeGccVersion(const string& version);
+
+// Parses list of given class paths, and appends .jar and .zip to |jar_files|.
+// Note: |jar_files| is not cleared inside; results are appended to whatever
+// it already contains.
+void ParseJavaClassPaths(const std::vector<string>& class_paths,
+                         std::vector<string>* jar_files);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_LIB_COMPILER_FLAGS_H_
diff --git a/lib/compiler_flags_test.cc b/lib/compiler_flags_test.cc
new file mode 100644
index 0000000..2411667
--- /dev/null
+++ b/lib/compiler_flags_test.cc
@@ -0,0 +1,4558 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "compiler_flags.h"
+
+#include <limits.h>
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+
+
+#include "file.h"
+#include "file_dir.h"
+#include "file_helper.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+#include "gtest/gtest.h"
+#include "path.h"
+#include "path_resolver.h"
+#include "string_piece_utils.h"
+#ifdef _WIN32
+# include "config_win.h"
+// we'll ignore the warnings:
+// warning C4996: 'strdup': The POSIX name for this item is deprecated.
+# pragma warning(disable:4996)
+#endif  // _WIN32
+using File::CreateDir;
+using google::GetExistingTempDirectories;
+using std::string;
+using strings::StrCat;
+
+namespace devtools_goma {
+
+
+static void ExpectHasElement(const std::vector<string>& v,
+                             const string& elem) {  // |elem| must be in |v|.
+  EXPECT_TRUE(std::find(v.begin(), v.end(), elem) != v.end()) << elem;
+}
+
+static void GetOutputFileForHello(const std::vector<string>& opts,
+                                  string* output,
+                                  GCCFlags::Mode mode) {
+  std::vector<string> args;  // Becomes: gcc <opts...> hello.c
+  args.push_back("gcc");
+  std::copy(opts.begin(), opts.end(), back_inserter(args));
+  args.push_back("hello.c");
+
+  GCCFlags flags(args, "/");  // Parsed with "/" as the working directory.
+  if (flags.output_files().size() >= 1) {
+    CHECK_EQ(static_cast<int>(flags.output_files().size()), 1);
+    *output = flags.output_files().front();  // The single output file.
+  } else {
+    *output = "";  // No output file was derived.
+  }
+  EXPECT_EQ(mode, flags.mode()) << args;  // Also verifies the expected mode.
+}
+
+class GCCFlagsTest : public testing::Test {  // Fixture with a scratch dir.
+ protected:
+  void SetUp() override {  // Creates a per-process temporary directory.
+    std::vector<string> tmp_dirs;
+    GetExistingTempDirectories(&tmp_dirs);
+    CHECK_GT(tmp_dirs.size(), 0);  // At least one temp dir is required.
+
+#ifndef _WIN32
+    string pid = std::to_string(getpid());
+#else
+    string pid = std::to_string(GetCurrentProcessId());
+#endif
+    tmp_dir_ = file::JoinPath(  // <first temp dir>/compiler_flags_unittest_<pid>
+        tmp_dirs[0], StrCat("compiler_flags_unittest_", pid));
+
+    CreateDir(tmp_dir_, 0777);  // The return value is not checked.
+  }
+  void TearDown() override {  // Removes the directory made by SetUp().
+    RecursivelyDelete(tmp_dir_);
+  }
+  string GetFileNameExtension(const string& filename) {
+    return GCCFlags::GetFileNameExtension(filename);  // Private static.
+  }
+  string GetLanguage(const string& compiler_name,
+                     const string& input_filename) {
+    return GCCFlags::GetLanguage(compiler_name, input_filename);
+  }
+
+  string tmp_dir_;  // Path created in SetUp() and deleted in TearDown().
+};
+
+TEST_F(GCCFlagsTest, GetFileNameExtension) {  // Extension has no leading dot.
+  EXPECT_EQ("", GetFileNameExtension(""));
+  EXPECT_EQ("cpp", GetFileNameExtension("foo.cpp"));
+  EXPECT_EQ("c", GetFileNameExtension("foo.cpp.c"));  // Last suffix only.
+  EXPECT_EQ("c", GetFileNameExtension("a/b/x.x/foo.c"));  // Dirs are ignored.
+  EXPECT_EQ("", GetFileNameExtension(".cpp"));  // Dot-file: no extension.
+}
+
+TEST_F(GCCFlagsTest, GetLanguage) {  // Language from driver name + extension.
+  EXPECT_EQ("c", GetLanguage("gcc", "foo"));  // No extension: driver default.
+  EXPECT_EQ("c", GetLanguage("gcc", "foo.c"));
+  EXPECT_EQ("c++", GetLanguage("gcc", "foo.cc"));
+  EXPECT_EQ("c++", GetLanguage("gcc", "foo.cpp"));
+  EXPECT_EQ("c++", GetLanguage("g++", "foo"));
+  EXPECT_EQ("c++", GetLanguage("g++", "foo.c"));  // g++ treats .c as C++.
+  EXPECT_EQ("c++", GetLanguage("g++", "foo.cc"));
+  EXPECT_EQ("c++", GetLanguage("g++", "foo.cpp"));
+  EXPECT_EQ("objective-c", GetLanguage("gcc", "foo.m"));
+  EXPECT_EQ("objective-c", GetLanguage("g++", "foo.m"));  // .m stays ObjC.
+  EXPECT_EQ("objective-c++", GetLanguage("gcc", "foo.mm"));
+  EXPECT_EQ("objective-c++", GetLanguage("g++", "foo.mm"));
+  EXPECT_EQ("c-header", GetLanguage("gcc", "foo.h"));
+  EXPECT_EQ("c++-header", GetLanguage("gcc", "foo.hpp"));
+  EXPECT_EQ("c++-header", GetLanguage("g++", "foo.h"));  // g++: .h is C++.
+
+  // clang/clang++ are expected to follow the same rules as gcc/g++.
+  EXPECT_EQ("c", GetLanguage("clang", "foo"));
+  EXPECT_EQ("c", GetLanguage("clang", "foo.c"));
+  EXPECT_EQ("c++", GetLanguage("clang", "foo.cc"));
+  EXPECT_EQ("c++", GetLanguage("clang", "foo.cpp"));
+  EXPECT_EQ("c++", GetLanguage("clang++", "foo"));
+  EXPECT_EQ("c++", GetLanguage("clang++", "foo.c"));
+  EXPECT_EQ("c++", GetLanguage("clang++", "foo.cc"));
+  EXPECT_EQ("c++", GetLanguage("clang++", "foo.cpp"));
+  EXPECT_EQ("objective-c", GetLanguage("clang", "foo.m"));
+  EXPECT_EQ("objective-c", GetLanguage("clang++", "foo.m"));
+  EXPECT_EQ("objective-c++", GetLanguage("clang", "foo.mm"));
+  EXPECT_EQ("objective-c++", GetLanguage("clang++", "foo.mm"));
+  EXPECT_EQ("c-header", GetLanguage("clang", "foo.h"));
+  EXPECT_EQ("c++-header", GetLanguage("clang", "foo.hpp"));
+  EXPECT_EQ("c++-header", GetLanguage("clang++", "foo.h"));
+}
+
+TEST_F(GCCFlagsTest, Basic) {  // Broad smoke test over many gcc options.
+  std::vector<string> args;
+  args.push_back("/usr/bin/x86_64-pc-linux-gnu-gcc-4.3");
+  args.push_back("-c");
+  args.push_back("-m32");
+  args.push_back("-mtune=generic");
+  args.push_back("foobar.c");
+  args.push_back("-oout/foobar.o");  // Joined form of -o.
+  args.push_back("-MF");
+  args.push_back("deps/foobar.d");
+  args.push_back("-Wp,-MD,deps/foobar2.d");
+  args.push_back("-L");
+  args.push_back("/usr/local/lib");
+  args.push_back("-I");
+  args.push_back("/usr/local/include");
+  args.push_back("-D");
+  args.push_back("FOO");
+  args.push_back("-Uhoge");
+  args.push_back("-isystem");
+  args.push_back("/usr");
+  args.push_back("-include");
+  args.push_back("/usr/include/stdio.h");
+  args.push_back("-imacros");
+  args.push_back("/usr/include/stdlib.h");
+  args.push_back("--include");
+  args.push_back("/usr/include/string.h");
+  args.push_back("--imacros");
+  args.push_back("/usr/include/stdint.h");
+  args.push_back("-MT");
+  args.push_back("hoge");
+  args.push_back("-isysroot");
+  args.push_back("/tmp");
+  args.push_back("-x");
+  args.push_back("c++");
+  args.push_back("-arch");
+  args.push_back("ppc");
+  args.push_back("-g");
+  args.push_back("-nostdinc");
+  args.push_back("-nostdinc++");
+  args.push_back("-nostdlibinc");
+  args.push_back("--param");
+  args.push_back("key=value");
+  args.push_back("-b");
+  args.push_back("i386");
+  args.push_back("-V");
+  args.push_back("4.0");
+  args.push_back("-specs");
+  args.push_back("foo.spec");
+  args.push_back("-std");
+  args.push_back("c99");
+  args.push_back("-target");
+  args.push_back("arm-linux-androideabi");
+
+  GCCFlags flags(args, "/");
+
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_FALSE(flags.is_stdin_input());
+  EXPECT_EQ(GCCFlags::COMPILE, flags.mode());
+  EXPECT_TRUE(flags.fail_message().empty()) << flags.fail_message();
+  EXPECT_EQ("x86_64-pc-linux-gnu-gcc-4.3", flags.compiler_base_name());
+  EXPECT_EQ("gcc", flags.compiler_name());
+
+  const std::vector<string> expected_compiler_info_flags {
+      "-m32",
+      // TODO: This doesn't change include directory actually.
+      "-mtune=generic",
+      "-isystem", "/usr",
+      "-arch", "ppc",
+      "-nostdinc++",
+      "-nostdlibinc",
+      "-b", "i386",
+      "-V", "4.0",
+      "-specs", "foo.spec",
+      "-std", "c99",
+      "-target", "arm-linux-androideabi",
+      "-x", "c++",
+      "-nostdinc",
+      "-isysroot", "/tmp",
+  };
+  EXPECT_EQ(expected_compiler_info_flags, flags.compiler_info_flags());
+
+  ASSERT_EQ(1U, flags.input_filenames().size());
+  EXPECT_EQ("foobar.c", flags.input_filenames()[0]);
+
+  ASSERT_EQ(1U, flags.include_dirs().size());
+  EXPECT_EQ("/usr/local/include", flags.include_dirs()[0]);
+  // Check the non-system list itself; ASSERT guards the index below.
+  ASSERT_EQ(1U, flags.non_system_include_dirs().size());
+  EXPECT_EQ("/usr/local/include", flags.non_system_include_dirs()[0]);
+  // -imacros files are expected before -include files.
+  ASSERT_EQ(4U, flags.root_includes().size());
+  EXPECT_EQ("/usr/include/stdlib.h", flags.root_includes()[0]);
+  EXPECT_EQ("/usr/include/stdint.h", flags.root_includes()[1]);
+  EXPECT_EQ("/usr/include/stdio.h", flags.root_includes()[2]);
+  EXPECT_EQ("/usr/include/string.h", flags.root_includes()[3]);
+
+  EXPECT_EQ(0U, flags.framework_dirs().size());
+  ASSERT_EQ(2U, flags.commandline_macros().size());  // Guards the indexing.
+  EXPECT_EQ("FOO", flags.commandline_macros()[0].first);
+  EXPECT_TRUE(flags.commandline_macros()[0].second);  // -D FOO: defined.
+  EXPECT_EQ("hoge", flags.commandline_macros()[1].first);
+  EXPECT_FALSE(flags.commandline_macros()[1].second);  // -Uhoge: undefined.
+
+  // Output file order is not important, so compare as sets.
+  const std::set<string> expected_output_files {
+    "out/foobar.o", "deps/foobar.d", "deps/foobar2.d"
+  };
+  EXPECT_EQ(expected_output_files,
+            std::set<string>(flags.output_files().begin(),
+                             flags.output_files().end()));
+
+  EXPECT_TRUE(flags.is_cplusplus());  // Because of "-x c++".
+  EXPECT_TRUE(flags.has_nostdinc());
+  EXPECT_FALSE(flags.has_no_integrated_as());
+  EXPECT_FALSE(flags.has_pipe());
+  EXPECT_EQ("/tmp", flags.isysroot());
+  EXPECT_TRUE(flags.is_gcc());
+  EXPECT_FALSE(flags.is_javac());
+  EXPECT_FALSE(flags.is_vc());
+  EXPECT_FALSE(flags.is_clang_tidy());
+  EXPECT_FALSE(flags.is_java());
+}
+
+TEST_F(GCCFlagsTest, Optimize) {  // -O must show up in compiler_info_flags.
+  std::vector<string> args;
+  args.push_back("gcc");
+  args.push_back("-O");
+  args.push_back("-o");
+  args.push_back("hello.o");
+  args.push_back("-c");
+  args.push_back("hello.c");
+
+  GCCFlags flags(args, "/");
+
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_FALSE(flags.is_stdin_input());
+  EXPECT_EQ(GCCFlags::COMPILE, flags.mode());
+  EXPECT_TRUE(flags.fail_message().empty()) << flags.fail_message();
+  EXPECT_EQ("gcc", flags.compiler_base_name());
+  EXPECT_EQ("gcc", flags.compiler_name());
+  // -O is the only flag expected in compiler_info_flags.
+  ASSERT_EQ(1, static_cast<int>(flags.compiler_info_flags().size()));
+  EXPECT_EQ("-O", flags.compiler_info_flags()[0]);
+
+  ASSERT_EQ(1, static_cast<int>(flags.input_filenames().size()));
+  EXPECT_EQ("hello.c", flags.input_filenames()[0]);
+
+  const std::vector<string>& output_files = flags.output_files();
+  ASSERT_EQ(1, static_cast<int>(output_files.size()));
+  EXPECT_EQ("hello.o", output_files[0]);
+
+  EXPECT_FALSE(flags.is_cplusplus());
+  EXPECT_FALSE(flags.has_nostdinc());
+  EXPECT_FALSE(flags.has_no_integrated_as());
+  EXPECT_FALSE(flags.has_pipe());
+
+  EXPECT_TRUE(flags.is_gcc());
+  EXPECT_FALSE(flags.is_javac());
+  EXPECT_FALSE(flags.is_clang_tidy());
+  EXPECT_FALSE(flags.is_java());
+}
+
+TEST_F(GCCFlagsTest, GxxBaseName) {  // Prefixed/versioned g++ maps to "g++".
+  std::vector<string> args;
+  args.push_back("/usr/bin/x86_64-pc-linux-gnu-g++-4.3");
+  GCCFlags flags(args, "/");
+  EXPECT_EQ("x86_64-pc-linux-gnu-g++-4.3", flags.compiler_base_name());
+  EXPECT_EQ("g++", flags.compiler_name());
+  EXPECT_TRUE(flags.is_cplusplus());  // A g++ driver implies C++.
+  EXPECT_FALSE(flags.has_nostdinc());
+  EXPECT_FALSE(flags.has_no_integrated_as());
+}
+
+TEST_F(GCCFlagsTest, Fission) {  // -gsplit-dwarf adds a .dwo output file.
+  std::vector<string> args;
+  args.push_back("gcc");
+  args.push_back("-gsplit-dwarf");
+  args.push_back("-o");
+  args.push_back("hello.o");
+  args.push_back("-c");
+  args.push_back("hello.c");
+
+  GCCFlags flags(args, "/");
+
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_FALSE(flags.is_stdin_input());
+  EXPECT_EQ(GCCFlags::COMPILE, flags.mode());
+  EXPECT_TRUE(flags.fail_message().empty()) << flags.fail_message();
+  EXPECT_EQ("gcc", flags.compiler_base_name());
+  EXPECT_EQ("gcc", flags.compiler_name());
+  // Expected outputs: the object file, then its .dwo companion.
+  const std::vector<string>& output_files = flags.output_files();
+  ASSERT_EQ(2U, output_files.size());
+  EXPECT_EQ("hello.o", output_files[0]);
+  EXPECT_EQ("hello.dwo", output_files[1]);
+
+  EXPECT_FALSE(flags.is_cplusplus());
+  EXPECT_FALSE(flags.has_nostdinc());
+  EXPECT_FALSE(flags.has_no_integrated_as());
+  EXPECT_FALSE(flags.has_pipe());
+
+  EXPECT_TRUE(flags.is_gcc());
+  EXPECT_FALSE(flags.is_javac());
+  EXPECT_FALSE(flags.is_clang_tidy());
+  EXPECT_FALSE(flags.is_java());
+}
+
+TEST_F(GCCFlagsTest, FissionNoO) {  // -gsplit-dwarf without an explicit -o.
+  std::vector<string> args;
+  args.push_back("gcc");
+  args.push_back("-gsplit-dwarf");
+  args.push_back("-c");
+  args.push_back("hello.c");
+
+  GCCFlags flags(args, "/");
+
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_FALSE(flags.is_stdin_input());
+  EXPECT_EQ(GCCFlags::COMPILE, flags.mode());
+  EXPECT_TRUE(flags.fail_message().empty()) << flags.fail_message();
+  EXPECT_EQ("gcc", flags.compiler_base_name());
+  EXPECT_EQ("gcc", flags.compiler_name());
+  // Both output names are expected to be derived from the input "hello.c".
+  const std::vector<string>& output_files = flags.output_files();
+  ASSERT_EQ(2U, output_files.size());
+  EXPECT_EQ("hello.o", output_files[0]);
+  EXPECT_EQ("hello.dwo", output_files[1]);
+
+  EXPECT_FALSE(flags.is_cplusplus());
+  EXPECT_FALSE(flags.has_nostdinc());
+  EXPECT_FALSE(flags.has_no_integrated_as());
+  EXPECT_FALSE(flags.has_pipe());
+
+  EXPECT_TRUE(flags.is_gcc());
+  EXPECT_FALSE(flags.is_javac());
+  EXPECT_FALSE(flags.is_clang_tidy());
+  EXPECT_FALSE(flags.is_java());
+}
+
+TEST_F(GCCFlagsTest, FissionDifferentOutput) {  // -o name differs from input.
+  std::vector<string> args;
+  args.push_back("gcc");
+  args.push_back("-gsplit-dwarf");
+  args.push_back("-o");
+  args.push_back("world.o");
+  args.push_back("-c");
+  args.push_back("hello.c");
+
+  GCCFlags flags(args, "/");
+
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_FALSE(flags.is_stdin_input());
+  EXPECT_EQ(GCCFlags::COMPILE, flags.mode());
+  EXPECT_TRUE(flags.fail_message().empty()) << flags.fail_message();
+  EXPECT_EQ("gcc", flags.compiler_base_name());
+  EXPECT_EQ("gcc", flags.compiler_name());
+  // When compiling with -c, the .dwo name follows the -o name, not the input.
+  const std::vector<string>& output_files = flags.output_files();
+  ASSERT_EQ(2U, output_files.size());
+  EXPECT_EQ("world.o", output_files[0]);
+  EXPECT_EQ("world.dwo", output_files[1]);
+
+  EXPECT_FALSE(flags.is_cplusplus());
+  EXPECT_FALSE(flags.has_nostdinc());
+  EXPECT_FALSE(flags.has_no_integrated_as());
+  EXPECT_FALSE(flags.has_pipe());
+
+  EXPECT_TRUE(flags.is_gcc());
+  EXPECT_FALSE(flags.is_javac());
+  EXPECT_FALSE(flags.is_clang_tidy());
+  EXPECT_FALSE(flags.is_java());
+}
+
+TEST_F(GCCFlagsTest, FissionCompileAndLink) {  // No -c: compile and link.
+  std::vector<string> args;
+  args.push_back("gcc");
+  args.push_back("-gsplit-dwarf");
+  args.push_back("-o");
+  args.push_back("world");
+  args.push_back("hello.c");
+
+  GCCFlags flags(args, "/");
+
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_FALSE(flags.is_stdin_input());
+  EXPECT_EQ(GCCFlags::LINK, flags.mode());
+  EXPECT_TRUE(flags.fail_message().empty()) << flags.fail_message();
+  EXPECT_EQ("gcc", flags.compiler_base_name());
+  EXPECT_EQ("gcc", flags.compiler_name());
+  // In LINK mode the .dwo name follows the input file, not the -o name.
+  const std::vector<string>& output_files = flags.output_files();
+  ASSERT_EQ(2U, output_files.size());
+  EXPECT_EQ("world", output_files[0]);
+  EXPECT_EQ("hello.dwo", output_files[1]);
+
+  EXPECT_FALSE(flags.is_cplusplus());
+  EXPECT_FALSE(flags.has_nostdinc());
+  EXPECT_FALSE(flags.has_no_integrated_as());
+  EXPECT_FALSE(flags.has_pipe());
+
+  EXPECT_TRUE(flags.is_gcc());
+  EXPECT_FALSE(flags.is_javac());
+  EXPECT_FALSE(flags.is_clang_tidy());
+  EXPECT_FALSE(flags.is_java());
+}
+
+TEST_F(GCCFlagsTest, FissionJustLink) {  // Linking an object: no .dwo output.
+  std::vector<string> args;
+  args.push_back("gcc");
+  args.push_back("-gsplit-dwarf");
+  args.push_back("-o");
+  args.push_back("world");
+  args.push_back("hello.o");  // The input is already an object file.
+
+  GCCFlags flags(args, "/");
+
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_FALSE(flags.is_stdin_input());
+  EXPECT_EQ(GCCFlags::LINK, flags.mode());
+  EXPECT_TRUE(flags.fail_message().empty()) << flags.fail_message();
+  EXPECT_EQ("gcc", flags.compiler_base_name());
+  EXPECT_EQ("gcc", flags.compiler_name());
+  // Only the linked binary is expected as output.
+  const std::vector<string>& output_files = flags.output_files();
+  ASSERT_EQ(1U, output_files.size());
+  EXPECT_EQ("world", output_files[0]);
+
+  EXPECT_FALSE(flags.is_cplusplus());
+  EXPECT_FALSE(flags.has_nostdinc());
+  EXPECT_FALSE(flags.has_no_integrated_as());
+  EXPECT_FALSE(flags.has_pipe());
+
+  EXPECT_TRUE(flags.is_gcc());
+  EXPECT_FALSE(flags.is_javac());
+  EXPECT_FALSE(flags.is_clang_tidy());
+  EXPECT_FALSE(flags.is_java());
+}
+
+TEST_F(GCCFlagsTest, ClangBaseName) {  // Full path to clang maps to "clang".
+  std::vector<string> args;
+  args.push_back("/usr/src/chromium/src/"
+                 "third_party/llvm-build/Release+Assets/bin/clang");
+  GCCFlags flags(args, "/");
+  EXPECT_EQ("clang", flags.compiler_base_name());  // Directory part dropped.
+  EXPECT_EQ("clang", flags.compiler_name());
+  EXPECT_FALSE(flags.is_cplusplus());  // Plain clang is not a C++ driver.
+  EXPECT_FALSE(flags.has_nostdinc());
+  EXPECT_FALSE(flags.has_no_integrated_as());
+}
+
+TEST_F(GCCFlagsTest, ClangxxBaseName) {  // Full path to clang++.
+  std::vector<string> args;
+  args.push_back("/usr/src/chromium/src/"
+                 "third_party/llvm-build/Release+Assets/bin/clang++");
+  GCCFlags flags(args, "/");
+  EXPECT_EQ("clang++", flags.compiler_base_name());  // Directory part dropped.
+  EXPECT_EQ("clang++", flags.compiler_name());
+  EXPECT_TRUE(flags.is_cplusplus());  // A clang++ driver implies C++.
+  EXPECT_FALSE(flags.has_nostdinc());
+  EXPECT_FALSE(flags.has_no_integrated_as());
+}
+
+TEST_F(GCCFlagsTest, PnaclClangBaseName) {  // pnacl-clang maps to "clang".
+  std::vector<string> args;
+  args.push_back("toolchain/linux_x86_pnacl/newlib/bin/pnacl-clang");
+  GCCFlags flags(args, "/");
+  EXPECT_EQ("pnacl-clang", flags.compiler_base_name());  // Prefix is kept.
+  EXPECT_EQ("clang", flags.compiler_name());  // Prefix is dropped.
+  EXPECT_FALSE(flags.is_cplusplus());
+  EXPECT_FALSE(flags.has_nostdinc());
+  EXPECT_FALSE(flags.has_no_integrated_as());
+}
+
+TEST_F(GCCFlagsTest, PnaclClangxxBaseName) {  // pnacl-clang++ maps to clang++.
+  std::vector<string> args;
+  args.push_back("toolchain/linux_x86_pnacl/newlib/bin/pnacl-clang++");
+  GCCFlags flags(args, "/");
+  EXPECT_EQ("pnacl-clang++", flags.compiler_base_name());  // Prefix is kept.
+  EXPECT_EQ("clang++", flags.compiler_name());  // Prefix is dropped.
+  EXPECT_TRUE(flags.is_cplusplus());
+  EXPECT_FALSE(flags.has_nostdinc());
+  EXPECT_FALSE(flags.has_no_integrated_as());
+}
+
+TEST_F(GCCFlagsTest, GccPipe) {  // -pipe must be reported by has_pipe().
+  std::vector<string> args;
+  args.push_back("gcc");
+  args.push_back("-o");
+  args.push_back("hello.o");
+  args.push_back("-pipe");
+  args.push_back("-c");
+  args.push_back("hello.c");
+  GCCFlags flags(args, "/");
+  EXPECT_TRUE(flags.has_pipe());
+}
+
+TEST_F(GCCFlagsTest, GccFfreestanding) {  // -ffreestanding is tracked.
+  std::vector<string> args;
+  args.push_back("gcc");
+  args.push_back("-o");
+  args.push_back("hello.o");
+  args.push_back("-ffreestanding");
+  args.push_back("-c");
+  args.push_back("hello.c");
+  GCCFlags flags(args, "/");
+  EXPECT_TRUE(flags.has_ffreestanding());
+  EXPECT_FALSE(flags.has_fno_hosted());  // Tracked separately from -fno-hosted.
+  EXPECT_FALSE(flags.has_fsyntax_only());
+  std::vector<string> want_compiler_info_flags;  // Only -ffreestanding.
+  want_compiler_info_flags.push_back("-ffreestanding");
+  EXPECT_EQ(want_compiler_info_flags, flags.compiler_info_flags());
+}
+
+TEST_F(GCCFlagsTest, GccFnohosted) {  // -fno-hosted is tracked.
+  std::vector<string> args;
+  args.push_back("gcc");
+  args.push_back("-o");
+  args.push_back("hello.o");
+  args.push_back("-fno-hosted");
+  args.push_back("-c");
+  args.push_back("hello.c");
+  GCCFlags flags(args, "/");
+  EXPECT_FALSE(flags.has_ffreestanding());  // Not implied by -fno-hosted.
+  EXPECT_TRUE(flags.has_fno_hosted());
+  EXPECT_FALSE(flags.has_fsyntax_only());
+  std::vector<string> want_compiler_info_flags;  // Only -fno-hosted.
+  want_compiler_info_flags.push_back("-fno-hosted");
+  EXPECT_EQ(want_compiler_info_flags, flags.compiler_info_flags());
+}
+
+TEST_F(GCCFlagsTest, GccWrapper) {  // has_wrapper() reflects -wrapper.
+  // See https://gcc.gnu.org/wiki/DebuggingGCC
+  // $ gcc <parameters> -wrapper gdb,--args
+  // $ gcc <parameters> -wrapper valgrind
+  std::vector<string> origs {
+    "gcc", "-o", "hello.o", "-c", "hello.c",
+  };
+
+  {  // Baseline: without -wrapper.
+    GCCFlags flags(origs, "/");
+    EXPECT_FALSE(flags.has_wrapper());
+  }
+  {  // Same command line with "-wrapper valgrind" appended.
+    std::vector<string> args(origs);
+    args.insert(args.end(), { "-wrapper", "valgrind" });
+    GCCFlags flags(args, "/");
+    EXPECT_TRUE(flags.has_wrapper());
+  }
+}
+
+TEST_F(GCCFlagsTest, GccUndef) {  // -undef must be in compiler_info_flags.
+  std::vector<string> origs {
+    "gcc", "-undef", "-c", "hello.c",
+  };
+
+  GCCFlags flags(origs, "/");
+  // -undef is the only flag expected in compiler_info_flags.
+  std::vector<string> want_compiler_info_flags {
+    "-undef",
+  };
+  EXPECT_EQ(want_compiler_info_flags, flags.compiler_info_flags());
+}
+
+TEST_F(GCCFlagsTest, ClangFSyntaxOnly) {  // -fsyntax-only is tracked.
+  std::vector<string> args;
+  args.push_back("clang");
+  args.push_back("-o");
+  args.push_back("hello.o");
+  args.push_back("-fsyntax-only");
+  args.push_back("-c");
+  args.push_back("hello.c");
+  GCCFlags flags(args, "/");
+  EXPECT_TRUE(flags.has_fsyntax_only());
+  EXPECT_FALSE(flags.has_fno_hosted());
+  EXPECT_FALSE(flags.has_ffreestanding());
+  std::vector<string> want_compiler_info_flags;  // Only -fsyntax-only.
+  want_compiler_info_flags.push_back("-fsyntax-only");
+  EXPECT_EQ(want_compiler_info_flags, flags.compiler_info_flags());
+}
+
+TEST_F(GCCFlagsTest, ClangFprofileInstrGenerate) {
+  std::vector<string> args {  // -fprofile-instr-generate is the flag under test.
+    "clang", "-o", "hello.o", "-fprofile-instr-generate", "-c", "hello.c"};
+  GCCFlags flags(args, "/");
+  // It must be the only entry in compiler_info_flags.
+  std::vector<string> want_compiler_info_flags {"-fprofile-instr-generate"};
+  EXPECT_EQ(want_compiler_info_flags, flags.compiler_info_flags());
+}
+
+TEST_F(GCCFlagsTest, ClangXoption) {  // -Xclang values are not inputs/outputs.
+  std::vector<string> args;
+  args.push_back("clang");
+  args.push_back("-o");
+  args.push_back("hello.o");
+  args.push_back("-Xclang");  // Each -Xclang takes the next argument.
+  args.push_back("-load");
+  args.push_back("-Xclang");
+  args.push_back("/usr/src/chromium/src/tools/clang/scripts/../../../"
+                 "third_party/llvm-build/Release+Asserts/lib/"
+                 "libFindBadConstructs.so");
+  args.push_back("-Xclang");
+  args.push_back("-add-plugin");
+  args.push_back("-Xclang");
+  args.push_back("find-bad-constructs");
+  args.push_back("-c");
+  args.push_back("hello.c");
+  GCCFlags flags(args, "/");
+
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_FALSE(flags.is_stdin_input());
+  EXPECT_EQ(GCCFlags::COMPILE, flags.mode());
+  EXPECT_TRUE(flags.fail_message().empty()) << flags.fail_message();
+  EXPECT_EQ("clang", flags.compiler_base_name());
+  EXPECT_EQ("clang", flags.compiler_name());
+  ASSERT_EQ(1U, flags.input_filenames().size());  // Plugin .so is not input.
+  EXPECT_EQ("hello.c", flags.input_filenames()[0]);
+  const std::vector<string>& output_files = flags.output_files();
+  ASSERT_EQ(1U, output_files.size());
+  EXPECT_EQ("hello.o", output_files[0]);
+}
+
+TEST_F(GCCFlagsTest, ClangNoIntegratedAs) {
+  // -no-integrated-as sets has_no_integrated_as() and is kept verbatim.
+  std::vector<string> args;
+  args.push_back("clang");
+  args.push_back("-no-integrated-as");
+  GCCFlags flags(args, "/");
+  EXPECT_EQ("clang", flags.compiler_base_name());
+  EXPECT_EQ("clang", flags.compiler_name());
+  EXPECT_TRUE(flags.has_no_integrated_as());
+  EXPECT_FALSE(flags.is_cplusplus());
+  EXPECT_FALSE(flags.has_nostdinc());
+
+  const std::vector<string>& compiler_info_flags = flags.compiler_info_flags();
+  ASSERT_EQ(1UL, compiler_info_flags.size());
+  EXPECT_EQ("-no-integrated-as", compiler_info_flags[0]);
+}
+
+TEST_F(GCCFlagsTest, ClangFnoIntegratedAs) {
+  // -fno-integrated-as sets has_no_integrated_as() and is kept verbatim.
+  std::vector<string> args;
+  args.push_back("clang");
+  args.push_back("-fno-integrated-as");
+  GCCFlags flags(args, "/");
+  EXPECT_EQ("clang", flags.compiler_base_name());
+  EXPECT_EQ("clang", flags.compiler_name());
+  EXPECT_TRUE(flags.has_no_integrated_as());
+  EXPECT_FALSE(flags.is_cplusplus());
+  EXPECT_FALSE(flags.has_nostdinc());
+
+  const std::vector<string>& compiler_info_flags = flags.compiler_info_flags();
+  ASSERT_EQ(1UL, compiler_info_flags.size());
+  EXPECT_EQ("-fno-integrated-as", compiler_info_flags[0]);
+}
+
+TEST_F(GCCFlagsTest, PnaclClangPnaclBias) {  // pnacl bias flags are kept.
+  std::vector<string> args;
+  const string& pnacl_command = "/tmp/pnacl-clang++";
+  ASSERT_TRUE(CompilerFlags::IsPNaClClangCommand(pnacl_command));
+  args.push_back(pnacl_command);
+  args.push_back("--pnacl-bias=x86-32-nonsfi");
+  GCCFlags flags(args, "/");
+  EXPECT_EQ("clang++", flags.compiler_name());
+  // Each bias flag below must be the only entry in compiler_info_flags.
+  std::vector<string> expected_compiler_info_flags;
+  expected_compiler_info_flags.push_back("--pnacl-bias=x86-32-nonsfi");
+  EXPECT_EQ(expected_compiler_info_flags, flags.compiler_info_flags());
+
+  // --pnacl-arm-bias
+  args[1] = "--pnacl-arm-bias";  // Replace the flag and re-parse.
+  GCCFlags flags_arm(args, "/");
+  expected_compiler_info_flags[0] = "--pnacl-arm-bias";
+  EXPECT_EQ(expected_compiler_info_flags, flags_arm.compiler_info_flags());
+
+  // --pnacl-mips-bias
+  args[1] = "--pnacl-mips-bias";
+  GCCFlags flags_mips(args, "/");
+  expected_compiler_info_flags[0] = "--pnacl-mips-bias";
+  EXPECT_EQ(expected_compiler_info_flags, flags_mips.compiler_info_flags());
+
+  // --pnacl-i686-bias
+  args[1] = "--pnacl-i686-bias";
+  GCCFlags flags_i686(args, "/");
+  expected_compiler_info_flags[0] = "--pnacl-i686-bias";
+  EXPECT_EQ(expected_compiler_info_flags, flags_i686.compiler_info_flags());
+
+  // --pnacl-x86_64-bias
+  args[1] = "--pnacl-x86_64-bias";
+  GCCFlags flags_x86_64(args, "/");
+  expected_compiler_info_flags[0] = "--pnacl-x86_64-bias";
+  EXPECT_EQ(expected_compiler_info_flags, flags_x86_64.compiler_info_flags());
+}
+
+TEST_F(GCCFlagsTest, PnaclClangPnaclBiasShouldNotBeDetectedByClang) {
+  std::vector<string> args;  // Plain clang++ given a pnacl-only flag.
+  args.push_back("/tmp/clang++");
+  args.push_back("--pnacl-bias=x86-32-nonsfi");
+  GCCFlags flags(args, "/");
+  EXPECT_EQ("clang++", flags.compiler_base_name());
+  EXPECT_EQ("clang++", flags.compiler_name());
+  // For a non-pnacl driver the bias flag must not be recorded.
+  std::vector<string> expected_compiler_info_flags;
+  EXPECT_EQ(expected_compiler_info_flags, flags.compiler_info_flags());
+}
+
+// Checks how combinations of -c, -S, -E and -M determine the output file
+// reported for the "hello" input.
+// NOTE(review): GetOutputFileForHello is defined outside this chunk; its
+// third argument is presumably the mode the helper expects GCCFlags to
+// report -- confirm against the fixture.
+TEST_F(GCCFlagsTest, Mode) {
+  std::vector<string> opts;
+  string output;
+
+  // Single mode flag: -c yields an object file, -S an assembly file.
+  opts.push_back("-c");
+  GetOutputFileForHello(opts, &output, GCCFlags::COMPILE);
+  EXPECT_EQ("hello.o", output);
+
+  opts[0] = "-S";
+  GetOutputFileForHello(opts, &output, GCCFlags::COMPILE);
+  EXPECT_EQ("hello.s", output);
+
+  // -E and -M are checked with PREPROCESS and an empty output name.
+  opts[0] = "-E";
+  GetOutputFileForHello(opts, &output, GCCFlags::PREPROCESS);
+  EXPECT_EQ("", output);
+
+  opts[0] = "-M";
+  GetOutputFileForHello(opts, &output, GCCFlags::PREPROCESS);
+  EXPECT_EQ("", output);
+
+  // Two mode flags from here on. opts[0] is still "-M"; "-c" is appended as
+  // opts[1]. Whenever -E or -M is present, PREPROCESS is expected regardless
+  // of the order of the two flags.
+  opts.push_back("-c");
+  GetOutputFileForHello(opts, &output, GCCFlags::PREPROCESS);
+  EXPECT_EQ("", output);
+
+  opts[0] = "-E";
+  opts[1] = "-c";
+  GetOutputFileForHello(opts, &output, GCCFlags::PREPROCESS);
+  EXPECT_EQ("", output);
+
+  opts[0] = "-c";
+  opts[1] = "-M";
+  GetOutputFileForHello(opts, &output, GCCFlags::PREPROCESS);
+  EXPECT_EQ("", output);
+
+  opts[0] = "-c";
+  opts[1] = "-E";
+  GetOutputFileForHello(opts, &output, GCCFlags::PREPROCESS);
+  EXPECT_EQ("", output);
+
+  opts[0] = "-S";
+  opts[1] = "-M";
+  GetOutputFileForHello(opts, &output, GCCFlags::PREPROCESS);
+  EXPECT_EQ("", output);
+
+  opts[0] = "-M";
+  opts[1] = "-S";
+  GetOutputFileForHello(opts, &output, GCCFlags::PREPROCESS);
+  EXPECT_EQ("", output);
+
+  // -c combined with -S: the assembly output is expected in either order.
+  opts[0] = "-c";
+  opts[1] = "-S";
+  GetOutputFileForHello(opts, &output, GCCFlags::COMPILE);
+  EXPECT_EQ("hello.s", output);
+
+  opts[0] = "-S";
+  opts[1] = "-c";
+  GetOutputFileForHello(opts, &output, GCCFlags::COMPILE);
+  EXPECT_EQ("hello.s", output);
+}
+
+// A command line containing -print-file-name is expected to be reported as
+// unsuccessful, with neither stdin input nor C++ detected.
+TEST_F(GCCFlagsTest, PrintFileName) {
+  const std::vector<string> args {
+    "gcc", "-c", "-print-file-name", "hello.c",
+  };
+
+  GCCFlags flags(args, "/");
+  EXPECT_FALSE(flags.is_successful());
+  EXPECT_FALSE(flags.is_stdin_input());
+  EXPECT_FALSE(flags.is_cplusplus());
+}
+
+// Both "-" and "/dev/stdin" as the input argument must be recognized as
+// reading the source from standard input.
+TEST_F(GCCFlagsTest, Stdin) {
+  std::vector<string> args { "gcc", "-c", "-xc++", "-" };
+  {
+    GCCFlags dash_flags(args, "/");
+    EXPECT_TRUE(dash_flags.is_successful());
+    EXPECT_TRUE(dash_flags.is_stdin_input());
+  }
+
+  // Swap the trailing "-" for the device path.
+  args.back() = "/dev/stdin";
+  {
+    GCCFlags dev_flags(args, "/");
+    EXPECT_TRUE(dev_flags.is_successful());
+    EXPECT_TRUE(dev_flags.is_stdin_input());
+  }
+}
+
+// The .gcda profile input is only expected once -fprofile-use is given; its
+// directory follows -fprofile-dir= and is then replaced by a later
+// -fprofile-generate=.
+TEST_F(GCCFlagsTest, Profile) {
+  // Expected paths use the platform's path separator.
+#ifndef _WIN32
+  const string expected_in_foo = "foo/hello.gcda";
+  const string expected_in_bar = "bar/hello.gcda";
+#else
+  const string expected_in_foo = "foo\\hello.gcda";
+  const string expected_in_bar = "bar\\hello.gcda";
+#endif
+
+  std::vector<string> args { "gcc", "-c", "hello.c", "-fprofile-dir=foo" };
+
+  // fprofile-use isn't set yet: no optional inputs.
+  {
+    GCCFlags flags(args, "/");
+    EXPECT_TRUE(flags.is_successful());
+    EXPECT_TRUE(flags.optional_input_filenames().empty());
+  }
+
+  // Now -fprofile-use is specified: the profile is looked up under foo.
+  args.push_back("-fprofile-use");
+  {
+    GCCFlags flags(args, "/");
+    EXPECT_TRUE(flags.is_successful());
+    ASSERT_EQ(1, static_cast<int>(flags.optional_input_filenames().size()));
+    EXPECT_EQ(expected_in_foo, flags.optional_input_filenames()[0]);
+  }
+
+  // The output directory should have been changed to bar.
+  args.push_back("-fprofile-generate=bar");
+  {
+    GCCFlags flags(args, "/");
+    EXPECT_TRUE(flags.is_successful());
+    ASSERT_EQ(1, static_cast<int>(flags.optional_input_filenames().size()));
+    EXPECT_EQ(expected_in_bar, flags.optional_input_filenames()[0]);
+  }
+}
+
+// With -fprofile-use and no profile directory option, the .gcda file is
+// expected in the current working directory under the input's basename
+// ("foo/hello.c" -> "<cwd>/hello.gcda").
+TEST_F(GCCFlagsTest, ProfileCwd) {
+  // -fprofile-use is passed exactly once; the original pushed it twice,
+  // which added nothing to what this test checks.
+  const std::vector<string> args {
+    "gcc", "-c", "foo/hello.c", "-fprofile-use",
+  };
+
+  // We'll check .gcda files in the current directory.
+#ifndef _WIN32
+  GCCFlags flags(args, "/tmp");
+#else
+  GCCFlags flags(args, "C:\\tmp");
+#endif
+  EXPECT_TRUE(flags.is_successful());
+  ASSERT_EQ(1, static_cast<int>(flags.optional_input_filenames().size()));
+#ifndef _WIN32
+  EXPECT_EQ("/tmp/hello.gcda", flags.optional_input_filenames()[0]);
+#else
+  EXPECT_EQ("C:\\tmp\\hello.gcda", flags.optional_input_filenames()[0]);
+#endif
+}
+
+// "@file" arguments: parsing fails while the file is missing; once it
+// exists its contents are expanded into expanded_args() and the at-file
+// itself is reported as an optional input.
+TEST_F(GCCFlagsTest, AtFile) {
+  const string at_file = file::JoinPath(tmp_dir_, "at_file");
+  const std::vector<string> args { "gcc", "@" + at_file };
+
+  // The at-file doesn't exist.
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "."));
+  EXPECT_FALSE(flags->is_successful());
+
+  // The same options, first on a single line, then with leading spaces and
+  // embedded newlines. Both must expand to the same argument list.
+  const char* const kAtFileContents[] = {
+    "-c -DFOO '-DBAR=\"a b\\c\"' foo.cc",
+    " -c -DFOO '-DBAR=\"a b\\c\"' \n foo.cc\n",
+  };
+  for (const char* contents : kAtFileContents) {
+    ASSERT_TRUE(WriteStringToFile(contents, at_file));
+    flags = CompilerFlags::MustNew(args, ".");
+    EXPECT_TRUE(flags->is_successful());
+    EXPECT_TRUE(flags->is_gcc());
+    EXPECT_FALSE(flags->is_javac());
+    EXPECT_FALSE(flags->is_clang_tidy());
+    EXPECT_FALSE(flags->is_java());
+    EXPECT_EQ("gcc", flags->compiler_name());
+    // Fatal on mismatch: the elements are indexed right below.
+    ASSERT_EQ(5U, flags->expanded_args().size());
+    EXPECT_EQ("gcc", flags->expanded_args()[0]);
+    EXPECT_EQ("-c", flags->expanded_args()[1]);
+    EXPECT_EQ("-DFOO", flags->expanded_args()[2]);
+    // The single quotes are stripped; the inner double quotes are kept.
+    EXPECT_EQ("-DBAR=\"a b\\c\"", flags->expanded_args()[3]);
+    EXPECT_EQ("foo.cc", flags->expanded_args()[4]);
+    ASSERT_EQ(1U, flags->input_filenames().size());
+    EXPECT_EQ("foo.cc", flags->input_filenames()[0]);
+    ASSERT_EQ(1U, flags->optional_input_filenames().size());
+    EXPECT_EQ(PathResolver::PlatformConvert(at_file),
+              flags->optional_input_filenames()[0]);
+  }
+}
+
+// -idirafter and its directory argument are expected to show up, in order,
+// in compiler_info_flags().
+TEST_F(GCCFlagsTest, Idirafter) {
+  const std::vector<string> args {
+    "g++", "-idirafter", "include", "-c", "foo.cc",
+  };
+
+  GCCFlags flags(args, ".");
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_EQ(GCCFlags::COMPILE, flags.mode());
+
+  const std::vector<string>& info_flags = flags.compiler_info_flags();
+  ASSERT_EQ(2U, info_flags.size());
+  EXPECT_EQ("-idirafter", info_flags[0]);
+  EXPECT_EQ("include", info_flags[1]);
+}
+
+// -Wp,<opts> handling: -D/-U inside -Wp become command line macros,
+// -Wp,-MD,<file> adds an output file, and anything else after -Wp, is
+// reported as an unknown flag, one entry per comma-separated option.
+TEST_F(GCCFlagsTest, PreprocessFlags) {
+  const std::vector<string> args {
+    "g++", "-c", "foo.cc",
+    "-Wp,-Dfoo=bar,-Ufoo2", "-Ufoo", "-Dfoo2=bar2",
+    "-Ufoo3", "-Wp,-Dfoo3=bar3", "-Wp,-Dfoo4=bar4,-Ufoo4",
+    "-Wp,-MD,deps/foobar.d",
+    "-Wp,-unknown1,-unknown2",
+    "-Wp,-unknown3",
+  };
+
+  // (macro, is_define) pairs. The expected order lists the plain -D/-U
+  // options first and the ones passed through -Wp, after them.
+  const std::vector<std::pair<string, bool>> want_macros {
+    { "foo", false },
+    { "foo2=bar2", true },
+    { "foo3", false },
+    { "foo=bar",  true },
+    { "foo2", false },
+    { "foo3=bar3", true },
+    { "foo4=bar4", true },
+    { "foo4", false },
+  };
+  const std::vector<string> want_output_files {
+    "deps/foobar.d",
+  };
+  const std::vector<string> want_unknown_flags {
+    "-Wp,-unknown1",
+    "-Wp,-unknown2",
+    "-Wp,-unknown3",
+  };
+
+  GCCFlags flags(args, ".");
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_EQ(GCCFlags::COMPILE, flags.mode());
+  EXPECT_EQ(want_macros, flags.commandline_macros());
+  EXPECT_EQ(want_output_files, flags.output_files());
+  EXPECT_EQ(want_unknown_flags, flags.unknown_flags());
+}
+
+// Linker pass-through options (-Wl,...) are all expected in unknown_flags().
+TEST_F(GCCFlagsTest, LinkerFlags) {
+  // all -Wl, are treated as unknown for now.
+  const std::vector<string> linker_options {
+    "-Wl,--start-group",
+    "-Wl,--end-group",
+    "-Wl,--threads",
+  };
+
+  // Command line: g++ <linker options...> foo.c
+  std::vector<string> args { "g++" };
+  args.insert(args.end(), linker_options.begin(), linker_options.end());
+  args.push_back("foo.c");
+
+  GCCFlags flags(args, ".");
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_EQ(linker_options, flags.unknown_flags());
+}
+
+// Assembler pass-through options (-Wa,...): the ones listed first are
+// expected to be accepted silently, while the -unknownN ones must be
+// reported in unknown_flags(), one entry per comma-separated option.
+TEST_F(GCCFlagsTest, AssemblerFlags) {
+  const std::vector<string> args {
+    "g++",
+    // Expected to be recognized.
+    "-Wa,--noexecstack",
+    "-Wa,--defsym,STEREO_OUTPUT",
+    "-Wa,--defsym",
+    "-Wa,FOO",
+    "-Wa,-Iout/somewhere",
+    "-Wa,-gdwarf-2",
+    "-Wa,-march=foo",
+    "-Wa,-march,foo",
+    "-Wa,-mfpu=neon",
+    "-c",
+    "foo.c",
+    // Expected to be reported as unknown.
+    "-Wa,-unknown1,-unknown2",
+    "-Wa,-unknown3",
+  };
+  const std::vector<string> want_unknown {
+    "-Wa,-unknown1",
+    "-Wa,-unknown2",
+    "-Wa,-unknown3",
+  };
+
+  GCCFlags flags(args, ".");
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_EQ(want_unknown, flags.unknown_flags());
+}
+
+// Mixes warning options (-W...), and -Wp, / -Wa, / -Wl, pass-through
+// options in one command line and checks which of them end up in
+// unknown_flags(), and in which order.
+TEST_F(GCCFlagsTest, MixW) {
+  const std::vector<string> args {
+    "g++", "-c", "foo.c",
+    "-Wall",
+    "-W",
+    "-Wextra",
+    "-Wno-div-by-zero",
+    "-Wunknown",
+    "-Wp,-Dfoo=bar,-Ufoo",
+    "-Wa,--noexecstack",
+    "-Wl,--defsym,STEREO_OUTPUT",
+    "-Wl,--defsym",
+    "-Wl,FOO",
+    "-Wa,-unknown1,-unknown2",
+    "-Wl,-unknown3",
+  };
+  // Expected order: -Wa, entries, then -Wl, entries, then plain -W ones;
+  // this is not the order in which they appear on the command line.
+  const std::vector<string> want_unknown {
+    "-Wa,-unknown1",
+    "-Wa,-unknown2",
+    "-Wl,--defsym,STEREO_OUTPUT",
+    "-Wl,--defsym",
+    "-Wl,FOO",
+    "-Wl,-unknown3",
+    "-Wunknown",
+  };
+
+  GCCFlags flags(args, ".");
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_EQ(want_unknown, flags.unknown_flags());
+}
+
+// -MD together with -c is expected to produce a dependency file (foo.d)
+// next to the object file (foo.o).
+TEST_F(GCCFlagsTest, MD) {
+  const std::vector<string> args { "g++", "-MD", "-c", "foo.cc" };
+
+  GCCFlags flags(args, ".");
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_EQ(GCCFlags::COMPILE, flags.mode());
+
+  // Sort a copy so the check does not depend on the reported order.
+  std::vector<string> sorted_outputs = flags.output_files();
+  ASSERT_EQ(2U, sorted_outputs.size());
+  std::sort(sorted_outputs.begin(), sorted_outputs.end());
+  EXPECT_EQ("foo.d", sorted_outputs[0]);
+  EXPECT_EQ("foo.o", sorted_outputs[1]);
+}
+
+// -MMD together with -c is expected to produce a dependency file (foo.d)
+// next to the object file (foo.o).
+TEST_F(GCCFlagsTest, MMD) {
+  const std::vector<string> args { "g++", "-MMD", "-c", "foo.cc" };
+
+  GCCFlags flags(args, ".");
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_EQ(GCCFlags::COMPILE, flags.mode());
+
+  // Sort a copy so the check does not depend on the reported order.
+  std::vector<string> sorted_outputs = flags.output_files();
+  ASSERT_EQ(2U, sorted_outputs.size());
+  std::sort(sorted_outputs.begin(), sorted_outputs.end());
+  EXPECT_EQ("foo.d", sorted_outputs[0]);
+  EXPECT_EQ("foo.o", sorted_outputs[1]);
+}
+
+// Of all the -g* options below, only -gunknown is expected to be reported
+// by unknown_flags().
+TEST_F(GCCFlagsTest, DebugFlags) {
+  const std::vector<string> args {
+    "g++", "-c", "foo.cc",
+    "-g", "-g0", "-g1", "-g2", "-g3",
+    "-gcolumn-info", "-gdw", "-gdwarf-2", "-gdwarf-3",
+    "-ggdb3", "-ggnu-pubnames", "-gline-tables-only", "-gsplit-dwarf",
+    "-gunknown",
+  };
+  const std::vector<string> want_unknown { "-gunknown" };
+
+  GCCFlags flags(args, ".");
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_EQ(GCCFlags::COMPILE, flags.mode());
+  EXPECT_EQ(want_unknown, flags.unknown_flags());
+}
+
+// Unrecognized single-dash and double-dash options are expected in
+// unknown_flags(), in command line order, without failing the parse.
+TEST_F(GCCFlagsTest, UnknownFlags) {
+  const std::vector<string> want_unknown { "-unknown1", "--unknown2" };
+  const std::vector<string> args {
+    "g++", "-c", "foo.cc", "-unknown1", "--unknown2",
+  };
+
+  GCCFlags flags(args, ".");
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_EQ(want_unknown, flags.unknown_flags());
+}
+
+// GCCFlags::IsKnownWarningOption takes the text following "-W".
+TEST_F(GCCFlagsTest, KnownWarningOptions) {
+  // Accepted: bare "-W" (empty suffix) and "-Waddress".
+  EXPECT_TRUE(GCCFlags::IsKnownWarningOption(""));
+  EXPECT_TRUE(GCCFlags::IsKnownWarningOption("address"));
+
+  // Rejected: "-Wunknown" (no such options).
+  EXPECT_FALSE(GCCFlags::IsKnownWarningOption("unknown"));
+
+  // "-Walloc-size-larger-than" needs "=n": accepted with a value, rejected
+  // without one.
+  EXPECT_TRUE(GCCFlags::IsKnownWarningOption("alloc-size-larger-than=100"));
+  EXPECT_FALSE(GCCFlags::IsKnownWarningOption("alloc-size-larger-than"));
+}
+
+// Fixture for JavacFlags tests. SetUp() creates a scratch directory named
+// after the current process id (tmp_dir_) and TearDown() removes it
+// recursively.
+class JavacFlagsTest : public testing::Test {
+ public:
+  void SetUp() override {
+    std::vector<string> candidates;
+    GetExistingTempDirectories(&candidates);
+    CHECK_GT(candidates.size(), 0);
+
+    // The process id goes into the directory name.
+#ifndef _WIN32
+    const string pid = std::to_string(getpid());
+#else
+    const string pid = std::to_string(GetCurrentProcessId());
+#endif
+    // Use the first existing temp directory as the parent.
+    tmp_dir_ = file::JoinPath(
+        candidates[0], StrCat("compiler_flags_unittest_", pid));
+
+    CreateDir(tmp_dir_, 0777);
+  }
+
+  void TearDown() override { RecursivelyDelete(tmp_dir_); }
+
+ protected:
+  // Scratch directory owned by the running test.
+  string tmp_dir_;
+};
+
+// Parses a typical javac command line and checks the inputs, the jar files
+// collected from -bootclasspath / -cp / -classpath, and the output
+// directories given by -d and -s.
+TEST_F(JavacFlagsTest, Basic) {
+  const std::vector<string> args {
+    "javac",
+    "-J-Xmx512M",
+    "-target", "1.5",
+    "-d", "dst",
+    "-s", "src",
+    "-cp", "/tmp:a.jar:b.jar",
+    "-classpath", "c.jar",
+    "-bootclasspath", "boot1.jar:boot2.jar",
+    "Hello.java",
+    "World.java",
+  };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "."));
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_FALSE(flags->is_gcc());
+  // Fatal on failure: the static_cast to JavacFlags below is only valid
+  // when the object really is a JavacFlags.
+  ASSERT_TRUE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  JavacFlags* javac_flags = static_cast<JavacFlags*>(flags.get());
+  EXPECT_EQ("javac", flags->compiler_name());
+  ASSERT_EQ(2U, flags->input_filenames().size());
+  EXPECT_EQ("Hello.java", flags->input_filenames()[0]);
+  EXPECT_EQ("World.java", flags->input_filenames()[1]);
+  // Boot class path jars are expected first; the "/tmp" directory entry of
+  // -cp is not expected in the list.
+  const std::vector<string> expected_jar_files {
+    "boot1.jar",
+    "boot2.jar",
+    "a.jar",
+    "b.jar",
+    "c.jar",
+  };
+  EXPECT_EQ(expected_jar_files, javac_flags->jar_files());
+  // With -d / -s only output directories are reported, no output files.
+  EXPECT_EQ(0U, flags->output_files().size());
+  ASSERT_EQ(2U, flags->output_dirs().size());
+  EXPECT_EQ("dst", flags->output_dirs()[0]);
+  EXPECT_EQ("src", flags->output_dirs()[1]);
+}
+
+// "@file" arguments for javac: parsing fails while the file is missing;
+// once it exists, its whitespace-separated contents (spaces, tabs, CRLF)
+// are expanded into expanded_args().
+TEST_F(JavacFlagsTest, AtFile) {
+  const string at_file = file::JoinPath(tmp_dir_, "at_file");
+  const std::vector<string> args { "javac", "@" + at_file };
+
+  // The at-file doesn't exist.
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "."));
+  EXPECT_FALSE(flags->is_successful());
+
+  ASSERT_TRUE(
+      WriteStringToFile("Hello.java World.java\r\n\t-d dst\r\n-s src",
+                        at_file));
+  flags = CompilerFlags::MustNew(args, ".");
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_TRUE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("javac", flags->compiler_name());
+  // Fatal on mismatch: the elements are indexed right below.
+  ASSERT_EQ(7U, flags->expanded_args().size());
+  EXPECT_EQ("javac", flags->expanded_args()[0]);
+  EXPECT_EQ("Hello.java", flags->expanded_args()[1]);
+  EXPECT_EQ("World.java", flags->expanded_args()[2]);
+  EXPECT_EQ("-d", flags->expanded_args()[3]);
+  EXPECT_EQ("dst", flags->expanded_args()[4]);
+  EXPECT_EQ("-s", flags->expanded_args()[5]);
+  EXPECT_EQ("src", flags->expanded_args()[6]);
+  ASSERT_EQ(2U, flags->input_filenames().size());
+  EXPECT_EQ("Hello.java", flags->input_filenames()[0]);
+  EXPECT_EQ("World.java", flags->input_filenames()[1]);
+  // The at-file itself is reported as an optional input.
+  ASSERT_EQ(1U, flags->optional_input_filenames().size());
+  EXPECT_EQ(PathResolver::PlatformConvert(at_file),
+            flags->optional_input_filenames()[0]);
+  EXPECT_EQ(0U, flags->output_files().size());
+  ASSERT_EQ(2U, flags->output_dirs().size());
+  EXPECT_EQ("dst", flags->output_dirs()[0]);
+  EXPECT_EQ("src", flags->output_dirs()[1]);
+}
+
+// Without -d, one .class output file per .java input is expected.
+TEST_F(JavacFlagsTest, NoDestination) {
+  const std::vector<string> args { "javac", "Hello.java", "World.java" };
+
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "."));
+  EXPECT_TRUE(flags->is_successful());
+
+  // Compiler kind.
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_TRUE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("javac", flags->compiler_name());
+
+  // Inputs, in command line order.
+  const std::vector<string>& inputs = flags->input_filenames();
+  ASSERT_EQ(2U, inputs.size());
+  EXPECT_EQ("Hello.java", inputs[0]);
+  EXPECT_EQ("World.java", inputs[1]);
+
+  // Outputs derived from the inputs.
+  const std::vector<string>& outputs = flags->output_files();
+  ASSERT_EQ(2U, outputs.size());
+  EXPECT_EQ("Hello.class", outputs[0]);
+  EXPECT_EQ("World.class", outputs[1]);
+}
+
+// A single -processor argument is reported by JavacFlags::processors().
+TEST_F(JavacFlagsTest, Processor) {
+  const std::vector<string> args {
+    "javac", "-processorpath", "classes.jar",
+    "-processor", "dagger.internal.codegen.ComponentProcessor",
+    "All.java"
+  };
+  const std::vector<string> expected_processors {
+    "dagger.internal.codegen.ComponentProcessor",
+  };
+
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "."));
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_FALSE(flags->is_gcc());
+  // Fatal on failure: the static_cast to JavacFlags below is only valid
+  // when the object really is a JavacFlags.
+  ASSERT_TRUE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+
+  JavacFlags* javac_flags = static_cast<JavacFlags*>(flags.get());
+  EXPECT_EQ(expected_processors, javac_flags->processors());
+}
+
+// Repeated -processor arguments accumulate, in command line order.
+TEST_F(JavacFlagsTest, MultipleProcessorArgs) {
+  const std::vector<string> args {
+    "javac", "-processorpath", "classes.jar",
+    "-processor", "dagger.internal.codegen.ComponentProcessor",
+    "-processor", "com.google.auto.value.processor.AutoValueProcessor",
+    "All.java"
+  };
+  const std::vector<string> expected_processors {
+    "dagger.internal.codegen.ComponentProcessor",
+    "com.google.auto.value.processor.AutoValueProcessor",
+  };
+
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "."));
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_FALSE(flags->is_gcc());
+  // Fatal on failure: the static_cast to JavacFlags below is only valid
+  // when the object really is a JavacFlags.
+  ASSERT_TRUE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+
+  JavacFlags* javac_flags = static_cast<JavacFlags*>(flags.get());
+  EXPECT_EQ(expected_processors, javac_flags->processors());
+}
+
+// A comma-separated list in one -processor argument is split into the
+// individual processor class names.
+TEST_F(JavacFlagsTest, MultipleProcessorsInArg) {
+  const std::vector<string> args {
+    "javac", "-processorpath", "classes.jar",
+    "-processor",
+    // One argument: adjacent literals are concatenated by the compiler.
+    "dagger.internal.codegen.ComponentProcessor,"
+        "com.google.auto.value.processor.AutoValueProcessor",
+    "All.java"
+  };
+  const std::vector<string> expected_processors {
+    "dagger.internal.codegen.ComponentProcessor",
+    "com.google.auto.value.processor.AutoValueProcessor",
+  };
+
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "."));
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_FALSE(flags->is_gcc());
+  // Fatal on failure: the static_cast to JavacFlags below is only valid
+  // when the object really is a JavacFlags.
+  ASSERT_TRUE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+
+  JavacFlags* javac_flags = static_cast<JavacFlags*>(flags.get());
+  EXPECT_EQ(expected_processors, javac_flags->processors());
+}
+
+// ParseJavaClassPaths splits each entry on ':' and, per the expectation
+// below, keeps the .jar and .zip entries while dropping "c.class" and "e".
+TEST_F(JavacFlagsTest, ParseJavaClassPaths) {
+  const std::vector<string> class_paths {
+    "a.jar:b.zip:c.class",
+    "d.jar",
+    "e",
+  };
+  const std::vector<string> want { "a.jar", "b.zip", "d.jar" };
+
+  std::vector<string> parsed;
+  ParseJavaClassPaths(class_paths, &parsed);
+  EXPECT_EQ(want, parsed);
+}
+
+// Unrecognized javac options are expected in unknown_flags(), in command
+// line order.
+TEST_F(JavacFlagsTest, UnknownFlags) {
+  const std::vector<string> want_unknown { "-unknown1", "--unknown2" };
+  const std::vector<string> args {
+    "javac", "-unknown1", "--unknown2",
+    "All.java"
+  };
+
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "."));
+  EXPECT_EQ(want_unknown, flags->unknown_flags());
+}
+
+// Fixture for VCFlags tests. It forwards to two static VCFlags helpers so
+// that tests can call them, and gives every test a scratch directory
+// (tmp_dir_) that SetUp() creates and TearDown() removes recursively.
+class VCFlagsTest : public testing::Test {
+ protected:
+  void SetUp() override {
+    std::vector<string> candidates;
+    GetExistingTempDirectories(&candidates);
+    CHECK_GT(candidates.size(), 0);
+
+    // The process id goes into the directory name.
+#ifndef _WIN32
+    const string pid = std::to_string(getpid());
+#else
+    const string pid = std::to_string(GetCurrentProcessId());
+#endif
+    // Use the first existing temp directory as the parent.
+    tmp_dir_ = file::JoinPath(
+        candidates[0], StrCat("compiler_flags_unittest_", pid));
+
+    CreateDir(tmp_dir_, 0777);
+  }
+
+  void TearDown() override { RecursivelyDelete(tmp_dir_); }
+
+  // Forwards to VCFlags::GetFileNameExtension.
+  string GetFileNameExtension(const string& filename) {
+    return VCFlags::GetFileNameExtension(filename);
+  }
+
+  // Forwards to VCFlags::ComposeOutputFilePath.
+  string ComposeOutputFilePath(const string& input, const string& output,
+                               const string& ext) {
+    return VCFlags::ComposeOutputFilePath(input, output, ext);
+  }
+
+  // Scratch directory owned by the running test.
+  string tmp_dir_;
+};
+
+// Expected behavior of VCFlags::GetFileNameExtension on a few file names.
+TEST_F(VCFlagsTest, GetFileNameExtension) {
+  // An empty name, or a name that is only ".ext", has no extension.
+  EXPECT_EQ("", GetFileNameExtension(""));
+  EXPECT_EQ("", GetFileNameExtension(".cpp"));
+
+  // Only the part after the last dot is returned.
+  EXPECT_EQ("cpp", GetFileNameExtension("foo.cpp"));
+  EXPECT_EQ("c", GetFileNameExtension("foo.cpp.c"));
+
+  // Dots inside directory names are ignored and case is preserved.
+  EXPECT_EQ("C", GetFileNameExtension("C:\\a\\b\\x.x\\foo.C"));
+}
+
+// Parses a representative cl.exe command line written with '/' options and
+// checks the compiler info flags, inputs, macros, forced includes, PCH
+// names and the output file.
+TEST_F(VCFlagsTest, Basic) {
+  const std::vector<string> args {
+    "cl.exe", "/X", "/c", "foobar.c",
+    "/I", "d:\\usr\\local\\include",
+    "/I\"d:\\usr\\include\"",
+    "/I\"D:/usr/local\"",
+    "/D", "FOO",
+    "/DNDEBUG",
+    "/O1", "/GF", "/Gm-", "/EHsc", "/RTC1", "/MTd", "/GS", "/Gy",
+    "/fp:precise", "/Zc:wchar_t", "/Zc:forScope", "/GR-",
+    "/Fp\"Debug\\foobar.pch\"",
+    "/Fa\"Debug\"",
+    "/Fo\"foobar.obj\"",
+    "/Fd\"D:/foobar/Debug/foobar.pdb\"",
+    "/Gd",
+    "/FIpreprocess.h",
+    "/Yccreate_preprocess.h",
+    "/Yuuse_preprocess.h",
+    "/TP", "/analyze-", "/errorReport:queue",
+    "/source-charset:utf-8", "/execution-charset:utf-8", "/utf-8",
+    "/validate-charset", "/validate-charset-",
+    "/permissive-", "/std:c++14",
+    "/diagnostics:classic,column-",
+  };
+
+  VCFlags flags(args, "D:\\foobar");
+
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_TRUE(flags.fail_message().empty()) << flags.fail_message();
+
+  EXPECT_EQ("cl.exe", flags.compiler_base_name());
+  EXPECT_EQ("cl.exe", flags.compiler_name());
+
+  ASSERT_EQ(5, static_cast<int>(flags.compiler_info_flags().size()));
+  const std::vector<string> expected_compiler_info_flags {
+    "/O1", "/MTd", "/permissive-", "/std:c++14", "/X",
+  };
+  EXPECT_EQ(expected_compiler_info_flags, flags.compiler_info_flags());
+
+  ASSERT_EQ(1, static_cast<int>(flags.input_filenames().size()));
+  EXPECT_EQ("foobar.c", flags.input_filenames()[0]);
+  // Fatal on mismatch: both entries are indexed right below.
+  ASSERT_EQ(2U, flags.commandline_macros().size());
+  EXPECT_EQ("FOO", flags.commandline_macros()[0].first);
+  EXPECT_TRUE(flags.commandline_macros()[0].second);
+  EXPECT_EQ("NDEBUG", flags.commandline_macros()[1].first);
+  EXPECT_TRUE(flags.commandline_macros()[1].second);
+  // /TP is on the command line, so C++ is expected despite the .c input.
+  EXPECT_TRUE(flags.is_cplusplus());
+  EXPECT_TRUE(flags.ignore_stdinc());
+  EXPECT_FALSE(flags.require_mspdbserv());
+  EXPECT_FALSE(flags.is_gcc());
+  EXPECT_FALSE(flags.is_javac());
+  EXPECT_TRUE(flags.is_vc());
+  EXPECT_FALSE(flags.is_clang_tidy());
+  EXPECT_FALSE(flags.is_java());
+
+  // /FI names a forced include.
+  ASSERT_EQ(1U, flags.root_includes().size());
+  EXPECT_EQ("preprocess.h", flags.root_includes()[0]);
+
+  // /Yc and /Yu name the precompiled header to create and to use.
+  EXPECT_EQ("create_preprocess.h", flags.creating_pch());
+  EXPECT_EQ("use_preprocess.h", flags.using_pch());
+
+  const std::vector<string>& output_files = flags.output_files();
+  ASSERT_EQ(1, static_cast<int>(output_files.size()));
+  EXPECT_EQ("foobar.obj", output_files[0]);
+}
+
+// Same idea as Basic, but some options are spelled with a leading '-'
+// instead of '/'. The spelling is preserved in compiler_info_flags()
+// ("-O1" stays "-O1").
+TEST_F(VCFlagsTest, BasicMixedDash) {
+  const std::vector<string> args {
+    "cl.exe", "/X", "/c", "foobar.c",
+    "-I", "d:\\usr\\local\\include",
+    "-I\"d:\\usr\\include\"",
+    "-I\"D:/usr/local\"",
+    "-D", "FOO",
+    "-DNDEBUG",
+    "-O1",
+    "/GF", "/Gm-", "/EHsc", "/RTC1", "/MTd", "/GS", "/Gy",
+    "/fp:precise", "/Zc:wchar_t", "/Zc:forScope", "/GR-",
+    "/Fp\"Debug\\foobar.pch\"",
+    "/Fa\"Debug\"",
+    "/Fo\"foobar.obj\"",
+    "/Fd\"D:/foobar/Debug/foobar.pdb\"",
+    "/Gd", "/TP", "/analyze-", "/errorReport:queue",
+  };
+
+  VCFlags flags(args, "D:\\foobar");
+
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_TRUE(flags.fail_message().empty()) << flags.fail_message();
+
+  EXPECT_EQ("cl.exe", flags.compiler_base_name());
+  EXPECT_EQ("cl.exe", flags.compiler_name());
+
+  ASSERT_EQ(3, static_cast<int>(flags.compiler_info_flags().size()));
+  EXPECT_EQ("-O1", flags.compiler_info_flags()[0]);
+  EXPECT_EQ("/MTd", flags.compiler_info_flags()[1]);
+  EXPECT_EQ("/X", flags.compiler_info_flags()[2]);
+
+  ASSERT_EQ(1, static_cast<int>(flags.input_filenames().size()));
+  EXPECT_EQ("foobar.c", flags.input_filenames()[0]);
+  // Fatal on mismatch: both entries are indexed right below.
+  ASSERT_EQ(2U, flags.commandline_macros().size());
+  EXPECT_EQ("FOO", flags.commandline_macros()[0].first);
+  EXPECT_TRUE(flags.commandline_macros()[0].second);
+  EXPECT_EQ("NDEBUG", flags.commandline_macros()[1].first);
+  EXPECT_TRUE(flags.commandline_macros()[1].second);
+  // /TP is on the command line, so C++ is expected despite the .c input.
+  EXPECT_TRUE(flags.is_cplusplus());
+  EXPECT_TRUE(flags.ignore_stdinc());
+  EXPECT_FALSE(flags.require_mspdbserv());
+  EXPECT_FALSE(flags.is_gcc());
+  EXPECT_FALSE(flags.is_javac());
+  EXPECT_TRUE(flags.is_vc());
+  EXPECT_FALSE(flags.is_clang_tidy());
+  EXPECT_FALSE(flags.is_java());
+
+  const std::vector<string>& output_files = flags.output_files();
+  ASSERT_EQ(1, static_cast<int>(output_files.size()));
+  EXPECT_EQ("foobar.obj", output_files[0]);
+}
+
+// "@file" arguments for cl.exe: parsing fails while the response file is
+// missing; once it exists, the options it contains are parsed as if they
+// had been given on the command line.
+TEST_F(VCFlagsTest, AtFile) {
+  std::vector<string> args;
+  args.push_back("cl.exe");
+  const string& at_file = file::JoinPath(tmp_dir_, "at_file");
+  // The at-file argument is given with Win32 path separators.
+  args.push_back("@" + PathResolver::PlatformConvert(
+      at_file, PathResolver::kWin32PathSep, PathResolver::kPreserveCase));
+
+  // The at_file doesn't exist.
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "."));
+  EXPECT_FALSE(flags->is_successful());
+
+  // NOTE(review): "/FP" and "/DNODEBUG" below differ from the "/Fp" and
+  // "/DNDEBUG" spellings used in VCFlagsTest.Basic -- confirm whether that
+  // is intentional. Nothing asserted in this test depends on them.
+  ASSERT_TRUE(WriteStringToFile(
+      "/X /c foobar.c /I d:\\usr\\local\\include /I\"d:\\usr\\include\" "
+      "/I\"D:/usr/local\" /D FOO /DNODEBUG /O1 /GF /Gm- /EHsc /RTC1 /MTd "
+      "/GS /Gy /fp:precise /Zc:wchar_t /Zc:forScope /GR- "
+      "/FP\"Debug\\foobar.pch\" /Fa\"Debug\" /Fo\"foobar.obj\" "
+      "/Fd\"D:/foobar/Debug/foobar.pdb\" /Gd /TP /analyze- /errorReport:queue",
+      at_file));
+
+  flags = CompilerFlags::MustNew(args, "D:\\foobar");
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_TRUE(flags->fail_message().empty()) << flags->fail_message();
+
+  EXPECT_EQ("cl.exe", flags->compiler_base_name());
+  EXPECT_EQ("cl.exe", flags->compiler_name());
+
+  ASSERT_EQ(3U, flags->compiler_info_flags().size());
+  EXPECT_EQ("/O1", flags->compiler_info_flags()[0]);
+  EXPECT_EQ("/MTd", flags->compiler_info_flags()[1]);
+  EXPECT_EQ("/X", flags->compiler_info_flags()[2]);
+
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("foobar.c", flags->input_filenames()[0]);
+  // The at-file itself is reported as an optional input.
+  ASSERT_EQ(1U, flags->optional_input_filenames().size());
+  EXPECT_EQ(PathResolver::PlatformConvert(at_file),
+            flags->optional_input_filenames()[0]);
+
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  // Fatal on failure: the static_cast to VCFlags below is only valid when
+  // the object really is a VCFlags.
+  ASSERT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+
+  VCFlags* vc_flags = static_cast<VCFlags*>(flags.get());
+  EXPECT_FALSE(vc_flags->require_mspdbserv());
+
+  const std::vector<string>& output_files = flags->output_files();
+  ASSERT_EQ(1U, output_files.size());
+  EXPECT_EQ("foobar.obj", output_files[0]);
+}
+
+// Response file with double-quoted arguments, including paths that contain
+// a space. With /Fo naming a directory (trailing backslash), one .obj per
+// input file is expected inside that directory.
+TEST_F(VCFlagsTest, AtFileQuote) {
+  std::vector<string> args;
+  args.push_back("cl.exe");
+  const string& at_file = file::JoinPath(tmp_dir_, "at_file");
+  // The at-file argument is given with Win32 path separators.
+  args.push_back("@" + PathResolver::PlatformConvert(
+      at_file, PathResolver::kWin32PathSep, PathResolver::kPreserveCase));
+
+  // The at_file doesn't exist.
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "."));
+  EXPECT_FALSE(flags->is_successful());
+
+  ASSERT_TRUE(WriteStringToFile(
+      "/c /Fo\"C:\\goma work\\client\\build\\Release\\obj\\gtest\\\\\" "
+      "/Fd\"C:\\goma work\\client\\build\\Release\\gtest.pdb\" "
+      "/Gd /TP /analyze- /errorReport:prompt "
+      "\"gtest\\src\\gtest-filepath.cc\" "
+      "\"gtest\\src\\gtest-printers.cc\" "
+      "\"gtest\\src\\gtest-port.cc\" "
+      "\"gtest\\src\\gtest-death-test.cc\" "
+      "\"gtest\\src\\gtest-typed-test.cc\" "
+      "gtest\\src\\gtest.cc \"gtest\\src\\gtest-test-part.cc\" /MP",
+      at_file));
+
+  flags = CompilerFlags::MustNew(args, "C:\\goma work");
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_TRUE(flags->fail_message().empty()) << flags->fail_message();
+
+  EXPECT_EQ("cl.exe", flags->compiler_base_name());
+  EXPECT_EQ("cl.exe", flags->compiler_name());
+
+  ASSERT_EQ(0U, flags->compiler_info_flags().size());
+
+  // Quotes are stripped from the input names; order follows the at-file.
+  const std::vector<string>& input_filenames = flags->input_filenames();
+  ASSERT_EQ(7U, input_filenames.size());
+  EXPECT_EQ("gtest\\src\\gtest-filepath.cc", input_filenames[0]);
+  EXPECT_EQ("gtest\\src\\gtest-printers.cc", input_filenames[1]);
+  EXPECT_EQ("gtest\\src\\gtest-port.cc", input_filenames[2]);
+  EXPECT_EQ("gtest\\src\\gtest-death-test.cc", input_filenames[3]);
+  EXPECT_EQ("gtest\\src\\gtest-typed-test.cc", input_filenames[4]);
+  EXPECT_EQ("gtest\\src\\gtest.cc", input_filenames[5]);
+  EXPECT_EQ("gtest\\src\\gtest-test-part.cc", input_filenames[6]);
+  // The at-file itself is reported as an optional input.
+  ASSERT_EQ(1U, flags->optional_input_filenames().size());
+  EXPECT_EQ(PathResolver::PlatformConvert(at_file),
+            flags->optional_input_filenames()[0]);
+
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  // Fatal on failure: the static_cast to VCFlags below is only valid when
+  // the object really is a VCFlags.
+  ASSERT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+
+  VCFlags* vc_flags = static_cast<VCFlags*>(flags.get());
+  EXPECT_FALSE(vc_flags->require_mspdbserv());
+
+  // Every object file is expected directly under the /Fo directory.
+  const string obj_dir = "C:\\goma work\\client\\build\\Release\\obj\\gtest\\";
+  const std::vector<string>& output_files = flags->output_files();
+  ASSERT_EQ(7U, output_files.size());
+  EXPECT_EQ(obj_dir + "gtest-filepath.obj", output_files[0]);
+  EXPECT_EQ(obj_dir + "gtest-printers.obj", output_files[1]);
+  EXPECT_EQ(obj_dir + "gtest-port.obj", output_files[2]);
+  EXPECT_EQ(obj_dir + "gtest-death-test.obj", output_files[3]);
+  EXPECT_EQ(obj_dir + "gtest-typed-test.obj", output_files[4]);
+  EXPECT_EQ(obj_dir + "gtest.obj", output_files[5]);
+  EXPECT_EQ(obj_dir + "gtest-test-part.obj", output_files[6]);
+}
+
+TEST_F(VCFlagsTest, WCAtFile) {
+  // Checks parsing of an @file whose content is UTF-16LE text with a BOM.
+  std::vector<string> args;
+  args.push_back("cl.exe");
+  const string& at_file = file::JoinPath(tmp_dir_, "at_file");
+  args.push_back("@" + PathResolver::PlatformConvert(
+      at_file, PathResolver::kWin32PathSep, PathResolver::kPreserveCase));
+  // The at_file doesn't exist yet, so parsing is expected to fail.
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "."));
+  EXPECT_FALSE(flags->is_successful());
+  // UTF-16LE with BOM: "/X /c foobar.c"; sizeof - 1 drops the implicit NUL.
+  static const char kCmdLine[] =
+      "\xff\xfe/\0X\0 \0/\0c\0 \0f\0o\0o\0b\0a\0r\0.\0c\0";
+  const string kWCCmdLine(kCmdLine, sizeof kCmdLine - 1);
+  ASSERT_TRUE(WriteStringToFile(kWCCmdLine, at_file));
+  // With the at_file in place, the same command line must now parse.
+  flags = CompilerFlags::MustNew(args, "D:\\foobar");
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_TRUE(flags->fail_message().empty()) << flags->fail_message();
+
+  EXPECT_EQ("cl.exe", flags->compiler_base_name());
+  EXPECT_EQ("cl.exe", flags->compiler_name());
+
+  ASSERT_EQ(1U, flags->compiler_info_flags().size());
+  EXPECT_EQ("/X", flags->compiler_info_flags()[0]);
+
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("foobar.c", flags->input_filenames()[0]);
+  ASSERT_EQ(1U, flags->optional_input_filenames().size());
+  EXPECT_EQ(PathResolver::PlatformConvert(at_file),
+            flags->optional_input_filenames()[0]);
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+
+  VCFlags* vc_flags = static_cast<VCFlags*>(flags.get());
+  EXPECT_FALSE(vc_flags->require_mspdbserv());
+
+  const std::vector<string>& output_files = flags->output_files();
+  ASSERT_EQ(1U, output_files.size());
+  EXPECT_EQ("foobar.obj", output_files[0]);
+}
+
+TEST_F(VCFlagsTest, Optimize) {
+  // An optimization switch (/O1) must show up in compiler_info_flags().
+  const std::vector<string> cmdline {
+    "cl", "/O1", "/c", "hello.c", "hello2.cc",
+  };
+  VCFlags flags(cmdline, "C:\\");
+
+  EXPECT_TRUE(flags.is_successful());
+  EXPECT_TRUE(flags.fail_message().empty()) << flags.fail_message();
+
+  EXPECT_EQ("cl", flags.compiler_base_name());
+  EXPECT_EQ("cl.exe", flags.compiler_name());
+
+  const auto& info_flags = flags.compiler_info_flags();
+  ASSERT_EQ(1U, info_flags.size());
+  EXPECT_EQ("/O1", info_flags[0]);
+
+  const auto& inputs = flags.input_filenames();
+  ASSERT_EQ(2U, inputs.size());
+  EXPECT_EQ("hello.c", inputs[0]);
+  EXPECT_EQ("hello2.cc", inputs[1]);
+
+  // One .obj per input, in the same order as the inputs.
+  const std::vector<string>& outputs = flags.output_files();
+  ASSERT_EQ(2U, outputs.size());
+  EXPECT_EQ("hello.obj", outputs[0]);
+  EXPECT_EQ("hello2.obj", outputs[1]);
+
+  EXPECT_FALSE(flags.ignore_stdinc());
+  EXPECT_FALSE(flags.require_mspdbserv());
+
+  EXPECT_TRUE(flags.is_vc());
+  EXPECT_FALSE(flags.is_gcc());
+  EXPECT_FALSE(flags.is_javac());
+  EXPECT_FALSE(flags.is_clang_tidy());
+  EXPECT_FALSE(flags.is_java());
+}
+
+// cl.exe treats options it does not recognize as inputs, so a VC command
+// line is never expected to report unknown flags.
+TEST_F(VCFlagsTest, UnknownFlags) {
+  const std::vector<string> cmdline {
+    "cl", "/c", "hello.c", "/UNKNOWN", "/UNKNOWN2",
+  };
+  VCFlags parsed(cmdline, "C:\\");
+
+  EXPECT_TRUE(parsed.is_successful());
+  EXPECT_TRUE(parsed.unknown_flags().empty());
+}
+
+TEST_F(VCFlagsTest, ComposeOutputPath) {
+  EXPECT_EQ("hello.exe", ComposeOutputFilePath("hello.c", "", ".exe"));
+  EXPECT_EQ("d:\\src\\hello.obj",  // output names a directory
+      ComposeOutputFilePath("hello.c", "d:\\src\\", ".obj"));
+  EXPECT_EQ("d:\\src\\hello.obj",  // quoted directory; input's own dir dropped
+      ComposeOutputFilePath("src\\hello.c", "\"d:\\src\\\"", ".obj"));
+  EXPECT_EQ("d:\\src\\\\hello.exe",  // doubled trailing separator is kept
+      ComposeOutputFilePath("src\\main\\hello.c", "\"d:\\src\\\\\"", ".exe"));
+  EXPECT_EQ("k:\\output\\vcflags.exe",  // output names a file: used as is
+      ComposeOutputFilePath("src\\main.cc", "k:\\output\\vcflags.exe", ".exe"));
+  EXPECT_EQ("k:\\output\\vcflags.exe",  // quoted file name: quotes removed
+      ComposeOutputFilePath("src\\main.cc",
+                            "\"k:\\output\\vcflags.exe\"", ".exe"));
+}
+
+// Fixture for JavaFlags tests. SetUp() creates a per-process scratch directory
+// under the first existing temp directory; TearDown() removes it again.
+class JavaFlagsTest : public testing::Test {
+ public:
+  void SetUp() override {
+    std::vector<string> candidates;
+    GetExistingTempDirectories(&candidates);
+    CHECK_GT(candidates.size(), 0U);
+#ifdef _WIN32
+    const string pid = std::to_string(GetCurrentProcessId());
+#else
+    const string pid = std::to_string(getpid());
+#endif
+    tmp_dir_ = file::JoinPath(
+        candidates[0], StrCat("compiler_flags_unittest_", pid));
+    CreateDir(tmp_dir_, 0777);
+  }
+
+  void TearDown() override {
+    RecursivelyDelete(tmp_dir_);
+  }
+
+ protected:
+  string tmp_dir_;
+};
+
+TEST_F(JavaFlagsTest, Basic) {
+  // A "java -jar desugar.jar ..." command line carrying two class paths.
+  std::vector<string> cmdline = {
+    "prebuilts/jdk/jdk8/linux-x86/bin/java",
+    "-Djdk.internal.lambda.dumpProxyClasses="
+        "JAVA_LIBRARIES/apache-xml_intermediates/desugar_dumped_classes",
+    "-jar",
+    "out/host/linux-x86/framework/desugar.jar",
+    "--classpath_entry",
+    "JAVA_LIBRARIES/core-libart_intermediates/classes-header.jar",
+    "--classpath_entry",
+    "JAVA_LIBRARIES/core-oj_intermediates/classes-header.jar",
+    "--min_sdk_version",
+    "10000",
+    "--allow_empty_bootclasspath",
+    "-i",
+    "JAVA_LIBRARIES/apache-xml_intermediates/classes.jar",
+    "-o",
+    "JAVA_LIBRARIES/apache-xml_intermediates/classes-desugar.jar.tmp",
+    "-cp", "/tmp:a.jar:b.jar",
+    "-classpath", "c.jar",
+  };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(cmdline, "."));
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("java", flags->compiler_name());
+  EXPECT_TRUE(flags->is_java());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+
+  // The jar given to -jar is the only input; no output files are expected.
+  const auto& inputs = flags->input_filenames();
+  ASSERT_EQ(1U, inputs.size());
+  EXPECT_EQ("out/host/linux-x86/framework/desugar.jar", inputs[0]);
+  EXPECT_EQ(0U, flags->output_files().size());
+
+  // Only the .jar entries of -cp / -classpath are expected in jar_files().
+  const std::vector<string> expected_jars { "a.jar", "b.jar", "c.jar" };
+  JavaFlags* java_flags = static_cast<JavaFlags*>(flags.get());
+  EXPECT_EQ(expected_jars, java_flags->jar_files());
+}
+
+// Fixture with no state of its own, used by the TEST_F cases that follow.
+class CompilerFlagsTest : public ::testing::Test {};
+
+TEST_F(CompilerFlagsTest, CommandClassification) {
+  static const int kGCC = 1 << 0;
+  static const int kClang = 1 << 1;
+  static const int kVC = 1 << 2;
+  static const int kClangCl = 1 << 3;
+  static const int kJavac = 1 << 4;
+  static const int kClangTidy = 1 << 5;
+  // |expected| is the OR of the k* bits whose Is*Command() must return true.
+  struct TestCase {
+    const char* command;
+    int expected;
+  } testcases[] = {
+    // gcc
+    { "gcc", kGCC },
+    { "/usr/bin/gcc", kGCC },
+    { "x86_64-linux-gnu-gcc", kGCC },
+    { "g++", kGCC },
+    { "/usr/bin/g++", kGCC },
+    { "x86_64-linux-gnu-g++", kGCC },
+    { "c++", kGCC },
+    { "/usr/bin/c++", kGCC },
+    { "cc", kGCC },
+    { "/usr/bin/cc", kGCC },
+    { "i586-mingw32msvc-cc", kGCC },
+    { "g++-4.8", kGCC },
+    { "arm-gnueabihf-gcc-4.9", kGCC },
+    { "nacl-gcc", kGCC },
+    { "i686-nacl-gcc", kGCC },
+    { "nacl-gcc.exe", kGCC },
+    // clang
+    { "clang", kGCC | kClang },
+    { "clang.exe", kGCC | kClang },
+    { "/usr/local/bin/clang", kGCC | kClang },
+    { "clang++", kGCC | kClang },
+    { "/usr/local/bin/clang++", kGCC | kClang },
+    { "pnacl-clang", kGCC | kClang },
+    { "pnacl-clang++", kGCC | kClang },
+    { "clang++-3.7", kGCC | kClang },
+    { "/usr/local/google/home/jlebar/bin/clang++-3.7", kGCC | kClang },
+    { "armv7a-cros-linux-gnueabi-clang++", kGCC | kClang },  // ChromeOS clang
+    { "/usr/bin/local/clang-tidy/clang", kGCC | kClang },  // not clang-tidy.
+    // clang (negative)
+    { "clang-check", 0 },
+    { "clang-tblgen", 0 },
+    { "clang-format", 0 },
+    { "clang-tidy-diff", 0 },  // not clang-tidy, too.
+    // cl
+    { "cl", kVC },
+    { "CL", kVC },
+    { "cl.exe", kVC },
+    { "CL.EXE", kVC },
+    { "cL.eXe", kVC },
+    { "Cl.Exe", kVC },
+    { "C:\\VS10\\VC\\bin\\cl.exe", kVC },
+    { "D:\\Program Files\\Microsoft Visual Studio 10\\VC\\bin\\Cl.Exe", kVC },
+    { "D:\\VS9\\cl.exe\\cl.exe", kVC },
+    // cl (negative)
+    { "D:\\VS9\\cl.exe\\cl.exe.manifest", 0 },
+    { "D:\\VS9\\cl.exe\\", 0 },
+    { "cl.exe.manifest", 0 },
+    // clang-cl
+    { "clang-cl", kClangCl },
+    { "clang-cl.exe", kClangCl },
+    { "CLANG-CL.EXE", kClangCl },
+    { "/usr/local/bin/clang-cl", kClangCl },
+    { "/usr/local/bin/clang-cl.exe", kClangCl },
+    { "C:\\clang-cl", kClangCl },
+    { "C:\\clang-cl.exe", kClangCl },
+    { "D:\\example\\clang-cl.exe", kClangCl },
+    { "D:\\EXAMPLE\\CLANG-CL.EXE", kClangCl },
+    // javac
+    { "javac", kJavac },
+    { "/usr/bin/javac", kJavac },
+    // javac (negative)
+    { "/usr/bin/javaco/yes", 0 },
+    // clang-tidy
+    { "clang-tidy", kClangTidy },
+    { "/usr/bin/local/clang-tidy", kClangTidy },
+    // others
+    { "nacl.exe", 0 },
+    { "D:\\nacl_sdk\\pepper_18\\toolchain\\win_x86_newlib\\bin\\nacl.exe", 0 },
+    { "/usr/lib/gcc/bin/ar", 0 },
+  };
+  // Every predicate is checked for every case, so unset bits must be false.
+  for (const auto& entry : testcases) {
+    EXPECT_EQ(CompilerFlags::IsGCCCommand(entry.command),
+              (entry.expected & kGCC) != 0)
+        << "command = " << entry.command;
+    EXPECT_EQ(CompilerFlags::IsClangCommand(entry.command),
+              (entry.expected & kClang) != 0)
+        << "command = " << entry.command;
+    EXPECT_EQ(CompilerFlags::IsVCCommand(entry.command),
+              (entry.expected & kVC) != 0)
+        << "command = " << entry.command;
+    EXPECT_EQ(CompilerFlags::IsClangClCommand(entry.command),
+              (entry.expected & kClangCl) != 0)
+        << "command = " << entry.command;
+    EXPECT_EQ(CompilerFlags::IsJavacCommand(entry.command),
+              (entry.expected & kJavac) != 0)
+        << "command = " << entry.command;
+    EXPECT_EQ(CompilerFlags::IsClangTidyCommand(entry.command),
+              (entry.expected & kClangTidy) != 0)
+        << "command = " << entry.command;
+  }
+}
+
+TEST_F(CompilerFlagsTest, GetCompilerName) {
+  using devtools_goma::CompilerFlags;
+  // Directory, target-triple prefix and ".exe" suffix do not affect the name.
+  EXPECT_EQ("gcc", CompilerFlags::GetCompilerName("gcc"));
+  EXPECT_EQ("gcc", CompilerFlags::GetCompilerName("gcc.exe"));
+  EXPECT_EQ("gcc", CompilerFlags::GetCompilerName("/usr/bin/gcc"));
+  EXPECT_EQ("gcc", CompilerFlags::GetCompilerName("x86_64-linux-gnu-gcc"));
+  // g++ is reported under its own name rather than as gcc.
+  EXPECT_EQ("g++", CompilerFlags::GetCompilerName("g++"));
+  EXPECT_EQ("g++", CompilerFlags::GetCompilerName("g++.exe"));
+  EXPECT_EQ("g++", CompilerFlags::GetCompilerName("/usr/bin/g++"));
+  EXPECT_EQ("g++", CompilerFlags::GetCompilerName("x86_64-linux-gnu-g++"));
+  // NaCl wrappers map to gcc/g++; a bare "nacl.exe" yields an empty name.
+  EXPECT_EQ("gcc", CompilerFlags::GetCompilerName("nacl-gcc"));
+  EXPECT_EQ("gcc", CompilerFlags::GetCompilerName("nacl-gcc.exe"));
+  EXPECT_EQ("gcc", CompilerFlags::GetCompilerName("i686-nacl-gcc"));
+  EXPECT_EQ("gcc", CompilerFlags::GetCompilerName("i686-nacl-gcc.exe"));
+  EXPECT_EQ("g++", CompilerFlags::GetCompilerName("nacl-g++"));
+  EXPECT_EQ("g++", CompilerFlags::GetCompilerName("nacl-g++.exe"));
+  EXPECT_EQ("g++", CompilerFlags::GetCompilerName("i686-nacl-g++"));
+  EXPECT_EQ("g++", CompilerFlags::GetCompilerName("i686-nacl-g++.exe"));
+  EXPECT_EQ("", CompilerFlags::GetCompilerName("nacl.exe"));
+  EXPECT_EQ("", CompilerFlags::GetCompilerName(
+      "D:\\nacl_sdk\\pepper_18\\toolchain\\win_x86_newlib\\bin\\nacl.exe"));
+  // clang and clang++ (incl. pnacl-*); clang-tblgen yields an empty name.
+  EXPECT_EQ("clang", CompilerFlags::GetCompilerName("clang"));
+  EXPECT_EQ("clang", CompilerFlags::GetCompilerName("clang.exe"));
+  EXPECT_EQ("clang", CompilerFlags::GetCompilerName("/usr/local/bin/clang"));
+  EXPECT_EQ("clang", CompilerFlags::GetCompilerName("pnacl-clang"));
+  EXPECT_EQ("clang", CompilerFlags::GetCompilerName("pnacl-clang.exe"));
+  EXPECT_EQ("clang++", CompilerFlags::GetCompilerName("clang++"));
+  EXPECT_EQ("clang++", CompilerFlags::GetCompilerName("clang++.exe"));
+  EXPECT_EQ("clang++", CompilerFlags::GetCompilerName(
+      "/usr/local/bin/clang++"));
+  EXPECT_EQ("clang++", CompilerFlags::GetCompilerName("pnacl-clang++"));
+  EXPECT_EQ("clang++", CompilerFlags::GetCompilerName("pnacl-clang++.exe"));
+  EXPECT_EQ("", CompilerFlags::GetCompilerName("clang-tblgen"));
+  // cl matches in any letter case and is reported as "cl.exe"; others are "".
+  EXPECT_EQ("cl.exe", CompilerFlags::GetCompilerName("cl"));
+  EXPECT_EQ("cl.exe", CompilerFlags::GetCompilerName("CL"));
+  EXPECT_EQ("cl.exe", CompilerFlags::GetCompilerName("cl.exe"));
+  EXPECT_EQ("cl.exe", CompilerFlags::GetCompilerName("CL.EXE"));
+  EXPECT_EQ("cl.exe", CompilerFlags::GetCompilerName(
+      "C:\\VS10\\VC\\bin\\cl.exe"));
+  EXPECT_EQ("cl.exe", CompilerFlags::GetCompilerName(
+      "D:\\Program Files\\Microsoft Visual Studio 10\\VC\\bin\\Cl.Exe"));
+  EXPECT_EQ("cl.exe", CompilerFlags::GetCompilerName(
+      "D:\\VS9\\cl.exe\\cl.exe"));
+  EXPECT_EQ("", CompilerFlags::GetCompilerName("cl.exe.manifest"));
+  EXPECT_EQ("", CompilerFlags::GetCompilerName(
+      "D:\\VS9\\cl.exe\\cl.exe.manifest"));
+  EXPECT_EQ("", CompilerFlags::GetCompilerName("D:\\VS9\\cl.exe\\"));
+  // javac
+  EXPECT_EQ("javac", CompilerFlags::GetCompilerName("javac"));
+  EXPECT_EQ("javac", CompilerFlags::GetCompilerName("/usr/bin/javac"));
+}
+
+TEST_F(CompilerFlagsTest, GccFlags) {
+  // Parses a minimal "gcc -c hello.c", then checks the generic accessors, the
+  // client-important environment filter and the GCC-specific accessors.
+  std::vector<string> args;
+  args.push_back("gcc");
+  args.push_back("-c");
+  args.push_back("hello.c");
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "/tmp"));
+  EXPECT_EQ(args, flags->args());
+  EXPECT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.o", flags->output_files()[0]);
+  EXPECT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.c", flags->input_filenames()[0]);
+  EXPECT_EQ("gcc", flags->compiler_base_name());
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("gcc", flags->compiler_name());
+  EXPECT_TRUE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("/tmp", flags->cwd());
+
+  // nullptr-terminated environment block. The entries are string literals in
+  // a stack array, so there is no length constant to keep in sync and nothing
+  // to allocate or free.
+  const char* env[] = {
+    "PATH=/usr/bin:/bin",
+    "SYSROOT=/tmp/1234",
+    "LIBRARY_PATH=../libsupp",
+    "CPATH=.:/special/include",
+    "C_INCLUDE_PATH=.:/special/include",
+    "CPLUS_INCLUDE_PATH=.:/special/include/c++",
+    "OBJC_INCLUDE_PATH=./special/include/objc",
+    "DEPENDENCIES_OUTPUT=foo.d",
+    "SUNPRO_DEPENDENCIES=foo.d",
+    nullptr,
+  };
+
+  std::vector<string> important_env;
+  flags->GetClientImportantEnvs(env, &important_env);
+
+  // Everything except PATH is expected back, in the original order.
+  std::vector<string> expected_env;
+  expected_env.push_back("SYSROOT=/tmp/1234");
+  expected_env.push_back("LIBRARY_PATH=../libsupp");
+  expected_env.push_back("CPATH=.:/special/include");
+  expected_env.push_back("C_INCLUDE_PATH=.:/special/include");
+  expected_env.push_back("CPLUS_INCLUDE_PATH=.:/special/include/c++");
+  expected_env.push_back("OBJC_INCLUDE_PATH=./special/include/objc");
+  expected_env.push_back("DEPENDENCIES_OUTPUT=foo.d");
+  expected_env.push_back("SUNPRO_DEPENDENCIES=foo.d");
+  EXPECT_EQ(expected_env, important_env);
+
+  // is_gcc() held above, so the object is treated as a GCCFlags from here on.
+  devtools_goma::GCCFlags* gcc_flags = static_cast<devtools_goma::GCCFlags*>(
+      flags.get());
+  // Left empty on purpose: no compiler_info_flags are expected.
+  std::vector<string> compiler_info_flags;
+  EXPECT_EQ(compiler_info_flags, gcc_flags->compiler_info_flags());
+  EXPECT_EQ(devtools_goma::GCCFlags::COMPILE, gcc_flags->mode());
+  EXPECT_EQ("", gcc_flags->isysroot());
+  EXPECT_FALSE(gcc_flags->is_cplusplus());
+  EXPECT_FALSE(gcc_flags->has_nostdinc());
+  EXPECT_FALSE(gcc_flags->has_no_integrated_as());
+  EXPECT_FALSE(gcc_flags->has_pipe());
+}
+
+TEST_F(CompilerFlagsTest, ClangImportantEnv) {
+  // Checks that MACOSX_DEPLOYMENT_TARGET, SDKROOT and DEVELOPER_DIR are kept
+  // by GetClientImportantEnvs().
+  // NOTE(review): the command line uses "gcc" although the test is named
+  // after clang -- confirm that this is intended.
+  std::vector<string> args;
+  args.push_back("gcc");
+  args.push_back("-c");
+  args.push_back("hello.c");
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "/tmp"));
+
+  // nullptr-terminated environment block built from string literals, so
+  // nothing has to be allocated, counted by hand or freed.
+  const char* env[] = {
+    "PATH=/usr/bin:/bin",
+    "SYSROOT=/tmp/1234",
+    "LIBRARY_PATH=../libsupp",
+    "CPATH=.:/special/include",
+    "C_INCLUDE_PATH=.:/special/include",
+    "MACOSX_DEPLOYMENT_TARGET=10.7",
+    "SDKROOT=/tmp/path_to_root",
+    "DEVELOPER_DIR=/tmp/path_to_developer_dir",
+    nullptr,
+  };
+
+  std::vector<string> important_env;
+  flags->GetClientImportantEnvs(env, &important_env);
+
+  // PATH is the only entry expected to be dropped; the rest must come back
+  // in the original order.
+  std::vector<string> expected_env;
+  expected_env.push_back("SYSROOT=/tmp/1234");
+  expected_env.push_back("LIBRARY_PATH=../libsupp");
+  expected_env.push_back("CPATH=.:/special/include");
+  expected_env.push_back("C_INCLUDE_PATH=.:/special/include");
+  expected_env.push_back("MACOSX_DEPLOYMENT_TARGET=10.7");
+  expected_env.push_back("SDKROOT=/tmp/path_to_root");
+  expected_env.push_back("DEVELOPER_DIR=/tmp/path_to_developer_dir");
+  EXPECT_EQ(expected_env, important_env);
+}
+
+TEST_F(CompilerFlagsTest, IsImportantEnvGCC) {
+  const struct {
+    const char* env;
+    const bool client_important;
+    const bool server_important;
+  } kTestCases[] {
+    // Important to both the client and the server.
+    { "SYSROOT=/tmp/1234", true, true },
+    { "LIBRARY_PATH=../libsupp", true, true },
+    { "CPATH=.:/special/include", true, true },
+    { "C_INCLUDE_PATH=.:/include", true, true },
+    { "CPLUS_INCLUDE_PATH=.:/include", true, true },
+    { "DEPENDENCIES_OUTPUT=/tmp/to", true, true },
+    { "SUNPRO_DEPENDENCIES=/tmp/to", true, true },
+    { "MACOSX_DEPLOYMENT_TARGET=/tmp/to", true, true },
+    { "SDKROOT=/tmp/to", true, true },
+    { "PWD=/tmp/to", true, true },
+    { "DEVELOPER_DIR=/tmp/to", true, true },
+    // Important to the client only, in either letter case.
+    { "PATHEXT=.EXE", true, false },
+    { "pathext=.EXE", true, false },
+    { "SystemRoot=C:\\Windows", true, false },
+    { "systemroot=C:\\Windows", true, false },
+    // Important to neither side.
+    { "SystemDrive=C:", false, false },
+    { "systemdrive=C:", false, false },
+    { "LD_PRELOAD=foo.so", false, false },
+    { "ld_preload=foo.so", false, false },
+  };
+
+  std::vector<string> args { "gcc", "-c", "hello.c" };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "/tmp"));
+
+  for (const auto& entry : kTestCases) {
+    // Table sanity check: server-important implies client-important.
+    ASSERT_TRUE(entry.client_important || !entry.server_important);
+    EXPECT_EQ(entry.client_important, flags->IsClientImportantEnv(entry.env))
+        << entry.env;
+    EXPECT_EQ(entry.server_important, flags->IsServerImportantEnv(entry.env))
+        << entry.env;
+  }
+}
+
+TEST_F(CompilerFlagsTest, ChromeLinuxCompileFlag) {
+  // A g++ compile (-c) command line with the flag set of a Chrome Linux build.
+  std::vector<string> args {
+    "g++",
+    "-DNO_HEAPCHECKER",
+    "-DENABLE_REMOTING=1",
+    "-I.",
+    "-Igpu",
+    "-Ithird_party/sqlite",
+    "-Werror",
+    "-pthread",
+    "-fno-exceptions",
+    "-Wall",
+    "-Wno-unused-parameter",
+    "-Wno-missing-field-initializers",
+    "-fvisibility=hidden",
+    "-pipe",
+    "-fPIC",
+    "-fno-strict-aliasing",
+    "-I/usr/include/nss",
+    "-O2",
+    "-fno-ident",
+    "-fdata-sections",
+    "-ffunction-sections",
+    "-fno-rtti",
+    "-fno-threadsafe-statics",
+    "-fvisibility-inlines-hidden",
+    "-MMD",
+    "-MF",
+    "out/Release/.deps/out/Release/obj.target/"
+        "chrome/chrome/app/chrome_main.o.d.raw",
+    "-c",
+    "-o",
+    "out/Release/obj.target/chrome/chrome/app/chrome_main.o",
+    "chrome/app/chrome_main.cc",
+  };
+  std::unique_ptr<CompilerFlags> flags(
+      CompilerFlags::MustNew(args, "/usr/local/src"));
+
+  EXPECT_EQ(args, flags->args());
+  // The object file (-o) and the dependency file (-MF) are both outputs.
+  EXPECT_EQ(2U, flags->output_files().size());
+  ExpectHasElement(flags->output_files(),
+                   "out/Release/obj.target/chrome/chrome/app/chrome_main.o");
+  ExpectHasElement(flags->output_files(),
+                   "out/Release/.deps/out/Release/obj.target/"
+                   "chrome/chrome/app/chrome_main.o.d.raw");
+  EXPECT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("chrome/app/chrome_main.cc", flags->input_filenames()[0]);
+  EXPECT_EQ("g++", flags->compiler_base_name());
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("g++", flags->compiler_name());
+  EXPECT_TRUE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("/usr/local/src", flags->cwd());
+
+  devtools_goma::GCCFlags* gcc_flags = static_cast<devtools_goma::GCCFlags*>(
+      flags.get());
+  EXPECT_FALSE(gcc_flags->is_precompiling_header());
+  EXPECT_FALSE(gcc_flags->is_stdin_input());
+  // The -W*, -I, -D, -M*, -pipe, -c and -o options given above are not
+  // expected among the compiler_info_flags().
+  const std::vector<string> expected_info_flags {
+    "-pthread", "-fno-exceptions", "-fvisibility=hidden", "-fPIC",
+    "-fno-strict-aliasing", "-O2", "-fno-ident", "-fdata-sections",
+    "-ffunction-sections", "-fno-rtti", "-fno-threadsafe-statics",
+    "-fvisibility-inlines-hidden",
+  };
+  EXPECT_EQ(expected_info_flags, gcc_flags->compiler_info_flags());
+  EXPECT_EQ("", gcc_flags->isysroot());
+  EXPECT_EQ(devtools_goma::GCCFlags::COMPILE, gcc_flags->mode());
+  EXPECT_TRUE(gcc_flags->is_cplusplus());
+  EXPECT_FALSE(gcc_flags->has_nostdinc());
+  EXPECT_FALSE(gcc_flags->has_no_integrated_as());
+  EXPECT_TRUE(gcc_flags->has_pipe());
+  // -I directories come back in command-line order and all are non-system.
+  ASSERT_EQ(4U, gcc_flags->include_dirs().size());
+  EXPECT_EQ(".", gcc_flags->include_dirs()[0]);
+  EXPECT_EQ("gpu", gcc_flags->include_dirs()[1]);
+  EXPECT_EQ("third_party/sqlite", gcc_flags->include_dirs()[2]);
+  EXPECT_EQ("/usr/include/nss", gcc_flags->include_dirs()[3]);
+  ASSERT_EQ(4U, gcc_flags->non_system_include_dirs().size());
+  EXPECT_EQ(".", gcc_flags->non_system_include_dirs()[0]);
+  EXPECT_EQ("gpu", gcc_flags->non_system_include_dirs()[1]);
+  EXPECT_EQ("third_party/sqlite", gcc_flags->non_system_include_dirs()[2]);
+  EXPECT_EQ("/usr/include/nss", gcc_flags->non_system_include_dirs()[3]);
+  ASSERT_EQ(0U, gcc_flags->root_includes().size());
+  ASSERT_EQ(0U, gcc_flags->framework_dirs().size());
+  // For each -D option: .first is the text after -D, .second is true.
+  ASSERT_EQ(2U, gcc_flags->commandline_macros().size());
+  EXPECT_EQ("NO_HEAPCHECKER", gcc_flags->commandline_macros()[0].first);
+  EXPECT_TRUE(gcc_flags->commandline_macros()[0].second);
+  EXPECT_EQ("ENABLE_REMOTING=1", gcc_flags->commandline_macros()[1].first);
+  EXPECT_TRUE(gcc_flags->commandline_macros()[1].second);
+}
+
+TEST_F(CompilerFlagsTest, ChromeLinuxLinkFlag) {
+  std::vector<string> args {
+    "g++",
+    "-pthread",
+    "-Wl,-z,noexecstack",
+    "-Lout/Release",
+    "-L/lib",
+    "-Wl,-uIsHeapProfilerRunning,-uProfilerStart",
+    "-Wl,-u_Z21InitialMallocHook_NewPKvj,"
+        "-u_Z22InitialMallocHook_MMapPKvS0_jiiix,"
+        "-u_Z22InitialMallocHook_SbrkPKvi",
+    "-Wl,-u_Z21InitialMallocHook_NewPKvm,"
+        "-u_Z22InitialMallocHook_MMapPKvS0_miiil,"
+        "-u_Z22InitialMallocHook_SbrkPKvl",
+    "-Wl,-O1",
+    "-Wl,--as-needed",
+    "-Wl,--gc-sections",
+    "-Wl,--icf=safe",
+    "-o",
+    "out/Release/chrome",
+    "-Wl,--start-group",
+    "out/Release/obj.target/chrome/chrome/app/chrome_main.o",
+    "out/Release/obj.target/chrome/"
+        "chrome/app/chrome_main_posix.o",
+    "-Wl,--end-group",
+    "-lX11",
+    "-ldl",
+  };
+  std::unique_ptr<CompilerFlags> flags(
+      CompilerFlags::MustNew(args, "/usr/local/src"));
+  // Link step: one output (the -o target) and the two object files as inputs.
+  EXPECT_EQ(args, flags->args());
+  EXPECT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("out/Release/chrome", flags->output_files()[0]);
+  EXPECT_EQ(2U, flags->input_filenames().size());
+  ExpectHasElement(flags->input_filenames(),
+                   "out/Release/obj.target/chrome/chrome/app/chrome_main.o");
+  ExpectHasElement(
+      flags->input_filenames(),
+      "out/Release/obj.target/chrome/chrome/app/chrome_main_posix.o");
+  EXPECT_EQ("g++", flags->compiler_base_name());
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("g++", flags->compiler_name());
+  EXPECT_TRUE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("/usr/local/src", flags->cwd());
+  // Of all the options given, only -pthread is a compiler_info_flag.
+  devtools_goma::GCCFlags* gcc_flags = static_cast<devtools_goma::GCCFlags*>(
+      flags.get());
+  EXPECT_FALSE(gcc_flags->is_precompiling_header());
+  EXPECT_FALSE(gcc_flags->is_stdin_input());
+  const std::vector<string> expected_info_flags { "-pthread" };
+  EXPECT_EQ(expected_info_flags, gcc_flags->compiler_info_flags());
+  EXPECT_EQ(devtools_goma::GCCFlags::LINK, gcc_flags->mode());
+  EXPECT_EQ("", gcc_flags->isysroot());
+  EXPECT_TRUE(gcc_flags->is_cplusplus());
+  EXPECT_FALSE(gcc_flags->has_nostdinc());
+  EXPECT_FALSE(gcc_flags->has_no_integrated_as());
+  EXPECT_FALSE(gcc_flags->has_pipe());
+}
+
+TEST_F(CompilerFlagsTest, ChromeLinuxClangCompileFlag) {
+  // A clang++ compile (-c) command line with the flag set of a Chrome Linux
+  // build, including the clang-only -fcolor-diagnostics.
+  std::vector<string> args {
+    "clang++",
+    "-fcolor-diagnostics",
+    "-DNO_HEAPCHECKER",
+    "-DENABLE_REMOTING=1",
+    "-I.",
+    "-Igpu",
+    "-Ithird_party/sqlite",
+    "-Werror",
+    "-pthread",
+    "-fno-exceptions",
+    "-Wall",
+    "-Wno-unused-parameter",
+    "-Wno-missing-field-initializers",
+    "-fvisibility=hidden",
+    "-pipe",
+    "-fPIC",
+    "-fno-strict-aliasing",
+    "-I/usr/include/nss",
+    "-O2",
+    "-fno-ident",
+    "-fdata-sections",
+    "-ffunction-sections",
+    "-fno-rtti",
+    "-fno-threadsafe-statics",
+    "-fvisibility-inlines-hidden",
+    "-MMD",
+    "-MF",
+    "out/Release/.deps/out/Release/obj.target/"
+        "chrome/chrome/app/chrome_main.o.d.raw",
+    "-c",
+    "-o",
+    "out/Release/obj.target/chrome/chrome/app/chrome_main.o",
+    "chrome/app/chrome_main.cc",
+  };
+  std::unique_ptr<CompilerFlags> flags(
+      CompilerFlags::MustNew(args, "/usr/local/src"));
+
+  EXPECT_EQ(args, flags->args());
+  // The object file (-o) and the dependency file (-MF) are both outputs.
+  EXPECT_EQ(2U, flags->output_files().size());
+  ExpectHasElement(flags->output_files(),
+                   "out/Release/obj.target/chrome/chrome/app/chrome_main.o");
+  ExpectHasElement(flags->output_files(),
+                   "out/Release/.deps/out/Release/obj.target/"
+                   "chrome/chrome/app/chrome_main.o.d.raw");
+  EXPECT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("chrome/app/chrome_main.cc", flags->input_filenames()[0]);
+  EXPECT_EQ("clang++", flags->compiler_base_name());
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang++", flags->compiler_name());
+  EXPECT_TRUE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("/usr/local/src", flags->cwd());
+
+  devtools_goma::GCCFlags* gcc_flags = static_cast<devtools_goma::GCCFlags*>(
+      flags.get());
+  EXPECT_FALSE(gcc_flags->is_precompiling_header());
+  EXPECT_FALSE(gcc_flags->is_stdin_input());
+  // The -W*, -I, -D, -M*, -pipe, -c and -o options given above are not
+  // expected among the compiler_info_flags().
+  const std::vector<string> expected_info_flags {
+    "-fcolor-diagnostics", "-pthread", "-fno-exceptions",
+    "-fvisibility=hidden", "-fPIC", "-fno-strict-aliasing", "-O2",
+    "-fno-ident", "-fdata-sections", "-ffunction-sections", "-fno-rtti",
+    "-fno-threadsafe-statics", "-fvisibility-inlines-hidden",
+  };
+  EXPECT_EQ(expected_info_flags, gcc_flags->compiler_info_flags());
+  EXPECT_EQ(devtools_goma::GCCFlags::COMPILE, gcc_flags->mode());
+  EXPECT_EQ("", gcc_flags->isysroot());
+  EXPECT_TRUE(gcc_flags->is_cplusplus());
+  EXPECT_FALSE(gcc_flags->has_nostdinc());
+  EXPECT_FALSE(gcc_flags->has_no_integrated_as());
+  EXPECT_TRUE(gcc_flags->has_pipe());
+  // -I directories come back in command-line order and all are non-system.
+  ASSERT_EQ(4U, gcc_flags->include_dirs().size());
+  EXPECT_EQ(".", gcc_flags->include_dirs()[0]);
+  EXPECT_EQ("gpu", gcc_flags->include_dirs()[1]);
+  EXPECT_EQ("third_party/sqlite", gcc_flags->include_dirs()[2]);
+  EXPECT_EQ("/usr/include/nss", gcc_flags->include_dirs()[3]);
+  ASSERT_EQ(4U, gcc_flags->non_system_include_dirs().size());
+  EXPECT_EQ(".", gcc_flags->non_system_include_dirs()[0]);
+  EXPECT_EQ("gpu", gcc_flags->non_system_include_dirs()[1]);
+  EXPECT_EQ("third_party/sqlite", gcc_flags->non_system_include_dirs()[2]);
+  EXPECT_EQ("/usr/include/nss", gcc_flags->non_system_include_dirs()[3]);
+  ASSERT_EQ(0U, gcc_flags->root_includes().size());
+  ASSERT_EQ(0U, gcc_flags->framework_dirs().size());
+  // For each -D option: .first is the text after -D, .second is true.
+  ASSERT_EQ(2U, gcc_flags->commandline_macros().size());
+  EXPECT_EQ("NO_HEAPCHECKER", gcc_flags->commandline_macros()[0].first);
+  EXPECT_TRUE(gcc_flags->commandline_macros()[0].second);
+  EXPECT_EQ("ENABLE_REMOTING=1", gcc_flags->commandline_macros()[1].first);
+  EXPECT_TRUE(gcc_flags->commandline_macros()[1].second);
+}
+
+// Parses a Chrome-on-Linux clang++ link command line and checks that it is
+// reported as a gcc-style LINK invocation with one output file, two object
+// inputs, and only -fcolor-diagnostics / -pthread as compiler info flags.
+TEST_F(CompilerFlagsTest, ChromeLinuxClangLinkFlag) {
+  std::vector<string> args {
+    "clang++",
+    "-fcolor-diagnostics",
+    "-pthread",
+    "-Wl,-z,noexecstack",
+    "-Lout/Release",
+    "-L/lib",
+    "-Wl,-uIsHeapProfilerRunning,-uProfilerStart",
+    "-Wl,-u_Z21InitialMallocHook_NewPKvj,"
+    "-u_Z22InitialMallocHook_MMapPKvS0_jiiix,"
+    "-u_Z22InitialMallocHook_SbrkPKvi",
+    "-Wl,-u_Z21InitialMallocHook_NewPKvm,"
+    "-u_Z22InitialMallocHook_MMapPKvS0_miiil,"
+    "-u_Z22InitialMallocHook_SbrkPKvl",
+    "-Wl,-O1",
+    "-Wl,--as-needed",
+    "-Wl,--gc-sections",
+    "-Wl,--icf=safe",
+    "-o",
+    "out/Release/chrome",
+    "-Wl,--start-group",
+    "out/Release/obj.target/chrome/chrome/app/chrome_main.o",
+    "out/Release/obj.target/chrome/chrome/app/chrome_main_posix.o",
+    "-Wl,--end-group",
+    "-lX11",
+    "-ldl",
+  };
+  std::unique_ptr<CompilerFlags> flags(
+      CompilerFlags::MustNew(args, "/usr/local/src"));
+
+  EXPECT_EQ(args, flags->args());
+  // Fatal check: the next line indexes element 0, so a wrong size must stop
+  // the test instead of continuing into an out-of-range operator[].
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("out/Release/chrome", flags->output_files()[0]);
+  EXPECT_EQ(2U, flags->input_filenames().size());
+  ExpectHasElement(flags->input_filenames(),
+                   "out/Release/obj.target/chrome/chrome/app/chrome_main.o");
+  ExpectHasElement(
+      flags->input_filenames(),
+      "out/Release/obj.target/chrome/chrome/app/chrome_main_posix.o");
+  EXPECT_EQ("clang++", flags->compiler_base_name());
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang++", flags->compiler_name());
+  EXPECT_TRUE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("/usr/local/src", flags->cwd());
+
+  // GCC-specific view of the same parsed flags.
+  devtools_goma::GCCFlags* gcc_flags = static_cast<devtools_goma::GCCFlags*>(
+      flags.get());
+  EXPECT_FALSE(gcc_flags->is_precompiling_header());
+  EXPECT_FALSE(gcc_flags->is_stdin_input());
+  std::vector<string> compiler_info_flags {
+    "-fcolor-diagnostics",
+    "-pthread",
+  };
+  EXPECT_EQ(compiler_info_flags, gcc_flags->compiler_info_flags());
+  EXPECT_EQ(devtools_goma::GCCFlags::LINK, gcc_flags->mode());
+  EXPECT_EQ("", gcc_flags->isysroot());
+  EXPECT_TRUE(gcc_flags->is_cplusplus());
+  EXPECT_FALSE(gcc_flags->has_nostdinc());
+  EXPECT_FALSE(gcc_flags->has_no_integrated_as());
+  EXPECT_FALSE(gcc_flags->has_pipe());
+}
+
+
+// Parses a Chrome ASan clang++ compile command line (with "-mllvm
+// -asan-blacklist=...") and checks outputs, inputs, the blacklist being
+// reported as an optional input, compiler info flags, include dirs and
+// command line macros.
+TEST_F(CompilerFlagsTest, ChromeASANCompileFlag) {
+  std::vector<string> args {
+    "/usr/src/chrome/src/third_party/asan/asan_clang_Linux/bin/clang++",
+    "-fcolor-diagnostics",
+    "-fasan",
+    "-w",
+    "-mllvm",
+    "-asan-blacklist="
+    "/usr/src/chrome/src/third_party/asan/asan_blacklist.txt",
+    "-DNO_TCMALLOC",
+    "-Ithird_party/icu/public/common",
+    "-Werror",
+    "-pthread",
+    "-fno-exceptions",
+    "-Wall",
+    "-fvisibility=hidden",
+    "-pipe",
+    "-fPIC",
+    "-MMD",
+    "-MF",
+    "out/Release/.deps/out/Release/obj.target/base_unittests/"
+    "base/message_loop_unittest.o.d.raw",
+    "-c",
+    "-o",
+    "out/Release/obj.target/base_unittests/"
+    "base/message_loop_unittest.o base/message_loop_unittest.o",
+    "out/Release/obj.target/base_unittests/"
+    "base/message_loop_unittest.o base/message_loop_unittest.cc",
+  };
+
+  std::unique_ptr<CompilerFlags> flags(
+      CompilerFlags::MustNew(args, "/usr/src/chrome/src"));
+
+  EXPECT_EQ(args, flags->args());
+  // The size checks below are fatal because the lines that follow each of
+  // them index into the container they just measured.
+  ASSERT_EQ(2U, flags->output_files().size());
+  EXPECT_EQ("out/Release/obj.target/base_unittests/"
+            "base/message_loop_unittest.o base/message_loop_unittest.o",
+            flags->output_files()[0]);
+  EXPECT_EQ("out/Release/.deps/out/Release/obj.target/base_unittests/"
+            "base/message_loop_unittest.o.d.raw",
+            flags->output_files()[1]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("out/Release/obj.target/base_unittests/"
+            "base/message_loop_unittest.o base/message_loop_unittest.cc",
+            flags->input_filenames()[0]);
+  ASSERT_EQ(1U, flags->optional_input_filenames().size());
+  EXPECT_EQ("/usr/src/chrome/src/third_party/asan/asan_blacklist.txt",
+            flags->optional_input_filenames()[0]);
+  EXPECT_EQ("clang++", flags->compiler_base_name());
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang++", flags->compiler_name());
+  EXPECT_TRUE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("/usr/src/chrome/src", flags->cwd());
+
+  // GCC-specific view of the same parsed flags.
+  devtools_goma::GCCFlags* gcc_flags = static_cast<devtools_goma::GCCFlags*>(
+      flags.get());
+  EXPECT_FALSE(gcc_flags->is_precompiling_header());
+  EXPECT_FALSE(gcc_flags->is_stdin_input());
+  // Note the expected order: "-mllvm" and its argument come last even though
+  // they appear early on the command line.
+  std::vector<string> compiler_info_flags {
+    "-fcolor-diagnostics",
+    "-fasan",
+    "-pthread",
+    "-fno-exceptions",
+    "-fvisibility=hidden",
+    "-fPIC",
+    "-mllvm",
+    "-asan-blacklist="
+    "/usr/src/chrome/src/third_party/asan/asan_blacklist.txt",
+  };
+  EXPECT_EQ(compiler_info_flags, gcc_flags->compiler_info_flags());
+  EXPECT_EQ(devtools_goma::GCCFlags::COMPILE, gcc_flags->mode());
+  EXPECT_TRUE(gcc_flags->is_cplusplus());
+  EXPECT_FALSE(gcc_flags->has_nostdinc());
+  EXPECT_FALSE(gcc_flags->has_no_integrated_as());
+  EXPECT_TRUE(gcc_flags->has_pipe());
+  ASSERT_EQ(1U, gcc_flags->include_dirs().size());
+  EXPECT_EQ("third_party/icu/public/common", gcc_flags->include_dirs()[0]);
+  ASSERT_EQ(1U, gcc_flags->non_system_include_dirs().size());
+  EXPECT_EQ("third_party/icu/public/common",
+            gcc_flags->non_system_include_dirs()[0]);
+  ASSERT_EQ(0U, gcc_flags->root_includes().size());
+  ASSERT_EQ(0U, gcc_flags->framework_dirs().size());
+  ASSERT_EQ(1U, gcc_flags->commandline_macros().size());
+  EXPECT_EQ("NO_TCMALLOC", gcc_flags->commandline_macros()[0].first);
+  EXPECT_TRUE(gcc_flags->commandline_macros()[0].second);
+}
+
+// Parses a Chrome TSan clang++ compile command line that passes the ignore
+// list via "-mllvm -tsan-blacklist=..." and checks outputs, inputs, the
+// optional input, compiler info flags, include dirs and macros.
+TEST_F(CompilerFlagsTest, ChromeTSANCompileFlag) {
+  std::vector<string> args {
+    "/usr/src/chrome/src/third_party/llvm-build/Release+Asserts/bin/clang++",
+    "-fcolor-diagnostics",
+    "-MMD",
+    "-MF",
+    "obj/base/message_loop/"
+    "base_unittests.message_loop_unittest.o.d",
+    "-DTHREAD_SANITIZER",
+    "-I../../third_party/icu/public/common",
+    "-Werror",
+    "-pthread",
+    "-fno-exceptions",
+    "-Wall",
+    "-fvisibility=hidden",
+    "-pipe",
+    "-fsanitize=thread",
+    "-fPIC",
+    "-mllvm",
+    "-tsan-blacklist="
+    "../../tools/valgrind/tsan_v2/ignores.txt",
+    "-c",
+    "../../base/message_loop/message_loop_unittest.cc",
+    "-o",
+    "obj/base/message_loop/"
+    "base_unittests.message_loop_unittest.o",
+  };
+
+  std::unique_ptr<CompilerFlags> flags(
+      CompilerFlags::MustNew(args, "/usr/src/chrome/src/out/Release"));
+
+  EXPECT_EQ(args, flags->args());
+  // The size checks below are fatal because the lines that follow each of
+  // them index into the container they just measured.
+  ASSERT_EQ(2U, flags->output_files().size());
+  EXPECT_EQ("obj/base/message_loop/base_unittests.message_loop_unittest.o",
+            flags->output_files()[0]);
+  EXPECT_EQ("obj/base/message_loop/base_unittests.message_loop_unittest.o.d",
+            flags->output_files()[1]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("../../base/message_loop/message_loop_unittest.cc",
+            flags->input_filenames()[0]);
+  ASSERT_EQ(1U, flags->optional_input_filenames().size());
+  EXPECT_EQ("../../tools/valgrind/tsan_v2/ignores.txt",
+            flags->optional_input_filenames()[0]);
+  EXPECT_EQ("clang++", flags->compiler_base_name());
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang++", flags->compiler_name());
+  EXPECT_TRUE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("/usr/src/chrome/src/out/Release", flags->cwd());
+
+  // GCC-specific view of the same parsed flags.
+  devtools_goma::GCCFlags* gcc_flags = static_cast<devtools_goma::GCCFlags*>(
+      flags.get());
+  EXPECT_FALSE(gcc_flags->is_precompiling_header());
+  EXPECT_FALSE(gcc_flags->is_stdin_input());
+  std::vector<string> compiler_info_flags {
+    "-fcolor-diagnostics",
+    "-pthread",
+    "-fno-exceptions",
+    "-fvisibility=hidden",
+    "-fsanitize=thread",
+    "-fPIC",
+    "-mllvm",
+    "-tsan-blacklist="
+    "../../tools/valgrind/tsan_v2/ignores.txt",
+  };
+  EXPECT_EQ(compiler_info_flags, gcc_flags->compiler_info_flags());
+  EXPECT_EQ(devtools_goma::GCCFlags::COMPILE, gcc_flags->mode());
+  EXPECT_TRUE(gcc_flags->is_cplusplus());
+  EXPECT_FALSE(gcc_flags->has_nostdinc());
+  EXPECT_FALSE(gcc_flags->has_no_integrated_as());
+  EXPECT_TRUE(gcc_flags->has_pipe());
+  ASSERT_EQ(1U, gcc_flags->include_dirs().size());
+  EXPECT_EQ("../../third_party/icu/public/common",
+            gcc_flags->include_dirs()[0]);
+  ASSERT_EQ(1U, gcc_flags->non_system_include_dirs().size());
+  EXPECT_EQ("../../third_party/icu/public/common",
+            gcc_flags->non_system_include_dirs()[0]);
+  ASSERT_EQ(0U, gcc_flags->root_includes().size());
+  ASSERT_EQ(0U, gcc_flags->framework_dirs().size());
+  ASSERT_EQ(1U, gcc_flags->commandline_macros().size());
+  EXPECT_EQ("THREAD_SANITIZER", gcc_flags->commandline_macros()[0].first);
+  EXPECT_TRUE(gcc_flags->commandline_macros()[0].second);
+}
+
+// Same TSan compile command line as ChromeTSANCompileFlag, but the ignore
+// list is passed via "-fsanitize-blacklist=...". The file is still expected
+// as an optional input, while the flag itself is not expected among the
+// compiler info flags.
+TEST_F(CompilerFlagsTest, ChromeTSANCompileFlagWithSanitizeBlacklist) {
+  std::vector<string> args {
+    "/usr/src/chrome/src/third_party/llvm-build/Release+Asserts/bin/clang++",
+    "-fcolor-diagnostics",
+    "-MMD",
+    "-MF",
+    "obj/base/message_loop/"
+    "base_unittests.message_loop_unittest.o.d",
+    "-DTHREAD_SANITIZER",
+    "-I../../third_party/icu/public/common",
+    "-Werror",
+    "-pthread",
+    "-fno-exceptions",
+    "-Wall",
+    "-fvisibility=hidden",
+    "-pipe",
+    "-fsanitize=thread",
+    "-fPIC",
+    "-fsanitize-blacklist="
+    "../../tools/valgrind/tsan_v2/ignores.txt",
+    "-c",
+    "../../base/message_loop/message_loop_unittest.cc",
+    "-o",
+    "obj/base/message_loop/"
+    "base_unittests.message_loop_unittest.o",
+  };
+
+  std::unique_ptr<CompilerFlags> flags(
+      CompilerFlags::MustNew(args, "/usr/src/chrome/src/out/Release"));
+
+  EXPECT_EQ(args, flags->args());
+  // The size checks below are fatal because the lines that follow each of
+  // them index into the container they just measured.
+  ASSERT_EQ(2U, flags->output_files().size());
+  EXPECT_EQ("obj/base/message_loop/base_unittests.message_loop_unittest.o",
+            flags->output_files()[0]);
+  EXPECT_EQ("obj/base/message_loop/base_unittests.message_loop_unittest.o.d",
+            flags->output_files()[1]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("../../base/message_loop/message_loop_unittest.cc",
+            flags->input_filenames()[0]);
+  ASSERT_EQ(1U, flags->optional_input_filenames().size());
+  EXPECT_EQ("../../tools/valgrind/tsan_v2/ignores.txt",
+            flags->optional_input_filenames()[0]);
+  EXPECT_EQ("clang++", flags->compiler_base_name());
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang++", flags->compiler_name());
+  EXPECT_TRUE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("/usr/src/chrome/src/out/Release", flags->cwd());
+
+  // GCC-specific view of the same parsed flags.
+  devtools_goma::GCCFlags* gcc_flags = static_cast<devtools_goma::GCCFlags*>(
+      flags.get());
+  EXPECT_FALSE(gcc_flags->is_precompiling_header());
+  EXPECT_FALSE(gcc_flags->is_stdin_input());
+  std::vector<string> compiler_info_flags {
+    "-fcolor-diagnostics",
+    "-pthread",
+    "-fno-exceptions",
+    "-fvisibility=hidden",
+    "-fsanitize=thread",
+    "-fPIC",
+  };
+  EXPECT_EQ(compiler_info_flags, gcc_flags->compiler_info_flags());
+  EXPECT_EQ(devtools_goma::GCCFlags::COMPILE, gcc_flags->mode());
+  EXPECT_TRUE(gcc_flags->is_cplusplus());
+  EXPECT_FALSE(gcc_flags->has_nostdinc());
+  EXPECT_FALSE(gcc_flags->has_no_integrated_as());
+  EXPECT_TRUE(gcc_flags->has_pipe());
+  ASSERT_EQ(1U, gcc_flags->include_dirs().size());
+  EXPECT_EQ("../../third_party/icu/public/common",
+            gcc_flags->include_dirs()[0]);
+  ASSERT_EQ(1U, gcc_flags->non_system_include_dirs().size());
+  EXPECT_EQ("../../third_party/icu/public/common",
+            gcc_flags->non_system_include_dirs()[0]);
+  ASSERT_EQ(0U, gcc_flags->root_includes().size());
+  ASSERT_EQ(0U, gcc_flags->framework_dirs().size());
+  ASSERT_EQ(1U, gcc_flags->commandline_macros().size());
+  EXPECT_EQ("THREAD_SANITIZER", gcc_flags->commandline_macros()[0].first);
+  EXPECT_TRUE(gcc_flags->commandline_macros()[0].second);
+}
+
+// Parses a Chrome-on-Mac clang++ command line that links a .dylib and
+// checks that it is reported as LINK mode with the expected single output
+// and single object input.
+TEST_F(CompilerFlagsTest, ChromeMacDylibLink) {
+  std::vector<string> args {
+    "clang++",
+    "-shared",
+    "-Wl,-search_paths_first",
+    "-Wl,-dead_strip",
+    "-compatibility_version",
+    "1.0.0",
+    "-current_version",
+    "111.1.4",
+    "-mmacosx-version-min=10.5",
+    "-isysroot",
+    "/Developer/SDKs/MacOSX10.5.sdk",
+    "-arch",
+    "i386",
+    "-Lout/Release",
+    "-install_name",
+    "/usr/lib/libSystem.B.dylib",
+    "-o",
+    "out/Release/libclosure_blocks_leopard_compat_stub.dylib",
+    "out/Release/obj.target/closure_blocks_leopard_compat/"
+    "content/browser/mac/closure_blocks_leopard_compat.o",
+  };
+
+  std::unique_ptr<CompilerFlags> flags(
+      CompilerFlags::MustNew(args, "/usr/src/chrome/src"));
+
+  EXPECT_EQ(args, flags->args());
+  // The size checks below are fatal because the lines that follow each of
+  // them index element 0 of the container they just measured.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("out/Release/libclosure_blocks_leopard_compat_stub.dylib",
+            flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("out/Release/obj.target/closure_blocks_leopard_compat/"
+            "content/browser/mac/closure_blocks_leopard_compat.o",
+            flags->input_filenames()[0]);
+  EXPECT_EQ("clang++", flags->compiler_base_name());
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang++", flags->compiler_name());
+  EXPECT_TRUE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("/usr/src/chrome/src", flags->cwd());
+
+  // GCC-specific view of the same parsed flags.
+  devtools_goma::GCCFlags* gcc_flags = static_cast<devtools_goma::GCCFlags*>(
+      flags.get());
+  EXPECT_FALSE(gcc_flags->is_precompiling_header());
+  EXPECT_FALSE(gcc_flags->is_stdin_input());
+  EXPECT_EQ(devtools_goma::GCCFlags::LINK, gcc_flags->mode());
+}
+
+// A Mac framework link line whose -install_name and -o values contain
+// spaces must parse successfully and round-trip its arguments unchanged.
+TEST_F(CompilerFlagsTest, ChromeMacInstallName) {
+  std::vector<string> cmdline {
+    "clang++",
+    "-shared",
+    "-framework",
+    "Cocoa",
+    "-Wl,-search_paths_first",
+    "-Wl,-ObjC",
+    "-Wl,-dead_strip",
+    "-mmacosx-version-min=10.6",
+    "-L.",
+    "-install_name",
+    "@executable_path/../Frameworks/"
+    "Content Shell Framework.framework/"
+    "Content Shell Framework",
+    "-o",
+    "Content Shell Framework.framework/"
+    "Versions/A/Content Shell Framework",
+  };
+
+  std::unique_ptr<CompilerFlags> parsed(
+      CompilerFlags::MustNew(cmdline, "/usr/src/chrome/src"));
+
+  EXPECT_EQ(cmdline, parsed->args());
+  EXPECT_TRUE(parsed->is_successful());
+  EXPECT_EQ("", parsed->fail_message());
+}
+
+// A command line using "-rpath <path>" must parse successfully and
+// round-trip its arguments unchanged.
+TEST_F(CompilerFlagsTest, ChromeMacRpath) {
+  std::vector<string> cmdline {
+    "clang++",
+    "-rpath",
+    "@executable_path/../../..",
+    "-o",
+    "content_shell_helper_app_executable/"
+    "Content Shell Helper",
+  };
+
+  std::unique_ptr<CompilerFlags> parsed(
+      CompilerFlags::MustNew(cmdline, "/usr/src/chrome/src"));
+
+  EXPECT_EQ(cmdline, parsed->args());
+  EXPECT_TRUE(parsed->is_successful());
+  EXPECT_EQ("", parsed->fail_message());
+}
+
+// A command line forwarding linker options through repeated "-Xlinker"
+// pairs must parse successfully and round-trip its arguments unchanged.
+TEST_F(CompilerFlagsTest, ChromeMacLinkerRpath) {
+  std::vector<string> cmdline {
+    "clang++",
+    "-Xlinker", "-rpath",
+    "-Xlinker", "@executable_path/Frameworks",
+    "-Xlinker", "-objc_abi_version",
+    "-Xlinker", "2",
+    "-arch", "x86_64",
+    "-o", "obj/base/x64/base_unittests",
+  };
+
+  std::unique_ptr<CompilerFlags> parsed(
+      CompilerFlags::MustNew(cmdline, "/usr/src/chrome/src"));
+
+  EXPECT_EQ(cmdline, parsed->args());
+  EXPECT_TRUE(parsed->is_successful());
+  EXPECT_EQ("", parsed->fail_message());
+}
+
+// -fdebug-prefix-map=OLD=NEW entries are collected into a map. The expected
+// values show the split happening at the first '=' after the flag name
+// ("/a=/b=/c" maps "/a" to "/b=/c") and an empty NEW part being accepted.
+// None of these flags is expected among the compiler info flags.
+TEST_F(CompilerFlagsTest, ClangFDebugPrefixMap) {
+  std::vector<string> cmdline {
+    "clang++",
+    "-fdebug-prefix-map=/foo/bar=/baz",
+    "-fdebug-prefix-map=/a=/b=/c",
+    "-fdebug-prefix-map=/d=",
+    "-c",
+    "hello.cc",
+  };
+
+  GCCFlags parsed(cmdline, "/usr/src/chrome/src");
+
+  EXPECT_EQ(cmdline, parsed.args());
+  EXPECT_TRUE(parsed.is_successful());
+
+  const std::map<string, string> expected_prefix_map {
+    { "/foo/bar", "/baz" },
+    { "/a", "/b=/c" },
+    { "/d", "" },
+  };
+  EXPECT_EQ(expected_prefix_map, parsed.fdebug_prefix_map());
+  const std::vector<string> no_info_flags;
+  EXPECT_EQ(no_info_flags, parsed.compiler_info_flags());
+}
+
+// A -fdebug-prefix-map value without an '=' separator must make parsing
+// report failure, while the arguments are still kept as given.
+TEST_F(CompilerFlagsTest, ClangShouldDetectBrokenFDebugPrefixMap) {
+  std::vector<string> cmdline {
+    "clang++",
+    "-fdebug-prefix-map=/foo",
+    "-c",
+    "hello.cc",
+  };
+
+  GCCFlags parsed(cmdline, "/usr/src/chrome/src");
+
+  EXPECT_EQ(cmdline, parsed.args());
+  EXPECT_FALSE(parsed.is_successful());
+}
+
+// When the same OLD prefix is mapped twice, the first mapping is the one
+// expected in the resulting map ("/foo" -> "/bar", not "/baz").
+TEST_F(CompilerFlagsTest, ClangShouldUseFirstFDebugPrefixMap) {
+  std::vector<string> cmdline {
+    "clang++",
+    "-fdebug-prefix-map=/foo=/bar",
+    "-fdebug-prefix-map=/foo=/baz",
+    "-c",
+    "hello.cc",
+  };
+
+  GCCFlags parsed(cmdline, "/usr/src/chrome/src");
+
+  EXPECT_EQ(cmdline, parsed.args());
+  EXPECT_TRUE(parsed.is_successful());
+
+  const std::map<string, string> expected_prefix_map {
+    { "/foo", "/bar" },
+  };
+  EXPECT_EQ(expected_prefix_map, parsed.fdebug_prefix_map());
+  const std::vector<string> no_info_flags;
+  EXPECT_EQ(no_info_flags, parsed.compiler_info_flags());
+}
+
+// Every flag on this command line is expected to be recognized, i.e. the
+// parser must report success and an empty unknown-flags list.
+TEST_F(CompilerFlagsTest, ClangKnownFlags) {
+  // Flags collected from real-world command lines.
+  std::vector<string> cmdline {
+    "clang++", "-c", "foo.cc",
+    "-Qunused-arguments", "-Waddress",
+    "-nodefaultlibs", "-pie", "-rdynamic",
+    "-nostdlib", "-nostdlib++", "-static",
+    "-dA",
+  };
+
+  GCCFlags parsed(cmdline, "/");
+  EXPECT_TRUE(parsed.is_successful());
+
+  const bool all_known = parsed.unknown_flags().empty();
+  EXPECT_TRUE(all_known) << "unknown flags=" << parsed.unknown_flags();
+}
+
+// Compiling a header with -c is COMPILE mode, is flagged as precompiling a
+// header, and produces "<header>.gch" as its only output.
+TEST_F(CompilerFlagsTest, Precompiling) {
+  std::vector<string> cmdline { "gcc", "-c", "hello.h" };
+  GCCFlags parsed(cmdline, "/");
+
+  EXPECT_EQ(GCCFlags::COMPILE, parsed.mode());
+  EXPECT_TRUE(parsed.is_precompiling_header());
+  // Fatal size check: element 0 is read on the next line.
+  ASSERT_EQ(1U, parsed.output_files().size());
+  EXPECT_EQ("hello.h.gch", parsed.output_files()[0]);
+}
+
+// Preprocessing a header with -E is PREPROCESS mode, is not treated as
+// precompiling a header, and yields no output files.
+TEST_F(CompilerFlagsTest, PreprocessHeader) {
+  std::vector<string> cmdline { "gcc", "-E", "hello.h" };
+  GCCFlags parsed(cmdline, "/");
+
+  EXPECT_EQ(GCCFlags::PREPROCESS, parsed.mode());
+  EXPECT_FALSE(parsed.is_precompiling_header());
+  EXPECT_EQ(0U, parsed.output_files().size());
+}
+
+// GetFirstLine returns the text before the first newline of a multi-line
+// string, without the newline itself.
+TEST_F(CompilerFlagsTest, GetFirstLine) {
+  const char kVersionOutput[] =
+      "gcc (Ubuntu 4.4.3-4ubuntu5) 4.4.3\n"
+      "Copyright (C) 2009 Free Software Foundation, Inc.\n";
+  EXPECT_EQ("gcc (Ubuntu 4.4.3-4ubuntu5) 4.4.3",
+            GetFirstLine(kVersionOutput));
+}
+
+// NormalizeGccVersion drops the leading program name from gcc-style version
+// lines (gcc, cc, g++, c++, arm-nacl-gcc) and leaves clang / clang++ version
+// lines untouched.
+TEST_F(CompilerFlagsTest, NormalizeGccVersion) {
+  const struct {
+    const char* raw;
+    const char* normalized;
+  } kTestCases[] {
+    { "gcc (Ubuntu 4.4.3-4ubuntu5) 4.4.3",
+      "(Ubuntu 4.4.3-4ubuntu5) 4.4.3" },
+    { "cc (Ubuntu 4.4.3-4ubuntu5) 4.4.3",
+      "(Ubuntu 4.4.3-4ubuntu5) 4.4.3" },
+    { "g++ (Ubuntu 4.4.3-4ubuntu5) 4.4.3",
+      "(Ubuntu 4.4.3-4ubuntu5) 4.4.3" },
+    { "c++ (Ubuntu 4.4.3-4ubuntu5) 4.4.3",
+      "(Ubuntu 4.4.3-4ubuntu5) 4.4.3" },
+    { "arm-nacl-gcc (Native Client SDK "
+      "[438be0db920e3ca7711844c0218a5db37c747c2b]) 4.8.1",
+      "(Native Client SDK [438be0db920e3ca7711844c0218a5db37c747c2b]) "
+      "4.8.1" },
+    { "clang version 3.0 (trunk 129729)",
+      "clang version 3.0 (trunk 129729)" },
+    { "clang++ version 3.0 (trunk 129729)",
+      "clang++ version 3.0 (trunk 129729)" },
+  };
+
+  for (const auto& tc : kTestCases) {
+    EXPECT_EQ(tc.normalized, NormalizeGccVersion(tc.raw)) << tc.raw;
+  }
+}
+
+// Parses a minimal "cl /c hello.cc" command line and checks the derived
+// output name, compiler identification, how many of a sample set of
+// environment variables are reported as client-important, and the
+// VC-specific defaults.
+TEST_F(CompilerFlagsTest, VCFlags) {
+  std::vector<string> args { "cl", "/c", "hello.cc" };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // The size checks below are fatal because the lines that follow each of
+  // them index element 0 of the container they just measured.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_EQ("cl", flags->compiler_base_name());
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("cl.exe", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+
+  // Sample environment: eight "NAME=value" entries followed by a nullptr
+  // entry. A stack array of string literals replaces the former
+  // malloc/strdup/free bookkeeping, so nothing needs to be released and no
+  // element count has to be kept in sync with a cleanup loop.
+  const char* env[] = {
+    "PATH=C:\\Windows\\System32;C:\\VS9\\Common7\\Tools",
+    "VS90COMNTOOLS=C:\\VS9\\Common7\\Tools",
+    "VSINSTALLDIR=C:\\VS9",
+    "VCINSTALLDIR=C:\\vs9",
+    "INCLUDE=C:\\VS9\\VC\\ATLMFC\\INCLUDE;C:\\VS9\\VC\\INCLUDE;"
+    "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\include;",
+    "LIB=C:\\VS9\\VC\\ATLMFC\\LIB;C:\\VS9\\VC\\LIB;"
+    "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\lib;",
+    "LIBPATH=C:\\Windows\\Microsoft.NET\\Framework\\v3.5;"
+    "C:\\Windows\\Microsoft.NET\\Framework\\v2.0.50727;"
+    "C:\\VS9\\VC\\ATLMFC\\LIB;C:\\VS9\\VC\\LIB",
+    "WindowsSdkDir=C:\\Program Files\\Microsoft SDKs\\Windows\\"
+    "v7.1\\",
+    nullptr,
+  };
+
+  std::vector<string> important_env;
+  flags->GetClientImportantEnvs(env, &important_env);
+  EXPECT_EQ(5U, important_env.size()) << important_env;
+
+  // VC-specific view of the same parsed flags.
+  devtools_goma::VCFlags* vc_flags = static_cast<devtools_goma::VCFlags*>(
+      flags.get());
+  std::vector<string> compiler_info_flags;
+  EXPECT_EQ(compiler_info_flags, vc_flags->compiler_info_flags());
+  EXPECT_TRUE(vc_flags->is_cplusplus());
+  EXPECT_FALSE(vc_flags->ignore_stdinc());
+}
+
+// Table-driven check of IsClientImportantEnv / IsServerImportantEnv for a
+// cl.exe command line. Each row gives a "NAME=value" entry and whether it is
+// expected to be important on the client and on the server.
+TEST_F(CompilerFlagsTest, IsImportantEnvVC) {
+  const struct {
+    const char* env;
+    const bool client_important;
+    const bool server_important;
+  } kTestCases[] {
+    // Important on both client and server.
+    { "INCLUDE=/tmp/1234", true, true },
+    { "LIB=/tmp/1234", true, true },
+    { "MSC_CMD_FLAGS=foo", true, true },
+    { "VCINSTALLDIR=/tmp/to", true, true },
+    { "VSINSTALLDIR=/tmp/to", true, true },
+    { "WindowsSdkDir=/tmp/to", true, true },
+
+    // Important on the client only.
+    { "PATHEXT=.EXE", true, false },
+    { "SystemDrive=C:", true, false },
+    { "SystemRoot=C:\\Windows", true, false },
+
+    // Not important anywhere, in either letter case.
+    { "LD_PRELOAD=foo.so", false, false },
+    { "ld_preload=foo.so", false, false },
+  };
+
+  std::vector<string> args {
+    "cl", "/c", "hello.cc",
+  };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+
+  for (const auto& tc : kTestCases) {
+    // Table sanity check: a server-important row must also be
+    // client-important.
+    ASSERT_TRUE(!tc.server_important || tc.client_important);
+    // Expected value first, matching the argument order used by the other
+    // tests in this file.
+    EXPECT_EQ(tc.client_important, flags->IsClientImportantEnv(tc.env))
+        << tc.env;
+    EXPECT_EQ(tc.server_important, flags->IsServerImportantEnv(tc.env))
+        << tc.env;
+  }
+}
+
+// Parses a full Chrome-on-Windows cl.exe compile command line (12 /I
+// directories, 35 /D macros, many /wd warnings) and checks the derived
+// output path, compiler identification, compiler info flags, include dirs
+// and macro count.
+TEST_F(CompilerFlagsTest, ChromeWindowsCompileFlag) {
+  // The ridiculously long cl parameters
+  std::vector<string> args {
+    "cl",
+    "/Od",
+    // Include directories (12), each passed as a separate "/I" + value pair
+    // with literal double quotes around the value.
+    "/I", "\"..\\third_party\\WTL\\include\"",
+    "/I", "\"..\"",
+    "/I", "\"..\\third_party\\khronos\"",
+    "/I",
+    "\"..\\build\\Debug\\obj\\global_intermediate\\chrome_version\"",
+    "/I",
+    "\"..\\build\\Debug\\obj\\global_intermediate\\installer_util_strings\"",
+    "/I", "\"..\\breakpad\\src\"",
+    "/I", "\"..\\sandbox\\src\"",
+    "/I", "\"..\\build\\Debug\\obj\\global_intermediate\\policy\"",
+    "/I", "\"..\\build\\Debug\\obj\\global_intermediate\\protoc_out\"",
+    "/I", "\"..\\third_party\\directxsdk\\files\\Include\"",
+    "/I", "\"..\\third_party\\platformsdk_win7\\files\\Include\"",
+    "/I", "\"C:\\vs08\\\\VC\\atlmfc\\include\"",
+    // Macro definitions (35), "_DEBUG" appears twice on purpose.
+    "/D", "\"_DEBUG\"",
+    "/D", "\"_WIN32_WINNT=0x0601\"",
+    "/D", "\"WIN32\"",
+    "/D", "\"_WINDOWS\"",
+    "/D", "\"NOMINMAX\"",
+    "/D", "\"PSAPI_VERSION=1\"",
+    "/D", "\"_CRT_RAND_S\"",
+    "/D", "\"CERT_CHAIN_PARA_HAS_EXTRA_FIELDS\"",
+    "/D", "\"WIN32_LEAN_AND_MEAN\"",
+    "/D", "\"_ATL_NO_OPENGL\"",
+    "/D", "\"_HAS_TR1=0\"",
+    "/D", "\"_SECURE_ATL\"",
+    "/D", "\"CHROMIUM_BUILD\"",
+    "/D", "\"COMPONENT_BUILD\"",
+    "/D", "\"COMPILE_CONTENT_STATICALLY\"",
+    "/D", "\"TOOLKIT_VIEWS=1\"",
+    "/D", "\"ENABLE_REMOTING=1\"",
+    "/D", "\"ENABLE_P2P_APIS=1\"",
+    "/D", "\"ENABLE_CONFIGURATION_POLICY\"",
+    "/D", "\"ENABLE_INPUT_SPEECH\"",
+    "/D", "\"ENABLE_NOTIFICATIONS\"",
+    "/D", "\"NO_TCMALLOC\"",
+    "/D", "\"ENABLE_GPU=1\"",
+    "/D", "\"ENABLE_EGLIMAGE=1\"",
+    "/D", "\"USE_SKIA=1\"",
+    "/D", "\"__STD_C\"",
+    "/D", "\"_CRT_SECURE_NO_DEPRECATE\"",
+    "/D", "\"_SCL_SECURE_NO_DEPRECATE\"",
+    "/D", "\"ENABLE_REGISTER_PROTOCOL_HANDLER=1\"",
+    "/D", "\"__STDC_FORMAT_MACROS\"",
+    "/D", "\"DYNAMIC_ANNOTATIONS_ENABLED=1\"",
+    "/D", "\"WTF_USE_DYNAMIC_ANNOTATIONS=1\"",
+    "/D", "\"_DEBUG\"",
+    "/D", "\"_UNICODE\"",
+    "/D", "\"UNICODE\"",
+    // Code generation, precompiled header and output options.
+    "/FD",
+    "/EHsc",
+    "/RTC1",
+    "/MDd",
+    "/Gy",
+    "/GR-",
+    "/Yu\"precompile.h\"",
+    "/Fp\"..\\build\\Debug\\obj\\chrome\\chrome.pch\"",
+    "/Fo\"..\\build\\Debug\\obj\\chrome\\\\\"",
+    "/Fd\"..\\build\\Debug\\obj\\chrome\\chrome\\vc80.pdb\"",
+    "/W4",
+    "/WX",
+    "/nologo",
+    "/c",
+    "/Zi",
+    "/TP",
+    // Disabled warnings.
+    "/wd4351", "/wd4396", "/wd4503", "/wd4819", "/wd4100",
+    "/wd4121", "/wd4125", "/wd4127", "/wd4130", "/wd4131",
+    "/wd4189", "/wd4201", "/wd4238", "/wd4244", "/wd4245",
+    "/wd4310", "/wd4355", "/wd4428", "/wd4481", "/wd4505",
+    "/wd4510", "/wd4512", "/wd4530", "/wd4610", "/wd4611",
+    "/wd4701", "/wd4702", "/wd4706", "/wd4251",
+    "/FI", "\"precompile.h\"",
+    "/errorReport:prompt",
+    "/MP",
+    "/we4389",
+    "app\\chrome_exe_main_win.cc",
+  };
+  std::unique_ptr<CompilerFlags> flags(
+      CompilerFlags::MustNew(args, "d:\\src\\cr9\\src\\chrome"));
+
+  EXPECT_EQ(args, flags->args());
+  // The size checks below are fatal because the lines that follow each of
+  // them index element 0 of the container they just measured.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("..\\build\\Debug\\obj\\chrome\\\\chrome_exe_main_win.obj",
+            flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("app\\chrome_exe_main_win.cc", flags->input_filenames()[0]);
+  EXPECT_EQ("cl", flags->compiler_base_name());
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("cl.exe", flags->compiler_name());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\src\\cr9\\src\\chrome", flags->cwd());
+
+  // VC-specific view of the same parsed flags.
+  devtools_goma::VCFlags* vc_flags = static_cast<devtools_goma::VCFlags*>(
+      flags.get());
+  std::vector<string> compiler_info_flags {
+    "/Od",
+    "/MDd",
+  };
+  EXPECT_EQ(compiler_info_flags, vc_flags->compiler_info_flags());
+  EXPECT_TRUE(vc_flags->is_cplusplus());
+  EXPECT_FALSE(vc_flags->ignore_stdinc());
+  EXPECT_TRUE(vc_flags->require_mspdbserv());
+  // Include dirs are expected with the surrounding quotes removed.
+  ASSERT_EQ(12U, vc_flags->include_dirs().size());
+  EXPECT_EQ("..\\third_party\\WTL\\include", vc_flags->include_dirs()[0]);
+  EXPECT_EQ("..", vc_flags->include_dirs()[1]);
+  EXPECT_EQ("..\\third_party\\khronos", vc_flags->include_dirs()[2]);
+
+  ASSERT_EQ(35U, vc_flags->commandline_macros().size());
+}
+
+// Parses a long cl.exe command line for sfntly's font.cc and checks the
+// output/input files, compiler identification, compiler-info flags, include
+// directories and the number of command-line macros that were extracted.
+TEST_F(CompilerFlagsTest, SfntlyWindowsCompileFlag) {
+  std::vector<string> args;
+  args.push_back("cl");
+  args.push_back("/nologo");
+  args.push_back("/DWIN32");
+  args.push_back("/D_WINDOWS");
+  args.push_back("/Zm100");
+  args.push_back("/EHsc");
+  args.push_back("/Zi");
+  args.push_back("/W4");
+  args.push_back("/WX");
+  args.push_back("/O2");
+  args.push_back("/Ob2");
+  args.push_back("/Oy");
+  args.push_back("/GF");
+  args.push_back("/Gm-");
+  args.push_back("/GS");
+  args.push_back("/Gy");
+  args.push_back("/fp:precise");
+  args.push_back("/Zc:wchar_t");
+  args.push_back("/Zc:forScope");
+  args.push_back("/await");
+  args.push_back("/constexpr:depth1024");
+  args.push_back("/guard:cf");
+  args.push_back("/guard:cf-");
+  args.push_back("/ZH:SHA_256");
+  args.push_back("/GR-");
+  args.push_back("/MD");
+  args.push_back("/D");
+  args.push_back("NDEBUG");
+  args.push_back("/IC:\\src\\sfntly\\cpp\\src");
+  args.push_back("/IC:\\src\\sfntly\\cpp\\ext\\gtest\\include");
+  args.push_back("/IC:\\src\\sfntly\\cpp\\ext\\gtest");
+  args.push_back("/IC:\\src\\sfntly\\cpp\\src\\sample");
+  args.push_back("/IC:\\src\\sfntly\\cpp\\src\\sample\\subtly");
+  args.push_back("/IC:\\src\\sfntly\\cpp\\ext\\icu\\include");
+  args.push_back("/DSFNTLY_NO_EXCEPTION");
+  args.push_back("/DTIXML_USE_STL");
+  args.push_back("/DSFNTLY_EXPERIMENTAL");
+  args.push_back("/D_UNICODE");
+  args.push_back("/DUNICODE");
+  args.push_back("/TP");
+  args.push_back("/FoCMakeFiles\\sfntly.dir\\src\\sfntly\\font.cc.obj");
+  args.push_back("/FdC:\\src\\sfntly\\cpp\\build\\lib\\sfntly.pdb");
+  args.push_back("/c");
+  args.push_back("C:\\src\\sfntly\\cpp\\src\\sfntly\\font.cc");
+
+  std::unique_ptr<CompilerFlags> flags(
+      CompilerFlags::MustNew(args, "C:\\src\\sfntly\\cpp\\build"));
+
+  EXPECT_EQ(args, flags->args());
+  // ASSERT (not EXPECT) on the sizes: the [0] accesses below would read out
+  // of bounds if a vector were unexpectedly empty.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("CMakeFiles\\sfntly.dir\\src\\sfntly\\font.cc.obj",
+            flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("C:\\src\\sfntly\\cpp\\src\\sfntly\\font.cc",
+            flags->input_filenames()[0]);
+  EXPECT_EQ("cl", flags->compiler_base_name());
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("cl.exe", flags->compiler_name());
+  // The downcast to VCFlags further down is only valid for VC-style flags,
+  // so abort the test here if the command line was classified differently.
+  ASSERT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("C:\\src\\sfntly\\cpp\\build", flags->cwd());
+
+  devtools_goma::VCFlags* vc_flags = static_cast<devtools_goma::VCFlags*>(
+      flags.get());
+  std::vector<string> compiler_info_flags;
+  compiler_info_flags.push_back("/O2");
+  compiler_info_flags.push_back("/Ob2");
+  compiler_info_flags.push_back("/Oy");
+  compiler_info_flags.push_back("/MD");
+  EXPECT_EQ(compiler_info_flags, vc_flags->compiler_info_flags());
+  EXPECT_TRUE(vc_flags->is_cplusplus());
+  EXPECT_FALSE(vc_flags->ignore_stdinc());
+  EXPECT_TRUE(vc_flags->require_mspdbserv());
+  // One entry per /I argument above; indices 0, 1 and 5 are spot-checked.
+  ASSERT_EQ(6U, vc_flags->include_dirs().size());
+  EXPECT_EQ("C:\\src\\sfntly\\cpp\\src", vc_flags->include_dirs()[0]);
+  EXPECT_EQ("C:\\src\\sfntly\\cpp\\ext\\gtest\\include",
+            vc_flags->include_dirs()[1]);
+  EXPECT_EQ("C:\\src\\sfntly\\cpp\\ext\\icu\\include",
+            vc_flags->include_dirs()[5]);
+  // One entry per /D argument above (both "/DNAME" and "/D NAME" forms).
+  ASSERT_EQ(8U, vc_flags->commandline_macros().size());
+}
+
+// Checks implicit_macros() for three cl command lines: a plain C++ file, a
+// plain C file, and a C++ file compiled with /RTC, /MDd, /Zc:wchar_t and /ZI.
+TEST_F(CompilerFlagsTest, VCImplicitMacros) {
+  std::vector<string> args;
+
+  // Simple C++ file
+  args.push_back("cl");
+  args.push_back("/nologo");
+  args.push_back("/Zc:forScope");
+  args.push_back("/c");
+  args.push_back("C:\\src\\sfntly\\cpp\\src\\sfntly\\font.cc");
+  std::unique_ptr<CompilerFlags> flags1(
+      CompilerFlags::MustNew(args, "C:\\src\\sfntly\\cpp\\build"));
+  EXPECT_EQ(args, flags1->args());
+  EXPECT_EQ("#define __cplusplus\n", flags1->implicit_macros());
+
+  // Simple C file
+  args.clear();
+  args.push_back("cl");
+  args.push_back("/nologo");
+  args.push_back("/c");
+  args.push_back("C:\\src\\sfntly\\cpp\\src\\sfntly\\font.c");
+  std::unique_ptr<CompilerFlags> flags2(
+      CompilerFlags::MustNew(args, "C:\\src\\sfntly\\cpp\\build"));
+  EXPECT_EQ(args, flags2->args());
+  // Compare against "" so a failure prints the unexpected macro text.
+  EXPECT_EQ("", flags2->implicit_macros());
+
+  // Full-fledged command line
+  args.clear();
+  args.push_back("cl");
+  args.push_back("/nologo");
+  args.push_back("/D");
+  args.push_back("_DEBUG");
+  args.push_back("/RTC");
+  args.push_back("/MDd");
+  args.push_back("/Zc:wchar_t");
+  args.push_back("/ZI");
+  args.push_back("/c");
+  args.push_back("C:\\src\\sfntly\\cpp\\src\\sfntly\\font.cc");
+  std::unique_ptr<CompilerFlags> flags3(
+      CompilerFlags::MustNew(args, "C:\\src\\sfntly\\cpp\\build"));
+  EXPECT_EQ(args, flags3->args());
+  string macro = flags3->implicit_macros();
+  EXPECT_TRUE(macro.find("__cplusplus") != string::npos);
+  EXPECT_TRUE(macro.find("_VC_NODEFAULTLIB") != string::npos);
+  EXPECT_TRUE(macro.find("__MSVC_RUNTIME_CHECKS") != string::npos);
+  EXPECT_TRUE(macro.find("_NATIVE_WCHAR_T_DEFINED") != string::npos);
+  EXPECT_TRUE(macro.find("_WCHAR_T_DEFINED") != string::npos);
+
+  // ASSERT: the static_cast below is only valid when the flags are VC flags.
+  ASSERT_TRUE(flags3->is_vc());
+  VCFlags* vc_flags = static_cast<VCFlags*>(flags3.get());
+  EXPECT_TRUE(vc_flags->require_mspdbserv());
+}
+
+// Minimal clang-cl.exe compile: checks the derived "hello.obj" output, the
+// input file, the reported compiler name and the compiler-kind predicates.
+TEST_F(CompilerFlagsTest, ClangCl) {
+  std::vector<string> args{"clang-cl.exe", "/c", "hello.cc"};
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang-cl", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+}
+
+// clang-cl.exe with -m64: besides the basic parse results, -m64 is expected
+// to be the only entry in compiler_info_flags().
+TEST_F(CompilerFlagsTest, ClangClWithMflag) {
+  std::vector<string> args{"clang-cl.exe", "-m64", "/c", "hello.cc"};
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang-cl", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+
+  const std::vector<string> expected_compiler_info_flags{"-m64"};
+  EXPECT_EQ(expected_compiler_info_flags, flags->compiler_info_flags());
+}
+
+// Every option on this clang-cl command line is expected to be recognized,
+// i.e. unknown_flags() must come back empty.
+TEST_F(CompilerFlagsTest, ClangClKnownFlags) {
+  std::vector<string> cmdline;
+  cmdline.push_back("clang-cl");
+  cmdline.push_back("/c");
+  cmdline.push_back("hello.cc");
+  cmdline.push_back("-fcolor-diagnostics");
+  cmdline.push_back("-fno-standalone-debug");
+  cmdline.push_back("-fstandalone-debug");
+  cmdline.push_back("-gcolumn-info");
+  cmdline.push_back("-gline-tables-only");
+  cmdline.push_back("--analyze");
+
+  std::unique_ptr<CompilerFlags> parsed(
+      CompilerFlags::MustNew(cmdline, "d:\\tmp"));
+  EXPECT_TRUE(parsed->is_vc());
+  // Anything left over is printed in the failure message.
+  const auto& leftover = parsed->unknown_flags();
+  EXPECT_TRUE(leftover.empty()) << "unknown flags: " << leftover;
+}
+
+// cl.exe with -m64: the command line still parses successfully, but
+// compiler_info_flags() is expected to stay empty.
+TEST_F(CompilerFlagsTest, ClShouldNotRecognizeMflag) {
+  std::vector<string> args{"cl.exe", "-m64", "/c", "hello.cc"};
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("cl.exe", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+
+  // Compared as vectors so a failure prints the unexpected entries.
+  const std::vector<string> expected_compiler_info_flags;
+  EXPECT_EQ(expected_compiler_info_flags, flags->compiler_info_flags());
+}
+
+// clang-cl.exe with -fmsc-version=1800: the flag is expected to be the only
+// entry in compiler_info_flags().
+TEST_F(CompilerFlagsTest, ClangClWithMscVersionflag) {
+  std::vector<string> args{
+    "clang-cl.exe", "-fmsc-version=1800", "/c", "hello.cc",
+  };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang-cl", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+
+  const std::vector<string> expected_compiler_info_flags{"-fmsc-version=1800"};
+  EXPECT_EQ(expected_compiler_info_flags, flags->compiler_info_flags());
+}
+
+// clang-cl.exe with /Zi and with /ZI: in both cases the test expects the
+// usual single-output/single-input parse and require_mspdbserv() == false.
+TEST_F(CompilerFlagsTest, ClangClWithZi) {
+  const char* const kZiVariants[] = {"/Zi", "/ZI"};
+  for (const char* zi_flag : kZiVariants) {
+    // Names the variant under test in any failure message below.
+    SCOPED_TRACE(zi_flag);
+
+    std::vector<string> args{"clang-cl.exe", zi_flag, "/c", "hello.cc"};
+    std::unique_ptr<CompilerFlags> flags(
+        CompilerFlags::MustNew(args, "d:\\tmp"));
+    EXPECT_EQ(args, flags->args());
+    // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+    ASSERT_EQ(1U, flags->output_files().size());
+    EXPECT_EQ("hello.obj", flags->output_files()[0]);
+    ASSERT_EQ(1U, flags->input_filenames().size());
+    EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+    EXPECT_TRUE(flags->is_successful());
+    EXPECT_EQ("", flags->fail_message());
+    EXPECT_EQ("clang-cl", flags->compiler_name());
+    EXPECT_FALSE(flags->is_gcc());
+    EXPECT_FALSE(flags->is_javac());
+    // ASSERT: the static_cast to VCFlags below requires VC-style flags.
+    ASSERT_TRUE(flags->is_vc());
+    EXPECT_FALSE(flags->is_clang_tidy());
+    EXPECT_FALSE(flags->is_java());
+    EXPECT_EQ("d:\\tmp", flags->cwd());
+
+    const VCFlags& vc_flags = static_cast<const VCFlags&>(*flags);
+    EXPECT_FALSE(vc_flags.require_mspdbserv());
+  }
+}
+
+// clang-cl.exe with -isystem=<dir>: the whole argument is expected to be the
+// single entry of compiler_info_flags().
+TEST_F(CompilerFlagsTest, ClangClISystem) {
+  std::vector<string> args{
+    "clang-cl.exe", "-isystem=c:\\clang-cl\\include", "/c", "hello.cc",
+  };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang-cl", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+
+  ASSERT_EQ(1U, flags->compiler_info_flags().size());
+  EXPECT_EQ("-isystem=c:\\clang-cl\\include", flags->compiler_info_flags()[0]);
+}
+
+// cl.exe with -isystem=<dir>: the command line still parses successfully,
+// but compiler_info_flags() is expected to stay empty.
+TEST_F(CompilerFlagsTest, ClShouldNotRecognizeISystem) {
+  std::vector<string> args{
+    "cl.exe", "-isystem=c:\\clang-cl\\include", "/c", "hello.cc",
+  };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("cl.exe", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+
+  ASSERT_EQ(0U, flags->compiler_info_flags().size());
+}
+
+// clang-cl.exe with the joined form of -imsvc<dir> and /imsvc<dir>: in both
+// spellings the argument is expected verbatim as the single entry of
+// compiler_info_flags().
+TEST_F(CompilerFlagsTest, ClangClImsvc) {
+  const char* const kImsvcArgs[] = {
+    "-imsvcc:\\clang-cl\\include",
+    "/imsvcc:\\clang-cl\\include",
+  };
+  for (const char* imsvc_arg : kImsvcArgs) {
+    // Names the spelling under test in any failure message below.
+    SCOPED_TRACE(imsvc_arg);
+
+    std::vector<string> args{"clang-cl.exe", imsvc_arg, "/c", "hello.cc"};
+    std::unique_ptr<CompilerFlags> flags(
+        CompilerFlags::MustNew(args, "d:\\tmp"));
+    EXPECT_EQ(args, flags->args());
+    // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+    ASSERT_EQ(1U, flags->output_files().size());
+    EXPECT_EQ("hello.obj", flags->output_files()[0]);
+    ASSERT_EQ(1U, flags->input_filenames().size());
+    EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+    EXPECT_TRUE(flags->is_successful());
+    EXPECT_EQ("", flags->fail_message());
+    EXPECT_EQ("clang-cl", flags->compiler_name());
+    EXPECT_FALSE(flags->is_gcc());
+    EXPECT_FALSE(flags->is_javac());
+    EXPECT_TRUE(flags->is_vc());
+    EXPECT_FALSE(flags->is_clang_tidy());
+    EXPECT_FALSE(flags->is_java());
+    EXPECT_EQ("d:\\tmp", flags->cwd());
+
+    ASSERT_EQ(1U, flags->compiler_info_flags().size());
+    EXPECT_EQ(imsvc_arg, flags->compiler_info_flags()[0]);
+  }
+}
+
+// clang-cl.exe with -imsvc / /imsvc followed by the directory as a separate
+// argument: both the option and its value are expected, in order, in
+// compiler_info_flags().
+TEST_F(CompilerFlagsTest, ClangClImsvcWithValueArg) {
+  const char* const kImsvcOptions[] = {"-imsvc", "/imsvc"};
+  for (const char* imsvc_option : kImsvcOptions) {
+    // Names the spelling under test in any failure message below.
+    SCOPED_TRACE(imsvc_option);
+
+    std::vector<string> args{
+      "clang-cl.exe", imsvc_option, "c:\\clang-cl\\include", "/c", "hello.cc",
+    };
+    std::unique_ptr<CompilerFlags> flags(
+        CompilerFlags::MustNew(args, "d:\\tmp"));
+    EXPECT_EQ(args, flags->args());
+    // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+    ASSERT_EQ(1U, flags->output_files().size());
+    EXPECT_EQ("hello.obj", flags->output_files()[0]);
+    ASSERT_EQ(1U, flags->input_filenames().size());
+    EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+    EXPECT_TRUE(flags->is_successful());
+    EXPECT_EQ("", flags->fail_message());
+    EXPECT_EQ("clang-cl", flags->compiler_name());
+    EXPECT_FALSE(flags->is_gcc());
+    EXPECT_FALSE(flags->is_javac());
+    EXPECT_TRUE(flags->is_vc());
+    EXPECT_FALSE(flags->is_clang_tidy());
+    EXPECT_FALSE(flags->is_java());
+    EXPECT_EQ("d:\\tmp", flags->cwd());
+
+    ASSERT_EQ(2U, flags->compiler_info_flags().size());
+    EXPECT_EQ(imsvc_option, flags->compiler_info_flags()[0]);
+    EXPECT_EQ("c:\\clang-cl\\include", flags->compiler_info_flags()[1]);
+  }
+}
+
+// cl.exe with the joined -imsvc<dir> / /imsvc<dir> forms: parsing succeeds,
+// but compiler_info_flags() is expected to stay empty for both spellings.
+TEST_F(CompilerFlagsTest, ClShouldNotRecognizeImsvc) {
+  const char* const kImsvcArgs[] = {
+    "-imsvcc:\\clang-cl\\include",
+    "/imsvcc:\\clang-cl\\include",
+  };
+  for (const char* imsvc_arg : kImsvcArgs) {
+    // Names the spelling under test in any failure message below.
+    SCOPED_TRACE(imsvc_arg);
+
+    std::vector<string> args{"cl.exe", imsvc_arg, "/c", "hello.cc"};
+    std::unique_ptr<CompilerFlags> flags(
+        CompilerFlags::MustNew(args, "d:\\tmp"));
+    EXPECT_EQ(args, flags->args());
+    // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+    ASSERT_EQ(1U, flags->output_files().size());
+    EXPECT_EQ("hello.obj", flags->output_files()[0]);
+    ASSERT_EQ(1U, flags->input_filenames().size());
+    EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+    EXPECT_TRUE(flags->is_successful());
+    EXPECT_EQ("", flags->fail_message());
+    EXPECT_EQ("cl.exe", flags->compiler_name());
+    EXPECT_FALSE(flags->is_gcc());
+    EXPECT_FALSE(flags->is_javac());
+    EXPECT_TRUE(flags->is_vc());
+    EXPECT_FALSE(flags->is_clang_tidy());
+    EXPECT_FALSE(flags->is_java());
+    EXPECT_EQ("d:\\tmp", flags->cwd());
+
+    ASSERT_EQ(0U, flags->compiler_info_flags().size());
+  }
+}
+
+// cl.exe with -imsvc / /imsvc followed by a separate directory argument: the
+// test expects a successful parse with hello.cc as the only input file and an
+// empty compiler_info_flags() for both spellings.
+TEST_F(CompilerFlagsTest, ClShouldNotRecognizeImsvcWithValueArg) {
+  const char* const kImsvcOptions[] = {"-imsvc", "/imsvc"};
+  for (const char* imsvc_option : kImsvcOptions) {
+    // Names the spelling under test in any failure message below.
+    SCOPED_TRACE(imsvc_option);
+
+    std::vector<string> args{
+      "cl.exe", imsvc_option, "c:\\clang-cl\\include", "/c", "hello.cc",
+    };
+    std::unique_ptr<CompilerFlags> flags(
+        CompilerFlags::MustNew(args, "d:\\tmp"));
+    EXPECT_EQ(args, flags->args());
+    // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+    ASSERT_EQ(1U, flags->output_files().size());
+    EXPECT_EQ("hello.obj", flags->output_files()[0]);
+    ASSERT_EQ(1U, flags->input_filenames().size());
+    EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+    EXPECT_TRUE(flags->is_successful());
+    EXPECT_EQ("", flags->fail_message());
+    EXPECT_EQ("cl.exe", flags->compiler_name());
+    EXPECT_FALSE(flags->is_gcc());
+    EXPECT_FALSE(flags->is_javac());
+    EXPECT_TRUE(flags->is_vc());
+    EXPECT_FALSE(flags->is_clang_tidy());
+    EXPECT_FALSE(flags->is_java());
+    EXPECT_EQ("d:\\tmp", flags->cwd());
+
+    ASSERT_EQ(0U, flags->compiler_info_flags().size());
+  }
+}
+
+// cl.exe with -fmsc-version=1800: the command line still parses
+// successfully, but compiler_info_flags() is expected to stay empty.
+TEST_F(CompilerFlagsTest, ClShouldNotRecognizeMscVersionflag) {
+  std::vector<string> args{"cl.exe", "-fmsc-version=1800", "/c", "hello.cc"};
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("cl.exe", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+
+  // Compared as vectors so a failure prints the unexpected entries.
+  const std::vector<string> expected_compiler_info_flags;
+  EXPECT_EQ(expected_compiler_info_flags, flags->compiler_info_flags());
+}
+
+// clang-cl.exe with three -fsanitize= options: all three are expected, in
+// command-line order, in compiler_info_flags().
+TEST_F(CompilerFlagsTest, ClangClWithFsanitize) {
+  std::vector<string> args{
+    "clang-cl.exe",
+    "-fsanitize=address",
+    "-fsanitize=thread",
+    "-fsanitize=memory",
+    "/c",
+    "hello.cc",
+  };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang-cl", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+
+  const std::vector<string> expected_compiler_info_flags{
+    "-fsanitize=address",
+    "-fsanitize=thread",
+    "-fsanitize=memory",
+  };
+  EXPECT_EQ(expected_compiler_info_flags, flags->compiler_info_flags());
+}
+
+// clang-cl.exe with two -fsanitize-blacklist= options: the named files are
+// expected in optional_input_filenames(), while compiler_info_flags() is
+// expected to be empty.
+TEST_F(CompilerFlagsTest, ClangClWithFsanitizeBlacklist) {
+  std::vector<string> args{
+    "clang-cl.exe",
+    "-fsanitize-blacklist=blacklist.txt",
+    "-fsanitize-blacklist=blacklist2.txt",
+    "/c",
+    "hello.cc",
+  };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang-cl", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+
+  const std::vector<string> expected_compiler_info_flags;
+  EXPECT_EQ(expected_compiler_info_flags, flags->compiler_info_flags());
+  const std::vector<string> expected_optional_input_filenames{
+    "blacklist.txt",
+    "blacklist2.txt",
+  };
+  EXPECT_EQ(expected_optional_input_filenames,
+            flags->optional_input_filenames());
+}
+
+// clang-cl.exe with both -fsanitize= and -fsanitize-blacklist=: the former is
+// expected in compiler_info_flags(), the blacklist file in
+// optional_input_filenames().
+TEST_F(CompilerFlagsTest, ClangClWithFsanitizeAndBlacklist) {
+  std::vector<string> args{
+    "clang-cl.exe",
+    "-fsanitize=address",
+    "-fsanitize-blacklist=blacklist.txt",
+    "/c",
+    "hello.cc",
+  };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang-cl", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+
+  const std::vector<string> expected_compiler_info_flags{"-fsanitize=address"};
+  EXPECT_EQ(expected_compiler_info_flags, flags->compiler_info_flags());
+  const std::vector<string> expected_optional_input_filenames{"blacklist.txt"};
+  EXPECT_EQ(expected_optional_input_filenames,
+            flags->optional_input_filenames());
+}
+
+// clang-cl.exe with -fno-sanitize-blacklist ahead of a
+// -fsanitize-blacklist= option: the test expects optional_input_filenames()
+// to be empty, i.e. the blacklist file is not picked up as an input.
+TEST_F(CompilerFlagsTest, ClangClWithFNoSanitizeBlacklist) {
+  std::vector<string> args{
+    "clang-cl.exe",
+    "-fno-sanitize-blacklist",
+    "-fsanitize-blacklist=blacklist.txt",
+    "/c",
+    "hello.cc",
+  };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang-cl", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+
+  const std::vector<string> expected_optional_input_filenames;
+  EXPECT_EQ(expected_optional_input_filenames,
+            flags->optional_input_filenames());
+}
+
+// cl.exe with -fsanitize= and -fsanitize-blacklist=: parsing succeeds, but
+// both compiler_info_flags() and optional_input_filenames() are expected to
+// stay empty.
+TEST_F(CompilerFlagsTest, ClShouldNotRecognizeAnyFsanitize) {
+  std::vector<string> args{
+    "cl.exe",
+    "-fsanitize=address",
+    "-fsanitize-blacklist=blacklist.txt",
+    "/c",
+    "hello.cc",
+  };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("cl.exe", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+
+  const std::vector<string> expected_compiler_info_flags;
+  EXPECT_EQ(expected_compiler_info_flags, flags->compiler_info_flags());
+  const std::vector<string> expected_optional_input_filenames;
+  EXPECT_EQ(expected_optional_input_filenames,
+            flags->optional_input_filenames());
+}
+
+// clang-cl.exe with "-mllvm -regalloc=pbqp": both the option and its value
+// are expected, in order, in compiler_info_flags().
+TEST_F(CompilerFlagsTest, ClangClWithMllvm) {
+  std::vector<string> args{
+    "clang-cl.exe", "-mllvm", "-regalloc=pbqp", "/c", "hello.cc",
+  };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang-cl", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+
+  const std::vector<string> expected_compiler_info_flags{
+    "-mllvm",
+    "-regalloc=pbqp",
+  };
+  EXPECT_EQ(expected_compiler_info_flags,
+            flags->compiler_info_flags());
+}
+
+// cl.exe with "-mllvm -regalloc=pbqp": parsing succeeds with hello.cc as the
+// only input file, and compiler_info_flags() is expected to stay empty.
+TEST_F(CompilerFlagsTest, ClShouldNotRecognizeMllvm) {
+  std::vector<string> args{
+    "cl.exe", "-mllvm", "-regalloc=pbqp", "/c", "hello.cc",
+  };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT on the sizes: indexing [0] into an empty vector is undefined.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("cl.exe", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+
+  // Compared as vectors so a failure prints the unexpected entries.
+  const std::vector<string> expected_compiler_info_flags;
+  EXPECT_EQ(expected_compiler_info_flags, flags->compiler_info_flags());
+}
+
+// /arch:AVX2 is expected in compiler_info_flags() regardless of whether the
+// driver is cl.exe or clang-cl.exe.
+TEST_F(CompilerFlagsTest, ArchShouldBeRecognizedByClAndClangCl) {
+  std::vector<string> cmdline{"cl.exe", "/arch:AVX2", "/c", "hello.cc"};
+  const std::vector<string> want_info_flags{"/arch:AVX2"};
+
+  // First run: cl.exe is already the driver in cmdline[0].
+  std::unique_ptr<CompilerFlags> as_cl(
+      CompilerFlags::MustNew(cmdline, "d:\\tmp"));
+  EXPECT_EQ(cmdline, as_cl->args());
+  EXPECT_EQ(want_info_flags, as_cl->compiler_info_flags());
+
+  // Second run: identical options, clang-cl.exe as the driver.
+  cmdline[0] = "clang-cl.exe";
+  std::unique_ptr<CompilerFlags> as_clang_cl(
+      CompilerFlags::MustNew(cmdline, "d:\\tmp"));
+  EXPECT_EQ(cmdline, as_clang_cl->args());
+  EXPECT_EQ(want_info_flags, as_clang_cl->compiler_info_flags());
+}
+
+// clang-cl must keep every -Xclang pair (the marker and the argument that
+// follows it) in compiler_info_flags, in command-line order.
+TEST_F(CompilerFlagsTest, ClangClWithXclang) {
+  const std::vector<string> args {
+    "clang-cl.exe",
+    "-Xclang", "-add-plugin",
+    "-Xclang", "find-bad-constructs",
+    "/c",
+    "hello.cc",
+  };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT (not EXPECT) on the sizes: the [0] accesses below would read out
+  // of bounds if the parser returned an empty list.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang-cl", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+
+  const std::vector<string> expected_compiler_info_flags {
+    "-Xclang", "-add-plugin",
+    "-Xclang", "find-bad-constructs",
+  };
+  EXPECT_EQ(expected_compiler_info_flags,
+            flags->compiler_info_flags());
+}
+
+// cl.exe is not expected to understand -Xclang: the command line must still
+// parse, but none of the -Xclang arguments may reach compiler_info_flags.
+TEST_F(CompilerFlagsTest, ClShouldNotRecognizeXclang) {
+  const std::vector<string> args {
+    "cl.exe",
+    "-Xclang", "-add-plugin",
+    "-Xclang", "find-bad-constructs",
+    "/c",
+    "hello.cc",
+  };
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT (not EXPECT) on the sizes: the [0] accesses below would read out
+  // of bounds if the parser returned an empty list.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("hello.obj", flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("hello.cc", flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("cl.exe", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+
+  // Intentionally empty: nothing from -Xclang may be forwarded for cl.exe.
+  std::vector<string> expected_compiler_info_flags;
+  EXPECT_EQ(expected_compiler_info_flags, flags->compiler_info_flags());
+}
+
+// Parses a long clang-cl command line that mixes /-style and --style options
+// and checks that the output (/Fo) and the single input are still found.
+TEST_F(CompilerFlagsTest, CrWinClangCompileFlag) {
+  // b/18742923
+  const std::vector<string> args {
+    "clang-cl.exe",
+    "/FC",
+    "-DV8_DEPRECATION_WARNINGS",
+    "-D_WIN32_WINNT=0x0603",
+    "-DWINVER=0x0603",
+    "-DWIN32",
+    // snip more -D
+    "-Igen",
+    "-I..\\..\\third_party\\wtl\\include",
+    // snip more -I
+    "/wd4127",
+    // snip more /wd
+    "/O2",
+    "/Ob2",
+    "/GF",
+    "/Oy-",
+    "/fp:precise",
+    "/W3",
+    "/GR-",
+    "/Gy",
+    "/GS",
+    "/MT",
+    "-fmsc-version=1800",
+    "/fallback",
+    "/FIIntrin.h",
+    "-Wno-c++11-compat-deprecated-writable-strings",
+    // snip more -W
+    "-fsanitize=address",
+    "/d2Zi+",
+    "/d2FastFail",
+    "/d2cgsummary",
+    "/Zc:inline",
+    "/Oy-",
+    "/FS",
+    "/TP",
+    "/c",
+    "/Foobj\\testing\\gtest.multiprocess_func_list.obj",
+    "/Fdobj\\testing\\gtest.cc.pdb",
+    "-Qunused-arguments",
+    "..\\..\\testing\\multiprocess_func_list.cc",
+  };
+
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "d:\\tmp"));
+  EXPECT_EQ(args, flags->args());
+  // ASSERT (not EXPECT) on the sizes: the [0] accesses below would read out
+  // of bounds if the parser returned an empty list.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("obj\\testing\\gtest.multiprocess_func_list.obj",
+            flags->output_files()[0]);
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("..\\..\\testing\\multiprocess_func_list.cc",
+            flags->input_filenames()[0]);
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang-cl", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_TRUE(flags->is_vc());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("d:\\tmp", flags->cwd());
+}
+
+// clang-tidy command line without a "--" separator: -export-fixes names the
+// output, the source file is resolved against cwd, and no trailing clang
+// arguments are recorded.
+TEST_F(CompilerFlagsTest, ClangTidyFlag) {
+  const std::vector<string> args {
+    "clang-tidy",
+    // Spelled as clang-tidy spells it ("dtors"); the previous "drots" was a
+    // typo, so the real option was never passed to the parser.
+    "-analyze-temporary-dtors",
+    "-checks=*",
+    "-config={}",
+    "-dump-config",
+    "-enable-check-profile",
+    "-explain-config",
+    "-export-fixes=ex.yaml",
+    "-extra-arg=-std=c++11",
+    "-extra-arg-before=-DFOO",
+    "-fix",
+    "-fix-errors",
+    "-header-filter=*",
+    "-line-filter=[]",
+    "-list-checks",
+    "-p=.",
+    "-system-headers",
+    "-warnings-as-errors=*",
+    "foo.cc",
+  };
+
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "/tmp"));
+  EXPECT_EQ(args, flags->args());
+
+  // ASSERT (not EXPECT) on the sizes: the [0] accesses below would read out
+  // of bounds if the parser returned an empty list.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("ex.yaml", flags->output_files()[0]);
+
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ(file::JoinPath("/tmp", "foo.cc"), flags->input_filenames()[0]);
+
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang-tidy", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_vc());
+  EXPECT_TRUE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("/tmp", flags->cwd());
+
+  // is_clang_tidy() was checked above, so the downcast is expected to be valid.
+  const ClangTidyFlags& clang_tidy_flags =
+      static_cast<const ClangTidyFlags&>(*flags);
+  EXPECT_EQ(std::vector<string> { "-std=c++11" },
+            clang_tidy_flags.extra_arg());
+  EXPECT_EQ(std::vector<string> { "-DFOO" },
+            clang_tidy_flags.extra_arg_before());
+  EXPECT_FALSE(clang_tidy_flags.seen_hyphen_hyphen());
+  EXPECT_EQ(std::vector<string> {},
+            clang_tidy_flags.args_after_hyphen_hyphen());
+}
+
+// clang-tidy command line with clang arguments after "--": the separator must
+// be reported as seen and the trailing arguments recorded separately.
+TEST_F(CompilerFlagsTest, ClangTidyFlagWithClangArgs) {
+  const std::vector<string> args {
+    "clang-tidy",
+    // Spelled as clang-tidy spells it ("dtors"); the previous "drots" was a
+    // typo, so the real option was never passed to the parser.
+    "-analyze-temporary-dtors",
+    "-checks=*",
+    "-config={}",
+    "-dump-config",
+    "-enable-check-profile",
+    "-explain-config",
+    "-export-fixes=ex.yaml",
+    "-extra-arg=-std=c++11",
+    "-extra-arg-before=-DFOO",
+    "-fix",
+    "-fix-errors",
+    "-header-filter=*",
+    "-line-filter=[]",
+    "-list-checks",
+    "-p=.",
+    "-system-headers",
+    "-warnings-as-errors=*",
+    "foo.cc",
+    "--",
+    "-DBAR",
+  };
+
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "/tmp"));
+  EXPECT_EQ(args, flags->args());
+
+  // ASSERT (not EXPECT) on the sizes: the [0] accesses below would read out
+  // of bounds if the parser returned an empty list.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("ex.yaml", flags->output_files()[0]);
+
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ(file::JoinPath("/tmp", "foo.cc"), flags->input_filenames()[0]);
+
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang-tidy", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_vc());
+  EXPECT_TRUE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("/tmp", flags->cwd());
+
+  // is_clang_tidy() was checked above, so the downcast is expected to be valid.
+  const ClangTidyFlags& clang_tidy_flags =
+      static_cast<const ClangTidyFlags&>(*flags);
+  EXPECT_EQ(std::vector<string> { "-std=c++11" },
+            clang_tidy_flags.extra_arg());
+  EXPECT_EQ(std::vector<string> { "-DFOO" },
+            clang_tidy_flags.extra_arg_before());
+  EXPECT_TRUE(clang_tidy_flags.seen_hyphen_hyphen());
+  EXPECT_EQ(std::vector<string> { "-DBAR" },
+            clang_tidy_flags.args_after_hyphen_hyphen());
+}
+
+// clang-tidy command line that ends with a bare "--": the separator must be
+// reported as seen while the list of trailing clang arguments stays empty.
+TEST_F(CompilerFlagsTest, ClangTidyFlagWithClangArgsEndingWithHyphenHyphen) {
+  const std::vector<string> args {
+    "clang-tidy",
+    // Spelled as clang-tidy spells it ("dtors"); the previous "drots" was a
+    // typo, so the real option was never passed to the parser.
+    "-analyze-temporary-dtors",
+    "-checks=*",
+    "-config={}",
+    "-dump-config",
+    "-enable-check-profile",
+    "-explain-config",
+    "-export-fixes=ex.yaml",
+    "-extra-arg=-std=c++11",
+    "-extra-arg-before=-DFOO",
+    "-fix",
+    "-fix-errors",
+    "-header-filter=*",
+    "-line-filter=[]",
+    "-list-checks",
+    "-p=.",
+    "-system-headers",
+    "-warnings-as-errors=*",
+    "foo.cc",
+    "--",
+  };
+
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "/tmp"));
+  EXPECT_EQ(args, flags->args());
+
+  // ASSERT (not EXPECT) on the sizes: the [0] accesses below would read out
+  // of bounds if the parser returned an empty list.
+  ASSERT_EQ(1U, flags->output_files().size());
+  EXPECT_EQ("ex.yaml", flags->output_files()[0]);
+
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ(file::JoinPath("/tmp", "foo.cc"), flags->input_filenames()[0]);
+
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang-tidy", flags->compiler_name());
+  EXPECT_FALSE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_vc());
+  EXPECT_TRUE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_EQ("/tmp", flags->cwd());
+
+  // is_clang_tidy() was checked above, so the downcast is expected to be valid.
+  const ClangTidyFlags& clang_tidy_flags =
+      static_cast<const ClangTidyFlags&>(*flags);
+  EXPECT_EQ(std::vector<string> { "-std=c++11" },
+            clang_tidy_flags.extra_arg());
+  EXPECT_EQ(std::vector<string> { "-DFOO" },
+            clang_tidy_flags.extra_arg_before());
+  EXPECT_TRUE(clang_tidy_flags.seen_hyphen_hyphen());
+  EXPECT_TRUE(clang_tidy_flags.args_after_hyphen_hyphen().empty());
+}
+
+// Parses a clang command line in the shape bazel emits and checks the
+// outputs (-o and -MF), the input, and which flags feed compiler_info_flags.
+TEST_F(CompilerFlagsTest, bazel) {
+  // excerpt from https://plus.google.com/113459563087243716523/posts/Vu3hiHmfhE4
+  const std::vector<string> args {
+    "clang",
+    "-DCOMPILER_GCC3",
+    "-g0",
+    "-Os",
+    "-g0",
+    "-std=gnu++11",
+    "-stdlib=libc++",
+    "-MD",
+    "-MF", "bazel-out/path/to/foo.d",
+    "-frandom-seed=bazel-out/path/to/foo.o",
+    "-iquote", ".",
+    "-iquote", "bazel-out/path/to/include",
+    "-isystem", "path/to/include",
+    "-isystem", "another/path/to/include",
+    "-Ipath/to/include",
+    "-no-canonical-prefixes",
+    "-pthread",
+    "-c",
+    "path/to/foo.cc",
+    "-o", "path/to/foo.o",
+  };
+
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "/tmp"));
+  EXPECT_EQ(args, flags->args());
+  EXPECT_EQ(2U, flags->output_files().size());
+  ExpectHasElement(flags->output_files(), "path/to/foo.o");
+  ExpectHasElement(flags->output_files(), "bazel-out/path/to/foo.d");
+  // ASSERT (not EXPECT): the [0] access below would read out of bounds if
+  // the parser returned no input file.
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("path/to/foo.cc", flags->input_filenames()[0]);
+  EXPECT_EQ("clang", flags->compiler_base_name());
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang", flags->compiler_name());
+  EXPECT_TRUE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+
+  // is_gcc() was checked above, so the downcast is expected to be valid.
+  devtools_goma::GCCFlags* gcc_flags = static_cast<devtools_goma::GCCFlags*>(
+      flags.get());
+  // -D, -g0, -MD/-MF, -I, -c and -o are expected to be left out.
+  const std::vector<string> compiler_info_flags {
+    "-Os",
+    "-std=gnu++11",
+    "-stdlib=libc++",
+    "-frandom-seed=bazel-out/path/to/foo.o",
+    "-iquote", ".",
+    "-iquote", "bazel-out/path/to/include",
+    "-isystem", "path/to/include",
+    "-isystem", "another/path/to/include",
+    "-no-canonical-prefixes",
+    "-pthread",
+  };
+  EXPECT_EQ(compiler_info_flags, gcc_flags->compiler_info_flags());
+}
+
+// -no-canonical-prefixes must be the only flag of this command line that is
+// reported in compiler_info_flags.
+TEST_F(CompilerFlagsTest, NoCanonicalPrefixes) {
+  const std::vector<string> args {
+    "clang", "-c", "-no-canonical-prefixes", "path/to/foo.cc",
+    "-o", "path/to/foo.o",
+  };
+
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "/tmp"));
+  EXPECT_EQ(args, flags->args());
+  EXPECT_EQ(1U, flags->output_files().size());
+  ExpectHasElement(flags->output_files(), "path/to/foo.o");
+  // ASSERT (not EXPECT): the [0] access below would read out of bounds if
+  // the parser returned no input file.
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("path/to/foo.cc", flags->input_filenames()[0]);
+  EXPECT_EQ("clang", flags->compiler_base_name());
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang", flags->compiler_name());
+  EXPECT_TRUE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+
+  // is_gcc() was checked above, so the downcast is expected to be valid.
+  devtools_goma::GCCFlags* gcc_flags = static_cast<devtools_goma::GCCFlags*>(
+      flags.get());
+  const std::vector<string> compiler_info_flags {
+    "-no-canonical-prefixes",
+  };
+  EXPECT_EQ(compiler_info_flags, gcc_flags->compiler_info_flags());
+}
+
+// <path> in -fprofile-sample-use=<path> must be considered as input.
+// The parser is expected to report it as an optional input, next to the
+// regular input file, and to keep it out of compiler_info_flags.
+TEST_F(CompilerFlagsTest, FProfileSampleUse) {
+  const std::vector<string> args {
+    "clang", "-fprofile-sample-use=path/to/prof.prof",
+    "-c", "path/to/foo.c",
+    "-o", "path/to/foo.o",
+  };
+
+  std::unique_ptr<CompilerFlags> flags(CompilerFlags::MustNew(args, "/tmp"));
+  EXPECT_EQ(args, flags->args());
+
+  EXPECT_TRUE(flags->is_gcc());
+  EXPECT_FALSE(flags->is_javac());
+  EXPECT_FALSE(flags->is_clang_tidy());
+  EXPECT_FALSE(flags->is_java());
+  EXPECT_TRUE(flags->is_successful());
+  EXPECT_EQ("", flags->fail_message());
+  EXPECT_EQ("clang", flags->compiler_base_name());
+  EXPECT_EQ("clang", flags->compiler_name());
+
+  // ASSERT (not EXPECT) on the sizes: the [0] accesses below would read out
+  // of bounds if the parser returned an empty list.
+  ASSERT_EQ(1U, flags->input_filenames().size());
+  EXPECT_EQ("path/to/foo.c", flags->input_filenames()[0]);
+
+  ASSERT_EQ(1U, flags->optional_input_filenames().size());
+  EXPECT_EQ("path/to/prof.prof", flags->optional_input_filenames()[0]);
+
+  EXPECT_EQ(1U, flags->output_files().size());
+  ExpectHasElement(flags->output_files(), "path/to/foo.o");
+
+  // -fprofile-sample-use does not affect CompilerInfo key.
+  devtools_goma::GCCFlags* gcc_flags = static_cast<devtools_goma::GCCFlags*>(
+      flags.get());
+  EXPECT_TRUE(gcc_flags->compiler_info_flags().empty());
+}
+
+}  // namespace devtools_goma
diff --git a/lib/compress_util.cc b/lib/compress_util.cc
new file mode 100644
index 0000000..9fdb122
--- /dev/null
+++ b/lib/compress_util.cc
@@ -0,0 +1,206 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "compress_util.h"
+
+#include <string.h>
+
+#include "glog/logging.h"
+#include "string_piece.h"
+
+namespace {
+
+#ifdef ENABLE_LZMA
+// Size, in bytes, of the buffer LZMAInputStream allocates to receive decoded
+// output.
+const size_t kDefaultLZMAOutputBufSize = 65536;
+#endif
+
+}  // namespace
+
+namespace devtools_goma {
+
+// Display names returned by GetEncodingName(), indexed by EncodingType value;
+// the entries must stay in the same order as the enumerators.
+const char* const kEncodingNames[NUM_ENCODINGS] = {
+  "no encoding",
+  "deflate",
+  "lzma2",
+};
+
+// Looks up the display name of |type| in kEncodingNames.  The range of |type|
+// is verified by DCHECKs only.
+const char* GetEncodingName(EncodingType type) {
+  DCHECK_GE(type, NO_ENCODING);
+  DCHECK_LT(type, NUM_ENCODINGS);
+  const char* const name = kEncodingNames[type];
+  return name;
+}
+
+// Picks the encoding named in |header| by substring search.  "lzma2" is
+// looked for first, so it wins when both encodings are listed.  A null
+// |header|, or one naming neither encoding, yields NO_ENCODING.
+EncodingType GetEncodingFromHeader(const char* header) {
+  if (header != nullptr) {
+    if (strstr(header, "lzma2") != nullptr) {
+      return ENCODING_LZMA2;
+    }
+    if (strstr(header, "deflate") != nullptr) {
+      return ENCODING_DEFLATE;
+    }
+  }
+  return NO_ENCODING;
+}
+
+#ifdef ENABLE_LZMA
+// Feeds all of |input| through the already initialized coder |lzma| with
+// LZMA_FINISH and appends everything it produces to |output|.
+// Returns true when the coder reports LZMA_STREAM_END; any other terminal
+// status is logged with LOG(DFATAL) and yields false.
+// |lzma| is released with lzma_end() before returning.
+bool ReadAllLZMAStream(absl::string_view input, lzma_stream* lzma,
+                       string* output) {
+  char chunk[4096];
+  lzma->next_in = reinterpret_cast<const uint8_t*>(input.data());
+  lzma->avail_in = input.size();
+
+  lzma_ret status;
+  do {
+    // Hand the coder an empty chunk, then flush whatever it wrote into it.
+    lzma->next_out = reinterpret_cast<uint8_t*>(chunk);
+    lzma->avail_out = sizeof(chunk);
+    status = lzma_code(lzma, LZMA_FINISH);
+    output->append(chunk, sizeof(chunk) - lzma->avail_out);
+  } while (status == LZMA_OK);
+
+  const bool finished = (status == LZMA_STREAM_END);
+  if (!finished) {
+    LOG(DFATAL) << status;
+  }
+  lzma_end(lzma);
+  return finished;
+}
+
+// Wraps |sub_stream| and allocates the kDefaultLZMAOutputBufSize-byte buffer
+// that receives decoded data.  The decoder itself is not initialized here;
+// Decode() does that once the first input chunk is available.
+LZMAInputStream::LZMAInputStream(ZeroCopyInputStream* sub_stream)
+    : sub_stream_(sub_stream),
+      lzma_context_(LZMA_STREAM_INIT),
+      lzma_error_(LZMA_OK),
+      // Members are initialized in declaration order (buffer before size),
+      // so the size constant is used directly instead of output_buffer_size_.
+      output_buffer_(new uint8_t[kDefaultLZMAOutputBufSize]),
+      output_buffer_size_(kDefaultLZMAOutputBufSize),
+      output_position_(output_buffer_.get()),
+      byte_count_(0) {
+  // No compressed input yet; Decode() treats next_in == nullptr as "first
+  // chunk not fetched".
+  lzma_context_.next_in = nullptr;
+  lzma_context_.avail_in = 0;
+
+  // The whole output buffer is free and nothing has been handed out.
+  lzma_context_.next_out = output_position_;
+  lzma_context_.avail_out = output_buffer_size_;
+}
+
+// Releases the decoder state held in lzma_context_.  sub_stream_ is left
+// untouched.
+LZMAInputStream::~LZMAInputStream() {
+  lzma_end(&lzma_context_);
+}
+
+// Runs one lzma_code() step that writes into output_buffer_ from its start,
+// after fetching the next chunk from sub_stream_ when the previous input has
+// been consumed.
+// Returns the status of lzma_code(); the error from lzma_stream_decoder() if
+// initializing the decoder on the first chunk fails; or LZMA_STREAM_END with
+// next_out set to nullptr when sub_stream_ has no more data.
+lzma_ret LZMAInputStream::Decode() {
+  if (lzma_error_ == LZMA_OK && lzma_context_.avail_out == 0) {
+    // previous decode filled buffer. don't change input params yet.
+  } else if (lzma_context_.avail_in == 0) {
+    const void* in;
+    int in_size;
+    // next_in is nullptr (as set by the constructor) only until the first
+    // chunk has been fetched.
+    bool first = lzma_context_.next_in == nullptr;
+    bool ok = sub_stream_->Next(&in, &in_size);
+    if (!ok) {
+      // A null next_out is the marker Next() checks to tell "sub-stream
+      // exhausted" apart from a decoder-reported end of stream.
+      lzma_context_.next_out = nullptr;
+      lzma_context_.avail_out = 0;
+      return LZMA_STREAM_END;
+    }
+    lzma_context_.next_in = reinterpret_cast<const uint8_t*>(in);
+    lzma_context_.avail_in = in_size;
+    if (first) {
+      // Lazily set up the decoder once there is input to feed it.
+      lzma_ret error = lzma_stream_decoder(&lzma_context_,
+                                           lzma_easy_decoder_memusage(9),
+                                           0);
+      if (error != LZMA_OK) {
+        return error;
+      }
+    }
+  }
+  // Decode into the start of the buffer; anything handed out earlier is
+  // overwritten, so the read cursor is reset as well.
+  lzma_context_.next_out = reinterpret_cast<uint8_t*>(output_buffer_.get());
+  lzma_context_.avail_out = output_buffer_size_;
+  output_position_ = output_buffer_.get();
+  return lzma_code(&lzma_context_, LZMA_RUN);
+}
+
+// Hands out the decoded bytes between the read cursor (output_position_) and
+// the decoder's write cursor (next_out) via |data|/|size|, then marks them as
+// consumed by moving the read cursor up to the write cursor.
+void LZMAInputStream::DoNextOutput(const void** data, int* size) {
+  uint8_t* const decoded_end = lzma_context_.next_out;
+  *data = output_position_;
+  *size = decoded_end - output_position_;
+  output_position_ = decoded_end;
+}
+
+// Implements ZeroCopyInputStream::Next(): exposes the next run of decoded
+// bytes through |data| and |size|.  Returns false when the decoder is in an
+// error state other than LZMA_BUF_ERROR, or when sub_stream_ is exhausted.
+bool LZMAInputStream::Next(const void** data, int* size) {
+  bool ok = ((lzma_error_ == LZMA_OK) || (lzma_error_ == LZMA_STREAM_END) ||
+             (lzma_error_ == LZMA_BUF_ERROR));
+  // next_out == nullptr is set by Decode() once sub_stream_ ran dry.
+  if (!ok || (lzma_context_.next_out == nullptr)) {
+    return false;
+  }
+  // Decoded bytes that were not handed out yet (or were returned through
+  // BackUp) are served before decoding any further.
+  if (lzma_context_.next_out != output_position_) {
+    DoNextOutput(data, size);
+    return true;
+  }
+  if (lzma_error_ == LZMA_STREAM_END) {
+    // NOTE(review): next_out == nullptr already returned false at the top of
+    // this function, so this first branch looks unreachable.
+    if (lzma_context_.next_out == nullptr) {
+      *data = nullptr;
+      *size = 0;
+      return false;
+    } else {
+      // TODO: consider to use lzma's concatenated stream support?
+      // sub_stream_ may have concatenated streams to follow.
+      // Bank the finished stream's output count, then start a new decoder.
+      lzma_end(&lzma_context_);
+      byte_count_ += lzma_context_.total_out;
+      lzma_error_ = lzma_stream_decoder(&lzma_context_,
+                                        lzma_easy_decoder_memusage(9),
+                                        0);
+      if (lzma_error_ != LZMA_OK) {
+        return false;
+      }
+    }
+  }
+  lzma_error_ = Decode();
+  if (lzma_error_ == LZMA_STREAM_END && lzma_context_.next_out == nullptr) {
+    // The underlying stream's Next returned false inside Decode.
+    return false;
+  }
+  ok = ((lzma_error_ == LZMA_OK) || (lzma_error_ == LZMA_STREAM_END) ||
+        (lzma_error_ == LZMA_BUF_ERROR));
+  if (!ok) {
+    return false;
+  }
+  // May hand out zero bytes if this decode step produced no output.
+  DoNextOutput(data, size);
+  return true;
+}
+
+// Implements ZeroCopyInputStream::BackUp(): returns the last |count| bytes
+// handed out by Next() so that the following Next() yields them again.
+void LZMAInputStream::BackUp(int count) {
+  output_position_ -= count;
+  // Backing up everything the last Next() returned is legal and lands exactly
+  // on the start of the buffer, so equality must be accepted (>=, not >).
+  // Plain CHECK is used rather than CHECK_GE so that a failure does not try
+  // to print the uint8_t pointers as C strings.
+  CHECK(output_position_ >= output_buffer_.get());
+}
+
+// Implements ZeroCopyInputStream::Skip(): consumes |count| decoded bytes by
+// pulling chunks with Next() and pushing back the unused part of the last
+// one.  Returns false if Next() fails before |count| bytes were consumed.
+bool LZMAInputStream::Skip(int count) {
+  const void* chunk;
+  int chunk_size;
+  for (;;) {
+    if (!Next(&chunk, &chunk_size)) {
+      return false;
+    }
+    if (chunk_size >= count) {
+      break;
+    }
+    count -= chunk_size;
+  }
+  // The last chunk reached past the requested position: return the surplus.
+  if (chunk_size > count) {
+    BackUp(chunk_size - count);
+  }
+  return true;
+}
+
+// Implements ZeroCopyInputStream::ByteCount(): number of decoded bytes handed
+// out so far, i.e. the output of finished streams (byte_count_) plus the
+// output of the current one, minus what is still waiting in the buffer.
+int64 LZMAInputStream::ByteCount() const {
+  // Accumulate in int64 (the return type); a plain int would truncate the
+  // sum once it no longer fits in an int.
+  int64 ret = byte_count_ + lzma_context_.total_out;
+  if (lzma_context_.next_out != nullptr && output_position_ != nullptr) {
+    // GzipInputStream adds followings but I think we need to remove.
+    //
+    // Followings won't be 0 if BackUp is called.  In such a case,
+    // total_out contains the bytes it is pushed back by BackUp.
+    ret -= reinterpret_cast<uintptr_t>(lzma_context_.next_out) -
+        reinterpret_cast<uintptr_t>(output_position_);
+  }
+  return ret;
+}
+
+#endif
+
+}  // namespace devtools_goma
diff --git a/lib/compress_util.h b/lib/compress_util.h
new file mode 100644
index 0000000..4a6e2f0
--- /dev/null
+++ b/lib/compress_util.h
@@ -0,0 +1,103 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_LIB_COMPRESS_UTIL_H_
+#define DEVTOOLS_GOMA_LIB_COMPRESS_UTIL_H_
+
+#include <memory>
+#include <string>
+
+
+#include "string_piece.h"
+#ifdef ENABLE_LZMA
+# ifdef _WIN32
+#  define LZMA_API_STATIC
+# endif  // _WIN32
+#include "google/protobuf/io/zero_copy_stream.h"
+#include "google/protobuf/message.h"
+#include "lzma.h"
+#endif  // ENABLE_LZMA
+
+namespace devtools_goma {
+
+
+#ifdef ENABLE_LZMA
+using google::protobuf::io::ZeroCopyInputStream;
+using google::int64;
+#endif
+using std::string;
+
+// Encodings this library can name and detect.  The values index the name
+// table used by GetEncodingName(), so they must stay dense and start at 0.
+enum EncodingType {
+  NO_ENCODING = 0,
+  ENCODING_DEFLATE,
+  ENCODING_LZMA2,
+  // Number of encodings above; not an encoding itself.
+  NUM_ENCODINGS
+};
+
+// Returns a human-readable name for |type|.  |type| must be a real encoding
+// (less than NUM_ENCODINGS); this is only verified by DCHECK.
+const char* GetEncodingName(EncodingType type);
+
+// Gets encoding type from |header|. If multiple encodings are found,
+// this function returns the preferred one (lzma2 is preferred over deflate).
+// Returns NO_ENCODING if |header| is null or names neither encoding.
+EncodingType GetEncodingFromHeader(const char* header);
+
+#ifdef ENABLE_LZMA
+// Runs all of |input| through |lzma|, which the caller must have initialized
+// as an encoder or decoder, and appends the produced bytes to |output|.
+// Returns true if the coder reached the end of the stream.  |lzma| is always
+// released with lzma_end(), so it cannot be reused without re-initialization.
+bool ReadAllLZMAStream(absl::string_view input, lzma_stream* lzma,
+                       string* output);
+
+// A ZeroCopyInputStream that decodes the LZMA-compressed bytes read from
+// another ZeroCopyInputStream.  Neither copyable nor movable.
+class LZMAInputStream : public ZeroCopyInputStream {
+ public:
+  // |sub_stream| supplies the compressed bytes.  It is not deleted by this
+  // class.
+  explicit LZMAInputStream(ZeroCopyInputStream* sub_stream);
+  ~LZMAInputStream() override;
+
+  LZMAInputStream(LZMAInputStream&&) = delete;
+  LZMAInputStream(const LZMAInputStream&) = delete;
+  LZMAInputStream& operator=(const LZMAInputStream&) = delete;
+  LZMAInputStream& operator=(LZMAInputStream&&) = delete;
+
+  // implements ZeroCopyInputStream ---
+  bool Next(const void** data, int* size) override;
+  void BackUp(int count) override;
+  bool Skip(int size) override;
+  int64 ByteCount() const override;
+
+ private:
+  // Decodes the next piece of input into output_buffer_.
+  lzma_ret Decode();
+  // Hands out the decoded bytes that have not been returned to the caller yet.
+  void DoNextOutput(const void** data, int* size);
+
+  // Source of compressed bytes.
+  ZeroCopyInputStream* sub_stream_;
+
+  // This code uses lzma_context_ and lzma_error_ like GzipInputStream
+  // in the protobuf library does:
+  // lzma_context_ is used like zcontext_ in GzipInputStream, and
+  // lzma_error_ is used like zerror_ in GzipInputStream.
+  // lzma_context_ has the following members:
+  // - next_out: the address lzma_code writes to next.
+  // - avail_out: the number of bytes lzma_code may write at next_out.
+  // - next_in: the address lzma_code reads from next.
+  // - avail_in: the number of bytes lzma_code may read at next_in.
+  // Note that lzma_code updates the values above for its next call, i.e.
+  // next_out increases and avail_out decreases by the size of the
+  // decompressed data.
+  //
+  // output_buffer_ is a dynamically allocated buffer that receives the
+  // uncompressed data.  output_buffer_size_ is its size.
+  // TODO: allow to change buffer size.
+  //
+  // output_position_ is the cursor in output_buffer_ that Next reads from.
+  // If BackUp is called, output_position_ decreases.
+  //
+  // byte_count_ accumulates the output size of streams that have already
+  // been decoded to their end.
+  lzma_stream lzma_context_;
+  lzma_ret lzma_error_;
+  std::unique_ptr<uint8_t[]> output_buffer_;
+  size_t output_buffer_size_;
+  uint8_t* output_position_;
+  int64 byte_count_;
+};
+
+#endif
+
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_LIB_COMPRESS_UTIL_H_
diff --git a/lib/compress_util_unittest.cc b/lib/compress_util_unittest.cc
new file mode 100644
index 0000000..05b3cb2
--- /dev/null
+++ b/lib/compress_util_unittest.cc
@@ -0,0 +1,124 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "compress_util.h"
+
+#include <vector>
+
+#include "glog/logging.h"
+#include "gtest/gtest.h"
+#include "string_piece.h"
+#ifdef ENABLE_LZMA
+# include "google/protobuf/io/zero_copy_stream_impl.h"
+# include "prototmp/goma_log.pb.h"
+using google::protobuf::io::ArrayInputStream;
+using google::protobuf::io::ConcatenatingInputStream;
+#endif  // ENABLE_LZMA
+using std::string;
+
+namespace devtools_goma {
+
+// Table-driven check of header parsing, including the empty and null header.
+TEST(CompressUtilTest, GetEncodingFromHeader) {
+  static const struct {
+    const char* header;
+    EncodingType expected;
+  } kCases[] = {
+    { "deflate", ENCODING_DEFLATE },
+    { "lzma2", ENCODING_LZMA2 },
+    // lzma2 is preferred when both encodings are listed.
+    { "deflate,lzma2", ENCODING_LZMA2 },
+    { "", NO_ENCODING },
+    { nullptr, NO_ENCODING },
+  };
+  for (const auto& test_case : kCases) {
+    EXPECT_EQ(test_case.expected, GetEncodingFromHeader(test_case.header));
+  }
+}
+
+#ifdef ENABLE_LZMA
+// Creates a compressible string: the decimal numbers 0 through 9999, each
+// followed by a single space.
+static string MakeCompressibleTestString() {
+  const int kNumberOfSubStrings = 10000;
+  std::ostringstream stream;
+  int number = 0;
+  while (number < kNumberOfSubStrings) {
+    stream << number++ << " ";
+  }
+  return stream.str();
+}
+
+// Fixture with helpers that push data through liblzma via ReadAllLZMAStream.
+// The helpers use ASSERT_*, so a failure is recorded and the helper returns
+// early; the calling test keeps running.
+class LZMATest : public testing::Test {
+ protected:
+  // Compresses |input| with an lzma_easy_encoder configured with |preset| and
+  // |check|, appending the result to |output|.
+  void Compress(const string& input, uint32_t preset, lzma_check check,
+                string* output) {
+    lzma_stream lzma = LZMA_STREAM_INIT;
+    ASSERT_EQ(LZMA_OK, lzma_easy_encoder(&lzma, preset, check));
+    // The result used to be ignored, which let a failed compression pass
+    // silently.
+    ASSERT_TRUE(ReadAllLZMAStream(input, &lzma, output));
+    LOG(INFO) << "Compressed: " << input.size() << " => " << output->size()
+              << " with preset=" << preset
+              << " check=" << check;
+  }
+
+  // Decompresses |input|, appending the result to |output|.
+  void Uncompress(const string& input, string* output) {
+    lzma_stream lzma = LZMA_STREAM_INIT;
+    ASSERT_EQ(LZMA_OK,
+              lzma_stream_decoder(&lzma, lzma_easy_decoder_memusage(9), 0));
+    ASSERT_TRUE(ReadAllLZMAStream(input, &lzma, output));
+  }
+
+  // Compresses the input string, uncompresses the output, and checks
+  // the original string is recovered.
+  void RunTest(const string& original_string,
+               uint32_t preset, lzma_check check) {
+    string compressed_string;
+    Compress(original_string, preset, check, &compressed_string);
+    string uncompressed_string;
+    Uncompress(compressed_string, &uncompressed_string);
+    EXPECT_EQ(original_string, uncompressed_string);
+  }
+
+  // Serializes |elog| and compresses it (preset 9, CRC64 check) into |out|.
+  void ConvertToCompressed(const devtools_goma::ExecLog& elog, string* out) {
+    string pbuf;
+    ASSERT_TRUE(elog.SerializeToString(&pbuf));
+    LOG(INFO) << "orig size=" << pbuf.size();
+    Compress(pbuf, 9, LZMA_CHECK_CRC64, out);
+  }
+};
+
+// Round-trips the same text with three preset / integrity-check combinations.
+TEST_F(LZMATest, CompressAndDecompress) {
+  const string text = MakeCompressibleTestString();
+  RunTest(text, 6, LZMA_CHECK_CRC64);
+  RunTest(text, 9, LZMA_CHECK_NONE);
+  RunTest(text, 1, LZMA_CHECK_SHA256);
+}
+
+// Parses a compressed ExecLog through LZMAInputStream when the whole
+// compressed payload is available as one contiguous array.
+TEST_F(LZMATest, LZMAInputStreamTestSimple) {
+  devtools_goma::ExecLog original;
+  original.set_username("goma-user");
+  string compressed;
+  ConvertToCompressed(original, &compressed);
+
+  ArrayInputStream raw_input(&compressed[0], compressed.size());
+  LZMAInputStream lzma_input(&raw_input);
+  devtools_goma::ExecLog decoded;
+  const bool parsed = decoded.ParseFromZeroCopyStream(&lzma_input);
+  EXPECT_TRUE(parsed);
+  EXPECT_EQ(decoded.username(), "goma-user");
+}
+
+// Parses a compressed ExecLog through LZMAInputStream when the compressed
+// payload reaches it in two chunks, split in the middle.
+TEST_F(LZMATest, LZMAInputStreamTestChunked) {
+  devtools_goma::ExecLog original;
+  original.set_username("goma-user");
+  string compressed;
+  ConvertToCompressed(original, &compressed);
+
+  const size_t half = compressed.size() / 2;
+  string former = compressed.substr(0, half);
+  string latter = compressed.substr(half);
+  // The chunk streams live on the stack and outlive the streams that read
+  // from them, so nothing has to be deleted by hand.
+  ArrayInputStream former_stream(&former[0], former.size());
+  ArrayInputStream latter_stream(&latter[0], latter.size());
+  ZeroCopyInputStream* inputs[] = { &former_stream, &latter_stream };
+  ConcatenatingInputStream concatenated(inputs, 2);
+  LZMAInputStream lzma_input(&concatenated);
+  devtools_goma::ExecLog decoded;
+  EXPECT_TRUE(decoded.ParseFromZeroCopyStream(&lzma_input));
+  LOG(INFO) << "lzma_input2. byte count: " << lzma_input.ByteCount();
+  EXPECT_EQ(decoded.username(), "goma-user");
+}
+
+#endif
+
+}  // namespace devtools_goma
diff --git a/lib/execreq_normalizer.cc b/lib/execreq_normalizer.cc
new file mode 100644
index 0000000..a90197b
--- /dev/null
+++ b/lib/execreq_normalizer.cc
@@ -0,0 +1,619 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "execreq_normalizer.h"
+
+#include <utility>
+#include <vector>
+
+#include "compiler_flags.h"
+#include "glog/logging.h"
+#include "glog/stl_logging.h"
+#include "join.h"
+#include "path.h"
+#include "path_resolver.h"
+#include "path_util.h"
+#include "string_piece_utils.h"
+using ::google::protobuf::RepeatedPtrField;
+using ::strings::StrCat;
+
+namespace {
+
+class FixPath : public FlagParser::Callback {
+ public:
+  explicit FixPath(const string& cwd) : cwd_(cwd), is_fixed_(false) {}
+
+  // Returns WeakRelativePath(value, cwd); notes whether that changed |value|.
+  string ParseFlagValue(const FlagParser::Flag& flag,
+                        const string& value) override {
+    string fixed = devtools_goma::PathResolver::WeakRelativePath(value, cwd_);
+    is_fixed_ = is_fixed_ || fixed != value;
+    return fixed;
+  }
+
+  // True once any processed value has been changed.
+  bool is_fixed() const { return is_fixed_; }
+
+ private:
+  const string cwd_;
+  bool is_fixed_;
+};
+
+class RewritePath : public FlagParser::Callback {
+ public:
+  explicit RewritePath(const std::map<string, string>& debug_prefix_map)
+      : debug_prefix_map_(debug_prefix_map),
+        is_rewritten_(false), removed_fdebug_prefix_map_(false) {}
+
+  // Returns the value to use in place of |value| for |flag|.
+  string ParseFlagValue(const FlagParser::Flag& flag,
+                        const string& value) override {
+    // TODO: need to support Windows?
+    if (!devtools_goma::IsPosixAbsolutePath(value)) {
+      return value;
+    }
+    // -fdebug-prefix-map itself MUST be eliminated when normalizing paths.
+    if (flag.name() == "fdebug-prefix-map") {
+      removed_fdebug_prefix_map_ = true;
+      return "";
+    }
+    string path = value;
+    if (devtools_goma::RewritePathWithDebugPrefixMap(
+          debug_prefix_map_, &path)) {
+      is_rewritten_ = true;
+      return path;
+    }
+    return value;
+  }
+
+  bool is_rewritten() const { return is_rewritten_; }
+  bool removed_fdebug_prefix_map() const { return removed_fdebug_prefix_map_; }
+
+ private:
+  // Held by value; a reference would dangle when built from a temporary.
+  const std::map<string, string> debug_prefix_map_;
+  bool is_rewritten_;
+  bool removed_fdebug_prefix_map_;
+};
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+bool RewritePathWithDebugPrefixMap(
+    const std::map<string, string>& debug_prefix_map, string* path) {
+  // Only the first matching prefix (in map order) is replaced.
+  // See CGDebugInfo::remapDIPath
+  // https://clang.llvm.org/doxygen/CGDebugInfo_8cpp_source.html
+  if (path->empty()) {
+    return false;
+  }
+  for (const auto& rule : debug_prefix_map) {
+    if (!strings::StartsWith(*path, rule.first)) {
+      continue;
+    }
+    *path = file::JoinPath(rule.second, path->substr(rule.first.length()));
+    return true;
+  }
+  return false;
+}
+
+// We say debug prefix map is ambiguous when the application order of debug
+// prefix map can change the final result.
+// For example:
+//   Suppose we have the following debug prefix maps:
+//     /A = /X    (1)
+//     /A/B = /Y  (2)
+//   and we want to rewrite /A/B/C.
+//   /A/B/C is rewritten to /X/B/C with (1), but to /Y/C with (2).
+// When such a case happens, we say debug prefix map is ambiguous.
+//
+// In clang and gcc, only first matched rule is used to rewrite path.
+// https://clang.llvm.org/doxygen/CGDebugInfo_8cpp_source.html
+// (CGDebugInfo::remapDIPath)
+// https://github.com/gcc-mirror/gcc/blob/460902cc8ac206904e7f1763f197927be87b122f/gcc/final.c#L1562
+//
+// TODO: If the application order of debug_prefix_map is written-order,
+// using std::vector<std::pair<string, string>> looks better than
+// std::map<string, string>?
+bool HasAmbiguityInDebugPrefixMap(
+    const std::map<string, string>& debug_prefix_map) {
+  // Keys are visited in sorted order, so a key that is a prefix of another
+  // key is always immediately followed by a key it prefixes.
+  const string* previous_key = nullptr;
+  for (const auto& entry : debug_prefix_map) {
+    const string& key = entry.first;
+    if (previous_key != nullptr && !previous_key->empty() &&
+        strings::StartsWith(key, *previous_key)) {
+      return true;
+    }
+    previous_key = &key;
+  }
+  return false;
+}
+
+// ExecReq_Inputs are sorted by filename now. However, cwd can be different
+// among computers, and filename might contain cwd. So the essentially same
+// ExecReq might have different hash values, even if cwd in ExecReq and
+// filenames in ExecReq_Input are cleared.
+// So we reorder ExecReq_Inputs so that ExecReq_Input whose filename starts with
+// cwd come first.
+//
+// For example: When cwd = /usr/local/google/home/foo/build,
+//   the following ExecReq_Inputs
+//     ExecReq_Input { filename: /usr/include/stdio.h, ... }
+//     ...
+//     ...
+//     ExecReq_Input { filename: /usr/local/google/home/foo/build/main.cc, ...}
+//   will be reordered to
+//     ExecReq_Input { filename: /usr/local/google/home/foo/build/main.cc, ...}
+//     ExecReq_Input { filename: /usr/include/stdio.h, ... }
+//     ...
+//     ...
+//
+// See also b/11455957
+/* static */
+void NormalizeExecReqInputOrderForCacheKey(ExecReq* req) {
+  const string& cwd = req->cwd();
+  const int num_inputs = req->input_size();
+
+  // |reordered| receives the inputs under cwd as they are encountered;
+  // everything else is parked in |outside_cwd| and appended afterwards.
+  // The relative order inside each of the two groups is unchanged.
+  RepeatedPtrField<ExecReq_Input> reordered;
+  reordered.Reserve(num_inputs);
+  std::vector<const ExecReq_Input*> outside_cwd;
+  outside_cwd.reserve(num_inputs);
+
+  for (const auto& input : req->input()) {
+    if (strings::StartsWith(input.filename(), cwd)) {
+      *reordered.Add() = input;
+    } else {
+      outside_cwd.push_back(&input);
+    }
+  }
+
+  for (const ExecReq_Input* input : outside_cwd) {
+    *reordered.Add() = *input;
+  }
+
+  DCHECK_EQ(reordered.size(), num_inputs);
+
+  // Install the new order; the old list ends up in |reordered| and is
+  // destroyed on return.
+  req->mutable_input()->Swap(&reordered);
+}
+
+void NormalizeExecReqForCacheKey(
+    const int id,
+    bool normalize_include_path,
+    bool is_linking,
+    const std::vector<string>& normalize_weak_relative_for_arg,
+    const std::map<string, string>& debug_prefix_map,
+    ExecReq* req) {  // Rewrites |req| in place into its cache-key form.
+  req->clear_requester_info();
+  req->clear_cache_policy();
+  req->clear_requester_env();
+
+  for (auto& input : *req->mutable_input()) {
+    input.clear_content();
+  }
+
+  req->mutable_command_spec()->clear_local_compiler_path();
+  const string& command_name = req->command_spec().name();
+  LOG_IF(ERROR, command_name.empty())
+      << "empty command_spec.name:" << req->command_spec().DebugString();
+  std::vector<string> args;
+  // Normalize args.
+  // We use CommandSpec.name as arg(0) for the cache key.
+  // See b/11973647.
+  if (req->expanded_arg_size() > 0) {
+    req->set_expanded_arg(0, command_name);
+    req->clear_arg();
+    std::copy(req->expanded_arg().begin(), req->expanded_arg().end(),
+              back_inserter(args));
+  } else if (req->arg_size() > 0) {
+    req->set_arg(0, command_name);
+    std::copy(req->arg().begin(), req->arg().end(), back_inserter(args));
+  }
+  static const int kOmit = 0;  // no bits set: drop the value
+  static const int kNormalizeWithCwd = 1 << 0;  // WeakRelativePath against cwd
+  static const int kNormalizeWithDebugPrefixMap = 1 << 1;
+  static const int kPreserveI = 1 << 2;  // do not touch -I / -isystem values
+  static const int kAsIs = 1 << 3;  // leave untouched
+
+  int keep_cwd = kOmit;
+  int keep_args = kNormalizeWithCwd;
+  int keep_pathnames_in_input = kOmit;
+  int keep_system_include_dirs = kNormalizeWithCwd;
+  if (normalize_weak_relative_for_arg.empty()) {
+    keep_args |= kAsIs;
+  }
+  if (!normalize_include_path) {
+    keep_system_include_dirs |= kAsIs;
+  }
+  if (CompilerFlags::IsGCCCommand(req->command_spec().name())) {
+    bool is_clang = CompilerFlags::IsClangCommand(req->command_spec().name());
+    FlagParser flag_parser;
+    GCCFlags::DefineFlags(&flag_parser);
+    FlagParser::Flag* flag_g = flag_parser.AddPrefixFlag("g");
+    FlagParser::Flag* flag_gsplit_dwarf =
+        flag_parser.AddBoolFlag("gsplit-dwarf");
+    FlagParser::Flag* flag_m = flag_parser.AddBoolFlag("M");
+    FlagParser::Flag* flag_md = flag_parser.AddBoolFlag("MD");
+    FlagParser::Flag* flag_mmd = flag_parser.AddBoolFlag("MMD");
+    FlagParser::Flag* flag_pnacl_allow_translate = flag_parser.AddBoolFlag(
+        "-pnacl-allow-translate");
+    flag_parser.Parse(args);
+
+    // -g does not capture -gsplit-dwarf. So we need to check it explicitly.
+    bool has_debug_flag = false;
+    if ((flag_g->seen() && flag_g->GetLastValue() != "0") ||
+        flag_gsplit_dwarf->seen()) {
+      // The last -g* is effective.
+      // If the last one is -g0, it is not debug build.
+      has_debug_flag = true;
+    }
+
+    bool has_m_flag = false;
+    if (flag_m->seen() ||
+        (flag_md->seen() && is_clang) ||
+        (flag_md->seen() && !flag_mmd->seen())) {
+      // We basically need to preserve all include paths if we see -M, -MD.
+      // With -M and -MD, full path input files are stored in .d file.
+      //
+      // Note that -MMD works opposite between clang and gcc.
+      // clang ignores -MMD if it is used with -M or -MD.
+      // gcc ignores -MD or -M if -MMD is specified.
+      has_m_flag = true;
+    }
+
+    // TODO: support relative path rewrite using debug-prefix-map.
+    // -fdebug-prefix-map=foo=bar is valid but it makes path conversion
+    // difficult to predict.
+    //
+    // TODO: support cross compile.
+    // I believe this feature will be used for cross compiling Windows code on
+    // Linux.  e.g. converting /home/foo to c:\\Users\\Foo.
+    //
+    // Although, clang-cl does not know -fdebug-prefix-map, it works with
+    // -Xclang
+    // $ clang-cl -Xclang -fdebug-prefix-map=/tmp=c:\\foo /Zi /c /tmp/foo.c
+    // and its debug info has c:\foo\foo.c.
+    if (has_debug_flag) {
+      // For debug build, we should keep cwd, system include paths,
+      // paths in input files.  However, all of them could be normalized
+      // with debug prefix map.
+      // (Note that if this is used with -M or -MD, restrictions for
+      // -M or -MD would be prioritized.)
+      bool has_ambiguity = HasAmbiguityInDebugPrefixMap(debug_prefix_map);
+      LOG_IF(ERROR, has_ambiguity)
+          << id << ": has ambiguity in -fdebug_prefix_map. "
+          << "goma server won't normalize ExecReq."
+          << " debug_prefix_map=" << debug_prefix_map;
+
+      if (!has_ambiguity && !debug_prefix_map.empty()) {
+        keep_cwd |= kNormalizeWithDebugPrefixMap;
+        keep_system_include_dirs |= kNormalizeWithDebugPrefixMap;
+        keep_pathnames_in_input |= kNormalizeWithDebugPrefixMap;
+        if (is_clang) {
+          keep_args |= kNormalizeWithDebugPrefixMap;
+        } else {
+          // gcc has command line in DW_AT_producer but clang does not.
+          keep_args |= kAsIs;
+        }
+      } else {
+        keep_cwd |= kAsIs;
+        keep_system_include_dirs |= kAsIs;
+        keep_pathnames_in_input |= kAsIs;
+        keep_args |= kAsIs;
+      }
+    }
+    if (has_m_flag) {
+      keep_system_include_dirs |= kAsIs;
+      keep_args |= kPreserveI;
+    }
+    if (flag_pnacl_allow_translate->seen()) {
+      // Absolute source file path name would be set in symtab if pnacl-clang
+      // translate output to ELF.  See: crbug.com/685461
+      keep_cwd |= kAsIs;
+    }
+  } else if (CompilerFlags::IsVCCommand(req->command_spec().name())) {
+    FlagParser flag_parser;
+    VCFlags::DefineFlags(&flag_parser);
+    FlagParser::Flag* flag_show_include =
+        flag_parser.AddBoolFlag("showIncludes");
+    FlagParser::Flag* flag_z7 = flag_parser.AddBoolFlag("Z7");
+    FlagParser::Flag* flag_zi = flag_parser.AddBoolFlag("Zi");
+    FlagParser::Flag* flag_zI = flag_parser.AddBoolFlag("ZI");
+    flag_parser.Parse(args);
+
+    if (flag_show_include->seen()) {
+      // With this option, full path dependency would be shown in
+      // stdout.  We must preserve cwd and all input file paths.
+      keep_cwd |= kAsIs;
+      keep_pathnames_in_input |= kAsIs;
+    }
+    if (flag_z7->seen() || flag_zi->seen() || flag_zI->seen()) {
+      // If debug info option is set, we must keep cwd, args, pathnames,
+      // system include dirs as-is.
+      keep_cwd |= kAsIs;
+      keep_args |= kAsIs;
+      keep_pathnames_in_input |= kAsIs;
+      keep_system_include_dirs |= kAsIs;
+    }
+  } else if (CompilerFlags::IsJavacCommand(req->command_spec().name())) {
+    keep_cwd = kOmit;
+    keep_args = kNormalizeWithCwd;
+    keep_pathnames_in_input = kOmit;
+    keep_system_include_dirs = kOmit;
+  } else {
+    keep_cwd |= kAsIs;
+    keep_args |= kAsIs;
+    keep_pathnames_in_input |= kAsIs;
+    keep_system_include_dirs |= kAsIs;
+  }
+  // TODO: check what is good for linking.
+  if (is_linking) {
+    // We preserve everything for linking but we may omit file contents.
+    keep_cwd |= kAsIs;
+    keep_args |= kAsIs;
+    keep_pathnames_in_input |= kAsIs;
+    keep_system_include_dirs |= kAsIs;
+  }
+
+  LOG(INFO) << id << ": normalize:"
+            << " keep_cwd=" << keep_cwd
+            << " keep_args=" << keep_args
+            << " keep_pathnames_in_input=" << keep_pathnames_in_input
+            << " keep_system_include_dirs=" << keep_system_include_dirs;
+
+  string debug_prefix_map_signature;
+  if (!debug_prefix_map.empty()) {
+    debug_prefix_map_signature += "debug_prefix_map:";
+    for (const auto& iter : debug_prefix_map) {
+      debug_prefix_map_signature += iter.second;
+      debug_prefix_map_signature += ",";
+    }
+  }
+  // TODO: confirm output does not contain paths from include_path
+  // in the situation where we normalize the include path names.
+
+  if (!(keep_system_include_dirs & kAsIs)) {
+    // Hack for non-system-default compilers e.g. NaCl and clang.
+    // Normalize following paths to be given with the relative path:
+    // - system_include_path
+    // - cxx_system_include_path
+    //
+    // Already cleared:
+    // - local_compiler_path
+    //
+    // Note:
+    // Since the following usually point to system default paths,
+    // we do not normalize them.
+    // - system_framework_path
+    // - system_library_path
+    CommandSpec* normalized_spec = req->mutable_command_spec();
+    // To avoid yet another cache poisoning, we should separate the cache area,
+    // i.e. a request given with relative include_paths must not be mistaken
+    // for one that was not normalized (hence the comment suffixes below).
+    if (keep_system_include_dirs & kNormalizeWithDebugPrefixMap) {
+      bool is_normalized = false;
+      for (auto& path : *normalized_spec->mutable_system_include_path()) {
+        is_normalized |=
+            RewritePathWithDebugPrefixMap(debug_prefix_map, &path);
+      }
+      for (auto& path : *normalized_spec->mutable_cxx_system_include_path()) {
+        is_normalized |=
+            RewritePathWithDebugPrefixMap(debug_prefix_map, &path);
+      }
+      if (is_normalized) {
+        normalized_spec->mutable_comment()->append(
+            " include_path:" + debug_prefix_map_signature);
+      }
+    } else if (keep_system_include_dirs & kNormalizeWithCwd) {
+      bool is_include_path_normalized = false;
+      for (auto& path : *normalized_spec->mutable_system_include_path()) {
+        string normalized_path =
+            PathResolver::WeakRelativePath(path, req->cwd());
+        if (path != normalized_path) {
+          path.assign(normalized_path);
+          is_include_path_normalized = true;
+        }
+      }
+      for (auto& path : *normalized_spec->mutable_cxx_system_include_path()) {
+        string normalized_path =
+            PathResolver::WeakRelativePath(path, req->cwd());
+        if (path != normalized_path) {
+          path.assign(normalized_path);
+          is_include_path_normalized = true;
+        }
+      }
+      if (is_include_path_normalized) {
+        normalized_spec->mutable_comment()->append(" include_path:cwd");
+      }
+    } else if (keep_system_include_dirs == kOmit) {
+      normalized_spec->clear_system_include_path();
+      normalized_spec->clear_cxx_system_include_path();
+      normalized_spec->mutable_comment()->append(" omit_include_path:");
+    } else {
+      DLOG(FATAL) << "Unexpected keep_system_include_dirs="
+                  << keep_system_include_dirs;
+    }
+  }
+
+  if (!(keep_args & kAsIs)) {
+    // Normalize arguments after certain flags.
+    // This is required for
+    // - libFindBadConstructs.so plugin used in chrome clang. b/9957696.
+    // - -B to choose third_party/binutils used in chrome. b/13940741.
+    // - -gcc-toolchain= for clang to find headers. b/16876457.
+    FlagParser parser;
+    GCCFlags::DefineFlags(&parser);
+
+    std::unique_ptr<RewritePath> rewrite_path;
+    // Use this to remove -fdebug-prefix-map in Release build b/28280739
+    if (keep_args & kNormalizeWithDebugPrefixMap) {
+      rewrite_path.reset(new RewritePath(debug_prefix_map));
+    } else {
+      rewrite_path.reset(new RewritePath((std::map<string, string>())));
+    }
+    parser.AddFlag("fdebug-prefix-map")->SetCallbackForParsedArgs(
+        rewrite_path.get());
+
+    FixPath fix_path(req->cwd());
+    for (const auto& flag : normalize_weak_relative_for_arg) {
+      if ((keep_args & kPreserveI) && (flag == "I" || flag == "isystem")) {
+        continue;
+      }
+      if (keep_args & kNormalizeWithDebugPrefixMap) {
+        parser.AddFlag(flag.c_str())->SetCallbackForParsedArgs(
+            rewrite_path.get());
+      } else if (keep_args & kNormalizeWithCwd) {
+        parser.AddFlag(flag.c_str())->SetCallbackForParsedArgs(&fix_path);
+      }
+    }
+
+    parser.Parse(args);
+    if (fix_path.is_fixed() || rewrite_path->removed_fdebug_prefix_map()) {
+      std::vector<string> parsed_args = parser.GetParsedArgs();
+      if (req->expanded_arg_size() > 0) {
+        req->clear_expanded_arg();
+        std::copy(parsed_args.begin(), parsed_args.end(),
+                  RepeatedFieldBackInserter(req->mutable_expanded_arg()));
+      } else {
+        req->clear_arg();
+        std::copy(parsed_args.begin(), parsed_args.end(),
+                  RepeatedFieldBackInserter(req->mutable_arg()));
+      }
+
+      CommandSpec* normalized_spec = req->mutable_command_spec();
+      if (fix_path.is_fixed()) {
+        normalized_spec->mutable_comment()->append(
+            " args:cwd:" + absl::StrJoin(normalize_weak_relative_for_arg, ","));
+      }
+      if (rewrite_path->removed_fdebug_prefix_map()) {
+        normalized_spec->mutable_comment()->append(
+            " args:removed_-fdebug-prefix-map");
+      }
+      if (rewrite_path->is_rewritten()) {
+        normalized_spec->mutable_comment()->append(
+            " args:" + debug_prefix_map_signature);
+      }
+    }
+  }
+
+  // This method needs cwd and filename in ExecReq_Input.
+  NormalizeExecReqInputOrderForCacheKey(req);
+
+  if (!(keep_pathnames_in_input & kAsIs)) {
+    bool is_rewritten_debug_prefix_map = false;
+    bool is_rewritten_cwd = false;
+    bool is_removed = false;
+    for (auto& input : *req->mutable_input()) {
+      if (keep_pathnames_in_input & kNormalizeWithDebugPrefixMap) {
+        RewritePathWithDebugPrefixMap(
+            debug_prefix_map, input.mutable_filename());
+        is_rewritten_debug_prefix_map = true;
+      } else if (keep_pathnames_in_input & kNormalizeWithCwd) {
+        input.set_filename(
+            PathResolver::WeakRelativePath(input.filename(), req->cwd()));
+        is_rewritten_cwd = true;
+      } else if (keep_pathnames_in_input == kOmit) {
+        input.clear_filename();
+        is_removed = true;
+      } else {
+        DLOG(FATAL) << "Unexpected keep_pathnames_in_input="
+                    << keep_pathnames_in_input;
+      }
+    }
+
+    CommandSpec* normalized_spec = req->mutable_command_spec();
+    if (is_rewritten_debug_prefix_map) {
+      normalized_spec->mutable_comment()->append(
+          " pathnames_in_input:" + debug_prefix_map_signature);
+    }
+    if (is_rewritten_cwd) {
+      normalized_spec->mutable_comment()->append(" pathnames_in_input:cwd");
+    }
+    if (is_removed) {
+      normalized_spec->mutable_comment()->append(" pathnames_in_input:removed");
+    }
+  }
+
+  if (!(keep_cwd & kAsIs)) {
+    bool is_rewritten = false;
+    bool is_removed = false;
+
+    static const char kPwd[] = "PWD=";
+
+    if (keep_cwd & kNormalizeWithDebugPrefixMap) {
+      // If there is PWD= in env, replace cwd with content of PWD=.
+      for (const auto& env_var : req->env()) {
+        if (strings::StartsWith(env_var, kPwd)) {
+          *req->mutable_cwd() = env_var.substr(strlen(kPwd));
+          break;
+        }
+      }
+      RewritePathWithDebugPrefixMap(debug_prefix_map, req->mutable_cwd());
+      is_rewritten = true;
+    } else {
+      req->clear_cwd();
+      is_removed = true;
+    }
+
+    // Rewrite PWD in env (debug prefix map case) or drop it.
+    auto it = req->mutable_env()->begin();
+    while (it != req->mutable_env()->end()) {
+      if (strings::StartsWith(*it, kPwd)) {
+        if (keep_cwd & kNormalizeWithDebugPrefixMap) {
+          string path = it->substr(strlen(kPwd));
+          RewritePathWithDebugPrefixMap(debug_prefix_map, &path);
+          *it = StrCat(kPwd, path);
+          is_rewritten = true;
+          ++it;
+        } else {
+          it = req->mutable_env()->erase(it);
+          is_removed = true;
+        }
+      } else {
+        ++it;
+      }
+    }
+
+    CommandSpec* normalized_spec = req->mutable_command_spec();
+    if (is_rewritten) {
+      normalized_spec->mutable_comment()->append(
+          " cwd:" + debug_prefix_map_signature);
+    }
+    if (is_removed) {
+      normalized_spec->mutable_comment()->append(" cwd:removed");
+    }
+  }
+
+  // Normalize subprogram: path names are not needed for the cache key.
+  for (auto& s : *req->mutable_subprogram()) {
+    s.clear_path();
+  }
+
+  std::vector<string> new_env;  // env minus any DEVELOPER_DIR= entries
+  bool changed = false;
+  for (const auto& env_var : req->env()) {
+    if (strings::StartsWith(env_var, "DEVELOPER_DIR=")) {
+      changed = true;
+      continue;
+    }
+    new_env.push_back(env_var);
+  }
+  if (changed) {
+    req->clear_env();
+    for (auto&& env_var : new_env) {
+      req->add_env(std::move(env_var));
+    }
+  }
+}
+
+}  // namespace devtools_goma
diff --git a/lib/execreq_normalizer.h b/lib/execreq_normalizer.h
new file mode 100644
index 0000000..220f691
--- /dev/null
+++ b/lib/execreq_normalizer.h
@@ -0,0 +1,46 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_LIB_EXECREQ_NORMALIZER_H_
+#define DEVTOOLS_GOMA_LIB_EXECREQ_NORMALIZER_H_
+
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "compiler_specific.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_data.pb.h"
+MSVC_POP_WARNING()
+using std::string;
+
+namespace devtools_goma {
+
+// Normalizes |req| in place so that it can be used as a cache key.
+// |id| is used for logging purposes only.
+//
+// How to disable normalization?
+//   system_include_paths: set |normalize_include_path| false.
+//   args: make |normalize_weak_relative_for_arg| empty.
+//   normalization using fdebug_prefix_map: make |debug_prefix_map| empty.
+void NormalizeExecReqForCacheKey(
+    int id,
+    bool normalize_include_path,
+    bool is_linking,
+    const std::vector<string>& normalize_weak_relative_for_arg,
+    const std::map<string, string>& debug_prefix_map,
+    ExecReq* req);
+
+bool RewritePathWithDebugPrefixMap(  // true iff |*path| was rewritten
+    const std::map<string, string>& debug_prefix_map,
+    string* path);
+
+bool HasAmbiguityInDebugPrefixMap(  // true if a key prefixes another key
+    const std::map<string, string>& debug_prefix_map);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_LIB_EXECREQ_NORMALIZER_H_
diff --git a/lib/execreq_normalizer_test.cc b/lib/execreq_normalizer_test.cc
new file mode 100644
index 0000000..9528b24
--- /dev/null
+++ b/lib/execreq_normalizer_test.cc
@@ -0,0 +1,2293 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "execreq_normalizer.h"
+
+#include "compiler_flags.h"
+#include "execreq_verifier.h"
+#include "google/protobuf/text_format.h"
+#include "google/protobuf/util/message_differencer.h"
+#include "gtest/gtest.h"
+#include "path.h"
+#include "string_piece_utils.h"
+using google::protobuf::TextFormat;
+using google::protobuf::util::MessageDifferencer;
+
+namespace {
+
+const char kExecReqToNormalize[] = "command_spec {\n"
+    "  name: \"clang\"\n"
+    "  version: \"4.2.1[clang version 3.5.0 (trunk 214024)]\"\n"
+    "  target: \"x86_64-unknown-linux-gnu\"\n"
+    "  system_include_path: \"/tmp/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/tmp/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"clang\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"/tmp/src/third_party/include\"\n"
+    "arg: \"-Xclang\"\n"
+    "arg: \"/tmp/src/third_party/lib/libFindBadConstructs.so\"\n"
+    "arg: \"-gcc-toolchain=/tmp/src/third_party/target_toolchain\"\n"
+    "arg: \"-B/tmp/src/out/Release/bin\"\n"
+    "arg: \"--sysroot=/tmp/src/build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=/tmp/src/third_party/clang\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/tmp/src/out/Release\"\n"
+    "env: \"PWD=/tmp/src/out/Release\"\n"
+    "Input {\n"
+    "  filename: \"/tmp/src/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+const int kExecReqToNormalizeArgSize = 11;
+
+const char kExecReqToNormalizeGcc[] = "command_spec {\n"
+    "  name: \"gcc\"\n"
+    "  version: \"4.8[(Ubuntu 4.8.4-2ubuntu1~14.04) 4.8.4]\"\n"
+    "  target: \"x86_64-linux-gnu\"\n"
+    "  system_include_path: \"/tmp/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/tmp/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"gcc\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"/tmp/src/third_party/include\"\n"
+    "arg: \"-gcc-toolchain=/tmp/src/third_party/target_toolchain\"\n"
+    "arg: \"-B/tmp/src/out/Release/bin\"\n"
+    "arg: \"--sysroot=/tmp/src/build/linux/sysroot\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/tmp/src/out/Release\"\n"
+    "env: \"PWD=/tmp/src/out/Release\"\n"
+    "Input {\n"
+    "  filename: \"/tmp/src/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+const int kExecReqToNormalizeGccArgSize = 8;
+
+const char kExecReqToNormalizeRelativeArgs[] = "command_spec {\n"
+    "  name: \"clang\"\n"
+    "  version: \"4.2.1[clang version 3.5.0 (trunk 214024)]\"\n"
+    "  target: \"x86_64-unknown-linux-gnu\"\n"
+    "  system_include_path: \"/tmp/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/tmp/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"clang\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"../../third_party/include\"\n"
+    "arg: \"-Xclang\"\n"
+    "arg: \"../../third_party/lib/libFindBadConstructs.so\"\n"
+    "arg: \"-gcc-toolchain=../third_party/target_toolchain\"\n"
+    "arg: \"-B./bin\"\n"
+    "arg: \"--sysroot=../../build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=../../third_party/clang\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/tmp/src/out/Release\"\n"
+    "env: \"PWD=/tmp/src/out/Release\"\n"
+    "Input {\n"
+    "  filename: \"/tmp/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+const char kExecReqToNormalizeLink[] = "command_spec {\n"
+    "  name: \"gcc\"\n"
+    "  version: \"4.4.3[Ubuntu 4.4.3-4ubuntu5]\"\n"
+    "  target: \"x86_64-linux-gnu\"\n"
+    "  system_include_path: \"/tmp/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/tmp/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"gcc\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"/tmp/src/third_party/include\"\n"
+    "arg: \"-L\"\n"
+    "arg: \"/tmp/src/third_party/lib\"\n"
+    "arg: \"-Xclang\"\n"
+    "arg: \"/tmp/src/third_party/lib/libFindBadConstructs.so\"\n"
+    "arg: \"-B/tmp/src/out/Release/bin\"\n"
+    "arg: \"--sysroot=/tmp/src/build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=/tmp/src/third_party/clang\"\n"
+    "arg: \"hello.o\"\n"
+    "cwd: \"/tmp/src/out/Release\"\n"
+    "env: \"PWD=/tmp/src/out/Release\"\n"
+    "Input {\n"
+    "  filename: \"/tmp/hello.o\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+const char kExecReqToNormalizeWinAlice[] = R"(command_spec {
+  name: "cl.exe"
+  version: "19.00.24215.1"
+  target: "x64"
+  binary_hash: "7928c17d5185cf7dca794e9970e2463985315adf832f3fde7becfc71673d2fd3"
+  local_compiler_path: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\vc\\bin\\amd64\\cl.exe"
+  cxx_system_include_path: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\win_sdk\\bin\\..\\..\\win_sdk\\include\\10.0.15063.0\\um"
+  cxx_system_include_path: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\win_sdk\\bin\\..\\..\\win_sdk\\include\\10.0.15063.0\\shared"
+  cxx_system_include_path: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\win_sdk\\bin\\..\\..\\win_sdk\\include\\10.0.15063.0\\winrt"
+  cxx_system_include_path: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\win_sdk\\bin\\..\\..\\win_sdk\\include\\10.0.15063.0\\ucrt"
+  cxx_system_include_path: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\win_sdk\\bin\\..\\..\\vc\\include"
+  cxx_system_include_path: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\win_sdk\\bin\\..\\..\\vc\\atlmfc\\include"
+}
+arg: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\vc\\bin\\amd64/cl.exe"
+arg: "/nologo"
+arg: "/showIncludes"
+arg: "/FC"
+arg: "@obj/chrome/browser/ui/ui_3/session_crashed_bubble_view.obj.rsp"
+arg: "/c"
+arg: "../../chrome/browser/ui/views/session_crashed_bubble_view.cc"
+arg: "/Foobj/chrome/browser/ui/ui_3/session_crashed_bubble_view.obj"
+arg: "/Fdobj/chrome/browser/ui/ui_3_cc.pdb"
+cwd: "C:\\src\\chromium\\src\\out\\Alice"
+Input {
+  filename: "..\\..\\third_party\\boringssl\\src\\include\\openssl\\base.h"
+  hash_key: "12812aef7084e6a0764657261ad92b9ef93a5e20aea26675324ad4b3c761a863"
+}
+Input {
+  filename: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\win_sdk\\bin\\..\\..\\vc\\include\\algorithm"
+  hash_key: "40908bd6f47550869ac26ac15b189d72f67187123066d8dfd2cf4435e0d53fc6"
+}
+expanded_arg: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\vc\\bin\\amd64/cl.exe"
+expanded_arg: "/nologo"
+expanded_arg: "/showIncludes"
+expanded_arg: "/FC"
+expanded_arg: "-DI18N_PHONENUMBERS_NO_THREAD_SAFETY=1"
+expanded_arg: "-Igen"
+expanded_arg: "-I../../third_party/libaddressinput/src/cpp/include"
+expanded_arg: "/D__DATE__="
+expanded_arg: "/D__TIME__="
+expanded_arg: "/D__TIMESTAMP__="
+expanded_arg: "/Gy"
+expanded_arg: "/FS"
+expanded_arg: "/bigobj"
+expanded_arg: "/d2FastFail"
+expanded_arg: "/Zc:sizedDealloc-"
+expanded_arg: "/W4"
+expanded_arg: "/WX"
+expanded_arg: "/utf-8"
+expanded_arg: "/O1"
+expanded_arg: "/Ob2"
+expanded_arg: "/Oy-"
+expanded_arg: "/d2Zi+"
+expanded_arg: "/Zc:inline"
+expanded_arg: "/Gw"
+expanded_arg: "/Oi"
+expanded_arg: "/MD"
+expanded_arg: "/wd4267"
+expanded_arg: "/TP"
+expanded_arg: "/wd4577"
+expanded_arg: "/GR-"
+expanded_arg: "/c"
+expanded_arg: "../../chrome/browser/ui/views/session_crashed_bubble_view.cc"
+expanded_arg: "/Foobj/chrome/browser/ui/ui_3/session_crashed_bubble_view.obj"
+expanded_arg: "/Fdobj/chrome/browser/ui/ui_3_cc.pdb"
+)";
+
+const char kExecReqToNormalizeWinBob[] = R"(command_spec {
+  name: "cl.exe"
+  version: "19.00.24215.1"
+  target: "x64"
+  binary_hash: "7928c17d5185cf7dca794e9970e2463985315adf832f3fde7becfc71673d2fd3"
+  local_compiler_path: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\vc\\bin\\amd64\\cl.exe"
+  cxx_system_include_path: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\win_sdk\\bin\\..\\..\\win_sdk\\include\\10.0.15063.0\\um"
+  cxx_system_include_path: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\win_sdk\\bin\\..\\..\\win_sdk\\include\\10.0.15063.0\\shared"
+  cxx_system_include_path: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\win_sdk\\bin\\..\\..\\win_sdk\\include\\10.0.15063.0\\winrt"
+  cxx_system_include_path: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\win_sdk\\bin\\..\\..\\win_sdk\\include\\10.0.15063.0\\ucrt"
+  cxx_system_include_path: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\win_sdk\\bin\\..\\..\\vc\\include"
+  cxx_system_include_path: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\win_sdk\\bin\\..\\..\\vc\\atlmfc\\include"
+}
+arg: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\vc\\bin\\amd64/cl.exe"
+arg: "/nologo"
+arg: "/showIncludes"
+arg: "/FC"
+arg: "@obj/chrome/browser/ui/ui_3/session_crashed_bubble_view.obj.rsp"
+arg: "/c"
+arg: "../../chrome/browser/ui/views/session_crashed_bubble_view.cc"
+arg: "/Foobj/chrome/browser/ui/ui_3/session_crashed_bubble_view.obj"
+arg: "/Fdobj/chrome/browser/ui/ui_3_cc.pdb"
+cwd: "C:\\src\\chromium\\src\\out\\Bob"
+Input {
+  filename: "..\\..\\third_party\\boringssl\\src\\include\\openssl\\base.h"
+  hash_key: "12812aef7084e6a0764657261ad92b9ef93a5e20aea26675324ad4b3c761a863"
+}
+Input {
+  filename: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\win_sdk\\bin\\..\\..\\vc\\include\\algorithm"
+  hash_key: "40908bd6f47550869ac26ac15b189d72f67187123066d8dfd2cf4435e0d53fc6"
+}
+expanded_arg: "c:\\src\\chromium\\src\\third_party\\depot_tools\\win_toolchain\\vs_files\\f53e4598951162bad6330f7a167486c7ae5db1e5\\vc\\bin\\amd64/cl.exe"
+expanded_arg: "/nologo"
+expanded_arg: "/showIncludes"
+expanded_arg: "/FC"
+expanded_arg: "-DI18N_PHONENUMBERS_NO_THREAD_SAFETY=1"
+expanded_arg: "-Igen"
+expanded_arg: "-I../../third_party/libaddressinput/src/cpp/include"
+expanded_arg: "/D__DATE__="
+expanded_arg: "/D__TIME__="
+expanded_arg: "/D__TIMESTAMP__="
+expanded_arg: "/Gy"
+expanded_arg: "/FS"
+expanded_arg: "/bigobj"
+expanded_arg: "/d2FastFail"
+expanded_arg: "/Zc:sizedDealloc-"
+expanded_arg: "/W4"
+expanded_arg: "/WX"
+expanded_arg: "/utf-8"
+expanded_arg: "/O1"
+expanded_arg: "/Ob2"
+expanded_arg: "/Oy-"
+expanded_arg: "/d2Zi+"
+expanded_arg: "/Zc:inline"
+expanded_arg: "/Gw"
+expanded_arg: "/Oi"
+expanded_arg: "/MD"
+expanded_arg: "/wd4267"
+expanded_arg: "/TP"
+expanded_arg: "/wd4577"
+expanded_arg: "/GR-"
+expanded_arg: "/c"
+expanded_arg: "../../chrome/browser/ui/views/session_crashed_bubble_view.cc"
+expanded_arg: "/Foobj/chrome/browser/ui/ui_3/session_crashed_bubble_view.obj"
+expanded_arg: "/Fdobj/chrome/browser/ui/ui_3_cc.pdb"
+)";
+
+const char kExecReqToNormalizeWin[] = "command_spec {\n"
+    "  name: \"cl.exe\"\n"
+    "  version: \"15.00.30729.01\"\n"
+    "  target: \"80x86\"\n"
+    "  local_compiler_path: \"c:\\\\Program Files (x86)"
+        "\\\\Microsoft Visual Studio 9.0\\\\VC\\\\BIN\\\\cl.exe\"\n"
+    "  system_include_path: \"c:\\\\Program Files (x86)"
+        "\\\\Microsoft Visual Studio 9.0\\\\VC\\\\INCLUDE\"\n"
+    "  cxx_system_include_path: \"c:\\\\Program Files (x86)"
+        "\\\\Microsoft Visual Studio 9.0\\\\VC\\\\INCLUDE\"\n"
+    "}\n"
+    "arg: \"cl\"\n"
+    "arg: \"/TP\"\n"
+    "arg: \"/showIncludes\"\n"
+    "arg: \"/Z7\"\n"
+    "arg: \"/FoC:\\\\src\\\\goma\\\\client\\\\build\\\\Debug"
+        "\\\\vc\\\\stdafx.obj\"\n"
+    "arg: \"stdafx.cpp\"\n"
+    "cwd: \"C:\\\\src\\\\goma\\\\client\\\\test\\\\vc\"\n"
+    "Input {\n"
+    "  filename: \"C:\\\\src\\\\goma\\\\client\\\\test\\\\vc\\\\stdafx.cpp\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+const char kExecReqToNormalizeWinPNaCl[] =
+    "command_spec <\n"
+    "  name: \"clang++\"\n"
+    "  version: \"4.2.1[clang version 3.7.0 (https://chromium.googlesource.com"
+        "/a/native_client/pnacl-clang.git "
+        "ce163fdd0f16b4481e5cf77a16d45e9b4dc8300e"
+        ") (https://chromium.googlesource.com/a/native_client/pnacl-llvm.git "
+        "83991f993fea6cd9c515df12c3270ab9c0746215)]\"\n"
+    "  target: \"x86_64--nacl\"\n"
+    "  binary_hash: \"b15df3ea17efb0f8e7a617dd5727aec329eae89a5c8d42dedc9602f9"
+        "ae433c42\"\n"
+    "  local_compiler_path: \"C:\\\\Users\\\\dummy\\\\pnacl_newlib\\\\bin"
+        "\\\\x86_64-nacl-clang++.exe\"\n"
+    "  cxx_system_include_path: \"C:\\\\Users\\\\dummy\\\\pnacl_newlib\\\\"
+    "bin/../x86_64-nacl/include/c++/v1\"\n"
+    "  cxx_system_include_path: \"C:\\\\Users\\\\dummy\\\\pnacl_newlib\\\\"
+        "bin\\\\..\\\\lib\\\\clang\\\\3.7.0\\\\include\"\n"
+    "  cxx_system_include_path: \"C:\\\\Users\\\\dummy\\\\pnacl_newlib\\\\"
+        "bin/../x86_64-nacl\\\\include\"\n"
+    ">\n"
+    "arg: \"../../native_client/toolchain/win_x86/pnacl_newlib/bin/"
+        "x86_64-nacl-clang++.exe\"\n"
+    "arg: \"-MMD\"\n"
+    "arg: \"-MF\"\n"
+    "arg: \"clang_newlib_x64/obj/chrome/test/data/nacl/"
+        "ppapi_crash_via_exit_call_nexe/ppapi_crash_via_exit_call.o.d\"\n"
+    "arg: \"-Iclang_newlib_x64/gen\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"../../chrome/test/data/nacl/crash/ppapi_crash_via_exit_call.cc\"\n"
+    "arg: \"-o\"\n"
+    "arg: \"clang_newlib_x64/obj/chrome/test/data/nacl/"
+        "ppapi_crash_via_exit_call_nexe/ppapi_crash_via_exit_call.o\"\n"
+    "env: \"PATHEXT=.COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC\"\n"
+    "env: \"SystemRoot=C:\\\\Windows\"\n"
+    "cwd: \"C:\\\\Users\\\\dummy\\\\out\\\\Default\"\n"
+    "Input {\n"
+    "  filename: \"C:\\\\Users\\\\dummy\\\\pnacl_newlib\\\\bin\\\\..\\\\"
+        "lib\\\\clang\\\\3.7.0\\\\include\\\\limits.h\"\n"
+    "  hash_key: \"48cdf007c86904f26d7dcd38f04f69d21022add3e48aab145a3d22"
+        "16c061840d\"\n"
+    "}\n";
+
+const char kExecReqToNormalizePNaClTranslate[] =
+    "command_spec <\n"
+    "  name: \"clang++\"\n"
+    "  version: \"4.2.1[clang version 3.7.0 (https://chromium.googlesource.com"
+        "/a/native_client/pnacl-clang.git "
+        "ce163fdd0f16b4481e5cf77a16d45e9b4dc8300e"
+        ") (https://chromium.googlesource.com/a/native_client/pnacl-llvm.git "
+        "83991f993fea6cd9c515df12c3270ab9c0746215)]\"\n"
+    "  target: \"le32-unknown-nacl\"\n"
+    "  binary_hash: \"b15df3ea17efb0f8e7a617dd5727aec329eae89a5c8d42dedc9602f9"
+        "ae433c42\"\n"
+    "  local_compiler_path: \"/dummy/pnacl_newlib/bin/pnacl-clang++\"\n"
+    "  cxx_system_include_path: \"/dummy/pnacl_newlib/"
+    "bin/../x86_64-nacl/include/c++/v1\"\n"
+    "  cxx_system_include_path: \"/dummy/pnacl_newlib/"
+        "bin/../lib/clang/3.7.0/include\"\n"
+    "  cxx_system_include_path: \"/dummy/pnacl_newlib/"
+        "bin/../x86_64-nacl/include\"\n"
+    ">\n"
+    "arg: \"../../native_client/toolchain/linux_x86/pnacl_newlib/bin/"
+        "pnacl-clang++\"\n"
+    "arg: \"-MMD\"\n"
+    "arg: \"-MF\"\n"
+    "arg: \"clang_newlib_x64/obj/chrome/test/data/nacl/"
+        "ppapi_crash_via_exit_call_nexe/ppapi_crash_via_exit_call.o.d\"\n"
+    "arg: \"-Iclang_newlib_x64/gen\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"../../chrome/test/data/nacl/crash/ppapi_crash_via_exit_call.cc\"\n"
+    "arg: \"-o\"\n"
+    "arg: \"clang_newlib_x64/obj/chrome/test/data/nacl/"
+        "ppapi_crash_via_exit_call_nexe/ppapi_crash_via_exit_call.o\"\n"
+    "arg: \"--pnacl-allow-translate\"\n"
+    "arg: \"-arch\"\n"
+    "arg: \"x86-32-nonsfi\"\n"
+    "cwd: \"/dummy/out/Default\"\n"
+    "Input {\n"
+    "  filename: \"/dummy/pnacl_newlib/bin/../"
+        "lib/clang/3.7.0/include/limits.h\"\n"
+    "  hash_key: \"48cdf007c86904f26d7dcd38f04f69d21022add3e48aab145a3d22"
+        "16c061840d\"\n"
+    "}\n";
+
+const char kExecReqToNormalizeInputOrder[] = "command_spec {\n"
+    "  name: \"gcc\"\n"
+    "  version: \"4.4.3[Ubuntu 4.4.3-4ubuntu5]\"\n"
+    "  target: \"x86_64-linux-gnu\"\n"
+    "  system_include_path: \"/tmp/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/tmp/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"gcc\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"/tmp/src/third_party/include\"\n"
+    "arg: \"-Xclang\"\n"
+    "arg: \"/tmp/src/third_party/lib/libFindBadConstructs.so\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/tmp/src/out/Release\"\n"
+    "env: \"PWD=/tmp/src/out/Release\"\n"
+    "Input {\n"
+    "  filename: \"/tmp/hello1.c\"\n"
+    "  hash_key: \"aaaaaaaaaa\"\n"
+    "}\n"
+    "Input {\n"
+    "  filename: \"/tmp/src/out/Release/hello.c\"\n"
+    "  hash_key: \"bbbbbbbbbb\"\n"
+    "}\n"
+    "Input {\n"
+    "  filename: \"/tmp/test/hello2.c\"\n"
+    "  hash_key: \"cccccccccc\"\n"
+    "}\n";
+
+const char kExecReqToNormalizeContent[] = "command_spec {\n"
+    "  name: \"gcc\"\n"
+    "  version: \"4.4.3[Ubuntu 4.4.3-4ubuntu5]\"\n"
+    "  target: \"x86_64-linux-gnu\"\n"
+    "  system_include_path: \"/tmp/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/tmp/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"gcc\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"/tmp/src/third_party/include\"\n"
+    "arg: \"-Xclang\"\n"
+    "arg: \"/tmp/src/third_party/lib/libFindBadConstructs.so\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/tmp/src/out/Release\"\n"
+    "env: \"PWD=/tmp/src/out/Release\"\n"
+    "Input {\n"
+    "  filename: \"/tmp/hello.c\"\n"
+    "  hash_key: \"dummy_hash_key\"\n"
+    "  content {\n"
+    "    blob_type: FILE\n"
+    "    content: \"0123456789\"\n"
+    "    file_size: 10\n"
+    "  }\n"
+    "}\n";
+
+// TODO: Extract this to a separate file.
+const char kExecReqToAmbiguaousDebugPrefixMap[] = R"(command_spec {
+  name: "clang"
+  version: "4.2.1[clang version 5.0.0 (trunk 300839)]"
+  target: "x86_64-unknown-linux-gnu"
+  binary_hash: "5f650cc98121b383aaa25e53a135d8b4c5e0748f25082b4f2d428a5934d22fda"
+  local_compiler_path: "../../third_party/llvm-build/Release+Asserts/bin/clang++"
+  cxx_system_include_path: "../../build/linux/debian_jessie_amd64-sysroot/usr/lib/gcc/x86_64-linux-gnu/4.8/../../../../include/c++/4.8"
+  cxx_system_include_path: "../../build/linux/debian_jessie_amd64-sysroot/usr/lib/gcc/x86_64-linux-gnu/4.8/../../../../include/x86_64-linux-gnu/c++/4.8"
+  cxx_system_include_path: "../../build/linux/debian_jessie_amd64-sysroot/usr/lib/gcc/x86_64-linux-gnu/4.8/../../../../include/c++/4.8/backward"
+  cxx_system_include_path: "/home/goma/chromium/src/third_party/llvm-build/Release+Asserts/lib/clang/5.0.0/include"
+  cxx_system_include_path: "../../build/linux/debian_jessie_amd64-sysroot/usr/include/x86_64-linux-gnu"
+  cxx_system_include_path: "../../build/linux/debian_jessie_amd64-sysroot/usr/include"
+}
+arg: "../../third_party/llvm-build/Release+Asserts/bin/clang++"
+arg: "-MMD"
+arg: "-MF"
+arg: "obj/base/allocator/tcmalloc/malloc_hook.o.d"
+arg: "-DNO_HEAP_CHECK"
+arg: "-DV8_DEPRECATION_WARNINGS"
+arg: "-DDCHECK_ALWAYS_ON=1"
+arg: "-DUSE_UDEV"
+arg: "-DUSE_AURA=1"
+arg: "-DUSE_PANGO=1"
+arg: "-DUSE_CAIRO=1"
+arg: "-DUSE_GLIB=1"
+arg: "-DUSE_NSS_CERTS=1"
+arg: "-DUSE_X11=1"
+arg: "-DFULL_SAFE_BROWSING"
+arg: "-DSAFE_BROWSING_CSD"
+arg: "-DSAFE_BROWSING_DB_LOCAL"
+arg: "-DCHROMIUM_BUILD"
+arg: "-DFIELDTRIAL_TESTING_ENABLED"
+arg: "-DCR_CLANG_REVISION=\"300839-1\""
+arg: "-D_FILE_OFFSET_BITS=64"
+arg: "-D_LARGEFILE_SOURCE"
+arg: "-D_LARGEFILE64_SOURCE"
+arg: "-DNDEBUG"
+arg: "-DNVALGRIND"
+arg: "-DDYNAMIC_ANNOTATIONS_ENABLED=0"
+arg: "-DTCMALLOC_DONT_REPLACE_SYSTEM_ALLOC"
+arg: "-I../../base/allocator"
+arg: "-I../../third_party/tcmalloc/chromium/src/base"
+arg: "-I../../third_party/tcmalloc/chromium/src"
+arg: "-I../.."
+arg: "-Igen"
+arg: "-fno-strict-aliasing"
+arg: "--param=ssp-buffer-size=4"
+arg: "-fstack-protector"
+arg: "-Wno-builtin-macro-redefined"
+arg: "-D__DATE__="
+arg: "-D__TIME__="
+arg: "-D__TIMESTAMP__="
+arg: "-funwind-tables"
+arg: "-fPIC"
+arg: "-pipe"
+arg: "-B../../third_party/binutils/Linux_x64/Release/bin"
+arg: "-fcolor-diagnostics"
+arg: "-fdebug-prefix-map=/home/goma/chromium/src=."
+arg: "-m64"
+arg: "-march=x86-64"
+arg: "-pthread"
+arg: "-fomit-frame-pointer"
+arg: "-g1"
+arg: "--sysroot=../../build/linux/debian_jessie_amd64-sysroot"
+arg: "-fvisibility=hidden"
+arg: "-Xclang"
+arg: "-load"
+arg: "-Xclang"
+arg: "../../third_party/llvm-build/Release+Asserts/lib/libFindBadConstructs.so"
+arg: "-Xclang"
+arg: "-add-plugin"
+arg: "-Xclang"
+arg: "find-bad-constructs"
+arg: "-Xclang"
+arg: "-plugin-arg-find-bad-constructs"
+arg: "-Xclang"
+arg: "check-auto-raw-pointer"
+arg: "-Xclang"
+arg: "-plugin-arg-find-bad-constructs"
+arg: "-Xclang"
+arg: "check-ipc"
+arg: "-Wheader-hygiene"
+arg: "-Wstring-conversion"
+arg: "-Wtautological-overlap-compare"
+arg: "-Werror"
+arg: "-Wall"
+arg: "-Wno-unused-variable"
+arg: "-Wno-missing-field-initializers"
+arg: "-Wno-unused-parameter"
+arg: "-Wno-c++11-narrowing"
+arg: "-Wno-covered-switch-default"
+arg: "-Wno-unneeded-internal-declaration"
+arg: "-Wno-inconsistent-missing-override"
+arg: "-Wno-undefined-var-template"
+arg: "-Wno-nonportable-include-path"
+arg: "-Wno-address-of-packed-member"
+arg: "-Wno-unused-lambda-capture"
+arg: "-Wno-user-defined-warnings"
+arg: "-Wno-reorder"
+arg: "-Wno-unused-function"
+arg: "-Wno-unused-local-typedefs"
+arg: "-Wno-unused-private-field"
+arg: "-Wno-sign-compare"
+arg: "-Wno-unused-result"
+arg: "-O2"
+arg: "-fno-ident"
+arg: "-fdata-sections"
+arg: "-ffunction-sections"
+arg: "-fvisibility-inlines-hidden"
+arg: "-std=gnu++11"
+arg: "-fno-rtti"
+arg: "-fno-exceptions"
+arg: "-Wno-deprecated"
+arg: "-c"
+arg: "../../third_party/tcmalloc/chromium/src/malloc_hook.cc"
+arg: "-o"
+arg: "obj/base/allocator/tcmalloc/malloc_hook.o"
+arg: "-fuse-init-array"
+env: "PWD=/home/goma/chromium/src/out/rel_ng"
+cwd: "/home/goma/chromium/src/out/rel_ng"
+subprogram {
+  path: "home/goma/chromium/src/out/rel_ng/../../third_party/llvm-build/Release+Asserts/lib/libFindBadConstructs.so"
+  binary_hash: "119407f17eb4777402734571183eb5518806900d9c7c7ce5ad71d242aad249f0"
+}
+subprogram {
+  path: "/home/goma/chromium/src/out/rel_ng/../../third_party/binutils/Linux_x64/Release/bin/objcopy"
+  binary_hash: "9ccd249906d57ef2ccd24cf19c67c8d645d309c49c284af9d42813caf87fba7e"
+}
+requester_info {
+  username: "goma"
+  compiler_proxy_id: "goma@goma.example.com:8088/1494385386/0"
+  api_version: 2
+  pid: 94105
+  retry: 0
+}
+hermetic_mode: true
+experimental_is_external_user: false
+)";
+
+const char kExecReqToNormalizeDebugPrefixMapAlice[] = "command_spec {\n"
+    "  name: \"clang\"\n"
+    "  version: \"4.2.1[clang version 3.5.0 (trunk 214024)]\"\n"
+    "  target: \"x86_64-unknown-linux-gnu\"\n"
+    "  system_include_path: \"/home/alice/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/home/alice/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"clang\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"/home/alice/src/third_party/include\"\n"
+    "arg: \"-Xclang\"\n"
+    "arg: \"/home/alice/src/third_party/lib/libFindBadConstructs.so\"\n"
+    "arg: \"-gcc-toolchain=/home/alice/src/third_party/target_toolchain\"\n"
+    "arg: \"-B/home/alice/src/out/Release/bin\"\n"
+    "arg: \"--sysroot=/home/alice/src/build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=/home/alice/src/third_party/clang\"\n"
+    "arg: \"-g\"\n"
+    "arg: \"-fdebug-prefix-map=/home/alice=/base_dir\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/home/alice/src/out/Release\"\n"
+    "env: \"PWD=/tmp/src/out/Release\"\n"
+    "Input {\n"
+    "  filename: \"/home/alice/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+const char kExecReqToNormalizeDebugPrefixMapBob[] = "command_spec {\n"
+    "  name: \"clang\"\n"
+    "  version: \"4.2.1[clang version 3.5.0 (trunk 214024)]\"\n"
+    "  target: \"x86_64-unknown-linux-gnu\"\n"
+    "  system_include_path: \"/home/bob/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/home/bob/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"clang\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"/home/bob/src/third_party/include\"\n"
+    "arg: \"-Xclang\"\n"
+    "arg: \"/home/bob/src/third_party/lib/libFindBadConstructs.so\"\n"
+    "arg: \"-gcc-toolchain=/home/bob/src/third_party/target_toolchain\"\n"
+    "arg: \"-B/home/bob/src/out/Release/bin\"\n"
+    "arg: \"--sysroot=/home/bob/src/build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=/home/bob/src/third_party/clang\"\n"
+    "arg: \"-g\"\n"
+    "arg: \"-fdebug-prefix-map=/home/bob=/base_dir\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/home/bob/src/out/Release\"\n"
+    "env: \"PWD=/tmp/src/out/Release\"\n"
+    "Input {\n"
+    "  filename: \"/home/bob/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+// Test case for arg "-fdebug-prefix-map=/proc/self/cwd="
+const char kExecReqToNormalizeDebugPrefixMapAlicePSC[] = "command_spec {\n"
+    "  name: \"clang\"\n"
+    "  version: \"4.2.1[clang version 3.5.0 (trunk 214024)]\"\n"
+    "  target: \"x86_64-unknown-linux-gnu\"\n"
+    "  system_include_path: \"/home/alice/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/home/alice/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"clang\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"/home/alice/src/third_party/include\"\n"
+    "arg: \"-Xclang\"\n"
+    "arg: \"/home/alice/src/third_party/lib/libFindBadConstructs.so\"\n"
+    "arg: \"-gcc-toolchain=/home/alice/src/third_party/target_toolchain\"\n"
+    "arg: \"-B/home/alice/src/out/Release/bin\"\n"
+    "arg: \"--sysroot=/home/alice/src/build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=/home/alice/src/third_party/clang\"\n"
+    "arg: \"-g\"\n"
+    "arg: \"-fdebug-prefix-map=/proc/self/cwd=\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/home/alice/src\"\n"
+    "env: \"PWD=/proc/self/cwd\"\n"
+    "Input {\n"
+    "  filename: \"/home/alice/src/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+const char kExecReqToNormalizeDebugPrefixMapBobPSC[] = "command_spec {\n"
+    "  name: \"clang\"\n"
+    "  version: \"4.2.1[clang version 3.5.0 (trunk 214024)]\"\n"
+    "  target: \"x86_64-unknown-linux-gnu\"\n"
+    "  system_include_path: \"/home/bob/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/home/bob/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"clang\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"/home/bob/src/third_party/include\"\n"
+    "arg: \"-Xclang\"\n"
+    "arg: \"/home/bob/src/third_party/lib/libFindBadConstructs.so\"\n"
+    "arg: \"-gcc-toolchain=/home/bob/src/third_party/target_toolchain\"\n"
+    "arg: \"-B/home/bob/src/out/Release/bin\"\n"
+    "arg: \"--sysroot=/home/bob/src/build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=/home/bob/src/third_party/clang\"\n"
+    "arg: \"-g\"\n"
+    "arg: \"-fdebug-prefix-map=/proc/self/cwd=\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/home/bob/src\"\n"
+    "env: \"PWD=/proc/self/cwd\"\n"
+    "Input {\n"
+    "  filename: \"/home/bob/src/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+// Test case where both "-fdebug-prefix-map=/proc/self/cwd=" and
+// "-fdebug-prefix-map=/home/$USER/src/=" are given as args.
+// TODO: Add a test to confirm that
+// the build is deterministic in the way we intended.
+const char kExecReqToNormalize2DebugPrefixMapAlicePSC[] = "command_spec {\n"
+    "  name: \"clang\"\n"
+    "  version: \"4.2.1[clang version 3.5.0 (trunk 214024)]\"\n"
+    "  target: \"x86_64-unknown-linux-gnu\"\n"
+    "  system_include_path: \"/home/alice/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/home/alice/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"clang\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"/home/alice/src/third_party/include\"\n"
+    "arg: \"-Xclang\"\n"
+    "arg: \"/home/alice/src/third_party/lib/libFindBadConstructs.so\"\n"
+    "arg: \"-gcc-toolchain=/home/alice/src/third_party/target_toolchain\"\n"
+    "arg: \"-B/home/alice/src/out/Release/bin\"\n"
+    "arg: \"--sysroot=/home/alice/src/build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=/home/alice/src/third_party/clang\"\n"
+    "arg: \"-g\"\n"
+    "arg: \"-fdebug-prefix-map=/proc/self/cwd=\"\n"
+    "arg: \"-fdebug-prefix-map=/home/alice/src/=\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/home/alice/src\"\n"
+    "env: \"PWD=/proc/self/cwd\"\n"
+    "Input {\n"
+    "  filename: \"/home/alice/src/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+const char kExecReqToNormalize2DebugPrefixMapBobPSC[] = "command_spec {\n"
+    "  name: \"clang\"\n"
+    "  version: \"4.2.1[clang version 3.5.0 (trunk 214024)]\"\n"
+    "  target: \"x86_64-unknown-linux-gnu\"\n"
+    "  system_include_path: \"/home/bob/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/home/bob/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"clang\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"/home/bob/src/third_party/include\"\n"
+    "arg: \"-Xclang\"\n"
+    "arg: \"/home/bob/src/third_party/lib/libFindBadConstructs.so\"\n"
+    "arg: \"-gcc-toolchain=/home/bob/src/third_party/target_toolchain\"\n"
+    "arg: \"-B/home/bob/src/out/Release/bin\"\n"
+    "arg: \"--sysroot=/home/bob/src/build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=/home/bob/src/third_party/clang\"\n"
+    "arg: \"-g\"\n"
+    "arg: \"-fdebug-prefix-map=/proc/self/cwd=\"\n"
+    "arg: \"-fdebug-prefix-map=/home/bob/src/=\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/home/bob/src\"\n"
+    "env: \"PWD=/proc/self/cwd\"\n"
+    "Input {\n"
+    "  filename: \"/home/bob/src/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+// Test case where both "-fdebug-prefix-map=/proc/self/cwd=" and
+// "-fdebug-prefix-map=/home/$USER/src/=" are given as args to gcc.
+const char kExecReqToNormalize2DebugPrefixMapAlicePSCGCC[] = "command_spec {\n"
+    "  name: \"gcc\"\n"
+    "  version: \"4.4.3[Ubuntu 4.4.3-4ubuntu5]\"\n"
+    "  target: \"x86_64-linux-gnu\"\n"
+    "  system_include_path: \"/home/alice/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/home/alice/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"gcc\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"third_party/include\"\n"
+    "arg: \"-gcc-toolchain=third_party/target_toolchain\"\n"
+    "arg: \"-Bout/Release/bin\"\n"
+    "arg: \"--sysroot=/home/alice/src/build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=/home/alice/src/third_party/clang\"\n"
+    "arg: \"-g\"\n"
+    "arg: \"-fdebug-prefix-map=/proc/self/cwd=\"\n"
+    "arg: \"-fdebug-prefix-map=/home/alice/src/=\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/home/alice/src\"\n"
+    "env: \"PWD=/proc/self/cwd\"\n"
+    "Input {\n"
+    "  filename: \"/home/alice/src/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+// Bob's counterpart of kExecReqToNormalize2DebugPrefixMapAlicePSCGCC: the
+// text is the same except that every /home/alice path is /home/bob.
+const char kExecReqToNormalize2DebugPrefixMapBobPSCGCC[] = "command_spec {\n"
+    "  name: \"gcc\"\n"
+    "  version: \"4.4.3[Ubuntu 4.4.3-4ubuntu5]\"\n"
+    "  target: \"x86_64-linux-gnu\"\n"
+    "  system_include_path: \"/home/bob/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/home/bob/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"gcc\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"third_party/include\"\n"
+    "arg: \"-gcc-toolchain=third_party/target_toolchain\"\n"
+    "arg: \"-Bout/Release/bin\"\n"
+    "arg: \"--sysroot=/home/bob/src/build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=/home/bob/src/third_party/clang\"\n"
+    "arg: \"-g\"\n"
+    "arg: \"-fdebug-prefix-map=/proc/self/cwd=\"\n"
+    "arg: \"-fdebug-prefix-map=/home/bob/src/=\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/home/bob/src\"\n"
+    "env: \"PWD=/proc/self/cwd\"\n"
+    "Input {\n"
+    "  filename: \"/home/bob/src/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+// Test case for arg "-fdebug-prefix-map=/proc/self/cwd=" in gcc.
+// Alice's variant: system include paths are under /tmp/src and all path
+// args are relative, so only cwd and the input filename mention
+// /home/alice.
+const char kExecReqToNormalizeDebugPrefixMapAlicePSCGCC[] = "command_spec {\n"
+    "  name: \"gcc\"\n"
+    "  version: \"4.4.3[Ubuntu 4.4.3-4ubuntu5]\"\n"
+    "  target: \"x86_64-linux-gnu\"\n"
+    "  system_include_path: \"/tmp/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/tmp/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"gcc\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"third_party/include\"\n"
+    "arg: \"-gcc-toolchain=third_party/target_toolchain\"\n"
+    "arg: \"-Bout/Release/bin\"\n"
+    "arg: \"--sysroot=src/build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=src/third_party/clang\"\n"
+    "arg: \"-g\"\n"
+    "arg: \"-fdebug-prefix-map=/proc/self/cwd=\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/home/alice/src\"\n"
+    "env: \"PWD=/proc/self/cwd\"\n"
+    "Input {\n"
+    "  filename: \"/home/alice/src/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+// Bob's counterpart of kExecReqToNormalizeDebugPrefixMapAlicePSCGCC: only
+// cwd and the input filename differ (/home/bob instead of /home/alice).
+const char kExecReqToNormalizeDebugPrefixMapBobPSCGCC[] = "command_spec {\n"
+    "  name: \"gcc\"\n"
+    "  version: \"4.4.3[Ubuntu 4.4.3-4ubuntu5]\"\n"
+    "  target: \"x86_64-linux-gnu\"\n"
+    "  system_include_path: \"/tmp/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/tmp/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"gcc\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"third_party/include\"\n"
+    "arg: \"-gcc-toolchain=third_party/target_toolchain\"\n"
+    "arg: \"-Bout/Release/bin\"\n"
+    "arg: \"--sysroot=src/build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=src/third_party/clang\"\n"
+    "arg: \"-g\"\n"
+    "arg: \"-fdebug-prefix-map=/proc/self/cwd=\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/home/bob/src\"\n"
+    "env: \"PWD=/proc/self/cwd\"\n"
+    "Input {\n"
+    "  filename: \"/home/bob/src/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+// Test case for preserving args when "-fdebug-prefix-map=/proc/self/cwd="
+// is given in gcc.
+// Alice's variant: unlike kExecReqToNormalizeDebugPrefixMapAlicePSCGCC, the
+// path args here are absolute paths under /home/alice/src.
+// NOTE(review): the "NoNormalize" name suggests the Alice/Bob pair is
+// expected to stay distinguishable after normalization; confirm against the
+// test that consumes these fixtures.
+const char kExecReqToNoNormalizeDebugPrefixMapAlicePSCGCC[] = "command_spec {\n"
+    "  name: \"gcc\"\n"
+    "  version: \"4.4.3[Ubuntu 4.4.3-4ubuntu5]\"\n"
+    "  target: \"x86_64-linux-gnu\"\n"
+    "  system_include_path: \"/tmp/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/tmp/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"gcc\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"/home/alice/src/third_party/include\"\n"
+    "arg: \"-gcc-toolchain=/home/alice/src/third_party/target_toolchain\"\n"
+    "arg: \"-B/home/alice/src/out/Release/bin\"\n"
+    "arg: \"--sysroot=/home/alice/src/build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=/home/alice/src/third_party/clang\"\n"
+    "arg: \"-g\"\n"
+    "arg: \"-fdebug-prefix-map=/proc/self/cwd=\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/home/alice/src\"\n"
+    "env: \"PWD=/proc/self/cwd\"\n"
+    "Input {\n"
+    "  filename: \"/home/alice/src/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+// Bob's counterpart of kExecReqToNoNormalizeDebugPrefixMapAlicePSCGCC: the
+// text is the same except that every /home/alice path is /home/bob.
+const char kExecReqToNoNormalizeDebugPrefixMapBobPSCGCC[] = "command_spec {\n"
+    "  name: \"gcc\"\n"
+    "  version: \"4.4.3[Ubuntu 4.4.3-4ubuntu5]\"\n"
+    "  target: \"x86_64-linux-gnu\"\n"
+    "  system_include_path: \"/tmp/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/tmp/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"gcc\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"/home/bob/src/third_party/include\"\n"
+    "arg: \"-gcc-toolchain=/home/bob/src/third_party/target_toolchain\"\n"
+    "arg: \"-B/home/bob/src/out/Release/bin\"\n"
+    "arg: \"--sysroot=/home/bob/src/build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=/home/bob/src/third_party/clang\"\n"
+    "arg: \"-g\"\n"
+    "arg: \"-fdebug-prefix-map=/proc/self/cwd=\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/home/bob/src\"\n"
+    "env: \"PWD=/proc/self/cwd\"\n"
+    "Input {\n"
+    "  filename: \"/home/bob/src/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+// Test case for arg "-fdebug-prefix-map=/proc/self/cwd="
+// without PWD=/proc/self/cwd.
+// Alice's variant: a clang request whose env carries the real directory
+// (PWD=/home/alice/src) and whose path args are absolute under
+// /home/alice/src.
+const char kExecReqToNormalizeDebugPrefixMapAlicePSCNoPWD[] = "command_spec {\n"
+    "  name: \"clang\"\n"
+    "  version: \"4.2.1[clang version 3.5.0 (trunk 214024)]\"\n"
+    "  target: \"x86_64-unknown-linux-gnu\"\n"
+    "  system_include_path: \"/home/alice/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/home/alice/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"clang\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"/home/alice/src/third_party/include\"\n"
+    "arg: \"-Xclang\"\n"
+    "arg: \"/home/alice/src/third_party/lib/libFindBadConstructs.so\"\n"
+    "arg: \"-gcc-toolchain=/home/alice/src/third_party/target_toolchain\"\n"
+    "arg: \"-B/home/alice/src/out/Release/bin\"\n"
+    "arg: \"--sysroot=/home/alice/src/build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=/home/alice/src/third_party/clang\"\n"
+    "arg: \"-g\"\n"
+    "arg: \"-fdebug-prefix-map=/proc/self/cwd=\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/home/alice/src\"\n"
+    "env: \"PWD=/home/alice/src\"\n"
+    "Input {\n"
+    "  filename: \"/home/alice/src/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+// Bob's counterpart of kExecReqToNormalizeDebugPrefixMapAlicePSCNoPWD: the
+// text is the same except that every /home/alice path is /home/bob.
+const char kExecReqToNormalizeDebugPrefixMapBobPSCNoPWD[] = "command_spec {\n"
+    "  name: \"clang\"\n"
+    "  version: \"4.2.1[clang version 3.5.0 (trunk 214024)]\"\n"
+    "  target: \"x86_64-unknown-linux-gnu\"\n"
+    "  system_include_path: \"/home/bob/src/third_party/include\"\n"
+    "  cxx_system_include_path: \"/home/bob/src/third_party/include\"\n"
+    "}\n"
+    "arg: \"clang\"\n"
+    "arg: \"-I\"\n"
+    "arg: \"/home/bob/src/third_party/include\"\n"
+    "arg: \"-Xclang\"\n"
+    "arg: \"/home/bob/src/third_party/lib/libFindBadConstructs.so\"\n"
+    "arg: \"-gcc-toolchain=/home/bob/src/third_party/target_toolchain\"\n"
+    "arg: \"-B/home/bob/src/out/Release/bin\"\n"
+    "arg: \"--sysroot=/home/bob/src/build/linux/sysroot\"\n"
+    "arg: \"-resource-dir=/home/bob/src/third_party/clang\"\n"
+    "arg: \"-g\"\n"
+    "arg: \"-fdebug-prefix-map=/proc/self/cwd=\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/home/bob/src\"\n"
+    "env: \"PWD=/home/bob/src\"\n"
+    "Input {\n"
+    "  filename: \"/home/bob/src/hello.c\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+// javac request: command_spec has no include paths, the args are javac
+// options plus "hello.java", and cwd/PWD are /home/bob/src.
+// NOTE(review): "-bootclasspath" and "-extdirs" are immediately followed by
+// another option rather than a value; confirm this is intentional.
+const char kExecReqToNormalizeJavac[] = "command_spec {\n"
+    "  name: \"javac\"\n"
+    "  version: \"1.8.0_45-internal\"\n"
+    "  target: \"java\"\n"
+    "}\n"
+    "arg: \"javac\"\n"
+    "arg: \"-J-Xmx1024M\"\n"
+    "arg: \"-Xmaxerrs\"\n"
+    "arg: \"9999999\"\n"
+    "arg: \"-encoding\"\n"
+    "arg: \"UTF-8\"\n"
+    "arg: \"-bootclasspath\"\n"
+    "arg: \"-classpath\"\n"
+    "arg: \"dummy.jar:dummy2.jar\"\n"
+    "arg: \"-extdirs\"\n"
+    "arg: \"-d\"\n"
+    "arg: \"dest\"\n"
+    "arg: \"-g\"\n"
+    "arg: \"-encoding\"\n"
+    "arg: \"UTF-8\"\n"
+    "arg: \"-Xmaxwarns\"\n"
+    "arg: \"9999999\"\n"
+    "arg: \"-source\"\n"
+    "arg: \"1.8\"\n"
+    "arg: \"-target\"\n"
+    "arg: \"1.8\"\n"
+    "arg: \"hello.java\"\n"
+    "cwd: \"/home/bob/src\"\n"
+    "env: \"PWD=/home/bob/src\"\n"
+    "Input {\n"
+    "  filename: \"/home/bob/src/hello.java\"\n"
+    "  hash_key: \"152d72ea117deff2af0cf0ca3aaa46a20a5f0c0e4ccb8b6d"
+        "559d507401ae81e9\"\n"
+    "}\n";
+
+}  // anonymous namespace
+
+// Baseline: with both boolean switches off, an empty flag list and an empty
+// prefix map, include paths and args are expected to stay absolute while
+// cwd, env and the input filename are cleared.
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKey) {
+  devtools_goma::ExecReq req;
+
+  // Check all features can be disabled.
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, false, false, std::vector<string>(), std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  ASSERT_EQ(kExecReqToNormalizeArgSize, req.arg_size());
+  EXPECT_EQ("/tmp/src/third_party/include", req.arg(2));
+  EXPECT_EQ("/tmp/src/third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_EQ("-gcc-toolchain=/tmp/src/third_party/target_toolchain", req.arg(5));
+  EXPECT_EQ("-B/tmp/src/out/Release/bin", req.arg(6));
+  EXPECT_EQ("--sysroot=/tmp/src/build/linux/sysroot", req.arg(7));
+  EXPECT_EQ("-resource-dir=/tmp/src/third_party/clang", req.arg(8));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// Only the second argument is switched on: the system include paths are
+// expected to become relative while the args stay absolute.
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyRelativeSystemPath) {
+  devtools_goma::ExecReq req;
+
+  // Convert system include path.
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, std::vector<string>(), std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  ASSERT_EQ(kExecReqToNormalizeArgSize, req.arg_size());
+  EXPECT_EQ("/tmp/src/third_party/include", req.arg(2));
+  EXPECT_EQ("/tmp/src/third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_EQ("-gcc-toolchain=/tmp/src/third_party/target_toolchain", req.arg(5));
+  EXPECT_EQ("-B/tmp/src/out/Release/bin", req.arg(6));
+  EXPECT_EQ("--sysroot=/tmp/src/build/linux/sysroot", req.arg(7));
+  EXPECT_EQ("-resource-dir=/tmp/src/third_party/clang", req.arg(8));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// Convert the arguments that follow the flags listed in kTestOptions.
+// The system include paths are expected to stay absolute here.
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyRelativeSysroot) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, false, false, kTestOptions, std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  ASSERT_EQ(kExecReqToNormalizeArgSize, req.arg_size());
+  EXPECT_EQ("../../third_party/include", req.arg(2));
+  EXPECT_EQ("../../third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_EQ("-gcc-toolchain=../../third_party/target_toolchain", req.arg(5));
+  EXPECT_EQ("-Bbin", req.arg(6));
+  EXPECT_EQ("--sysroot=../../build/linux/sysroot", req.arg(7));
+  EXPECT_EQ("-resource-dir=../../third_party/clang", req.arg(8));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// -g: the include paths, the checked args, cwd, PWD and the input filename
+// are all expected to be left untouched.
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyWithFlagG) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  req.add_arg("-g");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +1 because "-g" is added.
+  ASSERT_EQ(kExecReqToNormalizeArgSize + 1, req.arg_size());
+  EXPECT_EQ("/tmp/src/third_party/include", req.arg(2));
+  EXPECT_EQ("/tmp/src/third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_EQ("/tmp/src/out/Release", req.cwd());
+  // env(0) is read next, so an empty env must stop the test here.
+  ASSERT_FALSE(req.env().empty());
+  EXPECT_EQ("PWD=/tmp/src/out/Release", req.env(0));
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_EQ("/tmp/src/hello.c", req.input(0).filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// -g0: include paths and the checked args are expected to become relative,
+// and cwd, env and the input filename to be cleared.
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyWithFlagG0) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  req.add_arg("-g0");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +1 because "-g0" is added.
+  ASSERT_EQ(kExecReqToNormalizeArgSize + 1, req.arg_size());
+  EXPECT_EQ("../../third_party/include", req.arg(2));
+  EXPECT_EQ("../../third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// -gsplit-dwarf (fission): same expectations as plain -g, i.e. the include
+// paths, the checked args, cwd, PWD and the input filename are left
+// untouched.
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyWithFission) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  req.add_arg("-gsplit-dwarf");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +1 because "-gsplit-dwarf" is added.
+  ASSERT_EQ(kExecReqToNormalizeArgSize + 1, req.arg_size());
+  EXPECT_EQ("/tmp/src/third_party/include", req.arg(2));
+  EXPECT_EQ("/tmp/src/third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_EQ("/tmp/src/out/Release", req.cwd());
+  // env(0) is read next, so an empty env must stop the test here.
+  ASSERT_FALSE(req.env().empty());
+  EXPECT_EQ("PWD=/tmp/src/out/Release", req.env(0));
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_EQ("/tmp/src/hello.c", req.input(0).filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// -fdebug-prefix-map should be normalized with release build (no -g flag):
+// the flag is expected to be reduced to "-fdebug-prefix-map=".
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyWithDebugPrefixMap) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  req.add_arg("-fdebug-prefix-map=/tmp/src=/ts");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +1 because "-fdebug-prefix-map" is added.
+  ASSERT_EQ(kExecReqToNormalizeArgSize + 1, req.arg_size());
+  EXPECT_EQ("../../third_party/include", req.arg(2));
+  EXPECT_EQ("../../third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_EQ("-fdebug-prefix-map=", req.arg(kExecReqToNormalizeArgSize));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// -fdebug-prefix-map should be normalized with -g0: the flag is expected to
+// be reduced to "-fdebug-prefix-map=" and the paths to become relative.
+TEST(ExecReqNormalizerTest,
+     NormalizeExecReqForCacheKeyWithDebugPrefixMapWithFlagG0) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  req.add_arg("-g0");
+  req.add_arg("-fdebug-prefix-map=/tmp/src=/ts");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +2 because "-g0" and "-fdebug-prefix-map" are added.
+  ASSERT_EQ(kExecReqToNormalizeArgSize + 2, req.arg_size());
+  EXPECT_EQ("../../third_party/include", req.arg(2));
+  EXPECT_EQ("../../third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_EQ("-fdebug-prefix-map=", req.arg(kExecReqToNormalizeArgSize + 1));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// Do not normalize args but do normalize -fdebug-prefix-map.
+// Starts from kExecReqToNormalizeRelativeArgs instead of
+// kExecReqToNormalize.
+TEST(ExecReqNormalizerTest,
+     NormalizeExecReqForCacheKeyWithDebugPrefixMapWithRelativeArgs) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeRelativeArgs, &req));
+  req.add_arg("-fdebug-prefix-map=/tmp/src=/ts");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +1 because "-fdebug-prefix-map" is added.
+  ASSERT_EQ(kExecReqToNormalizeArgSize + 1, req.arg_size());
+  EXPECT_EQ("../../third_party/include", req.arg(2));
+  EXPECT_EQ("../../third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_EQ("-fdebug-prefix-map=", req.arg(kExecReqToNormalizeArgSize));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// -MD: the system include paths and the -I argument are expected to stay
+// absolute, while the -Xclang argument still becomes relative.
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyWithMD) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  req.add_arg("-MD");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +1 because "-MD" is added.
+  ASSERT_EQ(kExecReqToNormalizeArgSize + 1, req.arg_size());
+  EXPECT_EQ("/tmp/src/third_party/include", req.arg(2));
+  EXPECT_EQ("../../third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// -M && -MF: same expectations as -MD, i.e. the system include paths and
+// the -I argument stay absolute and the -Xclang argument becomes relative.
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyWithMMF) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  req.add_arg("-M");
+  req.add_arg("-MF");
+  req.add_arg("hello.d");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +3 because "-M", "-MF", and filename are added.
+  ASSERT_EQ(kExecReqToNormalizeArgSize + 3, req.arg_size());
+  EXPECT_EQ("/tmp/src/third_party/include", req.arg(2));
+  EXPECT_EQ("../../third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// -M alone: the system include paths and the -I argument are expected to
+// stay absolute, while the -Xclang argument becomes relative.
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyWithM) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  req.add_arg("-M");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +1 because "-M" is added.
+  ASSERT_EQ(kExecReqToNormalizeArgSize + 1, req.arg_size());
+  EXPECT_EQ("/tmp/src/third_party/include", req.arg(2));
+  EXPECT_EQ("../../third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// When -MM or -MMD is specified, we can convert system paths to
+// relative paths.
+// -MMD
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyWithMMD) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  req.add_arg("-MMD");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +1 because "-MMD" is added.
+  ASSERT_EQ(kExecReqToNormalizeArgSize + 1, req.arg_size());
+  EXPECT_EQ("../../third_party/include", req.arg(2));
+  EXPECT_EQ("../../third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// -MM + -MF: the system include paths and the checked args are expected to
+// become relative.
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyWithMMMF) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  req.add_arg("-MM");
+  req.add_arg("-MF");
+  req.add_arg("hello.d");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +3 because "-MM", "-MF", and filename are added.
+  ASSERT_EQ(kExecReqToNormalizeArgSize + 3, req.arg_size());
+  EXPECT_EQ("../../third_party/include", req.arg(2));
+  EXPECT_EQ("../../third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// -MM alone: the system include paths and the checked args are expected to
+// become relative.
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyWithMM) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  req.add_arg("-MM");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +1 because "-MM" is added.
+  ASSERT_EQ(kExecReqToNormalizeArgSize + 1, req.arg_size());
+  EXPECT_EQ("../../third_party/include", req.arg(2));
+  EXPECT_EQ("../../third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// -MF only (no -M/-MD): the system include paths and the checked args are
+// expected to become relative.
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyWithMF) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  req.add_arg("-MF");
+  req.add_arg("hello.d");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +2 because "-MF" and filename are added.
+  ASSERT_EQ(kExecReqToNormalizeArgSize + 2, req.arg_size());
+  EXPECT_EQ("../../third_party/include", req.arg(2));
+  EXPECT_EQ("../../third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// If both -MD and -MMD are specified, -MMD won't be used,
+// regardless of the commandline order.
+// -MD & -MMD
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyWithMDMMD) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  req.add_arg("-MD");
+  req.add_arg("-MMD");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  // Size checks are fatal so the indexed accessors below never run on a
+  // repeated field that is shorter than expected.
+  ASSERT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().system_include_path(0));
+  ASSERT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +2 because "-MD" and "-MMD" are added.
+  ASSERT_EQ(kExecReqToNormalizeArgSize + 2, req.arg_size());
+  EXPECT_EQ("/tmp/src/third_party/include", req.arg(2));
+  EXPECT_EQ("../../third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  ASSERT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// -MMD & -MD (inverted order): same expectations as the -MD & -MMD order.
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyWithMMDMD) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  req.add_arg("-MMD");
+  req.add_arg("-MD");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  EXPECT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().system_include_path(0));
+  EXPECT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +2 because "-MMD" and "-MD" are added.
+  EXPECT_EQ(kExecReqToNormalizeArgSize + 2, req.arg_size());
+  EXPECT_EQ("/tmp/src/third_party/include", req.arg(2));
+  EXPECT_EQ("../../third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  EXPECT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// -MMD & -MD (with gcc; request parsed from kExecReqToNormalizeGcc)
+// -MD should be ignored if -MMD exists.
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyWithMMDMDGCC) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeGcc, &req));
+  req.add_arg("-MMD");
+  req.add_arg("-MD");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  EXPECT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().system_include_path(0));
+  EXPECT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("../../third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +2 because "-MMD" and "-MD" are added.
+  EXPECT_EQ(kExecReqToNormalizeGccArgSize + 2, req.arg_size());
+  EXPECT_EQ("../../third_party/include", req.arg(2));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_TRUE(req.env().empty());
+  EXPECT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// link (third argument true): absolute paths, cwd, env and filename are kept.
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyForLink) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeLink, &req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, true, kTestOptions, std::map<string, string>(), &req);
+  EXPECT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().system_include_path(0));
+  EXPECT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  EXPECT_EQ(11, req.arg_size());
+  EXPECT_EQ("/tmp/src/third_party/include", req.arg(2));
+  EXPECT_EQ("/tmp/src/third_party/lib/libFindBadConstructs.so", req.arg(6));
+  EXPECT_EQ("-B/tmp/src/out/Release/bin", req.arg(7));
+  EXPECT_EQ("--sysroot=/tmp/src/build/linux/sysroot", req.arg(8));
+  EXPECT_EQ("-resource-dir=/tmp/src/third_party/clang", req.arg(9));
+  EXPECT_FALSE(req.cwd().empty());
+  EXPECT_FALSE(req.env().empty());
+  EXPECT_EQ("PWD=/tmp/src/out/Release", req.env(0));
+  EXPECT_EQ(1, req.input_size());
+  EXPECT_TRUE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// cl.exe: expects the VC include path, a non-empty cwd and the input filename.
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyForClExe) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeWin, &req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  EXPECT_EQ(1, req.command_spec().system_include_path_size());
+  const string expected_include_path(
+      "c:\\Program Files (x86)\\Microsoft Visual Studio 9.0\\VC\\INCLUDE");
+  EXPECT_EQ(expected_include_path,
+            req.command_spec().system_include_path(0));
+  EXPECT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ(expected_include_path,
+            req.command_spec().cxx_system_include_path(0));
+  EXPECT_FALSE(req.cwd().empty());
+  EXPECT_EQ(1, req.input_size());
+  EXPECT_EQ("C:\\src\\goma\\client\\test\\vc\\stdafx.cpp",
+            req.input(0).filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// cl.exe with different cwd and /showIncludes option
+TEST(ExecReqNormalizerTest, NormalizeExecReqForCacheKeyForClExeCWD) {
+  devtools_goma::ExecReq alice_req, bob_req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeWinAlice, &alice_req));
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeWinBob, &bob_req));
+
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(alice_req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(bob_req));
+
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &alice_req);
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &bob_req);
+
+  EXPECT_FALSE(MessageDifferencer::Equals(alice_req, bob_req));
+
+  // check only |cwd| is different: with cwd cleared both must be equal.
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeWinAlice, &alice_req));
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeWinBob, &bob_req));
+
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(alice_req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(bob_req));
+
+  alice_req.clear_cwd();
+  bob_req.clear_cwd();
+
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &alice_req);
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &bob_req);
+
+  MessageDifferencer differencer;
+  string difference_reason;
+  differencer.ReportDifferencesToString(&difference_reason);
+  EXPECT_TRUE(differencer.Compare(alice_req, bob_req)) << difference_reason;
+}
+
+// subprogram path cleanup: paths become empty while binary hashes are kept.
+TEST(ExecReqNormalizerTest,
+     NormalizeExecReqForCacheKeyWithSubprogramPathCleanup) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  devtools_goma::SubprogramSpec* s = req.add_subprogram();
+  s->set_path("../../third_party/binutils/Linux_x64/Release/bin/as");
+  s->set_binary_hash(
+      "2f931b1183b807976cb304a66d1b84dcfe5a32f02b45f54c2358e5c43f9183b0");
+  s = req.add_subprogram();
+  s->set_path("../../third_party/binutils/Linux_x64/Release/bin/strip");
+  s->set_binary_hash(
+      "4956e195e962c7329c1fd0aee839d5cdbf7bb42bbc19e197be11751da1f3ea3c");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, false, false, std::vector<string>(), std::map<string, string>(), &req);
+  EXPECT_EQ(2, req.subprogram_size());
+  EXPECT_EQ("", req.subprogram(0).path());
+  EXPECT_EQ("2f931b1183b807976cb304a66d1b84dcfe5a32f02b45f54c2358e5c43f9183b0",
+            req.subprogram(0).binary_hash());
+  EXPECT_EQ("", req.subprogram(1).path());
+  EXPECT_EQ("4956e195e962c7329c1fd0aee839d5cdbf7bb42bbc19e197be11751da1f3ea3c",
+            req.subprogram(1).binary_hash());
+}
+
+TEST(ExecReqNormalizerTest,
+     NormalizeExecReqForCacheKeyWithDebugPrefixMap) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  // TODO: On Windows, we should try to use Windows path?
+  // Currently no one is using debug prefix map on Windows.
+
+  // debug_prefix_map: the /tmp/src prefix is expected to be rewritten to /ts.
+  const std::map<string, string> debug_prefix_map = {
+    {"/tmp/src", "/ts"},
+  };
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  req.add_arg("-g");
+  // The debug_prefix_map variable above is what actually gets applied, but
+  // to keep the request consistent for NormalizeExecReqForCacheKey, the
+  // matching "-fdebug-prefix-map" argument is added here as well.
+  req.add_arg("-fdebug-prefix-map=/tmp/src=/ts");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, debug_prefix_map, &req);
+  EXPECT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ(file::JoinPath("/ts", "third_party/include"),
+            req.command_spec().system_include_path(0));
+  EXPECT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ(file::JoinPath("/ts", "third_party/include"),
+            req.command_spec().cxx_system_include_path(0));
+  // +2 because "-g" and "-fdebug-prefix-map" are added.
+  EXPECT_EQ(kExecReqToNormalizeArgSize + 2, req.arg_size());
+  EXPECT_EQ(file::JoinPath("/ts", "third_party/include"), req.arg(2));
+  EXPECT_EQ(file::JoinPath("/ts", "third_party/lib/libFindBadConstructs.so"),
+            req.arg(4));
+  EXPECT_EQ(file::JoinPath("/ts", "out/Release"), req.cwd());
+  EXPECT_EQ(1, req.input_size());
+  EXPECT_EQ(file::JoinPath("/ts", "hello.c"), req.input(0).filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+// disable debug_prefix_map: /tmp/src paths, cwd and filename stay unmapped.
+TEST(ExecReqNormalizerTest,
+     NormalizeExecReqForCacheKeyWithDisabledDebugPrefixMap) {
+  devtools_goma::ExecReq req;
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  req.add_arg("-g");
+  req.add_arg("-fdebug-prefix-map=/tmp/src=/ts");
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  // Note: passing empty debug_prefix_map means disabling the feature.
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, std::map<string, string>(), &req);
+  EXPECT_EQ(1, req.command_spec().system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().system_include_path(0));
+  EXPECT_EQ(1, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("/tmp/src/third_party/include",
+            req.command_spec().cxx_system_include_path(0));
+  // +2 because "-g" and "-fdebug-prefix-map" are added.
+  EXPECT_EQ(kExecReqToNormalizeArgSize + 2, req.arg_size());
+  EXPECT_EQ("/tmp/src/third_party/include", req.arg(2));
+  EXPECT_EQ("/tmp/src/third_party/lib/libFindBadConstructs.so", req.arg(4));
+  EXPECT_EQ("/tmp/src/out/Release", req.cwd());
+  EXPECT_EQ(1, req.input_size());
+  EXPECT_EQ("/tmp/src/hello.c", req.input(0).filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+TEST(ExecReqNormalizerTest, NormalizeExecReqShouldNormalizeWithDebugPrefixMap) {
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+  devtools_goma::ExecReq alice_req, bob_req;
+  // Prefix maps GCCFlags is expected to parse from each request's args.
+  const std::map<string, string> expected_alice_map = {
+    {"/home/alice", "/base_dir"},
+  };
+  const std::map<string, string> expected_bob_map = {
+    {"/home/bob", "/base_dir"},
+  };
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeDebugPrefixMapAlice, &alice_req));
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeDebugPrefixMapBob, &bob_req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(alice_req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(bob_req));
+
+  std::vector<string> alice_args(alice_req.arg().begin(),
+                                 alice_req.arg().end());
+  std::vector<string> bob_args(bob_req.arg().begin(), bob_req.arg().end());
+  devtools_goma::GCCFlags alice_flags(alice_args, alice_req.cwd());
+  devtools_goma::GCCFlags bob_flags(bob_args, bob_req.cwd());
+  ASSERT_EQ(expected_alice_map, alice_flags.fdebug_prefix_map());
+  ASSERT_EQ(expected_bob_map, bob_flags.fdebug_prefix_map());
+  // Normalize each request with its own map; the text dumps must then match.
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, alice_flags.fdebug_prefix_map(),
+      &alice_req);
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, bob_flags.fdebug_prefix_map(),
+      &bob_req);
+  string normalized_alice;
+  string normalized_bob;
+  ASSERT_TRUE(TextFormat::PrintToString(alice_req, &normalized_alice));
+  ASSERT_TRUE(TextFormat::PrintToString(bob_req, &normalized_bob));
+  EXPECT_EQ(normalized_alice, normalized_bob);
+}
+
+TEST(ExecReqNormalizerTest,
+     NormalizeExecReqShouldNormalizeWithDebugPrefixMapAndCWD) {
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+  devtools_goma::ExecReq alice_req, bob_req;
+
+  const std::map<string, string> kExpectedMap = {
+    {"/proc/self/cwd", ""},
+  };
+
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeDebugPrefixMapAlicePSC, &alice_req));
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeDebugPrefixMapBobPSC, &bob_req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(alice_req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(bob_req));
+
+  std::vector<string> alice_args(alice_req.arg().begin(),
+                                 alice_req.arg().end());
+  std::vector<string> bob_args(bob_req.arg().begin(), bob_req.arg().end());
+  devtools_goma::GCCFlags alice_flags(alice_args, alice_req.cwd());
+  devtools_goma::GCCFlags bob_flags(bob_args, bob_req.cwd());
+  ASSERT_EQ(kExpectedMap, alice_flags.fdebug_prefix_map());
+  ASSERT_EQ(kExpectedMap, bob_flags.fdebug_prefix_map());
+
+  ASSERT_EQ(alice_req.env().size(), 1);
+  EXPECT_EQ("PWD=/proc/self/cwd", alice_req.env()[0]);
+
+  ASSERT_EQ(bob_req.env().size(), 1);
+  EXPECT_EQ("PWD=/proc/self/cwd", bob_req.env()[0]);
+
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, alice_flags.fdebug_prefix_map(),
+      &alice_req);
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, bob_flags.fdebug_prefix_map(),
+      &bob_req);
+  // NOTE(review): name says ShouldNormalize, yet a difference is expected.
+  EXPECT_FALSE(MessageDifferencer::Equals(alice_req, bob_req));
+}
+
+TEST(ExecReqNormalizerTest,
+     NormalizeExecReqShouldNormalizeWith2DebugPrefixMapAndCWD) {
+  const std::vector<string> kTestOptions{
+      "Xclang", "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+  devtools_goma::ExecReq alice_req, bob_req;
+
+  const std::map<string, string> kExpectedMapAlice = {
+    {"/proc/self/cwd", ""},
+    {"/home/alice/src/", ""}
+  };
+
+  const std::map<string, string> kExpectedMapBob = {
+    {"/proc/self/cwd", ""},
+    {"/home/bob/src/", ""}
+  };
+
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalize2DebugPrefixMapAlicePSC, &alice_req));
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalize2DebugPrefixMapBobPSC, &bob_req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(alice_req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(bob_req));
+
+  std::vector<string> alice_args(alice_req.arg().begin(),
+                                 alice_req.arg().end());
+  std::vector<string> bob_args(bob_req.arg().begin(), bob_req.arg().end());
+  devtools_goma::GCCFlags alice_flags(alice_args, alice_req.cwd());
+  devtools_goma::GCCFlags bob_flags(bob_args, bob_req.cwd());
+  ASSERT_EQ(kExpectedMapAlice, alice_flags.fdebug_prefix_map());
+  ASSERT_EQ(kExpectedMapBob, bob_flags.fdebug_prefix_map());
+
+  ASSERT_EQ(alice_req.env().size(), 1);
+  EXPECT_EQ("PWD=/proc/self/cwd", alice_req.env()[0]);
+
+  ASSERT_EQ(bob_req.env().size(), 1);
+  EXPECT_EQ("PWD=/proc/self/cwd", bob_req.env()[0]);
+
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, alice_flags.fdebug_prefix_map(),
+      &alice_req);
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, bob_flags.fdebug_prefix_map(),
+      &bob_req);
+  // The two requests must compare equal; the diff is reported otherwise.
+  MessageDifferencer differencer;
+  string difference_reason;
+  differencer.ReportDifferencesToString(&difference_reason);
+  EXPECT_TRUE(differencer.Compare(alice_req, bob_req)) << difference_reason;
+}
+
+TEST(ExecReqNormalizerTest,
+     NormalizeExecReqShouldNormalizeWith2DebugPrefixMapAndCWDGCC) {
+  const std::vector<string> kTestOptions{
+    "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+  devtools_goma::ExecReq alice_req, bob_req;
+
+  const std::map<string, string> kExpectedMapAlice = {
+    {"/proc/self/cwd", ""},
+    {"/home/alice/src/", ""}
+  };
+
+  const std::map<string, string> kExpectedMapBob = {
+    {"/proc/self/cwd", ""},
+    {"/home/bob/src/", ""}
+  };
+
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalize2DebugPrefixMapAlicePSCGCC, &alice_req));
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalize2DebugPrefixMapBobPSCGCC, &bob_req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(alice_req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(bob_req));
+
+  std::vector<string> alice_args(alice_req.arg().begin(),
+                                 alice_req.arg().end());
+  std::vector<string> bob_args(bob_req.arg().begin(), bob_req.arg().end());
+  devtools_goma::GCCFlags alice_flags(alice_args, alice_req.cwd());
+  devtools_goma::GCCFlags bob_flags(bob_args, bob_req.cwd());
+  ASSERT_EQ(kExpectedMapAlice, alice_flags.fdebug_prefix_map());
+  ASSERT_EQ(kExpectedMapBob, bob_flags.fdebug_prefix_map());
+
+  ASSERT_EQ(alice_req.env().size(), 1);
+  EXPECT_EQ("PWD=/proc/self/cwd", alice_req.env()[0]);
+
+  ASSERT_EQ(bob_req.env().size(), 1);
+  EXPECT_EQ("PWD=/proc/self/cwd", bob_req.env()[0]);
+
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, alice_flags.fdebug_prefix_map(),
+      &alice_req);
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, bob_flags.fdebug_prefix_map(),
+      &bob_req);
+  // NOTE(review): name says ShouldNormalize, yet a difference is expected.
+  EXPECT_FALSE(MessageDifferencer::Equals(alice_req, bob_req));
+}
+
+TEST(ExecReqNormalizerTest,
+     NormalizeExecReqShouldNormalizeWithDebugPrefixMapAndCWDGCC) {
+  const std::vector<string> kTestOptions{
+    "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+  devtools_goma::ExecReq alice_req, bob_req;
+
+  const std::map<string, string> kExpectedMap = {
+    {"/proc/self/cwd", ""},
+  };
+
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeDebugPrefixMapAlicePSCGCC, &alice_req));
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeDebugPrefixMapBobPSCGCC, &bob_req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(alice_req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(bob_req));
+
+  std::vector<string> alice_args(alice_req.arg().begin(),
+                                 alice_req.arg().end());
+  std::vector<string> bob_args(bob_req.arg().begin(), bob_req.arg().end());
+  devtools_goma::GCCFlags alice_flags(alice_args, alice_req.cwd());
+  devtools_goma::GCCFlags bob_flags(bob_args, bob_req.cwd());
+  ASSERT_EQ(kExpectedMap, alice_flags.fdebug_prefix_map());
+  ASSERT_EQ(kExpectedMap, bob_flags.fdebug_prefix_map());
+
+  ASSERT_EQ(alice_req.env().size(), 1);
+  EXPECT_EQ("PWD=/proc/self/cwd", alice_req.env()[0]);
+
+  ASSERT_EQ(bob_req.env().size(), 1);
+  EXPECT_EQ("PWD=/proc/self/cwd", bob_req.env()[0]);
+
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, alice_flags.fdebug_prefix_map(),
+      &alice_req);
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, bob_flags.fdebug_prefix_map(),
+      &bob_req);
+  // NOTE(review): name says ShouldNormalize, yet a difference is expected.
+  EXPECT_FALSE(MessageDifferencer::Equals(alice_req, bob_req));
+}
+
+TEST(ExecReqNormalizerTest,
+     NormalizeExecReqShouldNotNormalizeWithDebugPrefixMapAndCWDGCC) {
+  const std::vector<string> kTestOptions{
+    "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+  devtools_goma::ExecReq alice_req, bob_req;
+
+  const std::map<string, string> kExpectedMap = {
+    {"/proc/self/cwd", ""},
+  };
+
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNoNormalizeDebugPrefixMapAlicePSCGCC, &alice_req));
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNoNormalizeDebugPrefixMapBobPSCGCC, &bob_req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(alice_req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(bob_req));
+
+  std::vector<string> alice_args(alice_req.arg().begin(),
+                                 alice_req.arg().end());
+  std::vector<string> bob_args(bob_req.arg().begin(), bob_req.arg().end());
+  devtools_goma::GCCFlags alice_flags(alice_args, alice_req.cwd());
+  devtools_goma::GCCFlags bob_flags(bob_args, bob_req.cwd());
+  ASSERT_EQ(kExpectedMap, alice_flags.fdebug_prefix_map());
+  ASSERT_EQ(kExpectedMap, bob_flags.fdebug_prefix_map());
+
+  ASSERT_EQ(alice_req.env().size(), 1);
+  EXPECT_EQ("PWD=/proc/self/cwd", alice_req.env()[0]);
+
+  ASSERT_EQ(bob_req.env().size(), 1);
+  EXPECT_EQ("PWD=/proc/self/cwd", bob_req.env()[0]);
+
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, alice_flags.fdebug_prefix_map(),
+      &alice_req);
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, bob_flags.fdebug_prefix_map(),
+      &bob_req);
+  // Normalization is not expected to make the two requests equal.
+  EXPECT_FALSE(MessageDifferencer::Equals(alice_req, bob_req));
+}
+
+TEST(ExecReqNormalizerTest,
+     NormalizeExecReqShouldNotNormalizeWithDebugPrefixMapAndCWDNoPWD) {
+  const std::vector<string> kTestOptions{
+    "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+  devtools_goma::ExecReq alice_req, bob_req;
+
+  const std::map<string, string> kExpectedMap = {
+    {"/proc/self/cwd", ""},
+  };
+
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeDebugPrefixMapAlicePSCNoPWD, &alice_req));
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeDebugPrefixMapBobPSCNoPWD, &bob_req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(alice_req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(bob_req));
+
+  std::vector<string> alice_args(alice_req.arg().begin(),
+                                 alice_req.arg().end());
+  std::vector<string> bob_args(bob_req.arg().begin(), bob_req.arg().end());
+  devtools_goma::GCCFlags alice_flags(alice_args, alice_req.cwd());
+  devtools_goma::GCCFlags bob_flags(bob_args, bob_req.cwd());
+  ASSERT_EQ(kExpectedMap, alice_flags.fdebug_prefix_map());
+  ASSERT_EQ(kExpectedMap, bob_flags.fdebug_prefix_map());
+
+  ASSERT_EQ(alice_req.env().size(), 1);
+  EXPECT_EQ("PWD=/home/alice/src", alice_req.env()[0]);
+
+  ASSERT_EQ(bob_req.env().size(), 1);
+  EXPECT_EQ("PWD=/home/bob/src", bob_req.env()[0]);
+
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, alice_flags.fdebug_prefix_map(),
+      &alice_req);
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, bob_flags.fdebug_prefix_map(),
+      &bob_req);
+  // PWD is user-specific here; the requests are expected to stay different.
+  EXPECT_FALSE(MessageDifferencer::Equals(alice_req, bob_req));
+}
+
+TEST(ExecReqNormalizerTest, NormalizeExecReqInputOrderForCacheKey) {
+  devtools_goma::ExecReq req;
+
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeInputOrder, &req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, false, false, std::vector<string>(), std::map<string, string>(), &req);
+  // After normalization the inputs are expected in exactly this hash_key order.
+  EXPECT_EQ("bbbbbbbbbb", req.input(0).hash_key());
+  EXPECT_EQ("aaaaaaaaaa", req.input(1).hash_key());
+  EXPECT_EQ("cccccccccc", req.input(2).hash_key());
+}
+
+TEST(ExecReqNormalizerTest, NormalizeExecReqShouldClearContent) {
+  devtools_goma::ExecReq req;
+
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeContent, &req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  ASSERT_EQ(1, req.input_size());
+  ASSERT_EQ("dummy_hash_key", req.input(0).hash_key());
+  ASSERT_TRUE(req.input(0).has_content());
+  // Precondition checked above: the single input carries content.
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, false, false, std::vector<string>(), std::map<string, string>(), &req);
+  // After normalization the content must be gone while the hash_key stays.
+  EXPECT_EQ(1, req.input_size());
+  EXPECT_EQ("dummy_hash_key", req.input(0).hash_key());
+  EXPECT_FALSE(req.input(0).has_content());
+}
+
+TEST(ExecReqNormalizerTest,
+     NormalizeExecReqForCacheKeyShouldNormalizeWindowsPNaClPath) {
+  devtools_goma::ExecReq req;
+
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizeWinPNaCl, &req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, std::vector<string>(), std::map<string, string>(), &req);
+  // Expects relative, backslash-separated include paths and an empty cwd.
+  EXPECT_EQ(3, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("..\\..\\pnacl_newlib\\bin\\..\\x86_64-nacl\\include\\c++\\v1",
+            req.command_spec().cxx_system_include_path(0));
+  EXPECT_EQ("..\\..\\pnacl_newlib\\bin\\..\\lib\\clang\\3.7.0\\include",
+            req.command_spec().cxx_system_include_path(1));
+  EXPECT_EQ("..\\..\\pnacl_newlib\\bin\\..\\x86_64-nacl\\include",
+            req.command_spec().cxx_system_include_path(2));
+  EXPECT_TRUE(req.cwd().empty());
+  EXPECT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+TEST(ExecReqNormalizerTest,
+     NormalizeExecReqForCacheKeyShouldNotNormalizePNaClTranslate) {
+  devtools_goma::ExecReq req;
+
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToNormalizePNaClTranslate, &req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, std::vector<string>(), std::map<string, string>(), &req);
+  // Unlike the Windows PNaCl case, cwd is expected to be non-empty here.
+  EXPECT_EQ(3, req.command_spec().cxx_system_include_path_size());
+  EXPECT_EQ("../../pnacl_newlib/bin/../x86_64-nacl/include/c++/v1",
+            req.command_spec().cxx_system_include_path(0));
+  EXPECT_EQ("../../pnacl_newlib/bin/../lib/clang/3.7.0/include",
+            req.command_spec().cxx_system_include_path(1));
+  EXPECT_EQ("../../pnacl_newlib/bin/../x86_64-nacl/include",
+            req.command_spec().cxx_system_include_path(2));
+  EXPECT_EQ("/dummy/out/Default", req.cwd());
+  EXPECT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+TEST(ExecReqNormalizerTest, NormalizeJavac) {
+  devtools_goma::ExecReq req;
+
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalizeJavac, &req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+
+  // To confirm NormalizeExecReqForCacheKey omits them, add include paths
+  // that won't exist in an actual compile request.
+  req.mutable_command_spec()->add_system_include_path("dummy");
+  req.mutable_command_spec()->add_cxx_system_include_path("dummy");
+
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, std::vector<string>(), std::map<string, string>(), &req);
+  // Both include path lists must come back empty.
+  EXPECT_EQ(0, req.command_spec().system_include_path_size());
+  EXPECT_EQ(0, req.command_spec().cxx_system_include_path_size());
+
+  EXPECT_EQ("", req.cwd());
+  EXPECT_EQ(1, req.input_size());
+  EXPECT_FALSE(req.input(0).has_filename());
+  EXPECT_TRUE(req.input(0).has_hash_key());
+}
+
+TEST(ExecReqNormalizerTest, RewritePathWithDebugPrefixMap) {
+  const std::map<string, string> empty_map;
+  const std::map<string, string> single_rule_map = {
+    {"/usr/local", "/debug"},
+  };
+  const std::map<string, string> value_shows_up_in_key_map = {
+    {"/usr/local", "/foo"},
+    {"/foo", "/bar"},
+  };
+  // An empty path is not rewritten.
+  string path;
+  path = "";
+  EXPECT_FALSE(
+      devtools_goma::RewritePathWithDebugPrefixMap(
+          single_rule_map, &path));
+  // An empty map rewrites nothing.
+  path = "/tmp";
+  EXPECT_FALSE(
+      devtools_goma::RewritePathWithDebugPrefixMap(
+          empty_map, &path));
+  // A matching prefix is replaced by its mapped value.
+  path = "/usr/local/include/stdio.h";
+  EXPECT_TRUE(
+      devtools_goma::RewritePathWithDebugPrefixMap(
+          single_rule_map, &path));
+  EXPECT_EQ(file::JoinPath("/debug", "/include/stdio.h"), path);
+  // Only one rule applies: the /foo result is not mapped again to /bar.
+  path = "/usr/local/include/stdio.h";
+  EXPECT_TRUE(
+      devtools_goma::RewritePathWithDebugPrefixMap(
+          value_shows_up_in_key_map, &path));
+  EXPECT_EQ(file::JoinPath("/foo", "include/stdio.h"), path);
+  // A path that itself starts with /foo is mapped to /bar.
+  path = "/foo/local/include/stdio.h";
+  EXPECT_TRUE(
+      devtools_goma::RewritePathWithDebugPrefixMap(
+          value_shows_up_in_key_map, &path));
+  EXPECT_EQ(file::JoinPath("/bar", "local/include/stdio.h"), path);
+}
+
+TEST(ExecReqNormalizerTest, HasAmbiguityInDebugPrefixMap) {
+  EXPECT_FALSE(devtools_goma::HasAmbiguityInDebugPrefixMap(  // empty map
+      std::map<string, string>()));
+  EXPECT_FALSE(  // single rule
+      devtools_goma::HasAmbiguityInDebugPrefixMap(
+          std::map<string, string>({
+            {"/usr/local", "/debug"},
+          })));
+  EXPECT_TRUE(  // "/usr" is a prefix of "/usr/local"
+      devtools_goma::HasAmbiguityInDebugPrefixMap(
+          std::map<string, string>({
+                {"/usr/local", "/debug"}, {"/usr", "/debug2"},
+          })));
+  EXPECT_TRUE(  // "/usr/lib" is a string prefix of "/usr/libexec"
+      devtools_goma::HasAmbiguityInDebugPrefixMap(
+          std::map<string, string>({
+            {"/usr/lib", "/debug"}, {"/usr/libexec", "/debug2"},
+          })));
+  EXPECT_FALSE(  // "/usr/lib" is not a prefix of "/usr//libexec"
+      devtools_goma::HasAmbiguityInDebugPrefixMap(
+          std::map<string, string>({
+            {"/usr/lib", "/debug"}, {"/usr//libexec", "/debug2"},
+          })));
+  EXPECT_TRUE(  // prefix pair listed with an unrelated key in between
+      devtools_goma::HasAmbiguityInDebugPrefixMap(
+          std::map<string, string>({
+            {"/usr/local", "/debug"}, {"dummy", "dummy2"},
+                {"/usr", "/debug2"},
+          })));
+  EXPECT_TRUE(  // relative keys: "lib" is a prefix of "lib64"
+      devtools_goma::HasAmbiguityInDebugPrefixMap(
+          std::map<string, string>({
+            {"lib", "/debug"}, {"dummy", "dummy2"},
+                {"lib64", "/debug2"},
+          })));
+  EXPECT_FALSE(  // single rule mapping to "."
+      devtools_goma::HasAmbiguityInDebugPrefixMap(
+          std::map<string, string>({
+            {"/home/alice/chromium/src", "."},
+          })));
+}
+
+TEST(ExecReqNormalizerTest, AlwaysRemoveRequesterInfo) {
+  // Test for b/38184335
+
+  const std::vector<string> kTestOptions{
+    "B", "I", "gcc-toolchain", "-sysroot", "resource-dir"};
+
+  devtools_goma::ExecReq req;
+
+  ASSERT_TRUE(TextFormat::ParseFromString(
+      kExecReqToAmbiguaousDebugPrefixMap, &req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+
+  const std::map<string, string> kExpectedMap = {
+    {"/home/goma/chromium/src", "."},
+  };
+
+  std::vector<string> args(req.arg().begin(), req.arg().end());
+  devtools_goma::GCCFlags flags(args, req.cwd());
+  ASSERT_EQ(kExpectedMap, flags.fdebug_prefix_map());
+  // The parsed single-rule map is expected to be reported as unambiguous.
+  EXPECT_FALSE(
+      devtools_goma::HasAmbiguityInDebugPrefixMap(flags.fdebug_prefix_map()));
+  // requester_info is present before normalization and must be gone after it.
+  EXPECT_TRUE(req.has_requester_info());
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, true, false, kTestOptions, flags.fdebug_prefix_map(),
+      &req);
+  EXPECT_FALSE(req.has_requester_info());
+}
+
+TEST(ExecReqNormalizerTest, DropDeveloperDir) {
+  devtools_goma::ExecReq req;
+  ASSERT_TRUE(TextFormat::ParseFromString(kExecReqToNormalize, &req));
+  ASSERT_TRUE(devtools_goma::VerifyExecReq(req));
+  // Add DEVELOPER_DIR and confirm it is present before normalization.
+  req.add_env("DEVELOPER_DIR=/some/where/to/developer_dir");
+  bool found_developer_env = false;
+  for (const auto& env : req.env()) {
+    if (strings::StartsWith(env, "DEVELOPER_DIR=")) {
+      found_developer_env = true;
+      break;
+    }
+  }
+  ASSERT_TRUE(found_developer_env);
+
+  devtools_goma::NormalizeExecReqForCacheKey(
+      0, false, false, std::vector<string>(), std::map<string, string>(), &req);
+  // After normalization no DEVELOPER_DIR entry may remain in env.
+  found_developer_env = false;
+  for (const auto& env : req.env()) {
+    if (strings::StartsWith(env, "DEVELOPER_DIR=")) {
+      found_developer_env = true;
+      break;
+    }
+  }
+  EXPECT_FALSE(found_developer_env);
+}
diff --git a/lib/execreq_verifier.cc b/lib/execreq_verifier.cc
new file mode 100644
index 0000000..505b73c
--- /dev/null
+++ b/lib/execreq_verifier.cc
@@ -0,0 +1,33 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "execreq_verifier.h"
+
+namespace devtools_goma {
+
+// Returns true if |req| carries everything this check requires: all required
+// proto fields, a command_spec with name, version and target, at least one
+// argument, a cwd, and a filename on every input.
+bool VerifyExecReq(const ExecReq& req) {
+  // A request missing any required proto field is rejected outright.
+  if (!req.IsInitialized()) {
+    return false;
+  }
+
+  // The compiler must be fully identified.
+  const CommandSpec& command = req.command_spec();
+  const bool compiler_identified =
+      command.has_name() && command.has_version() && command.has_target();
+
+  // At least one argument and a working directory are mandatory as well.
+  if (!compiler_identified || req.arg_size() == 0 || !req.has_cwd()) {
+    return false;
+  }
+
+  // Every attached input has to name its file.
+  for (int i = 0; i < req.input_size(); ++i) {
+    if (!req.input(i).has_filename()) {
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace devtools_goma
diff --git a/lib/execreq_verifier.h b/lib/execreq_verifier.h
new file mode 100644
index 0000000..195338c
--- /dev/null
+++ b/lib/execreq_verifier.h
@@ -0,0 +1,21 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_LIB_EXECREQ_VERIFIER_H_
+#define DEVTOOLS_GOMA_LIB_EXECREQ_VERIFIER_H_
+
+
+#include "compiler_specific.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_data.pb.h"
+MSVC_POP_WARNING()
+
+namespace devtools_goma {
+
+bool VerifyExecReq(const ExecReq& req);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_LIB_EXECREQ_VERIFIER_H_
diff --git a/lib/execreq_verifier_test.cc b/lib/execreq_verifier_test.cc
new file mode 100644
index 0000000..b2e1716
--- /dev/null
+++ b/lib/execreq_verifier_test.cc
@@ -0,0 +1,56 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "execreq_verifier.h"
+
+#include "google/protobuf/text_format.h"
+#include "gtest/gtest.h"
+using google::protobuf::TextFormat;
+
+namespace {
+
+const char kValidExecReq[] = "command_spec {\n"
+    "  name: \"gcc\"\n"
+    "  version: \"4.4.3[Ubuntu 4.4.3-4ubuntu5]\"\n"
+    "  target: \"x86_64-linux-gnu\"\n"
+    "}\n"
+    "arg: \"gcc\"\n"
+    "arg: \"-c\"\n"
+    "arg: \"hello.c\"\n"
+    "cwd: \"/tmp\"\n";
+
+}  // anonymous namespace
+
+// Starts from kValidExecReq and removes one checked field at a time; each
+// removal must make VerifyExecReq() fail.
+TEST(ExecreqVerifierTest, VerifyExecReq) {
+  // A default-constructed request is rejected.
+  devtools_goma::ExecReq req;
+  EXPECT_FALSE(devtools_goma::VerifyExecReq(req));
+
+  // The reference request is accepted.
+  ASSERT_TRUE(TextFormat::ParseFromString(kValidExecReq, &req));
+  EXPECT_TRUE(devtools_goma::VerifyExecReq(req));
+
+  // command_spec without name.
+  req.mutable_command_spec()->clear_name();
+  EXPECT_FALSE(devtools_goma::VerifyExecReq(req));
+
+  // command_spec without version.
+  ASSERT_TRUE(TextFormat::ParseFromString(kValidExecReq, &req));
+  req.mutable_command_spec()->clear_version();
+  EXPECT_FALSE(devtools_goma::VerifyExecReq(req));
+
+  // command_spec without target.
+  ASSERT_TRUE(TextFormat::ParseFromString(kValidExecReq, &req));
+  req.mutable_command_spec()->clear_target();
+  EXPECT_FALSE(devtools_goma::VerifyExecReq(req));
+
+  // No command_spec at all.
+  ASSERT_TRUE(TextFormat::ParseFromString(kValidExecReq, &req));
+  req.clear_command_spec();
+  EXPECT_FALSE(devtools_goma::VerifyExecReq(req));
+
+  // No arguments.
+  ASSERT_TRUE(TextFormat::ParseFromString(kValidExecReq, &req));
+  req.clear_arg();
+  EXPECT_FALSE(devtools_goma::VerifyExecReq(req));
+
+  // No working directory.
+  ASSERT_TRUE(TextFormat::ParseFromString(kValidExecReq, &req));
+  req.clear_cwd();
+  EXPECT_FALSE(devtools_goma::VerifyExecReq(req));
+}
diff --git a/lib/file_helper.cc b/lib/file_helper.cc
new file mode 100644
index 0000000..d263e32
--- /dev/null
+++ b/lib/file_helper.cc
@@ -0,0 +1,88 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+
+#include "file_helper.h"
+
+#include <errno.h>
+
+#ifdef _WIN32
+# include "config_win.h"
+#endif  // _WIN32
+
+#include "file.h"
+#include "glog/logging.h"
+#include "path.h"
+#include "scoped_fd.h"
+#include "string_piece.h"
+
+namespace devtools_goma {
+
+// Reads the whole content of |filename| into |*OUTPUT|.
+//
+// |*OUTPUT| must not be null and is cleared first.  Returns true on success;
+// an empty file yields an empty string.  Returns false if the file cannot be
+// opened, its size cannot be obtained, a read fails, or EOF is reached before
+// the reported size has been read.  After a failed read the content of
+// |*OUTPUT| is unspecified.
+bool ReadFileToString(absl::string_view filename, string* OUTPUT) {
+  const string name(filename);
+  DCHECK(OUTPUT != nullptr) << filename;
+  OUTPUT->clear();
+
+  devtools_goma::ScopedFd fd(devtools_goma::ScopedFd::OpenForRead(name));
+  if (!fd.valid()) {
+#ifndef _WIN32
+    // A missing file is an expected situation; only log it verbosely.
+    if (errno == ENOENT)
+      VLOG(1) << "GOMA: file not found:" << name;
+    else
+      PLOG(ERROR) << "GOMA: failed to open " << name;
+#else
+    DWORD err = GetLastError();
+    if ((err == ERROR_FILE_NOT_FOUND) || (err == ERROR_PATH_NOT_FOUND)) {
+      VLOG(1) << "GOMA: file not found:" << name;
+    } else {
+      LOG_SYSRESULT(err);
+      // PLOG checks std errno, which will always be 0, so use LOG(ERROR) here.
+      LOG(ERROR) << "GOMA: failed to open " << name;
+    }
+#endif
+    return false;
+  }
+  size_t file_size = 0;
+  if (!fd.GetFileSize(&file_size)) {
+    LOG(ERROR) << "filename: [" << name << "] stat failed";
+    return false;
+  }
+  VLOG(1) << "filename: [" << name << "] " << " size=" << file_size;
+  if (file_size == 0) {
+    return true;
+  }
+  OUTPUT->resize(file_size);
+  // |len| is the number of bytes read so far.  It is a size_t, like
+  // |file_size|, because an int counter overflows for files of 2GiB or more.
+  size_t len = 0;
+  while (len < file_size) {
+    // Read() may deliver fewer bytes than requested, so keep asking for the
+    // remainder.  Writing through operator[] avoids a const_cast on data().
+    const auto r = fd.Read(&(*OUTPUT)[len], file_size - len);
+    if (r < 0) {
+      LOG(ERROR) << "read " << name;
+      return false;
+    }
+    if (r == 0) {
+      // The file is shorter than GetFileSize() reported.
+      LOG(ERROR) << "read unexpected EOF at " << len
+                 << " name " << name << " size=" << file_size;
+      return false;
+    }
+    len += static_cast<size_t>(r);
+  }
+  return true;
+}
+
+// Writes all of |data| to |file_name|, which is opened through
+// ScopedFd::Create() with mode 0600.
+//
+// Returns true only when every byte of |data| has been handed to Write().
+// Returns false, after logging, if the file cannot be opened or a write fails.
+bool WriteStringToFile(absl::string_view data, absl::string_view file_name) {
+  devtools_goma::ScopedFd fd(
+      devtools_goma::ScopedFd::Create(string(file_name), 0600));
+  if (!fd.valid()) {
+    LOG(ERROR) << "GOMA: failed to open " << file_name;
+    return false;
+  }
+  // A single Write() may accept only part of the buffer.  Keep writing the
+  // remainder so that a short write is never reported as success.
+  while (!data.empty()) {
+    const auto written = fd.Write(data.data(), data.size());
+    // A zero-byte result is treated as an error too; retrying on it could
+    // loop forever.
+    if (written <= 0) {
+      LOG(ERROR) << "write " << file_name;
+      return false;
+    }
+    data.remove_prefix(static_cast<size_t>(written));
+  }
+  return true;
+}
+
+}  // namespace devtools_goma
diff --git a/lib/file_helper.h b/lib/file_helper.h
new file mode 100644
index 0000000..fd71222
--- /dev/null
+++ b/lib/file_helper.h
@@ -0,0 +1,22 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_LIB_FILE_HELPER_H_
+#define DEVTOOLS_GOMA_LIB_FILE_HELPER_H_
+
+#include <string>
+
+
+#include "string_piece.h"
+using std::string;
+
+namespace devtools_goma {
+
+bool ReadFileToString(absl::string_view file_name, string* OUTPUT);
+bool WriteStringToFile(absl::string_view data, absl::string_view file_name);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_LIB_FILE_HELPER_H_
diff --git a/lib/file_reader.cc b/lib/file_reader.cc
new file mode 100644
index 0000000..71914ce
--- /dev/null
+++ b/lib/file_reader.cc
@@ -0,0 +1,68 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "file_reader.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <memory>
+
+namespace devtools_goma {
+
+/* static */
+// Appends |create| to the singleton's list of creator functions.
+// NewFileReader() tries the creators in registration order.
+void FileReaderFactory::Register(CreateFunction create) {
+  GetInstance()->creators_.push_back(create);
+}
+
+// Returns a reader for |filename|: the result of the first registered creator
+// that returns a non-null reader, or a plain FileReader if none does.
+std::unique_ptr<FileReader> FileReaderFactory::NewFileReader(
+    const string& filename) {
+  for (const CreateFunction& create : creators_) {
+    std::unique_ptr<FileReader> candidate = create(filename);
+    if (candidate) {
+      return candidate;
+    }
+  }
+  // No registered creator handled the file; use the default reader.
+  return FileReader::Create(filename);
+}
+
+/* static */
+// Returns the singleton, creating it on first use.  The instance is
+// registered for deletion at process exit via atexit().
+FileReaderFactory* FileReaderFactory::GetInstance() {
+  if (factory_ != nullptr) {
+    return factory_;
+  }
+  factory_ = new FileReaderFactory();
+  atexit(FileReaderFactory::DeleteInstance);
+  return factory_;
+}
+
+/* static */
+// Destroys the singleton.  GetInstance() registers this with atexit().
+void FileReaderFactory::DeleteInstance() {
+  // Deleting a null pointer is a no-op, so no explicit check is needed.
+  delete factory_;
+  // Reset the pointer so that a GetInstance() call made by a later exit
+  // handler creates a fresh factory instead of returning a dangling pointer.
+  factory_ = nullptr;
+}
+
+FileReaderFactory* FileReaderFactory::factory_ = nullptr;
+
+/* static */
+// Moves up to |*len| bytes from the front of |*buf| to |*ptr|.
+//
+// The moved bytes are removed from |*buf|, |*ptr| is advanced past them and
+// |*len| is reduced by the same amount.  Returns the number of bytes moved,
+// which is 0 when |*buf| is empty or |*len| is 0.
+size_t FileReader::FlushDataInBuffer(string* buf, void** ptr, size_t* len) {
+  // Move as much as fits: the smaller of the buffered size and |*len|.
+  const size_t moved = (*len < buf->size()) ? *len : buf->size();
+  if (moved == 0) {
+    return 0;
+  }
+  memcpy(*ptr, buf->data(), moved);
+  // erase() shifts any remaining bytes to the front of |*buf|.
+  buf->erase(0, moved);
+  *len -= moved;
+  // Advance via a char* value rather than reinterpreting the void* object
+  // as a char* object, which would be type punning.
+  *ptr = static_cast<char*>(*ptr) + moved;
+  return moved;
+}
+
+}  // namespace devtools_goma
diff --git a/lib/file_reader.h b/lib/file_reader.h
new file mode 100644
index 0000000..4663308
--- /dev/null
+++ b/lib/file_reader.h
@@ -0,0 +1,116 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+// FileReaderFactory is a factory class of FileReader and its subclass.
+// You can register creator function of a special purpose FileReader subclass
+// through Register. For registering creator function at the beginning of the
+// program, FileReaderFactory is a singleton class.
+// This class is thread-hostile.
+//
+// FileReader is a wrapper class of ScopedFd.
+// Subclass of this class is made for giving a special behavior on reading.
+// This class's thread safety is the same as scoped_fd.
+
+#ifndef DEVTOOLS_GOMA_LIB_FILE_READER_H_
+#define DEVTOOLS_GOMA_LIB_FILE_READER_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+
+#include "scoped_fd.h"
+using std::string;
+
+namespace devtools_goma {
+
+class FileReader;
+
+// Singleton factory that creates FileReader instances, giving registered
+// creator functions the first chance to supply a specialized subclass.
+class FileReaderFactory {
+ public:
+  // A type to create a FileReader instance.
+  // It returns nullptr if it cannot handle the given |filename|.
+  typedef std::unique_ptr<FileReader> (*CreateFunction)(const string& filename);
+
+  // Returns a new instance of FileReader or its subclass.
+  // Registered creators are tried in registration order; the plain FileReader
+  // is used when none of them returns an instance.
+  std::unique_ptr<FileReader> NewFileReader(const string& filename);
+
+  // Registers the creator functions of FileReader or its subclass.
+  static void Register(CreateFunction create);
+
+  // Gets the singleton instance of FileReaderFactory.
+  // The instance is created on the first call.
+  static FileReaderFactory* GetInstance();
+
+ private:
+  FileReaderFactory() {}
+
+  // Deletes the singleton instance to be called by atexit.
+  static void DeleteInstance();
+
+  // Creator functions in registration order.
+  std::vector<CreateFunction> creators_;
+  // The singleton instance; nullptr until GetInstance() is first called.
+  static FileReaderFactory* factory_;
+
+  DISALLOW_COPY_AND_ASSIGN(FileReaderFactory);
+};
+
+// Wrapper class of ScopedFd.
+// Subclass of this class is used for special treatment of files.
+class FileReader {
+ public:
+  virtual ~FileReader() {}
+
+  // Wrapper of ScopedFd's Read.
+  // If |len| == 0, returns 0 without calling ScopedFd's Read.
+  virtual ssize_t Read(void* ptr, size_t len) {
+    if (len == 0) {
+      return 0;
+    }
+    return fd_.Read(ptr, len);
+  }
+
+  // Wrapper of ScopedFd's Seek.
+  virtual off_t Seek(off_t offset, ScopedFd::Whence whence) const {
+    return fd_.Seek(offset, whence);
+  }
+
+  // Wrapper of ScopedFd's valid.
+  virtual bool valid() const {
+    return fd_.valid();
+  }
+
+  // Wrapper of ScopedFd's GetFileSize.
+  virtual bool GetFileSize(size_t* file_size) const {
+    return fd_.GetFileSize(file_size);
+  }
+
+  // Copies up to |*len| bytes of data in |*buf| to |*ptr|.
+  // |*ptr| is automatically incremented and |*len| is automatically
+  // decremented. Moved data in |*buf| is removed.
+  // Returns the number of copied bytes.
+  //
+  // Note: if |*buf| holds more than |*len| bytes, the bytes left over are
+  //       shifted to the front of |*buf| (an extra copy), so performance may
+  //       suffer.
+  static size_t FlushDataInBuffer(string* buf, void** ptr, size_t* len);
+
+ protected:
+  // Opens |filename| for reading.  Whether that succeeded is reported by
+  // valid().
+  explicit FileReader(const string& filename)
+      : fd_(ScopedFd::OpenForRead(filename)) {
+  }
+
+ private:
+  // Returns an instance of FileReader.
+  // Private: FileReaderFactory (a friend) is the caller.
+  static std::unique_ptr<FileReader> Create(const string& filename) {
+    return std::unique_ptr<FileReader>(new FileReader(filename));
+  }
+
+  // The file opened by the constructor.
+  ScopedFd fd_;
+
+  friend class FileReaderFactory;
+  DISALLOW_COPY_AND_ASSIGN(FileReader);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_LIB_FILE_READER_H_
diff --git a/lib/file_reader_unittest.cc b/lib/file_reader_unittest.cc
new file mode 100644
index 0000000..fb79764
--- /dev/null
+++ b/lib/file_reader_unittest.cc
@@ -0,0 +1,146 @@
+// Copyright 2013 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "file_reader.h"
+
+#include <memory>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+using std::string;
+
+namespace devtools_goma {
+
+const int kBufSize = 1024;
+const char kDummyValue[] = "dummy value";
+
+class FileReaderFactoryTest : public testing::Test {
+};
+
+class FileReaderTest : public testing::Test {
+};
+
+// Test double for FileReader.  Its creator function can be switched on and
+// off, and it reports a recognizable magic file size so that tests can tell
+// it apart from the default FileReader.
+class DummyFileReader : public FileReader {
+ public:
+  // MAGIC number.  Reported as the file size by GetFileSize().
+  enum { kMagic = 0x55 };
+
+  // Always succeeds and reports kMagic, regardless of the underlying file.
+  bool GetFileSize(size_t* file_size) const override {
+    *file_size = kMagic;
+    return true;
+  }
+
+  // Creator function for FileReaderFactory::Register().
+  // Records that it was called; returns an instance only while |create_| is
+  // true, otherwise nullptr.
+  static std::unique_ptr<FileReader> Create(const string& dummy) {
+    called_create_ = true;
+    if (create_) {
+      is_created_ = true;
+      return std::unique_ptr<FileReader>(new DummyFileReader(dummy));
+    } else {
+      return nullptr;
+    }
+  }
+
+  // Sets whether Create() returns an instance and clears the recorded flags.
+  static void Reset(bool create) {
+    create_ = create;
+
+    called_create_ = false;
+    is_created_ = false;
+  }
+
+  // enable / disable the function.
+  static bool create_;
+
+  // flags to check the code is executed or not.
+  static bool called_create_;
+  static bool is_created_;
+
+ private:
+  explicit DummyFileReader(const string& filename) : FileReader(filename) {}
+};
+
+// enable / disable the function.
+bool DummyFileReader::create_;
+
+// flags to check the code is executed or not.
+bool DummyFileReader::called_create_;
+bool DummyFileReader::is_created_;
+
+// Checks which reader class FileReaderFactory::NewFileReader() hands out
+// with and without a registered creator function.
+TEST_F(FileReaderFactoryTest, Create) {
+  std::unique_ptr<FileReader> fr;
+  size_t to_verify;
+  FileReaderFactory* factory = FileReaderFactory::GetInstance();
+
+  // With nothing registered, a file reader instance is still returned.
+  fr = factory->NewFileReader("non_existent");
+  CHECK(fr);
+
+  FileReaderFactory::Register(&DummyFileReader::Create);
+  // Registered class should be selected if subclass Create returns an instance.
+  DummyFileReader::Reset(true);
+  fr = factory->NewFileReader("non_existent");
+  CHECK(fr);
+  EXPECT_TRUE(fr->GetFileSize(&to_verify));
+  EXPECT_TRUE(DummyFileReader::called_create_);
+  EXPECT_TRUE(DummyFileReader::is_created_);
+  EXPECT_EQ(DummyFileReader::kMagic, to_verify);
+
+  // Default class should be used if subclass Create returns nullptr.
+  // GetFileSize() is then expected to fail for the "non_existent" file.
+  DummyFileReader::Reset(false);
+  fr = factory->NewFileReader("non_existent");
+  CHECK(fr);
+  EXPECT_FALSE(fr->GetFileSize(&to_verify));
+  EXPECT_TRUE(DummyFileReader::called_create_);
+  EXPECT_FALSE(DummyFileReader::is_created_);
+}
+
+// Exercises FileReader::FlushDataInBuffer() with a destination that is
+// empty, larger than, equal to, and smaller than the buffered data.
+TEST_F(FileReaderTest, FlushDataInBuffer) {
+  char buf[kBufSize];
+  void *ptr;
+  size_t len, copied;
+  string read_buffer;
+
+  // Should not copy anything if len = 0.
+  read_buffer.assign(kDummyValue);
+  len = 0;
+  buf[0] = '\0';
+  ptr = buf;
+  copied = FileReader::FlushDataInBuffer(&read_buffer, &ptr, &len);
+  EXPECT_EQ(0U, copied);
+  EXPECT_EQ(kDummyValue, read_buffer);
+  EXPECT_EQ('\0', buf[0]);
+  EXPECT_EQ(0U, len);
+
+  // Should copy all data if len > read_buffer.length().
+  // One byte of destination space stays unused.
+  read_buffer.assign(kDummyValue);
+  len = read_buffer.length() + 1;
+  ptr = buf;
+  copied = FileReader::FlushDataInBuffer(&read_buffer, &ptr, &len);
+  EXPECT_EQ("", read_buffer);
+  EXPECT_EQ(kDummyValue, string(buf, copied));
+  EXPECT_EQ(1U, len);
+
+  // Should copy all data if len = read_buffer.length().
+  read_buffer.assign(kDummyValue);
+  len = read_buffer.length();
+  ptr = buf;
+  copied = FileReader::FlushDataInBuffer(&read_buffer, &ptr, &len);
+  EXPECT_EQ("", read_buffer);
+  EXPECT_EQ(strlen(kDummyValue), copied);
+  EXPECT_EQ(kDummyValue, string(buf, copied));
+  EXPECT_EQ(0U, len);
+
+  // Should remain some data if len < read_buffer.length().
+  read_buffer.assign(kDummyValue);
+  len = read_buffer.length() - 1;
+  ptr = buf;
+  copied = FileReader::FlushDataInBuffer(&read_buffer, &ptr, &len);
+  EXPECT_NE("", read_buffer);
+  EXPECT_EQ(strlen(kDummyValue) - 1, copied);
+  EXPECT_EQ(string(kDummyValue, copied), string(buf, copied));
+  EXPECT_EQ(0U, len);
+}
+
+}  // namespace devtools_goma
diff --git a/lib/fileflag.cc b/lib/fileflag.cc
new file mode 100644
index 0000000..9a398b8
--- /dev/null
+++ b/lib/fileflag.cc
@@ -0,0 +1,45 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef _WIN32
+
+
+#include "fileflag.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "glog/logging.h"
+
+namespace devtools_goma {
+
+// Adds |flag| to the file descriptor flags of |fd| (fcntl F_GETFD/F_SETFD),
+// keeping the flags that are already set.
+// Returns 0 on success, or -1 after logging the failure.
+int SetFileDescriptorFlag(int fd, int flag) {
+  const int current = fcntl(fd, F_GETFD);
+  if (current == -1) {
+    PLOG(ERROR) << "Cannot GETFD for fd:" << fd;
+    return -1;
+  }
+
+  const int wanted = current | flag;
+  if (fcntl(fd, F_SETFD, wanted) == -1) {
+    PLOG(ERROR) << "Cannot SETFD for fd:" << fd;
+    return -1;
+  }
+  return 0;
+}
+
+// Adds |flag| to the file status flags of |fd| (fcntl F_GETFL/F_SETFL),
+// keeping the flags that are already set.
+// Returns 0 on success, or -1 after logging the failure.
+int SetFileStatusFlag(int fd, int flag) {
+  const int current = fcntl(fd, F_GETFL);
+  if (current == -1) {
+    PLOG(ERROR) << "Cannot GETFL for fd:" << fd;
+    return -1;
+  }
+
+  const int wanted = current | flag;
+  if (fcntl(fd, F_SETFL, wanted) == -1) {
+    PLOG(ERROR) << "Cannot SETFL for fd:" << fd;
+    return -1;
+  }
+  return 0;
+}
+
+}  // namespace devtools_goma
+#endif  // !_WIN32
diff --git a/lib/fileflag.h b/lib/fileflag.h
new file mode 100644
index 0000000..7c3c312
--- /dev/null
+++ b/lib/fileflag.h
@@ -0,0 +1,19 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_LIB_FILEFLAG_H_
+#define DEVTOOLS_GOMA_LIB_FILEFLAG_H_
+
+#ifndef _WIN32
+
+namespace devtools_goma {
+
+int SetFileDescriptorFlag(int fd, int flag);
+int SetFileStatusFlag(int fd, int flag);
+
+}  // namespace devtools_goma
+
+#endif  // !_WIN32
+#endif  // DEVTOOLS_GOMA_LIB_FILEFLAG_H_
diff --git a/lib/flag_parser.cc b/lib/flag_parser.cc
new file mode 100644
index 0000000..3a300d7
--- /dev/null
+++ b/lib/flag_parser.cc
@@ -0,0 +1,318 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "flag_parser.h"
+
+#include <algorithm>
+#include <iterator>
+#include <utility>
+
+#include "glog/logging.h"
+#include "string_piece.h"
+#include "string_piece_utils.h"
+#include "strutil.h"
+using std::string;
+
+namespace {
+
+// Orders flags so that those with longer names come first.
+struct FlagLengthComparator {
+  bool operator() (FlagParser::Flag* lhs, FlagParser::Flag* rhs) const {
+    const size_t lhs_length = lhs->name().size();
+    const size_t rhs_length = rhs->name().size();
+    return rhs_length < lhs_length;
+  }
+};
+
+}  // anonymous namespace
+
+// Defaults: '-' as the flag prefix, no alternative prefix, neither the
+// "-flag=value" nor the "-flagvalue" form accepted, and the first argument
+// treated as the command name.
+FlagParser::Options::Options()
+    : flag_prefix('-'),
+      alt_flag_prefix('\0'),
+      allows_equal_arg(false),
+      allows_nonspace_arg(false),
+      has_command_name(true) {
+}
+
+// Creates a flag called |name|.  The prefix characters and the accepted
+// "=value" / "value" forms are copied from |options| at construction time,
+// so later changes to the parser's options do not affect this flag.
+// All output pointers and callbacks start out unset.
+FlagParser::Flag::Flag(const char* name,
+                       bool require_value,
+                       bool allows_space_arg,
+                       const FlagParser::Options& options)
+    : name_(name),
+      require_value_(require_value),
+      flag_prefix_(options.flag_prefix),
+      alt_flag_prefix_(options.alt_flag_prefix),
+      allows_equal_arg_(options.allows_equal_arg),
+      allows_nonspace_arg_(options.allows_nonspace_arg),
+      allows_space_arg_(allows_space_arg),
+      seen_(false),
+      seen_output_(nullptr),
+      output_(nullptr),
+      value_callback_(nullptr),
+      values_output_(nullptr),
+      parse_callback_(nullptr) {
+}
+
+// Nothing to release here: the destructor body is empty and the output
+// pointers and callbacks are left untouched.
+FlagParser::Flag::~Flag() {
+}
+
+// Registers |seen_output| to be set to true when this flag is seen.
+// |seen_output| must not be null: it is reset to false right away.
+void FlagParser::Flag::SetSeenOutput(bool* seen_output) {
+  seen_output_ = seen_output;
+  *seen_output_ = false;
+}
+
+// Registers |output| to receive the original arguments consumed by this flag.
+void FlagParser::Flag::SetOutput(std::vector<string>* output) {
+  output_ = output;
+}
+
+// Registers |values| to receive this flag's values.  When |callback| is not
+// null, each value is passed through it before being stored in |values|.
+void FlagParser::Flag::SetValueOutputWithCallback(
+    Callback* callback, std::vector<string>* values) {
+  value_callback_ = callback;
+  values_output_ = values;
+}
+
+// Registers |callback| to rewrite flag values when the parsed form of an
+// argument is recorded.
+void FlagParser::Flag::SetCallbackForParsedArgs(Callback* callback) {
+  parse_callback_ = callback;
+}
+
+// Tries to match this flag against args[i].
+// On a match, records the argument(s) through Output(), stores the index of
+// the last consumed argument in |*last_i| (i, or i + 1 for the "-name value"
+// form) and returns true.  Returns false, leaving |*last_i| untouched, when
+// args[i] is not this flag.
+bool FlagParser::Flag::Parse(const std::vector<string>& args, size_t i,
+                             size_t* last_i) {
+  // |key| is the argument without its prefix character.  It stays empty when
+  // a prefix is configured but args[i] does not start with one (or is a
+  // single character).
+  absl::string_view key;
+  if (!flag_prefix_) {
+    key = args[i];
+  } else if (args[i].size() > 1 &&
+             (args[i][0] == flag_prefix_
+              || (alt_flag_prefix_ && args[i][0] == alt_flag_prefix_))) {
+    key = absl::ClippedSubstr(absl::string_view(args[i]), 1);
+  } else {
+    // non flag args
+    VLOG(3) << "non flag arg:" << args[i];
+  }
+  VLOG(4) << "check flag '" << key << "' by '" << name_ << "'";
+  // An empty name marks the "non flag" entry (see FlagParser::AddNonFlag()).
+  if (name_.empty()) {
+    if (key.empty()) {
+      VLOG(3) << "FlagParser: non flag: " << args[i];
+      Output(i, args[i], &args[i]);
+      *last_i = i;
+      return true;
+    } else if (args[i][0] != flag_prefix_) {
+      // The argument does not start with the primary prefix (e.g. it starts
+      // with the alternative one), so it is accepted as a non-flag argument.
+      VLOG(3) << "FlagParser: maybe non flag? " << args[i];
+      Output(i, args[i], &args[i]);
+      *last_i = i;
+      return true;
+    }
+    return false;
+  }
+  if (!strings::StartsWith(key, name_)) {
+    return false;
+  }
+  // Exact match: the argument is just the flag name.
+  if (key == name_) {
+    if (!require_value_) {
+      // E.g., "-c"
+      VLOG(3) << "FlagParser: no require value: " << key;
+      Output(i, args[i], nullptr);
+      *last_i = i;
+      return true;
+    } else if (!allows_space_arg_) {
+      // E.g., "-O"
+      // The value is recorded as an empty string.
+      VLOG(3) << "FlagParser: no allow space arg: " << key;
+      string no_value;
+      Output(i, args[i], &no_value);
+      *last_i = i;
+      return true;
+    } else {
+      // E.g., "-x c++"
+      // The value is the next argument; without one this is not a match.
+      if (i + 1U == args.size()) {
+        VLOG(2) << "FlagParser: " << args[i] << " should take an argument";
+        return false;
+      }
+      VLOG(3) << "FlagParser: key-value argument with space: " << args[i];
+      Output(i, args[i], nullptr);
+      Output(i + 1, args[i + 1], &args[i + 1]);
+      *last_i = i + 1;
+      return true;
+    }
+  }
+  // From here on |key| is the name followed by more characters.
+  if (!require_value_) {
+    // e.g. -clang-syntax for -c.
+    return false;
+  }
+  if (allows_equal_arg_) {
+    size_t equal_index = key.find('=');
+    if (equal_index != string::npos &&
+        key.substr(0, equal_index) == name_) {
+      // E.g., "-isysroot=/foobar"
+      VLOG(3) << "FlagParser: key-value argument with equal: " << args[i];
+      const string value = string(absl::ClippedSubstr(key, equal_index + 1));
+      Output(i, args[i], &value);
+      *last_i = i;
+      return true;
+    }
+  }
+  if (allows_nonspace_arg_) {
+    // E.g. "-xc++" or "-O2"
+    // The value is whatever follows the prefix character (if any) and name.
+    VLOG(3) << "FlagParser: key-value argument without separator: " << args[i];
+    const string value =
+        args[i].substr(name_.size() + (flag_prefix_ ? 1 : 0));
+    Output(i, args[i], &value);
+    *last_i = i;
+    return true;
+  }
+  return false;
+}
+
+// Returns the |i|-th recorded value.  CHECK-fails if |i| is out of range.
+const string& FlagParser::Flag::value(int i) const {
+  CHECK_GE(i, 0);
+  CHECK_LT(i, static_cast<int>(values_.size()));
+  return values_[i];
+}
+
+// Returns the most recently recorded value, or an empty string when no
+// value has been recorded.
+string FlagParser::Flag::GetLastValue() const {
+  return values_.empty() ? string() : values_.back();
+}
+
+// Returns the parsed form recorded for argument index |i|.
+// CHECK-fails if this flag recorded nothing for |i|.
+const string& FlagParser::Flag::GetParsedArgs(int i) const {
+  unordered_map<int, string>::const_iterator found = parsed_args_.find(i);
+  CHECK(found != parsed_args_.end()) << name_ << " at " << i;
+  return found->second;
+}
+
+// Records that argument |arg| at index |i| belongs to this flag.
+// |value| is the flag value carried by |arg|, or nullptr when |arg| carries
+// none (a flag without value, or the name part of a "-name value" pair).
+// CHECK-fails if something was already recorded for index |i|.
+void FlagParser::Flag::Output(int i, const string& arg, const string* value) {
+  VLOG(4) << "Output:" << i << " " << arg << " value="
+          << (value ? *value : "(null)");
+  seen_ = true;
+  if (seen_output_)
+    *seen_output_ = true;
+  if (output_)
+    output_->push_back(arg);
+
+  if (value == nullptr) {
+    // No value to rewrite: the parsed form is the argument itself.
+    CHECK(parsed_args_.insert(std::make_pair(i, arg)).second);
+    return;
+  }
+  values_.push_back(*value);
+  if (values_output_ != nullptr) {
+    // The external value list gets the value callback's result when a
+    // callback is set, and the raw value otherwise.
+    string v;
+    if (value_callback_)
+      v = value_callback_->ParseFlagValue(*this, *value);
+    else
+      v = *value;
+    values_output_->push_back(v);
+  }
+
+  // Build the parsed form of the argument: when the parse callback changes
+  // the value, substitute the new value for the old one inside |arg|.
+  string parsed_value = *value;
+  if (parse_callback_)
+    parsed_value = parse_callback_->ParseFlagValue(*this, *value);
+  string parsed_arg = arg;
+  if (parsed_value != *value) {
+    parsed_arg = StringReplace(arg, *value, parsed_value, true);
+  }
+  CHECK(parsed_args_.insert(std::make_pair(i, parsed_arg)).second);
+}
+
+// Creates a parser with default Options and no flags registered.
+FlagParser::FlagParser() {
+}
+
+// Deletes every Flag created by the Add*Flag() methods.
+FlagParser::~FlagParser() {
+  for (const auto& entry : flags_) {
+    delete entry.second;
+  }
+}
+
+// Registers |name| as a flag that takes no value.
+// Returns the already registered Flag if |name| was added before.
+FlagParser::Flag* FlagParser::AddBoolFlag(const char* name) {
+  // Claim the map slot first; only a newly inserted slot gets a new Flag.
+  auto slot = flags_.insert(std::make_pair(name, static_cast<Flag*>(nullptr)));
+  if (slot.second) {
+    slot.first->second = new Flag(name, false, false, opts_);
+  }
+  return slot.first->second;
+}
+
+// Registers |name| as a flag whose value, if any, is attached to the name
+// in the same argument.  The following argument is never taken as a value.
+// Returns the already registered Flag if |name| was added before.
+FlagParser::Flag* FlagParser::AddPrefixFlag(const char* name) {
+  // Claim the map slot first; only a newly inserted slot gets a new Flag.
+  auto slot = flags_.insert(std::make_pair(name, static_cast<Flag*>(nullptr)));
+  if (slot.second) {
+    slot.first->second = new Flag(name, true, false, opts_);
+  }
+  return slot.first->second;
+}
+
+// Registers |name| as a flag that requires a value and may take it from the
+// following argument.
+// Returns the already registered Flag if |name| was added before.
+FlagParser::Flag* FlagParser::AddFlag(const char* name) {
+  // Claim the map slot first; only a newly inserted slot gets a new Flag.
+  auto slot = flags_.insert(std::make_pair(name, static_cast<Flag*>(nullptr)));
+  if (slot.second) {
+    slot.first->second = new Flag(name, true, true, opts_);
+  }
+  return slot.first->second;
+}
+
+// Registers the entry that collects non-flag arguments.  It is stored under
+// the empty name.
+// Returns the already registered Flag if it was added before.
+FlagParser::Flag* FlagParser::AddNonFlag() {
+  // Claim the map slot first; only a newly inserted slot gets a new Flag.
+  auto slot = flags_.insert(std::make_pair("", static_cast<Flag*>(nullptr)));
+  if (slot.second) {
+    slot.first->second = new Flag("", true, false, opts_);
+  }
+  return slot.first->second;
+}
+
+// Parses |args| against the registered flags.
+// For every argument, the flags are tried from the longest name to the
+// shortest and the first one that matches claims the argument (and its value
+// argument, if it consumed one).  Unmatched arguments that start with the
+// flag prefix are collected as unknown flags.
+void FlagParser::Parse(const std::vector<string>& args) {
+  args_.insert(args_.end(), args.begin(), args.end());
+  parsed_flags_.resize(args_.size());
+
+  // Check longest flag name first.
+  std::vector<Flag*> candidates;
+  for (const auto& entry : flags_) {
+    candidates.push_back(entry.second);
+  }
+  std::sort(candidates.begin(), candidates.end(), FlagLengthComparator());
+
+  for (size_t i = opts_.has_command_name ? 1 : 0; i < args.size(); ++i) {
+    const string& arg = args[i];
+    VLOG(4) << "FlagParser: arg=" << arg;
+    if (arg.empty()) {
+      VLOG(3) << "FlagParser: empty flag";
+      continue;
+    }
+
+    Flag* matched = nullptr;
+    for (Flag* candidate : candidates) {
+      size_t last_i;
+      if (!candidate->Parse(args_, i, &last_i)) {
+        continue;
+      }
+      VLOG(3) << "matched for flag '" << candidate->name() << "' for "
+              << args_[i];
+      // Attribute every argument the flag consumed to it, then continue
+      // after the last consumed one.
+      for (size_t consumed = i; consumed <= last_i; ++consumed) {
+        parsed_flags_[consumed] = candidate;
+      }
+      i = last_i;
+      matched = candidate;
+      break;
+    }
+
+    if (matched == nullptr && arg.front() == opts_.flag_prefix) {
+      unknown_flag_args_.push_back(arg);
+    }
+  }
+}
+
+// Returns the arguments given to Parse(), where each argument claimed by a
+// flag is replaced with that flag's parsed form (see
+// Flag::SetCallbackForParsedArgs()) and every other argument is kept as is.
+// Returns an empty vector when no arguments have been parsed.
+std::vector<string> FlagParser::GetParsedArgs() {
+  std::vector<string> result;
+  if (args_.empty()) {
+    // Parse() has not run or was given nothing, so there is no args_[0] to
+    // copy even when a command name is expected.
+    return result;
+  }
+  result.reserve(args_.size());
+
+  size_t i = 0;
+  if (opts_.has_command_name) {
+    // The command name is never matched against flags; pass it through.
+    result.push_back(args_[0]);
+    i = 1;
+  }
+  for (; i < parsed_flags_.size(); ++i) {
+    Flag* flag = parsed_flags_[i];
+    result.push_back(flag ? flag->GetParsedArgs(i) : args_[i]);
+  }
+  return result;
+}
diff --git a/lib/flag_parser.h b/lib/flag_parser.h
new file mode 100644
index 0000000..18b7f62
--- /dev/null
+++ b/lib/flag_parser.h
@@ -0,0 +1,186 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_LIB_FLAG_PARSER_H_
+#define DEVTOOLS_GOMA_LIB_FLAG_PARSER_H_
+
+#include <map>
+#include <string>
+#include <vector>
+
+
+#include "basictypes.h"
+#include "unordered.h"
+using std::string;
+
+// Command line parser driven by a set of registered flags.
+// Typical use: adjust mutable_options(), register flags with the Add*Flag()
+// methods, attach outputs to the returned Flag objects, then call Parse().
+class FlagParser {
+ public:
+  // Parsing options.  A Flag copies the prefix and value-form options when
+  // it is created, so set them before adding flags.
+  struct Options {
+    Options();
+
+    // '-' for GCC, '/' for VC++, and '\0' for ar.
+    char flag_prefix;
+
+    // Alternative flag prefix if any.
+    // '-' for VC++.
+    // It is weaker than flag_prefix.  If arg starts with alt_flag_prefix, but
+    // no Flag matching found, arg may be considered as non flag.
+    // TODO: for clang-cl, flag_prefix='-', alt_flag_prefix='/' ?
+    // https://code.google.com/p/chromium/issues/detail?id=427942
+    char alt_flag_prefix;
+
+    // Support -flag=value style. default false.
+    bool allows_equal_arg;
+
+    // Support -flagvalue style. default false.
+    bool allows_nonspace_arg;
+
+    // If true, we will skip the first argument. True by default.
+    bool has_command_name;
+  };
+  class Flag;
+  // Interface for rewriting a flag value while parsing.
+  class Callback {
+   public:
+    Callback() {}
+    virtual ~Callback() {}
+    // Returns parsed flag value of value for flag.
+    virtual string ParseFlagValue(const Flag& flag, const string& value) = 0;
+   private:
+    DISALLOW_COPY_AND_ASSIGN(Callback);
+  };
+  // A single registered flag.  Instances are created and owned by FlagParser.
+  class Flag {
+   public:
+    // Uses seen_output to store boolean whether the flag is seen or not.
+    // Should be called before calling FlagParser::Parse().
+    // *seen_output will be updated in FlagParser::Parse().
+    // Doesn't take ownership of seen_output.
+    void SetSeenOutput(bool* seen_output);
+
+    // Uses output to store original arguments for the flag.
+    // Should be called before calling FlagParser::Parse().
+    // *output will be updated in FlagParser::Parse().
+    // output may be shared with other flags.
+    // Doesn't take ownership of output.
+    void SetOutput(std::vector<string>* output);
+
+    // Uses values to store values for the flags.
+    // If callback is not NULL, it is used to parse flag value before storing
+    // to values.  If callback is NULL, original flag value will be stored.
+    // Should be called before calling FlagParser::Parse().
+    // *values will be updated in FlagParser::Parse().
+    // Doesn't take ownership of callback and values.
+    void SetValueOutputWithCallback(Callback* callback,
+                                    std::vector<string>* values);
+
+    // Uses callback to get parsed args.
+    // If callback is NULL or SetCallbackForParsedArgs() is not used, original
+    // args will be used as parsed args.
+    // Should be called before calling FlagParser::Parse().
+    // Doesn't take ownership of callback.
+    void SetCallbackForParsedArgs(Callback* callback);
+
+    // Name of the flag.  E.g "c" for "-c". "" for non flag args.
+    const string& name() const { return name_; }
+
+    // True if the flag requires a value.
+    bool require_value() const { return require_value_; }
+
+    // True if the flag is used.  Used after FlagParser::Parse() called.
+    bool seen() const { return seen_; }
+
+    // Returns flag values.  Used after FlagParser::Parse() called.
+    const std::vector<string>& values() const { return values_; }
+    // Gets i'th flag value.  Used after FlagParser::Parse() called.
+    const string& value(int i) const;
+    // Gets last flag value.  Used after FlagParser::Parse() called.
+    // Returns an empty string if the flag has no value.
+    string GetLastValue() const;
+
+   private:
+    friend class FlagParser;
+    Flag(const char* name, bool require_value, bool allow_space_arg,
+         const Options& options);
+    ~Flag();
+
+    // Tries to parse args at i.
+    // Returns true if it is the flag and sets last i in *last_i.
+    // Returns false if it is not the flag.
+    bool Parse(const std::vector<string>& args, size_t i, size_t* last_i);
+
+    // Gets parsed arguments at i, where Parse() had returned true for the i.
+    const string& GetParsedArgs(int i) const;
+
+    // Records arg at index i (and its value, if not NULL) for this flag.
+    void Output(int i, const string& arg, const string* value);
+
+    string name_;
+    bool require_value_;
+
+    // Copied from Options when the flag is created.
+    char flag_prefix_;
+    char alt_flag_prefix_;
+    bool allows_equal_arg_;
+    bool allows_nonspace_arg_;
+    bool allows_space_arg_;
+
+    bool seen_;
+    bool* seen_output_;
+    std::vector<string>* output_;
+    Callback* value_callback_;
+    std::vector<string> values_;
+    std::vector<string>* values_output_;
+    Callback* parse_callback_;
+    // Parsed form of each argument claimed by this flag, keyed by the
+    // argument's index.
+    unordered_map<int, string> parsed_args_;
+    DISALLOW_COPY_AND_ASSIGN(Flag);
+  };
+
+  FlagParser();
+  ~FlagParser();
+
+  FlagParser::Options* mutable_options() {
+    return &opts_;
+  }
+
+  // Adds flag to be parsed.
+  // If name is already added, returns the same flag instance.
+  // Must be called before calling Parse().
+  //
+  // BoolFlag doesn't take any value. "-name".
+  Flag* AddBoolFlag(const char* name);
+
+  // PrefixFlag may take value in the same argument. "-name" or "-namevalue".
+  Flag* AddPrefixFlag(const char* name);
+
+  // Flag takes value.
+  // "-name value".
+  // "-namevalue" (if allows_nonspace_arg).
+  // "-name=value" (if allows_equal_arg).
+  Flag* AddFlag(const char* name);
+
+  // Argument that isn't prefixed with flag_prefix.
+  Flag* AddNonFlag();
+
+  void Parse(const std::vector<string>& args);
+
+  // Returns parsed args.  Valid after Parse() is called.
+  std::vector<string> GetParsedArgs();
+  // Returns unknown flags. Valid after Parse() is called.
+  const std::vector<string>& unknown_flag_args() const {
+    return unknown_flag_args_;
+  }
+
+ private:
+  Options opts_;
+  // Registered flags keyed by name.  The Flag objects are deleted in the
+  // destructor.
+  std::map<string, Flag*> flags_;
+
+  // original args given by Parse().
+  std::vector<string> args_;
+
+  // Valid after Parse. This contains unknown flags.
+  std::vector<string> unknown_flag_args_;
+
+  // Valid after Parse.  The flag that claimed the argument at each index,
+  // or NULL if no flag did.
+  std::vector<Flag*> parsed_flags_;
+
+  DISALLOW_COPY_AND_ASSIGN(FlagParser);
+};
+
+#endif  // DEVTOOLS_GOMA_LIB_FLAG_PARSER_H_
diff --git a/lib/flag_parser_unittest.cc b/lib/flag_parser_unittest.cc
new file mode 100644
index 0000000..206fc2b
--- /dev/null
+++ b/lib/flag_parser_unittest.cc
@@ -0,0 +1,272 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "flag_parser.h"
+
+#include "glog/stl_logging.h"
+#include "gtest/gtest.h"
+using std::string;
+
+class AddFramework : public FlagParser::Callback {
+ public:
+  string ParseFlagValue(
+      const FlagParser::Flag& /* flag */, const string& value) override {
+    return value + " (framework)";
+  }
+};
+
+TEST(FlagParserTest, Parse) {
+  FlagParser parser;
+  // Same options as GCCFlags::DefineFlags().
+  // Don't want to introduce dependency to compiler_flags here.
+  parser.mutable_options()->flag_prefix = '-';
+  parser.mutable_options()->allows_equal_arg = true;
+  parser.mutable_options()->allows_nonspace_arg = true;
+
+  bool E, c;
+  parser.AddBoolFlag("E")->SetSeenOutput(&E);
+  parser.AddBoolFlag("c")->SetSeenOutput(&c);
+
+  FlagParser::Flag* flag_arch = parser.AddFlag("arch");
+  FlagParser::Flag* flag_x = parser.AddFlag("x");
+  FlagParser::Flag* flag_o = parser.AddFlag("o");
+  FlagParser::Flag* flag_isysroot = parser.AddFlag("isysroot");
+  FlagParser::Flag* flag_WpMD = parser.AddFlag("Wp,MD,");
+  FlagParser::Flag* flag_MF = parser.AddFlag("MF");
+  FlagParser::Flag* flag_O = parser.AddPrefixFlag("O");
+
+  AddFramework add_framework;
+  std::vector<string> I;
+  parser.AddFlag("I")->SetValueOutputWithCallback(nullptr, &I);
+  parser.AddFlag("F")->SetValueOutputWithCallback(&add_framework, &I);
+
+  FlagParser::Flag* flag_D = parser.AddFlag("D");
+
+  std::vector<string> include_related;
+  parser.AddFlag("include")->SetOutput(&include_related);
+  parser.AddFlag("isystem")->SetOutput(&include_related);
+  parser.AddFlag("B")->SetOutput(&include_related);
+
+  FlagParser::Flag* non_flag = parser.AddNonFlag();
+
+  std::vector<string> args;
+  // The name of command.
+  args.push_back("/Users/goma/goma/gcc");
+
+  // A switch without an argument.
+  args.push_back("-c");
+
+  // We support three types of switches with arguments.
+  args.push_back("-xc++");
+  args.push_back("-arch");
+  args.push_back("i386");
+  args.push_back("-isysroot=/Developer/SDKs/MacOSX10.5.sdk");
+
+  // The "foobar" must not appear in input_files.
+  args.push_back("-MF");
+  args.push_back("foobar");
+
+  // We can handle this case as well.
+  args.push_back("-Wp,MD,animation.dep");
+
+  // Multiple values for the same switch.
+  args.push_back("-I../skia/ext");
+  args.push_back("-I../third_party/libjpeg");
+  args.push_back("-Ffoo.framework/Frameworks");
+  args.push_back("-I../third_party/libpng");
+
+  // We should keep original arguments for them.
+  args.push_back("-include");
+  args.push_back("foo.h");
+  args.push_back("-isystem=foo");
+  args.push_back("-Bbar");
+
+  args.push_back("-DFOO");
+  // -DBAR=BAZ should be parsed as {"D": "BAR=BAZ"}, not {"DBAR": "BAZ"}.
+  args.push_back("-DBAR=BAZ");
+
+  // Unknown flags
+  args.push_back("-fmessage-length=0");
+  args.push_back("-pipe");
+  args.push_back("-fno-exceptions");
+  args.push_back("-Wall");
+
+  // flag_O will be -O0, -Os and -O. Make sure -O should not take next argument.
+  args.push_back("-O0");
+  args.push_back("-Os");
+  args.push_back("-O");
+
+  // An argument without a leading switch.
+  args.push_back("/Users/goma/gitchr/src/app/animation_container.cc");
+  // This should be treated as an input.
+  args.push_back("-");
+
+  // Error case: the argument is missing. We ignore this flag.
+  args.push_back("-o");
+
+  parser.Parse(args);
+
+  EXPECT_FALSE(E);
+  EXPECT_TRUE(c);
+
+  EXPECT_EQ("i386", flag_arch->GetLastValue());
+  EXPECT_EQ("c++", flag_x->GetLastValue());
+  EXPECT_EQ("", flag_o->GetLastValue());
+  EXPECT_EQ("/Developer/SDKs/MacOSX10.5.sdk", flag_isysroot->GetLastValue());
+  EXPECT_EQ("animation.dep", flag_WpMD->GetLastValue());
+  EXPECT_EQ("foobar", flag_MF->GetLastValue());
+
+  ASSERT_EQ(4U, I.size());
+  EXPECT_EQ("../skia/ext", I[0]);
+  EXPECT_EQ("../third_party/libjpeg", I[1]);
+  EXPECT_EQ("foo.framework/Frameworks (framework)", I[2]);
+  EXPECT_EQ("../third_party/libpng", I[3]);
+
+  ASSERT_EQ(2U, flag_D->values().size());
+  EXPECT_EQ("FOO", flag_D->value(0));
+  EXPECT_EQ("BAR=BAZ", flag_D->value(1));
+
+  ASSERT_EQ(4U, include_related.size());
+  EXPECT_EQ("-include", include_related[0]);
+  EXPECT_EQ("foo.h", include_related[1]);
+  EXPECT_EQ("-isystem=foo", include_related[2]);
+  EXPECT_EQ("-Bbar", include_related[3]);
+
+  ASSERT_EQ(3U, flag_O->values().size());
+  EXPECT_EQ("0", flag_O->value(0));
+  EXPECT_EQ("s", flag_O->value(1));
+  EXPECT_EQ("", flag_O->value(2));
+
+  ASSERT_EQ(2U, non_flag->values().size());
+  EXPECT_EQ("/Users/goma/gitchr/src/app/animation_container.cc",
+            non_flag->value(0));
+  EXPECT_EQ("-", non_flag->value(1));
+
+  ASSERT_EQ(5U, parser.unknown_flag_args().size())
+      << parser.unknown_flag_args();
+  EXPECT_EQ("-fmessage-length=0", parser.unknown_flag_args()[0]);
+  EXPECT_EQ("-pipe", parser.unknown_flag_args()[1]);
+  EXPECT_EQ("-fno-exceptions", parser.unknown_flag_args()[2]);
+  EXPECT_EQ("-Wall", parser.unknown_flag_args()[3]);
+  // -o is missing argument, so counted as unknown flags.
+  EXPECT_EQ("-o", parser.unknown_flag_args()[4]);
+}
+
+TEST(FlagParserTest, ParseBoolFlag) {
+  FlagParser parser;
+  parser.mutable_options()->flag_prefix = '-';
+  parser.mutable_options()->allows_equal_arg = true;
+  parser.mutable_options()->allows_nonspace_arg = true;
+
+  bool c;
+  parser.AddBoolFlag("c")->SetSeenOutput(&c);
+
+  std::vector<string> args;
+  args.push_back("x86_65-cros-linux-gnu-gcc");
+  args.push_back("-clang-syntax");
+
+  parser.Parse(args);
+  EXPECT_FALSE(c);
+}
+
+TEST(FlagParserTest, AltPrefix) {
+  FlagParser parser;
+  parser.mutable_options()->flag_prefix = '/';
+  parser.mutable_options()->alt_flag_prefix = '-';
+  parser.mutable_options()->allows_nonspace_arg = true;
+
+  FlagParser::Flag* flag_D = parser.AddFlag("D");
+  FlagParser::Flag* non_flag = parser.AddNonFlag();
+
+  std::vector<string> args;
+  args.push_back("cl.exe");
+  args.push_back("-DFOO=BAR");
+  args.push_back("/DBAZ");
+
+  args.push_back("foo.cc");
+
+  parser.Parse(args);
+  ASSERT_EQ(2UL, flag_D->values().size());
+  EXPECT_EQ("FOO=BAR", flag_D->value(0));
+  EXPECT_EQ("BAZ", flag_D->value(1));
+
+  ASSERT_EQ(1UL, non_flag->values().size());
+  EXPECT_EQ("foo.cc", non_flag->value(0));
+}
+
+TEST(FlagParserTest, WeakAltPrefix) {
+  FlagParser parser;
+  parser.mutable_options()->flag_prefix = '-';
+  parser.mutable_options()->alt_flag_prefix = '/';
+  parser.mutable_options()->allows_nonspace_arg = true;
+
+  FlagParser::Flag* flag_D = parser.AddFlag("D");
+  FlagParser::Flag* non_flag = parser.AddNonFlag();
+
+  std::vector<string> args;
+  args.push_back("clang-cl");
+  args.push_back("-DFOO=BAR");
+  args.push_back("/DBAZ");
+
+  // since '/' is alt_flag_prefix, and we didn't add any flag that starts
+  // with 'h', it will be considered as non flag arg.
+  args.push_back("/home/foo/src/foo.cc");
+
+  parser.Parse(args);
+  ASSERT_EQ(2UL, flag_D->values().size());
+  EXPECT_EQ("FOO=BAR", flag_D->value(0));
+  EXPECT_EQ("BAZ", flag_D->value(1));
+
+  ASSERT_EQ(1UL, non_flag->values().size());
+  EXPECT_EQ("/home/foo/src/foo.cc", non_flag->value(0));
+}
+
+// We actually won't have this case to support clang-cl used on Linux
+// while sharing the code with Windows.
+TEST(FlagParserTest, ClexeUnknownFlagsAltPrefix) {
+  FlagParser parser;
+  parser.mutable_options()->flag_prefix = '/';
+  parser.mutable_options()->alt_flag_prefix = '-';
+  parser.mutable_options()->allows_nonspace_arg = true;
+
+  parser.AddFlag("D");
+
+  std::vector<string> args {
+    "clang-cl",
+    "-DFOO=BAR",
+    "/DBAZ",
+    "/UNKNOWN",  // unknown flag.
+    "/home/foo/src/foo.cc",  // unknown flag.
+  };
+
+  parser.Parse(args);
+
+  ASSERT_EQ(2U, parser.unknown_flag_args().size())
+      << parser.unknown_flag_args();
+  EXPECT_EQ("/UNKNOWN", parser.unknown_flag_args()[0]);
+  EXPECT_EQ("/home/foo/src/foo.cc", parser.unknown_flag_args()[1]);
+}
+
+TEST(FlagParserTest, ClexeUnknownFlagsWeakAltPrefix) {
+  FlagParser parser;
+  parser.mutable_options()->flag_prefix = '-';
+  parser.mutable_options()->alt_flag_prefix = '/';
+  parser.mutable_options()->allows_nonspace_arg = true;
+
+  parser.AddFlag("D");
+
+  std::vector<string> args {
+    "clang-cl",
+    "-DFOO=BAR",
+    "/DBAZ",
+    "/UNKNOWN",  // this is considered as non flag (!= unknown flag)
+    "/home/foo/src/foo.cc",  // this, too.
+  };
+
+  parser.Parse(args);
+
+  ASSERT_EQ(0U, parser.unknown_flag_args().size())
+      << parser.unknown_flag_args();
+}
diff --git a/lib/goma_data.proto b/lib/goma_data.proto
new file mode 100644
index 0000000..566b231
--- /dev/null
+++ b/lib/goma_data.proto
@@ -0,0 +1,313 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+// new proto definitions for goma v2
+// LINT: ALLOW_GROUPS
+
+syntax = "proto2";
+
+package devtools_goma;
+
+// persistent data
+
+// hash_key = sha256(serialized FileBlob)
+//
+// for small file (< 2MB)
+//   blob_type=FILE, !has_offset(), has_content()
+//                   has_file_size(), hash_key_size() == 0
+// for large file
+//   blob_type=FILE_META, !has_offset(), !has_content()
+//                   has_file_size(), hash_key_size() > 0
+//  for each hash_key(i)
+//     blob_type=FILE_CHUNK, has_offset(), has_content(),
+//                   has_file_size(), hash_key_size() == 0
+message FileBlob {
+  enum BlobType {
+    FILE_UNSPECIFIED = 0;
+    FILE = 1;
+    FILE_META = 2;
+    FILE_CHUNK = 3;
+    // ARCHIVE = 4;
+  }
+  required BlobType blob_type = 1;
+
+  // for blob_type=FILE_CHUNK
+  optional int64 offset = 10;
+
+  // for blob_type=FILE or FILE_CHUNK
+  optional bytes content = 11;
+
+  // for all blob_types
+  optional int64 file_size = 20;
+
+  // for blob_type=FILE_META.  hash_key is FileBlob hash of FILE_CHUNK.
+  repeated string hash_key = 21;
+}
+
+// Goma backend selects compiler with CommandSpec.
+message CommandSpec {
+  optional string name = 1;  // "gcc", "g++", "cl.exe", etc, without
+                             // path, version number and architecture.
+  optional string version = 2;  // "4.4.3[Ubuntu 4.4.3-4ubuntu5]"
+  optional string target = 3;  // "x86_64-linux-gnu"
+
+  // The following fields are used to request a more specific version of
+  // the command; they are populated by compiler_proxy.
+  optional bytes binary_hash = 4;  // to require exactly the same binary.
+  // A list of alternative hashes. This field will be used when
+  // multiple binaries can be considered the same. For example, linux
+  // android gcc will be used even if a user is using goma from
+  // mac. For such cases, we should fill this field with the binary hash
+  // of mac gcc.
+  repeated bytes equivalent_hash = 11;
+  optional string detailed_info = 5;  // output of "gcc -v"
+  // Tells compiler_proxy the path to the local 'gcc' (not goma gcc);
+  // populated by gomacc.cc and consumed by compiler_proxy.
+  optional string local_compiler_path = 6;
+
+  // Compiler system include paths (sent from compiler_proxy).
+  // Note that system include paths can be a relative path from cwd.
+  // For Windows NaCl, it could also be a relative path from toolchain root.
+  repeated string system_include_path = 7;
+  repeated string cxx_system_include_path = 8;
+  repeated string system_framework_path = 9;
+
+  // Compiler system library paths.
+  repeated string system_library_path = 12;
+
+  // An optional label of this command for logging/monitoring purposes.
+  optional string comment = 10;
+}
+
+message SubprogramSpec {
+  // full path (in client filesystem)
+  // or basename (in result when default subprogram is used).
+  optional string path = 1;
+  optional string binary_hash = 2;
+}
+
+message ExecResult {
+  required int32 exit_status = 1 [default=-1];
+  optional bytes stdout_buffer = 2;
+  optional bytes stderr_buffer = 3;
+  optional CommandSpec command_spec = 4;
+
+  // subprograms that were used in compilation.
+  repeated SubprogramSpec subprogram = 5;
+
+  repeated group Output = 10 {
+    // TODO: We might want to normalize this path to relative path?
+    optional string filename = 11;  // relative to request cwd or full path
+    // if blob.blob_type=FILE_META, client need to request blob.hash_key() later
+    optional FileBlob blob = 12;
+    optional bool is_executable = 13 [default=false];
+  };
+}
+
+// Common RPC message
+
+message RequesterInfo {
+  optional string addr = 1;  // requester's ip addr.
+  optional string username = 2;  // requester's user name
+  // Identifier for each compiler_proxy request.
+  optional string compiler_proxy_id = 3;
+  // The version of the goma protocol. This is intended for two purposes:
+  //
+  // - When we change the meaning of compiler_proxy's requests, we may
+  //   want to support the previous type of requests in our backend for a while.
+  //   Backend can change its behavior by checking the value of this field.
+  // - Once a server stops supporting the old behavior, the backend can
+  //   send an error message to the client. Also, we can track the clients'
+  //   versions with this field so we can easily decide if it's safe to
+  //   stop supporting the old behavior.
+  //
+  // 0 => 1: Changed the meaning of command_spec.system_include_path.
+  //         Now -isysroot in a command line is considered to obtain this field.
+  // 1 => 2: command_spec.version contains vendor versions.
+  //         E.g., 4.4.3[Ubuntu 4.4.3-4ubuntu5]
+  enum GomaApiVersion {
+    CURRENT_VERSION = 2;
+  }
+  // Can't use [default=CURRENT_VERSION] since GomaApiVersion is not int32.
+  optional int32 api_version = 4 [default=2];
+
+  optional int32 pid = 5;
+
+  // deprecated: indicates client use case.
+  reserved 6;
+
+  // Number of retries. 0 is the first call.
+  optional int32 retry = 7;
+
+  optional string goma_revision = 8;
+}
+
+message RequesterEnv {
+  optional string gomacc_path = 41;  // full pathname of gomacc.
+  optional string local_path = 42;  // user's PATH.
+  optional int32 umask = 43;  // user's umask.
+  optional bool verify_output = 50;  // GOMA_VERIFY_OUTPUT
+  optional bool use_local = 51;  // GOMA_USE_LOCAL
+  optional bool fallback = 52; // GOMA_FALLBACK
+  optional string verify_command = 53; // GOMA_VERIFY_COMMAND
+  repeated string fallback_input_file = 60;  // GOMA_FALLBACK_INPUT_FILES
+}
+
+// ExecService Interface
+
+message ExecReq {
+  required CommandSpec command_spec = 1;
+  repeated string arg = 2;
+  repeated string env = 3;
+  optional string cwd = 4;
+
+  repeated group Input = 10 {
+    optional string filename = 11;  // relative to cwd or full path
+    required string hash_key = 12;
+    optional FileBlob content = 13;
+  };
+
+  // The @ notations in arg should be expanded and the result should
+  // be stored in this field for javac and VC++.
+  repeated string expanded_arg = 14;
+
+  // Subprograms that would be used in client. By setting this,
+  // client could request backend to use the same subprograms.
+  //
+  repeated SubprogramSpec subprogram = 15;
+
+  optional RequesterInfo requester_info = 30;
+  enum CachePolicy {
+    // IGNORE = 0;
+    LOOKUP_AND_STORE = 1;
+    LOOKUP_ONLY = 2;
+    STORE_ONLY = 3;
+    LOOKUP_AND_STORE_SUCCESS = 4;
+  }
+  optional CachePolicy cache_policy = 31 [default = LOOKUP_AND_STORE];
+
+  // This is passed from gomacc to compiler proxy, and compiler proxy
+  // clears it before sending ExecReq to goma service.
+  optional RequesterEnv requester_env = 32;
+
+  // When hermetic_mode is true, restrict the backend to use the same compiler
+  // as the local version. Backend should use a compiler package
+  // that has the same version string and the same binary_hash only.
+  // If there is no such compiler in the backend, it should not run any other
+  // compiler but return an error: ExecResp contains error messages,
+  // no command spec and empty missing_input.  It isn't an rpc error.
+  optional bool hermetic_mode = 33;
+
+  // Requests that the call is traced.
+  optional bool trace = 34;
+
+  // EXPERIMENTAL.  Should be true if the user is external.
+  optional bool experimental_is_external_user = 99;
+}
+
+message MultiExecReq {
+  repeated ExecReq req = 1;
+
+  optional RequesterInfo requester_info = 10;
+}
+
+message ExecResp {
+  enum ExecError {
+    OK = 0;
+    BAD_REQUEST = -1;  // Non retryable error.
+  };
+  enum CacheSource {
+    NO_CACHE = 0;
+    MEM_CACHE = 1;
+    STORAGE_CACHE = 2;
+  };
+  optional ExecResult result = 1;
+  optional ExecError error = 2 [default=OK];
+
+  repeated string missing_input = 11;  // filename
+  repeated string missing_reason = 15;  // reasons of missing_input.
+  repeated string error_message = 12;
+
+  optional bool force_store_output_file_for_unmatched_hash = 13
+      [default=false];
+  optional bool force_store_output_file_for_unmatched_version = 14
+      [default=false];
+  optional bool force_store_output_file_for_unmatched_subprograms = 16
+      [default=false];
+
+  // for trace
+  optional string cache_key = 21;  // result cache_key
+  optional CacheSource cache_hit = 27;
+  reserved 22, 23;
+
+  // requester's compiler_proxy_id.
+  // for cached resp, it is the original requester, not current requester.
+  optional string requester_compiler_proxy_id = 26;
+
+
+  // Time at compiler_proxy
+  optional double compiler_proxy_time = 50;
+  optional double compiler_proxy_include_preproc_time = 51;
+  optional double compiler_proxy_include_fileload_time = 52;
+  optional double compiler_proxy_rpc_call_time = 53;
+  optional double compiler_proxy_file_response_time = 54;
+  optional double compiler_proxy_rpc_build_time = 55;
+  optional double compiler_proxy_rpc_send_time = 56;
+  optional double compiler_proxy_rpc_wait_time = 57;
+  optional double compiler_proxy_rpc_recv_time = 58;
+  optional double compiler_proxy_rpc_parse_time = 59;
+
+  optional double compiler_proxy_local_pending_time = 60;
+  optional double compiler_proxy_local_run_time = 61;
+
+  optional bool compiler_proxy_goma_finished = 70;
+  optional bool compiler_proxy_goma_cache_hit = 71;
+  optional bool compiler_proxy_goma_aborted = 72;
+  optional bool compiler_proxy_goma_error = 73;
+  optional bool compiler_proxy_local_finished = 74;
+  optional bool compiler_proxy_local_run = 75;
+  optional bool compiler_proxy_local_killed = 76;
+
+  optional int32 compiler_proxy_exec_request_retry = 80;
+
+  // 99 was used in experimental phase.
+  reserved 99;
+}
+
+message MultiExecResp {
+  repeated group Response = 1 {
+    optional int32 response_code = 2;
+    optional ExecResp resp = 3;
+  }
+}
+
+// FileService Interface
+
+message StoreFileReq {
+  repeated FileBlob blob = 1;
+
+  optional RequesterInfo requester_info = 10;
+}
+
+message StoreFileResp {
+  repeated string hash_key = 1;  // sha256(blob) for success or "" for error
+}
+
+message LookupFileReq {
+  repeated string hash_key = 1;
+
+  optional RequesterInfo requester_info = 10;
+}
+
+message LookupFileResp {
+  repeated FileBlob blob = 2;
+}
+
+message EmptyMessage {
+}
+
+message HttpPortResponse {
+  required int32 port = 1;
+}
diff --git a/lib/goma_data_util.cc b/lib/goma_data_util.cc
new file mode 100644
index 0000000..ff6ab7e
--- /dev/null
+++ b/lib/goma_data_util.cc
@@ -0,0 +1,36 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "goma_data_util.h"
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "prototmp/goma_data.pb.h"
+using std::string;
+
+namespace devtools_goma {
+
+// Returns true when |req| and the result in |resp| list the same subprogram
+// binary hashes. Paths and ordering are ignored.
+bool IsSameSubprograms(const ExecReq& req, const ExecResp& resp) {
+  const auto& requested = req.subprogram();
+  const auto& used = resp.result().subprogram();
+  if (requested.size() != used.size()) return false;
+
+  // Collect the hashes from each side and compare them as sorted lists.
+  std::vector<string> lhs, rhs;
+  for (int i = 0; i < requested.size(); ++i) {
+    lhs.push_back(requested.Get(i).binary_hash());
+    rhs.push_back(used.Get(i).binary_hash());
+  }
+  std::sort(lhs.begin(), lhs.end());
+  std::sort(rhs.begin(), rhs.end());
+  return lhs == rhs;
+}
+
+}  // namespace devtools_goma
diff --git a/lib/goma_data_util.h b/lib/goma_data_util.h
new file mode 100644
index 0000000..4f7df9e
--- /dev/null
+++ b/lib/goma_data_util.h
@@ -0,0 +1,19 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_LIB_GOMA_DATA_UTIL_H_
+#define DEVTOOLS_GOMA_LIB_GOMA_DATA_UTIL_H_
+
+namespace devtools_goma {
+
+class ExecReq;
+class ExecResp;
+
+// Returns true if subprograms in ExecReq and ExecResp are the same.
+bool IsSameSubprograms(const ExecReq& req, const ExecResp& resp);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_LIB_GOMA_DATA_UTIL_H_
diff --git a/lib/goma_data_util_unittest.cc b/lib/goma_data_util_unittest.cc
new file mode 100644
index 0000000..208d621
--- /dev/null
+++ b/lib/goma_data_util_unittest.cc
@@ -0,0 +1,123 @@
+// Copyright 2014 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "goma_data_util.h"
+
+#include "prototmp/goma_data.pb.h"
+#include <gtest/gtest.h>
+
+namespace devtools_goma {
+
+TEST(GomaProtoUtilTest, IsSameSubprogramShouldBeTrueOnEmptyProto) {
+  ExecReq req;
+  ExecResp resp;
+
+  EXPECT_TRUE(IsSameSubprograms(req, resp));
+}
+
+TEST(GomaProtoUtilTest, IsSameSubprogramShouldIgnorePath) {
+  ExecReq req;
+  ExecResp resp;
+
+  SubprogramSpec dummy_spec;
+  dummy_spec.set_binary_hash("dummy_hash");
+
+  SubprogramSpec* spec;
+  spec = req.add_subprogram();
+  *spec = dummy_spec;
+  spec->set_path("request/path");
+
+  spec = resp.mutable_result()->add_subprogram();
+  *spec = dummy_spec;
+  spec->set_path("response/path");
+
+  EXPECT_TRUE(IsSameSubprograms(req, resp));
+}
+
+TEST(GomaProtoUtilTest, IsSameSubprogramShouldBeTrueIfSameEntries) {
+  // Two distinct subprograms, listed in the same order on both sides.
+  ExecReq req;
+  ExecResp resp;
+
+  SubprogramSpec dummy_spec;
+  dummy_spec.set_path("dummy_path");
+  dummy_spec.set_binary_hash("dummy_hash");
+
+  // Populate dummy_spec2 itself; setting these on dummy_spec would leave
+  // dummy_spec2 without a hash.
+  SubprogramSpec dummy_spec2;
+  dummy_spec2.set_path("dummy_path2");
+  dummy_spec2.set_binary_hash("dummy_hash2");
+
+  // Request order: spec 1, spec 2.
+  *req.add_subprogram() = dummy_spec;
+  *req.add_subprogram() = dummy_spec2;
+
+  // Response order: spec 1, spec 2.
+  *resp.mutable_result()->add_subprogram() = dummy_spec;
+  *resp.mutable_result()->add_subprogram() = dummy_spec2;
+
+  EXPECT_TRUE(IsSameSubprograms(req, resp));
+}
+
+TEST(GomaProtoUtilTest, IsSameSubprogramShouldBeTrueEvenIfOderIsDifferent) {
+  // The same two subprograms, listed in opposite orders on the two sides.
+  ExecReq req;
+  ExecResp resp;
+
+  SubprogramSpec dummy_spec;
+  dummy_spec.set_path("dummy_path");
+  dummy_spec.set_binary_hash("dummy_hash");
+
+  // Populate dummy_spec2 itself; setting these on dummy_spec would leave
+  // dummy_spec2 without a hash.
+  SubprogramSpec dummy_spec2;
+  dummy_spec2.set_path("dummy_path2");
+  dummy_spec2.set_binary_hash("dummy_hash2");
+
+  // Request order: spec 1, spec 2.
+  *req.add_subprogram() = dummy_spec;
+  *req.add_subprogram() = dummy_spec2;
+
+  // Response order: spec 2, spec 1.
+  *resp.mutable_result()->add_subprogram() = dummy_spec2;
+  *resp.mutable_result()->add_subprogram() = dummy_spec;
+
+  EXPECT_TRUE(IsSameSubprograms(req, resp));
+}
+
+TEST(GomaProtoUtilTest, IsSameSubprogramShouldBeFalseOnSizeMismatch) {
+  ExecReq req;
+  ExecResp resp;
+
+  SubprogramSpec* spec;
+  spec = req.add_subprogram();
+  spec->set_path("dummy_path");
+  spec->set_binary_hash("dummy_hash");
+
+  EXPECT_FALSE(IsSameSubprograms(req, resp));
+}
+
+TEST(GomaProtoUtilTest, IsSameSubprogramShouldBeFalseOnContentsMismatch) {
+  ExecReq req;
+  ExecResp resp;
+
+  SubprogramSpec dummy_spec;
+  dummy_spec.set_path("dummy_path");
+
+  SubprogramSpec* spec;
+  spec = req.add_subprogram();
+  *spec = dummy_spec;
+  spec->set_binary_hash("dummy_hash");
+
+  spec = resp.mutable_result()->add_subprogram();
+  *spec = dummy_spec;
+  spec->set_binary_hash("different_hash");
+
+  EXPECT_FALSE(IsSameSubprograms(req, resp));
+}
+
+}  // namespace devtools_goma
diff --git a/lib/goma_file.cc b/lib/goma_file.cc
new file mode 100644
index 0000000..a663771
--- /dev/null
+++ b/lib/goma_file.cc
@@ -0,0 +1,686 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "goma_file.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+
+#ifndef _WIN32
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#endif
+
+#include <memory>
+#include <stack>
+
+
+#include "compiler_specific.h"
+#include "file.h"
+#include "glog/logging.h"
+#include "goma_hash.h"
+MSVC_PUSH_DISABLE_WARNING_FOR_PROTO()
+#include "prototmp/goma_data.pb.h"
+MSVC_POP_WARNING()
+#include "scoped_fd.h"
+using std::string;
+
+namespace {
+
+const size_t kLargeFileThreshold = 2 * 1024 * 1024UL;  // 2MB
+const off_t kFileChunkSize = 2 * 1024 * 1024L;
+
+const int kNumChunksInStreamRequest = 5;
+
+// Creates the missing ancestor directories of |filename|. Returns false when
+// one of them cannot be created for a reason other than already existing.
+bool CreateDirectoryForFile(const string& filename) {
+#ifndef _WIN32
+  // Walk up from the parent, remembering each missing directory. Index 0 is
+  // excluded: rfind('/', 0 - 1) would wrap around and never terminate.
+  std::stack<string> ancestors;
+  size_t last_slash = filename.rfind('/');
+  while (last_slash != string::npos && last_slash > 0) {
+    const string dirname = filename.substr(0, last_slash);
+    if (mkdir(dirname.c_str(), 0777) == 0) {
+      VLOG(1) << "created " << dirname << " to store " << filename;
+      break;
+    }
+    if (errno == EEXIST) break;  // Other threads created this directory.
+    if (errno != ENOENT) {
+      PLOG(INFO) << "failed to create directory: " << dirname;
+      return false;
+    }
+    ancestors.push(dirname);
+    last_slash = filename.rfind('/', last_slash - 1);
+  }
+
+  // Create the remembered directories, outermost first.
+  for (; !ancestors.empty(); ancestors.pop()) {
+    const string& dirname = ancestors.top();
+    if (mkdir(dirname.c_str(), 0777) == 0) {
+      VLOG(1) << "created " << dirname << " to store " << filename;
+    } else if (errno != EEXIST) {
+      PLOG(INFO) << "failed to create directory: " << dirname;
+      return false;
+    }
+  }
+  return true;
+#else
+  const size_t last_slash = filename.rfind('\\');
+  if (last_slash == string::npos) return true;  // No directory part.
+  const string dirname = filename.substr(0, last_slash);
+  // SHCreateDirectoryExA reports failure via its return value, not errno.
+  const int result = SHCreateDirectoryExA(nullptr, dirname.c_str(), nullptr);
+  if (result == ERROR_SUCCESS) {
+    VLOG(1) << "created " << dirname;
+  } else if (result != ERROR_FILE_EXISTS && result != ERROR_ALREADY_EXISTS) {
+    LOG(INFO) << "failed to create directory: " << dirname << ": " << result;
+    return false;
+  }
+  return true;
+#endif
+}
+
+// Output that writes the received chunks into a file on disk.
+class FileOutputImpl : public devtools_goma::FileServiceClient::Output {
+ public:
+  // Opens |filename| with |mode|. If that fails because the path is missing,
+  // the parent directories are created and the open is tried once more.
+  FileOutputImpl(const string& filename, int mode)
+      : filename_(filename),
+        fd_(devtools_goma::ScopedFd::Create(filename, mode)),
+        error_(false) {
+#ifndef _WIN32
+    const bool missing_path = !fd_.valid() && errno == ENOENT;
+#else
+    const bool missing_path =
+        !fd_.valid() && GetLastError() == ERROR_PATH_NOT_FOUND;
+#endif
+    if (missing_path) {
+      if (!CreateDirectoryForFile(filename)) {
+        PLOG(INFO) << "failed to create directory for " << filename;
+        // other threads/process may create the same dir, so next
+        // open might succeed.
+      }
+      fd_.reset(devtools_goma::ScopedFd::Create(filename, mode));
+      if (!fd_.valid()) {
+        PLOG(ERROR) << "open failed:" << filename;
+      }
+    }
+  }
+
+  // Removes the file again if a seek, write or close failed.
+  ~FileOutputImpl() override {
+    if (!error_) return;
+    VLOG(1) << "Write failed. delete " << filename_;
+    remove(filename_.c_str());
+  }
+
+  bool IsValid() const override { return fd_.valid(); }
+
+  // Seeks to |offset| and writes all of |content| there.
+  bool WriteAt(off_t offset, const string& content) override {
+    const off_t pos = fd_.Seek(offset, devtools_goma::ScopedFd::SeekAbsolute);
+    if (pos < 0 || pos != offset) {
+      PLOG(ERROR) << "seek failed? " << filename_
+                  << " pos=" << pos << " offset=" << offset;
+      error_ = true;
+      return false;
+    }
+    size_t done = 0;
+    while (done < content.size()) {
+      const int n = fd_.Write(content.data() + done, content.size() - done);
+      if (n < 0) {
+        PLOG(WARNING) << "write failed " << filename_;
+        error_ = true;
+        return false;
+      }
+      done += n;
+    }
+    return true;
+  }
+
+  // Closes the file; a failed close marks the output as broken.
+  bool Close() override {
+    if (fd_.Close()) return true;
+    error_ = true;
+    return false;
+  }
+
+  string ToString() const override { return filename_; }
+
+ private:
+  const string filename_;
+  devtools_goma::ScopedFd fd_;
+  bool error_;
+  DISALLOW_COPY_AND_ASSIGN(FileOutputImpl);
+};
+
+// Output that assembles the written chunks in a caller-supplied string.
+class StringOutputImpl : public devtools_goma::FileServiceClient::Output {
+ public:
+  // |buf| receives the data; |name| is only reported by ToString().
+  StringOutputImpl(const string& name, string* buf)
+      : name_(name), buf_(buf), size_(0UL) {}
+  ~StringOutputImpl() override {}
+
+  bool IsValid() const override { return buf_ != nullptr; }
+  // Overwrites the buffer at |offset| with |content|, growing it if needed.
+  // Returns false for a null buffer or a negative offset, which would
+  // otherwise be converted to a huge unsigned position.
+  bool WriteAt(off_t offset, const string& content) override {
+    if (buf_ == nullptr || offset < 0) return false;
+    const size_t begin = static_cast<size_t>(offset);
+    const size_t end = begin + content.size();
+    if (buf_->size() < end) buf_->resize(end);
+    buf_->replace(begin, content.size(), content);  // Same length: in place.
+    if (size_ < end) size_ = end;
+    return true;
+  }
+
+  // Resizes the buffer to the furthest position written.
+  bool Close() override {
+    if (buf_ == nullptr) return false;
+    buf_->resize(size_);
+    return true;
+  }
+  string ToString() const override { return name_; }
+
+ private:
+  const string name_;
+  string* buf_;
+  size_t size_;  // One past the last byte written so far.
+  DISALLOW_COPY_AND_ASSIGN(StringOutputImpl);
+};
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+// Returns the i-th hash key of |req|, or a placeholder if |i| is too large.
+static string GetHashKeyInLookupFileReq(const LookupFileReq& req, int i) {
+  CHECK_GE(i, 0);
+  const bool in_range = i < req.hash_key_size();
+  return in_range ? req.hash_key(i) : string("(out of range)");
+}
+
+/* static */
+std::unique_ptr<FileServiceClient::Output> FileServiceClient::FileOutput(
+    const string& filename, int mode) {
+  // File-backed output; the file is opened by FileOutputImpl's constructor.
+  return std::unique_ptr<Output>(new FileOutputImpl(filename, mode));
+}
+
+/* static */
+std::unique_ptr<FileServiceClient::Output> FileServiceClient::StringOutput(
+    const string& name, string* buf) {
+  // In-memory output that collects the written data in |buf|.
+  return std::unique_ptr<Output>(new StringOutputImpl(name, buf));
+}
+
+// Fills |blob| from |filename|. The blob starts as a FILE blob with size -1
+// and only receives its real size once the file was opened and measured.
+// Files larger than kLargeFileThreshold go through CreateFileChunks().
+bool FileServiceClient::CreateFileBlob(
+    const string& filename, bool store_large, FileBlob* blob) {
+  VLOG(1) << "CreateFileBlob " << filename;
+  blob->set_blob_type(FileBlob::FILE);
+  blob->set_file_size(-1);
+
+  std::unique_ptr<FileReader> reader(reader_factory_->NewFileReader(filename));
+  if (!reader->valid()) {
+    LOG(WARNING) << "open failed: " << filename;
+    return false;
+  }
+  size_t file_size = 0;
+  if (!reader->GetFileSize(&file_size)) {
+    LOG(WARNING) << "stat failed: " << filename;
+    return false;
+  }
+  blob->set_file_size(file_size);
+  VLOG(1) << filename << " size=" << file_size;
+  const bool is_large = file_size > kLargeFileThreshold;
+  const bool ok =
+      is_large ? CreateFileChunks(reader.get(), file_size, store_large, blob)
+               : ReadFileContent(reader.get(), 0, file_size, blob);
+  if (!ok) {
+    LOG(WARNING) << "CreateFileBlob " << filename << " failed";
+    return false;
+  }
+  VLOG(1) << "CreateFileBlob " << filename << " ok";
+  return true;
+}
+
+// Sends |blob| with StoreFile(). The blob is swapped into the request for the
+// call (hence the const_cast) and swapped back before returning.
+bool FileServiceClient::StoreFileBlob(const FileBlob& blob) {
+  VLOG(1) << "StoreFileBlob";
+  if (blob.blob_type() == FileBlob::FILE && blob.file_size() < 0) {
+    VLOG(1) << "Invalid FileBlob";
+    return false;
+  }
+  StoreFileReq request;
+  StoreFileResp response;
+  FileBlob* const lent = const_cast<FileBlob*>(&blob);
+  request.add_blob()->Swap(lent);
+  if (requester_info_ != nullptr) {
+    request.mutable_requester_info()->CopyFrom(*requester_info_);
+  }
+  const bool stored = StoreFile(&request, &response);
+  lent->Swap(request.mutable_blob(0));
+  VLOG(1) << "StoreFileBlob " << (stored ? "ok" : "failed");
+  return stored;
+}
+
+// Sends every blob in |blobs| to the file service in a single StoreFile()
+// request.
+// Each blob is swapped into the request and swapped back after the call, so
+// the pointed-to blobs are temporarily modified while the request runs.
+// If any entry is a FILE blob with a negative file_size, nothing is sent and
+// no blob is touched.
+// Returns the result of StoreFile(), or false when an entry is invalid.
+bool FileServiceClient::StoreFileBlobs(const std::vector<FileBlob*>& blobs) {
+  VLOG(1) << "StoreFileBlobs num=" << blobs.size();
+  // Validate everything before swapping anything: bailing out in the middle
+  // of the swap loop would leave the already-swapped blobs emptied.
+  for (size_t i = 0; i < blobs.size(); ++i) {
+    if (blobs[i]->blob_type() == FileBlob::FILE && blobs[i]->file_size() < 0) {
+      LOG(WARNING) << "blobs[" << i << "] is invalid FileBlob";
+      return false;
+    }
+  }
+
+  StoreFileReq req;
+  StoreFileResp resp;
+  for (FileBlob* blob : blobs) {
+    req.add_blob()->Swap(blob);
+  }
+  if (requester_info_ != nullptr) {
+    *req.mutable_requester_info() = *requester_info_;
+  }
+  bool ok = StoreFile(&req, &resp);
+  // Give the contents back to the caller's blobs whatever the outcome.
+  for (size_t i = 0; i < blobs.size(); ++i) {
+    blobs[i]->Swap(req.mutable_blob(i));
+  }
+  return ok;
+}
+
+// Looks up |hash_key| with LookupFile() and moves the first blob of the
+// response into |blob|.
+// Returns false when the lookup fails or the response carries no blob.
+bool FileServiceClient::GetFileBlob(const string& hash_key, FileBlob* blob) {
+  VLOG(1) << "GetFileBlob " << hash_key;
+  LookupFileReq lookup_req;
+  lookup_req.add_hash_key(hash_key);
+  if (requester_info_) {
+    *lookup_req.mutable_requester_info() = *requester_info_;
+  }
+
+  LookupFileResp lookup_resp;
+  if (!LookupFile(&lookup_req, &lookup_resp)) {
+    VLOG(1) << "LookupFile failed";
+    return false;
+  }
+  if (lookup_resp.blob_size() >= 1) {
+    blob->Swap(lookup_resp.mutable_blob(0));
+    return true;
+  }
+  LOG(WARNING) << "no resp.blob()";
+  return false;
+}
+
+// Looks up all |hash_keys| in one LookupFile() request and appends one
+// heap-allocated FileBlob per response blob to |blobs|.  The blobs are
+// created with new and are not deleted here.
+// Returns false when the lookup itself fails, true otherwise.
+bool FileServiceClient::GetFileBlobs(const std::vector<string>& hash_keys,
+                                     std::vector<FileBlob*>* blobs) {
+  VLOG(1) << "GetFileBlobs num=" << hash_keys.size();
+  LookupFileReq lookup_req;
+  for (size_t k = 0; k < hash_keys.size(); ++k) {
+    lookup_req.add_hash_key(hash_keys[k]);
+  }
+  if (requester_info_) {
+    *lookup_req.mutable_requester_info() = *requester_info_;
+  }
+
+  LookupFileResp lookup_resp;
+  if (!LookupFile(&lookup_req, &lookup_resp)) {
+    VLOG(1) << "LookupFile failed";
+    return false;
+  }
+  // Debug-only expectation: one response blob per requested key.
+  DCHECK_EQ(hash_keys.size(),
+            static_cast<unsigned int>(lookup_resp.blob_size()));
+  for (int i = 0; i < lookup_resp.blob_size(); ++i) {
+    FileBlob* fetched = new FileBlob;
+    fetched->Swap(lookup_resp.mutable_blob(i));
+    blobs->push_back(fetched);
+  }
+  return true;
+}
+
+// Writes |blob| to |filename| (opened with |mode|) by wrapping the file in a
+// FileOutput and delegating to OutputFileBlob().
+bool FileServiceClient::WriteFileBlob(const string& filename,
+                                      int mode,
+                                      const FileBlob& blob) {
+  VLOG(1) << "WriteFileBlob " << filename;
+  const std::unique_ptr<Output> file_output = FileOutput(filename, mode);
+  return OutputFileBlob(blob, file_output.get());
+}
+
+// Writes the data described by |blob| into |output| and closes |output|.
+//  - FILE:       the inline content is written at offset 0
+//                (requires file_size >= 0).
+//  - FILE_META:  the chunks are handled by OutputFileChunks().
+//  - FILE_CHUNK and unknown types are errors.
+// Returns false without closing when |output| is not valid.  Otherwise
+// returns true only if both the write and Close() succeeded.
+bool FileServiceClient::OutputFileBlob(const FileBlob& blob, Output* output) {
+  if (!output->IsValid()) {
+    LOG(ERROR) << "invalid output:" << output->ToString();
+    return false;
+  }
+
+  bool written = false;
+  const auto type = blob.blob_type();
+  if (type == FileBlob::FILE) {
+    if (blob.file_size() < 0) {
+      LOG(ERROR) << "Invalid FileBlob";
+    } else {
+      written = output->WriteAt(0, blob.content());
+    }
+  } else if (type == FileBlob::FILE_META) {
+    written = OutputFileChunks(blob, output);
+  } else if (type == FileBlob::FILE_CHUNK) {
+    LOG(ERROR) << "Can't write FILE_CHUNK";
+  } else {
+    LOG(ERROR) << "Unknown blob_type:" << blob.blob_type();
+  }
+
+  // Close is attempted even after a failed write; a failed close turns the
+  // whole operation into a failure.
+  if (!output->Close()) {
+    PLOG(ERROR) << "Write close failed? " << output->ToString();
+    return false;
+  }
+  return written;
+}
+
+// Waits for |task| and checks its outcome.
+// A null |task| counts as success.  Otherwise the task must report
+// IsSuccess() and no hash_key in its response may be empty.
+bool FileServiceClient::FinishStoreFileTask(
+    std::unique_ptr<AsyncTask<StoreFileReq, StoreFileResp>> task) {
+  if (task == nullptr) {
+    return true;
+  }
+  VLOG(1) << "Wait StoreFileTask";
+  task->Wait();
+  VLOG(1) << "Finish StoreFileTask";
+  if (!task->IsSuccess()) {
+    LOG(WARNING) << "Finish StoreFileTask failed.";
+    return false;
+  }
+
+  const StoreFileResp& resp = task->resp();
+  int num_failed = 0;
+  for (int i = 0; i < resp.hash_key_size(); ++i) {
+    if (!resp.hash_key(i).empty()) {
+      continue;
+    }
+    VLOG(1) << "No response at " << i;
+    ++num_failed;
+  }
+  if (num_failed == 0) {
+    return true;
+  }
+  LOG(WARNING) << "StoreFileTask failed " << num_failed << " chunks";
+  return false;
+}
+
+// Splits the |size| bytes readable through |fr| into pieces of at most
+// kFileChunkSize bytes and appends the hash key of every piece to |blob|,
+// which is turned into a FILE_META blob.
+//
+// When |store| is true each chunk is also sent to the file service:
+//  - if NewAsyncStoreFileTask() provides a task, chunks are batched
+//    (kNumChunksInStreamRequest per request) and at most one request is in
+//    flight while the next batch is being read;
+//  - otherwise every chunk is sent with its own synchronous StoreFile().
+// When |store| is false only the hash keys are computed.
+//
+// Returns true on success, false if reading or storing any chunk failed.
+bool FileServiceClient::CreateFileChunks(
+    FileReader* fr, off_t size, bool store, FileBlob* blob) {
+  VLOG(1) << "CreateFileChunks size=" << size;
+  blob->set_blob_type(FileBlob::FILE_META);
+
+  std::unique_ptr<AsyncTask<StoreFileReq, StoreFileResp> > task(
+      NewAsyncStoreFileTask());
+  if (store && task.get()) {
+    // Streaming available.
+    VLOG(1) << "Streaming mode";
+    if (requester_info_ != nullptr) {
+      *task->mutable_req()->mutable_requester_info() = *requester_info_;
+    }
+    std::unique_ptr<AsyncTask<StoreFileReq, StoreFileResp> > in_flight_task;
+    for (off_t offset = 0; offset < size; offset += kFileChunkSize) {
+      FileBlob* chunk = task->mutable_req()->add_blob();
+      int chunk_size = std::min(kFileChunkSize, size - offset);
+      if (!ReadFileContent(fr, offset, chunk_size, chunk)) {
+        LOG(WARNING) << "ReadFile failed."
+                     << " offset=" << offset << " chunk_size=" << chunk_size;
+        // A previous batch may still be running; wait for it before giving
+        // up, as the end of this function does on failure.
+        FinishStoreFileTask(std::move(in_flight_task));
+        return false;
+      }
+      chunk->set_blob_type(FileBlob::FILE_CHUNK);
+      chunk->set_offset(offset);
+      chunk->set_file_size(chunk_size);
+      string hash_key = ComputeHashKey(*chunk);
+      LOG(INFO) << "chunk hash_key:" << hash_key;
+      blob->add_hash_key(hash_key);
+      if (task->req().blob_size() >= kNumChunksInStreamRequest) {
+        // The batch is full: finish the previous request, start this one
+        // and begin collecting the next batch in a fresh task.
+        if (!FinishStoreFileTask(std::move(in_flight_task)))
+          return false;
+        task->Run();
+        in_flight_task = std::move(task);
+        task = NewAsyncStoreFileTask();
+        if (requester_info_ != nullptr) {
+          *task->mutable_req()->mutable_requester_info() = *requester_info_;
+        }
+      }
+    }
+    VLOG(1) << "ReadFile done";
+    // Send the last, partially filled batch (if any).
+    if (task->req().blob_size() > 0)
+      task->Run();
+    else
+      task.reset(nullptr);
+    if (!FinishStoreFileTask(std::move(in_flight_task))) {
+      // Still wait for the last request before reporting the failure.
+      FinishStoreFileTask(std::move(task));
+      return false;
+    }
+    return FinishStoreFileTask(std::move(task));
+  }
+
+  // Synchronous mode (or hash-only mode when |store| is false).
+  for (off_t offset = 0; offset < size; offset += kFileChunkSize) {
+    StoreFileReq req;
+    StoreFileResp resp;
+    if (requester_info_ != nullptr) {
+      *req.mutable_requester_info() = *requester_info_;
+    }
+    FileBlob* chunk = req.add_blob();
+    int chunk_size = std::min(kFileChunkSize, size - offset);
+    if (!ReadFileContent(fr, offset, chunk_size, chunk)) {
+      LOG(WARNING) << "ReadFile failed."
+                   << " offset=" << offset << " chunk_size=" << chunk_size;
+      return false;
+    }
+    chunk->set_blob_type(FileBlob::FILE_CHUNK);
+    chunk->set_offset(offset);
+    chunk->set_file_size(chunk_size);
+    string hash_key = ComputeHashKey(*chunk);
+    VLOG(1) << "chunk hash_key:" << hash_key;
+    blob->add_hash_key(hash_key);
+    if (store) {
+      if (!StoreFile(&req, &resp)) {
+        LOG(WARNING) << "StoreFile failed";
+        return false;
+      }
+      // Don't index into an empty repeated field when the response carries
+      // no hash key at all.
+      if (resp.hash_key_size() < 1) {
+        LOG(WARNING) << "no resp.hash_key()";
+        return false;
+      }
+      if (resp.hash_key(0) != hash_key) {
+        LOG(WARNING) << "Wrong hash_key:" << resp.hash_key(0)
+                     << "!=" << hash_key;
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+// Reads |chunk_size| bytes starting at |offset| from |fr| into the content
+// of |blob|.
+// |blob| is marked FILE_CHUNK (with its offset set) when |offset| > 0 and
+// FILE otherwise.
+// Returns false, with the blob content cleared, when seeking fails, a read
+// fails, or the data ends before |chunk_size| bytes were read.
+bool FileServiceClient::ReadFileContent(FileReader* fr,
+                                        off_t offset, off_t chunk_size,
+                                        FileBlob* blob) {
+  VLOG(1) << "ReadFileContent"
+          << " offset=" << offset << " chunk_size=" << chunk_size;
+  string* buf = blob->mutable_content();
+  buf->resize(chunk_size);
+  if (offset > 0) {
+    blob->set_blob_type(FileBlob::FILE_CHUNK);
+    blob->set_offset(offset);
+  } else {
+    blob->set_blob_type(FileBlob::FILE);
+  }
+  if (fr->Seek(offset, ScopedFd::SeekAbsolute) != offset) {
+    PLOG(WARNING) << "Seek failed " << offset;
+    blob->clear_content();
+    return false;
+  }
+  off_t nread = 0;
+  while (nread < chunk_size) {
+    int n = fr->Read(&((*buf)[nread]), chunk_size - nread);
+    if (n < 0) {
+      PLOG(WARNING) << "read failed.";
+      blob->clear_content();
+      return false;
+    }
+    if (n == 0) {
+      // No progress (presumably end of file, e.g. the file shrank after its
+      // size was taken).  Without this check the loop would never end.
+      LOG(WARNING) << "unexpected EOF."
+                   << " offset=" << offset << " nread=" << nread
+                   << " chunk_size=" << chunk_size;
+      blob->clear_content();
+      return false;
+    }
+    nread += n;
+  }
+  return true;
+}
+
+// Writes every blob in |resp| into |output| at the blob's own offset.
+// Stops and returns false at the first blob that fails IsValidFileBlob(), is
+// a FILE_META blob, or cannot be written.  |req| is only used to name the
+// corresponding hash key in log messages.
+bool FileServiceClient::OutputLookupFileResp(
+    const LookupFileReq& req,
+    const LookupFileResp& resp,
+    Output* output) {
+  const int num_blobs = resp.blob_size();
+  for (int index = 0; index < num_blobs; ++index) {
+    const FileBlob& chunk = resp.blob(index);
+    if (!IsValidFileBlob(chunk)) {
+      LOG(WARNING) << "no FILE_CHUNK available at " << index << ": "
+                   << GetHashKeyInLookupFileReq(req, index)
+                   << " blob=" << chunk.DebugString();
+      return false;
+    }
+    if (chunk.blob_type() == FileBlob::FILE_META) {
+      LOG(WARNING) << "Wrong blob_type at " << index << ": "
+                   << GetHashKeyInLookupFileReq(req, index)
+                   << " blob=" << chunk.DebugString();
+      return false;
+    }
+    const off_t write_offset = static_cast<off_t>(chunk.offset());
+    if (!output->WriteAt(write_offset, chunk.content())) {
+      LOG(WARNING) << "WriteFileContent failed.";
+      return false;
+    }
+  }
+  return true;
+}
+
+// Waits for |task| and, if it succeeded, writes its response into |output|
+// via OutputLookupFileResp().  A null |task| counts as success.
+bool FileServiceClient::FinishLookupFileTask(
+    std::unique_ptr<AsyncTask<LookupFileReq, LookupFileResp>> task,
+    Output* output) {
+  if (task == nullptr) {
+    return true;
+  }
+  VLOG(1) << "Wait LookupFileTask";
+  task->Wait();
+  VLOG(1) << "Finish LookupFileTask";
+  if (task->IsSuccess()) {
+    return OutputLookupFileResp(task->req(), task->resp(), output);
+  }
+  LOG(WARNING) << "Finish LookupFileTask failed.";
+  return false;
+}
+
+// Fetches every chunk listed in |blob|.hash_key() and writes it into
+// |output|.  |blob| must be a FILE_META blob.
+//
+// If NewAsyncLookupFileTask() provides a task, hash keys are batched
+// (kNumChunksInStreamRequest per request) and at most one request is in
+// flight while the next batch is being assembled.  Otherwise each key is
+// looked up with its own synchronous LookupFile() call.
+//
+// Returns true when all chunks were fetched and written, false otherwise.
+// |output| is not closed here.
+bool FileServiceClient::OutputFileChunks(const FileBlob& blob, Output* output) {
+  VLOG(1) << "OutputFileChunks";
+  if (blob.blob_type() != FileBlob::FILE_META) {
+    LOG(WARNING) << "wrong blob_type " << blob.blob_type();
+    return false;
+  }
+
+  std::unique_ptr<AsyncTask<LookupFileReq, LookupFileResp> > task(
+      NewAsyncLookupFileTask());
+  if (task.get()) {
+    // Streaming available.
+    VLOG(1) << "Streaming mode";
+    if (requester_info_ != nullptr) {
+      *task->mutable_req()->mutable_requester_info() = *requester_info_;
+    }
+    // |task| collects keys for the next request; |in_flight_task| is the
+    // request that has been started but whose response is not written yet.
+    std::unique_ptr<AsyncTask<LookupFileReq, LookupFileResp> > in_flight_task;
+    for (const auto& key : blob.hash_key()) {
+      task->mutable_req()->add_hash_key(key);
+      VLOG(1) << "chunk hash_key:" << key;
+      if (task->req().hash_key_size() >= kNumChunksInStreamRequest) {
+        // Batch is full: write out the previous response first, then start
+        // this request and open a fresh task for the following keys.
+        if (!FinishLookupFileTask(std::move(in_flight_task), output))
+          return false;
+        task->Run();
+        in_flight_task = std::move(task);
+        task = NewAsyncLookupFileTask();
+        if (requester_info_ != nullptr) {
+          *task->mutable_req()->mutable_requester_info() = *requester_info_;
+        }
+      }
+    }
+    VLOG(1) << "LookupFile done";
+    // Start the last, partially filled batch, or drop the empty task.
+    if (task->req().hash_key_size() > 0)
+      task->Run();
+    else
+      task.reset(nullptr);
+    if (!FinishLookupFileTask(std::move(in_flight_task), output)) {
+      // Still wait for the last request before reporting the failure.
+      FinishLookupFileTask(std::move(task), output);
+      return false;
+    }
+
+    return FinishLookupFileTask(std::move(task), output);
+  }
+
+  // Synchronous mode: one LookupFile() call per chunk.
+  for (const auto& key : blob.hash_key()) {
+    LookupFileReq req;
+    LookupFileResp resp;
+    req.add_hash_key(key);
+    if (requester_info_ != nullptr) {
+      *req.mutable_requester_info() = *requester_info_;
+    }
+    VLOG(1) << "chunk hash_key:" << key;
+    if (!LookupFile(&req, &resp)) {
+      LOG(WARNING) << "Lookup failed.";
+      return false;
+    }
+    if (resp.blob_size() < 1) {
+      LOG(WARNING) << "no resp.blob()";
+      return false;
+    }
+    if (!OutputLookupFileResp(req, resp, output)) {
+      LOG(WARNING) << "Write response failed";
+      return false;
+    }
+  }
+  return true;
+}
+
+// Checks that |blob| is structurally consistent for its blob_type.
+// Every valid blob has a non-negative file_size.  In addition:
+//  - FILE:       no offset, has content, no hash keys.
+//  - FILE_META:  no offset, no content, at least two hash keys.
+//  - FILE_CHUNK: has offset, has content, no hash keys.
+// Any other blob_type is invalid.
+/* static */
+bool FileServiceClient::IsValidFileBlob(const FileBlob& blob) {
+  if (!blob.has_file_size() || blob.file_size() < 0) {
+    return false;
+  }
+
+  const bool has_offset = blob.has_offset();
+  const bool has_content = blob.has_content();
+  const int num_hash_keys = blob.hash_key_size();
+
+  switch (blob.blob_type()) {
+    case FileBlob::FILE:
+      return !has_offset && has_content && num_hash_keys == 0;
+
+    case FileBlob::FILE_META:
+      return !has_offset && !has_content && num_hash_keys > 1;
+
+    case FileBlob::FILE_CHUNK:
+      return has_offset && has_content && num_hash_keys == 0;
+
+    default:
+      return false;
+  }
+}
+
+// Returns the hash key of |blob|: the result of ComputeDataHashKey() applied
+// to the blob's serialized form.
+/* static */
+string FileServiceClient::ComputeHashKey(const FileBlob& blob) {
+  string serialized;
+  blob.SerializeToString(&serialized);
+
+  string hash_key;
+  ComputeDataHashKey(serialized, &hash_key);
+  return hash_key;
+}
+
+}  // namespace devtools_goma
diff --git a/lib/goma_file.h b/lib/goma_file.h
new file mode 100644
index 0000000..8c82bbd
--- /dev/null
+++ b/lib/goma_file.h
@@ -0,0 +1,177 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_LIB_GOMA_FILE_H_
+#define DEVTOOLS_GOMA_LIB_GOMA_FILE_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+
+#include "basictypes.h"
+#include "file_reader.h"
+#include "prototmp/goma_data.pb.h"
+using std::string;
+
+#ifdef _WIN32
+# include <shlobj.h>
+# include <strsafe.h>
+# include "config_win.h"
+#endif
+
+namespace devtools_goma {
+
+class FileBlob;
+class StoreFileReq;
+class StoreFileResp;
+class LookupFileReq;
+class LookupFileResp;
+class ScopedFd;
+
+// FileServiceClient converts between local files and FileBlob messages and
+// exchanges those blobs with the file service.
+// It is abstract: subclasses supply the transport through StoreFile(),
+// LookupFile() and the NewAsync*Task() factories.
+class FileServiceClient {
+ public:
+  // Asynchronous support on old synchronous http rpc.
+  // TODO: provide proto-service style async call.
+  //
+  // An AsyncTask owns one request/response pair.  The callers in
+  // goma_file.cc fill mutable_req(), call Run(), later call Wait(), and
+  // check IsSuccess() before reading resp().
+  template<typename Req, typename Resp>
+  class AsyncTask {
+   public:
+    AsyncTask() {}
+    virtual ~AsyncTask() {}
+    const Req& req() const { return req_; }
+    Req* mutable_req() { return &req_; }
+    const Resp& resp() const { return resp_; }
+    Resp* mutable_resp() { return &resp_; }
+    // Presumably starts the request without blocking and blocks until it has
+    // finished, respectively -- semantics are defined by the subclass.
+    virtual void Run() = 0;
+    virtual void Wait() = 0;
+
+    // Queried after Wait(); callers treat false as a failed request.
+    virtual bool IsSuccess() const = 0;
+
+   protected:
+    Req req_;
+    Resp resp_;
+
+   private:
+    DISALLOW_COPY_AND_ASSIGN(AsyncTask);
+  };
+  // TODO: provide Input too.
+  // Output is an abstract interface of output from FileServiceClient.
+  class Output {
+   public:
+    Output() {}
+    virtual ~Output() {}
+    // IsValid returns true if this output is valid to use.
+    virtual bool IsValid() const = 0;
+    // WriteAt writes content at offset in output.
+    virtual bool WriteAt(off_t offset, const string& content) = 0;
+    // Close closes the output.
+    virtual bool Close() = 0;
+    // ToString returns string representation of this output. e.g. filename.
+    virtual string ToString() const = 0;
+   private:
+    DISALLOW_COPY_AND_ASSIGN(Output);
+  };
+
+  // FileOutput returns Output for filename.
+  static std::unique_ptr<Output> FileOutput(const string& filename, int mode);
+  // StringOutput returns Output into buf.
+  // It doesn't take ownership of buf.
+  // *buf will have output size when Close().
+  // Note that, unlike sparse file in unix, it will not modify data in a hole,
+  // if the hole exists. This class won't create any sparse file, so may not
+  // need to worry about this.
+  // If you care, pass empty buf (StringOutput will
+  // allocate enough space), or zero-cleared preallocated buf.
+  static std::unique_ptr<Output> StringOutput(const string& name, string* buf);
+
+  // Uses the process-wide FileReaderFactory instance to open files.
+  FileServiceClient()
+      : reader_factory_(FileReaderFactory::GetInstance()) {}
+  virtual ~FileServiceClient() {}
+
+  // Create |blob| for |filename|.
+  // If failed to open |filename|, it will set FileBlob::FILE as blob_type
+  // and set file_size=-1, which is considered as an invalid FileBlob.
+  // If |store_large| is true and the file is large enough, it will also store
+  // file chunks in file service.
+  // Note that |blob| itself will not be stored in file service by this method,
+  // so need to use StoreFileBlob() to store.
+  // Returns true on success, false on error.
+  bool CreateFileBlob(const string& filename, bool store_large, FileBlob* blob);
+
+  // Store |blob| in file service.
+  // Returns true on success, false on error.
+  bool StoreFileBlob(const FileBlob& blob);
+
+  // Store multiple |blob|s in file service.
+  // Returns true on success, false on error.
+  bool StoreFileBlobs(const std::vector<FileBlob*>& blobs);
+
+  // Gets |blob| for |hash_key|.
+  // Returns true on success, false on error.
+  bool GetFileBlob(const string& hash_key, FileBlob* blob);
+
+  // Gets |blobs| for |hash_keys|.
+  // Returns true on success, false on error.
+  // Even if it returns true, blobs may contain invalid FileBlob, which means
+  // missing content for the corresponding hash_key.
+  // The FileBlobs appended to |blobs| are allocated with new and are not
+  // deleted by this class.
+  bool GetFileBlobs(const std::vector<string>& hash_keys,
+                    std::vector<FileBlob*>* blobs);
+
+  // Writes |blob| to |filename|.
+  // convenient helper for OutputFileBlob().
+  bool WriteFileBlob(const string& filename, int mode, const FileBlob& blob);
+
+  // OutputFileBlob outputs blob into output.
+  // It doesn't take ownership of output.
+  // If the blob_type is FILE_META, it will also fetch file chunks in
+  // file service.
+  // Returns true on success, false on error.  output will be closed in
+  // this method.
+  bool OutputFileBlob(const FileBlob& blob, Output* output);
+
+  // Checks |blob| is valid.
+  static bool IsValidFileBlob(const FileBlob& blob);
+
+  // Compute hash key of |blob|.
+  static string ComputeHashKey(const FileBlob& blob);
+
+  // Factories for asynchronous requests.  Callers in goma_file.cc fall back
+  // to the synchronous StoreFile()/LookupFile() calls when the returned
+  // pointer is null.
+  virtual std::unique_ptr<AsyncTask<StoreFileReq, StoreFileResp>>
+  NewAsyncStoreFileTask() = 0;
+  virtual std::unique_ptr<AsyncTask<LookupFileReq, LookupFileResp>>
+  NewAsyncLookupFileTask() = 0;
+
+  // Synchronous requests to the file service.  Callers treat a false return
+  // value as failure.
+  virtual bool StoreFile(const StoreFileReq* req, StoreFileResp* resp) = 0;
+  virtual bool LookupFile(const LookupFileReq* req, LookupFileResp* resp) = 0;
+
+ protected:
+  // Creates the FileReader used by CreateFileBlob().
+  FileReaderFactory* reader_factory_;
+  // When non-null, copied into every request built by this class.
+  std::unique_ptr<RequesterInfo> requester_info_;
+  string trace_id_;
+
+ private:
+  // Waits for |task| and checks its response; a null task is a success.
+  bool FinishStoreFileTask(
+      std::unique_ptr<AsyncTask<StoreFileReq, StoreFileResp>> task);
+
+  // Note: off_t is 32-bit in Windows.  If need to handle files bigger than
+  //       4GB, it needs to be changed to QWORD.
+  bool CreateFileChunks(FileReader* fd,
+                        off_t size, bool store, FileBlob* blob);
+  bool ReadFileContent(FileReader* fd,
+                       off_t offset, off_t size, FileBlob* blob);
+
+  // Writes the blobs of |resp| into |output|; |req| is used for logging.
+  bool OutputLookupFileResp(const LookupFileReq& req,
+                            const LookupFileResp& resp,
+                            Output* output);
+  // Waits for |task| and writes its response into |output|; a null task is a
+  // success.
+  bool FinishLookupFileTask(
+      std::unique_ptr<AsyncTask<LookupFileReq, LookupFileResp>> task,
+      Output* output);
+  // Fetches the chunks listed in the FILE_META |blob| and writes them into
+  // |output|.
+  bool OutputFileChunks(const FileBlob& blob, Output* output);
+
+  DISALLOW_COPY_AND_ASSIGN(FileServiceClient);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_LIB_GOMA_FILE_H_
diff --git a/lib/goma_file_unittest.cc b/lib/goma_file_unittest.cc
new file mode 100644
index 0000000..f9f0205
--- /dev/null
+++ b/lib/goma_file_unittest.cc
@@ -0,0 +1,22 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "goma_file.h"
+
+#include <gtest/gtest.h>
+
+#include "compiler_specific.h"
+using std::string;
+
+// Writing an empty string at offset 0 through a StringOutput must succeed,
+// and after Close() the destination buffer must equal the (empty) content.
+TEST(StringOutput, EmptyContent) {
+  using devtools_goma::FileServiceClient;
+
+  string destination;
+  std::unique_ptr<FileServiceClient::Output> string_output =
+      FileServiceClient::StringOutput("test", &destination);
+  EXPECT_TRUE(string_output->IsValid());
+
+  string empty_content;
+  EXPECT_TRUE(string_output->WriteAt(0, empty_content));
+  EXPECT_TRUE(string_output->Close());
+  EXPECT_EQ(destination, empty_content);
+}
diff --git a/lib/goma_hash.cc b/lib/goma_hash.cc
new file mode 100644
index 0000000..5fb13bf
--- /dev/null
+++ b/lib/goma_hash.cc
@@ -0,0 +1,147 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "goma_hash.h"
+
+#include <stdio.h>
+
+
+#if defined __MACH__
+# include <CommonCrypto/CommonDigest.h>
+#elif defined _WIN32
+# include "config_win.h"
+# include <wincrypt.h>
+# pragma comment(lib, "advapi32.lib")
+# include "string_piece.h"
+# define SHA256_DIGEST_LENGTH 32
+#else
+# include <openssl/sha.h>  // BoringSSL
+# ifndef OPENSSL_IS_BORINGSSL
+#  error "We expect BoringSSL in the third_party directory is used."
+# endif
+#endif
+#include "file.h"
+#include "file_helper.h"
+#include "glog/logging.h"
+using std::string;
+
+namespace {
+
+// Decodes one hexadecimal digit.
+// On success stores its value (0-15) in |*ret| and returns true.  Accepts
+// '0'-'9', 'a'-'f' and 'A'-'F'.  For any other character returns false and
+// leaves |*ret| untouched.
+bool FromHexChar(char c, unsigned char* ret) {
+  unsigned char digit;
+  if (c >= '0' && c <= '9') {
+    digit = c - '0';
+  } else if (c >= 'a' && c <= 'f') {
+    digit = c - 'a' + 10;
+  } else if (c >= 'A' && c <= 'F') {
+    digit = c - 'A' + 10;
+  } else {
+    return false;
+  }
+  *ret = digit;
+  return true;
+}
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+// Parses |hex_string|, which must consist of exactly 64 hexadecimal digits
+// (either case), into |hash_value|.
+// Returns false when the length is wrong or a non-hex character is found;
+// in the latter case bytes decoded before the bad character have already
+// been written to |hash_value|.
+bool SHA256HashValue::ConvertFromHexString(const string& hex_string,
+                                           SHA256HashValue* hash_value) {
+  if (hex_string.size() != 64U) {
+    return false;
+  }
+
+  for (size_t i = 0; i < 32; ++i) {
+    unsigned char high, low;
+    const bool decoded = FromHexChar(hex_string[2 * i], &high) &&
+                         FromHexChar(hex_string[2 * i + 1], &low);
+    if (!decoded) {
+      return false;
+    }
+    // |low| is below 16, so OR-ing it in equals adding it.
+    hash_value->data_[i] = (high << 4) | low;
+  }
+
+  return true;
+}
+
+// Returns the 32 bytes of this value as 64 lowercase hexadecimal digits.
+string SHA256HashValue::ToHexString() const {
+  static const char kHexDigits[] = "0123456789abcdef";
+
+  string md_str;
+  for (unsigned char byte : data_) {
+    md_str.push_back(kHexDigits[(byte >> 4) & 0x0f]);
+    md_str.push_back(kHexDigits[byte & 0x0f]);
+  }
+  return md_str;
+}
+
+// Folds all bytes of the value into a size_t (multiply-by-37 polynomial),
+// for use as a hash in unordered containers.
+size_t SHA256HashValue::Hash() const {
+  size_t v = 0;
+  // Range-for avoids comparing a signed index against sizeof(data_).
+  for (unsigned char byte : data_) {
+    v = v * 37 + byte;
+  }
+  return v;
+}
+
+// Computes the SHA-256 digest of |data| and stores the 32 raw bytes in
+// |hash_value|.  Uses CommonCrypto on Mac, the Windows CryptoAPI on Windows
+// and BoringSSL elsewhere.
+// On Windows, failure to acquire the crypto provider is fatal; any later
+// CryptoAPI failure is logged and |hash_value| may be left without a digest.
+void ComputeDataHashKeyForSHA256HashValue(absl::string_view data,
+                                          SHA256HashValue* hash_value) {
+#ifdef __MACH__
+  CC_SHA256_CTX sha256;
+  CC_SHA256_Init(&sha256);
+  CC_SHA256_Update(&sha256, data.data(), data.size());
+  CC_SHA256_Final(hash_value->mutable_data(), &sha256);
+#elif defined _WIN32
+  // Zero-initialize the handles so that no indeterminate value is ever
+  // inspected or released when a call below fails.
+  HCRYPTPROV provider = 0;
+  HCRYPTHASH hash = 0;
+
+  if (!CryptAcquireContext(&provider, nullptr, nullptr, PROV_RSA_AES,
+                           CRYPT_VERIFYCONTEXT)) {
+    LOG(FATAL) << "Unable to acquire RSA_AES provider";
+    return;
+  }
+  if (CryptCreateHash(provider, CALG_SHA_256, 0, 0, &hash)) {
+    DWORD hash_size = SHA256_DIGEST_LENGTH;
+    if (!CryptHashData(hash, reinterpret_cast<const BYTE*>(data.data()),
+                       data.size(), 0)) {
+      LOG(ERROR) << "CryptHashData failed";
+    } else if (!CryptGetHashParam(hash, HP_HASHVAL,
+                                  hash_value->mutable_data(),
+                                  &hash_size, 0)) {
+      LOG(ERROR) << "CryptGetHashParam failed";
+    }
+  } else {
+    LOG(ERROR) << "CryptCreateHash failed";
+  }
+  if (hash) {
+    CryptDestroyHash(hash);
+  }
+  if (provider) {
+    CryptReleaseContext(provider, 0);
+  }
+#else
+  SHA256_CTX sha256;
+  SHA256_Init(&sha256);
+  SHA256_Update(&sha256, data.data(), data.size());
+  SHA256_Final(hash_value->mutable_data(), &sha256);
+#endif
+}
+
+// Stores in |*md_str| the hex string of the SHA256HashValue computed from
+// |data| by ComputeDataHashKeyForSHA256HashValue().
+void ComputeDataHashKey(absl::string_view data, string* md_str) {
+  SHA256HashValue digest;
+  ComputeDataHashKeyForSHA256HashValue(data, &digest);
+  md_str->assign(digest.ToHexString());
+}
+
+// Reads |filename| with ReadFileToString() and stores the hash key of its
+// contents, as computed by ComputeDataHashKey(), in |*md_str|.
+// Returns false, leaving |*md_str| untouched, when the file cannot be read.
+bool GomaSha256FromFile(const string& filename, string* md_str) {
+  string contents;
+  if (!ReadFileToString(filename, &contents)) {
+    return false;
+  }
+  ComputeDataHashKey(contents, md_str);
+  return true;
+}
+
+}  // namespace devtools_goma
diff --git a/lib/goma_hash.h b/lib/goma_hash.h
new file mode 100644
index 0000000..299acbd
--- /dev/null
+++ b/lib/goma_hash.h
@@ -0,0 +1,81 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_LIB_GOMA_HASH_H_
+#define DEVTOOLS_GOMA_LIB_GOMA_HASH_H_
+
+#include <ostream>
+#include <string>
+
+
+#include "string_piece.h"
+using std::string;
+
+namespace devtools_goma {
+
+// SHA256HashValue holds the 32 raw bytes of a SHA-256 digest.
+// Values can be compared for equality, ordered bytewise, hashed, and
+// streamed as a hex string.
+class SHA256HashValue {
+ public:
+  // Creates an all-zero value.
+  SHA256HashValue() : data_{} {}
+
+  // Parses a 64-character hex string into |hash_value|.
+  // Returns true on success, false otherwise.
+  static bool ConvertFromHexString(const string& hex_string,
+                                   SHA256HashValue* hash_value);
+
+  // Returns the value as a hex string.
+  string ToHexString() const;
+
+  // Raw access to the 32-byte buffer.
+  unsigned char* mutable_data() { return data_; }
+  const unsigned char* data() const { return data_; }
+
+  // Make hash for unordered_map.
+  size_t Hash() const;
+
+  // Bytewise equality.
+  friend bool operator==(const SHA256HashValue& lhs,
+                         const SHA256HashValue& rhs) {
+    return !memcmp(lhs.data_, rhs.data_, sizeof(lhs.data_));
+  }
+
+  friend bool operator!=(const SHA256HashValue& lhs,
+                         const SHA256HashValue& rhs) {
+    return memcmp(lhs.data_, rhs.data_, sizeof(lhs.data_)) != 0;
+  }
+
+  // Bytewise (memcmp) ordering, e.g. for use as a key in ordered containers.
+  friend bool operator<(const SHA256HashValue& lhs,
+                        const SHA256HashValue& rhs) {
+    const int order = memcmp(lhs.data_, rhs.data_, sizeof(lhs.data_));
+    return order < 0;
+  }
+
+  // Streams the hex representation.
+  friend std::ostream& operator<<(std::ostream& os, const SHA256HashValue& v) {
+    os << v.ToHexString();
+    return os;
+  }
+
+ private:
+  unsigned char data_[32];
+};
+
+// OptionalSHA256HashValue pairs a SHA256HashValue with a flag telling
+// whether the value has been set.
+// TODO: Remove this when we can have something like std::optional<T> in
+// client code.
+class OptionalSHA256HashValue {
+ public:
+  // Creates an unset (invalid) value.
+  OptionalSHA256HashValue() : value_(), valid_(false) {}
+  // Creates a valid value holding a copy of |value|.
+  explicit OptionalSHA256HashValue(const SHA256HashValue& value)
+      : value_(value), valid_(true) {}
+
+  // The stored hash; a default-constructed SHA256HashValue when !valid().
+  const SHA256HashValue& value() const { return value_; }
+  // True only when constructed from a SHA256HashValue.
+  bool valid() const { return valid_; }
+
+ private:
+  SHA256HashValue value_;
+  bool valid_;
+};
+
+void ComputeDataHashKeyForSHA256HashValue(absl::string_view data,
+                                          SHA256HashValue* hash_value);
+
+void ComputeDataHashKey(absl::string_view data, string* md_str);
+bool GomaSha256FromFile(const string& filename, string* md_str);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_LIB_GOMA_HASH_H_
diff --git a/lib/goma_hash_unittest.cc b/lib/goma_hash_unittest.cc
new file mode 100644
index 0000000..a50b184
--- /dev/null
+++ b/lib/goma_hash_unittest.cc
@@ -0,0 +1,51 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "goma_hash.h"
+
+#include <gtest/gtest.h>
+using std::string;
+
+// ComputeDataHashKey must return the expected hex digests for the empty
+// string and for a short text input.
+TEST(GomaHashTest, ComputeDataHashKey) {
+  using devtools_goma::ComputeDataHashKey;
+
+  string hash_of_empty;
+  ComputeDataHashKey("", &hash_of_empty);
+  EXPECT_EQ("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
+            hash_of_empty);
+
+  // Hash of the previous digest followed by a newline.
+  string hash_of_text;
+  ComputeDataHashKey(
+      "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n",
+      &hash_of_text);
+  EXPECT_EQ("38acb15d02d5ac0f2a2789602e9df950c380d2799b4bdb59394e4eeabdd3a662",
+            hash_of_text);
+}
+
+// A valid 64-digit hex string must survive a parse / ToHexString round trip.
+TEST(GomaHashTest, SHA256HashValue) {
+  using devtools_goma::SHA256HashValue;
+
+  string hex_digest =
+      "38acb15d02d5ac0f2a2789602e9df950c380d2799b4bdb59394e4eeabdd3a662";
+
+  SHA256HashValue parsed;
+  EXPECT_TRUE(SHA256HashValue::ConvertFromHexString(hex_digest, &parsed));
+  EXPECT_EQ(hex_digest, parsed.ToHexString());
+}
+
+// An empty string is not a valid hex digest.
+TEST(GomaHashTest, SHA256HashValueEmpty) {
+  using devtools_goma::SHA256HashValue;
+
+  string empty_hex;
+  SHA256HashValue parsed;
+  EXPECT_FALSE(SHA256HashValue::ConvertFromHexString(empty_hex, &parsed));
+}
+
+// A string of the right length that contains non-hex characters ("XY") must
+// be rejected.
+TEST(GomaHashTest, SHA256HashValueNonHex) {
+  using devtools_goma::SHA256HashValue;
+
+  string non_hex_digest =
+      "XYacb15d02d5ac0f2a2789602e9df950c380d2799b4bdb59394e4eeabdd3a662";
+
+  SHA256HashValue parsed;
+  EXPECT_FALSE(SHA256HashValue::ConvertFromHexString(non_hex_digest, &parsed));
+}
diff --git a/lib/goma_log.proto b/lib/goma_log.proto
new file mode 100644
index 0000000..c8827a5
--- /dev/null
+++ b/lib/goma_log.proto
@@ -0,0 +1,248 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+// proto definitions for goma log.
+
+syntax = "proto2";
+
+package devtools_goma;
+
+// NEXT ID TO USE: 91
+message ExecLog {
+  enum AuthenticationType {
+    NONE = 0;
+    UNKNOWN = 1;
+    // reserved 2,3
+    OAUTH2_UNSPEC = 4;
+    OAUTH2_APPLICATION = 5;
+    OAUTH2_SERVICE_ACCOUNT = 6;
+    OAUTH2_GCE_SERVICE_ACCOUNT = 7;
+    OAUTH2_LUCI_LOCAL_AUTH = 8;
+  };
+  enum NetworkFailureType {
+    NO_NETWORK_ERROR = 0;  // succeeded or not used.
+    DISABLED = 1;  // failed because http is disabled (failnow() is true).
+    UNKNOWN_NETWORK_ERROR = 2;  // unknown error status.
+    CONNECT_FAILED = 3;  // failed at connect.
+    SEND_FAILED = 4;  // failed at send.
+    TIMEDOUT_AFTER_SEND = 5;  // timed out after the request was sent.
+    RECEIVE_FAILED = 6;  // failed at receiving response.
+    BAD_HTTP_STATUS_CODE = 7;  // received but response was not 2xx.
+  };
+  enum CacheSource {
+    UNKNOWN_CACHE = 0;
+    MEM_CACHE = 1;
+    STORAGE_CACHE = 2;
+  };
+  // optional string compiler_proxy_id = 1;
+  optional string username = 46;
+  optional string nodename = 47;
+  optional int32 port = 48;
+  optional int32 compiler_proxy_start_time = 49;
+  optional int32 task_id = 50;
+  reserved 81;  // deprecated: use_case
+
+  optional string compiler_proxy_user_agent = 51;
+
+  // request info.
+  optional int32 start_time = 2;
+  repeated string arg = 3;
+  repeated string env = 4;
+  optional string cwd = 5;
+  repeated string expanded_arg = 62;
+
+  // command spec
+  optional string command_version = 71;
+  optional string command_target = 72;
+
+  optional string latest_input_filename = 59;
+  optional int32 latest_input_mtime = 60;
+
+  optional bool use_ssl = 64;
+  reserved 65;
+  optional AuthenticationType auth_type = 86 [default = NONE];
+  optional CpuFeatures cpu_features = 87;
+  optional NetworkFailureType network_failure_type = 88
+      [default = NO_NETWORK_ERROR];
+  optional OSInfo os_info = 89;
+
+  // in INIT.
+  optional int32 pending_time = 63;
+  // in SETUP.
+  optional int32 compiler_info_process_time = 82;
+  // include_preprocess_time is the sum of
+  // include_processor_wait_time and include_processor_run_time.
+  optional int32 include_preprocess_time = 6;
+  optional int32 include_processor_wait_time = 84;
+  optional int32 include_processor_run_time = 85;
+  optional bool depscache_used = 78;
+  optional int32 include_preprocess_total_files = 79;
+  optional int32 include_preprocess_skipped_files = 80;
+
+  // in FILE_REQ.
+  optional int32 include_fileload_time = 7;
+  repeated int32 include_fileload_pending_time = 69;
+  repeated int32 include_fileload_run_time = 70;
+  optional int32 num_total_input_file = 8;
+
+  // repeated by retry.
+  repeated int32 num_uploading_input_file = 9;
+  repeated int32 num_missing_input_file = 10;
+  repeated int32 num_file_uploaded_during_exec_failure = 66;
+  // repeated by each input file.
+  repeated int32 input_file_time = 11;
+  repeated int32 input_file_size = 12;
+
+  // in CALL_EXEC.  repeated by retry.
+  repeated int32 rpc_call_time = 13;
+  repeated int32 rpc_req_size = 14;
+  repeated int32 rpc_resp_size = 15;
+  repeated int32 rpc_raw_req_size = 16;
+  repeated int32 rpc_raw_resp_size = 17;
+  repeated string rpc_master_trace_id = 58;
+  repeated int32 rpc_throttle_time = 67;
+  repeated int32 rpc_pending_time = 57;
+  repeated int32 rpc_req_build_time = 18;
+  repeated int32 rpc_req_send_time = 19;
+  repeated int32 rpc_wait_time = 20;
+  repeated int32 rpc_resp_recv_time = 21;
+  repeated int32 rpc_resp_parse_time = 22;
+
+  // stats from backends. repeated by exec retry. (No fields here; 23-31 unused.)
+
+  // in FILE_RESP.
+  optional int32 file_response_time = 32;
+  optional int32 num_output_file = 33;
+  // repeated by each output file
+  repeated int32 output_file_time = 34;
+  repeated int32 output_file_size = 35;
+  repeated int32 chunk_resp_size = 36;
+
+  // Total time elapsed for handling the request in compiler_proxy.
+  optional int32 handler_time = 37;
+
+  // result info
+  optional string exec_command_not_found = 76;
+  optional string exec_command_name_mismatch = 73;
+  optional string exec_command_target_mismatch = 74;
+  optional string exec_command_version_mismatch = 38;
+  optional string exec_command_binary_hash_mismatch = 39;
+  optional string exec_command_subprograms_mismatch = 75;
+  optional int32 exec_exit_status = 40;
+  optional int32 exec_request_retry = 41;
+  repeated string exec_request_retry_reason = 56;
+
+  // local run
+  optional string local_run_reason = 42;
+  optional int32 local_pending_time = 43;
+  optional int32 local_run_time = 44;
+  // TODO: use int32?
+  optional int64 local_mem_kb = 52;
+  repeated int32 local_output_file_time = 54;
+  // TODO: use int64?
+  repeated int32 local_output_file_size = 55;
+  optional int32 local_delay_time = 61;
+
+  optional bool cache_hit = 45;
+  optional CacheSource cache_source = 90;
+
+  // goma_error indicates result mismatch (exit status, stdout, stderr) between
+  // local and remote.
+  optional bool goma_error = 53;
+
+  // compiler_proxy_error indicates that compiler_proxy replied with a failure
+  // exit status to gomacc although the remote/local compilation succeeded,
+  // i.e. it is not a genuine compilation failure.
+  optional bool compiler_proxy_error = 77;
+}
+
+message MemoryUsageLog {
+  // compiler_proxy identification
+  optional int32 compiler_proxy_start_time = 1;
+  optional string compiler_proxy_user_agent = 2;
+
+  // user information
+  optional string username = 3;
+  optional string nodename = 4;
+
+  // memory information (NOTE(review): units are not stated here - confirm)
+  optional int64 memory = 5;
+  optional int64 time = 6;
+}
+
+message SaveLogReq {  // carries ExecLog and MemoryUsageLog entries
+  repeated ExecLog exec_log = 1;
+  repeated MemoryUsageLog memory_usage_log = 2;
+}
+
+message SaveLogResp {  // no fields defined
+}
+
+message NumberSummary {  // presumably: sample count, average, order stats
+  optional int64 samples = 1;
+  optional int32 average = 2;
+
+  optional int32 minimum = 10;
+  optional int32 percentile_2 = 11;
+  optional int32 percentile_9 = 12;
+  optional int32 lower_quantile = 13;
+  optional int32 median = 14;
+  optional int32 upper_quantile = 15;
+  optional int32 percentile_91 = 16;
+  optional int32 percentile_98 = 17;
+  optional int32 maximum = 18;
+}
+
+// NEXT ID TO USE: 13
+message ExecLogStat {  // presumably summaries of same-named ExecLog fields
+  optional NumberSummary handler_time = 1;
+
+  optional NumberSummary compiler_info_process_time = 12;
+  optional NumberSummary include_preprocess_time = 2;
+  optional NumberSummary include_fileload_time = 3;
+  optional NumberSummary rpc_call_time = 4;
+  optional NumberSummary file_response_time = 7;
+  optional NumberSummary local_pending_time = 8;
+  optional NumberSummary local_run_time = 9;
+
+  optional int64 cache_hit = 10;
+  optional int64 goma_error = 11;
+}
+
+// NEXT ID TO USE: 12
+message CpuFeatures {  // one boolean flag per CPU feature
+  optional bool mmx = 1;
+  optional bool sse = 2;
+  optional bool sse2 = 3;
+  optional bool sse3 = 4;
+  optional bool sse41 = 5;
+  optional bool sse42 = 6;
+  optional bool popcnt = 7;
+  optional bool avx = 8;
+  optional bool avx2 = 9;
+  optional bool aesni = 10;
+  optional bool non_stop_time_stamp_counter = 11;
+}
+
+// NEXT ID TO USE: 4
+message OSInfo {
+  message LinuxInfo {
+    optional string gnu_libc_version = 1;
+  };
+
+  message WinInfo {
+  };
+
+  message MacInfo {
+    // TODO: Have mac_osx_major_version
+    optional int32 mac_osx_minor_version = 1;
+  };
+
+  oneof os_info_oneof {
+    LinuxInfo linux_info = 1;
+    WinInfo win_info = 2;
+    MacInfo mac_info = 3;
+  };
+}
diff --git a/lib/goma_stats.proto b/lib/goma_stats.proto
new file mode 100644
index 0000000..7fd0d2f
--- /dev/null
+++ b/lib/goma_stats.proto
@@ -0,0 +1,430 @@
+// Copyright 2015 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+syntax = "proto2";
+
+package devtools_goma;
+
+
+// Statistics of all compile requests (including those compiled locally).
+// NEXT ID TO USE: 9
+message RequestStats {
+  // Total number of compile requests that came from gomacc.
+  optional int64 total = 1;
+  // Number of succeeded compile requests.
+  optional int64 success = 2;
+  // Number of failed compile requests.
+  // The number includes not only compile failures, but also
+  // backend errors that were recovered by local fallback.
+  optional int64 failure = 3;
+
+  // Below this line: different kinds of stats. Only one should be provided.
+  // See the definition of each message type for a detailed description of
+  // their use.
+  optional CompilerProxyStats compiler_proxy = 4;
+  optional CompilerInfoStats compiler_info = 5;
+  optional GomaCompileStats goma = 6;
+  optional LocalCompileStats local = 7;
+  optional FallbackInSetupStats fallback_in_setup = 8;
+}
+
+// Statistics of compiler_proxy itself.
+message CompilerProxyStats {
+  // Number of compile requests that failed due to a compiler_proxy error.
+  optional int64 fail = 1;
+}
+
+// Statistics of the compiler info store.
+//
+// The compiler info store contains caches of compiler info that are used for
+// listing the files necessary for compiles or for dispatching compilers in
+// the backend.
+// NEXT ID TO USE: 6
+message CompilerInfoStats {
+  // Number of times new compiler info was stored to the cache.
+  optional int64 stores = 1;
+  // Number of times compiler info was reused.
+  optional int64 store_dups = 2;
+  // Number of times a compiler was not found.
+  optional int64 miss = 3;
+  // Number of failures to get compiler info.
+  // i.e. the compiler can be found but getting compiler info failed.
+  // e.g. not an executable binary, unknown options are passed, etc.
+  optional int64 fail = 4;
+  // The size of CompilerInfoCache loaded from disk.
+  optional int64 loaded_size_bytes = 5;
+}
+
+// Statistics of compiles done in the goma backend.
+message GomaCompileStats {
+  // Number of compiles requested to goma backend.
+  optional int64 finished = 1;
+  // The number of compiles returned from the cache in backend.
+  optional int64 cache_hit = 2;
+  // The number of compiles returned from the cache in local output.
+  optional int64 local_cache_hit = 6;
+  // Number of compiles aborted.
+  // compiler_proxy races local against remote compiles, and if local
+  // wins, the remote compile is aborted.
+  optional int64 aborted = 3;
+  // Number of retries.
+  // In case of a retriable error, compiler_proxy retries the remote compile.
+  // e.g. if files required for a compile are missing, compiler_proxy sends
+  // the missing files and retries the compile.
+  optional int64 retry = 4;
+  // Number of remote failures. This contains remote compile failure, backend
+  // failure, etc. Note that local compile result does not matter.
+  optional int64 fail = 5;
+}
+
+// Statistics of compiles done locally.
+message LocalCompileStats {
+  // Number of local compiles.
+  optional int64 run = 1;
+  // Number of local compiles killed.
+  // compiler_proxy races local against remote compiles, and if remote
+  // wins, the local compile is killed.
+  optional int64 killed = 2;
+  // Number of local compiles finished.
+  optional int64 finished = 3;
+}
+
+// Statistics on forced local fallbacks in the setup step.
+// NEXT ID TO USE: 7
+message FallbackInSetupStats {
+  // Number of fallbacks caused by failures to parse command line flags.
+  optional int64 failed_to_parse_flags = 1;
+  // Number of fallbacks because remote compile is not supported.
+  optional int64 no_remote_compile_supported = 2;
+  // Number of fallbacks caused by HTTP being disabled.
+  optional int64 http_disabled = 3;
+  // Number of fallbacks caused by failures to get compiler info.
+  optional int64 fail_to_get_compiler_info = 4;
+  // Number of fallbacks caused by the compiler being disabled.
+  optional int64 compiler_disabled = 5;
+  // Number of fallbacks requested by the user.
+  optional int64 requested_by_user = 6;
+}
+
+// Statistics of files used for remote compile.
+message FileStats {
+  // Number of files used for compile.
+  optional int64 requested = 1;
+  // Number of files uploaded to goma backend.
+  optional int64 uploaded = 2;
+  // Number of file hashes not found in goma backend.
+  // A compile request to goma backend has a list of hashes of files
+  // instead of contents.  If any of them is missing in goma backend,
+  // the backend returns the list of missing hashes.  This field represents
+  // the number of hashes missed.
+  optional int64 missed = 3;
+}
+
+// Statistics of output files.
+//
+// If remote compile succeeds, compiler_proxy outputs a result e.g. object
+// file.  If output file size is small, compiler_proxy keeps a result in memory.
+// If the size is large, the result is output to a file, and renamed.
+// Since compiler_proxy races local against remote compiles, and a
+// local compile outputs a file directly, we cannot directly write a result to
+// a file.
+message OutputStats {
+  // Total number of files output.
+  optional int64 files = 1;
+  // Number of outputs renamed.
+  optional int64 rename = 2;
+  // Number of outputs kept in memory instead of written to files.
+  optional int64 buf = 3;
+  // Peak size of memory to keep outputs.
+  optional int64 peak_req = 4;
+}
+
+// Statistics about compiler_proxy memory.
+message MemoryStats {
+  // Memory consumed by compiler_proxy when the stats were taken.
+  optional int64 consuming = 1;
+}
+
+// Statistics about compiler_proxy time.
+message TimeStats {
+  // Uptime of compiler_proxy, in seconds.
+  optional int64 uptime = 1;
+}
+
+// Statistics of the include processor.
+//
+// The include processor gets defined macros, search dirs, and a source file,
+// and lists the files required for the compile.
+message IncludeProcessorStats {
+  // Number of files to be searched.
+  optional int64 total = 1;
+  // Number of files that were skipped in the search.
+  // If an include guard exists and the same file is read twice, we do not
+  // need to go into the guard.
+  optional int64 skipped = 2;
+
+  // Total waiting time [ms] to start IncludeProcessor.
+  // This doesn't include the running time of IncludeProcessor.
+  optional int64 total_wait_time = 3;
+
+  // Total running time [ms] of IncludeProcessor.
+  optional int64 total_run_time = 4;
+}
+
+// Statistics for include cache.
+//
+// IncludeCache contains files that include only preprocessor directives.
+message IncludeCacheStats {
+  // The number of entries in the include cache.
+  optional int64 total_entries = 1;
+  // The total bytes of the contents in the include cache.
+  optional int64 total_cache_size = 2;
+
+  // Cache hit count.
+  optional int64 hit = 3;
+  // Cache miss count.
+  optional int64 missed = 4;
+  // Cache update count.
+  optional int64 updated = 5;
+  // Cache evicted count.
+  optional int64 evicted = 6;
+
+  // The total size of original (unfiltered) header files in bytes.
+  optional int64 original_total_size = 7;
+  // The max size of the original header files in bytes.
+  optional int64 original_max_size = 8;
+  // The total size of filtered header files in bytes.
+  optional int64 filtered_total_size = 9;
+  // The max size of filtered header files in bytes.
+  optional int64 filtered_max_size = 10;
+}
+
+// Statistics of DepsCache.
+//
+// The result of the include processor is cached in DepsCache.
+// It has two kinds of tables: deps_cache_table and id_table.
+// deps_cache_table is a mapping from an identifier of a compile to the list
+// of files required for it.
+// Since the same filename shows up in many entries of
+// deps_cache_table, we give a unique id to each filename and deps_cache
+// actually contains ids as values. id_table maintains the mapping between
+// the id and the filename.
+message DepsCacheStats {
+  // Size of deps_cache_table.
+  optional int64 deps_table_size = 1;
+  // Maximum number of ids of an entry in deps_cache_table.
+  optional int64 max_entries = 2;
+  // Total number of ids of all entries in deps_cache_table.
+  optional int64 total_entries = 3;
+
+  // Size of id_table.
+  optional int64 idtable_size = 4;
+  // Number of hits in the table.
+  optional int64 hit = 5;
+  // Number of updates of the table.
+  optional int64 updated = 6;
+  // Number of misses, i.e. entries newly added to the table.
+  optional int64 missed = 7;
+}
+
+// Statistics for include dir cache.
+//
+// Include dir cache is a cache for a directory to be used as an
+// include directory.
+message IncludeDirCacheStats {
+  // Number of instances of include dir cache.
+  optional int64 instances = 1;
+  // Estimated size of memory consumed for include dir cache.
+  optional int64 memory = 2;
+  // Number of times a new instance is created.
+  optional int64 created = 3;
+  // Number of times an instance is reused.
+  optional int64 reused = 4;
+}
+
+// Statistics for LocalOutputCache.
+//
+// LocalOutputCache is a cache for build output files.
+// NEXT ID TO USE: 13
+message LocalOutputCacheStats {
+  // Number of new compile results successfully cached.
+  optional int64 save_success = 1;
+  // Total time (in milliseconds) to save all the files.
+  optional int64 save_success_time_ms = 2;
+  // Number of compile results that failed to get cached.
+  optional int64 save_failure = 3;
+
+  // The number of cache hits.
+  optional int64 lookup_success = 4;
+  // The total time (in milliseconds) to find a cache (success case only).
+  optional int64 lookup_success_time_ms = 5;
+  // The number of cache misses.
+  optional int64 lookup_miss = 6;
+  // The number of failed lookups due to an error (other than misses).
+  optional int64 lookup_failure = 7;
+
+  // The number of times a cache is correctly copied.
+  optional int64 commit_success = 8;
+  // The total time (ms) to copy a cache (from cache dir to output dir).
+  optional int64 commit_success_time_ms = 9;
+  // The number of times a cache copy failed.
+  optional int64 commit_failure = 10;
+
+  // The number of times LocalOutputCache garbage collection was invoked.
+  optional int64 gc_count = 11;
+  // The total time (in milliseconds) of garbage collection.
+  optional int64 gc_total_time_ms = 12;
+}
+
+// Statistics of HttpRPC.
+//
+// compiler_proxy calls goma backend via HttpRPC.
+// NEXT ID TO USE: 14
+message HttpRPCStats {
+  // Status code for initial /pingz.
+  // compiler_proxy accesses /pingz to confirm the backend is alive.
+  optional int32 ping_status_code = 1;
+  // round trip time of initial /pingz in milliseconds.
+  // negative value would be set if /pingz failed by network error.
+  optional int32 ping_round_trip_time_ms = 10;
+  // Number of HttpRPC called.
+  optional int64 query = 2;
+  // Number of active HttpRPC now.
+  optional int64 active = 3;
+  // Number of HttpRPC retries.
+  optional int64 retry = 4;
+  // Number of HttpRPC timeouts.
+  optional int64 timeout = 5;
+  // Number of HttpRPC errors.
+  optional int64 error = 6;
+  // Number of times network error mode was entered after a network error.
+  optional int64 network_error = 7;
+  // Number of times recovered from network error mode.
+  optional int64 network_recovered = 8;
+  // Number of pending HttpRPC now.
+  optional int64 current_pending = 11;
+  // Max (peak) number of pending HttpRPC.
+  optional int64 peak_pending = 13;
+  // Total number of HttpRPC that was in pending state.
+  // Note that one HttpRPC will be counted only once even if it goes to pending
+  // state several times.
+  optional int64 total_pending = 12;
+
+  message HttpStatus {
+    // HTTP status code e.g. 200, 302, 401, 503, etc.
+    optional int32 status_code = 1;
+    // Number of times the status code was returned from backend.
+    optional int64 count = 2;
+  }
+  // Statistics of HTTP status code.
+  //
+  // Since we may get several kinds of status code from backend,
+  // this is a repeated field.
+  repeated HttpStatus status_code = 9;
+}
+
+// Statistics for errors and warnings in compile_task.
+message GomaErrorStats {
+  // The number of errors that were reported to the user.
+  optional int64 user_error = 1;
+  // NOTE: Deprecated 7/2016.
+  optional int64 DEPRECATED_user_warning = 2 [deprecated=true];
+  // The number of errors that were written to the log file.
+  optional int64 log_error = 3;
+  // The number of warnings that were written to the log file.
+  optional int64 log_warning = 4;
+}
+
+// Statistics for compiler/subprogram mismatches.
+message GomaMismatchStats {
+  // The number of compiler version mismatches.
+  optional int64 command_version_mismatch = 1;
+  // The number of compiler binary hash mismatches.
+  optional int64 binary_hash_mismatch = 2;
+  // The number of subprogram mismatches.
+  optional int64 subprogram_mismatch = 3;
+}
+
+// General purpose protocol buffer type to represent a distribution.
+//
+// Logs made from histograms usually use this protocol buffer type.
+message DistributionProto {
+  // Number of elements.
+  required int64 count = 1;
+  // Sum of all elements.
+  required int64 sum = 2;
+  // Square sum of all elements.
+  // This can be used to calculate variance.
+  required double sum_of_squares = 3;
+  // Minimum value of all elements.
+  optional int64 min = 4;
+  // Maximum value of all elements.
+  optional int64 max = 5;
+
+  // Histogram logbase.
+  optional double logbase = 6;
+  // Values of each bucket.
+  // The bucket range is like [0,1), [1, logbase), [logbase, logbase^2), ...
+  repeated int64 bucket_value = 7;
+}
+
+// Histograms of compiler_proxy.
+message GomaHistograms {
+  // Histogram for HttpRPC call time in milliseconds.
+  optional DistributionProto rpc_call_time = 1;
+}
+
+message MachineInfo {
+  enum OSType {
+    UNKNOWN = 0;
+    LINUX = 1;
+    MAC = 2;
+    WIN = 3;
+  };
+  // goma compiler_proxy's revision number.
+  // value: <git commit>@<timestamp in Unix time>. (51 chars)
+  optional string goma_revision = 1;
+  // Type of operating system (one of OSType).
+  optional OSType os = 2;
+  // Number of CPUs (cores) of the machine.
+  optional int32 ncpus = 3;
+  // Memory size of the machine in bytes.
+  optional int64 memory_size = 4;
+  // NOTE: Deprecated 7/2016.
+  repeated uint32 DEPRECATED_cpu_capability = 5 [deprecated=true];
+}
+
+// Stats for subprocess (local compiles).
+message SubProcessStats {
+  // Number of times burst mode was entered because of a network error.
+  optional int32 count_burst_by_network_error = 1;
+  // Number of times burst mode was entered because compiler is disabled.
+  optional int32 count_burst_by_compiler_disabled = 2;
+}
+
+// NEXT ID TO USE: 17
+message GomaStats {
+  // Different kinds of stats. A single one should be provided.
+  // See the definition of each message type for a detailed description of
+  // their use.
+  optional RequestStats request_stats = 1;
+  optional FileStats file_stats = 2;
+  optional OutputStats output_stats = 3;
+  optional MemoryStats memory_stats = 4;
+  optional TimeStats time_stats = 5;
+  optional IncludeProcessorStats include_processor_stats = 6;
+  optional DepsCacheStats depscache_stats = 7;
+  // Deprecated 04/2017. IncludeDirCache has been removed from goma.
+  optional IncludeDirCacheStats incdircache_stats = 8 [deprecated=true];
+  optional HttpRPCStats http_rpc_stats = 9;
+  optional GomaErrorStats error_stats = 12;
+  optional GomaMismatchStats mismatch_stats = 13;
+  optional IncludeCacheStats includecache_stats = 14;
+  optional LocalOutputCacheStats local_output_cache_stats = 15;
+  optional SubProcessStats subprocess_stats = 16;
+
+  optional GomaHistograms histogram = 10;
+
+  optional MachineInfo machine_info = 11;
+}
diff --git a/lib/goma_statz_stats.proto b/lib/goma_statz_stats.proto
new file mode 100644
index 0000000..36b25fc
--- /dev/null
+++ b/lib/goma_statz_stats.proto
@@ -0,0 +1,25 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+syntax = "proto3";
+
+import "goma_stats.proto";
+
+package devtools_goma;
+
+// GomaStatzStats will contain various stats that will be shown in /statz.
+// This proto will be shown in a human-readable or machine-readable
+// (e.g. JSON) style.
+
+// NEXT ID TO USE: 8
+message GomaStatzStats {
+  GomaStats stats = 1;
+
+  map<string, int64> error_to_log = 2;
+  map<string, int64> error_to_user = 3;
+  map<string, int64> local_run_reason = 4;
+  map<string, int64> version_mismatch = 5;
+  map<string, int64> binary_hash_mismatch = 6;
+  map<string, int64> subprogram_mismatch = 7;
+}
diff --git a/lib/known_warning_options.h b/lib/known_warning_options.h
new file mode 100644
index 0000000..3703001
--- /dev/null
+++ b/lib/known_warning_options.h
@@ -0,0 +1,480 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// This is auto generated by build/generate_known_warnings_list.go
+// DO NOT EDIT
+
+#ifndef DEVTOOLS_GOMA_LIB_KNOWN_WARNING_OPTIONS_H_
+#define DEVTOOLS_GOMA_LIB_KNOWN_WARNING_OPTIONS_H_
+
+namespace devtools_goma {
+const char* const kKnownWarningOptions[] {
+  "",
+  "address",
+  "aggregate-return",
+  "aligned-new",
+  "all",
+  "alloc-size-larger-than=",
+  "alloc-zero",
+  "alloca",
+  "alloca-larger-than=",
+  "array-bounds",
+  "array-bounds=",
+  "attribute-alias",
+  "bad-function-cast",
+  "bool-compare",
+  "bool-operation",
+  "c++-compat",
+  "c++11-compat",
+  "c++11-narrowing",
+  "c++14-compat",
+  "c++17-compat",
+  "c90-c99-compat",
+  "c99-c11-compat",
+  "cast-align",
+  "cast-align=",
+  "cast-qual",
+  "catch-value",
+  "catch-value=",
+  "char-subscripts",
+  "chkp",
+  "clobbered",
+  "comment",
+  "comments",
+  "conditionally-supported",
+  "conversion",
+  "covered-switch-default",
+  "dangling-else",
+  "date-time",
+  "declaration-after-statement",
+  "delete-incomplete",
+  "delete-non-virtual-dtor",
+  "deprecated",
+  "disabled-optimization",
+  "double-promotion",
+  "duplicate-decl-specifier",
+  "duplicated-branches",
+  "duplicated-cond",
+  "effc++",
+  "empty-body",
+  "endif-labels",
+  "enum-compare",
+  "error",
+  "error-implicit-function-declaration",
+  "error=",
+  "everything",
+  "exit-time-destructors",
+  "expansion-to-defined",
+  "extra",
+  "extra-semi",
+  "fatal-errors",
+  "float-conversion",
+  "float-equal",
+  "format",
+  "format-nonliteral",
+  "format-overflow",
+  "format-overflow=",
+  "format-security",
+  "format-signedness",
+  "format-truncation",
+  "format-truncation=",
+  "format-y2k",
+  "format=",
+  "frame-address",
+  "frame-larger-than",
+  "frame-larger-than=",
+  "global-constructors",
+  "header-hygiene",
+  "hsa",
+  "if-not-aligned",
+  "ignored-attributes",
+  "ignored-qualifiers",
+  "implicit",
+  "implicit-fallthrough",
+  "implicit-fallthrough=",
+  "implicit-function-declaration",
+  "implicit-int",
+  "inconsistent-missing-override",
+  "init-self",
+  "inline",
+  "int-in-bool-context",
+  "int-to-void-pointer-cast",
+  "invalid-memory-model",
+  "invalid-pch",
+  "jump-misses-init",
+  "larger-than=",
+  "logical-not-parentheses",
+  "logical-op",
+  "long-long",
+  "main",
+  "maybe-uninitialized",
+  "memset-elt-size",
+  "memset-transposed-args",
+  "misleading-indentation",
+  "missing-braces",
+  "missing-declarations",
+  "missing-field-initializers",
+  "missing-format-attribute",
+  "missing-include-dirs",
+  "missing-noreturn",
+  "missing-parameter-type",
+  "missing-prototypes",
+  "multistatement-macros",
+  "nested-externs",
+  "no-#pragma-messages",
+  "no-#warnings",
+  "no-abi",
+  "no-absolute-value",
+  "no-abstract-vbase-init",
+  "no-address-of-packed-member",
+  "no-aggressive-loop-optimizations",
+  "no-array-bounds",
+  "no-attributes",
+  "no-bitfield-width",
+  "no-bool-conversion",
+  "no-builtin-declaration-mismatch",
+  "no-builtin-macro-redefined",
+  "no-builtin-requires-header",
+  "no-c++11-compat",
+  "no-c++11-extensions",
+  "no-c++11-narrowing",
+  "no-c++98-compat",
+  "no-c++98-compat-pedantic",
+  "no-c99-extensions",
+  "no-cast-align",
+  "no-cast-qual",
+  "no-char-subscripts",
+  "no-comment",
+  "no-conditional-uninitialized",
+  "no-constant-conversion",
+  "no-constant-logical-operand",
+  "no-conversion",
+  "no-conversion-null",
+  "no-coverage-mismatch",
+  "no-covered-switch-default",
+  "no-cpp",
+  "no-dangling-else",
+  "no-delete-incomplete",
+  "no-delete-non-virtual-dtor",
+  "no-deprecated",
+  "no-deprecated-declarations",
+  "no-deprecated-register",
+  "no-designated-init",
+  "no-disabled-macro-expansion",
+  "no-discarded-array-qualifiers",
+  "no-discarded-qualifiers",
+  "no-div-by-zero",
+  "no-documentation",
+  "no-documentation-unknown-command",
+  "no-double-promotion",
+  "no-duplicate-decl-specifier",
+  "no-empty-body",
+  "no-endif-labels",
+  "no-enum-compare",
+  "no-enum-compare-switch",
+  "no-enum-conversion",
+  "no-error",
+  "no-error-sometimes-uninitialized",
+  "no-error-unused",
+  "no-exit-time-destructors",
+  "no-expansion-to-defined",
+  "no-extern-c-compat",
+  "no-extern-initializer",
+  "no-extra",
+  "no-extra-tokens",
+  "no-float-conversion",
+  "no-float-equal",
+  "no-for-loop-analysis",
+  "no-format",
+  "no-format-contains-nul",
+  "no-format-extra-args",
+  "no-format-nonliteral",
+  "no-format-pedantic",
+  "no-format-security",
+  "no-format-y2k",
+  "no-format-zero-length",
+  "no-four-char-constants",
+  "no-frame-larger-than",
+  "no-free-nonheap-object",
+  "no-gcc-compat",
+  "no-global-constructors",
+  "no-gnu-anonymous-struct",
+  "no-gnu-designator",
+  "no-gnu-variable-sized-type-not-at-end",
+  "no-gnu-zero-variadic-macro-arguments",
+  "no-header-guard",
+  "no-header-hygiene",
+  "no-ignored-attributes",
+  "no-ignored-qualifiers",
+  "no-implicit-exception-spec-mismatch",
+  "no-implicit-fallthrough",
+  "no-implicit-function-declaration",
+  "no-implicit-int",
+  "no-implicitly-unsigned-literal",
+  "no-import",
+  "no-incompatible-library-redeclaration",
+  "no-incompatible-pointer-types",
+  "no-incompatible-pointer-types-discards-qualifiers",
+  "no-inconsistent-dllimport",
+  "no-inconsistent-missing-override",
+  "no-inherited-variadic-ctor",
+  "no-initializer-overrides",
+  "no-inline-asm",
+  "no-inline-new-delete",
+  "no-int-conversion",
+  "no-int-to-pointer-cast",
+  "no-int-to-void-pointer-cast",
+  "no-invalid-noreturn",
+  "no-invalid-offsetof",
+  "no-literal-conversion",
+  "no-logical-op-parentheses",
+  "no-long-long",
+  "no-macro-redefined",
+  "no-max-unsigned-zero",
+  "no-maybe-uninitialized",
+  "no-microsoft-cast",
+  "no-microsoft-enum-forward-reference",
+  "no-microsoft-extra-qualification",
+  "no-microsoft-goto",
+  "no-microsoft-include",
+  "no-mismatched-tags",
+  "no-missing-braces",
+  "no-missing-field-initializers",
+  "no-missing-noescape",
+  "no-missing-noreturn",
+  "no-missing-prototypes",
+  "no-missing-variable-declarations",
+  "no-multichar",
+  "no-narrowing",
+  "no-nested-anon-types",
+  "no-newline-eof",
+  "no-non-literal-null-conversion",
+  "no-non-pod-varargs",
+  "no-non-virtual-dtor",
+  "no-nonnull",
+  "no-nonportable-include-path",
+  "no-null-conversion",
+  "no-null-dereference",
+  "no-null-pointer-arithmetic",
+  "no-nullability-completeness",
+  "no-objc-missing-property-synthesis",
+  "no-odr",
+  "no-old-style-cast",
+  "no-overflow",
+  "no-overloaded-virtual",
+  "no-override-init",
+  "no-padded",
+  "no-parentheses",
+  "no-parentheses-equality",
+  "no-pedantic",
+  "no-pedantic-ms-format",
+  "no-pessimizing-move",
+  "no-pointer-arith",
+  "no-pointer-bool-conversion",
+  "no-pointer-sign",
+  "no-pointer-to-int-cast",
+  "no-pragmas",
+  "no-psabi",
+  "no-reorder",
+  "no-reserved-id-macro",
+  "no-return-local-addr",
+  "no-return-type",
+  "no-scalar-storage-order",
+  "no-self-assign",
+  "no-semicolon-before-method-body",
+  "no-sequence-point",
+  "no-shadow",
+  "no-shadow-ivar",
+  "no-shift-count-overflow",
+  "no-shift-negative-value",
+  "no-shift-op-parentheses",
+  "no-shift-overflow",
+  "no-shift-sign-overflow",
+  "no-shorten-64-to-32",
+  "no-sign-compare",
+  "no-sign-conversion",
+  "no-sign-promo",
+  "no-signed-enum-bitfield",
+  "no-sizeof-pointer-memaccess",
+  "no-sometimes-uninitialized",
+  "no-strict-aliasing",
+  "no-strict-overflow",
+  "no-string-conversion",
+  "no-string-plus-int",
+  "no-switch",
+  "no-switch-enum",
+  "no-system-headers",
+  "no-tautological-compare",
+  "no-tautological-constant-compare",
+  "no-tautological-constant-out-of-range-compare",
+  "no-tautological-pointer-compare",
+  "no-tautological-undefined-compare",
+  "no-tautological-unsigned-enum-zero-compare",
+  "no-tautological-unsigned-zero-compare",
+  "no-thread-safety-analysis",
+  "no-thread-safety-negative",
+  "no-trigraphs",
+  "no-type-limits",
+  "no-typedef-redefinition",
+  "no-undeclared-selector",
+  "no-undef",
+  "no-undefined-bool-conversion",
+  "no-undefined-func-template",
+  "no-undefined-var-template",
+  "no-unguarded-availability",
+  "no-uninitialized",
+  "no-unknown-attributes",
+  "no-unknown-pragmas",
+  "no-unknown-warning-option",
+  "no-unnamed-type-template-args",
+  "no-unneeded-internal-declaration",
+  "no-unreachable-code",
+  "no-unreachable-code-break",
+  "no-unreachable-code-return",
+  "no-unused",
+  "no-unused-but-set-variable",
+  "no-unused-command-line-argument",
+  "no-unused-const-variable",
+  "no-unused-function",
+  "no-unused-label",
+  "no-unused-lambda-capture",
+  "no-unused-local-typedef",
+  "no-unused-local-typedefs",
+  "no-unused-macros",
+  "no-unused-member-function",
+  "no-unused-parameter",
+  "no-unused-private-field",
+  "no-unused-result",
+  "no-unused-template",
+  "no-unused-value",
+  "no-unused-variable",
+  "no-used-but-marked-unused",
+  "no-user-defined-warnings",
+  "no-varargs",
+  "no-variadic-macros",
+  "no-virtual-move-assign",
+  "no-vla",
+  "no-weak-vtables",
+  "no-writable-strings",
+  "no-write-strings",
+  "no-zero-as-null-pointer-constant",
+  "no-zero-length-array",
+  "non-virtual-dtor",
+  "nonnull",
+  "nonnull-compare",
+  "normalized=",
+  "null-dereference",
+  "objc-missing-property-synthesis",
+  "old-style-cast",
+  "old-style-declaration",
+  "old-style-definition",
+  "openmp-simd",
+  "overlength-strings",
+  "overloaded-virtual",
+  "override-init",
+  "override-init-side-effects",
+  "packed",
+  "packed-bitfield-compat",
+  "packed-not-aligned",
+  "padded",
+  "parentheses",
+  "partial-availability",
+  "pedantic",
+  "placement-new",
+  "placement-new=",
+  "pointer-arith",
+  "pointer-compare",
+  "pointer-sign",
+  "redundant-decls",
+  "restrict",
+  "return-type",
+  "sequence-point",
+  "shadow",
+  "shadow=",
+  "shift-count-negative",
+  "shift-count-overflow",
+  "shift-negative-value",
+  "shift-overflow",
+  "shift-overflow=",
+  "shorten-64-to-32",
+  "sign-compare",
+  "sign-conversion",
+  "sign-promo",
+  "sized-deallocation",
+  "sizeof-array-argument",
+  "sizeof-pointer-div",
+  "sizeof-pointer-memaccess",
+  "stack-protector",
+  "stack-usage",
+  "stack-usage=",
+  "strict-aliasing",
+  "strict-aliasing=",
+  "strict-overflow",
+  "strict-overflow=",
+  "strict-prototypes",
+  "string-conversion",
+  "stringop-overflow",
+  "stringop-overflow=",
+  "stringop-truncation",
+  "subobject-linkage",
+  "suggest-attribute=",
+  "suggest-final-methods",
+  "suggest-final-types",
+  "suggest-override",
+  "switch",
+  "switch-bool",
+  "switch-default",
+  "switch-enum",
+  "switch-unreachable",
+  "sync-nand",
+  "system-headers",
+  "tautological-compare",
+  "tautological-constant-out-of-range-compare",
+  "tautological-overlap-compare",
+  "tautological-unsigned-zero-compare",
+  "thread-safety",
+  "thread-safety-negative",
+  "traditional",
+  "traditional-conversion",
+  "trampolines",
+  "trigraphs",
+  "type-limits",
+  "undeclared-selector",
+  "undef",
+  "unguarded-availability",
+  "uninitialized",
+  "unknown-pragmas",
+  "unreachable-code",
+  "unreachable-code-break",
+  "unreachable-code-return",
+  "unsafe-loop-optimizations",
+  "unsuffixed-float-constants",
+  "unused",
+  "unused-but-set-parameter",
+  "unused-but-set-variable",
+  "unused-const-variable",
+  "unused-const-variable=",
+  "unused-function",
+  "unused-label",
+  "unused-lambda-capture",
+  "unused-local-typedefs",
+  "unused-macros",
+  "unused-parameter",
+  "unused-value",
+  "unused-variable",
+  "used-but-marked-unused",
+  "useless-cast",
+  "user-defined-warnings",
+  "varargs",
+  "variadic-macros",
+  "vector-operation-performance",
+  "vla",
+  "vla-larger-than=",
+  "volatile-register-var",
+  "write-strings",
+  "zero-as-null-pointer-constant",
+};
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_LIB_KNOWN_WARNING_OPTIONS_H_
diff --git a/lib/path_resolver.cc b/lib/path_resolver.cc
new file mode 100644
index 0000000..a8b251e
--- /dev/null
+++ b/lib/path_resolver.cc
@@ -0,0 +1,359 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "path_resolver.h"
+
+#include <limits.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cctype>
+#include <functional>
+#include <locale>
+
+
+#include "glog/logging.h"
+#include "path_util.h"
+#include "string_piece.h"
+#include "string_piece_utils.h"
+using std::string;
+
+namespace {
+
+void trim(string* s) {
+  // Strips leading and trailing whitespace from |*s| in place.  The char is
+  // widened via unsigned char because isspace() on a negative value is UB.
+  const auto not_space = [](unsigned char c) { return std::isspace(c) == 0; };
+  s->erase(s->begin(), std::find_if(s->begin(), s->end(), not_space));
+  s->erase(std::find_if(s->rbegin(), s->rend(), not_space).base(),
+           s->end());
+}
+
+bool IsSeparator(char c, const string& sep_chars) {  // |c| in |sep_chars|?
+  return std::find(sep_chars.begin(), sep_chars.end(), c) != sep_chars.end();
+}
+
+// Returns the offset at which the drive/UNC prefix of |path| ends and the
+// path part begins: 2 for "X:", the end of "\\host" for UNC, otherwise 0.
+string::size_type GetDrivePrefixPosition(absl::string_view path) {
+  if (path.size() < 2) return 0;
+  if (path.substr(0, 2) != "\\\\") {
+    // Not UNC: only a drive letter ("X:") contributes a prefix.
+    return path[1] == ':' ? 2 : 0;
+  }
+  // UNC: the prefix runs up to the separator that follows the host name.
+  const absl::string_view after_slashes = absl::ClippedSubstr(path, 2);
+  const string::size_type host_end = after_slashes.find('\\');
+  return host_end == string::npos ? path.size() : host_end + 2;
+}
+
+// Separate UNC/drive letter from path so that path operations can be done
+// correctly.  The UNC/drive letter will be kept in |preserve|, and the
+// path relative to topmost level (i.e. UNC host/drive letter) is in
+// |resolved_path|.  If |resolved_path| has no such prefix (including when it
+// is shorter than two characters), |preserve| is cleared and |resolved_path|
+// is left untouched.
+void SeparatePath(string* preserve, string* resolved_path) {
+  preserve->clear();
+  // Guard: indexing [1] below would be out of range for a shorter string.
+  if (resolved_path->size() < 2) return;
+  const bool is_unc = (resolved_path->compare(0, 2, "\\\\") == 0);
+  if (!is_unc && (*resolved_path)[1] != ':') return;
+
+  // Drive letter: keep "X:".  UNC: keep "\\" plus the host name, i.e.
+  // everything up to (not including) the next backslash.
+  string::size_type prefix_len = 2;
+  if (is_unc) {
+    prefix_len = resolved_path->find('\\', 2);
+    if (prefix_len == string::npos) prefix_len = resolved_path->size();
+  }
+  preserve->assign(*resolved_path, 0, prefix_len);
+  resolved_path->erase(0, prefix_len);
+}
+
+}  // namespace
+
+namespace devtools_goma {
+
+// Native separator of the platform this binary is built for.
+#ifdef _WIN32
+const char PathResolver::kPathSep = '\\';
+#else
+const char PathResolver::kPathSep = '/';
+#endif
+
+// Construction and destruction need no work of their own.
+PathResolver::PathResolver() {}
+
+PathResolver::~PathResolver() {}
+
+string PathResolver::PlatformConvert(const string& path) {
+  string converted;  // Filled by the out-parameter overload below.
+  PlatformConvertToString(path, &converted);
+  return converted;
+}
+
+void PathResolver::PlatformConvertToString(const string& path,
+                                           string* OUTPUT) {
+  // Delegates to the four-argument overload.  The separator is the one of
+  // the platform this code is compiled for, and the case of |path| is
+  // always preserved.
+#ifdef _WIN32
+  const PathSeparatorType native_sep = PathResolver::kWin32PathSep;
+#else
+  const PathSeparatorType native_sep = PathResolver::kPosixPathSep;
+#endif
+
+  PlatformConvertToString(path, native_sep, PathResolver::kPreserveCase,
+                          OUTPUT);
+}
+
+string PathResolver::PlatformConvert(
+    const string& path, PathResolver::PathSeparatorType sep_type,
+    PathResolver::PathCaseType case_type) {
+  string converted;  // Receives the result of the out-parameter overload.
+  PlatformConvertToString(path, sep_type, case_type, &converted);
+  return converted;
+}
+
+void PathResolver::PlatformConvertToString(
+    const string& path, PathResolver::PathSeparatorType sep_type,
+    PathResolver::PathCaseType case_type, string* OUTPUT) {
+  // Writes |path| to |*OUTPUT| with surrounding whitespace trimmed, every
+  // separator replaced by |sep_type| and, for kLowerCase, letters lowered.
+  // TODO: use Chrome base FilePath object, which has everything
+  //                  we need and is much better than the hack below.
+  *OUTPUT = path;
+  trim(OUTPUT);
+
+  if (sep_type == PathResolver::kWin32PathSep) {
+    std::replace(OUTPUT->begin(), OUTPUT->end(), '/', '\\');
+    // Collapse backslash runs; start at 2 so a leading UNC "\\" survives.
+    // |pos| stays put after an erase so runs of 3+ collapse fully.
+    string::size_type pos = 2;
+    while ((pos = OUTPUT->find("\\\\", pos)) != string::npos)
+      OUTPUT->erase(pos, 1);
+  } else {
+#ifdef _WIN32
+    LOG(FATAL) << "Unsupported";
+#endif
+    std::replace(OUTPUT->begin(), OUTPUT->end(), '\\', '/');
+  }
+
+  if (case_type == PathResolver::kLowerCase) {
+    // tolower() on a negative char is undefined, hence unsigned char.
+    std::transform(OUTPUT->begin(), OUTPUT->end(), OUTPUT->begin(),
+                   [](unsigned char c) { return std::tolower(c); });
+  }
+}
+
+string PathResolver::ResolvePath(const string& path) {  // Native separators.
+#ifdef _WIN32
+  return PathResolver::ResolvePath(path, kWin32PathSep);
+#else
+  return PathResolver::ResolvePath(path, kPosixPathSep);
+#endif
+}
+
+// TODO: This does similar path conversion to PlatformConvert inline.
+// Probably we should also (or rather) improve the method too.
+/* static */
+string PathResolver::ResolvePath(
+    const string& path, PathSeparatorType sep_type) {
+  // Note: Windows PathCanonicalize() API has different behavior than
+  //       what's expected, so we'll do a lot of due diligence here.
+  absl::string_view buf(path);
+  string resolved_path;
+  resolved_path.reserve(path.size());
+
+  string sep_chars;
+  if (sep_type == kPosixPathSep) {
+    sep_chars = "/";
+  } else if (sep_type == kWin32PathSep) {
+    sep_chars = "\\/";
+
+    // Split UNC paths and drive letter.
+    string::size_type drive_position = GetDrivePrefixPosition(buf);
+    resolved_path.append(buf.begin(), drive_position);
+    if (drive_position == buf.size()) {
+      return resolved_path;
+    }
+    buf = absl::ClippedSubstr(buf, drive_position);
+  } else {
+    LOG(ERROR) << "Unknown sep_type=" << sep_type;
+    return path;
+  }
+
+  // |buf| is empty when |path| is empty (POSIX case); indexing it would be
+  // out of range, so an empty path is treated as relative.
+  const bool is_absolute = !buf.empty() && IsSeparator(buf[0], sep_chars);
+  std::vector<absl::string_view> components;
+  components.reserve(32);
+
+  size_t found = 0;
+  do {
+    found = buf.find_first_of(sep_chars);
+    absl::string_view component = buf.substr(0, found);
+    // For the last component |found| is npos, and npos + 1 wraps to 0.
+    buf.remove_prefix(found + 1);
+    if (component.empty() || component == ".") {
+      continue;
+    }
+    if (component == ".." && (!components.empty() || is_absolute)) {
+      // ".." after ".." is kept; otherwise it cancels the parent, if any.
+      if (!components.empty() && components.back() == "..") {
+        components.push_back("..");
+      } else if (!components.empty()) {
+        components.pop_back();
+      }
+      continue;
+    }
+    components.push_back(component);
+  } while (found != string::npos);
+
+  if (is_absolute) {
+    resolved_path.push_back(sep_type);
+  }
+  for (size_t i = 0; i < components.size(); ++i) {
+    if (i > 0) resolved_path.push_back(sep_type);
+    resolved_path.append(components[i].begin(), components[i].size());
+  }
+  return resolved_path;
+}
+
+/* static */
+string PathResolver::WeakRelativePath(
+    const string& raw_path, const string& raw_cwd) {
+  // Note: Windows PathRelativePathTo() API has a way different behavior than
+  //       what's expected, so we'll do a lot of due diligence here.
+  PathSeparatorType sep_type;
+  if (IsPosixAbsolutePath(raw_cwd)) {
+    sep_type = kPosixPathSep;
+  } else if (IsWindowsAbsolutePath(raw_cwd)) {
+    sep_type = kWin32PathSep;
+  } else {
+    LOG(ERROR) << "Unknown path type given to raw_cwd=" << raw_cwd;
+    return raw_path;
+  }
+
+  string path = raw_path;
+  string cwd = raw_cwd;
+  if (sep_type == kWin32PathSep) {
+    PlatformConvertToString(raw_path,
+                            kWin32PathSep,
+                            kPreserveCase,
+                            &path);
+    PlatformConvertToString(raw_cwd,
+                            kWin32PathSep,
+                            kPreserveCase,
+                            &cwd);
+  }
+
+  if (sep_type == kPosixPathSep && !IsPosixAbsolutePath(path)) {
+    return path;
+  }
+
+  string preserve_path;
+  if (sep_type == kWin32PathSep) {
+    if (!IsWindowsAbsolutePath(path)) {
+      return path;
+    }
+
+    SeparatePath(&preserve_path, &path);
+    string preserve_cwd;
+    SeparatePath(&preserve_cwd, &cwd);
+    if (preserve_path != preserve_cwd) {
+      return preserve_path + path;
+    }
+  }
+
+  string resolved_cwd = ResolvePath(cwd, sep_type);
+  absl::string_view real_cwd = resolved_cwd;
+  CHECK_EQ(real_cwd[0], sep_type)
+      << "expect real_cwd[0] == sep_type"
+      << " real_cwd=" << real_cwd
+      << " sep_type=" << sep_type;
+  // Don't resolve path, because of cases like:
+  //  cwd = "/tmp"
+  //  path = "/tmp/foo/../bar"
+  //  /tmp/foo -> /var/tmp/foo  (symbolic link)
+  // if path is resolved, we'll get "bar" in /tmp.
+  // but it should be /var/tmp/bar.
+  // it might fail in some cases, but we'll take the safer option here.
+  absl::string_view target = path;
+  CHECK_EQ(target[0], sep_type);
+  if (target == real_cwd)
+    return ".";
+
+  if (HasPrefixDirWithSep(target, real_cwd, sep_type)) {
+    target.remove_prefix(real_cwd.size() + 1);
+    return string(target);
+  }
+  size_t found;
+  size_t last_slash = 0;
+  while ((found = real_cwd.find(sep_type, last_slash + 1)) != string::npos) {
+    // The common prefix must also end a component of |target|; otherwise
+    // cwd "/tmp/foo/x" would wrongly share "/tmp/foo" with "/tmp/foobar".
+    if (real_cwd.substr(0, found) != target.substr(0, found) ||
+        (found < target.size() && target[found] != sep_type)) {
+      break;  // mismatch path component.
+    }
+    last_slash = found;
+  }
+  if (last_slash == 0) {
+    // If it shares only /, use absolute path instead of relative.
+    // e.g. $HOME/src vs /tmp
+    if (sep_type == kWin32PathSep && target == path) {
+      return preserve_path + path;
+    }
+    return string(target);
+  }
+  target = absl::ClippedSubstr(target, last_slash + 1);
+  int depth = 1;
+  found = last_slash;
+  while ((found = real_cwd.find(sep_type, found + 1)) != string::npos) {
+    ++depth;
+  }
+  string relative_path;
+  relative_path.reserve(depth * 3 + target.size());
+  for (int i = 0; i < depth; ++i) {
+    relative_path += "..";
+    relative_path += sep_type;
+  }
+  relative_path += string(target);
+
+  if (sep_type == kWin32PathSep && relative_path == path) {
+    relative_path = preserve_path + relative_path;
+  }
+
+  return relative_path;
+}
+
+bool PathResolver::IsSystemPath(const string& raw_path) const {
+  // On Windows the query is first converted to platform-native form.
+#ifdef _WIN32
+  const string path = PlatformConvert(raw_path);
+#else
+  const string& path = raw_path;
+#endif
+  // A plain string-prefix match against any registered path is enough.
+  return std::any_of(system_paths_.begin(), system_paths_.end(),
+                     [&path](const string& system_path) {
+                       return strings::StartsWith(path, system_path);
+                     });
+}
+
+void PathResolver::RegisterSystemPath(const string& raw_path) {
+  // Stored in platform-native form on Windows, verbatim elsewhere.
+  // No de-duplication is done: every call appends one entry.
+#ifdef _WIN32
+  system_paths_.push_back(PlatformConvert(raw_path));
+#else
+  system_paths_.push_back(raw_path);
+#endif
+}
+
+}  // namespace devtools_goma
diff --git a/lib/path_resolver.h b/lib/path_resolver.h
new file mode 100644
index 0000000..b049a1f
--- /dev/null
+++ b/lib/path_resolver.h
@@ -0,0 +1,84 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_LIB_PATH_RESOLVER_H_
+#define DEVTOOLS_GOMA_LIB_PATH_RESOLVER_H_
+
+#include <string>
+#include <vector>
+
+
+#include "basictypes.h"
+using std::string;
+
+namespace devtools_goma {
+
+class PathResolver {  // Path string helpers plus a system-path registry.
+ public:
+  enum PathSeparatorType {  // Each value is the separator character itself.
+    kPosixPathSep = '/',
+    kWin32PathSep = '\\',
+  };
+
+  enum PathCaseType {
+    kPreserveCase,
+    kLowerCase,
+  };
+
+  PathResolver();
+  ~PathResolver();
+
+  // Convert path to platform specific format of running platform.
+  static string PlatformConvert(const string& path);
+  static void PlatformConvertToString(const string& path, string* OUTPUT);
+
+  // Convert path to platform specific format specified by |sep_type|.
+  // |case_type| specifies how the case of the path should be normalized.
+  // Note that |sep_type|==kPosixPathSep will convert \ to /, so user couldn't
+  // use \ in the path.
+  // TODO: fix this.
+  static string PlatformConvert(const string& path,
+                                PathSeparatorType sep_type,
+                                PathCaseType case_type);
+  static void PlatformConvertToString(const string& path,
+                                      PathSeparatorType sep_type,
+                                      PathCaseType case_type,
+                                      string* OUTPUT);
+
+  // Removes . and .. from |path|, using the running platform's separator.
+  static string ResolvePath(const string& path);
+
+  // Removes . and .. from |path|.
+  // |sep_type| is used for notifying ResolvePath separator type.
+  // If kPosixPathSep is given, ResolvePath uses '/' as a path separator.
+  // If kWin32PathSep is given, ResolvePath uses '/' and '\\' as path
+  // separators, and paths are joined with '\\'.
+  static string ResolvePath(const string& path, PathSeparatorType sep_type);
+
+  // Returns relative path from cwd.
+  // If path and cwd don't share any directory hierarchy, returns path as is,
+  // instead of relative path.
+  // If path is already relative, returns path as is.
+  // Note that if cwd is not a real path (i.e., it contains a symbolic link),
+  // the relative path may point to a different file.
+  static string WeakRelativePath(const string& path, const string& cwd);
+
+  // Returns true if |path| begins with a registered system path.
+  bool IsSystemPath(const string& path) const;
+
+  // Registers path as system path.
+  void RegisterSystemPath(const string& path);
+
+  static const char kPathSep;  // Separator of the platform built for.
+
+ private:
+  std::vector<string> system_paths_;
+
+  DISALLOW_COPY_AND_ASSIGN(PathResolver);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_LIB_PATH_RESOLVER_H_
diff --git a/lib/path_resolver_unittest.cc b/lib/path_resolver_unittest.cc
new file mode 100644
index 0000000..1374fa8
--- /dev/null
+++ b/lib/path_resolver_unittest.cc
@@ -0,0 +1,314 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "path_resolver.h"
+
+#include "gtest/gtest.h"
+
+namespace devtools_goma {
+
+class PathResolverTest : public ::testing::Test {  // Stateless fixture.
+};
+
+TEST_F(PathResolverTest, PlatformConvertCommon) {  // Not platform-guarded.
+  EXPECT_EQ(  // Forward slashes become backslashes; case is kept.
+      PathResolver::PlatformConvert(
+          "/FoO/BaR", PathResolver::kWin32PathSep, PathResolver::kPreserveCase),
+      "\\FoO\\BaR");
+  EXPECT_EQ(  // Input already using backslashes is returned unchanged.
+      PathResolver::PlatformConvert(
+          "\\FoO\\BaR", PathResolver::kWin32PathSep,
+          PathResolver::kPreserveCase),
+      "\\FoO\\BaR");
+  EXPECT_EQ(  // kLowerCase additionally lowercases the result.
+      PathResolver::PlatformConvert(
+          "/FoO/BaR", PathResolver::kWin32PathSep, PathResolver::kLowerCase),
+      "\\foo\\bar");
+  EXPECT_EQ(
+      PathResolver::PlatformConvert(
+          "\\FoO\\BaR", PathResolver::kWin32PathSep, PathResolver::kLowerCase),
+      "\\foo\\bar");
+}
+
+#ifdef _WIN32
+TEST_F(PathResolverTest, PlatformConvertWin32) {  // One-argument overload.
+  EXPECT_EQ(PathResolver::PlatformConvert("/FoO/BaR"), "\\FoO\\BaR");
+  EXPECT_EQ(PathResolver::PlatformConvert("C:\\FoO/BaR"), "C:\\FoO\\BaR");
+  // Note: kPosixPathSep is not implemented for Windows.
+}
+#else
+TEST_F(PathResolverTest, PlatformConvertPOSIX) {  // One-argument overload.
+  EXPECT_EQ(PathResolver::PlatformConvert("/FoO/BaR"), "/FoO/BaR");
+  EXPECT_EQ(PathResolver::PlatformConvert("\\FoO\\BaR"), "/FoO/BaR");
+  EXPECT_EQ(  // Explicit kPosixPathSep with lowercasing.
+      PathResolver::PlatformConvert(
+          "/FoO/BaR", PathResolver::kPosixPathSep, PathResolver::kLowerCase),
+      "/foo/bar");
+  EXPECT_EQ(
+      PathResolver::PlatformConvert(
+          "\\FoO\\BaR", PathResolver::kPosixPathSep, PathResolver::kLowerCase),
+      "/foo/bar");
+}
+#endif
+
+TEST_F(PathResolverTest, ResolvePath) {  // Expected values use native form.
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/foo/bar"));
+
+  EXPECT_EQ(  // "." components are dropped wherever they appear.
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/./foo/bar"));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/foo/./bar"));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/foo/bar/."));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/././foo/bar"));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/./././foo/./bar"));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/./foo/././bar"));
+
+  EXPECT_EQ(  // ".." is dropped at the root and cancels a parent elsewhere.
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/../foo/bar"));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/../../foo/bar"));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/../../../foo/bar"));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/bar"),
+      PathResolver::ResolvePath("/foo/../bar"));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/foo"),
+      PathResolver::ResolvePath("/foo/bar/../"));
+
+  EXPECT_EQ(  // Mixed sequences of names and "..".
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/baz/../foo/bar"));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/bar"),
+      PathResolver::ResolvePath("/baz/../../foo/../bar"));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/"),
+      PathResolver::ResolvePath("/baz/../../foo/../bar/../"));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/foo/baz/../bar"));
+
+  EXPECT_EQ(  // Consecutive and interleaved "..".
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/foo/baz/quux/../../bar"));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/foo/baz/../quux/../bar"));
+
+  EXPECT_EQ(  // Repeated separators produce empty components, which are skipped.
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/foo/baz//////../quux/../bar"));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/foo/baz//../quux/////..////////bar"));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/baz"),
+      PathResolver::ResolvePath("/../../../foo/../../../baz"));
+
+  EXPECT_EQ(  // Trailing "..", with and without a trailing separator.
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/foo/bar/baz/.."));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/foo/bar"),
+      PathResolver::ResolvePath("/foo/bar/baz/../"));
+
+  EXPECT_EQ(  // Root alone; relative paths keep their leading "..".
+      PathResolver::PlatformConvert("/"),
+      PathResolver::ResolvePath("/"));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("../.."),
+      PathResolver::ResolvePath("././../.."));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("../.."),
+      PathResolver::ResolvePath("./.././.."));
+
+#ifndef _WIN32
+  EXPECT_EQ("/foo/bar", PathResolver::ResolvePath("//foo//bar"));
+#else  // _WIN32: drive-letter and UNC host prefixes are kept as they are.
+  EXPECT_EQ("C:\\foo\\bar", PathResolver::ResolvePath("C:\\foo\\bar"));
+  EXPECT_EQ("C:\\foo\\bar", PathResolver::ResolvePath("C:\\.\\foo\\bar"));
+  EXPECT_EQ("C:\\foo\\bar", PathResolver::ResolvePath("C:\\foo\\.\\bar"));
+  EXPECT_EQ("C:\\foo\\bar", PathResolver::ResolvePath("C:\\foo\\bar\\."));
+  EXPECT_EQ("C:\\foo\\bar", PathResolver::ResolvePath("C:\\..\\foo\\bar"));
+  EXPECT_EQ("C:\\foo\\bar",
+            PathResolver::ResolvePath("C:\\..\\..\\foo\\bar"));
+  EXPECT_EQ("C:\\foo\\bar",
+            PathResolver::ResolvePath("C:\\baz\\..\\foo\\bar"));
+  EXPECT_EQ("C:\\foo\\bar",
+            PathResolver::ResolvePath("C:\\foo\\baz\\..\\bar"));
+  EXPECT_EQ("C:\\foo\\bar",
+            PathResolver::ResolvePath("C:\\foo\\baz\\quux\\..\\..\\bar"));
+  EXPECT_EQ("C:\\foo\\bar",
+            PathResolver::ResolvePath("C:\\foo\\baz\\..\\quux\\..\\bar"));
+  EXPECT_EQ("C:\\foo\\bar", PathResolver::ResolvePath("C:\\foo\\bar\\baz\\.."));
+  EXPECT_EQ("C:\\foo\\bar",
+            PathResolver::ResolvePath("C:\\foo\\bar\\baz\\..\\"));
+  EXPECT_EQ("\\\\foo\\bar", PathResolver::ResolvePath("\\\\foo\\bar"));
+  EXPECT_EQ("\\\\foo\\bar", PathResolver::ResolvePath("\\\\foo\\.\\bar"));
+  EXPECT_EQ("\\\\foo\\bar", PathResolver::ResolvePath("\\\\foo\\bar\\."));
+  EXPECT_EQ("\\\\foo\\bar", PathResolver::ResolvePath("\\\\foo\\..\\bar"));
+  EXPECT_EQ("\\\\foo\\bar", PathResolver::ResolvePath("\\\\foo\\..\\..\\bar"));
+  EXPECT_EQ("\\\\baz\\foo\\bar",
+            PathResolver::ResolvePath("\\\\baz\\..\\foo\\bar"));
+  EXPECT_EQ("\\\\foo\\bar", PathResolver::ResolvePath("\\\\foo\\baz\\..\\bar"));
+  EXPECT_EQ("\\\\foo\\bar",
+            PathResolver::ResolvePath("\\\\foo\\baz\\quux\\..\\..\\bar"));
+  EXPECT_EQ("\\\\foo\\bar",
+            PathResolver::ResolvePath("\\\\foo\\baz\\..\\quux\\..\\bar"));
+  EXPECT_EQ("\\\\foo\\bar", PathResolver::ResolvePath("\\\\foo\\bar\\baz\\.."));
+  EXPECT_EQ("\\\\foo\\bar",
+            PathResolver::ResolvePath("\\\\foo\\bar\\baz\\..\\"));
+#endif
+
+  EXPECT_EQ(  // Relative input stays relative.
+      PathResolver::PlatformConvert("relative/path/name"),
+      PathResolver::ResolvePath("./relative/path/name"));
+
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("path/name"),
+      PathResolver::ResolvePath("relative/../path/name"));
+  EXPECT_EQ(  // A leading ".." of a relative path cannot be cancelled.
+      PathResolver::PlatformConvert("../full/path/name"),
+      PathResolver::ResolvePath("../full/path/name"));
+  EXPECT_EQ(
+      PathResolver::PlatformConvert("/full/path/name"),
+      PathResolver::ResolvePath("/../full/path/name"));
+}
+
+TEST_F(PathResolverTest, WeakReletivePath) {  // POSIX-style paths first.
+  EXPECT_EQ("foo", PathResolver::WeakRelativePath("/tmp/foo", "/tmp"));
+  EXPECT_EQ("foo/bar",
+            PathResolver::WeakRelativePath("/tmp/foo/bar", "/tmp"));
+  EXPECT_EQ("bar", PathResolver::WeakRelativePath("/tmp/foo/bar", "/tmp/foo"));
+  EXPECT_EQ("foo/../bar",  // ".." inside the path is not resolved.
+            PathResolver::WeakRelativePath("/tmp/foo/../bar", "/tmp"));
+  EXPECT_EQ("../foo",
+            PathResolver::WeakRelativePath("/tmp/foo", "/tmp/baz"));
+  EXPECT_EQ("../../foo",
+            PathResolver::WeakRelativePath("/tmp/foo", "/tmp/bar/baz"));
+  EXPECT_EQ("../foo",  // "foo" vs "foobar": a shared name prefix is no match.
+            PathResolver::WeakRelativePath("/tmp/foo", "/tmp/foobar"));
+  EXPECT_EQ("../foobar",
+            PathResolver::WeakRelativePath("/tmp/foobar", "/tmp/foo"));
+  EXPECT_EQ("/usr/include",  // Only the root is shared: stays absolute.
+            PathResolver::WeakRelativePath("/usr/include", "/tmp"));
+
+  // Windows path (drive-letter forms first, then UNC forms).
+  EXPECT_EQ("foo", PathResolver::WeakRelativePath("C:\\tmp\\foo", "C:\\tmp"));
+  EXPECT_EQ(
+      "foo\\bar",
+      PathResolver::WeakRelativePath("C:\\tmp\\foo\\bar", "C:\\tmp"));
+  EXPECT_EQ(
+      "bar",
+      PathResolver::WeakRelativePath("C:\\tmp\\foo\\bar", "C:\\tmp\\foo"));
+  EXPECT_EQ(
+      "foo\\..\\bar",
+      PathResolver::WeakRelativePath("C:\\tmp\\foo\\..\\bar", "C:\\tmp"));
+  EXPECT_EQ(
+      "..\\foo",
+      PathResolver::WeakRelativePath("C:\\tmp\\foo", "C:\\tmp\\baz"));
+  EXPECT_EQ(
+      "..\\..\\foo",
+      PathResolver::WeakRelativePath("C:\\tmp\\foo", "C:\\tmp\\bar\\baz"));
+  EXPECT_EQ(
+      "..\\foo",
+      PathResolver::WeakRelativePath("C:\\tmp\\foo", "C:\\tmp\\foobar"));
+  EXPECT_EQ(
+      "..\\foobar",
+      PathResolver::WeakRelativePath("C:\\tmp\\foobar", "C:\\tmp\\foo"));
+  EXPECT_EQ(
+      "C:\\usr\\include",
+      PathResolver::WeakRelativePath("C:\\usr\\include", "C:\\tmp"));
+  EXPECT_EQ(  // Different drives: the path is returned with its drive.
+      "C:\\usr\\include",
+      PathResolver::WeakRelativePath("C:\\usr\\include", "D:\\usr\\include"));
+  EXPECT_EQ(  // cwd without a drive or UNC prefix: path comes back unchanged.
+      "C:\\usr\\include",
+      PathResolver::WeakRelativePath("C:\\usr\\include", "\\usr\\include"));
+  EXPECT_EQ(
+      "foo", PathResolver::WeakRelativePath("\\\\g\\tmp\\foo", "\\\\g\\tmp"));
+  EXPECT_EQ(
+      "foo\\bar",
+      PathResolver::WeakRelativePath("\\\\g\\tmp\\foo\\bar", "\\\\g\\tmp"));
+  EXPECT_EQ(
+      "bar",
+      PathResolver::WeakRelativePath("\\\\g\\tmp\\foo\\bar",
+                                     "\\\\g\\tmp\\foo"));
+  EXPECT_EQ(
+      "foo\\..\\bar",
+      PathResolver::WeakRelativePath("\\\\g\\tmp\\foo\\..\\bar", "\\\\g\\tmp"));
+  EXPECT_EQ(
+      "..\\foo",
+      PathResolver::WeakRelativePath("\\\\g\\tmp\\foo", "\\\\g\\tmp\\baz"));
+  EXPECT_EQ(
+      "..\\..\\foo",
+      PathResolver::WeakRelativePath("\\\\g\\tmp\\foo",
+                                     "\\\\g\\tmp\\bar\\baz"));
+  EXPECT_EQ(
+      "..\\foo",
+      PathResolver::WeakRelativePath("\\\\g\\tmp\\foo", "\\\\g\\tmp\\foobar"));
+  EXPECT_EQ(
+      "..\\foobar",
+      PathResolver::WeakRelativePath("\\\\g\\tmp\\foobar", "\\\\g\\tmp\\foo"));
+  EXPECT_EQ(
+      "\\\\g\\usr\\include",
+      PathResolver::WeakRelativePath("\\\\g\\usr\\include", "\\\\g\\tmp"));
+  EXPECT_EQ(  // Different UNC hosts: the path is returned with its host.
+      "\\\\g\\usr\\include",
+      PathResolver::WeakRelativePath("\\\\g\\usr\\include",
+                                     "\\\\gg\\usr\\include"));
+  EXPECT_EQ(
+      "\\\\g\\usr\\include",
+      PathResolver::WeakRelativePath("\\\\g\\usr\\include", "\\usr\\include"));
+  EXPECT_EQ(  // Drive-relative input is expected back unchanged.
+      "d:foo.obj", PathResolver::WeakRelativePath("d:foo.obj", "C:\\tmp"));
+}
+
+TEST_F(PathResolverTest, SystemPath) {  // Registration, then prefix lookups.
+  PathResolver pr;
+  pr.RegisterSystemPath("/usr/include");
+  pr.RegisterSystemPath("/usr/include/c++/4.4");
+  EXPECT_TRUE(pr.IsSystemPath("/usr/include"));  // Exact match counts.
+  EXPECT_TRUE(pr.IsSystemPath("/usr/include/c++/4.4"));
+  EXPECT_TRUE(pr.IsSystemPath("/usr/include/cairo"));  // Below a system path.
+  EXPECT_TRUE(pr.IsSystemPath("/usr/include/gtk-2.0"));
+  EXPECT_FALSE(pr.IsSystemPath("/home/goma/src"));  // Unrelated paths.
+  EXPECT_FALSE(pr.IsSystemPath("/var/tmp"));
+}
+
+#ifdef _WIN32
+TEST_F(PathResolverTest, SystemPathWin32) {  // Same checks, Windows paths.
+  PathResolver pr;
+  pr.RegisterSystemPath("C:\\Windows");
+  pr.RegisterSystemPath("C:\\Windows\\System32");
+  pr.RegisterSystemPath("C:\\Program Files");
+  pr.RegisterSystemPath("C:\\Program Files (x86)");
+  EXPECT_TRUE(pr.IsSystemPath("C:\\Windows\\write.exe"));
+  EXPECT_TRUE(pr.IsSystemPath("C:\\Windows\\System32\\cmd.exe"));
+  EXPECT_TRUE(pr.IsSystemPath("C:\\Program Files\\Internet Explorer\\IE.DLL"));
+  EXPECT_TRUE(pr.IsSystemPath("C:\\Program Files (x86)\\Adobe\\acrobat.exe"));
+  EXPECT_FALSE(pr.IsSystemPath("C:\\ProgramData"));  // Not a registered prefix.
+  EXPECT_FALSE(pr.IsSystemPath("D:\\Program Files"));  // Other drive.
+}
+#endif
+
+}  // namespace devtools_goma
diff --git a/lib/path_unittest.cc b/lib/path_unittest.cc
new file mode 100644
index 0000000..77f4c2c
--- /dev/null
+++ b/lib/path_unittest.cc
@@ -0,0 +1,355 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "path.h"
+
+#include "gtest/gtest.h"
+using std::string;
+
+TEST(PathTest, fileJoinPath) {
+  EXPECT_EQ("", file::JoinPath());
+  EXPECT_EQ("", file::JoinPath(""));
+  EXPECT_EQ("", file::JoinPath("", ""));
+  EXPECT_EQ("", file::JoinPath("", "", ""));
+  // A single component comes back unchanged.
+  EXPECT_EQ("a", file::JoinPath("a"));
+  EXPECT_EQ("/a", file::JoinPath("/a"));
+  EXPECT_EQ("a/", file::JoinPath("a/"));
+  EXPECT_EQ("/a/", file::JoinPath("/a/"));
+  // An empty second component adds nothing, not even a separator.
+  EXPECT_EQ("a", file::JoinPath("a", ""));
+  EXPECT_EQ("/a", file::JoinPath("/a", ""));
+  EXPECT_EQ("a/", file::JoinPath("a/", ""));
+  EXPECT_EQ("/a/", file::JoinPath("/a/", ""));
+  // An empty first component adds nothing either.
+  EXPECT_EQ("a", file::JoinPath("", "a"));
+  EXPECT_EQ("/a", file::JoinPath("", "/a"));
+  EXPECT_EQ("a/", file::JoinPath("", "a/"));
+  EXPECT_EQ("/a/", file::JoinPath("", "/a/"));
+  // Empty components are skipped wherever they appear.
+  EXPECT_EQ("a", file::JoinPath("a", "", ""));
+  EXPECT_EQ("a", file::JoinPath("", "a", ""));
+  EXPECT_EQ("a", file::JoinPath("", "", "a"));
+  // Joins that insert a separator: '/' off Windows, a backslash on Windows.
+#ifndef _WIN32
+  EXPECT_EQ("a/b", file::JoinPath("a", "b"));
+  EXPECT_EQ("a/b/", file::JoinPath("a", "b/"));
+  EXPECT_EQ("a/b", file::JoinPath("a", "/b"));
+  EXPECT_EQ("a/b/", file::JoinPath("a", "/b/"));
+  // First component already ends with '/'.
+  EXPECT_EQ("a/b", file::JoinPath("a/", "b"));
+  EXPECT_EQ("a/b/", file::JoinPath("a/", "b/"));
+  EXPECT_EQ("a/b", file::JoinPath("a/", "/b"));
+  EXPECT_EQ("a/b/", file::JoinPath("a/", "/b/"));
+  // First component starts with '/'.
+  EXPECT_EQ("/a/b", file::JoinPath("/a", "b"));
+  EXPECT_EQ("/a/b/", file::JoinPath("/a", "b/"));
+  EXPECT_EQ("/a/b", file::JoinPath("/a", "/b"));
+  EXPECT_EQ("/a/b/", file::JoinPath("/a", "/b/"));
+  // First component both starts and ends with '/'.
+  EXPECT_EQ("/a/b", file::JoinPath("/a/", "b"));
+  EXPECT_EQ("/a/b/", file::JoinPath("/a/", "b/"));
+  EXPECT_EQ("/a/b", file::JoinPath("/a/", "/b"));
+  EXPECT_EQ("/a/b/", file::JoinPath("/a/", "/b/"));
+  // Three components with one empty: only one separator is inserted.
+  EXPECT_EQ("a/a", file::JoinPath("a", "a", ""));
+  EXPECT_EQ("a/a", file::JoinPath("", "a", "a"));
+  EXPECT_EQ("a/a", file::JoinPath("a", "", "a"));
+  // Five components.
+  EXPECT_EQ("a/b/c/d/e", file::JoinPath("a", "b", "c", "d", "e"));
+  EXPECT_EQ("/a/b/c/d/e", file::JoinPath("/a", "/b", "/c", "/d", "/e"));
+  EXPECT_EQ("a/b/c/d/e/", file::JoinPath("a/", "b/", "c/", "d/", "e/"));
+  EXPECT_EQ("/a/b/c/d/e/", file::JoinPath("/a/", "/b/", "/c/", "/d/", "/e/"));
+#else
+  EXPECT_EQ("\\a", file::JoinPath("\\a"));
+  EXPECT_EQ("a\\", file::JoinPath("a\\"));
+  EXPECT_EQ("\\a\\", file::JoinPath("\\a\\"));
+  // Plain first component; second with or without surrounding backslashes.
+  EXPECT_EQ("a\\b", file::JoinPath("a", "b"));
+  EXPECT_EQ("a\\b\\", file::JoinPath("a", "b\\"));
+  EXPECT_EQ("a\\b", file::JoinPath("a", "\\b"));
+  EXPECT_EQ("a\\b\\", file::JoinPath("a", "\\b\\"));
+  // First component already ends with a backslash.
+  EXPECT_EQ("a\\b", file::JoinPath("a\\", "b"));
+  EXPECT_EQ("a\\b\\", file::JoinPath("a\\", "b\\"));
+  EXPECT_EQ("a\\b", file::JoinPath("a\\", "\\b"));
+  EXPECT_EQ("a\\b\\", file::JoinPath("a\\", "\\b\\"));
+  // First component starts with a backslash.
+  EXPECT_EQ("\\a\\b", file::JoinPath("\\a", "b"));
+  EXPECT_EQ("\\a\\b\\", file::JoinPath("\\a", "b\\"));
+  EXPECT_EQ("\\a\\b", file::JoinPath("\\a", "\\b"));
+  EXPECT_EQ("\\a\\b\\", file::JoinPath("\\a", "\\b\\"));
+  // First component both starts and ends with a backslash.
+  EXPECT_EQ("\\a\\b", file::JoinPath("\\a\\", "b"));
+  EXPECT_EQ("\\a\\b\\", file::JoinPath("\\a\\", "b\\"));
+  EXPECT_EQ("\\a\\b", file::JoinPath("\\a\\", "\\b"));
+  EXPECT_EQ("\\a\\b\\", file::JoinPath("\\a\\", "\\b\\"));
+  // A drive-letter second component is appended like any other component.
+  EXPECT_EQ("c:\\b", file::JoinPath("", "c:\\b"));
+  EXPECT_EQ("a\\c:\\b", file::JoinPath("a", "c:\\b"));
+  EXPECT_EQ("\\a\\c:\\b", file::JoinPath("\\a", "c:\\b"));
+  EXPECT_EQ("\\a\\c:\\b", file::JoinPath("\\a\\", "c:\\b"));
+  EXPECT_EQ("a\\c:\\b", file::JoinPath("a\\", "c:\\b"));
+  // First component carries a drive letter.
+  EXPECT_EQ("c:\\a\\b", file::JoinPath("c:\\a", "b"));
+  EXPECT_EQ("c:\\a\\b\\", file::JoinPath("c:\\a", "b\\"));
+  EXPECT_EQ("c:\\a\\b", file::JoinPath("c:\\a", "\\b"));
+  EXPECT_EQ("c:\\a\\b\\", file::JoinPath("c:\\a", "\\b\\"));
+  EXPECT_EQ("c:\\a\\b", file::JoinPath("c:\\a\\", "b"));
+  EXPECT_EQ("c:\\a\\b\\", file::JoinPath("c:\\a\\", "b\\"));
+  EXPECT_EQ("c:\\a\\b", file::JoinPath("c:\\a\\", "\\b"));
+  EXPECT_EQ("c:\\a\\b\\", file::JoinPath("c:\\a\\", "\\b\\"));
+  // Three components with one empty: only one separator is inserted.
+  EXPECT_EQ("a\\a", file::JoinPath("a", "a", ""));
+  EXPECT_EQ("a\\a", file::JoinPath("", "a", "a"));
+  EXPECT_EQ("a\\a", file::JoinPath("a", "", "a"));
+  // Five components.
+  EXPECT_EQ("a\\b\\c\\d\\e",
+            file::JoinPath("a", "b", "c", "d", "e"));
+  EXPECT_EQ("\\a\\b\\c\\d\\e",
+            file::JoinPath("\\a", "\\b", "\\c", "\\d", "\\e"));
+  EXPECT_EQ("a\\b\\c\\d\\e\\",
+            file::JoinPath("a\\", "b\\", "c\\", "d\\", "e\\"));
+  EXPECT_EQ("\\a\\b\\c\\d\\e\\",
+            file::JoinPath("\\a\\", "\\b\\", "\\c\\", "\\d\\", "\\e\\"));
+  // Unix style should also work: '/' is recognized as a separator and kept,
+  // but a separator that has to be inserted is still a backslash.
+  EXPECT_EQ("/a\\b", file::JoinPath("/a", "b"));
+  EXPECT_EQ("/a\\b", file::JoinPath("/a", "/b"));
+  EXPECT_EQ("/a/b", file::JoinPath("/a/", "b"));
+  EXPECT_EQ("/a/b", file::JoinPath("/a/", "/b"));
+  EXPECT_EQ("a\\b", file::JoinPath("a", "/b"));
+  // Five components with forward slashes.
+  EXPECT_EQ("/a\\b\\c\\d\\e",
+            file::JoinPath("/a", "/b", "/c", "/d", "/e"));
+  EXPECT_EQ("a/b/c/d/e/",
+            file::JoinPath("a/", "b/", "c/", "d/", "e/"));
+  EXPECT_EQ("/a/b/c/d/e/",
+            file::JoinPath("/a/", "/b/", "/c/", "/d/", "/e/"));
+#endif
+}
+
+TEST(PathTest, fileJoinPathRespectAbsolute) {
+  EXPECT_EQ("", file::JoinPathRespectAbsolute());
+  EXPECT_EQ("", file::JoinPathRespectAbsolute(""));
+  EXPECT_EQ("", file::JoinPathRespectAbsolute("", ""));
+  EXPECT_EQ("", file::JoinPathRespectAbsolute("", "", ""));
+  // A single component comes back unchanged.
+  EXPECT_EQ("a", file::JoinPathRespectAbsolute("a"));
+  EXPECT_EQ("/a", file::JoinPathRespectAbsolute("/a"));
+  EXPECT_EQ("a/", file::JoinPathRespectAbsolute("a/"));
+  EXPECT_EQ("/a/", file::JoinPathRespectAbsolute("/a/"));
+  // An empty second component adds nothing.
+  EXPECT_EQ("a", file::JoinPathRespectAbsolute("a", ""));
+  EXPECT_EQ("/a", file::JoinPathRespectAbsolute("/a", ""));
+  EXPECT_EQ("a/", file::JoinPathRespectAbsolute("a/", ""));
+  EXPECT_EQ("/a/", file::JoinPathRespectAbsolute("/a/", ""));
+  // An empty first component adds nothing.
+  EXPECT_EQ("a", file::JoinPathRespectAbsolute("", "a"));
+  EXPECT_EQ("/a", file::JoinPathRespectAbsolute("", "/a"));
+  EXPECT_EQ("a/", file::JoinPathRespectAbsolute("", "a/"));
+  EXPECT_EQ("/a/", file::JoinPathRespectAbsolute("", "/a/"));
+  // Empty components are skipped wherever they appear.
+  EXPECT_EQ("a", file::JoinPathRespectAbsolute("a", "", ""));
+  EXPECT_EQ("a", file::JoinPathRespectAbsolute("", "a", ""));
+  EXPECT_EQ("a", file::JoinPathRespectAbsolute("", "", "a"));
+  // An absolute component discards everything joined before it.
+#ifndef _WIN32
+  EXPECT_EQ("a/b", file::JoinPathRespectAbsolute("a", "b"));
+  EXPECT_EQ("/b", file::JoinPathRespectAbsolute("a", "/b"));
+  EXPECT_EQ("/c", file::JoinPathRespectAbsolute("a", "/b", "/c"));
+  EXPECT_EQ("/b/c", file::JoinPathRespectAbsolute("a", "/b", "c"));
+  // Same cases with an absolute first component.
+  EXPECT_EQ("/a/b", file::JoinPathRespectAbsolute("/a", "b"));
+  EXPECT_EQ("/b", file::JoinPathRespectAbsolute("/a", "/b"));
+  EXPECT_EQ("/c", file::JoinPathRespectAbsolute("/a", "/b", "/c"));
+  EXPECT_EQ("/b/c", file::JoinPathRespectAbsolute("/a", "/b", "c"));
+  // Same cases with a trailing '/' on the first component.
+  EXPECT_EQ("/a/b", file::JoinPathRespectAbsolute("/a/", "b"));
+  EXPECT_EQ("/b", file::JoinPathRespectAbsolute("/a/", "/b"));
+  EXPECT_EQ("/c", file::JoinPathRespectAbsolute("/a/", "/b", "/c"));
+  EXPECT_EQ("/b/c", file::JoinPathRespectAbsolute("/a/", "/b", "c"));
+#else
+  EXPECT_EQ("a\\b", file::JoinPathRespectAbsolute("a", "b"));
+  EXPECT_EQ("c:\\b", file::JoinPathRespectAbsolute("a", "c:\\b"));
+  EXPECT_EQ("c:\\c", file::JoinPathRespectAbsolute("a", "c:\\b", "c:\\c"));
+  EXPECT_EQ("c:\\b\\c", file::JoinPathRespectAbsolute("a", "c:\\b", "c"));
+  // First component is rooted without a drive letter.
+  EXPECT_EQ("\\a\\b", file::JoinPathRespectAbsolute("\\a", "b"));
+  EXPECT_EQ("c:\\b", file::JoinPathRespectAbsolute("\\a", "c:\\b"));
+  EXPECT_EQ("c:\\c", file::JoinPathRespectAbsolute("\\a", "c:\\b", "c:\\c"));
+  EXPECT_EQ("c:\\b\\c", file::JoinPathRespectAbsolute("\\a", "c:\\b", "c"));
+  // Same, with a trailing backslash on the first component.
+  EXPECT_EQ("\\a\\b", file::JoinPathRespectAbsolute("\\a\\", "b"));
+  EXPECT_EQ("c:\\b", file::JoinPathRespectAbsolute("\\a\\", "c:\\b"));
+  EXPECT_EQ("c:\\c", file::JoinPathRespectAbsolute("\\a\\", "c:\\b", "c:\\c"));
+  EXPECT_EQ("c:\\b\\c", file::JoinPathRespectAbsolute("\\a\\", "c:\\b", "c"));
+  // First component has a drive letter.
+  EXPECT_EQ("c:\\a\\b", file::JoinPathRespectAbsolute("c:\\a", "b"));
+  EXPECT_EQ("c:\\b", file::JoinPathRespectAbsolute("c:\\a", "c:\\b"));
+  EXPECT_EQ("c:\\c", file::JoinPathRespectAbsolute("c:\\a", "c:\\b", "c:\\c"));
+  EXPECT_EQ("c:\\b\\c", file::JoinPathRespectAbsolute("c:\\a", "c:\\b", "c"));
+  // Same, with a trailing backslash on the first component.
+  EXPECT_EQ("c:\\a\\b", file::JoinPathRespectAbsolute("c:\\a\\", "b"));
+  EXPECT_EQ("c:\\b", file::JoinPathRespectAbsolute("c:\\a\\", "c:\\b"));
+  EXPECT_EQ("c:\\c",
+            file::JoinPathRespectAbsolute("c:\\a\\", "c:\\b", "c:\\c"));
+  EXPECT_EQ("c:\\b\\c", file::JoinPathRespectAbsolute("c:\\a\\", "c:\\b", "c"));
+  // A leading backslash alone also makes a component absolute.
+  EXPECT_EQ("\\a\\b", file::JoinPathRespectAbsolute("\\a", "b"));
+  EXPECT_EQ("\\b", file::JoinPathRespectAbsolute("\\a", "\\b"));
+  EXPECT_EQ("\\a\\b", file::JoinPathRespectAbsolute("\\a\\", "b"));
+  EXPECT_EQ("\\b", file::JoinPathRespectAbsolute("\\a\\", "\\b"));
+  // Absolute components combined with an empty component.
+  EXPECT_EQ("\\b", file::JoinPathRespectAbsolute("", "\\b"));
+  EXPECT_EQ("c:\\b", file::JoinPathRespectAbsolute("", "c:\\b"));
+  EXPECT_EQ("\\a", file::JoinPathRespectAbsolute("\\a", ""));
+  EXPECT_EQ("c:\\a", file::JoinPathRespectAbsolute("c:\\a", ""));
+  // Relative first component; second is relative, rooted or drive-qualified.
+  EXPECT_EQ("a\\b", file::JoinPathRespectAbsolute("a", "b"));
+  EXPECT_EQ("\\b", file::JoinPathRespectAbsolute("a", "\\b"));
+  EXPECT_EQ("c:\\b", file::JoinPathRespectAbsolute("a", "c:\\b"));
+  // Unix style should also work: a leading '/' marks a component absolute,
+  // while a separator that has to be inserted is still a backslash.
+  EXPECT_EQ("/a\\b", file::JoinPathRespectAbsolute("/a", "b"));
+  EXPECT_EQ("/b", file::JoinPathRespectAbsolute("/a", "/b"));
+  EXPECT_EQ("/a/b", file::JoinPathRespectAbsolute("/a/", "b"));
+  EXPECT_EQ("/b", file::JoinPathRespectAbsolute("/a/", "/b"));
+  EXPECT_EQ("/b", file::JoinPathRespectAbsolute("a", "/b"));
+#endif
+}
+
+TEST(PathTest, fileBasename) {  // Text after the last separator, maybe empty.
+  EXPECT_EQ("",  file::Basename("/a/"));
+  EXPECT_EQ("a", file::Basename("/a"));
+  EXPECT_EQ("b", file::Basename("a/b"));
+  EXPECT_EQ("",  file::Basename("a/"));
+  EXPECT_EQ("",  file::Basename("/"));
+  EXPECT_EQ("",  file::Basename(""));
+  // Dots are ordinary characters of the base name.
+  EXPECT_EQ(".",     file::Basename("."));
+  EXPECT_EQ(".a",    file::Basename(".a"));
+  EXPECT_EQ("a.",    file::Basename("a."));
+  EXPECT_EQ("a.b",   file::Basename("a.b"));
+  EXPECT_EQ("a.b.c", file::Basename("a.b.c"));
+  // On Windows a backslash is a separator as well.
+#ifdef _WIN32
+  EXPECT_EQ("",  file::Basename("\\a\\"));
+  EXPECT_EQ("a", file::Basename("\\a"));
+  EXPECT_EQ("b", file::Basename("a\\b"));
+  EXPECT_EQ("",  file::Basename("a\\"));
+  EXPECT_EQ("",  file::Basename("\\"));
+  // Test with drive letter.
+  EXPECT_EQ("",  file::Basename("a:\\"));
+  EXPECT_EQ("b", file::Basename("a:\\b"));
+  // Test with extension.
+  EXPECT_EQ("b.c",   file::Basename("a:\\b.c"));
+  EXPECT_EQ("",      file::Basename("a:\\b.c\\"));
+  EXPECT_EQ(".",     file::Basename("\\."));
+  EXPECT_EQ("",      file::Basename(".\\"));
+#endif
+}
+
+TEST(PathTest, fileDirname) {  // Text before the last separator; roots stay.
+  EXPECT_EQ("/a", file::Dirname("/a/"));
+  EXPECT_EQ("/",  file::Dirname("/a"));
+  EXPECT_EQ("a",  file::Dirname("a/b"));
+  EXPECT_EQ("a",  file::Dirname("a/"));
+  EXPECT_EQ("",   file::Dirname("a"));
+  EXPECT_EQ("",   file::Dirname("ab"));
+  EXPECT_EQ("/",  file::Dirname("/"));
+  EXPECT_EQ("",   file::Dirname(""));
+  // On Windows a backslash is a separator as well.
+#ifdef _WIN32
+  EXPECT_EQ("\\a", file::Dirname("\\a\\"));
+  EXPECT_EQ("\\",  file::Dirname("\\a"));
+  EXPECT_EQ("a",   file::Dirname("a\\b"));
+  EXPECT_EQ("a",   file::Dirname("a\\"));
+  EXPECT_EQ("\\",  file::Dirname("\\"));
+  // Test with drive letter.
+  EXPECT_EQ("a:\\", file::Dirname("a:\\"));
+  EXPECT_EQ("a:\\", file::Dirname("a:\\b"));
+  EXPECT_EQ("a:b",  file::Dirname("a:b\\c"));
+  EXPECT_EQ("a:",  file::Dirname("a:b"));
+  // Test with extension.
+  EXPECT_EQ("a:\\",    file::Dirname("a:\\b.c"));
+  EXPECT_EQ("a:\\b.c", file::Dirname("a:\\b.c\\"));
+  EXPECT_EQ("\\",      file::Dirname("\\."));
+  EXPECT_EQ(".",       file::Dirname(".\\"));
+  EXPECT_EQ("a:",  file::Dirname("a:b.txt"));
+#endif
+}
+
+TEST(PathTest, fileStem) {  // Base name without its last ".ext" part.
+  EXPECT_EQ("a",    file::Stem("a.txt"));
+  EXPECT_EQ("a",    file::Stem("a."));
+  EXPECT_EQ("",     file::Stem(""));
+  EXPECT_EQ("",     file::Stem("/"));
+  EXPECT_EQ("a",    file::Stem("a"));
+  EXPECT_EQ("",     file::Stem("a/"));
+  EXPECT_EQ("c",    file::Stem("/a/b/c.c"));
+  EXPECT_EQ("e",    file::Stem("/a/b.c/d/e.cc"));
+  EXPECT_EQ("e",    file::Stem("/a/b.c/d/e"));
+  EXPECT_EQ("e.f",  file::Stem("/a/b.c/d/e.f.g"));
+  // Windows: backslash-separated paths with a drive letter.
+#ifdef _WIN32
+  EXPECT_EQ("",     file::Stem("a:\\"));
+  EXPECT_EQ("",     file::Stem("a:\\b\\"));
+  EXPECT_EQ("c",    file::Stem("a:\\b\\c.c"));
+  EXPECT_EQ("e",    file::Stem("a:\\b.c\\d\\e.cc"));
+  EXPECT_EQ("e",    file::Stem("a:\\b.c\\d\\e"));
+  EXPECT_EQ("e.f",  file::Stem("a:\\b.c\\d\\e.f.g"));
+#endif
+}
+
+TEST(PathTest, fileExtension) {  // Text after the base name's last dot.
+  EXPECT_EQ("txt", file::Extension("a.txt"));
+  EXPECT_EQ("",    file::Extension("a."));
+  EXPECT_EQ("",    file::Extension(""));
+  EXPECT_EQ("",    file::Extension("/"));
+  EXPECT_EQ("",    file::Extension("a"));
+  EXPECT_EQ("",    file::Extension("a/"));
+  EXPECT_EQ("txt", file::Extension("/a/b/c.txt"));
+  EXPECT_EQ("cc",  file::Extension("/a/b.c/d/e.cc"));
+  EXPECT_EQ("",    file::Extension("/a/b.c/d/e"));
+  EXPECT_EQ("g",   file::Extension("/a/b.c/d/e.f.g"));
+  // Windows: backslash-separated paths with a drive letter.
+#ifdef _WIN32
+  EXPECT_EQ("",    file::Extension("a:\\"));
+  EXPECT_EQ("",    file::Extension("a:\\b\\"));
+  EXPECT_EQ("txt", file::Extension("a:\\b\\c.txt"));
+  EXPECT_EQ("cc",  file::Extension("a:\\b.c\\d\\e.cc"));
+  EXPECT_EQ("",    file::Extension("a:\\b.c\\d\\e"));
+  EXPECT_EQ("g",   file::Extension("a:\\b.c\\d\\e.f.g"));
+#endif
+}
+
+TEST(PathTest, fileIsAbsolutePath) {
+  // Unix Style.
+  EXPECT_FALSE(file::IsAbsolutePath(""));
+  EXPECT_FALSE(file::IsAbsolutePath("a"));
+  EXPECT_FALSE(file::IsAbsolutePath("../a"));
+  EXPECT_FALSE(file::IsAbsolutePath("./a"));
+  EXPECT_FALSE(file::IsAbsolutePath("a/b/c/"));
+  EXPECT_TRUE(file::IsAbsolutePath("/a"));
+  EXPECT_TRUE(file::IsAbsolutePath("/a/b/../c"));
+  // Windows: drive-letter, rooted and UNC forms all count as absolute.
+#ifdef _WIN32
+  EXPECT_FALSE(file::IsAbsolutePath("..\\a"));
+  EXPECT_FALSE(file::IsAbsolutePath("a\\b\\c\\"));
+  EXPECT_TRUE(file::IsAbsolutePath("a:"));
+  EXPECT_TRUE(file::IsAbsolutePath("a:\\b"));
+  EXPECT_TRUE(file::IsAbsolutePath("a:\\b\\..\\c"));
+  // Path without drive.
+  EXPECT_TRUE(file::IsAbsolutePath("\\a"));
+  EXPECT_TRUE(file::IsAbsolutePath("\\a\\b"));
+  EXPECT_TRUE(file::IsAbsolutePath("\\a\\b\\..\\c"));
+  // UNC path.
+  EXPECT_TRUE(file::IsAbsolutePath("\\\\a"));
+  EXPECT_TRUE(file::IsAbsolutePath("\\\\a\\b"));
+  EXPECT_TRUE(file::IsAbsolutePath("\\\\a\\b\\..\\c"));
+#endif
+}
diff --git a/lib/path_util.cc b/lib/path_util.cc
new file mode 100644
index 0000000..8383d10
--- /dev/null
+++ b/lib/path_util.cc
@@ -0,0 +1,74 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "path_util.h"
+
+#include <cctype>
+
+#include "string_piece.h"
+#include "string_piece_utils.h"
+
+namespace devtools_goma {
+
+bool IsPosixAbsolutePath(absl::string_view path) {
+  return path.empty() ? false : path.front() == '/';  // Must start with '/'.
+}
+
+bool IsWindowsAbsolutePath(absl::string_view path) {
+  // UNC: two leading backslashes, a later backslash, and no '/' from index 3.
+  if (path.size() > 3 && path[0] == '\\' && path[1] == '\\' &&
+      path.find('\\', 3) != absl::string_view::npos &&
+      path.find('/', 3) == absl::string_view::npos) {
+    return true;
+  }
+  // Drive path: letter, ':', then either separator. The unsigned char cast
+  // keeps std::isalpha well-defined when char is signed and the byte >= 0x80.
+  if (path.size() > 2 && std::isalpha(static_cast<unsigned char>(path[0])) &&
+      path[1] == ':' && (path[2] == '/' || path[2] == '\\')) {
+    return true;
+  }
+
+  return false;
+}
+
+bool HasPrefixDir(absl::string_view path, absl::string_view prefix) {
+#ifndef _WIN32  // Elsewhere only '/' separates components.
+  return HasPrefixDirWithSep(path, prefix, '/');
+#else  // On Windows either separator is accepted; backslash is tried first.
+  return HasPrefixDirWithSep(path, prefix, '\\') ||
+      HasPrefixDirWithSep(path, prefix, '/');
+#endif
+}
+
+bool HasPrefixDirWithSep(absl::string_view path, absl::string_view prefix,
+                         char pathsep) {
+  // TODO: do we need to convert path before check on Win path?
+  // 1. need to make both lower case?
+
+  // |prefix| matches when it equals |path|, or when the character right
+  // after it in |path| is |pathsep| (so "/home/foo" is not a prefix dir of
+  // "/home/foobar").
+  if (strings::StartsWith(path, prefix)) {
+    const absl::string_view rest = path.substr(prefix.size());
+    return rest.empty() || rest.front() == pathsep;
+  }
+  return false;
+}
+
+absl::string_view GetFileNameExtension(absl::string_view filename) {
+  // Drop everything up to and including the last '/' or backslash.
+  const auto sep_pos = filename.find_last_of("/\\");
+  if (sep_pos != absl::string_view::npos) {
+    filename = filename.substr(sep_pos + 1);
+  }
+  // No dot, or a dot only at index 0 (e.g. ".config"), means no extension:
+  // in that case the result is an empty view.
+  const auto dot_pos = filename.rfind('.');
+  const bool has_ext = dot_pos != absl::string_view::npos && dot_pos != 0U;
+  const auto start = has_ext ? dot_pos + 1 : filename.size();
+  return absl::ClippedSubstr(filename, start);
+}
+
+}  // namespace devtools_goma
diff --git a/lib/path_util.h b/lib/path_util.h
new file mode 100644
index 0000000..62e2aac
--- /dev/null
+++ b/lib/path_util.h
@@ -0,0 +1,26 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEVTOOLS_GOMA_LIB_PATH_UTIL_H_
+#define DEVTOOLS_GOMA_LIB_PATH_UTIL_H_
+
+
+#include "string_piece.h"
+
+namespace devtools_goma {
+
+bool IsPosixAbsolutePath(absl::string_view path);  // Leading '/'.
+bool IsWindowsAbsolutePath(absl::string_view path);  // UNC or drive path.
+// True if |path| equals |prefix| or continues it with a path separator.
+bool HasPrefixDir(absl::string_view path, absl::string_view prefix);
+bool HasPrefixDirWithSep(absl::string_view path, absl::string_view prefix,
+                         char pathsep);
+
+// Returns the file extension of the given |filename|, without the dot.
+// This function treats both '/' and '\\' as path separators.
+absl::string_view GetFileNameExtension(absl::string_view filename);
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_LIB_PATH_UTIL_H_
diff --git a/lib/path_util_unittest.cc b/lib/path_util_unittest.cc
new file mode 100644
index 0000000..47f5cc8
--- /dev/null
+++ b/lib/path_util_unittest.cc
@@ -0,0 +1,179 @@
+// Copyright 2016 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "path_util.h"
+
+#include <gtest/gtest.h>
+
+namespace devtools_goma {
+
+TEST(PathUtilTest, IsPosixAbsolutePath) {  // Absolute means a leading '/'.
+  EXPECT_TRUE(IsPosixAbsolutePath("/"));
+  EXPECT_TRUE(IsPosixAbsolutePath("/foo"));
+  EXPECT_TRUE(IsPosixAbsolutePath("/foo/bar"));
+  EXPECT_TRUE(IsPosixAbsolutePath("/../foo"));
+  EXPECT_TRUE(IsPosixAbsolutePath("/foo/../bar"));
+  // Relative paths.
+  EXPECT_FALSE(IsPosixAbsolutePath("."));
+  EXPECT_FALSE(IsPosixAbsolutePath(".."));
+  EXPECT_FALSE(IsPosixAbsolutePath("foo"));
+  EXPECT_FALSE(IsPosixAbsolutePath("foo/bar"));
+  EXPECT_FALSE(IsPosixAbsolutePath("../foo"));
+  // Windows-style absolute paths do not count.
+  EXPECT_FALSE(IsPosixAbsolutePath("c:\\Users\\foo"));
+  EXPECT_FALSE(IsPosixAbsolutePath("\\\\Host\\dir\\content"));
+}
+
+TEST(PathUtilTest, IsWindowsAbsolutePath) {  // Drive paths, either separator.
+  EXPECT_TRUE(IsWindowsAbsolutePath("c:\\"));
+  EXPECT_TRUE(IsWindowsAbsolutePath("C:\\"));
+  EXPECT_TRUE(IsWindowsAbsolutePath("c:/"));
+  EXPECT_TRUE(IsWindowsAbsolutePath("C:/"));
+  EXPECT_TRUE(IsWindowsAbsolutePath("c:\\Users\\foo"));
+  EXPECT_TRUE(IsWindowsAbsolutePath("c:/Users/foo"));
+  EXPECT_TRUE(IsWindowsAbsolutePath("c:\\Users/foo"));
+  EXPECT_TRUE(IsWindowsAbsolutePath("c:/Users\\foo"));
+  // UNC paths with a backslash after the host name.
+  EXPECT_TRUE(IsWindowsAbsolutePath("\\\\Host\\"));
+  EXPECT_TRUE(IsWindowsAbsolutePath("\\\\Host\\dir"));
+  EXPECT_TRUE(IsWindowsAbsolutePath("\\\\Host\\dir\\content"));
+  // Rooted paths with neither a drive letter nor a host are rejected.
+  EXPECT_FALSE(IsWindowsAbsolutePath("/"));
+  EXPECT_FALSE(IsWindowsAbsolutePath("/foo"));
+  EXPECT_FALSE(IsWindowsAbsolutePath("/foo/bar"));
+  EXPECT_FALSE(IsWindowsAbsolutePath("/../foo"));
+  EXPECT_FALSE(IsWindowsAbsolutePath("/foo/../bar"));
+  EXPECT_FALSE(IsWindowsAbsolutePath("\\"));
+  EXPECT_FALSE(IsWindowsAbsolutePath("\\foo"));
+  EXPECT_FALSE(IsWindowsAbsolutePath("\\foo\\bar"));
+  EXPECT_FALSE(IsWindowsAbsolutePath("\\..\\foo"));
+  EXPECT_FALSE(IsWindowsAbsolutePath("\\foo\\..\\bar"));
+  // Relative paths.
+  EXPECT_FALSE(IsWindowsAbsolutePath("."));
+  EXPECT_FALSE(IsWindowsAbsolutePath(".."));
+  EXPECT_FALSE(IsWindowsAbsolutePath("foo"));
+  EXPECT_FALSE(IsWindowsAbsolutePath("foo/bar"));
+  EXPECT_FALSE(IsWindowsAbsolutePath("../foo"));
+  // Drive or host alone, and UNC paths containing '/', are rejected for now.
+  // TODO: check whether the following are allowed or not.
+  EXPECT_FALSE(IsWindowsAbsolutePath("c:"));
+  EXPECT_FALSE(IsWindowsAbsolutePath("\\\\host"));
+  EXPECT_FALSE(IsWindowsAbsolutePath("\\\\Host\\dir/content"));
+  EXPECT_FALSE(IsWindowsAbsolutePath("\\\\Host/dir\\content"));
+  EXPECT_FALSE(IsWindowsAbsolutePath("\\\\Host/dir/content"));
+}
+
+TEST(PathUtilTest, HasPrefixDirWithSep) {
+  EXPECT_TRUE(HasPrefixDirWithSep("/home/foo/bar", "/home/foo", '/'));
+  EXPECT_TRUE(HasPrefixDirWithSep("/home/foo", "/home/foo", '/'));
+  EXPECT_TRUE(HasPrefixDirWithSep("/home/foo/", "/home/foo", '/'));
+  // Different directory, or only a partial component match.
+  EXPECT_FALSE(HasPrefixDirWithSep("/foo", "/baz", '/'));
+  EXPECT_FALSE(HasPrefixDirWithSep("/foo/bar", "/bar", '/'));
+  EXPECT_FALSE(HasPrefixDirWithSep("/foo", "/bar/baz", '/'));
+  EXPECT_FALSE(HasPrefixDirWithSep("/foo", "/foo/bar", '/'));
+  EXPECT_FALSE(HasPrefixDirWithSep("/home/foobar", "/home/foo", '/'));
+  // Relative paths.
+  EXPECT_TRUE(HasPrefixDirWithSep("home/foo", "home/foo", '/'));
+  EXPECT_TRUE(HasPrefixDirWithSep("home/foo/bar", "home/foo", '/'));
+  // Relative paths starting with "..".
+  EXPECT_TRUE(HasPrefixDirWithSep("../home/foo", "../home/foo", '/'));
+  EXPECT_TRUE(HasPrefixDirWithSep("../home/foo/bar", "../home/foo", '/'));
+  // The same matches with backslash as the separator.
+  EXPECT_TRUE(HasPrefixDirWithSep("c:\\home\\foo\\bar", "c:\\home\\foo", '\\'));
+  EXPECT_TRUE(HasPrefixDirWithSep("c:\\home\\foo", "c:\\home\\foo", '\\'));
+  EXPECT_TRUE(HasPrefixDirWithSep("c:\\home\\foo\\", "c:\\home\\foo", '\\'));
+  // The same mismatches with backslash as the separator.
+  EXPECT_FALSE(HasPrefixDirWithSep("c:\\foo", "c:\\baz", '\\'));
+  EXPECT_FALSE(HasPrefixDirWithSep("c:\\foo\\bar", "c:\\bar", '\\'));
+  EXPECT_FALSE(HasPrefixDirWithSep("c:\\foo", "c:\\bar\\baz", '\\'));
+  EXPECT_FALSE(HasPrefixDirWithSep("c:\\foo", "c:\\foo\\bar", '\\'));
+  EXPECT_FALSE(HasPrefixDirWithSep("c:\\home\\foobar", "c:\\home\\foo", '\\'));
+  // Relative paths.
+  EXPECT_TRUE(HasPrefixDirWithSep("home\\foo", "home\\foo", '\\'));
+  EXPECT_TRUE(HasPrefixDirWithSep("home\\foo\\bar", "home\\foo", '\\'));
+  // Relative paths starting with "..".
+  EXPECT_TRUE(HasPrefixDirWithSep("..\\home\\foo", "..\\home\\foo", '\\'));
+  EXPECT_TRUE(HasPrefixDirWithSep("..\\home\\foo\\bar", "..\\home\\foo", '\\'));
+}
+
+TEST(PathUtilTest, HasPrefixDir) {
+  EXPECT_TRUE(HasPrefixDir("/home/foo/bar", "/home/foo"));
+  EXPECT_TRUE(HasPrefixDir("/home/foo", "/home/foo"));
+  EXPECT_TRUE(HasPrefixDir("/home/foo/", "/home/foo"));
+  // Different directory, or only a partial component match.
+  EXPECT_FALSE(HasPrefixDir("/foo", "/baz"));
+  EXPECT_FALSE(HasPrefixDir("/foo/bar", "/bar"));
+  EXPECT_FALSE(HasPrefixDir("/foo", "/bar/baz"));
+  EXPECT_FALSE(HasPrefixDir("/foo", "/foo/bar"));
+  EXPECT_FALSE(HasPrefixDir("/home/foobar", "/home/foo"));
+  // Relative paths.
+  EXPECT_TRUE(HasPrefixDir("home/foo", "home/foo"));
+  EXPECT_TRUE(HasPrefixDir("home/foo/bar", "home/foo"));
+  // Relative paths starting with "..".
+  EXPECT_TRUE(HasPrefixDir("../home/foo", "../home/foo"));
+  EXPECT_TRUE(HasPrefixDir("../home/foo/bar", "../home/foo"));
+  // On Windows both backslash and '/' work as separators.
+#ifdef _WIN32
+  EXPECT_TRUE(HasPrefixDir("c:\\home\\foo\\bar", "c:\\home\\foo"));
+  EXPECT_TRUE(HasPrefixDir("c:\\home\\foo", "c:\\home\\foo"));
+  EXPECT_TRUE(HasPrefixDir("c:\\home\\foo\\", "c:\\home\\foo"));
+  // Mismatches with backslash separators.
+  EXPECT_FALSE(HasPrefixDir("c:\\foo", "c:\\baz"));
+  EXPECT_FALSE(HasPrefixDir("c:\\foo\\bar", "c:\\bar"));
+  EXPECT_FALSE(HasPrefixDir("c:\\foo", "c:\\bar\\baz"));
+  EXPECT_FALSE(HasPrefixDir("c:\\foo", "c:\\foo\\bar"));
+  EXPECT_FALSE(HasPrefixDir("c:\\home\\foobar", "c:\\home\\foo"));
+  // Relative paths.
+  EXPECT_TRUE(HasPrefixDir("home\\foo", "home\\foo"));
+  EXPECT_TRUE(HasPrefixDir("home\\foo\\bar", "home\\foo"));
+  // Relative paths starting with "..".
+  EXPECT_TRUE(HasPrefixDir("..\\home\\foo", "..\\home\\foo"));
+  EXPECT_TRUE(HasPrefixDir("..\\home\\foo\\bar", "..\\home\\foo"));
+  // Drive-letter paths written with forward slashes.
+  EXPECT_TRUE(HasPrefixDir("c:/home/foo/bar", "c:/home/foo"));
+  EXPECT_TRUE(HasPrefixDir("c:/home/foo", "c:/home/foo"));
+  EXPECT_TRUE(HasPrefixDir("c:/home/foo/", "c:/home/foo"));
+  // Mismatches with forward slashes.
+  EXPECT_FALSE(HasPrefixDir("c:/foo", "c:/baz"));
+  EXPECT_FALSE(HasPrefixDir("c:/foo/bar", "c:/bar"));
+  EXPECT_FALSE(HasPrefixDir("c:/foo", "c:/bar/baz"));
+  EXPECT_FALSE(HasPrefixDir("c:/foo", "c:/foo/bar"));
+  EXPECT_FALSE(HasPrefixDir("c:/home/foobar", "c:/home/foo"));
+#endif
+}
+
+TEST(PathUtilTest, GetFileNameExtension) {
+  EXPECT_EQ("txt", GetFileNameExtension("a.txt"));
+  EXPECT_EQ("",    GetFileNameExtension("a."));
+  EXPECT_EQ("",    GetFileNameExtension(""));
+  EXPECT_EQ("",    GetFileNameExtension("/"));
+  EXPECT_EQ("",    GetFileNameExtension("a"));
+  EXPECT_EQ("",    GetFileNameExtension("a/"));
+  EXPECT_EQ("txt", GetFileNameExtension("/a/b/c.txt"));
+  EXPECT_EQ("cc",  GetFileNameExtension("/a/b.c/d/e.cc"));
+  EXPECT_EQ("",    GetFileNameExtension("/a/b.c/d/e"));
+  EXPECT_EQ("g",   GetFileNameExtension("/a/b.c/d/e.f.g"));
+  // Backslash-separated paths; not guarded by _WIN32, so checked everywhere.
+  EXPECT_EQ("",    GetFileNameExtension("a:\\"));
+  EXPECT_EQ("",    GetFileNameExtension("a:\\b\\"));
+  EXPECT_EQ("txt", GetFileNameExtension("a:\\b\\c.txt"));
+  EXPECT_EQ("cc",  GetFileNameExtension("a:\\b.c\\d\\e.cc"));
+  EXPECT_EQ("",    GetFileNameExtension("a:\\b.c\\d\\e"));
+  EXPECT_EQ("g",   GetFileNameExtension("a:\\b.c\\d\\e.f.g"));
+  // Drive letter with forward slashes.
+  EXPECT_EQ("",    GetFileNameExtension("a:/"));
+  EXPECT_EQ("",    GetFileNameExtension("a:/b/"));
+  EXPECT_EQ("txt", GetFileNameExtension("a:/b/c.txt"));
+  EXPECT_EQ("cc",  GetFileNameExtension("a:/b.c/d/e.cc"));
+  EXPECT_EQ("",    GetFileNameExtension("a:/b.c/d/e"));
+  EXPECT_EQ("g",   GetFileNameExtension("a:/b.c/d/e.f.g"));
+  // A base name whose only dot is the leading one has no extension.
+  EXPECT_EQ("",   GetFileNameExtension(".cshrc"));
+  EXPECT_EQ("",   GetFileNameExtension("/home/user/.cshrc"));
+  EXPECT_EQ("",   GetFileNameExtension("c:\\.netrc"));
+}
+
+}  // namespace devtools_goma
diff --git a/lib/scoped_fd.cc b/lib/scoped_fd.cc
new file mode 100644
index 0000000..b73cd43
--- /dev/null
+++ b/lib/scoped_fd.cc
@@ -0,0 +1,533 @@
+// Copyright 2011 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+
+#include "scoped_fd.h"
+
+#include "compiler_specific.h"
+#include "glog/logging.h"
+#include "string_piece.h"
+
+#ifdef _WIN32
+#include "path_resolver.h"
+#include "socket_helper_win.h"
+#endif
+
+#ifndef _WIN32
+# include <errno.h>
+# include <fcntl.h>
+# include <poll.h>
+# include <sys/socket.h>
+# include <sys/stat.h>
+# include <sys/types.h>
+# include <unistd.h>
+# include "fileflag.h"
+#endif
+
+namespace devtools_goma {
+
+#ifndef _WIN32
+static ScopedFd::FileDescriptor kInvalidFd = -1;
+#else
+static ScopedFd::FileDescriptor kInvalidFd = INVALID_HANDLE_VALUE;
+#endif
+
+ScopedFd::ScopedFd()
+    : fd_(kInvalidFd) {
+}
+
+ScopedFd::ScopedFd(FileDescriptor fd)
+    : fd_(fd) {
+  if (valid())
+    SetCloseOnExec();
+}
+
+ScopedFd::~ScopedFd() {
+  Close();
+}
+
+// static. Opens |filename| read-only and returns the raw descriptor/handle.
+ScopedFd::FileDescriptor ScopedFd::OpenForRead(const string& filename) {
+#ifndef _WIN32
+  return open(filename.c_str(), O_RDONLY);
+#else
+  // On Windows, CreateFileA limits a path to MAX_PATH (260) characters. When
+  // compiling NaCl untrusted code the path often exceeds that. Such paths
+  // usually contain '..', so clean them first.
+  const string& resolved = PathResolver::ResolvePath(filename);
+  return CreateFileA(resolved.c_str(), GENERIC_READ,
+                     FILE_SHARE_READ,
+                     nullptr,
+                     OPEN_EXISTING,
+                     FILE_ATTRIBUTE_NORMAL,
+                     nullptr);
+#endif
+}
+
+// static. Opens |filename| for stat-style queries; directories work on Win.
+ScopedFd::FileDescriptor ScopedFd::OpenForStat(const string& filename) {
+#ifndef _WIN32
+  return OpenForRead(filename);
+#else
+  // On Windows, CreateFileA limits a path to MAX_PATH (260) characters. When
+  // compiling NaCl untrusted code the path often exceeds that. Such paths
+  // usually contain '..', so clean them first.
+  const string& resolved = PathResolver::ResolvePath(filename);
+  return CreateFileA(resolved.c_str(), 0,
+                     FILE_SHARE_READ,
+                     nullptr,
+                     OPEN_EXISTING,
+                     // Specify to get info from directory.
+                     FILE_FLAG_BACKUP_SEMANTICS,
+                     nullptr);
+#endif
+}
+
+// static. Opens |filename| for appending, creating it if it does not exist.
+ScopedFd::FileDescriptor ScopedFd::OpenForAppend(
+    const string& filename, int mode) {
+#ifndef _WIN32
+  return open(filename.c_str(), O_WRONLY | O_CREAT | O_APPEND, mode);
+#else
+  UNREFERENCED_PARAMETER(mode);
+  // TODO: translate mode to file attribute.
+  const string& resolved = PathResolver::ResolvePath(filename);
+  HANDLE h = CreateFileA(resolved.c_str(),
+                         FILE_APPEND_DATA,
+                         FILE_SHARE_WRITE,
+                         nullptr,
+                         OPEN_ALWAYS,  // Open if present, else create (O_CREAT).
+                         FILE_ATTRIBUTE_NORMAL,
+                         nullptr);
+  if (h == INVALID_HANDLE_VALUE) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "OpenForAppend failed: filename=" << filename;
+  }
+  return h;
+#endif
+}
+
+// static. Opens an existing |filename| for read/write; never creates it.
+ScopedFd::FileDescriptor ScopedFd::OpenForRewrite(const string& filename) {
+#ifndef _WIN32
+  return open(filename.c_str(), O_RDWR);
+#else
+  const string& resolved = PathResolver::ResolvePath(filename);
+  HANDLE h = CreateFileA(resolved.c_str(),
+                         GENERIC_READ | GENERIC_WRITE,
+                         0,
+                         nullptr,
+                         OPEN_EXISTING,
+                         FILE_ATTRIBUTE_NORMAL,
+                         nullptr);
+  if (h == INVALID_HANDLE_VALUE) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "OpenForRewrite failed: filename=" << filename;
+  }
+
+  return h;
+#endif
+}
+
+ScopedFd::FileDescriptor ScopedFd::Create(
+    const string& filename, int mode) {
+#ifndef _WIN32
+  return open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, mode);
+#else
+  UNREFERENCED_PARAMETER(mode);
+  // TODO: translate mode to file attribute.
+  const string& resolved = PathResolver::ResolvePath(filename);
+  HANDLE h = CreateFileA(resolved.c_str(),
+                         GENERIC_WRITE,
+                         FILE_SHARE_WRITE,
+                         nullptr,
+                         CREATE_ALWAYS,
+                         FILE_ATTRIBUTE_NORMAL,
+                         nullptr);
+  if (h == INVALID_HANDLE_VALUE) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "Create failed: filename=" << filename;
+  }
+  return h;
+#endif
+}
+
+ScopedFd::FileDescriptor ScopedFd::CreateExclusive(
+    const string& filename, int mode) {
+#ifndef _WIN32
+  return open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, mode);
+#else
+  UNREFERENCED_PARAMETER(mode);
+  // TODO: translate mode to file attribute.
+  // If the file exists, CreateFile with dwCreationDisposition == CREATE_NEW
+  // will fail.
+  // See: http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx
+  const string& resolved = PathResolver::ResolvePath(filename);
+  HANDLE h = CreateFileA(resolved.c_str(),
+                         GENERIC_WRITE,
+                         0,
+                         nullptr,
+                         CREATE_NEW,
+                         FILE_ATTRIBUTE_NORMAL,
+                         nullptr);
+  if (h == INVALID_HANDLE_VALUE) {
+    LOG_SYSRESULT(GetLastError());
+    LOG(ERROR) << "CreateExclusive failed: filename=" << filename;
+  }
+  return h;
+#endif
+}
+
+ScopedFd::FileDescriptor ScopedFd::OpenNull() {
+#ifndef _WIN32
+  return open("/dev/null", O_RDWR, 0600);
+#else
+  // To allow child process to continue using NUL, bInheritHandle should be set.
+  SECURITY_ATTRIBUTES secattr;
+  secattr.nLength = sizeof(secattr);
+  secattr.lpSecurityDescriptor = nullptr;
+  secattr.bInheritHandle = TRUE;
+  // NUL is Windows' /dev/null. NOTE(review): write-only here, O_RDWR on POSIX.
+  // http://stackoverflow.com/questions/438092/how-to-open-a-nul-file
+  // http://blogs.msdn.com/b/oldnewthing/archive/2003/10/22/55388.aspx
+  return CreateFileA("NUL", GENERIC_WRITE, 0, &secattr, OPEN_EXISTING, 0,
+                     nullptr);
+#endif
+}
+
+bool ScopedFd::valid() const {
+#ifdef _WIN32
+  // A HANDLE is unusable when it is either null or INVALID_HANDLE_VALUE.
+  const bool has_handle = (fd_ != nullptr) && (fd_ != kInvalidFd);
+  return has_handle;
+#else
+  return !(fd_ < 0);
+#endif
+}
+
+void ScopedFd::SetCloseOnExec() const {
+#ifndef _WIN32
+  SetFileDescriptorFlag(fd_, FD_CLOEXEC);
+#endif
+}
+
+ssize_t ScopedFd::Read(void* ptr, size_t len) const {
+#ifndef _WIN32
+  ssize_t nread;
+  do {  // Retry while the call is merely interrupted by a signal.
+    nread = read(fd_, ptr, len);
+  } while (nread < 0 && errno == EINTR);
+  return nread;
+#else
+  DWORD got = 0;
+  if (ReadFile(fd_, ptr, len, &got, nullptr)) {
+    return got;
+  }
+  LOG_SYSRESULT(GetLastError());
+  return -1;
+#endif
+}
+
+ssize_t ScopedFd::Write(const void* ptr, size_t len) const {
+#ifndef _WIN32
+  ssize_t nwritten;
+  do {  // Retry while the call is merely interrupted by a signal.
+    nwritten = write(fd_, ptr, len);
+  } while (nwritten < 0 && errno == EINTR);
+  return nwritten;
+#else
+  DWORD put = 0;
+  if (WriteFile(fd_, ptr, len, &put, nullptr)) {
+    return put;
+  }
+  LOG_SYSRESULT(GetLastError());
+  return -1;
+#endif
+}
+
+off_t ScopedFd::Seek(off_t offset, Whence whence) const {
+#ifndef _WIN32
+  return lseek(fd_, offset, whence);
+#else
+  // TODO: use lpDistanceToMoveHigh for high order 32bits of 64bits?
+  DWORD r = SetFilePointer(fd_, offset, nullptr, whence);
+  if (r == INVALID_SET_FILE_POINTER) {
+    DWORD err = GetLastError();  // Tells a failure apart from a real offset.
+    if (err != NO_ERROR) {
+      LOG_SYSRESULT(err);
+      return static_cast<off_t>(-1);
+    }
+    // NO_ERROR: treat |r| as a genuine offset, i.e. the seek succeeded.
+  }
+  return r;
+#endif
+}
+
+bool ScopedFd::GetFileSize(size_t* file_size) const {
+  *file_size = 0;  // Reported as 0 whenever the query fails (returns false).
+#ifndef _WIN32
+  struct stat st;
+  if (fstat(fd_, &st) != 0)
+    return false;
+  *file_size = st.st_size;
+  return true;
+#else
+  LARGE_INTEGER size;  // 64-bit: GetFileSize() alone yields only the low DWORD.
+  if (!::GetFileSizeEx(fd_, &size)) {
+    LOG_SYSRESULT(GetLastError());
+    return false;
+  }
+  *file_size = static_cast<size_t>(size.QuadPart);
+  return true;
+#endif
+}
+
+void ScopedFd::reset(ScopedFd::FileDescriptor fd) {
+  Close();
+  fd_ = fd;
+#ifndef _WIN32
+  if (fd >= 0) {
+    SetCloseOnExec();
+  }
+#endif
+}
+
+ScopedFd::FileDescriptor ScopedFd::release() {
+  FileDescriptor fd = fd_;
+  fd_ = kInvalidFd;
+  return fd;
+}
+
+bool ScopedFd::Close() {
+  if (valid()) {
+#ifndef _WIN32
+    return close(release()) == 0;
+#else
+    return CloseHandle(release()) == TRUE;
+#endif
+  }
+  return true;
+}
+
+ScopedSocket::~ScopedSocket() {
+  Close();
+}
+
+bool ScopedSocket::SetCloseOnExec() const {
+#ifndef _WIN32
+  return SetFileDescriptorFlag(fd_, FD_CLOEXEC) == 0;
+#else
+  return true;
+#endif
+}
+
+bool ScopedSocket::SetNonBlocking() const {
+#ifndef _WIN32
+  return SetFileStatusFlag(fd_, O_NONBLOCK) == 0;
+#else
+  unsigned long non_blocking = 1;
+  return ioctlsocket(fd_, FIONBIO, &non_blocking) != SOCKET_ERROR;
+#endif
+}
+
+bool ScopedSocket::SetReuseAddr() const {
+  int yes = 1;
+#ifndef _WIN32
+  return setsockopt(fd_, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) == 0;
+#else
+  return setsockopt(fd_, SOL_SOCKET, SO_REUSEADDR,
+                    (const char*)&yes, sizeof(yes)) == 0;
+#endif
+}
+
+void ScopedSocket::reset(int fd) {
+  Close();
+  fd_ = fd;
+}
+
+ssize_t ScopedSocket::Read(void* ptr, size_t len) const {
+#ifndef _WIN32
+  return read(fd_, ptr, len);
+#else
+  return recv(fd_, (char*)ptr, len, 0);
+#endif
+}
+
+ssize_t ScopedSocket::Write(const void* ptr, size_t len) const {
+#ifndef _WIN32
+  return write(fd_, ptr, len);
+#else
+  return send(fd_, (char*)ptr, len, 0);
+#endif
+}
+
+bool ScopedSocket::Close() {
+  if (valid()) {
+#ifndef _WIN32
+    return close(release()) == 0;
+#else
+    return closesocket(release()) == 0;
+#endif
+  }
+  return true;
+}
+
+// Waits up to |timeout_sec| for readability, then reads. Returns < 0 on error.
+ssize_t ScopedSocket::ReadWithTimeout(char *buf, size_t bufsize,
+                                      int timeout_sec) const {
+  CHECK(buf);
+  CHECK(valid());
+  for (;;) {
+#ifdef _WIN32
+    // Since WSAPoll (Windows poll API) is broken, we should use select on Win.
+    // See: http://daniel.haxx.se/blog/2012/10/10/wsapoll-is-broken/
+    fd_set fdset;
+    FD_ZERO(&fdset);
+    MSVC_PUSH_DISABLE_WARNING_FOR_FD_SET();
+    FD_SET(fd_, &fdset);
+    MSVC_POP_WARNING();
+    TIMEVAL timeout;
+    timeout.tv_sec = timeout_sec;
+    timeout.tv_usec = 0;
+    // http://msdn.microsoft.com/en-us/library/windows/desktop/ms740141(v=vs.85).aspx
+    int r = select(fd_ + 1, &fdset, nullptr, nullptr, &timeout);
+    if (r == SOCKET_ERROR) {
+      PLOG(ERROR) << "GOMA: read select error";
+      return FAIL;
+    }
+    if (r == 0) {
+      LOG(WARNING) << "GOMA: read select timeout (" << timeout_sec << "sec)";
+      return ERR_TIMEOUT;
+    }
+    CHECK(FD_ISSET(fd_, &fdset))
+        << "GOMA: read select returned but read not ready."
+        << " fd_=" << fd_;
+#else
+    struct pollfd pfd;
+    pfd.fd = fd_;
+    pfd.events = POLLIN;
+    int r;
+    while ((r = poll(&pfd, 1, timeout_sec * 1000)) == -1) {  // poll takes ms.
+      if (errno != EINTR)
+        break;
+    }
+    if (r == -1) {
+      PLOG(ERROR) << "GOMA: read poll error";
+      return FAIL;
+    }
+    if (r == 0) {
+      LOG(WARNING) << "GOMA: read poll timeout (" << timeout_sec << "sec)";
+      return ERR_TIMEOUT;
+    }
+    CHECK(pfd.revents & POLLIN)
+        << "GOMA: read poll returned but read not ready."
+        << " fd_=" << fd_;
+#endif
+    // NOTE(review): Winsock reports errors via WSAGetLastError(), not errno.
+    ssize_t ret = Read(buf, bufsize);
+    if (ret == -1) {
+      if (errno == EAGAIN || errno == EINTR)
+        continue;  // Transient: wait for readiness again, with a fresh timeout.
+      PLOG(ERROR) << "read";
+    }
+    return ret;
+  }
+}
+
+ssize_t ScopedSocket::WriteWithTimeout(const char* buf, size_t bufsize,
+                                       int timeout_sec) const {
+  CHECK(buf);
+  CHECK(valid());
+  for (;;) {
+#ifdef _WIN32
+    // Since WSAPoll (Windows poll API) is broken, we should use select on Win.
+    // See: http://daniel.haxx.se/blog/2012/10/10/wsapoll-is-broken/
+    fd_set fdset;
+    FD_ZERO(&fdset);
+    MSVC_PUSH_DISABLE_WARNING_FOR_FD_SET();
+    FD_SET(fd_, &fdset);
+    MSVC_POP_WARNING();
+    TIMEVAL timeout;
+    timeout.tv_sec = timeout_sec;
+    timeout.tv_usec = 0;
+    // http://msdn.microsoft.com/en-us/library/windows/desktop/ms740141(v=vs.85).aspx
+    int r = select(fd_ + 1, nullptr, &fdset, nullptr, &timeout);
+    if (r == SOCKET_ERROR) {
+      PLOG(ERROR) << "GOMA: write select error";
+      return FAIL;
+    }
+    if (r == 0) {
+      LOG(ERROR) << "GOMA: write select timeout (" << timeout_sec << "sec)";
+      return ERR_TIMEOUT;
+    }
+    CHECK(FD_ISSET(fd_, &fdset))
+        << "GOMA: write select returned but write not ready."
+        << " fd_=" << fd_;
+#else
+    struct pollfd pfd;
+    pfd.fd = fd_;
+    pfd.events = POLLOUT;
+    int r;
+    while ((r = poll(&pfd, 1, timeout_sec * 1000)) == -1) {  // poll takes ms.
+      if (errno != EINTR)
+        break;
+    }
+    if (r == -1) {
+      PLOG(ERROR) << "GOMA: write poll error";
+      return FAIL;
+    }
+    if (r == 0) {
+      LOG(ERROR) << "GOMA: write poll timeout (" << timeout_sec << "sec)";
+      return ERR_TIMEOUT;
+    }
+    CHECK(pfd.revents & POLLOUT)
+        << "GOMA: write poll returned > 0 but write not ready."
+        << " fd_=" << fd_;
+#endif
+    // NOTE(review): Winsock reports errors via WSAGetLastError(), not errno.
+    ssize_t ret = Write(buf, bufsize);
+    if (ret == -1) {
+      if (errno == EAGAIN || errno == EINTR)
+        continue;  // Transient: wait for readiness again, with a fresh timeout.
+      PLOG(ERROR) << "write";
+    }
+    return ret;  // May be a partial write; the caller handles the remainder.
+  }
+}
+
+// Writes all of |message|. Returns OK, or a negative Errno on failure.
+int ScopedSocket::WriteString(absl::string_view message,
+                              int timeout_sec) const {
+  const char* p = message.data();
+  size_t size = message.size();  // not int: >INT_MAX must not wrap to <0
+  while (size > 0) {
+    const ssize_t ret = WriteWithTimeout(p, size, timeout_sec);
+    if (ret < 0) {
+      PLOG(ERROR) << "write failure: " << ret
+                  << " written=" << (message.size() - size) << " size=" << size
+                  << " out of " << message.size();
+      return static_cast<int>(ret);  // FAIL or ERR_TIMEOUT; both fit in int.
+    }
+    p += ret;  // Partial write: advance past what was sent and go again.
+    size -= static_cast<size_t>(ret);
+  }
+  return OK;
+}
+
+string ScopedSocket::GetLastErrorMessage() const {
+  char message[1024] = "";  // Never hand back uninitialized bytes.
+#ifndef _WIN32
+  // XSI strerror_r fills |message|; GNU strerror_r may return a static string
+  // and leave |message| untouched. TODO: use the GNU return value when present.
+  (void)strerror_r(errno, message, sizeof(message));
+#else
+  FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, nullptr,
+                 WSAGetLastError(), 0,
+                 message, sizeof(message), nullptr);
+#endif
+  return message;
+}
+
+}  // namespace devtools_goma
diff --git a/lib/scoped_fd.h b/lib/scoped_fd.h
new file mode 100644
index 0000000..81c6a39
--- /dev/null
+++ b/lib/scoped_fd.h
@@ -0,0 +1,187 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#ifndef DEVTOOLS_GOMA_LIB_SCOPED_FD_H_
+#define DEVTOOLS_GOMA_LIB_SCOPED_FD_H_
+
+#ifdef _WIN32
+#pragma once
+#include "config_win.h"
+#else
+#include <unistd.h>
+#endif
+
+#include <ostream>
+#include <string>
+
+
+#include "basictypes.h"
+#include "string_piece.h"
+using std::string;
+
+namespace devtools_goma {
+
+// Note: in the Win32 version, ScopedFd is used to host HANDLEs.
+// On POSIX, the constructor and reset() mark a valid fd close-on-exec.
+class ScopedFd {
+ public:
+#ifdef _WIN32
+  typedef HANDLE FileDescriptor;
+  enum Whence {
+    SeekAbsolute = FILE_BEGIN,
+    SeekRelative = FILE_CURRENT
+  };
+#else
+  typedef int FileDescriptor;
+  enum Whence {
+    SeekAbsolute = SEEK_SET,
+    SeekRelative = SEEK_CUR
+  };
+#endif
+  ScopedFd();
+  explicit ScopedFd(FileDescriptor fd);
+  ScopedFd(ScopedFd&& other) : fd_(other.release()) {}
+  ~ScopedFd();
+
+  ScopedFd& operator=(ScopedFd&& other) {
+    if (this == &other) {
+      return *this;
+    }
+    reset(other.release());
+    return *this;
+  }
+
+  static FileDescriptor OpenForStat(const string& filename);
+  static FileDescriptor OpenForRead(const string& filename);
+  static FileDescriptor OpenForAppend(const string& filename, int mode);
+  static FileDescriptor OpenForRewrite(const string& filename);
+  static FileDescriptor Create(const string& filename, int mode);
+  static FileDescriptor CreateExclusive(const string& filename, int mode);
+  static FileDescriptor OpenNull();
+
+  bool valid() const;
+  void SetCloseOnExec() const;
+
+  ssize_t Read(void* ptr, size_t len) const;
+  ssize_t Write(const void* ptr, size_t len) const;
+  off_t Seek(off_t offset, Whence whence) const;
+  bool GetFileSize(size_t* file_size) const;
+
+  // Returns a pointer to the internal representation.
+  FileDescriptor* ptr() { return &fd_; }
+  FileDescriptor release();
+  void reset(FileDescriptor fd);
+
+  // Returns true on success or already closed.
+  bool Close();
+
+#ifndef _WIN32
+  int fd() const { return fd_; }
+#else
+  HANDLE handle() const { return fd_; }
+#endif
+
+  friend std::ostream& operator<<(std::ostream& os, const ScopedFd& fd) {
+#ifdef _WIN32
+    return os << fd.handle();
+#else
+    return os << fd.fd();
+#endif
+  }
+
+ private:
+  FileDescriptor fd_;
+
+  DISALLOW_COPY_AND_ASSIGN(ScopedFd);
+};
+
+enum Errno {
+  OK = 0,            // Success (e.g. WriteString wrote the whole message).
+  FAIL = -1,         // poll()/select() itself reported an error.
+  ERR_TIMEOUT = -2,  // poll()/select() timed out before the fd was ready.
+};
+
+class IOChannel {
+ public:
+  virtual ~IOChannel() {}
+
+  virtual ssize_t Read(void* ptr, size_t len) const = 0;
+  virtual ssize_t Write(const void* ptr, size_t len) const = 0;
+  virtual ssize_t ReadWithTimeout(char *buf,
+                                  size_t bufsize,
+                                  int timeout_sec) const = 0;
+  virtual ssize_t WriteWithTimeout(const char* buf,
+                                   size_t bufsize,
+                                   int timeout_sec) const = 0;
+  // Write string to socket. Return negative on fail (Errno). OK on success.
+  virtual int WriteString(absl::string_view message, int timeout) const = 0;
+
+  // Returns the last error message. Valid when called just after
+  // Write(), Read(), etc.
+  virtual string GetLastErrorMessage() const = 0;
+
+  virtual bool is_secure() const { return false; }
+
+  virtual void StreamWrite(std::ostream& os) const = 0;
+
+  friend std::ostream& operator<<(std::ostream& os, const IOChannel& chan) {
+    chan.StreamWrite(os);
+    return os;
+  }
+};
+
+class ScopedSocket : public IOChannel {
+ public:
+  ScopedSocket() : fd_(-1) {}
+  explicit ScopedSocket(int fd) : fd_(fd) {}
+  ScopedSocket(ScopedSocket&& other) : fd_(other.release()) {}
+  ~ScopedSocket() override;
+
+  ScopedSocket& operator=(ScopedSocket&& other) {
+    if (this == &other) {
+      return *this;
+    }
+    reset(other.release());
+    return *this;
+  }
+
+  ssize_t Read(void* ptr, size_t len) const override;
+  ssize_t Write(const void* ptr, size_t len) const override;
+  ssize_t ReadWithTimeout(char *buf,
+                          size_t bufsize,
+                          int timeout_sec) const override;
+  ssize_t WriteWithTimeout(const char* buf,
+                           size_t bufsize,
+                           int timeout_sec) const override;
+  int WriteString(absl::string_view message, int timeout) const override;
+
+  // Returns the last error message. Valid when called just after
+  // Write(), Read(), etc.
+  string GetLastErrorMessage() const override;
+
+  bool SetCloseOnExec() const;
+  bool SetNonBlocking() const;
+  bool SetReuseAddr() const;
+
+  bool valid() const { return fd_ >= 0; }
+  int get() const { return fd_; }
+  int release() { int fd = fd_; fd_ = -1; return fd; }
+  void reset(int fd);
+  // Returns true on success or already closed.
+  bool Close();
+  explicit operator int() const { return fd_; }
+  void StreamWrite(std::ostream& os) const override {
+    os << fd_;
+  }
+
+ private:
+  int fd_;
+
+  DISALLOW_COPY_AND_ASSIGN(ScopedSocket);
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_LIB_SCOPED_FD_H_
diff --git a/lib/unordered.h b/lib/unordered.h
new file mode 100644
index 0000000..b3d1a59
--- /dev/null
+++ b/lib/unordered.h
@@ -0,0 +1,17 @@
+// Copyright 2010 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//
+// Include this file to use unordered_map and unordered_set.
+
+#ifndef DEVTOOLS_GOMA_LIB_UNORDERED_H_
+#define DEVTOOLS_GOMA_LIB_UNORDERED_H_
+
+
+#include <unordered_map>
+#include <unordered_set>
+using std::unordered_map;
+using std::unordered_set;
+
+#endif  // DEVTOOLS_GOMA_LIB_UNORDERED_H_
diff --git a/test/Basic.jar b/test/Basic.jar
new file mode 100644
index 0000000..1115709
--- /dev/null
+++ b/test/Basic.jar
Binary files differ
diff --git a/test/Basic_expected.jar b/test/Basic_expected.jar
new file mode 100644
index 0000000..1df2b36
--- /dev/null
+++ b/test/Basic_expected.jar
Binary files differ
diff --git a/test/Broken.jar b/test/Broken.jar
new file mode 100644
index 0000000..de1d638
--- /dev/null
+++ b/test/Broken.jar
Binary files differ
diff --git a/test/MacDirtyRanlib.a b/test/MacDirtyRanlib.a
new file mode 100644
index 0000000..493f7c3
--- /dev/null
+++ b/test/MacDirtyRanlib.a
Binary files differ
diff --git a/test/NotThinArchive.a b/test/NotThinArchive.a
new file mode 100644
index 0000000..c8075a8
--- /dev/null
+++ b/test/NotThinArchive.a
Binary files differ
diff --git a/test/NotThinArchiveLongName.a b/test/NotThinArchiveLongName.a
new file mode 100644
index 0000000..24bb790
--- /dev/null
+++ b/test/NotThinArchiveLongName.a
Binary files differ
diff --git a/test/README b/test/README
new file mode 100644
index 0000000..da84be8
--- /dev/null
+++ b/test/README
@@ -0,0 +1,5 @@
+* libc.so: a random ld script file from fedora core 16, for elf_parser_unittest
+* libdl.so: a random so file from gPrecise, for elf_parser_unittest
+
+TODO: Basic.jar NotThinArchive.a NotThinArchiveLongName.a ReadManifest.jar
+      ThinArchive.a ThinArchiveLongName.a
diff --git a/test/ReadManifest.jar b/test/ReadManifest.jar
new file mode 100644
index 0000000..c2366e7
--- /dev/null
+++ b/test/ReadManifest.jar
Binary files differ
diff --git a/test/ThinArchive.a b/test/ThinArchive.a
new file mode 100644
index 0000000..6bc7a3e
--- /dev/null
+++ b/test/ThinArchive.a
Binary files differ
diff --git a/test/ThinArchiveLongName.a b/test/ThinArchiveLongName.a
new file mode 100644
index 0000000..f1450f5
--- /dev/null
+++ b/test/ThinArchiveLongName.a
Binary files differ
diff --git a/test/another_cert.pem b/test/another_cert.pem
new file mode 100644
index 0000000..35f31aa
--- /dev/null
+++ b/test/another_cert.pem
@@ -0,0 +1,10 @@
+-----BEGIN CERTIFICATE-----
+MIIBVzCB/wIJAKDzQCHhwxn4MAkGByqGSM49BAEwNDEPMA0GA1UEChMGR29vZ2xl
+MSEwHwYDVQQLExhmb3IgdGVzdGluZyBwdXJwb3NlIG9ubHkwIBcNMTQwMzI2MDMz
+MTM2WhgPMjExNDAzMDIwMzMxMzZaMDQxDzANBgNVBAoTBkdvb2dsZTEhMB8GA1UE
+CxMYZm9yIHRlc3RpbmcgcHVycG9zZSBvbmx5MFkwEwYHKoZIzj0CAQYIKoZIzj0D
+AQcDQgAEG3FR/eugcxlUtCaLHlDziYDudikZG3Gnv24yXeJPDjskgC9la919Ll9M
+mH+vGD4ZMkAxar9t7OqRoWezpOKoijAJBgcqhkjOPQQBA0gAMEUCIAarFTsj9oe5
+uEE6dqiwAfEpjEZAdwoyp4fCHJUygjBKAiEA7hCodOPlP7OjOOBUeQxcMA1ED7TT
+pcxV5wunGOvajio=
+-----END CERTIFICATE-----
diff --git a/test/another_key.pem b/test/another_key.pem
new file mode 100644
index 0000000..0348018
--- /dev/null
+++ b/test/another_key.pem
@@ -0,0 +1,5 @@
+-----BEGIN PRIVATE KEY-----
+MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgBlGQTvnd8kzO7T+T
+D6pcPHI/4t95TB4n0ntWQzVL2zShRANCAAQbcVH966BzGVS0JoseUPOJgO52KRkb
+cae/bjJd4k8OOySAL2Vr3X0uX0yYf68YPhkyQDFqv23s6pGhZ7Ok4qiK
+-----END PRIVATE KEY-----
diff --git a/test/asm.jar b/test/asm.jar
new file mode 100644
index 0000000..aea1181
--- /dev/null
+++ b/test/asm.jar
Binary files differ
diff --git a/test/badreq.bin b/test/badreq.bin
new file mode 100644
index 0000000..b0bf0a8
--- /dev/null
+++ b/test/badreq.bin
@@ -0,0 +1,3 @@
+
+$
+gccx86_64-linux-gnu2/usr/bin/id/usr/bin/gcc"/tmp
\ No newline at end of file
diff --git a/test/badreq.txt b/test/badreq.txt
new file mode 100644
index 0000000..a700ce1
--- /dev/null
+++ b/test/badreq.txt
@@ -0,0 +1,16 @@
+# Copyright 2016 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# badreq.bin is generated by
+# % printproto --multiline --raw_protocol_buffer --reverse \
+#   --message devtools_goma.ExecReq --output_file badreq.bin badreq.txt
+# TODO: generate binary in build rule
+command_spec <
+ name: "gcc"
+ target: "x86_64-linux-gnu"
+ local_compiler_path: "/usr/bin/id"
+>
+arg: "/usr/bin/gcc"
+cwd: "/tmp"
+
diff --git a/test/cert.pem b/test/cert.pem
new file mode 100644
index 0000000..52bf381
--- /dev/null
+++ b/test/cert.pem
@@ -0,0 +1,12 @@
+-----BEGIN CERTIFICATE-----
+MIIBrzCCAVSgAwIBAgIJALx3HPB/10zyMAoGCCqGSM49BAMCME0xDzANBgNVBAoT
+Bkdvb2dsZTEhMB8GA1UECxMYZm9yIHRlc3RpbmcgcHVycG9zZSBvbmx5MRcwFQYD
+VQQDEw53d3cuZ29vZ2xlLmNvbTAgFw0xNjAyMDEwMjQ5MTVaGA8yMTE2MDEwODAy
+NDkxNVowTTEPMA0GA1UEChMGR29vZ2xlMSEwHwYDVQQLExhmb3IgdGVzdGluZyBw
+dXJwb3NlIG9ubHkxFzAVBgNVBAMTDnd3dy5nb29nbGUuY29tMFkwEwYHKoZIzj0C
+AQYIKoZIzj0DAQcDQgAEZNrW/rQHkvEo/YPEmIJL3mRwtxDMvvoNEYrx+MYvsguL
+UwtP2cnZjO2jHlB61Bfi2ag5XkRnXJ0wE4EztThyxqMbMBkwFwYDVR0RBBAwDoIM
+Ki5nb29nbGUuY29tMAoGCCqGSM49BAMCA0kAMEYCIQCst1d8TK3cuTX0HqRVLpGe
+d3O20J25DeORFLj6/+QkLAIhANCKYYq68dnJUc7xfyG7UuA4bJLuhQ9D+IzFAaPh
+T6D+
+-----END CERTIFICATE-----
diff --git a/test/cert_127.0.0.1.pem b/test/cert_127.0.0.1.pem
new file mode 100644
index 0000000..9842d78
--- /dev/null
+++ b/test/cert_127.0.0.1.pem
@@ -0,0 +1,19 @@
+-----BEGIN CERTIFICATE-----
+MIIDDDCCAfSgAwIBAgIRAIMRPNwj5BxS3cdzJ+lXZb4wDQYJKoZIhvcNAQELBQAw
+EjEQMA4GA1UEChMHQWNtZSBDbzAgFw0xNjA4MDkwNDQ4MjRaGA8yMTE2MDcxNjA0
+NDgyNFowEjEQMA4GA1UEChMHQWNtZSBDbzCCASIwDQYJKoZIhvcNAQEBBQADggEP
+ADCCAQoCggEBALyNtRym/27c9UpRXDjMOl9VBmkzksvUGjpG74O1SMDmSuQCHvLL
+IqrUS0efjuoZpnQgmTTZJRxpgckFUP3Oals4DegYdBIn/XbgMocGV52Ob706cvOJ
+zXeuEZF522OXHCyggDyQNVA9s5+SV6EfCkXFtCqqyhaSW+Ug8mKkovB3kbgDmip1
+WejAvgsKJWfZHURn+cngyX3nw1Cft8yunqCyXpDey2Le11Uc/bpO2DduedVKCNVn
+Gg2xf5g7WtX7Geek6hMINfSWjbCnmcWo6axf+Cz1OLfyWMBqi9TK0zpu3+HXR/E8
++i/xd7ArBSIvqfdpYIxt4JU0FR81n25+HG8CAwEAAaNbMFkwDgYDVR0PAQH/BAQD
+AgKkMBMGA1UdJQQMMAoGCCsGAQUFBwMBMA8GA1UdEwEB/wQFMAMBAf8wIQYDVR0R
+BBowGIcEfwAAAYcQAAAAAAAAAAAAAAAAAAAAATANBgkqhkiG9w0BAQsFAAOCAQEA
+eQErW5QS9QnUd+zs9CtgeViaRtEc9W42xLQu1/6RNR8EX2YcfzQtcdBXS/W+tA4y
+JZpnLXvPNgbvuDKJq7DQ/iIYounI1jluAAFXqskZsR3W3zuUT0uL/XhAQmlPH6cW
+66n2vCMDos6gxJLy60IHcflh91gr6PL2ATomy8yEYKtybht+PnrWIqIZICvrk5DT
+P0n/b+gQYLjeoTsLmCpbB7i/TAhIasb+Uj0JCFRKbJ6w88z5cmKh5xfHlEOesNzg
+159z9+35V6VO4LhF9ReC1YrwTSIwuixCiBytO+XnyGFLzC0O8R69iClQvBRgw1QI
+OjVB+q/tnsmzoJo0Kq8CYA==
+-----END CERTIFICATE-----
diff --git a/test/clang b/test/clang
new file mode 100755
index 0000000..2dd6fce
--- /dev/null
+++ b/test/clang
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+
+# Dummy clang program for testing compiler_info behavior.
+# This is implemented to check misuse of real compiler.
+# (b/63874437)
+
+import os
+import re
+import sys
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+FEATURE_CHECK_PATTERN = re.compile(r'#(\d+)')
+
+def DumpVersion():
+  print '5.0.0'
+
+def Version():
+  print """Goma clang version 5.0.0 (based on LLVM 5.0.0)
+Target: x86_64-pc-linux-gnu
+Thread model: posix
+"""
+
+def IncludeFiles():
+  print """Goma fake clang version 5.0.0
+Target: x86_64-linux-gnu
+Thread model: posix
+InstalledDir: %(installed)s
+ "%(installed)s/clang.dontexec" -cc1 "-triple x86_64-linux-gnu" -o - -x c
+#include "..." search starts here:
+#include <...> search starts here:
+%(installed)s
+End of search list.
+""" % {'installed': BASE_DIR}
+
+def DumpMachine():
+  print 'x86_64-pc-linux-gnu'
+
+def PredefinedMacros():
+  print '#define dummy'
+
+def HandleFeatures(argv):
+  with open(argv[len(argv) - 1]) as f:
+    prev_num = ''
+    for line in f.readlines():
+      matched = FEATURE_CHECK_PATTERN.match(line)
+      if matched:
+        if prev_num == matched.group(1):
+          continue
+        print '# %s' % matched.group(1)
+        print '0'
+        prev_num = matched.group(1)
+
+def GetSubprograms():
+  IncludeFiles()
+
+if '-E' in sys.argv and '-v' in sys.argv:
+  IncludeFiles()
+if '-E' in sys.argv and '-dM' in sys.argv:
+  PredefinedMacros()
+elif '-E' in sys.argv:
+  HandleFeatures(sys.argv)
+elif '-dumpmachine' in sys.argv:
+  DumpMachine()
+elif '-dumpversion' in sys.argv:
+  DumpVersion()
+elif '--version' in sys.argv:
+  Version()
+elif '-c' in sys.argv and '-v' in sys.argv:
+  GetSubprograms()
+else:
+  raise Exception('Unknown option %s' % sys.argv)
diff --git a/test/clang.bat b/test/clang.bat
new file mode 100644
index 0000000..9f52743
--- /dev/null
+++ b/test/clang.bat
@@ -0,0 +1,7 @@
+@echo off

+

+REM Copyright 2017 The Goma Authors. All rights reserved.

+REM Use of this source code is governed by a BSD-style license that can be

+REM found in the LICENSE file.

+

+python "%~dp0clang" %*

diff --git a/test/clang.dontexec b/test/clang.dontexec
new file mode 100644
index 0000000..55f49e7
--- /dev/null
+++ b/test/clang.dontexec
@@ -0,0 +1,8 @@
+#!/usr/bin/env python
+
+# Copyright 2017 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Checking misuse of real compiler path (see b/63874437)
+raise Exception('This program must not be executed directly')
diff --git a/test/common.h b/test/common.h
new file mode 100644
index 0000000..3e6eb67
--- /dev/null
+++ b/test/common.h
@@ -0,0 +1,6 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+// common test header
+
+#include <iostream>
diff --git a/test/compile_error.cc b/test/compile_error.cc
new file mode 100644
index 0000000..a2ed1c0
--- /dev/null
+++ b/test/compile_error.cc
@@ -0,0 +1,5 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+this file compiles with error.
diff --git a/test/dhparam.pem b/test/dhparam.pem
new file mode 100644
index 0000000..bbad79b
--- /dev/null
+++ b/test/dhparam.pem
@@ -0,0 +1,5 @@
+-----BEGIN DH PARAMETERS-----
+MIGHAoGBANmADwAW6775Ber0ND97W4LqfINzXAx4vCEd00vUZ3Pb7OvA6ykGqRyW
+LCqnNbRWG8ymPLJMQnL5K7cayo27koSM6QAoYQPJejFghyt98934d3uar1m5uMyO
+//v61uA/fN92CZTrZraGFGA6VVgo6StdwnmS8ctw0Q6KyuL6Hb2vAgEF
+-----END DH PARAMETERS-----
diff --git a/test/goma_ctl_test.py b/test/goma_ctl_test.py
new file mode 100755
index 0000000..e26524d
--- /dev/null
+++ b/test/goma_ctl_test.py
@@ -0,0 +1,3303 @@
+#!/usr/bin/env python
+
+# Copyright 2012 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Tests for goma_ctl."""
+
+
+
+import imp
+import json
+import optparse
+import os
+import shutil
+import stat
+import string
+import StringIO
+import sys
+import tempfile
+import time
+import unittest
+
+_GOMA_CTL = 'goma_ctl.py'
+
+
+class PlatformSpecific(object):
+  """class for platform specific commands / data."""
+
+  def __init__(self, platform):
+    self._platform = platform
+
+  def GetPlatform(self):
+    """Returns platform name."""
+    return self._platform
+
+  @staticmethod
+  def GetDefaultGomaCtlPath(test_dir):
+    """Returns the default directory that contains goma_ctl.py.
+
+    Args:
+      test_dir: a string of directory of this file.
+
+    Returns:
+      a string of the directory that contains goma_ctl.py by default.
+    """
+    raise NotImplementedError('GetDefaultGomaCtlPath should be implemented.')
+
+  @staticmethod
+  def SetCompilerProxyEnv(tmp_dir, port):
+    """Configure compiler_proxy env.
+
+    Args:
+      tmp_dir: a string of temporary directory path.
+      port: an integer of compiler proxy port.
+    """
+    raise NotImplementedError('SetCompilerProxyEnv should be implemented.')
+
+  def GetCred(self):
+    if os.path.isfile(self._CRED):
+      return self._CRED
+    return None
+
+
+class WindowsSpecific(PlatformSpecific):
+  """class for Windows specific commands / data."""
+
+  _CRED = 'c:\\creds\\service_accounts\\service-account-goma-client.json'
+
+  @staticmethod
+  def GetDefaultGomaCtlPath(test_dir):
+    return os.path.join(test_dir, '..', 'out', 'Release')
+
+  @staticmethod
+  def SetCompilerProxyEnv(tmp_dir, port):
+    os.environ['GOMA_TMP_DIR'] = tmp_dir
+    os.environ['GOMA_COMPILER_PROXY_PORT'] = str(port)
+
+
+class PosixSpecific(PlatformSpecific):
+  """class for POSIX specific commands / data."""
+
+  _CRED = '/creds/service_accounts/service-account-goma-client.json'
+
+  @staticmethod
+  def GetDefaultGomaCtlPath(test_dir):
+    return os.path.join(test_dir, '..', 'out', 'Release')
+
+  @staticmethod
+  def SetCompilerProxyEnv(tmp_dir, port):
+    os.environ['GOMA_TMP_DIR'] = tmp_dir
+    os.environ['GOMA_COMPILER_PROXY_PORT'] = str(port)
+
+
+def GetPlatformSpecific(platform):
+  """Get PlatformSpecific class for |platform|.
+
+  Args:
+    platform: platform name to be returned.
+
+  Returns:
+    an instance of a subclass of PlatformSpecific class.
+
+  Raises:
+    ValueError: if platform is None or not supported.
+  """
+  if platform == 'win64':
+    return WindowsSpecific(platform)
+  elif platform in ('goobuntu', 'chromeos', 'mac'):
+    return PosixSpecific(platform)
+  raise ValueError('You should specify supported platform name.')
+
+
+class FakeGomaEnv(object):
+  """Fake GomaEnv class for test."""
+  # pylint: disable=R0201
+  # pylint: disable=W0613
+
+  def AutoUpdate(self):
+    pass
+
+  def BackupCurrentPackage(self, backup_dir='dummy'):
+    pass
+
+  def CalculateChecksum(self, _, update_dir=''):
+    return 'dummy_checksum'
+
+  def CanAutoUpdate(self):
+    return True
+
+  def CheckConfig(self):
+    pass
+
+  def ControlCompilerProxy(self, command, fast=False):
+    if command == '/healthz':
+      return {'status': True, 'message': 'ok', 'url': 'dummy_url'}
+    return {'status': True, 'message': 'dummy', 'url': 'dummy_url'}
+
+  def CompilerProxyRunning(self):
+    return True
+
+  def ExecCompilerProxy(self):
+    pass
+
+  def ExtractPackage(self, src, dst):
+    return True
+
+  def GetCacheDirectory(self):
+    return 'dummy_cache_dir'
+
+  def GetCrashDumpDirectory(self):
+    return 'dummy_crash_dump_dir'
+
+  def GetCrashDumps(self):
+    return []
+
+  def GetCompilerProxyVersion(self):
+    return 'fake@version'
+
+  def GetGomaCtlScriptName(self):
+    return 'fake-script'
+
+  def GetGomaTmpDir(self):
+    return 'dummy_tmp_dir'
+
+  def GetPackageName(self):
+    return 'goma-fake'
+
+  def GetPlatform(self):
+    return 'fake'
+
+  def GetScriptDir(self):
+    return 'fake'
+
+  def HttpDownload(self, url,
+                   rewrite_url=None, headers=None, destination_file=None):
+    if destination_file:
+      return
+    if 'MANIFEST' in url:
+      return 'VERSION=1'
+    return 'fake'
+
+  def InstallPackage(self, _):
+    return True
+
+  def IsDirectoryExist(self, _):
+    return True
+
+  def EnsureDirectoryOwnedByUser(self, _):
+    return True
+
+  def IsGomaInstalledBefore(self):
+    return False
+
+  def IsOldFile(self, _):
+    return True
+
+  def IsProductionBinary(self):
+    return True
+
+  def IsValidManifest(self, _):
+    return False
+
+  def IsValidMagic(self, _):
+    return True
+
+  def KillStakeholders(self):
+    pass
+
+  def LoadChecksum(self, update_dir=''):
+    return {}
+
+  def MakeDirectory(self, _):
+    pass
+
+  def MayUsingDefaultIPCPort(self):
+    return True
+
+  def IsManifestModifiedRecently(self, directory='', threshold=4*60*60):
+    return False
+
+  def ReadManifest(self, path=''):
+    if path:
+      return {'VERSION': 1}
+    return {}
+
+  def RemoveDirectory(self, _):
+    pass
+
+  def WriteFile(self, filename, content):
+    pass
+
+  def CopyFile(self, from_file, to_file):
+    pass
+
+  def MakeTgzFromDirectory(self, dir_name, output_filename):
+    pass
+
+  def RemoveFile(self, _):
+    pass
+
+  def RollbackUpdate(self, backup_dir='dummy'):
+    pass
+
+  def SetDefaultKey(self, protocol):
+    pass
+
+
+  def WarnNonProtectedFile(self, filename):
+    pass
+
+  def WriteManifest(self, manifest, filename=''):
+    pass
+
+
+class FakeGomaBackend(object):
+  """Fake GomaBackend class for test."""
+  # pylint: disable=R0201
+  # pylint: disable=W0613
+
+  def GetDownloadBaseUrl(self):
+    return 'https://example.com'
+
+  def RewriteRequest(self, req):
+    return req
+
+  def GetHeaders(self):
+    return {}
+
+
+def _ClearGomaEnv():
+  """Remove GOMA_*, GOMAMODE, PLATFORM and proxy variables from os.environ."""
+  to_delete = []
+  for e in os.environ:
+    if e.startswith('GOMA_'):
+      to_delete.append(e)
+  for e in to_delete:
+    del os.environ[e]
+  if 'GOMAMODE' in os.environ:
+    del os.environ['GOMAMODE']
+  if 'PLATFORM' in os.environ:
+    del os.environ['PLATFORM']
+
+  proxy_env_names = ['HTTP_PROXY', 'http_proxy', 'HTTPS_PROXY', 'https_proxy']
+  for proxy_env_name in proxy_env_names:
+    if proxy_env_name in os.environ:
+      del os.environ[proxy_env_name]
+
+
+class GomaCtlTestCommon(unittest.TestCase):
+  """Common features for goma_ctl.py test."""
+  # test should be able to access protected members and variables.
+  # pylint: disable=W0212
+
+  _TMP_SUBDIR_NAME = 'goma'
+
+  def __init__(self, method_name, goma_ctl_path, platform_specific):
+    """Initialize GomaCtlTest.
+
+    To guard against a test accidentally writing files, setUp copies the
+    goma client into a temporary directory, which tearDown removes.
+
+    Args:
+      method_name: a string of test method name to execute.
+      goma_ctl_path: a string of goma directory name.
+      platform_specific: an object for providing platform specific behavior.
+    """
+    super(GomaCtlTestCommon, self).__init__(method_name)
+    self._goma_ctl_path = goma_ctl_path
+    self._platform_specific = platform_specific
+
+  def setUp(self):
+    _ClearGomaEnv()
+
+    # suppress stdout and make it available from test.
+    sys.stdout = StringIO.StringIO()
+
+    mod_name, _ = os.path.splitext(_GOMA_CTL)
+    # Copy GOMA client commands to a temporary directory.
+    # The directory should be removed at tearDown.
+    # TODO: copy same files as archive.py?
+    self._tmp_dir = tempfile.mkdtemp()
+    shutil.copytree(self._goma_ctl_path,
+                    os.path.join(self._tmp_dir, self._TMP_SUBDIR_NAME),
+                    symlinks=True,
+                    ignore=shutil.ignore_patterns('lib', 'lib.target', 'obj',
+                                                  'obj.*', '*_unittest*',
+                                                  '*proto*', '.deps'))
+    self._module = imp.load_source(mod_name,
+                                   os.path.join(self._tmp_dir,
+                                                self._TMP_SUBDIR_NAME,
+                                                _GOMA_CTL))
+
+  def tearDown(self):
+    _ClearGomaEnv()
+    shutil.rmtree(self._tmp_dir)
+
+
+class GomaCtlSmallTest(GomaCtlTestCommon):
+  """Small tests for goma_ctl.py.
+
+  All tests in this class use test doubles and are not expected to affect
+  the external environment.
+  """
+  # test should be able to access protected members and variables.
+  # pylint: disable=W0212
+
+  def setUp(self):
+    super(GomaCtlSmallTest, self).setUp()
+    # Since we use test doubles, we do not have to wait.
+    self._module._COOLDOWN_SLEEP = 0
+
+  def CreateSpyControlCompilerProxy(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      """Spy GomaEnv to capture ControlCompilerProxy command."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.command = ''
+
+      def ControlCompilerProxy(self, command, fast=False):
+        self.command = command
+        return super(SpyGomaEnv, self).ControlCompilerProxy(command, fast)
+    return SpyGomaEnv()
+
+  def testIsGomaFlagTrueShouldShowTrueForVariousTruePatterns(self):
+    flag_test_name = 'FLAG_TEST'
+    self.assertFalse(self._module._IsGomaFlagTrue(flag_test_name))
+    os.environ['GOMA_%s' % flag_test_name] = 'T'
+    self.assertTrue(self._module._IsGomaFlagTrue(flag_test_name))
+    os.environ['GOMA_%s' % flag_test_name] = 'true'
+    self.assertTrue(self._module._IsGomaFlagTrue(flag_test_name))
+    os.environ['GOMA_%s' % flag_test_name] = 'y'
+    self.assertTrue(self._module._IsGomaFlagTrue(flag_test_name))
+    os.environ['GOMA_%s' % flag_test_name] = 'Yes'
+    self.assertTrue(self._module._IsGomaFlagTrue(flag_test_name))
+    os.environ['GOMA_%s' % flag_test_name] = '1'
+    self.assertTrue(self._module._IsGomaFlagTrue(flag_test_name))
+
+  def testIsGomaFlagTrueShouldShowFalseForVariousFalsePatterns(self):
+    flag_test_name = 'FLAG_TEST'
+    os.environ['GOMA_%s' % flag_test_name] = 'F'
+    self.assertFalse(self._module._IsGomaFlagTrue(flag_test_name))
+    os.environ['GOMA_%s' % flag_test_name] = 'false'
+    self.assertFalse(self._module._IsGomaFlagTrue(flag_test_name))
+    os.environ['GOMA_%s' % flag_test_name] = 'n'
+    self.assertFalse(self._module._IsGomaFlagTrue(flag_test_name))
+    os.environ['GOMA_%s' % flag_test_name] = 'No'
+    self.assertFalse(self._module._IsGomaFlagTrue(flag_test_name))
+    os.environ['GOMA_%s' % flag_test_name] = '0'
+    self.assertFalse(self._module._IsGomaFlagTrue(flag_test_name))
+
+  def testIsGomaFlagTrueShouldFollowDefaultIfEnvNotSet(self):
+    flag_test_name = 'FLAG_TEST'
+    self.assertFalse(self._module._IsGomaFlagTrue(flag_test_name,
+                                                  default=False))
+    self.assertTrue(self._module._IsGomaFlagTrue(flag_test_name, default=True))
+
+  def testSetGomaFlagDefaultValueIfEmptyShouldSetIfEmpty(self):
+    flag_test_name = 'FLAG_TEST'
+    flag_test_value = 'test'
+    self.assertFalse(os.environ.has_key('GOMA_%s' % flag_test_name))
+    self._module._SetGomaFlagDefaultValueIfEmpty(flag_test_name,
+                                                 flag_test_value)
+    self.assertTrue(os.environ.has_key('GOMA_%s' % flag_test_name))
+    self.assertEqual(os.environ['GOMA_%s' % flag_test_name], flag_test_value)
+
+  def testSetGomaFlagDefaultValueIfEmptyShouldNotSetIfNotEmpty(self):
+    flag_test_name = 'FLAG_TEST'
+    flag_test_value = 'test'
+    flag_orig_value = 'original'
+    os.environ['GOMA_%s' % flag_test_name] = flag_orig_value
+    self._module._SetGomaFlagDefaultValueIfEmpty(flag_test_name,
+                                                 flag_test_value)
+    self.assertEqual(os.environ['GOMA_%s' % flag_test_name], flag_orig_value)
+
+  def testParseManifestContentsShouldReturnEmptyForEmptyLine(self):
+    self.assertEqual(self._module._ParseManifestContents(''), {})
+
+  def testParseManifestContentsShouldParseOneLine(self):
+    parsed = self._module._ParseManifestContents('key=val')
+    self.assertEqual(len(parsed.keys()), 1)
+    self.assertTrue(parsed.has_key('key'))
+    self.assertEqual(parsed['key'], 'val')
+
+  def testParseManifestContentsShouldParseMultipleLines(self):
+    parsed = self._module._ParseManifestContents('key0=val0\nkey1=val1')
+    self.assertEqual(len(parsed.keys()), 2)
+    self.assertTrue(parsed.has_key('key0'))
+    self.assertEqual(parsed['key0'], 'val0')
+    self.assertTrue(parsed.has_key('key1'))
+    self.assertEqual(parsed['key1'], 'val1')
+
+  def testParseManifestContentsShouldShowEmptyValueIfEndWithEqual(self):
+    parsed = self._module._ParseManifestContents('key=')
+    self.assertEqual(len(parsed.keys()), 1)
+    self.assertTrue(parsed.has_key('key'))
+    self.assertEqual(parsed['key'], '')
+
+  def testParseManifestContentsShouldParseLineWithMultipleEquals(self):
+    parsed = self._module._ParseManifestContents('key=label=value')
+    self.assertEqual(len(parsed.keys()), 1)
+    self.assertTrue(parsed.has_key('key'))
+    self.assertEqual(parsed['key'], 'label=value')
+
+  def testParseManifestContentsShouldIgnoreLineWitoutEquals(self):
+    parsed = self._module._ParseManifestContents('key')
+    self.assertEqual(len(parsed.keys()), 0)
+    self.assertFalse(parsed.has_key('key'))
+
+  def testIsBadVersionReturnsFalseForEmptyBadVersion(self):
+    self.assertFalse(self._module._IsBadVersion(1, ''))
+
+  def testIsBadVersionForExactMatch(self):
+    self.assertTrue(self._module._IsBadVersion(1, '1'))
+
+  def testIsBadVersionReturnsFalseForPartialMatchInCurVer(self):
+    self.assertFalse(self._module._IsBadVersion(10, '1'))
+    self.assertFalse(self._module._IsBadVersion(21, '1'))
+
+  def testIsBadVersionInList(self):
+    self.assertTrue(self._module._IsBadVersion(67, '65|67'))
+    self.assertTrue(self._module._IsBadVersion(67, '65|67|69'))
+    self.assertTrue(self._module._IsBadVersion(67, '67|69'))
+
+  def testIsBadVersionReturnsFalseNotInList(self):
+    self.assertFalse(self._module._IsBadVersion(5, '65|67|69'))
+    self.assertFalse(self._module._IsBadVersion(66, '65|67'))
+    self.assertFalse(self._module._IsBadVersion(56, '65|67|69'))
+    self.assertFalse(self._module._IsBadVersion(6, '65|67|69'))
+    self.assertFalse(self._module._IsBadVersion(7, '65|67|69'))
+    self.assertFalse(self._module._IsBadVersion(76, '65|67|69'))
+    self.assertFalse(self._module._IsBadVersion(9, '65|67|69'))
+
+  def testShouldUpdateWithNoBadVersion(self):
+    self.assertTrue(self._module._ShouldUpdate(1, 2, ''))
+    self.assertTrue(self._module._ShouldUpdate(1, 3, ''))
+    self.assertTrue(self._module._ShouldUpdate(66, 67, ''))
+
+  def testShouldUpdateReturnsFalseForDowngradeWithNoBadVersion(self):
+    """Same-version and downgrade pairs need no update without bad versions."""
+    no_update_pairs = ((1, 1), (2, 1), (3, 1), (67, 66))
+    for current, latest in no_update_pairs:
+      self.assertFalse(self._module._ShouldUpdate(current, latest, ''))
+
+  def testShouldUpdateFromBadVersion(self):
+    self.assertTrue(self._module._ShouldUpdate(67, 68, '67'))
+
+  def testShouldUpdateForDowngradeFromBadVersion(self):
+    self.assertTrue(self._module._ShouldUpdate(2, 1, '2'))
+    self.assertTrue(self._module._ShouldUpdate(2, 1, '0|2'))
+    self.assertTrue(self._module._ShouldUpdate(2, 1, '2|3'))
+    self.assertTrue(self._module._ShouldUpdate(2, 1, '0|2|3'))
+    self.assertTrue(self._module._ShouldUpdate(67, 66, '67'))
+
+  def testShouldUpdateWithDifferentBadVersion(self):
+    self.assertTrue(self._module._ShouldUpdate(66, 68, '67'))
+
+  def testShouldUpdateReturnsFalseForDowngradeWithDifferentBadVersion(self):
+    self.assertFalse(self._module._ShouldUpdate(2, 1, '3'))
+
+  def testShouldUpdateReturnsFalseForTheSameVersion(self):
+    self.assertFalse(self._module._ShouldUpdate(1, 1, '1'))
+
+  def testParseSpaceSeparatedValuesShouldParse(self):
+    test = (
+        'COMMAND PID\n'
+        'bash      1\n'
+        'tcsh      2\n'
+        )
+    expected = [
+        {'COMMAND': 'bash', 'PID': '1'},
+        {'COMMAND': 'tcsh', 'PID': '2'},
+        ]
+    parsed = self._module._ParseSpaceSeparatedValues(test)
+    self.assertEqual(parsed, expected)
+
+  def testParseSpaceSeparatedValuesShouldParseEmpty(self):
+    parsed = self._module._ParseSpaceSeparatedValues('')
+    self.assertEqual(parsed, [])
+
+  def testParseSpaceSeparatedValuesShouldSkipBlankLines(self):
+    test = (
+        'COMMAND PID\n'
+        'bash      1\n'
+        'tcsh      2\n'
+        '\n'
+        )
+    expected = [
+        {'COMMAND': 'bash', 'PID': '1'},
+        {'COMMAND': 'tcsh', 'PID': '2'},
+        ]
+    parsed = self._module._ParseSpaceSeparatedValues(test)
+    self.assertEqual(parsed, expected)
+
+  def testParseSpaceSeparatedValuesShouldIgnoreWhiteSpaces(self):
+    test = (
+        'COMMAND                           PID            \n'
+        '   bash \t     1  \n'
+        '  \t  \n'
+        '\ttcsh    \t  2\t\n'
+        '\n'
+        )
+    expected = [
+        {'COMMAND': 'bash', 'PID': '1'},
+        {'COMMAND': 'tcsh', 'PID': '2'},
+        ]
+    parsed = self._module._ParseSpaceSeparatedValues(test)
+    self.assertEqual(parsed, expected)
+
+  def testParseLsofShouldParse(self):
+    test = 'u1\np2\nu3\np4\n'
+    expected = [{'uid': 1L, 'pid': 2L}, {'uid': 3L, 'pid': 4L}]
+    parsed = self._module._ParseLsof(test)
+    self.assertEqual(parsed, expected)
+
+  def testParseLsofShouldIgnoreUnknown(self):
+    test = 'x\n unknown\nu1\np2\nu3\np4\n'
+    expected = [{'uid': 1L, 'pid': 2L}, {'uid': 3L, 'pid': 4L}]
+    parsed = self._module._ParseLsof(test)
+    self.assertEqual(parsed, expected)
+
+  def testParseLsofShouldIgnoreEmptyLine(self):
+    test = '\n\t\t\n  \nu1\np2\nu3\np4\n'
+    expected = [{'uid': 1L, 'pid': 2L}, {'uid': 3L, 'pid': 4L}]
+    parsed = self._module._ParseLsof(test)
+    self.assertEqual(parsed, expected)
+
+  def testGetEnvMatchedConditionShouldReturnForEmptyCandidates(self):
+    default_value = 'default'
+    result = self._module._GetEnvMatchedCondition([],
+                                                  lambda x: True,
+                                                  default_value)
+    self.assertEqual(result, default_value)
+
+  def testGetEnvMatchedConditionShouldReturnIfNothingMatched(self):
+    default_value = 'default'
+    flag_test_name = 'FLAG_TEST'
+    flag_value = 'not matched value'
+    os.environ['GOMA_%s' % flag_test_name] = flag_value
+    result = self._module._GetEnvMatchedCondition(['GOMA_%s' % flag_test_name],
+                                                  lambda x: False,
+                                                  default_value)
+    self.assertEqual(result, default_value)
+
+  def testGetEnvMatchedConditionShouldReturnMatchedValue(self):
+    default_value = 'default'
+    flag_test_name = 'FLAG_TEST'
+    flag_value = 'expected_value'
+    os.environ['GOMA_%s' % flag_test_name] = flag_value
+    result = self._module._GetEnvMatchedCondition(['GOMA_%s' % flag_test_name],
+                                                  lambda x: True,
+                                                  default_value)
+    self.assertEqual(result, flag_value)
+
+  def testGetEnvMatchedConditionShouldReturnTheFirstCandidate(self):
+    default_value = 'default'
+    flag_test_name_1 = 'FLAG_TEST_1'
+    flag_value_1 = 'value_01'
+    os.environ['GOMA_%s' % flag_test_name_1] = flag_value_1
+    flag_test_name_2 = 'FLAG_TEST_2'
+    flag_value_2 = 'value_02'
+    os.environ['GOMA_%s' % flag_test_name_2] = flag_value_2
+    result = self._module._GetEnvMatchedCondition(
+        ['GOMA_%s' % i for i in [flag_test_name_1, flag_test_name_2]],
+        lambda x: True, default_value)
+    self.assertEqual(result, flag_value_1)
+
+  def testGetEnvMatchedConditionShouldReturnEarlierCandidateInList(self):
+    default_value = 'default'
+    flag_name_1 = 'FLAG_TEST_1'
+    flag_value_1 = 'value_01'
+    os.environ['GOMA_%s' % flag_name_1] = flag_value_1
+    flag_name_2 = 'FLAG_TEST_2'
+    flag_value_2 = 'match_02'
+    os.environ['GOMA_%s' % flag_name_2] = flag_value_2
+    flag_name_3 = 'FLAG_TEST_3'
+    flag_value_3 = 'match_03'
+    os.environ['GOMA_%s' % flag_name_3] = flag_value_3
+    result = self._module._GetEnvMatchedCondition(
+        ['GOMA_%s' % i for i in [flag_name_1, flag_name_2, flag_name_3]],
+        lambda x: x.startswith('match'), default_value)
+    self.assertEqual(result, flag_value_2)
+
+  def testParseFlagzShouldParse(self):
+    test_data = ('GOMA_COMPILER_PROXY_DAEMON_MODE=true\n'
+                 'GOMA_COMPILER_PROXY_LOCK_FILENAME=goma_compiler_proxy.lock\n')
+    parsed_data = self._module._ParseFlagz(test_data)
+    expected = {
+        'GOMA_COMPILER_PROXY_DAEMON_MODE': 'true',
+        'GOMA_COMPILER_PROXY_LOCK_FILENAME': 'goma_compiler_proxy.lock',
+    }
+    self.assertEqual(parsed_data, expected)
+
+  def testParseFlagzShouldIgnoreAutoConfigured(self):
+    test_data = ('GOMA_BURST_MAX_SUBPROCS=64 (auto configured)\n'
+                 'GOMA_COMPILER_PROXY_LOCK_FILENAME=goma_compiler_proxy.lock\n')
+    parsed_data = self._module._ParseFlagz(test_data)
+    expected = {
+        'GOMA_COMPILER_PROXY_LOCK_FILENAME': 'goma_compiler_proxy.lock',
+    }
+    self.assertEqual(parsed_data, expected)
+
+  def testParseFlagzShouldIgnoreNewlineOnlyLine(self):
+    test_data = ('\n'
+                 'GOMA_COMPILER_PROXY_DAEMON_MODE=true\n'
+                 '\n\n'
+                 '\r\n'
+                 'GOMA_COMPILER_PROXY_LOCK_FILENAME=goma_compiler_proxy.lock\n'
+                 '\n')
+    parsed_data = self._module._ParseFlagz(test_data)
+    expected = {
+        'GOMA_COMPILER_PROXY_DAEMON_MODE': 'true',
+        'GOMA_COMPILER_PROXY_LOCK_FILENAME': 'goma_compiler_proxy.lock',
+    }
+    self.assertEqual(parsed_data, expected)
+
+  def testParseFlagzShouldIgnoreWhiteSpaces(self):
+    test_data = (' \t  GOMA_COMPILER_PROXY_DAEMON_MODE \t = \t true  \n')
+    parsed_data = self._module._ParseFlagz(test_data)
+    expected = {
+        'GOMA_COMPILER_PROXY_DAEMON_MODE': 'true',
+    }
+    self.assertEqual(parsed_data, expected)
+
+  def testIsGomaFlagUpdatedShouldReturnFalseIfNothingHasSet(self):
+    self.assertFalse(self._module._IsGomaFlagUpdated({}))
+
+  def testIsGomaFlagUpdatedShouldReturnTrueIfNewFlag(self):
+    os.environ['GOMA_TEST'] = 'test'
+    self.assertTrue(self._module._IsGomaFlagUpdated({}))
+
+  def testIsGomaFlagUpdatedShouldReturnTrueIfFlagRemoved(self):
+    self.assertTrue(self._module._IsGomaFlagUpdated({'GOMA_TEST': 'test'}))
+
+  def testIsGomaFlagUpdatedShouldReturnFalseIfNoUpdate(self):
+    expected = {'GOMA_TEST': 'test'}
+    for key, value in expected.iteritems():
+      os.environ[key] = value
+    self.assertFalse(self._module._IsGomaFlagUpdated(expected))
+
+  def testPullShouldUpdateManifestInLatestDir(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      """Spy GomaEnv to provide MANIFEST files and capture WriteManifest."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.written_manifest = {}
+        self._downloaded = False
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        if 'MANIFEST' in url:
+          return 'VERSION=2'
+        if destination_file:
+          # ReadManifest should show the latest version only when the file has
+          # been downloaded.
+          self._downloaded = True
+
+      def ReadManifest(self, _=None):
+        if self._downloaded:
+          return {'VERSION': '2'}
+        else:
+          return {'VERSION': '1'}
+
+      def WriteManifest(self, manifest, _=None):
+        self.written_manifest = manifest
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._Pull()
+    self.assertTrue(env.written_manifest)
+    self.assertEqual(env.written_manifest['PLATFORM'], 'fake')
+    self.assertEqual(env.written_manifest['VERSION'], '2')
+
+  def testPullShouldUpdateManifestInLatestDirToRollbackRelease(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      """Spy GomaEnv to provide MANIFEST files and capture WriteManifest."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.written_manifest = {}
+        self._downloaded = False
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        if 'MANIFEST' in url:
+          return 'VERSION=1\nbad_version=2'
+        if destination_file:
+          # ReadManifest should show the latest version only when the file has
+          # been downloaded.
+          self._downloaded = True
+
+      def ReadManifest(self, _=None):
+        if self._downloaded:
+          return {'VERSION': '1', 'bad_version': '2'}
+        else:
+          return {'VERSION': '2'}
+
+      def WriteManifest(self, manifest, _=None):
+        self.written_manifest = manifest
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._Pull()
+    self.assertTrue(env.written_manifest)
+    self.assertEqual(env.written_manifest['PLATFORM'], 'fake')
+    self.assertEqual(env.written_manifest['VERSION'], '1')
+
+  def testPullShouldUpdateIfFilesAreNotValid(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      """Spy GomaEnv to provide MANIFEST files and capture WriteManifest."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.is_valid_magic = False
+        self.writte_manifest = False
+
+      def IsValidMagic(self, _):
+        self.is_valid_magic = True
+        return False
+
+      def WriteManifest(self, manifest, _=None):
+        self.writte_manifest = True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._Pull()
+    self.assertTrue(env.is_valid_magic)
+    self.assertTrue(env.writte_manifest)
+
+  def testPullShouldUpdateIfManifestIsEmpty(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      """Spy GomaEnv to provide MANIFEST files and capture WriteManifest."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.read_manifest_latest = False
+        self.writte_manifest = False
+        self._downloaded = False
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        if 'MANIFEST' in url:
+          return 'VERSION=2'
+        if destination_file:
+          self._downloaded = True
+
+      def ReadManifest(self, latest=None):
+        if latest == 'latest':
+          self.read_manifest_latest = True
+          return {}
+        if self._downloaded:
+          return {'VERSION': '2'}
+        else:
+          return {'VERSION': '1'}
+
+      def WriteManifest(self, manifest, _=None):
+        self.writte_manifest = True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._Pull()
+    self.assertTrue(env.read_manifest_latest)
+    self.assertTrue(env.writte_manifest)
+
+  def testPullShouldUpdateIfManifestIsBroken(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      """Spy GomaEnv to provide MANIFEST files and capture WriteManifest."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.read_manifest_latest = False  # set by ReadManifest('latest')
+        self.writte_manifest = False
+        self._downloaded = False
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        if 'MANIFEST' in url:
+          return 'VERSION=2'
+        if destination_file:
+          self._downloaded = True
+
+      def ReadManifest(self, latest=None):
+        if latest == 'latest':
+          self.read_manifest_latest = True
+          return {'VERSION': 'broken'}
+        return {'VERSION': '1'}
+
+      def WriteManifest(self, manifest, _=None):
+        self.writte_manifest = True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._Pull()
+    self.assertTrue(env.read_manifest_latest)
+    self.assertTrue(env.writte_manifest)
+
+  def testStartCompilerProxyShouldRun(self):
+    driver = self._module.GomaDriver(FakeGomaEnv(), FakeGomaBackend())
+    driver._StartCompilerProxy()
+
+  def testGetStatusShouldCallControlCompilerProxyWithHealthz(self):
+    env = self.CreateSpyControlCompilerProxy()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._GetStatus()
+    self.assertEqual(env.command, '/healthz')
+
+  def testShutdownCompilerProxyShouldCallControlCompilerProxyWith3Quit(self):
+    env = self.CreateSpyControlCompilerProxy()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._ShutdownCompilerProxy()
+    self.assertEqual(env.command, '/quitquitquit')
+
+  def testPrintStatisticsShouldCallControlCompilerProxyWithStatz(self):
+    env = self.CreateSpyControlCompilerProxy()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._PrintStatistics()
+    self.assertEqual(env.command, '/statz')
+
+  def testPrintHistogramShouldCallControlCompilerProxyWithStatz(self):
+    env = self.CreateSpyControlCompilerProxy()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._PrintHistogram()
+    self.assertEqual(env.command, '/histogramz')
+
+  def testGetJsonStatusShouldCallControlCompilerProxyWithErrorz(self):
+    env = self.CreateSpyControlCompilerProxy()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._GetJsonStatus()
+    self.assertEqual(env.command, '/errorz')
+
+  def testPrintLatestVersionShouldRun(self):
+    driver = self._module.GomaDriver(FakeGomaEnv(), FakeGomaBackend())
+    driver._PrintLatestVersion()
+
+  def testGetProxyEnvShouldReturnEmptyDictIfNoEnvConfigured(self):
+    self.assertFalse(self._module._GetProxyEnv())
+
+  def testReportMakeTgz(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      """Spy GomaEnv to provide WriteFile, CopyFile and MakeTgzFromDirectory"""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.output_files = []
+        self.tgz_source_dir = None
+        self.tgz_file = None
+        self.written = False
+
+      def WriteFile(self, filename, content):
+        self.output_files.append(filename)
+
+      def CopyFile(self, from_file, to_file):
+        self.output_files.append(to_file)
+
+      def MakeTgzFromDirectory(self, dir_name, output_filename):
+        self.tgz_source_dir = dir_name
+        self.tgz_file = output_filename
+        self.written = True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._Report()
+    self.assertTrue(env.written)
+    for f in env.output_files:
+      self.assertTrue(f.startswith(env.tgz_source_dir))
+    self.assertTrue(env.tgz_file.startswith(self._module._GetTempDirectory()))
+
+  def testReportMakeTgzWithoutCompilerProxyRunning(self):
+    """_Report() should still build the tgz when /healthz reports failure."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Spy GomaEnv whose compiler_proxy is reported as not running.
+         Records WriteFile, CopyFile and MakeTgzFromDirectory calls."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.written = False
+        self.tgz_file = None
+        self.tgz_source_dir = None
+        self.output_files = []
+
+      def MakeTgzFromDirectory(self, dir_name, output_filename):
+        self.written = True
+        self.tgz_file = output_filename
+        self.tgz_source_dir = dir_name
+
+      def CopyFile(self, from_file, to_file):
+        self.output_files.append(to_file)
+
+      def WriteFile(self, filename, content):
+        self.output_files.append(filename)
+
+      def ControlCompilerProxy(self, command, fast=False):
+        if command == '/healthz':
+          not_running = {
+              'status': False,
+              'message': 'compiler proxy is not running',
+              'url': 'fake',
+          }
+          return not_running
+        # These status pages must not be requested in this scenario.
+        forbidden = ('/compilerz', '/histogramz', '/serverz', '/statz')
+        if command in forbidden:
+          raise Exception('Unexpected command is called')
+        return super(SpyGomaEnv, self).ControlCompilerProxy(command, fast)
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    goma_driver._Report()
+    self.assertTrue(spy_env.written)
+    # Every recorded output must live inside the directory that was archived.
+    for output_path in spy_env.output_files:
+      self.assertTrue(output_path.startswith(spy_env.tgz_source_dir))
+    tgz_path = spy_env.tgz_file
+    self.assertTrue(tgz_path.startswith(self._module._GetTempDirectory()))
+
+  def testReportMakeTgzCompilerProxyDeadAfterHealthz(self):
+    """_Report() should build the tgz even if the proxy dies after /healthz."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Spy GomaEnv whose compiler_proxy dies after the first /healthz call.
+         Records WriteFile, CopyFile and MakeTgzFromDirectory calls."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.is_dead = False
+        self.written = False
+        self.tgz_file = None
+        self.tgz_source_dir = None
+        self.output_files = []
+
+      def MakeTgzFromDirectory(self, dir_name, output_filename):
+        self.written = True
+        self.tgz_file = output_filename
+        self.tgz_source_dir = dir_name
+
+      def CopyFile(self, from_file, to_file):
+        self.output_files.append(to_file)
+
+      def WriteFile(self, filename, content):
+        self.output_files.append(filename)
+
+      def ControlCompilerProxy(self, command, fast=False):
+        if not self.is_dead:
+          # The first /healthz is answered normally; every later command
+          # sees a dead compiler_proxy.
+          if command == '/healthz':
+            self.is_dead = True
+          return super(SpyGomaEnv, self).ControlCompilerProxy(command, fast)
+        return {
+            'status': False,
+            'message': 'compiler_proxy is not running',
+            'url': 'dummy',
+        }
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    goma_driver._Report()
+    self.assertTrue(spy_env.written)
+    # Every recorded output must live inside the directory that was archived.
+    for output_path in spy_env.output_files:
+      self.assertTrue(output_path.startswith(spy_env.tgz_source_dir))
+    tgz_path = spy_env.tgz_file
+    self.assertTrue(tgz_path.startswith(self._module._GetTempDirectory()))
+
+  def testGetProxyEnvShouldReturnDictIfEnvIsSet(self):
+    """Each proxy variable alone should be parsed into a host/port dict.
+
+    The variables are set one at a time and removed again, so every name is
+    checked in isolation."""
+    proxy_env_names = ['HTTP_PROXY', 'http_proxy', 'HTTPS_PROXY', 'https_proxy']
+    for name in proxy_env_names:
+      # assertNotIn replaces the Python-2-only os.environ.has_key().
+      self.assertNotIn(name, os.environ)
+      os.environ[name] = 'http://example.org:3128/'
+      try:
+        proxy_env = self._module._GetProxyEnv()
+        self.assertTrue(proxy_env, msg=('proxy env=%s' % name))
+        self.assertEqual(proxy_env, {'host': 'example.org', 'port': '3128'})
+      finally:
+        # Unset the variable even when an assertion fails, so the failure
+        # cannot leak a proxy setting into the next iteration or test.
+        del os.environ[name]
+
+  def testGetProxyEnvShouldRaiseForHttps(self):
+    """_GetProxyEnv() should raise ConfigError for an https:// proxy URL."""
+    os.environ['HTTP_PROXY'] = 'https://example.org:3128/'
+    with self.assertRaises(self._module.ConfigError):
+      self._module._GetProxyEnv()
+
+  def testGetProxyEnvShouldRaiseForProxyWithPassword(self):
+    """_GetProxyEnv() should raise ConfigError for a user:pass@ proxy URL."""
+    os.environ['HTTP_PROXY'] = 'http://user:pass@example.org:3128'
+    with self.assertRaises(self._module.ConfigError):
+      self._module._GetProxyEnv()
+
+  def testGetProxyEnvShouldRaiseEnvWithoutPort(self):
+    """_GetProxyEnv() should raise ConfigError when the URL has no port."""
+    os.environ['HTTP_PROXY'] = 'http://example.org/'
+    with self.assertRaises(self._module.ConfigError):
+      self._module._GetProxyEnv()
+
+  def testGetProxyEnvShouldAllowEnvWithoutScheme(self):
+    """A bare host:port value should be parsed into a host/port dict."""
+    os.environ['HTTP_PROXY'] = 'example.org:3128'
+    expected = {'host': 'example.org', 'port': '3128'}
+    actual = self._module._GetProxyEnv()
+    self.assertTrue(actual)
+    self.assertEqual(actual, expected)
+
+  def testGetProxyEnvShouldRaiseEnvWithoutSchemeAndPort(self):
+    """_GetProxyEnv() should raise ConfigError for a bare host name."""
+    os.environ['HTTP_PROXY'] = 'example.org'
+    with self.assertRaises(self._module.ConfigError):
+      self._module._GetProxyEnv()
+
+  def testAutoUpdate(self):
+    """_StartCompilerProxy() should auto-update and reload the manifest."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Spy GomaEnv serving MANIFEST data and recording AutoUpdate calls.
+         ReadManifest reports VERSION 1 before AutoUpdate and 2 after it."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self._downloaded = False
+        self.auto_updated = False
+        self.read_manifest_after_update = False
+        self.read_manifest_before_update = False
+
+      def CanAutoUpdate(self):
+        return True
+
+      def AutoUpdate(self):
+        self.auto_updated = True
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        return 'VERSION=2'
+
+      def ReadManifest(self, _=None):
+        if not self.auto_updated:
+          self.read_manifest_before_update = True
+          return {'VERSION': '1'}
+        self.read_manifest_after_update = True
+        return {'VERSION': '2'}
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    goma_driver._StartCompilerProxy()
+    self.assertTrue(spy_env.auto_updated)
+    self.assertTrue(spy_env.read_manifest_before_update)
+    self.assertTrue(spy_env.read_manifest_after_update)
+    # The driver must have picked up the post-update manifest.
+    self.assertIn('VERSION', goma_driver._manifest)
+    self.assertEqual(goma_driver._manifest['VERSION'], '2')
+    self.assertEqual(goma_driver._version, 2)
+
+  def testShouldNotAutoUpdateIfAlreadyUpToDate(self):
+    """No AutoUpdate when the downloaded VERSION matches the local manifest."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Reports VERSION 1 both locally and remotely; records calls."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.http_downloaded = False
+        self.auto_updated = False
+
+      def CanAutoUpdate(self):
+        return True
+
+      def AutoUpdate(self):
+        self.auto_updated = True
+
+      def ReadManifest(self, _=None):
+        return {'VERSION': '1'}
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_downloaded = True
+        return 'VERSION=1'
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    goma_driver._StartCompilerProxy()
+    self.assertTrue(spy_env.http_downloaded)
+    self.assertFalse(spy_env.auto_updated)
+
+  def testShouldNotAutoUpdateIfCanAutoUpdateIsFalse(self):
+    """Neither download nor AutoUpdate when CanAutoUpdate() is False."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Refuses auto update and records HttpDownload/AutoUpdate calls."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.http_downloaded = False
+        self.auto_updated = False
+
+      def CanAutoUpdate(self):
+        return False
+
+      def AutoUpdate(self):
+        self.auto_updated = True
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_downloaded = True
+        return 'VERSION=1'
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    goma_driver._StartCompilerProxy()
+    self.assertFalse(spy_env.http_downloaded)
+    self.assertFalse(spy_env.auto_updated)
+
+  def testUpdateShouldUpdateIfFindTheNewVersion(self):
+    """_Update() should download the manifest and install the package."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Records HttpDownload and InstallPackage calls; install succeeds."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.rollback = False
+        self.install_package = False
+        self.http_downloaded = False
+
+      def InstallPackage(self, _):
+        self.install_package = True
+        return True  # install success.
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_downloaded = True
+        return 'VERSION=1'
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    goma_driver._Update()
+    self.assertTrue(spy_env.http_downloaded)
+    self.assertTrue(spy_env.install_package)
+
+  def testUpdateShouldNotUpdateIfCurrentPacakgeIsLatest(self):
+    """_Update() should skip installation when both VERSIONs are 1."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Reports VERSION 1 locally and remotely; records install calls."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.rollback = False
+        self.install_package = False
+        self.http_downloaded = False
+
+      def ReadManifest(self, _=None):
+        return {'VERSION': '1'}
+
+      def InstallPackage(self, _):
+        self.install_package = True
+        return True  # install success.
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_downloaded = True
+        return 'VERSION=1'
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    goma_driver._Update()
+    self.assertTrue(spy_env.http_downloaded)
+    self.assertFalse(spy_env.install_package)
+
+  def testUpdateShouldNotUpdateIfCurrentPacakgeIsNewerThanLatest(self):
+    """_Update() should not install when the current VERSION (2) is newer.
+
+    The downloaded manifest advertises VERSION 1 and marks nothing as bad."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Serves a downloaded VERSION 1 manifest; defaults to VERSION 2."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.http_downloaded = False
+        self.manifests = {}
+        self.install_package = False
+        self.rollback = False
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_downloaded = True
+        if destination_file:
+          # Remember the manifest under the directory it was downloaded to.
+          dirname = os.path.dirname(destination_file)
+          self.manifests[dirname] = {'VERSION': '1'}
+          return
+        return 'VERSION=1'
+
+      def ReadManifest(self, path=None):
+        # .get() keeps an unknown path from raising KeyError; it falls back
+        # to the default manifest below instead.
+        if path and self.manifests.get(path):
+          return self.manifests[path]
+        return {'VERSION': '2'}
+
+      def InstallPackage(self, _):
+        self.install_package = True
+        return True  # install success.
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._Update()
+    self.assertTrue(env.http_downloaded)
+    self.assertFalse(env.install_package)
+
+  def testUpdateShouldUpdateIfCurrentPacakgeIsMarkedAsBad(self):
+    """_Update() should install when the current VERSION (2) is marked bad.
+
+    The downloaded manifest advertises VERSION 1 with bad_version=2."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Serves a downloaded manifest flagging VERSION 2 as bad."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.http_downloaded = False
+        self.manifests = {}
+        self.install_package = False
+        self.rollback = False
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_downloaded = True
+        if destination_file:
+          # Remember the manifest under the directory it was downloaded to.
+          dirname = os.path.dirname(destination_file)
+          self.manifests[dirname] = {'VERSION': '1', 'bad_version': '2'}
+          return
+        return 'VERSION=1\nbad_version=2'
+
+      def ReadManifest(self, path=None):
+        # .get() keeps an unknown path from raising KeyError; it falls back
+        # to the default manifest below instead.
+        if path and self.manifests.get(path):
+          return self.manifests[path]
+        return {'VERSION': '2'}
+
+      def InstallPackage(self, _):
+        self.install_package = True
+        return True  # install success.
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._Update()
+    self.assertTrue(env.http_downloaded)
+    self.assertTrue(env.install_package)
+
+  def testUpdateShouldUpdateIfCurrentPacakgeIsMarkedAsOneOfBad(self):
+    """_Update() should install when VERSION 3 is one of several bad ones.
+
+    The downloaded manifest advertises VERSION 1 with bad_version=2|3."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Serves a downloaded manifest flagging VERSIONs 2 and 3 as bad."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.http_downloaded = False
+        self.manifests = {}
+        self.install_package = False
+        self.rollback = False
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_downloaded = True
+        if destination_file:
+          # Remember the manifest under the directory it was downloaded to.
+          dirname = os.path.dirname(destination_file)
+          self.manifests[dirname] = {'VERSION': '1', 'bad_version': '2|3'}
+          return
+        return 'VERSION=1\nbad_version=2|3'
+
+      def ReadManifest(self, path=None):
+        # .get() keeps an unknown path from raising KeyError; it falls back
+        # to the default manifest below instead.
+        if path and self.manifests.get(path):
+          return self.manifests[path]
+        return {'VERSION': '3'}
+
+      def InstallPackage(self, _):
+        self.install_package = True
+        return True  # install success.
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._Update()
+    self.assertTrue(env.http_downloaded)
+    self.assertTrue(env.install_package)
+
+  def testUpdateShouldUpdateIfCurrentPacakgeIsNotMarkedAsBad(self):
+    """_Update() should install VERSION 3 over a VERSION 2 that is not bad.
+
+    The downloaded manifest advertises VERSION 3 with bad_version=1|4."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Serves a downloaded VERSION 3 manifest; defaults to VERSION 2."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.http_downloaded = False
+        self.manifests = {}
+        self.install_package = False
+        self.rollback = False
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_downloaded = True
+        if destination_file:
+          # Remember the manifest under the directory it was downloaded to.
+          dirname = os.path.dirname(destination_file)
+          self.manifests[dirname] = {'VERSION': '3', 'bad_version': '1|4'}
+          return
+        return 'VERSION=3\nbad_version=1|4'
+
+      def ReadManifest(self, path=None):
+        # .get() keeps an unknown path from raising KeyError; it falls back
+        # to the default manifest below instead.
+        if path and self.manifests.get(path):
+          return self.manifests[path]
+        return {'VERSION': '2'}
+
+      def InstallPackage(self, _):
+        self.install_package = True
+        return True  # install success.
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._Update()
+    self.assertTrue(env.http_downloaded)
+    self.assertTrue(env.install_package)
+
+  def testUpdateShouldRollbackIfAuditFailed(self):
+    """_Update() should raise Error and roll back on a checksum mismatch."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Loads checksum 'dummy' for gomacc but calculates 'invalid'."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self._update_dir = ''
+        self.rollback = False
+        self.calculate_checksum = False
+        self.load_checksum = False
+        self.http_downloaded = False
+
+      def RollbackUpdate(self, backup_dir=None):
+        self.rollback = True
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_downloaded = True
+        return 'VERSION=1'
+
+      def LoadChecksum(self, update_dir=''):
+        self.load_checksum = True
+        if update_dir:
+          # Remembered so CalculateChecksum can verify it gets the same one.
+          self._update_dir = update_dir
+          return {'gomacc': 'dummy'}
+        raise Exception('update_dir should be specified')
+
+      def CalculateChecksum(self, _, update_dir=''):
+        self.calculate_checksum = True
+        if update_dir == self._update_dir:
+          return 'invalid'  # wrong checksum.
+        raise Exception('unexpected update_dir given.'
+                        '%s != %s' % (update_dir, self._update_dir))
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    with self.assertRaises(self._module.Error):
+      goma_driver._Update()
+    self.assertTrue(spy_env.http_downloaded)
+    self.assertTrue(spy_env.load_checksum)
+    self.assertTrue(spy_env.calculate_checksum)
+    self.assertTrue(spy_env.rollback)
+
+  def testUpdateShouldRollbackIfUpdateFailed(self):
+    """_Update() should raise Error and roll back if InstallPackage fails."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """InstallPackage reports failure; rollback calls are recorded."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.rollback = False
+        self.install_package = False
+        self.http_downloaded = False
+
+      def InstallPackage(self, _):
+        self.install_package = True
+        return False  # install failure.
+
+      def RollbackUpdate(self, backup_dir=None):
+        self.rollback = True
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_downloaded = True
+        return 'VERSION=1'
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    with self.assertRaises(self._module.Error):
+      goma_driver._Update()
+    self.assertTrue(spy_env.http_downloaded)
+    self.assertTrue(spy_env.install_package)
+    self.assertTrue(spy_env.rollback)
+
+  def testUpdateShouldRestartIfCompilerProxyRanBeforeUpdate(self):
+    """_Update() should kill and re-exec a compiler_proxy that was running."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """compiler_proxy counts as running until KillStakeholders is called."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.exec_compiler_proxy = False
+        self.kill_stakeholders = False
+        self.installed_before = False
+        self.http_downloaded = False
+
+      def ExecCompilerProxy(self):
+        self.exec_compiler_proxy = True
+
+      def KillStakeholders(self):
+        self.kill_stakeholders = True
+
+      def CompilerProxyRunning(self):
+        # Running only as long as nothing has killed the stakeholders.
+        return not self.kill_stakeholders
+
+      def IsGomaInstalledBefore(self):
+        self.installed_before = True
+        return True
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_downloaded = True
+        return 'VERSION=1'
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    goma_driver._Update()
+    self.assertTrue(spy_env.http_downloaded)
+    self.assertTrue(spy_env.installed_before)
+    self.assertTrue(spy_env.kill_stakeholders)
+    self.assertTrue(spy_env.exec_compiler_proxy)
+
+  def testUpdateShouldNotStartIfCompilerProxyDidNotRunBeforeUpdate(self):
+    """_Update() should neither kill nor exec an idle compiler_proxy."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """compiler_proxy is never running; kill/exec calls are recorded."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.exec_compiler_proxy = False
+        self.kill_stakeholders = False
+        self.installed_before = False
+        self.http_downloaded = False
+        self.compiler_proxy_running = False
+
+      def ExecCompilerProxy(self):
+        self.exec_compiler_proxy = True
+
+      def KillStakeholders(self):
+        self.kill_stakeholders = True
+
+      def CompilerProxyRunning(self):
+        # The flag only records that the driver asked; the answer is no.
+        self.compiler_proxy_running = True
+        return False
+
+      def IsGomaInstalledBefore(self):
+        self.installed_before = True
+        return True
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_downloaded = True
+        return 'VERSION=1'
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    goma_driver._Update()
+    self.assertTrue(spy_env.http_downloaded)
+    self.assertTrue(spy_env.compiler_proxy_running)
+    self.assertTrue(spy_env.installed_before)
+    self.assertFalse(spy_env.kill_stakeholders)
+    self.assertFalse(spy_env.exec_compiler_proxy)
+
+  def testUpdatePackageShouldKillStackeholdersIfGomaInstalledBefore(self):
+    """_UpdatePackage() should kill stakeholders before installing."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Claims goma was installed before and records each step taken."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.install_package = False
+        self.kill_all_goma_processes = False
+        self.is_installed_before = False
+        self.read_manifest = False
+
+      def InstallPackage(self, _):
+        self.install_package = True
+        return True
+
+      def KillStakeholders(self):
+        self.kill_all_goma_processes = True
+
+      def IsGomaInstalledBefore(self):
+        self.is_installed_before = True
+        return True
+
+      def ExtractPackage(self, package_file, update_dir):
+        # Both arguments are ignored; extraction always succeeds.
+        # pylint: disable=W0613
+        return True
+
+      def ReadManifest(self, _=None):
+        self.read_manifest = True
+        return {'VERSION': '1'}
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    goma_driver._UpdatePackage()
+    self.assertTrue(spy_env.read_manifest)
+    self.assertTrue(spy_env.is_installed_before)
+    self.assertTrue(spy_env.kill_all_goma_processes)
+    self.assertTrue(spy_env.install_package)
+
+  def testRestartCompilerProxyShouldRun(self):
+    """Smoke test: _RestartCompilerProxy() completes against the fakes."""
+    fake_env = FakeGomaEnv()
+    goma_driver = self._module.GomaDriver(fake_env, FakeGomaBackend())
+    goma_driver._RestartCompilerProxy()
+
+  def testEnsureStartShouldStartCompilerProxy(self):
+    """_EnsureStartCompilerProxy() should exec a proxy that is not running."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """No auto update, no running proxy; records which calls were made."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.exec_compiler_proxy = False
+        self.compiler_proxy_running = False
+        self.can_auto_update = False
+
+      def ExecCompilerProxy(self):
+        self.exec_compiler_proxy = True
+
+      def CompilerProxyRunning(self):
+        self.compiler_proxy_running = True
+        return False
+
+      def CanAutoUpdate(self):
+        self.can_auto_update = True
+        return False
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    goma_driver._EnsureStartCompilerProxy()
+    self.assertTrue(spy_env.can_auto_update)
+    self.assertTrue(spy_env.compiler_proxy_running)
+    self.assertTrue(spy_env.exec_compiler_proxy)
+
+  def testGomaStatusShouldTrueForOK(self):
+    """_GetStatus() should be truthy when /healthz answers 'ok'."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Answers /healthz with an 'ok' message and records the call."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.compiler_proxy_healthz_called = False
+
+      def ControlCompilerProxy(self, command, fast=False):
+        if command != '/healthz':
+          return super(SpyGomaEnv, self).ControlCompilerProxy(command, fast)
+        self.compiler_proxy_healthz_called = True
+        return {'status': True, 'message': 'ok', 'url': 'fake'}
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    self.assertTrue(goma_driver._GetStatus())
+    self.assertTrue(spy_env.compiler_proxy_healthz_called)
+
+  def testGomaStatusShouldTrueForRunning(self):
+    """_GetStatus() should be truthy for a 'running: ...' /healthz message."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Answers /healthz with a 'running:' message and records the call."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.compiler_proxy_healthz_called = False
+
+      def ControlCompilerProxy(self, command, fast=False):
+        if command != '/healthz':
+          return super(SpyGomaEnv, self).ControlCompilerProxy(command, fast)
+        self.compiler_proxy_healthz_called = True
+        return {'status': True, 'message': 'running: had some error',
+                'url': 'fake'}
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    self.assertTrue(goma_driver._GetStatus())
+    self.assertTrue(spy_env.compiler_proxy_healthz_called)
+
+  def testGomaStatusShouldFalseForError(self):
+    """_GetStatus() should be falsy for an 'error: ...' /healthz message."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Answers /healthz with an 'error:' message and records the call."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.compiler_proxy_healthz_called = False
+
+      def ControlCompilerProxy(self, command, fast=False):
+        if command != '/healthz':
+          return super(SpyGomaEnv, self).ControlCompilerProxy(command, fast)
+        self.compiler_proxy_healthz_called = True
+        # The request itself succeeds; only the message reports an error.
+        return {'status': True, 'message': 'error: had some error',
+                'url': 'fake'}
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    self.assertFalse(goma_driver._GetStatus())
+    self.assertTrue(spy_env.compiler_proxy_healthz_called)
+
+  def testGomaStatusShouldFalseForUnresponseHealthz(self):
+    """_GetStatus() should be falsy when /healthz reports status False."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Answers /healthz with a failed status and records the call."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.compiler_proxy_healthz_called = False
+
+      def ControlCompilerProxy(self, command, fast=False):
+        if command != '/healthz':
+          return super(SpyGomaEnv, self).ControlCompilerProxy(command, fast)
+        self.compiler_proxy_healthz_called = True
+        return {'status': False, 'message': '', 'url': 'fake'}
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    self.assertFalse(goma_driver._GetStatus())
+    self.assertTrue(spy_env.compiler_proxy_healthz_called)
+
+  def testEnsureStartShouldNotKillCompilerProxyWithoutUpdate(self):
+    """A running proxy with a matching version must not be quit or killed."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Running proxy whose /versionz matches GetCompilerProxyVersion()."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.compiler_proxy_running = False
+        self.get_version = False
+        self.control_with_version = False
+        self.control_with_health = False
+        self.control_with_quit = False
+        self.using_default = False
+        self.kill_all = False
+        self.can_auto_update = False
+
+      def CanAutoUpdate(self):
+        self.can_auto_update = True
+        return False
+
+      def CompilerProxyRunning(self):
+        self.compiler_proxy_running = True
+        return True
+
+      def MayUsingDefaultIPCPort(self):
+        self.using_default = True
+        return True
+
+      def KillStakeholders(self):
+        self.kill_all = True
+
+      def GetCompilerProxyVersion(self):
+        self.get_version = True
+        return 'GOMA version dummy_version'
+
+      def ControlCompilerProxy(self, command, fast=False):
+        # /versionz and /flagz are answered here; /quitquitquit and /healthz
+        # are only recorded and then delegated to the fake.
+        if command == '/versionz':
+          self.control_with_version = True
+          return {'status': True, 'message': 'dummy_version', 'url': 'fake'}
+        if command == '/flagz':
+          return {'status': True, 'message': ''}
+        if command == '/quitquitquit':
+          self.control_with_quit = True
+        elif command == '/healthz':
+          self.control_with_health = True
+        else:
+          raise Exception('Unknown command given.')
+        return super(SpyGomaEnv, self).ControlCompilerProxy(command, fast)
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    goma_driver._EnsureStartCompilerProxy()
+    self.assertFalse(spy_env.using_default)
+    self.assertFalse(spy_env.kill_all)
+    self.assertTrue(spy_env.can_auto_update)
+    self.assertTrue(spy_env.compiler_proxy_running)
+    self.assertTrue(spy_env.get_version)
+    self.assertTrue(spy_env.control_with_health)
+    self.assertTrue(spy_env.control_with_version)
+    self.assertFalse(spy_env.control_with_quit)
+
+  def testEnsureStartShouldUpdateCompilerProxy(self):
+    """_EnsureStartCompilerProxy() should auto-update from VERSION 1 to 2."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Local VERSION 1, downloaded VERSION 2; records the calls made."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.auto_update = False
+        self.http_downloaded = False
+        self.read_manifest = False
+        self.can_auto_update = False
+
+      def AutoUpdate(self):
+        self.auto_update = True
+
+      def CanAutoUpdate(self):
+        self.can_auto_update = True
+        return True
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_downloaded = True
+        return 'VERSION=2'
+
+      def ReadManifest(self, _=None):
+        self.read_manifest = True
+        return {'VERSION': '1'}
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    goma_driver._EnsureStartCompilerProxy()
+    self.assertTrue(spy_env.can_auto_update)
+    self.assertTrue(spy_env.read_manifest)
+    self.assertTrue(spy_env.http_downloaded)
+    self.assertTrue(spy_env.auto_update)
+
+  def testEnsureStartShouldNotUpdateCPIfManifestModifiedRecently(self):
+    """No download or AutoUpdate when the manifest was modified recently."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Reports a recently modified manifest; records the calls made."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.auto_update = False
+        self.http_downloaded = False
+        self.read_manifest = False
+        self.can_auto_update = False
+
+      def AutoUpdate(self):
+        self.auto_update = True
+
+      def CanAutoUpdate(self):
+        self.can_auto_update = True
+        return True
+
+      def IsManifestModifiedRecently(self, directory='', threshold=4*60*60):
+        return True
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_downloaded = True
+        return 'VERSION=2'
+
+      def ReadManifest(self, _=None):
+        self.read_manifest = True
+        return {'VERSION': '1'}
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    goma_driver._EnsureStartCompilerProxy()
+    self.assertTrue(spy_env.can_auto_update)
+    self.assertTrue(spy_env.read_manifest)
+    self.assertFalse(spy_env.http_downloaded)
+    self.assertFalse(spy_env.auto_update)
+
+  def testEnsureStartShouldUpdateCompilerProxyToRollbackRelease(self):
+    """Auto update should run when the current VERSION (2) is marked bad.
+
+    The downloaded manifest advertises VERSION 1 with bad_version=2."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Serves a downloaded manifest flagging VERSION 2 as bad."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.can_auto_update = False
+        self.read_manifest = False
+        self.http_downloaded = False
+        self.manifests = {}
+        self.auto_update = False
+
+      def CanAutoUpdate(self):
+        self.can_auto_update = True
+        return True
+
+      def ReadManifest(self, path=None):
+        self.read_manifest = True
+        # Fixed: this used to read `self.manifest[path]` (missing "s"), an
+        # attribute this class never defines.
+        if path and self.manifests.get(path):
+          return self.manifests[path]
+        return {'VERSION': '2'}
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_downloaded = True
+        if destination_file:
+          # Remember the manifest under the directory it was downloaded to.
+          dirname = os.path.dirname(destination_file)
+          self.manifests[dirname] = {'VERSION': '1', 'bad_version': '2'}
+          return
+        return 'VERSION=1\nbad_version=2'
+
+      def AutoUpdate(self):
+        self.auto_update = True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._EnsureStartCompilerProxy()
+    self.assertTrue(env.can_auto_update)
+    self.assertTrue(env.read_manifest)
+    self.assertTrue(env.http_downloaded)
+    self.assertTrue(env.auto_update)
+
+  def testEnsureStartShouldBeSurelyRunCompilerProxyWhenUpdate(self):
+    """After an auto update the compiler_proxy should still be exec'ed."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Update available (1 -> 2) and no proxy running; records calls."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.auto_update = False
+        self.http_downloaded = False
+        self.read_manifest = False
+        self.exec_compiler_proxy = False
+        self.compiler_proxy_running = False
+        self.can_auto_update = False
+
+      def AutoUpdate(self):
+        self.auto_update = True
+
+      def CanAutoUpdate(self):
+        self.can_auto_update = True
+        return True
+
+      def ExecCompilerProxy(self):
+        self.exec_compiler_proxy = True
+
+      def CompilerProxyRunning(self):
+        self.compiler_proxy_running = True
+        return False
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_downloaded = True
+        return 'VERSION=2'
+
+      def ReadManifest(self, _=None):
+        self.read_manifest = True
+        return {'VERSION': '1'}
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    goma_driver._EnsureStartCompilerProxy()
+    self.assertTrue(spy_env.can_auto_update)
+    self.assertTrue(spy_env.read_manifest)
+    self.assertTrue(spy_env.http_downloaded)
+    self.assertTrue(spy_env.auto_update)
+    self.assertTrue(spy_env.compiler_proxy_running)
+    self.assertTrue(spy_env.exec_compiler_proxy)
+
+  def testEnsureStartShouldRestartCompilerProxyIfBinaryHasUpdated(self):
+    """A running proxy should be quit and killed per the asserts below.
+
+    GetCompilerProxyVersion() returns a string meant not to match."""
+
+    class SpyGomaEnv(FakeGomaEnv):
+      """Running proxy with a deliberately odd binary version string."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.kill_stakeholders = False
+        self.get_version = False
+        self.control_with_version = False
+        self.control_with_health = False
+        self.control_with_quit = False
+        self.compiler_proxy_running = False
+
+      def KillStakeholders(self):
+        self.kill_stakeholders = True
+
+      def CompilerProxyRunning(self):
+        self.compiler_proxy_running = True
+        return True
+
+      def GetCompilerProxyVersion(self):
+        self.get_version = True
+        return 'GOMA version This version should not be matched.'
+
+      def ControlCompilerProxy(self, command, fast=False):
+        # Only /flagz is answered here; the other known commands are
+        # recorded and then delegated to the fake.
+        if command == '/flagz':
+          return {'status': True, 'message': ''}
+        if command == '/quitquitquit':
+          self.control_with_quit = True
+        elif command == '/healthz':
+          self.control_with_health = True
+        elif command == '/versionz':
+          self.control_with_version = True
+        else:
+          raise Exception('Unknown command given.')
+        return super(SpyGomaEnv, self).ControlCompilerProxy(command, fast)
+
+    spy_env = SpyGomaEnv()
+    goma_driver = self._module.GomaDriver(spy_env, FakeGomaBackend())
+    goma_driver._EnsureStartCompilerProxy()
+    self.assertTrue(spy_env.compiler_proxy_running)
+    self.assertTrue(spy_env.get_version)
+    self.assertTrue(spy_env.control_with_health)
+    self.assertTrue(spy_env.control_with_version)
+    self.assertTrue(spy_env.control_with_quit)
+    self.assertTrue(spy_env.kill_stakeholders)
+
+  def testEnsureStartShouldRestartCompilerProxyIfHealthzFailed(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.compiler_proxy_running = False
+        self.control_with_quit = False
+        self.control_with_health = False
+        self.control_with_version = False
+        self.get_version = False
+        self.kill_stakeholders = False
+        self.exec_compiler_proxy = False
+        self.status_compiler_proxy_running = True
+        self.status_healthy = False
+
+      def CompilerProxyRunning(self):
+        self.compiler_proxy_running = True
+        return self.status_compiler_proxy_running
+
+      def GetCompilerProxyVersion(self):
+        self.get_version = True
+        return 'GOMA version dummy_version'
+
+      def ControlCompilerProxy(self, command, fast=False):
+        if command == '/quitquitquit':
+          self.control_with_quit = True
+        elif command == '/healthz':
+          self.control_with_health = True
+          if self.status_healthy:
+            return {'status': True,
+                    'message': 'ok',
+                    'url': 'dummy'}
+          else:
+            return {'status': False,
+                    'message': 'connect failed',
+                    'url': ''}
+        elif command == '/versionz':
+          self.control_with_version = True
+          return {'status': True, 'message': 'dummy_version', 'url': 'fake'}
+        elif command == '/flagz':
+          return {'status': True, 'message': ''}
+        else:
+          raise Exception('Unknown command given.')
+        return super(SpyGomaEnv, self).ControlCompilerProxy(command, fast)
+
+      def KillStakeholders(self):
+        self.kill_stakeholders = True
+        self.status_compiler_proxy_running = False
+
+      def ExecCompilerProxy(self):
+        self.exec_compiler_proxy = True
+        self.status_compiler_proxy_running = True
+        self.status_healthy = True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._EnsureStartCompilerProxy()
+    self.assertTrue(env.compiler_proxy_running)
+    self.assertTrue(env.get_version)
+    self.assertTrue(env.control_with_health)
+    self.assertTrue(env.control_with_version)
+    self.assertTrue(env.control_with_quit)
+    self.assertTrue(env.exec_compiler_proxy)
+    self.assertTrue(env.kill_stakeholders)
+
+  def testEnsureStartShouldRestartIfFlagsAreChanged(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.compiler_proxy_running = False
+        self.control_with_quit = False
+        self.control_with_health = False
+        self.control_with_version = False
+        self.control_with_flagz = False
+        self.get_version = False
+        self.kill_stakeholders = False
+
+      def CompilerProxyRunning(self):
+        self.compiler_proxy_running = True
+        return True
+
+      def GetCompilerProxyVersion(self):
+        self.get_version = True
+        return 'GOMA version dummy_version'
+
+      def ControlCompilerProxy(self, command, fast=False):
+        if command == '/quitquitquit':
+          self.control_with_quit = True
+        elif command == '/healthz':
+          self.control_with_health = True
+          return {'status': True,
+                  'message': 'ok',
+                  'url': 'dummy'}
+        elif command == '/versionz':
+          self.control_with_version = True
+          return {'status': True, 'message': 'dummy_version', 'url': 'fake'}
+        elif command == '/flagz':
+          self.control_with_flagz = True
+          return {'status': True, 'message': ''}
+        else:
+          raise Exception('Unknown command given.')
+        return super(SpyGomaEnv, self).ControlCompilerProxy(command, fast)
+
+      def KillStakeholders(self):
+        self.kill_stakeholders = True
+
+    os.environ['GOMA_TEST'] = 'flag should be different'
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._EnsureStartCompilerProxy()
+    self.assertTrue(env.compiler_proxy_running)
+    self.assertTrue(env.get_version)
+    self.assertTrue(env.control_with_health)
+    self.assertTrue(env.control_with_version)
+    self.assertTrue(env.control_with_quit)
+    self.assertTrue(env.control_with_flagz)
+    self.assertTrue(env.kill_stakeholders)
+
+  def testEnsureStartShouldRestartIfFlagsAreRemoved(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.compiler_proxy_running = False
+        self.control_with_quit = False
+        self.control_with_health = False
+        self.control_with_version = False
+        self.control_with_flagz = False
+        self.get_version = False
+        self.kill_stakeholders = False
+
+      def CompilerProxyRunning(self):
+        self.compiler_proxy_running = True
+        return True
+
+      def GetCompilerProxyVersion(self):
+        self.get_version = True
+        return 'GOMA version dummy_version'
+
+      def ControlCompilerProxy(self, command, fast=False):
+        if command == '/quitquitquit':
+          self.control_with_quit = True
+        elif command == '/healthz':
+          self.control_with_health = True
+          return {'status': True,
+                  'message': 'ok',
+                  'url': 'dummy'}
+        elif command == '/versionz':
+          self.control_with_version = True
+          return {'status': True, 'message': 'dummy_version', 'url': 'fake'}
+        elif command == '/flagz':
+          self.control_with_flagz = True
+          return {'status': True, 'message': 'GOMA_TEST=test\n'}
+        else:
+          raise Exception('Unknown command given.')
+        return super(SpyGomaEnv, self).ControlCompilerProxy(command, fast)
+
+      def KillStakeholders(self):
+        self.kill_stakeholders = True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._EnsureStartCompilerProxy()
+    self.assertTrue(env.compiler_proxy_running)
+    self.assertTrue(env.get_version)
+    self.assertTrue(env.control_with_health)
+    self.assertTrue(env.control_with_version)
+    self.assertTrue(env.control_with_quit)
+    self.assertTrue(env.control_with_flagz)
+    self.assertTrue(env.kill_stakeholders)
+
+  def testEnsureStartShouldNotRestartIfFlagsNotChanged(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.compiler_proxy_running = False
+        self.control_with_quit = False
+        self.control_with_health = False
+        self.control_with_version = False
+        self.control_with_flagz = False
+        self.get_version = False
+        self.kill_stakeholders = False
+
+      def CompilerProxyRunning(self):
+        self.compiler_proxy_running = True
+        return True
+
+      def GetCompilerProxyVersion(self):
+        self.get_version = True
+        return 'GOMA version dummy_version'
+
+      def ControlCompilerProxy(self, command, fast=False):
+        if command == '/quitquitquit':
+          self.control_with_quit = True
+        elif command == '/healthz':
+          self.control_with_health = True
+          return {'status': True,
+                  'message': 'ok',
+                  'url': 'dummy'}
+        elif command == '/versionz':
+          self.control_with_version = True
+          return {'status': True, 'message': 'dummy_version', 'url': 'fake'}
+        elif command == '/flagz':
+          self.control_with_flagz = True
+          return {'status': True, 'message': ''}
+        else:
+          raise Exception('Unknown command given.')
+        return super(SpyGomaEnv, self).ControlCompilerProxy(command, fast)
+
+      def KillStakeholders(self):
+        self.kill_stakeholders = True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._EnsureStartCompilerProxy()
+    self.assertTrue(env.compiler_proxy_running)
+    self.assertTrue(env.get_version)
+    self.assertTrue(env.control_with_health)
+    self.assertTrue(env.control_with_version)
+    self.assertTrue(env.control_with_flagz)
+    self.assertFalse(env.control_with_quit)
+    self.assertFalse(env.kill_stakeholders)
+
+  def testEnsureStartShouldRestartCompilerProxyIfUnhealthy(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.compiler_proxy_running = False
+        self.control_with_quit = False
+        self.control_with_health = False
+        self.control_with_version = False
+        self.get_version = False
+        self.kill_stakeholders = False
+
+      def CompilerProxyRunning(self):
+        self.compiler_proxy_running = True
+        return True
+
+      def GetCompilerProxyVersion(self):
+        self.get_version = True
+        return 'GOMA version dummy_version'
+
+      def ControlCompilerProxy(self, command, fast=False):
+        if command == '/quitquitquit':
+          self.control_with_quit = True
+        elif command == '/healthz':
+          self.control_with_health = True
+          return {'status': True,
+                  'message': 'running: failed to connect to backend servers',
+                  'url': ''}
+        elif command == '/versionz':
+          self.control_with_version = True
+          return {'status': True, 'message': 'dummy_version', 'url': 'fake'}
+        elif command == '/flagz':
+          return {'status': True, 'message': ''}
+        else:
+          raise Exception('Unknown command given.')
+        return super(SpyGomaEnv, self).ControlCompilerProxy(command, fast)
+
+      def KillStakeholders(self):
+        self.kill_stakeholders = True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._EnsureStartCompilerProxy()
+    self.assertTrue(env.compiler_proxy_running)
+    self.assertTrue(env.get_version)
+    self.assertTrue(env.control_with_health)
+    self.assertTrue(env.control_with_version)
+    self.assertTrue(env.control_with_quit)
+    self.assertTrue(env.kill_stakeholders)
+
+  def testFetchShouldBeAbleToReturnMyPackage(self):
+    self._module._GetPackageName(self._platform_specific.GetPlatform())
+
+  def testFetchShouldRaiseIfPackageUnknown(self):
+    self.assertRaises(self._module.ConfigError,
+                      self._module._GetPackageName,
+                      'unknown_package_name')
+
+  def testFetchShouldRun(self):
+    driver = self._module.GomaDriver(FakeGomaEnv(), FakeGomaBackend())
+    driver._args = ['dummy', self._platform_specific.GetPlatform()]
+    driver._Fetch()
+
+  def testFetchShouldOutputToGivenOutputFile(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.http_download = False
+        self.dest = None
+
+      def HttpDownload(self, url,
+                       rewrite_url=None, headers=None, destination_file=None):
+        self.http_download = True
+        self.dest = destination_file
+
+    output_file = 'TEST'
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._args = ['dummy', self._platform_specific.GetPlatform(), output_file]
+    driver._Fetch()
+    self.assertTrue(env.http_download)
+    self.assertTrue(output_file in env.dest,
+                    msg='Seems not output to specified file.')
+
+  def testFetchShouldRaiseIfPlatformNotGiven(self):
+    driver = self._module.GomaDriver(FakeGomaEnv(), FakeGomaBackend())
+    self.assertRaises(self._module.ConfigError, driver._Fetch)
+
+  def testIsCompilerProxySilentlyUpdatedShouldReturnTrueIfVersionMismatch(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.get_version = False
+        self.control_with_version = False
+
+      def GetCompilerProxyVersion(self):
+        self.get_version = True
+        return 'GOMA version fake0'
+
+      def ControlCompilerProxy(self, command, fast=False):
+        self.control_with_version = True
+        if command == '/versionz':
+          return {'status': True, 'message': 'fake1', 'url': 'fake'}
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    self.assertTrue(driver._IsCompilerProxySilentlyUpdated())
+    self.assertTrue(env.get_version)
+    self.assertTrue(env.control_with_version)
+
+  def testIsCompilerProxySilentlyUpdatedShouldReturnFalseIfVersionMatch(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.get_version = False
+        self.control_with_version = False
+
+      def GetCompilerProxyVersion(self):
+        self.get_version = True
+        return 'GOMA version fake0'
+
+      def ControlCompilerProxy(self, command, fast=False):
+        self.control_with_version = True
+        if command == '/versionz':
+          return {'status': True, 'message': 'fake0', 'url': 'fake'}
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    self.assertFalse(driver._IsCompilerProxySilentlyUpdated())
+    self.assertTrue(env.get_version)
+    self.assertTrue(env.control_with_version)
+
+  def testGetJsonStatusShouldShowErrorStatusOnControlCompilerProxyError(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.control_compiler_proxy = False
+
+      def ControlCompilerProxy(self, command, fast=False):
+        self.control_compiler_proxy = True
+        if command == '/errorz':
+          return {'status': False}
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    printed_json = json.loads(driver._GetJsonStatus())
+    self.assertTrue(env.control_compiler_proxy)
+    self.assertEqual(printed_json['notice'][0]['compile_error'],
+                     'COMPILER_PROXY_UNREACHABLE')
+
+  def testGetJsonStatusShouldShowCompilerProxyReplyAsIsIfAvailable(self):
+    compiler_proxy_output = '{"fake": 0}'
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.control_compiler_proxy = False
+
+      def ControlCompilerProxy(self, command, fast=False):
+        self.control_compiler_proxy = True
+        if command == '/errorz':
+          return {'status': True, 'message': compiler_proxy_output}
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    returned = driver._GetJsonStatus()
+    self.assertTrue(env.control_compiler_proxy)
+    self.assertEqual(returned, compiler_proxy_output)
+
+  def testCreateGomaTmpDirectoryNew(self):
+    fake_tmpdir = '/tmp/gomatest_chrome-bot'
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.is_directory_exist = None
+        self.make_directory = None
+        self.ensure_directory_owned_by_user = None
+
+      def GetGomaTmpDir(self):
+        return fake_tmpdir
+
+      def IsDirectoryExist(self, dirname):
+        self.is_directory_exist = dirname
+        return False
+
+      def MakeDirectory(self, dirname):
+        self.make_directory = dirname
+
+      def EnsureDirectoryOwnedByUser(self, dirname):
+        self.ensure_directory_owned_by_user = dirname
+        return True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    orig_goma_tmp_dir = os.environ.get('GOMA_TMP_DIR')
+    self.assertNotEquals(orig_goma_tmp_dir, fake_tmpdir)
+    driver._CreateGomaTmpDirectory()
+    goma_tmp_dir = os.environ.get('GOMA_TMP_DIR')
+    if orig_goma_tmp_dir:
+      os.environ['GOMA_TMP_DIR'] = orig_goma_tmp_dir
+    else:
+      del os.environ['GOMA_TMP_DIR']
+    self.assertEquals(env.is_directory_exist, fake_tmpdir)
+    self.assertEquals(env.make_directory, fake_tmpdir)
+    self.assertEquals(env.ensure_directory_owned_by_user, None)
+    self.assertEquals(goma_tmp_dir, fake_tmpdir)
+
+  def testCreateGomaTmpDirectoryExists(self):
+    fake_tmpdir = '/tmp/gomatest_chrome-bot'
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.is_directory_exist = None
+        self.make_directory = None
+        self.ensure_directory_owned_by_user = None
+
+      def GetGomaTmpDir(self):
+        return fake_tmpdir
+
+      def IsDirectoryExist(self, dirname):
+        self.is_directory_exist = dirname
+        return True
+
+      def MakeDirectory(self, dirname):
+        self.make_directory = dirname
+
+      def EnsureDirectoryOwnedByUser(self, dirname):
+        self.ensure_directory_owned_by_user = dirname
+        return True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    orig_goma_tmp_dir = os.environ.get('GOMA_TMP_DIR')
+    self.assertNotEquals(orig_goma_tmp_dir, fake_tmpdir)
+    driver._CreateGomaTmpDirectory()
+    goma_tmp_dir = os.environ.get('GOMA_TMP_DIR')
+    if orig_goma_tmp_dir:
+      os.environ['GOMA_TMP_DIR'] = orig_goma_tmp_dir
+    else:
+      del os.environ['GOMA_TMP_DIR']
+    self.assertEquals(env.is_directory_exist, fake_tmpdir)
+    self.assertEquals(env.make_directory, None)
+    self.assertEquals(env.ensure_directory_owned_by_user, fake_tmpdir)
+    self.assertEquals(goma_tmp_dir, fake_tmpdir)
+
+
+  def testCreateCrashDumpDirectoryShouldNotCreateDirectoryIfExist(self):
+    fake_dump_dir = '/dump_dir'
+    expected_dump_dir = fake_dump_dir
+
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.is_directory_exist = False
+        self.is_directory_exist_dir = ''
+        self.ensure_directory_owned_by_user = False
+        self.ensure_directory_owned_by_user_dir = ''
+        self.make_directory = False
+        self.get_crash_dump_directory = False
+
+      def GetCrashDumpDirectory(self):
+        self.get_crash_dump_directory = True
+        return fake_dump_dir
+
+      def IsDirectoryExist(self, dirname):
+        self.is_directory_exist = True
+        self.is_directory_exist_dir = dirname
+        return True
+
+      def EnsureDirectoryOwnedByUser(self, dirname):
+        self.ensure_directory_owned_by_user = True
+        self.ensure_directory_owned_by_user_dir = dirname
+        return True
+
+      def MakeDirectory(self, _):
+        self.make_directory = True
+        return True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._CreateCrashDumpDirectory()
+    self.assertTrue(env.get_crash_dump_directory)
+    self.assertTrue(env.is_directory_exist)
+    self.assertEqual(env.is_directory_exist_dir, expected_dump_dir)
+    self.assertTrue(env.ensure_directory_owned_by_user)
+    self.assertEqual(env.ensure_directory_owned_by_user_dir, expected_dump_dir)
+    self.assertFalse(env.make_directory)
+
+  def testCreateCrashDumpDirectoryShouldCreateDirectoryIfNotExist(self):
+    fake_dump_dir = '/dump_dir'
+    expected_dump_dir = fake_dump_dir
+
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.is_directory_exist = False
+        self.is_directory_exist_dir = ''
+        self.ensure_directory_owned_by_user = False
+        self.make_directory = False
+        self.make_directory_dir = ''
+        self.get_crash_dump_directory = False
+
+      def GetCrashDumpDirectory(self):
+        self.get_crash_dump_directory = True
+        return fake_dump_dir
+
+      def IsDirectoryExist(self, dirname):
+        self.is_directory_exist = True
+        self.is_directory_exist_dir = dirname
+        return False
+
+      def EnsureDirectoryOwnedByUser(self, _):
+        self.ensure_directory_owned_by_user = True
+        return True
+
+      def MakeDirectory(self, dirname):
+        self.make_directory = True
+        self.make_directory_dir = dirname
+        return True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._CreateCrashDumpDirectory()
+    self.assertTrue(env.get_crash_dump_directory)
+    self.assertTrue(env.is_directory_exist)
+    self.assertEqual(env.is_directory_exist_dir, expected_dump_dir)
+    self.assertFalse(env.ensure_directory_owned_by_user)
+    self.assertTrue(env.make_directory)
+    self.assertEqual(env.make_directory_dir, expected_dump_dir)
+
+  def testCreateCacheDirectoryShouldNotCreateDirectoryIfExist(self):
+    fake_cache_dir = '/cache_dir'
+    expected_cache_dir = fake_cache_dir
+
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.is_directory_exist = False
+        self.is_directory_exist_dir = ''
+        self.ensure_directory_owned_by_user = False
+        self.ensure_directory_owned_by_user_dir = ''
+        self.make_directory = False
+        self.get_cache_directory = False
+
+      def GetCacheDirectory(self):
+        self.get_cache_directory = True
+        return fake_cache_dir
+
+      def IsDirectoryExist(self, dirname):
+        self.is_directory_exist = True
+        self.is_directory_exist_dir = dirname
+        return True
+
+      def EnsureDirectoryOwnedByUser(self, dirname):
+        self.ensure_directory_owned_by_user = True
+        self.ensure_directory_owned_by_user_dir = dirname
+        return True
+
+      def MakeDirectory(self, _):
+        self.make_directory = True
+        return True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._CreateCacheDirectory()
+    self.assertTrue(env.get_cache_directory)
+    self.assertTrue(env.is_directory_exist)
+    self.assertEqual(env.is_directory_exist_dir, expected_cache_dir)
+    self.assertTrue(env.ensure_directory_owned_by_user)
+    self.assertEqual(env.ensure_directory_owned_by_user_dir,
+                     expected_cache_dir)
+    self.assertFalse(env.make_directory)
+
+  def testCreateCacheDirectoryShouldCreateDirectoryIfNotExist(self):
+    fake_cache_dir = 'cache_dir'
+    expected_cache_dir = fake_cache_dir
+
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.is_directory_exist = False
+        self.is_directory_exist_dir = ''
+        self.ensure_directory_owned_by_user = False
+        self.make_directory = False
+        self.make_directory_dir = ''
+        self.get_cache_directory = False
+
+      def GetCacheDirectory(self):
+        self.get_cache_directory = True
+        return fake_cache_dir
+
+      def IsDirectoryExist(self, dirname):
+        self.is_directory_exist = True
+        self.is_directory_exist_dir = dirname
+        return False
+
+      def EnsureDirectoryOwnedByUser(self, _):
+        self.ensure_directory_owned_by_user = True
+        return True
+
+      def MakeDirectory(self, dirname):
+        self.make_directory = True
+        self.make_directory_dir = dirname
+        return True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    driver._CreateCacheDirectory()
+    self.assertTrue(env.get_cache_directory)
+    self.assertTrue(env.is_directory_exist)
+    self.assertEqual(env.is_directory_exist_dir, expected_cache_dir)
+    self.assertFalse(env.ensure_directory_owned_by_user)
+    self.assertTrue(env.make_directory)
+
+  def testValidFilesShouldReturnFalseIfOneFileMagicIsNotValid(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      """Spy GomaEnv to provide IsValidMagic."""
+
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.written_manifest = {}
+        self.is_valid_magic = False
+
+      def IsValidMagic(self, filename):
+        sys.stderr.write(filename)
+        if filename.endswith('wrong_magic'):
+          self.is_valid_magic = True
+          return False
+        return True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    self.assertFalse(driver._ValidFiles(['test', 'wrong_magic']))
+    self.assertTrue(env.is_valid_magic)
+
+  def testAuditShouldReturnTrueForEmptyJSON(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.load_checksum = False
+        self.calculate_checksum = False
+
+      def LoadChecksum(self, update_dir=''):
+        self.load_checksum = True
+        return {}
+
+      def CalculateChecksum(self, _, update_dir=''):
+        self.calculate_checksum = True
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    self.assertTrue(driver._Audit())
+    self.assertTrue(env.load_checksum)
+    self.assertFalse(env.calculate_checksum)
+
+  def testAuditShouldReturnTrueForValidChecksum(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.load_checksum = False
+        self.calculate_checksum = False
+
+      def LoadChecksum(self, update_dir=''):
+        self.load_checksum = True
+        return {'compiler_proxy': 'valid_checksum'}
+
+      def CalculateChecksum(self, filename, update_dir=''):
+        self.calculate_checksum = True
+        assert filename == 'compiler_proxy'
+        return 'valid_checksum'
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    self.assertTrue(driver._Audit())
+    self.assertTrue(env.load_checksum)
+    self.assertTrue(env.calculate_checksum)
+
+  def testAuditShouldReturnFalseForInvalidChecksum(self):
+    class SpyGomaEnv(FakeGomaEnv):
+      def __init__(self):
+        super(SpyGomaEnv, self).__init__()
+        self.load_checksum = False
+        self.calculate_checksum = False
+
+      def LoadChecksum(self, update_dir=''):
+        self.load_checksum = True
+        return {'compiler_proxy': 'valid_checksum'}
+
+      def CalculateChecksum(self, filename, update_dir=''):
+        self.calculate_checksum = True
+        assert filename == 'compiler_proxy'
+        return 'invalid_checksum'
+
+    env = SpyGomaEnv()
+    driver = self._module.GomaDriver(env, FakeGomaBackend())
+    self.assertFalse(driver._Audit())
+    self.assertTrue(env.load_checksum)
+    self.assertTrue(env.calculate_checksum)
+
+
+class GomaEnvTest(GomaCtlTestCommon):
+  """Medium tests for GomaEnv in goma_ctl.py.
+
+  Some tests in this class may affect external environment.
+  """
+  # test should be able to access protected members and variables.
+  # pylint: disable=W0212
+
+  def testSetupEnvShouldAutomaticallySetProxyParamIfGomaProxyEnvIsSet(self):
+    os.environ['GOMA_PROXY_HOST'] = 'proxy.example.com'
+    os.environ['GOMA_PROXY_PORT'] = '3128'
+    env = self._module.GomaEnv()
+    self.assertEqual(env._https_proxy, 'proxy.example.com:3128')
+
+  def testSetupEnvShouldAutomaticallySetProxyParamIfHttpProxyEnvIsSet(self):
+    os.environ['https_proxy'] = 'proxy.example.com:3128'
+    env = self._module.GomaEnv()
+    self.assertEqual(env._https_proxy, 'proxy.example.com:3128')
+
+  def testSetupEnvShouldNotSetProxyParamIfGomaProxyEnvIsBroken(self):
+    os.environ['GOMA_PROXY_HOST'] = 'proxy.example.com'
+    os.environ['GOMA_PROXY_PORT'] = '3128'
+    env = self._module.GomaEnv()
+    self.assertEqual(env._https_proxy, 'proxy.example.com:3128')
+
+  def testBackupCurrentPackageShouldCreateBackup(self):
+    env = self._module.GomaEnv()
+    env.BackupCurrentPackage()
+    self.assertTrue(os.path.isfile(os.path.join(env._dir, 'backup',
+                                                'goma_ctl.py')))
+
+  def testBackupCurrentPackageShouldOverwriteBackupDirectory(self):
+    dummy_data = 'Hello! This is dummy data.'
+    env = self._module.GomaEnv()
+    self.assertFalse(os.path.isfile(os.path.join(env._dir, 'backup')))
+    env.MakeDirectory('backup')
+    dummy_goma_ctl_path = os.path.join(env._dir, 'backup', 'goma_ctl.py')
+    with open(dummy_goma_ctl_path, 'w') as f:
+      f.write(dummy_data)
+    self.assertEqual(open(dummy_goma_ctl_path).read(), dummy_data)
+    env.BackupCurrentPackage()
+    self.assertTrue(os.path.isfile(dummy_goma_ctl_path))
+    self.assertNotEqual(open(dummy_goma_ctl_path).read(), dummy_data)
+
+  def testRollbackShouldRollbackUpdate(self):
+    env = self._module.GomaEnv()
+    test_file_tuple = tempfile.mkstemp(dir=env._dir)
+    os.close(test_file_tuple[0])
+    test_file = test_file_tuple[1]
+    with open(test_file, 'w') as f:
+      f.write('before')
+    self.assertTrue(os.path.isfile(test_file))
+    env.BackupCurrentPackage()
+    with open(os.path.join(env._dir, test_file), 'w') as f:
+      f.write('after')
+    self.assertEqual(open(test_file).read(), 'after')
+    env.RollbackUpdate()
+    self.assertEqual(open(test_file).read(), 'before')
+
+  def testRollbackShouldRollbackVeryLongFileName(self):
+    env = self._module.GomaEnv()
+    long_suffix = '.' + 'Aa0-' * 16
+    long_prefix = 'Aa0-' * 16
+    # File name should be at least 128 charactors.
+    test_file_tuple = tempfile.mkstemp(suffix=long_suffix, prefix=long_prefix,
+                                       dir=env._dir)
+    os.close(test_file_tuple[0])
+    test_file = test_file_tuple[1]
+    self.assertTrue(len(test_file) > 128,
+                    msg='assuming at least 128 charactors filename.')
+    with open(test_file, 'w') as f:
+      f.write('before')
+    self.assertTrue(os.path.isfile(test_file))
+    env.BackupCurrentPackage()
+    with open(os.path.join(env._dir, test_file), 'w') as f:
+      f.write('after')
+    self.assertEqual(open(test_file).read(), 'after')
+    env.RollbackUpdate()
+    self.assertEqual(open(test_file).read(), 'before')
+
+  def testRollbackShouldNotDieEvenIfOriginalContainsDirectory(self):
+    env = self._module.GomaEnv()
+    tmp_dir = tempfile.mkdtemp(dir=env._dir)
+    self.assertTrue(os.path.isdir(os.path.join(env._dir, tmp_dir)))
+    env.BackupCurrentPackage()
+    env.RollbackUpdate()
+
+  def testRollbackShouldRecreateRemovedDirectory(self):
+    env = self._module.GomaEnv()
+    tmp_dir = tempfile.mkdtemp(dir=env._dir)
+    self.assertTrue(os.path.isdir(os.path.join(env._dir, tmp_dir)))
+    env.BackupCurrentPackage()
+    env.RemoveDirectory(tmp_dir)
+    self.assertFalse(os.path.isdir(os.path.join(env._dir, tmp_dir)))
+    env.RollbackUpdate()
+    self.assertTrue(os.path.isdir(os.path.join(env._dir, tmp_dir)))
+
+  def testShouldSetPlatformEnvIfPlatformNotInManifest(self):
+    os.environ['PLATFORM'] = 'goobuntu'
+    self.assertTrue(os.environ.get('PLATFORM'))
+    env = self._module.GomaEnv()
+    self.assertFalse(os.path.exists(os.path.join(env._dir, 'MANIFEST')))
+    self.assertEqual(env._platform, 'goobuntu')
+
+  def testShouldPreferPlatformInManifestToEnv(self):
+    os.environ['PLATFORM'] = 'goobuntu'
+    self.assertTrue(os.environ.get('PLATFORM'))
+    manifest_file = os.path.join(self._tmp_dir, self._TMP_SUBDIR_NAME,
+                                 'MANIFEST')
+    with open(manifest_file, 'w') as f:
+      f.write('PLATFORM=chromeos')
+    env = self._module.GomaEnv()
+    self.assertTrue(os.path.exists(os.path.join(env._dir, 'MANIFEST')))
+    self.assertEqual(env._platform, 'chromeos')
+
+  def testIsValidMagicShouldBeTrueForValidManifest(self):
+    filename = os.path.join(self._tmp_dir, self._TMP_SUBDIR_NAME, 'MANIFEST')
+    with open(filename, 'w') as f:
+      f.write('PLATFORM=goobuntu\nVERSION=1')
+    env = self._module.GomaEnv()
+    self.assertTrue(env.IsValidMagic(filename))
+
+  def testIsValidMagicShouldBeFalseForValidManifest(self):
+    filename = os.path.join(self._tmp_dir, self._TMP_SUBDIR_NAME, 'MANIFEST')
+    with open(filename, 'w') as f:
+      f.write('invalid magic')
+    env = self._module.GomaEnv()
+    self.assertFalse(env.IsValidMagic(filename))
+
+  def testGeneratedChecksumShouldBeValid(self):
+    env = self._module.GomaEnv()
+    cksums = env.LoadChecksum()
+    self.assertTrue(cksums)
+    for filename, checksum in cksums.iteritems():
+      self.assertEqual(env.CalculateChecksum(filename), checksum)
+
+  def testIsOldFileShouldReturnTrueForOldFile(self):
+    filename = os.path.join(self._tmp_dir, self._TMP_SUBDIR_NAME, 'test')
+    with open(filename, 'w') as f:
+      f.write('test')
+    env = self._module.GomaEnv()
+    env._time = time.time() + 120
+    os.environ['GOMA_LOG_CLEAN_INTERVAL'] = '1'
+    self.assertTrue(env.IsOldFile(filename))
+
+  def testIsOldFileShouldReturnFalseIfAFileIsNew(self):
+    filename = os.path.join(self._tmp_dir, self._TMP_SUBDIR_NAME, 'test')
+    with open(filename, 'w') as f:
+      f.write('test')
+    env = self._module.GomaEnv()
+    env._time = time.time()
+    os.environ['GOMA_LOG_CLEAN_INTERVAL'] = '60'
+    self.assertFalse(env.IsOldFile(filename))
+
+  def testIsOldFileShouldReturnFalseIfLogCleanIntervalIsNegative(self):
+    filename = os.path.join(self._tmp_dir, self._TMP_SUBDIR_NAME, 'test')
+    with open(filename, 'w') as f:
+      f.write('test')
+    env = self._module.GomaEnv()
+    env._time = time.time() + 120
+    os.environ['GOMA_LOG_CLEAN_INTERVAL'] = '-1'
+    self.assertFalse(env.IsOldFile(filename))
+
+  def testFindCurlShouldFindCurl(self):
+    env = self._module._GOMA_ENVS[os.name]()
+    curl_path = env._FindCurlPath()
+    self.assertTrue(curl_path)
+    if os.name == 'nt':
+      self.assertTrue('\\depot_tools\\' in curl_path)
+
+  def testMakeDirectory(self):
+    env = self._module._GOMA_ENVS[os.name]()
+    tmpdir = tempfile.mkdtemp()
+    os.rmdir(tmpdir)
+    self.assertFalse(os.path.exists(tmpdir))
+    env.MakeDirectory(tmpdir)
+    self.assertTrue(os.path.isdir(tmpdir))
+    if os.name != 'nt':
+      st = os.stat(tmpdir)
+      self.assertEquals(st.st_uid, os.geteuid())
+      self.assertEquals((st.st_mode & 077), 0)
+    os.rmdir(tmpdir)
+
+  def testGetGomaTmpDir(self):
+    """GetGomaTmpDir() honours each temp-dir environment variable in turn.
+
+    os.path.isdir is stubbed to always return True, the user name is faked
+    as 'chrome-bot' and the user runtime directory lookup is stubbed to
+    return None.  Each test case sets at most one environment variable and
+    checks the directory that GetGomaTmpDir() returns.
+    """
+    env = self._module._GOMA_ENVS[os.name]()
+    # Keep the real isdir so it can be put back in the outer finally.
+    isdir = os.path.isdir
+    os.path.isdir = lambda dirname: True
+    # Snapshot of the environment, restored key by key at the end.
+    oenv = os.environ.copy()
+    for tmp in ('GOMA_TMP_DIR', 'TEST_TMPDIR', 'TMPDIR', 'TMP'):
+      if tmp in os.environ:
+        del os.environ[tmp]
+    fake_user = 'chrome-bot'
+    # NOTE(review): these two module attributes are not restored afterwards.
+    self._module._GetUsername = lambda: fake_user
+    self._module._GetUserRuntimeDirectory = lambda: None
+    try:
+      # Each test case is (variable name or None, value, expected directory).
+      if os.name == 'nt':
+        testcases = ((None, None,
+                      os.path.join('/tmp', 'goma')),
+                     ('GOMA_TMP_DIR', 'c:\\tmp\\goma', 'c:\\tmp\\goma'),
+                     ('TEST_TMPDIR', 'c:\\tmp\\test',
+                      os.path.join('c:\\tmp\\test', 'goma')),
+                     ('TMPDIR', 'c:\\tmp',
+                      os.path.join('c:\\tmp', 'goma')),
+                     ('TMP', 'c:\\tmp',
+                      os.path.join('c:\\tmp', 'goma')))
+      else:
+        testcases = ((None, None,
+                      os.path.join('/tmp', 'goma_chrome-bot')),
+                     ('GOMA_TMP_DIR', '/tmp/goma', '/tmp/goma'),
+                     ('TEST_TMPDIR', '/tmp/test',
+                      os.path.join('/tmp/test', 'goma_chrome-bot')),
+                     ('TMPDIR', '/var/tmp',
+                      os.path.join('/var/tmp', 'goma_chrome-bot')),
+                     ('TMP', '/var/tmp',
+                      os.path.join('/var/tmp', 'goma_chrome-bot')))
+      for (envname, envval, expected) in testcases:
+        if envname:
+          os.environ[envname] = envval
+        try:
+          tmpdir = env.GetGomaTmpDir()
+          self.assertEqual(tmpdir, expected)
+        finally:
+          # Unset the variable so the next case starts from a clean slate.
+          if envname:
+            del os.environ[envname]
+
+    finally:
+      os.path.isdir = isdir
+      for (k, v) in oenv.iteritems():
+        os.environ[k] = v
+
+  def testEnsureDirectoryOwnedByUser(self):
+    tmpdir = tempfile.mkdtemp()
+    env = self._module._GOMA_ENVS[os.name]()
+    if os.name == 'nt':
+      self.assertTrue(env.EnsureDirectoryOwnedByUser(tmpdir))
+      os.rmdir(tmpdir)
+      return
+    self._module._GetUserRuntimeDirectory = lambda: None
+    # test only permissions will not have readable/writable for group/other.
+    os.chmod(tmpdir, 0755)
+    st = os.stat(tmpdir)
+    self.assertEquals(st.st_uid, os.geteuid())
+    self.assertNotEquals((st.st_mode & 077), 0)
+    self.assertTrue(env.EnsureDirectoryOwnedByUser(tmpdir))
+    self.assertTrue(os.path.isdir(tmpdir))
+    st = os.stat(tmpdir)
+    self.assertEquals(st.st_uid, os.geteuid())
+    self.assertEquals((st.st_mode & 077), 0)
+    os.rmdir(tmpdir)
+
+  def testUserRuntimeDirectoryIsPreferred(self):
+    if os.name == 'nt':  # This test is not meaningful on nt.
+      return
+    fake_user = 'chrome-bot'
+    fake_dir = '/run/user/1000'
+    self._module._GetUsername = lambda: fake_user
+    self._module._GetUserRuntimeDirectory = lambda: fake_dir
+
+    oenv = os.environ.copy()
+    try:
+      if 'GOMA_TMP_DIR' in os.environ:
+        del os.environ['GOMA_TMP_DIR']
+
+      env = self._module._GOMA_ENVS[os.name]()
+      tmpdir = env.GetGomaTmpDir()
+      self.assertEqual(tmpdir, os.path.join(fake_dir, 'goma_%s' % fake_user))
+    finally:
+      for (k, v) in oenv.iteritems():
+        os.environ[k] = v
+
+  def testCreateCacheDirectoryShouldUseDefaultIfNoEnv(self):
+    fake_tmp_dir = '/fake_tmp'
+    expected_cache_dir = os.path.join(
+        fake_tmp_dir, self._module._CACHE_DIR)
+
+    env = self._module._GOMA_ENVS[os.name]()
+    env.GetGomaTmpDir = lambda: fake_tmp_dir
+    self.assertEqual(env.GetCacheDirectory(), expected_cache_dir)
+
+  def testCreateCacheDirectoryShouldRespectCacheDirEnv(self):
+    fake_tmp_dir = '/fake_tmp'
+    fake_cache_dir = '/fake_cache_dir'
+    expected_cache_dir = fake_cache_dir
+
+    env = self._module._GOMA_ENVS[os.name]()
+    env.GetGomaTmpDir = lambda: fake_tmp_dir
+    try:
+      backup = os.environ.get('GOMA_CACHE_DIR')
+      os.environ['GOMA_CACHE_DIR'] = fake_cache_dir
+      self.assertEqual(env.GetCacheDirectory(), expected_cache_dir)
+    finally:
+      if backup:
+        os.environ['GOMA_CACHE_DIR'] = backup
+      else:
+        del os.environ['GOMA_CACHE_DIR']
+
+
+class GomaCtlLargeTestCommon(GomaCtlTestCommon):
+  """Large tests for goma_ctl.py.
+
+  All tests in this class may affect external environment.  It may try to
+  download packages from servers and I/O local files in test environment.
+  """
+  # test should be able to access protected members and variables.
+  # pylint: disable=W0212
+
+  def __init__(self, method_name, goma_ctl_path, platform_specific,
+               oauth2_file, port):
+    """Initialize GomaCtlTest.
+
+    Args:
+      method_name: a string of test method name to execute.
+      goma_ctl_path: a string of goma directory name.
+      platform_specific: a object for providing platform specific behavior.
+      oauth2_file: a string of OAuth2 service account JSON filename.
+      port: a string or an integer port number of compiler_proxy.
+    """
+    super(GomaCtlLargeTestCommon, self).__init__(method_name, goma_ctl_path,
+                                                 platform_specific)
+    self._oauth2_file = oauth2_file
+    self._port = int(port)
+    self._driver = None
+
+  def setUp(self):
+    """Run the base setUp, then apply compiler_proxy settings for the test.
+
+    The platform helper receives the per-test temporary directory and the
+    configured port.
+    """
+    super(GomaCtlLargeTestCommon, self).setUp()
+    self._platform_specific.SetCompilerProxyEnv(self._tmp_dir, self._port)
+
+  def tearDown(self):
+    if self._driver:
+      self._driver._ShutdownCompilerProxy()
+      if not self._driver._WaitCooldown():
+        self._driver._KillStakeholders()
+    super(GomaCtlLargeTestCommon, self).tearDown()
+
+  def StartWithModifiedVersion(self, version=None):
+    """Start compiler proxy with modified version.
+
+    Since start-up method is overwritten with dummy method, we do not need
+    to stop the compiler proxy.
+
+    Args:
+      version: current version to be written.  When falsy, no manifest is
+        written at all.
+    """
+    driver = self._module.GetGomaDriver()
+    manifest = {}
+    if version:
+      manifest['VERSION'] = version
+      # Set 'PLATFORM' explicitly here rather than letting goma_ctl ask the
+      # platform itself.
+      manifest['PLATFORM'] = self._platform_specific.GetPlatform()
+      driver._env.WriteManifest(manifest)
+    # A second driver is obtained after the manifest may have been written;
+    # this is the one that gets stubbed and started.
+    driver = self._module.GetGomaDriver()
+    # Put fake methods instead of actual one to improve performance of tests.
+    driver._env.GetCompilerProxyVersion = lambda dummy = None: 'dummy'
+    driver._env.ExecCompilerProxy = lambda dummy = None: True
+    driver._env.ControlCompilerProxy = lambda dummy = None, fast=False: {
+        'status': True, 'message': 'msg', 'url': 'url'}
+    driver._env.CompilerProxyRunning = lambda dummy = None: True
+    driver._StartCompilerProxy()
+
+  def testPullShouldDownloadAndUpdateManifest(self):
+    driver = self._module.GetGomaDriver()
+    driver._env._platform = self._platform_specific.GetPlatform()
+    driver._Pull()
+    manifest = driver._env.ReadManifest(driver._latest_package_dir)
+    self.assertTrue(manifest)
+    self.assertTrue('PLATFORM' in manifest)
+    self.assertEqual(manifest['PLATFORM'],
+                     self._platform_specific.GetPlatform())
+    self.assertTrue('VERSION' in manifest)
+
+  def testUpdateShouldUpdateManifestAndCompilerProxyButNotAutoRunIt(self):
+    """We expect 'update' command updates compiler proxy and manifest.
+
+    However, we do not expect it automatically run compiler proxy if it did not
+    run before.
+    """
+    driver = self._module.GetGomaDriver()
+    old_timestamp = os.stat(os.path.join(
+        self._tmp_dir, self._TMP_SUBDIR_NAME,
+        driver._env._COMPILER_PROXY)).st_mtime
+    driver._env._platform = self._platform_specific.GetPlatform()
+    self.assertFalse(driver._env.ReadManifest())
+    self.assertFalse(driver._env.CompilerProxyRunning())
+    driver._Update()
+    manifest = driver._env.ReadManifest()
+    self.assertTrue(manifest)
+    self.assertTrue('PLATFORM' in manifest)
+    self.assertTrue('VERSION' in manifest)
+    new_timestamp = os.stat(os.path.join(
+        self._tmp_dir, self._TMP_SUBDIR_NAME,
+        driver._env._COMPILER_PROXY)).st_mtime
+    self.assertNotEqual(old_timestamp, new_timestamp,
+                        msg=('Update should update the compiler proxy.'
+                             'old: %d, new: %d' % (old_timestamp,
+                                                   new_timestamp)))
+    self.assertFalse(driver._env.CompilerProxyRunning())
+
+  def testUpdateShouldUpdateCompilerProxyAndRestartIfItIsRunning(self):
+    self._driver = self._module.GetGomaDriver()
+    old_timestamp = os.stat(os.path.join(
+        self._tmp_dir, self._TMP_SUBDIR_NAME,
+        self._driver._env._COMPILER_PROXY)).st_mtime
+    self._driver._env._platform = self._platform_specific.GetPlatform()
+    try:
+      self._driver._StartCompilerProxy()
+      self._driver._Update()
+      # Check compiler proxy restarted.
+      self.assertTrue(self._driver._env.CompilerProxyRunning())
+    finally:
+      self._driver._ShutdownCompilerProxy()
+      self._driver._WaitCooldown()
+    new_timestamp = os.stat(os.path.join(
+        self._tmp_dir, self._TMP_SUBDIR_NAME,
+        self._driver._env._COMPILER_PROXY)).st_mtime
+    self.assertNotEqual(old_timestamp, new_timestamp,
+                        msg=('Update should update the compiler proxy.'
+                             'old: %d, new: %d' % (old_timestamp,
+                                                   new_timestamp)))
+
+  def testAutoUpdateShouldUpdateManifest(self):
+    self.StartWithModifiedVersion(version=1)
+    driver = self._module.GetGomaDriver()
+    manifest = driver._env.ReadManifest()
+    self.assertTrue(manifest)
+    self.assertTrue('PLATFORM' in manifest)
+    self.assertTrue('VERSION' in manifest)
+    self.assertNotEqual(manifest['VERSION'], '1')
+
+  def testShouldNotAutoUpdateNoAutoUpdate(self):
+    # Put no_auto_update file.
+    no_auto_update_path = os.path.join(self._tmp_dir, self._TMP_SUBDIR_NAME,
+                                       'no_auto_update')
+    with open(no_auto_update_path, 'w') as handler:
+      handler.write('dummy')
+    self.StartWithModifiedVersion(version=1)
+    # Confirm manifest not changed.
+    driver = self._module.GetGomaDriver()
+    manifest = driver._env.ReadManifest()
+    self.assertTrue(manifest)
+    self.assertTrue('PLATFORM' in manifest)
+    self.assertTrue('VERSION' in manifest)
+    self.assertEqual(manifest['VERSION'], '1')
+
+  def testShouldNotAutoUpdateNoVersionInManifest(self):
+    # Manifest is empty by default.
+    driver = self._module.GetGomaDriver()
+    manifest = driver._env.ReadManifest()
+    self.assertFalse(manifest)
+    self.StartWithModifiedVersion()
+    manifest = driver._env.ReadManifest()
+    self.assertFalse(manifest)
+
+  def testAutoUpdateShouldUpdateCompilerProxyEvenIfItIsRunning(self):
+    # Start compiler proxy first.
+    driver0 = self._module.GetGomaDriver()
+    driver0._StartCompilerProxy()
+
+    # Make version in manifest old.
+    manifest = driver0._env.ReadManifest()
+    manifest['VERSION'] = '1'
+    manifest['PLATFORM'] = self._platform_specific.GetPlatform()
+    driver0._env.WriteManifest(manifest)
+
+    # Save the current compiler_proxy timestamp.
+    old_timestamp = os.stat(os.path.join(
+        self._tmp_dir, self._TMP_SUBDIR_NAME,
+        driver0._env._COMPILER_PROXY)).st_mtime
+    self._driver = self._module.GetGomaDriver()
+    self._driver._env._platform = self._platform_specific.GetPlatform()
+    try:
+      self._driver._StartCompilerProxy()
+    finally:
+      self._driver._ShutdownCompilerProxy()
+      self._driver._WaitCooldown()
+      driver0._ShutdownCompilerProxy()
+      driver0._WaitCooldown()
+
+    # Time stamp should be changed.
+    new_timestamp = os.stat(os.path.join(
+        self._tmp_dir, self._TMP_SUBDIR_NAME,
+        self._driver._env._COMPILER_PROXY)).st_mtime
+    self.assertNotEqual(old_timestamp, new_timestamp,
+                        msg=('Update should update the compiler proxy.'
+                             'old: %d, new: %d' % (old_timestamp,
+                                                   new_timestamp)))
+
+  def testFetchShouldDownloadPackage(self):
+    # Get list of supported platforms.
+    platforms = []
+    for goma_env in self._module._GOMA_ENVS.values():
+      platforms.extend([x[1] for x in goma_env.PLATFORM_CANDIDATES])
+
+    # Check packages for them can be downloaded.
+    driver = self._module.GetGomaDriver()
+    for platform in platforms:
+      filename = os.path.join(self._tmp_dir, platform)
+      driver._args = ['dummy', platform, filename]
+      driver._Fetch()
+      self.assertTrue(os.path.isfile(filename))
+
+  def testEnsureShouldRestartCompilerProxyIfBinarySilentlyChanged(self):
+    """_EnsureStartCompilerProxy() restarts when the binary was replaced.
+
+    A first driver starts compiler proxy and then pulls and installs a new
+    package.  A second driver then calls _EnsureStartCompilerProxy(), and
+    the '/versionz' message is expected to differ from the one read before.
+    Skipped on Windows.
+    """
+    if isinstance(self._platform_specific, WindowsSpecific):
+      return  # Windows cannot proceed this test.
+
+    # Start compiler proxy first.
+    driver0 = self._module.GetGomaDriver()
+    driver0._env._platform = self._platform_specific.GetPlatform()
+    driver0._StartCompilerProxy()
+
+    # binary update.
+    # Stub IsGomaInstalledBefore to report False for this driver.
+    driver0._env.IsGomaInstalledBefore = lambda dummy = None: False
+    driver0._Pull()
+    driver0._UpdatePackage()
+    before_version = driver0._env.ControlCompilerProxy('/versionz')['message']
+    after_version = None
+
+    # start latter compiler_proxy.
+    self._driver = self._module.GetGomaDriver()
+    self._driver._env._platform = self._platform_specific.GetPlatform()
+    try:
+      self._driver._EnsureStartCompilerProxy()
+      after_version = self._driver._env.ControlCompilerProxy(
+          '/versionz')['message']
+    finally:
+      # Shut down through both drivers regardless of the outcome above.
+      self._driver._ShutdownCompilerProxy()
+      self._driver._WaitCooldown()
+      driver0._ShutdownCompilerProxy()
+      driver0._WaitCooldown()
+
+    self.assertTrue(after_version)
+    self.assertNotEquals(before_version, after_version)
+  # TODO: test not silently updated case.
+
+  def testEnsureShouldWorkWithoutFuserCommand(self):
+    """_EnsureStartCompilerProxy() works when _GetFuserPath() returns ''.
+
+    Skipped on Windows, and when _GetFuserPath() already returns a falsy
+    value.
+    """
+    if isinstance(self._platform_specific, WindowsSpecific):
+      return  # Windows don't need this test.
+
+    self._driver = self._module.GetGomaDriver()
+    self._driver._env._platform = self._platform_specific.GetPlatform()
+    if not self._driver._env._GetFuserPath():
+      return  # No need to run this test.
+    # Stub the lookup so the environment reports an empty fuser path.
+    self._driver._env._GetFuserPath = lambda dummy = None: ''
+    try:
+      self.assertFalse(self._driver._env.CompilerProxyRunning())
+      self._driver._EnsureStartCompilerProxy()
+      self.assertTrue(self._driver._env.CompilerProxyRunning())
+    finally:
+      self._driver._ShutdownCompilerProxy()
+      self._driver._WaitCooldown()
+
+  def testMultipleCompilerProxyInstancesRuns(self):
+    if isinstance(self._platform_specific, WindowsSpecific):
+      return  # Windows don't support this feature.
+
+    self._driver = self._module.GetGomaDriver()
+    self._driver._env._platform = self._platform_specific.GetPlatform()
+    try:
+      self.assertFalse(self._driver._env.CompilerProxyRunning())
+      self._driver._EnsureStartCompilerProxy()
+      self.assertTrue(self._driver._env.CompilerProxyRunning())
+
+      prev_envs = {}
+      try:
+        envs = [
+            'GOMA_COMPILER_PROXY_PORT',
+            'GOMA_COMPILER_PROXY_SOCKET_NAME',
+            'GOMA_COMPILER_PROXY_LOCK_FILENAME']
+        for env in envs:
+          prev_envs[env] = os.environ.get(env)
+
+        os.environ['GOMA_COMPILER_PROXY_PORT'] = str(int(self._port) + 1)
+        os.environ['GOMA_COMPILER_PROXY_SOCKET_NAME'] = 'goma.ipc_test'
+        os.environ['GOMA_COMPILER_PROXY_LOCK_FILENAME'] = (
+            '/tmp/goma_compiler_proxy.lock.test')
+        self.assertFalse(self._driver._env.CompilerProxyRunning())
+        self._driver._EnsureStartCompilerProxy()
+        self.assertTrue(self._driver._env.CompilerProxyRunning())
+      finally:
+        self._driver._ShutdownCompilerProxy()
+        self._driver._WaitCooldown()
+        for key, value in prev_envs.items():
+          if value:
+            os.environ[key] = value
+
+    finally:
+      self._driver._ShutdownCompilerProxy()
+      self._driver._WaitCooldown()
+
+  def testPullShouldNotUpdateInSecondTime(self):
+    driver = self._module.GetGomaDriver()
+    driver._env._platform = self._platform_specific.GetPlatform()
+    driver._Pull()
+    latest_dir = os.path.join(driver._env._dir, driver._latest_package_dir)
+
+    mtime = time.time() - 5
+    stat_dict = {}
+    for f in os.listdir(latest_dir):
+      # change modification time of files in |latest_dir| to check update
+      f_path = os.path.join(latest_dir, f)
+      os.utime(f_path, (mtime, mtime))
+      stat_dict[f] = os.stat(f_path)
+
+    driver._Pull()
+    for f in os.listdir(latest_dir):
+      if f == 'MANIFEST':
+        # In goma_ctl.py pull, we update timestamp of MANIFEST in latest_dir
+        # to skip frequent update check in goma_ctl.py ensure_start.
+        self.assertNotEqual(stat_dict[f].st_mtime,
+                            os.stat(os.path.join(latest_dir, f)).st_mtime)
+      else:
+        self.assertEqual(stat_dict[f].st_mtime,
+                         os.stat(os.path.join(latest_dir, f)).st_mtime)
+
+  def testPullShouldUpdateIfFilesAreNotExist(self):
+    driver = self._module.GetGomaDriver()
+    driver._env._platform = self._platform_specific.GetPlatform()
+    driver._Pull()
+    latest_dir = os.path.join(driver._env._dir, driver._latest_package_dir)
+    files = os.listdir(latest_dir)
+    for f in files:
+      # If we make broken manifest, driver's _DownloadedVersion() returns 0.
+      # In that case, _ShouldUpdate become true, and file check will not be
+      # executed in _Update function.
+      # Note that test for this case is testPullShouldUpdateIfManifestIsBroken.
+      if f == 'MANIFEST':
+        continue
+      os.remove(os.path.join(latest_dir, f))
+
+    driver._Pull()
+    for f in files:
+      self.assertTrue(os.path.exists(os.path.join(latest_dir, f)))
+
+  def testPullShouldUpdateIfFilesAreBroken(self):
+    driver = self._module.GetGomaDriver()
+    driver._env._platform = self._platform_specific.GetPlatform()
+    driver._Pull()
+    latest_dir = os.path.join(driver._env._dir, driver._latest_package_dir)
+    msg = 'broken'
+    files = os.listdir(latest_dir)
+    for f in files:
+      # If we make broken manifest, driver's _DownloadedVersion() returns 0.
+      # In that case, _ShouldUpdate become true, and file check will not be
+      # executed in _Update function.
+      # Note that test for this case is testPullShouldUpdateIfManifestIsBroken.
+      if f == 'MANIFEST':
+        continue
+      with open(os.path.join(latest_dir, f), 'w') as f:
+        f.write(msg)
+    # Confirms the files are broken.
+    for f in files:
+      # ditto.
+      if f == 'MANIFEST':
+        continue
+      with open(os.path.join(latest_dir, f)) as f:
+        self.assertEqual(f.read(), msg)
+
+    driver._Pull()
+    for f in files:
+      with open(os.path.join(latest_dir, f)) as f:
+        self.assertNotEqual(f.read(), msg)
+
+
+class GomaCtlLargeClients5Test(GomaCtlLargeTestCommon):
+  """Large Clients5 tests for goma_ctl.py."""
+
+  def setUp(self):
+    super(GomaCtlLargeClients5Test, self).setUp()
+
+    os.environ['GOMA_SERVICE_ACCOUNT_JSON_FILE'] = self._oauth2_file
+    sys.stderr.write(
+        'Using GOMA_SERVICE_ACCOUNT_JSON_FILE = %s\n' % self._oauth2_file)
+
+
+def GetParameterizedTestSuite(klass, **kwargs):
+  test_loader = unittest.TestLoader()
+  test_names = test_loader.getTestCaseNames(klass)
+  suite = unittest.TestSuite()
+  for name in test_names:
+    suite.addTest(klass(name, **kwargs))
+  return suite
+
+
+def main():
+  test_dir = os.path.abspath(os.path.dirname(__file__))
+  os.chdir(os.path.join(test_dir, '..'))
+
+  option_parser = optparse.OptionParser()
+  option_parser.add_option('--goma-dir', default=None,
+                           help='absolute or relative to goma top dir')
+  option_parser.add_option('--platform', help='goma platform type.',
+                           choices=['goobuntu', 'chromeos', 'mac', 'win64'])
+  option_parser.add_option('--goma-service-account-json-file',
+                           help='goma service account JSON file')
+  option_parser.add_option('--small', action='store_true',
+                           help='Check small tests only.')
+  option_parser.add_option('--verbosity', default=1,
+                           help='Verbosity of tests.')
+  option_parser.add_option('--port', default='8200',
+                           help='compiler_proxy port for large test')
+  options, _ = option_parser.parse_args()
+
+  platform_specific = GetPlatformSpecific(options.platform)
+
+  print 'testdir:%s' % test_dir
+  if options.goma_dir:
+    goma_ctl_path = os.path.abspath(options.goma_dir)
+  else:
+    goma_ctl_path = os.path.abspath(
+        platform_specific.GetDefaultGomaCtlPath(test_dir))
+  del sys.argv[1:]
+
+  # Execute test.
+  suite = unittest.TestSuite()
+  suite.addTest(
+      GetParameterizedTestSuite(GomaCtlSmallTest,
+                                goma_ctl_path=goma_ctl_path,
+                                platform_specific=platform_specific))
+  if not options.small:
+    suite.addTest(
+        GetParameterizedTestSuite(GomaEnvTest,
+                                  goma_ctl_path=goma_ctl_path,
+                                  platform_specific=platform_specific))
+    clients5_key = options.goma_service_account_json_file
+    if not clients5_key and platform_specific.GetCred():
+      clients5_key = platform_specific.GetCred()
+    assert clients5_key
+    suite.addTest(
+        GetParameterizedTestSuite(GomaCtlLargeClients5Test,
+                                  goma_ctl_path=goma_ctl_path,
+                                  platform_specific=platform_specific,
+                                  oauth2_file=clients5_key,
+                                  port=options.port))
+  result = unittest.TextTestRunner(verbosity=options.verbosity).run(suite)
+
+  # Return test status as exit status.
+  exit_code = 0
+  if result.errors:
+    exit_code |= 0x01
+  if result.failures:
+    exit_code |= 0x02
+  if exit_code:
+    sys.exit(exit_code)
+
+
+if __name__ == '__main__':
+  main()
+
+# TODO: write tests for GomaEnv and GomaBackend.
diff --git a/test/gomatest.sh b/test/gomatest.sh
new file mode 100755
index 0000000..a068b69
--- /dev/null
+++ b/test/gomatest.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+#
+# Copyright 2012 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+#
+
+function httpfetch() {
+  local host="$1"
+  local port="$2"
+  local path="$3"
+  local method="$4"
+  if command -v wget > /dev/null 2>&1; then
+    postarg=""
+    if [ "$method" = "post" ]; then
+       postarg="--post-data="
+    fi
+    wget $postarg  -o/dev/null -O- "http://$host:$port$path"
+  elif command -v curl > /dev/null 2>&1; then
+    postarg=""
+    if [ "$method" = "post" ]; then
+      postarg="--data="
+    fi
+    curl $postarg -s "http://$host:$port$path"
+  elif command -v nc > /dev/null 2>&1; then
+    httpmethod="GET"
+    if [ "$method" = "post" ]; then
+      httpmethod="POST"
+    fi
+    printf "$httpmethod $path HTTP/1.1\r\nHost: $host:$port\r\n\r\n" | \
+      nc $host $port | \
+      sed -e '1,/^
+/d'
+  else
+    echo 'wget, curl, or nc not found.' >&2
+    exit 1
+  fi
+}
+
+# Asks gomacc for the compiler_proxy port and exports it as
+# GOMA_COMPILER_PROXY_PORT.
+#   $1 directory containing gomacc, $2 number of attempts (one per second).
+# Returns 0 once a port is obtained.  Returns 1 when all attempts fail, or
+# early when COMPILER_PROXY_PID is set and that process no longer exists.
+# Exits the shell with status 1 when gomacc is missing or not executable.
+function update_compiler_proxy_port() {
+  local goma_dir="$1"
+  local num_tries="$2"
+  # Keep the loop counter out of the caller's variables.
+  local i
+  # Paths are quoted so that directories containing spaces work.
+  if [ ! -x "$goma_dir/gomacc" ]; then
+     echo "FATAL: $goma_dir/gomacc not found." >&2
+     exit 1
+  fi
+  for (( i = 0; i < $num_tries; i++)) do
+    port="$(GLOG_logtostderr=true "$goma_dir/gomacc" port 2>/dev/null)"
+    if [ "$port" != "" ]; then
+      export GOMA_COMPILER_PROXY_PORT="$port"
+      return 0
+    fi
+    if [ "$COMPILER_PROXY_PID" != "" ] && \
+      ! kill -0 "$COMPILER_PROXY_PID" > /dev/null 2>&1; then
+      return 1
+    fi
+    echo "waiting for compiler_proxy's unix domain socket..."
+    sleep 1
+  done
+  return 1
+}
+
+# Polls once per second, up to 10 times, until process $1 is gone.
+# Returns 0 as soon as "kill -0" fails for the pid, 1 if it never does.
+function wait_shutdown() {
+  local pid="$1"
+  local num_tries=10
+
+  for (( i = 0; i < $num_tries; i++ )); do
+    kill -0 "$pid" > /dev/null 2>&1 || return 0
+    echo "waiting for compiler_proxy's shutdown..."
+    sleep 1
+  done
+  return 1
+}
+
+# Requests compiler_proxy to quit via /quitquitquit, then waits for every
+# process that fuser reports on $GOMA_COMPILER_PROXY_SOCKET_NAME and sends
+# SIGKILL to any that is still alive after wait_shutdown gives up.
+function stop_compiler_proxy() {
+  httpfetch localhost "${GOMA_COMPILER_PROXY_PORT}" /quitquitquit
+  echo
+
+  local ipc_pids
+  ipc_pids="$(fuser "$GOMA_COMPILER_PROXY_SOCKET_NAME" 2>/dev/null)"
+  local ipc_pid
+  # fuser may print zero, one or several whitespace separated pids; the
+  # unquoted expansion splits them so each is handled on its own.
+  for ipc_pid in $ipc_pids; do
+    if ! wait_shutdown "$ipc_pid"; then
+      echo "time's up. going to kill -9 $ipc_pid"
+      kill -9 "$ipc_pid"
+    fi
+  done
+}
+
+# Polls http://$host:$port$path once per second, up to 30 times, until the
+# response is exactly 'ok' or starts with 'running:'.
+# Exits the shell with status 1 if neither is seen within those attempts.
+function watch_healthz() {
+  local host="$1" # host part of the URL to watch
+  local port="$2"
+  local path="$3"
+  local name="$4" # name of process
+  local num_tries=30
+  for (( i = 0; i < $num_tries; i++)) do
+    status="$(httpfetch $host $port $path)"
+    case "$status" in
+    # A bare "return" passes on the status of the previous command.
+    ok) return;;
+    running:*) echo "$name is $status"; return;;
+    *)
+      echo "waiting for $name to start up (http://$host:$port$path)"
+      sleep 1
+      continue;;
+    esac
+  done
+  echo "$name failed to start up?" >&2
+  exit 1
+}
+
+# Resolves the goma binary directory and checks the required binaries.
+#   $1 directory with the built binaries (default: out/Release).
+# Sets the global goma_bin_dir to the absolute path of that directory and
+# exports GOMA_COMPILER_PROXY_BINARY unless it is already set.
+# Exits the shell with status 1 when the directory does not exist or when
+# compiler_proxy or gomacc is not executable.
+function set_goma_dirs() {
+  local bin_subdir="$1"
+  local binary
+
+  if [ "$bin_subdir" = "" ]; then
+    bin_subdir=out/Release
+  fi
+
+  if [ ! -d "$bin_subdir" ]; then
+    echo "Directory $bin_subdir doesn't exist" >&2
+    exit 1
+  fi
+
+  # Get the fullpath.  "&&" keeps pwd from reporting the current directory
+  # if cd fails.
+  goma_bin_dir=$(cd "$bin_subdir" && pwd)
+
+  if [ "$GOMA_COMPILER_PROXY_BINARY" = "" ]; then
+    export GOMA_COMPILER_PROXY_BINARY="$goma_bin_dir/compiler_proxy"
+  fi
+
+  for binary in "$GOMA_COMPILER_PROXY_BINARY" "$goma_bin_dir/gomacc"; do
+    # Quoted so that paths containing spaces are tested as one word.
+    if [ ! -x "$binary" ]; then
+      echo "$binary is not an executable" >&2
+      exit 1
+    fi
+  done
+}
diff --git a/test/hello.c b/test/hello.c
new file mode 100644
index 0000000..fc9927e
--- /dev/null
+++ b/test/hello.c
@@ -0,0 +1,9 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stdio.h>
+
+// Prints a fixed greeting followed by a newline.
+int main() {
+  printf("Hello world\n");
+}
diff --git a/test/key.pem b/test/key.pem
new file mode 100644
index 0000000..2801171
--- /dev/null
+++ b/test/key.pem
@@ -0,0 +1,5 @@
+-----BEGIN PRIVATE KEY-----
+MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgZ4OXRKFOnN1arxL8
+V20ZkhufSq7ExFF5+fRUJ6qcH/mhRANCAARk2tb+tAeS8Sj9g8SYgkveZHC3EMy+
++g0RivH4xi+yC4tTC0/ZydmM7aMeUHrUF+LZqDleRGdcnTATgTO1OHLG
+-----END PRIVATE KEY-----
diff --git a/test/key_127.0.0.1.pem b/test/key_127.0.0.1.pem
new file mode 100644
index 0000000..f8b58b0
--- /dev/null
+++ b/test/key_127.0.0.1.pem
@@ -0,0 +1,27 @@
+-----BEGIN RSA PRIVATE KEY-----
+MIIEowIBAAKCAQEAvI21HKb/btz1SlFcOMw6X1UGaTOSy9QaOkbvg7VIwOZK5AIe
+8ssiqtRLR5+O6hmmdCCZNNklHGmByQVQ/c5qWzgN6Bh0Eif9duAyhwZXnY5vvTpy
+84nNd64RkXnbY5ccLKCAPJA1UD2zn5JXoR8KRcW0KqrKFpJb5SDyYqSi8HeRuAOa
+KnVZ6MC+CwolZ9kdRGf5yeDJfefDUJ+3zK6eoLJekN7LYt7XVRz9uk7YN2551UoI
+1WcaDbF/mDta1fsZ56TqEwg19JaNsKeZxajprF/4LPU4t/JYwGqL1MrTOm7f4ddH
+8Tz6L/F3sCsFIi+p92lgjG3glTQVHzWfbn4cbwIDAQABAoIBACkqNvQ+cV3e156W
+DLBJxiSyB2hIXjAp6l/2xKvYlD2pCYil/eO7/aGWH6lPaIArgW+w/kHPdo6xaihd
+BMWknwuzPTYCwLSGDow5fk6ET/SR+Eszy4zn9PfABQ3hsMgZYYlMsTKHuVv/XIvm
+A0Ol5zQ8aY03JE+xPGbVvpVSpFAjKrXoIdjxYc+LbY4IIvGKhBqjHCbhOn8OkDxs
+in37WIWzDeIq9wqp6PUzytwg5Gxh7cGs/H+yuV3lsfmg40z0E/5Q+pdwy3VeKsP5
+pa5t/DgcWx9O/4mcLpnF15aG8KzptI8GwwGrVlJdrL7HXt3cwBD6nhhgYQLsr+1k
+s82MfcECgYEA14rxsOUqkpWqbUDcXhsmSvfPmUm6cU3pJTflekKWAW7YQCNmP5Yv
+J9U8wB+Cz0NYEScx91o1lYWll8pwKDv8v/sCrkBX6Rt0UoV9eYXxPiVYtSXCEcfJ
+2L4JmD0YYouW8C+nZjTRP+uYzaSZ6FgY3otwHj8UmRD2gOhSGoScYBcCgYEA3/Hn
+VRrdqL+QCJ9sGNuF2NzkHjFUDVJmZ4IP7VZ2ZTWnGRwVHbLC8bdT9y7OB21/N+Ck
+zeiQU6Od+pEfxlt0NRhqiYnqJaleWJ3PRq4CoFNGLyNcHB7TZshB3dRFK2hOWwkV
+SyYDhjTQ6jvfl++9qFKE1Xb1Dw/EuHZbzd9/xWkCgYBVorh/5azSbHA31As5wGOo
+aWirqGVQ2vmEdf7QYmAi1Z1JbkcTgGHf25K1ak+YdVMqDX6GneqoK18tPZruAVdc
+Fyhrftjunp/KoYrGm3bLkB78abH/Sndhi69YMv3bOUaQyv4hV17DnKErIvibHVVU
+JeY3viw63Ehz0tm7+2/9pQKBgQDXGf6uYyjHnoCVl7N3yNuuarV+PrlSNnSANZBg
+0qzUGA3Rc4TgysSu8f1nroBc6BLLNC54TUrb24uxklWn7E4ZdcNXKq5J0H37tlDS
+ve9t2a7PNjKSHBXIYs/JZ8usnvqsPHREg1XdQbvtUOc6hB9ynxhSBXnrJqU43Q7f
+Fy8eCQKBgBtcDy9l892wi2QLq6z+veHf1BLNNA4hDk7He/gNsc49W6mfUon+7+z8
+mgqJ57HgjzKGZgvusXy71ItXLhhuZIWsgcrXlyIwV4t3puHRWAdC8sxcU4ZhtlSc
+cPy9ihbR2bYrXu7LRCN2IVLy73bWj9nXvfRuuApKpAUcFroWzNVN
+-----END RSA PRIVATE KEY-----
diff --git a/test/libc.so b/test/libc.so
new file mode 100644
index 0000000..a0c56c9
--- /dev/null
+++ b/test/libc.so
@@ -0,0 +1,5 @@
+/* GNU ld script
+   Use the shared library, but some functions are only in
+   the static library, so try that secondarily.  */
+OUTPUT_FORMAT(elf32-i386)
+GROUP ( /lib/libc.so.6 /usr/lib/libc_nonshared.a  AS_NEEDED ( /lib/ld-linux.so.2  ) )
diff --git a/test/libdl.so b/test/libdl.so
new file mode 100644
index 0000000..600a88b
--- /dev/null
+++ b/test/libdl.so
Binary files differ
diff --git a/test/oneinclude.cc b/test/oneinclude.cc
new file mode 100644
index 0000000..9b0b2eb
--- /dev/null
+++ b/test/oneinclude.cc
@@ -0,0 +1,11 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <iostream>
+
+using namespace std;
+
+// Writes a fixed greeting and a newline to standard output.
+int main() {
+  cout << "Hello world" << endl;
+}
diff --git a/test/oneinclude2.cc b/test/oneinclude2.cc
new file mode 100644
index 0000000..4f387b7
--- /dev/null
+++ b/test/oneinclude2.cc
@@ -0,0 +1,11 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "common.h"
+
+using namespace std;
+
+// Writes a fixed greeting and a newline to standard output.
+// NOTE(review): cout/endl presumably come in through "common.h" - confirm.
+int main() {
+  cout << "Hello world" << endl;
+}
diff --git a/test/openssl.cnf b/test/openssl.cnf
new file mode 100644
index 0000000..83ff196
--- /dev/null
+++ b/test/openssl.cnf
@@ -0,0 +1,12 @@
+[ req ]
+distinguished_name = req_distinguished_name
+x509_extensions = v3_ca
+prompt = no
+
+[ req_distinguished_name ]
+O  = Google
+OU = for testing purpose only
+CN = www.google.com
+
+[ v3_ca ]
+subjectAltName = DNS:*.google.com
diff --git a/test/signapk.jar b/test/signapk.jar
new file mode 100644
index 0000000..7341708
--- /dev/null
+++ b/test/signapk.jar
Binary files differ
diff --git a/test/signapk_expected.jar b/test/signapk_expected.jar
new file mode 100644
index 0000000..5ef02de
--- /dev/null
+++ b/test/signapk_expected.jar
Binary files differ
diff --git a/test/signapk_ziptime.jar b/test/signapk_ziptime.jar
new file mode 100644
index 0000000..4afaaf9
--- /dev/null
+++ b/test/signapk_ziptime.jar
Binary files differ
diff --git a/test/simpletry.py b/test/simpletry.py
new file mode 100755
index 0000000..28e73f4
--- /dev/null
+++ b/test/simpletry.py
@@ -0,0 +1,581 @@
+#!/usr/bin/env python
+# Copyright 2012 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Simple test scripts for sanity check.
+
+The script uses the production servers.
+"""
+
+
+import glob
+import imp
+import optparse
+import os
+import re
+import requests
+import shutil
+import string
+import subprocess
+import sys
+import tempfile
+import unittest
+import urllib2
+
+_GOMA_CTL = 'goma_ctl.py'
+_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+_CRED = 'c:\\creds\\service_accounts\\service-account-goma-client.json'
+
+
+class Error(Exception):
+  """Raised on error."""
+
+
+class SimpleTryTest(unittest.TestCase):
+  """Goma Simple Try Test."""
+
+  def __init__(self, method_name, goma_dir, local_cl, gomacc):
+    """Initialize.
+
+    Args:
+      method_name: a string of method name to test.
+      goma_dir: a string of GOMA directory.
+      local_cl: a string of cl.exe path.
+      gomacc: a string of gomacc.exe path.
+    """
+    super(SimpleTryTest, self).__init__(method_name)
+    self._dir = os.path.abspath(goma_dir)
+    self.local_cl = local_cl
+    self.gomacc = gomacc
+    mod_name, _ = os.path.splitext(_GOMA_CTL)
+    self._module = imp.load_source(mod_name, os.path.join(goma_dir, _GOMA_CTL))
+
+  @staticmethod
+  def RemoveFile(fname):
+    """Removes the file and ignores error."""
+    try:
+      os.remove(fname)
+    except Exception:
+      pass
+
+  def setUp(self):
+    # Sets environmental variables.
+    os.environ['GOMA_STORE_ONLY'] = 'true'
+    os.environ['GOMA_DUMP'] = 'true'
+    os.environ['GOMA_RETRY'] = 'false'
+    os.environ['GOMA_FALLBACK'] = 'false'
+    os.environ['GOMA_USE_LOCAL'] = 'false'
+    os.environ['GOMA_START_COMPILER_PROXY'] = 'false'
+    # remote link not implemented on windows yet.
+    os.environ['GOMA_STORE_LOCAL_RUN_OUTPUT'] = 'false'
+    os.environ['GOMA_ENABLE_REMOTE_LINK'] = 'false'
+    os.environ['GOMA_GOMACC_WRITE_LOG_FOR_TESTING'] = 'false'
+    self._cwd = os.getcwd()
+
+  def tearDown(self):
+    self.RemoveFile('local.obj')
+    self.RemoveFile('remote.obj')
+    self.RemoveFile('hello.exe')
+    self.RemoveFile('create_pch.obj')
+    self.RemoveFile('use_pch.obj')
+    for log in self.GetGomaccLogs():
+      self.RemoveFile(log)
+    os.chdir(self._cwd)
+
+  @staticmethod
+  def ExecCommand(cmd):
+    """Execute given list of command.
+
+    Args:
+      cmd: a list of command line args.
+
+    Returns:
+      a tuple of proc instance, stdout string and stderr string.
+    """
+    proc = subprocess.Popen(cmd,
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE)
+    (out, err) = proc.communicate()
+    return (proc, out, err)
+
+  def AssertSuccess(self, cmd, msg=''):
+    """Asserts given command succeeds.
+
+    Args:
+      cmd: a list of command to execute.
+      msg: additional message to be shown.
+    """
+    if msg:
+      msg += '\n'
+    (proc, out, err) = self.ExecCommand(cmd)
+    self.assertEqual(proc.returncode, 0, msg=('%s%s\n%s\n' % (msg, out, err)))
+
+  def AssertFailure(self, cmd, msg=''):
+    """Asserts given command fails.
+
+    Args:
+      cmd: a list of command to execute.
+      msg: additional message to be shown.
+    """
+    if msg:
+      msg += '\n'
+    (proc, out, err) = self.ExecCommand(cmd)
+    self.assertNotEqual(proc.returncode, 0,
+                        msg=('%s%s\n%s\n' % (msg, out, err)))
+
+  def AssertSameFile(self, files, msg=''):
+    """Asserts two files match, ignoring the header timestamp and letter case.
+
+    Args:
+      files: a list of two files to check.
+      msg: additional message to be shown.
+    """
+    a = open(files[0], 'rb').read()
+    b = open(files[1], 'rb').read()
+    a_size = os.stat(files[0]).st_size
+    b_size = os.stat(files[1]).st_size
+
+    if msg:
+      msg += '\n'
+    self.assertEqual(a_size, len(a),
+                     msg=('%sparsial read?: %s %d!=%d' % (msg, files[0],
+                                                          a_size, len(a))))
+    self.assertEqual(b_size, len(b),
+                     msg=('%sparsial read?: %s %d!=%d' % (msg, files[1],
+                                                          b_size, len(b))))
+    if a == b:
+      return  # Success: byte-identical, no normalization needed.
+
+    self.assertEqual(len(a), len(b),
+                     msg=('%ssize mismatch: %s=%d %s=%d' % (msg,
+                                                            files[0], len(a),
+                                                            files[1], len(b))))
+    idx = -1
+    ndiff = 0
+    for ach, bch in zip(a, b):
+      idx += 1
+      # http://support.microsoft.com/kb/121460/en
+      # Header structure (0 - 20 bytes):
+      #  0 -  2: Machine
+      #  2 -  4: Number of sections.
+      #  4 -  8: Time/Date Stamp.
+      #  8 - 12: Pointer to Symbol Table.
+      # 12 - 16: Number of Symbols.
+      # 16 - 18: Optional Header Size.
+      # 18 - 20: Characteristics.
+      if idx in range(4, 8):  # Time/Date Stamp can be different.
+        continue
+      # Since compiler_proxy normalizes path names to lower case, we should
+      # normalize printable characters before comparison.
+      if ach in string.printable:
+        ach = ach.lower()
+      if bch in string.printable:
+        bch = bch.lower()
+
+      if ach != bch:
+        ndiff += 1
+    print '%d bytes differ' % ndiff
+    self.assertEqual(ndiff, 0,
+                     msg=('%sobj file should be the same after normalize.'
+                          % msg))
+
+  def AssertNotEmptyFile(self, filename, msg=''):
+    """Asserts that the given file is not empty (its size is non-zero).
+
+    Args:
+      filename: a string of filename to check.
+      msg: additional message to be shown.
+    """
+    if msg:
+      msg += '\n'
+    self.assertNotEqual(os.stat(filename).st_size, 0,
+                        msg=('%s%s is empty' % (msg, filename)))
+
+  def GetGomaccLogs(self):
+    logdir = self._module._GetLogDirectory()
+    assert logdir
+    return glob.glob(os.path.join(logdir, "gomacc.*"))
+
+  def AssertNoGomaccInfo(self):
+    """Asserts that no gomacc log exists; prints each log that is found."""
+    logs = self.GetGomaccLogs()
+    for log in logs:
+      with open(log) as f:
+        print 'log: %s:' % log
+        print f.read()
+        print
+    self.assertEquals(len(logs), 0)
+
+  def testClHelp(self):
+    self.AssertSuccess([self.gomacc, self.local_cl, '/?'],
+                       msg='gomacc cl help')
+    self.AssertNoGomaccInfo()
+
+  def testClHello(self):
+    # Since object file contains a file name, an output file name should be
+    # the same.
+    self.AssertSuccess([self.local_cl, '/c', '/Fotest.obj',
+                        os.path.join('test', 'hello.c')],
+                       msg='local compile')
+    shutil.move('test.obj', 'local.obj')
+    self.AssertSuccess([self.gomacc, self.local_cl, '/c', '/Fotest.obj',
+                        os.path.join('test', 'hello.c')],
+                       msg='remote compile')
+    shutil.move('test.obj', 'remote.obj')
+
+    self.AssertSameFile(['local.obj', 'remote.obj'], msg='obj same?')
+    self.AssertSuccess([self.local_cl, '/Fehello.exe', 'remote.obj'],
+                       msg='link hello.obj')
+    self.AssertSuccess(['hello.exe'], msg='run hello.exe')
+    self.AssertNoGomaccInfo()
+
+  def testDashFlag(self):
+    self.AssertSuccess([self.gomacc, self.local_cl, '-c', '-Fotest.obj',
+                        os.path.join('test', 'hello.c')],
+                       msg='remote compile')
+    shutil.move('test.obj', 'remote.obj')
+    self.AssertSuccess([self.local_cl, '/Fehello.exe', 'remote.obj'],
+                       msg='link hello.obj')
+    self.AssertSuccess(['hello.exe'], msg='run hello.exe')
+    self.AssertNoGomaccInfo()
+
+  def testPchSupport(self):
+    # Since object file contains a file name, an output file name should be
+    # the same.
+    self.AssertSuccess([self.gomacc, self.local_cl,
+                        '/c', '/Fotest.obj', '/FIstdio.h', '/Ycstdio.h',
+                        os.path.join('test', 'hello.c')],
+                       msg='cl_create_pch')
+    shutil.move('test.obj', 'create_pch.obj')
+    self.AssertNotEmptyFile('stdio.pch', msg='cl_create_pch_exist')
+    self.AssertSuccess([self.gomacc, self.local_cl,
+                        '/c', '/Fotest.obj', '/FIstdio.h', '/Yustdio.h',
+                        os.path.join('test', 'hello.c')],
+                       msg='cl_use_pch')
+    shutil.move('test.obj', 'use_pch.obj')
+    # TODO: investigate pch mismatch.
+    # TODO: Still 5 bytes differ.
+    # I suppose some come from date/time.
+    try:
+      self.AssertSameFile(['create_pch.obj', 'use_pch.obj'],
+                          msg='FAILS_cl_pch.o')
+    except Exception, inst:
+      print 'Known failure %s' % inst
+    self.AssertNoGomaccInfo()
+
+  def testDisabledShouldWork(self):
+    stat_url = 'http://localhost:%s/statz' % (
+        os.environ['GOMA_COMPILER_PROXY_PORT'])
+    stat_before = urllib2.urlopen(stat_url).read()
+    os.environ['GOMA_DISABLED'] = 'true'
+    self.AssertSuccess([self.gomacc, self.local_cl, '/c', '/Fotest.obj',
+                        os.path.join('test', 'hello.c')],
+                       msg='remote compile')
+    del os.environ['GOMA_DISABLED']
+    stat_after = urllib2.urlopen(stat_url).read()
+    request_line_before = '\n'.join(
+        [line for line in stat_before.split('\n') if 'request' in line])
+    request_line_after = '\n'.join(
+        [line for line in stat_after.split('\n') if 'request' in line])
+    self.assertNotEqual(request_line_before, '')
+    self.assertNotEqual(request_line_after, '')
+    self.assertEqual(request_line_before, request_line_after)
+    self.AssertNoGomaccInfo()
+
+  def testClInPathShouldCompile(self):
+    self.AssertSuccess([self.gomacc, 'cl', '/c', '/Fotest.obj',
+                        os.path.join('test', 'hello.c')],
+                       msg='cl.exe in path env. compile')
+    self.AssertNotEmptyFile('test.obj', msg='cl_test_obj')
+    self.AssertNoGomaccInfo()
+
+  def testAccessCheck(self):
+    url = 'http://localhost:%s/e' % (
+        os.environ['GOMA_COMPILER_PROXY_PORT'])
+    with open(os.path.join('test', 'badreq.bin'), 'rb') as f:
+      req = f.read()
+    r = requests.post(url, verify=False,
+                      headers={'Content-Type': 'binary/x-protocol-buffer'},
+                      data=req)
+    self.assertEqual(r.status_code, 401,
+                     msg=('response code=%d; want=401' % r.status_code))
+    self.AssertNoGomaccInfo()
+
+  def testGomaccShouldLog(self):
+    os.environ['GOMA_GOMACC_WRITE_LOG_FOR_TESTING'] = 'true'
+    self.AssertSuccess([self.gomacc])
+    self.assertEquals(len(self.GetGomaccLogs()), 1)
+
+  # TODO: write a test for a compiler with a relative path.
+
+
+def GetParameterizedTestSuite(klass, **kwargs):
+  """Make test suite parameterized.
+
+  Args:
+    klass: a subclass of unittest.TestCase.
+    kwargs: arguments given to klass.
+
+  Returns:
+    an instance of unittest.TestSuite for |klass|.
+  """
+  test_loader = unittest.TestLoader()
+  test_names = test_loader.getTestCaseNames(klass)
+  suite = unittest.TestSuite()
+  for name in test_names:
+    suite.addTest(klass(name, **kwargs))
+  return suite
+
+
+class CompilerProxyManager(object):
+  """Compiler proxy management class.
+
+  This class should be used with 'with' statement.
+  This will automatically start compiler proxy when entering into with
+  statement, and automatically kill the compiler proxy when exiting from with
+  statement.
+  """
+  # TODO: fix this.
+  # pylint: disable=W0212
+
+  def __init__(self, goma_ctl_path, port, kill=False, api_key_file=None,
+               service_account_file=None):
+    """Initialize.
+
+    Args:
+      goma_ctl_path: a string of path goma_ctl.py is located.
+      port: a string or an integer port number of compiler_proxy.
+      kill: True to kill the GOMA processes before starting compiler_proxy.
+      api_key_file: a string of API key filename.
+      service_account_file: a string of service account filename.
+    """
+    # create goma_ctl.
+    mod_name, _ = os.path.splitext(_GOMA_CTL)
+    self._module = imp.load_source(mod_name,
+                                   os.path.join(goma_ctl_path, _GOMA_CTL))
+    self._kill = kill
+    self._tmpdir = None
+    self._port = int(port)
+    self._goma = None
+    self._api_key_file = None
+    if api_key_file and os.path.isfile(api_key_file):
+      self._api_key_file = api_key_file
+    self._service_account_file = None
+    if service_account_file and os.path.isfile(service_account_file):
+      self._service_account_file = service_account_file
+      self._api_key_file = None
+    elif os.path.isfile(_CRED):
+      self._service_account_file = _CRED
+      self._api_key_file = None
+
+  def __enter__(self):
+    self._tmpdir = tempfile.mkdtemp()
+    print 'GOMA_TMP_DIR: %s' % self._tmpdir
+    os.environ['GOMA_TMP_DIR'] = self._tmpdir
+    os.environ['TMP'] = self._tmpdir
+    os.environ['GOMA_DEPS_CACHE_FILE'] = 'deps_cache'
+    assert self._module._GetLogDirectory() == self._tmpdir
+
+    os.environ['GOMA_COMPILER_PROXY_PORT'] = str(self._port)
+    # TODO: find unused port
+    # os.environ['GOMA_GOMACC_LOCK_FILENAME']
+    os.environ['GOMA_GOMACC_LOCK_GLOBALNAME'] = (
+        'Global\\goma_cc_lock_compiler_proxy_test_%d' % self._port)
+    # os.environ['GOMA_COMPILER_PROXY_LOCK_FILENAME']
+    # Windows locks
+    # 'Global\$GOMA_COMPILER_PROXY_LOCK_FILENAME.$GOMA_COMPILER_PROXY_PORT'
+    if self._api_key_file:
+      os.environ['GOMA_API_KEY_FILE'] = self._api_key_file
+      print 'Use GOMA_API_KEY_FILE=%s' % self._api_key_file
+    if self._service_account_file:
+      os.environ['GOMA_SERVICE_ACCOUNT_JSON_FILE'] = self._service_account_file
+      print 'Use GOMA_SERVICE_ACCOUNT_JSON_FILE=%s' % self._service_account_file
+
+    self._goma = self._module.GetGomaDriver()
+    if self._kill:
+      print 'Kill any remaining compiler proxy'
+      self._goma._env.KillStakeholders()
+
+    self._goma._StartCompilerProxy()
+
+  def __exit__(self, unused_exc_type, unused_exc_value, unused_traceback):
+    if self._goma:
+      self._goma._ShutdownCompilerProxy()
+      if not self._goma._WaitCooldown():
+        self._goma._env.KillStakeholders()
+
+    diagnose = subprocess.Popen(
+      [sys.executable, os.path.join('client', 'diagnose_goma_log.py'),
+       '--show-errors', '--show-warnings', '--show-known-warnings-threshold=0'],
+      stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()[0]
+    print
+    print diagnose
+    print
+
+    if self._tmpdir:
+      shutil.rmtree(self._tmpdir)
+
+
+def _FindClExe():
+  """Returns cl.exe absolute path if it is found from PATH and others.
+
+  Side effect:
+  If cl.exe in depot_tools is used, necessary environment variables (
+  INCLUDE, LIB, PATH) are automatically set.
+
+  Returns:
+    an absolute path name of cl.exe.
+
+  Raises:
+    Error: if it cannot find cl.exe or cannot set proper env for cl.exe.
+  """
+  try:
+    where_cl = subprocess.check_output(['where', 'cl'])
+    local_cl = where_cl.split('\n')[0].strip()
+    if os.path.exists(local_cl):
+      return local_cl
+  except subprocess.CalledProcessError:
+    print 'Cannot find cl.exe in PATH.'
+
+  # Cannot find cl.exe in PATH.  Let me set it in depot_tools.
+  # The script also set INCLUDE, LIB, PATH at the same time.
+  print 'Going to use cl.exe in depot_tools.'
+  out = subprocess.check_output(['python',
+                                 os.path.join(_SCRIPT_DIR, '..', 'build',
+                                              'vs_toolchain.py'),
+                                 'get_toolchain_dir'])
+  vs_path_pattern = re.compile('^vs_path\s+=\s+"([^"]+)"')
+  sdk_path_pattern = re.compile('^sdk_path\s+=\s+"([^"]+)"')
+  vs_path = None
+  sdk_path = None
+  for line in out.splitlines():
+    matched = vs_path_pattern.search(line)
+    if matched:
+      vs_path = matched.group(1)
+      print 'vs_path=%s' % vs_path
+    matched = sdk_path_pattern.search(line)
+    if matched:
+      sdk_path = matched.group(1)
+      print 'sdk_path=%s' % sdk_path
+  if not vs_path or not sdk_path:
+    raise Error('Do not know proper vs_path or sdk_path.')
+  out = subprocess.check_output([os.path.join(sdk_path, 'bin/setenv.cmd'),
+                                 '&&', 'set'])
+  for line in out.splitlines():
+    key, value = line.split('=')
+    if key.upper() in ('INCLUDE', 'LIB', 'PATH'):
+      if key.upper() == 'PATH':
+        # PATH for api-ms-win-*.dll
+        value += ';' + os.path.join(vs_path, 'win_sdk', 'bin', 'x64')
+      os.environ[key] = value
+      print 'os.environ[%s] = "%s"' % (key, os.environ[key])
+
+  # For VS2015 or before
+  clpath = os.path.join(vs_path, 'VC', 'bin', 'cl.exe')
+  if os.path.exists(clpath):
+    return clpath
+
+  # For VS2017 or later
+  vc_bin_dir = glob.glob(os.path.join(
+    vs_path, 'VC', 'Tools', 'MSVC', '*', 'bin', 'HostX64'))[0]
+
+  # PATH for mspdb140.dll, etc.
+  os.environ['PATH'] += ';' + os.path.join(vc_bin_dir, 'x64')
+  return os.path.join(vc_bin_dir, 'x86', 'cl.exe')
+
+
+def ExecuteTests(goma_dir):
+  """Execute Tests.
+
+  Args:
+    goma_dir: a string of goma directory.
+
+  Returns:
+    integer exit code representing test status.  (success == 0)
+    0x01: there is errors.
+    0x02: there is failures.
+    0x04: command not found.
+  """
+  # set cl.exe and gomacc.exe locations.
+  local_cl = _FindClExe()
+  gomacc = os.path.join(goma_dir, 'gomacc.exe')
+  print 'LOCAL_CL=%s' % local_cl
+  print 'GOMACC=%s' % gomacc
+
+  if not local_cl or not os.path.exists(local_cl):
+    print "local_cl not found."
+    return 0x04
+  if not os.path.exists(gomacc):
+    print "gomacc not found."
+    return 0x04
+
+  print 'ShowGomaVerify'
+  cmd = [gomacc, '--goma-verify-command', local_cl]
+  subprocess.call(cmd)
+
+  # starts test.
+  suite = unittest.TestSuite()
+  suite.addTest(
+      GetParameterizedTestSuite(SimpleTryTest,
+                                goma_dir=goma_dir,
+                                local_cl=local_cl,
+                                gomacc=gomacc))
+  result = unittest.TextTestRunner(verbosity=2).run(suite)
+
+  # Return test status as exit status.
+  exit_code = 0
+  if result.errors:
+    exit_code |= 0x01
+  if result.failures:
+    exit_code |= 0x02
+  return exit_code
+
+
+def main():
+  test_dir = os.path.abspath(os.path.dirname(__file__))
+  os.chdir(os.path.join(test_dir, '..'))
+
+  option_parser = optparse.OptionParser()
+  option_parser.add_option('--wait', action='store_true',
+                           help='Wait after all tests finished')
+  option_parser.add_option('--kill', action='store_true',
+                           help='Kill running compiler_proxy before test')
+  option_parser.add_option('--port', default='8100',
+                           help='compiler_proxy port')
+  option_parser.add_option('--goma-dir',
+                           default=os.path.join(
+                               test_dir, '..', 'out', 'Release'),
+                           help='goma binary directory')
+  option_parser.add_option('--goma-api-key-file',
+                           default=os.path.abspath(
+                               os.path.join(test_dir, '..',  # curdir
+                                            '..', '..',  # build/client
+                                            '..', '..',  # slave/$builddir
+                                            'goma', 'goma.key')),
+                           help='goma api key file')
+  option_parser.add_option('--goma-service-account-file',
+                           help='goma service account file')
+
+  options, _ = option_parser.parse_args()
+  goma_dir = os.path.abspath(options.goma_dir)
+
+  if not os.environ.get('GOMATEST_USE_RUNNING_COMPILER_PROXY', ''):
+    with CompilerProxyManager(
+        goma_dir, options.port,
+        kill=options.kill,
+        api_key_file=options.goma_api_key_file,
+        service_account_file=options.goma_service_account_file):
+      exit_code = ExecuteTests(goma_dir)
+  else:
+    exit_code = ExecuteTests(goma_dir)
+
+  if options.wait:
+    raw_input('Ready to finish?')
+
+  if exit_code:
+    sys.exit(exit_code)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/test/simpletry.sh b/test/simpletry.sh
new file mode 100755
index 0000000..1d076d3
--- /dev/null
+++ b/test/simpletry.sh
@@ -0,0 +1,827 @@
+#!/bin/bash
+#
+# Copyright 2010 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+#
+# Simple test scripts for sanity check. Runs against production
+# servers.
+#
+# Run this like:
+#  % GOMA_RPC_EXTRA_PARAMS="?${USERNAME}_$cell" ./test/simpletry.sh out/Debug
+# in order to test your personal canary with binaries in out/Debug.
+# If the binary directory isn't specified, out/Release will be used.
+#
+#  % ./test/simpletry.sh -w
+# will wait after all tests finished, so you could investigate
+# outputs or compiler proxy status page.
+#
+# % ./test/simpletry.sh -k
+# will kill running compiler_proxy before test to make sure compiler_proxy
+# is actually invoked for the test only.
+# Without -k, it will try own compiler_proxy (isolated with GOMA_* flags)
+#
+# By default, it will allocate port 8100 (or later)
+# You can set port number with -p option.
+# % ./test/simpletry.sh -p 8200
+#
+# with -d dumpfile option, you'll get task.json and task's ExecReq in
+# dumpfile (tgz format)
+# % ./test/simpletry.sh -d /tmp/simpletry.tgz
+#
+# If CLANG_PATH is specified, and $CLANG_PATH/clang and $CLANG_PATH/clang++
+# exists, it will test with clang and clang++.
+# Note that it doesn't support old clang that doesn't support -dumpmachine.
+#
+
+test_dir=$(cd $(dirname $0); pwd)
+goma_top_dir=${test_dir}/..
+tmpdir=$(mktemp -d /tmp/tmp.XXXXXXXX)
+chmod 0700 $tmpdir
+
+. $test_dir/gomatest.sh
+
+is_color=0
+if tput init && test -t 1; then
+  is_color=1
+fi
+function test_term() {
+  test "$is_color" = 1
+}
+function tput_reset() {
+  test_term && tput sgr0
+  return 0
+}
+function echo_title() {
+  if test_term; then
+    tput bold; tput setaf 4
+  fi
+  echo "$@"
+  tput_reset
+}
+function echo_bold() {
+  test_term && tput bold
+  echo "$@"
+  tput_reset
+}
+function echo_ok() {
+  if test_term; then
+    tput bold; tput setaf 2
+  fi
+  echo "$@"
+  tput_reset
+}
+function echo_known_fail() {
+  if test_term; then
+    tput setab 1
+  fi
+  echo "$@"
+  tput_reset
+}
+function echo_fail() {
+  if test_term; then
+    tput bold; tput setaf 1
+  fi
+  echo "$@"
+  tput_reset
+}
+function echo_warn() {
+  if test_term; then
+   tput bold; tput setaf 5
+  fi
+  echo "$@"
+  tput_reset
+}
+
+function at_exit() {
+  # cleanup function.
+  rm -f a.out a.out2 out.o out2.o out_plain.o
+  rm -f test/compile_error.o
+  rm -f test/compile_error*.out test/compile_error*.err
+  rm -f cmd_out cmd_err
+  stop_compiler_proxy
+
+  $goma_top_dir/client/diagnose_goma_log.py \
+    --show-errors --show-warnings --show-known-warnings-threshold=0 \
+    --fail-tasks-threshold=2 \
+    || true
+  if [ -n "${GLOG_log_dir:-}" ]; then
+    echo "Gomacc logs:"
+    cat ${GLOG_log_dir}/gomacc.* || true
+  fi
+  rm -rf $tmpdir
+  tput_reset
+}
+
+function is_cros_gcc() {
+  local compiler=$1
+
+  local version=$($compiler --version)
+  case "$version" in
+    *_cos_*)
+      echo "yes"
+      ;;
+    *)
+      echo "no"
+      ;;
+  esac
+}
+
+# Note: all code in this script is expected to be executed from $goma_top_dir.
+cd $goma_top_dir
+
+FLAGS_wait=0
+FLAGS_kill=0
+FLAGS_port=8100
+FLAGS_dump=
+while getopts kwp:d: opt; do
+ case $opt in
+ k) FLAGS_kill=1 ;;
+ w) FLAGS_wait=1 ;;
+ p) FLAGS_port="$OPTARG";;
+ d) FLAGS_dump="$OPTARG";;
+ ?) echo "Usage: $0 [-w] [-k] [-p port] [-d tgz] [goma_dir]\n" >&2; exit 1;;
+ esac
+done
+shift $(($OPTIND - 1))
+
+set_goma_dirs "$1"
+
+# Flags for gomacc
+export GOMA_STORE_ONLY=true
+export GOMA_DUMP=true
+export GOMA_RETRY=false
+export GOMA_FALLBACK=false
+export GOMA_USE_LOCAL=false
+export GOMA_START_COMPILER_PROXY=false
+export GOMA_STORE_LOCAL_RUN_OUTPUT=true
+export GOMA_ENABLE_REMOTE_LINK=true
+export GOMA_HERMETIC=error
+
+# Set service account JSON file if exists.
+CRED="/creds/service_accounts/service-account-goma-client.json"
+if [ -z "$GOMA_SERVICE_ACCOUNT_JSON_FILE" -a -f "$CRED" ]; then
+  export GOMA_SERVICE_ACCOUNT_JSON_FILE="$CRED"
+fi
+if [ -n "$GOMA_SERVICE_ACCOUNT_JSON_FILE" -a \
+  ! -f "$GOMA_SERVICE_ACCOUNT_JSON_FILE" ]; then
+  echo "GOMA_SERVICE_ACCOUNT_JSON_FILE $GOMA_SERVICE_ACCOUNT_JSON_FILE " \
+    "not found." >&2
+  unset GOMA_SERVICE_ACCOUNT_JSON_FILE
+fi
+
+# on buildslave:/b/build/slave/$builddir/build/client
+# api key can be found at /b/build/goma/goma.key
+if [ -d "$goma_top_dir/../../../../goma" ]; then
+  bot_goma_dir=$(cd "$goma_top_dir/../../../../goma"; pwd)
+  GOMA_API_KEY_FILE=${GOMA_API_KEY_FILE:-$bot_goma_dir/goma.key}
+fi
+if [ -n "$GOMA_SERVICE_ACCOUNT_JSON_FILE" ]; then
+  echo "Use GOMA_SERVICE_ACCOUNT_JSON_FILE=$GOMA_SERVICE_ACCOUNT_JSON_FILE"
+  unset GOMA_API_KEY_FILE
+elif [ -f "$GOMA_API_KEY_FILE" ]; then
+  echo "Use GOMA_API_KEY_FILE=$GOMA_API_KEY_FILE"
+  export GOMA_API_KEY_FILE
+elif [ -n "$GOMA_API_KEY_FILE" ]; then
+  echo "GOMA_API_KEY_FILE $GOMA_API_KEY_FILE not found." >&2
+  unset GOMA_API_KEY_FILE
+fi
+
+if [ "$GOMATEST_USE_RUNNING_COMPILER_PROXY" = ""  ]; then
+  # --exec_compiler_proxy is deprecated. Use GOMA_COMPILER_PROXY_BINARY instead.
+  if ! [ -x ${GOMA_COMPILER_PROXY_BINARY} ]; then
+    echo "compiler_proxy($GOMA_COMPILER_PROXY_BINARY) is not executable" >&2
+    exit 1
+  fi
+  echo "Starting $GOMA_COMPILER_PROXY_BINARY..."
+
+  trap at_exit exit sighup sigpipe
+  export GOMA_COMPILER_PROXY_PORT=$FLAGS_port
+
+  if [ "$FLAGS_kill" = 1 ]; then
+    echo Kill any remaining compiler proxy
+    killall compiler_proxy
+  else
+    echo "GOMA_TMP_DIR: $tmpdir"
+    export GOMA_TMP_DIR=$tmpdir
+    export TMPDIR=$tmpdir
+    export GLOG_log_dir=$tmpdir
+    export GOMA_DEPS_CACHE_FILE=deps_cache
+    export GOMA_COMPILER_PROXY_SOCKET_NAME=$tmpdir/goma.ipc
+    export GOMA_GOMACC_LOCK_FILENAME=$tmpdir/gomacc.lock
+    export GOMA_COMPILER_PROXY_LOCK_FILENAME=$tmpdir/goma_compiler_proxy.lock
+    # Test uses SSL by default.
+    export GOMA_USE_SSL=true
+    export GOMA_STUBBY_PROXY_PORT=443
+  fi
+  (cd /tmp && ${GOMA_COMPILER_PROXY_BINARY} & )
+  update_compiler_proxy_port $(dirname $GOMA_COMPILER_PROXY_BINARY) 10
+  watch_healthz localhost ${GOMA_COMPILER_PROXY_PORT} /healthz \
+     ${GOMA_COMPILER_PROXY_BINARY}
+fi
+
+if [ "$CLANG_PATH" = "" ]; then
+  clang_path="$goma_top_dir/third_party/llvm-build/Release+Asserts/bin"
+  if [ -d "$clang_path" ]; then
+     if "$clang_path/clang" -v; then
+       CLANG_PATH="$clang_path"
+     else
+       echo "clang is not runnable, disable clang test" 1>&2
+     fi
+  fi
+fi
+
+if [ -n "${GLOG_log_dir:-}" ]; then
+  echo "removing gomacc logs."
+  rm -f "${GLOG_log_dir}"/gomacc.*  # keep the glob outside the quotes.
+fi
+
+# if the build env does not use hermetic gcc,
+# set HERMETIC_GCC=FAIL_ as a workaround.
+HERMETIC_GCC=
+
+DEFAULT_CC=gcc
+DEFAULT_CXX=g++
+if [ "$(uname)" = "Darwin" ]; then
+  # recent macosx uses llvm-gcc as gcc, but goma doesn't support it.
+  # test with chromium clang by default.
+  DEFAULT_CC=clang
+  DEFAULT_CXX=clang++
+  if [ "$GOMATEST_USE_SYSTEM_CLANG" = "" ]; then
+    PATH=$CLANG_PATH:$PATH
+    GOMATEST_USE_CHROMIUM_CLANG=1
+  fi
+  # Should set SDKROOT if we use non system clang.
+  export SDKROOT="$("$goma_top_dir"/build/mac/find_sdk.py \
+    --print_sdk_path 10.7 | head -1)"
+fi
+
+CC=${CC:-$DEFAULT_CC}
+CXX=${CXX:-$DEFAULT_CXX}
+
+LOCAL_CC=$(command -v ${CC})
+LOCAL_CXX=$(command -v ${CXX})
+LOCAL_CXX_DIR=$(dirname ${LOCAL_CXX})
+GOMA_CC=${goma_bin_dir}/${CC}
+GOMA_CXX=${goma_bin_dir}/${CXX}
+GOMACC=$goma_bin_dir/gomacc
+
+# Build determinism is broken on ChromeOS gcc, and since ChromeOS uses
+# clang as a default compiler (b/31105358), I do not think we need to
+# guarantee build determinism for it.  (b/64499036)
+if [[ "$CC" =~ ^g(cc|\+\+)$ && "$(is_cros_gcc $CC)" = "yes" ]]; then
+  HERMETIC_GCC="FAIL_"
+fi
+
+echo_title "CC=${CC} CXX=${CXX}"
+echo_title "LOCAL CC=${LOCAL_CC} CXX=${LOCAL_CXX}"
+echo_title "GOMA CC=${GOMA_CC} CXX=${GOMA_CXX}"
+
+${GOMACC} --goma-verify-command ${LOCAL_CC} -v
+TASK_ID=1
+
+# keep the list of failed tests in an array
+FAIL=()
+KNOWN_FAIL=()
+
+function fail() {
+  local testname="$1"
+  case "$testname" in
+  FAIL_*)
+      echo_known_fail "FAIL"
+      KNOWN_FAIL+=($testname);;
+  *)
+      echo_fail "FAIL"
+      FAIL+=($testname);;
+  esac
+}
+
+function ok() {
+  echo_ok "OK"
+}
+
+function assert_success() {
+  local cmd="$1"
+  if eval $cmd; then
+    return
+  else
+    echo_fail "FAIL in $cmd"
+    exit 1
+  fi
+}
+
+function dump_request() {
+  local cmd="$1"
+  if [ "$FLAGS_dump" = "" ]; then
+    return
+  fi
+  set -- $cmd
+  cmd=$1
+  case "$cmd" in
+  "$GOMA_CC"|"$GOMA_CXX"|"$GOMACC")
+    echo "[dump:$TASK_ID]"
+    httpfetch 127.0.0.1 "$GOMA_COMPILER_PROXY_PORT" \
+     "/api/taskz?id=$TASK_ID&dump=req" post > /dev/null
+    if [ -d $GOMA_TMP_DIR/task_request_$TASK_ID ]; then
+      httpfetch 127.0.0.1 "$GOMA_COMPILER_PROXY_PORT" \
+       "/api/taskz?id=$TASK_ID" post \
+         > $GOMA_TMP_DIR/task_request_$TASK_ID/task.json
+    fi
+    TASK_ID=$((TASK_ID+1))
+    ;;
+   *)
+    echo "[nodump]";;
+  esac
+}
+
+function expect_success() {  # usage: expect_success TESTNAME CMD
+  local testname="$1"
+  local cmd="$2"
+  echo_bold -n "TEST: "
+  echo -n "${testname}..."
+  if eval $cmd >$tmpdir/cmd_out 2>$tmpdir/cmd_err; then
+    ok
+  else
+    fail $testname
+    echo_bold "cmd: $cmd"
+    cat $tmpdir/cmd_out
+    cat $tmpdir/cmd_err
+  fi
+  dump_request "$cmd"
+  rm -f $tmpdir/cmd_out $tmpdir/cmd_err  # remove the files written above.
+}
+
+function expect_failure() {  # usage: expect_failure TESTNAME CMD
+  local testname="$1"
+  local cmd="$2"
+  echo_bold -n "TEST: "
+  echo -n "${testname}..."
+  if eval $cmd >$tmpdir/cmd_out 2>$tmpdir/cmd_err; then
+    fail $testname
+    echo_bold "cmd: $cmd"
+    cat $tmpdir/cmd_out
+    cat $tmpdir/cmd_err
+  else
+    ok
+  fi
+  dump_request "$cmd"
+  rm -f $tmpdir/cmd_out $tmpdir/cmd_err  # scratch files live in $tmpdir.
+}
+
+function objcmp() {
+ local want=$1
+ local got=$2
+ if command -v readelf > /dev/null 2>&1; then
+    readelf --headers $want > $want.elf
+    readelf --headers $got > $got.elf
+    diff -u $want.elf $got.elf
+    rm -f $want.elf $got.elf
+ fi
+ cmp $want $got
+}
+
+expect_success "${CC}_v" "${GOMA_CC} -v"
+# test $CC
+rm -f out_plain.o
+# build a control binary to test against.
+assert_success "${LOCAL_CC} test/hello.c -c -o out_plain.o"
+rm -f out.o
+expect_success "${CC}_hello" "${GOMA_CC} test/hello.c -c -o out.o"
+expect_success "${HERMETIC_GCC}${CC}_hello.o" "objcmp out_plain.o out.o"
+rm -f a.out
+expect_success "${CC}_hello_run" \
+     "${LOCAL_CC} out.o -o a.out && test \"\$(./a.out)\" = \"Hello world\""
+
+GOMA_FALLBACK=true
+expect_success "${CC}_hello_fallback" "${GOMA_CC} test/hello.c -c -o out.o"
+GOMA_USE_LOCAL=true
+expect_success "${CC}_hello_fallback_use_local" \
+    "${GOMA_CC} test/hello.c -c -o out.o"
+GOMA_FALLBACK=false
+expect_success "${CC}_hello_use_local" "${GOMA_CC} test/hello.c -c -o out.o"
+GOMA_USE_LOCAL=false
+
+rm -f a.out2
+expect_success "FAIL_${CC}_hello_remote_link" \
+     "${GOMA_CC} out.o -o a.out2 && test \"\$(./a.out2)\" = \"Hello world\""
+
+rm -f out_plain.o
+assert_success "${LOCAL_CC} -std=c99 test/hello.c -c -o out_plain.o"
+expect_success "${CC}_stdc99_hello" \
+    "${GOMA_CC} -std=c99 test/hello.c -c -o out.o"
+expect_success "${HERMETIC_GCC}${CC}_stdc99_hello.o" "objcmp out_plain.o out.o"
+
+# test $CXX
+rm -f out_plain.o out.o out2.o
+# build a control binary to test against.
+assert_success "${LOCAL_CXX} test/oneinclude.cc -c -o out_plain.o"
+
+expect_success "${CXX}_oneinclude" \
+    "${GOMA_CXX} test/oneinclude.cc -c -o out.o"
+expect_success "${HERMETIC_GCC}${CXX}_oneinclude.o" \
+    "objcmp out_plain.o out.o"
+expect_success "${CXX}_oneinclude_run" \
+     "${LOCAL_CXX} out.o -o a.out && test \"\$(./a.out)\" = \"Hello world\""
+rm -f a.out2
+expect_success "FAIL_${CXX}_oneinclude_remote_link" \
+     "${GOMA_CXX} out.o -o a.out2 && test \"\$(./a.out2)\" = \"Hello world\""
+
+rm -f out.o
+expect_success "gomacc_${CXX}" \
+     "${GOMACC} $CXX test/oneinclude.cc -c -o out.o"
+expect_success "${HERMETIC_GCC}gomacc_${CXX}_oneinclude.o" \
+    "objcmp out_plain.o out.o"
+rm -f out.o
+expect_success "gomacc_local_${CXX}" \
+     "${GOMACC} $LOCAL_CXX test/oneinclude.cc -c -o out.o"
+expect_success "${HERMETIC_GCC}gomacc_local_${CXX}_oneinclude.o" \
+    "objcmp out_plain.o out.o"
+rm -f out.o out_plain.o
+CURRENT_DIR_BACKUP=$PWD
+cd $LOCAL_CXX_DIR
+assert_success "${LOCAL_CXX} $CURRENT_DIR_BACKUP/test/oneinclude.cc \
+  -c -o $CURRENT_DIR_BACKUP/out_plain.o"
+expect_success "gomacc_relative_path_${CXX}" \
+     "${GOMACC} ./${CXX} $CURRENT_DIR_BACKUP/test/oneinclude.cc \
+     -c -o $CURRENT_DIR_BACKUP/out.o"
+expect_success "${HERMETIC_GCC}gomacc_relative_path_${CXX}_oneinclude.o" \
+    "objcmp ${CURRENT_DIR_BACKUP}/out_plain.o ${CURRENT_DIR_BACKUP}/out.o"
+cd $CURRENT_DIR_BACKUP
+
+rm -f out2.o out_plain.o
+assert_success "${LOCAL_CXX} -xc++ - -c -o out_plain.o < test/oneinclude.cc"
+expect_success "${CXX}_oneinclude_from_stdin" \
+     "${GOMA_CXX} -xc++ - -c -o out2.o < test/oneinclude.cc"
+expect_success "${HERMETIC_GCC}${CXX}_oneinclude.o_from_stdin" \
+     "objcmp out_plain.o out2.o"
+
+
+# oneinclude2
+rm -f out.o
+# - no precompiled header
+expect_success "${CXX}_oneinclude2" \
+   "${GOMA_CXX} -xc++ -Itest -c -o out.o test/oneinclude2.cc"
+# - precompile header
+rm -rf test/tmp
+mkdir -p test/tmp
+expect_success "${CXX}_precompile_common" \
+   "${GOMA_CXX} -xc++-header -c -o test/tmp/common.h.gch test/common.h"
+expect_success "${CXX}_precompile_common_local_output" \
+   "test -f test/tmp/common.h.gch"
+expect_success "${CXX}_precompile_common_remote_output" \
+   "test -f test/tmp/common.h"
+rm -rf test/tmp
+mkdir -p test/tmp
+expect_success "${CXX}_no_x_precompile_common" \
+   "${GOMA_CXX} -c -o test/tmp/common.h.gch test/common.h"
+expect_success "${CXX}_no_x_precompile_common_local_output" \
+   "test -f test/tmp/common.h.gch"
+expect_success "${CXX}_no_x_precompile_common_remote_output" \
+   "test -f test/tmp/common.h"
+
+rm -f out.o out_local.o
+expect_success "${CXX}_oneinclude2_with_precompiled_common" \
+   "${GOMA_CXX} -xc++ -Itest/tmp -c -o out.o test/oneinclude2.cc"
+expect_success "${CXX}_oneinclude2_with_local_precompiled_common" \
+   "${LOCAL_CXX} -xc++ -Itest/tmp -c -o out_local.o test/oneinclude2.cc"
+rm -rf test/tmp out.o out_local.o
+
+# If TSAN tests succeed with LOCAL_CXX, they should also succeed with GOMA_CXX.
+if (${LOCAL_CXX} -DTHREAD_SANITIZER -fsanitize=thread -fPIC \
+    -mllvm -tsan-blacklist=test/tsan-ign.txt \
+    -o out.o -c test/oneinclude.cc \
+    >/dev/null 2>/dev/null); then
+  expect_success "${CXX}_tsan_blacklist" \
+   "${GOMA_CXX} -DTHREAD_SANITIZER -fsanitize=thread -fPIC \
+    -mllvm -tsan-blacklist=test/tsan-ign.txt \
+    -o out.o -c test/oneinclude.cc"
+fi
+if (${LOCAL_CXX} -DTHREAD_SANITIZER -fsanitize=thread -fPIC \
+    -fsanitize-blacklist=test/tsan-ign.txt \
+    -o out.o -c test/oneinclude.cc \
+    >/dev/null 2>/dev/null); then
+  expect_success "${CXX}_thread_sanitize_blacklist" \
+   "${GOMA_CXX} -DTHREAD_SANITIZER -fsanitize=thread -fPIC \
+    -fsanitize-blacklist=test/tsan-ign.txt \
+    -o out.o -c test/oneinclude.cc"
+fi
+
+if [ "$CXX" = "clang++" ]; then
+  # See: b/16826568
+  ext=".so"
+  if [ "$(uname -s)" == "Darwin" ]; then
+    ext=".dylib"
+  fi
+
+  expect_success "${CXX}_load_plugin_in_relative_path" \
+  "${GOMACC} ${LOCAL_CXX} -Xclang -load -Xclang \
+   third_party/llvm-build/Release+Asserts/lib/libFindBadConstructs${ext} \
+   -o out.o -c test/oneinclude.cc"
+fi
+
+# TODO: Since 2015-07-22, -fprofile-generate seems to create
+# default.profraw instead of test.profdata. We need to convert default.profraw
+# to test.profdata with llvm-profdata to use it with -fprofile-use.
+# However, chromium clang does not provide it yet. So, this test might fail.
+# See http://b/22723864
+
+if [ "$CXX" = "clang++" ]; then
+  # chrome's clang doesn't have libprofile_rt.a in lib, so it will fail
+  # /usr/bin/ld: error: cannot open
+  #   /path/to/llvm-build/Release+Asserts/bin/../lib/libprofile_rt.a:
+  #   No such file or directory
+  # clang: error: linker command failed with exit code 1
+  MAYBE_FAIL="FAIL_"
+fi
+expect_success "${MAYBE_FAIL}${CXX}_fprofile_generate" \
+   "${LOCAL_CXX} -xc++ -fprofile-generate test/hello.c"
+
+./a.out > /dev/null
+expect_success "${MAYBE_FAIL}${CXX}_fprofile_use" \
+   "${GOMA_CXX} -xc++ -c -fprofile-use test/hello.c 2> warning"
+expect_success "${MAYBE_FAIL}${CXX}_fprofile_use_local" \
+   "${LOCAL_CXX} -xc++ -c -fprofile-use test/hello.c 2> warning.local"
+expect_success "${MAYBE_FAIL}${CXX}_fprofile_use_warning" \
+   "cmp warning warning.local"
+diff -u warning warning.local
+rm -f out.o a.out hello.o hello.gcda warning.local warning a.out \
+   default.profraw test.profdata
+
+MAYBE_FAIL=
+
+if [ "$(uname)" = "Darwin" ]; then
+  rm -f out.o
+  # failure without fallback
+  expect_failure "${CXX}_multi_arch_no_fallback" \
+   "${GOMA_CXX} -arch i386 -arch x86_64 -c -o out.o test/hello.c"
+  rm -f out.o
+fi
+
+
+rm -f test/compile_error.{out,err} test/compile_error_fallback.{out,err}
+expect_failure "${CXX}_compile_error.cc" \
+  "${GOMA_CXX} test/compile_error.cc -c -o test/compile_error.o \
+    > test/compile_error.out 2> test/compile_error.err"
+
+GOMA_FALLBACK=true  # run local when remote failed.
+GOMA_USE_LOCAL=false  # don't run local when idle.
+expect_failure "${CXX}_fail_fallback" \
+  "${GOMA_CXX} test/compile_error.cc -c -o test/compile_error.o \
+  > test/compile_error_fallback.out 2> test/compile_error_fallback.err"
+
+expect_success "compile_error_out" \
+  "cmp test/compile_error.out test/compile_error_fallback.out"
+expect_success "compile_error_err" \
+  "cmp test/compile_error.err test/compile_error_fallback.err"
+
+if [ "$(uname)" = "Darwin" ]; then
+  rm -f out.o
+  expect_success "${CXX}_multi_arch_fallback" \
+   "${GOMA_CXX} -arch i386 -arch x86_64 -c -o out.o test/hello.c"
+  rm -f out.o
+fi
+
+expect_success "no_path_env" \
+  "(unset PATH; ${goma_bin_dir}/gomacc ${LOCAL_CXX} -c -o out.o test/hello.c)"
+rm -f out.o
+expect_success "empty_path_env" \
+  "PATH= ${goma_bin_dir}/gomacc ${LOCAL_CXX} -c -o out.o test/hello.c"
+rm -f out.o
+
+expect_failure "gomacc_gomacc" \
+  "${GOMACC} ${GOMA_CC} -c -o out.o test/hello.c"
+rm -f out.o
+expect_success "gomacc_path_gomacc" \
+  "PATH=${goma_bin_dir}:$PATH \
+   ${GOMACC} ${CC} -c -o out.o \
+   test/hello.c"
+rm -f out.o
+
+expect_failure "disabled_true_masquerade_gcc" \
+  "GOMA_DISABLED=1 \
+   ${GOMA_CC} -c -o out.o test/hello.c"
+rm -f out.o
+
+curl -s http://localhost:$GOMA_COMPILER_PROXY_PORT/statz | grep request > stat_before.txt
+expect_success "disabled_true_gomacc_local_path_gcc" \
+  "GOMA_DISABLED=1 \
+   ${GOMACC} ${LOCAL_CC} -c -o out.o test/hello.c"
+curl -s http://localhost:$GOMA_COMPILER_PROXY_PORT/statz | grep request > stat_after.txt
+expect_success "disabled_true_gomacc_local_path_gcc_not_delivered" \
+   "cmp stat_before.txt stat_after.txt"
+diff -u stat_before.txt stat_after.txt
+
+rm -f out.o
+rm -f stat_before.txt
+rm -f stat_after.txt
+
+expect_success "disabled_true_gomacc_masquerade_gcc" \
+  "GOMA_DISABLED=1 \
+   PATH=${goma_bin_dir}:$PATH \
+   ${GOMACC} ${CC} -c -o out.o test/hello.c"
+rm -f out.o
+
+expect_success "disabled_true_gomacc_gcc_in_local_path" \
+  "GOMA_DISABLED=1 \
+   PATH=$(dirname ${LOCAL_CC}) \
+   ${GOMACC} ${CC} -c -o out.o test/hello.c"
+rm -f out.o
+
+# GOMA_HERMETIC=error
+
+if [ "$(uname)" = "Linux" ]; then
+  OBJCOPY=$test_dir/third_party/binutils/Linux_x64/Release/bin/objcopy
+  if [ ! -f $OBJCOPY ]; then
+    OBJCOPY=$test_dir/third_party/binutils/Linux_ia32/Release/bin/objcopy
+  fi
+  # If the bundled objcopy does not exist, fall back to the system's objcopy.
+  if [ ! -f $OBJCOPY ]; then
+    OBJCOPY=$(which objcopy)
+  fi
+  echo "Using objcopy: ${OBJCOPY}" 1>&2
+  cp -p ${OBJCOPY} ./objcopy
+  expect_success "${CC}_unmodified_objcopy_with_hermetic" \
+    "${GOMACC} ${LOCAL_CC} -gsplit-dwarf -B. -c -o out.o test/hello.c"
+  rm -f ./objcopy
+  rm -f out.o
+
+  # create objcopy with different SHA256.
+  cp -p ${OBJCOPY} ./objcopy
+  echo >> ./objcopy
+  # Since chromeos toolchain has
+  # force_store_output_file_for_unmatched_subprograms,
+  # mismatch of objcopy should not be treated as mismatch error.
+  if [ "$CC" = "gcc" -a "$(is_cros_gcc $CC)" = "yes" ]; then
+    expect_success "unknown_objcopy_with_hermetic_for_cros_gcc" \
+      "${GOMACC} ${LOCAL_CC} -gsplit-dwarf -B. -c -o out.o test/hello.c"
+  else
+    expect_failure "${CC}_unknown_objcopy_with_hermetic" \
+      "${GOMACC} ${LOCAL_CC} -gsplit-dwarf -B. -c -o out.o test/hello.c"
+  fi
+  rm -f ./objcopy
+
+  cp -p ${OBJCOPY} ./objcopy
+  expect_success "${CC}_after_unknown_objcopy_with_hermetic" \
+    "${GOMACC} ${LOCAL_CC} -gsplit-dwarf -B. -c -o out.o test/hello.c"
+  rm -f ./objcopy
+  rm -f out.o
+
+  # check PWD=/proc/self/cwd gcc -fdebug-prefix-map=/proc/self/cwd=
+  # http://b/27487704
+  mkdir dir1 dir2
+  cp test/hello.c dir1
+  (cd dir1; expect_success "${CC}_no_pwd_in_dir1" \
+     "PWD=/proc/self/cwd ${GOMACC} ${LOCAL_CC} \
+     -fdebug-prefix-map=/proc/self/cwd= -g -c -o out.o hello.c")
+  cp test/hello.c dir2
+  (cd dir2; expect_success "${CC}_no_pwd_in_dir2" \
+     "PWD=/proc/self/cwd ${GOMACC} ${LOCAL_CC} \
+      -fdebug-prefix-map=/proc/self/cwd= -g -c -o out.o hello.c")
+  expect_success "${CC}_deterministic_no_pwd" \
+     "cmp dir1/out.o dir2/out.o"
+  readelf --debug-dump dir1/out.o > dir1/out.debug
+  readelf --debug-dump dir2/out.o > dir2/out.debug
+  diff -u dir1/out.debug dir2/out.debug
+  rm -rf dir1 dir2
+
+  # check PWD=/proc/self/cwd ~/goma/gomacc linux-x86/clang-2690385/bin/clang -c
+  # -g -fdebug-prefix-map=/proc/self/cwd= -no-canonical-prefixes test.c
+  # b/28088682
+  if [ "$GOMATEST_USE_CHROMIUM_CLANG" = "1" ]; then
+    clang_path="../$(basename "$(dirname "$(dirname ${LOCAL_CC})")")/bin/clang"
+    cp -rp "$(dirname "$(dirname ${LOCAL_CC})")" .
+    # Local case.
+    mkdir dir1 dir2
+    cp test/test_pwd_hack.c dir1
+    (cd dir1; expect_success "${CC}_no_pwd_in_include_wo_goma_local" \
+      "GOMA_USE_LOCAL=true PWD=/proc/self/cwd ${clang_path} -c -g \
+      -fdebug-prefix-map=/proc/self/cwd= -no-canonical-prefixes \
+      -o out.o test_pwd_hack.c")
+    cp test/test_pwd_hack.c dir2
+    (cd dir2; expect_success "${CC}_no_pwd_in_include_with_goma_local" \
+      "GOMA_USE_LOCAL=true PWD=/proc/self/cwd ${GOMACC} ${clang_path} \
+      -c -g -fdebug-prefix-map=/proc/self/cwd= -no-canonical-prefixes \
+      -o out.o test_pwd_hack.c")
+    expect_success "${CC}_deterministic_no_pwd_in_include_local" \
+      "cmp dir1/out.o dir2/out.o"
+    readelf --debug-dump dir1/out.o > dir1/out.debug
+    readelf --debug-dump dir2/out.o > dir2/out.debug
+    diff -u dir1/out.debug dir2/out.debug
+    rm -rf dir1 dir2
+
+    # TODO: implement this when the issue for the remote case is fixed.
+  fi
+fi
+
+if [ "$GOMATEST_USE_CHROMIUM_CLANG" = "1" ]; then
+  # automatically detects .so and .dylib.
+  CLANG_PLUGIN=$(echo $(dirname $LOCAL_CXX)/../lib/libFindBadConstructs.*)
+  CLANG_PLUGIN_BASE=$(basename $CLANG_PLUGIN)
+
+  cp -p ${CLANG_PLUGIN} ./${CLANG_PLUGIN_BASE}
+  expect_success "${CXX}_unmodified_plugin_with_hermetic" \
+    "${GOMACC} ${LOCAL_CXX} -Xclang -load -Xclang ./${CLANG_PLUGIN_BASE} \
+    -c -o out.o test/hello.c"
+  rm -f ${CLANG_PLUGIN_BASE}
+  rm -f out.o
+
+  cp -p ${CLANG_PLUGIN} ./${CLANG_PLUGIN_BASE}
+  echo >> ./${CLANG_PLUGIN_BASE}
+  expect_failure "${CXX}_unknown_plugin_with_hermetic" \
+    "${GOMACC} ${LOCAL_CXX} -Xclang -load -Xclang ./${CLANG_PLUGIN_BASE} \
+    -c -o out.o test/hello.c"
+  rm -f ${CLANG_PLUGIN_BASE}
+  rm -f out.o
+
+  cp -p ${CLANG_PLUGIN} ./${CLANG_PLUGIN_BASE}
+  expect_success "${CXX}_after_unknown_plugin_with_hermetic" \
+    "${GOMACC} ${LOCAL_CXX} -Xclang -load -Xclang ./${CLANG_PLUGIN_BASE} \
+    -c -o out.o test/hello.c"
+  rm -f ${CLANG_PLUGIN_BASE}
+  rm -f out.o
+fi
+
+GOMA_USE_LOCAL=false
+GOMA_FALLBACK=false
+expect_success "${CXX}_compile_with_umask_remote" \
+  "(umask 777; ${GOMACC} ${LOCAL_CC} -o out.o -c test/hello.c)"
+expect_success "${CXX}_expected_umask_remote" \
+  "[ \"$(ls -l out.o | awk '{ print $1}')\" = \"----------\" ]"
+rm -f out.o
+
+GOMA_USE_LOCAL=true
+expect_success "${CXX}_compile_with_umask_local" \
+  "(umask 777; ${GOMACC} ${LOCAL_CC} -o out.o -c test/hello.c)"
+expect_success "${CXX}_expected_umask_local" \
+  "[ \"$(ls -l out.o | awk '{ print $1}')\" = \"----------\" ]"
+rm -f out.o
+
+curl --dump-header header.out \
+  -X POST --data-binary @${test_dir}/badreq.bin \
+  -H 'Content-Type: binary/x-protocol-buffer' \
+  http://localhost:${GOMA_COMPILER_PROXY_PORT}/e
+expect_success "access_rejected" \
+  "head -1 header.out | grep -q 'HTTP/1.1 401 Unauthorized'"
+rm -f header.out
+
+if [ -n "${GLOG_log_dir:-}" ]; then
+  # Smoke test to confirm gomacc does not create many logs.
+  # Several tests do make gomacc write logs, but there should not be
+  # many of them.
+  expect_success "smoke_test_gomacc_does_not_create_logs_much" \
+    "[ \"$(echo ${GLOG_log_dir}/gomacc.* | wc -w)\" -lt "20" ]"
+fi
+
+# Gomacc should write log to GLOG_log_dir.
+mkdir -p "$tmpdir/gomacc_test"
+expect_success "gomacc_should_succeed_with_write_log_flag" \
+    "GOMA_GOMACC_WRITE_LOG_FOR_TESTING=true \
+     GLOG_log_dir=${tmpdir}/gomacc_test ${GOMACC}"
+expect_success "gomacc_should_create_log_file" \
+  "[ \"$(echo ${tmpdir}/gomacc_test/gomacc.* | wc -w)\" -eq 2 ]"
+
+if [ "${#FAIL[@]}" -ne 0 ]; then
+  echo_fail "Failed tests: ${FAIL[@]}"
+fi
+if [ "${#KNOWN_FAIL[@]}" -ne 0 ]; then
+  echo_known_fail "Known failed tests: ${KNOWN_FAIL[@]}"
+fi
+if [ "${#FAIL[@]}" -eq 0 -a "${#KNOWN_FAIL[@]}" -eq 0 ]; then
+  echo_ok "All tests passed: $CC $CXX"
+fi
+
+if [ "$GOMATEST_USE_CHROMIUM_CLANG" = "" ]; then
+ if [ -x "$CLANG_PATH/clang" -a -x "$CLANG_PATH/clang++" ]; then
+   PATH=$CLANG_PATH:$PATH
+   # clang (clang version 1.1) shipped in ubuntu/lucid are too old and
+   # don't support -dumpmachine option.
+   if clang -v > /dev/null 2>&1 && clang++ -v > /dev/null 2>&1 && \
+      clang -dumpmachine > /dev/null 2>&1 && \
+      clang++ -dumpmachine > /dev/null 2>&1 ; then
+     GOMATEST_USE_RUNNING_COMPILER_PROXY=1 \
+      GOMATEST_USE_CHROMIUM_CLANG=1 \
+      CC=clang CXX=clang++ \
+       $test_dir/$(basename $0)
+     if [ "$?" != 0 ]; then
+       FAIL+=("clang");
+     fi
+   else
+     echo_warn "WARNING: clang in $CLANG_PATH is too old."
+   fi
+ else
+   echo_warn "WARNING: no clang in $CLANG_PATH"
+ fi
+fi
+
+if [ "$FLAGS_dump" != "" ]; then
+   (cd $GOMA_TMP_DIR && tar zcf $FLAGS_dump task_request_*)
+   echo "task dump in $FLAGS_dump"
+fi
+
+if [ "$FLAGS_wait" = "1" ]; then
+  echo -n "Ready to finish? "
+  read
+fi
+echo exit "${#FAIL[@]} # ${CC} ${CXX}"
+exit "${#FAIL[@]}"
diff --git a/test/test_pwd_hack.c b/test/test_pwd_hack.c
new file mode 100644
index 0000000..275ae5d
--- /dev/null
+++ b/test/test_pwd_hack.c
@@ -0,0 +1,7 @@
+// Copyright 2012 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Test case for b/28088682.
+#include <stddef.h>
+ptrdiff_t test() { return 0; }
diff --git a/test/third_party/binutils/.gitignore b/test/third_party/binutils/.gitignore
new file mode 100644
index 0000000..5605b2f
--- /dev/null
+++ b/test/third_party/binutils/.gitignore
@@ -0,0 +1,8 @@
+binutils-*
+*-chroot-*
+output-*
+Linux_ia32/*stamp*
+Linux_ia32/*tar.bz2
+Linux_x64/*stamp*
+Linux_x64/*tar.bz2
+*/Release
diff --git a/test/third_party/binutils/Linux_ia32/binutils.tar.bz2.sha1 b/test/third_party/binutils/Linux_ia32/binutils.tar.bz2.sha1
new file mode 100644
index 0000000..ced91d2
--- /dev/null
+++ b/test/third_party/binutils/Linux_ia32/binutils.tar.bz2.sha1
@@ -0,0 +1 @@
+a0d516b95f19512a112cdad25259dd56b369863e
\ No newline at end of file
diff --git a/test/third_party/binutils/Linux_x64/binutils.tar.bz2.sha1 b/test/third_party/binutils/Linux_x64/binutils.tar.bz2.sha1
new file mode 100644
index 0000000..769e08b
--- /dev/null
+++ b/test/third_party/binutils/Linux_x64/binutils.tar.bz2.sha1
@@ -0,0 +1 @@
+3dc3f9f1ba7b3a9df28adb104b4ed37f1c26f68a
\ No newline at end of file
diff --git a/test/third_party/binutils/download.py b/test/third_party/binutils/download.py
new file mode 100755
index 0000000..5aaf1f4
--- /dev/null
+++ b/test/third_party/binutils/download.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python
+#
+# Copied from chromium's third_party/binutils directory, and modified for goma.
+#
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+# vim: set ts=2 sw=2 et sts=2 ai:
+
+"""Minimal tool to download binutils from Google storage.
+
+TODO: Replace with generic download_and_extract tool.
+"""
+
+import argparse
+import os
+import platform
+import re
+import shutil
+import subprocess
+import sys
+
+
+BINUTILS_DIR = os.path.abspath(os.path.dirname(__file__))
+BINUTILS_FILE = 'binutils.tar.bz2'
+BINUTILS_TOOLS = ['bin/ld.gold', 'bin/objcopy', 'bin/objdump']
+BINUTILS_OUT = 'Release'
+
+DETECT_HOST_ARCH = os.path.abspath(os.path.join(
+    BINUTILS_DIR, '../../../build/detect_host_arch.py'))
+
+
+def ReadFile(filename):
+  with open(filename, 'r') as f:
+    return f.read().strip()
+
+
+def WriteFile(filename, content):
+  # Refuses (via assert) to overwrite a file that already exists.
+  assert not os.path.exists(filename)
+  with open(filename, 'w') as f:
+    f.write(content + '\n')
+
+
+def GetArch():
+  gyp_host_arch = re.search(
+      r'host_arch=(\S*)', os.environ.get('GYP_DEFINES', ''))
+  if gyp_host_arch:
+    arch = gyp_host_arch.group(1)
+    # This matches detect_host_arch.py.
+    if arch == 'x86_64':
+      return 'x64'
+    return arch
+  # check_output yields bytes on Python 3; decode so callers compare text.
+  return subprocess.check_output(['python', DETECT_HOST_ARCH]).decode().strip()
+
+
+def FetchAndExtract(arch):
+  archdir = os.path.join(BINUTILS_DIR, 'Linux_' + arch)
+  tarball = os.path.join(archdir, BINUTILS_FILE)
+  outdir = os.path.join(archdir, BINUTILS_OUT)
+
+  sha1file = tarball + '.sha1'
+  if not os.path.exists(sha1file):
+    print("WARNING: No binutils found for your architecture (%s)!" % arch)
+    return 0
+
+  checksum = ReadFile(sha1file)
+
+  stampfile = tarball + '.stamp'
+  if os.path.exists(stampfile):
+    if (os.path.exists(tarball) and
+        os.path.exists(outdir) and
+        checksum == ReadFile(stampfile)):
+      return 0
+    else:
+      os.unlink(stampfile)
+
+  print("Downloading %s" % tarball)
+  subprocess.check_call([
+      'download_from_google_storage',
+      '--no_resume',
+      '--no_auth',
+      '--bucket', 'chromium-binutils',
+      '-s', sha1file])
+  assert os.path.exists(tarball)
+
+  if os.path.exists(outdir):
+    shutil.rmtree(outdir)
+  assert not os.path.exists(outdir)
+  os.makedirs(outdir)
+  assert os.path.exists(outdir)
+
+  print("Extracting %s" % tarball)
+  subprocess.check_call(['tar', 'axf', tarball], cwd=outdir)
+
+  for tool in BINUTILS_TOOLS:
+    assert os.path.exists(os.path.join(outdir, tool))
+
+  WriteFile(stampfile, checksum)
+  return 0
+
+
+def main(args):
+  parser = argparse.ArgumentParser(description=__doc__)
+  parser.add_argument('--ignore-if-arch', metavar='ARCH',
+                      action='append', default=[],
+                      help='Do nothing on host architecture ARCH')
+
+  options = parser.parse_args(args)
+
+  if not sys.platform.startswith('linux'):
+    return 0
+
+  arch = GetArch()
+  if arch in options.ignore_if_arch:
+    return 0
+
+  if arch == 'x64':
+    return FetchAndExtract(arch)
+  if arch == 'ia32':
+    ret = FetchAndExtract(arch)
+    if ret != 0:
+      return ret
+    # Fetch the x64 toolchain as well for official bots with 64-bit kernels.
+    return FetchAndExtract('x64')
+
+  print("Host architecture %s is not supported." % arch)
+  return 1
+
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv[1:]))
diff --git a/test/tsan-ign.txt b/test/tsan-ign.txt
new file mode 100644
index 0000000..f1a52b7
--- /dev/null
+++ b/test/tsan-ign.txt
@@ -0,0 +1,7 @@
+# Copyright 2016 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+#
+# Unavoidable races in glog which won't be actual problems.
+fun:_ZN6google11InitVLOG3__EPPiS0_PKci
+fun:_ZN6google19RawLog__SetLastTimeERK2tmi
diff --git a/test/tsan.sh b/test/tsan.sh
new file mode 100755
index 0000000..d7d29aa
--- /dev/null
+++ b/test/tsan.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+#
+# Copyright 2012 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+#
+
+test_dir=$(cd $(dirname $0); pwd)
+
+. $test_dir/gomatest.sh
+
+echo Kill any remaining compiler proxy
+killall compiler_proxy
+
+export GOMA_COMPILER_PROXY_PORT=8100
+export GOMA_COMPILER_PROXY_NUM_FIND_PORTS=1
+
+set_goma_dirs "$1"
+
+echo "Starting $GOMA_COMPILER_PROXY_BINARY with tsan..."
+( cd /tmp && \
+  tsan --ignore=$test_dir/tsan-ign.txt \
+  $GOMA_COMPILER_PROXY_BINARY > tsan.log 2>&1 &
+)
+update_compiler_proxy_port $(dirname $GOMA_COMPILER_PROXY_BINARY) 10
+watch_healthz localhost ${GOMA_COMPILER_PROXY_PORT} /healthz \
+  ${GOMA_COMPILER_PROXY_BINARY}
+
+function at_exit() {
+  rm -f /tmp/goma-test-tmp.c /tmp/goma-test-tmp.o
+  stop_compiler_proxy
+  wait
+  echo 'Done. See /tmp/tsan.log'
+}
+
+trap at_exit exit sighup sigpipe
+
+cat <<EOF > /tmp/goma-test-tmp.c
+#include <stdio.h>
+int main() {
+  puts("hello-");
+}
+EOF
+
+# TODO: Reversing the order of these seems to change the result; investigate
+#       a way to check more cases. (-o puts the .o where at_exit removes it.)
+GOMA_USE_LOCAL=0 $goma_bin_dir/gomacc gcc -c /tmp/goma-test-tmp.c -o /tmp/goma-test-tmp.o
+$goma_bin_dir/gomacc gcc -c /tmp/goma-test-tmp.c -o /tmp/goma-test-tmp.o
+
+curl http://localhost:$GOMA_COMPILER_PROXY_PORT/ > /dev/null
+curl -d '' http://localhost:$GOMA_COMPILER_PROXY_PORT/api/taskz > /dev/null
diff --git a/test/verify_normalized_jar.py b/test/verify_normalized_jar.py
new file mode 100644
index 0000000..6b37cc2
--- /dev/null
+++ b/test/verify_normalized_jar.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+#
+# Copyright 2017 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Verifies normalized jar file.
+
+% verify_normalized_jar.py <original_jar_file> <normalized_jar_file>
+
+The program confirms:
+  - timestamps of all files and directories in normalized jar file are
+    MS-DOS epoch time (1980-01-01T00:00:00).
+  - normalized_jar_file has the same contents with original jar file.
+"""
+
+import datetime
+import hashlib
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import time
+
+
+JAR_COMMAND_PATH = '/usr/bin/jar'
+
+
+def CalcFileSha256(filename):
+  """Computes the SHA256 hex digest of a file's content.
+
+  Args:
+    filename: path of the file to hash; it is read in binary mode.
+
+  Returns:
+    the digest of the whole file as a hexadecimal string.
+  """
+  with open(filename, 'rb') as stream:
+    return hashlib.sha256(stream.read()).hexdigest()
+
+
+def GetListOfContents(filename):
+  """Returns contents information of a given jar file.
+
+  Args:
+    filename: a filename of a jar file.
+
+  Returns:
+    a dictionary from a filename in the jar file to its information.
+    e.g. {
+      '/META-INF/MANIFEST.MF': {'size': 76, 'sha256': 'abcdef...'},
+      ...
+    }
+  """
+  file_to_info = {}
+  tmpdir = tempfile.mkdtemp()
+  try:
+    # Extract with cwd= instead of os.chdir() so that this process is not left
+    # sitting in a directory that is removed below.
+    subprocess.check_call([JAR_COMMAND_PATH, 'xf', filename], cwd=tmpdir)
+    for root, _, files in os.walk(tmpdir):
+      for name in files:
+        path = os.path.join(root, name)
+        file_to_info[path[len(tmpdir):]] = {
+            'size': os.stat(path).st_size,
+            'sha256': CalcFileSha256(path),
+        }
+  finally:
+    shutil.rmtree(tmpdir)
+  return file_to_info
+
+
+def VerifyNormalizedJarFile(filename, file_infos):
+  """Verifies that we have normalized the jar file timestamp.
+
+  Args:
+    filename: a normalized jar filename to verify.
+    file_infos: an original jar file info got by GetListOfContents.
+  """
+  msdos_epoch = time.mktime(datetime.datetime(1980, 1, 1, 0, 0, 0).timetuple())
+  tmpdir = tempfile.mkdtemp()
+  try:
+    # Extract with cwd= instead of os.chdir() so that this process is not left
+    # sitting in a directory that is removed below.
+    subprocess.check_call([JAR_COMMAND_PATH, 'xf', filename], cwd=tmpdir)
+    for root, _, files in os.walk(tmpdir):
+      if root != tmpdir:
+        assert os.stat(root).st_mtime == msdos_epoch
+      for name in files:
+        path = os.path.join(root, name)
+        path_info = file_infos[path[len(tmpdir):]]
+        assert os.stat(path).st_mtime == msdos_epoch
+        assert os.stat(path).st_size == path_info['size']
+        assert CalcFileSha256(path) == path_info['sha256']
+  finally:
+    shutil.rmtree(tmpdir)
+
+
+def main(argv):
+  """Checks the two jars named in argv; returns 1 on a usage error, else 0."""
+  if len(argv) != 3:
+    sys.stderr.write(
+        'Usage: %s <original jar file> <normalized jar file>\n'
+        'e.g. %s Basic.jar Basic_normalized.jar\n' % (argv[0], argv[0]))
+    return 1
+  original_jar = os.path.realpath(argv[1])
+  normalized_jar = os.path.realpath(argv[2])
+  VerifyNormalizedJarFile(normalized_jar, GetListOfContents(original_jar))
+  return 0
+
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv))
diff --git a/testing/libfuzzer/BUILD.gn b/testing/libfuzzer/BUILD.gn
new file mode 100644
index 0000000..2c4e19e
--- /dev/null
+++ b/testing/libfuzzer/BUILD.gn
@@ -0,0 +1,43 @@
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# LibFuzzer is a LLVM tool for coverage-guided fuzz testing.
+# See http://www.chromium.org/developers/testing/libfuzzer
+#
+# To enable libfuzzer, 'use_libfuzzer' GN option should be set to true.
+# Or to enable afl, 'use_afl' GN option should be set to true.
+
+source_set("libfuzzer_main") {
+  deps = []
+  sources = []
+  if (use_libfuzzer) {
+    deps += [ "//third_party/libFuzzer:libfuzzer" ]
+    #} else if (use_afl) {
+    #  deps += [ "//third_party/libFuzzer:afl_driver" ]
+    #} else if (use_drfuzz) {
+    #  sources += [ "drfuzz_main.cc" ]
+  } else {
+    sources += [ "unittest_main.cc" ]
+  }
+}
+
+# A config used by all fuzzer_tests.
+config("fuzzer_test_config") {
+  if (use_libfuzzer && os == "mac") {
+    ldflags = [
+      "-Wl,-U,_LLVMFuzzerCustomMutator",
+      "-Wl,-U,_LLVMFuzzerInitialize",
+    ]
+  }
+}
+
+# Noop config used to tag fuzzer tests excluded from clusterfuzz.
+# Libfuzzer build bot uses this to filter out targets while
+# building an archive for clusterfuzz.
+config("no_clusterfuzz") {
+}
+
+# noop to tag seed corpus rules.
+source_set("seed_corpus") {
+}
diff --git a/testing/libfuzzer/README b/testing/libfuzzer/README
new file mode 100644
index 0000000..e073a1a
--- /dev/null
+++ b/testing/libfuzzer/README
@@ -0,0 +1 @@
+Files are copied from chromium's testing/libfuzzer.
diff --git a/testing/libfuzzer/archive_corpus.py b/testing/libfuzzer/archive_corpus.py
new file mode 100755
index 0000000..7e39bb5
--- /dev/null
+++ b/testing/libfuzzer/archive_corpus.py
@@ -0,0 +1,39 @@
+#!/usr/bin/python2
+#
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Archive corpus file into zip and generate .d depfile.
+
+Invoked by GN from fuzzer_test.gni.
+"""
+
+from __future__ import print_function
+import argparse
+import os
+import sys
+import zipfile
+
+
+def main():
+  parser = argparse.ArgumentParser(
+      description="Archive a seed corpus directory into a zip file.")
+  parser.add_argument('--corpus', required=True)
+  parser.add_argument('--output', required=True)
+  # --fuzzer is required by the interface but its value is not used here.
+  parser.add_argument('--fuzzer', required=True)
+  args = parser.parse_args()
+  # os.walk order is filesystem-dependent; sort for a reproducible archive.
+  corpus_files = sorted(
+      os.path.join(dirpath, filename)
+      for dirpath, _, filenames in os.walk(args.corpus)
+      for filename in filenames)
+
+  with zipfile.ZipFile(args.output, 'w') as z:
+    for corpus_file in corpus_files:
+      z.write(corpus_file, os.path.basename(corpus_file))
+
+
+if __name__ == '__main__':
+  main()
diff --git a/testing/libfuzzer/fuzzer_test.gni b/testing/libfuzzer/fuzzer_test.gni
new file mode 100644
index 0000000..fec1314
--- /dev/null
+++ b/testing/libfuzzer/fuzzer_test.gni
@@ -0,0 +1,154 @@
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Defines fuzzer_test.
+#
+import("//testing/test.gni")
+
+# fuzzer_test is used to define individual libfuzzer tests.
+#
+# Supported attributes:
+# - (required) sources - fuzzer test source files
+# - deps - test dependencies
+# - additional_configs - additional configs to be used for compilation
+# - dict - a dictionary file for the fuzzer.
+# - libfuzzer_options - options for the fuzzer (e.g. -max_len or -timeout).
+# - seed_corpus - a directory with seed corpus.
+#
+# If the use_libfuzzer gn flag is set, a proper fuzzer is built.
+# Otherwise the whole target is a no-op: an empty group is declared and the
+# attributes are only marked as used.
+#
+# The template wraps test() target with appropriate dependencies.
+# If any test run-time options are present (dict or libfuzzer_options), then a
+# config file (.options) is generated in the root output dir (next to the
+# test).
+template("fuzzer_test") {
+  if (use_libfuzzer) {
+    assert(defined(invoker.sources), "Need sources in $target_name.")
+
+    test_deps = [ "//testing/libfuzzer:libfuzzer_main" ]
+
+    if (defined(invoker.deps)) {
+      test_deps += invoker.deps
+    }
+
+    if (defined(invoker.seed_corpus)) {
+      out = "$root_build_dir/$target_name" + "_seed_corpus.zip"
+      # Archive the seed corpus directory into a zip in the root build dir.
+      action(target_name + "_seed_corpus") {
+        script = "//testing/libfuzzer/archive_corpus.py"
+        args = [
+          "--corpus",
+          rebase_path(invoker.seed_corpus),
+          "--output",
+          rebase_path(out),
+          "--fuzzer",
+          rebase_path("$root_build_dir/$target_name"),
+        ]
+
+        outputs = [
+          out,
+        ]
+        deps = [
+          "//testing/libfuzzer:seed_corpus",
+        ]
+      }
+
+      test_deps += [ ":" + target_name + "_seed_corpus" ]
+    }
+
+    if (defined(invoker.dict) || defined(invoker.libfuzzer_options)) {
+      if (defined(invoker.dict)) {
+        # Copy dictionary to output.
+        copy(target_name + "_dict_copy") {
+          sources = [
+            invoker.dict,
+          ]
+          outputs = [
+            "$root_build_dir/" + target_name + ".dict",
+          ]
+        }
+        test_deps += [ ":" + target_name + "_dict_copy" ]
+      }
+
+      # Generate .options file.
+      config_name = target_name + ".options"
+      action(config_name) {
+        script = "//testing/libfuzzer/gen_fuzzer_config.py"
+        args = [
+          "--config",
+          rebase_path("$root_build_dir/" + config_name),
+        ]
+
+        if (defined(invoker.dict)) {
+          args += [
+            "--dict",
+            rebase_path("$root_build_dir/" + invoker.target_name + ".dict"),
+          ]
+        }
+
+        if (defined(invoker.libfuzzer_options)) {
+          args += [ "--libfuzzer_options" ]
+          args += invoker.libfuzzer_options
+        }
+
+        outputs = [
+          "$root_build_dir/$config_name",
+        ]
+      }
+      test_deps += [ ":" + config_name ]
+    }
+
+    test(target_name) {
+      forward_variables_from(invoker,
+                             [
+                               "check_includes",
+                               "defines",
+                               "include_dirs",
+                               "sources",
+                             ])
+      deps = test_deps
+
+      if (defined(invoker.additional_configs)) {
+        configs += invoker.additional_configs
+      }
+
+      # TODO: fix this, not sure why no identifier can be
+      #                    found from here.
+      #configs += [ "//testing/libfuzzer:fuzzer_test_config" ]
+
+      # Used by WebRTC to suppress some Clang warnings in their codebase.
+      if (defined(invoker.suppressed_configs)) {
+        configs -= invoker.suppressed_configs
+      }
+    }
+  } else {
+    # No-op on unsupported platforms: only an empty group is declared.
+    # Every supported attribute is referenced in an always-true assert, which
+    # marks it as used. Optional attributes are guarded with defined(), and
+    # each assert holds for any value (including an empty list), so a valid
+    # invocation never fails here.
+    assert(invoker.sources == [] || invoker.sources != [])
+    assert(!defined(invoker.additional_configs) ||
+           invoker.additional_configs == [] || invoker.additional_configs != [])
+    assert(!defined(invoker.deps) || invoker.deps == [] || invoker.deps != [])
+    assert(!defined(invoker.dict) || invoker.dict == [] || invoker.dict != [])
+    assert(!defined(invoker.libfuzzer_options) ||
+           invoker.libfuzzer_options == [] || invoker.libfuzzer_options != [])
+    assert(!defined(invoker.seed_corpus) || invoker.seed_corpus == [] ||
+           invoker.seed_corpus != [])
+    assert(!defined(invoker.suppressed_configs) ||
+           invoker.suppressed_configs == [] || invoker.suppressed_configs != [])
+    assert(!defined(invoker.check_includes) ||
+           invoker.check_includes == [] || invoker.check_includes != [])
+    assert(!defined(invoker.include_dirs) || invoker.include_dirs == [] ||
+           invoker.include_dirs != [])
+    assert(!defined(invoker.defines) || invoker.defines == [] ||
+           invoker.defines != [])
+
+    group(target_name) {
+    }
+  }
+}
diff --git a/testing/libfuzzer/gen_fuzzer_config.py b/testing/libfuzzer/gen_fuzzer_config.py
new file mode 100755
index 0000000..310b7b9
--- /dev/null
+++ b/testing/libfuzzer/gen_fuzzer_config.py
@@ -0,0 +1,46 @@
+#!/usr/bin/python2
+#
+# Copyright (c) 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Generate or update an existing config (.options file) for libfuzzer test.
+
+Invoked by GN from fuzzer_test.gni.
+"""
+
+import argparse
+import os
+import sys
+
+
+CONFIG_HEADER = '''# This is an automatically generated config for libFuzzer.
+[libfuzzer]
+'''
+
+def main():
+  """Writes the libFuzzer .options file named by --config."""
+  arg_parser = argparse.ArgumentParser(description="Generate fuzzer config.")
+  arg_parser.add_argument('--config', required=True)
+  arg_parser.add_argument('--dict')
+  arg_parser.add_argument('--libfuzzer_options', nargs='+', default=[])
+  options = arg_parser.parse_args()
+
+  # Not expected to be run with neither --dict nor --libfuzzer_options, but in
+  # that case there is nothing to generate.
+  if not (options.dict or options.libfuzzer_options):
+    return
+
+  lines = [CONFIG_HEADER]
+  # The dictionary gets copied into the build directory, so the config only
+  # needs its basename.
+  if options.dict:
+    lines.append('dict = %s\n' % os.path.basename(options.dict))
+  lines.extend(option + '\n' for option in options.libfuzzer_options)
+
+  # Create (or overwrite) the .options file with the collected content.
+  with open(options.config, 'w') as options_file:
+    options_file.write(''.join(lines))
+
+if __name__ == '__main__':
+  main()
diff --git a/testing/libfuzzer/unittest_main.cc b/testing/libfuzzer/unittest_main.cc
new file mode 100644
index 0000000..87c7069
--- /dev/null
+++ b/testing/libfuzzer/unittest_main.cc
@@ -0,0 +1,59 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// A simple unit-test style driver for libfuzzer tests.
+// Usage: <fuzzer_test> <file>...
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <vector>
+
+// Libfuzzer API.
+extern "C" {
+  // User function.
+  int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);
+  // Initialization function.
+  __attribute__((weak)) int LLVMFuzzerInitialize(int *argc, char ***argv);
+  // Mutation function provided by libFuzzer.
+  size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize);
+}
+
+std::vector<uint8_t> readFile(std::string path) {
+  std::ifstream in(path, std::ios::binary);  // Binary: keep bytes untranslated.
+  return std::vector<uint8_t>((std::istreambuf_iterator<char>(in)),
+      std::istreambuf_iterator<char>());
+}
+
+size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) {
+  return 0;  // Stub: ignores its arguments and always returns 0.
+}
+
+int main(int argc, char **argv) {
+  if (argc == 1) {
+    std::cerr
+        << "Usage: " << argv[0]
+        << " <file>...\n"
+           "\n"
+           "Alternatively, try building this target with "
+           "use_libfuzzer=true for a better test driver. For details see:\n"
+           "\n"
+           "https://chromium.googlesource.com/chromium/src/+/master/"
+           "testing/libfuzzer/getting_started.md"
+        << std::endl;
+    return 1;  // Plain return: <stdlib.h> (for exit) is not included here.
+  }
+
+  if (LLVMFuzzerInitialize)  // Weak symbol: null unless the fuzzer defines it.
+    LLVMFuzzerInitialize(&argc, &argv);
+
+  for (int i = 1; i < argc; ++i) {
+    std::cout << argv[i] << std::endl;  // Print each input before running it.
+    auto v = readFile(argv[i]);
+    LLVMFuzzerTestOneInput(v.data(), v.size());
+  }
+}
diff --git a/testing/test.gni b/testing/test.gni
new file mode 100644
index 0000000..594ba1c
--- /dev/null
+++ b/testing/test.gni
@@ -0,0 +1,43 @@
+# Copied from chromium's testing/test.gni and modified for goma.
+#
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# ==============================================================================
+# TEST SETUP
+# ==============================================================================
+
+# Define a test as an executable with the "testonly" flag set.
+# All variables of the invoker are forwarded to executable().
+# Variable:
+#   output_name: If set and different from the target name, a group named
+#     "${output_name}_run" that depends on the test is also defined.
+template("test") {
+  executable(target_name) {
+    deps = []  # Start from an empty list so "deps +=" below always works.
+    forward_variables_from(invoker, "*")
+
+    testonly = true
+    deps += [
+      # All shared libraries must have the sanitizer deps to properly link in
+      # asan mode (this target will be empty in other cases).
+      "//build/config/sanitizers:deps",
+      # Disabled here (dep commented out): default manifest for tests on Windows.
+      #"//build/win:default_exe_manifest",
+    ]
+  }
+
+  if (defined(invoker.output_name) && target_name != invoker.output_name) {
+    group("${invoker.output_name}_run") {  # Alias that depends on the test.
+      testonly = true
+      deps = [
+        ":${invoker.target_name}",
+      ]
+    }
+  }
+}
+# Test defaults.
+#set_defaults("test") {
+#  configs = default_executable_configs
+#}
diff --git a/third_party/.gitignore b/third_party/.gitignore
new file mode 100644
index 0000000..8b74adb
--- /dev/null
+++ b/third_party/.gitignore
@@ -0,0 +1,21 @@
+boringssl/src
+glog
+gtest
+jsoncpp/source
+protobuf/protobuf
+xz
+zlib
+breakpad/breakpad
+llvm-build
+libc++/trunk
+libc++abi/trunk
+libc++-static
+libFuzzer/src
+lss
+yasm/source/patched-yasm
+*.mk
+!config/protobuf
+!config/glog
+!config/glog/linux/glog
+!config/glog/mac/glog
+!config/xz
diff --git a/third_party/BUILD.gn b/third_party/BUILD.gn
new file mode 100644
index 0000000..0074ea1
--- /dev/null
+++ b/third_party/BUILD.gn
@@ -0,0 +1,410 @@
+# Copyright 2014 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+if (os != "win") {
+  config_h_dir = "config/glog/$os"
+} else {
+  import("//build/config/win/visual_studio_version.gni")
+  config_h_dir = "glog/src/windows"
+}
+
+glog_sources = [
+  "glog/src/base/commandlineflags.h",
+  "glog/src/base/googleinit.h",
+  "glog/src/base/mutex.h",
+  "glog/src/demangle.cc",
+  "glog/src/demangle.h",
+  "glog/src/logging.cc",
+  "glog/src/raw_logging.cc",
+  "glog/src/signalhandler.cc",
+  "glog/src/stacktrace_generic-inl.h",
+  "glog/src/stacktrace.h",
+  "glog/src/stacktrace_libunwind-inl.h",
+  "glog/src/stacktrace_x86_64-inl.h",
+  "glog/src/symbolize.cc",
+  "glog/src/symbolize.h",
+  "glog/src/utilities.cc",
+  "glog/src/utilities.h",
+  "glog/src/vlog_is_on.cc",
+  "$config_h_dir/config.h",
+  "$config_h_dir/glog/logging.h",
+  "$config_h_dir/glog/log_severity.h",
+  "$config_h_dir/glog/raw_logging.h",
+  "$config_h_dir/glog/stl_logging.h",
+  "$config_h_dir/glog/vlog_is_on.h",
+]
+
+glog_include_dirs = [
+  "$config_h_dir",
+  "glog/src",
+]
+
+glog_defines = [ "HAVE_CONFIG_H" ]
+
+if (os == "win" &&
+    (visual_studio_version == "2015" || visual_studio_version == "2017")) {
+  glog_defines += [ "HAVE_SNPRINTF" ]
+}
+
+if (os != "win") {
+  glog_cflags_cc = [ "-Wno-sign-compare" ]
+} else {
+  glog_cflags_cc = [
+    "/wd4541",  # C4541: dynamic_cast/typeid on polymorphic type with /GR-.
+
+    # C4389: signed/unsigned mismatch
+    # Comes from glog/src/windows/glog/logging.h
+    "/wd4389",
+
+    # C4722: destructor never returns, potential memory leak.
+    # Occurred in glog Fatal.
+    "/wd4722",
+  ]
+  glog_sources -= [
+    "glog/src/stacktrace_generic-inl.h",
+    "glog/src/stacktrace.h",
+    "glog/src/stacktrace_libunwind-inl.h",
+    "glog/src/stacktrace_x86_64-inl.h",
+  ]
+  glog_sources += [
+    "glog/src/windows/port.cc",
+    "glog/src/windows/port.h",
+  ]
+}
+
+config("glog_config") {
+  include_dirs = glog_include_dirs
+  if (is_posix) {
+    cflags = [ "-Wno-unused-local-typedef" ]  # For COMPILE_ASSERT macro.
+  }
+}
+
+static_library("glog") {
+  sources = glog_sources
+  include_dirs = glog_include_dirs
+  defines = glog_defines
+  cflags_cc = glog_cflags_cc
+  public_configs = [ ":glog_config" ]
+
+  configs -= [ "//build/config/compiler:goma_code" ]
+  configs += [ "//build/config/compiler:no_goma_code" ]
+}
+
+config("has_rtti_config") {
+  defines = [
+    # gtest isn't able to figure out when RTTI is disabled for gcc
+    # versions older than 4.3.2, and assumes it's enabled.  Our Mac
+    # and Linux builds disable RTTI, and cannot guarantee that the
+    # compiler will be 4.3.2. or newer.  The Mac, for example, uses
+    # 4.2.1 as that is the latest available on that platform.  gtest
+    # must be instructed that RTTI is disabled here, and for any
+    # direct dependents that might include gtest headers.
+    "GTEST_HAS_RTTI=0",
+  ]
+}
+config("use_own_tr1_tuple_config") {
+  defines = [
+    # We want gtest features that use tr1::tuple, but we currently
+    # don't support the variadic templates used by libstdc++'s
+    # implementation. gtest supports this scenario by providing its
+    # own implementation but we must opt in to it.
+    "GTEST_USE_OWN_TR1_TUPLE=1",
+  ]
+}
+config("unittest_config") {
+  defines = [ "UNIT_TEST" ]
+
+  # So that gtest headers can find themselves.
+  include_dirs = [ "gtest/include" ]
+}
+
+# http://stackoverflow.com/questions/12558327/google-test-in-visual-studio-2012
+config("variadix_max_config") {
+  defines = [ "_VARIADIC_MAX=10" ]
+}
+static_library("gtest") {
+  testonly = true
+  sources = [
+    "gtest/include/gtest/gtest-death-test.h",
+    "gtest/include/gtest/gtest-message.h",
+    "gtest/include/gtest/gtest-param-test.h",
+    "gtest/include/gtest/gtest-printers.h",
+    "gtest/include/gtest/gtest-spi.h",
+    "gtest/include/gtest/gtest-test-part.h",
+    "gtest/include/gtest/gtest-typed-test.h",
+    "gtest/include/gtest/gtest.h",
+    "gtest/include/gtest/gtest_pred_impl.h",
+    "gtest/include/gtest/gtest_prod.h",
+    "gtest/include/gtest/internal/gtest-death-test-internal.h",
+    "gtest/include/gtest/internal/gtest-filepath.h",
+    "gtest/include/gtest/internal/gtest-internal.h",
+    "gtest/include/gtest/internal/gtest-linked_ptr.h",
+    "gtest/include/gtest/internal/gtest-param-util-generated.h",
+    "gtest/include/gtest/internal/gtest-param-util.h",
+    "gtest/include/gtest/internal/gtest-port.h",
+    "gtest/include/gtest/internal/gtest-string.h",
+    "gtest/include/gtest/internal/gtest-tuple.h",
+    "gtest/include/gtest/internal/gtest-type-util.h",
+    "gtest/src/gtest-death-test.cc",
+    "gtest/src/gtest-filepath.cc",
+    "gtest/src/gtest-internal-inl.h",
+    "gtest/src/gtest-port.cc",
+    "gtest/src/gtest-printers.cc",
+    "gtest/src/gtest-test-part.cc",
+    "gtest/src/gtest-typed-test.cc",
+    "gtest/src/gtest.cc",
+  ]
+
+  include_dirs = [
+    "gtest",
+    "gtest/include",
+  ]
+
+  public_configs = []
+  if (is_posix) {
+    public_configs += [ ":has_rtti_config" ]
+  }
+  if (is_clang && os != "win") {
+    public_configs += [ ":use_own_tr1_tuple_config" ]
+  }
+  if (os == "win") {
+    # http://stackoverflow.com/questions/12558327/google-test-in-visual-studio-2012
+    public_configs += [ ":variadix_max_config" ]
+  }
+
+  public_configs += [ ":unittest_config" ]
+
+  configs -= [ "//build/config/compiler:goma_code" ]
+  configs += [ "//build/config/compiler:no_goma_code" ]
+}
+
+config("gtest_prod") {
+  # To include gtest/include/gtest/gtest_prod.h
+  include_dirs = [ "gtest/include" ]
+}
+
+static_library("gtest_main") {
+  testonly = true
+  deps = [
+    ":gtest",
+  ]
+  sources = [
+    "gtest/src/gtest_main.cc",
+  ]
+
+  configs -= [ "//build/config/compiler:goma_code" ]
+  configs += [ "//build/config/compiler:no_goma_code" ]
+}
+
+config("minizip_config") {
+  include_dirs = [
+    "//third_party/zlib",
+    "//third_party/zlib/contrib",
+  ]
+}
+static_library("minizip") {
+  sources = [
+    "zlib/adler32.c",
+    "zlib/contrib/minizip/ioapi.c",
+    "zlib/contrib/minizip/ioapi.h",
+    "zlib/contrib/minizip/unzip.c",
+    "zlib/contrib/minizip/unzip.h",
+    "zlib/crc32.c",
+    "zlib/deflate.c",
+    "zlib/gzlib.c",
+    "zlib/gzwrite.c",
+    "zlib/infback.c",
+    "zlib/inffast.c",
+    "zlib/inflate.c",
+    "zlib/inftrees.c",
+    "zlib/trees.c",
+    "zlib/zconf.h",
+    "zlib/zlib.h",
+    "zlib/zutil.c",
+  ]
+  include_dirs = [ "//third_party/zlib/contrib/minizip" ]
+  defines = [ "USE_FILE32API" ]
+  public_configs = [ ":minizip_config" ]
+  if (os == "win") {
+    cflags = [
+      "/wd4013",  # "open" etc undefined; assuming extern returning int
+      "/wd4244",  # conversion from "ZPOS64_T" to "long", possible loss of data
+    ]
+    sources += [ "zlib/contrib/minizip/iowin32.c" ]
+  }
+  if (is_posix) {
+    cflags = [
+      "-includeunistd.h",  # for lseek, write and close.
+      "-Wno-shift-negative-value",  # inflateMark returns -1 << 16.
+    ]
+  }
+
+  configs -= [ "//build/config/compiler:goma_code" ]
+  configs += [ "//build/config/compiler:no_goma_code" ]
+}
+
+config("liblzma_config") {
+  include_dirs = [ "xz/src/liblzma/api" ]
+}
+
+if (enable_lzma) {
+  static_library("liblzma") {
+    config_h_dir = "config/xz/$os"  # Directory holding the per-OS config.h.
+    sources = [
+      "$config_h_dir/config.h",
+      "xz/src/common/mythread.h",
+      "xz/src/common/sysdefs.h",
+      "xz/src/common/tuklib_common.h",
+      "xz/src/common/tuklib_config.h",
+      "xz/src/common/tuklib_integer.h",
+      "xz/src/common/tuklib_physmem.c",
+      "xz/src/common/tuklib_physmem.h",
+      "xz/src/liblzma/api/lzma.h",
+      "xz/src/liblzma/api/lzma/base.h",
+      "xz/src/liblzma/api/lzma/bcj.h",
+      "xz/src/liblzma/api/lzma/block.h",
+      "xz/src/liblzma/api/lzma/check.h",
+      "xz/src/liblzma/api/lzma/container.h",
+      "xz/src/liblzma/api/lzma/delta.h",
+      "xz/src/liblzma/api/lzma/filter.h",
+      "xz/src/liblzma/api/lzma/hardware.h",
+      "xz/src/liblzma/api/lzma/index.h",
+      "xz/src/liblzma/api/lzma/index_hash.h",
+      "xz/src/liblzma/api/lzma/lzma.h",
+      "xz/src/liblzma/api/lzma/stream_flags.h",
+      "xz/src/liblzma/api/lzma/version.h",
+      "xz/src/liblzma/api/lzma/vli.h",
+      "xz/src/liblzma/check/check.c",
+      "xz/src/liblzma/check/check.h",
+      "xz/src/liblzma/check/crc32_fast.c",
+      "xz/src/liblzma/check/crc32_table.c",
+      "xz/src/liblzma/check/crc32_table_be.h",
+      "xz/src/liblzma/check/crc32_table_le.h",
+      "xz/src/liblzma/check/crc64_fast.c",
+      "xz/src/liblzma/check/crc64_table.c",
+      "xz/src/liblzma/check/crc64_table_be.h",
+      "xz/src/liblzma/check/crc64_table_le.h",
+      "xz/src/liblzma/check/crc_macros.h",
+      "xz/src/liblzma/check/sha256.c",
+      "xz/src/liblzma/common/alone_decoder.c",
+      "xz/src/liblzma/common/alone_decoder.h",
+      "xz/src/liblzma/common/alone_encoder.c",
+      "xz/src/liblzma/common/auto_decoder.c",
+      "xz/src/liblzma/common/block_buffer_decoder.c",
+      "xz/src/liblzma/common/block_buffer_encoder.c",
+      "xz/src/liblzma/common/block_decoder.c",
+      "xz/src/liblzma/common/block_decoder.h",
+      "xz/src/liblzma/common/block_encoder.c",
+      "xz/src/liblzma/common/block_encoder.h",
+      "xz/src/liblzma/common/block_header_decoder.c",
+      "xz/src/liblzma/common/block_header_encoder.c",
+      "xz/src/liblzma/common/block_util.c",
+      "xz/src/liblzma/common/common.c",
+      "xz/src/liblzma/common/common.h",
+      "xz/src/liblzma/common/easy_buffer_encoder.c",
+      "xz/src/liblzma/common/easy_decoder_memusage.c",
+      "xz/src/liblzma/common/easy_encoder.c",
+      "xz/src/liblzma/common/easy_encoder_memusage.c",
+      "xz/src/liblzma/common/easy_preset.c",
+      "xz/src/liblzma/common/easy_preset.h",
+      "xz/src/liblzma/common/filter_buffer_decoder.c",
+      "xz/src/liblzma/common/filter_buffer_encoder.c",
+      "xz/src/liblzma/common/filter_common.c",
+      "xz/src/liblzma/common/filter_common.h",
+      "xz/src/liblzma/common/filter_decoder.c",
+      "xz/src/liblzma/common/filter_decoder.h",
+      "xz/src/liblzma/common/filter_encoder.c",
+      "xz/src/liblzma/common/filter_encoder.h",
+      "xz/src/liblzma/common/filter_flags_decoder.c",
+      "xz/src/liblzma/common/filter_flags_encoder.c",
+      "xz/src/liblzma/common/hardware_physmem.c",
+      "xz/src/liblzma/common/index.c",
+      "xz/src/liblzma/common/index.h",
+      "xz/src/liblzma/common/index_decoder.c",
+      "xz/src/liblzma/common/index_encoder.c",
+      "xz/src/liblzma/common/index_encoder.h",
+      "xz/src/liblzma/common/index_hash.c",
+      "xz/src/liblzma/common/stream_buffer_decoder.c",
+      "xz/src/liblzma/common/stream_buffer_encoder.c",
+      "xz/src/liblzma/common/stream_decoder.c",
+      "xz/src/liblzma/common/stream_decoder.h",
+      "xz/src/liblzma/common/stream_encoder.c",
+      "xz/src/liblzma/common/stream_encoder.h",
+      "xz/src/liblzma/common/stream_flags_common.c",
+      "xz/src/liblzma/common/stream_flags_common.h",
+      "xz/src/liblzma/common/stream_flags_decoder.c",
+      "xz/src/liblzma/common/stream_flags_encoder.c",
+      "xz/src/liblzma/common/vli_decoder.c",
+      "xz/src/liblzma/common/vli_encoder.c",
+      "xz/src/liblzma/common/vli_size.c",
+      "xz/src/liblzma/delta/delta_common.c",
+      "xz/src/liblzma/delta/delta_common.h",
+      "xz/src/liblzma/delta/delta_decoder.c",
+      "xz/src/liblzma/delta/delta_decoder.h",
+      "xz/src/liblzma/delta/delta_encoder.c",
+      "xz/src/liblzma/delta/delta_encoder.h",
+      "xz/src/liblzma/delta/delta_private.h",
+      "xz/src/liblzma/lz/lz_decoder.c",
+      "xz/src/liblzma/lz/lz_decoder.h",
+      "xz/src/liblzma/lz/lz_encoder.c",
+      "xz/src/liblzma/lz/lz_encoder.h",
+      "xz/src/liblzma/lz/lz_encoder_hash.h",
+      "xz/src/liblzma/lz/lz_encoder_hash_table.h",
+      "xz/src/liblzma/lz/lz_encoder_mf.c",
+      "xz/src/liblzma/lzma/fastpos.h",
+      "xz/src/liblzma/lzma/fastpos_table.c",
+      "xz/src/liblzma/lzma/lzma2_decoder.c",
+      "xz/src/liblzma/lzma/lzma2_decoder.h",
+      "xz/src/liblzma/lzma/lzma2_encoder.c",
+      "xz/src/liblzma/lzma/lzma2_encoder.h",
+      "xz/src/liblzma/lzma/lzma_common.h",
+      "xz/src/liblzma/lzma/lzma_decoder.c",
+      "xz/src/liblzma/lzma/lzma_decoder.h",
+      "xz/src/liblzma/lzma/lzma_encoder.c",
+      "xz/src/liblzma/lzma/lzma_encoder.h",
+      "xz/src/liblzma/lzma/lzma_encoder_optimum_fast.c",
+      "xz/src/liblzma/lzma/lzma_encoder_optimum_normal.c",
+      "xz/src/liblzma/lzma/lzma_encoder_presets.c",
+      "xz/src/liblzma/lzma/lzma_encoder_private.h",
+      "xz/src/liblzma/rangecoder/price.h",
+      "xz/src/liblzma/rangecoder/price_table.c",
+      "xz/src/liblzma/rangecoder/range_common.h",
+      "xz/src/liblzma/rangecoder/range_decoder.h",
+      "xz/src/liblzma/rangecoder/range_encoder.h",
+      "xz/src/liblzma/simple/arm.c",
+      "xz/src/liblzma/simple/armthumb.c",
+      "xz/src/liblzma/simple/ia64.c",
+      "xz/src/liblzma/simple/powerpc.c",
+      "xz/src/liblzma/simple/simple_coder.c",
+      "xz/src/liblzma/simple/simple_coder.h",
+      "xz/src/liblzma/simple/simple_decoder.c",
+      "xz/src/liblzma/simple/simple_decoder.h",
+      "xz/src/liblzma/simple/simple_encoder.c",
+      "xz/src/liblzma/simple/simple_encoder.h",
+      "xz/src/liblzma/simple/simple_private.h",
+      "xz/src/liblzma/simple/sparc.c",
+      "xz/src/liblzma/simple/x86.c",
+    ]
+    include_dirs = [
+      "$config_h_dir",
+      "xz/src/liblzma/api",
+      "xz/src/liblzma/common",
+      "xz/src/liblzma/check",
+      "xz/src/liblzma/lz",
+      "xz/src/liblzma/rangecoder",
+      "xz/src/liblzma/lzma",
+      "xz/src/liblzma/delta",
+      "xz/src/liblzma/simple",
+      "xz/src/common",
+    ]
+    defines = [
+      "HAVE_CONFIG_H",
+      "TUKLIB_SYMBOL_PREFIX=lzma_",
+    ]
+    public_configs = [ ":liblzma_config" ]
+
+    configs -= [ "//build/config/compiler:goma_code" ]
+    configs += [ "//build/config/compiler:no_goma_code" ]
+  }
+}
diff --git a/third_party/boringssl/.gitignore b/third_party/boringssl/.gitignore
new file mode 100644
index 0000000..d2b302c
--- /dev/null
+++ b/third_party/boringssl/.gitignore
@@ -0,0 +1,5 @@
+linux-aarch64
+linux-arm
+mac-x86
+win-x86
+BUILD.generated_tests.gni
diff --git a/third_party/boringssl/BUILD.generated.gni b/third_party/boringssl/BUILD.generated.gni
new file mode 100644
index 0000000..5506b55
--- /dev/null
+++ b/third_party/boringssl/BUILD.generated.gni
@@ -0,0 +1,554 @@
+# Copyright (c) 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# This file is created by generate_build_files.py. Do not edit manually.
+
+crypto_sources = [
+  "err_data.c",
+  "src/crypto/asn1/a_bitstr.c",
+  "src/crypto/asn1/a_bool.c",
+  "src/crypto/asn1/a_d2i_fp.c",
+  "src/crypto/asn1/a_dup.c",
+  "src/crypto/asn1/a_enum.c",
+  "src/crypto/asn1/a_gentm.c",
+  "src/crypto/asn1/a_i2d_fp.c",
+  "src/crypto/asn1/a_int.c",
+  "src/crypto/asn1/a_mbstr.c",
+  "src/crypto/asn1/a_object.c",
+  "src/crypto/asn1/a_octet.c",
+  "src/crypto/asn1/a_print.c",
+  "src/crypto/asn1/a_strnid.c",
+  "src/crypto/asn1/a_time.c",
+  "src/crypto/asn1/a_type.c",
+  "src/crypto/asn1/a_utctm.c",
+  "src/crypto/asn1/a_utf8.c",
+  "src/crypto/asn1/asn1_lib.c",
+  "src/crypto/asn1/asn1_locl.h",
+  "src/crypto/asn1/asn1_par.c",
+  "src/crypto/asn1/asn_pack.c",
+  "src/crypto/asn1/f_enum.c",
+  "src/crypto/asn1/f_int.c",
+  "src/crypto/asn1/f_string.c",
+  "src/crypto/asn1/tasn_dec.c",
+  "src/crypto/asn1/tasn_enc.c",
+  "src/crypto/asn1/tasn_fre.c",
+  "src/crypto/asn1/tasn_new.c",
+  "src/crypto/asn1/tasn_typ.c",
+  "src/crypto/asn1/tasn_utl.c",
+  "src/crypto/asn1/time_support.c",
+  "src/crypto/base64/base64.c",
+  "src/crypto/bio/bio.c",
+  "src/crypto/bio/bio_mem.c",
+  "src/crypto/bio/connect.c",
+  "src/crypto/bio/fd.c",
+  "src/crypto/bio/file.c",
+  "src/crypto/bio/hexdump.c",
+  "src/crypto/bio/internal.h",
+  "src/crypto/bio/pair.c",
+  "src/crypto/bio/printf.c",
+  "src/crypto/bio/socket.c",
+  "src/crypto/bio/socket_helper.c",
+  "src/crypto/bn_extra/bn_asn1.c",
+  "src/crypto/bn_extra/convert.c",
+  "src/crypto/buf/buf.c",
+  "src/crypto/bytestring/asn1_compat.c",
+  "src/crypto/bytestring/ber.c",
+  "src/crypto/bytestring/cbb.c",
+  "src/crypto/bytestring/cbs.c",
+  "src/crypto/bytestring/internal.h",
+  "src/crypto/chacha/chacha.c",
+  "src/crypto/cipher_extra/cipher_extra.c",
+  "src/crypto/cipher_extra/derive_key.c",
+  "src/crypto/cipher_extra/e_aesctrhmac.c",
+  "src/crypto/cipher_extra/e_aesgcmsiv.c",
+  "src/crypto/cipher_extra/e_chacha20poly1305.c",
+  "src/crypto/cipher_extra/e_null.c",
+  "src/crypto/cipher_extra/e_rc2.c",
+  "src/crypto/cipher_extra/e_rc4.c",
+  "src/crypto/cipher_extra/e_ssl3.c",
+  "src/crypto/cipher_extra/e_tls.c",
+  "src/crypto/cipher_extra/internal.h",
+  "src/crypto/cipher_extra/tls_cbc.c",
+  "src/crypto/cmac/cmac.c",
+  "src/crypto/conf/conf.c",
+  "src/crypto/conf/conf_def.h",
+  "src/crypto/conf/internal.h",
+  "src/crypto/cpu-aarch64-linux.c",
+  "src/crypto/cpu-arm-linux.c",
+  "src/crypto/cpu-arm.c",
+  "src/crypto/cpu-intel.c",
+  "src/crypto/cpu-ppc64le.c",
+  "src/crypto/crypto.c",
+  "src/crypto/curve25519/spake25519.c",
+  "src/crypto/curve25519/x25519-x86_64.c",
+  "src/crypto/dh/check.c",
+  "src/crypto/dh/dh.c",
+  "src/crypto/dh/dh_asn1.c",
+  "src/crypto/dh/params.c",
+  "src/crypto/digest_extra/digest_extra.c",
+  "src/crypto/dsa/dsa.c",
+  "src/crypto/dsa/dsa_asn1.c",
+  "src/crypto/ec_extra/ec_asn1.c",
+  "src/crypto/ecdh/ecdh.c",
+  "src/crypto/ecdsa_extra/ecdsa_asn1.c",
+  "src/crypto/engine/engine.c",
+  "src/crypto/err/err.c",
+  "src/crypto/err/internal.h",
+  "src/crypto/evp/digestsign.c",
+  "src/crypto/evp/evp.c",
+  "src/crypto/evp/evp_asn1.c",
+  "src/crypto/evp/evp_ctx.c",
+  "src/crypto/evp/internal.h",
+  "src/crypto/evp/p_dsa_asn1.c",
+  "src/crypto/evp/p_ec.c",
+  "src/crypto/evp/p_ec_asn1.c",
+  "src/crypto/evp/p_ed25519.c",
+  "src/crypto/evp/p_ed25519_asn1.c",
+  "src/crypto/evp/p_rsa.c",
+  "src/crypto/evp/p_rsa_asn1.c",
+  "src/crypto/evp/pbkdf.c",
+  "src/crypto/evp/print.c",
+  "src/crypto/evp/scrypt.c",
+  "src/crypto/evp/sign.c",
+  "src/crypto/ex_data.c",
+  "src/crypto/fipsmodule/aes/internal.h",
+  "src/crypto/fipsmodule/bcm.c",
+  "src/crypto/fipsmodule/bn/internal.h",
+  "src/crypto/fipsmodule/bn/rsaz_exp.h",
+  "src/crypto/fipsmodule/cipher/internal.h",
+  "src/crypto/fipsmodule/delocate.h",
+  "src/crypto/fipsmodule/des/internal.h",
+  "src/crypto/fipsmodule/digest/internal.h",
+  "src/crypto/fipsmodule/digest/md32_common.h",
+  "src/crypto/fipsmodule/ec/internal.h",
+  "src/crypto/fipsmodule/ec/p256-x86_64-table.h",
+  "src/crypto/fipsmodule/ec/p256-x86_64.h",
+  "src/crypto/fipsmodule/is_fips.c",
+  "src/crypto/fipsmodule/modes/internal.h",
+  "src/crypto/fipsmodule/rand/internal.h",
+  "src/crypto/fipsmodule/rsa/internal.h",
+  "src/crypto/hkdf/hkdf.c",
+  "src/crypto/internal.h",
+  "src/crypto/lhash/lhash.c",
+  "src/crypto/mem.c",
+  "src/crypto/obj/obj.c",
+  "src/crypto/obj/obj_dat.h",
+  "src/crypto/obj/obj_xref.c",
+  "src/crypto/pem/pem_all.c",
+  "src/crypto/pem/pem_info.c",
+  "src/crypto/pem/pem_lib.c",
+  "src/crypto/pem/pem_oth.c",
+  "src/crypto/pem/pem_pk8.c",
+  "src/crypto/pem/pem_pkey.c",
+  "src/crypto/pem/pem_x509.c",
+  "src/crypto/pem/pem_xaux.c",
+  "src/crypto/pkcs7/internal.h",
+  "src/crypto/pkcs7/pkcs7.c",
+  "src/crypto/pkcs7/pkcs7_x509.c",
+  "src/crypto/pkcs8/internal.h",
+  "src/crypto/pkcs8/p5_pbev2.c",
+  "src/crypto/pkcs8/pkcs8.c",
+  "src/crypto/pkcs8/pkcs8_x509.c",
+  "src/crypto/poly1305/internal.h",
+  "src/crypto/poly1305/poly1305.c",
+  "src/crypto/poly1305/poly1305_arm.c",
+  "src/crypto/poly1305/poly1305_vec.c",
+  "src/crypto/pool/internal.h",
+  "src/crypto/pool/pool.c",
+  "src/crypto/rand_extra/deterministic.c",
+  "src/crypto/rand_extra/forkunsafe.c",
+  "src/crypto/rand_extra/fuchsia.c",
+  "src/crypto/rand_extra/rand_extra.c",
+  "src/crypto/rand_extra/windows.c",
+  "src/crypto/rc4/rc4.c",
+  "src/crypto/refcount_c11.c",
+  "src/crypto/refcount_lock.c",
+  "src/crypto/rsa_extra/rsa_asn1.c",
+  "src/crypto/stack/stack.c",
+  "src/crypto/thread.c",
+  "src/crypto/thread_none.c",
+  "src/crypto/thread_pthread.c",
+  "src/crypto/thread_win.c",
+  "src/crypto/x509/a_digest.c",
+  "src/crypto/x509/a_sign.c",
+  "src/crypto/x509/a_strex.c",
+  "src/crypto/x509/a_verify.c",
+  "src/crypto/x509/algorithm.c",
+  "src/crypto/x509/asn1_gen.c",
+  "src/crypto/x509/by_dir.c",
+  "src/crypto/x509/by_file.c",
+  "src/crypto/x509/charmap.h",
+  "src/crypto/x509/i2d_pr.c",
+  "src/crypto/x509/internal.h",
+  "src/crypto/x509/rsa_pss.c",
+  "src/crypto/x509/t_crl.c",
+  "src/crypto/x509/t_req.c",
+  "src/crypto/x509/t_x509.c",
+  "src/crypto/x509/t_x509a.c",
+  "src/crypto/x509/vpm_int.h",
+  "src/crypto/x509/x509.c",
+  "src/crypto/x509/x509_att.c",
+  "src/crypto/x509/x509_cmp.c",
+  "src/crypto/x509/x509_d2.c",
+  "src/crypto/x509/x509_def.c",
+  "src/crypto/x509/x509_ext.c",
+  "src/crypto/x509/x509_lu.c",
+  "src/crypto/x509/x509_obj.c",
+  "src/crypto/x509/x509_r2x.c",
+  "src/crypto/x509/x509_req.c",
+  "src/crypto/x509/x509_set.c",
+  "src/crypto/x509/x509_trs.c",
+  "src/crypto/x509/x509_txt.c",
+  "src/crypto/x509/x509_v3.c",
+  "src/crypto/x509/x509_vfy.c",
+  "src/crypto/x509/x509_vpm.c",
+  "src/crypto/x509/x509cset.c",
+  "src/crypto/x509/x509name.c",
+  "src/crypto/x509/x509rset.c",
+  "src/crypto/x509/x509spki.c",
+  "src/crypto/x509/x_algor.c",
+  "src/crypto/x509/x_all.c",
+  "src/crypto/x509/x_attrib.c",
+  "src/crypto/x509/x_crl.c",
+  "src/crypto/x509/x_exten.c",
+  "src/crypto/x509/x_info.c",
+  "src/crypto/x509/x_name.c",
+  "src/crypto/x509/x_pkey.c",
+  "src/crypto/x509/x_pubkey.c",
+  "src/crypto/x509/x_req.c",
+  "src/crypto/x509/x_sig.c",
+  "src/crypto/x509/x_spki.c",
+  "src/crypto/x509/x_val.c",
+  "src/crypto/x509/x_x509.c",
+  "src/crypto/x509/x_x509a.c",
+  "src/crypto/x509v3/ext_dat.h",
+  "src/crypto/x509v3/pcy_cache.c",
+  "src/crypto/x509v3/pcy_data.c",
+  "src/crypto/x509v3/pcy_int.h",
+  "src/crypto/x509v3/pcy_lib.c",
+  "src/crypto/x509v3/pcy_map.c",
+  "src/crypto/x509v3/pcy_node.c",
+  "src/crypto/x509v3/pcy_tree.c",
+  "src/crypto/x509v3/v3_akey.c",
+  "src/crypto/x509v3/v3_akeya.c",
+  "src/crypto/x509v3/v3_alt.c",
+  "src/crypto/x509v3/v3_bcons.c",
+  "src/crypto/x509v3/v3_bitst.c",
+  "src/crypto/x509v3/v3_conf.c",
+  "src/crypto/x509v3/v3_cpols.c",
+  "src/crypto/x509v3/v3_crld.c",
+  "src/crypto/x509v3/v3_enum.c",
+  "src/crypto/x509v3/v3_extku.c",
+  "src/crypto/x509v3/v3_genn.c",
+  "src/crypto/x509v3/v3_ia5.c",
+  "src/crypto/x509v3/v3_info.c",
+  "src/crypto/x509v3/v3_int.c",
+  "src/crypto/x509v3/v3_lib.c",
+  "src/crypto/x509v3/v3_ncons.c",
+  "src/crypto/x509v3/v3_pci.c",
+  "src/crypto/x509v3/v3_pcia.c",
+  "src/crypto/x509v3/v3_pcons.c",
+  "src/crypto/x509v3/v3_pku.c",
+  "src/crypto/x509v3/v3_pmaps.c",
+  "src/crypto/x509v3/v3_prn.c",
+  "src/crypto/x509v3/v3_purp.c",
+  "src/crypto/x509v3/v3_skey.c",
+  "src/crypto/x509v3/v3_sxnet.c",
+  "src/crypto/x509v3/v3_utl.c",
+  "src/include/openssl/aead.h",
+  "src/include/openssl/aes.h",
+  "src/include/openssl/arm_arch.h",
+  "src/include/openssl/asn1.h",
+  "src/include/openssl/asn1_mac.h",
+  "src/include/openssl/asn1t.h",
+  "src/include/openssl/base.h",
+  "src/include/openssl/base64.h",
+  "src/include/openssl/bio.h",
+  "src/include/openssl/blowfish.h",
+  "src/include/openssl/bn.h",
+  "src/include/openssl/buf.h",
+  "src/include/openssl/buffer.h",
+  "src/include/openssl/bytestring.h",
+  "src/include/openssl/cast.h",
+  "src/include/openssl/chacha.h",
+  "src/include/openssl/cipher.h",
+  "src/include/openssl/cmac.h",
+  "src/include/openssl/conf.h",
+  "src/include/openssl/cpu.h",
+  "src/include/openssl/crypto.h",
+  "src/include/openssl/curve25519.h",
+  "src/include/openssl/des.h",
+  "src/include/openssl/dh.h",
+  "src/include/openssl/digest.h",
+  "src/include/openssl/dsa.h",
+  "src/include/openssl/ec.h",
+  "src/include/openssl/ec_key.h",
+  "src/include/openssl/ecdh.h",
+  "src/include/openssl/ecdsa.h",
+  "src/include/openssl/engine.h",
+  "src/include/openssl/err.h",
+  "src/include/openssl/evp.h",
+  "src/include/openssl/ex_data.h",
+  "src/include/openssl/hkdf.h",
+  "src/include/openssl/hmac.h",
+  "src/include/openssl/is_boringssl.h",
+  "src/include/openssl/lhash.h",
+  "src/include/openssl/lhash_macros.h",
+  "src/include/openssl/md4.h",
+  "src/include/openssl/md5.h",
+  "src/include/openssl/mem.h",
+  "src/include/openssl/nid.h",
+  "src/include/openssl/obj.h",
+  "src/include/openssl/obj_mac.h",
+  "src/include/openssl/objects.h",
+  "src/include/openssl/opensslconf.h",
+  "src/include/openssl/opensslv.h",
+  "src/include/openssl/ossl_typ.h",
+  "src/include/openssl/pem.h",
+  "src/include/openssl/pkcs12.h",
+  "src/include/openssl/pkcs7.h",
+  "src/include/openssl/pkcs8.h",
+  "src/include/openssl/poly1305.h",
+  "src/include/openssl/pool.h",
+  "src/include/openssl/rand.h",
+  "src/include/openssl/rc4.h",
+  "src/include/openssl/ripemd.h",
+  "src/include/openssl/rsa.h",
+  "src/include/openssl/safestack.h",
+  "src/include/openssl/sha.h",
+  "src/include/openssl/span.h",
+  "src/include/openssl/srtp.h",
+  "src/include/openssl/stack.h",
+  "src/include/openssl/thread.h",
+  "src/include/openssl/type_check.h",
+  "src/include/openssl/x509.h",
+  "src/include/openssl/x509_vfy.h",
+  "src/include/openssl/x509v3.h",
+  "src/third_party/fiat/curve25519.c",
+  "src/third_party/fiat/internal.h",
+  "src/third_party/fiat/p256.c",
+]
+
+ssl_sources = [
+  "src/include/openssl/dtls1.h",
+  "src/include/openssl/ssl.h",
+  "src/include/openssl/ssl3.h",
+  "src/include/openssl/tls1.h",
+  "src/ssl/bio_ssl.cc",
+  "src/ssl/custom_extensions.cc",
+  "src/ssl/d1_both.cc",
+  "src/ssl/d1_lib.cc",
+  "src/ssl/d1_pkt.cc",
+  "src/ssl/d1_srtp.cc",
+  "src/ssl/dtls_method.cc",
+  "src/ssl/dtls_record.cc",
+  "src/ssl/handshake.cc",
+  "src/ssl/handshake_client.cc",
+  "src/ssl/handshake_server.cc",
+  "src/ssl/internal.h",
+  "src/ssl/s3_both.cc",
+  "src/ssl/s3_lib.cc",
+  "src/ssl/s3_pkt.cc",
+  "src/ssl/ssl_aead_ctx.cc",
+  "src/ssl/ssl_asn1.cc",
+  "src/ssl/ssl_buffer.cc",
+  "src/ssl/ssl_cert.cc",
+  "src/ssl/ssl_cipher.cc",
+  "src/ssl/ssl_file.cc",
+  "src/ssl/ssl_key_share.cc",
+  "src/ssl/ssl_lib.cc",
+  "src/ssl/ssl_privkey.cc",
+  "src/ssl/ssl_session.cc",
+  "src/ssl/ssl_stat.cc",
+  "src/ssl/ssl_transcript.cc",
+  "src/ssl/ssl_versions.cc",
+  "src/ssl/ssl_x509.cc",
+  "src/ssl/t1_enc.cc",
+  "src/ssl/t1_lib.cc",
+  "src/ssl/tls13_both.cc",
+  "src/ssl/tls13_client.cc",
+  "src/ssl/tls13_enc.cc",
+  "src/ssl/tls13_server.cc",
+  "src/ssl/tls_method.cc",
+  "src/ssl/tls_record.cc",
+]
+
+crypto_sources_ios_aarch64 = [
+  "ios-aarch64/crypto/chacha/chacha-armv8.S",
+  "ios-aarch64/crypto/fipsmodule/aesv8-armx64.S",
+  "ios-aarch64/crypto/fipsmodule/armv8-mont.S",
+  "ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S",
+  "ios-aarch64/crypto/fipsmodule/sha1-armv8.S",
+  "ios-aarch64/crypto/fipsmodule/sha256-armv8.S",
+  "ios-aarch64/crypto/fipsmodule/sha512-armv8.S",
+]
+
+crypto_sources_ios_arm = [
+  "ios-arm/crypto/chacha/chacha-armv4.S",
+  "ios-arm/crypto/fipsmodule/aes-armv4.S",
+  "ios-arm/crypto/fipsmodule/aesv8-armx32.S",
+  "ios-arm/crypto/fipsmodule/armv4-mont.S",
+  "ios-arm/crypto/fipsmodule/bsaes-armv7.S",
+  "ios-arm/crypto/fipsmodule/ghash-armv4.S",
+  "ios-arm/crypto/fipsmodule/ghashv8-armx32.S",
+  "ios-arm/crypto/fipsmodule/sha1-armv4-large.S",
+  "ios-arm/crypto/fipsmodule/sha256-armv4.S",
+  "ios-arm/crypto/fipsmodule/sha512-armv4.S",
+]
+
+crypto_sources_linux_aarch64 = [
+  "linux-aarch64/crypto/chacha/chacha-armv8.S",
+  "linux-aarch64/crypto/fipsmodule/aesv8-armx64.S",
+  "linux-aarch64/crypto/fipsmodule/armv8-mont.S",
+  "linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S",
+  "linux-aarch64/crypto/fipsmodule/sha1-armv8.S",
+  "linux-aarch64/crypto/fipsmodule/sha256-armv8.S",
+  "linux-aarch64/crypto/fipsmodule/sha512-armv8.S",
+]
+
+crypto_sources_linux_arm = [
+  "linux-arm/crypto/chacha/chacha-armv4.S",
+  "linux-arm/crypto/fipsmodule/aes-armv4.S",
+  "linux-arm/crypto/fipsmodule/aesv8-armx32.S",
+  "linux-arm/crypto/fipsmodule/armv4-mont.S",
+  "linux-arm/crypto/fipsmodule/bsaes-armv7.S",
+  "linux-arm/crypto/fipsmodule/ghash-armv4.S",
+  "linux-arm/crypto/fipsmodule/ghashv8-armx32.S",
+  "linux-arm/crypto/fipsmodule/sha1-armv4-large.S",
+  "linux-arm/crypto/fipsmodule/sha256-armv4.S",
+  "linux-arm/crypto/fipsmodule/sha512-armv4.S",
+  "src/crypto/curve25519/asm/x25519-asm-arm.S",
+  "src/crypto/poly1305/poly1305_arm_asm.S",
+]
+
+crypto_sources_linux_ppc64le = [
+  "linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S",
+  "linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S",
+]
+
+crypto_sources_linux_x86 = [
+  "linux-x86/crypto/chacha/chacha-x86.S",
+  "linux-x86/crypto/fipsmodule/aes-586.S",
+  "linux-x86/crypto/fipsmodule/aesni-x86.S",
+  "linux-x86/crypto/fipsmodule/bn-586.S",
+  "linux-x86/crypto/fipsmodule/co-586.S",
+  "linux-x86/crypto/fipsmodule/ghash-x86.S",
+  "linux-x86/crypto/fipsmodule/md5-586.S",
+  "linux-x86/crypto/fipsmodule/sha1-586.S",
+  "linux-x86/crypto/fipsmodule/sha256-586.S",
+  "linux-x86/crypto/fipsmodule/sha512-586.S",
+  "linux-x86/crypto/fipsmodule/vpaes-x86.S",
+  "linux-x86/crypto/fipsmodule/x86-mont.S",
+]
+
+crypto_sources_linux_x86_64 = [
+  "linux-x86_64/crypto/chacha/chacha-x86_64.S",
+  "linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S",
+  "linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S",
+  "linux-x86_64/crypto/fipsmodule/aes-x86_64.S",
+  "linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S",
+  "linux-x86_64/crypto/fipsmodule/aesni-x86_64.S",
+  "linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S",
+  "linux-x86_64/crypto/fipsmodule/ghash-x86_64.S",
+  "linux-x86_64/crypto/fipsmodule/md5-x86_64.S",
+  "linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S",
+  "linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S",
+  "linux-x86_64/crypto/fipsmodule/rsaz-avx2.S",
+  "linux-x86_64/crypto/fipsmodule/sha1-x86_64.S",
+  "linux-x86_64/crypto/fipsmodule/sha256-x86_64.S",
+  "linux-x86_64/crypto/fipsmodule/sha512-x86_64.S",
+  "linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S",
+  "linux-x86_64/crypto/fipsmodule/x86_64-mont.S",
+  "linux-x86_64/crypto/fipsmodule/x86_64-mont5.S",
+  "src/crypto/curve25519/asm/x25519-asm-x86_64.S",
+]
+
+crypto_sources_mac_x86 = [
+  "mac-x86/crypto/chacha/chacha-x86.S",
+  "mac-x86/crypto/fipsmodule/aes-586.S",
+  "mac-x86/crypto/fipsmodule/aesni-x86.S",
+  "mac-x86/crypto/fipsmodule/bn-586.S",
+  "mac-x86/crypto/fipsmodule/co-586.S",
+  "mac-x86/crypto/fipsmodule/ghash-x86.S",
+  "mac-x86/crypto/fipsmodule/md5-586.S",
+  "mac-x86/crypto/fipsmodule/sha1-586.S",
+  "mac-x86/crypto/fipsmodule/sha256-586.S",
+  "mac-x86/crypto/fipsmodule/sha512-586.S",
+  "mac-x86/crypto/fipsmodule/vpaes-x86.S",
+  "mac-x86/crypto/fipsmodule/x86-mont.S",
+]
+
+crypto_sources_mac_x86_64 = [
+  "mac-x86_64/crypto/chacha/chacha-x86_64.S",
+  "mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S",
+  "mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S",
+  "mac-x86_64/crypto/fipsmodule/aes-x86_64.S",
+  "mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S",
+  "mac-x86_64/crypto/fipsmodule/aesni-x86_64.S",
+  "mac-x86_64/crypto/fipsmodule/bsaes-x86_64.S",
+  "mac-x86_64/crypto/fipsmodule/ghash-x86_64.S",
+  "mac-x86_64/crypto/fipsmodule/md5-x86_64.S",
+  "mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S",
+  "mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S",
+  "mac-x86_64/crypto/fipsmodule/rsaz-avx2.S",
+  "mac-x86_64/crypto/fipsmodule/sha1-x86_64.S",
+  "mac-x86_64/crypto/fipsmodule/sha256-x86_64.S",
+  "mac-x86_64/crypto/fipsmodule/sha512-x86_64.S",
+  "mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S",
+  "mac-x86_64/crypto/fipsmodule/x86_64-mont.S",
+  "mac-x86_64/crypto/fipsmodule/x86_64-mont5.S",
+  "src/crypto/curve25519/asm/x25519-asm-x86_64.S",
+]
+
+crypto_sources_win_x86 = [
+  "win-x86/crypto/chacha/chacha-x86.asm",
+  "win-x86/crypto/fipsmodule/aes-586.asm",
+  "win-x86/crypto/fipsmodule/aesni-x86.asm",
+  "win-x86/crypto/fipsmodule/bn-586.asm",
+  "win-x86/crypto/fipsmodule/co-586.asm",
+  "win-x86/crypto/fipsmodule/ghash-x86.asm",
+  "win-x86/crypto/fipsmodule/md5-586.asm",
+  "win-x86/crypto/fipsmodule/sha1-586.asm",
+  "win-x86/crypto/fipsmodule/sha256-586.asm",
+  "win-x86/crypto/fipsmodule/sha512-586.asm",
+  "win-x86/crypto/fipsmodule/vpaes-x86.asm",
+  "win-x86/crypto/fipsmodule/x86-mont.asm",
+]
+
+crypto_sources_win_x86_64 = [
+  "win-x86_64/crypto/chacha/chacha-x86_64.asm",
+  "win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm",
+  "win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm",
+  "win-x86_64/crypto/fipsmodule/aes-x86_64.asm",
+  "win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm",
+  "win-x86_64/crypto/fipsmodule/aesni-x86_64.asm",
+  "win-x86_64/crypto/fipsmodule/bsaes-x86_64.asm",
+  "win-x86_64/crypto/fipsmodule/ghash-x86_64.asm",
+  "win-x86_64/crypto/fipsmodule/md5-x86_64.asm",
+  "win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm",
+  "win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm",
+  "win-x86_64/crypto/fipsmodule/rsaz-avx2.asm",
+  "win-x86_64/crypto/fipsmodule/sha1-x86_64.asm",
+  "win-x86_64/crypto/fipsmodule/sha256-x86_64.asm",
+  "win-x86_64/crypto/fipsmodule/sha512-x86_64.asm",
+  "win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm",
+  "win-x86_64/crypto/fipsmodule/x86_64-mont.asm",
+  "win-x86_64/crypto/fipsmodule/x86_64-mont5.asm",
+]
+
+fuzzers = [
+  "bn_div",
+  "bn_mod_exp",
+  "cert",
+  "client",
+  "dtls_client",
+  "dtls_server",
+  "pkcs8",
+  "privkey",
+  "read_pem",
+  "server",
+  "session",
+  "spki",
+  "ssl_ctx_api",
+]
diff --git a/third_party/boringssl/BUILD.gn b/third_party/boringssl/BUILD.gn
new file mode 100644
index 0000000..c5cfd5e
--- /dev/null
+++ b/third_party/boringssl/BUILD.gn
@@ -0,0 +1,96 @@
+# Copied from chromium third_party/boringssl/.
+#
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("BUILD.generated.gni")
+
+# Public config: exposes src/include to boringssl and to targets depending on it.
+config("external_config") {
+  include_dirs = [ "src/include" ]
+}
+
+# Config internal to this build file; applied to the boringssl target below.
+config("internal_config") {
+  visibility = [ ":*" ]  # Only targets in this file can depend on this.
+  defines = [
+    "BORINGSSL_IMPLEMENTATION",
+    "BORINGSSL_NO_STATIC_INITIALIZER",
+    "OPENSSL_SMALL",
+  ]
+  configs = [
+    # TODO: Fix size_t truncations in BoringSSL.
+    # https://crbug.com/429039
+    "//build/config/compiler:no_size_t_to_int_warning",
+  ]
+  if (is_posix) {
+    cflags_c = [ "-std=c99" ]
+    defines += [ "_XOPEN_SOURCE=700" ]
+  }
+}
+
+config("no_asm_config") {
+  visibility = [ ":*" ]  # Only targets in this file can depend on this.
+  defines = [ "OPENSSL_NO_ASM" ]  # Exported by boringssl_asm when no asm list matches.
+}
+
+all_sources = crypto_sources + ssl_sources
+
+# Windows' assembly is built with Yasm (x64 and x86 lists only). The other
+# platforms use the platform assembler.
+if (os == "win") {
+  import("//third_party/yasm/yasm_assemble.gni")
+  yasm_assemble("boringssl_asm") {
+    if (cpu_arch == "x64") {
+      sources = crypto_sources_win_x86_64
+    } else if (cpu_arch == "x86") {
+      sources = crypto_sources_win_x86
+    }
+  }
+} else {
+  source_set("boringssl_asm") {
+    visibility = [ ":*" ]  # Only targets in this file can depend on this.
+
+    defines = []
+    sources = []
+    asmflags = []
+    include_dirs = [ "src/include" ]
+
+    if (cpu_arch == "x64") {
+      if (os == "mac") {
+        sources += crypto_sources_mac_x86_64
+      } else if (os == "linux") {
+        sources += crypto_sources_linux_x86_64
+      } else {
+        public_configs = [ ":no_asm_config" ]  # No x64 asm list for this OS.
+      }
+    } else if (cpu_arch == "x86") {
+      if (os == "mac") {
+        sources += crypto_sources_mac_x86
+      } else if (os == "linux") {
+        sources += crypto_sources_linux_x86
+      } else {
+        public_configs = [ ":no_asm_config" ]  # No x86 asm list for this OS.
+      }
+      if (is_posix) {
+        asmflags += [ "-m32" ]
+      }
+    } else {
+      public_configs = [ ":no_asm_config" ]  # No asm list wired up for this CPU.
+    }
+  }
+}
+
+static_library("boringssl") {
+  sources = all_sources
+  deps = [
+    ":boringssl_asm",
+  ]
+
+  public_configs = [ ":external_config" ]  # Also applied to dependents.
+  configs += [ ":internal_config" ]
+  # Swap the goma_code compiler config for no_goma_code on this target.
+  configs -= [ "//build/config/compiler:goma_code" ]
+  configs += [ "//build/config/compiler:no_goma_code" ]
+}
diff --git a/third_party/boringssl/LICENSE b/third_party/boringssl/LICENSE
new file mode 100644
index 0000000..ff4116f
--- /dev/null
+++ b/third_party/boringssl/LICENSE
@@ -0,0 +1,218 @@
+BoringSSL is a fork of OpenSSL. As such, large parts of it fall under OpenSSL
+licensing. Files that are completely new have a Google copyright and an ISC
+license. This license is reproduced at the bottom of this file.
+
+Contributors to BoringSSL are required to follow the CLA rules for Chromium:
+https://cla.developers.google.com/clas
+
+Some files from Intel are under yet another license, which is also included
+underneath. Files in third_party/ have their own licenses, as described
+therein. The MIT license, for third_party/fiat, which, unlike other third_party
+directories, is compiled into non-test libraries, is included below.
+
+The OpenSSL toolkit stays under a dual license, i.e. both the conditions of the
+OpenSSL License and the original SSLeay license apply to the toolkit. See below
+for the actual license texts. Actually both licenses are BSD-style Open Source
+licenses. In case of any license issues related to OpenSSL please contact
+openssl-core@openssl.org.
+
+The following are Google-internal bug numbers where explicit permission from
+some authors is recorded for use of their work. (This is purely for our own
+record keeping.)
+  27287199
+  27287880
+  27287883
+
+  OpenSSL License
+  ---------------
+
+/* ====================================================================
+ * Copyright (c) 1998-2011 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer. 
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com).  This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+ Original SSLeay License
+ -----------------------
+
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ * 
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to.  The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ * 
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *    "This product includes cryptographic software written by
+ *     Eric Young (eay@cryptsoft.com)"
+ *    The word 'cryptographic' can be left out if the rouines from the library
+ *    being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from 
+ *    the apps directory (application code) you must include an acknowledgement:
+ *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ * 
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * 
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed.  i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+
+ISC license used for completely new code in BoringSSL:
+
+/* Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+
+Some files from Intel carry the following license:
+
+# Copyright (c) 2012, Intel Corporation
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# *  Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#
+# *  Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the
+#    distribution.
+#
+# *  Neither the name of the Intel Corporation nor the names of its
+#    contributors may be used to endorse or promote products derived from
+#    this software without specific prior written permission.
+#
+#
+# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+The code in third_party/fiat carries the MIT license:
+
+Copyright (c) 2015-2016 the fiat-crypto authors (see
+https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS).
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/third_party/boringssl/NOTICE b/third_party/boringssl/NOTICE
new file mode 100644
index 0000000..e47d101
--- /dev/null
+++ b/third_party/boringssl/NOTICE
@@ -0,0 +1,127 @@
+
+  LICENSE ISSUES
+  ==============
+
+  The OpenSSL toolkit stays under a dual license, i.e. both the conditions of
+  the OpenSSL License and the original SSLeay license apply to the toolkit.
+  See below for the actual license texts. Actually both licenses are BSD-style
+  Open Source licenses. In case of any license issues related to OpenSSL
+  please contact openssl-core@openssl.org.
+
+  OpenSSL License
+  ---------------
+
+/* ====================================================================
+ * Copyright (c) 1998-2011 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer. 
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com).  This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+ Original SSLeay License
+ -----------------------
+
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ * 
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to.  The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ * 
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *    "This product includes cryptographic software written by
+ *     Eric Young (eay@cryptsoft.com)"
+ *    The word 'cryptographic' can be left out if the rouines from the library
+ *    being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from 
+ *    the apps directory (application code) you must include an acknowledgement:
+ *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ * 
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * 
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed.  i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
diff --git a/third_party/boringssl/README b/third_party/boringssl/README
new file mode 100644
index 0000000..40964b6
--- /dev/null
+++ b/third_party/boringssl/README
@@ -0,0 +1,10 @@
+Files in this directory are a subset of chromium's third_party/boringssl.
+However, to follow the latest BoringSSL, these are not just a copy of the
+files.
+
+To update the files in this directory:
+1. bump the commit in ../DEPS, and
+2. run "python src/util/generate_build_files.py chromium".
+
+Since we do not need to build mac-x86 and linux-arm, we simply do not add
+them, even if the script generates such directories.
diff --git a/third_party/boringssl/README.chromium b/third_party/boringssl/README.chromium
new file mode 100644
index 0000000..c4c63c4
--- /dev/null
+++ b/third_party/boringssl/README.chromium
@@ -0,0 +1,11 @@
+Name: boringssl
+URL: https://boringssl.googlesource.com/boringssl
+Version: git
+License: BSDish
+License File: NOTICE
+License Android Compatible: yes
+Security Critical: yes
+
+Description:
+This is BoringSSL, a fork of OpenSSL. See
+https://www.imperialviolet.org/2014/06/20/boringssl.html
diff --git a/third_party/boringssl/err_data.c b/third_party/boringssl/err_data.c
new file mode 100644
index 0000000..e83dc91
--- /dev/null
+++ b/third_party/boringssl/err_data.c
@@ -0,0 +1,1350 @@
+/* Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+ /* This file was generated by err_data_generate.go. */
+
+#include <openssl/base.h>
+#include <openssl/err.h>
+#include <openssl/type_check.h>
+
+/* Fail the build if any ERR_LIB_* value (or ERR_NUM_LIBS) has changed. */
+OPENSSL_COMPILE_ASSERT(ERR_LIB_NONE == 1, library_values_changed_1);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_SYS == 2, library_values_changed_2);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_BN == 3, library_values_changed_3);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_RSA == 4, library_values_changed_4);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_DH == 5, library_values_changed_5);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_EVP == 6, library_values_changed_6);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_BUF == 7, library_values_changed_7);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_OBJ == 8, library_values_changed_8);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_PEM == 9, library_values_changed_9);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_DSA == 10, library_values_changed_10);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_X509 == 11, library_values_changed_11);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_ASN1 == 12, library_values_changed_12);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_CONF == 13, library_values_changed_13);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_CRYPTO == 14, library_values_changed_14);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_EC == 15, library_values_changed_15);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_SSL == 16, library_values_changed_16);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_BIO == 17, library_values_changed_17);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_PKCS7 == 18, library_values_changed_18);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_PKCS8 == 19, library_values_changed_19);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_X509V3 == 20, library_values_changed_20);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_RAND == 21, library_values_changed_21);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_ENGINE == 22, library_values_changed_22);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_OCSP == 23, library_values_changed_23);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_UI == 24, library_values_changed_24);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_COMP == 25, library_values_changed_25);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_ECDSA == 26, library_values_changed_26);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_ECDH == 27, library_values_changed_27);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_HMAC == 28, library_values_changed_28);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_DIGEST == 29, library_values_changed_29);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_CIPHER == 30, library_values_changed_30);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_HKDF == 31, library_values_changed_31);
+OPENSSL_COMPILE_ASSERT(ERR_LIB_USER == 32, library_values_changed_32);
+OPENSSL_COMPILE_ASSERT(ERR_NUM_LIBS == 33, library_values_changed_num);
+/* Packed entries; low 15 bits appear to index kOpenSSLReasonStringData. */
+const uint32_t kOpenSSLReasonValues[] = {
+    0xc320838,
+    0xc328852,
+    0xc330861,
+    0xc338871,
+    0xc340880,
+    0xc348899,
+    0xc3508a5,
+    0xc3588c2,
+    0xc3608e2,
+    0xc3688f0,
+    0xc370900,
+    0xc37890d,
+    0xc38091d,
+    0xc388928,
+    0xc39093e,
+    0xc39894d,
+    0xc3a0961,
+    0xc3a8845,
+    0xc3b00ea,
+    0xc3b88d4,
+    0x10320845,
+    0x10329535,
+    0x10331541,
+    0x1033955a,
+    0x1034156d,
+    0x10348efc,
+    0x10350c5e,
+    0x10359580,
+    0x10361595,
+    0x103695a8,
+    0x103715c7,
+    0x103795e0,
+    0x103815f5,
+    0x10389613,
+    0x10391622,
+    0x1039963e,
+    0x103a1659,
+    0x103a9668,
+    0x103b1684,
+    0x103b969f,
+    0x103c16b6,
+    0x103c80ea,
+    0x103d16c7,
+    0x103d96db,
+    0x103e16fa,
+    0x103e9709,
+    0x103f1720,
+    0x103f9733,
+    0x10400c22,
+    0x10409746,
+    0x10411764,
+    0x10419777,
+    0x10421791,
+    0x104297a1,
+    0x104317b5,
+    0x104397cb,
+    0x104417e3,
+    0x104497f8,
+    0x1045180c,
+    0x1045981e,
+    0x104605fb,
+    0x1046894d,
+    0x10471833,
+    0x1047984a,
+    0x1048185f,
+    0x1048986d,
+    0x10490e5e,
+    0x14320c05,
+    0x14328c13,
+    0x14330c22,
+    0x14338c34,
+    0x143400ac,
+    0x143480ea,
+    0x18320083,
+    0x18328f52,
+    0x183300ac,
+    0x18338f68,
+    0x18340f7c,
+    0x183480ea,
+    0x18350f91,
+    0x18358fa9,
+    0x18360fbe,
+    0x18368fd2,
+    0x18370ff6,
+    0x1837900c,
+    0x18381020,
+    0x18389030,
+    0x18390a73,
+    0x18399040,
+    0x183a1068,
+    0x183a908e,
+    0x183b0c6a,
+    0x183b90c3,
+    0x183c10d5,
+    0x183c90e0,
+    0x183d10f0,
+    0x183d9101,
+    0x183e1112,
+    0x183e9124,
+    0x183f114d,
+    0x183f9166,
+    0x1840117e,
+    0x184086d3,
+    0x184110b1,
+    0x1841907c,
+    0x1842109b,
+    0x18429055,
+    0x203211b8,
+    0x203291a5,
+    0x243211c4,
+    0x24328993,
+    0x243311d6,
+    0x243391e3,
+    0x243411f0,
+    0x24349202,
+    0x24351211,
+    0x2435922e,
+    0x2436123b,
+    0x24369249,
+    0x24371257,
+    0x24379265,
+    0x2438126e,
+    0x2438927b,
+    0x2439128e,
+    0x28320c52,
+    0x28328c6a,
+    0x28330c22,
+    0x28338c7d,
+    0x28340c5e,
+    0x283480ac,
+    0x283500ea,
+    0x2c322c82,
+    0x2c3292a5,
+    0x2c332c90,
+    0x2c33aca2,
+    0x2c342cb6,
+    0x2c34acc8,
+    0x2c352ce3,
+    0x2c35acf5,
+    0x2c362d08,
+    0x2c36832d,
+    0x2c372d15,
+    0x2c37ad27,
+    0x2c382d4c,
+    0x2c38ad63,
+    0x2c392d71,
+    0x2c39ad81,
+    0x2c3a2d93,
+    0x2c3aada7,
+    0x2c3b2db8,
+    0x2c3badd7,
+    0x2c3c12b7,
+    0x2c3c92cd,
+    0x2c3d2deb,
+    0x2c3d92e6,
+    0x2c3e2e08,
+    0x2c3eae16,
+    0x2c3f2e2e,
+    0x2c3fae46,
+    0x2c402e53,
+    0x2c4091b8,
+    0x2c412e64,
+    0x2c41ae77,
+    0x2c42117e,
+    0x2c42ae88,
+    0x2c430720,
+    0x2c43adc9,
+    0x2c442d3a,
+    0x30320000,
+    0x30328015,
+    0x3033001f,
+    0x30338038,
+    0x3034004a,
+    0x30348064,
+    0x3035006b,
+    0x30358083,
+    0x30360094,
+    0x303680ac,
+    0x303700b9,
+    0x303780c8,
+    0x303800ea,
+    0x303880f7,
+    0x3039010a,
+    0x30398125,
+    0x303a013a,
+    0x303a814e,
+    0x303b0162,
+    0x303b8173,
+    0x303c018c,
+    0x303c81a9,
+    0x303d01b7,
+    0x303d81cb,
+    0x303e01db,
+    0x303e81f4,
+    0x303f0204,
+    0x303f8217,
+    0x30400226,
+    0x30408232,
+    0x30410247,
+    0x30418257,
+    0x3042026e,
+    0x3042827b,
+    0x3043028e,
+    0x3043829d,
+    0x304402b2,
+    0x304482d3,
+    0x304502e6,
+    0x304582f9,
+    0x30460312,
+    0x3046832d,
+    0x3047034a,
+    0x30478363,
+    0x30480371,
+    0x30488382,
+    0x30490391,
+    0x304983a9,
+    0x304a03bb,
+    0x304a83cf,
+    0x304b03ee,
+    0x304b8401,
+    0x304c040c,
+    0x304c841d,
+    0x304d0429,
+    0x304d843f,
+    0x304e044d,
+    0x304e8463,
+    0x304f0475,
+    0x304f8487,
+    0x3050049a,
+    0x305084ad,
+    0x305104be,
+    0x305184ce,
+    0x305204e6,
+    0x305284fb,
+    0x30530513,
+    0x30538527,
+    0x3054053f,
+    0x30548558,
+    0x30550571,
+    0x3055858e,
+    0x30560599,
+    0x305685b1,
+    0x305705c1,
+    0x305785d2,
+    0x305805e5,
+    0x305885fb,
+    0x30590604,
+    0x30598619,
+    0x305a062c,
+    0x305a863b,
+    0x305b065b,
+    0x305b866a,
+    0x305c068b,
+    0x305c86a7,
+    0x305d06b3,
+    0x305d86d3,
+    0x305e06ef,
+    0x305e8700,
+    0x305f0716,
+    0x305f8720,
+    0x34320b63,
+    0x34328b77,
+    0x34330b94,
+    0x34338ba7,
+    0x34340bb6,
+    0x34348bef,
+    0x34350bd3,
+    0x3c320083,
+    0x3c328ca7,
+    0x3c330cc0,
+    0x3c338cdb,
+    0x3c340cf8,
+    0x3c348d22,
+    0x3c350d3d,
+    0x3c358d63,
+    0x3c360d7c,
+    0x3c368d94,
+    0x3c370da5,
+    0x3c378db3,
+    0x3c380dc0,
+    0x3c388dd4,
+    0x3c390c6a,
+    0x3c398df7,
+    0x3c3a0e0b,
+    0x3c3a890d,
+    0x3c3b0e1b,
+    0x3c3b8e36,
+    0x3c3c0e48,
+    0x3c3c8e7b,
+    0x3c3d0e85,
+    0x3c3d8e99,
+    0x3c3e0ea7,
+    0x3c3e8ecc,
+    0x3c3f0c93,
+    0x3c3f8eb5,
+    0x3c4000ac,
+    0x3c4080ea,
+    0x3c410d13,
+    0x3c418d52,
+    0x3c420e5e,
+    0x3c428de8,
+    0x403218c6,
+    0x403298dc,
+    0x4033190a,
+    0x40339914,
+    0x4034192b,
+    0x40349949,
+    0x40351959,
+    0x4035996b,
+    0x40361978,
+    0x40369984,
+    0x40371999,
+    0x403799ab,
+    0x403819b6,
+    0x403899c8,
+    0x40390efc,
+    0x403999d8,
+    0x403a19eb,
+    0x403a9a0c,
+    0x403b1a1d,
+    0x403b9a2d,
+    0x403c0064,
+    0x403c8083,
+    0x403d1ab1,
+    0x403d9ac7,
+    0x403e1ad6,
+    0x403e9b0e,
+    0x403f1b28,
+    0x403f9b36,
+    0x40401b4b,
+    0x40409b5f,
+    0x40411b7c,
+    0x40419b97,
+    0x40421bb0,
+    0x40429bc3,
+    0x40431bd7,
+    0x40439bef,
+    0x40441c06,
+    0x404480ac,
+    0x40451c1b,
+    0x40459c2d,
+    0x40461c51,
+    0x40469c71,
+    0x40471c7f,
+    0x40479ca6,
+    0x40481cf9,
+    0x40489d2c,
+    0x40491d43,
+    0x40499d5d,
+    0x404a1d74,
+    0x404a9d92,
+    0x404b1daa,
+    0x404b9dc1,
+    0x404c1dd7,
+    0x404c9de9,
+    0x404d1e0a,
+    0x404d9e2c,
+    0x404e1e40,
+    0x404e9e4d,
+    0x404f1e7a,
+    0x404f9ea3,
+    0x40501ede,
+    0x40509ef2,
+    0x40511f0d,
+    0x40521f1d,
+    0x40529f41,
+    0x40531f59,
+    0x40539f6c,
+    0x40541f81,
+    0x40549fa4,
+    0x40551fb2,
+    0x40559fcf,
+    0x40561fdc,
+    0x40569ff5,
+    0x4057200d,
+    0x4057a020,
+    0x40582035,
+    0x4058a05c,
+    0x4059208b,
+    0x4059a0b8,
+    0x405a20cc,
+    0x405aa0dc,
+    0x405b20f4,
+    0x405ba105,
+    0x405c2118,
+    0x405ca157,
+    0x405d2164,
+    0x405da17b,
+    0x405e21b9,
+    0x405e8ab1,
+    0x405f21da,
+    0x405fa1e7,
+    0x406021f5,
+    0x4060a217,
+    0x4061225b,
+    0x4061a293,
+    0x406222aa,
+    0x4062a2bb,
+    0x406322cc,
+    0x4063a2e1,
+    0x406422f8,
+    0x4064a324,
+    0x4065233f,
+    0x4065a356,
+    0x4066236e,
+    0x4066a398,
+    0x406723c3,
+    0x4067a3e4,
+    0x4068240b,
+    0x4068a42c,
+    0x4069245e,
+    0x4069a48c,
+    0x406a24ad,
+    0x406aa4cd,
+    0x406b2655,
+    0x406ba678,
+    0x406c268e,
+    0x406ca909,
+    0x406d2938,
+    0x406da960,
+    0x406e298e,
+    0x406ea9db,
+    0x406f29fa,
+    0x406faa32,
+    0x40702a45,
+    0x4070aa62,
+    0x40710800,
+    0x4071aa74,
+    0x40722a87,
+    0x4072aaa0,
+    0x40732ab8,
+    0x407394a4,
+    0x40742acc,
+    0x4074aae6,
+    0x40752af7,
+    0x4075ab0b,
+    0x40762b19,
+    0x4076927b,
+    0x40772b3e,
+    0x4077ab60,
+    0x40782b7b,
+    0x4078abb4,
+    0x40792bcb,
+    0x4079abe1,
+    0x407a2bed,
+    0x407aac00,
+    0x407b2c15,
+    0x407bac27,
+    0x407c2c58,
+    0x407cac61,
+    0x407d2447,
+    0x407d9eb3,
+    0x407e2b90,
+    0x407ea06c,
+    0x407f1c93,
+    0x407f9a53,
+    0x40801e8a,
+    0x40809cbb,
+    0x40811f2f,
+    0x40819e64,
+    0x40822979,
+    0x40829a39,
+    0x40832047,
+    0x4083a309,
+    0x40841ccf,
+    0x4084a0a4,
+    0x40852129,
+    0x4085a23f,
+    0x4086219b,
+    0x40869ecd,
+    0x408729bf,
+    0x4087a270,
+    0x40881a9a,
+    0x4088a3f7,
+    0x40891ae9,
+    0x40899a76,
+    0x408a26ae,
+    0x408a9884,
+    0x408b2c3c,
+    0x408baa0f,
+    0x408c2139,
+    0x408c98a0,
+    0x408d1d12,
+    0x408d9ce3,
+    0x41f42580,
+    0x41f92612,
+    0x41fe2505,
+    0x41fea6fa,
+    0x41ff27eb,
+    0x42032599,
+    0x420825bb,
+    0x4208a5f7,
+    0x420924e9,
+    0x4209a631,
+    0x420a2540,
+    0x420aa520,
+    0x420b2560,
+    0x420ba5d9,
+    0x420c2807,
+    0x420ca6c7,
+    0x420d26e1,
+    0x420da718,
+    0x42122732,
+    0x421727ce,
+    0x4217a774,
+    0x421c2796,
+    0x421f2751,
+    0x4221281e,
+    0x422627b1,
+    0x422b28ed,
+    0x422ba89b,
+    0x422c28d5,
+    0x422ca85a,
+    0x422d2839,
+    0x422da8ba,
+    0x422e2880,
+    0x422ea9a6,
+    0x4432072b,
+    0x4432873a,
+    0x44330746,
+    0x44338754,
+    0x44340767,
+    0x44348778,
+    0x4435077f,
+    0x44358789,
+    0x4436079c,
+    0x443687b2,
+    0x443707c4,
+    0x443787d1,
+    0x443807e0,
+    0x443887e8,
+    0x44390800,
+    0x4439880e,
+    0x443a0821,
+    0x483212a5,
+    0x483292b7,
+    0x483312cd,
+    0x483392e6,
+    0x4c32130b,
+    0x4c32931b,
+    0x4c33132e,
+    0x4c33934e,
+    0x4c3400ac,
+    0x4c3480ea,
+    0x4c35135a,
+    0x4c359368,
+    0x4c361384,
+    0x4c369397,
+    0x4c3713a6,
+    0x4c3793b4,
+    0x4c3813c9,
+    0x4c3893d5,
+    0x4c3913f5,
+    0x4c39941f,
+    0x4c3a1438,
+    0x4c3a9451,
+    0x4c3b05fb,
+    0x4c3b946a,
+    0x4c3c147c,
+    0x4c3c948b,
+    0x4c3d14a4,
+    0x4c3d8c45,
+    0x4c3e14fd,
+    0x4c3e94b3,
+    0x4c3f151f,
+    0x4c3f927b,
+    0x4c4014c9,
+    0x4c4092f7,
+    0x4c4114ed,
+    0x50322e9a,
+    0x5032aea9,
+    0x50332eb4,
+    0x5033aec4,
+    0x50342edd,
+    0x5034aef7,
+    0x50352f05,
+    0x5035af1b,
+    0x50362f2d,
+    0x5036af43,
+    0x50372f5c,
+    0x5037af6f,
+    0x50382f87,
+    0x5038af98,
+    0x50392fad,
+    0x5039afc1,
+    0x503a2fe1,
+    0x503aaff7,
+    0x503b300f,
+    0x503bb021,
+    0x503c303d,
+    0x503cb054,
+    0x503d306d,
+    0x503db083,
+    0x503e3090,
+    0x503eb0a6,
+    0x503f30b8,
+    0x503f8382,
+    0x504030cb,
+    0x5040b0db,
+    0x504130f5,
+    0x5041b104,
+    0x5042311e,
+    0x5042b13b,
+    0x5043314b,
+    0x5043b15b,
+    0x5044316a,
+    0x5044843f,
+    0x5045317e,
+    0x5045b19c,
+    0x504631af,
+    0x5046b1c5,
+    0x504731d7,
+    0x5047b1ec,
+    0x50483212,
+    0x5048b220,
+    0x50493233,
+    0x5049b248,
+    0x504a325e,
+    0x504ab26e,
+    0x504b328e,
+    0x504bb2a1,
+    0x504c32c4,
+    0x504cb2f2,
+    0x504d3304,
+    0x504db321,
+    0x504e333c,
+    0x504eb358,
+    0x504f336a,
+    0x504fb381,
+    0x50503390,
+    0x505086ef,
+    0x505133a3,
+    0x58320f3a,
+    0x68320efc,
+    0x68328c6a,
+    0x68330c7d,
+    0x68338f0a,
+    0x68340f1a,
+    0x683480ea,
+    0x6c320ed8,
+    0x6c328c34,
+    0x6c330ee3,
+    0x74320a19,
+    0x743280ac,
+    0x74330c45,
+    0x7832097e,
+    0x78328993,
+    0x7833099f,
+    0x78338083,
+    0x783409ae,
+    0x783489c3,
+    0x783509e2,
+    0x78358a04,
+    0x78360a19,
+    0x78368a2f,
+    0x78370a3f,
+    0x78378a60,
+    0x78380a73,
+    0x78388a85,
+    0x78390a92,
+    0x78398ab1,
+    0x783a0ac6,
+    0x783a8ad4,
+    0x783b0ade,
+    0x783b8af2,
+    0x783c0b09,
+    0x783c8b1e,
+    0x783d0b35,
+    0x783d8b4a,
+    0x783e0aa0,
+    0x783e8a52,
+    0x7c321194,
+};
+/* Number of entries in kOpenSSLReasonValues. */
+const size_t kOpenSSLReasonValuesLen = sizeof(kOpenSSLReasonValues) / sizeof(kOpenSSLReasonValues[0]);
+
+const char kOpenSSLReasonStringData[] =
+    "ASN1_LENGTH_MISMATCH\0"
+    "AUX_ERROR\0"
+    "BAD_GET_ASN1_OBJECT_CALL\0"
+    "BAD_OBJECT_HEADER\0"
+    "BMPSTRING_IS_WRONG_LENGTH\0"
+    "BN_LIB\0"
+    "BOOLEAN_IS_WRONG_LENGTH\0"
+    "BUFFER_TOO_SMALL\0"
+    "CONTEXT_NOT_INITIALISED\0"
+    "DECODE_ERROR\0"
+    "DEPTH_EXCEEDED\0"
+    "DIGEST_AND_KEY_TYPE_NOT_SUPPORTED\0"
+    "ENCODE_ERROR\0"
+    "ERROR_GETTING_TIME\0"
+    "EXPECTING_AN_ASN1_SEQUENCE\0"
+    "EXPECTING_AN_INTEGER\0"
+    "EXPECTING_AN_OBJECT\0"
+    "EXPECTING_A_BOOLEAN\0"
+    "EXPECTING_A_TIME\0"
+    "EXPLICIT_LENGTH_MISMATCH\0"
+    "EXPLICIT_TAG_NOT_CONSTRUCTED\0"
+    "FIELD_MISSING\0"
+    "FIRST_NUM_TOO_LARGE\0"
+    "HEADER_TOO_LONG\0"
+    "ILLEGAL_BITSTRING_FORMAT\0"
+    "ILLEGAL_BOOLEAN\0"
+    "ILLEGAL_CHARACTERS\0"
+    "ILLEGAL_FORMAT\0"
+    "ILLEGAL_HEX\0"
+    "ILLEGAL_IMPLICIT_TAG\0"
+    "ILLEGAL_INTEGER\0"
+    "ILLEGAL_NESTED_TAGGING\0"
+    "ILLEGAL_NULL\0"
+    "ILLEGAL_NULL_VALUE\0"
+    "ILLEGAL_OBJECT\0"
+    "ILLEGAL_OPTIONAL_ANY\0"
+    "ILLEGAL_OPTIONS_ON_ITEM_TEMPLATE\0"
+    "ILLEGAL_TAGGED_ANY\0"
+    "ILLEGAL_TIME_VALUE\0"
+    "INTEGER_NOT_ASCII_FORMAT\0"
+    "INTEGER_TOO_LARGE_FOR_LONG\0"
+    "INVALID_BIT_STRING_BITS_LEFT\0"
+    "INVALID_BMPSTRING_LENGTH\0"
+    "INVALID_DIGIT\0"
+    "INVALID_MODIFIER\0"
+    "INVALID_NUMBER\0"
+    "INVALID_OBJECT_ENCODING\0"
+    "INVALID_SEPARATOR\0"
+    "INVALID_TIME_FORMAT\0"
+    "INVALID_UNIVERSALSTRING_LENGTH\0"
+    "INVALID_UTF8STRING\0"
+    "LIST_ERROR\0"
+    "MISSING_ASN1_EOS\0"
+    "MISSING_EOC\0"
+    "MISSING_SECOND_NUMBER\0"
+    "MISSING_VALUE\0"
+    "MSTRING_NOT_UNIVERSAL\0"
+    "MSTRING_WRONG_TAG\0"
+    "NESTED_ASN1_ERROR\0"
+    "NESTED_ASN1_STRING\0"
+    "NON_HEX_CHARACTERS\0"
+    "NOT_ASCII_FORMAT\0"
+    "NOT_ENOUGH_DATA\0"
+    "NO_MATCHING_CHOICE_TYPE\0"
+    "NULL_IS_WRONG_LENGTH\0"
+    "OBJECT_NOT_ASCII_FORMAT\0"
+    "ODD_NUMBER_OF_CHARS\0"
+    "SECOND_NUMBER_TOO_LARGE\0"
+    "SEQUENCE_LENGTH_MISMATCH\0"
+    "SEQUENCE_NOT_CONSTRUCTED\0"
+    "SEQUENCE_OR_SET_NEEDS_CONFIG\0"
+    "SHORT_LINE\0"
+    "STREAMING_NOT_SUPPORTED\0"
+    "STRING_TOO_LONG\0"
+    "STRING_TOO_SHORT\0"
+    "TAG_VALUE_TOO_HIGH\0"
+    "TIME_NOT_ASCII_FORMAT\0"
+    "TOO_LONG\0"
+    "TYPE_NOT_CONSTRUCTED\0"
+    "TYPE_NOT_PRIMITIVE\0"
+    "UNEXPECTED_EOC\0"
+    "UNIVERSALSTRING_IS_WRONG_LENGTH\0"
+    "UNKNOWN_FORMAT\0"
+    "UNKNOWN_MESSAGE_DIGEST_ALGORITHM\0"
+    "UNKNOWN_SIGNATURE_ALGORITHM\0"
+    "UNKNOWN_TAG\0"
+    "UNSUPPORTED_ANY_DEFINED_BY_TYPE\0"
+    "UNSUPPORTED_PUBLIC_KEY_TYPE\0"
+    "UNSUPPORTED_TYPE\0"
+    "WRONG_PUBLIC_KEY_TYPE\0"
+    "WRONG_TAG\0"
+    "WRONG_TYPE\0"
+    "BAD_FOPEN_MODE\0"
+    "BROKEN_PIPE\0"
+    "CONNECT_ERROR\0"
+    "ERROR_SETTING_NBIO\0"
+    "INVALID_ARGUMENT\0"
+    "IN_USE\0"
+    "KEEPALIVE\0"
+    "NBIO_CONNECT_ERROR\0"
+    "NO_HOSTNAME_SPECIFIED\0"
+    "NO_PORT_SPECIFIED\0"
+    "NO_SUCH_FILE\0"
+    "NULL_PARAMETER\0"
+    "SYS_LIB\0"
+    "UNABLE_TO_CREATE_SOCKET\0"
+    "UNINITIALIZED\0"
+    "UNSUPPORTED_METHOD\0"
+    "WRITE_TO_READ_ONLY_BIO\0"
+    "ARG2_LT_ARG3\0"
+    "BAD_ENCODING\0"
+    "BAD_RECIPROCAL\0"
+    "BIGNUM_TOO_LONG\0"
+    "BITS_TOO_SMALL\0"
+    "CALLED_WITH_EVEN_MODULUS\0"
+    "DIV_BY_ZERO\0"
+    "EXPAND_ON_STATIC_BIGNUM_DATA\0"
+    "INPUT_NOT_REDUCED\0"
+    "INVALID_INPUT\0"
+    "INVALID_RANGE\0"
+    "NEGATIVE_NUMBER\0"
+    "NOT_A_SQUARE\0"
+    "NOT_INITIALIZED\0"
+    "NO_INVERSE\0"
+    "PRIVATE_KEY_TOO_LARGE\0"
+    "P_IS_NOT_PRIME\0"
+    "TOO_MANY_ITERATIONS\0"
+    "TOO_MANY_TEMPORARY_VARIABLES\0"
+    "AES_KEY_SETUP_FAILED\0"
+    "BAD_DECRYPT\0"
+    "BAD_KEY_LENGTH\0"
+    "CTRL_NOT_IMPLEMENTED\0"
+    "CTRL_OPERATION_NOT_IMPLEMENTED\0"
+    "DATA_NOT_MULTIPLE_OF_BLOCK_LENGTH\0"
+    "INITIALIZATION_ERROR\0"
+    "INPUT_NOT_INITIALIZED\0"
+    "INVALID_AD_SIZE\0"
+    "INVALID_KEY_LENGTH\0"
+    "INVALID_NONCE\0"
+    "INVALID_NONCE_SIZE\0"
+    "INVALID_OPERATION\0"
+    "IV_TOO_LARGE\0"
+    "NO_CIPHER_SET\0"
+    "NO_DIRECTION_SET\0"
+    "OUTPUT_ALIASES_INPUT\0"
+    "TAG_TOO_LARGE\0"
+    "TOO_LARGE\0"
+    "UNSUPPORTED_AD_SIZE\0"
+    "UNSUPPORTED_INPUT_SIZE\0"
+    "UNSUPPORTED_KEY_SIZE\0"
+    "UNSUPPORTED_NONCE_SIZE\0"
+    "UNSUPPORTED_TAG_SIZE\0"
+    "WRONG_FINAL_BLOCK_LENGTH\0"
+    "LIST_CANNOT_BE_NULL\0"
+    "MISSING_CLOSE_SQUARE_BRACKET\0"
+    "MISSING_EQUAL_SIGN\0"
+    "NO_CLOSE_BRACE\0"
+    "UNABLE_TO_CREATE_NEW_SECTION\0"
+    "VARIABLE_EXPANSION_TOO_LONG\0"
+    "VARIABLE_HAS_NO_VALUE\0"
+    "BAD_GENERATOR\0"
+    "INVALID_PUBKEY\0"
+    "MODULUS_TOO_LARGE\0"
+    "NO_PRIVATE_VALUE\0"
+    "UNKNOWN_HASH\0"
+    "BAD_Q_VALUE\0"
+    "BAD_VERSION\0"
+    "MISSING_PARAMETERS\0"
+    "NEED_NEW_SETUP_VALUES\0"
+    "BIGNUM_OUT_OF_RANGE\0"
+    "COORDINATES_OUT_OF_RANGE\0"
+    "D2I_ECPKPARAMETERS_FAILURE\0"
+    "EC_GROUP_NEW_BY_NAME_FAILURE\0"
+    "GROUP2PKPARAMETERS_FAILURE\0"
+    "GROUP_MISMATCH\0"
+    "I2D_ECPKPARAMETERS_FAILURE\0"
+    "INCOMPATIBLE_OBJECTS\0"
+    "INVALID_COFACTOR\0"
+    "INVALID_COMPRESSED_POINT\0"
+    "INVALID_COMPRESSION_BIT\0"
+    "INVALID_ENCODING\0"
+    "INVALID_FIELD\0"
+    "INVALID_FORM\0"
+    "INVALID_GROUP_ORDER\0"
+    "INVALID_PRIVATE_KEY\0"
+    "INVALID_SCALAR\0"
+    "MISSING_PRIVATE_KEY\0"
+    "NON_NAMED_CURVE\0"
+    "PKPARAMETERS2GROUP_FAILURE\0"
+    "POINT_AT_INFINITY\0"
+    "POINT_IS_NOT_ON_CURVE\0"
+    "PUBLIC_KEY_VALIDATION_FAILED\0"
+    "SLOT_FULL\0"
+    "UNDEFINED_GENERATOR\0"
+    "UNKNOWN_GROUP\0"
+    "UNKNOWN_ORDER\0"
+    "WRONG_CURVE_PARAMETERS\0"
+    "WRONG_ORDER\0"
+    "KDF_FAILED\0"
+    "POINT_ARITHMETIC_FAILURE\0"
+    "BAD_SIGNATURE\0"
+    "NOT_IMPLEMENTED\0"
+    "RANDOM_NUMBER_GENERATION_FAILED\0"
+    "OPERATION_NOT_SUPPORTED\0"
+    "COMMAND_NOT_SUPPORTED\0"
+    "DIFFERENT_KEY_TYPES\0"
+    "DIFFERENT_PARAMETERS\0"
+    "EXPECTING_AN_EC_KEY_KEY\0"
+    "EXPECTING_AN_RSA_KEY\0"
+    "EXPECTING_A_DSA_KEY\0"
+    "ILLEGAL_OR_UNSUPPORTED_PADDING_MODE\0"
+    "INVALID_DIGEST_LENGTH\0"
+    "INVALID_DIGEST_TYPE\0"
+    "INVALID_KEYBITS\0"
+    "INVALID_MGF1_MD\0"
+    "INVALID_PADDING_MODE\0"
+    "INVALID_PARAMETERS\0"
+    "INVALID_PSS_SALTLEN\0"
+    "INVALID_SIGNATURE\0"
+    "KEYS_NOT_SET\0"
+    "MEMORY_LIMIT_EXCEEDED\0"
+    "NOT_A_PRIVATE_KEY\0"
+    "NO_DEFAULT_DIGEST\0"
+    "NO_KEY_SET\0"
+    "NO_MDC2_SUPPORT\0"
+    "NO_NID_FOR_CURVE\0"
+    "NO_OPERATION_SET\0"
+    "NO_PARAMETERS_SET\0"
+    "OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE\0"
+    "OPERATON_NOT_INITIALIZED\0"
+    "UNKNOWN_PUBLIC_KEY_TYPE\0"
+    "UNSUPPORTED_ALGORITHM\0"
+    "OUTPUT_TOO_LARGE\0"
+    "INVALID_OID_STRING\0"
+    "UNKNOWN_NID\0"
+    "BAD_BASE64_DECODE\0"
+    "BAD_END_LINE\0"
+    "BAD_IV_CHARS\0"
+    "BAD_PASSWORD_READ\0"
+    "CIPHER_IS_NULL\0"
+    "ERROR_CONVERTING_PRIVATE_KEY\0"
+    "NOT_DEK_INFO\0"
+    "NOT_ENCRYPTED\0"
+    "NOT_PROC_TYPE\0"
+    "NO_START_LINE\0"
+    "READ_KEY\0"
+    "SHORT_HEADER\0"
+    "UNSUPPORTED_CIPHER\0"
+    "UNSUPPORTED_ENCRYPTION\0"
+    "BAD_PKCS7_VERSION\0"
+    "NOT_PKCS7_SIGNED_DATA\0"
+    "NO_CERTIFICATES_INCLUDED\0"
+    "NO_CRLS_INCLUDED\0"
+    "BAD_ITERATION_COUNT\0"
+    "BAD_PKCS12_DATA\0"
+    "BAD_PKCS12_VERSION\0"
+    "CIPHER_HAS_NO_OBJECT_IDENTIFIER\0"
+    "CRYPT_ERROR\0"
+    "ENCRYPT_ERROR\0"
+    "ERROR_SETTING_CIPHER_PARAMS\0"
+    "INCORRECT_PASSWORD\0"
+    "KEYGEN_FAILURE\0"
+    "KEY_GEN_ERROR\0"
+    "METHOD_NOT_SUPPORTED\0"
+    "MISSING_MAC\0"
+    "MULTIPLE_PRIVATE_KEYS_IN_PKCS12\0"
+    "PKCS12_PUBLIC_KEY_INTEGRITY_NOT_SUPPORTED\0"
+    "PKCS12_TOO_DEEPLY_NESTED\0"
+    "PRIVATE_KEY_DECODE_ERROR\0"
+    "PRIVATE_KEY_ENCODE_ERROR\0"
+    "UNKNOWN_ALGORITHM\0"
+    "UNKNOWN_CIPHER\0"
+    "UNKNOWN_CIPHER_ALGORITHM\0"
+    "UNKNOWN_DIGEST\0"
+    "UNSUPPORTED_KEYLENGTH\0"
+    "UNSUPPORTED_KEY_DERIVATION_FUNCTION\0"
+    "UNSUPPORTED_PRF\0"
+    "UNSUPPORTED_PRIVATE_KEY_ALGORITHM\0"
+    "UNSUPPORTED_SALT_TYPE\0"
+    "BAD_E_VALUE\0"
+    "BAD_FIXED_HEADER_DECRYPT\0"
+    "BAD_PAD_BYTE_COUNT\0"
+    "BAD_RSA_PARAMETERS\0"
+    "BLOCK_TYPE_IS_NOT_01\0"
+    "BN_NOT_INITIALIZED\0"
+    "CANNOT_RECOVER_MULTI_PRIME_KEY\0"
+    "CRT_PARAMS_ALREADY_GIVEN\0"
+    "CRT_VALUES_INCORRECT\0"
+    "DATA_LEN_NOT_EQUAL_TO_MOD_LEN\0"
+    "DATA_TOO_LARGE\0"
+    "DATA_TOO_LARGE_FOR_KEY_SIZE\0"
+    "DATA_TOO_LARGE_FOR_MODULUS\0"
+    "DATA_TOO_SMALL\0"
+    "DATA_TOO_SMALL_FOR_KEY_SIZE\0"
+    "DIGEST_TOO_BIG_FOR_RSA_KEY\0"
+    "D_E_NOT_CONGRUENT_TO_1\0"
+    "EMPTY_PUBLIC_KEY\0"
+    "FIRST_OCTET_INVALID\0"
+    "INCONSISTENT_SET_OF_CRT_VALUES\0"
+    "INTERNAL_ERROR\0"
+    "INVALID_MESSAGE_LENGTH\0"
+    "KEY_SIZE_TOO_SMALL\0"
+    "LAST_OCTET_INVALID\0"
+    "MUST_HAVE_AT_LEAST_TWO_PRIMES\0"
+    "NO_PUBLIC_EXPONENT\0"
+    "NULL_BEFORE_BLOCK_MISSING\0"
+    "N_NOT_EQUAL_P_Q\0"
+    "OAEP_DECODING_ERROR\0"
+    "ONLY_ONE_OF_P_Q_GIVEN\0"
+    "OUTPUT_BUFFER_TOO_SMALL\0"
+    "PADDING_CHECK_FAILED\0"
+    "PKCS_DECODING_ERROR\0"
+    "SLEN_CHECK_FAILED\0"
+    "SLEN_RECOVERY_FAILED\0"
+    "UNKNOWN_ALGORITHM_TYPE\0"
+    "UNKNOWN_PADDING_TYPE\0"
+    "VALUE_MISSING\0"
+    "WRONG_SIGNATURE_LENGTH\0"
+    "ALPN_MISMATCH_ON_EARLY_DATA\0"
+    "APPLICATION_DATA_INSTEAD_OF_HANDSHAKE\0"
+    "APP_DATA_IN_HANDSHAKE\0"
+    "ATTEMPT_TO_REUSE_SESSION_IN_DIFFERENT_CONTEXT\0"
+    "BAD_ALERT\0"
+    "BAD_CHANGE_CIPHER_SPEC\0"
+    "BAD_DATA_RETURNED_BY_CALLBACK\0"
+    "BAD_DH_P_LENGTH\0"
+    "BAD_DIGEST_LENGTH\0"
+    "BAD_ECC_CERT\0"
+    "BAD_ECPOINT\0"
+    "BAD_HANDSHAKE_RECORD\0"
+    "BAD_HELLO_REQUEST\0"
+    "BAD_LENGTH\0"
+    "BAD_PACKET_LENGTH\0"
+    "BAD_RSA_ENCRYPT\0"
+    "BAD_SRTP_MKI_VALUE\0"
+    "BAD_SRTP_PROTECTION_PROFILE_LIST\0"
+    "BAD_SSL_FILETYPE\0"
+    "BAD_WRITE_RETRY\0"
+    "BIO_NOT_SET\0"
+    "BLOCK_CIPHER_PAD_IS_WRONG\0"
+    "BUFFERED_MESSAGES_ON_CIPHER_CHANGE\0"
+    "CANNOT_HAVE_BOTH_PRIVKEY_AND_METHOD\0"
+    "CANNOT_PARSE_LEAF_CERT\0"
+    "CA_DN_LENGTH_MISMATCH\0"
+    "CA_DN_TOO_LONG\0"
+    "CCS_RECEIVED_EARLY\0"
+    "CERTIFICATE_AND_PRIVATE_KEY_MISMATCH\0"
+    "CERTIFICATE_VERIFY_FAILED\0"
+    "CERT_CB_ERROR\0"
+    "CERT_LENGTH_MISMATCH\0"
+    "CHANNEL_ID_NOT_P256\0"
+    "CHANNEL_ID_SIGNATURE_INVALID\0"
+    "CIPHER_OR_HASH_UNAVAILABLE\0"
+    "CLIENTHELLO_PARSE_FAILED\0"
+    "CLIENTHELLO_TLSEXT\0"
+    "CONNECTION_REJECTED\0"
+    "CONNECTION_TYPE_NOT_SET\0"
+    "CUSTOM_EXTENSION_ERROR\0"
+    "DATA_LENGTH_TOO_LONG\0"
+    "DECRYPTION_FAILED\0"
+    "DECRYPTION_FAILED_OR_BAD_RECORD_MAC\0"
+    "DH_PUBLIC_VALUE_LENGTH_IS_WRONG\0"
+    "DH_P_TOO_LONG\0"
+    "DIGEST_CHECK_FAILED\0"
+    "DOWNGRADE_DETECTED\0"
+    "DTLS_MESSAGE_TOO_BIG\0"
+    "DUPLICATE_EXTENSION\0"
+    "DUPLICATE_KEY_SHARE\0"
+    "EARLY_DATA_NOT_IN_USE\0"
+    "ECC_CERT_NOT_FOR_SIGNING\0"
+    "EMPTY_HELLO_RETRY_REQUEST\0"
+    "EMS_STATE_INCONSISTENT\0"
+    "ENCRYPTED_LENGTH_TOO_LONG\0"
+    "ERROR_ADDING_EXTENSION\0"
+    "ERROR_IN_RECEIVED_CIPHER_LIST\0"
+    "ERROR_PARSING_EXTENSION\0"
+    "EXCESSIVE_MESSAGE_SIZE\0"
+    "EXTRA_DATA_IN_MESSAGE\0"
+    "FRAGMENT_MISMATCH\0"
+    "GOT_NEXT_PROTO_WITHOUT_EXTENSION\0"
+    "HANDSHAKE_FAILURE_ON_CLIENT_HELLO\0"
+    "HTTPS_PROXY_REQUEST\0"
+    "HTTP_REQUEST\0"
+    "INAPPROPRIATE_FALLBACK\0"
+    "INVALID_ALPN_PROTOCOL\0"
+    "INVALID_COMMAND\0"
+    "INVALID_COMPRESSION_LIST\0"
+    "INVALID_MESSAGE\0"
+    "INVALID_OUTER_RECORD_TYPE\0"
+    "INVALID_SCT_LIST\0"
+    "INVALID_SSL_SESSION\0"
+    "INVALID_TICKET_KEYS_LENGTH\0"
+    "LENGTH_MISMATCH\0"
+    "MISSING_EXTENSION\0"
+    "MISSING_KEY_SHARE\0"
+    "MISSING_RSA_CERTIFICATE\0"
+    "MISSING_TMP_DH_KEY\0"
+    "MISSING_TMP_ECDH_KEY\0"
+    "MIXED_SPECIAL_OPERATOR_WITH_GROUPS\0"
+    "MTU_TOO_SMALL\0"
+    "NEGOTIATED_BOTH_NPN_AND_ALPN\0"
+    "NESTED_GROUP\0"
+    "NO_CERTIFICATES_RETURNED\0"
+    "NO_CERTIFICATE_ASSIGNED\0"
+    "NO_CERTIFICATE_SET\0"
+    "NO_CIPHERS_AVAILABLE\0"
+    "NO_CIPHERS_PASSED\0"
+    "NO_CIPHERS_SPECIFIED\0"
+    "NO_CIPHER_MATCH\0"
+    "NO_COMMON_SIGNATURE_ALGORITHMS\0"
+    "NO_COMPRESSION_SPECIFIED\0"
+    "NO_GROUPS_SPECIFIED\0"
+    "NO_METHOD_SPECIFIED\0"
+    "NO_P256_SUPPORT\0"
+    "NO_PRIVATE_KEY_ASSIGNED\0"
+    "NO_RENEGOTIATION\0"
+    "NO_REQUIRED_DIGEST\0"
+    "NO_SHARED_CIPHER\0"
+    "NO_SHARED_GROUP\0"
+    "NO_SUPPORTED_VERSIONS_ENABLED\0"
+    "NULL_SSL_CTX\0"
+    "NULL_SSL_METHOD_PASSED\0"
+    "OLD_SESSION_CIPHER_NOT_RETURNED\0"
+    "OLD_SESSION_PRF_HASH_MISMATCH\0"
+    "OLD_SESSION_VERSION_NOT_RETURNED\0"
+    "PARSE_TLSEXT\0"
+    "PATH_TOO_LONG\0"
+    "PEER_DID_NOT_RETURN_A_CERTIFICATE\0"
+    "PEER_ERROR_UNSUPPORTED_CERTIFICATE_TYPE\0"
+    "PRE_SHARED_KEY_MUST_BE_LAST\0"
+    "PROTOCOL_IS_SHUTDOWN\0"
+    "PSK_IDENTITY_BINDER_COUNT_MISMATCH\0"
+    "PSK_IDENTITY_NOT_FOUND\0"
+    "PSK_NO_CLIENT_CB\0"
+    "PSK_NO_SERVER_CB\0"
+    "READ_TIMEOUT_EXPIRED\0"
+    "RECORD_LENGTH_MISMATCH\0"
+    "RECORD_TOO_LARGE\0"
+    "RENEGOTIATION_EMS_MISMATCH\0"
+    "RENEGOTIATION_ENCODING_ERR\0"
+    "RENEGOTIATION_MISMATCH\0"
+    "REQUIRED_CIPHER_MISSING\0"
+    "RESUMED_EMS_SESSION_WITHOUT_EMS_EXTENSION\0"
+    "RESUMED_NON_EMS_SESSION_WITH_EMS_EXTENSION\0"
+    "SCSV_RECEIVED_WHEN_RENEGOTIATING\0"
+    "SERVERHELLO_TLSEXT\0"
+    "SERVER_CERT_CHANGED\0"
+    "SESSION_ID_CONTEXT_UNINITIALIZED\0"
+    "SESSION_MAY_NOT_BE_CREATED\0"
+    "SHUTDOWN_WHILE_IN_INIT\0"
+    "SIGNATURE_ALGORITHMS_EXTENSION_SENT_BY_SERVER\0"
+    "SRTP_COULD_NOT_ALLOCATE_PROFILES\0"
+    "SRTP_UNKNOWN_PROTECTION_PROFILE\0"
+    "SSL3_EXT_INVALID_SERVERNAME\0"
+    "SSLV3_ALERT_BAD_CERTIFICATE\0"
+    "SSLV3_ALERT_BAD_RECORD_MAC\0"
+    "SSLV3_ALERT_CERTIFICATE_EXPIRED\0"
+    "SSLV3_ALERT_CERTIFICATE_REVOKED\0"
+    "SSLV3_ALERT_CERTIFICATE_UNKNOWN\0"
+    "SSLV3_ALERT_CLOSE_NOTIFY\0"
+    "SSLV3_ALERT_DECOMPRESSION_FAILURE\0"
+    "SSLV3_ALERT_HANDSHAKE_FAILURE\0"
+    "SSLV3_ALERT_ILLEGAL_PARAMETER\0"
+    "SSLV3_ALERT_NO_CERTIFICATE\0"
+    "SSLV3_ALERT_UNEXPECTED_MESSAGE\0"
+    "SSLV3_ALERT_UNSUPPORTED_CERTIFICATE\0"
+    "SSL_CTX_HAS_NO_DEFAULT_SSL_VERSION\0"
+    "SSL_HANDSHAKE_FAILURE\0"
+    "SSL_SESSION_ID_CONTEXT_TOO_LONG\0"
+    "TICKET_ENCRYPTION_FAILED\0"
+    "TLSV1_ALERT_ACCESS_DENIED\0"
+    "TLSV1_ALERT_DECODE_ERROR\0"
+    "TLSV1_ALERT_DECRYPTION_FAILED\0"
+    "TLSV1_ALERT_DECRYPT_ERROR\0"
+    "TLSV1_ALERT_EXPORT_RESTRICTION\0"
+    "TLSV1_ALERT_INAPPROPRIATE_FALLBACK\0"
+    "TLSV1_ALERT_INSUFFICIENT_SECURITY\0"
+    "TLSV1_ALERT_INTERNAL_ERROR\0"
+    "TLSV1_ALERT_NO_RENEGOTIATION\0"
+    "TLSV1_ALERT_PROTOCOL_VERSION\0"
+    "TLSV1_ALERT_RECORD_OVERFLOW\0"
+    "TLSV1_ALERT_UNKNOWN_CA\0"
+    "TLSV1_ALERT_USER_CANCELLED\0"
+    "TLSV1_BAD_CERTIFICATE_HASH_VALUE\0"
+    "TLSV1_BAD_CERTIFICATE_STATUS_RESPONSE\0"
+    "TLSV1_CERTIFICATE_REQUIRED\0"
+    "TLSV1_CERTIFICATE_UNOBTAINABLE\0"
+    "TLSV1_UNKNOWN_PSK_IDENTITY\0"
+    "TLSV1_UNRECOGNIZED_NAME\0"
+    "TLSV1_UNSUPPORTED_EXTENSION\0"
+    "TLS_PEER_DID_NOT_RESPOND_WITH_CERTIFICATE_LIST\0"
+    "TLS_RSA_ENCRYPTED_VALUE_LENGTH_IS_WRONG\0"
+    "TOO_MANY_EMPTY_FRAGMENTS\0"
+    "TOO_MANY_KEY_UPDATES\0"
+    "TOO_MANY_WARNING_ALERTS\0"
+    "TOO_MUCH_READ_EARLY_DATA\0"
+    "TOO_MUCH_SKIPPED_EARLY_DATA\0"
+    "UNABLE_TO_FIND_ECDH_PARAMETERS\0"
+    "UNEXPECTED_EXTENSION\0"
+    "UNEXPECTED_EXTENSION_ON_EARLY_DATA\0"
+    "UNEXPECTED_MESSAGE\0"
+    "UNEXPECTED_OPERATOR_IN_GROUP\0"
+    "UNEXPECTED_RECORD\0"
+    "UNKNOWN_ALERT_TYPE\0"
+    "UNKNOWN_CERTIFICATE_TYPE\0"
+    "UNKNOWN_CIPHER_RETURNED\0"
+    "UNKNOWN_CIPHER_TYPE\0"
+    "UNKNOWN_KEY_EXCHANGE_TYPE\0"
+    "UNKNOWN_PROTOCOL\0"
+    "UNKNOWN_SSL_VERSION\0"
+    "UNKNOWN_STATE\0"
+    "UNSAFE_LEGACY_RENEGOTIATION_DISABLED\0"
+    "UNSUPPORTED_COMPRESSION_ALGORITHM\0"
+    "UNSUPPORTED_ELLIPTIC_CURVE\0"
+    "UNSUPPORTED_PROTOCOL\0"
+    "UNSUPPORTED_PROTOCOL_FOR_CUSTOM_KEY\0"
+    "WRONG_CERTIFICATE_TYPE\0"
+    "WRONG_CIPHER_RETURNED\0"
+    "WRONG_CURVE\0"
+    "WRONG_MESSAGE_TYPE\0"
+    "WRONG_SIGNATURE_TYPE\0"
+    "WRONG_SSL_VERSION\0"
+    "WRONG_VERSION_NUMBER\0"
+    "WRONG_VERSION_ON_EARLY_DATA\0"
+    "X509_LIB\0"
+    "X509_VERIFICATION_SETUP_PROBLEMS\0"
+    "AKID_MISMATCH\0"
+    "BAD_X509_FILETYPE\0"
+    "BASE64_DECODE_ERROR\0"
+    "CANT_CHECK_DH_KEY\0"
+    "CERT_ALREADY_IN_HASH_TABLE\0"
+    "CRL_ALREADY_DELTA\0"
+    "CRL_VERIFY_FAILURE\0"
+    "IDP_MISMATCH\0"
+    "INVALID_DIRECTORY\0"
+    "INVALID_FIELD_NAME\0"
+    "INVALID_PARAMETER\0"
+    "INVALID_PSS_PARAMETERS\0"
+    "INVALID_TRUST\0"
+    "ISSUER_MISMATCH\0"
+    "KEY_TYPE_MISMATCH\0"
+    "KEY_VALUES_MISMATCH\0"
+    "LOADING_CERT_DIR\0"
+    "LOADING_DEFAULTS\0"
+    "NAME_TOO_LONG\0"
+    "NEWER_CRL_NOT_NEWER\0"
+    "NO_CERT_SET_FOR_US_TO_VERIFY\0"
+    "NO_CRL_NUMBER\0"
+    "PUBLIC_KEY_DECODE_ERROR\0"
+    "PUBLIC_KEY_ENCODE_ERROR\0"
+    "SHOULD_RETRY\0"
+    "UNKNOWN_KEY_TYPE\0"
+    "UNKNOWN_PURPOSE_ID\0"
+    "UNKNOWN_TRUST_ID\0"
+    "WRONG_LOOKUP_TYPE\0"
+    "BAD_IP_ADDRESS\0"
+    "BAD_OBJECT\0"
+    "BN_DEC2BN_ERROR\0"
+    "BN_TO_ASN1_INTEGER_ERROR\0"
+    "CANNOT_FIND_FREE_FUNCTION\0"
+    "DIRNAME_ERROR\0"
+    "DISTPOINT_ALREADY_SET\0"
+    "DUPLICATE_ZONE_ID\0"
+    "ERROR_CONVERTING_ZONE\0"
+    "ERROR_CREATING_EXTENSION\0"
+    "ERROR_IN_EXTENSION\0"
+    "EXPECTED_A_SECTION_NAME\0"
+    "EXTENSION_EXISTS\0"
+    "EXTENSION_NAME_ERROR\0"
+    "EXTENSION_NOT_FOUND\0"
+    "EXTENSION_SETTING_NOT_SUPPORTED\0"
+    "EXTENSION_VALUE_ERROR\0"
+    "ILLEGAL_EMPTY_EXTENSION\0"
+    "ILLEGAL_HEX_DIGIT\0"
+    "INCORRECT_POLICY_SYNTAX_TAG\0"
+    "INVALID_BOOLEAN_STRING\0"
+    "INVALID_EXTENSION_STRING\0"
+    "INVALID_MULTIPLE_RDNS\0"
+    "INVALID_NAME\0"
+    "INVALID_NULL_ARGUMENT\0"
+    "INVALID_NULL_NAME\0"
+    "INVALID_NULL_VALUE\0"
+    "INVALID_NUMBERS\0"
+    "INVALID_OBJECT_IDENTIFIER\0"
+    "INVALID_OPTION\0"
+    "INVALID_POLICY_IDENTIFIER\0"
+    "INVALID_PROXY_POLICY_SETTING\0"
+    "INVALID_PURPOSE\0"
+    "INVALID_SECTION\0"
+    "INVALID_SYNTAX\0"
+    "ISSUER_DECODE_ERROR\0"
+    "NEED_ORGANIZATION_AND_NUMBERS\0"
+    "NO_CONFIG_DATABASE\0"
+    "NO_ISSUER_CERTIFICATE\0"
+    "NO_ISSUER_DETAILS\0"
+    "NO_POLICY_IDENTIFIER\0"
+    "NO_PROXY_CERT_POLICY_LANGUAGE_DEFINED\0"
+    "NO_PUBLIC_KEY\0"
+    "NO_SUBJECT_DETAILS\0"
+    "ODD_NUMBER_OF_DIGITS\0"
+    "OPERATION_NOT_DEFINED\0"
+    "OTHERNAME_ERROR\0"
+    "POLICY_LANGUAGE_ALREADY_DEFINED\0"
+    "POLICY_PATH_LENGTH\0"
+    "POLICY_PATH_LENGTH_ALREADY_DEFINED\0"
+    "POLICY_WHEN_PROXY_LANGUAGE_REQUIRES_NO_POLICY\0"
+    "SECTION_NOT_FOUND\0"
+    "UNABLE_TO_GET_ISSUER_DETAILS\0"
+    "UNABLE_TO_GET_ISSUER_KEYID\0"
+    "UNKNOWN_BIT_STRING_ARGUMENT\0"
+    "UNKNOWN_EXTENSION\0"
+    "UNKNOWN_EXTENSION_NAME\0"
+    "UNKNOWN_OPTION\0"
+    "UNSUPPORTED_OPTION\0"
+    "USER_TOO_LONG\0"
+    "";
+
diff --git a/third_party/boringssl/linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S b/third_party/boringssl/linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
new file mode 100644
index 0000000..a2ad2d1
--- /dev/null
+++ b/third_party/boringssl/linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
@@ -0,0 +1,3658 @@
+.machine	"any"
+
+.abiversion	2
+.text
+
+.align	7
+.Lrcon:	# 64-byte constant table; the .Lconsts helper below returns its address in r6
+.byte	0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01	# +0x00: bytes 00 00 00 01 repeated four times
+.byte	0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b	# +0x10: bytes 00 00 00 1b repeated four times
+.byte	0x0c,0x0f,0x0e,0x0d,0x0c,0x0f,0x0e,0x0d,0x0c,0x0f,0x0e,0x0d,0x0c,0x0f,0x0e,0x0d	# +0x20: byte indices 0c 0f 0e 0d repeated; key setup loads this row into v5 as a vperm selector
+.byte	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00	# +0x30: all zero
+.Lconsts:	# helper: returns r6 = address of .Lrcon computed PC-relative; r0 is used as scratch
+	mflr	0	# keep the caller return address in r0
+	bcl	20,31,$+4	# branch-and-link to the next instruction, so LR = address of the following mflr
+	mflr	6	# r6 = .Lconsts + 8
+	addi	6,6,-0x48	# back up 0x48 bytes (64 table bytes + 8 code bytes) to reach .Lrcon
+	mtlr	0	# restore the caller return address
+	blr	# return with r6 = .Lrcon
+.long	0
+.byte	0,12,0x14,0,0,0,0,0	# NOTE(review): zero word plus these bytes look like a traceback-table tag - confirm
+.byte	65,69,83,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	# ASCII, NUL-terminated: AES for PowerISA 2.07, CRYPTOGAMS by <appro@openssl.org>
+.align	2
+
+
+.globl	aes_hw_set_encrypt_key	# key expansion; reads r3, r4, r5 and returns a status in r3 (0, -1 or -2)
+.type	aes_hw_set_encrypt_key,@function
+.align	5
+aes_hw_set_encrypt_key:	# presumably r3 = user key bytes, r4 = key length in bits, r5 = output schedule - confirm against the C prototype
+.localentry	aes_hw_set_encrypt_key,0
+
+.Lset_encrypt_key:	# internal entry, also reached by bl from aes_hw_set_decrypt_key
+	mflr	11	# r11 = return address (restored after the call to .Lconsts)
+	std	11,16(1)	# also store it at 16(r1)
+
+	li	6,-1	# r6 holds the return code; -1 for a zero pointer
+	cmpldi	3,0
+	beq-	.Lenc_key_abort	# r3 == 0
+	cmpldi	5,0
+	beq-	.Lenc_key_abort	# r5 == 0
+	li	6,-2	# -2 for an unsupported bit length
+	cmpwi	4,128
+	blt-	.Lenc_key_abort	# r4 < 128
+	cmpwi	4,256
+	bgt-	.Lenc_key_abort	# r4 > 256
+	andi.	0,4,0x3f
+	bne-	.Lenc_key_abort	# r4 not a multiple of 64; only 128, 192 and 256 pass all three checks
+
+	lis	0,0xfff0
+	li	12,-1
+	or	0,0,0	# writes r0 back to itself; NOTE(review): r0/r12 setup looks like a VRSAVE placeholder from the generator - confirm
+
+	bl	.Lconsts	# r6 = address of .Lrcon
+	mtlr	11	# LR was clobbered by the bl above
+
+	neg	9,3	# r9 = -r3, used by lvsr to build the input alignment selector
+	lvx	1,0,3	# first aligned quadword covering the key (lvx ignores the low four address bits)
+	addi	3,3,15	# r3 += 15 so the next lvx fetches the following quadword when the key is unaligned
+	lvsr	3,0,9
+	li	8,0x20	# offset of the third .Lrcon row
+	cmpwi	4,192	# CR0 from this compare selects the 128/192/256 path at the blt/beq below
+	lvx	2,0,3
+	vspltisb	5,0x0f
+	lvx	4,0,6	# v4 = .Lrcon row +0x00
+	vxor	3,3,5	# flip the low four bits of each selector byte; presumably the little-endian adjustment - confirm
+	lvx	5,8,6	# v5 = .Lrcon row +0x20 (vperm selector)
+	addi	6,6,0x10	# r6 now points at .Lrcon row +0x10
+	vperm	1,1,2,3	# v1 = first 16 key bytes assembled from the two aligned loads
+	li	7,8
+	vxor	0,0,0	# v0 = 0
+	mtctr	7	# CTR = 8 iterations for .Loop128
+
+	lvsl	8,0,5	# v8 = output alignment selector derived from r5
+	vspltisb	9,-1
+	lvx	10,0,5	# v10 = current contents of the first output quadword
+	vperm	9,9,0,8	# v9 = select mask used by the vsel/stvx store sequence
+
+	blt	.Loop128	# bits < 192
+	addi	3,3,8
+	beq	.L192	# bits == 192
+	addi	3,3,8
+	b	.L256	# otherwise 256
+
+.align	4
+.Loop128:	# one schedule word-group per iteration; each pass stores v1 and derives the next v1
+	vperm	3,1,1,5	# v3 = bytes of v1 picked by selector v5
+	vsldoi	6,0,1,12
+	vperm	11,1,1,8	# v11 = v1 rotated for the destination alignment
+	vsel	7,10,11,9	# v7 = merge of the previous tail (v10) and the new data (v11)
+	vor	10,11,11	# v10 = v11, carried into the next store
+	.long	0x10632509	# encodes vcipherlast 3,3,4
+	stvx	7,0,5
+	addi	5,5,16
+
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vadduwm	4,4,4	# v4 += v4: double the constant for the next pass
+	vxor	1,1,3
+	bdnz	.Loop128
+
+	lvx	4,0,6	# v4 = .Lrcon row +0x10 (0x1b constants)
+
+	vperm	3,1,1,5	# ninth pass, unrolled
+	vsldoi	6,0,1,12
+	vperm	11,1,1,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	.long	0x10632509	# encodes vcipherlast 3,3,4
+	stvx	7,0,5
+	addi	5,5,16
+
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vadduwm	4,4,4
+	vxor	1,1,3
+
+	vperm	3,1,1,5	# tenth pass, unrolled
+	vsldoi	6,0,1,12
+	vperm	11,1,1,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	.long	0x10632509	# encodes vcipherlast 3,3,4
+	stvx	7,0,5
+	addi	5,5,16
+
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vxor	1,1,3
+	vperm	11,1,1,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	stvx	7,0,5	# store at schedule offset 160 (r5 has advanced 10 x 16 bytes)
+
+	addi	3,5,15	# r3 = address used by .Ldone to flush the trailing partial quadword
+	addi	5,5,0x50	# r5 = schedule start + 240
+
+	li	8,10	# r8 = 10, stored at offset 240 by .Ldone
+	b	.Ldone
+
+.align	4
+.L192:
+	lvx	6,0,3
+	li	7,4
+	vperm	11,1,1,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	stvx	7,0,5	# store the first 16 key bytes
+	addi	5,5,16
+	vperm	2,2,6,3
+	vspltisb	3,8
+	mtctr	7	# CTR = 4 iterations for .Loop192
+	vsububm	5,5,3	# subtract 8 from every byte of selector v5
+
+.Loop192:	# each iteration performs three 16-byte stores (r5 advances by 48)
+	vperm	3,2,2,5
+	vsldoi	6,0,1,12
+	.long	0x10632509	# encodes vcipherlast 3,3,4
+
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+
+	vsldoi	7,0,2,8
+	vspltw	6,1,3
+	vxor	6,6,2
+	vsldoi	2,0,2,12
+	vadduwm	4,4,4	# double the constant in v4
+	vxor	2,2,6
+	vxor	1,1,3
+	vxor	2,2,3
+	vsldoi	7,7,1,8
+
+	vperm	3,2,2,5
+	vsldoi	6,0,1,12
+	vperm	11,7,7,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	.long	0x10632509	# encodes vcipherlast 3,3,4
+	stvx	7,0,5
+	addi	5,5,16
+
+	vsldoi	7,1,2,8
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vperm	11,7,7,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	stvx	7,0,5
+	addi	5,5,16
+
+	vspltw	6,1,3
+	vxor	6,6,2
+	vsldoi	2,0,2,12
+	vadduwm	4,4,4	# double the constant in v4
+	vxor	2,2,6
+	vxor	1,1,3
+	vxor	2,2,3
+	vperm	11,1,1,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	stvx	7,0,5
+	addi	3,5,15	# remember the flush address for .Ldone
+	addi	5,5,16
+	bdnz	.Loop192
+
+	li	8,12	# r8 = 12, stored at offset 240 by .Ldone
+	addi	5,5,0x20	# r5 = schedule start + 240 (16 + 4 x 48 + 32)
+	b	.Ldone
+
+.align	4
+.L256:
+	lvx	6,0,3
+	li	7,7
+	li	8,14	# r8 = 14, stored at offset 240 by .Ldone
+	vperm	11,1,1,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	stvx	7,0,5	# store the first 16 key bytes
+	addi	5,5,16
+	vperm	2,2,6,3	# v2 = second 16 key bytes
+	mtctr	7	# CTR = 7; the loop exits through bdz on the seventh pass
+
+.Loop256:	# each pass stores v2 and the newly derived v1 (r5 advances by 32)
+	vperm	3,2,2,5
+	vsldoi	6,0,1,12
+	vperm	11,2,2,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	.long	0x10632509	# encodes vcipherlast 3,3,4
+	stvx	7,0,5
+	addi	5,5,16
+
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vadduwm	4,4,4	# double the constant in v4
+	vxor	1,1,3
+	vperm	11,1,1,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	stvx	7,0,5
+	addi	3,5,15	# remember the flush address for .Ldone
+	addi	5,5,16
+	bdz	.Ldone	# leaves with r5 = schedule start + 240 (16 + 7 x 32)
+
+	vspltw	3,1,3
+	vsldoi	6,0,2,12
+	.long	0x106305C8	# encodes vsbox 3,3
+
+	vxor	2,2,6
+	vsldoi	6,0,6,12
+	vxor	2,2,6
+	vsldoi	6,0,6,12
+	vxor	2,2,6
+
+	vxor	2,2,3
+	b	.Loop256
+
+.align	4
+.Ldone:	# common exit: flush the last partial quadword, record r8, return 0
+	lvx	2,0,3	# reload the quadword that follows the last full store
+	vsel	2,10,2,9	# merge the pending tail (v10) into it under mask v9
+	stvx	2,0,3
+	li	6,0	# success code
+	or	12,12,12	# writes r12 back to itself
+	stw	8,0(5)	# store r8 at schedule offset 240 (the other routines read it with lwz 240)
+
+.Lenc_key_abort:
+	mr	3,6	# return code: 0, -1 or -2
+	blr	
+.long	0
+.byte	0,12,0x14,1,0,0,3,0
+.long	0
+.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
+
+.globl	aes_hw_set_decrypt_key	# builds the encryption schedule, then reverses the order of its 16-byte entries in place
+.type	aes_hw_set_decrypt_key,@function
+.align	5
+aes_hw_set_decrypt_key:	# takes the same r3/r4/r5 inputs as aes_hw_set_encrypt_key; returns its status in r3
+.localentry	aes_hw_set_decrypt_key,0
+
+	stdu	1,-64(1)	# open a 64-byte stack frame
+	mflr	10	# r10 = return address; .Lset_encrypt_key does not write r10
+	std	10,80(1)	# also store it at 16 bytes above the previous stack pointer
+	bl	.Lset_encrypt_key	# on success: r3 = 0, r8 = stored count, r5 = schedule start + 240
+	mtlr	10
+
+	cmpwi	3,0
+	bne-	.Ldec_key_abort	# propagate a -1/-2 status unchanged
+
+	slwi	7,8,4	# r7 = r8 * 16 = byte offset of the last schedule entry
+	subi	3,5,240	# r3 = first schedule entry
+	srwi	8,8,1	# r8 / 2 swaps are needed
+	add	5,3,7	# r5 = last schedule entry
+	mtctr	8
+
+.Ldeckey:	# swap the 16-byte entries at r3 (moving up) and r5 (moving down)
+	lwz	0, 0(3)
+	lwz	6, 4(3)
+	lwz	7, 8(3)
+	lwz	8, 12(3)
+	addi	3,3,16
+	lwz	9, 0(5)
+	lwz	10,4(5)
+	lwz	11,8(5)
+	lwz	12,12(5)
+	stw	0, 0(5)
+	stw	6, 4(5)
+	stw	7, 8(5)
+	stw	8, 12(5)
+	subi	5,5,16
+	stw	9, -16(3)	# r3 was already advanced, hence the negative offsets
+	stw	10,-12(3)
+	stw	11,-8(3)
+	stw	12,-4(3)
+	bdnz	.Ldeckey
+
+	xor	3,3,3	# return 0
+.Ldec_key_abort:
+	addi	1,1,64	# release the stack frame
+	blr	
+.long	0
+.byte	0,12,4,1,0x80,0,3,0
+.long	0
+.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
+.globl	aes_hw_encrypt	# processes one 16-byte block: reads 16 bytes at r3, writes 16 bytes at r4, schedule at r5
+.type	aes_hw_encrypt,@function
+.align	5
+aes_hw_encrypt:	# neither the input nor the output pointer needs 16-byte alignment (handled with lvsl/lvsr + vperm/vsel)
+.localentry	aes_hw_encrypt,0
+
+	lwz	6,240(5)	# r6 = count stored at schedule offset 240 by the key setup
+	lis	0,0xfc00
+	li	12,-1
+	li	7,15
+	or	0,0,0	# writes r0 back to itself
+
+	lvx	0,0,3	# aligned quadword containing the first input byte
+	neg	11,4	# r11 = -r4, used for the output alignment selector
+	lvx	1,7,3	# aligned quadword containing input byte 15
+	lvsl	2,0,3
+	vspltisb	4,0x0f
+	lvsr	3,0,11
+	vxor	2,2,4	# flip the low four bits of each selector byte; presumably the little-endian adjustment - confirm
+	li	7,16
+	vperm	0,0,1,2	# v0 = the 16 input bytes
+	lvx	1,0,5	# schedule quadword 0
+	lvsr	5,0,5	# v5 = selector that realigns schedule entries
+	srwi	6,6,1
+	lvx	2,7,5
+	addi	7,7,16
+	subi	6,6,1	# r6 = count / 2 - 1 loop iterations (two rounds per iteration)
+	vperm	1,2,1,5
+
+	vxor	0,0,1	# xor the block with the first schedule entry
+	lvx	1,7,5
+	addi	7,7,16
+	mtctr	6
+
+.Loop_enc:	# two cipher rounds per iteration; r7 walks the schedule in 16-byte steps
+	vperm	2,1,2,5
+	.long	0x10001508	# encodes vcipher 0,0,2
+	lvx	2,7,5
+	addi	7,7,16
+	vperm	1,2,1,5
+	.long	0x10000D08	# encodes vcipher 0,0,1
+	lvx	1,7,5
+	addi	7,7,16
+	bdnz	.Loop_enc
+
+	vperm	2,1,2,5
+	.long	0x10001508	# encodes vcipher 0,0,2
+	lvx	2,7,5
+	vperm	1,2,1,5
+	.long	0x10000D09	# encodes vcipherlast 0,0,1
+
+	vspltisb	2,-1	# from here: unaligned store of v0 to r4
+	vxor	1,1,1
+	li	7,15
+	vperm	2,2,1,3	# v2 = byte mask separating the two destination quadwords
+	vxor	3,3,4
+	lvx	1,0,4	# read-modify-write of the first destination quadword
+	vperm	0,0,0,3
+	vsel	1,1,0,2
+	lvx	4,7,4	# and of the quadword holding destination byte 15
+	stvx	1,0,4
+	vsel	0,0,4,2
+	stvx	0,7,4
+
+	or	12,12,12	# writes r12 back to itself
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,3,0
+.long	0
+.size	aes_hw_encrypt,.-aes_hw_encrypt
+.globl	aes_hw_decrypt	# processes one 16-byte block: reads 16 bytes at r3, writes 16 bytes at r4, schedule at r5
+.type	aes_hw_decrypt,@function
+.align	5
+aes_hw_decrypt:	# same structure as aes_hw_encrypt, with vncipher/vncipherlast in place of vcipher/vcipherlast
+.localentry	aes_hw_decrypt,0
+
+	lwz	6,240(5)	# r6 = count stored at schedule offset 240 by the key setup
+	lis	0,0xfc00
+	li	12,-1
+	li	7,15
+	or	0,0,0	# writes r0 back to itself
+
+	lvx	0,0,3	# aligned quadword containing the first input byte
+	neg	11,4	# r11 = -r4, used for the output alignment selector
+	lvx	1,7,3	# aligned quadword containing input byte 15
+	lvsl	2,0,3
+	vspltisb	4,0x0f
+	lvsr	3,0,11
+	vxor	2,2,4	# flip the low four bits of each selector byte; presumably the little-endian adjustment - confirm
+	li	7,16
+	vperm	0,0,1,2	# v0 = the 16 input bytes
+	lvx	1,0,5	# schedule quadword 0
+	lvsr	5,0,5	# v5 = selector that realigns schedule entries
+	srwi	6,6,1
+	lvx	2,7,5
+	addi	7,7,16
+	subi	6,6,1	# r6 = count / 2 - 1 loop iterations (two rounds per iteration)
+	vperm	1,2,1,5
+
+	vxor	0,0,1	# xor the block with the first schedule entry
+	lvx	1,7,5
+	addi	7,7,16
+	mtctr	6
+
+.Loop_dec:	# two inverse-cipher rounds per iteration; r7 walks the schedule in 16-byte steps
+	vperm	2,1,2,5
+	.long	0x10001548	# encodes vncipher 0,0,2
+	lvx	2,7,5
+	addi	7,7,16
+	vperm	1,2,1,5
+	.long	0x10000D48	# encodes vncipher 0,0,1
+	lvx	1,7,5
+	addi	7,7,16
+	bdnz	.Loop_dec
+
+	vperm	2,1,2,5
+	.long	0x10001548	# encodes vncipher 0,0,2
+	lvx	2,7,5
+	vperm	1,2,1,5
+	.long	0x10000D49	# encodes vncipherlast 0,0,1
+
+	vspltisb	2,-1	# from here: unaligned store of v0 to r4
+	vxor	1,1,1
+	li	7,15
+	vperm	2,2,1,3	# v2 = byte mask separating the two destination quadwords
+	vxor	3,3,4
+	lvx	1,0,4	# read-modify-write of the first destination quadword
+	vperm	0,0,0,3
+	vsel	1,1,0,2
+	lvx	4,7,4	# and of the quadword holding destination byte 15
+	stvx	1,0,4
+	vsel	0,0,4,2
+	stvx	0,7,4
+
+	or	12,12,12	# writes r12 back to itself
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,3,0
+.long	0
+.size	aes_hw_decrypt,.-aes_hw_decrypt
+.globl	aes_hw_cbc_encrypt	# CBC over whole 16-byte blocks; uses r3 = input, r4 = output, r5 = byte length, r6 = schedule, r7 = 16-byte chaining value (read and written back), r8 = direction
+.type	aes_hw_cbc_encrypt,@function
+.align	5
+aes_hw_cbc_encrypt:	# r8 == 0 takes the decrypt path, anything else the encrypt path
+.localentry	aes_hw_cbc_encrypt,0
+
+	cmpldi	5,16
+	.long	0x4dc00020	# encodes bltlr- : return immediately when r5 < 16
+
+	cmpwi	8,0	# CR0 from this compare is consumed by beq .Lcbc_dec below
+	lis	0,0xffe0
+	li	12,-1
+	or	0,0,0	# writes r0 back to itself
+
+	li	10,15
+	vxor	0,0,0	# v0 = 0
+	vspltisb	3,0x0f
+
+	lvx	4,0,7	# unaligned load of the chaining value at r7 into v4
+	lvsl	6,0,7
+	lvx	5,10,7
+	vxor	6,6,3
+	vperm	4,4,5,6
+
+	neg	11,3
+	lvsr	10,0,6	# v10 = selector that realigns schedule entries
+	lwz	9,240(6)	# r9 = count stored at schedule offset 240
+
+	lvsr	6,0,11	# v6 = input alignment selector
+	lvx	5,0,3
+	addi	3,3,15
+	vxor	6,6,3
+
+	lvsl	8,0,4	# v8/v9 = output alignment selector and store mask
+	vspltisb	9,-1
+	lvx	7,0,4
+	vperm	9,9,0,8
+	vxor	8,8,3
+
+	srwi	9,9,1
+	li	10,16
+	subi	9,9,1	# r9 = count / 2 - 1 iterations of the two-round loops
+	beq	.Lcbc_dec	# r8 == 0
+
+.Lcbc_enc:	# one block per pass
+	vor	2,5,5
+	lvx	5,0,3
+	addi	3,3,16
+	mtctr	9
+	subi	5,5,16	# 16 bytes consumed
+
+	lvx	0,0,6
+	vperm	2,2,5,6	# v2 = next 16 input bytes
+	lvx	1,10,6
+	addi	10,10,16
+	vperm	0,1,0,10
+	vxor	2,2,0	# xor with the first schedule entry
+	lvx	0,10,6
+	addi	10,10,16
+	vxor	2,2,4	# xor with the chaining value
+
+.Loop_cbc_enc:
+	vperm	1,0,1,10
+	.long	0x10420D08	# encodes vcipher 2,2,1
+	lvx	1,10,6
+	addi	10,10,16
+	vperm	0,1,0,10
+	.long	0x10420508	# encodes vcipher 2,2,0
+	lvx	0,10,6
+	addi	10,10,16
+	bdnz	.Loop_cbc_enc
+
+	vperm	1,0,1,10
+	.long	0x10420D08	# encodes vcipher 2,2,1
+	lvx	1,10,6
+	li	10,16
+	vperm	0,1,0,10
+	.long	0x10820509	# encodes vcipherlast 4,2,0 : the result lands in v4 and becomes the next chaining value
+	cmpldi	5,16	# at least one more block left?
+
+	vperm	3,4,4,8
+	vsel	2,7,3,9
+	vor	7,3,3
+	stvx	2,0,4
+	addi	4,4,16
+	bge	.Lcbc_enc
+
+	b	.Lcbc_done
+
+.align	4
+.Lcbc_dec:	# one block per pass; 128 or more remaining bytes are handed to the 8-block routine
+	cmpldi	5,128
+	bge	_aesp8_cbc_decrypt8x
+	vor	3,5,5
+	lvx	5,0,3
+	addi	3,3,16
+	mtctr	9
+	subi	5,5,16	# 16 bytes consumed
+
+	lvx	0,0,6
+	vperm	3,3,5,6	# v3 = next 16 input bytes (kept for chaining)
+	lvx	1,10,6
+	addi	10,10,16
+	vperm	0,1,0,10
+	vxor	2,3,0	# v2 = input xor first schedule entry
+	lvx	0,10,6
+	addi	10,10,16
+
+.Loop_cbc_dec:
+	vperm	1,0,1,10
+	.long	0x10420D48	# encodes vncipher 2,2,1
+	lvx	1,10,6
+	addi	10,10,16
+	vperm	0,1,0,10
+	.long	0x10420548	# encodes vncipher 2,2,0
+	lvx	0,10,6
+	addi	10,10,16
+	bdnz	.Loop_cbc_dec
+
+	vperm	1,0,1,10
+	.long	0x10420D48	# encodes vncipher 2,2,1
+	lvx	1,10,6
+	li	10,16
+	vperm	0,1,0,10
+	.long	0x10420549	# encodes vncipherlast 2,2,0
+	cmpldi	5,16	# at least one more block left?
+
+	vxor	2,2,4	# xor with the chaining value
+	vor	4,3,3	# the input block just processed becomes the next chaining value
+	vperm	3,2,2,8
+	vsel	2,7,3,9
+	vor	7,3,3
+	stvx	2,0,4
+	addi	4,4,16
+	bge	.Lcbc_dec
+
+.Lcbc_done:	# flush the trailing partial output quadword, then write v4 back to r7
+	addi	4,4,-1
+	lvx	2,0,4
+	vsel	2,7,2,9
+	stvx	2,0,4
+
+	neg	8,7
+	li	10,15
+	vxor	0,0,0
+	vspltisb	9,-1
+	vspltisb	3,0x0f
+	lvsr	8,0,8
+	vperm	9,9,0,8
+	vxor	8,8,3
+	lvx	7,0,7	# read-modify-write of the two quadwords covering the 16 bytes at r7
+	vperm	4,4,4,8
+	vsel	2,7,4,9
+	lvx	5,10,7
+	stvx	2,0,7
+	vsel	2,4,5,9
+	stvx	2,10,7
+
+	or	12,12,12	# writes r12 back to itself
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,6,0
+.long	0
+.align	5
+_aesp8_cbc_decrypt8x:	# 8-blocks-at-a-time CBC decrypt, entered by branch from .Lcbc_dec with r3..r7, r9 and v4, v10 already set up there
+	stdu	1,-448(1)	# open a 448-byte stack frame
+	li	10,207
+	li	11,223
+	stvx	20,10,1	# save v20..v31 in the frame (stvx ignores the low four address bits)
+	addi	10,10,32
+	stvx	21,11,1
+	addi	11,11,32
+	stvx	22,10,1
+	addi	10,10,32
+	stvx	23,11,1
+	addi	11,11,32
+	stvx	24,10,1
+	addi	10,10,32
+	stvx	25,11,1
+	addi	11,11,32
+	stvx	26,10,1
+	addi	10,10,32
+	stvx	27,11,1
+	addi	11,11,32
+	stvx	28,10,1
+	addi	10,10,32
+	stvx	29,11,1
+	addi	11,11,32
+	stvx	30,10,1
+	stvx	31,11,1
+	li	0,-1
+	stw	12,396(1)	# store r12 in the frame
+	li	8,0x10	# r8 and r26..r31 become the index offsets 0x10..0x70 for the 8-way loads and stores
+	std	26,400(1)	# save r26..r31 before they are overwritten
+	li	26,0x20
+	std	27,408(1)
+	li	27,0x30
+	std	28,416(1)
+	li	28,0x40
+	std	29,424(1)
+	li	29,0x50
+	std	30,432(1)
+	li	30,0x60
+	std	31,440(1)
+	li	31,0x70
+	or	0,0,0	# writes r0 back to itself
+
+	subi	9,9,3	# r9 arrives as count / 2 - 1; now count / 2 - 4
+	subi	5,5,128	# account for the first 128 bytes
+
+	lvx	23,0,6
+	lvx	30,8,6
+	addi	6,6,0x20
+	lvx	31,0,6
+	vperm	23,30,23,10	# v23 = first schedule entry, realigned
+	addi	11,1,79	# r11 = frame area that receives realigned schedule entries
+	mtctr	9
+
+.Load_cbc_dec_key:	# realign schedule entries two at a time and park them in the frame
+	vperm	24,31,30,10
+	lvx	30,8,6
+	addi	6,6,0x20
+	stvx	24,0,11
+	vperm	25,30,31,10
+	lvx	31,0,6
+	stvx	25,8,11
+	addi	11,11,0x20
+	bdnz	.Load_cbc_dec_key
+
+	lvx	26,8,6	# the remaining entries are realigned into v26..v31 and stay in registers
+	vperm	24,31,30,10
+	lvx	27,26,6
+	stvx	24,0,11
+	vperm	25,26,31,10
+	lvx	28,27,6
+	stvx	25,8,11
+	addi	11,1,79	# rewind r11 to the first parked entry
+	vperm	26,27,26,10
+	lvx	29,28,6
+	vperm	27,28,27,10
+	lvx	30,29,6
+	vperm	28,29,28,10
+	lvx	31,30,6
+	vperm	29,30,29,10
+	lvx	14,31,6
+	vperm	30,31,30,10
+	lvx	24,0,11	# v24/v25 = first two parked entries
+	vperm	31,14,31,10
+	lvx	25,8,11
+
+
+
+	subi	3,3,15	# undo the addi 3,3,15 done in aes_hw_cbc_encrypt
+
+	li	10,8
+	.long	0x7C001E99	# encodes lxvd2x v0,0,r3 : the .long loads below fetch eight input blocks into v0..v3, v10..v13
+	lvsl	6,0,10
+	vspltisb	3,0x0f
+	.long	0x7C281E99	# encodes lxvd2x v1,r8,r3
+	vxor	6,6,3	# v6 = byte selector applied to every loaded and stored block; presumably the little-endian byte-order fix - confirm
+	.long	0x7C5A1E99	# encodes lxvd2x v2,r26,r3
+	vperm	0,0,0,6
+	.long	0x7C7B1E99	# encodes lxvd2x v3,r27,r3
+	vperm	1,1,1,6
+	.long	0x7D5C1E99	# encodes lxvd2x v10,r28,r3
+	vperm	2,2,2,6
+	vxor	14,0,23	# working blocks v14..v21 = input xor first schedule entry
+	.long	0x7D7D1E99	# encodes lxvd2x v11,r29,r3
+	vperm	3,3,3,6
+	vxor	15,1,23
+	.long	0x7D9E1E99	# encodes lxvd2x v12,r30,r3
+	vperm	10,10,10,6
+	vxor	16,2,23
+	.long	0x7DBF1E99	# encodes lxvd2x v13,r31,r3
+	addi	3,3,0x80	# input pointer advances by 128
+	vperm	11,11,11,6
+	vxor	17,3,23
+	vperm	12,12,12,6
+	vxor	18,10,23
+	vperm	13,13,13,6
+	vxor	19,11,23
+	vxor	20,12,23
+	vxor	21,13,23
+
+	mtctr	9
+	b	.Loop_cbc_dec8x
+.align	5
+.Loop_cbc_dec8x:	# two rounds per pass over the eight working blocks v14..v21, using parked entries reloaded into v24/v25
+	.long	0x11CEC548	# encodes vncipher 14,14,24 ; the next seven words do the same for v15..v21
+	.long	0x11EFC548
+	.long	0x1210C548
+	.long	0x1231C548
+	.long	0x1252C548
+	.long	0x1273C548
+	.long	0x1294C548
+	.long	0x12B5C548
+	lvx	24,26,11
+	addi	11,11,0x20
+
+	.long	0x11CECD48	# encodes vncipher 14,14,25 ; likewise for v15..v21
+	.long	0x11EFCD48
+	.long	0x1210CD48
+	.long	0x1231CD48
+	.long	0x1252CD48
+	.long	0x1273CD48
+	.long	0x1294CD48
+	.long	0x12B5CD48
+	lvx	25,8,11
+	bdnz	.Loop_cbc_dec8x
+
+	subic	5,5,128	# r5 -= 128; the carry bit records whether a borrow occurred
+	.long	0x11CEC548	# vncipher v14..v21 with v24
+	.long	0x11EFC548
+	.long	0x1210C548
+	.long	0x1231C548
+	.long	0x1252C548
+	.long	0x1273C548
+	.long	0x1294C548
+	.long	0x12B5C548
+
+	subfe.	0,0,0	# r0 = 0 when no borrow, -1 when r5 went negative; CR0 is consumed by the beq at the bottom
+	.long	0x11CECD48	# vncipher v14..v21 with v25
+	.long	0x11EFCD48
+	.long	0x1210CD48
+	.long	0x1231CD48
+	.long	0x1252CD48
+	.long	0x1273CD48
+	.long	0x1294CD48
+	.long	0x12B5CD48
+
+	and	0,0,5	# r0 = r5 (negative) on borrow, else 0
+	.long	0x11CED548	# vncipher v14..v21 with v26
+	.long	0x11EFD548
+	.long	0x1210D548
+	.long	0x1231D548
+	.long	0x1252D548
+	.long	0x1273D548
+	.long	0x1294D548
+	.long	0x12B5D548
+
+	add	3,3,0	# on borrow, pull the input pointer back so the next 128-byte load ends at the end of the input
+
+
+
+	.long	0x11CEDD48	# vncipher v14..v21 with v27
+	.long	0x11EFDD48
+	.long	0x1210DD48
+	.long	0x1231DD48
+	.long	0x1252DD48
+	.long	0x1273DD48
+	.long	0x1294DD48
+	.long	0x12B5DD48
+
+	addi	11,1,79	# rewind r11 to the first parked schedule entry
+	.long	0x11CEE548	# vncipher v14..v21 with v28
+	.long	0x11EFE548
+	.long	0x1210E548
+	.long	0x1231E548
+	.long	0x1252E548
+	.long	0x1273E548
+	.long	0x1294E548
+	.long	0x12B5E548
+	lvx	24,0,11
+
+	.long	0x11CEED48	# vncipher v14..v21 with v29
+	.long	0x11EFED48
+	.long	0x1210ED48
+	.long	0x1231ED48
+	.long	0x1252ED48
+	.long	0x1273ED48
+	.long	0x1294ED48
+	.long	0x12B5ED48
+	lvx	25,8,11
+
+	.long	0x11CEF548	# vncipher v14..v21 with v30, interleaved with xoring v31 into the chaining inputs
+	vxor	4,4,31	# v4 (chaining value) and the saved input blocks get v31 folded in ahead of vncipherlast
+	.long	0x11EFF548
+	vxor	0,0,31
+	.long	0x1210F548
+	vxor	1,1,31
+	.long	0x1231F548
+	vxor	2,2,31
+	.long	0x1252F548
+	vxor	3,3,31
+	.long	0x1273F548
+	vxor	10,10,31
+	.long	0x1294F548
+	vxor	11,11,31
+	.long	0x12B5F548
+	vxor	12,12,31
+
+	.long	0x11CE2549	# encodes vncipherlast 14,14,4
+	.long	0x11EF0549	# encodes vncipherlast 15,15,0
+	.long	0x7C001E99	# encodes lxvd2x v0,0,r3 : start loading the next eight input blocks
+	.long	0x12100D49	# encodes vncipherlast 16,16,1
+	.long	0x7C281E99	# encodes lxvd2x v1,r8,r3
+	.long	0x12311549	# encodes vncipherlast 17,17,2
+	vperm	0,0,0,6
+	.long	0x7C5A1E99	# encodes lxvd2x v2,r26,r3
+	.long	0x12521D49	# encodes vncipherlast 18,18,3
+	vperm	1,1,1,6
+	.long	0x7C7B1E99	# encodes lxvd2x v3,r27,r3
+	.long	0x12735549	# encodes vncipherlast 19,19,10
+	vperm	2,2,2,6
+	.long	0x7D5C1E99	# encodes lxvd2x v10,r28,r3
+	.long	0x12945D49	# encodes vncipherlast 20,20,11
+	vperm	3,3,3,6
+	.long	0x7D7D1E99	# encodes lxvd2x v11,r29,r3
+	.long	0x12B56549	# encodes vncipherlast 21,21,12
+	vperm	10,10,10,6
+	.long	0x7D9E1E99	# encodes lxvd2x v12,r30,r3
+	vor	4,13,13	# v4 = v13, carried forward as the chaining value
+	vperm	11,11,11,6
+	.long	0x7DBF1E99	# encodes lxvd2x v13,r31,r3
+	addi	3,3,0x80
+
+	vperm	14,14,14,6
+	vperm	15,15,15,6
+	.long	0x7DC02799	# encodes stxvd2x v14,0,r4 : the eight results go out through r4 + 0x00..0x70
+	vperm	12,12,12,6
+	vxor	14,0,23	# start the next batch: new input xor first schedule entry
+	vperm	16,16,16,6
+	.long	0x7DE82799	# encodes stxvd2x v15,r8,r4
+	vperm	13,13,13,6
+	vxor	15,1,23
+	vperm	17,17,17,6
+	.long	0x7E1A2799	# encodes stxvd2x v16,r26,r4
+	vxor	16,2,23
+	vperm	18,18,18,6
+	.long	0x7E3B2799	# encodes stxvd2x v17,r27,r4
+	vxor	17,3,23
+	vperm	19,19,19,6
+	.long	0x7E5C2799	# encodes stxvd2x v18,r28,r4
+	vxor	18,10,23
+	vperm	20,20,20,6
+	.long	0x7E7D2799	# encodes stxvd2x v19,r29,r4
+	vxor	19,11,23
+	vperm	21,21,21,6
+	.long	0x7E9E2799	# encodes stxvd2x v20,r30,r4
+	vxor	20,12,23
+	.long	0x7EBF2799	# encodes stxvd2x v21,r31,r4
+	addi	4,4,0x80	# output pointer advances by 128
+	vxor	21,13,23
+
+	mtctr	9
+	beq	.Loop_cbc_dec8x	# CR0 from subfe. above: no borrow, so another full batch of eight is available
+
+	addic.	5,5,128	# r5 = bytes still to process (0..112)
+	beq	.Lcbc_dec8x_done
+	nop	
+	nop	
+
+.Loop_cbc_dec8x_tail:	# same round sequence as the main loop, but only over v15..v21 (seven blocks)
+	.long	0x11EFC548	# encodes vncipher 15,15,24 ; the next six words do the same for v16..v21
+	.long	0x1210C548
+	.long	0x1231C548
+	.long	0x1252C548
+	.long	0x1273C548
+	.long	0x1294C548
+	.long	0x12B5C548
+	lvx	24,26,11
+	addi	11,11,0x20
+
+	.long	0x11EFCD48	# vncipher v15..v21 with v25
+	.long	0x1210CD48
+	.long	0x1231CD48
+	.long	0x1252CD48
+	.long	0x1273CD48
+	.long	0x1294CD48
+	.long	0x12B5CD48
+	lvx	25,8,11
+	bdnz	.Loop_cbc_dec8x_tail
+
+	.long	0x11EFC548	# vncipher v15..v21 with v24
+	.long	0x1210C548
+	.long	0x1231C548
+	.long	0x1252C548
+	.long	0x1273C548
+	.long	0x1294C548
+	.long	0x12B5C548
+
+	.long	0x11EFCD48	# vncipher v15..v21 with v25
+	.long	0x1210CD48
+	.long	0x1231CD48
+	.long	0x1252CD48
+	.long	0x1273CD48
+	.long	0x1294CD48
+	.long	0x12B5CD48
+
+	.long	0x11EFD548	# vncipher v15..v21 with v26
+	.long	0x1210D548
+	.long	0x1231D548
+	.long	0x1252D548
+	.long	0x1273D548
+	.long	0x1294D548
+	.long	0x12B5D548
+
+	.long	0x11EFDD48	# vncipher v15..v21 with v27
+	.long	0x1210DD48
+	.long	0x1231DD48
+	.long	0x1252DD48
+	.long	0x1273DD48
+	.long	0x1294DD48
+	.long	0x12B5DD48
+
+	.long	0x11EFE548	# vncipher v15..v21 with v28
+	.long	0x1210E548
+	.long	0x1231E548
+	.long	0x1252E548
+	.long	0x1273E548
+	.long	0x1294E548
+	.long	0x12B5E548
+
+	.long	0x11EFED48	# vncipher v15..v21 with v29
+	.long	0x1210ED48
+	.long	0x1231ED48
+	.long	0x1252ED48
+	.long	0x1273ED48
+	.long	0x1294ED48
+	.long	0x12B5ED48
+
+	.long	0x11EFF548	# vncipher v15..v21 with v30, interleaved with xoring v31 into the chaining inputs
+	vxor	4,4,31
+	.long	0x1210F548
+	vxor	1,1,31
+	.long	0x1231F548
+	vxor	2,2,31
+	.long	0x1252F548
+	vxor	3,3,31
+	.long	0x1273F548
+	vxor	10,10,31
+	.long	0x1294F548
+	vxor	11,11,31
+	.long	0x12B5F548
+	vxor	12,12,31
+
+	cmplwi	5,32	# dispatch on the remaining byte count in r5 (16, 32, ... 112)
+	blt	.Lcbc_dec8x_one	# r5 < 32
+	nop	
+	beq	.Lcbc_dec8x_two	# r5 == 32
+	cmplwi	5,64
+	blt	.Lcbc_dec8x_three	# r5 < 64
+	nop	
+	beq	.Lcbc_dec8x_four	# r5 == 64
+	cmplwi	5,96
+	blt	.Lcbc_dec8x_five	# r5 < 96
+	nop	
+	beq	.Lcbc_dec8x_six	# r5 == 96; anything larger falls through to the seven-block case
+
+.Lcbc_dec8x_seven:	# finish and store seven blocks (v15..v21); the first uses chaining value v4
+	.long	0x11EF2549	# encodes vncipherlast 15,15,4
+	.long	0x12100D49	# encodes vncipherlast 16,16,1
+	.long	0x12311549	# encodes vncipherlast 17,17,2
+	.long	0x12521D49	# encodes vncipherlast 18,18,3
+	.long	0x12735549	# encodes vncipherlast 19,19,10
+	.long	0x12945D49	# encodes vncipherlast 20,20,11
+	.long	0x12B56549	# encodes vncipherlast 21,21,12
+	vor	4,13,13	# v4 = v13, written back as the chaining value at .Lcbc_dec8x_done
+
+	vperm	15,15,15,6
+	vperm	16,16,16,6
+	.long	0x7DE02799	# encodes stxvd2x v15,0,r4
+	vperm	17,17,17,6
+	.long	0x7E082799	# encodes stxvd2x v16,r8,r4
+	vperm	18,18,18,6
+	.long	0x7E3A2799	# encodes stxvd2x v17,r26,r4
+	vperm	19,19,19,6
+	.long	0x7E5B2799	# encodes stxvd2x v18,r27,r4
+	vperm	20,20,20,6
+	.long	0x7E7C2799	# encodes stxvd2x v19,r28,r4
+	vperm	21,21,21,6
+	.long	0x7E9D2799	# encodes stxvd2x v20,r29,r4
+	.long	0x7EBE2799	# encodes stxvd2x v21,r30,r4
+	addi	4,4,0x70	# 7 x 16 bytes written
+	b	.Lcbc_dec8x_done
+
+.align	5
+.Lcbc_dec8x_six:	# finish and store six blocks (v16..v21)
+	.long	0x12102549	# encodes vncipherlast 16,16,4
+	.long	0x12311549	# encodes vncipherlast 17,17,2
+	.long	0x12521D49	# encodes vncipherlast 18,18,3
+	.long	0x12735549	# encodes vncipherlast 19,19,10
+	.long	0x12945D49	# encodes vncipherlast 20,20,11
+	.long	0x12B56549	# encodes vncipherlast 21,21,12
+	vor	4,13,13	# v4 = v13
+
+	vperm	16,16,16,6
+	vperm	17,17,17,6
+	.long	0x7E002799	# encodes stxvd2x v16,0,r4
+	vperm	18,18,18,6
+	.long	0x7E282799	# encodes stxvd2x v17,r8,r4
+	vperm	19,19,19,6
+	.long	0x7E5A2799	# encodes stxvd2x v18,r26,r4
+	vperm	20,20,20,6
+	.long	0x7E7B2799	# encodes stxvd2x v19,r27,r4
+	vperm	21,21,21,6
+	.long	0x7E9C2799	# encodes stxvd2x v20,r28,r4
+	.long	0x7EBD2799	# encodes stxvd2x v21,r29,r4
+	addi	4,4,0x60	# 6 x 16 bytes written
+	b	.Lcbc_dec8x_done
+
+.align	5
+.Lcbc_dec8x_five:	# finish and store five blocks (v17..v21)
+	.long	0x12312549	# encodes vncipherlast 17,17,4
+	.long	0x12521D49	# encodes vncipherlast 18,18,3
+	.long	0x12735549	# encodes vncipherlast 19,19,10
+	.long	0x12945D49	# encodes vncipherlast 20,20,11
+	.long	0x12B56549	# encodes vncipherlast 21,21,12
+	vor	4,13,13	# v4 = v13
+
+	vperm	17,17,17,6
+	vperm	18,18,18,6
+	.long	0x7E202799	# encodes stxvd2x v17,0,r4
+	vperm	19,19,19,6
+	.long	0x7E482799	# encodes stxvd2x v18,r8,r4
+	vperm	20,20,20,6
+	.long	0x7E7A2799	# encodes stxvd2x v19,r26,r4
+	vperm	21,21,21,6
+	.long	0x7E9B2799	# encodes stxvd2x v20,r27,r4
+	.long	0x7EBC2799	# encodes stxvd2x v21,r28,r4
+	addi	4,4,0x50	# 5 x 16 bytes written
+	b	.Lcbc_dec8x_done
+
+.align	5
+.Lcbc_dec8x_four:	# finish and store four blocks (v18..v21)
+	.long	0x12522549	# encodes vncipherlast 18,18,4
+	.long	0x12735549	# encodes vncipherlast 19,19,10
+	.long	0x12945D49	# encodes vncipherlast 20,20,11
+	.long	0x12B56549	# encodes vncipherlast 21,21,12
+	vor	4,13,13	# v4 = v13
+
+	vperm	18,18,18,6
+	vperm	19,19,19,6
+	.long	0x7E402799	# encodes stxvd2x v18,0,r4
+	vperm	20,20,20,6
+	.long	0x7E682799	# encodes stxvd2x v19,r8,r4
+	vperm	21,21,21,6
+	.long	0x7E9A2799	# encodes stxvd2x v20,r26,r4
+	.long	0x7EBB2799	# encodes stxvd2x v21,r27,r4
+	addi	4,4,0x40	# 4 x 16 bytes written
+	b	.Lcbc_dec8x_done
+
+.align	5
+.Lcbc_dec8x_three:	# finish and store three blocks (v19..v21)
+	.long	0x12732549	# encodes vncipherlast 19,19,4
+	.long	0x12945D49	# encodes vncipherlast 20,20,11
+	.long	0x12B56549	# encodes vncipherlast 21,21,12
+	vor	4,13,13	# v4 = v13
+
+	vperm	19,19,19,6
+	vperm	20,20,20,6
+	.long	0x7E602799	# encodes stxvd2x v19,0,r4
+	vperm	21,21,21,6
+	.long	0x7E882799	# encodes stxvd2x v20,r8,r4
+	.long	0x7EBA2799	# encodes stxvd2x v21,r26,r4
+	addi	4,4,0x30	# 3 x 16 bytes written
+	b	.Lcbc_dec8x_done
+
+.align	5
+.Lcbc_dec8x_two:	# finish and store two blocks (v20, v21)
+	.long	0x12942549	# encodes vncipherlast 20,20,4
+	.long	0x12B56549	# encodes vncipherlast 21,21,12
+	vor	4,13,13	# v4 = v13
+
+	vperm	20,20,20,6
+	vperm	21,21,21,6
+	.long	0x7E802799	# encodes stxvd2x v20,0,r4
+	.long	0x7EA82799	# encodes stxvd2x v21,r8,r4
+	addi	4,4,0x20	# 2 x 16 bytes written
+	b	.Lcbc_dec8x_done
+
+.align	5
+.Lcbc_dec8x_one:	# finish and store the single remaining block (v21)
+	.long	0x12B52549	# encodes vncipherlast 21,21,4
+	vor	4,13,13	# v4 = v13
+
+	vperm	21,21,21,6
+	.long	0x7EA02799	# encodes stxvd2x v21,0,r4
+	addi	4,4,0x10	# 16 bytes written
+
+.Lcbc_dec8x_done:	# common exit of the 8-block path: write the chaining value back, scrub the frame, restore registers
+	vperm	4,4,4,6
+	.long	0x7C803F99	# encodes stxvd2x v4,0,r7 : store the final chaining value at r7
+
+	li	10,79
+	li	11,95
+	stvx	6,10,1	# overwrite the frame area where realigned schedule entries were parked, using v6 (the byte selector)
+	addi	10,10,32
+	stvx	6,11,1
+	addi	11,11,32
+	stvx	6,10,1
+	addi	10,10,32
+	stvx	6,11,1
+	addi	11,11,32
+	stvx	6,10,1
+	addi	10,10,32
+	stvx	6,11,1
+	addi	11,11,32
+	stvx	6,10,1
+	addi	10,10,32
+	stvx	6,11,1
+	addi	11,11,32	# r10/r11 are now 207/223, the offsets used to save v20..v31 in the prologue
+
+	or	12,12,12	# writes r12 back to itself
+	lvx	20,10,1	# restore v20..v31
+	addi	10,10,32
+	lvx	21,11,1
+	addi	11,11,32
+	lvx	22,10,1
+	addi	10,10,32
+	lvx	23,11,1
+	addi	11,11,32
+	lvx	24,10,1
+	addi	10,10,32
+	lvx	25,11,1
+	addi	11,11,32
+	lvx	26,10,1
+	addi	10,10,32
+	lvx	27,11,1
+	addi	11,11,32
+	lvx	28,10,1
+	addi	10,10,32
+	lvx	29,11,1
+	addi	11,11,32
+	lvx	30,10,1
+	lvx	31,11,1
+	ld	26,400(1)	# restore r26..r31
+	ld	27,408(1)
+	ld	28,416(1)
+	ld	29,424(1)
+	ld	30,432(1)
+	ld	31,440(1)
+	addi	1,1,448	# release the 448-byte frame
+	blr	
+.long	0
+.byte	0,12,0x04,0,0x80,6,6,0
+.long	0
+.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
+.globl	aes_hw_ctr32_encrypt_blocks	# counter mode over whole blocks; uses r3 = input, r4 = output, r5 = number of 16-byte blocks, r6 = schedule, r7 = 16-byte initial counter block
+.type	aes_hw_ctr32_encrypt_blocks,@function
+.align	5
+aes_hw_ctr32_encrypt_blocks:	# eight or more blocks are handed to _aesp8_ctr32_encrypt8x; fewer are handled one at a time below
+.localentry	aes_hw_ctr32_encrypt_blocks,0
+
+	cmpldi	5,1
+	.long	0x4dc00020	# encodes bltlr- : return immediately when r5 < 1
+
+	lis	0,0xfff0
+	li	12,-1
+	or	0,0,0	# writes r0 back to itself
+
+	li	10,15
+	vxor	0,0,0	# v0 = 0
+	vspltisb	3,0x0f
+
+	lvx	4,0,7	# unaligned load of the counter block at r7 into v4
+	lvsl	6,0,7
+	lvx	5,10,7
+	vspltisb	11,1
+	vxor	6,6,3
+	vperm	4,4,5,6
+	vsldoi	11,0,11,1	# v11 = fifteen zero bytes followed by 0x01: the per-block increment
+
+	neg	11,3
+	lvsr	10,0,6	# v10 = selector that realigns schedule entries
+	lwz	9,240(6)	# r9 = count stored at schedule offset 240
+
+	lvsr	6,0,11	# v6 = input alignment selector
+	lvx	5,0,3
+	addi	3,3,15
+	vxor	6,6,3
+
+	srwi	9,9,1
+	li	10,16
+	subi	9,9,1	# r9 = count / 2 - 1 iterations of the two-round loop
+
+	cmpldi	5,8
+	bge	_aesp8_ctr32_encrypt8x	# eight or more blocks
+
+	lvsl	8,0,4	# v8/v9 = output alignment selector and store mask
+	vspltisb	9,-1
+	lvx	7,0,4
+	vperm	9,9,0,8
+	vxor	8,8,3
+
+	lvx	0,0,6
+	mtctr	9
+	lvx	1,10,6
+	addi	10,10,16
+	vperm	0,1,0,10
+	vxor	2,4,0	# v2 = counter block xor first schedule entry
+	lvx	0,10,6
+	addi	10,10,16
+	b	.Loop_ctr32_enc
+
+.align	5
+.Loop_ctr32_enc:	# two cipher rounds per iteration on the counter block in v2
+	vperm	1,0,1,10
+	.long	0x10420D08	# encodes vcipher 2,2,1
+	lvx	1,10,6
+	addi	10,10,16
+	vperm	0,1,0,10
+	.long	0x10420508	# encodes vcipher 2,2,0
+	lvx	0,10,6
+	addi	10,10,16
+	bdnz	.Loop_ctr32_enc
+
+	vadduwm	4,4,11	# add v11 to the counter block, word-wise (only the word containing the 0x01 byte changes)
+	vor	3,5,5
+	lvx	5,0,3
+	addi	3,3,16
+	subic.	5,5,1	# one block consumed; CR0 is consumed by the bne at the bottom
+
+	vperm	1,0,1,10
+	.long	0x10420D08	# encodes vcipher 2,2,1
+	lvx	1,10,6
+	vperm	3,3,5,6	# v3 = next 16 input bytes
+	li	10,16
+	vperm	1,1,0,10
+	lvx	0,0,6
+	vxor	3,3,1	# fold the input into the final schedule entry
+	.long	0x10421D09	# encodes vcipherlast 2,2,3 : the final round and the xor with the input happen together
+
+	lvx	1,10,6	# begin setting up the next block while the result is stored
+	addi	10,10,16
+	vperm	2,2,2,8
+	vsel	3,7,2,9
+	mtctr	9
+	vperm	0,1,0,10
+	vor	7,2,2
+	vxor	2,4,0	# v2 = updated counter block xor first schedule entry
+	lvx	0,10,6
+	addi	10,10,16
+	stvx	3,0,4
+	addi	4,4,16
+	bne	.Loop_ctr32_enc	# more blocks remain
+
+	addi	4,4,-1	# flush the trailing partial output quadword
+	lvx	2,0,4
+	vsel	2,7,2,9
+	stvx	2,0,4
+
+	or	12,12,12	# writes r12 back to itself
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,6,0
+.long	0
+.align	5
+_aesp8_ctr32_encrypt8x:
+	stdu	1,-448(1)
+	li	10,207
+	li	11,223
+	stvx	20,10,1
+	addi	10,10,32
+	stvx	21,11,1
+	addi	11,11,32
+	stvx	22,10,1
+	addi	10,10,32
+	stvx	23,11,1
+	addi	11,11,32
+	stvx	24,10,1
+	addi	10,10,32
+	stvx	25,11,1
+	addi	11,11,32
+	stvx	26,10,1
+	addi	10,10,32
+	stvx	27,11,1
+	addi	11,11,32
+	stvx	28,10,1
+	addi	10,10,32
+	stvx	29,11,1
+	addi	11,11,32
+	stvx	30,10,1
+	stvx	31,11,1
+	li	0,-1
+	stw	12,396(1)
+	li	8,0x10
+	std	26,400(1)
+	li	26,0x20
+	std	27,408(1)
+	li	27,0x30
+	std	28,416(1)
+	li	28,0x40
+	std	29,424(1)
+	li	29,0x50
+	std	30,432(1)
+	li	30,0x60
+	std	31,440(1)
+	li	31,0x70
+	or	0,0,0
+
+	subi	9,9,3
+
+	lvx	23,0,6
+	lvx	30,8,6
+	addi	6,6,0x20
+	lvx	31,0,6
+	vperm	23,30,23,10
+	addi	11,1,79
+	mtctr	9
+
+.Load_ctr32_enc_key:
+	vperm	24,31,30,10
+	lvx	30,8,6
+	addi	6,6,0x20
+	stvx	24,0,11
+	vperm	25,30,31,10
+	lvx	31,0,6
+	stvx	25,8,11
+	addi	11,11,0x20
+	bdnz	.Load_ctr32_enc_key
+
+	lvx	26,8,6
+	vperm	24,31,30,10
+	lvx	27,26,6
+	stvx	24,0,11
+	vperm	25,26,31,10
+	lvx	28,27,6
+	stvx	25,8,11
+	addi	11,1,79
+	vperm	26,27,26,10
+	lvx	29,28,6
+	vperm	27,28,27,10
+	lvx	30,29,6
+	vperm	28,29,28,10
+	lvx	31,30,6
+	vperm	29,30,29,10
+	lvx	15,31,6
+	vperm	30,31,30,10
+	lvx	24,0,11
+	vperm	31,15,31,10
+	lvx	25,8,11
+
+	vadduwm	7,11,11
+	subi	3,3,15
+	sldi	5,5,4
+
+	vadduwm	16,4,11
+	vadduwm	17,4,7
+	vxor	15,4,23
+	li	10,8
+	vadduwm	18,16,7
+	vxor	16,16,23
+	lvsl	6,0,10
+	vadduwm	19,17,7
+	vxor	17,17,23
+	vspltisb	3,0x0f
+	vadduwm	20,18,7
+	vxor	18,18,23
+	vxor	6,6,3
+	vadduwm	21,19,7
+	vxor	19,19,23
+	vadduwm	22,20,7
+	vxor	20,20,23
+	vadduwm	4,21,7
+	vxor	21,21,23
+	vxor	22,22,23
+
+	mtctr	9
+	b	.Loop_ctr32_enc8x
+.align	5
+.Loop_ctr32_enc8x:
+	.long	0x11EFC508
+	.long	0x1210C508
+	.long	0x1231C508
+	.long	0x1252C508
+	.long	0x1273C508
+	.long	0x1294C508
+	.long	0x12B5C508
+	.long	0x12D6C508
+.Loop_ctr32_enc8x_middle:
+	lvx	24,26,11
+	addi	11,11,0x20
+
+	.long	0x11EFCD08
+	.long	0x1210CD08
+	.long	0x1231CD08
+	.long	0x1252CD08
+	.long	0x1273CD08
+	.long	0x1294CD08
+	.long	0x12B5CD08
+	.long	0x12D6CD08
+	lvx	25,8,11
+	bdnz	.Loop_ctr32_enc8x
+
+	subic	11,5,256
+	.long	0x11EFC508
+	.long	0x1210C508
+	.long	0x1231C508
+	.long	0x1252C508
+	.long	0x1273C508
+	.long	0x1294C508
+	.long	0x12B5C508
+	.long	0x12D6C508
+
+	subfe	0,0,0
+	.long	0x11EFCD08
+	.long	0x1210CD08
+	.long	0x1231CD08
+	.long	0x1252CD08
+	.long	0x1273CD08
+	.long	0x1294CD08
+	.long	0x12B5CD08
+	.long	0x12D6CD08
+
+	and	0,0,11
+	addi	11,1,79
+	.long	0x11EFD508
+	.long	0x1210D508
+	.long	0x1231D508
+	.long	0x1252D508
+	.long	0x1273D508
+	.long	0x1294D508
+	.long	0x12B5D508
+	.long	0x12D6D508
+	lvx	24,0,11
+
+	subic	5,5,129
+	.long	0x11EFDD08
+	addi	5,5,1
+	.long	0x1210DD08
+	.long	0x1231DD08
+	.long	0x1252DD08
+	.long	0x1273DD08
+	.long	0x1294DD08
+	.long	0x12B5DD08
+	.long	0x12D6DD08
+	lvx	25,8,11
+
+	.long	0x11EFE508
+	.long	0x7C001E99
+	.long	0x1210E508
+	.long	0x7C281E99
+	.long	0x1231E508
+	.long	0x7C5A1E99
+	.long	0x1252E508
+	.long	0x7C7B1E99
+	.long	0x1273E508
+	.long	0x7D5C1E99
+	.long	0x1294E508
+	.long	0x7D9D1E99
+	.long	0x12B5E508
+	.long	0x7DBE1E99
+	.long	0x12D6E508
+	.long	0x7DDF1E99
+	addi	3,3,0x80
+
+	.long	0x11EFED08
+	vperm	0,0,0,6
+	.long	0x1210ED08
+	vperm	1,1,1,6
+	.long	0x1231ED08
+	vperm	2,2,2,6
+	.long	0x1252ED08
+	vperm	3,3,3,6
+	.long	0x1273ED08
+	vperm	10,10,10,6
+	.long	0x1294ED08
+	vperm	12,12,12,6
+	.long	0x12B5ED08
+	vperm	13,13,13,6
+	.long	0x12D6ED08
+	vperm	14,14,14,6
+
+	add	3,3,0
+
+
+
+	subfe.	0,0,0
+	.long	0x11EFF508
+	vxor	0,0,31
+	.long	0x1210F508
+	vxor	1,1,31
+	.long	0x1231F508
+	vxor	2,2,31
+	.long	0x1252F508
+	vxor	3,3,31
+	.long	0x1273F508
+	vxor	10,10,31
+	.long	0x1294F508
+	vxor	12,12,31
+	.long	0x12B5F508
+	vxor	13,13,31
+	.long	0x12D6F508
+	vxor	14,14,31
+
+	bne	.Lctr32_enc8x_break
+
+	.long	0x100F0509
+	.long	0x10300D09
+	vadduwm	16,4,11
+	.long	0x10511509
+	vadduwm	17,4,7
+	vxor	15,4,23
+	.long	0x10721D09
+	vadduwm	18,16,7
+	vxor	16,16,23
+	.long	0x11535509
+	vadduwm	19,17,7
+	vxor	17,17,23
+	.long	0x11946509
+	vadduwm	20,18,7
+	vxor	18,18,23
+	.long	0x11B56D09
+	vadduwm	21,19,7
+	vxor	19,19,23
+	.long	0x11D67509
+	vadduwm	22,20,7
+	vxor	20,20,23
+	vperm	0,0,0,6
+	vadduwm	4,21,7
+	vxor	21,21,23
+	vperm	1,1,1,6
+	vxor	22,22,23
+	mtctr	9
+
+	.long	0x11EFC508
+	.long	0x7C002799
+	vperm	2,2,2,6
+	.long	0x1210C508
+	.long	0x7C282799
+	vperm	3,3,3,6
+	.long	0x1231C508
+	.long	0x7C5A2799
+	vperm	10,10,10,6
+	.long	0x1252C508
+	.long	0x7C7B2799
+	vperm	12,12,12,6
+	.long	0x1273C508
+	.long	0x7D5C2799
+	vperm	13,13,13,6
+	.long	0x1294C508
+	.long	0x7D9D2799
+	vperm	14,14,14,6
+	.long	0x12B5C508
+	.long	0x7DBE2799
+	.long	0x12D6C508
+	.long	0x7DDF2799
+	addi	4,4,0x80
+
+	b	.Loop_ctr32_enc8x_middle
+
+.align	5
+.Lctr32_enc8x_break:
+	cmpwi	5,-0x60
+	blt	.Lctr32_enc8x_one
+	nop	
+	beq	.Lctr32_enc8x_two
+	cmpwi	5,-0x40
+	blt	.Lctr32_enc8x_three
+	nop	
+	beq	.Lctr32_enc8x_four
+	cmpwi	5,-0x20
+	blt	.Lctr32_enc8x_five
+	nop	
+	beq	.Lctr32_enc8x_six
+	cmpwi	5,0x00
+	blt	.Lctr32_enc8x_seven
+
+.Lctr32_enc8x_eight:
+	.long	0x11EF0509
+	.long	0x12100D09
+	.long	0x12311509
+	.long	0x12521D09
+	.long	0x12735509
+	.long	0x12946509
+	.long	0x12B56D09
+	.long	0x12D67509
+
+	vperm	15,15,15,6
+	vperm	16,16,16,6
+	.long	0x7DE02799
+	vperm	17,17,17,6
+	.long	0x7E082799
+	vperm	18,18,18,6
+	.long	0x7E3A2799
+	vperm	19,19,19,6
+	.long	0x7E5B2799
+	vperm	20,20,20,6
+	.long	0x7E7C2799
+	vperm	21,21,21,6
+	.long	0x7E9D2799
+	vperm	22,22,22,6
+	.long	0x7EBE2799
+	.long	0x7EDF2799
+	addi	4,4,0x80
+	b	.Lctr32_enc8x_done
+
+.align	5
+.Lctr32_enc8x_seven:
+	.long	0x11EF0D09
+	.long	0x12101509
+	.long	0x12311D09
+	.long	0x12525509
+	.long	0x12736509
+	.long	0x12946D09
+	.long	0x12B57509
+
+	vperm	15,15,15,6
+	vperm	16,16,16,6
+	.long	0x7DE02799
+	vperm	17,17,17,6
+	.long	0x7E082799
+	vperm	18,18,18,6
+	.long	0x7E3A2799
+	vperm	19,19,19,6
+	.long	0x7E5B2799
+	vperm	20,20,20,6
+	.long	0x7E7C2799
+	vperm	21,21,21,6
+	.long	0x7E9D2799
+	.long	0x7EBE2799
+	addi	4,4,0x70
+	b	.Lctr32_enc8x_done
+
+.align	5
+.Lctr32_enc8x_six:
+	.long	0x11EF1509
+	.long	0x12101D09
+	.long	0x12315509
+	.long	0x12526509
+	.long	0x12736D09
+	.long	0x12947509
+
+	vperm	15,15,15,6
+	vperm	16,16,16,6
+	.long	0x7DE02799
+	vperm	17,17,17,6
+	.long	0x7E082799
+	vperm	18,18,18,6
+	.long	0x7E3A2799
+	vperm	19,19,19,6
+	.long	0x7E5B2799
+	vperm	20,20,20,6
+	.long	0x7E7C2799
+	.long	0x7E9D2799
+	addi	4,4,0x60
+	b	.Lctr32_enc8x_done
+
+.align	5
+.Lctr32_enc8x_five:
+	.long	0x11EF1D09
+	.long	0x12105509
+	.long	0x12316509
+	.long	0x12526D09
+	.long	0x12737509
+
+	vperm	15,15,15,6
+	vperm	16,16,16,6
+	.long	0x7DE02799
+	vperm	17,17,17,6
+	.long	0x7E082799
+	vperm	18,18,18,6
+	.long	0x7E3A2799
+	vperm	19,19,19,6
+	.long	0x7E5B2799
+	.long	0x7E7C2799
+	addi	4,4,0x50
+	b	.Lctr32_enc8x_done
+
+.align	5
+.Lctr32_enc8x_four:
+	.long	0x11EF5509
+	.long	0x12106509
+	.long	0x12316D09
+	.long	0x12527509
+
+	vperm	15,15,15,6
+	vperm	16,16,16,6
+	.long	0x7DE02799
+	vperm	17,17,17,6
+	.long	0x7E082799
+	vperm	18,18,18,6
+	.long	0x7E3A2799
+	.long	0x7E5B2799
+	addi	4,4,0x40
+	b	.Lctr32_enc8x_done
+
+.align	5
+.Lctr32_enc8x_three:
+	.long	0x11EF6509
+	.long	0x12106D09
+	.long	0x12317509
+
+	vperm	15,15,15,6
+	vperm	16,16,16,6
+	.long	0x7DE02799
+	vperm	17,17,17,6
+	.long	0x7E082799
+	.long	0x7E3A2799
+	addi	4,4,0x30
+	b	.Lcbc_dec8x_done
+
+.align	5
+.Lctr32_enc8x_two:
+	.long	0x11EF6D09
+	.long	0x12107509
+
+	vperm	15,15,15,6
+	vperm	16,16,16,6
+	.long	0x7DE02799
+	.long	0x7E082799
+	addi	4,4,0x20
+	b	.Lcbc_dec8x_done
+
+.align	5
+.Lctr32_enc8x_one:
+	.long	0x11EF7509
+
+	vperm	15,15,15,6
+	.long	0x7DE02799
+	addi	4,4,0x10
+
+.Lctr32_enc8x_done:
+	li	10,79
+	li	11,95
+	stvx	6,10,1
+	addi	10,10,32
+	stvx	6,11,1
+	addi	11,11,32
+	stvx	6,10,1
+	addi	10,10,32
+	stvx	6,11,1
+	addi	11,11,32
+	stvx	6,10,1
+	addi	10,10,32
+	stvx	6,11,1
+	addi	11,11,32
+	stvx	6,10,1
+	addi	10,10,32
+	stvx	6,11,1
+	addi	11,11,32
+
+	or	12,12,12
+	lvx	20,10,1
+	addi	10,10,32
+	lvx	21,11,1
+	addi	11,11,32
+	lvx	22,10,1
+	addi	10,10,32
+	lvx	23,11,1
+	addi	11,11,32
+	lvx	24,10,1
+	addi	10,10,32
+	lvx	25,11,1
+	addi	11,11,32
+	lvx	26,10,1
+	addi	10,10,32
+	lvx	27,11,1
+	addi	11,11,32
+	lvx	28,10,1
+	addi	10,10,32
+	lvx	29,11,1
+	addi	11,11,32
+	lvx	30,10,1
+	lvx	31,11,1
+	ld	26,400(1)
+	ld	27,408(1)
+	ld	28,416(1)
+	ld	29,424(1)
+	ld	30,432(1)
+	ld	31,440(1)
+	addi	1,1,448
+	blr	
+.long	0
+.byte	0,12,0x04,0,0x80,6,6,0
+.long	0
+.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
+.globl	aes_hw_xts_encrypt
+.type	aes_hw_xts_encrypt,@function
+.align	5
+aes_hw_xts_encrypt:	# AES-XTS encrypt. As used below: r3 = input, r4 = output, r5 = byte length, r6 = data key schedule, r7 = optional tweak key schedule (0 = none), r8 = 16-byte tweak. Returns r3 = 0, or -1 when r5 < 16.
+.localentry	aes_hw_xts_encrypt,0
+
+	mr	10,3	# r10 = input pointer; r3 is reused as return value and index register
+	li	3,-1	# preset return value -1
+	cmpldi	5,16
+	.long	0x4dc00020	# bltlr-: return -1 when r5 < 16
+
+	lis	0,0xfff0
+	li	12,-1
+	li	11,0
+	or	0,0,0	# no-op
+
+	vspltisb	9,0x07	# v9 = splat(7): shift count for vsrab in the tweak update
+	lvsl	6,11,11
+	vspltisb	11,0x0f
+	vxor	6,6,9	# v6 = lvsl(0) ^ 7: byte permutation applied before stxvd2x stores
+
+	li	3,15
+	lvx	8,0,8	# load the 16 bytes at r8 (possibly unaligned) ...
+	lvsl	5,0,8
+	lvx	4,3,8
+	vxor	5,5,11
+	vperm	8,8,4,5	# ... v8 = tweak input
+
+	neg	11,10
+	lvsr	5,0,11	# v5 = permute mask for unaligned loads from the input pointer
+	lvx	2,0,10
+	addi	10,10,15
+	vxor	5,5,11
+
+	cmpldi	7,0
+	beq	.Lxts_enc_no_key2	# r7 == 0: v8 is used as the tweak without encrypting it
+
+	lvsr	7,0,7
+	lwz	9,240(7)	# word at offset 240 of the r7 key schedule (presumably the round count -- TODO confirm)
+	srwi	9,9,1
+	subi	9,9,1	# CTR = value/2 - 1: the loop below does two rounds per iteration
+	li	3,16
+
+	lvx	0,0,7
+	lvx	1,3,7
+	addi	3,3,16
+	vperm	0,1,0,7
+	vxor	8,8,0	# v8 ^= first round key
+	lvx	0,3,7
+	addi	3,3,16
+	mtctr	9
+
+.Ltweak_xts_enc:	# encrypt v8 with the r7 key schedule, two rounds per iteration
+	vperm	1,0,1,7
+	.long	0x11080D08	# vcipher v8,v8,v1
+	lvx	1,3,7
+	addi	3,3,16
+	vperm	0,1,0,7
+	.long	0x11080508	# vcipher v8,v8,v0
+	lvx	0,3,7
+	addi	3,3,16
+	bdnz	.Ltweak_xts_enc
+
+	vperm	1,0,1,7
+	.long	0x11080D08	# vcipher v8,v8,v1
+	lvx	1,3,7
+	vperm	0,1,0,7
+	.long	0x11080509	# vcipherlast v8,v8,v0: v8 = encrypted tweak
+
+	li	8,0	# r8 = 0: the tweak is not written back at .Lxts_enc_done
+	b	.Lxts_enc
+
+.Lxts_enc_no_key2:
+	li	3,-16
+	and	5,5,3	# round r5 down to a multiple of 16
+
+
+.Lxts_enc:
+	lvx	4,0,10
+	addi	10,10,16
+
+	lvsr	7,0,6
+	lwz	9,240(6)	# word at offset 240 of the r6 key schedule (presumably the round count -- TODO confirm)
+	srwi	9,9,1
+	subi	9,9,1
+	li	3,16
+
+	vslb	10,9,9	# each byte = 7 << 7 = 0x80
+	vor	10,10,9	# each byte = 0x87
+	vspltisb	11,1
+	vsldoi	10,10,11,15	# v10 = 0x87 followed by fifteen 0x01 bytes (mask for the tweak update)
+
+	cmpldi	5,96
+	bge	_aesp8_xts_encrypt6x	# 96 bytes or more: use the 6-block routine
+
+	andi.	7,5,15	# r7 = r5 mod 16 (tail byte count)
+	subic	0,5,32	# CA = (r5 >= 32)
+	subi	7,7,16
+	subfe	0,0,0	# r0 = -1 when r5 < 32, else 0
+	and	0,0,7
+	add	10,10,0	# when r5 < 32, move the input pointer by (tail - 16)
+
+	lvx	0,0,6
+	lvx	1,3,6
+	addi	3,3,16
+	vperm	2,2,4,5	# v2 = current (unaligned) input block
+	vperm	0,1,0,7
+	vxor	2,2,8	# v2 ^= tweak
+	vxor	2,2,0	# v2 ^= first round key
+	lvx	0,3,6
+	addi	3,3,16
+	mtctr	9
+	b	.Loop_xts_enc
+
+.align	5
+.Loop_xts_enc:	# encrypt one block in v2, two rounds per iteration
+	vperm	1,0,1,7
+	.long	0x10420D08	# vcipher v2,v2,v1
+	lvx	1,3,6
+	addi	3,3,16
+	vperm	0,1,0,7
+	.long	0x10420508	# vcipher v2,v2,v0
+	lvx	0,3,6
+	addi	3,3,16
+	bdnz	.Loop_xts_enc
+
+	vperm	1,0,1,7
+	.long	0x10420D08	# vcipher v2,v2,v1
+	lvx	1,3,6
+	li	3,16
+	vperm	0,1,0,7
+	vxor	0,0,8	# fold the tweak into the last round key
+	.long	0x10620509	# vcipherlast v3,v2,v0
+
+	vperm	11,3,3,6
+
+	.long	0x7D602799	# stxvd2x v11,0,r4: store the output block
+
+	addi	4,4,16
+
+	subic.	5,5,16
+	beq	.Lxts_enc_done
+
+	vor	2,4,4
+	lvx	4,0,10
+	addi	10,10,16
+	lvx	0,0,6
+	lvx	1,3,6
+	addi	3,3,16
+
+	subic	0,5,32
+	subfe	0,0,0
+	and	0,0,7
+	add	10,10,0
+
+	vsrab	11,8,9	# tweak update: v8 = (v8 doubled bytewise) ^ (rotated sign bytes & v10)
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vand	11,11,10
+	vxor	8,8,11
+
+	vperm	2,2,4,5
+	vperm	0,1,0,7
+	vxor	2,2,8
+	vxor	3,3,0
+	vxor	2,2,0
+	lvx	0,3,6
+	addi	3,3,16
+
+	mtctr	9
+	cmpldi	5,16
+	bge	.Loop_xts_enc	# at least one full block left
+
+	vxor	3,3,8	# fewer than 16 bytes left: merge the tail with the previous output block
+	lvsr	5,0,5
+	vxor	4,4,4
+	vspltisb	11,-1
+	vperm	4,4,11,5	# v4 = byte select mask derived from r5
+	vsel	2,2,3,4
+
+	subi	11,4,17
+	subi	4,4,16	# step the output pointer back one block
+	mtctr	5
+	li	5,16	# one more 16-byte block to process after the byte copy
+.Loop_xts_enc_steal:	# copy CTR bytes from out[i] to out[i+16]
+	lbzu	0,1(11)
+	stb	0,16(11)
+	bdnz	.Loop_xts_enc_steal
+
+	mtctr	9
+	b	.Loop_xts_enc
+
+.Lxts_enc_done:
+	cmpldi	8,0
+	beq	.Lxts_enc_ret	# r8 == 0: no tweak write-back
+
+	vsrab	11,8,9	# advance the tweak once more before storing it
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vand	11,11,10
+	vxor	8,8,11
+
+	vperm	8,8,8,6
+	.long	0x7D004799	# stxvd2x v8,0,r8: store the updated tweak
+
+.Lxts_enc_ret:
+	or	12,12,12	# no-op
+	li	3,0	# return 0
+	blr	
+.long	0
+.byte	0,12,0x04,0,0x80,6,6,0
+.long	0
+.size	aes_hw_xts_encrypt,.-aes_hw_xts_encrypt
+
+.globl	aes_hw_xts_decrypt
+.type	aes_hw_xts_decrypt,@function
+.align	5
+aes_hw_xts_decrypt:	# AES-XTS decrypt. As used below: r3 = input, r4 = output, r5 = byte length, r6 = data key schedule, r7 = optional tweak key schedule (0 = none), r8 = 16-byte tweak. Returns r3 = 0, or -1 when r5 < 16.
+.localentry	aes_hw_xts_decrypt,0
+
+	mr	10,3	# r10 = input pointer; r3 is reused as return value and index register
+	li	3,-1	# preset return value -1
+	cmpldi	5,16
+	.long	0x4dc00020	# bltlr-: return -1 when r5 < 16
+
+	lis	0,0xfff8
+	li	12,-1
+	li	11,0
+	or	0,0,0	# no-op
+
+	andi.	0,5,15
+	neg	0,0
+	andi.	0,0,16	# r0 = 16 when r5 is not a multiple of 16, else 0
+	sub	5,5,0	# hold back one full block when there is a partial tail
+
+	vspltisb	9,0x07	# v9 = splat(7): shift count for vsrab in the tweak update
+	lvsl	6,11,11
+	vspltisb	11,0x0f
+	vxor	6,6,9	# v6 = lvsl(0) ^ 7: byte permutation applied before stxvd2x stores
+
+	li	3,15
+	lvx	8,0,8	# load the 16 bytes at r8 (possibly unaligned) ...
+	lvsl	5,0,8
+	lvx	4,3,8
+	vxor	5,5,11
+	vperm	8,8,4,5	# ... v8 = tweak input
+
+	neg	11,10
+	lvsr	5,0,11	# v5 = permute mask for unaligned loads from the input pointer
+	lvx	2,0,10
+	addi	10,10,15
+	vxor	5,5,11
+
+	cmpldi	7,0
+	beq	.Lxts_dec_no_key2	# r7 == 0: v8 is used as the tweak without encrypting it
+
+	lvsr	7,0,7
+	lwz	9,240(7)	# word at offset 240 of the r7 key schedule (presumably the round count -- TODO confirm)
+	srwi	9,9,1
+	subi	9,9,1	# CTR = value/2 - 1: the loop below does two rounds per iteration
+	li	3,16
+
+	lvx	0,0,7
+	lvx	1,3,7
+	addi	3,3,16
+	vperm	0,1,0,7
+	vxor	8,8,0	# v8 ^= first round key
+	lvx	0,3,7
+	addi	3,3,16
+	mtctr	9
+
+.Ltweak_xts_dec:	# the tweak is produced with vcipher (forward AES) even on the decrypt path
+	vperm	1,0,1,7
+	.long	0x11080D08	# vcipher v8,v8,v1
+	lvx	1,3,7
+	addi	3,3,16
+	vperm	0,1,0,7
+	.long	0x11080508	# vcipher v8,v8,v0
+	lvx	0,3,7
+	addi	3,3,16
+	bdnz	.Ltweak_xts_dec
+
+	vperm	1,0,1,7
+	.long	0x11080D08	# vcipher v8,v8,v1
+	lvx	1,3,7
+	vperm	0,1,0,7
+	.long	0x11080509	# vcipherlast v8,v8,v0: v8 = encrypted tweak
+
+	li	8,0	# r8 = 0: the tweak is not written back at .Lxts_dec_done
+	b	.Lxts_dec
+
+.Lxts_dec_no_key2:
+	neg	3,5
+	andi.	3,3,15
+	add	5,5,3	# round r5 up to a multiple of 16
+
+
+.Lxts_dec:
+	lvx	4,0,10
+	addi	10,10,16
+
+	lvsr	7,0,6
+	lwz	9,240(6)	# word at offset 240 of the r6 key schedule (presumably the round count -- TODO confirm)
+	srwi	9,9,1
+	subi	9,9,1
+	li	3,16
+
+	vslb	10,9,9	# each byte = 7 << 7 = 0x80
+	vor	10,10,9	# each byte = 0x87
+	vspltisb	11,1
+	vsldoi	10,10,11,15	# v10 = 0x87 followed by fifteen 0x01 bytes (mask for the tweak update)
+
+	cmpldi	5,96
+	bge	_aesp8_xts_decrypt6x	# 96 bytes or more: use the 6-block routine
+
+	lvx	0,0,6
+	lvx	1,3,6
+	addi	3,3,16
+	vperm	2,2,4,5	# v2 = current (unaligned) input block
+	vperm	0,1,0,7
+	vxor	2,2,8	# v2 ^= tweak
+	vxor	2,2,0	# v2 ^= first round key
+	lvx	0,3,6
+	addi	3,3,16
+	mtctr	9
+
+	cmpldi	5,16
+	blt	.Ltail_xts_dec	# less than one full block left after the hold-back above
+
+
+.align	5
+.Loop_xts_dec:	# decrypt one block in v2, two rounds per iteration
+	vperm	1,0,1,7
+	.long	0x10420D48	# vncipher v2,v2,v1
+	lvx	1,3,6
+	addi	3,3,16
+	vperm	0,1,0,7
+	.long	0x10420548	# vncipher v2,v2,v0
+	lvx	0,3,6
+	addi	3,3,16
+	bdnz	.Loop_xts_dec
+
+	vperm	1,0,1,7
+	.long	0x10420D48	# vncipher v2,v2,v1
+	lvx	1,3,6
+	li	3,16
+	vperm	0,1,0,7
+	vxor	0,0,8	# fold the tweak into the last round key
+	.long	0x10620549	# vncipherlast v3,v2,v0
+
+	vperm	11,3,3,6
+
+	.long	0x7D602799	# stxvd2x v11,0,r4: store the output block
+
+	addi	4,4,16
+
+	subic.	5,5,16
+	beq	.Lxts_dec_done
+
+	vor	2,4,4
+	lvx	4,0,10
+	addi	10,10,16
+	lvx	0,0,6
+	lvx	1,3,6
+	addi	3,3,16
+
+	vsrab	11,8,9	# tweak update: v8 = (v8 doubled bytewise) ^ (rotated sign bytes & v10)
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vand	11,11,10
+	vxor	8,8,11
+
+	vperm	2,2,4,5
+	vperm	0,1,0,7
+	vxor	2,2,8
+	vxor	2,2,0
+	lvx	0,3,6
+	addi	3,3,16
+
+	mtctr	9
+	cmpldi	5,16
+	bge	.Loop_xts_dec
+
+.Ltail_xts_dec:	# partial tail: the held-back full block is decrypted with the NEXT tweak (v12) first
+	vsrab	11,8,9
+	vaddubm	12,8,8
+	vsldoi	11,11,11,15
+	vand	11,11,10
+	vxor	12,12,11	# v12 = tweak following v8; v8 itself is kept for the final block
+
+	subi	10,10,16
+	add	10,10,5
+
+	vxor	2,2,8	# undo the v8 xor applied earlier ...
+	vxor	2,2,12	# ... and apply v12 instead
+
+.Loop_xts_dec_short:
+	vperm	1,0,1,7
+	.long	0x10420D48	# vncipher v2,v2,v1
+	lvx	1,3,6
+	addi	3,3,16
+	vperm	0,1,0,7
+	.long	0x10420548	# vncipher v2,v2,v0
+	lvx	0,3,6
+	addi	3,3,16
+	bdnz	.Loop_xts_dec_short
+
+	vperm	1,0,1,7
+	.long	0x10420D48	# vncipher v2,v2,v1
+	lvx	1,3,6
+	li	3,16
+	vperm	0,1,0,7
+	vxor	0,0,12
+	.long	0x10620549	# vncipherlast v3,v2,v0
+
+	vperm	11,3,3,6
+
+	.long	0x7D602799	# stxvd2x v11,0,r4
+
+
+	vor	2,4,4
+	lvx	4,0,10
+
+	lvx	0,0,6
+	lvx	1,3,6
+	addi	3,3,16
+	vperm	2,2,4,5
+	vperm	0,1,0,7
+
+	lvsr	5,0,5
+	vxor	4,4,4
+	vspltisb	11,-1
+	vperm	4,4,11,5	# v4 = byte select mask derived from r5
+	vsel	2,2,3,4	# merge tail input bytes with the block just decrypted
+
+	vxor	0,0,8
+	vxor	2,2,0	# v2 ^= tweak v8 ^ first round key
+	lvx	0,3,6
+	addi	3,3,16
+
+	subi	11,4,1
+	mtctr	5
+	li	5,16	# one more 16-byte block to process after the byte copy
+.Loop_xts_dec_steal:	# copy CTR bytes from out[i] to out[i+16]
+	lbzu	0,1(11)
+	stb	0,16(11)
+	bdnz	.Loop_xts_dec_steal
+
+	mtctr	9
+	b	.Loop_xts_dec
+
+.Lxts_dec_done:
+	cmpldi	8,0
+	beq	.Lxts_dec_ret	# r8 == 0: no tweak write-back
+
+	vsrab	11,8,9	# advance the tweak once more before storing it
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vand	11,11,10
+	vxor	8,8,11
+
+	vperm	8,8,8,6
+	.long	0x7D004799	# stxvd2x v8,0,r8: store the updated tweak
+
+.Lxts_dec_ret:
+	or	12,12,12	# no-op
+	li	3,0	# return 0
+	blr	
+.long	0
+.byte	0,12,0x04,0,0x80,6,6,0
+.long	0
+.size	aes_hw_xts_decrypt,.-aes_hw_xts_decrypt
+.align	5
+_aesp8_xts_encrypt6x:	# 6-blocks-per-iteration XTS encrypt; reached by branch from aes_hw_xts_encrypt with r4 = out, r5 = len, r6 = key, r8 = tweak pointer or 0, r9 = loop count, r10 = input cursor, v8 = tweak, v10 = tweak-update mask
+	stdu	1,-448(1)	# allocate a 448-byte stack frame
+	mflr	11
+	li	7,207
+	li	3,223
+	std	11,464(1)	# save LR at 16 bytes above the previous stack pointer
+	stvx	20,7,1	# save v20..v31
+	addi	7,7,32
+	stvx	21,3,1
+	addi	3,3,32
+	stvx	22,7,1
+	addi	7,7,32
+	stvx	23,3,1
+	addi	3,3,32
+	stvx	24,7,1
+	addi	7,7,32
+	stvx	25,3,1
+	addi	3,3,32
+	stvx	26,7,1
+	addi	7,7,32
+	stvx	27,3,1
+	addi	3,3,32
+	stvx	28,7,1
+	addi	7,7,32
+	stvx	29,3,1
+	addi	3,3,32
+	stvx	30,7,1
+	stvx	31,3,1
+	li	0,-1
+	stw	12,396(1)
+	li	3,0x10	# r3, r26..r31 = constant offsets 0x10..0x70 used as index registers
+	std	26,400(1)	# save r26..r31
+	li	26,0x20
+	std	27,408(1)
+	li	27,0x30
+	std	28,416(1)
+	li	28,0x40
+	std	29,424(1)
+	li	29,0x50
+	std	30,432(1)
+	li	30,0x60
+	std	31,440(1)
+	li	31,0x70
+	or	0,0,0	# no-op
+
+	subi	9,9,3
+
+	lvx	23,0,6	# v23 = first round key (after the vperm below)
+	lvx	30,3,6
+	addi	6,6,0x20
+	lvx	31,0,6
+	vperm	23,30,23,7
+	addi	7,1,79	# r7 = stack area receiving realigned round keys
+	mtctr	9
+
+.Load_xts_enc_key:	# realign round keys with vperm and store them on the stack, two per iteration
+	vperm	24,31,30,7
+	lvx	30,3,6
+	addi	6,6,0x20
+	stvx	24,0,7
+	vperm	25,30,31,7
+	lvx	31,0,6
+	stvx	25,3,7
+	addi	7,7,0x20
+	bdnz	.Load_xts_enc_key
+
+	lvx	26,3,6	# the remaining round keys stay in v26..v31
+	vperm	24,31,30,7
+	lvx	27,26,6
+	stvx	24,0,7
+	vperm	25,26,31,7
+	lvx	28,27,6
+	stvx	25,3,7
+	addi	7,1,79
+	vperm	26,27,26,7
+	lvx	29,28,6
+	vperm	27,28,27,7
+	lvx	30,29,6
+	vperm	28,29,28,7
+	lvx	31,30,6
+	vperm	29,30,29,7
+	lvx	22,31,6
+	vperm	30,31,30,7
+	lvx	24,0,7
+	vperm	31,22,31,7
+	lvx	25,3,7
+
+	vperm	0,2,4,5	# v0 = first input block (loaded by the caller into v2/v4)
+	subi	10,10,31
+	vxor	17,8,23	# v17 = tweak0 ^ first round key
+	vsrab	11,8,9	# advance tweak v8
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vand	11,11,10
+	vxor	7,0,17	# v7 = block0 ^ tweak0 ^ first round key
+	vxor	8,8,11
+
+	.long	0x7C235699	# lxvd2x v1,r3,r10
+	vxor	18,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	1,1,1,6
+	vand	11,11,10
+	vxor	12,1,18
+	vxor	8,8,11
+
+	.long	0x7C5A5699	# lxvd2x v2,r26,r10
+	andi.	31,5,15	# r31 = len mod 16 (tail byte count)
+	vxor	19,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	2,2,2,6
+	vand	11,11,10
+	vxor	13,2,19
+	vxor	8,8,11
+
+	.long	0x7C7B5699	# lxvd2x v3,r27,r10
+	sub	5,5,31	# r5 = len rounded down to whole blocks
+	vxor	20,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	3,3,3,6
+	vand	11,11,10
+	vxor	14,3,20
+	vxor	8,8,11
+
+	.long	0x7C9C5699	# lxvd2x v4,r28,r10
+	subi	5,5,0x60
+	vxor	21,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	4,4,4,6
+	vand	11,11,10
+	vxor	15,4,21
+	vxor	8,8,11
+
+	.long	0x7CBD5699	# lxvd2x v5,r29,r10
+	addi	10,10,0x60
+	vxor	22,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	5,5,5,6
+	vand	11,11,10
+	vxor	16,5,22
+	vxor	8,8,11
+
+	vxor	31,31,23	# v31 ^= v23, so (tweak ^ v23) ^ v31 equals tweak ^ last round key
+	mtctr	9
+	b	.Loop_xts_enc6x
+
+.align	5
+.Loop_xts_enc6x:	# state in v7,v12..v16; .long 0x1xxxC508/CD08 = vcipher with v24/v25
+	.long	0x10E7C508
+	.long	0x118CC508
+	.long	0x11ADC508
+	.long	0x11CEC508
+	.long	0x11EFC508
+	.long	0x1210C508
+	lvx	24,26,7
+	addi	7,7,0x20
+
+	.long	0x10E7CD08
+	.long	0x118CCD08
+	.long	0x11ADCD08
+	.long	0x11CECD08
+	.long	0x11EFCD08
+	.long	0x1210CD08
+	lvx	25,3,7
+	bdnz	.Loop_xts_enc6x
+
+	subic	5,5,96	# CA = no borrow, i.e. at least 96 more bytes remain
+	vxor	0,17,31	# v0 = tweak0 ^ last round key (operand for vcipherlast)
+	.long	0x10E7C508
+	.long	0x118CC508
+	vsrab	11,8,9
+	vxor	17,8,23
+	vaddubm	8,8,8
+	.long	0x11ADC508
+	.long	0x11CEC508
+	vsldoi	11,11,11,15
+	.long	0x11EFC508
+	.long	0x1210C508
+
+	subfe.	0,0,0	# r0 = -1 on borrow, else 0; CR0 is consumed by the beq at the loop end
+	vand	11,11,10
+	.long	0x10E7CD08
+	.long	0x118CCD08
+	vxor	8,8,11
+	.long	0x11ADCD08
+	.long	0x11CECD08
+	vxor	1,18,31
+	vsrab	11,8,9
+	vxor	18,8,23
+	.long	0x11EFCD08
+	.long	0x1210CD08
+
+	and	0,0,5
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	.long	0x10E7D508
+	.long	0x118CD508
+	vand	11,11,10
+	.long	0x11ADD508
+	.long	0x11CED508
+	vxor	8,8,11
+	.long	0x11EFD508
+	.long	0x1210D508
+
+	add	10,10,0	# on borrow, pull the input cursor back by the (negative) r5
+
+
+
+	vxor	2,19,31
+	vsrab	11,8,9
+	vxor	19,8,23
+	vaddubm	8,8,8
+	.long	0x10E7DD08
+	.long	0x118CDD08
+	vsldoi	11,11,11,15
+	.long	0x11ADDD08
+	.long	0x11CEDD08
+	vand	11,11,10
+	.long	0x11EFDD08
+	.long	0x1210DD08
+
+	addi	7,1,79	# rewind r7 to the start of the on-stack round keys
+	vxor	8,8,11
+	.long	0x10E7E508
+	.long	0x118CE508
+	vxor	3,20,31
+	vsrab	11,8,9
+	vxor	20,8,23
+	.long	0x11ADE508
+	.long	0x11CEE508
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	.long	0x11EFE508
+	.long	0x1210E508
+	lvx	24,0,7
+	vand	11,11,10
+
+	.long	0x10E7ED08
+	.long	0x118CED08
+	vxor	8,8,11
+	.long	0x11ADED08
+	.long	0x11CEED08
+	vxor	4,21,31
+	vsrab	11,8,9
+	vxor	21,8,23
+	.long	0x11EFED08
+	.long	0x1210ED08
+	lvx	25,3,7
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+
+	.long	0x10E7F508
+	.long	0x118CF508
+	vand	11,11,10
+	.long	0x11ADF508
+	.long	0x11CEF508
+	vxor	8,8,11
+	.long	0x11EFF508
+	.long	0x1210F508
+	vxor	5,22,31
+	vsrab	11,8,9
+	vxor	22,8,23
+
+	.long	0x10E70509	# vcipherlast v7,v7,v0
+	.long	0x7C005699	# lxvd2x v0,0,r10: start loading the next six input blocks
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	.long	0x118C0D09	# vcipherlast v12,v12,v1
+	.long	0x7C235699
+	.long	0x11AD1509	# vcipherlast v13,v13,v2
+	vperm	0,0,0,6
+	.long	0x7C5A5699
+	vand	11,11,10
+	.long	0x11CE1D09	# vcipherlast v14,v14,v3
+	vperm	1,1,1,6
+	.long	0x7C7B5699
+	.long	0x11EF2509	# vcipherlast v15,v15,v4
+	vperm	2,2,2,6
+	.long	0x7C9C5699
+	vxor	8,8,11
+	.long	0x11702D09	# vcipherlast v11,v16,v5: sixth result goes to v11, which .Lxts_enc6x_zero reads
+
+	vperm	3,3,3,6
+	.long	0x7CBD5699
+	addi	10,10,0x60
+	vperm	4,4,4,6
+	vperm	5,5,5,6
+
+	vperm	7,7,7,6
+	vperm	12,12,12,6
+	.long	0x7CE02799	# stxvd2x v7,0,r4: store the six output blocks (r4 + 0x00..0x50)
+	vxor	7,0,17
+	vperm	13,13,13,6
+	.long	0x7D832799
+	vxor	12,1,18
+	vperm	14,14,14,6
+	.long	0x7DBA2799
+	vxor	13,2,19
+	vperm	15,15,15,6
+	.long	0x7DDB2799
+	vxor	14,3,20
+	vperm	16,11,11,6	# v16 = permuted copy of v11 for the store; v11 itself is preserved
+	.long	0x7DFC2799
+	vxor	15,4,21
+	.long	0x7E1D2799
+
+	vxor	16,5,22
+	addi	4,4,0x60
+
+	mtctr	9
+	beq	.Loop_xts_enc6x	# CR0 from subfe. above: no borrow, so another six blocks
+
+	addic.	5,5,0x60	# r5 = bytes in remaining whole blocks (0..0x50)
+	beq	.Lxts_enc6x_zero
+	cmpwi	5,0x20
+	blt	.Lxts_enc6x_one
+	nop	
+	beq	.Lxts_enc6x_two
+	cmpwi	5,0x40
+	blt	.Lxts_enc6x_three
+	nop	
+	beq	.Lxts_enc6x_four
+
+.Lxts_enc6x_five:
+	vxor	7,1,17
+	vxor	12,2,18
+	vxor	13,3,19
+	vxor	14,4,20
+	vxor	15,5,21
+
+	bl	_aesp8_xts_enc5x
+
+	vperm	7,7,7,6
+	vor	17,22,22	# v17 = next tweak (xored with v23), consumed by steal/done
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	vperm	13,13,13,6
+	.long	0x7D832799
+	vperm	14,14,14,6
+	.long	0x7DBA2799
+	vxor	11,15,22	# v11 = last output block ^ next tweak, input to the steal path
+	vperm	15,15,15,6
+	.long	0x7DDB2799
+	.long	0x7DFC2799
+	addi	4,4,0x50
+	bne	.Lxts_enc6x_steal	# CR0 from cmpwi 31,0 inside _aesp8_xts_enc5x: tail bytes present
+	b	.Lxts_enc6x_done
+
+.align	4
+.Lxts_enc6x_four:
+	vxor	7,2,17
+	vxor	12,3,18
+	vxor	13,4,19
+	vxor	14,5,20
+	vxor	15,15,15	# unused lane zeroed
+
+	bl	_aesp8_xts_enc5x
+
+	vperm	7,7,7,6
+	vor	17,21,21
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	vperm	13,13,13,6
+	.long	0x7D832799
+	vxor	11,14,21
+	vperm	14,14,14,6
+	.long	0x7DBA2799
+	.long	0x7DDB2799
+	addi	4,4,0x40
+	bne	.Lxts_enc6x_steal
+	b	.Lxts_enc6x_done
+
+.align	4
+.Lxts_enc6x_three:
+	vxor	7,3,17
+	vxor	12,4,18
+	vxor	13,5,19
+	vxor	14,14,14
+	vxor	15,15,15
+
+	bl	_aesp8_xts_enc5x
+
+	vperm	7,7,7,6
+	vor	17,20,20
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	vxor	11,13,20
+	vperm	13,13,13,6
+	.long	0x7D832799
+	.long	0x7DBA2799
+	addi	4,4,0x30
+	bne	.Lxts_enc6x_steal
+	b	.Lxts_enc6x_done
+
+.align	4
+.Lxts_enc6x_two:
+	vxor	7,4,17
+	vxor	12,5,18
+	vxor	13,13,13
+	vxor	14,14,14
+	vxor	15,15,15
+
+	bl	_aesp8_xts_enc5x
+
+	vperm	7,7,7,6
+	vor	17,19,19
+	vxor	11,12,19
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	.long	0x7D832799
+	addi	4,4,0x20
+	bne	.Lxts_enc6x_steal
+	b	.Lxts_enc6x_done
+
+.align	4
+.Lxts_enc6x_one:
+	vxor	7,5,17
+	nop	
+.Loop_xts_enc1x:	# single-block encrypt of v7; also re-entered from the steal path with r31 = 0
+	.long	0x10E7C508
+	lvx	24,26,7
+	addi	7,7,0x20
+
+	.long	0x10E7CD08
+	lvx	25,3,7
+	bdnz	.Loop_xts_enc1x
+
+	add	10,10,31
+	cmpwi	31,0	# CR0 for the bne below: tail bytes present?
+	.long	0x10E7C508
+
+	subi	10,10,16
+	.long	0x10E7CD08
+
+	lvsr	5,0,31
+	.long	0x10E7D508
+
+	.long	0x7C005699	# lxvd2x v0,0,r10
+	.long	0x10E7DD08
+
+	addi	7,1,79
+	.long	0x10E7E508
+	lvx	24,0,7
+
+	.long	0x10E7ED08
+	lvx	25,3,7
+	vxor	17,17,31
+
+	vperm	0,0,0,6
+	.long	0x10E7F508
+
+	vperm	0,0,0,5
+	.long	0x10E78D09	# vcipherlast v7,v7,v17
+
+	vor	17,18,18
+	vxor	11,7,18
+	vperm	7,7,7,6
+	.long	0x7CE02799
+	addi	4,4,0x10
+	bne	.Lxts_enc6x_steal
+	b	.Lxts_enc6x_done
+
+.align	4
+.Lxts_enc6x_zero:	# no whole blocks left; only a possible partial tail
+	cmpwi	31,0
+	beq	.Lxts_enc6x_done
+
+	add	10,10,31
+	subi	10,10,16
+	.long	0x7C005699	# lxvd2x v0,0,r10
+	lvsr	5,0,31
+	vperm	0,0,0,6
+	vperm	0,0,0,5
+	vxor	11,11,17
+.Lxts_enc6x_steal:	# merge r31 tail bytes with the previous output block, then encrypt once more
+	vxor	0,0,17
+	vxor	7,7,7
+	vspltisb	12,-1
+	vperm	7,7,12,5	# v7 = byte select mask derived from r31
+	vsel	7,0,11,7
+
+	subi	30,4,17
+	subi	4,4,16	# step the output pointer back one block
+	mtctr	31
+.Loop_xts_enc6x_steal:	# copy r31 bytes from out[i] to out[i+16]
+	lbzu	0,1(30)
+	stb	0,16(30)
+	bdnz	.Loop_xts_enc6x_steal
+
+	li	31,0	# tail consumed: the next pass through .Loop_xts_enc1x falls to done
+	mtctr	9
+	b	.Loop_xts_enc1x
+
+.align	4
+.Lxts_enc6x_done:
+	cmpldi	8,0
+	beq	.Lxts_enc6x_ret	# r8 == 0: no tweak write-back
+
+	vxor	8,17,23	# strip v23 from v17 to recover the plain next tweak
+	vperm	8,8,8,6
+	.long	0x7D004799	# stxvd2x v8,0,r8
+
+.Lxts_enc6x_ret:
+	mtlr	11
+	li	10,79
+	li	11,95
+	stvx	9,10,1	# overwrite the on-stack round-key copies with v9
+	addi	10,10,32
+	stvx	9,11,1
+	addi	11,11,32
+	stvx	9,10,1
+	addi	10,10,32
+	stvx	9,11,1
+	addi	11,11,32
+	stvx	9,10,1
+	addi	10,10,32
+	stvx	9,11,1
+	addi	11,11,32
+	stvx	9,10,1
+	addi	10,10,32
+	stvx	9,11,1
+	addi	11,11,32
+
+	or	12,12,12	# no-op
+	lvx	20,10,1	# restore v20..v31
+	addi	10,10,32
+	lvx	21,11,1
+	addi	11,11,32
+	lvx	22,10,1
+	addi	10,10,32
+	lvx	23,11,1
+	addi	11,11,32
+	lvx	24,10,1
+	addi	10,10,32
+	lvx	25,11,1
+	addi	11,11,32
+	lvx	26,10,1
+	addi	10,10,32
+	lvx	27,11,1
+	addi	11,11,32
+	lvx	28,10,1
+	addi	10,10,32
+	lvx	29,11,1
+	addi	11,11,32
+	lvx	30,10,1
+	lvx	31,11,1
+	ld	26,400(1)	# restore r26..r31
+	ld	27,408(1)
+	ld	28,416(1)
+	ld	29,424(1)
+	ld	30,432(1)
+	ld	31,440(1)
+	addi	1,1,448	# release the stack frame
+	blr	
+.long	0
+.byte	0,12,0x04,1,0x80,6,6,0
+.long	0
+
+.align	5
+_aesp8_xts_enc5x:	# helper: encrypts up to five blocks held in v7,v12,v13,v14,v15; expects CTR, r7 and v24..v31 set up by _aesp8_xts_encrypt6x; leaves CR0 = (r31 compared with 0) for the caller
+	.long	0x10E7C508	# 0x1xxxC508 = vcipher vX,vX,v24
+	.long	0x118CC508
+	.long	0x11ADC508
+	.long	0x11CEC508
+	.long	0x11EFC508
+	lvx	24,26,7
+	addi	7,7,0x20
+
+	.long	0x10E7CD08	# 0x1xxxCD08 = vcipher vX,vX,v25
+	.long	0x118CCD08
+	.long	0x11ADCD08
+	.long	0x11CECD08
+	.long	0x11EFCD08
+	lvx	25,3,7
+	bdnz	_aesp8_xts_enc5x
+
+	add	10,10,31
+	cmpwi	31,0	# CR0 result is what the caller tests with bne after the bl
+	.long	0x10E7C508
+	.long	0x118CC508
+	.long	0x11ADC508
+	.long	0x11CEC508
+	.long	0x11EFC508
+
+	subi	10,10,16	# r10 now addresses the 16 bytes ending at the end of the input tail
+	.long	0x10E7CD08
+	.long	0x118CCD08
+	.long	0x11ADCD08
+	.long	0x11CECD08
+	.long	0x11EFCD08
+	vxor	17,17,31	# v17, v1..v4 = per-block tweak ^ last round key (v31 was pre-xored with v23)
+
+	.long	0x10E7D508
+	lvsr	5,0,31
+	.long	0x118CD508
+	.long	0x11ADD508
+	.long	0x11CED508
+	.long	0x11EFD508
+	vxor	1,18,31
+
+	.long	0x10E7DD08
+	.long	0x7C005699	# lxvd2x v0,0,r10
+	.long	0x118CDD08
+	.long	0x11ADDD08
+	.long	0x11CEDD08
+	.long	0x11EFDD08
+	vxor	2,19,31
+
+	addi	7,1,79	# rewind r7 to the start of the on-stack round keys
+	.long	0x10E7E508
+	.long	0x118CE508
+	.long	0x11ADE508
+	.long	0x11CEE508
+	.long	0x11EFE508
+	lvx	24,0,7
+	vxor	3,20,31
+
+	.long	0x10E7ED08
+	vperm	0,0,0,6
+	.long	0x118CED08
+	.long	0x11ADED08
+	.long	0x11CEED08
+	.long	0x11EFED08
+	lvx	25,3,7
+	vxor	4,21,31
+
+	.long	0x10E7F508
+	vperm	0,0,0,5
+	.long	0x118CF508
+	.long	0x11ADF508
+	.long	0x11CEF508
+	.long	0x11EFF508
+
+	.long	0x10E78D09	# vcipherlast v7,v7,v17
+	.long	0x118C0D09	# vcipherlast v12,v12,v1
+	.long	0x11AD1509	# vcipherlast v13,v13,v2
+	.long	0x11CE1D09	# vcipherlast v14,v14,v3
+	.long	0x11EF2509	# vcipherlast v15,v15,v4
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,0,0
+
+.align	5
+_aesp8_xts_decrypt6x:	# 6-blocks-per-iteration XTS decrypt; reached by branch from aes_hw_xts_decrypt with r4 = out, r5 = len, r6 = key, r8 = tweak pointer or 0, r9 = loop count, r10 = input cursor, v8 = tweak, v10 = tweak-update mask
+	stdu	1,-448(1)	# allocate a 448-byte stack frame
+	mflr	11
+	li	7,207
+	li	3,223
+	std	11,464(1)	# save LR at 16 bytes above the previous stack pointer
+	stvx	20,7,1	# save v20..v31
+	addi	7,7,32
+	stvx	21,3,1
+	addi	3,3,32
+	stvx	22,7,1
+	addi	7,7,32
+	stvx	23,3,1
+	addi	3,3,32
+	stvx	24,7,1
+	addi	7,7,32
+	stvx	25,3,1
+	addi	3,3,32
+	stvx	26,7,1
+	addi	7,7,32
+	stvx	27,3,1
+	addi	3,3,32
+	stvx	28,7,1
+	addi	7,7,32
+	stvx	29,3,1
+	addi	3,3,32
+	stvx	30,7,1
+	stvx	31,3,1
+	li	0,-1
+	stw	12,396(1)
+	li	3,0x10	# r3, r26..r31 = constant offsets 0x10..0x70 used as index registers
+	std	26,400(1)	# save r26..r31
+	li	26,0x20
+	std	27,408(1)
+	li	27,0x30
+	std	28,416(1)
+	li	28,0x40
+	std	29,424(1)
+	li	29,0x50
+	std	30,432(1)
+	li	30,0x60
+	std	31,440(1)
+	li	31,0x70
+	or	0,0,0	# no-op
+
+	subi	9,9,3
+
+	lvx	23,0,6	# v23 = first round key (after the vperm below)
+	lvx	30,3,6
+	addi	6,6,0x20
+	lvx	31,0,6
+	vperm	23,30,23,7
+	addi	7,1,79	# r7 = stack area receiving realigned round keys
+	mtctr	9
+
+.Load_xts_dec_key:	# realign round keys with vperm and store them on the stack, two per iteration
+	vperm	24,31,30,7
+	lvx	30,3,6
+	addi	6,6,0x20
+	stvx	24,0,7
+	vperm	25,30,31,7
+	lvx	31,0,6
+	stvx	25,3,7
+	addi	7,7,0x20
+	bdnz	.Load_xts_dec_key
+
+	lvx	26,3,6	# the remaining round keys stay in v26..v31
+	vperm	24,31,30,7
+	lvx	27,26,6
+	stvx	24,0,7
+	vperm	25,26,31,7
+	lvx	28,27,6
+	stvx	25,3,7
+	addi	7,1,79
+	vperm	26,27,26,7
+	lvx	29,28,6
+	vperm	27,28,27,7
+	lvx	30,29,6
+	vperm	28,29,28,7
+	lvx	31,30,6
+	vperm	29,30,29,7
+	lvx	22,31,6
+	vperm	30,31,30,7
+	lvx	24,0,7
+	vperm	31,22,31,7
+	lvx	25,3,7
+
+	vperm	0,2,4,5	# v0 = first input block (loaded by the caller into v2/v4)
+	subi	10,10,31
+	vxor	17,8,23	# v17 = tweak0 ^ first round key
+	vsrab	11,8,9	# advance tweak v8
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vand	11,11,10
+	vxor	7,0,17	# v7 = block0 ^ tweak0 ^ first round key
+	vxor	8,8,11
+
+	.long	0x7C235699	# lxvd2x v1,r3,r10
+	vxor	18,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	1,1,1,6
+	vand	11,11,10
+	vxor	12,1,18
+	vxor	8,8,11
+
+	.long	0x7C5A5699	# lxvd2x v2,r26,r10
+	andi.	31,5,15	# r31 = len mod 16 (tail byte count)
+	vxor	19,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	2,2,2,6
+	vand	11,11,10
+	vxor	13,2,19
+	vxor	8,8,11
+
+	.long	0x7C7B5699	# lxvd2x v3,r27,r10
+	sub	5,5,31	# r5 = len rounded down to whole blocks
+	vxor	20,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	3,3,3,6
+	vand	11,11,10
+	vxor	14,3,20
+	vxor	8,8,11
+
+	.long	0x7C9C5699	# lxvd2x v4,r28,r10
+	subi	5,5,0x60
+	vxor	21,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	4,4,4,6
+	vand	11,11,10
+	vxor	15,4,21
+	vxor	8,8,11
+
+	.long	0x7CBD5699	# lxvd2x v5,r29,r10
+	addi	10,10,0x60
+	vxor	22,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	5,5,5,6
+	vand	11,11,10
+	vxor	16,5,22
+	vxor	8,8,11
+
+	vxor	31,31,23	# v31 ^= v23, so (tweak ^ v23) ^ v31 equals tweak ^ last round key
+	mtctr	9
+	b	.Loop_xts_dec6x
+
+.align	5
+.Loop_xts_dec6x:	# state in v7,v12..v16; .long 0x1xxxC548/CD48 = vncipher with v24/v25
+	.long	0x10E7C548
+	.long	0x118CC548
+	.long	0x11ADC548
+	.long	0x11CEC548
+	.long	0x11EFC548
+	.long	0x1210C548
+	lvx	24,26,7
+	addi	7,7,0x20
+
+	.long	0x10E7CD48
+	.long	0x118CCD48
+	.long	0x11ADCD48
+	.long	0x11CECD48
+	.long	0x11EFCD48
+	.long	0x1210CD48
+	lvx	25,3,7
+	bdnz	.Loop_xts_dec6x
+
+	subic	5,5,96	# CA = no borrow, i.e. at least 96 more bytes remain
+	vxor	0,17,31	# v0 = tweak0 ^ last round key (operand for vncipherlast)
+	.long	0x10E7C548
+	.long	0x118CC548
+	vsrab	11,8,9
+	vxor	17,8,23
+	vaddubm	8,8,8
+	.long	0x11ADC548
+	.long	0x11CEC548
+	vsldoi	11,11,11,15
+	.long	0x11EFC548
+	.long	0x1210C548
+
+	subfe.	0,0,0	# r0 = -1 on borrow, else 0; CR0 is consumed by the beq at the loop end
+	vand	11,11,10
+	.long	0x10E7CD48
+	.long	0x118CCD48
+	vxor	8,8,11
+	.long	0x11ADCD48
+	.long	0x11CECD48
+	vxor	1,18,31
+	vsrab	11,8,9
+	vxor	18,8,23
+	.long	0x11EFCD48
+	.long	0x1210CD48
+
+	and	0,0,5
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	.long	0x10E7D548
+	.long	0x118CD548
+	vand	11,11,10
+	.long	0x11ADD548
+	.long	0x11CED548
+	vxor	8,8,11
+	.long	0x11EFD548
+	.long	0x1210D548
+
+	add	10,10,0	# on borrow, pull the input cursor back by the (negative) r5
+
+
+
+	vxor	2,19,31
+	vsrab	11,8,9
+	vxor	19,8,23
+	vaddubm	8,8,8
+	.long	0x10E7DD48
+	.long	0x118CDD48
+	vsldoi	11,11,11,15
+	.long	0x11ADDD48
+	.long	0x11CEDD48
+	vand	11,11,10
+	.long	0x11EFDD48
+	.long	0x1210DD48
+
+	addi	7,1,79	# rewind r7 to the start of the on-stack round keys
+	vxor	8,8,11
+	.long	0x10E7E548
+	.long	0x118CE548
+	vxor	3,20,31
+	vsrab	11,8,9
+	vxor	20,8,23
+	.long	0x11ADE548
+	.long	0x11CEE548
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	.long	0x11EFE548
+	.long	0x1210E548
+	lvx	24,0,7
+	vand	11,11,10
+
+	.long	0x10E7ED48
+	.long	0x118CED48
+	vxor	8,8,11
+	.long	0x11ADED48
+	.long	0x11CEED48
+	vxor	4,21,31
+	vsrab	11,8,9
+	vxor	21,8,23
+	.long	0x11EFED48
+	.long	0x1210ED48
+	lvx	25,3,7
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+
+	.long	0x10E7F548
+	.long	0x118CF548
+	vand	11,11,10
+	.long	0x11ADF548
+	.long	0x11CEF548
+	vxor	8,8,11
+	.long	0x11EFF548
+	.long	0x1210F548
+	vxor	5,22,31
+	vsrab	11,8,9
+	vxor	22,8,23
+
+	.long	0x10E70549	# vncipherlast v7,v7,v0
+	.long	0x7C005699	# lxvd2x v0,0,r10: start loading the next six input blocks
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	.long	0x118C0D49	# vncipherlast v12,v12,v1
+	.long	0x7C235699
+	.long	0x11AD1549	# vncipherlast v13,v13,v2
+	vperm	0,0,0,6
+	.long	0x7C5A5699
+	vand	11,11,10
+	.long	0x11CE1D49	# vncipherlast v14,v14,v3
+	vperm	1,1,1,6
+	.long	0x7C7B5699
+	.long	0x11EF2549	# vncipherlast v15,v15,v4
+	vperm	2,2,2,6
+	.long	0x7C9C5699
+	vxor	8,8,11
+	.long	0x12102D49	# vncipherlast v16,v16,v5
+	vperm	3,3,3,6
+	.long	0x7CBD5699
+	addi	10,10,0x60
+	vperm	4,4,4,6
+	vperm	5,5,5,6
+
+	vperm	7,7,7,6
+	vperm	12,12,12,6
+	.long	0x7CE02799	# stxvd2x v7,0,r4: store the six output blocks (r4 + 0x00..0x50)
+	vxor	7,0,17
+	vperm	13,13,13,6
+	.long	0x7D832799
+	vxor	12,1,18
+	vperm	14,14,14,6
+	.long	0x7DBA2799
+	vxor	13,2,19
+	vperm	15,15,15,6
+	.long	0x7DDB2799
+	vxor	14,3,20
+	vperm	16,16,16,6
+	.long	0x7DFC2799
+	vxor	15,4,21
+	.long	0x7E1D2799
+	vxor	16,5,22
+	addi	4,4,0x60
+
+	mtctr	9
+	beq	.Loop_xts_dec6x	# CR0 from subfe. above: no borrow, so another six blocks
+
+	addic.	5,5,0x60	# r5 = bytes in remaining whole blocks (0..0x50)
+	beq	.Lxts_dec6x_zero
+	cmpwi	5,0x20
+	blt	.Lxts_dec6x_one
+	nop	
+	beq	.Lxts_dec6x_two
+	cmpwi	5,0x40
+	blt	.Lxts_dec6x_three
+	nop	
+	beq	.Lxts_dec6x_four
+
+.Lxts_dec6x_five:
+	vxor	7,1,17
+	vxor	12,2,18
+	vxor	13,3,19
+	vxor	14,4,20
+	vxor	15,5,21
+
+	bl	_aesp8_xts_dec5x
+
+	vperm	7,7,7,6
+	vor	17,22,22	# v17/v18 = the next two tweaks (xored with v23) for the steal path
+	vxor	18,8,23
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	vxor	7,0,18	# v7 = following input block ^ v18, decrypted first in the steal path
+	vperm	13,13,13,6
+	.long	0x7D832799
+	vperm	14,14,14,6
+	.long	0x7DBA2799
+	vperm	15,15,15,6
+	.long	0x7DDB2799
+	.long	0x7DFC2799
+	addi	4,4,0x50
+	bne	.Lxts_dec6x_steal	# CR0 from cmpwi 31,0 inside _aesp8_xts_dec5x: tail bytes present
+	b	.Lxts_dec6x_done
+
+.align	4
+.Lxts_dec6x_four:
+	vxor	7,2,17
+	vxor	12,3,18
+	vxor	13,4,19
+	vxor	14,5,20
+	vxor	15,15,15	# unused lane zeroed
+
+	bl	_aesp8_xts_dec5x
+
+	vperm	7,7,7,6
+	vor	17,21,21
+	vor	18,22,22
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	vxor	7,0,22
+	vperm	13,13,13,6
+	.long	0x7D832799
+	vperm	14,14,14,6
+	.long	0x7DBA2799
+	.long	0x7DDB2799
+	addi	4,4,0x40
+	bne	.Lxts_dec6x_steal
+	b	.Lxts_dec6x_done
+
+.align	4
+.Lxts_dec6x_three:
+	vxor	7,3,17
+	vxor	12,4,18
+	vxor	13,5,19
+	vxor	14,14,14
+	vxor	15,15,15
+
+	bl	_aesp8_xts_dec5x
+
+	vperm	7,7,7,6
+	vor	17,20,20
+	vor	18,21,21
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	vxor	7,0,21
+	vperm	13,13,13,6
+	.long	0x7D832799
+	.long	0x7DBA2799
+	addi	4,4,0x30
+	bne	.Lxts_dec6x_steal
+	b	.Lxts_dec6x_done
+
+.align	4
+.Lxts_dec6x_two:
+	vxor	7,4,17
+	vxor	12,5,18
+	vxor	13,13,13
+	vxor	14,14,14
+	vxor	15,15,15
+
+	bl	_aesp8_xts_dec5x
+
+	vperm	7,7,7,6
+	vor	17,19,19
+	vor	18,20,20
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	vxor	7,0,20
+	.long	0x7D832799
+	addi	4,4,0x20
+	bne	.Lxts_dec6x_steal
+	b	.Lxts_dec6x_done
+
+.align	4
+.Lxts_dec6x_one:
+	vxor	7,5,17
+	nop	
+.Loop_xts_dec1x:	# single-block decrypt of v7; also re-entered from the steal path with r31 = 0
+	.long	0x10E7C548
+	lvx	24,26,7
+	addi	7,7,0x20
+
+	.long	0x10E7CD48
+	lvx	25,3,7
+	bdnz	.Loop_xts_dec1x
+
+	subi	0,31,1
+	.long	0x10E7C548
+
+	andi.	0,0,16	# r0 = 16 when r31 == 0 (r31 is at most 15), else 0
+	cmpwi	31,0	# CR0 for the bne below: tail bytes present?
+	.long	0x10E7CD48
+
+	sub	10,10,0
+	.long	0x10E7D548
+
+	.long	0x7C005699	# lxvd2x v0,0,r10
+	.long	0x10E7DD48
+
+	addi	7,1,79
+	.long	0x10E7E548
+	lvx	24,0,7
+
+	.long	0x10E7ED48
+	lvx	25,3,7
+	vxor	17,17,31
+
+	vperm	0,0,0,6
+	.long	0x10E7F548
+
+	mtctr	9
+	.long	0x10E78D49	# vncipherlast v7,v7,v17
+
+	vor	17,18,18
+	vor	18,19,19
+	vperm	7,7,7,6
+	.long	0x7CE02799
+	addi	4,4,0x10
+	vxor	7,0,19
+	bne	.Lxts_dec6x_steal
+	b	.Lxts_dec6x_done
+
+.align	4
+.Lxts_dec6x_zero:	# no whole blocks left; only a possible partial tail
+	cmpwi	31,0
+	beq	.Lxts_dec6x_done
+
+	.long	0x7C005699	# lxvd2x v0,0,r10
+	vperm	0,0,0,6
+	vxor	7,0,18
+.Lxts_dec6x_steal:	# decrypt the last full block with tweak v18, then merge the tail and finish with v17
+	.long	0x10E7C548
+	lvx	24,26,7
+	addi	7,7,0x20
+
+	.long	0x10E7CD48
+	lvx	25,3,7
+	bdnz	.Lxts_dec6x_steal
+
+	add	10,10,31
+	.long	0x10E7C548
+
+	cmpwi	31,0
+	.long	0x10E7CD48
+
+	.long	0x7C005699	# lxvd2x v0,0,r10
+	.long	0x10E7D548
+
+	lvsr	5,0,31
+	.long	0x10E7DD48
+
+	addi	7,1,79
+	.long	0x10E7E548
+	lvx	24,0,7
+
+	.long	0x10E7ED48
+	lvx	25,3,7
+	vxor	18,18,31
+
+	vperm	0,0,0,6
+	.long	0x10E7F548
+
+	vperm	0,0,0,5
+	.long	0x11679549	# vncipherlast v11,v7,v18
+
+	vperm	7,11,11,6
+	.long	0x7CE02799	# stxvd2x v7,0,r4
+
+
+	vxor	7,7,7
+	vspltisb	12,-1
+	vperm	7,7,12,5	# v7 = byte select mask derived from r31
+	vsel	7,0,11,7
+	vxor	7,7,17
+
+	subi	30,4,1
+	mtctr	31
+.Loop_xts_dec6x_steal:	# copy r31 bytes from out[i] to out[i+16]
+	lbzu	0,1(30)
+	stb	0,16(30)
+	bdnz	.Loop_xts_dec6x_steal
+
+	li	31,0	# tail consumed: the next pass through .Loop_xts_dec1x falls to done
+	mtctr	9
+	b	.Loop_xts_dec1x
+
+.align	4
+.Lxts_dec6x_done:
+	cmpldi	8,0
+	beq	.Lxts_dec6x_ret	# r8 == 0: no tweak write-back
+
+	vxor	8,17,23	# strip v23 from v17 to recover the plain next tweak
+	vperm	8,8,8,6
+	.long	0x7D004799	# stxvd2x v8,0,r8
+
+.Lxts_dec6x_ret:
+	mtlr	11
+	li	10,79
+	li	11,95
+	stvx	9,10,1	# overwrite the on-stack round-key copies with v9
+	addi	10,10,32
+	stvx	9,11,1
+	addi	11,11,32
+	stvx	9,10,1
+	addi	10,10,32
+	stvx	9,11,1
+	addi	11,11,32
+	stvx	9,10,1
+	addi	10,10,32
+	stvx	9,11,1
+	addi	11,11,32
+	stvx	9,10,1
+	addi	10,10,32
+	stvx	9,11,1
+	addi	11,11,32
+
+	or	12,12,12	# no-op
+	lvx	20,10,1	# restore v20..v31
+	addi	10,10,32
+	lvx	21,11,1
+	addi	11,11,32
+	lvx	22,10,1
+	addi	10,10,32
+	lvx	23,11,1
+	addi	11,11,32
+	lvx	24,10,1
+	addi	10,10,32
+	lvx	25,11,1
+	addi	11,11,32
+	lvx	26,10,1
+	addi	10,10,32
+	lvx	27,11,1
+	addi	11,11,32
+	lvx	28,10,1
+	addi	10,10,32
+	lvx	29,11,1
+	addi	11,11,32
+	lvx	30,10,1
+	lvx	31,11,1
+	ld	26,400(1)	# restore r26..r31
+	ld	27,408(1)
+	ld	28,416(1)
+	ld	29,424(1)
+	ld	30,432(1)
+	ld	31,440(1)
+	addi	1,1,448	# release the stack frame
+	blr	
+.long	0
+.byte	0,12,0x04,1,0x80,6,6,0
+.long	0
+
+.align	5
+_aesp8_xts_dec5x:	# helper: decrypts up to five blocks held in v7,v12,v13,v14,v15; expects CTR, r7 and v24..v31 set up by _aesp8_xts_decrypt6x; reloads CTR from r9 and leaves CR0 = (r31 compared with 0) for the caller
+	.long	0x10E7C548	# 0x1xxxC548 = vncipher vX,vX,v24
+	.long	0x118CC548
+	.long	0x11ADC548
+	.long	0x11CEC548
+	.long	0x11EFC548
+	lvx	24,26,7
+	addi	7,7,0x20
+
+	.long	0x10E7CD48	# 0x1xxxCD48 = vncipher vX,vX,v25
+	.long	0x118CCD48
+	.long	0x11ADCD48
+	.long	0x11CECD48
+	.long	0x11EFCD48
+	lvx	25,3,7
+	bdnz	_aesp8_xts_dec5x
+
+	subi	0,31,1
+	.long	0x10E7C548
+	.long	0x118CC548
+	.long	0x11ADC548
+	.long	0x11CEC548
+	.long	0x11EFC548
+
+	andi.	0,0,16	# r0 = 16 when r31 == 0 (r31 is at most 15), else 0
+	cmpwi	31,0	# overrides CR0 from andi.; this result is what the caller tests with bne
+	.long	0x10E7CD48
+	.long	0x118CCD48
+	.long	0x11ADCD48
+	.long	0x11CECD48
+	.long	0x11EFCD48
+	vxor	17,17,31	# v17, v1..v4 = per-block tweak ^ last round key (v31 was pre-xored with v23)
+
+	sub	10,10,0	# step the input cursor back 16 bytes only when there is no tail
+	.long	0x10E7D548
+	.long	0x118CD548
+	.long	0x11ADD548
+	.long	0x11CED548
+	.long	0x11EFD548
+	vxor	1,18,31
+
+	.long	0x10E7DD48
+	.long	0x7C005699	# lxvd2x v0,0,r10
+	.long	0x118CDD48
+	.long	0x11ADDD48
+	.long	0x11CEDD48
+	.long	0x11EFDD48
+	vxor	2,19,31
+
+	addi	7,1,79	# rewind r7 to the start of the on-stack round keys
+	.long	0x10E7E548
+	.long	0x118CE548
+	.long	0x11ADE548
+	.long	0x11CEE548
+	.long	0x11EFE548
+	lvx	24,0,7
+	vxor	3,20,31
+
+	.long	0x10E7ED48
+	vperm	0,0,0,6
+	.long	0x118CED48
+	.long	0x11ADED48
+	.long	0x11CEED48
+	.long	0x11EFED48
+	lvx	25,3,7
+	vxor	4,21,31
+
+	.long	0x10E7F548
+	.long	0x118CF548
+	.long	0x11ADF548
+	.long	0x11CEF548
+	.long	0x11EFF548
+
+	.long	0x10E78D49	# vncipherlast v7,v7,v17
+	.long	0x118C0D49	# vncipherlast v12,v12,v1
+	.long	0x11AD1549	# vncipherlast v13,v13,v2
+	.long	0x11CE1D49	# vncipherlast v14,v14,v3
+	.long	0x11EF2549	# vncipherlast v15,v15,v4
+	mtctr	9	# reload CTR so the caller can enter the steal round loop directly
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,0,0
diff --git a/third_party/boringssl/linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S b/third_party/boringssl/linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S
new file mode 100644
index 0000000..0b7a2d0
--- /dev/null
+++ b/third_party/boringssl/linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S
@@ -0,0 +1,575 @@
+.machine	"any"
+
+.abiversion	2
+.text
+
+.globl	gcm_init_p8
+.type	gcm_init_p8,@function
+.align	5
+gcm_init_p8:	/* reads one 16-byte value at r4 and writes thirteen 16-byte table entries at r3+0x00 .. r3+0xc0 */
+.localentry	gcm_init_p8,0
+
+	li	0,-4096
+	li	8,0x10	/* r8, r9, r10 carry the byte offsets used by the indexed stores below */
+	li	12,-1
+	li	9,0x20
+	or	0,0,0	/* OR of r0 with itself: r0 is unchanged */
+	li	10,0x30
+	.long	0x7D202699	/* hand-encoded lxvd2x: v9 = 16 bytes at r4 */
+
+	vspltisb	8,-16	/* v8..v5 below build a constant vector; presumably the GHASH reduction constant - TODO confirm */
+	vspltisb	5,1
+	vaddubm	8,8,8
+	vxor	4,4,4	/* v4 = 0 */
+	vor	8,8,5
+	vsldoi	8,8,4,15
+	vsldoi	6,4,5,1
+	vaddubm	8,8,8
+	vspltisb	7,7
+	vor	8,8,6
+	vspltb	6,9,0	/* broadcast byte 0 of the loaded value */
+	vsl	9,9,5	/* shift the loaded value left by one bit (v5 holds 1 in every byte) */
+	vsrab	6,6,7	/* arithmetic shift right by 7 of each byte: 0x00 or 0xff per byte */
+	vand	6,6,8	/* keep the constant only where that mask is set */
+	vxor	3,9,6	/* v3 = shifted value, conditionally xored with the constant */
+
+	vsldoi	9,3,3,8	/* v9 = v3 with its two 8-byte halves swapped */
+	vsldoi	8,4,8,8
+	vsldoi	11,4,9,8	/* v11 / v10 = the halves of v9 padded with zero (v4) */
+	vsldoi	10,9,4,8
+
+	.long	0x7D001F99	/* hand-encoded stxvd2x: store v8 at r3+0x00 */
+	.long	0x7D681F99	/* store v11 at r3+r8 (0x10) */
+	li	8,0x40
+	.long	0x7D291F99	/* store v9 at r3+r9 (0x20) */
+	li	9,0x50
+	.long	0x7D4A1F99	/* store v10 at r3+r10 (0x30) */
+	li	10,0x60
+
+	.long	0x10035CC8	/* hand-encoded vpmsumd v0,v3,v11; every .long ending in ...4C8 here is a vpmsumd */
+	.long	0x10234CC8	/* vpmsumd v1,v3,v9 */
+	.long	0x104354C8	/* vpmsumd v2,v3,v10 */
+
+	.long	0x10E044C8	/* vpmsumd v7,v0,v8 */
+
+	vsldoi	5,1,4,8	/* split v1 into two zero-padded halves and fold them into v0 and v2 */
+	vsldoi	6,4,1,8
+	vxor	0,0,5
+	vxor	2,2,6
+
+	vsldoi	0,0,0,8	/* swap the halves of v0, then xor in the product with v8 */
+	vxor	0,0,7
+
+	vsldoi	6,0,0,8
+	.long	0x100044C8	/* vpmsumd v0,v0,v8 */
+	vxor	6,6,2
+	vxor	16,0,6	/* v16 = result of this multiply/reduce step */
+
+	vsldoi	17,16,16,8	/* same half-swap/split layout as v9/v11/v10, now for v16 */
+	vsldoi	19,4,17,8
+	vsldoi	18,17,4,8
+
+	.long	0x7E681F99	/* store v19 at r3+r8 (0x40) */
+	li	8,0x70
+	.long	0x7E291F99	/* store v17 at r3+r9 (0x50) */
+	li	9,0x80
+	.long	0x7E4A1F99	/* store v18 at r3+r10 (0x60) */
+	li	10,0x90
+	.long	0x10039CC8	/* two interleaved multiply/reduce chains follow: v0/v1/v2 and v13/v14/v15 */
+	.long	0x11B09CC8
+	.long	0x10238CC8
+	.long	0x11D08CC8
+	.long	0x104394C8
+	.long	0x11F094C8
+
+	.long	0x10E044C8
+	.long	0x114D44C8
+
+	vsldoi	5,1,4,8
+	vsldoi	6,4,1,8
+	vsldoi	11,14,4,8
+	vsldoi	9,4,14,8
+	vxor	0,0,5
+	vxor	2,2,6
+	vxor	13,13,11
+	vxor	15,15,9
+
+	vsldoi	0,0,0,8
+	vsldoi	13,13,13,8
+	vxor	0,0,7
+	vxor	13,13,10
+
+	vsldoi	6,0,0,8
+	vsldoi	9,13,13,8
+	.long	0x100044C8
+	.long	0x11AD44C8
+	vxor	6,6,2
+	vxor	9,9,15
+	vxor	0,0,6
+	vxor	13,13,9
+
+	vsldoi	9,0,0,8	/* lay out both results in the same three-vector form as before */
+	vsldoi	17,13,13,8
+	vsldoi	11,4,9,8
+	vsldoi	10,9,4,8
+	vsldoi	19,4,17,8
+	vsldoi	18,17,4,8
+
+	.long	0x7D681F99	/* store v11 at r3+r8 (0x70) */
+	li	8,0xa0
+	.long	0x7D291F99	/* store v9 at r3+r9 (0x80) */
+	li	9,0xb0
+	.long	0x7D4A1F99	/* store v10 at r3+r10 (0x90) */
+	li	10,0xc0
+	.long	0x7E681F99	/* store v19 at r3+r8 (0xa0) */
+	.long	0x7E291F99	/* store v17 at r3+r9 (0xb0) */
+	.long	0x7E4A1F99	/* store v18 at r3+r10 (0xc0) */
+
+	or	12,12,12	/* OR of r12 with itself: r12 is unchanged */
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,2,0
+.long	0
+.size	gcm_init_p8,.-gcm_init_p8
+.globl	gcm_gmult_p8
+.type	gcm_gmult_p8,@function
+.align	5
+gcm_gmult_p8:	/* loads 16 bytes at r3, multiplies/reduces them using table entries at r4+0x00..0x30, stores the 16-byte result back at r3 */
+.localentry	gcm_gmult_p8,0
+
+	lis	0,0xfff8
+	li	8,0x10	/* r8, r9, r10 = table offsets 0x10, 0x20, 0x30 */
+	li	12,-1
+	li	9,0x20
+	or	0,0,0	/* OR of r0 with itself: r0 is unchanged */
+	li	10,0x30
+	.long	0x7C601E99	/* hand-encoded lxvd2x: v3 = 16 bytes at r3 */
+
+	.long	0x7D682699	/* lxvd2x: v11 = table entry at r4+0x10 */
+	lvsl	12,0,0	/* v12 = lvsl permute-control vector for address 0 */
+	.long	0x7D292699	/* lxvd2x: v9 = table entry at r4+0x20 */
+	vspltisb	5,0x07	/* v5 = 7 in every byte */
+	.long	0x7D4A2699	/* lxvd2x: v10 = table entry at r4+0x30 */
+	vxor	12,12,5	/* v12 ^= v5: the permute control used on input and output below */
+	.long	0x7D002699	/* lxvd2x: v8 = table entry at r4+0x00 */
+	vperm	3,3,3,12	/* reorder the bytes of the loaded value with v12 */
+	vxor	4,4,4	/* v4 = 0 */
+
+	.long	0x10035CC8	/* hand-encoded vpmsumd v0,v3,v11 */
+	.long	0x10234CC8	/* vpmsumd v1,v3,v9 */
+	.long	0x104354C8	/* vpmsumd v2,v3,v10 */
+
+	.long	0x10E044C8	/* vpmsumd v7,v0,v8 */
+
+	vsldoi	5,1,4,8	/* split v1 into two zero-padded halves and fold them into v0 and v2 */
+	vsldoi	6,4,1,8
+	vxor	0,0,5
+	vxor	2,2,6
+
+	vsldoi	0,0,0,8	/* swap the halves of v0, then xor in the product with v8 */
+	vxor	0,0,7
+
+	vsldoi	6,0,0,8
+	.long	0x100044C8	/* vpmsumd v0,v0,v8 */
+	vxor	6,6,2
+	vxor	0,0,6	/* v0 = final 16-byte result */
+
+	vperm	0,0,0,12	/* apply the same byte permutation before storing */
+	.long	0x7C001F99	/* hand-encoded stxvd2x: store v0 at r3 */
+
+	or	12,12,12	/* OR of r12 with itself: r12 is unchanged */
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,2,0
+.long	0
+.size	gcm_gmult_p8,.-gcm_gmult_p8
+
+.globl	gcm_ghash_p8
+.type	gcm_ghash_p8,@function
+.align	5
+gcm_ghash_p8:	/* r3 -> 16-byte accumulator (loaded here, stored on exit); r4 -> table; r5 -> input; r6 = input byte count */
+.localentry	gcm_ghash_p8,0
+
+	li	0,-4096
+	li	8,0x10
+	li	12,-1
+	li	9,0x20
+	or	0,0,0	/* OR of r0 with itself: r0 is unchanged */
+	li	10,0x30
+	.long	0x7C001E99	/* hand-encoded lxvd2x: v0 = 16 bytes at r3 */
+
+	.long	0x7D682699	/* lxvd2x: v11 = table entry at r4+0x10 */
+	li	8,0x40
+	lvsl	12,0,0
+	.long	0x7D292699	/* lxvd2x: v9 = table entry at r4+0x20 */
+	li	9,0x50
+	vspltisb	5,0x07
+	.long	0x7D4A2699	/* lxvd2x: v10 = table entry at r4+0x30 */
+	li	10,0x60
+	vxor	12,12,5	/* v12 = byte-permute control applied to every load and to the final store */
+	.long	0x7D002699	/* lxvd2x: v8 = table entry at r4+0x00 */
+	vperm	0,0,0,12
+	vxor	4,4,4	/* v4 = 0 */
+
+	cmpldi	6,64	/* 64 bytes or more: take the four-blocks-per-iteration path */
+	bge	.Lgcm_ghash_p8_4x
+
+	.long	0x7C602E99	/* lxvd2x: v3 = first 16 input bytes at r5 */
+	addi	5,5,16
+	subic.	6,6,16	/* r6 -= 16; zero means exactly one block */
+	vperm	3,3,3,12
+	vxor	3,3,0	/* fold the accumulator into the block */
+	beq	.Lshort
+
+	.long	0x7E682699	/* lxvd2x: v19, v17, v18 = table entries at r4+0x40, 0x50, 0x60 */
+	li	8,16
+	.long	0x7E292699
+	add	9,5,6	/* r9 = r5 + r6, the end of the input */
+	.long	0x7E4A2699
+
+
+.align	5
+.Loop_2x:	/* two 16-byte blocks per iteration */
+	.long	0x7E002E99	/* lxvd2x: v16 = next block at r5 */
+	vperm	16,16,16,12
+
+	subic	6,6,32	/* r6 -= 32, recording a borrow in CA */
+	.long	0x10039CC8	/* hand-encoded vpmsumd; every .long ending in ...4C8 in this function is a vpmsumd */
+	.long	0x11B05CC8
+	subfe	0,0,0	/* r0 = 0 or -1 depending on the borrow */
+	.long	0x10238CC8
+	.long	0x11D04CC8
+	and	0,0,6	/* r0 = r6 if the subtraction borrowed, else 0 */
+	.long	0x104394C8
+	.long	0x11F054C8
+	add	5,5,0	/* adjust the input pointer by r0 so the load below stays inside the buffer - presumably; verify */
+
+	vxor	0,0,13
+	vxor	1,1,14
+
+	.long	0x10E044C8
+
+	vsldoi	5,1,4,8
+	vsldoi	6,4,1,8
+	vxor	2,2,15
+	vxor	0,0,5
+	vxor	2,2,6
+
+	vsldoi	0,0,0,8
+	vxor	0,0,7
+	.long	0x7C682E99	/* lxvd2x: v3 = block at r5+r8 (r8 = 16) */
+	addi	5,5,32
+
+	vsldoi	6,0,0,8
+	.long	0x100044C8
+	vperm	3,3,3,12
+	vxor	6,6,2
+	vxor	3,3,6
+	vxor	3,3,0
+	cmpld	9,5	/* loop while the end pointer r9 is still above r5 */
+	bgt	.Loop_2x
+
+	cmplwi	6,0
+	bne	.Leven
+
+.Lshort:	/* one remaining block in v3: multiply with the entries loaded at entry (v11, v9, v10) and reduce with v8 */
+	.long	0x10035CC8
+	.long	0x10234CC8
+	.long	0x104354C8
+
+	.long	0x10E044C8
+
+	vsldoi	5,1,4,8
+	vsldoi	6,4,1,8
+	vxor	0,0,5
+	vxor	2,2,6
+
+	vsldoi	0,0,0,8
+	vxor	0,0,7
+
+	vsldoi	6,0,0,8
+	.long	0x100044C8
+	vxor	6,6,2
+
+.Leven:
+	vxor	0,0,6
+	vperm	0,0,0,12
+	.long	0x7C001F99	/* hand-encoded stxvd2x: store the accumulator v0 back at r3 */
+
+	or	12,12,12	/* OR of r12 with itself: r12 is unchanged */
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,4,0
+.long	0
+.align	5
+.gcm_ghash_p8_4x:
+.Lgcm_ghash_p8_4x:	/* path for 64 bytes or more: four blocks per iteration, uses v20..v31 */
+	stdu	1,-256(1)	/* allocate a 256-byte stack frame */
+	li	10,63
+	li	11,79
+	stvx	20,10,1	/* save v20..v31 in the frame; r10 and r11 alternate as offsets from r1 */
+	addi	10,10,32
+	stvx	21,11,1
+	addi	11,11,32
+	stvx	22,10,1
+	addi	10,10,32
+	stvx	23,11,1
+	addi	11,11,32
+	stvx	24,10,1
+	addi	10,10,32
+	stvx	25,11,1
+	addi	11,11,32
+	stvx	26,10,1
+	addi	10,10,32
+	stvx	27,11,1
+	addi	11,11,32
+	stvx	28,10,1
+	addi	10,10,32
+	stvx	29,11,1
+	addi	11,11,32
+	stvx	30,10,1
+	li	10,0x60
+	stvx	31,11,1
+	li	0,-1
+	stw	12,252(1)	/* store r12 at offset 252 of the frame */
+	or	0,0,0
+
+	lvsl	5,0,8
+
+	li	8,0x70
+	.long	0x7E292699	/* lxvd2x: v17 = table entry at r4+0x50 (r9 is still 0x50 here) */
+	li	9,0x80
+	vspltisb	6,8
+
+	li	10,0x90
+	.long	0x7EE82699	/* lxvd2x: v23, v24, v25 = table entries at r4+0x70, 0x80, 0x90 */
+	li	8,0xa0
+	.long	0x7F092699
+	li	9,0xb0
+	.long	0x7F2A2699
+	li	10,0xc0
+	.long	0x7FA82699	/* lxvd2x: v29, v30, v31 = table entries at r4+0xa0, 0xb0, 0xc0 */
+	li	8,0x10
+	.long	0x7FC92699
+	li	9,0x20
+	.long	0x7FEA2699
+	li	10,0x30
+
+	vsldoi	7,4,6,8
+	vaddubm	18,5,7	/* v18 and v19 = permute controls for the vperm merges below */
+	vaddubm	19,6,18
+
+	srdi	6,6,4	/* r6 = byte count >> 4, the number of 16-byte blocks */
+
+	.long	0x7C602E99	/* lxvd2x: v3, v16, v22, v28 = four input blocks at r5+0, 0x10, 0x20, 0x30 */
+	.long	0x7E082E99
+	subic.	6,6,8	/* r6 -= 8; negative means fewer than eight blocks in total */
+	.long	0x7EC92E99
+	.long	0x7F8A2E99
+	addi	5,5,0x40	/* advance the input pointer by 64 bytes */
+	vperm	3,3,3,12
+	vperm	16,16,16,12
+	vperm	22,22,22,12
+	vperm	28,28,28,12
+
+	vxor	2,3,0	/* v2 = first block xor accumulator */
+
+	.long	0x11B0BCC8
+	.long	0x11D0C4C8
+	.long	0x11F0CCC8
+
+	vperm	11,17,9,18
+	vperm	5,22,28,19
+	vperm	10,17,9,19
+	vperm	6,22,28,18
+	.long	0x12B68CC8
+	.long	0x12855CC8
+	.long	0x137C4CC8
+	.long	0x134654C8
+
+	vxor	21,21,14
+	vxor	20,20,13
+	vxor	27,27,21
+	vxor	26,26,15
+
+	blt	.Ltail_4x	/* taken when the subic. above left r6 negative */
+
+.Loop_4x:
+	.long	0x7C602E99	/* load the next four input blocks while the previous four are processed */
+	.long	0x7E082E99
+	subic.	6,6,4	/* four more blocks consumed */
+	.long	0x7EC92E99
+	.long	0x7F8A2E99
+	addi	5,5,0x40
+	vperm	16,16,16,12
+	vperm	22,22,22,12
+	vperm	28,28,28,12
+	vperm	3,3,3,12
+
+	.long	0x1002ECC8
+	.long	0x1022F4C8
+	.long	0x1042FCC8
+	.long	0x11B0BCC8
+	.long	0x11D0C4C8
+	.long	0x11F0CCC8
+
+	vxor	0,0,20
+	vxor	1,1,27
+	vxor	2,2,26
+	vperm	5,22,28,19
+	vperm	6,22,28,18
+
+	.long	0x10E044C8
+	.long	0x12855CC8
+	.long	0x134654C8
+
+	vsldoi	5,1,4,8
+	vsldoi	6,4,1,8
+	vxor	0,0,5
+	vxor	2,2,6
+
+	vsldoi	0,0,0,8
+	vxor	0,0,7
+
+	vsldoi	6,0,0,8
+	.long	0x12B68CC8
+	.long	0x137C4CC8
+	.long	0x100044C8
+
+	vxor	20,20,13
+	vxor	26,26,15
+	vxor	2,2,3
+	vxor	21,21,14
+	vxor	2,2,6
+	vxor	27,27,21
+	vxor	2,2,0
+	bge	.Loop_4x	/* loop while the subic. above left r6 >= 0 */
+
+.Ltail_4x:	/* finish the group in flight; .Lthree/.Ltwo/.Lone also branch here after staging leftover blocks */
+	.long	0x1002ECC8
+	.long	0x1022F4C8
+	.long	0x1042FCC8
+
+	vxor	0,0,20
+	vxor	1,1,27
+
+	.long	0x10E044C8
+
+	vsldoi	5,1,4,8
+	vsldoi	6,4,1,8
+	vxor	2,2,26
+	vxor	0,0,5
+	vxor	2,2,6
+
+	vsldoi	0,0,0,8
+	vxor	0,0,7
+
+	vsldoi	6,0,0,8
+	.long	0x100044C8
+	vxor	6,6,2
+	vxor	0,0,6
+
+	addic.	6,6,4	/* r6 = number of leftover blocks (0..3) */
+	beq	.Ldone_4x
+
+	.long	0x7C602E99	/* lxvd2x: v3 = first leftover block at r5 */
+	cmpldi	6,2	/* cr0 decides between one, two or three leftover blocks */
+	li	6,-4	/* r6 = -4 so the addic. above yields 0 on the next pass through .Ltail_4x */
+	blt	.Lone
+	.long	0x7E082E99	/* lxvd2x: v16 = second leftover block at r5+r8 */
+	beq	.Ltwo
+
+.Lthree:	/* three leftover blocks */
+	.long	0x7EC92E99	/* lxvd2x: v22 = third leftover block at r5+r9 */
+	vperm	3,3,3,12
+	vperm	16,16,16,12
+	vperm	22,22,22,12
+
+	vxor	2,3,0
+	vor	29,23,23	/* v29, v30, v31 = copies of v23, v24, v25 */
+	vor	30, 24, 24
+	vor	31,25,25
+
+	vperm	5,16,22,19
+	vperm	6,16,22,18
+	.long	0x12B08CC8
+	.long	0x13764CC8
+	.long	0x12855CC8
+	.long	0x134654C8
+
+	vxor	27,27,21
+	b	.Ltail_4x
+
+.align	4
+.Ltwo:	/* two leftover blocks */
+	vperm	3,3,3,12
+	vperm	16,16,16,12
+
+	vxor	2,3,0
+	vperm	5,4,16,19
+	vperm	6,4,16,18
+
+	vsldoi	29,4,17,8	/* v29, v30, v31 derived from v17 */
+	vor	30, 17, 17
+	vsldoi	31,17,4,8
+
+	.long	0x12855CC8
+	.long	0x13704CC8
+	.long	0x134654C8
+
+	b	.Ltail_4x
+
+.align	4
+.Lone:	/* one leftover block */
+	vperm	3,3,3,12
+
+	vsldoi	29,4,9,8	/* v29, v30, v31 derived from v9 */
+	vor	30, 9, 9
+	vsldoi	31,9,4,8
+
+	vxor	2,3,0
+	vxor	20,20,20	/* clear v20, v27, v26 so they contribute nothing in .Ltail_4x */
+	vxor	27,27,27
+	vxor	26,26,26
+
+	b	.Ltail_4x
+
+.Ldone_4x:
+	vperm	0,0,0,12
+	.long	0x7C001F99	/* hand-encoded stxvd2x: store the accumulator v0 back at r3 */
+
+	li	10,63
+	li	11,79
+	or	12,12,12	/* OR of r12 with itself: r12 is unchanged */
+	lvx	20,10,1	/* restore v20..v31 from the frame, mirroring the saves above */
+	addi	10,10,32
+	lvx	21,11,1
+	addi	11,11,32
+	lvx	22,10,1
+	addi	10,10,32
+	lvx	23,11,1
+	addi	11,11,32
+	lvx	24,10,1
+	addi	10,10,32
+	lvx	25,11,1
+	addi	11,11,32
+	lvx	26,10,1
+	addi	10,10,32
+	lvx	27,11,1
+	addi	11,11,32
+	lvx	28,10,1
+	addi	10,10,32
+	lvx	29,11,1
+	addi	11,11,32
+	lvx	30,10,1
+	lvx	31,11,1
+	addi	1,1,256	/* release the 256-byte frame */
+	blr	
+.long	0
+.byte	0,12,0x04,0,0x80,0,4,0
+.long	0
+.size	gcm_ghash_p8,.-gcm_ghash_p8
+
+.byte	71,72,65,83,72,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align	2
+.align	2
diff --git a/third_party/boringssl/linux-x86/crypto/chacha/chacha-x86.S b/third_party/boringssl/linux-x86/crypto/chacha/chacha-x86.S
new file mode 100644
index 0000000..519081b
--- /dev/null
+++ b/third_party/boringssl/linux-x86/crypto/chacha/chacha-x86.S
@@ -0,0 +1,968 @@
+#if defined(__i386__)
+.text
+.globl	ChaCha20_ctr32
+.hidden	ChaCha20_ctr32
+.type	ChaCha20_ctr32,@function
+.align	16
+ChaCha20_ctr32:	/* stack args after the four pushes: 20(%esp) dst, 24(%esp) src, 28(%esp) byte count, 32(%esp) 32-byte key, 36(%esp) 16-byte counter block */
+.L_ChaCha20_ctr32_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	xorl	%eax,%eax
+	cmpl	28(%esp),%eax	/* byte count == 0: nothing to do */
+	je	.L000no_data
+	call	.Lpic_point	/* call/pop pair: %eax = run-time address of .Lpic_point */
+.Lpic_point:
+	popl	%eax
+	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp	/* %ebp = &OPENSSL_ia32cap_P, position-independent */
+	testl	$16777216,(%ebp)	/* bit 24 of capability word 0 */
+	jz	.L001x86
+	testl	$512,4(%ebp)	/* bit 9 of capability word 1 */
+	jz	.L001x86
+	jmp	.Lssse3_shortcut	/* both bits set: continue inside ChaCha20_ssse3 with %eax still = .Lpic_point */
+.L001x86:	/* integer-only path */
+	movl	32(%esp),%esi	/* %esi = key pointer */
+	movl	36(%esp),%edi	/* %edi = counter block pointer */
+	subl	$132,%esp	/* local frame; the caller's five arguments are now at 152..168(%esp) */
+	movl	(%esi),%eax	/* copy the eight key words to 80..108(%esp) */
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	movl	%eax,80(%esp)
+	movl	%ebx,84(%esp)
+	movl	%ecx,88(%esp)
+	movl	%edx,92(%esp)
+	movl	16(%esi),%eax
+	movl	20(%esi),%ebx
+	movl	24(%esi),%ecx
+	movl	28(%esi),%edx
+	movl	%eax,96(%esp)
+	movl	%ebx,100(%esp)
+	movl	%ecx,104(%esp)
+	movl	%edx,108(%esp)
+	movl	(%edi),%eax	/* copy the four counter-block words to 112..124(%esp) */
+	movl	4(%edi),%ebx
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+	subl	$1,%eax	/* pre-decrement word 0; .L002entry adds 1 back before every block */
+	movl	%eax,112(%esp)
+	movl	%ebx,116(%esp)
+	movl	%ecx,120(%esp)
+	movl	%edx,124(%esp)
+	jmp	.L002entry
+.align	16
+.L003outer_loop:	/* next 64-byte block: write the advanced pointers and count back into the argument slots */
+	movl	%ebx,156(%esp)	/* src pointer */
+	movl	%eax,152(%esp)	/* dst pointer */
+	movl	%ecx,160(%esp)	/* remaining byte count */
+.L002entry:	/* working state: words 0..15 map to 0..60(%esp), with some words held in registers */
+	movl	$1634760805,%eax	/* 0x61707865, ASCII "expa" */
+	movl	$857760878,4(%esp)	/* 0x3320646e, ASCII "nd 3" */
+	movl	$2036477234,8(%esp)	/* 0x79622d32, ASCII "2-by" */
+	movl	$1797285236,12(%esp)	/* 0x6b206574, ASCII "te k" */
+	movl	84(%esp),%ebx
+	movl	88(%esp),%ebp
+	movl	104(%esp),%ecx
+	movl	108(%esp),%esi
+	movl	116(%esp),%edx
+	movl	120(%esp),%edi
+	movl	%ebx,20(%esp)
+	movl	%ebp,24(%esp)
+	movl	%ecx,40(%esp)
+	movl	%esi,44(%esp)
+	movl	%edx,52(%esp)
+	movl	%edi,56(%esp)
+	movl	92(%esp),%ebx
+	movl	124(%esp),%edi
+	movl	112(%esp),%edx
+	movl	80(%esp),%ebp
+	movl	96(%esp),%ecx
+	movl	100(%esp),%esi
+	addl	$1,%edx	/* block counter for this block */
+	movl	%ebx,28(%esp)
+	movl	%edi,60(%esp)
+	movl	%edx,112(%esp)	/* store the incremented counter back */
+	movl	$10,%ebx	/* 10 iterations of .L004loop; each looks like one ChaCha double round - TODO confirm */
+	jmp	.L004loop
+.align	16
+.L004loop:
+	addl	%ebp,%eax
+	movl	%ebx,128(%esp)	/* spill the iteration counter; %ebx is needed as a working register */
+	movl	%ebp,%ebx
+	xorl	%eax,%edx
+	roll	$16,%edx	/* the rotate amounts 16, 12, 8, 7 repeat throughout the loop body */
+	addl	%edx,%ecx
+	xorl	%ecx,%ebx
+	movl	52(%esp),%edi
+	roll	$12,%ebx
+	movl	20(%esp),%ebp
+	addl	%ebx,%eax
+	xorl	%eax,%edx
+	movl	%eax,(%esp)
+	roll	$8,%edx
+	movl	4(%esp),%eax
+	addl	%edx,%ecx
+	movl	%edx,48(%esp)
+	xorl	%ecx,%ebx
+	addl	%ebp,%eax
+	roll	$7,%ebx
+	xorl	%eax,%edi
+	movl	%ecx,32(%esp)
+	roll	$16,%edi
+	movl	%ebx,16(%esp)
+	addl	%edi,%esi
+	movl	40(%esp),%ecx
+	xorl	%esi,%ebp
+	movl	56(%esp),%edx
+	roll	$12,%ebp
+	movl	24(%esp),%ebx
+	addl	%ebp,%eax
+	xorl	%eax,%edi
+	movl	%eax,4(%esp)
+	roll	$8,%edi
+	movl	8(%esp),%eax
+	addl	%edi,%esi
+	movl	%edi,52(%esp)
+	xorl	%esi,%ebp
+	addl	%ebx,%eax
+	roll	$7,%ebp
+	xorl	%eax,%edx
+	movl	%esi,36(%esp)
+	roll	$16,%edx
+	movl	%ebp,20(%esp)
+	addl	%edx,%ecx
+	movl	44(%esp),%esi
+	xorl	%ecx,%ebx
+	movl	60(%esp),%edi
+	roll	$12,%ebx
+	movl	28(%esp),%ebp
+	addl	%ebx,%eax
+	xorl	%eax,%edx
+	movl	%eax,8(%esp)
+	roll	$8,%edx
+	movl	12(%esp),%eax
+	addl	%edx,%ecx
+	movl	%edx,56(%esp)
+	xorl	%ecx,%ebx
+	addl	%ebp,%eax
+	roll	$7,%ebx
+	xorl	%eax,%edi
+	roll	$16,%edi
+	movl	%ebx,24(%esp)
+	addl	%edi,%esi
+	xorl	%esi,%ebp
+	roll	$12,%ebp
+	movl	20(%esp),%ebx
+	addl	%ebp,%eax
+	xorl	%eax,%edi
+	movl	%eax,12(%esp)
+	roll	$8,%edi
+	movl	(%esp),%eax
+	addl	%edi,%esi
+	movl	%edi,%edx
+	xorl	%esi,%ebp
+	addl	%ebx,%eax
+	roll	$7,%ebp
+	xorl	%eax,%edx
+	roll	$16,%edx
+	movl	%ebp,28(%esp)
+	addl	%edx,%ecx
+	xorl	%ecx,%ebx
+	movl	48(%esp),%edi
+	roll	$12,%ebx
+	movl	24(%esp),%ebp
+	addl	%ebx,%eax
+	xorl	%eax,%edx
+	movl	%eax,(%esp)
+	roll	$8,%edx
+	movl	4(%esp),%eax
+	addl	%edx,%ecx
+	movl	%edx,60(%esp)
+	xorl	%ecx,%ebx
+	addl	%ebp,%eax
+	roll	$7,%ebx
+	xorl	%eax,%edi
+	movl	%ecx,40(%esp)
+	roll	$16,%edi
+	movl	%ebx,20(%esp)
+	addl	%edi,%esi
+	movl	32(%esp),%ecx
+	xorl	%esi,%ebp
+	movl	52(%esp),%edx
+	roll	$12,%ebp
+	movl	28(%esp),%ebx
+	addl	%ebp,%eax
+	xorl	%eax,%edi
+	movl	%eax,4(%esp)
+	roll	$8,%edi
+	movl	8(%esp),%eax
+	addl	%edi,%esi
+	movl	%edi,48(%esp)
+	xorl	%esi,%ebp
+	addl	%ebx,%eax
+	roll	$7,%ebp
+	xorl	%eax,%edx
+	movl	%esi,44(%esp)
+	roll	$16,%edx
+	movl	%ebp,24(%esp)
+	addl	%edx,%ecx
+	movl	36(%esp),%esi
+	xorl	%ecx,%ebx
+	movl	56(%esp),%edi
+	roll	$12,%ebx
+	movl	16(%esp),%ebp
+	addl	%ebx,%eax
+	xorl	%eax,%edx
+	movl	%eax,8(%esp)
+	roll	$8,%edx
+	movl	12(%esp),%eax
+	addl	%edx,%ecx
+	movl	%edx,52(%esp)
+	xorl	%ecx,%ebx
+	addl	%ebp,%eax
+	roll	$7,%ebx
+	xorl	%eax,%edi
+	roll	$16,%edi
+	movl	%ebx,28(%esp)
+	addl	%edi,%esi
+	xorl	%esi,%ebp
+	movl	48(%esp),%edx
+	roll	$12,%ebp
+	movl	128(%esp),%ebx	/* reload the iteration counter */
+	addl	%ebp,%eax
+	xorl	%eax,%edi
+	movl	%eax,12(%esp)
+	roll	$8,%edi
+	movl	(%esp),%eax
+	addl	%edi,%esi
+	movl	%edi,56(%esp)
+	xorl	%esi,%ebp
+	roll	$7,%ebp
+	decl	%ebx
+	jnz	.L004loop
+	movl	160(%esp),%ebx	/* %ebx = remaining byte count */
+	addl	$1634760805,%eax	/* add the input state back into the words still held in registers */
+	addl	80(%esp),%ebp
+	addl	96(%esp),%ecx
+	addl	100(%esp),%esi
+	cmpl	$64,%ebx	/* fewer than 64 bytes left: partial-block path */
+	jb	.L005tail
+	movl	156(%esp),%ebx	/* %ebx = src pointer */
+	addl	112(%esp),%edx
+	addl	120(%esp),%edi
+	xorl	(%ebx),%eax	/* xor keystream words with src, store to dst at the matching offsets */
+	xorl	16(%ebx),%ebp
+	movl	%eax,(%esp)	/* park word 0; %eax is reused as the dst pointer */
+	movl	152(%esp),%eax	/* %eax = dst pointer */
+	xorl	32(%ebx),%ecx
+	xorl	36(%ebx),%esi
+	xorl	48(%ebx),%edx
+	xorl	56(%ebx),%edi
+	movl	%ebp,16(%eax)
+	movl	%ecx,32(%eax)
+	movl	%esi,36(%eax)
+	movl	%edx,48(%eax)
+	movl	%edi,56(%eax)
+	movl	4(%esp),%ebp
+	movl	8(%esp),%ecx
+	movl	12(%esp),%esi
+	movl	20(%esp),%edx
+	movl	24(%esp),%edi
+	addl	$857760878,%ebp
+	addl	$2036477234,%ecx
+	addl	$1797285236,%esi
+	addl	84(%esp),%edx
+	addl	88(%esp),%edi
+	xorl	4(%ebx),%ebp
+	xorl	8(%ebx),%ecx
+	xorl	12(%ebx),%esi
+	xorl	20(%ebx),%edx
+	xorl	24(%ebx),%edi
+	movl	%ebp,4(%eax)
+	movl	%ecx,8(%eax)
+	movl	%esi,12(%eax)
+	movl	%edx,20(%eax)
+	movl	%edi,24(%eax)
+	movl	28(%esp),%ebp
+	movl	40(%esp),%ecx
+	movl	44(%esp),%esi
+	movl	52(%esp),%edx
+	movl	60(%esp),%edi
+	addl	92(%esp),%ebp
+	addl	104(%esp),%ecx
+	addl	108(%esp),%esi
+	addl	116(%esp),%edx
+	addl	124(%esp),%edi
+	xorl	28(%ebx),%ebp
+	xorl	40(%ebx),%ecx
+	xorl	44(%ebx),%esi
+	xorl	52(%ebx),%edx
+	xorl	60(%ebx),%edi
+	leal	64(%ebx),%ebx	/* advance src by one block */
+	movl	%ebp,28(%eax)
+	movl	(%esp),%ebp	/* retrieve the parked word 0 */
+	movl	%ecx,40(%eax)
+	movl	160(%esp),%ecx	/* %ecx = remaining byte count */
+	movl	%esi,44(%eax)
+	movl	%edx,52(%eax)
+	movl	%edi,60(%eax)
+	movl	%ebp,(%eax)
+	leal	64(%eax),%eax	/* advance dst by one block */
+	subl	$64,%ecx
+	jnz	.L003outer_loop
+	jmp	.L006done
+.L005tail:	/* final partial block: finish the 64-byte keystream block at 0..60(%esp), then xor byte by byte */
+	addl	112(%esp),%edx
+	addl	120(%esp),%edi
+	movl	%eax,(%esp)
+	movl	%ebp,16(%esp)
+	movl	%ecx,32(%esp)
+	movl	%esi,36(%esp)
+	movl	%edx,48(%esp)
+	movl	%edi,56(%esp)
+	movl	4(%esp),%ebp
+	movl	8(%esp),%ecx
+	movl	12(%esp),%esi
+	movl	20(%esp),%edx
+	movl	24(%esp),%edi
+	addl	$857760878,%ebp
+	addl	$2036477234,%ecx
+	addl	$1797285236,%esi
+	addl	84(%esp),%edx
+	addl	88(%esp),%edi
+	movl	%ebp,4(%esp)
+	movl	%ecx,8(%esp)
+	movl	%esi,12(%esp)
+	movl	%edx,20(%esp)
+	movl	%edi,24(%esp)
+	movl	28(%esp),%ebp
+	movl	40(%esp),%ecx
+	movl	44(%esp),%esi
+	movl	52(%esp),%edx
+	movl	60(%esp),%edi
+	addl	92(%esp),%ebp
+	addl	104(%esp),%ecx
+	addl	108(%esp),%esi
+	addl	116(%esp),%edx
+	addl	124(%esp),%edi
+	movl	%ebp,28(%esp)
+	movl	156(%esp),%ebp	/* %ebp = src pointer */
+	movl	%ecx,40(%esp)
+	movl	152(%esp),%ecx	/* %ecx = dst pointer */
+	movl	%esi,44(%esp)
+	xorl	%esi,%esi	/* %esi = byte index, starting at 0 */
+	movl	%edx,52(%esp)
+	movl	%edi,60(%esp)
+	xorl	%eax,%eax
+	xorl	%edx,%edx
+.L007tail_loop:	/* dst[i] = src[i] ^ keystream[i]; runs %ebx (remaining count) times */
+	movb	(%esi,%ebp,1),%al
+	movb	(%esp,%esi,1),%dl
+	leal	1(%esi),%esi
+	xorb	%dl,%al
+	movb	%al,-1(%ecx,%esi,1)	/* -1 compensates for the index increment two lines up */
+	decl	%ebx
+	jnz	.L007tail_loop
+.L006done:
+	addl	$132,%esp	/* release the local frame */
+.L000no_data:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
+.globl	ChaCha20_ssse3
+.hidden	ChaCha20_ssse3
+.type	ChaCha20_ssse3,@function
+.align	16
+ChaCha20_ssse3:	/* SSE variant; takes the same five stack arguments as ChaCha20_ctr32 */
+.L_ChaCha20_ssse3_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+.Lssse3_shortcut:	/* ChaCha20_ctr32 jumps here after its own four pushes, with %eax = address of .Lpic_point */
+	movl	20(%esp),%edi	/* %edi = dst */
+	movl	24(%esp),%esi	/* %esi = src */
+	movl	28(%esp),%ecx	/* %ecx = byte count */
+	movl	32(%esp),%edx	/* %edx = key pointer */
+	movl	36(%esp),%ebx	/* %ebx = counter block pointer */
+	movl	%esp,%ebp
+	subl	$524,%esp
+	andl	$-64,%esp	/* 64-byte aligned scratch area */
+	movl	%ebp,512(%esp)	/* remember the original %esp for the epilogue */
+	leal	.Lssse3_data-.Lpic_point(%eax),%eax	/* %eax = &.Lssse3_data. NOTE(review): relies on %eax = .Lpic_point; nothing between the ChaCha20_ssse3 label and this line sets %eax, so only entry via .Lssse3_shortcut looks valid - confirm no direct callers */
+	movdqu	(%ebx),%xmm3	/* %xmm3 = 16-byte counter block */
+	cmpl	$256,%ecx	/* fewer than 256 bytes: go straight to the one-block-at-a-time path */
+	jb	.L0081x
+	movl	%edx,516(%esp)	/* save the key and counter pointers for the 1x path */
+	movl	%ebx,520(%esp)
+	subl	$256,%ecx
+	leal	384(%esp),%ebp	/* %ebp = centre of the 256-byte input-state area (addressed -128..+112) */
+	movdqu	(%edx),%xmm7	/* first 16 key bytes */
+	pshufd	$0,%xmm3,%xmm0	/* pshufd $0/$85/$170/$255 broadcast dword 0/1/2/3 to all four lanes */
+	pshufd	$85,%xmm3,%xmm1
+	pshufd	$170,%xmm3,%xmm2
+	pshufd	$255,%xmm3,%xmm3
+	paddd	48(%eax),%xmm0	/* add {0,1,2,3}: a distinct counter in each lane */
+	pshufd	$0,%xmm7,%xmm4
+	pshufd	$85,%xmm7,%xmm5
+	psubd	64(%eax),%xmm0	/* subtract {4,4,4,4}; .L009outer_loop adds 4 back on every pass */
+	pshufd	$170,%xmm7,%xmm6
+	pshufd	$255,%xmm7,%xmm7
+	movdqa	%xmm0,64(%ebp)
+	movdqa	%xmm1,80(%ebp)
+	movdqa	%xmm2,96(%ebp)
+	movdqa	%xmm3,112(%ebp)
+	movdqu	16(%edx),%xmm3	/* second 16 key bytes */
+	movdqa	%xmm4,-64(%ebp)
+	movdqa	%xmm5,-48(%ebp)
+	movdqa	%xmm6,-32(%ebp)
+	movdqa	%xmm7,-16(%ebp)
+	movdqa	32(%eax),%xmm7	/* the four constant words from .Lssse3_data+32 */
+	leal	128(%esp),%ebx	/* %ebx = centre of the 256-byte working-state area */
+	pshufd	$0,%xmm3,%xmm0
+	pshufd	$85,%xmm3,%xmm1
+	pshufd	$170,%xmm3,%xmm2
+	pshufd	$255,%xmm3,%xmm3
+	pshufd	$0,%xmm7,%xmm4
+	pshufd	$85,%xmm7,%xmm5
+	pshufd	$170,%xmm7,%xmm6
+	pshufd	$255,%xmm7,%xmm7
+	movdqa	%xmm0,(%ebp)
+	movdqa	%xmm1,16(%ebp)
+	movdqa	%xmm2,32(%ebp)
+	movdqa	%xmm3,48(%ebp)
+	movdqa	%xmm4,-128(%ebp)
+	movdqa	%xmm5,-112(%ebp)
+	movdqa	%xmm6,-96(%ebp)
+	movdqa	%xmm7,-80(%ebp)
+	leal	128(%esi),%esi	/* bias src and dst by +128 so the accesses below use -128..+64 displacements */
+	leal	128(%edi),%edi
+	jmp	.L009outer_loop
+.align	16
+.L009outer_loop:	/* one pass handles 256 bytes: copy input state (%ebp area) to working state (%ebx area) */
+	movdqa	-112(%ebp),%xmm1
+	movdqa	-96(%ebp),%xmm2
+	movdqa	-80(%ebp),%xmm3
+	movdqa	-48(%ebp),%xmm5
+	movdqa	-32(%ebp),%xmm6
+	movdqa	-16(%ebp),%xmm7
+	movdqa	%xmm1,-112(%ebx)
+	movdqa	%xmm2,-96(%ebx)
+	movdqa	%xmm3,-80(%ebx)
+	movdqa	%xmm5,-48(%ebx)
+	movdqa	%xmm6,-32(%ebx)
+	movdqa	%xmm7,-16(%ebx)
+	movdqa	32(%ebp),%xmm2
+	movdqa	48(%ebp),%xmm3
+	movdqa	64(%ebp),%xmm4
+	movdqa	80(%ebp),%xmm5
+	movdqa	96(%ebp),%xmm6
+	movdqa	112(%ebp),%xmm7
+	paddd	64(%eax),%xmm4	/* advance the four lane counters by 4 */
+	movdqa	%xmm2,32(%ebx)
+	movdqa	%xmm3,48(%ebx)
+	movdqa	%xmm4,64(%ebx)
+	movdqa	%xmm5,80(%ebx)
+	movdqa	%xmm6,96(%ebx)
+	movdqa	%xmm7,112(%ebx)
+	movdqa	%xmm4,64(%ebp)	/* keep the advanced counters for the next pass */
+	movdqa	-128(%ebp),%xmm0
+	movdqa	%xmm4,%xmm6
+	movdqa	-64(%ebp),%xmm3
+	movdqa	(%ebp),%xmm4
+	movdqa	16(%ebp),%xmm5
+	movl	$10,%edx	/* 10 iterations of .L010loop */
+	nop
+.align	16
+.L010loop:
+	paddd	%xmm3,%xmm0
+	movdqa	%xmm3,%xmm2
+	pxor	%xmm0,%xmm6
+	pshufb	(%eax),%xmm6	/* byte shuffle with the mask at .Lssse3_data+0: rotate each dword left by 16 */
+	paddd	%xmm6,%xmm4
+	pxor	%xmm4,%xmm2
+	movdqa	-48(%ebx),%xmm3
+	movdqa	%xmm2,%xmm1
+	pslld	$12,%xmm2	/* shift-left 12 | shift-right 20 = rotate left by 12 */
+	psrld	$20,%xmm1
+	por	%xmm1,%xmm2
+	movdqa	-112(%ebx),%xmm1
+	paddd	%xmm2,%xmm0
+	movdqa	80(%ebx),%xmm7
+	pxor	%xmm0,%xmm6
+	movdqa	%xmm0,-128(%ebx)
+	pshufb	16(%eax),%xmm6	/* byte shuffle with the mask at .Lssse3_data+16: rotate each dword left by 8 */
+	paddd	%xmm6,%xmm4
+	movdqa	%xmm6,64(%ebx)
+	pxor	%xmm4,%xmm2
+	paddd	%xmm3,%xmm1
+	movdqa	%xmm2,%xmm0
+	pslld	$7,%xmm2	/* shift-left 7 | shift-right 25 = rotate left by 7 */
+	psrld	$25,%xmm0
+	pxor	%xmm1,%xmm7
+	por	%xmm0,%xmm2
+	movdqa	%xmm4,(%ebx)
+	pshufb	(%eax),%xmm7
+	movdqa	%xmm2,-64(%ebx)
+	paddd	%xmm7,%xmm5
+	movdqa	32(%ebx),%xmm4
+	pxor	%xmm5,%xmm3
+	movdqa	-32(%ebx),%xmm2
+	movdqa	%xmm3,%xmm0
+	pslld	$12,%xmm3
+	psrld	$20,%xmm0
+	por	%xmm0,%xmm3
+	movdqa	-96(%ebx),%xmm0
+	paddd	%xmm3,%xmm1
+	movdqa	96(%ebx),%xmm6
+	pxor	%xmm1,%xmm7
+	movdqa	%xmm1,-112(%ebx)
+	pshufb	16(%eax),%xmm7
+	paddd	%xmm7,%xmm5
+	movdqa	%xmm7,80(%ebx)
+	pxor	%xmm5,%xmm3
+	paddd	%xmm2,%xmm0
+	movdqa	%xmm3,%xmm1
+	pslld	$7,%xmm3
+	psrld	$25,%xmm1
+	pxor	%xmm0,%xmm6
+	por	%xmm1,%xmm3
+	movdqa	%xmm5,16(%ebx)
+	pshufb	(%eax),%xmm6
+	movdqa	%xmm3,-48(%ebx)
+	paddd	%xmm6,%xmm4
+	movdqa	48(%ebx),%xmm5
+	pxor	%xmm4,%xmm2
+	movdqa	-16(%ebx),%xmm3
+	movdqa	%xmm2,%xmm1
+	pslld	$12,%xmm2
+	psrld	$20,%xmm1
+	por	%xmm1,%xmm2
+	movdqa	-80(%ebx),%xmm1
+	paddd	%xmm2,%xmm0
+	movdqa	112(%ebx),%xmm7
+	pxor	%xmm0,%xmm6
+	movdqa	%xmm0,-96(%ebx)
+	pshufb	16(%eax),%xmm6
+	paddd	%xmm6,%xmm4
+	movdqa	%xmm6,96(%ebx)
+	pxor	%xmm4,%xmm2
+	paddd	%xmm3,%xmm1
+	movdqa	%xmm2,%xmm0
+	pslld	$7,%xmm2
+	psrld	$25,%xmm0
+	pxor	%xmm1,%xmm7
+	por	%xmm0,%xmm2
+	pshufb	(%eax),%xmm7
+	movdqa	%xmm2,-32(%ebx)
+	paddd	%xmm7,%xmm5
+	pxor	%xmm5,%xmm3
+	movdqa	-48(%ebx),%xmm2
+	movdqa	%xmm3,%xmm0
+	pslld	$12,%xmm3
+	psrld	$20,%xmm0
+	por	%xmm0,%xmm3
+	movdqa	-128(%ebx),%xmm0
+	paddd	%xmm3,%xmm1
+	pxor	%xmm1,%xmm7
+	movdqa	%xmm1,-80(%ebx)
+	pshufb	16(%eax),%xmm7
+	paddd	%xmm7,%xmm5
+	movdqa	%xmm7,%xmm6
+	pxor	%xmm5,%xmm3
+	paddd	%xmm2,%xmm0
+	movdqa	%xmm3,%xmm1
+	pslld	$7,%xmm3
+	psrld	$25,%xmm1
+	pxor	%xmm0,%xmm6
+	por	%xmm1,%xmm3
+	pshufb	(%eax),%xmm6
+	movdqa	%xmm3,-16(%ebx)
+	paddd	%xmm6,%xmm4
+	pxor	%xmm4,%xmm2
+	movdqa	-32(%ebx),%xmm3
+	movdqa	%xmm2,%xmm1
+	pslld	$12,%xmm2
+	psrld	$20,%xmm1
+	por	%xmm1,%xmm2
+	movdqa	-112(%ebx),%xmm1
+	paddd	%xmm2,%xmm0
+	movdqa	64(%ebx),%xmm7
+	pxor	%xmm0,%xmm6
+	movdqa	%xmm0,-128(%ebx)
+	pshufb	16(%eax),%xmm6
+	paddd	%xmm6,%xmm4
+	movdqa	%xmm6,112(%ebx)
+	pxor	%xmm4,%xmm2
+	paddd	%xmm3,%xmm1
+	movdqa	%xmm2,%xmm0
+	pslld	$7,%xmm2
+	psrld	$25,%xmm0
+	pxor	%xmm1,%xmm7
+	por	%xmm0,%xmm2
+	movdqa	%xmm4,32(%ebx)
+	pshufb	(%eax),%xmm7
+	movdqa	%xmm2,-48(%ebx)
+	paddd	%xmm7,%xmm5
+	movdqa	(%ebx),%xmm4
+	pxor	%xmm5,%xmm3
+	movdqa	-16(%ebx),%xmm2
+	movdqa	%xmm3,%xmm0
+	pslld	$12,%xmm3
+	psrld	$20,%xmm0
+	por	%xmm0,%xmm3
+	movdqa	-96(%ebx),%xmm0
+	paddd	%xmm3,%xmm1
+	movdqa	80(%ebx),%xmm6
+	pxor	%xmm1,%xmm7
+	movdqa	%xmm1,-112(%ebx)
+	pshufb	16(%eax),%xmm7
+	paddd	%xmm7,%xmm5
+	movdqa	%xmm7,64(%ebx)
+	pxor	%xmm5,%xmm3
+	paddd	%xmm2,%xmm0
+	movdqa	%xmm3,%xmm1
+	pslld	$7,%xmm3
+	psrld	$25,%xmm1
+	pxor	%xmm0,%xmm6
+	por	%xmm1,%xmm3
+	movdqa	%xmm5,48(%ebx)
+	pshufb	(%eax),%xmm6
+	movdqa	%xmm3,-32(%ebx)
+	paddd	%xmm6,%xmm4
+	movdqa	16(%ebx),%xmm5
+	pxor	%xmm4,%xmm2
+	movdqa	-64(%ebx),%xmm3
+	movdqa	%xmm2,%xmm1
+	pslld	$12,%xmm2
+	psrld	$20,%xmm1
+	por	%xmm1,%xmm2
+	movdqa	-80(%ebx),%xmm1
+	paddd	%xmm2,%xmm0
+	movdqa	96(%ebx),%xmm7
+	pxor	%xmm0,%xmm6
+	movdqa	%xmm0,-96(%ebx)
+	pshufb	16(%eax),%xmm6
+	paddd	%xmm6,%xmm4
+	movdqa	%xmm6,80(%ebx)
+	pxor	%xmm4,%xmm2
+	paddd	%xmm3,%xmm1
+	movdqa	%xmm2,%xmm0
+	pslld	$7,%xmm2
+	psrld	$25,%xmm0
+	pxor	%xmm1,%xmm7
+	por	%xmm0,%xmm2
+	pshufb	(%eax),%xmm7
+	movdqa	%xmm2,-16(%ebx)
+	paddd	%xmm7,%xmm5
+	pxor	%xmm5,%xmm3
+	movdqa	%xmm3,%xmm0
+	pslld	$12,%xmm3
+	psrld	$20,%xmm0
+	por	%xmm0,%xmm3
+	movdqa	-128(%ebx),%xmm0
+	paddd	%xmm3,%xmm1
+	movdqa	64(%ebx),%xmm6
+	pxor	%xmm1,%xmm7
+	movdqa	%xmm1,-80(%ebx)
+	pshufb	16(%eax),%xmm7
+	paddd	%xmm7,%xmm5
+	movdqa	%xmm7,96(%ebx)
+	pxor	%xmm5,%xmm3
+	movdqa	%xmm3,%xmm1
+	pslld	$7,%xmm3
+	psrld	$25,%xmm1
+	por	%xmm1,%xmm3
+	decl	%edx
+	jnz	.L010loop
+	movdqa	%xmm3,-64(%ebx)	/* flush the rows still held in registers to the working state */
+	movdqa	%xmm4,(%ebx)
+	movdqa	%xmm5,16(%ebx)
+	movdqa	%xmm6,64(%ebx)
+	movdqa	%xmm7,96(%ebx)
+	movdqa	-112(%ebx),%xmm1
+	movdqa	-96(%ebx),%xmm2
+	movdqa	-80(%ebx),%xmm3
+	paddd	-128(%ebp),%xmm0	/* add the input state back (rows 0..3) */
+	paddd	-112(%ebp),%xmm1
+	paddd	-96(%ebp),%xmm2
+	paddd	-80(%ebp),%xmm3
+	movdqa	%xmm0,%xmm6	/* punpck sequence interleaves the four lanes; presumably a 4x4 dword transpose so each register holds 16 bytes of one block */
+	punpckldq	%xmm1,%xmm0
+	movdqa	%xmm2,%xmm7
+	punpckldq	%xmm3,%xmm2
+	punpckhdq	%xmm1,%xmm6
+	punpckhdq	%xmm3,%xmm7
+	movdqa	%xmm0,%xmm1
+	punpcklqdq	%xmm2,%xmm0
+	movdqa	%xmm6,%xmm3
+	punpcklqdq	%xmm7,%xmm6
+	punpckhqdq	%xmm2,%xmm1
+	punpckhqdq	%xmm7,%xmm3
+	movdqu	-128(%esi),%xmm4	/* 16 source bytes from each of the four 64-byte blocks (stride 64) */
+	movdqu	-64(%esi),%xmm5
+	movdqu	(%esi),%xmm2
+	movdqu	64(%esi),%xmm7
+	leal	16(%esi),%esi
+	pxor	%xmm0,%xmm4
+	movdqa	-64(%ebx),%xmm0
+	pxor	%xmm1,%xmm5
+	movdqa	-48(%ebx),%xmm1
+	pxor	%xmm2,%xmm6
+	movdqa	-32(%ebx),%xmm2
+	pxor	%xmm3,%xmm7
+	movdqa	-16(%ebx),%xmm3
+	movdqu	%xmm4,-128(%edi)	/* store the xored result to dst at the same four offsets */
+	movdqu	%xmm5,-64(%edi)
+	movdqu	%xmm6,(%edi)
+	movdqu	%xmm7,64(%edi)
+	leal	16(%edi),%edi
+	paddd	-64(%ebp),%xmm0	/* rows 4..7: same add / interleave / xor / store sequence */
+	paddd	-48(%ebp),%xmm1
+	paddd	-32(%ebp),%xmm2
+	paddd	-16(%ebp),%xmm3
+	movdqa	%xmm0,%xmm6
+	punpckldq	%xmm1,%xmm0
+	movdqa	%xmm2,%xmm7
+	punpckldq	%xmm3,%xmm2
+	punpckhdq	%xmm1,%xmm6
+	punpckhdq	%xmm3,%xmm7
+	movdqa	%xmm0,%xmm1
+	punpcklqdq	%xmm2,%xmm0
+	movdqa	%xmm6,%xmm3
+	punpcklqdq	%xmm7,%xmm6
+	punpckhqdq	%xmm2,%xmm1
+	punpckhqdq	%xmm7,%xmm3
+	movdqu	-128(%esi),%xmm4
+	movdqu	-64(%esi),%xmm5
+	movdqu	(%esi),%xmm2
+	movdqu	64(%esi),%xmm7
+	leal	16(%esi),%esi
+	pxor	%xmm0,%xmm4
+	movdqa	(%ebx),%xmm0
+	pxor	%xmm1,%xmm5
+	movdqa	16(%ebx),%xmm1
+	pxor	%xmm2,%xmm6
+	movdqa	32(%ebx),%xmm2
+	pxor	%xmm3,%xmm7
+	movdqa	48(%ebx),%xmm3
+	movdqu	%xmm4,-128(%edi)
+	movdqu	%xmm5,-64(%edi)
+	movdqu	%xmm6,(%edi)
+	movdqu	%xmm7,64(%edi)
+	leal	16(%edi),%edi
+	paddd	(%ebp),%xmm0	/* rows 8..11 */
+	paddd	16(%ebp),%xmm1
+	paddd	32(%ebp),%xmm2
+	paddd	48(%ebp),%xmm3
+	movdqa	%xmm0,%xmm6
+	punpckldq	%xmm1,%xmm0
+	movdqa	%xmm2,%xmm7
+	punpckldq	%xmm3,%xmm2
+	punpckhdq	%xmm1,%xmm6
+	punpckhdq	%xmm3,%xmm7
+	movdqa	%xmm0,%xmm1
+	punpcklqdq	%xmm2,%xmm0
+	movdqa	%xmm6,%xmm3
+	punpcklqdq	%xmm7,%xmm6
+	punpckhqdq	%xmm2,%xmm1
+	punpckhqdq	%xmm7,%xmm3
+	movdqu	-128(%esi),%xmm4
+	movdqu	-64(%esi),%xmm5
+	movdqu	(%esi),%xmm2
+	movdqu	64(%esi),%xmm7
+	leal	16(%esi),%esi
+	pxor	%xmm0,%xmm4
+	movdqa	64(%ebx),%xmm0
+	pxor	%xmm1,%xmm5
+	movdqa	80(%ebx),%xmm1
+	pxor	%xmm2,%xmm6
+	movdqa	96(%ebx),%xmm2
+	pxor	%xmm3,%xmm7
+	movdqa	112(%ebx),%xmm3
+	movdqu	%xmm4,-128(%edi)
+	movdqu	%xmm5,-64(%edi)
+	movdqu	%xmm6,(%edi)
+	movdqu	%xmm7,64(%edi)
+	leal	16(%edi),%edi
+	paddd	64(%ebp),%xmm0	/* rows 12..15 */
+	paddd	80(%ebp),%xmm1
+	paddd	96(%ebp),%xmm2
+	paddd	112(%ebp),%xmm3
+	movdqa	%xmm0,%xmm6
+	punpckldq	%xmm1,%xmm0
+	movdqa	%xmm2,%xmm7
+	punpckldq	%xmm3,%xmm2
+	punpckhdq	%xmm1,%xmm6
+	punpckhdq	%xmm3,%xmm7
+	movdqa	%xmm0,%xmm1
+	punpcklqdq	%xmm2,%xmm0
+	movdqa	%xmm6,%xmm3
+	punpcklqdq	%xmm7,%xmm6
+	punpckhqdq	%xmm2,%xmm1
+	punpckhqdq	%xmm7,%xmm3
+	movdqu	-128(%esi),%xmm4
+	movdqu	-64(%esi),%xmm5
+	movdqu	(%esi),%xmm2
+	movdqu	64(%esi),%xmm7
+	leal	208(%esi),%esi	/* 16+16+16+208 = 256: src has advanced by one full pass */
+	pxor	%xmm0,%xmm4
+	pxor	%xmm1,%xmm5
+	pxor	%xmm2,%xmm6
+	pxor	%xmm3,%xmm7
+	movdqu	%xmm4,-128(%edi)
+	movdqu	%xmm5,-64(%edi)
+	movdqu	%xmm6,(%edi)
+	movdqu	%xmm7,64(%edi)
+	leal	208(%edi),%edi	/* dst likewise advanced by 256 */
+	subl	$256,%ecx
+	jnc	.L009outer_loop	/* no borrow: at least another 256 bytes were pending */
+	addl	$256,%ecx	/* undo the over-subtraction; %ecx = bytes still to do (0..255) */
+	jz	.L011done
+	movl	520(%esp),%ebx	/* reload the counter block pointer */
+	leal	-128(%esi),%esi	/* remove the +128 bias from src and dst */
+	movl	516(%esp),%edx	/* reload the key pointer */
+	leal	-128(%edi),%edi
+	movd	64(%ebp),%xmm2	/* lane-0 counter as last advanced in .L009outer_loop */
+	movdqu	(%ebx),%xmm3
+	paddd	96(%eax),%xmm2	/* add {4,0,0,0} */
+	pand	112(%eax),%xmm3	/* mask {0,-1,-1,-1}: clear dword 0 of the counter block */
+	por	%xmm2,%xmm3	/* insert the updated counter as dword 0 */
+.L0081x:	/* one 64-byte block per iteration; %xmm3 = counter block, %edx = key pointer */
+	movdqa	32(%eax),%xmm0	/* row 0 = the four constant words */
+	movdqu	(%edx),%xmm1	/* rows 1 and 2 = the 32 key bytes */
+	movdqu	16(%edx),%xmm2
+	movdqa	(%eax),%xmm6	/* %xmm6 = rotate-by-16 shuffle mask */
+	movdqa	16(%eax),%xmm7	/* %xmm7 = rotate-by-8 shuffle mask */
+	movl	%ebp,48(%esp)	/* NOTE(review): overwritten by the 16-byte store to 48(%esp) four lines below */
+	movdqa	%xmm0,(%esp)	/* keep the input state at 0..63(%esp) for the final add */
+	movdqa	%xmm1,16(%esp)
+	movdqa	%xmm2,32(%esp)
+	movdqa	%xmm3,48(%esp)
+	movl	$10,%edx	/* 10 iterations of .L012loop1x */
+	jmp	.L012loop1x
+.align	16
+.L013outer1x:	/* next block: reload the input state and add {1,0,0,0} to the counter row */
+	movdqa	80(%eax),%xmm3
+	movdqa	(%esp),%xmm0
+	movdqa	16(%esp),%xmm1
+	movdqa	32(%esp),%xmm2
+	paddd	48(%esp),%xmm3
+	movl	$10,%edx
+	movdqa	%xmm3,48(%esp)
+	jmp	.L012loop1x
+.align	16
+.L012loop1x:
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,222	/* bytes 66 0F 38 00 DE = pshufb %xmm6,%xmm3 (rotate each dword left by 16) */
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$20,%xmm1
+	pslld	$12,%xmm4
+	por	%xmm4,%xmm1	/* rotate left by 12 */
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,223	/* bytes 66 0F 38 00 DF = pshufb %xmm7,%xmm3 (rotate each dword left by 8) */
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$25,%xmm1
+	pslld	$7,%xmm4
+	por	%xmm4,%xmm1	/* rotate left by 7 */
+	pshufd	$78,%xmm2,%xmm2	/* rotate the dwords within rows 2, 1, 3 by 2, 1, 3 positions */
+	pshufd	$57,%xmm1,%xmm1
+	pshufd	$147,%xmm3,%xmm3
+	nop
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,222	/* pshufb %xmm6,%xmm3 */
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$20,%xmm1
+	pslld	$12,%xmm4
+	por	%xmm4,%xmm1
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,223	/* pshufb %xmm7,%xmm3 */
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$25,%xmm1
+	pslld	$7,%xmm4
+	por	%xmm4,%xmm1
+	pshufd	$78,%xmm2,%xmm2	/* inverse dword rotation: rows return to their original alignment */
+	pshufd	$147,%xmm1,%xmm1
+	pshufd	$57,%xmm3,%xmm3
+	decl	%edx
+	jnz	.L012loop1x
+	paddd	(%esp),%xmm0	/* add the input state back */
+	paddd	16(%esp),%xmm1
+	paddd	32(%esp),%xmm2
+	paddd	48(%esp),%xmm3
+	cmpl	$64,%ecx	/* fewer than 64 bytes left: byte-wise tail */
+	jb	.L014tail
+	movdqu	(%esi),%xmm4	/* xor 64 source bytes with the keystream and store to dst */
+	movdqu	16(%esi),%xmm5
+	pxor	%xmm4,%xmm0
+	movdqu	32(%esi),%xmm4
+	pxor	%xmm5,%xmm1
+	movdqu	48(%esi),%xmm5
+	pxor	%xmm4,%xmm2
+	pxor	%xmm5,%xmm3
+	leal	64(%esi),%esi
+	movdqu	%xmm0,(%edi)
+	movdqu	%xmm1,16(%edi)
+	movdqu	%xmm2,32(%edi)
+	movdqu	%xmm3,48(%edi)
+	leal	64(%edi),%edi
+	subl	$64,%ecx
+	jnz	.L013outer1x
+	jmp	.L011done
+.L014tail:	/* spill the keystream block to 0..63(%esp), then xor %ecx bytes one at a time */
+	movdqa	%xmm0,(%esp)
+	movdqa	%xmm1,16(%esp)
+	movdqa	%xmm2,32(%esp)
+	movdqa	%xmm3,48(%esp)
+	xorl	%eax,%eax
+	xorl	%edx,%edx
+	xorl	%ebp,%ebp	/* %ebp = byte index, starting at 0 */
+.L015tail_loop:
+	movb	(%esp,%ebp,1),%al
+	movb	(%esi,%ebp,1),%dl
+	leal	1(%ebp),%ebp
+	xorb	%dl,%al
+	movb	%al,-1(%edi,%ebp,1)	/* -1 compensates for the index increment two lines up */
+	decl	%ecx
+	jnz	.L015tail_loop
+.L011done:
+	movl	512(%esp),%esp	/* restore the %esp saved in the prologue */
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
+.align	64
+.Lssse3_data:	/* constant table for ChaCha20_ssse3, addressed through %eax at 16-byte offsets */
+.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13	/* +0: pshufb mask, rotates each dword left by 16 bits */
+.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14	/* +16: pshufb mask, rotates each dword left by 8 bits */
+.long	1634760805,857760878,2036477234,1797285236	/* +32: 0x61707865,0x3320646e,0x79622d32,0x6b206574 = ASCII "expand 32-byte k" */
+.long	0,1,2,3	/* +48: per-lane counter offsets for the four-block path */
+.long	4,4,4,4	/* +64: counter step per pass of the four-block loop */
+.long	1,0,0,0	/* +80: counter step for the one-block loop */
+.long	4,0,0,0	/* +96: added to the lane-0 counter when leaving the four-block loop */
+.long	0,-1,-1,-1	/* +112: mask that clears dword 0 of the counter block */
+.align	64
+.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54	/* NUL-terminated ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>" */
+.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
+.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
+.byte	114,103,62,0
+#endif
diff --git a/third_party/boringssl/linux-x86/crypto/fipsmodule/aes-586.S b/third_party/boringssl/linux-x86/crypto/fipsmodule/aes-586.S
new file mode 100644
index 0000000..319ed62
--- /dev/null
+++ b/third_party/boringssl/linux-x86/crypto/fipsmodule/aes-586.S
@@ -0,0 +1,3256 @@
+#if defined(__i386__)
+.text
+.hidden	_x86_AES_encrypt_compact	/* integer-only block transform: state in eax/ebx/ecx/edx, edi = round-key pointer, ebp = 256-byte table base + 128 */
+.type	_x86_AES_encrypt_compact,@function
+.align	16
+_x86_AES_encrypt_compact:
+	movl	%edi,20(%esp)	/* 20(%esp): current round-key pointer */
+	xorl	(%edi),%eax	/* xor the first 16 key bytes into the state */
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	movl	240(%edi),%esi	/* n = dword at key+240 (presumably the round count - TODO confirm) */
+	leal	-2(%esi,%esi,1),%esi	/* esi = 2*n - 2 */
+	leal	(%edi,%esi,8),%esi	/* esi = key + 16*(n-1) */
+	movl	%esi,24(%esp)	/* 24(%esp): loop end pointer */
+	movl	-128(%ebp),%edi	/* eight loads at 32-byte strides across the table; values are discarded (presumably cache warm-up) */
+	movl	-96(%ebp),%esi
+	movl	-64(%ebp),%edi
+	movl	-32(%ebp),%esi
+	movl	(%ebp),%edi
+	movl	32(%ebp),%esi
+	movl	64(%ebp),%edi
+	movl	96(%ebp),%esi
+.align	16
+.L000loop:	/* loop body: byte-table substitution of all 16 state bytes, then packed mixing */
+	movl	%eax,%esi
+	andl	$255,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%bh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,4(%esp)	/* 4(%esp): first substituted word */
+
+	movl	%ebx,%esi
+	andl	$255,%esi
+	shrl	$16,%ebx
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%ch,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,8(%esp)	/* 8(%esp): second substituted word */
+
+	movl	%ecx,%esi
+	andl	$255,%esi
+	shrl	$24,%ecx
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%dh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$16,%edi
+	andl	$255,%edx
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movzbl	%bh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+
+	andl	$255,%edx
+	movzbl	-128(%ebp,%edx,1),%edx
+	movzbl	%ah,%eax
+	movzbl	-128(%ebp,%eax,1),%eax
+	shll	$8,%eax
+	xorl	%eax,%edx
+	movl	4(%esp),%eax
+	andl	$255,%ebx
+	movzbl	-128(%ebp,%ebx,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%edx
+	movl	8(%esp),%ebx
+	movzbl	-128(%ebp,%ecx,1),%ecx
+	shll	$24,%ecx
+	xorl	%ecx,%edx
+	movl	%esi,%ecx	/* substituted words are now back in eax/ebx/ecx/edx */
+
+	movl	$2155905152,%ebp	/* 0x80808080: top bit of every byte (ebp is scratch from here on) */
+	andl	%ecx,%ebp
+	leal	(%ecx,%ecx,1),%edi
+	movl	%ebp,%esi
+	shrl	$7,%ebp
+	andl	$4278124286,%edi	/* 0xfefefefe: drop bits that the doubling carried across byte lanes */
+	subl	%ebp,%esi
+	movl	%ecx,%ebp
+	andl	$454761243,%esi	/* 0x1b1b1b1b: leaves 0x1b in each byte whose top bit was set */
+	rorl	$16,%ebp
+	xorl	%edi,%esi	/* esi = byte-wise doubling of ecx with 0x1b reduction */
+	movl	%ecx,%edi
+	xorl	%esi,%ecx
+	rorl	$24,%edi
+	xorl	%ebp,%esi
+	roll	$24,%ecx
+	xorl	%edi,%esi
+	movl	$2155905152,%ebp	/* same mixing sequence, now applied to edx */
+	xorl	%esi,%ecx
+	andl	%edx,%ebp
+	leal	(%edx,%edx,1),%edi
+	movl	%ebp,%esi
+	shrl	$7,%ebp
+	andl	$4278124286,%edi
+	subl	%ebp,%esi
+	movl	%edx,%ebp
+	andl	$454761243,%esi
+	rorl	$16,%ebp
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	xorl	%esi,%edx
+	rorl	$24,%edi
+	xorl	%ebp,%esi
+	roll	$24,%edx
+	xorl	%edi,%esi
+	movl	$2155905152,%ebp	/* same mixing sequence, now applied to eax */
+	xorl	%esi,%edx
+	andl	%eax,%ebp
+	leal	(%eax,%eax,1),%edi
+	movl	%ebp,%esi
+	shrl	$7,%ebp
+	andl	$4278124286,%edi
+	subl	%ebp,%esi
+	movl	%eax,%ebp
+	andl	$454761243,%esi
+	rorl	$16,%ebp
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	xorl	%esi,%eax
+	rorl	$24,%edi
+	xorl	%ebp,%esi
+	roll	$24,%eax
+	xorl	%edi,%esi
+	movl	$2155905152,%ebp	/* same mixing sequence, now applied to ebx */
+	xorl	%esi,%eax
+	andl	%ebx,%ebp
+	leal	(%ebx,%ebx,1),%edi
+	movl	%ebp,%esi
+	shrl	$7,%ebp
+	andl	$4278124286,%edi
+	subl	%ebp,%esi
+	movl	%ebx,%ebp
+	andl	$454761243,%esi
+	rorl	$16,%ebp
+	xorl	%edi,%esi
+	movl	%ebx,%edi
+	xorl	%esi,%ebx
+	rorl	$24,%edi
+	xorl	%ebp,%esi
+	roll	$24,%ebx
+	xorl	%edi,%esi
+	xorl	%esi,%ebx
+	movl	20(%esp),%edi	/* reload the round-key pointer */
+	movl	28(%esp),%ebp	/* reload the table pointer; this slot must have been filled in by the caller */
+	addl	$16,%edi	/* advance to the next 16-byte round key */
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	cmpl	24(%esp),%edi	/* flags survive the movl below */
+	movl	%edi,20(%esp)
+	jb	.L000loop	/* repeat while the key pointer is below the end pointer */
+	movl	%eax,%esi	/* final pass: substitution only, no mixing */
+	andl	$255,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%bh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,4(%esp)
+
+	movl	%ebx,%esi
+	andl	$255,%esi
+	shrl	$16,%ebx
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%ch,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,8(%esp)
+
+	movl	%ecx,%esi
+	andl	$255,%esi
+	shrl	$24,%ecx
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%dh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$16,%edi
+	andl	$255,%edx
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movzbl	%bh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+
+	movl	20(%esp),%edi	/* edi = last round-key pointer stored by the loop */
+	andl	$255,%edx
+	movzbl	-128(%ebp,%edx,1),%edx
+	movzbl	%ah,%eax
+	movzbl	-128(%ebp,%eax,1),%eax
+	shll	$8,%eax
+	xorl	%eax,%edx
+	movl	4(%esp),%eax
+	andl	$255,%ebx
+	movzbl	-128(%ebp,%ebx,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%edx
+	movl	8(%esp),%ebx
+	movzbl	-128(%ebp,%ecx,1),%ecx
+	shll	$24,%ecx
+	xorl	%ecx,%edx
+	movl	%esi,%ecx
+
+	xorl	16(%edi),%eax	/* xor the following 16 key bytes; result is returned in eax/ebx/ecx/edx */
+	xorl	20(%edi),%ebx
+	xorl	24(%edi),%ecx
+	xorl	28(%edi),%edx
+	ret
+.size	_x86_AES_encrypt_compact,.-_x86_AES_encrypt_compact
+.hidden	_sse_AES_encrypt_compact	/* MMX variant: state in mm0/mm4, edi = round-key pointer, ebp = 256-byte table base + 128 */
+.type	_sse_AES_encrypt_compact,@function
+.align	16
+_sse_AES_encrypt_compact:
+	pxor	(%edi),%mm0	/* xor the first 16 key bytes into the state */
+	pxor	8(%edi),%mm4
+	movl	240(%edi),%esi	/* n = dword at key+240 (presumably the round count - TODO confirm) */
+	leal	-2(%esi,%esi,1),%esi
+	leal	(%edi,%esi,8),%esi	/* esi = key + 16*(n-1) */
+	movl	%esi,24(%esp)	/* 24(%esp): loop end pointer */
+	movl	$454761243,%eax	/* 0x1b1b1b1b */
+	movl	%eax,8(%esp)	/* 8(%esp)..15(%esp): 8-byte reduction constant, read back with movq in the loop */
+	movl	%eax,12(%esp)
+	movl	-128(%ebp),%eax	/* eight loads at 32-byte strides across the table; values are overwritten (presumably cache warm-up) */
+	movl	-96(%ebp),%ebx
+	movl	-64(%ebp),%ecx
+	movl	-32(%ebp),%edx
+	movl	(%ebp),%eax
+	movl	32(%ebp),%ebx
+	movl	64(%ebp),%ecx
+	movl	96(%ebp),%edx
+.align	16
+.L001loop:	/* loop body: byte-table substitution through the integer registers, then MMX mixing */
+	pshufw	$8,%mm0,%mm1
+	pshufw	$13,%mm4,%mm5
+	movd	%mm1,%eax
+	movd	%mm5,%ebx
+	movl	%edi,20(%esp)	/* save the key pointer; edi is used as a table index below */
+	movzbl	%al,%esi
+	movzbl	%ah,%edx
+	pshufw	$13,%mm0,%mm2
+	movzbl	-128(%ebp,%esi,1),%ecx
+	movzbl	%bl,%edi
+	movzbl	-128(%ebp,%edx,1),%edx
+	shrl	$16,%eax
+	shll	$8,%edx
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%bh,%edi
+	shll	$16,%esi
+	pshufw	$8,%mm4,%mm6
+	orl	%esi,%ecx
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%ah,%edi
+	shll	$24,%esi
+	shrl	$16,%ebx
+	orl	%esi,%edx
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%bh,%edi
+	shll	$8,%esi
+	orl	%esi,%ecx
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%al,%edi
+	shll	$24,%esi
+	orl	%esi,%ecx
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%bl,%edi
+	movd	%mm2,%eax
+	movd	%ecx,%mm0
+	movzbl	-128(%ebp,%edi,1),%ecx
+	movzbl	%ah,%edi
+	shll	$16,%ecx
+	movd	%mm6,%ebx
+	orl	%esi,%ecx
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%bh,%edi
+	shll	$24,%esi
+	orl	%esi,%ecx
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%bl,%edi
+	shll	$8,%esi
+	shrl	$16,%ebx
+	orl	%esi,%ecx
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%al,%edi
+	shrl	$16,%eax
+	movd	%ecx,%mm1
+	movzbl	-128(%ebp,%edi,1),%ecx
+	movzbl	%ah,%edi
+	shll	$16,%ecx
+	andl	$255,%eax
+	orl	%esi,%ecx
+	punpckldq	%mm1,%mm0	/* mm0 = first two substituted words */
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%bh,%edi
+	shll	$24,%esi
+	andl	$255,%ebx
+	movzbl	-128(%ebp,%eax,1),%eax
+	orl	%esi,%ecx
+	shll	$16,%eax
+	movzbl	-128(%ebp,%edi,1),%esi
+	orl	%eax,%edx
+	shll	$8,%esi
+	movzbl	-128(%ebp,%ebx,1),%ebx
+	orl	%esi,%ecx
+	orl	%ebx,%edx
+	movl	20(%esp),%edi	/* restore the key pointer */
+	movd	%ecx,%mm4
+	movd	%edx,%mm5
+	punpckldq	%mm5,%mm4	/* mm4 = last two substituted words */
+	addl	$16,%edi	/* advance to the next 16-byte round key */
+	cmpl	24(%esp),%edi
+	ja	.L002out	/* past the end pointer: skip the mixing and finish */
+	movq	8(%esp),%mm2	/* mm2 = 0x1b in every byte */
+	pxor	%mm3,%mm3
+	pxor	%mm7,%mm7
+	movq	%mm0,%mm1
+	movq	%mm4,%mm5
+	pcmpgtb	%mm0,%mm3	/* signed 0 > byte: 0xff in each lane whose state byte has its top bit set */
+	pcmpgtb	%mm4,%mm7
+	pand	%mm2,%mm3	/* 0x1b in exactly those lanes */
+	pand	%mm2,%mm7
+	pshufw	$177,%mm0,%mm2	/* imm 177 swaps the two 16-bit halves of each dword */
+	pshufw	$177,%mm4,%mm6
+	paddb	%mm0,%mm0	/* byte-wise doubling */
+	paddb	%mm4,%mm4
+	pxor	%mm3,%mm0	/* apply the 0x1b reduction */
+	pxor	%mm7,%mm4
+	pshufw	$177,%mm2,%mm3
+	pshufw	$177,%mm6,%mm7
+	pxor	%mm0,%mm1
+	pxor	%mm4,%mm5
+	pxor	%mm2,%mm0
+	pxor	%mm6,%mm4
+	movq	%mm3,%mm2
+	movq	%mm7,%mm6
+	pslld	$8,%mm3
+	pslld	$8,%mm7
+	psrld	$24,%mm2
+	psrld	$24,%mm6
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	pxor	%mm2,%mm0
+	pxor	%mm6,%mm4
+	movq	%mm1,%mm3
+	movq	%mm5,%mm7
+	movq	(%edi),%mm2	/* load the next 16-byte round key */
+	movq	8(%edi),%mm6
+	psrld	$8,%mm1
+	psrld	$8,%mm5
+	movl	-128(%ebp),%eax	/* table loads whose values are overwritten at the loop top (presumably cache warm-up) */
+	pslld	$24,%mm3
+	pslld	$24,%mm7
+	movl	-64(%ebp),%ebx
+	pxor	%mm1,%mm0
+	pxor	%mm5,%mm4
+	movl	(%ebp),%ecx
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	movl	64(%ebp),%edx
+	pxor	%mm2,%mm0	/* xor the round key into the state */
+	pxor	%mm6,%mm4
+	jmp	.L001loop
+.align	16
+.L002out:
+	pxor	(%edi),%mm0	/* xor the final 16 key bytes; result is returned in mm0/mm4 */
+	pxor	8(%edi),%mm4
+	ret
+.size	_sse_AES_encrypt_compact,.-_sse_AES_encrypt_compact
+.hidden	_x86_AES_encrypt	/* table-driven variant: state in eax/ebx/ecx/edx, edi = round-key pointer, ebp = base of a table with 8-byte entries */
+.type	_x86_AES_encrypt,@function
+.align	16
+_x86_AES_encrypt:
+	movl	%edi,20(%esp)	/* 20(%esp): current round-key pointer */
+	xorl	(%edi),%eax	/* xor the first 16 key bytes into the state */
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	movl	240(%edi),%esi	/* n = dword at key+240 (presumably the round count - TODO confirm) */
+	leal	-2(%esi,%esi,1),%esi
+	leal	(%edi,%esi,8),%esi	/* esi = key + 16*(n-1) */
+	movl	%esi,24(%esp)	/* 24(%esp): loop end pointer */
+.align	16
+.L003loop:	/* loop body: each new word is the xor of four table lookups, one per state byte */
+	movl	%eax,%esi
+	andl	$255,%esi
+	movl	(%ebp,%esi,8),%esi	/* index scaled by 8: one 8-byte entry per byte value */
+	movzbl	%bh,%edi
+	xorl	3(%ebp,%edi,8),%esi	/* byte offsets 3/2/1 read the entry at shifted alignments; presumably the table is .LAES_Te with each dword stored twice, making these byte rotations - TODO confirm */
+	movl	%ecx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	xorl	2(%ebp,%edi,8),%esi
+	movl	%edx,%edi
+	shrl	$24,%edi
+	xorl	1(%ebp,%edi,8),%esi
+	movl	%esi,4(%esp)	/* 4(%esp): first new word */
+
+	movl	%ebx,%esi
+	andl	$255,%esi
+	shrl	$16,%ebx
+	movl	(%ebp,%esi,8),%esi
+	movzbl	%ch,%edi
+	xorl	3(%ebp,%edi,8),%esi
+	movl	%edx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	xorl	2(%ebp,%edi,8),%esi
+	movl	%eax,%edi
+	shrl	$24,%edi
+	xorl	1(%ebp,%edi,8),%esi
+	movl	%esi,8(%esp)	/* 8(%esp): second new word */
+
+	movl	%ecx,%esi
+	andl	$255,%esi
+	shrl	$24,%ecx
+	movl	(%ebp,%esi,8),%esi
+	movzbl	%dh,%edi
+	xorl	3(%ebp,%edi,8),%esi
+	movl	%eax,%edi
+	shrl	$16,%edi
+	andl	$255,%edx
+	andl	$255,%edi
+	xorl	2(%ebp,%edi,8),%esi
+	movzbl	%bh,%edi
+	xorl	1(%ebp,%edi,8),%esi
+
+	movl	20(%esp),%edi	/* reload the round-key pointer */
+	movl	(%ebp,%edx,8),%edx
+	movzbl	%ah,%eax
+	xorl	3(%ebp,%eax,8),%edx
+	movl	4(%esp),%eax
+	andl	$255,%ebx
+	xorl	2(%ebp,%ebx,8),%edx
+	movl	8(%esp),%ebx
+	xorl	1(%ebp,%ecx,8),%edx
+	movl	%esi,%ecx
+
+	addl	$16,%edi	/* advance to the next 16-byte round key */
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	cmpl	24(%esp),%edi	/* flags survive the movl below */
+	movl	%edi,20(%esp)
+	jb	.L003loop	/* repeat while the key pointer is below the end pointer */
+	movl	%eax,%esi	/* final pass: every lookup is masked down to a single byte lane */
+	andl	$255,%esi
+	movl	2(%ebp,%esi,8),%esi
+	andl	$255,%esi	/* 0x000000ff */
+	movzbl	%bh,%edi
+	movl	(%ebp,%edi,8),%edi
+	andl	$65280,%edi	/* 0x0000ff00 */
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movl	(%ebp,%edi,8),%edi
+	andl	$16711680,%edi	/* 0x00ff0000 */
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$24,%edi
+	movl	2(%ebp,%edi,8),%edi
+	andl	$4278190080,%edi	/* 0xff000000 */
+	xorl	%edi,%esi
+	movl	%esi,4(%esp)
+	movl	%ebx,%esi
+	andl	$255,%esi
+	shrl	$16,%ebx
+	movl	2(%ebp,%esi,8),%esi
+	andl	$255,%esi
+	movzbl	%ch,%edi
+	movl	(%ebp,%edi,8),%edi
+	andl	$65280,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movl	(%ebp,%edi,8),%edi
+	andl	$16711680,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$24,%edi
+	movl	2(%ebp,%edi,8),%edi
+	andl	$4278190080,%edi
+	xorl	%edi,%esi
+	movl	%esi,8(%esp)
+	movl	%ecx,%esi
+	andl	$255,%esi
+	shrl	$24,%ecx
+	movl	2(%ebp,%esi,8),%esi
+	andl	$255,%esi
+	movzbl	%dh,%edi
+	movl	(%ebp,%edi,8),%edi
+	andl	$65280,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$16,%edi
+	andl	$255,%edx
+	andl	$255,%edi
+	movl	(%ebp,%edi,8),%edi
+	andl	$16711680,%edi
+	xorl	%edi,%esi
+	movzbl	%bh,%edi
+	movl	2(%ebp,%edi,8),%edi
+	andl	$4278190080,%edi
+	xorl	%edi,%esi
+	movl	20(%esp),%edi	/* reload the round-key pointer */
+	andl	$255,%edx
+	movl	2(%ebp,%edx,8),%edx
+	andl	$255,%edx
+	movzbl	%ah,%eax
+	movl	(%ebp,%eax,8),%eax
+	andl	$65280,%eax
+	xorl	%eax,%edx
+	movl	4(%esp),%eax
+	andl	$255,%ebx
+	movl	(%ebp,%ebx,8),%ebx
+	andl	$16711680,%ebx
+	xorl	%ebx,%edx
+	movl	8(%esp),%ebx
+	movl	2(%ebp,%ecx,8),%ecx
+	andl	$4278190080,%ecx
+	xorl	%ecx,%edx
+	movl	%esi,%ecx
+	addl	$16,%edi
+	xorl	(%edi),%eax	/* xor the final 16 key bytes; result is returned in eax/ebx/ecx/edx */
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	ret
+.align	64
+.LAES_Te:
+.long	2774754246,2774754246
+.long	2222750968,2222750968
+.long	2574743534,2574743534
+.long	2373680118,2373680118
+.long	234025727,234025727
+.long	3177933782,3177933782
+.long	2976870366,2976870366
+.long	1422247313,1422247313
+.long	1345335392,1345335392
+.long	50397442,50397442
+.long	2842126286,2842126286
+.long	2099981142,2099981142
+.long	436141799,436141799
+.long	1658312629,1658312629
+.long	3870010189,3870010189
+.long	2591454956,2591454956
+.long	1170918031,1170918031
+.long	2642575903,2642575903
+.long	1086966153,1086966153
+.long	2273148410,2273148410
+.long	368769775,368769775
+.long	3948501426,3948501426
+.long	3376891790,3376891790
+.long	200339707,200339707
+.long	3970805057,3970805057
+.long	1742001331,1742001331
+.long	4255294047,4255294047
+.long	3937382213,3937382213
+.long	3214711843,3214711843
+.long	4154762323,4154762323
+.long	2524082916,2524082916
+.long	1539358875,1539358875
+.long	3266819957,3266819957
+.long	486407649,486407649
+.long	2928907069,2928907069
+.long	1780885068,1780885068
+.long	1513502316,1513502316
+.long	1094664062,1094664062
+.long	49805301,49805301
+.long	1338821763,1338821763
+.long	1546925160,1546925160
+.long	4104496465,4104496465
+.long	887481809,887481809
+.long	150073849,150073849
+.long	2473685474,2473685474
+.long	1943591083,1943591083
+.long	1395732834,1395732834
+.long	1058346282,1058346282
+.long	201589768,201589768
+.long	1388824469,1388824469
+.long	1696801606,1696801606
+.long	1589887901,1589887901
+.long	672667696,672667696
+.long	2711000631,2711000631
+.long	251987210,251987210
+.long	3046808111,3046808111
+.long	151455502,151455502
+.long	907153956,907153956
+.long	2608889883,2608889883
+.long	1038279391,1038279391
+.long	652995533,652995533
+.long	1764173646,1764173646
+.long	3451040383,3451040383
+.long	2675275242,2675275242
+.long	453576978,453576978
+.long	2659418909,2659418909
+.long	1949051992,1949051992
+.long	773462580,773462580
+.long	756751158,756751158
+.long	2993581788,2993581788
+.long	3998898868,3998898868
+.long	4221608027,4221608027
+.long	4132590244,4132590244
+.long	1295727478,1295727478
+.long	1641469623,1641469623
+.long	3467883389,3467883389
+.long	2066295122,2066295122
+.long	1055122397,1055122397
+.long	1898917726,1898917726
+.long	2542044179,2542044179
+.long	4115878822,4115878822
+.long	1758581177,1758581177
+.long	0,0
+.long	753790401,753790401
+.long	1612718144,1612718144
+.long	536673507,536673507
+.long	3367088505,3367088505
+.long	3982187446,3982187446
+.long	3194645204,3194645204
+.long	1187761037,1187761037
+.long	3653156455,3653156455
+.long	1262041458,1262041458
+.long	3729410708,3729410708
+.long	3561770136,3561770136
+.long	3898103984,3898103984
+.long	1255133061,1255133061
+.long	1808847035,1808847035
+.long	720367557,720367557
+.long	3853167183,3853167183
+.long	385612781,385612781
+.long	3309519750,3309519750
+.long	3612167578,3612167578
+.long	1429418854,1429418854
+.long	2491778321,2491778321
+.long	3477423498,3477423498
+.long	284817897,284817897
+.long	100794884,100794884
+.long	2172616702,2172616702
+.long	4031795360,4031795360
+.long	1144798328,1144798328
+.long	3131023141,3131023141
+.long	3819481163,3819481163
+.long	4082192802,4082192802
+.long	4272137053,4272137053
+.long	3225436288,3225436288
+.long	2324664069,2324664069
+.long	2912064063,2912064063
+.long	3164445985,3164445985
+.long	1211644016,1211644016
+.long	83228145,83228145
+.long	3753688163,3753688163
+.long	3249976951,3249976951
+.long	1977277103,1977277103
+.long	1663115586,1663115586
+.long	806359072,806359072
+.long	452984805,452984805
+.long	250868733,250868733
+.long	1842533055,1842533055
+.long	1288555905,1288555905
+.long	336333848,336333848
+.long	890442534,890442534
+.long	804056259,804056259
+.long	3781124030,3781124030
+.long	2727843637,2727843637
+.long	3427026056,3427026056
+.long	957814574,957814574
+.long	1472513171,1472513171
+.long	4071073621,4071073621
+.long	2189328124,2189328124
+.long	1195195770,1195195770
+.long	2892260552,2892260552
+.long	3881655738,3881655738
+.long	723065138,723065138
+.long	2507371494,2507371494
+.long	2690670784,2690670784
+.long	2558624025,2558624025
+.long	3511635870,3511635870
+.long	2145180835,2145180835
+.long	1713513028,1713513028
+.long	2116692564,2116692564
+.long	2878378043,2878378043
+.long	2206763019,2206763019
+.long	3393603212,3393603212
+.long	703524551,703524551
+.long	3552098411,3552098411
+.long	1007948840,1007948840
+.long	2044649127,2044649127
+.long	3797835452,3797835452
+.long	487262998,487262998
+.long	1994120109,1994120109
+.long	1004593371,1004593371
+.long	1446130276,1446130276
+.long	1312438900,1312438900
+.long	503974420,503974420
+.long	3679013266,3679013266
+.long	168166924,168166924
+.long	1814307912,1814307912
+.long	3831258296,3831258296
+.long	1573044895,1573044895
+.long	1859376061,1859376061
+.long	4021070915,4021070915
+.long	2791465668,2791465668
+.long	2828112185,2828112185
+.long	2761266481,2761266481
+.long	937747667,937747667
+.long	2339994098,2339994098
+.long	854058965,854058965
+.long	1137232011,1137232011
+.long	1496790894,1496790894
+.long	3077402074,3077402074
+.long	2358086913,2358086913
+.long	1691735473,1691735473
+.long	3528347292,3528347292
+.long	3769215305,3769215305
+.long	3027004632,3027004632
+.long	4199962284,4199962284
+.long	133494003,133494003
+.long	636152527,636152527
+.long	2942657994,2942657994
+.long	2390391540,2390391540
+.long	3920539207,3920539207
+.long	403179536,403179536
+.long	3585784431,3585784431
+.long	2289596656,2289596656
+.long	1864705354,1864705354
+.long	1915629148,1915629148
+.long	605822008,605822008
+.long	4054230615,4054230615
+.long	3350508659,3350508659
+.long	1371981463,1371981463
+.long	602466507,602466507
+.long	2094914977,2094914977
+.long	2624877800,2624877800
+.long	555687742,555687742
+.long	3712699286,3712699286
+.long	3703422305,3703422305
+.long	2257292045,2257292045
+.long	2240449039,2240449039
+.long	2423288032,2423288032
+.long	1111375484,1111375484
+.long	3300242801,3300242801
+.long	2858837708,2858837708
+.long	3628615824,3628615824
+.long	84083462,84083462
+.long	32962295,32962295
+.long	302911004,302911004
+.long	2741068226,2741068226
+.long	1597322602,1597322602
+.long	4183250862,4183250862
+.long	3501832553,3501832553
+.long	2441512471,2441512471
+.long	1489093017,1489093017
+.long	656219450,656219450
+.long	3114180135,3114180135
+.long	954327513,954327513
+.long	335083755,335083755
+.long	3013122091,3013122091
+.long	856756514,856756514
+.long	3144247762,3144247762
+.long	1893325225,1893325225
+.long	2307821063,2307821063
+.long	2811532339,2811532339
+.long	3063651117,3063651117
+.long	572399164,572399164
+.long	2458355477,2458355477
+.long	552200649,552200649
+.long	1238290055,1238290055
+.long	4283782570,4283782570
+.long	2015897680,2015897680
+.long	2061492133,2061492133
+.long	2408352771,2408352771
+.long	4171342169,4171342169
+.long	2156497161,2156497161
+.long	386731290,386731290
+.long	3669999461,3669999461
+.long	837215959,837215959
+.long	3326231172,3326231172
+.long	3093850320,3093850320
+.long	3275833730,3275833730
+.long	2962856233,2962856233
+.long	1999449434,1999449434
+.long	286199582,286199582
+.long	3417354363,3417354363
+.long	4233385128,4233385128
+.long	3602627437,3602627437
+.long	974525996,974525996
+.byte	99,124,119,123,242,107,111,197
+.byte	48,1,103,43,254,215,171,118
+.byte	202,130,201,125,250,89,71,240
+.byte	173,212,162,175,156,164,114,192
+.byte	183,253,147,38,54,63,247,204
+.byte	52,165,229,241,113,216,49,21
+.byte	4,199,35,195,24,150,5,154
+.byte	7,18,128,226,235,39,178,117
+.byte	9,131,44,26,27,110,90,160
+.byte	82,59,214,179,41,227,47,132
+.byte	83,209,0,237,32,252,177,91
+.byte	106,203,190,57,74,76,88,207
+.byte	208,239,170,251,67,77,51,133
+.byte	69,249,2,127,80,60,159,168
+.byte	81,163,64,143,146,157,56,245
+.byte	188,182,218,33,16,255,243,210
+.byte	205,12,19,236,95,151,68,23
+.byte	196,167,126,61,100,93,25,115
+.byte	96,129,79,220,34,42,144,136
+.byte	70,238,184,20,222,94,11,219
+.byte	224,50,58,10,73,6,36,92
+.byte	194,211,172,98,145,149,228,121
+.byte	231,200,55,109,141,213,78,169
+.byte	108,86,244,234,101,122,174,8
+.byte	186,120,37,46,28,166,180,198
+.byte	232,221,116,31,75,189,139,138
+.byte	112,62,181,102,72,3,246,14
+.byte	97,53,87,185,134,193,29,158
+.byte	225,248,152,17,105,217,142,148
+.byte	155,30,135,233,206,85,40,223
+.byte	140,161,137,13,191,230,66,104
+.byte	65,153,45,15,176,84,187,22
+.byte	99,124,119,123,242,107,111,197
+.byte	48,1,103,43,254,215,171,118
+.byte	202,130,201,125,250,89,71,240
+.byte	173,212,162,175,156,164,114,192
+.byte	183,253,147,38,54,63,247,204
+.byte	52,165,229,241,113,216,49,21
+.byte	4,199,35,195,24,150,5,154
+.byte	7,18,128,226,235,39,178,117
+.byte	9,131,44,26,27,110,90,160
+.byte	82,59,214,179,41,227,47,132
+.byte	83,209,0,237,32,252,177,91
+.byte	106,203,190,57,74,76,88,207
+.byte	208,239,170,251,67,77,51,133
+.byte	69,249,2,127,80,60,159,168
+.byte	81,163,64,143,146,157,56,245
+.byte	188,182,218,33,16,255,243,210
+.byte	205,12,19,236,95,151,68,23
+.byte	196,167,126,61,100,93,25,115
+.byte	96,129,79,220,34,42,144,136
+.byte	70,238,184,20,222,94,11,219
+.byte	224,50,58,10,73,6,36,92
+.byte	194,211,172,98,145,149,228,121
+.byte	231,200,55,109,141,213,78,169
+.byte	108,86,244,234,101,122,174,8
+.byte	186,120,37,46,28,166,180,198
+.byte	232,221,116,31,75,189,139,138
+.byte	112,62,181,102,72,3,246,14
+.byte	97,53,87,185,134,193,29,158
+.byte	225,248,152,17,105,217,142,148
+.byte	155,30,135,233,206,85,40,223
+.byte	140,161,137,13,191,230,66,104
+.byte	65,153,45,15,176,84,187,22
+.byte	99,124,119,123,242,107,111,197
+.byte	48,1,103,43,254,215,171,118
+.byte	202,130,201,125,250,89,71,240
+.byte	173,212,162,175,156,164,114,192
+.byte	183,253,147,38,54,63,247,204
+.byte	52,165,229,241,113,216,49,21
+.byte	4,199,35,195,24,150,5,154
+.byte	7,18,128,226,235,39,178,117
+.byte	9,131,44,26,27,110,90,160
+.byte	82,59,214,179,41,227,47,132
+.byte	83,209,0,237,32,252,177,91
+.byte	106,203,190,57,74,76,88,207
+.byte	208,239,170,251,67,77,51,133
+.byte	69,249,2,127,80,60,159,168
+.byte	81,163,64,143,146,157,56,245
+.byte	188,182,218,33,16,255,243,210
+.byte	205,12,19,236,95,151,68,23
+.byte	196,167,126,61,100,93,25,115
+.byte	96,129,79,220,34,42,144,136
+.byte	70,238,184,20,222,94,11,219
+.byte	224,50,58,10,73,6,36,92
+.byte	194,211,172,98,145,149,228,121
+.byte	231,200,55,109,141,213,78,169
+.byte	108,86,244,234,101,122,174,8
+.byte	186,120,37,46,28,166,180,198
+.byte	232,221,116,31,75,189,139,138
+.byte	112,62,181,102,72,3,246,14
+.byte	97,53,87,185,134,193,29,158
+.byte	225,248,152,17,105,217,142,148
+.byte	155,30,135,233,206,85,40,223
+.byte	140,161,137,13,191,230,66,104
+.byte	65,153,45,15,176,84,187,22
+.byte	99,124,119,123,242,107,111,197
+.byte	48,1,103,43,254,215,171,118
+.byte	202,130,201,125,250,89,71,240
+.byte	173,212,162,175,156,164,114,192
+.byte	183,253,147,38,54,63,247,204
+.byte	52,165,229,241,113,216,49,21
+.byte	4,199,35,195,24,150,5,154
+.byte	7,18,128,226,235,39,178,117
+.byte	9,131,44,26,27,110,90,160
+.byte	82,59,214,179,41,227,47,132
+.byte	83,209,0,237,32,252,177,91
+.byte	106,203,190,57,74,76,88,207
+.byte	208,239,170,251,67,77,51,133
+.byte	69,249,2,127,80,60,159,168
+.byte	81,163,64,143,146,157,56,245
+.byte	188,182,218,33,16,255,243,210
+.byte	205,12,19,236,95,151,68,23
+.byte	196,167,126,61,100,93,25,115
+.byte	96,129,79,220,34,42,144,136
+.byte	70,238,184,20,222,94,11,219
+.byte	224,50,58,10,73,6,36,92
+.byte	194,211,172,98,145,149,228,121
+.byte	231,200,55,109,141,213,78,169
+.byte	108,86,244,234,101,122,174,8
+.byte	186,120,37,46,28,166,180,198
+.byte	232,221,116,31,75,189,139,138
+.byte	112,62,181,102,72,3,246,14
+.byte	97,53,87,185,134,193,29,158
+.byte	225,248,152,17,105,217,142,148
+.byte	155,30,135,233,206,85,40,223
+.byte	140,161,137,13,191,230,66,104
+.byte	65,153,45,15,176,84,187,22
+.long	1,2,4,8
+.long	16,32,64,128
+.long	27,54,0,0
+.long	0,0,0,0
+.size	_x86_AES_encrypt,.-_x86_AES_encrypt
+.globl	asm_AES_encrypt	/* entry point with three stack arguments: arg1 = 16-byte input, arg2 = 16-byte output, arg3 = key structure */
+.hidden	asm_AES_encrypt
+.type	asm_AES_encrypt,@function
+.align	16
+asm_AES_encrypt:
+.L_asm_AES_encrypt_begin:
+	pushl	%ebp	/* save the four registers restored before each ret */
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi	/* esi = arg1 (4 pushes + return address = 20 bytes) */
+	movl	28(%esp),%edi	/* edi = arg3 */
+	movl	%esp,%eax	/* remember the incoming stack pointer */
+	subl	$36,%esp
+	andl	$-64,%esp	/* align the scratch frame down to 64 bytes */
+	leal	-127(%edi),%ebx
+	subl	%esp,%ebx
+	negl	%ebx
+	andl	$960,%ebx	/* 0x3c0: a multiple of 64 derived from the distance between frame and key */
+	subl	%ebx,%esp	/* lower the frame by that amount (presumably to keep frame and key from aliasing in cache - TODO confirm) */
+	addl	$4,%esp
+	movl	%eax,28(%esp)	/* 28(%esp): saved incoming stack pointer */
+	call	.L004pic_point	/* call/pop pair fetches the current address for position-independent addressing */
+.L004pic_point:
+	popl	%ebp
+	leal	OPENSSL_ia32cap_P-.L004pic_point(%ebp),%eax	/* eax = &OPENSSL_ia32cap_P */
+	leal	.LAES_Te-.L004pic_point(%ebp),%ebp	/* ebp = &.LAES_Te */
+	leal	764(%esp),%ebx
+	subl	%ebp,%ebx
+	andl	$768,%ebx	/* 0, 256, 512 or 768: selects one of the four 256-byte table copies */
+	leal	2176(%ebp,%ebx,1),%ebp	/* 2176 = 2048 (the .long table) + 128 (bias matching the -128 displacement used by the callees) */
+	btl	$25,(%eax)	/* test bit 25 of the first capability dword (presumably the SSE flag - TODO confirm) */
+	jnc	.L005x86	/* bit clear: use the integer-only routine */
+	movq	(%esi),%mm0	/* load the 16 input bytes into mm0/mm4 */
+	movq	8(%esi),%mm4
+	call	_sse_AES_encrypt_compact
+	movl	28(%esp),%esp	/* restore the incoming stack pointer */
+	movl	24(%esp),%esi	/* esi = arg2 */
+	movq	%mm0,(%esi)	/* store the 16 result bytes */
+	movq	%mm4,8(%esi)
+	emms	/* leave MMX state so x87 code can run afterwards */
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	16
+.L005x86:
+	movl	%ebp,24(%esp)	/* becomes 28(%esp) inside the callee, which reloads ebp from there on every loop pass */
+	movl	(%esi),%eax	/* load the 16 input bytes into eax/ebx/ecx/edx */
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	call	_x86_AES_encrypt_compact
+	movl	28(%esp),%esp	/* restore the incoming stack pointer */
+	movl	24(%esp),%esi	/* esi = arg2 */
+	movl	%eax,(%esi)	/* store the 16 result bytes */
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	asm_AES_encrypt,.-.L_asm_AES_encrypt_begin
+.hidden	_x86_AES_decrypt_compact	/* integer-only block transform: state in eax/ebx/ecx/edx, edi = round-key pointer, ebp = 256-byte table base + 128 */
+.type	_x86_AES_decrypt_compact,@function
+.align	16
+_x86_AES_decrypt_compact:
+	movl	%edi,20(%esp)	/* 20(%esp): current round-key pointer */
+	xorl	(%edi),%eax	/* xor the first 16 key bytes into the state */
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	movl	240(%edi),%esi	/* n = dword at key+240 (presumably the round count - TODO confirm) */
+	leal	-2(%esi,%esi,1),%esi
+	leal	(%edi,%esi,8),%esi	/* esi = key + 16*(n-1) */
+	movl	%esi,24(%esp)	/* 24(%esp): loop end pointer */
+	movl	-128(%ebp),%edi	/* eight loads at 32-byte strides across the table; values are discarded (presumably cache warm-up) */
+	movl	-96(%ebp),%esi
+	movl	-64(%ebp),%edi
+	movl	-32(%ebp),%esi
+	movl	(%ebp),%edi
+	movl	32(%ebp),%esi
+	movl	64(%ebp),%edi
+	movl	96(%ebp),%esi
+.align	16
+.L006loop:	/* loop body: byte-table substitution of all 16 state bytes, then packed mixing */
+	movl	%eax,%esi
+	andl	$255,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%dh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%ebx,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,4(%esp)	/* 4(%esp): first substituted word */
+	movl	%ebx,%esi
+	andl	$255,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%ah,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,8(%esp)	/* 8(%esp): second substituted word */
+	movl	%ecx,%esi
+	andl	$255,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%bh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	andl	$255,%edx
+	movzbl	-128(%ebp,%edx,1),%edx
+	movzbl	%ch,%ecx
+	movzbl	-128(%ebp,%ecx,1),%ecx
+	shll	$8,%ecx
+	xorl	%ecx,%edx
+	movl	%esi,%ecx	/* ecx = third substituted word */
+	shrl	$16,%ebx
+	andl	$255,%ebx
+	movzbl	-128(%ebp,%ebx,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%edx
+	shrl	$24,%eax
+	movzbl	-128(%ebp,%eax,1),%eax
+	shll	$24,%eax
+	xorl	%eax,%edx	/* edx = fourth substituted word */
+	movl	$2155905152,%edi	/* 0x80808080: top bit of every byte */
+	andl	%ecx,%edi
+	movl	%edi,%esi
+	shrl	$7,%edi
+	leal	(%ecx,%ecx,1),%eax
+	subl	%edi,%esi
+	andl	$4278124286,%eax	/* 0xfefefefe: drop bits that the doubling carried across byte lanes */
+	andl	$454761243,%esi	/* 0x1b1b1b1b: leaves 0x1b in each byte whose top bit was set */
+	xorl	%esi,%eax	/* eax = byte-wise doubling of ecx (x2) */
+	movl	$2155905152,%edi
+	andl	%eax,%edi
+	movl	%edi,%esi
+	shrl	$7,%edi
+	leal	(%eax,%eax,1),%ebx
+	subl	%edi,%esi
+	andl	$4278124286,%ebx
+	andl	$454761243,%esi
+	xorl	%ecx,%eax
+	xorl	%esi,%ebx	/* ebx = second doubling (x4) */
+	movl	$2155905152,%edi
+	andl	%ebx,%edi
+	movl	%edi,%esi
+	shrl	$7,%edi
+	leal	(%ebx,%ebx,1),%ebp	/* ebp is scratch from here until it is reloaded from 28(%esp) */
+	subl	%edi,%esi
+	andl	$4278124286,%ebp
+	andl	$454761243,%esi
+	xorl	%ecx,%ebx
+	roll	$8,%ecx
+	xorl	%esi,%ebp	/* ebp = third doubling (x8) */
+	xorl	%eax,%ecx
+	xorl	%ebp,%eax
+	xorl	%ebx,%ecx
+	xorl	%ebp,%ebx
+	roll	$24,%eax
+	xorl	%ebp,%ecx
+	roll	$16,%ebx
+	xorl	%eax,%ecx
+	roll	$8,%ebp
+	xorl	%ebx,%ecx
+	movl	4(%esp),%eax
+	xorl	%ebp,%ecx
+	movl	%ecx,12(%esp)	/* 12(%esp): mixed third word */
+	movl	$2155905152,%edi	/* same three-doubling sequence, now applied to edx */
+	andl	%edx,%edi
+	movl	%edi,%esi
+	shrl	$7,%edi
+	leal	(%edx,%edx,1),%ebx
+	subl	%edi,%esi
+	andl	$4278124286,%ebx
+	andl	$454761243,%esi
+	xorl	%esi,%ebx
+	movl	$2155905152,%edi
+	andl	%ebx,%edi
+	movl	%edi,%esi
+	shrl	$7,%edi
+	leal	(%ebx,%ebx,1),%ecx
+	subl	%edi,%esi
+	andl	$4278124286,%ecx
+	andl	$454761243,%esi
+	xorl	%edx,%ebx
+	xorl	%esi,%ecx
+	movl	$2155905152,%edi
+	andl	%ecx,%edi
+	movl	%edi,%esi
+	shrl	$7,%edi
+	leal	(%ecx,%ecx,1),%ebp
+	subl	%edi,%esi
+	andl	$4278124286,%ebp
+	andl	$454761243,%esi
+	xorl	%edx,%ecx
+	roll	$8,%edx
+	xorl	%esi,%ebp
+	xorl	%ebx,%edx
+	xorl	%ebp,%ebx
+	xorl	%ecx,%edx
+	xorl	%ebp,%ecx
+	roll	$24,%ebx
+	xorl	%ebp,%edx
+	roll	$16,%ecx
+	xorl	%ebx,%edx
+	roll	$8,%ebp
+	xorl	%ecx,%edx
+	movl	8(%esp),%ebx
+	xorl	%ebp,%edx
+	movl	%edx,16(%esp)	/* 16(%esp): mixed fourth word */
+	movl	$2155905152,%edi	/* same three-doubling sequence, now applied to eax */
+	andl	%eax,%edi
+	movl	%edi,%esi
+	shrl	$7,%edi
+	leal	(%eax,%eax,1),%ecx
+	subl	%edi,%esi
+	andl	$4278124286,%ecx
+	andl	$454761243,%esi
+	xorl	%esi,%ecx
+	movl	$2155905152,%edi
+	andl	%ecx,%edi
+	movl	%edi,%esi
+	shrl	$7,%edi
+	leal	(%ecx,%ecx,1),%edx
+	subl	%edi,%esi
+	andl	$4278124286,%edx
+	andl	$454761243,%esi
+	xorl	%eax,%ecx
+	xorl	%esi,%edx
+	movl	$2155905152,%edi
+	andl	%edx,%edi
+	movl	%edi,%esi
+	shrl	$7,%edi
+	leal	(%edx,%edx,1),%ebp
+	subl	%edi,%esi
+	andl	$4278124286,%ebp
+	andl	$454761243,%esi
+	xorl	%eax,%edx
+	roll	$8,%eax
+	xorl	%esi,%ebp
+	xorl	%ecx,%eax
+	xorl	%ebp,%ecx
+	xorl	%edx,%eax
+	xorl	%ebp,%edx
+	roll	$24,%ecx
+	xorl	%ebp,%eax
+	roll	$16,%edx
+	xorl	%ecx,%eax
+	roll	$8,%ebp
+	xorl	%edx,%eax
+	xorl	%ebp,%eax
+	movl	$2155905152,%edi	/* same three-doubling sequence, now applied to ebx */
+	andl	%ebx,%edi
+	movl	%edi,%esi
+	shrl	$7,%edi
+	leal	(%ebx,%ebx,1),%ecx
+	subl	%edi,%esi
+	andl	$4278124286,%ecx
+	andl	$454761243,%esi
+	xorl	%esi,%ecx
+	movl	$2155905152,%edi
+	andl	%ecx,%edi
+	movl	%edi,%esi
+	shrl	$7,%edi
+	leal	(%ecx,%ecx,1),%edx
+	subl	%edi,%esi
+	andl	$4278124286,%edx
+	andl	$454761243,%esi
+	xorl	%ebx,%ecx
+	xorl	%esi,%edx
+	movl	$2155905152,%edi
+	andl	%edx,%edi
+	movl	%edi,%esi
+	shrl	$7,%edi
+	leal	(%edx,%edx,1),%ebp
+	subl	%edi,%esi
+	andl	$4278124286,%ebp
+	andl	$454761243,%esi
+	xorl	%ebx,%edx
+	roll	$8,%ebx
+	xorl	%esi,%ebp
+	xorl	%ecx,%ebx
+	xorl	%ebp,%ecx
+	xorl	%edx,%ebx
+	xorl	%ebp,%edx
+	roll	$24,%ecx
+	xorl	%ebp,%ebx
+	roll	$16,%edx
+	xorl	%ecx,%ebx
+	roll	$8,%ebp
+	xorl	%edx,%ebx
+	movl	12(%esp),%ecx	/* bring back the mixed third word */
+	xorl	%ebp,%ebx
+	movl	16(%esp),%edx	/* bring back the mixed fourth word */
+	movl	20(%esp),%edi	/* reload the round-key pointer */
+	movl	28(%esp),%ebp	/* reload the table pointer; this slot must have been filled in by the caller */
+	addl	$16,%edi	/* advance to the next 16-byte round key */
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	cmpl	24(%esp),%edi	/* flags survive the movl below */
+	movl	%edi,20(%esp)
+	jb	.L006loop	/* repeat while the key pointer is below the end pointer */
+	movl	%eax,%esi	/* final pass: substitution only, no mixing */
+	andl	$255,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%dh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%ebx,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,4(%esp)
+	movl	%ebx,%esi
+	andl	$255,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%ah,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,8(%esp)
+	movl	%ecx,%esi
+	andl	$255,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%bh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	20(%esp),%edi	/* edi = last round-key pointer stored by the loop */
+	andl	$255,%edx
+	movzbl	-128(%ebp,%edx,1),%edx
+	movzbl	%ch,%ecx
+	movzbl	-128(%ebp,%ecx,1),%ecx
+	shll	$8,%ecx
+	xorl	%ecx,%edx
+	movl	%esi,%ecx
+	shrl	$16,%ebx
+	andl	$255,%ebx
+	movzbl	-128(%ebp,%ebx,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%edx
+	movl	8(%esp),%ebx
+	shrl	$24,%eax
+	movzbl	-128(%ebp,%eax,1),%eax
+	shll	$24,%eax
+	xorl	%eax,%edx
+	movl	4(%esp),%eax
+	xorl	16(%edi),%eax	/* xor the following 16 key bytes; result is returned in eax/ebx/ecx/edx */
+	xorl	20(%edi),%ebx
+	xorl	24(%edi),%ecx
+	xorl	28(%edi),%edx
+	ret
+.size	_x86_AES_decrypt_compact,.-_x86_AES_decrypt_compact
+.hidden	_sse_AES_decrypt_compact	# Internal MMX/SSE routine: decrypts the 16-byte state held in %mm0:%mm4 using only a 256-byte lookup table.
+.type	_sse_AES_decrypt_compact,@function	# In: %edi = round-key pointer, %ebp = 256-byte table + 128, %mm0/%mm4 = state. Out: %mm0/%mm4 = result.
+.align	16	# Overwrites %eax-%edx, %esi, %edi, %mm1-%mm3, %mm5-%mm7 and the stack slots 8,12,20,24(%esp).
+_sse_AES_decrypt_compact:
+	pxor	(%edi),%mm0	# XOR the state with the first 16 bytes of the key schedule
+	pxor	8(%edi),%mm4
+	movl	240(%edi),%esi	# dword at key+240 (presumably the round count n - confirm against the key struct)
+	leal	-2(%esi,%esi,1),%esi	# esi = 2*n - 2
+	leal	(%edi,%esi,8),%esi	# esi = key + 16*(n-1): last key block consumed inside the loop
+	movl	%esi,24(%esp)	# remember that end address
+	movl	$454761243,%eax	# 0x1b1b1b1b
+	movl	%eax,8(%esp)	# 8-byte mask of 0x1b bytes at 8(%esp), used by the doubling steps below
+	movl	%eax,12(%esp)
+	movl	-128(%ebp),%eax	# eight loads 32 bytes apart cover the whole 256-byte table; the values
+	movl	-96(%ebp),%ebx	# are overwritten before any use (presumably a cache warm-up)
+	movl	-64(%ebp),%ecx
+	movl	-32(%ebp),%edx
+	movl	(%ebp),%eax
+	movl	32(%ebp),%ebx
+	movl	64(%ebp),%ecx
+	movl	96(%ebp),%edx
+.align	16
+.L007loop:	# one round per iteration
+	pshufw	$12,%mm0,%mm1	# shuffle 16-bit words so the bytes needed next sit in the low dword
+	pshufw	$9,%mm4,%mm5
+	movd	%mm1,%eax
+	movd	%mm5,%ebx
+	movl	%edi,20(%esp)	# park the key pointer; %edi doubles as a scratch index below
+	movzbl	%al,%esi
+	movzbl	%ah,%edx
+	pshufw	$6,%mm0,%mm2
+	movzbl	-128(%ebp,%esi,1),%ecx	# byte substitution: table[byte], table starts at %ebp-128
+	movzbl	%bl,%edi
+	movzbl	-128(%ebp,%edx,1),%edx
+	shrl	$16,%eax
+	shll	$8,%edx
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%bh,%edi
+	shll	$16,%esi
+	pshufw	$3,%mm4,%mm6
+	orl	%esi,%ecx
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%ah,%edi
+	shll	$24,%esi
+	shrl	$16,%ebx
+	orl	%esi,%edx
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%bh,%edi
+	shll	$24,%esi
+	orl	%esi,%ecx
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%al,%edi
+	shll	$8,%esi
+	movd	%mm2,%eax
+	orl	%esi,%ecx
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%bl,%edi
+	shll	$16,%esi
+	movd	%mm6,%ebx
+	movd	%ecx,%mm0	# first substituted dword of the new state
+	movzbl	-128(%ebp,%edi,1),%ecx
+	movzbl	%al,%edi
+	orl	%esi,%ecx
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%bl,%edi
+	orl	%esi,%edx
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%ah,%edi
+	shll	$16,%esi
+	shrl	$16,%eax
+	orl	%esi,%edx
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%bh,%edi
+	shrl	$16,%ebx
+	shll	$8,%esi
+	movd	%edx,%mm1	# second substituted dword
+	movzbl	-128(%ebp,%edi,1),%edx
+	movzbl	%bh,%edi
+	shll	$24,%edx
+	andl	$255,%ebx
+	orl	%esi,%edx
+	punpckldq	%mm1,%mm0	# mm0 = low 8 bytes of the substituted state
+	movzbl	-128(%ebp,%edi,1),%esi
+	movzbl	%al,%edi
+	shll	$8,%esi
+	movzbl	%ah,%eax
+	movzbl	-128(%ebp,%ebx,1),%ebx
+	orl	%esi,%ecx
+	movzbl	-128(%ebp,%edi,1),%esi
+	orl	%ebx,%edx
+	shll	$16,%esi
+	movzbl	-128(%ebp,%eax,1),%eax
+	orl	%esi,%edx
+	shll	$24,%eax
+	orl	%eax,%ecx
+	movl	20(%esp),%edi	# reload the key pointer
+	movd	%edx,%mm4
+	movd	%ecx,%mm5
+	punpckldq	%mm5,%mm4	# mm4 = high 8 bytes of the substituted state
+	addl	$16,%edi	# advance to the next 16-byte round key
+	cmpl	24(%esp),%edi
+	ja	.L008out	# past the end address: only the final key XOR remains
+	movq	%mm0,%mm3	# column mixing in MMX registers (presumably InvMixColumns - confirm)
+	movq	%mm4,%mm7
+	pshufw	$228,%mm0,%mm2	# shuffle immediate 228 = 0xe4 keeps word order, i.e. a plain copy
+	pshufw	$228,%mm4,%mm6
+	movq	%mm0,%mm1
+	movq	%mm4,%mm5
+	pshufw	$177,%mm0,%mm0	# immediate 177 = 0xb1 swaps the 16-bit halves of each dword
+	pshufw	$177,%mm4,%mm4
+	pslld	$8,%mm2
+	pslld	$8,%mm6
+	psrld	$8,%mm3
+	psrld	$8,%mm7
+	pxor	%mm2,%mm0
+	pxor	%mm6,%mm4
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	pslld	$16,%mm2
+	pslld	$16,%mm6
+	psrld	$16,%mm3
+	psrld	$16,%mm7
+	pxor	%mm2,%mm0
+	pxor	%mm6,%mm4
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	movq	8(%esp),%mm3	# 0x1b mask
+	pxor	%mm2,%mm2
+	pxor	%mm6,%mm6
+	pcmpgtb	%mm1,%mm2	# 0xff in every byte lane whose top bit is set
+	pcmpgtb	%mm5,%mm6
+	pand	%mm3,%mm2	# ... turned into 0x1b for those lanes
+	pand	%mm3,%mm6
+	paddb	%mm1,%mm1	# double every byte
+	paddb	%mm5,%mm5
+	pxor	%mm2,%mm1	# and XOR 0x1b into the lanes that overflowed
+	pxor	%mm6,%mm5
+	movq	%mm1,%mm3
+	movq	%mm5,%mm7
+	movq	%mm1,%mm2
+	movq	%mm5,%mm6
+	pxor	%mm1,%mm0
+	pxor	%mm5,%mm4
+	pslld	$24,%mm3
+	pslld	$24,%mm7
+	psrld	$8,%mm2
+	psrld	$8,%mm6
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	pxor	%mm2,%mm0
+	pxor	%mm6,%mm4
+	movq	8(%esp),%mm2	# 0x1b mask again
+	pxor	%mm3,%mm3
+	pxor	%mm7,%mm7
+	pcmpgtb	%mm1,%mm3	# second doubling step, same pattern as above
+	pcmpgtb	%mm5,%mm7
+	pand	%mm2,%mm3
+	pand	%mm2,%mm7
+	paddb	%mm1,%mm1
+	paddb	%mm5,%mm5
+	pxor	%mm3,%mm1
+	pxor	%mm7,%mm5
+	pshufw	$177,%mm1,%mm3
+	pshufw	$177,%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm5,%mm4
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	pxor	%mm3,%mm3
+	pxor	%mm7,%mm7
+	pcmpgtb	%mm1,%mm3	# third doubling step
+	pcmpgtb	%mm5,%mm7
+	pand	%mm2,%mm3
+	pand	%mm2,%mm7
+	paddb	%mm1,%mm1
+	paddb	%mm5,%mm5
+	pxor	%mm3,%mm1
+	pxor	%mm7,%mm5
+	pxor	%mm1,%mm0
+	pxor	%mm5,%mm4
+	movq	%mm1,%mm3
+	movq	%mm5,%mm7
+	pshufw	$177,%mm1,%mm2
+	pshufw	$177,%mm5,%mm6
+	pxor	%mm2,%mm0
+	pxor	%mm6,%mm4
+	pslld	$8,%mm1
+	pslld	$8,%mm5
+	psrld	$8,%mm3
+	psrld	$8,%mm7
+	movq	(%edi),%mm2	# fetch the 16-byte round key for this round
+	movq	8(%edi),%mm6
+	pxor	%mm1,%mm0
+	pxor	%mm5,%mm4
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	movl	-128(%ebp),%eax	# table loads whose results are overwritten at the loop top
+	pslld	$16,%mm1
+	pslld	$16,%mm5
+	movl	-64(%ebp),%ebx	# (presumably keeps the table cached - confirm)
+	psrld	$16,%mm3
+	psrld	$16,%mm7
+	movl	(%ebp),%ecx
+	pxor	%mm1,%mm0
+	pxor	%mm5,%mm4
+	movl	64(%ebp),%edx
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	pxor	%mm2,%mm0	# add the round key
+	pxor	%mm6,%mm4
+	jmp	.L007loop
+.align	16
+.L008out:
+	pxor	(%edi),%mm0	# final round: only the key XOR follows the byte substitution
+	pxor	8(%edi),%mm4
+	ret
+.size	_sse_AES_decrypt_compact,.-_sse_AES_decrypt_compact
+.hidden	_x86_AES_decrypt	# Internal integer-only routine: decrypts the 16-byte state held in %eax,%ebx,%ecx,%edx with large-table lookups.
+.type	_x86_AES_decrypt,@function	# In: %edi = round-key pointer, %ebp = table base (the CBC caller in this file passes .LAES_Td). Out: %eax-%edx.
+.align	16	# %ebp is restored on exit; %esi, %edi and the stack slots 4,8,20,24(%esp) are overwritten.
+_x86_AES_decrypt:
+	movl	%edi,20(%esp)	# key pointer lives at 20(%esp) while %edi is used as scratch
+	xorl	(%edi),%eax	# XOR the state with the first 16 bytes of the key schedule
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	movl	240(%edi),%esi	# dword at key+240 (presumably the round count n - confirm against the key struct)
+	leal	-2(%esi,%esi,1),%esi	# esi = 2*n - 2
+	leal	(%edi,%esi,8),%esi	# esi = key + 16*(n-1)
+	movl	%esi,24(%esp)	# loop end address
+.align	16
+.L009loop:	# one table-driven round per iteration
+	movl	%eax,%esi	# new word 0 from bytes: eax[0], edx[1], ecx[2], ebx[3]
+	andl	$255,%esi
+	movl	(%ebp,%esi,8),%esi	# 8-byte table entries; each entry holds the same dword twice,
+	movzbl	%dh,%edi
+	xorl	3(%ebp,%edi,8),%esi	# so reads at byte offsets 3, 2, 1 return the entry rotated
+	movl	%ecx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	xorl	2(%ebp,%edi,8),%esi
+	movl	%ebx,%edi
+	shrl	$24,%edi
+	xorl	1(%ebp,%edi,8),%esi
+	movl	%esi,4(%esp)	# park new word 0
+
+	movl	%ebx,%esi	# new word 1 from bytes: ebx[0], eax[1], edx[2], ecx[3]
+	andl	$255,%esi
+	movl	(%ebp,%esi,8),%esi
+	movzbl	%ah,%edi
+	xorl	3(%ebp,%edi,8),%esi
+	movl	%edx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	xorl	2(%ebp,%edi,8),%esi
+	movl	%ecx,%edi
+	shrl	$24,%edi
+	xorl	1(%ebp,%edi,8),%esi
+	movl	%esi,8(%esp)	# park new word 1
+
+	movl	%ecx,%esi	# new word 2 from bytes: ecx[0], ebx[1], eax[2], edx[3]
+	andl	$255,%esi
+	movl	(%ebp,%esi,8),%esi
+	movzbl	%bh,%edi
+	xorl	3(%ebp,%edi,8),%esi
+	movl	%eax,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	xorl	2(%ebp,%edi,8),%esi
+	movl	%edx,%edi
+	shrl	$24,%edi
+	xorl	1(%ebp,%edi,8),%esi
+
+	movl	20(%esp),%edi	# reload the key pointer
+	andl	$255,%edx	# new word 3 is built in place from edx[0], ecx[1], ebx[2], eax[3]
+	movl	(%ebp,%edx,8),%edx
+	movzbl	%ch,%ecx
+	xorl	3(%ebp,%ecx,8),%edx
+	movl	%esi,%ecx	# word 2 moves into its home register
+	shrl	$16,%ebx
+	andl	$255,%ebx
+	xorl	2(%ebp,%ebx,8),%edx
+	movl	8(%esp),%ebx	# word 1 back from the stack
+	shrl	$24,%eax
+	xorl	1(%ebp,%eax,8),%edx
+	movl	4(%esp),%eax	# word 0 back from the stack
+
+	addl	$16,%edi	# next round key
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	cmpl	24(%esp),%edi	# movl below does not touch the flags used by jb
+	movl	%edi,20(%esp)
+	jb	.L009loop
+	leal	2176(%ebp),%ebp	# 2176 = 2048 + 128: middle of the first 256-byte table that follows the dword table
+	movl	-128(%ebp),%edi	# eight loads 32 bytes apart cover those 256 bytes; values are
+	movl	-96(%ebp),%esi	# overwritten before use (presumably a cache warm-up)
+	movl	-64(%ebp),%edi
+	movl	-32(%ebp),%esi
+	movl	(%ebp),%edi
+	movl	32(%ebp),%esi
+	movl	64(%ebp),%edi
+	movl	96(%ebp),%esi
+	leal	-128(%ebp),%ebp	# ebp = start of the byte table (table base + 2048)
+	movl	%eax,%esi	# last round: byte lookups only, same byte selection as in the loop
+	andl	$255,%esi
+	movzbl	(%ebp,%esi,1),%esi
+	movzbl	%dh,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%ebx,%edi
+	shrl	$24,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,4(%esp)	# park new word 0
+	movl	%ebx,%esi
+	andl	$255,%esi
+	movzbl	(%ebp,%esi,1),%esi
+	movzbl	%ah,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$24,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,8(%esp)	# park new word 1
+	movl	%ecx,%esi
+	andl	$255,%esi
+	movzbl	(%ebp,%esi,1),%esi
+	movzbl	%bh,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$24,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	20(%esp),%edi	# reload the key pointer
+	andl	$255,%edx
+	movzbl	(%ebp,%edx,1),%edx
+	movzbl	%ch,%ecx
+	movzbl	(%ebp,%ecx,1),%ecx
+	shll	$8,%ecx
+	xorl	%ecx,%edx
+	movl	%esi,%ecx	# word 2 into its home register
+	shrl	$16,%ebx
+	andl	$255,%ebx
+	movzbl	(%ebp,%ebx,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%edx
+	movl	8(%esp),%ebx	# word 1 back from the stack
+	shrl	$24,%eax
+	movzbl	(%ebp,%eax,1),%eax
+	shll	$24,%eax
+	xorl	%eax,%edx
+	movl	4(%esp),%eax	# word 0 back from the stack
+	leal	-2048(%ebp),%ebp	# restore %ebp to the table base for the caller
+	addl	$16,%edi
+	xorl	(%edi),%eax	# XOR in the final round key
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	ret
+.align	64
+.LAES_Td:
+.long	1353184337,1353184337
+.long	1399144830,1399144830
+.long	3282310938,3282310938
+.long	2522752826,2522752826
+.long	3412831035,3412831035
+.long	4047871263,4047871263
+.long	2874735276,2874735276
+.long	2466505547,2466505547
+.long	1442459680,1442459680
+.long	4134368941,4134368941
+.long	2440481928,2440481928
+.long	625738485,625738485
+.long	4242007375,4242007375
+.long	3620416197,3620416197
+.long	2151953702,2151953702
+.long	2409849525,2409849525
+.long	1230680542,1230680542
+.long	1729870373,1729870373
+.long	2551114309,2551114309
+.long	3787521629,3787521629
+.long	41234371,41234371
+.long	317738113,317738113
+.long	2744600205,2744600205
+.long	3338261355,3338261355
+.long	3881799427,3881799427
+.long	2510066197,2510066197
+.long	3950669247,3950669247
+.long	3663286933,3663286933
+.long	763608788,763608788
+.long	3542185048,3542185048
+.long	694804553,694804553
+.long	1154009486,1154009486
+.long	1787413109,1787413109
+.long	2021232372,2021232372
+.long	1799248025,1799248025
+.long	3715217703,3715217703
+.long	3058688446,3058688446
+.long	397248752,397248752
+.long	1722556617,1722556617
+.long	3023752829,3023752829
+.long	407560035,407560035
+.long	2184256229,2184256229
+.long	1613975959,1613975959
+.long	1165972322,1165972322
+.long	3765920945,3765920945
+.long	2226023355,2226023355
+.long	480281086,480281086
+.long	2485848313,2485848313
+.long	1483229296,1483229296
+.long	436028815,436028815
+.long	2272059028,2272059028
+.long	3086515026,3086515026
+.long	601060267,601060267
+.long	3791801202,3791801202
+.long	1468997603,1468997603
+.long	715871590,715871590
+.long	120122290,120122290
+.long	63092015,63092015
+.long	2591802758,2591802758
+.long	2768779219,2768779219
+.long	4068943920,4068943920
+.long	2997206819,2997206819
+.long	3127509762,3127509762
+.long	1552029421,1552029421
+.long	723308426,723308426
+.long	2461301159,2461301159
+.long	4042393587,4042393587
+.long	2715969870,2715969870
+.long	3455375973,3455375973
+.long	3586000134,3586000134
+.long	526529745,526529745
+.long	2331944644,2331944644
+.long	2639474228,2639474228
+.long	2689987490,2689987490
+.long	853641733,853641733
+.long	1978398372,1978398372
+.long	971801355,971801355
+.long	2867814464,2867814464
+.long	111112542,111112542
+.long	1360031421,1360031421
+.long	4186579262,4186579262
+.long	1023860118,1023860118
+.long	2919579357,2919579357
+.long	1186850381,1186850381
+.long	3045938321,3045938321
+.long	90031217,90031217
+.long	1876166148,1876166148
+.long	4279586912,4279586912
+.long	620468249,620468249
+.long	2548678102,2548678102
+.long	3426959497,3426959497
+.long	2006899047,2006899047
+.long	3175278768,3175278768
+.long	2290845959,2290845959
+.long	945494503,945494503
+.long	3689859193,3689859193
+.long	1191869601,1191869601
+.long	3910091388,3910091388
+.long	3374220536,3374220536
+.long	0,0
+.long	2206629897,2206629897
+.long	1223502642,1223502642
+.long	2893025566,2893025566
+.long	1316117100,1316117100
+.long	4227796733,4227796733
+.long	1446544655,1446544655
+.long	517320253,517320253
+.long	658058550,658058550
+.long	1691946762,1691946762
+.long	564550760,564550760
+.long	3511966619,3511966619
+.long	976107044,976107044
+.long	2976320012,2976320012
+.long	266819475,266819475
+.long	3533106868,3533106868
+.long	2660342555,2660342555
+.long	1338359936,1338359936
+.long	2720062561,2720062561
+.long	1766553434,1766553434
+.long	370807324,370807324
+.long	179999714,179999714
+.long	3844776128,3844776128
+.long	1138762300,1138762300
+.long	488053522,488053522
+.long	185403662,185403662
+.long	2915535858,2915535858
+.long	3114841645,3114841645
+.long	3366526484,3366526484
+.long	2233069911,2233069911
+.long	1275557295,1275557295
+.long	3151862254,3151862254
+.long	4250959779,4250959779
+.long	2670068215,2670068215
+.long	3170202204,3170202204
+.long	3309004356,3309004356
+.long	880737115,880737115
+.long	1982415755,1982415755
+.long	3703972811,3703972811
+.long	1761406390,1761406390
+.long	1676797112,1676797112
+.long	3403428311,3403428311
+.long	277177154,277177154
+.long	1076008723,1076008723
+.long	538035844,538035844
+.long	2099530373,2099530373
+.long	4164795346,4164795346
+.long	288553390,288553390
+.long	1839278535,1839278535
+.long	1261411869,1261411869
+.long	4080055004,4080055004
+.long	3964831245,3964831245
+.long	3504587127,3504587127
+.long	1813426987,1813426987
+.long	2579067049,2579067049
+.long	4199060497,4199060497
+.long	577038663,577038663
+.long	3297574056,3297574056
+.long	440397984,440397984
+.long	3626794326,3626794326
+.long	4019204898,4019204898
+.long	3343796615,3343796615
+.long	3251714265,3251714265
+.long	4272081548,4272081548
+.long	906744984,906744984
+.long	3481400742,3481400742
+.long	685669029,685669029
+.long	646887386,646887386
+.long	2764025151,2764025151
+.long	3835509292,3835509292
+.long	227702864,227702864
+.long	2613862250,2613862250
+.long	1648787028,1648787028
+.long	3256061430,3256061430
+.long	3904428176,3904428176
+.long	1593260334,1593260334
+.long	4121936770,4121936770
+.long	3196083615,3196083615
+.long	2090061929,2090061929
+.long	2838353263,2838353263
+.long	3004310991,3004310991
+.long	999926984,999926984
+.long	2809993232,2809993232
+.long	1852021992,1852021992
+.long	2075868123,2075868123
+.long	158869197,158869197
+.long	4095236462,4095236462
+.long	28809964,28809964
+.long	2828685187,2828685187
+.long	1701746150,1701746150
+.long	2129067946,2129067946
+.long	147831841,147831841
+.long	3873969647,3873969647
+.long	3650873274,3650873274
+.long	3459673930,3459673930
+.long	3557400554,3557400554
+.long	3598495785,3598495785
+.long	2947720241,2947720241
+.long	824393514,824393514
+.long	815048134,815048134
+.long	3227951669,3227951669
+.long	935087732,935087732
+.long	2798289660,2798289660
+.long	2966458592,2966458592
+.long	366520115,366520115
+.long	1251476721,1251476721
+.long	4158319681,4158319681
+.long	240176511,240176511
+.long	804688151,804688151
+.long	2379631990,2379631990
+.long	1303441219,1303441219
+.long	1414376140,1414376140
+.long	3741619940,3741619940
+.long	3820343710,3820343710
+.long	461924940,461924940
+.long	3089050817,3089050817
+.long	2136040774,2136040774
+.long	82468509,82468509
+.long	1563790337,1563790337
+.long	1937016826,1937016826
+.long	776014843,776014843
+.long	1511876531,1511876531
+.long	1389550482,1389550482
+.long	861278441,861278441
+.long	323475053,323475053
+.long	2355222426,2355222426
+.long	2047648055,2047648055
+.long	2383738969,2383738969
+.long	2302415851,2302415851
+.long	3995576782,3995576782
+.long	902390199,902390199
+.long	3991215329,3991215329
+.long	1018251130,1018251130
+.long	1507840668,1507840668
+.long	1064563285,1064563285
+.long	2043548696,2043548696
+.long	3208103795,3208103795
+.long	3939366739,3939366739
+.long	1537932639,1537932639
+.long	342834655,342834655
+.long	2262516856,2262516856
+.long	2180231114,2180231114
+.long	1053059257,1053059257
+.long	741614648,741614648
+.long	1598071746,1598071746
+.long	1925389590,1925389590
+.long	203809468,203809468
+.long	2336832552,2336832552
+.long	1100287487,1100287487
+.long	1895934009,1895934009
+.long	3736275976,3736275976
+.long	2632234200,2632234200
+.long	2428589668,2428589668
+.long	1636092795,1636092795
+.long	1890988757,1890988757
+.long	1952214088,1952214088
+.long	1113045200,1113045200
+.byte	82,9,106,213,48,54,165,56
+.byte	191,64,163,158,129,243,215,251
+.byte	124,227,57,130,155,47,255,135
+.byte	52,142,67,68,196,222,233,203
+.byte	84,123,148,50,166,194,35,61
+.byte	238,76,149,11,66,250,195,78
+.byte	8,46,161,102,40,217,36,178
+.byte	118,91,162,73,109,139,209,37
+.byte	114,248,246,100,134,104,152,22
+.byte	212,164,92,204,93,101,182,146
+.byte	108,112,72,80,253,237,185,218
+.byte	94,21,70,87,167,141,157,132
+.byte	144,216,171,0,140,188,211,10
+.byte	247,228,88,5,184,179,69,6
+.byte	208,44,30,143,202,63,15,2
+.byte	193,175,189,3,1,19,138,107
+.byte	58,145,17,65,79,103,220,234
+.byte	151,242,207,206,240,180,230,115
+.byte	150,172,116,34,231,173,53,133
+.byte	226,249,55,232,28,117,223,110
+.byte	71,241,26,113,29,41,197,137
+.byte	111,183,98,14,170,24,190,27
+.byte	252,86,62,75,198,210,121,32
+.byte	154,219,192,254,120,205,90,244
+.byte	31,221,168,51,136,7,199,49
+.byte	177,18,16,89,39,128,236,95
+.byte	96,81,127,169,25,181,74,13
+.byte	45,229,122,159,147,201,156,239
+.byte	160,224,59,77,174,42,245,176
+.byte	200,235,187,60,131,83,153,97
+.byte	23,43,4,126,186,119,214,38
+.byte	225,105,20,99,85,33,12,125
+.byte	82,9,106,213,48,54,165,56
+.byte	191,64,163,158,129,243,215,251
+.byte	124,227,57,130,155,47,255,135
+.byte	52,142,67,68,196,222,233,203
+.byte	84,123,148,50,166,194,35,61
+.byte	238,76,149,11,66,250,195,78
+.byte	8,46,161,102,40,217,36,178
+.byte	118,91,162,73,109,139,209,37
+.byte	114,248,246,100,134,104,152,22
+.byte	212,164,92,204,93,101,182,146
+.byte	108,112,72,80,253,237,185,218
+.byte	94,21,70,87,167,141,157,132
+.byte	144,216,171,0,140,188,211,10
+.byte	247,228,88,5,184,179,69,6
+.byte	208,44,30,143,202,63,15,2
+.byte	193,175,189,3,1,19,138,107
+.byte	58,145,17,65,79,103,220,234
+.byte	151,242,207,206,240,180,230,115
+.byte	150,172,116,34,231,173,53,133
+.byte	226,249,55,232,28,117,223,110
+.byte	71,241,26,113,29,41,197,137
+.byte	111,183,98,14,170,24,190,27
+.byte	252,86,62,75,198,210,121,32
+.byte	154,219,192,254,120,205,90,244
+.byte	31,221,168,51,136,7,199,49
+.byte	177,18,16,89,39,128,236,95
+.byte	96,81,127,169,25,181,74,13
+.byte	45,229,122,159,147,201,156,239
+.byte	160,224,59,77,174,42,245,176
+.byte	200,235,187,60,131,83,153,97
+.byte	23,43,4,126,186,119,214,38
+.byte	225,105,20,99,85,33,12,125
+.byte	82,9,106,213,48,54,165,56
+.byte	191,64,163,158,129,243,215,251
+.byte	124,227,57,130,155,47,255,135
+.byte	52,142,67,68,196,222,233,203
+.byte	84,123,148,50,166,194,35,61
+.byte	238,76,149,11,66,250,195,78
+.byte	8,46,161,102,40,217,36,178
+.byte	118,91,162,73,109,139,209,37
+.byte	114,248,246,100,134,104,152,22
+.byte	212,164,92,204,93,101,182,146
+.byte	108,112,72,80,253,237,185,218
+.byte	94,21,70,87,167,141,157,132
+.byte	144,216,171,0,140,188,211,10
+.byte	247,228,88,5,184,179,69,6
+.byte	208,44,30,143,202,63,15,2
+.byte	193,175,189,3,1,19,138,107
+.byte	58,145,17,65,79,103,220,234
+.byte	151,242,207,206,240,180,230,115
+.byte	150,172,116,34,231,173,53,133
+.byte	226,249,55,232,28,117,223,110
+.byte	71,241,26,113,29,41,197,137
+.byte	111,183,98,14,170,24,190,27
+.byte	252,86,62,75,198,210,121,32
+.byte	154,219,192,254,120,205,90,244
+.byte	31,221,168,51,136,7,199,49
+.byte	177,18,16,89,39,128,236,95
+.byte	96,81,127,169,25,181,74,13
+.byte	45,229,122,159,147,201,156,239
+.byte	160,224,59,77,174,42,245,176
+.byte	200,235,187,60,131,83,153,97
+.byte	23,43,4,126,186,119,214,38
+.byte	225,105,20,99,85,33,12,125
+.byte	82,9,106,213,48,54,165,56
+.byte	191,64,163,158,129,243,215,251
+.byte	124,227,57,130,155,47,255,135
+.byte	52,142,67,68,196,222,233,203
+.byte	84,123,148,50,166,194,35,61
+.byte	238,76,149,11,66,250,195,78
+.byte	8,46,161,102,40,217,36,178
+.byte	118,91,162,73,109,139,209,37
+.byte	114,248,246,100,134,104,152,22
+.byte	212,164,92,204,93,101,182,146
+.byte	108,112,72,80,253,237,185,218
+.byte	94,21,70,87,167,141,157,132
+.byte	144,216,171,0,140,188,211,10
+.byte	247,228,88,5,184,179,69,6
+.byte	208,44,30,143,202,63,15,2
+.byte	193,175,189,3,1,19,138,107
+.byte	58,145,17,65,79,103,220,234
+.byte	151,242,207,206,240,180,230,115
+.byte	150,172,116,34,231,173,53,133
+.byte	226,249,55,232,28,117,223,110
+.byte	71,241,26,113,29,41,197,137
+.byte	111,183,98,14,170,24,190,27
+.byte	252,86,62,75,198,210,121,32
+.byte	154,219,192,254,120,205,90,244
+.byte	31,221,168,51,136,7,199,49
+.byte	177,18,16,89,39,128,236,95
+.byte	96,81,127,169,25,181,74,13
+.byte	45,229,122,159,147,201,156,239
+.byte	160,224,59,77,174,42,245,176
+.byte	200,235,187,60,131,83,153,97
+.byte	23,43,4,126,186,119,214,38
+.byte	225,105,20,99,85,33,12,125
+.size	_x86_AES_decrypt,.-_x86_AES_decrypt
+.globl	asm_AES_decrypt	# Exported entry: decrypts one 16-byte block. Stack arguments in order: input pointer, output pointer, key schedule pointer.
+.hidden	asm_AES_decrypt
+.type	asm_AES_decrypt,@function
+.align	16
+asm_AES_decrypt:
+.L_asm_AES_decrypt_begin:
+	pushl	%ebp	# save the four registers restored by the pops before each ret
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi	# esi = 1st argument (input block)
+	movl	28(%esp),%edi	# edi = 3rd argument (key schedule)
+	movl	%esp,%eax	# keep the entry stack pointer
+	subl	$36,%esp
+	andl	$-64,%esp	# carve a 64-byte aligned scratch frame
+	leal	-127(%edi),%ebx	# shift the frame down by a multiple of 64 (0..960) derived from the
+	subl	%esp,%ebx	# key address (presumably to keep frame and key schedule from
+	negl	%ebx	# aliasing in the cache - confirm)
+	andl	$960,%ebx
+	subl	%ebx,%esp
+	addl	$4,%esp
+	movl	%eax,28(%esp)	# entry stack pointer is kept at 28(%esp)
+	call	.L010pic_point	# call/pop pair yields the current address for PIC addressing
+.L010pic_point:
+	popl	%ebp
+	leal	OPENSSL_ia32cap_P-.L010pic_point(%ebp),%eax	# eax = &OPENSSL_ia32cap_P
+	leal	.LAES_Td-.L010pic_point(%ebp),%ebp	# ebp = &.LAES_Td
+	leal	764(%esp),%ebx	# ebx = 0, 256, 512 or 768 depending on frame and table addresses:
+	subl	%ebp,%ebx	# selects one of the four identical 256-byte tables stored
+	andl	$768,%ebx	# after the 2048-byte dword table
+	leal	2176(%ebp,%ebx,1),%ebp	# ebp = chosen byte table + 128 (2176 = 2048 + 128)
+	btl	$25,(%eax)	# bit 25 of the first capability dword (presumably the SSE flag - confirm)
+	jnc	.L011x86	# bit clear: take the integer-only path
+	movq	(%esi),%mm0	# load the 16-byte input block into mm0:mm4
+	movq	8(%esi),%mm4
+	call	_sse_AES_decrypt_compact
+	movl	28(%esp),%esp	# back to the entry stack pointer
+	movl	24(%esp),%esi	# esi = 2nd argument (output block)
+	movq	%mm0,(%esi)
+	movq	%mm4,8(%esi)
+	emms	# leave MMX state so x87 code can run again
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	16
+.L011x86:
+	movl	%ebp,24(%esp)	# stash the table pointer in the frame
+	movl	(%esi),%eax	# load the 16-byte input block into eax..edx
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	call	_x86_AES_decrypt_compact
+	movl	28(%esp),%esp	# back to the entry stack pointer
+	movl	24(%esp),%esi	# esi = 2nd argument (output block)
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	asm_AES_decrypt,.-.L_asm_AES_decrypt_begin
+.globl	asm_AES_cbc_encrypt	# Exported entry: CBC-mode AES. Stack arguments in order: in, out, length in bytes, key schedule, IV pointer, enc flag (nonzero = encrypt).
+.hidden	asm_AES_cbc_encrypt	# The IV buffer is updated in place. A fast path (big tables) handles lengths >= 512 that are multiples of 16;
+.type	asm_AES_cbc_encrypt,@function	# everything else goes through the slow path built on the *_compact routines.
+.align	16
+asm_AES_cbc_encrypt:
+.L_asm_AES_cbc_encrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	28(%esp),%ecx	# ecx = length (3rd argument)
+	cmpl	$0,%ecx
+	je	.L012drop_out	# nothing to do for length 0
+	call	.L013pic_point	# call/pop pair yields the current address for PIC addressing
+.L013pic_point:
+	popl	%ebp
+	leal	OPENSSL_ia32cap_P-.L013pic_point(%ebp),%eax	# eax = &OPENSSL_ia32cap_P
+	cmpl	$0,40(%esp)	# enc flag (6th argument); the leal below leaves the flags intact
+	leal	.LAES_Te-.L013pic_point(%ebp),%ebp	# ebp = &.LAES_Te
+	jne	.L014picked_te	# enc != 0: keep the Te table
+	leal	.LAES_Td-.LAES_Te(%ebp),%ebp	# enc == 0: switch to &.LAES_Td
+.L014picked_te:
+	pushfl	# save EFLAGS; every exit below restores them with popfl
+	cld	# string instructions must run forwards
+	cmpl	$512,%ecx
+	jb	.L015slow_way	# short input
+	testl	$15,%ecx
+	jnz	.L015slow_way	# length not a multiple of 16
+	btl	$28,(%eax)	# bit 28 of the first capability dword (presumably the hyper-threading flag - confirm)
+	jc	.L015slow_way
+	leal	-324(%esp),%esi	# fast path: candidate frame 324 bytes below the stack pointer
+	andl	$-64,%esi	# 64-byte aligned
+	movl	%ebp,%eax
+	leal	2304(%ebp),%ebx
+	movl	%esi,%edx
+	andl	$4095,%eax	# page offset of the table start
+	andl	$4095,%ebx	# page offset of table + 2304
+	andl	$4095,%edx	# page offset of the frame
+	cmpl	%ebx,%edx
+	jb	.L016tbl_break_out
+	subl	%ebx,%edx	# frame offset >= table-end offset: slide the frame down so that
+	subl	%edx,%esi	# its page offset equals that of table + 2304
+	jmp	.L017tbl_ok
+.align	4
+.L016tbl_break_out:
+	subl	%eax,%edx	# otherwise place the frame 384 bytes below the page offset of
+	andl	$4095,%edx	# the table start (modulo 4096)
+	addl	$384,%edx
+	subl	%edx,%esi
+.align	4
+.L017tbl_ok:
+	leal	24(%esp),%edx	# edx = address of the 1st argument (5 saved dwords + return address)
+	xchgl	%esi,%esp	# switch to the new frame; esi = old stack pointer
+	addl	$4,%esp
+	movl	%ebp,24(%esp)	# frame: 24 = table pointer
+	movl	%esi,28(%esp)	# frame: 28 = old stack pointer
+	movl	(%edx),%eax	# eax = in
+	movl	4(%edx),%ebx	# ebx = out
+	movl	12(%edx),%edi	# edi = key schedule
+	movl	16(%edx),%esi	# esi = IV pointer
+	movl	20(%edx),%edx	# edx = enc flag
+	movl	%eax,32(%esp)	# frame: 32 = in
+	movl	%ebx,36(%esp)	# frame: 36 = out
+	movl	%ecx,40(%esp)	# frame: 40 = remaining length
+	movl	%edi,44(%esp)	# frame: 44 = key pointer
+	movl	%esi,48(%esp)	# frame: 48 = IV pointer
+	movl	$0,316(%esp)	# 316 = 76 + 240: slot that receives dword 60 of a key copy; zero means no copy was made
+	movl	%edi,%ebx
+	movl	$61,%ecx	# dword count for the optional key copy (244 bytes)
+	subl	%ebp,%ebx
+	movl	%edi,%esi	# copy source = key schedule
+	andl	$4095,%ebx	# ebx = (key - table) modulo 4096
+	leal	76(%esp),%edi	# copy destination inside the frame
+	cmpl	$2304,%ebx
+	jb	.L018do_copy	# copy when that distance is below 2304 ...
+	cmpl	$3852,%ebx
+	jb	.L019skip_copy	# ... or at least 3852 (= 4096 - 244); otherwise use the key in place
+.align	4
+.L018do_copy:
+	movl	%edi,44(%esp)	# from now on the key pointer refers to the stack copy
+.long	2784229001	# 0xA5F3F689 = bytes 89 F6 F3 A5: movl %esi,%esi ; rep movsl (copies %ecx dwords)
+.L019skip_copy:
+	movl	$16,%edi
+.align	4
+.L020prefetch_tbl:	# 16 iterations x 128 bytes: touch the first 2048 bytes of the table
+	movl	(%ebp),%eax	# loaded values are discarded
+	movl	32(%ebp),%ebx
+	movl	64(%ebp),%ecx
+	movl	96(%ebp),%esi
+	leal	128(%ebp),%ebp
+	subl	$1,%edi
+	jnz	.L020prefetch_tbl
+	subl	$2048,%ebp	# back to the table start
+	movl	32(%esp),%esi	# esi = in
+	movl	48(%esp),%edi	# edi = IV pointer
+	cmpl	$0,%edx
+	je	.L021fast_decrypt	# enc == 0
+	movl	(%edi),%eax	# chaining words 0 and 1 come from the IV
+	movl	4(%edi),%ebx
+.align	16
+.L022fast_enc_loop:
+	movl	8(%edi),%ecx	# chaining words 2 and 3: from the IV on the first pass, afterwards from the
+	movl	12(%edi),%edx	# block just written to out (eax and ebx still hold its words 0 and 1)
+	xorl	(%esi),%eax	# XOR the chaining value with the plaintext block
+	xorl	4(%esi),%ebx
+	xorl	8(%esi),%ecx
+	xorl	12(%esi),%edx
+	movl	44(%esp),%edi	# edi = key pointer
+	call	_x86_AES_encrypt
+	movl	32(%esp),%esi
+	movl	36(%esp),%edi
+	movl	%eax,(%edi)	# store the ciphertext block
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	leal	16(%esi),%esi	# advance in
+	movl	40(%esp),%ecx
+	movl	%esi,32(%esp)
+	leal	16(%edi),%edx	# advance out (edi keeps pointing at the block just written)
+	movl	%edx,36(%esp)
+	subl	$16,%ecx
+	movl	%ecx,40(%esp)	# movl leaves the flags from subl for jnz
+	jnz	.L022fast_enc_loop
+	movl	48(%esp),%esi	# write the last ciphertext block back to the IV buffer
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	cmpl	$0,316(%esp)	# nonzero only if the key copy put a nonzero dword here
+	movl	44(%esp),%edi
+	je	.L023skip_ezero
+	movl	$60,%ecx	# wipe 60 dwords of the key copy
+	xorl	%eax,%eax
+.align	4
+.long	2884892297	# 0xABF3F689 = bytes 89 F6 F3 AB: movl %esi,%esi ; rep stosl
+.L023skip_ezero:
+	movl	28(%esp),%esp	# back to the old stack pointer
+	popfl
+.L012drop_out:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+	pushfl	# never executed: follows an unconditional ret and carries no label
+.align	16
+.L021fast_decrypt:
+	cmpl	36(%esp),%esi
+	je	.L024fast_dec_in_place	# in == out needs a temporary copy of each ciphertext block
+	movl	%edi,52(%esp)	# frame: 52 = pointer to the previous ciphertext block (initially the IV)
+.align	4
+.align	16
+.L025fast_dec_loop:
+	movl	(%esi),%eax	# load a ciphertext block
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	movl	44(%esp),%edi	# edi = key pointer
+	call	_x86_AES_decrypt
+	movl	52(%esp),%edi	# previous ciphertext block (or IV)
+	movl	40(%esp),%esi	# this value is overwritten below before being used
+	xorl	(%edi),%eax	# XOR with the previous block to get the plaintext
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	movl	36(%esp),%edi	# edi = out
+	movl	32(%esp),%esi	# esi = in
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	40(%esp),%ecx
+	movl	%esi,52(%esp)	# the current ciphertext block becomes the previous one
+	leal	16(%esi),%esi
+	movl	%esi,32(%esp)
+	leal	16(%edi),%edi
+	movl	%edi,36(%esp)
+	subl	$16,%ecx
+	movl	%ecx,40(%esp)	# movl leaves the flags from subl for jnz
+	jnz	.L025fast_dec_loop
+	movl	52(%esp),%edi	# copy the last ciphertext block into the IV buffer
+	movl	48(%esp),%esi
+	movl	(%edi),%eax
+	movl	4(%edi),%ebx
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	jmp	.L026fast_dec_out
+.align	16
+.L024fast_dec_in_place:
+.L027fast_dec_in_place_loop:
+	movl	(%esi),%eax	# load a ciphertext block
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	leal	60(%esp),%edi	# keep a copy at 60(%esp): the output store will overwrite it
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	44(%esp),%edi	# edi = key pointer
+	call	_x86_AES_decrypt
+	movl	48(%esp),%edi	# edi = IV buffer (holds the previous ciphertext block)
+	movl	36(%esp),%esi	# esi = out
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	movl	%eax,(%esi)	# store the plaintext block
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	leal	16(%esi),%esi
+	movl	%esi,36(%esp)
+	leal	60(%esp),%esi	# move the saved ciphertext block into the IV buffer
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	32(%esp),%esi
+	movl	40(%esp),%ecx
+	leal	16(%esi),%esi	# advance in
+	movl	%esi,32(%esp)
+	subl	$16,%ecx
+	movl	%ecx,40(%esp)	# movl leaves the flags from subl for jnz
+	jnz	.L027fast_dec_in_place_loop
+.align	4
+.L026fast_dec_out:
+	cmpl	$0,316(%esp)	# same key-copy check as on the encrypt side
+	movl	44(%esp),%edi
+	je	.L028skip_dzero
+	movl	$60,%ecx	# wipe 60 dwords of the key copy
+	xorl	%eax,%eax
+.align	4
+.long	2884892297	# 0xABF3F689 = bytes 89 F6 F3 AB: movl %esi,%esi ; rep stosl
+.L028skip_dzero:
+	movl	28(%esp),%esp	# back to the old stack pointer
+	popfl
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+	pushfl	# never executed (follows ret, no label)
+.align	16
+.L015slow_way:
+	movl	(%eax),%eax	# eax = first capability dword
+	movl	36(%esp),%edi	# edi = key schedule (4th argument; offsets include the pushed EFLAGS)
+	leal	-80(%esp),%esi	# candidate frame 80 bytes below the stack pointer
+	andl	$-64,%esi	# 64-byte aligned
+	leal	-143(%edi),%ebx	# shift the frame down by a multiple of 64 (0..960) derived from the
+	subl	%esi,%ebx	# key address (presumably to avoid cache aliasing with the key - confirm)
+	negl	%ebx
+	andl	$960,%ebx
+	subl	%ebx,%esi
+	leal	768(%esi),%ebx	# ebx = 0, 256, 512 or 768: choose one of the four 256-byte
+	subl	%ebp,%ebx	# table copies that follow the first 2048 bytes of the table
+	andl	$768,%ebx
+	leal	2176(%ebp,%ebx,1),%ebp	# ebp = chosen byte table + 128
+	leal	24(%esp),%edx	# edx = address of the 1st argument
+	xchgl	%esi,%esp	# switch to the new frame; esi = old stack pointer
+	addl	$4,%esp
+	movl	%ebp,24(%esp)	# frame: 24 = table pointer
+	movl	%esi,28(%esp)	# frame: 28 = old stack pointer
+	movl	%eax,52(%esp)	# frame: 52 = capability dword
+	movl	(%edx),%eax	# eax = in
+	movl	4(%edx),%ebx	# ebx = out
+	movl	16(%edx),%esi	# esi = IV pointer
+	movl	20(%edx),%edx	# edx = enc flag
+	movl	%eax,32(%esp)	# frame: 32 = in
+	movl	%ebx,36(%esp)	# frame: 36 = out
+	movl	%ecx,40(%esp)	# frame: 40 = remaining length
+	movl	%edi,44(%esp)	# frame: 44 = key pointer
+	movl	%esi,48(%esp)	# frame: 48 = IV pointer
+	movl	%esi,%edi	# edi = IV pointer
+	movl	%eax,%esi	# esi = in
+	cmpl	$0,%edx
+	je	.L029slow_decrypt	# enc == 0
+	cmpl	$16,%ecx	# movl below keeps these flags for jb
+	movl	%ebx,%edx	# edx = out, as expected by the tail code
+	jb	.L030slow_enc_tail	# fewer than 16 bytes in total
+	btl	$25,52(%esp)	# capability bit 25 (presumably SSE - confirm)
+	jnc	.L031slow_enc_x86
+	movq	(%edi),%mm0	# chaining value starts as the IV
+	movq	8(%edi),%mm4
+.align	16
+.L032slow_enc_loop_sse:
+	pxor	(%esi),%mm0	# XOR the chaining value with the plaintext block
+	pxor	8(%esi),%mm4
+	movl	44(%esp),%edi	# edi = key pointer
+	call	_sse_AES_encrypt_compact
+	movl	32(%esp),%esi
+	movl	36(%esp),%edi
+	movl	40(%esp),%ecx
+	movq	%mm0,(%edi)	# store the ciphertext block; mm0:mm4 stay as the chaining value
+	movq	%mm4,8(%edi)
+	leal	16(%esi),%esi
+	movl	%esi,32(%esp)
+	leal	16(%edi),%edx
+	movl	%edx,36(%esp)
+	subl	$16,%ecx
+	cmpl	$16,%ecx	# movl below keeps these flags for jae
+	movl	%ecx,40(%esp)
+	jae	.L032slow_enc_loop_sse	# another full block is available
+	testl	$15,%ecx
+	jnz	.L030slow_enc_tail	# 1..15 bytes remain
+	movl	48(%esp),%esi	# write the last ciphertext block back to the IV buffer
+	movq	%mm0,(%esi)
+	movq	%mm4,8(%esi)
+	emms
+	movl	28(%esp),%esp	# back to the old stack pointer
+	popfl
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+	pushfl	# never executed (follows ret, no label)
+.align	16
+.L031slow_enc_x86:
+	movl	(%edi),%eax	# chaining words 0 and 1 come from the IV
+	movl	4(%edi),%ebx
+.align	4
+.L033slow_enc_loop_x86:
+	movl	8(%edi),%ecx	# chaining words 2 and 3 from wherever %edi points (IV or last output block)
+	movl	12(%edi),%edx
+	xorl	(%esi),%eax	# XOR the chaining value with the plaintext block
+	xorl	4(%esi),%ebx
+	xorl	8(%esi),%ecx
+	xorl	12(%esi),%edx
+	movl	44(%esp),%edi	# edi = key pointer
+	call	_x86_AES_encrypt_compact
+	movl	32(%esp),%esi
+	movl	36(%esp),%edi
+	movl	%eax,(%edi)	# store the ciphertext block
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	40(%esp),%ecx
+	leal	16(%esi),%esi
+	movl	%esi,32(%esp)
+	leal	16(%edi),%edx
+	movl	%edx,36(%esp)
+	subl	$16,%ecx
+	cmpl	$16,%ecx	# movl below keeps these flags for jae
+	movl	%ecx,40(%esp)
+	jae	.L033slow_enc_loop_x86	# another full block is available
+	testl	$15,%ecx
+	jnz	.L030slow_enc_tail	# 1..15 bytes remain
+	movl	48(%esp),%esi	# write the last ciphertext block back to the IV buffer
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	movl	28(%esp),%esp	# back to the old stack pointer
+	popfl
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+	pushfl	# never executed (follows ret, no label)
+.align	16
+.L030slow_enc_tail:	# here: %ecx = leftover bytes (1..15), %esi = in, %edx = out
+	emms
+	movl	%edx,%edi	# edi = out
+	movl	$16,%ebx
+	subl	%ecx,%ebx	# ebx = number of padding bytes (16 - leftover)
+	cmpl	%esi,%edi
+	je	.L034enc_in_place
+.align	4
+.long	2767451785	# 0xA4F3F689 = bytes 89 F6 F3 A4: movl %esi,%esi ; rep movsb (copy leftover bytes to out)
+	jmp	.L035enc_skip_in_place
+.L034enc_in_place:
+	leal	(%edi,%ecx,1),%edi	# data already in place: just step past it
+.L035enc_skip_in_place:
+	movl	%ebx,%ecx
+	xorl	%eax,%eax
+.align	4
+.long	2868115081	# 0xAAF3F689 = bytes 89 F6 F3 AA: movl %esi,%esi ; rep stosb (zero-fill up to 16 bytes)
+	movl	48(%esp),%edi	# edi = IV pointer
+	movl	%edx,%esi	# the padded block in out now serves as the input block
+	movl	(%edi),%eax
+	movl	4(%edi),%ebx
+	movl	$16,40(%esp)	# exactly one more block to process
+	jmp	.L033slow_enc_loop_x86
+.align	16
+.L029slow_decrypt:
+	btl	$25,52(%esp)	# capability bit 25 (presumably SSE - confirm)
+	jnc	.L036slow_dec_loop_x86
+.align	4
+.L037slow_dec_loop_sse:
+	movq	(%esi),%mm0	# load a ciphertext block
+	movq	8(%esi),%mm4
+	movl	44(%esp),%edi	# edi = key pointer
+	call	_sse_AES_decrypt_compact
+	movl	32(%esp),%esi	# esi = in
+	leal	60(%esp),%eax	# eax = 16-byte scratch buffer in the frame
+	movl	36(%esp),%ebx	# ebx = out
+	movl	40(%esp),%ecx	# ecx = remaining length
+	movl	48(%esp),%edi	# edi = IV pointer
+	movq	(%esi),%mm1	# reload the ciphertext block
+	movq	8(%esi),%mm5
+	pxor	(%edi),%mm0	# XOR with the IV buffer to get the plaintext
+	pxor	8(%edi),%mm4
+	movq	%mm1,(%edi)	# the ciphertext block becomes the new IV
+	movq	%mm5,8(%edi)
+	subl	$16,%ecx
+	jc	.L038slow_dec_partial_sse	# fewer than 16 bytes were left
+	movq	%mm0,(%ebx)	# store the plaintext block
+	movq	%mm4,8(%ebx)
+	leal	16(%ebx),%ebx
+	movl	%ebx,36(%esp)
+	leal	16(%esi),%esi
+	movl	%esi,32(%esp)
+	movl	%ecx,40(%esp)	# no instruction since subl has changed the flags
+	jnz	.L037slow_dec_loop_sse
+	emms
+	movl	28(%esp),%esp	# back to the old stack pointer
+	popfl
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+	pushfl	# never executed (follows ret, no label)
+.align	16
+.L038slow_dec_partial_sse:
+	movq	%mm0,(%eax)	# park the plaintext block in the scratch buffer
+	movq	%mm4,8(%eax)
+	emms
+	addl	$16,%ecx	# undo the subtraction: ecx = leftover byte count
+	movl	%ebx,%edi	# destination = out
+	movl	%eax,%esi	# source = scratch buffer
+.align	4
+.long	2767451785	# 0xA4F3F689 = bytes 89 F6 F3 A4: movl %esi,%esi ; rep movsb (copy only the leftover bytes)
+	movl	28(%esp),%esp	# back to the old stack pointer
+	popfl
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+	pushfl	# never executed (follows ret, no label)
+.align	16
+.L036slow_dec_loop_x86:
+	movl	(%esi),%eax	# load a ciphertext block
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	leal	60(%esp),%edi	# keep a copy in the scratch buffer at 60(%esp)
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	44(%esp),%edi	# edi = key pointer
+	call	_x86_AES_decrypt_compact
+	movl	48(%esp),%edi	# edi = IV pointer
+	movl	40(%esp),%esi	# esi = remaining length
+	xorl	(%edi),%eax	# XOR with the IV buffer to get the plaintext
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	subl	$16,%esi
+	jc	.L039slow_dec_partial_x86	# fewer than 16 bytes were left
+	movl	%esi,40(%esp)
+	movl	36(%esp),%esi	# esi = out
+	movl	%eax,(%esi)	# store the plaintext block
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	leal	16(%esi),%esi
+	movl	%esi,36(%esp)
+	leal	60(%esp),%esi	# saved ciphertext block becomes the new IV
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	32(%esp),%esi
+	leal	16(%esi),%esi	# advance in
+	movl	%esi,32(%esp)	# only movl and leal since subl, so its flags are still valid
+	jnz	.L036slow_dec_loop_x86
+	movl	28(%esp),%esp	# back to the old stack pointer
+	popfl
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+	pushfl	# never executed (follows ret, no label)
+.align	16
+.L039slow_dec_partial_x86:
+	leal	60(%esp),%esi	# park the plaintext block in the scratch buffer
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	movl	32(%esp),%esi	# copy the ciphertext block from in into the IV buffer
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	40(%esp),%ecx	# ecx = leftover byte count (the slot was not updated on this path)
+	movl	36(%esp),%edi	# destination = out
+	leal	60(%esp),%esi	# source = scratch buffer
+.align	4
+.long	2767451785	# 0xA4F3F689 = bytes 89 F6 F3 A4: movl %esi,%esi ; rep movsb (copy only the leftover bytes)
+	movl	28(%esp),%esp	# back to the old stack pointer
+	popfl
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	asm_AES_cbc_encrypt,.-.L_asm_AES_cbc_encrypt_begin
+.hidden	_x86_AES_set_encrypt_key
+.type	_x86_AES_set_encrypt_key,@function
+.align	16
+_x86_AES_set_encrypt_key:	/* Internal key-expansion helper. Status left in %eax: 0 = ok, -1 = zero pointer, -2 = bit count other than 128/192/256. */
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	24(%esp),%esi	/* source key bytes; offset 24 = 4 saved registers + 2 return addresses (both exported wrappers enter via call) */
+	movl	32(%esp),%edi	/* destination key schedule */
+	testl	$-1,%esi
+	jz	.L040badpointer
+	testl	$-1,%edi
+	jz	.L040badpointer
+	call	.L041pic_point	/* call/pop pair obtains the current address for position-independent table lookup */
+.L041pic_point:
+	popl	%ebp
+	leal	.LAES_Te-.L041pic_point(%ebp),%ebp
+	leal	2176(%ebp),%ebp	/* %ebp = .LAES_Te + 2176; the byte table used below sits at -128(%ebp) */
+	movl	-128(%ebp),%eax	/* these eight loads step 32 bytes across -128..96(%ebp); the values are overwritten below */
+	movl	-96(%ebp),%ebx	/* NOTE(review): presumably a cache warm-up of the 256-byte table - confirm against the upstream perlasm */
+	movl	-64(%ebp),%ecx
+	movl	-32(%ebp),%edx
+	movl	(%ebp),%eax
+	movl	32(%ebp),%ebx
+	movl	64(%ebp),%ecx
+	movl	96(%ebp),%edx
+	movl	28(%esp),%ecx	/* key length in bits */
+	cmpl	$128,%ecx
+	je	.L04210rounds
+	cmpl	$192,%ecx
+	je	.L04312rounds
+	cmpl	$256,%ecx
+	je	.L04414rounds
+	movl	$-2,%eax	/* unsupported bit count */
+	jmp	.L045exit
+.L04210rounds:	/* 128-bit key: copy 4 words, then derive 10 further groups of 4 words */
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	xorl	%ecx,%ecx	/* %ecx = iteration index, also indexes the word table at 896(%ebp) */
+	jmp	.L04610shortcut	/* first pass: words 0 and 3 are already in %eax and %edx */
+.align	4
+.L04710loop:
+	movl	(%edi),%eax
+	movl	12(%edi),%edx
+.L04610shortcut:
+	movzbl	%dl,%esi	/* map each byte of %edx through the byte table and XOR it into %eax at a rotated position */
+	movzbl	-128(%ebp,%esi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$24,%ebx	/* byte 0 -> bits 31..24 */
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx	/* byte 1 -> bits 7..0 */
+	shrl	$16,%edx
+	movzbl	%dl,%esi
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$8,%ebx	/* byte 2 -> bits 15..8 */
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	shll	$16,%ebx	/* byte 3 -> bits 23..16 */
+	xorl	%ebx,%eax
+	xorl	896(%ebp,%ecx,4),%eax	/* per-iteration 32-bit constant; presumably the AES round constant - table is outside this view */
+	movl	%eax,16(%edi)	/* new word 0, then each following word = previous new word XOR old word */
+	xorl	4(%edi),%eax
+	movl	%eax,20(%edi)
+	xorl	8(%edi),%eax
+	movl	%eax,24(%edi)
+	xorl	12(%edi),%eax
+	movl	%eax,28(%edi)
+	incl	%ecx
+	addl	$16,%edi
+	cmpl	$10,%ecx
+	jl	.L04710loop
+	movl	$10,80(%edi)	/* %edi advanced 160 bytes, so this writes 10 at offset 240 of the schedule */
+	xorl	%eax,%eax	/* status 0 */
+	jmp	.L045exit
+.L04312rounds:	/* 192-bit key: copy 6 words, then derive groups of 6 words */
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	16(%esi),%ecx
+	movl	20(%esi),%edx
+	movl	%ecx,16(%edi)
+	movl	%edx,20(%edi)
+	xorl	%ecx,%ecx	/* iteration index */
+	jmp	.L04812shortcut	/* first pass: words 0 and 5 are already in %eax and %edx */
+.align	4
+.L04912loop:
+	movl	(%edi),%eax
+	movl	20(%edi),%edx
+.L04812shortcut:
+	movzbl	%dl,%esi	/* same rotated byte-table substitution as the 128-bit path */
+	movzbl	-128(%ebp,%esi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$24,%ebx
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	shrl	$16,%edx
+	movzbl	%dl,%esi
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$8,%ebx
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%eax
+	xorl	896(%ebp,%ecx,4),%eax
+	movl	%eax,24(%edi)
+	xorl	4(%edi),%eax
+	movl	%eax,28(%edi)
+	xorl	8(%edi),%eax
+	movl	%eax,32(%edi)
+	xorl	12(%edi),%eax
+	movl	%eax,36(%edi)
+	cmpl	$7,%ecx	/* the eighth pass (index 7) stops after 4 words */
+	je	.L05012break
+	incl	%ecx
+	xorl	16(%edi),%eax
+	movl	%eax,40(%edi)
+	xorl	20(%edi),%eax
+	movl	%eax,44(%edi)
+	addl	$24,%edi
+	jmp	.L04912loop
+.L05012break:
+	movl	$12,72(%edi)	/* %edi advanced 7*24 = 168 bytes, so this writes 12 at offset 240 */
+	xorl	%eax,%eax	/* status 0 */
+	jmp	.L045exit
+.L04414rounds:	/* 256-bit key: copy 8 words, then derive groups of 8 words */
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	16(%esi),%eax
+	movl	20(%esi),%ebx
+	movl	24(%esi),%ecx
+	movl	28(%esi),%edx
+	movl	%eax,16(%edi)
+	movl	%ebx,20(%edi)
+	movl	%ecx,24(%edi)
+	movl	%edx,28(%edi)
+	xorl	%ecx,%ecx	/* iteration index */
+	jmp	.L05114shortcut	/* first pass: word 7 is already in %edx */
+.align	4
+.L05214loop:
+	movl	28(%edi),%edx
+.L05114shortcut:
+	movl	(%edi),%eax
+	movzbl	%dl,%esi	/* first half: rotated byte-table substitution plus per-iteration constant */
+	movzbl	-128(%ebp,%esi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$24,%ebx
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	shrl	$16,%edx
+	movzbl	%dl,%esi
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$8,%ebx
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%eax
+	xorl	896(%ebp,%ecx,4),%eax
+	movl	%eax,32(%edi)
+	xorl	4(%edi),%eax
+	movl	%eax,36(%edi)
+	xorl	8(%edi),%eax
+	movl	%eax,40(%edi)
+	xorl	12(%edi),%eax
+	movl	%eax,44(%edi)
+	cmpl	$6,%ecx	/* the seventh pass (index 6) stops after the first 4 words */
+	je	.L05314break
+	incl	%ecx
+	movl	%eax,%edx	/* second half: substitute the word just produced, bytes kept in place (no rotation, no constant) */
+	movl	16(%edi),%eax
+	movzbl	%dl,%esi
+	movzbl	-128(%ebp,%esi,1),%ebx
+	movzbl	%dh,%esi
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	shrl	$16,%edx
+	shll	$8,%ebx
+	movzbl	%dl,%esi
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$16,%ebx
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	shll	$24,%ebx
+	xorl	%ebx,%eax
+	movl	%eax,48(%edi)
+	xorl	20(%edi),%eax
+	movl	%eax,52(%edi)
+	xorl	24(%edi),%eax
+	movl	%eax,56(%edi)
+	xorl	28(%edi),%eax
+	movl	%eax,60(%edi)
+	addl	$32,%edi
+	jmp	.L05214loop
+.L05314break:
+	movl	$14,48(%edi)	/* %edi advanced 6*32 = 192 bytes, so this writes 14 at offset 240 */
+	xorl	%eax,%eax	/* status 0 */
+	jmp	.L045exit
+.L040badpointer:
+	movl	$-1,%eax	/* zero source or destination pointer */
+.L045exit:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	_x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key
+.globl	asm_AES_set_encrypt_key
+.hidden	asm_AES_set_encrypt_key
+.type	asm_AES_set_encrypt_key,@function
+.align	16
+asm_AES_set_encrypt_key:	/* Exported wrapper: runs the internal key-expansion helper and returns its %eax status unchanged. */
+.L_asm_AES_set_encrypt_key_begin:
+	call	_x86_AES_set_encrypt_key	/* the helper reads this function's stack arguments in place, past both return addresses */
+	ret
+.size	asm_AES_set_encrypt_key,.-.L_asm_AES_set_encrypt_key_begin
+.globl	asm_AES_set_decrypt_key
+.hidden	asm_AES_set_decrypt_key
+.type	asm_AES_set_decrypt_key,@function
+.align	16
+asm_AES_set_decrypt_key:	/* Builds the encryption schedule, reverses the round-key order, then transforms the inner round keys in place. Returns 0, or the helper's error status. */
+.L_asm_AES_set_decrypt_key_begin:
+	call	_x86_AES_set_encrypt_key
+	cmpl	$0,%eax
+	je	.L054proceed
+	ret	/* nonzero helper status is returned as-is */
+.L054proceed:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	28(%esp),%esi	/* key schedule pointer (third stack argument after 4 pushes + return address) */
+	movl	240(%esi),%ecx	/* round count stored by the helper */
+	leal	(,%ecx,4),%ecx
+	leal	(%esi,%ecx,4),%edi	/* %edi = schedule + 16*rounds, i.e. the last round key */
+.align	4
+.L055invert:	/* swap 16-byte round keys from both ends until the two pointers meet */
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	(%edi),%ecx
+	movl	4(%edi),%edx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,(%esi)
+	movl	%edx,4(%esi)
+	movl	8(%esi),%eax
+	movl	12(%esi),%ebx
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+	movl	%eax,8(%edi)
+	movl	%ebx,12(%edi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	addl	$16,%esi
+	subl	$16,%edi
+	cmpl	%edi,%esi
+	jne	.L055invert
+	movl	28(%esp),%edi
+	movl	240(%edi),%esi
+	leal	-2(%esi,%esi,1),%esi	/* 2*rounds - 2 */
+	leal	(%edi,%esi,8),%esi	/* schedule + 16*(rounds-1): address of the last inner round key */
+	movl	%esi,28(%esp)	/* reuse the argument slot as the loop bound */
+	movl	16(%edi),%eax	/* preload first word of round key 1 */
+.align	4
+.L056permute:	/* transform one 16-byte round key per pass; NOTE(review): appears to be the AES InvMixColumns step - confirm against FIPS 197 */
+	addl	$16,%edi
+	movl	$2155905152,%ebp	/* 0x80808080: top bit of each byte */
+	andl	%eax,%ebp
+	leal	(%eax,%eax,1),%ebx	/* word doubled; cross-byte carries are masked off below */
+	movl	%ebp,%esi
+	shrl	$7,%ebp
+	subl	%ebp,%esi	/* 0x7f in every byte whose top bit was set */
+	andl	$4278124286,%ebx	/* 0xfefefefe */
+	andl	$454761243,%esi	/* 0x1b1b1b1b: reduction byte for the bytes that overflowed */
+	xorl	%esi,%ebx	/* %ebx = per-byte GF(2^8) double of the input word */
+	movl	$2155905152,%ebp	/* same doubling applied to %ebx, giving the 4x value in %ecx */
+	andl	%ebx,%ebp
+	leal	(%ebx,%ebx,1),%ecx
+	movl	%ebp,%esi
+	shrl	$7,%ebp
+	subl	%ebp,%esi
+	andl	$4278124286,%ecx
+	andl	$454761243,%esi
+	xorl	%eax,%ebx
+	xorl	%esi,%ecx
+	movl	$2155905152,%ebp	/* and once more, giving the 8x value in %edx */
+	andl	%ecx,%ebp
+	leal	(%ecx,%ecx,1),%edx
+	movl	%ebp,%esi
+	shrl	$7,%ebp
+	xorl	%eax,%ecx
+	subl	%ebp,%esi
+	andl	$4278124286,%edx
+	andl	$454761243,%esi
+	roll	$8,%eax
+	xorl	%esi,%edx
+	movl	4(%edi),%ebp	/* fetch the next word early; it becomes the next input below */
+	xorl	%ebx,%eax	/* combine the 1x/2x/4x/8x terms with byte rotations */
+	xorl	%edx,%ebx
+	xorl	%ecx,%eax
+	roll	$24,%ebx
+	xorl	%edx,%ecx
+	xorl	%edx,%eax
+	roll	$16,%ecx
+	xorl	%ebx,%eax
+	roll	$8,%edx
+	xorl	%ecx,%eax
+	movl	%ebp,%ebx
+	xorl	%edx,%eax
+	movl	%eax,(%edi)	/* word 0 done */
+	movl	$2155905152,%ebp	/* word 1: same sequence with the register roles rotated (input in %ebx) */
+	andl	%ebx,%ebp
+	leal	(%ebx,%ebx,1),%ecx
+	movl	%ebp,%esi
+	shrl	$7,%ebp
+	subl	%ebp,%esi
+	andl	$4278124286,%ecx
+	andl	$454761243,%esi
+	xorl	%esi,%ecx
+	movl	$2155905152,%ebp
+	andl	%ecx,%ebp
+	leal	(%ecx,%ecx,1),%edx
+	movl	%ebp,%esi
+	shrl	$7,%ebp
+	subl	%ebp,%esi
+	andl	$4278124286,%edx
+	andl	$454761243,%esi
+	xorl	%ebx,%ecx
+	xorl	%esi,%edx
+	movl	$2155905152,%ebp
+	andl	%edx,%ebp
+	leal	(%edx,%edx,1),%eax
+	movl	%ebp,%esi
+	shrl	$7,%ebp
+	xorl	%ebx,%edx
+	subl	%ebp,%esi
+	andl	$4278124286,%eax
+	andl	$454761243,%esi
+	roll	$8,%ebx
+	xorl	%esi,%eax
+	movl	8(%edi),%ebp
+	xorl	%ecx,%ebx
+	xorl	%eax,%ecx
+	xorl	%edx,%ebx
+	roll	$24,%ecx
+	xorl	%eax,%edx
+	xorl	%eax,%ebx
+	roll	$16,%edx
+	xorl	%ecx,%ebx
+	roll	$8,%eax
+	xorl	%edx,%ebx
+	movl	%ebp,%ecx
+	xorl	%eax,%ebx
+	movl	%ebx,4(%edi)	/* word 1 done */
+	movl	$2155905152,%ebp	/* word 2 (input in %ecx) */
+	andl	%ecx,%ebp
+	leal	(%ecx,%ecx,1),%edx
+	movl	%ebp,%esi
+	shrl	$7,%ebp
+	subl	%ebp,%esi
+	andl	$4278124286,%edx
+	andl	$454761243,%esi
+	xorl	%esi,%edx
+	movl	$2155905152,%ebp
+	andl	%edx,%ebp
+	leal	(%edx,%edx,1),%eax
+	movl	%ebp,%esi
+	shrl	$7,%ebp
+	subl	%ebp,%esi
+	andl	$4278124286,%eax
+	andl	$454761243,%esi
+	xorl	%ecx,%edx
+	xorl	%esi,%eax
+	movl	$2155905152,%ebp
+	andl	%eax,%ebp
+	leal	(%eax,%eax,1),%ebx
+	movl	%ebp,%esi
+	shrl	$7,%ebp
+	xorl	%ecx,%eax
+	subl	%ebp,%esi
+	andl	$4278124286,%ebx
+	andl	$454761243,%esi
+	roll	$8,%ecx
+	xorl	%esi,%ebx
+	movl	12(%edi),%ebp
+	xorl	%edx,%ecx
+	xorl	%ebx,%edx
+	xorl	%eax,%ecx
+	roll	$24,%edx
+	xorl	%ebx,%eax
+	xorl	%ebx,%ecx
+	roll	$16,%eax
+	xorl	%edx,%ecx
+	roll	$8,%ebx
+	xorl	%eax,%ecx
+	movl	%ebp,%edx
+	xorl	%ebx,%ecx
+	movl	%ecx,8(%edi)	/* word 2 done */
+	movl	$2155905152,%ebp	/* word 3 (input in %edx) */
+	andl	%edx,%ebp
+	leal	(%edx,%edx,1),%eax
+	movl	%ebp,%esi
+	shrl	$7,%ebp
+	subl	%ebp,%esi
+	andl	$4278124286,%eax
+	andl	$454761243,%esi
+	xorl	%esi,%eax
+	movl	$2155905152,%ebp
+	andl	%eax,%ebp
+	leal	(%eax,%eax,1),%ebx
+	movl	%ebp,%esi
+	shrl	$7,%ebp
+	subl	%ebp,%esi
+	andl	$4278124286,%ebx
+	andl	$454761243,%esi
+	xorl	%edx,%eax
+	xorl	%esi,%ebx
+	movl	$2155905152,%ebp
+	andl	%ebx,%ebp
+	leal	(%ebx,%ebx,1),%ecx
+	movl	%ebp,%esi
+	shrl	$7,%ebp
+	xorl	%edx,%ebx
+	subl	%ebp,%esi
+	andl	$4278124286,%ecx
+	andl	$454761243,%esi
+	roll	$8,%edx
+	xorl	%esi,%ecx
+	movl	16(%edi),%ebp	/* first word of the following round key, used if the loop continues */
+	xorl	%eax,%edx
+	xorl	%ecx,%eax
+	xorl	%ebx,%edx
+	roll	$24,%eax
+	xorl	%ecx,%ebx
+	xorl	%ecx,%edx
+	roll	$16,%ebx
+	xorl	%eax,%edx
+	roll	$8,%ecx
+	xorl	%ebx,%edx
+	movl	%ebp,%eax
+	xorl	%ecx,%edx
+	movl	%edx,12(%edi)	/* word 3 done */
+	cmpl	28(%esp),%edi	/* continue while below the last inner round key; first and last round keys are left untouched */
+	jb	.L056permute
+	xorl	%eax,%eax	/* status 0 */
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	asm_AES_set_decrypt_key,.-.L_asm_AES_set_decrypt_key_begin
+.byte	65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89	/* NUL-terminated ASCII: "AES for x86, CRYPTOGAMS by <appro@openssl.org>" */
+.byte	80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
+.byte	111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+#endif
diff --git a/third_party/boringssl/linux-x86/crypto/fipsmodule/aesni-x86.S b/third_party/boringssl/linux-x86/crypto/fipsmodule/aesni-x86.S
new file mode 100644
index 0000000..cc53fa4
--- /dev/null
+++ b/third_party/boringssl/linux-x86/crypto/fipsmodule/aesni-x86.S
@@ -0,0 +1,2468 @@
+#if defined(__i386__)
+.text
+.globl	aesni_encrypt
+.hidden	aesni_encrypt
+.type	aesni_encrypt,@function
+.align	16
+aesni_encrypt:	/* Encrypts one 16-byte block. Stack args: 4(%esp) input, 8(%esp) output, 12(%esp) key schedule. */
+.L_aesni_encrypt_begin:
+	movl	4(%esp),%eax
+	movl	12(%esp),%edx
+	movups	(%eax),%xmm2	/* input block */
+	movl	240(%edx),%ecx	/* loop count stored at offset 240 of the schedule */
+	movl	8(%esp),%eax
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2	/* XOR with the first round key */
+.L000enc1_loop_1:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	decl	%ecx	/* sets ZF for the jnz below; movups and leal leave flags alone */
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L000enc1_loop_1
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
+	pxor	%xmm0,%xmm0	/* wipe round keys from registers */
+	pxor	%xmm1,%xmm1
+	movups	%xmm2,(%eax)
+	pxor	%xmm2,%xmm2	/* wipe the result from the register after storing it */
+	ret
+.size	aesni_encrypt,.-.L_aesni_encrypt_begin
+.globl	aesni_decrypt
+.hidden	aesni_decrypt
+.type	aesni_decrypt,@function
+.align	16
+aesni_decrypt:	/* Decrypts one 16-byte block. Stack args: 4(%esp) input, 8(%esp) output, 12(%esp) key schedule. */
+.L_aesni_decrypt_begin:
+	movl	4(%esp),%eax
+	movl	12(%esp),%edx
+	movups	(%eax),%xmm2	/* input block */
+	movl	240(%edx),%ecx	/* loop count stored at offset 240 of the schedule */
+	movl	8(%esp),%eax
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2	/* XOR with the first round key */
+.L001dec1_loop_2:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	decl	%ecx	/* sets ZF for the jnz below; movups and leal leave flags alone */
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L001dec1_loop_2
+.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
+	pxor	%xmm0,%xmm0	/* wipe round keys from registers */
+	pxor	%xmm1,%xmm1
+	movups	%xmm2,(%eax)
+	pxor	%xmm2,%xmm2	/* wipe the result from the register after storing it */
+	ret
+.size	aesni_decrypt,.-.L_aesni_decrypt_begin
+.hidden	_aesni_encrypt2
+.type	_aesni_encrypt2,@function
+.align	16
+_aesni_encrypt2:	/* Register-based helper: encrypts %xmm2 and %xmm3 in place. In: %edx = key schedule, %ecx = loop count. Clobbers %ecx, %edx, %xmm0, %xmm1. */
+	movups	(%edx),%xmm0
+	shll	$4,%ecx	/* count -> byte length */
+	movups	16(%edx),%xmm1
+	xorps	%xmm0,%xmm2	/* XOR both blocks with the first round key */
+	pxor	%xmm0,%xmm3
+	movups	32(%edx),%xmm0
+	leal	32(%edx,%ecx,1),%edx	/* point %edx past the keys so a negative index can count up to zero */
+	negl	%ecx
+	addl	$16,%ecx
+.L002enc2_loop:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+	movups	(%edx,%ecx,1),%xmm1
+	addl	$32,%ecx	/* sets ZF for the jnz below */
+.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
+.byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
+	movups	-16(%edx,%ecx,1),%xmm0
+	jnz	.L002enc2_loop
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+.byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
+.byte	102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
+	ret
+.size	_aesni_encrypt2,.-_aesni_encrypt2
+.hidden	_aesni_decrypt2
+.type	_aesni_decrypt2,@function
+.align	16
+_aesni_decrypt2:	/* Register-based helper: decrypts %xmm2 and %xmm3 in place. In: %edx = key schedule, %ecx = loop count. Clobbers %ecx, %edx, %xmm0, %xmm1. */
+	movups	(%edx),%xmm0
+	shll	$4,%ecx	/* count -> byte length */
+	movups	16(%edx),%xmm1
+	xorps	%xmm0,%xmm2	/* XOR both blocks with the first round key */
+	pxor	%xmm0,%xmm3
+	movups	32(%edx),%xmm0
+	leal	32(%edx,%ecx,1),%edx	/* point %edx past the keys so a negative index can count up to zero */
+	negl	%ecx
+	addl	$16,%ecx
+.L003dec2_loop:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
+	movups	(%edx,%ecx,1),%xmm1
+	addl	$32,%ecx	/* sets ZF for the jnz below */
+.byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2 */
+.byte	102,15,56,222,216	/* aesdec %xmm0,%xmm3 */
+	movups	-16(%edx,%ecx,1),%xmm0
+	jnz	.L003dec2_loop
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
+.byte	102,15,56,223,208	/* aesdeclast %xmm0,%xmm2 */
+.byte	102,15,56,223,216	/* aesdeclast %xmm0,%xmm3 */
+	ret
+.size	_aesni_decrypt2,.-_aesni_decrypt2
+.hidden	_aesni_encrypt3
+.type	_aesni_encrypt3,@function
+.align	16
+_aesni_encrypt3:	/* Register-based helper: encrypts %xmm2..%xmm4 in place. In: %edx = key schedule, %ecx = loop count. Clobbers %ecx, %edx, %xmm0, %xmm1. */
+	movups	(%edx),%xmm0
+	shll	$4,%ecx	/* count -> byte length */
+	movups	16(%edx),%xmm1
+	xorps	%xmm0,%xmm2	/* XOR all three blocks with the first round key */
+	pxor	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+	movups	32(%edx),%xmm0
+	leal	32(%edx,%ecx,1),%edx	/* point %edx past the keys so a negative index can count up to zero */
+	negl	%ecx
+	addl	$16,%ecx
+.L004enc3_loop:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
+	movups	(%edx,%ecx,1),%xmm1
+	addl	$32,%ecx	/* sets ZF for the jnz below */
+.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
+.byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
+.byte	102,15,56,220,224	/* aesenc %xmm0,%xmm4 */
+	movups	-16(%edx,%ecx,1),%xmm0
+	jnz	.L004enc3_loop
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
+.byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
+.byte	102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
+.byte	102,15,56,221,224	/* aesenclast %xmm0,%xmm4 */
+	ret
+.size	_aesni_encrypt3,.-_aesni_encrypt3
+.hidden	_aesni_decrypt3
+.type	_aesni_decrypt3,@function
+.align	16
+_aesni_decrypt3:	/* Register-based helper: decrypts %xmm2..%xmm4 in place. In: %edx = key schedule, %ecx = loop count. Clobbers %ecx, %edx, %xmm0, %xmm1. */
+	movups	(%edx),%xmm0
+	shll	$4,%ecx	/* count -> byte length */
+	movups	16(%edx),%xmm1
+	xorps	%xmm0,%xmm2	/* XOR all three blocks with the first round key */
+	pxor	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+	movups	32(%edx),%xmm0
+	leal	32(%edx,%ecx,1),%edx	/* point %edx past the keys so a negative index can count up to zero */
+	negl	%ecx
+	addl	$16,%ecx
+.L005dec3_loop:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
+.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
+	movups	(%edx,%ecx,1),%xmm1
+	addl	$32,%ecx	/* sets ZF for the jnz below */
+.byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2 */
+.byte	102,15,56,222,216	/* aesdec %xmm0,%xmm3 */
+.byte	102,15,56,222,224	/* aesdec %xmm0,%xmm4 */
+	movups	-16(%edx,%ecx,1),%xmm0
+	jnz	.L005dec3_loop
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
+.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
+.byte	102,15,56,223,208	/* aesdeclast %xmm0,%xmm2 */
+.byte	102,15,56,223,216	/* aesdeclast %xmm0,%xmm3 */
+.byte	102,15,56,223,224	/* aesdeclast %xmm0,%xmm4 */
+	ret
+.size	_aesni_decrypt3,.-_aesni_decrypt3
+.hidden	_aesni_encrypt4
+.type	_aesni_encrypt4,@function
+.align	16
+_aesni_encrypt4:	/* Register-based helper: encrypts %xmm2..%xmm5 in place. In: %edx = key schedule, %ecx = loop count. Clobbers %ecx, %edx, %xmm0, %xmm1. */
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	shll	$4,%ecx	/* count -> byte length */
+	xorps	%xmm0,%xmm2	/* XOR all four blocks with the first round key */
+	pxor	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+	pxor	%xmm0,%xmm5
+	movups	32(%edx),%xmm0
+	leal	32(%edx,%ecx,1),%edx	/* point %edx past the keys so a negative index can count up to zero */
+	negl	%ecx
+.byte	15,31,64,0	/* nopl 0(%eax): 4-byte no-op padding */
+	addl	$16,%ecx
+.L006enc4_loop:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
+.byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
+	movups	(%edx,%ecx,1),%xmm1
+	addl	$32,%ecx	/* sets ZF for the jnz below */
+.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
+.byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
+.byte	102,15,56,220,224	/* aesenc %xmm0,%xmm4 */
+.byte	102,15,56,220,232	/* aesenc %xmm0,%xmm5 */
+	movups	-16(%edx,%ecx,1),%xmm0
+	jnz	.L006enc4_loop
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
+.byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
+.byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
+.byte	102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
+.byte	102,15,56,221,224	/* aesenclast %xmm0,%xmm4 */
+.byte	102,15,56,221,232	/* aesenclast %xmm0,%xmm5 */
+	ret
+.size	_aesni_encrypt4,.-_aesni_encrypt4
+.hidden	_aesni_decrypt4
+.type	_aesni_decrypt4,@function
+.align	16
+_aesni_decrypt4:	/* Register-based helper: decrypts %xmm2..%xmm5 in place. In: %edx = key schedule, %ecx = loop count. Clobbers %ecx, %edx, %xmm0, %xmm1. */
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	shll	$4,%ecx	/* count -> byte length */
+	xorps	%xmm0,%xmm2	/* XOR all four blocks with the first round key */
+	pxor	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+	pxor	%xmm0,%xmm5
+	movups	32(%edx),%xmm0
+	leal	32(%edx,%ecx,1),%edx	/* point %edx past the keys so a negative index can count up to zero */
+	negl	%ecx
+.byte	15,31,64,0	/* nopl 0(%eax): 4-byte no-op padding */
+	addl	$16,%ecx
+.L007dec4_loop:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
+.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
+.byte	102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
+	movups	(%edx,%ecx,1),%xmm1
+	addl	$32,%ecx	/* sets ZF for the jnz below */
+.byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2 */
+.byte	102,15,56,222,216	/* aesdec %xmm0,%xmm3 */
+.byte	102,15,56,222,224	/* aesdec %xmm0,%xmm4 */
+.byte	102,15,56,222,232	/* aesdec %xmm0,%xmm5 */
+	movups	-16(%edx,%ecx,1),%xmm0
+	jnz	.L007dec4_loop
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
+.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
+.byte	102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
+.byte	102,15,56,223,208	/* aesdeclast %xmm0,%xmm2 */
+.byte	102,15,56,223,216	/* aesdeclast %xmm0,%xmm3 */
+.byte	102,15,56,223,224	/* aesdeclast %xmm0,%xmm4 */
+.byte	102,15,56,223,232	/* aesdeclast %xmm0,%xmm5 */
+	ret
+.size	_aesni_decrypt4,.-_aesni_decrypt4
+.hidden	_aesni_encrypt6
+.type	_aesni_encrypt6,@function
+.align	16
+_aesni_encrypt6:	/* Register-based helper: encrypts %xmm2..%xmm7 in place. In: %edx = key schedule, %ecx = loop count. Clobbers %ecx, %edx, %xmm0, %xmm1. */
+	movups	(%edx),%xmm0
+	shll	$4,%ecx	/* count -> byte length */
+	movups	16(%edx),%xmm1
+	xorps	%xmm0,%xmm2	/* first-key XOR of the six blocks is interleaved with the first aesenc of the earlier blocks */
+	pxor	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	pxor	%xmm0,%xmm5
+	pxor	%xmm0,%xmm6
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+	leal	32(%edx,%ecx,1),%edx	/* point %edx past the keys so a negative index can count up to zero */
+	negl	%ecx
+.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
+	pxor	%xmm0,%xmm7
+	movups	(%edx,%ecx,1),%xmm0	/* round key at offset 32 of the schedule */
+	addl	$16,%ecx
+	jmp	.L008_aesni_encrypt6_inner	/* join the loop midway: %xmm5..%xmm7 still need their first aesenc */
+.align	16
+.L009enc6_loop:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
+.L008_aesni_encrypt6_inner:
+.byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
+.byte	102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
+.byte	102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
+.L_aesni_encrypt6_enter:	/* alternate entry, called directly by aesni_ctr32_encrypt_blocks and aesni_xts_encrypt after they run the first round themselves */
+	movups	(%edx,%ecx,1),%xmm1
+	addl	$32,%ecx	/* sets ZF for the jnz below */
+.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
+.byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
+.byte	102,15,56,220,224	/* aesenc %xmm0,%xmm4 */
+.byte	102,15,56,220,232	/* aesenc %xmm0,%xmm5 */
+.byte	102,15,56,220,240	/* aesenc %xmm0,%xmm6 */
+.byte	102,15,56,220,248	/* aesenc %xmm0,%xmm7 */
+	movups	-16(%edx,%ecx,1),%xmm0
+	jnz	.L009enc6_loop
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
+.byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
+.byte	102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
+.byte	102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
+.byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
+.byte	102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
+.byte	102,15,56,221,224	/* aesenclast %xmm0,%xmm4 */
+.byte	102,15,56,221,232	/* aesenclast %xmm0,%xmm5 */
+.byte	102,15,56,221,240	/* aesenclast %xmm0,%xmm6 */
+.byte	102,15,56,221,248	/* aesenclast %xmm0,%xmm7 */
+	ret
+.size	_aesni_encrypt6,.-_aesni_encrypt6
+.hidden	_aesni_decrypt6
+.type	_aesni_decrypt6,@function
+.align	16
+_aesni_decrypt6:	/* Register-based helper: decrypts %xmm2..%xmm7 in place. In: %edx = key schedule, %ecx = loop count. Clobbers %ecx, %edx, %xmm0, %xmm1. */
+	movups	(%edx),%xmm0
+	shll	$4,%ecx	/* count -> byte length */
+	movups	16(%edx),%xmm1
+	xorps	%xmm0,%xmm2	/* first-key XOR of the six blocks is interleaved with the first aesdec of the earlier blocks */
+	pxor	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	pxor	%xmm0,%xmm5
+	pxor	%xmm0,%xmm6
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
+	leal	32(%edx,%ecx,1),%edx	/* point %edx past the keys so a negative index can count up to zero */
+	negl	%ecx
+.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
+	pxor	%xmm0,%xmm7
+	movups	(%edx,%ecx,1),%xmm0	/* round key at offset 32 of the schedule */
+	addl	$16,%ecx
+	jmp	.L010_aesni_decrypt6_inner	/* join the loop midway: %xmm5..%xmm7 still need their first aesdec */
+.align	16
+.L011dec6_loop:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
+.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
+.L010_aesni_decrypt6_inner:
+.byte	102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
+.byte	102,15,56,222,241	/* aesdec %xmm1,%xmm6 */
+.byte	102,15,56,222,249	/* aesdec %xmm1,%xmm7 */
+.L_aesni_decrypt6_enter:	/* alternate entry label; no caller is visible in this part of the file */
+	movups	(%edx,%ecx,1),%xmm1
+	addl	$32,%ecx	/* sets ZF for the jnz below */
+.byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2 */
+.byte	102,15,56,222,216	/* aesdec %xmm0,%xmm3 */
+.byte	102,15,56,222,224	/* aesdec %xmm0,%xmm4 */
+.byte	102,15,56,222,232	/* aesdec %xmm0,%xmm5 */
+.byte	102,15,56,222,240	/* aesdec %xmm0,%xmm6 */
+.byte	102,15,56,222,248	/* aesdec %xmm0,%xmm7 */
+	movups	-16(%edx,%ecx,1),%xmm0
+	jnz	.L011dec6_loop
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
+.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
+.byte	102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
+.byte	102,15,56,222,241	/* aesdec %xmm1,%xmm6 */
+.byte	102,15,56,222,249	/* aesdec %xmm1,%xmm7 */
+.byte	102,15,56,223,208	/* aesdeclast %xmm0,%xmm2 */
+.byte	102,15,56,223,216	/* aesdeclast %xmm0,%xmm3 */
+.byte	102,15,56,223,224	/* aesdeclast %xmm0,%xmm4 */
+.byte	102,15,56,223,232	/* aesdeclast %xmm0,%xmm5 */
+.byte	102,15,56,223,240	/* aesdeclast %xmm0,%xmm6 */
+.byte	102,15,56,223,248	/* aesdeclast %xmm0,%xmm7 */
+	ret
+.size	_aesni_decrypt6,.-_aesni_decrypt6
+.globl	aesni_ecb_encrypt
+.hidden	aesni_ecb_encrypt
+.type	aesni_ecb_encrypt,@function
+.align	16
+aesni_ecb_encrypt:	/* Processes independent 16-byte blocks, six at a time with a 1..5 block tail. Stack args in order: input, output, byte length, key schedule, direction flag (nonzero = encrypt). */
+.L_aesni_ecb_encrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi	/* input */
+	movl	24(%esp),%edi	/* output */
+	movl	28(%esp),%eax	/* byte length */
+	movl	32(%esp),%edx	/* key schedule */
+	movl	36(%esp),%ebx	/* direction flag */
+	andl	$-16,%eax	/* round length down to whole blocks; nothing to do if that leaves zero */
+	jz	.L012ecb_ret
+	movl	240(%edx),%ecx
+	testl	%ebx,%ebx
+	jz	.L013ecb_decrypt
+	movl	%edx,%ebp	/* keep key pointer and loop count: the helpers clobber %edx and %ecx */
+	movl	%ecx,%ebx
+	cmpl	$96,%eax
+	jb	.L014ecb_enc_tail
+	movdqu	(%esi),%xmm2
+	movdqu	16(%esi),%xmm3
+	movdqu	32(%esi),%xmm4
+	movdqu	48(%esi),%xmm5
+	movdqu	64(%esi),%xmm6
+	movdqu	80(%esi),%xmm7
+	leal	96(%esi),%esi
+	subl	$96,%eax
+	jmp	.L015ecb_enc_loop6_enter
+.align	16
+.L016ecb_enc_loop6:	/* store the previous six results while loading the next six inputs */
+	movups	%xmm2,(%edi)
+	movdqu	(%esi),%xmm2
+	movups	%xmm3,16(%edi)
+	movdqu	16(%esi),%xmm3
+	movups	%xmm4,32(%edi)
+	movdqu	32(%esi),%xmm4
+	movups	%xmm5,48(%edi)
+	movdqu	48(%esi),%xmm5
+	movups	%xmm6,64(%edi)
+	movdqu	64(%esi),%xmm6
+	movups	%xmm7,80(%edi)
+	leal	96(%edi),%edi
+	movdqu	80(%esi),%xmm7
+	leal	96(%esi),%esi
+.L015ecb_enc_loop6_enter:
+	call	_aesni_encrypt6
+	movl	%ebp,%edx	/* restore key pointer and loop count */
+	movl	%ebx,%ecx
+	subl	$96,%eax
+	jnc	.L016ecb_enc_loop6	/* no borrow: at least six more blocks remain */
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	movups	%xmm6,64(%edi)
+	movups	%xmm7,80(%edi)
+	leal	96(%edi),%edi
+	addl	$96,%eax	/* undo the speculative subtract; zero means no tail */
+	jz	.L012ecb_ret
+.L014ecb_enc_tail:	/* 16..80 bytes left; movups leaves flags alone, so each cmpl feeds both the jb and the je after it */
+	movups	(%esi),%xmm2
+	cmpl	$32,%eax
+	jb	.L017ecb_enc_one
+	movups	16(%esi),%xmm3
+	je	.L018ecb_enc_two
+	movups	32(%esi),%xmm4
+	cmpl	$64,%eax
+	jb	.L019ecb_enc_three
+	movups	48(%esi),%xmm5
+	je	.L020ecb_enc_four
+	movups	64(%esi),%xmm6
+	xorps	%xmm7,%xmm7	/* five blocks: sixth lane is zeroed and its result is not stored */
+	call	_aesni_encrypt6
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	movups	%xmm6,64(%edi)
+	jmp	.L012ecb_ret
+.align	16
+.L017ecb_enc_one:	/* single block handled inline */
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L021enc1_loop_3:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L021enc1_loop_3
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
+	movups	%xmm2,(%edi)
+	jmp	.L012ecb_ret
+.align	16
+.L018ecb_enc_two:
+	call	_aesni_encrypt2
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	jmp	.L012ecb_ret
+.align	16
+.L019ecb_enc_three:
+	call	_aesni_encrypt3
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	jmp	.L012ecb_ret
+.align	16
+.L020ecb_enc_four:
+	call	_aesni_encrypt4
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	jmp	.L012ecb_ret
+.align	16
+.L013ecb_decrypt:	/* decrypt path: same structure as above, using the decrypt helpers */
+	movl	%edx,%ebp
+	movl	%ecx,%ebx
+	cmpl	$96,%eax
+	jb	.L022ecb_dec_tail
+	movdqu	(%esi),%xmm2
+	movdqu	16(%esi),%xmm3
+	movdqu	32(%esi),%xmm4
+	movdqu	48(%esi),%xmm5
+	movdqu	64(%esi),%xmm6
+	movdqu	80(%esi),%xmm7
+	leal	96(%esi),%esi
+	subl	$96,%eax
+	jmp	.L023ecb_dec_loop6_enter
+.align	16
+.L024ecb_dec_loop6:
+	movups	%xmm2,(%edi)
+	movdqu	(%esi),%xmm2
+	movups	%xmm3,16(%edi)
+	movdqu	16(%esi),%xmm3
+	movups	%xmm4,32(%edi)
+	movdqu	32(%esi),%xmm4
+	movups	%xmm5,48(%edi)
+	movdqu	48(%esi),%xmm5
+	movups	%xmm6,64(%edi)
+	movdqu	64(%esi),%xmm6
+	movups	%xmm7,80(%edi)
+	leal	96(%edi),%edi
+	movdqu	80(%esi),%xmm7
+	leal	96(%esi),%esi
+.L023ecb_dec_loop6_enter:
+	call	_aesni_decrypt6
+	movl	%ebp,%edx
+	movl	%ebx,%ecx
+	subl	$96,%eax
+	jnc	.L024ecb_dec_loop6
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	movups	%xmm6,64(%edi)
+	movups	%xmm7,80(%edi)
+	leal	96(%edi),%edi
+	addl	$96,%eax
+	jz	.L012ecb_ret
+.L022ecb_dec_tail:
+	movups	(%esi),%xmm2
+	cmpl	$32,%eax
+	jb	.L025ecb_dec_one
+	movups	16(%esi),%xmm3
+	je	.L026ecb_dec_two
+	movups	32(%esi),%xmm4
+	cmpl	$64,%eax
+	jb	.L027ecb_dec_three
+	movups	48(%esi),%xmm5
+	je	.L028ecb_dec_four
+	movups	64(%esi),%xmm6
+	xorps	%xmm7,%xmm7	/* five blocks: sixth lane is zeroed and its result is not stored */
+	call	_aesni_decrypt6
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	movups	%xmm6,64(%edi)
+	jmp	.L012ecb_ret
+.align	16
+.L025ecb_dec_one:
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L029dec1_loop_4:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L029dec1_loop_4
+.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
+	movups	%xmm2,(%edi)
+	jmp	.L012ecb_ret
+.align	16
+.L026ecb_dec_two:
+	call	_aesni_decrypt2
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	jmp	.L012ecb_ret
+.align	16
+.L027ecb_dec_three:
+	call	_aesni_decrypt3
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	jmp	.L012ecb_ret
+.align	16
+.L028ecb_dec_four:
+	call	_aesni_decrypt4
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+.L012ecb_ret:	/* common exit: wipe all eight XMM registers before returning */
+	pxor	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin
+.globl	aesni_ccm64_encrypt_blocks
+.hidden	aesni_ccm64_encrypt_blocks
+.type	aesni_ccm64_encrypt_blocks,@function
+.align	16
+aesni_ccm64_encrypt_blocks:	/* Per block: output = input XOR E(counter), and the running MAC block is XORed with the input and encrypted in the same pass. Stack args in order: input, output, block count, key schedule, counter block, MAC block. */
+.L_aesni_ccm64_encrypt_blocks_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi	/* input */
+	movl	24(%esp),%edi	/* output */
+	movl	28(%esp),%eax	/* number of 16-byte blocks (decremented once per block below) */
+	movl	32(%esp),%edx	/* key schedule */
+	movl	36(%esp),%ebx	/* counter block pointer */
+	movl	40(%esp),%ecx	/* MAC block pointer */
+	movl	%esp,%ebp
+	subl	$60,%esp
+	andl	$-16,%esp	/* 16-byte aligned scratch frame, needed by the movdqa/paddq memory operands */
+	movl	%ebp,48(%esp)	/* original %esp saved in the frame */
+	movdqu	(%ebx),%xmm7
+	movdqu	(%ecx),%xmm3
+	movl	240(%edx),%ecx
+	movl	$202182159,(%esp)	/* 0x0c0d0e0f \                                             */
+	movl	$134810123,4(%esp)	/* 0x08090a0b  | pshufb mask that reverses all 16 bytes   */
+	movl	$67438087,8(%esp)	/* 0x04050607  |                                          */
+	movl	$66051,12(%esp)	/* 0x00010203 /                                             */
+	movl	$1,%ebx
+	xorl	%ebp,%ebp
+	movl	%ebx,16(%esp)	/* 16(%esp) = {1,0,0,0}: counter increment for paddq */
+	movl	%ebp,20(%esp)
+	movl	%ebp,24(%esp)
+	movl	%ebp,28(%esp)
+	shll	$4,%ecx
+	movl	$16,%ebx
+	leal	(%edx),%ebp	/* %ebp = start of key schedule */
+	movdqa	(%esp),%xmm5
+	movdqa	%xmm7,%xmm2	/* first counter block is used as supplied */
+	leal	32(%edx,%ecx,1),%edx	/* %edx past the keys; %ebx = negative start index for the inner loop */
+	subl	%ecx,%ebx
+.byte	102,15,56,0,253	/* pshufb %xmm5,%xmm7: byte-reverse the counter so paddq can increment it */
+.L030ccm64_enc_outer:
+	movups	(%ebp),%xmm0
+	movl	%ebx,%ecx
+	movups	(%esi),%xmm6	/* input block */
+	xorps	%xmm0,%xmm2	/* counter XOR first round key */
+	movups	16(%ebp),%xmm1
+	xorps	%xmm6,%xmm0
+	xorps	%xmm0,%xmm3	/* MAC ^= input ^ first round key */
+	movups	32(%ebp),%xmm0
+.L031ccm64_enc2_loop:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+	movups	(%edx,%ecx,1),%xmm1
+	addl	$32,%ecx	/* sets ZF for the jnz below */
+.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
+.byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
+	movups	-16(%edx,%ecx,1),%xmm0
+	jnz	.L031ccm64_enc2_loop
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+	paddq	16(%esp),%xmm7	/* counter += 1 (in byte-reversed form) */
+	decl	%eax	/* sets ZF for the jnz at the end of the loop; nothing in between alters flags */
+.byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
+.byte	102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
+	leal	16(%esi),%esi
+	xorps	%xmm2,%xmm6	/* output = input XOR encrypted counter */
+	movdqa	%xmm7,%xmm2
+	movups	%xmm6,(%edi)
+.byte	102,15,56,0,213	/* pshufb %xmm5,%xmm2: next counter block back in original byte order */
+	leal	16(%edi),%edi
+	jnz	.L030ccm64_enc_outer
+	movl	48(%esp),%esp	/* restore original stack pointer */
+	movl	40(%esp),%edi
+	movups	%xmm3,(%edi)	/* write the updated MAC block back */
+	pxor	%xmm0,%xmm0	/* wipe all XMM registers */
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin
+.globl	aesni_ccm64_decrypt_blocks
+.hidden	aesni_ccm64_decrypt_blocks
+.type	aesni_ccm64_decrypt_blocks,@function
+.align	16
+aesni_ccm64_decrypt_blocks:	/* Per block: output = input XOR E(counter); the running MAC block absorbs the recovered output. Stack args in order: input, output, block count, key schedule, counter block, MAC block. */
+.L_aesni_ccm64_decrypt_blocks_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi	/* input */
+	movl	24(%esp),%edi	/* output */
+	movl	28(%esp),%eax	/* number of 16-byte blocks */
+	movl	32(%esp),%edx	/* key schedule */
+	movl	36(%esp),%ebx	/* counter block pointer */
+	movl	40(%esp),%ecx	/* MAC block pointer */
+	movl	%esp,%ebp
+	subl	$60,%esp
+	andl	$-16,%esp	/* 16-byte aligned scratch frame, needed by the movdqa/paddq memory operands */
+	movl	%ebp,48(%esp)	/* original %esp saved in the frame */
+	movdqu	(%ebx),%xmm7
+	movdqu	(%ecx),%xmm3
+	movl	240(%edx),%ecx
+	movl	$202182159,(%esp)	/* 0x0c0d0e0f \                                             */
+	movl	$134810123,4(%esp)	/* 0x08090a0b  | pshufb mask that reverses all 16 bytes   */
+	movl	$67438087,8(%esp)	/* 0x04050607  |                                          */
+	movl	$66051,12(%esp)	/* 0x00010203 /                                             */
+	movl	$1,%ebx
+	xorl	%ebp,%ebp
+	movl	%ebx,16(%esp)	/* 16(%esp) = {1,0,0,0}: counter increment for paddq */
+	movl	%ebp,20(%esp)
+	movl	%ebp,24(%esp)
+	movl	%ebp,28(%esp)
+	movdqa	(%esp),%xmm5
+	movdqa	%xmm7,%xmm2	/* first counter block is used as supplied */
+	movl	%edx,%ebp	/* %ebp = start of key schedule, %ebx = loop count */
+	movl	%ecx,%ebx
+.byte	102,15,56,0,253	/* pshufb %xmm5,%xmm7: byte-reverse the counter so paddq can increment it */
+	movups	(%edx),%xmm0	/* encrypt the first counter block on its own */
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L032enc1_loop_5:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L032enc1_loop_5
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
+	shll	$4,%ebx
+	movl	$16,%ecx
+	movups	(%esi),%xmm6	/* first input block */
+	paddq	16(%esp),%xmm7	/* counter += 1 (in byte-reversed form) */
+	leal	16(%esi),%esi
+	subl	%ebx,%ecx
+	leal	32(%ebp,%ebx,1),%edx	/* %edx past the keys; %ebx = negative start index for the two-lane loop */
+	movl	%ecx,%ebx
+	jmp	.L033ccm64_dec_outer
+.align	16
+.L033ccm64_dec_outer:
+	xorps	%xmm2,%xmm6	/* output = input XOR encrypted counter */
+	movdqa	%xmm7,%xmm2
+	movups	%xmm6,(%edi)
+	leal	16(%edi),%edi
+.byte	102,15,56,0,213	/* pshufb %xmm5,%xmm2: next counter block back in original byte order */
+	subl	$1,%eax
+	jz	.L034ccm64_dec_break	/* last block: only the MAC update remains */
+	movups	(%ebp),%xmm0
+	movl	%ebx,%ecx
+	movups	16(%ebp),%xmm1
+	xorps	%xmm0,%xmm6
+	xorps	%xmm0,%xmm2	/* next counter XOR first round key */
+	xorps	%xmm6,%xmm3	/* MAC ^= output ^ first round key */
+	movups	32(%ebp),%xmm0
+.L035ccm64_dec2_loop:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+	movups	(%edx,%ecx,1),%xmm1
+	addl	$32,%ecx	/* sets ZF for the jnz below */
+.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
+.byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
+	movups	-16(%edx,%ecx,1),%xmm0
+	jnz	.L035ccm64_dec2_loop
+	movups	(%esi),%xmm6	/* next input block */
+	paddq	16(%esp),%xmm7
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+.byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
+.byte	102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
+	leal	16(%esi),%esi
+	jmp	.L033ccm64_dec_outer
+.align	16
+.L034ccm64_dec_break:	/* absorb the final output block into the MAC and encrypt the MAC on its own */
+	movl	240(%ebp),%ecx
+	movl	%ebp,%edx
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	xorps	%xmm0,%xmm6
+	leal	32(%edx),%edx
+	xorps	%xmm6,%xmm3
+.L036enc1_loop_6:
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L036enc1_loop_6
+.byte	102,15,56,221,217	/* aesenclast %xmm1,%xmm3 */
+	movl	48(%esp),%esp	/* restore original stack pointer */
+	movl	40(%esp),%edi
+	movups	%xmm3,(%edi)	/* write the updated MAC block back */
+	pxor	%xmm0,%xmm0	/* wipe all XMM registers */
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin
+.globl	aesni_ctr32_encrypt_blocks
+.hidden	aesni_ctr32_encrypt_blocks
+.type	aesni_ctr32_encrypt_blocks,@function
+.align	16
+aesni_ctr32_encrypt_blocks:	/* Counter mode with a 32-bit big-endian counter in the last dword of the counter block: output = input XOR E(counter block). Stack args in order: input, output, block count, key schedule, counter block. */
+.L_aesni_ctr32_encrypt_blocks_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi	/* input */
+	movl	24(%esp),%edi	/* output */
+	movl	28(%esp),%eax	/* number of 16-byte blocks */
+	movl	32(%esp),%edx	/* key schedule */
+	movl	36(%esp),%ebx	/* counter block pointer */
+	movl	%esp,%ebp
+	subl	$88,%esp
+	andl	$-16,%esp	/* 16-byte aligned scratch frame for the movdqa slots */
+	movl	%ebp,80(%esp)	/* original %esp saved in the frame */
+	cmpl	$1,%eax
+	je	.L037ctr32_one_shortcut
+	movdqu	(%ebx),%xmm7
+	movl	$202182159,(%esp)	/* 0x0c0d0e0f \                                             */
+	movl	$134810123,4(%esp)	/* 0x08090a0b  | pshufb mask that reverses all 16 bytes   */
+	movl	$67438087,8(%esp)	/* 0x04050607  |                                          */
+	movl	$66051,12(%esp)	/* 0x00010203 /                                             */
+	movl	$6,%ecx
+	xorl	%ebp,%ebp
+	movl	%ecx,16(%esp)	/* 16(%esp) = {6,6,6,0}: per-iteration counter increment */
+	movl	%ecx,20(%esp)
+	movl	%ecx,24(%esp)
+	movl	%ebp,28(%esp)
+.byte	102,15,58,22,251,3	/* pextrd $3,%xmm7,%ebx: pull the counter dword out of the block */
+.byte	102,15,58,34,253,3	/* pinsrd $3,%ebp,%xmm7: zero that dword in %xmm7 (%ebp is 0) */
+	movl	240(%edx),%ecx
+	bswap	%ebx	/* counter to host byte order */
+	pxor	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	movdqa	(%esp),%xmm2
+.byte	102,15,58,34,195,0	/* pinsrd $0,%ebx,%xmm0 */
+	leal	3(%ebx),%ebp
+.byte	102,15,58,34,205,0	/* pinsrd $0,%ebp,%xmm1 */
+	incl	%ebx
+.byte	102,15,58,34,195,1	/* pinsrd $1,%ebx,%xmm0 */
+	incl	%ebp
+.byte	102,15,58,34,205,1	/* pinsrd $1,%ebp,%xmm1 */
+	incl	%ebx
+.byte	102,15,58,34,195,2	/* pinsrd $2,%ebx,%xmm0 */
+	incl	%ebp
+.byte	102,15,58,34,205,2	/* pinsrd $2,%ebp,%xmm1 */
+	movdqa	%xmm0,48(%esp)	/* 48(%esp) = {c, c+1, c+2, 0} in host order */
+.byte	102,15,56,0,194	/* pshufb %xmm2,%xmm0: byte-reversed copy for use in blocks */
+	movdqu	(%edx),%xmm6	/* first round key */
+	movdqa	%xmm1,64(%esp)	/* 64(%esp) = {c+3, c+4, c+5, 0} in host order */
+.byte	102,15,56,0,202	/* pshufb %xmm2,%xmm1 */
+	pshufd	$192,%xmm0,%xmm2	/* selectors 192/128/64 move one reversed counter into the top dword; the rest come from dword 0, which is zero */
+	pshufd	$128,%xmm0,%xmm3
+	cmpl	$6,%eax
+	jb	.L038ctr32_tail
+	pxor	%xmm6,%xmm7
+	shll	$4,%ecx
+	movl	$16,%ebx
+	movdqa	%xmm7,32(%esp)	/* 32(%esp) = counter block (counter dword zeroed) XOR first round key */
+	movl	%edx,%ebp	/* %ebp = start of key schedule */
+	subl	%ecx,%ebx	/* %ebx = negative start index for the shared six-lane loop */
+	leal	32(%edx,%ecx,1),%edx
+	subl	$6,%eax
+	jmp	.L039ctr32_loop6
+.align	16
+.L039ctr32_loop6:	/* build six counter blocks, run the first round here, then enter the shared six-lane loop */
+	pshufd	$64,%xmm0,%xmm4
+	movdqa	32(%esp),%xmm0
+	pshufd	$192,%xmm1,%xmm5
+	pxor	%xmm0,%xmm2
+	pshufd	$128,%xmm1,%xmm6
+	pxor	%xmm0,%xmm3
+	pshufd	$64,%xmm1,%xmm7
+	movups	16(%ebp),%xmm1
+	pxor	%xmm0,%xmm4
+	pxor	%xmm0,%xmm5
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	pxor	%xmm0,%xmm6
+	pxor	%xmm0,%xmm7
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+	movups	32(%ebp),%xmm0
+	movl	%ebx,%ecx
+.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
+.byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
+.byte	102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
+.byte	102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
+	call	.L_aesni_encrypt6_enter	/* remaining rounds for all six lanes */
+	movups	(%esi),%xmm1
+	movups	16(%esi),%xmm0
+	xorps	%xmm1,%xmm2	/* XOR keystream with input, interleaved with the counter update */
+	movups	32(%esi),%xmm1
+	xorps	%xmm0,%xmm3
+	movups	%xmm2,(%edi)
+	movdqa	16(%esp),%xmm0	/* {6,6,6,0} */
+	xorps	%xmm1,%xmm4
+	movdqa	64(%esp),%xmm1
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	paddd	%xmm0,%xmm1	/* upper counter triple += 6 */
+	paddd	48(%esp),%xmm0	/* lower counter triple += 6 */
+	movdqa	(%esp),%xmm2
+	movups	48(%esi),%xmm3
+	movups	64(%esi),%xmm4
+	xorps	%xmm3,%xmm5
+	movups	80(%esi),%xmm3
+	leal	96(%esi),%esi
+	movdqa	%xmm0,48(%esp)
+.byte	102,15,56,0,194	/* pshufb %xmm2,%xmm0 */
+	xorps	%xmm4,%xmm6
+	movups	%xmm5,48(%edi)
+	xorps	%xmm3,%xmm7
+	movdqa	%xmm1,64(%esp)
+.byte	102,15,56,0,202	/* pshufb %xmm2,%xmm1 */
+	movups	%xmm6,64(%edi)
+	pshufd	$192,%xmm0,%xmm2
+	movups	%xmm7,80(%edi)
+	leal	96(%edi),%edi
+	pshufd	$128,%xmm0,%xmm3
+	subl	$6,%eax
+	jnc	.L039ctr32_loop6	/* no borrow: at least six more blocks remain */
+	addl	$6,%eax	/* undo the speculative subtract; zero means no tail */
+	jz	.L040ctr32_ret
+	movdqu	(%ebp),%xmm7
+	movl	%ebp,%edx
+	pxor	32(%esp),%xmm7	/* XOR the first round key back out, leaving the counter block with a zeroed counter dword */
+	movl	240(%ebp),%ecx
+.L038ctr32_tail:	/* 1..5 blocks left; por and pshufd leave flags alone, so each cmpl feeds both the jb and the je after it */
+	por	%xmm7,%xmm2
+	cmpl	$2,%eax
+	jb	.L041ctr32_one
+	pshufd	$64,%xmm0,%xmm4
+	por	%xmm7,%xmm3
+	je	.L042ctr32_two
+	pshufd	$192,%xmm1,%xmm5
+	por	%xmm7,%xmm4
+	cmpl	$4,%eax
+	jb	.L043ctr32_three
+	pshufd	$128,%xmm1,%xmm6
+	por	%xmm7,%xmm5
+	je	.L044ctr32_four
+	por	%xmm7,%xmm6
+	call	_aesni_encrypt6	/* five blocks: sixth lane result is not used */
+	movups	(%esi),%xmm1
+	movups	16(%esi),%xmm0
+	xorps	%xmm1,%xmm2
+	movups	32(%esi),%xmm1
+	xorps	%xmm0,%xmm3
+	movups	48(%esi),%xmm0
+	xorps	%xmm1,%xmm4
+	movups	64(%esi),%xmm1
+	xorps	%xmm0,%xmm5
+	movups	%xmm2,(%edi)
+	xorps	%xmm1,%xmm6
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	movups	%xmm6,64(%edi)
+	jmp	.L040ctr32_ret
+.align	16
+.L037ctr32_one_shortcut:	/* exactly one block: encrypt the counter block as supplied */
+	movups	(%ebx),%xmm2
+	movl	240(%edx),%ecx
+.L041ctr32_one:
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L045enc1_loop_7:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L045enc1_loop_7
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
+	movups	(%esi),%xmm6
+	xorps	%xmm2,%xmm6
+	movups	%xmm6,(%edi)
+	jmp	.L040ctr32_ret
+.align	16
+.L042ctr32_two:
+	call	_aesni_encrypt2
+	movups	(%esi),%xmm5
+	movups	16(%esi),%xmm6
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	jmp	.L040ctr32_ret
+.align	16
+.L043ctr32_three:
+	call	_aesni_encrypt3
+	movups	(%esi),%xmm5
+	movups	16(%esi),%xmm6
+	xorps	%xmm5,%xmm2
+	movups	32(%esi),%xmm7
+	xorps	%xmm6,%xmm3
+	movups	%xmm2,(%edi)
+	xorps	%xmm7,%xmm4
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	jmp	.L040ctr32_ret
+.align	16
+.L044ctr32_four:
+	call	_aesni_encrypt4
+	movups	(%esi),%xmm6
+	movups	16(%esi),%xmm7
+	movups	32(%esi),%xmm1
+	xorps	%xmm6,%xmm2
+	movups	48(%esi),%xmm0
+	xorps	%xmm7,%xmm3
+	movups	%xmm2,(%edi)
+	xorps	%xmm1,%xmm4
+	movups	%xmm3,16(%edi)
+	xorps	%xmm0,%xmm5
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+.L040ctr32_ret:	/* common exit: wipe the XMM registers and the 32/48/64(%esp) scratch slots */
+	pxor	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	movdqa	%xmm0,32(%esp)
+	pxor	%xmm5,%xmm5
+	movdqa	%xmm0,48(%esp)
+	pxor	%xmm6,%xmm6
+	movdqa	%xmm0,64(%esp)
+	pxor	%xmm7,%xmm7
+	movl	80(%esp),%esp	/* restore original stack pointer */
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin
+.globl	aesni_xts_encrypt	/* XTS-style bulk encryption with AES-NI; six stack arguments: in, out, byte length, data key schedule, tweak key schedule, 16-byte tweak input */
+.hidden	aesni_xts_encrypt
+.type	aesni_xts_encrypt,@function
+.align	16
+aesni_xts_encrypt:
+.L_aesni_xts_encrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi	/* four registers saved, so the first stack argument is now at 20(%esp) */
+	movl	36(%esp),%edx	/* 5th argument: key schedule used only to encrypt the initial tweak */
+	movl	40(%esp),%esi	/* 6th argument: pointer to the 16-byte tweak input */
+	movl	240(%edx),%ecx	/* loop count stored at offset 240 of the key schedule */
+	movups	(%esi),%xmm2
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2	/* whitening: xor with round key 0 */
+.L046enc1_loop_8:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L046enc1_loop_8
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 -- xmm2 now holds the encrypted initial tweak */
+	movl	20(%esp),%esi	/* 1st argument: input pointer */
+	movl	24(%esp),%edi	/* 2nd argument: output pointer */
+	movl	28(%esp),%eax	/* 3rd argument: length in bytes */
+	movl	32(%esp),%edx	/* 4th argument: key schedule for the data blocks */
+	movl	%esp,%ebp
+	subl	$120,%esp
+	movl	240(%edx),%ecx
+	andl	$-16,%esp	/* 16-byte aligned scratch frame so movdqa can be used on it */
+	movl	$135,96(%esp)	/* 96..111(%esp): dword mask {0x87,0,1,0} applied when doubling the tweak */
+	movl	$0,100(%esp)
+	movl	$1,104(%esp)
+	movl	$0,108(%esp)
+	movl	%eax,112(%esp)	/* keep the original length; its low 4 bits are examined at the end */
+	movl	%ebp,116(%esp)	/* caller stack pointer, restored just before the pops */
+	movdqa	%xmm2,%xmm1	/* xmm1 = current tweak */
+	pxor	%xmm0,%xmm0
+	movdqa	96(%esp),%xmm3	/* xmm3 = doubling mask */
+	pcmpgtd	%xmm1,%xmm0	/* xmm0 = per-dword sign mask of the tweak (source of the carry bits) */
+	andl	$-16,%eax	/* whole blocks only from here on */
+	movl	%edx,%ebp	/* ebp = data key schedule base for later reloads */
+	movl	%ecx,%ebx	/* ebx = loop count for later reloads */
+	subl	$96,%eax
+	jc	.L047xts_enc_short	/* fewer than six whole blocks */
+	shll	$4,%ecx
+	movl	$16,%ebx
+	subl	%ecx,%ebx	/* ebx = 16 - 16*count; copied to ecx before the call below */
+	leal	32(%edx,%ecx,1),%edx	/* edx = key + 32 + 16*count; consumed by .L_aesni_encrypt6_enter (outside this excerpt) */
+	jmp	.L048xts_enc_loop6
+.align	16
+.L048xts_enc_loop6:	/* main loop: six blocks per iteration */
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,(%esp)	/* tweak for block 0 saved at 0(%esp) */
+	paddq	%xmm1,%xmm1	/* double each 64-bit half ... */
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1	/* ... then fold in the masked carry bits: xmm1 = next tweak */
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,16(%esp)	/* tweak for block 1 */
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,32(%esp)	/* tweak for block 2 */
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,48(%esp)	/* tweak for block 3 */
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	pshufd	$19,%xmm0,%xmm7
+	movdqa	%xmm1,64(%esp)	/* tweak for block 4 */
+	paddq	%xmm1,%xmm1
+	movups	(%ebp),%xmm0	/* xmm0 = round key 0 */
+	pand	%xmm3,%xmm7
+	movups	(%esi),%xmm2
+	pxor	%xmm1,%xmm7	/* xmm7 = tweak for block 5 */
+	movl	%ebx,%ecx
+	movdqu	16(%esi),%xmm3
+	xorps	%xmm0,%xmm2
+	movdqu	32(%esi),%xmm4
+	pxor	%xmm0,%xmm3
+	movdqu	48(%esi),%xmm5
+	pxor	%xmm0,%xmm4
+	movdqu	64(%esi),%xmm6
+	pxor	%xmm0,%xmm5
+	movdqu	80(%esi),%xmm1
+	pxor	%xmm0,%xmm6
+	leal	96(%esi),%esi
+	pxor	(%esp),%xmm2	/* input ^ round key 0 ^ tweak */
+	movdqa	%xmm7,80(%esp)	/* tweak for block 5 saved at 80(%esp) */
+	pxor	%xmm1,%xmm7
+	movups	16(%ebp),%xmm1	/* xmm1 = round key 1 */
+	pxor	16(%esp),%xmm3
+	pxor	32(%esp),%xmm4
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	pxor	48(%esp),%xmm5
+	pxor	64(%esp),%xmm6
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+	pxor	%xmm0,%xmm7
+	movups	32(%ebp),%xmm0	/* xmm0 = round key 2 */
+.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
+.byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
+.byte	102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
+.byte	102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
+	call	.L_aesni_encrypt6_enter	/* defined outside this excerpt; presumably finishes the remaining rounds on xmm2..xmm7 */
+	movdqa	80(%esp),%xmm1	/* xmm1 = tweak of block 5, basis for the next iteration */
+	pxor	%xmm0,%xmm0
+	xorps	(%esp),%xmm2	/* xor each result with its tweak again */
+	pcmpgtd	%xmm1,%xmm0
+	xorps	16(%esp),%xmm3
+	movups	%xmm2,(%edi)
+	xorps	32(%esp),%xmm4
+	movups	%xmm3,16(%edi)
+	xorps	48(%esp),%xmm5
+	movups	%xmm4,32(%edi)
+	xorps	64(%esp),%xmm6
+	movups	%xmm5,48(%edi)
+	xorps	%xmm1,%xmm7
+	movups	%xmm6,64(%edi)
+	pshufd	$19,%xmm0,%xmm2
+	movups	%xmm7,80(%edi)
+	leal	96(%edi),%edi
+	movdqa	96(%esp),%xmm3	/* reload the doubling mask (xmm3 was used for data) */
+	pxor	%xmm0,%xmm0
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1	/* xmm1 = tweak for the next block */
+	subl	$96,%eax
+	jnc	.L048xts_enc_loop6
+	movl	240(%ebp),%ecx	/* restore plain loop count and key base for the short paths */
+	movl	%ebp,%edx
+	movl	%ecx,%ebx
+.L047xts_enc_short:	/* 0..5 whole blocks remain */
+	addl	$96,%eax	/* eax = remaining whole-block bytes */
+	jz	.L049xts_enc_done6x
+	movdqa	%xmm1,%xmm5	/* xmm5 = tweak for block 0 */
+	cmpl	$32,%eax
+	jb	.L050xts_enc_one
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	je	.L051xts_enc_two	/* still testing the flags of cmpl $32 above; the SSE instructions in between leave EFLAGS alone */
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,%xmm6	/* xmm6 = tweak for block 1 */
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	cmpl	$64,%eax
+	jb	.L052xts_enc_three
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,%xmm7	/* xmm7 = tweak for block 2 */
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm5,(%esp)
+	movdqa	%xmm6,16(%esp)
+	je	.L053xts_enc_four	/* flags from cmpl $64 above */
+	movdqa	%xmm7,32(%esp)	/* five blocks: tweaks 0..3 at 0..48(%esp), tweak 4 at 64(%esp) */
+	pshufd	$19,%xmm0,%xmm7
+	movdqa	%xmm1,48(%esp)
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm7
+	pxor	%xmm1,%xmm7
+	movdqu	(%esi),%xmm2
+	movdqu	16(%esi),%xmm3
+	movdqu	32(%esi),%xmm4
+	pxor	(%esp),%xmm2
+	movdqu	48(%esi),%xmm5
+	pxor	16(%esp),%xmm3
+	movdqu	64(%esi),%xmm6
+	pxor	32(%esp),%xmm4
+	leal	80(%esi),%esi
+	pxor	48(%esp),%xmm5
+	movdqa	%xmm7,64(%esp)
+	pxor	%xmm7,%xmm6
+	call	_aesni_encrypt6	/* helper defined outside this excerpt */
+	movaps	64(%esp),%xmm1	/* xmm1 = last tweak used */
+	xorps	(%esp),%xmm2
+	xorps	16(%esp),%xmm3
+	xorps	32(%esp),%xmm4
+	movups	%xmm2,(%edi)
+	xorps	48(%esp),%xmm5
+	movups	%xmm3,16(%edi)
+	xorps	%xmm1,%xmm6
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	movups	%xmm6,64(%edi)
+	leal	80(%edi),%edi
+	jmp	.L054xts_enc_done
+.align	16
+.L050xts_enc_one:	/* exactly one whole block, tweak in xmm5 */
+	movups	(%esi),%xmm2
+	leal	16(%esi),%esi
+	xorps	%xmm5,%xmm2
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L055enc1_loop_9:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L055enc1_loop_9
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
+	xorps	%xmm5,%xmm2
+	movups	%xmm2,(%edi)
+	leal	16(%edi),%edi
+	movdqa	%xmm5,%xmm1	/* xmm1 = last tweak used */
+	jmp	.L054xts_enc_done
+.align	16
+.L051xts_enc_two:	/* two whole blocks, tweaks in xmm5 and xmm6 */
+	movaps	%xmm1,%xmm6
+	movups	(%esi),%xmm2
+	movups	16(%esi),%xmm3
+	leal	32(%esi),%esi
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	call	_aesni_encrypt2	/* helper defined outside this excerpt */
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	leal	32(%edi),%edi
+	movdqa	%xmm6,%xmm1	/* xmm1 = last tweak used */
+	jmp	.L054xts_enc_done
+.align	16
+.L052xts_enc_three:	/* three whole blocks, tweaks in xmm5, xmm6, xmm7 */
+	movaps	%xmm1,%xmm7
+	movups	(%esi),%xmm2
+	movups	16(%esi),%xmm3
+	movups	32(%esi),%xmm4
+	leal	48(%esi),%esi
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	xorps	%xmm7,%xmm4
+	call	_aesni_encrypt3	/* helper defined outside this excerpt */
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	xorps	%xmm7,%xmm4
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	leal	48(%edi),%edi
+	movdqa	%xmm7,%xmm1	/* xmm1 = last tweak used */
+	jmp	.L054xts_enc_done
+.align	16
+.L053xts_enc_four:	/* four whole blocks, tweaks at 0(%esp), 16(%esp), xmm7, xmm6 */
+	movaps	%xmm1,%xmm6
+	movups	(%esi),%xmm2
+	movups	16(%esi),%xmm3
+	movups	32(%esi),%xmm4
+	xorps	(%esp),%xmm2
+	movups	48(%esi),%xmm5
+	leal	64(%esi),%esi
+	xorps	16(%esp),%xmm3
+	xorps	%xmm7,%xmm4
+	xorps	%xmm6,%xmm5
+	call	_aesni_encrypt4	/* helper defined outside this excerpt */
+	xorps	(%esp),%xmm2
+	xorps	16(%esp),%xmm3
+	xorps	%xmm7,%xmm4
+	movups	%xmm2,(%edi)
+	xorps	%xmm6,%xmm5
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	leal	64(%edi),%edi
+	movdqa	%xmm6,%xmm1	/* xmm1 = last tweak used */
+	jmp	.L054xts_enc_done
+.align	16
+.L049xts_enc_done6x:	/* whole blocks were an exact multiple of six; xmm1 already holds the next tweak */
+	movl	112(%esp),%eax
+	andl	$15,%eax	/* eax = trailing partial-block byte count */
+	jz	.L056xts_enc_ret
+	movdqa	%xmm1,%xmm5
+	movl	%eax,112(%esp)
+	jmp	.L057xts_enc_steal
+.align	16
+.L054xts_enc_done:	/* xmm1 = last tweak used by the short path */
+	movl	112(%esp),%eax
+	pxor	%xmm0,%xmm0
+	andl	$15,%eax	/* eax = trailing partial-block byte count */
+	jz	.L056xts_enc_ret
+	pcmpgtd	%xmm1,%xmm0
+	movl	%eax,112(%esp)	/* slot 112 now holds the tail byte count, used to rewind edi below */
+	pshufd	$19,%xmm0,%xmm5
+	paddq	%xmm1,%xmm1
+	pand	96(%esp),%xmm5
+	pxor	%xmm1,%xmm5	/* xmm5 = next tweak, for the stolen block */
+.L057xts_enc_steal:	/* byte-wise swap: tail input bytes go into the previous output block, its old bytes become the short final output */
+	movzbl	(%esi),%ecx
+	movzbl	-16(%edi),%edx
+	leal	1(%esi),%esi
+	movb	%cl,-16(%edi)
+	movb	%dl,(%edi)
+	leal	1(%edi),%edi
+	subl	$1,%eax
+	jnz	.L057xts_enc_steal
+	subl	112(%esp),%edi	/* rewind edi by the tail length */
+	movl	%ebp,%edx
+	movl	%ebx,%ecx
+	movups	-16(%edi),%xmm2	/* re-encrypt the patched previous block in place with tweak xmm5 */
+	xorps	%xmm5,%xmm2
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L058enc1_loop_10:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L058enc1_loop_10
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
+	xorps	%xmm5,%xmm2
+	movups	%xmm2,-16(%edi)
+.L056xts_enc_ret:	/* zero all xmm registers and the tweak scratch area before returning */
+	pxor	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	movdqa	%xmm0,(%esp)
+	pxor	%xmm3,%xmm3
+	movdqa	%xmm0,16(%esp)
+	pxor	%xmm4,%xmm4
+	movdqa	%xmm0,32(%esp)
+	pxor	%xmm5,%xmm5
+	movdqa	%xmm0,48(%esp)
+	pxor	%xmm6,%xmm6
+	movdqa	%xmm0,64(%esp)
+	pxor	%xmm7,%xmm7
+	movdqa	%xmm0,80(%esp)
+	movl	116(%esp),%esp	/* restore the caller stack pointer saved at entry */
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin
+.globl	aesni_xts_decrypt	/* XTS-style bulk decryption with AES-NI; six stack arguments: in, out, byte length, data key schedule, tweak key schedule, 16-byte tweak input */
+.hidden	aesni_xts_decrypt
+.type	aesni_xts_decrypt,@function
+.align	16
+aesni_xts_decrypt:
+.L_aesni_xts_decrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi	/* four registers saved, so the first stack argument is now at 20(%esp) */
+	movl	36(%esp),%edx	/* 5th argument: key schedule used only to encrypt the initial tweak */
+	movl	40(%esp),%esi	/* 6th argument: pointer to the 16-byte tweak input */
+	movl	240(%edx),%ecx	/* loop count stored at offset 240 of the key schedule */
+	movups	(%esi),%xmm2
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L059enc1_loop_11:	/* the tweak is encrypted (aesenc), even on the decrypt path */
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L059enc1_loop_11
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 -- xmm2 now holds the encrypted initial tweak */
+	movl	20(%esp),%esi	/* 1st argument: input pointer */
+	movl	24(%esp),%edi	/* 2nd argument: output pointer */
+	movl	28(%esp),%eax	/* 3rd argument: length in bytes */
+	movl	32(%esp),%edx	/* 4th argument: key schedule for the data blocks */
+	movl	%esp,%ebp
+	subl	$120,%esp
+	andl	$-16,%esp	/* 16-byte aligned scratch frame so movdqa can be used on it */
+	xorl	%ebx,%ebx
+	testl	$15,%eax
+	setnz	%bl
+	shll	$4,%ebx
+	subl	%ebx,%eax	/* if the length is not a multiple of 16, hold back one whole block for the stealing step */
+	movl	$135,96(%esp)	/* 96..111(%esp): dword mask {0x87,0,1,0} applied when doubling the tweak */
+	movl	$0,100(%esp)
+	movl	$1,104(%esp)
+	movl	$0,108(%esp)
+	movl	%eax,112(%esp)	/* keep the adjusted length; its low 4 bits are examined at the end */
+	movl	%ebp,116(%esp)	/* caller stack pointer, restored just before the pops */
+	movl	240(%edx),%ecx
+	movl	%edx,%ebp	/* ebp = data key schedule base for later reloads */
+	movl	%ecx,%ebx	/* ebx = loop count for later reloads */
+	movdqa	%xmm2,%xmm1	/* xmm1 = current tweak */
+	pxor	%xmm0,%xmm0
+	movdqa	96(%esp),%xmm3	/* xmm3 = doubling mask */
+	pcmpgtd	%xmm1,%xmm0	/* xmm0 = per-dword sign mask of the tweak (source of the carry bits) */
+	andl	$-16,%eax	/* whole blocks only from here on */
+	subl	$96,%eax
+	jc	.L060xts_dec_short	/* fewer than six whole blocks */
+	shll	$4,%ecx
+	movl	$16,%ebx
+	subl	%ecx,%ebx	/* ebx = 16 - 16*count; copied to ecx before the call below */
+	leal	32(%edx,%ecx,1),%edx	/* edx = key + 32 + 16*count; consumed by .L_aesni_decrypt6_enter (outside this excerpt) */
+	jmp	.L061xts_dec_loop6
+.align	16
+.L061xts_dec_loop6:	/* main loop: six blocks per iteration */
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,(%esp)	/* tweak for block 0 saved at 0(%esp) */
+	paddq	%xmm1,%xmm1	/* double each 64-bit half ... */
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1	/* ... then fold in the masked carry bits: xmm1 = next tweak */
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,16(%esp)	/* tweak for block 1 */
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,32(%esp)	/* tweak for block 2 */
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,48(%esp)	/* tweak for block 3 */
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	pshufd	$19,%xmm0,%xmm7
+	movdqa	%xmm1,64(%esp)	/* tweak for block 4 */
+	paddq	%xmm1,%xmm1
+	movups	(%ebp),%xmm0	/* xmm0 = round key 0 */
+	pand	%xmm3,%xmm7
+	movups	(%esi),%xmm2
+	pxor	%xmm1,%xmm7	/* xmm7 = tweak for block 5 */
+	movl	%ebx,%ecx
+	movdqu	16(%esi),%xmm3
+	xorps	%xmm0,%xmm2
+	movdqu	32(%esi),%xmm4
+	pxor	%xmm0,%xmm3
+	movdqu	48(%esi),%xmm5
+	pxor	%xmm0,%xmm4
+	movdqu	64(%esi),%xmm6
+	pxor	%xmm0,%xmm5
+	movdqu	80(%esi),%xmm1
+	pxor	%xmm0,%xmm6
+	leal	96(%esi),%esi
+	pxor	(%esp),%xmm2	/* input ^ round key 0 ^ tweak */
+	movdqa	%xmm7,80(%esp)	/* tweak for block 5 saved at 80(%esp) */
+	pxor	%xmm1,%xmm7
+	movups	16(%ebp),%xmm1	/* xmm1 = round key 1 */
+	pxor	16(%esp),%xmm3
+	pxor	32(%esp),%xmm4
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	pxor	48(%esp),%xmm5
+	pxor	64(%esp),%xmm6
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
+	pxor	%xmm0,%xmm7
+	movups	32(%ebp),%xmm0	/* xmm0 = round key 2 */
+.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
+.byte	102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
+.byte	102,15,56,222,241	/* aesdec %xmm1,%xmm6 */
+.byte	102,15,56,222,249	/* aesdec %xmm1,%xmm7 */
+	call	.L_aesni_decrypt6_enter	/* defined outside this excerpt; presumably finishes the remaining rounds on xmm2..xmm7 */
+	movdqa	80(%esp),%xmm1	/* xmm1 = tweak of block 5, basis for the next iteration */
+	pxor	%xmm0,%xmm0
+	xorps	(%esp),%xmm2	/* xor each result with its tweak again */
+	pcmpgtd	%xmm1,%xmm0
+	xorps	16(%esp),%xmm3
+	movups	%xmm2,(%edi)
+	xorps	32(%esp),%xmm4
+	movups	%xmm3,16(%edi)
+	xorps	48(%esp),%xmm5
+	movups	%xmm4,32(%edi)
+	xorps	64(%esp),%xmm6
+	movups	%xmm5,48(%edi)
+	xorps	%xmm1,%xmm7
+	movups	%xmm6,64(%edi)
+	pshufd	$19,%xmm0,%xmm2
+	movups	%xmm7,80(%edi)
+	leal	96(%edi),%edi
+	movdqa	96(%esp),%xmm3	/* reload the doubling mask (xmm3 was used for data) */
+	pxor	%xmm0,%xmm0
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1	/* xmm1 = tweak for the next block */
+	subl	$96,%eax
+	jnc	.L061xts_dec_loop6
+	movl	240(%ebp),%ecx	/* restore plain loop count and key base for the short paths */
+	movl	%ebp,%edx
+	movl	%ecx,%ebx
+.L060xts_dec_short:	/* 0..5 whole blocks remain */
+	addl	$96,%eax	/* eax = remaining whole-block bytes */
+	jz	.L062xts_dec_done6x
+	movdqa	%xmm1,%xmm5	/* xmm5 = tweak for block 0 */
+	cmpl	$32,%eax
+	jb	.L063xts_dec_one
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	je	.L064xts_dec_two	/* still testing the flags of cmpl $32 above; the SSE instructions in between leave EFLAGS alone */
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,%xmm6	/* xmm6 = tweak for block 1 */
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	cmpl	$64,%eax
+	jb	.L065xts_dec_three
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,%xmm7	/* xmm7 = tweak for block 2 */
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm5,(%esp)
+	movdqa	%xmm6,16(%esp)
+	je	.L066xts_dec_four	/* flags from cmpl $64 above */
+	movdqa	%xmm7,32(%esp)	/* five blocks: tweaks 0..3 at 0..48(%esp), tweak 4 at 64(%esp) */
+	pshufd	$19,%xmm0,%xmm7
+	movdqa	%xmm1,48(%esp)
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm7
+	pxor	%xmm1,%xmm7
+	movdqu	(%esi),%xmm2
+	movdqu	16(%esi),%xmm3
+	movdqu	32(%esi),%xmm4
+	pxor	(%esp),%xmm2
+	movdqu	48(%esi),%xmm5
+	pxor	16(%esp),%xmm3
+	movdqu	64(%esi),%xmm6
+	pxor	32(%esp),%xmm4
+	leal	80(%esi),%esi
+	pxor	48(%esp),%xmm5
+	movdqa	%xmm7,64(%esp)
+	pxor	%xmm7,%xmm6
+	call	_aesni_decrypt6	/* helper defined outside this excerpt */
+	movaps	64(%esp),%xmm1	/* xmm1 = last tweak used */
+	xorps	(%esp),%xmm2
+	xorps	16(%esp),%xmm3
+	xorps	32(%esp),%xmm4
+	movups	%xmm2,(%edi)
+	xorps	48(%esp),%xmm5
+	movups	%xmm3,16(%edi)
+	xorps	%xmm1,%xmm6
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	movups	%xmm6,64(%edi)
+	leal	80(%edi),%edi
+	jmp	.L067xts_dec_done
+.align	16
+.L063xts_dec_one:	/* exactly one whole block, tweak in xmm5 */
+	movups	(%esi),%xmm2
+	leal	16(%esi),%esi
+	xorps	%xmm5,%xmm2
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L068dec1_loop_12:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L068dec1_loop_12
+.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
+	xorps	%xmm5,%xmm2
+	movups	%xmm2,(%edi)
+	leal	16(%edi),%edi
+	movdqa	%xmm5,%xmm1	/* xmm1 = last tweak used */
+	jmp	.L067xts_dec_done
+.align	16
+.L064xts_dec_two:	/* two whole blocks, tweaks in xmm5 and xmm6 */
+	movaps	%xmm1,%xmm6
+	movups	(%esi),%xmm2
+	movups	16(%esi),%xmm3
+	leal	32(%esi),%esi
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	call	_aesni_decrypt2	/* helper defined outside this excerpt */
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	leal	32(%edi),%edi
+	movdqa	%xmm6,%xmm1	/* xmm1 = last tweak used */
+	jmp	.L067xts_dec_done
+.align	16
+.L065xts_dec_three:	/* three whole blocks, tweaks in xmm5, xmm6, xmm7 */
+	movaps	%xmm1,%xmm7
+	movups	(%esi),%xmm2
+	movups	16(%esi),%xmm3
+	movups	32(%esi),%xmm4
+	leal	48(%esi),%esi
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	xorps	%xmm7,%xmm4
+	call	_aesni_decrypt3	/* helper defined outside this excerpt */
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	xorps	%xmm7,%xmm4
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	leal	48(%edi),%edi
+	movdqa	%xmm7,%xmm1	/* xmm1 = last tweak used */
+	jmp	.L067xts_dec_done
+.align	16
+.L066xts_dec_four:	/* four whole blocks, tweaks at 0(%esp), 16(%esp), xmm7, xmm6 */
+	movaps	%xmm1,%xmm6
+	movups	(%esi),%xmm2
+	movups	16(%esi),%xmm3
+	movups	32(%esi),%xmm4
+	xorps	(%esp),%xmm2
+	movups	48(%esi),%xmm5
+	leal	64(%esi),%esi
+	xorps	16(%esp),%xmm3
+	xorps	%xmm7,%xmm4
+	xorps	%xmm6,%xmm5
+	call	_aesni_decrypt4	/* helper defined outside this excerpt */
+	xorps	(%esp),%xmm2
+	xorps	16(%esp),%xmm3
+	xorps	%xmm7,%xmm4
+	movups	%xmm2,(%edi)
+	xorps	%xmm6,%xmm5
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	leal	64(%edi),%edi
+	movdqa	%xmm6,%xmm1	/* xmm1 = last tweak used */
+	jmp	.L067xts_dec_done
+.align	16
+.L062xts_dec_done6x:	/* whole blocks were an exact multiple of six; xmm1, xmm0 and xmm3 are already set up for the next tweak */
+	movl	112(%esp),%eax
+	andl	$15,%eax	/* eax = trailing partial-block byte count */
+	jz	.L069xts_dec_ret
+	movl	%eax,112(%esp)
+	jmp	.L070xts_dec_only_one_more
+.align	16
+.L067xts_dec_done:	/* xmm1 = last tweak used by the short path */
+	movl	112(%esp),%eax
+	pxor	%xmm0,%xmm0
+	andl	$15,%eax	/* eax = trailing partial-block byte count */
+	jz	.L069xts_dec_ret
+	pcmpgtd	%xmm1,%xmm0
+	movl	%eax,112(%esp)	/* slot 112 now holds the tail byte count, used to rewind edi below */
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	96(%esp),%xmm3
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1	/* xmm1 = next tweak */
+.L070xts_dec_only_one_more:	/* decrypt the held-back whole block, using the tweak after the current one */
+	pshufd	$19,%xmm0,%xmm5
+	movdqa	%xmm1,%xmm6	/* xmm6 = current tweak, kept for the final block below */
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm5
+	pxor	%xmm1,%xmm5	/* xmm5 = following tweak, used first */
+	movl	%ebp,%edx
+	movl	%ebx,%ecx
+	movups	(%esi),%xmm2
+	xorps	%xmm5,%xmm2
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L071dec1_loop_13:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L071dec1_loop_13
+.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
+	xorps	%xmm5,%xmm2
+	movups	%xmm2,(%edi)
+.L072xts_dec_steal:	/* byte-wise swap: tail input bytes at 16(%esi) go into the block just written, its old bytes become the short final output at 16(%edi) */
+	movzbl	16(%esi),%ecx
+	movzbl	(%edi),%edx
+	leal	1(%esi),%esi
+	movb	%cl,(%edi)
+	movb	%dl,16(%edi)
+	leal	1(%edi),%edi
+	subl	$1,%eax
+	jnz	.L072xts_dec_steal
+	subl	112(%esp),%edi	/* rewind edi by the tail length */
+	movl	%ebp,%edx
+	movl	%ebx,%ecx
+	movups	(%edi),%xmm2	/* decrypt the patched block in place with the saved tweak xmm6 */
+	xorps	%xmm6,%xmm2
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L073dec1_loop_14:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L073dec1_loop_14
+.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
+	xorps	%xmm6,%xmm2
+	movups	%xmm2,(%edi)
+.L069xts_dec_ret:	/* zero all xmm registers and the tweak scratch area before returning */
+	pxor	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	movdqa	%xmm0,(%esp)
+	pxor	%xmm3,%xmm3
+	movdqa	%xmm0,16(%esp)
+	pxor	%xmm4,%xmm4
+	movdqa	%xmm0,32(%esp)
+	pxor	%xmm5,%xmm5
+	movdqa	%xmm0,48(%esp)
+	pxor	%xmm6,%xmm6
+	movdqa	%xmm0,64(%esp)
+	pxor	%xmm7,%xmm7
+	movdqa	%xmm0,80(%esp)
+	movl	116(%esp),%esp	/* restore the caller stack pointer saved at entry */
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin
+.globl	aesni_cbc_encrypt	/* CBC encrypt or decrypt with AES-NI; six stack arguments: in, out, byte length, key schedule, IV pointer, direction flag (0 selects decrypt) */
+.hidden	aesni_cbc_encrypt
+.type	aesni_cbc_encrypt,@function
+.align	16
+aesni_cbc_encrypt:
+.L_aesni_cbc_encrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi	/* four registers saved, so the first stack argument is now at 20(%esp) */
+	movl	20(%esp),%esi	/* 1st argument: input pointer */
+	movl	%esp,%ebx
+	movl	24(%esp),%edi	/* 2nd argument: output pointer */
+	subl	$24,%ebx
+	movl	28(%esp),%eax	/* 3rd argument: length in bytes */
+	andl	$-16,%ebx	/* ebx = 16-byte aligned scratch frame below the current stack */
+	movl	32(%esp),%edx	/* 4th argument: key schedule */
+	movl	36(%esp),%ebp	/* 5th argument: IV pointer */
+	testl	%eax,%eax
+	jz	.L074cbc_abort	/* zero length: nothing to do, IV left untouched */
+	cmpl	$0,40(%esp)	/* 6th argument; the flags are consumed by the je further down */
+	xchgl	%esp,%ebx	/* switch to the aligned frame; ebx = old esp */
+	movups	(%ebp),%xmm7	/* xmm7 = IV */
+	movl	240(%edx),%ecx	/* loop count stored at offset 240 of the key schedule */
+	movl	%edx,%ebp	/* ebp = key schedule base for later reloads */
+	movl	%ebx,16(%esp)	/* save the old stack pointer in the new frame */
+	movl	%ecx,%ebx	/* ebx = loop count for later reloads */
+	je	.L075cbc_decrypt	/* flag was 0 (none of xchg/mov/movups changes EFLAGS) */
+	movaps	%xmm7,%xmm2	/* encrypt path: xmm2 = chaining value */
+	cmpl	$16,%eax
+	jb	.L076cbc_enc_tail
+	subl	$16,%eax
+	jmp	.L077cbc_enc_loop
+.align	16
+.L077cbc_enc_loop:	/* one block per iteration: out = E(in ^ previous out) */
+	movups	(%esi),%xmm7
+	leal	16(%esi),%esi
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	xorps	%xmm0,%xmm7
+	leal	32(%edx),%edx
+	xorps	%xmm7,%xmm2	/* chaining value ^ input ^ round key 0 */
+.L078enc1_loop_15:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L078enc1_loop_15
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
+	movl	%ebx,%ecx
+	movl	%ebp,%edx
+	movups	%xmm2,(%edi)
+	leal	16(%edi),%edi
+	subl	$16,%eax
+	jnc	.L077cbc_enc_loop
+	addl	$16,%eax
+	jnz	.L076cbc_enc_tail	/* 1..15 bytes left over */
+	movaps	%xmm2,%xmm7	/* last output block becomes the IV written back at cbc_ret */
+	pxor	%xmm2,%xmm2
+	jmp	.L079cbc_ret
+.L076cbc_enc_tail:	/* partial final block: copy it to the output, zero-pad to 16 bytes, then encrypt it in place */
+	movl	%eax,%ecx
+.long	2767451785	/* bytes 89 F6 F3 A4: movl %esi,%esi ; rep movsb -- copy ecx bytes from (%esi) to (%edi) */
+	movl	$16,%ecx
+	subl	%eax,%ecx
+	xorl	%eax,%eax
+.long	2868115081	/* bytes 89 F6 F3 AA: movl %esi,%esi ; rep stosb -- store ecx zero bytes (al = 0) at (%edi) */
+	leal	-16(%edi),%edi	/* back to the start of the padded block */
+	movl	%ebx,%ecx
+	movl	%edi,%esi	/* the padded block in the output buffer is the next input */
+	movl	%ebp,%edx
+	jmp	.L077cbc_enc_loop	/* eax is 0 here, so the loop runs exactly once more */
+.align	16
+.L075cbc_decrypt:
+	cmpl	$80,%eax
+	jbe	.L080cbc_dec_tail	/* 80 bytes or fewer: handled by the tail code */
+	movaps	%xmm7,(%esp)	/* 0(%esp) = chaining value for the first block of the group */
+	subl	$80,%eax
+	jmp	.L081cbc_dec_loop6_enter
+.align	16
+.L082cbc_dec_loop6:
+	movaps	%xmm0,(%esp)	/* last input block of the previous group is the next chaining value */
+	movups	%xmm7,(%edi)	/* store the 6th output of the previous group */
+	leal	16(%edi),%edi
+.L081cbc_dec_loop6_enter:	/* decrypt six blocks at a time */
+	movdqu	(%esi),%xmm2
+	movdqu	16(%esi),%xmm3
+	movdqu	32(%esi),%xmm4
+	movdqu	48(%esi),%xmm5
+	movdqu	64(%esi),%xmm6
+	movdqu	80(%esi),%xmm7
+	call	_aesni_decrypt6	/* helper defined outside this excerpt */
+	movups	(%esi),%xmm1	/* reload the input blocks: each one is xored into the following output */
+	movups	16(%esi),%xmm0
+	xorps	(%esp),%xmm2
+	xorps	%xmm1,%xmm3
+	movups	32(%esi),%xmm1
+	xorps	%xmm0,%xmm4
+	movups	48(%esi),%xmm0
+	xorps	%xmm1,%xmm5
+	movups	64(%esi),%xmm1
+	xorps	%xmm0,%xmm6
+	movups	80(%esi),%xmm0	/* xmm0 = 6th input block, the next chaining value */
+	xorps	%xmm1,%xmm7
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	leal	96(%esi),%esi
+	movups	%xmm4,32(%edi)
+	movl	%ebx,%ecx
+	movups	%xmm5,48(%edi)
+	movl	%ebp,%edx
+	movups	%xmm6,64(%edi)
+	leal	80(%edi),%edi	/* only five outputs stored so far; the 6th is still in xmm7 */
+	subl	$96,%eax
+	ja	.L082cbc_dec_loop6
+	movaps	%xmm7,%xmm2	/* xmm2 = pending 6th output */
+	movaps	%xmm0,%xmm7	/* xmm7 = chaining value for whatever follows */
+	addl	$80,%eax
+	jle	.L083cbc_dec_clear_tail_collected
+	movups	%xmm2,(%edi)
+	leal	16(%edi),%edi
+.L080cbc_dec_tail:	/* up to 80 bytes left: dispatch on 1, 2, 3, 4 or 5 blocks */
+	movups	(%esi),%xmm2
+	movaps	%xmm2,%xmm6	/* xmm6 keeps a copy of input block 0 */
+	cmpl	$16,%eax
+	jbe	.L084cbc_dec_one
+	movups	16(%esi),%xmm3
+	movaps	%xmm3,%xmm5	/* xmm5 keeps a copy of input block 1 */
+	cmpl	$32,%eax
+	jbe	.L085cbc_dec_two
+	movups	32(%esi),%xmm4
+	cmpl	$48,%eax
+	jbe	.L086cbc_dec_three
+	movups	48(%esi),%xmm5
+	cmpl	$64,%eax
+	jbe	.L087cbc_dec_four
+	movups	64(%esi),%xmm6	/* five blocks: run the 6-block helper with a zero 6th block */
+	movaps	%xmm7,(%esp)
+	movups	(%esi),%xmm2
+	xorps	%xmm7,%xmm7
+	call	_aesni_decrypt6	/* helper defined outside this excerpt */
+	movups	(%esi),%xmm1
+	movups	16(%esi),%xmm0
+	xorps	(%esp),%xmm2
+	xorps	%xmm1,%xmm3
+	movups	32(%esi),%xmm1
+	xorps	%xmm0,%xmm4
+	movups	48(%esi),%xmm0
+	xorps	%xmm1,%xmm5
+	movups	64(%esi),%xmm7	/* xmm7 = last input block, the IV written back at cbc_ret */
+	xorps	%xmm0,%xmm6
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	pxor	%xmm3,%xmm3
+	movups	%xmm4,32(%edi)
+	pxor	%xmm4,%xmm4
+	movups	%xmm5,48(%edi)
+	pxor	%xmm5,%xmm5
+	leal	64(%edi),%edi
+	movaps	%xmm6,%xmm2	/* xmm2 = last output block, still pending */
+	pxor	%xmm6,%xmm6
+	subl	$80,%eax
+	jmp	.L088cbc_dec_tail_collected
+.align	16
+.L084cbc_dec_one:
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L089dec1_loop_16:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L089dec1_loop_16
+.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
+	xorps	%xmm7,%xmm2
+	movaps	%xmm6,%xmm7	/* xmm7 = this input block, the new IV */
+	subl	$16,%eax
+	jmp	.L088cbc_dec_tail_collected
+.align	16
+.L085cbc_dec_two:
+	call	_aesni_decrypt2	/* helper defined outside this excerpt */
+	xorps	%xmm7,%xmm2
+	xorps	%xmm6,%xmm3
+	movups	%xmm2,(%edi)
+	movaps	%xmm3,%xmm2	/* xmm2 = last output block, still pending */
+	pxor	%xmm3,%xmm3
+	leal	16(%edi),%edi
+	movaps	%xmm5,%xmm7	/* xmm7 = last input block, the new IV */
+	subl	$32,%eax
+	jmp	.L088cbc_dec_tail_collected
+.align	16
+.L086cbc_dec_three:
+	call	_aesni_decrypt3	/* helper defined outside this excerpt */
+	xorps	%xmm7,%xmm2
+	xorps	%xmm6,%xmm3
+	xorps	%xmm5,%xmm4
+	movups	%xmm2,(%edi)
+	movaps	%xmm4,%xmm2	/* xmm2 = last output block, still pending */
+	pxor	%xmm4,%xmm4
+	movups	%xmm3,16(%edi)
+	pxor	%xmm3,%xmm3
+	leal	32(%edi),%edi
+	movups	32(%esi),%xmm7	/* xmm7 = last input block, the new IV */
+	subl	$48,%eax
+	jmp	.L088cbc_dec_tail_collected
+.align	16
+.L087cbc_dec_four:
+	call	_aesni_decrypt4	/* helper defined outside this excerpt */
+	movups	16(%esi),%xmm1
+	movups	32(%esi),%xmm0
+	xorps	%xmm7,%xmm2
+	movups	48(%esi),%xmm7	/* xmm7 = last input block, the new IV */
+	xorps	%xmm6,%xmm3
+	movups	%xmm2,(%edi)
+	xorps	%xmm1,%xmm4
+	movups	%xmm3,16(%edi)
+	pxor	%xmm3,%xmm3
+	xorps	%xmm0,%xmm5
+	movups	%xmm4,32(%edi)
+	pxor	%xmm4,%xmm4
+	leal	48(%edi),%edi
+	movaps	%xmm5,%xmm2	/* xmm2 = last output block, still pending */
+	pxor	%xmm5,%xmm5
+	subl	$64,%eax
+	jmp	.L088cbc_dec_tail_collected
+.align	16
+.L083cbc_dec_clear_tail_collected:	/* wipe intermediate results left in xmm3..xmm6 by the 6-block loop */
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+.L088cbc_dec_tail_collected:	/* xmm2 = final output block not yet stored; eax is zero or negative here */
+	andl	$15,%eax
+	jnz	.L090cbc_dec_tail_partial
+	movups	%xmm2,(%edi)	/* length was a multiple of 16: store the whole block */
+	pxor	%xmm0,%xmm0
+	jmp	.L079cbc_ret
+.align	16
+.L090cbc_dec_tail_partial:	/* store only part of the final block, staged through the stack */
+	movaps	%xmm2,(%esp)
+	pxor	%xmm0,%xmm0
+	movl	$16,%ecx
+	movl	%esp,%esi
+	subl	%eax,%ecx	/* ecx = 16 - (eax & 15) bytes to copy */
+.long	2767451785	/* bytes 89 F6 F3 A4: movl %esi,%esi ; rep movsb */
+	movdqa	%xmm2,(%esp)	/* NOTE(review): presumably meant to scrub the staged plaintext, but xmm2 still holds that block here -- confirm against upstream */
+.L079cbc_ret:
+	movl	16(%esp),%esp	/* restore the stack pointer saved at entry */
+	movl	36(%esp),%ebp	/* reload the IV pointer argument */
+	pxor	%xmm2,%xmm2
+	pxor	%xmm1,%xmm1
+	movups	%xmm7,(%ebp)	/* write the updated IV back to the caller */
+	pxor	%xmm7,%xmm7
+.L074cbc_abort:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin
+.hidden	_aesni_set_encrypt_key	/* internal key expansion; register arguments: eax = user key, ecx = key size in bits, edx = output key schedule; returns eax = 0, -1 (null pointer) or -2 (unsupported size) */
+.type	_aesni_set_encrypt_key,@function
+.align	16
+_aesni_set_encrypt_key:
+	pushl	%ebp
+	pushl	%ebx
+	testl	%eax,%eax
+	jz	.L091bad_pointer
+	testl	%edx,%edx
+	jz	.L091bad_pointer
+	call	.L092pic	/* call/pop pair obtains the current address for position-independent data access */
+.L092pic:
+	popl	%ebx
+	leal	.Lkey_const-.L092pic(%ebx),%ebx	/* ebx = address of .Lkey_const */
+	leal	OPENSSL_ia32cap_P-.Lkey_const(%ebx),%ebp	/* ebp = address of OPENSSL_ia32cap_P */
+	movups	(%eax),%xmm0	/* xmm0 = first 16 bytes of the user key */
+	xorps	%xmm4,%xmm4
+	movl	4(%ebp),%ebp	/* second dword of the capability vector */
+	leal	16(%edx),%edx
+	andl	$268437504,%ebp	/* keep bits 28 and 11 (0x10000800); presumably the AVX and XOP flags -- confirm against the ia32cap documentation */
+	cmpl	$256,%ecx
+	je	.L09314rounds
+	cmpl	$192,%ecx
+	je	.L09412rounds
+	cmpl	$128,%ecx
+	jne	.L095bad_keybits
+.align	16
+.L09610rounds:	/* 128-bit key */
+	cmpl	$268435456,%ebp	/* bit 28 set and bit 11 clear selects the alternative implementation */
+	je	.L09710rounds_alt
+	movl	$9,%ecx	/* value stored at offset 240 of the schedule; the cipher loops use it as their iteration count */
+	movups	%xmm0,-16(%edx)	/* round key 0 = user key */
+.byte	102,15,58,223,200,1	/* aeskeygenassist $1,%xmm0,%xmm1 */
+	call	.L098key_128_cold
+.byte	102,15,58,223,200,2	/* aeskeygenassist $2,%xmm0,%xmm1 */
+	call	.L099key_128
+.byte	102,15,58,223,200,4	/* aeskeygenassist $4,%xmm0,%xmm1 */
+	call	.L099key_128
+.byte	102,15,58,223,200,8	/* aeskeygenassist $8,%xmm0,%xmm1 */
+	call	.L099key_128
+.byte	102,15,58,223,200,16	/* aeskeygenassist $16,%xmm0,%xmm1 */
+	call	.L099key_128
+.byte	102,15,58,223,200,32	/* aeskeygenassist $32,%xmm0,%xmm1 */
+	call	.L099key_128
+.byte	102,15,58,223,200,64	/* aeskeygenassist $64,%xmm0,%xmm1 */
+	call	.L099key_128
+.byte	102,15,58,223,200,128	/* aeskeygenassist $128,%xmm0,%xmm1 */
+	call	.L099key_128
+.byte	102,15,58,223,200,27	/* aeskeygenassist $27,%xmm0,%xmm1 */
+	call	.L099key_128
+.byte	102,15,58,223,200,54	/* aeskeygenassist $54,%xmm0,%xmm1 */
+	call	.L099key_128
+	movups	%xmm0,(%edx)	/* last round key */
+	movl	%ecx,80(%edx)	/* edx = schedule + 160 here, so this writes offset 240 */
+	jmp	.L100good_key
+.align	16
+.L099key_128:	/* store the previous round key, then derive the next one into xmm0 */
+	movups	%xmm0,(%edx)
+	leal	16(%edx),%edx
+.L098key_128_cold:	/* entry without the store, used for the first round key */
+	shufps	$16,%xmm0,%xmm4
+	xorps	%xmm4,%xmm0
+	shufps	$140,%xmm0,%xmm4
+	xorps	%xmm4,%xmm0
+	shufps	$255,%xmm1,%xmm1	/* broadcast the top dword of the aeskeygenassist result */
+	xorps	%xmm1,%xmm0
+	ret
+.align	16
+.L09710rounds_alt:	/* 128-bit key via pshufb + aesenclast instead of aeskeygenassist */
+	movdqa	(%ebx),%xmm5	/* row 0 of .Lkey_const: pshufb mask */
+	movl	$8,%ecx
+	movdqa	32(%ebx),%xmm4	/* row 2 of .Lkey_const: round constant, starts at 1 */
+	movdqa	%xmm0,%xmm2
+	movdqu	%xmm0,-16(%edx)	/* round key 0 = user key */
+.L101loop_key128:	/* eight iterations; the round constant doubles each time */
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
+.byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
+	pslld	$1,%xmm4
+	leal	16(%edx),%edx
+	movdqa	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	movdqu	%xmm0,-16(%edx)
+	movdqa	%xmm0,%xmm2
+	decl	%ecx
+	jnz	.L101loop_key128
+	movdqa	48(%ebx),%xmm4	/* row 3 of .Lkey_const: round constant 27 for the ninth key */
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
+.byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
+	pslld	$1,%xmm4	/* 27 -> 54 for the tenth key */
+	movdqa	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	movdqu	%xmm0,(%edx)
+	movdqa	%xmm0,%xmm2
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
+.byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
+	movdqa	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	movdqu	%xmm0,16(%edx)
+	movl	$9,%ecx
+	movl	%ecx,96(%edx)	/* edx = schedule + 144 here, so this writes offset 240 */
+	jmp	.L100good_key
+.align	16
+.L09412rounds:	/* 192-bit key */
+	movq	16(%eax),%xmm2	/* remaining 8 bytes of the user key */
+	cmpl	$268435456,%ebp
+	je	.L10212rounds_alt
+	movl	$11,%ecx
+	movups	%xmm0,-16(%edx)
+.byte	102,15,58,223,202,1	/* aeskeygenassist $1,%xmm2,%xmm1 */
+	call	.L103key_192a_cold
+.byte	102,15,58,223,202,2	/* aeskeygenassist $2,%xmm2,%xmm1 */
+	call	.L104key_192b
+.byte	102,15,58,223,202,4	/* aeskeygenassist $4,%xmm2,%xmm1 */
+	call	.L105key_192a
+.byte	102,15,58,223,202,8	/* aeskeygenassist $8,%xmm2,%xmm1 */
+	call	.L104key_192b
+.byte	102,15,58,223,202,16	/* aeskeygenassist $16,%xmm2,%xmm1 */
+	call	.L105key_192a
+.byte	102,15,58,223,202,32	/* aeskeygenassist $32,%xmm2,%xmm1 */
+	call	.L104key_192b
+.byte	102,15,58,223,202,64	/* aeskeygenassist $64,%xmm2,%xmm1 */
+	call	.L105key_192a
+.byte	102,15,58,223,202,128	/* aeskeygenassist $128,%xmm2,%xmm1 */
+	call	.L104key_192b
+	movups	%xmm0,(%edx)
+	movl	%ecx,48(%edx)	/* edx = schedule + 192 here, so this writes offset 240 */
+	jmp	.L100good_key
+.align	16
+.L105key_192a:	/* store xmm0 as one 16-byte round key, then expand */
+	movups	%xmm0,(%edx)
+	leal	16(%edx),%edx
+.align	16
+.L103key_192a_cold:
+	movaps	%xmm2,%xmm5	/* keep the current low 8 key bytes for the next key_192b store */
+.L106key_192b_warm:	/* shared expansion step producing the next 24 bytes of key material in xmm0 and xmm2 */
+	shufps	$16,%xmm0,%xmm4
+	movdqa	%xmm2,%xmm3
+	xorps	%xmm4,%xmm0
+	shufps	$140,%xmm0,%xmm4
+	pslldq	$4,%xmm3
+	xorps	%xmm4,%xmm0
+	pshufd	$85,%xmm1,%xmm1
+	pxor	%xmm3,%xmm2
+	pxor	%xmm1,%xmm0
+	pshufd	$255,%xmm0,%xmm3
+	pxor	%xmm3,%xmm2
+	ret
+.align	16
+.L104key_192b:	/* store 32 bytes assembled from xmm5, xmm0 and xmm2, then expand */
+	movaps	%xmm0,%xmm3
+	shufps	$68,%xmm0,%xmm5
+	movups	%xmm5,(%edx)
+	shufps	$78,%xmm2,%xmm3
+	movups	%xmm3,16(%edx)
+	leal	32(%edx),%edx
+	jmp	.L106key_192b_warm
+.align	16
+.L10212rounds_alt:	/* 192-bit key via pshufb + aesenclast */
+	movdqa	16(%ebx),%xmm5	/* row 1 of .Lkey_const: pshufb mask for this key size */
+	movdqa	32(%ebx),%xmm4	/* row 2 of .Lkey_const: round constant, starts at 1 */
+	movl	$8,%ecx
+	movdqu	%xmm0,-16(%edx)
+.L107loop_key192:	/* eight iterations, each emitting 24 bytes of schedule */
+	movq	%xmm2,(%edx)
+	movdqa	%xmm2,%xmm1
+.byte	102,15,56,0,213	/* pshufb %xmm5,%xmm2 */
+.byte	102,15,56,221,212	/* aesenclast %xmm4,%xmm2 */
+	pslld	$1,%xmm4
+	leal	24(%edx),%edx
+	movdqa	%xmm0,%xmm3
+	pslldq	$4,%xmm0
+	pxor	%xmm0,%xmm3
+	pslldq	$4,%xmm0
+	pxor	%xmm0,%xmm3
+	pslldq	$4,%xmm0
+	pxor	%xmm3,%xmm0
+	pshufd	$255,%xmm0,%xmm3
+	pxor	%xmm1,%xmm3
+	pslldq	$4,%xmm1
+	pxor	%xmm1,%xmm3
+	pxor	%xmm2,%xmm0
+	pxor	%xmm3,%xmm2
+	movdqu	%xmm0,-16(%edx)
+	decl	%ecx
+	jnz	.L107loop_key192
+	movl	$11,%ecx
+	movl	%ecx,32(%edx)	/* edx = schedule + 208 here, so this writes offset 240 */
+	jmp	.L100good_key
+.align	16
+.L09314rounds:	/* 256-bit key */
+	movups	16(%eax),%xmm2	/* second half of the user key */
+	leal	16(%edx),%edx
+	cmpl	$268435456,%ebp
+	je	.L10814rounds_alt
+	movl	$13,%ecx
+	movups	%xmm0,-32(%edx)	/* round keys 0 and 1 = user key */
+	movups	%xmm2,-16(%edx)
+.byte	102,15,58,223,202,1	/* aeskeygenassist $1,%xmm2,%xmm1 */
+	call	.L109key_256a_cold
+.byte	102,15,58,223,200,1	/* aeskeygenassist $1,%xmm0,%xmm1 */
+	call	.L110key_256b
+.byte	102,15,58,223,202,2	/* aeskeygenassist $2,%xmm2,%xmm1 */
+	call	.L111key_256a
+.byte	102,15,58,223,200,2	/* aeskeygenassist $2,%xmm0,%xmm1 */
+	call	.L110key_256b
+.byte	102,15,58,223,202,4	/* aeskeygenassist $4,%xmm2,%xmm1 */
+	call	.L111key_256a
+.byte	102,15,58,223,200,4	/* aeskeygenassist $4,%xmm0,%xmm1 */
+	call	.L110key_256b
+.byte	102,15,58,223,202,8	/* aeskeygenassist $8,%xmm2,%xmm1 */
+	call	.L111key_256a
+.byte	102,15,58,223,200,8	/* aeskeygenassist $8,%xmm0,%xmm1 */
+	call	.L110key_256b
+.byte	102,15,58,223,202,16	/* aeskeygenassist $16,%xmm2,%xmm1 */
+	call	.L111key_256a
+.byte	102,15,58,223,200,16	/* aeskeygenassist $16,%xmm0,%xmm1 */
+	call	.L110key_256b
+.byte	102,15,58,223,202,32	/* aeskeygenassist $32,%xmm2,%xmm1 */
+	call	.L111key_256a
+.byte	102,15,58,223,200,32	/* aeskeygenassist $32,%xmm0,%xmm1 */
+	call	.L110key_256b
+.byte	102,15,58,223,202,64	/* aeskeygenassist $64,%xmm2,%xmm1 */
+	call	.L111key_256a
+	movups	%xmm0,(%edx)
+	movl	%ecx,16(%edx)	/* edx = schedule + 224 here, so this writes offset 240 */
+	xorl	%eax,%eax	/* redundant with the xorl at .L100good_key, kept as generated */
+	jmp	.L100good_key
+.align	16
+.L111key_256a:	/* store xmm2, then derive the next even round key into xmm0 */
+	movups	%xmm2,(%edx)
+	leal	16(%edx),%edx
+.L109key_256a_cold:
+	shufps	$16,%xmm0,%xmm4
+	xorps	%xmm4,%xmm0
+	shufps	$140,%xmm0,%xmm4
+	xorps	%xmm4,%xmm0
+	shufps	$255,%xmm1,%xmm1
+	xorps	%xmm1,%xmm0
+	ret
+.align	16
+.L110key_256b:	/* store xmm0, then derive the next odd round key into xmm2 */
+	movups	%xmm0,(%edx)
+	leal	16(%edx),%edx
+	shufps	$16,%xmm2,%xmm4
+	xorps	%xmm4,%xmm2
+	shufps	$140,%xmm2,%xmm4
+	xorps	%xmm4,%xmm2
+	shufps	$170,%xmm1,%xmm1
+	xorps	%xmm1,%xmm2
+	ret
+.align	16
+.L10814rounds_alt:	/* 256-bit key via pshufb + aesenclast */
+	movdqa	(%ebx),%xmm5	/* row 0 of .Lkey_const: pshufb mask */
+	movdqa	32(%ebx),%xmm4	/* row 2 of .Lkey_const: round constant, starts at 1 */
+	movl	$7,%ecx
+	movdqu	%xmm0,-32(%edx)
+	movdqa	%xmm2,%xmm1
+	movdqu	%xmm2,-16(%edx)
+.L112loop_key256:	/* each pass emits an even round key; all but the last also emit an odd one */
+.byte	102,15,56,0,213	/* pshufb %xmm5,%xmm2 */
+.byte	102,15,56,221,212	/* aesenclast %xmm4,%xmm2 */
+	movdqa	%xmm0,%xmm3
+	pslldq	$4,%xmm0
+	pxor	%xmm0,%xmm3
+	pslldq	$4,%xmm0
+	pxor	%xmm0,%xmm3
+	pslldq	$4,%xmm0
+	pxor	%xmm3,%xmm0
+	pslld	$1,%xmm4
+	pxor	%xmm2,%xmm0
+	movdqu	%xmm0,(%edx)
+	decl	%ecx
+	jz	.L113done_key256
+	pshufd	$255,%xmm0,%xmm2
+	pxor	%xmm3,%xmm3
+.byte	102,15,56,221,211	/* aesenclast %xmm3,%xmm2 (xmm3 is zero here) */
+	movdqa	%xmm1,%xmm3
+	pslldq	$4,%xmm1
+	pxor	%xmm1,%xmm3
+	pslldq	$4,%xmm1
+	pxor	%xmm1,%xmm3
+	pslldq	$4,%xmm1
+	pxor	%xmm3,%xmm1
+	pxor	%xmm1,%xmm2
+	movdqu	%xmm2,16(%edx)
+	leal	32(%edx),%edx
+	movdqa	%xmm2,%xmm1
+	jmp	.L112loop_key256
+.L113done_key256:
+	movl	$13,%ecx
+	movl	%ecx,16(%edx)	/* edx = schedule + 224 here, so this writes offset 240 */
+.L100good_key:	/* success: wipe key material from xmm0..xmm5 and return 0; ecx still holds the stored count */
+	pxor	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	xorl	%eax,%eax
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	4
+.L091bad_pointer:	/* either pointer argument was null */
+	movl	$-1,%eax
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	4
+.L095bad_keybits:	/* key size was not 128, 192 or 256 */
+	pxor	%xmm0,%xmm0	/* xmm0 already held user key bytes */
+	movl	$-2,%eax
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	_aesni_set_encrypt_key,.-_aesni_set_encrypt_key
+.globl	aesni_set_encrypt_key	/* public wrapper: moves the three stack arguments into registers and calls _aesni_set_encrypt_key */
+.hidden	aesni_set_encrypt_key
+.type	aesni_set_encrypt_key,@function
+.align	16
+aesni_set_encrypt_key:
+.L_aesni_set_encrypt_key_begin:
+	movl	4(%esp),%eax	/* 1st argument: user key pointer */
+	movl	8(%esp),%ecx	/* 2nd argument: key size in bits */
+	movl	12(%esp),%edx	/* 3rd argument: output key schedule */
+	call	_aesni_set_encrypt_key
+	ret	/* eax carries the helper status: 0, -1 or -2 */
+.size	aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin
+.globl	aesni_set_decrypt_key	/* builds an encryption schedule, then converts it in place: round keys reversed, inner keys passed through aesimc */
+.hidden	aesni_set_decrypt_key
+.type	aesni_set_decrypt_key,@function
+.align	16
+aesni_set_decrypt_key:
+.L_aesni_set_decrypt_key_begin:
+	movl	4(%esp),%eax	/* 1st argument: user key pointer */
+	movl	8(%esp),%ecx	/* 2nd argument: key size in bits */
+	movl	12(%esp),%edx	/* 3rd argument: output key schedule */
+	call	_aesni_set_encrypt_key
+	movl	12(%esp),%edx	/* reload the schedule base; the helper advanced edx */
+	shll	$4,%ecx	/* on success ecx is the count the helper stored (9, 11 or 13); times 16 = byte offset */
+	testl	%eax,%eax
+	jnz	.L114dec_key_ret	/* propagate the helper error code unchanged */
+	leal	16(%edx,%ecx,1),%eax	/* eax = address of the last round key */
+	movups	(%edx),%xmm0	/* swap first and last round keys without transforming them */
+	movups	(%eax),%xmm1
+	movups	%xmm0,(%eax)
+	movups	%xmm1,(%edx)
+	leal	16(%edx),%edx
+	leal	-16(%eax),%eax
+.L115dec_key_inverse:	/* walk inward from both ends, swapping pairs and applying aesimc to each */
+	movups	(%edx),%xmm0
+	movups	(%eax),%xmm1
+.byte	102,15,56,219,192	/* aesimc %xmm0,%xmm0 */
+.byte	102,15,56,219,201	/* aesimc %xmm1,%xmm1 */
+	leal	16(%edx),%edx
+	leal	-16(%eax),%eax
+	movups	%xmm0,16(%eax)
+	movups	%xmm1,-16(%edx)
+	cmpl	%edx,%eax
+	ja	.L115dec_key_inverse
+	movups	(%edx),%xmm0	/* middle round key: transformed in place */
+.byte	102,15,56,219,192	/* aesimc %xmm0,%xmm0 */
+	movups	%xmm0,(%edx)
+	pxor	%xmm0,%xmm0	/* wipe key material from the registers */
+	pxor	%xmm1,%xmm1
+	xorl	%eax,%eax	/* return 0 */
+.L114dec_key_ret:
+	ret
+.size	aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
+.align	64
+.Lkey_const:	/* four 16-byte rows read by the alternative key-expansion paths of _aesni_set_encrypt_key */
+.long	202313229,202313229,202313229,202313229	/* row 0: 0x0C0F0E0D x4, pshufb mask for the 128- and 256-bit paths */
+.long	67569157,67569157,67569157,67569157	/* row 1: 0x04070605 x4, pshufb mask for the 192-bit path */
+.long	1,1,1,1	/* row 2: initial round constant, doubled with pslld after each use */
+.long	27,27,27,27	/* row 3: round constant 27, loaded by the 128-bit path after its eight-iteration loop */
+.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69	/* ASCII, NUL-terminated: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
+.byte	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+.byte	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+.byte	115,108,46,111,114,103,62,0
+#endif
diff --git a/third_party/boringssl/linux-x86/crypto/fipsmodule/bn-586.S b/third_party/boringssl/linux-x86/crypto/fipsmodule/bn-586.S
new file mode 100644
index 0000000..cc067f7
--- /dev/null
+++ b/third_party/boringssl/linux-x86/crypto/fipsmodule/bn-586.S
@@ -0,0 +1,1537 @@
+#if defined(__i386__)
+.text
+.globl	bn_mul_add_words	/* arg1[i] += arg2[i] * arg4 for arg3 32-bit words, carry propagated; final carry word returned in %eax */
+.hidden	bn_mul_add_words
+.type	bn_mul_add_words,@function
+.align	16
+bn_mul_add_words:
+.L_bn_mul_add_words_begin:
+	call	.L000PIC_me_up	/* call/pop pair obtains the current address for position-independent access */
+.L000PIC_me_up:
+	popl	%eax
+	leal	OPENSSL_ia32cap_P-.L000PIC_me_up(%eax),%eax
+	btl	$26,(%eax)	/* test bit 26 of the first capability word (SSE2, per the label names) */
+	jnc	.L001maw_non_sse2
+	movl	4(%esp),%eax	/* arg1: destination/accumulator words */
+	movl	8(%esp),%edx	/* arg2: source words */
+	movl	12(%esp),%ecx	/* arg3: word count */
+	movd	16(%esp),%mm0	/* arg4: 32-bit multiplier */
+	pxor	%mm1,%mm1	/* %mm1 = running 64-bit sum; its high half is the carry */
+	jmp	.L002maw_sse2_entry
+.align	16
+.L003maw_sse2_unrolled:	/* handles 8 words per iteration */
+	movd	(%eax),%mm3
+	paddq	%mm3,%mm1
+	movd	(%edx),%mm2
+	pmuludq	%mm0,%mm2
+	movd	4(%edx),%mm4
+	pmuludq	%mm0,%mm4
+	movd	8(%edx),%mm6
+	pmuludq	%mm0,%mm6
+	movd	12(%edx),%mm7
+	pmuludq	%mm0,%mm7
+	paddq	%mm2,%mm1
+	movd	4(%eax),%mm3
+	paddq	%mm4,%mm3
+	movd	8(%eax),%mm5
+	paddq	%mm6,%mm5
+	movd	12(%eax),%mm4
+	paddq	%mm4,%mm7
+	movd	%mm1,(%eax)
+	movd	16(%edx),%mm2
+	pmuludq	%mm0,%mm2
+	psrlq	$32,%mm1	/* keep only the carry for the next word */
+	movd	20(%edx),%mm4
+	pmuludq	%mm0,%mm4
+	paddq	%mm3,%mm1
+	movd	24(%edx),%mm6
+	pmuludq	%mm0,%mm6
+	movd	%mm1,4(%eax)
+	psrlq	$32,%mm1
+	movd	28(%edx),%mm3
+	addl	$32,%edx
+	pmuludq	%mm0,%mm3
+	paddq	%mm5,%mm1
+	movd	16(%eax),%mm5
+	paddq	%mm5,%mm2
+	movd	%mm1,8(%eax)
+	psrlq	$32,%mm1
+	paddq	%mm7,%mm1
+	movd	20(%eax),%mm5
+	paddq	%mm5,%mm4
+	movd	%mm1,12(%eax)
+	psrlq	$32,%mm1
+	paddq	%mm2,%mm1
+	movd	24(%eax),%mm5
+	paddq	%mm5,%mm6
+	movd	%mm1,16(%eax)
+	psrlq	$32,%mm1
+	paddq	%mm4,%mm1
+	movd	28(%eax),%mm5
+	paddq	%mm5,%mm3
+	movd	%mm1,20(%eax)
+	psrlq	$32,%mm1
+	paddq	%mm6,%mm1
+	movd	%mm1,24(%eax)
+	psrlq	$32,%mm1
+	paddq	%mm3,%mm1
+	movd	%mm1,28(%eax)
+	leal	32(%eax),%eax
+	psrlq	$32,%mm1
+	subl	$8,%ecx
+	jz	.L004maw_sse2_exit
+.L002maw_sse2_entry:
+	testl	$4294967288,%ecx	/* 0xFFFFFFF8: at least 8 words remaining? */
+	jnz	.L003maw_sse2_unrolled
+.align	4
+.L005maw_sse2_loop:	/* one word per iteration; NOTE(review): count is tested only after the body, so a count of 0 is not handled on this path - confirm callers never pass 0 */
+	movd	(%edx),%mm2
+	movd	(%eax),%mm3
+	pmuludq	%mm0,%mm2
+	leal	4(%edx),%edx
+	paddq	%mm3,%mm1
+	paddq	%mm2,%mm1
+	movd	%mm1,(%eax)
+	subl	$1,%ecx
+	psrlq	$32,%mm1	/* MMX shift and the leal below leave EFLAGS from subl intact for the jnz */
+	leal	4(%eax),%eax
+	jnz	.L005maw_sse2_loop
+.L004maw_sse2_exit:
+	movd	%mm1,%eax	/* return the final carry */
+	emms	/* leave MMX state so x87 code can run afterwards */
+	ret
+.align	16
+.L001maw_non_sse2:	/* integer-only fallback using mull */
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+
+	xorl	%esi,%esi	/* %esi = carry */
+	movl	20(%esp),%edi	/* arg1 (four registers pushed, so arguments start at 20) */
+	movl	28(%esp),%ecx	/* arg3: word count */
+	movl	24(%esp),%ebx	/* arg2 */
+	andl	$4294967288,%ecx	/* round count down to a multiple of 8; sets ZF for the jz below */
+	movl	32(%esp),%ebp	/* arg4: multiplier (movl does not touch flags) */
+	pushl	%ecx	/* extra stack slot, discarded by popl %ecx at the end; shifts arguments by 4 */
+	jz	.L006maw_finish
+.align	16
+.L007maw_loop:
+
+	movl	(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,(%edi)
+	movl	%edx,%esi
+
+	movl	4(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	4(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,4(%edi)
+	movl	%edx,%esi
+
+	movl	8(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	8(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,8(%edi)
+	movl	%edx,%esi
+
+	movl	12(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	12(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,12(%edi)
+	movl	%edx,%esi
+
+	movl	16(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	16(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,16(%edi)
+	movl	%edx,%esi
+
+	movl	20(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	20(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,20(%edi)
+	movl	%edx,%esi
+
+	movl	24(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	24(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,24(%edi)
+	movl	%edx,%esi
+
+	movl	28(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	28(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,28(%edi)
+	movl	%edx,%esi
+
+	subl	$8,%ecx
+	leal	32(%ebx),%ebx	/* leal keeps the flags from subl for the jnz */
+	leal	32(%edi),%edi
+	jnz	.L007maw_loop
+.L006maw_finish:
+	movl	32(%esp),%ecx	/* reload arg3 (offset 32 because of the extra push) */
+	andl	$7,%ecx	/* 0..7 leftover words */
+	jnz	.L008maw_finish2
+	jmp	.L009maw_end
+.L008maw_finish2:
+
+	movl	(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	(%edi),%eax
+	adcl	$0,%edx
+	decl	%ecx
+	movl	%eax,(%edi)
+	movl	%edx,%esi
+	jz	.L009maw_end
+
+	movl	4(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	4(%edi),%eax
+	adcl	$0,%edx
+	decl	%ecx
+	movl	%eax,4(%edi)
+	movl	%edx,%esi
+	jz	.L009maw_end
+
+	movl	8(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	8(%edi),%eax
+	adcl	$0,%edx
+	decl	%ecx
+	movl	%eax,8(%edi)
+	movl	%edx,%esi
+	jz	.L009maw_end
+
+	movl	12(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	12(%edi),%eax
+	adcl	$0,%edx
+	decl	%ecx
+	movl	%eax,12(%edi)
+	movl	%edx,%esi
+	jz	.L009maw_end
+
+	movl	16(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	16(%edi),%eax
+	adcl	$0,%edx
+	decl	%ecx
+	movl	%eax,16(%edi)
+	movl	%edx,%esi
+	jz	.L009maw_end
+
+	movl	20(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	20(%edi),%eax
+	adcl	$0,%edx
+	decl	%ecx
+	movl	%eax,20(%edi)
+	movl	%edx,%esi
+	jz	.L009maw_end
+
+	movl	24(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	24(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,24(%edi)
+	movl	%edx,%esi
+.L009maw_end:
+	movl	%esi,%eax	/* return the final carry */
+	popl	%ecx	/* drop the extra slot pushed in the prologue */
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	bn_mul_add_words,.-.L_bn_mul_add_words_begin
+.globl	bn_mul_words	/* arg1[i] = low word of (arg2[i] * arg4 + carry) for arg3 32-bit words; final carry word returned in %eax */
+.hidden	bn_mul_words
+.type	bn_mul_words,@function
+.align	16
+bn_mul_words:
+.L_bn_mul_words_begin:
+	call	.L010PIC_me_up	/* call/pop pair obtains the current address for position-independent access */
+.L010PIC_me_up:
+	popl	%eax
+	leal	OPENSSL_ia32cap_P-.L010PIC_me_up(%eax),%eax
+	btl	$26,(%eax)	/* test bit 26 of the first capability word (SSE2, per the label names) */
+	jnc	.L011mw_non_sse2
+	movl	4(%esp),%eax	/* arg1: destination words */
+	movl	8(%esp),%edx	/* arg2: source words */
+	movl	12(%esp),%ecx	/* arg3: word count */
+	movd	16(%esp),%mm0	/* arg4: 32-bit multiplier */
+	pxor	%mm1,%mm1	/* %mm1 = running 64-bit sum; its high half is the carry */
+.align	16
+.L012mw_sse2_loop:	/* NOTE(review): count is tested only after the body, so a count of 0 is not handled on this path - confirm callers never pass 0 */
+	movd	(%edx),%mm2
+	pmuludq	%mm0,%mm2
+	leal	4(%edx),%edx
+	paddq	%mm2,%mm1
+	movd	%mm1,(%eax)
+	subl	$1,%ecx
+	psrlq	$32,%mm1	/* MMX shift and the leal below leave EFLAGS from subl intact for the jnz */
+	leal	4(%eax),%eax
+	jnz	.L012mw_sse2_loop
+	movd	%mm1,%eax	/* return the final carry */
+	emms	/* leave MMX state so x87 code can run afterwards */
+	ret
+.align	16
+.L011mw_non_sse2:	/* integer-only fallback using mull */
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+
+	xorl	%esi,%esi	/* %esi = carry */
+	movl	20(%esp),%edi	/* arg1 (four registers pushed, so arguments start at 20) */
+	movl	24(%esp),%ebx	/* arg2 */
+	movl	28(%esp),%ebp	/* arg3: word count */
+	movl	32(%esp),%ecx	/* arg4: multiplier */
+	andl	$4294967288,%ebp	/* 0xFFFFFFF8: round count down to a multiple of 8 */
+	jz	.L013mw_finish
+.L014mw_loop:
+
+	movl	(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,(%edi)
+	movl	%edx,%esi
+
+	movl	4(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,4(%edi)
+	movl	%edx,%esi
+
+	movl	8(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,8(%edi)
+	movl	%edx,%esi
+
+	movl	12(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,12(%edi)
+	movl	%edx,%esi
+
+	movl	16(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,16(%edi)
+	movl	%edx,%esi
+
+	movl	20(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,20(%edi)
+	movl	%edx,%esi
+
+	movl	24(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,24(%edi)
+	movl	%edx,%esi
+
+	movl	28(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,28(%edi)
+	movl	%edx,%esi
+
+	addl	$32,%ebx
+	addl	$32,%edi
+	subl	$8,%ebp
+	jz	.L013mw_finish
+	jmp	.L014mw_loop
+.L013mw_finish:
+	movl	28(%esp),%ebp	/* reload arg3 */
+	andl	$7,%ebp	/* 0..7 leftover words */
+	jnz	.L015mw_finish2
+	jmp	.L016mw_end
+.L015mw_finish2:
+
+	movl	(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,(%edi)
+	movl	%edx,%esi
+	decl	%ebp
+	jz	.L016mw_end
+
+	movl	4(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,4(%edi)
+	movl	%edx,%esi
+	decl	%ebp
+	jz	.L016mw_end
+
+	movl	8(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,8(%edi)
+	movl	%edx,%esi
+	decl	%ebp
+	jz	.L016mw_end
+
+	movl	12(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,12(%edi)
+	movl	%edx,%esi
+	decl	%ebp
+	jz	.L016mw_end
+
+	movl	16(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,16(%edi)
+	movl	%edx,%esi
+	decl	%ebp
+	jz	.L016mw_end
+
+	movl	20(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,20(%edi)
+	movl	%edx,%esi
+	decl	%ebp
+	jz	.L016mw_end
+
+	movl	24(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,24(%edi)
+	movl	%edx,%esi
+.L016mw_end:
+	movl	%esi,%eax	/* return the final carry */
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	bn_mul_words,.-.L_bn_mul_words_begin
+.globl	bn_sqr_words	/* for each of arg3 input words arg2[i], stores the 64-bit square as two words at arg1[2i], arg1[2i+1]; %eax is not set to a result */
+.hidden	bn_sqr_words
+.type	bn_sqr_words,@function
+.align	16
+bn_sqr_words:
+.L_bn_sqr_words_begin:
+	call	.L017PIC_me_up	/* call/pop pair obtains the current address for position-independent access */
+.L017PIC_me_up:
+	popl	%eax
+	leal	OPENSSL_ia32cap_P-.L017PIC_me_up(%eax),%eax
+	btl	$26,(%eax)	/* test bit 26 of the first capability word (SSE2, per the label names) */
+	jnc	.L018sqr_non_sse2
+	movl	4(%esp),%eax	/* arg1: destination (two words per input word) */
+	movl	8(%esp),%edx	/* arg2: source words */
+	movl	12(%esp),%ecx	/* arg3: input word count */
+.align	16
+.L019sqr_sse2_loop:	/* NOTE(review): count is tested only after the body, so a count of 0 is not handled on this path - confirm callers never pass 0 */
+	movd	(%edx),%mm0
+	pmuludq	%mm0,%mm0	/* 32x32 -> 64-bit square */
+	leal	4(%edx),%edx
+	movq	%mm0,(%eax)
+	subl	$1,%ecx
+	leal	8(%eax),%eax	/* leal keeps the flags from subl for the jnz */
+	jnz	.L019sqr_sse2_loop
+	emms	/* leave MMX state so x87 code can run afterwards */
+	ret
+.align	16
+.L018sqr_non_sse2:	/* integer-only fallback using mull */
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+
+	movl	20(%esp),%esi	/* arg1: destination (four registers pushed, so arguments start at 20) */
+	movl	24(%esp),%edi	/* arg2: source */
+	movl	28(%esp),%ebx	/* arg3: input word count */
+	andl	$4294967288,%ebx	/* 0xFFFFFFF8: round count down to a multiple of 8 */
+	jz	.L020sw_finish
+.L021sw_loop:
+
+	movl	(%edi),%eax
+	mull	%eax
+	movl	%eax,(%esi)
+	movl	%edx,4(%esi)
+
+	movl	4(%edi),%eax
+	mull	%eax
+	movl	%eax,8(%esi)
+	movl	%edx,12(%esi)
+
+	movl	8(%edi),%eax
+	mull	%eax
+	movl	%eax,16(%esi)
+	movl	%edx,20(%esi)
+
+	movl	12(%edi),%eax
+	mull	%eax
+	movl	%eax,24(%esi)
+	movl	%edx,28(%esi)
+
+	movl	16(%edi),%eax
+	mull	%eax
+	movl	%eax,32(%esi)
+	movl	%edx,36(%esi)
+
+	movl	20(%edi),%eax
+	mull	%eax
+	movl	%eax,40(%esi)
+	movl	%edx,44(%esi)
+
+	movl	24(%edi),%eax
+	mull	%eax
+	movl	%eax,48(%esi)
+	movl	%edx,52(%esi)
+
+	movl	28(%edi),%eax
+	mull	%eax
+	movl	%eax,56(%esi)
+	movl	%edx,60(%esi)
+
+	addl	$32,%edi
+	addl	$64,%esi
+	subl	$8,%ebx
+	jnz	.L021sw_loop
+.L020sw_finish:
+	movl	28(%esp),%ebx	/* reload arg3 */
+	andl	$7,%ebx	/* 0..7 leftover words */
+	jz	.L022sw_end
+
+	movl	(%edi),%eax
+	mull	%eax
+	movl	%eax,(%esi)
+	decl	%ebx
+	movl	%edx,4(%esi)
+	jz	.L022sw_end
+
+	movl	4(%edi),%eax
+	mull	%eax
+	movl	%eax,8(%esi)
+	decl	%ebx
+	movl	%edx,12(%esi)
+	jz	.L022sw_end
+
+	movl	8(%edi),%eax
+	mull	%eax
+	movl	%eax,16(%esi)
+	decl	%ebx
+	movl	%edx,20(%esi)
+	jz	.L022sw_end
+
+	movl	12(%edi),%eax
+	mull	%eax
+	movl	%eax,24(%esi)
+	decl	%ebx
+	movl	%edx,28(%esi)
+	jz	.L022sw_end
+
+	movl	16(%edi),%eax
+	mull	%eax
+	movl	%eax,32(%esi)
+	decl	%ebx
+	movl	%edx,36(%esi)
+	jz	.L022sw_end
+
+	movl	20(%edi),%eax
+	mull	%eax
+	movl	%eax,40(%esi)
+	decl	%ebx
+	movl	%edx,44(%esi)
+	jz	.L022sw_end
+
+	movl	24(%edi),%eax
+	mull	%eax
+	movl	%eax,48(%esi)
+	movl	%edx,52(%esi)
+.L022sw_end:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	bn_sqr_words,.-.L_bn_sqr_words_begin
+.globl	bn_div_words	/* returns the 32-bit quotient of the 64-bit value (arg1:arg2) divided by arg3 */
+.hidden	bn_div_words
+.type	bn_div_words,@function
+.align	16
+bn_div_words:
+.L_bn_div_words_begin:
+	movl	4(%esp),%edx	/* arg1: high 32 bits of the dividend */
+	movl	8(%esp),%eax	/* arg2: low 32 bits of the dividend */
+	movl	12(%esp),%ecx	/* arg3: divisor */
+	divl	%ecx	/* %eax = quotient, %edx = remainder; the CPU raises #DE if the divisor is 0 or the quotient does not fit in 32 bits - no check is made here */
+	ret
+.size	bn_div_words,.-.L_bn_div_words_begin
+.globl	bn_add_words	/* arg1[i] = arg2[i] + arg3[i] + carry for arg4 32-bit words; final carry (0 or 1) returned in %eax */
+.hidden	bn_add_words
+.type	bn_add_words,@function
+.align	16
+bn_add_words:
+.L_bn_add_words_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+
+	movl	20(%esp),%ebx	/* arg1: result words (four registers pushed, so arguments start at 20) */
+	movl	24(%esp),%esi	/* arg2: first operand */
+	movl	28(%esp),%edi	/* arg3: second operand */
+	movl	32(%esp),%ebp	/* arg4: word count */
+	xorl	%eax,%eax	/* %eax = carry between words */
+	andl	$4294967288,%ebp	/* 0xFFFFFFF8: round count down to a multiple of 8 */
+	jz	.L023aw_finish
+.L024aw_loop:
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	addl	%eax,%ecx	/* add incoming carry */
+	movl	$0,%eax	/* movl leaves CF intact ... */
+	adcl	%eax,%eax	/* ... so this captures the carry-out of the previous addl */
+	addl	%edx,%ecx
+	adcl	$0,%eax	/* fold in the carry-out of the second addition */
+	movl	%ecx,(%ebx)
+
+	movl	4(%esi),%ecx
+	movl	4(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,4(%ebx)
+
+	movl	8(%esi),%ecx
+	movl	8(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,8(%ebx)
+
+	movl	12(%esi),%ecx
+	movl	12(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,12(%ebx)
+
+	movl	16(%esi),%ecx
+	movl	16(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,16(%ebx)
+
+	movl	20(%esi),%ecx
+	movl	20(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,20(%ebx)
+
+	movl	24(%esi),%ecx
+	movl	24(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,24(%ebx)
+
+	movl	28(%esi),%ecx
+	movl	28(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,28(%ebx)
+
+	addl	$32,%esi
+	addl	$32,%edi
+	addl	$32,%ebx
+	subl	$8,%ebp
+	jnz	.L024aw_loop
+.L023aw_finish:
+	movl	32(%esp),%ebp	/* reload arg4 */
+	andl	$7,%ebp	/* 0..7 leftover words */
+	jz	.L025aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp	/* decl does not modify CF, and movl keeps ZF for the jz */
+	movl	%ecx,(%ebx)
+	jz	.L025aw_end
+
+	movl	4(%esi),%ecx
+	movl	4(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,4(%ebx)
+	jz	.L025aw_end
+
+	movl	8(%esi),%ecx
+	movl	8(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,8(%ebx)
+	jz	.L025aw_end
+
+	movl	12(%esi),%ecx
+	movl	12(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,12(%ebx)
+	jz	.L025aw_end
+
+	movl	16(%esi),%ecx
+	movl	16(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,16(%ebx)
+	jz	.L025aw_end
+
+	movl	20(%esi),%ecx
+	movl	20(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,20(%ebx)
+	jz	.L025aw_end
+
+	movl	24(%esi),%ecx
+	movl	24(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,24(%ebx)
+.L025aw_end:
+	popl	%edi	/* %eax still holds the final carry */
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	bn_add_words,.-.L_bn_add_words_begin
+.globl	bn_sub_words	/* arg1[i] = arg2[i] - arg3[i] - borrow for arg4 32-bit words; final borrow (0 or 1) returned in %eax */
+.hidden	bn_sub_words
+.type	bn_sub_words,@function
+.align	16
+bn_sub_words:
+.L_bn_sub_words_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+
+	movl	20(%esp),%ebx	/* arg1: result words (four registers pushed, so arguments start at 20) */
+	movl	24(%esp),%esi	/* arg2: minuend */
+	movl	28(%esp),%edi	/* arg3: subtrahend */
+	movl	32(%esp),%ebp	/* arg4: word count */
+	xorl	%eax,%eax	/* %eax = borrow between words */
+	andl	$4294967288,%ebp	/* 0xFFFFFFF8: round count down to a multiple of 8 */
+	jz	.L026aw_finish
+.L027aw_loop:
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx	/* subtract incoming borrow */
+	movl	$0,%eax	/* movl leaves CF intact ... */
+	adcl	%eax,%eax	/* ... so this captures the borrow-out of the previous subl */
+	subl	%edx,%ecx
+	adcl	$0,%eax	/* fold in the borrow-out of the second subtraction */
+	movl	%ecx,(%ebx)
+
+	movl	4(%esi),%ecx
+	movl	4(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,4(%ebx)
+
+	movl	8(%esi),%ecx
+	movl	8(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,8(%ebx)
+
+	movl	12(%esi),%ecx
+	movl	12(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,12(%ebx)
+
+	movl	16(%esi),%ecx
+	movl	16(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,16(%ebx)
+
+	movl	20(%esi),%ecx
+	movl	20(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,20(%ebx)
+
+	movl	24(%esi),%ecx
+	movl	24(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,24(%ebx)
+
+	movl	28(%esi),%ecx
+	movl	28(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,28(%ebx)
+
+	addl	$32,%esi
+	addl	$32,%edi
+	addl	$32,%ebx
+	subl	$8,%ebp
+	jnz	.L027aw_loop
+.L026aw_finish:
+	movl	32(%esp),%ebp	/* reload arg4 */
+	andl	$7,%ebp	/* 0..7 leftover words */
+	jz	.L028aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp	/* decl does not modify CF, and movl keeps ZF for the jz */
+	movl	%ecx,(%ebx)
+	jz	.L028aw_end
+
+	movl	4(%esi),%ecx
+	movl	4(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,4(%ebx)
+	jz	.L028aw_end
+
+	movl	8(%esi),%ecx
+	movl	8(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,8(%ebx)
+	jz	.L028aw_end
+
+	movl	12(%esi),%ecx
+	movl	12(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,12(%ebx)
+	jz	.L028aw_end
+
+	movl	16(%esi),%ecx
+	movl	16(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,16(%ebx)
+	jz	.L028aw_end
+
+	movl	20(%esi),%ecx
+	movl	20(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,20(%ebx)
+	jz	.L028aw_end
+
+	movl	24(%esi),%ecx
+	movl	24(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,24(%ebx)
+.L028aw_end:
+	popl	%edi	/* %eax still holds the final borrow */
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	bn_sub_words,.-.L_bn_sub_words_begin
+.globl	bn_sub_part_words	/* subtracts arg3 from arg2 into arg1 over arg4 common words, then handles a signed extra word count arg5: negative -> result = 0 - arg3 words, positive -> result = arg2 words minus borrow; borrow returned in %eax */
+.hidden	bn_sub_part_words
+.type	bn_sub_part_words,@function
+.align	16
+bn_sub_part_words:
+.L_bn_sub_part_words_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+
+	movl	20(%esp),%ebx	/* arg1: result words (four registers pushed, so arguments start at 20) */
+	movl	24(%esp),%esi	/* arg2: minuend */
+	movl	28(%esp),%edi	/* arg3: subtrahend */
+	movl	32(%esp),%ebp	/* arg4: number of words both operands share */
+	xorl	%eax,%eax	/* %eax = borrow between words */
+	andl	$4294967288,%ebp	/* 0xFFFFFFF8: round count down to a multiple of 8 */
+	jz	.L029aw_finish
+.L030aw_loop:
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx	/* subtract incoming borrow */
+	movl	$0,%eax	/* movl leaves CF intact ... */
+	adcl	%eax,%eax	/* ... so this captures the borrow-out of the previous subl */
+	subl	%edx,%ecx
+	adcl	$0,%eax	/* fold in the borrow-out of the second subtraction */
+	movl	%ecx,(%ebx)
+
+	movl	4(%esi),%ecx
+	movl	4(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,4(%ebx)
+
+	movl	8(%esi),%ecx
+	movl	8(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,8(%ebx)
+
+	movl	12(%esi),%ecx
+	movl	12(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,12(%ebx)
+
+	movl	16(%esi),%ecx
+	movl	16(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,16(%ebx)
+
+	movl	20(%esi),%ecx
+	movl	20(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,20(%ebx)
+
+	movl	24(%esi),%ecx
+	movl	24(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,24(%ebx)
+
+	movl	28(%esi),%ecx
+	movl	28(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,28(%ebx)
+
+	addl	$32,%esi
+	addl	$32,%edi
+	addl	$32,%ebx
+	subl	$8,%ebp
+	jnz	.L030aw_loop
+.L029aw_finish:
+	movl	32(%esp),%ebp	/* reload arg4 */
+	andl	$7,%ebp	/* 0..7 leftover common words */
+	jz	.L031aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+	addl	$4,%esi	/* tail steps advance the pointers so they are correct for the extra-word phase below */
+	addl	$4,%edi
+	addl	$4,%ebx
+	decl	%ebp
+	jz	.L031aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+	addl	$4,%esi
+	addl	$4,%edi
+	addl	$4,%ebx
+	decl	%ebp
+	jz	.L031aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+	addl	$4,%esi
+	addl	$4,%edi
+	addl	$4,%ebx
+	decl	%ebp
+	jz	.L031aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+	addl	$4,%esi
+	addl	$4,%edi
+	addl	$4,%ebx
+	decl	%ebp
+	jz	.L031aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+	addl	$4,%esi
+	addl	$4,%edi
+	addl	$4,%ebx
+	decl	%ebp
+	jz	.L031aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+	addl	$4,%esi
+	addl	$4,%edi
+	addl	$4,%ebx
+	decl	%ebp
+	jz	.L031aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+	addl	$4,%esi
+	addl	$4,%edi
+	addl	$4,%ebx
+.L031aw_end:
+	cmpl	$0,36(%esp)	/* arg5: signed count of extra words */
+	je	.L032pw_end
+	movl	36(%esp),%ebp
+	cmpl	$0,%ebp
+	je	.L032pw_end
+	jge	.L033pw_pos	/* positive: only the minuend has extra words */
+
+	movl	$0,%edx	/* negative: only the subtrahend has extra words; %ebp = -arg5 */
+	subl	%ebp,%edx
+	movl	%edx,%ebp
+	andl	$4294967288,%ebp
+	jz	.L034pw_neg_finish
+.L035pw_neg_loop:
+
+	movl	$0,%ecx	/* minuend word is treated as zero */
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+
+	movl	$0,%ecx
+	movl	4(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,4(%ebx)
+
+	movl	$0,%ecx
+	movl	8(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,8(%ebx)
+
+	movl	$0,%ecx
+	movl	12(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,12(%ebx)
+
+	movl	$0,%ecx
+	movl	16(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,16(%ebx)
+
+	movl	$0,%ecx
+	movl	20(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,20(%ebx)
+
+	movl	$0,%ecx
+	movl	24(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,24(%ebx)
+
+	movl	$0,%ecx
+	movl	28(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,28(%ebx)
+
+	addl	$32,%edi
+	addl	$32,%ebx
+	subl	$8,%ebp
+	jnz	.L035pw_neg_loop
+.L034pw_neg_finish:
+	movl	36(%esp),%edx
+	movl	$0,%ebp
+	subl	%edx,%ebp	/* %ebp = -arg5 again */
+	andl	$7,%ebp	/* 0..7 leftover extra words */
+	jz	.L032pw_end
+
+	movl	$0,%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,(%ebx)
+	jz	.L032pw_end
+
+	movl	$0,%ecx
+	movl	4(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,4(%ebx)
+	jz	.L032pw_end
+
+	movl	$0,%ecx
+	movl	8(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,8(%ebx)
+	jz	.L032pw_end
+
+	movl	$0,%ecx
+	movl	12(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,12(%ebx)
+	jz	.L032pw_end
+
+	movl	$0,%ecx
+	movl	16(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,16(%ebx)
+	jz	.L032pw_end
+
+	movl	$0,%ecx
+	movl	20(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,20(%ebx)
+	jz	.L032pw_end
+
+	movl	$0,%ecx
+	movl	24(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,24(%ebx)
+	jmp	.L032pw_end
+.L033pw_pos:
+	andl	$4294967288,%ebp
+	jz	.L036pw_pos_finish
+.L037pw_pos_loop:	/* result = minuend word minus borrow; once a subtraction produces no borrow, switch to the plain-copy path */
+
+	movl	(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,(%ebx)	/* movl keeps CF from subl for the jnc */
+	jnc	.L038pw_nc0
+
+	movl	4(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,4(%ebx)
+	jnc	.L039pw_nc1
+
+	movl	8(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,8(%ebx)
+	jnc	.L040pw_nc2
+
+	movl	12(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,12(%ebx)
+	jnc	.L041pw_nc3
+
+	movl	16(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,16(%ebx)
+	jnc	.L042pw_nc4
+
+	movl	20(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,20(%ebx)
+	jnc	.L043pw_nc5
+
+	movl	24(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,24(%ebx)
+	jnc	.L044pw_nc6
+
+	movl	28(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,28(%ebx)
+	jnc	.L045pw_nc7
+
+	addl	$32,%esi
+	addl	$32,%ebx
+	subl	$8,%ebp
+	jnz	.L037pw_pos_loop
+.L036pw_pos_finish:
+	movl	36(%esp),%ebp
+	andl	$7,%ebp	/* 0..7 leftover extra words */
+	jz	.L032pw_end
+
+	movl	(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,(%ebx)
+	jnc	.L046pw_tail_nc0
+	decl	%ebp
+	jz	.L032pw_end
+
+	movl	4(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,4(%ebx)
+	jnc	.L047pw_tail_nc1
+	decl	%ebp
+	jz	.L032pw_end
+
+	movl	8(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,8(%ebx)
+	jnc	.L048pw_tail_nc2
+	decl	%ebp
+	jz	.L032pw_end
+
+	movl	12(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,12(%ebx)
+	jnc	.L049pw_tail_nc3
+	decl	%ebp
+	jz	.L032pw_end
+
+	movl	16(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,16(%ebx)
+	jnc	.L050pw_tail_nc4
+	decl	%ebp
+	jz	.L032pw_end
+
+	movl	20(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,20(%ebx)
+	jnc	.L051pw_tail_nc5
+	decl	%ebp
+	jz	.L032pw_end
+
+	movl	24(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,24(%ebx)
+	jnc	.L052pw_tail_nc6
+	movl	$1,%eax	/* borrow survived every extra word: return 1 */
+	jmp	.L032pw_end
+.L053pw_nc_loop:	/* no-borrow path: remaining minuend words are copied unchanged; the labels below are mid-loop entry points */
+	movl	(%esi),%ecx
+	movl	%ecx,(%ebx)
+.L038pw_nc0:
+	movl	4(%esi),%ecx
+	movl	%ecx,4(%ebx)
+.L039pw_nc1:
+	movl	8(%esi),%ecx
+	movl	%ecx,8(%ebx)
+.L040pw_nc2:
+	movl	12(%esi),%ecx
+	movl	%ecx,12(%ebx)
+.L041pw_nc3:
+	movl	16(%esi),%ecx
+	movl	%ecx,16(%ebx)
+.L042pw_nc4:
+	movl	20(%esi),%ecx
+	movl	%ecx,20(%ebx)
+.L043pw_nc5:
+	movl	24(%esi),%ecx
+	movl	%ecx,24(%ebx)
+.L044pw_nc6:
+	movl	28(%esi),%ecx
+	movl	%ecx,28(%ebx)
+.L045pw_nc7:
+
+	addl	$32,%esi
+	addl	$32,%ebx
+	subl	$8,%ebp
+	jnz	.L053pw_nc_loop
+	movl	36(%esp),%ebp
+	andl	$7,%ebp	/* 0..7 leftover extra words to copy */
+	jz	.L054pw_nc_end
+	movl	(%esi),%ecx
+	movl	%ecx,(%ebx)
+.L046pw_tail_nc0:
+	decl	%ebp
+	jz	.L054pw_nc_end
+	movl	4(%esi),%ecx
+	movl	%ecx,4(%ebx)
+.L047pw_tail_nc1:
+	decl	%ebp
+	jz	.L054pw_nc_end
+	movl	8(%esi),%ecx
+	movl	%ecx,8(%ebx)
+.L048pw_tail_nc2:
+	decl	%ebp
+	jz	.L054pw_nc_end
+	movl	12(%esi),%ecx
+	movl	%ecx,12(%ebx)
+.L049pw_tail_nc3:
+	decl	%ebp
+	jz	.L054pw_nc_end
+	movl	16(%esi),%ecx
+	movl	%ecx,16(%ebx)
+.L050pw_tail_nc4:
+	decl	%ebp
+	jz	.L054pw_nc_end
+	movl	20(%esi),%ecx
+	movl	%ecx,20(%ebx)
+.L051pw_tail_nc5:
+	decl	%ebp
+	jz	.L054pw_nc_end
+	movl	24(%esi),%ecx
+	movl	%ecx,24(%ebx)
+.L052pw_tail_nc6:
+.L054pw_nc_end:
+	movl	$0,%eax	/* the no-borrow path always returns 0 */
+.L032pw_end:
+	popl	%edi	/* %eax holds the returned borrow */
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	bn_sub_part_words,.-.L_bn_sub_part_words_begin
+#endif
diff --git a/third_party/boringssl/linux-x86/crypto/fipsmodule/co-586.S b/third_party/boringssl/linux-x86/crypto/fipsmodule/co-586.S
new file mode 100644
index 0000000..56834d0
--- /dev/null
+++ b/third_party/boringssl/linux-x86/crypto/fipsmodule/co-586.S
@@ -0,0 +1,1259 @@
+#if defined(__i386__)
+.text
+.globl	bn_mul_comba8	/* fully unrolled 8x8-word multiply: writes the 16-word product of arg2[0..7] and arg3[0..7] to arg1[0..15], one result column at a time */
+.hidden	bn_mul_comba8
+.type	bn_mul_comba8,@function
+.align	16
+bn_mul_comba8:
+.L_bn_mul_comba8_begin:
+	pushl	%esi
+	movl	12(%esp),%esi	/* arg2: first operand (one register pushed so far) */
+	pushl	%edi
+	movl	20(%esp),%edi	/* arg3: second operand (two registers pushed so far) */
+	pushl	%ebp
+	pushl	%ebx
+	xorl	%ebx,%ebx	/* %ebx/%ecx/%ebp form a rotating three-word column accumulator */
+	movl	(%esi),%eax
+	xorl	%ecx,%ecx
+	movl	(%edi),%edx
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esp),%eax	/* arg1: result pointer (four registers pushed); movl does not disturb CF between addl and adcl */
+	adcl	%edx,%ecx
+	movl	(%edi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,(%eax)	/* store result word 0 */
+	movl	4(%esi),%eax
+
+
+	xorl	%ebx,%ebx	/* the register just stored becomes the new top word of the accumulator */
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	(%esi),%eax
+	adcl	%edx,%ebp
+	movl	4(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esp),%eax
+	adcl	%edx,%ebp
+	movl	(%edi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,4(%eax)	/* store result word 1 */
+	movl	8(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	4(%esi),%eax
+	adcl	%edx,%ebx
+	movl	4(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	(%esi),%eax
+	adcl	%edx,%ebx
+	movl	8(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esp),%eax
+	adcl	%edx,%ebx
+	movl	(%edi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,8(%eax)	/* store result word 2 */
+	movl	12(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	8(%esi),%eax
+	adcl	%edx,%ecx
+	movl	4(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	4(%esi),%eax
+	adcl	%edx,%ecx
+	movl	8(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	(%esi),%eax
+	adcl	%edx,%ecx
+	movl	12(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esp),%eax
+	adcl	%edx,%ecx
+	movl	(%edi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,12(%eax)	/* store result word 3 */
+	movl	16(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	12(%esi),%eax
+	adcl	%edx,%ebp
+	movl	4(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	8(%esi),%eax
+	adcl	%edx,%ebp
+	movl	8(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	4(%esi),%eax
+	adcl	%edx,%ebp
+	movl	12(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	(%esi),%eax
+	adcl	%edx,%ebp
+	movl	16(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esp),%eax
+	adcl	%edx,%ebp
+	movl	(%edi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,16(%eax)	/* store result word 4 */
+	movl	20(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	16(%esi),%eax
+	adcl	%edx,%ebx
+	movl	4(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	12(%esi),%eax
+	adcl	%edx,%ebx
+	movl	8(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	8(%esi),%eax
+	adcl	%edx,%ebx
+	movl	12(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	4(%esi),%eax
+	adcl	%edx,%ebx
+	movl	16(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	(%esi),%eax
+	adcl	%edx,%ebx
+	movl	20(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esp),%eax
+	adcl	%edx,%ebx
+	movl	(%edi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,20(%eax)	/* store result word 5 */
+	movl	24(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esi),%eax
+	adcl	%edx,%ecx
+	movl	4(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	16(%esi),%eax
+	adcl	%edx,%ecx
+	movl	8(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	12(%esi),%eax
+	adcl	%edx,%ecx
+	movl	12(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	8(%esi),%eax
+	adcl	%edx,%ecx
+	movl	16(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	4(%esi),%eax
+	adcl	%edx,%ecx
+	movl	20(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	(%esi),%eax
+	adcl	%edx,%ecx
+	movl	24(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esp),%eax
+	adcl	%edx,%ecx
+	movl	(%edi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,24(%eax)	/* store result word 6 */
+	movl	28(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	24(%esi),%eax
+	adcl	%edx,%ebp
+	movl	4(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esi),%eax
+	adcl	%edx,%ebp
+	movl	8(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	16(%esi),%eax
+	adcl	%edx,%ebp
+	movl	12(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	12(%esi),%eax
+	adcl	%edx,%ebp
+	movl	16(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	8(%esi),%eax
+	adcl	%edx,%ebp
+	movl	20(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	4(%esi),%eax
+	adcl	%edx,%ebp
+	movl	24(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	(%esi),%eax
+	adcl	%edx,%ebp
+	movl	28(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esp),%eax
+	adcl	%edx,%ebp
+	movl	4(%edi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,28(%eax)	/* store result word 7 */
+	movl	28(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	24(%esi),%eax
+	adcl	%edx,%ebx
+	movl	8(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esi),%eax
+	adcl	%edx,%ebx
+	movl	12(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	16(%esi),%eax
+	adcl	%edx,%ebx
+	movl	16(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	12(%esi),%eax
+	adcl	%edx,%ebx
+	movl	20(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	8(%esi),%eax
+	adcl	%edx,%ebx
+	movl	24(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	4(%esi),%eax
+	adcl	%edx,%ebx
+	movl	28(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esp),%eax
+	adcl	%edx,%ebx
+	movl	8(%edi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,32(%eax)	/* store result word 8 */
+	movl	28(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	24(%esi),%eax
+	adcl	%edx,%ecx
+	movl	12(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esi),%eax
+	adcl	%edx,%ecx
+	movl	16(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	16(%esi),%eax
+	adcl	%edx,%ecx
+	movl	20(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	12(%esi),%eax
+	adcl	%edx,%ecx
+	movl	24(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	8(%esi),%eax
+	adcl	%edx,%ecx
+	movl	28(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esp),%eax
+	adcl	%edx,%ecx
+	movl	12(%edi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,36(%eax)	/* store result word 9 */
+	movl	28(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	24(%esi),%eax
+	adcl	%edx,%ebp
+	movl	16(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esi),%eax
+	adcl	%edx,%ebp
+	movl	20(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	16(%esi),%eax
+	adcl	%edx,%ebp
+	movl	24(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	12(%esi),%eax
+	adcl	%edx,%ebp
+	movl	28(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esp),%eax
+	adcl	%edx,%ebp
+	movl	16(%edi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,40(%eax)	/* store result word 10 */
+	movl	28(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	24(%esi),%eax
+	adcl	%edx,%ebx
+	movl	20(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esi),%eax
+	adcl	%edx,%ebx
+	movl	24(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	16(%esi),%eax
+	adcl	%edx,%ebx
+	movl	28(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esp),%eax
+	adcl	%edx,%ebx
+	movl	20(%edi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,44(%eax)	/* store result word 11 */
+	movl	28(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	24(%esi),%eax
+	adcl	%edx,%ecx
+	movl	24(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esi),%eax
+	adcl	%edx,%ecx
+	movl	28(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esp),%eax
+	adcl	%edx,%ecx
+	movl	24(%edi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,48(%eax)	/* store result word 12 */
+	movl	28(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	24(%esi),%eax
+	adcl	%edx,%ebp
+	movl	28(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esp),%eax
+	adcl	%edx,%ebp
+	movl	28(%edi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,52(%eax)	/* store result word 13 */
+	movl	28(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esp),%eax
+	adcl	%edx,%ebx
+	adcl	$0,%ecx
+	movl	%ebp,56(%eax)	/* store result word 14 */
+
+
+	movl	%ebx,60(%eax)	/* store result word 15 (top word of the product) */
+	popl	%ebx
+	popl	%ebp
+	popl	%edi
+	popl	%esi
+	ret
+.size	bn_mul_comba8,.-.L_bn_mul_comba8_begin
+.globl	bn_mul_comba4	/* bn_mul_comba4: multiplies two 4-word (32-bit words) arrays column by column and stores the 8-word product */
+.hidden	bn_mul_comba4
+.type	bn_mul_comba4,@function
+.align	16
+bn_mul_comba4:
+.L_bn_mul_comba4_begin:
+	pushl	%esi
+	movl	12(%esp),%esi	/* esi = 2nd stack argument: first source array (called a below) */
+	pushl	%edi
+	movl	20(%esp),%edi	/* edi = 3rd stack argument: second source array (called b below) */
+	pushl	%ebp
+	pushl	%ebx
+	xorl	%ebx,%ebx	/* ebx, ecx, ebp rotate as a three-word column accumulator */
+	movl	(%esi),%eax
+	xorl	%ecx,%ecx
+	movl	(%edi),%edx
+
+	xorl	%ebp,%ebp
+
+	mull	%edx	/* column 0: a[0]*b[0] */
+	addl	%eax,%ebx
+	movl	20(%esp),%eax	/* eax = 1st stack argument: destination array (called r below); reloaded before every store */
+	adcl	%edx,%ecx
+	movl	(%edi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,(%eax)	/* r[0] */
+	movl	4(%esi),%eax
+
+
+	xorl	%ebx,%ebx	/* ebx is free again and becomes the top carry word for column 1 */
+
+	mull	%edx	/* column 1: a[1]*b[0] + a[0]*b[1] */
+	addl	%eax,%ecx
+	movl	(%esi),%eax
+	adcl	%edx,%ebp
+	movl	4(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esp),%eax
+	adcl	%edx,%ebp
+	movl	(%edi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,4(%eax)	/* r[1] */
+	movl	8(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx	/* column 2: a[2]*b[0] + a[1]*b[1] + a[0]*b[2] */
+	addl	%eax,%ebp
+	movl	4(%esi),%eax
+	adcl	%edx,%ebx
+	movl	4(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	(%esi),%eax
+	adcl	%edx,%ebx
+	movl	8(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esp),%eax
+	adcl	%edx,%ebx
+	movl	(%edi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,8(%eax)	/* r[2] */
+	movl	12(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx	/* column 3: a[3]*b[0] + a[2]*b[1] + a[1]*b[2] + a[0]*b[3] */
+	addl	%eax,%ebx
+	movl	8(%esi),%eax
+	adcl	%edx,%ecx
+	movl	4(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	4(%esi),%eax
+	adcl	%edx,%ecx
+	movl	8(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	(%esi),%eax
+	adcl	%edx,%ecx
+	movl	12(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esp),%eax
+	adcl	%edx,%ecx
+	movl	4(%edi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,12(%eax)	/* r[3] */
+	movl	12(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx	/* column 4: a[3]*b[1] + a[2]*b[2] + a[1]*b[3] */
+	addl	%eax,%ecx
+	movl	8(%esi),%eax
+	adcl	%edx,%ebp
+	movl	8(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	4(%esi),%eax
+	adcl	%edx,%ebp
+	movl	12(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esp),%eax
+	adcl	%edx,%ebp
+	movl	8(%edi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,16(%eax)	/* r[4] */
+	movl	12(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx	/* column 5: a[3]*b[2] + a[2]*b[3] */
+	addl	%eax,%ebp
+	movl	8(%esi),%eax
+	adcl	%edx,%ebx
+	movl	12(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esp),%eax
+	adcl	%edx,%ebx
+	movl	12(%edi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,20(%eax)	/* r[5] */
+	movl	12(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx	/* column 6: a[3]*b[3] */
+	addl	%eax,%ebx
+	movl	20(%esp),%eax
+	adcl	%edx,%ecx
+	adcl	$0,%ebp
+	movl	%ebx,24(%eax)	/* r[6] */
+
+
+	movl	%ecx,28(%eax)	/* r[7] = remaining high word of the accumulator */
+	popl	%ebx
+	popl	%ebp
+	popl	%edi
+	popl	%esi
+	ret
+.size	bn_mul_comba4,.-.L_bn_mul_comba4_begin
+.globl	bn_sqr_comba8	/* bn_sqr_comba8: squares an 8-word (32-bit words) array column by column and stores the 16-word result */
+.hidden	bn_sqr_comba8
+.type	bn_sqr_comba8,@function
+.align	16
+bn_sqr_comba8:
+.L_bn_sqr_comba8_begin:
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebp
+	pushl	%ebx
+	movl	20(%esp),%edi	/* edi = 1st stack argument: destination array (called r below) */
+	movl	24(%esp),%esi	/* esi = 2nd stack argument: source array (called a below) */
+	xorl	%ebx,%ebx	/* ebx, ecx, ebp rotate as a three-word column accumulator */
+	xorl	%ecx,%ecx
+	movl	(%esi),%eax
+
+	xorl	%ebp,%ebp
+
+	mull	%eax	/* column 0: diagonal term a[0]*a[0], added once */
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	(%esi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,(%edi)	/* r[0] */
+	movl	4(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx	/* column 1: cross term a[1]*a[0] */
+	addl	%eax,%eax	/* double the 64-bit cross product edx:eax ... */
+	adcl	%edx,%edx
+	adcl	$0,%ebx	/* ... and catch the bit shifted out of edx in the top accumulator word */
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	8(%esi),%eax
+	adcl	$0,%ebx
+	movl	%ecx,4(%edi)	/* r[1] */
+	movl	(%esi),%edx
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx	/* column 2: 2*a[2]*a[0] + a[1]*a[1] */
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	4(%esi),%eax
+	adcl	$0,%ecx
+
+	mull	%eax
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	(%esi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,8(%edi)	/* r[2] */
+	movl	12(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx	/* column 3: 2*(a[3]*a[0] + a[2]*a[1]) */
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	8(%esi),%eax
+	adcl	$0,%ebp
+	movl	4(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	16(%esi),%eax
+	adcl	$0,%ebp
+	movl	%ebx,12(%edi)	/* r[3] */
+	movl	(%esi),%edx
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx	/* column 4: 2*(a[4]*a[0] + a[3]*a[1]) + a[2]*a[2] */
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	12(%esi),%eax
+	adcl	$0,%ebx
+	movl	4(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	8(%esi),%eax
+	adcl	$0,%ebx
+
+	mull	%eax
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	(%esi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,16(%edi)	/* r[4] */
+	movl	20(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx	/* column 5: 2*(a[5]*a[0] + a[4]*a[1] + a[3]*a[2]) */
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	16(%esi),%eax
+	adcl	$0,%ecx
+	movl	4(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	12(%esi),%eax
+	adcl	$0,%ecx
+	movl	8(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	24(%esi),%eax
+	adcl	$0,%ecx
+	movl	%ebp,20(%edi)	/* r[5] */
+	movl	(%esi),%edx
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx	/* column 6: 2*(a[6]*a[0] + a[5]*a[1] + a[4]*a[2]) + a[3]*a[3] */
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	20(%esi),%eax
+	adcl	$0,%ebp
+	movl	4(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	16(%esi),%eax
+	adcl	$0,%ebp
+	movl	8(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	12(%esi),%eax
+	adcl	$0,%ebp
+
+	mull	%eax
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	(%esi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,24(%edi)	/* r[6] */
+	movl	28(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx	/* column 7: 2*(a[7]*a[0] + a[6]*a[1] + a[5]*a[2] + a[4]*a[3]) */
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	24(%esi),%eax
+	adcl	$0,%ebx
+	movl	4(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	20(%esi),%eax
+	adcl	$0,%ebx
+	movl	8(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	16(%esi),%eax
+	adcl	$0,%ebx
+	movl	12(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	28(%esi),%eax
+	adcl	$0,%ebx
+	movl	%ecx,28(%edi)	/* r[7] */
+	movl	4(%esi),%edx
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx	/* column 8: 2*(a[7]*a[1] + a[6]*a[2] + a[5]*a[3]) + a[4]*a[4] */
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	24(%esi),%eax
+	adcl	$0,%ecx
+	movl	8(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	20(%esi),%eax
+	adcl	$0,%ecx
+	movl	12(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	16(%esi),%eax
+	adcl	$0,%ecx
+
+	mull	%eax
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	8(%esi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,32(%edi)	/* r[8] */
+	movl	28(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx	/* column 9: 2*(a[7]*a[2] + a[6]*a[3] + a[5]*a[4]) */
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	24(%esi),%eax
+	adcl	$0,%ebp
+	movl	12(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	20(%esi),%eax
+	adcl	$0,%ebp
+	movl	16(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	28(%esi),%eax
+	adcl	$0,%ebp
+	movl	%ebx,36(%edi)	/* r[9] */
+	movl	12(%esi),%edx
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx	/* column 10: 2*(a[7]*a[3] + a[6]*a[4]) + a[5]*a[5] */
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	24(%esi),%eax
+	adcl	$0,%ebx
+	movl	16(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	20(%esi),%eax
+	adcl	$0,%ebx
+
+	mull	%eax
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	16(%esi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,40(%edi)	/* r[10] */
+	movl	28(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx	/* column 11: 2*(a[7]*a[4] + a[6]*a[5]) */
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	24(%esi),%eax
+	adcl	$0,%ecx
+	movl	20(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	28(%esi),%eax
+	adcl	$0,%ecx
+	movl	%ebp,44(%edi)	/* r[11] */
+	movl	20(%esi),%edx
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx	/* column 12: 2*a[7]*a[5] + a[6]*a[6] */
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	24(%esi),%eax
+	adcl	$0,%ebp
+
+	mull	%eax
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	24(%esi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,48(%edi)	/* r[12] */
+	movl	28(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx	/* column 13: 2*a[7]*a[6] */
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	28(%esi),%eax
+	adcl	$0,%ebx
+	movl	%ecx,52(%edi)	/* r[13] */
+
+
+	xorl	%ecx,%ecx
+
+	mull	%eax	/* column 14: a[7]*a[7] */
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	adcl	$0,%ecx
+	movl	%ebp,56(%edi)	/* r[14] */
+
+	movl	%ebx,60(%edi)	/* r[15] = remaining high word of the accumulator */
+	popl	%ebx
+	popl	%ebp
+	popl	%edi
+	popl	%esi
+	ret
+.size	bn_sqr_comba8,.-.L_bn_sqr_comba8_begin
+.globl	bn_sqr_comba4	/* bn_sqr_comba4: squares a 4-word (32-bit words) array column by column and stores the 8-word result */
+.hidden	bn_sqr_comba4
+.type	bn_sqr_comba4,@function
+.align	16
+bn_sqr_comba4:
+.L_bn_sqr_comba4_begin:
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebp
+	pushl	%ebx
+	movl	20(%esp),%edi	/* edi = 1st stack argument: destination array (called r below) */
+	movl	24(%esp),%esi	/* esi = 2nd stack argument: source array (called a below) */
+	xorl	%ebx,%ebx	/* ebx, ecx, ebp rotate as a three-word column accumulator */
+	xorl	%ecx,%ecx
+	movl	(%esi),%eax
+
+	xorl	%ebp,%ebp
+
+	mull	%eax	/* column 0: diagonal term a[0]*a[0], added once */
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	(%esi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,(%edi)	/* r[0] */
+	movl	4(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx	/* column 1: cross term a[1]*a[0] */
+	addl	%eax,%eax	/* double the 64-bit cross product edx:eax ... */
+	adcl	%edx,%edx
+	adcl	$0,%ebx	/* ... and catch the bit shifted out of edx in the top accumulator word */
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	8(%esi),%eax
+	adcl	$0,%ebx
+	movl	%ecx,4(%edi)	/* r[1] */
+	movl	(%esi),%edx
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx	/* column 2: 2*a[2]*a[0] + a[1]*a[1] */
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	4(%esi),%eax
+	adcl	$0,%ecx
+
+	mull	%eax
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	(%esi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,8(%edi)	/* r[2] */
+	movl	12(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx	/* column 3: 2*(a[3]*a[0] + a[2]*a[1]) */
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	8(%esi),%eax
+	adcl	$0,%ebp
+	movl	4(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	12(%esi),%eax
+	adcl	$0,%ebp
+	movl	%ebx,12(%edi)	/* r[3] */
+	movl	4(%esi),%edx
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx	/* column 4: 2*a[3]*a[1] + a[2]*a[2] */
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	8(%esi),%eax
+	adcl	$0,%ebx
+
+	mull	%eax
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	8(%esi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,16(%edi)	/* r[4] */
+	movl	12(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx	/* column 5: 2*a[3]*a[2] */
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	12(%esi),%eax
+	adcl	$0,%ecx
+	movl	%ebp,20(%edi)	/* r[5] */
+
+
+	xorl	%ebp,%ebp
+
+	mull	%eax	/* column 6: a[3]*a[3] */
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	adcl	$0,%ebp
+	movl	%ebx,24(%edi)	/* r[6] */
+
+	movl	%ecx,28(%edi)	/* r[7] = remaining high word of the accumulator */
+	popl	%ebx
+	popl	%ebp
+	popl	%edi
+	popl	%esi
+	ret
+.size	bn_sqr_comba4,.-.L_bn_sqr_comba4_begin
+#endif
diff --git a/third_party/boringssl/linux-x86/crypto/fipsmodule/ghash-x86.S b/third_party/boringssl/linux-x86/crypto/fipsmodule/ghash-x86.S
new file mode 100644
index 0000000..a384d9a
--- /dev/null
+++ b/third_party/boringssl/linux-x86/crypto/fipsmodule/ghash-x86.S
@@ -0,0 +1,1068 @@
+#if defined(__i386__)
+.text
+.globl	gcm_gmult_4bit_mmx	/* gcm_gmult_4bit_mmx: updates a 16-byte block in place using nibble-indexed lookups in a caller table and .Lrem_4bit (MMX); presumably one GHASH multiplication, per the name */
+.hidden	gcm_gmult_4bit_mmx
+.type	gcm_gmult_4bit_mmx,@function
+.align	16
+gcm_gmult_4bit_mmx:
+.L_gcm_gmult_4bit_mmx_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%edi	/* edi = 1st stack argument: 16-byte block, read and overwritten */
+	movl	24(%esp),%esi	/* esi = 2nd stack argument: table of 16-byte entries, indexed by nibble*16 */
+	call	.L000pic_point	/* call/pop pair obtains the current address for position-independent access */
+.L000pic_point:
+	popl	%eax
+	leal	.Lrem_4bit-.L000pic_point(%eax),%eax	/* eax = run-time address of .Lrem_4bit */
+	movzbl	15(%edi),%ebx	/* start with the last byte of the block */
+	xorl	%ecx,%ecx
+	movl	%ebx,%edx
+	movb	%dl,%cl
+	movl	$14,%ebp	/* ebp = index of the next block byte to fetch; counts down to 0 */
+	shlb	$4,%cl	/* ecx = low nibble * 16 = table byte offset */
+	andl	$240,%edx	/* edx = high nibble * 16 = table byte offset */
+	movq	8(%esi,%ecx,1),%mm0
+	movq	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebx
+	jmp	.L001mmx_loop
+.align	16
+.L001mmx_loop:
+	psrlq	$4,%mm0	/* shift the 128-bit value mm1:mm0 right by 4 bits ... */
+	andl	$15,%ebx	/* ... ebx = the 4 bits about to be shifted out */
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	movb	(%edi,%ebp,1),%cl	/* fetch next block byte */
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1	/* fold in the .Lrem_4bit entry selected by the shifted-out bits */
+	decl	%ebp
+	movd	%mm0,%ebx
+	pxor	(%esi,%edx,1),%mm1
+	movl	%ecx,%edx
+	pxor	%mm2,%mm0
+	js	.L002mmx_break	/* flags still from decl: leave once byte 0 has been fetched */
+	shlb	$4,%cl
+	andl	$15,%ebx
+	psrlq	$4,%mm0
+	andl	$240,%edx
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	movd	%mm0,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	pxor	%mm2,%mm0
+	jmp	.L001mmx_loop
+.align	16
+.L002mmx_break:
+	shlb	$4,%cl	/* process the two nibbles of the final fetched byte (byte 0) */
+	andl	$15,%ebx
+	psrlq	$4,%mm0
+	andl	$240,%edx
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	movd	%mm0,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	pxor	%mm2,%mm0
+	psrlq	$4,%mm0
+	andl	$15,%ebx
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	movd	%mm0,%ebx
+	pxor	(%esi,%edx,1),%mm1
+	pxor	%mm2,%mm0
+	psrlq	$32,%mm0	/* split mm1:mm0 into four 32-bit words: ebx, ecx from mm0; edx, ebp from mm1 */
+	movd	%mm1,%edx
+	psrlq	$32,%mm1
+	movd	%mm0,%ecx
+	movd	%mm1,%ebp
+	bswap	%ebx	/* byte-reverse each word before storing */
+	bswap	%edx
+	bswap	%ecx
+	bswap	%ebp
+	emms	/* leave MMX state so x87 code can run afterwards */
+	movl	%ebx,12(%edi)
+	movl	%edx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%ebp,(%edi)
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
+.globl	gcm_ghash_4bit_mmx	/* gcm_ghash_4bit_mmx: folds a run of 16-byte input blocks into a 16-byte state using stack copies of a caller table and .Lrem_8bit (MMX); presumably GHASH over a buffer, per the name */
+.hidden	gcm_ghash_4bit_mmx
+.type	gcm_ghash_4bit_mmx,@function
+.align	16
+gcm_ghash_4bit_mmx:
+.L_gcm_ghash_4bit_mmx_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%eax	/* eax = 1st stack argument: 16-byte state, read at start and written at end */
+	movl	24(%esp),%ebx	/* ebx = 2nd stack argument: table of sixteen 16-byte entries */
+	movl	28(%esp),%ecx	/* ecx = 3rd stack argument: input pointer */
+	movl	32(%esp),%edx	/* edx = 4th stack argument: input length in bytes (added to ecx below to form the end pointer) */
+	movl	%esp,%ebp	/* remember the caller-frame esp; restored from 556(%esp) on exit */
+	call	.L003pic_point
+.L003pic_point:
+	popl	%esi
+	leal	.Lrem_8bit-.L003pic_point(%esi),%esi	/* esi = run-time address of .Lrem_8bit */
+	subl	$544,%esp	/* carve out a 64-byte-aligned scratch frame */
+	andl	$-64,%esp
+	subl	$16,%esp
+	addl	%ecx,%edx
+	movl	%eax,544(%esp)	/* 544(%esp) = state pointer */
+	movl	%edx,552(%esp)	/* 552(%esp) = end-of-input pointer */
+	movl	%ebp,556(%esp)	/* 556(%esp) = saved esp */
+	addl	$128,%ebx	/* bias the table pointer so all 256 bytes are reachable with 8-bit displacements */
+	leal	144(%esp),%edi	/* edi-128 = 16(%esp): second qwords of entries; edi = 144(%esp): first qwords */
+	leal	400(%esp),%ebp	/* ebp-128 = 272(%esp) and ebp = 400(%esp): the same entries shifted right by 4 bits */
+	movl	-120(%ebx),%edx
+	movq	-120(%ebx),%mm0
+	movq	-128(%ebx),%mm3
+	shll	$4,%edx
+	movb	%dl,(%esp)	/* (%esp)+i = low byte of entry i second qword, shifted left by 4 */
+	movl	-104(%ebx),%edx
+	movq	-104(%ebx),%mm2
+	movq	-112(%ebx),%mm5
+	movq	%mm0,-128(%edi)
+	psrlq	$4,%mm0
+	movq	%mm3,(%edi)
+	movq	%mm3,%mm7
+	psrlq	$4,%mm3
+	shll	$4,%edx
+	movb	%dl,1(%esp)
+	movl	-88(%ebx),%edx
+	movq	-88(%ebx),%mm1
+	psllq	$60,%mm7
+	movq	-96(%ebx),%mm4
+	por	%mm7,%mm0
+	movq	%mm2,-120(%edi)
+	psrlq	$4,%mm2
+	movq	%mm5,8(%edi)
+	movq	%mm5,%mm6
+	movq	%mm0,-128(%ebp)
+	psrlq	$4,%mm5
+	movq	%mm3,(%ebp)
+	shll	$4,%edx
+	movb	%dl,2(%esp)
+	movl	-72(%ebx),%edx
+	movq	-72(%ebx),%mm0
+	psllq	$60,%mm6
+	movq	-80(%ebx),%mm3
+	por	%mm6,%mm2
+	movq	%mm1,-112(%edi)
+	psrlq	$4,%mm1
+	movq	%mm4,16(%edi)
+	movq	%mm4,%mm7
+	movq	%mm2,-120(%ebp)
+	psrlq	$4,%mm4
+	movq	%mm5,8(%ebp)
+	shll	$4,%edx
+	movb	%dl,3(%esp)
+	movl	-56(%ebx),%edx
+	movq	-56(%ebx),%mm2
+	psllq	$60,%mm7
+	movq	-64(%ebx),%mm5
+	por	%mm7,%mm1
+	movq	%mm0,-104(%edi)
+	psrlq	$4,%mm0
+	movq	%mm3,24(%edi)
+	movq	%mm3,%mm6
+	movq	%mm1,-112(%ebp)
+	psrlq	$4,%mm3
+	movq	%mm4,16(%ebp)
+	shll	$4,%edx
+	movb	%dl,4(%esp)
+	movl	-40(%ebx),%edx
+	movq	-40(%ebx),%mm1
+	psllq	$60,%mm6
+	movq	-48(%ebx),%mm4
+	por	%mm6,%mm0
+	movq	%mm2,-96(%edi)
+	psrlq	$4,%mm2
+	movq	%mm5,32(%edi)
+	movq	%mm5,%mm7
+	movq	%mm0,-104(%ebp)
+	psrlq	$4,%mm5
+	movq	%mm3,24(%ebp)
+	shll	$4,%edx
+	movb	%dl,5(%esp)
+	movl	-24(%ebx),%edx
+	movq	-24(%ebx),%mm0
+	psllq	$60,%mm7
+	movq	-32(%ebx),%mm3
+	por	%mm7,%mm2
+	movq	%mm1,-88(%edi)
+	psrlq	$4,%mm1
+	movq	%mm4,40(%edi)
+	movq	%mm4,%mm6
+	movq	%mm2,-96(%ebp)
+	psrlq	$4,%mm4
+	movq	%mm5,32(%ebp)
+	shll	$4,%edx
+	movb	%dl,6(%esp)
+	movl	-8(%ebx),%edx
+	movq	-8(%ebx),%mm2
+	psllq	$60,%mm6
+	movq	-16(%ebx),%mm5
+	por	%mm6,%mm1
+	movq	%mm0,-80(%edi)
+	psrlq	$4,%mm0
+	movq	%mm3,48(%edi)
+	movq	%mm3,%mm7
+	movq	%mm1,-88(%ebp)
+	psrlq	$4,%mm3
+	movq	%mm4,40(%ebp)
+	shll	$4,%edx
+	movb	%dl,7(%esp)
+	movl	8(%ebx),%edx
+	movq	8(%ebx),%mm1
+	psllq	$60,%mm7
+	movq	(%ebx),%mm4
+	por	%mm7,%mm0
+	movq	%mm2,-72(%edi)
+	psrlq	$4,%mm2
+	movq	%mm5,56(%edi)
+	movq	%mm5,%mm6
+	movq	%mm0,-80(%ebp)
+	psrlq	$4,%mm5
+	movq	%mm3,48(%ebp)
+	shll	$4,%edx
+	movb	%dl,8(%esp)
+	movl	24(%ebx),%edx
+	movq	24(%ebx),%mm0
+	psllq	$60,%mm6
+	movq	16(%ebx),%mm3
+	por	%mm6,%mm2
+	movq	%mm1,-64(%edi)
+	psrlq	$4,%mm1
+	movq	%mm4,64(%edi)
+	movq	%mm4,%mm7
+	movq	%mm2,-72(%ebp)
+	psrlq	$4,%mm4
+	movq	%mm5,56(%ebp)
+	shll	$4,%edx
+	movb	%dl,9(%esp)
+	movl	40(%ebx),%edx
+	movq	40(%ebx),%mm2
+	psllq	$60,%mm7
+	movq	32(%ebx),%mm5
+	por	%mm7,%mm1
+	movq	%mm0,-56(%edi)
+	psrlq	$4,%mm0
+	movq	%mm3,72(%edi)
+	movq	%mm3,%mm6
+	movq	%mm1,-64(%ebp)
+	psrlq	$4,%mm3
+	movq	%mm4,64(%ebp)
+	shll	$4,%edx
+	movb	%dl,10(%esp)
+	movl	56(%ebx),%edx
+	movq	56(%ebx),%mm1
+	psllq	$60,%mm6
+	movq	48(%ebx),%mm4
+	por	%mm6,%mm0
+	movq	%mm2,-48(%edi)
+	psrlq	$4,%mm2
+	movq	%mm5,80(%edi)
+	movq	%mm5,%mm7
+	movq	%mm0,-56(%ebp)
+	psrlq	$4,%mm5
+	movq	%mm3,72(%ebp)
+	shll	$4,%edx
+	movb	%dl,11(%esp)
+	movl	72(%ebx),%edx
+	movq	72(%ebx),%mm0
+	psllq	$60,%mm7
+	movq	64(%ebx),%mm3
+	por	%mm7,%mm2
+	movq	%mm1,-40(%edi)
+	psrlq	$4,%mm1
+	movq	%mm4,88(%edi)
+	movq	%mm4,%mm6
+	movq	%mm2,-48(%ebp)
+	psrlq	$4,%mm4
+	movq	%mm5,80(%ebp)
+	shll	$4,%edx
+	movb	%dl,12(%esp)
+	movl	88(%ebx),%edx
+	movq	88(%ebx),%mm2
+	psllq	$60,%mm6
+	movq	80(%ebx),%mm5
+	por	%mm6,%mm1
+	movq	%mm0,-32(%edi)
+	psrlq	$4,%mm0
+	movq	%mm3,96(%edi)
+	movq	%mm3,%mm7
+	movq	%mm1,-40(%ebp)
+	psrlq	$4,%mm3
+	movq	%mm4,88(%ebp)
+	shll	$4,%edx
+	movb	%dl,13(%esp)
+	movl	104(%ebx),%edx
+	movq	104(%ebx),%mm1
+	psllq	$60,%mm7
+	movq	96(%ebx),%mm4
+	por	%mm7,%mm0
+	movq	%mm2,-24(%edi)
+	psrlq	$4,%mm2
+	movq	%mm5,104(%edi)
+	movq	%mm5,%mm6
+	movq	%mm0,-32(%ebp)
+	psrlq	$4,%mm5
+	movq	%mm3,96(%ebp)
+	shll	$4,%edx
+	movb	%dl,14(%esp)
+	movl	120(%ebx),%edx
+	movq	120(%ebx),%mm0
+	psllq	$60,%mm6
+	movq	112(%ebx),%mm3
+	por	%mm6,%mm2
+	movq	%mm1,-16(%edi)
+	psrlq	$4,%mm1
+	movq	%mm4,112(%edi)
+	movq	%mm4,%mm7
+	movq	%mm2,-24(%ebp)
+	psrlq	$4,%mm4
+	movq	%mm5,104(%ebp)
+	shll	$4,%edx
+	movb	%dl,15(%esp)
+	psllq	$60,%mm7
+	por	%mm7,%mm1
+	movq	%mm0,-8(%edi)
+	psrlq	$4,%mm0
+	movq	%mm3,120(%edi)
+	movq	%mm3,%mm6
+	movq	%mm1,-16(%ebp)
+	psrlq	$4,%mm3
+	movq	%mm4,112(%ebp)
+	psllq	$60,%mm6
+	por	%mm6,%mm0
+	movq	%mm0,-8(%ebp)
+	movq	%mm3,120(%ebp)
+	movq	(%eax),%mm6	/* load the current state: bytes 0-7 in mm6, 8-11 in ebx, 12-15 in edx */
+	movl	8(%eax),%ebx
+	movl	12(%eax),%edx
+.align	16
+.L004outer:
+	xorl	12(%ecx),%edx	/* one iteration per 16-byte input block: XOR the block into the state */
+	xorl	8(%ecx),%ebx
+	pxor	(%ecx),%mm6
+	leal	16(%ecx),%ecx
+	movl	%ebx,536(%esp)	/* spill state words to 528..539(%esp); they are reloaded into edx one dword at a time below */
+	movq	%mm6,528(%esp)
+	movl	%ecx,548(%esp)	/* 548(%esp) = advanced input pointer (ecx is reused as scratch) */
+	xorl	%eax,%eax
+	roll	$8,%edx	/* roll by 8 brings the next state byte into dl */
+	movb	%dl,%al
+	movl	%eax,%ebp
+	andb	$15,%al	/* eax = low nibble, ebp = high nibble of that byte; both index the stack tables */
+	shrl	$4,%ebp
+	pxor	%mm0,%mm0
+	roll	$8,%edx
+	pxor	%mm1,%mm1
+	pxor	%mm2,%mm2
+	movq	16(%esp,%eax,8),%mm7
+	movq	144(%esp,%eax,8),%mm6
+	movb	%dl,%al
+	movd	%mm7,%ebx
+	psrlq	$8,%mm7
+	movq	%mm6,%mm3
+	movl	%eax,%edi
+	psrlq	$8,%mm6
+	pxor	272(%esp,%ebp,8),%mm7
+	andb	$15,%al
+	psllq	$56,%mm3
+	shrl	$4,%edi
+	pxor	16(%esp,%eax,8),%mm7
+	roll	$8,%edx
+	pxor	144(%esp,%eax,8),%mm6
+	pxor	%mm3,%mm7
+	pxor	400(%esp,%ebp,8),%mm6
+	xorb	(%esp,%ebp,1),%bl
+	movb	%dl,%al
+	movd	%mm7,%ecx
+	movzbl	%bl,%ebx
+	psrlq	$8,%mm7
+	movq	%mm6,%mm3
+	movl	%eax,%ebp
+	psrlq	$8,%mm6
+	pxor	272(%esp,%edi,8),%mm7
+	andb	$15,%al
+	psllq	$56,%mm3
+	shrl	$4,%ebp
+	pinsrw	$2,(%esi,%ebx,2),%mm2	/* fetch the 16-bit .Lrem_8bit entry for the byte shifted out; XORed into mm6 a few steps later */
+	pxor	16(%esp,%eax,8),%mm7
+	roll	$8,%edx
+	pxor	144(%esp,%eax,8),%mm6
+	pxor	%mm3,%mm7
+	pxor	400(%esp,%edi,8),%mm6
+	xorb	(%esp,%edi,1),%cl
+	movb	%dl,%al
+	movl	536(%esp),%edx	/* next state dword (bytes 8-11) */
+	movd	%mm7,%ebx
+	movzbl	%cl,%ecx
+	psrlq	$8,%mm7
+	movq	%mm6,%mm3
+	movl	%eax,%edi
+	psrlq	$8,%mm6
+	pxor	272(%esp,%ebp,8),%mm7
+	andb	$15,%al
+	psllq	$56,%mm3
+	pxor	%mm2,%mm6
+	shrl	$4,%edi
+	pinsrw	$2,(%esi,%ecx,2),%mm1
+	pxor	16(%esp,%eax,8),%mm7
+	roll	$8,%edx
+	pxor	144(%esp,%eax,8),%mm6
+	pxor	%mm3,%mm7
+	pxor	400(%esp,%ebp,8),%mm6
+	xorb	(%esp,%ebp,1),%bl
+	movb	%dl,%al
+	movd	%mm7,%ecx
+	movzbl	%bl,%ebx
+	psrlq	$8,%mm7
+	movq	%mm6,%mm3
+	movl	%eax,%ebp
+	psrlq	$8,%mm6
+	pxor	272(%esp,%edi,8),%mm7
+	andb	$15,%al
+	psllq	$56,%mm3
+	pxor	%mm1,%mm6
+	shrl	$4,%ebp
+	pinsrw	$2,(%esi,%ebx,2),%mm0
+	pxor	16(%esp,%eax,8),%mm7
+	roll	$8,%edx
+	pxor	144(%esp,%eax,8),%mm6
+	pxor	%mm3,%mm7
+	pxor	400(%esp,%edi,8),%mm6
+	xorb	(%esp,%edi,1),%cl
+	movb	%dl,%al
+	movd	%mm7,%ebx
+	movzbl	%cl,%ecx
+	psrlq	$8,%mm7
+	movq	%mm6,%mm3
+	movl	%eax,%edi
+	psrlq	$8,%mm6
+	pxor	272(%esp,%ebp,8),%mm7
+	andb	$15,%al
+	psllq	$56,%mm3
+	pxor	%mm0,%mm6
+	shrl	$4,%edi
+	pinsrw	$2,(%esi,%ecx,2),%mm2
+	pxor	16(%esp,%eax,8),%mm7
+	roll	$8,%edx
+	pxor	144(%esp,%eax,8),%mm6
+	pxor	%mm3,%mm7
+	pxor	400(%esp,%ebp,8),%mm6
+	xorb	(%esp,%ebp,1),%bl
+	movb	%dl,%al
+	movd	%mm7,%ecx
+	movzbl	%bl,%ebx
+	psrlq	$8,%mm7
+	movq	%mm6,%mm3
+	movl	%eax,%ebp
+	psrlq	$8,%mm6
+	pxor	272(%esp,%edi,8),%mm7
+	andb	$15,%al
+	psllq	$56,%mm3
+	pxor	%mm2,%mm6
+	shrl	$4,%ebp
+	pinsrw	$2,(%esi,%ebx,2),%mm1
+	pxor	16(%esp,%eax,8),%mm7
+	roll	$8,%edx
+	pxor	144(%esp,%eax,8),%mm6
+	pxor	%mm3,%mm7
+	pxor	400(%esp,%edi,8),%mm6
+	xorb	(%esp,%edi,1),%cl
+	movb	%dl,%al
+	movl	532(%esp),%edx	/* next state dword (bytes 4-7) */
+	movd	%mm7,%ebx
+	movzbl	%cl,%ecx
+	psrlq	$8,%mm7
+	movq	%mm6,%mm3
+	movl	%eax,%edi
+	psrlq	$8,%mm6
+	pxor	272(%esp,%ebp,8),%mm7
+	andb	$15,%al
+	psllq	$56,%mm3
+	pxor	%mm1,%mm6
+	shrl	$4,%edi
+	pinsrw	$2,(%esi,%ecx,2),%mm0
+	pxor	16(%esp,%eax,8),%mm7
+	roll	$8,%edx
+	pxor	144(%esp,%eax,8),%mm6
+	pxor	%mm3,%mm7
+	pxor	400(%esp,%ebp,8),%mm6
+	xorb	(%esp,%ebp,1),%bl
+	movb	%dl,%al
+	movd	%mm7,%ecx
+	movzbl	%bl,%ebx
+	psrlq	$8,%mm7
+	movq	%mm6,%mm3
+	movl	%eax,%ebp
+	psrlq	$8,%mm6
+	pxor	272(%esp,%edi,8),%mm7
+	andb	$15,%al
+	psllq	$56,%mm3
+	pxor	%mm0,%mm6
+	shrl	$4,%ebp
+	pinsrw	$2,(%esi,%ebx,2),%mm2
+	pxor	16(%esp,%eax,8),%mm7
+	roll	$8,%edx
+	pxor	144(%esp,%eax,8),%mm6
+	pxor	%mm3,%mm7
+	pxor	400(%esp,%edi,8),%mm6
+	xorb	(%esp,%edi,1),%cl
+	movb	%dl,%al
+	movd	%mm7,%ebx
+	movzbl	%cl,%ecx
+	psrlq	$8,%mm7
+	movq	%mm6,%mm3
+	movl	%eax,%edi
+	psrlq	$8,%mm6
+	pxor	272(%esp,%ebp,8),%mm7
+	andb	$15,%al
+	psllq	$56,%mm3
+	pxor	%mm2,%mm6
+	shrl	$4,%edi
+	pinsrw	$2,(%esi,%ecx,2),%mm1
+	pxor	16(%esp,%eax,8),%mm7
+	roll	$8,%edx
+	pxor	144(%esp,%eax,8),%mm6
+	pxor	%mm3,%mm7
+	pxor	400(%esp,%ebp,8),%mm6
+	xorb	(%esp,%ebp,1),%bl
+	movb	%dl,%al
+	movd	%mm7,%ecx
+	movzbl	%bl,%ebx
+	psrlq	$8,%mm7
+	movq	%mm6,%mm3
+	movl	%eax,%ebp
+	psrlq	$8,%mm6
+	pxor	272(%esp,%edi,8),%mm7
+	andb	$15,%al
+	psllq	$56,%mm3
+	pxor	%mm1,%mm6
+	shrl	$4,%ebp
+	pinsrw	$2,(%esi,%ebx,2),%mm0
+	pxor	16(%esp,%eax,8),%mm7
+	roll	$8,%edx
+	pxor	144(%esp,%eax,8),%mm6
+	pxor	%mm3,%mm7
+	pxor	400(%esp,%edi,8),%mm6
+	xorb	(%esp,%edi,1),%cl
+	movb	%dl,%al
+	movl	528(%esp),%edx	/* last state dword (bytes 0-3) */
+	movd	%mm7,%ebx
+	movzbl	%cl,%ecx
+	psrlq	$8,%mm7
+	movq	%mm6,%mm3
+	movl	%eax,%edi
+	psrlq	$8,%mm6
+	pxor	272(%esp,%ebp,8),%mm7
+	andb	$15,%al
+	psllq	$56,%mm3
+	pxor	%mm0,%mm6
+	shrl	$4,%edi
+	pinsrw	$2,(%esi,%ecx,2),%mm2
+	pxor	16(%esp,%eax,8),%mm7
+	roll	$8,%edx
+	pxor	144(%esp,%eax,8),%mm6
+	pxor	%mm3,%mm7
+	pxor	400(%esp,%ebp,8),%mm6
+	xorb	(%esp,%ebp,1),%bl
+	movb	%dl,%al
+	movd	%mm7,%ecx
+	movzbl	%bl,%ebx
+	psrlq	$8,%mm7
+	movq	%mm6,%mm3
+	movl	%eax,%ebp
+	psrlq	$8,%mm6
+	pxor	272(%esp,%edi,8),%mm7
+	andb	$15,%al
+	psllq	$56,%mm3
+	pxor	%mm2,%mm6
+	shrl	$4,%ebp
+	pinsrw	$2,(%esi,%ebx,2),%mm1
+	pxor	16(%esp,%eax,8),%mm7
+	roll	$8,%edx
+	pxor	144(%esp,%eax,8),%mm6
+	pxor	%mm3,%mm7
+	pxor	400(%esp,%edi,8),%mm6
+	xorb	(%esp,%edi,1),%cl
+	movb	%dl,%al
+	movd	%mm7,%ebx
+	movzbl	%cl,%ecx
+	psrlq	$8,%mm7
+	movq	%mm6,%mm3
+	movl	%eax,%edi
+	psrlq	$8,%mm6
+	pxor	272(%esp,%ebp,8),%mm7
+	andb	$15,%al
+	psllq	$56,%mm3
+	pxor	%mm1,%mm6
+	shrl	$4,%edi
+	pinsrw	$2,(%esi,%ecx,2),%mm0
+	pxor	16(%esp,%eax,8),%mm7
+	roll	$8,%edx
+	pxor	144(%esp,%eax,8),%mm6
+	pxor	%mm3,%mm7
+	pxor	400(%esp,%ebp,8),%mm6
+	xorb	(%esp,%ebp,1),%bl
+	movb	%dl,%al
+	movd	%mm7,%ecx
+	movzbl	%bl,%ebx
+	psrlq	$8,%mm7
+	movq	%mm6,%mm3
+	movl	%eax,%ebp
+	psrlq	$8,%mm6
+	pxor	272(%esp,%edi,8),%mm7
+	andb	$15,%al
+	psllq	$56,%mm3
+	pxor	%mm0,%mm6
+	shrl	$4,%ebp
+	pinsrw	$2,(%esi,%ebx,2),%mm2
+	pxor	16(%esp,%eax,8),%mm7
+	roll	$8,%edx
+	pxor	144(%esp,%eax,8),%mm6
+	pxor	%mm3,%mm7
+	pxor	400(%esp,%edi,8),%mm6
+	xorb	(%esp,%edi,1),%cl
+	movb	%dl,%al
+	movl	524(%esp),%edx	/* NOTE(review): this loaded value is never consumed; edx is overwritten by movd below before any read */
+	movd	%mm7,%ebx
+	movzbl	%cl,%ecx
+	psrlq	$8,%mm7
+	movq	%mm6,%mm3
+	movl	%eax,%edi
+	psrlq	$8,%mm6
+	pxor	272(%esp,%ebp,8),%mm7
+	andb	$15,%al
+	psllq	$56,%mm3
+	pxor	%mm2,%mm6
+	shrl	$4,%edi
+	pinsrw	$2,(%esi,%ecx,2),%mm1
+	pxor	16(%esp,%eax,8),%mm7
+	pxor	144(%esp,%eax,8),%mm6
+	xorb	(%esp,%ebp,1),%bl
+	pxor	%mm3,%mm7
+	pxor	400(%esp,%ebp,8),%mm6
+	movzbl	%bl,%ebx
+	pxor	%mm2,%mm2
+	psllq	$4,%mm1
+	movd	%mm7,%ecx
+	psrlq	$4,%mm7	/* final step shifts by 4 bits only and uses the unshifted tables for the last nibble (edi) */
+	movq	%mm6,%mm3
+	psrlq	$4,%mm6
+	shll	$4,%ecx
+	pxor	16(%esp,%edi,8),%mm7
+	psllq	$60,%mm3
+	movzbl	%cl,%ecx
+	pxor	%mm3,%mm7
+	pxor	144(%esp,%edi,8),%mm6
+	pinsrw	$2,(%esi,%ebx,2),%mm0
+	pxor	%mm1,%mm6
+	movd	%mm7,%edx
+	pinsrw	$3,(%esi,%ecx,2),%mm2
+	psllq	$12,%mm0
+	pxor	%mm0,%mm6
+	psrlq	$32,%mm7
+	pxor	%mm2,%mm6
+	movl	548(%esp),%ecx	/* restore the input pointer saved at the top of the iteration */
+	movd	%mm7,%ebx
+	movq	%mm6,%mm3
+	psllw	$8,%mm6	/* swap the two bytes inside each 16-bit lane ... */
+	psrlw	$8,%mm3
+	por	%mm3,%mm6
+	bswap	%edx
+	pshufw	$27,%mm6,%mm6	/* ... then reverse the four lanes: together a full 8-byte reversal of mm6 */
+	bswap	%ebx
+	cmpl	552(%esp),%ecx	/* loop until the input pointer reaches the end pointer */
+	jne	.L004outer
+	movl	544(%esp),%eax	/* write the updated state back through the saved state pointer */
+	movl	%edx,12(%eax)
+	movl	%ebx,8(%eax)
+	movq	%mm6,(%eax)
+	movl	556(%esp),%esp	/* drop the scratch frame */
+	emms	/* leave MMX state so x87 code can run afterwards */
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
+.globl	gcm_init_clmul	/* gcm_init_clmul: derives three 16-byte table entries from a 16-byte input and stores them at offsets 0, 16 and 32 of the output (uses PCLMULQDQ and PALIGNR) */
+.hidden	gcm_init_clmul
+.type	gcm_init_clmul,@function
+.align	16
+gcm_init_clmul:
+.L_gcm_init_clmul_begin:
+	movl	4(%esp),%edx	/* edx = 1st stack argument: output table (48 bytes written) */
+	movl	8(%esp),%eax	/* eax = 2nd stack argument: 16-byte input */
+	call	.L005pic
+.L005pic:
+	popl	%ecx
+	leal	.Lbswap-.L005pic(%ecx),%ecx	/* ecx = run-time address of .Lbswap */
+	movdqu	(%eax),%xmm2
+	pshufd	$78,%xmm2,%xmm2	/* swap the two 64-bit halves */
+	pshufd	$255,%xmm2,%xmm4	/* broadcast the top dword so its sign bit can drive the mask below */
+	movdqa	%xmm2,%xmm3
+	psllq	$1,%xmm2	/* shift the 128-bit value left by one bit (carry between halves handled via xmm3) */
+	pxor	%xmm5,%xmm5
+	psrlq	$63,%xmm3
+	pcmpgtd	%xmm4,%xmm5	/* xmm5 = all ones if the top bit was set, else zero */
+	pslldq	$8,%xmm3
+	por	%xmm3,%xmm2
+	pand	16(%ecx),%xmm5	/* select the constant stored 16 bytes after .Lbswap when the top bit was set */
+	pxor	%xmm5,%xmm2
+	movdqa	%xmm2,%xmm0
+	movdqa	%xmm0,%xmm1
+	pshufd	$78,%xmm0,%xmm3
+	pshufd	$78,%xmm2,%xmm4
+	pxor	%xmm0,%xmm3
+	pxor	%xmm2,%xmm4
+.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 : low halves */
+.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 : high halves */
+.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 : (hi^lo) terms, Karatsuba-style middle product */
+	xorps	%xmm0,%xmm3
+	xorps	%xmm1,%xmm3
+	movdqa	%xmm3,%xmm4
+	psrldq	$8,%xmm3
+	pslldq	$8,%xmm4
+	pxor	%xmm3,%xmm1
+	pxor	%xmm4,%xmm0
+	movdqa	%xmm0,%xmm4	/* shift/xor sequence folds the 256-bit product xmm1:xmm0 into xmm0; presumably reduction modulo the GHASH polynomial */
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+	pshufd	$78,%xmm2,%xmm3
+	pshufd	$78,%xmm0,%xmm4
+	pxor	%xmm2,%xmm3
+	movdqu	%xmm2,(%edx)	/* entry 0: the adjusted input value */
+	pxor	%xmm0,%xmm4
+	movdqu	%xmm0,16(%edx)	/* entry 1: that value multiplied by itself and folded */
+.byte	102,15,58,15,227,8	/* palignr $8,%xmm3,%xmm4 : pack the (hi^lo) halves of both entries */
+	movdqu	%xmm4,32(%edx)	/* entry 2: packed (hi^lo) values used for the middle products */
+	ret
+.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin
+.globl	gcm_gmult_clmul	/* gcm_gmult_clmul: carry-less multiplies a 16-byte block by table entry 0 and writes the folded result back in place (uses PCLMULQDQ and PSHUFB) */
+.hidden	gcm_gmult_clmul
+.type	gcm_gmult_clmul,@function
+.align	16
+gcm_gmult_clmul:
+.L_gcm_gmult_clmul_begin:
+	movl	4(%esp),%eax	/* eax = 1st stack argument: 16-byte block, read and overwritten */
+	movl	8(%esp),%edx	/* edx = 2nd stack argument: table; offsets 0 and 32 are read */
+	call	.L006pic
+.L006pic:
+	popl	%ecx
+	leal	.Lbswap-.L006pic(%ecx),%ecx	/* ecx = run-time address of .Lbswap */
+	movdqu	(%eax),%xmm0
+	movdqa	(%ecx),%xmm5	/* xmm5 = byte-reversal shuffle mask */
+	movups	(%edx),%xmm2
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 : byte-reverse the block */
+	movups	32(%edx),%xmm4
+	movdqa	%xmm0,%xmm1
+	pshufd	$78,%xmm0,%xmm3
+	pxor	%xmm0,%xmm3
+.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 : low halves */
+.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 : high halves */
+.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 : middle product using table offset 32 */
+	xorps	%xmm0,%xmm3
+	xorps	%xmm1,%xmm3
+	movdqa	%xmm3,%xmm4
+	psrldq	$8,%xmm3
+	pslldq	$8,%xmm4
+	pxor	%xmm3,%xmm1
+	pxor	%xmm4,%xmm0
+	movdqa	%xmm0,%xmm4	/* shift/xor sequence folds the 256-bit product xmm1:xmm0 into xmm0; presumably reduction modulo the GHASH polynomial */
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 : restore the original byte order */
+	movdqu	%xmm0,(%eax)
+	ret
+.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
+.globl	gcm_ghash_clmul	/* gcm_ghash_clmul: folds a run of 16-byte input blocks into a 16-byte state with PCLMULQDQ, two blocks per loop iteration plus a single-block tail */
+.hidden	gcm_ghash_clmul
+.type	gcm_ghash_clmul,@function
+.align	16
+gcm_ghash_clmul:
+.L_gcm_ghash_clmul_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%eax	/* eax = 1st stack argument: 16-byte state, read and overwritten */
+	movl	24(%esp),%edx	/* edx = 2nd stack argument: table; offsets 0, 16 and 32 are read */
+	movl	28(%esp),%esi	/* esi = 3rd stack argument: input pointer */
+	movl	32(%esp),%ebx	/* ebx = 4th stack argument: input length in bytes */
+	call	.L007pic
+.L007pic:
+	popl	%ecx
+	leal	.Lbswap-.L007pic(%ecx),%ecx	/* ecx = run-time address of .Lbswap */
+	movdqu	(%eax),%xmm0
+	movdqa	(%ecx),%xmm5	/* xmm5 = byte-reversal shuffle mask */
+	movdqu	(%edx),%xmm2
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 : byte-reverse the state */
+	subl	$16,%ebx
+	jz	.L008odd_tail	/* exactly one 16-byte block: go straight to the single-block path */
+	movdqu	(%esi),%xmm3
+	movdqu	16(%esi),%xmm6
+.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
+.byte	102,15,56,0,245	/* pshufb %xmm5,%xmm6 */
+	movdqu	32(%edx),%xmm5	/* xmm5 now holds table offset 32 (mask is reloaded from (%ecx) when needed) */
+	pxor	%xmm3,%xmm0
+	pshufd	$78,%xmm6,%xmm3
+	movdqa	%xmm6,%xmm7
+	pxor	%xmm6,%xmm3
+	leal	32(%esi),%esi
+.byte	102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 : second block times table entry 0 */
+.byte	102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
+.byte	102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
+	movups	16(%edx),%xmm2	/* switch to table entry 1 for the accumulated state */
+	nop
+	subl	$32,%ebx
+	jbe	.L009even_tail	/* fewer than two further blocks remain: finish outside the loop */
+	jmp	.L010mod_loop
+.align	32
+.L010mod_loop:
+	pshufd	$78,%xmm0,%xmm4
+	movdqa	%xmm0,%xmm1
+	pxor	%xmm0,%xmm4
+	nop
+.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
+.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
+.byte	102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
+	movups	(%edx),%xmm2
+	xorps	%xmm6,%xmm0	/* combine with the products of the other block computed earlier */
+	movdqa	(%ecx),%xmm5
+	xorps	%xmm7,%xmm1
+	movdqu	(%esi),%xmm7	/* load the next two input blocks */
+	pxor	%xmm0,%xmm3
+	movdqu	16(%esi),%xmm6
+	pxor	%xmm1,%xmm3
+.byte	102,15,56,0,253	/* pshufb %xmm5,%xmm7 */
+	pxor	%xmm3,%xmm4
+	movdqa	%xmm4,%xmm3
+	psrldq	$8,%xmm4
+	pslldq	$8,%xmm3
+	pxor	%xmm4,%xmm1
+	pxor	%xmm3,%xmm0
+.byte	102,15,56,0,245	/* pshufb %xmm5,%xmm6 */
+	pxor	%xmm7,%xmm1
+	movdqa	%xmm6,%xmm7
+	movdqa	%xmm0,%xmm4
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+.byte	102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
+	movups	32(%edx),%xmm5
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+	pshufd	$78,%xmm7,%xmm3
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm7,%xmm3
+	pxor	%xmm4,%xmm1
+.byte	102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
+	movups	16(%edx),%xmm2
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+.byte	102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
+	leal	32(%esi),%esi
+	subl	$32,%ebx
+	ja	.L010mod_loop	/* keep looping while more than one pair of blocks remains */
+.L009even_tail:
+	pshufd	$78,%xmm0,%xmm4
+	movdqa	%xmm0,%xmm1
+	pxor	%xmm0,%xmm4
+.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
+.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
+.byte	102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
+	movdqa	(%ecx),%xmm5
+	xorps	%xmm6,%xmm0
+	xorps	%xmm7,%xmm1
+	pxor	%xmm0,%xmm3
+	pxor	%xmm1,%xmm3
+	pxor	%xmm3,%xmm4
+	movdqa	%xmm4,%xmm3
+	psrldq	$8,%xmm4
+	pslldq	$8,%xmm3
+	pxor	%xmm4,%xmm1
+	pxor	%xmm3,%xmm0
+	movdqa	%xmm0,%xmm4
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+	testl	%ebx,%ebx
+	jnz	.L011done	/* ebx == 0 here means one more 16-byte block is left; otherwise finished */
+	movups	(%edx),%xmm2
+.L008odd_tail:
+	movdqu	(%esi),%xmm3	/* single remaining block: XOR into the state, multiply by table entry 0, fold */
+.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
+	pxor	%xmm3,%xmm0
+	movdqa	%xmm0,%xmm1
+	pshufd	$78,%xmm0,%xmm3
+	pshufd	$78,%xmm2,%xmm4
+	pxor	%xmm0,%xmm3
+	pxor	%xmm2,%xmm4
+.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
+.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
+.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
+	xorps	%xmm0,%xmm3
+	xorps	%xmm1,%xmm3
+	movdqa	%xmm3,%xmm4
+	psrldq	$8,%xmm3
+	pslldq	$8,%xmm4
+	pxor	%xmm3,%xmm1
+	pxor	%xmm4,%xmm0
+	movdqa	%xmm0,%xmm4
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+.L011done:
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 : restore the original byte order */
+	movdqu	%xmm0,(%eax)	/* write the updated state back */
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
+.align	64
+.Lbswap:	/* constants for the *_clmul routines in this file */
+.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0	/* byte-reversal shuffle mask, loaded from (%ecx) before pshufb */
+.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194	/* 16-byte constant read as 16(%ecx) in gcm_init_clmul (low byte 1, top byte 0xC2) */
+.align	64
+.Lrem_8bit:	/* 256 16-bit entries, indexed by a byte value via pinsrw in gcm_ghash_4bit_mmx */
+.value	0,450,900,582,1800,1738,1164,1358
+.value	3600,4050,3476,3158,2328,2266,2716,2910
+.value	7200,7650,8100,7782,6952,6890,6316,6510
+.value	4656,5106,4532,4214,5432,5370,5820,6014
+.value	14400,14722,15300,14854,16200,16010,15564,15630
+.value	13904,14226,13780,13334,12632,12442,13020,13086
+.value	9312,9634,10212,9766,9064,8874,8428,8494
+.value	10864,11186,10740,10294,11640,11450,12028,12094
+.value	28800,28994,29444,29382,30600,30282,29708,30158
+.value	32400,32594,32020,31958,31128,30810,31260,31710
+.value	27808,28002,28452,28390,27560,27242,26668,27118
+.value	25264,25458,24884,24822,26040,25722,26172,26622
+.value	18624,18690,19268,19078,20424,19978,19532,19854
+.value	18128,18194,17748,17558,16856,16410,16988,17310
+.value	21728,21794,22372,22182,21480,21034,20588,20910
+.value	23280,23346,22900,22710,24056,23610,24188,24510
+.value	57600,57538,57988,58182,58888,59338,58764,58446
+.value	61200,61138,60564,60758,59416,59866,60316,59998
+.value	64800,64738,65188,65382,64040,64490,63916,63598
+.value	62256,62194,61620,61814,62520,62970,63420,63102
+.value	55616,55426,56004,56070,56904,57226,56780,56334
+.value	55120,54930,54484,54550,53336,53658,54236,53790
+.value	50528,50338,50916,50982,49768,50090,49644,49198
+.value	52080,51890,51444,51510,52344,52666,53244,52798
+.value	37248,36930,37380,37830,38536,38730,38156,38094
+.value	40848,40530,39956,40406,39064,39258,39708,39646
+.value	36256,35938,36388,36838,35496,35690,35116,35054
+.value	33712,33394,32820,33270,33976,34170,34620,34558
+.value	43456,43010,43588,43910,44744,44810,44364,44174
+.value	42960,42514,42068,42390,41176,41242,41820,41630
+.value	46560,46114,46692,47014,45800,45866,45420,45230
+.value	48112,47666,47220,47542,48376,48442,49020,48830
+.align	64
+.Lrem_4bit:	/* 16 eight-byte entries (low dword 0, high dword the value), indexed by a nibble in gcm_gmult_4bit_mmx */
+.long	0,0,0,471859200,0,943718400,0,610271232
+.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
+.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
+.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
+.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67	/* NUL-terminated ASCII: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>" */
+.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
+.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
+.byte	0
+#endif
diff --git a/third_party/boringssl/linux-x86/crypto/fipsmodule/md5-586.S b/third_party/boringssl/linux-x86/crypto/fipsmodule/md5-586.S
new file mode 100644
index 0000000..7237f95
--- /dev/null
+++ b/third_party/boringssl/linux-x86/crypto/fipsmodule/md5-586.S
@@ -0,0 +1,681 @@
+#if defined(__i386__)
+.text
+.globl	md5_block_asm_data_order
+.hidden	md5_block_asm_data_order
+.type	md5_block_asm_data_order,@function
+.align	16
+md5_block_asm_data_order:	/* stack args at entry: 4(%esp) -> four 32-bit state words, 8(%esp) -> input bytes, 12(%esp) = number of 64-byte blocks */
+.L_md5_block_asm_data_order_begin:
+	pushl	%esi
+	pushl	%edi
+	movl	12(%esp),%edi	/* arg 1 (shifted by the two pushes): pointer to the state words */
+	movl	16(%esp),%esi	/* arg 2: pointer to the input data */
+	movl	20(%esp),%ecx	/* arg 3: block count */
+	pushl	%ebp
+	shll	$6,%ecx	/* count * 64 = input length in bytes */
+	pushl	%ebx
+	addl	%esi,%ecx	/* address one past the end of the input */
+	subl	$64,%ecx	/* address of the last 64-byte block */
+	movl	(%edi),%eax	/* A = state[0] */
+	pushl	%ecx	/* keep the last-block address at (%esp) for the loop test */
+	movl	4(%edi),%ebx	/* B = state[1] */
+	movl	8(%edi),%ecx	/* C = state[2] */
+	movl	12(%edi),%edx	/* D = state[3] */
+.L000start:	/* top of the per-block loop; %esi points at the current 64-byte block */
+/* Round 1, 16 steps: a = b + rol(a + F(b,c,d) + X[k] + K, s), F(b,c,d) = ((c ^ d) & b) ^ d, s cycles 7,12,17,22, k = 0..15. */
+/* %eax,%ebx,%ecx,%edx hold A,B,C,D; %ebp = message word X[k] read with a plain movl (no byte swap); %edi = scratch for F. */
+	movl	%ecx,%edi	/* %edi = C */
+	movl	(%esi),%ebp	/* X[0] */
+/* step 1: A = B + rol(A + F(B,C,D) + X[0] + K, 7) */
+	xorl	%edx,%edi	/* C ^ D */
+	andl	%ebx,%edi	/* (C ^ D) & B */
+	leal	3614090360(%eax,%ebp,1),%eax	/* A + X[0] + 0xd76aa478 */
+	xorl	%edx,%edi	/* ((C ^ D) & B) ^ D = F(B,C,D) */
+	addl	%edi,%eax
+	movl	%ebx,%edi	/* set up the next step: %edi = B */
+	roll	$7,%eax
+	movl	4(%esi),%ebp	/* preload X[1] for the next step */
+	addl	%ebx,%eax	/* A = B + rotated sum */
+/* step 2: D = A + rol(D + F(A,B,C) + X[1] + K, 12) */
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	leal	3905402710(%edx,%ebp,1),%edx
+	xorl	%ecx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$12,%edx
+	movl	8(%esi),%ebp
+	addl	%eax,%edx
+/* step 3: C = D + rol(C + F(D,A,B) + X[2] + K, 17) */
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	leal	606105819(%ecx,%ebp,1),%ecx
+	xorl	%ebx,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$17,%ecx
+	movl	12(%esi),%ebp
+	addl	%edx,%ecx
+/* step 4: B = C + rol(B + F(C,D,A) + X[3] + K, 22) */
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	leal	3250441966(%ebx,%ebp,1),%ebx
+	xorl	%eax,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$22,%ebx
+	movl	16(%esi),%ebp
+	addl	%ecx,%ebx
+/* step 5: A = B + rol(A + F(B,C,D) + X[4] + K, 7) */
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	leal	4118548399(%eax,%ebp,1),%eax
+	xorl	%edx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$7,%eax
+	movl	20(%esi),%ebp
+	addl	%ebx,%eax
+/* step 6: D = A + rol(D + F(A,B,C) + X[5] + K, 12) */
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	leal	1200080426(%edx,%ebp,1),%edx
+	xorl	%ecx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$12,%edx
+	movl	24(%esi),%ebp
+	addl	%eax,%edx
+/* step 7: C = D + rol(C + F(D,A,B) + X[6] + K, 17) */
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	leal	2821735955(%ecx,%ebp,1),%ecx
+	xorl	%ebx,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$17,%ecx
+	movl	28(%esi),%ebp
+	addl	%edx,%ecx
+/* step 8: B = C + rol(B + F(C,D,A) + X[7] + K, 22) */
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	leal	4249261313(%ebx,%ebp,1),%ebx
+	xorl	%eax,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$22,%ebx
+	movl	32(%esi),%ebp
+	addl	%ecx,%ebx
+/* step 9: A = B + rol(A + F(B,C,D) + X[8] + K, 7) */
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	leal	1770035416(%eax,%ebp,1),%eax
+	xorl	%edx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$7,%eax
+	movl	36(%esi),%ebp
+	addl	%ebx,%eax
+/* step 10: D = A + rol(D + F(A,B,C) + X[9] + K, 12) */
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	leal	2336552879(%edx,%ebp,1),%edx
+	xorl	%ecx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$12,%edx
+	movl	40(%esi),%ebp
+	addl	%eax,%edx
+/* step 11: C = D + rol(C + F(D,A,B) + X[10] + K, 17) */
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	leal	4294925233(%ecx,%ebp,1),%ecx
+	xorl	%ebx,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$17,%ecx
+	movl	44(%esi),%ebp
+	addl	%edx,%ecx
+/* step 12: B = C + rol(B + F(C,D,A) + X[11] + K, 22) */
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	leal	2304563134(%ebx,%ebp,1),%ebx
+	xorl	%eax,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$22,%ebx
+	movl	48(%esi),%ebp
+	addl	%ecx,%ebx
+/* step 13: A = B + rol(A + F(B,C,D) + X[12] + K, 7) */
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	leal	1804603682(%eax,%ebp,1),%eax
+	xorl	%edx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$7,%eax
+	movl	52(%esi),%ebp
+	addl	%ebx,%eax
+/* step 14: D = A + rol(D + F(A,B,C) + X[13] + K, 12) */
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	leal	4254626195(%edx,%ebp,1),%edx
+	xorl	%ecx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$12,%edx
+	movl	56(%esi),%ebp
+	addl	%eax,%edx
+/* step 15: C = D + rol(C + F(D,A,B) + X[14] + K, 17) */
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	leal	2792965006(%ecx,%ebp,1),%ecx
+	xorl	%ebx,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$17,%ecx
+	movl	60(%esi),%ebp
+	addl	%edx,%ecx
+/* step 16: B = C + rol(B + F(C,D,A) + X[15] + K, 22); also leaves %edi = C and %ebp = X[1] for the code that follows */
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	leal	1236535329(%ebx,%ebp,1),%ebx
+	xorl	%eax,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$22,%ebx
+	movl	4(%esi),%ebp
+	addl	%ecx,%ebx
+/* Round 2, 16 steps: a = b + rol(a + G(b,c,d) + X[k] + K, s), G(b,c,d) = ((b ^ c) & d) ^ c, s cycles 5,9,14,20, k = 1,6,11,0,5,10,15,4,9,14,3,8,13,2,7,12. */
+/* %eax,%ebx,%ecx,%edx hold A,B,C,D; %esi -> current block; on entry %edi = C and %ebp = X[1] (set by the preceding instructions). */
+/* step 1: A = B + rol(A + G(B,C,D) + X[1] + K, 5) */
+	leal	4129170786(%eax,%ebp,1),%eax	/* A + X[1] + K */
+	xorl	%ebx,%edi	/* B ^ C */
+	andl	%edx,%edi	/* (B ^ C) & D */
+	movl	24(%esi),%ebp	/* preload X[6] for the next step */
+	xorl	%ecx,%edi	/* ((B ^ C) & D) ^ C = G(B,C,D) */
+	addl	%edi,%eax
+	movl	%ebx,%edi	/* set up the next step: %edi = B */
+	roll	$5,%eax
+	addl	%ebx,%eax	/* A = B + rotated sum */
+/* step 2: D = A + rol(D + G(A,B,C) + X[6] + K, 9) */
+	leal	3225465664(%edx,%ebp,1),%edx
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	movl	44(%esi),%ebp
+	xorl	%ebx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$9,%edx
+	addl	%eax,%edx
+/* step 3: C = D + rol(C + G(D,A,B) + X[11] + K, 14) */
+	leal	643717713(%ecx,%ebp,1),%ecx
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	movl	(%esi),%ebp
+	xorl	%eax,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$14,%ecx
+	addl	%edx,%ecx
+/* step 4: B = C + rol(B + G(C,D,A) + X[0] + K, 20) */
+	leal	3921069994(%ebx,%ebp,1),%ebx
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	movl	20(%esi),%ebp
+	xorl	%edx,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+/* step 5: A = B + rol(A + G(B,C,D) + X[5] + K, 5) */
+	leal	3593408605(%eax,%ebp,1),%eax
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	movl	40(%esi),%ebp
+	xorl	%ecx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%eax
+	addl	%ebx,%eax
+/* step 6: D = A + rol(D + G(A,B,C) + X[10] + K, 9) */
+	leal	38016083(%edx,%ebp,1),%edx
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	movl	60(%esi),%ebp
+	xorl	%ebx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$9,%edx
+	addl	%eax,%edx
+/* step 7: C = D + rol(C + G(D,A,B) + X[15] + K, 14) */
+	leal	3634488961(%ecx,%ebp,1),%ecx
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	movl	16(%esi),%ebp
+	xorl	%eax,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$14,%ecx
+	addl	%edx,%ecx
+/* step 8: B = C + rol(B + G(C,D,A) + X[4] + K, 20) */
+	leal	3889429448(%ebx,%ebp,1),%ebx
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	movl	36(%esi),%ebp
+	xorl	%edx,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+/* step 9: A = B + rol(A + G(B,C,D) + X[9] + K, 5) */
+	leal	568446438(%eax,%ebp,1),%eax
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	movl	56(%esi),%ebp
+	xorl	%ecx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%eax
+	addl	%ebx,%eax
+/* step 10: D = A + rol(D + G(A,B,C) + X[14] + K, 9) */
+	leal	3275163606(%edx,%ebp,1),%edx
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	movl	12(%esi),%ebp
+	xorl	%ebx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$9,%edx
+	addl	%eax,%edx
+/* step 11: C = D + rol(C + G(D,A,B) + X[3] + K, 14) */
+	leal	4107603335(%ecx,%ebp,1),%ecx
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	movl	32(%esi),%ebp
+	xorl	%eax,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$14,%ecx
+	addl	%edx,%ecx
+/* step 12: B = C + rol(B + G(C,D,A) + X[8] + K, 20) */
+	leal	1163531501(%ebx,%ebp,1),%ebx
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	movl	52(%esi),%ebp
+	xorl	%edx,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+/* step 13: A = B + rol(A + G(B,C,D) + X[13] + K, 5) */
+	leal	2850285829(%eax,%ebp,1),%eax
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	movl	8(%esi),%ebp
+	xorl	%ecx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%eax
+	addl	%ebx,%eax
+/* step 14: D = A + rol(D + G(A,B,C) + X[2] + K, 9) */
+	leal	4243563512(%edx,%ebp,1),%edx
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	movl	28(%esi),%ebp
+	xorl	%ebx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$9,%edx
+	addl	%eax,%edx
+/* step 15: C = D + rol(C + G(D,A,B) + X[7] + K, 14) */
+	leal	1735328473(%ecx,%ebp,1),%ecx
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	movl	48(%esi),%ebp
+	xorl	%eax,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$14,%ecx
+	addl	%edx,%ecx
+/* step 16: B = C + rol(B + G(C,D,A) + X[12] + K, 20); also leaves %edi = C and %ebp = X[5] for the code that follows */
+	leal	2368359562(%ebx,%ebp,1),%ebx
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	movl	20(%esi),%ebp
+	xorl	%edx,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+/* Round 3, 16 steps: a = b + rol(a + H(b,c,d) + X[k] + K, s), H(b,c,d) = b ^ c ^ d, s cycles 4,11,16,23, k = 5,8,11,14,1,4,7,10,13,0,3,6,9,12,15,2. */
+/* On entry %edi = C and %ebp = X[5]. The final "+ b" of each odd step is deferred into the following even step (its second instruction). */
+/* step 1: A = B + rol(A + H(B,C,D) + X[5] + K, 4) */
+	xorl	%edx,%edi	/* C ^ D */
+	xorl	%ebx,%edi	/* B ^ C ^ D = H(B,C,D) */
+	leal	4294588738(%eax,%ebp,1),%eax	/* A + X[5] + K */
+	addl	%edi,%eax
+	roll	$4,%eax
+	movl	32(%esi),%ebp	/* preload X[8] for the next step */
+	movl	%ebx,%edi	/* set up the next step: %edi = B */
+/* step 2: D = A + rol(D + H(A,B,C) + X[8] + K, 11); the addl %ebx,%eax below completes step 1 */
+	leal	2272392833(%edx,%ebp,1),%edx
+	addl	%ebx,%eax
+	xorl	%ecx,%edi
+	xorl	%eax,%edi
+	movl	44(%esi),%ebp
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$11,%edx
+	addl	%eax,%edx
+/* step 3: C = D + rol(C + H(D,A,B) + X[11] + K, 16) */
+	xorl	%ebx,%edi
+	xorl	%edx,%edi
+	leal	1839030562(%ecx,%ebp,1),%ecx
+	addl	%edi,%ecx
+	roll	$16,%ecx
+	movl	56(%esi),%ebp
+	movl	%edx,%edi
+/* step 4: B = C + rol(B + H(C,D,A) + X[14] + K, 23) */
+	leal	4259657740(%ebx,%ebp,1),%ebx
+	addl	%edx,%ecx
+	xorl	%eax,%edi
+	xorl	%ecx,%edi
+	movl	4(%esi),%ebp
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$23,%ebx
+	addl	%ecx,%ebx
+/* step 5: A = B + rol(A + H(B,C,D) + X[1] + K, 4) */
+	xorl	%edx,%edi
+	xorl	%ebx,%edi
+	leal	2763975236(%eax,%ebp,1),%eax
+	addl	%edi,%eax
+	roll	$4,%eax
+	movl	16(%esi),%ebp
+	movl	%ebx,%edi
+/* step 6: D = A + rol(D + H(A,B,C) + X[4] + K, 11) */
+	leal	1272893353(%edx,%ebp,1),%edx
+	addl	%ebx,%eax
+	xorl	%ecx,%edi
+	xorl	%eax,%edi
+	movl	28(%esi),%ebp
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$11,%edx
+	addl	%eax,%edx
+/* step 7: C = D + rol(C + H(D,A,B) + X[7] + K, 16) */
+	xorl	%ebx,%edi
+	xorl	%edx,%edi
+	leal	4139469664(%ecx,%ebp,1),%ecx
+	addl	%edi,%ecx
+	roll	$16,%ecx
+	movl	40(%esi),%ebp
+	movl	%edx,%edi
+/* step 8: B = C + rol(B + H(C,D,A) + X[10] + K, 23) */
+	leal	3200236656(%ebx,%ebp,1),%ebx
+	addl	%edx,%ecx
+	xorl	%eax,%edi
+	xorl	%ecx,%edi
+	movl	52(%esi),%ebp
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$23,%ebx
+	addl	%ecx,%ebx
+/* step 9: A = B + rol(A + H(B,C,D) + X[13] + K, 4) */
+	xorl	%edx,%edi
+	xorl	%ebx,%edi
+	leal	681279174(%eax,%ebp,1),%eax
+	addl	%edi,%eax
+	roll	$4,%eax
+	movl	(%esi),%ebp
+	movl	%ebx,%edi
+/* step 10: D = A + rol(D + H(A,B,C) + X[0] + K, 11) */
+	leal	3936430074(%edx,%ebp,1),%edx
+	addl	%ebx,%eax
+	xorl	%ecx,%edi
+	xorl	%eax,%edi
+	movl	12(%esi),%ebp
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$11,%edx
+	addl	%eax,%edx
+/* step 11: C = D + rol(C + H(D,A,B) + X[3] + K, 16) */
+	xorl	%ebx,%edi
+	xorl	%edx,%edi
+	leal	3572445317(%ecx,%ebp,1),%ecx
+	addl	%edi,%ecx
+	roll	$16,%ecx
+	movl	24(%esi),%ebp
+	movl	%edx,%edi
+/* step 12: B = C + rol(B + H(C,D,A) + X[6] + K, 23) */
+	leal	76029189(%ebx,%ebp,1),%ebx
+	addl	%edx,%ecx
+	xorl	%eax,%edi
+	xorl	%ecx,%edi
+	movl	36(%esi),%ebp
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$23,%ebx
+	addl	%ecx,%ebx
+/* step 13: A = B + rol(A + H(B,C,D) + X[9] + K, 4) */
+	xorl	%edx,%edi
+	xorl	%ebx,%edi
+	leal	3654602809(%eax,%ebp,1),%eax
+	addl	%edi,%eax
+	roll	$4,%eax
+	movl	48(%esi),%ebp
+	movl	%ebx,%edi
+/* step 14: D = A + rol(D + H(A,B,C) + X[12] + K, 11) */
+	leal	3873151461(%edx,%ebp,1),%edx
+	addl	%ebx,%eax
+	xorl	%ecx,%edi
+	xorl	%eax,%edi
+	movl	60(%esi),%ebp
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$11,%edx
+	addl	%eax,%edx
+/* step 15: C = D + rol(C + H(D,A,B) + X[15] + K, 16) */
+	xorl	%ebx,%edi
+	xorl	%edx,%edi
+	leal	530742520(%ecx,%ebp,1),%ecx
+	addl	%edi,%ecx
+	roll	$16,%ecx
+	movl	8(%esi),%ebp
+	movl	%edx,%edi
+/* step 16: B = C + rol(B + H(C,D,A) + X[2] + K, 23); also leaves %edi = -1 and %ebp = X[0] for the code that follows */
+	leal	3299628645(%ebx,%ebp,1),%ebx
+	addl	%edx,%ecx
+	xorl	%eax,%edi
+	xorl	%ecx,%edi
+	movl	(%esi),%ebp
+	addl	%edi,%ebx
+	movl	$-1,%edi
+	roll	$23,%ebx
+	addl	%ecx,%ebx
+/* Round 4, steps 1-15 of 16: a = b + rol(a + I(b,c,d) + X[k] + K, s), I(b,c,d) = c ^ (b | ~d), s cycles 6,10,15,21, k = 0,7,14,5,12,3,10,1,8,15,6,13,4,11,2,9. */
+/* On entry %edi = -1 and %ebp = X[0]; ~d is formed as (-1 ^ d), and each step prepares the next step's ~d before finishing. */
+/* step 1: A = B + rol(A + I(B,C,D) + X[0] + K, 6) */
+	xorl	%edx,%edi	/* -1 ^ D = ~D */
+	orl	%ebx,%edi	/* B | ~D */
+	leal	4096336452(%eax,%ebp,1),%eax	/* A + X[0] + K */
+	xorl	%ecx,%edi	/* C ^ (B | ~D) = I(B,C,D) */
+	movl	28(%esi),%ebp	/* preload X[7] for the next step */
+	addl	%edi,%eax
+	movl	$-1,%edi
+	roll	$6,%eax
+	xorl	%ecx,%edi	/* ~C, the "~d" input of the next step */
+	addl	%ebx,%eax	/* A = B + rotated sum */
+/* step 2: D = A + rol(D + I(A,B,C) + X[7] + K, 10) */
+	orl	%eax,%edi
+	leal	1126891415(%edx,%ebp,1),%edx
+	xorl	%ebx,%edi
+	movl	56(%esi),%ebp
+	addl	%edi,%edx
+	movl	$-1,%edi
+	roll	$10,%edx
+	xorl	%ebx,%edi
+	addl	%eax,%edx
+/* step 3: C = D + rol(C + I(D,A,B) + X[14] + K, 15) */
+	orl	%edx,%edi
+	leal	2878612391(%ecx,%ebp,1),%ecx
+	xorl	%eax,%edi
+	movl	20(%esi),%ebp
+	addl	%edi,%ecx
+	movl	$-1,%edi
+	roll	$15,%ecx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+/* step 4: B = C + rol(B + I(C,D,A) + X[5] + K, 21) */
+	orl	%ecx,%edi
+	leal	4237533241(%ebx,%ebp,1),%ebx
+	xorl	%edx,%edi
+	movl	48(%esi),%ebp
+	addl	%edi,%ebx
+	movl	$-1,%edi
+	roll	$21,%ebx
+	xorl	%edx,%edi
+	addl	%ecx,%ebx
+/* step 5: A = B + rol(A + I(B,C,D) + X[12] + K, 6) */
+	orl	%ebx,%edi
+	leal	1700485571(%eax,%ebp,1),%eax
+	xorl	%ecx,%edi
+	movl	12(%esi),%ebp
+	addl	%edi,%eax
+	movl	$-1,%edi
+	roll	$6,%eax
+	xorl	%ecx,%edi
+	addl	%ebx,%eax
+/* step 6: D = A + rol(D + I(A,B,C) + X[3] + K, 10) */
+	orl	%eax,%edi
+	leal	2399980690(%edx,%ebp,1),%edx
+	xorl	%ebx,%edi
+	movl	40(%esi),%ebp
+	addl	%edi,%edx
+	movl	$-1,%edi
+	roll	$10,%edx
+	xorl	%ebx,%edi
+	addl	%eax,%edx
+/* step 7: C = D + rol(C + I(D,A,B) + X[10] + K, 15) */
+	orl	%edx,%edi
+	leal	4293915773(%ecx,%ebp,1),%ecx
+	xorl	%eax,%edi
+	movl	4(%esi),%ebp
+	addl	%edi,%ecx
+	movl	$-1,%edi
+	roll	$15,%ecx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+/* step 8: B = C + rol(B + I(C,D,A) + X[1] + K, 21) */
+	orl	%ecx,%edi
+	leal	2240044497(%ebx,%ebp,1),%ebx
+	xorl	%edx,%edi
+	movl	32(%esi),%ebp
+	addl	%edi,%ebx
+	movl	$-1,%edi
+	roll	$21,%ebx
+	xorl	%edx,%edi
+	addl	%ecx,%ebx
+/* step 9: A = B + rol(A + I(B,C,D) + X[8] + K, 6) */
+	orl	%ebx,%edi
+	leal	1873313359(%eax,%ebp,1),%eax
+	xorl	%ecx,%edi
+	movl	60(%esi),%ebp
+	addl	%edi,%eax
+	movl	$-1,%edi
+	roll	$6,%eax
+	xorl	%ecx,%edi
+	addl	%ebx,%eax
+/* step 10: D = A + rol(D + I(A,B,C) + X[15] + K, 10) */
+	orl	%eax,%edi
+	leal	4264355552(%edx,%ebp,1),%edx
+	xorl	%ebx,%edi
+	movl	24(%esi),%ebp
+	addl	%edi,%edx
+	movl	$-1,%edi
+	roll	$10,%edx
+	xorl	%ebx,%edi
+	addl	%eax,%edx
+/* step 11: C = D + rol(C + I(D,A,B) + X[6] + K, 15) */
+	orl	%edx,%edi
+	leal	2734768916(%ecx,%ebp,1),%ecx
+	xorl	%eax,%edi
+	movl	52(%esi),%ebp
+	addl	%edi,%ecx
+	movl	$-1,%edi
+	roll	$15,%ecx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+/* step 12: B = C + rol(B + I(C,D,A) + X[13] + K, 21) */
+	orl	%ecx,%edi
+	leal	1309151649(%ebx,%ebp,1),%ebx
+	xorl	%edx,%edi
+	movl	16(%esi),%ebp
+	addl	%edi,%ebx
+	movl	$-1,%edi
+	roll	$21,%ebx
+	xorl	%edx,%edi
+	addl	%ecx,%ebx
+/* step 13: A = B + rol(A + I(B,C,D) + X[4] + K, 6) */
+	orl	%ebx,%edi
+	leal	4149444226(%eax,%ebp,1),%eax
+	xorl	%ecx,%edi
+	movl	44(%esi),%ebp
+	addl	%edi,%eax
+	movl	$-1,%edi
+	roll	$6,%eax
+	xorl	%ecx,%edi
+	addl	%ebx,%eax
+/* step 14: D = A + rol(D + I(A,B,C) + X[11] + K, 10) */
+	orl	%eax,%edi
+	leal	3174756917(%edx,%ebp,1),%edx
+	xorl	%ebx,%edi
+	movl	8(%esi),%ebp
+	addl	%edi,%edx
+	movl	$-1,%edi
+	roll	$10,%edx
+	xorl	%ebx,%edi
+	addl	%eax,%edx
+/* step 15: C = D + rol(C + I(D,A,B) + X[2] + K, 15); also leaves %edi = ~A and %ebp = X[9] for the code that follows */
+	orl	%edx,%edi
+	leal	718787259(%ecx,%ebp,1),%ecx
+	xorl	%eax,%edi
+	movl	36(%esi),%ebp
+	addl	%edi,%ecx
+	movl	$-1,%edi
+	roll	$15,%ecx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+/* Round 4 step 16: B = C + rol(B + I(C,D,A) + X[9] + K, 21), I(b,c,d) = c ^ (b | ~d); on entry %edi = ~A, %ebp = X[9]. Interleaved with the state update and loop test. */
+	orl	%ecx,%edi	/* C | ~A */
+	leal	3951481745(%ebx,%ebp,1),%ebx	/* B + X[9] + K */
+	xorl	%edx,%edi	/* D ^ (C | ~A) = I(C,D,A) */
+	movl	24(%esp),%ebp	/* reload arg 1 (state pointer); it sits at 24(%esp) after the five pushes */
+	addl	%edi,%ebx
+	addl	$64,%esi	/* advance the input pointer to the next 64-byte block */
+	roll	$21,%ebx
+	movl	(%ebp),%edi
+	addl	%ecx,%ebx	/* completes step 16 */
+	addl	%edi,%eax	/* A += state[0] */
+	movl	4(%ebp),%edi
+	addl	%edi,%ebx	/* B += state[1] */
+	movl	8(%ebp),%edi
+	addl	%edi,%ecx	/* C += state[2] */
+	movl	12(%ebp),%edi
+	addl	%edi,%edx	/* D += state[3] */
+	movl	%eax,(%ebp)	/* write the updated state back */
+	movl	%ebx,4(%ebp)
+	movl	(%esp),%edi	/* saved address of the last input block */
+	movl	%ecx,8(%ebp)
+	movl	%edx,12(%ebp)
+	cmpl	%esi,%edi
+	jae	.L000start	/* unsigned: repeat while last-block address >= %esi; the test is at the bottom, so one block is always processed before the first check */
+	popl	%eax	/* discard the saved last-block address */
+	popl	%ebx
+	popl	%ebp
+	popl	%edi
+	popl	%esi
+	ret
+.size	md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
+#endif
diff --git a/third_party/boringssl/linux-x86/crypto/fipsmodule/sha1-586.S b/third_party/boringssl/linux-x86/crypto/fipsmodule/sha1-586.S
new file mode 100644
index 0000000..2c022ec
--- /dev/null
+++ b/third_party/boringssl/linux-x86/crypto/fipsmodule/sha1-586.S
@@ -0,0 +1,3801 @@
+#if defined(__i386__)
+.text
+.globl	sha1_block_data_order
+.hidden	sha1_block_data_order
+.type	sha1_block_data_order,@function
+.align	16
+sha1_block_data_order:
+.L_sha1_block_data_order_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	call	.L000pic_point
+.L000pic_point:
+	popl	%ebp
+	leal	OPENSSL_ia32cap_P-.L000pic_point(%ebp),%esi
+	leal	.LK_XX_XX-.L000pic_point(%ebp),%ebp
+	movl	(%esi),%eax
+	movl	4(%esi),%edx
+	testl	$512,%edx
+	jz	.L001x86
+	movl	8(%esi),%ecx
+	testl	$16777216,%eax
+	jz	.L001x86
+	andl	$268435456,%edx
+	andl	$1073741824,%eax
+	orl	%edx,%eax
+	cmpl	$1342177280,%eax
+	je	.Lavx_shortcut
+	jmp	.Lssse3_shortcut
+.align	16
+.L001x86:
+	movl	20(%esp),%ebp
+	movl	24(%esp),%esi
+	movl	28(%esp),%eax
+	subl	$76,%esp
+	shll	$6,%eax
+	addl	%esi,%eax
+	movl	%eax,104(%esp)
+	movl	16(%ebp),%edi
+	jmp	.L002loop
+.align	16
+.L002loop:
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	movl	%eax,(%esp)
+	movl	%ebx,4(%esp)
+	movl	%ecx,8(%esp)
+	movl	%edx,12(%esp)
+	movl	16(%esi),%eax
+	movl	20(%esi),%ebx
+	movl	24(%esi),%ecx
+	movl	28(%esi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	movl	%eax,16(%esp)
+	movl	%ebx,20(%esp)
+	movl	%ecx,24(%esp)
+	movl	%edx,28(%esp)
+	movl	32(%esi),%eax
+	movl	36(%esi),%ebx
+	movl	40(%esi),%ecx
+	movl	44(%esi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	movl	%eax,32(%esp)
+	movl	%ebx,36(%esp)
+	movl	%ecx,40(%esp)
+	movl	%edx,44(%esp)
+	movl	48(%esi),%eax
+	movl	52(%esi),%ebx
+	movl	56(%esi),%ecx
+	movl	60(%esi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	movl	%eax,48(%esp)
+	movl	%ebx,52(%esp)
+	movl	%ecx,56(%esp)
+	movl	%edx,60(%esp)
+	movl	%esi,100(%esp)
+	movl	(%ebp),%eax
+	movl	4(%ebp),%ebx
+	movl	8(%ebp),%ecx
+	movl	12(%ebp),%edx
+
+	movl	%ecx,%esi
+	movl	%eax,%ebp
+	roll	$5,%ebp
+	xorl	%edx,%esi
+	addl	%edi,%ebp
+	movl	(%esp),%edi
+	andl	%ebx,%esi
+	rorl	$2,%ebx
+	xorl	%edx,%esi
+	leal	1518500249(%ebp,%edi,1),%ebp
+	addl	%esi,%ebp
+
+	movl	%ebx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	xorl	%ecx,%edi
+	addl	%edx,%ebp
+	movl	4(%esp),%edx
+	andl	%eax,%edi
+	rorl	$2,%eax
+	xorl	%ecx,%edi
+	leal	1518500249(%ebp,%edx,1),%ebp
+	addl	%edi,%ebp
+
+	movl	%eax,%edx
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	xorl	%ebx,%edx
+	addl	%ecx,%ebp
+	movl	8(%esp),%ecx
+	andl	%esi,%edx
+	rorl	$2,%esi
+	xorl	%ebx,%edx
+	leal	1518500249(%ebp,%ecx,1),%ebp
+	addl	%edx,%ebp
+
+	movl	%esi,%ecx
+	movl	%ebp,%edx
+	roll	$5,%ebp
+	xorl	%eax,%ecx
+	addl	%ebx,%ebp
+	movl	12(%esp),%ebx
+	andl	%edi,%ecx
+	rorl	$2,%edi
+	xorl	%eax,%ecx
+	leal	1518500249(%ebp,%ebx,1),%ebp
+	addl	%ecx,%ebp
+
+	movl	%edi,%ebx
+	movl	%ebp,%ecx
+	roll	$5,%ebp
+	xorl	%esi,%ebx
+	addl	%eax,%ebp
+	movl	16(%esp),%eax
+	andl	%edx,%ebx
+	rorl	$2,%edx
+	xorl	%esi,%ebx
+	leal	1518500249(%ebp,%eax,1),%ebp
+	addl	%ebx,%ebp
+
+	movl	%edx,%eax
+	movl	%ebp,%ebx
+	roll	$5,%ebp
+	xorl	%edi,%eax
+	addl	%esi,%ebp
+	movl	20(%esp),%esi
+	andl	%ecx,%eax
+	rorl	$2,%ecx
+	xorl	%edi,%eax
+	leal	1518500249(%ebp,%esi,1),%ebp
+	addl	%eax,%ebp
+
+	movl	%ecx,%esi
+	movl	%ebp,%eax
+	roll	$5,%ebp
+	xorl	%edx,%esi
+	addl	%edi,%ebp
+	movl	24(%esp),%edi
+	andl	%ebx,%esi
+	rorl	$2,%ebx
+	xorl	%edx,%esi
+	leal	1518500249(%ebp,%edi,1),%ebp
+	addl	%esi,%ebp
+
+	movl	%ebx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	xorl	%ecx,%edi
+	addl	%edx,%ebp
+	movl	28(%esp),%edx
+	andl	%eax,%edi
+	rorl	$2,%eax
+	xorl	%ecx,%edi
+	leal	1518500249(%ebp,%edx,1),%ebp
+	addl	%edi,%ebp
+
+	movl	%eax,%edx
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	xorl	%ebx,%edx
+	addl	%ecx,%ebp
+	movl	32(%esp),%ecx
+	andl	%esi,%edx
+	rorl	$2,%esi
+	xorl	%ebx,%edx
+	leal	1518500249(%ebp,%ecx,1),%ebp
+	addl	%edx,%ebp
+
+	movl	%esi,%ecx
+	movl	%ebp,%edx
+	roll	$5,%ebp
+	xorl	%eax,%ecx
+	addl	%ebx,%ebp
+	movl	36(%esp),%ebx
+	andl	%edi,%ecx
+	rorl	$2,%edi
+	xorl	%eax,%ecx
+	leal	1518500249(%ebp,%ebx,1),%ebp
+	addl	%ecx,%ebp
+
+	movl	%edi,%ebx
+	movl	%ebp,%ecx
+	roll	$5,%ebp
+	xorl	%esi,%ebx
+	addl	%eax,%ebp
+	movl	40(%esp),%eax
+	andl	%edx,%ebx
+	rorl	$2,%edx
+	xorl	%esi,%ebx
+	leal	1518500249(%ebp,%eax,1),%ebp
+	addl	%ebx,%ebp
+
+	movl	%edx,%eax
+	movl	%ebp,%ebx
+	roll	$5,%ebp
+	xorl	%edi,%eax
+	addl	%esi,%ebp
+	movl	44(%esp),%esi
+	andl	%ecx,%eax
+	rorl	$2,%ecx
+	xorl	%edi,%eax
+	leal	1518500249(%ebp,%esi,1),%ebp
+	addl	%eax,%ebp
+
+	movl	%ecx,%esi
+	movl	%ebp,%eax
+	roll	$5,%ebp
+	xorl	%edx,%esi
+	addl	%edi,%ebp
+	movl	48(%esp),%edi
+	andl	%ebx,%esi
+	rorl	$2,%ebx
+	xorl	%edx,%esi
+	leal	1518500249(%ebp,%edi,1),%ebp
+	addl	%esi,%ebp
+
+	movl	%ebx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	xorl	%ecx,%edi
+	addl	%edx,%ebp
+	movl	52(%esp),%edx
+	andl	%eax,%edi
+	rorl	$2,%eax
+	xorl	%ecx,%edi
+	leal	1518500249(%ebp,%edx,1),%ebp
+	addl	%edi,%ebp
+
+	movl	%eax,%edx
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	xorl	%ebx,%edx
+	addl	%ecx,%ebp
+	movl	56(%esp),%ecx
+	andl	%esi,%edx
+	rorl	$2,%esi
+	xorl	%ebx,%edx
+	leal	1518500249(%ebp,%ecx,1),%ebp
+	addl	%edx,%ebp
+
+	movl	%esi,%ecx
+	movl	%ebp,%edx
+	roll	$5,%ebp
+	xorl	%eax,%ecx
+	addl	%ebx,%ebp
+	movl	60(%esp),%ebx
+	andl	%edi,%ecx
+	rorl	$2,%edi
+	xorl	%eax,%ecx
+	leal	1518500249(%ebp,%ebx,1),%ebp
+	movl	(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edi,%ebp
+	xorl	8(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	32(%esp),%ebx
+	andl	%edx,%ebp
+	xorl	52(%esp),%ebx
+	roll	$1,%ebx
+	xorl	%esi,%ebp
+	addl	%ebp,%eax
+	movl	%ecx,%ebp
+	rorl	$2,%edx
+	movl	%ebx,(%esp)
+	roll	$5,%ebp
+	leal	1518500249(%ebx,%eax,1),%ebx
+	movl	4(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%edx,%ebp
+	xorl	12(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	36(%esp),%eax
+	andl	%ecx,%ebp
+	xorl	56(%esp),%eax
+	roll	$1,%eax
+	xorl	%edi,%ebp
+	addl	%ebp,%esi
+	movl	%ebx,%ebp
+	rorl	$2,%ecx
+	movl	%eax,4(%esp)
+	roll	$5,%ebp
+	leal	1518500249(%eax,%esi,1),%eax
+	movl	8(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ecx,%ebp
+	xorl	16(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	40(%esp),%esi
+	andl	%ebx,%ebp
+	xorl	60(%esp),%esi
+	roll	$1,%esi
+	xorl	%edx,%ebp
+	addl	%ebp,%edi
+	movl	%eax,%ebp
+	rorl	$2,%ebx
+	movl	%esi,8(%esp)
+	roll	$5,%ebp
+	leal	1518500249(%esi,%edi,1),%esi
+	movl	12(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%ebx,%ebp
+	xorl	20(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	44(%esp),%edi
+	andl	%eax,%ebp
+	xorl	(%esp),%edi
+	roll	$1,%edi
+	xorl	%ecx,%ebp
+	addl	%ebp,%edx
+	movl	%esi,%ebp
+	rorl	$2,%eax
+	movl	%edi,12(%esp)
+	roll	$5,%ebp
+	leal	1518500249(%edi,%edx,1),%edi
+	movl	16(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%esi,%ebp
+	xorl	24(%esp),%edx
+	xorl	%eax,%ebp
+	xorl	48(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	4(%esp),%edx
+	roll	$1,%edx
+	addl	%ebp,%ecx
+	rorl	$2,%esi
+	movl	%edi,%ebp
+	roll	$5,%ebp
+	movl	%edx,16(%esp)
+	leal	1859775393(%edx,%ecx,1),%edx
+	movl	20(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%edi,%ebp
+	xorl	28(%esp),%ecx
+	xorl	%esi,%ebp
+	xorl	52(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	8(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebp,%ebx
+	rorl	$2,%edi
+	movl	%edx,%ebp
+	roll	$5,%ebp
+	movl	%ecx,20(%esp)
+	leal	1859775393(%ecx,%ebx,1),%ecx
+	movl	24(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edx,%ebp
+	xorl	32(%esp),%ebx
+	xorl	%edi,%ebp
+	xorl	56(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	12(%esp),%ebx
+	roll	$1,%ebx
+	addl	%ebp,%eax
+	rorl	$2,%edx
+	movl	%ecx,%ebp
+	roll	$5,%ebp
+	movl	%ebx,24(%esp)
+	leal	1859775393(%ebx,%eax,1),%ebx
+	movl	28(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%ecx,%ebp
+	xorl	36(%esp),%eax
+	xorl	%edx,%ebp
+	xorl	60(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	16(%esp),%eax
+	roll	$1,%eax
+	addl	%ebp,%esi
+	rorl	$2,%ecx
+	movl	%ebx,%ebp
+	roll	$5,%ebp
+	movl	%eax,28(%esp)
+	leal	1859775393(%eax,%esi,1),%eax
+	movl	32(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ebx,%ebp
+	xorl	40(%esp),%esi
+	xorl	%ecx,%ebp
+	xorl	(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	20(%esp),%esi
+	roll	$1,%esi
+	addl	%ebp,%edi
+	rorl	$2,%ebx
+	movl	%eax,%ebp
+	roll	$5,%ebp
+	movl	%esi,32(%esp)
+	leal	1859775393(%esi,%edi,1),%esi
+	movl	36(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%eax,%ebp
+	xorl	44(%esp),%edi
+	xorl	%ebx,%ebp
+	xorl	4(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	24(%esp),%edi
+	roll	$1,%edi
+	addl	%ebp,%edx
+	rorl	$2,%eax
+	movl	%esi,%ebp
+	roll	$5,%ebp
+	movl	%edi,36(%esp)
+	leal	1859775393(%edi,%edx,1),%edi
+	movl	40(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%esi,%ebp
+	xorl	48(%esp),%edx
+	xorl	%eax,%ebp
+	xorl	8(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	28(%esp),%edx
+	roll	$1,%edx
+	addl	%ebp,%ecx
+	rorl	$2,%esi
+	movl	%edi,%ebp
+	roll	$5,%ebp
+	movl	%edx,40(%esp)
+	leal	1859775393(%edx,%ecx,1),%edx
+	movl	44(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%edi,%ebp
+	xorl	52(%esp),%ecx
+	xorl	%esi,%ebp
+	xorl	12(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	32(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebp,%ebx
+	rorl	$2,%edi
+	movl	%edx,%ebp
+	roll	$5,%ebp
+	movl	%ecx,44(%esp)
+	leal	1859775393(%ecx,%ebx,1),%ecx
+	movl	48(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edx,%ebp
+	xorl	56(%esp),%ebx
+	xorl	%edi,%ebp
+	xorl	16(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	36(%esp),%ebx
+	roll	$1,%ebx
+	addl	%ebp,%eax
+	rorl	$2,%edx
+	movl	%ecx,%ebp
+	roll	$5,%ebp
+	movl	%ebx,48(%esp)
+	leal	1859775393(%ebx,%eax,1),%ebx
+	movl	52(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%ecx,%ebp
+	xorl	60(%esp),%eax
+	xorl	%edx,%ebp
+	xorl	20(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	40(%esp),%eax
+	roll	$1,%eax
+	addl	%ebp,%esi
+	rorl	$2,%ecx
+	movl	%ebx,%ebp
+	roll	$5,%ebp
+	movl	%eax,52(%esp)
+	leal	1859775393(%eax,%esi,1),%eax
+	movl	56(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ebx,%ebp
+	xorl	(%esp),%esi
+	xorl	%ecx,%ebp
+	xorl	24(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	44(%esp),%esi
+	roll	$1,%esi
+	addl	%ebp,%edi
+	rorl	$2,%ebx
+	movl	%eax,%ebp
+	roll	$5,%ebp
+	movl	%esi,56(%esp)
+	leal	1859775393(%esi,%edi,1),%esi
+	movl	60(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%eax,%ebp
+	xorl	4(%esp),%edi
+	xorl	%ebx,%ebp
+	xorl	28(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	48(%esp),%edi
+	roll	$1,%edi
+	addl	%ebp,%edx
+	rorl	$2,%eax
+	movl	%esi,%ebp
+	roll	$5,%ebp
+	movl	%edi,60(%esp)
+	leal	1859775393(%edi,%edx,1),%edi
+	movl	(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%esi,%ebp
+	xorl	8(%esp),%edx
+	xorl	%eax,%ebp
+	xorl	32(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	52(%esp),%edx
+	roll	$1,%edx
+	addl	%ebp,%ecx
+	rorl	$2,%esi
+	movl	%edi,%ebp
+	roll	$5,%ebp
+	movl	%edx,(%esp)
+	leal	1859775393(%edx,%ecx,1),%edx
+	movl	4(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%edi,%ebp
+	xorl	12(%esp),%ecx
+	xorl	%esi,%ebp
+	xorl	36(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	56(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebp,%ebx
+	rorl	$2,%edi
+	movl	%edx,%ebp
+	roll	$5,%ebp
+	movl	%ecx,4(%esp)
+	leal	1859775393(%ecx,%ebx,1),%ecx
+	movl	8(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edx,%ebp
+	xorl	16(%esp),%ebx
+	xorl	%edi,%ebp
+	xorl	40(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	60(%esp),%ebx
+	roll	$1,%ebx
+	addl	%ebp,%eax
+	rorl	$2,%edx
+	movl	%ecx,%ebp
+	roll	$5,%ebp
+	movl	%ebx,8(%esp)
+	leal	1859775393(%ebx,%eax,1),%ebx
+	movl	12(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%ecx,%ebp
+	xorl	20(%esp),%eax
+	xorl	%edx,%ebp
+	xorl	44(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	(%esp),%eax
+	roll	$1,%eax
+	addl	%ebp,%esi
+	rorl	$2,%ecx
+	movl	%ebx,%ebp
+	roll	$5,%ebp
+	movl	%eax,12(%esp)
+	leal	1859775393(%eax,%esi,1),%eax
+	movl	16(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ebx,%ebp
+	xorl	24(%esp),%esi
+	xorl	%ecx,%ebp
+	xorl	48(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	4(%esp),%esi
+	roll	$1,%esi
+	addl	%ebp,%edi
+	rorl	$2,%ebx
+	movl	%eax,%ebp
+	roll	$5,%ebp
+	movl	%esi,16(%esp)
+	leal	1859775393(%esi,%edi,1),%esi
+	movl	20(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%eax,%ebp
+	xorl	28(%esp),%edi
+	xorl	%ebx,%ebp
+	xorl	52(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	8(%esp),%edi
+	roll	$1,%edi
+	addl	%ebp,%edx
+	rorl	$2,%eax
+	movl	%esi,%ebp
+	roll	$5,%ebp
+	movl	%edi,20(%esp)
+	leal	1859775393(%edi,%edx,1),%edi
+	movl	24(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%esi,%ebp
+	xorl	32(%esp),%edx
+	xorl	%eax,%ebp
+	xorl	56(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	12(%esp),%edx
+	roll	$1,%edx
+	addl	%ebp,%ecx
+	rorl	$2,%esi
+	movl	%edi,%ebp
+	roll	$5,%ebp
+	movl	%edx,24(%esp)
+	leal	1859775393(%edx,%ecx,1),%edx
+	movl	28(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%edi,%ebp
+	xorl	36(%esp),%ecx
+	xorl	%esi,%ebp
+	xorl	60(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	16(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebp,%ebx
+	rorl	$2,%edi
+	movl	%edx,%ebp
+	roll	$5,%ebp
+	movl	%ecx,28(%esp)
+	leal	1859775393(%ecx,%ebx,1),%ecx
+	movl	32(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edi,%ebp
+	xorl	40(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	(%esp),%ebx
+	andl	%edx,%ebp
+	xorl	20(%esp),%ebx
+	roll	$1,%ebx
+	addl	%eax,%ebp
+	rorl	$2,%edx
+	movl	%ecx,%eax
+	roll	$5,%eax
+	movl	%ebx,32(%esp)
+	leal	2400959708(%ebx,%ebp,1),%ebx
+	movl	%edi,%ebp
+	addl	%eax,%ebx
+	andl	%esi,%ebp
+	movl	36(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%edx,%ebp
+	xorl	44(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	4(%esp),%eax
+	andl	%ecx,%ebp
+	xorl	24(%esp),%eax
+	roll	$1,%eax
+	addl	%esi,%ebp
+	rorl	$2,%ecx
+	movl	%ebx,%esi
+	roll	$5,%esi
+	movl	%eax,36(%esp)
+	leal	2400959708(%eax,%ebp,1),%eax
+	movl	%edx,%ebp
+	addl	%esi,%eax
+	andl	%edi,%ebp
+	movl	40(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ecx,%ebp
+	xorl	48(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	8(%esp),%esi
+	andl	%ebx,%ebp
+	xorl	28(%esp),%esi
+	roll	$1,%esi
+	addl	%edi,%ebp
+	rorl	$2,%ebx
+	movl	%eax,%edi
+	roll	$5,%edi
+	movl	%esi,40(%esp)
+	leal	2400959708(%esi,%ebp,1),%esi
+	movl	%ecx,%ebp
+	addl	%edi,%esi
+	andl	%edx,%ebp
+	movl	44(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%ebx,%ebp
+	xorl	52(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	12(%esp),%edi
+	andl	%eax,%ebp
+	xorl	32(%esp),%edi
+	roll	$1,%edi
+	addl	%edx,%ebp
+	rorl	$2,%eax
+	movl	%esi,%edx
+	roll	$5,%edx
+	movl	%edi,44(%esp)
+	leal	2400959708(%edi,%ebp,1),%edi
+	movl	%ebx,%ebp
+	addl	%edx,%edi
+	andl	%ecx,%ebp
+	movl	48(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%eax,%ebp
+	xorl	56(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	16(%esp),%edx
+	andl	%esi,%ebp
+	xorl	36(%esp),%edx
+	roll	$1,%edx
+	addl	%ecx,%ebp
+	rorl	$2,%esi
+	movl	%edi,%ecx
+	roll	$5,%ecx
+	movl	%edx,48(%esp)
+	leal	2400959708(%edx,%ebp,1),%edx
+	movl	%eax,%ebp
+	addl	%ecx,%edx
+	andl	%ebx,%ebp
+	movl	52(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%esi,%ebp
+	xorl	60(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	20(%esp),%ecx
+	andl	%edi,%ebp
+	xorl	40(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebx,%ebp
+	rorl	$2,%edi
+	movl	%edx,%ebx
+	roll	$5,%ebx
+	movl	%ecx,52(%esp)
+	leal	2400959708(%ecx,%ebp,1),%ecx
+	movl	%esi,%ebp
+	addl	%ebx,%ecx
+	andl	%eax,%ebp
+	movl	56(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edi,%ebp
+	xorl	(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	24(%esp),%ebx
+	andl	%edx,%ebp
+	xorl	44(%esp),%ebx
+	roll	$1,%ebx
+	addl	%eax,%ebp
+	rorl	$2,%edx
+	movl	%ecx,%eax
+	roll	$5,%eax
+	movl	%ebx,56(%esp)
+	leal	2400959708(%ebx,%ebp,1),%ebx
+	movl	%edi,%ebp
+	addl	%eax,%ebx
+	andl	%esi,%ebp
+	movl	60(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%edx,%ebp
+	xorl	4(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	28(%esp),%eax
+	andl	%ecx,%ebp
+	xorl	48(%esp),%eax
+	roll	$1,%eax
+	addl	%esi,%ebp
+	rorl	$2,%ecx
+	movl	%ebx,%esi
+	roll	$5,%esi
+	movl	%eax,60(%esp)
+	leal	2400959708(%eax,%ebp,1),%eax
+	movl	%edx,%ebp
+	addl	%esi,%eax
+	andl	%edi,%ebp
+	movl	(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ecx,%ebp
+	xorl	8(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	32(%esp),%esi
+	andl	%ebx,%ebp
+	xorl	52(%esp),%esi
+	roll	$1,%esi
+	addl	%edi,%ebp
+	rorl	$2,%ebx
+	movl	%eax,%edi
+	roll	$5,%edi
+	movl	%esi,(%esp)
+	leal	2400959708(%esi,%ebp,1),%esi
+	movl	%ecx,%ebp
+	addl	%edi,%esi
+	andl	%edx,%ebp
+	movl	4(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%ebx,%ebp
+	xorl	12(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	36(%esp),%edi
+	andl	%eax,%ebp
+	xorl	56(%esp),%edi
+	roll	$1,%edi
+	addl	%edx,%ebp
+	rorl	$2,%eax
+	movl	%esi,%edx
+	roll	$5,%edx
+	movl	%edi,4(%esp)
+	leal	2400959708(%edi,%ebp,1),%edi
+	movl	%ebx,%ebp
+	addl	%edx,%edi
+	andl	%ecx,%ebp
+	movl	8(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%eax,%ebp
+	xorl	16(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	40(%esp),%edx
+	andl	%esi,%ebp
+	xorl	60(%esp),%edx
+	roll	$1,%edx
+	addl	%ecx,%ebp
+	rorl	$2,%esi
+	movl	%edi,%ecx
+	roll	$5,%ecx
+	movl	%edx,8(%esp)
+	leal	2400959708(%edx,%ebp,1),%edx
+	movl	%eax,%ebp
+	addl	%ecx,%edx
+	andl	%ebx,%ebp
+	movl	12(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%esi,%ebp
+	xorl	20(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	44(%esp),%ecx
+	andl	%edi,%ebp
+	xorl	(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebx,%ebp
+	rorl	$2,%edi
+	movl	%edx,%ebx
+	roll	$5,%ebx
+	movl	%ecx,12(%esp)
+	leal	2400959708(%ecx,%ebp,1),%ecx
+	movl	%esi,%ebp
+	addl	%ebx,%ecx
+	andl	%eax,%ebp
+	movl	16(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edi,%ebp
+	xorl	24(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	48(%esp),%ebx
+	andl	%edx,%ebp
+	xorl	4(%esp),%ebx
+	roll	$1,%ebx
+	addl	%eax,%ebp
+	rorl	$2,%edx
+	movl	%ecx,%eax
+	roll	$5,%eax
+	movl	%ebx,16(%esp)
+	leal	2400959708(%ebx,%ebp,1),%ebx
+	movl	%edi,%ebp
+	addl	%eax,%ebx
+	andl	%esi,%ebp
+	movl	20(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%edx,%ebp
+	xorl	28(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	52(%esp),%eax
+	andl	%ecx,%ebp
+	xorl	8(%esp),%eax
+	roll	$1,%eax
+	addl	%esi,%ebp
+	rorl	$2,%ecx
+	movl	%ebx,%esi
+	roll	$5,%esi
+	movl	%eax,20(%esp)
+	leal	2400959708(%eax,%ebp,1),%eax
+	movl	%edx,%ebp
+	addl	%esi,%eax
+	andl	%edi,%ebp
+	movl	24(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ecx,%ebp
+	xorl	32(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	56(%esp),%esi
+	andl	%ebx,%ebp
+	xorl	12(%esp),%esi
+	roll	$1,%esi
+	addl	%edi,%ebp
+	rorl	$2,%ebx
+	movl	%eax,%edi
+	roll	$5,%edi
+	movl	%esi,24(%esp)
+	leal	2400959708(%esi,%ebp,1),%esi
+	movl	%ecx,%ebp
+	addl	%edi,%esi
+	andl	%edx,%ebp
+	movl	28(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%ebx,%ebp
+	xorl	36(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	60(%esp),%edi
+	andl	%eax,%ebp
+	xorl	16(%esp),%edi
+	roll	$1,%edi
+	addl	%edx,%ebp
+	rorl	$2,%eax
+	movl	%esi,%edx
+	roll	$5,%edx
+	movl	%edi,28(%esp)
+	leal	2400959708(%edi,%ebp,1),%edi
+	movl	%ebx,%ebp
+	addl	%edx,%edi
+	andl	%ecx,%ebp
+	movl	32(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%eax,%ebp
+	xorl	40(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	(%esp),%edx
+	andl	%esi,%ebp
+	xorl	20(%esp),%edx
+	roll	$1,%edx
+	addl	%ecx,%ebp
+	rorl	$2,%esi
+	movl	%edi,%ecx
+	roll	$5,%ecx
+	movl	%edx,32(%esp)
+	leal	2400959708(%edx,%ebp,1),%edx
+	movl	%eax,%ebp
+	addl	%ecx,%edx
+	andl	%ebx,%ebp
+	movl	36(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%esi,%ebp
+	xorl	44(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	4(%esp),%ecx
+	andl	%edi,%ebp
+	xorl	24(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebx,%ebp
+	rorl	$2,%edi
+	movl	%edx,%ebx
+	roll	$5,%ebx
+	movl	%ecx,36(%esp)
+	leal	2400959708(%ecx,%ebp,1),%ecx
+	movl	%esi,%ebp
+	addl	%ebx,%ecx
+	andl	%eax,%ebp
+	movl	40(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edi,%ebp
+	xorl	48(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	8(%esp),%ebx
+	andl	%edx,%ebp
+	xorl	28(%esp),%ebx
+	roll	$1,%ebx
+	addl	%eax,%ebp
+	rorl	$2,%edx
+	movl	%ecx,%eax
+	roll	$5,%eax
+	movl	%ebx,40(%esp)
+	leal	2400959708(%ebx,%ebp,1),%ebx
+	movl	%edi,%ebp
+	addl	%eax,%ebx
+	andl	%esi,%ebp
+	movl	44(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%edx,%ebp
+	xorl	52(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	12(%esp),%eax
+	andl	%ecx,%ebp
+	xorl	32(%esp),%eax
+	roll	$1,%eax
+	addl	%esi,%ebp
+	rorl	$2,%ecx
+	movl	%ebx,%esi
+	roll	$5,%esi
+	movl	%eax,44(%esp)
+	leal	2400959708(%eax,%ebp,1),%eax
+	movl	%edx,%ebp
+	addl	%esi,%eax
+	andl	%edi,%ebp
+	movl	48(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ebx,%ebp
+	xorl	56(%esp),%esi
+	xorl	%ecx,%ebp
+	xorl	16(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	36(%esp),%esi
+	roll	$1,%esi
+	addl	%ebp,%edi
+	rorl	$2,%ebx
+	movl	%eax,%ebp
+	roll	$5,%ebp
+	movl	%esi,48(%esp)
+	leal	3395469782(%esi,%edi,1),%esi
+	movl	52(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%eax,%ebp
+	xorl	60(%esp),%edi
+	xorl	%ebx,%ebp
+	xorl	20(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	40(%esp),%edi
+	roll	$1,%edi
+	addl	%ebp,%edx
+	rorl	$2,%eax
+	movl	%esi,%ebp
+	roll	$5,%ebp
+	movl	%edi,52(%esp)
+	leal	3395469782(%edi,%edx,1),%edi
+	movl	56(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%esi,%ebp
+	xorl	(%esp),%edx
+	xorl	%eax,%ebp
+	xorl	24(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	44(%esp),%edx
+	roll	$1,%edx
+	addl	%ebp,%ecx
+	rorl	$2,%esi
+	movl	%edi,%ebp
+	roll	$5,%ebp
+	movl	%edx,56(%esp)
+	leal	3395469782(%edx,%ecx,1),%edx
+	movl	60(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%edi,%ebp
+	xorl	4(%esp),%ecx
+	xorl	%esi,%ebp
+	xorl	28(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	48(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebp,%ebx
+	rorl	$2,%edi
+	movl	%edx,%ebp
+	roll	$5,%ebp
+	movl	%ecx,60(%esp)
+	leal	3395469782(%ecx,%ebx,1),%ecx
+	movl	(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edx,%ebp
+	xorl	8(%esp),%ebx
+	xorl	%edi,%ebp
+	xorl	32(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	52(%esp),%ebx
+	roll	$1,%ebx
+	addl	%ebp,%eax
+	rorl	$2,%edx
+	movl	%ecx,%ebp
+	roll	$5,%ebp
+	movl	%ebx,(%esp)
+	leal	3395469782(%ebx,%eax,1),%ebx
+	movl	4(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%ecx,%ebp
+	xorl	12(%esp),%eax
+	xorl	%edx,%ebp
+	xorl	36(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	56(%esp),%eax
+	roll	$1,%eax
+	addl	%ebp,%esi
+	rorl	$2,%ecx
+	movl	%ebx,%ebp
+	roll	$5,%ebp
+	movl	%eax,4(%esp)
+	leal	3395469782(%eax,%esi,1),%eax
+	movl	8(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ebx,%ebp
+	xorl	16(%esp),%esi
+	xorl	%ecx,%ebp
+	xorl	40(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	60(%esp),%esi
+	roll	$1,%esi
+	addl	%ebp,%edi
+	rorl	$2,%ebx
+	movl	%eax,%ebp
+	roll	$5,%ebp
+	movl	%esi,8(%esp)
+	leal	3395469782(%esi,%edi,1),%esi
+	movl	12(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%eax,%ebp
+	xorl	20(%esp),%edi
+	xorl	%ebx,%ebp
+	xorl	44(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	(%esp),%edi
+	roll	$1,%edi
+	addl	%ebp,%edx
+	rorl	$2,%eax
+	movl	%esi,%ebp
+	roll	$5,%ebp
+	movl	%edi,12(%esp)
+	leal	3395469782(%edi,%edx,1),%edi
+	movl	16(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%esi,%ebp
+	xorl	24(%esp),%edx
+	xorl	%eax,%ebp
+	xorl	48(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	4(%esp),%edx
+	roll	$1,%edx
+	addl	%ebp,%ecx
+	rorl	$2,%esi
+	movl	%edi,%ebp
+	roll	$5,%ebp
+	movl	%edx,16(%esp)
+	leal	3395469782(%edx,%ecx,1),%edx
+	movl	20(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%edi,%ebp
+	xorl	28(%esp),%ecx
+	xorl	%esi,%ebp
+	xorl	52(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	8(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebp,%ebx
+	rorl	$2,%edi
+	movl	%edx,%ebp
+	roll	$5,%ebp
+	movl	%ecx,20(%esp)
+	leal	3395469782(%ecx,%ebx,1),%ecx
+	movl	24(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edx,%ebp
+	xorl	32(%esp),%ebx
+	xorl	%edi,%ebp
+	xorl	56(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	12(%esp),%ebx
+	roll	$1,%ebx
+	addl	%ebp,%eax
+	rorl	$2,%edx
+	movl	%ecx,%ebp
+	roll	$5,%ebp
+	movl	%ebx,24(%esp)
+	leal	3395469782(%ebx,%eax,1),%ebx
+	movl	28(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%ecx,%ebp
+	xorl	36(%esp),%eax
+	xorl	%edx,%ebp
+	xorl	60(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	16(%esp),%eax
+	roll	$1,%eax
+	addl	%ebp,%esi
+	rorl	$2,%ecx
+	movl	%ebx,%ebp
+	roll	$5,%ebp
+	movl	%eax,28(%esp)
+	leal	3395469782(%eax,%esi,1),%eax
+	movl	32(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ebx,%ebp
+	xorl	40(%esp),%esi
+	xorl	%ecx,%ebp
+	xorl	(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	20(%esp),%esi
+	roll	$1,%esi
+	addl	%ebp,%edi
+	rorl	$2,%ebx
+	movl	%eax,%ebp
+	roll	$5,%ebp
+	movl	%esi,32(%esp)
+	leal	3395469782(%esi,%edi,1),%esi
+	movl	36(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%eax,%ebp
+	xorl	44(%esp),%edi
+	xorl	%ebx,%ebp
+	xorl	4(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	24(%esp),%edi
+	roll	$1,%edi
+	addl	%ebp,%edx
+	rorl	$2,%eax
+	movl	%esi,%ebp
+	roll	$5,%ebp
+	movl	%edi,36(%esp)
+	leal	3395469782(%edi,%edx,1),%edi
+	movl	40(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%esi,%ebp
+	xorl	48(%esp),%edx
+	xorl	%eax,%ebp
+	xorl	8(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	28(%esp),%edx
+	roll	$1,%edx
+	addl	%ebp,%ecx
+	rorl	$2,%esi
+	movl	%edi,%ebp
+	roll	$5,%ebp
+	movl	%edx,40(%esp)
+	leal	3395469782(%edx,%ecx,1),%edx
+	movl	44(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%edi,%ebp
+	xorl	52(%esp),%ecx
+	xorl	%esi,%ebp
+	xorl	12(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	32(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebp,%ebx
+	rorl	$2,%edi
+	movl	%edx,%ebp
+	roll	$5,%ebp
+	movl	%ecx,44(%esp)
+	leal	3395469782(%ecx,%ebx,1),%ecx
+	movl	48(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edx,%ebp
+	xorl	56(%esp),%ebx
+	xorl	%edi,%ebp
+	xorl	16(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	36(%esp),%ebx
+	roll	$1,%ebx
+	addl	%ebp,%eax
+	rorl	$2,%edx
+	movl	%ecx,%ebp
+	roll	$5,%ebp
+	movl	%ebx,48(%esp)
+	leal	3395469782(%ebx,%eax,1),%ebx
+	movl	52(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%ecx,%ebp
+	xorl	60(%esp),%eax
+	xorl	%edx,%ebp
+	xorl	20(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	40(%esp),%eax
+	roll	$1,%eax
+	addl	%ebp,%esi
+	rorl	$2,%ecx
+	movl	%ebx,%ebp
+	roll	$5,%ebp
+	leal	3395469782(%eax,%esi,1),%eax
+	movl	56(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ebx,%ebp
+	xorl	(%esp),%esi
+	xorl	%ecx,%ebp
+	xorl	24(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	44(%esp),%esi
+	roll	$1,%esi
+	addl	%ebp,%edi
+	rorl	$2,%ebx
+	movl	%eax,%ebp
+	roll	$5,%ebp
+	leal	3395469782(%esi,%edi,1),%esi
+	movl	60(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%eax,%ebp
+	xorl	4(%esp),%edi
+	xorl	%ebx,%ebp
+	xorl	28(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	48(%esp),%edi
+	roll	$1,%edi
+	addl	%ebp,%edx
+	rorl	$2,%eax
+	movl	%esi,%ebp
+	roll	$5,%ebp
+	leal	3395469782(%edi,%edx,1),%edi
+	addl	%ebp,%edi
+	movl	96(%esp),%ebp
+	movl	100(%esp),%edx
+	addl	(%ebp),%edi
+	addl	4(%ebp),%esi
+	addl	8(%ebp),%eax
+	addl	12(%ebp),%ebx
+	addl	16(%ebp),%ecx
+	movl	%edi,(%ebp)
+	addl	$64,%edx
+	movl	%esi,4(%ebp)
+	cmpl	104(%esp),%edx
+	movl	%eax,8(%ebp)
+	movl	%ecx,%edi
+	movl	%ebx,12(%ebp)
+	movl	%edx,%esi
+	movl	%ecx,16(%ebp)
+	jb	.L002loop
+	addl	$76,%esp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	sha1_block_data_order,.-.L_sha1_block_data_order_begin
+.hidden	_sha1_block_data_order_ssse3	/* SSSE3 variant of the SHA-1 block routine: stack args are (pointer to five 32-bit state words, input pointer, count of 64-byte blocks) */
+.type	_sha1_block_data_order_ssse3,@function
+.align	16
+_sha1_block_data_order_ssse3:
+	pushl	%ebp	/* save the four registers that are restored by the pops before ret */
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	call	.L003pic_point	/* call/pop pair: fetch the current address for position-independent addressing */
+.L003pic_point:
+	popl	%ebp
+	leal	.LK_XX_XX-.L003pic_point(%ebp),%ebp	/* ebp = address of the .LK_XX_XX table (defined outside this view) */
+.Lssse3_shortcut:
+	movdqa	(%ebp),%xmm7	/* five 16-byte table entries; presumably four round-constant vectors followed by a byte-shuffle mask -- TODO confirm against .LK_XX_XX */
+	movdqa	16(%ebp),%xmm0
+	movdqa	32(%ebp),%xmm1
+	movdqa	48(%ebp),%xmm2
+	movdqa	64(%ebp),%xmm6	/* xmm6 is used below as the pshufb control mask */
+	movl	20(%esp),%edi	/* first stack argument (4 pushes + return address = 20): state pointer */
+	movl	24(%esp),%ebp	/* second stack argument: input pointer */
+	movl	28(%esp),%edx	/* third stack argument: number of 64-byte blocks */
+	movl	%esp,%esi	/* remember the incoming stack pointer */
+	subl	$208,%esp	/* reserve the scratch frame ... */
+	andl	$-64,%esp	/* ... and align it down to a 64-byte boundary */
+	movdqa	%xmm0,112(%esp)	/* table entries 1..3 are parked at 112/128/144(%esp) */
+	movdqa	%xmm1,128(%esp)
+	movdqa	%xmm2,144(%esp)
+	shll	$6,%edx	/* block count * 64 = byte length */
+	movdqa	%xmm7,160(%esp)	/* table entry 0 kept at 160(%esp) for reload on later iterations */
+	addl	%ebp,%edx	/* edx = end-of-input pointer */
+	movdqa	%xmm6,176(%esp)	/* shuffle mask kept at 176(%esp) */
+	addl	$64,%ebp	/* ebp = address of the next block */
+	movl	%edi,192(%esp)	/* 192: state pointer */
+	movl	%ebp,196(%esp)	/* 196: next-block pointer; NOTE(review): loop exits only when this equals the end pointer, so a block count of 0 is not handled here -- confirm callers never pass 0 */
+	movl	%edx,200(%esp)	/* 200: end-of-input pointer */
+	movl	%esi,204(%esp)	/* 204: original stack pointer, restored before the final pops */
+	movl	(%edi),%eax	/* load the five state words into eax, ebx, ecx, edx, edi */
+	movl	4(%edi),%ebx
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+	movl	16(%edi),%edi
+	movl	%ebx,%esi
+	movdqu	-64(%ebp),%xmm0	/* unaligned load of the first 64-byte block into xmm0..xmm3 */
+	movdqu	-48(%ebp),%xmm1
+	movdqu	-32(%ebp),%xmm2
+	movdqu	-16(%ebp),%xmm3
+.byte	102,15,56,0,198	/* pshufb %xmm6,%xmm0 (hand-encoded) */
+.byte	102,15,56,0,206	/* pshufb %xmm6,%xmm1 */
+.byte	102,15,56,0,214	/* pshufb %xmm6,%xmm2 */
+	movdqa	%xmm7,96(%esp)	/* working copy of table entry 0 at 96(%esp) */
+.byte	102,15,56,0,222	/* pshufb %xmm6,%xmm3 */
+	paddd	%xmm7,%xmm0	/* add table entry 0 lane-wise to the first three data vectors ... */
+	paddd	%xmm7,%xmm1
+	paddd	%xmm7,%xmm2
+	movdqa	%xmm0,(%esp)	/* ... store the sums into slots 0..47(%esp), which the scalar rounds read ... */
+	psubd	%xmm7,%xmm0	/* ... then subtract again so xmm0..xmm2 hold the shuffled data unchanged */
+	movdqa	%xmm1,16(%esp)
+	psubd	%xmm7,%xmm1
+	movdqa	%xmm2,32(%esp)
+	movl	%ecx,%ebp
+	psubd	%xmm7,%xmm2
+	xorl	%edx,%ebp
+	pshufd	$238,%xmm0,%xmm4	/* xmm4 = dwords 2,3 of xmm0 replicated into both halves */
+	andl	%ebp,%esi	/* esi = ebx & (ecx ^ edx), the value the loop expects on entry */
+	jmp	.L004loop
+.align	16
+.L004loop:	/* one pass per 64-byte block: scalar round arithmetic on eax/ebx/ecx/edx/edi (esi/ebp as temporaries) interleaved with SSE computation of the values stored into slots 0..63(%esp) that later rounds add in */
+	rorl	$2,%ebx
+	xorl	%edx,%esi
+	movl	%eax,%ebp
+	punpcklqdq	%xmm1,%xmm4	/* xmm4 = {xmm0 dwords 2,3 ; xmm1 dwords 0,1} (xmm4 was prepared by pshufd $238 before entering) */
+	movdqa	%xmm3,%xmm6
+	addl	(%esp),%edi	/* each scalar step adds one precomputed 32-bit slot from 0..60(%esp) */
+	xorl	%ecx,%ebx
+	paddd	%xmm3,%xmm7	/* xmm7 += xmm3; the sum is stored to slots 48..63 below */
+	movdqa	%xmm0,64(%esp)	/* 64/80/96(%esp) serve as spill slots for data vectors */
+	roll	$5,%eax
+	addl	%esi,%edi
+	psrldq	$4,%xmm6
+	andl	%ebx,%ebp
+	xorl	%ecx,%ebx
+	pxor	%xmm0,%xmm4
+	addl	%eax,%edi
+	rorl	$7,%eax	/* roll $5 followed by rorl $7: net rotate right by 2 of the original value */
+	pxor	%xmm2,%xmm6
+	xorl	%ecx,%ebp
+	movl	%edi,%esi
+	addl	4(%esp),%edx
+	pxor	%xmm6,%xmm4
+	xorl	%ebx,%eax
+	roll	$5,%edi
+	movdqa	%xmm7,48(%esp)
+	addl	%ebp,%edx
+	andl	%eax,%esi
+	movdqa	%xmm4,%xmm0
+	xorl	%ebx,%eax
+	addl	%edi,%edx
+	rorl	$7,%edi
+	movdqa	%xmm4,%xmm6
+	xorl	%ebx,%esi
+	pslldq	$12,%xmm0
+	paddd	%xmm4,%xmm4	/* lane-wise shift left by 1; with the psrld $31 of the copy and the por below this forms a rotate left by 1 */
+	movl	%edx,%ebp
+	addl	8(%esp),%ecx
+	psrld	$31,%xmm6
+	xorl	%eax,%edi
+	roll	$5,%edx
+	movdqa	%xmm0,%xmm7
+	addl	%esi,%ecx
+	andl	%edi,%ebp
+	xorl	%eax,%edi
+	psrld	$30,%xmm0
+	addl	%edx,%ecx
+	rorl	$7,%edx
+	por	%xmm6,%xmm4
+	xorl	%eax,%ebp
+	movl	%ecx,%esi
+	addl	12(%esp),%ebx
+	pslld	$2,%xmm7
+	xorl	%edi,%edx
+	roll	$5,%ecx
+	pxor	%xmm0,%xmm4
+	movdqa	96(%esp),%xmm0	/* reload the vector parked at 96(%esp) (copy of table entry 0 on loop entry) */
+	addl	%ebp,%ebx
+	andl	%edx,%esi
+	pxor	%xmm7,%xmm4
+	pshufd	$238,%xmm1,%xmm5
+	xorl	%edi,%edx
+	addl	%ecx,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	movl	%ebx,%ebp
+	punpcklqdq	%xmm2,%xmm5
+	movdqa	%xmm4,%xmm7
+	addl	16(%esp),%eax
+	xorl	%edx,%ecx
+	paddd	%xmm4,%xmm0
+	movdqa	%xmm1,80(%esp)
+	roll	$5,%ebx
+	addl	%esi,%eax
+	psrldq	$4,%xmm7
+	andl	%ecx,%ebp
+	xorl	%edx,%ecx
+	pxor	%xmm1,%xmm5
+	addl	%ebx,%eax
+	rorl	$7,%ebx
+	pxor	%xmm3,%xmm7
+	xorl	%edx,%ebp
+	movl	%eax,%esi
+	addl	20(%esp),%edi
+	pxor	%xmm7,%xmm5
+	xorl	%ecx,%ebx
+	roll	$5,%eax
+	movdqa	%xmm0,(%esp)	/* refill slots 0..15 for a later group of rounds */
+	addl	%ebp,%edi
+	andl	%ebx,%esi
+	movdqa	%xmm5,%xmm1
+	xorl	%ecx,%ebx
+	addl	%eax,%edi
+	rorl	$7,%eax
+	movdqa	%xmm5,%xmm7
+	xorl	%ecx,%esi
+	pslldq	$12,%xmm1
+	paddd	%xmm5,%xmm5
+	movl	%edi,%ebp
+	addl	24(%esp),%edx
+	psrld	$31,%xmm7
+	xorl	%ebx,%eax
+	roll	$5,%edi
+	movdqa	%xmm1,%xmm0
+	addl	%esi,%edx
+	andl	%eax,%ebp
+	xorl	%ebx,%eax
+	psrld	$30,%xmm1
+	addl	%edi,%edx
+	rorl	$7,%edi
+	por	%xmm7,%xmm5
+	xorl	%ebx,%ebp
+	movl	%edx,%esi
+	addl	28(%esp),%ecx
+	pslld	$2,%xmm0
+	xorl	%eax,%edi
+	roll	$5,%edx
+	pxor	%xmm1,%xmm5
+	movdqa	112(%esp),%xmm1	/* vector parked at 112(%esp) by the entry code (table entry 1) */
+	addl	%ebp,%ecx
+	andl	%edi,%esi
+	pxor	%xmm0,%xmm5
+	pshufd	$238,%xmm2,%xmm6
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%edx
+	xorl	%eax,%esi
+	movl	%ecx,%ebp
+	punpcklqdq	%xmm3,%xmm6
+	movdqa	%xmm5,%xmm0
+	addl	32(%esp),%ebx
+	xorl	%edi,%edx
+	paddd	%xmm5,%xmm1
+	movdqa	%xmm2,96(%esp)
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	psrldq	$4,%xmm0
+	andl	%edx,%ebp
+	xorl	%edi,%edx
+	pxor	%xmm2,%xmm6
+	addl	%ecx,%ebx
+	rorl	$7,%ecx
+	pxor	%xmm4,%xmm0
+	xorl	%edi,%ebp
+	movl	%ebx,%esi
+	addl	36(%esp),%eax
+	pxor	%xmm0,%xmm6
+	xorl	%edx,%ecx
+	roll	$5,%ebx
+	movdqa	%xmm1,16(%esp)
+	addl	%ebp,%eax
+	andl	%ecx,%esi
+	movdqa	%xmm6,%xmm2
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	rorl	$7,%ebx
+	movdqa	%xmm6,%xmm0
+	xorl	%edx,%esi
+	pslldq	$12,%xmm2
+	paddd	%xmm6,%xmm6
+	movl	%eax,%ebp
+	addl	40(%esp),%edi
+	psrld	$31,%xmm0
+	xorl	%ecx,%ebx
+	roll	$5,%eax
+	movdqa	%xmm2,%xmm1
+	addl	%esi,%edi
+	andl	%ebx,%ebp
+	xorl	%ecx,%ebx
+	psrld	$30,%xmm2
+	addl	%eax,%edi
+	rorl	$7,%eax
+	por	%xmm0,%xmm6
+	xorl	%ecx,%ebp
+	movdqa	64(%esp),%xmm0
+	movl	%edi,%esi
+	addl	44(%esp),%edx
+	pslld	$2,%xmm1
+	xorl	%ebx,%eax
+	roll	$5,%edi
+	pxor	%xmm2,%xmm6
+	movdqa	112(%esp),%xmm2
+	addl	%ebp,%edx
+	andl	%eax,%esi
+	pxor	%xmm1,%xmm6
+	pshufd	$238,%xmm3,%xmm7
+	xorl	%ebx,%eax
+	addl	%edi,%edx
+	rorl	$7,%edi
+	xorl	%ebx,%esi
+	movl	%edx,%ebp
+	punpcklqdq	%xmm4,%xmm7
+	movdqa	%xmm6,%xmm1
+	addl	48(%esp),%ecx
+	xorl	%eax,%edi
+	paddd	%xmm6,%xmm2
+	movdqa	%xmm3,64(%esp)
+	roll	$5,%edx
+	addl	%esi,%ecx
+	psrldq	$4,%xmm1
+	andl	%edi,%ebp
+	xorl	%eax,%edi
+	pxor	%xmm3,%xmm7
+	addl	%edx,%ecx
+	rorl	$7,%edx
+	pxor	%xmm5,%xmm1
+	xorl	%eax,%ebp
+	movl	%ecx,%esi
+	addl	52(%esp),%ebx
+	pxor	%xmm1,%xmm7
+	xorl	%edi,%edx
+	roll	$5,%ecx
+	movdqa	%xmm2,32(%esp)
+	addl	%ebp,%ebx
+	andl	%edx,%esi
+	movdqa	%xmm7,%xmm3
+	xorl	%edi,%edx
+	addl	%ecx,%ebx
+	rorl	$7,%ecx
+	movdqa	%xmm7,%xmm1
+	xorl	%edi,%esi
+	pslldq	$12,%xmm3
+	paddd	%xmm7,%xmm7
+	movl	%ebx,%ebp
+	addl	56(%esp),%eax
+	psrld	$31,%xmm1
+	xorl	%edx,%ecx
+	roll	$5,%ebx
+	movdqa	%xmm3,%xmm2
+	addl	%esi,%eax
+	andl	%ecx,%ebp
+	xorl	%edx,%ecx
+	psrld	$30,%xmm3
+	addl	%ebx,%eax
+	rorl	$7,%ebx
+	por	%xmm1,%xmm7
+	xorl	%edx,%ebp
+	movdqa	80(%esp),%xmm1
+	movl	%eax,%esi
+	addl	60(%esp),%edi
+	pslld	$2,%xmm2
+	xorl	%ecx,%ebx
+	roll	$5,%eax
+	pxor	%xmm3,%xmm7
+	movdqa	112(%esp),%xmm3
+	addl	%ebp,%edi
+	andl	%ebx,%esi
+	pxor	%xmm2,%xmm7
+	pshufd	$238,%xmm6,%xmm2
+	xorl	%ecx,%ebx
+	addl	%eax,%edi
+	rorl	$7,%eax
+	pxor	%xmm4,%xmm0
+	punpcklqdq	%xmm7,%xmm2
+	xorl	%ecx,%esi
+	movl	%edi,%ebp
+	addl	(%esp),%edx
+	pxor	%xmm1,%xmm0
+	movdqa	%xmm4,80(%esp)
+	xorl	%ebx,%eax
+	roll	$5,%edi
+	movdqa	%xmm3,%xmm4
+	addl	%esi,%edx
+	paddd	%xmm7,%xmm3
+	andl	%eax,%ebp
+	pxor	%xmm2,%xmm0
+	xorl	%ebx,%eax
+	addl	%edi,%edx
+	rorl	$7,%edi
+	xorl	%ebx,%ebp
+	movdqa	%xmm0,%xmm2
+	movdqa	%xmm3,48(%esp)
+	movl	%edx,%esi
+	addl	4(%esp),%ecx
+	xorl	%eax,%edi
+	roll	$5,%edx
+	pslld	$2,%xmm0	/* with the psrld $30 of the copy and the por below: lane-wise rotate left by 2 */
+	addl	%ebp,%ecx
+	andl	%edi,%esi
+	psrld	$30,%xmm2
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%edx
+	xorl	%eax,%esi
+	movl	%ecx,%ebp
+	addl	8(%esp),%ebx
+	xorl	%edi,%edx
+	roll	$5,%ecx
+	por	%xmm2,%xmm0
+	addl	%esi,%ebx
+	andl	%edx,%ebp
+	movdqa	96(%esp),%xmm2
+	xorl	%edi,%edx
+	addl	%ecx,%ebx
+	addl	12(%esp),%eax
+	xorl	%edi,%ebp
+	movl	%ebx,%esi
+	pshufd	$238,%xmm7,%xmm3
+	roll	$5,%ebx
+	addl	%ebp,%eax
+	xorl	%edx,%esi
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	16(%esp),%edi
+	pxor	%xmm5,%xmm1
+	punpcklqdq	%xmm0,%xmm3
+	xorl	%ecx,%esi
+	movl	%eax,%ebp
+	roll	$5,%eax
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm5,96(%esp)
+	addl	%esi,%edi
+	xorl	%ecx,%ebp
+	movdqa	%xmm4,%xmm5
+	rorl	$7,%ebx
+	paddd	%xmm0,%xmm4
+	addl	%eax,%edi
+	pxor	%xmm3,%xmm1
+	addl	20(%esp),%edx
+	xorl	%ebx,%ebp
+	movl	%edi,%esi
+	roll	$5,%edi
+	movdqa	%xmm1,%xmm3
+	movdqa	%xmm4,(%esp)
+	addl	%ebp,%edx
+	xorl	%ebx,%esi
+	rorl	$7,%eax
+	addl	%edi,%edx
+	pslld	$2,%xmm1
+	addl	24(%esp),%ecx
+	xorl	%eax,%esi
+	psrld	$30,%xmm3
+	movl	%edx,%ebp
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%ebp
+	rorl	$7,%edi
+	addl	%edx,%ecx
+	por	%xmm3,%xmm1
+	addl	28(%esp),%ebx
+	xorl	%edi,%ebp
+	movdqa	64(%esp),%xmm3
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	addl	%ebp,%ebx
+	xorl	%edi,%esi
+	rorl	$7,%edx
+	pshufd	$238,%xmm0,%xmm4
+	addl	%ecx,%ebx
+	addl	32(%esp),%eax
+	pxor	%xmm6,%xmm2
+	punpcklqdq	%xmm1,%xmm4
+	xorl	%edx,%esi
+	movl	%ebx,%ebp
+	roll	$5,%ebx
+	pxor	%xmm3,%xmm2
+	movdqa	%xmm6,64(%esp)
+	addl	%esi,%eax
+	xorl	%edx,%ebp
+	movdqa	128(%esp),%xmm6	/* vector parked at 128(%esp) by the entry code (table entry 2) */
+	rorl	$7,%ecx
+	paddd	%xmm1,%xmm5
+	addl	%ebx,%eax
+	pxor	%xmm4,%xmm2
+	addl	36(%esp),%edi
+	xorl	%ecx,%ebp
+	movl	%eax,%esi
+	roll	$5,%eax
+	movdqa	%xmm2,%xmm4
+	movdqa	%xmm5,16(%esp)
+	addl	%ebp,%edi
+	xorl	%ecx,%esi
+	rorl	$7,%ebx
+	addl	%eax,%edi
+	pslld	$2,%xmm2
+	addl	40(%esp),%edx
+	xorl	%ebx,%esi
+	psrld	$30,%xmm4
+	movl	%edi,%ebp
+	roll	$5,%edi
+	addl	%esi,%edx
+	xorl	%ebx,%ebp
+	rorl	$7,%eax
+	addl	%edi,%edx
+	por	%xmm4,%xmm2
+	addl	44(%esp),%ecx
+	xorl	%eax,%ebp
+	movdqa	80(%esp),%xmm4
+	movl	%edx,%esi
+	roll	$5,%edx
+	addl	%ebp,%ecx
+	xorl	%eax,%esi
+	rorl	$7,%edi
+	pshufd	$238,%xmm1,%xmm5
+	addl	%edx,%ecx
+	addl	48(%esp),%ebx
+	pxor	%xmm7,%xmm3
+	punpcklqdq	%xmm2,%xmm5
+	xorl	%edi,%esi
+	movl	%ecx,%ebp
+	roll	$5,%ecx
+	pxor	%xmm4,%xmm3
+	movdqa	%xmm7,80(%esp)
+	addl	%esi,%ebx
+	xorl	%edi,%ebp
+	movdqa	%xmm6,%xmm7
+	rorl	$7,%edx
+	paddd	%xmm2,%xmm6
+	addl	%ecx,%ebx
+	pxor	%xmm5,%xmm3
+	addl	52(%esp),%eax
+	xorl	%edx,%ebp
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	movdqa	%xmm3,%xmm5
+	movdqa	%xmm6,32(%esp)
+	addl	%ebp,%eax
+	xorl	%edx,%esi
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	pslld	$2,%xmm3
+	addl	56(%esp),%edi
+	xorl	%ecx,%esi
+	psrld	$30,%xmm5
+	movl	%eax,%ebp
+	roll	$5,%eax
+	addl	%esi,%edi
+	xorl	%ecx,%ebp
+	rorl	$7,%ebx
+	addl	%eax,%edi
+	por	%xmm5,%xmm3
+	addl	60(%esp),%edx
+	xorl	%ebx,%ebp
+	movdqa	96(%esp),%xmm5
+	movl	%edi,%esi
+	roll	$5,%edi
+	addl	%ebp,%edx
+	xorl	%ebx,%esi
+	rorl	$7,%eax
+	pshufd	$238,%xmm2,%xmm6
+	addl	%edi,%edx
+	addl	(%esp),%ecx
+	pxor	%xmm0,%xmm4
+	punpcklqdq	%xmm3,%xmm6
+	xorl	%eax,%esi
+	movl	%edx,%ebp
+	roll	$5,%edx
+	pxor	%xmm5,%xmm4
+	movdqa	%xmm0,96(%esp)
+	addl	%esi,%ecx
+	xorl	%eax,%ebp
+	movdqa	%xmm7,%xmm0
+	rorl	$7,%edi
+	paddd	%xmm3,%xmm7
+	addl	%edx,%ecx
+	pxor	%xmm6,%xmm4
+	addl	4(%esp),%ebx
+	xorl	%edi,%ebp
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	movdqa	%xmm4,%xmm6
+	movdqa	%xmm7,48(%esp)
+	addl	%ebp,%ebx
+	xorl	%edi,%esi
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	pslld	$2,%xmm4
+	addl	8(%esp),%eax
+	xorl	%edx,%esi
+	psrld	$30,%xmm6
+	movl	%ebx,%ebp
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%ebp
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	por	%xmm6,%xmm4
+	addl	12(%esp),%edi
+	xorl	%ecx,%ebp
+	movdqa	64(%esp),%xmm6
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%ebp,%edi
+	xorl	%ecx,%esi
+	rorl	$7,%ebx
+	pshufd	$238,%xmm3,%xmm7
+	addl	%eax,%edi
+	addl	16(%esp),%edx
+	pxor	%xmm1,%xmm5
+	punpcklqdq	%xmm4,%xmm7
+	xorl	%ebx,%esi
+	movl	%edi,%ebp
+	roll	$5,%edi
+	pxor	%xmm6,%xmm5
+	movdqa	%xmm1,64(%esp)
+	addl	%esi,%edx
+	xorl	%ebx,%ebp
+	movdqa	%xmm0,%xmm1
+	rorl	$7,%eax
+	paddd	%xmm4,%xmm0
+	addl	%edi,%edx
+	pxor	%xmm7,%xmm5
+	addl	20(%esp),%ecx
+	xorl	%eax,%ebp
+	movl	%edx,%esi
+	roll	$5,%edx
+	movdqa	%xmm5,%xmm7
+	movdqa	%xmm0,(%esp)
+	addl	%ebp,%ecx
+	xorl	%eax,%esi
+	rorl	$7,%edi
+	addl	%edx,%ecx
+	pslld	$2,%xmm5
+	addl	24(%esp),%ebx
+	xorl	%edi,%esi
+	psrld	$30,%xmm7
+	movl	%ecx,%ebp
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%edi,%ebp
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	por	%xmm7,%xmm5
+	addl	28(%esp),%eax
+	movdqa	80(%esp),%xmm7
+	rorl	$7,%ecx
+	movl	%ebx,%esi
+	xorl	%edx,%ebp
+	roll	$5,%ebx
+	pshufd	$238,%xmm4,%xmm0
+	addl	%ebp,%eax
+	xorl	%ecx,%esi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	addl	32(%esp),%edi
+	pxor	%xmm2,%xmm6
+	punpcklqdq	%xmm5,%xmm0
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	rorl	$7,%ebx
+	pxor	%xmm7,%xmm6
+	movdqa	%xmm2,80(%esp)
+	movl	%eax,%ebp
+	xorl	%ecx,%esi
+	roll	$5,%eax
+	movdqa	%xmm1,%xmm2
+	addl	%esi,%edi
+	paddd	%xmm5,%xmm1
+	xorl	%ebx,%ebp
+	pxor	%xmm0,%xmm6
+	xorl	%ecx,%ebx
+	addl	%eax,%edi
+	addl	36(%esp),%edx
+	andl	%ebx,%ebp
+	movdqa	%xmm6,%xmm0
+	movdqa	%xmm1,16(%esp)
+	xorl	%ecx,%ebx
+	rorl	$7,%eax
+	movl	%edi,%esi
+	xorl	%ebx,%ebp
+	roll	$5,%edi
+	pslld	$2,%xmm6
+	addl	%ebp,%edx
+	xorl	%eax,%esi
+	psrld	$30,%xmm0
+	xorl	%ebx,%eax
+	addl	%edi,%edx
+	addl	40(%esp),%ecx
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	rorl	$7,%edi
+	por	%xmm0,%xmm6
+	movl	%edx,%ebp
+	xorl	%eax,%esi
+	movdqa	96(%esp),%xmm0
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%edi,%ebp
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	pshufd	$238,%xmm5,%xmm1
+	addl	44(%esp),%ebx
+	andl	%edi,%ebp
+	xorl	%eax,%edi
+	rorl	$7,%edx
+	movl	%ecx,%esi
+	xorl	%edi,%ebp
+	roll	$5,%ecx
+	addl	%ebp,%ebx
+	xorl	%edx,%esi
+	xorl	%edi,%edx
+	addl	%ecx,%ebx
+	addl	48(%esp),%eax
+	pxor	%xmm3,%xmm7
+	punpcklqdq	%xmm6,%xmm1
+	andl	%edx,%esi
+	xorl	%edi,%edx
+	rorl	$7,%ecx
+	pxor	%xmm0,%xmm7
+	movdqa	%xmm3,96(%esp)
+	movl	%ebx,%ebp
+	xorl	%edx,%esi
+	roll	$5,%ebx
+	movdqa	144(%esp),%xmm3	/* vector parked at 144(%esp) by the entry code (table entry 3) */
+	addl	%esi,%eax
+	paddd	%xmm6,%xmm2
+	xorl	%ecx,%ebp
+	pxor	%xmm1,%xmm7
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	addl	52(%esp),%edi
+	andl	%ecx,%ebp
+	movdqa	%xmm7,%xmm1
+	movdqa	%xmm2,32(%esp)
+	xorl	%edx,%ecx
+	rorl	$7,%ebx
+	movl	%eax,%esi
+	xorl	%ecx,%ebp
+	roll	$5,%eax
+	pslld	$2,%xmm7
+	addl	%ebp,%edi
+	xorl	%ebx,%esi
+	psrld	$30,%xmm1
+	xorl	%ecx,%ebx
+	addl	%eax,%edi
+	addl	56(%esp),%edx
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	rorl	$7,%eax
+	por	%xmm1,%xmm7
+	movl	%edi,%ebp
+	xorl	%ebx,%esi
+	movdqa	64(%esp),%xmm1
+	roll	$5,%edi
+	addl	%esi,%edx
+	xorl	%eax,%ebp
+	xorl	%ebx,%eax
+	addl	%edi,%edx
+	pshufd	$238,%xmm6,%xmm2
+	addl	60(%esp),%ecx
+	andl	%eax,%ebp
+	xorl	%ebx,%eax
+	rorl	$7,%edi
+	movl	%edx,%esi
+	xorl	%eax,%ebp
+	roll	$5,%edx
+	addl	%ebp,%ecx
+	xorl	%edi,%esi
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	addl	(%esp),%ebx
+	pxor	%xmm4,%xmm0
+	punpcklqdq	%xmm7,%xmm2
+	andl	%edi,%esi
+	xorl	%eax,%edi
+	rorl	$7,%edx
+	pxor	%xmm1,%xmm0
+	movdqa	%xmm4,64(%esp)
+	movl	%ecx,%ebp
+	xorl	%edi,%esi
+	roll	$5,%ecx
+	movdqa	%xmm3,%xmm4
+	addl	%esi,%ebx
+	paddd	%xmm7,%xmm3
+	xorl	%edx,%ebp
+	pxor	%xmm2,%xmm0
+	xorl	%edi,%edx
+	addl	%ecx,%ebx
+	addl	4(%esp),%eax
+	andl	%edx,%ebp
+	movdqa	%xmm0,%xmm2
+	movdqa	%xmm3,48(%esp)
+	xorl	%edi,%edx
+	rorl	$7,%ecx
+	movl	%ebx,%esi
+	xorl	%edx,%ebp
+	roll	$5,%ebx
+	pslld	$2,%xmm0
+	addl	%ebp,%eax
+	xorl	%ecx,%esi
+	psrld	$30,%xmm2
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	addl	8(%esp),%edi
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	rorl	$7,%ebx
+	por	%xmm2,%xmm0
+	movl	%eax,%ebp
+	xorl	%ecx,%esi
+	movdqa	80(%esp),%xmm2
+	roll	$5,%eax
+	addl	%esi,%edi
+	xorl	%ebx,%ebp
+	xorl	%ecx,%ebx
+	addl	%eax,%edi
+	pshufd	$238,%xmm7,%xmm3
+	addl	12(%esp),%edx
+	andl	%ebx,%ebp
+	xorl	%ecx,%ebx
+	rorl	$7,%eax
+	movl	%edi,%esi
+	xorl	%ebx,%ebp
+	roll	$5,%edi
+	addl	%ebp,%edx
+	xorl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	%edi,%edx
+	addl	16(%esp),%ecx
+	pxor	%xmm5,%xmm1
+	punpcklqdq	%xmm0,%xmm3
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	rorl	$7,%edi
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm5,80(%esp)
+	movl	%edx,%ebp
+	xorl	%eax,%esi
+	roll	$5,%edx
+	movdqa	%xmm4,%xmm5
+	addl	%esi,%ecx
+	paddd	%xmm0,%xmm4
+	xorl	%edi,%ebp
+	pxor	%xmm3,%xmm1
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	addl	20(%esp),%ebx
+	andl	%edi,%ebp
+	movdqa	%xmm1,%xmm3
+	movdqa	%xmm4,(%esp)
+	xorl	%eax,%edi
+	rorl	$7,%edx
+	movl	%ecx,%esi
+	xorl	%edi,%ebp
+	roll	$5,%ecx
+	pslld	$2,%xmm1
+	addl	%ebp,%ebx
+	xorl	%edx,%esi
+	psrld	$30,%xmm3
+	xorl	%edi,%edx
+	addl	%ecx,%ebx
+	addl	24(%esp),%eax
+	andl	%edx,%esi
+	xorl	%edi,%edx
+	rorl	$7,%ecx
+	por	%xmm3,%xmm1
+	movl	%ebx,%ebp
+	xorl	%edx,%esi
+	movdqa	96(%esp),%xmm3
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%ecx,%ebp
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	pshufd	$238,%xmm0,%xmm4
+	addl	28(%esp),%edi
+	andl	%ecx,%ebp
+	xorl	%edx,%ecx
+	rorl	$7,%ebx
+	movl	%eax,%esi
+	xorl	%ecx,%ebp
+	roll	$5,%eax
+	addl	%ebp,%edi
+	xorl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	%eax,%edi
+	addl	32(%esp),%edx
+	pxor	%xmm6,%xmm2
+	punpcklqdq	%xmm1,%xmm4
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	rorl	$7,%eax
+	pxor	%xmm3,%xmm2
+	movdqa	%xmm6,96(%esp)
+	movl	%edi,%ebp
+	xorl	%ebx,%esi
+	roll	$5,%edi
+	movdqa	%xmm5,%xmm6
+	addl	%esi,%edx
+	paddd	%xmm1,%xmm5
+	xorl	%eax,%ebp
+	pxor	%xmm4,%xmm2
+	xorl	%ebx,%eax
+	addl	%edi,%edx
+	addl	36(%esp),%ecx
+	andl	%eax,%ebp
+	movdqa	%xmm2,%xmm4
+	movdqa	%xmm5,16(%esp)
+	xorl	%ebx,%eax
+	rorl	$7,%edi
+	movl	%edx,%esi
+	xorl	%eax,%ebp
+	roll	$5,%edx
+	pslld	$2,%xmm2
+	addl	%ebp,%ecx
+	xorl	%edi,%esi
+	psrld	$30,%xmm4
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	addl	40(%esp),%ebx
+	andl	%edi,%esi
+	xorl	%eax,%edi
+	rorl	$7,%edx
+	por	%xmm4,%xmm2
+	movl	%ecx,%ebp
+	xorl	%edi,%esi
+	movdqa	64(%esp),%xmm4
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%edx,%ebp
+	xorl	%edi,%edx
+	addl	%ecx,%ebx
+	pshufd	$238,%xmm1,%xmm5
+	addl	44(%esp),%eax
+	andl	%edx,%ebp
+	xorl	%edi,%edx
+	rorl	$7,%ecx
+	movl	%ebx,%esi
+	xorl	%edx,%ebp
+	roll	$5,%ebx
+	addl	%ebp,%eax
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	addl	48(%esp),%edi
+	pxor	%xmm7,%xmm3
+	punpcklqdq	%xmm2,%xmm5
+	xorl	%ecx,%esi
+	movl	%eax,%ebp
+	roll	$5,%eax
+	pxor	%xmm4,%xmm3
+	movdqa	%xmm7,64(%esp)
+	addl	%esi,%edi
+	xorl	%ecx,%ebp
+	movdqa	%xmm6,%xmm7
+	rorl	$7,%ebx
+	paddd	%xmm2,%xmm6
+	addl	%eax,%edi
+	pxor	%xmm5,%xmm3
+	addl	52(%esp),%edx
+	xorl	%ebx,%ebp
+	movl	%edi,%esi
+	roll	$5,%edi
+	movdqa	%xmm3,%xmm5
+	movdqa	%xmm6,32(%esp)
+	addl	%ebp,%edx
+	xorl	%ebx,%esi
+	rorl	$7,%eax
+	addl	%edi,%edx
+	pslld	$2,%xmm3
+	addl	56(%esp),%ecx
+	xorl	%eax,%esi
+	psrld	$30,%xmm5
+	movl	%edx,%ebp
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%ebp
+	rorl	$7,%edi
+	addl	%edx,%ecx
+	por	%xmm5,%xmm3
+	addl	60(%esp),%ebx
+	xorl	%edi,%ebp
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	addl	%ebp,%ebx
+	xorl	%edi,%esi
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	addl	(%esp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%ebp
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%ebp
+	rorl	$7,%ecx
+	paddd	%xmm3,%xmm7
+	addl	%ebx,%eax
+	addl	4(%esp),%edi
+	xorl	%ecx,%ebp
+	movl	%eax,%esi
+	movdqa	%xmm7,48(%esp)
+	roll	$5,%eax
+	addl	%ebp,%edi
+	xorl	%ecx,%esi
+	rorl	$7,%ebx
+	addl	%eax,%edi
+	addl	8(%esp),%edx
+	xorl	%ebx,%esi
+	movl	%edi,%ebp
+	roll	$5,%edi
+	addl	%esi,%edx
+	xorl	%ebx,%ebp
+	rorl	$7,%eax
+	addl	%edi,%edx
+	addl	12(%esp),%ecx
+	xorl	%eax,%ebp
+	movl	%edx,%esi
+	roll	$5,%edx
+	addl	%ebp,%ecx
+	xorl	%eax,%esi
+	rorl	$7,%edi
+	addl	%edx,%ecx
+	movl	196(%esp),%ebp	/* ebp = next-block pointer */
+	cmpl	200(%esp),%ebp	/* compare with the end-of-input pointer */
+	je	.L005done	/* no more input: finish the current block without loading another */
+	movdqa	160(%esp),%xmm7	/* restore table entry 0 and the shuffle mask saved by the entry code */
+	movdqa	176(%esp),%xmm6
+	movdqu	(%ebp),%xmm0	/* unaligned load of the next 64-byte block while the current one is finished */
+	movdqu	16(%ebp),%xmm1
+	movdqu	32(%ebp),%xmm2
+	movdqu	48(%ebp),%xmm3
+	addl	$64,%ebp
+.byte	102,15,56,0,198	/* pshufb %xmm6,%xmm0 (hand-encoded) */
+	movl	%ebp,196(%esp)	/* store the advanced next-block pointer */
+	movdqa	%xmm7,96(%esp)
+	addl	16(%esp),%ebx
+	xorl	%edi,%esi
+	movl	%ecx,%ebp
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%edi,%ebp
+	rorl	$7,%edx
+.byte	102,15,56,0,206	/* pshufb %xmm6,%xmm1 */
+	addl	%ecx,%ebx
+	addl	20(%esp),%eax
+	xorl	%edx,%ebp
+	movl	%ebx,%esi
+	paddd	%xmm7,%xmm0	/* add / store / subtract: slots 0..15 receive xmm0+xmm7 for the next pass, xmm0 keeps the shuffled data */
+	roll	$5,%ebx
+	addl	%ebp,%eax
+	xorl	%edx,%esi
+	rorl	$7,%ecx
+	movdqa	%xmm0,(%esp)
+	addl	%ebx,%eax
+	addl	24(%esp),%edi
+	xorl	%ecx,%esi
+	movl	%eax,%ebp
+	psubd	%xmm7,%xmm0
+	roll	$5,%eax
+	addl	%esi,%edi
+	xorl	%ecx,%ebp
+	rorl	$7,%ebx
+	addl	%eax,%edi
+	addl	28(%esp),%edx
+	xorl	%ebx,%ebp
+	movl	%edi,%esi
+	roll	$5,%edi
+	addl	%ebp,%edx
+	xorl	%ebx,%esi
+	rorl	$7,%eax
+	addl	%edi,%edx
+	addl	32(%esp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%ebp
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%ebp
+	rorl	$7,%edi
+.byte	102,15,56,0,214	/* pshufb %xmm6,%xmm2 */
+	addl	%edx,%ecx
+	addl	36(%esp),%ebx
+	xorl	%edi,%ebp
+	movl	%ecx,%esi
+	paddd	%xmm7,%xmm1
+	roll	$5,%ecx
+	addl	%ebp,%ebx
+	xorl	%edi,%esi
+	rorl	$7,%edx
+	movdqa	%xmm1,16(%esp)
+	addl	%ecx,%ebx
+	addl	40(%esp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%ebp
+	psubd	%xmm7,%xmm1
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%ebp
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	44(%esp),%edi
+	xorl	%ecx,%ebp
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%ebp,%edi
+	xorl	%ecx,%esi
+	rorl	$7,%ebx
+	addl	%eax,%edi
+	addl	48(%esp),%edx
+	xorl	%ebx,%esi
+	movl	%edi,%ebp
+	roll	$5,%edi
+	addl	%esi,%edx
+	xorl	%ebx,%ebp
+	rorl	$7,%eax
+.byte	102,15,56,0,222	/* pshufb %xmm6,%xmm3 */
+	addl	%edi,%edx
+	addl	52(%esp),%ecx
+	xorl	%eax,%ebp
+	movl	%edx,%esi
+	paddd	%xmm7,%xmm2
+	roll	$5,%edx
+	addl	%ebp,%ecx
+	xorl	%eax,%esi
+	rorl	$7,%edi
+	movdqa	%xmm2,32(%esp)
+	addl	%edx,%ecx
+	addl	56(%esp),%ebx
+	xorl	%edi,%esi
+	movl	%ecx,%ebp
+	psubd	%xmm7,%xmm2
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%edi,%ebp
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	addl	60(%esp),%eax
+	xorl	%edx,%ebp
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	addl	%ebp,%eax
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	movl	192(%esp),%ebp	/* ebp = state pointer */
+	addl	(%ebp),%eax	/* add the previous state words into eax, esi, ecx, edx, edi and write them back */
+	addl	4(%ebp),%esi
+	addl	8(%ebp),%ecx
+	movl	%eax,(%ebp)
+	addl	12(%ebp),%edx
+	movl	%esi,4(%ebp)
+	addl	16(%ebp),%edi
+	movl	%ecx,8(%ebp)
+	movl	%ecx,%ebx
+	movl	%edx,12(%ebp)
+	xorl	%edx,%ebx	/* ebx = ecx ^ edx (temporary) */
+	movl	%edi,16(%ebp)
+	movl	%esi,%ebp	/* keep the second state word while esi is reused */
+	pshufd	$238,%xmm0,%xmm4	/* same xmm4 preparation the entry code performs before the first pass */
+	andl	%ebx,%esi	/* esi = second word & (ecx ^ edx), as on first entry */
+	movl	%ebp,%ebx	/* ebx = second state word again */
+	jmp	.L004loop
+.align	16
+.L005done:	/* reached when the next-block pointer equals the end pointer: finish the scalar steps for slots 16..60 with no further vector work, then update the state and return */
+	addl	16(%esp),%ebx
+	xorl	%edi,%esi
+	movl	%ecx,%ebp
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%edi,%ebp
+	rorl	$7,%edx	/* paired with an earlier roll $5 on edx: net rotate right by 2 */
+	addl	%ecx,%ebx
+	addl	20(%esp),%eax
+	xorl	%edx,%ebp
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	addl	%ebp,%eax
+	xorl	%edx,%esi
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	24(%esp),%edi
+	xorl	%ecx,%esi
+	movl	%eax,%ebp
+	roll	$5,%eax
+	addl	%esi,%edi
+	xorl	%ecx,%ebp
+	rorl	$7,%ebx
+	addl	%eax,%edi
+	addl	28(%esp),%edx
+	xorl	%ebx,%ebp
+	movl	%edi,%esi
+	roll	$5,%edi
+	addl	%ebp,%edx
+	xorl	%ebx,%esi
+	rorl	$7,%eax
+	addl	%edi,%edx
+	addl	32(%esp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%ebp
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%ebp
+	rorl	$7,%edi
+	addl	%edx,%ecx
+	addl	36(%esp),%ebx
+	xorl	%edi,%ebp
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	addl	%ebp,%ebx
+	xorl	%edi,%esi
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	addl	40(%esp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%ebp
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%ebp
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	44(%esp),%edi
+	xorl	%ecx,%ebp
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%ebp,%edi
+	xorl	%ecx,%esi
+	rorl	$7,%ebx
+	addl	%eax,%edi
+	addl	48(%esp),%edx
+	xorl	%ebx,%esi
+	movl	%edi,%ebp
+	roll	$5,%edi
+	addl	%esi,%edx
+	xorl	%ebx,%ebp
+	rorl	$7,%eax
+	addl	%edi,%edx
+	addl	52(%esp),%ecx
+	xorl	%eax,%ebp
+	movl	%edx,%esi
+	roll	$5,%edx
+	addl	%ebp,%ecx
+	xorl	%eax,%esi
+	rorl	$7,%edi
+	addl	%edx,%ecx
+	addl	56(%esp),%ebx
+	xorl	%edi,%esi
+	movl	%ecx,%ebp
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%edi,%ebp
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	addl	60(%esp),%eax
+	xorl	%edx,%ebp
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	addl	%ebp,%eax
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	movl	192(%esp),%ebp	/* ebp = state pointer saved in the frame at entry */
+	addl	(%ebp),%eax	/* fold the previous state words into eax, esi, ecx, edx, edi */
+	movl	204(%esp),%esp	/* restore the stack pointer saved at entry (frame values are no longer needed) */
+	addl	4(%ebp),%esi
+	addl	8(%ebp),%ecx
+	movl	%eax,(%ebp)	/* write the five updated state words back */
+	addl	12(%ebp),%edx
+	movl	%esi,4(%ebp)
+	addl	16(%ebp),%edi
+	movl	%ecx,8(%ebp)
+	movl	%edx,12(%ebp)
+	movl	%edi,16(%ebp)
+	popl	%edi	/* restore the registers pushed at entry, in reverse order */
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret	/* NOTE(review): the stack scratch area and xmm registers are not cleared on this path -- confirm that is acceptable for callers */
+.size	_sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3
+.hidden	_sha1_block_data_order_avx
+.type	_sha1_block_data_order_avx,@function
+.align	16
+_sha1_block_data_order_avx:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	call	.L006pic_point
+.L006pic_point:
+	popl	%ebp
+	leal	.LK_XX_XX-.L006pic_point(%ebp),%ebp
+.Lavx_shortcut:
+	vzeroall
+	vmovdqa	(%ebp),%xmm7
+	vmovdqa	16(%ebp),%xmm0
+	vmovdqa	32(%ebp),%xmm1
+	vmovdqa	48(%ebp),%xmm2
+	vmovdqa	64(%ebp),%xmm6
+	movl	20(%esp),%edi
+	movl	24(%esp),%ebp
+	movl	28(%esp),%edx
+	movl	%esp,%esi
+	subl	$208,%esp
+	andl	$-64,%esp
+	vmovdqa	%xmm0,112(%esp)
+	vmovdqa	%xmm1,128(%esp)
+	vmovdqa	%xmm2,144(%esp)
+	shll	$6,%edx
+	vmovdqa	%xmm7,160(%esp)
+	addl	%ebp,%edx
+	vmovdqa	%xmm6,176(%esp)
+	addl	$64,%ebp
+	movl	%edi,192(%esp)
+	movl	%ebp,196(%esp)
+	movl	%edx,200(%esp)
+	movl	%esi,204(%esp)
+	movl	(%edi),%eax
+	movl	4(%edi),%ebx
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+	movl	16(%edi),%edi
+	movl	%ebx,%esi
+	vmovdqu	-64(%ebp),%xmm0
+	vmovdqu	-48(%ebp),%xmm1
+	vmovdqu	-32(%ebp),%xmm2
+	vmovdqu	-16(%ebp),%xmm3
+	vpshufb	%xmm6,%xmm0,%xmm0
+	vpshufb	%xmm6,%xmm1,%xmm1
+	vpshufb	%xmm6,%xmm2,%xmm2
+	vmovdqa	%xmm7,96(%esp)
+	vpshufb	%xmm6,%xmm3,%xmm3
+	vpaddd	%xmm7,%xmm0,%xmm4
+	vpaddd	%xmm7,%xmm1,%xmm5
+	vpaddd	%xmm7,%xmm2,%xmm6
+	vmovdqa	%xmm4,(%esp)
+	movl	%ecx,%ebp
+	vmovdqa	%xmm5,16(%esp)
+	xorl	%edx,%ebp
+	vmovdqa	%xmm6,32(%esp)
+	andl	%ebp,%esi
+	jmp	.L007loop
+.align	16
+.L007loop:
+	shrdl	$2,%ebx,%ebx
+	xorl	%edx,%esi
+	vpalignr	$8,%xmm0,%xmm1,%xmm4
+	movl	%eax,%ebp
+	addl	(%esp),%edi
+	vpaddd	%xmm3,%xmm7,%xmm7
+	vmovdqa	%xmm0,64(%esp)
+	xorl	%ecx,%ebx
+	shldl	$5,%eax,%eax
+	vpsrldq	$4,%xmm3,%xmm6
+	addl	%esi,%edi
+	andl	%ebx,%ebp
+	vpxor	%xmm0,%xmm4,%xmm4
+	xorl	%ecx,%ebx
+	addl	%eax,%edi
+	vpxor	%xmm2,%xmm6,%xmm6
+	shrdl	$7,%eax,%eax
+	xorl	%ecx,%ebp
+	vmovdqa	%xmm7,48(%esp)
+	movl	%edi,%esi
+	addl	4(%esp),%edx
+	vpxor	%xmm6,%xmm4,%xmm4
+	xorl	%ebx,%eax
+	shldl	$5,%edi,%edi
+	addl	%ebp,%edx
+	andl	%eax,%esi
+	vpsrld	$31,%xmm4,%xmm6
+	xorl	%ebx,%eax
+	addl	%edi,%edx
+	shrdl	$7,%edi,%edi
+	xorl	%ebx,%esi
+	vpslldq	$12,%xmm4,%xmm0
+	vpaddd	%xmm4,%xmm4,%xmm4
+	movl	%edx,%ebp
+	addl	8(%esp),%ecx
+	xorl	%eax,%edi
+	shldl	$5,%edx,%edx
+	vpsrld	$30,%xmm0,%xmm7
+	vpor	%xmm6,%xmm4,%xmm4
+	addl	%esi,%ecx
+	andl	%edi,%ebp
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	vpslld	$2,%xmm0,%xmm0
+	shrdl	$7,%edx,%edx
+	xorl	%eax,%ebp
+	vpxor	%xmm7,%xmm4,%xmm4
+	movl	%ecx,%esi
+	addl	12(%esp),%ebx
+	xorl	%edi,%edx
+	shldl	$5,%ecx,%ecx
+	vpxor	%xmm0,%xmm4,%xmm4
+	addl	%ebp,%ebx
+	andl	%edx,%esi
+	vmovdqa	96(%esp),%xmm0
+	xorl	%edi,%edx
+	addl	%ecx,%ebx
+	shrdl	$7,%ecx,%ecx
+	xorl	%edi,%esi
+	vpalignr	$8,%xmm1,%xmm2,%xmm5
+	movl	%ebx,%ebp
+	addl	16(%esp),%eax
+	vpaddd	%xmm4,%xmm0,%xmm0
+	vmovdqa	%xmm1,80(%esp)
+	xorl	%edx,%ecx
+	shldl	$5,%ebx,%ebx
+	vpsrldq	$4,%xmm4,%xmm7
+	addl	%esi,%eax
+	andl	%ecx,%ebp
+	vpxor	%xmm1,%xmm5,%xmm5
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	vpxor	%xmm3,%xmm7,%xmm7
+	shrdl	$7,%ebx,%ebx
+	xorl	%edx,%ebp
+	vmovdqa	%xmm0,(%esp)
+	movl	%eax,%esi
+	addl	20(%esp),%edi
+	vpxor	%xmm7,%xmm5,%xmm5
+	xorl	%ecx,%ebx
+	shldl	$5,%eax,%eax
+	addl	%ebp,%edi
+	andl	%ebx,%esi
+	vpsrld	$31,%xmm5,%xmm7
+	xorl	%ecx,%ebx
+	addl	%eax,%edi
+	shrdl	$7,%eax,%eax
+	xorl	%ecx,%esi
+	vpslldq	$12,%xmm5,%xmm1
+	vpaddd	%xmm5,%xmm5,%xmm5
+	movl	%edi,%ebp
+	addl	24(%esp),%edx
+	xorl	%ebx,%eax
+	shldl	$5,%edi,%edi
+	vpsrld	$30,%xmm1,%xmm0
+	vpor	%xmm7,%xmm5,%xmm5
+	addl	%esi,%edx
+	andl	%eax,%ebp
+	xorl	%ebx,%eax
+	addl	%edi,%edx
+	vpslld	$2,%xmm1,%xmm1
+	shrdl	$7,%edi,%edi
+	xorl	%ebx,%ebp
+	vpxor	%xmm0,%xmm5,%xmm5
+	movl	%edx,%esi
+	addl	28(%esp),%ecx
+	xorl	%eax,%edi
+	shldl	$5,%edx,%edx
+	vpxor	%xmm1,%xmm5,%xmm5
+	addl	%ebp,%ecx
+	andl	%edi,%esi
+	vmovdqa	112(%esp),%xmm1
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	shrdl	$7,%edx,%edx
+	xorl	%eax,%esi
+	vpalignr	$8,%xmm2,%xmm3,%xmm6
+	movl	%ecx,%ebp
+	addl	32(%esp),%ebx
+	vpaddd	%xmm5,%xmm1,%xmm1
+	vmovdqa	%xmm2,96(%esp)
+	xorl	%edi,%edx
+	shldl	$5,%ecx,%ecx
+	vpsrldq	$4,%xmm5,%xmm0
+	addl	%esi,%ebx
+	andl	%edx,%ebp
+	vpxor	%xmm2,%xmm6,%xmm6
+	xorl	%edi,%edx
+	addl	%ecx,%ebx
+	vpxor	%xmm4,%xmm0,%xmm0
+	shrdl	$7,%ecx,%ecx
+	xorl	%edi,%ebp
+	vmovdqa	%xmm1,16(%esp)
+	movl	%ebx,%esi
+	addl	36(%esp),%eax
+	vpxor	%xmm0,%xmm6,%xmm6
+	xorl	%edx,%ecx
+	shldl	$5,%ebx,%ebx
+	addl	%ebp,%eax
+	andl	%ecx,%esi
+	vpsrld	$31,%xmm6,%xmm0
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	shrdl	$7,%ebx,%ebx
+	xorl	%edx,%esi
+	vpslldq	$12,%xmm6,%xmm2
+	vpaddd	%xmm6,%xmm6,%xmm6
+	movl	%eax,%ebp
+	addl	40(%esp),%edi
+	xorl	%ecx,%ebx
+	shldl	$5,%eax,%eax
+	vpsrld	$30,%xmm2,%xmm1
+	vpor	%xmm0,%xmm6,%xmm6
+	addl	%esi,%edi
+	andl	%ebx,%ebp
+	xorl	%ecx,%ebx
+	addl	%eax,%edi
+	vpslld	$2,%xmm2,%xmm2
+	vmovdqa	64(%esp),%xmm0
+	shrdl	$7,%eax,%eax
+	xorl	%ecx,%ebp
+	vpxor	%xmm1,%xmm6,%xmm6
+	movl	%edi,%esi
+	addl	44(%esp),%edx
+	xorl	%ebx,%eax
+	shldl	$5,%edi,%edi
+	vpxor	%xmm2,%xmm6,%xmm6
+	addl	%ebp,%edx
+	andl	%eax,%esi
+	vmovdqa	112(%esp),%xmm2
+	xorl	%ebx,%eax
+	addl	%edi,%edx
+	shrdl	$7,%edi,%edi
+	xorl	%ebx,%esi
+	vpalignr	$8,%xmm3,%xmm4,%xmm7
+	movl	%edx,%ebp
+	addl	48(%esp),%ecx
+	vpaddd	%xmm6,%xmm2,%xmm2
+	vmovdqa	%xmm3,64(%esp)
+	xorl	%eax,%edi
+	shldl	$5,%edx,%edx
+	vpsrldq	$4,%xmm6,%xmm1
+	addl	%esi,%ecx
+	andl	%edi,%ebp
+	vpxor	%xmm3,%xmm7,%xmm7
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	vpxor	%xmm5,%xmm1,%xmm1
+	shrdl	$7,%edx,%edx
+	xorl	%eax,%ebp
+	vmovdqa	%xmm2,32(%esp)
+	movl	%ecx,%esi
+	addl	52(%esp),%ebx
+	vpxor	%xmm1,%xmm7,%xmm7
+	xorl	%edi,%edx
+	shldl	$5,%ecx,%ecx
+	addl	%ebp,%ebx
+	andl	%edx,%esi
+	vpsrld	$31,%xmm7,%xmm1
+	xorl	%edi,%edx
+	addl	%ecx,%ebx
+	shrdl	$7,%ecx,%ecx
+	xorl	%edi,%esi
+	vpslldq	$12,%xmm7,%xmm3
+	vpaddd	%xmm7,%xmm7,%xmm7
+	movl	%ebx,%ebp
+	addl	56(%esp),%eax
+	xorl	%edx,%ecx
+	shldl	$5,%ebx,%ebx
+	vpsrld	$30,%xmm3,%xmm2
+	vpor	%xmm1,%xmm7,%xmm7
+	addl	%esi,%eax
+	andl	%ecx,%ebp
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	vpslld	$2,%xmm3,%xmm3
+	vmovdqa	80(%esp),%xmm1
+	shrdl	$7,%ebx,%ebx
+	xorl	%edx,%ebp
+	vpxor	%xmm2,%xmm7,%xmm7
+	movl	%eax,%esi
+	addl	60(%esp),%edi
+	xorl	%ecx,%ebx
+	shldl	$5,%eax,%eax
+	vpxor	%xmm3,%xmm7,%xmm7
+	addl	%ebp,%edi
+	andl	%ebx,%esi
+	vmovdqa	112(%esp),%xmm3
+	xorl	%ecx,%ebx
+	addl	%eax,%edi
+	vpalignr	$8,%xmm6,%xmm7,%xmm2
+	vpxor	%xmm4,%xmm0,%xmm0
+	shrdl	$7,%eax,%eax
+	xorl	%ecx,%esi
+	movl	%edi,%ebp
+	addl	(%esp),%edx
+	vpxor	%xmm1,%xmm0,%xmm0
+	vmovdqa	%xmm4,80(%esp)
+	xorl	%ebx,%eax
+	shldl	$5,%edi,%edi
+	vmovdqa	%xmm3,%xmm4
+	vpaddd	%xmm7,%xmm3,%xmm3
+	addl	%esi,%edx
+	andl	%eax,%ebp
+	vpxor	%xmm2,%xmm0,%xmm0
+	xorl	%ebx,%eax
+	addl	%edi,%edx
+	shrdl	$7,%edi,%edi
+	xorl	%ebx,%ebp
+	vpsrld	$30,%xmm0,%xmm2
+	vmovdqa	%xmm3,48(%esp)
+	movl	%edx,%esi
+	addl	4(%esp),%ecx
+	xorl	%eax,%edi
+	shldl	$5,%edx,%edx
+	vpslld	$2,%xmm0,%xmm0
+	addl	%ebp,%ecx
+	andl	%edi,%esi
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	shrdl	$7,%edx,%edx
+	xorl	%eax,%esi
+	movl	%ecx,%ebp
+	addl	8(%esp),%ebx
+	vpor	%xmm2,%xmm0,%xmm0
+	xorl	%edi,%edx
+	shldl	$5,%ecx,%ecx
+	vmovdqa	96(%esp),%xmm2
+	addl	%esi,%ebx
+	andl	%edx,%ebp
+	xorl	%edi,%edx
+	addl	%ecx,%ebx
+	addl	12(%esp),%eax
+	xorl	%edi,%ebp
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%ebp,%eax
+	xorl	%edx,%esi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	vpalignr	$8,%xmm7,%xmm0,%xmm3
+	vpxor	%xmm5,%xmm1,%xmm1
+	addl	16(%esp),%edi
+	xorl	%ecx,%esi
+	movl	%eax,%ebp
+	shldl	$5,%eax,%eax
+	vpxor	%xmm2,%xmm1,%xmm1
+	vmovdqa	%xmm5,96(%esp)
+	addl	%esi,%edi
+	xorl	%ecx,%ebp
+	vmovdqa	%xmm4,%xmm5
+	vpaddd	%xmm0,%xmm4,%xmm4
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%edi
+	vpxor	%xmm3,%xmm1,%xmm1
+	addl	20(%esp),%edx
+	xorl	%ebx,%ebp
+	movl	%edi,%esi
+	shldl	$5,%edi,%edi
+	vpsrld	$30,%xmm1,%xmm3
+	vmovdqa	%xmm4,(%esp)
+	addl	%ebp,%edx
+	xorl	%ebx,%esi
+	shrdl	$7,%eax,%eax
+	addl	%edi,%edx
+	vpslld	$2,%xmm1,%xmm1
+	addl	24(%esp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%ebp
+	shldl	$5,%edx,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%ebp
+	shrdl	$7,%edi,%edi
+	addl	%edx,%ecx
+	vpor	%xmm3,%xmm1,%xmm1
+	addl	28(%esp),%ebx
+	xorl	%edi,%ebp
+	vmovdqa	64(%esp),%xmm3
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	addl	%ebp,%ebx
+	xorl	%edi,%esi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	vpalignr	$8,%xmm0,%xmm1,%xmm4
+	vpxor	%xmm6,%xmm2,%xmm2
+	addl	32(%esp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%ebp
+	shldl	$5,%ebx,%ebx
+	vpxor	%xmm3,%xmm2,%xmm2
+	vmovdqa	%xmm6,64(%esp)
+	addl	%esi,%eax
+	xorl	%edx,%ebp
+	vmovdqa	128(%esp),%xmm6
+	vpaddd	%xmm1,%xmm5,%xmm5
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	vpxor	%xmm4,%xmm2,%xmm2
+	addl	36(%esp),%edi
+	xorl	%ecx,%ebp
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	vpsrld	$30,%xmm2,%xmm4
+	vmovdqa	%xmm5,16(%esp)
+	addl	%ebp,%edi
+	xorl	%ecx,%esi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%edi
+	vpslld	$2,%xmm2,%xmm2
+	addl	40(%esp),%edx
+	xorl	%ebx,%esi
+	movl	%edi,%ebp
+	shldl	$5,%edi,%edi
+	addl	%esi,%edx
+	xorl	%ebx,%ebp
+	shrdl	$7,%eax,%eax
+	addl	%edi,%edx
+	vpor	%xmm4,%xmm2,%xmm2
+	addl	44(%esp),%ecx
+	xorl	%eax,%ebp
+	vmovdqa	80(%esp),%xmm4
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	addl	%ebp,%ecx
+	xorl	%eax,%esi
+	shrdl	$7,%edi,%edi
+	addl	%edx,%ecx
+	vpalignr	$8,%xmm1,%xmm2,%xmm5
+	vpxor	%xmm7,%xmm3,%xmm3
+	addl	48(%esp),%ebx
+	xorl	%edi,%esi
+	movl	%ecx,%ebp
+	shldl	$5,%ecx,%ecx
+	vpxor	%xmm4,%xmm3,%xmm3
+	vmovdqa	%xmm7,80(%esp)
+	addl	%esi,%ebx
+	xorl	%edi,%ebp
+	vmovdqa	%xmm6,%xmm7
+	vpaddd	%xmm2,%xmm6,%xmm6
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	vpxor	%xmm5,%xmm3,%xmm3
+	addl	52(%esp),%eax
+	xorl	%edx,%ebp
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	vpsrld	$30,%xmm3,%xmm5
+	vmovdqa	%xmm6,32(%esp)
+	addl	%ebp,%eax
+	xorl	%edx,%esi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	vpslld	$2,%xmm3,%xmm3
+	addl	56(%esp),%edi
+	xorl	%ecx,%esi
+	movl	%eax,%ebp
+	shldl	$5,%eax,%eax
+	addl	%esi,%edi
+	xorl	%ecx,%ebp
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%edi
+	vpor	%xmm5,%xmm3,%xmm3
+	addl	60(%esp),%edx
+	xorl	%ebx,%ebp
+	vmovdqa	96(%esp),%xmm5
+	movl	%edi,%esi
+	shldl	$5,%edi,%edi
+	addl	%ebp,%edx
+	xorl	%ebx,%esi
+	shrdl	$7,%eax,%eax
+	addl	%edi,%edx
+	vpalignr	$8,%xmm2,%xmm3,%xmm6
+	vpxor	%xmm0,%xmm4,%xmm4
+	addl	(%esp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%ebp
+	shldl	$5,%edx,%edx
+	vpxor	%xmm5,%xmm4,%xmm4
+	vmovdqa	%xmm0,96(%esp)
+	addl	%esi,%ecx
+	xorl	%eax,%ebp
+	vmovdqa	%xmm7,%xmm0
+	vpaddd	%xmm3,%xmm7,%xmm7
+	shrdl	$7,%edi,%edi
+	addl	%edx,%ecx
+	vpxor	%xmm6,%xmm4,%xmm4
+	addl	4(%esp),%ebx
+	xorl	%edi,%ebp
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	vpsrld	$30,%xmm4,%xmm6
+	vmovdqa	%xmm7,48(%esp)
+	addl	%ebp,%ebx
+	xorl	%edi,%esi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	vpslld	$2,%xmm4,%xmm4
+	addl	8(%esp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%ebp
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%ebp
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	vpor	%xmm6,%xmm4,%xmm4
+	addl	12(%esp),%edi
+	xorl	%ecx,%ebp
+	vmovdqa	64(%esp),%xmm6
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	addl	%ebp,%edi
+	xorl	%ecx,%esi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%edi
+	vpalignr	$8,%xmm3,%xmm4,%xmm7
+	vpxor	%xmm1,%xmm5,%xmm5
+	addl	16(%esp),%edx
+	xorl	%ebx,%esi
+	movl	%edi,%ebp
+	shldl	$5,%edi,%edi
+	vpxor	%xmm6,%xmm5,%xmm5
+	vmovdqa	%xmm1,64(%esp)
+	addl	%esi,%edx
+	xorl	%ebx,%ebp
+	vmovdqa	%xmm0,%xmm1
+	vpaddd	%xmm4,%xmm0,%xmm0
+	shrdl	$7,%eax,%eax
+	addl	%edi,%edx
+	vpxor	%xmm7,%xmm5,%xmm5
+	addl	20(%esp),%ecx
+	xorl	%eax,%ebp
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	vpsrld	$30,%xmm5,%xmm7
+	vmovdqa	%xmm0,(%esp)
+	addl	%ebp,%ecx
+	xorl	%eax,%esi
+	shrdl	$7,%edi,%edi
+	addl	%edx,%ecx
+	vpslld	$2,%xmm5,%xmm5
+	addl	24(%esp),%ebx
+	xorl	%edi,%esi
+	movl	%ecx,%ebp
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	xorl	%edi,%ebp
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	vpor	%xmm7,%xmm5,%xmm5
+	addl	28(%esp),%eax
+	vmovdqa	80(%esp),%xmm7
+	shrdl	$7,%ecx,%ecx
+	movl	%ebx,%esi
+	xorl	%edx,%ebp
+	shldl	$5,%ebx,%ebx
+	addl	%ebp,%eax
+	xorl	%ecx,%esi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	vpalignr	$8,%xmm4,%xmm5,%xmm0
+	vpxor	%xmm2,%xmm6,%xmm6
+	addl	32(%esp),%edi
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	shrdl	$7,%ebx,%ebx
+	vpxor	%xmm7,%xmm6,%xmm6
+	vmovdqa	%xmm2,80(%esp)
+	movl	%eax,%ebp
+	xorl	%ecx,%esi
+	vmovdqa	%xmm1,%xmm2
+	vpaddd	%xmm5,%xmm1,%xmm1
+	shldl	$5,%eax,%eax
+	addl	%esi,%edi
+	vpxor	%xmm0,%xmm6,%xmm6
+	xorl	%ebx,%ebp
+	xorl	%ecx,%ebx
+	addl	%eax,%edi
+	addl	36(%esp),%edx
+	vpsrld	$30,%xmm6,%xmm0
+	vmovdqa	%xmm1,16(%esp)
+	andl	%ebx,%ebp
+	xorl	%ecx,%ebx
+	shrdl	$7,%eax,%eax
+	movl	%edi,%esi
+	vpslld	$2,%xmm6,%xmm6
+	xorl	%ebx,%ebp
+	shldl	$5,%edi,%edi
+	addl	%ebp,%edx
+	xorl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	%edi,%edx
+	addl	40(%esp),%ecx
+	andl	%eax,%esi
+	vpor	%xmm0,%xmm6,%xmm6
+	xorl	%ebx,%eax
+	shrdl	$7,%edi,%edi
+	vmovdqa	96(%esp),%xmm0
+	movl	%edx,%ebp
+	xorl	%eax,%esi
+	shldl	$5,%edx,%edx
+	addl	%esi,%ecx
+	xorl	%edi,%ebp
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	addl	44(%esp),%ebx
+	andl	%edi,%ebp
+	xorl	%eax,%edi
+	shrdl	$7,%edx,%edx
+	movl	%ecx,%esi
+	xorl	%edi,%ebp
+	shldl	$5,%ecx,%ecx
+	addl	%ebp,%ebx
+	xorl	%edx,%esi
+	xorl	%edi,%edx
+	addl	%ecx,%ebx
+	vpalignr	$8,%xmm5,%xmm6,%xmm1
+	vpxor	%xmm3,%xmm7,%xmm7
+	addl	48(%esp),%eax
+	andl	%edx,%esi
+	xorl	%edi,%edx
+	shrdl	$7,%ecx,%ecx
+	vpxor	%xmm0,%xmm7,%xmm7
+	vmovdqa	%xmm3,96(%esp)
+	movl	%ebx,%ebp
+	xorl	%edx,%esi
+	vmovdqa	144(%esp),%xmm3
+	vpaddd	%xmm6,%xmm2,%xmm2
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	vpxor	%xmm1,%xmm7,%xmm7
+	xorl	%ecx,%ebp
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	addl	52(%esp),%edi
+	vpsrld	$30,%xmm7,%xmm1
+	vmovdqa	%xmm2,32(%esp)
+	andl	%ecx,%ebp
+	xorl	%edx,%ecx
+	shrdl	$7,%ebx,%ebx
+	movl	%eax,%esi
+	vpslld	$2,%xmm7,%xmm7
+	xorl	%ecx,%ebp
+	shldl	$5,%eax,%eax
+	addl	%ebp,%edi
+	xorl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	%eax,%edi
+	addl	56(%esp),%edx
+	andl	%ebx,%esi
+	vpor	%xmm1,%xmm7,%xmm7
+	xorl	%ecx,%ebx
+	shrdl	$7,%eax,%eax
+	vmovdqa	64(%esp),%xmm1
+	movl	%edi,%ebp
+	xorl	%ebx,%esi
+	shldl	$5,%edi,%edi
+	addl	%esi,%edx
+	xorl	%eax,%ebp
+	xorl	%ebx,%eax
+	addl	%edi,%edx
+	addl	60(%esp),%ecx
+	andl	%eax,%ebp
+	xorl	%ebx,%eax
+	shrdl	$7,%edi,%edi
+	movl	%edx,%esi
+	xorl	%eax,%ebp
+	shldl	$5,%edx,%edx
+	addl	%ebp,%ecx
+	xorl	%edi,%esi
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	vpalignr	$8,%xmm6,%xmm7,%xmm2
+	vpxor	%xmm4,%xmm0,%xmm0
+	addl	(%esp),%ebx
+	andl	%edi,%esi
+	xorl	%eax,%edi
+	shrdl	$7,%edx,%edx
+	vpxor	%xmm1,%xmm0,%xmm0
+	vmovdqa	%xmm4,64(%esp)
+	movl	%ecx,%ebp
+	xorl	%edi,%esi
+	vmovdqa	%xmm3,%xmm4
+	vpaddd	%xmm7,%xmm3,%xmm3
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	vpxor	%xmm2,%xmm0,%xmm0
+	xorl	%edx,%ebp
+	xorl	%edi,%edx
+	addl	%ecx,%ebx
+	addl	4(%esp),%eax
+	vpsrld	$30,%xmm0,%xmm2
+	vmovdqa	%xmm3,48(%esp)
+	andl	%edx,%ebp
+	xorl	%edi,%edx
+	shrdl	$7,%ecx,%ecx
+	movl	%ebx,%esi
+	vpslld	$2,%xmm0,%xmm0
+	xorl	%edx,%ebp
+	shldl	$5,%ebx,%ebx
+	addl	%ebp,%eax
+	xorl	%ecx,%esi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	addl	8(%esp),%edi
+	andl	%ecx,%esi
+	vpor	%xmm2,%xmm0,%xmm0
+	xorl	%edx,%ecx
+	shrdl	$7,%ebx,%ebx
+	vmovdqa	80(%esp),%xmm2
+	movl	%eax,%ebp
+	xorl	%ecx,%esi
+	shldl	$5,%eax,%eax
+	addl	%esi,%edi
+	xorl	%ebx,%ebp
+	xorl	%ecx,%ebx
+	addl	%eax,%edi
+	addl	12(%esp),%edx
+	andl	%ebx,%ebp
+	xorl	%ecx,%ebx
+	shrdl	$7,%eax,%eax
+	movl	%edi,%esi
+	xorl	%ebx,%ebp
+	shldl	$5,%edi,%edi
+	addl	%ebp,%edx
+	xorl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	%edi,%edx
+	vpalignr	$8,%xmm7,%xmm0,%xmm3
+	vpxor	%xmm5,%xmm1,%xmm1
+	addl	16(%esp),%ecx
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	shrdl	$7,%edi,%edi
+	vpxor	%xmm2,%xmm1,%xmm1
+	vmovdqa	%xmm5,80(%esp)
+	movl	%edx,%ebp
+	xorl	%eax,%esi
+	vmovdqa	%xmm4,%xmm5
+	vpaddd	%xmm0,%xmm4,%xmm4
+	shldl	$5,%edx,%edx
+	addl	%esi,%ecx
+	vpxor	%xmm3,%xmm1,%xmm1
+	xorl	%edi,%ebp
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	addl	20(%esp),%ebx
+	vpsrld	$30,%xmm1,%xmm3
+	vmovdqa	%xmm4,(%esp)
+	andl	%edi,%ebp
+	xorl	%eax,%edi
+	shrdl	$7,%edx,%edx
+	movl	%ecx,%esi
+	vpslld	$2,%xmm1,%xmm1
+	xorl	%edi,%ebp
+	shldl	$5,%ecx,%ecx
+	addl	%ebp,%ebx
+	xorl	%edx,%esi
+	xorl	%edi,%edx
+	addl	%ecx,%ebx
+	addl	24(%esp),%eax
+	andl	%edx,%esi
+	vpor	%xmm3,%xmm1,%xmm1
+	xorl	%edi,%edx
+	shrdl	$7,%ecx,%ecx
+	vmovdqa	96(%esp),%xmm3
+	movl	%ebx,%ebp
+	xorl	%edx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	xorl	%ecx,%ebp
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	addl	28(%esp),%edi
+	andl	%ecx,%ebp
+	xorl	%edx,%ecx
+	shrdl	$7,%ebx,%ebx
+	movl	%eax,%esi
+	xorl	%ecx,%ebp
+	shldl	$5,%eax,%eax
+	addl	%ebp,%edi
+	xorl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	%eax,%edi
+	vpalignr	$8,%xmm0,%xmm1,%xmm4
+	vpxor	%xmm6,%xmm2,%xmm2
+	addl	32(%esp),%edx
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	shrdl	$7,%eax,%eax
+	vpxor	%xmm3,%xmm2,%xmm2
+	vmovdqa	%xmm6,96(%esp)
+	movl	%edi,%ebp
+	xorl	%ebx,%esi
+	vmovdqa	%xmm5,%xmm6
+	vpaddd	%xmm1,%xmm5,%xmm5
+	shldl	$5,%edi,%edi
+	addl	%esi,%edx
+	vpxor	%xmm4,%xmm2,%xmm2
+	xorl	%eax,%ebp
+	xorl	%ebx,%eax
+	addl	%edi,%edx
+	addl	36(%esp),%ecx
+	vpsrld	$30,%xmm2,%xmm4
+	vmovdqa	%xmm5,16(%esp)
+	andl	%eax,%ebp
+	xorl	%ebx,%eax
+	shrdl	$7,%edi,%edi
+	movl	%edx,%esi
+	vpslld	$2,%xmm2,%xmm2
+	xorl	%eax,%ebp
+	shldl	$5,%edx,%edx
+	addl	%ebp,%ecx
+	xorl	%edi,%esi
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	addl	40(%esp),%ebx
+	andl	%edi,%esi
+	vpor	%xmm4,%xmm2,%xmm2
+	xorl	%eax,%edi
+	shrdl	$7,%edx,%edx
+	vmovdqa	64(%esp),%xmm4
+	movl	%ecx,%ebp
+	xorl	%edi,%esi
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	xorl	%edx,%ebp
+	xorl	%edi,%edx
+	addl	%ecx,%ebx
+	addl	44(%esp),%eax
+	andl	%edx,%ebp
+	xorl	%edi,%edx
+	shrdl	$7,%ecx,%ecx
+	movl	%ebx,%esi
+	xorl	%edx,%ebp
+	shldl	$5,%ebx,%ebx
+	addl	%ebp,%eax
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	vpalignr	$8,%xmm1,%xmm2,%xmm5
+	vpxor	%xmm7,%xmm3,%xmm3
+	addl	48(%esp),%edi
+	xorl	%ecx,%esi
+	movl	%eax,%ebp
+	shldl	$5,%eax,%eax
+	vpxor	%xmm4,%xmm3,%xmm3
+	vmovdqa	%xmm7,64(%esp)
+	addl	%esi,%edi
+	xorl	%ecx,%ebp
+	vmovdqa	%xmm6,%xmm7
+	vpaddd	%xmm2,%xmm6,%xmm6
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%edi
+	vpxor	%xmm5,%xmm3,%xmm3
+	addl	52(%esp),%edx
+	xorl	%ebx,%ebp
+	movl	%edi,%esi
+	shldl	$5,%edi,%edi
+	vpsrld	$30,%xmm3,%xmm5
+	vmovdqa	%xmm6,32(%esp)
+	addl	%ebp,%edx
+	xorl	%ebx,%esi
+	shrdl	$7,%eax,%eax
+	addl	%edi,%edx
+	vpslld	$2,%xmm3,%xmm3
+	addl	56(%esp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%ebp
+	shldl	$5,%edx,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%ebp
+	shrdl	$7,%edi,%edi
+	addl	%edx,%ecx
+	vpor	%xmm5,%xmm3,%xmm3
+	addl	60(%esp),%ebx
+	xorl	%edi,%ebp
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	addl	%ebp,%ebx
+	xorl	%edi,%esi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	(%esp),%eax
+	vpaddd	%xmm3,%xmm7,%xmm7
+	xorl	%edx,%esi
+	movl	%ebx,%ebp
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	vmovdqa	%xmm7,48(%esp)
+	xorl	%edx,%ebp
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	addl	4(%esp),%edi
+	xorl	%ecx,%ebp
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	addl	%ebp,%edi
+	xorl	%ecx,%esi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%edi
+	addl	8(%esp),%edx
+	xorl	%ebx,%esi
+	movl	%edi,%ebp
+	shldl	$5,%edi,%edi
+	addl	%esi,%edx
+	xorl	%ebx,%ebp
+	shrdl	$7,%eax,%eax
+	addl	%edi,%edx
+	addl	12(%esp),%ecx
+	xorl	%eax,%ebp
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	addl	%ebp,%ecx
+	xorl	%eax,%esi
+	shrdl	$7,%edi,%edi
+	addl	%edx,%ecx
+	movl	196(%esp),%ebp
+	cmpl	200(%esp),%ebp
+	je	.L008done
+	vmovdqa	160(%esp),%xmm7
+	vmovdqa	176(%esp),%xmm6
+	vmovdqu	(%ebp),%xmm0
+	vmovdqu	16(%ebp),%xmm1
+	vmovdqu	32(%ebp),%xmm2
+	vmovdqu	48(%ebp),%xmm3
+	addl	$64,%ebp
+	vpshufb	%xmm6,%xmm0,%xmm0
+	movl	%ebp,196(%esp)
+	vmovdqa	%xmm7,96(%esp)
+	addl	16(%esp),%ebx
+	xorl	%edi,%esi
+	vpshufb	%xmm6,%xmm1,%xmm1
+	movl	%ecx,%ebp
+	shldl	$5,%ecx,%ecx
+	vpaddd	%xmm7,%xmm0,%xmm4
+	addl	%esi,%ebx
+	xorl	%edi,%ebp
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	vmovdqa	%xmm4,(%esp)
+	addl	20(%esp),%eax
+	xorl	%edx,%ebp
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%ebp,%eax
+	xorl	%edx,%esi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	addl	24(%esp),%edi
+	xorl	%ecx,%esi
+	movl	%eax,%ebp
+	shldl	$5,%eax,%eax
+	addl	%esi,%edi
+	xorl	%ecx,%ebp
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%edi
+	addl	28(%esp),%edx
+	xorl	%ebx,%ebp
+	movl	%edi,%esi
+	shldl	$5,%edi,%edi
+	addl	%ebp,%edx
+	xorl	%ebx,%esi
+	shrdl	$7,%eax,%eax
+	addl	%edi,%edx
+	addl	32(%esp),%ecx
+	xorl	%eax,%esi
+	vpshufb	%xmm6,%xmm2,%xmm2
+	movl	%edx,%ebp
+	shldl	$5,%edx,%edx
+	vpaddd	%xmm7,%xmm1,%xmm5
+	addl	%esi,%ecx
+	xorl	%eax,%ebp
+	shrdl	$7,%edi,%edi
+	addl	%edx,%ecx
+	vmovdqa	%xmm5,16(%esp)
+	addl	36(%esp),%ebx
+	xorl	%edi,%ebp
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	addl	%ebp,%ebx
+	xorl	%edi,%esi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	40(%esp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%ebp
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%ebp
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	addl	44(%esp),%edi
+	xorl	%ecx,%ebp
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	addl	%ebp,%edi
+	xorl	%ecx,%esi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%edi
+	addl	48(%esp),%edx
+	xorl	%ebx,%esi
+	vpshufb	%xmm6,%xmm3,%xmm3
+	movl	%edi,%ebp
+	shldl	$5,%edi,%edi
+	vpaddd	%xmm7,%xmm2,%xmm6
+	addl	%esi,%edx
+	xorl	%ebx,%ebp
+	shrdl	$7,%eax,%eax
+	addl	%edi,%edx
+	vmovdqa	%xmm6,32(%esp)
+	addl	52(%esp),%ecx
+	xorl	%eax,%ebp
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	addl	%ebp,%ecx
+	xorl	%eax,%esi
+	shrdl	$7,%edi,%edi
+	addl	%edx,%ecx
+	addl	56(%esp),%ebx
+	xorl	%edi,%esi
+	movl	%ecx,%ebp
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	xorl	%edi,%ebp
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	60(%esp),%eax
+	xorl	%edx,%ebp
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%ebp,%eax
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	movl	192(%esp),%ebp
+	addl	(%ebp),%eax
+	addl	4(%ebp),%esi
+	addl	8(%ebp),%ecx
+	movl	%eax,(%ebp)
+	addl	12(%ebp),%edx
+	movl	%esi,4(%ebp)
+	addl	16(%ebp),%edi
+	movl	%ecx,%ebx
+	movl	%ecx,8(%ebp)
+	xorl	%edx,%ebx
+	movl	%edx,12(%ebp)
+	movl	%edi,16(%ebp)
+	movl	%esi,%ebp
+	andl	%ebx,%esi
+	movl	%ebp,%ebx
+	jmp	.L007loop
+.align	16
+.L008done:	/* Exit path of _sha1_block_data_order_avx, entered when 196(%esp) equals 200(%esp): runs twelve scalar steps that consume the dwords at 16..60(%esp), adds the five working registers into the state dwords and returns. */
+	addl	16(%esp),%ebx	/* step input: dword at 16(%esp); the following eleven steps read 20..60(%esp) */
+	xorl	%edi,%esi
+	movl	%ecx,%ebp
+	shldl	$5,%ecx,%ecx	/* shld with source == destination: rotate ecx left by 5 */
+	addl	%esi,%ebx
+	xorl	%edi,%ebp
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	20(%esp),%eax
+	xorl	%edx,%ebp
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%ebp,%eax
+	xorl	%edx,%esi
+	shrdl	$7,%ecx,%ecx	/* rotate ecx right by 7; combined with the rotate left by 5 above this nets a rotate right by 2 */
+	addl	%ebx,%eax
+	addl	24(%esp),%edi
+	xorl	%ecx,%esi
+	movl	%eax,%ebp
+	shldl	$5,%eax,%eax
+	addl	%esi,%edi
+	xorl	%ecx,%ebp
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%edi
+	addl	28(%esp),%edx
+	xorl	%ebx,%ebp
+	movl	%edi,%esi
+	shldl	$5,%edi,%edi
+	addl	%ebp,%edx
+	xorl	%ebx,%esi
+	shrdl	$7,%eax,%eax
+	addl	%edi,%edx
+	addl	32(%esp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%ebp
+	shldl	$5,%edx,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%ebp
+	shrdl	$7,%edi,%edi
+	addl	%edx,%ecx
+	addl	36(%esp),%ebx
+	xorl	%edi,%ebp
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	addl	%ebp,%ebx
+	xorl	%edi,%esi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	40(%esp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%ebp
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%ebp
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	addl	44(%esp),%edi
+	xorl	%ecx,%ebp
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	addl	%ebp,%edi
+	xorl	%ecx,%esi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%edi
+	addl	48(%esp),%edx
+	xorl	%ebx,%esi
+	movl	%edi,%ebp
+	shldl	$5,%edi,%edi
+	addl	%esi,%edx
+	xorl	%ebx,%ebp
+	shrdl	$7,%eax,%eax
+	addl	%edi,%edx
+	addl	52(%esp),%ecx
+	xorl	%eax,%ebp
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	addl	%ebp,%ecx
+	xorl	%eax,%esi
+	shrdl	$7,%edi,%edi
+	addl	%edx,%ecx
+	addl	56(%esp),%ebx
+	xorl	%edi,%esi
+	movl	%ecx,%ebp
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	xorl	%edi,%ebp
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	60(%esp),%eax	/* last of the twelve stack dwords */
+	xorl	%edx,%ebp
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%ebp,%eax
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	vzeroall	/* zero all vector registers before leaving the AVX code path */
+	movl	192(%esp),%ebp	/* ebp = pointer kept at 192(%esp); NOTE(review): presumably the SHA-1 state argument - confirm against the prologue, which is outside this view */
+	addl	(%ebp),%eax	/* eax += dword 0 of the state */
+	movl	204(%esp),%esp	/* reload esp from the value kept at 204(%esp); every later memory access goes through ebp, so the local frame is no longer needed */
+	addl	4(%ebp),%esi	/* esi, ecx, edx, edi += dwords 1..4; the loads are interleaved with the stores below */
+	addl	8(%ebp),%ecx
+	movl	%eax,(%ebp)	/* write back dword 0 */
+	addl	12(%ebp),%edx
+	movl	%esi,4(%ebp)
+	addl	16(%ebp),%edi
+	movl	%ecx,8(%ebp)
+	movl	%edx,12(%ebp)
+	movl	%edi,16(%ebp)
+	popl	%edi	/* pop edi, esi, ebx, ebp from the reloaded stack, then return */
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	_sha1_block_data_order_avx,.-_sha1_block_data_order_avx
+.align	64
+.LK_XX_XX:	/* Constant pool placed after _sha1_block_data_order_avx: four round constants (each repeated in four lanes), two byte-order masks and an ASCII ident string. */
+.long	1518500249,1518500249,1518500249,1518500249	/* 0x5A827999 x4: SHA-1 K for rounds 0-19 (FIPS 180-4) */
+.long	1859775393,1859775393,1859775393,1859775393	/* 0x6ED9EBA1 x4: SHA-1 K for rounds 20-39 */
+.long	2400959708,2400959708,2400959708,2400959708	/* 0x8F1BBCDC x4: SHA-1 K for rounds 40-59 */
+.long	3395469782,3395469782,3395469782,3395469782	/* 0xCA62C1D6 x4: SHA-1 K for rounds 60-79 */
+.long	66051,67438087,134810123,202182159	/* memory bytes 03 02 01 00 07 06 05 04 0B 0A 09 08 0F 0E 0D 0C: as a byte-shuffle mask this reverses the bytes inside each dword */
+.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0	/* bytes 0F down to 00: as a byte-shuffle mask this reverses all 16 bytes; the code that loads it is not visible in this chunk */
+.byte	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115	/* ASCII "SHA1 block transform for x86, CRYPTOGAMS by <appro@openssl.org>" plus a terminating NUL, spread over these four .byte lines */
+.byte	102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
+.byte	89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
+.byte	114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+#endif
diff --git a/third_party/boringssl/linux-x86/crypto/fipsmodule/sha256-586.S b/third_party/boringssl/linux-x86/crypto/fipsmodule/sha256-586.S
new file mode 100644
index 0000000..984758f
--- /dev/null
+++ b/third_party/boringssl/linux-x86/crypto/fipsmodule/sha256-586.S
@@ -0,0 +1,5560 @@
+#if defined(__i386__)
+.text
+.globl	sha256_block_data_order	/* sha256_block_data_order(arg1, arg2, arg3): entry point; saves registers, builds an aligned frame and picks a code path from OPENSSL_ia32cap_P. */
+.hidden	sha256_block_data_order	/* ELF hidden visibility for the symbol */
+.type	sha256_block_data_order,@function
+.align	16
+sha256_block_data_order:
+.L_sha256_block_data_order_begin:
+	pushl	%ebp	/* save ebp, ebx, esi, edi (16 bytes) */
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi	/* esi = arg1 (first dword above the 16 saved bytes and the return address): pointer to the eight state dwords read at (%esi)..28(%esi) in .L002loop */
+	movl	24(%esp),%edi	/* edi = arg2: input pointer */
+	movl	28(%esp),%eax	/* eax = arg3: count of 64-byte blocks (scaled by 64 below) */
+	movl	%esp,%ebx	/* ebx = stack pointer after the four pushes; saved in the frame and restored before the final pops */
+	call	.L000pic_point	/* position-independent addressing: the call pushes the address of .L000pic_point ... */
+.L000pic_point:
+	popl	%ebp	/* ... which is popped into ebp */
+	leal	.L001K256-.L000pic_point(%ebp),%ebp	/* ebp = run-time address of the .L001K256 table */
+	subl	$16,%esp	/* reserve a 16-byte frame ... */
+	andl	$-64,%esp	/* ... rounded down to a 64-byte boundary */
+	shll	$6,%eax	/* eax = arg3 * 64 */
+	addl	%edi,%eax	/* eax = arg2 + arg3 * 64 = end of input */
+	movl	%esi,(%esp)	/* frame slot 0 = state pointer */
+	movl	%edi,4(%esp)	/* frame slot 4 = input pointer */
+	movl	%eax,8(%esp)	/* frame slot 8 = end of input */
+	movl	%ebx,12(%esp)	/* frame slot 12 = saved stack pointer */
+	leal	OPENSSL_ia32cap_P-.L001K256(%ebp),%edx	/* edx = address of OPENSSL_ia32cap_P, formed relative to the table address in ebp */
+	movl	(%edx),%ecx	/* ecx = capability word 0 */
+	movl	4(%edx),%ebx	/* ebx = capability word 1 */
+	testl	$1048576,%ecx	/* bit 20 of word 0 set: always use the compact loop; NOTE(review): the meaning of this bit is defined by OPENSSL_ia32cap and is not shown here - confirm */
+	jnz	.L002loop
+	movl	8(%edx),%edx	/* edx = capability word 2; no reader of this value is visible in this chunk */
+	testl	$16777216,%ecx	/* bit 24 of word 0 clear: skip the SIMD candidates; NOTE(review): presumably the FXSR flag - confirm */
+	jz	.L003no_xmm
+	andl	$1073741824,%ecx	/* keep only bit 30 of word 0 */
+	andl	$268435968,%ebx	/* keep only bits 28 and 9 of word 1 */
+	orl	%ebx,%ecx
+	andl	$1342177280,%ecx	/* isolate bit 30 (word 0) and bit 28 (word 1) */
+	cmpl	$1342177280,%ecx	/* both set: take the path labelled AVX */
+	je	.L004AVX
+	testl	$512,%ebx	/* bit 9 of word 1 set: take the path labelled SSSE3 */
+	jnz	.L005SSSE3
+.L003no_xmm:
+	subl	%edi,%eax	/* eax = end - start = input length in bytes */
+	cmpl	$256,%eax	/* 256 bytes = four 64-byte blocks */
+	jae	.L006unrolled	/* inputs of at least 256 bytes take the unrolled path */
+	jmp	.L002loop	/* shorter inputs take the compact loop */
+.align	16
+.L002loop:	/* Compact SHA-256 path, one 64-byte block per pass: byte-swaps the block onto the stack, runs rounds 0-15 and 16-63 as two small loops, adds the result into the state and repeats until the input pointer reaches the end pointer. */
+	movl	(%edi),%eax	/* load four input dwords, byte-swap and push them; done four times, so W[0] ends up at the highest address */
+	movl	4(%edi),%ebx
+	movl	8(%edi),%ecx
+	bswap	%eax
+	movl	12(%edi),%edx
+	bswap	%ebx
+	pushl	%eax
+	bswap	%ecx
+	pushl	%ebx
+	bswap	%edx
+	pushl	%ecx
+	pushl	%edx
+	movl	16(%edi),%eax
+	movl	20(%edi),%ebx
+	movl	24(%edi),%ecx
+	bswap	%eax
+	movl	28(%edi),%edx
+	bswap	%ebx
+	pushl	%eax
+	bswap	%ecx
+	pushl	%ebx
+	bswap	%edx
+	pushl	%ecx
+	pushl	%edx
+	movl	32(%edi),%eax
+	movl	36(%edi),%ebx
+	movl	40(%edi),%ecx
+	bswap	%eax
+	movl	44(%edi),%edx
+	bswap	%ebx
+	pushl	%eax
+	bswap	%ecx
+	pushl	%ebx
+	bswap	%edx
+	pushl	%ecx
+	pushl	%edx
+	movl	48(%edi),%eax
+	movl	52(%edi),%ebx
+	movl	56(%edi),%ecx
+	bswap	%eax
+	movl	60(%edi),%edx
+	bswap	%ebx
+	pushl	%eax
+	bswap	%ecx
+	pushl	%ebx
+	bswap	%edx
+	pushl	%ecx
+	pushl	%edx
+	addl	$64,%edi	/* input pointer advances by one block */
+	leal	-36(%esp),%esp	/* 36 more bytes below the 16 pushed words: working-variable slots at 0..32(%esp) */
+	movl	%edi,104(%esp)	/* 104 = 36 + 64 + 4: store the advanced input pointer back into frame slot 4 */
+	movl	(%esi),%eax	/* eax = a */
+	movl	4(%esi),%ebx	/* ebx = b */
+	movl	8(%esi),%ecx	/* ecx = c */
+	movl	12(%esi),%edi	/* edi = d */
+	movl	%ebx,8(%esp)	/* slot 8 = b */
+	xorl	%ecx,%ebx	/* ebx = b ^ c */
+	movl	%ecx,12(%esp)	/* slot 12 = c */
+	movl	%edi,16(%esp)	/* slot 16 = d */
+	movl	%ebx,(%esp)	/* slot 0 = b ^ c, consumed by the Maj computation of the first round */
+	movl	16(%esi),%edx	/* edx = e */
+	movl	20(%esi),%ebx
+	movl	24(%esi),%ecx
+	movl	28(%esi),%edi
+	movl	%ebx,24(%esp)	/* slot 24 = f */
+	movl	%ecx,28(%esp)	/* slot 28 = g */
+	movl	%edi,32(%esp)	/* slot 32 = h */
+.align	16
+.L00700_15:	/* Rounds 0-15. esp moves down 4 bytes per round, so slots 4..32(%esp) rotate through a..h without copying. */
+	movl	%edx,%ecx
+	movl	24(%esp),%esi	/* esi = f */
+	rorl	$14,%ecx
+	movl	28(%esp),%edi	/* edi = g */
+	xorl	%edx,%ecx
+	xorl	%edi,%esi
+	movl	96(%esp),%ebx	/* ebx = W[i] */
+	rorl	$5,%ecx
+	andl	%edx,%esi
+	movl	%edx,20(%esp)	/* slot 20 = e */
+	xorl	%ecx,%edx
+	addl	32(%esp),%ebx	/* ebx = W[i] + h */
+	xorl	%edi,%esi	/* esi = Ch(e,f,g) = ((f ^ g) & e) ^ g */
+	rorl	$6,%edx	/* edx = Sigma1(e) = ror6 ^ ror11 ^ ror25 */
+	movl	%eax,%ecx
+	addl	%esi,%ebx
+	rorl	$9,%ecx
+	addl	%edx,%ebx	/* ebx = W[i] + h + Ch + Sigma1 */
+	movl	8(%esp),%edi	/* edi = b */
+	xorl	%eax,%ecx
+	movl	%eax,4(%esp)	/* slot 4 = a */
+	leal	-4(%esp),%esp	/* slide the frame: every slot offset grows by 4 from here on */
+	rorl	$11,%ecx
+	movl	(%ebp),%esi	/* esi = K[i] */
+	xorl	%eax,%ecx
+	movl	20(%esp),%edx	/* edx = d (slot 16 before the slide) */
+	xorl	%edi,%eax	/* eax = a ^ b */
+	rorl	$2,%ecx	/* ecx = Sigma0(a) = ror2 ^ ror13 ^ ror22 */
+	addl	%esi,%ebx	/* ebx = T1 = W[i] + h + Ch + Sigma1 + K[i] */
+	movl	%eax,(%esp)	/* keep a ^ b for the next round, where it plays the role of b ^ c */
+	addl	%ebx,%edx	/* edx = new e = d + T1 */
+	andl	4(%esp),%eax	/* (a ^ b) & (b ^ c), the latter stored by the previous round */
+	addl	%ecx,%ebx
+	xorl	%edi,%eax	/* eax = Maj(a,b,c) = ((a ^ b) & (b ^ c)) ^ b */
+	addl	$4,%ebp	/* advance the K pointer */
+	addl	%ebx,%eax	/* eax = new a = T1 + Sigma0 + Maj */
+	cmpl	$3248222580,%esi	/* 3248222580 is the 16th entry of .L001K256: stop after round 15 */
+	jne	.L00700_15
+	movl	156(%esp),%ecx	/* ecx = W[1], the W[i-15] input of round 16 */
+	jmp	.L00816_63
+.align	16
+.L00816_63:	/* Rounds 16-63: compute the next message-schedule word on the stack, then run the same round body as .L00700_15. */
+	movl	%ecx,%ebx
+	movl	104(%esp),%esi	/* esi = W[i-2] */
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx	/* ebx = sigma0(W[i-15]) = ror7 ^ ror18 ^ shr3 */
+	rorl	$17,%esi
+	addl	160(%esp),%ebx	/* + W[i-16] */
+	shrl	$10,%edi
+	addl	124(%esp),%ebx	/* + W[i-7] */
+	movl	%edx,%ecx
+	xorl	%esi,%edi	/* edi = sigma1(W[i-2]) = ror17 ^ ror19 ^ shr10 */
+	movl	24(%esp),%esi
+	rorl	$14,%ecx
+	addl	%edi,%ebx	/* ebx = W[i] */
+	movl	28(%esp),%edi
+	xorl	%edx,%ecx
+	xorl	%edi,%esi
+	movl	%ebx,96(%esp)	/* store the new W[i] just below the previous sixteen words */
+	rorl	$5,%ecx
+	andl	%edx,%esi
+	movl	%edx,20(%esp)
+	xorl	%ecx,%edx
+	addl	32(%esp),%ebx
+	xorl	%edi,%esi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%esi,%ebx
+	rorl	$9,%ecx
+	addl	%edx,%ebx
+	movl	8(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,4(%esp)
+	leal	-4(%esp),%esp	/* slide the frame by one dword, as in rounds 0-15 */
+	rorl	$11,%ecx
+	movl	(%ebp),%esi
+	xorl	%eax,%ecx
+	movl	20(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	addl	%esi,%ebx
+	movl	%eax,(%esp)
+	addl	%ebx,%edx
+	andl	4(%esp),%eax
+	addl	%ecx,%ebx
+	xorl	%edi,%eax
+	movl	156(%esp),%ecx	/* ecx = W[i-15] for the next round */
+	addl	$4,%ebp
+	addl	%ebx,%eax
+	cmpl	$3329325298,%esi	/* 3329325298 is the last (64th) entry of .L001K256: stop after round 63 */
+	jne	.L00816_63
+	movl	356(%esp),%esi	/* 356 = 100 + 64 rounds * 4: reload the state pointer from frame slot 0 */
+	movl	8(%esp),%ebx	/* ebx = b */
+	movl	16(%esp),%ecx	/* ecx = d */
+	addl	(%esi),%eax	/* add a, b, c, d into state dwords 0..3; edi still holds c from the last round */
+	addl	4(%esi),%ebx
+	addl	8(%esi),%edi
+	addl	12(%esi),%ecx
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%edi,8(%esi)
+	movl	%ecx,12(%esi)
+	movl	24(%esp),%eax	/* eax = f */
+	movl	28(%esp),%ebx	/* ebx = g */
+	movl	32(%esp),%ecx	/* ecx = h */
+	movl	360(%esp),%edi	/* edi = advanced input pointer (frame slot 4) */
+	addl	16(%esi),%edx	/* add e, f, g, h into state dwords 4..7 */
+	addl	20(%esi),%eax
+	addl	24(%esi),%ebx
+	addl	28(%esi),%ecx
+	movl	%edx,16(%esi)
+	movl	%eax,20(%esi)
+	movl	%ebx,24(%esi)
+	movl	%ecx,28(%esi)
+	leal	356(%esp),%esp	/* drop the per-block scratch area; esp is back at the frame base */
+	subl	$256,%ebp	/* rewind ebp to the start of the K table (64 entries of 4 bytes) */
+	cmpl	8(%esp),%edi	/* input pointer below the end pointer: process the next block */
+	jb	.L002loop
+	movl	12(%esp),%esp	/* restore the stack pointer saved in frame slot 12 at entry */
+	popl	%edi	/* restore the four saved registers and return */
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	64
+.L001K256:	/* Data embedded between the compact loop and the unrolled path: the SHA-256 round constants, a byte-order mask and an ASCII ident string. */
+.long	1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298	/* K[0..63] (FIPS 180-4); the 16th value 3248222580 and the last value 3329325298 are also the loop-exit sentinels compared in .L00700_15 and .L00816_63, so the table and those immediates must stay in sync */
+.long	66051,67438087,134810123,202182159	/* memory bytes 03 02 01 00 07 06 05 04 0B 0A 09 08 0F 0E 0D 0C: as a byte-shuffle mask this reverses the bytes inside each dword; the code that loads it is outside this view */
+.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97	/* ASCII "SHA256 block transform for x86, CRYPTOGAMS by <appro@openssl.org>" plus a terminating NUL, spread over these five .byte lines */
+.byte	110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+.byte	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+.byte	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+.byte	62,0
+.align	16
+.L006unrolled:
+	leal	-96(%esp),%esp
+	movl	(%esi),%eax
+	movl	4(%esi),%ebp
+	movl	8(%esi),%ecx
+	movl	12(%esi),%ebx
+	movl	%ebp,4(%esp)
+	xorl	%ecx,%ebp
+	movl	%ecx,8(%esp)
+	movl	%ebx,12(%esp)
+	movl	16(%esi),%edx
+	movl	20(%esi),%ebx
+	movl	24(%esi),%ecx
+	movl	28(%esi),%esi
+	movl	%ebx,20(%esp)
+	movl	%ecx,24(%esp)
+	movl	%esi,28(%esp)
+	jmp	.L009grand_loop
+.align	16
+.L009grand_loop:
+	movl	(%edi),%ebx
+	movl	4(%edi),%ecx
+	bswap	%ebx
+	movl	8(%edi),%esi
+	bswap	%ecx
+	movl	%ebx,32(%esp)
+	bswap	%esi
+	movl	%ecx,36(%esp)
+	movl	%esi,40(%esp)
+	movl	12(%edi),%ebx
+	movl	16(%edi),%ecx
+	bswap	%ebx
+	movl	20(%edi),%esi
+	bswap	%ecx
+	movl	%ebx,44(%esp)
+	bswap	%esi
+	movl	%ecx,48(%esp)
+	movl	%esi,52(%esp)
+	movl	24(%edi),%ebx
+	movl	28(%edi),%ecx
+	bswap	%ebx
+	movl	32(%edi),%esi
+	bswap	%ecx
+	movl	%ebx,56(%esp)
+	bswap	%esi
+	movl	%ecx,60(%esp)
+	movl	%esi,64(%esp)
+	movl	36(%edi),%ebx
+	movl	40(%edi),%ecx
+	bswap	%ebx
+	movl	44(%edi),%esi
+	bswap	%ecx
+	movl	%ebx,68(%esp)
+	bswap	%esi
+	movl	%ecx,72(%esp)
+	movl	%esi,76(%esp)
+	movl	48(%edi),%ebx
+	movl	52(%edi),%ecx
+	bswap	%ebx
+	movl	56(%edi),%esi
+	bswap	%ecx
+	movl	%ebx,80(%esp)
+	bswap	%esi
+	movl	%ecx,84(%esp)
+	movl	%esi,88(%esp)
+	movl	60(%edi),%ebx
+	addl	$64,%edi
+	bswap	%ebx
+	movl	%edi,100(%esp)
+	movl	%ebx,92(%esp)
+	movl	%edx,%ecx
+	movl	20(%esp),%esi
+	rorl	$14,%edx
+	movl	24(%esp),%edi
+	xorl	%ecx,%edx
+	movl	32(%esp),%ebx
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,16(%esp)
+	xorl	%ecx,%edx
+	addl	28(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	4(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	1116352408(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	12(%esp),%edx
+	addl	%ecx,%ebp
+	movl	%edx,%esi
+	movl	16(%esp),%ecx
+	rorl	$14,%edx
+	movl	20(%esp),%edi
+	xorl	%esi,%edx
+	movl	36(%esp),%ebx
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,12(%esp)
+	xorl	%esi,%edx
+	addl	24(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,28(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	1899447441(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	8(%esp),%edx
+	addl	%esi,%eax
+	movl	%edx,%ecx
+	movl	12(%esp),%esi
+	rorl	$14,%edx
+	movl	16(%esp),%edi
+	xorl	%ecx,%edx
+	movl	40(%esp),%ebx
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,8(%esp)
+	xorl	%ecx,%edx
+	addl	20(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	28(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,24(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	3049323471(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	4(%esp),%edx
+	addl	%ecx,%ebp
+	movl	%edx,%esi
+	movl	8(%esp),%ecx
+	rorl	$14,%edx
+	movl	12(%esp),%edi
+	xorl	%esi,%edx
+	movl	44(%esp),%ebx
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,4(%esp)
+	xorl	%esi,%edx
+	addl	16(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	24(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,20(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	3921009573(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	(%esp),%edx
+	addl	%esi,%eax
+	movl	%edx,%ecx
+	movl	4(%esp),%esi
+	rorl	$14,%edx
+	movl	8(%esp),%edi
+	xorl	%ecx,%edx
+	movl	48(%esp),%ebx
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,(%esp)
+	xorl	%ecx,%edx
+	addl	12(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	20(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,16(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	961987163(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	28(%esp),%edx
+	addl	%ecx,%ebp
+	movl	%edx,%esi
+	movl	(%esp),%ecx
+	rorl	$14,%edx
+	movl	4(%esp),%edi
+	xorl	%esi,%edx
+	movl	52(%esp),%ebx
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,28(%esp)
+	xorl	%esi,%edx
+	addl	8(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	16(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,12(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	1508970993(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	24(%esp),%edx
+	addl	%esi,%eax
+	movl	%edx,%ecx
+	movl	28(%esp),%esi
+	rorl	$14,%edx
+	movl	(%esp),%edi
+	xorl	%ecx,%edx
+	movl	56(%esp),%ebx
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,24(%esp)
+	xorl	%ecx,%edx
+	addl	4(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	12(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,8(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	2453635748(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	20(%esp),%edx
+	addl	%ecx,%ebp
+	movl	%edx,%esi
+	movl	24(%esp),%ecx
+	rorl	$14,%edx
+	movl	28(%esp),%edi
+	xorl	%esi,%edx
+	movl	60(%esp),%ebx
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,20(%esp)
+	xorl	%esi,%edx
+	addl	(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	8(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,4(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	2870763221(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	16(%esp),%edx
+	addl	%esi,%eax
+	movl	%edx,%ecx
+	movl	20(%esp),%esi
+	rorl	$14,%edx
+	movl	24(%esp),%edi
+	xorl	%ecx,%edx
+	movl	64(%esp),%ebx
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,16(%esp)
+	xorl	%ecx,%edx
+	addl	28(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	4(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	3624381080(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	12(%esp),%edx
+	addl	%ecx,%ebp
+	movl	%edx,%esi
+	movl	16(%esp),%ecx
+	rorl	$14,%edx
+	movl	20(%esp),%edi
+	xorl	%esi,%edx
+	movl	68(%esp),%ebx
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,12(%esp)
+	xorl	%esi,%edx
+	addl	24(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,28(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	310598401(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	8(%esp),%edx
+	addl	%esi,%eax
+	movl	%edx,%ecx
+	movl	12(%esp),%esi
+	rorl	$14,%edx
+	movl	16(%esp),%edi
+	xorl	%ecx,%edx
+	movl	72(%esp),%ebx
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,8(%esp)
+	xorl	%ecx,%edx
+	addl	20(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	28(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,24(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	607225278(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	4(%esp),%edx
+	addl	%ecx,%ebp
+	movl	%edx,%esi
+	movl	8(%esp),%ecx
+	rorl	$14,%edx
+	movl	12(%esp),%edi
+	xorl	%esi,%edx
+	movl	76(%esp),%ebx
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,4(%esp)
+	xorl	%esi,%edx
+	addl	16(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	24(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,20(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	1426881987(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	(%esp),%edx
+	addl	%esi,%eax
+	movl	%edx,%ecx
+	movl	4(%esp),%esi
+	rorl	$14,%edx
+	movl	8(%esp),%edi
+	xorl	%ecx,%edx
+	movl	80(%esp),%ebx
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,(%esp)
+	xorl	%ecx,%edx
+	addl	12(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	20(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,16(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	1925078388(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	28(%esp),%edx
+	addl	%ecx,%ebp
+	movl	%edx,%esi
+	movl	(%esp),%ecx
+	rorl	$14,%edx
+	movl	4(%esp),%edi
+	xorl	%esi,%edx
+	movl	84(%esp),%ebx
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,28(%esp)
+	xorl	%esi,%edx
+	addl	8(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	16(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,12(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	2162078206(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	24(%esp),%edx
+	addl	%esi,%eax
+	movl	%edx,%ecx
+	movl	28(%esp),%esi
+	rorl	$14,%edx
+	movl	(%esp),%edi
+	xorl	%ecx,%edx
+	movl	88(%esp),%ebx
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,24(%esp)
+	xorl	%ecx,%edx
+	addl	4(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	12(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,8(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	2614888103(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	20(%esp),%edx
+	addl	%ecx,%ebp
+	movl	%edx,%esi
+	movl	24(%esp),%ecx
+	rorl	$14,%edx
+	movl	28(%esp),%edi
+	xorl	%esi,%edx
+	movl	92(%esp),%ebx
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,20(%esp)
+	xorl	%esi,%edx
+	addl	(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	8(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,4(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	3248222580(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	36(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	16(%esp),%edx
+	addl	%esi,%eax
+	movl	88(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	32(%esp),%ebx
+	shrl	$10,%edi
+	addl	68(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	20(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	24(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,32(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,16(%esp)
+	xorl	%ecx,%edx
+	addl	28(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	4(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	3835390401(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	40(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	12(%esp),%edx
+	addl	%ecx,%ebp
+	movl	92(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	36(%esp),%ebx
+	shrl	$10,%edi
+	addl	72(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	16(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	20(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,36(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,12(%esp)
+	xorl	%esi,%edx
+	addl	24(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,28(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	4022224774(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	44(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	8(%esp),%edx
+	addl	%esi,%eax
+	movl	32(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	40(%esp),%ebx
+	shrl	$10,%edi
+	addl	76(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	12(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	16(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,40(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,8(%esp)
+	xorl	%ecx,%edx
+	addl	20(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	28(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,24(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	264347078(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	48(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	4(%esp),%edx
+	addl	%ecx,%ebp
+	movl	36(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	44(%esp),%ebx
+	shrl	$10,%edi
+	addl	80(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	8(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	12(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,44(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,4(%esp)
+	xorl	%esi,%edx
+	addl	16(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	24(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,20(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	604807628(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	52(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	(%esp),%edx
+	addl	%esi,%eax
+	movl	40(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	48(%esp),%ebx
+	shrl	$10,%edi
+	addl	84(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	4(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	8(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,48(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,(%esp)
+	xorl	%ecx,%edx
+	addl	12(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	20(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,16(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	770255983(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	56(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	28(%esp),%edx
+	addl	%ecx,%ebp
+	movl	44(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	52(%esp),%ebx
+	shrl	$10,%edi
+	addl	88(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	4(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,52(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,28(%esp)
+	xorl	%esi,%edx
+	addl	8(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	16(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,12(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	1249150122(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	60(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	24(%esp),%edx
+	addl	%esi,%eax
+	movl	48(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	56(%esp),%ebx
+	shrl	$10,%edi
+	addl	92(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	28(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,56(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,24(%esp)
+	xorl	%ecx,%edx
+	addl	4(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	12(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,8(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	1555081692(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	64(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	20(%esp),%edx
+	addl	%ecx,%ebp
+	movl	52(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	60(%esp),%ebx
+	shrl	$10,%edi
+	addl	32(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	24(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	28(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,60(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,20(%esp)
+	xorl	%esi,%edx
+	addl	(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	8(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,4(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	1996064986(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	68(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	16(%esp),%edx
+	addl	%esi,%eax
+	movl	56(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	64(%esp),%ebx
+	shrl	$10,%edi
+	addl	36(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	20(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	24(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,64(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,16(%esp)
+	xorl	%ecx,%edx
+	addl	28(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	4(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	2554220882(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	72(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	12(%esp),%edx
+	addl	%ecx,%ebp
+	movl	60(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	68(%esp),%ebx
+	shrl	$10,%edi
+	addl	40(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	16(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	20(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,68(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,12(%esp)
+	xorl	%esi,%edx
+	addl	24(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,28(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	2821834349(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	76(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	8(%esp),%edx
+	addl	%esi,%eax
+	movl	64(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	72(%esp),%ebx
+	shrl	$10,%edi
+	addl	44(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	12(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	16(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,72(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,8(%esp)
+	xorl	%ecx,%edx
+	addl	20(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	28(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,24(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	2952996808(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	80(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	4(%esp),%edx
+	addl	%ecx,%ebp
+	movl	68(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	76(%esp),%ebx
+	shrl	$10,%edi
+	addl	48(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	8(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	12(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,76(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,4(%esp)
+	xorl	%esi,%edx
+	addl	16(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	24(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,20(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	3210313671(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	84(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	(%esp),%edx
+	addl	%esi,%eax
+	movl	72(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	80(%esp),%ebx
+	shrl	$10,%edi
+	addl	52(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	4(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	8(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,80(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,(%esp)
+	xorl	%ecx,%edx
+	addl	12(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	20(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,16(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	3336571891(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	88(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	28(%esp),%edx
+	addl	%ecx,%ebp
+	movl	76(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	84(%esp),%ebx
+	shrl	$10,%edi
+	addl	56(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	4(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,84(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,28(%esp)
+	xorl	%esi,%edx
+	addl	8(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	16(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,12(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	3584528711(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	92(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	24(%esp),%edx
+	addl	%esi,%eax
+	movl	80(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	88(%esp),%ebx
+	shrl	$10,%edi
+	addl	60(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	28(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,88(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,24(%esp)
+	xorl	%ecx,%edx
+	addl	4(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	12(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,8(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	113926993(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	32(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	20(%esp),%edx
+	addl	%ecx,%ebp
+	movl	84(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	92(%esp),%ebx
+	shrl	$10,%edi
+	addl	64(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	24(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	28(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,92(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,20(%esp)
+	xorl	%esi,%edx
+	addl	(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	8(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,4(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	338241895(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	36(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	16(%esp),%edx
+	addl	%esi,%eax
+	movl	88(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	32(%esp),%ebx
+	shrl	$10,%edi
+	addl	68(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	20(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	24(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,32(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,16(%esp)
+	xorl	%ecx,%edx
+	addl	28(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	4(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	666307205(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	40(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	12(%esp),%edx
+	addl	%ecx,%ebp
+	movl	92(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	36(%esp),%ebx
+	shrl	$10,%edi
+	addl	72(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	16(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	20(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,36(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,12(%esp)
+	xorl	%esi,%edx
+	addl	24(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,28(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	773529912(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	44(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	8(%esp),%edx
+	addl	%esi,%eax
+	movl	32(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	40(%esp),%ebx
+	shrl	$10,%edi
+	addl	76(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	12(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	16(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,40(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,8(%esp)
+	xorl	%ecx,%edx
+	addl	20(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	28(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,24(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	1294757372(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	48(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	4(%esp),%edx
+	addl	%ecx,%ebp
+	movl	36(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	44(%esp),%ebx
+	shrl	$10,%edi
+	addl	80(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	8(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	12(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,44(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,4(%esp)
+	xorl	%esi,%edx
+	addl	16(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	24(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,20(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	1396182291(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	52(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	(%esp),%edx
+	addl	%esi,%eax
+	movl	40(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	48(%esp),%ebx
+	shrl	$10,%edi
+	addl	84(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	4(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	8(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,48(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,(%esp)
+	xorl	%ecx,%edx
+	addl	12(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	20(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,16(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	1695183700(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	56(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	28(%esp),%edx
+	addl	%ecx,%ebp
+	movl	44(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	52(%esp),%ebx
+	shrl	$10,%edi
+	addl	88(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	4(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,52(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,28(%esp)
+	xorl	%esi,%edx
+	addl	8(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	16(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,12(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	1986661051(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	60(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	24(%esp),%edx
+	addl	%esi,%eax
+	movl	48(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	56(%esp),%ebx
+	shrl	$10,%edi
+	addl	92(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	28(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,56(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,24(%esp)
+	xorl	%ecx,%edx
+	addl	4(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	12(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,8(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	2177026350(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	64(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	20(%esp),%edx
+	addl	%ecx,%ebp
+	movl	52(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	60(%esp),%ebx
+	shrl	$10,%edi
+	addl	32(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	24(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	28(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,60(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,20(%esp)
+	xorl	%esi,%edx
+	addl	(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	8(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,4(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	2456956037(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	68(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	16(%esp),%edx
+	addl	%esi,%eax
+	movl	56(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	64(%esp),%ebx
+	shrl	$10,%edi
+	addl	36(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	20(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	24(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,64(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,16(%esp)
+	xorl	%ecx,%edx
+	addl	28(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	4(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	2730485921(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	72(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	12(%esp),%edx
+	addl	%ecx,%ebp
+	movl	60(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	68(%esp),%ebx
+	shrl	$10,%edi
+	addl	40(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	16(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	20(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,68(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,12(%esp)
+	xorl	%esi,%edx
+	addl	24(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,28(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	2820302411(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	76(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	8(%esp),%edx
+	addl	%esi,%eax
+	movl	64(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	72(%esp),%ebx
+	shrl	$10,%edi
+	addl	44(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	12(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	16(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,72(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,8(%esp)
+	xorl	%ecx,%edx
+	addl	20(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	28(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,24(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	3259730800(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	80(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	4(%esp),%edx
+	addl	%ecx,%ebp
+	movl	68(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	76(%esp),%ebx
+	shrl	$10,%edi
+	addl	48(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	8(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	12(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,76(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,4(%esp)
+	xorl	%esi,%edx
+	addl	16(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	24(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,20(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	3345764771(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	84(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	(%esp),%edx
+	addl	%esi,%eax
+	movl	72(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	80(%esp),%ebx
+	shrl	$10,%edi
+	addl	52(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	4(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	8(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,80(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,(%esp)
+	xorl	%ecx,%edx
+	addl	12(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	20(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,16(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	3516065817(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	88(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	28(%esp),%edx
+	addl	%ecx,%ebp
+	movl	76(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	84(%esp),%ebx
+	shrl	$10,%edi
+	addl	56(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	4(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,84(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,28(%esp)
+	xorl	%esi,%edx
+	addl	8(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	16(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,12(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	3600352804(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	92(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	24(%esp),%edx
+	addl	%esi,%eax
+	movl	80(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	88(%esp),%ebx
+	shrl	$10,%edi
+	addl	60(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	28(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,88(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,24(%esp)
+	xorl	%ecx,%edx
+	addl	4(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	12(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,8(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	4094571909(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	32(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	20(%esp),%edx
+	addl	%ecx,%ebp
+	movl	84(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	92(%esp),%ebx
+	shrl	$10,%edi
+	addl	64(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	24(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	28(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,92(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,20(%esp)
+	xorl	%esi,%edx
+	addl	(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	8(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,4(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	275423344(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	36(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	16(%esp),%edx
+	addl	%esi,%eax
+	movl	88(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	32(%esp),%ebx
+	shrl	$10,%edi
+	addl	68(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	20(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	24(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,32(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,16(%esp)
+	xorl	%ecx,%edx
+	addl	28(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	4(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	430227734(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	40(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	12(%esp),%edx
+	addl	%ecx,%ebp
+	movl	92(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	36(%esp),%ebx
+	shrl	$10,%edi
+	addl	72(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	16(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	20(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,36(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,12(%esp)
+	xorl	%esi,%edx
+	addl	24(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,28(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	506948616(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	44(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	8(%esp),%edx
+	addl	%esi,%eax
+	movl	32(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	40(%esp),%ebx
+	shrl	$10,%edi
+	addl	76(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	12(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	16(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,40(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,8(%esp)
+	xorl	%ecx,%edx
+	addl	20(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	28(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,24(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	659060556(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	48(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	4(%esp),%edx
+	addl	%ecx,%ebp
+	movl	36(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	44(%esp),%ebx
+	shrl	$10,%edi
+	addl	80(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	8(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	12(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,44(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,4(%esp)
+	xorl	%esi,%edx
+	addl	16(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	24(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,20(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	883997877(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	52(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	(%esp),%edx
+	addl	%esi,%eax
+	movl	40(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	48(%esp),%ebx
+	shrl	$10,%edi
+	addl	84(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	4(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	8(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,48(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,(%esp)
+	xorl	%ecx,%edx
+	addl	12(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	20(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,16(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	958139571(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	56(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	28(%esp),%edx
+	addl	%ecx,%ebp
+	movl	44(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	52(%esp),%ebx
+	shrl	$10,%edi
+	addl	88(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	4(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,52(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,28(%esp)
+	xorl	%esi,%edx
+	addl	8(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	16(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,12(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	1322822218(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	60(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	24(%esp),%edx
+	addl	%esi,%eax
+	movl	48(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	56(%esp),%ebx
+	shrl	$10,%edi
+	addl	92(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	28(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,56(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,24(%esp)
+	xorl	%ecx,%edx
+	addl	4(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	12(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,8(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	1537002063(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	64(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	20(%esp),%edx
+	addl	%ecx,%ebp
+	movl	52(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	60(%esp),%ebx
+	shrl	$10,%edi
+	addl	32(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	24(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	28(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,60(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,20(%esp)
+	xorl	%esi,%edx
+	addl	(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	8(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,4(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	1747873779(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	68(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	16(%esp),%edx
+	addl	%esi,%eax
+	movl	56(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	64(%esp),%ebx
+	shrl	$10,%edi
+	addl	36(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	20(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	24(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,64(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,16(%esp)
+	xorl	%ecx,%edx
+	addl	28(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	4(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	1955562222(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	72(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	12(%esp),%edx
+	addl	%ecx,%ebp
+	movl	60(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	68(%esp),%ebx
+	shrl	$10,%edi
+	addl	40(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	16(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	20(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,68(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,12(%esp)
+	xorl	%esi,%edx
+	addl	24(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,28(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	2024104815(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	76(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	8(%esp),%edx
+	addl	%esi,%eax
+	movl	64(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	72(%esp),%ebx
+	shrl	$10,%edi
+	addl	44(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	12(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	16(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,72(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,8(%esp)
+	xorl	%ecx,%edx
+	addl	20(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	28(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,24(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	2227730452(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	80(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	4(%esp),%edx
+	addl	%ecx,%ebp
+	movl	68(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	76(%esp),%ebx
+	shrl	$10,%edi
+	addl	48(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	8(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	12(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,76(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,4(%esp)
+	xorl	%esi,%edx
+	addl	16(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	24(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,20(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	2361852424(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	84(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	(%esp),%edx
+	addl	%esi,%eax
+	movl	72(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	80(%esp),%ebx
+	shrl	$10,%edi
+	addl	52(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	4(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	8(%esp),%edi
+	xorl	%ecx,%edx
+	movl	%ebx,80(%esp)
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,(%esp)
+	xorl	%ecx,%edx
+	addl	12(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	20(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,16(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	2428436474(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	88(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	28(%esp),%edx
+	addl	%ecx,%ebp
+	movl	76(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	84(%esp),%ebx
+	shrl	$10,%edi
+	addl	56(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	4(%esp),%edi
+	xorl	%esi,%edx
+	movl	%ebx,84(%esp)
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,28(%esp)
+	xorl	%esi,%edx
+	addl	8(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	16(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,12(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	2756734187(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	movl	92(%esp),%ecx
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	24(%esp),%edx
+	addl	%esi,%eax
+	movl	80(%esp),%esi
+	movl	%ecx,%ebx
+	rorl	$11,%ecx
+	movl	%esi,%edi
+	rorl	$2,%esi
+	xorl	%ebx,%ecx
+	shrl	$3,%ebx
+	rorl	$7,%ecx
+	xorl	%edi,%esi
+	xorl	%ecx,%ebx
+	rorl	$17,%esi
+	addl	88(%esp),%ebx
+	shrl	$10,%edi
+	addl	60(%esp),%ebx
+	movl	%edx,%ecx
+	xorl	%esi,%edi
+	movl	28(%esp),%esi
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	(%esp),%edi
+	xorl	%ecx,%edx
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,24(%esp)
+	xorl	%ecx,%edx
+	addl	4(%esp),%ebx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%ebx
+	rorl	$9,%ecx
+	movl	%eax,%esi
+	movl	12(%esp),%edi
+	xorl	%eax,%ecx
+	movl	%eax,8(%esp)
+	xorl	%edi,%eax
+	rorl	$11,%ecx
+	andl	%eax,%ebp
+	leal	3204031479(%ebx,%edx,1),%edx
+	xorl	%esi,%ecx
+	xorl	%edi,%ebp
+	movl	32(%esp),%esi
+	rorl	$2,%ecx
+	addl	%edx,%ebp
+	addl	20(%esp),%edx
+	addl	%ecx,%ebp
+	movl	84(%esp),%ecx
+	movl	%esi,%ebx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	rorl	$2,%ecx
+	xorl	%ebx,%esi
+	shrl	$3,%ebx
+	rorl	$7,%esi
+	xorl	%edi,%ecx
+	xorl	%esi,%ebx
+	rorl	$17,%ecx
+	addl	92(%esp),%ebx
+	shrl	$10,%edi
+	addl	64(%esp),%ebx
+	movl	%edx,%esi
+	xorl	%ecx,%edi
+	movl	24(%esp),%ecx
+	rorl	$14,%edx
+	addl	%edi,%ebx
+	movl	28(%esp),%edi
+	xorl	%esi,%edx
+	xorl	%edi,%ecx
+	rorl	$5,%edx
+	andl	%esi,%ecx
+	movl	%esi,20(%esp)
+	xorl	%esi,%edx
+	addl	(%esp),%ebx
+	xorl	%ecx,%edi
+	rorl	$6,%edx
+	movl	%ebp,%esi
+	addl	%edi,%ebx
+	rorl	$9,%esi
+	movl	%ebp,%ecx
+	movl	8(%esp),%edi
+	xorl	%ebp,%esi
+	movl	%ebp,4(%esp)
+	xorl	%edi,%ebp
+	rorl	$11,%esi
+	andl	%ebp,%eax
+	leal	3329325298(%ebx,%edx,1),%edx
+	xorl	%ecx,%esi
+	xorl	%edi,%eax
+	rorl	$2,%esi
+	addl	%edx,%eax
+	addl	16(%esp),%edx
+	addl	%esi,%eax
+	movl	96(%esp),%esi
+	xorl	%edi,%ebp
+	movl	12(%esp),%ecx
+	addl	(%esi),%eax
+	addl	4(%esi),%ebp
+	addl	8(%esi),%edi
+	addl	12(%esi),%ecx
+	movl	%eax,(%esi)
+	movl	%ebp,4(%esi)
+	movl	%edi,8(%esi)
+	movl	%ecx,12(%esi)
+	movl	%ebp,4(%esp)
+	xorl	%edi,%ebp
+	movl	%edi,8(%esp)
+	movl	%ecx,12(%esp)
+	movl	20(%esp),%edi
+	movl	24(%esp),%ebx
+	movl	28(%esp),%ecx
+	addl	16(%esi),%edx
+	addl	20(%esi),%edi
+	addl	24(%esi),%ebx
+	addl	28(%esi),%ecx
+	movl	%edx,16(%esi)
+	movl	%edi,20(%esi)
+	movl	%ebx,24(%esi)
+	movl	%ecx,28(%esi)
+	movl	%edi,20(%esp)
+	movl	100(%esp),%edi
+	movl	%ebx,24(%esp)
+	movl	%ecx,28(%esp)
+	cmpl	104(%esp),%edi
+	jb	.L009grand_loop
+	movl	108(%esp),%esp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	32
+.L005SSSE3:	/* SSSE3 path entry: copy the eight 32-bit words at (%esi) into registers and stack slots, then jump to the per-block loop */
+	leal	-96(%esp),%esp	/* lower %esp by 96 bytes; offsets 4..28 are filled below, 32..95 are written as 16-byte vectors at .L010grand_ssse3 */
+	movl	(%esi),%eax	/* word 0 stays in %eax */
+	movl	4(%esi),%ebx	/* word 1 */
+	movl	8(%esi),%ecx	/* word 2 */
+	movl	12(%esi),%edi	/* word 3 */
+	movl	%ebx,4(%esp)	/* word 1 -> 4(%esp) */
+	xorl	%ecx,%ebx	/* %ebx = word1 ^ word2; the rounds consume it with andl/xorl (presumably the Maj term - confirm) */
+	movl	%ecx,8(%esp)	/* word 2 -> 8(%esp) */
+	movl	%edi,12(%esp)	/* word 3 -> 12(%esp) */
+	movl	16(%esi),%edx	/* word 4 stays in %edx */
+	movl	20(%esi),%edi	/* word 5 */
+	movl	24(%esi),%ecx	/* word 6 */
+	movl	28(%esi),%esi	/* word 7; last use of %esi as the source pointer in this block */
+	movl	%edi,20(%esp)	/* word 5 -> 20(%esp) */
+	movl	100(%esp),%edi	/* %edi = dword kept at 100(%esp); .L010grand_ssse3 reads input through it and stores the advanced value back */
+	movl	%ecx,24(%esp)	/* word 6 -> 24(%esp) */
+	movl	%esi,28(%esp)	/* word 7 -> 28(%esp); (%esp) and 16(%esp) are not written here, the first round stores %eax and a copy of %edx there */
+	movdqa	256(%ebp),%xmm7	/* %xmm7 = 16 bytes at 256(%ebp), used as the pshufb control mask below (presumably a byte-swap mask - confirm against the table) */
+	jmp	.L010grand_ssse3
+.align	16
+.L010grand_ssse3:	/* per-block setup: load 64 input bytes, shuffle each 16-byte lane with %xmm7, add the first 64 bytes of the table at %ebp, park the sums on the stack */
+	movdqu	(%edi),%xmm0	/* unaligned load of input bytes 0..15 */
+	movdqu	16(%edi),%xmm1	/* input bytes 16..31 */
+	movdqu	32(%edi),%xmm2	/* input bytes 32..47 */
+	movdqu	48(%edi),%xmm3	/* input bytes 48..63 */
+	addl	$64,%edi	/* advance the input pointer by the 64 bytes just read */
+.byte	102,15,56,0,199	/* hand-encoded pshufb %xmm7,%xmm0 */
+	movl	%edi,100(%esp)	/* save the advanced input pointer; %edi is reused as scratch by the rounds */
+.byte	102,15,56,0,207	/* hand-encoded pshufb %xmm7,%xmm1 */
+	movdqa	(%ebp),%xmm4	/* table dwords 0..3 (presumably round constants - confirm against the table definition) */
+.byte	102,15,56,0,215	/* hand-encoded pshufb %xmm7,%xmm2 */
+	movdqa	16(%ebp),%xmm5	/* table dwords 4..7 */
+	paddd	%xmm0,%xmm4	/* %xmm4 = table[0..3] + shuffled input dwords 0..3 */
+.byte	102,15,56,0,223	/* hand-encoded pshufb %xmm7,%xmm3; last use of the mask in this block */
+	movdqa	32(%ebp),%xmm6	/* table dwords 8..11 */
+	paddd	%xmm1,%xmm5	/* %xmm5 = table[4..7] + input dwords 4..7 */
+	movdqa	48(%ebp),%xmm7	/* table dwords 12..15; overwrites the shuffle mask held in %xmm7 */
+	movdqa	%xmm4,32(%esp)	/* sums 0..3 -> 32(%esp); the scalar rounds read them with addl 32(%esp)..44(%esp). movdqa needs a 16-byte aligned address; %esp alignment is set outside this view - TODO confirm */
+	paddd	%xmm2,%xmm6	/* %xmm6 = table[8..11] + input dwords 8..11 */
+	movdqa	%xmm5,48(%esp)	/* sums 4..7 -> 48(%esp) */
+	paddd	%xmm3,%xmm7	/* %xmm7 = table[12..15] + input dwords 12..15 */
+	movdqa	%xmm6,64(%esp)	/* sums 8..11 -> 64(%esp) */
+	movdqa	%xmm7,80(%esp)	/* sums 12..15 -> 80(%esp) */
+	jmp	.L011ssse3_00_47
+.align	16
+.L011ssse3_00_47:
+	addl	$64,%ebp
+	movl	%edx,%ecx
+	movdqa	%xmm1,%xmm4
+	rorl	$14,%edx
+	movl	20(%esp),%esi
+	movdqa	%xmm3,%xmm7
+	xorl	%ecx,%edx
+	movl	24(%esp),%edi
+.byte	102,15,58,15,224,4
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+.byte	102,15,58,15,250,4
+	movl	%ecx,16(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	movdqa	%xmm4,%xmm5
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	movdqa	%xmm4,%xmm6
+	addl	%edi,%edx
+	movl	4(%esp),%edi
+	psrld	$3,%xmm4
+	movl	%eax,%esi
+	rorl	$9,%ecx
+	paddd	%xmm7,%xmm0
+	movl	%eax,(%esp)
+	xorl	%eax,%ecx
+	psrld	$7,%xmm6
+	xorl	%edi,%eax
+	addl	28(%esp),%edx
+	rorl	$11,%ecx
+	andl	%eax,%ebx
+	pshufd	$250,%xmm3,%xmm7
+	xorl	%esi,%ecx
+	addl	32(%esp),%edx
+	pslld	$14,%xmm5
+	xorl	%edi,%ebx
+	rorl	$2,%ecx
+	pxor	%xmm6,%xmm4
+	addl	%edx,%ebx
+	addl	12(%esp),%edx
+	psrld	$11,%xmm6
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	pxor	%xmm5,%xmm4
+	movl	16(%esp),%esi
+	xorl	%ecx,%edx
+	pslld	$11,%xmm5
+	movl	20(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	pxor	%xmm6,%xmm4
+	andl	%ecx,%esi
+	movl	%ecx,12(%esp)
+	movdqa	%xmm7,%xmm6
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	pxor	%xmm5,%xmm4
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	psrld	$10,%xmm7
+	movl	(%esp),%edi
+	movl	%ebx,%esi
+	rorl	$9,%ecx
+	paddd	%xmm4,%xmm0
+	movl	%ebx,28(%esp)
+	xorl	%ebx,%ecx
+	psrlq	$17,%xmm6
+	xorl	%edi,%ebx
+	addl	24(%esp),%edx
+	rorl	$11,%ecx
+	pxor	%xmm6,%xmm7
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	psrlq	$2,%xmm6
+	addl	36(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	pxor	%xmm6,%xmm7
+	addl	%edx,%eax
+	addl	8(%esp),%edx
+	pshufd	$128,%xmm7,%xmm7
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	12(%esp),%esi
+	xorl	%ecx,%edx
+	movl	16(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	psrldq	$8,%xmm7
+	movl	%ecx,8(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	paddd	%xmm7,%xmm0
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	28(%esp),%edi
+	movl	%eax,%esi
+	rorl	$9,%ecx
+	movl	%eax,24(%esp)
+	pshufd	$80,%xmm0,%xmm7
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	20(%esp),%edx
+	movdqa	%xmm7,%xmm6
+	rorl	$11,%ecx
+	psrld	$10,%xmm7
+	andl	%eax,%ebx
+	psrlq	$17,%xmm6
+	xorl	%esi,%ecx
+	addl	40(%esp),%edx
+	xorl	%edi,%ebx
+	rorl	$2,%ecx
+	pxor	%xmm6,%xmm7
+	addl	%edx,%ebx
+	addl	4(%esp),%edx
+	psrlq	$2,%xmm6
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	pxor	%xmm6,%xmm7
+	movl	8(%esp),%esi
+	xorl	%ecx,%edx
+	movl	12(%esp),%edi
+	pshufd	$8,%xmm7,%xmm7
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	movdqa	(%ebp),%xmm6
+	andl	%ecx,%esi
+	movl	%ecx,4(%esp)
+	pslldq	$8,%xmm7
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	24(%esp),%edi
+	movl	%ebx,%esi
+	rorl	$9,%ecx
+	paddd	%xmm7,%xmm0
+	movl	%ebx,20(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	16(%esp),%edx
+	paddd	%xmm0,%xmm6
+	rorl	$11,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	44(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	addl	%edx,%eax
+	addl	(%esp),%edx
+	addl	%ecx,%eax
+	movdqa	%xmm6,32(%esp)
+	movl	%edx,%ecx
+	movdqa	%xmm2,%xmm4
+	rorl	$14,%edx
+	movl	4(%esp),%esi
+	movdqa	%xmm0,%xmm7
+	xorl	%ecx,%edx
+	movl	8(%esp),%edi
+.byte	102,15,58,15,225,4
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+.byte	102,15,58,15,251,4
+	movl	%ecx,(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	movdqa	%xmm4,%xmm5
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	movdqa	%xmm4,%xmm6
+	addl	%edi,%edx
+	movl	20(%esp),%edi
+	psrld	$3,%xmm4
+	movl	%eax,%esi
+	rorl	$9,%ecx
+	paddd	%xmm7,%xmm1
+	movl	%eax,16(%esp)
+	xorl	%eax,%ecx
+	psrld	$7,%xmm6
+	xorl	%edi,%eax
+	addl	12(%esp),%edx
+	rorl	$11,%ecx
+	andl	%eax,%ebx
+	pshufd	$250,%xmm0,%xmm7
+	xorl	%esi,%ecx
+	addl	48(%esp),%edx
+	pslld	$14,%xmm5
+	xorl	%edi,%ebx
+	rorl	$2,%ecx
+	pxor	%xmm6,%xmm4
+	addl	%edx,%ebx
+	addl	28(%esp),%edx
+	psrld	$11,%xmm6
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	pxor	%xmm5,%xmm4
+	movl	(%esp),%esi
+	xorl	%ecx,%edx
+	pslld	$11,%xmm5
+	movl	4(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	pxor	%xmm6,%xmm4
+	andl	%ecx,%esi
+	movl	%ecx,28(%esp)
+	movdqa	%xmm7,%xmm6
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	pxor	%xmm5,%xmm4
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	psrld	$10,%xmm7
+	movl	16(%esp),%edi
+	movl	%ebx,%esi
+	rorl	$9,%ecx
+	paddd	%xmm4,%xmm1
+	movl	%ebx,12(%esp)
+	xorl	%ebx,%ecx
+	psrlq	$17,%xmm6
+	xorl	%edi,%ebx
+	addl	8(%esp),%edx
+	rorl	$11,%ecx
+	pxor	%xmm6,%xmm7
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	psrlq	$2,%xmm6
+	addl	52(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	pxor	%xmm6,%xmm7
+	addl	%edx,%eax
+	addl	24(%esp),%edx
+	pshufd	$128,%xmm7,%xmm7
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	28(%esp),%esi
+	xorl	%ecx,%edx
+	movl	(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	psrldq	$8,%xmm7
+	movl	%ecx,24(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	paddd	%xmm7,%xmm1
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	12(%esp),%edi
+	movl	%eax,%esi
+	rorl	$9,%ecx
+	movl	%eax,8(%esp)
+	pshufd	$80,%xmm1,%xmm7
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	4(%esp),%edx
+	movdqa	%xmm7,%xmm6
+	rorl	$11,%ecx
+	psrld	$10,%xmm7
+	andl	%eax,%ebx
+	psrlq	$17,%xmm6
+	xorl	%esi,%ecx
+	addl	56(%esp),%edx
+	xorl	%edi,%ebx
+	rorl	$2,%ecx
+	pxor	%xmm6,%xmm7
+	addl	%edx,%ebx
+	addl	20(%esp),%edx
+	psrlq	$2,%xmm6
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	pxor	%xmm6,%xmm7
+	movl	24(%esp),%esi
+	xorl	%ecx,%edx
+	movl	28(%esp),%edi
+	pshufd	$8,%xmm7,%xmm7
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	movdqa	16(%ebp),%xmm6
+	andl	%ecx,%esi
+	movl	%ecx,20(%esp)
+	pslldq	$8,%xmm7
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	8(%esp),%edi
+	movl	%ebx,%esi
+	rorl	$9,%ecx
+	paddd	%xmm7,%xmm1
+	movl	%ebx,4(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	(%esp),%edx
+	paddd	%xmm1,%xmm6
+	rorl	$11,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	60(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	addl	%edx,%eax
+	addl	16(%esp),%edx
+	addl	%ecx,%eax
+	movdqa	%xmm6,48(%esp)
+	movl	%edx,%ecx
+	movdqa	%xmm3,%xmm4
+	rorl	$14,%edx
+	movl	20(%esp),%esi
+	movdqa	%xmm1,%xmm7
+	xorl	%ecx,%edx
+	movl	24(%esp),%edi
+.byte	102,15,58,15,226,4
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+.byte	102,15,58,15,248,4
+	movl	%ecx,16(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	movdqa	%xmm4,%xmm5
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	movdqa	%xmm4,%xmm6
+	addl	%edi,%edx
+	movl	4(%esp),%edi
+	psrld	$3,%xmm4
+	movl	%eax,%esi
+	rorl	$9,%ecx
+	paddd	%xmm7,%xmm2
+	movl	%eax,(%esp)
+	xorl	%eax,%ecx
+	psrld	$7,%xmm6
+	xorl	%edi,%eax
+	addl	28(%esp),%edx
+	rorl	$11,%ecx
+	andl	%eax,%ebx
+	pshufd	$250,%xmm1,%xmm7
+	xorl	%esi,%ecx
+	addl	64(%esp),%edx
+	pslld	$14,%xmm5
+	xorl	%edi,%ebx
+	rorl	$2,%ecx
+	pxor	%xmm6,%xmm4
+	addl	%edx,%ebx
+	addl	12(%esp),%edx
+	psrld	$11,%xmm6
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	pxor	%xmm5,%xmm4
+	movl	16(%esp),%esi
+	xorl	%ecx,%edx
+	pslld	$11,%xmm5
+	movl	20(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	pxor	%xmm6,%xmm4
+	andl	%ecx,%esi
+	movl	%ecx,12(%esp)
+	movdqa	%xmm7,%xmm6
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	pxor	%xmm5,%xmm4
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	psrld	$10,%xmm7
+	movl	(%esp),%edi
+	movl	%ebx,%esi
+	rorl	$9,%ecx
+	paddd	%xmm4,%xmm2
+	movl	%ebx,28(%esp)
+	xorl	%ebx,%ecx
+	psrlq	$17,%xmm6
+	xorl	%edi,%ebx
+	addl	24(%esp),%edx
+	rorl	$11,%ecx
+	pxor	%xmm6,%xmm7
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	psrlq	$2,%xmm6
+	addl	68(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	pxor	%xmm6,%xmm7
+	addl	%edx,%eax
+	addl	8(%esp),%edx
+	pshufd	$128,%xmm7,%xmm7
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	12(%esp),%esi
+	xorl	%ecx,%edx
+	movl	16(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	psrldq	$8,%xmm7
+	movl	%ecx,8(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	paddd	%xmm7,%xmm2
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	28(%esp),%edi
+	movl	%eax,%esi
+	rorl	$9,%ecx
+	movl	%eax,24(%esp)
+	pshufd	$80,%xmm2,%xmm7
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	20(%esp),%edx
+	movdqa	%xmm7,%xmm6
+	rorl	$11,%ecx
+	psrld	$10,%xmm7
+	andl	%eax,%ebx
+	psrlq	$17,%xmm6
+	xorl	%esi,%ecx
+	addl	72(%esp),%edx
+	xorl	%edi,%ebx
+	rorl	$2,%ecx
+	pxor	%xmm6,%xmm7
+	addl	%edx,%ebx
+	addl	4(%esp),%edx
+	psrlq	$2,%xmm6
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	pxor	%xmm6,%xmm7
+	movl	8(%esp),%esi
+	xorl	%ecx,%edx
+	movl	12(%esp),%edi
+	pshufd	$8,%xmm7,%xmm7
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	movdqa	32(%ebp),%xmm6
+	andl	%ecx,%esi
+	movl	%ecx,4(%esp)
+	pslldq	$8,%xmm7
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	24(%esp),%edi
+	movl	%ebx,%esi
+	rorl	$9,%ecx
+	paddd	%xmm7,%xmm2
+	movl	%ebx,20(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	16(%esp),%edx
+	paddd	%xmm2,%xmm6
+	rorl	$11,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	76(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	addl	%edx,%eax
+	addl	(%esp),%edx
+	addl	%ecx,%eax
+	movdqa	%xmm6,64(%esp)
+	movl	%edx,%ecx
+	movdqa	%xmm0,%xmm4
+	rorl	$14,%edx
+	movl	4(%esp),%esi
+	movdqa	%xmm2,%xmm7
+	xorl	%ecx,%edx
+	movl	8(%esp),%edi
+.byte	102,15,58,15,227,4
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+.byte	102,15,58,15,249,4
+	movl	%ecx,(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	movdqa	%xmm4,%xmm5
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	movdqa	%xmm4,%xmm6
+	addl	%edi,%edx
+	movl	20(%esp),%edi
+	psrld	$3,%xmm4
+	movl	%eax,%esi
+	rorl	$9,%ecx
+	paddd	%xmm7,%xmm3
+	movl	%eax,16(%esp)
+	xorl	%eax,%ecx
+	psrld	$7,%xmm6
+	xorl	%edi,%eax
+	addl	12(%esp),%edx
+	rorl	$11,%ecx
+	andl	%eax,%ebx
+	pshufd	$250,%xmm2,%xmm7
+	xorl	%esi,%ecx
+	addl	80(%esp),%edx
+	pslld	$14,%xmm5
+	xorl	%edi,%ebx
+	rorl	$2,%ecx
+	pxor	%xmm6,%xmm4
+	addl	%edx,%ebx
+	addl	28(%esp),%edx
+	psrld	$11,%xmm6
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	pxor	%xmm5,%xmm4
+	movl	(%esp),%esi
+	xorl	%ecx,%edx
+	pslld	$11,%xmm5
+	movl	4(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	pxor	%xmm6,%xmm4
+	andl	%ecx,%esi
+	movl	%ecx,28(%esp)
+	movdqa	%xmm7,%xmm6
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	pxor	%xmm5,%xmm4
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	psrld	$10,%xmm7
+	movl	16(%esp),%edi
+	movl	%ebx,%esi
+	rorl	$9,%ecx
+	paddd	%xmm4,%xmm3
+	movl	%ebx,12(%esp)
+	xorl	%ebx,%ecx
+	psrlq	$17,%xmm6
+	xorl	%edi,%ebx
+	addl	8(%esp),%edx
+	rorl	$11,%ecx
+	pxor	%xmm6,%xmm7
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	psrlq	$2,%xmm6
+	addl	84(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	pxor	%xmm6,%xmm7
+	addl	%edx,%eax
+	addl	24(%esp),%edx
+	pshufd	$128,%xmm7,%xmm7
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	28(%esp),%esi
+	xorl	%ecx,%edx
+	movl	(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	psrldq	$8,%xmm7
+	movl	%ecx,24(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	paddd	%xmm7,%xmm3
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	12(%esp),%edi
+	movl	%eax,%esi
+	rorl	$9,%ecx
+	movl	%eax,8(%esp)
+	pshufd	$80,%xmm3,%xmm7
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	4(%esp),%edx
+	movdqa	%xmm7,%xmm6
+	rorl	$11,%ecx
+	psrld	$10,%xmm7
+	andl	%eax,%ebx
+	psrlq	$17,%xmm6
+	xorl	%esi,%ecx
+	addl	88(%esp),%edx
+	xorl	%edi,%ebx
+	rorl	$2,%ecx
+	pxor	%xmm6,%xmm7
+	addl	%edx,%ebx
+	addl	20(%esp),%edx
+	psrlq	$2,%xmm6
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	pxor	%xmm6,%xmm7
+	movl	24(%esp),%esi
+	xorl	%ecx,%edx
+	movl	28(%esp),%edi
+	pshufd	$8,%xmm7,%xmm7
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	movdqa	48(%ebp),%xmm6
+	andl	%ecx,%esi
+	movl	%ecx,20(%esp)
+	pslldq	$8,%xmm7
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	8(%esp),%edi
+	movl	%ebx,%esi
+	rorl	$9,%ecx
+	paddd	%xmm7,%xmm3
+	movl	%ebx,4(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	(%esp),%edx
+	paddd	%xmm3,%xmm6
+	rorl	$11,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	92(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	addl	%edx,%eax
+	addl	16(%esp),%edx
+	addl	%ecx,%eax
+	movdqa	%xmm6,80(%esp)
+	cmpl	$66051,64(%ebp)
+	jne	.L011ssse3_00_47
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	20(%esp),%esi
+	xorl	%ecx,%edx
+	movl	24(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,16(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	4(%esp),%edi
+	movl	%eax,%esi
+	rorl	$9,%ecx
+	movl	%eax,(%esp)
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	28(%esp),%edx
+	rorl	$11,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	addl	32(%esp),%edx
+	xorl	%edi,%ebx
+	rorl	$2,%ecx
+	addl	%edx,%ebx
+	addl	12(%esp),%edx
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	16(%esp),%esi
+	xorl	%ecx,%edx
+	movl	20(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,12(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	(%esp),%edi
+	movl	%ebx,%esi
+	rorl	$9,%ecx
+	movl	%ebx,28(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	24(%esp),%edx
+	rorl	$11,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	36(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	addl	%edx,%eax
+	addl	8(%esp),%edx
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	12(%esp),%esi
+	xorl	%ecx,%edx
+	movl	16(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,8(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	28(%esp),%edi
+	movl	%eax,%esi
+	rorl	$9,%ecx
+	movl	%eax,24(%esp)
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	20(%esp),%edx
+	rorl	$11,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	addl	40(%esp),%edx
+	xorl	%edi,%ebx
+	rorl	$2,%ecx
+	addl	%edx,%ebx
+	addl	4(%esp),%edx
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	8(%esp),%esi
+	xorl	%ecx,%edx
+	movl	12(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,4(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	24(%esp),%edi
+	movl	%ebx,%esi
+	rorl	$9,%ecx
+	movl	%ebx,20(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	16(%esp),%edx
+	rorl	$11,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	44(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	addl	%edx,%eax
+	addl	(%esp),%edx
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	4(%esp),%esi
+	xorl	%ecx,%edx
+	movl	8(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	20(%esp),%edi
+	movl	%eax,%esi
+	rorl	$9,%ecx
+	movl	%eax,16(%esp)
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	12(%esp),%edx
+	rorl	$11,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	addl	48(%esp),%edx
+	xorl	%edi,%ebx
+	rorl	$2,%ecx
+	addl	%edx,%ebx
+	addl	28(%esp),%edx
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	(%esp),%esi
+	xorl	%ecx,%edx
+	movl	4(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,28(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	16(%esp),%edi
+	movl	%ebx,%esi
+	rorl	$9,%ecx
+	movl	%ebx,12(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	8(%esp),%edx
+	rorl	$11,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	52(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	addl	%edx,%eax
+	addl	24(%esp),%edx
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	28(%esp),%esi
+	xorl	%ecx,%edx
+	movl	(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,24(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	12(%esp),%edi
+	movl	%eax,%esi
+	rorl	$9,%ecx
+	movl	%eax,8(%esp)
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	4(%esp),%edx
+	rorl	$11,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	addl	56(%esp),%edx
+	xorl	%edi,%ebx
+	rorl	$2,%ecx
+	addl	%edx,%ebx
+	addl	20(%esp),%edx
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	24(%esp),%esi
+	xorl	%ecx,%edx
+	movl	28(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,20(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	8(%esp),%edi
+	movl	%ebx,%esi
+	rorl	$9,%ecx
+	movl	%ebx,4(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	(%esp),%edx
+	rorl	$11,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	60(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	addl	%edx,%eax
+	addl	16(%esp),%edx
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	20(%esp),%esi
+	xorl	%ecx,%edx
+	movl	24(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,16(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	4(%esp),%edi
+	movl	%eax,%esi
+	rorl	$9,%ecx
+	movl	%eax,(%esp)
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	28(%esp),%edx
+	rorl	$11,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	addl	64(%esp),%edx
+	xorl	%edi,%ebx
+	rorl	$2,%ecx
+	addl	%edx,%ebx
+	addl	12(%esp),%edx
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	16(%esp),%esi
+	xorl	%ecx,%edx
+	movl	20(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,12(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	(%esp),%edi
+	movl	%ebx,%esi
+	rorl	$9,%ecx
+	movl	%ebx,28(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	24(%esp),%edx
+	rorl	$11,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	68(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	addl	%edx,%eax
+	addl	8(%esp),%edx
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	12(%esp),%esi
+	xorl	%ecx,%edx
+	movl	16(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,8(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	28(%esp),%edi
+	movl	%eax,%esi
+	rorl	$9,%ecx
+	movl	%eax,24(%esp)
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	20(%esp),%edx
+	rorl	$11,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	addl	72(%esp),%edx
+	xorl	%edi,%ebx
+	rorl	$2,%ecx
+	addl	%edx,%ebx
+	addl	4(%esp),%edx
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	8(%esp),%esi
+	xorl	%ecx,%edx
+	movl	12(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,4(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	24(%esp),%edi
+	movl	%ebx,%esi
+	rorl	$9,%ecx
+	movl	%ebx,20(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	16(%esp),%edx
+	rorl	$11,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	76(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	addl	%edx,%eax
+	addl	(%esp),%edx
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	4(%esp),%esi
+	xorl	%ecx,%edx
+	movl	8(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	20(%esp),%edi
+	movl	%eax,%esi
+	rorl	$9,%ecx
+	movl	%eax,16(%esp)
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	12(%esp),%edx
+	rorl	$11,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	addl	80(%esp),%edx
+	xorl	%edi,%ebx
+	rorl	$2,%ecx
+	addl	%edx,%ebx
+	addl	28(%esp),%edx
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	(%esp),%esi
+	xorl	%ecx,%edx
+	movl	4(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,28(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	16(%esp),%edi
+	movl	%ebx,%esi
+	rorl	$9,%ecx
+	movl	%ebx,12(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	8(%esp),%edx
+	rorl	$11,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	84(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	addl	%edx,%eax
+	addl	24(%esp),%edx
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	28(%esp),%esi
+	xorl	%ecx,%edx
+	movl	(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,24(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	12(%esp),%edi
+	movl	%eax,%esi
+	rorl	$9,%ecx
+	movl	%eax,8(%esp)
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	4(%esp),%edx
+	rorl	$11,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	addl	88(%esp),%edx
+	xorl	%edi,%ebx
+	rorl	$2,%ecx
+	addl	%edx,%ebx
+	addl	20(%esp),%edx
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	rorl	$14,%edx
+	movl	24(%esp),%esi
+	xorl	%ecx,%edx
+	movl	28(%esp),%edi
+	xorl	%edi,%esi
+	rorl	$5,%edx
+	andl	%ecx,%esi
+	movl	%ecx,20(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	rorl	$6,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	8(%esp),%edi
+	movl	%ebx,%esi
+	rorl	$9,%ecx
+	movl	%ebx,4(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	(%esp),%edx
+	rorl	$11,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	92(%esp),%edx
+	xorl	%edi,%eax
+	rorl	$2,%ecx
+	addl	%edx,%eax
+	addl	16(%esp),%edx
+	addl	%ecx,%eax
+	movl	96(%esp),%esi
+	xorl	%edi,%ebx
+	movl	12(%esp),%ecx
+	addl	(%esi),%eax
+	addl	4(%esi),%ebx
+	addl	8(%esi),%edi
+	addl	12(%esi),%ecx
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%edi,8(%esi)
+	movl	%ecx,12(%esi)
+	movl	%ebx,4(%esp)
+	xorl	%edi,%ebx
+	movl	%edi,8(%esp)
+	movl	%ecx,12(%esp)
+	movl	20(%esp),%edi
+	movl	24(%esp),%ecx
+	addl	16(%esi),%edx
+	addl	20(%esi),%edi
+	addl	24(%esi),%ecx
+	movl	%edx,16(%esi)
+	movl	%edi,20(%esi)
+	movl	%edi,20(%esp)
+	movl	28(%esp),%edi
+	movl	%ecx,24(%esi)
+	addl	28(%esi),%edi
+	movl	%ecx,24(%esp)
+	movl	%edi,28(%esi)
+	movl	%edi,28(%esp)
+	movl	100(%esp),%edi
+	movdqa	64(%ebp),%xmm7
+	subl	$192,%ebp
+	cmpl	104(%esp),%edi
+	jb	.L010grand_ssse3
+	movl	108(%esp),%esp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	32
+.L004AVX:
+	leal	-96(%esp),%esp
+	vzeroall
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edi
+	movl	%ebx,4(%esp)
+	xorl	%ecx,%ebx
+	movl	%ecx,8(%esp)
+	movl	%edi,12(%esp)
+	movl	16(%esi),%edx
+	movl	20(%esi),%edi
+	movl	24(%esi),%ecx
+	movl	28(%esi),%esi
+	movl	%edi,20(%esp)
+	movl	100(%esp),%edi
+	movl	%ecx,24(%esp)
+	movl	%esi,28(%esp)
+	vmovdqa	256(%ebp),%xmm7
+	jmp	.L012grand_avx
+.align	32
+.L012grand_avx:
+	vmovdqu	(%edi),%xmm0
+	vmovdqu	16(%edi),%xmm1
+	vmovdqu	32(%edi),%xmm2
+	vmovdqu	48(%edi),%xmm3
+	addl	$64,%edi
+	vpshufb	%xmm7,%xmm0,%xmm0
+	movl	%edi,100(%esp)
+	vpshufb	%xmm7,%xmm1,%xmm1
+	vpshufb	%xmm7,%xmm2,%xmm2
+	vpaddd	(%ebp),%xmm0,%xmm4
+	vpshufb	%xmm7,%xmm3,%xmm3
+	vpaddd	16(%ebp),%xmm1,%xmm5
+	vpaddd	32(%ebp),%xmm2,%xmm6
+	vpaddd	48(%ebp),%xmm3,%xmm7
+	vmovdqa	%xmm4,32(%esp)
+	vmovdqa	%xmm5,48(%esp)
+	vmovdqa	%xmm6,64(%esp)
+	vmovdqa	%xmm7,80(%esp)
+	jmp	.L013avx_00_47
+.align	16
+.L013avx_00_47:
+	addl	$64,%ebp
+	vpalignr	$4,%xmm0,%xmm1,%xmm4
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	20(%esp),%esi
+	vpalignr	$4,%xmm2,%xmm3,%xmm7
+	xorl	%ecx,%edx
+	movl	24(%esp),%edi
+	xorl	%edi,%esi
+	vpsrld	$7,%xmm4,%xmm6
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,16(%esp)
+	vpaddd	%xmm7,%xmm0,%xmm0
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	vpsrld	$3,%xmm4,%xmm7
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	4(%esp),%edi
+	vpslld	$14,%xmm4,%xmm5
+	movl	%eax,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%eax,(%esp)
+	vpxor	%xmm6,%xmm7,%xmm4
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	28(%esp),%edx
+	vpshufd	$250,%xmm3,%xmm7
+	shrdl	$11,%ecx,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	vpsrld	$11,%xmm6,%xmm6
+	addl	32(%esp),%edx
+	xorl	%edi,%ebx
+	shrdl	$2,%ecx,%ecx
+	vpxor	%xmm5,%xmm4,%xmm4
+	addl	%edx,%ebx
+	addl	12(%esp),%edx
+	addl	%ecx,%ebx
+	vpslld	$11,%xmm5,%xmm5
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	16(%esp),%esi
+	vpxor	%xmm6,%xmm4,%xmm4
+	xorl	%ecx,%edx
+	movl	20(%esp),%edi
+	xorl	%edi,%esi
+	vpsrld	$10,%xmm7,%xmm6
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,12(%esp)
+	vpxor	%xmm5,%xmm4,%xmm4
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	vpsrlq	$17,%xmm7,%xmm5
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	(%esp),%edi
+	vpaddd	%xmm4,%xmm0,%xmm0
+	movl	%ebx,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%ebx,28(%esp)
+	vpxor	%xmm5,%xmm6,%xmm6
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	24(%esp),%edx
+	vpsrlq	$19,%xmm7,%xmm7
+	shrdl	$11,%ecx,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	vpxor	%xmm7,%xmm6,%xmm6
+	addl	36(%esp),%edx
+	xorl	%edi,%eax
+	shrdl	$2,%ecx,%ecx
+	vpshufd	$132,%xmm6,%xmm7
+	addl	%edx,%eax
+	addl	8(%esp),%edx
+	addl	%ecx,%eax
+	vpsrldq	$8,%xmm7,%xmm7
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	12(%esp),%esi
+	vpaddd	%xmm7,%xmm0,%xmm0
+	xorl	%ecx,%edx
+	movl	16(%esp),%edi
+	xorl	%edi,%esi
+	vpshufd	$80,%xmm0,%xmm7
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,8(%esp)
+	vpsrld	$10,%xmm7,%xmm6
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	vpsrlq	$17,%xmm7,%xmm5
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	28(%esp),%edi
+	vpxor	%xmm5,%xmm6,%xmm6
+	movl	%eax,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%eax,24(%esp)
+	vpsrlq	$19,%xmm7,%xmm7
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	20(%esp),%edx
+	vpxor	%xmm7,%xmm6,%xmm6
+	shrdl	$11,%ecx,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	vpshufd	$232,%xmm6,%xmm7
+	addl	40(%esp),%edx
+	xorl	%edi,%ebx
+	shrdl	$2,%ecx,%ecx
+	vpslldq	$8,%xmm7,%xmm7
+	addl	%edx,%ebx
+	addl	4(%esp),%edx
+	addl	%ecx,%ebx
+	vpaddd	%xmm7,%xmm0,%xmm0
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	8(%esp),%esi
+	vpaddd	(%ebp),%xmm0,%xmm6
+	xorl	%ecx,%edx
+	movl	12(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,4(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	24(%esp),%edi
+	movl	%ebx,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%ebx,20(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	16(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	44(%esp),%edx
+	xorl	%edi,%eax
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%eax
+	addl	(%esp),%edx
+	addl	%ecx,%eax
+	vmovdqa	%xmm6,32(%esp)
+	vpalignr	$4,%xmm1,%xmm2,%xmm4
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	4(%esp),%esi
+	vpalignr	$4,%xmm3,%xmm0,%xmm7
+	xorl	%ecx,%edx
+	movl	8(%esp),%edi
+	xorl	%edi,%esi
+	vpsrld	$7,%xmm4,%xmm6
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,(%esp)
+	vpaddd	%xmm7,%xmm1,%xmm1
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	vpsrld	$3,%xmm4,%xmm7
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	20(%esp),%edi
+	vpslld	$14,%xmm4,%xmm5
+	movl	%eax,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%eax,16(%esp)
+	vpxor	%xmm6,%xmm7,%xmm4
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	12(%esp),%edx
+	vpshufd	$250,%xmm0,%xmm7
+	shrdl	$11,%ecx,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	vpsrld	$11,%xmm6,%xmm6
+	addl	48(%esp),%edx
+	xorl	%edi,%ebx
+	shrdl	$2,%ecx,%ecx
+	vpxor	%xmm5,%xmm4,%xmm4
+	addl	%edx,%ebx
+	addl	28(%esp),%edx
+	addl	%ecx,%ebx
+	vpslld	$11,%xmm5,%xmm5
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	(%esp),%esi
+	vpxor	%xmm6,%xmm4,%xmm4
+	xorl	%ecx,%edx
+	movl	4(%esp),%edi
+	xorl	%edi,%esi
+	vpsrld	$10,%xmm7,%xmm6
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,28(%esp)
+	vpxor	%xmm5,%xmm4,%xmm4
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	vpsrlq	$17,%xmm7,%xmm5
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	16(%esp),%edi
+	vpaddd	%xmm4,%xmm1,%xmm1
+	movl	%ebx,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%ebx,12(%esp)
+	vpxor	%xmm5,%xmm6,%xmm6
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	8(%esp),%edx
+	vpsrlq	$19,%xmm7,%xmm7
+	shrdl	$11,%ecx,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	vpxor	%xmm7,%xmm6,%xmm6
+	addl	52(%esp),%edx
+	xorl	%edi,%eax
+	shrdl	$2,%ecx,%ecx
+	vpshufd	$132,%xmm6,%xmm7
+	addl	%edx,%eax
+	addl	24(%esp),%edx
+	addl	%ecx,%eax
+	vpsrldq	$8,%xmm7,%xmm7
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	28(%esp),%esi
+	vpaddd	%xmm7,%xmm1,%xmm1
+	xorl	%ecx,%edx
+	movl	(%esp),%edi
+	xorl	%edi,%esi
+	vpshufd	$80,%xmm1,%xmm7
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,24(%esp)
+	vpsrld	$10,%xmm7,%xmm6
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	vpsrlq	$17,%xmm7,%xmm5
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	12(%esp),%edi
+	vpxor	%xmm5,%xmm6,%xmm6
+	movl	%eax,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%eax,8(%esp)
+	vpsrlq	$19,%xmm7,%xmm7
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	4(%esp),%edx
+	vpxor	%xmm7,%xmm6,%xmm6
+	shrdl	$11,%ecx,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	vpshufd	$232,%xmm6,%xmm7
+	addl	56(%esp),%edx
+	xorl	%edi,%ebx
+	shrdl	$2,%ecx,%ecx
+	vpslldq	$8,%xmm7,%xmm7
+	addl	%edx,%ebx
+	addl	20(%esp),%edx
+	addl	%ecx,%ebx
+	vpaddd	%xmm7,%xmm1,%xmm1
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	24(%esp),%esi
+	vpaddd	16(%ebp),%xmm1,%xmm6
+	xorl	%ecx,%edx
+	movl	28(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,20(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	8(%esp),%edi
+	movl	%ebx,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%ebx,4(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	60(%esp),%edx
+	xorl	%edi,%eax
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%eax
+	addl	16(%esp),%edx
+	addl	%ecx,%eax
+	vmovdqa	%xmm6,48(%esp)
+	vpalignr	$4,%xmm2,%xmm3,%xmm4
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	20(%esp),%esi
+	vpalignr	$4,%xmm0,%xmm1,%xmm7
+	xorl	%ecx,%edx
+	movl	24(%esp),%edi
+	xorl	%edi,%esi
+	vpsrld	$7,%xmm4,%xmm6
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,16(%esp)
+	vpaddd	%xmm7,%xmm2,%xmm2
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	vpsrld	$3,%xmm4,%xmm7
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	4(%esp),%edi
+	vpslld	$14,%xmm4,%xmm5
+	movl	%eax,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%eax,(%esp)
+	vpxor	%xmm6,%xmm7,%xmm4
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	28(%esp),%edx
+	vpshufd	$250,%xmm1,%xmm7
+	shrdl	$11,%ecx,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	vpsrld	$11,%xmm6,%xmm6
+	addl	64(%esp),%edx
+	xorl	%edi,%ebx
+	shrdl	$2,%ecx,%ecx
+	vpxor	%xmm5,%xmm4,%xmm4
+	addl	%edx,%ebx
+	addl	12(%esp),%edx
+	addl	%ecx,%ebx
+	vpslld	$11,%xmm5,%xmm5
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	16(%esp),%esi
+	vpxor	%xmm6,%xmm4,%xmm4
+	xorl	%ecx,%edx
+	movl	20(%esp),%edi
+	xorl	%edi,%esi
+	vpsrld	$10,%xmm7,%xmm6
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,12(%esp)
+	vpxor	%xmm5,%xmm4,%xmm4
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	vpsrlq	$17,%xmm7,%xmm5
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	(%esp),%edi
+	vpaddd	%xmm4,%xmm2,%xmm2
+	movl	%ebx,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%ebx,28(%esp)
+	vpxor	%xmm5,%xmm6,%xmm6
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	24(%esp),%edx
+	vpsrlq	$19,%xmm7,%xmm7
+	shrdl	$11,%ecx,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	vpxor	%xmm7,%xmm6,%xmm6
+	addl	68(%esp),%edx
+	xorl	%edi,%eax
+	shrdl	$2,%ecx,%ecx
+	vpshufd	$132,%xmm6,%xmm7
+	addl	%edx,%eax
+	addl	8(%esp),%edx
+	addl	%ecx,%eax
+	vpsrldq	$8,%xmm7,%xmm7
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	12(%esp),%esi
+	vpaddd	%xmm7,%xmm2,%xmm2
+	xorl	%ecx,%edx
+	movl	16(%esp),%edi
+	xorl	%edi,%esi
+	vpshufd	$80,%xmm2,%xmm7
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,8(%esp)
+	vpsrld	$10,%xmm7,%xmm6
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	vpsrlq	$17,%xmm7,%xmm5
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	28(%esp),%edi
+	vpxor	%xmm5,%xmm6,%xmm6
+	movl	%eax,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%eax,24(%esp)
+	vpsrlq	$19,%xmm7,%xmm7
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	20(%esp),%edx
+	vpxor	%xmm7,%xmm6,%xmm6
+	shrdl	$11,%ecx,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	vpshufd	$232,%xmm6,%xmm7
+	addl	72(%esp),%edx
+	xorl	%edi,%ebx
+	shrdl	$2,%ecx,%ecx
+	vpslldq	$8,%xmm7,%xmm7
+	addl	%edx,%ebx
+	addl	4(%esp),%edx
+	addl	%ecx,%ebx
+	vpaddd	%xmm7,%xmm2,%xmm2
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	8(%esp),%esi
+	vpaddd	32(%ebp),%xmm2,%xmm6
+	xorl	%ecx,%edx
+	movl	12(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,4(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	24(%esp),%edi
+	movl	%ebx,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%ebx,20(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	16(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	76(%esp),%edx
+	xorl	%edi,%eax
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%eax
+	addl	(%esp),%edx
+	addl	%ecx,%eax
+	vmovdqa	%xmm6,64(%esp)
+	vpalignr	$4,%xmm3,%xmm0,%xmm4
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	4(%esp),%esi
+	vpalignr	$4,%xmm1,%xmm2,%xmm7
+	xorl	%ecx,%edx
+	movl	8(%esp),%edi
+	xorl	%edi,%esi
+	vpsrld	$7,%xmm4,%xmm6
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,(%esp)
+	vpaddd	%xmm7,%xmm3,%xmm3
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	vpsrld	$3,%xmm4,%xmm7
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	20(%esp),%edi
+	vpslld	$14,%xmm4,%xmm5
+	movl	%eax,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%eax,16(%esp)
+	vpxor	%xmm6,%xmm7,%xmm4
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	12(%esp),%edx
+	vpshufd	$250,%xmm2,%xmm7
+	shrdl	$11,%ecx,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	vpsrld	$11,%xmm6,%xmm6
+	addl	80(%esp),%edx
+	xorl	%edi,%ebx
+	shrdl	$2,%ecx,%ecx
+	vpxor	%xmm5,%xmm4,%xmm4
+	addl	%edx,%ebx
+	addl	28(%esp),%edx
+	addl	%ecx,%ebx
+	vpslld	$11,%xmm5,%xmm5
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	(%esp),%esi
+	vpxor	%xmm6,%xmm4,%xmm4
+	xorl	%ecx,%edx
+	movl	4(%esp),%edi
+	xorl	%edi,%esi
+	vpsrld	$10,%xmm7,%xmm6
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,28(%esp)
+	vpxor	%xmm5,%xmm4,%xmm4
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	vpsrlq	$17,%xmm7,%xmm5
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	16(%esp),%edi
+	vpaddd	%xmm4,%xmm3,%xmm3
+	movl	%ebx,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%ebx,12(%esp)
+	vpxor	%xmm5,%xmm6,%xmm6
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	8(%esp),%edx
+	vpsrlq	$19,%xmm7,%xmm7
+	shrdl	$11,%ecx,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	vpxor	%xmm7,%xmm6,%xmm6
+	addl	84(%esp),%edx
+	xorl	%edi,%eax
+	shrdl	$2,%ecx,%ecx
+	vpshufd	$132,%xmm6,%xmm7
+	addl	%edx,%eax
+	addl	24(%esp),%edx
+	addl	%ecx,%eax
+	vpsrldq	$8,%xmm7,%xmm7
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	28(%esp),%esi
+	vpaddd	%xmm7,%xmm3,%xmm3
+	xorl	%ecx,%edx
+	movl	(%esp),%edi
+	xorl	%edi,%esi
+	vpshufd	$80,%xmm3,%xmm7
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,24(%esp)
+	vpsrld	$10,%xmm7,%xmm6
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	vpsrlq	$17,%xmm7,%xmm5
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	12(%esp),%edi
+	vpxor	%xmm5,%xmm6,%xmm6
+	movl	%eax,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%eax,8(%esp)
+	vpsrlq	$19,%xmm7,%xmm7
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	4(%esp),%edx
+	vpxor	%xmm7,%xmm6,%xmm6
+	shrdl	$11,%ecx,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	vpshufd	$232,%xmm6,%xmm7
+	addl	88(%esp),%edx
+	xorl	%edi,%ebx
+	shrdl	$2,%ecx,%ecx
+	vpslldq	$8,%xmm7,%xmm7
+	addl	%edx,%ebx
+	addl	20(%esp),%edx
+	addl	%ecx,%ebx
+	vpaddd	%xmm7,%xmm3,%xmm3
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	24(%esp),%esi
+	vpaddd	48(%ebp),%xmm3,%xmm6
+	xorl	%ecx,%edx
+	movl	28(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,20(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	8(%esp),%edi
+	movl	%ebx,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%ebx,4(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	92(%esp),%edx
+	xorl	%edi,%eax
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%eax
+	addl	16(%esp),%edx
+	addl	%ecx,%eax
+	vmovdqa	%xmm6,80(%esp)
+	cmpl	$66051,64(%ebp)
+	jne	.L013avx_00_47
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	20(%esp),%esi
+	xorl	%ecx,%edx
+	movl	24(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,16(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	4(%esp),%edi
+	movl	%eax,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%eax,(%esp)
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	28(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	addl	32(%esp),%edx
+	xorl	%edi,%ebx
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%ebx
+	addl	12(%esp),%edx
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	16(%esp),%esi
+	xorl	%ecx,%edx
+	movl	20(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,12(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	(%esp),%edi
+	movl	%ebx,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%ebx,28(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	24(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	36(%esp),%edx
+	xorl	%edi,%eax
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%eax
+	addl	8(%esp),%edx
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	12(%esp),%esi
+	xorl	%ecx,%edx
+	movl	16(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,8(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	28(%esp),%edi
+	movl	%eax,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%eax,24(%esp)
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	20(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	addl	40(%esp),%edx
+	xorl	%edi,%ebx
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%ebx
+	addl	4(%esp),%edx
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	8(%esp),%esi
+	xorl	%ecx,%edx
+	movl	12(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,4(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	24(%esp),%edi
+	movl	%ebx,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%ebx,20(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	16(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	44(%esp),%edx
+	xorl	%edi,%eax
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%eax
+	addl	(%esp),%edx
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	4(%esp),%esi
+	xorl	%ecx,%edx
+	movl	8(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	20(%esp),%edi
+	movl	%eax,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%eax,16(%esp)
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	12(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	addl	48(%esp),%edx
+	xorl	%edi,%ebx
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%ebx
+	addl	28(%esp),%edx
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	(%esp),%esi
+	xorl	%ecx,%edx
+	movl	4(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,28(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	16(%esp),%edi
+	movl	%ebx,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%ebx,12(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	8(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	52(%esp),%edx
+	xorl	%edi,%eax
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%eax
+	addl	24(%esp),%edx
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	28(%esp),%esi
+	xorl	%ecx,%edx
+	movl	(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,24(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	12(%esp),%edi
+	movl	%eax,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%eax,8(%esp)
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	4(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	addl	56(%esp),%edx
+	xorl	%edi,%ebx
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%ebx
+	addl	20(%esp),%edx
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	24(%esp),%esi
+	xorl	%ecx,%edx
+	movl	28(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,20(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	8(%esp),%edi
+	movl	%ebx,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%ebx,4(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	60(%esp),%edx
+	xorl	%edi,%eax
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%eax
+	addl	16(%esp),%edx
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	20(%esp),%esi
+	xorl	%ecx,%edx
+	movl	24(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,16(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	4(%esp),%edi
+	movl	%eax,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%eax,(%esp)
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	28(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	addl	64(%esp),%edx
+	xorl	%edi,%ebx
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%ebx
+	addl	12(%esp),%edx
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	16(%esp),%esi
+	xorl	%ecx,%edx
+	movl	20(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,12(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	(%esp),%edi
+	movl	%ebx,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%ebx,28(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	24(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	68(%esp),%edx
+	xorl	%edi,%eax
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%eax
+	addl	8(%esp),%edx
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	12(%esp),%esi
+	xorl	%ecx,%edx
+	movl	16(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,8(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	28(%esp),%edi
+	movl	%eax,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%eax,24(%esp)
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	20(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	addl	72(%esp),%edx
+	xorl	%edi,%ebx
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%ebx
+	addl	4(%esp),%edx
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	8(%esp),%esi
+	xorl	%ecx,%edx
+	movl	12(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,4(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	24(%esp),%edi
+	movl	%ebx,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%ebx,20(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	16(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	76(%esp),%edx
+	xorl	%edi,%eax
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%eax
+	addl	(%esp),%edx
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	4(%esp),%esi
+	xorl	%ecx,%edx
+	movl	8(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	20(%esp),%edi
+	movl	%eax,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%eax,16(%esp)
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	12(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	addl	80(%esp),%edx
+	xorl	%edi,%ebx
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%ebx
+	addl	28(%esp),%edx
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	(%esp),%esi
+	xorl	%ecx,%edx
+	movl	4(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,28(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	16(%esp),%edi
+	movl	%ebx,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%ebx,12(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	8(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	84(%esp),%edx
+	xorl	%edi,%eax
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%eax
+	addl	24(%esp),%edx
+	addl	%ecx,%eax
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	28(%esp),%esi
+	xorl	%ecx,%edx
+	movl	(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,24(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%eax,%ecx
+	addl	%edi,%edx
+	movl	12(%esp),%edi
+	movl	%eax,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%eax,8(%esp)
+	xorl	%eax,%ecx
+	xorl	%edi,%eax
+	addl	4(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%eax,%ebx
+	xorl	%esi,%ecx
+	addl	88(%esp),%edx
+	xorl	%edi,%ebx
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%ebx
+	addl	20(%esp),%edx
+	addl	%ecx,%ebx
+	movl	%edx,%ecx
+	shrdl	$14,%edx,%edx
+	movl	24(%esp),%esi
+	xorl	%ecx,%edx
+	movl	28(%esp),%edi
+	xorl	%edi,%esi
+	shrdl	$5,%edx,%edx
+	andl	%ecx,%esi
+	movl	%ecx,20(%esp)
+	xorl	%ecx,%edx
+	xorl	%esi,%edi
+	shrdl	$6,%edx,%edx
+	movl	%ebx,%ecx
+	addl	%edi,%edx
+	movl	8(%esp),%edi
+	movl	%ebx,%esi
+	shrdl	$9,%ecx,%ecx
+	movl	%ebx,4(%esp)
+	xorl	%ebx,%ecx
+	xorl	%edi,%ebx
+	addl	(%esp),%edx
+	shrdl	$11,%ecx,%ecx
+	andl	%ebx,%eax
+	xorl	%esi,%ecx
+	addl	92(%esp),%edx
+	xorl	%edi,%eax
+	shrdl	$2,%ecx,%ecx
+	addl	%edx,%eax
+	addl	16(%esp),%edx
+	addl	%ecx,%eax
+	movl	96(%esp),%esi
+	xorl	%edi,%ebx
+	movl	12(%esp),%ecx
+	addl	(%esi),%eax
+	addl	4(%esi),%ebx
+	addl	8(%esi),%edi
+	addl	12(%esi),%ecx
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%edi,8(%esi)
+	movl	%ecx,12(%esi)
+	movl	%ebx,4(%esp)
+	xorl	%edi,%ebx
+	movl	%edi,8(%esp)
+	movl	%ecx,12(%esp)
+	movl	20(%esp),%edi
+	movl	24(%esp),%ecx
+	addl	16(%esi),%edx
+	addl	20(%esi),%edi
+	addl	24(%esi),%ecx
+	movl	%edx,16(%esi)
+	movl	%edi,20(%esi)
+	movl	%edi,20(%esp)
+	movl	28(%esp),%edi
+	movl	%ecx,24(%esi)
+	addl	28(%esi),%edi
+	movl	%ecx,24(%esp)
+	movl	%edi,28(%esi)
+	movl	%edi,28(%esp)
+	movl	100(%esp),%edi
+	vmovdqa	64(%ebp),%xmm7
+	subl	$192,%ebp
+	cmpl	104(%esp),%edi
+	jb	.L012grand_avx
+	movl	108(%esp),%esp
+	vzeroall
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	sha256_block_data_order,.-.L_sha256_block_data_order_begin
+#endif
diff --git a/third_party/boringssl/linux-x86/crypto/fipsmodule/sha512-586.S b/third_party/boringssl/linux-x86/crypto/fipsmodule/sha512-586.S
new file mode 100644
index 0000000..3617ce4
--- /dev/null
+++ b/third_party/boringssl/linux-x86/crypto/fipsmodule/sha512-586.S
@@ -0,0 +1,2830 @@
+#if defined(__i386__)
+.text
+.globl	sha512_block_data_order
+.hidden	sha512_block_data_order
+.type	sha512_block_data_order,@function
+.align	16
+sha512_block_data_order:
+.L_sha512_block_data_order_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi
+	movl	24(%esp),%edi
+	movl	28(%esp),%eax
+	movl	%esp,%ebx
+	call	.L000pic_point
+.L000pic_point:
+	popl	%ebp
+	leal	.L001K512-.L000pic_point(%ebp),%ebp
+	subl	$16,%esp
+	andl	$-64,%esp
+	shll	$7,%eax
+	addl	%edi,%eax
+	movl	%esi,(%esp)
+	movl	%edi,4(%esp)
+	movl	%eax,8(%esp)
+	movl	%ebx,12(%esp)
+	leal	OPENSSL_ia32cap_P-.L001K512(%ebp),%edx
+	movl	(%edx),%ecx
+	testl	$67108864,%ecx
+	jz	.L002loop_x86
+	movl	4(%edx),%edx
+	movq	(%esi),%mm0
+	andl	$16777216,%ecx
+	movq	8(%esi),%mm1
+	andl	$512,%edx
+	movq	16(%esi),%mm2
+	orl	%edx,%ecx
+	movq	24(%esi),%mm3
+	movq	32(%esi),%mm4
+	movq	40(%esi),%mm5
+	movq	48(%esi),%mm6
+	movq	56(%esi),%mm7
+	cmpl	$16777728,%ecx
+	je	.L003SSSE3
+	subl	$80,%esp
+	jmp	.L004loop_sse2
+.align	16
+.L004loop_sse2:
+	movq	%mm1,8(%esp)
+	movq	%mm2,16(%esp)
+	movq	%mm3,24(%esp)
+	movq	%mm5,40(%esp)
+	movq	%mm6,48(%esp)
+	pxor	%mm1,%mm2
+	movq	%mm7,56(%esp)
+	movq	%mm0,%mm3
+	movl	(%edi),%eax
+	movl	4(%edi),%ebx
+	addl	$8,%edi
+	movl	$15,%edx
+	bswap	%eax
+	bswap	%ebx
+	jmp	.L00500_14_sse2
+.align	16
+.L00500_14_sse2:
+	movd	%eax,%mm1
+	movl	(%edi),%eax
+	movd	%ebx,%mm7
+	movl	4(%edi),%ebx
+	addl	$8,%edi
+	bswap	%eax
+	bswap	%ebx
+	punpckldq	%mm1,%mm7
+	movq	%mm4,%mm1
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,32(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	movq	%mm3,%mm0
+	movq	%mm7,72(%esp)
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	56(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	paddq	(%ebp),%mm7
+	pxor	%mm4,%mm3
+	movq	24(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	8(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	subl	$8,%esp
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	40(%esp),%mm5
+	paddq	%mm2,%mm3
+	movq	%mm0,%mm2
+	addl	$8,%ebp
+	paddq	%mm6,%mm3
+	movq	48(%esp),%mm6
+	decl	%edx
+	jnz	.L00500_14_sse2
+	movd	%eax,%mm1
+	movd	%ebx,%mm7
+	punpckldq	%mm1,%mm7
+	movq	%mm4,%mm1
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,32(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	movq	%mm3,%mm0
+	movq	%mm7,72(%esp)
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	56(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	paddq	(%ebp),%mm7
+	pxor	%mm4,%mm3
+	movq	24(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	8(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	subl	$8,%esp
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	192(%esp),%mm7
+	paddq	%mm2,%mm3
+	movq	%mm0,%mm2
+	addl	$8,%ebp
+	paddq	%mm6,%mm3
+	pxor	%mm0,%mm0
+	movl	$32,%edx
+	jmp	.L00616_79_sse2
+.align	16
+.L00616_79_sse2:
+	movq	88(%esp),%mm5
+	movq	%mm7,%mm1
+	psrlq	$1,%mm7
+	movq	%mm5,%mm6
+	psrlq	$6,%mm5
+	psllq	$56,%mm1
+	paddq	%mm3,%mm0
+	movq	%mm7,%mm3
+	psrlq	$6,%mm7
+	pxor	%mm1,%mm3
+	psllq	$7,%mm1
+	pxor	%mm7,%mm3
+	psrlq	$1,%mm7
+	pxor	%mm1,%mm3
+	movq	%mm5,%mm1
+	psrlq	$13,%mm5
+	pxor	%mm3,%mm7
+	psllq	$3,%mm6
+	pxor	%mm5,%mm1
+	paddq	200(%esp),%mm7
+	pxor	%mm6,%mm1
+	psrlq	$42,%mm5
+	paddq	128(%esp),%mm7
+	pxor	%mm5,%mm1
+	psllq	$42,%mm6
+	movq	40(%esp),%mm5
+	pxor	%mm6,%mm1
+	movq	48(%esp),%mm6
+	paddq	%mm1,%mm7
+	movq	%mm4,%mm1
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,32(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	movq	%mm7,72(%esp)
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	56(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	paddq	(%ebp),%mm7
+	pxor	%mm4,%mm3
+	movq	24(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	8(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	subl	$8,%esp
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	192(%esp),%mm7
+	paddq	%mm6,%mm2
+	addl	$8,%ebp
+	movq	88(%esp),%mm5
+	movq	%mm7,%mm1
+	psrlq	$1,%mm7
+	movq	%mm5,%mm6
+	psrlq	$6,%mm5
+	psllq	$56,%mm1
+	paddq	%mm3,%mm2
+	movq	%mm7,%mm3
+	psrlq	$6,%mm7
+	pxor	%mm1,%mm3
+	psllq	$7,%mm1
+	pxor	%mm7,%mm3
+	psrlq	$1,%mm7
+	pxor	%mm1,%mm3
+	movq	%mm5,%mm1
+	psrlq	$13,%mm5
+	pxor	%mm3,%mm7
+	psllq	$3,%mm6
+	pxor	%mm5,%mm1
+	paddq	200(%esp),%mm7
+	pxor	%mm6,%mm1
+	psrlq	$42,%mm5
+	paddq	128(%esp),%mm7
+	pxor	%mm5,%mm1
+	psllq	$42,%mm6
+	movq	40(%esp),%mm5
+	pxor	%mm6,%mm1
+	movq	48(%esp),%mm6
+	paddq	%mm1,%mm7
+	movq	%mm4,%mm1
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,32(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	movq	%mm7,72(%esp)
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	56(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	paddq	(%ebp),%mm7
+	pxor	%mm4,%mm3
+	movq	24(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	8(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	subl	$8,%esp
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	192(%esp),%mm7
+	paddq	%mm6,%mm0
+	addl	$8,%ebp
+	decl	%edx
+	jnz	.L00616_79_sse2
+	paddq	%mm3,%mm0
+	movq	8(%esp),%mm1
+	movq	24(%esp),%mm3
+	movq	40(%esp),%mm5
+	movq	48(%esp),%mm6
+	movq	56(%esp),%mm7
+	pxor	%mm1,%mm2
+	paddq	(%esi),%mm0
+	paddq	8(%esi),%mm1
+	paddq	16(%esi),%mm2
+	paddq	24(%esi),%mm3
+	paddq	32(%esi),%mm4
+	paddq	40(%esi),%mm5
+	paddq	48(%esi),%mm6
+	paddq	56(%esi),%mm7
+	movl	$640,%eax
+	movq	%mm0,(%esi)
+	movq	%mm1,8(%esi)
+	movq	%mm2,16(%esi)
+	movq	%mm3,24(%esi)
+	movq	%mm4,32(%esi)
+	movq	%mm5,40(%esi)
+	movq	%mm6,48(%esi)
+	movq	%mm7,56(%esi)
+	leal	(%esp,%eax,1),%esp
+	subl	%eax,%ebp
+	cmpl	88(%esp),%edi
+	jb	.L004loop_sse2
+	movl	92(%esp),%esp
+	emms
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	32
+.L003SSSE3:
+	leal	-64(%esp),%edx
+	subl	$256,%esp
+	movdqa	640(%ebp),%xmm1
+	movdqu	(%edi),%xmm0
+.byte	102,15,56,0,193
+	movdqa	(%ebp),%xmm3
+	movdqa	%xmm1,%xmm2
+	movdqu	16(%edi),%xmm1
+	paddq	%xmm0,%xmm3
+.byte	102,15,56,0,202
+	movdqa	%xmm3,-128(%edx)
+	movdqa	16(%ebp),%xmm4
+	movdqa	%xmm2,%xmm3
+	movdqu	32(%edi),%xmm2
+	paddq	%xmm1,%xmm4
+.byte	102,15,56,0,211
+	movdqa	%xmm4,-112(%edx)
+	movdqa	32(%ebp),%xmm5
+	movdqa	%xmm3,%xmm4
+	movdqu	48(%edi),%xmm3
+	paddq	%xmm2,%xmm5
+.byte	102,15,56,0,220
+	movdqa	%xmm5,-96(%edx)
+	movdqa	48(%ebp),%xmm6
+	movdqa	%xmm4,%xmm5
+	movdqu	64(%edi),%xmm4
+	paddq	%xmm3,%xmm6
+.byte	102,15,56,0,229
+	movdqa	%xmm6,-80(%edx)
+	movdqa	64(%ebp),%xmm7
+	movdqa	%xmm5,%xmm6
+	movdqu	80(%edi),%xmm5
+	paddq	%xmm4,%xmm7
+.byte	102,15,56,0,238
+	movdqa	%xmm7,-64(%edx)
+	movdqa	%xmm0,(%edx)
+	movdqa	80(%ebp),%xmm0
+	movdqa	%xmm6,%xmm7
+	movdqu	96(%edi),%xmm6
+	paddq	%xmm5,%xmm0
+.byte	102,15,56,0,247
+	movdqa	%xmm0,-48(%edx)
+	movdqa	%xmm1,16(%edx)
+	movdqa	96(%ebp),%xmm1
+	movdqa	%xmm7,%xmm0
+	movdqu	112(%edi),%xmm7
+	paddq	%xmm6,%xmm1
+.byte	102,15,56,0,248
+	movdqa	%xmm1,-32(%edx)
+	movdqa	%xmm2,32(%edx)
+	movdqa	112(%ebp),%xmm2
+	movdqa	(%edx),%xmm0
+	paddq	%xmm7,%xmm2
+	movdqa	%xmm2,-16(%edx)
+	nop
+.align	32
+.L007loop_ssse3:
+	movdqa	16(%edx),%xmm2
+	movdqa	%xmm3,48(%edx)
+	leal	128(%ebp),%ebp
+	movq	%mm1,8(%esp)
+	movl	%edi,%ebx
+	movq	%mm2,16(%esp)
+	leal	128(%edi),%edi
+	movq	%mm3,24(%esp)
+	cmpl	%eax,%edi
+	movq	%mm5,40(%esp)
+	cmovbl	%edi,%ebx
+	movq	%mm6,48(%esp)
+	movl	$4,%ecx
+	pxor	%mm1,%mm2
+	movq	%mm7,56(%esp)
+	pxor	%mm3,%mm3
+	jmp	.L00800_47_ssse3
+.align	32
+.L00800_47_ssse3:
+	movdqa	%xmm5,%xmm3
+	movdqa	%xmm2,%xmm1
+.byte	102,15,58,15,208,8
+	movdqa	%xmm4,(%edx)
+.byte	102,15,58,15,220,8
+	movdqa	%xmm2,%xmm4
+	psrlq	$7,%xmm2
+	paddq	%xmm3,%xmm0
+	movdqa	%xmm4,%xmm3
+	psrlq	$1,%xmm4
+	psllq	$56,%xmm3
+	pxor	%xmm4,%xmm2
+	psrlq	$7,%xmm4
+	pxor	%xmm3,%xmm2
+	psllq	$7,%xmm3
+	pxor	%xmm4,%xmm2
+	movdqa	%xmm7,%xmm4
+	pxor	%xmm3,%xmm2
+	movdqa	%xmm7,%xmm3
+	psrlq	$6,%xmm4
+	paddq	%xmm2,%xmm0
+	movdqa	%xmm7,%xmm2
+	psrlq	$19,%xmm3
+	psllq	$3,%xmm2
+	pxor	%xmm3,%xmm4
+	psrlq	$42,%xmm3
+	pxor	%xmm2,%xmm4
+	psllq	$42,%xmm2
+	pxor	%xmm3,%xmm4
+	movdqa	32(%edx),%xmm3
+	pxor	%xmm2,%xmm4
+	movdqa	(%ebp),%xmm2
+	movq	%mm4,%mm1
+	paddq	%xmm4,%xmm0
+	movq	-128(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,32(%esp)
+	paddq	%xmm0,%xmm2
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm0
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	56(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	24(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	8(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	32(%esp),%mm5
+	paddq	%mm6,%mm2
+	movq	40(%esp),%mm6
+	movq	%mm4,%mm1
+	movq	-120(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,24(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm2
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,56(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	48(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	16(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	24(%esp),%mm5
+	paddq	%mm6,%mm0
+	movq	32(%esp),%mm6
+	movdqa	%xmm2,-128(%edx)
+	movdqa	%xmm6,%xmm4
+	movdqa	%xmm3,%xmm2
+.byte	102,15,58,15,217,8
+	movdqa	%xmm5,16(%edx)
+.byte	102,15,58,15,229,8
+	movdqa	%xmm3,%xmm5
+	psrlq	$7,%xmm3
+	paddq	%xmm4,%xmm1
+	movdqa	%xmm5,%xmm4
+	psrlq	$1,%xmm5
+	psllq	$56,%xmm4
+	pxor	%xmm5,%xmm3
+	psrlq	$7,%xmm5
+	pxor	%xmm4,%xmm3
+	psllq	$7,%xmm4
+	pxor	%xmm5,%xmm3
+	movdqa	%xmm0,%xmm5
+	pxor	%xmm4,%xmm3
+	movdqa	%xmm0,%xmm4
+	psrlq	$6,%xmm5
+	paddq	%xmm3,%xmm1
+	movdqa	%xmm0,%xmm3
+	psrlq	$19,%xmm4
+	psllq	$3,%xmm3
+	pxor	%xmm4,%xmm5
+	psrlq	$42,%xmm4
+	pxor	%xmm3,%xmm5
+	psllq	$42,%xmm3
+	pxor	%xmm4,%xmm5
+	movdqa	48(%edx),%xmm4
+	pxor	%xmm3,%xmm5
+	movdqa	16(%ebp),%xmm3
+	movq	%mm4,%mm1
+	paddq	%xmm5,%xmm1
+	movq	-112(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,16(%esp)
+	paddq	%xmm1,%xmm3
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm0
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,48(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	40(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	8(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	56(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	16(%esp),%mm5
+	paddq	%mm6,%mm2
+	movq	24(%esp),%mm6
+	movq	%mm4,%mm1
+	movq	-104(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,8(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm2
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,40(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	32(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	48(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	8(%esp),%mm5
+	paddq	%mm6,%mm0
+	movq	16(%esp),%mm6
+	movdqa	%xmm3,-112(%edx)
+	movdqa	%xmm7,%xmm5
+	movdqa	%xmm4,%xmm3
+.byte	102,15,58,15,226,8
+	movdqa	%xmm6,32(%edx)
+.byte	102,15,58,15,238,8
+	movdqa	%xmm4,%xmm6
+	psrlq	$7,%xmm4
+	paddq	%xmm5,%xmm2
+	movdqa	%xmm6,%xmm5
+	psrlq	$1,%xmm6
+	psllq	$56,%xmm5
+	pxor	%xmm6,%xmm4
+	psrlq	$7,%xmm6
+	pxor	%xmm5,%xmm4
+	psllq	$7,%xmm5
+	pxor	%xmm6,%xmm4
+	movdqa	%xmm1,%xmm6
+	pxor	%xmm5,%xmm4
+	movdqa	%xmm1,%xmm5
+	psrlq	$6,%xmm6
+	paddq	%xmm4,%xmm2
+	movdqa	%xmm1,%xmm4
+	psrlq	$19,%xmm5
+	psllq	$3,%xmm4
+	pxor	%xmm5,%xmm6
+	psrlq	$42,%xmm5
+	pxor	%xmm4,%xmm6
+	psllq	$42,%xmm4
+	pxor	%xmm5,%xmm6
+	movdqa	(%edx),%xmm5
+	pxor	%xmm4,%xmm6
+	movdqa	32(%ebp),%xmm4
+	movq	%mm4,%mm1
+	paddq	%xmm6,%xmm2
+	movq	-96(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,(%esp)
+	paddq	%xmm2,%xmm4
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm0
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,32(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	24(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	56(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	40(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	(%esp),%mm5
+	paddq	%mm6,%mm2
+	movq	8(%esp),%mm6
+	movq	%mm4,%mm1
+	movq	-88(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,56(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm2
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,24(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	16(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	48(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	32(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	56(%esp),%mm5
+	paddq	%mm6,%mm0
+	movq	(%esp),%mm6
+	movdqa	%xmm4,-96(%edx)
+	movdqa	%xmm0,%xmm6
+	movdqa	%xmm5,%xmm4
+.byte	102,15,58,15,235,8
+	movdqa	%xmm7,48(%edx)
+.byte	102,15,58,15,247,8
+	movdqa	%xmm5,%xmm7
+	psrlq	$7,%xmm5
+	paddq	%xmm6,%xmm3
+	movdqa	%xmm7,%xmm6
+	psrlq	$1,%xmm7
+	psllq	$56,%xmm6
+	pxor	%xmm7,%xmm5
+	psrlq	$7,%xmm7
+	pxor	%xmm6,%xmm5
+	psllq	$7,%xmm6
+	pxor	%xmm7,%xmm5
+	movdqa	%xmm2,%xmm7
+	pxor	%xmm6,%xmm5
+	movdqa	%xmm2,%xmm6
+	psrlq	$6,%xmm7
+	paddq	%xmm5,%xmm3
+	movdqa	%xmm2,%xmm5
+	psrlq	$19,%xmm6
+	psllq	$3,%xmm5
+	pxor	%xmm6,%xmm7
+	psrlq	$42,%xmm6
+	pxor	%xmm5,%xmm7
+	psllq	$42,%xmm5
+	pxor	%xmm6,%xmm7
+	movdqa	16(%edx),%xmm6
+	pxor	%xmm5,%xmm7
+	movdqa	48(%ebp),%xmm5
+	movq	%mm4,%mm1
+	paddq	%xmm7,%xmm3
+	movq	-80(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,48(%esp)
+	paddq	%xmm3,%xmm5
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm0
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,16(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	8(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	40(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	24(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	48(%esp),%mm5
+	paddq	%mm6,%mm2
+	movq	56(%esp),%mm6
+	movq	%mm4,%mm1
+	movq	-72(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,40(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm2
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,8(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	32(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	16(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	40(%esp),%mm5
+	paddq	%mm6,%mm0
+	movq	48(%esp),%mm6
+	movdqa	%xmm5,-80(%edx)
+	movdqa	%xmm1,%xmm7
+	movdqa	%xmm6,%xmm5
+.byte	102,15,58,15,244,8
+	movdqa	%xmm0,(%edx)
+.byte	102,15,58,15,248,8
+	movdqa	%xmm6,%xmm0
+	psrlq	$7,%xmm6
+	paddq	%xmm7,%xmm4
+	movdqa	%xmm0,%xmm7
+	psrlq	$1,%xmm0
+	psllq	$56,%xmm7
+	pxor	%xmm0,%xmm6
+	psrlq	$7,%xmm0
+	pxor	%xmm7,%xmm6
+	psllq	$7,%xmm7
+	pxor	%xmm0,%xmm6
+	movdqa	%xmm3,%xmm0
+	pxor	%xmm7,%xmm6
+	movdqa	%xmm3,%xmm7
+	psrlq	$6,%xmm0
+	paddq	%xmm6,%xmm4
+	movdqa	%xmm3,%xmm6
+	psrlq	$19,%xmm7
+	psllq	$3,%xmm6
+	pxor	%xmm7,%xmm0
+	psrlq	$42,%xmm7
+	pxor	%xmm6,%xmm0
+	psllq	$42,%xmm6
+	pxor	%xmm7,%xmm0
+	movdqa	32(%edx),%xmm7
+	pxor	%xmm6,%xmm0
+	movdqa	64(%ebp),%xmm6
+	movq	%mm4,%mm1
+	paddq	%xmm0,%xmm4
+	movq	-64(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,32(%esp)
+	paddq	%xmm4,%xmm6
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm0
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	56(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	24(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	8(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	32(%esp),%mm5
+	paddq	%mm6,%mm2
+	movq	40(%esp),%mm6
+	movq	%mm4,%mm1
+	movq	-56(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,24(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm2
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,56(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	48(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	16(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	24(%esp),%mm5
+	paddq	%mm6,%mm0
+	movq	32(%esp),%mm6
+	movdqa	%xmm6,-64(%edx)
+	movdqa	%xmm2,%xmm0
+	movdqa	%xmm7,%xmm6
+.byte	102,15,58,15,253,8
+	movdqa	%xmm1,16(%edx)
+.byte	102,15,58,15,193,8
+	movdqa	%xmm7,%xmm1
+	psrlq	$7,%xmm7
+	paddq	%xmm0,%xmm5
+	movdqa	%xmm1,%xmm0
+	psrlq	$1,%xmm1
+	psllq	$56,%xmm0
+	pxor	%xmm1,%xmm7
+	psrlq	$7,%xmm1
+	pxor	%xmm0,%xmm7
+	psllq	$7,%xmm0
+	pxor	%xmm1,%xmm7
+	movdqa	%xmm4,%xmm1
+	pxor	%xmm0,%xmm7
+	movdqa	%xmm4,%xmm0
+	psrlq	$6,%xmm1
+	paddq	%xmm7,%xmm5
+	movdqa	%xmm4,%xmm7
+	psrlq	$19,%xmm0
+	psllq	$3,%xmm7
+	pxor	%xmm0,%xmm1
+	psrlq	$42,%xmm0
+	pxor	%xmm7,%xmm1
+	psllq	$42,%xmm7
+	pxor	%xmm0,%xmm1
+	movdqa	48(%edx),%xmm0
+	pxor	%xmm7,%xmm1
+	movdqa	80(%ebp),%xmm7
+	movq	%mm4,%mm1
+	paddq	%xmm1,%xmm5
+	movq	-48(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,16(%esp)
+	paddq	%xmm5,%xmm7
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm0
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,48(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	40(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	8(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	56(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	16(%esp),%mm5
+	paddq	%mm6,%mm2
+	movq	24(%esp),%mm6
+	movq	%mm4,%mm1
+	movq	-40(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,8(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm2
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,40(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	32(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	48(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	8(%esp),%mm5
+	paddq	%mm6,%mm0
+	movq	16(%esp),%mm6
+	movdqa	%xmm7,-48(%edx)
+	movdqa	%xmm3,%xmm1
+	movdqa	%xmm0,%xmm7
+.byte	102,15,58,15,198,8
+	movdqa	%xmm2,32(%edx)
+.byte	102,15,58,15,202,8
+	movdqa	%xmm0,%xmm2
+	psrlq	$7,%xmm0
+	paddq	%xmm1,%xmm6
+	movdqa	%xmm2,%xmm1
+	psrlq	$1,%xmm2
+	psllq	$56,%xmm1
+	pxor	%xmm2,%xmm0
+	psrlq	$7,%xmm2
+	pxor	%xmm1,%xmm0
+	psllq	$7,%xmm1
+	pxor	%xmm2,%xmm0
+	movdqa	%xmm5,%xmm2
+	pxor	%xmm1,%xmm0
+	movdqa	%xmm5,%xmm1
+	psrlq	$6,%xmm2
+	paddq	%xmm0,%xmm6
+	movdqa	%xmm5,%xmm0
+	psrlq	$19,%xmm1
+	psllq	$3,%xmm0
+	pxor	%xmm1,%xmm2
+	psrlq	$42,%xmm1
+	pxor	%xmm0,%xmm2
+	psllq	$42,%xmm0
+	pxor	%xmm1,%xmm2
+	movdqa	(%edx),%xmm1
+	pxor	%xmm0,%xmm2
+	movdqa	96(%ebp),%xmm0
+	movq	%mm4,%mm1
+	paddq	%xmm2,%xmm6
+	movq	-32(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,(%esp)
+	paddq	%xmm6,%xmm0
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm0
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,32(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	24(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	56(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	40(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	(%esp),%mm5
+	paddq	%mm6,%mm2
+	movq	8(%esp),%mm6
+	movq	%mm4,%mm1
+	movq	-24(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,56(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm2
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,24(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	16(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	48(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	32(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	56(%esp),%mm5
+	paddq	%mm6,%mm0
+	movq	(%esp),%mm6
+	movdqa	%xmm0,-32(%edx)
+	movdqa	%xmm4,%xmm2
+	movdqa	%xmm1,%xmm0
+.byte	102,15,58,15,207,8
+	movdqa	%xmm3,48(%edx)
+.byte	102,15,58,15,211,8
+	movdqa	%xmm1,%xmm3
+	psrlq	$7,%xmm1
+	paddq	%xmm2,%xmm7
+	movdqa	%xmm3,%xmm2
+	psrlq	$1,%xmm3
+	psllq	$56,%xmm2
+	pxor	%xmm3,%xmm1
+	psrlq	$7,%xmm3
+	pxor	%xmm2,%xmm1
+	psllq	$7,%xmm2
+	pxor	%xmm3,%xmm1
+	movdqa	%xmm6,%xmm3
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm6,%xmm2
+	psrlq	$6,%xmm3
+	paddq	%xmm1,%xmm7
+	movdqa	%xmm6,%xmm1
+	psrlq	$19,%xmm2
+	psllq	$3,%xmm1
+	pxor	%xmm2,%xmm3
+	psrlq	$42,%xmm2
+	pxor	%xmm1,%xmm3
+	psllq	$42,%xmm1
+	pxor	%xmm2,%xmm3
+	movdqa	16(%edx),%xmm2
+	pxor	%xmm1,%xmm3
+	movdqa	112(%ebp),%xmm1
+	movq	%mm4,%mm1
+	paddq	%xmm3,%xmm7
+	movq	-16(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,48(%esp)
+	paddq	%xmm7,%xmm1
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm0
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,16(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	8(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	40(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	24(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	48(%esp),%mm5
+	paddq	%mm6,%mm2
+	movq	56(%esp),%mm6
+	movq	%mm4,%mm1
+	movq	-8(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,40(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm2
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,8(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	32(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	16(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	40(%esp),%mm5
+	paddq	%mm6,%mm0
+	movq	48(%esp),%mm6
+	movdqa	%xmm1,-16(%edx)
+	leal	128(%ebp),%ebp
+	decl	%ecx
+	jnz	.L00800_47_ssse3
+	movdqa	(%ebp),%xmm1
+	leal	-640(%ebp),%ebp
+	movdqu	(%ebx),%xmm0
+.byte	102,15,56,0,193
+	movdqa	(%ebp),%xmm3
+	movdqa	%xmm1,%xmm2
+	movdqu	16(%ebx),%xmm1
+	paddq	%xmm0,%xmm3
+.byte	102,15,56,0,202
+	movq	%mm4,%mm1
+	movq	-128(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,32(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm0
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	56(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	24(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	8(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	32(%esp),%mm5
+	paddq	%mm6,%mm2
+	movq	40(%esp),%mm6
+	movq	%mm4,%mm1
+	movq	-120(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,24(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm2
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,56(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	48(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	16(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	24(%esp),%mm5
+	paddq	%mm6,%mm0
+	movq	32(%esp),%mm6
+	movdqa	%xmm3,-128(%edx)
+	movdqa	16(%ebp),%xmm4
+	movdqa	%xmm2,%xmm3
+	movdqu	32(%ebx),%xmm2
+	paddq	%xmm1,%xmm4
+.byte	102,15,56,0,211
+	movq	%mm4,%mm1
+	movq	-112(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,16(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm0
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,48(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	40(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	8(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	56(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	16(%esp),%mm5
+	paddq	%mm6,%mm2
+	movq	24(%esp),%mm6
+	movq	%mm4,%mm1
+	movq	-104(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,8(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm2
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,40(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	32(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	48(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	8(%esp),%mm5
+	paddq	%mm6,%mm0
+	movq	16(%esp),%mm6
+	movdqa	%xmm4,-112(%edx)
+	movdqa	32(%ebp),%xmm5
+	movdqa	%xmm3,%xmm4
+	movdqu	48(%ebx),%xmm3
+	paddq	%xmm2,%xmm5
+.byte	102,15,56,0,220
+	movq	%mm4,%mm1
+	movq	-96(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm0
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,32(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	24(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	56(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	40(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	(%esp),%mm5
+	paddq	%mm6,%mm2
+	movq	8(%esp),%mm6
+	movq	%mm4,%mm1
+	movq	-88(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,56(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm2
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,24(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	16(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	48(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	32(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	56(%esp),%mm5
+	paddq	%mm6,%mm0
+	movq	(%esp),%mm6
+	movdqa	%xmm5,-96(%edx)
+	movdqa	48(%ebp),%xmm6
+	movdqa	%xmm4,%xmm5
+	movdqu	64(%ebx),%xmm4
+	paddq	%xmm3,%xmm6
+.byte	102,15,56,0,229
+	movq	%mm4,%mm1
+	movq	-80(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,48(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm0
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,16(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	8(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	40(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	24(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	48(%esp),%mm5
+	paddq	%mm6,%mm2
+	movq	56(%esp),%mm6
+	movq	%mm4,%mm1
+	movq	-72(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,40(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm2
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,8(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	32(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	16(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	40(%esp),%mm5
+	paddq	%mm6,%mm0
+	movq	48(%esp),%mm6
+	movdqa	%xmm6,-80(%edx)
+	movdqa	64(%ebp),%xmm7
+	movdqa	%xmm5,%xmm6
+	movdqu	80(%ebx),%xmm5
+	paddq	%xmm4,%xmm7
+.byte	102,15,56,0,238
+	movq	%mm4,%mm1
+	movq	-64(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,32(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm0
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	56(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	24(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	8(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	32(%esp),%mm5
+	paddq	%mm6,%mm2
+	movq	40(%esp),%mm6
+	movq	%mm4,%mm1
+	movq	-56(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,24(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm2
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,56(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	48(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	16(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	24(%esp),%mm5
+	paddq	%mm6,%mm0
+	movq	32(%esp),%mm6
+	movdqa	%xmm7,-64(%edx)
+	movdqa	%xmm0,(%edx)
+	movdqa	80(%ebp),%xmm0
+	movdqa	%xmm6,%xmm7
+	movdqu	96(%ebx),%xmm6
+	paddq	%xmm5,%xmm0
+.byte	102,15,56,0,247
+	movq	%mm4,%mm1
+	movq	-48(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,16(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm0
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,48(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	40(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	8(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	56(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	16(%esp),%mm5
+	paddq	%mm6,%mm2
+	movq	24(%esp),%mm6
+	movq	%mm4,%mm1
+	movq	-40(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,8(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm2
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,40(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	32(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	48(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	8(%esp),%mm5
+	paddq	%mm6,%mm0
+	movq	16(%esp),%mm6
+	movdqa	%xmm0,-48(%edx)
+	movdqa	%xmm1,16(%edx)
+	movdqa	96(%ebp),%xmm1
+	movdqa	%xmm7,%xmm0
+	movdqu	112(%ebx),%xmm7
+	paddq	%xmm6,%xmm1
+.byte	102,15,56,0,248
+	movq	%mm4,%mm1
+	movq	-32(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm0
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,32(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	24(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	56(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	40(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	(%esp),%mm5
+	paddq	%mm6,%mm2
+	movq	8(%esp),%mm6
+	movq	%mm4,%mm1
+	movq	-24(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,56(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm2
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,24(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	16(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	48(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	32(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	56(%esp),%mm5
+	paddq	%mm6,%mm0
+	movq	(%esp),%mm6
+	movdqa	%xmm1,-32(%edx)
+	movdqa	%xmm2,32(%edx)
+	movdqa	112(%ebp),%xmm2
+	movdqa	(%edx),%xmm0
+	paddq	%xmm7,%xmm2
+	movq	%mm4,%mm1
+	movq	-16(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,48(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm0
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm0,16(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	8(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	40(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm0,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm0,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	24(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm0,%mm2
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	pxor	%mm7,%mm6
+	movq	48(%esp),%mm5
+	paddq	%mm6,%mm2
+	movq	56(%esp),%mm6
+	movq	%mm4,%mm1
+	movq	-8(%edx),%mm7
+	pxor	%mm6,%mm5
+	psrlq	$14,%mm1
+	movq	%mm4,40(%esp)
+	pand	%mm4,%mm5
+	psllq	$23,%mm4
+	paddq	%mm3,%mm2
+	movq	%mm1,%mm3
+	psrlq	$4,%mm1
+	pxor	%mm6,%mm5
+	pxor	%mm4,%mm3
+	psllq	$23,%mm4
+	pxor	%mm1,%mm3
+	movq	%mm2,8(%esp)
+	paddq	%mm5,%mm7
+	pxor	%mm4,%mm3
+	psrlq	$23,%mm1
+	paddq	(%esp),%mm7
+	pxor	%mm1,%mm3
+	psllq	$4,%mm4
+	pxor	%mm4,%mm3
+	movq	32(%esp),%mm4
+	paddq	%mm7,%mm3
+	movq	%mm2,%mm5
+	psrlq	$28,%mm5
+	paddq	%mm3,%mm4
+	movq	%mm2,%mm6
+	movq	%mm5,%mm7
+	psllq	$25,%mm6
+	movq	16(%esp),%mm1
+	psrlq	$6,%mm5
+	pxor	%mm6,%mm7
+	psllq	$5,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm2
+	psrlq	$5,%mm5
+	pxor	%mm6,%mm7
+	pand	%mm2,%mm0
+	psllq	$6,%mm6
+	pxor	%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm7,%mm6
+	movq	40(%esp),%mm5
+	paddq	%mm6,%mm0
+	movq	48(%esp),%mm6
+	movdqa	%xmm2,-16(%edx)
+	movq	8(%esp),%mm1
+	paddq	%mm3,%mm0
+	movq	24(%esp),%mm3
+	movq	56(%esp),%mm7
+	pxor	%mm1,%mm2
+	paddq	(%esi),%mm0
+	paddq	8(%esi),%mm1
+	paddq	16(%esi),%mm2
+	paddq	24(%esi),%mm3
+	paddq	32(%esi),%mm4
+	paddq	40(%esi),%mm5
+	paddq	48(%esi),%mm6
+	paddq	56(%esi),%mm7
+	movq	%mm0,(%esi)
+	movq	%mm1,8(%esi)
+	movq	%mm2,16(%esi)
+	movq	%mm3,24(%esi)
+	movq	%mm4,32(%esi)
+	movq	%mm5,40(%esi)
+	movq	%mm6,48(%esi)
+	movq	%mm7,56(%esi)
+	cmpl	%eax,%edi
+	jb	.L007loop_ssse3
+	movl	76(%edx),%esp
+	emms
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	16
+.L002loop_x86:
+	movl	(%edi),%eax
+	movl	4(%edi),%ebx
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	16(%edi),%eax
+	movl	20(%edi),%ebx
+	movl	24(%edi),%ecx
+	movl	28(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	32(%edi),%eax
+	movl	36(%edi),%ebx
+	movl	40(%edi),%ecx
+	movl	44(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	48(%edi),%eax
+	movl	52(%edi),%ebx
+	movl	56(%edi),%ecx
+	movl	60(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	64(%edi),%eax
+	movl	68(%edi),%ebx
+	movl	72(%edi),%ecx
+	movl	76(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	80(%edi),%eax
+	movl	84(%edi),%ebx
+	movl	88(%edi),%ecx
+	movl	92(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	96(%edi),%eax
+	movl	100(%edi),%ebx
+	movl	104(%edi),%ecx
+	movl	108(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	112(%edi),%eax
+	movl	116(%edi),%ebx
+	movl	120(%edi),%ecx
+	movl	124(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	addl	$128,%edi
+	subl	$72,%esp
+	movl	%edi,204(%esp)
+	leal	8(%esp),%edi
+	movl	$16,%ecx
+.long	2784229001
+.align	16
+.L00900_15_x86:
+	movl	40(%esp),%ecx
+	movl	44(%esp),%edx
+	movl	%ecx,%esi
+	shrl	$9,%ecx
+	movl	%edx,%edi
+	shrl	$9,%edx
+	movl	%ecx,%ebx
+	shll	$14,%esi
+	movl	%edx,%eax
+	shll	$14,%edi
+	xorl	%esi,%ebx
+	shrl	$5,%ecx
+	xorl	%edi,%eax
+	shrl	$5,%edx
+	xorl	%ecx,%eax
+	shll	$4,%esi
+	xorl	%edx,%ebx
+	shll	$4,%edi
+	xorl	%esi,%ebx
+	shrl	$4,%ecx
+	xorl	%edi,%eax
+	shrl	$4,%edx
+	xorl	%ecx,%eax
+	shll	$5,%esi
+	xorl	%edx,%ebx
+	shll	$5,%edi
+	xorl	%esi,%eax
+	xorl	%edi,%ebx
+	movl	48(%esp),%ecx
+	movl	52(%esp),%edx
+	movl	56(%esp),%esi
+	movl	60(%esp),%edi
+	addl	64(%esp),%eax
+	adcl	68(%esp),%ebx
+	xorl	%esi,%ecx
+	xorl	%edi,%edx
+	andl	40(%esp),%ecx
+	andl	44(%esp),%edx
+	addl	192(%esp),%eax
+	adcl	196(%esp),%ebx
+	xorl	%esi,%ecx
+	xorl	%edi,%edx
+	movl	(%ebp),%esi
+	movl	4(%ebp),%edi
+	addl	%ecx,%eax
+	adcl	%edx,%ebx
+	movl	32(%esp),%ecx
+	movl	36(%esp),%edx
+	addl	%esi,%eax
+	adcl	%edi,%ebx
+	movl	%eax,(%esp)
+	movl	%ebx,4(%esp)
+	addl	%ecx,%eax
+	adcl	%edx,%ebx
+	movl	8(%esp),%ecx
+	movl	12(%esp),%edx
+	movl	%eax,32(%esp)
+	movl	%ebx,36(%esp)
+	movl	%ecx,%esi
+	shrl	$2,%ecx
+	movl	%edx,%edi
+	shrl	$2,%edx
+	movl	%ecx,%ebx
+	shll	$4,%esi
+	movl	%edx,%eax
+	shll	$4,%edi
+	xorl	%esi,%ebx
+	shrl	$5,%ecx
+	xorl	%edi,%eax
+	shrl	$5,%edx
+	xorl	%ecx,%ebx
+	shll	$21,%esi
+	xorl	%edx,%eax
+	shll	$21,%edi
+	xorl	%esi,%eax
+	shrl	$21,%ecx
+	xorl	%edi,%ebx
+	shrl	$21,%edx
+	xorl	%ecx,%eax
+	shll	$5,%esi
+	xorl	%edx,%ebx
+	shll	$5,%edi
+	xorl	%esi,%eax
+	xorl	%edi,%ebx
+	movl	8(%esp),%ecx
+	movl	12(%esp),%edx
+	movl	16(%esp),%esi
+	movl	20(%esp),%edi
+	addl	(%esp),%eax
+	adcl	4(%esp),%ebx
+	orl	%esi,%ecx
+	orl	%edi,%edx
+	andl	24(%esp),%ecx
+	andl	28(%esp),%edx
+	andl	8(%esp),%esi
+	andl	12(%esp),%edi
+	orl	%esi,%ecx
+	orl	%edi,%edx
+	addl	%ecx,%eax
+	adcl	%edx,%ebx
+	movl	%eax,(%esp)
+	movl	%ebx,4(%esp)
+	movb	(%ebp),%dl
+	subl	$8,%esp
+	leal	8(%ebp),%ebp
+	cmpb	$148,%dl
+	jne	.L00900_15_x86
+.align	16
+.L01016_79_x86:
+	movl	312(%esp),%ecx
+	movl	316(%esp),%edx
+	movl	%ecx,%esi
+	shrl	$1,%ecx
+	movl	%edx,%edi
+	shrl	$1,%edx
+	movl	%ecx,%eax
+	shll	$24,%esi
+	movl	%edx,%ebx
+	shll	$24,%edi
+	xorl	%esi,%ebx
+	shrl	$6,%ecx
+	xorl	%edi,%eax
+	shrl	$6,%edx
+	xorl	%ecx,%eax
+	shll	$7,%esi
+	xorl	%edx,%ebx
+	shll	$1,%edi
+	xorl	%esi,%ebx
+	shrl	$1,%ecx
+	xorl	%edi,%eax
+	shrl	$1,%edx
+	xorl	%ecx,%eax
+	shll	$6,%edi
+	xorl	%edx,%ebx
+	xorl	%edi,%eax
+	movl	%eax,(%esp)
+	movl	%ebx,4(%esp)
+	movl	208(%esp),%ecx
+	movl	212(%esp),%edx
+	movl	%ecx,%esi
+	shrl	$6,%ecx
+	movl	%edx,%edi
+	shrl	$6,%edx
+	movl	%ecx,%eax
+	shll	$3,%esi
+	movl	%edx,%ebx
+	shll	$3,%edi
+	xorl	%esi,%eax
+	shrl	$13,%ecx
+	xorl	%edi,%ebx
+	shrl	$13,%edx
+	xorl	%ecx,%eax
+	shll	$10,%esi
+	xorl	%edx,%ebx
+	shll	$10,%edi
+	xorl	%esi,%ebx
+	shrl	$10,%ecx
+	xorl	%edi,%eax
+	shrl	$10,%edx
+	xorl	%ecx,%ebx
+	shll	$13,%edi
+	xorl	%edx,%eax
+	xorl	%edi,%eax
+	movl	320(%esp),%ecx
+	movl	324(%esp),%edx
+	addl	(%esp),%eax
+	adcl	4(%esp),%ebx
+	movl	248(%esp),%esi
+	movl	252(%esp),%edi
+	addl	%ecx,%eax
+	adcl	%edx,%ebx
+	addl	%esi,%eax
+	adcl	%edi,%ebx
+	movl	%eax,192(%esp)
+	movl	%ebx,196(%esp)
+	movl	40(%esp),%ecx
+	movl	44(%esp),%edx
+	movl	%ecx,%esi
+	shrl	$9,%ecx
+	movl	%edx,%edi
+	shrl	$9,%edx
+	movl	%ecx,%ebx
+	shll	$14,%esi
+	movl	%edx,%eax
+	shll	$14,%edi
+	xorl	%esi,%ebx
+	shrl	$5,%ecx
+	xorl	%edi,%eax
+	shrl	$5,%edx
+	xorl	%ecx,%eax
+	shll	$4,%esi
+	xorl	%edx,%ebx
+	shll	$4,%edi
+	xorl	%esi,%ebx
+	shrl	$4,%ecx
+	xorl	%edi,%eax
+	shrl	$4,%edx
+	xorl	%ecx,%eax
+	shll	$5,%esi
+	xorl	%edx,%ebx
+	shll	$5,%edi
+	xorl	%esi,%eax
+	xorl	%edi,%ebx
+	movl	48(%esp),%ecx
+	movl	52(%esp),%edx
+	movl	56(%esp),%esi
+	movl	60(%esp),%edi
+	addl	64(%esp),%eax
+	adcl	68(%esp),%ebx
+	xorl	%esi,%ecx
+	xorl	%edi,%edx
+	andl	40(%esp),%ecx
+	andl	44(%esp),%edx
+	addl	192(%esp),%eax
+	adcl	196(%esp),%ebx
+	xorl	%esi,%ecx
+	xorl	%edi,%edx
+	movl	(%ebp),%esi
+	movl	4(%ebp),%edi
+	addl	%ecx,%eax
+	adcl	%edx,%ebx
+	movl	32(%esp),%ecx
+	movl	36(%esp),%edx
+	addl	%esi,%eax
+	adcl	%edi,%ebx
+	movl	%eax,(%esp)
+	movl	%ebx,4(%esp)
+	addl	%ecx,%eax
+	adcl	%edx,%ebx
+	movl	8(%esp),%ecx
+	movl	12(%esp),%edx
+	movl	%eax,32(%esp)
+	movl	%ebx,36(%esp)
+	movl	%ecx,%esi
+	shrl	$2,%ecx
+	movl	%edx,%edi
+	shrl	$2,%edx
+	movl	%ecx,%ebx
+	shll	$4,%esi
+	movl	%edx,%eax
+	shll	$4,%edi
+	xorl	%esi,%ebx
+	shrl	$5,%ecx
+	xorl	%edi,%eax
+	shrl	$5,%edx
+	xorl	%ecx,%ebx
+	shll	$21,%esi
+	xorl	%edx,%eax
+	shll	$21,%edi
+	xorl	%esi,%eax
+	shrl	$21,%ecx
+	xorl	%edi,%ebx
+	shrl	$21,%edx
+	xorl	%ecx,%eax
+	shll	$5,%esi
+	xorl	%edx,%ebx
+	shll	$5,%edi
+	xorl	%esi,%eax
+	xorl	%edi,%ebx
+	movl	8(%esp),%ecx
+	movl	12(%esp),%edx
+	movl	16(%esp),%esi
+	movl	20(%esp),%edi
+	addl	(%esp),%eax
+	adcl	4(%esp),%ebx
+	orl	%esi,%ecx
+	orl	%edi,%edx
+	andl	24(%esp),%ecx
+	andl	28(%esp),%edx
+	andl	8(%esp),%esi
+	andl	12(%esp),%edi
+	orl	%esi,%ecx
+	orl	%edi,%edx
+	addl	%ecx,%eax
+	adcl	%edx,%ebx
+	movl	%eax,(%esp)
+	movl	%ebx,4(%esp)
+	movb	(%ebp),%dl
+	subl	$8,%esp
+	leal	8(%ebp),%ebp
+	cmpb	$23,%dl
+	jne	.L01016_79_x86
+	movl	840(%esp),%esi
+	movl	844(%esp),%edi
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	addl	8(%esp),%eax
+	adcl	12(%esp),%ebx
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	addl	16(%esp),%ecx
+	adcl	20(%esp),%edx
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	movl	16(%esi),%eax
+	movl	20(%esi),%ebx
+	movl	24(%esi),%ecx
+	movl	28(%esi),%edx
+	addl	24(%esp),%eax
+	adcl	28(%esp),%ebx
+	movl	%eax,16(%esi)
+	movl	%ebx,20(%esi)
+	addl	32(%esp),%ecx
+	adcl	36(%esp),%edx
+	movl	%ecx,24(%esi)
+	movl	%edx,28(%esi)
+	movl	32(%esi),%eax
+	movl	36(%esi),%ebx
+	movl	40(%esi),%ecx
+	movl	44(%esi),%edx
+	addl	40(%esp),%eax
+	adcl	44(%esp),%ebx
+	movl	%eax,32(%esi)
+	movl	%ebx,36(%esi)
+	addl	48(%esp),%ecx
+	adcl	52(%esp),%edx
+	movl	%ecx,40(%esi)
+	movl	%edx,44(%esi)
+	movl	48(%esi),%eax
+	movl	52(%esi),%ebx
+	movl	56(%esi),%ecx
+	movl	60(%esi),%edx
+	addl	56(%esp),%eax
+	adcl	60(%esp),%ebx
+	movl	%eax,48(%esi)
+	movl	%ebx,52(%esi)
+	addl	64(%esp),%ecx
+	adcl	68(%esp),%edx
+	movl	%ecx,56(%esi)
+	movl	%edx,60(%esi)
+	addl	$840,%esp
+	subl	$640,%ebp
+	cmpl	8(%esp),%edi
+	jb	.L002loop_x86
+	movl	12(%esp),%esp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	64
+.L001K512:
+.long	3609767458,1116352408
+.long	602891725,1899447441
+.long	3964484399,3049323471
+.long	2173295548,3921009573
+.long	4081628472,961987163
+.long	3053834265,1508970993
+.long	2937671579,2453635748
+.long	3664609560,2870763221
+.long	2734883394,3624381080
+.long	1164996542,310598401
+.long	1323610764,607225278
+.long	3590304994,1426881987
+.long	4068182383,1925078388
+.long	991336113,2162078206
+.long	633803317,2614888103
+.long	3479774868,3248222580
+.long	2666613458,3835390401
+.long	944711139,4022224774
+.long	2341262773,264347078
+.long	2007800933,604807628
+.long	1495990901,770255983
+.long	1856431235,1249150122
+.long	3175218132,1555081692
+.long	2198950837,1996064986
+.long	3999719339,2554220882
+.long	766784016,2821834349
+.long	2566594879,2952996808
+.long	3203337956,3210313671
+.long	1034457026,3336571891
+.long	2466948901,3584528711
+.long	3758326383,113926993
+.long	168717936,338241895
+.long	1188179964,666307205
+.long	1546045734,773529912
+.long	1522805485,1294757372
+.long	2643833823,1396182291
+.long	2343527390,1695183700
+.long	1014477480,1986661051
+.long	1206759142,2177026350
+.long	344077627,2456956037
+.long	1290863460,2730485921
+.long	3158454273,2820302411
+.long	3505952657,3259730800
+.long	106217008,3345764771
+.long	3606008344,3516065817
+.long	1432725776,3600352804
+.long	1467031594,4094571909
+.long	851169720,275423344
+.long	3100823752,430227734
+.long	1363258195,506948616
+.long	3750685593,659060556
+.long	3785050280,883997877
+.long	3318307427,958139571
+.long	3812723403,1322822218
+.long	2003034995,1537002063
+.long	3602036899,1747873779
+.long	1575990012,1955562222
+.long	1125592928,2024104815
+.long	2716904306,2227730452
+.long	442776044,2361852424
+.long	593698344,2428436474
+.long	3733110249,2756734187
+.long	2999351573,3204031479
+.long	3815920427,3329325298
+.long	3928383900,3391569614
+.long	566280711,3515267271
+.long	3454069534,3940187606
+.long	4000239992,4118630271
+.long	1914138554,116418474
+.long	2731055270,174292421
+.long	3203993006,289380356
+.long	320620315,460393269
+.long	587496836,685471733
+.long	1086792851,852142971
+.long	365543100,1017036298
+.long	2618297676,1126000580
+.long	3409855158,1288033470
+.long	4234509866,1501505948
+.long	987167468,1607167915
+.long	1246189591,1816402316
+.long	67438087,66051
+.long	202182159,134810123
+.size	sha512_block_data_order,.-.L_sha512_block_data_order_begin
+.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
+.byte	110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+.byte	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+.byte	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+.byte	62,0
+#endif
diff --git a/third_party/boringssl/linux-x86/crypto/fipsmodule/vpaes-x86.S b/third_party/boringssl/linux-x86/crypto/fipsmodule/vpaes-x86.S
new file mode 100644
index 0000000..0417b7e
--- /dev/null
+++ b/third_party/boringssl/linux-x86/crypto/fipsmodule/vpaes-x86.S
@@ -0,0 +1,675 @@
+#if defined(__i386__)
+.text
+.align	64
+.L_vpaes_consts:	/* Constant table for the vpaes routines in this file, laid out as 16-byte rows. Code addresses it through %ebp = .L_vpaes_consts+0x30, so the first three rows sit at negative offsets. */
+.long	218628480,235210255,168496130,67568393	/* -48(%ebp): loaded into %xmm7 by _vpaes_preheat */
+.long	252381056,17041926,33884169,51187212	/* -32(%ebp) */
+.long	252645135,252645135,252645135,252645135	/* -16(%ebp): 0x0F in every byte, used as the low-nibble mask */
+.long	1512730624,3266504856,1377990664,3401244816	/* 0(%ebp): first of the two rows used by the input transform */
+.long	830229760,1275146365,2969422977,3447763452
+.long	3411033600,2979783055,338359620,2782886510
+.long	4209124096,907596821,221174255,1006095553
+.long	191964160,3799684038,3164090317,1589111125
+.long	182528256,1777043520,2877432650,3265356744
+.long	1874708224,3503451415,3305285752,363511674
+.long	1606117888,3487855781,1093350906,2384367825
+.long	197121,67569157,134941193,202313229	/* 128(%ebp): first of four byte-permutation rows indexed by %ecx in the encrypt loop */
+.long	67569157,134941193,202313229,197121
+.long	134941193,202313229,197121,67569157
+.long	202313229,197121,67569157,134941193
+.long	33619971,100992007,168364043,235736079	/* 192(%ebp): second group of four permutation rows indexed by %ecx */
+.long	235736079,33619971,100992007,168364043
+.long	168364043,235736079,33619971,100992007
+.long	100992007,168364043,235736079,33619971
+.long	50462976,117835012,185207048,252579084	/* 256(%ebp): bytes 0..15 in order (identity shuffle); rows 256..304 are selected with %ecx */
+.long	252314880,51251460,117574920,184942860
+.long	184682752,252054788,50987272,118359308
+.long	118099200,185467140,251790600,50727180
+.long	2946363062,528716217,1300004225,1881839624	/* 320(%ebp): copied to the stack by _vpaes_schedule_core and consumed by _vpaes_schedule_round */
+.long	1532713819,1532713819,1532713819,1532713819	/* 336(%ebp): 0x5B in every byte */
+.long	3602276352,4288629033,3737020424,4153884961	/* 352(%ebp): output transform rows used for the last encryption round key */
+.long	1354558464,32357713,2958822624,3775749553
+.long	1201988352,132424512,1572796698,503232858	/* 384(%ebp): transform rows used for the last decryption round key */
+.long	2213177600,1597421020,4103937655,675398315
+.long	2749646592,4273543773,1511898873,121693092	/* 416(%ebp): start of the eight rows used by the decrypt branch of _vpaes_schedule_mangle */
+.long	3040248576,1103263732,2871565598,1608280554
+.long	2236667136,2588920351,482954393,64377734
+.long	3069987328,291237287,2117370568,3650299247
+.long	533321216,3573750986,2572112006,1401264716
+.long	1339849704,2721158661,548607111,3445553514
+.long	2128193280,3054596040,2183486460,1257083700
+.long	655635200,1165381986,3923443150,2344132524
+.long	190078720,256924420,290342170,357187870	/* 544(%ebp): first row used by _vpaes_decrypt_core (its %ebx is 608(%ebp), this is -64(%ebx)) */
+.long	1610966272,2263057382,4103205268,309794674
+.long	2592527872,2233205587,1335446729,3402964816
+.long	3973531904,3225098121,3002836325,1918774430
+.long	3870401024,2102906079,2284471353,4117666579
+.long	617007872,1021508343,366931923,691083277
+.long	2528395776,3491914898,2968704004,1613121270
+.long	3445188352,3247741094,844474987,4093578302
+.long	651481088,1190302358,1689581232,574775300
+.long	4289380608,206939853,2555985458,2489840491
+.long	2130264064,327674451,3566485037,3349835193
+.long	2470714624,316102159,3636825756,3393945945	/* 720(%ebp): last row, reached as 112(%ebx) in _vpaes_decrypt_core */
+.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105	/* ASCII banner: "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)", NUL-terminated */
+.byte	111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
+.byte	83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
+.byte	114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
+.byte	118,101,114,115,105,116,121,41,0
+.align	64
+.hidden	_vpaes_preheat	/* _vpaes_preheat: completes the position-independent address of the constant table and preloads two rows for the cipher cores. */
+.type	_vpaes_preheat,@function	/* In: %ebp = (.L_vpaes_consts+0x30) minus the address of the label that follows the caller's call instruction. */
+.align	16	/* Out: %ebp = run-time address of .L_vpaes_consts+0x30; %xmm7 and %xmm6 hold table rows. No other registers are written. */
+_vpaes_preheat:
+	addl	(%esp),%ebp	/* (%esp) is the return address, which equals the caller's pic_point label */
+	movdqa	-48(%ebp),%xmm7	/* first row of .L_vpaes_consts */
+	movdqa	-16(%ebp),%xmm6	/* third row: 0x0F in every byte */
+	ret
+.size	_vpaes_preheat,.-_vpaes_preheat
+.hidden	_vpaes_encrypt_core	/* _vpaes_encrypt_core: runs the 16-byte block in %xmm0 through the round loop and leaves the result in %xmm0. */
+.type	_vpaes_encrypt_core,@function	/* In: %xmm0 = block, %edx = key schedule (loop count read from 240(%edx)), %ebp = .L_vpaes_consts+0x30, %xmm6/%xmm7 as loaded by _vpaes_preheat. */
+.align	16	/* Writes %eax, %ebx, %ecx, %edx and %xmm1-%xmm5. Every ".byte 102,15,56,0,NN" is a hand-encoded SSSE3 pshufb; the decoded form is given beside each one. */
+_vpaes_encrypt_core:
+	movl	$16,%ecx	/* %ecx selects one of four permutation rows; it steps by 16 and wraps with "andl $48" */
+	movl	240(%edx),%eax	/* loop counter stored by the set-key routines */
+	movdqa	%xmm6,%xmm1
+	movdqa	(%ebp),%xmm2
+	pandn	%xmm0,%xmm1	/* %xmm1 = bits of %xmm0 outside the 0x0F mask (high nibbles) */
+	pand	%xmm6,%xmm0	/* %xmm0 = low nibble of every byte */
+	movdqu	(%edx),%xmm5	/* first round key (unaligned load) */
+.byte	102,15,56,0,208	/* pshufb %xmm0,%xmm2 : look up low nibbles in row 0(%ebp) */
+	movdqa	16(%ebp),%xmm0
+	pxor	%xmm5,%xmm2
+	psrld	$4,%xmm1	/* move high nibbles down to index positions */
+	addl	$16,%edx	/* advance to next round key; result is non-zero for a valid pointer, so ZF is clear for the first jnz at enc_entry */
+.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 : look up high nibbles in row 16(%ebp) */
+	leal	192(%ebp),%ebx	/* base for the %ecx-indexed permutation rows; lea leaves flags untouched */
+	pxor	%xmm2,%xmm0
+	jmp	.L000enc_entry
+.align	16
+.L001enc_loop:	/* middle rounds: combine table lookups with the round key in %xmm5, then mix using the %ecx-selected permutations */
+	movdqa	32(%ebp),%xmm4
+	movdqa	48(%ebp),%xmm0
+.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
+.byte	102,15,56,0,195	/* pshufb %xmm3,%xmm0 */
+	pxor	%xmm5,%xmm4	/* fold in the round key */
+	movdqa	64(%ebp),%xmm5
+	pxor	%xmm4,%xmm0
+	movdqa	-64(%ebx,%ecx,1),%xmm1	/* permutation row at 128(%ebp)+%ecx */
+.byte	102,15,56,0,234	/* pshufb %xmm2,%xmm5 */
+	movdqa	80(%ebp),%xmm2
+	movdqa	(%ebx,%ecx,1),%xmm4	/* permutation row at 192(%ebp)+%ecx */
+.byte	102,15,56,0,211	/* pshufb %xmm3,%xmm2 */
+	movdqa	%xmm0,%xmm3
+	pxor	%xmm5,%xmm2
+.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
+	addl	$16,%edx	/* next round key */
+	pxor	%xmm2,%xmm0
+.byte	102,15,56,0,220	/* pshufb %xmm4,%xmm3 */
+	addl	$16,%ecx
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
+	andl	$48,%ecx	/* keep %ecx in {0,16,32,48} */
+	subl	$1,%eax	/* sets ZF for the jnz at the end of enc_entry; the SSE instructions in between do not modify EFLAGS */
+	pxor	%xmm3,%xmm0
+.L000enc_entry:	/* shared entry: split %xmm0 into nibbles and perform the lookups that use %xmm7 and row -32(%ebp) */
+	movdqa	%xmm6,%xmm1
+	movdqa	-32(%ebp),%xmm5
+	pandn	%xmm0,%xmm1
+	psrld	$4,%xmm1	/* %xmm1 = high nibbles */
+	pand	%xmm6,%xmm0	/* %xmm0 = low nibbles */
+.byte	102,15,56,0,232	/* pshufb %xmm0,%xmm5 */
+	movdqa	%xmm7,%xmm3
+	pxor	%xmm1,%xmm0
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	movdqa	%xmm7,%xmm4
+	pxor	%xmm5,%xmm3
+.byte	102,15,56,0,224	/* pshufb %xmm0,%xmm4 */
+	movdqa	%xmm7,%xmm2
+	pxor	%xmm5,%xmm4
+.byte	102,15,56,0,211	/* pshufb %xmm3,%xmm2 */
+	movdqa	%xmm7,%xmm3
+	pxor	%xmm0,%xmm2
+.byte	102,15,56,0,220	/* pshufb %xmm4,%xmm3 */
+	movdqu	(%edx),%xmm5	/* round key for the next step */
+	pxor	%xmm1,%xmm3
+	jnz	.L001enc_loop	/* flags come from "subl $1,%eax" (or from "addl $16,%edx" on first entry) */
+	movdqa	96(%ebp),%xmm4	/* final step uses rows 96/112(%ebp) instead of the middle-round rows */
+	movdqa	112(%ebp),%xmm0
+.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
+	pxor	%xmm5,%xmm4	/* last round key */
+.byte	102,15,56,0,195	/* pshufb %xmm3,%xmm0 */
+	movdqa	64(%ebx,%ecx,1),%xmm1	/* output permutation row at 256(%ebp)+%ecx */
+	pxor	%xmm4,%xmm0
+.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
+	ret
+.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core
+.hidden	_vpaes_decrypt_core	/* _vpaes_decrypt_core: decryption counterpart of _vpaes_encrypt_core; transforms the 16-byte block in %xmm0 in place. */
+.type	_vpaes_decrypt_core,@function	/* In: %xmm0 = block, %edx = key schedule (loop count at 240(%edx)), %ebp = .L_vpaes_consts+0x30, %xmm6/%xmm7 as loaded by _vpaes_preheat. */
+.align	16	/* Writes %eax, %ebx, %ecx, %edx and %xmm1-%xmm5. ".byte 102,15,56,0,NN" = pshufb, ".byte 102,15,58,15,NN,imm" = palignr (hand-encoded SSSE3). */
+_vpaes_decrypt_core:
+	leal	608(%ebp),%ebx	/* %ebx = base of the decryption rows, addressed as -64(%ebx)..112(%ebx) */
+	movl	240(%edx),%eax	/* loop counter stored by the set-key routines */
+	movdqa	%xmm6,%xmm1
+	movdqa	-64(%ebx),%xmm2
+	pandn	%xmm0,%xmm1	/* high nibbles of the input */
+	movl	%eax,%ecx
+	psrld	$4,%xmm1
+	movdqu	(%edx),%xmm5	/* first round key (unaligned load) */
+	shll	$4,%ecx
+	pand	%xmm6,%xmm0	/* low nibbles of the input */
+.byte	102,15,56,0,208	/* pshufb %xmm0,%xmm2 */
+	movdqa	-48(%ebx),%xmm0
+	xorl	$48,%ecx
+.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
+	andl	$48,%ecx	/* %ecx = ((count << 4) ^ 48) & 48: picks the final output permutation row */
+	pxor	%xmm5,%xmm2
+	movdqa	176(%ebp),%xmm5	/* byte-permutation row that is rotated by palignr once per loop iteration */
+	pxor	%xmm2,%xmm0
+	addl	$16,%edx	/* next round key; also leaves ZF clear for the first jnz at dec_entry */
+	leal	-352(%ebx,%ecx,1),%ecx	/* %ecx becomes a pointer: 256(%ebp) + row offset; lea does not touch flags */
+	jmp	.L002dec_entry
+.align	16
+.L003dec_loop:	/* middle rounds: four pairs of table lookups (rows -32..80(%ebx)), each pair preceded by a pshufb of %xmm0 with %xmm5 except the first */
+	movdqa	-32(%ebx),%xmm4
+	movdqa	-16(%ebx),%xmm1
+.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
+.byte	102,15,56,0,203	/* pshufb %xmm3,%xmm1 */
+	pxor	%xmm4,%xmm0	/* %xmm0 holds the round key loaded at dec_entry */
+	movdqa	(%ebx),%xmm4
+	pxor	%xmm1,%xmm0
+	movdqa	16(%ebx),%xmm1
+.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
+.byte	102,15,56,0,203	/* pshufb %xmm3,%xmm1 */
+	pxor	%xmm4,%xmm0
+	movdqa	32(%ebx),%xmm4
+	pxor	%xmm1,%xmm0
+	movdqa	48(%ebx),%xmm1
+.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
+.byte	102,15,56,0,203	/* pshufb %xmm3,%xmm1 */
+	pxor	%xmm4,%xmm0
+	movdqa	64(%ebx),%xmm4
+	pxor	%xmm1,%xmm0
+	movdqa	80(%ebx),%xmm1
+.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
+.byte	102,15,56,0,203	/* pshufb %xmm3,%xmm1 */
+	pxor	%xmm4,%xmm0
+	addl	$16,%edx	/* next round key */
+.byte	102,15,58,15,237,12	/* palignr $12,%xmm5,%xmm5 : rotate the permutation in %xmm5 by 12 bytes */
+	pxor	%xmm1,%xmm0
+	subl	$1,%eax	/* sets ZF for the jnz at the end of dec_entry; SSE instructions in between leave EFLAGS alone */
+.L002dec_entry:	/* shared entry: nibble split and the lookups that use %xmm7 and row -32(%ebp) */
+	movdqa	%xmm6,%xmm1
+	movdqa	-32(%ebp),%xmm2
+	pandn	%xmm0,%xmm1
+	pand	%xmm6,%xmm0	/* low nibbles */
+	psrld	$4,%xmm1	/* high nibbles */
+.byte	102,15,56,0,208	/* pshufb %xmm0,%xmm2 */
+	movdqa	%xmm7,%xmm3
+	pxor	%xmm1,%xmm0
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	movdqa	%xmm7,%xmm4
+	pxor	%xmm2,%xmm3
+.byte	102,15,56,0,224	/* pshufb %xmm0,%xmm4 */
+	pxor	%xmm2,%xmm4
+	movdqa	%xmm7,%xmm2
+.byte	102,15,56,0,211	/* pshufb %xmm3,%xmm2 */
+	movdqa	%xmm7,%xmm3
+	pxor	%xmm0,%xmm2
+.byte	102,15,56,0,220	/* pshufb %xmm4,%xmm3 */
+	movdqu	(%edx),%xmm0	/* round key for the next step is loaded into %xmm0 here (not %xmm5 as in the encrypt core) */
+	pxor	%xmm1,%xmm3
+	jnz	.L003dec_loop	/* flags come from "subl $1,%eax" (or from "addl $16,%edx" on first entry) */
+	movdqa	96(%ebx),%xmm4	/* final step uses rows 96/112(%ebx) */
+.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
+	pxor	%xmm0,%xmm4	/* last round key */
+	movdqa	112(%ebx),%xmm0
+	movdqa	(%ecx),%xmm2	/* output permutation row chosen before the loop */
+.byte	102,15,56,0,195	/* pshufb %xmm3,%xmm0 */
+	pxor	%xmm4,%xmm0
+.byte	102,15,56,0,194	/* pshufb %xmm2,%xmm0 */
+	ret
+.size	_vpaes_decrypt_core,.-_vpaes_decrypt_core
+.hidden	_vpaes_schedule_core	/* _vpaes_schedule_core: expands a user key into the round-key schedule; shared by vpaes_set_encrypt_key and vpaes_set_decrypt_key. */
+.type	_vpaes_schedule_core,@function	/* In: %esi = user key bytes, %edx = where the first round key is written, %eax = key size in bits, %ecx = permutation-row offset (0/16/32/48), %edi = 0 for encryption or non-zero for decryption, %ebp = PIC offset of .L_vpaes_consts+0x30. */
+.align	16	/* The caller must have a 16-byte-aligned %esp with at least 32 bytes free at (%esp): slots 4(%esp) and 20(%esp) below are the caller's 0 and 16 once the return address is accounted for. All of %xmm0-%xmm7 are zeroed on return. */
+_vpaes_schedule_core:
+	addl	(%esp),%ebp	/* add return address (caller's pic_point) to get the run-time table address */
+	movdqu	(%esi),%xmm0	/* first 16 bytes of the user key */
+	movdqa	320(%ebp),%xmm2
+	movdqa	%xmm0,%xmm3	/* keep the untransformed key for the decryption case */
+	leal	(%ebp),%ebx	/* _vpaes_schedule_transform takes its two table rows from (%ebx) */
+	movdqa	%xmm2,4(%esp)	/* stash row 320(%ebp) on the stack; _vpaes_schedule_round reads and updates it as 8(%esp) */
+	call	_vpaes_schedule_transform
+	movdqa	%xmm0,%xmm7	/* %xmm7 carries the running key state between rounds */
+	testl	%edi,%edi
+	jnz	.L004schedule_am_decrypting
+	movdqu	%xmm0,(%edx)	/* encrypting: first round key is the transformed key */
+	jmp	.L005schedule_go
+.L004schedule_am_decrypting:
+	movdqa	256(%ebp,%ecx,1),%xmm1
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 : permute the raw key with the %ecx-selected row */
+	movdqu	%xmm3,(%edx)
+	xorl	$48,%ecx
+.L005schedule_go:
+	cmpl	$192,%eax	/* dispatch on key size: >192 -> 256-bit path, ==192 -> 192-bit path, otherwise 128-bit path */
+	ja	.L006schedule_256
+	je	.L007schedule_192
+.L008schedule_128:
+	movl	$10,%eax	/* 10 round calls; the last one exits through schedule_mangle_last */
+.L009loop_schedule_128:
+	call	_vpaes_schedule_round
+	decl	%eax
+	jz	.L010schedule_mangle_last
+	call	_vpaes_schedule_mangle	/* writes one round key and moves %edx */
+	jmp	.L009loop_schedule_128
+.align	16
+.L007schedule_192:
+	movdqu	8(%esi),%xmm0	/* key bytes 8..23 */
+	call	_vpaes_schedule_transform
+	movdqa	%xmm0,%xmm6
+	pxor	%xmm4,%xmm4
+	movhlps	%xmm4,%xmm6	/* clear the low 64 bits of %xmm6 */
+	movl	$4,%eax	/* 4 iterations, each producing up to three round keys */
+.L011loop_schedule_192:
+	call	_vpaes_schedule_round
+.byte	102,15,58,15,198,8	/* palignr $8,%xmm6,%xmm0 */
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_192_smear
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_round
+	decl	%eax
+	jz	.L010schedule_mangle_last
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_192_smear
+	jmp	.L011loop_schedule_192
+.align	16
+.L006schedule_256:
+	movdqu	16(%esi),%xmm0	/* second 16 bytes of the user key */
+	call	_vpaes_schedule_transform
+	movl	$7,%eax	/* 7 iterations, two round keys each except the last */
+.L012loop_schedule_256:
+	call	_vpaes_schedule_mangle
+	movdqa	%xmm0,%xmm6	/* remember this half for the low round below */
+	call	_vpaes_schedule_round
+	decl	%eax
+	jz	.L010schedule_mangle_last
+	call	_vpaes_schedule_mangle
+	pshufd	$255,%xmm0,%xmm0	/* broadcast the top dword */
+	movdqa	%xmm7,20(%esp)	/* save %xmm7 in the second caller-provided stack slot */
+	movdqa	%xmm6,%xmm7
+	call	.L_vpaes_schedule_low_round	/* secondary entry inside _vpaes_schedule_round that skips the stack-constant step */
+	movdqa	20(%esp),%xmm7
+	jmp	.L012loop_schedule_256
+.align	16
+.L010schedule_mangle_last:	/* write the final round key using a different output transform for each direction */
+	leal	384(%ebp),%ebx	/* transform rows for decryption */
+	testl	%edi,%edi
+	jnz	.L013schedule_mangle_last_dec
+	movdqa	256(%ebp,%ecx,1),%xmm1
+.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
+	leal	352(%ebp),%ebx	/* transform rows for encryption */
+	addl	$32,%edx	/* combined with the -16 below: net +16 for encryption, -16 for decryption */
+.L013schedule_mangle_last_dec:
+	addl	$-16,%edx
+	pxor	336(%ebp),%xmm0	/* XOR with the row that has 0x5B in every byte */
+	call	_vpaes_schedule_transform
+	movdqu	%xmm0,(%edx)
+	pxor	%xmm0,%xmm0	/* clear every XMM register so no key material is left behind in them */
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	ret
+.size	_vpaes_schedule_core,.-_vpaes_schedule_core
+.hidden	_vpaes_schedule_192_smear	/* _vpaes_schedule_192_smear: helper for the 192-bit key schedule; mixes dwords of %xmm6 and %xmm7. */
+.type	_vpaes_schedule_192_smear,@function	/* In: %xmm6, %xmm7. Out: %xmm0 = mixed value, %xmm6 = same value with its low 64 bits cleared. */
+.align	16	/* Writes %xmm0, %xmm1 (left zero) and %xmm6 only; no memory or general-purpose registers are touched. */
+_vpaes_schedule_192_smear:
+	pshufd	$128,%xmm6,%xmm1	/* dwords of %xmm6 selected as (0,0,0,2), low to high */
+	pshufd	$254,%xmm7,%xmm0	/* dwords of %xmm7 selected as (2,3,3,3), low to high */
+	pxor	%xmm1,%xmm6
+	pxor	%xmm1,%xmm1	/* zero, used as the movhlps source below */
+	pxor	%xmm0,%xmm6
+	movdqa	%xmm6,%xmm0	/* return value */
+	movhlps	%xmm1,%xmm6	/* low 64 bits of %xmm6 := 0 */
+	ret
+.size	_vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
+.hidden	_vpaes_schedule_round	/* _vpaes_schedule_round: one step of the key schedule. Updates %xmm7 (running state) from %xmm0 and returns the new value in both %xmm0 and %xmm7. */
+.type	_vpaes_schedule_round,@function	/* In: %xmm0, %xmm7, %ebp = .L_vpaes_consts+0x30, and the 16-byte stack slot set up by _vpaes_schedule_core (seen here as 8(%esp)). */
+.align	16	/* Writes %xmm1-%xmm5. Has a second entry point, .L_vpaes_schedule_low_round, that skips the stack-slot update and the rotation of %xmm0. */
+_vpaes_schedule_round:
+	movdqa	8(%esp),%xmm2	/* 8(%esp) here = 4(%esp) in _vpaes_schedule_core (one more return address on the stack) */
+	pxor	%xmm1,%xmm1
+.byte	102,15,58,15,202,15	/* palignr $15,%xmm2,%xmm1 : top byte of the slot value into the low byte of %xmm1 */
+.byte	102,15,58,15,210,15	/* palignr $15,%xmm2,%xmm2 : rotate the slot value by 15 bytes */
+	pxor	%xmm1,%xmm7	/* fold the extracted byte into the state */
+	pshufd	$255,%xmm0,%xmm0	/* broadcast the top dword of %xmm0 */
+.byte	102,15,58,15,192,1	/* palignr $1,%xmm0,%xmm0 : rotate by one byte */
+	movdqa	%xmm2,8(%esp)	/* store the rotated slot value for the next call */
+.L_vpaes_schedule_low_round:
+	movdqa	%xmm7,%xmm1	/* next six instructions: %xmm7 ^= %xmm7<<32, then ^= result<<64 (byte shifts via pslldq) */
+	pslldq	$4,%xmm7
+	pxor	%xmm1,%xmm7
+	movdqa	%xmm7,%xmm1
+	pslldq	$8,%xmm7
+	pxor	%xmm1,%xmm7
+	pxor	336(%ebp),%xmm7	/* XOR with the row that has 0x5B in every byte */
+	movdqa	-16(%ebp),%xmm4	/* 0x0F mask; reloaded because %xmm6/%xmm7 are in use as schedule state */
+	movdqa	-48(%ebp),%xmm5	/* same row _vpaes_preheat puts in %xmm7 for the cipher cores */
+	movdqa	%xmm4,%xmm1
+	pandn	%xmm0,%xmm1
+	psrld	$4,%xmm1	/* high nibbles of %xmm0 */
+	pand	%xmm4,%xmm0	/* low nibbles of %xmm0 */
+	movdqa	-32(%ebp),%xmm2
+.byte	102,15,56,0,208	/* pshufb %xmm0,%xmm2 */
+	pxor	%xmm1,%xmm0
+	movdqa	%xmm5,%xmm3
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	pxor	%xmm2,%xmm3
+	movdqa	%xmm5,%xmm4
+.byte	102,15,56,0,224	/* pshufb %xmm0,%xmm4 */
+	pxor	%xmm2,%xmm4
+	movdqa	%xmm5,%xmm2
+.byte	102,15,56,0,211	/* pshufb %xmm3,%xmm2 */
+	pxor	%xmm0,%xmm2
+	movdqa	%xmm5,%xmm3
+.byte	102,15,56,0,220	/* pshufb %xmm4,%xmm3 */
+	pxor	%xmm1,%xmm3
+	movdqa	32(%ebp),%xmm4	/* same rows 32/48(%ebp) that the encrypt loop uses */
+.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
+	movdqa	48(%ebp),%xmm0
+.byte	102,15,56,0,195	/* pshufb %xmm3,%xmm0 */
+	pxor	%xmm4,%xmm0
+	pxor	%xmm7,%xmm0	/* combine the substituted value with the smeared state */
+	movdqa	%xmm0,%xmm7	/* result is returned in both %xmm0 and %xmm7 */
+	ret
+.size	_vpaes_schedule_round,.-_vpaes_schedule_round
+.hidden	_vpaes_schedule_transform	/* _vpaes_schedule_transform: byte-wise table transform of %xmm0 using two 16-entry tables. */
+.type	_vpaes_schedule_transform,@function	/* In: %xmm0 = value, %ebx = address of two consecutive 16-byte rows (low-nibble table, then high-nibble table), %ebp = .L_vpaes_consts+0x30. */
+.align	16	/* Out: %xmm0 = table[lo nibble] XOR table2[hi nibble] for every byte. Writes %xmm1 and %xmm2. */
+_vpaes_schedule_transform:
+	movdqa	-16(%ebp),%xmm2	/* 0x0F mask */
+	movdqa	%xmm2,%xmm1
+	pandn	%xmm0,%xmm1
+	psrld	$4,%xmm1	/* %xmm1 = high nibbles */
+	pand	%xmm2,%xmm0	/* %xmm0 = low nibbles */
+	movdqa	(%ebx),%xmm2
+.byte	102,15,56,0,208	/* pshufb %xmm0,%xmm2 : look up low nibbles in the first row */
+	movdqa	16(%ebx),%xmm0
+.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 : look up high nibbles in the second row */
+	pxor	%xmm2,%xmm0
+	ret
+.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform
+.hidden	_vpaes_schedule_mangle	/* _vpaes_schedule_mangle: converts the schedule value in %xmm0 to its stored form and writes it as the next round key. */
+.type	_vpaes_schedule_mangle,@function	/* In: %xmm0 = value (preserved), %edx = last written key slot, %ecx = permutation-row offset, %edi = direction flag (0 = encrypt), %ebp = .L_vpaes_consts+0x30. */
+.align	16	/* Out: %edx moved by +16 (encrypt) or -16 (decrypt) and 16 bytes stored there; %ecx = (%ecx - 16) & 48. Writes %xmm1-%xmm5; the decrypt branch also overwrites %esi. */
+_vpaes_schedule_mangle:
+	movdqa	%xmm0,%xmm4	/* work on a copy so %xmm0 survives */
+	movdqa	128(%ebp),%xmm5	/* byte-permutation row applied repeatedly below */
+	testl	%edi,%edi
+	jnz	.L014schedule_mangle_dec
+	addl	$16,%edx	/* encryption keys are written at ascending addresses */
+	pxor	336(%ebp),%xmm4	/* XOR with the row that has 0x5B in every byte */
+.byte	102,15,56,0,229	/* pshufb %xmm5,%xmm4 */
+	movdqa	%xmm4,%xmm3
+.byte	102,15,56,0,229	/* pshufb %xmm5,%xmm4 */
+	pxor	%xmm4,%xmm3
+.byte	102,15,56,0,229	/* pshufb %xmm5,%xmm4 */
+	pxor	%xmm4,%xmm3	/* %xmm3 = XOR of the value permuted once, twice and three times */
+	jmp	.L015schedule_mangle_both
+.align	16
+.L014schedule_mangle_dec:	/* decrypt branch: four table-pair lookups from rows 416..528(%ebp), with a pshufb by %xmm5 between pairs */
+	movdqa	-16(%ebp),%xmm2	/* 0x0F mask */
+	leal	416(%ebp),%esi	/* note: %esi (the user-key pointer in _vpaes_schedule_core) is overwritten here */
+	movdqa	%xmm2,%xmm1
+	pandn	%xmm4,%xmm1
+	psrld	$4,%xmm1	/* high nibbles */
+	pand	%xmm2,%xmm4	/* low nibbles */
+	movdqa	(%esi),%xmm2
+.byte	102,15,56,0,212	/* pshufb %xmm4,%xmm2 */
+	movdqa	16(%esi),%xmm3
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	pxor	%xmm2,%xmm3
+.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
+	movdqa	32(%esi),%xmm2
+.byte	102,15,56,0,212	/* pshufb %xmm4,%xmm2 */
+	pxor	%xmm3,%xmm2
+	movdqa	48(%esi),%xmm3
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	pxor	%xmm2,%xmm3
+.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
+	movdqa	64(%esi),%xmm2
+.byte	102,15,56,0,212	/* pshufb %xmm4,%xmm2 */
+	pxor	%xmm3,%xmm2
+	movdqa	80(%esi),%xmm3
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	pxor	%xmm2,%xmm3
+.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
+	movdqa	96(%esi),%xmm2
+.byte	102,15,56,0,212	/* pshufb %xmm4,%xmm2 */
+	pxor	%xmm3,%xmm2
+	movdqa	112(%esi),%xmm3
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	pxor	%xmm2,%xmm3
+	addl	$-16,%edx	/* decryption keys are written at descending addresses */
+.L015schedule_mangle_both:
+	movdqa	256(%ebp,%ecx,1),%xmm1	/* row selected by the current %ecx */
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	addl	$-16,%ecx
+	andl	$48,%ecx	/* step %ecx backwards through {48,32,16,0} with wrap-around */
+	movdqu	%xmm3,(%edx)	/* store the round key (unaligned store) */
+	ret
+.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle
+.globl	vpaes_set_encrypt_key	/* vpaes_set_encrypt_key(arg1, arg2, arg3): builds the encryption key schedule. Stack arguments: arg1 = user key bytes, arg2 = key size in bits, arg3 = schedule buffer. */
+.hidden	vpaes_set_encrypt_key	/* Stores (bits/32 + 5) at offset 240 of the schedule buffer and always returns 0 in %eax. */
+.type	vpaes_set_encrypt_key,@function	/* Saves and restores %ebp, %ebx, %esi, %edi; runs the core on a private 16-byte-aligned stack area. */
+.align	16
+vpaes_set_encrypt_key:
+.L_vpaes_set_encrypt_key_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi	/* arg1: 16 saved-register bytes + 4 return-address bytes above %esp */
+	leal	-56(%esp),%ebx
+	movl	24(%esp),%eax	/* arg2: key size in bits */
+	andl	$-16,%ebx	/* %ebx = aligned scratch area below the current stack */
+	movl	28(%esp),%edx	/* arg3: schedule buffer */
+	xchgl	%esp,%ebx	/* switch to the aligned area; %ebx now holds the original %esp */
+	movl	%ebx,48(%esp)	/* remember the original %esp for the epilogue */
+	movl	%eax,%ebx
+	shrl	$5,%ebx
+	addl	$5,%ebx	/* 128 -> 9, 192 -> 11, 256 -> 13 */
+	movl	%ebx,240(%edx)	/* loop count later read by the cipher cores */
+	movl	$48,%ecx	/* initial permutation-row offset for _vpaes_schedule_mangle */
+	movl	$0,%edi	/* direction flag: 0 = encryption */
+	leal	.L_vpaes_consts+0x30-.L016pic_point,%ebp	/* link-time constant; the core adds its return address to make it absolute */
+	call	_vpaes_schedule_core
+.L016pic_point:
+	movl	48(%esp),%esp	/* back to the original stack */
+	xorl	%eax,%eax	/* return 0 */
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin
+.globl	vpaes_set_decrypt_key	/* vpaes_set_decrypt_key(arg1, arg2, arg3): builds the decryption key schedule. Stack arguments: arg1 = user key bytes, arg2 = key size in bits, arg3 = schedule buffer. */
+.hidden	vpaes_set_decrypt_key	/* Stores (bits/32 + 5) at offset 240 of the buffer, has the core fill the schedule from its far end downwards, and always returns 0 in %eax. */
+.type	vpaes_set_decrypt_key,@function	/* Saves and restores %ebp, %ebx, %esi, %edi; runs the core on a private 16-byte-aligned stack area. */
+.align	16
+vpaes_set_decrypt_key:
+.L_vpaes_set_decrypt_key_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi	/* arg1: user key bytes */
+	leal	-56(%esp),%ebx
+	movl	24(%esp),%eax	/* arg2: key size in bits */
+	andl	$-16,%ebx	/* %ebx = aligned scratch area below the current stack */
+	movl	28(%esp),%edx	/* arg3: schedule buffer */
+	xchgl	%esp,%ebx	/* switch to the aligned area; %ebx now holds the original %esp */
+	movl	%ebx,48(%esp)	/* remember the original %esp for the epilogue */
+	movl	%eax,%ebx
+	shrl	$5,%ebx
+	addl	$5,%ebx	/* 128 -> 9, 192 -> 11, 256 -> 13 */
+	movl	%ebx,240(%edx)	/* loop count later read by the cipher cores */
+	shll	$4,%ebx
+	leal	16(%edx,%ebx,1),%edx	/* %edx = buffer + 16*(count+1): start at the last round-key slot */
+	movl	$1,%edi	/* direction flag: non-zero = decryption */
+	movl	%eax,%ecx
+	shrl	$1,%ecx
+	andl	$32,%ecx
+	xorl	$32,%ecx	/* initial row offset: 32 for 128- and 256-bit keys, 0 for 192-bit keys */
+	leal	.L_vpaes_consts+0x30-.L017pic_point,%ebp	/* link-time constant; the core adds its return address to make it absolute */
+	call	_vpaes_schedule_core
+.L017pic_point:
+	movl	48(%esp),%esp	/* back to the original stack */
+	xorl	%eax,%eax	/* return 0 */
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin
+.globl	vpaes_encrypt	/* vpaes_encrypt(arg1, arg2, arg3): encrypts one 16-byte block. Stack arguments: arg1 = input block, arg2 = output block, arg3 = key schedule. */
+.hidden	vpaes_encrypt	/* Input and output are accessed with unaligned loads/stores. */
+.type	vpaes_encrypt,@function	/* Saves and restores %ebp, %ebx, %esi, %edi; %eax, %ecx, %edx and %xmm0-%xmm7 are overwritten. */
+.align	16
+vpaes_encrypt:
+.L_vpaes_encrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	leal	.L_vpaes_consts+0x30-.L018pic_point,%ebp	/* link-time constant; _vpaes_preheat adds its return address to make it absolute */
+	call	_vpaes_preheat	/* also loads %xmm6 and %xmm7 */
+.L018pic_point:
+	movl	20(%esp),%esi	/* arg1: input block */
+	leal	-56(%esp),%ebx
+	movl	24(%esp),%edi	/* arg2: output block */
+	andl	$-16,%ebx	/* aligned scratch area below the current stack */
+	movl	28(%esp),%edx	/* arg3: key schedule */
+	xchgl	%esp,%ebx	/* switch stacks; %ebx holds the original %esp */
+	movl	%ebx,48(%esp)	/* saved for the epilogue (the core overwrites %ebx) */
+	movdqu	(%esi),%xmm0
+	call	_vpaes_encrypt_core
+	movdqu	%xmm0,(%edi)
+	movl	48(%esp),%esp	/* back to the original stack */
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	vpaes_encrypt,.-.L_vpaes_encrypt_begin
+.globl	vpaes_decrypt	/* vpaes_decrypt(arg1, arg2, arg3): decrypts one 16-byte block. Stack arguments: arg1 = input block, arg2 = output block, arg3 = key schedule. */
+.hidden	vpaes_decrypt	/* Input and output are accessed with unaligned loads/stores. */
+.type	vpaes_decrypt,@function	/* Saves and restores %ebp, %ebx, %esi, %edi; %eax, %ecx, %edx and %xmm0-%xmm7 are overwritten. */
+.align	16
+vpaes_decrypt:
+.L_vpaes_decrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	leal	.L_vpaes_consts+0x30-.L019pic_point,%ebp	/* link-time constant; _vpaes_preheat adds its return address to make it absolute */
+	call	_vpaes_preheat	/* also loads %xmm6 and %xmm7 */
+.L019pic_point:
+	movl	20(%esp),%esi	/* arg1: input block */
+	leal	-56(%esp),%ebx
+	movl	24(%esp),%edi	/* arg2: output block */
+	andl	$-16,%ebx	/* aligned scratch area below the current stack */
+	movl	28(%esp),%edx	/* arg3: key schedule */
+	xchgl	%esp,%ebx	/* switch stacks; %ebx holds the original %esp */
+	movl	%ebx,48(%esp)	/* saved for the epilogue (the core overwrites %ebx) */
+	movdqu	(%esi),%xmm0
+	call	_vpaes_decrypt_core
+	movdqu	%xmm0,(%edi)
+	movl	48(%esp),%esp	/* back to the original stack */
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	vpaes_decrypt,.-.L_vpaes_decrypt_begin
+.globl	vpaes_cbc_encrypt	/* vpaes_cbc_encrypt(arg1..arg6): CBC-mode encryption or decryption. Stack arguments: arg1 = input, arg2 = output, arg3 = length in bytes, arg4 = key schedule, arg5 = 16-byte IV (read, then overwritten with the chaining value), arg6 = non-zero to encrypt, 0 to decrypt. */
+.hidden	vpaes_cbc_encrypt	/* Returns immediately without touching memory when length < 16. Only whole 16-byte blocks are processed; a trailing partial block is ignored. */
+.type	vpaes_cbc_encrypt,@function	/* Scratch layout on the aligned stack: 0 = (out - in), 4 = key schedule, 8 = IV pointer, 16/32 = saved blocks for decryption, 48 = original %esp. */
+.align	16
+vpaes_cbc_encrypt:
+.L_vpaes_cbc_encrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi	/* arg1: input pointer */
+	movl	24(%esp),%edi	/* arg2: output pointer */
+	movl	28(%esp),%eax	/* arg3: length */
+	movl	32(%esp),%edx	/* arg4: key schedule */
+	subl	$16,%eax	/* %eax = length - 16; borrow means fewer than 16 bytes */
+	jc	.L020cbc_abort
+	leal	-56(%esp),%ebx
+	movl	36(%esp),%ebp	/* arg5: IV pointer (still addressed via the original %esp) */
+	andl	$-16,%ebx	/* aligned scratch area below the current stack */
+	movl	40(%esp),%ecx	/* arg6: direction flag */
+	xchgl	%esp,%ebx	/* switch stacks; %ebx holds the original %esp */
+	movdqu	(%ebp),%xmm1	/* %xmm1 carries the chaining value throughout */
+	subl	%esi,%edi	/* keep out as an offset from in, so one pointer increment serves both */
+	movl	%ebx,48(%esp)
+	movl	%edi,(%esp)
+	movl	%edx,4(%esp)
+	movl	%ebp,8(%esp)
+	movl	%eax,%edi	/* %edi = remaining length - 16, used as the loop counter */
+	leal	.L_vpaes_consts+0x30-.L021pic_point,%ebp	/* link-time constant; _vpaes_preheat adds its return address to make it absolute */
+	call	_vpaes_preheat	/* also loads %xmm6 and %xmm7 */
+.L021pic_point:
+	cmpl	$0,%ecx
+	je	.L022cbc_dec_loop
+	jmp	.L023cbc_enc_loop
+.align	16
+.L023cbc_enc_loop:
+	movdqu	(%esi),%xmm0
+	pxor	%xmm1,%xmm0	/* plaintext XOR previous ciphertext (or IV) */
+	call	_vpaes_encrypt_core
+	movl	(%esp),%ebx	/* the core overwrites %ebx and %edx, so reload both from the scratch area */
+	movl	4(%esp),%edx
+	movdqa	%xmm0,%xmm1	/* new chaining value = this ciphertext block */
+	movdqu	%xmm0,(%ebx,%esi,1)	/* store at out + current offset */
+	leal	16(%esi),%esi
+	subl	$16,%edi
+	jnc	.L023cbc_enc_loop	/* continue while at least 16 more bytes remain */
+	jmp	.L024cbc_done
+.align	16
+.L022cbc_dec_loop:
+	movdqu	(%esi),%xmm0
+	movdqa	%xmm1,16(%esp)	/* save previous ciphertext (or IV); the core overwrites %xmm1 */
+	movdqa	%xmm0,32(%esp)	/* save this ciphertext: it becomes the next chaining value, and in-place operation would otherwise lose it */
+	call	_vpaes_decrypt_core
+	movl	(%esp),%ebx	/* the core overwrites %ebx and %edx, so reload both from the scratch area */
+	movl	4(%esp),%edx
+	pxor	16(%esp),%xmm0	/* decrypted block XOR previous ciphertext (or IV) */
+	movdqa	32(%esp),%xmm1
+	movdqu	%xmm0,(%ebx,%esi,1)	/* store at out + current offset */
+	leal	16(%esi),%esi
+	subl	$16,%edi
+	jnc	.L022cbc_dec_loop	/* continue while at least 16 more bytes remain */
+.L024cbc_done:
+	movl	8(%esp),%ebx	/* IV pointer */
+	movl	48(%esp),%esp	/* back to the original stack */
+	movdqu	%xmm1,(%ebx)	/* write the final chaining value back through the IV pointer */
+.L020cbc_abort:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
+#endif
diff --git a/third_party/boringssl/linux-x86/crypto/fipsmodule/x86-mont.S b/third_party/boringssl/linux-x86/crypto/fipsmodule/x86-mont.S
new file mode 100644
index 0000000..3fb6688
--- /dev/null
+++ b/third_party/boringssl/linux-x86/crypto/fipsmodule/x86-mont.S
@@ -0,0 +1,475 @@
+#if defined(__i386__)
+.text
+.globl	bn_mul_mont	/* Montgomery multiplication, 6 stack args; returns 0 in %eax without computing when arg6 < 4, otherwise 1 */
+.hidden	bn_mul_mont
+.type	bn_mul_mont,@function
+.align	16
+bn_mul_mont:
+.L_bn_mul_mont_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	xorl	%eax,%eax	/* default return value 0 */
+	movl	40(%esp),%edi	/* arg6: word count (num) */
+	cmpl	$4,%edi
+	jl	.L000just_leave	/* num < 4 is not handled here */
+	leal	20(%esp),%esi	/* esi = address of arg1 on the stack */
+	leal	24(%esp),%edx	/* edx = address of arg2 on the stack */
+	addl	$2,%edi
+	negl	%edi
+	leal	-32(%esp,%edi,4),%ebp	/* ebp = esp - 32 - 4*(num+2): candidate frame base */
+	negl	%edi	/* edi = num + 2 again */
+	movl	%ebp,%eax	/* NOTE(review): next 8 instructions shift the frame relative to the argument area modulo 2048/4096 - presumably to limit cache aliasing; confirm */
+	subl	%edx,%eax
+	andl	$2047,%eax
+	subl	%eax,%ebp
+	xorl	%ebp,%edx
+	andl	$2048,%edx
+	xorl	$2048,%edx
+	subl	%edx,%ebp
+	andl	$-64,%ebp	/* 64-byte align the frame */
+	movl	%esp,%eax
+	subl	%ebp,%eax
+	andl	$-4096,%eax	/* whole 4096-byte pages between the current esp and the frame */
+	movl	%esp,%edx	/* edx = original stack pointer, stored in the frame below */
+	leal	(%ebp,%eax,1),%esp
+	movl	(%esp),%eax	/* touch the page */
+	cmpl	%ebp,%esp
+	ja	.L001page_walk
+	jmp	.L002page_walk_done
+.align	16
+.L001page_walk:	/* step esp down 4096 bytes at a time, reading each page, until it reaches the frame */
+	leal	-4096(%esp),%esp
+	movl	(%esp),%eax
+	cmpl	%ebp,%esp
+	ja	.L001page_walk
+.L002page_walk_done:
+	movl	(%esi),%eax	/* arg1: result pointer (written in the tail) */
+	movl	4(%esi),%ebx	/* arg2: first operand a */
+	movl	8(%esi),%ecx	/* arg3: second operand b */
+	movl	12(%esi),%ebp	/* arg4: modulus n (subtracted in the tail) */
+	movl	16(%esi),%esi	/* arg5: pointer */
+	movl	(%esi),%esi	/* first word behind arg5, used as the reduction constant */
+	movl	%eax,4(%esp)	/* 4(%esp): result pointer */
+	movl	%ebx,8(%esp)	/* 8(%esp): a */
+	movl	%ecx,12(%esp)	/* 12(%esp): b (later reused as running pointer or index) */
+	movl	%ebp,16(%esp)	/* 16(%esp): n */
+	movl	%esi,20(%esp)	/* 20(%esp): reduction constant */
+	leal	-3(%edi),%ebx	/* ebx = num - 1 */
+	movl	%edx,24(%esp)	/* 24(%esp): original stack pointer */
+	call	.L003PIC_me_up	/* call/pop pair yields the current address for PIC addressing */
+.L003PIC_me_up:
+	popl	%eax
+	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
+	btl	$26,(%eax)	/* capability bit 26 selects the MMX-register path that follows */
+	jnc	.L004non_sse2
+	movl	$-1,%eax
+	movd	%eax,%mm7	/* mm7 = 0x00000000ffffffff, low-dword mask */
+	movl	8(%esp),%esi	/* esi = a */
+	movl	12(%esp),%edi	/* edi = b */
+	movl	16(%esp),%ebp	/* ebp = n */
+	xorl	%edx,%edx	/* edx = outer index i */
+	xorl	%ecx,%ecx	/* ecx = inner index j */
+	movd	(%edi),%mm4	/* mm4 = b[0] */
+	movd	(%esi),%mm5	/* mm5 = a[0] */
+	movd	(%ebp),%mm3	/* mm3 = n[0] */
+	pmuludq	%mm4,%mm5	/* mm5 = a[0] * b[0] */
+	movq	%mm5,%mm2
+	movq	%mm5,%mm0
+	pand	%mm7,%mm0	/* mm0 = low dword of the product */
+	pmuludq	20(%esp),%mm5	/* mm5 = low dword * reduction constant = multiplier m */
+	pmuludq	%mm5,%mm3	/* mm3 = n[0] * m */
+	paddq	%mm0,%mm3
+	movd	4(%ebp),%mm1	/* preload n[1] */
+	movd	4(%esi),%mm0	/* preload a[1] */
+	psrlq	$32,%mm2	/* carry of the a*b chain */
+	psrlq	$32,%mm3	/* carry of the n*m chain */
+	incl	%ecx
+.align	16
+.L0051st:	/* first pass: word j of a*b[0] + n*m is stored one slot lower (28 + 4*j from esp; t[] begins at 32(%esp)) */
+	pmuludq	%mm4,%mm0
+	pmuludq	%mm5,%mm1
+	paddq	%mm0,%mm2
+	paddq	%mm1,%mm3
+	movq	%mm2,%mm0
+	pand	%mm7,%mm0
+	movd	4(%ebp,%ecx,4),%mm1
+	paddq	%mm0,%mm3
+	movd	4(%esi,%ecx,4),%mm0
+	psrlq	$32,%mm2
+	movd	%mm3,28(%esp,%ecx,4)
+	psrlq	$32,%mm3
+	leal	1(%ecx),%ecx
+	cmpl	%ebx,%ecx
+	jl	.L0051st
+	pmuludq	%mm4,%mm0
+	pmuludq	%mm5,%mm1
+	paddq	%mm0,%mm2
+	paddq	%mm1,%mm3
+	movq	%mm2,%mm0
+	pand	%mm7,%mm0
+	paddq	%mm0,%mm3
+	movd	%mm3,28(%esp,%ecx,4)
+	psrlq	$32,%mm2
+	psrlq	$32,%mm3
+	paddq	%mm2,%mm3	/* combine both carries */
+	movq	%mm3,32(%esp,%ebx,4)	/* 64-bit store: top word plus overflow word of t */
+	incl	%edx
+.L006outer:	/* for each further b[i]: t = t + a*b[i] + n*m, again stored one word lower */
+	xorl	%ecx,%ecx
+	movd	(%edi,%edx,4),%mm4	/* mm4 = b[i] */
+	movd	(%esi),%mm5
+	movd	32(%esp),%mm6	/* mm6 = t[0] */
+	movd	(%ebp),%mm3
+	pmuludq	%mm4,%mm5
+	paddq	%mm6,%mm5
+	movq	%mm5,%mm0
+	movq	%mm5,%mm2
+	pand	%mm7,%mm0
+	pmuludq	20(%esp),%mm5	/* multiplier m for this row */
+	pmuludq	%mm5,%mm3
+	paddq	%mm0,%mm3
+	movd	36(%esp),%mm6
+	movd	4(%ebp),%mm1
+	movd	4(%esi),%mm0
+	psrlq	$32,%mm2
+	psrlq	$32,%mm3
+	paddq	%mm6,%mm2
+	incl	%ecx
+	decl	%ebx	/* ebx doubles as the inner-loop down counter */
+.L007inner:
+	pmuludq	%mm4,%mm0
+	pmuludq	%mm5,%mm1
+	paddq	%mm0,%mm2
+	paddq	%mm1,%mm3
+	movq	%mm2,%mm0
+	movd	36(%esp,%ecx,4),%mm6
+	pand	%mm7,%mm0
+	movd	4(%ebp,%ecx,4),%mm1
+	paddq	%mm0,%mm3
+	movd	4(%esi,%ecx,4),%mm0
+	psrlq	$32,%mm2
+	movd	%mm3,28(%esp,%ecx,4)
+	psrlq	$32,%mm3
+	paddq	%mm6,%mm2
+	decl	%ebx
+	leal	1(%ecx),%ecx	/* lea leaves the flags from decl intact for jnz */
+	jnz	.L007inner
+	movl	%ecx,%ebx	/* ebx = num - 1 again */
+	pmuludq	%mm4,%mm0
+	pmuludq	%mm5,%mm1
+	paddq	%mm0,%mm2
+	paddq	%mm1,%mm3
+	movq	%mm2,%mm0
+	pand	%mm7,%mm0
+	paddq	%mm0,%mm3
+	movd	%mm3,28(%esp,%ecx,4)
+	psrlq	$32,%mm2
+	psrlq	$32,%mm3
+	movd	36(%esp,%ebx,4),%mm6	/* previous overflow word */
+	paddq	%mm2,%mm3
+	paddq	%mm6,%mm3
+	movq	%mm3,32(%esp,%ebx,4)
+	leal	1(%edx),%edx
+	cmpl	%ebx,%edx
+	jle	.L006outer	/* loop while i <= num - 1 */
+	emms	/* leave MMX state before returning */
+	jmp	.L008common_tail
+.align	16
+.L004non_sse2:	/* integer-only path using mull */
+	movl	8(%esp),%esi	/* esi = a */
+	leal	1(%ebx),%ebp	/* ebp = num */
+	movl	12(%esp),%edi	/* edi = b */
+	xorl	%ecx,%ecx
+	movl	%esi,%edx
+	andl	$1,%ebp	/* ebp = num & 1 */
+	subl	%edi,%edx	/* edx = a - b (pointer difference) */
+	leal	4(%edi,%ebx,4),%eax	/* eax = b + 4*num, one past the last word of b */
+	orl	%edx,%ebp	/* zero only when a == b and num is even */
+	movl	(%edi),%edi	/* edi = b[0]; mov keeps the flags from orl */
+	jz	.L009bn_sqr_mont	/* same operand, even length: take the squaring path */
+	movl	%eax,28(%esp)	/* 28(%esp): end-of-b sentinel */
+	movl	(%esi),%eax
+	xorl	%edx,%edx
+.align	16
+.L010mull:	/* t = a * b[0] */
+	movl	%edx,%ebp
+	mull	%edi
+	addl	%eax,%ebp
+	leal	1(%ecx),%ecx
+	adcl	$0,%edx
+	movl	(%esi,%ecx,4),%eax
+	cmpl	%ebx,%ecx
+	movl	%ebp,28(%esp,%ecx,4)
+	jl	.L010mull
+	movl	%edx,%ebp
+	mull	%edi
+	movl	20(%esp),%edi	/* edi = reduction constant */
+	addl	%ebp,%eax
+	movl	16(%esp),%esi	/* esi = n */
+	adcl	$0,%edx
+	imull	32(%esp),%edi	/* edi = t[0] * constant = multiplier m */
+	movl	%eax,32(%esp,%ebx,4)
+	xorl	%ecx,%ecx
+	movl	%edx,36(%esp,%ebx,4)
+	movl	%ecx,40(%esp,%ebx,4)	/* clear the overflow word */
+	movl	(%esi),%eax
+	mull	%edi
+	addl	32(%esp),%eax	/* only the carry out of the lowest word is needed */
+	movl	4(%esi),%eax	/* eax is overwritten; mov does not disturb CF */
+	adcl	$0,%edx
+	incl	%ecx
+	jmp	.L0112ndmadd
+.align	16
+.L0121stmadd:	/* t += a * b[i] */
+	movl	%edx,%ebp
+	mull	%edi
+	addl	32(%esp,%ecx,4),%ebp
+	leal	1(%ecx),%ecx
+	adcl	$0,%edx
+	addl	%eax,%ebp
+	movl	(%esi,%ecx,4),%eax
+	adcl	$0,%edx
+	cmpl	%ebx,%ecx
+	movl	%ebp,28(%esp,%ecx,4)
+	jl	.L0121stmadd
+	movl	%edx,%ebp
+	mull	%edi
+	addl	32(%esp,%ebx,4),%eax
+	movl	20(%esp),%edi
+	adcl	$0,%edx
+	movl	16(%esp),%esi
+	addl	%eax,%ebp
+	adcl	$0,%edx
+	imull	32(%esp),%edi	/* multiplier m for this row */
+	xorl	%ecx,%ecx
+	addl	36(%esp,%ebx,4),%edx
+	movl	%ebp,32(%esp,%ebx,4)
+	adcl	$0,%ecx
+	movl	(%esi),%eax
+	movl	%edx,36(%esp,%ebx,4)
+	movl	%ecx,40(%esp,%ebx,4)
+	mull	%edi
+	addl	32(%esp),%eax
+	movl	4(%esi),%eax
+	adcl	$0,%edx
+	movl	$1,%ecx
+.align	16
+.L0112ndmadd:	/* t += n * m, with results stored one word lower (offset 24 instead of 28) */
+	movl	%edx,%ebp
+	mull	%edi
+	addl	32(%esp,%ecx,4),%ebp
+	leal	1(%ecx),%ecx
+	adcl	$0,%edx
+	addl	%eax,%ebp
+	movl	(%esi,%ecx,4),%eax
+	adcl	$0,%edx
+	cmpl	%ebx,%ecx
+	movl	%ebp,24(%esp,%ecx,4)
+	jl	.L0112ndmadd
+	movl	%edx,%ebp
+	mull	%edi
+	addl	32(%esp,%ebx,4),%ebp
+	adcl	$0,%edx
+	addl	%eax,%ebp
+	adcl	$0,%edx
+	movl	%ebp,28(%esp,%ebx,4)
+	xorl	%eax,%eax
+	movl	12(%esp),%ecx	/* ecx = current position in b */
+	addl	36(%esp,%ebx,4),%edx
+	adcl	40(%esp,%ebx,4),%eax
+	leal	4(%ecx),%ecx	/* advance to the next word of b */
+	movl	%edx,32(%esp,%ebx,4)
+	cmpl	28(%esp),%ecx	/* reached the end-of-b sentinel? */
+	movl	%eax,36(%esp,%ebx,4)
+	je	.L008common_tail
+	movl	(%ecx),%edi	/* edi = next b[i] */
+	movl	8(%esp),%esi
+	movl	%ecx,12(%esp)
+	xorl	%ecx,%ecx
+	xorl	%edx,%edx
+	movl	(%esi),%eax
+	jmp	.L0121stmadd
+.align	16
+.L009bn_sqr_mont:	/* squaring path, entered when a == b and num is even */
+	movl	%ebx,(%esp)	/* 0(%esp) = num - 1 */
+	movl	%ecx,12(%esp)	/* 12(%esp) = 0: slot reused as row index i */
+	movl	%edi,%eax
+	mull	%edi	/* a[0] squared */
+	movl	%eax,32(%esp)
+	movl	%edx,%ebx
+	shrl	$1,%edx
+	andl	$1,%ebx
+	incl	%ecx
+.align	16
+.L013sqr:	/* cross products a[j]*a[0] are doubled via lea; ebx carries the bit shifted out of each word */
+	movl	(%esi,%ecx,4),%eax
+	movl	%edx,%ebp
+	mull	%edi
+	addl	%ebp,%eax
+	leal	1(%ecx),%ecx
+	adcl	$0,%edx
+	leal	(%ebx,%eax,2),%ebp
+	shrl	$31,%eax
+	cmpl	(%esp),%ecx
+	movl	%eax,%ebx
+	movl	%ebp,28(%esp,%ecx,4)
+	jl	.L013sqr
+	movl	(%esi,%ecx,4),%eax
+	movl	%edx,%ebp
+	mull	%edi
+	addl	%ebp,%eax
+	movl	20(%esp),%edi	/* edi = reduction constant */
+	adcl	$0,%edx
+	movl	16(%esp),%esi	/* esi = n */
+	leal	(%ebx,%eax,2),%ebp
+	imull	32(%esp),%edi	/* multiplier m = t[0] * constant */
+	shrl	$31,%eax
+	movl	%ebp,32(%esp,%ecx,4)
+	leal	(%eax,%edx,2),%ebp
+	movl	(%esi),%eax
+	shrl	$31,%edx
+	movl	%ebp,36(%esp,%ecx,4)
+	movl	%edx,40(%esp,%ecx,4)
+	mull	%edi
+	addl	32(%esp),%eax
+	movl	%ecx,%ebx	/* ebx = num - 1 */
+	adcl	$0,%edx
+	movl	4(%esi),%eax
+	movl	$1,%ecx
+.align	16
+.L0143rdmadd:	/* reduction t += n * m, two words per iteration, stored one word lower */
+	movl	%edx,%ebp
+	mull	%edi
+	addl	32(%esp,%ecx,4),%ebp
+	adcl	$0,%edx
+	addl	%eax,%ebp
+	movl	4(%esi,%ecx,4),%eax
+	adcl	$0,%edx
+	movl	%ebp,28(%esp,%ecx,4)
+	movl	%edx,%ebp
+	mull	%edi
+	addl	36(%esp,%ecx,4),%ebp
+	leal	2(%ecx),%ecx
+	adcl	$0,%edx
+	addl	%eax,%ebp
+	movl	(%esi,%ecx,4),%eax
+	adcl	$0,%edx
+	cmpl	%ebx,%ecx
+	movl	%ebp,24(%esp,%ecx,4)
+	jl	.L0143rdmadd
+	movl	%edx,%ebp
+	mull	%edi
+	addl	32(%esp,%ebx,4),%ebp
+	adcl	$0,%edx
+	addl	%eax,%ebp
+	adcl	$0,%edx
+	movl	%ebp,28(%esp,%ebx,4)
+	movl	12(%esp),%ecx	/* ecx = row index i */
+	xorl	%eax,%eax
+	movl	8(%esp),%esi	/* esi = a */
+	addl	36(%esp,%ebx,4),%edx
+	adcl	40(%esp,%ebx,4),%eax
+	movl	%edx,32(%esp,%ebx,4)
+	cmpl	%ebx,%ecx
+	movl	%eax,36(%esp,%ebx,4)
+	je	.L008common_tail	/* i == num - 1: all rows done */
+	movl	4(%esi,%ecx,4),%edi	/* edi = a[i+1] */
+	leal	1(%ecx),%ecx
+	movl	%edi,%eax
+	movl	%ecx,12(%esp)
+	mull	%edi	/* diagonal term a[i]^2 */
+	addl	32(%esp,%ecx,4),%eax
+	adcl	$0,%edx
+	movl	%eax,32(%esp,%ecx,4)
+	xorl	%ebp,%ebp
+	cmpl	%ebx,%ecx
+	leal	1(%ecx),%ecx	/* lea keeps the flags from cmpl for je */
+	je	.L015sqrlast
+	movl	%edx,%ebx
+	shrl	$1,%edx
+	andl	$1,%ebx
+.align	16
+.L016sqradd:	/* accumulate doubled cross products a[j]*a[i] into t */
+	movl	(%esi,%ecx,4),%eax
+	movl	%edx,%ebp
+	mull	%edi
+	addl	%ebp,%eax
+	leal	(%eax,%eax,1),%ebp
+	adcl	$0,%edx
+	shrl	$31,%eax
+	addl	32(%esp,%ecx,4),%ebp
+	leal	1(%ecx),%ecx
+	adcl	$0,%eax
+	addl	%ebx,%ebp
+	adcl	$0,%eax
+	cmpl	(%esp),%ecx
+	movl	%ebp,28(%esp,%ecx,4)
+	movl	%eax,%ebx
+	jle	.L016sqradd
+	movl	%edx,%ebp
+	addl	%edx,%edx
+	shrl	$31,%ebp
+	addl	%ebx,%edx
+	adcl	$0,%ebp
+.L015sqrlast:
+	movl	20(%esp),%edi
+	movl	16(%esp),%esi
+	imull	32(%esp),%edi	/* multiplier m for this row */
+	addl	32(%esp,%ecx,4),%edx
+	movl	(%esi),%eax
+	adcl	$0,%ebp
+	movl	%edx,32(%esp,%ecx,4)
+	movl	%ebp,36(%esp,%ecx,4)
+	mull	%edi
+	addl	32(%esp),%eax
+	leal	-1(%ecx),%ebx
+	adcl	$0,%edx
+	movl	$1,%ecx
+	movl	4(%esi),%eax
+	jmp	.L0143rdmadd
+.align	16
+.L008common_tail:	/* final step shared by all paths: conditional subtraction of n and copy-out */
+	movl	16(%esp),%ebp	/* ebp = n */
+	movl	4(%esp),%edi	/* edi = result pointer */
+	leal	32(%esp),%esi	/* esi = t */
+	movl	(%esi),%eax
+	movl	%ebx,%ecx	/* ecx = num - 1 */
+	xorl	%edx,%edx	/* index 0; also clears CF for the sbb chain */
+.align	16
+.L017sub:	/* result[i] = t[i] - n[i] - borrow */
+	sbbl	(%ebp,%edx,4),%eax
+	movl	%eax,(%edi,%edx,4)
+	decl	%ecx	/* dec does not modify CF */
+	movl	4(%esi,%edx,4),%eax
+	leal	1(%edx),%edx
+	jge	.L017sub
+	sbbl	$0,%eax	/* fold the final borrow into the overflow word of t; result is used as a select mask */
+	andl	%eax,%esi
+	notl	%eax
+	movl	%edi,%ebp
+	andl	%eax,%ebp
+	orl	%ebp,%esi	/* esi = mask ? t : result, picks the source of the copy below */
+.align	16
+.L018copy:
+	movl	(%esi,%ebx,4),%eax
+	movl	%eax,(%edi,%ebx,4)
+	movl	%ecx,32(%esp,%ebx,4)	/* overwrite t[i] with ecx (-1 left from the sub loop) so no intermediate stays on the stack */
+	decl	%ebx
+	jge	.L018copy
+	movl	24(%esp),%esp	/* restore the stack pointer saved in the prologue */
+	movl	$1,%eax	/* return 1 */
+.L000just_leave:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	bn_mul_mont,.-.L_bn_mul_mont_begin
+.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105	/* ASCII ident string: Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org> */
+.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
+.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
+.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
+.byte	111,114,103,62,0
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/chacha/chacha-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/chacha/chacha-x86_64.S
new file mode 100644
index 0000000..62dc779
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/chacha/chacha-x86_64.S
@@ -0,0 +1,1586 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+.extern	OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
+
+.align	64
+.Lzero:
+.long	0,0,0,0
+.Lone:	/* counter increment for the one-block-at-a-time paths */
+.long	1,0,0,0
+.Linc:	/* per-lane counter offsets for the 4-block path */
+.long	0,1,2,3
+.Lfour:	/* counter step between 4-block iterations */
+.long	4,4,4,4
+.Lincy:	/* per-lane counter offsets for the 8-block path */
+.long	0,2,4,6,1,3,5,7
+.Leight:	/* counter step between 8-block iterations */
+.long	8,8,8,8,8,8,8,8
+.Lrot16:	/* pshufb mask: rotate every dword by 16 bits */
+.byte	0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd
+.Lrot24:	/* pshufb mask: rotate every dword left by 8 bits (right by 24) */
+.byte	0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe
+.Lsigma:	/* ASCII "expand 32-byte k" plus NUL: the four constant state words */
+.byte	101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0
+.align	64
+.Lzeroz:	/* .Lzeroz through .Lsixteen are not referenced by the code visible in this chunk */
+.long	0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0
+.Lfourz:
+.long	4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0
+.Lincz:
+.long	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+.Lsixteen:
+.long	16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
+.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	/* ASCII ident string: ChaCha20 for x86_64, CRYPTOGAMS by <appro@openssl.org> */
+.globl	ChaCha20_ctr32	/* ChaCha20 keystream XOR; registers as used below: rdi = output, rsi = input, rdx = byte count, rcx = 32-byte key, r8 = 16-byte counter block */
+.hidden ChaCha20_ctr32
+.type	ChaCha20_ctr32,@function
+.align	64
+ChaCha20_ctr32:
+	cmpq	$0,%rdx
+	je	.Lno_data	/* zero length: return immediately */
+	movq	OPENSSL_ia32cap_P+4(%rip),%r10	/* r10 = capability words at +4 and +8; the 4x entry point reads r10 again */
+	testl	$512,%r10d
+	jnz	.LChaCha20_ssse3	/* bit 9 set: use the SSSE3 code paths */
+
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	subq	$64+24,%rsp	/* 64-byte state area plus three saved qwords */
+.Lctr32_body:
+
+
+	movdqu	(%rcx),%xmm1	/* key bytes 0..15 */
+	movdqu	16(%rcx),%xmm2	/* key bytes 16..31 */
+	movdqu	(%r8),%xmm3	/* counter block */
+	movdqa	.Lone(%rip),%xmm4	/* increment (1,0,0,0) */
+
+
+	movdqa	%xmm1,16(%rsp)	/* 16(%rsp): state words 4..7 */
+	movdqa	%xmm2,32(%rsp)	/* 32(%rsp): state words 8..11 */
+	movdqa	%xmm3,48(%rsp)	/* 48(%rsp): state words 12..15 */
+	movq	%rdx,%rbp	/* rbp = remaining byte count */
+	jmp	.Loop_outer
+
+.align	32
+.Loop_outer:	/* one 64-byte block per iteration; working state x0..x15 lives in integer registers */
+	movl	$0x61707865,%eax	/* x0..x3: the same four dwords as .Lsigma */
+	movl	$0x3320646e,%ebx
+	movl	$0x79622d32,%ecx
+	movl	$0x6b206574,%edx
+	movl	16(%rsp),%r8d	/* x4..x7 */
+	movl	20(%rsp),%r9d
+	movl	24(%rsp),%r10d
+	movl	28(%rsp),%r11d
+	movd	%xmm3,%r12d	/* x12: current block counter */
+	movl	52(%rsp),%r13d	/* x13..x15 */
+	movl	56(%rsp),%r14d
+	movl	60(%rsp),%r15d
+
+	movq	%rbp,64+0(%rsp)	/* spill length, input and output pointers: their registers are reused as state */
+	movl	$10,%ebp	/* 10 iterations of the loop below */
+	movq	%rsi,64+8(%rsp)
+.byte	102,72,15,126,214	/* encodes movq %xmm2,%rsi: rsi = x8 (low half) and x9 (high half) */
+	movq	%rdi,64+16(%rsp)
+	movq	%rsi,%rdi
+	shrq	$32,%rdi	/* edi = x9; esi keeps x8 in its low dword */
+	jmp	.Loop
+
+.align	32
+.Loop:	/* each iteration: four column quarter-rounds, then four diagonal quarter-rounds */
+	addl	%r8d,%eax	/* quarter-rounds on (x0,x4,x8,x12) and (x1,x5,x9,x13), interleaved */
+	xorl	%eax,%r12d
+	roll	$16,%r12d
+	addl	%r9d,%ebx
+	xorl	%ebx,%r13d
+	roll	$16,%r13d
+	addl	%r12d,%esi
+	xorl	%esi,%r8d
+	roll	$12,%r8d
+	addl	%r13d,%edi
+	xorl	%edi,%r9d
+	roll	$12,%r9d
+	addl	%r8d,%eax
+	xorl	%eax,%r12d
+	roll	$8,%r12d
+	addl	%r9d,%ebx
+	xorl	%ebx,%r13d
+	roll	$8,%r13d
+	addl	%r12d,%esi
+	xorl	%esi,%r8d
+	roll	$7,%r8d
+	addl	%r13d,%edi
+	xorl	%edi,%r9d
+	roll	$7,%r9d
+	movl	%esi,32(%rsp)	/* park x8,x9 and bring in x10,x11: only two registers serve words 8..11 */
+	movl	%edi,36(%rsp)
+	movl	40(%rsp),%esi
+	movl	44(%rsp),%edi
+	addl	%r10d,%ecx	/* quarter-rounds on (x2,x6,x10,x14) and (x3,x7,x11,x15) */
+	xorl	%ecx,%r14d
+	roll	$16,%r14d
+	addl	%r11d,%edx
+	xorl	%edx,%r15d
+	roll	$16,%r15d
+	addl	%r14d,%esi
+	xorl	%esi,%r10d
+	roll	$12,%r10d
+	addl	%r15d,%edi
+	xorl	%edi,%r11d
+	roll	$12,%r11d
+	addl	%r10d,%ecx
+	xorl	%ecx,%r14d
+	roll	$8,%r14d
+	addl	%r11d,%edx
+	xorl	%edx,%r15d
+	roll	$8,%r15d
+	addl	%r14d,%esi
+	xorl	%esi,%r10d
+	roll	$7,%r10d
+	addl	%r15d,%edi
+	xorl	%edi,%r11d
+	roll	$7,%r11d
+	addl	%r9d,%eax	/* diagonal quarter-rounds on (x0,x5,x10,x15) and (x1,x6,x11,x12) */
+	xorl	%eax,%r15d
+	roll	$16,%r15d
+	addl	%r10d,%ebx
+	xorl	%ebx,%r12d
+	roll	$16,%r12d
+	addl	%r15d,%esi
+	xorl	%esi,%r9d
+	roll	$12,%r9d
+	addl	%r12d,%edi
+	xorl	%edi,%r10d
+	roll	$12,%r10d
+	addl	%r9d,%eax
+	xorl	%eax,%r15d
+	roll	$8,%r15d
+	addl	%r10d,%ebx
+	xorl	%ebx,%r12d
+	roll	$8,%r12d
+	addl	%r15d,%esi
+	xorl	%esi,%r9d
+	roll	$7,%r9d
+	addl	%r12d,%edi
+	xorl	%edi,%r10d
+	roll	$7,%r10d
+	movl	%esi,40(%rsp)	/* park x10,x11 and bring x8,x9 back */
+	movl	%edi,44(%rsp)
+	movl	32(%rsp),%esi
+	movl	36(%rsp),%edi
+	addl	%r11d,%ecx	/* diagonal quarter-rounds on (x2,x7,x8,x13) and (x3,x4,x9,x14) */
+	xorl	%ecx,%r13d
+	roll	$16,%r13d
+	addl	%r8d,%edx
+	xorl	%edx,%r14d
+	roll	$16,%r14d
+	addl	%r13d,%esi
+	xorl	%esi,%r11d
+	roll	$12,%r11d
+	addl	%r14d,%edi
+	xorl	%edi,%r8d
+	roll	$12,%r8d
+	addl	%r11d,%ecx
+	xorl	%ecx,%r13d
+	roll	$8,%r13d
+	addl	%r8d,%edx
+	xorl	%edx,%r14d
+	roll	$8,%r14d
+	addl	%r13d,%esi
+	xorl	%esi,%r11d
+	roll	$7,%r11d
+	addl	%r14d,%edi
+	xorl	%edi,%r8d
+	roll	$7,%r8d
+	decl	%ebp
+	jnz	.Loop
+	movl	%edi,36(%rsp)	/* 32..47(%rsp) now holds the worked x8..x11 */
+	movl	%esi,32(%rsp)
+	movq	64(%rsp),%rbp	/* restore remaining length */
+	movdqa	%xmm2,%xmm1	/* xmm1 = original words 8..11 */
+	movq	64+8(%rsp),%rsi	/* restore input pointer */
+	paddd	%xmm4,%xmm3	/* advance the block counter for the next iteration */
+	movq	64+16(%rsp),%rdi	/* restore output pointer */
+
+	addl	$0x61707865,%eax	/* feed-forward: add the initial state to the worked state */
+	addl	$0x3320646e,%ebx
+	addl	$0x79622d32,%ecx
+	addl	$0x6b206574,%edx
+	addl	16(%rsp),%r8d
+	addl	20(%rsp),%r9d
+	addl	24(%rsp),%r10d
+	addl	28(%rsp),%r11d
+	addl	48(%rsp),%r12d	/* 48(%rsp) still holds the counter used for this block */
+	addl	52(%rsp),%r13d
+	addl	56(%rsp),%r14d
+	addl	60(%rsp),%r15d
+	paddd	32(%rsp),%xmm1	/* words 8..11 of the keystream block */
+
+	cmpq	$64,%rbp
+	jb	.Ltail	/* fewer than 64 bytes left: finish byte by byte */
+
+	xorl	0(%rsi),%eax	/* full block: XOR the input with the keystream */
+	xorl	4(%rsi),%ebx
+	xorl	8(%rsi),%ecx
+	xorl	12(%rsi),%edx
+	xorl	16(%rsi),%r8d
+	xorl	20(%rsi),%r9d
+	xorl	24(%rsi),%r10d
+	xorl	28(%rsi),%r11d
+	movdqu	32(%rsi),%xmm0
+	xorl	48(%rsi),%r12d
+	xorl	52(%rsi),%r13d
+	xorl	56(%rsi),%r14d
+	xorl	60(%rsi),%r15d
+	leaq	64(%rsi),%rsi
+	pxor	%xmm1,%xmm0
+
+	movdqa	%xmm2,32(%rsp)	/* put the original words 8..11 back for the next block */
+	movd	%xmm3,48(%rsp)	/* store the incremented counter */
+
+	movl	%eax,0(%rdi)
+	movl	%ebx,4(%rdi)
+	movl	%ecx,8(%rdi)
+	movl	%edx,12(%rdi)
+	movl	%r8d,16(%rdi)
+	movl	%r9d,20(%rdi)
+	movl	%r10d,24(%rdi)
+	movl	%r11d,28(%rdi)
+	movdqu	%xmm0,32(%rdi)
+	movl	%r12d,48(%rdi)
+	movl	%r13d,52(%rdi)
+	movl	%r14d,56(%rdi)
+	movl	%r15d,60(%rdi)
+	leaq	64(%rdi),%rdi
+
+	subq	$64,%rbp
+	jnz	.Loop_outer
+
+	jmp	.Ldone
+
+.align	16
+.Ltail:	/* write the whole keystream block to 0..63(%rsp), then XOR the remaining bytes one at a time */
+	movl	%eax,0(%rsp)
+	movl	%ebx,4(%rsp)
+	xorq	%rbx,%rbx	/* rbx = byte index */
+	movl	%ecx,8(%rsp)
+	movl	%edx,12(%rsp)
+	movl	%r8d,16(%rsp)
+	movl	%r9d,20(%rsp)
+	movl	%r10d,24(%rsp)
+	movl	%r11d,28(%rsp)
+	movdqa	%xmm1,32(%rsp)
+	movl	%r12d,48(%rsp)
+	movl	%r13d,52(%rsp)
+	movl	%r14d,56(%rsp)
+	movl	%r15d,60(%rsp)
+
+.Loop_tail:
+	movzbl	(%rsi,%rbx,1),%eax
+	movzbl	(%rsp,%rbx,1),%edx
+	leaq	1(%rbx),%rbx
+	xorl	%edx,%eax
+	movb	%al,-1(%rdi,%rbx,1)	/* -1 compensates for the index already being incremented */
+	decq	%rbp
+	jnz	.Loop_tail
+
+.Ldone:
+	leaq	64+24+48(%rsp),%rsi	/* rsi = stack pointer value before the six pushes */
+	movq	-48(%rsi),%r15
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp
+.Lno_data:
+	.byte	0xf3,0xc3	/* encodes rep ret */
+.size	ChaCha20_ctr32,.-ChaCha20_ctr32
+.type	ChaCha20_ssse3,@function	/* one-block-at-a-time SSSE3 path; reached by jump from ChaCha20_ctr32 with the same register arguments */
+.align	32
+ChaCha20_ssse3:
+.LChaCha20_ssse3:
+	movq	%rsp,%r9	/* r9 = entry stack pointer, restored at .Ldone_ssse3 */
+	cmpq	$128,%rdx
+	ja	.LChaCha20_4x	/* more than 128 bytes: try the 4-block path */
+
+.Ldo_sse3_after_all:	/* also entered from ChaCha20_4x when it falls back to this path */
+	subq	$64+8,%rsp
+	movdqa	.Lsigma(%rip),%xmm0	/* state row 0: constants */
+	movdqu	(%rcx),%xmm1	/* row 1: key bytes 0..15 */
+	movdqu	16(%rcx),%xmm2	/* row 2: key bytes 16..31 */
+	movdqu	(%r8),%xmm3	/* row 3: counter block */
+	movdqa	.Lrot16(%rip),%xmm6	/* pshufb mask for the 16-bit rotate */
+	movdqa	.Lrot24(%rip),%xmm7	/* pshufb mask for the 8-bit left rotate */
+
+	movdqa	%xmm0,0(%rsp)	/* keep the initial state on the stack for the feed-forward */
+	movdqa	%xmm1,16(%rsp)
+	movdqa	%xmm2,32(%rsp)
+	movdqa	%xmm3,48(%rsp)
+	movq	$10,%r8	/* 10 iterations of .Loop_ssse3 */
+	jmp	.Loop_ssse3
+
+.align	32
+.Loop_outer_ssse3:	/* next block: reload the saved state and bump the counter by .Lone */
+	movdqa	.Lone(%rip),%xmm3
+	movdqa	0(%rsp),%xmm0
+	movdqa	16(%rsp),%xmm1
+	movdqa	32(%rsp),%xmm2
+	paddd	48(%rsp),%xmm3
+	movq	$10,%r8
+	movdqa	%xmm3,48(%rsp)
+	jmp	.Loop_ssse3
+
+.align	32
+.Loop_ssse3:	/* each iteration: quarter-rounds on all four columns at once, then on all four diagonals */
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,222	/* encodes pshufb %xmm6,%xmm3: rotate dwords by 16 */
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4	/* rotate left by 12 built from two shifts and an or */
+	psrld	$20,%xmm1
+	pslld	$12,%xmm4
+	por	%xmm4,%xmm1
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,223	/* encodes pshufb %xmm7,%xmm3: rotate dwords left by 8 */
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4	/* rotate left by 7 */
+	psrld	$25,%xmm1
+	pslld	$7,%xmm4
+	por	%xmm4,%xmm1
+	pshufd	$78,%xmm2,%xmm2	/* rotate rows 1..3 by 1, 2 and 3 lanes so the diagonals line up as columns */
+	pshufd	$57,%xmm1,%xmm1
+	pshufd	$147,%xmm3,%xmm3
+	nop
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,222	/* encodes pshufb %xmm6,%xmm3 */
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$20,%xmm1
+	pslld	$12,%xmm4
+	por	%xmm4,%xmm1
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,223	/* encodes pshufb %xmm7,%xmm3 */
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$25,%xmm1
+	pslld	$7,%xmm4
+	por	%xmm4,%xmm1
+	pshufd	$78,%xmm2,%xmm2	/* inverse lane rotation: back to column order */
+	pshufd	$147,%xmm1,%xmm1
+	pshufd	$57,%xmm3,%xmm3
+	decq	%r8
+	jnz	.Loop_ssse3
+	paddd	0(%rsp),%xmm0	/* feed-forward: add the initial state */
+	paddd	16(%rsp),%xmm1
+	paddd	32(%rsp),%xmm2
+	paddd	48(%rsp),%xmm3
+
+	cmpq	$64,%rdx
+	jb	.Ltail_ssse3
+
+	movdqu	0(%rsi),%xmm4	/* full block: XOR 64 input bytes with the keystream */
+	movdqu	16(%rsi),%xmm5
+	pxor	%xmm4,%xmm0
+	movdqu	32(%rsi),%xmm4
+	pxor	%xmm5,%xmm1
+	movdqu	48(%rsi),%xmm5
+	leaq	64(%rsi),%rsi
+	pxor	%xmm4,%xmm2
+	pxor	%xmm5,%xmm3
+
+	movdqu	%xmm0,0(%rdi)
+	movdqu	%xmm1,16(%rdi)
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm3,48(%rdi)
+	leaq	64(%rdi),%rdi
+
+	subq	$64,%rdx
+	jnz	.Loop_outer_ssse3
+
+	jmp	.Ldone_ssse3
+
+.align	16
+.Ltail_ssse3:	/* partial block: spill the keystream over the saved state and XOR byte by byte */
+	movdqa	%xmm0,0(%rsp)
+	movdqa	%xmm1,16(%rsp)
+	movdqa	%xmm2,32(%rsp)
+	movdqa	%xmm3,48(%rsp)
+	xorq	%r8,%r8	/* r8 = byte index */
+
+.Loop_tail_ssse3:
+	movzbl	(%rsi,%r8,1),%eax
+	movzbl	(%rsp,%r8,1),%ecx
+	leaq	1(%r8),%r8
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r8,1)
+	decq	%rdx
+	jnz	.Loop_tail_ssse3
+
+.Ldone_ssse3:
+	leaq	(%r9),%rsp	/* release the frame */
+.Lssse3_epilogue:
+	.byte	0xf3,0xc3	/* encodes rep ret */
+.size	ChaCha20_ssse3,.-ChaCha20_ssse3
+.type	ChaCha20_4x,@function	/* four-blocks-in-parallel SSSE3 path; reached by jump from ChaCha20_ssse3 with r10 still holding the capability words loaded in ChaCha20_ctr32 */
+.align	32
+ChaCha20_4x:
+.LChaCha20_4x:
+	movq	%rsp,%r9	/* r9 = entry stack pointer, restored at .Ldone4x */
+	movq	%r10,%r11
+	shrq	$32,%r10	/* r10 = capability word at OPENSSL_ia32cap_P+8 */
+	testq	$32,%r10
+	jnz	.LChaCha20_8x	/* bit 5 set: use the 8-block ymm path */
+	cmpq	$192,%rdx
+	ja	.Lproceed4x
+
+	andq	$71303168,%r11	/* mask 0x4400000: bits 22 and 26 of the word at +4 */
+	cmpq	$4194304,%r11	/* bit 22 set with bit 26 clear; NOTE(review): presumably a CPU-model heuristic (MOVBE without XSAVE) - confirm */
+	je	.Ldo_sse3_after_all	/* inputs of 129..192 bytes on such CPUs use the one-block path instead */
+
+.Lproceed4x:
+	subq	$0x140+8,%rsp
+	movdqa	.Lsigma(%rip),%xmm11
+	movdqu	(%rcx),%xmm15	/* key bytes 0..15 */
+	movdqu	16(%rcx),%xmm7	/* key bytes 16..31 */
+	movdqu	(%r8),%xmm3	/* counter block */
+	leaq	256(%rsp),%rcx	/* rcx = frame base + 256; the N-256(%rcx) operands below are frame offsets N */
+	leaq	.Lrot16(%rip),%r10
+	leaq	.Lrot24(%rip),%r11
+
+	pshufd	$0x00,%xmm11,%xmm8	/* broadcast each state word across the 4 lanes: one lane per block */
+	pshufd	$0x55,%xmm11,%xmm9
+	movdqa	%xmm8,64(%rsp)
+	pshufd	$0xaa,%xmm11,%xmm10
+	movdqa	%xmm9,80(%rsp)
+	pshufd	$0xff,%xmm11,%xmm11
+	movdqa	%xmm10,96(%rsp)
+	movdqa	%xmm11,112(%rsp)
+
+	pshufd	$0x00,%xmm15,%xmm12
+	pshufd	$0x55,%xmm15,%xmm13
+	movdqa	%xmm12,128-256(%rcx)
+	pshufd	$0xaa,%xmm15,%xmm14
+	movdqa	%xmm13,144-256(%rcx)
+	pshufd	$0xff,%xmm15,%xmm15
+	movdqa	%xmm14,160-256(%rcx)
+	movdqa	%xmm15,176-256(%rcx)
+
+	pshufd	$0x00,%xmm7,%xmm4
+	pshufd	$0x55,%xmm7,%xmm5
+	movdqa	%xmm4,192-256(%rcx)
+	pshufd	$0xaa,%xmm7,%xmm6
+	movdqa	%xmm5,208-256(%rcx)
+	pshufd	$0xff,%xmm7,%xmm7
+	movdqa	%xmm6,224-256(%rcx)
+	movdqa	%xmm7,240-256(%rcx)
+
+	pshufd	$0x00,%xmm3,%xmm0
+	pshufd	$0x55,%xmm3,%xmm1
+	paddd	.Linc(%rip),%xmm0	/* lanes get counter+0 .. counter+3 */
+	pshufd	$0xaa,%xmm3,%xmm2
+	movdqa	%xmm1,272-256(%rcx)
+	pshufd	$0xff,%xmm3,%xmm3
+	movdqa	%xmm2,288-256(%rcx)
+	movdqa	%xmm3,304-256(%rcx)
+
+	jmp	.Loop_enter4x
+
+.align	32
+.Loop_outer4x:	/* next 256 bytes: reload the broadcast state and advance the counters by 4 */
+	movdqa	64(%rsp),%xmm8
+	movdqa	80(%rsp),%xmm9
+	movdqa	96(%rsp),%xmm10
+	movdqa	112(%rsp),%xmm11
+	movdqa	128-256(%rcx),%xmm12
+	movdqa	144-256(%rcx),%xmm13
+	movdqa	160-256(%rcx),%xmm14
+	movdqa	176-256(%rcx),%xmm15
+	movdqa	192-256(%rcx),%xmm4
+	movdqa	208-256(%rcx),%xmm5
+	movdqa	224-256(%rcx),%xmm6
+	movdqa	240-256(%rcx),%xmm7
+	movdqa	256-256(%rcx),%xmm0
+	movdqa	272-256(%rcx),%xmm1
+	movdqa	288-256(%rcx),%xmm2
+	movdqa	304-256(%rcx),%xmm3
+	paddd	.Lfour(%rip),%xmm0
+
+.Loop_enter4x:
+	movdqa	%xmm6,32(%rsp)	/* words 10 and 11 live at 32/48(%rsp) while xmm6/xmm7 serve as scratch */
+	movdqa	%xmm7,48(%rsp)
+	movdqa	(%r10),%xmm7	/* xmm7 = rot16 mask */
+	movl	$10,%eax	/* 10 iterations of .Loop4x */
+	movdqa	%xmm0,256-256(%rcx)	/* remember the counters used for these four blocks */
+	jmp	.Loop4x
+
+.align	32
+.Loop4x:	/* each iteration: column quarter-rounds then diagonal quarter-rounds, two at a time; xmm6/xmm7 alternate as rotate mask and shift scratch */
+	paddd	%xmm12,%xmm8
+	paddd	%xmm13,%xmm9
+	pxor	%xmm8,%xmm0
+	pxor	%xmm9,%xmm1
+.byte	102,15,56,0,199	/* encodes pshufb %xmm7,%xmm0 (rotate by 16) */
+.byte	102,15,56,0,207	/* encodes pshufb %xmm7,%xmm1 */
+	paddd	%xmm0,%xmm4
+	paddd	%xmm1,%xmm5
+	pxor	%xmm4,%xmm12
+	pxor	%xmm5,%xmm13
+	movdqa	%xmm12,%xmm6
+	pslld	$12,%xmm12
+	psrld	$20,%xmm6
+	movdqa	%xmm13,%xmm7
+	pslld	$12,%xmm13
+	por	%xmm6,%xmm12
+	psrld	$20,%xmm7
+	movdqa	(%r11),%xmm6	/* xmm6 = rot24 mask */
+	por	%xmm7,%xmm13
+	paddd	%xmm12,%xmm8
+	paddd	%xmm13,%xmm9
+	pxor	%xmm8,%xmm0
+	pxor	%xmm9,%xmm1
+.byte	102,15,56,0,198	/* encodes pshufb %xmm6,%xmm0 (rotate left by 8) */
+.byte	102,15,56,0,206	/* encodes pshufb %xmm6,%xmm1 */
+	paddd	%xmm0,%xmm4
+	paddd	%xmm1,%xmm5
+	pxor	%xmm4,%xmm12
+	pxor	%xmm5,%xmm13
+	movdqa	%xmm12,%xmm7
+	pslld	$7,%xmm12
+	psrld	$25,%xmm7
+	movdqa	%xmm13,%xmm6
+	pslld	$7,%xmm13
+	por	%xmm7,%xmm12
+	psrld	$25,%xmm6
+	movdqa	(%r10),%xmm7	/* xmm7 = rot16 mask again */
+	por	%xmm6,%xmm13
+	movdqa	%xmm4,0(%rsp)	/* swap in the other pair of third-row words: park 8,9 and load 10,11 */
+	movdqa	%xmm5,16(%rsp)
+	movdqa	32(%rsp),%xmm4
+	movdqa	48(%rsp),%xmm5
+	paddd	%xmm14,%xmm10
+	paddd	%xmm15,%xmm11
+	pxor	%xmm10,%xmm2
+	pxor	%xmm11,%xmm3
+.byte	102,15,56,0,215	/* encodes pshufb %xmm7,%xmm2 */
+.byte	102,15,56,0,223	/* encodes pshufb %xmm7,%xmm3 */
+	paddd	%xmm2,%xmm4
+	paddd	%xmm3,%xmm5
+	pxor	%xmm4,%xmm14
+	pxor	%xmm5,%xmm15
+	movdqa	%xmm14,%xmm6
+	pslld	$12,%xmm14
+	psrld	$20,%xmm6
+	movdqa	%xmm15,%xmm7
+	pslld	$12,%xmm15
+	por	%xmm6,%xmm14
+	psrld	$20,%xmm7
+	movdqa	(%r11),%xmm6
+	por	%xmm7,%xmm15
+	paddd	%xmm14,%xmm10
+	paddd	%xmm15,%xmm11
+	pxor	%xmm10,%xmm2
+	pxor	%xmm11,%xmm3
+.byte	102,15,56,0,214	/* encodes pshufb %xmm6,%xmm2 */
+.byte	102,15,56,0,222	/* encodes pshufb %xmm6,%xmm3 */
+	paddd	%xmm2,%xmm4
+	paddd	%xmm3,%xmm5
+	pxor	%xmm4,%xmm14
+	pxor	%xmm5,%xmm15
+	movdqa	%xmm14,%xmm7
+	pslld	$7,%xmm14
+	psrld	$25,%xmm7
+	movdqa	%xmm15,%xmm6
+	pslld	$7,%xmm15
+	por	%xmm7,%xmm14
+	psrld	$25,%xmm6
+	movdqa	(%r10),%xmm7
+	por	%xmm6,%xmm15
+	paddd	%xmm13,%xmm8	/* diagonal half starts here: word 0 pairs with 5, word 1 with 6 */
+	paddd	%xmm14,%xmm9
+	pxor	%xmm8,%xmm3
+	pxor	%xmm9,%xmm0
+.byte	102,15,56,0,223	/* encodes pshufb %xmm7,%xmm3 */
+.byte	102,15,56,0,199	/* encodes pshufb %xmm7,%xmm0 */
+	paddd	%xmm3,%xmm4
+	paddd	%xmm0,%xmm5
+	pxor	%xmm4,%xmm13
+	pxor	%xmm5,%xmm14
+	movdqa	%xmm13,%xmm6
+	pslld	$12,%xmm13
+	psrld	$20,%xmm6
+	movdqa	%xmm14,%xmm7
+	pslld	$12,%xmm14
+	por	%xmm6,%xmm13
+	psrld	$20,%xmm7
+	movdqa	(%r11),%xmm6
+	por	%xmm7,%xmm14
+	paddd	%xmm13,%xmm8
+	paddd	%xmm14,%xmm9
+	pxor	%xmm8,%xmm3
+	pxor	%xmm9,%xmm0
+.byte	102,15,56,0,222	/* encodes pshufb %xmm6,%xmm3 */
+.byte	102,15,56,0,198	/* encodes pshufb %xmm6,%xmm0 */
+	paddd	%xmm3,%xmm4
+	paddd	%xmm0,%xmm5
+	pxor	%xmm4,%xmm13
+	pxor	%xmm5,%xmm14
+	movdqa	%xmm13,%xmm7
+	pslld	$7,%xmm13
+	psrld	$25,%xmm7
+	movdqa	%xmm14,%xmm6
+	pslld	$7,%xmm14
+	por	%xmm7,%xmm13
+	psrld	$25,%xmm6
+	movdqa	(%r10),%xmm7
+	por	%xmm6,%xmm14
+	movdqa	%xmm4,32(%rsp)	/* park words 10,11 and bring 8,9 back */
+	movdqa	%xmm5,48(%rsp)
+	movdqa	0(%rsp),%xmm4
+	movdqa	16(%rsp),%xmm5
+	paddd	%xmm15,%xmm10
+	paddd	%xmm12,%xmm11
+	pxor	%xmm10,%xmm1
+	pxor	%xmm11,%xmm2
+.byte	102,15,56,0,207	/* encodes pshufb %xmm7,%xmm1 */
+.byte	102,15,56,0,215	/* encodes pshufb %xmm7,%xmm2 */
+	paddd	%xmm1,%xmm4
+	paddd	%xmm2,%xmm5
+	pxor	%xmm4,%xmm15
+	pxor	%xmm5,%xmm12
+	movdqa	%xmm15,%xmm6
+	pslld	$12,%xmm15
+	psrld	$20,%xmm6
+	movdqa	%xmm12,%xmm7
+	pslld	$12,%xmm12
+	por	%xmm6,%xmm15
+	psrld	$20,%xmm7
+	movdqa	(%r11),%xmm6
+	por	%xmm7,%xmm12
+	paddd	%xmm15,%xmm10
+	paddd	%xmm12,%xmm11
+	pxor	%xmm10,%xmm1
+	pxor	%xmm11,%xmm2
+.byte	102,15,56,0,206	/* encodes pshufb %xmm6,%xmm1 */
+.byte	102,15,56,0,214	/* encodes pshufb %xmm6,%xmm2 */
+	paddd	%xmm1,%xmm4
+	paddd	%xmm2,%xmm5
+	pxor	%xmm4,%xmm15
+	pxor	%xmm5,%xmm12
+	movdqa	%xmm15,%xmm7
+	pslld	$7,%xmm15
+	psrld	$25,%xmm7
+	movdqa	%xmm12,%xmm6
+	pslld	$7,%xmm12
+	por	%xmm7,%xmm15
+	psrld	$25,%xmm6
+	movdqa	(%r10),%xmm7
+	por	%xmm6,%xmm12
+	decl	%eax
+	jnz	.Loop4x
+
+	paddd	64(%rsp),%xmm8	/* feed-forward for words 0..3 */
+	paddd	80(%rsp),%xmm9
+	paddd	96(%rsp),%xmm10
+	paddd	112(%rsp),%xmm11
+
+	movdqa	%xmm8,%xmm6	/* 4x4 dword transpose: afterwards each register holds 16 consecutive keystream bytes of one block */
+	punpckldq	%xmm9,%xmm8
+	movdqa	%xmm10,%xmm7
+	punpckldq	%xmm11,%xmm10
+	punpckhdq	%xmm9,%xmm6
+	punpckhdq	%xmm11,%xmm7
+	movdqa	%xmm8,%xmm9
+	punpcklqdq	%xmm10,%xmm8
+	movdqa	%xmm6,%xmm11
+	punpcklqdq	%xmm7,%xmm6
+	punpckhqdq	%xmm10,%xmm9
+	punpckhqdq	%xmm7,%xmm11
+	paddd	128-256(%rcx),%xmm12	/* feed-forward for words 4..7 */
+	paddd	144-256(%rcx),%xmm13
+	paddd	160-256(%rcx),%xmm14
+	paddd	176-256(%rcx),%xmm15
+
+	movdqa	%xmm8,0(%rsp)	/* first 16 keystream bytes of blocks 0 and 1 go to the stack to free registers */
+	movdqa	%xmm9,16(%rsp)
+	movdqa	32(%rsp),%xmm8	/* fetch the parked words 10 and 11 */
+	movdqa	48(%rsp),%xmm9
+
+	movdqa	%xmm12,%xmm10
+	punpckldq	%xmm13,%xmm12
+	movdqa	%xmm14,%xmm7
+	punpckldq	%xmm15,%xmm14
+	punpckhdq	%xmm13,%xmm10
+	punpckhdq	%xmm15,%xmm7
+	movdqa	%xmm12,%xmm13
+	punpcklqdq	%xmm14,%xmm12
+	movdqa	%xmm10,%xmm15
+	punpcklqdq	%xmm7,%xmm10
+	punpckhqdq	%xmm14,%xmm13
+	punpckhqdq	%xmm7,%xmm15
+	paddd	192-256(%rcx),%xmm4	/* feed-forward for words 8..11 */
+	paddd	208-256(%rcx),%xmm5
+	paddd	224-256(%rcx),%xmm8
+	paddd	240-256(%rcx),%xmm9
+
+	movdqa	%xmm6,32(%rsp)	/* first 16 keystream bytes of blocks 2 and 3 */
+	movdqa	%xmm11,48(%rsp)
+
+	movdqa	%xmm4,%xmm14
+	punpckldq	%xmm5,%xmm4
+	movdqa	%xmm8,%xmm7
+	punpckldq	%xmm9,%xmm8
+	punpckhdq	%xmm5,%xmm14
+	punpckhdq	%xmm9,%xmm7
+	movdqa	%xmm4,%xmm5
+	punpcklqdq	%xmm8,%xmm4
+	movdqa	%xmm14,%xmm9
+	punpcklqdq	%xmm7,%xmm14
+	punpckhqdq	%xmm8,%xmm5
+	punpckhqdq	%xmm7,%xmm9
+	paddd	256-256(%rcx),%xmm0	/* feed-forward for words 12..15 (per-lane counters included) */
+	paddd	272-256(%rcx),%xmm1
+	paddd	288-256(%rcx),%xmm2
+	paddd	304-256(%rcx),%xmm3
+
+	movdqa	%xmm0,%xmm8
+	punpckldq	%xmm1,%xmm0
+	movdqa	%xmm2,%xmm7
+	punpckldq	%xmm3,%xmm2
+	punpckhdq	%xmm1,%xmm8
+	punpckhdq	%xmm3,%xmm7
+	movdqa	%xmm0,%xmm1
+	punpcklqdq	%xmm2,%xmm0
+	movdqa	%xmm8,%xmm3
+	punpcklqdq	%xmm7,%xmm8
+	punpckhqdq	%xmm2,%xmm1
+	punpckhqdq	%xmm7,%xmm3
+	cmpq	$256,%rdx
+	jb	.Ltail4x
+
+	movdqu	0(%rsi),%xmm6	/* block 0: keystream in 0(%rsp), xmm12, xmm4, xmm0 */
+	movdqu	16(%rsi),%xmm11
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm7
+	pxor	0(%rsp),%xmm6
+	pxor	%xmm12,%xmm11
+	pxor	%xmm4,%xmm2
+	pxor	%xmm0,%xmm7
+
+	movdqu	%xmm6,0(%rdi)
+	movdqu	64(%rsi),%xmm6	/* block 1: keystream in 16(%rsp), xmm13, xmm5, xmm1 */
+	movdqu	%xmm11,16(%rdi)
+	movdqu	80(%rsi),%xmm11
+	movdqu	%xmm2,32(%rdi)
+	movdqu	96(%rsi),%xmm2
+	movdqu	%xmm7,48(%rdi)
+	movdqu	112(%rsi),%xmm7
+	leaq	128(%rsi),%rsi
+	pxor	16(%rsp),%xmm6
+	pxor	%xmm13,%xmm11
+	pxor	%xmm5,%xmm2
+	pxor	%xmm1,%xmm7
+
+	movdqu	%xmm6,64(%rdi)
+	movdqu	0(%rsi),%xmm6	/* block 2: keystream in 32(%rsp), xmm10, xmm14, xmm8 */
+	movdqu	%xmm11,80(%rdi)
+	movdqu	16(%rsi),%xmm11
+	movdqu	%xmm2,96(%rdi)
+	movdqu	32(%rsi),%xmm2
+	movdqu	%xmm7,112(%rdi)
+	leaq	128(%rdi),%rdi
+	movdqu	48(%rsi),%xmm7
+	pxor	32(%rsp),%xmm6
+	pxor	%xmm10,%xmm11
+	pxor	%xmm14,%xmm2
+	pxor	%xmm8,%xmm7
+
+	movdqu	%xmm6,0(%rdi)
+	movdqu	64(%rsi),%xmm6	/* block 3: keystream in 48(%rsp), xmm15, xmm9, xmm3 */
+	movdqu	%xmm11,16(%rdi)
+	movdqu	80(%rsi),%xmm11
+	movdqu	%xmm2,32(%rdi)
+	movdqu	96(%rsi),%xmm2
+	movdqu	%xmm7,48(%rdi)
+	movdqu	112(%rsi),%xmm7
+	leaq	128(%rsi),%rsi
+	pxor	48(%rsp),%xmm6
+	pxor	%xmm15,%xmm11
+	pxor	%xmm9,%xmm2
+	pxor	%xmm3,%xmm7
+	movdqu	%xmm6,64(%rdi)
+	movdqu	%xmm11,80(%rdi)
+	movdqu	%xmm2,96(%rdi)
+	movdqu	%xmm7,112(%rdi)
+	leaq	128(%rdi),%rdi
+
+	subq	$256,%rdx
+	jnz	.Loop_outer4x
+
+	jmp	.Ldone4x
+
+.Ltail4x:	/* fewer than 256 bytes left: emit whole 64-byte blocks, then finish byte by byte */
+	cmpq	$192,%rdx
+	jae	.L192_or_more4x
+	cmpq	$128,%rdx
+	jae	.L128_or_more4x
+	cmpq	$64,%rdx
+	jae	.L64_or_more4x
+
+
+	xorq	%r10,%r10	/* r10 = byte index for .Loop_tail4x */
+
+	movdqa	%xmm12,16(%rsp)	/* 0(%rsp) already holds the first 16 bytes of block 0; append the rest */
+	movdqa	%xmm4,32(%rsp)
+	movdqa	%xmm0,48(%rsp)
+	jmp	.Loop_tail4x
+
+.align	32
+.L64_or_more4x:
+	movdqu	0(%rsi),%xmm6
+	movdqu	16(%rsi),%xmm11
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm7
+	pxor	0(%rsp),%xmm6
+	pxor	%xmm12,%xmm11
+	pxor	%xmm4,%xmm2
+	pxor	%xmm0,%xmm7
+	movdqu	%xmm6,0(%rdi)
+	movdqu	%xmm11,16(%rdi)
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm7,48(%rdi)
+	je	.Ldone4x	/* flags still come from cmpq $64,%rdx: exactly 64 bytes means done */
+
+	movdqa	16(%rsp),%xmm6	/* lay out block 1 keystream at 0..63(%rsp) for the byte loop */
+	leaq	64(%rsi),%rsi
+	xorq	%r10,%r10
+	movdqa	%xmm6,0(%rsp)
+	movdqa	%xmm13,16(%rsp)
+	leaq	64(%rdi),%rdi
+	movdqa	%xmm5,32(%rsp)
+	subq	$64,%rdx
+	movdqa	%xmm1,48(%rsp)
+	jmp	.Loop_tail4x
+
+.align	32
+.L128_or_more4x:
+	movdqu	0(%rsi),%xmm6
+	movdqu	16(%rsi),%xmm11
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm7
+	pxor	0(%rsp),%xmm6
+	pxor	%xmm12,%xmm11
+	pxor	%xmm4,%xmm2
+	pxor	%xmm0,%xmm7
+
+	movdqu	%xmm6,0(%rdi)
+	movdqu	64(%rsi),%xmm6
+	movdqu	%xmm11,16(%rdi)
+	movdqu	80(%rsi),%xmm11
+	movdqu	%xmm2,32(%rdi)
+	movdqu	96(%rsi),%xmm2
+	movdqu	%xmm7,48(%rdi)
+	movdqu	112(%rsi),%xmm7
+	pxor	16(%rsp),%xmm6
+	pxor	%xmm13,%xmm11
+	pxor	%xmm5,%xmm2
+	pxor	%xmm1,%xmm7
+	movdqu	%xmm6,64(%rdi)
+	movdqu	%xmm11,80(%rdi)
+	movdqu	%xmm2,96(%rdi)
+	movdqu	%xmm7,112(%rdi)
+	je	.Ldone4x	/* flags still come from cmpq $128,%rdx */
+
+	movdqa	32(%rsp),%xmm6	/* lay out block 2 keystream for the byte loop */
+	leaq	128(%rsi),%rsi
+	xorq	%r10,%r10
+	movdqa	%xmm6,0(%rsp)
+	movdqa	%xmm10,16(%rsp)
+	leaq	128(%rdi),%rdi
+	movdqa	%xmm14,32(%rsp)
+	subq	$128,%rdx
+	movdqa	%xmm8,48(%rsp)
+	jmp	.Loop_tail4x
+
+.align	32
+.L192_or_more4x:
+	movdqu	0(%rsi),%xmm6
+	movdqu	16(%rsi),%xmm11
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm7
+	pxor	0(%rsp),%xmm6
+	pxor	%xmm12,%xmm11
+	pxor	%xmm4,%xmm2
+	pxor	%xmm0,%xmm7
+
+	movdqu	%xmm6,0(%rdi)
+	movdqu	64(%rsi),%xmm6
+	movdqu	%xmm11,16(%rdi)
+	movdqu	80(%rsi),%xmm11
+	movdqu	%xmm2,32(%rdi)
+	movdqu	96(%rsi),%xmm2
+	movdqu	%xmm7,48(%rdi)
+	movdqu	112(%rsi),%xmm7
+	leaq	128(%rsi),%rsi
+	pxor	16(%rsp),%xmm6
+	pxor	%xmm13,%xmm11
+	pxor	%xmm5,%xmm2
+	pxor	%xmm1,%xmm7
+
+	movdqu	%xmm6,64(%rdi)
+	movdqu	0(%rsi),%xmm6
+	movdqu	%xmm11,80(%rdi)
+	movdqu	16(%rsi),%xmm11
+	movdqu	%xmm2,96(%rdi)
+	movdqu	32(%rsi),%xmm2
+	movdqu	%xmm7,112(%rdi)
+	leaq	128(%rdi),%rdi
+	movdqu	48(%rsi),%xmm7
+	pxor	32(%rsp),%xmm6
+	pxor	%xmm10,%xmm11
+	pxor	%xmm14,%xmm2
+	pxor	%xmm8,%xmm7
+	movdqu	%xmm6,0(%rdi)
+	movdqu	%xmm11,16(%rdi)
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm7,48(%rdi)
+	je	.Ldone4x	/* flags still come from cmpq $192,%rdx; lea and the SSE moves leave them alone */
+
+	movdqa	48(%rsp),%xmm6	/* lay out block 3 keystream for the byte loop */
+	leaq	64(%rsi),%rsi
+	xorq	%r10,%r10
+	movdqa	%xmm6,0(%rsp)
+	movdqa	%xmm15,16(%rsp)
+	leaq	64(%rdi),%rdi
+	movdqa	%xmm9,32(%rsp)
+	subq	$192,%rdx
+	movdqa	%xmm3,48(%rsp)
+
+.Loop_tail4x:	/* XOR the remaining rdx bytes with the keystream staged at 0..63(%rsp) */
+	movzbl	(%rsi,%r10,1),%eax
+	movzbl	(%rsp,%r10,1),%ecx
+	leaq	1(%r10),%r10
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r10,1)
+	decq	%rdx
+	jnz	.Loop_tail4x
+
+.Ldone4x:
+	leaq	(%r9),%rsp	/* release the frame */
+.L4x_epilogue:
+	.byte	0xf3,0xc3	/* encodes rep ret */
+.size	ChaCha20_4x,.-ChaCha20_4x
+.type	ChaCha20_8x,@function
+.align	32
+ChaCha20_8x:
+.LChaCha20_8x:
+	movq	%rsp,%r9
+	subq	$0x280+8,%rsp
+	andq	$-32,%rsp
+	vzeroupper
+
+
+
+
+
+
+
+
+
+
+	vbroadcasti128	.Lsigma(%rip),%ymm11
+	vbroadcasti128	(%rcx),%ymm3
+	vbroadcasti128	16(%rcx),%ymm15
+	vbroadcasti128	(%r8),%ymm7
+	leaq	256(%rsp),%rcx
+	leaq	512(%rsp),%rax
+	leaq	.Lrot16(%rip),%r10
+	leaq	.Lrot24(%rip),%r11
+
+	vpshufd	$0x00,%ymm11,%ymm8
+	vpshufd	$0x55,%ymm11,%ymm9
+	vmovdqa	%ymm8,128-256(%rcx)
+	vpshufd	$0xaa,%ymm11,%ymm10
+	vmovdqa	%ymm9,160-256(%rcx)
+	vpshufd	$0xff,%ymm11,%ymm11
+	vmovdqa	%ymm10,192-256(%rcx)
+	vmovdqa	%ymm11,224-256(%rcx)
+
+	vpshufd	$0x00,%ymm3,%ymm0
+	vpshufd	$0x55,%ymm3,%ymm1
+	vmovdqa	%ymm0,256-256(%rcx)
+	vpshufd	$0xaa,%ymm3,%ymm2
+	vmovdqa	%ymm1,288-256(%rcx)
+	vpshufd	$0xff,%ymm3,%ymm3
+	vmovdqa	%ymm2,320-256(%rcx)
+	vmovdqa	%ymm3,352-256(%rcx)
+
+	vpshufd	$0x00,%ymm15,%ymm12
+	vpshufd	$0x55,%ymm15,%ymm13
+	vmovdqa	%ymm12,384-512(%rax)
+	vpshufd	$0xaa,%ymm15,%ymm14
+	vmovdqa	%ymm13,416-512(%rax)
+	vpshufd	$0xff,%ymm15,%ymm15
+	vmovdqa	%ymm14,448-512(%rax)
+	vmovdqa	%ymm15,480-512(%rax)
+
+	vpshufd	$0x00,%ymm7,%ymm4
+	vpshufd	$0x55,%ymm7,%ymm5
+	vpaddd	.Lincy(%rip),%ymm4,%ymm4
+	vpshufd	$0xaa,%ymm7,%ymm6
+	vmovdqa	%ymm5,544-512(%rax)
+	vpshufd	$0xff,%ymm7,%ymm7
+	vmovdqa	%ymm6,576-512(%rax)
+	vmovdqa	%ymm7,608-512(%rax)
+
+	jmp	.Loop_enter8x
+
+.align	32
+.Loop_outer8x:
+	vmovdqa	128-256(%rcx),%ymm8
+	vmovdqa	160-256(%rcx),%ymm9
+	vmovdqa	192-256(%rcx),%ymm10
+	vmovdqa	224-256(%rcx),%ymm11
+	vmovdqa	256-256(%rcx),%ymm0
+	vmovdqa	288-256(%rcx),%ymm1
+	vmovdqa	320-256(%rcx),%ymm2
+	vmovdqa	352-256(%rcx),%ymm3
+	vmovdqa	384-512(%rax),%ymm12
+	vmovdqa	416-512(%rax),%ymm13
+	vmovdqa	448-512(%rax),%ymm14
+	vmovdqa	480-512(%rax),%ymm15
+	vmovdqa	512-512(%rax),%ymm4
+	vmovdqa	544-512(%rax),%ymm5
+	vmovdqa	576-512(%rax),%ymm6
+	vmovdqa	608-512(%rax),%ymm7
+	vpaddd	.Leight(%rip),%ymm4,%ymm4
+
+.Loop_enter8x:
+	vmovdqa	%ymm14,64(%rsp)
+	vmovdqa	%ymm15,96(%rsp)
+	vbroadcasti128	(%r10),%ymm15
+	vmovdqa	%ymm4,512-512(%rax)
+	movl	$10,%eax
+	jmp	.Loop8x
+
+.align	32
+.Loop8x:
+	vpaddd	%ymm0,%ymm8,%ymm8
+	vpxor	%ymm4,%ymm8,%ymm4
+	vpshufb	%ymm15,%ymm4,%ymm4
+	vpaddd	%ymm1,%ymm9,%ymm9
+	vpxor	%ymm5,%ymm9,%ymm5
+	vpshufb	%ymm15,%ymm5,%ymm5
+	vpaddd	%ymm4,%ymm12,%ymm12
+	vpxor	%ymm0,%ymm12,%ymm0
+	vpslld	$12,%ymm0,%ymm14
+	vpsrld	$20,%ymm0,%ymm0
+	vpor	%ymm0,%ymm14,%ymm0
+	vbroadcasti128	(%r11),%ymm14
+	vpaddd	%ymm5,%ymm13,%ymm13
+	vpxor	%ymm1,%ymm13,%ymm1
+	vpslld	$12,%ymm1,%ymm15
+	vpsrld	$20,%ymm1,%ymm1
+	vpor	%ymm1,%ymm15,%ymm1
+	vpaddd	%ymm0,%ymm8,%ymm8
+	vpxor	%ymm4,%ymm8,%ymm4
+	vpshufb	%ymm14,%ymm4,%ymm4
+	vpaddd	%ymm1,%ymm9,%ymm9
+	vpxor	%ymm5,%ymm9,%ymm5
+	vpshufb	%ymm14,%ymm5,%ymm5
+	vpaddd	%ymm4,%ymm12,%ymm12
+	vpxor	%ymm0,%ymm12,%ymm0
+	vpslld	$7,%ymm0,%ymm15
+	vpsrld	$25,%ymm0,%ymm0
+	vpor	%ymm0,%ymm15,%ymm0
+	vbroadcasti128	(%r10),%ymm15
+	vpaddd	%ymm5,%ymm13,%ymm13
+	vpxor	%ymm1,%ymm13,%ymm1
+	vpslld	$7,%ymm1,%ymm14
+	vpsrld	$25,%ymm1,%ymm1
+	vpor	%ymm1,%ymm14,%ymm1
+	vmovdqa	%ymm12,0(%rsp)
+	vmovdqa	%ymm13,32(%rsp)
+	vmovdqa	64(%rsp),%ymm12
+	vmovdqa	96(%rsp),%ymm13
+	vpaddd	%ymm2,%ymm10,%ymm10
+	vpxor	%ymm6,%ymm10,%ymm6
+	vpshufb	%ymm15,%ymm6,%ymm6
+	vpaddd	%ymm3,%ymm11,%ymm11
+	vpxor	%ymm7,%ymm11,%ymm7
+	vpshufb	%ymm15,%ymm7,%ymm7
+	vpaddd	%ymm6,%ymm12,%ymm12
+	vpxor	%ymm2,%ymm12,%ymm2
+	vpslld	$12,%ymm2,%ymm14
+	vpsrld	$20,%ymm2,%ymm2
+	vpor	%ymm2,%ymm14,%ymm2
+	vbroadcasti128	(%r11),%ymm14
+	vpaddd	%ymm7,%ymm13,%ymm13
+	vpxor	%ymm3,%ymm13,%ymm3
+	vpslld	$12,%ymm3,%ymm15
+	vpsrld	$20,%ymm3,%ymm3
+	vpor	%ymm3,%ymm15,%ymm3
+	vpaddd	%ymm2,%ymm10,%ymm10
+	vpxor	%ymm6,%ymm10,%ymm6
+	vpshufb	%ymm14,%ymm6,%ymm6
+	vpaddd	%ymm3,%ymm11,%ymm11
+	vpxor	%ymm7,%ymm11,%ymm7
+	vpshufb	%ymm14,%ymm7,%ymm7
+	vpaddd	%ymm6,%ymm12,%ymm12
+	vpxor	%ymm2,%ymm12,%ymm2
+	vpslld	$7,%ymm2,%ymm15
+	vpsrld	$25,%ymm2,%ymm2
+	vpor	%ymm2,%ymm15,%ymm2
+	vbroadcasti128	(%r10),%ymm15
+	vpaddd	%ymm7,%ymm13,%ymm13
+	vpxor	%ymm3,%ymm13,%ymm3
+	vpslld	$7,%ymm3,%ymm14
+	vpsrld	$25,%ymm3,%ymm3
+	vpor	%ymm3,%ymm14,%ymm3
+	vpaddd	%ymm1,%ymm8,%ymm8
+	vpxor	%ymm7,%ymm8,%ymm7
+	vpshufb	%ymm15,%ymm7,%ymm7
+	vpaddd	%ymm2,%ymm9,%ymm9
+	vpxor	%ymm4,%ymm9,%ymm4
+	vpshufb	%ymm15,%ymm4,%ymm4
+	vpaddd	%ymm7,%ymm12,%ymm12
+	vpxor	%ymm1,%ymm12,%ymm1
+	vpslld	$12,%ymm1,%ymm14
+	vpsrld	$20,%ymm1,%ymm1
+	vpor	%ymm1,%ymm14,%ymm1
+	vbroadcasti128	(%r11),%ymm14
+	vpaddd	%ymm4,%ymm13,%ymm13
+	vpxor	%ymm2,%ymm13,%ymm2
+	vpslld	$12,%ymm2,%ymm15
+	vpsrld	$20,%ymm2,%ymm2
+	vpor	%ymm2,%ymm15,%ymm2
+	vpaddd	%ymm1,%ymm8,%ymm8
+	vpxor	%ymm7,%ymm8,%ymm7
+	vpshufb	%ymm14,%ymm7,%ymm7
+	vpaddd	%ymm2,%ymm9,%ymm9
+	vpxor	%ymm4,%ymm9,%ymm4
+	vpshufb	%ymm14,%ymm4,%ymm4
+	vpaddd	%ymm7,%ymm12,%ymm12
+	vpxor	%ymm1,%ymm12,%ymm1
+	vpslld	$7,%ymm1,%ymm15
+	vpsrld	$25,%ymm1,%ymm1
+	vpor	%ymm1,%ymm15,%ymm1
+	vbroadcasti128	(%r10),%ymm15
+	vpaddd	%ymm4,%ymm13,%ymm13
+	vpxor	%ymm2,%ymm13,%ymm2
+	vpslld	$7,%ymm2,%ymm14
+	vpsrld	$25,%ymm2,%ymm2
+	vpor	%ymm2,%ymm14,%ymm2
+	vmovdqa	%ymm12,64(%rsp)
+	vmovdqa	%ymm13,96(%rsp)
+	vmovdqa	0(%rsp),%ymm12
+	vmovdqa	32(%rsp),%ymm13
+	vpaddd	%ymm3,%ymm10,%ymm10
+	vpxor	%ymm5,%ymm10,%ymm5
+	vpshufb	%ymm15,%ymm5,%ymm5
+	vpaddd	%ymm0,%ymm11,%ymm11
+	vpxor	%ymm6,%ymm11,%ymm6
+	vpshufb	%ymm15,%ymm6,%ymm6
+	vpaddd	%ymm5,%ymm12,%ymm12
+	vpxor	%ymm3,%ymm12,%ymm3
+	vpslld	$12,%ymm3,%ymm14
+	vpsrld	$20,%ymm3,%ymm3
+	vpor	%ymm3,%ymm14,%ymm3
+	vbroadcasti128	(%r11),%ymm14
+	vpaddd	%ymm6,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm13,%ymm0
+	vpslld	$12,%ymm0,%ymm15
+	vpsrld	$20,%ymm0,%ymm0
+	vpor	%ymm0,%ymm15,%ymm0
+	vpaddd	%ymm3,%ymm10,%ymm10
+	vpxor	%ymm5,%ymm10,%ymm5
+	vpshufb	%ymm14,%ymm5,%ymm5
+	vpaddd	%ymm0,%ymm11,%ymm11
+	vpxor	%ymm6,%ymm11,%ymm6
+	vpshufb	%ymm14,%ymm6,%ymm6
+	vpaddd	%ymm5,%ymm12,%ymm12
+	vpxor	%ymm3,%ymm12,%ymm3
+	vpslld	$7,%ymm3,%ymm15
+	vpsrld	$25,%ymm3,%ymm3
+	vpor	%ymm3,%ymm15,%ymm3
+	vbroadcasti128	(%r10),%ymm15
+	vpaddd	%ymm6,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm13,%ymm0
+	vpslld	$7,%ymm0,%ymm14
+	vpsrld	$25,%ymm0,%ymm0
+	vpor	%ymm0,%ymm14,%ymm0
+	decl	%eax
+	jnz	.Loop8x
+
+	leaq	512(%rsp),%rax
+	vpaddd	128-256(%rcx),%ymm8,%ymm8
+	vpaddd	160-256(%rcx),%ymm9,%ymm9
+	vpaddd	192-256(%rcx),%ymm10,%ymm10
+	vpaddd	224-256(%rcx),%ymm11,%ymm11
+
+	vpunpckldq	%ymm9,%ymm8,%ymm14
+	vpunpckldq	%ymm11,%ymm10,%ymm15
+	vpunpckhdq	%ymm9,%ymm8,%ymm8
+	vpunpckhdq	%ymm11,%ymm10,%ymm10
+	vpunpcklqdq	%ymm15,%ymm14,%ymm9
+	vpunpckhqdq	%ymm15,%ymm14,%ymm14
+	vpunpcklqdq	%ymm10,%ymm8,%ymm11
+	vpunpckhqdq	%ymm10,%ymm8,%ymm8
+	vpaddd	256-256(%rcx),%ymm0,%ymm0
+	vpaddd	288-256(%rcx),%ymm1,%ymm1
+	vpaddd	320-256(%rcx),%ymm2,%ymm2
+	vpaddd	352-256(%rcx),%ymm3,%ymm3
+
+	vpunpckldq	%ymm1,%ymm0,%ymm10
+	vpunpckldq	%ymm3,%ymm2,%ymm15
+	vpunpckhdq	%ymm1,%ymm0,%ymm0
+	vpunpckhdq	%ymm3,%ymm2,%ymm2
+	vpunpcklqdq	%ymm15,%ymm10,%ymm1
+	vpunpckhqdq	%ymm15,%ymm10,%ymm10
+	vpunpcklqdq	%ymm2,%ymm0,%ymm3
+	vpunpckhqdq	%ymm2,%ymm0,%ymm0
+	vperm2i128	$0x20,%ymm1,%ymm9,%ymm15
+	vperm2i128	$0x31,%ymm1,%ymm9,%ymm1
+	vperm2i128	$0x20,%ymm10,%ymm14,%ymm9
+	vperm2i128	$0x31,%ymm10,%ymm14,%ymm10
+	vperm2i128	$0x20,%ymm3,%ymm11,%ymm14
+	vperm2i128	$0x31,%ymm3,%ymm11,%ymm3
+	vperm2i128	$0x20,%ymm0,%ymm8,%ymm11
+	vperm2i128	$0x31,%ymm0,%ymm8,%ymm0
+	vmovdqa	%ymm15,0(%rsp)
+	vmovdqa	%ymm9,32(%rsp)
+	vmovdqa	64(%rsp),%ymm15
+	vmovdqa	96(%rsp),%ymm9
+
+	vpaddd	384-512(%rax),%ymm12,%ymm12
+	vpaddd	416-512(%rax),%ymm13,%ymm13
+	vpaddd	448-512(%rax),%ymm15,%ymm15
+	vpaddd	480-512(%rax),%ymm9,%ymm9
+
+	vpunpckldq	%ymm13,%ymm12,%ymm2
+	vpunpckldq	%ymm9,%ymm15,%ymm8
+	vpunpckhdq	%ymm13,%ymm12,%ymm12
+	vpunpckhdq	%ymm9,%ymm15,%ymm15
+	vpunpcklqdq	%ymm8,%ymm2,%ymm13
+	vpunpckhqdq	%ymm8,%ymm2,%ymm2
+	vpunpcklqdq	%ymm15,%ymm12,%ymm9
+	vpunpckhqdq	%ymm15,%ymm12,%ymm12
+	vpaddd	512-512(%rax),%ymm4,%ymm4
+	vpaddd	544-512(%rax),%ymm5,%ymm5
+	vpaddd	576-512(%rax),%ymm6,%ymm6
+	vpaddd	608-512(%rax),%ymm7,%ymm7
+
+	vpunpckldq	%ymm5,%ymm4,%ymm15
+	vpunpckldq	%ymm7,%ymm6,%ymm8
+	vpunpckhdq	%ymm5,%ymm4,%ymm4
+	vpunpckhdq	%ymm7,%ymm6,%ymm6
+	vpunpcklqdq	%ymm8,%ymm15,%ymm5
+	vpunpckhqdq	%ymm8,%ymm15,%ymm15
+	vpunpcklqdq	%ymm6,%ymm4,%ymm7
+	vpunpckhqdq	%ymm6,%ymm4,%ymm4
+	vperm2i128	$0x20,%ymm5,%ymm13,%ymm8
+	vperm2i128	$0x31,%ymm5,%ymm13,%ymm5
+	vperm2i128	$0x20,%ymm15,%ymm2,%ymm13
+	vperm2i128	$0x31,%ymm15,%ymm2,%ymm15
+	vperm2i128	$0x20,%ymm7,%ymm9,%ymm2
+	vperm2i128	$0x31,%ymm7,%ymm9,%ymm7
+	vperm2i128	$0x20,%ymm4,%ymm12,%ymm9
+	vperm2i128	$0x31,%ymm4,%ymm12,%ymm4
+	vmovdqa	0(%rsp),%ymm6
+	vmovdqa	32(%rsp),%ymm12
+
+	cmpq	$512,%rdx
+	jb	.Ltail8x
+
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	leaq	128(%rsi),%rsi
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	leaq	128(%rdi),%rdi
+
+	vpxor	0(%rsi),%ymm12,%ymm12
+	vpxor	32(%rsi),%ymm13,%ymm13
+	vpxor	64(%rsi),%ymm10,%ymm10
+	vpxor	96(%rsi),%ymm15,%ymm15
+	leaq	128(%rsi),%rsi
+	vmovdqu	%ymm12,0(%rdi)
+	vmovdqu	%ymm13,32(%rdi)
+	vmovdqu	%ymm10,64(%rdi)
+	vmovdqu	%ymm15,96(%rdi)
+	leaq	128(%rdi),%rdi
+
+	vpxor	0(%rsi),%ymm14,%ymm14
+	vpxor	32(%rsi),%ymm2,%ymm2
+	vpxor	64(%rsi),%ymm3,%ymm3
+	vpxor	96(%rsi),%ymm7,%ymm7
+	leaq	128(%rsi),%rsi
+	vmovdqu	%ymm14,0(%rdi)
+	vmovdqu	%ymm2,32(%rdi)
+	vmovdqu	%ymm3,64(%rdi)
+	vmovdqu	%ymm7,96(%rdi)
+	leaq	128(%rdi),%rdi
+
+	vpxor	0(%rsi),%ymm11,%ymm11
+	vpxor	32(%rsi),%ymm9,%ymm9
+	vpxor	64(%rsi),%ymm0,%ymm0
+	vpxor	96(%rsi),%ymm4,%ymm4
+	leaq	128(%rsi),%rsi
+	vmovdqu	%ymm11,0(%rdi)
+	vmovdqu	%ymm9,32(%rdi)
+	vmovdqu	%ymm0,64(%rdi)
+	vmovdqu	%ymm4,96(%rdi)
+	leaq	128(%rdi),%rdi
+
+	subq	$512,%rdx
+	jnz	.Loop_outer8x
+
+	jmp	.Ldone8x
+
+.Ltail8x:
+	cmpq	$448,%rdx
+	jae	.L448_or_more8x
+	cmpq	$384,%rdx
+	jae	.L384_or_more8x
+	cmpq	$320,%rdx
+	jae	.L320_or_more8x
+	cmpq	$256,%rdx
+	jae	.L256_or_more8x
+	cmpq	$192,%rdx
+	jae	.L192_or_more8x
+	cmpq	$128,%rdx
+	jae	.L128_or_more8x
+	cmpq	$64,%rdx
+	jae	.L64_or_more8x
+
+	xorq	%r10,%r10
+	vmovdqa	%ymm6,0(%rsp)
+	vmovdqa	%ymm8,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L64_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	je	.Ldone8x
+
+	leaq	64(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm1,0(%rsp)
+	leaq	64(%rdi),%rdi
+	subq	$64,%rdx
+	vmovdqa	%ymm5,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L128_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	je	.Ldone8x
+
+	leaq	128(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm12,0(%rsp)
+	leaq	128(%rdi),%rdi
+	subq	$128,%rdx
+	vmovdqa	%ymm13,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L192_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	je	.Ldone8x
+
+	leaq	192(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm10,0(%rsp)
+	leaq	192(%rdi),%rdi
+	subq	$192,%rdx
+	vmovdqa	%ymm15,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L256_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vpxor	192(%rsi),%ymm10,%ymm10
+	vpxor	224(%rsi),%ymm15,%ymm15
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	vmovdqu	%ymm10,192(%rdi)
+	vmovdqu	%ymm15,224(%rdi)
+	je	.Ldone8x
+
+	leaq	256(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm14,0(%rsp)
+	leaq	256(%rdi),%rdi
+	subq	$256,%rdx
+	vmovdqa	%ymm2,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L320_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vpxor	192(%rsi),%ymm10,%ymm10
+	vpxor	224(%rsi),%ymm15,%ymm15
+	vpxor	256(%rsi),%ymm14,%ymm14
+	vpxor	288(%rsi),%ymm2,%ymm2
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	vmovdqu	%ymm10,192(%rdi)
+	vmovdqu	%ymm15,224(%rdi)
+	vmovdqu	%ymm14,256(%rdi)
+	vmovdqu	%ymm2,288(%rdi)
+	je	.Ldone8x
+
+	leaq	320(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm3,0(%rsp)
+	leaq	320(%rdi),%rdi
+	subq	$320,%rdx
+	vmovdqa	%ymm7,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L384_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vpxor	192(%rsi),%ymm10,%ymm10
+	vpxor	224(%rsi),%ymm15,%ymm15
+	vpxor	256(%rsi),%ymm14,%ymm14
+	vpxor	288(%rsi),%ymm2,%ymm2
+	vpxor	320(%rsi),%ymm3,%ymm3
+	vpxor	352(%rsi),%ymm7,%ymm7
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	vmovdqu	%ymm10,192(%rdi)
+	vmovdqu	%ymm15,224(%rdi)
+	vmovdqu	%ymm14,256(%rdi)
+	vmovdqu	%ymm2,288(%rdi)
+	vmovdqu	%ymm3,320(%rdi)
+	vmovdqu	%ymm7,352(%rdi)
+	je	.Ldone8x
+
+	leaq	384(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm11,0(%rsp)
+	leaq	384(%rdi),%rdi
+	subq	$384,%rdx
+	vmovdqa	%ymm9,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L448_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vpxor	192(%rsi),%ymm10,%ymm10
+	vpxor	224(%rsi),%ymm15,%ymm15
+	vpxor	256(%rsi),%ymm14,%ymm14
+	vpxor	288(%rsi),%ymm2,%ymm2
+	vpxor	320(%rsi),%ymm3,%ymm3
+	vpxor	352(%rsi),%ymm7,%ymm7
+	vpxor	384(%rsi),%ymm11,%ymm11
+	vpxor	416(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	vmovdqu	%ymm10,192(%rdi)
+	vmovdqu	%ymm15,224(%rdi)
+	vmovdqu	%ymm14,256(%rdi)
+	vmovdqu	%ymm2,288(%rdi)
+	vmovdqu	%ymm3,320(%rdi)
+	vmovdqu	%ymm7,352(%rdi)
+	vmovdqu	%ymm11,384(%rdi)
+	vmovdqu	%ymm9,416(%rdi)
+	je	.Ldone8x
+
+	leaq	448(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm0,0(%rsp)
+	leaq	448(%rdi),%rdi
+	subq	$448,%rdx
+	vmovdqa	%ymm4,32(%rsp)
+
+.Loop_tail8x:
+	movzbl	(%rsi,%r10,1),%eax
+	movzbl	(%rsp,%r10,1),%ecx
+	leaq	1(%r10),%r10
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r10,1)
+	decq	%rdx
+	jnz	.Loop_tail8x
+
+.Ldone8x:
+	vzeroall
+	leaq	(%r9),%rsp
+.L8x_epilogue:
+	.byte	0xf3,0xc3
+.size	ChaCha20_8x,.-ChaCha20_8x
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
new file mode 100644
index 0000000..42e25f4
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
@@ -0,0 +1,3066 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.data	
+	/* Constant pool for the routines in this file; every entry is 16 bytes and the pool starts 16-byte aligned. */
+.align	16
+one: /* one..eight: 128-bit values whose low qword is n; used with vpaddd/vpsubd to step dword 0 of a counter block */
+.quad	1,0
+two:
+.quad	2,0
+three:
+.quad	3,0
+four:
+.quad	4,0
+five:
+.quad	5,0
+six:
+.quad	6,0
+seven:
+.quad	7,0
+eight:
+.quad	8,0
+
+OR_MASK: /* OR-ed into the initial counter block: sets bit 31 of dword 3 */
+.long	0x00000000,0x00000000,0x00000000,0x80000000
+poly: /* folding constant; the code multiplies by its high qword via vpclmulqdq $0x10 */
+.quad	0x1, 0xc200000000000000
+mask: /* vpshufb pattern: every output dword = source bytes 13,14,15,12 (dword 3 rotated by one byte) */
+.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
+con1: /* initial round constant 1 in every dword; doubled with vpslld $1 after each use */
+.long	1,1,1,1
+con2: /* round constant 0x1b in every dword, loaded after eight doublings of con1 */
+.long	0x1b,0x1b,0x1b,0x1b
+con3: /* vpshufb pattern: low 8 output bytes zeroed (index -1), high 8 bytes = source dword 1 repeated twice */
+.byte	-1,-1,-1,-1,-1,-1,-1,-1,4,5,6,7,4,5,6,7
+and_mask: /* clears dword 0 and keeps dwords 1-3 */
+.long	0,0xffffffff, 0xffffffff, 0xffffffff
+.text	
+.type	GFMUL,@function /* GFMUL (no .globl, file-local): xmm0 = carry-less product xmm0 x xmm1, folded twice against poly */
+.align	16 /* In: xmm0, xmm1. Out: xmm0. Overwrites xmm2-xmm5; xmm1 is left unchanged. */
+GFMUL:
+.cfi_startproc	
+	vpclmulqdq	$0x00,%xmm1,%xmm0,%xmm2 /* xmm2 = lo(xmm0) x lo(xmm1) */
+	vpclmulqdq	$0x11,%xmm1,%xmm0,%xmm5 /* xmm5 = hi(xmm0) x hi(xmm1) */
+	vpclmulqdq	$0x10,%xmm1,%xmm0,%xmm3 /* xmm3 = lo(xmm0) x hi(xmm1) */
+	vpclmulqdq	$0x01,%xmm1,%xmm0,%xmm4 /* xmm4 = hi(xmm0) x lo(xmm1) */
+	vpxor	%xmm4,%xmm3,%xmm3 /* xmm3 = sum of the two cross terms */
+	vpslldq	$8,%xmm3,%xmm4 /* low half of the cross terms moved up 64 bits */
+	vpsrldq	$8,%xmm3,%xmm3 /* high half of the cross terms moved down 64 bits */
+	vpxor	%xmm4,%xmm2,%xmm2 /* xmm2 = low 128 bits of the 256-bit product */
+	vpxor	%xmm3,%xmm5,%xmm5 /* xmm5 = high 128 bits of the 256-bit product */
+	/* First folding step: xmm2 = swap64(xmm2) ^ (lo(xmm2) x hi(poly)) */
+	vpclmulqdq	$0x10,poly(%rip),%xmm2,%xmm3
+	vpshufd	$78,%xmm2,%xmm4 /* $78 swaps the two 64-bit halves */
+	vpxor	%xmm4,%xmm3,%xmm2
+	/* Second folding step, identical to the first */
+	vpclmulqdq	$0x10,poly(%rip),%xmm2,%xmm3
+	vpshufd	$78,%xmm2,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm2
+
+	vpxor	%xmm5,%xmm2,%xmm0 /* result = folded low half ^ high half */
+	.byte	0xf3,0xc3 /* encoded "rep ret" */
+.cfi_endproc	
+.size	GFMUL, .-GFMUL
+.globl	aesgcmsiv_htable_init /* Fills an 8-entry (128-byte) table at (%rdi) with successive GFMUL powers of the 16-byte value at (%rsi). */
+.hidden aesgcmsiv_htable_init /* Both pointers are accessed with vmovdqa, so they must be 16-byte aligned. */
+.type	aesgcmsiv_htable_init,@function
+.align	16
+aesgcmsiv_htable_init:
+.cfi_startproc	
+	vmovdqa	(%rsi),%xmm0 /* xmm0 = H, the running power */
+	vmovdqa	%xmm0,%xmm1 /* xmm1 = H, the fixed multiplier (GFMUL preserves xmm1) */
+	vmovdqa	%xmm0,(%rdi) /* entry 0 = H */
+	call	GFMUL /* each call multiplies the running power by H once more */
+	vmovdqa	%xmm0,16(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,32(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,48(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,64(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,80(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,96(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,112(%rdi) /* entry 7, the result of seven GFMUL calls */
+	.byte	0xf3,0xc3 /* encoded "rep ret" */
+.cfi_endproc	
+.size	aesgcmsiv_htable_init, .-aesgcmsiv_htable_init
+.globl	aesgcmsiv_htable6_init /* Six-entry (96-byte) variant of the table fill: successive GFMUL powers of the value at (%rsi) stored at (%rdi). */
+.hidden aesgcmsiv_htable6_init /* Both pointers are accessed with vmovdqa, so they must be 16-byte aligned. */
+.type	aesgcmsiv_htable6_init,@function
+.align	16
+aesgcmsiv_htable6_init:
+.cfi_startproc	
+	vmovdqa	(%rsi),%xmm0 /* xmm0 = H, the running power */
+	vmovdqa	%xmm0,%xmm1 /* xmm1 = H, the fixed multiplier (GFMUL preserves xmm1) */
+	vmovdqa	%xmm0,(%rdi) /* entry 0 = H */
+	call	GFMUL /* each call multiplies the running power by H once more */
+	vmovdqa	%xmm0,16(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,32(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,48(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,64(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,80(%rdi) /* entry 5, the result of five GFMUL calls */
+	.byte	0xf3,0xc3 /* encoded "rep ret" */
+.cfi_endproc	
+.size	aesgcmsiv_htable6_init, .-aesgcmsiv_htable6_init
+.globl	aesgcmsiv_htable_polyval /* Folds %rdx bytes of input at (%rsi) into the 16-byte accumulator at (%rcx), using the 8-entry power table at (%rdi). */
+.hidden aesgcmsiv_htable_polyval /* Table is read via aligned-agnostic VEX memory operands; the accumulator is loaded with vmovdqa (16-byte aligned) and stored with vmovdqu. */
+.type	aesgcmsiv_htable_polyval,@function /* NOTE(review): the prefix loop steps by 16 and exits only on zero, so the length is presumably a multiple of 16 - confirm with callers. */
+.align	16
+aesgcmsiv_htable_polyval:
+.cfi_startproc	
+	testq	%rdx,%rdx
+	jnz	.Lhtable_polyval_start
+	.byte	0xf3,0xc3 /* zero length: return ("rep ret") without touching the accumulator */
+
+.Lhtable_polyval_start:
+	vzeroall
+	/* r11 = length mod 128: bytes that do not fill a whole 8-block group. */
+	/* They are processed first (the prefix), so the main loop only sees full 128-byte groups. */
+
+	movq	%rdx,%r11
+	andq	$127,%r11
+
+	jz	.Lhtable_polyval_no_prefix
+
+	vpxor	%xmm9,%xmm9,%xmm9
+	vmovdqa	(%rcx),%xmm1 /* xmm1 = current accumulator */
+	subq	%r11,%rdx /* rdx = bytes left for the main loop (a multiple of 128) */
+
+	subq	$16,%r11 /* r11 = table offset for the first prefix block */
+	/* First prefix block: XOR the accumulator into it, then start the four partial-product sums. */
+
+	vmovdqu	(%rsi),%xmm0
+	vpxor	%xmm1,%xmm0,%xmm0
+
+	vpclmulqdq	$0x01,(%rdi,%r11,1),%xmm0,%xmm5 /* xmm5 = cross terms (hi x lo, then lo x hi) */
+	vpclmulqdq	$0x00,(%rdi,%r11,1),%xmm0,%xmm3 /* xmm3 = lo x lo sum */
+	vpclmulqdq	$0x11,(%rdi,%r11,1),%xmm0,%xmm4 /* xmm4 = hi x hi sum */
+	vpclmulqdq	$0x10,(%rdi,%r11,1),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+	leaq	16(%rsi),%rsi
+	testq	%r11,%r11
+	jnz	.Lhtable_polyval_prefix_loop
+	jmp	.Lhtable_polyval_prefix_complete
+
+	/* Remaining prefix blocks: walk the table downwards while the input pointer walks upwards. */
+.align	64
+.Lhtable_polyval_prefix_loop:
+	subq	$16,%r11
+
+	vmovdqu	(%rsi),%xmm0
+
+	vpclmulqdq	$0x00,(%rdi,%r11,1),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm3,%xmm3
+	vpclmulqdq	$0x11,(%rdi,%r11,1),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm4,%xmm4
+	vpclmulqdq	$0x01,(%rdi,%r11,1),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x10,(%rdi,%r11,1),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+	testq	%r11,%r11 /* sets ZF for the jnz below; the leaq in between does not modify flags */
+
+	leaq	16(%rsi),%rsi
+
+	jnz	.Lhtable_polyval_prefix_loop
+
+.Lhtable_polyval_prefix_complete:
+	vpsrldq	$8,%xmm5,%xmm6 /* split the cross terms into their high and low 64-bit parts */
+	vpslldq	$8,%xmm5,%xmm5
+
+	vpxor	%xmm6,%xmm4,%xmm9 /* xmm9 = high 128 bits of the unreduced sum */
+	vpxor	%xmm5,%xmm3,%xmm1 /* xmm1 = low 128 bits of the unreduced sum */
+
+	jmp	.Lhtable_polyval_main_loop
+
+.Lhtable_polyval_no_prefix:
+	/* No prefix: enter the main loop with low half = 0 and high half = accumulator, */
+	/* so the deferred reduction at the top of the loop yields the accumulator itself. */
+
+
+	vpxor	%xmm1,%xmm1,%xmm1
+	vmovdqa	(%rcx),%xmm9
+
+.align	64
+.Lhtable_polyval_main_loop:
+	subq	$0x80,%rdx
+	jb	.Lhtable_polyval_out /* fewer than 128 bytes left: finish */
+
+	vmovdqu	112(%rsi),%xmm0 /* last block of the group pairs with table entry 0 */
+
+	vpclmulqdq	$0x01,(%rdi),%xmm0,%xmm5
+	vpclmulqdq	$0x00,(%rdi),%xmm0,%xmm3
+	vpclmulqdq	$0x11,(%rdi),%xmm0,%xmm4
+	vpclmulqdq	$0x10,(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+
+	vmovdqu	96(%rsi),%xmm0
+	vpclmulqdq	$0x01,16(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x00,16(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm3,%xmm3
+	vpclmulqdq	$0x11,16(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm4,%xmm4
+	vpclmulqdq	$0x10,16(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+
+	/* Deferred reduction of the previous low half (xmm1), interleaved with the multiplies: first folding step. */
+	vmovdqu	80(%rsi),%xmm0
+
+	vpclmulqdq	$0x10,poly(%rip),%xmm1,%xmm7
+	vpalignr	$8,%xmm1,%xmm1,%xmm1 /* rotate xmm1 by 64 bits (swap halves) */
+
+	vpclmulqdq	$0x01,32(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x00,32(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm3,%xmm3
+	vpclmulqdq	$0x11,32(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm4,%xmm4
+	vpclmulqdq	$0x10,32(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+
+	vpxor	%xmm7,%xmm1,%xmm1 /* completes the first folding step */
+
+	vmovdqu	64(%rsi),%xmm0
+
+	vpclmulqdq	$0x01,48(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x00,48(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm3,%xmm3
+	vpclmulqdq	$0x11,48(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm4,%xmm4
+	vpclmulqdq	$0x10,48(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+	/* Second folding step of the deferred reduction. */
+	vmovdqu	48(%rsi),%xmm0
+
+	vpclmulqdq	$0x10,poly(%rip),%xmm1,%xmm7
+	vpalignr	$8,%xmm1,%xmm1,%xmm1
+
+	vpclmulqdq	$0x01,64(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x00,64(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm3,%xmm3
+	vpclmulqdq	$0x11,64(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm4,%xmm4
+	vpclmulqdq	$0x10,64(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+
+	vpxor	%xmm7,%xmm1,%xmm1 /* completes the second folding step */
+
+	vmovdqu	32(%rsi),%xmm0
+
+	vpclmulqdq	$0x01,80(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x00,80(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm3,%xmm3
+	vpclmulqdq	$0x11,80(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm4,%xmm4
+	vpclmulqdq	$0x10,80(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+
+	vpxor	%xmm9,%xmm1,%xmm1 /* xmm1 = fully reduced running value (folded low half ^ high half) */
+
+	vmovdqu	16(%rsi),%xmm0
+
+	vpclmulqdq	$0x01,96(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x00,96(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm3,%xmm3
+	vpclmulqdq	$0x11,96(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm4,%xmm4
+	vpclmulqdq	$0x10,96(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+	/* First block of the group absorbs the running value and pairs with table entry 7. */
+	vmovdqu	0(%rsi),%xmm0
+	vpxor	%xmm1,%xmm0,%xmm0
+
+	vpclmulqdq	$0x01,112(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x00,112(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm3,%xmm3
+	vpclmulqdq	$0x11,112(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm4,%xmm4
+	vpclmulqdq	$0x10,112(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+	/* Combine the partial sums into an unreduced 256-bit value: xmm9 = high half, xmm1 = low half. */
+	vpsrldq	$8,%xmm5,%xmm6
+	vpslldq	$8,%xmm5,%xmm5
+
+	vpxor	%xmm6,%xmm4,%xmm9
+	vpxor	%xmm5,%xmm3,%xmm1
+
+	leaq	128(%rsi),%rsi
+	jmp	.Lhtable_polyval_main_loop
+
+	/* Final reduction: two folding steps on the low half, then XOR in the high half and store. */
+
+.Lhtable_polyval_out:
+	vpclmulqdq	$0x10,poly(%rip),%xmm1,%xmm6
+	vpalignr	$8,%xmm1,%xmm1,%xmm1
+	vpxor	%xmm6,%xmm1,%xmm1
+
+	vpclmulqdq	$0x10,poly(%rip),%xmm1,%xmm6
+	vpalignr	$8,%xmm1,%xmm1,%xmm1
+	vpxor	%xmm6,%xmm1,%xmm1
+	vpxor	%xmm9,%xmm1,%xmm1
+
+	vmovdqu	%xmm1,(%rcx) /* write the updated accumulator back */
+	vzeroupper
+	.byte	0xf3,0xc3 /* encoded "rep ret" */
+.cfi_endproc	
+.size	aesgcmsiv_htable_polyval,.-aesgcmsiv_htable_polyval
+.globl	aesgcmsiv_polyval_horner /* Block-at-a-time update: for each of %rcx 16-byte blocks at (%rdx), acc = GFMUL(acc ^ block, H). */
+.hidden aesgcmsiv_polyval_horner /* acc lives at (%rdi) and H at (%rsi); both are accessed with vmovdqa and so must be 16-byte aligned. */
+.type	aesgcmsiv_polyval_horner,@function
+.align	16
+aesgcmsiv_polyval_horner:
+.cfi_startproc	
+	testq	%rcx,%rcx
+	jnz	.Lpolyval_horner_start
+	.byte	0xf3,0xc3 /* zero blocks: return ("rep ret") without touching the accumulator */
+
+.Lpolyval_horner_start:
+	/* r10 = byte offset into the input; rcx is converted from a block count to a byte count. */
+
+
+	xorq	%r10,%r10
+	shlq	$4,%rcx
+
+	vmovdqa	(%rsi),%xmm1 /* xmm1 = H; GFMUL leaves it unchanged across calls */
+	vmovdqa	(%rdi),%xmm0 /* xmm0 = accumulator */
+
+.Lpolyval_horner_loop:
+	vpxor	(%rdx,%r10,1),%xmm0,%xmm0 /* absorb the next input block */
+	call	GFMUL /* xmm0 = xmm0 * H (overwrites xmm2-xmm5) */
+
+	addq	$16,%r10
+	cmpq	%r10,%rcx
+	jne	.Lpolyval_horner_loop
+
+
+	vmovdqa	%xmm0,(%rdi) /* write the updated accumulator back */
+	.byte	0xf3,0xc3 /* encoded "rep ret" */
+.cfi_endproc	
+.size	aesgcmsiv_polyval_horner,.-aesgcmsiv_polyval_horner
+.globl	aes128gcmsiv_aes_ks /* Expands the 16-byte key at (%rdi) into 11 round keys (176 bytes) written to (%rsi). */
+.hidden aes128gcmsiv_aes_ks /* The key is read with vmovdqu; the output is written with vmovdqa and must be 16-byte aligned. */
+.type	aes128gcmsiv_aes_ks,@function
+.align	16
+aes128gcmsiv_aes_ks:
+.cfi_startproc	
+	vmovdqu	(%rdi),%xmm1
+	vmovdqa	%xmm1,(%rsi) /* round key 0 is the key itself */
+
+	vmovdqa	con1(%rip),%xmm0 /* xmm0 = round constant, starts at 1 in every dword */
+	vmovdqa	mask(%rip),%xmm15 /* xmm15 = byte shuffle that broadcasts the rotated last dword */
+
+	movq	$8,%rax /* eight iterations produce round keys 1..8 */
+
+.Lks128_loop:
+	addq	$16,%rsi
+	subq	$1,%rax /* sets ZF for the jne at the loop bottom; the AVX instructions in between leave flags untouched */
+	vpshufb	%xmm15,%xmm1,%xmm2 /* every dword of xmm2 = last dword of xmm1 rotated by one byte */
+	vaesenclast	%xmm0,%xmm2,%xmm2 /* S-box substitution of xmm2, then XOR with the round constant */
+	vpslld	$1,%xmm0,%xmm0 /* double the round constant for the next round */
+	vpslldq	$4,%xmm1,%xmm3 /* three shift-by-one-dword and XOR steps give the running XOR of the four dwords */
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpslldq	$4,%xmm3,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpslldq	$4,%xmm3,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1 /* xmm1 = next round key */
+	vmovdqa	%xmm1,(%rsi)
+	jne	.Lks128_loop
+	/* Round key 9: same step with the round constant reloaded as 0x1b (con2). */
+	vmovdqa	con2(%rip),%xmm0
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0 /* 0x1b doubled = 0x36 for the final round key */
+	vpslldq	$4,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpslldq	$4,%xmm3,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpslldq	$4,%xmm3,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vmovdqa	%xmm1,16(%rsi) /* rsi still points at round key 8, so this is round key 9 */
+	/* Round key 10. */
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslldq	$4,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpslldq	$4,%xmm3,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpslldq	$4,%xmm3,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vmovdqa	%xmm1,32(%rsi)
+	.byte	0xf3,0xc3 /* encoded "rep ret" */
+.cfi_endproc	
+.size	aes128gcmsiv_aes_ks,.-aes128gcmsiv_aes_ks
+.globl	aes256gcmsiv_aes_ks /* Expands the 32-byte key at (%rdi) into 15 round keys (240 bytes) written to (%rsi). */
+.hidden aes256gcmsiv_aes_ks /* The key is read with vmovdqu; the output is written with vmovdqa and must be 16-byte aligned. */
+.type	aes256gcmsiv_aes_ks,@function
+.align	16
+aes256gcmsiv_aes_ks:
+.cfi_startproc	
+	vmovdqu	(%rdi),%xmm1 /* xmm1 = even-numbered round key state (key bytes 0-15) */
+	vmovdqu	16(%rdi),%xmm3 /* xmm3 = odd-numbered round key state (key bytes 16-31) */
+	vmovdqa	%xmm1,(%rsi)
+	vmovdqa	%xmm3,16(%rsi)
+	vmovdqa	con1(%rip),%xmm0 /* xmm0 = round constant, starts at 1 in every dword */
+	vmovdqa	mask(%rip),%xmm15
+	vpxor	%xmm14,%xmm14,%xmm14 /* xmm14 = 0: lets vaesenclast act as a bare S-box substitution */
+	movq	$6,%rax /* six iterations produce round keys 2..13, two per iteration */
+
+.Lks256_loop:
+	addq	$32,%rsi
+	subq	$1,%rax /* sets ZF for the jne at the loop bottom; the AVX instructions in between leave flags untouched */
+	vpshufb	%xmm15,%xmm3,%xmm2 /* every dword = last dword of xmm3 rotated by one byte */
+	vaesenclast	%xmm0,%xmm2,%xmm2 /* S-box substitution, then XOR with the round constant */
+	vpslld	$1,%xmm0,%xmm0 /* double the round constant */
+	vpsllq	$32,%xmm1,%xmm4 /* vpsllq + con3 shuffle build the running XOR of the four dwords of xmm1 */
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vmovdqa	%xmm1,(%rsi) /* even-numbered round key */
+	vpshufd	$0xff,%xmm1,%xmm2 /* broadcast the last dword of the new key (no byte rotation here) */
+	vaesenclast	%xmm14,%xmm2,%xmm2 /* S-box substitution only: the XOR-ed key is zero */
+	vpsllq	$32,%xmm3,%xmm4 /* same running-XOR construction on xmm3 */
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpshufb	con3(%rip),%xmm3,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpxor	%xmm2,%xmm3,%xmm3
+	vmovdqa	%xmm3,16(%rsi) /* odd-numbered round key */
+	jne	.Lks256_loop
+	/* Round key 14: one more even-key step, stored 32 bytes past round key 12. */
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpsllq	$32,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vmovdqa	%xmm1,32(%rsi)
+	.byte	0xf3,0xc3 /* encoded "rep ret" */
+.cfi_endproc	 /* NOTE(review): no .size directive follows for this symbol, unlike the sibling functions in this file. */
+.globl	aes128gcmsiv_aes_ks_enc_x1 /* Expands the 16-byte key at (%rcx) into 11 round keys at (%rdx) while encrypting the block at (%rdi) into (%rsi). */
+.hidden aes128gcmsiv_aes_ks_enc_x1 /* All four pointers are accessed with vmovdqa and so must be 16-byte aligned. */
+.type	aes128gcmsiv_aes_ks_enc_x1,@function
+.align	16
+aes128gcmsiv_aes_ks_enc_x1:
+.cfi_startproc	
+	vmovdqa	(%rcx),%xmm1 /* xmm1 = current round key, starting with the key itself */
+	vmovdqa	0(%rdi),%xmm4 /* xmm4 = block being encrypted */
+
+	vmovdqa	%xmm1,(%rdx) /* round key 0 */
+	vpxor	%xmm1,%xmm4,%xmm4 /* initial key XOR */
+
+	vmovdqa	con1(%rip),%xmm0 /* xmm0 = round constant, starts at 1 in every dword */
+	vmovdqa	mask(%rip),%xmm15
+	/* Round 1. Rounds 2-8 below repeat this exact pattern with the next store offset. */
+	vpshufb	%xmm15,%xmm1,%xmm2 /* every dword = last dword of xmm1 rotated by one byte */
+	vaesenclast	%xmm0,%xmm2,%xmm2 /* S-box substitution, then XOR with the round constant */
+	vpslld	$1,%xmm0,%xmm0 /* double the round constant */
+	vpsllq	$32,%xmm1,%xmm3 /* vpsllq + con3 shuffle build the running XOR of the four dwords */
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1 /* xmm1 = next round key */
+
+	vaesenc	%xmm1,%xmm4,%xmm4 /* apply one cipher round with the key just derived */
+	vmovdqa	%xmm1,16(%rdx)
+	/* Round 2. */
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,32(%rdx)
+	/* Round 3. */
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,48(%rdx)
+	/* Round 4. */
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,64(%rdx)
+	/* Round 5. */
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,80(%rdx)
+	/* Round 6. */
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,96(%rdx)
+	/* Round 7. */
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,112(%rdx)
+	/* Round 8. */
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,128(%rdx)
+
+	/* Round 9: the round constant is reloaded as 0x1b (con2) instead of being doubled again. */
+	vmovdqa	con2(%rip),%xmm0
+
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0 /* 0x1b doubled = 0x36 for round 10 */
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,144(%rdx)
+	/* Round 10: last round key, applied with vaesenclast. */
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenclast	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,160(%rdx)
+
+
+	vmovdqa	%xmm4,0(%rsi) /* store the encrypted block */
+	.byte	0xf3,0xc3 /* encoded "rep ret" */
+.cfi_endproc	
+.size	aes128gcmsiv_aes_ks_enc_x1,.-aes128gcmsiv_aes_ks_enc_x1
+.globl	aes128gcmsiv_kdf /* Builds four counter blocks from the 16 bytes at (%rdi), encrypts them with the 11 round keys at (%rdx), writes 64 bytes to (%rsi). */
+.hidden aes128gcmsiv_kdf /* All three pointers are accessed with vmovdqa and so must be 16-byte aligned. */
+.type	aes128gcmsiv_kdf,@function
+.align	16
+aes128gcmsiv_kdf:
+.cfi_startproc	
+	/* Block layout after the shuffle and mask: dword 0 = counter (starts at 0), dwords 1-3 = input dwords 0-2. */
+	/* Input dword 3 is discarded. */
+
+
+	vmovdqa	(%rdx),%xmm1 /* xmm1 = round key 0 */
+	vmovdqa	0(%rdi),%xmm9
+	vmovdqa	and_mask(%rip),%xmm12
+	vmovdqa	one(%rip),%xmm13
+	vpshufd	$0x90,%xmm9,%xmm9 /* output dwords = input dwords 0,0,1,2 */
+	vpand	%xmm12,%xmm9,%xmm9 /* zero dword 0: counter = 0 */
+	vpaddd	%xmm13,%xmm9,%xmm10 /* counter = 1 */
+	vpaddd	%xmm13,%xmm10,%xmm11 /* counter = 2 */
+	vpaddd	%xmm13,%xmm11,%xmm12 /* counter = 3 (xmm12 no longer holds and_mask) */
+
+	vpxor	%xmm1,%xmm9,%xmm9 /* initial key XOR on all four blocks */
+	vpxor	%xmm1,%xmm10,%xmm10
+	vpxor	%xmm1,%xmm11,%xmm11
+	vpxor	%xmm1,%xmm12,%xmm12
+
+	vmovdqa	16(%rdx),%xmm1 /* rounds 1-9: round keys alternate between xmm1 and xmm2 */
+	vaesenc	%xmm1,%xmm9,%xmm9
+	vaesenc	%xmm1,%xmm10,%xmm10
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+
+	vmovdqa	32(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm9,%xmm9
+	vaesenc	%xmm2,%xmm10,%xmm10
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+
+	vmovdqa	48(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm9,%xmm9
+	vaesenc	%xmm1,%xmm10,%xmm10
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+
+	vmovdqa	64(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm9,%xmm9
+	vaesenc	%xmm2,%xmm10,%xmm10
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+
+	vmovdqa	80(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm9,%xmm9
+	vaesenc	%xmm1,%xmm10,%xmm10
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+
+	vmovdqa	96(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm9,%xmm9
+	vaesenc	%xmm2,%xmm10,%xmm10
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+
+	vmovdqa	112(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm9,%xmm9
+	vaesenc	%xmm1,%xmm10,%xmm10
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+
+	vmovdqa	128(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm9,%xmm9
+	vaesenc	%xmm2,%xmm10,%xmm10
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+
+	vmovdqa	144(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm9,%xmm9
+	vaesenc	%xmm1,%xmm10,%xmm10
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+
+	vmovdqa	160(%rdx),%xmm2 /* round 10 uses vaesenclast */
+	vaesenclast	%xmm2,%xmm9,%xmm9
+	vaesenclast	%xmm2,%xmm10,%xmm10
+	vaesenclast	%xmm2,%xmm11,%xmm11
+	vaesenclast	%xmm2,%xmm12,%xmm12
+
+	/* Store the four encrypted blocks in counter order. */
+	vmovdqa	%xmm9,0(%rsi)
+	vmovdqa	%xmm10,16(%rsi)
+	vmovdqa	%xmm11,32(%rsi)
+	vmovdqa	%xmm12,48(%rsi)
+	.byte	0xf3,0xc3 /* encoded "rep ret" */
+.cfi_endproc	
+.size	aes128gcmsiv_kdf,.-aes128gcmsiv_kdf
+.globl	aes128gcmsiv_enc_msg_x4 /* Counter-mode XOR: reads %r8 bytes at (%rdi), XORs them with an encrypted counter stream, writes to (%rsi), four blocks per pass. */
+.hidden aes128gcmsiv_enc_msg_x4 /* (%rdx) = initial counter block (vmovdqa, 16-byte aligned); (%rcx) = 11 round keys. Only whole 16-byte blocks are processed (length >> 4). */
+.type	aes128gcmsiv_enc_msg_x4,@function
+.align	16
+aes128gcmsiv_enc_msg_x4:
+.cfi_startproc	
+	testq	%r8,%r8
+	jnz	.L128_enc_msg_x4_start
+	.byte	0xf3,0xc3 /* zero length: return ("rep ret") */
+
+.L128_enc_msg_x4_start:
+	pushq	%r12 /* r12 and r13 are saved and restored but not otherwise referenced in this function */
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r12,-16
+	pushq	%r13
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r13,-24
+
+	shrq	$4,%r8 /* r8 = number of whole 16-byte blocks */
+	movq	%r8,%r10
+	shlq	$62,%r10
+	shrq	$62,%r10 /* r10 = block count mod 4 (keeps only the low two bits) */
+
+
+	vmovdqa	(%rdx),%xmm15
+	vpor	OR_MASK(%rip),%xmm15,%xmm15 /* force bit 31 of dword 3 in the initial counter block */
+
+	vmovdqu	four(%rip),%xmm4 /* xmm4 = per-pass counter increment */
+	vmovdqa	%xmm15,%xmm0 /* xmm0..xmm3 = counter blocks n, n+1, n+2, n+3 (32-bit add on dword 0) */
+	vpaddd	one(%rip),%xmm15,%xmm1
+	vpaddd	two(%rip),%xmm15,%xmm2
+	vpaddd	three(%rip),%xmm15,%xmm3
+
+	shrq	$2,%r8 /* r8 = number of four-block passes; ZF set when there are none */
+	je	.L128_enc_msg_x4_check_remainder
+
+	subq	$64,%rsi /* pre-bias both pointers because the loop advances them at its top */
+	subq	$64,%rdi
+
+.L128_enc_msg_x4_loop1:
+	addq	$64,%rsi
+	addq	$64,%rdi
+
+	vmovdqa	%xmm0,%xmm5 /* working copies of the four counter blocks */
+	vmovdqa	%xmm1,%xmm6
+	vmovdqa	%xmm2,%xmm7
+	vmovdqa	%xmm3,%xmm8
+
+	vpxor	(%rcx),%xmm5,%xmm5 /* initial key XOR with round key 0 */
+	vpxor	(%rcx),%xmm6,%xmm6
+	vpxor	(%rcx),%xmm7,%xmm7
+	vpxor	(%rcx),%xmm8,%xmm8
+
+	vmovdqu	16(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vpaddd	%xmm4,%xmm0,%xmm0 /* counters for the next pass are advanced between rounds */
+	vmovdqu	32(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vpaddd	%xmm4,%xmm1,%xmm1
+	vmovdqu	48(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vpaddd	%xmm4,%xmm2,%xmm2
+	vmovdqu	64(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vpaddd	%xmm4,%xmm3,%xmm3
+
+	vmovdqu	80(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	96(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	112(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	128(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	144(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	160(%rcx),%xmm12 /* final round uses vaesenclast */
+	vaesenclast	%xmm12,%xmm5,%xmm5
+	vaesenclast	%xmm12,%xmm6,%xmm6
+	vaesenclast	%xmm12,%xmm7,%xmm7
+	vaesenclast	%xmm12,%xmm8,%xmm8
+
+
+	/* XOR the four keystream blocks with 64 bytes of input. */
+	vpxor	0(%rdi),%xmm5,%xmm5
+	vpxor	16(%rdi),%xmm6,%xmm6
+	vpxor	32(%rdi),%xmm7,%xmm7
+	vpxor	48(%rdi),%xmm8,%xmm8
+
+	subq	$1,%r8 /* sets ZF for the jne below; the vmovdqu stores do not modify flags */
+
+	vmovdqu	%xmm5,0(%rsi)
+	vmovdqu	%xmm6,16(%rsi)
+	vmovdqu	%xmm7,32(%rsi)
+	vmovdqu	%xmm8,48(%rsi)
+
+	jne	.L128_enc_msg_x4_loop1
+
+	addq	$64,%rsi /* step past the last 64 bytes handled by the loop */
+	addq	$64,%rdi
+
+.L128_enc_msg_x4_check_remainder:
+	cmpq	$0,%r10
+	je	.L128_enc_msg_x4_out
+
+.L128_enc_msg_x4_loop2:
+	/* Remaining 1-3 blocks, one at a time, continuing from the counter in xmm0. */
+
+	vmovdqa	%xmm0,%xmm5
+	vpaddd	one(%rip),%xmm0,%xmm0
+
+	vpxor	(%rcx),%xmm5,%xmm5
+	vaesenc	16(%rcx),%xmm5,%xmm5
+	vaesenc	32(%rcx),%xmm5,%xmm5
+	vaesenc	48(%rcx),%xmm5,%xmm5
+	vaesenc	64(%rcx),%xmm5,%xmm5
+	vaesenc	80(%rcx),%xmm5,%xmm5
+	vaesenc	96(%rcx),%xmm5,%xmm5
+	vaesenc	112(%rcx),%xmm5,%xmm5
+	vaesenc	128(%rcx),%xmm5,%xmm5
+	vaesenc	144(%rcx),%xmm5,%xmm5
+	vaesenclast	160(%rcx),%xmm5,%xmm5
+
+
+	vpxor	(%rdi),%xmm5,%xmm5
+	vmovdqu	%xmm5,(%rsi)
+
+	addq	$16,%rdi
+	addq	$16,%rsi
+
+	subq	$1,%r10
+	jne	.L128_enc_msg_x4_loop2
+
+.L128_enc_msg_x4_out:
+	popq	%r13
+.cfi_adjust_cfa_offset	-8
+.cfi_restore	%r13
+	popq	%r12
+.cfi_adjust_cfa_offset	-8
+.cfi_restore	%r12
+	.byte	0xf3,0xc3 /* encoded "rep ret" */
+.cfi_endproc	
+.size	aes128gcmsiv_enc_msg_x4,.-aes128gcmsiv_enc_msg_x4
+.globl	aes128gcmsiv_enc_msg_x8
+.hidden aes128gcmsiv_enc_msg_x8
+.type	aes128gcmsiv_enc_msg_x8,@function
+.align	16
+aes128gcmsiv_enc_msg_x8: /* XOR (%rdi) with an AES-128 counter keystream into (%rsi), eight 16-byte blocks per main-loop pass */
+.cfi_startproc	
+	testq	%r8,%r8 /* %r8 = length in bytes; zero means nothing to do */
+	jnz	.L128_enc_msg_x8_start
+	.byte	0xf3,0xc3 /* machine encoding of "rep ret" */
+/* Registers read below: %rdi source, %rsi destination, %rdx 16-byte initial counter block, %rcx round keys at offsets 0..160, %r8 byte length. */
+.L128_enc_msg_x8_start:
+	pushq	%r12 /* %r12 and %r13 are saved and restored but not otherwise referenced in this function */
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r12,-16
+	pushq	%r13
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r13,-24
+	pushq	%rbp
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%rbp,-32
+	movq	%rsp,%rbp /* keep the caller's %rsp so the aligned scratch area can be dropped at exit */
+.cfi_def_cfa_register	rbp
+
+/* Reserve a 64-byte-aligned scratch area; only its first 16 bytes, (%rsp), are used, to hold the eighth counter. */
+	subq	$128,%rsp
+	andq	$-64,%rsp
+
+	shrq	$4,%r8 /* %r8 = number of whole 16-byte blocks; the low 4 bits of the length are dropped */
+	movq	%r8,%r10
+	shlq	$61,%r10
+	shrq	$61,%r10 /* %r10 = block count mod 8, handled one block at a time in loop2 */
+
+/* Build the initial counter from the 16 bytes at (%rdx). */
+	vmovdqu	(%rdx),%xmm1
+	vpor	OR_MASK(%rip),%xmm1,%xmm1 /* OR_MASK is defined elsewhere; presumably forces the top bit of the counter block - confirm */
+
+/* Eight counters: %xmm0, %xmm9..%xmm14 in registers and one at (%rsp). The constants one..seven are defined elsewhere; presumably they add 1..7 to the low 32-bit lane - confirm. */
+	vpaddd	seven(%rip),%xmm1,%xmm0
+	vmovdqu	%xmm0,(%rsp)
+	vpaddd	one(%rip),%xmm1,%xmm9
+	vpaddd	two(%rip),%xmm1,%xmm10
+	vpaddd	three(%rip),%xmm1,%xmm11
+	vpaddd	four(%rip),%xmm1,%xmm12
+	vpaddd	five(%rip),%xmm1,%xmm13
+	vpaddd	six(%rip),%xmm1,%xmm14
+	vmovdqa	%xmm1,%xmm0 /* %xmm0 = first counter of the group */
+
+	shrq	$3,%r8 /* %r8 = number of 8-block groups; ZF is set when there are none */
+	je	.L128_enc_msg_x8_check_remainder
+/* Pre-bias both pointers by -128 because the loop head adds 128 on every pass, including the first. */
+	subq	$128,%rsi
+	subq	$128,%rdi
+
+.L128_enc_msg_x8_loop1:
+	addq	$128,%rsi
+	addq	$128,%rdi
+/* Copy the eight counters into the working registers %xmm1..%xmm8. */
+	vmovdqa	%xmm0,%xmm1
+	vmovdqa	%xmm9,%xmm2
+	vmovdqa	%xmm10,%xmm3
+	vmovdqa	%xmm11,%xmm4
+	vmovdqa	%xmm12,%xmm5
+	vmovdqa	%xmm13,%xmm6
+	vmovdqa	%xmm14,%xmm7
+
+	vmovdqu	(%rsp),%xmm8 /* eighth counter lives on the stack */
+/* Initial AddRoundKey with the round key at offset 0. */
+	vpxor	(%rcx),%xmm1,%xmm1
+	vpxor	(%rcx),%xmm2,%xmm2
+	vpxor	(%rcx),%xmm3,%xmm3
+	vpxor	(%rcx),%xmm4,%xmm4
+	vpxor	(%rcx),%xmm5,%xmm5
+	vpxor	(%rcx),%xmm6,%xmm6
+	vpxor	(%rcx),%xmm7,%xmm7
+	vpxor	(%rcx),%xmm8,%xmm8
+
+	vmovdqu	16(%rcx),%xmm15 /* round 1 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+/* Counter updates for the next pass are interleaved with the AES rounds. Here the stacked eighth counter is advanced by `eight`. */
+	vmovdqu	(%rsp),%xmm14 /* safe to overwrite: the old %xmm14 was already copied to %xmm7 */
+	vpaddd	eight(%rip),%xmm14,%xmm14
+	vmovdqu	%xmm14,(%rsp)
+	vmovdqu	32(%rcx),%xmm15 /* round 2 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpsubd	one(%rip),%xmm14,%xmm14 /* next eighth counter minus `one` becomes the next seventh counter */
+	vmovdqu	48(%rcx),%xmm15 /* round 3 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm0,%xmm0 /* advance first counter */
+	vmovdqu	64(%rcx),%xmm15 /* round 4 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm9,%xmm9 /* advance second counter */
+	vmovdqu	80(%rcx),%xmm15 /* round 5 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm10,%xmm10 /* advance third counter */
+	vmovdqu	96(%rcx),%xmm15 /* round 6 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm11,%xmm11 /* advance fourth counter */
+	vmovdqu	112(%rcx),%xmm15 /* round 7 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm12,%xmm12 /* advance fifth counter */
+	vmovdqu	128(%rcx),%xmm15 /* round 8 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm13,%xmm13 /* advance sixth counter */
+	vmovdqu	144(%rcx),%xmm15 /* round 9 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	160(%rcx),%xmm15 /* round 10 (final) */
+	vaesenclast	%xmm15,%xmm1,%xmm1
+	vaesenclast	%xmm15,%xmm2,%xmm2
+	vaesenclast	%xmm15,%xmm3,%xmm3
+	vaesenclast	%xmm15,%xmm4,%xmm4
+	vaesenclast	%xmm15,%xmm5,%xmm5
+	vaesenclast	%xmm15,%xmm6,%xmm6
+	vaesenclast	%xmm15,%xmm7,%xmm7
+	vaesenclast	%xmm15,%xmm8,%xmm8
+
+
+/* XOR the eight keystream blocks with 128 bytes of source. */
+	vpxor	0(%rdi),%xmm1,%xmm1
+	vpxor	16(%rdi),%xmm2,%xmm2
+	vpxor	32(%rdi),%xmm3,%xmm3
+	vpxor	48(%rdi),%xmm4,%xmm4
+	vpxor	64(%rdi),%xmm5,%xmm5
+	vpxor	80(%rdi),%xmm6,%xmm6
+	vpxor	96(%rdi),%xmm7,%xmm7
+	vpxor	112(%rdi),%xmm8,%xmm8
+
+	decq	%r8 /* sets ZF for the jne below; the vector stores in between leave the flags untouched */
+
+	vmovdqu	%xmm1,0(%rsi)
+	vmovdqu	%xmm2,16(%rsi)
+	vmovdqu	%xmm3,32(%rsi)
+	vmovdqu	%xmm4,48(%rsi)
+	vmovdqu	%xmm5,64(%rsi)
+	vmovdqu	%xmm6,80(%rsi)
+	vmovdqu	%xmm7,96(%rsi)
+	vmovdqu	%xmm8,112(%rsi)
+
+	jne	.L128_enc_msg_x8_loop1
+/* Step past the last 128-byte group so the pointers address the remaining blocks. */
+	addq	$128,%rsi
+	addq	$128,%rdi
+
+.L128_enc_msg_x8_check_remainder:
+	cmpq	$0,%r10
+	je	.L128_enc_msg_x8_out
+
+.L128_enc_msg_x8_loop2:
+/* Remainder: one block per pass. %xmm0 already holds the next unused counter. */
+
+	vmovdqa	%xmm0,%xmm1
+	vpaddd	one(%rip),%xmm0,%xmm0
+
+	vpxor	(%rcx),%xmm1,%xmm1
+	vaesenc	16(%rcx),%xmm1,%xmm1
+	vaesenc	32(%rcx),%xmm1,%xmm1
+	vaesenc	48(%rcx),%xmm1,%xmm1
+	vaesenc	64(%rcx),%xmm1,%xmm1
+	vaesenc	80(%rcx),%xmm1,%xmm1
+	vaesenc	96(%rcx),%xmm1,%xmm1
+	vaesenc	112(%rcx),%xmm1,%xmm1
+	vaesenc	128(%rcx),%xmm1,%xmm1
+	vaesenc	144(%rcx),%xmm1,%xmm1
+	vaesenclast	160(%rcx),%xmm1,%xmm1
+
+
+	vpxor	(%rdi),%xmm1,%xmm1
+
+	vmovdqu	%xmm1,(%rsi)
+
+	addq	$16,%rdi
+	addq	$16,%rsi
+
+	decq	%r10
+	jne	.L128_enc_msg_x8_loop2
+
+.L128_enc_msg_x8_out:
+	movq	%rbp,%rsp /* drop the aligned scratch area */
+.cfi_def_cfa_register	%rsp
+	popq	%rbp
+.cfi_adjust_cfa_offset	-8
+.cfi_restore	%rbp
+	popq	%r13
+.cfi_adjust_cfa_offset	-8
+.cfi_restore	%r13
+	popq	%r12
+.cfi_adjust_cfa_offset	-8
+.cfi_restore	%r12
+	.byte	0xf3,0xc3 /* rep ret */
+.cfi_endproc	
+.size	aes128gcmsiv_enc_msg_x8,.-aes128gcmsiv_enc_msg_x8
+.globl	aes128gcmsiv_dec
+.hidden aes128gcmsiv_dec
+.type	aes128gcmsiv_dec,@function
+.align	16
+aes128gcmsiv_dec: /* AES-128 counter-mode decrypt of whole 16-byte blocks from (%rdi) to (%rsi), folding each output block into a carry-less-multiply hash accumulator kept at (%rdx) */
+.cfi_startproc	
+	testq	$~15,%r9 /* %r9 = length in bytes; return at once when it is below 16 */
+	jnz	.L128_dec_start
+	.byte	0xf3,0xc3 /* machine encoding of "rep ret" */
+/* Registers read below: %rdi input, %rsi output, %rdx aligned state buffer (bytes 0..15 accumulator, 16..95 scratch), %rcx table of 16-byte multipliers (six entries used; presumably powers of the hash key - confirm), %r8 round keys at 0..160, %r9 byte length. */
+.L128_dec_start:
+	vzeroupper
+	vmovdqa	(%rdx),%xmm0 /* %xmm0 = running hash accumulator; aligned load, so %rdx must be 16-byte aligned */
+	movq	%rdx,%rax
+/* Bias %rax and %rcx by +32; later accesses use offsets relative to these biased bases. */
+	leaq	32(%rax),%rax
+	leaq	32(%rcx),%rcx
+
+/* The initial counter is read from the 16 bytes located at input + length (length taken before it is rounded down); presumably the authentication tag - confirm with the caller. */
+	vmovdqu	(%rdi,%r9,1),%xmm15
+	vpor	OR_MASK(%rip),%xmm15,%xmm15 /* OR_MASK is defined elsewhere */
+	andq	$~15,%r9 /* round the length down to whole blocks */
+
+/* Fewer than six blocks: go straight to the one-block-at-a-time path. */
+	cmpq	$96,%r9
+	jb	.L128_dec_loop2
+
+/* First six blocks: decrypt only. Their hashing is deferred to the next loop1 pass or to finish_96. %xmm15 ends up as the counter following these six (constants one/two are defined elsewhere). */
+	subq	$96,%r9
+	vmovdqa	%xmm15,%xmm7
+	vpaddd	one(%rip),%xmm7,%xmm8
+	vpaddd	two(%rip),%xmm7,%xmm9
+	vpaddd	one(%rip),%xmm9,%xmm10
+	vpaddd	two(%rip),%xmm9,%xmm11
+	vpaddd	one(%rip),%xmm11,%xmm12
+	vpaddd	two(%rip),%xmm11,%xmm15
+
+	vpxor	(%r8),%xmm7,%xmm7
+	vpxor	(%r8),%xmm8,%xmm8
+	vpxor	(%r8),%xmm9,%xmm9
+	vpxor	(%r8),%xmm10,%xmm10
+	vpxor	(%r8),%xmm11,%xmm11
+	vpxor	(%r8),%xmm12,%xmm12
+
+	vmovdqu	16(%r8),%xmm4 /* round 1 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	32(%r8),%xmm4 /* round 2 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	48(%r8),%xmm4 /* round 3 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	64(%r8),%xmm4 /* round 4 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	80(%r8),%xmm4 /* round 5 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	96(%r8),%xmm4 /* round 6 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	112(%r8),%xmm4 /* round 7 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	128(%r8),%xmm4 /* round 8 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	144(%r8),%xmm4 /* round 9 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	160(%r8),%xmm4 /* round 10 (final) */
+	vaesenclast	%xmm4,%xmm7,%xmm7
+	vaesenclast	%xmm4,%xmm8,%xmm8
+	vaesenclast	%xmm4,%xmm9,%xmm9
+	vaesenclast	%xmm4,%xmm10,%xmm10
+	vaesenclast	%xmm4,%xmm11,%xmm11
+	vaesenclast	%xmm4,%xmm12,%xmm12
+
+/* XOR keystream with input; %xmm7..%xmm12 stay live as the six output blocks still to be hashed. */
+	vpxor	0(%rdi),%xmm7,%xmm7
+	vpxor	16(%rdi),%xmm8,%xmm8
+	vpxor	32(%rdi),%xmm9,%xmm9
+	vpxor	48(%rdi),%xmm10,%xmm10
+	vpxor	64(%rdi),%xmm11,%xmm11
+	vpxor	80(%rdi),%xmm12,%xmm12
+
+	vmovdqu	%xmm7,0(%rsi)
+	vmovdqu	%xmm8,16(%rsi)
+	vmovdqu	%xmm9,32(%rsi)
+	vmovdqu	%xmm10,48(%rsi)
+	vmovdqu	%xmm11,64(%rsi)
+	vmovdqu	%xmm12,80(%rsi)
+
+	addq	$96,%rdi
+	addq	$96,%rsi
+	jmp	.L128_dec_loop1
+
+/* Main loop: each pass hashes the previous six output blocks while decrypting the next six. */
+.align	64
+.L128_dec_loop1:
+	cmpq	$96,%r9
+	jb	.L128_dec_finish_96
+	subq	$96,%r9
+/* Park the previous outputs: the sixth stays in %xmm6, the other five go to the scratch area at original %rdx+16..+95. */
+	vmovdqa	%xmm12,%xmm6
+	vmovdqa	%xmm11,16-32(%rax)
+	vmovdqa	%xmm10,32-32(%rax)
+	vmovdqa	%xmm9,48-32(%rax)
+	vmovdqa	%xmm8,64-32(%rax)
+	vmovdqa	%xmm7,80-32(%rax)
+/* Six fresh counters; %xmm15 again becomes the counter following them. */
+	vmovdqa	%xmm15,%xmm7
+	vpaddd	one(%rip),%xmm7,%xmm8
+	vpaddd	two(%rip),%xmm7,%xmm9
+	vpaddd	one(%rip),%xmm9,%xmm10
+	vpaddd	two(%rip),%xmm9,%xmm11
+	vpaddd	one(%rip),%xmm11,%xmm12
+	vpaddd	two(%rip),%xmm11,%xmm15
+
+	vmovdqa	(%r8),%xmm4 /* aligned load: %r8 must be 16-byte aligned */
+	vpxor	%xmm4,%xmm7,%xmm7
+	vpxor	%xmm4,%xmm8,%xmm8
+	vpxor	%xmm4,%xmm9,%xmm9
+	vpxor	%xmm4,%xmm10,%xmm10
+	vpxor	%xmm4,%xmm11,%xmm11
+	vpxor	%xmm4,%xmm12,%xmm12
+/* Hash accumulators: %xmm2 collects high*high products ($0x11), %xmm3 low*low ($0x00), %xmm1 both cross products ($0x01, $0x10). Sixth previous block times table entry 0. */
+	vmovdqu	0-32(%rcx),%xmm4
+	vpclmulqdq	$0x11,%xmm4,%xmm6,%xmm2
+	vpclmulqdq	$0x00,%xmm4,%xmm6,%xmm3
+	vpclmulqdq	$0x01,%xmm4,%xmm6,%xmm1
+	vpclmulqdq	$0x10,%xmm4,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	16(%r8),%xmm4 /* round 1 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	-16(%rax),%xmm6 /* fifth previous block */
+	vmovdqu	-16(%rcx),%xmm13 /* table entry 1 */
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+
+	vmovdqu	32(%r8),%xmm4 /* round 2 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	0(%rax),%xmm6 /* fourth previous block */
+	vmovdqu	0(%rcx),%xmm13 /* table entry 2 */
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+
+	vmovdqu	48(%r8),%xmm4 /* round 3 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	16(%rax),%xmm6 /* third previous block */
+	vmovdqu	16(%rcx),%xmm13 /* table entry 3 */
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+
+	vmovdqu	64(%r8),%xmm4 /* round 4 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	32(%rax),%xmm6 /* second previous block */
+	vmovdqu	32(%rcx),%xmm13 /* table entry 4 */
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+
+	vmovdqu	80(%r8),%xmm4 /* round 5 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	96(%r8),%xmm4 /* round 6 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	112(%r8),%xmm4 /* round 7 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+/* First previous block is XORed with the running accumulator before being multiplied by table entry 5. */
+	vmovdqa	80-32(%rax),%xmm6
+	vpxor	%xmm0,%xmm6,%xmm6
+	vmovdqu	80-32(%rcx),%xmm5
+
+	vpclmulqdq	$0x01,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x10,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	128(%r8),%xmm4 /* round 8 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+/* Fold the cross terms into the 256-bit product: %xmm5 = high half, %xmm0 = low half. */
+	vpsrldq	$8,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm5
+	vpslldq	$8,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm0
+
+	vmovdqa	poly(%rip),%xmm3 /* reduction constant, defined elsewhere */
+
+	vmovdqu	144(%r8),%xmm4 /* round 9 */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	160(%r8),%xmm6 /* final round key */
+	vpalignr	$8,%xmm0,%xmm0,%xmm2 /* first reduction step: swap the 64-bit halves ... */
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm0 /* ... and multiply by the poly constant */
+	vpxor	%xmm0,%xmm2,%xmm0
+/* The final round key is XORed with the input first, so one vaesenclast yields keystream XOR input directly. */
+	vpxor	0(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm7,%xmm7
+	vpxor	16(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm8,%xmm8
+	vpxor	32(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm9,%xmm9
+	vpxor	48(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm10,%xmm10
+	vpxor	64(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm11,%xmm11
+	vpxor	80(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm12,%xmm12
+/* Second reduction step. */
+	vpalignr	$8,%xmm0,%xmm0,%xmm2
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm0
+	vpxor	%xmm0,%xmm2,%xmm0
+
+	vmovdqu	%xmm7,0(%rsi)
+	vmovdqu	%xmm8,16(%rsi)
+	vmovdqu	%xmm9,32(%rsi)
+	vmovdqu	%xmm10,48(%rsi)
+	vmovdqu	%xmm11,64(%rsi)
+	vmovdqu	%xmm12,80(%rsi)
+
+	vpxor	%xmm5,%xmm0,%xmm0 /* add the saved high half: %xmm0 is the updated accumulator */
+
+	leaq	96(%rdi),%rdi
+	leaq	96(%rsi),%rsi
+	jmp	.L128_dec_loop1
+/* Fewer than 96 bytes left: hash the six outstanding output blocks without decrypting anything new. */
+.L128_dec_finish_96:
+	vmovdqa	%xmm12,%xmm6
+	vmovdqa	%xmm11,16-32(%rax)
+	vmovdqa	%xmm10,32-32(%rax)
+	vmovdqa	%xmm9,48-32(%rax)
+	vmovdqa	%xmm8,64-32(%rax)
+	vmovdqa	%xmm7,80-32(%rax)
+/* Same six-way multiply-accumulate as in loop1: %xmm1 cross terms, %xmm2 high, %xmm3 low. */
+	vmovdqu	0-32(%rcx),%xmm4
+	vpclmulqdq	$0x10,%xmm4,%xmm6,%xmm1
+	vpclmulqdq	$0x11,%xmm4,%xmm6,%xmm2
+	vpclmulqdq	$0x00,%xmm4,%xmm6,%xmm3
+	vpclmulqdq	$0x01,%xmm4,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	-16(%rax),%xmm6
+	vmovdqu	-16(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	0(%rax),%xmm6
+	vmovdqu	0(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	16(%rax),%xmm6
+	vmovdqu	16(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	32(%rax),%xmm6
+	vmovdqu	32(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+/* First block of the group is XORed with the running accumulator before its multiply. */
+	vmovdqu	80-32(%rax),%xmm6
+	vpxor	%xmm0,%xmm6,%xmm6
+	vmovdqu	80-32(%rcx),%xmm5
+	vpclmulqdq	$0x11,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x10,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x01,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+/* Combine into high (%xmm5) and low (%xmm0) halves, then reduce in two steps with poly. */
+	vpsrldq	$8,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm5
+	vpslldq	$8,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm0
+
+	vmovdqa	poly(%rip),%xmm3
+
+	vpalignr	$8,%xmm0,%xmm0,%xmm2
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm0
+	vpxor	%xmm0,%xmm2,%xmm0
+
+	vpalignr	$8,%xmm0,%xmm0,%xmm2
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm0
+	vpxor	%xmm0,%xmm2,%xmm0
+
+	vpxor	%xmm5,%xmm0,%xmm0
+
+.L128_dec_loop2:
+/* Tail: one block per pass while at least 16 bytes remain. */
+
+
+	cmpq	$16,%r9
+	jb	.L128_dec_out
+	subq	$16,%r9
+
+	vmovdqa	%xmm15,%xmm2
+	vpaddd	one(%rip),%xmm15,%xmm15
+
+	vpxor	0(%r8),%xmm2,%xmm2
+	vaesenc	16(%r8),%xmm2,%xmm2
+	vaesenc	32(%r8),%xmm2,%xmm2
+	vaesenc	48(%r8),%xmm2,%xmm2
+	vaesenc	64(%r8),%xmm2,%xmm2
+	vaesenc	80(%r8),%xmm2,%xmm2
+	vaesenc	96(%r8),%xmm2,%xmm2
+	vaesenc	112(%r8),%xmm2,%xmm2
+	vaesenc	128(%r8),%xmm2,%xmm2
+	vaesenc	144(%r8),%xmm2,%xmm2
+	vaesenclast	160(%r8),%xmm2,%xmm2
+	vpxor	(%rdi),%xmm2,%xmm2
+	vmovdqu	%xmm2,(%rsi)
+	addq	$16,%rdi
+	addq	$16,%rsi
+/* Fold the output block into the accumulator, then call GFMUL with %xmm1 = table entry 0. GFMUL is defined elsewhere; presumably it multiplies %xmm0 by %xmm1 in the hash field - confirm. */
+	vpxor	%xmm2,%xmm0,%xmm0
+	vmovdqa	-32(%rcx),%xmm1
+	call	GFMUL
+
+	jmp	.L128_dec_loop2
+
+.L128_dec_out:
+	vmovdqu	%xmm0,(%rdx) /* write the updated accumulator back to the state buffer */
+	.byte	0xf3,0xc3 /* rep ret */
+.cfi_endproc	
+.size	aes128gcmsiv_dec, .-aes128gcmsiv_dec
+.globl	aes128gcmsiv_ecb_enc_block
+.hidden aes128gcmsiv_ecb_enc_block
+.type	aes128gcmsiv_ecb_enc_block,@function
+.align	16
+aes128gcmsiv_ecb_enc_block: /* encrypt the single 16-byte block at (%rdi) with the AES-128 round keys at (%rdx) and store it at (%rsi) */
+.cfi_startproc	
+	vmovdqa	(%rdi),%xmm1 /* aligned load: %rdi must be 16-byte aligned */
+/* Initial AddRoundKey, nine vaesenc rounds, then the final vaesenclast round (key offsets 0..160). */
+	vpxor	(%rdx),%xmm1,%xmm1
+	vaesenc	16(%rdx),%xmm1,%xmm1
+	vaesenc	32(%rdx),%xmm1,%xmm1
+	vaesenc	48(%rdx),%xmm1,%xmm1
+	vaesenc	64(%rdx),%xmm1,%xmm1
+	vaesenc	80(%rdx),%xmm1,%xmm1
+	vaesenc	96(%rdx),%xmm1,%xmm1
+	vaesenc	112(%rdx),%xmm1,%xmm1
+	vaesenc	128(%rdx),%xmm1,%xmm1
+	vaesenc	144(%rdx),%xmm1,%xmm1
+	vaesenclast	160(%rdx),%xmm1,%xmm1
+
+	vmovdqa	%xmm1,(%rsi) /* aligned store: %rsi must be 16-byte aligned */
+
+	.byte	0xf3,0xc3 /* machine encoding of "rep ret" */
+.cfi_endproc	
+.size	aes128gcmsiv_ecb_enc_block,.-aes128gcmsiv_ecb_enc_block
+.globl	aes256gcmsiv_aes_ks_enc_x1
+.hidden aes256gcmsiv_aes_ks_enc_x1
+.type	aes256gcmsiv_aes_ks_enc_x1,@function
+.align	16
+aes256gcmsiv_aes_ks_enc_x1: /* expand the 32-byte key at (%rcx) into 15 round keys at (%rdx) while encrypting the block at (%rdi) into (%rsi) */
+.cfi_startproc	
+	vmovdqa	con1(%rip),%xmm0 /* %xmm0 = current round constant; con1 is defined elsewhere */
+	vmovdqa	mask(%rip),%xmm15 /* byte-shuffle mask, defined elsewhere; presumably rotates and broadcasts the last key word - confirm */
+	vmovdqa	(%rdi),%xmm8 /* %xmm8 = block being encrypted; aligned load */
+	vmovdqa	(%rcx),%xmm1 /* first key half = round key 0 */
+	vmovdqa	16(%rcx),%xmm3 /* second key half = round key 1 */
+	vpxor	%xmm1,%xmm8,%xmm8
+	vaesenc	%xmm3,%xmm8,%xmm8
+	vmovdqu	%xmm1,(%rdx)
+	vmovdqu	%xmm3,16(%rdx)
+	vpxor	%xmm14,%xmm14,%xmm14 /* %xmm14 = 0, used as a null round key below */
+/* Even round key (here key 2): shuffle %xmm3 with the mask, apply vaesenclast with the round constant, double the constant, then XOR into the running word-wise prefix XOR of %xmm1 (three shift-by-4-bytes steps). Each new key is used immediately for one AES round on %xmm8 and stored. */
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpslldq	$4,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vaesenc	%xmm1,%xmm8,%xmm8
+	vmovdqu	%xmm1,32(%rdx)
+/* Odd round key (here key 3): broadcast the top dword of %xmm1, apply vaesenclast with a zero key (no round constant), then the same prefix XOR on %xmm3. */
+	vpshufd	$0xff,%xmm1,%xmm2
+	vaesenclast	%xmm14,%xmm2,%xmm2
+	vpslldq	$4,%xmm3,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpxor	%xmm2,%xmm3,%xmm3
+	vaesenc	%xmm3,%xmm8,%xmm8
+	vmovdqu	%xmm3,48(%rdx)
+/* Round key 4. */
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpslldq	$4,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vaesenc	%xmm1,%xmm8,%xmm8
+	vmovdqu	%xmm1,64(%rdx)
+/* Round key 5. */
+	vpshufd	$0xff,%xmm1,%xmm2
+	vaesenclast	%xmm14,%xmm2,%xmm2
+	vpslldq	$4,%xmm3,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpxor	%xmm2,%xmm3,%xmm3
+	vaesenc	%xmm3,%xmm8,%xmm8
+	vmovdqu	%xmm3,80(%rdx)
+/* Round key 6. */
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpslldq	$4,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vaesenc	%xmm1,%xmm8,%xmm8
+	vmovdqu	%xmm1,96(%rdx)
+/* Round key 7. */
+	vpshufd	$0xff,%xmm1,%xmm2
+	vaesenclast	%xmm14,%xmm2,%xmm2
+	vpslldq	$4,%xmm3,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpxor	%xmm2,%xmm3,%xmm3
+	vaesenc	%xmm3,%xmm8,%xmm8
+	vmovdqu	%xmm3,112(%rdx)
+/* Round key 8. */
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpslldq	$4,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vaesenc	%xmm1,%xmm8,%xmm8
+	vmovdqu	%xmm1,128(%rdx)
+/* Round key 9. */
+	vpshufd	$0xff,%xmm1,%xmm2
+	vaesenclast	%xmm14,%xmm2,%xmm2
+	vpslldq	$4,%xmm3,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpxor	%xmm2,%xmm3,%xmm3
+	vaesenc	%xmm3,%xmm8,%xmm8
+	vmovdqu	%xmm3,144(%rdx)
+/* Round key 10. */
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpslldq	$4,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vaesenc	%xmm1,%xmm8,%xmm8
+	vmovdqu	%xmm1,160(%rdx)
+/* Round key 11. */
+	vpshufd	$0xff,%xmm1,%xmm2
+	vaesenclast	%xmm14,%xmm2,%xmm2
+	vpslldq	$4,%xmm3,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpxor	%xmm2,%xmm3,%xmm3
+	vaesenc	%xmm3,%xmm8,%xmm8
+	vmovdqu	%xmm3,176(%rdx)
+/* Round key 12. */
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpslldq	$4,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vaesenc	%xmm1,%xmm8,%xmm8
+	vmovdqu	%xmm1,192(%rdx)
+/* Round key 13. */
+	vpshufd	$0xff,%xmm1,%xmm2
+	vaesenclast	%xmm14,%xmm2,%xmm2
+	vpslldq	$4,%xmm3,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpxor	%xmm2,%xmm3,%xmm3
+	vaesenc	%xmm3,%xmm8,%xmm8
+	vmovdqu	%xmm3,208(%rdx)
+/* Round key 14 (last): the round constant is not doubled again, and the key is applied with vaesenclast. */
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslldq	$4,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vaesenclast	%xmm1,%xmm8,%xmm8
+	vmovdqu	%xmm1,224(%rdx)
+
+	vmovdqa	%xmm8,(%rsi) /* store the encrypted block; aligned store */
+	.byte	0xf3,0xc3 /* machine encoding of "rep ret" */
+.cfi_endproc	
+.size	aes256gcmsiv_aes_ks_enc_x1,.-aes256gcmsiv_aes_ks_enc_x1
+.globl	aes256gcmsiv_ecb_enc_block
+.hidden aes256gcmsiv_ecb_enc_block
+.type	aes256gcmsiv_ecb_enc_block,@function
+.align	16
+aes256gcmsiv_ecb_enc_block: /* encrypt the single 16-byte block at (%rdi) with the AES-256 round keys at (%rdx) and store it at (%rsi) */
+.cfi_startproc	
+	vmovdqa	(%rdi),%xmm1 /* aligned load: %rdi must be 16-byte aligned */
+	vpxor	(%rdx),%xmm1,%xmm1 /* initial AddRoundKey with round key 0 */
+	vaesenc	16(%rdx),%xmm1,%xmm1 /* rounds 1..13 use the keys at offsets 16..208 */
+	vaesenc	32(%rdx),%xmm1,%xmm1
+	vaesenc	48(%rdx),%xmm1,%xmm1
+	vaesenc	64(%rdx),%xmm1,%xmm1
+	vaesenc	80(%rdx),%xmm1,%xmm1
+	vaesenc	96(%rdx),%xmm1,%xmm1
+	vaesenc	112(%rdx),%xmm1,%xmm1
+	vaesenc	128(%rdx),%xmm1,%xmm1
+	vaesenc	144(%rdx),%xmm1,%xmm1
+	vaesenc	160(%rdx),%xmm1,%xmm1
+	vaesenc	176(%rdx),%xmm1,%xmm1
+	vaesenc	192(%rdx),%xmm1,%xmm1
+	vaesenc	208(%rdx),%xmm1,%xmm1
+	vaesenclast	224(%rdx),%xmm1,%xmm1 /* round 14 (final) */
+	vmovdqa	%xmm1,(%rsi) /* aligned store: %rsi must be 16-byte aligned */
+	.byte	0xf3,0xc3 /* machine encoding of "rep ret" */
+.cfi_endproc	
+.size	aes256gcmsiv_ecb_enc_block,.-aes256gcmsiv_ecb_enc_block
+.globl	aes256gcmsiv_enc_msg_x4
+.hidden aes256gcmsiv_enc_msg_x4
+.type	aes256gcmsiv_enc_msg_x4,@function
+.align	16
+aes256gcmsiv_enc_msg_x4: /* XOR (%rdi) with an AES-256 counter keystream into (%rsi), four 16-byte blocks per main-loop pass */
+.cfi_startproc	
+	testq	%r8,%r8 /* %r8 = length in bytes; zero means nothing to do */
+	jnz	.L256_enc_msg_x4_start
+	.byte	0xf3,0xc3 /* machine encoding of "rep ret" */
+/* Registers read below: %rdi source, %rsi destination, %rdx 16-byte initial counter block (aligned load), %rcx round keys at offsets 0..224, %r8 byte length. */
+.L256_enc_msg_x4_start:
+	movq	%r8,%r10
+	shrq	$4,%r8 /* whole 16-byte blocks */
+	shlq	$60,%r10 /* ZF is set when the low 4 bits of the length were zero */
+	jz	.L256_enc_msg_x4_start2
+	addq	$1,%r8 /* partial trailing block: round the block count up, so a full 16 bytes are read and written for it */
+
+.L256_enc_msg_x4_start2:
+	movq	%r8,%r10
+	shlq	$62,%r10
+	shrq	$62,%r10 /* %r10 = block count mod 4, handled in loop2 */
+
+/* Initial counter = 16 bytes at (%rdx) ORed with OR_MASK (defined elsewhere). */
+	vmovdqa	(%rdx),%xmm15
+	vpor	OR_MASK(%rip),%xmm15,%xmm15
+/* %xmm0..%xmm3 hold four consecutive counters and %xmm4 the per-pass increment. The constants one..four are defined elsewhere; presumably they add to the low 32-bit lane - confirm. */
+	vmovdqa	four(%rip),%xmm4
+	vmovdqa	%xmm15,%xmm0
+	vpaddd	one(%rip),%xmm15,%xmm1
+	vpaddd	two(%rip),%xmm15,%xmm2
+	vpaddd	three(%rip),%xmm15,%xmm3
+
+	shrq	$2,%r8 /* %r8 = number of 4-block groups; ZF is set when there are none */
+	je	.L256_enc_msg_x4_check_remainder
+/* Pre-bias both pointers by -64 because the loop head adds 64 on every pass. */
+	subq	$64,%rsi
+	subq	$64,%rdi
+
+.L256_enc_msg_x4_loop1:
+	addq	$64,%rsi
+	addq	$64,%rdi
+/* Working copies of the four counters. */
+	vmovdqa	%xmm0,%xmm5
+	vmovdqa	%xmm1,%xmm6
+	vmovdqa	%xmm2,%xmm7
+	vmovdqa	%xmm3,%xmm8
+/* Initial AddRoundKey with round key 0. */
+	vpxor	(%rcx),%xmm5,%xmm5
+	vpxor	(%rcx),%xmm6,%xmm6
+	vpxor	(%rcx),%xmm7,%xmm7
+	vpxor	(%rcx),%xmm8,%xmm8
+
+	vmovdqu	16(%rcx),%xmm12 /* round 1 */
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vpaddd	%xmm4,%xmm0,%xmm0 /* counter updates for the next pass are interleaved with the rounds */
+	vmovdqu	32(%rcx),%xmm12 /* round 2 */
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vpaddd	%xmm4,%xmm1,%xmm1
+	vmovdqu	48(%rcx),%xmm12 /* round 3 */
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vpaddd	%xmm4,%xmm2,%xmm2
+	vmovdqu	64(%rcx),%xmm12 /* round 4 */
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vpaddd	%xmm4,%xmm3,%xmm3
+
+	vmovdqu	80(%rcx),%xmm12 /* round 5 */
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	96(%rcx),%xmm12 /* round 6 */
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	112(%rcx),%xmm12 /* round 7 */
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	128(%rcx),%xmm12 /* round 8 */
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	144(%rcx),%xmm12 /* round 9 */
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	160(%rcx),%xmm12 /* round 10 */
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	176(%rcx),%xmm12 /* round 11 */
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	192(%rcx),%xmm12 /* round 12 */
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	208(%rcx),%xmm12 /* round 13 */
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	224(%rcx),%xmm12 /* round 14 (final) */
+	vaesenclast	%xmm12,%xmm5,%xmm5
+	vaesenclast	%xmm12,%xmm6,%xmm6
+	vaesenclast	%xmm12,%xmm7,%xmm7
+	vaesenclast	%xmm12,%xmm8,%xmm8
+
+
+/* XOR the four keystream blocks with 64 bytes of source. */
+	vpxor	0(%rdi),%xmm5,%xmm5
+	vpxor	16(%rdi),%xmm6,%xmm6
+	vpxor	32(%rdi),%xmm7,%xmm7
+	vpxor	48(%rdi),%xmm8,%xmm8
+
+	subq	$1,%r8 /* sets ZF for the jne below; the vector stores in between leave the flags untouched */
+
+	vmovdqu	%xmm5,0(%rsi)
+	vmovdqu	%xmm6,16(%rsi)
+	vmovdqu	%xmm7,32(%rsi)
+	vmovdqu	%xmm8,48(%rsi)
+
+	jne	.L256_enc_msg_x4_loop1
+/* Step past the last 64-byte group. */
+	addq	$64,%rsi
+	addq	$64,%rdi
+
+.L256_enc_msg_x4_check_remainder:
+	cmpq	$0,%r10
+	je	.L256_enc_msg_x4_out
+
+.L256_enc_msg_x4_loop2:
+/* Remainder: one block per pass. %xmm0 already holds the next unused counter. */
+
+
+	vmovdqa	%xmm0,%xmm5
+	vpaddd	one(%rip),%xmm0,%xmm0
+	vpxor	(%rcx),%xmm5,%xmm5
+	vaesenc	16(%rcx),%xmm5,%xmm5
+	vaesenc	32(%rcx),%xmm5,%xmm5
+	vaesenc	48(%rcx),%xmm5,%xmm5
+	vaesenc	64(%rcx),%xmm5,%xmm5
+	vaesenc	80(%rcx),%xmm5,%xmm5
+	vaesenc	96(%rcx),%xmm5,%xmm5
+	vaesenc	112(%rcx),%xmm5,%xmm5
+	vaesenc	128(%rcx),%xmm5,%xmm5
+	vaesenc	144(%rcx),%xmm5,%xmm5
+	vaesenc	160(%rcx),%xmm5,%xmm5
+	vaesenc	176(%rcx),%xmm5,%xmm5
+	vaesenc	192(%rcx),%xmm5,%xmm5
+	vaesenc	208(%rcx),%xmm5,%xmm5
+	vaesenclast	224(%rcx),%xmm5,%xmm5
+
+
+	vpxor	(%rdi),%xmm5,%xmm5
+
+	vmovdqu	%xmm5,(%rsi)
+
+	addq	$16,%rdi
+	addq	$16,%rsi
+
+	subq	$1,%r10
+	jne	.L256_enc_msg_x4_loop2
+
+.L256_enc_msg_x4_out:
+	.byte	0xf3,0xc3 /* rep ret */
+.cfi_endproc	
+.size	aes256gcmsiv_enc_msg_x4,.-aes256gcmsiv_enc_msg_x4
+.globl	aes256gcmsiv_enc_msg_x8
+.hidden aes256gcmsiv_enc_msg_x8
+.type	aes256gcmsiv_enc_msg_x8,@function
+.align	16
+aes256gcmsiv_enc_msg_x8: /* XOR (%rdi) with an AES-256 counter keystream into (%rsi), eight 16-byte blocks per main-loop pass */
+.cfi_startproc	
+	testq	%r8,%r8 /* %r8 = length in bytes; zero means nothing to do */
+	jnz	.L256_enc_msg_x8_start
+	.byte	0xf3,0xc3 /* machine encoding of "rep ret" */
+/* Registers read below: %rdi source, %rsi destination, %rdx 16-byte initial counter block (aligned load), %rcx round keys at offsets 0..224, %r8 byte length. */
+.L256_enc_msg_x8_start:
+/* %r11 = 64-byte-aligned 16-byte scratch slot at most 79 bytes below %rsp; %rsp itself is not moved, so the slot lies in the System V red zone. */
+	movq	%rsp,%r11
+	subq	$16,%r11
+	andq	$-64,%r11
+
+	movq	%r8,%r10
+	shrq	$4,%r8 /* whole 16-byte blocks */
+	shlq	$60,%r10 /* ZF is set when the low 4 bits of the length were zero */
+	jz	.L256_enc_msg_x8_start2
+	addq	$1,%r8 /* partial trailing block: round the block count up, so a full 16 bytes are read and written for it */
+
+.L256_enc_msg_x8_start2:
+	movq	%r8,%r10
+	shlq	$61,%r10
+	shrq	$61,%r10 /* %r10 = block count mod 8, handled in loop2 */
+
+/* Initial counter = 16 bytes at (%rdx) ORed with OR_MASK (defined elsewhere). */
+	vmovdqa	(%rdx),%xmm1
+	vpor	OR_MASK(%rip),%xmm1,%xmm1
+
+/* Eight counters: %xmm0, %xmm9..%xmm14 in registers and one at (%r11). The constants one..seven are defined elsewhere; presumably they add 1..7 to the low 32-bit lane - confirm. */
+	vpaddd	seven(%rip),%xmm1,%xmm0
+	vmovdqa	%xmm0,(%r11)
+	vpaddd	one(%rip),%xmm1,%xmm9
+	vpaddd	two(%rip),%xmm1,%xmm10
+	vpaddd	three(%rip),%xmm1,%xmm11
+	vpaddd	four(%rip),%xmm1,%xmm12
+	vpaddd	five(%rip),%xmm1,%xmm13
+	vpaddd	six(%rip),%xmm1,%xmm14
+	vmovdqa	%xmm1,%xmm0 /* %xmm0 = first counter of the group */
+
+	shrq	$3,%r8 /* %r8 = number of 8-block groups; ZF is set when there are none */
+	jz	.L256_enc_msg_x8_check_remainder
+/* Pre-bias both pointers by -128 because the loop head adds 128 on every pass. */
+	subq	$128,%rsi
+	subq	$128,%rdi
+
+.L256_enc_msg_x8_loop1:
+	addq	$128,%rsi
+	addq	$128,%rdi
+/* Copy the eight counters into the working registers %xmm1..%xmm8. */
+	vmovdqa	%xmm0,%xmm1
+	vmovdqa	%xmm9,%xmm2
+	vmovdqa	%xmm10,%xmm3
+	vmovdqa	%xmm11,%xmm4
+	vmovdqa	%xmm12,%xmm5
+	vmovdqa	%xmm13,%xmm6
+	vmovdqa	%xmm14,%xmm7
+
+	vmovdqa	(%r11),%xmm8 /* eighth counter lives in the scratch slot */
+/* Initial AddRoundKey with round key 0. */
+	vpxor	(%rcx),%xmm1,%xmm1
+	vpxor	(%rcx),%xmm2,%xmm2
+	vpxor	(%rcx),%xmm3,%xmm3
+	vpxor	(%rcx),%xmm4,%xmm4
+	vpxor	(%rcx),%xmm5,%xmm5
+	vpxor	(%rcx),%xmm6,%xmm6
+	vpxor	(%rcx),%xmm7,%xmm7
+	vpxor	(%rcx),%xmm8,%xmm8
+
+	vmovdqu	16(%rcx),%xmm15 /* round 1 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+/* Counter updates for the next pass are interleaved with the AES rounds. Here the stored eighth counter is advanced by `eight`. */
+	vmovdqa	(%r11),%xmm14 /* safe to overwrite: the old %xmm14 was already copied to %xmm7 */
+	vpaddd	eight(%rip),%xmm14,%xmm14
+	vmovdqa	%xmm14,(%r11)
+	vmovdqu	32(%rcx),%xmm15 /* round 2 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpsubd	one(%rip),%xmm14,%xmm14 /* next eighth counter minus `one` becomes the next seventh counter */
+	vmovdqu	48(%rcx),%xmm15 /* round 3 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm0,%xmm0 /* advance first counter */
+	vmovdqu	64(%rcx),%xmm15 /* round 4 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm9,%xmm9 /* advance second counter */
+	vmovdqu	80(%rcx),%xmm15 /* round 5 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm10,%xmm10 /* advance third counter */
+	vmovdqu	96(%rcx),%xmm15 /* round 6 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm11,%xmm11 /* advance fourth counter */
+	vmovdqu	112(%rcx),%xmm15 /* round 7 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm12,%xmm12 /* advance fifth counter */
+	vmovdqu	128(%rcx),%xmm15 /* round 8 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm13,%xmm13 /* advance sixth counter */
+	vmovdqu	144(%rcx),%xmm15 /* round 9 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	160(%rcx),%xmm15 /* round 10 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	176(%rcx),%xmm15 /* round 11 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	192(%rcx),%xmm15 /* round 12 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	208(%rcx),%xmm15 /* round 13 */
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	224(%rcx),%xmm15 /* round 14 (final) */
+	vaesenclast	%xmm15,%xmm1,%xmm1
+	vaesenclast	%xmm15,%xmm2,%xmm2
+	vaesenclast	%xmm15,%xmm3,%xmm3
+	vaesenclast	%xmm15,%xmm4,%xmm4
+	vaesenclast	%xmm15,%xmm5,%xmm5
+	vaesenclast	%xmm15,%xmm6,%xmm6
+	vaesenclast	%xmm15,%xmm7,%xmm7
+	vaesenclast	%xmm15,%xmm8,%xmm8
+
+
+/* XOR the eight keystream blocks with 128 bytes of source. */
+	vpxor	0(%rdi),%xmm1,%xmm1
+	vpxor	16(%rdi),%xmm2,%xmm2
+	vpxor	32(%rdi),%xmm3,%xmm3
+	vpxor	48(%rdi),%xmm4,%xmm4
+	vpxor	64(%rdi),%xmm5,%xmm5
+	vpxor	80(%rdi),%xmm6,%xmm6
+	vpxor	96(%rdi),%xmm7,%xmm7
+	vpxor	112(%rdi),%xmm8,%xmm8
+
+	subq	$1,%r8 /* sets ZF for the jne below; the vector stores in between leave the flags untouched */
+
+	vmovdqu	%xmm1,0(%rsi)
+	vmovdqu	%xmm2,16(%rsi)
+	vmovdqu	%xmm3,32(%rsi)
+	vmovdqu	%xmm4,48(%rsi)
+	vmovdqu	%xmm5,64(%rsi)
+	vmovdqu	%xmm6,80(%rsi)
+	vmovdqu	%xmm7,96(%rsi)
+	vmovdqu	%xmm8,112(%rsi)
+
+	jne	.L256_enc_msg_x8_loop1
+/* Step past the last 128-byte group. */
+	addq	$128,%rsi
+	addq	$128,%rdi
+
+.L256_enc_msg_x8_check_remainder:
+	cmpq	$0,%r10
+	je	.L256_enc_msg_x8_out
+
+.L256_enc_msg_x8_loop2:
+/* Remainder: one block per pass. %xmm0 already holds the next unused counter. */
+
+	vmovdqa	%xmm0,%xmm1
+	vpaddd	one(%rip),%xmm0,%xmm0
+
+	vpxor	(%rcx),%xmm1,%xmm1
+	vaesenc	16(%rcx),%xmm1,%xmm1
+	vaesenc	32(%rcx),%xmm1,%xmm1
+	vaesenc	48(%rcx),%xmm1,%xmm1
+	vaesenc	64(%rcx),%xmm1,%xmm1
+	vaesenc	80(%rcx),%xmm1,%xmm1
+	vaesenc	96(%rcx),%xmm1,%xmm1
+	vaesenc	112(%rcx),%xmm1,%xmm1
+	vaesenc	128(%rcx),%xmm1,%xmm1
+	vaesenc	144(%rcx),%xmm1,%xmm1
+	vaesenc	160(%rcx),%xmm1,%xmm1
+	vaesenc	176(%rcx),%xmm1,%xmm1
+	vaesenc	192(%rcx),%xmm1,%xmm1
+	vaesenc	208(%rcx),%xmm1,%xmm1
+	vaesenclast	224(%rcx),%xmm1,%xmm1
+
+
+	vpxor	(%rdi),%xmm1,%xmm1
+
+	vmovdqu	%xmm1,(%rsi)
+
+	addq	$16,%rdi
+	addq	$16,%rsi
+	subq	$1,%r10
+	jnz	.L256_enc_msg_x8_loop2
+
+.L256_enc_msg_x8_out:
+	.byte	0xf3,0xc3 /* rep ret */
+
+.cfi_endproc	
+.size	aes256gcmsiv_enc_msg_x8,.-aes256gcmsiv_enc_msg_x8
+.globl	aes256gcmsiv_dec	/* aes256gcmsiv_dec: AES-256 counter-mode decryption of whole 16-byte blocks that also folds every output block into a carry-less-multiply hash accumulator. */
+.hidden aes256gcmsiv_dec	/* Registers read: %rdi = input, %rsi = output, %rdx = 16-byte accumulator followed by scratch (bytes 16..95 are written), %rcx = table of six 16-byte hash multipliers, %r8 = 15 round keys, %r9 = byte length. */
+.type	aes256gcmsiv_dec,@function	/* NOTE(review): the argument roles above are inferred from register usage in this body; confirm against the C prototype. */
+.align	16
+aes256gcmsiv_dec:
+.cfi_startproc	
+	testq	$~15,%r9	/* is the length at least 16 (any bit above the low four set)? */
+	jnz	.L256_dec_start
+	.byte	0xf3,0xc3	/* rep ret: less than one block, nothing to do */
+
+.L256_dec_start:
+	vzeroupper
+	vmovdqa	(%rdx),%xmm0	/* xmm0 = running hash accumulator; aligned load, so %rdx must be 16-byte aligned */
+	movq	%rdx,%rax
+
+	leaq	32(%rax),%rax	/* %rax = %rdx + 32; scratch slots are addressed relative to this */
+	leaq	32(%rcx),%rcx	/* bias the table pointer too, so 0-32(%rcx) is its first entry */
+/* Build the initial counter block from the 16 bytes that follow the input. */
+
+	vmovdqu	(%rdi,%r9,1),%xmm15	/* 16 bytes at input + length (presumably the tag; TODO confirm) */
+	vpor	OR_MASK(%rip),%xmm15,%xmm15	/* OR_MASK is defined elsewhere; presumably forces the top bit - verify */
+	andq	$~15,%r9	/* round the length down to whole blocks */
+
+
+	cmpq	$96,%r9
+	jb	.L256_dec_loop2	/* fewer than six blocks: only the one-block loop runs */
+/* First batch of six blocks: decrypt only; their hashing is deferred to the next stage. */
+
+	subq	$96,%r9
+	vmovdqa	%xmm15,%xmm7	/* xmm7..xmm12 = six consecutive counter blocks (one/two are defined elsewhere; presumably add 1 or 2 to dword 0) */
+	vpaddd	one(%rip),%xmm7,%xmm8
+	vpaddd	two(%rip),%xmm7,%xmm9
+	vpaddd	one(%rip),%xmm9,%xmm10
+	vpaddd	two(%rip),%xmm9,%xmm11
+	vpaddd	one(%rip),%xmm11,%xmm12
+	vpaddd	two(%rip),%xmm11,%xmm15	/* xmm15 = first counter of the following batch */
+
+	vpxor	(%r8),%xmm7,%xmm7	/* round key 0 (initial whitening) */
+	vpxor	(%r8),%xmm8,%xmm8
+	vpxor	(%r8),%xmm9,%xmm9
+	vpxor	(%r8),%xmm10,%xmm10
+	vpxor	(%r8),%xmm11,%xmm11
+	vpxor	(%r8),%xmm12,%xmm12
+
+	vmovdqu	16(%r8),%xmm4	/* round key 1; each further 16-byte step in %r8 is the next round */
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	32(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	48(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	64(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	80(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	96(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	112(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	128(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	144(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	160(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	176(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	192(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	208(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	224(%r8),%xmm4	/* round key 14: final round of the 14-round (AES-256) schedule */
+	vaesenclast	%xmm4,%xmm7,%xmm7
+	vaesenclast	%xmm4,%xmm8,%xmm8
+	vaesenclast	%xmm4,%xmm9,%xmm9
+	vaesenclast	%xmm4,%xmm10,%xmm10
+	vaesenclast	%xmm4,%xmm11,%xmm11
+	vaesenclast	%xmm4,%xmm12,%xmm12
+
+/* XOR the keystream with the input and write the six output blocks. */
+	vpxor	0(%rdi),%xmm7,%xmm7
+	vpxor	16(%rdi),%xmm8,%xmm8
+	vpxor	32(%rdi),%xmm9,%xmm9
+	vpxor	48(%rdi),%xmm10,%xmm10
+	vpxor	64(%rdi),%xmm11,%xmm11
+	vpxor	80(%rdi),%xmm12,%xmm12
+
+	vmovdqu	%xmm7,0(%rsi)
+	vmovdqu	%xmm8,16(%rsi)
+	vmovdqu	%xmm9,32(%rsi)
+	vmovdqu	%xmm10,48(%rsi)
+	vmovdqu	%xmm11,64(%rsi)
+	vmovdqu	%xmm12,80(%rsi)
+
+	addq	$96,%rdi
+	addq	$96,%rsi
+	jmp	.L256_dec_loop1
+
+/* Main loop: decrypt the next six blocks while hashing the six output blocks of the previous batch (still live in xmm7..xmm12 on entry). */
+.align	64
+.L256_dec_loop1:
+	cmpq	$96,%r9
+	jb	.L256_dec_finish_96	/* no further full batch: just hash the pending six blocks */
+	subq	$96,%r9
+
+	vmovdqa	%xmm12,%xmm6	/* newest pending block stays in a register; it is multiplied first */
+	vmovdqa	%xmm11,16-32(%rax)	/* spill the other five to scratch at %rdx+16 .. %rdx+95 */
+	vmovdqa	%xmm10,32-32(%rax)
+	vmovdqa	%xmm9,48-32(%rax)
+	vmovdqa	%xmm8,64-32(%rax)
+	vmovdqa	%xmm7,80-32(%rax)
+
+	vmovdqa	%xmm15,%xmm7	/* six fresh counter blocks */
+	vpaddd	one(%rip),%xmm7,%xmm8
+	vpaddd	two(%rip),%xmm7,%xmm9
+	vpaddd	one(%rip),%xmm9,%xmm10
+	vpaddd	two(%rip),%xmm9,%xmm11
+	vpaddd	one(%rip),%xmm11,%xmm12
+	vpaddd	two(%rip),%xmm11,%xmm15
+
+	vmovdqa	(%r8),%xmm4	/* round key 0; aligned load, so %r8 must be 16-byte aligned */
+	vpxor	%xmm4,%xmm7,%xmm7
+	vpxor	%xmm4,%xmm8,%xmm8
+	vpxor	%xmm4,%xmm9,%xmm9
+	vpxor	%xmm4,%xmm10,%xmm10
+	vpxor	%xmm4,%xmm11,%xmm11
+	vpxor	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	0-32(%rcx),%xmm4	/* first table entry times the newest pending block */
+	vpclmulqdq	$0x11,%xmm4,%xmm6,%xmm2	/* xmm2 accumulates high*high products */
+	vpclmulqdq	$0x00,%xmm4,%xmm6,%xmm3	/* xmm3 accumulates low*low products */
+	vpclmulqdq	$0x01,%xmm4,%xmm6,%xmm1	/* xmm1 accumulates both cross products */
+	vpclmulqdq	$0x10,%xmm4,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	16(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	-16(%rax),%xmm6	/* spilled block (old xmm11) times the second table entry */
+	vmovdqu	-16(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+
+	vmovdqu	32(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	0(%rax),%xmm6	/* spilled block (old xmm10) times the third table entry */
+	vmovdqu	0(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+
+	vmovdqu	48(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	16(%rax),%xmm6	/* spilled block (old xmm9) times the fourth table entry */
+	vmovdqu	16(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+
+	vmovdqu	64(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	32(%rax),%xmm6	/* spilled block (old xmm8) times the fifth table entry */
+	vmovdqu	32(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+
+	vmovdqu	80(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	96(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	112(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+/* Oldest pending block: XOR in the running accumulator, then multiply by the sixth table entry. */
+	vmovdqa	80-32(%rax),%xmm6
+	vpxor	%xmm0,%xmm6,%xmm6
+	vmovdqu	80-32(%rcx),%xmm5
+
+	vpclmulqdq	$0x01,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x10,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	128(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+/* Fold the cross terms into a 256-bit product: xmm5 = upper 128 bits, xmm0 = lower 128 bits. */
+	vpsrldq	$8,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm5
+	vpslldq	$8,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm0
+
+	vmovdqa	poly(%rip),%xmm3	/* poly is defined elsewhere; presumably the field reduction constant - verify */
+
+	vmovdqu	144(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	160(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	176(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	192(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	208(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	224(%r8),%xmm6	/* last round key, kept in xmm6 for the fused final step below */
+	vpalignr	$8,%xmm0,%xmm0,%xmm2	/* first fold: swap the qword halves, multiply by poly, XOR */
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm0
+	vpxor	%xmm0,%xmm2,%xmm0
+
+	vpxor	0(%rdi),%xmm6,%xmm4	/* input block XOR last round key: vaesenclast then yields the output block directly */
+	vaesenclast	%xmm4,%xmm7,%xmm7
+	vpxor	16(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm8,%xmm8
+	vpxor	32(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm9,%xmm9
+	vpxor	48(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm10,%xmm10
+	vpxor	64(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm11,%xmm11
+	vpxor	80(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm12,%xmm12
+
+	vpalignr	$8,%xmm0,%xmm0,%xmm2	/* second fold, same pattern */
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm0
+	vpxor	%xmm0,%xmm2,%xmm0
+
+	vmovdqu	%xmm7,0(%rsi)
+	vmovdqu	%xmm8,16(%rsi)
+	vmovdqu	%xmm9,32(%rsi)
+	vmovdqu	%xmm10,48(%rsi)
+	vmovdqu	%xmm11,64(%rsi)
+	vmovdqu	%xmm12,80(%rsi)
+
+	vpxor	%xmm5,%xmm0,%xmm0	/* add the upper half of the product: xmm0 is the updated accumulator */
+
+	leaq	96(%rdi),%rdi
+	leaq	96(%rsi),%rsi
+	jmp	.L256_dec_loop1
+
+.L256_dec_finish_96:	/* hash the last six output blocks without starting another AES batch */
+	vmovdqa	%xmm12,%xmm6
+	vmovdqa	%xmm11,16-32(%rax)
+	vmovdqa	%xmm10,32-32(%rax)
+	vmovdqa	%xmm9,48-32(%rax)
+	vmovdqa	%xmm8,64-32(%rax)
+	vmovdqa	%xmm7,80-32(%rax)
+
+	vmovdqu	0-32(%rcx),%xmm4	/* same block/table pairing as in the main loop */
+	vpclmulqdq	$0x10,%xmm4,%xmm6,%xmm1
+	vpclmulqdq	$0x11,%xmm4,%xmm6,%xmm2
+	vpclmulqdq	$0x00,%xmm4,%xmm6,%xmm3
+	vpclmulqdq	$0x01,%xmm4,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	-16(%rax),%xmm6
+	vmovdqu	-16(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	0(%rax),%xmm6
+	vmovdqu	0(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	16(%rax),%xmm6
+	vmovdqu	16(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	32(%rax),%xmm6
+	vmovdqu	32(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+/* Oldest block: XOR in the accumulator before the final multiply. */
+	vmovdqu	80-32(%rax),%xmm6
+	vpxor	%xmm0,%xmm6,%xmm6
+	vmovdqu	80-32(%rcx),%xmm5
+	vpclmulqdq	$0x11,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x10,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x01,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vpsrldq	$8,%xmm1,%xmm4	/* combine: xmm5 = upper 128 bits, xmm0 = lower 128 bits of the product */
+	vpxor	%xmm4,%xmm2,%xmm5
+	vpslldq	$8,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm0
+
+	vmovdqa	poly(%rip),%xmm3
+
+	vpalignr	$8,%xmm0,%xmm0,%xmm2	/* two fold steps against poly, as in the main loop */
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm0
+	vpxor	%xmm0,%xmm2,%xmm0
+
+	vpalignr	$8,%xmm0,%xmm0,%xmm2
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm0
+	vpxor	%xmm0,%xmm2,%xmm0
+
+	vpxor	%xmm5,%xmm0,%xmm0
+
+.L256_dec_loop2:
+/* Tail loop: handle the remaining whole blocks one at a time. */
+
+
+	cmpq	$16,%r9
+	jb	.L256_dec_out
+	subq	$16,%r9
+
+	vmovdqa	%xmm15,%xmm2	/* xmm2 = current counter block; xmm15 advances for the next pass */
+	vpaddd	one(%rip),%xmm15,%xmm15
+
+	vpxor	0(%r8),%xmm2,%xmm2	/* memory-operand AES forms: these need %r8 16-byte aligned */
+	vaesenc	16(%r8),%xmm2,%xmm2
+	vaesenc	32(%r8),%xmm2,%xmm2
+	vaesenc	48(%r8),%xmm2,%xmm2
+	vaesenc	64(%r8),%xmm2,%xmm2
+	vaesenc	80(%r8),%xmm2,%xmm2
+	vaesenc	96(%r8),%xmm2,%xmm2
+	vaesenc	112(%r8),%xmm2,%xmm2
+	vaesenc	128(%r8),%xmm2,%xmm2
+	vaesenc	144(%r8),%xmm2,%xmm2
+	vaesenc	160(%r8),%xmm2,%xmm2
+	vaesenc	176(%r8),%xmm2,%xmm2
+	vaesenc	192(%r8),%xmm2,%xmm2
+	vaesenc	208(%r8),%xmm2,%xmm2
+	vaesenclast	224(%r8),%xmm2,%xmm2
+	vpxor	(%rdi),%xmm2,%xmm2	/* same memory-operand form: %rdi must be 16-byte aligned here */
+	vmovdqu	%xmm2,(%rsi)
+	addq	$16,%rdi
+	addq	$16,%rsi
+
+	vpxor	%xmm2,%xmm0,%xmm0	/* absorb the output block into the accumulator */
+	vmovdqa	-32(%rcx),%xmm1	/* first table entry (the caller's original %rcx); aligned load */
+	call	GFMUL	/* GFMUL is defined elsewhere; presumably xmm0 = xmm0 * xmm1 in the hash field - verify */
+
+	jmp	.L256_dec_loop2
+
+.L256_dec_out:
+	vmovdqu	%xmm0,(%rdx)	/* write the updated accumulator back over the first 16 bytes at %rdx */
+	.byte	0xf3,0xc3	/* rep ret */
+.cfi_endproc	
+.size	aes256gcmsiv_dec, .-aes256gcmsiv_dec
+.globl	aes256gcmsiv_kdf
+.hidden aes256gcmsiv_kdf
+.type	aes256gcmsiv_kdf,@function
+.align	16
+aes256gcmsiv_kdf:
+.cfi_startproc	
+/* aes256gcmsiv_kdf: encrypts six counter-style blocks derived from the 16 bytes at (%rdi) with the 15 round keys at (%rdx) and writes the 96 bytes of output to (%rsi). */
+/* Every memory access uses vmovdqa, so %rdi, %rdx and %rsi must all be 16-byte aligned. */
+/* Clobbers %xmm1, %xmm2, %xmm4, %xmm6, %xmm7, %xmm8, %xmm11, %xmm12 and %xmm13; no general-purpose register is modified. */
+/* NOTE(review): the roles of the inputs (nonce, key schedule, derived key material) are inferred from the function name; confirm against the C prototype. */
+	vmovdqa	(%rdx),%xmm1	/* round key 0 */
+	vmovdqa	0(%rdi),%xmm4	/* the 16 input bytes */
+	vmovdqa	and_mask(%rip),%xmm11	/* and_mask is defined elsewhere; presumably clears dword 0 - TODO confirm */
+	vmovdqa	one(%rip),%xmm8	/* one is defined elsewhere; presumably {1,0,0,0} - TODO confirm */
+	vpshufd	$0x90,%xmm4,%xmm4	/* dwords become {in0, in0, in1, in2}: input bytes 0..11 move up by one dword */
+	vpand	%xmm11,%xmm4,%xmm4	/* block 0 */
+	vpaddd	%xmm8,%xmm4,%xmm6	/* blocks 1..5: each is the previous block plus the one constant */
+	vpaddd	%xmm8,%xmm6,%xmm7
+	vpaddd	%xmm8,%xmm7,%xmm11	/* xmm11 is reused here; the mask is no longer needed */
+	vpaddd	%xmm8,%xmm11,%xmm12
+	vpaddd	%xmm8,%xmm12,%xmm13
+
+	vpxor	%xmm1,%xmm4,%xmm4	/* initial whitening with round key 0 */
+	vpxor	%xmm1,%xmm6,%xmm6
+	vpxor	%xmm1,%xmm7,%xmm7
+	vpxor	%xmm1,%xmm11,%xmm11
+	vpxor	%xmm1,%xmm12,%xmm12
+	vpxor	%xmm1,%xmm13,%xmm13
+
+	vmovdqa	16(%rdx),%xmm1	/* rounds 1..13 follow; the key register alternates between xmm1 and xmm2 */
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vaesenc	%xmm1,%xmm6,%xmm6
+	vaesenc	%xmm1,%xmm7,%xmm7
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+
+	vmovdqa	32(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm4,%xmm4
+	vaesenc	%xmm2,%xmm6,%xmm6
+	vaesenc	%xmm2,%xmm7,%xmm7
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+	vaesenc	%xmm2,%xmm13,%xmm13
+
+	vmovdqa	48(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vaesenc	%xmm1,%xmm6,%xmm6
+	vaesenc	%xmm1,%xmm7,%xmm7
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+
+	vmovdqa	64(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm4,%xmm4
+	vaesenc	%xmm2,%xmm6,%xmm6
+	vaesenc	%xmm2,%xmm7,%xmm7
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+	vaesenc	%xmm2,%xmm13,%xmm13
+
+	vmovdqa	80(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vaesenc	%xmm1,%xmm6,%xmm6
+	vaesenc	%xmm1,%xmm7,%xmm7
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+
+	vmovdqa	96(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm4,%xmm4
+	vaesenc	%xmm2,%xmm6,%xmm6
+	vaesenc	%xmm2,%xmm7,%xmm7
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+	vaesenc	%xmm2,%xmm13,%xmm13
+
+	vmovdqa	112(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vaesenc	%xmm1,%xmm6,%xmm6
+	vaesenc	%xmm1,%xmm7,%xmm7
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+
+	vmovdqa	128(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm4,%xmm4
+	vaesenc	%xmm2,%xmm6,%xmm6
+	vaesenc	%xmm2,%xmm7,%xmm7
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+	vaesenc	%xmm2,%xmm13,%xmm13
+
+	vmovdqa	144(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vaesenc	%xmm1,%xmm6,%xmm6
+	vaesenc	%xmm1,%xmm7,%xmm7
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+
+	vmovdqa	160(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm4,%xmm4
+	vaesenc	%xmm2,%xmm6,%xmm6
+	vaesenc	%xmm2,%xmm7,%xmm7
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+	vaesenc	%xmm2,%xmm13,%xmm13
+
+	vmovdqa	176(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vaesenc	%xmm1,%xmm6,%xmm6
+	vaesenc	%xmm1,%xmm7,%xmm7
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+
+	vmovdqa	192(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm4,%xmm4
+	vaesenc	%xmm2,%xmm6,%xmm6
+	vaesenc	%xmm2,%xmm7,%xmm7
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+	vaesenc	%xmm2,%xmm13,%xmm13
+
+	vmovdqa	208(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vaesenc	%xmm1,%xmm6,%xmm6
+	vaesenc	%xmm1,%xmm7,%xmm7
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+
+	vmovdqa	224(%rdx),%xmm2	/* round key 14: final round of the 14-round (AES-256) schedule */
+	vaesenclast	%xmm2,%xmm4,%xmm4
+	vaesenclast	%xmm2,%xmm6,%xmm6
+	vaesenclast	%xmm2,%xmm7,%xmm7
+	vaesenclast	%xmm2,%xmm11,%xmm11
+	vaesenclast	%xmm2,%xmm12,%xmm12
+	vaesenclast	%xmm2,%xmm13,%xmm13
+
+/* Store the six encrypted blocks contiguously: 96 bytes starting at (%rsi). */
+	vmovdqa	%xmm4,0(%rsi)
+	vmovdqa	%xmm6,16(%rsi)
+	vmovdqa	%xmm7,32(%rsi)
+	vmovdqa	%xmm11,48(%rsi)
+	vmovdqa	%xmm12,64(%rsi)
+	vmovdqa	%xmm13,80(%rsi)
+	.byte	0xf3,0xc3	/* rep ret */
+.cfi_endproc	
+.size	aes256gcmsiv_kdf, .-aes256gcmsiv_kdf
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S b/third_party/boringssl/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
new file mode 100644
index 0000000..a6f5e07
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
@@ -0,0 +1,8974 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+.extern	OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
+
+chacha20_poly1305_constants:
+/* Constant tables for the routines in this file; they are emitted into .text and referenced %rip-relative. */
+.align	64
+.chacha20_consts:
+.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'	/* ASCII "expand 32-byte k"; the row is repeated so the table is 32 bytes long */
+.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
+.rol8:
+.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14	/* pshufb control: rotates every 32-bit lane left by 8 bits */
+.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
+.rol16:
+.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13	/* pshufb control: rotates every 32-bit lane left by 16 bits */
+.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
+.avx2_init:
+.long	0,0,0,0	/* layout note: a 32-byte read starting here also covers .sse_inc, giving {0,0,0,0,1,0,0,0} */
+.sse_inc:
+.long	1,0,0,0	/* adds 1 to dword 0 of a 128-bit value (used with paddd on the block-counter row) */
+.avx2_inc:
+.long	2,0,0,0,2,0,0,0	/* adds 2 to dword 0 of each 128-bit half of a 256-bit value */
+.clamp:
+.quad	0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC	/* AND mask applied with pand to the first keystream row before it is stored at 0(%rbp) as the Poly1305 multiplier */
+.quad	0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF	/* all-ones: presumably passes the next 16 bytes through unchanged when used as a 32-byte mask - TODO confirm */
+.align	16
+.and_masks:
+.byte	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00	/* sixteen 16-byte masks: row k (counting from 0) keeps the low k+1 bytes */
+.byte	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+
+.type	poly_hash_ad_internal,@function	/* poly_hash_ad_internal: absorbs %r8 bytes at (%rcx) into a freshly zeroed 130-bit accumulator %r10:%r11:%r12, multiplying by the 128-bit value at 0(%rbp)/8(%rbp) and reducing modulo 2^130 - 5 after each block. */
+.align	64	/* In: %rcx = data pointer, %r8 = byte count, %rbp = base of the state holding the multiplier. Out: %r10 (low 64 bits), %r11 (middle), %r12 (top bits). */
+poly_hash_ad_internal:	/* Clobbers %rax, %rdx, %r9, %r13, %r14, %r15 and flags. On the generic path %rcx ends at the start of the tail and %r8 ends at zero; the 13-byte path leaves both unchanged. */
+.cfi_startproc	
+	xorq	%r10,%r10	/* accumulator = 0 */
+	xorq	%r11,%r11
+	xorq	%r12,%r12
+	cmpq	$13,%r8
+	jne	hash_ad_loop	/* any length other than 13 takes the generic path */
+poly_fast_tls_ad:
+/* Exactly 13 bytes: load them with two overlapping 8-byte reads, so nothing beyond byte 12 is touched. */
+	movq	(%rcx),%r10	/* bytes 0..7 */
+	movq	5(%rcx),%r11	/* bytes 5..12 ... */
+	shrq	$24,%r11	/* ... shifted so bytes 8..12 sit in the low 40 bits, zero above */
+	movq	$1,%r12	/* the 2^128 bit that is added to every block */
+	movq	0+0(%rbp),%rax	/* multiply: 192-bit accumulator times the 128-bit multiplier, product in %r13:%r14:%r15:%r9 */
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	/* reduce: keep the low 130 bits ... */
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13	/* ... and add back 5 times the bits above 2^130, as 4*hi (%r13:%r14) plus hi (%r15:%r9) */
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	.byte	0xf3,0xc3	/* rep ret */
+hash_ad_loop:
+/* Generic path: absorb full 16-byte blocks while at least 16 bytes remain. */
+	cmpq	$16,%r8
+	jb	hash_ad_tail
+	addq	0(%rcx),%r10	/* accumulator += block + 2^128 */
+	adcq	8+0(%rcx),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax	/* same multiply-and-reduce sequence as on the 13-byte path */
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rcx),%rcx
+	subq	$16,%r8
+	jmp	hash_ad_loop
+hash_ad_tail:
+	cmpq	$0,%r8
+	je	1f	/* no partial block left */
+/* 1..15 bytes remain: assemble them into %r13:%r14 as a little-endian value, zero-extended to 16 bytes. */
+	xorq	%r13,%r13
+	xorq	%r14,%r14
+	xorq	%r15,%r15
+	addq	%r8,%rcx	/* point one past the last byte and walk backwards */
+hash_ad_tail_loop:
+	shldq	$8,%r13,%r14	/* shift the 128-bit value left by one byte ... */
+	shlq	$8,%r13
+	movzbq	-1(%rcx),%r15
+	xorq	%r15,%r13	/* ... and insert the next byte at the bottom */
+	decq	%rcx
+	decq	%r8
+	jne	hash_ad_tail_loop
+
+	addq	%r13,%r10	/* absorb the zero-padded tail together with the 2^128 bit, like a full block */
+	adcq	%r14,%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax	/* same multiply-and-reduce sequence once more */
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+
+1:
+	.byte	0xf3,0xc3	/* rep ret */
+.cfi_endproc	
+.size	poly_hash_ad_internal, .-poly_hash_ad_internal
+
+.globl	chacha20_poly1305_open
+.hidden chacha20_poly1305_open
+.type	chacha20_poly1305_open,@function
+.align	64
+chacha20_poly1305_open:
+.cfi_startproc	
+	pushq	%rbp
+.cfi_adjust_cfa_offset	8
+	pushq	%rbx
+.cfi_adjust_cfa_offset	8
+	pushq	%r12
+.cfi_adjust_cfa_offset	8
+	pushq	%r13
+.cfi_adjust_cfa_offset	8
+	pushq	%r14
+.cfi_adjust_cfa_offset	8
+	pushq	%r15
+.cfi_adjust_cfa_offset	8
+
+
+	pushq	%r9
+.cfi_adjust_cfa_offset	8
+	subq	$288 + 32,%rsp
+.cfi_adjust_cfa_offset	288 + 32
+.cfi_offset	rbp, -16
+.cfi_offset	rbx, -24
+.cfi_offset	r12, -32
+.cfi_offset	r13, -40
+.cfi_offset	r14, -48
+.cfi_offset	r15, -56
+	leaq	32(%rsp),%rbp
+	andq	$-32,%rbp
+	movq	%rdx,8+32(%rbp)
+	movq	%r8,0+32(%rbp)
+	movq	%rdx,%rbx
+
+	movl	OPENSSL_ia32cap_P+8(%rip),%eax
+	andl	$288,%eax
+	xorl	$288,%eax
+	jz	chacha20_poly1305_open_avx2
+
+1:
+	cmpq	$128,%rbx
+	jbe	open_sse_128
+
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqu	0(%r9),%xmm4
+	movdqu	16(%r9),%xmm8
+	movdqu	32(%r9),%xmm12
+	movdqa	%xmm12,%xmm7
+
+	movdqa	%xmm4,48(%rbp)
+	movdqa	%xmm8,64(%rbp)
+	movdqa	%xmm12,96(%rbp)
+	movq	$10,%r10
+1:
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,4
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,12
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,4
+
+	decq	%r10
+	jne	1b
+
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+
+	pand	.clamp(%rip),%xmm0
+	movdqa	%xmm0,0(%rbp)
+	movdqa	%xmm4,16(%rbp)
+
+	movq	%r8,%r8
+	call	poly_hash_ad_internal
+open_sse_main_loop:
+	cmpq	$256,%rbx
+	jb	2f
+
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	48(%rbp),%xmm4
+	movdqa	64(%rbp),%xmm8
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm8,%xmm9
+	movdqa	%xmm0,%xmm2
+	movdqa	%xmm4,%xmm6
+	movdqa	%xmm8,%xmm10
+	movdqa	%xmm0,%xmm3
+	movdqa	%xmm4,%xmm7
+	movdqa	%xmm8,%xmm11
+	movdqa	96(%rbp),%xmm15
+	paddd	.sse_inc(%rip),%xmm15
+	movdqa	%xmm15,%xmm14
+	paddd	.sse_inc(%rip),%xmm14
+	movdqa	%xmm14,%xmm13
+	paddd	.sse_inc(%rip),%xmm13
+	movdqa	%xmm13,%xmm12
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,96(%rbp)
+	movdqa	%xmm13,112(%rbp)
+	movdqa	%xmm14,128(%rbp)
+	movdqa	%xmm15,144(%rbp)
+
+
+
+	movq	$4,%rcx
+	movq	%rsi,%r8
+1:
+	movdqa	%xmm8,80(%rbp)
+	movdqa	.rol16(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	addq	0(%r8),%r10
+	adcq	8+0(%r8),%r11
+	adcq	$1,%r12
+
+	leaq	16(%r8),%r8
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm4
+	pxor	%xmm8,%xmm4
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movdqa	.rol8(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	80(%rbp),%xmm8
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+.byte	102,15,58,15,255,4
+.byte	102,69,15,58,15,219,8
+.byte	102,69,15,58,15,255,12
+.byte	102,15,58,15,246,4
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,12
+.byte	102,15,58,15,237,4
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,12
+.byte	102,15,58,15,228,4
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,12
+	movdqa	%xmm8,80(%rbp)
+	movdqa	.rol16(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	.rol8(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	80(%rbp),%xmm8
+.byte	102,15,58,15,255,12
+.byte	102,69,15,58,15,219,8
+.byte	102,69,15,58,15,255,4
+.byte	102,15,58,15,246,12
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,4
+.byte	102,15,58,15,237,12
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,4
+.byte	102,15,58,15,228,12
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,4
+
+	decq	%rcx
+	jge	1b
+	addq	0(%r8),%r10
+	adcq	8+0(%r8),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%r8),%r8
+	cmpq	$-6,%rcx
+	jg	1b
+	paddd	.chacha20_consts(%rip),%xmm3
+	paddd	48(%rbp),%xmm7
+	paddd	64(%rbp),%xmm11
+	paddd	144(%rbp),%xmm15
+	paddd	.chacha20_consts(%rip),%xmm2
+	paddd	48(%rbp),%xmm6
+	paddd	64(%rbp),%xmm10
+	paddd	128(%rbp),%xmm14
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	48(%rbp),%xmm5
+	paddd	64(%rbp),%xmm9
+	paddd	112(%rbp),%xmm13
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+	movdqa	%xmm12,80(%rbp)
+	movdqu	0 + 0(%rsi),%xmm12
+	pxor	%xmm3,%xmm12
+	movdqu	%xmm12,0 + 0(%rdi)
+	movdqu	16 + 0(%rsi),%xmm12
+	pxor	%xmm7,%xmm12
+	movdqu	%xmm12,16 + 0(%rdi)
+	movdqu	32 + 0(%rsi),%xmm12
+	pxor	%xmm11,%xmm12
+	movdqu	%xmm12,32 + 0(%rdi)
+	movdqu	48 + 0(%rsi),%xmm12
+	pxor	%xmm15,%xmm12
+	movdqu	%xmm12,48 + 0(%rdi)
+	movdqu	0 + 64(%rsi),%xmm3
+	movdqu	16 + 64(%rsi),%xmm7
+	movdqu	32 + 64(%rsi),%xmm11
+	movdqu	48 + 64(%rsi),%xmm15
+	pxor	%xmm3,%xmm2
+	pxor	%xmm7,%xmm6
+	pxor	%xmm11,%xmm10
+	pxor	%xmm14,%xmm15
+	movdqu	%xmm2,0 + 64(%rdi)
+	movdqu	%xmm6,16 + 64(%rdi)
+	movdqu	%xmm10,32 + 64(%rdi)
+	movdqu	%xmm15,48 + 64(%rdi)
+	movdqu	0 + 128(%rsi),%xmm3
+	movdqu	16 + 128(%rsi),%xmm7
+	movdqu	32 + 128(%rsi),%xmm11
+	movdqu	48 + 128(%rsi),%xmm15
+	pxor	%xmm3,%xmm1
+	pxor	%xmm7,%xmm5
+	pxor	%xmm11,%xmm9
+	pxor	%xmm13,%xmm15
+	movdqu	%xmm1,0 + 128(%rdi)
+	movdqu	%xmm5,16 + 128(%rdi)
+	movdqu	%xmm9,32 + 128(%rdi)
+	movdqu	%xmm15,48 + 128(%rdi)
+	movdqu	0 + 192(%rsi),%xmm3
+	movdqu	16 + 192(%rsi),%xmm7
+	movdqu	32 + 192(%rsi),%xmm11
+	movdqu	48 + 192(%rsi),%xmm15
+	pxor	%xmm3,%xmm0
+	pxor	%xmm7,%xmm4
+	pxor	%xmm11,%xmm8
+	pxor	80(%rbp),%xmm15
+	movdqu	%xmm0,0 + 192(%rdi)
+	movdqu	%xmm4,16 + 192(%rdi)
+	movdqu	%xmm8,32 + 192(%rdi)
+	movdqu	%xmm15,48 + 192(%rdi)
+
+	leaq	256(%rsi),%rsi
+	leaq	256(%rdi),%rdi
+	subq	$256,%rbx
+	jmp	open_sse_main_loop
+2:
+
+	testq	%rbx,%rbx
+	jz	open_sse_finalize
+	cmpq	$64,%rbx
+	ja	3f
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	48(%rbp),%xmm4
+	movdqa	64(%rbp),%xmm8
+	movdqa	96(%rbp),%xmm12
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,96(%rbp)
+
+	xorq	%r8,%r8
+	movq	%rbx,%rcx
+	cmpq	$16,%rcx
+	jb	2f
+1:
+	addq	0(%rsi,%r8), %r10
+	adcq	8+0(%rsi,%r8), %r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	subq	$16,%rcx
+2:
+	addq	$16,%r8
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,4
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,12
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,4
+
+	cmpq	$16,%rcx
+	jae	1b
+	cmpq	$160,%r8
+	jne	2b
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+
+	jmp	open_sse_tail_64_dec_loop
+3:
+	cmpq	$128,%rbx
+	ja	3f
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	48(%rbp),%xmm4
+	movdqa	64(%rbp),%xmm8
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm8,%xmm9
+	movdqa	96(%rbp),%xmm13
+	paddd	.sse_inc(%rip),%xmm13
+	movdqa	%xmm13,%xmm12
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,96(%rbp)
+	movdqa	%xmm13,112(%rbp)
+
+	movq	%rbx,%rcx
+	andq	$-16,%rcx
+	xorq	%r8,%r8
+1:
+	addq	0(%rsi,%r8), %r10
+	adcq	8+0(%rsi,%r8), %r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+2:
+	addq	$16,%r8
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,4
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,12
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,4
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,12
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,4
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,12
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,4
+
+	cmpq	%rcx,%r8
+	jb	1b
+	cmpq	$160,%r8
+	jne	2b
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	48(%rbp),%xmm5
+	paddd	64(%rbp),%xmm9
+	paddd	112(%rbp),%xmm13
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+	movdqu	0 + 0(%rsi),%xmm3
+	movdqu	16 + 0(%rsi),%xmm7
+	movdqu	32 + 0(%rsi),%xmm11
+	movdqu	48 + 0(%rsi),%xmm15
+	pxor	%xmm3,%xmm1
+	pxor	%xmm7,%xmm5
+	pxor	%xmm11,%xmm9
+	pxor	%xmm13,%xmm15
+	movdqu	%xmm1,0 + 0(%rdi)
+	movdqu	%xmm5,16 + 0(%rdi)
+	movdqu	%xmm9,32 + 0(%rdi)
+	movdqu	%xmm15,48 + 0(%rdi)
+
+	subq	$64,%rbx
+	leaq	64(%rsi),%rsi
+	leaq	64(%rdi),%rdi
+	jmp	open_sse_tail_64_dec_loop
+3:
+	cmpq	$192,%rbx
+	ja	3f
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	48(%rbp),%xmm4
+	movdqa	64(%rbp),%xmm8
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm8,%xmm9
+	movdqa	%xmm0,%xmm2
+	movdqa	%xmm4,%xmm6
+	movdqa	%xmm8,%xmm10
+	movdqa	96(%rbp),%xmm14
+	paddd	.sse_inc(%rip),%xmm14
+	movdqa	%xmm14,%xmm13
+	paddd	.sse_inc(%rip),%xmm13
+	movdqa	%xmm13,%xmm12
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,96(%rbp)
+	movdqa	%xmm13,112(%rbp)
+	movdqa	%xmm14,128(%rbp)
+
+	movq	%rbx,%rcx
+	movq	$160,%r8
+	cmpq	$160,%rcx
+	cmovgq	%r8,%rcx
+	andq	$-16,%rcx
+	xorq	%r8,%r8
+1:
+	addq	0(%rsi,%r8), %r10
+	adcq	8+0(%rsi,%r8), %r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+2:
+	addq	$16,%r8
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,4
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,12
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,4
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,12
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm6
+	pxor	%xmm3,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm6
+	pxor	%xmm3,%xmm6
+.byte	102,15,58,15,246,4
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,12
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,4
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,12
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,4
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm6
+	pxor	%xmm3,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm6
+	pxor	%xmm3,%xmm6
+.byte	102,15,58,15,246,12
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,4
+
+	cmpq	%rcx,%r8
+	jb	1b
+	cmpq	$160,%r8
+	jne	2b
+	cmpq	$176,%rbx
+	jb	1f
+	addq	160(%rsi),%r10
+	adcq	8+160(%rsi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	cmpq	$192,%rbx
+	jb	1f
+	addq	176(%rsi),%r10
+	adcq	8+176(%rsi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+1:
+	paddd	.chacha20_consts(%rip),%xmm2
+	paddd	48(%rbp),%xmm6
+	paddd	64(%rbp),%xmm10
+	paddd	128(%rbp),%xmm14
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	48(%rbp),%xmm5
+	paddd	64(%rbp),%xmm9
+	paddd	112(%rbp),%xmm13
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+	movdqu	0 + 0(%rsi),%xmm3
+	movdqu	16 + 0(%rsi),%xmm7
+	movdqu	32 + 0(%rsi),%xmm11
+	movdqu	48 + 0(%rsi),%xmm15
+	pxor	%xmm3,%xmm2
+	pxor	%xmm7,%xmm6
+	pxor	%xmm11,%xmm10
+	pxor	%xmm14,%xmm15
+	movdqu	%xmm2,0 + 0(%rdi)
+	movdqu	%xmm6,16 + 0(%rdi)
+	movdqu	%xmm10,32 + 0(%rdi)
+	movdqu	%xmm15,48 + 0(%rdi)
+	movdqu	0 + 64(%rsi),%xmm3
+	movdqu	16 + 64(%rsi),%xmm7
+	movdqu	32 + 64(%rsi),%xmm11
+	movdqu	48 + 64(%rsi),%xmm15
+	pxor	%xmm3,%xmm1
+	pxor	%xmm7,%xmm5
+	pxor	%xmm11,%xmm9
+	pxor	%xmm13,%xmm15
+	movdqu	%xmm1,0 + 64(%rdi)
+	movdqu	%xmm5,16 + 64(%rdi)
+	movdqu	%xmm9,32 + 64(%rdi)
+	movdqu	%xmm15,48 + 64(%rdi)
+
+	subq	$128,%rbx
+	leaq	128(%rsi),%rsi
+	leaq	128(%rdi),%rdi
+	jmp	open_sse_tail_64_dec_loop
+3:
+
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	48(%rbp),%xmm4
+	movdqa	64(%rbp),%xmm8
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm8,%xmm9
+	movdqa	%xmm0,%xmm2
+	movdqa	%xmm4,%xmm6
+	movdqa	%xmm8,%xmm10
+	movdqa	%xmm0,%xmm3
+	movdqa	%xmm4,%xmm7
+	movdqa	%xmm8,%xmm11
+	movdqa	96(%rbp),%xmm15
+	paddd	.sse_inc(%rip),%xmm15
+	movdqa	%xmm15,%xmm14
+	paddd	.sse_inc(%rip),%xmm14
+	movdqa	%xmm14,%xmm13
+	paddd	.sse_inc(%rip),%xmm13
+	movdqa	%xmm13,%xmm12
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,96(%rbp)
+	movdqa	%xmm13,112(%rbp)
+	movdqa	%xmm14,128(%rbp)
+	movdqa	%xmm15,144(%rbp)
+
+	xorq	%r8,%r8
+1:
+	addq	0(%rsi,%r8), %r10
+	adcq	8+0(%rsi,%r8), %r11
+	adcq	$1,%r12
+	movdqa	%xmm11,80(%rbp)
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm11
+	pslld	$12,%xmm11
+	psrld	$20,%xmm4
+	pxor	%xmm11,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm11
+	pslld	$7,%xmm11
+	psrld	$25,%xmm4
+	pxor	%xmm11,%xmm4
+.byte	102,15,58,15,228,4
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,12
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm11
+	pslld	$12,%xmm11
+	psrld	$20,%xmm5
+	pxor	%xmm11,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm11
+	pslld	$7,%xmm11
+	psrld	$25,%xmm5
+	pxor	%xmm11,%xmm5
+.byte	102,15,58,15,237,4
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,12
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm11
+	pslld	$12,%xmm11
+	psrld	$20,%xmm6
+	pxor	%xmm11,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm11
+	pslld	$7,%xmm11
+	psrld	$25,%xmm6
+	pxor	%xmm11,%xmm6
+.byte	102,15,58,15,246,4
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,12
+	movdqa	80(%rbp),%xmm11
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movdqa	%xmm9,80(%rbp)
+	paddd	%xmm7,%xmm3
+	pxor	%xmm3,%xmm15
+	pshufb	.rol16(%rip),%xmm15
+	paddd	%xmm15,%xmm11
+	pxor	%xmm11,%xmm7
+	movdqa	%xmm7,%xmm9
+	pslld	$12,%xmm9
+	psrld	$20,%xmm7
+	pxor	%xmm9,%xmm7
+	paddd	%xmm7,%xmm3
+	pxor	%xmm3,%xmm15
+	pshufb	.rol8(%rip),%xmm15
+	paddd	%xmm15,%xmm11
+	pxor	%xmm11,%xmm7
+	movdqa	%xmm7,%xmm9
+	pslld	$7,%xmm9
+	psrld	$25,%xmm7
+	pxor	%xmm9,%xmm7
+.byte	102,15,58,15,255,4
+.byte	102,69,15,58,15,219,8
+.byte	102,69,15,58,15,255,12
+	movdqa	80(%rbp),%xmm9
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	movdqa	%xmm11,80(%rbp)
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm11
+	pslld	$12,%xmm11
+	psrld	$20,%xmm4
+	pxor	%xmm11,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm11
+	pslld	$7,%xmm11
+	psrld	$25,%xmm4
+	pxor	%xmm11,%xmm4
+.byte	102,15,58,15,228,12
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,4
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm11
+	pslld	$12,%xmm11
+	psrld	$20,%xmm5
+	pxor	%xmm11,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm11
+	pslld	$7,%xmm11
+	psrld	$25,%xmm5
+	pxor	%xmm11,%xmm5
+.byte	102,15,58,15,237,12
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,4
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm11
+	pslld	$12,%xmm11
+	psrld	$20,%xmm6
+	pxor	%xmm11,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm11
+	pslld	$7,%xmm11
+	psrld	$25,%xmm6
+	pxor	%xmm11,%xmm6
+.byte	102,15,58,15,246,12
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,4
+	movdqa	80(%rbp),%xmm11
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	movdqa	%xmm9,80(%rbp)
+	paddd	%xmm7,%xmm3
+	pxor	%xmm3,%xmm15
+	pshufb	.rol16(%rip),%xmm15
+	paddd	%xmm15,%xmm11
+	pxor	%xmm11,%xmm7
+	movdqa	%xmm7,%xmm9
+	pslld	$12,%xmm9
+	psrld	$20,%xmm7
+	pxor	%xmm9,%xmm7
+	paddd	%xmm7,%xmm3
+	pxor	%xmm3,%xmm15
+	pshufb	.rol8(%rip),%xmm15
+	paddd	%xmm15,%xmm11
+	pxor	%xmm11,%xmm7
+	movdqa	%xmm7,%xmm9
+	pslld	$7,%xmm9
+	psrld	$25,%xmm7
+	pxor	%xmm9,%xmm7
+.byte	102,15,58,15,255,12
+.byte	102,69,15,58,15,219,8
+.byte	102,69,15,58,15,255,4
+	movdqa	80(%rbp),%xmm9
+
+	addq	$16,%r8
+	cmpq	$160,%r8
+	jb	1b
+	movq	%rbx,%rcx
+	andq	$-16,%rcx
+1:
+	addq	0(%rsi,%r8), %r10
+	adcq	8+0(%rsi,%r8), %r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	addq	$16,%r8
+	cmpq	%rcx,%r8
+	jb	1b
+	paddd	.chacha20_consts(%rip),%xmm3
+	paddd	48(%rbp),%xmm7
+	paddd	64(%rbp),%xmm11
+	paddd	144(%rbp),%xmm15
+	paddd	.chacha20_consts(%rip),%xmm2
+	paddd	48(%rbp),%xmm6
+	paddd	64(%rbp),%xmm10
+	paddd	128(%rbp),%xmm14
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	48(%rbp),%xmm5
+	paddd	64(%rbp),%xmm9
+	paddd	112(%rbp),%xmm13
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+	movdqa	%xmm12,80(%rbp)
+	movdqu	0 + 0(%rsi),%xmm12
+	pxor	%xmm3,%xmm12
+	movdqu	%xmm12,0 + 0(%rdi)
+	movdqu	16 + 0(%rsi),%xmm12
+	pxor	%xmm7,%xmm12
+	movdqu	%xmm12,16 + 0(%rdi)
+	movdqu	32 + 0(%rsi),%xmm12
+	pxor	%xmm11,%xmm12
+	movdqu	%xmm12,32 + 0(%rdi)
+	movdqu	48 + 0(%rsi),%xmm12
+	pxor	%xmm15,%xmm12
+	movdqu	%xmm12,48 + 0(%rdi)
+	movdqu	0 + 64(%rsi),%xmm3
+	movdqu	16 + 64(%rsi),%xmm7
+	movdqu	32 + 64(%rsi),%xmm11
+	movdqu	48 + 64(%rsi),%xmm15
+	pxor	%xmm3,%xmm2
+	pxor	%xmm7,%xmm6
+	pxor	%xmm11,%xmm10
+	pxor	%xmm14,%xmm15
+	movdqu	%xmm2,0 + 64(%rdi)
+	movdqu	%xmm6,16 + 64(%rdi)
+	movdqu	%xmm10,32 + 64(%rdi)
+	movdqu	%xmm15,48 + 64(%rdi)
+	movdqu	0 + 128(%rsi),%xmm3
+	movdqu	16 + 128(%rsi),%xmm7
+	movdqu	32 + 128(%rsi),%xmm11
+	movdqu	48 + 128(%rsi),%xmm15
+	pxor	%xmm3,%xmm1
+	pxor	%xmm7,%xmm5
+	pxor	%xmm11,%xmm9
+	pxor	%xmm13,%xmm15
+	movdqu	%xmm1,0 + 128(%rdi)
+	movdqu	%xmm5,16 + 128(%rdi)
+	movdqu	%xmm9,32 + 128(%rdi)
+	movdqu	%xmm15,48 + 128(%rdi)
+
+	movdqa	80(%rbp),%xmm12
+	subq	$192,%rbx
+	leaq	192(%rsi),%rsi
+	leaq	192(%rdi),%rdi
+
+
+open_sse_tail_64_dec_loop:	# XOR the remaining whole 16-byte chunks with the keystream queued in xmm0, xmm4, xmm8, xmm12
+	cmpq	$16,%rbx	# rbx = bytes still to process
+	jb	1f	# fewer than 16 left: go prepare the partial-block path
+	subq	$16,%rbx
+	movdqu	(%rsi),%xmm3	# unaligned load of 16 input bytes
+	pxor	%xmm3,%xmm0	# input XOR keystream
+	movdqu	%xmm0,(%rdi)	# unaligned store of 16 output bytes
+	leaq	16(%rsi),%rsi
+	leaq	16(%rdi),%rdi
+	movdqa	%xmm4,%xmm0	# shift the keystream queue: xmm0 <- xmm4 <- xmm8 <- xmm12
+	movdqa	%xmm8,%xmm4
+	movdqa	%xmm12,%xmm8
+	jmp	open_sse_tail_64_dec_loop	# note: this loop never touches the Poly1305 accumulator r10:r11:r12
+1:
+	movdqa	%xmm0,%xmm1	# open_sse_tail_16 (fallen into next) takes its keystream from xmm1
+
+
+open_sse_tail_16:	# handle a final partial block of rbx (< 16) bytes: decrypt with xmm1 and absorb the zero-padded input into Poly1305
+	testq	%rbx,%rbx
+	jz	open_sse_finalize	# nothing left: go straight to tag computation
+
+
+
+	pxor	%xmm3,%xmm3	# xmm3 = 0, becomes the zero-padded input block
+	leaq	-1(%rsi,%rbx), %rsi	# rsi -> last input byte; the block is gathered back to front
+	movq	%rbx,%r8	# r8 = byte counter for the gather loop
+2:
+	pslldq	$1,%xmm3	# make room in byte 0
+	pinsrb	$0,(%rsi),%xmm3	# insert one input byte (byte-wise access, so no read past the end of the input)
+	subq	$1,%rsi
+	subq	$1,%r8
+	jnz	2b
+
+3:
+.byte	102,73,15,126,221	# hand-encoded movq %xmm3,%r13 (low 8 bytes of the padded input)
+	pextrq	$1,%xmm3,%r14	# r14 = high 8 bytes of the padded input
+
+	pxor	%xmm1,%xmm3	# xmm3 = input XOR keystream
+
+
+2:
+	pextrb	$0,%xmm3,(%rdi)	# write exactly rbx output bytes, one at a time
+	psrldq	$1,%xmm3
+	addq	$1,%rdi
+	subq	$1,%rbx
+	jne	2b
+
+	addq	%r13,%r10	# accumulator a2:a1:a0 = r12:r11:r10 += padded input block ...
+	adcq	%r14,%r11
+	adcq	$1,%r12	# ... plus 2^128
+	movq	0+0(%rbp),%rax	# rax = r0 (limb at 0(%rbp)); multiply the accumulator by r1:r0
+	movq	%rax,%r15
+	mulq	%r10	# rdx:rax = r0 * a0
+	movq	%rax,%r13	# t0
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11	# rdx:rax = r0 * a1
+	imulq	%r12,%r15	# r15 = r0 * a2
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# rax = r1 (limb at 8(%rbp))
+	movq	%rax,%r9
+	mulq	%r10	# rdx:rax = r1 * a0
+	addq	%rax,%r14	# t1 complete
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11	# rdx:rax = r1 * a1
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9	# r9 = r1 * a2
+	addq	%r10,%r15	# t2 complete
+	adcq	%rdx,%r9	# t3 complete; product is r9:r15:r14:r13
+	movq	%r13,%r10	# reduce: keep the low 130 bits of the product in r12:r11:r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13	# r14:r13 = 4 * (product >> 130)
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15	# r9:r15 = product >> 130
+	shrq	$2,%r9
+	addq	%r13,%r10	# add both terms, i.e. 5 * (product >> 130)
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12	# falls through to open_sse_finalize
+
+
+open_sse_finalize:	# absorb the 16-byte block at 32(%rbp), finish Poly1305, store the 16-byte tag through the popped pointer, restore registers and return
+	addq	32(%rbp),%r10	# accumulator r12:r11:r10 += block at 32(%rbp) (presumably the length block; written outside this view) ...
+	adcq	8+32(%rbp),%r11
+	adcq	$1,%r12	# ... plus 2^128
+	movq	0+0(%rbp),%rax	# rax = r0 (limb at 0(%rbp)); multiply the accumulator by r1:r0
+	movq	%rax,%r15
+	mulq	%r10	# rdx:rax = r0 * a0
+	movq	%rax,%r13	# t0
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11	# rdx:rax = r0 * a1
+	imulq	%r12,%r15	# r15 = r0 * a2
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# rax = r1 (limb at 8(%rbp))
+	movq	%rax,%r9
+	mulq	%r10	# rdx:rax = r1 * a0
+	addq	%rax,%r14	# t1 complete
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11	# rdx:rax = r1 * a1
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9	# r9 = r1 * a2
+	addq	%r10,%r15	# t2 complete
+	adcq	%rdx,%r9	# t3 complete; product is r9:r15:r14:r13
+	movq	%r13,%r10	# reduce: keep the low 130 bits of the product in r12:r11:r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13	# r14:r13 = 4 * (product >> 130)
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15	# r9:r15 = product >> 130
+	shrq	$2,%r9
+	addq	%r13,%r10	# add both terms, i.e. 5 * (product >> 130)
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+
+	movq	%r10,%r13	# keep a copy of the accumulator for the conditional select below
+	movq	%r11,%r14
+	movq	%r12,%r15
+	subq	$-5,%r10	# subtract p = 2^130 - 5 limb by limb: low limb 2^64 - 5 ...
+	sbbq	$-1,%r11	# ... middle limb 2^64 - 1 ...
+	sbbq	$3,%r12	# ... top limb 3
+	cmovcq	%r13,%r10	# borrow means accumulator < p: keep the original value (branch-free select)
+	cmovcq	%r14,%r11
+	cmovcq	%r15,%r12
+
+	addq	0+16(%rbp),%r10	# tag = (accumulator + 128-bit value at 16(%rbp)) mod 2^128; open_sse_128 stores the second key half there
+	adcq	8+16(%rbp),%r11
+
+	addq	$288 + 32,%rsp	# release the local frame
+.cfi_adjust_cfa_offset	-(288 + 32)
+	popq	%r9	# pointer the tag is written through (presumably pushed by the prologue, outside this view)
+.cfi_adjust_cfa_offset	-8
+	movq	%r10,(%r9)	# store the 16-byte tag
+	movq	%r11,8(%r9)
+
+	popq	%r15
+.cfi_adjust_cfa_offset	-8
+	popq	%r14
+.cfi_adjust_cfa_offset	-8
+	popq	%r13
+.cfi_adjust_cfa_offset	-8
+	popq	%r12
+.cfi_adjust_cfa_offset	-8
+	popq	%rbx
+.cfi_adjust_cfa_offset	-8
+	popq	%rbp
+.cfi_adjust_cfa_offset	-8
+	.byte	0xf3,0xc3	# hand-encoded rep ret
+.cfi_adjust_cfa_offset	(8 * 7) + 288 + 32	# code after this ret runs with the full frame live: undo all seven pops above (r9 plus six saved registers) and the 288 + 32 release
+
+open_sse_128:	# short-input path: run three ChaCha20 states at once; state 0 supplies the Poly1305 key, states 1 and 2 supply up to 128 bytes of keystream
+	movdqu	.chacha20_consts(%rip),%xmm0	# row 0 (constants) for states 0, 1, 2 in xmm0, xmm1, xmm2
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm0,%xmm2
+	movdqu	0(%r9),%xmm4	# row 1 from 0(%r9) for all three states
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm4,%xmm6
+	movdqu	16(%r9),%xmm8	# row 2 from 16(%r9) for all three states
+	movdqa	%xmm8,%xmm9
+	movdqa	%xmm8,%xmm10
+	movdqu	32(%r9),%xmm12	# row 3 from 32(%r9) for state 0
+	movdqa	%xmm12,%xmm13
+	paddd	.sse_inc(%rip),%xmm13	# state 1 row 3 = state 0 row 3 + .sse_inc (presumably a block-counter increment; constant defined outside this view)
+	movdqa	%xmm13,%xmm14
+	paddd	.sse_inc(%rip),%xmm14	# state 2 row 3 = state 1 row 3 + .sse_inc
+	movdqa	%xmm4,%xmm7	# keep the initial rows 1, 2 and state 1 row 3 for the feed-forward additions after the rounds
+	movdqa	%xmm8,%xmm11
+	movdqa	%xmm13,%xmm15
+	movq	$10,%r10	# 10 iterations, each running two round passes over every state
+1:
+	paddd	%xmm4,%xmm0	# state 0, first pass: a += b
+	pxor	%xmm0,%xmm12	# d ^= a
+	pshufb	.rol16(%rip),%xmm12	# byte shuffle through the .rol16 table (presumably a 16-bit rotate; table defined outside this view)
+	paddd	%xmm12,%xmm8	# c += d
+	pxor	%xmm8,%xmm4	# b ^= c
+	movdqa	%xmm4,%xmm3	# rotate each dword of b left by 12, using xmm3 as scratch
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12	# byte shuffle through the .rol8 table (presumably an 8-bit rotate)
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3	# rotate each dword of b left by 7
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,4	# hand-encoded palignr $4,%xmm4,%xmm4: rotate row b by one dword
+.byte	102,69,15,58,15,192,8	# hand-encoded palignr $8,%xmm8,%xmm8: rotate row c by two dwords
+.byte	102,69,15,58,15,228,12	# hand-encoded palignr $12,%xmm12,%xmm12: rotate row d by three dwords
+	paddd	%xmm5,%xmm1	# state 1, first pass (same sequence on xmm1, xmm5, xmm9, xmm13)
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,4	# palignr $4,%xmm5,%xmm5
+.byte	102,69,15,58,15,201,8	# palignr $8,%xmm9,%xmm9
+.byte	102,69,15,58,15,237,12	# palignr $12,%xmm13,%xmm13
+	paddd	%xmm6,%xmm2	# state 2, first pass (same sequence on xmm2, xmm6, xmm10, xmm14)
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm6
+	pxor	%xmm3,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm6
+	pxor	%xmm3,%xmm6
+.byte	102,15,58,15,246,4	# palignr $4,%xmm6,%xmm6
+.byte	102,69,15,58,15,210,8	# palignr $8,%xmm10,%xmm10
+.byte	102,69,15,58,15,246,12	# palignr $12,%xmm14,%xmm14
+	paddd	%xmm4,%xmm0	# state 0, second pass on the rotated rows
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12	# palignr $12,%xmm4,%xmm4: undo the row rotations applied after the first pass
+.byte	102,69,15,58,15,192,8	# palignr $8,%xmm8,%xmm8
+.byte	102,69,15,58,15,228,4	# palignr $4,%xmm12,%xmm12
+	paddd	%xmm5,%xmm1	# state 1, second pass
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,12	# palignr $12,%xmm5,%xmm5
+.byte	102,69,15,58,15,201,8	# palignr $8,%xmm9,%xmm9
+.byte	102,69,15,58,15,237,4	# palignr $4,%xmm13,%xmm13
+	paddd	%xmm6,%xmm2	# state 2, second pass
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm6
+	pxor	%xmm3,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm6
+	pxor	%xmm3,%xmm6
+.byte	102,15,58,15,246,12	# palignr $12,%xmm6,%xmm6
+.byte	102,69,15,58,15,210,8	# palignr $8,%xmm10,%xmm10
+.byte	102,69,15,58,15,246,4	# palignr $4,%xmm14,%xmm14
+
+	decq	%r10
+	jnz	1b
+	paddd	.chacha20_consts(%rip),%xmm0	# feed-forward: add the initial rows back in
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	.chacha20_consts(%rip),%xmm2
+	paddd	%xmm7,%xmm4
+	paddd	%xmm7,%xmm5
+	paddd	%xmm7,%xmm6
+	paddd	%xmm11,%xmm9	# rows 2 and 3 of state 0 (xmm8, xmm12) get no feed-forward; they are not read again in this path
+	paddd	%xmm11,%xmm10
+	paddd	%xmm15,%xmm13
+	paddd	.sse_inc(%rip),%xmm15	# rebuild the initial row 3 of state 2
+	paddd	%xmm15,%xmm14
+
+	pand	.clamp(%rip),%xmm0	# mask state 0 row 0 with .clamp (presumably the Poly1305 r clamp; constant defined outside this view)
+	movdqa	%xmm0,0(%rbp)	# 0(%rbp) = r, the multiplier used by every Poly1305 step below
+	movdqa	%xmm4,16(%rbp)	# 16(%rbp) = state 0 row 1, added to the accumulator in open_sse_finalize
+
+	movq	%r8,%r8	# no-op as generated (source and destination are the same register)
+	call	poly_hash_ad_internal	# defined outside this view; presumably hashes the additional data into r10:r11:r12
+1:
+	cmpq	$16,%rbx	# rbx = bytes still to process
+	jb	open_sse_tail_16	# fewer than 16 left: partial-block path, which decrypts with xmm1
+	subq	$16,%rbx
+	addq	0(%rsi),%r10	# accumulator r12:r11:r10 += 16 input bytes ...
+	adcq	8+0(%rsi),%r11
+	adcq	$1,%r12	# ... plus 2^128
+
+
+	movdqu	0(%rsi),%xmm3	# the input is read for hashing above before the store below can overwrite it
+	pxor	%xmm3,%xmm1	# input XOR keystream
+	movdqu	%xmm1,0(%rdi)
+	leaq	16(%rsi),%rsi
+	leaq	16(%rdi),%rdi
+	movq	0+0(%rbp),%rax	# rax = r0; multiply the accumulator by r1:r0
+	movq	%rax,%r15
+	mulq	%r10	# rdx:rax = r0 * a0
+	movq	%rax,%r13	# t0
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11	# rdx:rax = r0 * a1
+	imulq	%r12,%r15	# r15 = r0 * a2
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# rax = r1
+	movq	%rax,%r9
+	mulq	%r10	# rdx:rax = r1 * a0
+	addq	%rax,%r14	# t1 complete
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11	# rdx:rax = r1 * a1
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9	# r9 = r1 * a2
+	addq	%r10,%r15	# t2 complete
+	adcq	%rdx,%r9	# t3 complete; product is r9:r15:r14:r13
+	movq	%r13,%r10	# reduce: keep the low 130 bits of the product in r12:r11:r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13	# r14:r13 = 4 * (product >> 130)
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15	# r9:r15 = product >> 130
+	shrq	$2,%r9
+	addq	%r13,%r10	# add both terms, i.e. 5 * (product >> 130)
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+
+	movdqa	%xmm5,%xmm1	# shift the keystream queue: xmm1 <- xmm5 <- xmm9 <- xmm13 <- xmm2 <- xmm6 <- xmm10 <- xmm14
+	movdqa	%xmm9,%xmm5
+	movdqa	%xmm13,%xmm9
+	movdqa	%xmm2,%xmm13
+	movdqa	%xmm6,%xmm2
+	movdqa	%xmm10,%xmm6
+	movdqa	%xmm14,%xmm10
+	jmp	1b
+	jmp	open_sse_tail_16	# NOTE(review): unreachable, follows an unconditional jmp; fix in the generator rather than in this generated file
+.size	chacha20_poly1305_open, .-chacha20_poly1305_open
+.cfi_endproc	
+
+
+
+
+.globl	chacha20_poly1305_seal
+.hidden chacha20_poly1305_seal
+.type	chacha20_poly1305_seal,@function
+.align	64
+chacha20_poly1305_seal:
+.cfi_startproc	
+	pushq	%rbp
+.cfi_adjust_cfa_offset	8
+	pushq	%rbx
+.cfi_adjust_cfa_offset	8
+	pushq	%r12
+.cfi_adjust_cfa_offset	8
+	pushq	%r13
+.cfi_adjust_cfa_offset	8
+	pushq	%r14
+.cfi_adjust_cfa_offset	8
+	pushq	%r15
+.cfi_adjust_cfa_offset	8
+
+
+	pushq	%r9
+.cfi_adjust_cfa_offset	8
+	subq	$288 + 32,%rsp
+.cfi_adjust_cfa_offset	288 + 32
+.cfi_offset	rbp, -16
+.cfi_offset	rbx, -24
+.cfi_offset	r12, -32
+.cfi_offset	r13, -40
+.cfi_offset	r14, -48
+.cfi_offset	r15, -56
+	leaq	32(%rsp),%rbp
+	andq	$-32,%rbp
+	movq	56(%r9),%rbx
+	addq	%rdx,%rbx
+	movq	%rbx,8+32(%rbp)
+	movq	%r8,0+32(%rbp)
+	movq	%rdx,%rbx
+
+	movl	OPENSSL_ia32cap_P+8(%rip),%eax
+	andl	$288,%eax
+	xorl	$288,%eax
+	jz	chacha20_poly1305_seal_avx2
+
+	cmpq	$128,%rbx
+	jbe	seal_sse_128
+
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqu	0(%r9),%xmm4
+	movdqu	16(%r9),%xmm8
+	movdqu	32(%r9),%xmm12
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm0,%xmm2
+	movdqa	%xmm0,%xmm3
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm4,%xmm6
+	movdqa	%xmm4,%xmm7
+	movdqa	%xmm8,%xmm9
+	movdqa	%xmm8,%xmm10
+	movdqa	%xmm8,%xmm11
+	movdqa	%xmm12,%xmm15
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,%xmm14
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,%xmm13
+	paddd	.sse_inc(%rip),%xmm12
+
+	movdqa	%xmm4,48(%rbp)
+	movdqa	%xmm8,64(%rbp)
+	movdqa	%xmm12,96(%rbp)
+	movdqa	%xmm13,112(%rbp)
+	movdqa	%xmm14,128(%rbp)
+	movdqa	%xmm15,144(%rbp)
+	movq	$10,%r10
+1:
+	movdqa	%xmm8,80(%rbp)
+	movdqa	.rol16(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	.rol8(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	80(%rbp),%xmm8
+.byte	102,15,58,15,255,4
+.byte	102,69,15,58,15,219,8
+.byte	102,69,15,58,15,255,12
+.byte	102,15,58,15,246,4
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,12
+.byte	102,15,58,15,237,4
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,12
+.byte	102,15,58,15,228,4
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,12
+	movdqa	%xmm8,80(%rbp)
+	movdqa	.rol16(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	.rol8(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	80(%rbp),%xmm8
+.byte	102,15,58,15,255,12
+.byte	102,69,15,58,15,219,8
+.byte	102,69,15,58,15,255,4
+.byte	102,15,58,15,246,12
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,4
+.byte	102,15,58,15,237,12
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,4
+.byte	102,15,58,15,228,12
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,4
+
+	decq	%r10
+	jnz	1b
+	paddd	.chacha20_consts(%rip),%xmm3
+	paddd	48(%rbp),%xmm7
+	paddd	64(%rbp),%xmm11
+	paddd	144(%rbp),%xmm15
+	paddd	.chacha20_consts(%rip),%xmm2
+	paddd	48(%rbp),%xmm6
+	paddd	64(%rbp),%xmm10
+	paddd	128(%rbp),%xmm14
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	48(%rbp),%xmm5
+	paddd	64(%rbp),%xmm9
+	paddd	112(%rbp),%xmm13
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+
+
+	pand	.clamp(%rip),%xmm3
+	movdqa	%xmm3,0(%rbp)
+	movdqa	%xmm7,16(%rbp)
+
+	movq	%r8,%r8
+	call	poly_hash_ad_internal
+	movdqu	0 + 0(%rsi),%xmm3
+	movdqu	16 + 0(%rsi),%xmm7
+	movdqu	32 + 0(%rsi),%xmm11
+	movdqu	48 + 0(%rsi),%xmm15
+	pxor	%xmm3,%xmm2
+	pxor	%xmm7,%xmm6
+	pxor	%xmm11,%xmm10
+	pxor	%xmm14,%xmm15
+	movdqu	%xmm2,0 + 0(%rdi)
+	movdqu	%xmm6,16 + 0(%rdi)
+	movdqu	%xmm10,32 + 0(%rdi)
+	movdqu	%xmm15,48 + 0(%rdi)
+	movdqu	0 + 64(%rsi),%xmm3
+	movdqu	16 + 64(%rsi),%xmm7
+	movdqu	32 + 64(%rsi),%xmm11
+	movdqu	48 + 64(%rsi),%xmm15
+	pxor	%xmm3,%xmm1
+	pxor	%xmm7,%xmm5
+	pxor	%xmm11,%xmm9
+	pxor	%xmm13,%xmm15
+	movdqu	%xmm1,0 + 64(%rdi)
+	movdqu	%xmm5,16 + 64(%rdi)
+	movdqu	%xmm9,32 + 64(%rdi)
+	movdqu	%xmm15,48 + 64(%rdi)
+
+	cmpq	$192,%rbx
+	ja	1f
+	movq	$128,%rcx
+	subq	$128,%rbx
+	leaq	128(%rsi),%rsi
+	jmp	seal_sse_128_seal_hash
+1:
+	movdqu	0 + 128(%rsi),%xmm3
+	movdqu	16 + 128(%rsi),%xmm7
+	movdqu	32 + 128(%rsi),%xmm11
+	movdqu	48 + 128(%rsi),%xmm15
+	pxor	%xmm3,%xmm0
+	pxor	%xmm7,%xmm4
+	pxor	%xmm11,%xmm8
+	pxor	%xmm12,%xmm15
+	movdqu	%xmm0,0 + 128(%rdi)
+	movdqu	%xmm4,16 + 128(%rdi)
+	movdqu	%xmm8,32 + 128(%rdi)
+	movdqu	%xmm15,48 + 128(%rdi)
+
+	movq	$192,%rcx
+	subq	$192,%rbx
+	leaq	192(%rsi),%rsi
+	movq	$2,%rcx
+	movq	$8,%r8
+	cmpq	$64,%rbx
+	jbe	seal_sse_tail_64
+	cmpq	$128,%rbx
+	jbe	seal_sse_tail_128
+	cmpq	$192,%rbx
+	jbe	seal_sse_tail_192
+
+1:
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	48(%rbp),%xmm4
+	movdqa	64(%rbp),%xmm8
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm8,%xmm9
+	movdqa	%xmm0,%xmm2
+	movdqa	%xmm4,%xmm6
+	movdqa	%xmm8,%xmm10
+	movdqa	%xmm0,%xmm3
+	movdqa	%xmm4,%xmm7
+	movdqa	%xmm8,%xmm11
+	movdqa	96(%rbp),%xmm15
+	paddd	.sse_inc(%rip),%xmm15
+	movdqa	%xmm15,%xmm14
+	paddd	.sse_inc(%rip),%xmm14
+	movdqa	%xmm14,%xmm13
+	paddd	.sse_inc(%rip),%xmm13
+	movdqa	%xmm13,%xmm12
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,96(%rbp)
+	movdqa	%xmm13,112(%rbp)
+	movdqa	%xmm14,128(%rbp)
+	movdqa	%xmm15,144(%rbp)
+
+2:
+	movdqa	%xmm8,80(%rbp)
+	movdqa	.rol16(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm4
+	pxor	%xmm8,%xmm4
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movdqa	.rol8(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	80(%rbp),%xmm8
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+.byte	102,15,58,15,255,4
+.byte	102,69,15,58,15,219,8
+.byte	102,69,15,58,15,255,12
+.byte	102,15,58,15,246,4
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,12
+.byte	102,15,58,15,237,4
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,12
+.byte	102,15,58,15,228,4
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,12
+	movdqa	%xmm8,80(%rbp)
+	movdqa	.rol16(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	.rol8(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	80(%rbp),%xmm8
+.byte	102,15,58,15,255,12
+.byte	102,69,15,58,15,219,8
+.byte	102,69,15,58,15,255,4
+.byte	102,15,58,15,246,12
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,4
+.byte	102,15,58,15,237,12
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,4
+.byte	102,15,58,15,228,12
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,4
+
+	leaq	16(%rdi),%rdi
+	decq	%r8
+	jge	2b
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rdi),%rdi
+	decq	%rcx
+	jg	2b
+	paddd	.chacha20_consts(%rip),%xmm3
+	paddd	48(%rbp),%xmm7
+	paddd	64(%rbp),%xmm11
+	paddd	144(%rbp),%xmm15
+	paddd	.chacha20_consts(%rip),%xmm2
+	paddd	48(%rbp),%xmm6
+	paddd	64(%rbp),%xmm10
+	paddd	128(%rbp),%xmm14
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	48(%rbp),%xmm5
+	paddd	64(%rbp),%xmm9
+	paddd	112(%rbp),%xmm13
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+
+	movdqa	%xmm14,80(%rbp)
+	movdqa	%xmm14,80(%rbp)
+	movdqu	0 + 0(%rsi),%xmm14
+	pxor	%xmm3,%xmm14
+	movdqu	%xmm14,0 + 0(%rdi)
+	movdqu	16 + 0(%rsi),%xmm14
+	pxor	%xmm7,%xmm14
+	movdqu	%xmm14,16 + 0(%rdi)
+	movdqu	32 + 0(%rsi),%xmm14
+	pxor	%xmm11,%xmm14
+	movdqu	%xmm14,32 + 0(%rdi)
+	movdqu	48 + 0(%rsi),%xmm14
+	pxor	%xmm15,%xmm14
+	movdqu	%xmm14,48 + 0(%rdi)
+
+	movdqa	80(%rbp),%xmm14
+	movdqu	0 + 64(%rsi),%xmm3
+	movdqu	16 + 64(%rsi),%xmm7
+	movdqu	32 + 64(%rsi),%xmm11
+	movdqu	48 + 64(%rsi),%xmm15
+	pxor	%xmm3,%xmm2
+	pxor	%xmm7,%xmm6
+	pxor	%xmm11,%xmm10
+	pxor	%xmm14,%xmm15
+	movdqu	%xmm2,0 + 64(%rdi)
+	movdqu	%xmm6,16 + 64(%rdi)
+	movdqu	%xmm10,32 + 64(%rdi)
+	movdqu	%xmm15,48 + 64(%rdi)
+	movdqu	0 + 128(%rsi),%xmm3
+	movdqu	16 + 128(%rsi),%xmm7
+	movdqu	32 + 128(%rsi),%xmm11
+	movdqu	48 + 128(%rsi),%xmm15
+	pxor	%xmm3,%xmm1
+	pxor	%xmm7,%xmm5
+	pxor	%xmm11,%xmm9
+	pxor	%xmm13,%xmm15
+	movdqu	%xmm1,0 + 128(%rdi)
+	movdqu	%xmm5,16 + 128(%rdi)
+	movdqu	%xmm9,32 + 128(%rdi)
+	movdqu	%xmm15,48 + 128(%rdi)
+
+	cmpq	$256,%rbx
+	ja	3f
+
+	movq	$192,%rcx
+	subq	$192,%rbx
+	leaq	192(%rsi),%rsi
+	jmp	seal_sse_128_seal_hash
+3:
+	movdqu	0 + 192(%rsi),%xmm3
+	movdqu	16 + 192(%rsi),%xmm7
+	movdqu	32 + 192(%rsi),%xmm11
+	movdqu	48 + 192(%rsi),%xmm15
+	pxor	%xmm3,%xmm0
+	pxor	%xmm7,%xmm4
+	pxor	%xmm11,%xmm8
+	pxor	%xmm12,%xmm15
+	movdqu	%xmm0,0 + 192(%rdi)
+	movdqu	%xmm4,16 + 192(%rdi)
+	movdqu	%xmm8,32 + 192(%rdi)
+	movdqu	%xmm15,48 + 192(%rdi)
+
+	leaq	256(%rsi),%rsi
+	subq	$256,%rbx
+	movq	$6,%rcx
+	movq	$4,%r8
+	cmpq	$192,%rbx
+	jg	1b
+	movq	%rbx,%rcx
+	testq	%rbx,%rbx
+	je	seal_sse_128_seal_hash
+	movq	$6,%rcx
+	cmpq	$64,%rbx
+	jg	3f
+
+seal_sse_tail_64:	# reached when rbx <= 64: build one more keystream block in xmm0/xmm4/xmm8/xmm12 while hashing 16-byte blocks read from (%rdi)
+	movdqa	.chacha20_consts(%rip),%xmm0	# row a = constants
+	movdqa	48(%rbp),%xmm4	# row b = state row saved at 48(%rbp)
+	movdqa	64(%rbp),%xmm8	# row c = state row saved at 64(%rbp)
+	movdqa	96(%rbp),%xmm12	# row d = counter row last stored at 96(%rbp)
+	paddd	.sse_inc(%rip),%xmm12	# advance it by .sse_inc
+	movdqa	%xmm12,96(%rbp)	# remember the initial row d for the final add below
+
+1:	# taken while rcx > 0: hash one extra 16-byte block, then fall into the round at 2:
+	addq	0(%rdi),%r10	# accumulator r10:r11:r12 += 16 bytes at (%rdi)
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12	# carry plus the 2^128 pad bit
+	movq	0+0(%rbp),%rax	# low qword of the multiplier kept at 0(%rbp)
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# high qword of the multiplier
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9	# product is now r13:r14:r15:r9 (low to high)
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12	# keep only the bits below 2^130
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	# r13:r14 = 4 * (bits at and above 2^130)
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	# r15:r9 = bits at and above 2^130
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12	# acc = low 130 bits + 5 * high part
+
+	leaq	16(%rdi),%rdi	# step past the block just hashed
+2:	# one double round on the single block, followed by one 16-byte hash step
+	paddd	%xmm4,%xmm0	# a += b
+	pxor	%xmm0,%xmm12	# d ^= a
+	pshufb	.rol16(%rip),%xmm12	# byte shuffle of d via the .rol16 table (presumably a 16-bit rotate per dword; table not in view)
+	paddd	%xmm12,%xmm8	# c += d
+	pxor	%xmm8,%xmm4	# b ^= c
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4	# b = rotate-left(b, 12) per dword
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12	# byte shuffle of d via the .rol8 table (presumably an 8-bit rotate per dword)
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4	# b = rotate-left(b, 7) per dword
+.byte	102,15,58,15,228,4	# palignr $4,%xmm4,%xmm4: rotate the dword lanes of b
+.byte	102,69,15,58,15,192,8	# palignr $8,%xmm8,%xmm8: rotate the dword lanes of c
+.byte	102,69,15,58,15,228,12	# palignr $12,%xmm12,%xmm12: rotate the dword lanes of d
+	paddd	%xmm4,%xmm0	# second half: same quarter-round steps on the rotated rows
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12	# palignr $12,%xmm4,%xmm4: undo the lane rotation of b
+.byte	102,69,15,58,15,192,8	# palignr $8,%xmm8,%xmm8: undo the lane rotation of c
+.byte	102,69,15,58,15,228,4	# palignr $4,%xmm12,%xmm12: undo the lane rotation of d
+	addq	0(%rdi),%r10	# hash the next 16 bytes at (%rdi)
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12	# carry plus the 2^128 pad bit
+	movq	0+0(%rbp),%rax	# same multiply-and-fold sequence as after label 1: above
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12	# accumulator folded back to r10:r11:r12
+
+	leaq	16(%rdi),%rdi
+	decq	%rcx
+	jg	1b	# rcx still positive: iteration with two hash steps
+	decq	%r8
+	jge	2b	# then r8+1 iterations with one hash step each
+	paddd	.chacha20_consts(%rip),%xmm0	# add the initial rows back to form the keystream block
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+
+	jmp	seal_sse_128_seal	# encrypt the remaining bytes 16 at a time from xmm0/xmm4/xmm8/xmm12
+3:
+	cmpq	$128,%rbx
+	jg	3f
+
+seal_sse_tail_128:	# reached when 64 < rbx <= 128: build two keystream blocks (xmm1/5/9/13 and xmm0/4/8/12) while hashing 16-byte blocks read from (%rdi)
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	48(%rbp),%xmm4	# state row saved at 48(%rbp)
+	movdqa	64(%rbp),%xmm8	# state row saved at 64(%rbp)
+	movdqa	%xmm0,%xmm1	# second block starts from the same rows a, b, c
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm8,%xmm9
+	movdqa	96(%rbp),%xmm13	# counter row last stored at 96(%rbp)
+	paddd	.sse_inc(%rip),%xmm13	# block in xmm1/5/9/13 gets the next counter
+	movdqa	%xmm13,%xmm12
+	paddd	.sse_inc(%rip),%xmm12	# block in xmm0/4/8/12 gets the one after that
+	movdqa	%xmm12,96(%rbp)	# keep both initial counter rows for the final adds
+	movdqa	%xmm13,112(%rbp)
+
+1:	# taken while rcx > 0: hash one extra 16-byte block, then fall into the round at 2:
+	addq	0(%rdi),%r10	# accumulator r10:r11:r12 += 16 bytes at (%rdi)
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12	# carry plus the 2^128 pad bit
+	movq	0+0(%rbp),%rax	# low qword of the multiplier kept at 0(%rbp)
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# high qword of the multiplier
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9	# product is now r13:r14:r15:r9 (low to high)
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12	# keep only the bits below 2^130
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	# r13:r14 = 4 * (bits at and above 2^130)
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	# r15:r9 = bits at and above 2^130
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12	# acc = low 130 bits + 5 * high part
+
+	leaq	16(%rdi),%rdi	# step past the block just hashed
+2:	# one double round on both blocks with one 16-byte hash step in the middle
+	paddd	%xmm4,%xmm0	# block 0/4/8/12, first half: a += b
+	pxor	%xmm0,%xmm12	# d ^= a
+	pshufb	.rol16(%rip),%xmm12	# byte shuffle via .rol16 (presumably a 16-bit rotate per dword; table not in view)
+	paddd	%xmm12,%xmm8	# c += d
+	pxor	%xmm8,%xmm4	# b ^= c
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4	# b = rotate-left(b, 12) per dword
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12	# byte shuffle via .rol8 (presumably an 8-bit rotate per dword)
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4	# b = rotate-left(b, 7) per dword
+.byte	102,15,58,15,228,4	# palignr $4,%xmm4,%xmm4
+.byte	102,69,15,58,15,192,8	# palignr $8,%xmm8,%xmm8
+.byte	102,69,15,58,15,228,12	# palignr $12,%xmm12,%xmm12
+	paddd	%xmm5,%xmm1	# block 1/5/9/13, first half: same steps
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,4	# palignr $4,%xmm5,%xmm5
+.byte	102,69,15,58,15,201,8	# palignr $8,%xmm9,%xmm9
+.byte	102,69,15,58,15,237,12	# palignr $12,%xmm13,%xmm13
+	addq	0(%rdi),%r10	# hash the next 16 bytes at (%rdi)
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12	# carry plus the 2^128 pad bit
+	movq	0+0(%rbp),%rax	# same multiply-and-fold sequence as after label 1: above
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12	# accumulator folded back to r10:r11:r12
+	paddd	%xmm4,%xmm0	# block 0/4/8/12, second half on the rotated rows
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12	# palignr $12,%xmm4,%xmm4: undo the lane rotation
+.byte	102,69,15,58,15,192,8	# palignr $8,%xmm8,%xmm8
+.byte	102,69,15,58,15,228,4	# palignr $4,%xmm12,%xmm12
+	paddd	%xmm5,%xmm1	# block 1/5/9/13, second half
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,12	# palignr $12,%xmm5,%xmm5
+.byte	102,69,15,58,15,201,8	# palignr $8,%xmm9,%xmm9
+.byte	102,69,15,58,15,237,4	# palignr $4,%xmm13,%xmm13
+
+	leaq	16(%rdi),%rdi
+	decq	%rcx
+	jg	1b	# rcx still positive: iteration with two hash steps
+	decq	%r8
+	jge	2b	# then r8+1 iterations with one hash step each
+	paddd	.chacha20_consts(%rip),%xmm1	# add the initial rows back to both blocks
+	paddd	48(%rbp),%xmm5
+	paddd	64(%rbp),%xmm9
+	paddd	112(%rbp),%xmm13
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+	movdqu	0 + 0(%rsi),%xmm3	# load 64 input bytes
+	movdqu	16 + 0(%rsi),%xmm7
+	movdqu	32 + 0(%rsi),%xmm11
+	movdqu	48 + 0(%rsi),%xmm15
+	pxor	%xmm3,%xmm1	# XOR them with keystream block xmm1/5/9/13
+	pxor	%xmm7,%xmm5
+	pxor	%xmm11,%xmm9
+	pxor	%xmm13,%xmm15	# fourth row result lands in xmm15
+	movdqu	%xmm1,0 + 0(%rdi)	# write 64 output bytes
+	movdqu	%xmm5,16 + 0(%rdi)
+	movdqu	%xmm9,32 + 0(%rdi)
+	movdqu	%xmm15,48 + 0(%rdi)
+
+	movq	$64,%rcx	# 64 bytes at (%rdi) are still to be hashed
+	subq	$64,%rbx	# 64 fewer bytes left to encrypt
+	leaq	64(%rsi),%rsi
+	jmp	seal_sse_128_seal_hash	# xmm0/4/8/12 keep the second keystream block for the remainder
+3:
+
+seal_sse_tail_192:	# reached when 128 < rbx <= 192: build three keystream blocks (xmm2/6/10/14, xmm1/5/9/13, xmm0/4/8/12) while hashing 16-byte blocks read from (%rdi)
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	48(%rbp),%xmm4	# state row saved at 48(%rbp)
+	movdqa	64(%rbp),%xmm8	# state row saved at 64(%rbp)
+	movdqa	%xmm0,%xmm1	# all three blocks start from the same rows a, b, c
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm8,%xmm9
+	movdqa	%xmm0,%xmm2
+	movdqa	%xmm4,%xmm6
+	movdqa	%xmm8,%xmm10
+	movdqa	96(%rbp),%xmm14	# counter row last stored at 96(%rbp)
+	paddd	.sse_inc(%rip),%xmm14	# xmm14: next counter
+	movdqa	%xmm14,%xmm13
+	paddd	.sse_inc(%rip),%xmm13	# xmm13: the one after
+	movdqa	%xmm13,%xmm12
+	paddd	.sse_inc(%rip),%xmm12	# xmm12: the highest of the three
+	movdqa	%xmm12,96(%rbp)	# keep the initial counter rows for the final adds
+	movdqa	%xmm13,112(%rbp)
+	movdqa	%xmm14,128(%rbp)
+
+1:	# taken while rcx > 0: hash one extra 16-byte block, then fall into the round at 2:
+	addq	0(%rdi),%r10	# accumulator r10:r11:r12 += 16 bytes at (%rdi)
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12	# carry plus the 2^128 pad bit
+	movq	0+0(%rbp),%rax	# low qword of the multiplier kept at 0(%rbp)
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# high qword of the multiplier
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9	# product is now r13:r14:r15:r9 (low to high)
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12	# keep only the bits below 2^130
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	# r13:r14 = 4 * (bits at and above 2^130)
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	# r15:r9 = bits at and above 2^130
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12	# acc = low 130 bits + 5 * high part
+
+	leaq	16(%rdi),%rdi	# step past the block just hashed
+2:	# one double round on all three blocks with one 16-byte hash step in the middle
+	paddd	%xmm4,%xmm0	# block 0/4/8/12, first half: a += b
+	pxor	%xmm0,%xmm12	# d ^= a
+	pshufb	.rol16(%rip),%xmm12	# byte shuffle via .rol16 (presumably a 16-bit rotate per dword; table not in view)
+	paddd	%xmm12,%xmm8	# c += d
+	pxor	%xmm8,%xmm4	# b ^= c
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4	# b = rotate-left(b, 12) per dword
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12	# byte shuffle via .rol8 (presumably an 8-bit rotate per dword)
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4	# b = rotate-left(b, 7) per dword
+.byte	102,15,58,15,228,4	# palignr $4,%xmm4,%xmm4
+.byte	102,69,15,58,15,192,8	# palignr $8,%xmm8,%xmm8
+.byte	102,69,15,58,15,228,12	# palignr $12,%xmm12,%xmm12
+	paddd	%xmm5,%xmm1	# block 1/5/9/13, first half: same steps
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,4	# palignr $4,%xmm5,%xmm5
+.byte	102,69,15,58,15,201,8	# palignr $8,%xmm9,%xmm9
+.byte	102,69,15,58,15,237,12	# palignr $12,%xmm13,%xmm13
+	paddd	%xmm6,%xmm2	# block 2/6/10/14, first half: same steps
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm6
+	pxor	%xmm3,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm6
+	pxor	%xmm3,%xmm6
+.byte	102,15,58,15,246,4	# palignr $4,%xmm6,%xmm6
+.byte	102,69,15,58,15,210,8	# palignr $8,%xmm10,%xmm10
+.byte	102,69,15,58,15,246,12	# palignr $12,%xmm14,%xmm14
+	addq	0(%rdi),%r10	# hash the next 16 bytes at (%rdi)
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12	# carry plus the 2^128 pad bit
+	movq	0+0(%rbp),%rax	# same multiply-and-fold sequence as after label 1: above
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12	# accumulator folded back to r10:r11:r12
+	paddd	%xmm4,%xmm0	# block 0/4/8/12, second half on the rotated rows
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12	# palignr $12,%xmm4,%xmm4: undo the lane rotation
+.byte	102,69,15,58,15,192,8	# palignr $8,%xmm8,%xmm8
+.byte	102,69,15,58,15,228,4	# palignr $4,%xmm12,%xmm12
+	paddd	%xmm5,%xmm1	# block 1/5/9/13, second half
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,12	# palignr $12,%xmm5,%xmm5
+.byte	102,69,15,58,15,201,8	# palignr $8,%xmm9,%xmm9
+.byte	102,69,15,58,15,237,4	# palignr $4,%xmm13,%xmm13
+	paddd	%xmm6,%xmm2	# block 2/6/10/14, second half
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm6
+	pxor	%xmm3,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm6
+	pxor	%xmm3,%xmm6
+.byte	102,15,58,15,246,12	# palignr $12,%xmm6,%xmm6
+.byte	102,69,15,58,15,210,8	# palignr $8,%xmm10,%xmm10
+.byte	102,69,15,58,15,246,4	# palignr $4,%xmm14,%xmm14
+
+	leaq	16(%rdi),%rdi
+	decq	%rcx
+	jg	1b	# rcx still positive: iteration with two hash steps
+	decq	%r8
+	jge	2b	# then r8+1 iterations with one hash step each
+	paddd	.chacha20_consts(%rip),%xmm2	# add the initial rows back to all three blocks
+	paddd	48(%rbp),%xmm6
+	paddd	64(%rbp),%xmm10
+	paddd	128(%rbp),%xmm14
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	48(%rbp),%xmm5
+	paddd	64(%rbp),%xmm9
+	paddd	112(%rbp),%xmm13
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+	movdqu	0 + 0(%rsi),%xmm3	# first 64 input bytes
+	movdqu	16 + 0(%rsi),%xmm7
+	movdqu	32 + 0(%rsi),%xmm11
+	movdqu	48 + 0(%rsi),%xmm15
+	pxor	%xmm3,%xmm2	# XOR them with keystream block xmm2/6/10/14
+	pxor	%xmm7,%xmm6
+	pxor	%xmm11,%xmm10
+	pxor	%xmm14,%xmm15	# fourth row result lands in xmm15
+	movdqu	%xmm2,0 + 0(%rdi)
+	movdqu	%xmm6,16 + 0(%rdi)
+	movdqu	%xmm10,32 + 0(%rdi)
+	movdqu	%xmm15,48 + 0(%rdi)
+	movdqu	0 + 64(%rsi),%xmm3	# next 64 input bytes
+	movdqu	16 + 64(%rsi),%xmm7
+	movdqu	32 + 64(%rsi),%xmm11
+	movdqu	48 + 64(%rsi),%xmm15
+	pxor	%xmm3,%xmm1	# XOR them with keystream block xmm1/5/9/13
+	pxor	%xmm7,%xmm5
+	pxor	%xmm11,%xmm9
+	pxor	%xmm13,%xmm15
+	movdqu	%xmm1,0 + 64(%rdi)
+	movdqu	%xmm5,16 + 64(%rdi)
+	movdqu	%xmm9,32 + 64(%rdi)
+	movdqu	%xmm15,48 + 64(%rdi)
+
+	movq	$128,%rcx	# 128 bytes at (%rdi) are still to be hashed
+	subq	$128,%rbx	# 128 fewer bytes left to encrypt
+	leaq	128(%rsi),%rsi	# falls through to seal_sse_128_seal_hash with xmm0/4/8/12 holding the third block
+
+seal_sse_128_seal_hash:	# hash rcx bytes at (%rdi), 16 at a time, advancing rdi; continue at seal_sse_128_seal once fewer than 16 remain
+	cmpq	$16,%rcx
+	jb	seal_sse_128_seal	# fewer than 16 bytes left in this count: done hashing here
+	addq	0(%rdi),%r10	# accumulator r10:r11:r12 += 16 bytes at (%rdi)
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12	# carry plus the 2^128 pad bit
+	movq	0+0(%rbp),%rax	# low qword of the multiplier kept at 0(%rbp)
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# high qword of the multiplier
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9	# product is now r13:r14:r15:r9 (low to high)
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12	# keep only the bits below 2^130
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	# r13:r14 = 4 * (bits at and above 2^130)
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	# r15:r9 = bits at and above 2^130
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12	# acc = low 130 bits + 5 * high part
+
+	subq	$16,%rcx	# 16 bytes consumed
+	leaq	16(%rdi),%rdi
+	jmp	seal_sse_128_seal_hash
+
+seal_sse_128_seal:	# while rbx >= 16: XOR 16 input bytes with xmm0, store them, hash the stored bytes, then shift the keystream register queue
+	cmpq	$16,%rbx
+	jb	seal_sse_tail_16	# fewer than 16 bytes left: handle them byte-wise
+	subq	$16,%rbx
+
+	movdqu	0(%rsi),%xmm3	# 16 input bytes
+	pxor	%xmm3,%xmm0	# XOR with the current keystream row
+	movdqu	%xmm0,0(%rdi)	# write 16 output bytes
+
+	addq	0(%rdi),%r10	# accumulator r10:r11:r12 += the 16 bytes just written
+	adcq	8(%rdi),%r11
+	adcq	$1,%r12	# carry plus the 2^128 pad bit
+	leaq	16(%rsi),%rsi	# advance both pointers (leaq leaves the flags alone)
+	leaq	16(%rdi),%rdi
+	movq	0+0(%rbp),%rax	# low qword of the multiplier kept at 0(%rbp)
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# high qword of the multiplier
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9	# product is now r13:r14:r15:r9 (low to high)
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12	# keep only the bits below 2^130
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	# r13:r14 = 4 * (bits at and above 2^130)
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	# r15:r9 = bits at and above 2^130
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12	# acc = low 130 bits + 5 * high part
+
+
+	movdqa	%xmm4,%xmm0	# shift the queue: next 16 keystream bytes move into xmm0
+	movdqa	%xmm8,%xmm4
+	movdqa	%xmm12,%xmm8
+	movdqa	%xmm1,%xmm12	# rows of the block in xmm1/5/9/13 follow behind
+	movdqa	%xmm5,%xmm1
+	movdqa	%xmm9,%xmm5
+	movdqa	%xmm13,%xmm9
+	jmp	seal_sse_128_seal
+
+seal_sse_tail_16:	# encrypt the final rbx (< 16) bytes one byte at a time, then decide how to hash that partial block
+	testq	%rbx,%rbx
+	jz	process_blocks_of_extra_in	# nothing left to encrypt
+
+	movq	%rbx,%r8
+	movq	%rbx,%rcx	# byte counter for the load loop
+	leaq	-1(%rsi,%rbx), %rsi	# point at the last input byte
+	pxor	%xmm15,%xmm15
+1:	# gather the rbx input bytes into the low bytes of xmm15, last byte first
+	pslldq	$1,%xmm15	# make room at byte 0
+	pinsrb	$0,(%rsi),%xmm15
+	leaq	-1(%rsi),%rsi
+	decq	%rcx
+	jne	1b
+
+
+	pxor	%xmm0,%xmm15	# XOR with the keystream row in xmm0
+
+
+	movq	%rbx,%rcx
+	movdqu	%xmm15,%xmm0	# copy to xmm0; xmm15 keeps the bytes for hashing
+2:	# store the low rbx bytes to (%rdi), one per iteration
+	pextrb	$0,%xmm0,(%rdi)
+	psrldq	$1,%xmm0
+	addq	$1,%rdi
+	subq	$1,%rcx
+	jnz	2b
+
+
+
+
+
+
+
+
+	movq	288+32(%rsp),%r9	# reload the pointer saved just above the 320-byte frame
+	movq	56(%r9),%r14	# r14 = qword at offset 56 (used as a remaining byte count below)
+	movq	48(%r9),%r13	# r13 = qword at offset 48 (used as a source pointer below)
+	testq	%r14,%r14
+	jz	process_partial_block	# no extra bytes: hash the partial block alone
+
+	movq	$16,%r15
+	subq	%rbx,%r15	# r15 = room left in the 16-byte block
+	cmpq	%r15,%r14
+
+	jge	load_extra_in	# enough extra bytes to fill the block (signed compare)
+	movq	%r14,%r15	# otherwise take only what is available
+
+load_extra_in:	# append r15 bytes from the buffer at r13 to the rbx bytes held in xmm15, then hash the combined 16-byte block
+
+
+	leaq	-1(%r13,%r15), %rsi	# last of the r15 bytes to take
+
+
+	addq	%r15,%r13	# consume r15 bytes from the buffer
+	subq	%r15,%r14
+	movq	%r13,48(%r9)	# write the advanced pointer and reduced count back
+	movq	%r14,56(%r9)
+
+
+
+	addq	%r15,%r8	# NOTE(review): r8 is reloaded at process_blocks_of_extra_in before any further use
+
+
+	pxor	%xmm11,%xmm11
+3:	# gather the r15 bytes into the low bytes of xmm11, last byte first
+	pslldq	$1,%xmm11
+	pinsrb	$0,(%rsi),%xmm11
+	leaq	-1(%rsi),%rsi
+	subq	$1,%r15
+	jnz	3b
+
+
+
+
+	movq	%rbx,%r15
+
+4:	# shift those bytes up by rbx so they sit after the bytes already in xmm15
+	pslldq	$1,%xmm11
+	subq	$1,%r15
+	jnz	4b
+
+
+
+
+	leaq	.and_masks(%rip),%r15
+	shlq	$4,%rbx	# 16-byte table entries: index by rbx
+	pand	-16(%r15,%rbx), %xmm15	# apply entry rbx-1 (presumably keeps the low rbx bytes; table not in view)
+
+
+	por	%xmm11,%xmm15	# merge the two byte runs
+
+
+
+.byte	102,77,15,126,253	# movq %xmm15,%r13: low 8 bytes of the block
+	pextrq	$1,%xmm15,%r14	# high 8 bytes of the block
+	addq	%r13,%r10	# accumulator r10:r11:r12 += block
+	adcq	%r14,%r11
+	adcq	$1,%r12	# carry plus the 2^128 pad bit
+	movq	0+0(%rbp),%rax	# low qword of the multiplier kept at 0(%rbp)
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# high qword of the multiplier
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9	# product is now r13:r14:r15:r9 (low to high)
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12	# keep only the bits below 2^130
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	# r13:r14 = 4 * (bits at and above 2^130)
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	# r15:r9 = bits at and above 2^130
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12	# acc = low 130 bits + 5 * high part
+
+
+process_blocks_of_extra_in:	# hash every whole 16-byte block of the buffer described at 48(%r9) (pointer) and 56(%r9) (byte count)
+
+	movq	288+32(%rsp),%r9	# reload the pointer saved just above the 320-byte frame
+	movq	48(%r9),%rsi	# rsi = buffer pointer
+	movq	56(%r9),%r8	# r8 = byte count
+	movq	%r8,%rcx	# keep the full count for the trailer
+	shrq	$4,%r8	# r8 = number of whole 16-byte blocks; sets ZF for the jz below
+
+5:	# ZF here comes from the shrq above or from the subq at the bottom of the loop
+	jz	process_extra_in_trailer
+	addq	0(%rsi),%r10	# accumulator r10:r11:r12 += 16 bytes at (%rsi)
+	adcq	8+0(%rsi),%r11
+	adcq	$1,%r12	# carry plus the 2^128 pad bit
+	movq	0+0(%rbp),%rax	# low qword of the multiplier kept at 0(%rbp)
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# high qword of the multiplier
+	movq	%rax,%r9	# r9 is reused as scratch from here on
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9	# product is now r13:r14:r15:r9 (low to high)
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12	# keep only the bits below 2^130
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	# r13:r14 = 4 * (bits at and above 2^130)
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	# r15:r9 = bits at and above 2^130
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12	# acc = low 130 bits + 5 * high part
+
+	leaq	16(%rsi),%rsi	# next block (leaq leaves the flags alone)
+	subq	$1,%r8	# one block done; ZF is tested at 5:
+	jmp	5b
+
+process_extra_in_trailer:	# load the trailing (count mod 16) bytes at rsi into xmm15 for process_partial_block
+	andq	$15,%rcx	# rcx = leftover byte count; sets ZF
+	movq	%rcx,%rbx	# rbx = partial block length (mov does not touch ZF)
+	jz	do_length_block	# no leftover bytes
+	leaq	-1(%rsi,%rcx), %rsi	# point at the last leftover byte
+
+6:	# shift each byte in at position 0, last byte first; xmm15 is not cleared here, older bytes move upward
+	pslldq	$1,%xmm15
+	pinsrb	$0,(%rsi),%xmm15
+	leaq	-1(%rsi),%rsi
+	subq	$1,%rcx
+	jnz	6b
+
+process_partial_block:	# mask xmm15 down to its rbx-byte partial block and hash it as one 16-byte block
+
+	leaq	.and_masks(%rip),%r15
+	shlq	$4,%rbx	# 16-byte table entries: index by rbx
+	pand	-16(%r15,%rbx), %xmm15	# apply entry rbx-1 (presumably keeps the low rbx bytes; table not in view)
+.byte	102,77,15,126,253	# movq %xmm15,%r13: low 8 bytes of the block
+	pextrq	$1,%xmm15,%r14	# high 8 bytes of the block
+	addq	%r13,%r10	# accumulator r10:r11:r12 += block
+	adcq	%r14,%r11
+	adcq	$1,%r12	# carry plus the 2^128 pad bit
+	movq	0+0(%rbp),%rax	# low qword of the multiplier kept at 0(%rbp)
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# high qword of the multiplier
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9	# product is now r13:r14:r15:r9 (low to high)
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12	# keep only the bits below 2^130
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	# r13:r14 = 4 * (bits at and above 2^130)
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	# r15:r9 = bits at and above 2^130
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12	# acc = low 130 bits + 5 * high part
+
+
+do_length_block:	# hash the 16-byte length block kept at 32(%rbp), finish the tag, store it through the saved pointer and return
+	addq	32(%rbp),%r10	# accumulator r10:r11:r12 += the two qwords stored at 32(%rbp) on function entry
+	adcq	8+32(%rbp),%r11
+	adcq	$1,%r12	# carry plus the 2^128 pad bit
+	movq	0+0(%rbp),%rax	# low qword of the multiplier kept at 0(%rbp)
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# high qword of the multiplier
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9	# product is now r13:r14:r15:r9 (low to high)
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12	# keep only the bits below 2^130
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	# r13:r14 = 4 * (bits at and above 2^130)
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	# r15:r9 = bits at and above 2^130
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12	# acc = low 130 bits + 5 * high part
+
+
+	movq	%r10,%r13	# keep a copy of the accumulator
+	movq	%r11,%r14
+	movq	%r12,%r15
+	subq	$-5,%r10	# subtract 2^130 - 5 (limbs 2^64-5, 2^64-1, 3)
+	sbbq	$-1,%r11
+	sbbq	$3,%r12
+	cmovcq	%r13,%r10	# borrow means acc was already below 2^130 - 5: restore it
+	cmovcq	%r14,%r11
+	cmovcq	%r15,%r12
+
+	addq	0+16(%rbp),%r10	# add the 128-bit value kept at 16(%rbp); carry out of bit 127 is dropped
+	adcq	8+16(%rbp),%r11
+
+	addq	$288 + 32,%rsp	# release the 320-byte frame
+.cfi_adjust_cfa_offset	-(288 + 32)
+	popq	%r9	# saved pointer that sat just above the frame (the slot read via 288+32(%rsp) elsewhere)
+.cfi_adjust_cfa_offset	-8
+	movq	%r10,0(%r9)	# write the 16-byte tag through it
+	movq	%r11,8(%r9)
+
+	popq	%r15	# restore the six saved registers
+.cfi_adjust_cfa_offset	-8
+	popq	%r14
+.cfi_adjust_cfa_offset	-8
+	popq	%r13
+.cfi_adjust_cfa_offset	-8
+	popq	%r12
+.cfi_adjust_cfa_offset	-8
+	popq	%rbx
+.cfi_adjust_cfa_offset	-8
+	popq	%rbp
+.cfi_adjust_cfa_offset	-8
+	.byte	0xf3,0xc3	# rep ret
+.cfi_adjust_cfa_offset	(8 * 7) + 288 + 32	# code after this return still runs inside the full frame: undo all seven 8-byte pops (r9 plus six registers) and the 320-byte release; the previous (8 * 6) left the CFA 8 bytes short
+
+seal_sse_128:	/* Three-block SSE path: run 10 double rounds on three states built from 0/16/32(%r9), store rows of the third state at 0(%rbp) and 16(%rbp), then hash via poly_hash_ad_internal. */
+	movdqu	.chacha20_consts(%rip),%xmm0	/* row 0 of all three states = constants */
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm0,%xmm2
+	movdqu	0(%r9),%xmm4	/* row 1 = 16 bytes at 0(%r9); presumably first key half - TODO confirm layout of r9 */
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm4,%xmm6
+	movdqu	16(%r9),%xmm8	/* row 2 = 16 bytes at 16(%r9) */
+	movdqa	%xmm8,%xmm9
+	movdqa	%xmm8,%xmm10
+	movdqu	32(%r9),%xmm14	/* row 3 of the third state = 16 bytes at 32(%r9), unmodified */
+	movdqa	%xmm14,%xmm12
+	paddd	.sse_inc(%rip),%xmm12	/* row 3 of the first state = base + .sse_inc */
+	movdqa	%xmm12,%xmm13
+	paddd	.sse_inc(%rip),%xmm13	/* row 3 of the second state = base + 2 * .sse_inc */
+	movdqa	%xmm4,%xmm7	/* keep the initial rows 1, 2 and 3 for the feed-forward after the rounds */
+	movdqa	%xmm8,%xmm11
+	movdqa	%xmm12,%xmm15
+	movq	$10,%r10	/* loop counter: 10 passes of the double round below */
+1:
+	paddd	%xmm4,%xmm0	/* state 0, first half: a += b */
+	pxor	%xmm0,%xmm12	/* d ^= a */
+	pshufb	.rol16(%rip),%xmm12	/* byte shuffle by the .rol16 table (name suggests rotate by 16; table is outside this view) */
+	paddd	%xmm12,%xmm8	/* c += d */
+	pxor	%xmm8,%xmm4	/* b ^= c */
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4	/* b = rotate-left(b, 12), built from the two shifts */
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12	/* byte shuffle by the .rol8 table */
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4	/* b = rotate-left(b, 7) */
+.byte	102,15,58,15,228,4	/* palignr $4,%xmm4,%xmm4: rotate row 1 by one dword */
+.byte	102,69,15,58,15,192,8	/* palignr $8,%xmm8,%xmm8: rotate row 2 by two dwords */
+.byte	102,69,15,58,15,228,12	/* palignr $12,%xmm12,%xmm12: rotate row 3 by three dwords */
+	paddd	%xmm5,%xmm1	/* state 1, first half: same sequence on xmm1/5/9/13 */
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,4	/* palignr $4,%xmm5,%xmm5 */
+.byte	102,69,15,58,15,201,8	/* palignr $8,%xmm9,%xmm9 */
+.byte	102,69,15,58,15,237,12	/* palignr $12,%xmm13,%xmm13 */
+	paddd	%xmm6,%xmm2	/* state 2, first half: same sequence on xmm2/6/10/14 */
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm6
+	pxor	%xmm3,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm6
+	pxor	%xmm3,%xmm6
+.byte	102,15,58,15,246,4	/* palignr $4,%xmm6,%xmm6 */
+.byte	102,69,15,58,15,210,8	/* palignr $8,%xmm10,%xmm10 */
+.byte	102,69,15,58,15,246,12	/* palignr $12,%xmm14,%xmm14 */
+	paddd	%xmm4,%xmm0	/* state 0, second half: same quarter-round on the rotated rows */
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12	/* palignr $12,%xmm4,%xmm4: undo the earlier row 1 rotation */
+.byte	102,69,15,58,15,192,8	/* palignr $8,%xmm8,%xmm8 */
+.byte	102,69,15,58,15,228,4	/* palignr $4,%xmm12,%xmm12 */
+	paddd	%xmm5,%xmm1	/* state 1, second half */
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,12	/* palignr $12,%xmm5,%xmm5 */
+.byte	102,69,15,58,15,201,8	/* palignr $8,%xmm9,%xmm9 */
+.byte	102,69,15,58,15,237,4	/* palignr $4,%xmm13,%xmm13 */
+	paddd	%xmm6,%xmm2	/* state 2, second half */
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm6
+	pxor	%xmm3,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm6
+	pxor	%xmm3,%xmm6
+.byte	102,15,58,15,246,12	/* palignr $12,%xmm6,%xmm6 */
+.byte	102,69,15,58,15,210,8	/* palignr $8,%xmm10,%xmm10 */
+.byte	102,69,15,58,15,246,4	/* palignr $4,%xmm14,%xmm14 */
+
+	decq	%r10
+	jnz	1b
+	paddd	.chacha20_consts(%rip),%xmm0	/* feed-forward: add the initial rows back in */
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	.chacha20_consts(%rip),%xmm2
+	paddd	%xmm7,%xmm4
+	paddd	%xmm7,%xmm5
+	paddd	%xmm7,%xmm6
+	paddd	%xmm11,%xmm8	/* rows 2 and 3 of the third state (xmm10, xmm14) are not finalised in this block */
+	paddd	%xmm11,%xmm9
+	paddd	%xmm15,%xmm12
+	paddd	.sse_inc(%rip),%xmm15	/* advance the saved row 3 to the value the second state started from */
+	paddd	%xmm15,%xmm13
+
+	pand	.clamp(%rip),%xmm2	/* mask row 0 of the third state with the .clamp constant */
+	movdqa	%xmm2,0(%rbp)	/* stored at 0(%rbp), the slot the hash multiply reads its key words from */
+	movdqa	%xmm6,16(%rbp)	/* row 1 of the third state stored at 16(%rbp), the value added at the very end of hashing */
+
+	movq	%r8,%r8	/* register moved onto itself: value unchanged */
+	call	poly_hash_ad_internal	/* defined outside this view; presumably hashes the additional data described by rcx/r8 - TODO confirm */
+	jmp	seal_sse_128_seal	/* continue at a label outside this view */
+.size	chacha20_poly1305_seal, .-chacha20_poly1305_seal
+
+
+.type	chacha20_poly1305_open_avx2,@function
+.align	64
+chacha20_poly1305_open_avx2:
+	vzeroupper
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vbroadcasti128	0(%r9),%ymm4
+	vbroadcasti128	16(%r9),%ymm8
+	vbroadcasti128	32(%r9),%ymm12
+	vpaddd	.avx2_init(%rip),%ymm12,%ymm12
+	cmpq	$192,%rbx
+	jbe	open_avx2_192
+	cmpq	$320,%rbx
+	jbe	open_avx2_320
+
+	vmovdqa	%ymm4,64(%rbp)
+	vmovdqa	%ymm8,96(%rbp)
+	vmovdqa	%ymm12,160(%rbp)
+	movq	$10,%r10
+1:
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+
+	decq	%r10
+	jne	1b
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
+
+	vpand	.clamp(%rip),%ymm3,%ymm3
+	vmovdqa	%ymm3,0(%rbp)
+
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
+
+	movq	%r8,%r8
+	call	poly_hash_ad_internal
+	xorq	%rcx,%rcx
+
+1:
+	addq	0(%rsi,%rcx), %r10
+	adcq	8+0(%rsi,%rcx), %r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	addq	$16,%rcx
+	cmpq	$64,%rcx
+	jne	1b
+
+	vpxor	0(%rsi),%ymm0,%ymm0
+	vpxor	32(%rsi),%ymm4,%ymm4
+	vmovdqu	%ymm0,0(%rdi)
+	vmovdqu	%ymm4,32(%rdi)
+	leaq	64(%rsi),%rsi
+	leaq	64(%rdi),%rdi
+	subq	$64,%rbx
+1:
+
+	cmpq	$512,%rbx
+	jb	3f
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm10
+	vmovdqa	%ymm0,%ymm3
+	vmovdqa	%ymm4,%ymm7
+	vmovdqa	%ymm8,%ymm11
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm15
+	vpaddd	%ymm15,%ymm12,%ymm14
+	vpaddd	%ymm14,%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm15,256(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm12,160(%rbp)
+
+	xorq	%rcx,%rcx
+2:
+	addq	0*8(%rsi,%rcx), %r10
+	adcq	8+0*8(%rsi,%rcx), %r11
+	adcq	$1,%r12
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	addq	2*8(%rsi,%rcx), %r10
+	adcq	8+2*8(%rsi,%rcx), %r11
+	adcq	$1,%r12
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$4,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$12,%ymm15,%ymm15,%ymm15
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	addq	4*8(%rsi,%rcx), %r10
+	adcq	8+4*8(%rsi,%rcx), %r11
+	adcq	$1,%r12
+
+	leaq	48(%rcx),%rcx
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$12,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$4,%ymm15,%ymm15,%ymm15
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+
+	cmpq	$60*8,%rcx
+	jne	2b
+	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	64(%rbp),%ymm7,%ymm7
+	vpaddd	96(%rbp),%ymm11,%ymm11
+	vpaddd	256(%rbp),%ymm15,%ymm15
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	64(%rbp),%ymm6,%ymm6
+	vpaddd	96(%rbp),%ymm10,%ymm10
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+
+	vmovdqa	%ymm0,128(%rbp)
+	addq	60*8(%rsi),%r10
+	adcq	8+60*8(%rsi),%r11
+	adcq	$1,%r12
+	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
+	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
+	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
+	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
+	vpxor	0+0(%rsi),%ymm0,%ymm0
+	vpxor	32+0(%rsi),%ymm3,%ymm3
+	vpxor	64+0(%rsi),%ymm7,%ymm7
+	vpxor	96+0(%rsi),%ymm11,%ymm11
+	vmovdqu	%ymm0,0+0(%rdi)
+	vmovdqu	%ymm3,32+0(%rdi)
+	vmovdqu	%ymm7,64+0(%rdi)
+	vmovdqu	%ymm11,96+0(%rdi)
+
+	vmovdqa	128(%rbp),%ymm0
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
+	vpxor	0+128(%rsi),%ymm3,%ymm3
+	vpxor	32+128(%rsi),%ymm2,%ymm2
+	vpxor	64+128(%rsi),%ymm6,%ymm6
+	vpxor	96+128(%rsi),%ymm10,%ymm10
+	vmovdqu	%ymm3,0+128(%rdi)
+	vmovdqu	%ymm2,32+128(%rdi)
+	vmovdqu	%ymm6,64+128(%rdi)
+	vmovdqu	%ymm10,96+128(%rdi)
+	addq	60*8+16(%rsi),%r10
+	adcq	8+60*8+16(%rsi),%r11
+	adcq	$1,%r12
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+256(%rsi),%ymm3,%ymm3
+	vpxor	32+256(%rsi),%ymm1,%ymm1
+	vpxor	64+256(%rsi),%ymm5,%ymm5
+	vpxor	96+256(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm3,0+256(%rdi)
+	vmovdqu	%ymm1,32+256(%rdi)
+	vmovdqu	%ymm5,64+256(%rdi)
+	vmovdqu	%ymm9,96+256(%rdi)
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
+	vpxor	0+384(%rsi),%ymm3,%ymm3
+	vpxor	32+384(%rsi),%ymm0,%ymm0
+	vpxor	64+384(%rsi),%ymm4,%ymm4
+	vpxor	96+384(%rsi),%ymm8,%ymm8
+	vmovdqu	%ymm3,0+384(%rdi)
+	vmovdqu	%ymm0,32+384(%rdi)
+	vmovdqu	%ymm4,64+384(%rdi)
+	vmovdqu	%ymm8,96+384(%rdi)
+
+	leaq	512(%rsi),%rsi
+	leaq	512(%rdi),%rdi
+	subq	$512,%rbx
+	jmp	1b
+3:
+	testq	%rbx,%rbx
+	vzeroupper
+	je	open_sse_finalize
+3:
+	cmpq	$128,%rbx
+	ja	3f
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm12
+	vmovdqa	%ymm12,160(%rbp)
+
+	xorq	%r8,%r8
+	movq	%rbx,%rcx
+	andq	$-16,%rcx
+	testq	%rcx,%rcx
+	je	2f
+1:
+	addq	0*8(%rsi,%r8), %r10
+	adcq	8+0*8(%rsi,%r8), %r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+2:
+	addq	$16,%r8
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+
+	cmpq	%rcx,%r8
+	jb	1b
+	cmpq	$160,%r8
+	jne	2b
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
+	vmovdqa	%ymm3,%ymm8
+
+	jmp	open_avx2_tail_loop
+3:
+	cmpq	$256,%rbx
+	ja	3f
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+
+	movq	%rbx,128(%rbp)
+	movq	%rbx,%rcx
+	subq	$128,%rcx
+	shrq	$4,%rcx
+	movq	$10,%r8
+	cmpq	$10,%rcx
+	cmovgq	%r8,%rcx
+	movq	%rsi,%rbx
+	xorq	%r8,%r8
+1:
+	addq	0(%rbx),%r10
+	adcq	8+0(%rbx),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rbx),%rbx
+2:
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+
+	incq	%r8
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+
+	cmpq	%rcx,%r8
+	jb	1b
+	cmpq	$10,%r8
+	jne	2b
+	movq	%rbx,%r8
+	subq	%rsi,%rbx
+	movq	%rbx,%rcx
+	movq	128(%rbp),%rbx
+1:
+	addq	$16,%rcx
+	cmpq	%rbx,%rcx
+	jg	1f
+	addq	0(%r8),%r10
+	adcq	8+0(%r8),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%r8),%r8
+	jmp	1b
+1:
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+0(%rsi),%ymm3,%ymm3
+	vpxor	32+0(%rsi),%ymm1,%ymm1
+	vpxor	64+0(%rsi),%ymm5,%ymm5
+	vpxor	96+0(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm3,0+0(%rdi)
+	vmovdqu	%ymm1,32+0(%rdi)
+	vmovdqu	%ymm5,64+0(%rdi)
+	vmovdqu	%ymm9,96+0(%rdi)
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
+	vmovdqa	%ymm3,%ymm8
+
+	leaq	128(%rsi),%rsi
+	leaq	128(%rdi),%rdi
+	subq	$128,%rbx
+	jmp	open_avx2_tail_loop
+3:
+	cmpq	$384,%rbx
+	ja	3f
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm10
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm14
+	vpaddd	%ymm14,%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+
+	movq	%rbx,128(%rbp)
+	movq	%rbx,%rcx
+	subq	$256,%rcx
+	shrq	$4,%rcx
+	addq	$6,%rcx
+	movq	$10,%r8
+	cmpq	$10,%rcx
+	cmovgq	%r8,%rcx
+	movq	%rsi,%rbx
+	xorq	%r8,%r8
+1:
+	addq	0(%rbx),%r10
+	adcq	8+0(%rbx),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rbx),%rbx
+2:
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	addq	0(%rbx),%r10
+	adcq	8+0(%rbx),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rbx),%rbx
+	incq	%r8
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+
+	cmpq	%rcx,%r8
+	jb	1b
+	cmpq	$10,%r8
+	jne	2b
+	movq	%rbx,%r8
+	subq	%rsi,%rbx
+	movq	%rbx,%rcx
+	movq	128(%rbp),%rbx
+1:
+	addq	$16,%rcx
+	cmpq	%rbx,%rcx
+	jg	1f
+	addq	0(%r8),%r10
+	adcq	8+0(%r8),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%r8),%r8
+	jmp	1b
+1:
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	64(%rbp),%ymm6,%ymm6
+	vpaddd	96(%rbp),%ymm10,%ymm10
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
+	vpxor	0+0(%rsi),%ymm3,%ymm3
+	vpxor	32+0(%rsi),%ymm2,%ymm2
+	vpxor	64+0(%rsi),%ymm6,%ymm6
+	vpxor	96+0(%rsi),%ymm10,%ymm10
+	vmovdqu	%ymm3,0+0(%rdi)
+	vmovdqu	%ymm2,32+0(%rdi)
+	vmovdqu	%ymm6,64+0(%rdi)
+	vmovdqu	%ymm10,96+0(%rdi)
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+128(%rsi),%ymm3,%ymm3
+	vpxor	32+128(%rsi),%ymm1,%ymm1
+	vpxor	64+128(%rsi),%ymm5,%ymm5
+	vpxor	96+128(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm3,0+128(%rdi)
+	vmovdqu	%ymm1,32+128(%rdi)
+	vmovdqu	%ymm5,64+128(%rdi)
+	vmovdqu	%ymm9,96+128(%rdi)
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
+	vmovdqa	%ymm3,%ymm8
+
+	leaq	256(%rsi),%rsi
+	leaq	256(%rdi),%rdi
+	subq	$256,%rbx
+	jmp	open_avx2_tail_loop
+3:
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm10
+	vmovdqa	%ymm0,%ymm3
+	vmovdqa	%ymm4,%ymm7
+	vmovdqa	%ymm8,%ymm11
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm15
+	vpaddd	%ymm15,%ymm12,%ymm14
+	vpaddd	%ymm14,%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm15,256(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm12,160(%rbp)
+
+	xorq	%rcx,%rcx
+	movq	%rsi,%r8
+1:
+	addq	0(%r8),%r10
+	adcq	8+0(%r8),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%r8),%r8
+2:
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	addq	0(%r8),%r10
+	adcq	8+0(%r8),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$4,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$12,%ymm15,%ymm15,%ymm15
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vmovdqa	%ymm8,128(%rbp)
+	addq	16(%r8),%r10
+	adcq	8+16(%r8),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	32(%r8),%r8
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$12,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$4,%ymm15,%ymm15,%ymm15
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+
+	incq	%rcx
+	cmpq	$4,%rcx
+	jl	1b
+	cmpq	$10,%rcx
+	jne	2b
+	movq	%rbx,%rcx
+	subq	$384,%rcx
+	andq	$-16,%rcx
+1:
+	testq	%rcx,%rcx
+	je	1f
+	addq	0(%r8),%r10
+	adcq	8+0(%r8),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%r8),%r8
+	subq	$16,%rcx
+	jmp	1b
+1:
+	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	64(%rbp),%ymm7,%ymm7
+	vpaddd	96(%rbp),%ymm11,%ymm11
+	vpaddd	256(%rbp),%ymm15,%ymm15
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	64(%rbp),%ymm6,%ymm6
+	vpaddd	96(%rbp),%ymm10,%ymm10
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+
+	vmovdqa	%ymm0,128(%rbp)
+	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
+	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
+	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
+	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
+	vpxor	0+0(%rsi),%ymm0,%ymm0
+	vpxor	32+0(%rsi),%ymm3,%ymm3
+	vpxor	64+0(%rsi),%ymm7,%ymm7
+	vpxor	96+0(%rsi),%ymm11,%ymm11
+	vmovdqu	%ymm0,0+0(%rdi)
+	vmovdqu	%ymm3,32+0(%rdi)
+	vmovdqu	%ymm7,64+0(%rdi)
+	vmovdqu	%ymm11,96+0(%rdi)
+
+	vmovdqa	128(%rbp),%ymm0
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
+	vpxor	0+128(%rsi),%ymm3,%ymm3
+	vpxor	32+128(%rsi),%ymm2,%ymm2
+	vpxor	64+128(%rsi),%ymm6,%ymm6
+	vpxor	96+128(%rsi),%ymm10,%ymm10
+	vmovdqu	%ymm3,0+128(%rdi)
+	vmovdqu	%ymm2,32+128(%rdi)
+	vmovdqu	%ymm6,64+128(%rdi)
+	vmovdqu	%ymm10,96+128(%rdi)
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+256(%rsi),%ymm3,%ymm3
+	vpxor	32+256(%rsi),%ymm1,%ymm1
+	vpxor	64+256(%rsi),%ymm5,%ymm5
+	vpxor	96+256(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm3,0+256(%rdi)
+	vmovdqu	%ymm1,32+256(%rdi)
+	vmovdqu	%ymm5,64+256(%rdi)
+	vmovdqu	%ymm9,96+256(%rdi)
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
+	vmovdqa	%ymm3,%ymm8
+
+	leaq	384(%rsi),%rsi
+	leaq	384(%rdi),%rdi
+	subq	$384,%rbx
+open_avx2_tail_loop:	# XOR 32 bytes per pass while %rbx (remaining byte count) is at least 32
+	cmpq	$32,%rbx
+	jb	open_avx2_tail	# fewer than 32 bytes left
+	subq	$32,%rbx
+	vpxor	(%rsi),%ymm0,%ymm0	# 32 input bytes at (%rsi) XOR ymm0 (presumably keystream - confirm against the code that fills ymm0)
+	vmovdqu	%ymm0,(%rdi)	# unaligned store of the result to (%rdi)
+	leaq	32(%rsi),%rsi	# advance both pointers without touching flags
+	leaq	32(%rdi),%rdi
+	vmovdqa	%ymm4,%ymm0	# shift the register queue down: ymm4 -> ymm0, ymm8 -> ymm4, ymm12 -> ymm8
+	vmovdqa	%ymm8,%ymm4
+	vmovdqa	%ymm12,%ymm8
+	jmp	open_avx2_tail_loop
+open_avx2_tail:	# reached with fewer than 32 bytes left in %rbx; handles one optional 16-byte step
+	cmpq	$16,%rbx
+	vmovdqa	%xmm0,%xmm1	# xmm1 = low half of ymm0; SIMD move leaves the cmpq flags intact for the jb
+	jb	1f	# under 16 bytes: skip the 16-byte step
+	subq	$16,%rbx
+
+	vpxor	(%rsi),%xmm0,%xmm1	# 16 input bytes XOR low half of ymm0
+	vmovdqu	%xmm1,(%rdi)
+	leaq	16(%rsi),%rsi
+	leaq	16(%rdi),%rdi
+	vperm2i128	$0x11,%ymm0,%ymm0,%ymm0	# copy the high 128 bits of ymm0 into both halves
+	vmovdqa	%xmm0,%xmm1	# xmm1 = former high half of ymm0
+1:
+	vzeroupper	# clear upper ymm halves before continuing in open_sse_tail_16
+	jmp	open_sse_tail_16	# defined outside this chunk; presumably consumes xmm1 for the last partial block - confirm
+
+open_avx2_192:	# runs two register sets (ymm0/4/8/12 and ymm1/5/9/13) through 10 loop passes; presumably the path for inputs of at most 192 bytes (dispatch is outside this chunk)
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm0,%ymm2	# ymm2 keeps the initial ymm0 for the add-back after the loop
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm4,%ymm6	# ymm6 keeps the initial ymm4
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm8,%ymm10	# ymm10 keeps the initial ymm8
+	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13	# second set uses ymm12 advanced by .avx2_inc
+	vmovdqa	%ymm12,%ymm11	# ymm11 keeps the initial ymm12
+	vmovdqa	%ymm13,%ymm15	# ymm15 keeps the initial ymm13
+	movq	$10,%r10	# loop counter
+1:
+	vpaddd	%ymm4,%ymm0,%ymm0	# first half-pass, set ymm0/4/8/12
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12	# byte shuffle by the .rol16 table (presumably a 16-bit dword rotate)
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3	# ymm3 is scratch; shift pair plus xor rotates each dword left by 12
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12	# byte shuffle by the .rol8 table (presumably an 8-bit dword rotate)
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3	# shift pair plus xor rotates each dword left by 7
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12	# rotate bytes within each 128-bit lane by 12 / 8 / 4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1	# first half-pass, set ymm1/5/9/13
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm4,%ymm0,%ymm0	# second half-pass, set ymm0/4/8/12
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12	# opposite lane rotation (4 / 8 / 12) undoes the earlier one
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1	# second half-pass, set ymm1/5/9/13
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+
+	decq	%r10
+	jne	1b	# loop until r10 reaches zero (10 passes)
+	vpaddd	%ymm2,%ymm0,%ymm0	# add the saved initial values back into both sets
+	vpaddd	%ymm2,%ymm1,%ymm1
+	vpaddd	%ymm6,%ymm4,%ymm4
+	vpaddd	%ymm6,%ymm5,%ymm5
+	vpaddd	%ymm10,%ymm8,%ymm8
+	vpaddd	%ymm10,%ymm9,%ymm9
+	vpaddd	%ymm11,%ymm12,%ymm12
+	vpaddd	%ymm15,%ymm13,%ymm13
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3	# ymm3 = low 128 bits of ymm0 (low half) and of ymm4 (high half)
+
+	vpand	.clamp(%rip),%ymm3,%ymm3	# mask with the .clamp constant
+	vmovdqa	%ymm3,0(%rbp)	# 32 bytes saved at 0(%rbp); the hash steps after open_avx2_short load 0(%rbp) and 8(%rbp) as multiplier words
+
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0	# repack remaining 128-bit halves into the order ymm0, ymm4, ymm8, ymm12, ymm1, ymm5 consumed by open_avx2_hash_and_xor_loop
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
+open_avx2_short:	# shared continuation: open_avx2_192 falls through to here and open_avx2_320 jumps here
+	movq	%r8,%r8	# register moved onto itself; leaves %r8 and flags unchanged
+	call	poly_hash_ad_internal	# defined outside this chunk; presumably hashes the additional data described by %r8 - confirm
+open_avx2_hash_and_xor_loop:	# per pass: absorb 32 input bytes into the r10:r11:r12 accumulator (two 16-byte steps), then XOR them with ymm0
+	cmpq	$32,%rbx
+	jb	open_avx2_short_tail_32	# fewer than 32 bytes left
+	subq	$32,%rbx
+	addq	0(%rsi),%r10	# step 1: accumulator += 16 bytes at 0(%rsi)
+	adcq	8+0(%rsi),%r11
+	adcq	$1,%r12	# plus 1 (and carry) into the third limb
+	movq	0+0(%rbp),%rax	# multiply the accumulator by the two 64-bit words at 0(%rbp) and 8(%rbp)
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	# product limbs are r13, r14, r15, r9; fold the bits above bit 130 (H) back in as 4H + H
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12	# keep only the low 2 bits of the third limb
+	movq	%r15,%r13
+	andq	$-4,%r13	# r13:r14 = 4H
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15	# r15:r9 = H
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	addq	16(%rsi),%r10	# step 2: same sequence for the 16 bytes at 16(%rsi)
+	adcq	8+16(%rsi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+
+	vpxor	(%rsi),%ymm0,%ymm0	# XOR the same 32 input bytes with ymm0 only after they were hashed
+	vmovdqu	%ymm0,(%rdi)
+	leaq	32(%rsi),%rsi
+	leaq	32(%rdi),%rdi
+
+	vmovdqa	%ymm4,%ymm0	# shift the register queue down one slot: 4->0, 8->4, 12->8, 1->12, 5->1, 9->5, 13->9, 2->13, 6->2
+	vmovdqa	%ymm8,%ymm4
+	vmovdqa	%ymm12,%ymm8
+	vmovdqa	%ymm1,%ymm12
+	vmovdqa	%ymm5,%ymm1
+	vmovdqa	%ymm9,%ymm5
+	vmovdqa	%ymm13,%ymm9
+	vmovdqa	%ymm2,%ymm13
+	vmovdqa	%ymm6,%ymm2
+	jmp	open_avx2_hash_and_xor_loop
+open_avx2_short_tail_32:	# reached with fewer than 32 bytes left in %rbx; hashes and XORs one optional 16-byte block
+	cmpq	$16,%rbx
+	vmovdqa	%xmm0,%xmm1	# xmm1 = low half of ymm0; SIMD move leaves the cmpq flags intact for the jb
+	jb	1f	# under 16 bytes: skip straight to the final tail handler
+	subq	$16,%rbx
+	addq	0(%rsi),%r10	# accumulator r10:r11:r12 += 16 bytes at (%rsi)
+	adcq	8+0(%rsi),%r11
+	adcq	$1,%r12	# plus 1 (and carry) into the third limb
+	movq	0+0(%rbp),%rax	# multiply the accumulator by the two 64-bit words at 0(%rbp) and 8(%rbp)
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	# product limbs are r13, r14, r15, r9; fold the bits above bit 130 (H) back in as 4H + H
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12	# keep only the low 2 bits of the third limb
+	movq	%r15,%r13
+	andq	$-4,%r13	# r13:r14 = 4H
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15	# r15:r9 = H
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	vpxor	(%rsi),%xmm0,%xmm3	# 16 input bytes XOR low half of ymm0, via scratch xmm3
+	vmovdqu	%xmm3,(%rdi)
+	leaq	16(%rsi),%rsi
+	leaq	16(%rdi),%rdi
+	vextracti128	$1,%ymm0,%xmm1	# xmm1 = high half of ymm0 for the final partial block
+1:
+	vzeroupper	# clear upper ymm halves before continuing in open_sse_tail_16
+	jmp	open_sse_tail_16	# defined outside this chunk; presumably consumes xmm1 for the last partial block - confirm
+
+open_avx2_320:	# runs three register sets (ymm0/4/8/12, ymm1/5/9/13, ymm2/6/10/14) through 10 loop passes; presumably the path for inputs of at most 320 bytes (dispatch is outside this chunk)
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm8,%ymm10
+	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13	# second set uses ymm12 advanced by .avx2_inc
+	vpaddd	.avx2_inc(%rip),%ymm13,%ymm14	# third set advances once more
+	vmovdqa	%ymm4,%ymm7	# ymm7 keeps the initial ymm4 for the add-back
+	vmovdqa	%ymm8,%ymm11	# ymm11 keeps the initial ymm8 for the add-back
+	vmovdqa	%ymm12,160(%rbp)	# initial ymm12/13/14 are saved on the frame at 160/192/224(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+	movq	$10,%r10	# loop counter
+1:
+	vpaddd	%ymm4,%ymm0,%ymm0	# first half-pass, set ymm0/4/8/12
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12	# byte shuffle by the .rol16 table (presumably a 16-bit dword rotate)
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3	# ymm3 is scratch; shift pair plus xor rotates each dword left by 12
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12	# byte shuffle by the .rol8 table (presumably an 8-bit dword rotate)
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3	# shift pair plus xor rotates each dword left by 7
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12	# rotate bytes within each 128-bit lane by 12 / 8 / 4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1	# first half-pass, set ymm1/5/9/13
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm6,%ymm2,%ymm2	# first half-pass, set ymm2/6/10/14
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	vpaddd	%ymm4,%ymm0,%ymm0	# second half-pass, set ymm0/4/8/12
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12	# opposite lane rotation (4 / 8 / 12) undoes the earlier one
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1	# second half-pass, set ymm1/5/9/13
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm6,%ymm2,%ymm2	# second half-pass, set ymm2/6/10/14
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+
+	decq	%r10
+	jne	1b	# loop until r10 reaches zero (10 passes)
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0	# add-back for ymm0/1/2 uses the constant table; presumably ymm0 held .chacha20_consts on entry - confirm against the caller
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	%ymm7,%ymm4,%ymm4	# ymm7 = initial ymm4 saved before the loop
+	vpaddd	%ymm7,%ymm5,%ymm5
+	vpaddd	%ymm7,%ymm6,%ymm6
+	vpaddd	%ymm11,%ymm8,%ymm8	# ymm11 = initial ymm8 saved before the loop
+	vpaddd	%ymm11,%ymm9,%ymm9
+	vpaddd	%ymm11,%ymm10,%ymm10
+	vpaddd	160(%rbp),%ymm12,%ymm12	# initial ymm12/13/14 reloaded from the frame
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3	# ymm3 = low 128 bits of ymm0 (low half) and of ymm4 (high half)
+
+	vpand	.clamp(%rip),%ymm3,%ymm3	# mask with the .clamp constant
+	vmovdqa	%ymm3,0(%rbp)	# 32 bytes saved at 0(%rbp); the hash steps after open_avx2_short load 0(%rbp) and 8(%rbp) as multiplier words
+
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0	# repack remaining 128-bit halves into the order ymm0, ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13, ymm2, ymm6 consumed by open_avx2_hash_and_xor_loop
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
+	jmp	open_avx2_short	# continue with additional-data hashing and the hash-and-XOR loop
+.size	chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2
+
+
+.type	chacha20_poly1305_seal_avx2,@function
+.align	64
+chacha20_poly1305_seal_avx2:
+	vzeroupper
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vbroadcasti128	0(%r9),%ymm4
+	vbroadcasti128	16(%r9),%ymm8
+	vbroadcasti128	32(%r9),%ymm12
+	vpaddd	.avx2_init(%rip),%ymm12,%ymm12
+	cmpq	$192,%rbx
+	jbe	seal_avx2_192
+	cmpq	$320,%rbx
+	jbe	seal_avx2_320
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm0,%ymm3
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm4,%ymm7
+	vmovdqa	%ymm4,64(%rbp)
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm8,%ymm10
+	vmovdqa	%ymm8,%ymm11
+	vmovdqa	%ymm8,96(%rbp)
+	vmovdqa	%ymm12,%ymm15
+	vpaddd	.avx2_inc(%rip),%ymm15,%ymm14
+	vpaddd	.avx2_inc(%rip),%ymm14,%ymm13
+	vpaddd	.avx2_inc(%rip),%ymm13,%ymm12
+	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm15,256(%rbp)
+	movq	$10,%r10
+1:
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$4,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$12,%ymm15,%ymm15,%ymm15
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$12,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$4,%ymm15,%ymm15,%ymm15
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+
+	decq	%r10
+	jnz	1b
+	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	64(%rbp),%ymm7,%ymm7
+	vpaddd	96(%rbp),%ymm11,%ymm11
+	vpaddd	256(%rbp),%ymm15,%ymm15
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	64(%rbp),%ymm6,%ymm6
+	vpaddd	96(%rbp),%ymm10,%ymm10
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+
+	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
+	vperm2i128	$0x02,%ymm3,%ymm7,%ymm15
+	vperm2i128	$0x13,%ymm3,%ymm7,%ymm3
+	vpand	.clamp(%rip),%ymm15,%ymm15
+	vmovdqa	%ymm15,0(%rbp)
+	movq	%r8,%r8
+	call	poly_hash_ad_internal
+
+	vpxor	0(%rsi),%ymm3,%ymm3
+	vpxor	32(%rsi),%ymm11,%ymm11
+	vmovdqu	%ymm3,0(%rdi)
+	vmovdqu	%ymm11,32(%rdi)
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm15
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
+	vpxor	0+64(%rsi),%ymm15,%ymm15
+	vpxor	32+64(%rsi),%ymm2,%ymm2
+	vpxor	64+64(%rsi),%ymm6,%ymm6
+	vpxor	96+64(%rsi),%ymm10,%ymm10
+	vmovdqu	%ymm15,0+64(%rdi)
+	vmovdqu	%ymm2,32+64(%rdi)
+	vmovdqu	%ymm6,64+64(%rdi)
+	vmovdqu	%ymm10,96+64(%rdi)
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm15
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+192(%rsi),%ymm15,%ymm15
+	vpxor	32+192(%rsi),%ymm1,%ymm1
+	vpxor	64+192(%rsi),%ymm5,%ymm5
+	vpxor	96+192(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm15,0+192(%rdi)
+	vmovdqu	%ymm1,32+192(%rdi)
+	vmovdqu	%ymm5,64+192(%rdi)
+	vmovdqu	%ymm9,96+192(%rdi)
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm15
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
+	vmovdqa	%ymm15,%ymm8
+
+	leaq	320(%rsi),%rsi
+	subq	$320,%rbx
+	movq	$320,%rcx
+	cmpq	$128,%rbx
+	jbe	seal_avx2_hash
+	vpxor	0(%rsi),%ymm0,%ymm0
+	vpxor	32(%rsi),%ymm4,%ymm4
+	vpxor	64(%rsi),%ymm8,%ymm8
+	vpxor	96(%rsi),%ymm12,%ymm12
+	vmovdqu	%ymm0,320(%rdi)
+	vmovdqu	%ymm4,352(%rdi)
+	vmovdqu	%ymm8,384(%rdi)
+	vmovdqu	%ymm12,416(%rdi)
+	leaq	128(%rsi),%rsi
+	subq	$128,%rbx
+	movq	$8,%rcx
+	movq	$2,%r8
+	cmpq	$128,%rbx
+	jbe	seal_avx2_tail_128
+	cmpq	$256,%rbx
+	jbe	seal_avx2_tail_256
+	cmpq	$384,%rbx
+	jbe	seal_avx2_tail_384
+	cmpq	$512,%rbx
+	jbe	seal_avx2_tail_512
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm10
+	vmovdqa	%ymm0,%ymm3
+	vmovdqa	%ymm4,%ymm7
+	vmovdqa	%ymm8,%ymm11
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm15
+	vpaddd	%ymm15,%ymm12,%ymm14
+	vpaddd	%ymm14,%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm15,256(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$4,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$12,%ymm15,%ymm15,%ymm15
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$12,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$4,%ymm15,%ymm15,%ymm15
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+
+	subq	$16,%rdi
+	movq	$9,%rcx
+	jmp	4f
+1:
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm10
+	vmovdqa	%ymm0,%ymm3
+	vmovdqa	%ymm4,%ymm7
+	vmovdqa	%ymm8,%ymm11
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm15
+	vpaddd	%ymm15,%ymm12,%ymm14
+	vpaddd	%ymm14,%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm15,256(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm12,160(%rbp)
+
+	movq	$10,%rcx
+2:
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+4:
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	addq	16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$4,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$12,%ymm15,%ymm15,%ymm15
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	addq	32(%rdi),%r10
+	adcq	8+32(%rdi),%r11
+	adcq	$1,%r12
+
+	leaq	48(%rdi),%rdi
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$12,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$4,%ymm15,%ymm15,%ymm15
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+
+	decq	%rcx
+	jne	2b
+	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	64(%rbp),%ymm7,%ymm7
+	vpaddd	96(%rbp),%ymm11,%ymm11
+	vpaddd	256(%rbp),%ymm15,%ymm15
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	64(%rbp),%ymm6,%ymm6
+	vpaddd	96(%rbp),%ymm10,%ymm10
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+
+	leaq	32(%rdi),%rdi
+	vmovdqa	%ymm0,128(%rbp)
+	addq	-32(%rdi),%r10
+	adcq	8+-32(%rdi),%r11
+	adcq	$1,%r12
+	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
+	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
+	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
+	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
+	vpxor	0+0(%rsi),%ymm0,%ymm0
+	vpxor	32+0(%rsi),%ymm3,%ymm3
+	vpxor	64+0(%rsi),%ymm7,%ymm7
+	vpxor	96+0(%rsi),%ymm11,%ymm11
+	vmovdqu	%ymm0,0+0(%rdi)
+	vmovdqu	%ymm3,32+0(%rdi)
+	vmovdqu	%ymm7,64+0(%rdi)
+	vmovdqu	%ymm11,96+0(%rdi)
+
+	vmovdqa	128(%rbp),%ymm0
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
+	vpxor	0+128(%rsi),%ymm3,%ymm3
+	vpxor	32+128(%rsi),%ymm2,%ymm2
+	vpxor	64+128(%rsi),%ymm6,%ymm6
+	vpxor	96+128(%rsi),%ymm10,%ymm10
+	vmovdqu	%ymm3,0+128(%rdi)
+	vmovdqu	%ymm2,32+128(%rdi)
+	vmovdqu	%ymm6,64+128(%rdi)
+	vmovdqu	%ymm10,96+128(%rdi)
+	addq	-16(%rdi),%r10
+	adcq	8+-16(%rdi),%r11
+	adcq	$1,%r12
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+256(%rsi),%ymm3,%ymm3
+	vpxor	32+256(%rsi),%ymm1,%ymm1
+	vpxor	64+256(%rsi),%ymm5,%ymm5
+	vpxor	96+256(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm3,0+256(%rdi)
+	vmovdqu	%ymm1,32+256(%rdi)
+	vmovdqu	%ymm5,64+256(%rdi)
+	vmovdqu	%ymm9,96+256(%rdi)
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
+	vpxor	0+384(%rsi),%ymm3,%ymm3
+	vpxor	32+384(%rsi),%ymm0,%ymm0
+	vpxor	64+384(%rsi),%ymm4,%ymm4
+	vpxor	96+384(%rsi),%ymm8,%ymm8
+	vmovdqu	%ymm3,0+384(%rdi)
+	vmovdqu	%ymm0,32+384(%rdi)
+	vmovdqu	%ymm4,64+384(%rdi)
+	vmovdqu	%ymm8,96+384(%rdi)
+
+	leaq	512(%rsi),%rsi
+	subq	$512,%rbx
+	cmpq	$512,%rbx
+	jg	1b
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	addq	16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	32(%rdi),%rdi
+	movq	$10,%rcx
+	xorq	%r8,%r8
+	cmpq	$128,%rbx
+	ja	3f
+
+seal_avx2_tail_128:	/* Tail path taken when %rbx <= 128 (see the cmpq $128,%rbx checks before this label). Runs one ymm0/ymm4/ymm8/ymm12 state through rcx + r8 passes while hashing 16-byte blocks read from (%rdi). */
+	vmovdqa	.chacha20_consts(%rip),%ymm0	/* row a = constant row */
+	vmovdqa	64(%rbp),%ymm4	/* row b = saved row at 64(%rbp) (presumably key words; confirm against the setup code outside this chunk) */
+	vmovdqa	96(%rbp),%ymm8	/* row c = saved row at 96(%rbp) */
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm12	/* row d = counter row at 160(%rbp) advanced by .avx2_inc */
+	vmovdqa	%ymm12,160(%rbp)	/* save the advanced counter row; it is added back after the rounds */
+/* Label 1: absorb one extra 16-byte block into the accumulator r10:r11:r12, then fall into label 2. */
+1:
+	addq	0(%rdi),%r10	/* acc += 16 bytes at (%rdi) ... */
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12	/* ... plus 2^128 (the 1 added into the top limb) */
+	movq	0+0(%rbp),%rax	/* schoolbook multiply of acc by the two 64-bit words at 0(%rbp) and 8(%rbp) (presumably the Poly1305 r key; confirm) */
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9	/* product limbs are now r13, r14, r15, r9 (low to high) */
+	movq	%r13,%r10	/* reduction: acc = low 130 bits of the product ... */
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13	/* r13:r14 = bits at and above 2^130 left in place, i.e. 4*q */
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15	/* r15:r9 = the same bits shifted down by 2, i.e. q */
+	shrq	$2,%r9
+	addq	%r13,%r10	/* ... + 4*q ... */
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10	/* ... + q, so q*2^130 is folded back in as 5*q (consistent with arithmetic modulo 2^130 - 5) */
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rdi),%rdi	/* step past the extra block just absorbed */
+2:	/* One pass: two quarter-round steps on ymm0/ymm4/ymm8/ymm12, each followed by one 16-byte hash block. */
+	vpaddd	%ymm4,%ymm0,%ymm0	/* a += b */
+	vpxor	%ymm0,%ymm12,%ymm12	/* d ^= a */
+	vpshufb	.rol16(%rip),%ymm12,%ymm12	/* shuffle bytes of d with the .rol16 table (presumably rotate-left 16 per dword; table is outside this chunk) */
+	vpaddd	%ymm12,%ymm8,%ymm8	/* c += d */
+	vpxor	%ymm8,%ymm4,%ymm4	/* b ^= c */
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4	/* b = rotate-left(b, 12) per dword; ymm3 is scratch */
+	vpaddd	%ymm4,%ymm0,%ymm0	/* a += b */
+	vpxor	%ymm0,%ymm12,%ymm12	/* d ^= a */
+	vpshufb	.rol8(%rip),%ymm12,%ymm12	/* shuffle bytes of d with the .rol8 table (presumably rotate-left 8 per dword) */
+	vpaddd	%ymm12,%ymm8,%ymm8	/* c += d */
+	vpxor	%ymm8,%ymm4,%ymm4	/* b ^= c */
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4	/* b = rotate-left(b, 7) per dword */
+	vpalignr	$12,%ymm12,%ymm12,%ymm12	/* rotate each 128-bit lane of d by 12 bytes, */
+	vpalignr	$8,%ymm8,%ymm8,%ymm8	/* of c by 8 bytes, */
+	vpalignr	$4,%ymm4,%ymm4,%ymm4	/* and of b by 4 bytes, realigning the rows for the second step */
+	addq	0(%rdi),%r10	/* hash block at 0(%rdi): same add / multiply / reduce sequence as under label 1 */
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	/* reduction: low 130 bits + 4*q + q */
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpaddd	%ymm4,%ymm0,%ymm0	/* second quarter-round step: same add / xor / rotate sequence (16, 12, 8, 7) */
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12	/* complementary lane rotations (4 / 8 / 12 bytes) undo the realignment made after the first step */
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	addq	16(%rdi),%r10	/* hash block at 16(%rdi): same add / multiply / reduce sequence */
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	/* reduction: low 130 bits + 4*q + q */
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+/* Loop control: on the two entry paths visible in this chunk rcx + r8 = 10 (8 + 2, or 10 + 0), giving 10 passes through label 2. */
+	leaq	32(%rdi),%rdi	/* step past the two blocks hashed in this pass */
+	decq	%rcx
+	jg	1b	/* while rcx is still > 0 the next pass starts at label 1 and hashes 48 bytes */
+	decq	%r8
+	jge	2b	/* afterwards r8 further passes start at label 2 and hash 32 bytes each */
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0	/* add the input rows back onto the permuted state */
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12	/* the counter row saved at the top of this section */
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3	/* ymm3 = high 128-bit lanes of rows a and b */
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0	/* ymm0 = low lanes of rows a and b */
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4	/* ymm4 = low lanes of rows c and d */
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12	/* ymm12 = high lanes of rows c and d */
+	vmovdqa	%ymm3,%ymm8	/* ymm8 = high lanes of rows a and b */
+/* No input is XORed in this section; the regrouped rows in ymm0, ymm4, ymm8, ymm12 are handed to seal_avx2_short_loop (defined outside this chunk). */
+	jmp	seal_avx2_short_loop
+3:
+	cmpq	$256,%rbx
+	ja	3f
+
+seal_avx2_tail_256:	/* Tail path for %rbx <= 256 (see the cmpq $256,%rbx checks before this label). Runs two states, ymm0/4/8/12 and ymm1/5/9/13, through rcx + r8 passes while hashing 16-byte blocks read from (%rdi). */
+	vmovdqa	.chacha20_consts(%rip),%ymm0	/* rows a, b, c of the first state from the constant row and the rows saved at 64(%rbp) and 96(%rbp) */
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1	/* second state starts from the same three rows */
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm13	/* ymm13 = counter row at 160(%rbp) + .avx2_inc (second state) */
+	vpaddd	%ymm13,%ymm12,%ymm12	/* ymm12 = ymm13 + .avx2_inc (first state) */
+	vmovdqa	%ymm12,160(%rbp)	/* save both counter rows; they are added back after the rounds */
+	vmovdqa	%ymm13,192(%rbp)
+/* Label 1: absorb one extra 16-byte block into the accumulator r10:r11:r12, then fall into label 2. */
+1:
+	addq	0(%rdi),%r10	/* acc += 16 bytes at (%rdi) ... */
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12	/* ... plus 2^128 (the 1 added into the top limb) */
+	movq	0+0(%rbp),%rax	/* schoolbook multiply of acc by the two 64-bit words at 0(%rbp) and 8(%rbp) (presumably the Poly1305 r key; confirm) */
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9	/* product limbs are now r13, r14, r15, r9 (low to high) */
+	movq	%r13,%r10	/* reduction: acc = low 130 bits of the product ... */
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13	/* r13:r14 = bits at and above 2^130 left in place, i.e. 4*q */
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15	/* r15:r9 = the same bits shifted down by 2, i.e. q */
+	shrq	$2,%r9
+	addq	%r13,%r10	/* ... + 4*q ... */
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10	/* ... + q, so q*2^130 is folded back in as 5*q (consistent with arithmetic modulo 2^130 - 5) */
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rdi),%rdi	/* step past the extra block just absorbed */
+2:	/* One pass: two quarter-round steps on each of the two states, with one 16-byte hash block after each step pair. */
+	vpaddd	%ymm4,%ymm0,%ymm0	/* first state, step 1: a += b */
+	vpxor	%ymm0,%ymm12,%ymm12	/* d ^= a */
+	vpshufb	.rol16(%rip),%ymm12,%ymm12	/* shuffle bytes of d with the .rol16 table (presumably rotate-left 16 per dword; table is outside this chunk) */
+	vpaddd	%ymm12,%ymm8,%ymm8	/* c += d */
+	vpxor	%ymm8,%ymm4,%ymm4	/* b ^= c */
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4	/* b = rotate-left(b, 12) per dword; ymm3 is scratch */
+	vpaddd	%ymm4,%ymm0,%ymm0	/* a += b */
+	vpxor	%ymm0,%ymm12,%ymm12	/* d ^= a */
+	vpshufb	.rol8(%rip),%ymm12,%ymm12	/* shuffle bytes of d with the .rol8 table (presumably rotate-left 8 per dword) */
+	vpaddd	%ymm12,%ymm8,%ymm8	/* c += d */
+	vpxor	%ymm8,%ymm4,%ymm4	/* b ^= c */
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4	/* b = rotate-left(b, 7) per dword */
+	vpalignr	$12,%ymm12,%ymm12,%ymm12	/* rotate each 128-bit lane of d by 12 bytes, */
+	vpalignr	$8,%ymm8,%ymm8,%ymm8	/* of c by 8 bytes, */
+	vpalignr	$4,%ymm4,%ymm4,%ymm4	/* and of b by 4 bytes, realigning the rows for the second step */
+	vpaddd	%ymm5,%ymm1,%ymm1	/* second state, step 1: identical sequence on ymm1/ymm5/ymm9/ymm13 */
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	addq	0(%rdi),%r10	/* hash block at 0(%rdi): same add / multiply / reduce sequence as under label 1 */
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	/* reduction: low 130 bits + 4*q + q */
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpaddd	%ymm4,%ymm0,%ymm0	/* first state, step 2: same add / xor / rotate sequence (16, 12, 8, 7) */
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12	/* complementary lane rotations (4 / 8 / 12 bytes) undo the realignment made after step 1 */
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1	/* second state, step 2 */
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	addq	16(%rdi),%r10	/* hash block at 16(%rdi): same add / multiply / reduce sequence */
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	/* reduction: low 130 bits + 4*q + q */
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+/* Loop control: on the two entry paths visible in this chunk rcx + r8 = 10 (8 + 2, or 10 + 0), giving 10 passes through label 2. */
+	leaq	32(%rdi),%rdi	/* step past the two blocks hashed in this pass */
+	decq	%rcx
+	jg	1b	/* while rcx is still > 0 the next pass starts at label 1 and hashes 48 bytes */
+	decq	%r8
+	jge	2b	/* afterwards r8 further passes start at label 2 and hash 32 bytes each */
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1	/* add the input rows back onto the second state (counter row saved at 192(%rbp)) */
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0	/* and onto the first state (counter row saved at 160(%rbp)) */
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3	/* ymm3 = low 128-bit lanes of rows a and b of the second state */
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5	/* ymm5 = high lanes of rows a and b */
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1	/* ymm1 = low lanes of rows c and d */
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9	/* ymm9 = high lanes of rows c and d */
+	vpxor	0+0(%rsi),%ymm3,%ymm3	/* XOR 128 bytes at (%rsi) with the regrouped second state ... */
+	vpxor	32+0(%rsi),%ymm1,%ymm1
+	vpxor	64+0(%rsi),%ymm5,%ymm5
+	vpxor	96+0(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm3,0+0(%rdi)	/* ... and store the result at (%rdi); %rdi itself is not advanced here */
+	vmovdqu	%ymm1,32+0(%rdi)
+	vmovdqu	%ymm5,64+0(%rdi)
+	vmovdqu	%ymm9,96+0(%rdi)
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3	/* regroup the first state: ymm3 = high lanes of rows a and b */
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0	/* ymm0 = low lanes of rows a and b */
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4	/* ymm4 = low lanes of rows c and d */
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12	/* ymm12 = high lanes of rows c and d */
+	vmovdqa	%ymm3,%ymm8	/* ymm8 = high lanes of rows a and b */
+/* Hand off to seal_avx2_hash (outside this chunk) with rcx = 128 and the regrouped first state still un-XORed in ymm0, ymm4, ymm8, ymm12. */
+	movq	$128,%rcx
+	leaq	128(%rsi),%rsi	/* consume the 128 input bytes just processed */
+	subq	$128,%rbx
+	jmp	seal_avx2_hash
+3:
+	cmpq	$384,%rbx
+	ja	seal_avx2_tail_512
+
+seal_avx2_tail_384:
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm10
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm14
+	vpaddd	%ymm14,%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+
+1:
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rdi),%rdi
+2:
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	addq	16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+
+	leaq	32(%rdi),%rdi
+	decq	%rcx
+	jg	1b
+	decq	%r8
+	jge	2b
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	64(%rbp),%ymm6,%ymm6
+	vpaddd	96(%rbp),%ymm10,%ymm10
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
+	vpxor	0+0(%rsi),%ymm3,%ymm3
+	vpxor	32+0(%rsi),%ymm2,%ymm2
+	vpxor	64+0(%rsi),%ymm6,%ymm6
+	vpxor	96+0(%rsi),%ymm10,%ymm10
+	vmovdqu	%ymm3,0+0(%rdi)
+	vmovdqu	%ymm2,32+0(%rdi)
+	vmovdqu	%ymm6,64+0(%rdi)
+	vmovdqu	%ymm10,96+0(%rdi)
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+128(%rsi),%ymm3,%ymm3
+	vpxor	32+128(%rsi),%ymm1,%ymm1
+	vpxor	64+128(%rsi),%ymm5,%ymm5
+	vpxor	96+128(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm3,0+128(%rdi)
+	vmovdqu	%ymm1,32+128(%rdi)
+	vmovdqu	%ymm5,64+128(%rdi)
+	vmovdqu	%ymm9,96+128(%rdi)
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
+	vmovdqa	%ymm3,%ymm8
+
+	movq	$256,%rcx
+	leaq	256(%rsi),%rsi
+	subq	$256,%rbx
+	jmp	seal_avx2_hash
+
+seal_avx2_tail_512:
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm10
+	vmovdqa	%ymm0,%ymm3
+	vmovdqa	%ymm4,%ymm7
+	vmovdqa	%ymm8,%ymm11
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm15
+	vpaddd	%ymm15,%ymm12,%ymm14
+	vpaddd	%ymm14,%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm15,256(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm12,160(%rbp)
+
+1:
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rdi),%rdi
+2:
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$4,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$12,%ymm15,%ymm15,%ymm15
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	addq	16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$12,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$4,%ymm15,%ymm15,%ymm15
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+
+
+
+
+
+
+
+
+
+
+
+
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	32(%rdi),%rdi
+	decq	%rcx
+	jg	1b
+	decq	%r8
+	jge	2b
+	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	64(%rbp),%ymm7,%ymm7
+	vpaddd	96(%rbp),%ymm11,%ymm11
+	vpaddd	256(%rbp),%ymm15,%ymm15
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	64(%rbp),%ymm6,%ymm6
+	vpaddd	96(%rbp),%ymm10,%ymm10
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+
+	vmovdqa	%ymm0,128(%rbp)
+	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
+	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
+	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
+	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
+	vpxor	0+0(%rsi),%ymm0,%ymm0
+	vpxor	32+0(%rsi),%ymm3,%ymm3
+	vpxor	64+0(%rsi),%ymm7,%ymm7
+	vpxor	96+0(%rsi),%ymm11,%ymm11
+	vmovdqu	%ymm0,0+0(%rdi)
+	vmovdqu	%ymm3,32+0(%rdi)
+	vmovdqu	%ymm7,64+0(%rdi)
+	vmovdqu	%ymm11,96+0(%rdi)
+
+	vmovdqa	128(%rbp),%ymm0
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
+	vpxor	0+128(%rsi),%ymm3,%ymm3
+	vpxor	32+128(%rsi),%ymm2,%ymm2
+	vpxor	64+128(%rsi),%ymm6,%ymm6
+	vpxor	96+128(%rsi),%ymm10,%ymm10
+	vmovdqu	%ymm3,0+128(%rdi)
+	vmovdqu	%ymm2,32+128(%rdi)
+	vmovdqu	%ymm6,64+128(%rdi)
+	vmovdqu	%ymm10,96+128(%rdi)
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+256(%rsi),%ymm3,%ymm3
+	vpxor	32+256(%rsi),%ymm1,%ymm1
+	vpxor	64+256(%rsi),%ymm5,%ymm5
+	vpxor	96+256(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm3,0+256(%rdi)
+	vmovdqu	%ymm1,32+256(%rdi)
+	vmovdqu	%ymm5,64+256(%rdi)
+	vmovdqu	%ymm9,96+256(%rdi)
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
+	vmovdqa	%ymm3,%ymm8
+
+	movq	$384,%rcx
+	leaq	384(%rsi),%rsi
+	subq	$384,%rbx
+	jmp	seal_avx2_hash
+
+seal_avx2_320:
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm8,%ymm10
+	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
+	vpaddd	.avx2_inc(%rip),%ymm13,%ymm14
+	vmovdqa	%ymm4,%ymm7
+	vmovdqa	%ymm8,%ymm11
+	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+	movq	$10,%r10
+1:
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+
+	decq	%r10
+	jne	1b
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	%ymm7,%ymm4,%ymm4
+	vpaddd	%ymm7,%ymm5,%ymm5
+	vpaddd	%ymm7,%ymm6,%ymm6
+	vpaddd	%ymm11,%ymm8,%ymm8
+	vpaddd	%ymm11,%ymm9,%ymm9
+	vpaddd	%ymm11,%ymm10,%ymm10
+	vpaddd	160(%rbp),%ymm12,%ymm12
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
+
+	vpand	.clamp(%rip),%ymm3,%ymm3
+	vmovdqa	%ymm3,0(%rbp)
+
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
+	jmp	seal_avx2_short
+
+seal_avx2_192:
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm8,%ymm10
+	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
+	vmovdqa	%ymm12,%ymm11
+	vmovdqa	%ymm13,%ymm15
+	movq	$10,%r10
+1:
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+
+	decq	%r10
+	jne	1b
+	vpaddd	%ymm2,%ymm0,%ymm0
+	vpaddd	%ymm2,%ymm1,%ymm1
+	vpaddd	%ymm6,%ymm4,%ymm4
+	vpaddd	%ymm6,%ymm5,%ymm5
+	vpaddd	%ymm10,%ymm8,%ymm8
+	vpaddd	%ymm10,%ymm9,%ymm9
+	vpaddd	%ymm11,%ymm12,%ymm12
+	vpaddd	%ymm15,%ymm13,%ymm13
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
+
+	vpand	.clamp(%rip),%ymm3,%ymm3
+	vmovdqa	%ymm3,0(%rbp)
+
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
+seal_avx2_short:
+	movq	%r8,%r8
+	call	poly_hash_ad_internal
+	xorq	%rcx,%rcx
+seal_avx2_hash:
+	cmpq	$16,%rcx
+	jb	seal_avx2_short_loop
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	subq	$16,%rcx
+	addq	$16,%rdi
+	jmp	seal_avx2_hash
+seal_avx2_short_loop:
+	cmpq	$32,%rbx
+	jb	seal_avx2_short_tail
+	subq	$32,%rbx
+
+	vpxor	(%rsi),%ymm0,%ymm0
+	vmovdqu	%ymm0,(%rdi)
+	leaq	32(%rsi),%rsi
+
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	addq	16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	32(%rdi),%rdi
+
+	vmovdqa	%ymm4,%ymm0
+	vmovdqa	%ymm8,%ymm4
+	vmovdqa	%ymm12,%ymm8
+	vmovdqa	%ymm1,%ymm12
+	vmovdqa	%ymm5,%ymm1
+	vmovdqa	%ymm9,%ymm5
+	vmovdqa	%ymm13,%ymm9
+	vmovdqa	%ymm2,%ymm13
+	vmovdqa	%ymm6,%ymm2
+	jmp	seal_avx2_short_loop
+seal_avx2_short_tail:
+	cmpq	$16,%rbx
+	jb	1f
+	subq	$16,%rbx
+	vpxor	(%rsi),%xmm0,%xmm3
+	vmovdqu	%xmm3,(%rdi)
+	leaq	16(%rsi),%rsi
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rdi),%rdi
+	vextracti128	$1,%ymm0,%xmm0
+1:
+	vzeroupper
+	jmp	seal_sse_tail_16
+.cfi_endproc	
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/fipsmodule/aes-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/aes-x86_64.S
new file mode 100644
index 0000000..ff87f98
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/aes-x86_64.S
@@ -0,0 +1,2536 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+.type	_x86_64_AES_encrypt,@function	/* Table-driven AES encryption core; internal helper with a register-based interface. */
+.align	16
+_x86_64_AES_encrypt:	/* In: eax/ebx/ecx/edx = four 32-bit state words, r15 = key material, r14 = lookup table base. Out: eax/ebx/ecx/edx. Also modifies esi, edi, ebp, r8d, r10d-r13d and advances r15. */
+	xorl	0(%r15),%eax	/* XOR the first 16 bytes at r15 into the state */
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+
+	movl	240(%r15),%r13d	/* loop counter = dword at r15+240 (presumably the round count) minus 1 */
+	subl	$1,%r13d
+	jmp	.Lenc_loop
+.align	16
+.Lenc_loop:
+/* One pass: each state byte indexes an 8-byte table entry at r14; the displacement (0, 3, 2 or 1) selects which 32 bits of the entry are read. */
+	movzbl	%al,%esi
+	movzbl	%bl,%edi
+	movzbl	%cl,%ebp
+	movl	0(%r14,%rsi,8),%r10d	/* r10d, r11d, r12d, r8d accumulate the four new state words */
+	movl	0(%r14,%rdi,8),%r11d
+	movl	0(%r14,%rbp,8),%r12d
+
+	movzbl	%bh,%esi
+	movzbl	%ch,%edi
+	movzbl	%dl,%ebp
+	xorl	3(%r14,%rsi,8),%r10d
+	xorl	3(%r14,%rdi,8),%r11d
+	movl	0(%r14,%rbp,8),%r8d
+
+	movzbl	%dh,%esi
+	shrl	$16,%ecx	/* expose the upper two bytes of each state word as cl/ch, dl/dh, ... */
+	movzbl	%ah,%ebp
+	xorl	3(%r14,%rsi,8),%r12d
+	shrl	$16,%edx
+	xorl	3(%r14,%rbp,8),%r8d
+
+	shrl	$16,%ebx
+	leaq	16(%r15),%r15	/* step to the next 16 bytes of key material */
+	shrl	$16,%eax
+
+	movzbl	%cl,%esi
+	movzbl	%dl,%edi
+	movzbl	%al,%ebp
+	xorl	2(%r14,%rsi,8),%r10d
+	xorl	2(%r14,%rdi,8),%r11d
+	xorl	2(%r14,%rbp,8),%r12d
+
+	movzbl	%dh,%esi
+	movzbl	%ah,%edi
+	movzbl	%bl,%ebp
+	xorl	1(%r14,%rsi,8),%r10d
+	xorl	1(%r14,%rdi,8),%r11d
+	xorl	2(%r14,%rbp,8),%r8d
+
+	movl	12(%r15),%edx	/* reload state registers with the next key words while the last lookups finish */
+	movzbl	%bh,%edi
+	movzbl	%ch,%ebp
+	movl	0(%r15),%eax
+	xorl	1(%r14,%rdi,8),%r12d
+	xorl	1(%r14,%rbp,8),%r8d
+
+	movl	4(%r15),%ebx
+	movl	8(%r15),%ecx
+	xorl	%r10d,%eax	/* new state = key words XOR accumulated table words */
+	xorl	%r11d,%ebx
+	xorl	%r12d,%ecx
+	xorl	%r8d,%edx
+	subl	$1,%r13d
+	jnz	.Lenc_loop	/* repeat until the counter reaches zero */
+	movzbl	%al,%esi	/* Last pass: each output word is assembled one byte lane at a time from table entries */
+	movzbl	%bl,%edi
+	movzbl	%cl,%ebp
+	movzbl	2(%r14,%rsi,8),%r10d	/* low byte lane: single byte at offset 2 of the entry */
+	movzbl	2(%r14,%rdi,8),%r11d
+	movzbl	2(%r14,%rbp,8),%r12d
+
+	movzbl	%dl,%esi
+	movzbl	%bh,%edi
+	movzbl	%ch,%ebp
+	movzbl	2(%r14,%rsi,8),%r8d
+	movl	0(%r14,%rdi,8),%edi
+	movl	0(%r14,%rbp,8),%ebp
+
+	andl	$0x0000ff00,%edi	/* keep only bits 8-15 of the loaded word */
+	andl	$0x0000ff00,%ebp
+
+	xorl	%edi,%r10d
+	xorl	%ebp,%r11d
+	shrl	$16,%ecx
+
+	movzbl	%dh,%esi
+	movzbl	%ah,%edi
+	shrl	$16,%edx
+	movl	0(%r14,%rsi,8),%esi
+	movl	0(%r14,%rdi,8),%edi
+
+	andl	$0x0000ff00,%esi
+	andl	$0x0000ff00,%edi
+	shrl	$16,%ebx
+	xorl	%esi,%r12d
+	xorl	%edi,%r8d
+	shrl	$16,%eax
+
+	movzbl	%cl,%esi
+	movzbl	%dl,%edi
+	movzbl	%al,%ebp
+	movl	0(%r14,%rsi,8),%esi
+	movl	0(%r14,%rdi,8),%edi
+	movl	0(%r14,%rbp,8),%ebp
+
+	andl	$0x00ff0000,%esi	/* keep only bits 16-23 of the loaded word */
+	andl	$0x00ff0000,%edi
+	andl	$0x00ff0000,%ebp
+
+	xorl	%esi,%r10d
+	xorl	%edi,%r11d
+	xorl	%ebp,%r12d
+
+	movzbl	%bl,%esi
+	movzbl	%dh,%edi
+	movzbl	%ah,%ebp
+	movl	0(%r14,%rsi,8),%esi
+	movl	2(%r14,%rdi,8),%edi
+	movl	2(%r14,%rbp,8),%ebp
+
+	andl	$0x00ff0000,%esi
+	andl	$0xff000000,%edi	/* keep only bits 24-31 of the word read at offset 2 */
+	andl	$0xff000000,%ebp
+
+	xorl	%esi,%r8d
+	xorl	%edi,%r10d
+	xorl	%ebp,%r11d
+
+	movzbl	%bh,%esi
+	movzbl	%ch,%edi
+	movl	16+12(%r15),%edx	/* the final 16 bytes of key material are read from r15+16 */
+	movl	2(%r14,%rsi,8),%esi
+	movl	2(%r14,%rdi,8),%edi
+	movl	16+0(%r15),%eax
+
+	andl	$0xff000000,%esi
+	andl	$0xff000000,%edi
+
+	xorl	%esi,%r12d
+	xorl	%edi,%r8d
+
+	movl	16+4(%r15),%ebx
+	movl	16+8(%r15),%ecx
+	xorl	%r10d,%eax	/* result = final key words XOR assembled words */
+	xorl	%r11d,%ebx
+	xorl	%r12d,%ecx
+	xorl	%r8d,%edx
+.byte	0xf3,0xc3	/* machine encoding of "rep ret" */
+.size	_x86_64_AES_encrypt,.-_x86_64_AES_encrypt
+.type	_x86_64_AES_encrypt_compact,@function	/* AES encryption core using only a 256-byte byte-indexed table at r14; internal helper with a register-based interface. */
+.align	16
+_x86_64_AES_encrypt_compact:	/* In: eax/ebx/ecx/edx = state words, r15 = key material, r14 = 256-byte table, 16(%rsp) = end-of-key pointer. Out: eax/ebx/ecx/edx. Also modifies esi, edi, ebp, r8d-r13d and advances r15. */
+	leaq	128(%r14),%r8
+	movl	0-128(%r8),%edi	/* read the table at r14+0, +32, ... +224; every loaded value is overwritten before use (presumably a cache warm-up) */
+	movl	32-128(%r8),%ebp
+	movl	64-128(%r8),%r10d
+	movl	96-128(%r8),%r11d
+	movl	128-128(%r8),%edi
+	movl	160-128(%r8),%ebp
+	movl	192-128(%r8),%r10d
+	movl	224-128(%r8),%r11d
+	jmp	.Lenc_loop_compact
+.align	16
+.Lenc_loop_compact:
+	xorl	0(%r15),%eax	/* XOR the current 16 bytes of key material into the state */
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+	leaq	16(%r15),%r15	/* step to the next 16 bytes of key material */
+	movzbl	%al,%r10d	/* split the state into byte indexes ... */
+	movzbl	%bl,%r11d
+	movzbl	%cl,%r12d
+	movzbl	%dl,%r8d
+	movzbl	%bh,%esi
+	movzbl	%ch,%edi
+	shrl	$16,%ecx
+	movzbl	%dh,%ebp
+	movzbl	(%r14,%r10,1),%r10d	/* ... and replace each byte by its table entry (one byte per entry) */
+	movzbl	(%r14,%r11,1),%r11d
+	movzbl	(%r14,%r12,1),%r12d
+	movzbl	(%r14,%r8,1),%r8d
+
+	movzbl	(%r14,%rsi,1),%r9d
+	movzbl	%ah,%esi
+	movzbl	(%r14,%rdi,1),%r13d
+	movzbl	%cl,%edi
+	movzbl	(%r14,%rbp,1),%ebp
+	movzbl	(%r14,%rsi,1),%esi
+
+	shll	$8,%r9d	/* shift each substituted byte into its lane (8, 16 or 24) and merge into r10d/r11d/r12d/r8d */
+	shrl	$16,%edx
+	shll	$8,%r13d
+	xorl	%r9d,%r10d
+	shrl	$16,%eax
+	movzbl	%dl,%r9d
+	shrl	$16,%ebx
+	xorl	%r13d,%r11d
+	shll	$8,%ebp
+	movzbl	%al,%r13d
+	movzbl	(%r14,%rdi,1),%edi
+	xorl	%ebp,%r12d
+
+	shll	$8,%esi
+	movzbl	%bl,%ebp
+	shll	$16,%edi
+	xorl	%esi,%r8d
+	movzbl	(%r14,%r9,1),%r9d
+	movzbl	%dh,%esi
+	movzbl	(%r14,%r13,1),%r13d
+	xorl	%edi,%r10d
+
+	shrl	$8,%ecx	/* ecx and ebx now hold only their original top byte, used as an index below */
+	movzbl	%ah,%edi
+	shll	$16,%r9d
+	shrl	$8,%ebx
+	shll	$16,%r13d
+	xorl	%r9d,%r11d
+	movzbl	(%r14,%rbp,1),%ebp
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%edi
+	movzbl	(%r14,%rcx,1),%edx
+	movzbl	(%r14,%rbx,1),%ecx
+
+	shll	$16,%ebp
+	xorl	%r13d,%r12d
+	shll	$24,%esi
+	xorl	%ebp,%r8d
+	shll	$24,%edi
+	xorl	%esi,%r10d
+	shll	$24,%edx
+	xorl	%edi,%r11d
+	shll	$24,%ecx
+	movl	%r10d,%eax	/* substituted and byte-permuted state is now back in eax/ebx/ecx/edx */
+	movl	%r11d,%ebx
+	xorl	%r12d,%ecx
+	xorl	%r8d,%edx
+	cmpq	16(%rsp),%r15	/* stop when r15 reaches the end pointer; this slot is the caller's 8(%rsp) (e.g. asm_AES_encrypt stores key + 16*(dword at key+240) there) */
+	je	.Lenc_compact_done
+	movl	$0x80808080,%r10d	/* column mixing: per byte compute d = (b << 1) ^ (0x1b if the top bit of b was set), four bytes at a time */
+	movl	$0x80808080,%r11d
+	andl	%eax,%r10d
+	andl	%ebx,%r11d
+	movl	%r10d,%esi
+	movl	%r11d,%edi
+	shrl	$7,%r10d
+	leal	(%rax,%rax,1),%r8d
+	shrl	$7,%r11d
+	leal	(%rbx,%rbx,1),%r9d
+	subl	%r10d,%esi	/* 0x80 - 0x01 = 0x7f in every byte whose top bit was set, 0 elsewhere */
+	subl	%r11d,%edi
+	andl	$0xfefefefe,%r8d	/* drop the bit carried in from the neighbouring byte by the 32-bit doubling */
+	andl	$0xfefefefe,%r9d
+	andl	$0x1b1b1b1b,%esi
+	andl	$0x1b1b1b1b,%edi
+	movl	%eax,%r10d
+	movl	%ebx,%r11d
+	xorl	%esi,%r8d
+	xorl	%edi,%r9d
+
+	xorl	%r8d,%eax	/* combine the doubled word (r8d/r9d) with rotations of the original word (r10d/r11d) */
+	xorl	%r9d,%ebx
+	movl	$0x80808080,%r12d
+	roll	$24,%eax
+	movl	$0x80808080,%ebp
+	roll	$24,%ebx
+	andl	%ecx,%r12d
+	andl	%edx,%ebp
+	xorl	%r8d,%eax
+	xorl	%r9d,%ebx
+	movl	%r12d,%esi
+	rorl	$16,%r10d
+	movl	%ebp,%edi
+	rorl	$16,%r11d
+	leal	(%rcx,%rcx,1),%r8d
+	shrl	$7,%r12d
+	xorl	%r10d,%eax
+	shrl	$7,%ebp
+	xorl	%r11d,%ebx
+	rorl	$8,%r10d
+	leal	(%rdx,%rdx,1),%r9d
+	rorl	$8,%r11d
+	subl	%r12d,%esi
+	subl	%ebp,%edi
+	xorl	%r10d,%eax
+	xorl	%r11d,%ebx
+
+	andl	$0xfefefefe,%r8d	/* same doubling and rotation sequence, interleaved, for ecx and edx */
+	andl	$0xfefefefe,%r9d
+	andl	$0x1b1b1b1b,%esi
+	andl	$0x1b1b1b1b,%edi
+	movl	%ecx,%r12d
+	movl	%edx,%ebp
+	xorl	%esi,%r8d
+	xorl	%edi,%r9d
+
+	rorl	$16,%r12d
+	xorl	%r8d,%ecx
+	rorl	$16,%ebp
+	xorl	%r9d,%edx
+	roll	$24,%ecx
+	movl	0(%r14),%esi	/* loads from r14+0/+64/+128/+192 are overwritten at the top of the loop (presumably keeps the table cached) */
+	roll	$24,%edx
+	xorl	%r8d,%ecx
+	movl	64(%r14),%edi
+	xorl	%r9d,%edx
+	movl	128(%r14),%r8d
+	xorl	%r12d,%ecx
+	rorl	$8,%r12d
+	xorl	%ebp,%edx
+	rorl	$8,%ebp
+	xorl	%r12d,%ecx
+	movl	192(%r14),%r9d
+	xorl	%ebp,%edx
+	jmp	.Lenc_loop_compact
+.align	16
+.Lenc_compact_done:
+	xorl	0(%r15),%eax	/* final step: XOR in the last 16 bytes of key material (no column mixing after the last substitution) */
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+.byte	0xf3,0xc3	/* machine encoding of "rep ret" */
+.size	_x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
+.align	16
+.globl	asm_AES_encrypt
+.hidden asm_AES_encrypt
+.type	asm_AES_encrypt,@function	/* Encrypts one 16-byte block: rdi = input block, rsi = output block, rdx = key structure (dword at offset 240 is used as the iteration count). */
+.hidden	asm_AES_encrypt
+asm_AES_encrypt:
+	movq	%rsp,%rax	/* remember the entry stack pointer; the saved registers are reloaded relative to it */
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+
+/* Build a 64-byte-aligned frame, then lower it by ((rsp - key + 63) & 0x3c0) bytes; presumably this keeps the frame from aliasing the key in cache. */
+	leaq	-63(%rdx),%rcx
+	andq	$-64,%rsp
+	subq	%rsp,%rcx
+	negq	%rcx
+	andq	$0x3c0,%rcx
+	subq	%rcx,%rsp
+	subq	$32,%rsp	/* 32-byte frame: 0 = key, 8 = end of key, 16 = output pointer, 24 = entry rsp */
+
+	movq	%rsi,16(%rsp)
+	movq	%rax,24(%rsp)
+.Lenc_prologue:
+
+	movq	%rdx,%r15	/* r15 = key pointer consumed by the core routine */
+	movl	240(%r15),%r13d
+
+	movl	0(%rdi),%eax	/* load the 16-byte input block into eax/ebx/ecx/edx */
+	movl	4(%rdi),%ebx
+	movl	8(%rdi),%ecx
+	movl	12(%rdi),%edx
+
+	shll	$4,%r13d	/* rbp = key + 16 * (dword at key+240): the end pointer the compact core compares r15 against */
+	leaq	(%r15,%r13,1),%rbp
+	movq	%r15,(%rsp)
+	movq	%rbp,8(%rsp)
+
+/* r14 = .LAES_Te+2048 plus one of four offsets (0, 0x100, 0x200, 0x300) chosen from the stack address; presumably selects one of several copies of the byte table. */
+	leaq	.LAES_Te+2048(%rip),%r14
+	leaq	768(%rsp),%rbp
+	subq	%r14,%rbp
+	andq	$0x300,%rbp
+	leaq	(%r14,%rbp,1),%r14
+
+	call	_x86_64_AES_encrypt_compact
+
+	movq	16(%rsp),%r9	/* r9 = output pointer, rsi = entry rsp */
+	movq	24(%rsp),%rsi
+	movl	%eax,0(%r9)	/* store the 16-byte result */
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	movq	-48(%rsi),%r15	/* restore callee-saved registers from just below the entry rsp, in reverse push order */
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp	/* back to the entry stack pointer, discarding the aligned frame */
+.Lenc_epilogue:
+	.byte	0xf3,0xc3	/* machine encoding of "rep ret" */
+.size	asm_AES_encrypt,.-asm_AES_encrypt
+.type	_x86_64_AES_decrypt,@function	/* Table-driven AES decryption core; internal helper with a register-based interface. */
+.align	16
+_x86_64_AES_decrypt:	/* In: eax/ebx/ecx/edx = four 32-bit state words, r15 = key material, r14 = lookup table base. Out: eax/ebx/ecx/edx. Also modifies esi, edi, ebp, r8d, r10d-r13d and advances r15; r14 is restored before return. */
+	xorl	0(%r15),%eax	/* XOR the first 16 bytes at r15 into the state */
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+
+	movl	240(%r15),%r13d	/* loop counter = dword at r15+240 (presumably the round count) minus 1 */
+	subl	$1,%r13d
+	jmp	.Ldec_loop
+.align	16
+.Ldec_loop:
+/* One pass: each state byte indexes an 8-byte table entry at r14; the displacement (0, 3, 2 or 1) selects which 32 bits of the entry are read. */
+	movzbl	%al,%esi
+	movzbl	%bl,%edi
+	movzbl	%cl,%ebp
+	movl	0(%r14,%rsi,8),%r10d	/* r10d, r11d, r12d, r8d accumulate the four new state words */
+	movl	0(%r14,%rdi,8),%r11d
+	movl	0(%r14,%rbp,8),%r12d
+
+	movzbl	%dh,%esi
+	movzbl	%ah,%edi
+	movzbl	%dl,%ebp
+	xorl	3(%r14,%rsi,8),%r10d
+	xorl	3(%r14,%rdi,8),%r11d
+	movl	0(%r14,%rbp,8),%r8d
+
+	movzbl	%bh,%esi
+	shrl	$16,%eax	/* expose the upper two bytes of each state word as al/ah, dl/dh, ... */
+	movzbl	%ch,%ebp
+	xorl	3(%r14,%rsi,8),%r12d
+	shrl	$16,%edx
+	xorl	3(%r14,%rbp,8),%r8d
+
+	shrl	$16,%ebx
+	leaq	16(%r15),%r15	/* step to the next 16 bytes of key material */
+	shrl	$16,%ecx
+
+	movzbl	%cl,%esi
+	movzbl	%dl,%edi
+	movzbl	%al,%ebp
+	xorl	2(%r14,%rsi,8),%r10d
+	xorl	2(%r14,%rdi,8),%r11d
+	xorl	2(%r14,%rbp,8),%r12d
+
+	movzbl	%bh,%esi
+	movzbl	%ch,%edi
+	movzbl	%bl,%ebp
+	xorl	1(%r14,%rsi,8),%r10d
+	xorl	1(%r14,%rdi,8),%r11d
+	xorl	2(%r14,%rbp,8),%r8d
+
+	movzbl	%dh,%esi
+	movl	12(%r15),%edx	/* reload state registers with the next key words while the last lookups finish */
+	movzbl	%ah,%ebp
+	xorl	1(%r14,%rsi,8),%r12d
+	movl	0(%r15),%eax
+	xorl	1(%r14,%rbp,8),%r8d
+
+	xorl	%r10d,%eax	/* new state = key words XOR accumulated table words */
+	movl	4(%r15),%ebx
+	movl	8(%r15),%ecx
+	xorl	%r12d,%ecx
+	xorl	%r11d,%ebx
+	xorl	%r8d,%edx
+	subl	$1,%r13d
+	jnz	.Ldec_loop	/* repeat until the counter reaches zero */
+	leaq	2048(%r14),%r14	/* Last pass uses the byte-indexed table located 2048 bytes (256 entries * 8) past the base */
+	movzbl	%al,%esi
+	movzbl	%bl,%edi
+	movzbl	%cl,%ebp
+	movzbl	(%r14,%rsi,1),%r10d	/* low byte lane of each output word */
+	movzbl	(%r14,%rdi,1),%r11d
+	movzbl	(%r14,%rbp,1),%r12d
+
+	movzbl	%dl,%esi
+	movzbl	%dh,%edi
+	movzbl	%ah,%ebp
+	movzbl	(%r14,%rsi,1),%r8d
+	movzbl	(%r14,%rdi,1),%edi
+	movzbl	(%r14,%rbp,1),%ebp
+
+	shll	$8,%edi	/* place the substituted byte in bits 8-15 */
+	shll	$8,%ebp
+
+	xorl	%edi,%r10d
+	xorl	%ebp,%r11d
+	shrl	$16,%edx
+
+	movzbl	%bh,%esi
+	movzbl	%ch,%edi
+	shrl	$16,%eax
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%edi
+
+	shll	$8,%esi
+	shll	$8,%edi
+	shrl	$16,%ebx
+	xorl	%esi,%r12d
+	xorl	%edi,%r8d
+	shrl	$16,%ecx
+
+	movzbl	%cl,%esi
+	movzbl	%dl,%edi
+	movzbl	%al,%ebp
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%edi
+	movzbl	(%r14,%rbp,1),%ebp
+
+	shll	$16,%esi	/* place the substituted byte in bits 16-23 */
+	shll	$16,%edi
+	shll	$16,%ebp
+
+	xorl	%esi,%r10d
+	xorl	%edi,%r11d
+	xorl	%ebp,%r12d
+
+	movzbl	%bl,%esi
+	movzbl	%bh,%edi
+	movzbl	%ch,%ebp
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%edi
+	movzbl	(%r14,%rbp,1),%ebp
+
+	shll	$16,%esi
+	shll	$24,%edi	/* place the substituted byte in bits 24-31 */
+	shll	$24,%ebp
+
+	xorl	%esi,%r8d
+	xorl	%edi,%r10d
+	xorl	%ebp,%r11d
+
+	movzbl	%dh,%esi
+	movzbl	%ah,%edi
+	movl	16+12(%r15),%edx	/* the final 16 bytes of key material are read from r15+16 */
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%edi
+	movl	16+0(%r15),%eax
+
+	shll	$24,%esi
+	shll	$24,%edi
+
+	xorl	%esi,%r12d
+	xorl	%edi,%r8d
+
+	movl	16+4(%r15),%ebx
+	movl	16+8(%r15),%ecx
+	leaq	-2048(%r14),%r14	/* undo the +2048 adjustment so r14 is unchanged for the caller */
+	xorl	%r10d,%eax	/* result = final key words XOR assembled words */
+	xorl	%r11d,%ebx
+	xorl	%r12d,%ecx
+	xorl	%r8d,%edx
+.byte	0xf3,0xc3	/* machine encoding of "rep ret" */
+.size	_x86_64_AES_decrypt,.-_x86_64_AES_decrypt
+.type	_x86_64_AES_decrypt_compact,@function	/* Internal AES block decryption using only a 256-byte table at r14 and three 64-bit masks at r14+256. */
+.align	16	/* In: eax/ebx/ecx/edx = state, r15 = key schedule, r14 = byte table; 16(%rsp) here (the caller's 8(%rsp)) = end-of-key pointer. */
+_x86_64_AES_decrypt_compact:	/* Out: eax/ebx/ecx/edx. Clobbers esi, edi, ebp, r8-r13 and flags; r15 is advanced to the end-of-key pointer. */
+	leaq	128(%r14),%r8
+	movl	0-128(%r8),%edi	/* eight loads at 32-byte steps touch the whole 256-byte table; the loaded values are overwritten unused */
+	movl	32-128(%r8),%ebp
+	movl	64-128(%r8),%r10d
+	movl	96-128(%r8),%r11d
+	movl	128-128(%r8),%edi
+	movl	160-128(%r8),%ebp
+	movl	192-128(%r8),%r10d
+	movl	224-128(%r8),%r11d
+	jmp	.Ldec_loop_compact
+
+.align	16
+.Ldec_loop_compact:
+	xorl	0(%r15),%eax	/* XOR the state with the current 16 bytes of key material */
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+	leaq	16(%r15),%r15	/* advance the key pointer */
+	movzbl	%al,%r10d	/* extract state bytes as table indices */
+	movzbl	%bl,%r11d
+	movzbl	%cl,%r12d
+	movzbl	%dl,%r8d
+	movzbl	%dh,%esi
+	movzbl	%ah,%edi
+	shrl	$16,%edx
+	movzbl	%bh,%ebp
+	movzbl	(%r14,%r10,1),%r10d	/* byte-for-byte substitution through the table at r14 */
+	movzbl	(%r14,%r11,1),%r11d
+	movzbl	(%r14,%r12,1),%r12d
+	movzbl	(%r14,%r8,1),%r8d
+
+	movzbl	(%r14,%rsi,1),%r9d
+	movzbl	%ch,%esi
+	movzbl	(%r14,%rdi,1),%r13d
+	movzbl	(%r14,%rbp,1),%ebp
+	movzbl	(%r14,%rsi,1),%esi
+
+	shrl	$16,%ecx
+	shll	$8,%r13d	/* move substituted bytes into byte 1 of their destination words */
+	shll	$8,%r9d
+	movzbl	%cl,%edi
+	shrl	$16,%eax
+	xorl	%r9d,%r10d
+	shrl	$16,%ebx
+	movzbl	%dl,%r9d
+
+	shll	$8,%ebp
+	xorl	%r13d,%r11d
+	shll	$8,%esi
+	movzbl	%al,%r13d
+	movzbl	(%r14,%rdi,1),%edi
+	xorl	%ebp,%r12d
+	movzbl	%bl,%ebp
+
+	shll	$16,%edi	/* byte 2 of the destination words */
+	xorl	%esi,%r8d
+	movzbl	(%r14,%r9,1),%r9d
+	movzbl	%bh,%esi
+	movzbl	(%r14,%rbp,1),%ebp
+	xorl	%edi,%r10d
+	movzbl	(%r14,%r13,1),%r13d
+	movzbl	%ch,%edi
+
+	shll	$16,%ebp
+	shll	$16,%r9d
+	shll	$16,%r13d
+	xorl	%ebp,%r8d
+	movzbl	%dh,%ebp
+	xorl	%r9d,%r11d
+	shrl	$8,%eax	/* eax was already shifted by 16, so this leaves its original top byte */
+	xorl	%r13d,%r12d
+
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%ebx
+	movzbl	(%r14,%rbp,1),%ecx
+	movzbl	(%r14,%rax,1),%edx
+
+	movl	%r10d,%eax
+	shll	$24,%esi	/* top byte of the destination words */
+	shll	$24,%ebx
+	shll	$24,%ecx
+	xorl	%esi,%eax
+	shll	$24,%edx
+	xorl	%r11d,%ebx
+	xorl	%r12d,%ecx
+	xorl	%r8d,%edx
+	cmpq	16(%rsp),%r15	/* reached the end-of-key pointer stored by the caller? */
+	je	.Ldec_compact_done	/* yes: skip the column-mixing step for the last round */
+
+	movq	256+0(%r14),%rsi	/* three 64-bit masks stored right after the byte table; NOTE(review): values are outside this view, presumably 0x80.., 0xfe.., 0x1b.. for packed GF(2^8) doubling - confirm */
+	shlq	$32,%rbx
+	shlq	$32,%rdx
+	movq	256+8(%r14),%rdi
+	orq	%rbx,%rax	/* pack the state two words per register: rax = ebx:eax, rcx = edx:ecx */
+	orq	%rdx,%rcx
+	movq	256+16(%r14),%rbp
+	movq	%rsi,%r9
+	movq	%rsi,%r12
+	andq	%rax,%r9	/* first doubling step: r8/r11 are derived from rax/rcx */
+	andq	%rcx,%r12
+	movq	%r9,%rbx
+	movq	%r12,%rdx
+	shrq	$7,%r9
+	leaq	(%rax,%rax,1),%r8
+	shrq	$7,%r12
+	leaq	(%rcx,%rcx,1),%r11
+	subq	%r9,%rbx
+	subq	%r12,%rdx
+	andq	%rdi,%r8
+	andq	%rdi,%r11
+	andq	%rbp,%rbx
+	andq	%rbp,%rdx
+	xorq	%rbx,%r8
+	xorq	%rdx,%r11
+	movq	%rsi,%r10
+	movq	%rsi,%r13
+
+	andq	%r8,%r10	/* second doubling step: r9/r12 are derived from r8/r11 */
+	andq	%r11,%r13
+	movq	%r10,%rbx
+	movq	%r13,%rdx
+	shrq	$7,%r10
+	leaq	(%r8,%r8,1),%r9
+	shrq	$7,%r13
+	leaq	(%r11,%r11,1),%r12
+	subq	%r10,%rbx
+	subq	%r13,%rdx
+	andq	%rdi,%r9
+	andq	%rdi,%r12
+	andq	%rbp,%rbx
+	andq	%rbp,%rdx
+	xorq	%rbx,%r9
+	xorq	%rdx,%r12
+	movq	%rsi,%r10
+	movq	%rsi,%r13
+
+	andq	%r9,%r10	/* third doubling step: r10/r13 are derived from r9/r12 */
+	andq	%r12,%r13
+	movq	%r10,%rbx
+	movq	%r13,%rdx
+	shrq	$7,%r10
+	xorq	%rax,%r8
+	shrq	$7,%r13
+	xorq	%rcx,%r11
+	subq	%r10,%rbx
+	subq	%r13,%rdx
+	leaq	(%r9,%r9,1),%r10
+	leaq	(%r12,%r12,1),%r13
+	xorq	%rax,%r9
+	xorq	%rcx,%r12
+	andq	%rdi,%r10
+	andq	%rdi,%r13
+	andq	%rbp,%rbx
+	andq	%rbp,%rdx
+	xorq	%rbx,%r10
+	xorq	%rdx,%r13
+
+	xorq	%r10,%rax	/* combine the original and derived values */
+	xorq	%r13,%rcx
+	xorq	%r10,%r8
+	xorq	%r13,%r11
+	movq	%rax,%rbx
+	movq	%rcx,%rdx
+	xorq	%r10,%r9
+	shrq	$32,%rbx	/* unpack the high words back into ebx and edx */
+	xorq	%r13,%r12
+	shrq	$32,%rdx
+	xorq	%r8,%r10
+	roll	$8,%eax	/* per-word rotations by 8, 24 and 16 bits complete the mix */
+	xorq	%r11,%r13
+	roll	$8,%ecx
+	xorq	%r9,%r10
+	roll	$8,%ebx
+	xorq	%r12,%r13
+
+	roll	$8,%edx
+	xorl	%r10d,%eax
+	shrq	$32,%r10
+	xorl	%r13d,%ecx
+	shrq	$32,%r13
+	xorl	%r10d,%ebx
+	xorl	%r13d,%edx
+
+	movq	%r8,%r10
+	roll	$24,%r8d
+	movq	%r11,%r13
+	roll	$24,%r11d
+	shrq	$32,%r10
+	xorl	%r8d,%eax
+	shrq	$32,%r13
+	xorl	%r11d,%ecx
+	roll	$24,%r10d
+	movq	%r9,%r8
+	roll	$24,%r13d
+	movq	%r12,%r11
+	shrq	$32,%r8
+	xorl	%r10d,%ebx
+	shrq	$32,%r11
+	xorl	%r13d,%edx
+
+	movq	0(%r14),%rsi	/* these five loads re-touch the table area; every destination is overwritten at the top of the loop */
+	roll	$16,%r9d
+	movq	64(%r14),%rdi
+	roll	$16,%r12d
+	movq	128(%r14),%rbp
+	roll	$16,%r8d
+	movq	192(%r14),%r10
+	xorl	%r9d,%eax
+	roll	$16,%r11d
+	xorl	%r12d,%ecx
+	movq	256(%r14),%r13
+	xorl	%r8d,%ebx
+	xorl	%r11d,%edx
+	jmp	.Ldec_loop_compact
+.align	16
+.Ldec_compact_done:
+	xorl	0(%r15),%eax	/* final XOR with the 16 bytes at the end-of-key pointer */
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+.byte	0xf3,0xc3	/* bytes F3 C3 encode rep ret */
+.size	_x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
+.align	16
+.globl	asm_AES_decrypt	/* Exported entry point: decrypts one 16-byte block with the compact routine. */
+.hidden asm_AES_decrypt	/* In: rdi = 16 input bytes, rsi = 16 output bytes, rdx = key schedule (32-bit count at +240). */
+.type	asm_AES_decrypt,@function
+.hidden	asm_AES_decrypt
+asm_AES_decrypt:
+	movq	%rsp,%rax	/* remember the entry stack pointer for the epilogue */
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+
+
+	leaq	-63(%rdx),%rcx	/* next six instructions: rsp = align64(rsp) - ((align64(rsp) - key + 63) & 0x3c0) */
+	andq	$-64,%rsp
+	subq	%rsp,%rcx
+	negq	%rcx
+	andq	$0x3c0,%rcx	/* NOTE(review): presumably keeps the frame from aliasing the key schedule in cache - confirm */
+	subq	%rcx,%rsp
+	subq	$32,%rsp	/* 32-byte frame: 0 = key, 8 = key end, 16 = output pointer, 24 = entry rsp */
+
+	movq	%rsi,16(%rsp)
+	movq	%rax,24(%rsp)
+.Ldec_prologue:
+
+	movq	%rdx,%r15	/* r15 = key schedule, as the internal routine expects */
+	movl	240(%r15),%r13d	/* r13d = count stored at offset 240 */
+
+	movl	0(%rdi),%eax	/* load the 16-byte input block into eax/ebx/ecx/edx */
+	movl	4(%rdi),%ebx
+	movl	8(%rdi),%ecx
+	movl	12(%rdi),%edx
+
+	shll	$4,%r13d	/* count * 16 = byte offset of the last 16-byte key block */
+	leaq	(%r15,%r13,1),%rbp
+	movq	%r15,(%rsp)
+	movq	%rbp,8(%rsp)	/* end-of-key pointer; the callee reads it as 16(%rsp) after the call pushes 8 bytes */
+
+
+	leaq	.LAES_Td+2048(%rip),%r14	/* byte-table area 2048 bytes into .LAES_Td */
+	leaq	768(%rsp),%rbp
+	subq	%r14,%rbp
+	andq	$0x300,%rbp	/* rbp is 0, 0x100, 0x200 or 0x300, chosen from the stack/table distance */
+	leaq	(%r14,%rbp,1),%r14
+	shrq	$3,%rbp
+	addq	%rbp,%r14	/* total offset is rbp + rbp/8, i.e. a multiple of 288 = 256-byte table + 32 bytes */
+
+	call	_x86_64_AES_decrypt_compact
+
+	movq	16(%rsp),%r9	/* r9 = saved output pointer */
+	movq	24(%rsp),%rsi	/* rsi = entry rsp */
+	movl	%eax,0(%r9)	/* store the 16-byte result */
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	movq	-48(%rsi),%r15	/* reload the six pushed registers relative to the entry rsp */
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp	/* drop the realigned frame and the pushes in one step */
+.Ldec_epilogue:
+	.byte	0xf3,0xc3	/* bytes F3 C3 encode rep ret */
+.size	asm_AES_decrypt,.-asm_AES_decrypt
+.align	16
+.globl	asm_AES_set_encrypt_key	/* Exported wrapper around _x86_64_AES_set_encrypt_key. */
+.hidden asm_AES_set_encrypt_key	/* Arguments in rdi, esi, rdx are passed through unchanged; rax is returned as set by the internal routine. */
+.type	asm_AES_set_encrypt_key,@function
+asm_AES_set_encrypt_key:
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	subq	$8,%rsp	/* 6 pushes + 8 = 56 bytes; NOTE(review): presumably keeps rsp 16-byte aligned at the call - confirm against the ABI */
+.Lenc_key_prologue:
+
+	call	_x86_64_AES_set_encrypt_key
+
+	movq	40(%rsp),%rbp	/* only rbp and rbx are reloaded: the internal routine in this file does not write r12-r15 */
+	movq	48(%rsp),%rbx
+	addq	$56,%rsp	/* release the padding and all six push slots */
+.Lenc_key_epilogue:
+	.byte	0xf3,0xc3	/* bytes F3 C3 encode rep ret */
+.size	asm_AES_set_encrypt_key,.-asm_AES_set_encrypt_key
+
+.type	_x86_64_AES_set_encrypt_key,@function	/* Internal key expansion. In: rdi = user key bytes, esi = key size in bits (128/192/256), rdx = key schedule to fill. */
+.align	16	/* Returns rax = 0 on success, -1 if either pointer is null, -2 for any other bit size. Stores 10/12/14 at schedule offset 240. */
+_x86_64_AES_set_encrypt_key:	/* Writes rax, rbx, rcx, rdx, rsi, rdi, rbp, r8d and flags; leaves r12-r15 untouched. */
+	movl	%esi,%ecx	/* ecx = bits */
+	movq	%rdi,%rsi	/* rsi = user key */
+	movq	%rdx,%rdi	/* rdi = key schedule */
+
+	testq	$-1,%rsi	/* null user key? */
+	jz	.Lbadpointer
+	testq	$-1,%rdi	/* null key schedule? */
+	jz	.Lbadpointer
+
+	leaq	.LAES_Te(%rip),%rbp
+	leaq	2048+128(%rbp),%rbp	/* rbp = byte table at .LAES_Te+2048, biased by +128 so later accesses use -128(%rbp,...) */
+
+
+	movl	0-128(%rbp),%eax	/* eight loads at 32-byte steps touch the 256-byte table; the loaded values are not used */
+	movl	32-128(%rbp),%ebx
+	movl	64-128(%rbp),%r8d
+	movl	96-128(%rbp),%edx
+	movl	128-128(%rbp),%eax
+	movl	160-128(%rbp),%ebx
+	movl	192-128(%rbp),%r8d
+	movl	224-128(%rbp),%edx
+
+	cmpl	$128,%ecx	/* dispatch on key size */
+	je	.L10rounds
+	cmpl	$192,%ecx
+	je	.L12rounds
+	cmpl	$256,%ecx
+	je	.L14rounds
+	movq	$-2,%rax	/* unsupported key size */
+	jmp	.Lexit
+
+.L10rounds:
+	movq	0(%rsi),%rax	/* copy the 16 key bytes into the start of the schedule */
+	movq	8(%rsi),%rdx
+	movq	%rax,0(%rdi)
+	movq	%rdx,8(%rdi)
+
+	shrq	$32,%rdx	/* edx = last key word (bytes 12-15); eax already holds word 0 */
+	xorl	%ecx,%ecx	/* ecx = iteration index, also indexes the constant table */
+	jmp	.L10shortcut
+.align	4
+.L10loop:
+	movl	0(%rdi),%eax	/* first and last word of the previous 16-byte group */
+	movl	12(%rdi),%edx
+.L10shortcut:
+	movzbl	%dl,%esi
+	movzbl	-128(%rbp,%rsi,1),%ebx	/* substitute each byte of edx through the byte table ... */
+	movzbl	%dh,%esi
+	shll	$24,%ebx	/* ... placing byte 0 in bits 24-31, byte 1 in 0-7, byte 2 in 8-15, byte 3 in 16-23 */
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	shrl	$16,%edx
+	movzbl	%dl,%esi
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$8,%ebx
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%eax
+
+	xorl	1024-128(%rbp,%rcx,4),%eax	/* 32-bit constant ecx from the array 1024 bytes past the byte table; NOTE(review): presumably the round constants - values are outside this view */
+	movl	%eax,16(%rdi)	/* next four words: each is the previous new word XOR the word 16 bytes earlier */
+	xorl	4(%rdi),%eax
+	movl	%eax,20(%rdi)
+	xorl	8(%rdi),%eax
+	movl	%eax,24(%rdi)
+	xorl	12(%rdi),%eax
+	movl	%eax,28(%rdi)
+	addl	$1,%ecx
+	leaq	16(%rdi),%rdi
+	cmpl	$10,%ecx
+	jl	.L10loop
+
+	movl	$10,80(%rdi)	/* rdi advanced 10*16 = 160 bytes, so this is schedule offset 240 */
+	xorq	%rax,%rax	/* return 0 */
+	jmp	.Lexit
+
+.L12rounds:
+	movq	0(%rsi),%rax	/* copy the 24 key bytes into the start of the schedule */
+	movq	8(%rsi),%rbx
+	movq	16(%rsi),%rdx
+	movq	%rax,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%rdx,16(%rdi)
+
+	shrq	$32,%rdx	/* edx = last key word (bytes 20-23) */
+	xorl	%ecx,%ecx
+	jmp	.L12shortcut
+.align	4
+.L12loop:
+	movl	0(%rdi),%eax	/* first and last word of the previous 24-byte group */
+	movl	20(%rdi),%edx
+.L12shortcut:
+	movzbl	%dl,%esi
+	movzbl	-128(%rbp,%rsi,1),%ebx	/* same byte substitution and placement as the 128-bit path */
+	movzbl	%dh,%esi
+	shll	$24,%ebx
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	shrl	$16,%edx
+	movzbl	%dl,%esi
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$8,%ebx
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%eax
+
+	xorl	1024-128(%rbp,%rcx,4),%eax
+	movl	%eax,24(%rdi)
+	xorl	4(%rdi),%eax
+	movl	%eax,28(%rdi)
+	xorl	8(%rdi),%eax
+	movl	%eax,32(%rdi)
+	xorl	12(%rdi),%eax
+	movl	%eax,36(%rdi)
+
+	cmpl	$7,%ecx	/* the eighth pass (ecx = 7) writes only four words, then stops */
+	je	.L12break
+	addl	$1,%ecx
+
+	xorl	16(%rdi),%eax	/* remaining two words of the 24-byte group */
+	movl	%eax,40(%rdi)
+	xorl	20(%rdi),%eax
+	movl	%eax,44(%rdi)
+
+	leaq	24(%rdi),%rdi
+	jmp	.L12loop
+.L12break:
+	movl	$12,72(%rdi)	/* rdi advanced 7*24 = 168 bytes, so this is schedule offset 240 */
+	xorq	%rax,%rax	/* return 0 */
+	jmp	.Lexit
+
+.L14rounds:
+	movq	0(%rsi),%rax	/* copy the 32 key bytes into the start of the schedule */
+	movq	8(%rsi),%rbx
+	movq	16(%rsi),%rcx
+	movq	24(%rsi),%rdx
+	movq	%rax,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%rcx,16(%rdi)
+	movq	%rdx,24(%rdi)
+
+	shrq	$32,%rdx	/* edx = last key word (bytes 28-31) */
+	xorl	%ecx,%ecx
+	jmp	.L14shortcut
+.align	4
+.L14loop:
+	movl	0(%rdi),%eax	/* first and last word of the previous 32-byte group */
+	movl	28(%rdi),%edx
+.L14shortcut:
+	movzbl	%dl,%esi
+	movzbl	-128(%rbp,%rsi,1),%ebx	/* same byte substitution and placement as the 128-bit path */
+	movzbl	%dh,%esi
+	shll	$24,%ebx
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	shrl	$16,%edx
+	movzbl	%dl,%esi
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$8,%ebx
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%eax
+
+	xorl	1024-128(%rbp,%rcx,4),%eax
+	movl	%eax,32(%rdi)
+	xorl	4(%rdi),%eax
+	movl	%eax,36(%rdi)
+	xorl	8(%rdi),%eax
+	movl	%eax,40(%rdi)
+	xorl	12(%rdi),%eax
+	movl	%eax,44(%rdi)
+
+	cmpl	$6,%ecx	/* the seventh pass (ecx = 6) writes only four words, then stops */
+	je	.L14break
+	addl	$1,%ecx
+
+	movl	%eax,%edx	/* second half: substitute the word just produced, with no byte rotation and no constant */
+	movl	16(%rdi),%eax
+	movzbl	%dl,%esi
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	movzbl	%dh,%esi
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	shrl	$16,%edx
+	shll	$8,%ebx
+	movzbl	%dl,%esi
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$16,%ebx
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	shll	$24,%ebx
+	xorl	%ebx,%eax
+
+	movl	%eax,48(%rdi)
+	xorl	20(%rdi),%eax
+	movl	%eax,52(%rdi)
+	xorl	24(%rdi),%eax
+	movl	%eax,56(%rdi)
+	xorl	28(%rdi),%eax
+	movl	%eax,60(%rdi)
+
+	leaq	32(%rdi),%rdi
+	jmp	.L14loop
+.L14break:
+	movl	$14,48(%rdi)	/* rdi advanced 6*32 = 192 bytes, so this is schedule offset 240 */
+	xorq	%rax,%rax	/* return 0 */
+	jmp	.Lexit
+
+.Lbadpointer:
+	movq	$-1,%rax	/* null pointer argument */
+.Lexit:
+.byte	0xf3,0xc3	/* bytes F3 C3 encode rep ret */
+.size	_x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
+.align	16
+.globl	asm_AES_set_decrypt_key	/* Exported: builds a schedule with the encrypt-key routine, then reverses and transforms it in place. */
+.hidden asm_AES_set_decrypt_key	/* In: rdi = user key, esi = bits, rdx = key schedule. Returns rax = 0, or the nonzero code from the encrypt-key routine. */
+.type	asm_AES_set_decrypt_key,@function
+asm_AES_set_decrypt_key:
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	pushq	%rdx	/* keep the schedule pointer at 0(%rsp) across the call */
+.Ldec_key_prologue:
+
+	call	_x86_64_AES_set_encrypt_key
+	movq	(%rsp),%r8	/* r8 = key schedule */
+	cmpl	$0,%eax
+	jne	.Labort	/* propagate the error code left in rax */
+
+	movl	240(%r8),%r14d	/* r14d = count stored by the encrypt-key routine (10/12/14) */
+	xorq	%rdi,%rdi
+	leaq	(%rdi,%r14,4),%rcx	/* rcx = count * 4 */
+	movq	%r8,%rsi	/* rsi walks up from the first 16-byte block */
+	leaq	(%r8,%rcx,4),%rdi	/* rdi = schedule + count*16, walks down from the last 16-byte block */
+.align	4
+.Linvert:
+	movq	0(%rsi),%rax	/* swap the 16-byte blocks at rsi and rdi */
+	movq	8(%rsi),%rbx
+	movq	0(%rdi),%rcx
+	movq	8(%rdi),%rdx
+	movq	%rax,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%rcx,0(%rsi)
+	movq	%rdx,8(%rsi)
+	leaq	16(%rsi),%rsi
+	leaq	-16(%rdi),%rdi
+	cmpq	%rsi,%rdi	/* stop when both pointers meet on the middle block */
+	jne	.Linvert
+
+	leaq	.LAES_Te+2048+1024(%rip),%rax	/* area following the 1024 bytes of byte tables */
+
+	movq	40(%rax),%rsi	/* three 64-bit masks at +40/+48/+56; NOTE(review): values are outside this view, presumably 0x80.., 0xfe.., 0x1b.. for packed GF(2^8) doubling - confirm */
+	movq	48(%rax),%rdi
+	movq	56(%rax),%rbp
+
+	movq	%r8,%r15
+	subl	$1,%r14d	/* transform count-1 blocks: the first and last 16-byte blocks are left as they are */
+.align	4
+.Lpermute:
+	leaq	16(%r15),%r15	/* next 16-byte block, starting with the second one */
+	movq	0(%r15),%rax	/* rax/rcx hold the block, two 32-bit words per register */
+	movq	8(%r15),%rcx
+	movq	%rsi,%r9
+	movq	%rsi,%r12
+	andq	%rax,%r9	/* first doubling step: r8/r11 are derived from rax/rcx */
+	andq	%rcx,%r12
+	movq	%r9,%rbx
+	movq	%r12,%rdx
+	shrq	$7,%r9
+	leaq	(%rax,%rax,1),%r8
+	shrq	$7,%r12
+	leaq	(%rcx,%rcx,1),%r11
+	subq	%r9,%rbx
+	subq	%r12,%rdx
+	andq	%rdi,%r8
+	andq	%rdi,%r11
+	andq	%rbp,%rbx
+	andq	%rbp,%rdx
+	xorq	%rbx,%r8
+	xorq	%rdx,%r11
+	movq	%rsi,%r10
+	movq	%rsi,%r13
+
+	andq	%r8,%r10	/* second doubling step: r9/r12 are derived from r8/r11 */
+	andq	%r11,%r13
+	movq	%r10,%rbx
+	movq	%r13,%rdx
+	shrq	$7,%r10
+	leaq	(%r8,%r8,1),%r9
+	shrq	$7,%r13
+	leaq	(%r11,%r11,1),%r12
+	subq	%r10,%rbx
+	subq	%r13,%rdx
+	andq	%rdi,%r9
+	andq	%rdi,%r12
+	andq	%rbp,%rbx
+	andq	%rbp,%rdx
+	xorq	%rbx,%r9
+	xorq	%rdx,%r12
+	movq	%rsi,%r10
+	movq	%rsi,%r13
+
+	andq	%r9,%r10	/* third doubling step: r10/r13 are derived from r9/r12 */
+	andq	%r12,%r13
+	movq	%r10,%rbx
+	movq	%r13,%rdx
+	shrq	$7,%r10
+	xorq	%rax,%r8
+	shrq	$7,%r13
+	xorq	%rcx,%r11
+	subq	%r10,%rbx
+	subq	%r13,%rdx
+	leaq	(%r9,%r9,1),%r10
+	leaq	(%r12,%r12,1),%r13
+	xorq	%rax,%r9
+	xorq	%rcx,%r12
+	andq	%rdi,%r10
+	andq	%rdi,%r13
+	andq	%rbp,%rbx
+	andq	%rbp,%rdx
+	xorq	%rbx,%r10
+	xorq	%rdx,%r13
+
+	xorq	%r10,%rax	/* combine the original and derived values */
+	xorq	%r13,%rcx
+	xorq	%r10,%r8
+	xorq	%r13,%r11
+	movq	%rax,%rbx
+	movq	%rcx,%rdx
+	xorq	%r10,%r9
+	shrq	$32,%rbx	/* split the high words out into ebx and edx */
+	xorq	%r13,%r12
+	shrq	$32,%rdx
+	xorq	%r8,%r10
+	roll	$8,%eax	/* per-word rotations by 8, 24 and 16 bits complete the transform */
+	xorq	%r11,%r13
+	roll	$8,%ecx
+	xorq	%r9,%r10
+	roll	$8,%ebx
+	xorq	%r12,%r13
+
+	roll	$8,%edx
+	xorl	%r10d,%eax
+	shrq	$32,%r10
+	xorl	%r13d,%ecx
+	shrq	$32,%r13
+	xorl	%r10d,%ebx
+	xorl	%r13d,%edx
+
+	movq	%r8,%r10
+	roll	$24,%r8d
+	movq	%r11,%r13
+	roll	$24,%r11d
+	shrq	$32,%r10
+	xorl	%r8d,%eax
+	shrq	$32,%r13
+	xorl	%r11d,%ecx
+	roll	$24,%r10d
+	movq	%r9,%r8
+	roll	$24,%r13d
+	movq	%r12,%r11
+	shrq	$32,%r8
+	xorl	%r10d,%ebx
+	shrq	$32,%r11
+	xorl	%r13d,%edx
+
+
+	roll	$16,%r9d
+
+	roll	$16,%r12d
+
+	roll	$16,%r8d
+
+	xorl	%r9d,%eax
+	roll	$16,%r11d
+	xorl	%r12d,%ecx
+
+	xorl	%r8d,%ebx
+	xorl	%r11d,%edx
+	movl	%eax,0(%r15)	/* write the transformed block back in place */
+	movl	%ebx,4(%r15)
+	movl	%ecx,8(%r15)
+	movl	%edx,12(%r15)
+	subl	$1,%r14d
+	jnz	.Lpermute
+
+	xorq	%rax,%rax	/* return 0 */
+.Labort:
+	movq	8(%rsp),%r15	/* reload the six pushed registers; 0(%rsp) is the saved schedule pointer */
+	movq	16(%rsp),%r14
+	movq	24(%rsp),%r13
+	movq	32(%rsp),%r12
+	movq	40(%rsp),%rbp
+	movq	48(%rsp),%rbx
+	addq	$56,%rsp	/* release all seven push slots */
+.Ldec_key_epilogue:
+	.byte	0xf3,0xc3	/* bytes F3 C3 encode rep ret */
+.size	asm_AES_set_decrypt_key,.-asm_AES_set_decrypt_key
+.align	16
+.globl	asm_AES_cbc_encrypt	/* Exported CBC-mode routine with a table-driven fast path and a compact slow path. */
+.hidden asm_AES_cbc_encrypt	/* In: rdi = input, rsi = output, rdx = length in bytes, rcx = key schedule, r8 = 16-byte chaining value (updated on return), r9d = nonzero to encrypt, zero to decrypt. */
+.type	asm_AES_cbc_encrypt,@function
+.extern	OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
+.hidden	asm_AES_cbc_encrypt
+asm_AES_cbc_encrypt:
+	cmpq	$0,%rdx	/* zero length: return immediately */
+	je	.Lcbc_epilogue
+	pushfq	/* flags are saved because cld below changes the direction flag */
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+.Lcbc_prologue:
+
+	cld	/* string instructions below must move forward */
+	movl	%r9d,%r9d	/* zero-extend the 32-bit direction argument */
+
+	leaq	.LAES_Te(%rip),%r14	/* r14 = .LAES_Te, or .LAES_Td when r9 is zero */
+	leaq	.LAES_Td(%rip),%r10
+	cmpq	$0,%r9
+	cmoveq	%r10,%r14
+
+	leaq	OPENSSL_ia32cap_P(%rip),%r10
+	movl	(%r10),%r10d	/* first 32-bit word of the capability vector */
+	cmpq	$512,%rdx	/* fewer than 512 bytes: use the slow path */
+	jb	.Lcbc_slow_prologue
+	testq	$15,%rdx	/* length not a multiple of 16: use the slow path */
+	jnz	.Lcbc_slow_prologue
+	btl	$28,%r10d	/* capability bit 28 set: use the slow path; NOTE(review): presumably the hyper-threading flag - confirm */
+	jc	.Lcbc_slow_prologue
+
+
+	leaq	-88-248(%rsp),%r15	/* candidate frame 336 bytes below the current stack pointer */
+	andq	$-64,%r15	/* aligned down to 64 bytes */
+
+
+	movq	%r14,%r10	/* r10/r11/r12 = offsets within a 4 KiB page of table start, table start+2304 and the frame */
+	leaq	2304(%r14),%r11
+	movq	%r15,%r12
+	andq	$0xFFF,%r10
+	andq	$0xFFF,%r11
+	andq	$0xFFF,%r12
+
+	cmpq	%r11,%r12
+	jb	.Lcbc_te_break_out
+	subq	%r11,%r12	/* frame offset >= table end offset: lower the frame until the two offsets are equal */
+	subq	%r12,%r15
+	jmp	.Lcbc_te_ok
+.Lcbc_te_break_out:
+	subq	%r10,%r12	/* otherwise lower the frame so its page offset is 320 bytes below the table start offset */
+	andq	$0xFFF,%r12
+	addq	$320,%r12
+	subq	%r12,%r15
+.align	4
+.Lcbc_te_ok:
+
+	xchgq	%rsp,%r15	/* switch to the new frame; r15 = previous rsp */
+
+	movq	%r15,16(%rsp)	/* frame: 0 = key in use, 16 = previous rsp, 24 = in, 32 = out, 40 = length, 48 = key, 56 = ivec, 64 = chaining data, 80.. = key copy */
+.Lcbc_fast_body:
+	movq	%rdi,24(%rsp)
+	movq	%rsi,32(%rsp)
+	movq	%rdx,40(%rsp)
+	movq	%rcx,48(%rsp)
+	movq	%r8,56(%rsp)
+	movl	$0,80+240(%rsp)	/* zero here means no key copy was made; tested again in the cleanup */
+	movq	%r8,%rbp	/* rbp = ivec */
+	movq	%r9,%rbx	/* rbx = direction */
+	movq	%rsi,%r9	/* r9 = output cursor */
+	movq	%rdi,%r8	/* r8 = input cursor */
+	movq	%rcx,%r15	/* r15 = key schedule */
+
+	movl	240(%r15),%eax	/* eax = count stored at key offset 240 */
+
+	movq	%r15,%r10
+	subq	%r14,%r10
+	andq	$0xfff,%r10	/* (key - table) modulo 4096 */
+	cmpq	$2304,%r10	/* below 2304, or at least 4096-248: copy the key onto the stack */
+	jb	.Lcbc_do_ecopy
+	cmpq	$4096-248,%r10
+	jb	.Lcbc_skip_ecopy
+.align	4
+.Lcbc_do_ecopy:
+	movq	%r15,%rsi
+	leaq	80(%rsp),%rdi
+	leaq	80(%rsp),%r15	/* from now on use the on-stack copy as the key */
+	movl	$30,%ecx	/* 30 quadwords = 240 bytes */
+.long	0x90A548F3	/* bytes F3 48 A5 90: rep movsq, then nop */
+	movl	%eax,(%rdi)	/* rdi is now 80+240(%rsp): stores the count at copy offset 240, which also makes the copy flag nonzero */
+.Lcbc_skip_ecopy:
+	movq	%r15,0(%rsp)
+
+	movl	$18,%ecx	/* 18 iterations * 128 bytes = 2304 bytes of table */
+.align	4
+.Lcbc_prefetch_te:
+	movq	0(%r14),%r10	/* touch the table at 32-byte steps; the loaded values are not used */
+	movq	32(%r14),%r11
+	movq	64(%r14),%r12
+	movq	96(%r14),%r13
+	leaq	128(%r14),%r14
+	subl	$1,%ecx
+	jnz	.Lcbc_prefetch_te
+	leaq	-2304(%r14),%r14	/* restore r14 to the table base */
+
+	cmpq	$0,%rbx
+	je	.LFAST_DECRYPT
+
+
+	movl	0(%rbp),%eax	/* encrypt: start the chain from the 16 bytes at ivec */
+	movl	4(%rbp),%ebx
+	movl	8(%rbp),%ecx
+	movl	12(%rbp),%edx
+
+.align	4
+.Lcbc_fast_enc_loop:
+	xorl	0(%r8),%eax	/* chain value XOR next input block */
+	xorl	4(%r8),%ebx
+	xorl	8(%r8),%ecx
+	xorl	12(%r8),%edx
+	movq	0(%rsp),%r15	/* reload the key pointer, which the callee advances */
+	movq	%r8,24(%rsp)	/* r8 is clobbered by the callee */
+
+	call	_x86_64_AES_encrypt
+
+	movq	24(%rsp),%r8
+	movq	40(%rsp),%r10
+	movl	%eax,0(%r9)	/* store the output block; it stays in registers as the next chain value */
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	leaq	16(%r8),%r8
+	leaq	16(%r9),%r9
+	subq	$16,%r10
+	testq	$-16,%r10	/* at least 16 bytes left? the mov below does not change flags */
+	movq	%r10,40(%rsp)
+	jnz	.Lcbc_fast_enc_loop
+	movq	56(%rsp),%rbp
+	movl	%eax,0(%rbp)	/* write the last output block back to ivec */
+	movl	%ebx,4(%rbp)
+	movl	%ecx,8(%rbp)
+	movl	%edx,12(%rbp)
+
+	jmp	.Lcbc_fast_cleanup
+
+
+.align	16
+.LFAST_DECRYPT:
+	cmpq	%r8,%r9	/* input and output identical: use the in-place variant */
+	je	.Lcbc_fast_dec_in_place
+
+	movq	%rbp,64(%rsp)	/* 64(%rsp) = pointer to the previous input block, initially ivec */
+.align	4
+.Lcbc_fast_dec_loop:
+	movl	0(%r8),%eax	/* load the next input block */
+	movl	4(%r8),%ebx
+	movl	8(%r8),%ecx
+	movl	12(%r8),%edx
+	movq	0(%rsp),%r15
+	movq	%r8,24(%rsp)
+
+	call	_x86_64_AES_decrypt
+
+	movq	64(%rsp),%rbp
+	movq	24(%rsp),%r8
+	movq	40(%rsp),%r10
+	xorl	0(%rbp),%eax	/* XOR with the previous input block (or ivec on the first pass) */
+	xorl	4(%rbp),%ebx
+	xorl	8(%rbp),%ecx
+	xorl	12(%rbp),%edx
+	movq	%r8,%rbp	/* the current input block becomes the next chain source */
+
+	subq	$16,%r10	/* sets ZF for the jnz below; the mov and lea instructions in between leave flags unchanged */
+	movq	%r10,40(%rsp)
+	movq	%rbp,64(%rsp)
+
+	movl	%eax,0(%r9)
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	leaq	16(%r8),%r8
+	leaq	16(%r9),%r9
+	jnz	.Lcbc_fast_dec_loop
+	movq	56(%rsp),%r12
+	movq	0(%rbp),%r10	/* copy the last input block to ivec */
+	movq	8(%rbp),%r11
+	movq	%r10,0(%r12)
+	movq	%r11,8(%r12)
+	jmp	.Lcbc_fast_cleanup
+
+.align	16
+.Lcbc_fast_dec_in_place:
+	movq	0(%rbp),%r10	/* in place: keep a 16-byte copy of the chain value at 64(%rsp) */
+	movq	8(%rbp),%r11
+	movq	%r10,0+64(%rsp)
+	movq	%r11,8+64(%rsp)
+.align	4
+.Lcbc_fast_dec_in_place_loop:
+	movl	0(%r8),%eax
+	movl	4(%r8),%ebx
+	movl	8(%r8),%ecx
+	movl	12(%r8),%edx
+	movq	0(%rsp),%r15
+	movq	%r8,24(%rsp)
+
+	call	_x86_64_AES_decrypt
+
+	movq	24(%rsp),%r8
+	movq	40(%rsp),%r10
+	xorl	0+64(%rsp),%eax	/* XOR with the saved chain value */
+	xorl	4+64(%rsp),%ebx
+	xorl	8+64(%rsp),%ecx
+	xorl	12+64(%rsp),%edx
+
+	movq	0(%r8),%r11	/* fetch the input block before the output store overwrites it */
+	movq	8(%r8),%r12
+	subq	$16,%r10
+	jz	.Lcbc_fast_dec_in_place_done
+
+	movq	%r11,0+64(%rsp)	/* it becomes the next chain value */
+	movq	%r12,8+64(%rsp)
+
+	movl	%eax,0(%r9)
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	leaq	16(%r8),%r8
+	leaq	16(%r9),%r9
+	movq	%r10,40(%rsp)
+	jmp	.Lcbc_fast_dec_in_place_loop
+.Lcbc_fast_dec_in_place_done:
+	movq	56(%rsp),%rdi
+	movq	%r11,0(%rdi)	/* last input block goes back to ivec */
+	movq	%r12,8(%rdi)
+
+	movl	%eax,0(%r9)
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+.align	4
+.Lcbc_fast_cleanup:
+	cmpl	$0,80+240(%rsp)	/* was a key copy placed on the stack? lea below leaves flags unchanged */
+	leaq	80(%rsp),%rdi
+	je	.Lcbc_exit
+	movl	$30,%ecx	/* overwrite the 240-byte key copy with zeros */
+	xorq	%rax,%rax
+.long	0x90AB48F3	/* bytes F3 48 AB 90: rep stosq, then nop */
+
+	jmp	.Lcbc_exit
+
+
+.align	16
+.Lcbc_slow_prologue:
+
+	leaq	-88(%rsp),%rbp	/* candidate frame 88 bytes below the stack pointer, aligned to 64 */
+	andq	$-64,%rbp
+
+	leaq	-88-63(%rcx),%r10	/* next five instructions: rbp -= (rbp - key + 151) & 0x3c0 */
+	subq	%rbp,%r10
+	negq	%r10
+	andq	$0x3c0,%r10	/* NOTE(review): presumably keeps the frame from aliasing the key schedule in cache - confirm */
+	subq	%r10,%rbp
+
+	xchgq	%rsp,%rbp	/* switch to the new frame; rbp = previous rsp */
+
+	movq	%rbp,16(%rsp)	/* frame: 0 = key, 8 = key end, 16 = previous rsp, 24 = in, 32 = out, 40 = length, 56 = ivec, 64 = chaining data */
+.Lcbc_slow_body:
+
+
+
+
+	movq	%r8,56(%rsp)
+	movq	%r8,%rbp	/* rbp = ivec */
+	movq	%r9,%rbx	/* rbx = direction */
+	movq	%rsi,%r9	/* r9 = output cursor */
+	movq	%rdi,%r8	/* r8 = input cursor */
+	movq	%rcx,%r15	/* r15 = key schedule */
+	movq	%rdx,%r10	/* r10 = remaining length */
+
+	movl	240(%r15),%eax
+	movq	%r15,0(%rsp)
+	shll	$4,%eax	/* count * 16 = byte offset of the last 16-byte key block */
+	leaq	(%r15,%rax,1),%rax
+	movq	%rax,8(%rsp)	/* end-of-key pointer; the compact routines read it as 16(%rsp) */
+
+
+	leaq	2048(%r14),%r14	/* byte-table area 2048 bytes into the selected table */
+	leaq	768-8(%rsp),%rax
+	subq	%r14,%rax
+	andq	$0x300,%rax	/* rax is 0, 0x100, 0x200 or 0x300, chosen from the stack/table distance */
+	leaq	(%r14,%rax,1),%r14	/* pick one of the 256-byte table copies */
+
+	cmpq	$0,%rbx
+	je	.LSLOW_DECRYPT
+
+
+	testq	$-16,%r10	/* fewer than 16 bytes in total? flags survive the loads below */
+	movl	0(%rbp),%eax	/* start the chain from the 16 bytes at ivec */
+	movl	4(%rbp),%ebx
+	movl	8(%rbp),%ecx
+	movl	12(%rbp),%edx
+	jz	.Lcbc_slow_enc_tail
+
+.align	4
+.Lcbc_slow_enc_loop:
+	xorl	0(%r8),%eax	/* chain value XOR next input block */
+	xorl	4(%r8),%ebx
+	xorl	8(%r8),%ecx
+	xorl	12(%r8),%edx
+	movq	0(%rsp),%r15
+	movq	%r8,24(%rsp)	/* r8, r9 and r10 are clobbered by the callee */
+	movq	%r9,32(%rsp)
+	movq	%r10,40(%rsp)
+
+	call	_x86_64_AES_encrypt_compact
+
+	movq	24(%rsp),%r8
+	movq	32(%rsp),%r9
+	movq	40(%rsp),%r10
+	movl	%eax,0(%r9)
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	leaq	16(%r8),%r8
+	leaq	16(%r9),%r9
+	subq	$16,%r10
+	testq	$-16,%r10	/* another full block available? */
+	jnz	.Lcbc_slow_enc_loop
+	testq	$15,%r10	/* 1 to 15 bytes left over: pad and encrypt them */
+	jnz	.Lcbc_slow_enc_tail
+	movq	56(%rsp),%rbp
+	movl	%eax,0(%rbp)	/* write the last output block back to ivec */
+	movl	%ebx,4(%rbp)
+	movl	%ecx,8(%rbp)
+	movl	%edx,12(%rbp)
+
+	jmp	.Lcbc_exit
+
+.align	4
+.Lcbc_slow_enc_tail:
+	movq	%rax,%r11	/* park the chain words that the string instructions need as scratch */
+	movq	%rcx,%r12
+	movq	%r10,%rcx
+	movq	%r8,%rsi
+	movq	%r9,%rdi
+.long	0x9066A4F3	/* bytes F3 A4 66 90: rep movsb copies the r10 leftover bytes to the output, then a 2-byte nop */
+	movq	$16,%rcx
+	subq	%r10,%rcx	/* rcx = 16 - leftover */
+	xorq	%rax,%rax
+.long	0x9066AAF3	/* bytes F3 AA 66 90: rep stosb zero-fills the rest of the 16-byte output block, then a 2-byte nop */
+	movq	%r9,%r8	/* the padded block in the output buffer becomes the input */
+	movq	$16,%r10
+	movq	%r11,%rax
+	movq	%r12,%rcx
+	jmp	.Lcbc_slow_enc_loop
+
+.align	16
+.LSLOW_DECRYPT:
+	shrq	$3,%rax	/* add rax/8 as well, so the copy offset is a multiple of 288 rather than 256 */
+	addq	%rax,%r14
+
+	movq	0(%rbp),%r11	/* keep a 16-byte copy of the chain value at 64(%rsp) */
+	movq	8(%rbp),%r12
+	movq	%r11,0+64(%rsp)
+	movq	%r12,8+64(%rsp)
+
+.align	4
+.Lcbc_slow_dec_loop:
+	movl	0(%r8),%eax	/* NOTE(review): always loads a full 16 bytes of input, even when fewer remain - confirm callers allow this */
+	movl	4(%r8),%ebx
+	movl	8(%r8),%ecx
+	movl	12(%r8),%edx
+	movq	0(%rsp),%r15
+	movq	%r8,24(%rsp)
+	movq	%r9,32(%rsp)
+	movq	%r10,40(%rsp)
+
+	call	_x86_64_AES_decrypt_compact
+
+	movq	24(%rsp),%r8
+	movq	32(%rsp),%r9
+	movq	40(%rsp),%r10
+	xorl	0+64(%rsp),%eax	/* XOR with the saved chain value */
+	xorl	4+64(%rsp),%ebx
+	xorl	8+64(%rsp),%ecx
+	xorl	12+64(%rsp),%edx
+
+	movq	0(%r8),%r11	/* fetch the input block before the output store can overwrite it */
+	movq	8(%r8),%r12
+	subq	$16,%r10
+	jc	.Lcbc_slow_dec_partial	/* fewer than 16 bytes were left */
+	jz	.Lcbc_slow_dec_done	/* exactly 16 bytes were left */
+
+	movq	%r11,0+64(%rsp)	/* it becomes the next chain value */
+	movq	%r12,8+64(%rsp)
+
+	movl	%eax,0(%r9)
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	leaq	16(%r8),%r8
+	leaq	16(%r9),%r9
+	jmp	.Lcbc_slow_dec_loop
+.Lcbc_slow_dec_done:
+	movq	56(%rsp),%rdi
+	movq	%r11,0(%rdi)	/* last input block goes back to ivec */
+	movq	%r12,8(%rdi)
+
+	movl	%eax,0(%r9)
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	jmp	.Lcbc_exit
+
+.align	4
+.Lcbc_slow_dec_partial:
+	movq	56(%rsp),%rdi
+	movq	%r11,0(%rdi)	/* last input block goes back to ivec */
+	movq	%r12,8(%rdi)
+
+	movl	%eax,0+64(%rsp)	/* stage the full result on the stack ... */
+	movl	%ebx,4+64(%rsp)
+	movl	%ecx,8+64(%rsp)
+	movl	%edx,12+64(%rsp)
+
+	movq	%r9,%rdi
+	leaq	64(%rsp),%rsi
+	leaq	16(%r10),%rcx	/* ... and copy only the r10+16 bytes that were actually left */
+.long	0x9066A4F3	/* bytes F3 A4 66 90: rep movsb, then a 2-byte nop */
+	jmp	.Lcbc_exit
+
+.align	16
+.Lcbc_exit:
+	movq	16(%rsp),%rsi	/* rsi = stack pointer as it was after the prologue pushes */
+	movq	(%rsi),%r15	/* reload the six pushed registers */
+	movq	8(%rsi),%r14
+	movq	16(%rsi),%r13
+	movq	24(%rsi),%r12
+	movq	32(%rsi),%rbp
+	movq	40(%rsi),%rbx
+	leaq	48(%rsi),%rsp	/* rsp now points at the flags saved by pushfq */
+.Lcbc_popfq:
+	popfq
+.Lcbc_epilogue:
+	.byte	0xf3,0xc3	/* bytes F3 C3 encode rep ret */
+.size	asm_AES_cbc_encrypt,.-asm_AES_cbc_encrypt
+.align	64
+.LAES_Te:
+.long	0xa56363c6,0xa56363c6
+.long	0x847c7cf8,0x847c7cf8
+.long	0x997777ee,0x997777ee
+.long	0x8d7b7bf6,0x8d7b7bf6
+.long	0x0df2f2ff,0x0df2f2ff
+.long	0xbd6b6bd6,0xbd6b6bd6
+.long	0xb16f6fde,0xb16f6fde
+.long	0x54c5c591,0x54c5c591
+.long	0x50303060,0x50303060
+.long	0x03010102,0x03010102
+.long	0xa96767ce,0xa96767ce
+.long	0x7d2b2b56,0x7d2b2b56
+.long	0x19fefee7,0x19fefee7
+.long	0x62d7d7b5,0x62d7d7b5
+.long	0xe6abab4d,0xe6abab4d
+.long	0x9a7676ec,0x9a7676ec
+.long	0x45caca8f,0x45caca8f
+.long	0x9d82821f,0x9d82821f
+.long	0x40c9c989,0x40c9c989
+.long	0x877d7dfa,0x877d7dfa
+.long	0x15fafaef,0x15fafaef
+.long	0xeb5959b2,0xeb5959b2
+.long	0xc947478e,0xc947478e
+.long	0x0bf0f0fb,0x0bf0f0fb
+.long	0xecadad41,0xecadad41
+.long	0x67d4d4b3,0x67d4d4b3
+.long	0xfda2a25f,0xfda2a25f
+.long	0xeaafaf45,0xeaafaf45
+.long	0xbf9c9c23,0xbf9c9c23
+.long	0xf7a4a453,0xf7a4a453
+.long	0x967272e4,0x967272e4
+.long	0x5bc0c09b,0x5bc0c09b
+.long	0xc2b7b775,0xc2b7b775
+.long	0x1cfdfde1,0x1cfdfde1
+.long	0xae93933d,0xae93933d
+.long	0x6a26264c,0x6a26264c
+.long	0x5a36366c,0x5a36366c
+.long	0x413f3f7e,0x413f3f7e
+.long	0x02f7f7f5,0x02f7f7f5
+.long	0x4fcccc83,0x4fcccc83
+.long	0x5c343468,0x5c343468
+.long	0xf4a5a551,0xf4a5a551
+.long	0x34e5e5d1,0x34e5e5d1
+.long	0x08f1f1f9,0x08f1f1f9
+.long	0x937171e2,0x937171e2
+.long	0x73d8d8ab,0x73d8d8ab
+.long	0x53313162,0x53313162
+.long	0x3f15152a,0x3f15152a
+.long	0x0c040408,0x0c040408
+.long	0x52c7c795,0x52c7c795
+.long	0x65232346,0x65232346
+.long	0x5ec3c39d,0x5ec3c39d
+.long	0x28181830,0x28181830
+.long	0xa1969637,0xa1969637
+.long	0x0f05050a,0x0f05050a
+.long	0xb59a9a2f,0xb59a9a2f
+.long	0x0907070e,0x0907070e
+.long	0x36121224,0x36121224
+.long	0x9b80801b,0x9b80801b
+.long	0x3de2e2df,0x3de2e2df
+.long	0x26ebebcd,0x26ebebcd
+.long	0x6927274e,0x6927274e
+.long	0xcdb2b27f,0xcdb2b27f
+.long	0x9f7575ea,0x9f7575ea
+.long	0x1b090912,0x1b090912
+.long	0x9e83831d,0x9e83831d
+.long	0x742c2c58,0x742c2c58
+.long	0x2e1a1a34,0x2e1a1a34
+.long	0x2d1b1b36,0x2d1b1b36
+.long	0xb26e6edc,0xb26e6edc
+.long	0xee5a5ab4,0xee5a5ab4
+.long	0xfba0a05b,0xfba0a05b
+.long	0xf65252a4,0xf65252a4
+.long	0x4d3b3b76,0x4d3b3b76
+.long	0x61d6d6b7,0x61d6d6b7
+.long	0xceb3b37d,0xceb3b37d
+.long	0x7b292952,0x7b292952
+.long	0x3ee3e3dd,0x3ee3e3dd
+.long	0x712f2f5e,0x712f2f5e
+.long	0x97848413,0x97848413
+.long	0xf55353a6,0xf55353a6
+.long	0x68d1d1b9,0x68d1d1b9
+.long	0x00000000,0x00000000
+.long	0x2cededc1,0x2cededc1
+.long	0x60202040,0x60202040
+.long	0x1ffcfce3,0x1ffcfce3
+.long	0xc8b1b179,0xc8b1b179
+.long	0xed5b5bb6,0xed5b5bb6
+.long	0xbe6a6ad4,0xbe6a6ad4
+.long	0x46cbcb8d,0x46cbcb8d
+.long	0xd9bebe67,0xd9bebe67
+.long	0x4b393972,0x4b393972
+.long	0xde4a4a94,0xde4a4a94
+.long	0xd44c4c98,0xd44c4c98
+.long	0xe85858b0,0xe85858b0
+.long	0x4acfcf85,0x4acfcf85
+.long	0x6bd0d0bb,0x6bd0d0bb
+.long	0x2aefefc5,0x2aefefc5
+.long	0xe5aaaa4f,0xe5aaaa4f
+.long	0x16fbfbed,0x16fbfbed
+.long	0xc5434386,0xc5434386
+.long	0xd74d4d9a,0xd74d4d9a
+.long	0x55333366,0x55333366
+.long	0x94858511,0x94858511
+.long	0xcf45458a,0xcf45458a
+.long	0x10f9f9e9,0x10f9f9e9
+.long	0x06020204,0x06020204
+.long	0x817f7ffe,0x817f7ffe
+.long	0xf05050a0,0xf05050a0
+.long	0x443c3c78,0x443c3c78
+.long	0xba9f9f25,0xba9f9f25
+.long	0xe3a8a84b,0xe3a8a84b
+.long	0xf35151a2,0xf35151a2
+.long	0xfea3a35d,0xfea3a35d
+.long	0xc0404080,0xc0404080
+.long	0x8a8f8f05,0x8a8f8f05
+.long	0xad92923f,0xad92923f
+.long	0xbc9d9d21,0xbc9d9d21
+.long	0x48383870,0x48383870
+.long	0x04f5f5f1,0x04f5f5f1
+.long	0xdfbcbc63,0xdfbcbc63
+.long	0xc1b6b677,0xc1b6b677
+.long	0x75dadaaf,0x75dadaaf
+.long	0x63212142,0x63212142
+.long	0x30101020,0x30101020
+.long	0x1affffe5,0x1affffe5
+.long	0x0ef3f3fd,0x0ef3f3fd
+.long	0x6dd2d2bf,0x6dd2d2bf
+.long	0x4ccdcd81,0x4ccdcd81
+.long	0x140c0c18,0x140c0c18
+.long	0x35131326,0x35131326
+.long	0x2fececc3,0x2fececc3
+.long	0xe15f5fbe,0xe15f5fbe
+.long	0xa2979735,0xa2979735
+.long	0xcc444488,0xcc444488
+.long	0x3917172e,0x3917172e
+.long	0x57c4c493,0x57c4c493
+.long	0xf2a7a755,0xf2a7a755
+.long	0x827e7efc,0x827e7efc
+.long	0x473d3d7a,0x473d3d7a
+.long	0xac6464c8,0xac6464c8
+.long	0xe75d5dba,0xe75d5dba
+.long	0x2b191932,0x2b191932
+.long	0x957373e6,0x957373e6
+.long	0xa06060c0,0xa06060c0
+.long	0x98818119,0x98818119
+.long	0xd14f4f9e,0xd14f4f9e
+.long	0x7fdcdca3,0x7fdcdca3
+.long	0x66222244,0x66222244
+.long	0x7e2a2a54,0x7e2a2a54
+.long	0xab90903b,0xab90903b
+.long	0x8388880b,0x8388880b
+.long	0xca46468c,0xca46468c
+.long	0x29eeeec7,0x29eeeec7
+.long	0xd3b8b86b,0xd3b8b86b
+.long	0x3c141428,0x3c141428
+.long	0x79dedea7,0x79dedea7
+.long	0xe25e5ebc,0xe25e5ebc
+.long	0x1d0b0b16,0x1d0b0b16
+.long	0x76dbdbad,0x76dbdbad
+.long	0x3be0e0db,0x3be0e0db
+.long	0x56323264,0x56323264
+.long	0x4e3a3a74,0x4e3a3a74
+.long	0x1e0a0a14,0x1e0a0a14
+.long	0xdb494992,0xdb494992
+.long	0x0a06060c,0x0a06060c
+.long	0x6c242448,0x6c242448
+.long	0xe45c5cb8,0xe45c5cb8
+.long	0x5dc2c29f,0x5dc2c29f
+.long	0x6ed3d3bd,0x6ed3d3bd
+.long	0xefacac43,0xefacac43
+.long	0xa66262c4,0xa66262c4
+.long	0xa8919139,0xa8919139
+.long	0xa4959531,0xa4959531
+.long	0x37e4e4d3,0x37e4e4d3
+.long	0x8b7979f2,0x8b7979f2
+.long	0x32e7e7d5,0x32e7e7d5
+.long	0x43c8c88b,0x43c8c88b
+.long	0x5937376e,0x5937376e
+.long	0xb76d6dda,0xb76d6dda
+.long	0x8c8d8d01,0x8c8d8d01
+.long	0x64d5d5b1,0x64d5d5b1
+.long	0xd24e4e9c,0xd24e4e9c
+.long	0xe0a9a949,0xe0a9a949
+.long	0xb46c6cd8,0xb46c6cd8
+.long	0xfa5656ac,0xfa5656ac
+.long	0x07f4f4f3,0x07f4f4f3
+.long	0x25eaeacf,0x25eaeacf
+.long	0xaf6565ca,0xaf6565ca
+.long	0x8e7a7af4,0x8e7a7af4
+.long	0xe9aeae47,0xe9aeae47
+.long	0x18080810,0x18080810
+.long	0xd5baba6f,0xd5baba6f
+.long	0x887878f0,0x887878f0
+.long	0x6f25254a,0x6f25254a
+.long	0x722e2e5c,0x722e2e5c
+.long	0x241c1c38,0x241c1c38
+.long	0xf1a6a657,0xf1a6a657
+.long	0xc7b4b473,0xc7b4b473
+.long	0x51c6c697,0x51c6c697
+.long	0x23e8e8cb,0x23e8e8cb
+.long	0x7cdddda1,0x7cdddda1
+.long	0x9c7474e8,0x9c7474e8
+.long	0x211f1f3e,0x211f1f3e
+.long	0xdd4b4b96,0xdd4b4b96
+.long	0xdcbdbd61,0xdcbdbd61
+.long	0x868b8b0d,0x868b8b0d
+.long	0x858a8a0f,0x858a8a0f
+.long	0x907070e0,0x907070e0
+.long	0x423e3e7c,0x423e3e7c
+.long	0xc4b5b571,0xc4b5b571
+.long	0xaa6666cc,0xaa6666cc
+.long	0xd8484890,0xd8484890
+.long	0x05030306,0x05030306
+.long	0x01f6f6f7,0x01f6f6f7
+.long	0x120e0e1c,0x120e0e1c
+.long	0xa36161c2,0xa36161c2
+.long	0x5f35356a,0x5f35356a
+.long	0xf95757ae,0xf95757ae
+.long	0xd0b9b969,0xd0b9b969
+.long	0x91868617,0x91868617
+.long	0x58c1c199,0x58c1c199
+.long	0x271d1d3a,0x271d1d3a
+.long	0xb99e9e27,0xb99e9e27
+.long	0x38e1e1d9,0x38e1e1d9
+.long	0x13f8f8eb,0x13f8f8eb
+.long	0xb398982b,0xb398982b
+.long	0x33111122,0x33111122
+.long	0xbb6969d2,0xbb6969d2
+.long	0x70d9d9a9,0x70d9d9a9
+.long	0x898e8e07,0x898e8e07
+.long	0xa7949433,0xa7949433
+.long	0xb69b9b2d,0xb69b9b2d
+.long	0x221e1e3c,0x221e1e3c
+.long	0x92878715,0x92878715
+.long	0x20e9e9c9,0x20e9e9c9
+.long	0x49cece87,0x49cece87
+.long	0xff5555aa,0xff5555aa
+.long	0x78282850,0x78282850
+.long	0x7adfdfa5,0x7adfdfa5
+.long	0x8f8c8c03,0x8f8c8c03
+.long	0xf8a1a159,0xf8a1a159
+.long	0x80898909,0x80898909
+.long	0x170d0d1a,0x170d0d1a
+.long	0xdabfbf65,0xdabfbf65
+.long	0x31e6e6d7,0x31e6e6d7
+.long	0xc6424284,0xc6424284
+.long	0xb86868d0,0xb86868d0
+.long	0xc3414182,0xc3414182
+.long	0xb0999929,0xb0999929
+.long	0x772d2d5a,0x772d2d5a
+.long	0x110f0f1e,0x110f0f1e
+.long	0xcbb0b07b,0xcbb0b07b
+.long	0xfc5454a8,0xfc5454a8
+.long	0xd6bbbb6d,0xd6bbbb6d
+.long	0x3a16162c,0x3a16162c
+.byte	0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte	0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte	0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte	0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte	0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte	0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte	0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte	0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte	0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte	0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte	0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte	0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte	0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte	0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte	0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte	0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte	0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte	0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte	0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte	0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte	0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte	0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte	0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte	0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte	0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte	0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte	0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte	0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte	0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte	0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte	0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte	0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte	0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte	0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte	0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte	0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte	0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte	0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte	0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte	0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte	0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte	0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte	0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte	0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte	0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte	0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte	0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte	0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte	0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte	0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte	0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte	0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte	0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte	0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte	0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte	0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte	0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte	0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte	0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte	0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte	0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte	0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte	0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte	0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte	0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte	0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte	0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte	0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte	0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte	0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte	0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte	0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte	0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte	0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte	0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte	0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte	0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte	0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte	0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte	0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte	0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte	0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte	0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte	0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte	0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte	0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte	0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte	0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte	0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte	0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte	0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte	0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte	0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte	0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte	0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte	0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte	0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte	0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte	0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte	0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte	0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte	0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte	0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte	0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte	0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte	0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte	0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte	0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte	0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte	0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte	0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte	0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte	0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte	0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte	0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte	0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte	0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte	0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte	0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte	0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte	0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte	0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte	0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte	0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte	0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte	0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte	0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte	0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.long	0x00000001, 0x00000002, 0x00000004, 0x00000008
+.long	0x00000010, 0x00000020, 0x00000040, 0x00000080
+.long	0x0000001b, 0x00000036, 0x80808080, 0x80808080
+.long	0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
+.align	64
+.LAES_Td:
+.long	0x50a7f451,0x50a7f451
+.long	0x5365417e,0x5365417e
+.long	0xc3a4171a,0xc3a4171a
+.long	0x965e273a,0x965e273a
+.long	0xcb6bab3b,0xcb6bab3b
+.long	0xf1459d1f,0xf1459d1f
+.long	0xab58faac,0xab58faac
+.long	0x9303e34b,0x9303e34b
+.long	0x55fa3020,0x55fa3020
+.long	0xf66d76ad,0xf66d76ad
+.long	0x9176cc88,0x9176cc88
+.long	0x254c02f5,0x254c02f5
+.long	0xfcd7e54f,0xfcd7e54f
+.long	0xd7cb2ac5,0xd7cb2ac5
+.long	0x80443526,0x80443526
+.long	0x8fa362b5,0x8fa362b5
+.long	0x495ab1de,0x495ab1de
+.long	0x671bba25,0x671bba25
+.long	0x980eea45,0x980eea45
+.long	0xe1c0fe5d,0xe1c0fe5d
+.long	0x02752fc3,0x02752fc3
+.long	0x12f04c81,0x12f04c81
+.long	0xa397468d,0xa397468d
+.long	0xc6f9d36b,0xc6f9d36b
+.long	0xe75f8f03,0xe75f8f03
+.long	0x959c9215,0x959c9215
+.long	0xeb7a6dbf,0xeb7a6dbf
+.long	0xda595295,0xda595295
+.long	0x2d83bed4,0x2d83bed4
+.long	0xd3217458,0xd3217458
+.long	0x2969e049,0x2969e049
+.long	0x44c8c98e,0x44c8c98e
+.long	0x6a89c275,0x6a89c275
+.long	0x78798ef4,0x78798ef4
+.long	0x6b3e5899,0x6b3e5899
+.long	0xdd71b927,0xdd71b927
+.long	0xb64fe1be,0xb64fe1be
+.long	0x17ad88f0,0x17ad88f0
+.long	0x66ac20c9,0x66ac20c9
+.long	0xb43ace7d,0xb43ace7d
+.long	0x184adf63,0x184adf63
+.long	0x82311ae5,0x82311ae5
+.long	0x60335197,0x60335197
+.long	0x457f5362,0x457f5362
+.long	0xe07764b1,0xe07764b1
+.long	0x84ae6bbb,0x84ae6bbb
+.long	0x1ca081fe,0x1ca081fe
+.long	0x942b08f9,0x942b08f9
+.long	0x58684870,0x58684870
+.long	0x19fd458f,0x19fd458f
+.long	0x876cde94,0x876cde94
+.long	0xb7f87b52,0xb7f87b52
+.long	0x23d373ab,0x23d373ab
+.long	0xe2024b72,0xe2024b72
+.long	0x578f1fe3,0x578f1fe3
+.long	0x2aab5566,0x2aab5566
+.long	0x0728ebb2,0x0728ebb2
+.long	0x03c2b52f,0x03c2b52f
+.long	0x9a7bc586,0x9a7bc586
+.long	0xa50837d3,0xa50837d3
+.long	0xf2872830,0xf2872830
+.long	0xb2a5bf23,0xb2a5bf23
+.long	0xba6a0302,0xba6a0302
+.long	0x5c8216ed,0x5c8216ed
+.long	0x2b1ccf8a,0x2b1ccf8a
+.long	0x92b479a7,0x92b479a7
+.long	0xf0f207f3,0xf0f207f3
+.long	0xa1e2694e,0xa1e2694e
+.long	0xcdf4da65,0xcdf4da65
+.long	0xd5be0506,0xd5be0506
+.long	0x1f6234d1,0x1f6234d1
+.long	0x8afea6c4,0x8afea6c4
+.long	0x9d532e34,0x9d532e34
+.long	0xa055f3a2,0xa055f3a2
+.long	0x32e18a05,0x32e18a05
+.long	0x75ebf6a4,0x75ebf6a4
+.long	0x39ec830b,0x39ec830b
+.long	0xaaef6040,0xaaef6040
+.long	0x069f715e,0x069f715e
+.long	0x51106ebd,0x51106ebd
+.long	0xf98a213e,0xf98a213e
+.long	0x3d06dd96,0x3d06dd96
+.long	0xae053edd,0xae053edd
+.long	0x46bde64d,0x46bde64d
+.long	0xb58d5491,0xb58d5491
+.long	0x055dc471,0x055dc471
+.long	0x6fd40604,0x6fd40604
+.long	0xff155060,0xff155060
+.long	0x24fb9819,0x24fb9819
+.long	0x97e9bdd6,0x97e9bdd6
+.long	0xcc434089,0xcc434089
+.long	0x779ed967,0x779ed967
+.long	0xbd42e8b0,0xbd42e8b0
+.long	0x888b8907,0x888b8907
+.long	0x385b19e7,0x385b19e7
+.long	0xdbeec879,0xdbeec879
+.long	0x470a7ca1,0x470a7ca1
+.long	0xe90f427c,0xe90f427c
+.long	0xc91e84f8,0xc91e84f8
+.long	0x00000000,0x00000000
+.long	0x83868009,0x83868009
+.long	0x48ed2b32,0x48ed2b32
+.long	0xac70111e,0xac70111e
+.long	0x4e725a6c,0x4e725a6c
+.long	0xfbff0efd,0xfbff0efd
+.long	0x5638850f,0x5638850f
+.long	0x1ed5ae3d,0x1ed5ae3d
+.long	0x27392d36,0x27392d36
+.long	0x64d90f0a,0x64d90f0a
+.long	0x21a65c68,0x21a65c68
+.long	0xd1545b9b,0xd1545b9b
+.long	0x3a2e3624,0x3a2e3624
+.long	0xb1670a0c,0xb1670a0c
+.long	0x0fe75793,0x0fe75793
+.long	0xd296eeb4,0xd296eeb4
+.long	0x9e919b1b,0x9e919b1b
+.long	0x4fc5c080,0x4fc5c080
+.long	0xa220dc61,0xa220dc61
+.long	0x694b775a,0x694b775a
+.long	0x161a121c,0x161a121c
+.long	0x0aba93e2,0x0aba93e2
+.long	0xe52aa0c0,0xe52aa0c0
+.long	0x43e0223c,0x43e0223c
+.long	0x1d171b12,0x1d171b12
+.long	0x0b0d090e,0x0b0d090e
+.long	0xadc78bf2,0xadc78bf2
+.long	0xb9a8b62d,0xb9a8b62d
+.long	0xc8a91e14,0xc8a91e14
+.long	0x8519f157,0x8519f157
+.long	0x4c0775af,0x4c0775af
+.long	0xbbdd99ee,0xbbdd99ee
+.long	0xfd607fa3,0xfd607fa3
+.long	0x9f2601f7,0x9f2601f7
+.long	0xbcf5725c,0xbcf5725c
+.long	0xc53b6644,0xc53b6644
+.long	0x347efb5b,0x347efb5b
+.long	0x7629438b,0x7629438b
+.long	0xdcc623cb,0xdcc623cb
+.long	0x68fcedb6,0x68fcedb6
+.long	0x63f1e4b8,0x63f1e4b8
+.long	0xcadc31d7,0xcadc31d7
+.long	0x10856342,0x10856342
+.long	0x40229713,0x40229713
+.long	0x2011c684,0x2011c684
+.long	0x7d244a85,0x7d244a85
+.long	0xf83dbbd2,0xf83dbbd2
+.long	0x1132f9ae,0x1132f9ae
+.long	0x6da129c7,0x6da129c7
+.long	0x4b2f9e1d,0x4b2f9e1d
+.long	0xf330b2dc,0xf330b2dc
+.long	0xec52860d,0xec52860d
+.long	0xd0e3c177,0xd0e3c177
+.long	0x6c16b32b,0x6c16b32b
+.long	0x99b970a9,0x99b970a9
+.long	0xfa489411,0xfa489411
+.long	0x2264e947,0x2264e947
+.long	0xc48cfca8,0xc48cfca8
+.long	0x1a3ff0a0,0x1a3ff0a0
+.long	0xd82c7d56,0xd82c7d56
+.long	0xef903322,0xef903322
+.long	0xc74e4987,0xc74e4987
+.long	0xc1d138d9,0xc1d138d9
+.long	0xfea2ca8c,0xfea2ca8c
+.long	0x360bd498,0x360bd498
+.long	0xcf81f5a6,0xcf81f5a6
+.long	0x28de7aa5,0x28de7aa5
+.long	0x268eb7da,0x268eb7da
+.long	0xa4bfad3f,0xa4bfad3f
+.long	0xe49d3a2c,0xe49d3a2c
+.long	0x0d927850,0x0d927850
+.long	0x9bcc5f6a,0x9bcc5f6a
+.long	0x62467e54,0x62467e54
+.long	0xc2138df6,0xc2138df6
+.long	0xe8b8d890,0xe8b8d890
+.long	0x5ef7392e,0x5ef7392e
+.long	0xf5afc382,0xf5afc382
+.long	0xbe805d9f,0xbe805d9f
+.long	0x7c93d069,0x7c93d069
+.long	0xa92dd56f,0xa92dd56f
+.long	0xb31225cf,0xb31225cf
+.long	0x3b99acc8,0x3b99acc8
+.long	0xa77d1810,0xa77d1810
+.long	0x6e639ce8,0x6e639ce8
+.long	0x7bbb3bdb,0x7bbb3bdb
+.long	0x097826cd,0x097826cd
+.long	0xf418596e,0xf418596e
+.long	0x01b79aec,0x01b79aec
+.long	0xa89a4f83,0xa89a4f83
+.long	0x656e95e6,0x656e95e6
+.long	0x7ee6ffaa,0x7ee6ffaa
+.long	0x08cfbc21,0x08cfbc21
+.long	0xe6e815ef,0xe6e815ef
+.long	0xd99be7ba,0xd99be7ba
+.long	0xce366f4a,0xce366f4a
+.long	0xd4099fea,0xd4099fea
+.long	0xd67cb029,0xd67cb029
+.long	0xafb2a431,0xafb2a431
+.long	0x31233f2a,0x31233f2a
+.long	0x3094a5c6,0x3094a5c6
+.long	0xc066a235,0xc066a235
+.long	0x37bc4e74,0x37bc4e74
+.long	0xa6ca82fc,0xa6ca82fc
+.long	0xb0d090e0,0xb0d090e0
+.long	0x15d8a733,0x15d8a733
+.long	0x4a9804f1,0x4a9804f1
+.long	0xf7daec41,0xf7daec41
+.long	0x0e50cd7f,0x0e50cd7f
+.long	0x2ff69117,0x2ff69117
+.long	0x8dd64d76,0x8dd64d76
+.long	0x4db0ef43,0x4db0ef43
+.long	0x544daacc,0x544daacc
+.long	0xdf0496e4,0xdf0496e4
+.long	0xe3b5d19e,0xe3b5d19e
+.long	0x1b886a4c,0x1b886a4c
+.long	0xb81f2cc1,0xb81f2cc1
+.long	0x7f516546,0x7f516546
+.long	0x04ea5e9d,0x04ea5e9d
+.long	0x5d358c01,0x5d358c01
+.long	0x737487fa,0x737487fa
+.long	0x2e410bfb,0x2e410bfb
+.long	0x5a1d67b3,0x5a1d67b3
+.long	0x52d2db92,0x52d2db92
+.long	0x335610e9,0x335610e9
+.long	0x1347d66d,0x1347d66d
+.long	0x8c61d79a,0x8c61d79a
+.long	0x7a0ca137,0x7a0ca137
+.long	0x8e14f859,0x8e14f859
+.long	0x893c13eb,0x893c13eb
+.long	0xee27a9ce,0xee27a9ce
+.long	0x35c961b7,0x35c961b7
+.long	0xede51ce1,0xede51ce1
+.long	0x3cb1477a,0x3cb1477a
+.long	0x59dfd29c,0x59dfd29c
+.long	0x3f73f255,0x3f73f255
+.long	0x79ce1418,0x79ce1418
+.long	0xbf37c773,0xbf37c773
+.long	0xeacdf753,0xeacdf753
+.long	0x5baafd5f,0x5baafd5f
+.long	0x146f3ddf,0x146f3ddf
+.long	0x86db4478,0x86db4478
+.long	0x81f3afca,0x81f3afca
+.long	0x3ec468b9,0x3ec468b9
+.long	0x2c342438,0x2c342438
+.long	0x5f40a3c2,0x5f40a3c2
+.long	0x72c31d16,0x72c31d16
+.long	0x0c25e2bc,0x0c25e2bc
+.long	0x8b493c28,0x8b493c28
+.long	0x41950dff,0x41950dff
+.long	0x7101a839,0x7101a839
+.long	0xdeb30c08,0xdeb30c08
+.long	0x9ce4b4d8,0x9ce4b4d8
+.long	0x90c15664,0x90c15664
+.long	0x6184cb7b,0x6184cb7b
+.long	0x70b632d5,0x70b632d5
+.long	0x745c6c48,0x745c6c48
+.long	0x4257b8d0,0x4257b8d0
+.byte	0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte	0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte	0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte	0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte	0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte	0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte	0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte	0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte	0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte	0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte	0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte	0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte	0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte	0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte	0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte	0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte	0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte	0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte	0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte	0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte	0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte	0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte	0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte	0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte	0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte	0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte	0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte	0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte	0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte	0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte	0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte	0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long	0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long	0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte	0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte	0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte	0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte	0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte	0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte	0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte	0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte	0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte	0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte	0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte	0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte	0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte	0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte	0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte	0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte	0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte	0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte	0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte	0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte	0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte	0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte	0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte	0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte	0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte	0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte	0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte	0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte	0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte	0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte	0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte	0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte	0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long	0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long	0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte	0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte	0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte	0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte	0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte	0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte	0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte	0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte	0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte	0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte	0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte	0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte	0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte	0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte	0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte	0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte	0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte	0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte	0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte	0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte	0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte	0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte	0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte	0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte	0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte	0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte	0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte	0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte	0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte	0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte	0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte	0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte	0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long	0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long	0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte	0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte	0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte	0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte	0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte	0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte	0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte	0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte	0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte	0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte	0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte	0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte	0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte	0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte	0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte	0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte	0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte	0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte	0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte	0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte	0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte	0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte	0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte	0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte	0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte	0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte	0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte	0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte	0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte	0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte	0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte	0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte	0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long	0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long	0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte	65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align	64
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
new file mode 100644
index 0000000..e7b4c48
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
@@ -0,0 +1,834 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+.type	_aesni_ctr32_ghash_6x,@function	/* Internal helper called by aesni_gcm_decrypt and aesni_gcm_encrypt: per pass, AES-CTR over six 16-byte blocks interleaved with carry-less-multiply hashing of six buffered blocks. */
+.align	32
+_aesni_ctr32_ghash_6x:	/* Takes no arguments of its own; it runs on the register and stack state prepared by its caller and returns with the last six output blocks still in xmm9..xmm14. */
+.cfi_startproc	
+	vmovdqu	32(%r11),%xmm2	/* xmm2 = constant 32 bytes past .Lbswap_mask (callers point r11 there), used as the per-block counter step. NOTE(review): the table is outside this view; presumably +1 in the last byte. */
+	subq	$6,%rdx	/* rdx counts 16-byte blocks (callers shift the byte length right by 4); claim six for the first pass */
+	vpxor	%xmm4,%xmm4,%xmm4	/* xmm4 = 0 */
+	vmovdqu	0-128(%rcx),%xmm15	/* xmm15 = round key 0; callers advance rcx by 128, hence the -128 bias on every key offset */
+	vpaddb	%xmm2,%xmm1,%xmm10	/* xmm10..xmm14 = the five counter blocks that follow xmm1 */
+	vpaddb	%xmm2,%xmm10,%xmm11
+	vpaddb	%xmm2,%xmm11,%xmm12
+	vpaddb	%xmm2,%xmm12,%xmm13
+	vpaddb	%xmm2,%xmm13,%xmm14
+	vpxor	%xmm15,%xmm1,%xmm9	/* xmm9 = first counter block XOR round key 0 */
+	vmovdqu	%xmm4,16+8(%rsp)	/* zero the 16-byte scratch slot that the loop XORs into xmm8 */
+	jmp	.Loop6x
+/* Each pass of .Loop6x consumes 96 bytes at (%rdi) and produces 96 bytes of output. */
+.align	32
+.Loop6x:
+	addl	$100663296,%ebx	/* 100663296 = 6 * 2^24: add 6 to the top byte of ebx, which callers load from 12(%r8) */
+	jc	.Lhandle_ctr32	/* carry out of that byte: use the path that byte-swaps and adds in 32-bit lanes instead of vpaddb */
+	vmovdqu	0-32(%r9),%xmm3	/* xmm3 = 16-byte table entry at r9-32 (callers bias r9 by +64 after reading the hash state from it) */
+	vpaddb	%xmm2,%xmm14,%xmm1	/* xmm1 = counter block that starts the next pass */
+	vpxor	%xmm15,%xmm10,%xmm10	/* XOR round key 0 into the remaining counter blocks (continues for xmm12..xmm14 below) */
+	vpxor	%xmm15,%xmm11,%xmm11
+
+.Lresume_ctr32:
+	vmovdqu	%xmm1,(%r8)	/* park the next-pass counter block at (%r8); .Lenc_tail reloads it from there */
+	vpclmulqdq	$0x10,%xmm3,%xmm7,%xmm5	/* hash multiply 1 of 6: xmm7 (block carried in a register) by xmm3, done as four 64x64 carry-less products */
+	vpxor	%xmm15,%xmm12,%xmm12
+	vmovups	16-128(%rcx),%xmm2	/* round key 1; xmm2 is a temporary from here on, the counter step is reloaded in .Lenc_tail */
+	vpclmulqdq	$0x01,%xmm3,%xmm7,%xmm6
+
+/* Branch-free choice of where the next six blocks to hash are fetched.   */
+/* Callers set r14 to the start of the buffer whose blocks get hashed and */
+/* r15 to that buffer + length - 192. The xor/cmp/setnc/neg/and sequence  */
+/* below leaves r12 = 0x60 when r15 is not below r14 (unsigned) and       */
+/* r12 = 0 otherwise; the lea that follows adds r12 to r14. So r14 moves  */
+/* forward by 96 only while the new r14 is at most buffer + length - 96,  */
+/* which keeps the loads at 0..88(%r14) inside the buffer; otherwise r14  */
+/* stays put and the same 96 bytes are fetched again.                     */
+/* Each movbe pair byte-reverses one 16-byte block (the two halves are    */
+/* stored swapped) into the stack slots 32+8(%rsp) .. 120+8(%rsp), which  */
+/* the next pass reads as hash operands. The +8 accounts for the return   */
+/* address pushed by call: callers address the same slots without it.     */
+
+
+
+
+	xorq	%r12,%r12	/* r12 = 0 */
+	cmpq	%r14,%r15	/* CF set when r15 is below r14 (unsigned); consumed by setnc further down */
+
+	vaesenc	%xmm2,%xmm9,%xmm9	/* AES round with round key 1, applied to the six counter blocks in turn */
+	vmovdqu	48+8(%rsp),%xmm0	/* operand for hash multiply 2 */
+	vpxor	%xmm15,%xmm13,%xmm13
+	vpclmulqdq	$0x00,%xmm3,%xmm7,%xmm1
+	vaesenc	%xmm2,%xmm10,%xmm10
+	vpxor	%xmm15,%xmm14,%xmm14
+	setnc	%r12b	/* r12 = 1 when r15 is not below r14; the vector instructions in between do not touch EFLAGS */
+	vpclmulqdq	$0x11,%xmm3,%xmm7,%xmm7
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vmovdqu	16-32(%r9),%xmm3
+	negq	%r12	/* r12 = all ones or 0 */
+	vaesenc	%xmm2,%xmm12,%xmm12
+	vpxor	%xmm5,%xmm6,%xmm6	/* xmm6 accumulates the cross (0x10 and 0x01) products */
+	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm5
+	vpxor	%xmm4,%xmm8,%xmm8	/* fold the value left in xmm4 by the previous pass (zero on the first pass) into xmm8 */
+	vaesenc	%xmm2,%xmm13,%xmm13
+	vpxor	%xmm5,%xmm1,%xmm4	/* xmm4 now accumulates the 0x00 (low x low) products */
+	andq	$0x60,%r12	/* r12 = 0x60 or 0 */
+	vmovups	32-128(%rcx),%xmm15	/* round key 2 */
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm1
+	vaesenc	%xmm2,%xmm14,%xmm14
+
+	vpclmulqdq	$0x01,%xmm3,%xmm0,%xmm2
+	leaq	(%r14,%r12,1),%r14	/* r14 += r12: advance the hash read pointer by 96 bytes or leave it unchanged */
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vpxor	16+8(%rsp),%xmm8,%xmm8	/* fold in the value parked in the scratch slot (zero on the first pass, xmm7 of the previous pass afterwards) */
+	vpclmulqdq	$0x11,%xmm3,%xmm0,%xmm3
+	vmovdqu	64+8(%rsp),%xmm0	/* operand for hash multiply 3 */
+	vaesenc	%xmm15,%xmm10,%xmm10
+	movbeq	88(%r14),%r13	/* byte-reversed load of the block at r14+80, high half first */
+	vaesenc	%xmm15,%xmm11,%xmm11
+	movbeq	80(%r14),%r12	/* r12 is free again here: its 0/0x60 value was consumed by the lea above */
+	vaesenc	%xmm15,%xmm12,%xmm12
+	movq	%r13,32+8(%rsp)
+	vaesenc	%xmm15,%xmm13,%xmm13
+	movq	%r12,40+8(%rsp)
+	vmovdqu	48-32(%r9),%xmm5
+	vaesenc	%xmm15,%xmm14,%xmm14
+
+	vmovups	48-128(%rcx),%xmm15	/* round key 3 */
+	vpxor	%xmm1,%xmm6,%xmm6
+	vpclmulqdq	$0x00,%xmm5,%xmm0,%xmm1
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vpxor	%xmm2,%xmm6,%xmm6
+	vpclmulqdq	$0x10,%xmm5,%xmm0,%xmm2
+	vaesenc	%xmm15,%xmm10,%xmm10
+	vpxor	%xmm3,%xmm7,%xmm7	/* xmm7 accumulates the 0x11 (high x high) products */
+	vpclmulqdq	$0x01,%xmm5,%xmm0,%xmm3
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vpclmulqdq	$0x11,%xmm5,%xmm0,%xmm5
+	vmovdqu	80+8(%rsp),%xmm0	/* operand for hash multiply 4 */
+	vaesenc	%xmm15,%xmm12,%xmm12
+	vaesenc	%xmm15,%xmm13,%xmm13
+	vpxor	%xmm1,%xmm4,%xmm4
+	vmovdqu	64-32(%r9),%xmm1
+	vaesenc	%xmm15,%xmm14,%xmm14
+
+	vmovups	64-128(%rcx),%xmm15	/* round key 4 */
+	vpxor	%xmm2,%xmm6,%xmm6
+	vpclmulqdq	$0x00,%xmm1,%xmm0,%xmm2
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vpxor	%xmm3,%xmm6,%xmm6
+	vpclmulqdq	$0x10,%xmm1,%xmm0,%xmm3
+	vaesenc	%xmm15,%xmm10,%xmm10
+	movbeq	72(%r14),%r13
+	vpxor	%xmm5,%xmm7,%xmm7
+	vpclmulqdq	$0x01,%xmm1,%xmm0,%xmm5
+	vaesenc	%xmm15,%xmm11,%xmm11
+	movbeq	64(%r14),%r12
+	vpclmulqdq	$0x11,%xmm1,%xmm0,%xmm1
+	vmovdqu	96+8(%rsp),%xmm0	/* operand for hash multiply 5 */
+	vaesenc	%xmm15,%xmm12,%xmm12
+	movq	%r13,48+8(%rsp)	/* this slot was already read earlier in the pass, so it can be refilled for the next one */
+	vaesenc	%xmm15,%xmm13,%xmm13
+	movq	%r12,56+8(%rsp)
+	vpxor	%xmm2,%xmm4,%xmm4
+	vmovdqu	96-32(%r9),%xmm2
+	vaesenc	%xmm15,%xmm14,%xmm14
+
+	vmovups	80-128(%rcx),%xmm15	/* round key 5 */
+	vpxor	%xmm3,%xmm6,%xmm6
+	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm3
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vpxor	%xmm5,%xmm6,%xmm6
+	vpclmulqdq	$0x10,%xmm2,%xmm0,%xmm5
+	vaesenc	%xmm15,%xmm10,%xmm10
+	movbeq	56(%r14),%r13
+	vpxor	%xmm1,%xmm7,%xmm7
+	vpclmulqdq	$0x01,%xmm2,%xmm0,%xmm1
+	vpxor	112+8(%rsp),%xmm8,%xmm8	/* XOR the last buffered block into the running value in xmm8; the sum is the operand of hash multiply 6 */
+	vaesenc	%xmm15,%xmm11,%xmm11
+	movbeq	48(%r14),%r12
+	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm2
+	vaesenc	%xmm15,%xmm12,%xmm12
+	movq	%r13,64+8(%rsp)
+	vaesenc	%xmm15,%xmm13,%xmm13
+	movq	%r12,72+8(%rsp)
+	vpxor	%xmm3,%xmm4,%xmm4
+	vmovdqu	112-32(%r9),%xmm3
+	vaesenc	%xmm15,%xmm14,%xmm14
+
+	vmovups	96-128(%rcx),%xmm15	/* round key 6 */
+	vpxor	%xmm5,%xmm6,%xmm6
+	vpclmulqdq	$0x10,%xmm3,%xmm8,%xmm5
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vpxor	%xmm1,%xmm6,%xmm6
+	vpclmulqdq	$0x01,%xmm3,%xmm8,%xmm1
+	vaesenc	%xmm15,%xmm10,%xmm10
+	movbeq	40(%r14),%r13
+	vpxor	%xmm2,%xmm7,%xmm7
+	vpclmulqdq	$0x00,%xmm3,%xmm8,%xmm2
+	vaesenc	%xmm15,%xmm11,%xmm11
+	movbeq	32(%r14),%r12
+	vpclmulqdq	$0x11,%xmm3,%xmm8,%xmm8
+	vaesenc	%xmm15,%xmm12,%xmm12
+	movq	%r13,80+8(%rsp)
+	vaesenc	%xmm15,%xmm13,%xmm13
+	movq	%r12,88+8(%rsp)
+	vpxor	%xmm5,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm14,%xmm14
+	vpxor	%xmm1,%xmm6,%xmm6
+
+	vmovups	112-128(%rcx),%xmm15	/* round key 7 */
+	vpslldq	$8,%xmm6,%xmm5	/* low 8 bytes of the cross-term sum, moved to the upper half; folded into xmm4 below */
+	vpxor	%xmm2,%xmm4,%xmm4
+	vmovdqu	16(%r11),%xmm3	/* xmm3 = constant 16 bytes past .Lbswap_mask, the multiplier of the two folding steps below. NOTE(review): presumably the GHASH reduction polynomial; table not in view. */
+
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vpxor	%xmm8,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm10,%xmm10
+	vpxor	%xmm5,%xmm4,%xmm4
+	movbeq	24(%r14),%r13
+	vaesenc	%xmm15,%xmm11,%xmm11
+	movbeq	16(%r14),%r12
+	vpalignr	$8,%xmm4,%xmm4,%xmm0	/* xmm0 = xmm4 with its 8-byte halves swapped */
+	vpclmulqdq	$0x10,%xmm3,%xmm4,%xmm4	/* first folding step: multiply by the constant in xmm3; combined with xmm0 below */
+	movq	%r13,96+8(%rsp)
+	vaesenc	%xmm15,%xmm12,%xmm12
+	movq	%r12,104+8(%rsp)
+	vaesenc	%xmm15,%xmm13,%xmm13
+	vmovups	128-128(%rcx),%xmm1	/* round key 8 */
+	vaesenc	%xmm15,%xmm14,%xmm14
+
+	vaesenc	%xmm1,%xmm9,%xmm9
+	vmovups	144-128(%rcx),%xmm15	/* round key 9 */
+	vaesenc	%xmm1,%xmm10,%xmm10
+	vpsrldq	$8,%xmm6,%xmm6	/* high 8 bytes of the cross-term sum, moved to the lower half; folded into xmm7 next */
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vpxor	%xmm6,%xmm7,%xmm7
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vpxor	%xmm0,%xmm4,%xmm4
+	movbeq	8(%r14),%r13	/* the last pair is stored to 112+8 / 120+8(%rsp) in .Lenc_tail */
+	vaesenc	%xmm1,%xmm13,%xmm13
+	movbeq	0(%r14),%r12
+	vaesenc	%xmm1,%xmm14,%xmm14
+	vmovups	160-128(%rcx),%xmm1	/* round key 10 */
+	cmpl	$11,%ebp	/* ebp = dword callers read from offset 240 of the key schedule; three-way split on below / equal / above 11. NOTE(review): presumably 9, 11, 13 for 128-, 192-, 256-bit keys. */
+	jb	.Lenc_tail	/* below 11: xmm15 is the last vaesenc key and xmm1 the final round key */
+
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vaesenc	%xmm15,%xmm10,%xmm10
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vaesenc	%xmm15,%xmm12,%xmm12
+	vaesenc	%xmm15,%xmm13,%xmm13
+	vaesenc	%xmm15,%xmm14,%xmm14
+
+	vaesenc	%xmm1,%xmm9,%xmm9
+	vaesenc	%xmm1,%xmm10,%xmm10
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+	vmovups	176-128(%rcx),%xmm15	/* round key 11 */
+	vaesenc	%xmm1,%xmm14,%xmm14
+	vmovups	192-128(%rcx),%xmm1	/* round key 12 */
+	je	.Lenc_tail	/* still the flags of cmpl $11,%ebp: vaesenc and vmovups leave EFLAGS untouched */
+
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vaesenc	%xmm15,%xmm10,%xmm10
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vaesenc	%xmm15,%xmm12,%xmm12
+	vaesenc	%xmm15,%xmm13,%xmm13
+	vaesenc	%xmm15,%xmm14,%xmm14
+
+	vaesenc	%xmm1,%xmm9,%xmm9
+	vaesenc	%xmm1,%xmm10,%xmm10
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+	vmovups	208-128(%rcx),%xmm15	/* round key 13 */
+	vaesenc	%xmm1,%xmm14,%xmm14
+	vmovups	224-128(%rcx),%xmm1	/* round key 14 */
+	jmp	.Lenc_tail
+/* Slow path for the counter: entered when adding 6 to the top byte of ebx carried. */
+.align	32
+.Lhandle_ctr32:
+	vmovdqu	(%r11),%xmm0	/* xmm0 = shuffle mask at .Lbswap_mask */
+	vpshufb	%xmm0,%xmm1,%xmm6	/* xmm6 = current counter block, byte-shuffled so vpaddd can add to it */
+	vmovdqu	48(%r11),%xmm5	/* NOTE(review): constants at +48 and +64 are outside this view; presumably +2 and +1 in the low dword */
+	vpaddd	64(%r11),%xmm6,%xmm10
+	vpaddd	%xmm5,%xmm6,%xmm11
+	vmovdqu	0-32(%r9),%xmm3	/* same table load the fast path does before .Lresume_ctr32 */
+	vpaddd	%xmm5,%xmm10,%xmm12
+	vpshufb	%xmm0,%xmm10,%xmm10	/* shuffle each new counter back to block byte order */
+	vpaddd	%xmm5,%xmm11,%xmm13
+	vpshufb	%xmm0,%xmm11,%xmm11
+	vpxor	%xmm15,%xmm10,%xmm10	/* and XOR round key 0 into xmm10, xmm11 as the fast path does */
+	vpaddd	%xmm5,%xmm12,%xmm14
+	vpshufb	%xmm0,%xmm12,%xmm12
+	vpxor	%xmm15,%xmm11,%xmm11
+	vpaddd	%xmm5,%xmm13,%xmm1	/* xmm1 = counter block that starts the next pass */
+	vpshufb	%xmm0,%xmm13,%xmm13
+	vpshufb	%xmm0,%xmm14,%xmm14
+	vpshufb	%xmm0,%xmm1,%xmm1
+	jmp	.Lresume_ctr32
+/* Common tail: last vaesenc round, second folding step, final round and loop bookkeeping. */
+.align	32
+.Lenc_tail:
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vmovdqu	%xmm7,16+8(%rsp)	/* park the high accumulator xmm7 in the scratch slot; it is XORed into xmm8 next pass or at .L6x_done */
+	vpalignr	$8,%xmm4,%xmm4,%xmm8	/* xmm8 = xmm4 with its halves swapped */
+	vaesenc	%xmm15,%xmm10,%xmm10
+	vpclmulqdq	$0x10,%xmm3,%xmm4,%xmm4	/* second folding step with the constant in xmm3; xmm4 is XORed into xmm8 later */
+	vpxor	0(%rdi),%xmm1,%xmm2	/* xmm1 = final round key; XORing the input block into it lets vaesenclast emit input XOR keystream directly */
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vpxor	16(%rdi),%xmm1,%xmm0
+	vaesenc	%xmm15,%xmm12,%xmm12
+	vpxor	32(%rdi),%xmm1,%xmm5
+	vaesenc	%xmm15,%xmm13,%xmm13
+	vpxor	48(%rdi),%xmm1,%xmm6
+	vaesenc	%xmm15,%xmm14,%xmm14
+	vpxor	64(%rdi),%xmm1,%xmm7
+	vpxor	80(%rdi),%xmm1,%xmm3
+	vmovdqu	(%r8),%xmm1	/* reload the next-pass counter block parked at .Lresume_ctr32 */
+
+	vaesenclast	%xmm2,%xmm9,%xmm9
+	vmovdqu	32(%r11),%xmm2	/* reload the counter step constant (xmm2 was used as a temporary) */
+	vaesenclast	%xmm0,%xmm10,%xmm10
+	vpaddb	%xmm2,%xmm1,%xmm0	/* xmm0, xmm5, xmm6, xmm7, xmm3 = the five counter blocks after xmm1 */
+	movq	%r13,112+8(%rsp)	/* store the last movbe pair loaded before the cmpl above */
+	leaq	96(%rdi),%rdi	/* input pointer += 96 */
+	vaesenclast	%xmm5,%xmm11,%xmm11
+	vpaddb	%xmm2,%xmm0,%xmm5
+	movq	%r12,120+8(%rsp)
+	leaq	96(%rsi),%rsi	/* output pointer += 96; the stores below and in the callers use negative offsets */
+	vmovdqu	0-128(%rcx),%xmm15	/* back to round key 0 for the next pass */
+	vaesenclast	%xmm6,%xmm12,%xmm12
+	vpaddb	%xmm2,%xmm5,%xmm6
+	vaesenclast	%xmm7,%xmm13,%xmm13
+	vpaddb	%xmm2,%xmm6,%xmm7
+	vaesenclast	%xmm3,%xmm14,%xmm14
+	vpaddb	%xmm2,%xmm7,%xmm3
+
+	addq	$0x60,%r10	/* r10 = running count of bytes processed; aesni_gcm_decrypt returns it in rax */
+	subq	$0x6,%rdx	/* claim six more blocks */
+	jc	.L6x_done	/* borrow: no further full group of six; return with the outputs unstored in xmm9..xmm14 */
+
+	vmovups	%xmm9,-96(%rsi)	/* store this pass's six output blocks and move the next counters into xmm9..xmm14 */
+	vpxor	%xmm15,%xmm1,%xmm9
+	vmovups	%xmm10,-80(%rsi)
+	vmovdqa	%xmm0,%xmm10
+	vmovups	%xmm11,-64(%rsi)
+	vmovdqa	%xmm5,%xmm11
+	vmovups	%xmm12,-48(%rsi)
+	vmovdqa	%xmm6,%xmm12
+	vmovups	%xmm13,-32(%rsi)
+	vmovdqa	%xmm7,%xmm13
+	vmovups	%xmm14,-16(%rsi)
+	vmovdqa	%xmm3,%xmm14
+	vmovdqu	32+8(%rsp),%xmm7	/* xmm7 = block written by the first movbe pair; operand of hash multiply 1 in the next pass */
+	jmp	.Loop6x
+
+.L6x_done:
+	vpxor	16+8(%rsp),%xmm8,%xmm8	/* fold the two pending partial values into xmm8 before returning */
+	vpxor	%xmm4,%xmm8,%xmm8
+
+	.byte	0xf3,0xc3	/* rep ret */
+.cfi_endproc	
+.size	_aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
+.globl	aesni_gcm_decrypt	/* Exported (hidden-visibility) entry point: bulk AES-CTR decryption combined with hashing of the input, in 96-byte groups, via _aesni_ctr32_ghash_6x. */
+.hidden aesni_gcm_decrypt
+.type	aesni_gcm_decrypt,@function
+.align	32
+aesni_gcm_decrypt:	/* Register roles as read off the code below: rdi = input, rsi = output, rdx = byte length, rcx = AES key schedule, r8 = 16-byte counter block, r9 = hash state. Returns the byte count processed in rax. */
+.cfi_startproc	
+	xorq	%r10,%r10	/* r10 = return value, 0 so far */
+/* The helper consumes 96 bytes per pass, so a length below 0x60 is refused: */
+/* return 0 without reading or writing any memory.                           */
+
+	cmpq	$0x60,%rdx
+	jb	.Lgcm_dec_abort
+
+	leaq	(%rsp),%rax	/* rax = entry stack pointer; it is the frame anchor because rsp gets realigned below */
+.cfi_def_cfa_register	%rax
+	pushq	%rbx	/* save the callee-saved registers this function and its helper use */
+.cfi_offset	%rbx,-16
+	pushq	%rbp
+.cfi_offset	%rbp,-24
+	pushq	%r12
+.cfi_offset	%r12,-32
+	pushq	%r13
+.cfi_offset	%r13,-40
+	pushq	%r14
+.cfi_offset	%r14,-48
+	pushq	%r15
+.cfi_offset	%r15,-56
+	vzeroupper
+
+	vmovdqu	(%r8),%xmm1	/* xmm1 = current counter block */
+	addq	$-128,%rsp	/* reserve 128 bytes of scratch ... */
+	movl	12(%r8),%ebx	/* ebx = last four bytes of the counter block; the helper adds to its top byte to detect wrap */
+	leaq	.Lbswap_mask(%rip),%r11	/* r11 = base of the constant table; the helper reads more constants at +16 .. +64 */
+	leaq	-128(%rcx),%r14	/* r14, r15 are temporaries for the aliasing check below */
+	movq	$0xf80,%r15
+	vmovdqu	(%r9),%xmm8	/* xmm8 = hash state */
+	andq	$-128,%rsp	/* ... and align the frame to 128 bytes */
+	vmovdqu	(%r11),%xmm0	/* xmm0 = shuffle mask used for every vpshufb below */
+	leaq	128(%rcx),%rcx	/* bias the key pointer by +128; the helper addresses round keys as N-128(%rcx) */
+	leaq	32+32(%r9),%r9	/* bias r9 by +64; the helper addresses table entries as N-32(%r9) */
+	movl	240-128(%rcx),%ebp	/* ebp = dword at offset 240 of the key schedule (rcx is already biased); the helper compares it with 11 to pick the number of AES rounds */
+	vpshufb	%xmm0,%xmm8,%xmm8	/* byte-shuffle the hash state into the order the helper works in */
+/* Compare address bits 7..11 (mask 0xf80) of the frame and of key-128; if the frame is 0..767 bytes above, slide rsp down by that distance. NOTE(review): per the label name this avoids the frame aliasing the key schedule - confirm the intended granularity. */
+	andq	%r15,%r14
+	andq	%rsp,%r15
+	subq	%r14,%r15
+	jc	.Ldec_no_key_aliasing
+	cmpq	$768,%r15
+	jnc	.Ldec_no_key_aliasing
+	subq	%r15,%rsp
+.Ldec_no_key_aliasing:
+
+	vmovdqu	80(%rdi),%xmm7	/* preload the first six input blocks for hashing; the one at +80 stays in xmm7 */
+	leaq	(%rdi),%r14	/* r14 = hash read pointer, starts at the input */
+	vmovdqu	64(%rdi),%xmm4
+/* r15 = input + length - 192 is the limit the helper compares r14 against */
+/* before moving r14 forward by another 96 bytes, which keeps its 96-byte   */
+/* loads at (%r14) inside the input. The bound works for any length, not    */
+/* only multiples of 16 or 96.                                              */
+/* NOTE(review): for lengths below 192 this pointer lies before the input;  */
+/* the unsigned compare in the helper then relies on input + length - 192   */
+/* not wrapping below address zero.                                         */
+	leaq	-192(%rdi,%rdx,1),%r15
+
+	vmovdqu	48(%rdi),%xmm5
+	shrq	$4,%rdx	/* byte length to count of 16-byte blocks */
+	xorq	%r10,%r10	/* r10 = 0 again; the helper adds 0x60 per pass */
+	vmovdqu	32(%rdi),%xmm6
+	vpshufb	%xmm0,%xmm7,%xmm7
+	vmovdqu	16(%rdi),%xmm2
+	vpshufb	%xmm0,%xmm4,%xmm4
+	vmovdqu	(%rdi),%xmm3
+	vpshufb	%xmm0,%xmm5,%xmm5
+	vmovdqu	%xmm4,48(%rsp)	/* shuffled blocks go to 48..112(%rsp) in reverse input order; the helper reads them as 48+8..112+8(%rsp) because call pushes 8 bytes */
+	vpshufb	%xmm0,%xmm6,%xmm6
+	vmovdqu	%xmm5,64(%rsp)
+	vpshufb	%xmm0,%xmm2,%xmm2
+	vmovdqu	%xmm6,80(%rsp)
+	vpshufb	%xmm0,%xmm3,%xmm3
+	vmovdqu	%xmm2,96(%rsp)
+	vmovdqu	%xmm3,112(%rsp)
+
+	call	_aesni_ctr32_ghash_6x
+
+	vmovups	%xmm9,-96(%rsi)	/* the helper returns the final six output blocks in xmm9..xmm14 with rsi already advanced past them */
+	vmovups	%xmm10,-80(%rsi)
+	vmovups	%xmm11,-64(%rsi)
+	vmovups	%xmm12,-48(%rsi)
+	vmovups	%xmm13,-32(%rsi)
+	vmovups	%xmm14,-16(%rsi)
+
+	vpshufb	(%r11),%xmm8,%xmm8	/* shuffle the hash state back ... */
+	vmovdqu	%xmm8,-64(%r9)	/* ... and store it where it was loaded from (r9 was biased by +64) */
+
+	vzeroupper
+	movq	-48(%rax),%r15	/* restore the saved registers relative to the entry stack pointer in rax */
+.cfi_restore	%r15
+	movq	-40(%rax),%r14
+.cfi_restore	%r14
+	movq	-32(%rax),%r13
+.cfi_restore	%r13
+	movq	-24(%rax),%r12
+.cfi_restore	%r12
+	movq	-16(%rax),%rbp
+.cfi_restore	%rbp
+	movq	-8(%rax),%rbx
+.cfi_restore	%rbx
+	leaq	(%rax),%rsp	/* drop the realigned frame */
+.cfi_def_cfa_register	%rsp
+.Lgcm_dec_abort:
+	movq	%r10,%rax	/* return bytes processed: 0 on the abort path, 0x60 per helper pass otherwise */
+	.byte	0xf3,0xc3	/* rep ret */
+.cfi_endproc	
+.size	aesni_gcm_decrypt,.-aesni_gcm_decrypt
+.type	_aesni_ctr32_6x,@function	/* Internal helper called by aesni_gcm_encrypt: AES-CTR over six 16-byte blocks, with no hashing. */
+.align	32
+_aesni_ctr32_6x:	/* Uses the caller's register state: xmm1 = counter block, ebx = its last dword, rcx = key schedule + 128, ebp = round parameter, r11 = constant table, xmm0 = shuffle mask, rdi/rsi = input/output. */
+.cfi_startproc	
+	vmovdqu	0-128(%rcx),%xmm4	/* xmm4 = round key 0 */
+	vmovdqu	32(%r11),%xmm2	/* xmm2 = per-block counter step constant. NOTE(review): table outside this view; presumably +1 in the last byte. */
+	leaq	-1(%rbp),%r13	/* r13 = ebp - 1 = iterations of .Loop_ctr32 */
+	vmovups	16-128(%rcx),%xmm15	/* xmm15 = round key 1 */
+	leaq	32-128(%rcx),%r12	/* r12 walks the remaining round keys, starting at round key 2 */
+	vpxor	%xmm4,%xmm1,%xmm9	/* xmm9 = first counter block XOR round key 0 */
+	addl	$100663296,%ebx	/* 100663296 = 6 * 2^24: add 6 to the top byte of ebx */
+	jc	.Lhandle_ctr32_2	/* carry out of that byte: build the counters with 32-bit adds instead */
+	vpaddb	%xmm2,%xmm1,%xmm10	/* xmm10..xmm14 = next five counter blocks, each then XORed with round key 0 */
+	vpaddb	%xmm2,%xmm10,%xmm11
+	vpxor	%xmm4,%xmm10,%xmm10
+	vpaddb	%xmm2,%xmm11,%xmm12
+	vpxor	%xmm4,%xmm11,%xmm11
+	vpaddb	%xmm2,%xmm12,%xmm13
+	vpxor	%xmm4,%xmm12,%xmm12
+	vpaddb	%xmm2,%xmm13,%xmm14
+	vpxor	%xmm4,%xmm13,%xmm13
+	vpaddb	%xmm2,%xmm14,%xmm1	/* xmm1 = counter block for the next call */
+	vpxor	%xmm4,%xmm14,%xmm14
+	jmp	.Loop_ctr32
+/* One AES round per iteration on all six blocks; with the round after the loop and vaesenclast this gives ebp + 1 rounds in total. */
+.align	16
+.Loop_ctr32:
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vaesenc	%xmm15,%xmm10,%xmm10
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vaesenc	%xmm15,%xmm12,%xmm12
+	vaesenc	%xmm15,%xmm13,%xmm13
+	vaesenc	%xmm15,%xmm14,%xmm14
+	vmovups	(%r12),%xmm15	/* fetch the next round key */
+	leaq	16(%r12),%r12
+	decl	%r13d
+	jnz	.Loop_ctr32
+
+	vmovdqu	(%r12),%xmm3	/* xmm3 = final round key */
+	vaesenc	%xmm15,%xmm9,%xmm9	/* last vaesenc round, interleaved with the input XORs */
+	vpxor	0(%rdi),%xmm3,%xmm4	/* input block XOR final round key, so vaesenclast yields input XOR keystream */
+	vaesenc	%xmm15,%xmm10,%xmm10
+	vpxor	16(%rdi),%xmm3,%xmm5
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vpxor	32(%rdi),%xmm3,%xmm6
+	vaesenc	%xmm15,%xmm12,%xmm12
+	vpxor	48(%rdi),%xmm3,%xmm8
+	vaesenc	%xmm15,%xmm13,%xmm13
+	vpxor	64(%rdi),%xmm3,%xmm2
+	vaesenc	%xmm15,%xmm14,%xmm14
+	vpxor	80(%rdi),%xmm3,%xmm3
+	leaq	96(%rdi),%rdi	/* input pointer += 96 */
+
+	vaesenclast	%xmm4,%xmm9,%xmm9
+	vaesenclast	%xmm5,%xmm10,%xmm10
+	vaesenclast	%xmm6,%xmm11,%xmm11
+	vaesenclast	%xmm8,%xmm12,%xmm12
+	vaesenclast	%xmm2,%xmm13,%xmm13
+	vaesenclast	%xmm3,%xmm14,%xmm14
+	vmovups	%xmm9,0(%rsi)	/* store the six output blocks; they also stay in xmm9..xmm14 for the caller */
+	vmovups	%xmm10,16(%rsi)
+	vmovups	%xmm11,32(%rsi)
+	vmovups	%xmm12,48(%rsi)
+	vmovups	%xmm13,64(%rsi)
+	vmovups	%xmm14,80(%rsi)
+	leaq	96(%rsi),%rsi	/* output pointer += 96 */
+
+	.byte	0xf3,0xc3	/* rep ret */
+.align	32
+.Lhandle_ctr32_2:	/* Slow counter path; xmm9 was already formed before the branch, so only xmm10..xmm14 and the next xmm1 are built here. */
+	vpshufb	%xmm0,%xmm1,%xmm6	/* xmm0 = shuffle mask the caller loaded from (%r11); xmm6 = counter shuffled for vpaddd */
+	vmovdqu	48(%r11),%xmm5	/* NOTE(review): constants at +48 and +64 are outside this view; presumably +2 and +1 in the low dword */
+	vpaddd	64(%r11),%xmm6,%xmm10
+	vpaddd	%xmm5,%xmm6,%xmm11
+	vpaddd	%xmm5,%xmm10,%xmm12
+	vpshufb	%xmm0,%xmm10,%xmm10	/* shuffle each counter back, then XOR round key 0 (xmm4) into it */
+	vpaddd	%xmm5,%xmm11,%xmm13
+	vpshufb	%xmm0,%xmm11,%xmm11
+	vpxor	%xmm4,%xmm10,%xmm10
+	vpaddd	%xmm5,%xmm12,%xmm14
+	vpshufb	%xmm0,%xmm12,%xmm12
+	vpxor	%xmm4,%xmm11,%xmm11
+	vpaddd	%xmm5,%xmm13,%xmm1	/* xmm1 = counter block for the next call */
+	vpshufb	%xmm0,%xmm13,%xmm13
+	vpxor	%xmm4,%xmm12,%xmm12
+	vpshufb	%xmm0,%xmm14,%xmm14
+	vpxor	%xmm4,%xmm13,%xmm13
+	vpshufb	%xmm0,%xmm1,%xmm1
+	vpxor	%xmm4,%xmm14,%xmm14
+	jmp	.Loop_ctr32
+.cfi_endproc	
+.size	_aesni_ctr32_6x,.-_aesni_ctr32_6x
+
+.globl	aesni_gcm_encrypt	/* Entry point; returns the value of %r10 in %rax (0 when %rdx < 288). */
+.hidden aesni_gcm_encrypt
+.type	aesni_gcm_encrypt,@function
+.align	32
+aesni_gcm_encrypt:	/* Reads: %rsi, %rdx (length), %rcx, %r8, %r9. NOTE(review): presumably %rdi=in, %rsi=out, %rcx=key, %r8=counter block, %r9=hash state - confirm against the C prototype. */
+.cfi_startproc	
+	xorq	%r10,%r10	/* r10 = 0: the return value on the abort path */
+
+
+
+
+	cmpq	$288,%rdx	/* fewer than 288 bytes (18 blocks of 16): do nothing */
+	jb	.Lgcm_enc_abort
+
+	leaq	(%rsp),%rax	/* rax = stack pointer at entry, used by the epilogue */
+.cfi_def_cfa_register	%rax
+	pushq	%rbx
+.cfi_offset	%rbx,-16
+	pushq	%rbp
+.cfi_offset	%rbp,-24
+	pushq	%r12
+.cfi_offset	%r12,-32
+	pushq	%r13
+.cfi_offset	%r13,-40
+	pushq	%r14
+.cfi_offset	%r14,-48
+	pushq	%r15
+.cfi_offset	%r15,-56
+	vzeroupper
+
+	vmovdqu	(%r8),%xmm1	/* 16-byte block at (%r8) */
+	addq	$-128,%rsp	/* reserve 128 bytes of scratch */
+	movl	12(%r8),%ebx	/* last 32-bit word of that block */
+	leaq	.Lbswap_mask(%rip),%r11	/* r11 = constant table; 16(%r11) is .Lpoly */
+	leaq	-128(%rcx),%r14
+	movq	$0xf80,%r15
+	leaq	128(%rcx),%rcx	/* bias rcx by +128; later offsets are written as N-128 */
+	vmovdqu	(%r11),%xmm0	/* xmm0 = byte-reversal mask */
+	andq	$-128,%rsp	/* align scratch area to 128 bytes */
+	movl	240-128(%rcx),%ebp	/* 32-bit word at offset 240 of the original %rcx */
+
+	andq	%r15,%r14	/* compare address bits 0xf80 of (rcx-128) and rsp ... */
+	andq	%rsp,%r15
+	subq	%r14,%r15
+	jc	.Lenc_no_key_aliasing
+	cmpq	$768,%r15
+	jnc	.Lenc_no_key_aliasing
+	subq	%r15,%rsp	/* ... and lower rsp by the difference when it is in [0,768) */
+.Lenc_no_key_aliasing:
+
+	leaq	(%rsi),%r14	/* r14 = rsi */
+
+
+
+
+
+
+
+
+	leaq	-192(%rsi,%rdx,1),%r15	/* r15 = rsi + rdx - 192 */
+
+	shrq	$4,%rdx	/* byte count -> count of 16-byte blocks */
+
+	call	_aesni_ctr32_6x	/* first batch (helper defined earlier in this file) */
+	vpshufb	%xmm0,%xmm9,%xmm8	/* byte-reverse xmm9..xmm14; park five of them on the stack, keep the sixth in xmm7 */
+	vpshufb	%xmm0,%xmm10,%xmm2
+	vmovdqu	%xmm8,112(%rsp)
+	vpshufb	%xmm0,%xmm11,%xmm4
+	vmovdqu	%xmm2,96(%rsp)
+	vpshufb	%xmm0,%xmm12,%xmm5
+	vmovdqu	%xmm4,80(%rsp)
+	vpshufb	%xmm0,%xmm13,%xmm6
+	vmovdqu	%xmm5,64(%rsp)
+	vpshufb	%xmm0,%xmm14,%xmm7
+	vmovdqu	%xmm6,48(%rsp)
+
+	call	_aesni_ctr32_6x	/* second batch */
+
+	vmovdqu	(%r9),%xmm8	/* load 16 bytes from (%r9) ... */
+	leaq	32+32(%r9),%r9	/* r9 += 64; later table offsets are written as N-32 */
+	subq	$12,%rdx	/* account for the 12 blocks of the two batches above */
+	movq	$192,%r10	/* r10 = 192 = 12*16 */
+	vpshufb	%xmm0,%xmm8,%xmm8	/* ... and byte-reverse them */
+
+	call	_aesni_ctr32_ghash_6x	/* main loop lives in this helper (not shown here) */
+	vmovdqu	32(%rsp),%xmm7
+	vmovdqu	(%r11),%xmm0	/* reload byte-reversal mask */
+	vmovdqu	0-32(%r9),%xmm3
+	vpunpckhqdq	%xmm7,%xmm7,%xmm1
+	vmovdqu	32-32(%r9),%xmm15
+	vmovups	%xmm9,-96(%rsi)	/* write xmm9..xmm14 to the 96 bytes just below %rsi, byte-reversing each afterwards */
+	vpshufb	%xmm0,%xmm9,%xmm9
+	vpxor	%xmm7,%xmm1,%xmm1
+	vmovups	%xmm10,-80(%rsi)
+	vpshufb	%xmm0,%xmm10,%xmm10
+	vmovups	%xmm11,-64(%rsi)
+	vpshufb	%xmm0,%xmm11,%xmm11
+	vmovups	%xmm12,-48(%rsi)
+	vpshufb	%xmm0,%xmm12,%xmm12
+	vmovups	%xmm13,-32(%rsi)
+	vpshufb	%xmm0,%xmm13,%xmm13
+	vmovups	%xmm14,-16(%rsi)
+	vpshufb	%xmm0,%xmm14,%xmm14
+	vmovdqu	%xmm9,16(%rsp)
+	vmovdqu	48(%rsp),%xmm6	/* carry-less multiply-accumulate over the values parked at 32..112(%rsp) */
+	vmovdqu	16-32(%r9),%xmm0
+	vpunpckhqdq	%xmm6,%xmm6,%xmm2
+	vpclmulqdq	$0x00,%xmm3,%xmm7,%xmm5
+	vpxor	%xmm6,%xmm2,%xmm2
+	vpclmulqdq	$0x11,%xmm3,%xmm7,%xmm7
+	vpclmulqdq	$0x00,%xmm15,%xmm1,%xmm1
+
+	vmovdqu	64(%rsp),%xmm9
+	vpclmulqdq	$0x00,%xmm0,%xmm6,%xmm4
+	vmovdqu	48-32(%r9),%xmm3
+	vpxor	%xmm5,%xmm4,%xmm4
+	vpunpckhqdq	%xmm9,%xmm9,%xmm5
+	vpclmulqdq	$0x11,%xmm0,%xmm6,%xmm6
+	vpxor	%xmm9,%xmm5,%xmm5
+	vpxor	%xmm7,%xmm6,%xmm6
+	vpclmulqdq	$0x10,%xmm15,%xmm2,%xmm2
+	vmovdqu	80-32(%r9),%xmm15
+	vpxor	%xmm1,%xmm2,%xmm2
+
+	vmovdqu	80(%rsp),%xmm1
+	vpclmulqdq	$0x00,%xmm3,%xmm9,%xmm7
+	vmovdqu	64-32(%r9),%xmm0
+	vpxor	%xmm4,%xmm7,%xmm7
+	vpunpckhqdq	%xmm1,%xmm1,%xmm4
+	vpclmulqdq	$0x11,%xmm3,%xmm9,%xmm9
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpxor	%xmm6,%xmm9,%xmm9
+	vpclmulqdq	$0x00,%xmm15,%xmm5,%xmm5
+	vpxor	%xmm2,%xmm5,%xmm5
+
+	vmovdqu	96(%rsp),%xmm2
+	vpclmulqdq	$0x00,%xmm0,%xmm1,%xmm6
+	vmovdqu	96-32(%r9),%xmm3
+	vpxor	%xmm7,%xmm6,%xmm6
+	vpunpckhqdq	%xmm2,%xmm2,%xmm7
+	vpclmulqdq	$0x11,%xmm0,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm7,%xmm7
+	vpxor	%xmm9,%xmm1,%xmm1
+	vpclmulqdq	$0x10,%xmm15,%xmm4,%xmm4
+	vmovdqu	128-32(%r9),%xmm15
+	vpxor	%xmm5,%xmm4,%xmm4
+
+	vpxor	112(%rsp),%xmm8,%xmm8	/* fold the running value in xmm8 into the last parked block */
+	vpclmulqdq	$0x00,%xmm3,%xmm2,%xmm5
+	vmovdqu	112-32(%r9),%xmm0
+	vpunpckhqdq	%xmm8,%xmm8,%xmm9
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x11,%xmm3,%xmm2,%xmm2
+	vpxor	%xmm8,%xmm9,%xmm9
+	vpxor	%xmm1,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm15,%xmm7,%xmm7
+	vpxor	%xmm4,%xmm7,%xmm4
+
+	vpclmulqdq	$0x00,%xmm0,%xmm8,%xmm6
+	vmovdqu	0-32(%r9),%xmm3	/* restart at table offset 0 for the second group (xmm14 down to xmm10, then 16(%rsp)) */
+	vpunpckhqdq	%xmm14,%xmm14,%xmm1
+	vpclmulqdq	$0x11,%xmm0,%xmm8,%xmm8
+	vpxor	%xmm14,%xmm1,%xmm1
+	vpxor	%xmm5,%xmm6,%xmm5
+	vpclmulqdq	$0x10,%xmm15,%xmm9,%xmm9
+	vmovdqu	32-32(%r9),%xmm15
+	vpxor	%xmm2,%xmm8,%xmm7
+	vpxor	%xmm4,%xmm9,%xmm6
+
+	vmovdqu	16-32(%r9),%xmm0
+	vpxor	%xmm5,%xmm7,%xmm9
+	vpclmulqdq	$0x00,%xmm3,%xmm14,%xmm4
+	vpxor	%xmm9,%xmm6,%xmm6
+	vpunpckhqdq	%xmm13,%xmm13,%xmm2
+	vpclmulqdq	$0x11,%xmm3,%xmm14,%xmm14
+	vpxor	%xmm13,%xmm2,%xmm2
+	vpslldq	$8,%xmm6,%xmm9
+	vpclmulqdq	$0x00,%xmm15,%xmm1,%xmm1
+	vpxor	%xmm9,%xmm5,%xmm8
+	vpsrldq	$8,%xmm6,%xmm6
+	vpxor	%xmm6,%xmm7,%xmm7
+
+	vpclmulqdq	$0x00,%xmm0,%xmm13,%xmm5
+	vmovdqu	48-32(%r9),%xmm3
+	vpxor	%xmm4,%xmm5,%xmm5
+	vpunpckhqdq	%xmm12,%xmm12,%xmm9
+	vpclmulqdq	$0x11,%xmm0,%xmm13,%xmm13
+	vpxor	%xmm12,%xmm9,%xmm9
+	vpxor	%xmm14,%xmm13,%xmm13
+	vpalignr	$8,%xmm8,%xmm8,%xmm14
+	vpclmulqdq	$0x10,%xmm15,%xmm2,%xmm2
+	vmovdqu	80-32(%r9),%xmm15
+	vpxor	%xmm1,%xmm2,%xmm2
+
+	vpclmulqdq	$0x00,%xmm3,%xmm12,%xmm4
+	vmovdqu	64-32(%r9),%xmm0
+	vpxor	%xmm5,%xmm4,%xmm4
+	vpunpckhqdq	%xmm11,%xmm11,%xmm1
+	vpclmulqdq	$0x11,%xmm3,%xmm12,%xmm12
+	vpxor	%xmm11,%xmm1,%xmm1
+	vpxor	%xmm13,%xmm12,%xmm12
+	vxorps	16(%rsp),%xmm7,%xmm7	/* 16(%rsp) holds the byte-reversed xmm9 saved above */
+	vpclmulqdq	$0x00,%xmm15,%xmm9,%xmm9
+	vpxor	%xmm2,%xmm9,%xmm9
+
+	vpclmulqdq	$0x10,16(%r11),%xmm8,%xmm8	/* multiply by the .Lpoly constant (first of two steps) */
+	vxorps	%xmm14,%xmm8,%xmm8
+
+	vpclmulqdq	$0x00,%xmm0,%xmm11,%xmm5
+	vmovdqu	96-32(%r9),%xmm3
+	vpxor	%xmm4,%xmm5,%xmm5
+	vpunpckhqdq	%xmm10,%xmm10,%xmm2
+	vpclmulqdq	$0x11,%xmm0,%xmm11,%xmm11
+	vpxor	%xmm10,%xmm2,%xmm2
+	vpalignr	$8,%xmm8,%xmm8,%xmm14
+	vpxor	%xmm12,%xmm11,%xmm11
+	vpclmulqdq	$0x10,%xmm15,%xmm1,%xmm1
+	vmovdqu	128-32(%r9),%xmm15
+	vpxor	%xmm9,%xmm1,%xmm1
+
+	vxorps	%xmm7,%xmm14,%xmm14
+	vpclmulqdq	$0x10,16(%r11),%xmm8,%xmm8	/* multiply by the .Lpoly constant (second step) */
+	vxorps	%xmm14,%xmm8,%xmm8
+
+	vpclmulqdq	$0x00,%xmm3,%xmm10,%xmm4
+	vmovdqu	112-32(%r9),%xmm0
+	vpxor	%xmm5,%xmm4,%xmm4
+	vpunpckhqdq	%xmm8,%xmm8,%xmm9
+	vpclmulqdq	$0x11,%xmm3,%xmm10,%xmm10
+	vpxor	%xmm8,%xmm9,%xmm9
+	vpxor	%xmm11,%xmm10,%xmm10
+	vpclmulqdq	$0x00,%xmm15,%xmm2,%xmm2
+	vpxor	%xmm1,%xmm2,%xmm2
+
+	vpclmulqdq	$0x00,%xmm0,%xmm8,%xmm5
+	vpclmulqdq	$0x11,%xmm0,%xmm8,%xmm7
+	vpxor	%xmm4,%xmm5,%xmm5
+	vpclmulqdq	$0x10,%xmm15,%xmm9,%xmm6
+	vpxor	%xmm10,%xmm7,%xmm7
+	vpxor	%xmm2,%xmm6,%xmm6
+
+	vpxor	%xmm5,%xmm7,%xmm4
+	vpxor	%xmm4,%xmm6,%xmm6
+	vpslldq	$8,%xmm6,%xmm1
+	vmovdqu	16(%r11),%xmm3	/* xmm3 = .Lpoly constant */
+	vpsrldq	$8,%xmm6,%xmm6
+	vpxor	%xmm1,%xmm5,%xmm8
+	vpxor	%xmm6,%xmm7,%xmm7
+
+	vpalignr	$8,%xmm8,%xmm8,%xmm2	/* final two-step multiply by .Lpoly */
+	vpclmulqdq	$0x10,%xmm3,%xmm8,%xmm8
+	vpxor	%xmm2,%xmm8,%xmm8
+
+	vpalignr	$8,%xmm8,%xmm8,%xmm2
+	vpclmulqdq	$0x10,%xmm3,%xmm8,%xmm8
+	vpxor	%xmm7,%xmm2,%xmm2
+	vpxor	%xmm2,%xmm8,%xmm8
+	vpshufb	(%r11),%xmm8,%xmm8	/* byte-reverse the result ... */
+	vmovdqu	%xmm8,-64(%r9)	/* ... and store it where xmm8 was loaded from (r9 was advanced by 64) */
+
+	vzeroupper
+	movq	-48(%rax),%r15	/* restore callee-saved registers relative to the entry stack pointer */
+.cfi_restore	%r15
+	movq	-40(%rax),%r14
+.cfi_restore	%r14
+	movq	-32(%rax),%r13
+.cfi_restore	%r13
+	movq	-24(%rax),%r12
+.cfi_restore	%r12
+	movq	-16(%rax),%rbp
+.cfi_restore	%rbp
+	movq	-8(%rax),%rbx
+.cfi_restore	%rbx
+	leaq	(%rax),%rsp	/* drop scratch area and pushes in one step */
+.cfi_def_cfa_register	%rsp
+.Lgcm_enc_abort:
+	movq	%r10,%rax	/* return value = r10 */
+	.byte	0xf3,0xc3	/* rep ret */
+.cfi_endproc	
+.size	aesni_gcm_encrypt,.-aesni_gcm_encrypt
+.align	64	/* Constant table; aesni_gcm_encrypt addresses .Lpoly as 16 bytes past .Lbswap_mask. */
+.Lbswap_mask:	/* shuffle mask that reverses the 16 bytes of a vector */
+.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.Lpoly:	/* operand of the vpclmulqdq $0x10 reduction steps: only the last byte (0xc2) is non-zero */
+.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
+.Lone_msb:	/* value 1 in the last byte */
+.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
+.Ltwo_lsb:	/* value 2 in the first byte */
+.byte	2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+.Lone_lsb:	/* value 1 in the first byte */
+.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+.byte	65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	/* ASCII banner "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated */
+.align	64
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
new file mode 100644
index 0000000..0c980a3
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
@@ -0,0 +1,4381 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+.extern	OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
+.globl	aesni_encrypt	/* Encrypts the single 16-byte block at (%rdi) into (%rsi) using the key schedule at %rdx. */
+.hidden aesni_encrypt
+.type	aesni_encrypt,@function
+.align	16
+aesni_encrypt:
+	movups	(%rdi),%xmm2	/* xmm2 = input block */
+	movl	240(%rdx),%eax	/* eax = loop count stored at offset 240 of the key schedule */
+	movups	(%rdx),%xmm0	/* round key 0 */
+	movups	16(%rdx),%xmm1	/* round key 1 */
+	leaq	32(%rdx),%rdx	/* rdx -> round key 2 */
+	xorps	%xmm0,%xmm2	/* initial whitening with round key 0 */
+.Loop_enc1_1:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 (hand-encoded) */
+	decl	%eax
+	movups	(%rdx),%xmm1	/* fetch next round key; movups/leaq leave the flags from decl intact */
+	leaq	16(%rdx),%rdx
+	jnz	.Loop_enc1_1
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
+	pxor	%xmm0,%xmm0	/* wipe key material from registers */
+	pxor	%xmm1,%xmm1
+	movups	%xmm2,(%rsi)	/* store the result */
+	pxor	%xmm2,%xmm2	/* wipe the data register as well */
+	.byte	0xf3,0xc3	/* rep ret */
+.size	aesni_encrypt,.-aesni_encrypt
+
+.globl	aesni_decrypt	/* Decrypts the single 16-byte block at (%rdi) into (%rsi) using the key schedule at %rdx. */
+.hidden aesni_decrypt
+.type	aesni_decrypt,@function
+.align	16
+aesni_decrypt:
+	movups	(%rdi),%xmm2	/* xmm2 = input block */
+	movl	240(%rdx),%eax	/* eax = loop count stored at offset 240 of the key schedule */
+	movups	(%rdx),%xmm0	/* round key 0 */
+	movups	16(%rdx),%xmm1	/* round key 1 */
+	leaq	32(%rdx),%rdx	/* rdx -> round key 2 */
+	xorps	%xmm0,%xmm2	/* initial whitening with round key 0 */
+.Loop_dec1_2:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 (hand-encoded) */
+	decl	%eax
+	movups	(%rdx),%xmm1	/* fetch next round key; movups/leaq leave the flags from decl intact */
+	leaq	16(%rdx),%rdx
+	jnz	.Loop_dec1_2
+.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
+	pxor	%xmm0,%xmm0	/* wipe key material from registers */
+	pxor	%xmm1,%xmm1
+	movups	%xmm2,(%rsi)	/* store the result */
+	pxor	%xmm2,%xmm2	/* wipe the data register as well */
+	.byte	0xf3,0xc3	/* rep ret */
+.size	aesni_decrypt, .-aesni_decrypt
+.type	_aesni_encrypt2,@function	/* Internal helper: encrypts xmm2 and xmm3 in parallel. In: %rcx = key schedule, %eax = round count. Clobbers rax, rcx, xmm0, xmm1. */
+.align	16
+_aesni_encrypt2:
+	movups	(%rcx),%xmm0	/* round key 0 */
+	shll	$4,%eax	/* round count -> byte offset (16 bytes per round key) */
+	movups	16(%rcx),%xmm1	/* round key 1 */
+	xorps	%xmm0,%xmm2	/* whiten both blocks with round key 0 */
+	xorps	%xmm0,%xmm3
+	movups	32(%rcx),%xmm0	/* round key 2 */
+	leaq	32(%rcx,%rax,1),%rcx	/* rcx -> end of the schedule; rax becomes a negative index that counts up to 0 */
+	negq	%rax
+	addq	$16,%rax
+
+.Lenc_loop2:	/* two rounds per iteration, alternating xmm1 / xmm0 as the round key */
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF for the jnz below; the instructions in between do not touch flags */
+.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
+.byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	.Lenc_loop2
+
+.byte	102,15,56,220,209	/* last full round ... */
+.byte	102,15,56,220,217
+.byte	102,15,56,221,208	/* ... then aesenclast %xmm0 on each block */
+.byte	102,15,56,221,216
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_aesni_encrypt2,.-_aesni_encrypt2
+.type	_aesni_decrypt2,@function	/* Internal helper: decrypts xmm2 and xmm3 in parallel. In: %rcx = key schedule, %eax = round count. Clobbers rax, rcx, xmm0, xmm1. */
+.align	16
+_aesni_decrypt2:
+	movups	(%rcx),%xmm0	/* round key 0 */
+	shll	$4,%eax	/* round count -> byte offset (16 bytes per round key) */
+	movups	16(%rcx),%xmm1	/* round key 1 */
+	xorps	%xmm0,%xmm2	/* whiten both blocks with round key 0 */
+	xorps	%xmm0,%xmm3
+	movups	32(%rcx),%xmm0	/* round key 2 */
+	leaq	32(%rcx,%rax,1),%rcx	/* rcx -> end of the schedule; rax becomes a negative index that counts up to 0 */
+	negq	%rax
+	addq	$16,%rax
+
+.Ldec_loop2:	/* two rounds per iteration, alternating xmm1 / xmm0 as the round key */
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF for the jnz below; the instructions in between do not touch flags */
+.byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2 */
+.byte	102,15,56,222,216	/* aesdec %xmm0,%xmm3 */
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	.Ldec_loop2
+
+.byte	102,15,56,222,209	/* last full round ... */
+.byte	102,15,56,222,217
+.byte	102,15,56,223,208	/* ... then aesdeclast %xmm0 on each block */
+.byte	102,15,56,223,216
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_aesni_decrypt2,.-_aesni_decrypt2
+.type	_aesni_encrypt3,@function	/* Internal helper: encrypts xmm2..xmm4 in parallel. In: %rcx = key schedule, %eax = round count. Clobbers rax, rcx, xmm0, xmm1. */
+.align	16
+_aesni_encrypt3:
+	movups	(%rcx),%xmm0	/* round key 0 */
+	shll	$4,%eax	/* round count -> byte offset */
+	movups	16(%rcx),%xmm1	/* round key 1 */
+	xorps	%xmm0,%xmm2	/* whiten all three blocks with round key 0 */
+	xorps	%xmm0,%xmm3
+	xorps	%xmm0,%xmm4
+	movups	32(%rcx),%xmm0	/* round key 2 */
+	leaq	32(%rcx,%rax,1),%rcx	/* rcx -> end of the schedule; rax becomes a negative index that counts up to 0 */
+	negq	%rax
+	addq	$16,%rax
+
+.Lenc_loop3:	/* two rounds per iteration: aesenc with xmm1, then with xmm0 */
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF for the jnz below */
+.byte	102,15,56,220,208
+.byte	102,15,56,220,216
+.byte	102,15,56,220,224
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	.Lenc_loop3
+
+.byte	102,15,56,220,209	/* last full round (aesenc with xmm1) ... */
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,221,208	/* ... then aesenclast with xmm0 */
+.byte	102,15,56,221,216
+.byte	102,15,56,221,224
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_aesni_encrypt3,.-_aesni_encrypt3
+.type	_aesni_decrypt3,@function	/* Internal helper: decrypts xmm2..xmm4 in parallel. In: %rcx = key schedule, %eax = round count. Clobbers rax, rcx, xmm0, xmm1. */
+.align	16
+_aesni_decrypt3:
+	movups	(%rcx),%xmm0	/* round key 0 */
+	shll	$4,%eax	/* round count -> byte offset */
+	movups	16(%rcx),%xmm1	/* round key 1 */
+	xorps	%xmm0,%xmm2	/* whiten all three blocks with round key 0 */
+	xorps	%xmm0,%xmm3
+	xorps	%xmm0,%xmm4
+	movups	32(%rcx),%xmm0	/* round key 2 */
+	leaq	32(%rcx,%rax,1),%rcx	/* rcx -> end of the schedule; rax becomes a negative index that counts up to 0 */
+	negq	%rax
+	addq	$16,%rax
+
+.Ldec_loop3:	/* two rounds per iteration: aesdec with xmm1, then with xmm0 */
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF for the jnz below */
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	.Ldec_loop3
+
+.byte	102,15,56,222,209	/* last full round (aesdec with xmm1) ... */
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,223,208	/* ... then aesdeclast with xmm0 */
+.byte	102,15,56,223,216
+.byte	102,15,56,223,224
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_aesni_decrypt3,.-_aesni_decrypt3
+.type	_aesni_encrypt4,@function	/* Internal helper: encrypts xmm2..xmm5 in parallel. In: %rcx = key schedule, %eax = round count. Clobbers rax, rcx, xmm0, xmm1. */
+.align	16
+_aesni_encrypt4:
+	movups	(%rcx),%xmm0	/* round key 0 */
+	shll	$4,%eax	/* round count -> byte offset */
+	movups	16(%rcx),%xmm1	/* round key 1 */
+	xorps	%xmm0,%xmm2	/* whiten all four blocks with round key 0 */
+	xorps	%xmm0,%xmm3
+	xorps	%xmm0,%xmm4
+	xorps	%xmm0,%xmm5
+	movups	32(%rcx),%xmm0	/* round key 2 */
+	leaq	32(%rcx,%rax,1),%rcx	/* rcx -> end of the schedule; rax becomes a negative index that counts up to 0 */
+	negq	%rax
+.byte	0x0f,0x1f,0x00	/* 3-byte NOP (padding) */
+	addq	$16,%rax
+
+.Lenc_loop4:	/* two rounds per iteration: aesenc with xmm1, then with xmm0 */
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF for the jnz below */
+.byte	102,15,56,220,208
+.byte	102,15,56,220,216
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	.Lenc_loop4
+
+.byte	102,15,56,220,209	/* last full round (aesenc with xmm1) ... */
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+.byte	102,15,56,221,208	/* ... then aesenclast with xmm0 */
+.byte	102,15,56,221,216
+.byte	102,15,56,221,224
+.byte	102,15,56,221,232
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_aesni_encrypt4,.-_aesni_encrypt4
+.type	_aesni_decrypt4,@function	/* Internal helper: decrypts xmm2..xmm5 in parallel. In: %rcx = key schedule, %eax = round count. Clobbers rax, rcx, xmm0, xmm1. */
+.align	16
+_aesni_decrypt4:
+	movups	(%rcx),%xmm0	/* round key 0 */
+	shll	$4,%eax	/* round count -> byte offset */
+	movups	16(%rcx),%xmm1	/* round key 1 */
+	xorps	%xmm0,%xmm2	/* whiten all four blocks with round key 0 */
+	xorps	%xmm0,%xmm3
+	xorps	%xmm0,%xmm4
+	xorps	%xmm0,%xmm5
+	movups	32(%rcx),%xmm0	/* round key 2 */
+	leaq	32(%rcx,%rax,1),%rcx	/* rcx -> end of the schedule; rax becomes a negative index that counts up to 0 */
+	negq	%rax
+.byte	0x0f,0x1f,0x00	/* 3-byte NOP (padding) */
+	addq	$16,%rax
+
+.Ldec_loop4:	/* two rounds per iteration: aesdec with xmm1, then with xmm0 */
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF for the jnz below */
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	.Ldec_loop4
+
+.byte	102,15,56,222,209	/* last full round (aesdec with xmm1) ... */
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,223,208	/* ... then aesdeclast with xmm0 */
+.byte	102,15,56,223,216
+.byte	102,15,56,223,224
+.byte	102,15,56,223,232
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_aesni_decrypt4,.-_aesni_decrypt4
+.type	_aesni_encrypt6,@function	/* Internal helper: encrypts xmm2..xmm7 in parallel. In: %rcx = key schedule, %eax = round count. Clobbers rax, rcx, xmm0, xmm1. */
+.align	16
+_aesni_encrypt6:
+	movups	(%rcx),%xmm0	/* round key 0 */
+	shll	$4,%eax	/* round count -> byte offset */
+	movups	16(%rcx),%xmm1	/* round key 1 */
+	xorps	%xmm0,%xmm2	/* whitening of the six blocks is interleaved with the first aesenc round */
+	pxor	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	leaq	32(%rcx,%rax,1),%rcx	/* rcx -> end of the schedule */
+	negq	%rax	/* rax = negative index into the schedule */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+	pxor	%xmm0,%xmm5
+	pxor	%xmm0,%xmm6
+.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
+	pxor	%xmm0,%xmm7
+	movups	(%rcx,%rax,1),%xmm0	/* round key 2 */
+	addq	$16,%rax
+	jmp	.Lenc_loop6_enter	/* first round already applied to xmm2..xmm4; join mid-loop */
+.align	16
+.Lenc_loop6:	/* also entered directly via "call .Lenc_loop6" from .Lctr32_loop6 */
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.Lenc_loop6_enter:
+.byte	102,15,56,220,233	/* aesenc %xmm1 on xmm5..xmm7 */
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF for the jnz below */
+.byte	102,15,56,220,208	/* aesenc %xmm0 on xmm2..xmm7 */
+.byte	102,15,56,220,216
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+.byte	102,15,56,220,240
+.byte	102,15,56,220,248
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	.Lenc_loop6
+
+.byte	102,15,56,220,209	/* last full round (aesenc with xmm1) ... */
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+.byte	102,15,56,221,208	/* ... then aesenclast with xmm0 */
+.byte	102,15,56,221,216
+.byte	102,15,56,221,224
+.byte	102,15,56,221,232
+.byte	102,15,56,221,240
+.byte	102,15,56,221,248
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_aesni_encrypt6,.-_aesni_encrypt6
+.type	_aesni_decrypt6,@function	/* Internal helper: decrypts xmm2..xmm7 in parallel. In: %rcx = key schedule, %eax = round count. Clobbers rax, rcx, xmm0, xmm1. */
+.align	16
+_aesni_decrypt6:
+	movups	(%rcx),%xmm0	/* round key 0 */
+	shll	$4,%eax	/* round count -> byte offset */
+	movups	16(%rcx),%xmm1	/* round key 1 */
+	xorps	%xmm0,%xmm2	/* whitening of the six blocks is interleaved with the first aesdec round */
+	pxor	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	leaq	32(%rcx,%rax,1),%rcx	/* rcx -> end of the schedule */
+	negq	%rax	/* rax = negative index into the schedule */
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
+	pxor	%xmm0,%xmm5
+	pxor	%xmm0,%xmm6
+.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
+	pxor	%xmm0,%xmm7
+	movups	(%rcx,%rax,1),%xmm0	/* round key 2 */
+	addq	$16,%rax
+	jmp	.Ldec_loop6_enter	/* first round already applied to xmm2..xmm4; join mid-loop */
+.align	16
+.Ldec_loop6:
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.Ldec_loop6_enter:
+.byte	102,15,56,222,233	/* aesdec %xmm1 on xmm5..xmm7 */
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF for the jnz below */
+.byte	102,15,56,222,208	/* aesdec %xmm0 on xmm2..xmm7 */
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	.Ldec_loop6
+
+.byte	102,15,56,222,209	/* last full round (aesdec with xmm1) ... */
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.byte	102,15,56,223,208	/* ... then aesdeclast with xmm0 */
+.byte	102,15,56,223,216
+.byte	102,15,56,223,224
+.byte	102,15,56,223,232
+.byte	102,15,56,223,240
+.byte	102,15,56,223,248
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_aesni_decrypt6,.-_aesni_decrypt6
+.type	_aesni_encrypt8,@function	/* Internal helper: encrypts xmm2..xmm9 in parallel. In: %rcx = key schedule, %eax = round count. Clobbers rax, rcx, xmm0, xmm1. */
+.align	16
+_aesni_encrypt8:
+	movups	(%rcx),%xmm0	/* round key 0 */
+	shll	$4,%eax	/* round count -> byte offset */
+	movups	16(%rcx),%xmm1	/* round key 1 */
+	xorps	%xmm0,%xmm2	/* whitening of the eight blocks is interleaved with the first aesenc round */
+	xorps	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+	pxor	%xmm0,%xmm5
+	pxor	%xmm0,%xmm6
+	leaq	32(%rcx,%rax,1),%rcx	/* rcx -> end of the schedule */
+	negq	%rax	/* rax = negative index into the schedule */
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	pxor	%xmm0,%xmm7
+	pxor	%xmm0,%xmm8
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+	pxor	%xmm0,%xmm9
+	movups	(%rcx,%rax,1),%xmm0	/* round key 2 */
+	addq	$16,%rax
+	jmp	.Lenc_loop8_inner	/* first round already applied to xmm2/xmm3; join mid-loop */
+.align	16
+.Lenc_loop8:
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+.Lenc_loop8_inner:
+.byte	102,15,56,220,225	/* aesenc %xmm1 on xmm4..xmm9 */
+.byte	102,15,56,220,233
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+.byte	102,68,15,56,220,193	/* REX-prefixed form: aesenc %xmm1,%xmm8 */
+.byte	102,68,15,56,220,201	/* aesenc %xmm1,%xmm9 */
+.Lenc_loop8_enter:
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF for the jnz below */
+.byte	102,15,56,220,208	/* aesenc %xmm0 on xmm2..xmm9 */
+.byte	102,15,56,220,216
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+.byte	102,15,56,220,240
+.byte	102,15,56,220,248
+.byte	102,68,15,56,220,192
+.byte	102,68,15,56,220,200
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	.Lenc_loop8
+
+.byte	102,15,56,220,209	/* last full round (aesenc with xmm1) ... */
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+.byte	102,68,15,56,220,193
+.byte	102,68,15,56,220,201
+.byte	102,15,56,221,208	/* ... then aesenclast with xmm0 */
+.byte	102,15,56,221,216
+.byte	102,15,56,221,224
+.byte	102,15,56,221,232
+.byte	102,15,56,221,240
+.byte	102,15,56,221,248
+.byte	102,68,15,56,221,192
+.byte	102,68,15,56,221,200
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_aesni_encrypt8,.-_aesni_encrypt8
+.type	_aesni_decrypt8,@function	/* Internal helper: decrypts xmm2..xmm9 in parallel. In: %rcx = key schedule, %eax = round count. Clobbers rax, rcx, xmm0, xmm1. */
+.align	16
+_aesni_decrypt8:
+	movups	(%rcx),%xmm0	/* round key 0 */
+	shll	$4,%eax	/* round count -> byte offset */
+	movups	16(%rcx),%xmm1	/* round key 1 */
+	xorps	%xmm0,%xmm2	/* whitening of the eight blocks is interleaved with the first aesdec round */
+	xorps	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+	pxor	%xmm0,%xmm5
+	pxor	%xmm0,%xmm6
+	leaq	32(%rcx,%rax,1),%rcx	/* rcx -> end of the schedule */
+	negq	%rax	/* rax = negative index into the schedule */
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	pxor	%xmm0,%xmm7
+	pxor	%xmm0,%xmm8
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
+	pxor	%xmm0,%xmm9
+	movups	(%rcx,%rax,1),%xmm0	/* round key 2 */
+	addq	$16,%rax
+	jmp	.Ldec_loop8_inner	/* first round already applied to xmm2/xmm3; join mid-loop */
+.align	16
+.Ldec_loop8:
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.Ldec_loop8_inner:
+.byte	102,15,56,222,225	/* aesdec %xmm1 on xmm4..xmm9 */
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.byte	102,68,15,56,222,193	/* REX-prefixed form: aesdec %xmm1,%xmm8 */
+.byte	102,68,15,56,222,201	/* aesdec %xmm1,%xmm9 */
+.Ldec_loop8_enter:
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF for the jnz below */
+.byte	102,15,56,222,208	/* aesdec %xmm0 on xmm2..xmm9 */
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+.byte	102,68,15,56,222,192
+.byte	102,68,15,56,222,200
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	.Ldec_loop8
+
+.byte	102,15,56,222,209	/* last full round (aesdec with xmm1) ... */
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.byte	102,68,15,56,222,193
+.byte	102,68,15,56,222,201
+.byte	102,15,56,223,208	/* ... then aesdeclast with xmm0 */
+.byte	102,15,56,223,216
+.byte	102,15,56,223,224
+.byte	102,15,56,223,232
+.byte	102,15,56,223,240
+.byte	102,15,56,223,248
+.byte	102,68,15,56,223,192
+.byte	102,68,15,56,223,200
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_aesni_decrypt8,.-_aesni_decrypt8
+.globl	aesni_ecb_encrypt	/* Block-by-block AES over a buffer: %rdi = in, %rsi = out, %rdx = length in bytes, %rcx = key schedule, %r8d = non-zero to encrypt / zero to decrypt. */
+.hidden aesni_ecb_encrypt
+.type	aesni_ecb_encrypt,@function
+.align	16
+aesni_ecb_encrypt:
+	andq	$-16,%rdx	/* round the length down to whole 16-byte blocks */
+	jz	.Lecb_ret	/* nothing to do */
+
+	movl	240(%rcx),%eax	/* eax = round count stored at offset 240 of the key schedule */
+	movups	(%rcx),%xmm0
+	movq	%rcx,%r11	/* keep key pointer and round count: the helpers clobber rcx/eax */
+	movl	%eax,%r10d
+	testl	%r8d,%r8d
+	jz	.Lecb_decrypt
+
+	cmpq	$0x80,%rdx	/* fewer than 8 blocks: go straight to the tail */
+	jb	.Lecb_enc_tail
+
+	movdqu	(%rdi),%xmm2	/* load the first 8 blocks */
+	movdqu	16(%rdi),%xmm3
+	movdqu	32(%rdi),%xmm4
+	movdqu	48(%rdi),%xmm5
+	movdqu	64(%rdi),%xmm6
+	movdqu	80(%rdi),%xmm7
+	movdqu	96(%rdi),%xmm8
+	movdqu	112(%rdi),%xmm9
+	leaq	128(%rdi),%rdi
+	subq	$0x80,%rdx
+	jmp	.Lecb_enc_loop8_enter
+.align	16
+.Lecb_enc_loop8:	/* store the previous 8 results while loading the next 8 inputs */
+	movups	%xmm2,(%rsi)
+	movq	%r11,%rcx	/* restore key pointer */
+	movdqu	(%rdi),%xmm2
+	movl	%r10d,%eax	/* restore round count */
+	movups	%xmm3,16(%rsi)
+	movdqu	16(%rdi),%xmm3
+	movups	%xmm4,32(%rsi)
+	movdqu	32(%rdi),%xmm4
+	movups	%xmm5,48(%rsi)
+	movdqu	48(%rdi),%xmm5
+	movups	%xmm6,64(%rsi)
+	movdqu	64(%rdi),%xmm6
+	movups	%xmm7,80(%rsi)
+	movdqu	80(%rdi),%xmm7
+	movups	%xmm8,96(%rsi)
+	movdqu	96(%rdi),%xmm8
+	movups	%xmm9,112(%rsi)
+	leaq	128(%rsi),%rsi
+	movdqu	112(%rdi),%xmm9
+	leaq	128(%rdi),%rdi
+.Lecb_enc_loop8_enter:
+
+	call	_aesni_encrypt8
+
+	subq	$0x80,%rdx
+	jnc	.Lecb_enc_loop8	/* loop while at least 8 more blocks remain */
+
+	movups	%xmm2,(%rsi)	/* flush the last full batch of 8 */
+	movq	%r11,%rcx
+	movups	%xmm3,16(%rsi)
+	movl	%r10d,%eax
+	movups	%xmm4,32(%rsi)
+	movups	%xmm5,48(%rsi)
+	movups	%xmm6,64(%rsi)
+	movups	%xmm7,80(%rsi)
+	movups	%xmm8,96(%rsi)
+	movups	%xmm9,112(%rsi)
+	leaq	128(%rsi),%rsi
+	addq	$0x80,%rdx	/* undo the over-subtraction; rdx = remaining bytes (0..0x70) */
+	jz	.Lecb_ret
+
+.Lecb_enc_tail:	/* 1..7 blocks left: each cmpq serves a jb/je pair (the loads between do not change flags) */
+	movups	(%rdi),%xmm2
+	cmpq	$0x20,%rdx
+	jb	.Lecb_enc_one
+	movups	16(%rdi),%xmm3
+	je	.Lecb_enc_two
+	movups	32(%rdi),%xmm4
+	cmpq	$0x40,%rdx
+	jb	.Lecb_enc_three
+	movups	48(%rdi),%xmm5
+	je	.Lecb_enc_four
+	movups	64(%rdi),%xmm6
+	cmpq	$0x60,%rdx
+	jb	.Lecb_enc_five
+	movups	80(%rdi),%xmm7
+	je	.Lecb_enc_six
+	movdqu	96(%rdi),%xmm8	/* seven blocks: run the 8-wide helper with a zeroed eighth lane */
+	xorps	%xmm9,%xmm9
+	call	_aesni_encrypt8
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	movups	%xmm5,48(%rsi)
+	movups	%xmm6,64(%rsi)
+	movups	%xmm7,80(%rsi)
+	movups	%xmm8,96(%rsi)
+	jmp	.Lecb_ret
+.align	16
+.Lecb_enc_one:	/* single block: inline one-block encryption loop */
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+.Loop_enc1_3:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	.Loop_enc1_3
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
+	movups	%xmm2,(%rsi)
+	jmp	.Lecb_ret
+.align	16
+.Lecb_enc_two:
+	call	_aesni_encrypt2
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	jmp	.Lecb_ret
+.align	16
+.Lecb_enc_three:
+	call	_aesni_encrypt3
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	jmp	.Lecb_ret
+.align	16
+.Lecb_enc_four:
+	call	_aesni_encrypt4
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	movups	%xmm5,48(%rsi)
+	jmp	.Lecb_ret
+.align	16
+.Lecb_enc_five:	/* five blocks: run the 6-wide helper with a zeroed sixth lane */
+	xorps	%xmm7,%xmm7
+	call	_aesni_encrypt6
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	movups	%xmm5,48(%rsi)
+	movups	%xmm6,64(%rsi)
+	jmp	.Lecb_ret
+.align	16
+.Lecb_enc_six:
+	call	_aesni_encrypt6
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	movups	%xmm5,48(%rsi)
+	movups	%xmm6,64(%rsi)
+	movups	%xmm7,80(%rsi)
+	jmp	.Lecb_ret
+
+.align	16
+.Lecb_decrypt:	/* same structure as the encrypt path, but result registers are zeroed after being stored */
+	cmpq	$0x80,%rdx
+	jb	.Lecb_dec_tail
+
+	movdqu	(%rdi),%xmm2	/* load the first 8 blocks */
+	movdqu	16(%rdi),%xmm3
+	movdqu	32(%rdi),%xmm4
+	movdqu	48(%rdi),%xmm5
+	movdqu	64(%rdi),%xmm6
+	movdqu	80(%rdi),%xmm7
+	movdqu	96(%rdi),%xmm8
+	movdqu	112(%rdi),%xmm9
+	leaq	128(%rdi),%rdi
+	subq	$0x80,%rdx
+	jmp	.Lecb_dec_loop8_enter
+.align	16
+.Lecb_dec_loop8:	/* store the previous 8 results while loading the next 8 inputs */
+	movups	%xmm2,(%rsi)
+	movq	%r11,%rcx	/* restore key pointer */
+	movdqu	(%rdi),%xmm2
+	movl	%r10d,%eax	/* restore round count */
+	movups	%xmm3,16(%rsi)
+	movdqu	16(%rdi),%xmm3
+	movups	%xmm4,32(%rsi)
+	movdqu	32(%rdi),%xmm4
+	movups	%xmm5,48(%rsi)
+	movdqu	48(%rdi),%xmm5
+	movups	%xmm6,64(%rsi)
+	movdqu	64(%rdi),%xmm6
+	movups	%xmm7,80(%rsi)
+	movdqu	80(%rdi),%xmm7
+	movups	%xmm8,96(%rsi)
+	movdqu	96(%rdi),%xmm8
+	movups	%xmm9,112(%rsi)
+	leaq	128(%rsi),%rsi
+	movdqu	112(%rdi),%xmm9
+	leaq	128(%rdi),%rdi
+.Lecb_dec_loop8_enter:
+
+	call	_aesni_decrypt8
+
+	movups	(%r11),%xmm0	/* reload round key 0 (movups does not affect the flags used below) */
+	subq	$0x80,%rdx
+	jnc	.Lecb_dec_loop8	/* loop while at least 8 more blocks remain */
+
+	movups	%xmm2,(%rsi)	/* flush the last full batch, wiping each register once stored */
+	pxor	%xmm2,%xmm2
+	movq	%r11,%rcx
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	movl	%r10d,%eax
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	movups	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm5
+	movups	%xmm6,64(%rsi)
+	pxor	%xmm6,%xmm6
+	movups	%xmm7,80(%rsi)
+	pxor	%xmm7,%xmm7
+	movups	%xmm8,96(%rsi)
+	pxor	%xmm8,%xmm8
+	movups	%xmm9,112(%rsi)
+	pxor	%xmm9,%xmm9
+	leaq	128(%rsi),%rsi
+	addq	$0x80,%rdx	/* undo the over-subtraction; rdx = remaining bytes (0..0x70) */
+	jz	.Lecb_ret
+
+.Lecb_dec_tail:	/* 1..7 blocks left: each cmpq serves a jb/je pair */
+	movups	(%rdi),%xmm2
+	cmpq	$0x20,%rdx
+	jb	.Lecb_dec_one
+	movups	16(%rdi),%xmm3
+	je	.Lecb_dec_two
+	movups	32(%rdi),%xmm4
+	cmpq	$0x40,%rdx
+	jb	.Lecb_dec_three
+	movups	48(%rdi),%xmm5
+	je	.Lecb_dec_four
+	movups	64(%rdi),%xmm6
+	cmpq	$0x60,%rdx
+	jb	.Lecb_dec_five
+	movups	80(%rdi),%xmm7
+	je	.Lecb_dec_six
+	movups	96(%rdi),%xmm8	/* seven blocks: run the 8-wide helper with a zeroed eighth lane */
+	movups	(%rcx),%xmm0
+	xorps	%xmm9,%xmm9
+	call	_aesni_decrypt8
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	movups	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm5
+	movups	%xmm6,64(%rsi)
+	pxor	%xmm6,%xmm6
+	movups	%xmm7,80(%rsi)
+	pxor	%xmm7,%xmm7
+	movups	%xmm8,96(%rsi)
+	pxor	%xmm8,%xmm8
+	pxor	%xmm9,%xmm9
+	jmp	.Lecb_ret
+.align	16
+.Lecb_dec_one:	/* single block: inline one-block decryption loop */
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+.Loop_dec1_4:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	.Loop_dec1_4
+.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	jmp	.Lecb_ret
+.align	16
+.Lecb_dec_two:
+	call	_aesni_decrypt2
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	jmp	.Lecb_ret
+.align	16
+.Lecb_dec_three:
+	call	_aesni_decrypt3
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	jmp	.Lecb_ret
+.align	16
+.Lecb_dec_four:
+	call	_aesni_decrypt4
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	movups	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm5
+	jmp	.Lecb_ret
+.align	16
+.Lecb_dec_five:	/* five blocks: run the 6-wide helper with a zeroed sixth lane */
+	xorps	%xmm7,%xmm7
+	call	_aesni_decrypt6
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	movups	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm5
+	movups	%xmm6,64(%rsi)
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	jmp	.Lecb_ret
+.align	16
+.Lecb_dec_six:	/* falls through into .Lecb_ret */
+	call	_aesni_decrypt6
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	movups	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm5
+	movups	%xmm6,64(%rsi)
+	pxor	%xmm6,%xmm6
+	movups	%xmm7,80(%rsi)
+	pxor	%xmm7,%xmm7
+
+.Lecb_ret:	/* common exit: wipe the round-key registers */
+	xorps	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	.byte	0xf3,0xc3	/* rep ret */
+.size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
+.globl	aesni_ccm64_encrypt_blocks	/* %rdi = in, %rsi = out, %rdx = number of 16-byte blocks, %rcx = key schedule, %r8 = 16-byte counter block, %r9 = 16-byte running value (read at entry, rewritten at exit). */
+.hidden aesni_ccm64_encrypt_blocks
+.type	aesni_ccm64_encrypt_blocks,@function
+.align	16
+aesni_ccm64_encrypt_blocks:
+	movl	240(%rcx),%eax	/* round count from offset 240 of the key schedule */
+	movdqu	(%r8),%xmm6	/* xmm6 = counter block */
+	movdqa	.Lincrement64(%rip),%xmm9	/* constant defined elsewhere in this file; added to the byte-swapped counter with paddq */
+	movdqa	.Lbswap_mask(%rip),%xmm7	/* pshufb mask defined elsewhere in this file */
+
+	shll	$4,%eax	/* round count -> byte offset */
+	movl	$16,%r10d
+	leaq	0(%rcx),%r11	/* r11 = start of key schedule */
+	movdqu	(%r9),%xmm3	/* xmm3 = running value */
+	movdqa	%xmm6,%xmm2	/* xmm2 = counter block to encrypt first */
+	leaq	32(%rcx,%rax,1),%rcx	/* rcx -> end of the schedule */
+.byte	102,15,56,0,247	/* pshufb %xmm7,%xmm6: keep the counter in shuffled form for paddq */
+	subq	%rax,%r10	/* r10 = 16 - 16*rounds: negative start index for the round loop */
+	jmp	.Lccm64_enc_outer
+.align	16
+.Lccm64_enc_outer:	/* one input block per iteration; two AES lanes: xmm2 (counter) and xmm3 (running value) */
+	movups	(%r11),%xmm0	/* round key 0 */
+	movq	%r10,%rax
+	movups	(%rdi),%xmm8	/* xmm8 = input block */
+
+	xorps	%xmm0,%xmm2	/* whiten the counter lane */
+	movups	16(%r11),%xmm1
+	xorps	%xmm8,%xmm0	/* xmm3 ^= input ^ round key 0 */
+	xorps	%xmm0,%xmm3
+	movups	32(%r11),%xmm0
+
+.Lccm64_enc2_loop:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF for the jnz below */
+.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
+.byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	.Lccm64_enc2_loop
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+	paddq	%xmm9,%xmm6	/* advance the counter */
+	decq	%rdx	/* sets ZF for the jnz at the bottom; nothing in between modifies flags */
+.byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
+.byte	102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
+
+	leaq	16(%rdi),%rdi
+	xorps	%xmm2,%xmm8	/* output = input ^ encrypted counter */
+	movdqa	%xmm6,%xmm2	/* next counter ... */
+	movups	%xmm8,(%rsi)
+.byte	102,15,56,0,215	/* ... pshufb %xmm7,%xmm2: shuffled back for encryption */
+	leaq	16(%rsi),%rsi
+	jnz	.Lccm64_enc_outer
+
+	pxor	%xmm0,%xmm0	/* wipe working registers; write the running value back to (%r9) */
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	movups	%xmm3,(%r9)
+	pxor	%xmm3,%xmm3
+	pxor	%xmm8,%xmm8
+	pxor	%xmm6,%xmm6
+	.byte	0xf3,0xc3	/* rep ret */
+.size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
+.globl	aesni_ccm64_decrypt_blocks	/* %rdi = in, %rsi = out, %rdx = number of 16-byte blocks, %rcx = key schedule, %r8 = 16-byte counter block, %r9 = 16-byte running value (read at entry, rewritten at exit). */
+.hidden aesni_ccm64_decrypt_blocks
+.type	aesni_ccm64_decrypt_blocks,@function
+.align	16
+aesni_ccm64_decrypt_blocks:
+	movl	240(%rcx),%eax	/* round count from offset 240 of the key schedule */
+	movups	(%r8),%xmm6	/* xmm6 = counter block */
+	movdqu	(%r9),%xmm3	/* xmm3 = running value */
+	movdqa	.Lincrement64(%rip),%xmm9	/* constant defined elsewhere in this file; added to the byte-swapped counter with paddq */
+	movdqa	.Lbswap_mask(%rip),%xmm7	/* pshufb mask defined elsewhere in this file */
+
+	movaps	%xmm6,%xmm2	/* xmm2 = first counter block, encrypted by the single-lane loop below */
+	movl	%eax,%r10d	/* save round count */
+	movq	%rcx,%r11	/* save key pointer */
+.byte	102,15,56,0,247	/* pshufb %xmm7,%xmm6 */
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+.Loop_enc1_5:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	.Loop_enc1_5
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
+	shll	$4,%r10d	/* round count -> byte offset */
+	movl	$16,%eax
+	movups	(%rdi),%xmm8	/* first input block */
+	paddq	%xmm9,%xmm6	/* advance the counter */
+	leaq	16(%rdi),%rdi
+	subq	%r10,%rax	/* rax = 16 - 16*rounds */
+	leaq	32(%r11,%r10,1),%rcx	/* rcx -> end of the schedule */
+	movq	%rax,%r10	/* r10 = negative start index for the two-lane round loop */
+	jmp	.Lccm64_dec_outer
+.align	16
+.Lccm64_dec_outer:
+	xorps	%xmm2,%xmm8	/* output = input ^ encrypted counter */
+	movdqa	%xmm6,%xmm2	/* next counter ... */
+	movups	%xmm8,(%rsi)
+	leaq	16(%rsi),%rsi
+.byte	102,15,56,0,215	/* ... pshufb %xmm7,%xmm2 */
+
+	subq	$1,%rdx
+	jz	.Lccm64_dec_break	/* last block: finish the running value separately */
+
+	movups	(%r11),%xmm0	/* round key 0 */
+	movq	%r10,%rax
+	movups	16(%r11),%xmm1
+	xorps	%xmm0,%xmm8	/* xmm3 ^= output ^ round key 0 */
+	xorps	%xmm0,%xmm2	/* whiten the counter lane */
+	xorps	%xmm8,%xmm3
+	movups	32(%r11),%xmm0
+	jmp	.Lccm64_dec2_loop
+.align	16
+.Lccm64_dec2_loop:	/* two AES lanes: xmm2 (counter) and xmm3 (running value) */
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF for the jnz below */
+.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
+.byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	.Lccm64_dec2_loop
+	movups	(%rdi),%xmm8	/* next input block */
+	paddq	%xmm9,%xmm6	/* advance the counter */
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+.byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
+.byte	102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
+	leaq	16(%rdi),%rdi
+	jmp	.Lccm64_dec_outer
+
+.align	16
+.Lccm64_dec_break:	/* fold the final output block into xmm3 and encrypt it with a single-lane loop */
+
+	movl	240(%r11),%eax
+	movups	(%r11),%xmm0
+	movups	16(%r11),%xmm1
+	xorps	%xmm0,%xmm8
+	leaq	32(%r11),%r11
+	xorps	%xmm8,%xmm3
+.Loop_enc1_6:
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+	decl	%eax
+	movups	(%r11),%xmm1
+	leaq	16(%r11),%r11
+	jnz	.Loop_enc1_6
+.byte	102,15,56,221,217	/* aesenclast %xmm1,%xmm3 */
+	pxor	%xmm0,%xmm0	/* wipe working registers; write the running value back to (%r9) */
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	movups	%xmm3,(%r9)
+	pxor	%xmm3,%xmm3
+	pxor	%xmm8,%xmm8
+	pxor	%xmm6,%xmm6
+	.byte	0xf3,0xc3	/* rep ret */
+.size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
+.globl	aesni_ctr32_encrypt_blocks
+.hidden aesni_ctr32_encrypt_blocks
+.type	aesni_ctr32_encrypt_blocks,@function
+.align	16
+aesni_ctr32_encrypt_blocks:
+	cmpq	$1,%rdx
+	jne	.Lctr32_bulk
+
+
+
+	movups	(%r8),%xmm2
+	movups	(%rdi),%xmm3
+	movl	240(%rcx),%edx
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+.Loop_enc1_7:
+.byte	102,15,56,220,209
+	decl	%edx
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	.Loop_enc1_7
+.byte	102,15,56,221,209
+	pxor	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	xorps	%xmm3,%xmm2
+	pxor	%xmm3,%xmm3
+	movups	%xmm2,(%rsi)
+	xorps	%xmm2,%xmm2
+	jmp	.Lctr32_epilogue
+
+.align	16
+.Lctr32_bulk:
+	leaq	(%rsp),%r11
+	pushq	%rbp
+	subq	$128,%rsp
+	andq	$-16,%rsp
+
+
+
+
+	movdqu	(%r8),%xmm2
+	movdqu	(%rcx),%xmm0
+	movl	12(%r8),%r8d
+	pxor	%xmm0,%xmm2
+	movl	12(%rcx),%ebp
+	movdqa	%xmm2,0(%rsp)
+	bswapl	%r8d
+	movdqa	%xmm2,%xmm3
+	movdqa	%xmm2,%xmm4
+	movdqa	%xmm2,%xmm5
+	movdqa	%xmm2,64(%rsp)
+	movdqa	%xmm2,80(%rsp)
+	movdqa	%xmm2,96(%rsp)
+	movq	%rdx,%r10
+	movdqa	%xmm2,112(%rsp)
+
+	leaq	1(%r8),%rax
+	leaq	2(%r8),%rdx
+	bswapl	%eax
+	bswapl	%edx
+	xorl	%ebp,%eax
+	xorl	%ebp,%edx
+.byte	102,15,58,34,216,3
+	leaq	3(%r8),%rax
+	movdqa	%xmm3,16(%rsp)
+.byte	102,15,58,34,226,3
+	bswapl	%eax
+	movq	%r10,%rdx
+	leaq	4(%r8),%r10
+	movdqa	%xmm4,32(%rsp)
+	xorl	%ebp,%eax
+	bswapl	%r10d
+.byte	102,15,58,34,232,3
+	xorl	%ebp,%r10d
+	movdqa	%xmm5,48(%rsp)
+	leaq	5(%r8),%r9
+	movl	%r10d,64+12(%rsp)
+	bswapl	%r9d
+	leaq	6(%r8),%r10
+	movl	240(%rcx),%eax
+	xorl	%ebp,%r9d
+	bswapl	%r10d
+	movl	%r9d,80+12(%rsp)
+	xorl	%ebp,%r10d
+	leaq	7(%r8),%r9
+	movl	%r10d,96+12(%rsp)
+	bswapl	%r9d
+	leaq	OPENSSL_ia32cap_P(%rip),%r10
+	movl	4(%r10),%r10d
+	xorl	%ebp,%r9d
+	andl	$71303168,%r10d
+	movl	%r9d,112+12(%rsp)
+
+	movups	16(%rcx),%xmm1
+
+	movdqa	64(%rsp),%xmm6
+	movdqa	80(%rsp),%xmm7
+
+	cmpq	$8,%rdx
+	jb	.Lctr32_tail
+
+	subq	$6,%rdx
+	cmpl	$4194304,%r10d
+	je	.Lctr32_6x
+
+	leaq	128(%rcx),%rcx
+	subq	$2,%rdx
+	jmp	.Lctr32_loop8
+
+.align	16
+.Lctr32_6x:
+	shll	$4,%eax
+	movl	$48,%r10d
+	bswapl	%ebp
+	leaq	32(%rcx,%rax,1),%rcx
+	subq	%rax,%r10
+	jmp	.Lctr32_loop6
+
+.align	16
+.Lctr32_loop6:
+	addl	$6,%r8d
+	movups	-48(%rcx,%r10,1),%xmm0
+.byte	102,15,56,220,209
+	movl	%r8d,%eax
+	xorl	%ebp,%eax
+.byte	102,15,56,220,217
+.byte	0x0f,0x38,0xf1,0x44,0x24,12
+	leal	1(%r8),%eax
+.byte	102,15,56,220,225
+	xorl	%ebp,%eax
+.byte	0x0f,0x38,0xf1,0x44,0x24,28
+.byte	102,15,56,220,233
+	leal	2(%r8),%eax
+	xorl	%ebp,%eax
+.byte	102,15,56,220,241
+.byte	0x0f,0x38,0xf1,0x44,0x24,44
+	leal	3(%r8),%eax
+.byte	102,15,56,220,249
+	movups	-32(%rcx,%r10,1),%xmm1
+	xorl	%ebp,%eax
+
+.byte	102,15,56,220,208
+.byte	0x0f,0x38,0xf1,0x44,0x24,60
+	leal	4(%r8),%eax
+.byte	102,15,56,220,216
+	xorl	%ebp,%eax
+.byte	0x0f,0x38,0xf1,0x44,0x24,76
+.byte	102,15,56,220,224
+	leal	5(%r8),%eax
+	xorl	%ebp,%eax
+.byte	102,15,56,220,232
+.byte	0x0f,0x38,0xf1,0x44,0x24,92
+	movq	%r10,%rax
+.byte	102,15,56,220,240
+.byte	102,15,56,220,248
+	movups	-16(%rcx,%r10,1),%xmm0
+
+	call	.Lenc_loop6
+
+	movdqu	(%rdi),%xmm8
+	movdqu	16(%rdi),%xmm9
+	movdqu	32(%rdi),%xmm10
+	movdqu	48(%rdi),%xmm11
+	movdqu	64(%rdi),%xmm12
+	movdqu	80(%rdi),%xmm13
+	leaq	96(%rdi),%rdi
+	movups	-64(%rcx,%r10,1),%xmm1
+	pxor	%xmm2,%xmm8
+	movaps	0(%rsp),%xmm2
+	pxor	%xmm3,%xmm9
+	movaps	16(%rsp),%xmm3
+	pxor	%xmm4,%xmm10
+	movaps	32(%rsp),%xmm4
+	pxor	%xmm5,%xmm11
+	movaps	48(%rsp),%xmm5
+	pxor	%xmm6,%xmm12
+	movaps	64(%rsp),%xmm6
+	pxor	%xmm7,%xmm13
+	movaps	80(%rsp),%xmm7
+	movdqu	%xmm8,(%rsi)
+	movdqu	%xmm9,16(%rsi)
+	movdqu	%xmm10,32(%rsi)
+	movdqu	%xmm11,48(%rsi)
+	movdqu	%xmm12,64(%rsi)
+	movdqu	%xmm13,80(%rsi)
+	leaq	96(%rsi),%rsi
+
+	subq	$6,%rdx
+	jnc	.Lctr32_loop6
+
+	addq	$6,%rdx
+	jz	.Lctr32_done
+
+	leal	-48(%r10),%eax
+	leaq	-80(%rcx,%r10,1),%rcx
+	negl	%eax
+	shrl	$4,%eax
+	jmp	.Lctr32_tail
+
+.align	32
+.Lctr32_loop8:
+	addl	$8,%r8d
+	movdqa	96(%rsp),%xmm8
+.byte	102,15,56,220,209
+	movl	%r8d,%r9d
+	movdqa	112(%rsp),%xmm9
+.byte	102,15,56,220,217
+	bswapl	%r9d
+	movups	32-128(%rcx),%xmm0
+.byte	102,15,56,220,225
+	xorl	%ebp,%r9d
+	nop
+.byte	102,15,56,220,233
+	movl	%r9d,0+12(%rsp)
+	leaq	1(%r8),%r9
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+.byte	102,68,15,56,220,193
+.byte	102,68,15,56,220,201
+	movups	48-128(%rcx),%xmm1
+	bswapl	%r9d
+.byte	102,15,56,220,208
+.byte	102,15,56,220,216
+	xorl	%ebp,%r9d
+.byte	0x66,0x90
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+	movl	%r9d,16+12(%rsp)
+	leaq	2(%r8),%r9
+.byte	102,15,56,220,240
+.byte	102,15,56,220,248
+.byte	102,68,15,56,220,192
+.byte	102,68,15,56,220,200
+	movups	64-128(%rcx),%xmm0
+	bswapl	%r9d
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+	xorl	%ebp,%r9d
+.byte	0x66,0x90
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+	movl	%r9d,32+12(%rsp)
+	leaq	3(%r8),%r9
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+.byte	102,68,15,56,220,193
+.byte	102,68,15,56,220,201
+	movups	80-128(%rcx),%xmm1
+	bswapl	%r9d
+.byte	102,15,56,220,208
+.byte	102,15,56,220,216
+	xorl	%ebp,%r9d
+.byte	0x66,0x90
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+	movl	%r9d,48+12(%rsp)
+	leaq	4(%r8),%r9
+.byte	102,15,56,220,240
+.byte	102,15,56,220,248
+.byte	102,68,15,56,220,192
+.byte	102,68,15,56,220,200
+	movups	96-128(%rcx),%xmm0
+	bswapl	%r9d
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+	xorl	%ebp,%r9d
+.byte	0x66,0x90
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+	movl	%r9d,64+12(%rsp)
+	leaq	5(%r8),%r9
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+.byte	102,68,15,56,220,193
+.byte	102,68,15,56,220,201
+	movups	112-128(%rcx),%xmm1
+	bswapl	%r9d
+.byte	102,15,56,220,208
+.byte	102,15,56,220,216
+	xorl	%ebp,%r9d
+.byte	0x66,0x90
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+	movl	%r9d,80+12(%rsp)
+	leaq	6(%r8),%r9
+.byte	102,15,56,220,240
+.byte	102,15,56,220,248
+.byte	102,68,15,56,220,192
+.byte	102,68,15,56,220,200
+	movups	128-128(%rcx),%xmm0
+	bswapl	%r9d
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+	xorl	%ebp,%r9d
+.byte	0x66,0x90
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+	movl	%r9d,96+12(%rsp)
+	leaq	7(%r8),%r9
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+.byte	102,68,15,56,220,193
+.byte	102,68,15,56,220,201
+	movups	144-128(%rcx),%xmm1
+	bswapl	%r9d
+.byte	102,15,56,220,208
+.byte	102,15,56,220,216
+.byte	102,15,56,220,224
+	xorl	%ebp,%r9d
+	movdqu	0(%rdi),%xmm10
+.byte	102,15,56,220,232
+	movl	%r9d,112+12(%rsp)
+	cmpl	$11,%eax
+.byte	102,15,56,220,240
+.byte	102,15,56,220,248
+.byte	102,68,15,56,220,192
+.byte	102,68,15,56,220,200
+	movups	160-128(%rcx),%xmm0
+
+	jb	.Lctr32_enc_done
+
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+.byte	102,68,15,56,220,193
+.byte	102,68,15,56,220,201
+	movups	176-128(%rcx),%xmm1
+
+.byte	102,15,56,220,208
+.byte	102,15,56,220,216
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+.byte	102,15,56,220,240
+.byte	102,15,56,220,248
+.byte	102,68,15,56,220,192
+.byte	102,68,15,56,220,200
+	movups	192-128(%rcx),%xmm0
+	je	.Lctr32_enc_done
+
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+.byte	102,68,15,56,220,193
+.byte	102,68,15,56,220,201
+	movups	208-128(%rcx),%xmm1
+
+.byte	102,15,56,220,208
+.byte	102,15,56,220,216
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+.byte	102,15,56,220,240
+.byte	102,15,56,220,248
+.byte	102,68,15,56,220,192
+.byte	102,68,15,56,220,200
+	movups	224-128(%rcx),%xmm0
+	jmp	.Lctr32_enc_done
+
+.align	16
+.Lctr32_enc_done:
+	movdqu	16(%rdi),%xmm11
+	pxor	%xmm0,%xmm10
+	movdqu	32(%rdi),%xmm12
+	pxor	%xmm0,%xmm11
+	movdqu	48(%rdi),%xmm13
+	pxor	%xmm0,%xmm12
+	movdqu	64(%rdi),%xmm14
+	pxor	%xmm0,%xmm13
+	movdqu	80(%rdi),%xmm15
+	pxor	%xmm0,%xmm14
+	pxor	%xmm0,%xmm15
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+.byte	102,68,15,56,220,193
+.byte	102,68,15,56,220,201
+	movdqu	96(%rdi),%xmm1
+	leaq	128(%rdi),%rdi
+
+.byte	102,65,15,56,221,210
+	pxor	%xmm0,%xmm1
+	movdqu	112-128(%rdi),%xmm10
+.byte	102,65,15,56,221,219
+	pxor	%xmm0,%xmm10
+	movdqa	0(%rsp),%xmm11
+.byte	102,65,15,56,221,228
+.byte	102,65,15,56,221,237
+	movdqa	16(%rsp),%xmm12
+	movdqa	32(%rsp),%xmm13
+.byte	102,65,15,56,221,246
+.byte	102,65,15,56,221,255
+	movdqa	48(%rsp),%xmm14
+	movdqa	64(%rsp),%xmm15
+.byte	102,68,15,56,221,193
+	movdqa	80(%rsp),%xmm0
+	movups	16-128(%rcx),%xmm1
+.byte	102,69,15,56,221,202
+
+	movups	%xmm2,(%rsi)
+	movdqa	%xmm11,%xmm2
+	movups	%xmm3,16(%rsi)
+	movdqa	%xmm12,%xmm3
+	movups	%xmm4,32(%rsi)
+	movdqa	%xmm13,%xmm4
+	movups	%xmm5,48(%rsi)
+	movdqa	%xmm14,%xmm5
+	movups	%xmm6,64(%rsi)
+	movdqa	%xmm15,%xmm6
+	movups	%xmm7,80(%rsi)
+	movdqa	%xmm0,%xmm7
+	movups	%xmm8,96(%rsi)
+	movups	%xmm9,112(%rsi)
+	leaq	128(%rsi),%rsi
+
+	subq	$8,%rdx
+	jnc	.Lctr32_loop8
+
+	addq	$8,%rdx
+	jz	.Lctr32_done
+	leaq	-128(%rcx),%rcx
+
+.Lctr32_tail:
+
+
+	leaq	16(%rcx),%rcx
+	cmpq	$4,%rdx
+	jb	.Lctr32_loop3
+	je	.Lctr32_loop4
+
+
+	shll	$4,%eax
+	movdqa	96(%rsp),%xmm8
+	pxor	%xmm9,%xmm9
+
+	movups	16(%rcx),%xmm0
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+	leaq	32-16(%rcx,%rax,1),%rcx
+	negq	%rax
+.byte	102,15,56,220,225
+	addq	$16,%rax
+	movups	(%rdi),%xmm10
+.byte	102,15,56,220,233
+.byte	102,15,56,220,241
+	movups	16(%rdi),%xmm11
+	movups	32(%rdi),%xmm12
+.byte	102,15,56,220,249
+.byte	102,68,15,56,220,193
+
+	call	.Lenc_loop8_enter
+
+	movdqu	48(%rdi),%xmm13
+	pxor	%xmm10,%xmm2
+	movdqu	64(%rdi),%xmm10
+	pxor	%xmm11,%xmm3
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm3,16(%rsi)
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm4,32(%rsi)
+	pxor	%xmm10,%xmm6
+	movdqu	%xmm5,48(%rsi)
+	movdqu	%xmm6,64(%rsi)
+	cmpq	$6,%rdx
+	jb	.Lctr32_done
+
+	movups	80(%rdi),%xmm11
+	xorps	%xmm11,%xmm7
+	movups	%xmm7,80(%rsi)
+	je	.Lctr32_done
+
+	movups	96(%rdi),%xmm12
+	xorps	%xmm12,%xmm8
+	movups	%xmm8,96(%rsi)
+	jmp	.Lctr32_done
+
+.align	32
+.Lctr32_loop4:
+.byte	102,15,56,220,209
+	leaq	16(%rcx),%rcx
+	decl	%eax
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+	movups	(%rcx),%xmm1
+	jnz	.Lctr32_loop4
+.byte	102,15,56,221,209
+.byte	102,15,56,221,217
+	movups	(%rdi),%xmm10
+	movups	16(%rdi),%xmm11
+.byte	102,15,56,221,225
+.byte	102,15,56,221,233
+	movups	32(%rdi),%xmm12
+	movups	48(%rdi),%xmm13
+
+	xorps	%xmm10,%xmm2
+	movups	%xmm2,(%rsi)
+	xorps	%xmm11,%xmm3
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm4,32(%rsi)
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm5,48(%rsi)
+	jmp	.Lctr32_done
+
+.align	32
+.Lctr32_loop3:
+.byte	102,15,56,220,209
+	leaq	16(%rcx),%rcx
+	decl	%eax
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+	movups	(%rcx),%xmm1
+	jnz	.Lctr32_loop3
+.byte	102,15,56,221,209
+.byte	102,15,56,221,217
+.byte	102,15,56,221,225
+
+	movups	(%rdi),%xmm10
+	xorps	%xmm10,%xmm2
+	movups	%xmm2,(%rsi)
+	cmpq	$2,%rdx
+	jb	.Lctr32_done
+
+	movups	16(%rdi),%xmm11
+	xorps	%xmm11,%xmm3
+	movups	%xmm3,16(%rsi)
+	je	.Lctr32_done
+
+	movups	32(%rdi),%xmm12
+	xorps	%xmm12,%xmm4
+	movups	%xmm4,32(%rsi)
+
+.Lctr32_done:
+	xorps	%xmm0,%xmm0
+	xorl	%ebp,%ebp
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	movaps	%xmm0,0(%rsp)
+	pxor	%xmm8,%xmm8
+	movaps	%xmm0,16(%rsp)
+	pxor	%xmm9,%xmm9
+	movaps	%xmm0,32(%rsp)
+	pxor	%xmm10,%xmm10
+	movaps	%xmm0,48(%rsp)
+	pxor	%xmm11,%xmm11
+	movaps	%xmm0,64(%rsp)
+	pxor	%xmm12,%xmm12
+	movaps	%xmm0,80(%rsp)
+	pxor	%xmm13,%xmm13
+	movaps	%xmm0,96(%rsp)
+	pxor	%xmm14,%xmm14
+	movaps	%xmm0,112(%rsp)
+	pxor	%xmm15,%xmm15
+	movq	-8(%r11),%rbp
+	leaq	(%r11),%rsp
+.Lctr32_epilogue:
+	.byte	0xf3,0xc3
+.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
+.globl	aesni_xts_encrypt
+.hidden aesni_xts_encrypt
+.type	aesni_xts_encrypt,@function
+.align	16
+aesni_xts_encrypt:	/* XTS encryption. Reads input at (%rdi), writes output at (%rsi), byte length in %rdx; %rcx = key schedule used on the data, %r8 = key schedule used to encrypt the tweak, %r9 = pointer to the 16-byte tweak input. */
+	leaq	(%rsp),%r11	/* r11 = entry rsp; the exit path restores rbp and rsp from it */
+	pushq	%rbp
+	subq	$112,%rsp	/* 112 bytes of scratch: 0..95 hold per-block tweak slots, 96 holds a combined round key */
+	andq	$-16,%rsp	/* 16-byte align so movdqa/movaps on the stack are legal */
+	movups	(%r9),%xmm2	/* xmm2 = 16 bytes at (%r9), encrypted below to form the first tweak */
+	movl	240(%r8),%eax	/* eax = aesenc iteration count stored at offset 240 of the r8 key */
+	movl	240(%rcx),%r10d	/* r10d = same field of the rcx key */
+	movups	(%r8),%xmm0
+	movups	16(%r8),%xmm1
+	leaq	32(%r8),%r8
+	xorps	%xmm0,%xmm2	/* whiten with round key 0 of the r8 key */
+.Loop_enc1_8:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	decl	%eax
+	movups	(%r8),%xmm1
+	leaq	16(%r8),%r8
+	jnz	.Loop_enc1_8
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 -> xmm2 = first tweak */
+	movups	(%rcx),%xmm0	/* xmm0 = round key 0 of the data key */
+	movq	%rcx,%rbp	/* rbp = data key schedule base for the rest of the function */
+	movl	%r10d,%eax
+	shll	$4,%r10d	/* r10 = 16 * iteration count (byte offset into the key schedule) */
+	movq	%rdx,%r9	/* r9 = original length; its low 4 bits select ciphertext stealing at the end */
+	andq	$-16,%rdx	/* rdx = length rounded down to whole 16-byte blocks */
+
+	movups	16(%rcx,%r10,1),%xmm1	/* xmm1 = final round key */
+
+	movdqa	.Lxts_magic(%rip),%xmm8	/* constant defined outside this chunk; presumably the GF(2^128) reduction mask -- confirm at its definition */
+	movdqa	%xmm2,%xmm15	/* xmm15 = running tweak */
+	pshufd	$0x5f,%xmm2,%xmm9	/* xmm9 = copies of tweak dwords 3 and 1; their top bits feed the carry masks below */
+	pxor	%xmm0,%xmm1	/* xmm1 = round key 0 ^ final round key */
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9	/* expose the next carry bit for the following tweak */
+	movdqa	%xmm15,%xmm10	/* xmm10 = tweak for block 0 */
+	psrad	$31,%xmm14	/* all-ones in every dword whose top bit was set */
+	paddq	%xmm15,%xmm15	/* shift both 64-bit halves of the tweak left by one */
+	pand	%xmm8,%xmm14
+	pxor	%xmm0,%xmm10	/* pre-fold round key 0 so a single pxor both tweaks and whitens the input */
+	pxor	%xmm14,%xmm15	/* apply the masked carry bits -> next tweak */
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+	movdqa	%xmm15,%xmm11	/* xmm11 = tweak for block 1 */
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+	pxor	%xmm0,%xmm11
+	pxor	%xmm14,%xmm15
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+	movdqa	%xmm15,%xmm12	/* xmm12 = tweak for block 2 */
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+	pxor	%xmm0,%xmm12
+	pxor	%xmm14,%xmm15
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+	movdqa	%xmm15,%xmm13	/* xmm13 = tweak for block 3 */
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+	pxor	%xmm0,%xmm13
+	pxor	%xmm14,%xmm15
+	movdqa	%xmm15,%xmm14	/* xmm14 = tweak for block 4 */
+	psrad	$31,%xmm9
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm9
+	pxor	%xmm0,%xmm14
+	pxor	%xmm9,%xmm15	/* xmm15 = tweak for block 5, left without round key 0 folded in */
+	movaps	%xmm1,96(%rsp)	/* stash (round key 0 ^ final round key) for the main loop */
+
+	subq	$96,%rdx
+	jc	.Lxts_enc_short	/* fewer than six whole blocks */
+
+	movl	$16+96,%eax
+	leaq	32(%rbp,%r10,1),%rcx	/* rcx = key base + 32 + 16*count */
+	subq	%r10,%rax	/* rax = 112 - 16*count: index that .Lxts_enc_loop6 advances by 32 until it reaches zero */
+	movups	16(%rbp),%xmm1	/* xmm1 = round key 1 */
+	movq	%rax,%r10	/* keep the starting index so rax can be reloaded each iteration */
+	leaq	.Lxts_magic(%rip),%r8
+	jmp	.Lxts_enc_grandloop
+
+.align	32
+.Lxts_enc_grandloop:	/* Main loop: encrypts six blocks per iteration while computing the next six tweaks in between the AES rounds. */
+	movdqu	0(%rdi),%xmm2
+	movdqa	%xmm0,%xmm8	/* xmm8 = round key 0 */
+	movdqu	16(%rdi),%xmm3
+	pxor	%xmm10,%xmm2	/* input ^ tweak ^ round key 0 (xmm10..xmm14 already carry round key 0) */
+	movdqu	32(%rdi),%xmm4
+	pxor	%xmm11,%xmm3
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	movdqu	48(%rdi),%xmm5
+	pxor	%xmm12,%xmm4
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+	movdqu	64(%rdi),%xmm6
+	pxor	%xmm13,%xmm5
+.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
+	movdqu	80(%rdi),%xmm7
+	pxor	%xmm15,%xmm8	/* xmm8 = round key 0 ^ tweak for block 5 */
+	movdqa	96(%rsp),%xmm9	/* xmm9 = round key 0 ^ final round key */
+	pxor	%xmm14,%xmm6
+.byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
+	movups	32(%rbp),%xmm0
+	leaq	96(%rdi),%rdi
+	pxor	%xmm8,%xmm7
+
+	pxor	%xmm9,%xmm10	/* xmm10 = tweak ^ final round key: memory operand for the closing aesenclast */
+.byte	102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
+	pxor	%xmm9,%xmm11
+	movdqa	%xmm10,0(%rsp)
+.byte	102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
+	movups	48(%rbp),%xmm1
+	pxor	%xmm9,%xmm12
+
+.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
+	pxor	%xmm9,%xmm13
+	movdqa	%xmm11,16(%rsp)
+.byte	102,15,56,220,216
+	pxor	%xmm9,%xmm14
+	movdqa	%xmm12,32(%rsp)
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+	pxor	%xmm9,%xmm8
+	movdqa	%xmm14,64(%rsp)
+.byte	102,15,56,220,240
+.byte	102,15,56,220,248
+	movups	64(%rbp),%xmm0
+	movdqa	%xmm8,80(%rsp)
+	pshufd	$0x5f,%xmm15,%xmm9	/* carry source for the next batch of tweaks */
+	jmp	.Lxts_enc_loop6
+.align	32
+.Lxts_enc_loop6:	/* two rounds per pass on xmm2..xmm7, alternating round keys in xmm1 and xmm0 */
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+	movups	-64(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF when the index reaches zero; the movups below leaves flags intact for jnz */
+
+.byte	102,15,56,220,208
+.byte	102,15,56,220,216
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+.byte	102,15,56,220,240
+.byte	102,15,56,220,248
+	movups	-80(%rcx,%rax,1),%xmm0
+	jnz	.Lxts_enc_loop6
+
+	movdqa	(%r8),%xmm8	/* reload the .Lxts_magic constant (r8 was pointed at it before the loop) */
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+.byte	102,15,56,220,209
+	paddq	%xmm15,%xmm15
+	psrad	$31,%xmm14
+.byte	102,15,56,220,217
+	pand	%xmm8,%xmm14
+	movups	(%rbp),%xmm10	/* xmm10 = round key 0, base for the next tweak registers */
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+.byte	102,15,56,220,241
+	pxor	%xmm14,%xmm15
+	movaps	%xmm10,%xmm11
+.byte	102,15,56,220,249
+	movups	-64(%rcx),%xmm1
+
+	movdqa	%xmm9,%xmm14
+.byte	102,15,56,220,208
+	paddd	%xmm9,%xmm9
+	pxor	%xmm15,%xmm10	/* xmm10 = round key 0 ^ next tweak 0 */
+.byte	102,15,56,220,216
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+	pand	%xmm8,%xmm14
+	movaps	%xmm11,%xmm12
+.byte	102,15,56,220,240
+	pxor	%xmm14,%xmm15
+	movdqa	%xmm9,%xmm14
+.byte	102,15,56,220,248
+	movups	-48(%rcx),%xmm0
+
+	paddd	%xmm9,%xmm9
+.byte	102,15,56,220,209
+	pxor	%xmm15,%xmm11	/* xmm11 = round key 0 ^ next tweak 1 */
+	psrad	$31,%xmm14
+.byte	102,15,56,220,217
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+	movdqa	%xmm13,48(%rsp)	/* late store of the block-3 slot, done before xmm13 is overwritten just below */
+	pxor	%xmm14,%xmm15
+.byte	102,15,56,220,241
+	movaps	%xmm12,%xmm13
+	movdqa	%xmm9,%xmm14
+.byte	102,15,56,220,249
+	movups	-32(%rcx),%xmm1
+
+	paddd	%xmm9,%xmm9
+.byte	102,15,56,220,208
+	pxor	%xmm15,%xmm12	/* xmm12 = round key 0 ^ next tweak 2 */
+	psrad	$31,%xmm14
+.byte	102,15,56,220,216
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+.byte	102,15,56,220,240
+	pxor	%xmm14,%xmm15
+	movaps	%xmm13,%xmm14
+.byte	102,15,56,220,248
+
+	movdqa	%xmm9,%xmm0	/* xmm0 is free here, so it serves as the carry-mask temporary */
+	paddd	%xmm9,%xmm9
+.byte	102,15,56,220,209
+	pxor	%xmm15,%xmm13	/* xmm13 = round key 0 ^ next tweak 3 */
+	psrad	$31,%xmm0
+.byte	102,15,56,220,217
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm0
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+	pxor	%xmm0,%xmm15
+	movups	(%rbp),%xmm0	/* xmm0 = round key 0 again for the next iteration / tail code */
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+	movups	16(%rbp),%xmm1	/* xmm1 = round key 1 for the next iteration */
+
+	pxor	%xmm15,%xmm14	/* xmm14 = round key 0 ^ next tweak 4 */
+.byte	102,15,56,221,84,36,0	/* aesenclast 0(%rsp),%xmm2: final round key and output tweak in one step */
+	psrad	$31,%xmm9
+	paddq	%xmm15,%xmm15
+.byte	102,15,56,221,92,36,16	/* aesenclast 16(%rsp),%xmm3 */
+.byte	102,15,56,221,100,36,32	/* aesenclast 32(%rsp),%xmm4 */
+	pand	%xmm8,%xmm9
+	movq	%r10,%rax	/* reset the round-key index for the next pass */
+.byte	102,15,56,221,108,36,48	/* aesenclast 48(%rsp),%xmm5 */
+.byte	102,15,56,221,116,36,64	/* aesenclast 64(%rsp),%xmm6 */
+.byte	102,15,56,221,124,36,80	/* aesenclast 80(%rsp),%xmm7 */
+	pxor	%xmm9,%xmm15	/* xmm15 = next tweak 5 */
+
+	leaq	96(%rsi),%rsi
+	movups	%xmm2,-96(%rsi)
+	movups	%xmm3,-80(%rsi)
+	movups	%xmm4,-64(%rsi)
+	movups	%xmm5,-48(%rsi)
+	movups	%xmm6,-32(%rsi)
+	movups	%xmm7,-16(%rsi)
+	subq	$96,%rdx
+	jnc	.Lxts_enc_grandloop
+
+	movl	$16+96,%eax
+	subl	%r10d,%eax	/* undo the index bias: eax = 16 * iteration count */
+	movq	%rbp,%rcx	/* rcx = key schedule base again for the tail helpers */
+	shrl	$4,%eax	/* eax = iteration count */
+
+.Lxts_enc_short:	/* Tail: 0..5 whole blocks remain. Dispatches on the remaining byte count; the fall-through path handles five blocks. */
+
+	movl	%eax,%r10d	/* r10d = iteration count, kept for the stealing step */
+	pxor	%xmm0,%xmm10	/* remove the pre-folded round key 0, leaving the plain tweak */
+	addq	$96,%rdx	/* undo the earlier subtraction: rdx = remaining whole-block bytes */
+	jz	.Lxts_enc_done
+
+	pxor	%xmm0,%xmm11
+	cmpq	$0x20,%rdx
+	jb	.Lxts_enc_one
+	pxor	%xmm0,%xmm12	/* pxor leaves EFLAGS untouched, so je still tests the cmpq above */
+	je	.Lxts_enc_two
+
+	pxor	%xmm0,%xmm13
+	cmpq	$0x40,%rdx
+	jb	.Lxts_enc_three
+	pxor	%xmm0,%xmm14
+	je	.Lxts_enc_four
+
+	movdqu	(%rdi),%xmm2
+	movdqu	16(%rdi),%xmm3
+	movdqu	32(%rdi),%xmm4
+	pxor	%xmm10,%xmm2
+	movdqu	48(%rdi),%xmm5
+	pxor	%xmm11,%xmm3
+	movdqu	64(%rdi),%xmm6
+	leaq	80(%rdi),%rdi
+	pxor	%xmm12,%xmm4
+	pxor	%xmm13,%xmm5
+	pxor	%xmm14,%xmm6
+	pxor	%xmm7,%xmm7	/* sixth lane is unused: feed it zeros */
+
+	call	_aesni_encrypt6	/* defined outside this chunk; presumably encrypts xmm2..xmm7 with the key at rcx -- confirm */
+
+	xorps	%xmm10,%xmm2	/* second tweak XOR on the cipher output */
+	movdqa	%xmm15,%xmm10	/* xmm10 = next tweak, consumed by the stealing code in .Lxts_enc_done */
+	xorps	%xmm11,%xmm3
+	xorps	%xmm12,%xmm4
+	movdqu	%xmm2,(%rsi)
+	xorps	%xmm13,%xmm5
+	movdqu	%xmm3,16(%rsi)
+	xorps	%xmm14,%xmm6
+	movdqu	%xmm4,32(%rsi)
+	movdqu	%xmm5,48(%rsi)
+	movdqu	%xmm6,64(%rsi)
+	leaq	80(%rsi),%rsi
+	jmp	.Lxts_enc_done
+
+.align	16
+.Lxts_enc_one:	/* Tail case: exactly one whole block left; tweak in xmm10, next tweak in xmm11. */
+	movups	(%rdi),%xmm2
+	leaq	16(%rdi),%rdi
+	xorps	%xmm10,%xmm2	/* input ^ tweak */
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2	/* whiten with round key 0 */
+.Loop_enc1_9:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	.Loop_enc1_9
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
+	xorps	%xmm10,%xmm2	/* output ^ tweak */
+	movdqa	%xmm11,%xmm10	/* xmm10 = next tweak for the stealing step */
+	movups	%xmm2,(%rsi)
+	leaq	16(%rsi),%rsi
+	jmp	.Lxts_enc_done
+
+.align	16
+.Lxts_enc_two:	/* Tail case: two whole blocks left; tweaks in xmm10 and xmm11, next tweak in xmm12. */
+	movups	(%rdi),%xmm2
+	movups	16(%rdi),%xmm3
+	leaq	32(%rdi),%rdi
+	xorps	%xmm10,%xmm2	/* input ^ tweak */
+	xorps	%xmm11,%xmm3
+
+	call	_aesni_encrypt2	/* defined outside this chunk; presumably encrypts xmm2 and xmm3 -- confirm */
+
+	xorps	%xmm10,%xmm2	/* output ^ tweak */
+	movdqa	%xmm12,%xmm10	/* xmm10 = next tweak for the stealing step */
+	xorps	%xmm11,%xmm3
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	leaq	32(%rsi),%rsi
+	jmp	.Lxts_enc_done
+
+.align	16
+.Lxts_enc_three:	/* Tail case: three whole blocks left; tweaks in xmm10..xmm12, next tweak in xmm13. */
+	movups	(%rdi),%xmm2
+	movups	16(%rdi),%xmm3
+	movups	32(%rdi),%xmm4
+	leaq	48(%rdi),%rdi
+	xorps	%xmm10,%xmm2	/* input ^ tweak */
+	xorps	%xmm11,%xmm3
+	xorps	%xmm12,%xmm4
+
+	call	_aesni_encrypt3	/* defined outside this chunk; presumably encrypts xmm2..xmm4 -- confirm */
+
+	xorps	%xmm10,%xmm2	/* output ^ tweak */
+	movdqa	%xmm13,%xmm10	/* xmm10 = next tweak for the stealing step */
+	xorps	%xmm11,%xmm3
+	xorps	%xmm12,%xmm4
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	leaq	48(%rsi),%rsi
+	jmp	.Lxts_enc_done
+
+.align	16
+.Lxts_enc_four:	/* Tail case: four whole blocks left; tweaks in xmm10..xmm13, next tweak in xmm14. */
+	movups	(%rdi),%xmm2
+	movups	16(%rdi),%xmm3
+	movups	32(%rdi),%xmm4
+	xorps	%xmm10,%xmm2	/* input ^ tweak */
+	movups	48(%rdi),%xmm5
+	leaq	64(%rdi),%rdi
+	xorps	%xmm11,%xmm3
+	xorps	%xmm12,%xmm4
+	xorps	%xmm13,%xmm5
+
+	call	_aesni_encrypt4	/* defined outside this chunk; presumably encrypts xmm2..xmm5 -- confirm */
+
+	pxor	%xmm10,%xmm2	/* output ^ tweak */
+	movdqa	%xmm14,%xmm10	/* xmm10 = next tweak for the stealing step */
+	pxor	%xmm11,%xmm3
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm3,16(%rsi)
+	movdqu	%xmm4,32(%rsi)
+	movdqu	%xmm5,48(%rsi)
+	leaq	64(%rsi),%rsi
+	jmp	.Lxts_enc_done
+
+.align	16
+.Lxts_enc_done:	/* All whole blocks are written. If the length had a 1..15 byte remainder, perform ciphertext stealing using the tweak in xmm10. */
+	andq	$15,%r9	/* r9 = number of trailing partial bytes */
+	jz	.Lxts_enc_ret
+	movq	%r9,%rdx
+
+.Lxts_enc_steal:	/* byte loop: move the head of the last ciphertext block to the partial output, and put the partial input in its place */
+	movzbl	(%rdi),%eax	/* next trailing input byte */
+	movzbl	-16(%rsi),%ecx	/* matching byte of the last ciphertext block */
+	leaq	1(%rdi),%rdi
+	movb	%al,-16(%rsi)
+	movb	%cl,0(%rsi)
+	leaq	1(%rsi),%rsi
+	subq	$1,%rdx
+	jnz	.Lxts_enc_steal
+
+	subq	%r9,%rsi	/* rewind rsi to just past the last full block */
+	movq	%rbp,%rcx	/* rcx = key schedule base */
+	movl	%r10d,%eax	/* eax = iteration count */
+
+	movups	-16(%rsi),%xmm2	/* the patched block: partial input followed by the rest of the old ciphertext */
+	xorps	%xmm10,%xmm2
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+.Loop_enc1_10:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	.Loop_enc1_10
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
+	xorps	%xmm10,%xmm2
+	movups	%xmm2,-16(%rsi)
+
+.Lxts_enc_ret:	/* Exit: clear every xmm register and the 112-byte stack scratch area, then restore rbp/rsp and return. */
+	xorps	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	movaps	%xmm0,0(%rsp)	/* zero the scratch slots 0..96(%rsp) using the cleared xmm0 */
+	pxor	%xmm8,%xmm8
+	movaps	%xmm0,16(%rsp)
+	pxor	%xmm9,%xmm9
+	movaps	%xmm0,32(%rsp)
+	pxor	%xmm10,%xmm10
+	movaps	%xmm0,48(%rsp)
+	pxor	%xmm11,%xmm11
+	movaps	%xmm0,64(%rsp)
+	pxor	%xmm12,%xmm12
+	movaps	%xmm0,80(%rsp)
+	pxor	%xmm13,%xmm13
+	movaps	%xmm0,96(%rsp)
+	pxor	%xmm14,%xmm14
+	pxor	%xmm15,%xmm15
+	movq	-8(%r11),%rbp	/* r11 = entry rsp, so -8(%r11) is the rbp pushed on entry */
+	leaq	(%r11),%rsp	/* rsp = entry rsp */
+.Lxts_enc_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+.size	aesni_xts_encrypt,.-aesni_xts_encrypt
+.globl	aesni_xts_decrypt
+.hidden aesni_xts_decrypt
+.type	aesni_xts_decrypt,@function
+.align	16
+aesni_xts_decrypt:	/* XTS decryption. Reads input at (%rdi), writes output at (%rsi), byte length in %rdx; %rcx = key schedule used on the data, %r8 = key schedule used to encrypt the tweak, %r9 = pointer to the 16-byte tweak input. */
+	leaq	(%rsp),%r11	/* r11 = entry rsp; the exit path restores rbp and rsp from it */
+	pushq	%rbp
+	subq	$112,%rsp	/* 112 bytes of scratch: 0..95 hold per-block tweak slots, 96 holds a combined round key */
+	andq	$-16,%rsp	/* 16-byte align so movdqa/movaps on the stack are legal */
+	movups	(%r9),%xmm2	/* xmm2 = 16 bytes at (%r9), encrypted below to form the first tweak */
+	movl	240(%r8),%eax	/* eax = aesenc iteration count stored at offset 240 of the r8 key */
+	movl	240(%rcx),%r10d	/* r10d = same field of the rcx key */
+	movups	(%r8),%xmm0
+	movups	16(%r8),%xmm1
+	leaq	32(%r8),%r8
+	xorps	%xmm0,%xmm2	/* whiten with round key 0 of the r8 key */
+.Loop_enc1_11:	/* the tweak is produced with aesenc even though this is the decrypt routine */
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+	decl	%eax
+	movups	(%r8),%xmm1
+	leaq	16(%r8),%r8
+	jnz	.Loop_enc1_11
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 -> xmm2 = first tweak */
+	xorl	%eax,%eax
+	testq	$15,%rdx
+	setnz	%al
+	shlq	$4,%rax	/* rax = 16 if the length has a partial trailing block, else 0 */
+	subq	%rax,%rdx	/* hold back one full block for the stealing step when a partial block exists */
+
+	movups	(%rcx),%xmm0	/* xmm0 = round key 0 of the data key */
+	movq	%rcx,%rbp	/* rbp = data key schedule base for the rest of the function */
+	movl	%r10d,%eax
+	shll	$4,%r10d	/* r10 = 16 * iteration count (byte offset into the key schedule) */
+	movq	%rdx,%r9	/* r9 = adjusted length; its low 4 bits select ciphertext stealing at the end */
+	andq	$-16,%rdx	/* rdx = length rounded down to whole 16-byte blocks */
+
+	movups	16(%rcx,%r10,1),%xmm1	/* xmm1 = final round key */
+
+	movdqa	.Lxts_magic(%rip),%xmm8	/* constant defined outside this chunk; presumably the GF(2^128) reduction mask -- confirm at its definition */
+	movdqa	%xmm2,%xmm15	/* xmm15 = running tweak */
+	pshufd	$0x5f,%xmm2,%xmm9	/* xmm9 = copies of tweak dwords 3 and 1; their top bits feed the carry masks below */
+	pxor	%xmm0,%xmm1	/* xmm1 = round key 0 ^ final round key */
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9	/* expose the next carry bit for the following tweak */
+	movdqa	%xmm15,%xmm10	/* xmm10 = tweak for block 0 */
+	psrad	$31,%xmm14	/* all-ones in every dword whose top bit was set */
+	paddq	%xmm15,%xmm15	/* shift both 64-bit halves of the tweak left by one */
+	pand	%xmm8,%xmm14
+	pxor	%xmm0,%xmm10	/* pre-fold round key 0 so a single pxor both tweaks and whitens the input */
+	pxor	%xmm14,%xmm15	/* apply the masked carry bits -> next tweak */
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+	movdqa	%xmm15,%xmm11	/* xmm11 = tweak for block 1 */
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+	pxor	%xmm0,%xmm11
+	pxor	%xmm14,%xmm15
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+	movdqa	%xmm15,%xmm12	/* xmm12 = tweak for block 2 */
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+	pxor	%xmm0,%xmm12
+	pxor	%xmm14,%xmm15
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+	movdqa	%xmm15,%xmm13	/* xmm13 = tweak for block 3 */
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+	pxor	%xmm0,%xmm13
+	pxor	%xmm14,%xmm15
+	movdqa	%xmm15,%xmm14	/* xmm14 = tweak for block 4 */
+	psrad	$31,%xmm9
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm9
+	pxor	%xmm0,%xmm14
+	pxor	%xmm9,%xmm15	/* xmm15 = tweak for block 5, left without round key 0 folded in */
+	movaps	%xmm1,96(%rsp)	/* stash (round key 0 ^ final round key) for the main loop */
+
+	subq	$96,%rdx
+	jc	.Lxts_dec_short	/* fewer than six whole blocks */
+
+	movl	$16+96,%eax
+	leaq	32(%rbp,%r10,1),%rcx	/* rcx = key base + 32 + 16*count */
+	subq	%r10,%rax	/* rax = 112 - 16*count: index that .Lxts_dec_loop6 advances by 32 until it reaches zero */
+	movups	16(%rbp),%xmm1	/* xmm1 = round key 1 */
+	movq	%rax,%r10	/* keep the starting index so rax can be reloaded each iteration */
+	leaq	.Lxts_magic(%rip),%r8
+	jmp	.Lxts_dec_grandloop
+
+.align	32
+.Lxts_dec_grandloop:	/* Main loop: decrypts six blocks per iteration while computing the next six tweaks in between the AES rounds. */
+	movdqu	0(%rdi),%xmm2
+	movdqa	%xmm0,%xmm8	/* xmm8 = round key 0 */
+	movdqu	16(%rdi),%xmm3
+	pxor	%xmm10,%xmm2	/* input ^ tweak ^ round key 0 (xmm10..xmm14 already carry round key 0) */
+	movdqu	32(%rdi),%xmm4
+	pxor	%xmm11,%xmm3
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	movdqu	48(%rdi),%xmm5
+	pxor	%xmm12,%xmm4
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
+	movdqu	64(%rdi),%xmm6
+	pxor	%xmm13,%xmm5
+.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
+	movdqu	80(%rdi),%xmm7
+	pxor	%xmm15,%xmm8	/* xmm8 = round key 0 ^ tweak for block 5 */
+	movdqa	96(%rsp),%xmm9	/* xmm9 = round key 0 ^ final round key */
+	pxor	%xmm14,%xmm6
+.byte	102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
+	movups	32(%rbp),%xmm0
+	leaq	96(%rdi),%rdi
+	pxor	%xmm8,%xmm7
+
+	pxor	%xmm9,%xmm10	/* xmm10 = tweak ^ final round key: memory operand for the closing aesdeclast */
+.byte	102,15,56,222,241	/* aesdec %xmm1,%xmm6 */
+	pxor	%xmm9,%xmm11
+	movdqa	%xmm10,0(%rsp)
+.byte	102,15,56,222,249	/* aesdec %xmm1,%xmm7 */
+	movups	48(%rbp),%xmm1
+	pxor	%xmm9,%xmm12
+
+.byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2 */
+	pxor	%xmm9,%xmm13
+	movdqa	%xmm11,16(%rsp)
+.byte	102,15,56,222,216
+	pxor	%xmm9,%xmm14
+	movdqa	%xmm12,32(%rsp)
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+	pxor	%xmm9,%xmm8
+	movdqa	%xmm14,64(%rsp)
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+	movups	64(%rbp),%xmm0
+	movdqa	%xmm8,80(%rsp)
+	pshufd	$0x5f,%xmm15,%xmm9	/* carry source for the next batch of tweaks */
+	jmp	.Lxts_dec_loop6
+.align	32
+.Lxts_dec_loop6:	/* two rounds per pass on xmm2..xmm7, alternating round keys in xmm1 and xmm0 */
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+	movups	-64(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF when the index reaches zero; the movups below leaves flags intact for jnz */
+
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+	movups	-80(%rcx,%rax,1),%xmm0
+	jnz	.Lxts_dec_loop6
+
+	movdqa	(%r8),%xmm8	/* reload the .Lxts_magic constant (r8 was pointed at it before the loop) */
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+.byte	102,15,56,222,209
+	paddq	%xmm15,%xmm15
+	psrad	$31,%xmm14
+.byte	102,15,56,222,217
+	pand	%xmm8,%xmm14
+	movups	(%rbp),%xmm10	/* xmm10 = round key 0, base for the next tweak registers */
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+	pxor	%xmm14,%xmm15
+	movaps	%xmm10,%xmm11
+.byte	102,15,56,222,249
+	movups	-64(%rcx),%xmm1
+
+	movdqa	%xmm9,%xmm14
+.byte	102,15,56,222,208
+	paddd	%xmm9,%xmm9
+	pxor	%xmm15,%xmm10	/* xmm10 = round key 0 ^ next tweak 0 */
+.byte	102,15,56,222,216
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+	pand	%xmm8,%xmm14
+	movaps	%xmm11,%xmm12
+.byte	102,15,56,222,240
+	pxor	%xmm14,%xmm15
+	movdqa	%xmm9,%xmm14
+.byte	102,15,56,222,248
+	movups	-48(%rcx),%xmm0
+
+	paddd	%xmm9,%xmm9
+.byte	102,15,56,222,209
+	pxor	%xmm15,%xmm11	/* xmm11 = round key 0 ^ next tweak 1 */
+	psrad	$31,%xmm14
+.byte	102,15,56,222,217
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+	movdqa	%xmm13,48(%rsp)	/* late store of the block-3 slot, done before xmm13 is overwritten just below */
+	pxor	%xmm14,%xmm15
+.byte	102,15,56,222,241
+	movaps	%xmm12,%xmm13
+	movdqa	%xmm9,%xmm14
+.byte	102,15,56,222,249
+	movups	-32(%rcx),%xmm1
+
+	paddd	%xmm9,%xmm9
+.byte	102,15,56,222,208
+	pxor	%xmm15,%xmm12	/* xmm12 = round key 0 ^ next tweak 2 */
+	psrad	$31,%xmm14
+.byte	102,15,56,222,216
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+	pxor	%xmm14,%xmm15
+	movaps	%xmm13,%xmm14
+.byte	102,15,56,222,248
+
+	movdqa	%xmm9,%xmm0	/* xmm0 is free here, so it serves as the carry-mask temporary */
+	paddd	%xmm9,%xmm9
+.byte	102,15,56,222,209
+	pxor	%xmm15,%xmm13	/* xmm13 = round key 0 ^ next tweak 3 */
+	psrad	$31,%xmm0
+.byte	102,15,56,222,217
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm0
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+	pxor	%xmm0,%xmm15
+	movups	(%rbp),%xmm0	/* xmm0 = round key 0 again for the next iteration / tail code */
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+	movups	16(%rbp),%xmm1	/* xmm1 = round key 1 for the next iteration */
+
+	pxor	%xmm15,%xmm14	/* xmm14 = round key 0 ^ next tweak 4 */
+.byte	102,15,56,223,84,36,0	/* aesdeclast 0(%rsp),%xmm2: final round key and output tweak in one step */
+	psrad	$31,%xmm9
+	paddq	%xmm15,%xmm15
+.byte	102,15,56,223,92,36,16	/* aesdeclast 16(%rsp),%xmm3 */
+.byte	102,15,56,223,100,36,32	/* aesdeclast 32(%rsp),%xmm4 */
+	pand	%xmm8,%xmm9
+	movq	%r10,%rax	/* reset the round-key index for the next pass */
+.byte	102,15,56,223,108,36,48	/* aesdeclast 48(%rsp),%xmm5 */
+.byte	102,15,56,223,116,36,64	/* aesdeclast 64(%rsp),%xmm6 */
+.byte	102,15,56,223,124,36,80	/* aesdeclast 80(%rsp),%xmm7 */
+	pxor	%xmm9,%xmm15	/* xmm15 = next tweak 5 */
+
+	leaq	96(%rsi),%rsi
+	movups	%xmm2,-96(%rsi)
+	movups	%xmm3,-80(%rsi)
+	movups	%xmm4,-64(%rsi)
+	movups	%xmm5,-48(%rsi)
+	movups	%xmm6,-32(%rsi)
+	movups	%xmm7,-16(%rsi)
+	subq	$96,%rdx
+	jnc	.Lxts_dec_grandloop
+
+	movl	$16+96,%eax
+	subl	%r10d,%eax	/* undo the index bias: eax = 16 * iteration count */
+	movq	%rbp,%rcx	/* rcx = key schedule base again for the tail helpers */
+	shrl	$4,%eax	/* eax = iteration count */
+
+.Lxts_dec_short:	/* Tail: 0..5 whole blocks remain. Dispatches on the remaining byte count; the fall-through path handles five blocks. */
+
+	movl	%eax,%r10d	/* r10d = iteration count, kept for the stealing step */
+	pxor	%xmm0,%xmm10	/* remove the pre-folded round key 0, leaving the plain tweak */
+	pxor	%xmm0,%xmm11	/* done before the jz because the stealing code needs two plain tweaks (xmm10 and xmm11) */
+	addq	$96,%rdx	/* undo the earlier subtraction: rdx = remaining whole-block bytes */
+	jz	.Lxts_dec_done
+
+	pxor	%xmm0,%xmm12
+	cmpq	$0x20,%rdx
+	jb	.Lxts_dec_one
+	pxor	%xmm0,%xmm13	/* pxor leaves EFLAGS untouched, so je still tests the cmpq above */
+	je	.Lxts_dec_two
+
+	pxor	%xmm0,%xmm14
+	cmpq	$0x40,%rdx
+	jb	.Lxts_dec_three
+	je	.Lxts_dec_four
+
+	movdqu	(%rdi),%xmm2
+	movdqu	16(%rdi),%xmm3
+	movdqu	32(%rdi),%xmm4
+	pxor	%xmm10,%xmm2
+	movdqu	48(%rdi),%xmm5
+	pxor	%xmm11,%xmm3
+	movdqu	64(%rdi),%xmm6
+	leaq	80(%rdi),%rdi
+	pxor	%xmm12,%xmm4
+	pxor	%xmm13,%xmm5
+	pxor	%xmm14,%xmm6
+
+	call	_aesni_decrypt6	/* defined outside this chunk; presumably decrypts xmm2..xmm7 with the key at rcx -- confirm */
+
+	xorps	%xmm10,%xmm2	/* second tweak XOR on the cipher output */
+	xorps	%xmm11,%xmm3
+	xorps	%xmm12,%xmm4
+	movdqu	%xmm2,(%rsi)
+	xorps	%xmm13,%xmm5
+	movdqu	%xmm3,16(%rsi)
+	xorps	%xmm14,%xmm6
+	movdqu	%xmm4,32(%rsi)
+	pxor	%xmm14,%xmm14
+	movdqu	%xmm5,48(%rsi)
+	pcmpgtd	%xmm15,%xmm14	/* all-ones in each dword of xmm15 that is negative, i.e. has its top bit set */
+	movdqu	%xmm6,64(%rsi)
+	leaq	80(%rsi),%rsi
+	pshufd	$0x13,%xmm14,%xmm11	/* move those masks into carry position for one more tweak doubling */
+	andq	$15,%r9
+	jz	.Lxts_dec_ret
+
+	movdqa	%xmm15,%xmm10	/* xmm10 = current tweak, used for the stolen block */
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm11	/* assumes xmm8 still holds .Lxts_magic after the call -- confirm the helper preserves it */
+	pxor	%xmm15,%xmm11	/* xmm11 = following tweak, used for the last full block */
+	jmp	.Lxts_dec_done2
+
+.align	16
+.Lxts_dec_one:	/* Tail case: exactly one whole block left; tweak in xmm10, the next two tweaks in xmm11 and xmm12. */
+	movups	(%rdi),%xmm2
+	leaq	16(%rdi),%rdi
+	xorps	%xmm10,%xmm2	/* input ^ tweak */
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2	/* whiten with round key 0 */
+.Loop_dec1_12:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	.Loop_dec1_12
+.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
+	xorps	%xmm10,%xmm2	/* output ^ tweak */
+	movdqa	%xmm11,%xmm10	/* shift the two following tweaks into xmm10/xmm11 for the stealing step */
+	movups	%xmm2,(%rsi)
+	movdqa	%xmm12,%xmm11
+	leaq	16(%rsi),%rsi
+	jmp	.Lxts_dec_done
+
+.align	16
+.Lxts_dec_two:	/* Tail case: two whole blocks left; tweaks in xmm10 and xmm11, the next two tweaks in xmm12 and xmm13. */
+	movups	(%rdi),%xmm2
+	movups	16(%rdi),%xmm3
+	leaq	32(%rdi),%rdi
+	xorps	%xmm10,%xmm2	/* input ^ tweak */
+	xorps	%xmm11,%xmm3
+
+	call	_aesni_decrypt2	/* defined outside this chunk; presumably decrypts xmm2 and xmm3 -- confirm */
+
+	xorps	%xmm10,%xmm2	/* output ^ tweak */
+	movdqa	%xmm12,%xmm10	/* shift the two following tweaks into xmm10/xmm11 for the stealing step */
+	xorps	%xmm11,%xmm3
+	movdqa	%xmm13,%xmm11
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	leaq	32(%rsi),%rsi
+	jmp	.Lxts_dec_done
+
+.align	16
+.Lxts_dec_three:	/* Tail case: three whole blocks left; tweaks in xmm10..xmm12, the next two tweaks in xmm13 and xmm14. */
+	movups	(%rdi),%xmm2
+	movups	16(%rdi),%xmm3
+	movups	32(%rdi),%xmm4
+	leaq	48(%rdi),%rdi
+	xorps	%xmm10,%xmm2	/* input ^ tweak */
+	xorps	%xmm11,%xmm3
+	xorps	%xmm12,%xmm4
+
+	call	_aesni_decrypt3	/* defined outside this chunk; presumably decrypts xmm2..xmm4 -- confirm */
+
+	xorps	%xmm10,%xmm2	/* output ^ tweak */
+	movdqa	%xmm13,%xmm10	/* shift the two following tweaks into xmm10/xmm11 for the stealing step */
+	xorps	%xmm11,%xmm3
+	movdqa	%xmm14,%xmm11
+	xorps	%xmm12,%xmm4
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	leaq	48(%rsi),%rsi
+	jmp	.Lxts_dec_done
+
+.align	16
+.Lxts_dec_four:	/* Tail case: four whole blocks left; tweaks in xmm10..xmm13, the next two tweaks in xmm14 and xmm15. */
+	movups	(%rdi),%xmm2
+	movups	16(%rdi),%xmm3
+	movups	32(%rdi),%xmm4
+	xorps	%xmm10,%xmm2	/* input ^ tweak */
+	movups	48(%rdi),%xmm5
+	leaq	64(%rdi),%rdi
+	xorps	%xmm11,%xmm3
+	xorps	%xmm12,%xmm4
+	xorps	%xmm13,%xmm5
+
+	call	_aesni_decrypt4	/* defined outside this chunk; presumably decrypts xmm2..xmm5 -- confirm */
+
+	pxor	%xmm10,%xmm2	/* output ^ tweak */
+	movdqa	%xmm14,%xmm10	/* shift the two following tweaks into xmm10/xmm11 for the stealing step */
+	pxor	%xmm11,%xmm3
+	movdqa	%xmm15,%xmm11
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm3,16(%rsi)
+	movdqu	%xmm4,32(%rsi)
+	movdqu	%xmm5,48(%rsi)
+	leaq	64(%rsi),%rsi
+	jmp	.Lxts_dec_done
+
+.align	16
+.Lxts_dec_done:	/* Whole blocks are written. If the length had a 1..15 byte remainder, finish with ciphertext stealing: the last full block uses the tweak in xmm11, the reassembled block uses xmm10. */
+	andq	$15,%r9	/* r9 = number of trailing partial bytes */
+	jz	.Lxts_dec_ret
+.Lxts_dec_done2:
+	movq	%r9,%rdx
+	movq	%rbp,%rcx	/* rcx = key schedule base */
+	movl	%r10d,%eax	/* eax = iteration count */
+
+	movups	(%rdi),%xmm2	/* last full input block (the one held back at function entry) */
+	xorps	%xmm11,%xmm2
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+.Loop_dec1_13:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	.Loop_dec1_13
+.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
+	xorps	%xmm11,%xmm2
+	movups	%xmm2,(%rsi)
+
+.Lxts_dec_steal:	/* byte loop: move the head of the block just written to the partial output, and put the partial input in its place */
+	movzbl	16(%rdi),%eax	/* next trailing input byte (rdi still points at the last full block) */
+	movzbl	(%rsi),%ecx	/* matching byte of the block just written */
+	leaq	1(%rdi),%rdi
+	movb	%al,(%rsi)
+	movb	%cl,16(%rsi)
+	leaq	1(%rsi),%rsi
+	subq	$1,%rdx
+	jnz	.Lxts_dec_steal
+
+	subq	%r9,%rsi	/* rewind rsi to the start of the patched block */
+	movq	%rbp,%rcx
+	movl	%r10d,%eax
+
+	movups	(%rsi),%xmm2	/* the patched block: partial input followed by the rest of the previous result */
+	xorps	%xmm10,%xmm2
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+.Loop_dec1_14:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	.Loop_dec1_14
+.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
+	xorps	%xmm10,%xmm2
+	movups	%xmm2,(%rsi)
+
+.Lxts_dec_ret:	/* Exit: clear every xmm register and the 112-byte stack scratch area, then restore rbp/rsp and return. */
+	xorps	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	movaps	%xmm0,0(%rsp)	/* zero the scratch slots 0..96(%rsp) using the cleared xmm0 */
+	pxor	%xmm8,%xmm8
+	movaps	%xmm0,16(%rsp)
+	pxor	%xmm9,%xmm9
+	movaps	%xmm0,32(%rsp)
+	pxor	%xmm10,%xmm10
+	movaps	%xmm0,48(%rsp)
+	pxor	%xmm11,%xmm11
+	movaps	%xmm0,64(%rsp)
+	pxor	%xmm12,%xmm12
+	movaps	%xmm0,80(%rsp)
+	pxor	%xmm13,%xmm13
+	movaps	%xmm0,96(%rsp)
+	pxor	%xmm14,%xmm14
+	pxor	%xmm15,%xmm15
+	movq	-8(%r11),%rbp	/* r11 = entry rsp, so -8(%r11) is the rbp pushed on entry */
+	leaq	(%r11),%rsp	/* rsp = entry rsp */
+.Lxts_dec_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+.size	aesni_xts_decrypt,.-aesni_xts_decrypt
+.globl	aesni_ocb_encrypt
+.hidden aesni_ocb_encrypt
+.type	aesni_ocb_encrypt,@function
+.align	32
+aesni_ocb_encrypt:	/* OCB encryption of whole blocks. %rdi = input, %rsi = output, %rdx = block count, %rcx = key schedule, %r8 = starting block number, %r9 = 16-byte offset (read and written back); two further pointers come from the stack: a table of 16-byte entries (-> rbx) and a 16-byte checksum (-> rbp, read and written back). */
+	leaq	(%rsp),%rax	/* rax = entry rsp, used to reach the stack arguments */
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	movq	8(%rax),%rbx	/* first stack argument: base of the offset table */
+	movq	8+8(%rax),%rbp	/* second stack argument: checksum pointer */
+
+	movl	240(%rcx),%r10d	/* r10d = aesenc iteration count stored at offset 240 of the key */
+	movq	%rcx,%r11	/* r11 = key schedule base */
+	shll	$4,%r10d	/* r10 = 16 * iteration count */
+	movups	(%rcx),%xmm9	/* round key 0 */
+	movups	16(%rcx,%r10,1),%xmm1	/* final round key */
+
+	movdqu	(%r9),%xmm15	/* xmm15 = current offset */
+	pxor	%xmm1,%xmm9	/* xmm9 = round key 0 ^ final round key */
+	pxor	%xmm1,%xmm15	/* keep the offset pre-XORed with the final round key while in registers */
+
+	movl	$16+32,%eax
+	leaq	32(%r11,%r10,1),%rcx	/* rcx = key base + 32 + 16*count */
+	movups	16(%r11),%xmm1	/* xmm1 = round key 1 */
+	subq	%r10,%rax	/* rax = 48 - 16*count: index the helper loops advance by 32 until it reaches zero */
+	movq	%rax,%r10	/* saved copy; helpers reload rax from it before returning */
+
+	movdqu	(%rbx),%xmm10	/* xmm10 = table entry 0 */
+	movdqu	(%rbp),%xmm8	/* xmm8 = running checksum */
+
+	testq	$1,%r8
+	jnz	.Locb_enc_odd
+
+	bsfq	%r8,%r12	/* even block number: process one block alone so the bulk path starts on an odd number */
+	addq	$1,%r8
+	shlq	$4,%r12	/* r12 = 16 * (index of lowest set bit) = byte offset into the table */
+	movdqu	(%rbx,%r12,1),%xmm7
+	movdqu	(%rdi),%xmm2
+	leaq	16(%rdi),%rdi
+
+	call	__ocb_encrypt1
+
+	movdqa	%xmm7,%xmm15	/* helper leaves the updated offset (still ^ final round key) in xmm7 */
+	movups	%xmm2,(%rsi)
+	leaq	16(%rsi),%rsi
+	subq	$1,%rdx
+	jz	.Locb_enc_done
+
+.Locb_enc_odd:	/* precompute table offsets for the three even-numbered blocks of the next group of six */
+	leaq	1(%r8),%r12
+	leaq	3(%r8),%r13
+	leaq	5(%r8),%r14
+	leaq	6(%r8),%r8
+	bsfq	%r12,%r12
+	bsfq	%r13,%r13
+	bsfq	%r14,%r14
+	shlq	$4,%r12
+	shlq	$4,%r13
+	shlq	$4,%r14
+
+	subq	$6,%rdx
+	jc	.Locb_enc_short	/* fewer than six blocks left */
+	jmp	.Locb_enc_grandloop
+
+.align	32
+.Locb_enc_grandloop:	/* Bulk loop: load six input blocks, encrypt them with __ocb_encrypt6, store six output blocks. */
+	movdqu	0(%rdi),%xmm2
+	movdqu	16(%rdi),%xmm3
+	movdqu	32(%rdi),%xmm4
+	movdqu	48(%rdi),%xmm5
+	movdqu	64(%rdi),%xmm6
+	movdqu	80(%rdi),%xmm7
+	leaq	96(%rdi),%rdi
+
+	call	__ocb_encrypt6	/* also updates the offset in xmm15, the checksum in xmm8 and r12/r13/r14 for the next group */
+
+	movups	%xmm2,0(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	movups	%xmm5,48(%rsi)
+	movups	%xmm6,64(%rsi)
+	movups	%xmm7,80(%rsi)
+	leaq	96(%rsi),%rsi
+	subq	$6,%rdx
+	jnc	.Locb_enc_grandloop
+
+.Locb_enc_short:	/* Tail: 0..5 blocks remain. Dispatches on the count; the fall-through path handles five blocks. */
+	addq	$6,%rdx	/* undo the earlier subtraction: rdx = remaining block count */
+	jz	.Locb_enc_done
+
+	movdqu	0(%rdi),%xmm2
+	cmpq	$2,%rdx
+	jb	.Locb_enc_one
+	movdqu	16(%rdi),%xmm3	/* movdqu leaves EFLAGS untouched, so je still tests the cmpq above */
+	je	.Locb_enc_two
+
+	movdqu	32(%rdi),%xmm4
+	cmpq	$4,%rdx
+	jb	.Locb_enc_three
+	movdqu	48(%rdi),%xmm5
+	je	.Locb_enc_four
+
+	movdqu	64(%rdi),%xmm6
+	pxor	%xmm7,%xmm7	/* sixth lane unused: zero it so it does not change the checksum */
+
+	call	__ocb_encrypt6
+
+	movdqa	%xmm14,%xmm15	/* offset after the fifth block becomes the current offset */
+	movups	%xmm2,0(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	movups	%xmm5,48(%rsi)
+	movups	%xmm6,64(%rsi)
+
+	jmp	.Locb_enc_done
+
+.align	16
+.Locb_enc_one:	/* Tail case: one block left, already loaded in xmm2. */
+	movdqa	%xmm10,%xmm7	/* xmm10 holds table entry 0 (loaded from (%rbx)); use it as this block's offset increment */
+
+	call	__ocb_encrypt1
+
+	movdqa	%xmm7,%xmm15	/* helper leaves the updated offset in xmm7 */
+	movups	%xmm2,0(%rsi)
+	jmp	.Locb_enc_done
+
+.align	16
+.Locb_enc_two:	/* Tail case: two blocks left, already loaded in xmm2 and xmm3. */
+	pxor	%xmm4,%xmm4	/* zero the unused lanes so they do not change the checksum in xmm8 */
+	pxor	%xmm5,%xmm5
+
+	call	__ocb_encrypt4
+
+	movdqa	%xmm11,%xmm15	/* offset after the second block becomes the current offset */
+	movups	%xmm2,0(%rsi)
+	movups	%xmm3,16(%rsi)
+
+	jmp	.Locb_enc_done
+
+.align	16
+.Locb_enc_three:	/* Tail case: three blocks left, already loaded in xmm2..xmm4. */
+	pxor	%xmm5,%xmm5	/* zero the unused lane so it does not change the checksum in xmm8 */
+
+	call	__ocb_encrypt4
+
+	movdqa	%xmm12,%xmm15	/* offset after the third block becomes the current offset */
+	movups	%xmm2,0(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+
+	jmp	.Locb_enc_done
+
+.align	16
+.Locb_enc_four:	/* Tail case: four blocks left, already loaded in xmm2..xmm5; falls through to .Locb_enc_done. */
+	call	__ocb_encrypt4
+
+	movdqa	%xmm13,%xmm15	/* offset after the fourth block becomes the current offset */
+	movups	%xmm2,0(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	movups	%xmm5,48(%rsi)
+
+.Locb_enc_done:	/* Exit: write back the checksum and offset, clear all xmm registers, restore callee-saved registers and return. */
+	pxor	%xmm0,%xmm15	/* the __ocb_encrypt* helpers exit with the final round key in xmm0; this strips it from the offset */
+	movdqu	%xmm8,(%rbp)	/* store the updated checksum */
+	movdqu	%xmm15,(%r9)	/* store the updated offset */
+
+	xorps	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	pxor	%xmm8,%xmm8
+	pxor	%xmm9,%xmm9
+	pxor	%xmm10,%xmm10
+	pxor	%xmm11,%xmm11
+	pxor	%xmm12,%xmm12
+	pxor	%xmm13,%xmm13
+	pxor	%xmm14,%xmm14
+	pxor	%xmm15,%xmm15
+	leaq	40(%rsp),%rax	/* rax = rsp above the five pushed registers */
+	movq	-40(%rax),%r14
+	movq	-32(%rax),%r13
+	movq	-24(%rax),%r12
+	movq	-16(%rax),%rbp
+	movq	-8(%rax),%rbx
+	leaq	(%rax),%rsp
+.Locb_enc_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+.size	aesni_ocb_encrypt,.-aesni_ocb_encrypt
+
+.type	__ocb_encrypt6,@function
+.align	32
+__ocb_encrypt6:	/* Register-convention helper: OCB-encrypts the six blocks in xmm2..xmm7 in place. In: xmm15 = offset ^ final round key, xmm9 = round key 0 ^ final round key, xmm10 = table entry 0, xmm8 = checksum, xmm1 = round key 1, rbx = table base, r12/r13/r14 = table byte offsets, r11 = key base, rcx/rax = round-key pointer/index, r8 = block number. Out: xmm10..xmm15 = the six offsets ^ final round key, xmm8 updated, r12/r13/r14 and r8 advanced. */
+	pxor	%xmm9,%xmm15	/* xmm15 = offset ^ round key 0 */
+	movdqu	(%rbx,%r12,1),%xmm11
+	movdqa	%xmm10,%xmm12
+	movdqu	(%rbx,%r13,1),%xmm13
+	movdqa	%xmm10,%xmm14
+	pxor	%xmm15,%xmm10	/* offset for block 0 (each offset = previous offset ^ a table entry) */
+	movdqu	(%rbx,%r14,1),%xmm15
+	pxor	%xmm10,%xmm11	/* offset for block 1 */
+	pxor	%xmm2,%xmm8	/* checksum ^= input block */
+	pxor	%xmm10,%xmm2	/* input ^ offset ^ round key 0 */
+	pxor	%xmm11,%xmm12	/* offset for block 2 */
+	pxor	%xmm3,%xmm8
+	pxor	%xmm11,%xmm3
+	pxor	%xmm12,%xmm13	/* offset for block 3 */
+	pxor	%xmm4,%xmm8
+	pxor	%xmm12,%xmm4
+	pxor	%xmm13,%xmm14	/* offset for block 4 */
+	pxor	%xmm5,%xmm8
+	pxor	%xmm13,%xmm5
+	pxor	%xmm14,%xmm15	/* offset for block 5 */
+	pxor	%xmm6,%xmm8
+	pxor	%xmm14,%xmm6
+	pxor	%xmm7,%xmm8
+	pxor	%xmm15,%xmm7
+	movups	32(%r11),%xmm0	/* round key 2 */
+
+	leaq	1(%r8),%r12	/* start computing table offsets for the next group of six */
+	leaq	3(%r8),%r13
+	leaq	5(%r8),%r14
+	addq	$6,%r8
+	pxor	%xmm9,%xmm10	/* swap round key 0 for the final round key: xmm10 = offset ^ final round key */
+	bsfq	%r12,%r12
+	bsfq	%r13,%r13
+	bsfq	%r14,%r14
+
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
+.byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
+	pxor	%xmm9,%xmm11
+	pxor	%xmm9,%xmm12
+.byte	102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
+	pxor	%xmm9,%xmm13
+	pxor	%xmm9,%xmm14
+.byte	102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
+	movups	48(%r11),%xmm1
+	pxor	%xmm9,%xmm15
+
+.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
+.byte	102,15,56,220,216
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+.byte	102,15,56,220,240
+.byte	102,15,56,220,248
+	movups	64(%r11),%xmm0
+	shlq	$4,%r12
+	shlq	$4,%r13
+	jmp	.Locb_enc_loop6
+
+.align	32
+.Locb_enc_loop6:	/* two rounds per pass, alternating round keys in xmm1 and xmm0 */
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF when the index reaches zero; the movups below leaves flags intact for jnz */
+
+.byte	102,15,56,220,208
+.byte	102,15,56,220,216
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+.byte	102,15,56,220,240
+.byte	102,15,56,220,248
+	movups	-16(%rcx,%rax,1),%xmm0	/* on the last pass this loads the final round key, which stays in xmm0 on return */
+	jnz	.Locb_enc_loop6
+
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+	movups	16(%r11),%xmm1	/* restore round key 1 for the next call */
+	shlq	$4,%r14
+
+.byte	102,65,15,56,221,210	/* aesenclast %xmm10,%xmm2: final round key and output offset in one step */
+	movdqu	(%rbx),%xmm10	/* reload table entry 0 for the next call */
+	movq	%r10,%rax	/* reset the round-key index */
+.byte	102,65,15,56,221,219	/* aesenclast %xmm11,%xmm3 */
+.byte	102,65,15,56,221,228	/* aesenclast %xmm12,%xmm4 */
+.byte	102,65,15,56,221,237	/* aesenclast %xmm13,%xmm5 */
+.byte	102,65,15,56,221,246	/* aesenclast %xmm14,%xmm6 */
+.byte	102,65,15,56,221,255	/* aesenclast %xmm15,%xmm7 */
+	.byte	0xf3,0xc3	/* rep ret */
+.size	__ocb_encrypt6,.-__ocb_encrypt6
+
+.type	__ocb_encrypt4,@function
+.align	32
+__ocb_encrypt4:	/* Register-convention helper: OCB-encrypts the four blocks in xmm2..xmm5 in place. In: xmm15 = offset ^ final round key, xmm9 = round key 0 ^ final round key, xmm10 = table entry 0, xmm8 = checksum, xmm1 = round key 1, rbx = table base, r12/r13 = table byte offsets, r11 = key base, rcx/rax = round-key pointer/index. Out: xmm10..xmm13 = the four offsets ^ final round key, xmm8 updated. */
+	pxor	%xmm9,%xmm15	/* xmm15 = offset ^ round key 0 */
+	movdqu	(%rbx,%r12,1),%xmm11
+	movdqa	%xmm10,%xmm12
+	movdqu	(%rbx,%r13,1),%xmm13
+	pxor	%xmm15,%xmm10	/* offset for block 0 (each offset = previous offset ^ a table entry) */
+	pxor	%xmm10,%xmm11	/* offset for block 1 */
+	pxor	%xmm2,%xmm8	/* checksum ^= input block */
+	pxor	%xmm10,%xmm2	/* input ^ offset ^ round key 0 */
+	pxor	%xmm11,%xmm12	/* offset for block 2 */
+	pxor	%xmm3,%xmm8
+	pxor	%xmm11,%xmm3
+	pxor	%xmm12,%xmm13	/* offset for block 3 */
+	pxor	%xmm4,%xmm8
+	pxor	%xmm12,%xmm4
+	pxor	%xmm5,%xmm8
+	pxor	%xmm13,%xmm5
+	movups	32(%r11),%xmm0	/* round key 2 */
+
+	pxor	%xmm9,%xmm10	/* swap round key 0 for the final round key in each offset */
+	pxor	%xmm9,%xmm11
+	pxor	%xmm9,%xmm12
+	pxor	%xmm9,%xmm13
+
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
+.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
+.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
+.byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
+	movups	48(%r11),%xmm1
+
+.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
+.byte	102,15,56,220,216
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+	movups	64(%r11),%xmm0
+	jmp	.Locb_enc_loop4
+
+.align	32
+.Locb_enc_loop4:	/* two rounds per pass, alternating round keys in xmm1 and xmm0 */
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF when the index reaches zero; the movups below leaves flags intact for jnz */
+
+.byte	102,15,56,220,208
+.byte	102,15,56,220,216
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+	movups	-16(%rcx,%rax,1),%xmm0	/* on the last pass this loads the final round key, which stays in xmm0 on return */
+	jnz	.Locb_enc_loop4
+
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+	movups	16(%r11),%xmm1	/* restore round key 1 for the next call */
+	movq	%r10,%rax	/* reset the round-key index */
+
+.byte	102,65,15,56,221,210	/* aesenclast %xmm10,%xmm2: final round key and output offset in one step */
+.byte	102,65,15,56,221,219	/* aesenclast %xmm11,%xmm3 */
+.byte	102,65,15,56,221,228	/* aesenclast %xmm12,%xmm4 */
+.byte	102,65,15,56,221,237	/* aesenclast %xmm13,%xmm5 */
+	.byte	0xf3,0xc3	/* rep ret */
+.size	__ocb_encrypt4,.-__ocb_encrypt4
+
+.type	__ocb_encrypt1,@function
+.align	32
+__ocb_encrypt1:	/* Internal helper: single-block variant. Masks %xmm2 with %xmm7, encrypts it, and XORs the unmasked input into %xmm8. Inputs arrive in registers prepared by the caller. */
+	pxor	%xmm15,%xmm7	/* %xmm7 (table entry supplied by caller) ^= running mask */
+	pxor	%xmm9,%xmm7
+	pxor	%xmm2,%xmm8	/* accumulate the raw input block into %xmm8 */
+	pxor	%xmm7,%xmm2	/* apply the mask */
+	movups	32(%r11),%xmm0
+
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2; %xmm1 is expected to be preloaded by the caller */
+	movups	48(%r11),%xmm1
+	pxor	%xmm9,%xmm7	/* XOR %xmm9 in again; %xmm7 is the aesenclast operand below */
+
+.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
+	movups	64(%r11),%xmm0
+	jmp	.Locb_enc_loop1
+
+.align	32
+.Locb_enc_loop1:	/* two rounds per iteration; %rax counts up to zero */
+.byte	102,15,56,220,209
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF for the jnz below */
+
+.byte	102,15,56,220,208
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	.Locb_enc_loop1
+
+.byte	102,15,56,220,209
+	movups	16(%r11),%xmm1	/* reload the key at offset 16 for the next helper call */
+	movq	%r10,%rax	/* reset the loop index from %r10 */
+
+.byte	102,15,56,221,215	/* aesenclast %xmm7,%xmm2 */
+	.byte	0xf3,0xc3	/* rep ret */
+.size	__ocb_encrypt1,.-__ocb_encrypt1
+
+.globl	aesni_ocb_decrypt
+.hidden aesni_ocb_decrypt
+.type	aesni_ocb_decrypt,@function
+.align	32
+aesni_ocb_decrypt:	/* Bulk OCB decryption. Reads: %rdi input, %rsi output, %rdx count of 16-byte blocks, %rcx key schedule, %r8 starting block number, %r9 16-byte offset (in/out), plus two stack-passed pointers: a table of 16-byte entries and a 16-byte checksum (in/out). NOTE(review): there is no guard for a zero block count; callers presumably pass %rdx >= 1. */
+	leaq	(%rsp),%rax	/* remember the entry %rsp to reach the stack-passed arguments */
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	movq	8(%rax),%rbx	/* first stack argument: base of the 16-byte-entry table */
+	movq	8+8(%rax),%rbp	/* second stack argument: checksum buffer */
+
+	movl	240(%rcx),%r10d	/* round field stored at offset 240 of the key schedule */
+	movq	%rcx,%r11	/* %r11 = key schedule base for the helpers */
+	shll	$4,%r10d	/* scale to a byte offset (16 bytes per round key) */
+	movups	(%rcx),%xmm9	/* first round key */
+	movups	16(%rcx,%r10,1),%xmm1	/* last round key */
+
+	movdqu	(%r9),%xmm15	/* current offset */
+	pxor	%xmm1,%xmm9	/* %xmm9 = first key ^ last key */
+	pxor	%xmm1,%xmm15	/* running mask is kept as offset ^ last key */
+
+	movl	$16+32,%eax
+	leaq	32(%r11,%r10,1),%rcx	/* %rcx = end pointer the helper loops index backwards from */
+	movups	16(%r11),%xmm1	/* preload the key at offset 16 for the helpers */
+	subq	%r10,%rax	/* %rax = 48 - 16*rounds: negative loop index */
+	movq	%rax,%r10	/* saved copy; helpers restore %rax from it */
+
+	movdqu	(%rbx),%xmm10	/* table entry 0 */
+	movdqu	(%rbp),%xmm8	/* running checksum */
+
+	testq	$1,%r8
+	jnz	.Locb_dec_odd	/* block number already odd: go straight to the grouped path */
+
+	bsfq	%r8,%r12	/* index of the lowest set bit of the block number */
+	addq	$1,%r8
+	shlq	$4,%r12	/* scale to a table byte offset */
+	movdqu	(%rbx,%r12,1),%xmm7
+	movdqu	(%rdi),%xmm2
+	leaq	16(%rdi),%rdi
+
+	call	__ocb_decrypt1	/* process one block so the block number becomes odd */
+
+	movdqa	%xmm7,%xmm15	/* mask returned in %xmm7 becomes the running mask */
+	movups	%xmm2,(%rsi)
+	xorps	%xmm2,%xmm8	/* checksum accumulates the decrypted output */
+	leaq	16(%rsi),%rsi
+	subq	$1,%rdx
+	jz	.Locb_dec_done
+
+.Locb_dec_odd:
+	leaq	1(%r8),%r12	/* table indices for the 2nd, 4th and 6th block of the next group */
+	leaq	3(%r8),%r13
+	leaq	5(%r8),%r14
+	leaq	6(%r8),%r8
+	bsfq	%r12,%r12
+	bsfq	%r13,%r13
+	bsfq	%r14,%r14
+	shlq	$4,%r12
+	shlq	$4,%r13
+	shlq	$4,%r14
+
+	subq	$6,%rdx
+	jc	.Locb_dec_short	/* fewer than six blocks remain */
+	jmp	.Locb_dec_grandloop
+
+.align	32
+.Locb_dec_grandloop:	/* six blocks per iteration */
+	movdqu	0(%rdi),%xmm2
+	movdqu	16(%rdi),%xmm3
+	movdqu	32(%rdi),%xmm4
+	movdqu	48(%rdi),%xmm5
+	movdqu	64(%rdi),%xmm6
+	movdqu	80(%rdi),%xmm7
+	leaq	96(%rdi),%rdi
+
+	call	__ocb_decrypt6	/* also advances %r8 and recomputes %r12-%r14 for the next group */
+
+	movups	%xmm2,0(%rsi)
+	pxor	%xmm2,%xmm8
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm8
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm8
+	movups	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm8
+	movups	%xmm6,64(%rsi)
+	pxor	%xmm6,%xmm8
+	movups	%xmm7,80(%rsi)
+	pxor	%xmm7,%xmm8
+	leaq	96(%rsi),%rsi
+	subq	$6,%rdx
+	jnc	.Locb_dec_grandloop
+
+.Locb_dec_short:
+	addq	$6,%rdx	/* undo the speculative subtraction; %rdx = 0..5 blocks left */
+	jz	.Locb_dec_done
+
+	movdqu	0(%rdi),%xmm2
+	cmpq	$2,%rdx
+	jb	.Locb_dec_one
+	movdqu	16(%rdi),%xmm3	/* movdqu leaves flags intact, so je still sees the cmpq result */
+	je	.Locb_dec_two
+
+	movdqu	32(%rdi),%xmm4
+	cmpq	$4,%rdx
+	jb	.Locb_dec_three
+	movdqu	48(%rdi),%xmm5
+	je	.Locb_dec_four
+
+	movdqu	64(%rdi),%xmm6
+	pxor	%xmm7,%xmm7	/* five blocks: sixth lane is a zero dummy whose result is discarded */
+
+	call	__ocb_decrypt6
+
+	movdqa	%xmm14,%xmm15	/* mask of the fifth block becomes the running mask */
+	movups	%xmm2,0(%rsi)
+	pxor	%xmm2,%xmm8
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm8
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm8
+	movups	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm8
+	movups	%xmm6,64(%rsi)
+	pxor	%xmm6,%xmm8
+
+	jmp	.Locb_dec_done
+
+.align	16
+.Locb_dec_one:
+	movdqa	%xmm10,%xmm7	/* use table entry 0 as the single block's table value */
+
+	call	__ocb_decrypt1
+
+	movdqa	%xmm7,%xmm15
+	movups	%xmm2,0(%rsi)
+	xorps	%xmm2,%xmm8
+	jmp	.Locb_dec_done
+
+.align	16
+.Locb_dec_two:
+	pxor	%xmm4,%xmm4	/* zero the unused lanes before the four-block helper */
+	pxor	%xmm5,%xmm5
+
+	call	__ocb_decrypt4
+
+	movdqa	%xmm11,%xmm15	/* mask of the second block becomes the running mask */
+	movups	%xmm2,0(%rsi)
+	xorps	%xmm2,%xmm8
+	movups	%xmm3,16(%rsi)
+	xorps	%xmm3,%xmm8
+
+	jmp	.Locb_dec_done
+
+.align	16
+.Locb_dec_three:
+	pxor	%xmm5,%xmm5	/* zero the unused fourth lane */
+
+	call	__ocb_decrypt4
+
+	movdqa	%xmm12,%xmm15	/* mask of the third block becomes the running mask */
+	movups	%xmm2,0(%rsi)
+	xorps	%xmm2,%xmm8
+	movups	%xmm3,16(%rsi)
+	xorps	%xmm3,%xmm8
+	movups	%xmm4,32(%rsi)
+	xorps	%xmm4,%xmm8
+
+	jmp	.Locb_dec_done
+
+.align	16
+.Locb_dec_four:
+	call	__ocb_decrypt4
+
+	movdqa	%xmm13,%xmm15	/* mask of the fourth block becomes the running mask */
+	movups	%xmm2,0(%rsi)
+	pxor	%xmm2,%xmm8
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm8
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm8
+	movups	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm8
+
+.Locb_dec_done:
+	pxor	%xmm0,%xmm15	/* %xmm0 holds the key the helper loaded last (the last round key); strip it from the running mask */
+	movdqu	%xmm8,(%rbp)	/* write back the checksum */
+	movdqu	%xmm15,(%r9)	/* write back the offset */
+
+	xorps	%xmm0,%xmm0	/* clear all XMM registers so no key or data material is left behind */
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	pxor	%xmm8,%xmm8
+	pxor	%xmm9,%xmm9
+	pxor	%xmm10,%xmm10
+	pxor	%xmm11,%xmm11
+	pxor	%xmm12,%xmm12
+	pxor	%xmm13,%xmm13
+	pxor	%xmm14,%xmm14
+	pxor	%xmm15,%xmm15
+	leaq	40(%rsp),%rax	/* %rax = %rsp as it was before the five pushes */
+	movq	-40(%rax),%r14
+	movq	-32(%rax),%r13
+	movq	-24(%rax),%r12
+	movq	-16(%rax),%rbp
+	movq	-8(%rax),%rbx
+	leaq	(%rax),%rsp
+.Locb_dec_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+.size	aesni_ocb_decrypt,.-aesni_ocb_decrypt
+
+.type	__ocb_decrypt6,@function
+.align	32
+__ocb_decrypt6:	/* Internal helper for aesni_ocb_decrypt: derives six chained masks, applies them to %xmm2-%xmm7 and decrypts the six blocks. Also advances %r8 by 6 and recomputes the table offsets %r12-%r14 for the next group. */
+	pxor	%xmm9,%xmm15	/* running mask ^= (first key ^ last key) */
+	movdqu	(%rbx,%r12,1),%xmm11	/* table entries for the 2nd, 4th and 6th block */
+	movdqa	%xmm10,%xmm12	/* %xmm10 holds table entry 0; copies serve the 3rd and 5th block */
+	movdqu	(%rbx,%r13,1),%xmm13
+	movdqa	%xmm10,%xmm14
+	pxor	%xmm15,%xmm10	/* mask 0 */
+	movdqu	(%rbx,%r14,1),%xmm15
+	pxor	%xmm10,%xmm11	/* each mask = previous mask ^ table entry */
+	pxor	%xmm10,%xmm2
+	pxor	%xmm11,%xmm12
+	pxor	%xmm11,%xmm3
+	pxor	%xmm12,%xmm13
+	pxor	%xmm12,%xmm4
+	pxor	%xmm13,%xmm14
+	pxor	%xmm13,%xmm5
+	pxor	%xmm14,%xmm15
+	pxor	%xmm14,%xmm6
+	pxor	%xmm15,%xmm7
+	movups	32(%r11),%xmm0
+
+	leaq	1(%r8),%r12	/* precompute table indices for the next group of six */
+	leaq	3(%r8),%r13
+	leaq	5(%r8),%r14
+	addq	$6,%r8
+	pxor	%xmm9,%xmm10	/* XOR %xmm9 into every mask again; they are the aesdeclast operands at the end */
+	bsfq	%r12,%r12
+	bsfq	%r13,%r13
+	bsfq	%r14,%r14
+
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 (hand-encoded) */
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
+.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
+.byte	102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
+	pxor	%xmm9,%xmm11
+	pxor	%xmm9,%xmm12
+.byte	102,15,56,222,241	/* aesdec %xmm1,%xmm6 */
+	pxor	%xmm9,%xmm13
+	pxor	%xmm9,%xmm14
+.byte	102,15,56,222,249	/* aesdec %xmm1,%xmm7 */
+	movups	48(%r11),%xmm1
+	pxor	%xmm9,%xmm15
+
+.byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2 ... %xmm7 */
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+	movups	64(%r11),%xmm0
+	shlq	$4,%r12	/* scale next-group indices to byte offsets */
+	shlq	$4,%r13
+	jmp	.Locb_dec_loop6
+
+.align	32
+.Locb_dec_loop6:	/* two rounds per iteration; %rax counts up to zero */
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF for the jnz below; the instructions in between leave flags alone */
+
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	.Locb_dec_loop6
+
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+	movups	16(%r11),%xmm1	/* reload the key at offset 16 for the next helper call */
+	shlq	$4,%r14
+
+.byte	102,65,15,56,223,210	/* aesdeclast %xmm10,%xmm2 */
+	movdqu	(%rbx),%xmm10	/* reload table entry 0 for the next call */
+	movq	%r10,%rax	/* reset the loop index from %r10 */
+.byte	102,65,15,56,223,219	/* aesdeclast %xmm11,%xmm3 */
+.byte	102,65,15,56,223,228	/* aesdeclast %xmm12,%xmm4 */
+.byte	102,65,15,56,223,237	/* aesdeclast %xmm13,%xmm5 */
+.byte	102,65,15,56,223,246	/* aesdeclast %xmm14,%xmm6 */
+.byte	102,65,15,56,223,255	/* aesdeclast %xmm15,%xmm7 */
+	.byte	0xf3,0xc3	/* rep ret */
+.size	__ocb_decrypt6,.-__ocb_decrypt6
+
+.type	__ocb_decrypt4,@function
+.align	32
+__ocb_decrypt4:	/* Internal helper for aesni_ocb_decrypt: derives four chained masks, applies them to %xmm2-%xmm5 and decrypts the four blocks. Unlike the six-block helper it does not touch %r8 or %r12-%r14. */
+	pxor	%xmm9,%xmm15	/* running mask ^= (first key ^ last key) */
+	movdqu	(%rbx,%r12,1),%xmm11	/* table entry for the 2nd block */
+	movdqa	%xmm10,%xmm12	/* copy of table entry 0 for the 3rd block */
+	movdqu	(%rbx,%r13,1),%xmm13	/* table entry for the 4th block */
+	pxor	%xmm15,%xmm10	/* mask 0 */
+	pxor	%xmm10,%xmm11	/* each mask = previous mask ^ table entry */
+	pxor	%xmm10,%xmm2
+	pxor	%xmm11,%xmm12
+	pxor	%xmm11,%xmm3
+	pxor	%xmm12,%xmm13
+	pxor	%xmm12,%xmm4
+	pxor	%xmm13,%xmm5
+	movups	32(%r11),%xmm0
+
+	pxor	%xmm9,%xmm10	/* XOR %xmm9 into every mask again; they are the aesdeclast operands at the end */
+	pxor	%xmm9,%xmm11
+	pxor	%xmm9,%xmm12
+	pxor	%xmm9,%xmm13
+
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 (hand-encoded) */
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
+.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
+.byte	102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
+	movups	48(%r11),%xmm1
+
+.byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2 ... %xmm5 */
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+	movups	64(%r11),%xmm0
+	jmp	.Locb_dec_loop4
+
+.align	32
+.Locb_dec_loop4:	/* two rounds per iteration; %rax counts up to zero */
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF for the jnz below */
+
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	.Locb_dec_loop4
+
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+	movups	16(%r11),%xmm1	/* reload the key at offset 16 for the next helper call */
+	movq	%r10,%rax	/* reset the loop index from %r10 */
+
+.byte	102,65,15,56,223,210	/* aesdeclast %xmm10,%xmm2 */
+.byte	102,65,15,56,223,219	/* aesdeclast %xmm11,%xmm3 */
+.byte	102,65,15,56,223,228	/* aesdeclast %xmm12,%xmm4 */
+.byte	102,65,15,56,223,237	/* aesdeclast %xmm13,%xmm5 */
+	.byte	0xf3,0xc3	/* rep ret */
+.size	__ocb_decrypt4,.-__ocb_decrypt4
+
+.type	__ocb_decrypt1,@function
+.align	32
+__ocb_decrypt1:	/* Internal helper for aesni_ocb_decrypt: masks %xmm2 with %xmm7 and decrypts that single block; the updated mask is left in %xmm7. */
+	pxor	%xmm15,%xmm7	/* table entry supplied in %xmm7 ^= running mask */
+	pxor	%xmm9,%xmm7	/* ^= (first key ^ last key) */
+	pxor	%xmm7,%xmm2	/* apply the mask to the input block */
+	movups	32(%r11),%xmm0
+
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 (hand-encoded) */
+	movups	48(%r11),%xmm1
+	pxor	%xmm9,%xmm7	/* XOR %xmm9 in again; %xmm7 is the aesdeclast operand below */
+
+.byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2 */
+	movups	64(%r11),%xmm0
+	jmp	.Locb_dec_loop1
+
+.align	32
+.Locb_dec_loop1:	/* two rounds per iteration; %rax counts up to zero */
+.byte	102,15,56,222,209
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	/* sets ZF for the jnz below */
+
+.byte	102,15,56,222,208
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	.Locb_dec_loop1
+
+.byte	102,15,56,222,209
+	movups	16(%r11),%xmm1	/* reload the key at offset 16 for the next helper call */
+	movq	%r10,%rax	/* reset the loop index from %r10 */
+
+.byte	102,15,56,223,215	/* aesdeclast %xmm7,%xmm2 */
+	.byte	0xf3,0xc3	/* rep ret */
+.size	__ocb_decrypt1,.-__ocb_decrypt1
+.globl	aesni_cbc_encrypt
+.hidden aesni_cbc_encrypt
+.type	aesni_cbc_encrypt,@function
+.align	16
+aesni_cbc_encrypt:	/* CBC encrypt/decrypt. Reads: %rdi input, %rsi output, %rdx length in bytes, %rcx key schedule, %r8 16-byte IV (updated on return), %r9d direction (non-zero = encrypt, zero = decrypt). Returns immediately when the length is zero. */
+	testq	%rdx,%rdx
+	jz	.Lcbc_ret
+
+	movl	240(%rcx),%r10d	/* round field stored at offset 240 of the key schedule */
+	movq	%rcx,%r11	/* keep the schedule base; %rcx is advanced while walking round keys */
+	testl	%r9d,%r9d
+	jz	.Lcbc_decrypt
+
+	movups	(%r8),%xmm2	/* %xmm2 = IV / previous ciphertext block */
+	movl	%r10d,%eax
+	cmpq	$16,%rdx
+	jb	.Lcbc_enc_tail
+	subq	$16,%rdx
+	jmp	.Lcbc_enc_loop
+.align	16
+.Lcbc_enc_loop:	/* one block per iteration: ciphertext = E(plaintext ^ previous ciphertext) */
+	movups	(%rdi),%xmm3
+	leaq	16(%rdi),%rdi
+
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	xorps	%xmm0,%xmm3	/* fold the first round key into the plaintext */
+	leaq	32(%rcx),%rcx
+	xorps	%xmm3,%xmm2	/* chain with the previous ciphertext */
+.Loop_enc1_15:
+.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 (hand-encoded) */
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx	/* lea and movups leave flags alone, so jnz sees the decl result */
+	jnz	.Loop_enc1_15
+.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
+	movl	%r10d,%eax	/* reset round counter and key pointer for the next block */
+	movq	%r11,%rcx
+	movups	%xmm2,0(%rsi)
+	leaq	16(%rsi),%rsi
+	subq	$16,%rdx
+	jnc	.Lcbc_enc_loop
+	addq	$16,%rdx
+	jnz	.Lcbc_enc_tail	/* 1..15 bytes remain */
+	pxor	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	movups	%xmm2,(%r8)	/* write the last ciphertext block back as the new IV */
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	jmp	.Lcbc_ret
+
+.Lcbc_enc_tail:	/* partial final block: copy it to the output, zero-pad to 16 bytes, then encrypt it in place */
+	movq	%rdx,%rcx
+	xchgq	%rdi,%rsi
+.long	0x9066A4F3	/* rep movsb followed by a 2-byte nop */
+	movl	$16,%ecx
+	subq	%rdx,%rcx	/* number of padding bytes */
+	xorl	%eax,%eax
+.long	0x9066AAF3	/* rep stosb followed by a 2-byte nop: writes the zero padding */
+	leaq	-16(%rdi),%rdi
+	movl	%r10d,%eax
+	movq	%rdi,%rsi	/* input and output both point at the padded block */
+	movq	%r11,%rcx
+	xorq	%rdx,%rdx	/* nothing left after this block */
+	jmp	.Lcbc_enc_loop
+
+.align	16
+.Lcbc_decrypt:
+	cmpq	$16,%rdx
+	jne	.Lcbc_decrypt_bulk
+
+
+
+	movdqu	(%rdi),%xmm2	/* exactly one block: short path without a stack frame */
+	movdqu	(%r8),%xmm3
+	movdqa	%xmm2,%xmm4	/* keep the ciphertext; it becomes the new IV */
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+.Loop_dec1_16:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	decl	%r10d
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	.Loop_dec1_16
+.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
+	pxor	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	movdqu	%xmm4,(%r8)
+	xorps	%xmm3,%xmm2	/* plaintext = D(ciphertext) ^ IV */
+	pxor	%xmm3,%xmm3
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	jmp	.Lcbc_ret
+.align	16
+.Lcbc_decrypt_bulk:
+	leaq	(%rsp),%r11	/* %r11 = entry %rsp, used to unwind at .Lcbc_dec_ret */
+	pushq	%rbp
+	subq	$16,%rsp
+	andq	$-16,%rsp	/* 16-byte aligned scratch slot used by .Lcbc_dec_tail_partial */
+	movq	%rcx,%rbp	/* %rbp = key schedule base */
+	movups	(%r8),%xmm10	/* %xmm10 = IV */
+	movl	%r10d,%eax
+	cmpq	$0x50,%rdx
+	jbe	.Lcbc_dec_tail
+
+	movups	(%rcx),%xmm0
+	movdqu	0(%rdi),%xmm2	/* load six blocks; copies in %xmm11-%xmm15 are the chaining values */
+	movdqu	16(%rdi),%xmm3
+	movdqa	%xmm2,%xmm11
+	movdqu	32(%rdi),%xmm4
+	movdqa	%xmm3,%xmm12
+	movdqu	48(%rdi),%xmm5
+	movdqa	%xmm4,%xmm13
+	movdqu	64(%rdi),%xmm6
+	movdqa	%xmm5,%xmm14
+	movdqu	80(%rdi),%xmm7
+	movdqa	%xmm6,%xmm15
+	leaq	OPENSSL_ia32cap_P(%rip),%r9
+	movl	4(%r9),%r9d	/* second 32-bit word of the CPU capability vector */
+	cmpq	$0x70,%rdx
+	jbe	.Lcbc_dec_six_or_seven
+
+	andl	$71303168,%r9d	/* 0x04400000: keep bits 26 and 22 */
+	subq	$0x50,%rdx
+	cmpl	$4194304,%r9d	/* 0x00400000: bit 22 set, bit 26 clear; presumably selects CPUs that prefer the 6-block loop - TODO confirm */
+	je	.Lcbc_dec_loop6_enter
+	subq	$0x20,%rdx
+	leaq	112(%rcx),%rcx	/* bias the key pointer; round keys are addressed as N-112(%rcx) below */
+	jmp	.Lcbc_dec_loop8_enter
+.align	16
+.Lcbc_dec_loop8:
+	movups	%xmm9,(%rsi)	/* store the eighth block of the previous iteration */
+	leaq	16(%rsi),%rsi
+.Lcbc_dec_loop8_enter:
+	movdqu	96(%rdi),%xmm8
+	pxor	%xmm0,%xmm2
+	movdqu	112(%rdi),%xmm9
+	pxor	%xmm0,%xmm3
+	movups	16-112(%rcx),%xmm1
+	pxor	%xmm0,%xmm4
+	movq	$-1,%rbp
+	cmpq	$0x70,%rdx	/* CF is consumed by the adcq further down; the SSE/AES instructions in between leave flags alone */
+	pxor	%xmm0,%xmm5
+	pxor	%xmm0,%xmm6
+	pxor	%xmm0,%xmm7
+	pxor	%xmm0,%xmm8
+
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	pxor	%xmm0,%xmm9
+	movups	32-112(%rcx),%xmm0
+.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 ... %xmm7 */
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.byte	102,68,15,56,222,193	/* aesdec %xmm1,%xmm8 */
+	adcq	$0,%rbp	/* %rbp = 0 if %rdx < 0x70, else -1 */
+	andq	$128,%rbp	/* ... = 0 or 128 */
+.byte	102,68,15,56,222,201	/* aesdec %xmm1,%xmm9 */
+	addq	%rdi,%rbp	/* %rbp = where the next group is preloaded from: %rdi+128, or %rdi again when too little input remains */
+	movups	48-112(%rcx),%xmm1
+.byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2 ... %xmm9 */
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+.byte	102,68,15,56,222,192
+.byte	102,68,15,56,222,200
+	movups	64-112(%rcx),%xmm0
+	nop
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.byte	102,68,15,56,222,193
+.byte	102,68,15,56,222,201
+	movups	80-112(%rcx),%xmm1
+	nop
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+.byte	102,68,15,56,222,192
+.byte	102,68,15,56,222,200
+	movups	96-112(%rcx),%xmm0
+	nop
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.byte	102,68,15,56,222,193
+.byte	102,68,15,56,222,201
+	movups	112-112(%rcx),%xmm1
+	nop
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+.byte	102,68,15,56,222,192
+.byte	102,68,15,56,222,200
+	movups	128-112(%rcx),%xmm0
+	nop
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.byte	102,68,15,56,222,193
+.byte	102,68,15,56,222,201
+	movups	144-112(%rcx),%xmm1
+	cmpl	$11,%eax	/* round field vs 11; the jb/je below consume these flags */
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+.byte	102,68,15,56,222,192
+.byte	102,68,15,56,222,200
+	movups	160-112(%rcx),%xmm0
+	jb	.Lcbc_dec_done	/* round field below 11: no further middle rounds */
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.byte	102,68,15,56,222,193
+.byte	102,68,15,56,222,201
+	movups	176-112(%rcx),%xmm1
+	nop
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+.byte	102,68,15,56,222,192
+.byte	102,68,15,56,222,200
+	movups	192-112(%rcx),%xmm0
+	je	.Lcbc_dec_done	/* round field equal to 11 */
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.byte	102,68,15,56,222,193
+.byte	102,68,15,56,222,201
+	movups	208-112(%rcx),%xmm1
+	nop
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+.byte	102,68,15,56,222,192
+.byte	102,68,15,56,222,200
+	movups	224-112(%rcx),%xmm0
+	jmp	.Lcbc_dec_done
+.align	16
+.Lcbc_dec_done:	/* %xmm0 = last round key; XOR it into the chaining values so aesdeclast also performs the CBC XOR */
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+	pxor	%xmm0,%xmm10
+	pxor	%xmm0,%xmm11
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+	pxor	%xmm0,%xmm12
+	pxor	%xmm0,%xmm13
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+	pxor	%xmm0,%xmm14
+	pxor	%xmm0,%xmm15
+.byte	102,68,15,56,222,193
+.byte	102,68,15,56,222,201
+	movdqu	80(%rdi),%xmm1	/* ciphertext blocks 5..7 of this group are re-read as chaining values */
+
+.byte	102,65,15,56,223,210	/* aesdeclast %xmm10,%xmm2 */
+	movdqu	96(%rdi),%xmm10
+	pxor	%xmm0,%xmm1
+.byte	102,65,15,56,223,219	/* aesdeclast %xmm11,%xmm3 */
+	pxor	%xmm0,%xmm10
+	movdqu	112(%rdi),%xmm0	/* last ciphertext block of the group: the next chaining value */
+.byte	102,65,15,56,223,228	/* aesdeclast %xmm12,%xmm4 */
+	leaq	128(%rdi),%rdi
+	movdqu	0(%rbp),%xmm11	/* preload the next group's blocks from %rbp */
+.byte	102,65,15,56,223,237	/* aesdeclast %xmm13,%xmm5 */
+.byte	102,65,15,56,223,246	/* aesdeclast %xmm14,%xmm6 */
+	movdqu	16(%rbp),%xmm12
+	movdqu	32(%rbp),%xmm13
+.byte	102,65,15,56,223,255	/* aesdeclast %xmm15,%xmm7 */
+.byte	102,68,15,56,223,193	/* aesdeclast %xmm1,%xmm8 */
+	movdqu	48(%rbp),%xmm14
+	movdqu	64(%rbp),%xmm15
+.byte	102,69,15,56,223,202	/* aesdeclast %xmm10,%xmm9 */
+	movdqa	%xmm0,%xmm10
+	movdqu	80(%rbp),%xmm1
+	movups	-112(%rcx),%xmm0	/* first round key for the next iteration */
+
+	movups	%xmm2,(%rsi)	/* store seven results and move the preloaded blocks into the working registers */
+	movdqa	%xmm11,%xmm2
+	movups	%xmm3,16(%rsi)
+	movdqa	%xmm12,%xmm3
+	movups	%xmm4,32(%rsi)
+	movdqa	%xmm13,%xmm4
+	movups	%xmm5,48(%rsi)
+	movdqa	%xmm14,%xmm5
+	movups	%xmm6,64(%rsi)
+	movdqa	%xmm15,%xmm6
+	movups	%xmm7,80(%rsi)
+	movdqa	%xmm1,%xmm7
+	movups	%xmm8,96(%rsi)
+	leaq	112(%rsi),%rsi
+
+	subq	$0x80,%rdx
+	ja	.Lcbc_dec_loop8
+
+	movaps	%xmm9,%xmm2	/* eighth result is still pending in %xmm9 */
+	leaq	-112(%rcx),%rcx	/* undo the key-pointer bias */
+	addq	$0x70,%rdx
+	jle	.Lcbc_dec_clear_tail_collected
+	movups	%xmm9,(%rsi)
+	leaq	16(%rsi),%rsi
+	cmpq	$0x50,%rdx
+	jbe	.Lcbc_dec_tail
+
+	movaps	%xmm11,%xmm2
+.Lcbc_dec_six_or_seven:
+	cmpq	$0x60,%rdx
+	ja	.Lcbc_dec_seven
+
+	movaps	%xmm7,%xmm8	/* keep the sixth ciphertext block; it becomes the new IV */
+	call	_aesni_decrypt6
+	pxor	%xmm10,%xmm2	/* CBC XOR with the IV / previous ciphertext blocks */
+	movaps	%xmm8,%xmm10
+	pxor	%xmm11,%xmm3
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3	/* registers are cleared as soon as they have been stored */
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	pxor	%xmm14,%xmm6
+	movdqu	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm5
+	pxor	%xmm15,%xmm7
+	movdqu	%xmm6,64(%rsi)
+	pxor	%xmm6,%xmm6
+	leaq	80(%rsi),%rsi
+	movdqa	%xmm7,%xmm2	/* last result is handed to .Lcbc_dec_tail_collected in %xmm2 */
+	pxor	%xmm7,%xmm7
+	jmp	.Lcbc_dec_tail_collected
+
+.align	16
+.Lcbc_dec_seven:
+	movups	96(%rdi),%xmm8
+	xorps	%xmm9,%xmm9	/* eighth lane is a zero dummy */
+	call	_aesni_decrypt8
+	movups	80(%rdi),%xmm9	/* sixth ciphertext block: chaining value for the seventh */
+	pxor	%xmm10,%xmm2
+	movups	96(%rdi),%xmm10	/* seventh ciphertext block becomes the new IV */
+	pxor	%xmm11,%xmm3
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	pxor	%xmm14,%xmm6
+	movdqu	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm5
+	pxor	%xmm15,%xmm7
+	movdqu	%xmm6,64(%rsi)
+	pxor	%xmm6,%xmm6
+	pxor	%xmm9,%xmm8
+	movdqu	%xmm7,80(%rsi)
+	pxor	%xmm7,%xmm7
+	leaq	96(%rsi),%rsi
+	movdqa	%xmm8,%xmm2
+	pxor	%xmm8,%xmm8
+	pxor	%xmm9,%xmm9
+	jmp	.Lcbc_dec_tail_collected
+
+.align	16
+.Lcbc_dec_loop6:
+	movups	%xmm7,(%rsi)	/* store the sixth block of the previous iteration */
+	leaq	16(%rsi),%rsi
+	movdqu	0(%rdi),%xmm2
+	movdqu	16(%rdi),%xmm3
+	movdqa	%xmm2,%xmm11
+	movdqu	32(%rdi),%xmm4
+	movdqa	%xmm3,%xmm12
+	movdqu	48(%rdi),%xmm5
+	movdqa	%xmm4,%xmm13
+	movdqu	64(%rdi),%xmm6
+	movdqa	%xmm5,%xmm14
+	movdqu	80(%rdi),%xmm7
+	movdqa	%xmm6,%xmm15
+.Lcbc_dec_loop6_enter:
+	leaq	96(%rdi),%rdi
+	movdqa	%xmm7,%xmm8	/* keep the sixth ciphertext block as the next chaining value */
+
+	call	_aesni_decrypt6
+
+	pxor	%xmm10,%xmm2
+	movdqa	%xmm8,%xmm10
+	pxor	%xmm11,%xmm3
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm3,16(%rsi)
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm4,32(%rsi)
+	pxor	%xmm14,%xmm6
+	movq	%rbp,%rcx	/* restore key pointer and round counter after the call */
+	movdqu	%xmm5,48(%rsi)
+	pxor	%xmm15,%xmm7
+	movl	%r10d,%eax
+	movdqu	%xmm6,64(%rsi)
+	leaq	80(%rsi),%rsi
+	subq	$0x60,%rdx
+	ja	.Lcbc_dec_loop6
+
+	movdqa	%xmm7,%xmm2
+	addq	$0x50,%rdx
+	jle	.Lcbc_dec_clear_tail_collected
+	movups	%xmm7,(%rsi)
+	leaq	16(%rsi),%rsi
+
+.Lcbc_dec_tail:	/* up to five remaining blocks; %rdx is stepped down by 16 to pick the case */
+	movups	(%rdi),%xmm2
+	subq	$0x10,%rdx
+	jbe	.Lcbc_dec_one
+
+	movups	16(%rdi),%xmm3
+	movaps	%xmm2,%xmm11
+	subq	$0x10,%rdx
+	jbe	.Lcbc_dec_two
+
+	movups	32(%rdi),%xmm4
+	movaps	%xmm3,%xmm12
+	subq	$0x10,%rdx
+	jbe	.Lcbc_dec_three
+
+	movups	48(%rdi),%xmm5
+	movaps	%xmm4,%xmm13
+	subq	$0x10,%rdx
+	jbe	.Lcbc_dec_four
+
+	movups	64(%rdi),%xmm6
+	movaps	%xmm5,%xmm14
+	movaps	%xmm6,%xmm15
+	xorps	%xmm7,%xmm7	/* five blocks: sixth lane is a zero dummy */
+	call	_aesni_decrypt6
+	pxor	%xmm10,%xmm2
+	movaps	%xmm15,%xmm10	/* fifth ciphertext block becomes the new IV */
+	pxor	%xmm11,%xmm3
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	pxor	%xmm14,%xmm6
+	movdqu	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm5
+	leaq	64(%rsi),%rsi
+	movdqa	%xmm6,%xmm2
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	subq	$0x10,%rdx
+	jmp	.Lcbc_dec_tail_collected
+
+.align	16
+.Lcbc_dec_one:
+	movaps	%xmm2,%xmm11	/* keep the ciphertext; it becomes the new IV */
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+.Loop_dec1_17:
+.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	.Loop_dec1_17
+.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
+	xorps	%xmm10,%xmm2
+	movaps	%xmm11,%xmm10
+	jmp	.Lcbc_dec_tail_collected
+.align	16
+.Lcbc_dec_two:
+	movaps	%xmm3,%xmm12
+	call	_aesni_decrypt2
+	pxor	%xmm10,%xmm2
+	movaps	%xmm12,%xmm10
+	pxor	%xmm11,%xmm3
+	movdqu	%xmm2,(%rsi)
+	movdqa	%xmm3,%xmm2
+	pxor	%xmm3,%xmm3
+	leaq	16(%rsi),%rsi
+	jmp	.Lcbc_dec_tail_collected
+.align	16
+.Lcbc_dec_three:
+	movaps	%xmm4,%xmm13
+	call	_aesni_decrypt3
+	pxor	%xmm10,%xmm2
+	movaps	%xmm13,%xmm10
+	pxor	%xmm11,%xmm3
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	movdqa	%xmm4,%xmm2
+	pxor	%xmm4,%xmm4
+	leaq	32(%rsi),%rsi
+	jmp	.Lcbc_dec_tail_collected
+.align	16
+.Lcbc_dec_four:
+	movaps	%xmm5,%xmm14
+	call	_aesni_decrypt4
+	pxor	%xmm10,%xmm2
+	movaps	%xmm14,%xmm10
+	pxor	%xmm11,%xmm3
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	movdqa	%xmm5,%xmm2
+	pxor	%xmm5,%xmm5
+	leaq	48(%rsi),%rsi
+	jmp	.Lcbc_dec_tail_collected
+
+.align	16
+.Lcbc_dec_clear_tail_collected:	/* entered from the bulk loops, which do not clear their working registers */
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	pxor	%xmm8,%xmm8
+	pxor	%xmm9,%xmm9
+.Lcbc_dec_tail_collected:	/* %xmm2 = final block not yet stored, %xmm10 = new IV */
+	movups	%xmm10,(%r8)
+	andq	$15,%rdx
+	jnz	.Lcbc_dec_tail_partial
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	jmp	.Lcbc_dec_ret
+.align	16
+.Lcbc_dec_tail_partial:	/* spill the final block to the stack slot and copy only part of it to the output */
+	movaps	%xmm2,(%rsp)
+	pxor	%xmm2,%xmm2
+	movq	$16,%rcx
+	movq	%rsi,%rdi
+	subq	%rdx,%rcx	/* byte count = 16 - (%rdx & 15); NOTE(review): confirm this is the intended count for lengths that are not a multiple of 16 */
+	leaq	(%rsp),%rsi
+.long	0x9066A4F3	/* rep movsb followed by a 2-byte nop */
+	movdqa	%xmm2,(%rsp)	/* overwrite the stack copy with zeros */
+
+.Lcbc_dec_ret:
+	xorps	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	movq	-8(%r11),%rbp	/* restore %rbp pushed at .Lcbc_decrypt_bulk */
+	leaq	(%r11),%rsp
+.Lcbc_ret:
+	.byte	0xf3,0xc3	/* rep ret */
+.size	aesni_cbc_encrypt,.-aesni_cbc_encrypt
+.globl	aesni_set_decrypt_key
+.hidden aesni_set_decrypt_key
+.type	aesni_set_decrypt_key,@function
+.align	16
+aesni_set_decrypt_key:	/* Builds a decryption key schedule: expands the key via __aesni_set_encrypt_key (same %rdi/%esi/%rdx arguments), then reverses the round-key order and applies aesimc to the inner keys. Returns that routine's status in %eax (0 on success). */
+.byte	0x48,0x83,0xEC,0x08	/* subq $8,%rsp (hand-encoded) */
+	call	__aesni_set_encrypt_key
+	shll	$4,%esi	/* %esi = round field left by the expansion, scaled to a byte offset */
+	testl	%eax,%eax
+	jnz	.Ldec_key_ret	/* expansion failed: return its error code unchanged */
+	leaq	16(%rdx,%rsi,1),%rdi	/* %rdi = last round key, %rdx = first round key */
+
+	movups	(%rdx),%xmm0	/* swap first and last round keys without transforming them */
+	movups	(%rdi),%xmm1
+	movups	%xmm0,(%rdi)
+	movups	%xmm1,(%rdx)
+	leaq	16(%rdx),%rdx
+	leaq	-16(%rdi),%rdi
+
+.Ldec_key_inverse:	/* walk inwards from both ends, swapping pairs and applying aesimc to each */
+	movups	(%rdx),%xmm0
+	movups	(%rdi),%xmm1
+.byte	102,15,56,219,192	/* aesimc %xmm0,%xmm0 */
+.byte	102,15,56,219,201	/* aesimc %xmm1,%xmm1 */
+	leaq	16(%rdx),%rdx
+	leaq	-16(%rdi),%rdi
+	movups	%xmm0,16(%rdi)
+	movups	%xmm1,-16(%rdx)
+	cmpq	%rdx,%rdi
+	ja	.Ldec_key_inverse
+
+	movups	(%rdx),%xmm0	/* middle round key is transformed in place */
+.byte	102,15,56,219,192	/* aesimc %xmm0,%xmm0 */
+	pxor	%xmm1,%xmm1
+	movups	%xmm0,(%rdi)
+	pxor	%xmm0,%xmm0
+.Ldec_key_ret:
+	addq	$8,%rsp
+	.byte	0xf3,0xc3	/* rep ret */
+.LSEH_end_set_decrypt_key:
+.size	aesni_set_decrypt_key,.-aesni_set_decrypt_key
+.globl	aesni_set_encrypt_key
+.hidden aesni_set_encrypt_key
+.type	aesni_set_encrypt_key,@function
+.align	16
+aesni_set_encrypt_key:	/* AES key expansion. Reads: %rdi user key, %esi key size in bits (128, 192 or 256), %rdx output key schedule. Returns in %rax: 0 on success, -1 if %rdi or %rdx is NULL, -2 for any other key size. The round field (9, 11 or 13) is stored at offset 240 of the schedule and is also left in %esi. */
+__aesni_set_encrypt_key:	/* internal alias called by aesni_set_decrypt_key */
+.byte	0x48,0x83,0xEC,0x08	/* subq $8,%rsp (hand-encoded) */
+	movq	$-1,%rax
+	testq	%rdi,%rdi
+	jz	.Lenc_key_ret
+	testq	%rdx,%rdx
+	jz	.Lenc_key_ret
+
+	movups	(%rdi),%xmm0	/* first 16 bytes of the user key */
+	xorps	%xmm4,%xmm4
+	leaq	OPENSSL_ia32cap_P(%rip),%r10
+	movl	4(%r10),%r10d	/* second 32-bit word of the CPU capability vector */
+	andl	$268437504,%r10d	/* 0x10000800: keep bits 28 and 11 */
+	leaq	16(%rdx),%rax	/* %rax = write cursor, starting at the second round-key slot */
+	cmpl	$256,%esi
+	je	.L14rounds
+	cmpl	$192,%esi
+	je	.L12rounds
+	cmpl	$128,%esi
+	jne	.Lbad_keybits
+
+.L10rounds:
+	movl	$9,%esi
+	cmpl	$268435456,%r10d	/* 0x10000000: bit 28 set, bit 11 clear selects the pshufb/aesenclast variant; presumably a CPU-tuning choice - TODO confirm */
+	je	.L10rounds_alt
+
+	movups	%xmm0,(%rdx)
+.byte	102,15,58,223,200,1	/* aeskeygenassist $1,%xmm0,%xmm1; the last byte is the round constant */
+	call	.Lkey_expansion_128_cold
+.byte	102,15,58,223,200,2
+	call	.Lkey_expansion_128
+.byte	102,15,58,223,200,4
+	call	.Lkey_expansion_128
+.byte	102,15,58,223,200,8
+	call	.Lkey_expansion_128
+.byte	102,15,58,223,200,16
+	call	.Lkey_expansion_128
+.byte	102,15,58,223,200,32
+	call	.Lkey_expansion_128
+.byte	102,15,58,223,200,64
+	call	.Lkey_expansion_128
+.byte	102,15,58,223,200,128
+	call	.Lkey_expansion_128
+.byte	102,15,58,223,200,27
+	call	.Lkey_expansion_128
+.byte	102,15,58,223,200,54
+	call	.Lkey_expansion_128
+	movups	%xmm0,(%rax)	/* final round key */
+	movl	%esi,80(%rax)	/* round field; lands at offset 240 of the schedule */
+	xorl	%eax,%eax
+	jmp	.Lenc_key_ret
+
+.align	16
+.L10rounds_alt:
+	movdqa	.Lkey_rotate(%rip),%xmm5	/* pshufb mask */
+	movl	$8,%r10d	/* eight loop iterations; the last two round keys are derived after the loop */
+	movdqa	.Lkey_rcon1(%rip),%xmm4	/* round constant, doubled each iteration */
+	movdqa	%xmm0,%xmm2
+	movdqu	%xmm0,(%rdx)
+	jmp	.Loop_key128
+
+.align	16
+.Loop_key128:
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
+.byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
+	pslld	$1,%xmm4
+	leaq	16(%rax),%rax
+
+	movdqa	%xmm2,%xmm3	/* XOR the previous round key with itself shifted left by 4, 8 and 12 bytes */
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm3,%xmm2
+
+	pxor	%xmm2,%xmm0
+	movdqu	%xmm0,-16(%rax)
+	movdqa	%xmm0,%xmm2
+
+	decl	%r10d
+	jnz	.Loop_key128
+
+	movdqa	.Lkey_rcon1b(%rip),%xmm4	/* switch to round constant 0x1b */
+
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
+.byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
+	pslld	$1,%xmm4
+
+	movdqa	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm3,%xmm2
+
+	pxor	%xmm2,%xmm0
+	movdqu	%xmm0,(%rax)
+
+	movdqa	%xmm0,%xmm2
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
+.byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
+
+	movdqa	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm3,%xmm2
+
+	pxor	%xmm2,%xmm0
+	movdqu	%xmm0,16(%rax)
+
+	movl	%esi,96(%rax)	/* round field; lands at offset 240 of the schedule */
+	xorl	%eax,%eax
+	jmp	.Lenc_key_ret
+
+.align	16
+.L12rounds:
+	movq	16(%rdi),%xmm2	/* remaining 8 bytes of the 192-bit key */
+	movl	$11,%esi
+	cmpl	$268435456,%r10d
+	je	.L12rounds_alt
+
+	movups	%xmm0,(%rdx)
+.byte	102,15,58,223,202,1	/* aeskeygenassist $1,%xmm2,%xmm1 */
+	call	.Lkey_expansion_192a_cold
+.byte	102,15,58,223,202,2
+	call	.Lkey_expansion_192b
+.byte	102,15,58,223,202,4
+	call	.Lkey_expansion_192a
+.byte	102,15,58,223,202,8
+	call	.Lkey_expansion_192b
+.byte	102,15,58,223,202,16
+	call	.Lkey_expansion_192a
+.byte	102,15,58,223,202,32
+	call	.Lkey_expansion_192b
+.byte	102,15,58,223,202,64
+	call	.Lkey_expansion_192a
+.byte	102,15,58,223,202,128
+	call	.Lkey_expansion_192b
+	movups	%xmm0,(%rax)
+	movl	%esi,48(%rax)	/* round field; lands at offset 240 of the schedule */
+	xorq	%rax,%rax
+	jmp	.Lenc_key_ret
+
+.align	16
+.L12rounds_alt:
+	movdqa	.Lkey_rotate192(%rip),%xmm5
+	movdqa	.Lkey_rcon1(%rip),%xmm4
+	movl	$8,%r10d
+	movdqu	%xmm0,(%rdx)
+	jmp	.Loop_key192
+
+.align	16
+.Loop_key192:	/* emits 24 bytes of schedule per iteration */
+	movq	%xmm2,0(%rax)
+	movdqa	%xmm2,%xmm1
+.byte	102,15,56,0,213	/* pshufb %xmm5,%xmm2 */
+.byte	102,15,56,221,212	/* aesenclast %xmm4,%xmm2 */
+	pslld	$1,%xmm4
+	leaq	24(%rax),%rax
+
+	movdqa	%xmm0,%xmm3
+	pslldq	$4,%xmm0
+	pxor	%xmm0,%xmm3
+	pslldq	$4,%xmm0
+	pxor	%xmm0,%xmm3
+	pslldq	$4,%xmm0
+	pxor	%xmm3,%xmm0
+
+	pshufd	$0xff,%xmm0,%xmm3
+	pxor	%xmm1,%xmm3
+	pslldq	$4,%xmm1
+	pxor	%xmm1,%xmm3
+
+	pxor	%xmm2,%xmm0
+	pxor	%xmm3,%xmm2
+	movdqu	%xmm0,-16(%rax)
+
+	decl	%r10d
+	jnz	.Loop_key192
+
+	movl	%esi,32(%rax)	/* round field; lands at offset 240 of the schedule */
+	xorl	%eax,%eax
+	jmp	.Lenc_key_ret
+
+.align	16
+.L14rounds:
+	movups	16(%rdi),%xmm2	/* second 16 bytes of the 256-bit key */
+	movl	$13,%esi
+	leaq	16(%rax),%rax	/* the first two slots hold the raw key */
+	cmpl	$268435456,%r10d
+	je	.L14rounds_alt
+
+	movups	%xmm0,(%rdx)
+	movups	%xmm2,16(%rdx)
+.byte	102,15,58,223,202,1	/* aeskeygenassist $1,%xmm2,%xmm1 */
+	call	.Lkey_expansion_256a_cold
+.byte	102,15,58,223,200,1	/* aeskeygenassist $1,%xmm0,%xmm1 */
+	call	.Lkey_expansion_256b
+.byte	102,15,58,223,202,2
+	call	.Lkey_expansion_256a
+.byte	102,15,58,223,200,2
+	call	.Lkey_expansion_256b
+.byte	102,15,58,223,202,4
+	call	.Lkey_expansion_256a
+.byte	102,15,58,223,200,4
+	call	.Lkey_expansion_256b
+.byte	102,15,58,223,202,8
+	call	.Lkey_expansion_256a
+.byte	102,15,58,223,200,8
+	call	.Lkey_expansion_256b
+.byte	102,15,58,223,202,16
+	call	.Lkey_expansion_256a
+.byte	102,15,58,223,200,16
+	call	.Lkey_expansion_256b
+.byte	102,15,58,223,202,32
+	call	.Lkey_expansion_256a
+.byte	102,15,58,223,200,32
+	call	.Lkey_expansion_256b
+.byte	102,15,58,223,202,64
+	call	.Lkey_expansion_256a
+	movups	%xmm0,(%rax)
+	movl	%esi,16(%rax)	/* round field; lands at offset 240 of the schedule */
+	xorq	%rax,%rax
+	jmp	.Lenc_key_ret
+
+.align	16
+.L14rounds_alt:
+	movdqa	.Lkey_rotate(%rip),%xmm5
+	movdqa	.Lkey_rcon1(%rip),%xmm4
+	movl	$7,%r10d
+	movdqu	%xmm0,0(%rdx)
+	movdqa	%xmm2,%xmm1
+	movdqu	%xmm2,16(%rdx)
+	jmp	.Loop_key256
+
+.align	16
+.Loop_key256:	/* up to two round keys per iteration; exits through .Ldone_key256 after the seventh even key */
+.byte	102,15,56,0,213	/* pshufb %xmm5,%xmm2 */
+.byte	102,15,56,221,212	/* aesenclast %xmm4,%xmm2 */
+
+	movdqa	%xmm0,%xmm3
+	pslldq	$4,%xmm0
+	pxor	%xmm0,%xmm3
+	pslldq	$4,%xmm0
+	pxor	%xmm0,%xmm3
+	pslldq	$4,%xmm0
+	pxor	%xmm3,%xmm0
+	pslld	$1,%xmm4
+
+	pxor	%xmm2,%xmm0
+	movdqu	%xmm0,(%rax)
+
+	decl	%r10d
+	jz	.Ldone_key256
+
+	pshufd	$0xff,%xmm0,%xmm2
+	pxor	%xmm3,%xmm3
+.byte	102,15,56,221,211	/* aesenclast %xmm3,%xmm2 with an all-zero round key */
+
+	movdqa	%xmm1,%xmm3
+	pslldq	$4,%xmm1
+	pxor	%xmm1,%xmm3
+	pslldq	$4,%xmm1
+	pxor	%xmm1,%xmm3
+	pslldq	$4,%xmm1
+	pxor	%xmm3,%xmm1
+
+	pxor	%xmm1,%xmm2
+	movdqu	%xmm2,16(%rax)
+	leaq	32(%rax),%rax
+	movdqa	%xmm2,%xmm1
+
+	jmp	.Loop_key256
+
+.Ldone_key256:
+	movl	%esi,16(%rax)	/* round field; lands at offset 240 of the schedule */
+	xorl	%eax,%eax
+	jmp	.Lenc_key_ret
+
+.align	16
+.Lbad_keybits:
+	movq	$-2,%rax
+.Lenc_key_ret:	/* common exit: clear the XMM registers that held key material */
+	pxor	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	addq	$8,%rsp
+	.byte	0xf3,0xc3	/* rep ret */
+.LSEH_end_set_encrypt_key:
+
+.align	16
+.Lkey_expansion_128:	/* local subroutine: store the current round key at (%rax), advance %rax, then derive the next key in %xmm0 from the aeskeygenassist result in %xmm1 */
+	movups	%xmm0,(%rax)
+	leaq	16(%rax),%rax
+.Lkey_expansion_128_cold:	/* entry that skips the store (first call; the raw key is already written) */
+	shufps	$16,%xmm0,%xmm4
+	xorps	%xmm4,%xmm0
+	shufps	$140,%xmm0,%xmm4
+	xorps	%xmm4,%xmm0
+	shufps	$255,%xmm1,%xmm1	/* broadcast the top dword of the aeskeygenassist result */
+	xorps	%xmm1,%xmm0
+	.byte	0xf3,0xc3	/* rep ret */
+
+.align	16
+.Lkey_expansion_192a:	/* local subroutine for the 192-bit schedule: stores 16 bytes, then updates %xmm0/%xmm2 */
+	movups	%xmm0,(%rax)
+	leaq	16(%rax),%rax
+.Lkey_expansion_192a_cold:
+	movaps	%xmm2,%xmm5	/* save %xmm2 for the following 192b step */
+.Lkey_expansion_192b_warm:	/* shared tail, also reached from .Lkey_expansion_192b */
+	shufps	$16,%xmm0,%xmm4
+	movdqa	%xmm2,%xmm3
+	xorps	%xmm4,%xmm0
+	shufps	$140,%xmm0,%xmm4
+	pslldq	$4,%xmm3
+	xorps	%xmm4,%xmm0
+	pshufd	$85,%xmm1,%xmm1	/* broadcast dword 1 of the aeskeygenassist result */
+	pxor	%xmm3,%xmm2
+	pxor	%xmm1,%xmm0
+	pshufd	$255,%xmm0,%xmm3
+	pxor	%xmm3,%xmm2
+	.byte	0xf3,0xc3	/* rep ret */
+
+.align	16
+.Lkey_expansion_192b:	/* local subroutine: stores 32 bytes assembled from %xmm5, %xmm0 and %xmm2, then falls into the shared tail */
+	movaps	%xmm0,%xmm3
+	shufps	$68,%xmm0,%xmm5
+	movups	%xmm5,(%rax)
+	shufps	$78,%xmm2,%xmm3
+	movups	%xmm3,16(%rax)
+	leaq	32(%rax),%rax
+	jmp	.Lkey_expansion_192b_warm
+
+.align	16
+.Lkey_expansion_256a:	/* local subroutine for the 256-bit schedule: stores %xmm2, then derives the next even key in %xmm0 */
+	movups	%xmm2,(%rax)
+	leaq	16(%rax),%rax
+.Lkey_expansion_256a_cold:	/* entry that skips the store */
+	shufps	$16,%xmm0,%xmm4
+	xorps	%xmm4,%xmm0
+	shufps	$140,%xmm0,%xmm4
+	xorps	%xmm4,%xmm0
+	shufps	$255,%xmm1,%xmm1	/* broadcast the top dword of the aeskeygenassist result */
+	xorps	%xmm1,%xmm0
+	.byte	0xf3,0xc3	/* rep ret */
+
+.align	16
+.Lkey_expansion_256b:	/* local subroutine: stores %xmm0, then derives the next odd key in %xmm2 */
+	movups	%xmm0,(%rax)
+	leaq	16(%rax),%rax
+
+	shufps	$16,%xmm2,%xmm4
+	xorps	%xmm4,%xmm2
+	shufps	$140,%xmm2,%xmm4
+	xorps	%xmm4,%xmm2
+	shufps	$170,%xmm1,%xmm1	/* broadcast dword 2 of the aeskeygenassist result */
+	xorps	%xmm1,%xmm2
+	.byte	0xf3,0xc3	/* rep ret */
+.size	aesni_set_encrypt_key,.-aesni_set_encrypt_key
+.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
+.align	64
+.Lbswap_mask:	/* byte indices 15..0: as a pshufb mask this reverses the 16 bytes of a register */
+.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.Lincrement32:
+.long	6,6,6,0
+.Lincrement64:
+.long	1,0,0,0
+.Lxts_magic:
+.long	0x87,0,1,0
+.Lincrement1:
+.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
+.Lkey_rotate:	/* pshufb mask used by the 128/256-bit alternative key expansion: bytes 13,14,15,12 replicated into every dword */
+.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
+.Lkey_rotate192:	/* pshufb mask used by the 192-bit alternative key expansion: bytes 5,6,7,4 replicated into every dword */
+.long	0x04070605,0x04070605,0x04070605,0x04070605
+.Lkey_rcon1:	/* initial round constant 1 in every dword */
+.long	1,1,1,1
+.Lkey_rcon1b:	/* round constant 0x1b in every dword */
+.long	0x1b,0x1b,0x1b,0x1b
+
+.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	/* NUL-terminated ASCII banner: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
+.align	64
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S
new file mode 100644
index 0000000..04b161c
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S
@@ -0,0 +1,2503 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+.extern	asm_AES_encrypt
+.hidden asm_AES_encrypt
+.extern	asm_AES_decrypt
+.hidden asm_AES_decrypt
+
+.type	_bsaes_encrypt8,@function	/* Internal helper: transforms the eight 128-bit blocks held in xmm15,xmm0-xmm6 using the key material at (%rax); r10d = round count (one .Lenc_sbox pass per round). */
+.align	64
+_bsaes_encrypt8:	/* Results are left in xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,xmm1,xmm4 (the order the callers in this file consume them). Modifies rax, r10d, r11 and xmm7-xmm14. */
+	leaq	.LBS0(%rip),%r11	/* r11 = base of the constant table; all masks below are addressed relative to it */
+
+	movdqa	(%rax),%xmm8	/* first 16 bytes of key material, XORed into every block below */
+	leaq	16(%rax),%rax
+	movdqa	80(%r11),%xmm7	/* byte-shuffle mask used by the pshufb group below */
+	pxor	%xmm8,%xmm15
+	pxor	%xmm8,%xmm0
+	pxor	%xmm8,%xmm1
+	pxor	%xmm8,%xmm2
+.byte	102,68,15,56,0,255	/* pshufb %xmm7,%xmm15 */
+.byte	102,15,56,0,199	/* pshufb %xmm7,%xmm0 */
+	pxor	%xmm8,%xmm3
+	pxor	%xmm8,%xmm4
+.byte	102,15,56,0,207	/* pshufb %xmm7,%xmm1 */
+.byte	102,15,56,0,215	/* pshufb %xmm7,%xmm2 */
+	pxor	%xmm8,%xmm5
+	pxor	%xmm8,%xmm6
+.byte	102,15,56,0,223	/* pshufb %xmm7,%xmm3 */
+.byte	102,15,56,0,231	/* pshufb %xmm7,%xmm4 */
+.byte	102,15,56,0,239	/* pshufb %xmm7,%xmm5 */
+.byte	102,15,56,0,247	/* pshufb %xmm7,%xmm6 */
+_bsaes_encrypt8_bitslice:	/* Secondary entry point (called by bsaes_ctr32_encrypt_blocks): caller has already done the key XOR and byte shuffle and set r11, rax, r10d. */
+	movdqa	0(%r11),%xmm7	/* mask for the shift-by-1 exchanges */
+	movdqa	16(%r11),%xmm8	/* mask for the shift-by-2 exchanges */
+	movdqa	%xmm5,%xmm9	/* masked bit exchange: t=((a>>1)^b)&mask; b^=t; a^=t<<1 -- two register pairs interleaved */
+	psrlq	$1,%xmm5
+	movdqa	%xmm3,%xmm10
+	psrlq	$1,%xmm3
+	pxor	%xmm6,%xmm5
+	pxor	%xmm4,%xmm3
+	pand	%xmm7,%xmm5
+	pand	%xmm7,%xmm3
+	pxor	%xmm5,%xmm6
+	psllq	$1,%xmm5
+	pxor	%xmm3,%xmm4
+	psllq	$1,%xmm3
+	pxor	%xmm9,%xmm5
+	pxor	%xmm10,%xmm3
+	movdqa	%xmm1,%xmm9
+	psrlq	$1,%xmm1
+	movdqa	%xmm15,%xmm10
+	psrlq	$1,%xmm15
+	pxor	%xmm2,%xmm1
+	pxor	%xmm0,%xmm15
+	pand	%xmm7,%xmm1
+	pand	%xmm7,%xmm15
+	pxor	%xmm1,%xmm2
+	psllq	$1,%xmm1
+	pxor	%xmm15,%xmm0
+	psllq	$1,%xmm15
+	pxor	%xmm9,%xmm1
+	pxor	%xmm10,%xmm15
+	movdqa	32(%r11),%xmm7	/* mask for the shift-by-4 exchanges further down */
+	movdqa	%xmm4,%xmm9	/* same exchange pattern, shift distance 2, mask in xmm8 */
+	psrlq	$2,%xmm4
+	movdqa	%xmm3,%xmm10
+	psrlq	$2,%xmm3
+	pxor	%xmm6,%xmm4
+	pxor	%xmm5,%xmm3
+	pand	%xmm8,%xmm4
+	pand	%xmm8,%xmm3
+	pxor	%xmm4,%xmm6
+	psllq	$2,%xmm4
+	pxor	%xmm3,%xmm5
+	psllq	$2,%xmm3
+	pxor	%xmm9,%xmm4
+	pxor	%xmm10,%xmm3
+	movdqa	%xmm0,%xmm9
+	psrlq	$2,%xmm0
+	movdqa	%xmm15,%xmm10
+	psrlq	$2,%xmm15
+	pxor	%xmm2,%xmm0
+	pxor	%xmm1,%xmm15
+	pand	%xmm8,%xmm0
+	pand	%xmm8,%xmm15
+	pxor	%xmm0,%xmm2
+	psllq	$2,%xmm0
+	pxor	%xmm15,%xmm1
+	psllq	$2,%xmm15
+	pxor	%xmm9,%xmm0
+	pxor	%xmm10,%xmm15
+	movdqa	%xmm2,%xmm9	/* same exchange pattern, shift distance 4, mask in xmm7 */
+	psrlq	$4,%xmm2
+	movdqa	%xmm1,%xmm10
+	psrlq	$4,%xmm1
+	pxor	%xmm6,%xmm2
+	pxor	%xmm5,%xmm1
+	pand	%xmm7,%xmm2
+	pand	%xmm7,%xmm1
+	pxor	%xmm2,%xmm6
+	psllq	$4,%xmm2
+	pxor	%xmm1,%xmm5
+	psllq	$4,%xmm1
+	pxor	%xmm9,%xmm2
+	pxor	%xmm10,%xmm1
+	movdqa	%xmm0,%xmm9
+	psrlq	$4,%xmm0
+	movdqa	%xmm15,%xmm10
+	psrlq	$4,%xmm15
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pand	%xmm7,%xmm0
+	pand	%xmm7,%xmm15
+	pxor	%xmm0,%xmm4
+	psllq	$4,%xmm0
+	pxor	%xmm15,%xmm3
+	psllq	$4,%xmm15
+	pxor	%xmm9,%xmm0
+	pxor	%xmm10,%xmm15
+	decl	%r10d	/* pre-decrement the round counter; the first round skips the key XOR at .Lenc_loop */
+	jmp	.Lenc_sbox
+.align	16
+.Lenc_loop:	/* per-round: XOR in 128 bytes of key material from (%rax), then byte-shuffle every register with the mask in xmm7 */
+	pxor	0(%rax),%xmm15
+	pxor	16(%rax),%xmm0
+	pxor	32(%rax),%xmm1
+	pxor	48(%rax),%xmm2
+.byte	102,68,15,56,0,255	/* pshufb %xmm7,%xmm15 */
+.byte	102,15,56,0,199	/* pshufb %xmm7,%xmm0 */
+	pxor	64(%rax),%xmm3
+	pxor	80(%rax),%xmm4
+.byte	102,15,56,0,207	/* pshufb %xmm7,%xmm1 */
+.byte	102,15,56,0,215	/* pshufb %xmm7,%xmm2 */
+	pxor	96(%rax),%xmm5
+	pxor	112(%rax),%xmm6
+.byte	102,15,56,0,223	/* pshufb %xmm7,%xmm3 */
+.byte	102,15,56,0,231	/* pshufb %xmm7,%xmm4 */
+.byte	102,15,56,0,239	/* pshufb %xmm7,%xmm5 */
+.byte	102,15,56,0,247	/* pshufb %xmm7,%xmm6 */
+	leaq	128(%rax),%rax	/* advance to the next round's key material */
+.Lenc_sbox:	/* label name suggests the S-box layer; everything up to the next decl uses only pxor/pand/por/movdqa (no memory lookups), xmm7-xmm14 are scratch */
+	pxor	%xmm5,%xmm4
+	pxor	%xmm0,%xmm1
+	pxor	%xmm15,%xmm2
+	pxor	%xmm1,%xmm5
+	pxor	%xmm15,%xmm4
+
+	pxor	%xmm2,%xmm5
+	pxor	%xmm6,%xmm2
+	pxor	%xmm4,%xmm6
+	pxor	%xmm3,%xmm2
+	pxor	%xmm4,%xmm3
+	pxor	%xmm0,%xmm2
+
+	pxor	%xmm6,%xmm1
+	pxor	%xmm4,%xmm0
+	movdqa	%xmm6,%xmm10
+	movdqa	%xmm0,%xmm9
+	movdqa	%xmm4,%xmm8
+	movdqa	%xmm1,%xmm12
+	movdqa	%xmm5,%xmm11
+
+	pxor	%xmm3,%xmm10
+	pxor	%xmm1,%xmm9
+	pxor	%xmm2,%xmm8
+	movdqa	%xmm10,%xmm13
+	pxor	%xmm3,%xmm12
+	movdqa	%xmm9,%xmm7
+	pxor	%xmm15,%xmm11
+	movdqa	%xmm10,%xmm14
+
+	por	%xmm8,%xmm9
+	por	%xmm11,%xmm10
+	pxor	%xmm7,%xmm14
+	pand	%xmm11,%xmm13
+	pxor	%xmm8,%xmm11
+	pand	%xmm8,%xmm7
+	pand	%xmm11,%xmm14
+	movdqa	%xmm2,%xmm11
+	pxor	%xmm15,%xmm11
+	pand	%xmm11,%xmm12
+	pxor	%xmm12,%xmm10
+	pxor	%xmm12,%xmm9
+	movdqa	%xmm6,%xmm12
+	movdqa	%xmm4,%xmm11
+	pxor	%xmm0,%xmm12
+	pxor	%xmm5,%xmm11
+	movdqa	%xmm12,%xmm8
+	pand	%xmm11,%xmm12
+	por	%xmm11,%xmm8
+	pxor	%xmm12,%xmm7
+	pxor	%xmm14,%xmm10
+	pxor	%xmm13,%xmm9
+	pxor	%xmm14,%xmm8
+	movdqa	%xmm1,%xmm11
+	pxor	%xmm13,%xmm7
+	movdqa	%xmm3,%xmm12
+	pxor	%xmm13,%xmm8
+	movdqa	%xmm0,%xmm13
+	pand	%xmm2,%xmm11
+	movdqa	%xmm6,%xmm14
+	pand	%xmm15,%xmm12
+	pand	%xmm4,%xmm13
+	por	%xmm5,%xmm14
+	pxor	%xmm11,%xmm10
+	pxor	%xmm12,%xmm9
+	pxor	%xmm13,%xmm8
+	pxor	%xmm14,%xmm7
+
+
+
+
+
+	movdqa	%xmm10,%xmm11
+	pand	%xmm8,%xmm10
+	pxor	%xmm9,%xmm11
+
+	movdqa	%xmm7,%xmm13
+	movdqa	%xmm11,%xmm14
+	pxor	%xmm10,%xmm13
+	pand	%xmm13,%xmm14
+
+	movdqa	%xmm8,%xmm12
+	pxor	%xmm9,%xmm14
+	pxor	%xmm7,%xmm12
+
+	pxor	%xmm9,%xmm10
+
+	pand	%xmm10,%xmm12
+
+	movdqa	%xmm13,%xmm9
+	pxor	%xmm7,%xmm12
+
+	pxor	%xmm12,%xmm9
+	pxor	%xmm12,%xmm8
+
+	pand	%xmm7,%xmm9
+
+	pxor	%xmm9,%xmm13
+	pxor	%xmm9,%xmm8
+
+	pand	%xmm14,%xmm13
+
+	pxor	%xmm11,%xmm13
+	movdqa	%xmm5,%xmm11
+	movdqa	%xmm4,%xmm7
+	movdqa	%xmm14,%xmm9
+	pxor	%xmm13,%xmm9
+	pand	%xmm5,%xmm9
+	pxor	%xmm4,%xmm5
+	pand	%xmm14,%xmm4
+	pand	%xmm13,%xmm5
+	pxor	%xmm4,%xmm5
+	pxor	%xmm9,%xmm4
+	pxor	%xmm15,%xmm11
+	pxor	%xmm2,%xmm7
+	pxor	%xmm12,%xmm14
+	pxor	%xmm8,%xmm13
+	movdqa	%xmm14,%xmm10
+	movdqa	%xmm12,%xmm9
+	pxor	%xmm13,%xmm10
+	pxor	%xmm8,%xmm9
+	pand	%xmm11,%xmm10
+	pand	%xmm15,%xmm9
+	pxor	%xmm7,%xmm11
+	pxor	%xmm2,%xmm15
+	pand	%xmm14,%xmm7
+	pand	%xmm12,%xmm2
+	pand	%xmm13,%xmm11
+	pand	%xmm8,%xmm15
+	pxor	%xmm11,%xmm7
+	pxor	%xmm2,%xmm15
+	pxor	%xmm10,%xmm11
+	pxor	%xmm9,%xmm2
+	pxor	%xmm11,%xmm5
+	pxor	%xmm11,%xmm15
+	pxor	%xmm7,%xmm4
+	pxor	%xmm7,%xmm2
+
+	movdqa	%xmm6,%xmm11
+	movdqa	%xmm0,%xmm7
+	pxor	%xmm3,%xmm11
+	pxor	%xmm1,%xmm7
+	movdqa	%xmm14,%xmm10
+	movdqa	%xmm12,%xmm9
+	pxor	%xmm13,%xmm10
+	pxor	%xmm8,%xmm9
+	pand	%xmm11,%xmm10
+	pand	%xmm3,%xmm9
+	pxor	%xmm7,%xmm11
+	pxor	%xmm1,%xmm3
+	pand	%xmm14,%xmm7
+	pand	%xmm12,%xmm1
+	pand	%xmm13,%xmm11
+	pand	%xmm8,%xmm3
+	pxor	%xmm11,%xmm7
+	pxor	%xmm1,%xmm3
+	pxor	%xmm10,%xmm11
+	pxor	%xmm9,%xmm1
+	pxor	%xmm12,%xmm14
+	pxor	%xmm8,%xmm13
+	movdqa	%xmm14,%xmm10
+	pxor	%xmm13,%xmm10
+	pand	%xmm6,%xmm10
+	pxor	%xmm0,%xmm6
+	pand	%xmm14,%xmm0
+	pand	%xmm13,%xmm6
+	pxor	%xmm0,%xmm6
+	pxor	%xmm10,%xmm0
+	pxor	%xmm11,%xmm6
+	pxor	%xmm11,%xmm3
+	pxor	%xmm7,%xmm0
+	pxor	%xmm7,%xmm1
+	pxor	%xmm15,%xmm6
+	pxor	%xmm5,%xmm0
+	pxor	%xmm6,%xmm3
+	pxor	%xmm15,%xmm5
+	pxor	%xmm0,%xmm15
+
+	pxor	%xmm4,%xmm0
+	pxor	%xmm1,%xmm4
+	pxor	%xmm2,%xmm1
+	pxor	%xmm4,%xmm2
+	pxor	%xmm4,%xmm3
+
+	pxor	%xmm2,%xmm5
+	decl	%r10d	/* one round consumed; the flags set here also drive the jnz further down (SSE ops in between leave flags alone) */
+	jl	.Lenc_done	/* counter went negative: that was the final round, skip the mixing step */
+	pshufd	$0x93,%xmm15,%xmm7	/* dword-rotate/XOR mixing of each register -- presumably the MixColumns step; TODO confirm against the generator script */
+	pshufd	$0x93,%xmm0,%xmm8
+	pxor	%xmm7,%xmm15
+	pshufd	$0x93,%xmm3,%xmm9
+	pxor	%xmm8,%xmm0
+	pshufd	$0x93,%xmm5,%xmm10
+	pxor	%xmm9,%xmm3
+	pshufd	$0x93,%xmm2,%xmm11
+	pxor	%xmm10,%xmm5
+	pshufd	$0x93,%xmm6,%xmm12
+	pxor	%xmm11,%xmm2
+	pshufd	$0x93,%xmm1,%xmm13
+	pxor	%xmm12,%xmm6
+	pshufd	$0x93,%xmm4,%xmm14
+	pxor	%xmm13,%xmm1
+	pxor	%xmm14,%xmm4
+
+	pxor	%xmm15,%xmm8
+	pxor	%xmm4,%xmm7
+	pxor	%xmm4,%xmm8
+	pshufd	$0x4E,%xmm15,%xmm15
+	pxor	%xmm0,%xmm9
+	pshufd	$0x4E,%xmm0,%xmm0
+	pxor	%xmm2,%xmm12
+	pxor	%xmm7,%xmm15
+	pxor	%xmm6,%xmm13
+	pxor	%xmm8,%xmm0
+	pxor	%xmm5,%xmm11
+	pshufd	$0x4E,%xmm2,%xmm7
+	pxor	%xmm1,%xmm14
+	pshufd	$0x4E,%xmm6,%xmm8
+	pxor	%xmm3,%xmm10
+	pshufd	$0x4E,%xmm5,%xmm2
+	pxor	%xmm4,%xmm10
+	pshufd	$0x4E,%xmm4,%xmm6
+	pxor	%xmm4,%xmm11
+	pshufd	$0x4E,%xmm1,%xmm5
+	pxor	%xmm11,%xmm7
+	pshufd	$0x4E,%xmm3,%xmm1
+	pxor	%xmm12,%xmm8
+	pxor	%xmm10,%xmm2
+	pxor	%xmm14,%xmm6
+	pxor	%xmm13,%xmm5
+	movdqa	%xmm7,%xmm3
+	pxor	%xmm9,%xmm1
+	movdqa	%xmm8,%xmm4
+	movdqa	48(%r11),%xmm7	/* shuffle mask for the next pass through .Lenc_loop */
+	jnz	.Lenc_loop	/* counter non-zero: more than one round remains */
+	movdqa	64(%r11),%xmm7	/* counter hit zero: the last round uses a different shuffle mask */
+	jmp	.Lenc_loop
+.align	16
+.Lenc_done:	/* undo the bit exchanges from the entry sequence (same pattern, distances 1, 2, 4) on the permuted output registers */
+	movdqa	0(%r11),%xmm7
+	movdqa	16(%r11),%xmm8
+	movdqa	%xmm1,%xmm9
+	psrlq	$1,%xmm1
+	movdqa	%xmm2,%xmm10
+	psrlq	$1,%xmm2
+	pxor	%xmm4,%xmm1
+	pxor	%xmm6,%xmm2
+	pand	%xmm7,%xmm1
+	pand	%xmm7,%xmm2
+	pxor	%xmm1,%xmm4
+	psllq	$1,%xmm1
+	pxor	%xmm2,%xmm6
+	psllq	$1,%xmm2
+	pxor	%xmm9,%xmm1
+	pxor	%xmm10,%xmm2
+	movdqa	%xmm3,%xmm9
+	psrlq	$1,%xmm3
+	movdqa	%xmm15,%xmm10
+	psrlq	$1,%xmm15
+	pxor	%xmm5,%xmm3
+	pxor	%xmm0,%xmm15
+	pand	%xmm7,%xmm3
+	pand	%xmm7,%xmm15
+	pxor	%xmm3,%xmm5
+	psllq	$1,%xmm3
+	pxor	%xmm15,%xmm0
+	psllq	$1,%xmm15
+	pxor	%xmm9,%xmm3
+	pxor	%xmm10,%xmm15
+	movdqa	32(%r11),%xmm7
+	movdqa	%xmm6,%xmm9
+	psrlq	$2,%xmm6
+	movdqa	%xmm2,%xmm10
+	psrlq	$2,%xmm2
+	pxor	%xmm4,%xmm6
+	pxor	%xmm1,%xmm2
+	pand	%xmm8,%xmm6
+	pand	%xmm8,%xmm2
+	pxor	%xmm6,%xmm4
+	psllq	$2,%xmm6
+	pxor	%xmm2,%xmm1
+	psllq	$2,%xmm2
+	pxor	%xmm9,%xmm6
+	pxor	%xmm10,%xmm2
+	movdqa	%xmm0,%xmm9
+	psrlq	$2,%xmm0
+	movdqa	%xmm15,%xmm10
+	psrlq	$2,%xmm15
+	pxor	%xmm5,%xmm0
+	pxor	%xmm3,%xmm15
+	pand	%xmm8,%xmm0
+	pand	%xmm8,%xmm15
+	pxor	%xmm0,%xmm5
+	psllq	$2,%xmm0
+	pxor	%xmm15,%xmm3
+	psllq	$2,%xmm15
+	pxor	%xmm9,%xmm0
+	pxor	%xmm10,%xmm15
+	movdqa	%xmm5,%xmm9
+	psrlq	$4,%xmm5
+	movdqa	%xmm3,%xmm10
+	psrlq	$4,%xmm3
+	pxor	%xmm4,%xmm5
+	pxor	%xmm1,%xmm3
+	pand	%xmm7,%xmm5
+	pand	%xmm7,%xmm3
+	pxor	%xmm5,%xmm4
+	psllq	$4,%xmm5
+	pxor	%xmm3,%xmm1
+	psllq	$4,%xmm3
+	pxor	%xmm9,%xmm5
+	pxor	%xmm10,%xmm3
+	movdqa	%xmm0,%xmm9
+	psrlq	$4,%xmm0
+	movdqa	%xmm15,%xmm10
+	psrlq	$4,%xmm15
+	pxor	%xmm6,%xmm0
+	pxor	%xmm2,%xmm15
+	pand	%xmm7,%xmm0
+	pand	%xmm7,%xmm15
+	pxor	%xmm0,%xmm6
+	psllq	$4,%xmm0
+	pxor	%xmm15,%xmm2
+	psllq	$4,%xmm15
+	pxor	%xmm9,%xmm0
+	pxor	%xmm10,%xmm15
+	movdqa	(%rax),%xmm7	/* final 16-byte key block, XORed into all eight outputs */
+	pxor	%xmm7,%xmm3
+	pxor	%xmm7,%xmm5
+	pxor	%xmm7,%xmm2
+	pxor	%xmm7,%xmm6
+	pxor	%xmm7,%xmm1
+	pxor	%xmm7,%xmm4
+	pxor	%xmm7,%xmm15
+	pxor	%xmm7,%xmm0
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_bsaes_encrypt8,.-_bsaes_encrypt8
+
+.type	_bsaes_decrypt8,@function	/* Internal helper, decrypt counterpart of _bsaes_encrypt8: same inputs (blocks in xmm15,xmm0-xmm6, key material at (%rax), round count in r10d). */
+.align	64
+_bsaes_decrypt8:	/* Results are left in xmm15,xmm0,xmm5,xmm3,xmm1,xmm6,xmm2,xmm4 (the order bsaes_cbc_encrypt consumes them). Modifies rax, r10d, r11 and xmm7-xmm14. */
+	leaq	.LBS0(%rip),%r11	/* r11 = base of the constant table; negative offsets below select the decrypt shuffle masks */
+
+	movdqa	(%rax),%xmm8	/* first 16 bytes of key material, XORed into every block below */
+	leaq	16(%rax),%rax
+	movdqa	-48(%r11),%xmm7	/* byte-shuffle mask used by the pshufb group below */
+	pxor	%xmm8,%xmm15
+	pxor	%xmm8,%xmm0
+	pxor	%xmm8,%xmm1
+	pxor	%xmm8,%xmm2
+.byte	102,68,15,56,0,255	/* pshufb %xmm7,%xmm15 */
+.byte	102,15,56,0,199	/* pshufb %xmm7,%xmm0 */
+	pxor	%xmm8,%xmm3
+	pxor	%xmm8,%xmm4
+.byte	102,15,56,0,207	/* pshufb %xmm7,%xmm1 */
+.byte	102,15,56,0,215	/* pshufb %xmm7,%xmm2 */
+	pxor	%xmm8,%xmm5
+	pxor	%xmm8,%xmm6
+.byte	102,15,56,0,223	/* pshufb %xmm7,%xmm3 */
+.byte	102,15,56,0,231	/* pshufb %xmm7,%xmm4 */
+.byte	102,15,56,0,239	/* pshufb %xmm7,%xmm5 */
+.byte	102,15,56,0,247	/* pshufb %xmm7,%xmm6 */
+	movdqa	0(%r11),%xmm7	/* mask for the shift-by-1 exchanges */
+	movdqa	16(%r11),%xmm8	/* mask for the shift-by-2 exchanges */
+	movdqa	%xmm5,%xmm9	/* masked bit exchange: t=((a>>1)^b)&mask; b^=t; a^=t<<1 -- two register pairs interleaved */
+	psrlq	$1,%xmm5
+	movdqa	%xmm3,%xmm10
+	psrlq	$1,%xmm3
+	pxor	%xmm6,%xmm5
+	pxor	%xmm4,%xmm3
+	pand	%xmm7,%xmm5
+	pand	%xmm7,%xmm3
+	pxor	%xmm5,%xmm6
+	psllq	$1,%xmm5
+	pxor	%xmm3,%xmm4
+	psllq	$1,%xmm3
+	pxor	%xmm9,%xmm5
+	pxor	%xmm10,%xmm3
+	movdqa	%xmm1,%xmm9
+	psrlq	$1,%xmm1
+	movdqa	%xmm15,%xmm10
+	psrlq	$1,%xmm15
+	pxor	%xmm2,%xmm1
+	pxor	%xmm0,%xmm15
+	pand	%xmm7,%xmm1
+	pand	%xmm7,%xmm15
+	pxor	%xmm1,%xmm2
+	psllq	$1,%xmm1
+	pxor	%xmm15,%xmm0
+	psllq	$1,%xmm15
+	pxor	%xmm9,%xmm1
+	pxor	%xmm10,%xmm15
+	movdqa	32(%r11),%xmm7	/* mask for the shift-by-4 exchanges further down */
+	movdqa	%xmm4,%xmm9	/* same exchange pattern, shift distance 2, mask in xmm8 */
+	psrlq	$2,%xmm4
+	movdqa	%xmm3,%xmm10
+	psrlq	$2,%xmm3
+	pxor	%xmm6,%xmm4
+	pxor	%xmm5,%xmm3
+	pand	%xmm8,%xmm4
+	pand	%xmm8,%xmm3
+	pxor	%xmm4,%xmm6
+	psllq	$2,%xmm4
+	pxor	%xmm3,%xmm5
+	psllq	$2,%xmm3
+	pxor	%xmm9,%xmm4
+	pxor	%xmm10,%xmm3
+	movdqa	%xmm0,%xmm9
+	psrlq	$2,%xmm0
+	movdqa	%xmm15,%xmm10
+	psrlq	$2,%xmm15
+	pxor	%xmm2,%xmm0
+	pxor	%xmm1,%xmm15
+	pand	%xmm8,%xmm0
+	pand	%xmm8,%xmm15
+	pxor	%xmm0,%xmm2
+	psllq	$2,%xmm0
+	pxor	%xmm15,%xmm1
+	psllq	$2,%xmm15
+	pxor	%xmm9,%xmm0
+	pxor	%xmm10,%xmm15
+	movdqa	%xmm2,%xmm9	/* same exchange pattern, shift distance 4, mask in xmm7 */
+	psrlq	$4,%xmm2
+	movdqa	%xmm1,%xmm10
+	psrlq	$4,%xmm1
+	pxor	%xmm6,%xmm2
+	pxor	%xmm5,%xmm1
+	pand	%xmm7,%xmm2
+	pand	%xmm7,%xmm1
+	pxor	%xmm2,%xmm6
+	psllq	$4,%xmm2
+	pxor	%xmm1,%xmm5
+	psllq	$4,%xmm1
+	pxor	%xmm9,%xmm2
+	pxor	%xmm10,%xmm1
+	movdqa	%xmm0,%xmm9
+	psrlq	$4,%xmm0
+	movdqa	%xmm15,%xmm10
+	psrlq	$4,%xmm15
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pand	%xmm7,%xmm0
+	pand	%xmm7,%xmm15
+	pxor	%xmm0,%xmm4
+	psllq	$4,%xmm0
+	pxor	%xmm15,%xmm3
+	psllq	$4,%xmm15
+	pxor	%xmm9,%xmm0
+	pxor	%xmm10,%xmm15
+	decl	%r10d	/* pre-decrement the round counter; the first round skips the key XOR at .Ldec_loop */
+	jmp	.Ldec_sbox
+.align	16
+.Ldec_loop:	/* per-round: XOR in 128 bytes of key material from (%rax), then byte-shuffle every register with the mask in xmm7 */
+	pxor	0(%rax),%xmm15
+	pxor	16(%rax),%xmm0
+	pxor	32(%rax),%xmm1
+	pxor	48(%rax),%xmm2
+.byte	102,68,15,56,0,255	/* pshufb %xmm7,%xmm15 */
+.byte	102,15,56,0,199	/* pshufb %xmm7,%xmm0 */
+	pxor	64(%rax),%xmm3
+	pxor	80(%rax),%xmm4
+.byte	102,15,56,0,207	/* pshufb %xmm7,%xmm1 */
+.byte	102,15,56,0,215	/* pshufb %xmm7,%xmm2 */
+	pxor	96(%rax),%xmm5
+	pxor	112(%rax),%xmm6
+.byte	102,15,56,0,223	/* pshufb %xmm7,%xmm3 */
+.byte	102,15,56,0,231	/* pshufb %xmm7,%xmm4 */
+.byte	102,15,56,0,239	/* pshufb %xmm7,%xmm5 */
+.byte	102,15,56,0,247	/* pshufb %xmm7,%xmm6 */
+	leaq	128(%rax),%rax	/* advance to the next round's key material */
+.Ldec_sbox:	/* label name suggests the inverse S-box layer; everything up to the next decl uses only pxor/pand/por/movdqa (no memory lookups), xmm7-xmm14 are scratch */
+	pxor	%xmm3,%xmm2
+
+	pxor	%xmm6,%xmm3
+	pxor	%xmm6,%xmm1
+	pxor	%xmm3,%xmm5
+	pxor	%xmm5,%xmm6
+	pxor	%xmm6,%xmm0
+
+	pxor	%xmm0,%xmm15
+	pxor	%xmm4,%xmm1
+	pxor	%xmm15,%xmm2
+	pxor	%xmm15,%xmm4
+	pxor	%xmm2,%xmm0
+	movdqa	%xmm2,%xmm10
+	movdqa	%xmm6,%xmm9
+	movdqa	%xmm0,%xmm8
+	movdqa	%xmm3,%xmm12
+	movdqa	%xmm4,%xmm11
+
+	pxor	%xmm15,%xmm10
+	pxor	%xmm3,%xmm9
+	pxor	%xmm5,%xmm8
+	movdqa	%xmm10,%xmm13
+	pxor	%xmm15,%xmm12
+	movdqa	%xmm9,%xmm7
+	pxor	%xmm1,%xmm11
+	movdqa	%xmm10,%xmm14
+
+	por	%xmm8,%xmm9
+	por	%xmm11,%xmm10
+	pxor	%xmm7,%xmm14
+	pand	%xmm11,%xmm13
+	pxor	%xmm8,%xmm11
+	pand	%xmm8,%xmm7
+	pand	%xmm11,%xmm14
+	movdqa	%xmm5,%xmm11
+	pxor	%xmm1,%xmm11
+	pand	%xmm11,%xmm12
+	pxor	%xmm12,%xmm10
+	pxor	%xmm12,%xmm9
+	movdqa	%xmm2,%xmm12
+	movdqa	%xmm0,%xmm11
+	pxor	%xmm6,%xmm12
+	pxor	%xmm4,%xmm11
+	movdqa	%xmm12,%xmm8
+	pand	%xmm11,%xmm12
+	por	%xmm11,%xmm8
+	pxor	%xmm12,%xmm7
+	pxor	%xmm14,%xmm10
+	pxor	%xmm13,%xmm9
+	pxor	%xmm14,%xmm8
+	movdqa	%xmm3,%xmm11
+	pxor	%xmm13,%xmm7
+	movdqa	%xmm15,%xmm12
+	pxor	%xmm13,%xmm8
+	movdqa	%xmm6,%xmm13
+	pand	%xmm5,%xmm11
+	movdqa	%xmm2,%xmm14
+	pand	%xmm1,%xmm12
+	pand	%xmm0,%xmm13
+	por	%xmm4,%xmm14
+	pxor	%xmm11,%xmm10
+	pxor	%xmm12,%xmm9
+	pxor	%xmm13,%xmm8
+	pxor	%xmm14,%xmm7
+
+
+
+
+
+	movdqa	%xmm10,%xmm11
+	pand	%xmm8,%xmm10
+	pxor	%xmm9,%xmm11
+
+	movdqa	%xmm7,%xmm13
+	movdqa	%xmm11,%xmm14
+	pxor	%xmm10,%xmm13
+	pand	%xmm13,%xmm14
+
+	movdqa	%xmm8,%xmm12
+	pxor	%xmm9,%xmm14
+	pxor	%xmm7,%xmm12
+
+	pxor	%xmm9,%xmm10
+
+	pand	%xmm10,%xmm12
+
+	movdqa	%xmm13,%xmm9
+	pxor	%xmm7,%xmm12
+
+	pxor	%xmm12,%xmm9
+	pxor	%xmm12,%xmm8
+
+	pand	%xmm7,%xmm9
+
+	pxor	%xmm9,%xmm13
+	pxor	%xmm9,%xmm8
+
+	pand	%xmm14,%xmm13
+
+	pxor	%xmm11,%xmm13
+	movdqa	%xmm4,%xmm11
+	movdqa	%xmm0,%xmm7
+	movdqa	%xmm14,%xmm9
+	pxor	%xmm13,%xmm9
+	pand	%xmm4,%xmm9
+	pxor	%xmm0,%xmm4
+	pand	%xmm14,%xmm0
+	pand	%xmm13,%xmm4
+	pxor	%xmm0,%xmm4
+	pxor	%xmm9,%xmm0
+	pxor	%xmm1,%xmm11
+	pxor	%xmm5,%xmm7
+	pxor	%xmm12,%xmm14
+	pxor	%xmm8,%xmm13
+	movdqa	%xmm14,%xmm10
+	movdqa	%xmm12,%xmm9
+	pxor	%xmm13,%xmm10
+	pxor	%xmm8,%xmm9
+	pand	%xmm11,%xmm10
+	pand	%xmm1,%xmm9
+	pxor	%xmm7,%xmm11
+	pxor	%xmm5,%xmm1
+	pand	%xmm14,%xmm7
+	pand	%xmm12,%xmm5
+	pand	%xmm13,%xmm11
+	pand	%xmm8,%xmm1
+	pxor	%xmm11,%xmm7
+	pxor	%xmm5,%xmm1
+	pxor	%xmm10,%xmm11
+	pxor	%xmm9,%xmm5
+	pxor	%xmm11,%xmm4
+	pxor	%xmm11,%xmm1
+	pxor	%xmm7,%xmm0
+	pxor	%xmm7,%xmm5
+
+	movdqa	%xmm2,%xmm11
+	movdqa	%xmm6,%xmm7
+	pxor	%xmm15,%xmm11
+	pxor	%xmm3,%xmm7
+	movdqa	%xmm14,%xmm10
+	movdqa	%xmm12,%xmm9
+	pxor	%xmm13,%xmm10
+	pxor	%xmm8,%xmm9
+	pand	%xmm11,%xmm10
+	pand	%xmm15,%xmm9
+	pxor	%xmm7,%xmm11
+	pxor	%xmm3,%xmm15
+	pand	%xmm14,%xmm7
+	pand	%xmm12,%xmm3
+	pand	%xmm13,%xmm11
+	pand	%xmm8,%xmm15
+	pxor	%xmm11,%xmm7
+	pxor	%xmm3,%xmm15
+	pxor	%xmm10,%xmm11
+	pxor	%xmm9,%xmm3
+	pxor	%xmm12,%xmm14
+	pxor	%xmm8,%xmm13
+	movdqa	%xmm14,%xmm10
+	pxor	%xmm13,%xmm10
+	pand	%xmm2,%xmm10
+	pxor	%xmm6,%xmm2
+	pand	%xmm14,%xmm6
+	pand	%xmm13,%xmm2
+	pxor	%xmm6,%xmm2
+	pxor	%xmm10,%xmm6
+	pxor	%xmm11,%xmm2
+	pxor	%xmm11,%xmm15
+	pxor	%xmm7,%xmm6
+	pxor	%xmm7,%xmm3
+	pxor	%xmm6,%xmm0
+	pxor	%xmm4,%xmm5
+
+	pxor	%xmm0,%xmm3
+	pxor	%xmm6,%xmm1
+	pxor	%xmm6,%xmm4
+	pxor	%xmm1,%xmm3
+	pxor	%xmm15,%xmm6
+	pxor	%xmm4,%xmm3
+	pxor	%xmm5,%xmm2
+	pxor	%xmm0,%xmm5
+	pxor	%xmm3,%xmm2
+
+	pxor	%xmm15,%xmm3
+	pxor	%xmm2,%xmm6
+	decl	%r10d	/* one round consumed; the flags set here also drive the jnz further down (SSE ops in between leave flags alone) */
+	jl	.Ldec_done	/* counter went negative: that was the final round, skip the mixing step */
+
+	pshufd	$0x4E,%xmm15,%xmm7	/* dword-swap/XOR pre-mix followed by a 0x93/0x4E rotate-and-XOR stage -- presumably InvMixColumns; TODO confirm against the generator script */
+	pshufd	$0x4E,%xmm2,%xmm13
+	pxor	%xmm15,%xmm7
+	pshufd	$0x4E,%xmm4,%xmm14
+	pxor	%xmm2,%xmm13
+	pshufd	$0x4E,%xmm0,%xmm8
+	pxor	%xmm4,%xmm14
+	pshufd	$0x4E,%xmm5,%xmm9
+	pxor	%xmm0,%xmm8
+	pshufd	$0x4E,%xmm3,%xmm10
+	pxor	%xmm5,%xmm9
+	pxor	%xmm13,%xmm15
+	pxor	%xmm13,%xmm0
+	pshufd	$0x4E,%xmm1,%xmm11
+	pxor	%xmm3,%xmm10
+	pxor	%xmm7,%xmm5
+	pxor	%xmm8,%xmm3
+	pshufd	$0x4E,%xmm6,%xmm12
+	pxor	%xmm1,%xmm11
+	pxor	%xmm14,%xmm0
+	pxor	%xmm9,%xmm1
+	pxor	%xmm6,%xmm12
+
+	pxor	%xmm14,%xmm5
+	pxor	%xmm13,%xmm3
+	pxor	%xmm13,%xmm1
+	pxor	%xmm10,%xmm6
+	pxor	%xmm11,%xmm2
+	pxor	%xmm14,%xmm1
+	pxor	%xmm14,%xmm6
+	pxor	%xmm12,%xmm4
+	pshufd	$0x93,%xmm15,%xmm7
+	pshufd	$0x93,%xmm0,%xmm8
+	pxor	%xmm7,%xmm15
+	pshufd	$0x93,%xmm5,%xmm9
+	pxor	%xmm8,%xmm0
+	pshufd	$0x93,%xmm3,%xmm10
+	pxor	%xmm9,%xmm5
+	pshufd	$0x93,%xmm1,%xmm11
+	pxor	%xmm10,%xmm3
+	pshufd	$0x93,%xmm6,%xmm12
+	pxor	%xmm11,%xmm1
+	pshufd	$0x93,%xmm2,%xmm13
+	pxor	%xmm12,%xmm6
+	pshufd	$0x93,%xmm4,%xmm14
+	pxor	%xmm13,%xmm2
+	pxor	%xmm14,%xmm4
+
+	pxor	%xmm15,%xmm8
+	pxor	%xmm4,%xmm7
+	pxor	%xmm4,%xmm8
+	pshufd	$0x4E,%xmm15,%xmm15
+	pxor	%xmm0,%xmm9
+	pshufd	$0x4E,%xmm0,%xmm0
+	pxor	%xmm1,%xmm12
+	pxor	%xmm7,%xmm15
+	pxor	%xmm6,%xmm13
+	pxor	%xmm8,%xmm0
+	pxor	%xmm3,%xmm11
+	pshufd	$0x4E,%xmm1,%xmm7
+	pxor	%xmm2,%xmm14
+	pshufd	$0x4E,%xmm6,%xmm8
+	pxor	%xmm5,%xmm10
+	pshufd	$0x4E,%xmm3,%xmm1
+	pxor	%xmm4,%xmm10
+	pshufd	$0x4E,%xmm4,%xmm6
+	pxor	%xmm4,%xmm11
+	pshufd	$0x4E,%xmm2,%xmm3
+	pxor	%xmm11,%xmm7
+	pshufd	$0x4E,%xmm5,%xmm2
+	pxor	%xmm12,%xmm8
+	pxor	%xmm1,%xmm10
+	pxor	%xmm14,%xmm6
+	pxor	%xmm3,%xmm13
+	movdqa	%xmm7,%xmm3
+	pxor	%xmm9,%xmm2
+	movdqa	%xmm13,%xmm5
+	movdqa	%xmm8,%xmm4
+	movdqa	%xmm2,%xmm1
+	movdqa	%xmm10,%xmm2
+	movdqa	-16(%r11),%xmm7	/* shuffle mask for the next pass through .Ldec_loop */
+	jnz	.Ldec_loop	/* counter non-zero: more than one round remains */
+	movdqa	-32(%r11),%xmm7	/* counter hit zero: the last round uses a different shuffle mask */
+	jmp	.Ldec_loop
+.align	16
+.Ldec_done:	/* undo the bit exchanges from the entry sequence (same pattern, distances 1, 2, 4) on the permuted output registers */
+	movdqa	0(%r11),%xmm7
+	movdqa	16(%r11),%xmm8
+	movdqa	%xmm2,%xmm9
+	psrlq	$1,%xmm2
+	movdqa	%xmm1,%xmm10
+	psrlq	$1,%xmm1
+	pxor	%xmm4,%xmm2
+	pxor	%xmm6,%xmm1
+	pand	%xmm7,%xmm2
+	pand	%xmm7,%xmm1
+	pxor	%xmm2,%xmm4
+	psllq	$1,%xmm2
+	pxor	%xmm1,%xmm6
+	psllq	$1,%xmm1
+	pxor	%xmm9,%xmm2
+	pxor	%xmm10,%xmm1
+	movdqa	%xmm5,%xmm9
+	psrlq	$1,%xmm5
+	movdqa	%xmm15,%xmm10
+	psrlq	$1,%xmm15
+	pxor	%xmm3,%xmm5
+	pxor	%xmm0,%xmm15
+	pand	%xmm7,%xmm5
+	pand	%xmm7,%xmm15
+	pxor	%xmm5,%xmm3
+	psllq	$1,%xmm5
+	pxor	%xmm15,%xmm0
+	psllq	$1,%xmm15
+	pxor	%xmm9,%xmm5
+	pxor	%xmm10,%xmm15
+	movdqa	32(%r11),%xmm7
+	movdqa	%xmm6,%xmm9
+	psrlq	$2,%xmm6
+	movdqa	%xmm1,%xmm10
+	psrlq	$2,%xmm1
+	pxor	%xmm4,%xmm6
+	pxor	%xmm2,%xmm1
+	pand	%xmm8,%xmm6
+	pand	%xmm8,%xmm1
+	pxor	%xmm6,%xmm4
+	psllq	$2,%xmm6
+	pxor	%xmm1,%xmm2
+	psllq	$2,%xmm1
+	pxor	%xmm9,%xmm6
+	pxor	%xmm10,%xmm1
+	movdqa	%xmm0,%xmm9
+	psrlq	$2,%xmm0
+	movdqa	%xmm15,%xmm10
+	psrlq	$2,%xmm15
+	pxor	%xmm3,%xmm0
+	pxor	%xmm5,%xmm15
+	pand	%xmm8,%xmm0
+	pand	%xmm8,%xmm15
+	pxor	%xmm0,%xmm3
+	psllq	$2,%xmm0
+	pxor	%xmm15,%xmm5
+	psllq	$2,%xmm15
+	pxor	%xmm9,%xmm0
+	pxor	%xmm10,%xmm15
+	movdqa	%xmm3,%xmm9
+	psrlq	$4,%xmm3
+	movdqa	%xmm5,%xmm10
+	psrlq	$4,%xmm5
+	pxor	%xmm4,%xmm3
+	pxor	%xmm2,%xmm5
+	pand	%xmm7,%xmm3
+	pand	%xmm7,%xmm5
+	pxor	%xmm3,%xmm4
+	psllq	$4,%xmm3
+	pxor	%xmm5,%xmm2
+	psllq	$4,%xmm5
+	pxor	%xmm9,%xmm3
+	pxor	%xmm10,%xmm5
+	movdqa	%xmm0,%xmm9
+	psrlq	$4,%xmm0
+	movdqa	%xmm15,%xmm10
+	psrlq	$4,%xmm15
+	pxor	%xmm6,%xmm0
+	pxor	%xmm1,%xmm15
+	pand	%xmm7,%xmm0
+	pand	%xmm7,%xmm15
+	pxor	%xmm0,%xmm6
+	psllq	$4,%xmm0
+	pxor	%xmm15,%xmm1
+	psllq	$4,%xmm15
+	pxor	%xmm9,%xmm0
+	pxor	%xmm10,%xmm15
+	movdqa	(%rax),%xmm7	/* final 16-byte key block, XORed into all eight outputs */
+	pxor	%xmm7,%xmm5
+	pxor	%xmm7,%xmm3
+	pxor	%xmm7,%xmm1
+	pxor	%xmm7,%xmm6
+	pxor	%xmm7,%xmm2
+	pxor	%xmm7,%xmm4
+	pxor	%xmm7,%xmm15
+	pxor	%xmm7,%xmm0
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_bsaes_decrypt8,.-_bsaes_decrypt8
+.type	_bsaes_key_convert,@function	/* Internal helper: expands a conventional key schedule at (%rcx) into the bit-mask form read by _bsaes_encrypt8/_bsaes_decrypt8, writing to the 16-byte-aligned buffer at (%rax); r10d = round count. */
+.align	16
+_bsaes_key_convert:	/* Writes 16 + 128*(r10d-1) bytes. On return: rax points just past them, xmm6 = last 16-byte key block loaded from (%rcx) (not converted), xmm7 = constant from 80(.Lmasks). Modifies rcx, r10d, r11, xmm0-xmm15. */
+	leaq	.Lmasks(%rip),%r11
+	movdqu	(%rcx),%xmm7	/* first round key, copied through unchanged below */
+	leaq	16(%rcx),%rcx
+	movdqa	0(%r11),%xmm0	/* xmm0-xmm3: bit-select masks (presumably one bit per byte; table contents are outside this view) */
+	movdqa	16(%r11),%xmm1
+	movdqa	32(%r11),%xmm2
+	movdqa	48(%r11),%xmm3
+	movdqa	64(%r11),%xmm4	/* byte-shuffle mask applied to each round key */
+	pcmpeqd	%xmm5,%xmm5	/* xmm5 = all ones, used below to invert selected planes */
+
+	movdqu	(%rcx),%xmm6	/* next round key */
+	movdqa	%xmm7,(%rax)	/* store first round key as-is */
+	leaq	16(%rax),%rax
+	decl	%r10d	/* loop runs r10d-1 times */
+	jmp	.Lkey_loop
+.align	16
+.Lkey_loop:	/* each iteration turns one 16-byte round key (xmm6) into eight 16-byte planes: plane byte = 0xFF where the masked bit is set, else 0x00 */
+.byte	102,15,56,0,244	/* pshufb %xmm4,%xmm6 */
+
+	movdqa	%xmm0,%xmm8
+	movdqa	%xmm1,%xmm9
+
+	pand	%xmm6,%xmm8
+	pand	%xmm6,%xmm9
+	movdqa	%xmm2,%xmm10
+	pcmpeqb	%xmm0,%xmm8	/* (key & mask) == mask, per byte */
+	psllq	$4,%xmm0	/* shift masks up by 4 bits to select the other four bit positions; undone by psrlq below */
+	movdqa	%xmm3,%xmm11
+	pcmpeqb	%xmm1,%xmm9
+	psllq	$4,%xmm1
+
+	pand	%xmm6,%xmm10
+	pand	%xmm6,%xmm11
+	movdqa	%xmm0,%xmm12
+	pcmpeqb	%xmm2,%xmm10
+	psllq	$4,%xmm2
+	movdqa	%xmm1,%xmm13
+	pcmpeqb	%xmm3,%xmm11
+	psllq	$4,%xmm3
+
+	movdqa	%xmm2,%xmm14
+	movdqa	%xmm3,%xmm15
+	pxor	%xmm5,%xmm8	/* bitwise NOT of plane 0 */
+	pxor	%xmm5,%xmm9	/* bitwise NOT of plane 1 */
+
+	pand	%xmm6,%xmm12
+	pand	%xmm6,%xmm13
+	movdqa	%xmm8,0(%rax)
+	pcmpeqb	%xmm0,%xmm12
+	psrlq	$4,%xmm0	/* restore mask for the next iteration */
+	movdqa	%xmm9,16(%rax)
+	pcmpeqb	%xmm1,%xmm13
+	psrlq	$4,%xmm1
+	leaq	16(%rcx),%rcx
+
+	pand	%xmm6,%xmm14
+	pand	%xmm6,%xmm15
+	movdqa	%xmm10,32(%rax)
+	pcmpeqb	%xmm2,%xmm14
+	psrlq	$4,%xmm2
+	movdqa	%xmm11,48(%rax)
+	pcmpeqb	%xmm3,%xmm15
+	psrlq	$4,%xmm3
+	movdqu	(%rcx),%xmm6	/* preload the following round key; after the last iteration this is the value returned in xmm6 */
+
+	pxor	%xmm5,%xmm13	/* bitwise NOT of plane 5 */
+	pxor	%xmm5,%xmm14	/* bitwise NOT of plane 6 */
+	movdqa	%xmm12,64(%rax)
+	movdqa	%xmm13,80(%rax)
+	movdqa	%xmm14,96(%rax)
+	movdqa	%xmm15,112(%rax)
+	leaq	128(%rax),%rax
+	decl	%r10d
+	jnz	.Lkey_loop
+
+	movdqa	80(%r11),%xmm7	/* constant handed back to the caller, which XORs it with a key block (see call sites) */
+
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_bsaes_key_convert,.-_bsaes_key_convert
+.extern	asm_AES_cbc_encrypt	/* fallback implementation that bsaes_cbc_encrypt tail-jumps to */
+.hidden asm_AES_cbc_encrypt
+.globl	bsaes_cbc_encrypt	/* CBC decryption, eight blocks per pass. Register use below: rdi=in, rsi=out, rdx=length in bytes, rcx=key, r8=IV (read and updated), r9d=enc flag. */
+.hidden bsaes_cbc_encrypt
+.type	bsaes_cbc_encrypt,@function
+.align	16
+bsaes_cbc_encrypt:
+	cmpl	$0,%r9d
+	jne	asm_AES_cbc_encrypt	/* enc flag non-zero: not handled here, tail-jump with arguments untouched */
+	cmpq	$128,%rdx
+	jb	asm_AES_cbc_encrypt	/* fewer than 128 bytes (8 blocks): also delegated */
+
+	movq	%rsp,%rax	/* not read in this function: eax is overwritten by the load from 240(%rcx) below */
+.Lcbc_dec_prologue:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	leaq	-72(%rsp),%rsp	/* 72-byte local area; the saved IV lives at 32(%rbp) */
+	movq	%rsp,%rbp	/* rbp = frame base; also the upper bound of the region wiped at .Lcbc_dec_bzero */
+	movl	240(%rcx),%eax	/* 32-bit field at key+240, used as the round count */
+	movq	%rdi,%r12
+	movq	%rsi,%r13
+	movq	%rdx,%r14
+	movq	%rcx,%r15
+	movq	%r8,%rbx
+	shrq	$4,%r14	/* byte length -> number of 16-byte blocks */
+
+	movl	%eax,%edx	/* edx keeps the round count for every later helper call */
+	shlq	$7,%rax
+	subq	$96,%rax
+	subq	%rax,%rsp	/* reserve 128*rounds-96 bytes: exactly what key_convert writes plus one 16-byte block */
+
+	movq	%rsp,%rax
+	movq	%r15,%rcx
+	movl	%edx,%r10d
+	call	_bsaes_key_convert
+	pxor	(%rsp),%xmm7	/* fold the constant returned in xmm7 into the first key block ... */
+	movdqa	%xmm6,(%rax)	/* store the last round key (returned in xmm6) at the end of the converted schedule */
+	movdqa	%xmm7,(%rsp)	/* ... and write the adjusted first key block back */
+
+	movdqu	(%rbx),%xmm14	/* xmm14 = current chaining value (IV) */
+	subq	$8,%r14
+.Lcbc_dec_loop:	/* main loop: decrypt 8 blocks, XOR each with the preceding ciphertext block */
+	movdqu	0(%r12),%xmm15
+	movdqu	16(%r12),%xmm0
+	movdqu	32(%r12),%xmm1
+	movdqu	48(%r12),%xmm2
+	movdqu	64(%r12),%xmm3
+	movdqu	80(%r12),%xmm4
+	movq	%rsp,%rax
+	movdqu	96(%r12),%xmm5
+	movl	%edx,%r10d
+	movdqu	112(%r12),%xmm6
+	movdqa	%xmm14,32(%rbp)	/* spill chaining value: the helper overwrites xmm14 */
+
+	call	_bsaes_decrypt8
+
+	pxor	32(%rbp),%xmm15	/* first block XOR chaining value */
+	movdqu	0(%r12),%xmm7	/* re-read the ciphertext blocks; all reads happen before the first store below */
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm7,%xmm0
+	movdqu	32(%r12),%xmm9
+	pxor	%xmm8,%xmm5
+	movdqu	48(%r12),%xmm10
+	pxor	%xmm9,%xmm3
+	movdqu	64(%r12),%xmm11
+	pxor	%xmm10,%xmm1
+	movdqu	80(%r12),%xmm12
+	pxor	%xmm11,%xmm6
+	movdqu	96(%r12),%xmm13
+	pxor	%xmm12,%xmm2
+	movdqu	112(%r12),%xmm14	/* last ciphertext block becomes the next chaining value */
+	pxor	%xmm13,%xmm4
+	movdqu	%xmm15,0(%r13)
+	leaq	128(%r12),%r12
+	movdqu	%xmm0,16(%r13)
+	movdqu	%xmm5,32(%r13)
+	movdqu	%xmm3,48(%r13)
+	movdqu	%xmm1,64(%r13)
+	movdqu	%xmm6,80(%r13)
+	movdqu	%xmm2,96(%r13)
+	movdqu	%xmm4,112(%r13)
+	leaq	128(%r13),%r13
+	subq	$8,%r14
+	jnc	.Lcbc_dec_loop	/* no borrow: at least 8 more blocks were available */
+
+	addq	$8,%r14	/* r14 = leftover block count, 0..7 */
+	jz	.Lcbc_dec_done
+
+	movdqu	0(%r12),%xmm15	/* tail: load only the blocks that exist, dispatching on r14 (movdqu does not modify flags between cmp and the paired jb/je) */
+	movq	%rsp,%rax
+	movl	%edx,%r10d
+	cmpq	$2,%r14
+	jb	.Lcbc_dec_one
+	movdqu	16(%r12),%xmm0
+	je	.Lcbc_dec_two
+	movdqu	32(%r12),%xmm1
+	cmpq	$4,%r14
+	jb	.Lcbc_dec_three
+	movdqu	48(%r12),%xmm2
+	je	.Lcbc_dec_four
+	movdqu	64(%r12),%xmm3
+	cmpq	$6,%r14
+	jb	.Lcbc_dec_five
+	movdqu	80(%r12),%xmm4
+	je	.Lcbc_dec_six
+	movdqu	96(%r12),%xmm5	/* seven blocks left; unused lanes carry stale register contents and their results are ignored */
+	movdqa	%xmm14,32(%rbp)
+	call	_bsaes_decrypt8
+	pxor	32(%rbp),%xmm15
+	movdqu	0(%r12),%xmm7
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm7,%xmm0
+	movdqu	32(%r12),%xmm9
+	pxor	%xmm8,%xmm5
+	movdqu	48(%r12),%xmm10
+	pxor	%xmm9,%xmm3
+	movdqu	64(%r12),%xmm11
+	pxor	%xmm10,%xmm1
+	movdqu	80(%r12),%xmm12
+	pxor	%xmm11,%xmm6
+	movdqu	96(%r12),%xmm14	/* last ciphertext block -> chaining value written back at .Lcbc_dec_done */
+	pxor	%xmm12,%xmm2
+	movdqu	%xmm15,0(%r13)
+	movdqu	%xmm0,16(%r13)
+	movdqu	%xmm5,32(%r13)
+	movdqu	%xmm3,48(%r13)
+	movdqu	%xmm1,64(%r13)
+	movdqu	%xmm6,80(%r13)
+	movdqu	%xmm2,96(%r13)
+	jmp	.Lcbc_dec_done
+.align	16
+.Lcbc_dec_six:	/* six blocks left */
+	movdqa	%xmm14,32(%rbp)
+	call	_bsaes_decrypt8
+	pxor	32(%rbp),%xmm15
+	movdqu	0(%r12),%xmm7
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm7,%xmm0
+	movdqu	32(%r12),%xmm9
+	pxor	%xmm8,%xmm5
+	movdqu	48(%r12),%xmm10
+	pxor	%xmm9,%xmm3
+	movdqu	64(%r12),%xmm11
+	pxor	%xmm10,%xmm1
+	movdqu	80(%r12),%xmm14
+	pxor	%xmm11,%xmm6
+	movdqu	%xmm15,0(%r13)
+	movdqu	%xmm0,16(%r13)
+	movdqu	%xmm5,32(%r13)
+	movdqu	%xmm3,48(%r13)
+	movdqu	%xmm1,64(%r13)
+	movdqu	%xmm6,80(%r13)
+	jmp	.Lcbc_dec_done
+.align	16
+.Lcbc_dec_five:	/* five blocks left */
+	movdqa	%xmm14,32(%rbp)
+	call	_bsaes_decrypt8
+	pxor	32(%rbp),%xmm15
+	movdqu	0(%r12),%xmm7
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm7,%xmm0
+	movdqu	32(%r12),%xmm9
+	pxor	%xmm8,%xmm5
+	movdqu	48(%r12),%xmm10
+	pxor	%xmm9,%xmm3
+	movdqu	64(%r12),%xmm14
+	pxor	%xmm10,%xmm1
+	movdqu	%xmm15,0(%r13)
+	movdqu	%xmm0,16(%r13)
+	movdqu	%xmm5,32(%r13)
+	movdqu	%xmm3,48(%r13)
+	movdqu	%xmm1,64(%r13)
+	jmp	.Lcbc_dec_done
+.align	16
+.Lcbc_dec_four:	/* four blocks left */
+	movdqa	%xmm14,32(%rbp)
+	call	_bsaes_decrypt8
+	pxor	32(%rbp),%xmm15
+	movdqu	0(%r12),%xmm7
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm7,%xmm0
+	movdqu	32(%r12),%xmm9
+	pxor	%xmm8,%xmm5
+	movdqu	48(%r12),%xmm14
+	pxor	%xmm9,%xmm3
+	movdqu	%xmm15,0(%r13)
+	movdqu	%xmm0,16(%r13)
+	movdqu	%xmm5,32(%r13)
+	movdqu	%xmm3,48(%r13)
+	jmp	.Lcbc_dec_done
+.align	16
+.Lcbc_dec_three:	/* three blocks left */
+	movdqa	%xmm14,32(%rbp)
+	call	_bsaes_decrypt8
+	pxor	32(%rbp),%xmm15
+	movdqu	0(%r12),%xmm7
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm7,%xmm0
+	movdqu	32(%r12),%xmm14
+	pxor	%xmm8,%xmm5
+	movdqu	%xmm15,0(%r13)
+	movdqu	%xmm0,16(%r13)
+	movdqu	%xmm5,32(%r13)
+	jmp	.Lcbc_dec_done
+.align	16
+.Lcbc_dec_two:	/* two blocks left */
+	movdqa	%xmm14,32(%rbp)
+	call	_bsaes_decrypt8
+	pxor	32(%rbp),%xmm15
+	movdqu	0(%r12),%xmm7
+	movdqu	16(%r12),%xmm14
+	pxor	%xmm7,%xmm0
+	movdqu	%xmm15,0(%r13)
+	movdqu	%xmm0,16(%r13)
+	jmp	.Lcbc_dec_done
+.align	16
+.Lcbc_dec_one:	/* single block left: use the external one-block routine with the original (unconverted) key in r15 */
+	leaq	(%r12),%rdi
+	leaq	32(%rbp),%rsi	/* result goes to the scratch slot at 32(%rbp) */
+	leaq	(%r15),%rdx
+	call	asm_AES_decrypt	/* NOTE(review): relies on the callee leaving xmm14/xmm15 intact -- confirm against asm_AES_decrypt */
+	pxor	32(%rbp),%xmm14	/* chaining value XOR decrypted block */
+	movdqu	%xmm14,(%r13)
+	movdqa	%xmm15,%xmm14	/* the ciphertext block just consumed becomes the chaining value */
+
+.Lcbc_dec_done:
+	movdqu	%xmm14,(%rbx)	/* write the updated chaining value back through the IV pointer */
+	leaq	(%rsp),%rax
+	pxor	%xmm0,%xmm0
+.Lcbc_dec_bzero:	/* wipe the stack from rsp up to rbp (the converted key schedule), 32 bytes per iteration */
+	movdqa	%xmm0,0(%rax)
+	movdqa	%xmm0,16(%rax)
+	leaq	32(%rax),%rax
+	cmpq	%rax,%rbp
+	ja	.Lcbc_dec_bzero
+
+	leaq	120(%rbp),%rax	/* 72-byte local area + 6 pushed registers = 120: rax = stack pointer value at function entry */
+	movq	-48(%rax),%r15
+	movq	-40(%rax),%r14
+	movq	-32(%rax),%r13
+	movq	-24(%rax),%r12
+	movq	-16(%rax),%rbx
+	movq	-8(%rax),%rbp
+	leaq	(%rax),%rsp
+.Lcbc_dec_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+.size	bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
+
+.globl	bsaes_ctr32_encrypt_blocks	/* CTR mode with a 32-bit counter in the last four bytes of the counter block. Register use below: rdi=in, rsi=out, rdx=number of 16-byte blocks, rcx=key, r8=16-byte counter block. */
+.hidden bsaes_ctr32_encrypt_blocks
+.type	bsaes_ctr32_encrypt_blocks,@function
+.align	16
+bsaes_ctr32_encrypt_blocks:
+	movq	%rsp,%rax	/* not read in this function: eax is overwritten by the load from 240(%rcx) below */
+.Lctr_enc_prologue:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	leaq	-72(%rsp),%rsp	/* 72-byte local area: counter block at 32(%rbp), one-block scratch at 48(%rbp) */
+	movq	%rsp,%rbp
+	movdqu	(%r8),%xmm0
+	movl	240(%rcx),%eax	/* 32-bit field at key+240, used as the round count */
+	movq	%rdi,%r12
+	movq	%rsi,%r13
+	movq	%rdx,%r14
+	movq	%rcx,%r15
+	movdqa	%xmm0,32(%rbp)	/* local copy of the counter block */
+	cmpq	$8,%rdx
+	jb	.Lctr_enc_short	/* fewer than 8 blocks: one-at-a-time path, no key conversion */
+
+	movl	%eax,%ebx	/* ebx keeps the round count for every helper call */
+	shlq	$7,%rax
+	subq	$96,%rax
+	subq	%rax,%rsp	/* reserve 128*rounds-96 bytes for the converted key schedule */
+
+	movq	%rsp,%rax
+	movq	%r15,%rcx
+	movl	%ebx,%r10d
+	call	_bsaes_key_convert
+	pxor	%xmm6,%xmm7	/* last round key XOR the constant returned in xmm7 ... */
+	movdqa	%xmm7,(%rax)	/* ... stored at the end of the converted schedule */
+
+	movdqa	(%rsp),%xmm8	/* first key block */
+	leaq	.LADD1(%rip),%r11	/* r11 = counter-increment table; negative offsets hold shuffle masks */
+	movdqa	32(%rbp),%xmm15	/* counter block */
+	movdqa	-32(%r11),%xmm7
+.byte	102,68,15,56,0,199	/* pshufb %xmm7,%xmm8 */
+.byte	102,68,15,56,0,255	/* pshufb %xmm7,%xmm15 -- presumably puts the counter word in native byte order for paddd; TODO confirm against the table */
+	movdqa	%xmm8,(%rsp)	/* keep the shuffled first key block for the loop */
+	jmp	.Lctr_enc_loop
+.align	16
+.Lctr_enc_loop:	/* build 8 consecutive counter blocks, encrypt them, XOR with input */
+	movdqa	%xmm15,32(%rbp)	/* save the base counter: xmm15 is consumed by the encryption */
+	movdqa	%xmm15,%xmm0
+	movdqa	%xmm15,%xmm1
+	paddd	0(%r11),%xmm0	/* base + table entry; entries at 0..96 presumably encode +1..+7 (table is outside this view) */
+	movdqa	%xmm15,%xmm2
+	paddd	16(%r11),%xmm1
+	movdqa	%xmm15,%xmm3
+	paddd	32(%r11),%xmm2
+	movdqa	%xmm15,%xmm4
+	paddd	48(%r11),%xmm3
+	movdqa	%xmm15,%xmm5
+	paddd	64(%r11),%xmm4
+	movdqa	%xmm15,%xmm6
+	paddd	80(%r11),%xmm5
+	paddd	96(%r11),%xmm6
+
+
+
+	movdqa	(%rsp),%xmm8	/* inlined copy of the _bsaes_encrypt8 entry sequence, using a different shuffle mask */
+	leaq	16(%rsp),%rax
+	movdqa	-16(%r11),%xmm7
+	pxor	%xmm8,%xmm15
+	pxor	%xmm8,%xmm0
+	pxor	%xmm8,%xmm1
+	pxor	%xmm8,%xmm2
+.byte	102,68,15,56,0,255	/* pshufb %xmm7,%xmm15 */
+.byte	102,15,56,0,199	/* pshufb %xmm7,%xmm0 */
+	pxor	%xmm8,%xmm3
+	pxor	%xmm8,%xmm4
+.byte	102,15,56,0,207	/* pshufb %xmm7,%xmm1 */
+.byte	102,15,56,0,215	/* pshufb %xmm7,%xmm2 */
+	pxor	%xmm8,%xmm5
+	pxor	%xmm8,%xmm6
+.byte	102,15,56,0,223	/* pshufb %xmm7,%xmm3 */
+.byte	102,15,56,0,231	/* pshufb %xmm7,%xmm4 */
+.byte	102,15,56,0,239	/* pshufb %xmm7,%xmm5 */
+.byte	102,15,56,0,247	/* pshufb %xmm7,%xmm6 */
+	leaq	.LBS0(%rip),%r11	/* the secondary entry point expects r11 = .LBS0 */
+	movl	%ebx,%r10d
+
+	call	_bsaes_encrypt8_bitslice
+
+	subq	$8,%r14
+	jc	.Lctr_enc_loop_done	/* borrow: fewer than 8 blocks remained, use only part of the keystream */
+
+	movdqu	0(%r12),%xmm7
+	movdqu	16(%r12),%xmm8
+	movdqu	32(%r12),%xmm9
+	movdqu	48(%r12),%xmm10
+	movdqu	64(%r12),%xmm11
+	movdqu	80(%r12),%xmm12
+	movdqu	96(%r12),%xmm13
+	movdqu	112(%r12),%xmm14
+	leaq	128(%r12),%r12
+	pxor	%xmm15,%xmm7	/* keystream arrives in the order xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,xmm1,xmm4 */
+	movdqa	32(%rbp),%xmm15	/* reload the base counter */
+	pxor	%xmm8,%xmm0
+	movdqu	%xmm7,0(%r13)
+	pxor	%xmm9,%xmm3
+	movdqu	%xmm0,16(%r13)
+	pxor	%xmm10,%xmm5
+	movdqu	%xmm3,32(%r13)
+	pxor	%xmm11,%xmm2
+	movdqu	%xmm5,48(%r13)
+	pxor	%xmm12,%xmm6
+	movdqu	%xmm2,64(%r13)
+	pxor	%xmm13,%xmm1
+	movdqu	%xmm6,80(%r13)
+	pxor	%xmm14,%xmm4
+	movdqu	%xmm1,96(%r13)
+	leaq	.LADD1(%rip),%r11
+	movdqu	%xmm4,112(%r13)
+	leaq	128(%r13),%r13
+	paddd	112(%r11),%xmm15	/* advance the base counter for the next batch (presumably +8; table is outside this view) */
+	jnz	.Lctr_enc_loop	/* flags are still those of subq $8,%r14 above: SSE ops and leaq do not modify them */
+
+	jmp	.Lctr_enc_done
+.align	16
+.Lctr_enc_loop_done:	/* partial final batch: emit 1..7 blocks, dispatching on the remaining count */
+	addq	$8,%r14	/* restore the remaining block count */
+	movdqu	0(%r12),%xmm7
+	pxor	%xmm7,%xmm15
+	movdqu	%xmm15,0(%r13)
+	cmpq	$2,%r14
+	jb	.Lctr_enc_done
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm8,%xmm0
+	movdqu	%xmm0,16(%r13)
+	je	.Lctr_enc_done
+	movdqu	32(%r12),%xmm9
+	pxor	%xmm9,%xmm3
+	movdqu	%xmm3,32(%r13)
+	cmpq	$4,%r14
+	jb	.Lctr_enc_done
+	movdqu	48(%r12),%xmm10
+	pxor	%xmm10,%xmm5
+	movdqu	%xmm5,48(%r13)
+	je	.Lctr_enc_done
+	movdqu	64(%r12),%xmm11
+	pxor	%xmm11,%xmm2
+	movdqu	%xmm2,64(%r13)
+	cmpq	$6,%r14
+	jb	.Lctr_enc_done
+	movdqu	80(%r12),%xmm12
+	pxor	%xmm12,%xmm6
+	movdqu	%xmm6,80(%r13)
+	je	.Lctr_enc_done
+	movdqu	96(%r12),%xmm13
+	pxor	%xmm13,%xmm1
+	movdqu	%xmm1,96(%r13)
+	jmp	.Lctr_enc_done
+
+.align	16
+.Lctr_enc_short:	/* fewer than 8 blocks in total: encrypt the counter block one at a time with the external routine */
+	leaq	32(%rbp),%rdi	/* input: counter block */
+	leaq	48(%rbp),%rsi	/* output: keystream block */
+	leaq	(%r15),%rdx	/* original (unconverted) key */
+	call	asm_AES_encrypt
+	movdqu	(%r12),%xmm0
+	leaq	16(%r12),%r12
+	movl	44(%rbp),%eax	/* last 4 bytes of the counter block */
+	bswapl	%eax	/* big-endian -> native */
+	pxor	48(%rbp),%xmm0
+	incl	%eax
+	movdqu	%xmm0,(%r13)
+	bswapl	%eax	/* native -> big-endian */
+	leaq	16(%r13),%r13
+	movl	%eax,44(%rsp)	/* same slot as 44(%rbp) above: rsp == rbp on this path because the stack reservation is skipped */
+	decq	%r14
+	jnz	.Lctr_enc_short
+
+.Lctr_enc_done:
+	leaq	(%rsp),%rax
+	pxor	%xmm0,%xmm0
+.Lctr_enc_bzero:	/* wipe the stack from rsp upward, 32 bytes per iteration, until rax passes rbp (runs at least once, so on the short path it clears the first 32 bytes of the local area) */
+	movdqa	%xmm0,0(%rax)
+	movdqa	%xmm0,16(%rax)
+	leaq	32(%rax),%rax
+	cmpq	%rax,%rbp
+	ja	.Lctr_enc_bzero
+
+	leaq	120(%rbp),%rax	/* 72-byte local area + 6 pushed registers = 120: rax = stack pointer value at function entry */
+	movq	-48(%rax),%r15
+	movq	-40(%rax),%r14
+	movq	-32(%rax),%r13
+	movq	-24(%rax),%r12
+	movq	-16(%rax),%rbx
+	movq	-8(%rax),%rbp
+	leaq	(%rax),%rsp
+.Lctr_enc_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+.size	bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
+.globl	bsaes_xts_encrypt
+.hidden bsaes_xts_encrypt
+.type	bsaes_xts_encrypt,@function
+.align	16
+bsaes_xts_encrypt:
+	movq	%rsp,%rax
+.Lxts_enc_prologue:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	leaq	-72(%rsp),%rsp
+	movq	%rsp,%rbp
+	movq	%rdi,%r12
+	movq	%rsi,%r13
+	movq	%rdx,%r14
+	movq	%rcx,%r15
+
+	leaq	(%r9),%rdi
+	leaq	32(%rbp),%rsi
+	leaq	(%r8),%rdx
+	call	asm_AES_encrypt
+
+	movl	240(%r15),%eax
+	movq	%r14,%rbx
+
+	movl	%eax,%edx
+	shlq	$7,%rax
+	subq	$96,%rax
+	subq	%rax,%rsp
+
+	movq	%rsp,%rax
+	movq	%r15,%rcx
+	movl	%edx,%r10d
+	call	_bsaes_key_convert
+	pxor	%xmm6,%xmm7
+	movdqa	%xmm7,(%rax)
+
+	andq	$-16,%r14
+	subq	$0x80,%rsp
+	movdqa	32(%rbp),%xmm6
+
+	pxor	%xmm14,%xmm14
+	movdqa	.Lxts_magic(%rip),%xmm12
+	pcmpgtd	%xmm6,%xmm14
+
+	subq	$0x80,%r14
+	jc	.Lxts_enc_short
+	jmp	.Lxts_enc_loop
+
+.align	16
+.Lxts_enc_loop:
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm15
+	movdqa	%xmm6,0(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm0
+	movdqa	%xmm6,16(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	0(%r12),%xmm7
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm1
+	movdqa	%xmm6,32(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm7,%xmm15
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm2
+	movdqa	%xmm6,48(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	32(%r12),%xmm9
+	pxor	%xmm8,%xmm0
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm3
+	movdqa	%xmm6,64(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	48(%r12),%xmm10
+	pxor	%xmm9,%xmm1
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm4
+	movdqa	%xmm6,80(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	64(%r12),%xmm11
+	pxor	%xmm10,%xmm2
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm5
+	movdqa	%xmm6,96(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	80(%r12),%xmm12
+	pxor	%xmm11,%xmm3
+	movdqu	96(%r12),%xmm13
+	pxor	%xmm12,%xmm4
+	movdqu	112(%r12),%xmm14
+	leaq	128(%r12),%r12
+	movdqa	%xmm6,112(%rsp)
+	pxor	%xmm13,%xmm5
+	leaq	128(%rsp),%rax
+	pxor	%xmm14,%xmm6
+	movl	%edx,%r10d
+
+	call	_bsaes_encrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm3
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm5
+	movdqu	%xmm3,32(%r13)
+	pxor	64(%rsp),%xmm2
+	movdqu	%xmm5,48(%r13)
+	pxor	80(%rsp),%xmm6
+	movdqu	%xmm2,64(%r13)
+	pxor	96(%rsp),%xmm1
+	movdqu	%xmm6,80(%r13)
+	pxor	112(%rsp),%xmm4
+	movdqu	%xmm1,96(%r13)
+	movdqu	%xmm4,112(%r13)
+	leaq	128(%r13),%r13
+
+	movdqa	112(%rsp),%xmm6
+	pxor	%xmm14,%xmm14
+	movdqa	.Lxts_magic(%rip),%xmm12
+	pcmpgtd	%xmm6,%xmm14
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+
+	subq	$0x80,%r14
+	jnc	.Lxts_enc_loop
+
+.Lxts_enc_short:
+	addq	$0x80,%r14
+	jz	.Lxts_enc_done
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm15
+	movdqa	%xmm6,0(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm0
+	movdqa	%xmm6,16(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	0(%r12),%xmm7
+	cmpq	$16,%r14
+	je	.Lxts_enc_1
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm1
+	movdqa	%xmm6,32(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	16(%r12),%xmm8
+	cmpq	$32,%r14
+	je	.Lxts_enc_2
+	pxor	%xmm7,%xmm15
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm2
+	movdqa	%xmm6,48(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	32(%r12),%xmm9
+	cmpq	$48,%r14
+	je	.Lxts_enc_3
+	pxor	%xmm8,%xmm0
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm3
+	movdqa	%xmm6,64(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	48(%r12),%xmm10
+	cmpq	$64,%r14
+	je	.Lxts_enc_4
+	pxor	%xmm9,%xmm1
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm4
+	movdqa	%xmm6,80(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	64(%r12),%xmm11
+	cmpq	$80,%r14
+	je	.Lxts_enc_5
+	pxor	%xmm10,%xmm2
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm5
+	movdqa	%xmm6,96(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	80(%r12),%xmm12
+	cmpq	$96,%r14
+	je	.Lxts_enc_6
+	pxor	%xmm11,%xmm3
+	movdqu	96(%r12),%xmm13
+	pxor	%xmm12,%xmm4
+	movdqa	%xmm6,112(%rsp)
+	leaq	112(%r12),%r12
+	pxor	%xmm13,%xmm5
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_encrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm3
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm5
+	movdqu	%xmm3,32(%r13)
+	pxor	64(%rsp),%xmm2
+	movdqu	%xmm5,48(%r13)
+	pxor	80(%rsp),%xmm6
+	movdqu	%xmm2,64(%r13)
+	pxor	96(%rsp),%xmm1
+	movdqu	%xmm6,80(%r13)
+	movdqu	%xmm1,96(%r13)
+	leaq	112(%r13),%r13
+
+	movdqa	112(%rsp),%xmm6
+	jmp	.Lxts_enc_done
+.align	16
+.Lxts_enc_6:
+	pxor	%xmm11,%xmm3
+	leaq	96(%r12),%r12
+	pxor	%xmm12,%xmm4
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_encrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm3
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm5
+	movdqu	%xmm3,32(%r13)
+	pxor	64(%rsp),%xmm2
+	movdqu	%xmm5,48(%r13)
+	pxor	80(%rsp),%xmm6
+	movdqu	%xmm2,64(%r13)
+	movdqu	%xmm6,80(%r13)
+	leaq	96(%r13),%r13
+
+	movdqa	96(%rsp),%xmm6
+	jmp	.Lxts_enc_done
+.align	16
+.Lxts_enc_5:
+	pxor	%xmm10,%xmm2
+	leaq	80(%r12),%r12
+	pxor	%xmm11,%xmm3
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_encrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm3
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm5
+	movdqu	%xmm3,32(%r13)
+	pxor	64(%rsp),%xmm2
+	movdqu	%xmm5,48(%r13)
+	movdqu	%xmm2,64(%r13)
+	leaq	80(%r13),%r13
+
+	movdqa	80(%rsp),%xmm6
+	jmp	.Lxts_enc_done
+.align	16
+.Lxts_enc_4:
+	pxor	%xmm9,%xmm1
+	leaq	64(%r12),%r12
+	pxor	%xmm10,%xmm2
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_encrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm3
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm5
+	movdqu	%xmm3,32(%r13)
+	movdqu	%xmm5,48(%r13)
+	leaq	64(%r13),%r13
+
+	movdqa	64(%rsp),%xmm6
+	jmp	.Lxts_enc_done
+.align	16
+.Lxts_enc_3:
+	pxor	%xmm8,%xmm0
+	leaq	48(%r12),%r12
+	pxor	%xmm9,%xmm1
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_encrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm3
+	movdqu	%xmm0,16(%r13)
+	movdqu	%xmm3,32(%r13)
+	leaq	48(%r13),%r13
+
+	movdqa	48(%rsp),%xmm6
+	jmp	.Lxts_enc_done
+.align	16
+.Lxts_enc_2:
+	pxor	%xmm7,%xmm15
+	leaq	32(%r12),%r12
+	pxor	%xmm8,%xmm0
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_encrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	movdqu	%xmm0,16(%r13)
+	leaq	32(%r13),%r13
+
+	movdqa	32(%rsp),%xmm6
+	jmp	.Lxts_enc_done
+.align	16
+.Lxts_enc_1:
+	pxor	%xmm15,%xmm7
+	leaq	16(%r12),%r12
+	movdqa	%xmm7,32(%rbp)
+	leaq	32(%rbp),%rdi
+	leaq	32(%rbp),%rsi
+	leaq	(%r15),%rdx
+	call	asm_AES_encrypt
+	pxor	32(%rbp),%xmm15
+
+
+
+
+
+	movdqu	%xmm15,0(%r13)
+	leaq	16(%r13),%r13
+
+	movdqa	16(%rsp),%xmm6
+
+.Lxts_enc_done:
+	andl	$15,%ebx
+	jz	.Lxts_enc_ret
+	movq	%r13,%rdx
+
+.Lxts_enc_steal:
+	movzbl	(%r12),%eax
+	movzbl	-16(%rdx),%ecx
+	leaq	1(%r12),%r12
+	movb	%al,-16(%rdx)
+	movb	%cl,0(%rdx)
+	leaq	1(%rdx),%rdx
+	subl	$1,%ebx
+	jnz	.Lxts_enc_steal
+
+	movdqu	-16(%r13),%xmm15
+	leaq	32(%rbp),%rdi
+	pxor	%xmm6,%xmm15
+	leaq	32(%rbp),%rsi
+	movdqa	%xmm15,32(%rbp)
+	leaq	(%r15),%rdx
+	call	asm_AES_encrypt
+	pxor	32(%rbp),%xmm6
+	movdqu	%xmm6,-16(%r13)
+
+.Lxts_enc_ret:
+	leaq	(%rsp),%rax
+	pxor	%xmm0,%xmm0
+.Lxts_enc_bzero:
+	movdqa	%xmm0,0(%rax)
+	movdqa	%xmm0,16(%rax)
+	leaq	32(%rax),%rax
+	cmpq	%rax,%rbp
+	ja	.Lxts_enc_bzero
+
+	leaq	120(%rbp),%rax
+	movq	-48(%rax),%r15
+	movq	-40(%rax),%r14
+	movq	-32(%rax),%r13
+	movq	-24(%rax),%r12
+	movq	-16(%rax),%rbx
+	movq	-8(%rax),%rbp
+	leaq	(%rax),%rsp
+.Lxts_enc_epilogue:
+	.byte	0xf3,0xc3
+.size	bsaes_xts_encrypt,.-bsaes_xts_encrypt
+
+.globl	bsaes_xts_decrypt
+.hidden bsaes_xts_decrypt
+.type	bsaes_xts_decrypt,@function
+.align	16
+bsaes_xts_decrypt:	/* XTS decrypt. In: %rdi=input, %rsi=output, %rdx=byte length, %rcx=data key (round count at offset 240), %r8=key handed to asm_AES_encrypt for the tweak, %r9=16-byte block that is encrypted into the initial tweak */
+	movq	%rsp,%rax
+.Lxts_dec_prologue:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	leaq	-72(%rsp),%rsp	/* 72-byte local frame; 32(%rbp) holds the 16-byte tweak / scratch block */
+	movq	%rsp,%rbp
+	movq	%rdi,%r12	/* %r12 = input cursor */
+	movq	%rsi,%r13	/* %r13 = output cursor */
+	movq	%rdx,%r14	/* %r14 = remaining length */
+	movq	%rcx,%r15	/* %r15 = data key */
+
+	leaq	(%r9),%rdi
+	leaq	32(%rbp),%rsi
+	leaq	(%r8),%rdx
+	call	asm_AES_encrypt	/* 32(%rbp) = encryption of the block at %r9 under the %r8 key: the initial tweak */
+
+	movl	240(%r15),%eax	/* round count of the data key */
+	movq	%r14,%rbx	/* %rbx keeps the original length; its low 4 bits select ciphertext stealing later */
+
+	movl	%eax,%edx	/* %edx = round count, reloaded into %r10d before every bulk call */
+	shlq	$7,%rax
+	subq	$96,%rax
+	subq	%rax,%rsp	/* reserve rounds*128-96 bytes of stack for the converted key schedule */
+
+	movq	%rsp,%rax
+	movq	%r15,%rcx
+	movl	%edx,%r10d
+	call	_bsaes_key_convert	/* NOTE(review): defined outside this view; the fix-up below uses its %xmm6, %xmm7 and advanced %rax as outputs */
+	pxor	(%rsp),%xmm7
+	movdqa	%xmm6,(%rax)
+	movdqa	%xmm7,(%rsp)
+
+	xorl	%eax,%eax
+	andq	$-16,%r14	/* round the length down to whole blocks */
+	testl	$15,%ebx
+	setnz	%al
+	shlq	$4,%rax
+	subq	%rax,%r14	/* if there is a partial trailing block, hold one full block back for the stealing code */
+
+	subq	$0x80,%rsp	/* 128 bytes at 0..127(%rsp) for eight tweaks; the key schedule is now at 128(%rsp) */
+	movdqa	32(%rbp),%xmm6	/* %xmm6 = current tweak */
+
+	pxor	%xmm14,%xmm14
+	movdqa	.Lxts_magic(%rip),%xmm12
+	pcmpgtd	%xmm6,%xmm14	/* %xmm14 = all-ones in every dword of the tweak whose top bit is set */
+
+	subq	$0x80,%r14
+	jc	.Lxts_dec_short	/* fewer than 128 bytes of whole blocks remain */
+	jmp	.Lxts_dec_loop
+
+.align	16
+.Lxts_dec_loop:	/* eight blocks per iteration */
+	pshufd	$0x13,%xmm14,%xmm13	/* sign mask of dword 3 -> dword 0, of dword 1 -> dword 2 */
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm15	/* tweak for block 0 */
+	movdqa	%xmm6,0(%rsp)
+	paddq	%xmm6,%xmm6	/* shift each 64-bit half left by one bit */
+	pand	%xmm12,%xmm13	/* 0x87 if bit 127 was set, 1 in dword 2 if bit 63 was set */
+	pcmpgtd	%xmm6,%xmm14	/* sign masks of the shifted value, consumed by the next step */
+	pxor	%xmm13,%xmm6	/* fold carry and reduction in: %xmm6 = next tweak */
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm0	/* tweak for block 1 */
+	movdqa	%xmm6,16(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	0(%r12),%xmm7	/* input block 0 */
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm1	/* tweak for block 2 */
+	movdqa	%xmm6,32(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm7,%xmm15	/* block 0 ^ tweak 0 */
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm2	/* tweak for block 3 */
+	movdqa	%xmm6,48(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	32(%r12),%xmm9
+	pxor	%xmm8,%xmm0
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm3	/* tweak for block 4 */
+	movdqa	%xmm6,64(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	48(%r12),%xmm10
+	pxor	%xmm9,%xmm1
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm4	/* tweak for block 5 */
+	movdqa	%xmm6,80(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	64(%r12),%xmm11
+	pxor	%xmm10,%xmm2
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm5	/* tweak for block 6 */
+	movdqa	%xmm6,96(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	80(%r12),%xmm12	/* from here %xmm12-%xmm14 hold input data; the magic constant is reloaded after the call */
+	pxor	%xmm11,%xmm3
+	movdqu	96(%r12),%xmm13
+	pxor	%xmm12,%xmm4
+	movdqu	112(%r12),%xmm14
+	leaq	128(%r12),%r12
+	movdqa	%xmm6,112(%rsp)	/* tweak for block 7 */
+	pxor	%xmm13,%xmm5
+	leaq	128(%rsp),%rax	/* %rax = converted key schedule */
+	pxor	%xmm14,%xmm6
+	movl	%edx,%r10d	/* %r10d = round count */
+
+	call	_bsaes_decrypt8
+
+	pxor	0(%rsp),%xmm15	/* results are consumed in the register order %xmm15,%xmm0,%xmm5,%xmm3,%xmm1,%xmm6,%xmm2,%xmm4 */
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm5
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm3
+	movdqu	%xmm5,32(%r13)
+	pxor	64(%rsp),%xmm1
+	movdqu	%xmm3,48(%r13)
+	pxor	80(%rsp),%xmm6
+	movdqu	%xmm1,64(%r13)
+	pxor	96(%rsp),%xmm2
+	movdqu	%xmm6,80(%r13)
+	pxor	112(%rsp),%xmm4
+	movdqu	%xmm2,96(%r13)
+	movdqu	%xmm4,112(%r13)
+	leaq	128(%r13),%r13
+
+	movdqa	112(%rsp),%xmm6	/* resume from the last tweak used */
+	pxor	%xmm14,%xmm14
+	movdqa	.Lxts_magic(%rip),%xmm12	/* restore the constant clobbered by the input loads */
+	pcmpgtd	%xmm6,%xmm14
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6	/* %xmm6 = first tweak of the next batch, %xmm14 = its sign masks */
+
+	subq	$0x80,%r14
+	jnc	.Lxts_dec_loop
+
+.Lxts_dec_short:	/* 0..7 whole blocks left */
+	addq	$0x80,%r14	/* undo the last subtraction: %r14 = remaining whole-block bytes */
+	jz	.Lxts_dec_done
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm15
+	movdqa	%xmm6,0(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm0
+	movdqa	%xmm6,16(%rsp)	/* each path also stores the tweak following its last block; the tail reloads it into %xmm6 */
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	0(%r12),%xmm7
+	cmpq	$16,%r14
+	je	.Lxts_dec_1
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm1
+	movdqa	%xmm6,32(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	16(%r12),%xmm8
+	cmpq	$32,%r14
+	je	.Lxts_dec_2
+	pxor	%xmm7,%xmm15
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm2
+	movdqa	%xmm6,48(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	32(%r12),%xmm9
+	cmpq	$48,%r14
+	je	.Lxts_dec_3
+	pxor	%xmm8,%xmm0
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm3
+	movdqa	%xmm6,64(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	48(%r12),%xmm10
+	cmpq	$64,%r14
+	je	.Lxts_dec_4
+	pxor	%xmm9,%xmm1
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm4
+	movdqa	%xmm6,80(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	64(%r12),%xmm11
+	cmpq	$80,%r14
+	je	.Lxts_dec_5
+	pxor	%xmm10,%xmm2
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm5
+	movdqa	%xmm6,96(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	80(%r12),%xmm12
+	cmpq	$96,%r14
+	je	.Lxts_dec_6
+	pxor	%xmm11,%xmm3	/* fall-through case: seven blocks */
+	movdqu	96(%r12),%xmm13
+	pxor	%xmm12,%xmm4
+	movdqa	%xmm6,112(%rsp)
+	leaq	112(%r12),%r12
+	pxor	%xmm13,%xmm5
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_decrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm5
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm3
+	movdqu	%xmm5,32(%r13)
+	pxor	64(%rsp),%xmm1
+	movdqu	%xmm3,48(%r13)
+	pxor	80(%rsp),%xmm6
+	movdqu	%xmm1,64(%r13)
+	pxor	96(%rsp),%xmm2
+	movdqu	%xmm6,80(%r13)
+	movdqu	%xmm2,96(%r13)
+	leaq	112(%r13),%r13
+
+	movdqa	112(%rsp),%xmm6	/* tweak following the last processed block */
+	jmp	.Lxts_dec_done
+.align	16
+.Lxts_dec_6:	/* six blocks */
+	pxor	%xmm11,%xmm3
+	leaq	96(%r12),%r12
+	pxor	%xmm12,%xmm4
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_decrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm5
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm3
+	movdqu	%xmm5,32(%r13)
+	pxor	64(%rsp),%xmm1
+	movdqu	%xmm3,48(%r13)
+	pxor	80(%rsp),%xmm6
+	movdqu	%xmm1,64(%r13)
+	movdqu	%xmm6,80(%r13)
+	leaq	96(%r13),%r13
+
+	movdqa	96(%rsp),%xmm6
+	jmp	.Lxts_dec_done
+.align	16
+.Lxts_dec_5:	/* five blocks */
+	pxor	%xmm10,%xmm2
+	leaq	80(%r12),%r12
+	pxor	%xmm11,%xmm3
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_decrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm5
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm3
+	movdqu	%xmm5,32(%r13)
+	pxor	64(%rsp),%xmm1
+	movdqu	%xmm3,48(%r13)
+	movdqu	%xmm1,64(%r13)
+	leaq	80(%r13),%r13
+
+	movdqa	80(%rsp),%xmm6
+	jmp	.Lxts_dec_done
+.align	16
+.Lxts_dec_4:	/* four blocks */
+	pxor	%xmm9,%xmm1
+	leaq	64(%r12),%r12
+	pxor	%xmm10,%xmm2
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_decrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm5
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm3
+	movdqu	%xmm5,32(%r13)
+	movdqu	%xmm3,48(%r13)
+	leaq	64(%r13),%r13
+
+	movdqa	64(%rsp),%xmm6
+	jmp	.Lxts_dec_done
+.align	16
+.Lxts_dec_3:	/* three blocks */
+	pxor	%xmm8,%xmm0
+	leaq	48(%r12),%r12
+	pxor	%xmm9,%xmm1
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_decrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm5
+	movdqu	%xmm0,16(%r13)
+	movdqu	%xmm5,32(%r13)
+	leaq	48(%r13),%r13
+
+	movdqa	48(%rsp),%xmm6
+	jmp	.Lxts_dec_done
+.align	16
+.Lxts_dec_2:	/* two blocks */
+	pxor	%xmm7,%xmm15
+	leaq	32(%r12),%r12
+	pxor	%xmm8,%xmm0
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_decrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	movdqu	%xmm0,16(%r13)
+	leaq	32(%r13),%r13
+
+	movdqa	32(%rsp),%xmm6
+	jmp	.Lxts_dec_done
+.align	16
+.Lxts_dec_1:	/* one block: uses asm_AES_decrypt with the original key instead of the 8-way routine */
+	pxor	%xmm15,%xmm7	/* input ^ tweak */
+	leaq	16(%r12),%r12
+	movdqa	%xmm7,32(%rbp)
+	leaq	32(%rbp),%rdi
+	leaq	32(%rbp),%rsi
+	leaq	(%r15),%rdx
+	call	asm_AES_decrypt	/* decrypts 32(%rbp) in place. NOTE(review): relies on the callee leaving %xmm15 intact - confirm */
+	pxor	32(%rbp),%xmm15	/* result ^ tweak */
+
+
+
+
+
+	movdqu	%xmm15,0(%r13)
+	leaq	16(%r13),%r13
+
+	movdqa	16(%rsp),%xmm6	/* tweak following the processed block */
+
+.Lxts_dec_done:
+	andl	$15,%ebx	/* %ebx = length of the trailing partial block */
+	jz	.Lxts_dec_ret
+
+	pxor	%xmm14,%xmm14	/* ciphertext stealing: one held-back full block plus %ebx trailing bytes remain */
+	movdqa	.Lxts_magic(%rip),%xmm12
+	pcmpgtd	%xmm6,%xmm14
+	pshufd	$0x13,%xmm14,%xmm13
+	movdqa	%xmm6,%xmm5	/* %xmm5 = current tweak, used second */
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	movdqu	(%r12),%xmm15	/* the held-back full block */
+	pxor	%xmm13,%xmm6	/* %xmm6 = following tweak, used first */
+
+	leaq	32(%rbp),%rdi
+	pxor	%xmm6,%xmm15
+	leaq	32(%rbp),%rsi
+	movdqa	%xmm15,32(%rbp)
+	leaq	(%r15),%rdx
+	call	asm_AES_decrypt	/* NOTE(review): %xmm5 and %xmm6 are expected to survive this call - confirm */
+	pxor	32(%rbp),%xmm6
+	movq	%r13,%rdx
+	movdqu	%xmm6,(%r13)	/* block decrypted with the later tweak, written at the output cursor */
+
+.Lxts_dec_steal:	/* per byte: move the just-written output byte 16 bytes ahead and replace it with a trailing input byte */
+	movzbl	16(%r12),%eax
+	movzbl	(%rdx),%ecx
+	leaq	1(%r12),%r12
+	movb	%al,(%rdx)
+	movb	%cl,16(%rdx)
+	leaq	1(%rdx),%rdx
+	subl	$1,%ebx
+	jnz	.Lxts_dec_steal
+
+	movdqu	(%r13),%xmm15	/* the recombined block */
+	leaq	32(%rbp),%rdi
+	pxor	%xmm5,%xmm15
+	leaq	32(%rbp),%rsi
+	movdqa	%xmm15,32(%rbp)
+	leaq	(%r15),%rdx
+	call	asm_AES_decrypt
+	pxor	32(%rbp),%xmm5
+	movdqu	%xmm5,(%r13)	/* decrypted with the earlier tweak and stored over the same 16 output bytes */
+
+.Lxts_dec_ret:
+	leaq	(%rsp),%rax
+	pxor	%xmm0,%xmm0
+.Lxts_dec_bzero:	/* zero the stack from %rsp upward, 32 bytes per step, while the cursor is below %rbp */
+	movdqa	%xmm0,0(%rax)
+	movdqa	%xmm0,16(%rax)
+	leaq	32(%rax),%rax
+	cmpq	%rax,%rbp
+	ja	.Lxts_dec_bzero
+
+	leaq	120(%rbp),%rax	/* 72-byte frame + 48 bytes of pushes: %rax = stack pointer at entry */
+	movq	-48(%rax),%r15
+	movq	-40(%rax),%r14
+	movq	-32(%rax),%r13
+	movq	-24(%rax),%r12
+	movq	-16(%rax),%rbx
+	movq	-8(%rax),%rbp
+	leaq	(%rax),%rsp
+.Lxts_dec_epilogue:
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+.size	bsaes_xts_decrypt,.-bsaes_xts_decrypt
+.type	_bsaes_const,@object
+.align	64
+_bsaes_const:	/* constant pool; bsaes_xts_decrypt in this file loads .Lxts_magic from it */
+.LM0ISR:	/* a permutation of the byte values 0x00-0x0f. NOTE(review): presumably a byte-shuffle control mask; the consumer is outside this view */
+.quad	0x0a0e0206070b0f03, 0x0004080c0d010509
+.LISRM0:	/* permutation of 0x00-0x0f */
+.quad	0x01040b0e0205080f, 0x0306090c00070a0d
+.LISR:	/* permutation of 0x00-0x0f */
+.quad	0x0504070602010003, 0x0f0e0d0c080b0a09
+.LBS0:	/* alternating single bits: 0x55 in every byte */
+.quad	0x5555555555555555, 0x5555555555555555
+.LBS1:	/* alternating bit pairs: 0x33 in every byte */
+.quad	0x3333333333333333, 0x3333333333333333
+.LBS2:	/* alternating nibbles: 0x0f in every byte */
+.quad	0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+.LSR:	/* permutation of 0x00-0x0f */
+.quad	0x0504070600030201, 0x0f0e0d0c0a09080b
+.LSRM0:	/* permutation of 0x00-0x0f */
+.quad	0x0304090e00050a0f, 0x01060b0c0207080d
+.LM0SR:	/* permutation of 0x00-0x0f */
+.quad	0x0a0e02060f03070b, 0x0004080c05090d01
+.LSWPUP:	/* permutation of 0x00-0x0f */
+.quad	0x0706050403020100, 0x0c0d0e0f0b0a0908
+.LSWPUPM0SR:	/* permutation of 0x00-0x0f */
+.quad	0x0a0d02060c03070b, 0x0004080f05090e01
+.LADD1:	/* .LADD1-.LADD8: the value 1..8 in the top 32-bit lane, zero elsewhere. NOTE(review): presumably counter increments; the consumer is outside this view */
+.quad	0x0000000000000000, 0x0000000100000000
+.LADD2:
+.quad	0x0000000000000000, 0x0000000200000000
+.LADD3:
+.quad	0x0000000000000000, 0x0000000300000000
+.LADD4:
+.quad	0x0000000000000000, 0x0000000400000000
+.LADD5:
+.quad	0x0000000000000000, 0x0000000500000000
+.LADD6:
+.quad	0x0000000000000000, 0x0000000600000000
+.LADD7:
+.quad	0x0000000000000000, 0x0000000700000000
+.LADD8:
+.quad	0x0000000000000000, 0x0000000800000000
+.Lxts_magic:	/* XTS tweak update mask: 0x87 is xored into dword 0 when bit 127 is shifted out, 1 into dword 2 when bit 63 carries into the upper half */
+.long	0x87,0,1,0
+.Lmasks:	/* four rows selecting bit 0, 1, 2 and 3 of every byte */
+.quad	0x0101010101010101, 0x0101010101010101
+.quad	0x0202020202020202, 0x0202020202020202
+.quad	0x0404040404040404, 0x0404040404040404
+.quad	0x0808080808080808, 0x0808080808080808
+.LM0:	/* permutation of 0x00-0x0f */
+.quad	0x02060a0e03070b0f, 0x0004080c0105090d
+.L63:	/* 0x63 in every byte. NOTE(review): 0x63 matches the AES S-box affine constant; the consumer is outside this view */
+.quad	0x6363636363636363, 0x6363636363636363
+.byte	66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0	/* NUL-terminated UTF-8 text: "Bit-sliced AES for x86_64/SSSE3, Emilia Kasper, Peter Schwabe, Andy Polyakov" (the "a" in Kasper is U+00E4, bytes 195,164) */
+.align	64
+.size	_bsaes_const,.-_bsaes_const
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/fipsmodule/ghash-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/ghash-x86_64.S
new file mode 100644
index 0000000..64ef2c2
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/ghash-x86_64.S
@@ -0,0 +1,1806 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+.extern	OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
+
+.globl	gcm_gmult_4bit
+.hidden gcm_gmult_4bit
+.type	gcm_gmult_4bit,@function
+.align	16
+gcm_gmult_4bit:	/* In: %rdi = 16-byte value, updated in place; %rsi = table of 16-byte entries indexed by nibble*16. Walks the value one nibble at a time from byte 15 down to byte 0 */
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	subq	$280,%rsp	/* this frame is not otherwise accessed in this function */
+.Lgmult_prologue:
+
+	movzbq	15(%rdi),%r8	/* start with the last byte */
+	leaq	.Lrem_4bit(%rip),%r11	/* table of 8-byte entries, indexed by the 4 bits shifted out */
+	xorq	%rax,%rax
+	xorq	%rbx,%rbx
+	movb	%r8b,%al
+	movb	%r8b,%bl
+	shlb	$4,%al	/* %rax = low nibble * 16 */
+	movq	$14,%rcx	/* index of the next byte to fetch */
+	movq	8(%rsi,%rax,1),%r8	/* %r9:%r8 = table entry for the low nibble */
+	movq	(%rsi,%rax,1),%r9
+	andb	$0xf0,%bl	/* %rbx = high nibble * 16 */
+	movq	%r8,%rdx
+	jmp	.Loop1
+
+.align	16
+.Loop1:	/* two 4-bit steps per iteration: high nibble of the current byte, then low nibble of the next */
+	shrq	$4,%r8
+	andq	$0xf,%rdx	/* the 4 bits about to be shifted out */
+	movq	%r9,%r10
+	movb	(%rdi,%rcx,1),%al	/* fetch the next byte */
+	shrq	$4,%r9
+	xorq	8(%rsi,%rbx,1),%r8
+	shlq	$60,%r10	/* low 4 bits of %r9 carried into the top of %r8 */
+	xorq	(%rsi,%rbx,1),%r9
+	movb	%al,%bl
+	xorq	(%r11,%rdx,8),%r9	/* fold in the .Lrem_4bit entry for the shifted-out bits */
+	movq	%r8,%rdx
+	shlb	$4,%al
+	xorq	%r10,%r8
+	decq	%rcx
+	js	.Lbreak1	/* byte 0 has been fetched */
+
+	shrq	$4,%r8
+	andq	$0xf,%rdx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	xorq	8(%rsi,%rax,1),%r8
+	shlq	$60,%r10
+	xorq	(%rsi,%rax,1),%r9
+	andb	$0xf0,%bl
+	xorq	(%r11,%rdx,8),%r9
+	movq	%r8,%rdx
+	xorq	%r10,%r8
+	jmp	.Loop1
+
+.align	16
+.Lbreak1:	/* final two steps, for the low and high nibble of byte 0 */
+	shrq	$4,%r8
+	andq	$0xf,%rdx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	xorq	8(%rsi,%rax,1),%r8
+	shlq	$60,%r10
+	xorq	(%rsi,%rax,1),%r9
+	andb	$0xf0,%bl
+	xorq	(%r11,%rdx,8),%r9
+	movq	%r8,%rdx
+	xorq	%r10,%r8
+
+	shrq	$4,%r8
+	andq	$0xf,%rdx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	xorq	8(%rsi,%rbx,1),%r8
+	shlq	$60,%r10
+	xorq	(%rsi,%rbx,1),%r9
+	xorq	%r10,%r8
+	xorq	(%r11,%rdx,8),%r9
+
+	bswapq	%r8	/* byte-swap both halves before storing the result */
+	bswapq	%r9
+	movq	%r8,8(%rdi)
+	movq	%r9,(%rdi)
+
+	leaq	280+48(%rsp),%rsi	/* %rsi = stack pointer at entry */
+	movq	-8(%rsi),%rbx	/* of the pushed registers only %rbx is modified above, so only it is reloaded */
+	leaq	(%rsi),%rsp
+.Lgmult_epilogue:
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+.size	gcm_gmult_4bit,.-gcm_gmult_4bit
+.globl	gcm_ghash_4bit
+.hidden gcm_ghash_4bit
+.type	gcm_ghash_4bit,@function
+.align	16
+gcm_ghash_4bit:	/* In: %rdi = 16-byte accumulator, updated in place; %rsi = table of sixteen 16-byte entries; %rdx = input pointer; %rcx = input length in bytes. Consumes the input 16 bytes per outer iteration */
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	subq	$280,%rsp	/* frame: 16-byte table at 0(%rsp), then two 128-byte tables at 16(%rsp) and 144(%rsp) */
+.Lghash_prologue:
+	movq	%rdx,%r14	/* %r14 = input cursor */
+	movq	%rcx,%r15	/* %r15 = length, turned into an end pointer below */
+	subq	$-128,%rsi	/* bias %rsi by +128; table offsets below are written as x-128. NOTE(review): presumably so every displacement fits in a signed byte */
+	leaq	16+128(%rsp),%rbp	/* %rbp = middle of the two qword tables: -128(%rbp) and 0(%rbp) */
+	xorl	%edx,%edx
+	movq	0+0-128(%rsi),%r8	/* unrolled over all 16 entries: build a copy of each entry shifted right by 4 bits */
+	movq	0+8-128(%rsi),%rax
+	movb	%al,%dl
+	shrq	$4,%rax
+	movq	%r8,%r10
+	shrq	$4,%r8
+	movq	16+0-128(%rsi),%r9
+	shlb	$4,%dl	/* the 4 bits shifted out, moved to the high nibble of a byte */
+	movq	16+8-128(%rsi),%rbx
+	shlq	$60,%r10
+	movb	%dl,0(%rsp)	/* byte table entry i at i(%rsp) */
+	orq	%r10,%rax
+	movb	%bl,%dl
+	shrq	$4,%rbx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	movq	%r8,0(%rbp)	/* (qword at +0) >> 4, at i*8(%rbp) */
+	movq	32+0-128(%rsi),%r8
+	shlb	$4,%dl
+	movq	%rax,0-128(%rbp)	/* ((qword at +8) >> 4) | ((qword at +0) << 60), at i*8-128(%rbp) */
+	movq	32+8-128(%rsi),%rax
+	shlq	$60,%r10
+	movb	%dl,1(%rsp)
+	orq	%r10,%rbx
+	movb	%al,%dl
+	shrq	$4,%rax
+	movq	%r8,%r10
+	shrq	$4,%r8
+	movq	%r9,8(%rbp)
+	movq	48+0-128(%rsi),%r9
+	shlb	$4,%dl
+	movq	%rbx,8-128(%rbp)
+	movq	48+8-128(%rsi),%rbx
+	shlq	$60,%r10
+	movb	%dl,2(%rsp)
+	orq	%r10,%rax
+	movb	%bl,%dl
+	shrq	$4,%rbx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	movq	%r8,16(%rbp)
+	movq	64+0-128(%rsi),%r8
+	shlb	$4,%dl
+	movq	%rax,16-128(%rbp)
+	movq	64+8-128(%rsi),%rax
+	shlq	$60,%r10
+	movb	%dl,3(%rsp)
+	orq	%r10,%rbx
+	movb	%al,%dl
+	shrq	$4,%rax
+	movq	%r8,%r10
+	shrq	$4,%r8
+	movq	%r9,24(%rbp)
+	movq	80+0-128(%rsi),%r9
+	shlb	$4,%dl
+	movq	%rbx,24-128(%rbp)
+	movq	80+8-128(%rsi),%rbx
+	shlq	$60,%r10
+	movb	%dl,4(%rsp)
+	orq	%r10,%rax
+	movb	%bl,%dl
+	shrq	$4,%rbx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	movq	%r8,32(%rbp)
+	movq	96+0-128(%rsi),%r8
+	shlb	$4,%dl
+	movq	%rax,32-128(%rbp)
+	movq	96+8-128(%rsi),%rax
+	shlq	$60,%r10
+	movb	%dl,5(%rsp)
+	orq	%r10,%rbx
+	movb	%al,%dl
+	shrq	$4,%rax
+	movq	%r8,%r10
+	shrq	$4,%r8
+	movq	%r9,40(%rbp)
+	movq	112+0-128(%rsi),%r9
+	shlb	$4,%dl
+	movq	%rbx,40-128(%rbp)
+	movq	112+8-128(%rsi),%rbx
+	shlq	$60,%r10
+	movb	%dl,6(%rsp)
+	orq	%r10,%rax
+	movb	%bl,%dl
+	shrq	$4,%rbx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	movq	%r8,48(%rbp)
+	movq	128+0-128(%rsi),%r8
+	shlb	$4,%dl
+	movq	%rax,48-128(%rbp)
+	movq	128+8-128(%rsi),%rax
+	shlq	$60,%r10
+	movb	%dl,7(%rsp)
+	orq	%r10,%rbx
+	movb	%al,%dl
+	shrq	$4,%rax
+	movq	%r8,%r10
+	shrq	$4,%r8
+	movq	%r9,56(%rbp)
+	movq	144+0-128(%rsi),%r9
+	shlb	$4,%dl
+	movq	%rbx,56-128(%rbp)
+	movq	144+8-128(%rsi),%rbx
+	shlq	$60,%r10
+	movb	%dl,8(%rsp)
+	orq	%r10,%rax
+	movb	%bl,%dl
+	shrq	$4,%rbx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	movq	%r8,64(%rbp)
+	movq	160+0-128(%rsi),%r8
+	shlb	$4,%dl
+	movq	%rax,64-128(%rbp)
+	movq	160+8-128(%rsi),%rax
+	shlq	$60,%r10
+	movb	%dl,9(%rsp)
+	orq	%r10,%rbx
+	movb	%al,%dl
+	shrq	$4,%rax
+	movq	%r8,%r10
+	shrq	$4,%r8
+	movq	%r9,72(%rbp)
+	movq	176+0-128(%rsi),%r9
+	shlb	$4,%dl
+	movq	%rbx,72-128(%rbp)
+	movq	176+8-128(%rsi),%rbx
+	shlq	$60,%r10
+	movb	%dl,10(%rsp)
+	orq	%r10,%rax
+	movb	%bl,%dl
+	shrq	$4,%rbx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	movq	%r8,80(%rbp)
+	movq	192+0-128(%rsi),%r8
+	shlb	$4,%dl
+	movq	%rax,80-128(%rbp)
+	movq	192+8-128(%rsi),%rax
+	shlq	$60,%r10
+	movb	%dl,11(%rsp)
+	orq	%r10,%rbx
+	movb	%al,%dl
+	shrq	$4,%rax
+	movq	%r8,%r10
+	shrq	$4,%r8
+	movq	%r9,88(%rbp)
+	movq	208+0-128(%rsi),%r9
+	shlb	$4,%dl
+	movq	%rbx,88-128(%rbp)
+	movq	208+8-128(%rsi),%rbx
+	shlq	$60,%r10
+	movb	%dl,12(%rsp)
+	orq	%r10,%rax
+	movb	%bl,%dl
+	shrq	$4,%rbx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	movq	%r8,96(%rbp)
+	movq	224+0-128(%rsi),%r8
+	shlb	$4,%dl
+	movq	%rax,96-128(%rbp)
+	movq	224+8-128(%rsi),%rax
+	shlq	$60,%r10
+	movb	%dl,13(%rsp)
+	orq	%r10,%rbx
+	movb	%al,%dl
+	shrq	$4,%rax
+	movq	%r8,%r10
+	shrq	$4,%r8
+	movq	%r9,104(%rbp)
+	movq	240+0-128(%rsi),%r9
+	shlb	$4,%dl
+	movq	%rbx,104-128(%rbp)
+	movq	240+8-128(%rsi),%rbx
+	shlq	$60,%r10
+	movb	%dl,14(%rsp)
+	orq	%r10,%rax
+	movb	%bl,%dl
+	shrq	$4,%rbx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	movq	%r8,112(%rbp)
+	shlb	$4,%dl
+	movq	%rax,112-128(%rbp)
+	shlq	$60,%r10
+	movb	%dl,15(%rsp)
+	orq	%r10,%rbx
+	movq	%r9,120(%rbp)
+	movq	%rbx,120-128(%rbp)
+	addq	$-128,%rsi	/* remove the +128 bias: %rsi points at the table start again */
+	movq	8(%rdi),%r8	/* %r9:%r8 = current accumulator */
+	movq	0(%rdi),%r9
+	addq	%r14,%r15	/* %r15 = end of input */
+	leaq	.Lrem_8bit(%rip),%r11	/* table of 2-byte entries, indexed by a shifted-out byte */
+	jmp	.Louter_loop
+.align	16
+.Louter_loop:	/* one 16-byte input block per iteration */
+	xorq	(%r14),%r9	/* accumulator ^= input block */
+	movq	8(%r14),%rdx
+	leaq	16(%r14),%r14
+	xorq	%r8,%rdx
+	movq	%r9,(%rdi)	/* park the xored value in memory; later dwords are reloaded from it */
+	movq	%rdx,8(%rdi)
+	shrq	$32,%rdx	/* begin with the dword at offset 12 */
+	xorq	%rax,%rax
+	roll	$8,%edx	/* bring the next byte of this dword into %dl */
+	movb	%dl,%al
+	movzbl	%dl,%ebx
+	shlb	$4,%al	/* %rax = low nibble * 16: byte offset into the caller's table */
+	shrl	$4,%ebx	/* %rbx = high nibble: index into the stack tables */
+	roll	$8,%edx
+	movq	8(%rsi,%rax,1),%r8
+	movq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	movzbl	%dl,%ecx
+	shlb	$4,%al
+	movzbq	(%rsp,%rbx,1),%r12
+	shrl	$4,%ecx
+	xorq	%r8,%r12
+	movq	%r9,%r10
+	shrq	$8,%r8	/* shift %r9:%r8 right by one byte */
+	movzbq	%r12b,%r12	/* byte used to index .Lrem_8bit */
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rbx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rbx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r12,2),%r12
+	movzbl	%dl,%ebx
+	shlb	$4,%al
+	movzbq	(%rsp,%rcx,1),%r13
+	shrl	$4,%ebx
+	shlq	$48,%r12	/* place the 16-bit table value in the top bits of %r9 */
+	xorq	%r8,%r13
+	movq	%r9,%r10
+	xorq	%r12,%r9
+	shrq	$8,%r8
+	movzbq	%r13b,%r13
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rcx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rcx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r13,2),%r13
+	movzbl	%dl,%ecx
+	shlb	$4,%al
+	movzbq	(%rsp,%rbx,1),%r12
+	shrl	$4,%ecx
+	shlq	$48,%r13
+	xorq	%r8,%r12
+	movq	%r9,%r10
+	xorq	%r13,%r9
+	shrq	$8,%r8
+	movzbq	%r12b,%r12
+	movl	8(%rdi),%edx	/* next dword of the parked value: offset 8 */
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rbx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rbx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r12,2),%r12
+	movzbl	%dl,%ebx
+	shlb	$4,%al
+	movzbq	(%rsp,%rcx,1),%r13
+	shrl	$4,%ebx
+	shlq	$48,%r12
+	xorq	%r8,%r13
+	movq	%r9,%r10
+	xorq	%r12,%r9
+	shrq	$8,%r8
+	movzbq	%r13b,%r13
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rcx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rcx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r13,2),%r13
+	movzbl	%dl,%ecx
+	shlb	$4,%al
+	movzbq	(%rsp,%rbx,1),%r12
+	shrl	$4,%ecx
+	shlq	$48,%r13
+	xorq	%r8,%r12
+	movq	%r9,%r10
+	xorq	%r13,%r9
+	shrq	$8,%r8
+	movzbq	%r12b,%r12
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rbx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rbx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r12,2),%r12
+	movzbl	%dl,%ebx
+	shlb	$4,%al
+	movzbq	(%rsp,%rcx,1),%r13
+	shrl	$4,%ebx
+	shlq	$48,%r12
+	xorq	%r8,%r13
+	movq	%r9,%r10
+	xorq	%r12,%r9
+	shrq	$8,%r8
+	movzbq	%r13b,%r13
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rcx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rcx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r13,2),%r13
+	movzbl	%dl,%ecx
+	shlb	$4,%al
+	movzbq	(%rsp,%rbx,1),%r12
+	shrl	$4,%ecx
+	shlq	$48,%r13
+	xorq	%r8,%r12
+	movq	%r9,%r10
+	xorq	%r13,%r9
+	shrq	$8,%r8
+	movzbq	%r12b,%r12
+	movl	4(%rdi),%edx	/* next dword of the parked value: offset 4 */
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rbx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rbx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r12,2),%r12
+	movzbl	%dl,%ebx
+	shlb	$4,%al
+	movzbq	(%rsp,%rcx,1),%r13
+	shrl	$4,%ebx
+	shlq	$48,%r12
+	xorq	%r8,%r13
+	movq	%r9,%r10
+	xorq	%r12,%r9
+	shrq	$8,%r8
+	movzbq	%r13b,%r13
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rcx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rcx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r13,2),%r13
+	movzbl	%dl,%ecx
+	shlb	$4,%al
+	movzbq	(%rsp,%rbx,1),%r12
+	shrl	$4,%ecx
+	shlq	$48,%r13
+	xorq	%r8,%r12
+	movq	%r9,%r10
+	xorq	%r13,%r9
+	shrq	$8,%r8
+	movzbq	%r12b,%r12
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rbx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rbx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r12,2),%r12
+	movzbl	%dl,%ebx
+	shlb	$4,%al
+	movzbq	(%rsp,%rcx,1),%r13
+	shrl	$4,%ebx
+	shlq	$48,%r12
+	xorq	%r8,%r13
+	movq	%r9,%r10
+	xorq	%r12,%r9
+	shrq	$8,%r8
+	movzbq	%r13b,%r13
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rcx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rcx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r13,2),%r13
+	movzbl	%dl,%ecx
+	shlb	$4,%al
+	movzbq	(%rsp,%rbx,1),%r12
+	shrl	$4,%ecx
+	shlq	$48,%r13
+	xorq	%r8,%r12
+	movq	%r9,%r10
+	xorq	%r13,%r9
+	shrq	$8,%r8
+	movzbq	%r12b,%r12
+	movl	0(%rdi),%edx	/* last dword of the parked value: offset 0 */
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rbx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rbx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r12,2),%r12
+	movzbl	%dl,%ebx
+	shlb	$4,%al
+	movzbq	(%rsp,%rcx,1),%r13
+	shrl	$4,%ebx
+	shlq	$48,%r12
+	xorq	%r8,%r13
+	movq	%r9,%r10
+	xorq	%r12,%r9
+	shrq	$8,%r8
+	movzbq	%r13b,%r13
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rcx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rcx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r13,2),%r13
+	movzbl	%dl,%ecx
+	shlb	$4,%al
+	movzbq	(%rsp,%rbx,1),%r12
+	shrl	$4,%ecx
+	shlq	$48,%r13
+	xorq	%r8,%r12
+	movq	%r9,%r10
+	xorq	%r13,%r9
+	shrq	$8,%r8
+	movzbq	%r12b,%r12
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rbx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rbx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r12,2),%r12
+	movzbl	%dl,%ebx
+	shlb	$4,%al
+	movzbq	(%rsp,%rcx,1),%r13
+	shrl	$4,%ebx
+	shlq	$48,%r12
+	xorq	%r8,%r13
+	movq	%r9,%r10
+	xorq	%r12,%r9
+	shrq	$8,%r8
+	movzbq	%r13b,%r13
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rcx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rcx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r13,2),%r13
+	movzbl	%dl,%ecx
+	shlb	$4,%al
+	movzbq	(%rsp,%rbx,1),%r12
+	andl	$240,%ecx	/* last byte: keep high nibble * 16 so it indexes the caller's table directly below */
+	shlq	$48,%r13
+	xorq	%r8,%r12
+	movq	%r9,%r10
+	xorq	%r13,%r9
+	shrq	$8,%r8
+	movzbq	%r12b,%r12
+	movl	-4(%rdi),%edx	/* NOTE(review): reads 4 bytes below %rdi; the loaded value is not read before %rdx is overwritten at the loop top or the function returns */
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rbx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rbx,8),%r9
+	movzwq	(%r11,%r12,2),%r12
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	shlq	$48,%r12
+	xorq	%r10,%r8
+	xorq	%r12,%r9
+	movzbq	%r8b,%r13
+	shrq	$4,%r8	/* final step shifts by 4 bits instead of 8 */
+	movq	%r9,%r10
+	shlb	$4,%r13b
+	shrq	$4,%r9
+	xorq	8(%rsi,%rcx,1),%r8
+	movzwq	(%r11,%r13,2),%r13
+	shlq	$60,%r10
+	xorq	(%rsi,%rcx,1),%r9
+	xorq	%r10,%r8
+	shlq	$48,%r13
+	bswapq	%r8	/* byte-swap both halves; they feed the next iteration or the final store */
+	xorq	%r13,%r9
+	bswapq	%r9
+	cmpq	%r15,%r14
+	jb	.Louter_loop	/* continue while the input cursor is below the end pointer */
+	movq	%r8,8(%rdi)	/* write the final accumulator back */
+	movq	%r9,(%rdi)
+
+	leaq	280+48(%rsp),%rsi	/* %rsi = stack pointer at entry */
+	movq	-48(%rsi),%r15
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	0(%rsi),%rsp
+.Lghash_epilogue:
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+.size	gcm_ghash_4bit,.-gcm_ghash_4bit
+.globl	gcm_init_clmul
+.hidden gcm_init_clmul
+.type	gcm_init_clmul,@function
+.align	16
+gcm_init_clmul:	/* In: %rdi = output table (96 bytes written), %rsi = 16-byte input value H. Stores the adjusted H, its 2nd, 3rd and 4th powers, and the xor of the two 64-bit halves of each */
+.L_init_clmul:
+	movdqu	(%rsi),%xmm2
+	pshufd	$78,%xmm2,%xmm2	/* swap the two 64-bit halves */
+
+
+	pshufd	$255,%xmm2,%xmm4	/* broadcast the top dword */
+	movdqa	%xmm2,%xmm3
+	psllq	$1,%xmm2
+	pxor	%xmm5,%xmm5
+	psrlq	$63,%xmm3
+	pcmpgtd	%xmm4,%xmm5	/* all-ones if the top bit of H was set */
+	pslldq	$8,%xmm3
+	por	%xmm3,%xmm2	/* %xmm2 = H shifted left by one bit across all 128 bits */
+
+
+	pand	.L0x1c2_polynomial(%rip),%xmm5
+	pxor	%xmm5,%xmm2	/* xor in the polynomial constant when a bit was shifted out */
+
+
+	pshufd	$78,%xmm2,%xmm6
+	movdqa	%xmm2,%xmm0
+	pxor	%xmm2,%xmm6	/* %xmm6 = low half ^ high half of the adjusted H, in both halves */
+	movdqa	%xmm0,%xmm1
+	pshufd	$78,%xmm0,%xmm3
+	pxor	%xmm0,%xmm3
+.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0: low * low */
+.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1: high * high */
+.byte	102,15,58,68,222,0	/* pclmulqdq $0x00,%xmm6,%xmm3: (low^high) * (low^high) */
+	pxor	%xmm0,%xmm3
+	pxor	%xmm1,%xmm3	/* middle term of the three-multiplication product */
+
+	movdqa	%xmm3,%xmm4
+	psrldq	$8,%xmm3
+	pslldq	$8,%xmm4
+	pxor	%xmm3,%xmm1
+	pxor	%xmm4,%xmm0	/* %xmm1:%xmm0 = 256-bit carry-less product */
+
+	movdqa	%xmm0,%xmm4	/* reduction of the 256-bit product to 128 bits, first part (left shifts) */
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+
+
+	movdqa	%xmm0,%xmm4	/* reduction, second part (right shifts) */
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0	/* %xmm0 = square of the adjusted H */
+	pshufd	$78,%xmm2,%xmm3
+	pshufd	$78,%xmm0,%xmm4
+	pxor	%xmm2,%xmm3
+	movdqu	%xmm2,0(%rdi)	/* table[0] = adjusted H */
+	pxor	%xmm0,%xmm4
+	movdqu	%xmm0,16(%rdi)	/* table[1] = 2nd power */
+.byte	102,15,58,15,227,8	/* palignr $8,%xmm3,%xmm4: pack the half-xors of both values into one register */
+	movdqu	%xmm4,32(%rdi)	/* table[2] = half-xors of table[0] (low qword) and table[1] (high qword) */
+	movdqa	%xmm0,%xmm1
+	pshufd	$78,%xmm0,%xmm3
+	pxor	%xmm0,%xmm3
+.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
+.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
+.byte	102,15,58,68,222,0	/* pclmulqdq $0x00,%xmm6,%xmm3 */
+	pxor	%xmm0,%xmm3
+	pxor	%xmm1,%xmm3
+
+	movdqa	%xmm3,%xmm4
+	psrldq	$8,%xmm3
+	pslldq	$8,%xmm4
+	pxor	%xmm3,%xmm1
+	pxor	%xmm4,%xmm0
+
+	movdqa	%xmm0,%xmm4
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+
+
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0	/* %xmm0 = 3rd power (2nd power times adjusted H) */
+	movdqa	%xmm0,%xmm5	/* keep the 3rd power while the 4th is computed */
+	movdqa	%xmm0,%xmm1
+	pshufd	$78,%xmm0,%xmm3
+	pxor	%xmm0,%xmm3
+.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
+.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
+.byte	102,15,58,68,222,0	/* pclmulqdq $0x00,%xmm6,%xmm3 */
+	pxor	%xmm0,%xmm3
+	pxor	%xmm1,%xmm3
+
+	movdqa	%xmm3,%xmm4
+	psrldq	$8,%xmm3
+	pslldq	$8,%xmm4
+	pxor	%xmm3,%xmm1
+	pxor	%xmm4,%xmm0
+
+	movdqa	%xmm0,%xmm4
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+
+
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0	/* %xmm0 = 4th power */
+	pshufd	$78,%xmm5,%xmm3
+	pshufd	$78,%xmm0,%xmm4
+	pxor	%xmm5,%xmm3
+	movdqu	%xmm5,48(%rdi)	/* table[3] = 3rd power */
+	pxor	%xmm0,%xmm4
+	movdqu	%xmm0,64(%rdi)	/* table[4] = 4th power */
+.byte	102,15,58,15,227,8	/* palignr $8,%xmm3,%xmm4 */
+	movdqu	%xmm4,80(%rdi)	/* table[5] = half-xors of table[3] (low qword) and table[4] (high qword) */
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+.size	gcm_init_clmul,.-gcm_init_clmul
+.globl	gcm_gmult_clmul	/* gcm_gmult_clmul: multiplies the 16-byte value at (%rdi) by the table entry at (%rsi), in place */
+.hidden gcm_gmult_clmul
+.type	gcm_gmult_clmul,@function
+.align	16
+gcm_gmult_clmul:
+.L_gmult_clmul:
+	movdqu	(%rdi),%xmm0	/* current 16-byte value */
+	movdqa	.Lbswap_mask(%rip),%xmm5
+	movdqu	(%rsi),%xmm2	/* table entry 0: the multiplier */
+	movdqu	32(%rsi),%xmm4	/* table entry 2: its low qword feeds the middle-term multiply */
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0: reverse byte order */
+	movdqa	%xmm0,%xmm1
+	pshufd	$78,%xmm0,%xmm3
+	pxor	%xmm0,%xmm3	/* hi^lo fold of the value */
+.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
+.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
+.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
+	pxor	%xmm0,%xmm3
+	pxor	%xmm1,%xmm3	/* xmm3 = middle term */
+
+	movdqa	%xmm3,%xmm4
+	psrldq	$8,%xmm3
+	pslldq	$8,%xmm4
+	pxor	%xmm3,%xmm1	/* xmm1:xmm0 = 256-bit product */
+	pxor	%xmm4,%xmm0
+
+	movdqa	%xmm0,%xmm4	/* reduction, first phase (left shifts) */
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+
+/* reduction, second phase (right shifts); result ends up in xmm0 */
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0: restore the original byte order */
+	movdqu	%xmm0,(%rdi)	/* write the result back over the input */
+	.byte	0xf3,0xc3	/* rep ret */
+.size	gcm_gmult_clmul,.-gcm_gmult_clmul
+.globl	gcm_ghash_clmul	/* gcm_ghash_clmul: folds the 16-byte blocks at (%rdx) into the value at (%rdi); %rsi = table, %rcx = byte count */
+.hidden gcm_ghash_clmul
+.type	gcm_ghash_clmul,@function
+.align	32
+gcm_ghash_clmul:
+.L_ghash_clmul:
+	movdqa	.Lbswap_mask(%rip),%xmm10	/* byte-reversal control, kept in xmm10 throughout */
+
+	movdqu	(%rdi),%xmm0	/* running value */
+	movdqu	(%rsi),%xmm2	/* table entry 0 */
+	movdqu	32(%rsi),%xmm7	/* table entry 2 (packed folds) */
+.byte	102,65,15,56,0,194	/* pshufb %xmm10,%xmm0 */
+
+	subq	$0x10,%rcx
+	jz	.Lodd_tail	/* exactly one 16-byte block */
+
+	movdqu	16(%rsi),%xmm6	/* table entry 1 */
+	leaq	OPENSSL_ia32cap_P(%rip),%rax
+	movl	4(%rax),%eax	/* second 32-bit word of the capability vector */
+	cmpq	$0x30,%rcx
+	jb	.Lskip4x	/* fewer than four blocks in total: use the two-block path */
+
+	andl	$71303168,%eax	/* keep bits 22 and 26 (0x4400000) */
+	cmpl	$4194304,%eax	/* bit 22 set and bit 26 clear? presumably a CPU-model heuristic - TODO confirm */
+	je	.Lskip4x
+
+	subq	$0x30,%rcx
+	movq	$0xA040608020C0E000,%rax	/* 8-byte lookup table, moved into xmm9 and indexed by pshufb inside the loop */
+	movdqu	48(%rsi),%xmm14	/* table entry 3 */
+	movdqu	64(%rsi),%xmm15	/* table entry 4 */
+
+
+
+/* Prologue of the four-block path: start the multiplies for input blocks 3, 2, 1 and 0. */
+	movdqu	48(%rdx),%xmm3
+	movdqu	32(%rdx),%xmm11
+.byte	102,65,15,56,0,218	/* pshufb %xmm10,%xmm3 */
+.byte	102,69,15,56,0,218	/* pshufb %xmm10,%xmm11 */
+	movdqa	%xmm3,%xmm5
+	pshufd	$78,%xmm3,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,68,218,0	/* pclmulqdq $0x00,%xmm2,%xmm3 */
+.byte	102,15,58,68,234,17	/* pclmulqdq $0x11,%xmm2,%xmm5 */
+.byte	102,15,58,68,231,0	/* pclmulqdq $0x00,%xmm7,%xmm4 */
+
+	movdqa	%xmm11,%xmm13
+	pshufd	$78,%xmm11,%xmm12
+	pxor	%xmm11,%xmm12
+.byte	102,68,15,58,68,222,0	/* pclmulqdq $0x00,%xmm6,%xmm11 */
+.byte	102,68,15,58,68,238,17	/* pclmulqdq $0x11,%xmm6,%xmm13 */
+.byte	102,68,15,58,68,231,16	/* pclmulqdq $0x10,%xmm7,%xmm12 */
+	xorps	%xmm11,%xmm3
+	xorps	%xmm13,%xmm5
+	movups	80(%rsi),%xmm7	/* table entry 5 (packed folds) */
+	xorps	%xmm12,%xmm4
+
+	movdqu	16(%rdx),%xmm11
+	movdqu	0(%rdx),%xmm8
+.byte	102,69,15,56,0,218	/* pshufb %xmm10,%xmm11 */
+.byte	102,69,15,56,0,194	/* pshufb %xmm10,%xmm8 */
+	movdqa	%xmm11,%xmm13
+	pshufd	$78,%xmm11,%xmm12
+	pxor	%xmm8,%xmm0	/* fold the first input block into the running value */
+	pxor	%xmm11,%xmm12
+.byte	102,69,15,58,68,222,0	/* pclmulqdq $0x00,%xmm14,%xmm11 */
+	movdqa	%xmm0,%xmm1
+	pshufd	$78,%xmm0,%xmm8
+	pxor	%xmm0,%xmm8
+.byte	102,69,15,58,68,238,17	/* pclmulqdq $0x11,%xmm14,%xmm13 */
+.byte	102,68,15,58,68,231,0	/* pclmulqdq $0x00,%xmm7,%xmm12 */
+	xorps	%xmm11,%xmm3
+	xorps	%xmm13,%xmm5
+
+	leaq	64(%rdx),%rdx	/* advance the input pointer by four blocks */
+	subq	$0x40,%rcx
+	jc	.Ltail4x	/* borrow: fewer than 64 further bytes, finish this group */
+
+	jmp	.Lmod4_loop
+.align	32
+.Lmod4_loop:	/* four blocks per iteration; reduction of the previous group is interleaved with the new multiplies */
+.byte	102,65,15,58,68,199,0	/* pclmulqdq $0x00,%xmm15,%xmm0 */
+	xorps	%xmm12,%xmm4
+	movdqu	48(%rdx),%xmm11
+.byte	102,69,15,56,0,218	/* pshufb %xmm10,%xmm11 */
+.byte	102,65,15,58,68,207,17	/* pclmulqdq $0x11,%xmm15,%xmm1 */
+	xorps	%xmm3,%xmm0
+	movdqu	32(%rdx),%xmm3
+	movdqa	%xmm11,%xmm13
+.byte	102,68,15,58,68,199,16	/* pclmulqdq $0x10,%xmm7,%xmm8 */
+	pshufd	$78,%xmm11,%xmm12
+	xorps	%xmm5,%xmm1
+	pxor	%xmm11,%xmm12
+.byte	102,65,15,56,0,218	/* pshufb %xmm10,%xmm3 */
+	movups	32(%rsi),%xmm7	/* back to table entry 2 */
+	xorps	%xmm4,%xmm8
+.byte	102,68,15,58,68,218,0	/* pclmulqdq $0x00,%xmm2,%xmm11 */
+	pshufd	$78,%xmm3,%xmm4
+
+	pxor	%xmm0,%xmm8
+	movdqa	%xmm3,%xmm5
+	pxor	%xmm1,%xmm8
+	pxor	%xmm3,%xmm4
+	movdqa	%xmm8,%xmm9
+.byte	102,68,15,58,68,234,17	/* pclmulqdq $0x11,%xmm2,%xmm13 */
+	pslldq	$8,%xmm8
+	psrldq	$8,%xmm9
+	pxor	%xmm8,%xmm0
+	movdqa	.L7_mask(%rip),%xmm8
+	pxor	%xmm9,%xmm1
+.byte	102,76,15,110,200	/* movq %rax,%xmm9: the 8-byte lookup table loaded before the loop */
+
+	pand	%xmm0,%xmm8	/* low three bits of each qword of xmm0 */
+.byte	102,69,15,56,0,200	/* pshufb %xmm8,%xmm9: table lookup indexed by those bits */
+	pxor	%xmm0,%xmm9
+.byte	102,68,15,58,68,231,0	/* pclmulqdq $0x00,%xmm7,%xmm12 */
+	psllq	$57,%xmm9
+	movdqa	%xmm9,%xmm8
+	pslldq	$8,%xmm9
+.byte	102,15,58,68,222,0	/* pclmulqdq $0x00,%xmm6,%xmm3 */
+	psrldq	$8,%xmm8
+	pxor	%xmm9,%xmm0
+	pxor	%xmm8,%xmm1
+	movdqu	0(%rdx),%xmm8
+
+	movdqa	%xmm0,%xmm9
+	psrlq	$1,%xmm0
+.byte	102,15,58,68,238,17	/* pclmulqdq $0x11,%xmm6,%xmm5 */
+	xorps	%xmm11,%xmm3
+	movdqu	16(%rdx),%xmm11
+.byte	102,69,15,56,0,218	/* pshufb %xmm10,%xmm11 */
+.byte	102,15,58,68,231,16	/* pclmulqdq $0x10,%xmm7,%xmm4 */
+	xorps	%xmm13,%xmm5
+	movups	80(%rsi),%xmm7	/* table entry 5 again */
+.byte	102,69,15,56,0,194	/* pshufb %xmm10,%xmm8 */
+	pxor	%xmm9,%xmm1
+	pxor	%xmm0,%xmm9
+	psrlq	$5,%xmm0
+
+	movdqa	%xmm11,%xmm13
+	pxor	%xmm12,%xmm4
+	pshufd	$78,%xmm11,%xmm12
+	pxor	%xmm9,%xmm0
+	pxor	%xmm8,%xmm1
+	pxor	%xmm11,%xmm12
+.byte	102,69,15,58,68,222,0	/* pclmulqdq $0x00,%xmm14,%xmm11 */
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+	movdqa	%xmm0,%xmm1
+.byte	102,69,15,58,68,238,17	/* pclmulqdq $0x11,%xmm14,%xmm13 */
+	xorps	%xmm11,%xmm3
+	pshufd	$78,%xmm0,%xmm8
+	pxor	%xmm0,%xmm8
+
+.byte	102,68,15,58,68,231,0	/* pclmulqdq $0x00,%xmm7,%xmm12 */
+	xorps	%xmm13,%xmm5
+
+	leaq	64(%rdx),%rdx
+	subq	$0x40,%rcx
+	jnc	.Lmod4_loop	/* no borrow: at least four more blocks */
+
+.Ltail4x:	/* finish the group of four that is still in flight */
+.byte	102,65,15,58,68,199,0	/* pclmulqdq $0x00,%xmm15,%xmm0 */
+.byte	102,65,15,58,68,207,17	/* pclmulqdq $0x11,%xmm15,%xmm1 */
+.byte	102,68,15,58,68,199,16	/* pclmulqdq $0x10,%xmm7,%xmm8 */
+	xorps	%xmm12,%xmm4
+	xorps	%xmm3,%xmm0
+	xorps	%xmm5,%xmm1
+	pxor	%xmm0,%xmm1
+	pxor	%xmm4,%xmm8
+
+	pxor	%xmm1,%xmm8
+	pxor	%xmm0,%xmm1
+
+	movdqa	%xmm8,%xmm9
+	psrldq	$8,%xmm8
+	pslldq	$8,%xmm9
+	pxor	%xmm8,%xmm1
+	pxor	%xmm9,%xmm0
+
+	movdqa	%xmm0,%xmm4	/* reduction, first phase (left shifts) */
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+
+/* reduction, second phase (right shifts) */
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+	addq	$0x40,%rcx	/* undo the subtraction that produced the borrow */
+	jz	.Ldone
+	movdqu	32(%rsi),%xmm7	/* reload table entry 2 for the remaining blocks */
+	subq	$0x10,%rcx
+	jz	.Lodd_tail	/* exactly one block left */
+.Lskip4x:	/* two-block path */
+
+
+
+
+
+	movdqu	(%rdx),%xmm8
+	movdqu	16(%rdx),%xmm3
+.byte	102,69,15,56,0,194	/* pshufb %xmm10,%xmm8 */
+.byte	102,65,15,56,0,218	/* pshufb %xmm10,%xmm3 */
+	pxor	%xmm8,%xmm0	/* fold the first block into the running value */
+
+	movdqa	%xmm3,%xmm5
+	pshufd	$78,%xmm3,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,68,218,0	/* pclmulqdq $0x00,%xmm2,%xmm3 */
+.byte	102,15,58,68,234,17	/* pclmulqdq $0x11,%xmm2,%xmm5 */
+.byte	102,15,58,68,231,0	/* pclmulqdq $0x00,%xmm7,%xmm4 */
+
+	leaq	32(%rdx),%rdx
+	nop
+	subq	$0x20,%rcx
+	jbe	.Leven_tail	/* no further full pair of blocks */
+	nop
+	jmp	.Lmod_loop
+
+.align	32
+.Lmod_loop:	/* two blocks per iteration */
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm4,%xmm8
+	pshufd	$78,%xmm0,%xmm4
+	pxor	%xmm0,%xmm4
+
+.byte	102,15,58,68,198,0	/* pclmulqdq $0x00,%xmm6,%xmm0 */
+.byte	102,15,58,68,206,17	/* pclmulqdq $0x11,%xmm6,%xmm1 */
+.byte	102,15,58,68,231,16	/* pclmulqdq $0x10,%xmm7,%xmm4 */
+
+	pxor	%xmm3,%xmm0
+	pxor	%xmm5,%xmm1
+	movdqu	(%rdx),%xmm9
+	pxor	%xmm0,%xmm8
+.byte	102,69,15,56,0,202	/* pshufb %xmm10,%xmm9 */
+	movdqu	16(%rdx),%xmm3
+
+	pxor	%xmm1,%xmm8
+	pxor	%xmm9,%xmm1
+	pxor	%xmm8,%xmm4
+.byte	102,65,15,56,0,218	/* pshufb %xmm10,%xmm3 */
+	movdqa	%xmm4,%xmm8
+	psrldq	$8,%xmm8
+	pslldq	$8,%xmm4
+	pxor	%xmm8,%xmm1
+	pxor	%xmm4,%xmm0
+
+	movdqa	%xmm3,%xmm5
+
+	movdqa	%xmm0,%xmm9
+	movdqa	%xmm0,%xmm8
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm8
+.byte	102,15,58,68,218,0	/* pclmulqdq $0x00,%xmm2,%xmm3 */
+	psllq	$1,%xmm0
+	pxor	%xmm8,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm8
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm8
+	pxor	%xmm9,%xmm0
+	pshufd	$78,%xmm5,%xmm4
+	pxor	%xmm8,%xmm1
+	pxor	%xmm5,%xmm4
+
+	movdqa	%xmm0,%xmm9
+	psrlq	$1,%xmm0
+.byte	102,15,58,68,234,17	/* pclmulqdq $0x11,%xmm2,%xmm5 */
+	pxor	%xmm9,%xmm1
+	pxor	%xmm0,%xmm9
+	psrlq	$5,%xmm0
+	pxor	%xmm9,%xmm0
+	leaq	32(%rdx),%rdx
+	psrlq	$1,%xmm0
+.byte	102,15,58,68,231,0	/* pclmulqdq $0x00,%xmm7,%xmm4 */
+	pxor	%xmm1,%xmm0
+
+	subq	$0x20,%rcx
+	ja	.Lmod_loop
+
+.Leven_tail:	/* finish the pair of blocks still in flight */
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm4,%xmm8
+	pshufd	$78,%xmm0,%xmm4
+	pxor	%xmm0,%xmm4
+
+.byte	102,15,58,68,198,0	/* pclmulqdq $0x00,%xmm6,%xmm0 */
+.byte	102,15,58,68,206,17	/* pclmulqdq $0x11,%xmm6,%xmm1 */
+.byte	102,15,58,68,231,16	/* pclmulqdq $0x10,%xmm7,%xmm4 */
+
+	pxor	%xmm3,%xmm0
+	pxor	%xmm5,%xmm1
+	pxor	%xmm0,%xmm8
+	pxor	%xmm1,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm8
+	psrldq	$8,%xmm8
+	pslldq	$8,%xmm4
+	pxor	%xmm8,%xmm1
+	pxor	%xmm4,%xmm0
+
+	movdqa	%xmm0,%xmm4	/* reduction, first phase (left shifts) */
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+
+/* reduction, second phase (right shifts) */
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+	testq	%rcx,%rcx
+	jnz	.Ldone	/* %rcx == 0 falls through to process one final block */
+
+.Lodd_tail:	/* single remaining block: xor it in and multiply by table entry 0 */
+	movdqu	(%rdx),%xmm8
+.byte	102,69,15,56,0,194	/* pshufb %xmm10,%xmm8 */
+	pxor	%xmm8,%xmm0
+	movdqa	%xmm0,%xmm1
+	pshufd	$78,%xmm0,%xmm3
+	pxor	%xmm0,%xmm3
+.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
+.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
+.byte	102,15,58,68,223,0	/* pclmulqdq $0x00,%xmm7,%xmm3 */
+	pxor	%xmm0,%xmm3
+	pxor	%xmm1,%xmm3
+
+	movdqa	%xmm3,%xmm4
+	psrldq	$8,%xmm3
+	pslldq	$8,%xmm4
+	pxor	%xmm3,%xmm1
+	pxor	%xmm4,%xmm0
+
+	movdqa	%xmm0,%xmm4	/* reduction, first phase (left shifts) */
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+
+/* reduction, second phase (right shifts) */
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+.Ldone:
+.byte	102,65,15,56,0,194	/* pshufb %xmm10,%xmm0: restore the original byte order */
+	movdqu	%xmm0,(%rdi)	/* store the updated value */
+	.byte	0xf3,0xc3	/* rep ret */
+.size	gcm_ghash_clmul,.-gcm_ghash_clmul
+.globl	gcm_init_avx	/* gcm_init_avx: AVX table setup; writes four 48-byte groups (192 bytes) at (%rdi) from the 16-byte value at (%rsi) */
+.hidden gcm_init_avx
+.type	gcm_init_avx,@function
+.align	32
+gcm_init_avx:
+	vzeroupper
+
+	vmovdqu	(%rsi),%xmm2	/* unaligned load of the 16-byte input */
+	vpshufd	$78,%xmm2,%xmm2	/* swap the two 64-bit halves */
+
+/* Shift xmm2 left by one bit as a 128-bit value, then xor in .L0x1c2_polynomial if the top bit was set. */
+	vpshufd	$255,%xmm2,%xmm4	/* broadcast the top dword */
+	vpsrlq	$63,%xmm2,%xmm3	/* bit shifted out of each qword */
+	vpsllq	$1,%xmm2,%xmm2
+	vpxor	%xmm5,%xmm5,%xmm5
+	vpcmpgtd	%xmm4,%xmm5,%xmm5	/* all-ones mask when the top dword had its sign bit set */
+	vpslldq	$8,%xmm3,%xmm3	/* carry of the low qword moves into the high qword */
+	vpor	%xmm3,%xmm2,%xmm2
+
+
+	vpand	.L0x1c2_polynomial(%rip),%xmm5,%xmm5
+	vpxor	%xmm5,%xmm2,%xmm2	/* xmm2 = adjusted value V, the multiplier for every step below */
+
+	vpunpckhqdq	%xmm2,%xmm2,%xmm6
+	vmovdqa	%xmm2,%xmm0	/* xmm0 = current power, starts at V */
+	vpxor	%xmm2,%xmm6,%xmm6	/* xmm6 = hi^lo fold of V */
+	movq	$4,%r10	/* four groups, two powers per group */
+	jmp	.Linit_start_avx
+.align	32
+.Linit_loop_avx:
+	vpalignr	$8,%xmm3,%xmm4,%xmm5	/* pack the two folds computed at the end of the previous pass */
+	vmovdqu	%xmm5,-16(%rdi)	/* third entry of the previous group (%rdi was already advanced by 48) */
+	vpunpckhqdq	%xmm0,%xmm0,%xmm3	/* multiply xmm0 by V */
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
+	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
+	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
+	vpxor	%xmm0,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3	/* xmm3 = middle term */
+
+	vpslldq	$8,%xmm3,%xmm4
+	vpsrldq	$8,%xmm3,%xmm3
+	vpxor	%xmm4,%xmm0,%xmm0
+	vpxor	%xmm3,%xmm1,%xmm1	/* xmm1:xmm0 = 256-bit product */
+	vpsllq	$57,%xmm0,%xmm3	/* reduction, first phase (left shifts 57, 62, 63) */
+	vpsllq	$62,%xmm0,%xmm4
+	vpxor	%xmm3,%xmm4,%xmm4
+	vpsllq	$63,%xmm0,%xmm3
+	vpxor	%xmm3,%xmm4,%xmm4
+	vpslldq	$8,%xmm4,%xmm3
+	vpsrldq	$8,%xmm4,%xmm4
+	vpxor	%xmm3,%xmm0,%xmm0
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vpsrlq	$1,%xmm0,%xmm4	/* reduction, second phase (right shifts) */
+	vpxor	%xmm0,%xmm1,%xmm1
+	vpxor	%xmm4,%xmm0,%xmm0
+	vpsrlq	$5,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm0,%xmm0
+	vpsrlq	$1,%xmm0,%xmm0
+	vpxor	%xmm1,%xmm0,%xmm0
+.Linit_start_avx:
+	vmovdqa	%xmm0,%xmm5	/* keep the current power; xmm0 becomes the next one */
+	vpunpckhqdq	%xmm0,%xmm0,%xmm3
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
+	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
+	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
+	vpxor	%xmm0,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+
+	vpslldq	$8,%xmm3,%xmm4
+	vpsrldq	$8,%xmm3,%xmm3
+	vpxor	%xmm4,%xmm0,%xmm0
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpsllq	$57,%xmm0,%xmm3	/* reduction, first phase */
+	vpsllq	$62,%xmm0,%xmm4
+	vpxor	%xmm3,%xmm4,%xmm4
+	vpsllq	$63,%xmm0,%xmm3
+	vpxor	%xmm3,%xmm4,%xmm4
+	vpslldq	$8,%xmm4,%xmm3
+	vpsrldq	$8,%xmm4,%xmm4
+	vpxor	%xmm3,%xmm0,%xmm0
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vpsrlq	$1,%xmm0,%xmm4	/* reduction, second phase */
+	vpxor	%xmm0,%xmm1,%xmm1
+	vpxor	%xmm4,%xmm0,%xmm0
+	vpsrlq	$5,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm0,%xmm0
+	vpsrlq	$1,%xmm0,%xmm0
+	vpxor	%xmm1,%xmm0,%xmm0
+	vpshufd	$78,%xmm5,%xmm3
+	vpshufd	$78,%xmm0,%xmm4
+	vpxor	%xmm5,%xmm3,%xmm3	/* hi^lo fold of the lower power */
+	vmovdqu	%xmm5,0(%rdi)	/* first entry of the group: lower power */
+	vpxor	%xmm0,%xmm4,%xmm4	/* hi^lo fold of the higher power */
+	vmovdqu	%xmm0,16(%rdi)	/* second entry of the group: higher power */
+	leaq	48(%rdi),%rdi	/* next group; the third entry is stored later at -16(%rdi) */
+	subq	$1,%r10
+	jnz	.Linit_loop_avx
+
+	vpalignr	$8,%xmm4,%xmm3,%xmm5	/* operands swapped versus the in-loop vpalignr: the last packed entry has its qwords in the opposite order */
+	vmovdqu	%xmm5,-16(%rdi)	/* third entry of the final group */
+
+	vzeroupper
+	.byte	0xf3,0xc3	/* rep ret */
+.size	gcm_init_avx,.-gcm_init_avx
+.globl	gcm_gmult_avx	/* gcm_gmult_avx: no AVX-specific body; shares the gcm_gmult_clmul implementation */
+.hidden gcm_gmult_avx
+.type	gcm_gmult_avx,@function
+.align	32
+gcm_gmult_avx:
+	jmp	.L_gmult_clmul	/* tail-jump to the local entry label inside gcm_gmult_clmul; arguments are passed through untouched */
+.size	gcm_gmult_avx,.-gcm_gmult_avx
+.globl	gcm_ghash_avx	/* gcm_ghash_avx: folds the 16-byte blocks at (%rdx) into the value at (%rdi); %rsi = table, %rcx = byte count */
+.hidden gcm_ghash_avx
+.type	gcm_ghash_avx,@function
+.align	32
+gcm_ghash_avx:
+	vzeroupper
+
+	vmovdqu	(%rdi),%xmm10	/* running value */
+	leaq	.L0x1c2_polynomial(%rip),%r10	/* %r10 -> reduction constant, read as (%r10) below */
+	leaq	64(%rsi),%rsi	/* bias the table pointer by 64; table offsets below are written as N-64 */
+	vmovdqu	.Lbswap_mask(%rip),%xmm13	/* byte-reversal control, kept in xmm13 throughout */
+	vpshufb	%xmm13,%xmm10,%xmm10
+	cmpq	$0x80,%rcx
+	jb	.Lshort_avx	/* fewer than eight blocks */
+	subq	$0x80,%rcx
+
+	vmovdqu	112(%rdx),%xmm14	/* prologue: start multiplies for the first eight blocks, last block first */
+	vmovdqu	0-64(%rsi),%xmm6
+	vpshufb	%xmm13,%xmm14,%xmm14
+	vmovdqu	32-64(%rsi),%xmm7
+
+	vpunpckhqdq	%xmm14,%xmm14,%xmm9
+	vmovdqu	96(%rdx),%xmm15
+	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
+	vpxor	%xmm14,%xmm9,%xmm9
+	vpshufb	%xmm13,%xmm15,%xmm15
+	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
+	vmovdqu	16-64(%rsi),%xmm6
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vmovdqu	80(%rdx),%xmm14
+	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
+	vpxor	%xmm15,%xmm8,%xmm8
+
+	vpshufb	%xmm13,%xmm14,%xmm14
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
+	vpunpckhqdq	%xmm14,%xmm14,%xmm9
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
+	vmovdqu	48-64(%rsi),%xmm6
+	vpxor	%xmm14,%xmm9,%xmm9
+	vmovdqu	64(%rdx),%xmm15
+	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
+	vmovdqu	80-64(%rsi),%xmm7
+
+	vpshufb	%xmm13,%xmm15,%xmm15
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
+	vmovdqu	64-64(%rsi),%xmm6
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
+	vpxor	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	48(%rdx),%xmm14
+	vpxor	%xmm3,%xmm0,%xmm0
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpshufb	%xmm13,%xmm14,%xmm14
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
+	vmovdqu	96-64(%rsi),%xmm6
+	vpxor	%xmm5,%xmm2,%xmm2
+	vpunpckhqdq	%xmm14,%xmm14,%xmm9
+	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
+	vmovdqu	128-64(%rsi),%xmm7
+	vpxor	%xmm14,%xmm9,%xmm9
+
+	vmovdqu	32(%rdx),%xmm15
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpshufb	%xmm13,%xmm15,%xmm15
+	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
+	vmovdqu	112-64(%rsi),%xmm6
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
+	vpxor	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	16(%rdx),%xmm14
+	vpxor	%xmm3,%xmm0,%xmm0
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpshufb	%xmm13,%xmm14,%xmm14
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
+	vmovdqu	144-64(%rsi),%xmm6
+	vpxor	%xmm5,%xmm2,%xmm2
+	vpunpckhqdq	%xmm14,%xmm14,%xmm9
+	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
+	vmovdqu	176-64(%rsi),%xmm7
+	vpxor	%xmm14,%xmm9,%xmm9
+
+	vmovdqu	(%rdx),%xmm15	/* first block of the group; the running value is xored into it later */
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpshufb	%xmm13,%xmm15,%xmm15
+	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
+	vmovdqu	160-64(%rsi),%xmm6
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2
+
+	leaq	128(%rdx),%rdx	/* advance the input pointer by eight blocks */
+	cmpq	$0x80,%rcx
+	jb	.Ltail_avx	/* fewer than eight further blocks: finish this group */
+
+	vpxor	%xmm10,%xmm15,%xmm15	/* fold the running value into the first block */
+	subq	$0x80,%rcx
+	jmp	.Loop8x_avx
+
+.align	32
+.Loop8x_avx:	/* eight blocks per iteration; reduction of the previous group is interleaved with the new multiplies */
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vmovdqu	112(%rdx),%xmm14
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpxor	%xmm15,%xmm8,%xmm8
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm10
+	vpshufb	%xmm13,%xmm14,%xmm14
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm11
+	vmovdqu	0-64(%rsi),%xmm6
+	vpunpckhqdq	%xmm14,%xmm14,%xmm9
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm12
+	vmovdqu	32-64(%rsi),%xmm7
+	vpxor	%xmm14,%xmm9,%xmm9
+
+	vmovdqu	96(%rdx),%xmm15
+	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
+	vpxor	%xmm3,%xmm10,%xmm10
+	vpshufb	%xmm13,%xmm15,%xmm15
+	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
+	vxorps	%xmm4,%xmm11,%xmm11
+	vmovdqu	16-64(%rsi),%xmm6
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
+	vpxor	%xmm5,%xmm12,%xmm12
+	vxorps	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	80(%rdx),%xmm14
+	vpxor	%xmm10,%xmm12,%xmm12
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
+	vpxor	%xmm11,%xmm12,%xmm12
+	vpslldq	$8,%xmm12,%xmm9
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
+	vpsrldq	$8,%xmm12,%xmm12
+	vpxor	%xmm9,%xmm10,%xmm10
+	vmovdqu	48-64(%rsi),%xmm6
+	vpshufb	%xmm13,%xmm14,%xmm14
+	vxorps	%xmm12,%xmm11,%xmm11
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpunpckhqdq	%xmm14,%xmm14,%xmm9
+	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
+	vmovdqu	80-64(%rsi),%xmm7
+	vpxor	%xmm14,%xmm9,%xmm9
+	vpxor	%xmm2,%xmm5,%xmm5
+
+	vmovdqu	64(%rdx),%xmm15
+	vpalignr	$8,%xmm10,%xmm10,%xmm12	/* swap the qwords of xmm10 */
+	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
+	vpshufb	%xmm13,%xmm15,%xmm15
+	vpxor	%xmm3,%xmm0,%xmm0
+	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
+	vmovdqu	64-64(%rsi),%xmm6
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
+	vxorps	%xmm15,%xmm8,%xmm8
+	vpxor	%xmm5,%xmm2,%xmm2
+
+	vmovdqu	48(%rdx),%xmm14
+	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10	/* first reduction step: multiply by the high qword of .L0x1c2_polynomial */
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
+	vpshufb	%xmm13,%xmm14,%xmm14
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
+	vmovdqu	96-64(%rsi),%xmm6
+	vpunpckhqdq	%xmm14,%xmm14,%xmm9
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
+	vmovdqu	128-64(%rsi),%xmm7
+	vpxor	%xmm14,%xmm9,%xmm9
+	vpxor	%xmm2,%xmm5,%xmm5
+
+	vmovdqu	32(%rdx),%xmm15
+	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
+	vpshufb	%xmm13,%xmm15,%xmm15
+	vpxor	%xmm3,%xmm0,%xmm0
+	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
+	vmovdqu	112-64(%rsi),%xmm6
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
+	vpxor	%xmm15,%xmm8,%xmm8
+	vpxor	%xmm5,%xmm2,%xmm2
+	vxorps	%xmm12,%xmm10,%xmm10
+
+	vmovdqu	16(%rdx),%xmm14
+	vpalignr	$8,%xmm10,%xmm10,%xmm12	/* swap the qwords of xmm10 again */
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
+	vpshufb	%xmm13,%xmm14,%xmm14
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
+	vmovdqu	144-64(%rsi),%xmm6
+	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10	/* second reduction step */
+	vxorps	%xmm11,%xmm12,%xmm12
+	vpunpckhqdq	%xmm14,%xmm14,%xmm9
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
+	vmovdqu	176-64(%rsi),%xmm7
+	vpxor	%xmm14,%xmm9,%xmm9
+	vpxor	%xmm2,%xmm5,%xmm5
+
+	vmovdqu	(%rdx),%xmm15
+	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
+	vpshufb	%xmm13,%xmm15,%xmm15
+	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
+	vmovdqu	160-64(%rsi),%xmm6
+	vpxor	%xmm12,%xmm15,%xmm15	/* fold the reduced value of the previous group into the first block */
+	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2
+	vpxor	%xmm10,%xmm15,%xmm15
+
+	leaq	128(%rdx),%rdx
+	subq	$0x80,%rcx
+	jnc	.Loop8x_avx	/* no borrow: at least eight more blocks */
+
+	addq	$0x80,%rcx	/* undo the subtraction that produced the borrow */
+	jmp	.Ltail_no_xor_avx	/* the running value is already folded into xmm15 */
+
+.align	32
+.Lshort_avx:	/* fewer than eight blocks: process them from the last block backwards */
+	vmovdqu	-16(%rdx,%rcx,1),%xmm14	/* last block of the input */
+	leaq	(%rdx,%rcx,1),%rdx	/* %rdx -> end of input; earlier blocks use negative offsets */
+	vmovdqu	0-64(%rsi),%xmm6
+	vmovdqu	32-64(%rsi),%xmm7
+	vpshufb	%xmm13,%xmm14,%xmm15
+
+	vmovdqa	%xmm0,%xmm3	/* copies make the next vpxor of xmm0/xmm1/xmm2 into xmm3/xmm4/xmm5 yield zero */
+	vmovdqa	%xmm1,%xmm4
+	vmovdqa	%xmm2,%xmm5
+	subq	$0x10,%rcx
+	jz	.Ltail_avx
+
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
+	vpxor	%xmm15,%xmm8,%xmm8
+	vmovdqu	-32(%rdx),%xmm14
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
+	vmovdqu	16-64(%rsi),%xmm6
+	vpshufb	%xmm13,%xmm14,%xmm15
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
+	vpsrldq	$8,%xmm7,%xmm7	/* bring the high qword of the packed entry down for the next multiply */
+	subq	$0x10,%rcx
+	jz	.Ltail_avx
+
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
+	vpxor	%xmm15,%xmm8,%xmm8
+	vmovdqu	-48(%rdx),%xmm14
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
+	vmovdqu	48-64(%rsi),%xmm6
+	vpshufb	%xmm13,%xmm14,%xmm15
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
+	vmovdqu	80-64(%rsi),%xmm7
+	subq	$0x10,%rcx
+	jz	.Ltail_avx
+
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
+	vpxor	%xmm15,%xmm8,%xmm8
+	vmovdqu	-64(%rdx),%xmm14
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
+	vmovdqu	64-64(%rsi),%xmm6
+	vpshufb	%xmm13,%xmm14,%xmm15
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
+	vpsrldq	$8,%xmm7,%xmm7
+	subq	$0x10,%rcx
+	jz	.Ltail_avx
+
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
+	vpxor	%xmm15,%xmm8,%xmm8
+	vmovdqu	-80(%rdx),%xmm14
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
+	vmovdqu	96-64(%rsi),%xmm6
+	vpshufb	%xmm13,%xmm14,%xmm15
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
+	vmovdqu	128-64(%rsi),%xmm7
+	subq	$0x10,%rcx
+	jz	.Ltail_avx
+
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
+	vpxor	%xmm15,%xmm8,%xmm8
+	vmovdqu	-96(%rdx),%xmm14
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
+	vmovdqu	112-64(%rsi),%xmm6
+	vpshufb	%xmm13,%xmm14,%xmm15
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
+	vpsrldq	$8,%xmm7,%xmm7
+	subq	$0x10,%rcx
+	jz	.Ltail_avx
+
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
+	vpxor	%xmm15,%xmm8,%xmm8
+	vmovdqu	-112(%rdx),%xmm14
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
+	vmovdqu	144-64(%rsi),%xmm6
+	vpshufb	%xmm13,%xmm14,%xmm15
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
+	vmovq	184-64(%rsi),%xmm7	/* only the high qword of the packed entry at table offset 176 */
+	subq	$0x10,%rcx
+	jmp	.Ltail_avx
+
+.align	32
+.Ltail_avx:
+	vpxor	%xmm10,%xmm15,%xmm15	/* fold the running value into the block multiplied last */
+.Ltail_no_xor_avx:
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
+	vpxor	%xmm15,%xmm8,%xmm8
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
+
+	vmovdqu	(%r10),%xmm12	/* .L0x1c2_polynomial */
+
+	vpxor	%xmm0,%xmm3,%xmm10	/* accumulated low halves */
+	vpxor	%xmm1,%xmm4,%xmm11	/* accumulated high halves */
+	vpxor	%xmm2,%xmm5,%xmm5	/* accumulated middle terms */
+
+	vpxor	%xmm10,%xmm5,%xmm5
+	vpxor	%xmm11,%xmm5,%xmm5
+	vpslldq	$8,%xmm5,%xmm9
+	vpsrldq	$8,%xmm5,%xmm5
+	vpxor	%xmm9,%xmm10,%xmm10	/* xmm11:xmm10 = 256-bit sum of products */
+	vpxor	%xmm5,%xmm11,%xmm11
+
+	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9	/* first reduction step */
+	vpalignr	$8,%xmm10,%xmm10,%xmm10
+	vpxor	%xmm9,%xmm10,%xmm10
+
+	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9	/* second reduction step */
+	vpalignr	$8,%xmm10,%xmm10,%xmm10
+	vpxor	%xmm11,%xmm10,%xmm10
+	vpxor	%xmm9,%xmm10,%xmm10	/* xmm10 = updated running value */
+
+	cmpq	$0,%rcx
+	jne	.Lshort_avx	/* leftover blocks after the eight-block path */
+
+	vpshufb	%xmm13,%xmm10,%xmm10	/* restore the original byte order */
+	vmovdqu	%xmm10,(%rdi)	/* store the updated value */
+	vzeroupper
+	.byte	0xf3,0xc3	/* rep ret */
+.size	gcm_ghash_avx,.-gcm_ghash_avx
+.align	64	/* constants shared by the routines in this file */
+.Lbswap_mask:	/* pshufb control that reverses the order of all 16 bytes */
+.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.L0x1c2_polynomial:	/* 128-bit little-endian constant: low byte 0x01, top byte 0xc2 */
+.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
+.L7_mask:	/* value 7 in each 64-bit lane; used by the four-block loop of gcm_ghash_clmul */
+.long	7,0,7,0
+.L7_mask_poly:	/* 7 in the low qword, 450 (0x1c2) in the high qword */
+.long	7,0,450,0
+.align	64
+.type	.Lrem_4bit,@object
+.Lrem_4bit:	/* 16 entries of 8 bytes each; presumably read by the 4-bit table routines earlier in this file - TODO confirm */
+.long	0,0,0,471859200,0,943718400,0,610271232
+.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
+.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
+.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
+.type	.Lrem_8bit,@object
+.Lrem_8bit:	/* 256 entries of 16 bits each; presumably read by the 4-bit table routines earlier in this file - TODO confirm */
+.value	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
+.value	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
+.value	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
+.value	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
+.value	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
+.value	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
+.value	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
+.value	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
+.value	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
+.value	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
+.value	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
+.value	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
+.value	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
+.value	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
+.value	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
+.value	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
+.value	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
+.value	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
+.value	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
+.value	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
+.value	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
+.value	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
+.value	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
+.value	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
+.value	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
+.value	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
+.value	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
+.value	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
+.value	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
+.value	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
+.value	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
+.value	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
+
+.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	/* NUL-terminated ASCII: GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org> */
+.align	64
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/fipsmodule/md5-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/md5-x86_64.S
new file mode 100644
index 0000000..8af6504
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/md5-x86_64.S
@@ -0,0 +1,671 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+.align	16
+
+.globl	md5_block_asm_data_order
+.hidden md5_block_asm_data_order
+.type	md5_block_asm_data_order,@function
+md5_block_asm_data_order:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r14
+	pushq	%r15
+.Lprologue:
+
+
+
+
+	movq	%rdi,%rbp
+	shlq	$6,%rdx
+	leaq	(%rsi,%rdx,1),%rdi
+	movl	0(%rbp),%eax
+	movl	4(%rbp),%ebx
+	movl	8(%rbp),%ecx
+	movl	12(%rbp),%edx
+
+
+
+
+
+
+
+	cmpq	%rdi,%rsi
+	je	.Lend
+
+
+.Lloop:
+	movl	%eax,%r8d
+	movl	%ebx,%r9d
+	movl	%ecx,%r14d
+	movl	%edx,%r15d
+	movl	0(%rsi),%r10d
+	movl	%edx,%r11d
+	xorl	%ecx,%r11d
+	leal	-680876936(%rax,%r10,1),%eax
+	andl	%ebx,%r11d
+	xorl	%edx,%r11d
+	movl	4(%rsi),%r10d
+	addl	%r11d,%eax
+	roll	$7,%eax
+	movl	%ecx,%r11d
+	addl	%ebx,%eax
+	xorl	%ebx,%r11d
+	leal	-389564586(%rdx,%r10,1),%edx
+	andl	%eax,%r11d
+	xorl	%ecx,%r11d
+	movl	8(%rsi),%r10d
+	addl	%r11d,%edx
+	roll	$12,%edx
+	movl	%ebx,%r11d
+	addl	%eax,%edx
+	xorl	%eax,%r11d
+	leal	606105819(%rcx,%r10,1),%ecx
+	andl	%edx,%r11d
+	xorl	%ebx,%r11d
+	movl	12(%rsi),%r10d
+	addl	%r11d,%ecx
+	roll	$17,%ecx
+	movl	%eax,%r11d
+	addl	%edx,%ecx
+	xorl	%edx,%r11d
+	leal	-1044525330(%rbx,%r10,1),%ebx
+	andl	%ecx,%r11d
+	xorl	%eax,%r11d
+	movl	16(%rsi),%r10d
+	addl	%r11d,%ebx
+	roll	$22,%ebx
+	movl	%edx,%r11d
+	addl	%ecx,%ebx
+	xorl	%ecx,%r11d
+	leal	-176418897(%rax,%r10,1),%eax
+	andl	%ebx,%r11d
+	xorl	%edx,%r11d
+	movl	20(%rsi),%r10d
+	addl	%r11d,%eax
+	roll	$7,%eax
+	movl	%ecx,%r11d
+	addl	%ebx,%eax
+	xorl	%ebx,%r11d
+	leal	1200080426(%rdx,%r10,1),%edx
+	andl	%eax,%r11d
+	xorl	%ecx,%r11d
+	movl	24(%rsi),%r10d
+	addl	%r11d,%edx
+	roll	$12,%edx
+	movl	%ebx,%r11d
+	addl	%eax,%edx
+	xorl	%eax,%r11d
+	leal	-1473231341(%rcx,%r10,1),%ecx
+	andl	%edx,%r11d
+	xorl	%ebx,%r11d
+	movl	28(%rsi),%r10d
+	addl	%r11d,%ecx
+	roll	$17,%ecx
+	movl	%eax,%r11d
+	addl	%edx,%ecx
+	xorl	%edx,%r11d
+	leal	-45705983(%rbx,%r10,1),%ebx
+	andl	%ecx,%r11d
+	xorl	%eax,%r11d
+	movl	32(%rsi),%r10d
+	addl	%r11d,%ebx
+	roll	$22,%ebx
+	movl	%edx,%r11d
+	addl	%ecx,%ebx
+	xorl	%ecx,%r11d
+	leal	1770035416(%rax,%r10,1),%eax
+	andl	%ebx,%r11d
+	xorl	%edx,%r11d
+	movl	36(%rsi),%r10d
+	addl	%r11d,%eax
+	roll	$7,%eax
+	movl	%ecx,%r11d
+	addl	%ebx,%eax
+	xorl	%ebx,%r11d
+	leal	-1958414417(%rdx,%r10,1),%edx
+	andl	%eax,%r11d
+	xorl	%ecx,%r11d
+	movl	40(%rsi),%r10d
+	addl	%r11d,%edx
+	roll	$12,%edx
+	movl	%ebx,%r11d
+	addl	%eax,%edx
+	xorl	%eax,%r11d
+	leal	-42063(%rcx,%r10,1),%ecx
+	andl	%edx,%r11d
+	xorl	%ebx,%r11d
+	movl	44(%rsi),%r10d
+	addl	%r11d,%ecx
+	roll	$17,%ecx
+	movl	%eax,%r11d
+	addl	%edx,%ecx
+	xorl	%edx,%r11d
+	leal	-1990404162(%rbx,%r10,1),%ebx
+	andl	%ecx,%r11d
+	xorl	%eax,%r11d
+	movl	48(%rsi),%r10d
+	addl	%r11d,%ebx
+	roll	$22,%ebx
+	movl	%edx,%r11d
+	addl	%ecx,%ebx
+	xorl	%ecx,%r11d
+	leal	1804603682(%rax,%r10,1),%eax
+	andl	%ebx,%r11d
+	xorl	%edx,%r11d
+	movl	52(%rsi),%r10d
+	addl	%r11d,%eax
+	roll	$7,%eax
+	movl	%ecx,%r11d
+	addl	%ebx,%eax
+	xorl	%ebx,%r11d
+	leal	-40341101(%rdx,%r10,1),%edx
+	andl	%eax,%r11d
+	xorl	%ecx,%r11d
+	movl	56(%rsi),%r10d
+	addl	%r11d,%edx
+	roll	$12,%edx
+	movl	%ebx,%r11d
+	addl	%eax,%edx
+	xorl	%eax,%r11d
+	leal	-1502002290(%rcx,%r10,1),%ecx
+	andl	%edx,%r11d
+	xorl	%ebx,%r11d
+	movl	60(%rsi),%r10d
+	addl	%r11d,%ecx
+	roll	$17,%ecx
+	movl	%eax,%r11d
+	addl	%edx,%ecx
+	xorl	%edx,%r11d
+	leal	1236535329(%rbx,%r10,1),%ebx
+	andl	%ecx,%r11d
+	xorl	%eax,%r11d
+	movl	0(%rsi),%r10d
+	addl	%r11d,%ebx
+	roll	$22,%ebx
+	movl	%edx,%r11d
+	addl	%ecx,%ebx
+	movl	4(%rsi),%r10d
+	movl	%edx,%r11d
+	movl	%edx,%r12d
+	notl	%r11d
+	leal	-165796510(%rax,%r10,1),%eax
+	andl	%ebx,%r12d
+	andl	%ecx,%r11d
+	movl	24(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ecx,%r11d
+	addl	%r12d,%eax
+	movl	%ecx,%r12d
+	roll	$5,%eax
+	addl	%ebx,%eax
+	notl	%r11d
+	leal	-1069501632(%rdx,%r10,1),%edx
+	andl	%eax,%r12d
+	andl	%ebx,%r11d
+	movl	44(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ebx,%r11d
+	addl	%r12d,%edx
+	movl	%ebx,%r12d
+	roll	$9,%edx
+	addl	%eax,%edx
+	notl	%r11d
+	leal	643717713(%rcx,%r10,1),%ecx
+	andl	%edx,%r12d
+	andl	%eax,%r11d
+	movl	0(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%eax,%r11d
+	addl	%r12d,%ecx
+	movl	%eax,%r12d
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	notl	%r11d
+	leal	-373897302(%rbx,%r10,1),%ebx
+	andl	%ecx,%r12d
+	andl	%edx,%r11d
+	movl	20(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%edx,%r11d
+	addl	%r12d,%ebx
+	movl	%edx,%r12d
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	notl	%r11d
+	leal	-701558691(%rax,%r10,1),%eax
+	andl	%ebx,%r12d
+	andl	%ecx,%r11d
+	movl	40(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ecx,%r11d
+	addl	%r12d,%eax
+	movl	%ecx,%r12d
+	roll	$5,%eax
+	addl	%ebx,%eax
+	notl	%r11d
+	leal	38016083(%rdx,%r10,1),%edx
+	andl	%eax,%r12d
+	andl	%ebx,%r11d
+	movl	60(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ebx,%r11d
+	addl	%r12d,%edx
+	movl	%ebx,%r12d
+	roll	$9,%edx
+	addl	%eax,%edx
+	notl	%r11d
+	leal	-660478335(%rcx,%r10,1),%ecx
+	andl	%edx,%r12d
+	andl	%eax,%r11d
+	movl	16(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%eax,%r11d
+	addl	%r12d,%ecx
+	movl	%eax,%r12d
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	notl	%r11d
+	leal	-405537848(%rbx,%r10,1),%ebx
+	andl	%ecx,%r12d
+	andl	%edx,%r11d
+	movl	36(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%edx,%r11d
+	addl	%r12d,%ebx
+	movl	%edx,%r12d
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	notl	%r11d
+	leal	568446438(%rax,%r10,1),%eax
+	andl	%ebx,%r12d
+	andl	%ecx,%r11d
+	movl	56(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ecx,%r11d
+	addl	%r12d,%eax
+	movl	%ecx,%r12d
+	roll	$5,%eax
+	addl	%ebx,%eax
+	notl	%r11d
+	leal	-1019803690(%rdx,%r10,1),%edx
+	andl	%eax,%r12d
+	andl	%ebx,%r11d
+	movl	12(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ebx,%r11d
+	addl	%r12d,%edx
+	movl	%ebx,%r12d
+	roll	$9,%edx
+	addl	%eax,%edx
+	notl	%r11d
+	leal	-187363961(%rcx,%r10,1),%ecx
+	andl	%edx,%r12d
+	andl	%eax,%r11d
+	movl	32(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%eax,%r11d
+	addl	%r12d,%ecx
+	movl	%eax,%r12d
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	notl	%r11d
+	leal	1163531501(%rbx,%r10,1),%ebx
+	andl	%ecx,%r12d
+	andl	%edx,%r11d
+	movl	52(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%edx,%r11d
+	addl	%r12d,%ebx
+	movl	%edx,%r12d
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	notl	%r11d
+	leal	-1444681467(%rax,%r10,1),%eax
+	andl	%ebx,%r12d
+	andl	%ecx,%r11d
+	movl	8(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ecx,%r11d
+	addl	%r12d,%eax
+	movl	%ecx,%r12d
+	roll	$5,%eax
+	addl	%ebx,%eax
+	notl	%r11d
+	leal	-51403784(%rdx,%r10,1),%edx
+	andl	%eax,%r12d
+	andl	%ebx,%r11d
+	movl	28(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ebx,%r11d
+	addl	%r12d,%edx
+	movl	%ebx,%r12d
+	roll	$9,%edx
+	addl	%eax,%edx
+	notl	%r11d
+	leal	1735328473(%rcx,%r10,1),%ecx
+	andl	%edx,%r12d
+	andl	%eax,%r11d
+	movl	48(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%eax,%r11d
+	addl	%r12d,%ecx
+	movl	%eax,%r12d
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	notl	%r11d
+	leal	-1926607734(%rbx,%r10,1),%ebx
+	andl	%ecx,%r12d
+	andl	%edx,%r11d
+	movl	0(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%edx,%r11d
+	addl	%r12d,%ebx
+	movl	%edx,%r12d
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	movl	20(%rsi),%r10d
+	movl	%ecx,%r11d
+	leal	-378558(%rax,%r10,1),%eax
+	movl	32(%rsi),%r10d
+	xorl	%edx,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%eax
+	roll	$4,%eax
+	movl	%ebx,%r11d
+	addl	%ebx,%eax
+	leal	-2022574463(%rdx,%r10,1),%edx
+	movl	44(%rsi),%r10d
+	xorl	%ecx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%edx
+	roll	$11,%edx
+	movl	%eax,%r11d
+	addl	%eax,%edx
+	leal	1839030562(%rcx,%r10,1),%ecx
+	movl	56(%rsi),%r10d
+	xorl	%ebx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ecx
+	roll	$16,%ecx
+	movl	%edx,%r11d
+	addl	%edx,%ecx
+	leal	-35309556(%rbx,%r10,1),%ebx
+	movl	4(%rsi),%r10d
+	xorl	%eax,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%ebx
+	roll	$23,%ebx
+	movl	%ecx,%r11d
+	addl	%ecx,%ebx
+	leal	-1530992060(%rax,%r10,1),%eax
+	movl	16(%rsi),%r10d
+	xorl	%edx,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%eax
+	roll	$4,%eax
+	movl	%ebx,%r11d
+	addl	%ebx,%eax
+	leal	1272893353(%rdx,%r10,1),%edx
+	movl	28(%rsi),%r10d
+	xorl	%ecx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%edx
+	roll	$11,%edx
+	movl	%eax,%r11d
+	addl	%eax,%edx
+	leal	-155497632(%rcx,%r10,1),%ecx
+	movl	40(%rsi),%r10d
+	xorl	%ebx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ecx
+	roll	$16,%ecx
+	movl	%edx,%r11d
+	addl	%edx,%ecx
+	leal	-1094730640(%rbx,%r10,1),%ebx
+	movl	52(%rsi),%r10d
+	xorl	%eax,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%ebx
+	roll	$23,%ebx
+	movl	%ecx,%r11d
+	addl	%ecx,%ebx
+	leal	681279174(%rax,%r10,1),%eax
+	movl	0(%rsi),%r10d
+	xorl	%edx,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%eax
+	roll	$4,%eax
+	movl	%ebx,%r11d
+	addl	%ebx,%eax
+	leal	-358537222(%rdx,%r10,1),%edx
+	movl	12(%rsi),%r10d
+	xorl	%ecx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%edx
+	roll	$11,%edx
+	movl	%eax,%r11d
+	addl	%eax,%edx
+	leal	-722521979(%rcx,%r10,1),%ecx
+	movl	24(%rsi),%r10d
+	xorl	%ebx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ecx
+	roll	$16,%ecx
+	movl	%edx,%r11d
+	addl	%edx,%ecx
+	leal	76029189(%rbx,%r10,1),%ebx
+	movl	36(%rsi),%r10d
+	xorl	%eax,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%ebx
+	roll	$23,%ebx
+	movl	%ecx,%r11d
+	addl	%ecx,%ebx
+	leal	-640364487(%rax,%r10,1),%eax
+	movl	48(%rsi),%r10d
+	xorl	%edx,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%eax
+	roll	$4,%eax
+	movl	%ebx,%r11d
+	addl	%ebx,%eax
+	leal	-421815835(%rdx,%r10,1),%edx
+	movl	60(%rsi),%r10d
+	xorl	%ecx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%edx
+	roll	$11,%edx
+	movl	%eax,%r11d
+	addl	%eax,%edx
+	leal	530742520(%rcx,%r10,1),%ecx
+	movl	8(%rsi),%r10d
+	xorl	%ebx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ecx
+	roll	$16,%ecx
+	movl	%edx,%r11d
+	addl	%edx,%ecx
+	leal	-995338651(%rbx,%r10,1),%ebx
+	movl	0(%rsi),%r10d
+	xorl	%eax,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%ebx
+	roll	$23,%ebx
+	movl	%ecx,%r11d
+	addl	%ecx,%ebx
+	movl	0(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	xorl	%edx,%r11d
+	leal	-198630844(%rax,%r10,1),%eax
+	orl	%ebx,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%eax
+	movl	28(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$6,%eax
+	xorl	%ecx,%r11d
+	addl	%ebx,%eax
+	leal	1126891415(%rdx,%r10,1),%edx
+	orl	%eax,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%edx
+	movl	56(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$10,%edx
+	xorl	%ebx,%r11d
+	addl	%eax,%edx
+	leal	-1416354905(%rcx,%r10,1),%ecx
+	orl	%edx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%ecx
+	movl	20(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$15,%ecx
+	xorl	%eax,%r11d
+	addl	%edx,%ecx
+	leal	-57434055(%rbx,%r10,1),%ebx
+	orl	%ecx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ebx
+	movl	48(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$21,%ebx
+	xorl	%edx,%r11d
+	addl	%ecx,%ebx
+	leal	1700485571(%rax,%r10,1),%eax
+	orl	%ebx,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%eax
+	movl	12(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$6,%eax
+	xorl	%ecx,%r11d
+	addl	%ebx,%eax
+	leal	-1894986606(%rdx,%r10,1),%edx
+	orl	%eax,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%edx
+	movl	40(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$10,%edx
+	xorl	%ebx,%r11d
+	addl	%eax,%edx
+	leal	-1051523(%rcx,%r10,1),%ecx
+	orl	%edx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%ecx
+	movl	4(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$15,%ecx
+	xorl	%eax,%r11d
+	addl	%edx,%ecx
+	leal	-2054922799(%rbx,%r10,1),%ebx
+	orl	%ecx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ebx
+	movl	32(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$21,%ebx
+	xorl	%edx,%r11d
+	addl	%ecx,%ebx
+	leal	1873313359(%rax,%r10,1),%eax
+	orl	%ebx,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%eax
+	movl	60(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$6,%eax
+	xorl	%ecx,%r11d
+	addl	%ebx,%eax
+	leal	-30611744(%rdx,%r10,1),%edx
+	orl	%eax,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%edx
+	movl	24(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$10,%edx
+	xorl	%ebx,%r11d
+	addl	%eax,%edx
+	leal	-1560198380(%rcx,%r10,1),%ecx
+	orl	%edx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%ecx
+	movl	52(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$15,%ecx
+	xorl	%eax,%r11d
+	addl	%edx,%ecx
+	leal	1309151649(%rbx,%r10,1),%ebx
+	orl	%ecx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ebx
+	movl	16(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$21,%ebx
+	xorl	%edx,%r11d
+	addl	%ecx,%ebx
+	leal	-145523070(%rax,%r10,1),%eax
+	orl	%ebx,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%eax
+	movl	44(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$6,%eax
+	xorl	%ecx,%r11d
+	addl	%ebx,%eax
+	leal	-1120210379(%rdx,%r10,1),%edx
+	orl	%eax,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%edx
+	movl	8(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$10,%edx
+	xorl	%ebx,%r11d
+	addl	%eax,%edx
+	leal	718787259(%rcx,%r10,1),%ecx
+	orl	%edx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%ecx
+	movl	36(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$15,%ecx
+	xorl	%eax,%r11d
+	addl	%edx,%ecx
+	leal	-343485551(%rbx,%r10,1),%ebx
+	orl	%ecx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ebx
+	movl	0(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$21,%ebx
+	xorl	%edx,%r11d
+	addl	%ecx,%ebx
+
+	addl	%r8d,%eax
+	addl	%r9d,%ebx
+	addl	%r14d,%ecx
+	addl	%r15d,%edx
+
+
+	addq	$64,%rsi
+	cmpq	%rdi,%rsi
+	jb	.Lloop
+
+
+.Lend:
+	movl	%eax,0(%rbp)
+	movl	%ebx,4(%rbp)
+	movl	%ecx,8(%rbp)
+	movl	%edx,12(%rbp)
+
+	movq	(%rsp),%r15
+	movq	8(%rsp),%r14
+	movq	16(%rsp),%r12
+	movq	24(%rsp),%rbx
+	movq	32(%rsp),%rbp
+	addq	$40,%rsp
+.Lepilogue:
+	.byte	0xf3,0xc3
+.size	md5_block_asm_data_order,.-md5_block_asm_data_order
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
new file mode 100644
index 0000000..6d21888
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
@@ -0,0 +1,1791 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+.extern	OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
+
+
+.align	64	/* Constant data used by the routines below; it follows the .text directive, so it lives in the code section. */
+.Lpoly:	/* Modulus p = 2^256 - 2^224 + 2^192 + 2^96 - 1 as four 64-bit limbs, least significant first. */
+.quad	0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
+
+.LOne:	/* Eight dwords of 1 (32 bytes): index counter start/step for the table-select loops. */
+.long	1,1,1,1,1,1,1,1
+.LTwo:	/* Eight dwords of 2: counter start/step for the two-entries-per-iteration AVX2 loop. */
+.long	2,2,2,2,2,2,2,2
+.LThree:	/* Eight dwords of 3: counter start/step for the three-entries-per-iteration AVX2 loop. */
+.long	3,3,3,3,3,3,3,3
+.LONE_mont:	/* Equals 2^256 - p, i.e. 2^256 mod p (the value 1 in a Montgomery domain with R = 2^256); limbs least significant first. */
+.quad	0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
+
+
+
+.globl	ecp_nistz256_neg	/* ecp_nistz256_neg: out = (0 - in) mod p, where p is the constant at .Lpoly. */
+.hidden ecp_nistz256_neg	/* In: %rdi = out, %rsi = in; each is four 64-bit limbs, least significant first. */
+.type	ecp_nistz256_neg,@function
+.align	32
+ecp_nistz256_neg:
+	pushq	%r12	/* %r12 and %r13 are used as scratch below; save them */
+	pushq	%r13
+
+	xorq	%r8,%r8	/* %r8..%r11 = 0 (the minuend) */
+	xorq	%r9,%r9
+	xorq	%r10,%r10
+	xorq	%r11,%r11
+	xorq	%r13,%r13	/* %r13 will capture the final borrow */
+
+	subq	0(%rsi),%r8	/* %r11:%r10:%r9:%r8 = 0 - in */
+	sbbq	8(%rsi),%r9
+	sbbq	16(%rsi),%r10
+	movq	%r8,%rax	/* %rax,%rdx,%rcx,%r12 keep a copy of the raw difference */
+	sbbq	24(%rsi),%r11
+	leaq	.Lpoly(%rip),%rsi	/* %rsi is reused as a pointer to p */
+	movq	%r9,%rdx
+	sbbq	$0,%r13	/* %r13 = 0 if there was no borrow (in == 0), all ones otherwise */
+
+	addq	0(%rsi),%r8	/* add p back: %r11..%r8 = (0 - in) + p */
+	movq	%r10,%rcx
+	adcq	8(%rsi),%r9
+	adcq	16(%rsi),%r10
+	movq	%r11,%r12
+	adcq	24(%rsi),%r11
+	testq	%r13,%r13
+
+	cmovzq	%rax,%r8	/* no borrow: keep the raw difference instead of the sum */
+	cmovzq	%rdx,%r9
+	movq	%r8,0(%rdi)
+	cmovzq	%rcx,%r10
+	movq	%r9,8(%rdi)
+	cmovzq	%r12,%r11
+	movq	%r10,16(%rdi)
+	movq	%r11,24(%rdi)
+
+	popq	%r13
+	popq	%r12
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+.size	ecp_nistz256_neg,.-ecp_nistz256_neg
+
+
+
+
+
+
+.globl	ecp_nistz256_mul_mont	/* ecp_nistz256_mul_mont: exported wrapper that sets up registers for __ecp_nistz256_mul_montq. */
+.hidden ecp_nistz256_mul_mont	/* In: %rdi = out, %rsi = a, %rdx = b; each is four 64-bit limbs, least significant first. */
+.type	ecp_nistz256_mul_mont,@function
+.align	32
+ecp_nistz256_mul_mont:
+.Lmul_mont:
+	pushq	%rbp	/* save %rbp, %rbx, %r12-%r15; all are restored before returning */
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	movq	%rdx,%rbx	/* the helper reads b through %rbx */
+	movq	0(%rdx),%rax	/* %rax = b[0] */
+	movq	0(%rsi),%r9	/* %r9..%r12 = a[0..3] */
+	movq	8(%rsi),%r10
+	movq	16(%rsi),%r11
+	movq	24(%rsi),%r12
+
+	call	__ecp_nistz256_mul_montq	/* stores the reduced product to (%rdi) */
+.Lmul_mont_done:
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbx
+	popq	%rbp
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+.size	ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
+
+.type	__ecp_nistz256_mul_montq,@function	/* __ecp_nistz256_mul_montq: multiply a by b with an interleaved reduction by p after each limb of b. */
+.align	32
+__ecp_nistz256_mul_montq:
+/* In: %rax = b[0], %r9..%r12 = a[0..3], %rsi = a, %rbx = b, %rdi = out. Out: result stored to (%rdi) and left in %r12,%r13,%r8,%r9. */
+/* Side effects: loads %r14 = .Lpoly+8 and %r15 = .Lpoly+24; overwrites %rax,%rbx,%rcx,%rdx,%rbp,%r10,%r11. */
+	movq	%rax,%rbp	/* %rbp keeps b[0] while mulq overwrites %rax */
+	mulq	%r9	/* a[0]*b[0] */
+	movq	.Lpoly+8(%rip),%r14	/* %r14 = limb 1 of p */
+	movq	%rax,%r8
+	movq	%rbp,%rax
+	movq	%rdx,%r9
+
+	mulq	%r10	/* a[1]*b[0] */
+	movq	.Lpoly+24(%rip),%r15	/* %r15 = limb 3 of p */
+	addq	%rax,%r9
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%r11	/* a[2]*b[0] */
+	addq	%rax,%r10
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%r12	/* a[3]*b[0] */
+	addq	%rax,%r11
+	movq	%r8,%rax	/* %rax = acc[0], the multiplier for the reduction step */
+	adcq	$0,%rdx
+	xorq	%r13,%r13
+	movq	%rdx,%r12	/* accumulator is now %r8..%r12, with %r13 = 0 as the next limb */
+
+/* Reduction step 1: add acc[0]*p to the accumulator and drop the low limb. */
+/* Limb 0 of p is all ones, so acc[0] + (low limb of acc[0]*p) is zero mod 2^64 and never */
+/* needs to be computed. What remains of acc[0]*p is acc[0]*2^96 plus acc[0]*(limb 3 of p)*2^192: */
+/* acc[0]<<32 goes into limb 1, acc[0]>>32 into limb 2, and the 128-bit product */
+/* acc[0]*%r15 into limbs 3 and 4. */
+
+
+
+
+	movq	%r8,%rbp
+	shlq	$32,%r8
+	mulq	%r15
+	shrq	$32,%rbp
+	addq	%r8,%r9
+	adcq	%rbp,%r10
+	adcq	%rax,%r11
+	movq	8(%rbx),%rax	/* fetch b[1] for the next round */
+	adcq	%rdx,%r12
+	adcq	$0,%r13
+	xorq	%r8,%r8	/* %r8 becomes the new top limb */
+
+/* Round 2: accumulator (%r9..%r13, top carry in %r8) += a * b[1]; a is re-read from (%rsi). */
+
+	movq	%rax,%rbp
+	mulq	0(%rsi)
+	addq	%rax,%r9
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx	/* %rcx carries the high half into the next limb */
+
+	mulq	8(%rsi)
+	addq	%rcx,%r10
+	adcq	$0,%rdx
+	addq	%rax,%r10
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	16(%rsi)
+	addq	%rcx,%r11
+	adcq	$0,%rdx
+	addq	%rax,%r11
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	24(%rsi)
+	addq	%rcx,%r12
+	adcq	$0,%rdx
+	addq	%rax,%r12
+	movq	%r9,%rax
+	adcq	%rdx,%r13
+	adcq	$0,%r8
+
+/* Reduction step 2: same folding as step 1, with %r9 as the low limb. */
+
+	movq	%r9,%rbp
+	shlq	$32,%r9
+	mulq	%r15
+	shrq	$32,%rbp
+	addq	%r9,%r10
+	adcq	%rbp,%r11
+	adcq	%rax,%r12
+	movq	16(%rbx),%rax	/* fetch b[2] */
+	adcq	%rdx,%r13
+	adcq	$0,%r8
+	xorq	%r9,%r9
+
+/* Round 3: accumulator (%r10..%r13,%r8, top carry in %r9) += a * b[2]. */
+
+	movq	%rax,%rbp
+	mulq	0(%rsi)
+	addq	%rax,%r10
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	8(%rsi)
+	addq	%rcx,%r11
+	adcq	$0,%rdx
+	addq	%rax,%r11
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	16(%rsi)
+	addq	%rcx,%r12
+	adcq	$0,%rdx
+	addq	%rax,%r12
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	24(%rsi)
+	addq	%rcx,%r13
+	adcq	$0,%rdx
+	addq	%rax,%r13
+	movq	%r10,%rax
+	adcq	%rdx,%r8
+	adcq	$0,%r9
+
+/* Reduction step 3: same folding, with %r10 as the low limb. */
+
+	movq	%r10,%rbp
+	shlq	$32,%r10
+	mulq	%r15
+	shrq	$32,%rbp
+	addq	%r10,%r11
+	adcq	%rbp,%r12
+	adcq	%rax,%r13
+	movq	24(%rbx),%rax	/* fetch b[3] */
+	adcq	%rdx,%r8
+	adcq	$0,%r9
+	xorq	%r10,%r10
+
+/* Round 4: accumulator (%r11..%r13,%r8,%r9, top carry in %r10) += a * b[3]. */
+
+	movq	%rax,%rbp
+	mulq	0(%rsi)
+	addq	%rax,%r11
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	8(%rsi)
+	addq	%rcx,%r12
+	adcq	$0,%rdx
+	addq	%rax,%r12
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	16(%rsi)
+	addq	%rcx,%r13
+	adcq	$0,%rdx
+	addq	%rax,%r13
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	24(%rsi)
+	addq	%rcx,%r8
+	adcq	$0,%rdx
+	addq	%rax,%r8
+	movq	%r11,%rax
+	adcq	%rdx,%r9
+	adcq	$0,%r10
+
+/* Reduction step 4: same folding, with %r11 as the low limb; result is %r12,%r13,%r8,%r9 plus carry limb %r10. */
+
+	movq	%r11,%rbp
+	shlq	$32,%r11
+	mulq	%r15
+	shrq	$32,%rbp
+	addq	%r11,%r12
+	adcq	%rbp,%r13
+	movq	%r12,%rcx	/* %rcx,%rbp,%rbx,%rdx hold copies of the unsubtracted limbs */
+	adcq	%rax,%r8
+	adcq	%rdx,%r9
+	movq	%r13,%rbp
+	adcq	$0,%r10
+
+/* Final correction: subtract p (limbs -1, %r14, 0, %r15) from the 5-limb result. */
+
+	subq	$-1,%r12
+	movq	%r8,%rbx	/* note: this overwrites the b pointer */
+	sbbq	%r14,%r13
+	sbbq	$0,%r8
+	movq	%r9,%rdx
+	sbbq	%r15,%r9
+	sbbq	$0,%r10
+
+	cmovcq	%rcx,%r12	/* borrow means the value was already below p: restore the saved copy */
+	cmovcq	%rbp,%r13
+	movq	%r12,0(%rdi)
+	cmovcq	%rbx,%r8
+	movq	%r13,8(%rdi)
+	cmovcq	%rdx,%r9
+	movq	%r8,16(%rdi)
+	movq	%r9,24(%rdi)
+
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+.size	__ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
+
+
+
+
+
+
+
+
+.globl	ecp_nistz256_sqr_mont	/* ecp_nistz256_sqr_mont: exported wrapper that sets up registers for __ecp_nistz256_sqr_montq. */
+.hidden ecp_nistz256_sqr_mont	/* In: %rdi = out, %rsi = a; each is four 64-bit limbs, least significant first. */
+.type	ecp_nistz256_sqr_mont,@function
+.align	32
+ecp_nistz256_sqr_mont:
+	pushq	%rbp	/* save %rbp, %rbx, %r12-%r15; all are restored before returning */
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	movq	0(%rsi),%rax	/* helper expects a[0..3] in %rax,%r14,%r15,%r8 */
+	movq	8(%rsi),%r14
+	movq	16(%rsi),%r15
+	movq	24(%rsi),%r8
+
+	call	__ecp_nistz256_sqr_montq	/* stores the reduced square to (%rdi) */
+.Lsqr_mont_done:
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbx
+	popq	%rbp
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+.size	ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
+
+.type	__ecp_nistz256_sqr_montq,@function	/* __ecp_nistz256_sqr_montq: square a, then reduce by p. In: %rax,%r14,%r15,%r8 = a[0..3], %rsi = a, %rdi = out. */
+.align	32	/* Out: result stored to (%rdi) and left in %r12..%r15. On return %rsi = .Lpoly+8 and %rbp = .Lpoly+24 (callers in this file rely on that). */
+__ecp_nistz256_sqr_montq:
+	movq	%rax,%r13	/* %r13 = a[0] */
+	mulq	%r14	/* a[0]*a[1] */
+	movq	%rax,%r9
+	movq	%r15,%rax
+	movq	%rdx,%r10
+
+	mulq	%r13	/* a[2]*a[0] */
+	addq	%rax,%r10
+	movq	%r8,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%r13	/* a[3]*a[0] */
+	addq	%rax,%r11
+	movq	%r15,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r12
+
+
+	mulq	%r14	/* a[2]*a[1] */
+	addq	%rax,%r11
+	movq	%r8,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rbp
+
+	mulq	%r14	/* a[3]*a[1] */
+	addq	%rax,%r12
+	movq	%r8,%rax
+	adcq	$0,%rdx
+	addq	%rbp,%r12
+	movq	%rdx,%r13
+	adcq	$0,%r13
+
+
+	mulq	%r15	/* a[3]*a[2] */
+	xorq	%r15,%r15
+	addq	%rax,%r13
+	movq	0(%rsi),%rax
+	movq	%rdx,%r14
+	adcq	$0,%r14
+
+	addq	%r9,%r9	/* double the sum of cross products (%r9..%r14); the carry out lands in %r15 */
+	adcq	%r10,%r10
+	adcq	%r11,%r11
+	adcq	%r12,%r12
+	adcq	%r13,%r13
+	adcq	%r14,%r14
+	adcq	$0,%r15
+
+	mulq	%rax	/* a[0]^2: low half becomes limb 0 (%r8) */
+	movq	%rax,%r8
+	movq	8(%rsi),%rax
+	movq	%rdx,%rcx
+
+	mulq	%rax	/* a[1]^2 */
+	addq	%rcx,%r9
+	adcq	%rax,%r10
+	movq	16(%rsi),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	%rax	/* a[2]^2 */
+	addq	%rcx,%r11
+	adcq	%rax,%r12
+	movq	24(%rsi),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	%rax	/* a[3]^2; the full 512-bit square is now in %r8..%r15 */
+	addq	%rcx,%r13
+	adcq	%rax,%r14
+	movq	%r8,%rax
+	adcq	%rdx,%r15
+
+	movq	.Lpoly+8(%rip),%rsi	/* %rsi no longer points at a: it now holds limb 1 of p */
+	movq	.Lpoly+24(%rip),%rbp	/* %rbp = limb 3 of p */
+
+/* Four reduction steps on the low half %r8..%r11. Each step takes the current low limb t, */
+/* adds t<<32 and t>>32 into the next two limbs and the 128-bit product t*%rbp into the two */
+/* limbs after that, then drops the low limb (limb 0 of p is all ones, so it cancels). */
+
+	movq	%r8,%rcx
+	shlq	$32,%r8
+	mulq	%rbp
+	shrq	$32,%rcx
+	addq	%r8,%r9
+	adcq	%rcx,%r10
+	adcq	%rax,%r11
+	movq	%r9,%rax
+	adcq	$0,%rdx	/* %rdx is the new top limb; it is moved into %r8 in the next step */
+
+/* Step 2: low limb is %r9. */
+
+	movq	%r9,%rcx
+	shlq	$32,%r9
+	movq	%rdx,%r8
+	mulq	%rbp
+	shrq	$32,%rcx
+	addq	%r9,%r10
+	adcq	%rcx,%r11
+	adcq	%rax,%r8
+	movq	%r10,%rax
+	adcq	$0,%rdx
+
+/* Step 3: low limb is %r10. */
+
+	movq	%r10,%rcx
+	shlq	$32,%r10
+	movq	%rdx,%r9
+	mulq	%rbp
+	shrq	$32,%rcx
+	addq	%r10,%r11
+	adcq	%rcx,%r8
+	adcq	%rax,%r9
+	movq	%r11,%rax
+	adcq	$0,%rdx
+
+/* Step 4: low limb is %r11; afterwards the reduced low half is %r8,%r9,%r10,%rdx. */
+
+	movq	%r11,%rcx
+	shlq	$32,%r11
+	movq	%rdx,%r10
+	mulq	%rbp
+	shrq	$32,%rcx
+	addq	%r11,%r8
+	adcq	%rcx,%r9
+	adcq	%rax,%r10
+	adcq	$0,%rdx
+	xorq	%r11,%r11
+
+/* Add the reduced low half to the high half %r12..%r15; %r11 collects the carry. */
+
+	addq	%r8,%r12
+	adcq	%r9,%r13
+	movq	%r12,%r8	/* %r8,%r9,%r10,%rcx keep copies of the unsubtracted limbs */
+	adcq	%r10,%r14
+	adcq	%rdx,%r15
+	movq	%r13,%r9
+	adcq	$0,%r11
+
+	subq	$-1,%r12	/* subtract p (limbs -1, %rsi, 0, %rbp) from the 5-limb sum */
+	movq	%r14,%r10
+	sbbq	%rsi,%r13
+	sbbq	$0,%r14
+	movq	%r15,%rcx
+	sbbq	%rbp,%r15
+	sbbq	$0,%r11
+
+	cmovcq	%r8,%r12	/* borrow means the value was already below p: restore the saved copy */
+	cmovcq	%r9,%r13
+	movq	%r12,0(%rdi)
+	cmovcq	%r10,%r14
+	movq	%r13,8(%rdi)
+	cmovcq	%rcx,%r15
+	movq	%r14,16(%rdi)
+	movq	%r15,24(%rdi)
+
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+.size	__ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
+
+
+.globl	ecp_nistz256_select_w5	/* ecp_nistz256_select_w5: copy one 96-byte entry out of a 16-entry table, reading every entry. */
+.hidden ecp_nistz256_select_w5	/* In: %rdi = out (96 bytes), %rsi = table (read with aligned loads), %edx = index. */
+.type	ecp_nistz256_select_w5,@function	/* Entries are numbered from 1; an index that matches no entry leaves out all zero. */
+.align	32
+ecp_nistz256_select_w5:
+	leaq	OPENSSL_ia32cap_P(%rip),%rax
+	movq	8(%rax),%rax
+	testl	$32,%eax	/* bit 5 of the capability word at offset 8 (presumably the AVX2 flag; defined outside this file) */
+	jnz	.Lavx2_select_w5	/* if set, use the AVX2 variant instead */
+	movdqa	.LOne(%rip),%xmm0	/* %xmm0 = four dwords of 1 (counter step) */
+	movd	%edx,%xmm1
+
+	pxor	%xmm2,%xmm2	/* %xmm2..%xmm7 accumulate the selected 96 bytes */
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+
+	movdqa	%xmm0,%xmm8	/* %xmm8 = running entry number, starting at 1 */
+	pshufd	$0,%xmm1,%xmm1	/* broadcast the index to all four dwords */
+
+	movq	$16,%rax	/* 16 table entries */
+.Lselect_loop_sse_w5:
+
+	movdqa	%xmm8,%xmm15
+	paddd	%xmm0,%xmm8
+	pcmpeqd	%xmm1,%xmm15	/* %xmm15 = all ones if this entry number equals the index, else zero */
+
+	movdqa	0(%rsi),%xmm9	/* load the whole entry regardless of the index */
+	movdqa	16(%rsi),%xmm10
+	movdqa	32(%rsi),%xmm11
+	movdqa	48(%rsi),%xmm12
+	movdqa	64(%rsi),%xmm13
+	movdqa	80(%rsi),%xmm14
+	leaq	96(%rsi),%rsi
+
+	pand	%xmm15,%xmm9	/* mask the entry and OR it into the accumulators */
+	pand	%xmm15,%xmm10
+	por	%xmm9,%xmm2
+	pand	%xmm15,%xmm11
+	por	%xmm10,%xmm3
+	pand	%xmm15,%xmm12
+	por	%xmm11,%xmm4
+	pand	%xmm15,%xmm13
+	por	%xmm12,%xmm5
+	pand	%xmm15,%xmm14
+	por	%xmm13,%xmm6
+	por	%xmm14,%xmm7
+
+	decq	%rax
+	jnz	.Lselect_loop_sse_w5
+
+	movdqu	%xmm2,0(%rdi)	/* unaligned stores: out need not be 16-byte aligned */
+	movdqu	%xmm3,16(%rdi)
+	movdqu	%xmm4,32(%rdi)
+	movdqu	%xmm5,48(%rdi)
+	movdqu	%xmm6,64(%rdi)
+	movdqu	%xmm7,80(%rdi)
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+.size	ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
+
+
+
+.globl	ecp_nistz256_select_w7	/* ecp_nistz256_select_w7: copy one 64-byte entry out of a 64-entry table, reading every entry. */
+.hidden ecp_nistz256_select_w7	/* In: %rdi = out (64 bytes), %rsi = table (read with aligned loads), %edx = index. */
+.type	ecp_nistz256_select_w7,@function	/* Entries are numbered from 1; an index that matches no entry leaves out all zero. */
+.align	32
+ecp_nistz256_select_w7:
+	leaq	OPENSSL_ia32cap_P(%rip),%rax
+	movq	8(%rax),%rax
+	testl	$32,%eax	/* bit 5 of the capability word at offset 8 (presumably the AVX2 flag; defined outside this file) */
+	jnz	.Lavx2_select_w7	/* if set, use the AVX2 variant instead */
+	movdqa	.LOne(%rip),%xmm8	/* %xmm8 = running entry number, starting at 1 */
+	movd	%edx,%xmm1
+
+	pxor	%xmm2,%xmm2	/* %xmm2..%xmm5 accumulate the selected 64 bytes */
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+
+	movdqa	%xmm8,%xmm0	/* %xmm0 = counter step (1) */
+	pshufd	$0,%xmm1,%xmm1	/* broadcast the index to all four dwords */
+	movq	$64,%rax	/* 64 table entries */
+
+.Lselect_loop_sse_w7:
+	movdqa	%xmm8,%xmm15
+	paddd	%xmm0,%xmm8
+	movdqa	0(%rsi),%xmm9	/* load the whole entry regardless of the index */
+	movdqa	16(%rsi),%xmm10
+	pcmpeqd	%xmm1,%xmm15	/* %xmm15 = all ones if this entry number equals the index, else zero */
+	movdqa	32(%rsi),%xmm11
+	movdqa	48(%rsi),%xmm12
+	leaq	64(%rsi),%rsi
+
+	pand	%xmm15,%xmm9	/* mask the entry and OR it into the accumulators */
+	pand	%xmm15,%xmm10
+	por	%xmm9,%xmm2
+	pand	%xmm15,%xmm11
+	por	%xmm10,%xmm3
+	pand	%xmm15,%xmm12
+	por	%xmm11,%xmm4
+	prefetcht0	255(%rsi)	/* prefetch ahead of the (already advanced) table pointer */
+	por	%xmm12,%xmm5
+
+	decq	%rax
+	jnz	.Lselect_loop_sse_w7
+
+	movdqu	%xmm2,0(%rdi)	/* unaligned stores: out need not be 16-byte aligned */
+	movdqu	%xmm3,16(%rdi)
+	movdqu	%xmm4,32(%rdi)
+	movdqu	%xmm5,48(%rdi)
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+.size	ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
+
+
+.type	ecp_nistz256_avx2_select_w5,@function	/* AVX2 variant of ecp_nistz256_select_w5 (no .globl: reached through .Lavx2_select_w5). */
+.align	32	/* In: %rdi = out (96 bytes), %rsi = table of 16 x 96 bytes (read with aligned 32-byte loads), %edx = index. */
+ecp_nistz256_avx2_select_w5:
+.Lavx2_select_w5:
+	vzeroupper
+	vmovdqa	.LTwo(%rip),%ymm0	/* counter step: two entries are handled per iteration */
+
+	vpxor	%ymm2,%ymm2,%ymm2	/* %ymm2..%ymm4 accumulate the selected 96 bytes */
+	vpxor	%ymm3,%ymm3,%ymm3
+	vpxor	%ymm4,%ymm4,%ymm4
+
+	vmovdqa	.LOne(%rip),%ymm5	/* entry numbers 1,3,5,... */
+	vmovdqa	.LTwo(%rip),%ymm10	/* entry numbers 2,4,6,... */
+
+	vmovd	%edx,%xmm1
+	vpermd	%ymm1,%ymm2,%ymm1	/* %ymm2 is zero here, so every lane takes dword 0: broadcast of the index */
+
+	movq	$8,%rax	/* 8 iterations x 2 entries = 16 entries */
+.Lselect_loop_avx2_w5:
+
+	vmovdqa	0(%rsi),%ymm6	/* first entry of the pair */
+	vmovdqa	32(%rsi),%ymm7
+	vmovdqa	64(%rsi),%ymm8
+
+	vmovdqa	96(%rsi),%ymm11	/* second entry of the pair */
+	vmovdqa	128(%rsi),%ymm12
+	vmovdqa	160(%rsi),%ymm13
+
+	vpcmpeqd	%ymm1,%ymm5,%ymm9	/* all-ones masks where the entry number equals the index */
+	vpcmpeqd	%ymm1,%ymm10,%ymm14
+
+	vpaddd	%ymm0,%ymm5,%ymm5
+	vpaddd	%ymm0,%ymm10,%ymm10
+	leaq	192(%rsi),%rsi
+
+	vpand	%ymm9,%ymm6,%ymm6
+	vpand	%ymm9,%ymm7,%ymm7
+	vpand	%ymm9,%ymm8,%ymm8
+	vpand	%ymm14,%ymm11,%ymm11
+	vpand	%ymm14,%ymm12,%ymm12
+	vpand	%ymm14,%ymm13,%ymm13
+
+	vpxor	%ymm6,%ymm2,%ymm2	/* XOR acts as OR here: at most one entry number can match the index */
+	vpxor	%ymm7,%ymm3,%ymm3
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpxor	%ymm11,%ymm2,%ymm2
+	vpxor	%ymm12,%ymm3,%ymm3
+	vpxor	%ymm13,%ymm4,%ymm4
+
+	decq	%rax
+	jnz	.Lselect_loop_avx2_w5
+
+	vmovdqu	%ymm2,0(%rdi)
+	vmovdqu	%ymm3,32(%rdi)
+	vmovdqu	%ymm4,64(%rdi)
+	vzeroupper
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+.size	ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5
+
+
+
+.globl	ecp_nistz256_avx2_select_w7	/* AVX2 variant of ecp_nistz256_select_w7; also entered through .Lavx2_select_w7. */
+.hidden ecp_nistz256_avx2_select_w7	/* In: %rdi = out (64 bytes), %rsi = table of 64 x 64 bytes (read with aligned 32-byte loads), %edx = index. */
+.type	ecp_nistz256_avx2_select_w7,@function
+.align	32
+ecp_nistz256_avx2_select_w7:
+.Lavx2_select_w7:
+	vzeroupper
+	vmovdqa	.LThree(%rip),%ymm0	/* counter step: three entries are handled per iteration */
+
+	vpxor	%ymm2,%ymm2,%ymm2	/* %ymm2,%ymm3 accumulate the selected 64 bytes */
+	vpxor	%ymm3,%ymm3,%ymm3
+
+	vmovdqa	.LOne(%rip),%ymm4	/* entry numbers 1,4,7,... */
+	vmovdqa	.LTwo(%rip),%ymm8	/* entry numbers 2,5,8,... */
+	vmovdqa	.LThree(%rip),%ymm12	/* entry numbers 3,6,9,... */
+
+	vmovd	%edx,%xmm1
+	vpermd	%ymm1,%ymm2,%ymm1	/* %ymm2 is zero here, so every lane takes dword 0: broadcast of the index */
+
+
+	movq	$21,%rax	/* 21 iterations x 3 entries = 63 entries; entry 64 is handled after the loop */
+.Lselect_loop_avx2_w7:
+
+	vmovdqa	0(%rsi),%ymm5	/* first entry of the triple */
+	vmovdqa	32(%rsi),%ymm6
+
+	vmovdqa	64(%rsi),%ymm9	/* second entry */
+	vmovdqa	96(%rsi),%ymm10
+
+	vmovdqa	128(%rsi),%ymm13	/* third entry */
+	vmovdqa	160(%rsi),%ymm14
+
+	vpcmpeqd	%ymm1,%ymm4,%ymm7	/* all-ones masks where the entry number equals the index */
+	vpcmpeqd	%ymm1,%ymm8,%ymm11
+	vpcmpeqd	%ymm1,%ymm12,%ymm15
+
+	vpaddd	%ymm0,%ymm4,%ymm4
+	vpaddd	%ymm0,%ymm8,%ymm8
+	vpaddd	%ymm0,%ymm12,%ymm12
+	leaq	192(%rsi),%rsi
+
+	vpand	%ymm7,%ymm5,%ymm5
+	vpand	%ymm7,%ymm6,%ymm6
+	vpand	%ymm11,%ymm9,%ymm9
+	vpand	%ymm11,%ymm10,%ymm10
+	vpand	%ymm15,%ymm13,%ymm13
+	vpand	%ymm15,%ymm14,%ymm14
+
+	vpxor	%ymm5,%ymm2,%ymm2	/* XOR acts as OR here: at most one entry number can match the index */
+	vpxor	%ymm6,%ymm3,%ymm3
+	vpxor	%ymm9,%ymm2,%ymm2
+	vpxor	%ymm10,%ymm3,%ymm3
+	vpxor	%ymm13,%ymm2,%ymm2
+	vpxor	%ymm14,%ymm3,%ymm3
+
+	decq	%rax
+	jnz	.Lselect_loop_avx2_w7
+
+/* Tail: the 64th entry. %ymm4 has advanced from 1 by 21 steps of 3, so it now holds 64. */
+	vmovdqa	0(%rsi),%ymm5
+	vmovdqa	32(%rsi),%ymm6
+
+	vpcmpeqd	%ymm1,%ymm4,%ymm7
+
+	vpand	%ymm7,%ymm5,%ymm5
+	vpand	%ymm7,%ymm6,%ymm6
+
+	vpxor	%ymm5,%ymm2,%ymm2
+	vpxor	%ymm6,%ymm3,%ymm3
+
+	vmovdqu	%ymm2,0(%rdi)
+	vmovdqu	%ymm3,32(%rdi)
+	vzeroupper
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+.size	ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
+.type	__ecp_nistz256_add_toq,@function	/* __ecp_nistz256_add_toq: (%r12,%r13,%r8,%r9) = (%r12,%r13,%r8,%r9) + [%rbx], reduced by one conditional subtraction of p. */
+.align	32	/* In: %r14 = limb 1 of p, %r15 = limb 3 of p, %rdi = out. Result is stored to (%rdi) and left in %r12,%r13,%r8,%r9. */
+__ecp_nistz256_add_toq:
+	xorq	%r11,%r11	/* %r11 collects the carry out of the addition */
+	addq	0(%rbx),%r12
+	adcq	8(%rbx),%r13
+	movq	%r12,%rax	/* %rax,%rbp,%rcx,%r10 keep copies of the raw sum */
+	adcq	16(%rbx),%r8
+	adcq	24(%rbx),%r9
+	movq	%r13,%rbp
+	adcq	$0,%r11
+
+	subq	$-1,%r12	/* subtract p (limbs -1, %r14, 0, %r15) from the 5-limb sum */
+	movq	%r8,%rcx
+	sbbq	%r14,%r13
+	sbbq	$0,%r8
+	movq	%r9,%r10
+	sbbq	%r15,%r9
+	sbbq	$0,%r11
+
+	cmovcq	%rax,%r12	/* borrow means the sum was already below p: restore the raw sum */
+	cmovcq	%rbp,%r13
+	movq	%r12,0(%rdi)
+	cmovcq	%rcx,%r8
+	movq	%r13,8(%rdi)
+	cmovcq	%r10,%r9
+	movq	%r8,16(%rdi)
+	movq	%r9,24(%rdi)
+
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+.size	__ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
+
+.type	__ecp_nistz256_sub_fromq,@function	/* __ecp_nistz256_sub_fromq: (%r12,%r13,%r8,%r9) = (%r12,%r13,%r8,%r9) - [%rbx], with p added back on borrow. */
+.align	32	/* In: %r14 = limb 1 of p, %r15 = limb 3 of p, %rdi = out. Result is stored to (%rdi) and left in %r12,%r13,%r8,%r9. */
+__ecp_nistz256_sub_fromq:
+	subq	0(%rbx),%r12
+	sbbq	8(%rbx),%r13
+	movq	%r12,%rax	/* %rax,%rbp,%rcx,%r10 keep copies of the raw difference */
+	sbbq	16(%rbx),%r8
+	sbbq	24(%rbx),%r9
+	movq	%r13,%rbp
+	sbbq	%r11,%r11	/* %r11 = all ones if the subtraction borrowed, else 0 */
+
+	addq	$-1,%r12	/* add p (limbs -1, %r14, 0, %r15) */
+	movq	%r8,%rcx
+	adcq	%r14,%r13
+	adcq	$0,%r8
+	movq	%r9,%r10
+	adcq	%r15,%r9
+	testq	%r11,%r11
+
+	cmovzq	%rax,%r12	/* no borrow: keep the raw difference instead of difference + p */
+	cmovzq	%rbp,%r13
+	movq	%r12,0(%rdi)
+	cmovzq	%rcx,%r8
+	movq	%r13,8(%rdi)
+	cmovzq	%r10,%r9
+	movq	%r8,16(%rdi)
+	movq	%r9,24(%rdi)
+
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+.size	__ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
+
+.type	__ecp_nistz256_subq,@function	/* __ecp_nistz256_subq: (%r12,%r13,%r8,%r9) = (%rax,%rbp,%rcx,%r10) - (%r12,%r13,%r8,%r9), with p added back on borrow. */
+.align	32	/* In: %r14 = limb 1 of p, %r15 = limb 3 of p. Register-only: nothing is written to memory, %rdi is not used. */
+__ecp_nistz256_subq:
+	subq	%r12,%rax
+	sbbq	%r13,%rbp
+	movq	%rax,%r12	/* %r12,%r13,%r8,%r9 receive the raw difference */
+	sbbq	%r8,%rcx
+	sbbq	%r9,%r10
+	movq	%rbp,%r13
+	sbbq	%r11,%r11	/* %r11 = all ones if the subtraction borrowed, else 0 */
+
+	addq	$-1,%rax	/* %rax,%rbp,%rcx,%r10 = raw difference + p (limbs -1, %r14, 0, %r15) */
+	movq	%rcx,%r8
+	adcq	%r14,%rbp
+	adcq	$0,%rcx
+	movq	%r10,%r9
+	adcq	%r15,%r10
+	testq	%r11,%r11
+
+	cmovnzq	%rax,%r12	/* borrow: take difference + p instead of the raw difference */
+	cmovnzq	%rbp,%r13
+	cmovnzq	%rcx,%r8
+	cmovnzq	%r10,%r9
+
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+.size	__ecp_nistz256_subq,.-__ecp_nistz256_subq
+
+.type	__ecp_nistz256_mul_by_2q,@function	/* __ecp_nistz256_mul_by_2q: (%r12,%r13,%r8,%r9) = 2 * (%r12,%r13,%r8,%r9), reduced by one conditional subtraction of p. */
+.align	32	/* In: %r14 = limb 1 of p, %r15 = limb 3 of p, %rdi = out. Result is stored to (%rdi) and left in %r12,%r13,%r8,%r9. */
+__ecp_nistz256_mul_by_2q:
+	xorq	%r11,%r11	/* %r11 collects the bit shifted out of the top limb */
+	addq	%r12,%r12
+	adcq	%r13,%r13
+	movq	%r12,%rax	/* %rax,%rbp,%rcx,%r10 keep copies of the raw doubled value */
+	adcq	%r8,%r8
+	adcq	%r9,%r9
+	movq	%r13,%rbp
+	adcq	$0,%r11
+
+	subq	$-1,%r12	/* subtract p (limbs -1, %r14, 0, %r15) from the 5-limb value */
+	movq	%r8,%rcx
+	sbbq	%r14,%r13
+	sbbq	$0,%r8
+	movq	%r9,%r10
+	sbbq	%r15,%r9
+	sbbq	$0,%r11
+
+	cmovcq	%rax,%r12	/* borrow means the doubled value was already below p: restore it */
+	cmovcq	%rbp,%r13
+	movq	%r12,0(%rdi)
+	cmovcq	%rcx,%r8
+	movq	%r13,8(%rdi)
+	cmovcq	%r10,%r9
+	movq	%r8,16(%rdi)
+	movq	%r9,24(%rdi)
+
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+.size	__ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
+.globl	ecp_nistz256_point_double	/* ecp_nistz256_point_double: out = 2 * in for a point stored as three 32-byte coordinates at offsets 0, 32, 64 (called x, y, z below). */
+.hidden ecp_nistz256_point_double	/* In: %rdi = out, %rsi = in. Stack slots used below: S = 0(%rsp), M = 32, Zsqr = 64, in_x copy = 96, tmp = 128. */
+.type	ecp_nistz256_point_double,@function
+.align	32
+ecp_nistz256_point_double:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	subq	$160+8,%rsp	/* five 32-byte slots plus 8 bytes of padding */
+
+.Lpoint_double_shortcutq:	/* ecp_nistz256_point_add jumps here after shrinking its own frame to this size */
+	movdqu	0(%rsi),%xmm0
+	movq	%rsi,%rbx	/* %rbx = in; %rsi is reused as an operand pointer below */
+	movdqu	16(%rsi),%xmm1
+	movq	32+0(%rsi),%r12	/* %r12,%r13,%r8,%r9 = in_y */
+	movq	32+8(%rsi),%r13
+	movq	32+16(%rsi),%r8
+	movq	32+24(%rsi),%r9
+	movq	.Lpoly+8(%rip),%r14	/* %r14,%r15 = limbs 1 and 3 of p, as the helpers expect */
+	movq	.Lpoly+24(%rip),%r15
+	movdqa	%xmm0,96(%rsp)	/* copy in_x to the stack; aligned store, so %rsp must be 16-byte aligned here */
+	movdqa	%xmm1,96+16(%rsp)
+	leaq	32(%rdi),%r10
+	leaq	64(%rdi),%r11
+.byte	102,72,15,110,199	/* movq %rdi,%xmm0: save the out_x pointer */
+.byte	102,73,15,110,202	/* movq %r10,%xmm1: save the out_y pointer */
+.byte	102,73,15,110,211	/* movq %r11,%xmm2: save the out_z pointer */
+
+	leaq	0(%rsp),%rdi
+	call	__ecp_nistz256_mul_by_2q	/* S = 2 * in_y */
+
+	movq	64+0(%rsi),%rax
+	movq	64+8(%rsi),%r14
+	movq	64+16(%rsi),%r15
+	movq	64+24(%rsi),%r8
+	leaq	64-0(%rsi),%rsi
+	leaq	64(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq	/* Zsqr = in_z^2 */
+
+	movq	0+0(%rsp),%rax
+	movq	8+0(%rsp),%r14
+	leaq	0+0(%rsp),%rsi
+	movq	16+0(%rsp),%r15
+	movq	24+0(%rsp),%r8
+	leaq	0(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq	/* S = S^2 */
+
+	movq	32(%rbx),%rax
+	movq	64+0(%rbx),%r9
+	movq	64+8(%rbx),%r10
+	movq	64+16(%rbx),%r11
+	movq	64+24(%rbx),%r12
+	leaq	64-0(%rbx),%rsi
+	leaq	32(%rbx),%rbx
+.byte	102,72,15,126,215	/* movq %xmm2,%rdi: %rdi = out_z */
+	call	__ecp_nistz256_mul_montq	/* out_z = in_z * in_y */
+	call	__ecp_nistz256_mul_by_2q	/* out_z = 2 * out_z (operands are still in registers from the multiply) */
+
+	movq	96+0(%rsp),%r12
+	movq	96+8(%rsp),%r13
+	leaq	64(%rsp),%rbx
+	movq	96+16(%rsp),%r8
+	movq	96+24(%rsp),%r9
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_add_toq	/* M = in_x + Zsqr */
+
+	movq	96+0(%rsp),%r12
+	movq	96+8(%rsp),%r13
+	leaq	64(%rsp),%rbx
+	movq	96+16(%rsp),%r8
+	movq	96+24(%rsp),%r9
+	leaq	64(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq	/* Zsqr = in_x - Zsqr */
+
+	movq	0+0(%rsp),%rax
+	movq	8+0(%rsp),%r14
+	leaq	0+0(%rsp),%rsi
+	movq	16+0(%rsp),%r15
+	movq	24+0(%rsp),%r8
+.byte	102,72,15,126,207	/* movq %xmm1,%rdi: %rdi = out_y */
+	call	__ecp_nistz256_sqr_montq	/* out_y = S^2; result also in %r12..%r15, with %rsi/%rbp = limbs 1/3 of p */
+	xorq	%r9,%r9	/* inline halving of %r12..%r15 modulo p: %r9 collects the carry of value + p */
+	movq	%r12,%rax	/* %rax,%r10,%rcx,%r8 keep copies of the original limbs */
+	addq	$-1,%r12
+	movq	%r13,%r10
+	adcq	%rsi,%r13
+	movq	%r14,%rcx
+	adcq	$0,%r14
+	movq	%r15,%r8
+	adcq	%rbp,%r15
+	adcq	$0,%r9
+	xorq	%rsi,%rsi
+	testq	$1,%rax	/* was the original value even? */
+
+	cmovzq	%rax,%r12	/* even: use the original value (and a zero carry) instead of value + p */
+	cmovzq	%r10,%r13
+	cmovzq	%rcx,%r14
+	cmovzq	%r8,%r15
+	cmovzq	%rsi,%r9
+
+	movq	%r13,%rax	/* shift the 257-bit value %r9:%r15:%r14:%r13:%r12 right by one bit */
+	shrq	$1,%r12
+	shlq	$63,%rax
+	movq	%r14,%r10
+	shrq	$1,%r13
+	orq	%rax,%r12
+	shlq	$63,%r10
+	movq	%r15,%rcx
+	shrq	$1,%r14
+	orq	%r10,%r13
+	shlq	$63,%rcx
+	movq	%r12,0(%rdi)	/* out_y = (S^2) / 2 */
+	shrq	$1,%r15
+	movq	%r13,8(%rdi)
+	shlq	$63,%r9
+	orq	%rcx,%r14
+	orq	%r9,%r15
+	movq	%r14,16(%rdi)
+	movq	%r15,24(%rdi)
+	movq	64(%rsp),%rax
+	leaq	64(%rsp),%rbx
+	movq	0+32(%rsp),%r9
+	movq	8+32(%rsp),%r10
+	leaq	0+32(%rsp),%rsi
+	movq	16+32(%rsp),%r11
+	movq	24+32(%rsp),%r12
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	/* M = M * Zsqr */
+
+	leaq	128(%rsp),%rdi
+	call	__ecp_nistz256_mul_by_2q	/* tmp = 2 * M */
+
+	leaq	32(%rsp),%rbx
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_add_toq	/* M = tmp + M, i.e. three times the product above */
+
+	movq	96(%rsp),%rax
+	leaq	96(%rsp),%rbx
+	movq	0+0(%rsp),%r9
+	movq	8+0(%rsp),%r10
+	leaq	0+0(%rsp),%rsi
+	movq	16+0(%rsp),%r11
+	movq	24+0(%rsp),%r12
+	leaq	0(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	/* S = S * in_x */
+
+	leaq	128(%rsp),%rdi
+	call	__ecp_nistz256_mul_by_2q	/* tmp = 2 * S */
+
+	movq	0+32(%rsp),%rax
+	movq	8+32(%rsp),%r14
+	leaq	0+32(%rsp),%rsi
+	movq	16+32(%rsp),%r15
+	movq	24+32(%rsp),%r8
+.byte	102,72,15,126,199	/* movq %xmm0,%rdi: %rdi = out_x */
+	call	__ecp_nistz256_sqr_montq	/* out_x = M^2; result in %r12..%r15 */
+
+	leaq	128(%rsp),%rbx
+	movq	%r14,%r8	/* move the square into the %r12,%r13,%r8,%r9 layout used by the add/sub helpers */
+	movq	%r15,%r9
+	movq	%rsi,%r14	/* restore %r14,%r15 = limbs 1/3 of p (left in %rsi/%rbp by the squaring) */
+	movq	%rbp,%r15
+	call	__ecp_nistz256_sub_fromq	/* out_x = M^2 - tmp */
+
+	movq	0+0(%rsp),%rax
+	movq	0+8(%rsp),%rbp
+	movq	0+16(%rsp),%rcx
+	movq	0+24(%rsp),%r10
+	leaq	0(%rsp),%rdi
+	call	__ecp_nistz256_subq	/* registers = S - out_x (nothing stored yet) */
+
+	movq	32(%rsp),%rax
+	leaq	32(%rsp),%rbx
+	movq	%r12,%r14
+	xorl	%ecx,%ecx	/* sets ZF, so the two cmovz below always move */
+	movq	%r12,0+0(%rsp)	/* store S - out_x into S while also loading it as the multiply operand (%r9..%r12) */
+	movq	%r13,%r10
+	movq	%r13,0+8(%rsp)
+	cmovzq	%r8,%r11
+	movq	%r8,0+16(%rsp)
+	leaq	0-0(%rsp),%rsi
+	cmovzq	%r9,%r12
+	movq	%r9,0+24(%rsp)
+	movq	%r14,%r9
+	leaq	0(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	/* S = S * M */
+
+.byte	102,72,15,126,203	/* movq %xmm1,%rbx: subtrahend = out_y */
+.byte	102,72,15,126,207	/* movq %xmm1,%rdi: destination = out_y */
+	call	__ecp_nistz256_sub_fromq	/* out_y = S - out_y */
+
+	addq	$160+8,%rsp
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbx
+	popq	%rbp
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+.size	ecp_nistz256_point_double,.-ecp_nistz256_point_double
+.globl	ecp_nistz256_point_add	/* ecp_nistz256_point_add: out = in1 + in2 for points stored as three 32-byte coordinates at offsets 0, 32, 64 (called x, y, z below). */
+.hidden ecp_nistz256_point_add	/* In: %rdi = out, %rsi = in1, %rdx = in2. Stack slots: H=0, Z1sqr/Hsqr=32, R=64, Z2sqr/Rsqr=96, Hcub=128, U1=160, U2=192, S1=224, S2=256, */
+.type	ecp_nistz256_point_add,@function	/* res_x=288, res_y=320, res_z=352, in1 x/y/z=384/416/448, in2 x/y/z=480/512/544 (all offsets from %rsp). */
+.align	32
+ecp_nistz256_point_add:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	subq	$576+8,%rsp	/* eighteen 32-byte slots plus 8 bytes of padding */
+
+	movdqu	0(%rsi),%xmm0	/* copy in1 (96 bytes) to the stack */
+	movdqu	16(%rsi),%xmm1
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm3
+	movdqu	64(%rsi),%xmm4
+	movdqu	80(%rsi),%xmm5
+	movq	%rsi,%rbx	/* %rbx = in1 */
+	movq	%rdx,%rsi	/* %rsi = in2 */
+	movdqa	%xmm0,384(%rsp)
+	movdqa	%xmm1,384+16(%rsp)
+	movdqa	%xmm2,416(%rsp)
+	movdqa	%xmm3,416+16(%rsp)
+	movdqa	%xmm4,448(%rsp)
+	movdqa	%xmm5,448+16(%rsp)
+	por	%xmm4,%xmm5	/* start OR-folding all of in1_z into one dword (finished after the first squaring) */
+
+	movdqu	0(%rsi),%xmm0	/* copy in2 to the stack, interleaved with the fold */
+	pshufd	$0xb1,%xmm5,%xmm3
+	movdqu	16(%rsi),%xmm1
+	movdqu	32(%rsi),%xmm2
+	por	%xmm3,%xmm5
+	movdqu	48(%rsi),%xmm3
+	movq	64+0(%rsi),%rax	/* %rax,%r14,%r15,%r8 = in2_z, operand for the first squaring */
+	movq	64+8(%rsi),%r14
+	movq	64+16(%rsi),%r15
+	movq	64+24(%rsi),%r8
+	movdqa	%xmm0,480(%rsp)
+	pshufd	$0x1e,%xmm5,%xmm4
+	movdqa	%xmm1,480+16(%rsp)
+	movdqu	64(%rsi),%xmm0
+	movdqu	80(%rsi),%xmm1
+	movdqa	%xmm2,512(%rsp)
+	movdqa	%xmm3,512+16(%rsp)
+	por	%xmm4,%xmm5	/* dword 0 of %xmm5 = OR of all eight dwords of in1_z */
+	pxor	%xmm4,%xmm4
+	por	%xmm0,%xmm1	/* start the same fold for in2_z */
+.byte	102,72,15,110,199	/* movq %rdi,%xmm0: save the out pointer */
+
+	leaq	64-0(%rsi),%rsi
+	movq	%rax,544+0(%rsp)
+	movq	%r14,544+8(%rsp)
+	movq	%r15,544+16(%rsp)
+	movq	%r8,544+24(%rsp)
+	leaq	96(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq	/* Z2sqr = in2_z^2 */
+
+	pcmpeqd	%xmm4,%xmm5
+	pshufd	$0xb1,%xmm1,%xmm4
+	por	%xmm1,%xmm4
+	pshufd	$0,%xmm5,%xmm5	/* %xmm5 = all ones if in1_z is zero, else zero (kept until the final selection) */
+	pshufd	$0x1e,%xmm4,%xmm3
+	por	%xmm3,%xmm4
+	pxor	%xmm3,%xmm3
+	pcmpeqd	%xmm3,%xmm4
+	pshufd	$0,%xmm4,%xmm4	/* %xmm4 = all ones if in2_z is zero, else zero */
+	movq	64+0(%rbx),%rax
+	movq	64+8(%rbx),%r14
+	movq	64+16(%rbx),%r15
+	movq	64+24(%rbx),%r8
+.byte	102,72,15,110,203	/* movq %rbx,%xmm1: save the in1 pointer */
+
+	leaq	64-0(%rbx),%rsi
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq	/* Z1sqr = in1_z^2 */
+
+	movq	544(%rsp),%rax
+	leaq	544(%rsp),%rbx
+	movq	0+96(%rsp),%r9
+	movq	8+96(%rsp),%r10
+	leaq	0+96(%rsp),%rsi
+	movq	16+96(%rsp),%r11
+	movq	24+96(%rsp),%r12
+	leaq	224(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	/* S1 = Z2sqr * in2_z */
+
+	movq	448(%rsp),%rax
+	leaq	448(%rsp),%rbx
+	movq	0+32(%rsp),%r9
+	movq	8+32(%rsp),%r10
+	leaq	0+32(%rsp),%rsi
+	movq	16+32(%rsp),%r11
+	movq	24+32(%rsp),%r12
+	leaq	256(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	/* S2 = Z1sqr * in1_z */
+
+	movq	416(%rsp),%rax
+	leaq	416(%rsp),%rbx
+	movq	0+224(%rsp),%r9
+	movq	8+224(%rsp),%r10
+	leaq	0+224(%rsp),%rsi
+	movq	16+224(%rsp),%r11
+	movq	24+224(%rsp),%r12
+	leaq	224(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	/* S1 = S1 * in1_y */
+
+	movq	512(%rsp),%rax
+	leaq	512(%rsp),%rbx
+	movq	0+256(%rsp),%r9
+	movq	8+256(%rsp),%r10
+	leaq	0+256(%rsp),%rsi
+	movq	16+256(%rsp),%r11
+	movq	24+256(%rsp),%r12
+	leaq	256(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	/* S2 = S2 * in2_y */
+
+	leaq	224(%rsp),%rbx
+	leaq	64(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq	/* R = S2 - S1 */
+
+	orq	%r13,%r12	/* %r12 = OR of R's limbs: zero exactly when R == 0 */
+	movdqa	%xmm4,%xmm2
+	orq	%r8,%r12
+	orq	%r9,%r12
+	por	%xmm5,%xmm2	/* %xmm2 = nonzero if either input has z == 0 */
+.byte	102,73,15,110,220	/* movq %r12,%xmm3: remember the R-is-zero indicator */
+
+	movq	384(%rsp),%rax
+	leaq	384(%rsp),%rbx
+	movq	0+96(%rsp),%r9
+	movq	8+96(%rsp),%r10
+	leaq	0+96(%rsp),%rsi
+	movq	16+96(%rsp),%r11
+	movq	24+96(%rsp),%r12
+	leaq	160(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	/* U1 = in1_x * Z2sqr */
+
+	movq	480(%rsp),%rax
+	leaq	480(%rsp),%rbx
+	movq	0+32(%rsp),%r9
+	movq	8+32(%rsp),%r10
+	leaq	0+32(%rsp),%rsi
+	movq	16+32(%rsp),%r11
+	movq	24+32(%rsp),%r12
+	leaq	192(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	/* U2 = in2_x * Z1sqr */
+
+	leaq	160(%rsp),%rbx
+	leaq	0(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq	/* H = U2 - U1 */
+
+	orq	%r13,%r12	/* ZF set exactly when H == 0 */
+	orq	%r8,%r12
+	orq	%r9,%r12
+
+.byte	0x3e	/* prefix byte in front of the jnz (DS-override encoding; presumably a branch-taken hint) */
+	jnz	.Ladd_proceedq	/* H != 0: general addition */
+.byte	102,73,15,126,208	/* movq %xmm2,%r8: either-z-is-zero indicator */
+.byte	102,73,15,126,217	/* movq %xmm3,%r9: R-is-zero indicator (zero means R == 0) */
+	testq	%r8,%r8
+	jnz	.Ladd_proceedq	/* an input has z == 0: the final selection picks the other input */
+	testq	%r9,%r9
+	jz	.Ladd_doubleq	/* H == 0 and R == 0: hand over to the doubling code */
+
+.byte	102,72,15,126,199	/* movq %xmm0,%rdi: %rdi = out */
+	pxor	%xmm0,%xmm0	/* H == 0 but R != 0: write an all-zero result */
+	movdqu	%xmm0,0(%rdi)
+	movdqu	%xmm0,16(%rdi)
+	movdqu	%xmm0,32(%rdi)
+	movdqu	%xmm0,48(%rdi)
+	movdqu	%xmm0,64(%rdi)
+	movdqu	%xmm0,80(%rdi)
+	jmp	.Ladd_doneq
+
+.align	32
+.Ladd_doubleq:
+.byte	102,72,15,126,206	/* movq %xmm1,%rsi: %rsi = in1 */
+.byte	102,72,15,126,199	/* movq %xmm0,%rdi: %rdi = out */
+	addq	$416,%rsp	/* shrink the frame from 576+8 to 160+8 bytes, the size ecp_nistz256_point_double uses */
+	jmp	.Lpoint_double_shortcutq	/* continue inside ecp_nistz256_point_double, which also pops the saved registers */
+
+.align	32
+.Ladd_proceedq:
+	movq	0+64(%rsp),%rax
+	movq	8+64(%rsp),%r14
+	leaq	0+64(%rsp),%rsi
+	movq	16+64(%rsp),%r15
+	movq	24+64(%rsp),%r8
+	leaq	96(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq	/* Rsqr = R^2 (reuses the Z2sqr slot) */
+
+	movq	448(%rsp),%rax
+	leaq	448(%rsp),%rbx
+	movq	0+0(%rsp),%r9
+	movq	8+0(%rsp),%r10
+	leaq	0+0(%rsp),%rsi
+	movq	16+0(%rsp),%r11
+	movq	24+0(%rsp),%r12
+	leaq	352(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	/* res_z = H * in1_z */
+
+	movq	0+0(%rsp),%rax
+	movq	8+0(%rsp),%r14
+	leaq	0+0(%rsp),%rsi
+	movq	16+0(%rsp),%r15
+	movq	24+0(%rsp),%r8
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq	/* Hsqr = H^2 (reuses the Z1sqr slot) */
+
+	movq	544(%rsp),%rax
+	leaq	544(%rsp),%rbx
+	movq	0+352(%rsp),%r9
+	movq	8+352(%rsp),%r10
+	leaq	0+352(%rsp),%rsi
+	movq	16+352(%rsp),%r11
+	movq	24+352(%rsp),%r12
+	leaq	352(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	/* res_z = res_z * in2_z */
+
+	movq	0(%rsp),%rax
+	leaq	0(%rsp),%rbx
+	movq	0+32(%rsp),%r9
+	movq	8+32(%rsp),%r10
+	leaq	0+32(%rsp),%rsi
+	movq	16+32(%rsp),%r11
+	movq	24+32(%rsp),%r12
+	leaq	128(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	/* Hcub = Hsqr * H */
+
+	movq	160(%rsp),%rax
+	leaq	160(%rsp),%rbx
+	movq	0+32(%rsp),%r9
+	movq	8+32(%rsp),%r10
+	leaq	0+32(%rsp),%rsi
+	movq	16+32(%rsp),%r11
+	movq	24+32(%rsp),%r12
+	leaq	192(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	/* U2 = Hsqr * U1; the product is also in %r12,%r13,%r8,%r9 */
+
+/* Inline copy of the doubling helper on the registers (no store): %r12,%r13,%r8,%r9 = 2 * U2. */
+/* %r14/%r15 still hold limbs 1/3 of p from the multiply above. */
+
+	xorq	%r11,%r11
+	addq	%r12,%r12
+	leaq	96(%rsp),%rsi	/* %rsi = Rsqr, loaded into %rax,%rbp,%rcx,%r10 below */
+	adcq	%r13,%r13
+	movq	%r12,%rax
+	adcq	%r8,%r8
+	adcq	%r9,%r9
+	movq	%r13,%rbp
+	adcq	$0,%r11
+
+	subq	$-1,%r12
+	movq	%r8,%rcx
+	sbbq	%r14,%r13
+	sbbq	$0,%r8
+	movq	%r9,%r10
+	sbbq	%r15,%r9
+	sbbq	$0,%r11
+
+	cmovcq	%rax,%r12
+	movq	0(%rsi),%rax
+	cmovcq	%rbp,%r13
+	movq	8(%rsi),%rbp
+	cmovcq	%rcx,%r8
+	movq	16(%rsi),%rcx
+	cmovcq	%r10,%r9
+	movq	24(%rsi),%r10
+
+	call	__ecp_nistz256_subq	/* registers = Rsqr - 2 * U2 */
+
+	leaq	128(%rsp),%rbx
+	leaq	288(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq	/* res_x = Rsqr - 2 * U2 - Hcub */
+
+	movq	192+0(%rsp),%rax
+	movq	192+8(%rsp),%rbp
+	movq	192+16(%rsp),%rcx
+	movq	192+24(%rsp),%r10
+	leaq	320(%rsp),%rdi
+
+	call	__ecp_nistz256_subq	/* registers = U2 - res_x */
+
+	movq	%r12,0(%rdi)	/* res_y = U2 - res_x (the register-only helper does not store) */
+	movq	%r13,8(%rdi)
+	movq	%r8,16(%rdi)
+	movq	%r9,24(%rdi)
+	movq	128(%rsp),%rax
+	leaq	128(%rsp),%rbx
+	movq	0+224(%rsp),%r9
+	movq	8+224(%rsp),%r10
+	leaq	0+224(%rsp),%rsi
+	movq	16+224(%rsp),%r11
+	movq	24+224(%rsp),%r12
+	leaq	256(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	/* S2 = S1 * Hcub */
+
+	movq	320(%rsp),%rax
+	leaq	320(%rsp),%rbx
+	movq	0+64(%rsp),%r9
+	movq	8+64(%rsp),%r10
+	leaq	0+64(%rsp),%rsi
+	movq	16+64(%rsp),%r11
+	movq	24+64(%rsp),%r12
+	leaq	320(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	/* res_y = R * res_y */
+
+	leaq	256(%rsp),%rbx
+	leaq	320(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq	/* res_y = res_y - S2 */
+
+.byte	102,72,15,126,199	/* movq %xmm0,%rdi: %rdi = out */
+
+	movdqa	%xmm5,%xmm0	/* out_z: branch-free select using the z-is-zero masks %xmm5 (in1) and %xmm4 (in2) */
+	movdqa	%xmm5,%xmm1
+	pandn	352(%rsp),%xmm0	/* res_z where in1_z != 0 */
+	movdqa	%xmm5,%xmm2
+	pandn	352+16(%rsp),%xmm1
+	movdqa	%xmm5,%xmm3
+	pand	544(%rsp),%xmm2	/* in2_z where in1_z == 0 */
+	pand	544+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+
+	movdqa	%xmm4,%xmm0
+	movdqa	%xmm4,%xmm1
+	pandn	%xmm2,%xmm0	/* keep the value chosen above where in2_z != 0 */
+	movdqa	%xmm4,%xmm2
+	pandn	%xmm3,%xmm1
+	movdqa	%xmm4,%xmm3
+	pand	448(%rsp),%xmm2	/* in1_z where in2_z == 0 */
+	pand	448+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+	movdqu	%xmm2,64(%rdi)
+	movdqu	%xmm3,80(%rdi)
+
+	movdqa	%xmm5,%xmm0	/* out_x: same selection among res_x (288), in2_x (480) and in1_x (384) */
+	movdqa	%xmm5,%xmm1
+	pandn	288(%rsp),%xmm0
+	movdqa	%xmm5,%xmm2
+	pandn	288+16(%rsp),%xmm1
+	movdqa	%xmm5,%xmm3
+	pand	480(%rsp),%xmm2
+	pand	480+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+
+	movdqa	%xmm4,%xmm0
+	movdqa	%xmm4,%xmm1
+	pandn	%xmm2,%xmm0
+	movdqa	%xmm4,%xmm2
+	pandn	%xmm3,%xmm1
+	movdqa	%xmm4,%xmm3
+	pand	384(%rsp),%xmm2
+	pand	384+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+	movdqu	%xmm2,0(%rdi)
+	movdqu	%xmm3,16(%rdi)
+
+	movdqa	%xmm5,%xmm0	/* out_y: same selection among res_y (320), in2_y (512) and in1_y (416) */
+	movdqa	%xmm5,%xmm1
+	pandn	320(%rsp),%xmm0
+	movdqa	%xmm5,%xmm2
+	pandn	320+16(%rsp),%xmm1
+	movdqa	%xmm5,%xmm3
+	pand	512(%rsp),%xmm2
+	pand	512+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+
+	movdqa	%xmm4,%xmm0
+	movdqa	%xmm4,%xmm1
+	pandn	%xmm2,%xmm0
+	movdqa	%xmm4,%xmm2
+	pandn	%xmm3,%xmm1
+	movdqa	%xmm4,%xmm3
+	pand	416(%rsp),%xmm2
+	pand	416+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm3,48(%rdi)
+
+.Ladd_doneq:
+	addq	$576+8,%rsp
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbx
+	popq	%rbp
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+.size	ecp_nistz256_point_add,.-ecp_nistz256_point_add
+.globl	ecp_nistz256_point_add_affine	/* Entry: %rdi = output (bytes 0..95 written), %rsi = first input (bytes 0..95 read), %rdx = second input (bytes 0..63 read). */
+.hidden ecp_nistz256_point_add_affine
+.type	ecp_nistz256_point_add_affine,@function
+.align	32
+ecp_nistz256_point_add_affine:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	subq	$480+8,%rsp	/* 488-byte scratch frame; slots below are addressed as N(%rsp) */
+/* Copy the first input to 320..415(%rsp); bytes 64..95 are also loaded as four limbs (rax, r14, r15, r8) for the squaring call below. */
+	movdqu	0(%rsi),%xmm0
+	movq	%rdx,%rbx
+	movdqu	16(%rsi),%xmm1
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm3
+	movdqu	64(%rsi),%xmm4
+	movdqu	80(%rsi),%xmm5
+	movq	64+0(%rsi),%rax
+	movq	64+8(%rsi),%r14
+	movq	64+16(%rsi),%r15
+	movq	64+24(%rsi),%r8
+	movdqa	%xmm0,320(%rsp)
+	movdqa	%xmm1,320+16(%rsp)
+	movdqa	%xmm2,352(%rsp)
+	movdqa	%xmm3,352+16(%rsp)
+	movdqa	%xmm4,384(%rsp)
+	movdqa	%xmm5,384+16(%rsp)
+	por	%xmm4,%xmm5	/* start OR-folding bytes 64..95 of the first input into %xmm5 */
+/* Copy the second input to 416..479(%rsp) while OR-folding all 64 of its bytes into %xmm3. */
+	movdqu	0(%rbx),%xmm0
+	pshufd	$0xb1,%xmm5,%xmm3
+	movdqu	16(%rbx),%xmm1
+	movdqu	32(%rbx),%xmm2
+	por	%xmm3,%xmm5
+	movdqu	48(%rbx),%xmm3
+	movdqa	%xmm0,416(%rsp)
+	pshufd	$0x1e,%xmm5,%xmm4
+	movdqa	%xmm1,416+16(%rsp)
+	por	%xmm0,%xmm1
+.byte	102,72,15,110,199	/* encodes movq %rdi,%xmm0: park the output pointer, since %rdi is reused as helper destination */
+	movdqa	%xmm2,448(%rsp)
+	movdqa	%xmm3,448+16(%rsp)
+	por	%xmm2,%xmm3
+	por	%xmm4,%xmm5
+	pxor	%xmm4,%xmm4
+	por	%xmm1,%xmm3
+/* Squaring helper (defined outside this excerpt): source 64(%rsi), destination 32(%rsp). */
+	leaq	64-0(%rsi),%rsi
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq
+/* Finish the masks: %xmm5 = all-ones iff bytes 64..95 of the first input are all zero; %xmm4 = all-ones iff the 64-byte second input is all zero. */
+	pcmpeqd	%xmm4,%xmm5
+	pshufd	$0xb1,%xmm3,%xmm4
+	movq	0(%rbx),%rax
+
+	movq	%r12,%r9
+	por	%xmm3,%xmm4
+	pshufd	$0,%xmm5,%xmm5
+	pshufd	$0x1e,%xmm4,%xmm3
+	movq	%r13,%r10
+	por	%xmm3,%xmm4
+	pxor	%xmm3,%xmm3
+	movq	%r14,%r11
+	pcmpeqd	%xmm3,%xmm4
+	pshufd	$0,%xmm4,%xmm4
+/* Multiply helper: %rbx/%rax address the second input's bytes 0..31; other operand 32(%rsp), limbs taken from r12..r15 (presumably the squaring result); destination 0(%rsp). */
+	leaq	32-0(%rsp),%rsi
+	movq	%r15,%r12
+	leaq	0(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+/* sub_from helper: memory operand 320(%rsp), destination 64(%rsp); the other operand is presumably the limbs left in registers by the multiply. */
+	leaq	320(%rsp),%rbx
+	leaq	64(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq
+/* Multiply helper: operands 384(%rsp) and 32(%rsp), destination 32(%rsp). */
+	movq	384(%rsp),%rax
+	leaq	384(%rsp),%rbx
+	movq	0+32(%rsp),%r9
+	movq	8+32(%rsp),%r10
+	leaq	0+32(%rsp),%rsi
+	movq	16+32(%rsp),%r11
+	movq	24+32(%rsp),%r12
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+/* Multiply helper: operands 384(%rsp) and 64(%rsp), destination 288(%rsp). */
+	movq	384(%rsp),%rax
+	leaq	384(%rsp),%rbx
+	movq	0+64(%rsp),%r9
+	movq	8+64(%rsp),%r10
+	leaq	0+64(%rsp),%rsi
+	movq	16+64(%rsp),%r11
+	movq	24+64(%rsp),%r12
+	leaq	288(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+/* Multiply helper: operands 448(%rsp) and 32(%rsp), destination 32(%rsp). */
+	movq	448(%rsp),%rax
+	leaq	448(%rsp),%rbx
+	movq	0+32(%rsp),%r9
+	movq	8+32(%rsp),%r10
+	leaq	0+32(%rsp),%rsi
+	movq	16+32(%rsp),%r11
+	movq	24+32(%rsp),%r12
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+/* sub_from helper: memory operand 352(%rsp), destination 96(%rsp). */
+	leaq	352(%rsp),%rbx
+	leaq	96(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq
+/* Squaring helper: source 64(%rsp), destination 128(%rsp). */
+	movq	0+64(%rsp),%rax
+	movq	8+64(%rsp),%r14
+	leaq	0+64(%rsp),%rsi
+	movq	16+64(%rsp),%r15
+	movq	24+64(%rsp),%r8
+	leaq	128(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq
+/* Squaring helper: source 96(%rsp), destination 192(%rsp). */
+	movq	0+96(%rsp),%rax
+	movq	8+96(%rsp),%r14
+	leaq	0+96(%rsp),%rsi
+	movq	16+96(%rsp),%r15
+	movq	24+96(%rsp),%r8
+	leaq	192(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq
+/* Multiply helper: operands 128(%rsp) and 64(%rsp), destination 160(%rsp). */
+	movq	128(%rsp),%rax
+	leaq	128(%rsp),%rbx
+	movq	0+64(%rsp),%r9
+	movq	8+64(%rsp),%r10
+	leaq	0+64(%rsp),%rsi
+	movq	16+64(%rsp),%r11
+	movq	24+64(%rsp),%r12
+	leaq	160(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+/* Multiply helper: operands 320(%rsp) and 128(%rsp), destination 0(%rsp). */
+	movq	320(%rsp),%rax
+	leaq	320(%rsp),%rbx
+	movq	0+128(%rsp),%r9
+	movq	8+128(%rsp),%r10
+	leaq	0+128(%rsp),%rsi
+	movq	16+128(%rsp),%r11
+	movq	24+128(%rsp),%r12
+	leaq	0(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+/* Inline doubling of the four limbs in r12,r13,r8,r9 (presumably the product just computed), */
+/* with the carry-out collected in r11. A 4-limb constant with limbs (-1, r14, 0, r15) is then */
+/* subtracted; if that borrows, the cmovc chain restores the unsubtracted copies. */
+/* NOTE(review): r14/r15 are presumably preloaded with prime limbs by the helpers - confirm there. */
+	xorq	%r11,%r11
+	addq	%r12,%r12
+	leaq	192(%rsp),%rsi
+	adcq	%r13,%r13
+	movq	%r12,%rax
+	adcq	%r8,%r8
+	adcq	%r9,%r9
+	movq	%r13,%rbp
+	adcq	$0,%r11
+
+	subq	$-1,%r12
+	movq	%r8,%rcx
+	sbbq	%r14,%r13
+	sbbq	$0,%r8
+	movq	%r9,%r10
+	sbbq	%r15,%r9
+	sbbq	$0,%r11
+/* Keep the unsubtracted copy on borrow, interleaved with loading the four limbs at 192(%rsp) for the sub helper. */
+	cmovcq	%rax,%r12
+	movq	0(%rsi),%rax
+	cmovcq	%rbp,%r13
+	movq	8(%rsi),%rbp
+	cmovcq	%rcx,%r8
+	movq	16(%rsi),%rcx
+	cmovcq	%r10,%r9
+	movq	24(%rsi),%r10
+
+	call	__ecp_nistz256_subq
+/* sub_from helper: memory operand 160(%rsp), destination 224(%rsp). */
+	leaq	160(%rsp),%rbx
+	leaq	224(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq
+/* Load the four limbs at 0(%rsp) for the sub helper; the limbs it leaves in r12,r13,r8,r9 are stored to 64(%rsp) by the explicit moves after the call. */
+	movq	0+0(%rsp),%rax
+	movq	0+8(%rsp),%rbp
+	movq	0+16(%rsp),%rcx
+	movq	0+24(%rsp),%r10
+	leaq	64(%rsp),%rdi
+
+	call	__ecp_nistz256_subq
+
+	movq	%r12,0(%rdi)
+	movq	%r13,8(%rdi)
+	movq	%r8,16(%rdi)
+	movq	%r9,24(%rdi)
+	movq	352(%rsp),%rax	/* multiply helper: operands 352(%rsp) and 160(%rsp), destination 32(%rsp) */
+	leaq	352(%rsp),%rbx
+	movq	0+160(%rsp),%r9
+	movq	8+160(%rsp),%r10
+	leaq	0+160(%rsp),%rsi
+	movq	16+160(%rsp),%r11
+	movq	24+160(%rsp),%r12
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+/* Multiply helper: operands 96(%rsp) and 64(%rsp), destination 64(%rsp). */
+	movq	96(%rsp),%rax
+	leaq	96(%rsp),%rbx
+	movq	0+64(%rsp),%r9
+	movq	8+64(%rsp),%r10
+	leaq	0+64(%rsp),%rsi
+	movq	16+64(%rsp),%r11
+	movq	24+64(%rsp),%r12
+	leaq	64(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+/* sub_from helper: memory operand 32(%rsp), destination 256(%rsp). */
+	leaq	32(%rsp),%rbx
+	leaq	256(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq
+
+.byte	102,72,15,126,199	/* encodes movq %xmm0,%rdi: recover the output pointer parked earlier */
+/* Branch-free select for output bytes 64..95: 384(%rsp) if %xmm4 is set, else .LONE_mont if %xmm5 is set, else 288(%rsp). */
+	movdqa	%xmm5,%xmm0
+	movdqa	%xmm5,%xmm1
+	pandn	288(%rsp),%xmm0
+	movdqa	%xmm5,%xmm2
+	pandn	288+16(%rsp),%xmm1
+	movdqa	%xmm5,%xmm3
+	pand	.LONE_mont(%rip),%xmm2
+	pand	.LONE_mont+16(%rip),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+
+	movdqa	%xmm4,%xmm0
+	movdqa	%xmm4,%xmm1
+	pandn	%xmm2,%xmm0
+	movdqa	%xmm4,%xmm2
+	pandn	%xmm3,%xmm1
+	movdqa	%xmm4,%xmm3
+	pand	384(%rsp),%xmm2
+	pand	384+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+	movdqu	%xmm2,64(%rdi)
+	movdqu	%xmm3,80(%rdi)
+/* Same select for output bytes 0..31: 320(%rsp) if %xmm4 is set, else 416(%rsp) if %xmm5 is set, else 224(%rsp). */
+	movdqa	%xmm5,%xmm0
+	movdqa	%xmm5,%xmm1
+	pandn	224(%rsp),%xmm0
+	movdqa	%xmm5,%xmm2
+	pandn	224+16(%rsp),%xmm1
+	movdqa	%xmm5,%xmm3
+	pand	416(%rsp),%xmm2
+	pand	416+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+
+	movdqa	%xmm4,%xmm0
+	movdqa	%xmm4,%xmm1
+	pandn	%xmm2,%xmm0
+	movdqa	%xmm4,%xmm2
+	pandn	%xmm3,%xmm1
+	movdqa	%xmm4,%xmm3
+	pand	320(%rsp),%xmm2
+	pand	320+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+	movdqu	%xmm2,0(%rdi)
+	movdqu	%xmm3,16(%rdi)
+/* Same select for output bytes 32..63: 352(%rsp) if %xmm4 is set, else 448(%rsp) if %xmm5 is set, else 256(%rsp). */
+	movdqa	%xmm5,%xmm0
+	movdqa	%xmm5,%xmm1
+	pandn	256(%rsp),%xmm0
+	movdqa	%xmm5,%xmm2
+	pandn	256+16(%rsp),%xmm1
+	movdqa	%xmm5,%xmm3
+	pand	448(%rsp),%xmm2
+	pand	448+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+
+	movdqa	%xmm4,%xmm0
+	movdqa	%xmm4,%xmm1
+	pandn	%xmm2,%xmm0
+	movdqa	%xmm4,%xmm2
+	pandn	%xmm3,%xmm1
+	movdqa	%xmm4,%xmm3
+	pand	352(%rsp),%xmm2
+	pand	352+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm3,48(%rdi)
+/* Release the scratch frame and pop the callee-saved registers in reverse push order. */
+	addq	$480+8,%rsp
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbx
+	popq	%rbp
+	.byte	0xf3,0xc3	/* rep ret */
+.size	ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S
new file mode 100644
index 0000000..7c1eeb7
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S
@@ -0,0 +1,48 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+
+
+
+.globl	CRYPTO_rdrand	/* CRYPTO_rdrand: execute RDRAND once, store the 64-bit result at 0(%rdi), and return the carry flag (1 = success, 0 = failure) in %rax. */
+.hidden CRYPTO_rdrand
+.type	CRYPTO_rdrand,@function
+.align	16
+CRYPTO_rdrand:
+	xorq	%rax,%rax	/* %rax = 0; this also clears CF */
+/* The four raw bytes below encode `rdrand %rcx` (REX.W 0F C7 /6, ModRM 0xf1); */
+/* presumably emitted as .byte so assemblers without RDRAND support can still build the file. */
+.byte	0x48, 0x0f, 0xc7, 0xf1
+/* RDRAND sets CF=1 when %rcx holds a valid random value and CF=0 when none was available. */
+	adcq	%rax,%rax	/* %rax = 0 + 0 + CF */
+	movq	%rcx,0(%rdi)	/* stored unconditionally, also when CF was 0 */
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+
+
+
+.globl	CRYPTO_rdrand_multiple8_buf	/* Fill the buffer at %rdi with %rsi bytes of RDRAND output, 8 bytes per iteration; returns 1 in %rax on success, 0 as soon as one RDRAND fails. */
+.hidden CRYPTO_rdrand_multiple8_buf
+.type	CRYPTO_rdrand_multiple8_buf,@function
+.align	16
+CRYPTO_rdrand_multiple8_buf:
+	testq	%rsi,%rsi
+	jz	.Lout	/* zero length: nothing to write, report success */
+	movq	$8,%rdx	/* step size in bytes for both the pointer and the remaining count */
+.Lloop:
+/* The raw bytes below encode `rdrand %rcx`; CF=0 afterwards means no random value was delivered. */
+/* The loop exits only when the count reaches exactly zero, so %rsi must be a multiple of 8. */
+.byte	0x48, 0x0f, 0xc7, 0xf1
+	jnc	.Lerr	/* bytes already written to the buffer are left in place on failure */
+	movq	%rcx,0(%rdi)
+	addq	%rdx,%rdi
+	subq	%rdx,%rsi
+	jnz	.Lloop
+.Lout:
+	movq	$1,%rax	/* success */
+	.byte	0xf3,0xc3	/* rep ret */
+.Lerr:
+	xorq	%rax,%rax	/* failure */
+	.byte	0xf3,0xc3	/* rep ret */
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/fipsmodule/rsaz-avx2.S b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/rsaz-avx2.S
new file mode 100644
index 0000000..89b81ed
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/rsaz-avx2.S
@@ -0,0 +1,1744 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+.globl	rsaz_1024_sqr_avx2	/* Entry (roles read off the code below): %rdi = result, %rsi = input, %rdx = modulus table (presumably), %ecx = multiplier for reduction digits, %r8d = number of squarings. */
+.hidden rsaz_1024_sqr_avx2
+.type	rsaz_1024_sqr_avx2,@function
+.align	64
+rsaz_1024_sqr_avx2:
+.cfi_startproc	
+	leaq	(%rsp),%rax	/* remember the entry %rsp; the CFA is tracked through %rax, then %rbp */
+.cfi_def_cfa_register	%rax
+	pushq	%rbx
+.cfi_offset	%rbx,-16
+	pushq	%rbp
+.cfi_offset	%rbp,-24
+	pushq	%r12
+.cfi_offset	%r12,-32
+	pushq	%r13
+.cfi_offset	%r13,-40
+	pushq	%r14
+.cfi_offset	%r14,-48
+	pushq	%r15
+.cfi_offset	%r15,-56
+	vzeroupper
+	movq	%rax,%rbp
+.cfi_def_cfa_register	%rbp
+	movq	%rdx,%r13
+	subq	$832,%rsp	/* scratch frame; re-aligned below */
+	movq	%r13,%r15
+	subq	$-128,%rdi	/* bias the three pointers by +128; displacements below are written as N-128 */
+	subq	$-128,%rsi
+	subq	$-128,%r13
+/* ((%rdx & 4095) + 320) >> 12 is non-zero iff the 320 bytes starting at the third argument reach a 4 KiB boundary; only then is the table copied to the stack. */
+	andq	$4095,%r15
+	addq	$320,%r15
+	shrq	$12,%r15
+	vpxor	%ymm9,%ymm9,%ymm9
+	jz	.Lsqr_1024_no_n_copy
+
+
+/* Copy nine 32-byte vectors from the third argument to 832(%rsp) of a 2048-byte-aligned frame, */
+/* append one zero vector (%ymm9), and point %r13 (with the same +128 bias) at the copy. */
+
+	subq	$320,%rsp
+	vmovdqu	0-128(%r13),%ymm0
+	andq	$-2048,%rsp
+	vmovdqu	32-128(%r13),%ymm1
+	vmovdqu	64-128(%r13),%ymm2
+	vmovdqu	96-128(%r13),%ymm3
+	vmovdqu	128-128(%r13),%ymm4
+	vmovdqu	160-128(%r13),%ymm5
+	vmovdqu	192-128(%r13),%ymm6
+	vmovdqu	224-128(%r13),%ymm7
+	vmovdqu	256-128(%r13),%ymm8
+	leaq	832+128(%rsp),%r13
+	vmovdqu	%ymm0,0-128(%r13)
+	vmovdqu	%ymm1,32-128(%r13)
+	vmovdqu	%ymm2,64-128(%r13)
+	vmovdqu	%ymm3,96-128(%r13)
+	vmovdqu	%ymm4,128-128(%r13)
+	vmovdqu	%ymm5,160-128(%r13)
+	vmovdqu	%ymm6,192-128(%r13)
+	vmovdqu	%ymm7,224-128(%r13)
+	vmovdqu	%ymm8,256-128(%r13)
+	vmovdqu	%ymm9,288-128(%r13)
+
+.Lsqr_1024_no_n_copy:
+	andq	$-1024,%rsp
+/* Load the eight input vectors at byte offsets 32..287; the first vector is read from memory inside the loop. */
+	vmovdqu	32-128(%rsi),%ymm1
+	vmovdqu	64-128(%rsi),%ymm2
+	vmovdqu	96-128(%rsi),%ymm3
+	vmovdqu	128-128(%rsi),%ymm4
+	vmovdqu	160-128(%rsi),%ymm5
+	vmovdqu	192-128(%rsi),%ymm6
+	vmovdqu	224-128(%rsi),%ymm7
+	vmovdqu	256-128(%rsi),%ymm8
+
+	leaq	192(%rsp),%rbx
+	vmovdqu	.Land_mask(%rip),%ymm15	/* mask used with the 29-bit shifts below; presumably 2^29-1 in the lanes - defined outside this excerpt */
+	jmp	.LOOP_GRANDE_SQR_1024
+/* Outer loop: one full squaring plus reduction per iteration, repeated %r8d times. */
+.align	32
+.LOOP_GRANDE_SQR_1024:
+	leaq	576+128(%rsp),%r9
+	leaq	448(%rsp),%r12
+
+
+/* Double input vectors 1..8 and save the doubled copies at 576(%rsp) onward (addressed as N-128(%r9)); */
+/* presumably so that each cross product is counted twice, as a squaring requires. */
+	vpaddq	%ymm1,%ymm1,%ymm1
+	vpbroadcastq	0-128(%rsi),%ymm10
+	vpaddq	%ymm2,%ymm2,%ymm2
+	vmovdqa	%ymm1,0-128(%r9)
+	vpaddq	%ymm3,%ymm3,%ymm3
+	vmovdqa	%ymm2,32-128(%r9)
+	vpaddq	%ymm4,%ymm4,%ymm4
+	vmovdqa	%ymm3,64-128(%r9)
+	vpaddq	%ymm5,%ymm5,%ymm5
+	vmovdqa	%ymm4,96-128(%r9)
+	vpaddq	%ymm6,%ymm6,%ymm6
+	vmovdqa	%ymm5,128-128(%r9)
+	vpaddq	%ymm7,%ymm7,%ymm7
+	vmovdqa	%ymm6,160-128(%r9)
+	vpaddq	%ymm8,%ymm8,%ymm8
+	vmovdqa	%ymm7,192-128(%r9)
+	vpxor	%ymm9,%ymm9,%ymm9
+	vmovdqa	%ymm8,224-128(%r9)
+/* First row of products by the broadcast first input word, interleaved with zeroing the upper accumulator slots (288(%rsp) through 544(%rsp)). */
+	vpmuludq	0-128(%rsi),%ymm10,%ymm0
+	vpbroadcastq	32-128(%rsi),%ymm11
+	vmovdqu	%ymm9,288-192(%rbx)
+	vpmuludq	%ymm10,%ymm1,%ymm1
+	vmovdqu	%ymm9,320-448(%r12)
+	vpmuludq	%ymm10,%ymm2,%ymm2
+	vmovdqu	%ymm9,352-448(%r12)
+	vpmuludq	%ymm10,%ymm3,%ymm3
+	vmovdqu	%ymm9,384-448(%r12)
+	vpmuludq	%ymm10,%ymm4,%ymm4
+	vmovdqu	%ymm9,416-448(%r12)
+	vpmuludq	%ymm10,%ymm5,%ymm5
+	vmovdqu	%ymm9,448-448(%r12)
+	vpmuludq	%ymm10,%ymm6,%ymm6
+	vmovdqu	%ymm9,480-448(%r12)
+	vpmuludq	%ymm10,%ymm7,%ymm7
+	vmovdqu	%ymm9,512-448(%r12)
+	vpmuludq	%ymm10,%ymm8,%ymm8
+	vpbroadcastq	64-128(%rsi),%ymm10
+	vmovdqu	%ymm9,544-448(%r12)
+/* Four passes (%r14d); each pass advances %rbx, %r12 and %r15 by 8 bytes, i.e. by one 64-bit lane. The first pass enters at .Lsqr_entry_1024. */
+	movq	%rsi,%r15
+	movl	$4,%r14d
+	jmp	.Lsqr_entry_1024
+.align	32
+.LOOP_SQR_1024:
+	vpbroadcastq	32-128(%r15),%ymm11
+	vpmuludq	0-128(%rsi),%ymm10,%ymm0
+	vpaddq	0-192(%rbx),%ymm0,%ymm0
+	vpmuludq	0-128(%r9),%ymm10,%ymm1
+	vpaddq	32-192(%rbx),%ymm1,%ymm1
+	vpmuludq	32-128(%r9),%ymm10,%ymm2
+	vpaddq	64-192(%rbx),%ymm2,%ymm2
+	vpmuludq	64-128(%r9),%ymm10,%ymm3
+	vpaddq	96-192(%rbx),%ymm3,%ymm3
+	vpmuludq	96-128(%r9),%ymm10,%ymm4
+	vpaddq	128-192(%rbx),%ymm4,%ymm4
+	vpmuludq	128-128(%r9),%ymm10,%ymm5
+	vpaddq	160-192(%rbx),%ymm5,%ymm5
+	vpmuludq	160-128(%r9),%ymm10,%ymm6
+	vpaddq	192-192(%rbx),%ymm6,%ymm6
+	vpmuludq	192-128(%r9),%ymm10,%ymm7
+	vpaddq	224-192(%rbx),%ymm7,%ymm7
+	vpmuludq	224-128(%r9),%ymm10,%ymm8
+	vpbroadcastq	64-128(%r15),%ymm10
+	vpaddq	256-192(%rbx),%ymm8,%ymm8
+.Lsqr_entry_1024:
+	vmovdqu	%ymm0,0-192(%rbx)
+	vmovdqu	%ymm1,32-192(%rbx)
+/* Each group below multiplies one undoubled input vector (from %rsi) and the doubled vectors (from %r9) by a broadcast word, then stores two finished accumulators. */
+	vpmuludq	32-128(%rsi),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm2,%ymm2
+	vpmuludq	32-128(%r9),%ymm11,%ymm14
+	vpaddq	%ymm14,%ymm3,%ymm3
+	vpmuludq	64-128(%r9),%ymm11,%ymm13
+	vpaddq	%ymm13,%ymm4,%ymm4
+	vpmuludq	96-128(%r9),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpmuludq	128-128(%r9),%ymm11,%ymm14
+	vpaddq	%ymm14,%ymm6,%ymm6
+	vpmuludq	160-128(%r9),%ymm11,%ymm13
+	vpaddq	%ymm13,%ymm7,%ymm7
+	vpmuludq	192-128(%r9),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm8,%ymm8
+	vpmuludq	224-128(%r9),%ymm11,%ymm0
+	vpbroadcastq	96-128(%r15),%ymm11
+	vpaddq	288-192(%rbx),%ymm0,%ymm0
+
+	vmovdqu	%ymm2,64-192(%rbx)
+	vmovdqu	%ymm3,96-192(%rbx)
+
+	vpmuludq	64-128(%rsi),%ymm10,%ymm13
+	vpaddq	%ymm13,%ymm4,%ymm4
+	vpmuludq	64-128(%r9),%ymm10,%ymm12
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpmuludq	96-128(%r9),%ymm10,%ymm14
+	vpaddq	%ymm14,%ymm6,%ymm6
+	vpmuludq	128-128(%r9),%ymm10,%ymm13
+	vpaddq	%ymm13,%ymm7,%ymm7
+	vpmuludq	160-128(%r9),%ymm10,%ymm12
+	vpaddq	%ymm12,%ymm8,%ymm8
+	vpmuludq	192-128(%r9),%ymm10,%ymm14
+	vpaddq	%ymm14,%ymm0,%ymm0
+	vpmuludq	224-128(%r9),%ymm10,%ymm1
+	vpbroadcastq	128-128(%r15),%ymm10
+	vpaddq	320-448(%r12),%ymm1,%ymm1
+
+	vmovdqu	%ymm4,128-192(%rbx)
+	vmovdqu	%ymm5,160-192(%rbx)
+
+	vpmuludq	96-128(%rsi),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm6,%ymm6
+	vpmuludq	96-128(%r9),%ymm11,%ymm14
+	vpaddq	%ymm14,%ymm7,%ymm7
+	vpmuludq	128-128(%r9),%ymm11,%ymm13
+	vpaddq	%ymm13,%ymm8,%ymm8
+	vpmuludq	160-128(%r9),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm0,%ymm0
+	vpmuludq	192-128(%r9),%ymm11,%ymm14
+	vpaddq	%ymm14,%ymm1,%ymm1
+	vpmuludq	224-128(%r9),%ymm11,%ymm2
+	vpbroadcastq	160-128(%r15),%ymm11
+	vpaddq	352-448(%r12),%ymm2,%ymm2
+
+	vmovdqu	%ymm6,192-192(%rbx)
+	vmovdqu	%ymm7,224-192(%rbx)
+
+	vpmuludq	128-128(%rsi),%ymm10,%ymm12
+	vpaddq	%ymm12,%ymm8,%ymm8
+	vpmuludq	128-128(%r9),%ymm10,%ymm14
+	vpaddq	%ymm14,%ymm0,%ymm0
+	vpmuludq	160-128(%r9),%ymm10,%ymm13
+	vpaddq	%ymm13,%ymm1,%ymm1
+	vpmuludq	192-128(%r9),%ymm10,%ymm12
+	vpaddq	%ymm12,%ymm2,%ymm2
+	vpmuludq	224-128(%r9),%ymm10,%ymm3
+	vpbroadcastq	192-128(%r15),%ymm10
+	vpaddq	384-448(%r12),%ymm3,%ymm3
+
+	vmovdqu	%ymm8,256-192(%rbx)
+	vmovdqu	%ymm0,288-192(%rbx)
+	leaq	8(%rbx),%rbx
+
+	vpmuludq	160-128(%rsi),%ymm11,%ymm13
+	vpaddq	%ymm13,%ymm1,%ymm1
+	vpmuludq	160-128(%r9),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm2,%ymm2
+	vpmuludq	192-128(%r9),%ymm11,%ymm14
+	vpaddq	%ymm14,%ymm3,%ymm3
+	vpmuludq	224-128(%r9),%ymm11,%ymm4
+	vpbroadcastq	224-128(%r15),%ymm11
+	vpaddq	416-448(%r12),%ymm4,%ymm4
+
+	vmovdqu	%ymm1,320-448(%r12)
+	vmovdqu	%ymm2,352-448(%r12)
+
+	vpmuludq	192-128(%rsi),%ymm10,%ymm12
+	vpaddq	%ymm12,%ymm3,%ymm3
+	vpmuludq	192-128(%r9),%ymm10,%ymm14
+	vpbroadcastq	256-128(%r15),%ymm0
+	vpaddq	%ymm14,%ymm4,%ymm4
+	vpmuludq	224-128(%r9),%ymm10,%ymm5
+	vpbroadcastq	0+8-128(%r15),%ymm10
+	vpaddq	448-448(%r12),%ymm5,%ymm5
+
+	vmovdqu	%ymm3,384-448(%r12)
+	vmovdqu	%ymm4,416-448(%r12)
+	leaq	8(%r15),%r15
+
+	vpmuludq	224-128(%rsi),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpmuludq	224-128(%r9),%ymm11,%ymm6
+	vpaddq	480-448(%r12),%ymm6,%ymm6
+
+	vpmuludq	256-128(%rsi),%ymm0,%ymm7
+	vmovdqu	%ymm5,448-448(%r12)
+	vpaddq	512-448(%r12),%ymm7,%ymm7
+	vmovdqu	%ymm6,480-448(%r12)
+	vmovdqu	%ymm7,512-448(%r12)
+	leaq	8(%r12),%r12
+
+	decl	%r14d
+	jnz	.LOOP_SQR_1024
+/* Before reducing, push the bits above bit 28 of the vectors at 256(%rsp) and 288(%rsp) one lane upward (vpermq 0x93 rotates lanes by one); the vector at 320(%rsp) only receives carry. */
+	vmovdqu	256(%rsp),%ymm8
+	vmovdqu	288(%rsp),%ymm1
+	vmovdqu	320(%rsp),%ymm2
+	leaq	192(%rsp),%rbx
+
+	vpsrlq	$29,%ymm8,%ymm14
+	vpand	%ymm15,%ymm8,%ymm8
+	vpsrlq	$29,%ymm1,%ymm11
+	vpand	%ymm15,%ymm1,%ymm1
+
+	vpermq	$0x93,%ymm14,%ymm14
+	vpxor	%ymm9,%ymm9,%ymm9
+	vpermq	$0x93,%ymm11,%ymm11
+
+	vpblendd	$3,%ymm9,%ymm14,%ymm10
+	vpblendd	$3,%ymm14,%ymm11,%ymm14
+	vpaddq	%ymm10,%ymm8,%ymm8
+	vpblendd	$3,%ymm11,%ymm9,%ymm11
+	vpaddq	%ymm14,%ymm1,%ymm1
+	vpaddq	%ymm11,%ymm2,%ymm2
+	vmovdqu	%ymm1,288-192(%rbx)
+	vmovdqu	%ymm2,320-192(%rbx)
+/* Reload the low accumulators; the first four 64-bit words go to %rax, %r10, %r11, %r12 for scalar processing, the rest to %ymm1..%ymm7. */
+	movq	(%rsp),%rax
+	movq	8(%rsp),%r10
+	movq	16(%rsp),%r11
+	movq	24(%rsp),%r12
+	vmovdqu	32(%rsp),%ymm1
+	vmovdqu	64-192(%rbx),%ymm2
+	vmovdqu	96-192(%rbx),%ymm3
+	vmovdqu	128-192(%rbx),%ymm4
+	vmovdqu	160-192(%rbx),%ymm5
+	vmovdqu	192-192(%rbx),%ymm6
+	vmovdqu	224-192(%rbx),%ymm7
+/* Reduction digit = (low word * %ecx) & 0x1fffffff, i.e. 29 bits; it is broadcast for the vector part and multiplied against the first four table words in scalar registers. */
+	movq	%rax,%r9
+	imull	%ecx,%eax
+	andl	$0x1fffffff,%eax
+	vmovd	%eax,%xmm12
+
+	movq	%rax,%rdx
+	imulq	-128(%r13),%rax
+	vpbroadcastq	%xmm12,%ymm12
+	addq	%rax,%r9
+	movq	%rdx,%rax
+	imulq	8-128(%r13),%rax
+	shrq	$29,%r9
+	addq	%rax,%r10
+	movq	%rdx,%rax
+	imulq	16-128(%r13),%rax
+	addq	%r9,%r10
+	addq	%rax,%r11
+	imulq	24-128(%r13),%rdx
+	addq	%rdx,%r12
+
+	movq	%r10,%rax
+	imull	%ecx,%eax
+	andl	$0x1fffffff,%eax
+/* Nine passes of the reduction loop; each pass derives four digits (in %ymm12, %ymm13, %ymm12 again and %ymm0). */
+	movl	$9,%r14d
+	jmp	.LOOP_REDUCE_1024
+
+.align	32
+.LOOP_REDUCE_1024:
+	vmovd	%eax,%xmm13
+	vpbroadcastq	%xmm13,%ymm13
+
+	vpmuludq	32-128(%r13),%ymm12,%ymm10
+	movq	%rax,%rdx
+	imulq	-128(%r13),%rax
+	vpaddq	%ymm10,%ymm1,%ymm1
+	addq	%rax,%r10
+	vpmuludq	64-128(%r13),%ymm12,%ymm14
+	movq	%rdx,%rax
+	imulq	8-128(%r13),%rax
+	vpaddq	%ymm14,%ymm2,%ymm2
+	vpmuludq	96-128(%r13),%ymm12,%ymm11
+.byte	0x67	/* 0x67 prefix glued onto the next instruction; it has no effect on register-only operands - presumably padding for instruction alignment (same for the other lone 0x67 bytes below) */
+	addq	%rax,%r11
+.byte	0x67
+	movq	%rdx,%rax
+	imulq	16-128(%r13),%rax
+	shrq	$29,%r10
+	vpaddq	%ymm11,%ymm3,%ymm3
+	vpmuludq	128-128(%r13),%ymm12,%ymm10
+	addq	%rax,%r12
+	addq	%r10,%r11
+	vpaddq	%ymm10,%ymm4,%ymm4
+	vpmuludq	160-128(%r13),%ymm12,%ymm14
+	movq	%r11,%rax
+	imull	%ecx,%eax
+	vpaddq	%ymm14,%ymm5,%ymm5
+	vpmuludq	192-128(%r13),%ymm12,%ymm11
+	andl	$0x1fffffff,%eax
+	vpaddq	%ymm11,%ymm6,%ymm6
+	vpmuludq	224-128(%r13),%ymm12,%ymm10
+	vpaddq	%ymm10,%ymm7,%ymm7
+	vpmuludq	256-128(%r13),%ymm12,%ymm14
+	vmovd	%eax,%xmm12
+
+	vpaddq	%ymm14,%ymm8,%ymm8
+
+	vpbroadcastq	%xmm12,%ymm12
+/* Second digit (%ymm13): the table is read at an extra -8 byte offset, i.e. shifted down by one 64-bit lane. */
+	vpmuludq	32-8-128(%r13),%ymm13,%ymm11
+	vmovdqu	96-8-128(%r13),%ymm14
+	movq	%rax,%rdx
+	imulq	-128(%r13),%rax
+	vpaddq	%ymm11,%ymm1,%ymm1
+	vpmuludq	64-8-128(%r13),%ymm13,%ymm10
+	vmovdqu	128-8-128(%r13),%ymm11
+	addq	%rax,%r11
+	movq	%rdx,%rax
+	imulq	8-128(%r13),%rax
+	vpaddq	%ymm10,%ymm2,%ymm2
+	addq	%r12,%rax
+	shrq	$29,%r11
+	vpmuludq	%ymm13,%ymm14,%ymm14
+	vmovdqu	160-8-128(%r13),%ymm10
+	addq	%r11,%rax
+	vpaddq	%ymm14,%ymm3,%ymm3
+	vpmuludq	%ymm13,%ymm11,%ymm11
+	vmovdqu	192-8-128(%r13),%ymm14
+.byte	0x67
+	movq	%rax,%r12
+	imull	%ecx,%eax
+	vpaddq	%ymm11,%ymm4,%ymm4
+	vpmuludq	%ymm13,%ymm10,%ymm10
+.byte	0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00	/* hand-encoded vmovdqu 224-8-128(%r13),%ymm11 using a 32-bit displacement */
+	andl	$0x1fffffff,%eax
+	vpaddq	%ymm10,%ymm5,%ymm5
+	vpmuludq	%ymm13,%ymm14,%ymm14
+	vmovdqu	256-8-128(%r13),%ymm10
+	vpaddq	%ymm14,%ymm6,%ymm6
+	vpmuludq	%ymm13,%ymm11,%ymm11
+	vmovdqu	288-8-128(%r13),%ymm9
+	vmovd	%eax,%xmm0
+	imulq	-128(%r13),%rax
+	vpaddq	%ymm11,%ymm7,%ymm7
+	vpmuludq	%ymm13,%ymm10,%ymm10
+	vmovdqu	32-16-128(%r13),%ymm14
+	vpbroadcastq	%xmm0,%ymm0
+	vpaddq	%ymm10,%ymm8,%ymm8
+	vpmuludq	%ymm13,%ymm9,%ymm9
+	vmovdqu	64-16-128(%r13),%ymm11
+	addq	%rax,%r12
+/* Third digit (%ymm12) uses the table at a -16 byte offset; the fourth (%ymm0) at -24. */
+	vmovdqu	32-24-128(%r13),%ymm13
+	vpmuludq	%ymm12,%ymm14,%ymm14
+	vmovdqu	96-16-128(%r13),%ymm10
+	vpaddq	%ymm14,%ymm1,%ymm1
+	vpmuludq	%ymm0,%ymm13,%ymm13
+	vpmuludq	%ymm12,%ymm11,%ymm11
+.byte	0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff	/* hand-encoded vmovdqu 128-16-128(%r13),%ymm14 using a 32-bit displacement */
+	vpaddq	%ymm1,%ymm13,%ymm13
+	vpaddq	%ymm11,%ymm2,%ymm2
+	vpmuludq	%ymm12,%ymm10,%ymm10
+	vmovdqu	160-16-128(%r13),%ymm11
+.byte	0x67
+	vmovq	%xmm13,%rax
+	vmovdqu	%ymm13,(%rsp)
+	vpaddq	%ymm10,%ymm3,%ymm3
+	vpmuludq	%ymm12,%ymm14,%ymm14
+	vmovdqu	192-16-128(%r13),%ymm10
+	vpaddq	%ymm14,%ymm4,%ymm4
+	vpmuludq	%ymm12,%ymm11,%ymm11
+	vmovdqu	224-16-128(%r13),%ymm14
+	vpaddq	%ymm11,%ymm5,%ymm5
+	vpmuludq	%ymm12,%ymm10,%ymm10
+	vmovdqu	256-16-128(%r13),%ymm11
+	vpaddq	%ymm10,%ymm6,%ymm6
+	vpmuludq	%ymm12,%ymm14,%ymm14
+	shrq	$29,%r12
+	vmovdqu	288-16-128(%r13),%ymm10
+	addq	%r12,%rax
+	vpaddq	%ymm14,%ymm7,%ymm7
+	vpmuludq	%ymm12,%ymm11,%ymm11
+/* Derive the first digit of the next pass from the new low word while the fourth digit's products are still being accumulated. */
+	movq	%rax,%r9
+	imull	%ecx,%eax
+	vpaddq	%ymm11,%ymm8,%ymm8
+	vpmuludq	%ymm12,%ymm10,%ymm10
+	andl	$0x1fffffff,%eax
+	vmovd	%eax,%xmm12
+	vmovdqu	96-24-128(%r13),%ymm11
+.byte	0x67
+	vpaddq	%ymm10,%ymm9,%ymm9
+	vpbroadcastq	%xmm12,%ymm12
+/* The accumulators move down one register here: sums derived from %ymm2..%ymm9 are written to %ymm1..%ymm8. */
+	vpmuludq	64-24-128(%r13),%ymm0,%ymm14
+	vmovdqu	128-24-128(%r13),%ymm10
+	movq	%rax,%rdx
+	imulq	-128(%r13),%rax
+	movq	8(%rsp),%r10
+	vpaddq	%ymm14,%ymm2,%ymm1
+	vpmuludq	%ymm0,%ymm11,%ymm11
+	vmovdqu	160-24-128(%r13),%ymm14
+	addq	%rax,%r9
+	movq	%rdx,%rax
+	imulq	8-128(%r13),%rax
+.byte	0x67
+	shrq	$29,%r9
+	movq	16(%rsp),%r11
+	vpaddq	%ymm11,%ymm3,%ymm2
+	vpmuludq	%ymm0,%ymm10,%ymm10
+	vmovdqu	192-24-128(%r13),%ymm11
+	addq	%rax,%r10
+	movq	%rdx,%rax
+	imulq	16-128(%r13),%rax
+	vpaddq	%ymm10,%ymm4,%ymm3
+	vpmuludq	%ymm0,%ymm14,%ymm14
+	vmovdqu	224-24-128(%r13),%ymm10
+	imulq	24-128(%r13),%rdx
+	addq	%rax,%r11
+	leaq	(%r9,%r10,1),%rax
+	vpaddq	%ymm14,%ymm5,%ymm4
+	vpmuludq	%ymm0,%ymm11,%ymm11
+	vmovdqu	256-24-128(%r13),%ymm14
+	movq	%rax,%r10
+	imull	%ecx,%eax
+	vpmuludq	%ymm0,%ymm10,%ymm10
+	vpaddq	%ymm11,%ymm6,%ymm5
+	vmovdqu	288-24-128(%r13),%ymm11
+	andl	$0x1fffffff,%eax
+	vpaddq	%ymm10,%ymm7,%ymm6
+	vpmuludq	%ymm0,%ymm14,%ymm14
+	addq	24(%rsp),%rdx
+	vpaddq	%ymm14,%ymm8,%ymm7
+	vpmuludq	%ymm0,%ymm11,%ymm11
+	vpaddq	%ymm11,%ymm9,%ymm8
+	vmovq	%r12,%xmm9
+	movq	%rdx,%r12
+
+	decl	%r14d
+	jnz	.LOOP_REDUCE_1024
+	leaq	448(%rsp),%r12
+	vpaddq	%ymm9,%ymm13,%ymm0
+	vpxor	%ymm9,%ymm9,%ymm9
+/* Add the nine upper accumulator vectors (288(%rsp) through 544(%rsp)) to the values left in %ymm0..%ymm8 by the reduction loop. */
+	vpaddq	288-192(%rbx),%ymm0,%ymm0
+	vpaddq	320-448(%r12),%ymm1,%ymm1
+	vpaddq	352-448(%r12),%ymm2,%ymm2
+	vpaddq	384-448(%r12),%ymm3,%ymm3
+	vpaddq	416-448(%r12),%ymm4,%ymm4
+	vpaddq	448-448(%r12),%ymm5,%ymm5
+	vpaddq	480-448(%r12),%ymm6,%ymm6
+	vpaddq	512-448(%r12),%ymm7,%ymm7
+	vpaddq	544-448(%r12),%ymm8,%ymm8
+/* Carry normalisation for %ymm0..%ymm4: split off bits 29 and up, rotate them one lane upward (vpermq 0x93), blend across vector boundaries and add. Done twice. */
+	vpsrlq	$29,%ymm0,%ymm14
+	vpand	%ymm15,%ymm0,%ymm0
+	vpsrlq	$29,%ymm1,%ymm11
+	vpand	%ymm15,%ymm1,%ymm1
+	vpsrlq	$29,%ymm2,%ymm12
+	vpermq	$0x93,%ymm14,%ymm14
+	vpand	%ymm15,%ymm2,%ymm2
+	vpsrlq	$29,%ymm3,%ymm13
+	vpermq	$0x93,%ymm11,%ymm11
+	vpand	%ymm15,%ymm3,%ymm3
+	vpermq	$0x93,%ymm12,%ymm12
+
+	vpblendd	$3,%ymm9,%ymm14,%ymm10
+	vpermq	$0x93,%ymm13,%ymm13
+	vpblendd	$3,%ymm14,%ymm11,%ymm14
+	vpaddq	%ymm10,%ymm0,%ymm0
+	vpblendd	$3,%ymm11,%ymm12,%ymm11
+	vpaddq	%ymm14,%ymm1,%ymm1
+	vpblendd	$3,%ymm12,%ymm13,%ymm12
+	vpaddq	%ymm11,%ymm2,%ymm2
+	vpblendd	$3,%ymm13,%ymm9,%ymm13
+	vpaddq	%ymm12,%ymm3,%ymm3
+	vpaddq	%ymm13,%ymm4,%ymm4
+
+	vpsrlq	$29,%ymm0,%ymm14
+	vpand	%ymm15,%ymm0,%ymm0
+	vpsrlq	$29,%ymm1,%ymm11
+	vpand	%ymm15,%ymm1,%ymm1
+	vpsrlq	$29,%ymm2,%ymm12
+	vpermq	$0x93,%ymm14,%ymm14
+	vpand	%ymm15,%ymm2,%ymm2
+	vpsrlq	$29,%ymm3,%ymm13
+	vpermq	$0x93,%ymm11,%ymm11
+	vpand	%ymm15,%ymm3,%ymm3
+	vpermq	$0x93,%ymm12,%ymm12
+
+	vpblendd	$3,%ymm9,%ymm14,%ymm10
+	vpermq	$0x93,%ymm13,%ymm13
+	vpblendd	$3,%ymm14,%ymm11,%ymm14
+	vpaddq	%ymm10,%ymm0,%ymm0
+	vpblendd	$3,%ymm11,%ymm12,%ymm11
+	vpaddq	%ymm14,%ymm1,%ymm1
+	vmovdqu	%ymm0,0-128(%rdi)
+	vpblendd	$3,%ymm12,%ymm13,%ymm12
+	vpaddq	%ymm11,%ymm2,%ymm2
+	vmovdqu	%ymm1,32-128(%rdi)
+	vpblendd	$3,%ymm13,%ymm9,%ymm13
+	vpaddq	%ymm12,%ymm3,%ymm3
+	vmovdqu	%ymm2,64-128(%rdi)
+	vpaddq	%ymm13,%ymm4,%ymm4
+	vmovdqu	%ymm3,96-128(%rdi)
+	vpsrlq	$29,%ymm4,%ymm14
+	vpand	%ymm15,%ymm4,%ymm4
+	vpsrlq	$29,%ymm5,%ymm11
+	vpand	%ymm15,%ymm5,%ymm5
+	vpsrlq	$29,%ymm6,%ymm12
+	vpermq	$0x93,%ymm14,%ymm14
+	vpand	%ymm15,%ymm6,%ymm6
+	vpsrlq	$29,%ymm7,%ymm13
+	vpermq	$0x93,%ymm11,%ymm11
+	vpand	%ymm15,%ymm7,%ymm7
+	vpsrlq	$29,%ymm8,%ymm0
+	vpermq	$0x93,%ymm12,%ymm12
+	vpand	%ymm15,%ymm8,%ymm8
+	vpermq	$0x93,%ymm13,%ymm13
+
+	vpblendd	$3,%ymm9,%ymm14,%ymm10
+	vpermq	$0x93,%ymm0,%ymm0
+	vpblendd	$3,%ymm14,%ymm11,%ymm14
+	vpaddq	%ymm10,%ymm4,%ymm4
+	vpblendd	$3,%ymm11,%ymm12,%ymm11
+	vpaddq	%ymm14,%ymm5,%ymm5
+	vpblendd	$3,%ymm12,%ymm13,%ymm12
+	vpaddq	%ymm11,%ymm6,%ymm6
+	vpblendd	$3,%ymm13,%ymm0,%ymm13
+	vpaddq	%ymm12,%ymm7,%ymm7
+	vpaddq	%ymm13,%ymm8,%ymm8
+
+	vpsrlq	$29,%ymm4,%ymm14
+	vpand	%ymm15,%ymm4,%ymm4
+	vpsrlq	$29,%ymm5,%ymm11
+	vpand	%ymm15,%ymm5,%ymm5
+	vpsrlq	$29,%ymm6,%ymm12
+	vpermq	$0x93,%ymm14,%ymm14
+	vpand	%ymm15,%ymm6,%ymm6
+	vpsrlq	$29,%ymm7,%ymm13
+	vpermq	$0x93,%ymm11,%ymm11
+	vpand	%ymm15,%ymm7,%ymm7
+	vpsrlq	$29,%ymm8,%ymm0
+	vpermq	$0x93,%ymm12,%ymm12
+	vpand	%ymm15,%ymm8,%ymm8
+	vpermq	$0x93,%ymm13,%ymm13
+
+	vpblendd	$3,%ymm9,%ymm14,%ymm10
+	vpermq	$0x93,%ymm0,%ymm0
+	vpblendd	$3,%ymm14,%ymm11,%ymm14
+	vpaddq	%ymm10,%ymm4,%ymm4
+	vpblendd	$3,%ymm11,%ymm12,%ymm11
+	vpaddq	%ymm14,%ymm5,%ymm5
+	vmovdqu	%ymm4,128-128(%rdi)
+	vpblendd	$3,%ymm12,%ymm13,%ymm12
+	vpaddq	%ymm11,%ymm6,%ymm6
+	vmovdqu	%ymm5,160-128(%rdi)
+	vpblendd	$3,%ymm13,%ymm0,%ymm13
+	vpaddq	%ymm12,%ymm7,%ymm7
+	vmovdqu	%ymm6,192-128(%rdi)
+	vpaddq	%ymm13,%ymm8,%ymm8
+	vmovdqu	%ymm7,224-128(%rdi)
+	vmovdqu	%ymm8,256-128(%rdi)
+/* The result just stored becomes the input of the next squaring; %ymm1..%ymm8 still hold its vectors 1..8. */
+	movq	%rdi,%rsi
+	decl	%r8d
+	jne	.LOOP_GRANDE_SQR_1024
+/* Zero every vector register, then restore the callee-saved registers and %rsp from the entry stack pointer kept in %rbp. */
+	vzeroall
+	movq	%rbp,%rax
+.cfi_def_cfa_register	%rax
+	movq	-48(%rax),%r15
+.cfi_restore	%r15
+	movq	-40(%rax),%r14
+.cfi_restore	%r14
+	movq	-32(%rax),%r13
+.cfi_restore	%r13
+	movq	-24(%rax),%r12
+.cfi_restore	%r12
+	movq	-16(%rax),%rbp
+.cfi_restore	%rbp
+	movq	-8(%rax),%rbx
+.cfi_restore	%rbx
+	leaq	(%rax),%rsp
+.cfi_def_cfa_register	%rsp
+.Lsqr_1024_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+.cfi_endproc	
+.size	rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
+.globl	rsaz_1024_mul_avx2
+.hidden rsaz_1024_mul_avx2
+.type	rsaz_1024_mul_avx2,@function
+.align	64
+rsaz_1024_mul_avx2:
+.cfi_startproc	
+	leaq	(%rsp),%rax
+.cfi_def_cfa_register	%rax
+	pushq	%rbx
+.cfi_offset	%rbx,-16
+	pushq	%rbp
+.cfi_offset	%rbp,-24
+	pushq	%r12
+.cfi_offset	%r12,-32
+	pushq	%r13
+.cfi_offset	%r13,-40
+	pushq	%r14
+.cfi_offset	%r14,-48
+	pushq	%r15
+.cfi_offset	%r15,-56
+	movq	%rax,%rbp
+.cfi_def_cfa_register	%rbp
+	vzeroall
+	movq	%rdx,%r13
+	subq	$64,%rsp
+
+
+
+
+
+
+.byte	0x67,0x67
+	movq	%rsi,%r15
+	andq	$4095,%r15
+	addq	$320,%r15
+	shrq	$12,%r15
+	movq	%rsi,%r15
+	cmovnzq	%r13,%rsi
+	cmovnzq	%r15,%r13
+
+	movq	%rcx,%r15
+	subq	$-128,%rsi
+	subq	$-128,%rcx
+	subq	$-128,%rdi
+
+	andq	$4095,%r15
+	addq	$320,%r15
+.byte	0x67,0x67
+	shrq	$12,%r15
+	jz	.Lmul_1024_no_n_copy
+
+
+
+
+
+	subq	$320,%rsp
+	vmovdqu	0-128(%rcx),%ymm0
+	andq	$-512,%rsp
+	vmovdqu	32-128(%rcx),%ymm1
+	vmovdqu	64-128(%rcx),%ymm2
+	vmovdqu	96-128(%rcx),%ymm3
+	vmovdqu	128-128(%rcx),%ymm4
+	vmovdqu	160-128(%rcx),%ymm5
+	vmovdqu	192-128(%rcx),%ymm6
+	vmovdqu	224-128(%rcx),%ymm7
+	vmovdqu	256-128(%rcx),%ymm8
+	leaq	64+128(%rsp),%rcx
+	vmovdqu	%ymm0,0-128(%rcx)
+	vpxor	%ymm0,%ymm0,%ymm0
+	vmovdqu	%ymm1,32-128(%rcx)
+	vpxor	%ymm1,%ymm1,%ymm1
+	vmovdqu	%ymm2,64-128(%rcx)
+	vpxor	%ymm2,%ymm2,%ymm2
+	vmovdqu	%ymm3,96-128(%rcx)
+	vpxor	%ymm3,%ymm3,%ymm3
+	vmovdqu	%ymm4,128-128(%rcx)
+	vpxor	%ymm4,%ymm4,%ymm4
+	vmovdqu	%ymm5,160-128(%rcx)
+	vpxor	%ymm5,%ymm5,%ymm5
+	vmovdqu	%ymm6,192-128(%rcx)
+	vpxor	%ymm6,%ymm6,%ymm6
+	vmovdqu	%ymm7,224-128(%rcx)
+	vpxor	%ymm7,%ymm7,%ymm7
+	vmovdqu	%ymm8,256-128(%rcx)
+	vmovdqa	%ymm0,%ymm8
+	vmovdqu	%ymm9,288-128(%rcx)
+.Lmul_1024_no_n_copy:
+	andq	$-64,%rsp
+
+	movq	(%r13),%rbx
+	vpbroadcastq	(%r13),%ymm10
+	vmovdqu	%ymm0,(%rsp)
+	xorq	%r9,%r9
+.byte	0x67
+	xorq	%r10,%r10
+	xorq	%r11,%r11
+	xorq	%r12,%r12
+
+	vmovdqu	.Land_mask(%rip),%ymm15
+	movl	$9,%r14d
+	vmovdqu	%ymm9,288-128(%rdi)
+	jmp	.Loop_mul_1024
+
+.align	32
+.Loop_mul_1024:
+	vpsrlq	$29,%ymm3,%ymm9
+	movq	%rbx,%rax
+	imulq	-128(%rsi),%rax
+	addq	%r9,%rax
+	movq	%rbx,%r10
+	imulq	8-128(%rsi),%r10
+	addq	8(%rsp),%r10
+
+	movq	%rax,%r9
+	imull	%r8d,%eax
+	andl	$0x1fffffff,%eax
+
+	movq	%rbx,%r11
+	imulq	16-128(%rsi),%r11
+	addq	16(%rsp),%r11
+
+	movq	%rbx,%r12
+	imulq	24-128(%rsi),%r12
+	addq	24(%rsp),%r12
+	vpmuludq	32-128(%rsi),%ymm10,%ymm0
+	vmovd	%eax,%xmm11
+	vpaddq	%ymm0,%ymm1,%ymm1
+	vpmuludq	64-128(%rsi),%ymm10,%ymm12
+	vpbroadcastq	%xmm11,%ymm11
+	vpaddq	%ymm12,%ymm2,%ymm2
+	vpmuludq	96-128(%rsi),%ymm10,%ymm13
+	vpand	%ymm15,%ymm3,%ymm3
+	vpaddq	%ymm13,%ymm3,%ymm3
+	vpmuludq	128-128(%rsi),%ymm10,%ymm0
+	vpaddq	%ymm0,%ymm4,%ymm4
+	vpmuludq	160-128(%rsi),%ymm10,%ymm12
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpmuludq	192-128(%rsi),%ymm10,%ymm13
+	vpaddq	%ymm13,%ymm6,%ymm6
+	vpmuludq	224-128(%rsi),%ymm10,%ymm0
+	vpermq	$0x93,%ymm9,%ymm9
+	vpaddq	%ymm0,%ymm7,%ymm7
+	vpmuludq	256-128(%rsi),%ymm10,%ymm12
+	vpbroadcastq	8(%r13),%ymm10
+	vpaddq	%ymm12,%ymm8,%ymm8
+
+	movq	%rax,%rdx
+	imulq	-128(%rcx),%rax
+	addq	%rax,%r9
+	movq	%rdx,%rax
+	imulq	8-128(%rcx),%rax
+	addq	%rax,%r10
+	movq	%rdx,%rax
+	imulq	16-128(%rcx),%rax
+	addq	%rax,%r11
+	shrq	$29,%r9
+	imulq	24-128(%rcx),%rdx
+	addq	%rdx,%r12
+	addq	%r9,%r10
+
+	vpmuludq	32-128(%rcx),%ymm11,%ymm13
+	vmovq	%xmm10,%rbx
+	vpaddq	%ymm13,%ymm1,%ymm1
+	vpmuludq	64-128(%rcx),%ymm11,%ymm0
+	vpaddq	%ymm0,%ymm2,%ymm2
+	vpmuludq	96-128(%rcx),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm3,%ymm3
+	vpmuludq	128-128(%rcx),%ymm11,%ymm13
+	vpaddq	%ymm13,%ymm4,%ymm4
+	vpmuludq	160-128(%rcx),%ymm11,%ymm0
+	vpaddq	%ymm0,%ymm5,%ymm5
+	vpmuludq	192-128(%rcx),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm6,%ymm6
+	vpmuludq	224-128(%rcx),%ymm11,%ymm13
+	vpblendd	$3,%ymm14,%ymm9,%ymm12
+	vpaddq	%ymm13,%ymm7,%ymm7
+	vpmuludq	256-128(%rcx),%ymm11,%ymm0
+	vpaddq	%ymm12,%ymm3,%ymm3
+	vpaddq	%ymm0,%ymm8,%ymm8
+
+	movq	%rbx,%rax
+	imulq	-128(%rsi),%rax
+	addq	%rax,%r10
+	vmovdqu	-8+32-128(%rsi),%ymm12
+	movq	%rbx,%rax
+	imulq	8-128(%rsi),%rax
+	addq	%rax,%r11
+	vmovdqu	-8+64-128(%rsi),%ymm13
+
+	movq	%r10,%rax
+	vpblendd	$0xfc,%ymm14,%ymm9,%ymm9
+	imull	%r8d,%eax
+	vpaddq	%ymm9,%ymm4,%ymm4
+	andl	$0x1fffffff,%eax
+
+	imulq	16-128(%rsi),%rbx
+	addq	%rbx,%r12
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vmovd	%eax,%xmm11
+	vmovdqu	-8+96-128(%rsi),%ymm0
+	vpaddq	%ymm12,%ymm1,%ymm1
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vpbroadcastq	%xmm11,%ymm11
+	vmovdqu	-8+128-128(%rsi),%ymm12
+	vpaddq	%ymm13,%ymm2,%ymm2
+	vpmuludq	%ymm10,%ymm0,%ymm0
+	vmovdqu	-8+160-128(%rsi),%ymm13
+	vpaddq	%ymm0,%ymm3,%ymm3
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vmovdqu	-8+192-128(%rsi),%ymm0
+	vpaddq	%ymm12,%ymm4,%ymm4
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vmovdqu	-8+224-128(%rsi),%ymm12
+	vpaddq	%ymm13,%ymm5,%ymm5
+	vpmuludq	%ymm10,%ymm0,%ymm0
+	vmovdqu	-8+256-128(%rsi),%ymm13
+	vpaddq	%ymm0,%ymm6,%ymm6
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vmovdqu	-8+288-128(%rsi),%ymm9
+	vpaddq	%ymm12,%ymm7,%ymm7
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vpaddq	%ymm13,%ymm8,%ymm8
+	vpmuludq	%ymm10,%ymm9,%ymm9
+	vpbroadcastq	16(%r13),%ymm10
+
+	movq	%rax,%rdx
+	imulq	-128(%rcx),%rax
+	addq	%rax,%r10
+	vmovdqu	-8+32-128(%rcx),%ymm0
+	movq	%rdx,%rax
+	imulq	8-128(%rcx),%rax
+	addq	%rax,%r11
+	vmovdqu	-8+64-128(%rcx),%ymm12
+	shrq	$29,%r10
+	imulq	16-128(%rcx),%rdx
+	addq	%rdx,%r12
+	addq	%r10,%r11
+
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovq	%xmm10,%rbx
+	vmovdqu	-8+96-128(%rcx),%ymm13
+	vpaddq	%ymm0,%ymm1,%ymm1
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	vmovdqu	-8+128-128(%rcx),%ymm0
+	vpaddq	%ymm12,%ymm2,%ymm2
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vmovdqu	-8+160-128(%rcx),%ymm12
+	vpaddq	%ymm13,%ymm3,%ymm3
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovdqu	-8+192-128(%rcx),%ymm13
+	vpaddq	%ymm0,%ymm4,%ymm4
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	vmovdqu	-8+224-128(%rcx),%ymm0
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vmovdqu	-8+256-128(%rcx),%ymm12
+	vpaddq	%ymm13,%ymm6,%ymm6
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovdqu	-8+288-128(%rcx),%ymm13
+	vpaddq	%ymm0,%ymm7,%ymm7
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	vpaddq	%ymm12,%ymm8,%ymm8
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vpaddq	%ymm13,%ymm9,%ymm9
+
+	vmovdqu	-16+32-128(%rsi),%ymm0
+	movq	%rbx,%rax
+	imulq	-128(%rsi),%rax
+	addq	%r11,%rax
+
+	vmovdqu	-16+64-128(%rsi),%ymm12
+	movq	%rax,%r11
+	imull	%r8d,%eax
+	andl	$0x1fffffff,%eax
+
+	imulq	8-128(%rsi),%rbx
+	addq	%rbx,%r12
+	vpmuludq	%ymm10,%ymm0,%ymm0
+	vmovd	%eax,%xmm11
+	vmovdqu	-16+96-128(%rsi),%ymm13
+	vpaddq	%ymm0,%ymm1,%ymm1
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vpbroadcastq	%xmm11,%ymm11
+	vmovdqu	-16+128-128(%rsi),%ymm0
+	vpaddq	%ymm12,%ymm2,%ymm2
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vmovdqu	-16+160-128(%rsi),%ymm12
+	vpaddq	%ymm13,%ymm3,%ymm3
+	vpmuludq	%ymm10,%ymm0,%ymm0
+	vmovdqu	-16+192-128(%rsi),%ymm13
+	vpaddq	%ymm0,%ymm4,%ymm4
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vmovdqu	-16+224-128(%rsi),%ymm0
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vmovdqu	-16+256-128(%rsi),%ymm12
+	vpaddq	%ymm13,%ymm6,%ymm6
+	vpmuludq	%ymm10,%ymm0,%ymm0
+	vmovdqu	-16+288-128(%rsi),%ymm13
+	vpaddq	%ymm0,%ymm7,%ymm7
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vpaddq	%ymm12,%ymm8,%ymm8
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vpbroadcastq	24(%r13),%ymm10
+	vpaddq	%ymm13,%ymm9,%ymm9
+
+	vmovdqu	-16+32-128(%rcx),%ymm0
+	movq	%rax,%rdx
+	imulq	-128(%rcx),%rax
+	addq	%rax,%r11
+	vmovdqu	-16+64-128(%rcx),%ymm12
+	imulq	8-128(%rcx),%rdx
+	addq	%rdx,%r12
+	shrq	$29,%r11
+
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovq	%xmm10,%rbx
+	vmovdqu	-16+96-128(%rcx),%ymm13
+	vpaddq	%ymm0,%ymm1,%ymm1
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	vmovdqu	-16+128-128(%rcx),%ymm0
+	vpaddq	%ymm12,%ymm2,%ymm2
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vmovdqu	-16+160-128(%rcx),%ymm12
+	vpaddq	%ymm13,%ymm3,%ymm3
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovdqu	-16+192-128(%rcx),%ymm13
+	vpaddq	%ymm0,%ymm4,%ymm4
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	vmovdqu	-16+224-128(%rcx),%ymm0
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vmovdqu	-16+256-128(%rcx),%ymm12
+	vpaddq	%ymm13,%ymm6,%ymm6
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovdqu	-16+288-128(%rcx),%ymm13
+	vpaddq	%ymm0,%ymm7,%ymm7
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	vmovdqu	-24+32-128(%rsi),%ymm0
+	vpaddq	%ymm12,%ymm8,%ymm8
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vmovdqu	-24+64-128(%rsi),%ymm12
+	vpaddq	%ymm13,%ymm9,%ymm9
+
+	addq	%r11,%r12
+	imulq	-128(%rsi),%rbx
+	addq	%rbx,%r12
+
+	movq	%r12,%rax
+	imull	%r8d,%eax
+	andl	$0x1fffffff,%eax
+
+	vpmuludq	%ymm10,%ymm0,%ymm0
+	vmovd	%eax,%xmm11
+	vmovdqu	-24+96-128(%rsi),%ymm13
+	vpaddq	%ymm0,%ymm1,%ymm1
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vpbroadcastq	%xmm11,%ymm11
+	vmovdqu	-24+128-128(%rsi),%ymm0
+	vpaddq	%ymm12,%ymm2,%ymm2
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vmovdqu	-24+160-128(%rsi),%ymm12
+	vpaddq	%ymm13,%ymm3,%ymm3
+	vpmuludq	%ymm10,%ymm0,%ymm0
+	vmovdqu	-24+192-128(%rsi),%ymm13
+	vpaddq	%ymm0,%ymm4,%ymm4
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vmovdqu	-24+224-128(%rsi),%ymm0
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vmovdqu	-24+256-128(%rsi),%ymm12
+	vpaddq	%ymm13,%ymm6,%ymm6
+	vpmuludq	%ymm10,%ymm0,%ymm0
+	vmovdqu	-24+288-128(%rsi),%ymm13
+	vpaddq	%ymm0,%ymm7,%ymm7
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vpaddq	%ymm12,%ymm8,%ymm8
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vpbroadcastq	32(%r13),%ymm10
+	vpaddq	%ymm13,%ymm9,%ymm9
+	addq	$32,%r13
+
+	vmovdqu	-24+32-128(%rcx),%ymm0
+	imulq	-128(%rcx),%rax
+	addq	%rax,%r12
+	shrq	$29,%r12
+
+	vmovdqu	-24+64-128(%rcx),%ymm12
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovq	%xmm10,%rbx
+	vmovdqu	-24+96-128(%rcx),%ymm13
+	vpaddq	%ymm0,%ymm1,%ymm0
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	vmovdqu	%ymm0,(%rsp)
+	vpaddq	%ymm12,%ymm2,%ymm1
+	vmovdqu	-24+128-128(%rcx),%ymm0
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vmovdqu	-24+160-128(%rcx),%ymm12
+	vpaddq	%ymm13,%ymm3,%ymm2
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovdqu	-24+192-128(%rcx),%ymm13
+	vpaddq	%ymm0,%ymm4,%ymm3
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	vmovdqu	-24+224-128(%rcx),%ymm0
+	vpaddq	%ymm12,%ymm5,%ymm4
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vmovdqu	-24+256-128(%rcx),%ymm12
+	vpaddq	%ymm13,%ymm6,%ymm5
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovdqu	-24+288-128(%rcx),%ymm13
+	movq	%r12,%r9
+	vpaddq	%ymm0,%ymm7,%ymm6
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	addq	(%rsp),%r9
+	vpaddq	%ymm12,%ymm8,%ymm7
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vmovq	%r12,%xmm12
+	vpaddq	%ymm13,%ymm9,%ymm8
+
+	decl	%r14d
+	jnz	.Loop_mul_1024
+	vpaddq	(%rsp),%ymm12,%ymm0
+
+	vpsrlq	$29,%ymm0,%ymm12
+	vpand	%ymm15,%ymm0,%ymm0
+	vpsrlq	$29,%ymm1,%ymm13
+	vpand	%ymm15,%ymm1,%ymm1
+	vpsrlq	$29,%ymm2,%ymm10
+	vpermq	$0x93,%ymm12,%ymm12
+	vpand	%ymm15,%ymm2,%ymm2
+	vpsrlq	$29,%ymm3,%ymm11
+	vpermq	$0x93,%ymm13,%ymm13
+	vpand	%ymm15,%ymm3,%ymm3
+
+	vpblendd	$3,%ymm14,%ymm12,%ymm9
+	vpermq	$0x93,%ymm10,%ymm10
+	vpblendd	$3,%ymm12,%ymm13,%ymm12
+	vpermq	$0x93,%ymm11,%ymm11
+	vpaddq	%ymm9,%ymm0,%ymm0
+	vpblendd	$3,%ymm13,%ymm10,%ymm13
+	vpaddq	%ymm12,%ymm1,%ymm1
+	vpblendd	$3,%ymm10,%ymm11,%ymm10
+	vpaddq	%ymm13,%ymm2,%ymm2
+	vpblendd	$3,%ymm11,%ymm14,%ymm11
+	vpaddq	%ymm10,%ymm3,%ymm3
+	vpaddq	%ymm11,%ymm4,%ymm4
+
+	vpsrlq	$29,%ymm0,%ymm12
+	vpand	%ymm15,%ymm0,%ymm0
+	vpsrlq	$29,%ymm1,%ymm13
+	vpand	%ymm15,%ymm1,%ymm1
+	vpsrlq	$29,%ymm2,%ymm10
+	vpermq	$0x93,%ymm12,%ymm12
+	vpand	%ymm15,%ymm2,%ymm2
+	vpsrlq	$29,%ymm3,%ymm11
+	vpermq	$0x93,%ymm13,%ymm13
+	vpand	%ymm15,%ymm3,%ymm3
+	vpermq	$0x93,%ymm10,%ymm10
+
+	vpblendd	$3,%ymm14,%ymm12,%ymm9
+	vpermq	$0x93,%ymm11,%ymm11
+	vpblendd	$3,%ymm12,%ymm13,%ymm12
+	vpaddq	%ymm9,%ymm0,%ymm0
+	vpblendd	$3,%ymm13,%ymm10,%ymm13
+	vpaddq	%ymm12,%ymm1,%ymm1
+	vpblendd	$3,%ymm10,%ymm11,%ymm10
+	vpaddq	%ymm13,%ymm2,%ymm2
+	vpblendd	$3,%ymm11,%ymm14,%ymm11
+	vpaddq	%ymm10,%ymm3,%ymm3
+	vpaddq	%ymm11,%ymm4,%ymm4
+
+	vmovdqu	%ymm0,0-128(%rdi)
+	vmovdqu	%ymm1,32-128(%rdi)
+	vmovdqu	%ymm2,64-128(%rdi)
+	vmovdqu	%ymm3,96-128(%rdi)
+	vpsrlq	$29,%ymm4,%ymm12
+	vpand	%ymm15,%ymm4,%ymm4
+	vpsrlq	$29,%ymm5,%ymm13
+	vpand	%ymm15,%ymm5,%ymm5
+	vpsrlq	$29,%ymm6,%ymm10
+	vpermq	$0x93,%ymm12,%ymm12
+	vpand	%ymm15,%ymm6,%ymm6
+	vpsrlq	$29,%ymm7,%ymm11
+	vpermq	$0x93,%ymm13,%ymm13
+	vpand	%ymm15,%ymm7,%ymm7
+	vpsrlq	$29,%ymm8,%ymm0
+	vpermq	$0x93,%ymm10,%ymm10
+	vpand	%ymm15,%ymm8,%ymm8
+	vpermq	$0x93,%ymm11,%ymm11
+
+	vpblendd	$3,%ymm14,%ymm12,%ymm9
+	vpermq	$0x93,%ymm0,%ymm0
+	vpblendd	$3,%ymm12,%ymm13,%ymm12
+	vpaddq	%ymm9,%ymm4,%ymm4
+	vpblendd	$3,%ymm13,%ymm10,%ymm13
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpblendd	$3,%ymm10,%ymm11,%ymm10
+	vpaddq	%ymm13,%ymm6,%ymm6
+	vpblendd	$3,%ymm11,%ymm0,%ymm11
+	vpaddq	%ymm10,%ymm7,%ymm7
+	vpaddq	%ymm11,%ymm8,%ymm8
+
+	vpsrlq	$29,%ymm4,%ymm12
+	vpand	%ymm15,%ymm4,%ymm4
+	vpsrlq	$29,%ymm5,%ymm13
+	vpand	%ymm15,%ymm5,%ymm5
+	vpsrlq	$29,%ymm6,%ymm10
+	vpermq	$0x93,%ymm12,%ymm12
+	vpand	%ymm15,%ymm6,%ymm6
+	vpsrlq	$29,%ymm7,%ymm11
+	vpermq	$0x93,%ymm13,%ymm13
+	vpand	%ymm15,%ymm7,%ymm7
+	vpsrlq	$29,%ymm8,%ymm0
+	vpermq	$0x93,%ymm10,%ymm10
+	vpand	%ymm15,%ymm8,%ymm8
+	vpermq	$0x93,%ymm11,%ymm11
+
+	vpblendd	$3,%ymm14,%ymm12,%ymm9
+	vpermq	$0x93,%ymm0,%ymm0
+	vpblendd	$3,%ymm12,%ymm13,%ymm12
+	vpaddq	%ymm9,%ymm4,%ymm4
+	vpblendd	$3,%ymm13,%ymm10,%ymm13
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpblendd	$3,%ymm10,%ymm11,%ymm10
+	vpaddq	%ymm13,%ymm6,%ymm6
+	vpblendd	$3,%ymm11,%ymm0,%ymm11
+	vpaddq	%ymm10,%ymm7,%ymm7
+	vpaddq	%ymm11,%ymm8,%ymm8
+
+	vmovdqu	%ymm4,128-128(%rdi)
+	vmovdqu	%ymm5,160-128(%rdi)
+	vmovdqu	%ymm6,192-128(%rdi)
+	vmovdqu	%ymm7,224-128(%rdi)
+	vmovdqu	%ymm8,256-128(%rdi)
+	vzeroupper
+
+	movq	%rbp,%rax
+.cfi_def_cfa_register	%rax
+	movq	-48(%rax),%r15
+.cfi_restore	%r15
+	movq	-40(%rax),%r14
+.cfi_restore	%r14
+	movq	-32(%rax),%r13
+.cfi_restore	%r13
+	movq	-24(%rax),%r12
+.cfi_restore	%r12
+	movq	-16(%rax),%rbp
+.cfi_restore	%rbp
+	movq	-8(%rax),%rbx
+.cfi_restore	%rbx
+	leaq	(%rax),%rsp
+.cfi_def_cfa_register	%rsp
+.Lmul_1024_epilogue:
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.size	rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2
+.globl	rsaz_1024_red2norm_avx2	/* Repack 36 base-2^29 digits (one per 64-bit slot) into 16 packed 64-bit words. */
+.hidden rsaz_1024_red2norm_avx2	/* In:  %rsi = digit array, 36 qwords read at byte offsets 0..280. */
+.type	rsaz_1024_red2norm_avx2,@function	/* Out: %rdi = 16 qwords written at byte offsets 0..120. */
+.align	32
+rsaz_1024_red2norm_avx2:	/* Digit i is added in at bit position 29*i; clobbers rax, r8-r11 and flags. */
+	subq	$-128,%rsi	/* rsi += 128 (-128 fits an 8-bit immediate); input offsets below are relative to the biased pointer */
+	xorq	%rax,%rax	/* rax accumulates the current output word; starts at 0 */
+	movq	-128(%rsi),%r8	/* word 0: digits 0, 1 and the low part of digit 2 */
+	movq	-120(%rsi),%r9
+	movq	-112(%rsi),%r10
+	shlq	$0,%r8	/* shift by 0: no effect on the value */
+	shlq	$29,%r9
+	movq	%r10,%r11
+	shlq	$58,%r10	/* part of digit 2 that lands in word 0 */
+	shrq	$6,%r11	/* part of digit 2 that spills into word 1 */
+	addq	%r8,%rax
+	addq	%r9,%rax
+	addq	%r10,%rax
+	adcq	$0,%r11	/* adds CF of the addq directly above only. NOTE(review): carries out of the earlier addq in a group are not captured; presumably digits are small enough that those cannot overflow - confirm */
+	movq	%rax,0(%rdi)	/* store word 0 */
+	movq	%r11,%rax	/* spill-over becomes the start of the next word (same pattern in every group) */
+	movq	-104(%rsi),%r8	/* word 1: digit 3, low part of digit 4 */
+	movq	-96(%rsi),%r9
+	shlq	$23,%r8
+	movq	%r9,%r10
+	shlq	$52,%r9
+	shrq	$12,%r10
+	addq	%r8,%rax
+	addq	%r9,%rax
+	adcq	$0,%r10
+	movq	%rax,8(%rdi)	/* store word 1 */
+	movq	%r10,%rax
+	movq	-88(%rsi),%r11	/* word 2: digit 5, low part of digit 6 */
+	movq	-80(%rsi),%r8
+	shlq	$17,%r11
+	movq	%r8,%r9
+	shlq	$46,%r8
+	shrq	$18,%r9
+	addq	%r11,%rax
+	addq	%r8,%rax
+	adcq	$0,%r9
+	movq	%rax,16(%rdi)	/* store word 2 */
+	movq	%r9,%rax
+	movq	-72(%rsi),%r10	/* word 3: digit 7, low part of digit 8 */
+	movq	-64(%rsi),%r11
+	shlq	$11,%r10
+	movq	%r11,%r8
+	shlq	$40,%r11
+	shrq	$24,%r8
+	addq	%r10,%rax
+	addq	%r11,%rax
+	adcq	$0,%r8
+	movq	%rax,24(%rdi)	/* store word 3 */
+	movq	%r8,%rax
+	movq	-56(%rsi),%r9	/* word 4: digits 9, 10 and the low bit of digit 11 */
+	movq	-48(%rsi),%r10
+	movq	-40(%rsi),%r11
+	shlq	$5,%r9
+	shlq	$34,%r10
+	movq	%r11,%r8
+	shlq	$63,%r11
+	shrq	$1,%r8
+	addq	%r9,%rax
+	addq	%r10,%rax
+	addq	%r11,%rax
+	adcq	$0,%r8
+	movq	%rax,32(%rdi)	/* store word 4 */
+	movq	%r8,%rax
+	movq	-32(%rsi),%r9	/* word 5: digit 12, low part of digit 13 */
+	movq	-24(%rsi),%r10
+	shlq	$28,%r9
+	movq	%r10,%r11
+	shlq	$57,%r10
+	shrq	$7,%r11
+	addq	%r9,%rax
+	addq	%r10,%rax
+	adcq	$0,%r11
+	movq	%rax,40(%rdi)	/* store word 5 */
+	movq	%r11,%rax
+	movq	-16(%rsi),%r8	/* word 6: digit 14, low part of digit 15 */
+	movq	-8(%rsi),%r9
+	shlq	$22,%r8
+	movq	%r9,%r10
+	shlq	$51,%r9
+	shrq	$13,%r10
+	addq	%r8,%rax
+	addq	%r9,%rax
+	adcq	$0,%r10
+	movq	%rax,48(%rdi)	/* store word 6 */
+	movq	%r10,%rax
+	movq	0(%rsi),%r11	/* word 7: digit 16, low part of digit 17 */
+	movq	8(%rsi),%r8
+	shlq	$16,%r11
+	movq	%r8,%r9
+	shlq	$45,%r8
+	shrq	$19,%r9
+	addq	%r11,%rax
+	addq	%r8,%rax
+	adcq	$0,%r9
+	movq	%rax,56(%rdi)	/* store word 7 */
+	movq	%r9,%rax
+	movq	16(%rsi),%r10	/* word 8: digit 18, low part of digit 19 */
+	movq	24(%rsi),%r11
+	shlq	$10,%r10
+	movq	%r11,%r8
+	shlq	$39,%r11
+	shrq	$25,%r8
+	addq	%r10,%rax
+	addq	%r11,%rax
+	adcq	$0,%r8
+	movq	%rax,64(%rdi)	/* store word 8 */
+	movq	%r8,%rax
+	movq	32(%rsi),%r9	/* word 9: digits 20, 21 and the low 2 bits of digit 22 */
+	movq	40(%rsi),%r10
+	movq	48(%rsi),%r11
+	shlq	$4,%r9
+	shlq	$33,%r10
+	movq	%r11,%r8
+	shlq	$62,%r11
+	shrq	$2,%r8
+	addq	%r9,%rax
+	addq	%r10,%rax
+	addq	%r11,%rax
+	adcq	$0,%r8
+	movq	%rax,72(%rdi)	/* store word 9 */
+	movq	%r8,%rax
+	movq	56(%rsi),%r9	/* word 10: digit 23, low part of digit 24 */
+	movq	64(%rsi),%r10
+	shlq	$27,%r9
+	movq	%r10,%r11
+	shlq	$56,%r10
+	shrq	$8,%r11
+	addq	%r9,%rax
+	addq	%r10,%rax
+	adcq	$0,%r11
+	movq	%rax,80(%rdi)	/* store word 10 */
+	movq	%r11,%rax
+	movq	72(%rsi),%r8	/* word 11: digit 25, low part of digit 26 */
+	movq	80(%rsi),%r9
+	shlq	$21,%r8
+	movq	%r9,%r10
+	shlq	$50,%r9
+	shrq	$14,%r10
+	addq	%r8,%rax
+	addq	%r9,%rax
+	adcq	$0,%r10
+	movq	%rax,88(%rdi)	/* store word 11 */
+	movq	%r10,%rax
+	movq	88(%rsi),%r11	/* word 12: digit 27, low part of digit 28 */
+	movq	96(%rsi),%r8
+	shlq	$15,%r11
+	movq	%r8,%r9
+	shlq	$44,%r8
+	shrq	$20,%r9
+	addq	%r11,%rax
+	addq	%r8,%rax
+	adcq	$0,%r9
+	movq	%rax,96(%rdi)	/* store word 12 */
+	movq	%r9,%rax
+	movq	104(%rsi),%r10	/* word 13: digit 29, low part of digit 30 */
+	movq	112(%rsi),%r11
+	shlq	$9,%r10
+	movq	%r11,%r8
+	shlq	$38,%r11
+	shrq	$26,%r8
+	addq	%r10,%rax
+	addq	%r11,%rax
+	adcq	$0,%r8
+	movq	%rax,104(%rdi)	/* store word 13 */
+	movq	%r8,%rax
+	movq	120(%rsi),%r9	/* word 14: digits 31, 32 and the low 3 bits of digit 33 */
+	movq	128(%rsi),%r10
+	movq	136(%rsi),%r11
+	shlq	$3,%r9
+	shlq	$32,%r10
+	movq	%r11,%r8
+	shlq	$61,%r11
+	shrq	$3,%r8
+	addq	%r9,%rax
+	addq	%r10,%rax
+	addq	%r11,%rax
+	adcq	$0,%r8
+	movq	%rax,112(%rdi)	/* store word 14 */
+	movq	%r8,%rax
+	movq	144(%rsi),%r9	/* word 15: digit 34, low part of digit 35 */
+	movq	152(%rsi),%r10
+	shlq	$26,%r9
+	movq	%r10,%r11
+	shlq	$55,%r10
+	shrq	$9,%r11
+	addq	%r9,%rax
+	addq	%r10,%rax
+	adcq	$0,%r11
+	movq	%rax,120(%rdi)	/* store word 15 (last output word) */
+	movq	%r11,%rax	/* rax = bits left over above word 15; NOTE(review): whether callers read this value is not visible here */
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+.size	rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2
+
+.globl	rsaz_1024_norm2red_avx2	/* Split 16 packed 64-bit words into 36 digits of 29 bits, one digit per 64-bit slot, then 4 zero slots. */
+.hidden rsaz_1024_norm2red_avx2	/* In:  %rsi = 16 qwords read at byte offsets 0..120. */
+.type	rsaz_1024_norm2red_avx2,@function	/* Out: %rdi = 40 qwords (320 bytes) written at byte offsets 0..312. */
+.align	32
+rsaz_1024_norm2red_avx2:	/* Digit i is bits [29*i, 29*i+28] of the input; clobbers rax, r8-r11 and flags. */
+	subq	$-128,%rdi	/* rdi += 128 (-128 fits an 8-bit immediate); output offsets below are relative to the biased pointer */
+	movq	(%rsi),%r8	/* input word 0 */
+	movl	$0x1fffffff,%eax	/* rax = 2^29-1 digit mask (32-bit move zero-extends) */
+	movq	8(%rsi),%r9	/* input word 1 */
+	movq	%r8,%r11
+	shrq	$0,%r11	/* shift by 0: no effect on the value */
+	andq	%rax,%r11
+	movq	%r11,-128(%rdi)	/* digit 0 */
+	movq	%r8,%r10
+	shrq	$29,%r10
+	andq	%rax,%r10
+	movq	%r10,-120(%rdi)	/* digit 1 */
+	shrdq	$58,%r9,%r8	/* r8 = (r9:r8) >> 58: digit straddling words 0 and 1 (shrdq is used the same way for every straddling digit) */
+	andq	%rax,%r8
+	movq	%r8,-112(%rdi)	/* digit 2 */
+	movq	16(%rsi),%r10	/* input word 2 */
+	movq	%r9,%r8
+	shrq	$23,%r8
+	andq	%rax,%r8
+	movq	%r8,-104(%rdi)	/* digit 3 */
+	shrdq	$52,%r10,%r9
+	andq	%rax,%r9
+	movq	%r9,-96(%rdi)	/* digit 4 */
+	movq	24(%rsi),%r11	/* input word 3 */
+	movq	%r10,%r9
+	shrq	$17,%r9
+	andq	%rax,%r9
+	movq	%r9,-88(%rdi)	/* digit 5 */
+	shrdq	$46,%r11,%r10
+	andq	%rax,%r10
+	movq	%r10,-80(%rdi)	/* digit 6 */
+	movq	32(%rsi),%r8	/* input word 4 */
+	movq	%r11,%r10
+	shrq	$11,%r10
+	andq	%rax,%r10
+	movq	%r10,-72(%rdi)	/* digit 7 */
+	shrdq	$40,%r8,%r11
+	andq	%rax,%r11
+	movq	%r11,-64(%rdi)	/* digit 8 */
+	movq	40(%rsi),%r9	/* input word 5 */
+	movq	%r8,%r11
+	shrq	$5,%r11
+	andq	%rax,%r11
+	movq	%r11,-56(%rdi)	/* digit 9 */
+	movq	%r8,%r10
+	shrq	$34,%r10
+	andq	%rax,%r10
+	movq	%r10,-48(%rdi)	/* digit 10 */
+	shrdq	$63,%r9,%r8
+	andq	%rax,%r8
+	movq	%r8,-40(%rdi)	/* digit 11 */
+	movq	48(%rsi),%r10	/* input word 6 */
+	movq	%r9,%r8
+	shrq	$28,%r8
+	andq	%rax,%r8
+	movq	%r8,-32(%rdi)	/* digit 12 */
+	shrdq	$57,%r10,%r9
+	andq	%rax,%r9
+	movq	%r9,-24(%rdi)	/* digit 13 */
+	movq	56(%rsi),%r11	/* input word 7 */
+	movq	%r10,%r9
+	shrq	$22,%r9
+	andq	%rax,%r9
+	movq	%r9,-16(%rdi)	/* digit 14 */
+	shrdq	$51,%r11,%r10
+	andq	%rax,%r10
+	movq	%r10,-8(%rdi)	/* digit 15 */
+	movq	64(%rsi),%r8	/* input word 8 */
+	movq	%r11,%r10
+	shrq	$16,%r10
+	andq	%rax,%r10
+	movq	%r10,0(%rdi)	/* digit 16 */
+	shrdq	$45,%r8,%r11
+	andq	%rax,%r11
+	movq	%r11,8(%rdi)	/* digit 17 */
+	movq	72(%rsi),%r9	/* input word 9 */
+	movq	%r8,%r11
+	shrq	$10,%r11
+	andq	%rax,%r11
+	movq	%r11,16(%rdi)	/* digit 18 */
+	shrdq	$39,%r9,%r8
+	andq	%rax,%r8
+	movq	%r8,24(%rdi)	/* digit 19 */
+	movq	80(%rsi),%r10	/* input word 10 */
+	movq	%r9,%r8
+	shrq	$4,%r8
+	andq	%rax,%r8
+	movq	%r8,32(%rdi)	/* digit 20 */
+	movq	%r9,%r11
+	shrq	$33,%r11
+	andq	%rax,%r11
+	movq	%r11,40(%rdi)	/* digit 21 */
+	shrdq	$62,%r10,%r9
+	andq	%rax,%r9
+	movq	%r9,48(%rdi)	/* digit 22 */
+	movq	88(%rsi),%r11	/* input word 11 */
+	movq	%r10,%r9
+	shrq	$27,%r9
+	andq	%rax,%r9
+	movq	%r9,56(%rdi)	/* digit 23 */
+	shrdq	$56,%r11,%r10
+	andq	%rax,%r10
+	movq	%r10,64(%rdi)	/* digit 24 */
+	movq	96(%rsi),%r8	/* input word 12 */
+	movq	%r11,%r10
+	shrq	$21,%r10
+	andq	%rax,%r10
+	movq	%r10,72(%rdi)	/* digit 25 */
+	shrdq	$50,%r8,%r11
+	andq	%rax,%r11
+	movq	%r11,80(%rdi)	/* digit 26 */
+	movq	104(%rsi),%r9	/* input word 13 */
+	movq	%r8,%r11
+	shrq	$15,%r11
+	andq	%rax,%r11
+	movq	%r11,88(%rdi)	/* digit 27 */
+	shrdq	$44,%r9,%r8
+	andq	%rax,%r8
+	movq	%r8,96(%rdi)	/* digit 28 */
+	movq	112(%rsi),%r10	/* input word 14 */
+	movq	%r9,%r8
+	shrq	$9,%r8
+	andq	%rax,%r8
+	movq	%r8,104(%rdi)	/* digit 29 */
+	shrdq	$38,%r10,%r9
+	andq	%rax,%r9
+	movq	%r9,112(%rdi)	/* digit 30 */
+	movq	120(%rsi),%r11	/* input word 15 (last) */
+	movq	%r10,%r9
+	shrq	$3,%r9
+	andq	%rax,%r9
+	movq	%r9,120(%rdi)	/* digit 31 */
+	movq	%r10,%r8
+	shrq	$32,%r8
+	andq	%rax,%r8
+	movq	%r8,128(%rdi)	/* digit 32 */
+	shrdq	$61,%r11,%r10
+	andq	%rax,%r10
+	movq	%r10,136(%rdi)	/* digit 33 */
+	xorq	%r8,%r8	/* r8 = 0: stands in for the word above word 15 and is the zero stored into the padding slots */
+	movq	%r11,%r10
+	shrq	$26,%r10
+	andq	%rax,%r10
+	movq	%r10,144(%rdi)	/* digit 34 */
+	shrdq	$55,%r8,%r11	/* top 9 bits of word 15, zero-filled from r8 */
+	andq	%rax,%r11
+	movq	%r11,152(%rdi)	/* digit 35 */
+	movq	%r8,160(%rdi)	/* slots 36..39 are written as zero */
+	movq	%r8,168(%rdi)
+	movq	%r8,176(%rdi)
+	movq	%r8,184(%rdi)
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+.size	rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2
+.globl	rsaz_1024_scatter5_avx2	/* Store one value into slot %edx of a table: 9 chunks of 16 bytes, one chunk per 512-byte row. */
+.hidden rsaz_1024_scatter5_avx2	/* In: %rdi = table base, %rsi = source (9 x 32 bytes read), %edx = slot index. */
+.type	rsaz_1024_scatter5_avx2,@function	/* Only the low dword of each source qword is kept. */
+.align	32
+rsaz_1024_scatter5_avx2:	/* Clobbers eax, rdx, rsi, rdi, ymm0, ymm5 and flags. */
+	vzeroupper
+	vmovdqu	.Lscatter_permd(%rip),%ymm5	/* dword selector 0,2,4,6 (upper four lanes are not stored) */
+	shll	$4,%edx	/* slot index * 16 bytes; 32-bit shift zero-extends into rdx */
+	leaq	(%rdi,%rdx,1),%rdi	/* rdi = address of the slot in the first row */
+	movl	$9,%eax	/* loop counter: 9 rows */
+	jmp	.Loop_scatter_1024	/* jump over the alignment padding that follows */
+
+.align	32
+.Loop_scatter_1024:
+	vmovdqu	(%rsi),%ymm0	/* load 4 source qwords */
+	leaq	32(%rsi),%rsi	/* advance source by 32 bytes */
+	vpermd	%ymm0,%ymm5,%ymm0	/* gather dwords 0,2,4,6 (low half of each qword) into the low 128 bits */
+	vmovdqu	%xmm0,(%rdi)	/* store the 16-byte compacted chunk */
+	leaq	512(%rdi),%rdi	/* next row: 512 bytes = 32 slots of 16 bytes */
+	decl	%eax
+	jnz	.Loop_scatter_1024
+
+	vzeroupper
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+.size	rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2
+
+.globl	rsaz_1024_gather5_avx2	/* Fetch slot %edx from a table of 9 rows x 32 slots x 16 bytes; every slot is loaded and masked, so the addresses read do not depend on the index. */
+.hidden rsaz_1024_gather5_avx2	/* In: %rdi = output (320 bytes written), %rsi = table (loaded with vmovdqa, so it must be 32-byte aligned), %edx = slot index. */
+.type	rsaz_1024_gather5_avx2,@function	/* Each selected 32-bit value is widened into its own zero-extended qword; a final 32-byte zero block is appended. */
+.align	32
+rsaz_1024_gather5_avx2:	/* Uses a 256-byte, 32-byte-aligned stack scratch area for 8 of the 16 compare masks. */
+.cfi_startproc	
+	vzeroupper
+	movq	%rsp,%r11	/* keep the entry rsp for the epilogue */
+.cfi_def_cfa_register	%r11
+	leaq	-256(%rsp),%rsp	/* reserve 256 bytes ... */
+	andq	$-32,%rsp	/* ... and align the scratch area to 32 bytes for vmovdqa */
+	leaq	.Linc(%rip),%r10
+	leaq	-128(%rsp),%rax	/* biased scratch pointer: N+128(%rax) addresses N(%rsp) */
+
+	vmovd	%edx,%xmm4
+	vmovdqa	(%r10),%ymm0	/* lanes {0,0,0,0,1,1,1,1}: slot numbers covered by the first 32 table bytes */
+	vmovdqa	32(%r10),%ymm1	/* lanes {2,2,2,2,3,3,3,3} */
+	vmovdqa	64(%r10),%ymm5	/* increment of 4 per lane */
+	vpbroadcastd	%xmm4,%ymm4	/* requested index in every dword lane */
+
+	vpaddd	%ymm5,%ymm0,%ymm2	/* from here: alternately step the slot numbers by 4 and turn each vector into an all-ones/zero mask by comparing with the index */
+	vpcmpeqd	%ymm4,%ymm0,%ymm0
+	vpaddd	%ymm5,%ymm1,%ymm3
+	vpcmpeqd	%ymm4,%ymm1,%ymm1
+	vmovdqa	%ymm0,0+128(%rax)	/* mask for slots 0,1 */
+	vpaddd	%ymm5,%ymm2,%ymm0
+	vpcmpeqd	%ymm4,%ymm2,%ymm2
+	vmovdqa	%ymm1,32+128(%rax)	/* mask for slots 2,3 */
+	vpaddd	%ymm5,%ymm3,%ymm1
+	vpcmpeqd	%ymm4,%ymm3,%ymm3
+	vmovdqa	%ymm2,64+128(%rax)	/* mask for slots 4,5 */
+	vpaddd	%ymm5,%ymm0,%ymm2
+	vpcmpeqd	%ymm4,%ymm0,%ymm0
+	vmovdqa	%ymm3,96+128(%rax)	/* mask for slots 6,7 */
+	vpaddd	%ymm5,%ymm1,%ymm3
+	vpcmpeqd	%ymm4,%ymm1,%ymm1
+	vmovdqa	%ymm0,128+128(%rax)	/* mask for slots 8,9 */
+	vpaddd	%ymm5,%ymm2,%ymm8
+	vpcmpeqd	%ymm4,%ymm2,%ymm2
+	vmovdqa	%ymm1,160+128(%rax)	/* mask for slots 10,11 */
+	vpaddd	%ymm5,%ymm3,%ymm9
+	vpcmpeqd	%ymm4,%ymm3,%ymm3
+	vmovdqa	%ymm2,192+128(%rax)	/* mask for slots 12,13 */
+	vpaddd	%ymm5,%ymm8,%ymm10
+	vpcmpeqd	%ymm4,%ymm8,%ymm8	/* masks for slots 16..31 stay in ymm8..ymm15 */
+	vmovdqa	%ymm3,224+128(%rax)	/* mask for slots 14,15 */
+	vpaddd	%ymm5,%ymm9,%ymm11
+	vpcmpeqd	%ymm4,%ymm9,%ymm9
+	vpaddd	%ymm5,%ymm10,%ymm12
+	vpcmpeqd	%ymm4,%ymm10,%ymm10
+	vpaddd	%ymm5,%ymm11,%ymm13
+	vpcmpeqd	%ymm4,%ymm11,%ymm11
+	vpaddd	%ymm5,%ymm12,%ymm14
+	vpcmpeqd	%ymm4,%ymm12,%ymm12
+	vpaddd	%ymm5,%ymm13,%ymm15
+	vpcmpeqd	%ymm4,%ymm13,%ymm13
+	vpcmpeqd	%ymm4,%ymm14,%ymm14
+	vpcmpeqd	%ymm4,%ymm15,%ymm15
+
+	vmovdqa	-32(%r10),%ymm7	/* the 32 bytes just before .Linc, i.e. .Lgather_permd */
+	leaq	128(%rsi),%rsi	/* bias the table pointer; row offsets below are N-128(%rsi) */
+	movl	$9,%edx	/* loop counter: 9 rows (the index now lives only in the masks) */
+
+.Loop_gather_1024:
+	vmovdqa	0-128(%rsi),%ymm0	/* slots 0..7 of this row */
+	vmovdqa	32-128(%rsi),%ymm1
+	vmovdqa	64-128(%rsi),%ymm2
+	vmovdqa	96-128(%rsi),%ymm3
+	vpand	0+128(%rax),%ymm0,%ymm0	/* keep only the lanes whose slot number equals the index */
+	vpand	32+128(%rax),%ymm1,%ymm1
+	vpand	64+128(%rax),%ymm2,%ymm2
+	vpor	%ymm0,%ymm1,%ymm4	/* ymm4 and ymm5 are two independent OR accumulators */
+	vpand	96+128(%rax),%ymm3,%ymm3
+	vmovdqa	128-128(%rsi),%ymm0	/* slots 8..15 */
+	vmovdqa	160-128(%rsi),%ymm1
+	vpor	%ymm2,%ymm3,%ymm5
+	vmovdqa	192-128(%rsi),%ymm2
+	vmovdqa	224-128(%rsi),%ymm3
+	vpand	128+128(%rax),%ymm0,%ymm0
+	vpand	160+128(%rax),%ymm1,%ymm1
+	vpand	192+128(%rax),%ymm2,%ymm2
+	vpor	%ymm0,%ymm4,%ymm4
+	vpand	224+128(%rax),%ymm3,%ymm3
+	vpand	256-128(%rsi),%ymm8,%ymm0	/* slots 16..31: masks come from registers, table data is the memory operand */
+	vpor	%ymm1,%ymm5,%ymm5
+	vpand	288-128(%rsi),%ymm9,%ymm1
+	vpor	%ymm2,%ymm4,%ymm4
+	vpand	320-128(%rsi),%ymm10,%ymm2
+	vpor	%ymm3,%ymm5,%ymm5
+	vpand	352-128(%rsi),%ymm11,%ymm3
+	vpor	%ymm0,%ymm4,%ymm4
+	vpand	384-128(%rsi),%ymm12,%ymm0
+	vpor	%ymm1,%ymm5,%ymm5
+	vpand	416-128(%rsi),%ymm13,%ymm1
+	vpor	%ymm2,%ymm4,%ymm4
+	vpand	448-128(%rsi),%ymm14,%ymm2
+	vpor	%ymm3,%ymm5,%ymm5
+	vpand	480-128(%rsi),%ymm15,%ymm3
+	leaq	512(%rsi),%rsi	/* next 512-byte row */
+	vpor	%ymm0,%ymm4,%ymm4
+	vpor	%ymm1,%ymm5,%ymm5
+	vpor	%ymm2,%ymm4,%ymm4
+	vpor	%ymm3,%ymm5,%ymm5
+
+	vpor	%ymm5,%ymm4,%ymm4	/* merge the two accumulators */
+	vextracti128	$1,%ymm4,%xmm5	/* fold the odd-slot (upper) half ... */
+	vpor	%xmm4,%xmm5,%xmm5	/* ... onto the even-slot half; the 128-bit VEX form also zeroes the upper half of ymm5 */
+	vpermd	%ymm5,%ymm7,%ymm5	/* order 0,7,1,7,2,7,3,7: dwords 0..3 go to even lanes, the zero dword 7 to odd lanes */
+	vmovdqu	%ymm5,(%rdi)	/* store 4 zero-extended qwords */
+	leaq	32(%rdi),%rdi
+	decl	%edx
+	jnz	.Loop_gather_1024
+
+	vpxor	%ymm0,%ymm0,%ymm0
+	vmovdqu	%ymm0,(%rdi)	/* append 32 zero bytes after the 9 gathered blocks */
+	vzeroupper
+	leaq	(%r11),%rsp	/* restore the entry stack pointer */
+.cfi_def_cfa_register	%rsp
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+.cfi_endproc	
+.LSEH_end_rsaz_1024_gather5:	/* label is not referenced in the visible code; NOTE(review): name suggests a leftover end marker for Windows SEH tables - confirm */
+.size	rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
+.extern	OPENSSL_ia32cap_P	/* capability words defined outside this file */
+.hidden OPENSSL_ia32cap_P
+.globl	rsaz_avx2_eligible	/* Returns in %eax (0 or 1) bit 5 of the 32-bit word at OPENSSL_ia32cap_P+8. */
+.hidden rsaz_avx2_eligible
+.type	rsaz_avx2_eligible,@function
+.align	32
+rsaz_avx2_eligible:	/* Takes no arguments; clobbers rax and flags only. */
+	leaq	OPENSSL_ia32cap_P(%rip),%rax
+	movl	8(%rax),%eax	/* third dword of the vector; NOTE(review): presumably the CPUID leaf 7 EBX word, where bit 5 is AVX2 - confirm against the OPENSSL_ia32cap_P layout */
+	andl	$32,%eax	/* isolate bit 5 */
+	shrl	$5,%eax	/* normalize to 0 or 1 */
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+.size	rsaz_avx2_eligible,.-rsaz_avx2_eligible
+
+.align	64	/* constant tables; each label below covers 32 bytes except .Linc (96 bytes) */
+.Land_mask:	/* 2^29-1 in each of four qwords */
+.quad	0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
+.Lscatter_permd:	/* vpermd selector used by rsaz_1024_scatter5_avx2: picks dwords 0,2,4,6 into the low four lanes */
+.long	0,2,4,6,7,7,7,7
+.Lgather_permd:	/* vpermd selector used by rsaz_1024_gather5_avx2, which loads it as -32(.Linc), so it must stay directly before .Linc */
+.long	0,7,1,7,2,7,3,7
+.Linc:	/* slot-number vectors for the compare masks built in rsaz_1024_gather5_avx2 */
+.long	0,0,0,0, 1,1,1,1
+.long	2,2,2,2, 3,3,3,3
+.long	4,4,4,4, 4,4,4,4	/* per-step increment */
+.align	64
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/fipsmodule/sha1-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/sha1-x86_64.S
new file mode 100644
index 0000000..7f924dc
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/sha1-x86_64.S
@@ -0,0 +1,3544 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+.extern	OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
+
+.globl	sha1_block_data_order
+.hidden sha1_block_data_order
+.type	sha1_block_data_order,@function
+.align	16
+sha1_block_data_order:
+	leaq	OPENSSL_ia32cap_P(%rip),%r10
+	movl	0(%r10),%r9d
+	movl	4(%r10),%r8d
+	movl	8(%r10),%r10d
+	testl	$512,%r8d
+	jz	.Lialu
+	andl	$268435456,%r8d
+	andl	$1073741824,%r9d
+	orl	%r9d,%r8d
+	cmpl	$1342177280,%r8d
+	je	_avx_shortcut
+	jmp	_ssse3_shortcut
+
+.align	16
+.Lialu:
+	movq	%rsp,%rax
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	movq	%rdi,%r8
+	subq	$72,%rsp
+	movq	%rsi,%r9
+	andq	$-64,%rsp
+	movq	%rdx,%r10
+	movq	%rax,64(%rsp)
+.Lprologue:
+
+	movl	0(%r8),%esi
+	movl	4(%r8),%edi
+	movl	8(%r8),%r11d
+	movl	12(%r8),%r12d
+	movl	16(%r8),%r13d
+	jmp	.Lloop
+
+.align	16
+.Lloop:
+	movl	0(%r9),%edx
+	bswapl	%edx
+	movl	4(%r9),%ebp
+	movl	%r12d,%eax
+	movl	%edx,0(%rsp)
+	movl	%esi,%ecx
+	bswapl	%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	andl	%edi,%eax
+	leal	1518500249(%rdx,%r13,1),%r13d
+	addl	%ecx,%r13d
+	xorl	%r12d,%eax
+	roll	$30,%edi
+	addl	%eax,%r13d
+	movl	8(%r9),%r14d
+	movl	%r11d,%eax
+	movl	%ebp,4(%rsp)
+	movl	%r13d,%ecx
+	bswapl	%r14d
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	andl	%esi,%eax
+	leal	1518500249(%rbp,%r12,1),%r12d
+	addl	%ecx,%r12d
+	xorl	%r11d,%eax
+	roll	$30,%esi
+	addl	%eax,%r12d
+	movl	12(%r9),%edx
+	movl	%edi,%eax
+	movl	%r14d,8(%rsp)
+	movl	%r12d,%ecx
+	bswapl	%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	andl	%r13d,%eax
+	leal	1518500249(%r14,%r11,1),%r11d
+	addl	%ecx,%r11d
+	xorl	%edi,%eax
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	movl	16(%r9),%ebp
+	movl	%esi,%eax
+	movl	%edx,12(%rsp)
+	movl	%r11d,%ecx
+	bswapl	%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	andl	%r12d,%eax
+	leal	1518500249(%rdx,%rdi,1),%edi
+	addl	%ecx,%edi
+	xorl	%esi,%eax
+	roll	$30,%r12d
+	addl	%eax,%edi
+	movl	20(%r9),%r14d
+	movl	%r13d,%eax
+	movl	%ebp,16(%rsp)
+	movl	%edi,%ecx
+	bswapl	%r14d
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	andl	%r11d,%eax
+	leal	1518500249(%rbp,%rsi,1),%esi
+	addl	%ecx,%esi
+	xorl	%r13d,%eax
+	roll	$30,%r11d
+	addl	%eax,%esi
+	movl	24(%r9),%edx
+	movl	%r12d,%eax
+	movl	%r14d,20(%rsp)
+	movl	%esi,%ecx
+	bswapl	%edx
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	andl	%edi,%eax
+	leal	1518500249(%r14,%r13,1),%r13d
+	addl	%ecx,%r13d
+	xorl	%r12d,%eax
+	roll	$30,%edi
+	addl	%eax,%r13d
+	movl	28(%r9),%ebp
+	movl	%r11d,%eax
+	movl	%edx,24(%rsp)
+	movl	%r13d,%ecx
+	bswapl	%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	andl	%esi,%eax
+	leal	1518500249(%rdx,%r12,1),%r12d
+	addl	%ecx,%r12d
+	xorl	%r11d,%eax
+	roll	$30,%esi
+	addl	%eax,%r12d
+	movl	32(%r9),%r14d
+	movl	%edi,%eax
+	movl	%ebp,28(%rsp)
+	movl	%r12d,%ecx
+	bswapl	%r14d
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	andl	%r13d,%eax
+	leal	1518500249(%rbp,%r11,1),%r11d
+	addl	%ecx,%r11d
+	xorl	%edi,%eax
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	movl	36(%r9),%edx
+	movl	%esi,%eax
+	movl	%r14d,32(%rsp)
+	movl	%r11d,%ecx
+	bswapl	%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	andl	%r12d,%eax
+	leal	1518500249(%r14,%rdi,1),%edi
+	addl	%ecx,%edi
+	xorl	%esi,%eax
+	roll	$30,%r12d
+	addl	%eax,%edi
+	movl	40(%r9),%ebp
+	movl	%r13d,%eax
+	movl	%edx,36(%rsp)
+	movl	%edi,%ecx
+	bswapl	%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	andl	%r11d,%eax
+	leal	1518500249(%rdx,%rsi,1),%esi
+	addl	%ecx,%esi
+	xorl	%r13d,%eax
+	roll	$30,%r11d
+	addl	%eax,%esi
+	movl	44(%r9),%r14d
+	movl	%r12d,%eax
+	movl	%ebp,40(%rsp)
+	movl	%esi,%ecx
+	bswapl	%r14d
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	andl	%edi,%eax
+	leal	1518500249(%rbp,%r13,1),%r13d
+	addl	%ecx,%r13d
+	xorl	%r12d,%eax
+	roll	$30,%edi
+	addl	%eax,%r13d
+	movl	48(%r9),%edx
+	movl	%r11d,%eax
+	movl	%r14d,44(%rsp)
+	movl	%r13d,%ecx
+	bswapl	%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	andl	%esi,%eax
+	leal	1518500249(%r14,%r12,1),%r12d
+	addl	%ecx,%r12d
+	xorl	%r11d,%eax
+	roll	$30,%esi
+	addl	%eax,%r12d
+	movl	52(%r9),%ebp
+	movl	%edi,%eax
+	movl	%edx,48(%rsp)
+	movl	%r12d,%ecx
+	bswapl	%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	andl	%r13d,%eax
+	leal	1518500249(%rdx,%r11,1),%r11d
+	addl	%ecx,%r11d
+	xorl	%edi,%eax
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	movl	56(%r9),%r14d
+	movl	%esi,%eax
+	movl	%ebp,52(%rsp)
+	movl	%r11d,%ecx
+	bswapl	%r14d
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	andl	%r12d,%eax
+	leal	1518500249(%rbp,%rdi,1),%edi
+	addl	%ecx,%edi
+	xorl	%esi,%eax
+	roll	$30,%r12d
+	addl	%eax,%edi
+	movl	60(%r9),%edx
+	movl	%r13d,%eax
+	movl	%r14d,56(%rsp)
+	movl	%edi,%ecx
+	bswapl	%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	andl	%r11d,%eax
+	leal	1518500249(%r14,%rsi,1),%esi
+	addl	%ecx,%esi
+	xorl	%r13d,%eax
+	roll	$30,%r11d
+	addl	%eax,%esi
+	xorl	0(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edx,60(%rsp)
+	movl	%esi,%ecx
+	xorl	8(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	32(%rsp),%ebp
+	andl	%edi,%eax
+	leal	1518500249(%rdx,%r13,1),%r13d
+	roll	$30,%edi
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	roll	$1,%ebp
+	addl	%eax,%r13d
+	xorl	4(%rsp),%r14d
+	movl	%r11d,%eax
+	movl	%ebp,0(%rsp)
+	movl	%r13d,%ecx
+	xorl	12(%rsp),%r14d
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	36(%rsp),%r14d
+	andl	%esi,%eax
+	leal	1518500249(%rbp,%r12,1),%r12d
+	roll	$30,%esi
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	roll	$1,%r14d
+	addl	%eax,%r12d
+	xorl	8(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r14d,4(%rsp)
+	movl	%r12d,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	40(%rsp),%edx
+	andl	%r13d,%eax
+	leal	1518500249(%r14,%r11,1),%r11d
+	roll	$30,%r13d
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	roll	$1,%edx
+	addl	%eax,%r11d
+	xorl	12(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%edx,8(%rsp)
+	movl	%r11d,%ecx
+	xorl	20(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	44(%rsp),%ebp
+	andl	%r12d,%eax
+	leal	1518500249(%rdx,%rdi,1),%edi
+	roll	$30,%r12d
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	roll	$1,%ebp
+	addl	%eax,%edi
+	xorl	16(%rsp),%r14d
+	movl	%r13d,%eax
+	movl	%ebp,12(%rsp)
+	movl	%edi,%ecx
+	xorl	24(%rsp),%r14d
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	48(%rsp),%r14d
+	andl	%r11d,%eax
+	leal	1518500249(%rbp,%rsi,1),%esi
+	roll	$30,%r11d
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	roll	$1,%r14d
+	addl	%eax,%esi
+	xorl	20(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r14d,16(%rsp)
+	movl	%esi,%ecx
+	xorl	28(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	52(%rsp),%edx
+	leal	1859775393(%r14,%r13,1),%r13d
+	xorl	%r11d,%eax
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	xorl	24(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%edx,20(%rsp)
+	movl	%r13d,%ecx
+	xorl	32(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	56(%rsp),%ebp
+	leal	1859775393(%rdx,%r12,1),%r12d
+	xorl	%edi,%eax
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	xorl	28(%rsp),%r14d
+	movl	%r13d,%eax
+	movl	%ebp,24(%rsp)
+	movl	%r12d,%ecx
+	xorl	36(%rsp),%r14d
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	60(%rsp),%r14d
+	leal	1859775393(%rbp,%r11,1),%r11d
+	xorl	%esi,%eax
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%r14d
+	xorl	32(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%r14d,28(%rsp)
+	movl	%r11d,%ecx
+	xorl	40(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	0(%rsp),%edx
+	leal	1859775393(%r14,%rdi,1),%edi
+	xorl	%r13d,%eax
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	xorl	36(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%edx,32(%rsp)
+	movl	%edi,%ecx
+	xorl	44(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	4(%rsp),%ebp
+	leal	1859775393(%rdx,%rsi,1),%esi
+	xorl	%r12d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	xorl	40(%rsp),%r14d
+	movl	%edi,%eax
+	movl	%ebp,36(%rsp)
+	movl	%esi,%ecx
+	xorl	48(%rsp),%r14d
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	8(%rsp),%r14d
+	leal	1859775393(%rbp,%r13,1),%r13d
+	xorl	%r11d,%eax
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%r14d
+	xorl	44(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r14d,40(%rsp)
+	movl	%r13d,%ecx
+	xorl	52(%rsp),%edx
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	12(%rsp),%edx
+	leal	1859775393(%r14,%r12,1),%r12d
+	xorl	%edi,%eax
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%edx
+	xorl	48(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%edx,44(%rsp)
+	movl	%r12d,%ecx
+	xorl	56(%rsp),%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	16(%rsp),%ebp
+	leal	1859775393(%rdx,%r11,1),%r11d
+	xorl	%esi,%eax
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	xorl	52(%rsp),%r14d
+	movl	%r12d,%eax
+	movl	%ebp,48(%rsp)
+	movl	%r11d,%ecx
+	xorl	60(%rsp),%r14d
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	20(%rsp),%r14d
+	leal	1859775393(%rbp,%rdi,1),%edi
+	xorl	%r13d,%eax
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%r14d
+	xorl	56(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%r14d,52(%rsp)
+	movl	%edi,%ecx
+	xorl	0(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	24(%rsp),%edx
+	leal	1859775393(%r14,%rsi,1),%esi
+	xorl	%r12d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%edx
+	xorl	60(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%edx,56(%rsp)
+	movl	%esi,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	28(%rsp),%ebp
+	leal	1859775393(%rdx,%r13,1),%r13d
+	xorl	%r11d,%eax
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	xorl	0(%rsp),%r14d
+	movl	%esi,%eax
+	movl	%ebp,60(%rsp)
+	movl	%r13d,%ecx
+	xorl	8(%rsp),%r14d
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	32(%rsp),%r14d
+	leal	1859775393(%rbp,%r12,1),%r12d
+	xorl	%edi,%eax
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%r14d
+	xorl	4(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r14d,0(%rsp)
+	movl	%r12d,%ecx
+	xorl	12(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	36(%rsp),%edx
+	leal	1859775393(%r14,%r11,1),%r11d
+	xorl	%esi,%eax
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	xorl	8(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edx,4(%rsp)
+	movl	%r11d,%ecx
+	xorl	16(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	40(%rsp),%ebp
+	leal	1859775393(%rdx,%rdi,1),%edi
+	xorl	%r13d,%eax
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	xorl	12(%rsp),%r14d
+	movl	%r11d,%eax
+	movl	%ebp,8(%rsp)
+	movl	%edi,%ecx
+	xorl	20(%rsp),%r14d
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	44(%rsp),%r14d
+	leal	1859775393(%rbp,%rsi,1),%esi
+	xorl	%r12d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%r14d
+	xorl	16(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r14d,12(%rsp)
+	movl	%esi,%ecx
+	xorl	24(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	48(%rsp),%edx
+	leal	1859775393(%r14,%r13,1),%r13d
+	xorl	%r11d,%eax
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	xorl	20(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%edx,16(%rsp)
+	movl	%r13d,%ecx
+	xorl	28(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	52(%rsp),%ebp
+	leal	1859775393(%rdx,%r12,1),%r12d
+	xorl	%edi,%eax
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	xorl	24(%rsp),%r14d
+	movl	%r13d,%eax
+	movl	%ebp,20(%rsp)
+	movl	%r12d,%ecx
+	xorl	32(%rsp),%r14d
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	56(%rsp),%r14d
+	leal	1859775393(%rbp,%r11,1),%r11d
+	xorl	%esi,%eax
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%r14d
+	xorl	28(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%r14d,24(%rsp)
+	movl	%r11d,%ecx
+	xorl	36(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	60(%rsp),%edx
+	leal	1859775393(%r14,%rdi,1),%edi
+	xorl	%r13d,%eax
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	xorl	32(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%edx,28(%rsp)
+	movl	%edi,%ecx
+	xorl	40(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	0(%rsp),%ebp
+	leal	1859775393(%rdx,%rsi,1),%esi
+	xorl	%r12d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	xorl	36(%rsp),%r14d
+	movl	%r12d,%eax
+	movl	%ebp,32(%rsp)
+	movl	%r12d,%ebx
+	xorl	44(%rsp),%r14d
+	andl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	4(%rsp),%r14d
+	leal	-1894007588(%rbp,%r13,1),%r13d
+	xorl	%r11d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r13d
+	roll	$1,%r14d
+	andl	%edi,%ebx
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%ebx,%r13d
+	xorl	40(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%r14d,36(%rsp)
+	movl	%r11d,%ebx
+	xorl	48(%rsp),%edx
+	andl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	8(%rsp),%edx
+	leal	-1894007588(%r14,%r12,1),%r12d
+	xorl	%edi,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r12d
+	roll	$1,%edx
+	andl	%esi,%ebx
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%ebx,%r12d
+	xorl	44(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%edx,40(%rsp)
+	movl	%edi,%ebx
+	xorl	52(%rsp),%ebp
+	andl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	12(%rsp),%ebp
+	leal	-1894007588(%rdx,%r11,1),%r11d
+	xorl	%esi,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	andl	%r13d,%ebx
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%ebx,%r11d
+	xorl	48(%rsp),%r14d
+	movl	%esi,%eax
+	movl	%ebp,44(%rsp)
+	movl	%esi,%ebx
+	xorl	56(%rsp),%r14d
+	andl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	16(%rsp),%r14d
+	leal	-1894007588(%rbp,%rdi,1),%edi
+	xorl	%r13d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%edi
+	roll	$1,%r14d
+	andl	%r12d,%ebx
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%ebx,%edi
+	xorl	52(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r14d,48(%rsp)
+	movl	%r13d,%ebx
+	xorl	60(%rsp),%edx
+	andl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	20(%rsp),%edx
+	leal	-1894007588(%r14,%rsi,1),%esi
+	xorl	%r12d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%esi
+	roll	$1,%edx
+	andl	%r11d,%ebx
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%ebx,%esi
+	xorl	56(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edx,52(%rsp)
+	movl	%r12d,%ebx
+	xorl	0(%rsp),%ebp
+	andl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	24(%rsp),%ebp
+	leal	-1894007588(%rdx,%r13,1),%r13d
+	xorl	%r11d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	andl	%edi,%ebx
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%ebx,%r13d
+	xorl	60(%rsp),%r14d
+	movl	%r11d,%eax
+	movl	%ebp,56(%rsp)
+	movl	%r11d,%ebx
+	xorl	4(%rsp),%r14d
+	andl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	28(%rsp),%r14d
+	leal	-1894007588(%rbp,%r12,1),%r12d
+	xorl	%edi,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r12d
+	roll	$1,%r14d
+	andl	%esi,%ebx
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%ebx,%r12d
+	xorl	0(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r14d,60(%rsp)
+	movl	%edi,%ebx
+	xorl	8(%rsp),%edx
+	andl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	32(%rsp),%edx
+	leal	-1894007588(%r14,%r11,1),%r11d
+	xorl	%esi,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r11d
+	roll	$1,%edx
+	andl	%r13d,%ebx
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%ebx,%r11d
+	xorl	4(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%edx,0(%rsp)
+	movl	%esi,%ebx
+	xorl	12(%rsp),%ebp
+	andl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	36(%rsp),%ebp
+	leal	-1894007588(%rdx,%rdi,1),%edi
+	xorl	%r13d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%edi
+	roll	$1,%ebp
+	andl	%r12d,%ebx
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%ebx,%edi
+	xorl	8(%rsp),%r14d
+	movl	%r13d,%eax
+	movl	%ebp,4(%rsp)
+	movl	%r13d,%ebx
+	xorl	16(%rsp),%r14d
+	andl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	40(%rsp),%r14d
+	leal	-1894007588(%rbp,%rsi,1),%esi
+	xorl	%r12d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%esi
+	roll	$1,%r14d
+	andl	%r11d,%ebx
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%ebx,%esi
+	xorl	12(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%r14d,8(%rsp)
+	movl	%r12d,%ebx
+	xorl	20(%rsp),%edx
+	andl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	44(%rsp),%edx
+	leal	-1894007588(%r14,%r13,1),%r13d
+	xorl	%r11d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r13d
+	roll	$1,%edx
+	andl	%edi,%ebx
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%ebx,%r13d
+	xorl	16(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%edx,12(%rsp)
+	movl	%r11d,%ebx
+	xorl	24(%rsp),%ebp
+	andl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	48(%rsp),%ebp
+	leal	-1894007588(%rdx,%r12,1),%r12d
+	xorl	%edi,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	andl	%esi,%ebx
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%ebx,%r12d
+	xorl	20(%rsp),%r14d
+	movl	%edi,%eax
+	movl	%ebp,16(%rsp)
+	movl	%edi,%ebx
+	xorl	28(%rsp),%r14d
+	andl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	52(%rsp),%r14d
+	leal	-1894007588(%rbp,%r11,1),%r11d
+	xorl	%esi,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r11d
+	roll	$1,%r14d
+	andl	%r13d,%ebx
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%ebx,%r11d
+	xorl	24(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r14d,20(%rsp)
+	movl	%esi,%ebx
+	xorl	32(%rsp),%edx
+	andl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	56(%rsp),%edx
+	leal	-1894007588(%r14,%rdi,1),%edi
+	xorl	%r13d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%edi
+	roll	$1,%edx
+	andl	%r12d,%ebx
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%ebx,%edi
+	xorl	28(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%edx,24(%rsp)
+	movl	%r13d,%ebx
+	xorl	36(%rsp),%ebp
+	andl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	60(%rsp),%ebp
+	leal	-1894007588(%rdx,%rsi,1),%esi
+	xorl	%r12d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%esi
+	roll	$1,%ebp
+	andl	%r11d,%ebx
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%ebx,%esi
+	xorl	32(%rsp),%r14d
+	movl	%r12d,%eax
+	movl	%ebp,28(%rsp)
+	movl	%r12d,%ebx
+	xorl	40(%rsp),%r14d
+	andl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	0(%rsp),%r14d
+	leal	-1894007588(%rbp,%r13,1),%r13d
+	xorl	%r11d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r13d
+	roll	$1,%r14d
+	andl	%edi,%ebx
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%ebx,%r13d
+	xorl	36(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%r14d,32(%rsp)
+	movl	%r11d,%ebx
+	xorl	44(%rsp),%edx
+	andl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	4(%rsp),%edx
+	leal	-1894007588(%r14,%r12,1),%r12d
+	xorl	%edi,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r12d
+	roll	$1,%edx
+	andl	%esi,%ebx
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%ebx,%r12d
+	xorl	40(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%edx,36(%rsp)
+	movl	%edi,%ebx
+	xorl	48(%rsp),%ebp
+	andl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	8(%rsp),%ebp
+	leal	-1894007588(%rdx,%r11,1),%r11d
+	xorl	%esi,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	andl	%r13d,%ebx
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%ebx,%r11d
+	xorl	44(%rsp),%r14d
+	movl	%esi,%eax
+	movl	%ebp,40(%rsp)
+	movl	%esi,%ebx
+	xorl	52(%rsp),%r14d
+	andl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	12(%rsp),%r14d
+	leal	-1894007588(%rbp,%rdi,1),%edi
+	xorl	%r13d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%edi
+	roll	$1,%r14d
+	andl	%r12d,%ebx
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%ebx,%edi
+	xorl	48(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r14d,44(%rsp)
+	movl	%r13d,%ebx
+	xorl	56(%rsp),%edx
+	andl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	16(%rsp),%edx
+	leal	-1894007588(%r14,%rsi,1),%esi
+	xorl	%r12d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%esi
+	roll	$1,%edx
+	andl	%r11d,%ebx
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%ebx,%esi
+	xorl	52(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%edx,48(%rsp)
+	movl	%esi,%ecx
+	xorl	60(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	20(%rsp),%ebp
+	leal	-899497514(%rdx,%r13,1),%r13d
+	xorl	%r11d,%eax
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	xorl	56(%rsp),%r14d
+	movl	%esi,%eax
+	movl	%ebp,52(%rsp)
+	movl	%r13d,%ecx
+	xorl	0(%rsp),%r14d
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	24(%rsp),%r14d
+	leal	-899497514(%rbp,%r12,1),%r12d
+	xorl	%edi,%eax
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%r14d
+	xorl	60(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r14d,56(%rsp)
+	movl	%r12d,%ecx
+	xorl	4(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	28(%rsp),%edx
+	leal	-899497514(%r14,%r11,1),%r11d
+	xorl	%esi,%eax
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	xorl	0(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edx,60(%rsp)
+	movl	%r11d,%ecx
+	xorl	8(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	32(%rsp),%ebp
+	leal	-899497514(%rdx,%rdi,1),%edi
+	xorl	%r13d,%eax
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	xorl	4(%rsp),%r14d
+	movl	%r11d,%eax
+	movl	%ebp,0(%rsp)
+	movl	%edi,%ecx
+	xorl	12(%rsp),%r14d
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	36(%rsp),%r14d
+	leal	-899497514(%rbp,%rsi,1),%esi
+	xorl	%r12d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%r14d
+	xorl	8(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r14d,4(%rsp)
+	movl	%esi,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	40(%rsp),%edx
+	leal	-899497514(%r14,%r13,1),%r13d
+	xorl	%r11d,%eax
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	xorl	12(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%edx,8(%rsp)
+	movl	%r13d,%ecx
+	xorl	20(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	44(%rsp),%ebp
+	leal	-899497514(%rdx,%r12,1),%r12d
+	xorl	%edi,%eax
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	xorl	16(%rsp),%r14d
+	movl	%r13d,%eax
+	movl	%ebp,12(%rsp)
+	movl	%r12d,%ecx
+	xorl	24(%rsp),%r14d
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	48(%rsp),%r14d
+	leal	-899497514(%rbp,%r11,1),%r11d
+	xorl	%esi,%eax
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%r14d
+	xorl	20(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%r14d,16(%rsp)
+	movl	%r11d,%ecx
+	xorl	28(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	52(%rsp),%edx
+	leal	-899497514(%r14,%rdi,1),%edi
+	xorl	%r13d,%eax
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	xorl	24(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%edx,20(%rsp)
+	movl	%edi,%ecx
+	xorl	32(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	56(%rsp),%ebp
+	leal	-899497514(%rdx,%rsi,1),%esi
+	xorl	%r12d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	xorl	28(%rsp),%r14d
+	movl	%edi,%eax
+	movl	%ebp,24(%rsp)
+	movl	%esi,%ecx
+	xorl	36(%rsp),%r14d
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	60(%rsp),%r14d
+	leal	-899497514(%rbp,%r13,1),%r13d
+	xorl	%r11d,%eax
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%r14d
+	xorl	32(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r14d,28(%rsp)
+	movl	%r13d,%ecx
+	xorl	40(%rsp),%edx
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	0(%rsp),%edx
+	leal	-899497514(%r14,%r12,1),%r12d
+	xorl	%edi,%eax
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%edx
+	xorl	36(%rsp),%ebp
+	movl	%r13d,%eax
+
+	movl	%r12d,%ecx
+	xorl	44(%rsp),%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	4(%rsp),%ebp
+	leal	-899497514(%rdx,%r11,1),%r11d
+	xorl	%esi,%eax
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	xorl	40(%rsp),%r14d
+	movl	%r12d,%eax
+
+	movl	%r11d,%ecx
+	xorl	48(%rsp),%r14d
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	8(%rsp),%r14d
+	leal	-899497514(%rbp,%rdi,1),%edi
+	xorl	%r13d,%eax
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%r14d
+	xorl	44(%rsp),%edx
+	movl	%r11d,%eax
+
+	movl	%edi,%ecx
+	xorl	52(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	12(%rsp),%edx
+	leal	-899497514(%r14,%rsi,1),%esi
+	xorl	%r12d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%edx
+	xorl	48(%rsp),%ebp
+	movl	%edi,%eax
+
+	movl	%esi,%ecx
+	xorl	56(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	16(%rsp),%ebp
+	leal	-899497514(%rdx,%r13,1),%r13d
+	xorl	%r11d,%eax
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	xorl	52(%rsp),%r14d
+	movl	%esi,%eax
+
+	movl	%r13d,%ecx
+	xorl	60(%rsp),%r14d
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	20(%rsp),%r14d
+	leal	-899497514(%rbp,%r12,1),%r12d
+	xorl	%edi,%eax
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%r14d
+	xorl	56(%rsp),%edx
+	movl	%r13d,%eax
+
+	movl	%r12d,%ecx
+	xorl	0(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	24(%rsp),%edx
+	leal	-899497514(%r14,%r11,1),%r11d
+	xorl	%esi,%eax
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	xorl	60(%rsp),%ebp
+	movl	%r12d,%eax
+
+	movl	%r11d,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	28(%rsp),%ebp
+	leal	-899497514(%rdx,%rdi,1),%edi
+	xorl	%r13d,%eax
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	movl	%r11d,%eax
+	movl	%edi,%ecx
+	xorl	%r13d,%eax
+	leal	-899497514(%rbp,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	%r12d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	addl	0(%r8),%esi
+	addl	4(%r8),%edi
+	addl	8(%r8),%r11d
+	addl	12(%r8),%r12d
+	addl	16(%r8),%r13d
+	movl	%esi,0(%r8)
+	movl	%edi,4(%r8)
+	movl	%r11d,8(%r8)
+	movl	%r12d,12(%r8)
+	movl	%r13d,16(%r8)
+
+	subq	$1,%r10
+	leaq	64(%r9),%r9
+	jnz	.Lloop
+
+	movq	64(%rsp),%rsi
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp
+.Lepilogue:
+	.byte	0xf3,0xc3
+.size	sha1_block_data_order,.-sha1_block_data_order
+.type	sha1_block_data_order_ssse3,@function	/* SSSE3 code path; per its name the SHA-1 block function. Reads rdi, rsi, rdx as its three inputs. */
+.align	16
+sha1_block_data_order_ssse3:
+_ssse3_shortcut:
+	movq	%rsp,%r11	/* r11 = stack pointer at entry; the epilogue restores registers and rsp relative to it */
+	pushq	%rbx	/* saved at -8(r11) */
+	pushq	%rbp	/* saved at -16(r11) */
+	pushq	%r12	/* saved at -24(r11) */
+	pushq	%r13	/* saved at -32(r11) */
+	pushq	%r14	/* saved at -40(r11) */
+	leaq	-64(%rsp),%rsp	/* reserve 64 bytes of scratch below the saved registers */
+	andq	$-64,%rsp	/* round rsp down to a 64-byte boundary (movdqa stores below need 16-byte alignment) */
+	movq	%rdi,%r8	/* r8 = pointer to five 32-bit state words (read below, written back after each block) */
+	movq	%rsi,%r9	/* r9 = input pointer, advanced by 64 per block */
+	movq	%rdx,%r10	/* r10 = third argument; treated as a count of 64-byte blocks */
+
+	shlq	$6,%r10	/* count * 64 = input length in bytes */
+	addq	%r9,%r10	/* r10 = one-past-the-end input pointer, compared against r9 in the loop */
+	leaq	K_XX_XX+64(%rip),%r14	/* r14 = K_XX_XX+64; table is defined outside this block and addressed at -64/-32/0/32/64 from here */
+
+	movl	0(%r8),%eax	/* load the five state words into the working registers */
+	movl	4(%r8),%ebx
+	movl	8(%r8),%ecx
+	movl	12(%r8),%edx
+	movl	%ebx,%esi
+	movl	16(%r8),%ebp
+	movl	%ecx,%edi
+	xorl	%edx,%edi
+	andl	%edi,%esi	/* esi = ebx & (ecx ^ edx), pre-computed for the first round in the loop */
+
+	movdqa	64(%r14),%xmm6	/* xmm6 = shuffle mask at K_XX_XX+128 (presumably a byte-swap mask; table not visible here) */
+	movdqa	-64(%r14),%xmm9	/* xmm9 = constant vector at K_XX_XX+0 (presumably the first round constant) */
+	movdqu	0(%r9),%xmm0	/* unaligned loads of the first 64-byte input block */
+	movdqu	16(%r9),%xmm1
+	movdqu	32(%r9),%xmm2
+	movdqu	48(%r9),%xmm3
+.byte	102,15,56,0,198	/* pshufb %xmm6,%xmm0 (hand-encoded) */
+.byte	102,15,56,0,206	/* pshufb %xmm6,%xmm1 */
+.byte	102,15,56,0,214	/* pshufb %xmm6,%xmm2 */
+	addq	$64,%r9	/* advance input pointer past the block just loaded */
+	paddd	%xmm9,%xmm0	/* add the constant vector to the shuffled words ... */
+.byte	102,15,56,0,222	/* pshufb %xmm6,%xmm3 */
+	paddd	%xmm9,%xmm1
+	paddd	%xmm9,%xmm2
+	movdqa	%xmm0,0(%rsp)	/* ... store word+constant to the stack scratch for the scalar rounds ... */
+	psubd	%xmm9,%xmm0	/* ... then subtract it again so xmm0 holds the plain words */
+	movdqa	%xmm1,16(%rsp)
+	psubd	%xmm9,%xmm1
+	movdqa	%xmm2,32(%rsp)
+	psubd	%xmm9,%xmm2
+	jmp	.Loop_ssse3
+.align	16
+.Loop_ssse3:	/* one pass per 64-byte block: scalar rounds consume 0..60(%rsp) while interleaved SIMD code refills those slots */
+	rorl	$2,%ebx
+	pshufd	$238,%xmm0,%xmm4
+	xorl	%edx,%esi
+	movdqa	%xmm3,%xmm8
+	paddd	%xmm3,%xmm9
+	movl	%eax,%edi
+	addl	0(%rsp),%ebp
+	punpcklqdq	%xmm1,%xmm4
+	xorl	%ecx,%ebx
+	roll	$5,%eax
+	addl	%esi,%ebp
+	psrldq	$4,%xmm8
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	pxor	%xmm0,%xmm4
+	addl	%eax,%ebp
+	rorl	$7,%eax
+	pxor	%xmm2,%xmm8
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	addl	4(%rsp),%edx
+	pxor	%xmm8,%xmm4
+	xorl	%ebx,%eax
+	roll	$5,%ebp
+	movdqa	%xmm9,48(%rsp)	/* fourth scratch slot: xmm3 plus the constant vector (paddd above) */
+	addl	%edi,%edx
+	andl	%eax,%esi
+	movdqa	%xmm4,%xmm10
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	rorl	$7,%ebp
+	movdqa	%xmm4,%xmm8
+	xorl	%ebx,%esi
+	pslldq	$12,%xmm10
+	paddd	%xmm4,%xmm4
+	movl	%edx,%edi
+	addl	8(%rsp),%ecx
+	psrld	$31,%xmm8
+	xorl	%eax,%ebp
+	roll	$5,%edx
+	addl	%esi,%ecx
+	movdqa	%xmm10,%xmm9
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	psrld	$30,%xmm10
+	addl	%edx,%ecx
+	rorl	$7,%edx
+	por	%xmm8,%xmm4
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	addl	12(%rsp),%ebx
+	pslld	$2,%xmm9
+	pxor	%xmm10,%xmm4
+	xorl	%ebp,%edx
+	movdqa	-64(%r14),%xmm10	/* reload constant vector at K_XX_XX+0 */
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	andl	%edx,%esi
+	pxor	%xmm9,%xmm4
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	rorl	$7,%ecx
+	pshufd	$238,%xmm1,%xmm5
+	xorl	%ebp,%esi
+	movdqa	%xmm4,%xmm9
+	paddd	%xmm4,%xmm10
+	movl	%ebx,%edi
+	addl	16(%rsp),%eax
+	punpcklqdq	%xmm2,%xmm5
+	xorl	%edx,%ecx
+	roll	$5,%ebx
+	addl	%esi,%eax
+	psrldq	$4,%xmm9
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	pxor	%xmm1,%xmm5
+	addl	%ebx,%eax
+	rorl	$7,%ebx
+	pxor	%xmm3,%xmm9
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	addl	20(%rsp),%ebp
+	pxor	%xmm9,%xmm5
+	xorl	%ecx,%ebx
+	roll	$5,%eax
+	movdqa	%xmm10,0(%rsp)
+	addl	%edi,%ebp
+	andl	%ebx,%esi
+	movdqa	%xmm5,%xmm8
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	rorl	$7,%eax
+	movdqa	%xmm5,%xmm9
+	xorl	%ecx,%esi
+	pslldq	$12,%xmm8
+	paddd	%xmm5,%xmm5
+	movl	%ebp,%edi
+	addl	24(%rsp),%edx
+	psrld	$31,%xmm9
+	xorl	%ebx,%eax
+	roll	$5,%ebp
+	addl	%esi,%edx
+	movdqa	%xmm8,%xmm10
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	psrld	$30,%xmm8
+	addl	%ebp,%edx
+	rorl	$7,%ebp
+	por	%xmm9,%xmm5
+	xorl	%ebx,%edi
+	movl	%edx,%esi
+	addl	28(%rsp),%ecx
+	pslld	$2,%xmm10
+	pxor	%xmm8,%xmm5
+	xorl	%eax,%ebp
+	movdqa	-32(%r14),%xmm8	/* switch to the constant vector at K_XX_XX+32 */
+	roll	$5,%edx
+	addl	%edi,%ecx
+	andl	%ebp,%esi
+	pxor	%xmm10,%xmm5
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	rorl	$7,%edx
+	pshufd	$238,%xmm2,%xmm6
+	xorl	%eax,%esi
+	movdqa	%xmm5,%xmm10
+	paddd	%xmm5,%xmm8
+	movl	%ecx,%edi
+	addl	32(%rsp),%ebx
+	punpcklqdq	%xmm3,%xmm6
+	xorl	%ebp,%edx
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	psrldq	$4,%xmm10
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	pxor	%xmm2,%xmm6
+	addl	%ecx,%ebx
+	rorl	$7,%ecx
+	pxor	%xmm4,%xmm10
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	addl	36(%rsp),%eax
+	pxor	%xmm10,%xmm6
+	xorl	%edx,%ecx
+	roll	$5,%ebx
+	movdqa	%xmm8,16(%rsp)
+	addl	%edi,%eax
+	andl	%ecx,%esi
+	movdqa	%xmm6,%xmm9
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	rorl	$7,%ebx
+	movdqa	%xmm6,%xmm10
+	xorl	%edx,%esi
+	pslldq	$12,%xmm9
+	paddd	%xmm6,%xmm6
+	movl	%eax,%edi
+	addl	40(%rsp),%ebp
+	psrld	$31,%xmm10
+	xorl	%ecx,%ebx
+	roll	$5,%eax
+	addl	%esi,%ebp
+	movdqa	%xmm9,%xmm8
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	psrld	$30,%xmm9
+	addl	%eax,%ebp
+	rorl	$7,%eax
+	por	%xmm10,%xmm6
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	addl	44(%rsp),%edx
+	pslld	$2,%xmm8
+	pxor	%xmm9,%xmm6
+	xorl	%ebx,%eax
+	movdqa	-32(%r14),%xmm9
+	roll	$5,%ebp
+	addl	%edi,%edx
+	andl	%eax,%esi
+	pxor	%xmm8,%xmm6
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	rorl	$7,%ebp
+	pshufd	$238,%xmm3,%xmm7
+	xorl	%ebx,%esi
+	movdqa	%xmm6,%xmm8
+	paddd	%xmm6,%xmm9
+	movl	%edx,%edi
+	addl	48(%rsp),%ecx
+	punpcklqdq	%xmm4,%xmm7
+	xorl	%eax,%ebp
+	roll	$5,%edx
+	addl	%esi,%ecx
+	psrldq	$4,%xmm8
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	pxor	%xmm3,%xmm7
+	addl	%edx,%ecx
+	rorl	$7,%edx
+	pxor	%xmm5,%xmm8
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	addl	52(%rsp),%ebx
+	pxor	%xmm8,%xmm7
+	xorl	%ebp,%edx
+	roll	$5,%ecx
+	movdqa	%xmm9,32(%rsp)
+	addl	%edi,%ebx
+	andl	%edx,%esi
+	movdqa	%xmm7,%xmm10
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	rorl	$7,%ecx
+	movdqa	%xmm7,%xmm8
+	xorl	%ebp,%esi
+	pslldq	$12,%xmm10
+	paddd	%xmm7,%xmm7
+	movl	%ebx,%edi
+	addl	56(%rsp),%eax
+	psrld	$31,%xmm8
+	xorl	%edx,%ecx
+	roll	$5,%ebx
+	addl	%esi,%eax
+	movdqa	%xmm10,%xmm9
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	psrld	$30,%xmm10
+	addl	%ebx,%eax
+	rorl	$7,%ebx
+	por	%xmm8,%xmm7
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	addl	60(%rsp),%ebp
+	pslld	$2,%xmm9
+	pxor	%xmm10,%xmm7
+	xorl	%ecx,%ebx
+	movdqa	-32(%r14),%xmm10
+	roll	$5,%eax
+	addl	%edi,%ebp
+	andl	%ebx,%esi
+	pxor	%xmm9,%xmm7
+	pshufd	$238,%xmm6,%xmm9
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	rorl	$7,%eax
+	pxor	%xmm4,%xmm0
+	xorl	%ecx,%esi
+	movl	%ebp,%edi
+	addl	0(%rsp),%edx
+	punpcklqdq	%xmm7,%xmm9
+	xorl	%ebx,%eax
+	roll	$5,%ebp
+	pxor	%xmm1,%xmm0
+	addl	%esi,%edx
+	andl	%eax,%edi
+	movdqa	%xmm10,%xmm8
+	xorl	%ebx,%eax
+	paddd	%xmm7,%xmm10
+	addl	%ebp,%edx
+	pxor	%xmm9,%xmm0
+	rorl	$7,%ebp
+	xorl	%ebx,%edi
+	movl	%edx,%esi
+	addl	4(%rsp),%ecx
+	movdqa	%xmm0,%xmm9
+	xorl	%eax,%ebp
+	roll	$5,%edx
+	movdqa	%xmm10,48(%rsp)
+	addl	%edi,%ecx
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	pslld	$2,%xmm0
+	addl	%edx,%ecx
+	rorl	$7,%edx
+	psrld	$30,%xmm9
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	addl	8(%rsp),%ebx
+	por	%xmm9,%xmm0
+	xorl	%ebp,%edx
+	roll	$5,%ecx
+	pshufd	$238,%xmm7,%xmm10
+	addl	%esi,%ebx
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	addl	12(%rsp),%eax
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	pxor	%xmm5,%xmm1
+	addl	16(%rsp),%ebp
+	xorl	%ecx,%esi
+	punpcklqdq	%xmm0,%xmm10
+	movl	%eax,%edi
+	roll	$5,%eax
+	pxor	%xmm2,%xmm1
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	movdqa	%xmm8,%xmm9
+	rorl	$7,%ebx
+	paddd	%xmm0,%xmm8
+	addl	%eax,%ebp
+	pxor	%xmm10,%xmm1
+	addl	20(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	movdqa	%xmm1,%xmm10
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	movdqa	%xmm8,0(%rsp)
+	rorl	$7,%eax
+	addl	%ebp,%edx
+	addl	24(%rsp),%ecx
+	pslld	$2,%xmm1
+	xorl	%eax,%esi
+	movl	%edx,%edi
+	psrld	$30,%xmm10
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	rorl	$7,%ebp
+	por	%xmm10,%xmm1
+	addl	%edx,%ecx
+	addl	28(%rsp),%ebx
+	pshufd	$238,%xmm0,%xmm8
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	pxor	%xmm6,%xmm2
+	addl	32(%rsp),%eax
+	xorl	%edx,%esi
+	punpcklqdq	%xmm1,%xmm8
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	pxor	%xmm3,%xmm2
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	movdqa	0(%r14),%xmm10	/* switch to the constant vector at K_XX_XX+64 */
+	rorl	$7,%ecx
+	paddd	%xmm1,%xmm9
+	addl	%ebx,%eax
+	pxor	%xmm8,%xmm2
+	addl	36(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	movdqa	%xmm2,%xmm8
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	movdqa	%xmm9,16(%rsp)
+	rorl	$7,%ebx
+	addl	%eax,%ebp
+	addl	40(%rsp),%edx
+	pslld	$2,%xmm2
+	xorl	%ebx,%esi
+	movl	%ebp,%edi
+	psrld	$30,%xmm8
+	roll	$5,%ebp
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	rorl	$7,%eax
+	por	%xmm8,%xmm2
+	addl	%ebp,%edx
+	addl	44(%rsp),%ecx
+	pshufd	$238,%xmm1,%xmm9
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	rorl	$7,%ebp
+	addl	%edx,%ecx
+	pxor	%xmm7,%xmm3
+	addl	48(%rsp),%ebx
+	xorl	%ebp,%esi
+	punpcklqdq	%xmm2,%xmm9
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	pxor	%xmm4,%xmm3
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	movdqa	%xmm10,%xmm8
+	rorl	$7,%edx
+	paddd	%xmm2,%xmm10
+	addl	%ecx,%ebx
+	pxor	%xmm9,%xmm3
+	addl	52(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	movdqa	%xmm3,%xmm9
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	movdqa	%xmm10,32(%rsp)
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	56(%rsp),%ebp
+	pslld	$2,%xmm3
+	xorl	%ecx,%esi
+	movl	%eax,%edi
+	psrld	$30,%xmm9
+	roll	$5,%eax
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	rorl	$7,%ebx
+	por	%xmm9,%xmm3
+	addl	%eax,%ebp
+	addl	60(%rsp),%edx
+	pshufd	$238,%xmm2,%xmm10
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	rorl	$7,%eax
+	addl	%ebp,%edx
+	pxor	%xmm0,%xmm4
+	addl	0(%rsp),%ecx
+	xorl	%eax,%esi
+	punpcklqdq	%xmm3,%xmm10
+	movl	%edx,%edi
+	roll	$5,%edx
+	pxor	%xmm5,%xmm4
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	movdqa	%xmm8,%xmm9
+	rorl	$7,%ebp
+	paddd	%xmm3,%xmm8
+	addl	%edx,%ecx
+	pxor	%xmm10,%xmm4
+	addl	4(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	movdqa	%xmm4,%xmm10
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	movdqa	%xmm8,48(%rsp)
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	addl	8(%rsp),%eax
+	pslld	$2,%xmm4
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	psrld	$30,%xmm10
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	rorl	$7,%ecx
+	por	%xmm10,%xmm4
+	addl	%ebx,%eax
+	addl	12(%rsp),%ebp
+	pshufd	$238,%xmm3,%xmm8
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	rorl	$7,%ebx
+	addl	%eax,%ebp
+	pxor	%xmm1,%xmm5
+	addl	16(%rsp),%edx
+	xorl	%ebx,%esi
+	punpcklqdq	%xmm4,%xmm8
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	pxor	%xmm6,%xmm5
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	movdqa	%xmm9,%xmm10
+	rorl	$7,%eax
+	paddd	%xmm4,%xmm9
+	addl	%ebp,%edx
+	pxor	%xmm8,%xmm5
+	addl	20(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	movdqa	%xmm5,%xmm8
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	movdqa	%xmm9,0(%rsp)
+	rorl	$7,%ebp
+	addl	%edx,%ecx
+	addl	24(%rsp),%ebx
+	pslld	$2,%xmm5
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	psrld	$30,%xmm8
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	rorl	$7,%edx
+	por	%xmm8,%xmm5
+	addl	%ecx,%ebx
+	addl	28(%rsp),%eax
+	pshufd	$238,%xmm4,%xmm9
+	rorl	$7,%ecx
+	movl	%ebx,%esi
+	xorl	%edx,%edi
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%ecx,%esi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	pxor	%xmm2,%xmm6
+	addl	32(%rsp),%ebp
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	rorl	$7,%ebx
+	punpcklqdq	%xmm5,%xmm9
+	movl	%eax,%edi
+	xorl	%ecx,%esi
+	pxor	%xmm7,%xmm6
+	roll	$5,%eax
+	addl	%esi,%ebp
+	movdqa	%xmm10,%xmm8
+	xorl	%ebx,%edi
+	paddd	%xmm5,%xmm10
+	xorl	%ecx,%ebx
+	pxor	%xmm9,%xmm6
+	addl	%eax,%ebp
+	addl	36(%rsp),%edx
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	rorl	$7,%eax
+	movdqa	%xmm6,%xmm9
+	movl	%ebp,%esi
+	xorl	%ebx,%edi
+	movdqa	%xmm10,16(%rsp)
+	roll	$5,%ebp
+	addl	%edi,%edx
+	xorl	%eax,%esi
+	pslld	$2,%xmm6
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	psrld	$30,%xmm9
+	addl	40(%rsp),%ecx
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	por	%xmm9,%xmm6
+	rorl	$7,%ebp
+	movl	%edx,%edi
+	xorl	%eax,%esi
+	roll	$5,%edx
+	pshufd	$238,%xmm5,%xmm10
+	addl	%esi,%ecx
+	xorl	%ebp,%edi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	addl	44(%rsp),%ebx
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	rorl	$7,%edx
+	movl	%ecx,%esi
+	xorl	%ebp,%edi
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%edx,%esi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	pxor	%xmm3,%xmm7
+	addl	48(%rsp),%eax
+	andl	%edx,%esi
+	xorl	%ebp,%edx
+	rorl	$7,%ecx
+	punpcklqdq	%xmm6,%xmm10
+	movl	%ebx,%edi
+	xorl	%edx,%esi
+	pxor	%xmm0,%xmm7
+	roll	$5,%ebx
+	addl	%esi,%eax
+	movdqa	32(%r14),%xmm9	/* switch to the constant vector at K_XX_XX+96 */
+	xorl	%ecx,%edi
+	paddd	%xmm6,%xmm8
+	xorl	%edx,%ecx
+	pxor	%xmm10,%xmm7
+	addl	%ebx,%eax
+	addl	52(%rsp),%ebp
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	rorl	$7,%ebx
+	movdqa	%xmm7,%xmm10
+	movl	%eax,%esi
+	xorl	%ecx,%edi
+	movdqa	%xmm8,32(%rsp)
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%ebx,%esi
+	pslld	$2,%xmm7
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	psrld	$30,%xmm10
+	addl	56(%rsp),%edx
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	por	%xmm10,%xmm7
+	rorl	$7,%eax
+	movl	%ebp,%edi
+	xorl	%ebx,%esi
+	roll	$5,%ebp
+	pshufd	$238,%xmm6,%xmm8
+	addl	%esi,%edx
+	xorl	%eax,%edi
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	addl	60(%rsp),%ecx
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	rorl	$7,%ebp
+	movl	%edx,%esi
+	xorl	%eax,%edi
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	pxor	%xmm4,%xmm0
+	addl	0(%rsp),%ebx
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	rorl	$7,%edx
+	punpcklqdq	%xmm7,%xmm8
+	movl	%ecx,%edi
+	xorl	%ebp,%esi
+	pxor	%xmm1,%xmm0
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	movdqa	%xmm9,%xmm10
+	xorl	%edx,%edi
+	paddd	%xmm7,%xmm9
+	xorl	%ebp,%edx
+	pxor	%xmm8,%xmm0
+	addl	%ecx,%ebx
+	addl	4(%rsp),%eax
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	rorl	$7,%ecx
+	movdqa	%xmm0,%xmm8
+	movl	%ebx,%esi
+	xorl	%edx,%edi
+	movdqa	%xmm9,48(%rsp)
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%ecx,%esi
+	pslld	$2,%xmm0
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	psrld	$30,%xmm8
+	addl	8(%rsp),%ebp
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	por	%xmm8,%xmm0
+	rorl	$7,%ebx
+	movl	%eax,%edi
+	xorl	%ecx,%esi
+	roll	$5,%eax
+	pshufd	$238,%xmm7,%xmm9
+	addl	%esi,%ebp
+	xorl	%ebx,%edi
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	addl	12(%rsp),%edx
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	rorl	$7,%eax
+	movl	%ebp,%esi
+	xorl	%ebx,%edi
+	roll	$5,%ebp
+	addl	%edi,%edx
+	xorl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	pxor	%xmm5,%xmm1
+	addl	16(%rsp),%ecx
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	rorl	$7,%ebp
+	punpcklqdq	%xmm0,%xmm9
+	movl	%edx,%edi
+	xorl	%eax,%esi
+	pxor	%xmm2,%xmm1
+	roll	$5,%edx
+	addl	%esi,%ecx
+	movdqa	%xmm10,%xmm8
+	xorl	%ebp,%edi
+	paddd	%xmm0,%xmm10
+	xorl	%eax,%ebp
+	pxor	%xmm9,%xmm1
+	addl	%edx,%ecx
+	addl	20(%rsp),%ebx
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	rorl	$7,%edx
+	movdqa	%xmm1,%xmm9
+	movl	%ecx,%esi
+	xorl	%ebp,%edi
+	movdqa	%xmm10,0(%rsp)
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%edx,%esi
+	pslld	$2,%xmm1
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	psrld	$30,%xmm9
+	addl	24(%rsp),%eax
+	andl	%edx,%esi
+	xorl	%ebp,%edx
+	por	%xmm9,%xmm1
+	rorl	$7,%ecx
+	movl	%ebx,%edi
+	xorl	%edx,%esi
+	roll	$5,%ebx
+	pshufd	$238,%xmm0,%xmm10
+	addl	%esi,%eax
+	xorl	%ecx,%edi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	addl	28(%rsp),%ebp
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	rorl	$7,%ebx
+	movl	%eax,%esi
+	xorl	%ecx,%edi
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	pxor	%xmm6,%xmm2
+	addl	32(%rsp),%edx
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	rorl	$7,%eax
+	punpcklqdq	%xmm1,%xmm10
+	movl	%ebp,%edi
+	xorl	%ebx,%esi
+	pxor	%xmm3,%xmm2
+	roll	$5,%ebp
+	addl	%esi,%edx
+	movdqa	%xmm8,%xmm9
+	xorl	%eax,%edi
+	paddd	%xmm1,%xmm8
+	xorl	%ebx,%eax
+	pxor	%xmm10,%xmm2
+	addl	%ebp,%edx
+	addl	36(%rsp),%ecx
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	rorl	$7,%ebp
+	movdqa	%xmm2,%xmm10
+	movl	%edx,%esi
+	xorl	%eax,%edi
+	movdqa	%xmm8,16(%rsp)
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%ebp,%esi
+	pslld	$2,%xmm2
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	psrld	$30,%xmm10
+	addl	40(%rsp),%ebx
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	por	%xmm10,%xmm2
+	rorl	$7,%edx
+	movl	%ecx,%edi
+	xorl	%ebp,%esi
+	roll	$5,%ecx
+	pshufd	$238,%xmm1,%xmm8
+	addl	%esi,%ebx
+	xorl	%edx,%edi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	addl	44(%rsp),%eax
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	rorl	$7,%ecx
+	movl	%ebx,%esi
+	xorl	%edx,%edi
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	pxor	%xmm7,%xmm3
+	addl	48(%rsp),%ebp
+	xorl	%ecx,%esi
+	punpcklqdq	%xmm2,%xmm8
+	movl	%eax,%edi
+	roll	$5,%eax
+	pxor	%xmm4,%xmm3
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	movdqa	%xmm9,%xmm10
+	rorl	$7,%ebx
+	paddd	%xmm2,%xmm9
+	addl	%eax,%ebp
+	pxor	%xmm8,%xmm3
+	addl	52(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	movdqa	%xmm3,%xmm8
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	movdqa	%xmm9,32(%rsp)
+	rorl	$7,%eax
+	addl	%ebp,%edx
+	addl	56(%rsp),%ecx
+	pslld	$2,%xmm3
+	xorl	%eax,%esi
+	movl	%edx,%edi
+	psrld	$30,%xmm8
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	rorl	$7,%ebp
+	por	%xmm8,%xmm3
+	addl	%edx,%ecx
+	addl	60(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	addl	0(%rsp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	paddd	%xmm3,%xmm10
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	movdqa	%xmm10,48(%rsp)
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	4(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	rorl	$7,%ebx
+	addl	%eax,%ebp
+	addl	8(%rsp),%edx
+	xorl	%ebx,%esi
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	rorl	$7,%eax
+	addl	%ebp,%edx
+	addl	12(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	rorl	$7,%ebp
+	addl	%edx,%ecx
+	cmpq	%r10,%r9	/* has the input pointer reached the end pointer computed in the prologue? */
+	je	.Ldone_ssse3	/* yes: finish the remaining rounds without loading another block */
+	movdqa	64(%r14),%xmm6	/* reload the shuffle mask (xmm6 was reused as a work register above) */
+	movdqa	-64(%r14),%xmm9	/* reload the constant vector at K_XX_XX+0 for the next block */
+	movdqu	0(%r9),%xmm0	/* unaligned loads of the next 64-byte input block */
+	movdqu	16(%r9),%xmm1
+	movdqu	32(%r9),%xmm2
+	movdqu	48(%r9),%xmm3
+.byte	102,15,56,0,198	/* pshufb %xmm6,%xmm0 (hand-encoded) */
+	addq	$64,%r9	/* advance input pointer past the block just loaded */
+	addl	16(%rsp),%ebx
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+.byte	102,15,56,0,206	/* pshufb %xmm6,%xmm1 */
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	rorl	$7,%edx
+	paddd	%xmm9,%xmm0
+	addl	%ecx,%ebx
+	addl	20(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	movdqa	%xmm0,0(%rsp)	/* slot 0 was already consumed above, so it can take next-block data now */
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	rorl	$7,%ecx
+	psubd	%xmm9,%xmm0
+	addl	%ebx,%eax
+	addl	24(%rsp),%ebp
+	xorl	%ecx,%esi
+	movl	%eax,%edi
+	roll	$5,%eax
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	rorl	$7,%ebx
+	addl	%eax,%ebp
+	addl	28(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	rorl	$7,%eax
+	addl	%ebp,%edx
+	addl	32(%rsp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%edi
+.byte	102,15,56,0,214	/* pshufb %xmm6,%xmm2 */
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	rorl	$7,%ebp
+	paddd	%xmm9,%xmm1
+	addl	%edx,%ecx
+	addl	36(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	movdqa	%xmm1,16(%rsp)
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	rorl	$7,%edx
+	psubd	%xmm9,%xmm1
+	addl	%ecx,%ebx
+	addl	40(%rsp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	44(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	rorl	$7,%ebx
+	addl	%eax,%ebp
+	addl	48(%rsp),%edx
+	xorl	%ebx,%esi
+	movl	%ebp,%edi
+.byte	102,15,56,0,222	/* pshufb %xmm6,%xmm3 */
+	roll	$5,%ebp
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	rorl	$7,%eax
+	paddd	%xmm9,%xmm2
+	addl	%ebp,%edx
+	addl	52(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	movdqa	%xmm2,32(%rsp)
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	rorl	$7,%ebp
+	psubd	%xmm9,%xmm2
+	addl	%edx,%ecx
+	addl	56(%rsp),%ebx
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	addl	60(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	addl	%edi,%eax
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	0(%r8),%eax	/* fold the previous state words into the working values ... */
+	addl	4(%r8),%esi	/* (esi holds the copy of ebx taken just above) */
+	addl	8(%r8),%ecx
+	addl	12(%r8),%edx
+	movl	%eax,0(%r8)	/* ... and write the updated state back through r8 */
+	addl	16(%r8),%ebp
+	movl	%esi,4(%r8)
+	movl	%esi,%ebx	/* ebx = new second state word, as the loop top expects */
+	movl	%ecx,8(%r8)
+	movl	%ecx,%edi
+	movl	%edx,12(%r8)
+	xorl	%edx,%edi
+	movl	%ebp,16(%r8)
+	andl	%edi,%esi	/* esi = ebx & (ecx ^ edx), same pre-computation as in the prologue */
+	jmp	.Loop_ssse3
+
+.align	16
+.Ldone_ssse3:	/* reached when r9 == r10: run the remaining scalar rounds from 16..60(%rsp) with no further input loads */
+	addl	16(%rsp),%ebx
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	addl	20(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	24(%rsp),%ebp
+	xorl	%ecx,%esi
+	movl	%eax,%edi
+	roll	$5,%eax
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	rorl	$7,%ebx
+	addl	%eax,%ebp
+	addl	28(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	rorl	$7,%eax
+	addl	%ebp,%edx
+	addl	32(%rsp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%edi
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	rorl	$7,%ebp
+	addl	%edx,%ecx
+	addl	36(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	addl	40(%rsp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	44(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	rorl	$7,%ebx
+	addl	%eax,%ebp
+	addl	48(%rsp),%edx
+	xorl	%ebx,%esi
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	rorl	$7,%eax
+	addl	%ebp,%edx
+	addl	52(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	rorl	$7,%ebp
+	addl	%edx,%ecx
+	addl	56(%rsp),%ebx
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	addl	60(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	addl	%edi,%eax
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	0(%r8),%eax	/* fold the previous state words into the working values ... */
+	addl	4(%r8),%esi	/* (esi holds the copy of ebx taken just above) */
+	addl	8(%r8),%ecx
+	movl	%eax,0(%r8)	/* ... and write the final state back through r8 */
+	addl	12(%r8),%edx
+	movl	%esi,4(%r8)
+	addl	16(%r8),%ebp
+	movl	%ecx,8(%r8)
+	movl	%edx,12(%r8)
+	movl	%ebp,16(%r8)
+	movq	-40(%r11),%r14	/* restore the registers pushed in the prologue, addressed from the entry rsp kept in r11 */
+	movq	-32(%r11),%r13
+	movq	-24(%r11),%r12
+	movq	-16(%r11),%rbp
+	movq	-8(%r11),%rbx
+	leaq	(%r11),%rsp	/* rsp = entry value, discarding the aligned scratch area */
+.Lepilogue_ssse3:
+	.byte	0xf3,0xc3	/* hand-encoded "rep ret" (F3 C3) */
+.size	sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
+.type	sha1_block_data_order_avx,@function
+.align	16
+sha1_block_data_order_avx:
+_avx_shortcut:
+	movq	%rsp,%r11
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	leaq	-64(%rsp),%rsp
+	vzeroupper
+	andq	$-64,%rsp
+	movq	%rdi,%r8
+	movq	%rsi,%r9
+	movq	%rdx,%r10
+
+	shlq	$6,%r10
+	addq	%r9,%r10
+	leaq	K_XX_XX+64(%rip),%r14
+
+	movl	0(%r8),%eax
+	movl	4(%r8),%ebx
+	movl	8(%r8),%ecx
+	movl	12(%r8),%edx
+	movl	%ebx,%esi
+	movl	16(%r8),%ebp
+	movl	%ecx,%edi
+	xorl	%edx,%edi
+	andl	%edi,%esi
+
+	vmovdqa	64(%r14),%xmm6
+	vmovdqa	-64(%r14),%xmm11
+	vmovdqu	0(%r9),%xmm0
+	vmovdqu	16(%r9),%xmm1
+	vmovdqu	32(%r9),%xmm2
+	vmovdqu	48(%r9),%xmm3
+	vpshufb	%xmm6,%xmm0,%xmm0
+	addq	$64,%r9
+	vpshufb	%xmm6,%xmm1,%xmm1
+	vpshufb	%xmm6,%xmm2,%xmm2
+	vpshufb	%xmm6,%xmm3,%xmm3
+	vpaddd	%xmm11,%xmm0,%xmm4
+	vpaddd	%xmm11,%xmm1,%xmm5
+	vpaddd	%xmm11,%xmm2,%xmm6
+	vmovdqa	%xmm4,0(%rsp)
+	vmovdqa	%xmm5,16(%rsp)
+	vmovdqa	%xmm6,32(%rsp)
+	jmp	.Loop_avx
+.align	16
+.Loop_avx:	# one pass per 64-byte block: scalar rounds on %eax/%ebx/%ecx/%edx/%ebp interleaved with AVX message-schedule work
+	shrdl	$2,%ebx,%ebx
+	xorl	%edx,%esi
+	vpalignr	$8,%xmm0,%xmm1,%xmm4
+	movl	%eax,%edi
+	addl	0(%rsp),%ebp	# each round adds one precomputed (message word + constant) dword from the stack
+	vpaddd	%xmm3,%xmm11,%xmm9
+	xorl	%ecx,%ebx
+	shldl	$5,%eax,%eax
+	vpsrldq	$4,%xmm3,%xmm8
+	addl	%esi,%ebp
+	andl	%ebx,%edi
+	vpxor	%xmm0,%xmm4,%xmm4
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	vpxor	%xmm2,%xmm8,%xmm8
+	shrdl	$7,%eax,%eax
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	addl	4(%rsp),%edx
+	vpxor	%xmm8,%xmm4,%xmm4
+	xorl	%ebx,%eax
+	shldl	$5,%ebp,%ebp
+	vmovdqa	%xmm9,48(%rsp)	# store the %xmm3 + constant sum computed above for later rounds
+	addl	%edi,%edx
+	andl	%eax,%esi
+	vpsrld	$31,%xmm4,%xmm8
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	shrdl	$7,%ebp,%ebp
+	xorl	%ebx,%esi
+	vpslldq	$12,%xmm4,%xmm10
+	vpaddd	%xmm4,%xmm4,%xmm4
+	movl	%edx,%edi
+	addl	8(%rsp),%ecx
+	xorl	%eax,%ebp
+	shldl	$5,%edx,%edx
+	vpsrld	$30,%xmm10,%xmm9
+	vpor	%xmm8,%xmm4,%xmm4
+	addl	%esi,%ecx
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	vpslld	$2,%xmm10,%xmm10
+	vpxor	%xmm9,%xmm4,%xmm4
+	shrdl	$7,%edx,%edx
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	addl	12(%rsp),%ebx
+	vpxor	%xmm10,%xmm4,%xmm4
+	xorl	%ebp,%edx
+	shldl	$5,%ecx,%ecx
+	addl	%edi,%ebx
+	andl	%edx,%esi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	shrdl	$7,%ecx,%ecx
+	xorl	%ebp,%esi
+	vpalignr	$8,%xmm1,%xmm2,%xmm5
+	movl	%ebx,%edi
+	addl	16(%rsp),%eax
+	vpaddd	%xmm4,%xmm11,%xmm9
+	xorl	%edx,%ecx
+	shldl	$5,%ebx,%ebx
+	vpsrldq	$4,%xmm4,%xmm8
+	addl	%esi,%eax
+	andl	%ecx,%edi
+	vpxor	%xmm1,%xmm5,%xmm5
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	vpxor	%xmm3,%xmm8,%xmm8
+	shrdl	$7,%ebx,%ebx
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	addl	20(%rsp),%ebp
+	vpxor	%xmm8,%xmm5,%xmm5
+	xorl	%ecx,%ebx
+	shldl	$5,%eax,%eax
+	vmovdqa	%xmm9,0(%rsp)
+	addl	%edi,%ebp
+	andl	%ebx,%esi
+	vpsrld	$31,%xmm5,%xmm8
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	shrdl	$7,%eax,%eax
+	xorl	%ecx,%esi
+	vpslldq	$12,%xmm5,%xmm10
+	vpaddd	%xmm5,%xmm5,%xmm5
+	movl	%ebp,%edi
+	addl	24(%rsp),%edx
+	xorl	%ebx,%eax
+	shldl	$5,%ebp,%ebp
+	vpsrld	$30,%xmm10,%xmm9
+	vpor	%xmm8,%xmm5,%xmm5
+	addl	%esi,%edx
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	vpslld	$2,%xmm10,%xmm10
+	vpxor	%xmm9,%xmm5,%xmm5
+	shrdl	$7,%ebp,%ebp
+	xorl	%ebx,%edi
+	movl	%edx,%esi
+	addl	28(%rsp),%ecx
+	vpxor	%xmm10,%xmm5,%xmm5
+	xorl	%eax,%ebp
+	shldl	$5,%edx,%edx
+	vmovdqa	-32(%r14),%xmm11	# switch to the K_XX_XX+32 constant row (0x6ed9eba1)
+	addl	%edi,%ecx
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	shrdl	$7,%edx,%edx
+	xorl	%eax,%esi
+	vpalignr	$8,%xmm2,%xmm3,%xmm6
+	movl	%ecx,%edi
+	addl	32(%rsp),%ebx
+	vpaddd	%xmm5,%xmm11,%xmm9
+	xorl	%ebp,%edx
+	shldl	$5,%ecx,%ecx
+	vpsrldq	$4,%xmm5,%xmm8
+	addl	%esi,%ebx
+	andl	%edx,%edi
+	vpxor	%xmm2,%xmm6,%xmm6
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	vpxor	%xmm4,%xmm8,%xmm8
+	shrdl	$7,%ecx,%ecx
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	addl	36(%rsp),%eax
+	vpxor	%xmm8,%xmm6,%xmm6
+	xorl	%edx,%ecx
+	shldl	$5,%ebx,%ebx
+	vmovdqa	%xmm9,16(%rsp)
+	addl	%edi,%eax
+	andl	%ecx,%esi
+	vpsrld	$31,%xmm6,%xmm8
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	shrdl	$7,%ebx,%ebx
+	xorl	%edx,%esi
+	vpslldq	$12,%xmm6,%xmm10
+	vpaddd	%xmm6,%xmm6,%xmm6
+	movl	%eax,%edi
+	addl	40(%rsp),%ebp
+	xorl	%ecx,%ebx
+	shldl	$5,%eax,%eax
+	vpsrld	$30,%xmm10,%xmm9
+	vpor	%xmm8,%xmm6,%xmm6
+	addl	%esi,%ebp
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	vpslld	$2,%xmm10,%xmm10
+	vpxor	%xmm9,%xmm6,%xmm6
+	shrdl	$7,%eax,%eax
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	addl	44(%rsp),%edx
+	vpxor	%xmm10,%xmm6,%xmm6
+	xorl	%ebx,%eax
+	shldl	$5,%ebp,%ebp
+	addl	%edi,%edx
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	shrdl	$7,%ebp,%ebp
+	xorl	%ebx,%esi
+	vpalignr	$8,%xmm3,%xmm4,%xmm7
+	movl	%edx,%edi
+	addl	48(%rsp),%ecx
+	vpaddd	%xmm6,%xmm11,%xmm9
+	xorl	%eax,%ebp
+	shldl	$5,%edx,%edx
+	vpsrldq	$4,%xmm6,%xmm8
+	addl	%esi,%ecx
+	andl	%ebp,%edi
+	vpxor	%xmm3,%xmm7,%xmm7
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	vpxor	%xmm5,%xmm8,%xmm8
+	shrdl	$7,%edx,%edx
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	addl	52(%rsp),%ebx
+	vpxor	%xmm8,%xmm7,%xmm7
+	xorl	%ebp,%edx
+	shldl	$5,%ecx,%ecx
+	vmovdqa	%xmm9,32(%rsp)
+	addl	%edi,%ebx
+	andl	%edx,%esi
+	vpsrld	$31,%xmm7,%xmm8
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	shrdl	$7,%ecx,%ecx
+	xorl	%ebp,%esi
+	vpslldq	$12,%xmm7,%xmm10
+	vpaddd	%xmm7,%xmm7,%xmm7
+	movl	%ebx,%edi
+	addl	56(%rsp),%eax
+	xorl	%edx,%ecx
+	shldl	$5,%ebx,%ebx
+	vpsrld	$30,%xmm10,%xmm9
+	vpor	%xmm8,%xmm7,%xmm7
+	addl	%esi,%eax
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	vpslld	$2,%xmm10,%xmm10
+	vpxor	%xmm9,%xmm7,%xmm7
+	shrdl	$7,%ebx,%ebx
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	addl	60(%rsp),%ebp
+	vpxor	%xmm10,%xmm7,%xmm7
+	xorl	%ecx,%ebx
+	shldl	$5,%eax,%eax
+	addl	%edi,%ebp
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	vpalignr	$8,%xmm6,%xmm7,%xmm8
+	vpxor	%xmm4,%xmm0,%xmm0
+	shrdl	$7,%eax,%eax
+	xorl	%ecx,%esi
+	movl	%ebp,%edi
+	addl	0(%rsp),%edx
+	vpxor	%xmm1,%xmm0,%xmm0
+	xorl	%ebx,%eax
+	shldl	$5,%ebp,%ebp
+	vpaddd	%xmm7,%xmm11,%xmm9
+	addl	%esi,%edx
+	andl	%eax,%edi
+	vpxor	%xmm8,%xmm0,%xmm0
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	shrdl	$7,%ebp,%ebp
+	xorl	%ebx,%edi
+	vpsrld	$30,%xmm0,%xmm8
+	vmovdqa	%xmm9,48(%rsp)
+	movl	%edx,%esi
+	addl	4(%rsp),%ecx
+	xorl	%eax,%ebp
+	shldl	$5,%edx,%edx
+	vpslld	$2,%xmm0,%xmm0
+	addl	%edi,%ecx
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	shrdl	$7,%edx,%edx
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	addl	8(%rsp),%ebx
+	vpor	%xmm8,%xmm0,%xmm0
+	xorl	%ebp,%edx
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	addl	12(%rsp),%eax
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	vpalignr	$8,%xmm7,%xmm0,%xmm8
+	vpxor	%xmm5,%xmm1,%xmm1
+	addl	16(%rsp),%ebp
+	xorl	%ecx,%esi
+	movl	%eax,%edi
+	shldl	$5,%eax,%eax
+	vpxor	%xmm2,%xmm1,%xmm1
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	vpaddd	%xmm0,%xmm11,%xmm9
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	vpxor	%xmm8,%xmm1,%xmm1
+	addl	20(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	shldl	$5,%ebp,%ebp
+	vpsrld	$30,%xmm1,%xmm8
+	vmovdqa	%xmm9,0(%rsp)
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	vpslld	$2,%xmm1,%xmm1
+	addl	24(%rsp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%edi
+	shldl	$5,%edx,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	vpor	%xmm8,%xmm1,%xmm1
+	addl	28(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	vpalignr	$8,%xmm0,%xmm1,%xmm8
+	vpxor	%xmm6,%xmm2,%xmm2
+	addl	32(%rsp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	shldl	$5,%ebx,%ebx
+	vpxor	%xmm3,%xmm2,%xmm2
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	vpaddd	%xmm1,%xmm11,%xmm9
+	vmovdqa	0(%r14),%xmm11	# switch to the K_XX_XX+64 constant row (0x8f1bbcdc)
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	vpxor	%xmm8,%xmm2,%xmm2
+	addl	36(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	vpsrld	$30,%xmm2,%xmm8
+	vmovdqa	%xmm9,16(%rsp)
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	vpslld	$2,%xmm2,%xmm2
+	addl	40(%rsp),%edx
+	xorl	%ebx,%esi
+	movl	%ebp,%edi
+	shldl	$5,%ebp,%ebp
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	vpor	%xmm8,%xmm2,%xmm2
+	addl	44(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	vpalignr	$8,%xmm1,%xmm2,%xmm8
+	vpxor	%xmm7,%xmm3,%xmm3
+	addl	48(%rsp),%ebx
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	shldl	$5,%ecx,%ecx
+	vpxor	%xmm4,%xmm3,%xmm3
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	vpaddd	%xmm2,%xmm11,%xmm9
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	vpxor	%xmm8,%xmm3,%xmm3
+	addl	52(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	vpsrld	$30,%xmm3,%xmm8
+	vmovdqa	%xmm9,32(%rsp)
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	vpslld	$2,%xmm3,%xmm3
+	addl	56(%rsp),%ebp
+	xorl	%ecx,%esi
+	movl	%eax,%edi
+	shldl	$5,%eax,%eax
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	vpor	%xmm8,%xmm3,%xmm3
+	addl	60(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	shldl	$5,%ebp,%ebp
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	vpalignr	$8,%xmm2,%xmm3,%xmm8
+	vpxor	%xmm0,%xmm4,%xmm4
+	addl	0(%rsp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%edi
+	shldl	$5,%edx,%edx
+	vpxor	%xmm5,%xmm4,%xmm4
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	vpaddd	%xmm3,%xmm11,%xmm9
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	vpxor	%xmm8,%xmm4,%xmm4
+	addl	4(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	vpsrld	$30,%xmm4,%xmm8
+	vmovdqa	%xmm9,48(%rsp)
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	vpslld	$2,%xmm4,%xmm4
+	addl	8(%rsp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	vpor	%xmm8,%xmm4,%xmm4
+	addl	12(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	vpalignr	$8,%xmm3,%xmm4,%xmm8
+	vpxor	%xmm1,%xmm5,%xmm5
+	addl	16(%rsp),%edx
+	xorl	%ebx,%esi
+	movl	%ebp,%edi
+	shldl	$5,%ebp,%ebp
+	vpxor	%xmm6,%xmm5,%xmm5
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	vpaddd	%xmm4,%xmm11,%xmm9
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	vpxor	%xmm8,%xmm5,%xmm5
+	addl	20(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	vpsrld	$30,%xmm5,%xmm8
+	vmovdqa	%xmm9,0(%rsp)
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	vpslld	$2,%xmm5,%xmm5
+	addl	24(%rsp),%ebx
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	vpor	%xmm8,%xmm5,%xmm5
+	addl	28(%rsp),%eax
+	shrdl	$7,%ecx,%ecx
+	movl	%ebx,%esi
+	xorl	%edx,%edi
+	shldl	$5,%ebx,%ebx
+	addl	%edi,%eax
+	xorl	%ecx,%esi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	vpalignr	$8,%xmm4,%xmm5,%xmm8
+	vpxor	%xmm2,%xmm6,%xmm6
+	addl	32(%rsp),%ebp
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	shrdl	$7,%ebx,%ebx
+	vpxor	%xmm7,%xmm6,%xmm6
+	movl	%eax,%edi
+	xorl	%ecx,%esi
+	vpaddd	%xmm5,%xmm11,%xmm9
+	shldl	$5,%eax,%eax
+	addl	%esi,%ebp
+	vpxor	%xmm8,%xmm6,%xmm6
+	xorl	%ebx,%edi
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	addl	36(%rsp),%edx
+	vpsrld	$30,%xmm6,%xmm8
+	vmovdqa	%xmm9,16(%rsp)
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	shrdl	$7,%eax,%eax
+	movl	%ebp,%esi
+	vpslld	$2,%xmm6,%xmm6
+	xorl	%ebx,%edi
+	shldl	$5,%ebp,%ebp
+	addl	%edi,%edx
+	xorl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	addl	40(%rsp),%ecx
+	andl	%eax,%esi
+	vpor	%xmm8,%xmm6,%xmm6
+	xorl	%ebx,%eax
+	shrdl	$7,%ebp,%ebp
+	movl	%edx,%edi
+	xorl	%eax,%esi
+	shldl	$5,%edx,%edx
+	addl	%esi,%ecx
+	xorl	%ebp,%edi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	addl	44(%rsp),%ebx
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	shrdl	$7,%edx,%edx
+	movl	%ecx,%esi
+	xorl	%ebp,%edi
+	shldl	$5,%ecx,%ecx
+	addl	%edi,%ebx
+	xorl	%edx,%esi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	vpalignr	$8,%xmm5,%xmm6,%xmm8
+	vpxor	%xmm3,%xmm7,%xmm7
+	addl	48(%rsp),%eax
+	andl	%edx,%esi
+	xorl	%ebp,%edx
+	shrdl	$7,%ecx,%ecx
+	vpxor	%xmm0,%xmm7,%xmm7
+	movl	%ebx,%edi
+	xorl	%edx,%esi
+	vpaddd	%xmm6,%xmm11,%xmm9
+	vmovdqa	32(%r14),%xmm11	# switch to the K_XX_XX+96 constant row (0xca62c1d6)
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	vpxor	%xmm8,%xmm7,%xmm7
+	xorl	%ecx,%edi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	addl	52(%rsp),%ebp
+	vpsrld	$30,%xmm7,%xmm8
+	vmovdqa	%xmm9,32(%rsp)
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	shrdl	$7,%ebx,%ebx
+	movl	%eax,%esi
+	vpslld	$2,%xmm7,%xmm7
+	xorl	%ecx,%edi
+	shldl	$5,%eax,%eax
+	addl	%edi,%ebp
+	xorl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	addl	56(%rsp),%edx
+	andl	%ebx,%esi
+	vpor	%xmm8,%xmm7,%xmm7
+	xorl	%ecx,%ebx
+	shrdl	$7,%eax,%eax
+	movl	%ebp,%edi
+	xorl	%ebx,%esi
+	shldl	$5,%ebp,%ebp
+	addl	%esi,%edx
+	xorl	%eax,%edi
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	addl	60(%rsp),%ecx
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	shrdl	$7,%ebp,%ebp
+	movl	%edx,%esi
+	xorl	%eax,%edi
+	shldl	$5,%edx,%edx
+	addl	%edi,%ecx
+	xorl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	vpalignr	$8,%xmm6,%xmm7,%xmm8
+	vpxor	%xmm4,%xmm0,%xmm0
+	addl	0(%rsp),%ebx
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	shrdl	$7,%edx,%edx
+	vpxor	%xmm1,%xmm0,%xmm0
+	movl	%ecx,%edi
+	xorl	%ebp,%esi
+	vpaddd	%xmm7,%xmm11,%xmm9
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	vpxor	%xmm8,%xmm0,%xmm0
+	xorl	%edx,%edi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	addl	4(%rsp),%eax
+	vpsrld	$30,%xmm0,%xmm8
+	vmovdqa	%xmm9,48(%rsp)
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	shrdl	$7,%ecx,%ecx
+	movl	%ebx,%esi
+	vpslld	$2,%xmm0,%xmm0
+	xorl	%edx,%edi
+	shldl	$5,%ebx,%ebx
+	addl	%edi,%eax
+	xorl	%ecx,%esi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	addl	8(%rsp),%ebp
+	andl	%ecx,%esi
+	vpor	%xmm8,%xmm0,%xmm0
+	xorl	%edx,%ecx
+	shrdl	$7,%ebx,%ebx
+	movl	%eax,%edi
+	xorl	%ecx,%esi
+	shldl	$5,%eax,%eax
+	addl	%esi,%ebp
+	xorl	%ebx,%edi
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	addl	12(%rsp),%edx
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	shrdl	$7,%eax,%eax
+	movl	%ebp,%esi
+	xorl	%ebx,%edi
+	shldl	$5,%ebp,%ebp
+	addl	%edi,%edx
+	xorl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	vpalignr	$8,%xmm7,%xmm0,%xmm8
+	vpxor	%xmm5,%xmm1,%xmm1
+	addl	16(%rsp),%ecx
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	shrdl	$7,%ebp,%ebp
+	vpxor	%xmm2,%xmm1,%xmm1
+	movl	%edx,%edi
+	xorl	%eax,%esi
+	vpaddd	%xmm0,%xmm11,%xmm9
+	shldl	$5,%edx,%edx
+	addl	%esi,%ecx
+	vpxor	%xmm8,%xmm1,%xmm1
+	xorl	%ebp,%edi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	addl	20(%rsp),%ebx
+	vpsrld	$30,%xmm1,%xmm8
+	vmovdqa	%xmm9,0(%rsp)
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	shrdl	$7,%edx,%edx
+	movl	%ecx,%esi
+	vpslld	$2,%xmm1,%xmm1
+	xorl	%ebp,%edi
+	shldl	$5,%ecx,%ecx
+	addl	%edi,%ebx
+	xorl	%edx,%esi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	addl	24(%rsp),%eax
+	andl	%edx,%esi
+	vpor	%xmm8,%xmm1,%xmm1
+	xorl	%ebp,%edx
+	shrdl	$7,%ecx,%ecx
+	movl	%ebx,%edi
+	xorl	%edx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	xorl	%ecx,%edi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	addl	28(%rsp),%ebp
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	shrdl	$7,%ebx,%ebx
+	movl	%eax,%esi
+	xorl	%ecx,%edi
+	shldl	$5,%eax,%eax
+	addl	%edi,%ebp
+	xorl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	vpalignr	$8,%xmm0,%xmm1,%xmm8
+	vpxor	%xmm6,%xmm2,%xmm2
+	addl	32(%rsp),%edx
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	shrdl	$7,%eax,%eax
+	vpxor	%xmm3,%xmm2,%xmm2
+	movl	%ebp,%edi
+	xorl	%ebx,%esi
+	vpaddd	%xmm1,%xmm11,%xmm9
+	shldl	$5,%ebp,%ebp
+	addl	%esi,%edx
+	vpxor	%xmm8,%xmm2,%xmm2
+	xorl	%eax,%edi
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	addl	36(%rsp),%ecx
+	vpsrld	$30,%xmm2,%xmm8
+	vmovdqa	%xmm9,16(%rsp)
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	shrdl	$7,%ebp,%ebp
+	movl	%edx,%esi
+	vpslld	$2,%xmm2,%xmm2
+	xorl	%eax,%edi
+	shldl	$5,%edx,%edx
+	addl	%edi,%ecx
+	xorl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	addl	40(%rsp),%ebx
+	andl	%ebp,%esi
+	vpor	%xmm8,%xmm2,%xmm2
+	xorl	%eax,%ebp
+	shrdl	$7,%edx,%edx
+	movl	%ecx,%edi
+	xorl	%ebp,%esi
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	xorl	%edx,%edi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	addl	44(%rsp),%eax
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	shrdl	$7,%ecx,%ecx
+	movl	%ebx,%esi
+	xorl	%edx,%edi
+	shldl	$5,%ebx,%ebx
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	vpalignr	$8,%xmm1,%xmm2,%xmm8
+	vpxor	%xmm7,%xmm3,%xmm3
+	addl	48(%rsp),%ebp
+	xorl	%ecx,%esi
+	movl	%eax,%edi
+	shldl	$5,%eax,%eax
+	vpxor	%xmm4,%xmm3,%xmm3
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	vpaddd	%xmm2,%xmm11,%xmm9
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	vpxor	%xmm8,%xmm3,%xmm3
+	addl	52(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	shldl	$5,%ebp,%ebp
+	vpsrld	$30,%xmm3,%xmm8
+	vmovdqa	%xmm9,32(%rsp)
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	vpslld	$2,%xmm3,%xmm3
+	addl	56(%rsp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%edi
+	shldl	$5,%edx,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	vpor	%xmm8,%xmm3,%xmm3
+	addl	60(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	0(%rsp),%eax
+	vpaddd	%xmm3,%xmm11,%xmm9
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	vmovdqa	%xmm9,48(%rsp)
+	xorl	%edx,%edi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	addl	4(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	addl	8(%rsp),%edx
+	xorl	%ebx,%esi
+	movl	%ebp,%edi
+	shldl	$5,%ebp,%ebp
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	addl	12(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	cmpq	%r10,%r9	# has the input cursor reached the end pointer?
+	je	.Ldone_avx	# yes: finish the remaining rounds without loading more input
+	vmovdqa	64(%r14),%xmm6	# reload the vpshufb mask row and the first constant row for the next block
+	vmovdqa	-64(%r14),%xmm11
+	vmovdqu	0(%r9),%xmm0	# load the next 64 input bytes; their preparation is interleaved with the rounds below
+	vmovdqu	16(%r9),%xmm1
+	vmovdqu	32(%r9),%xmm2
+	vmovdqu	48(%r9),%xmm3
+	vpshufb	%xmm6,%xmm0,%xmm0
+	addq	$64,%r9
+	addl	16(%rsp),%ebx
+	xorl	%ebp,%esi
+	vpshufb	%xmm6,%xmm1,%xmm1
+	movl	%ecx,%edi
+	shldl	$5,%ecx,%ecx
+	vpaddd	%xmm11,%xmm0,%xmm4
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	vmovdqa	%xmm4,0(%rsp)
+	addl	20(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	addl	24(%rsp),%ebp
+	xorl	%ecx,%esi
+	movl	%eax,%edi
+	shldl	$5,%eax,%eax
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	addl	28(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	shldl	$5,%ebp,%ebp
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	addl	32(%rsp),%ecx
+	xorl	%eax,%esi
+	vpshufb	%xmm6,%xmm2,%xmm2
+	movl	%edx,%edi
+	shldl	$5,%edx,%edx
+	vpaddd	%xmm11,%xmm1,%xmm5
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	vmovdqa	%xmm5,16(%rsp)
+	addl	36(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	40(%rsp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	addl	44(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	addl	48(%rsp),%edx
+	xorl	%ebx,%esi
+	vpshufb	%xmm6,%xmm3,%xmm3
+	movl	%ebp,%edi
+	shldl	$5,%ebp,%ebp
+	vpaddd	%xmm11,%xmm2,%xmm6
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	vmovdqa	%xmm6,32(%rsp)
+	addl	52(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	addl	56(%rsp),%ebx
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	60(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%edi,%eax
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	addl	0(%r8),%eax	# fold the working registers into the five state words at (%r8)
+	addl	4(%r8),%esi
+	addl	8(%r8),%ecx
+	addl	12(%r8),%edx
+	movl	%eax,0(%r8)
+	addl	16(%r8),%ebp
+	movl	%esi,4(%r8)
+	movl	%esi,%ebx	# rebuild %ebx, %edi and %esi the same way the function entry does before the next pass
+	movl	%ecx,8(%r8)
+	movl	%ecx,%edi
+	movl	%edx,12(%r8)
+	xorl	%edx,%edi
+	movl	%ebp,16(%r8)
+	andl	%edi,%esi
+	jmp	.Loop_avx
+
+.align	16
+.Ldone_avx:	# reached when the input cursor equals the end pointer: run the remaining rounds from stack slots 16..60(%rsp), with no further input loads
+	addl	16(%rsp),%ebx
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	20(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	addl	24(%rsp),%ebp
+	xorl	%ecx,%esi
+	movl	%eax,%edi
+	shldl	$5,%eax,%eax
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	addl	28(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	shldl	$5,%ebp,%ebp
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	addl	32(%rsp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%edi
+	shldl	$5,%edx,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	addl	36(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	40(%rsp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	addl	44(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	addl	48(%rsp),%edx
+	xorl	%ebx,%esi
+	movl	%ebp,%edi
+	shldl	$5,%ebp,%ebp
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	addl	52(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	addl	56(%rsp),%ebx
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	60(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%edi,%eax
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	vzeroupper
+
+	addl	0(%r8),%eax	# fold the working registers into the five state words and store them back at (%r8)
+	addl	4(%r8),%esi
+	addl	8(%r8),%ecx
+	movl	%eax,0(%r8)
+	addl	12(%r8),%edx
+	movl	%esi,4(%r8)
+	addl	16(%r8),%ebp
+	movl	%ecx,8(%r8)
+	movl	%edx,12(%r8)
+	movl	%ebp,16(%r8)
+	movq	-40(%r11),%r14	# reload the five registers pushed at entry, addressed below the entry %rsp kept in %r11
+	movq	-32(%r11),%r13
+	movq	-24(%r11),%r12
+	movq	-16(%r11),%rbp
+	movq	-8(%r11),%rbx
+	leaq	(%r11),%rsp	# restore the entry stack pointer, discarding the aligned scratch area
+.Lepilogue_avx:
+	.byte	0xf3,0xc3	# F3 C3 is the encoding of a REP-prefixed RET
+.size	sha1_block_data_order_avx,.-sha1_block_data_order_avx
+.align	64
+K_XX_XX:	# constant table; the AVX routine addresses it through a base of K_XX_XX+64, so rows sit at -64, -32, 0, +32 and +64 from that base
+.long	0x5a827999,0x5a827999,0x5a827999,0x5a827999	# offset 0: first round constant, replicated across four lanes
+.long	0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1	# offset 32: second round constant
+.long	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc	# offset 64: third round constant
+.long	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6	# offset 96: fourth round constant
+.long	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f	# offset 128: vpshufb mask that reverses the bytes inside each 32-bit word
+.long	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.byte	0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0	# descending byte indices 15..0; not referenced by the AVX routine
+.byte	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	# NUL-terminated ASCII: SHA1 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>
+.align	64
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/fipsmodule/sha256-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/sha256-x86_64.S
new file mode 100644
index 0000000..62534be
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/sha256-x86_64.S
@@ -0,0 +1,3906 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+.extern	OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
+.globl	sha256_block_data_order
+.hidden sha256_block_data_order
+.type	sha256_block_data_order,@function
+.align	16
+sha256_block_data_order:
+	leaq	OPENSSL_ia32cap_P(%rip),%r11
+	movl	0(%r11),%r9d
+	movl	4(%r11),%r10d
+	movl	8(%r11),%r11d
+	andl	$1073741824,%r9d
+	andl	$268435968,%r10d
+	orl	%r9d,%r10d
+	cmpl	$1342177792,%r10d
+	je	.Lavx_shortcut
+	testl	$512,%r10d
+	jnz	.Lssse3_shortcut
+	movq	%rsp,%rax
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	shlq	$4,%rdx
+	subq	$64+32,%rsp
+	leaq	(%rsi,%rdx,4),%rdx
+	andq	$-64,%rsp
+	movq	%rdi,64+0(%rsp)
+	movq	%rsi,64+8(%rsp)
+	movq	%rdx,64+16(%rsp)
+	movq	%rax,64+24(%rsp)
+.Lprologue:
+
+	movl	0(%rdi),%eax
+	movl	4(%rdi),%ebx
+	movl	8(%rdi),%ecx
+	movl	12(%rdi),%edx
+	movl	16(%rdi),%r8d
+	movl	20(%rdi),%r9d
+	movl	24(%rdi),%r10d
+	movl	28(%rdi),%r11d
+	jmp	.Lloop
+
+.align	16
+.Lloop:
+	movl	%ebx,%edi
+	leaq	K256(%rip),%rbp
+	xorl	%ecx,%edi
+	movl	0(%rsi),%r12d
+	movl	%r8d,%r13d
+	movl	%eax,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+
+	xorl	%r8d,%r13d
+	rorl	$9,%r14d
+	xorl	%r10d,%r15d
+
+	movl	%r12d,0(%rsp)
+	xorl	%eax,%r14d
+	andl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%r10d,%r15d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	addl	%r15d,%r12d
+
+	movl	%eax,%r15d
+	addl	(%rbp),%r12d
+	xorl	%eax,%r14d
+
+	xorl	%ebx,%r15d
+	rorl	$6,%r13d
+	movl	%ebx,%r11d
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%r11d
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%r11d
+	movl	4(%rsi),%r12d
+	movl	%edx,%r13d
+	movl	%r11d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r8d,%edi
+
+	xorl	%edx,%r13d
+	rorl	$9,%r14d
+	xorl	%r9d,%edi
+
+	movl	%r12d,4(%rsp)
+	xorl	%r11d,%r14d
+	andl	%edx,%edi
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r9d,%edi
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	addl	%edi,%r12d
+
+	movl	%r11d,%edi
+	addl	(%rbp),%r12d
+	xorl	%r11d,%r14d
+
+	xorl	%eax,%edi
+	rorl	$6,%r13d
+	movl	%eax,%r10d
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%r10d
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%r10d
+	movl	8(%rsi),%r12d
+	movl	%ecx,%r13d
+	movl	%r10d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+
+	xorl	%ecx,%r13d
+	rorl	$9,%r14d
+	xorl	%r8d,%r15d
+
+	movl	%r12d,8(%rsp)
+	xorl	%r10d,%r14d
+	andl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r8d,%r15d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	addl	%r15d,%r12d
+
+	movl	%r10d,%r15d
+	addl	(%rbp),%r12d
+	xorl	%r10d,%r14d
+
+	xorl	%r11d,%r15d
+	rorl	$6,%r13d
+	movl	%r11d,%r9d
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%r9d
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%r9d
+	movl	12(%rsi),%r12d
+	movl	%ebx,%r13d
+	movl	%r9d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ecx,%edi
+
+	xorl	%ebx,%r13d
+	rorl	$9,%r14d
+	xorl	%edx,%edi
+
+	movl	%r12d,12(%rsp)
+	xorl	%r9d,%r14d
+	andl	%ebx,%edi
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%edx,%edi
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	addl	%edi,%r12d
+
+	movl	%r9d,%edi
+	addl	(%rbp),%r12d
+	xorl	%r9d,%r14d
+
+	xorl	%r10d,%edi
+	rorl	$6,%r13d
+	movl	%r10d,%r8d
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%r8d
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+
+	leaq	20(%rbp),%rbp
+	addl	%r14d,%r8d
+	movl	16(%rsi),%r12d
+	movl	%eax,%r13d
+	movl	%r8d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+
+	xorl	%eax,%r13d
+	rorl	$9,%r14d
+	xorl	%ecx,%r15d
+
+	movl	%r12d,16(%rsp)
+	xorl	%r8d,%r14d
+	andl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%ecx,%r15d
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	addl	%r15d,%r12d
+
+	movl	%r8d,%r15d
+	addl	(%rbp),%r12d
+	xorl	%r8d,%r14d
+
+	xorl	%r9d,%r15d
+	rorl	$6,%r13d
+	movl	%r9d,%edx
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%edx
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%edx
+	movl	20(%rsi),%r12d
+	movl	%r11d,%r13d
+	movl	%edx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%eax,%edi
+
+	xorl	%r11d,%r13d
+	rorl	$9,%r14d
+	xorl	%ebx,%edi
+
+	movl	%r12d,20(%rsp)
+	xorl	%edx,%r14d
+	andl	%r11d,%edi
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%ebx,%edi
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	addl	%edi,%r12d
+
+	movl	%edx,%edi
+	addl	(%rbp),%r12d
+	xorl	%edx,%r14d
+
+	xorl	%r8d,%edi
+	rorl	$6,%r13d
+	movl	%r8d,%ecx
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%ecx
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%ecx
+	movl	24(%rsi),%r12d
+	movl	%r10d,%r13d
+	movl	%ecx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+
+	xorl	%r10d,%r13d
+	rorl	$9,%r14d
+	xorl	%eax,%r15d
+
+	movl	%r12d,24(%rsp)
+	xorl	%ecx,%r14d
+	andl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%eax,%r15d
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	addl	%r15d,%r12d
+
+	movl	%ecx,%r15d
+	addl	(%rbp),%r12d
+	xorl	%ecx,%r14d
+
+	xorl	%edx,%r15d
+	rorl	$6,%r13d
+	movl	%edx,%ebx
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%ebx
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%ebx
+	movl	28(%rsi),%r12d
+	movl	%r9d,%r13d
+	movl	%ebx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r10d,%edi
+
+	xorl	%r9d,%r13d
+	rorl	$9,%r14d
+	xorl	%r11d,%edi
+
+	movl	%r12d,28(%rsp)
+	xorl	%ebx,%r14d
+	andl	%r9d,%edi
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%r11d,%edi
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	addl	%edi,%r12d
+
+	movl	%ebx,%edi
+	addl	(%rbp),%r12d
+	xorl	%ebx,%r14d
+
+	xorl	%ecx,%edi
+	rorl	$6,%r13d
+	movl	%ecx,%eax
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%eax
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+
+	leaq	20(%rbp),%rbp
+	addl	%r14d,%eax
+	movl	32(%rsi),%r12d
+	movl	%r8d,%r13d
+	movl	%eax,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+
+	xorl	%r8d,%r13d
+	rorl	$9,%r14d
+	xorl	%r10d,%r15d
+
+	movl	%r12d,32(%rsp)
+	xorl	%eax,%r14d
+	andl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%r10d,%r15d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	addl	%r15d,%r12d
+
+	movl	%eax,%r15d
+	addl	(%rbp),%r12d
+	xorl	%eax,%r14d
+
+	xorl	%ebx,%r15d
+	rorl	$6,%r13d
+	movl	%ebx,%r11d
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%r11d
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%r11d
+	movl	36(%rsi),%r12d
+	movl	%edx,%r13d
+	movl	%r11d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r8d,%edi
+
+	xorl	%edx,%r13d
+	rorl	$9,%r14d
+	xorl	%r9d,%edi
+
+	movl	%r12d,36(%rsp)
+	xorl	%r11d,%r14d
+	andl	%edx,%edi
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r9d,%edi
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	addl	%edi,%r12d
+
+	movl	%r11d,%edi
+	addl	(%rbp),%r12d
+	xorl	%r11d,%r14d
+
+	xorl	%eax,%edi
+	rorl	$6,%r13d
+	movl	%eax,%r10d
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%r10d
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%r10d
+	movl	40(%rsi),%r12d
+	movl	%ecx,%r13d
+	movl	%r10d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+
+	xorl	%ecx,%r13d
+	rorl	$9,%r14d
+	xorl	%r8d,%r15d
+
+	movl	%r12d,40(%rsp)
+	xorl	%r10d,%r14d
+	andl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r8d,%r15d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	addl	%r15d,%r12d
+
+	movl	%r10d,%r15d
+	addl	(%rbp),%r12d
+	xorl	%r10d,%r14d
+
+	xorl	%r11d,%r15d
+	rorl	$6,%r13d
+	movl	%r11d,%r9d
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%r9d
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%r9d
+	movl	44(%rsi),%r12d
+	movl	%ebx,%r13d
+	movl	%r9d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ecx,%edi
+
+	xorl	%ebx,%r13d
+	rorl	$9,%r14d
+	xorl	%edx,%edi
+
+	movl	%r12d,44(%rsp)
+	xorl	%r9d,%r14d
+	andl	%ebx,%edi
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%edx,%edi
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	addl	%edi,%r12d
+
+	movl	%r9d,%edi
+	addl	(%rbp),%r12d
+	xorl	%r9d,%r14d
+
+	xorl	%r10d,%edi
+	rorl	$6,%r13d
+	movl	%r10d,%r8d
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%r8d
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+
+	leaq	20(%rbp),%rbp
+	addl	%r14d,%r8d
+	movl	48(%rsi),%r12d
+	movl	%eax,%r13d
+	movl	%r8d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+
+	xorl	%eax,%r13d
+	rorl	$9,%r14d
+	xorl	%ecx,%r15d
+
+	movl	%r12d,48(%rsp)
+	xorl	%r8d,%r14d
+	andl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%ecx,%r15d
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	addl	%r15d,%r12d
+
+	movl	%r8d,%r15d
+	addl	(%rbp),%r12d
+	xorl	%r8d,%r14d
+
+	xorl	%r9d,%r15d
+	rorl	$6,%r13d
+	movl	%r9d,%edx
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%edx
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%edx
+	movl	52(%rsi),%r12d
+	movl	%r11d,%r13d
+	movl	%edx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%eax,%edi
+
+	xorl	%r11d,%r13d
+	rorl	$9,%r14d
+	xorl	%ebx,%edi
+
+	movl	%r12d,52(%rsp)
+	xorl	%edx,%r14d
+	andl	%r11d,%edi
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%ebx,%edi
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	addl	%edi,%r12d
+
+	movl	%edx,%edi
+	addl	(%rbp),%r12d
+	xorl	%edx,%r14d
+
+	xorl	%r8d,%edi
+	rorl	$6,%r13d
+	movl	%r8d,%ecx
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%ecx
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%ecx
+	movl	56(%rsi),%r12d
+	movl	%r10d,%r13d
+	movl	%ecx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+
+	xorl	%r10d,%r13d
+	rorl	$9,%r14d
+	xorl	%eax,%r15d
+
+	movl	%r12d,56(%rsp)
+	xorl	%ecx,%r14d
+	andl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%eax,%r15d
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	addl	%r15d,%r12d
+
+	movl	%ecx,%r15d
+	addl	(%rbp),%r12d
+	xorl	%ecx,%r14d
+
+	xorl	%edx,%r15d
+	rorl	$6,%r13d
+	movl	%edx,%ebx
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%ebx
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%ebx
+	movl	60(%rsi),%r12d
+	movl	%r9d,%r13d
+	movl	%ebx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r10d,%edi
+
+	xorl	%r9d,%r13d
+	rorl	$9,%r14d
+	xorl	%r11d,%edi
+
+	movl	%r12d,60(%rsp)
+	xorl	%ebx,%r14d
+	andl	%r9d,%edi
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%r11d,%edi
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	addl	%edi,%r12d
+
+	movl	%ebx,%edi
+	addl	(%rbp),%r12d
+	xorl	%ebx,%r14d
+
+	xorl	%ecx,%edi
+	rorl	$6,%r13d
+	movl	%ecx,%eax
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%eax
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+
+	leaq	20(%rbp),%rbp
+	jmp	.Lrounds_16_xx
+.align	16
+.Lrounds_16_xx:
+	movl	4(%rsp),%r13d
+	movl	56(%rsp),%r15d
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%eax
+	movl	%r15d,%r14d
+	rorl	$2,%r15d
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	xorl	%r13d,%r12d
+	xorl	%r14d,%r15d
+	addl	36(%rsp),%r12d
+
+	addl	0(%rsp),%r12d
+	movl	%r8d,%r13d
+	addl	%r15d,%r12d
+	movl	%eax,%r14d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+
+	xorl	%r8d,%r13d
+	rorl	$9,%r14d
+	xorl	%r10d,%r15d
+
+	movl	%r12d,0(%rsp)
+	xorl	%eax,%r14d
+	andl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%r10d,%r15d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	addl	%r15d,%r12d
+
+	movl	%eax,%r15d
+	addl	(%rbp),%r12d
+	xorl	%eax,%r14d
+
+	xorl	%ebx,%r15d
+	rorl	$6,%r13d
+	movl	%ebx,%r11d
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%r11d
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+
+	leaq	4(%rbp),%rbp
+	movl	8(%rsp),%r13d
+	movl	60(%rsp),%edi
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%r11d
+	movl	%edi,%r14d
+	rorl	$2,%edi
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%edi
+	shrl	$10,%r14d
+
+	rorl	$17,%edi
+	xorl	%r13d,%r12d
+	xorl	%r14d,%edi
+	addl	40(%rsp),%r12d
+
+	addl	4(%rsp),%r12d
+	movl	%edx,%r13d
+	addl	%edi,%r12d
+	movl	%r11d,%r14d
+	rorl	$14,%r13d
+	movl	%r8d,%edi
+
+	xorl	%edx,%r13d
+	rorl	$9,%r14d
+	xorl	%r9d,%edi
+
+	movl	%r12d,4(%rsp)
+	xorl	%r11d,%r14d
+	andl	%edx,%edi
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r9d,%edi
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	addl	%edi,%r12d
+
+	movl	%r11d,%edi
+	addl	(%rbp),%r12d
+	xorl	%r11d,%r14d
+
+	xorl	%eax,%edi
+	rorl	$6,%r13d
+	movl	%eax,%r10d
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%r10d
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+
+	leaq	4(%rbp),%rbp
+	movl	12(%rsp),%r13d
+	movl	0(%rsp),%r15d
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%r10d
+	movl	%r15d,%r14d
+	rorl	$2,%r15d
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	xorl	%r13d,%r12d
+	xorl	%r14d,%r15d
+	addl	44(%rsp),%r12d
+
+	addl	8(%rsp),%r12d
+	movl	%ecx,%r13d
+	addl	%r15d,%r12d
+	movl	%r10d,%r14d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+
+	xorl	%ecx,%r13d
+	rorl	$9,%r14d
+	xorl	%r8d,%r15d
+
+	movl	%r12d,8(%rsp)
+	xorl	%r10d,%r14d
+	andl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r8d,%r15d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	addl	%r15d,%r12d
+
+	movl	%r10d,%r15d
+	addl	(%rbp),%r12d
+	xorl	%r10d,%r14d
+
+	xorl	%r11d,%r15d
+	rorl	$6,%r13d
+	movl	%r11d,%r9d
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%r9d
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+
+	leaq	4(%rbp),%rbp
+	movl	16(%rsp),%r13d
+	movl	4(%rsp),%edi
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%r9d
+	movl	%edi,%r14d
+	rorl	$2,%edi
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%edi
+	shrl	$10,%r14d
+
+	rorl	$17,%edi
+	xorl	%r13d,%r12d
+	xorl	%r14d,%edi
+	addl	48(%rsp),%r12d
+
+	addl	12(%rsp),%r12d
+	movl	%ebx,%r13d
+	addl	%edi,%r12d
+	movl	%r9d,%r14d
+	rorl	$14,%r13d
+	movl	%ecx,%edi
+
+	xorl	%ebx,%r13d
+	rorl	$9,%r14d
+	xorl	%edx,%edi
+
+	movl	%r12d,12(%rsp)
+	xorl	%r9d,%r14d
+	andl	%ebx,%edi
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%edx,%edi
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	addl	%edi,%r12d
+
+	movl	%r9d,%edi
+	addl	(%rbp),%r12d
+	xorl	%r9d,%r14d
+
+	xorl	%r10d,%edi
+	rorl	$6,%r13d
+	movl	%r10d,%r8d
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%r8d
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+
+	leaq	20(%rbp),%rbp
+	movl	20(%rsp),%r13d
+	movl	8(%rsp),%r15d
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%r8d
+	movl	%r15d,%r14d
+	rorl	$2,%r15d
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	xorl	%r13d,%r12d
+	xorl	%r14d,%r15d
+	addl	52(%rsp),%r12d
+
+	addl	16(%rsp),%r12d
+	movl	%eax,%r13d
+	addl	%r15d,%r12d
+	movl	%r8d,%r14d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+
+	xorl	%eax,%r13d
+	rorl	$9,%r14d
+	xorl	%ecx,%r15d
+
+	movl	%r12d,16(%rsp)
+	xorl	%r8d,%r14d
+	andl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%ecx,%r15d
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	addl	%r15d,%r12d
+
+	movl	%r8d,%r15d
+	addl	(%rbp),%r12d
+	xorl	%r8d,%r14d
+
+	xorl	%r9d,%r15d
+	rorl	$6,%r13d
+	movl	%r9d,%edx
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%edx
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+
+	leaq	4(%rbp),%rbp
+	movl	24(%rsp),%r13d
+	movl	12(%rsp),%edi
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%edx
+	movl	%edi,%r14d
+	rorl	$2,%edi
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%edi
+	shrl	$10,%r14d
+
+	rorl	$17,%edi
+	xorl	%r13d,%r12d
+	xorl	%r14d,%edi
+	addl	56(%rsp),%r12d
+
+	addl	20(%rsp),%r12d
+	movl	%r11d,%r13d
+	addl	%edi,%r12d
+	movl	%edx,%r14d
+	rorl	$14,%r13d
+	movl	%eax,%edi
+
+	xorl	%r11d,%r13d
+	rorl	$9,%r14d
+	xorl	%ebx,%edi
+
+	movl	%r12d,20(%rsp)
+	xorl	%edx,%r14d
+	andl	%r11d,%edi
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%ebx,%edi
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	addl	%edi,%r12d
+
+	movl	%edx,%edi
+	addl	(%rbp),%r12d
+	xorl	%edx,%r14d
+
+	xorl	%r8d,%edi
+	rorl	$6,%r13d
+	movl	%r8d,%ecx
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%ecx
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+
+	leaq	4(%rbp),%rbp
+	movl	28(%rsp),%r13d
+	movl	16(%rsp),%r15d
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%ecx
+	movl	%r15d,%r14d
+	rorl	$2,%r15d
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	xorl	%r13d,%r12d
+	xorl	%r14d,%r15d
+	addl	60(%rsp),%r12d
+
+	addl	24(%rsp),%r12d
+	movl	%r10d,%r13d
+	addl	%r15d,%r12d
+	movl	%ecx,%r14d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+
+	xorl	%r10d,%r13d
+	rorl	$9,%r14d
+	xorl	%eax,%r15d
+
+	movl	%r12d,24(%rsp)
+	xorl	%ecx,%r14d
+	andl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%eax,%r15d
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	addl	%r15d,%r12d
+
+	movl	%ecx,%r15d
+	addl	(%rbp),%r12d
+	xorl	%ecx,%r14d
+
+	xorl	%edx,%r15d
+	rorl	$6,%r13d
+	movl	%edx,%ebx
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%ebx
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+
+	leaq	4(%rbp),%rbp
+	movl	32(%rsp),%r13d
+	movl	20(%rsp),%edi
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%ebx
+	movl	%edi,%r14d
+	rorl	$2,%edi
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%edi
+	shrl	$10,%r14d
+
+	rorl	$17,%edi
+	xorl	%r13d,%r12d
+	xorl	%r14d,%edi
+	addl	0(%rsp),%r12d
+
+	addl	28(%rsp),%r12d
+	movl	%r9d,%r13d
+	addl	%edi,%r12d
+	movl	%ebx,%r14d
+	rorl	$14,%r13d
+	movl	%r10d,%edi
+
+	xorl	%r9d,%r13d
+	rorl	$9,%r14d
+	xorl	%r11d,%edi
+
+	movl	%r12d,28(%rsp)
+	xorl	%ebx,%r14d
+	andl	%r9d,%edi
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%r11d,%edi
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	addl	%edi,%r12d
+
+	movl	%ebx,%edi
+	addl	(%rbp),%r12d
+	xorl	%ebx,%r14d
+
+	xorl	%ecx,%edi
+	rorl	$6,%r13d
+	movl	%ecx,%eax
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%eax
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+
+	leaq	20(%rbp),%rbp
+	movl	36(%rsp),%r13d
+	movl	24(%rsp),%r15d
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%eax
+	movl	%r15d,%r14d
+	rorl	$2,%r15d
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	xorl	%r13d,%r12d
+	xorl	%r14d,%r15d
+	addl	4(%rsp),%r12d
+
+	addl	32(%rsp),%r12d
+	movl	%r8d,%r13d
+	addl	%r15d,%r12d
+	movl	%eax,%r14d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+
+	xorl	%r8d,%r13d
+	rorl	$9,%r14d
+	xorl	%r10d,%r15d
+
+	movl	%r12d,32(%rsp)
+	xorl	%eax,%r14d
+	andl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%r10d,%r15d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	addl	%r15d,%r12d
+
+	movl	%eax,%r15d
+	addl	(%rbp),%r12d
+	xorl	%eax,%r14d
+
+	xorl	%ebx,%r15d
+	rorl	$6,%r13d
+	movl	%ebx,%r11d
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%r11d
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+
+	leaq	4(%rbp),%rbp
+	movl	40(%rsp),%r13d
+	movl	28(%rsp),%edi
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%r11d
+	movl	%edi,%r14d
+	rorl	$2,%edi
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%edi
+	shrl	$10,%r14d
+
+	rorl	$17,%edi
+	xorl	%r13d,%r12d
+	xorl	%r14d,%edi
+	addl	8(%rsp),%r12d
+
+	addl	36(%rsp),%r12d
+	movl	%edx,%r13d
+	addl	%edi,%r12d
+	movl	%r11d,%r14d
+	rorl	$14,%r13d
+	movl	%r8d,%edi
+
+	xorl	%edx,%r13d
+	rorl	$9,%r14d
+	xorl	%r9d,%edi
+
+	movl	%r12d,36(%rsp)
+	xorl	%r11d,%r14d
+	andl	%edx,%edi
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r9d,%edi
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	addl	%edi,%r12d
+
+	movl	%r11d,%edi
+	addl	(%rbp),%r12d
+	xorl	%r11d,%r14d
+
+	xorl	%eax,%edi
+	rorl	$6,%r13d
+	movl	%eax,%r10d
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%r10d
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+
+	leaq	4(%rbp),%rbp
+	movl	44(%rsp),%r13d
+	movl	32(%rsp),%r15d
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%r10d
+	movl	%r15d,%r14d
+	rorl	$2,%r15d
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	xorl	%r13d,%r12d
+	xorl	%r14d,%r15d
+	addl	12(%rsp),%r12d
+
+	addl	40(%rsp),%r12d
+	movl	%ecx,%r13d
+	addl	%r15d,%r12d
+	movl	%r10d,%r14d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+
+	xorl	%ecx,%r13d
+	rorl	$9,%r14d
+	xorl	%r8d,%r15d
+
+	movl	%r12d,40(%rsp)
+	xorl	%r10d,%r14d
+	andl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r8d,%r15d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	addl	%r15d,%r12d
+
+	movl	%r10d,%r15d
+	addl	(%rbp),%r12d
+	xorl	%r10d,%r14d
+
+	xorl	%r11d,%r15d
+	rorl	$6,%r13d
+	movl	%r11d,%r9d
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%r9d
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+
+	leaq	4(%rbp),%rbp
+	movl	48(%rsp),%r13d
+	movl	36(%rsp),%edi
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%r9d
+	movl	%edi,%r14d
+	rorl	$2,%edi
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%edi
+	shrl	$10,%r14d
+
+	rorl	$17,%edi
+	xorl	%r13d,%r12d
+	xorl	%r14d,%edi
+	addl	16(%rsp),%r12d
+
+	addl	44(%rsp),%r12d
+	movl	%ebx,%r13d
+	addl	%edi,%r12d
+	movl	%r9d,%r14d
+	rorl	$14,%r13d
+	movl	%ecx,%edi
+
+	xorl	%ebx,%r13d
+	rorl	$9,%r14d
+	xorl	%edx,%edi
+
+	movl	%r12d,44(%rsp)
+	xorl	%r9d,%r14d
+	andl	%ebx,%edi
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%edx,%edi
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	addl	%edi,%r12d
+
+	movl	%r9d,%edi
+	addl	(%rbp),%r12d
+	xorl	%r9d,%r14d
+
+	xorl	%r10d,%edi
+	rorl	$6,%r13d
+	movl	%r10d,%r8d
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%r8d
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+
+	leaq	20(%rbp),%rbp
+	movl	52(%rsp),%r13d
+	movl	40(%rsp),%r15d
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%r8d
+	movl	%r15d,%r14d
+	rorl	$2,%r15d
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	xorl	%r13d,%r12d
+	xorl	%r14d,%r15d
+	addl	20(%rsp),%r12d
+
+	addl	48(%rsp),%r12d
+	movl	%eax,%r13d
+	addl	%r15d,%r12d
+	movl	%r8d,%r14d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+
+	xorl	%eax,%r13d
+	rorl	$9,%r14d
+	xorl	%ecx,%r15d
+
+	movl	%r12d,48(%rsp)
+	xorl	%r8d,%r14d
+	andl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%ecx,%r15d
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	addl	%r15d,%r12d
+
+	movl	%r8d,%r15d
+	addl	(%rbp),%r12d
+	xorl	%r8d,%r14d
+
+	xorl	%r9d,%r15d
+	rorl	$6,%r13d
+	movl	%r9d,%edx
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%edx
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+
+	leaq	4(%rbp),%rbp
+	movl	56(%rsp),%r13d
+	movl	44(%rsp),%edi
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%edx
+	movl	%edi,%r14d
+	rorl	$2,%edi
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%edi
+	shrl	$10,%r14d
+
+	rorl	$17,%edi
+	xorl	%r13d,%r12d
+	xorl	%r14d,%edi
+	addl	24(%rsp),%r12d
+
+	addl	52(%rsp),%r12d
+	movl	%r11d,%r13d
+	addl	%edi,%r12d
+	movl	%edx,%r14d
+	rorl	$14,%r13d
+	movl	%eax,%edi
+
+	xorl	%r11d,%r13d
+	rorl	$9,%r14d
+	xorl	%ebx,%edi
+
+	movl	%r12d,52(%rsp)
+	xorl	%edx,%r14d
+	andl	%r11d,%edi
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%ebx,%edi
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	addl	%edi,%r12d
+
+	movl	%edx,%edi
+	addl	(%rbp),%r12d
+	xorl	%edx,%r14d
+
+	xorl	%r8d,%edi
+	rorl	$6,%r13d
+	movl	%r8d,%ecx
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%ecx
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+
+	leaq	4(%rbp),%rbp
+	movl	60(%rsp),%r13d
+	movl	48(%rsp),%r15d
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%ecx
+	movl	%r15d,%r14d
+	rorl	$2,%r15d
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	xorl	%r13d,%r12d
+	xorl	%r14d,%r15d
+	addl	28(%rsp),%r12d
+
+	addl	56(%rsp),%r12d
+	movl	%r10d,%r13d
+	addl	%r15d,%r12d
+	movl	%ecx,%r14d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+
+	xorl	%r10d,%r13d
+	rorl	$9,%r14d
+	xorl	%eax,%r15d
+
+	movl	%r12d,56(%rsp)
+	xorl	%ecx,%r14d
+	andl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%eax,%r15d
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	addl	%r15d,%r12d
+
+	movl	%ecx,%r15d
+	addl	(%rbp),%r12d
+	xorl	%ecx,%r14d
+
+	xorl	%edx,%r15d
+	rorl	$6,%r13d
+	movl	%edx,%ebx
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%ebx
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+
+	leaq	4(%rbp),%rbp
+	movl	0(%rsp),%r13d
+	movl	52(%rsp),%edi
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%ebx
+	movl	%edi,%r14d
+	rorl	$2,%edi
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%edi
+	shrl	$10,%r14d
+
+	rorl	$17,%edi
+	xorl	%r13d,%r12d
+	xorl	%r14d,%edi
+	addl	32(%rsp),%r12d
+
+	addl	60(%rsp),%r12d
+	movl	%r9d,%r13d
+	addl	%edi,%r12d
+	movl	%ebx,%r14d
+	rorl	$14,%r13d
+	movl	%r10d,%edi
+
+	xorl	%r9d,%r13d
+	rorl	$9,%r14d
+	xorl	%r11d,%edi
+
+	movl	%r12d,60(%rsp)
+	xorl	%ebx,%r14d
+	andl	%r9d,%edi
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%r11d,%edi
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	addl	%edi,%r12d
+
+	movl	%ebx,%edi
+	addl	(%rbp),%r12d
+	xorl	%ebx,%r14d
+
+	xorl	%ecx,%edi
+	rorl	$6,%r13d
+	movl	%ecx,%eax
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%eax
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+
+	leaq	20(%rbp),%rbp
+	cmpb	$0,3(%rbp)
+	jnz	.Lrounds_16_xx
+
+	movq	64+0(%rsp),%rdi
+	addl	%r14d,%eax
+	leaq	64(%rsi),%rsi
+
+	addl	0(%rdi),%eax
+	addl	4(%rdi),%ebx
+	addl	8(%rdi),%ecx
+	addl	12(%rdi),%edx
+	addl	16(%rdi),%r8d
+	addl	20(%rdi),%r9d
+	addl	24(%rdi),%r10d
+	addl	28(%rdi),%r11d
+
+	cmpq	64+16(%rsp),%rsi
+
+	movl	%eax,0(%rdi)
+	movl	%ebx,4(%rdi)
+	movl	%ecx,8(%rdi)
+	movl	%edx,12(%rdi)
+	movl	%r8d,16(%rdi)
+	movl	%r9d,20(%rdi)
+	movl	%r10d,24(%rdi)
+	movl	%r11d,28(%rdi)
+	jb	.Lloop
+
+	movq	64+24(%rsp),%rsi
+	movq	-48(%rsi),%r15
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp
+.Lepilogue:
+	.byte	0xf3,0xc3
+.size	sha256_block_data_order,.-sha256_block_data_order
+.align	64	/* table starts on a 64-byte boundary */
+.type	K256,@object
+K256:	/* SHA-256 round-constant table, followed by shuffle masks and an ident string */
+.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5	/* every 16-byte row of four constants is emitted twice, ... */
+.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5	/* ... so consecutive distinct rows are 32 bytes apart */
+.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2	/* last constant row; 32 rows x 16 bytes = 512 bytes so far */
+/* The next word sits at K256+512. Its byte 3 is 0x00, unlike every constant above; the round loops test that byte with cmpb $0 to detect the end of the constants. */
+.long	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f	/* K256+512: loaded into %xmm7 by the SSSE3 path and used as its pshufb mask (reverses the bytes of each 32-bit word) */
+.long	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long	0x03020100,0x0b0a0908,0xffffffff,0xffffffff	/* NOTE(review): this row and the three below are not referenced by the code visible in this chunk - presumably shuffle masks for another code path; confirm */
+.long	0x03020100,0x0b0a0908,0xffffffff,0xffffffff
+.long	0xffffffff,0xffffffff,0x03020100,0x0b0a0908
+.long	0xffffffff,0xffffffff,0x03020100,0x0b0a0908
+.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	/* NUL-terminated ASCII: "SHA256 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
+.type	sha256_block_data_order_ssse3,@function
+.align	64
+sha256_block_data_order_ssse3:
+.Lssse3_shortcut:
+	movq	%rsp,%rax
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	shlq	$4,%rdx
+	subq	$96,%rsp
+	leaq	(%rsi,%rdx,4),%rdx
+	andq	$-64,%rsp
+	movq	%rdi,64+0(%rsp)
+	movq	%rsi,64+8(%rsp)
+	movq	%rdx,64+16(%rsp)
+	movq	%rax,64+24(%rsp)
+.Lprologue_ssse3:
+
+	movl	0(%rdi),%eax
+	movl	4(%rdi),%ebx
+	movl	8(%rdi),%ecx
+	movl	12(%rdi),%edx
+	movl	16(%rdi),%r8d
+	movl	20(%rdi),%r9d
+	movl	24(%rdi),%r10d
+	movl	28(%rdi),%r11d
+
+
+	jmp	.Lloop_ssse3
+.align	16
+.Lloop_ssse3:
+	movdqa	K256+512(%rip),%xmm7
+	movdqu	0(%rsi),%xmm0
+	movdqu	16(%rsi),%xmm1
+	movdqu	32(%rsi),%xmm2
+.byte	102,15,56,0,199
+	movdqu	48(%rsi),%xmm3
+	leaq	K256(%rip),%rbp
+.byte	102,15,56,0,207
+	movdqa	0(%rbp),%xmm4
+	movdqa	32(%rbp),%xmm5
+.byte	102,15,56,0,215
+	paddd	%xmm0,%xmm4
+	movdqa	64(%rbp),%xmm6
+.byte	102,15,56,0,223
+	movdqa	96(%rbp),%xmm7
+	paddd	%xmm1,%xmm5
+	paddd	%xmm2,%xmm6
+	paddd	%xmm3,%xmm7
+	movdqa	%xmm4,0(%rsp)
+	movl	%eax,%r14d
+	movdqa	%xmm5,16(%rsp)
+	movl	%ebx,%edi
+	movdqa	%xmm6,32(%rsp)
+	xorl	%ecx,%edi
+	movdqa	%xmm7,48(%rsp)
+	movl	%r8d,%r13d
+	jmp	.Lssse3_00_47
+
+.align	16
+.Lssse3_00_47:
+	subq	$-128,%rbp
+	rorl	$14,%r13d
+	movdqa	%xmm1,%xmm4
+	movl	%r14d,%eax
+	movl	%r9d,%r12d
+	movdqa	%xmm3,%xmm7
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r12d
+	rorl	$5,%r13d
+	xorl	%eax,%r14d
+.byte	102,15,58,15,224,4
+	andl	%r8d,%r12d
+	xorl	%r8d,%r13d
+.byte	102,15,58,15,250,4
+	addl	0(%rsp),%r11d
+	movl	%eax,%r15d
+	xorl	%r10d,%r12d
+	rorl	$11,%r14d
+	movdqa	%xmm4,%xmm5
+	xorl	%ebx,%r15d
+	addl	%r12d,%r11d
+	movdqa	%xmm4,%xmm6
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	psrld	$3,%xmm4
+	xorl	%eax,%r14d
+	addl	%r13d,%r11d
+	xorl	%ebx,%edi
+	paddd	%xmm7,%xmm0
+	rorl	$2,%r14d
+	addl	%r11d,%edx
+	psrld	$7,%xmm6
+	addl	%edi,%r11d
+	movl	%edx,%r13d
+	pshufd	$250,%xmm3,%xmm7
+	addl	%r11d,%r14d
+	rorl	$14,%r13d
+	pslld	$14,%xmm5
+	movl	%r14d,%r11d
+	movl	%r8d,%r12d
+	pxor	%xmm6,%xmm4
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r12d
+	rorl	$5,%r13d
+	psrld	$11,%xmm6
+	xorl	%r11d,%r14d
+	pxor	%xmm5,%xmm4
+	andl	%edx,%r12d
+	xorl	%edx,%r13d
+	pslld	$11,%xmm5
+	addl	4(%rsp),%r10d
+	movl	%r11d,%edi
+	pxor	%xmm6,%xmm4
+	xorl	%r9d,%r12d
+	rorl	$11,%r14d
+	movdqa	%xmm7,%xmm6
+	xorl	%eax,%edi
+	addl	%r12d,%r10d
+	pxor	%xmm5,%xmm4
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%r11d,%r14d
+	psrld	$10,%xmm7
+	addl	%r13d,%r10d
+	xorl	%eax,%r15d
+	paddd	%xmm4,%xmm0
+	rorl	$2,%r14d
+	addl	%r10d,%ecx
+	psrlq	$17,%xmm6
+	addl	%r15d,%r10d
+	movl	%ecx,%r13d
+	addl	%r10d,%r14d
+	pxor	%xmm6,%xmm7
+	rorl	$14,%r13d
+	movl	%r14d,%r10d
+	movl	%edx,%r12d
+	rorl	$9,%r14d
+	psrlq	$2,%xmm6
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r12d
+	pxor	%xmm6,%xmm7
+	rorl	$5,%r13d
+	xorl	%r10d,%r14d
+	andl	%ecx,%r12d
+	pshufd	$128,%xmm7,%xmm7
+	xorl	%ecx,%r13d
+	addl	8(%rsp),%r9d
+	movl	%r10d,%r15d
+	psrldq	$8,%xmm7
+	xorl	%r8d,%r12d
+	rorl	$11,%r14d
+	xorl	%r11d,%r15d
+	addl	%r12d,%r9d
+	rorl	$6,%r13d
+	paddd	%xmm7,%xmm0
+	andl	%r15d,%edi
+	xorl	%r10d,%r14d
+	addl	%r13d,%r9d
+	pshufd	$80,%xmm0,%xmm7
+	xorl	%r11d,%edi
+	rorl	$2,%r14d
+	addl	%r9d,%ebx
+	movdqa	%xmm7,%xmm6
+	addl	%edi,%r9d
+	movl	%ebx,%r13d
+	psrld	$10,%xmm7
+	addl	%r9d,%r14d
+	rorl	$14,%r13d
+	psrlq	$17,%xmm6
+	movl	%r14d,%r9d
+	movl	%ecx,%r12d
+	pxor	%xmm6,%xmm7
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r12d
+	rorl	$5,%r13d
+	xorl	%r9d,%r14d
+	psrlq	$2,%xmm6
+	andl	%ebx,%r12d
+	xorl	%ebx,%r13d
+	addl	12(%rsp),%r8d
+	pxor	%xmm6,%xmm7
+	movl	%r9d,%edi
+	xorl	%edx,%r12d
+	rorl	$11,%r14d
+	pshufd	$8,%xmm7,%xmm7
+	xorl	%r10d,%edi
+	addl	%r12d,%r8d
+	movdqa	0(%rbp),%xmm6
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	pslldq	$8,%xmm7
+	xorl	%r9d,%r14d
+	addl	%r13d,%r8d
+	xorl	%r10d,%r15d
+	paddd	%xmm7,%xmm0
+	rorl	$2,%r14d
+	addl	%r8d,%eax
+	addl	%r15d,%r8d
+	paddd	%xmm0,%xmm6
+	movl	%eax,%r13d
+	addl	%r8d,%r14d
+	movdqa	%xmm6,0(%rsp)
+	rorl	$14,%r13d
+	movdqa	%xmm2,%xmm4
+	movl	%r14d,%r8d
+	movl	%ebx,%r12d
+	movdqa	%xmm0,%xmm7
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r12d
+	rorl	$5,%r13d
+	xorl	%r8d,%r14d
+.byte	102,15,58,15,225,4
+	andl	%eax,%r12d
+	xorl	%eax,%r13d
+.byte	102,15,58,15,251,4
+	addl	16(%rsp),%edx
+	movl	%r8d,%r15d
+	xorl	%ecx,%r12d
+	rorl	$11,%r14d
+	movdqa	%xmm4,%xmm5
+	xorl	%r9d,%r15d
+	addl	%r12d,%edx
+	movdqa	%xmm4,%xmm6
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	psrld	$3,%xmm4
+	xorl	%r8d,%r14d
+	addl	%r13d,%edx
+	xorl	%r9d,%edi
+	paddd	%xmm7,%xmm1
+	rorl	$2,%r14d
+	addl	%edx,%r11d
+	psrld	$7,%xmm6
+	addl	%edi,%edx
+	movl	%r11d,%r13d
+	pshufd	$250,%xmm0,%xmm7
+	addl	%edx,%r14d
+	rorl	$14,%r13d
+	pslld	$14,%xmm5
+	movl	%r14d,%edx
+	movl	%eax,%r12d
+	pxor	%xmm6,%xmm4
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r12d
+	rorl	$5,%r13d
+	psrld	$11,%xmm6
+	xorl	%edx,%r14d
+	pxor	%xmm5,%xmm4
+	andl	%r11d,%r12d
+	xorl	%r11d,%r13d
+	pslld	$11,%xmm5
+	addl	20(%rsp),%ecx
+	movl	%edx,%edi
+	pxor	%xmm6,%xmm4
+	xorl	%ebx,%r12d
+	rorl	$11,%r14d
+	movdqa	%xmm7,%xmm6
+	xorl	%r8d,%edi
+	addl	%r12d,%ecx
+	pxor	%xmm5,%xmm4
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%edx,%r14d
+	psrld	$10,%xmm7
+	addl	%r13d,%ecx
+	xorl	%r8d,%r15d
+	paddd	%xmm4,%xmm1
+	rorl	$2,%r14d
+	addl	%ecx,%r10d
+	psrlq	$17,%xmm6
+	addl	%r15d,%ecx
+	movl	%r10d,%r13d
+	addl	%ecx,%r14d
+	pxor	%xmm6,%xmm7
+	rorl	$14,%r13d
+	movl	%r14d,%ecx
+	movl	%r11d,%r12d
+	rorl	$9,%r14d
+	psrlq	$2,%xmm6
+	xorl	%r10d,%r13d
+	xorl	%eax,%r12d
+	pxor	%xmm6,%xmm7
+	rorl	$5,%r13d
+	xorl	%ecx,%r14d
+	andl	%r10d,%r12d
+	pshufd	$128,%xmm7,%xmm7
+	xorl	%r10d,%r13d
+	addl	24(%rsp),%ebx
+	movl	%ecx,%r15d
+	psrldq	$8,%xmm7
+	xorl	%eax,%r12d
+	rorl	$11,%r14d
+	xorl	%edx,%r15d
+	addl	%r12d,%ebx
+	rorl	$6,%r13d
+	paddd	%xmm7,%xmm1
+	andl	%r15d,%edi
+	xorl	%ecx,%r14d
+	addl	%r13d,%ebx
+	pshufd	$80,%xmm1,%xmm7
+	xorl	%edx,%edi
+	rorl	$2,%r14d
+	addl	%ebx,%r9d
+	movdqa	%xmm7,%xmm6
+	addl	%edi,%ebx
+	movl	%r9d,%r13d
+	psrld	$10,%xmm7
+	addl	%ebx,%r14d
+	rorl	$14,%r13d
+	psrlq	$17,%xmm6
+	movl	%r14d,%ebx
+	movl	%r10d,%r12d
+	pxor	%xmm6,%xmm7
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r12d
+	rorl	$5,%r13d
+	xorl	%ebx,%r14d
+	psrlq	$2,%xmm6
+	andl	%r9d,%r12d
+	xorl	%r9d,%r13d
+	addl	28(%rsp),%eax
+	pxor	%xmm6,%xmm7
+	movl	%ebx,%edi
+	xorl	%r11d,%r12d
+	rorl	$11,%r14d
+	pshufd	$8,%xmm7,%xmm7
+	xorl	%ecx,%edi
+	addl	%r12d,%eax
+	movdqa	32(%rbp),%xmm6
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	pslldq	$8,%xmm7
+	xorl	%ebx,%r14d
+	addl	%r13d,%eax
+	xorl	%ecx,%r15d
+	paddd	%xmm7,%xmm1
+	rorl	$2,%r14d
+	addl	%eax,%r8d
+	addl	%r15d,%eax
+	paddd	%xmm1,%xmm6
+	movl	%r8d,%r13d
+	addl	%eax,%r14d
+	movdqa	%xmm6,16(%rsp)
+	rorl	$14,%r13d
+	movdqa	%xmm3,%xmm4
+	movl	%r14d,%eax
+	movl	%r9d,%r12d
+	movdqa	%xmm1,%xmm7
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r12d
+	rorl	$5,%r13d
+	xorl	%eax,%r14d
+.byte	102,15,58,15,226,4
+	andl	%r8d,%r12d
+	xorl	%r8d,%r13d
+.byte	102,15,58,15,248,4
+	addl	32(%rsp),%r11d
+	movl	%eax,%r15d
+	xorl	%r10d,%r12d
+	rorl	$11,%r14d
+	movdqa	%xmm4,%xmm5
+	xorl	%ebx,%r15d
+	addl	%r12d,%r11d
+	movdqa	%xmm4,%xmm6
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	psrld	$3,%xmm4
+	xorl	%eax,%r14d
+	addl	%r13d,%r11d
+	xorl	%ebx,%edi
+	paddd	%xmm7,%xmm2
+	rorl	$2,%r14d
+	addl	%r11d,%edx
+	psrld	$7,%xmm6
+	addl	%edi,%r11d
+	movl	%edx,%r13d
+	pshufd	$250,%xmm1,%xmm7
+	addl	%r11d,%r14d
+	rorl	$14,%r13d
+	pslld	$14,%xmm5
+	movl	%r14d,%r11d
+	movl	%r8d,%r12d
+	pxor	%xmm6,%xmm4
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r12d
+	rorl	$5,%r13d
+	psrld	$11,%xmm6
+	xorl	%r11d,%r14d
+	pxor	%xmm5,%xmm4
+	andl	%edx,%r12d
+	xorl	%edx,%r13d
+	pslld	$11,%xmm5
+	addl	36(%rsp),%r10d
+	movl	%r11d,%edi
+	pxor	%xmm6,%xmm4
+	xorl	%r9d,%r12d
+	rorl	$11,%r14d
+	movdqa	%xmm7,%xmm6
+	xorl	%eax,%edi
+	addl	%r12d,%r10d
+	pxor	%xmm5,%xmm4
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%r11d,%r14d
+	psrld	$10,%xmm7
+	addl	%r13d,%r10d
+	xorl	%eax,%r15d
+	paddd	%xmm4,%xmm2
+	rorl	$2,%r14d
+	addl	%r10d,%ecx
+	psrlq	$17,%xmm6
+	addl	%r15d,%r10d
+	movl	%ecx,%r13d
+	addl	%r10d,%r14d
+	pxor	%xmm6,%xmm7
+	rorl	$14,%r13d
+	movl	%r14d,%r10d
+	movl	%edx,%r12d
+	rorl	$9,%r14d
+	psrlq	$2,%xmm6
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r12d
+	pxor	%xmm6,%xmm7
+	rorl	$5,%r13d
+	xorl	%r10d,%r14d
+	andl	%ecx,%r12d
+	pshufd	$128,%xmm7,%xmm7
+	xorl	%ecx,%r13d
+	addl	40(%rsp),%r9d
+	movl	%r10d,%r15d
+	psrldq	$8,%xmm7
+	xorl	%r8d,%r12d
+	rorl	$11,%r14d
+	xorl	%r11d,%r15d
+	addl	%r12d,%r9d
+	rorl	$6,%r13d
+	paddd	%xmm7,%xmm2
+	andl	%r15d,%edi
+	xorl	%r10d,%r14d
+	addl	%r13d,%r9d
+	pshufd	$80,%xmm2,%xmm7
+	xorl	%r11d,%edi
+	rorl	$2,%r14d
+	addl	%r9d,%ebx
+	movdqa	%xmm7,%xmm6
+	addl	%edi,%r9d
+	movl	%ebx,%r13d
+	psrld	$10,%xmm7
+	addl	%r9d,%r14d
+	rorl	$14,%r13d
+	psrlq	$17,%xmm6
+	movl	%r14d,%r9d
+	movl	%ecx,%r12d
+	pxor	%xmm6,%xmm7
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r12d
+	rorl	$5,%r13d
+	xorl	%r9d,%r14d
+	psrlq	$2,%xmm6
+	andl	%ebx,%r12d
+	xorl	%ebx,%r13d
+	addl	44(%rsp),%r8d
+	pxor	%xmm6,%xmm7
+	movl	%r9d,%edi
+	xorl	%edx,%r12d
+	rorl	$11,%r14d
+	pshufd	$8,%xmm7,%xmm7
+	xorl	%r10d,%edi
+	addl	%r12d,%r8d
+	movdqa	64(%rbp),%xmm6
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	pslldq	$8,%xmm7
+	xorl	%r9d,%r14d
+	addl	%r13d,%r8d
+	xorl	%r10d,%r15d
+	paddd	%xmm7,%xmm2
+	rorl	$2,%r14d
+	addl	%r8d,%eax
+	addl	%r15d,%r8d
+	paddd	%xmm2,%xmm6
+	movl	%eax,%r13d
+	addl	%r8d,%r14d
+	movdqa	%xmm6,32(%rsp)
+	rorl	$14,%r13d
+	movdqa	%xmm0,%xmm4
+	movl	%r14d,%r8d
+	movl	%ebx,%r12d
+	movdqa	%xmm2,%xmm7
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r12d
+	rorl	$5,%r13d
+	xorl	%r8d,%r14d
+.byte	102,15,58,15,227,4
+	andl	%eax,%r12d
+	xorl	%eax,%r13d
+.byte	102,15,58,15,249,4
+	addl	48(%rsp),%edx
+	movl	%r8d,%r15d
+	xorl	%ecx,%r12d
+	rorl	$11,%r14d
+	movdqa	%xmm4,%xmm5
+	xorl	%r9d,%r15d
+	addl	%r12d,%edx
+	movdqa	%xmm4,%xmm6
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	psrld	$3,%xmm4
+	xorl	%r8d,%r14d
+	addl	%r13d,%edx
+	xorl	%r9d,%edi
+	paddd	%xmm7,%xmm3
+	rorl	$2,%r14d
+	addl	%edx,%r11d
+	psrld	$7,%xmm6
+	addl	%edi,%edx
+	movl	%r11d,%r13d
+	pshufd	$250,%xmm2,%xmm7
+	addl	%edx,%r14d
+	rorl	$14,%r13d
+	pslld	$14,%xmm5
+	movl	%r14d,%edx
+	movl	%eax,%r12d
+	pxor	%xmm6,%xmm4
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r12d
+	rorl	$5,%r13d
+	psrld	$11,%xmm6
+	xorl	%edx,%r14d
+	pxor	%xmm5,%xmm4
+	andl	%r11d,%r12d
+	xorl	%r11d,%r13d
+	pslld	$11,%xmm5
+	addl	52(%rsp),%ecx
+	movl	%edx,%edi
+	pxor	%xmm6,%xmm4
+	xorl	%ebx,%r12d
+	rorl	$11,%r14d
+	movdqa	%xmm7,%xmm6
+	xorl	%r8d,%edi
+	addl	%r12d,%ecx
+	pxor	%xmm5,%xmm4
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%edx,%r14d
+	psrld	$10,%xmm7
+	addl	%r13d,%ecx
+	xorl	%r8d,%r15d
+	paddd	%xmm4,%xmm3
+	rorl	$2,%r14d
+	addl	%ecx,%r10d
+	psrlq	$17,%xmm6
+	addl	%r15d,%ecx
+	movl	%r10d,%r13d
+	addl	%ecx,%r14d
+	pxor	%xmm6,%xmm7
+	rorl	$14,%r13d
+	movl	%r14d,%ecx
+	movl	%r11d,%r12d
+	rorl	$9,%r14d
+	psrlq	$2,%xmm6
+	xorl	%r10d,%r13d
+	xorl	%eax,%r12d
+	pxor	%xmm6,%xmm7
+	rorl	$5,%r13d
+	xorl	%ecx,%r14d
+	andl	%r10d,%r12d
+	pshufd	$128,%xmm7,%xmm7
+	xorl	%r10d,%r13d
+	addl	56(%rsp),%ebx
+	movl	%ecx,%r15d
+	psrldq	$8,%xmm7
+	xorl	%eax,%r12d
+	rorl	$11,%r14d
+	xorl	%edx,%r15d
+	addl	%r12d,%ebx
+	rorl	$6,%r13d
+	paddd	%xmm7,%xmm3
+	andl	%r15d,%edi
+	xorl	%ecx,%r14d
+	addl	%r13d,%ebx
+	pshufd	$80,%xmm3,%xmm7
+	xorl	%edx,%edi
+	rorl	$2,%r14d
+	addl	%ebx,%r9d
+	movdqa	%xmm7,%xmm6
+	addl	%edi,%ebx
+	movl	%r9d,%r13d
+	psrld	$10,%xmm7
+	addl	%ebx,%r14d
+	rorl	$14,%r13d
+	psrlq	$17,%xmm6
+	movl	%r14d,%ebx
+	movl	%r10d,%r12d
+	pxor	%xmm6,%xmm7
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r12d
+	rorl	$5,%r13d
+	xorl	%ebx,%r14d
+	psrlq	$2,%xmm6
+	andl	%r9d,%r12d
+	xorl	%r9d,%r13d
+	addl	60(%rsp),%eax
+	pxor	%xmm6,%xmm7
+	movl	%ebx,%edi
+	xorl	%r11d,%r12d
+	rorl	$11,%r14d
+	pshufd	$8,%xmm7,%xmm7
+	xorl	%ecx,%edi
+	addl	%r12d,%eax
+	movdqa	96(%rbp),%xmm6
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	pslldq	$8,%xmm7
+	xorl	%ebx,%r14d
+	addl	%r13d,%eax
+	xorl	%ecx,%r15d
+	paddd	%xmm7,%xmm3
+	rorl	$2,%r14d
+	addl	%eax,%r8d
+	addl	%r15d,%eax
+	paddd	%xmm3,%xmm6
+	movl	%r8d,%r13d
+	addl	%eax,%r14d
+	movdqa	%xmm6,48(%rsp)
+	cmpb	$0,131(%rbp)
+	jne	.Lssse3_00_47
+	rorl	$14,%r13d
+	movl	%r14d,%eax
+	movl	%r9d,%r12d
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r12d
+	rorl	$5,%r13d
+	xorl	%eax,%r14d
+	andl	%r8d,%r12d
+	xorl	%r8d,%r13d
+	addl	0(%rsp),%r11d
+	movl	%eax,%r15d
+	xorl	%r10d,%r12d
+	rorl	$11,%r14d
+	xorl	%ebx,%r15d
+	addl	%r12d,%r11d
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	xorl	%eax,%r14d
+	addl	%r13d,%r11d
+	xorl	%ebx,%edi
+	rorl	$2,%r14d
+	addl	%r11d,%edx
+	addl	%edi,%r11d
+	movl	%edx,%r13d
+	addl	%r11d,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%r11d
+	movl	%r8d,%r12d
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r12d
+	rorl	$5,%r13d
+	xorl	%r11d,%r14d
+	andl	%edx,%r12d
+	xorl	%edx,%r13d
+	addl	4(%rsp),%r10d
+	movl	%r11d,%edi
+	xorl	%r9d,%r12d
+	rorl	$11,%r14d
+	xorl	%eax,%edi
+	addl	%r12d,%r10d
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%r11d,%r14d
+	addl	%r13d,%r10d
+	xorl	%eax,%r15d
+	rorl	$2,%r14d
+	addl	%r10d,%ecx
+	addl	%r15d,%r10d
+	movl	%ecx,%r13d
+	addl	%r10d,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%r10d
+	movl	%edx,%r12d
+	rorl	$9,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r12d
+	rorl	$5,%r13d
+	xorl	%r10d,%r14d
+	andl	%ecx,%r12d
+	xorl	%ecx,%r13d
+	addl	8(%rsp),%r9d
+	movl	%r10d,%r15d
+	xorl	%r8d,%r12d
+	rorl	$11,%r14d
+	xorl	%r11d,%r15d
+	addl	%r12d,%r9d
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	xorl	%r10d,%r14d
+	addl	%r13d,%r9d
+	xorl	%r11d,%edi
+	rorl	$2,%r14d
+	addl	%r9d,%ebx
+	addl	%edi,%r9d
+	movl	%ebx,%r13d
+	addl	%r9d,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%r9d
+	movl	%ecx,%r12d
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r12d
+	rorl	$5,%r13d
+	xorl	%r9d,%r14d
+	andl	%ebx,%r12d
+	xorl	%ebx,%r13d
+	addl	12(%rsp),%r8d
+	movl	%r9d,%edi
+	xorl	%edx,%r12d
+	rorl	$11,%r14d
+	xorl	%r10d,%edi
+	addl	%r12d,%r8d
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%r9d,%r14d
+	addl	%r13d,%r8d
+	xorl	%r10d,%r15d
+	rorl	$2,%r14d
+	addl	%r8d,%eax
+	addl	%r15d,%r8d
+	movl	%eax,%r13d
+	addl	%r8d,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%r8d
+	movl	%ebx,%r12d
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r12d
+	rorl	$5,%r13d
+	xorl	%r8d,%r14d
+	andl	%eax,%r12d
+	xorl	%eax,%r13d
+	addl	16(%rsp),%edx
+	movl	%r8d,%r15d
+	xorl	%ecx,%r12d
+	rorl	$11,%r14d
+	xorl	%r9d,%r15d
+	addl	%r12d,%edx
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	xorl	%r8d,%r14d
+	addl	%r13d,%edx
+	xorl	%r9d,%edi
+	rorl	$2,%r14d
+	addl	%edx,%r11d
+	addl	%edi,%edx
+	movl	%r11d,%r13d
+	addl	%edx,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%edx
+	movl	%eax,%r12d
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r12d
+	rorl	$5,%r13d
+	xorl	%edx,%r14d
+	andl	%r11d,%r12d
+	xorl	%r11d,%r13d
+	addl	20(%rsp),%ecx
+	movl	%edx,%edi
+	xorl	%ebx,%r12d
+	rorl	$11,%r14d
+	xorl	%r8d,%edi
+	addl	%r12d,%ecx
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%edx,%r14d
+	addl	%r13d,%ecx
+	xorl	%r8d,%r15d
+	rorl	$2,%r14d
+	addl	%ecx,%r10d
+	addl	%r15d,%ecx
+	movl	%r10d,%r13d
+	addl	%ecx,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%ecx
+	movl	%r11d,%r12d
+	rorl	$9,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r12d
+	rorl	$5,%r13d
+	xorl	%ecx,%r14d
+	andl	%r10d,%r12d
+	xorl	%r10d,%r13d
+	addl	24(%rsp),%ebx
+	movl	%ecx,%r15d
+	xorl	%eax,%r12d
+	rorl	$11,%r14d
+	xorl	%edx,%r15d
+	addl	%r12d,%ebx
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	xorl	%ecx,%r14d
+	addl	%r13d,%ebx
+	xorl	%edx,%edi
+	rorl	$2,%r14d
+	addl	%ebx,%r9d
+	addl	%edi,%ebx
+	movl	%r9d,%r13d
+	addl	%ebx,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%ebx
+	movl	%r10d,%r12d
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r12d
+	rorl	$5,%r13d
+	xorl	%ebx,%r14d
+	andl	%r9d,%r12d
+	xorl	%r9d,%r13d
+	addl	28(%rsp),%eax
+	movl	%ebx,%edi
+	xorl	%r11d,%r12d
+	rorl	$11,%r14d
+	xorl	%ecx,%edi
+	addl	%r12d,%eax
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%ebx,%r14d
+	addl	%r13d,%eax
+	xorl	%ecx,%r15d
+	rorl	$2,%r14d
+	addl	%eax,%r8d
+	addl	%r15d,%eax
+	movl	%r8d,%r13d
+	addl	%eax,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%eax
+	movl	%r9d,%r12d
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r12d
+	rorl	$5,%r13d
+	xorl	%eax,%r14d
+	andl	%r8d,%r12d
+	xorl	%r8d,%r13d
+	addl	32(%rsp),%r11d
+	movl	%eax,%r15d
+	xorl	%r10d,%r12d
+	rorl	$11,%r14d
+	xorl	%ebx,%r15d
+	addl	%r12d,%r11d
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	xorl	%eax,%r14d
+	addl	%r13d,%r11d
+	xorl	%ebx,%edi
+	rorl	$2,%r14d
+	addl	%r11d,%edx
+	addl	%edi,%r11d
+	movl	%edx,%r13d
+	addl	%r11d,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%r11d
+	movl	%r8d,%r12d
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r12d
+	rorl	$5,%r13d
+	xorl	%r11d,%r14d
+	andl	%edx,%r12d
+	xorl	%edx,%r13d
+	addl	36(%rsp),%r10d
+	movl	%r11d,%edi
+	xorl	%r9d,%r12d
+	rorl	$11,%r14d
+	xorl	%eax,%edi
+	addl	%r12d,%r10d
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%r11d,%r14d
+	addl	%r13d,%r10d
+	xorl	%eax,%r15d
+	rorl	$2,%r14d
+	addl	%r10d,%ecx
+	addl	%r15d,%r10d
+	movl	%ecx,%r13d
+	addl	%r10d,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%r10d
+	movl	%edx,%r12d
+	rorl	$9,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r12d
+	rorl	$5,%r13d
+	xorl	%r10d,%r14d
+	andl	%ecx,%r12d
+	xorl	%ecx,%r13d
+	addl	40(%rsp),%r9d
+	movl	%r10d,%r15d
+	xorl	%r8d,%r12d
+	rorl	$11,%r14d
+	xorl	%r11d,%r15d
+	addl	%r12d,%r9d
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	xorl	%r10d,%r14d
+	addl	%r13d,%r9d
+	xorl	%r11d,%edi
+	rorl	$2,%r14d
+	addl	%r9d,%ebx
+	addl	%edi,%r9d
+	movl	%ebx,%r13d
+	addl	%r9d,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%r9d
+	movl	%ecx,%r12d
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r12d
+	rorl	$5,%r13d
+	xorl	%r9d,%r14d
+	andl	%ebx,%r12d
+	xorl	%ebx,%r13d
+	addl	44(%rsp),%r8d
+	movl	%r9d,%edi
+	xorl	%edx,%r12d
+	rorl	$11,%r14d
+	xorl	%r10d,%edi
+	addl	%r12d,%r8d
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%r9d,%r14d
+	addl	%r13d,%r8d
+	xorl	%r10d,%r15d
+	rorl	$2,%r14d
+	addl	%r8d,%eax
+	addl	%r15d,%r8d
+	movl	%eax,%r13d
+	addl	%r8d,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%r8d
+	movl	%ebx,%r12d
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r12d
+	rorl	$5,%r13d
+	xorl	%r8d,%r14d
+	andl	%eax,%r12d
+	xorl	%eax,%r13d
+	addl	48(%rsp),%edx
+	movl	%r8d,%r15d
+	xorl	%ecx,%r12d
+	rorl	$11,%r14d
+	xorl	%r9d,%r15d
+	addl	%r12d,%edx
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	xorl	%r8d,%r14d
+	addl	%r13d,%edx
+	xorl	%r9d,%edi
+	rorl	$2,%r14d
+	addl	%edx,%r11d
+	addl	%edi,%edx
+	movl	%r11d,%r13d
+	addl	%edx,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%edx
+	movl	%eax,%r12d
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r12d
+	rorl	$5,%r13d
+	xorl	%edx,%r14d
+	andl	%r11d,%r12d
+	xorl	%r11d,%r13d
+	addl	52(%rsp),%ecx
+	movl	%edx,%edi
+	xorl	%ebx,%r12d
+	rorl	$11,%r14d
+	xorl	%r8d,%edi
+	addl	%r12d,%ecx
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%edx,%r14d
+	addl	%r13d,%ecx
+	xorl	%r8d,%r15d
+	rorl	$2,%r14d
+	addl	%ecx,%r10d
+	addl	%r15d,%ecx
+	movl	%r10d,%r13d
+	addl	%ecx,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%ecx
+	movl	%r11d,%r12d
+	rorl	$9,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r12d
+	rorl	$5,%r13d
+	xorl	%ecx,%r14d
+	andl	%r10d,%r12d
+	xorl	%r10d,%r13d
+	addl	56(%rsp),%ebx
+	movl	%ecx,%r15d
+	xorl	%eax,%r12d
+	rorl	$11,%r14d
+	xorl	%edx,%r15d
+	addl	%r12d,%ebx
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	xorl	%ecx,%r14d
+	addl	%r13d,%ebx
+	xorl	%edx,%edi
+	rorl	$2,%r14d
+	addl	%ebx,%r9d
+	addl	%edi,%ebx
+	movl	%r9d,%r13d
+	addl	%ebx,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%ebx
+	movl	%r10d,%r12d
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r12d
+	rorl	$5,%r13d
+	xorl	%ebx,%r14d
+	andl	%r9d,%r12d
+	xorl	%r9d,%r13d
+	addl	60(%rsp),%eax
+	movl	%ebx,%edi
+	xorl	%r11d,%r12d
+	rorl	$11,%r14d
+	xorl	%ecx,%edi
+	addl	%r12d,%eax
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%ebx,%r14d
+	addl	%r13d,%eax
+	xorl	%ecx,%r15d
+	rorl	$2,%r14d
+	addl	%eax,%r8d
+	addl	%r15d,%eax
+	movl	%r8d,%r13d
+	addl	%eax,%r14d
+	movq	64+0(%rsp),%rdi
+	movl	%r14d,%eax
+
+	addl	0(%rdi),%eax
+	leaq	64(%rsi),%rsi
+	addl	4(%rdi),%ebx
+	addl	8(%rdi),%ecx
+	addl	12(%rdi),%edx
+	addl	16(%rdi),%r8d
+	addl	20(%rdi),%r9d
+	addl	24(%rdi),%r10d
+	addl	28(%rdi),%r11d
+
+	cmpq	64+16(%rsp),%rsi
+
+	movl	%eax,0(%rdi)
+	movl	%ebx,4(%rdi)
+	movl	%ecx,8(%rdi)
+	movl	%edx,12(%rdi)
+	movl	%r8d,16(%rdi)
+	movl	%r9d,20(%rdi)
+	movl	%r10d,24(%rdi)
+	movl	%r11d,28(%rdi)
+	jb	.Lloop_ssse3
+
+	movq	64+24(%rsp),%rsi
+	movq	-48(%rsi),%r15
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp
+.Lepilogue_ssse3:
+	.byte	0xf3,0xc3
+.size	sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3
+.type	sha256_block_data_order_avx,@function
+.align	64
+sha256_block_data_order_avx:	/* SHA-256 AVX path: rdi = state (8 x 32-bit), rsi = input, rdx = number of 64-byte blocks */
+.Lavx_shortcut:	/* NOTE(review): presumably the jump target of a CPU-feature dispatch outside this view */
+	movq	%rsp,%rax	/* rax = caller rsp, stored in the frame for the epilogue */
+	pushq	%rbx	/* save callee-saved registers rbx, rbp, r12-r15 */
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	shlq	$4,%rdx	/* rdx = blocks * 16 */
+	subq	$96,%rsp	/* 64 bytes of W+K scratch plus 32 bytes for four saved pointers */
+	leaq	(%rsi,%rdx,4),%rdx	/* rdx = rsi + blocks * 64 = end of input */
+	andq	$-64,%rsp	/* align the frame down to 64 bytes */
+	movq	%rdi,64+0(%rsp)	/* save state pointer */
+	movq	%rsi,64+8(%rsp)	/* save input pointer */
+	movq	%rdx,64+16(%rsp)	/* save end-of-input pointer */
+	movq	%rax,64+24(%rsp)	/* save caller rsp */
+.Lprologue_avx:
+
+	vzeroupper	/* zero the upper halves of the ymm registers */
+	movl	0(%rdi),%eax	/* load working variables a..h = eax,ebx,ecx,edx,r8d,r9d,r10d,r11d */
+	movl	4(%rdi),%ebx
+	movl	8(%rdi),%ecx
+	movl	12(%rdi),%edx
+	movl	16(%rdi),%r8d
+	movl	20(%rdi),%r9d
+	movl	24(%rdi),%r10d
+	movl	28(%rdi),%r11d
+	vmovdqa	K256+512+32(%rip),%xmm8	/* xmm8, xmm9: vpshufb masks used by the message schedule below */
+	vmovdqa	K256+512+64(%rip),%xmm9
+	jmp	.Lloop_avx
+.align	16
+.Lloop_avx:	/* one iteration per 64-byte input block */
+	vmovdqa	K256+512(%rip),%xmm7	/* vpshufb mask applied to the input; presumably a byte swap - TODO confirm against K256 */
+	vmovdqu	0(%rsi),%xmm0	/* load one 64-byte block (unaligned) into xmm0-xmm3 */
+	vmovdqu	16(%rsi),%xmm1
+	vmovdqu	32(%rsi),%xmm2
+	vmovdqu	48(%rsi),%xmm3
+	vpshufb	%xmm7,%xmm0,%xmm0
+	leaq	K256(%rip),%rbp	/* rbp = base of the K256 constant table */
+	vpshufb	%xmm7,%xmm1,%xmm1
+	vpshufb	%xmm7,%xmm2,%xmm2
+	vpaddd	0(%rbp),%xmm0,%xmm4	/* words 0..3 + table entry; entries are read at a 32-byte stride */
+	vpshufb	%xmm7,%xmm3,%xmm3
+	vpaddd	32(%rbp),%xmm1,%xmm5
+	vpaddd	64(%rbp),%xmm2,%xmm6
+	vpaddd	96(%rbp),%xmm3,%xmm7
+	vmovdqa	%xmm4,0(%rsp)	/* park the word+constant sums on the stack for the scalar rounds */
+	movl	%eax,%r14d	/* r14d seeds a; each round starts by copying r14d back */
+	vmovdqa	%xmm5,16(%rsp)
+	movl	%ebx,%edi
+	vmovdqa	%xmm6,32(%rsp)
+	xorl	%ecx,%edi	/* edi = b ^ c, reused by the Maj computation */
+	vmovdqa	%xmm7,48(%rsp)
+	movl	%r8d,%r13d	/* r13d = e, input to the first rotate chain */
+	jmp	.Lavx_00_47
+
+.align	16
+.Lavx_00_47:	/* 16 scalar rounds interleaved with the vector schedule for the next 16 words */
+	subq	$-128,%rbp	/* rbp += 128 (imm8 -128 encodes shorter than +128) */
+	vpalignr	$4,%xmm0,%xmm1,%xmm4	/* xmm4 = words 1..4 of the 16-word window */
+	shrdl	$14,%r13d,%r13d	/* shrd with identical operands = rotate right */
+	movl	%r14d,%eax	/* a = value finished by the previous round */
+	movl	%r9d,%r12d
+	vpalignr	$4,%xmm2,%xmm3,%xmm7	/* xmm7 = words 9..12 */
+	shrdl	$9,%r14d,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r12d
+	vpsrld	$7,%xmm4,%xmm6
+	shrdl	$5,%r13d,%r13d
+	xorl	%eax,%r14d
+	andl	%r8d,%r12d
+	vpaddd	%xmm7,%xmm0,%xmm0	/* words 0..3 += words 9..12 */
+	xorl	%r8d,%r13d
+	addl	0(%rsp),%r11d	/* h += word + constant for this round (from the stack) */
+	movl	%eax,%r15d
+	vpsrld	$3,%xmm4,%xmm7
+	xorl	%r10d,%r12d	/* r12d = ((f ^ g) & e) ^ g = Ch(e,f,g) */
+	shrdl	$11,%r14d,%r14d
+	xorl	%ebx,%r15d	/* r15d = a ^ b, kept for the next round */
+	vpslld	$14,%xmm4,%xmm5
+	addl	%r12d,%r11d
+	shrdl	$6,%r13d,%r13d	/* r13d = ror(e,6) ^ ror(e,11) ^ ror(e,25) */
+	andl	%r15d,%edi
+	vpxor	%xmm6,%xmm7,%xmm4
+	xorl	%eax,%r14d
+	addl	%r13d,%r11d
+	xorl	%ebx,%edi	/* edi = ((a ^ b) & (b ^ c)) ^ b = Maj(a,b,c) */
+	vpshufd	$250,%xmm3,%xmm7	/* xmm7 = words 14,14,15,15 */
+	shrdl	$2,%r14d,%r14d	/* r14d = ror(a,2) ^ ror(a,13) ^ ror(a,22) */
+	addl	%r11d,%edx	/* d += h + Ch + rotate sum + word + constant */
+	addl	%edi,%r11d
+	vpsrld	$11,%xmm6,%xmm6
+	movl	%edx,%r13d
+	addl	%r11d,%r14d	/* r14d = new a; moved into place at the start of the next round */
+	shrdl	$14,%r13d,%r13d
+	vpxor	%xmm5,%xmm4,%xmm4
+	movl	%r14d,%r11d
+	movl	%r8d,%r12d
+	shrdl	$9,%r14d,%r14d
+	vpslld	$11,%xmm5,%xmm5
+	xorl	%edx,%r13d
+	xorl	%r9d,%r12d
+	shrdl	$5,%r13d,%r13d
+	vpxor	%xmm6,%xmm4,%xmm4
+	xorl	%r11d,%r14d
+	andl	%edx,%r12d
+	xorl	%edx,%r13d
+	vpsrld	$10,%xmm7,%xmm6
+	addl	4(%rsp),%r10d
+	movl	%r11d,%edi
+	xorl	%r9d,%r12d
+	vpxor	%xmm5,%xmm4,%xmm4	/* xmm4 = shr3 ^ ror7 ^ ror18 of words 1..4 */
+	shrdl	$11,%r14d,%r14d
+	xorl	%eax,%edi
+	addl	%r12d,%r10d
+	vpsrlq	$17,%xmm7,%xmm7
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%r11d,%r14d
+	vpaddd	%xmm4,%xmm0,%xmm0
+	addl	%r13d,%r10d
+	xorl	%eax,%r15d
+	shrdl	$2,%r14d,%r14d
+	vpxor	%xmm7,%xmm6,%xmm6
+	addl	%r10d,%ecx
+	addl	%r15d,%r10d
+	movl	%ecx,%r13d
+	vpsrlq	$2,%xmm7,%xmm7
+	addl	%r10d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r10d
+	vpxor	%xmm7,%xmm6,%xmm6	/* low dword of each qword = shr10 ^ ror17 ^ ror19 of words 14, 15 */
+	movl	%edx,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%ecx,%r13d
+	vpshufb	%xmm8,%xmm6,%xmm6	/* rearrange via the xmm8 mask before adding into the low half */
+	xorl	%r8d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r10d,%r14d
+	vpaddd	%xmm6,%xmm0,%xmm0
+	andl	%ecx,%r12d
+	xorl	%ecx,%r13d
+	addl	8(%rsp),%r9d
+	vpshufd	$80,%xmm0,%xmm7	/* xmm7 = new words 0,0,1,1 feeding the upper two lanes */
+	movl	%r10d,%r15d
+	xorl	%r8d,%r12d
+	shrdl	$11,%r14d,%r14d
+	vpsrld	$10,%xmm7,%xmm6
+	xorl	%r11d,%r15d
+	addl	%r12d,%r9d
+	shrdl	$6,%r13d,%r13d
+	vpsrlq	$17,%xmm7,%xmm7
+	andl	%r15d,%edi
+	xorl	%r10d,%r14d
+	addl	%r13d,%r9d
+	vpxor	%xmm7,%xmm6,%xmm6
+	xorl	%r11d,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%r9d,%ebx
+	vpsrlq	$2,%xmm7,%xmm7
+	addl	%edi,%r9d
+	movl	%ebx,%r13d
+	addl	%r9d,%r14d
+	vpxor	%xmm7,%xmm6,%xmm6
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r9d
+	movl	%ecx,%r12d
+	vpshufb	%xmm9,%xmm6,%xmm6	/* rearrange via the xmm9 mask before adding into the high half */
+	shrdl	$9,%r14d,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r12d
+	vpaddd	%xmm6,%xmm0,%xmm0	/* xmm0 = next four schedule words */
+	shrdl	$5,%r13d,%r13d
+	xorl	%r9d,%r14d
+	andl	%ebx,%r12d
+	vpaddd	0(%rbp),%xmm0,%xmm6	/* add the table constants for those words */
+	xorl	%ebx,%r13d
+	addl	12(%rsp),%r8d
+	movl	%r9d,%edi
+	xorl	%edx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r10d,%edi
+	addl	%r12d,%r8d
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%r9d,%r14d
+	addl	%r13d,%r8d
+	xorl	%r10d,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%r8d,%eax
+	addl	%r15d,%r8d
+	movl	%eax,%r13d
+	addl	%r8d,%r14d
+	vmovdqa	%xmm6,0(%rsp)	/* refill stack slot 0..15 once its four rounds are done */
+	vpalignr	$4,%xmm1,%xmm2,%xmm4	/* same schedule step, now producing xmm1 (stack slot 16..31) */
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r8d
+	movl	%ebx,%r12d
+	vpalignr	$4,%xmm3,%xmm0,%xmm7
+	shrdl	$9,%r14d,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r12d
+	vpsrld	$7,%xmm4,%xmm6
+	shrdl	$5,%r13d,%r13d
+	xorl	%r8d,%r14d
+	andl	%eax,%r12d
+	vpaddd	%xmm7,%xmm1,%xmm1
+	xorl	%eax,%r13d
+	addl	16(%rsp),%edx
+	movl	%r8d,%r15d
+	vpsrld	$3,%xmm4,%xmm7
+	xorl	%ecx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r9d,%r15d
+	vpslld	$14,%xmm4,%xmm5
+	addl	%r12d,%edx
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	vpxor	%xmm6,%xmm7,%xmm4
+	xorl	%r8d,%r14d
+	addl	%r13d,%edx
+	xorl	%r9d,%edi
+	vpshufd	$250,%xmm0,%xmm7
+	shrdl	$2,%r14d,%r14d
+	addl	%edx,%r11d
+	addl	%edi,%edx
+	vpsrld	$11,%xmm6,%xmm6
+	movl	%r11d,%r13d
+	addl	%edx,%r14d
+	shrdl	$14,%r13d,%r13d
+	vpxor	%xmm5,%xmm4,%xmm4
+	movl	%r14d,%edx
+	movl	%eax,%r12d
+	shrdl	$9,%r14d,%r14d
+	vpslld	$11,%xmm5,%xmm5
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r12d
+	shrdl	$5,%r13d,%r13d
+	vpxor	%xmm6,%xmm4,%xmm4
+	xorl	%edx,%r14d
+	andl	%r11d,%r12d
+	xorl	%r11d,%r13d
+	vpsrld	$10,%xmm7,%xmm6
+	addl	20(%rsp),%ecx
+	movl	%edx,%edi
+	xorl	%ebx,%r12d
+	vpxor	%xmm5,%xmm4,%xmm4
+	shrdl	$11,%r14d,%r14d
+	xorl	%r8d,%edi
+	addl	%r12d,%ecx
+	vpsrlq	$17,%xmm7,%xmm7
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%edx,%r14d
+	vpaddd	%xmm4,%xmm1,%xmm1
+	addl	%r13d,%ecx
+	xorl	%r8d,%r15d
+	shrdl	$2,%r14d,%r14d
+	vpxor	%xmm7,%xmm6,%xmm6
+	addl	%ecx,%r10d
+	addl	%r15d,%ecx
+	movl	%r10d,%r13d
+	vpsrlq	$2,%xmm7,%xmm7
+	addl	%ecx,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%ecx
+	vpxor	%xmm7,%xmm6,%xmm6
+	movl	%r11d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r10d,%r13d
+	vpshufb	%xmm8,%xmm6,%xmm6
+	xorl	%eax,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%ecx,%r14d
+	vpaddd	%xmm6,%xmm1,%xmm1
+	andl	%r10d,%r12d
+	xorl	%r10d,%r13d
+	addl	24(%rsp),%ebx
+	vpshufd	$80,%xmm1,%xmm7
+	movl	%ecx,%r15d
+	xorl	%eax,%r12d
+	shrdl	$11,%r14d,%r14d
+	vpsrld	$10,%xmm7,%xmm6
+	xorl	%edx,%r15d
+	addl	%r12d,%ebx
+	shrdl	$6,%r13d,%r13d
+	vpsrlq	$17,%xmm7,%xmm7
+	andl	%r15d,%edi
+	xorl	%ecx,%r14d
+	addl	%r13d,%ebx
+	vpxor	%xmm7,%xmm6,%xmm6
+	xorl	%edx,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%ebx,%r9d
+	vpsrlq	$2,%xmm7,%xmm7
+	addl	%edi,%ebx
+	movl	%r9d,%r13d
+	addl	%ebx,%r14d
+	vpxor	%xmm7,%xmm6,%xmm6
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%ebx
+	movl	%r10d,%r12d
+	vpshufb	%xmm9,%xmm6,%xmm6
+	shrdl	$9,%r14d,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r12d
+	vpaddd	%xmm6,%xmm1,%xmm1
+	shrdl	$5,%r13d,%r13d
+	xorl	%ebx,%r14d
+	andl	%r9d,%r12d
+	vpaddd	32(%rbp),%xmm1,%xmm6
+	xorl	%r9d,%r13d
+	addl	28(%rsp),%eax
+	movl	%ebx,%edi
+	xorl	%r11d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%ecx,%edi
+	addl	%r12d,%eax
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%ebx,%r14d
+	addl	%r13d,%eax
+	xorl	%ecx,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%eax,%r8d
+	addl	%r15d,%eax
+	movl	%r8d,%r13d
+	addl	%eax,%r14d
+	vmovdqa	%xmm6,16(%rsp)
+	vpalignr	$4,%xmm2,%xmm3,%xmm4	/* same schedule step, now producing xmm2 (stack slot 32..47) */
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%eax
+	movl	%r9d,%r12d
+	vpalignr	$4,%xmm0,%xmm1,%xmm7
+	shrdl	$9,%r14d,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r12d
+	vpsrld	$7,%xmm4,%xmm6
+	shrdl	$5,%r13d,%r13d
+	xorl	%eax,%r14d
+	andl	%r8d,%r12d
+	vpaddd	%xmm7,%xmm2,%xmm2
+	xorl	%r8d,%r13d
+	addl	32(%rsp),%r11d
+	movl	%eax,%r15d
+	vpsrld	$3,%xmm4,%xmm7
+	xorl	%r10d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%ebx,%r15d
+	vpslld	$14,%xmm4,%xmm5
+	addl	%r12d,%r11d
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	vpxor	%xmm6,%xmm7,%xmm4
+	xorl	%eax,%r14d
+	addl	%r13d,%r11d
+	xorl	%ebx,%edi
+	vpshufd	$250,%xmm1,%xmm7
+	shrdl	$2,%r14d,%r14d
+	addl	%r11d,%edx
+	addl	%edi,%r11d
+	vpsrld	$11,%xmm6,%xmm6
+	movl	%edx,%r13d
+	addl	%r11d,%r14d
+	shrdl	$14,%r13d,%r13d
+	vpxor	%xmm5,%xmm4,%xmm4
+	movl	%r14d,%r11d
+	movl	%r8d,%r12d
+	shrdl	$9,%r14d,%r14d
+	vpslld	$11,%xmm5,%xmm5
+	xorl	%edx,%r13d
+	xorl	%r9d,%r12d
+	shrdl	$5,%r13d,%r13d
+	vpxor	%xmm6,%xmm4,%xmm4
+	xorl	%r11d,%r14d
+	andl	%edx,%r12d
+	xorl	%edx,%r13d
+	vpsrld	$10,%xmm7,%xmm6
+	addl	36(%rsp),%r10d
+	movl	%r11d,%edi
+	xorl	%r9d,%r12d
+	vpxor	%xmm5,%xmm4,%xmm4
+	shrdl	$11,%r14d,%r14d
+	xorl	%eax,%edi
+	addl	%r12d,%r10d
+	vpsrlq	$17,%xmm7,%xmm7
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%r11d,%r14d
+	vpaddd	%xmm4,%xmm2,%xmm2
+	addl	%r13d,%r10d
+	xorl	%eax,%r15d
+	shrdl	$2,%r14d,%r14d
+	vpxor	%xmm7,%xmm6,%xmm6
+	addl	%r10d,%ecx
+	addl	%r15d,%r10d
+	movl	%ecx,%r13d
+	vpsrlq	$2,%xmm7,%xmm7
+	addl	%r10d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r10d
+	vpxor	%xmm7,%xmm6,%xmm6
+	movl	%edx,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%ecx,%r13d
+	vpshufb	%xmm8,%xmm6,%xmm6
+	xorl	%r8d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r10d,%r14d
+	vpaddd	%xmm6,%xmm2,%xmm2
+	andl	%ecx,%r12d
+	xorl	%ecx,%r13d
+	addl	40(%rsp),%r9d
+	vpshufd	$80,%xmm2,%xmm7
+	movl	%r10d,%r15d
+	xorl	%r8d,%r12d
+	shrdl	$11,%r14d,%r14d
+	vpsrld	$10,%xmm7,%xmm6
+	xorl	%r11d,%r15d
+	addl	%r12d,%r9d
+	shrdl	$6,%r13d,%r13d
+	vpsrlq	$17,%xmm7,%xmm7
+	andl	%r15d,%edi
+	xorl	%r10d,%r14d
+	addl	%r13d,%r9d
+	vpxor	%xmm7,%xmm6,%xmm6
+	xorl	%r11d,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%r9d,%ebx
+	vpsrlq	$2,%xmm7,%xmm7
+	addl	%edi,%r9d
+	movl	%ebx,%r13d
+	addl	%r9d,%r14d
+	vpxor	%xmm7,%xmm6,%xmm6
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r9d
+	movl	%ecx,%r12d
+	vpshufb	%xmm9,%xmm6,%xmm6
+	shrdl	$9,%r14d,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r12d
+	vpaddd	%xmm6,%xmm2,%xmm2
+	shrdl	$5,%r13d,%r13d
+	xorl	%r9d,%r14d
+	andl	%ebx,%r12d
+	vpaddd	64(%rbp),%xmm2,%xmm6
+	xorl	%ebx,%r13d
+	addl	44(%rsp),%r8d
+	movl	%r9d,%edi
+	xorl	%edx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r10d,%edi
+	addl	%r12d,%r8d
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%r9d,%r14d
+	addl	%r13d,%r8d
+	xorl	%r10d,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%r8d,%eax
+	addl	%r15d,%r8d
+	movl	%eax,%r13d
+	addl	%r8d,%r14d
+	vmovdqa	%xmm6,32(%rsp)
+	vpalignr	$4,%xmm3,%xmm0,%xmm4	/* same schedule step, now producing xmm3 (stack slot 48..63) */
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r8d
+	movl	%ebx,%r12d
+	vpalignr	$4,%xmm1,%xmm2,%xmm7
+	shrdl	$9,%r14d,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r12d
+	vpsrld	$7,%xmm4,%xmm6
+	shrdl	$5,%r13d,%r13d
+	xorl	%r8d,%r14d
+	andl	%eax,%r12d
+	vpaddd	%xmm7,%xmm3,%xmm3
+	xorl	%eax,%r13d
+	addl	48(%rsp),%edx
+	movl	%r8d,%r15d
+	vpsrld	$3,%xmm4,%xmm7
+	xorl	%ecx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r9d,%r15d
+	vpslld	$14,%xmm4,%xmm5
+	addl	%r12d,%edx
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	vpxor	%xmm6,%xmm7,%xmm4
+	xorl	%r8d,%r14d
+	addl	%r13d,%edx
+	xorl	%r9d,%edi
+	vpshufd	$250,%xmm2,%xmm7
+	shrdl	$2,%r14d,%r14d
+	addl	%edx,%r11d
+	addl	%edi,%edx
+	vpsrld	$11,%xmm6,%xmm6
+	movl	%r11d,%r13d
+	addl	%edx,%r14d
+	shrdl	$14,%r13d,%r13d
+	vpxor	%xmm5,%xmm4,%xmm4
+	movl	%r14d,%edx
+	movl	%eax,%r12d
+	shrdl	$9,%r14d,%r14d
+	vpslld	$11,%xmm5,%xmm5
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r12d
+	shrdl	$5,%r13d,%r13d
+	vpxor	%xmm6,%xmm4,%xmm4
+	xorl	%edx,%r14d
+	andl	%r11d,%r12d
+	xorl	%r11d,%r13d
+	vpsrld	$10,%xmm7,%xmm6
+	addl	52(%rsp),%ecx
+	movl	%edx,%edi
+	xorl	%ebx,%r12d
+	vpxor	%xmm5,%xmm4,%xmm4
+	shrdl	$11,%r14d,%r14d
+	xorl	%r8d,%edi
+	addl	%r12d,%ecx
+	vpsrlq	$17,%xmm7,%xmm7
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%edx,%r14d
+	vpaddd	%xmm4,%xmm3,%xmm3
+	addl	%r13d,%ecx
+	xorl	%r8d,%r15d
+	shrdl	$2,%r14d,%r14d
+	vpxor	%xmm7,%xmm6,%xmm6
+	addl	%ecx,%r10d
+	addl	%r15d,%ecx
+	movl	%r10d,%r13d
+	vpsrlq	$2,%xmm7,%xmm7
+	addl	%ecx,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%ecx
+	vpxor	%xmm7,%xmm6,%xmm6
+	movl	%r11d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r10d,%r13d
+	vpshufb	%xmm8,%xmm6,%xmm6
+	xorl	%eax,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%ecx,%r14d
+	vpaddd	%xmm6,%xmm3,%xmm3
+	andl	%r10d,%r12d
+	xorl	%r10d,%r13d
+	addl	56(%rsp),%ebx
+	vpshufd	$80,%xmm3,%xmm7
+	movl	%ecx,%r15d
+	xorl	%eax,%r12d
+	shrdl	$11,%r14d,%r14d
+	vpsrld	$10,%xmm7,%xmm6
+	xorl	%edx,%r15d
+	addl	%r12d,%ebx
+	shrdl	$6,%r13d,%r13d
+	vpsrlq	$17,%xmm7,%xmm7
+	andl	%r15d,%edi
+	xorl	%ecx,%r14d
+	addl	%r13d,%ebx
+	vpxor	%xmm7,%xmm6,%xmm6
+	xorl	%edx,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%ebx,%r9d
+	vpsrlq	$2,%xmm7,%xmm7
+	addl	%edi,%ebx
+	movl	%r9d,%r13d
+	addl	%ebx,%r14d
+	vpxor	%xmm7,%xmm6,%xmm6
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%ebx
+	movl	%r10d,%r12d
+	vpshufb	%xmm9,%xmm6,%xmm6
+	shrdl	$9,%r14d,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r12d
+	vpaddd	%xmm6,%xmm3,%xmm3
+	shrdl	$5,%r13d,%r13d
+	xorl	%ebx,%r14d
+	andl	%r9d,%r12d
+	vpaddd	96(%rbp),%xmm3,%xmm6
+	xorl	%r9d,%r13d
+	addl	60(%rsp),%eax
+	movl	%ebx,%edi
+	xorl	%r11d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%ecx,%edi
+	addl	%r12d,%eax
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%ebx,%r14d
+	addl	%r13d,%eax
+	xorl	%ecx,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%eax,%r8d
+	addl	%r15d,%eax
+	movl	%r8d,%r13d
+	addl	%eax,%r14d
+	vmovdqa	%xmm6,48(%rsp)
+	cmpb	$0,131(%rbp)	/* presumably reads a zero byte once the constants are exhausted - TODO confirm against K256 */
+	jne	.Lavx_00_47	/* otherwise run another 16 rounds with schedule */
+	shrdl	$14,%r13d,%r13d	/* last 16 rounds: consume the stacked sums, no further schedule */
+	movl	%r14d,%eax
+	movl	%r9d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%eax,%r14d
+	andl	%r8d,%r12d
+	xorl	%r8d,%r13d
+	addl	0(%rsp),%r11d
+	movl	%eax,%r15d
+	xorl	%r10d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%ebx,%r15d
+	addl	%r12d,%r11d
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	xorl	%eax,%r14d
+	addl	%r13d,%r11d
+	xorl	%ebx,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%r11d,%edx
+	addl	%edi,%r11d
+	movl	%edx,%r13d
+	addl	%r11d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r11d
+	movl	%r8d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r11d,%r14d
+	andl	%edx,%r12d
+	xorl	%edx,%r13d
+	addl	4(%rsp),%r10d
+	movl	%r11d,%edi
+	xorl	%r9d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%eax,%edi
+	addl	%r12d,%r10d
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%r11d,%r14d
+	addl	%r13d,%r10d
+	xorl	%eax,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%r10d,%ecx
+	addl	%r15d,%r10d
+	movl	%ecx,%r13d
+	addl	%r10d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r10d
+	movl	%edx,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r10d,%r14d
+	andl	%ecx,%r12d
+	xorl	%ecx,%r13d
+	addl	8(%rsp),%r9d
+	movl	%r10d,%r15d
+	xorl	%r8d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r11d,%r15d
+	addl	%r12d,%r9d
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	xorl	%r10d,%r14d
+	addl	%r13d,%r9d
+	xorl	%r11d,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%r9d,%ebx
+	addl	%edi,%r9d
+	movl	%ebx,%r13d
+	addl	%r9d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r9d
+	movl	%ecx,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r9d,%r14d
+	andl	%ebx,%r12d
+	xorl	%ebx,%r13d
+	addl	12(%rsp),%r8d
+	movl	%r9d,%edi
+	xorl	%edx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r10d,%edi
+	addl	%r12d,%r8d
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%r9d,%r14d
+	addl	%r13d,%r8d
+	xorl	%r10d,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%r8d,%eax
+	addl	%r15d,%r8d
+	movl	%eax,%r13d
+	addl	%r8d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r8d
+	movl	%ebx,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r8d,%r14d
+	andl	%eax,%r12d
+	xorl	%eax,%r13d
+	addl	16(%rsp),%edx
+	movl	%r8d,%r15d
+	xorl	%ecx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r9d,%r15d
+	addl	%r12d,%edx
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	xorl	%r8d,%r14d
+	addl	%r13d,%edx
+	xorl	%r9d,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%edx,%r11d
+	addl	%edi,%edx
+	movl	%r11d,%r13d
+	addl	%edx,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%edx
+	movl	%eax,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%edx,%r14d
+	andl	%r11d,%r12d
+	xorl	%r11d,%r13d
+	addl	20(%rsp),%ecx
+	movl	%edx,%edi
+	xorl	%ebx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r8d,%edi
+	addl	%r12d,%ecx
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%edx,%r14d
+	addl	%r13d,%ecx
+	xorl	%r8d,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%ecx,%r10d
+	addl	%r15d,%ecx
+	movl	%r10d,%r13d
+	addl	%ecx,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%ecx
+	movl	%r11d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%ecx,%r14d
+	andl	%r10d,%r12d
+	xorl	%r10d,%r13d
+	addl	24(%rsp),%ebx
+	movl	%ecx,%r15d
+	xorl	%eax,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%edx,%r15d
+	addl	%r12d,%ebx
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	xorl	%ecx,%r14d
+	addl	%r13d,%ebx
+	xorl	%edx,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%ebx,%r9d
+	addl	%edi,%ebx
+	movl	%r9d,%r13d
+	addl	%ebx,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%ebx
+	movl	%r10d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%ebx,%r14d
+	andl	%r9d,%r12d
+	xorl	%r9d,%r13d
+	addl	28(%rsp),%eax
+	movl	%ebx,%edi
+	xorl	%r11d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%ecx,%edi
+	addl	%r12d,%eax
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%ebx,%r14d
+	addl	%r13d,%eax
+	xorl	%ecx,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%eax,%r8d
+	addl	%r15d,%eax
+	movl	%r8d,%r13d
+	addl	%eax,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%eax
+	movl	%r9d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%eax,%r14d
+	andl	%r8d,%r12d
+	xorl	%r8d,%r13d
+	addl	32(%rsp),%r11d
+	movl	%eax,%r15d
+	xorl	%r10d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%ebx,%r15d
+	addl	%r12d,%r11d
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	xorl	%eax,%r14d
+	addl	%r13d,%r11d
+	xorl	%ebx,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%r11d,%edx
+	addl	%edi,%r11d
+	movl	%edx,%r13d
+	addl	%r11d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r11d
+	movl	%r8d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r11d,%r14d
+	andl	%edx,%r12d
+	xorl	%edx,%r13d
+	addl	36(%rsp),%r10d
+	movl	%r11d,%edi
+	xorl	%r9d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%eax,%edi
+	addl	%r12d,%r10d
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%r11d,%r14d
+	addl	%r13d,%r10d
+	xorl	%eax,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%r10d,%ecx
+	addl	%r15d,%r10d
+	movl	%ecx,%r13d
+	addl	%r10d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r10d
+	movl	%edx,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r10d,%r14d
+	andl	%ecx,%r12d
+	xorl	%ecx,%r13d
+	addl	40(%rsp),%r9d
+	movl	%r10d,%r15d
+	xorl	%r8d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r11d,%r15d
+	addl	%r12d,%r9d
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	xorl	%r10d,%r14d
+	addl	%r13d,%r9d
+	xorl	%r11d,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%r9d,%ebx
+	addl	%edi,%r9d
+	movl	%ebx,%r13d
+	addl	%r9d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r9d
+	movl	%ecx,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r9d,%r14d
+	andl	%ebx,%r12d
+	xorl	%ebx,%r13d
+	addl	44(%rsp),%r8d
+	movl	%r9d,%edi
+	xorl	%edx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r10d,%edi
+	addl	%r12d,%r8d
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%r9d,%r14d
+	addl	%r13d,%r8d
+	xorl	%r10d,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%r8d,%eax
+	addl	%r15d,%r8d
+	movl	%eax,%r13d
+	addl	%r8d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r8d
+	movl	%ebx,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r8d,%r14d
+	andl	%eax,%r12d
+	xorl	%eax,%r13d
+	addl	48(%rsp),%edx
+	movl	%r8d,%r15d
+	xorl	%ecx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r9d,%r15d
+	addl	%r12d,%edx
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	xorl	%r8d,%r14d
+	addl	%r13d,%edx
+	xorl	%r9d,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%edx,%r11d
+	addl	%edi,%edx
+	movl	%r11d,%r13d
+	addl	%edx,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%edx
+	movl	%eax,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%edx,%r14d
+	andl	%r11d,%r12d
+	xorl	%r11d,%r13d
+	addl	52(%rsp),%ecx
+	movl	%edx,%edi
+	xorl	%ebx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r8d,%edi
+	addl	%r12d,%ecx
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%edx,%r14d
+	addl	%r13d,%ecx
+	xorl	%r8d,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%ecx,%r10d
+	addl	%r15d,%ecx
+	movl	%r10d,%r13d
+	addl	%ecx,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%ecx
+	movl	%r11d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%ecx,%r14d
+	andl	%r10d,%r12d
+	xorl	%r10d,%r13d
+	addl	56(%rsp),%ebx
+	movl	%ecx,%r15d
+	xorl	%eax,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%edx,%r15d
+	addl	%r12d,%ebx
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	xorl	%ecx,%r14d
+	addl	%r13d,%ebx
+	xorl	%edx,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%ebx,%r9d
+	addl	%edi,%ebx
+	movl	%r9d,%r13d
+	addl	%ebx,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%ebx
+	movl	%r10d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%ebx,%r14d
+	andl	%r9d,%r12d
+	xorl	%r9d,%r13d
+	addl	60(%rsp),%eax
+	movl	%ebx,%edi
+	xorl	%r11d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%ecx,%edi
+	addl	%r12d,%eax
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%ebx,%r14d
+	addl	%r13d,%eax
+	xorl	%ecx,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%eax,%r8d
+	addl	%r15d,%eax
+	movl	%r8d,%r13d
+	addl	%eax,%r14d
+	movq	64+0(%rsp),%rdi	/* reload the state pointer (rdi was used as scratch in the rounds) */
+	movl	%r14d,%eax	/* commit the value left pending by the last round */
+
+	addl	0(%rdi),%eax	/* add the previous state into the working variables */
+	leaq	64(%rsi),%rsi	/* advance the input pointer by one 64-byte block */
+	addl	4(%rdi),%ebx
+	addl	8(%rdi),%ecx
+	addl	12(%rdi),%edx
+	addl	16(%rdi),%r8d
+	addl	20(%rdi),%r9d
+	addl	24(%rdi),%r10d
+	addl	28(%rdi),%r11d
+
+	cmpq	64+16(%rsp),%rsi	/* compare with the saved end pointer; the movs below leave flags intact for jb */
+
+	movl	%eax,0(%rdi)	/* write the updated state back */
+	movl	%ebx,4(%rdi)
+	movl	%ecx,8(%rdi)
+	movl	%edx,12(%rdi)
+	movl	%r8d,16(%rdi)
+	movl	%r9d,20(%rdi)
+	movl	%r10d,24(%rdi)
+	movl	%r11d,28(%rdi)
+	jb	.Lloop_avx	/* more input remains: process the next block */
+
+	movq	64+24(%rsp),%rsi	/* rsi = caller rsp saved by the prologue */
+	vzeroupper
+	movq	-48(%rsi),%r15	/* restore callee-saved registers from just below the caller rsp */
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp	/* drop the aligned frame and the pushes in one step */
+.Lepilogue_avx:
+	.byte	0xf3,0xc3	/* raw encoding of "rep ret" */
+.size	sha256_block_data_order_avx,.-sha256_block_data_order_avx
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/fipsmodule/sha512-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/sha512-x86_64.S
new file mode 100644
index 0000000..1f1793b
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/sha512-x86_64.S
@@ -0,0 +1,4028 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+.extern	OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
+.globl	sha512_block_data_order
+.hidden sha512_block_data_order
+.type	sha512_block_data_order,@function
+.align	16
+sha512_block_data_order:
+	leaq	OPENSSL_ia32cap_P(%rip),%r11
+	movl	0(%r11),%r9d
+	movl	4(%r11),%r10d
+	movl	8(%r11),%r11d
+	testl	$2048,%r10d
+	jnz	.Lxop_shortcut
+	andl	$1073741824,%r9d
+	andl	$268435968,%r10d
+	orl	%r9d,%r10d
+	cmpl	$1342177792,%r10d
+	je	.Lavx_shortcut
+	movq	%rsp,%rax
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	shlq	$4,%rdx
+	subq	$128+32,%rsp
+	leaq	(%rsi,%rdx,8),%rdx
+	andq	$-64,%rsp
+	movq	%rdi,128+0(%rsp)
+	movq	%rsi,128+8(%rsp)
+	movq	%rdx,128+16(%rsp)
+	movq	%rax,128+24(%rsp)
+.Lprologue:
+
+	movq	0(%rdi),%rax
+	movq	8(%rdi),%rbx
+	movq	16(%rdi),%rcx
+	movq	24(%rdi),%rdx
+	movq	32(%rdi),%r8
+	movq	40(%rdi),%r9
+	movq	48(%rdi),%r10
+	movq	56(%rdi),%r11
+	jmp	.Lloop
+
+.align	16
+.Lloop:
+	movq	%rbx,%rdi
+	leaq	K512(%rip),%rbp
+	xorq	%rcx,%rdi
+	movq	0(%rsi),%r12
+	movq	%r8,%r13
+	movq	%rax,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r9,%r15
+
+	xorq	%r8,%r13
+	rorq	$5,%r14
+	xorq	%r10,%r15
+
+	movq	%r12,0(%rsp)
+	xorq	%rax,%r14
+	andq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%r10,%r15
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	addq	%r15,%r12
+
+	movq	%rax,%r15
+	addq	(%rbp),%r12
+	xorq	%rax,%r14
+
+	xorq	%rbx,%r15
+	rorq	$14,%r13
+	movq	%rbx,%r11
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%r11
+	addq	%r12,%rdx
+	addq	%r12,%r11
+
+	leaq	8(%rbp),%rbp
+	addq	%r14,%r11
+	movq	8(%rsi),%r12
+	movq	%rdx,%r13
+	movq	%r11,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r8,%rdi
+
+	xorq	%rdx,%r13
+	rorq	$5,%r14
+	xorq	%r9,%rdi
+
+	movq	%r12,8(%rsp)
+	xorq	%r11,%r14
+	andq	%rdx,%rdi
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r9,%rdi
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	addq	%rdi,%r12
+
+	movq	%r11,%rdi
+	addq	(%rbp),%r12
+	xorq	%r11,%r14
+
+	xorq	%rax,%rdi
+	rorq	$14,%r13
+	movq	%rax,%r10
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%r10
+	addq	%r12,%rcx
+	addq	%r12,%r10
+
+	leaq	24(%rbp),%rbp
+	addq	%r14,%r10
+	movq	16(%rsi),%r12
+	movq	%rcx,%r13
+	movq	%r10,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rdx,%r15
+
+	xorq	%rcx,%r13
+	rorq	$5,%r14
+	xorq	%r8,%r15
+
+	movq	%r12,16(%rsp)
+	xorq	%r10,%r14
+	andq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r8,%r15
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	addq	%r15,%r12
+
+	movq	%r10,%r15
+	addq	(%rbp),%r12
+	xorq	%r10,%r14
+
+	xorq	%r11,%r15
+	rorq	$14,%r13
+	movq	%r11,%r9
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%r9
+	addq	%r12,%rbx
+	addq	%r12,%r9
+
+	leaq	8(%rbp),%rbp
+	addq	%r14,%r9
+	movq	24(%rsi),%r12
+	movq	%rbx,%r13
+	movq	%r9,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rcx,%rdi
+
+	xorq	%rbx,%r13
+	rorq	$5,%r14
+	xorq	%rdx,%rdi
+
+	movq	%r12,24(%rsp)
+	xorq	%r9,%r14
+	andq	%rbx,%rdi
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%rdx,%rdi
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	addq	%rdi,%r12
+
+	movq	%r9,%rdi
+	addq	(%rbp),%r12
+	xorq	%r9,%r14
+
+	xorq	%r10,%rdi
+	rorq	$14,%r13
+	movq	%r10,%r8
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%r8
+	addq	%r12,%rax
+	addq	%r12,%r8
+
+	leaq	24(%rbp),%rbp
+	addq	%r14,%r8
+	movq	32(%rsi),%r12
+	movq	%rax,%r13
+	movq	%r8,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rbx,%r15
+
+	xorq	%rax,%r13
+	rorq	$5,%r14
+	xorq	%rcx,%r15
+
+	movq	%r12,32(%rsp)
+	xorq	%r8,%r14
+	andq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%rcx,%r15
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	addq	%r15,%r12
+
+	movq	%r8,%r15
+	addq	(%rbp),%r12
+	xorq	%r8,%r14
+
+	xorq	%r9,%r15
+	rorq	$14,%r13
+	movq	%r9,%rdx
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%rdx
+	addq	%r12,%r11
+	addq	%r12,%rdx
+
+	leaq	8(%rbp),%rbp
+	addq	%r14,%rdx
+	movq	40(%rsi),%r12
+	movq	%r11,%r13
+	movq	%rdx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rax,%rdi
+
+	xorq	%r11,%r13
+	rorq	$5,%r14
+	xorq	%rbx,%rdi
+
+	movq	%r12,40(%rsp)
+	xorq	%rdx,%r14
+	andq	%r11,%rdi
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rbx,%rdi
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	addq	%rdi,%r12
+
+	movq	%rdx,%rdi
+	addq	(%rbp),%r12
+	xorq	%rdx,%r14
+
+	xorq	%r8,%rdi
+	rorq	$14,%r13
+	movq	%r8,%rcx
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%rcx
+	addq	%r12,%r10
+	addq	%r12,%rcx
+
+	leaq	24(%rbp),%rbp
+	addq	%r14,%rcx
+	movq	48(%rsi),%r12
+	movq	%r10,%r13
+	movq	%rcx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r11,%r15
+
+	xorq	%r10,%r13
+	rorq	$5,%r14
+	xorq	%rax,%r15
+
+	movq	%r12,48(%rsp)
+	xorq	%rcx,%r14
+	andq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rax,%r15
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	addq	%r15,%r12
+
+	movq	%rcx,%r15
+	addq	(%rbp),%r12
+	xorq	%rcx,%r14
+
+	xorq	%rdx,%r15
+	rorq	$14,%r13
+	movq	%rdx,%rbx
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%rbx
+	addq	%r12,%r9
+	addq	%r12,%rbx
+
+	leaq	8(%rbp),%rbp
+	addq	%r14,%rbx
+	movq	56(%rsi),%r12
+	movq	%r9,%r13
+	movq	%rbx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r10,%rdi
+
+	xorq	%r9,%r13
+	rorq	$5,%r14
+	xorq	%r11,%rdi
+
+	movq	%r12,56(%rsp)
+	xorq	%rbx,%r14
+	andq	%r9,%rdi
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%r11,%rdi
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	addq	%rdi,%r12
+
+	movq	%rbx,%rdi
+	addq	(%rbp),%r12
+	xorq	%rbx,%r14
+
+	xorq	%rcx,%rdi
+	rorq	$14,%r13
+	movq	%rcx,%rax
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%rax
+	addq	%r12,%r8
+	addq	%r12,%rax
+
+	leaq	24(%rbp),%rbp
+	addq	%r14,%rax
+	movq	64(%rsi),%r12
+	movq	%r8,%r13
+	movq	%rax,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r9,%r15
+
+	xorq	%r8,%r13
+	rorq	$5,%r14
+	xorq	%r10,%r15
+
+	movq	%r12,64(%rsp)
+	xorq	%rax,%r14
+	andq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%r10,%r15
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	addq	%r15,%r12
+
+	movq	%rax,%r15
+	addq	(%rbp),%r12
+	xorq	%rax,%r14
+
+	xorq	%rbx,%r15
+	rorq	$14,%r13
+	movq	%rbx,%r11
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%r11
+	addq	%r12,%rdx
+	addq	%r12,%r11
+
+	leaq	8(%rbp),%rbp
+	addq	%r14,%r11
+	movq	72(%rsi),%r12
+	movq	%rdx,%r13
+	movq	%r11,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r8,%rdi
+
+	xorq	%rdx,%r13
+	rorq	$5,%r14
+	xorq	%r9,%rdi
+
+	movq	%r12,72(%rsp)
+	xorq	%r11,%r14
+	andq	%rdx,%rdi
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r9,%rdi
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	addq	%rdi,%r12
+
+	movq	%r11,%rdi
+	addq	(%rbp),%r12
+	xorq	%r11,%r14
+
+	xorq	%rax,%rdi
+	rorq	$14,%r13
+	movq	%rax,%r10
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%r10
+	addq	%r12,%rcx
+	addq	%r12,%r10
+
+	leaq	24(%rbp),%rbp
+	addq	%r14,%r10
+	movq	80(%rsi),%r12
+	movq	%rcx,%r13
+	movq	%r10,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rdx,%r15
+
+	xorq	%rcx,%r13
+	rorq	$5,%r14
+	xorq	%r8,%r15
+
+	movq	%r12,80(%rsp)
+	xorq	%r10,%r14
+	andq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r8,%r15
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	addq	%r15,%r12
+
+	movq	%r10,%r15
+	addq	(%rbp),%r12
+	xorq	%r10,%r14
+
+	xorq	%r11,%r15
+	rorq	$14,%r13
+	movq	%r11,%r9
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%r9
+	addq	%r12,%rbx
+	addq	%r12,%r9
+
+	leaq	8(%rbp),%rbp
+	addq	%r14,%r9
+	movq	88(%rsi),%r12
+	movq	%rbx,%r13
+	movq	%r9,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rcx,%rdi
+
+	xorq	%rbx,%r13
+	rorq	$5,%r14
+	xorq	%rdx,%rdi
+
+	movq	%r12,88(%rsp)
+	xorq	%r9,%r14
+	andq	%rbx,%rdi
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%rdx,%rdi
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	addq	%rdi,%r12
+
+	movq	%r9,%rdi
+	addq	(%rbp),%r12
+	xorq	%r9,%r14
+
+	xorq	%r10,%rdi
+	rorq	$14,%r13
+	movq	%r10,%r8
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%r8
+	addq	%r12,%rax
+	addq	%r12,%r8
+
+	leaq	24(%rbp),%rbp
+	addq	%r14,%r8
+	movq	96(%rsi),%r12
+	movq	%rax,%r13
+	movq	%r8,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rbx,%r15
+
+	xorq	%rax,%r13
+	rorq	$5,%r14
+	xorq	%rcx,%r15
+
+	movq	%r12,96(%rsp)
+	xorq	%r8,%r14
+	andq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%rcx,%r15
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	addq	%r15,%r12
+
+	movq	%r8,%r15
+	addq	(%rbp),%r12
+	xorq	%r8,%r14
+
+	xorq	%r9,%r15
+	rorq	$14,%r13
+	movq	%r9,%rdx
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%rdx
+	addq	%r12,%r11
+	addq	%r12,%rdx
+
+	leaq	8(%rbp),%rbp
+	addq	%r14,%rdx
+	movq	104(%rsi),%r12
+	movq	%r11,%r13
+	movq	%rdx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rax,%rdi
+
+	xorq	%r11,%r13
+	rorq	$5,%r14
+	xorq	%rbx,%rdi
+
+	movq	%r12,104(%rsp)
+	xorq	%rdx,%r14
+	andq	%r11,%rdi
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rbx,%rdi
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	addq	%rdi,%r12
+
+	movq	%rdx,%rdi
+	addq	(%rbp),%r12
+	xorq	%rdx,%r14
+
+	xorq	%r8,%rdi
+	rorq	$14,%r13
+	movq	%r8,%rcx
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%rcx
+	addq	%r12,%r10
+	addq	%r12,%rcx
+
+	leaq	24(%rbp),%rbp
+	addq	%r14,%rcx
+	movq	112(%rsi),%r12
+	movq	%r10,%r13
+	movq	%rcx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r11,%r15
+
+	xorq	%r10,%r13
+	rorq	$5,%r14
+	xorq	%rax,%r15
+
+	movq	%r12,112(%rsp)
+	xorq	%rcx,%r14
+	andq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rax,%r15
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	addq	%r15,%r12
+
+	movq	%rcx,%r15
+	addq	(%rbp),%r12
+	xorq	%rcx,%r14
+
+	xorq	%rdx,%r15
+	rorq	$14,%r13
+	movq	%rdx,%rbx
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%rbx
+	addq	%r12,%r9
+	addq	%r12,%rbx
+
+	leaq	8(%rbp),%rbp
+	addq	%r14,%rbx
+	movq	120(%rsi),%r12
+	movq	%r9,%r13
+	movq	%rbx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r10,%rdi
+
+	xorq	%r9,%r13
+	rorq	$5,%r14
+	xorq	%r11,%rdi
+
+	movq	%r12,120(%rsp)
+	xorq	%rbx,%r14
+	andq	%r9,%rdi
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%r11,%rdi
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	addq	%rdi,%r12
+
+	movq	%rbx,%rdi
+	addq	(%rbp),%r12
+	xorq	%rbx,%r14
+
+	xorq	%rcx,%rdi
+	rorq	$14,%r13
+	movq	%rcx,%rax
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%rax
+	addq	%r12,%r8
+	addq	%r12,%rax
+
+	leaq	24(%rbp),%rbp
+	jmp	.Lrounds_16_xx
+.align	16
+.Lrounds_16_xx:
+	movq	8(%rsp),%r13
+	movq	112(%rsp),%r15
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%rax
+	movq	%r15,%r14
+	rorq	$42,%r15
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	xorq	%r13,%r12
+	xorq	%r14,%r15
+	addq	72(%rsp),%r12
+
+	addq	0(%rsp),%r12
+	movq	%r8,%r13
+	addq	%r15,%r12
+	movq	%rax,%r14
+	rorq	$23,%r13
+	movq	%r9,%r15
+
+	xorq	%r8,%r13
+	rorq	$5,%r14
+	xorq	%r10,%r15
+
+	movq	%r12,0(%rsp)
+	xorq	%rax,%r14
+	andq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%r10,%r15
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	addq	%r15,%r12
+
+	movq	%rax,%r15
+	addq	(%rbp),%r12
+	xorq	%rax,%r14
+
+	xorq	%rbx,%r15
+	rorq	$14,%r13
+	movq	%rbx,%r11
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%r11
+	addq	%r12,%rdx
+	addq	%r12,%r11
+
+	leaq	8(%rbp),%rbp
+	movq	16(%rsp),%r13
+	movq	120(%rsp),%rdi
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%r11
+	movq	%rdi,%r14
+	rorq	$42,%rdi
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%rdi
+	shrq	$6,%r14
+
+	rorq	$19,%rdi
+	xorq	%r13,%r12
+	xorq	%r14,%rdi
+	addq	80(%rsp),%r12
+
+	addq	8(%rsp),%r12
+	movq	%rdx,%r13
+	addq	%rdi,%r12
+	movq	%r11,%r14
+	rorq	$23,%r13
+	movq	%r8,%rdi
+
+	xorq	%rdx,%r13
+	rorq	$5,%r14
+	xorq	%r9,%rdi
+
+	movq	%r12,8(%rsp)
+	xorq	%r11,%r14
+	andq	%rdx,%rdi
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r9,%rdi
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	addq	%rdi,%r12
+
+	movq	%r11,%rdi
+	addq	(%rbp),%r12
+	xorq	%r11,%r14
+
+	xorq	%rax,%rdi
+	rorq	$14,%r13
+	movq	%rax,%r10
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%r10
+	addq	%r12,%rcx
+	addq	%r12,%r10
+
+	leaq	24(%rbp),%rbp
+	movq	24(%rsp),%r13
+	movq	0(%rsp),%r15
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%r10
+	movq	%r15,%r14
+	rorq	$42,%r15
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	xorq	%r13,%r12
+	xorq	%r14,%r15
+	addq	88(%rsp),%r12
+
+	addq	16(%rsp),%r12
+	movq	%rcx,%r13
+	addq	%r15,%r12
+	movq	%r10,%r14
+	rorq	$23,%r13
+	movq	%rdx,%r15
+
+	xorq	%rcx,%r13
+	rorq	$5,%r14
+	xorq	%r8,%r15
+
+	movq	%r12,16(%rsp)
+	xorq	%r10,%r14
+	andq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r8,%r15
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	addq	%r15,%r12
+
+	movq	%r10,%r15
+	addq	(%rbp),%r12
+	xorq	%r10,%r14
+
+	xorq	%r11,%r15
+	rorq	$14,%r13
+	movq	%r11,%r9
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%r9
+	addq	%r12,%rbx
+	addq	%r12,%r9
+
+	leaq	8(%rbp),%rbp
+	movq	32(%rsp),%r13
+	movq	8(%rsp),%rdi
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%r9
+	movq	%rdi,%r14
+	rorq	$42,%rdi
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%rdi
+	shrq	$6,%r14
+
+	rorq	$19,%rdi
+	xorq	%r13,%r12
+	xorq	%r14,%rdi
+	addq	96(%rsp),%r12
+
+	addq	24(%rsp),%r12
+	movq	%rbx,%r13
+	addq	%rdi,%r12
+	movq	%r9,%r14
+	rorq	$23,%r13
+	movq	%rcx,%rdi
+
+	xorq	%rbx,%r13
+	rorq	$5,%r14
+	xorq	%rdx,%rdi
+
+	movq	%r12,24(%rsp)
+	xorq	%r9,%r14
+	andq	%rbx,%rdi
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%rdx,%rdi
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	addq	%rdi,%r12
+
+	movq	%r9,%rdi
+	addq	(%rbp),%r12
+	xorq	%r9,%r14
+
+	xorq	%r10,%rdi
+	rorq	$14,%r13
+	movq	%r10,%r8
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%r8
+	addq	%r12,%rax
+	addq	%r12,%r8
+
+	leaq	24(%rbp),%rbp
+	movq	40(%rsp),%r13
+	movq	16(%rsp),%r15
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%r8
+	movq	%r15,%r14
+	rorq	$42,%r15
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	xorq	%r13,%r12
+	xorq	%r14,%r15
+	addq	104(%rsp),%r12
+
+	addq	32(%rsp),%r12
+	movq	%rax,%r13
+	addq	%r15,%r12
+	movq	%r8,%r14
+	rorq	$23,%r13
+	movq	%rbx,%r15
+
+	xorq	%rax,%r13
+	rorq	$5,%r14
+	xorq	%rcx,%r15
+
+	movq	%r12,32(%rsp)
+	xorq	%r8,%r14
+	andq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%rcx,%r15
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	addq	%r15,%r12
+
+	movq	%r8,%r15
+	addq	(%rbp),%r12
+	xorq	%r8,%r14
+
+	xorq	%r9,%r15
+	rorq	$14,%r13
+	movq	%r9,%rdx
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%rdx
+	addq	%r12,%r11
+	addq	%r12,%rdx
+
+	leaq	8(%rbp),%rbp
+	movq	48(%rsp),%r13
+	movq	24(%rsp),%rdi
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%rdx
+	movq	%rdi,%r14
+	rorq	$42,%rdi
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%rdi
+	shrq	$6,%r14
+
+	rorq	$19,%rdi
+	xorq	%r13,%r12
+	xorq	%r14,%rdi
+	addq	112(%rsp),%r12
+
+	addq	40(%rsp),%r12
+	movq	%r11,%r13
+	addq	%rdi,%r12
+	movq	%rdx,%r14
+	rorq	$23,%r13
+	movq	%rax,%rdi
+
+	xorq	%r11,%r13
+	rorq	$5,%r14
+	xorq	%rbx,%rdi
+
+	movq	%r12,40(%rsp)
+	xorq	%rdx,%r14
+	andq	%r11,%rdi
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rbx,%rdi
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	addq	%rdi,%r12
+
+	movq	%rdx,%rdi
+	addq	(%rbp),%r12
+	xorq	%rdx,%r14
+
+	xorq	%r8,%rdi
+	rorq	$14,%r13
+	movq	%r8,%rcx
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%rcx
+	addq	%r12,%r10
+	addq	%r12,%rcx
+
+	leaq	24(%rbp),%rbp
+	movq	56(%rsp),%r13
+	movq	32(%rsp),%r15
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%rcx
+	movq	%r15,%r14
+	rorq	$42,%r15
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	xorq	%r13,%r12
+	xorq	%r14,%r15
+	addq	120(%rsp),%r12
+
+	addq	48(%rsp),%r12
+	movq	%r10,%r13
+	addq	%r15,%r12
+	movq	%rcx,%r14
+	rorq	$23,%r13
+	movq	%r11,%r15
+
+	xorq	%r10,%r13
+	rorq	$5,%r14
+	xorq	%rax,%r15
+
+	movq	%r12,48(%rsp)
+	xorq	%rcx,%r14
+	andq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rax,%r15
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	addq	%r15,%r12
+
+	movq	%rcx,%r15
+	addq	(%rbp),%r12
+	xorq	%rcx,%r14
+
+	xorq	%rdx,%r15
+	rorq	$14,%r13
+	movq	%rdx,%rbx
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%rbx
+	addq	%r12,%r9
+	addq	%r12,%rbx
+
+	leaq	8(%rbp),%rbp
+	movq	64(%rsp),%r13
+	movq	40(%rsp),%rdi
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%rbx
+	movq	%rdi,%r14
+	rorq	$42,%rdi
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%rdi
+	shrq	$6,%r14
+
+	rorq	$19,%rdi
+	xorq	%r13,%r12
+	xorq	%r14,%rdi
+	addq	0(%rsp),%r12
+
+	addq	56(%rsp),%r12
+	movq	%r9,%r13
+	addq	%rdi,%r12
+	movq	%rbx,%r14
+	rorq	$23,%r13
+	movq	%r10,%rdi
+
+	xorq	%r9,%r13
+	rorq	$5,%r14
+	xorq	%r11,%rdi
+
+	movq	%r12,56(%rsp)
+	xorq	%rbx,%r14
+	andq	%r9,%rdi
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%r11,%rdi
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	addq	%rdi,%r12
+
+	movq	%rbx,%rdi
+	addq	(%rbp),%r12
+	xorq	%rbx,%r14
+
+	xorq	%rcx,%rdi
+	rorq	$14,%r13
+	movq	%rcx,%rax
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%rax
+	addq	%r12,%r8
+	addq	%r12,%rax
+
+	leaq	24(%rbp),%rbp
+	movq	72(%rsp),%r13
+	movq	48(%rsp),%r15
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%rax
+	movq	%r15,%r14
+	rorq	$42,%r15
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	xorq	%r13,%r12
+	xorq	%r14,%r15
+	addq	8(%rsp),%r12
+
+	addq	64(%rsp),%r12
+	movq	%r8,%r13
+	addq	%r15,%r12
+	movq	%rax,%r14
+	rorq	$23,%r13
+	movq	%r9,%r15
+
+	xorq	%r8,%r13
+	rorq	$5,%r14
+	xorq	%r10,%r15
+
+	movq	%r12,64(%rsp)
+	xorq	%rax,%r14
+	andq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%r10,%r15
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	addq	%r15,%r12
+
+	movq	%rax,%r15
+	addq	(%rbp),%r12
+	xorq	%rax,%r14
+
+	xorq	%rbx,%r15
+	rorq	$14,%r13
+	movq	%rbx,%r11
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%r11
+	addq	%r12,%rdx
+	addq	%r12,%r11
+
+	leaq	8(%rbp),%rbp
+	movq	80(%rsp),%r13
+	movq	56(%rsp),%rdi
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%r11
+	movq	%rdi,%r14
+	rorq	$42,%rdi
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%rdi
+	shrq	$6,%r14
+
+	rorq	$19,%rdi
+	xorq	%r13,%r12
+	xorq	%r14,%rdi
+	addq	16(%rsp),%r12
+
+	addq	72(%rsp),%r12
+	movq	%rdx,%r13
+	addq	%rdi,%r12
+	movq	%r11,%r14
+	rorq	$23,%r13
+	movq	%r8,%rdi
+
+	xorq	%rdx,%r13
+	rorq	$5,%r14
+	xorq	%r9,%rdi
+
+	movq	%r12,72(%rsp)
+	xorq	%r11,%r14
+	andq	%rdx,%rdi
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r9,%rdi
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	addq	%rdi,%r12
+
+	movq	%r11,%rdi
+	addq	(%rbp),%r12
+	xorq	%r11,%r14
+
+	xorq	%rax,%rdi
+	rorq	$14,%r13
+	movq	%rax,%r10
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%r10
+	addq	%r12,%rcx
+	addq	%r12,%r10
+
+	leaq	24(%rbp),%rbp
+	movq	88(%rsp),%r13
+	movq	64(%rsp),%r15
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%r10
+	movq	%r15,%r14
+	rorq	$42,%r15
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	xorq	%r13,%r12
+	xorq	%r14,%r15
+	addq	24(%rsp),%r12
+
+	addq	80(%rsp),%r12
+	movq	%rcx,%r13
+	addq	%r15,%r12
+	movq	%r10,%r14
+	rorq	$23,%r13
+	movq	%rdx,%r15
+
+	xorq	%rcx,%r13
+	rorq	$5,%r14
+	xorq	%r8,%r15
+
+	movq	%r12,80(%rsp)
+	xorq	%r10,%r14
+	andq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r8,%r15
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	addq	%r15,%r12
+
+	movq	%r10,%r15
+	addq	(%rbp),%r12
+	xorq	%r10,%r14
+
+	xorq	%r11,%r15
+	rorq	$14,%r13
+	movq	%r11,%r9
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%r9
+	addq	%r12,%rbx
+	addq	%r12,%r9
+
+	leaq	8(%rbp),%rbp
+	movq	96(%rsp),%r13
+	movq	72(%rsp),%rdi
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%r9
+	movq	%rdi,%r14
+	rorq	$42,%rdi
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%rdi
+	shrq	$6,%r14
+
+	rorq	$19,%rdi
+	xorq	%r13,%r12
+	xorq	%r14,%rdi
+	addq	32(%rsp),%r12
+
+	addq	88(%rsp),%r12
+	movq	%rbx,%r13
+	addq	%rdi,%r12
+	movq	%r9,%r14
+	rorq	$23,%r13
+	movq	%rcx,%rdi
+
+	xorq	%rbx,%r13
+	rorq	$5,%r14
+	xorq	%rdx,%rdi
+
+	movq	%r12,88(%rsp)
+	xorq	%r9,%r14
+	andq	%rbx,%rdi
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%rdx,%rdi
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	addq	%rdi,%r12
+
+	movq	%r9,%rdi
+	addq	(%rbp),%r12
+	xorq	%r9,%r14
+
+	xorq	%r10,%rdi
+	rorq	$14,%r13
+	movq	%r10,%r8
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%r8
+	addq	%r12,%rax
+	addq	%r12,%r8
+
+	leaq	24(%rbp),%rbp
+	movq	104(%rsp),%r13
+	movq	80(%rsp),%r15
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%r8
+	movq	%r15,%r14
+	rorq	$42,%r15
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	xorq	%r13,%r12
+	xorq	%r14,%r15
+	addq	40(%rsp),%r12
+
+	addq	96(%rsp),%r12
+	movq	%rax,%r13
+	addq	%r15,%r12
+	movq	%r8,%r14
+	rorq	$23,%r13
+	movq	%rbx,%r15
+
+	xorq	%rax,%r13
+	rorq	$5,%r14
+	xorq	%rcx,%r15
+
+	movq	%r12,96(%rsp)
+	xorq	%r8,%r14
+	andq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%rcx,%r15
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	addq	%r15,%r12
+
+	movq	%r8,%r15
+	addq	(%rbp),%r12
+	xorq	%r8,%r14
+
+	xorq	%r9,%r15
+	rorq	$14,%r13
+	movq	%r9,%rdx
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%rdx
+	addq	%r12,%r11
+	addq	%r12,%rdx
+
+	leaq	8(%rbp),%rbp
+	movq	112(%rsp),%r13
+	movq	88(%rsp),%rdi
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%rdx
+	movq	%rdi,%r14
+	rorq	$42,%rdi
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%rdi
+	shrq	$6,%r14
+
+	rorq	$19,%rdi
+	xorq	%r13,%r12
+	xorq	%r14,%rdi
+	addq	48(%rsp),%r12
+
+	addq	104(%rsp),%r12
+	movq	%r11,%r13
+	addq	%rdi,%r12
+	movq	%rdx,%r14
+	rorq	$23,%r13
+	movq	%rax,%rdi
+
+	xorq	%r11,%r13
+	rorq	$5,%r14
+	xorq	%rbx,%rdi
+
+	movq	%r12,104(%rsp)
+	xorq	%rdx,%r14
+	andq	%r11,%rdi
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rbx,%rdi
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	addq	%rdi,%r12
+
+	movq	%rdx,%rdi
+	addq	(%rbp),%r12
+	xorq	%rdx,%r14
+
+	xorq	%r8,%rdi
+	rorq	$14,%r13
+	movq	%r8,%rcx
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%rcx
+	addq	%r12,%r10
+	addq	%r12,%rcx
+
+	leaq	24(%rbp),%rbp
+	movq	120(%rsp),%r13
+	movq	96(%rsp),%r15
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%rcx
+	movq	%r15,%r14
+	rorq	$42,%r15
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	xorq	%r13,%r12
+	xorq	%r14,%r15
+	addq	56(%rsp),%r12
+
+	addq	112(%rsp),%r12
+	movq	%r10,%r13
+	addq	%r15,%r12
+	movq	%rcx,%r14
+	rorq	$23,%r13
+	movq	%r11,%r15
+
+	xorq	%r10,%r13
+	rorq	$5,%r14
+	xorq	%rax,%r15
+
+	movq	%r12,112(%rsp)
+	xorq	%rcx,%r14
+	andq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rax,%r15
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	addq	%r15,%r12
+
+	movq	%rcx,%r15
+	addq	(%rbp),%r12
+	xorq	%rcx,%r14
+
+	xorq	%rdx,%r15
+	rorq	$14,%r13
+	movq	%rdx,%rbx
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%rbx
+	addq	%r12,%r9
+	addq	%r12,%rbx
+
+	leaq	8(%rbp),%rbp
+	movq	0(%rsp),%r13
+	movq	104(%rsp),%rdi
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%rbx
+	movq	%rdi,%r14
+	rorq	$42,%rdi
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%rdi
+	shrq	$6,%r14
+
+	rorq	$19,%rdi
+	xorq	%r13,%r12
+	xorq	%r14,%rdi
+	addq	64(%rsp),%r12
+
+	addq	120(%rsp),%r12
+	movq	%r9,%r13
+	addq	%rdi,%r12
+	movq	%rbx,%r14
+	rorq	$23,%r13
+	movq	%r10,%rdi
+
+	xorq	%r9,%r13
+	rorq	$5,%r14
+	xorq	%r11,%rdi
+
+	movq	%r12,120(%rsp)
+	xorq	%rbx,%r14
+	andq	%r9,%rdi
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%r11,%rdi
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	addq	%rdi,%r12
+
+	movq	%rbx,%rdi
+	addq	(%rbp),%r12
+	xorq	%rbx,%r14
+
+	xorq	%rcx,%rdi
+	rorq	$14,%r13
+	movq	%rcx,%rax
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%rax
+	addq	%r12,%r8
+	addq	%r12,%rax
+
+	leaq	24(%rbp),%rbp
+	cmpb	$0,7(%rbp)
+	jnz	.Lrounds_16_xx
+
+	movq	128+0(%rsp),%rdi
+	addq	%r14,%rax
+	leaq	128(%rsi),%rsi
+
+	addq	0(%rdi),%rax
+	addq	8(%rdi),%rbx
+	addq	16(%rdi),%rcx
+	addq	24(%rdi),%rdx
+	addq	32(%rdi),%r8
+	addq	40(%rdi),%r9
+	addq	48(%rdi),%r10
+	addq	56(%rdi),%r11
+
+	cmpq	128+16(%rsp),%rsi
+
+	movq	%rax,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%rcx,16(%rdi)
+	movq	%rdx,24(%rdi)
+	movq	%r8,32(%rdi)
+	movq	%r9,40(%rdi)
+	movq	%r10,48(%rdi)
+	movq	%r11,56(%rdi)
+	jb	.Lloop
+
+	movq	128+24(%rsp),%rsi
+	movq	-48(%rsi),%r15
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp
+.Lepilogue:
+	.byte	0xf3,0xc3
+.size	sha512_block_data_order,.-sha512_block_data_order
+.align	64	/* align the start of the K512 table to 64 */
+.type	K512,@object	/* K512 is a data object, not a function */
+K512:	/* SHA-512 round constants; each 16-byte pair is emitted twice in a row (80 lines x 16 bytes = 1280 bytes) */
+.quad	0x428a2f98d728ae22,0x7137449123ef65cd	/* first pair of constants, K512+0 */
+.quad	0x428a2f98d728ae22,0x7137449123ef65cd	/* duplicate of the pair above; the 128-bit loads in sha512_block_data_order_xop use 32-byte strides and so skip this copy */
+.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc	/* next pair, K512+32; the same pair/duplicate layout repeats to the end of the table */
+.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad	0x3956c25bf348b538,0x59f111f1b605d019
+.quad	0x3956c25bf348b538,0x59f111f1b605d019
+.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad	0xd807aa98a3030242,0x12835b0145706fbe
+.quad	0xd807aa98a3030242,0x12835b0145706fbe
+.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad	0x9bdc06a725c71235,0xc19bf174cf692694
+.quad	0x9bdc06a725c71235,0xc19bf174cf692694
+.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad	0x983e5152ee66dfab,0xa831c66d2db43210
+.quad	0x983e5152ee66dfab,0xa831c66d2db43210
+.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad	0x06ca6351e003826f,0x142929670a0e6e70
+.quad	0x06ca6351e003826f,0x142929670a0e6e70
+.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad	0x81c2c92e47edaee6,0x92722c851482353b
+.quad	0x81c2c92e47edaee6,0x92722c851482353b
+.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad	0xd192e819d6ef5218,0xd69906245565a910
+.quad	0xd192e819d6ef5218,0xd69906245565a910
+.quad	0xf40e35855771202a,0x106aa07032bbd1b8
+.quad	0xf40e35855771202a,0x106aa07032bbd1b8
+.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad	0x90befffa23631e28,0xa4506cebde82bde9
+.quad	0x90befffa23631e28,0xa4506cebde82bde9
+.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad	0xca273eceea26619c,0xd186b8c721c0c207
+.quad	0xca273eceea26619c,0xd186b8c721c0c207
+.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad	0x113f9804bef90dae,0x1b710b35131c471b
+.quad	0x113f9804bef90dae,0x1b710b35131c471b
+.quad	0x28db77f523047d84,0x32caab7b40c72493
+.quad	0x28db77f523047d84,0x32caab7b40c72493
+.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817	/* last pair of constants */
+.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817	/* duplicate of the last pair; the constants end at K512+1280 */
+
+.quad	0x0001020304050607,0x08090a0b0c0d0e0f	/* K512+1280: vpshufb control loaded by sha512_block_data_order_xop; reverses the bytes inside each 64-bit lane. Byte 7 of this entry is 0x00, which presumably is what the cmpb $0,7(%rbp) end-of-table test in the scalar rounds detects (confirm against the scalar prologue) */
+.quad	0x0001020304050607,0x08090a0b0c0d0e0f	/* second copy of the mask, following the same duplicated layout as the constants */
+.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	/* NUL-terminated ASCII ident: "SHA512 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
+.type	sha512_block_data_order_xop,@function
+.align	64
+sha512_block_data_order_xop:
+.Lxop_shortcut:
+	movq	%rsp,%rax
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	shlq	$4,%rdx
+	subq	$160,%rsp
+	leaq	(%rsi,%rdx,8),%rdx
+	andq	$-64,%rsp
+	movq	%rdi,128+0(%rsp)
+	movq	%rsi,128+8(%rsp)
+	movq	%rdx,128+16(%rsp)
+	movq	%rax,128+24(%rsp)
+.Lprologue_xop:
+
+	vzeroupper
+	movq	0(%rdi),%rax
+	movq	8(%rdi),%rbx
+	movq	16(%rdi),%rcx
+	movq	24(%rdi),%rdx
+	movq	32(%rdi),%r8
+	movq	40(%rdi),%r9
+	movq	48(%rdi),%r10
+	movq	56(%rdi),%r11
+	jmp	.Lloop_xop
+.align	16
+.Lloop_xop:
+	vmovdqa	K512+1280(%rip),%xmm11
+	vmovdqu	0(%rsi),%xmm0
+	leaq	K512+128(%rip),%rbp
+	vmovdqu	16(%rsi),%xmm1
+	vmovdqu	32(%rsi),%xmm2
+	vpshufb	%xmm11,%xmm0,%xmm0
+	vmovdqu	48(%rsi),%xmm3
+	vpshufb	%xmm11,%xmm1,%xmm1
+	vmovdqu	64(%rsi),%xmm4
+	vpshufb	%xmm11,%xmm2,%xmm2
+	vmovdqu	80(%rsi),%xmm5
+	vpshufb	%xmm11,%xmm3,%xmm3
+	vmovdqu	96(%rsi),%xmm6
+	vpshufb	%xmm11,%xmm4,%xmm4
+	vmovdqu	112(%rsi),%xmm7
+	vpshufb	%xmm11,%xmm5,%xmm5
+	vpaddq	-128(%rbp),%xmm0,%xmm8
+	vpshufb	%xmm11,%xmm6,%xmm6
+	vpaddq	-96(%rbp),%xmm1,%xmm9
+	vpshufb	%xmm11,%xmm7,%xmm7
+	vpaddq	-64(%rbp),%xmm2,%xmm10
+	vpaddq	-32(%rbp),%xmm3,%xmm11
+	vmovdqa	%xmm8,0(%rsp)
+	vpaddq	0(%rbp),%xmm4,%xmm8
+	vmovdqa	%xmm9,16(%rsp)
+	vpaddq	32(%rbp),%xmm5,%xmm9
+	vmovdqa	%xmm10,32(%rsp)
+	vpaddq	64(%rbp),%xmm6,%xmm10
+	vmovdqa	%xmm11,48(%rsp)
+	vpaddq	96(%rbp),%xmm7,%xmm11
+	vmovdqa	%xmm8,64(%rsp)
+	movq	%rax,%r14
+	vmovdqa	%xmm9,80(%rsp)
+	movq	%rbx,%rdi
+	vmovdqa	%xmm10,96(%rsp)
+	xorq	%rcx,%rdi
+	vmovdqa	%xmm11,112(%rsp)
+	movq	%r8,%r13
+	jmp	.Lxop_00_47
+
+.align	16
+.Lxop_00_47:
+	addq	$256,%rbp
+	vpalignr	$8,%xmm0,%xmm1,%xmm8
+	rorq	$23,%r13
+	movq	%r14,%rax
+	vpalignr	$8,%xmm4,%xmm5,%xmm11
+	movq	%r9,%r12
+	rorq	$5,%r14
+.byte	143,72,120,195,200,56
+	xorq	%r8,%r13
+	xorq	%r10,%r12
+	vpsrlq	$7,%xmm8,%xmm8
+	rorq	$4,%r13
+	xorq	%rax,%r14
+	vpaddq	%xmm11,%xmm0,%xmm0
+	andq	%r8,%r12
+	xorq	%r8,%r13
+	addq	0(%rsp),%r11
+	movq	%rax,%r15
+.byte	143,72,120,195,209,7
+	xorq	%r10,%r12
+	rorq	$6,%r14
+	vpxor	%xmm9,%xmm8,%xmm8
+	xorq	%rbx,%r15
+	addq	%r12,%r11
+	rorq	$14,%r13
+	andq	%r15,%rdi
+.byte	143,104,120,195,223,3
+	xorq	%rax,%r14
+	addq	%r13,%r11
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%rbx,%rdi
+	rorq	$28,%r14
+	vpsrlq	$6,%xmm7,%xmm10
+	addq	%r11,%rdx
+	addq	%rdi,%r11
+	vpaddq	%xmm8,%xmm0,%xmm0
+	movq	%rdx,%r13
+	addq	%r11,%r14
+.byte	143,72,120,195,203,42
+	rorq	$23,%r13
+	movq	%r14,%r11
+	vpxor	%xmm10,%xmm11,%xmm11
+	movq	%r8,%r12
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r12
+	vpxor	%xmm9,%xmm11,%xmm11
+	rorq	$4,%r13
+	xorq	%r11,%r14
+	andq	%rdx,%r12
+	xorq	%rdx,%r13
+	vpaddq	%xmm11,%xmm0,%xmm0
+	addq	8(%rsp),%r10
+	movq	%r11,%rdi
+	xorq	%r9,%r12
+	rorq	$6,%r14
+	vpaddq	-128(%rbp),%xmm0,%xmm10
+	xorq	%rax,%rdi
+	addq	%r12,%r10
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%r11,%r14
+	addq	%r13,%r10
+	xorq	%rax,%r15
+	rorq	$28,%r14
+	addq	%r10,%rcx
+	addq	%r15,%r10
+	movq	%rcx,%r13
+	addq	%r10,%r14
+	vmovdqa	%xmm10,0(%rsp)
+	vpalignr	$8,%xmm1,%xmm2,%xmm8
+	rorq	$23,%r13
+	movq	%r14,%r10
+	vpalignr	$8,%xmm5,%xmm6,%xmm11
+	movq	%rdx,%r12
+	rorq	$5,%r14
+.byte	143,72,120,195,200,56
+	xorq	%rcx,%r13
+	xorq	%r8,%r12
+	vpsrlq	$7,%xmm8,%xmm8
+	rorq	$4,%r13
+	xorq	%r10,%r14
+	vpaddq	%xmm11,%xmm1,%xmm1
+	andq	%rcx,%r12
+	xorq	%rcx,%r13
+	addq	16(%rsp),%r9
+	movq	%r10,%r15
+.byte	143,72,120,195,209,7
+	xorq	%r8,%r12
+	rorq	$6,%r14
+	vpxor	%xmm9,%xmm8,%xmm8
+	xorq	%r11,%r15
+	addq	%r12,%r9
+	rorq	$14,%r13
+	andq	%r15,%rdi
+.byte	143,104,120,195,216,3
+	xorq	%r10,%r14
+	addq	%r13,%r9
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%r11,%rdi
+	rorq	$28,%r14
+	vpsrlq	$6,%xmm0,%xmm10
+	addq	%r9,%rbx
+	addq	%rdi,%r9
+	vpaddq	%xmm8,%xmm1,%xmm1
+	movq	%rbx,%r13
+	addq	%r9,%r14
+.byte	143,72,120,195,203,42
+	rorq	$23,%r13
+	movq	%r14,%r9
+	vpxor	%xmm10,%xmm11,%xmm11
+	movq	%rcx,%r12
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r12
+	vpxor	%xmm9,%xmm11,%xmm11
+	rorq	$4,%r13
+	xorq	%r9,%r14
+	andq	%rbx,%r12
+	xorq	%rbx,%r13
+	vpaddq	%xmm11,%xmm1,%xmm1
+	addq	24(%rsp),%r8
+	movq	%r9,%rdi
+	xorq	%rdx,%r12
+	rorq	$6,%r14
+	vpaddq	-96(%rbp),%xmm1,%xmm10
+	xorq	%r10,%rdi
+	addq	%r12,%r8
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%r9,%r14
+	addq	%r13,%r8
+	xorq	%r10,%r15
+	rorq	$28,%r14
+	addq	%r8,%rax
+	addq	%r15,%r8
+	movq	%rax,%r13
+	addq	%r8,%r14
+	vmovdqa	%xmm10,16(%rsp)
+	vpalignr	$8,%xmm2,%xmm3,%xmm8
+	rorq	$23,%r13
+	movq	%r14,%r8
+	vpalignr	$8,%xmm6,%xmm7,%xmm11
+	movq	%rbx,%r12
+	rorq	$5,%r14
+.byte	143,72,120,195,200,56
+	xorq	%rax,%r13
+	xorq	%rcx,%r12
+	vpsrlq	$7,%xmm8,%xmm8
+	rorq	$4,%r13
+	xorq	%r8,%r14
+	vpaddq	%xmm11,%xmm2,%xmm2
+	andq	%rax,%r12
+	xorq	%rax,%r13
+	addq	32(%rsp),%rdx
+	movq	%r8,%r15
+.byte	143,72,120,195,209,7
+	xorq	%rcx,%r12
+	rorq	$6,%r14
+	vpxor	%xmm9,%xmm8,%xmm8
+	xorq	%r9,%r15
+	addq	%r12,%rdx
+	rorq	$14,%r13
+	andq	%r15,%rdi
+.byte	143,104,120,195,217,3
+	xorq	%r8,%r14
+	addq	%r13,%rdx
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%r9,%rdi
+	rorq	$28,%r14
+	vpsrlq	$6,%xmm1,%xmm10
+	addq	%rdx,%r11
+	addq	%rdi,%rdx
+	vpaddq	%xmm8,%xmm2,%xmm2
+	movq	%r11,%r13
+	addq	%rdx,%r14
+.byte	143,72,120,195,203,42
+	rorq	$23,%r13
+	movq	%r14,%rdx
+	vpxor	%xmm10,%xmm11,%xmm11
+	movq	%rax,%r12
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r12
+	vpxor	%xmm9,%xmm11,%xmm11
+	rorq	$4,%r13
+	xorq	%rdx,%r14
+	andq	%r11,%r12
+	xorq	%r11,%r13
+	vpaddq	%xmm11,%xmm2,%xmm2
+	addq	40(%rsp),%rcx
+	movq	%rdx,%rdi
+	xorq	%rbx,%r12
+	rorq	$6,%r14
+	vpaddq	-64(%rbp),%xmm2,%xmm10
+	xorq	%r8,%rdi
+	addq	%r12,%rcx
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%rdx,%r14
+	addq	%r13,%rcx
+	xorq	%r8,%r15
+	rorq	$28,%r14
+	addq	%rcx,%r10
+	addq	%r15,%rcx
+	movq	%r10,%r13
+	addq	%rcx,%r14
+	vmovdqa	%xmm10,32(%rsp)
+	vpalignr	$8,%xmm3,%xmm4,%xmm8
+	rorq	$23,%r13
+	movq	%r14,%rcx
+	vpalignr	$8,%xmm7,%xmm0,%xmm11
+	movq	%r11,%r12
+	rorq	$5,%r14
+.byte	143,72,120,195,200,56
+	xorq	%r10,%r13
+	xorq	%rax,%r12
+	vpsrlq	$7,%xmm8,%xmm8
+	rorq	$4,%r13
+	xorq	%rcx,%r14
+	vpaddq	%xmm11,%xmm3,%xmm3
+	andq	%r10,%r12
+	xorq	%r10,%r13
+	addq	48(%rsp),%rbx
+	movq	%rcx,%r15
+.byte	143,72,120,195,209,7
+	xorq	%rax,%r12
+	rorq	$6,%r14
+	vpxor	%xmm9,%xmm8,%xmm8
+	xorq	%rdx,%r15
+	addq	%r12,%rbx
+	rorq	$14,%r13
+	andq	%r15,%rdi
+.byte	143,104,120,195,218,3
+	xorq	%rcx,%r14
+	addq	%r13,%rbx
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%rdx,%rdi
+	rorq	$28,%r14
+	vpsrlq	$6,%xmm2,%xmm10
+	addq	%rbx,%r9
+	addq	%rdi,%rbx
+	vpaddq	%xmm8,%xmm3,%xmm3
+	movq	%r9,%r13
+	addq	%rbx,%r14
+.byte	143,72,120,195,203,42
+	rorq	$23,%r13
+	movq	%r14,%rbx
+	vpxor	%xmm10,%xmm11,%xmm11
+	movq	%r10,%r12
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r12
+	vpxor	%xmm9,%xmm11,%xmm11
+	rorq	$4,%r13
+	xorq	%rbx,%r14
+	andq	%r9,%r12
+	xorq	%r9,%r13
+	vpaddq	%xmm11,%xmm3,%xmm3
+	addq	56(%rsp),%rax
+	movq	%rbx,%rdi
+	xorq	%r11,%r12
+	rorq	$6,%r14
+	vpaddq	-32(%rbp),%xmm3,%xmm10
+	xorq	%rcx,%rdi
+	addq	%r12,%rax
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%rbx,%r14
+	addq	%r13,%rax
+	xorq	%rcx,%r15
+	rorq	$28,%r14
+	addq	%rax,%r8
+	addq	%r15,%rax
+	movq	%r8,%r13
+	addq	%rax,%r14
+	vmovdqa	%xmm10,48(%rsp)
+	vpalignr	$8,%xmm4,%xmm5,%xmm8
+	rorq	$23,%r13
+	movq	%r14,%rax
+	vpalignr	$8,%xmm0,%xmm1,%xmm11
+	movq	%r9,%r12
+	rorq	$5,%r14
+.byte	143,72,120,195,200,56
+	xorq	%r8,%r13
+	xorq	%r10,%r12
+	vpsrlq	$7,%xmm8,%xmm8
+	rorq	$4,%r13
+	xorq	%rax,%r14
+	vpaddq	%xmm11,%xmm4,%xmm4
+	andq	%r8,%r12
+	xorq	%r8,%r13
+	addq	64(%rsp),%r11
+	movq	%rax,%r15
+.byte	143,72,120,195,209,7
+	xorq	%r10,%r12
+	rorq	$6,%r14
+	vpxor	%xmm9,%xmm8,%xmm8
+	xorq	%rbx,%r15
+	addq	%r12,%r11
+	rorq	$14,%r13
+	andq	%r15,%rdi
+.byte	143,104,120,195,219,3
+	xorq	%rax,%r14
+	addq	%r13,%r11
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%rbx,%rdi
+	rorq	$28,%r14
+	vpsrlq	$6,%xmm3,%xmm10
+	addq	%r11,%rdx
+	addq	%rdi,%r11
+	vpaddq	%xmm8,%xmm4,%xmm4
+	movq	%rdx,%r13
+	addq	%r11,%r14
+.byte	143,72,120,195,203,42
+	rorq	$23,%r13
+	movq	%r14,%r11
+	vpxor	%xmm10,%xmm11,%xmm11
+	movq	%r8,%r12
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r12
+	vpxor	%xmm9,%xmm11,%xmm11
+	rorq	$4,%r13
+	xorq	%r11,%r14
+	andq	%rdx,%r12
+	xorq	%rdx,%r13
+	vpaddq	%xmm11,%xmm4,%xmm4
+	addq	72(%rsp),%r10
+	movq	%r11,%rdi
+	xorq	%r9,%r12
+	rorq	$6,%r14
+	vpaddq	0(%rbp),%xmm4,%xmm10
+	xorq	%rax,%rdi
+	addq	%r12,%r10
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%r11,%r14
+	addq	%r13,%r10
+	xorq	%rax,%r15
+	rorq	$28,%r14
+	addq	%r10,%rcx
+	addq	%r15,%r10
+	movq	%rcx,%r13
+	addq	%r10,%r14
+	vmovdqa	%xmm10,64(%rsp)
+	vpalignr	$8,%xmm5,%xmm6,%xmm8
+	rorq	$23,%r13
+	movq	%r14,%r10
+	vpalignr	$8,%xmm1,%xmm2,%xmm11
+	movq	%rdx,%r12
+	rorq	$5,%r14
+.byte	143,72,120,195,200,56
+	xorq	%rcx,%r13
+	xorq	%r8,%r12
+	vpsrlq	$7,%xmm8,%xmm8
+	rorq	$4,%r13
+	xorq	%r10,%r14
+	vpaddq	%xmm11,%xmm5,%xmm5
+	andq	%rcx,%r12
+	xorq	%rcx,%r13
+	addq	80(%rsp),%r9
+	movq	%r10,%r15
+.byte	143,72,120,195,209,7
+	xorq	%r8,%r12
+	rorq	$6,%r14
+	vpxor	%xmm9,%xmm8,%xmm8
+	xorq	%r11,%r15
+	addq	%r12,%r9
+	rorq	$14,%r13
+	andq	%r15,%rdi
+.byte	143,104,120,195,220,3
+	xorq	%r10,%r14
+	addq	%r13,%r9
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%r11,%rdi
+	rorq	$28,%r14
+	vpsrlq	$6,%xmm4,%xmm10
+	addq	%r9,%rbx
+	addq	%rdi,%r9
+	vpaddq	%xmm8,%xmm5,%xmm5
+	movq	%rbx,%r13
+	addq	%r9,%r14
+.byte	143,72,120,195,203,42
+	rorq	$23,%r13
+	movq	%r14,%r9
+	vpxor	%xmm10,%xmm11,%xmm11
+	movq	%rcx,%r12
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r12
+	vpxor	%xmm9,%xmm11,%xmm11
+	rorq	$4,%r13
+	xorq	%r9,%r14
+	andq	%rbx,%r12
+	xorq	%rbx,%r13
+	vpaddq	%xmm11,%xmm5,%xmm5
+	addq	88(%rsp),%r8
+	movq	%r9,%rdi
+	xorq	%rdx,%r12
+	rorq	$6,%r14
+	vpaddq	32(%rbp),%xmm5,%xmm10
+	xorq	%r10,%rdi
+	addq	%r12,%r8
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%r9,%r14
+	addq	%r13,%r8
+	xorq	%r10,%r15
+	rorq	$28,%r14
+	addq	%r8,%rax
+	addq	%r15,%r8
+	movq	%rax,%r13
+	addq	%r8,%r14
+	vmovdqa	%xmm10,80(%rsp)
+	vpalignr	$8,%xmm6,%xmm7,%xmm8
+	rorq	$23,%r13
+	movq	%r14,%r8
+	vpalignr	$8,%xmm2,%xmm3,%xmm11
+	movq	%rbx,%r12
+	rorq	$5,%r14
+.byte	143,72,120,195,200,56
+	xorq	%rax,%r13
+	xorq	%rcx,%r12
+	vpsrlq	$7,%xmm8,%xmm8
+	rorq	$4,%r13
+	xorq	%r8,%r14
+	vpaddq	%xmm11,%xmm6,%xmm6
+	andq	%rax,%r12
+	xorq	%rax,%r13
+	addq	96(%rsp),%rdx
+	movq	%r8,%r15
+.byte	143,72,120,195,209,7
+	xorq	%rcx,%r12
+	rorq	$6,%r14
+	vpxor	%xmm9,%xmm8,%xmm8
+	xorq	%r9,%r15
+	addq	%r12,%rdx
+	rorq	$14,%r13
+	andq	%r15,%rdi
+.byte	143,104,120,195,221,3
+	xorq	%r8,%r14
+	addq	%r13,%rdx
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%r9,%rdi
+	rorq	$28,%r14
+	vpsrlq	$6,%xmm5,%xmm10
+	addq	%rdx,%r11
+	addq	%rdi,%rdx
+	vpaddq	%xmm8,%xmm6,%xmm6
+	movq	%r11,%r13
+	addq	%rdx,%r14
+.byte	143,72,120,195,203,42
+	rorq	$23,%r13
+	movq	%r14,%rdx
+	vpxor	%xmm10,%xmm11,%xmm11
+	movq	%rax,%r12
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r12
+	vpxor	%xmm9,%xmm11,%xmm11
+	rorq	$4,%r13
+	xorq	%rdx,%r14
+	andq	%r11,%r12
+	xorq	%r11,%r13
+	vpaddq	%xmm11,%xmm6,%xmm6
+	addq	104(%rsp),%rcx
+	movq	%rdx,%rdi
+	xorq	%rbx,%r12
+	rorq	$6,%r14
+	vpaddq	64(%rbp),%xmm6,%xmm10
+	xorq	%r8,%rdi
+	addq	%r12,%rcx
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%rdx,%r14
+	addq	%r13,%rcx
+	xorq	%r8,%r15
+	rorq	$28,%r14
+	addq	%rcx,%r10
+	addq	%r15,%rcx
+	movq	%r10,%r13
+	addq	%rcx,%r14
+	vmovdqa	%xmm10,96(%rsp)
+	vpalignr	$8,%xmm7,%xmm0,%xmm8
+	rorq	$23,%r13
+	movq	%r14,%rcx
+	vpalignr	$8,%xmm3,%xmm4,%xmm11
+	movq	%r11,%r12
+	rorq	$5,%r14
+.byte	143,72,120,195,200,56
+	xorq	%r10,%r13
+	xorq	%rax,%r12
+	vpsrlq	$7,%xmm8,%xmm8
+	rorq	$4,%r13
+	xorq	%rcx,%r14
+	vpaddq	%xmm11,%xmm7,%xmm7
+	andq	%r10,%r12
+	xorq	%r10,%r13
+	addq	112(%rsp),%rbx
+	movq	%rcx,%r15
+.byte	143,72,120,195,209,7
+	xorq	%rax,%r12
+	rorq	$6,%r14
+	vpxor	%xmm9,%xmm8,%xmm8
+	xorq	%rdx,%r15
+	addq	%r12,%rbx
+	rorq	$14,%r13
+	andq	%r15,%rdi
+.byte	143,104,120,195,222,3
+	xorq	%rcx,%r14
+	addq	%r13,%rbx
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%rdx,%rdi
+	rorq	$28,%r14
+	vpsrlq	$6,%xmm6,%xmm10
+	addq	%rbx,%r9
+	addq	%rdi,%rbx
+	vpaddq	%xmm8,%xmm7,%xmm7
+	movq	%r9,%r13
+	addq	%rbx,%r14
+.byte	143,72,120,195,203,42
+	rorq	$23,%r13
+	movq	%r14,%rbx
+	vpxor	%xmm10,%xmm11,%xmm11
+	movq	%r10,%r12
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r12
+	vpxor	%xmm9,%xmm11,%xmm11
+	rorq	$4,%r13
+	xorq	%rbx,%r14
+	andq	%r9,%r12
+	xorq	%r9,%r13
+	vpaddq	%xmm11,%xmm7,%xmm7
+	addq	120(%rsp),%rax
+	movq	%rbx,%rdi
+	xorq	%r11,%r12
+	rorq	$6,%r14
+	vpaddq	96(%rbp),%xmm7,%xmm10
+	xorq	%rcx,%rdi
+	addq	%r12,%rax
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%rbx,%r14
+	addq	%r13,%rax
+	xorq	%rcx,%r15
+	rorq	$28,%r14
+	addq	%rax,%r8
+	addq	%r15,%rax
+	movq	%r8,%r13
+	addq	%rax,%r14
+	vmovdqa	%xmm10,112(%rsp)
+	cmpb	$0,135(%rbp)
+	jne	.Lxop_00_47
+	rorq	$23,%r13
+	movq	%r14,%rax
+	movq	%r9,%r12
+	rorq	$5,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r12
+	rorq	$4,%r13
+	xorq	%rax,%r14
+	andq	%r8,%r12
+	xorq	%r8,%r13
+	addq	0(%rsp),%r11
+	movq	%rax,%r15
+	xorq	%r10,%r12
+	rorq	$6,%r14
+	xorq	%rbx,%r15
+	addq	%r12,%r11
+	rorq	$14,%r13
+	andq	%r15,%rdi
+	xorq	%rax,%r14
+	addq	%r13,%r11
+	xorq	%rbx,%rdi
+	rorq	$28,%r14
+	addq	%r11,%rdx
+	addq	%rdi,%r11
+	movq	%rdx,%r13
+	addq	%r11,%r14
+	rorq	$23,%r13
+	movq	%r14,%r11
+	movq	%r8,%r12
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r12
+	rorq	$4,%r13
+	xorq	%r11,%r14
+	andq	%rdx,%r12
+	xorq	%rdx,%r13
+	addq	8(%rsp),%r10
+	movq	%r11,%rdi
+	xorq	%r9,%r12
+	rorq	$6,%r14
+	xorq	%rax,%rdi
+	addq	%r12,%r10
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%r11,%r14
+	addq	%r13,%r10
+	xorq	%rax,%r15
+	rorq	$28,%r14
+	addq	%r10,%rcx
+	addq	%r15,%r10
+	movq	%rcx,%r13
+	addq	%r10,%r14
+	rorq	$23,%r13
+	movq	%r14,%r10
+	movq	%rdx,%r12
+	rorq	$5,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r12
+	rorq	$4,%r13
+	xorq	%r10,%r14
+	andq	%rcx,%r12
+	xorq	%rcx,%r13
+	addq	16(%rsp),%r9
+	movq	%r10,%r15
+	xorq	%r8,%r12
+	rorq	$6,%r14
+	xorq	%r11,%r15
+	addq	%r12,%r9
+	rorq	$14,%r13
+	andq	%r15,%rdi
+	xorq	%r10,%r14
+	addq	%r13,%r9
+	xorq	%r11,%rdi
+	rorq	$28,%r14
+	addq	%r9,%rbx
+	addq	%rdi,%r9
+	movq	%rbx,%r13
+	addq	%r9,%r14
+	rorq	$23,%r13
+	movq	%r14,%r9
+	movq	%rcx,%r12
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r12
+	rorq	$4,%r13
+	xorq	%r9,%r14
+	andq	%rbx,%r12
+	xorq	%rbx,%r13
+	addq	24(%rsp),%r8
+	movq	%r9,%rdi
+	xorq	%rdx,%r12
+	rorq	$6,%r14
+	xorq	%r10,%rdi
+	addq	%r12,%r8
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%r9,%r14
+	addq	%r13,%r8
+	xorq	%r10,%r15
+	rorq	$28,%r14
+	addq	%r8,%rax
+	addq	%r15,%r8
+	movq	%rax,%r13
+	addq	%r8,%r14
+	rorq	$23,%r13
+	movq	%r14,%r8
+	movq	%rbx,%r12
+	rorq	$5,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r12
+	rorq	$4,%r13
+	xorq	%r8,%r14
+	andq	%rax,%r12
+	xorq	%rax,%r13
+	addq	32(%rsp),%rdx
+	movq	%r8,%r15
+	xorq	%rcx,%r12
+	rorq	$6,%r14
+	xorq	%r9,%r15
+	addq	%r12,%rdx
+	rorq	$14,%r13
+	andq	%r15,%rdi
+	xorq	%r8,%r14
+	addq	%r13,%rdx
+	xorq	%r9,%rdi
+	rorq	$28,%r14
+	addq	%rdx,%r11
+	addq	%rdi,%rdx
+	movq	%r11,%r13
+	addq	%rdx,%r14
+	rorq	$23,%r13
+	movq	%r14,%rdx
+	movq	%rax,%r12
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r12
+	rorq	$4,%r13
+	xorq	%rdx,%r14
+	andq	%r11,%r12
+	xorq	%r11,%r13
+	addq	40(%rsp),%rcx
+	movq	%rdx,%rdi
+	xorq	%rbx,%r12
+	rorq	$6,%r14
+	xorq	%r8,%rdi
+	addq	%r12,%rcx
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%rdx,%r14
+	addq	%r13,%rcx
+	xorq	%r8,%r15
+	rorq	$28,%r14
+	addq	%rcx,%r10
+	addq	%r15,%rcx
+	movq	%r10,%r13
+	addq	%rcx,%r14
+	rorq	$23,%r13
+	movq	%r14,%rcx
+	movq	%r11,%r12
+	rorq	$5,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r12
+	rorq	$4,%r13
+	xorq	%rcx,%r14
+	andq	%r10,%r12
+	xorq	%r10,%r13
+	addq	48(%rsp),%rbx
+	movq	%rcx,%r15
+	xorq	%rax,%r12
+	rorq	$6,%r14
+	xorq	%rdx,%r15
+	addq	%r12,%rbx
+	rorq	$14,%r13
+	andq	%r15,%rdi
+	xorq	%rcx,%r14
+	addq	%r13,%rbx
+	xorq	%rdx,%rdi
+	rorq	$28,%r14
+	addq	%rbx,%r9
+	addq	%rdi,%rbx
+	movq	%r9,%r13
+	addq	%rbx,%r14
+	rorq	$23,%r13
+	movq	%r14,%rbx
+	movq	%r10,%r12
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r12
+	rorq	$4,%r13
+	xorq	%rbx,%r14
+	andq	%r9,%r12
+	xorq	%r9,%r13
+	addq	56(%rsp),%rax
+	movq	%rbx,%rdi
+	xorq	%r11,%r12
+	rorq	$6,%r14
+	xorq	%rcx,%rdi
+	addq	%r12,%rax
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%rbx,%r14
+	addq	%r13,%rax
+	xorq	%rcx,%r15
+	rorq	$28,%r14
+	addq	%rax,%r8
+	addq	%r15,%rax
+	movq	%r8,%r13
+	addq	%rax,%r14
+	rorq	$23,%r13
+	movq	%r14,%rax
+	movq	%r9,%r12
+	rorq	$5,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r12
+	rorq	$4,%r13
+	xorq	%rax,%r14
+	andq	%r8,%r12
+	xorq	%r8,%r13
+	addq	64(%rsp),%r11
+	movq	%rax,%r15
+	xorq	%r10,%r12
+	rorq	$6,%r14
+	xorq	%rbx,%r15
+	addq	%r12,%r11
+	rorq	$14,%r13
+	andq	%r15,%rdi
+	xorq	%rax,%r14
+	addq	%r13,%r11
+	xorq	%rbx,%rdi
+	rorq	$28,%r14
+	addq	%r11,%rdx
+	addq	%rdi,%r11
+	movq	%rdx,%r13
+	addq	%r11,%r14
+	rorq	$23,%r13
+	movq	%r14,%r11
+	movq	%r8,%r12
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r12
+	rorq	$4,%r13
+	xorq	%r11,%r14
+	andq	%rdx,%r12
+	xorq	%rdx,%r13
+	addq	72(%rsp),%r10
+	movq	%r11,%rdi
+	xorq	%r9,%r12
+	rorq	$6,%r14
+	xorq	%rax,%rdi
+	addq	%r12,%r10
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%r11,%r14
+	addq	%r13,%r10
+	xorq	%rax,%r15
+	rorq	$28,%r14
+	addq	%r10,%rcx
+	addq	%r15,%r10
+	movq	%rcx,%r13
+	addq	%r10,%r14
+	rorq	$23,%r13
+	movq	%r14,%r10
+	movq	%rdx,%r12
+	rorq	$5,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r12
+	rorq	$4,%r13
+	xorq	%r10,%r14
+	andq	%rcx,%r12
+	xorq	%rcx,%r13
+	addq	80(%rsp),%r9
+	movq	%r10,%r15
+	xorq	%r8,%r12
+	rorq	$6,%r14
+	xorq	%r11,%r15
+	addq	%r12,%r9
+	rorq	$14,%r13
+	andq	%r15,%rdi
+	xorq	%r10,%r14
+	addq	%r13,%r9
+	xorq	%r11,%rdi
+	rorq	$28,%r14
+	addq	%r9,%rbx
+	addq	%rdi,%r9
+	movq	%rbx,%r13
+	addq	%r9,%r14
+	rorq	$23,%r13
+	movq	%r14,%r9
+	movq	%rcx,%r12
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r12
+	rorq	$4,%r13
+	xorq	%r9,%r14
+	andq	%rbx,%r12
+	xorq	%rbx,%r13
+	addq	88(%rsp),%r8
+	movq	%r9,%rdi
+	xorq	%rdx,%r12
+	rorq	$6,%r14
+	xorq	%r10,%rdi
+	addq	%r12,%r8
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%r9,%r14
+	addq	%r13,%r8
+	xorq	%r10,%r15
+	rorq	$28,%r14
+	addq	%r8,%rax
+	addq	%r15,%r8
+	movq	%rax,%r13
+	addq	%r8,%r14
+	rorq	$23,%r13
+	movq	%r14,%r8
+	movq	%rbx,%r12
+	rorq	$5,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r12
+	rorq	$4,%r13
+	xorq	%r8,%r14
+	andq	%rax,%r12
+	xorq	%rax,%r13
+	addq	96(%rsp),%rdx
+	movq	%r8,%r15
+	xorq	%rcx,%r12
+	rorq	$6,%r14
+	xorq	%r9,%r15
+	addq	%r12,%rdx
+	rorq	$14,%r13
+	andq	%r15,%rdi
+	xorq	%r8,%r14
+	addq	%r13,%rdx
+	xorq	%r9,%rdi
+	rorq	$28,%r14
+	addq	%rdx,%r11
+	addq	%rdi,%rdx
+	movq	%r11,%r13
+	addq	%rdx,%r14
+	rorq	$23,%r13
+	movq	%r14,%rdx
+	movq	%rax,%r12
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r12
+	rorq	$4,%r13
+	xorq	%rdx,%r14
+	andq	%r11,%r12
+	xorq	%r11,%r13
+	addq	104(%rsp),%rcx
+	movq	%rdx,%rdi
+	xorq	%rbx,%r12
+	rorq	$6,%r14
+	xorq	%r8,%rdi
+	addq	%r12,%rcx
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%rdx,%r14
+	addq	%r13,%rcx
+	xorq	%r8,%r15
+	rorq	$28,%r14
+	addq	%rcx,%r10
+	addq	%r15,%rcx
+	movq	%r10,%r13
+	addq	%rcx,%r14
+	rorq	$23,%r13
+	movq	%r14,%rcx
+	movq	%r11,%r12
+	rorq	$5,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r12
+	rorq	$4,%r13
+	xorq	%rcx,%r14
+	andq	%r10,%r12
+	xorq	%r10,%r13
+	addq	112(%rsp),%rbx
+	movq	%rcx,%r15
+	xorq	%rax,%r12
+	rorq	$6,%r14
+	xorq	%rdx,%r15
+	addq	%r12,%rbx
+	rorq	$14,%r13
+	andq	%r15,%rdi
+	xorq	%rcx,%r14
+	addq	%r13,%rbx
+	xorq	%rdx,%rdi
+	rorq	$28,%r14
+	addq	%rbx,%r9
+	addq	%rdi,%rbx
+	movq	%r9,%r13
+	addq	%rbx,%r14
+	rorq	$23,%r13
+	movq	%r14,%rbx
+	movq	%r10,%r12
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r12
+	rorq	$4,%r13
+	xorq	%rbx,%r14
+	andq	%r9,%r12
+	xorq	%r9,%r13
+	addq	120(%rsp),%rax
+	movq	%rbx,%rdi
+	xorq	%r11,%r12
+	rorq	$6,%r14
+	xorq	%rcx,%rdi
+	addq	%r12,%rax
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%rbx,%r14
+	addq	%r13,%rax
+	xorq	%rcx,%r15
+	rorq	$28,%r14
+	addq	%rax,%r8
+	addq	%r15,%rax
+	movq	%r8,%r13
+	addq	%rax,%r14
+	movq	128+0(%rsp),%rdi
+	movq	%r14,%rax
+
+	addq	0(%rdi),%rax
+	leaq	128(%rsi),%rsi
+	addq	8(%rdi),%rbx
+	addq	16(%rdi),%rcx
+	addq	24(%rdi),%rdx
+	addq	32(%rdi),%r8
+	addq	40(%rdi),%r9
+	addq	48(%rdi),%r10
+	addq	56(%rdi),%r11
+
+	cmpq	128+16(%rsp),%rsi
+
+	movq	%rax,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%rcx,16(%rdi)
+	movq	%rdx,24(%rdi)
+	movq	%r8,32(%rdi)
+	movq	%r9,40(%rdi)
+	movq	%r10,48(%rdi)
+	movq	%r11,56(%rdi)
+	jb	.Lloop_xop
+
+	movq	128+24(%rsp),%rsi
+	vzeroupper
+	movq	-48(%rsi),%r15
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp
+.Lepilogue_xop:
+	.byte	0xf3,0xc3
+.size	sha512_block_data_order_xop,.-sha512_block_data_order_xop
+.type	sha512_block_data_order_avx,@function	/* SHA-512 compression over whole 128-byte blocks, AVX code path */
+.align	64
+sha512_block_data_order_avx:	/* in: %rdi = state (eight 64-bit words), %rsi = input, %rdx = number of 128-byte blocks */
+.Lavx_shortcut:
+	movq	%rsp,%rax	/* remember the caller's %rsp; it is stored in the frame and restored in the epilogue */
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	shlq	$4,%rdx	/* %rdx = count * 16 */
+	subq	$160,%rsp	/* 128 bytes of X[i]+K[i] scratch followed by four saved quadwords */
+	leaq	(%rsi,%rdx,8),%rdx	/* %rdx = %rsi + count * 128 = end of input */
+	andq	$-64,%rsp	/* round the frame down to a 64-byte boundary (vmovdqa stores below need alignment) */
+	movq	%rdi,128+0(%rsp)	/* saved: state pointer */
+	movq	%rsi,128+8(%rsp)	/* saved: input pointer */
+	movq	%rdx,128+16(%rsp)	/* saved: end-of-input pointer */
+	movq	%rax,128+24(%rsp)	/* saved: caller's %rsp */
+.Lprologue_avx:
+
+	vzeroupper
+	movq	0(%rdi),%rax	/* working variables a..h = state[0..7] in rax,rbx,rcx,rdx,r8,r9,r10,r11 */
+	movq	8(%rdi),%rbx
+	movq	16(%rdi),%rcx
+	movq	24(%rdi),%rdx
+	movq	32(%rdi),%r8
+	movq	40(%rdi),%r9
+	movq	48(%rdi),%r10
+	movq	56(%rdi),%r11
+	jmp	.Lloop_avx
+.align	16
+.Lloop_avx:	/* once per 128-byte input block */
+	vmovdqa	K512+1280(%rip),%xmm11	/* shuffle mask stored after the constants; presumably the big-endian byte swap - table not visible here, confirm */
+	vmovdqu	0(%rsi),%xmm0	/* xmm0..xmm7 = the 16 message quadwords X[0..15], two per register (unaligned loads) */
+	leaq	K512+128(%rip),%rbp	/* %rbp is biased by +128 so displacements -128..96 reach the first 256 table bytes */
+	vmovdqu	16(%rsi),%xmm1
+	vmovdqu	32(%rsi),%xmm2
+	vpshufb	%xmm11,%xmm0,%xmm0	/* reorder the bytes of each loaded vector with the mask */
+	vmovdqu	48(%rsi),%xmm3
+	vpshufb	%xmm11,%xmm1,%xmm1
+	vmovdqu	64(%rsi),%xmm4
+	vpshufb	%xmm11,%xmm2,%xmm2
+	vmovdqu	80(%rsi),%xmm5
+	vpshufb	%xmm11,%xmm3,%xmm3
+	vmovdqu	96(%rsi),%xmm6
+	vpshufb	%xmm11,%xmm4,%xmm4
+	vmovdqu	112(%rsi),%xmm7
+	vpshufb	%xmm11,%xmm5,%xmm5
+	vpaddq	-128(%rbp),%xmm0,%xmm8	/* X[i] + constant; the 16-byte constant pairs are read at a 32-byte stride */
+	vpshufb	%xmm11,%xmm6,%xmm6
+	vpaddq	-96(%rbp),%xmm1,%xmm9
+	vpshufb	%xmm11,%xmm7,%xmm7	/* last use of the mask; xmm11 is reused as a temporary from here on */
+	vpaddq	-64(%rbp),%xmm2,%xmm10
+	vpaddq	-32(%rbp),%xmm3,%xmm11
+	vmovdqa	%xmm8,0(%rsp)	/* 0..127(%rsp) holds X[i]+K[i] for the next 16 rounds */
+	vpaddq	0(%rbp),%xmm4,%xmm8
+	vmovdqa	%xmm9,16(%rsp)
+	vpaddq	32(%rbp),%xmm5,%xmm9
+	vmovdqa	%xmm10,32(%rsp)
+	vpaddq	64(%rbp),%xmm6,%xmm10
+	vmovdqa	%xmm11,48(%rsp)
+	vpaddq	96(%rbp),%xmm7,%xmm11
+	vmovdqa	%xmm8,64(%rsp)
+	movq	%rax,%r14	/* r14 = a; each round recomputes a from r14 */
+	vmovdqa	%xmm9,80(%rsp)
+	movq	%rbx,%rdi	/* %rdi is free as scratch: the state pointer was saved at 128+0(%rsp) */
+	vmovdqa	%xmm10,96(%rsp)
+	xorq	%rcx,%rdi	/* rdi = b ^ c, consumed by the first round's Maj computation */
+	vmovdqa	%xmm11,112(%rsp)
+	movq	%r8,%r13	/* r13 = e */
+	jmp	.Lavx_00_47
+
+.align	16
+.Lavx_00_47:	/* one pass = 16 scalar rounds interleaved with the vector message-schedule update of xmm0..xmm7 */
+	addq	$256,%rbp	/* step to the next 256 bytes of constants for the X+K values stored during this pass */
+	vpalignr	$8,%xmm0,%xmm1,%xmm8	/* rounds 0-1 + schedule for xmm0: xmm8 = {X[1],X[2]} */
+	shrdq	$23,%r13,%r13	/* shrd with identical operands is a rotate right; r13 accumulates Sigma1(e) */
+	movq	%r14,%rax	/* a = value finished in r14 by the previous round */
+	vpalignr	$8,%xmm4,%xmm5,%xmm11	/* xmm11 = {X[9],X[10]} */
+	movq	%r9,%r12
+	shrdq	$5,%r14,%r14	/* r14 accumulates Sigma0(a) */
+	vpsrlq	$1,%xmm8,%xmm10
+	xorq	%r8,%r13
+	xorq	%r10,%r12	/* r12 = f ^ g */
+	vpaddq	%xmm11,%xmm0,%xmm0	/* X[0..1] += X[9..10] */
+	shrdq	$4,%r13,%r13
+	xorq	%rax,%r14
+	vpsrlq	$7,%xmm8,%xmm11
+	andq	%r8,%r12
+	xorq	%r8,%r13
+	vpsllq	$56,%xmm8,%xmm9
+	addq	0(%rsp),%r11	/* h += X[0]+K from the stack slot */
+	movq	%rax,%r15
+	vpxor	%xmm10,%xmm11,%xmm8
+	xorq	%r10,%r12	/* r12 = Ch(e,f,g) = ((f^g)&e)^g */
+	shrdq	$6,%r14,%r14
+	vpsrlq	$7,%xmm10,%xmm10
+	xorq	%rbx,%r15	/* r15 = a ^ b; it becomes b ^ c for the next round */
+	addq	%r12,%r11	/* h += Ch */
+	vpxor	%xmm9,%xmm8,%xmm8
+	shrdq	$14,%r13,%r13	/* r13 = ror14(e)^ror18(e)^ror41(e) = Sigma1(e) */
+	andq	%r15,%rdi	/* rdi = (a^b) & (b^c); b^c was carried in from the previous round */
+	vpsllq	$7,%xmm9,%xmm9
+	xorq	%rax,%r14
+	addq	%r13,%r11	/* h += Sigma1(e) */
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%rbx,%rdi	/* rdi = Maj(a,b,c) */
+	shrdq	$28,%r14,%r14	/* r14 = ror28(a)^ror34(a)^ror39(a) = Sigma0(a) */
+	vpsrlq	$6,%xmm7,%xmm11
+	addq	%r11,%rdx	/* d += T1 */
+	addq	%rdi,%r11	/* h = T1 + Maj */
+	vpxor	%xmm9,%xmm8,%xmm8	/* xmm8 = sigma0(X[1..2]) = ror1^ror8^shr7, built from shifts */
+	movq	%rdx,%r13	/* r13 = e for the next round */
+	addq	%r11,%r14	/* r14 = Sigma0 + h = a for the next round */
+	vpsllq	$3,%xmm7,%xmm10
+	shrdq	$23,%r13,%r13	/* next round: same pattern with the register roles rotated by one */
+	movq	%r14,%r11
+	vpaddq	%xmm8,%xmm0,%xmm0	/* X[0..1] += sigma0(X[1..2]) */
+	movq	%r8,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$19,%xmm7,%xmm9
+	xorq	%rdx,%r13
+	xorq	%r9,%r12
+	vpxor	%xmm10,%xmm11,%xmm11
+	shrdq	$4,%r13,%r13
+	xorq	%r11,%r14
+	vpsllq	$42,%xmm10,%xmm10
+	andq	%rdx,%r12
+	xorq	%rdx,%r13
+	vpxor	%xmm9,%xmm11,%xmm11
+	addq	8(%rsp),%r10
+	movq	%r11,%rdi
+	vpsrlq	$42,%xmm9,%xmm9
+	xorq	%r9,%r12
+	shrdq	$6,%r14,%r14
+	vpxor	%xmm10,%xmm11,%xmm11
+	xorq	%rax,%rdi
+	addq	%r12,%r10
+	vpxor	%xmm9,%xmm11,%xmm11	/* xmm11 = sigma1(X[14..15]) = ror19^ror61^shr6, built from shifts */
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	vpaddq	%xmm11,%xmm0,%xmm0	/* xmm0 = new schedule words replacing X[0..1] */
+	xorq	%r11,%r14
+	addq	%r13,%r10
+	vpaddq	-128(%rbp),%xmm0,%xmm10	/* add the matching constants (rbp already advanced) */
+	xorq	%rax,%r15
+	shrdq	$28,%r14,%r14
+	addq	%r10,%rcx
+	addq	%r15,%r10
+	movq	%rcx,%r13
+	addq	%r10,%r14
+	vmovdqa	%xmm10,0(%rsp)	/* slots 0-1 now hold X+K for the next pass */
+	vpalignr	$8,%xmm1,%xmm2,%xmm8	/* rounds 2-3 + schedule for xmm1 */
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r10
+	vpalignr	$8,%xmm5,%xmm6,%xmm11
+	movq	%rdx,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$1,%xmm8,%xmm10
+	xorq	%rcx,%r13
+	xorq	%r8,%r12
+	vpaddq	%xmm11,%xmm1,%xmm1
+	shrdq	$4,%r13,%r13
+	xorq	%r10,%r14
+	vpsrlq	$7,%xmm8,%xmm11
+	andq	%rcx,%r12
+	xorq	%rcx,%r13
+	vpsllq	$56,%xmm8,%xmm9
+	addq	16(%rsp),%r9
+	movq	%r10,%r15
+	vpxor	%xmm10,%xmm11,%xmm8
+	xorq	%r8,%r12
+	shrdq	$6,%r14,%r14
+	vpsrlq	$7,%xmm10,%xmm10
+	xorq	%r11,%r15
+	addq	%r12,%r9
+	vpxor	%xmm9,%xmm8,%xmm8
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	vpsllq	$7,%xmm9,%xmm9
+	xorq	%r10,%r14
+	addq	%r13,%r9
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%r11,%rdi
+	shrdq	$28,%r14,%r14
+	vpsrlq	$6,%xmm0,%xmm11
+	addq	%r9,%rbx
+	addq	%rdi,%r9
+	vpxor	%xmm9,%xmm8,%xmm8
+	movq	%rbx,%r13
+	addq	%r9,%r14
+	vpsllq	$3,%xmm0,%xmm10
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r9
+	vpaddq	%xmm8,%xmm1,%xmm1
+	movq	%rcx,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$19,%xmm0,%xmm9
+	xorq	%rbx,%r13
+	xorq	%rdx,%r12
+	vpxor	%xmm10,%xmm11,%xmm11
+	shrdq	$4,%r13,%r13
+	xorq	%r9,%r14
+	vpsllq	$42,%xmm10,%xmm10
+	andq	%rbx,%r12
+	xorq	%rbx,%r13
+	vpxor	%xmm9,%xmm11,%xmm11
+	addq	24(%rsp),%r8
+	movq	%r9,%rdi
+	vpsrlq	$42,%xmm9,%xmm9
+	xorq	%rdx,%r12
+	shrdq	$6,%r14,%r14
+	vpxor	%xmm10,%xmm11,%xmm11
+	xorq	%r10,%rdi
+	addq	%r12,%r8
+	vpxor	%xmm9,%xmm11,%xmm11
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	vpaddq	%xmm11,%xmm1,%xmm1
+	xorq	%r9,%r14
+	addq	%r13,%r8
+	vpaddq	-96(%rbp),%xmm1,%xmm10
+	xorq	%r10,%r15
+	shrdq	$28,%r14,%r14
+	addq	%r8,%rax
+	addq	%r15,%r8
+	movq	%rax,%r13
+	addq	%r8,%r14
+	vmovdqa	%xmm10,16(%rsp)
+	vpalignr	$8,%xmm2,%xmm3,%xmm8	/* rounds 4-5 + schedule for xmm2 */
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r8
+	vpalignr	$8,%xmm6,%xmm7,%xmm11
+	movq	%rbx,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$1,%xmm8,%xmm10
+	xorq	%rax,%r13
+	xorq	%rcx,%r12
+	vpaddq	%xmm11,%xmm2,%xmm2
+	shrdq	$4,%r13,%r13
+	xorq	%r8,%r14
+	vpsrlq	$7,%xmm8,%xmm11
+	andq	%rax,%r12
+	xorq	%rax,%r13
+	vpsllq	$56,%xmm8,%xmm9
+	addq	32(%rsp),%rdx
+	movq	%r8,%r15
+	vpxor	%xmm10,%xmm11,%xmm8
+	xorq	%rcx,%r12
+	shrdq	$6,%r14,%r14
+	vpsrlq	$7,%xmm10,%xmm10
+	xorq	%r9,%r15
+	addq	%r12,%rdx
+	vpxor	%xmm9,%xmm8,%xmm8
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	vpsllq	$7,%xmm9,%xmm9
+	xorq	%r8,%r14
+	addq	%r13,%rdx
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%r9,%rdi
+	shrdq	$28,%r14,%r14
+	vpsrlq	$6,%xmm1,%xmm11
+	addq	%rdx,%r11
+	addq	%rdi,%rdx
+	vpxor	%xmm9,%xmm8,%xmm8
+	movq	%r11,%r13
+	addq	%rdx,%r14
+	vpsllq	$3,%xmm1,%xmm10
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rdx
+	vpaddq	%xmm8,%xmm2,%xmm2
+	movq	%rax,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$19,%xmm1,%xmm9
+	xorq	%r11,%r13
+	xorq	%rbx,%r12
+	vpxor	%xmm10,%xmm11,%xmm11
+	shrdq	$4,%r13,%r13
+	xorq	%rdx,%r14
+	vpsllq	$42,%xmm10,%xmm10
+	andq	%r11,%r12
+	xorq	%r11,%r13
+	vpxor	%xmm9,%xmm11,%xmm11
+	addq	40(%rsp),%rcx
+	movq	%rdx,%rdi
+	vpsrlq	$42,%xmm9,%xmm9
+	xorq	%rbx,%r12
+	shrdq	$6,%r14,%r14
+	vpxor	%xmm10,%xmm11,%xmm11
+	xorq	%r8,%rdi
+	addq	%r12,%rcx
+	vpxor	%xmm9,%xmm11,%xmm11
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	vpaddq	%xmm11,%xmm2,%xmm2
+	xorq	%rdx,%r14
+	addq	%r13,%rcx
+	vpaddq	-64(%rbp),%xmm2,%xmm10
+	xorq	%r8,%r15
+	shrdq	$28,%r14,%r14
+	addq	%rcx,%r10
+	addq	%r15,%rcx
+	movq	%r10,%r13
+	addq	%rcx,%r14
+	vmovdqa	%xmm10,32(%rsp)
+	vpalignr	$8,%xmm3,%xmm4,%xmm8	/* rounds 6-7 + schedule for xmm3 */
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rcx
+	vpalignr	$8,%xmm7,%xmm0,%xmm11
+	movq	%r11,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$1,%xmm8,%xmm10
+	xorq	%r10,%r13
+	xorq	%rax,%r12
+	vpaddq	%xmm11,%xmm3,%xmm3
+	shrdq	$4,%r13,%r13
+	xorq	%rcx,%r14
+	vpsrlq	$7,%xmm8,%xmm11
+	andq	%r10,%r12
+	xorq	%r10,%r13
+	vpsllq	$56,%xmm8,%xmm9
+	addq	48(%rsp),%rbx
+	movq	%rcx,%r15
+	vpxor	%xmm10,%xmm11,%xmm8
+	xorq	%rax,%r12
+	shrdq	$6,%r14,%r14
+	vpsrlq	$7,%xmm10,%xmm10
+	xorq	%rdx,%r15
+	addq	%r12,%rbx
+	vpxor	%xmm9,%xmm8,%xmm8
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	vpsllq	$7,%xmm9,%xmm9
+	xorq	%rcx,%r14
+	addq	%r13,%rbx
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%rdx,%rdi
+	shrdq	$28,%r14,%r14
+	vpsrlq	$6,%xmm2,%xmm11
+	addq	%rbx,%r9
+	addq	%rdi,%rbx
+	vpxor	%xmm9,%xmm8,%xmm8
+	movq	%r9,%r13
+	addq	%rbx,%r14
+	vpsllq	$3,%xmm2,%xmm10
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rbx
+	vpaddq	%xmm8,%xmm3,%xmm3
+	movq	%r10,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$19,%xmm2,%xmm9
+	xorq	%r9,%r13
+	xorq	%r11,%r12
+	vpxor	%xmm10,%xmm11,%xmm11
+	shrdq	$4,%r13,%r13
+	xorq	%rbx,%r14
+	vpsllq	$42,%xmm10,%xmm10
+	andq	%r9,%r12
+	xorq	%r9,%r13
+	vpxor	%xmm9,%xmm11,%xmm11
+	addq	56(%rsp),%rax
+	movq	%rbx,%rdi
+	vpsrlq	$42,%xmm9,%xmm9
+	xorq	%r11,%r12
+	shrdq	$6,%r14,%r14
+	vpxor	%xmm10,%xmm11,%xmm11
+	xorq	%rcx,%rdi
+	addq	%r12,%rax
+	vpxor	%xmm9,%xmm11,%xmm11
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	vpaddq	%xmm11,%xmm3,%xmm3
+	xorq	%rbx,%r14
+	addq	%r13,%rax
+	vpaddq	-32(%rbp),%xmm3,%xmm10
+	xorq	%rcx,%r15
+	shrdq	$28,%r14,%r14
+	addq	%rax,%r8
+	addq	%r15,%rax
+	movq	%r8,%r13
+	addq	%rax,%r14
+	vmovdqa	%xmm10,48(%rsp)
+	vpalignr	$8,%xmm4,%xmm5,%xmm8	/* rounds 8-9 + schedule for xmm4 */
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rax
+	vpalignr	$8,%xmm0,%xmm1,%xmm11
+	movq	%r9,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$1,%xmm8,%xmm10
+	xorq	%r8,%r13
+	xorq	%r10,%r12
+	vpaddq	%xmm11,%xmm4,%xmm4
+	shrdq	$4,%r13,%r13
+	xorq	%rax,%r14
+	vpsrlq	$7,%xmm8,%xmm11
+	andq	%r8,%r12
+	xorq	%r8,%r13
+	vpsllq	$56,%xmm8,%xmm9
+	addq	64(%rsp),%r11
+	movq	%rax,%r15
+	vpxor	%xmm10,%xmm11,%xmm8
+	xorq	%r10,%r12
+	shrdq	$6,%r14,%r14
+	vpsrlq	$7,%xmm10,%xmm10
+	xorq	%rbx,%r15
+	addq	%r12,%r11
+	vpxor	%xmm9,%xmm8,%xmm8
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	vpsllq	$7,%xmm9,%xmm9
+	xorq	%rax,%r14
+	addq	%r13,%r11
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%rbx,%rdi
+	shrdq	$28,%r14,%r14
+	vpsrlq	$6,%xmm3,%xmm11
+	addq	%r11,%rdx
+	addq	%rdi,%r11
+	vpxor	%xmm9,%xmm8,%xmm8
+	movq	%rdx,%r13
+	addq	%r11,%r14
+	vpsllq	$3,%xmm3,%xmm10
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r11
+	vpaddq	%xmm8,%xmm4,%xmm4
+	movq	%r8,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$19,%xmm3,%xmm9
+	xorq	%rdx,%r13
+	xorq	%r9,%r12
+	vpxor	%xmm10,%xmm11,%xmm11
+	shrdq	$4,%r13,%r13
+	xorq	%r11,%r14
+	vpsllq	$42,%xmm10,%xmm10
+	andq	%rdx,%r12
+	xorq	%rdx,%r13
+	vpxor	%xmm9,%xmm11,%xmm11
+	addq	72(%rsp),%r10
+	movq	%r11,%rdi
+	vpsrlq	$42,%xmm9,%xmm9
+	xorq	%r9,%r12
+	shrdq	$6,%r14,%r14
+	vpxor	%xmm10,%xmm11,%xmm11
+	xorq	%rax,%rdi
+	addq	%r12,%r10
+	vpxor	%xmm9,%xmm11,%xmm11
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	vpaddq	%xmm11,%xmm4,%xmm4
+	xorq	%r11,%r14
+	addq	%r13,%r10
+	vpaddq	0(%rbp),%xmm4,%xmm10
+	xorq	%rax,%r15
+	shrdq	$28,%r14,%r14
+	addq	%r10,%rcx
+	addq	%r15,%r10
+	movq	%rcx,%r13
+	addq	%r10,%r14
+	vmovdqa	%xmm10,64(%rsp)
+	vpalignr	$8,%xmm5,%xmm6,%xmm8	/* rounds 10-11 + schedule for xmm5 */
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r10
+	vpalignr	$8,%xmm1,%xmm2,%xmm11
+	movq	%rdx,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$1,%xmm8,%xmm10
+	xorq	%rcx,%r13
+	xorq	%r8,%r12
+	vpaddq	%xmm11,%xmm5,%xmm5
+	shrdq	$4,%r13,%r13
+	xorq	%r10,%r14
+	vpsrlq	$7,%xmm8,%xmm11
+	andq	%rcx,%r12
+	xorq	%rcx,%r13
+	vpsllq	$56,%xmm8,%xmm9
+	addq	80(%rsp),%r9
+	movq	%r10,%r15
+	vpxor	%xmm10,%xmm11,%xmm8
+	xorq	%r8,%r12
+	shrdq	$6,%r14,%r14
+	vpsrlq	$7,%xmm10,%xmm10
+	xorq	%r11,%r15
+	addq	%r12,%r9
+	vpxor	%xmm9,%xmm8,%xmm8
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	vpsllq	$7,%xmm9,%xmm9
+	xorq	%r10,%r14
+	addq	%r13,%r9
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%r11,%rdi
+	shrdq	$28,%r14,%r14
+	vpsrlq	$6,%xmm4,%xmm11
+	addq	%r9,%rbx
+	addq	%rdi,%r9
+	vpxor	%xmm9,%xmm8,%xmm8
+	movq	%rbx,%r13
+	addq	%r9,%r14
+	vpsllq	$3,%xmm4,%xmm10
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r9
+	vpaddq	%xmm8,%xmm5,%xmm5
+	movq	%rcx,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$19,%xmm4,%xmm9
+	xorq	%rbx,%r13
+	xorq	%rdx,%r12
+	vpxor	%xmm10,%xmm11,%xmm11
+	shrdq	$4,%r13,%r13
+	xorq	%r9,%r14
+	vpsllq	$42,%xmm10,%xmm10
+	andq	%rbx,%r12
+	xorq	%rbx,%r13
+	vpxor	%xmm9,%xmm11,%xmm11
+	addq	88(%rsp),%r8
+	movq	%r9,%rdi
+	vpsrlq	$42,%xmm9,%xmm9
+	xorq	%rdx,%r12
+	shrdq	$6,%r14,%r14
+	vpxor	%xmm10,%xmm11,%xmm11
+	xorq	%r10,%rdi
+	addq	%r12,%r8
+	vpxor	%xmm9,%xmm11,%xmm11
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	vpaddq	%xmm11,%xmm5,%xmm5
+	xorq	%r9,%r14
+	addq	%r13,%r8
+	vpaddq	32(%rbp),%xmm5,%xmm10
+	xorq	%r10,%r15
+	shrdq	$28,%r14,%r14
+	addq	%r8,%rax
+	addq	%r15,%r8
+	movq	%rax,%r13
+	addq	%r8,%r14
+	vmovdqa	%xmm10,80(%rsp)
+	vpalignr	$8,%xmm6,%xmm7,%xmm8	/* rounds 12-13 + schedule for xmm6 */
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r8
+	vpalignr	$8,%xmm2,%xmm3,%xmm11
+	movq	%rbx,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$1,%xmm8,%xmm10
+	xorq	%rax,%r13
+	xorq	%rcx,%r12
+	vpaddq	%xmm11,%xmm6,%xmm6
+	shrdq	$4,%r13,%r13
+	xorq	%r8,%r14
+	vpsrlq	$7,%xmm8,%xmm11
+	andq	%rax,%r12
+	xorq	%rax,%r13
+	vpsllq	$56,%xmm8,%xmm9
+	addq	96(%rsp),%rdx
+	movq	%r8,%r15
+	vpxor	%xmm10,%xmm11,%xmm8
+	xorq	%rcx,%r12
+	shrdq	$6,%r14,%r14
+	vpsrlq	$7,%xmm10,%xmm10
+	xorq	%r9,%r15
+	addq	%r12,%rdx
+	vpxor	%xmm9,%xmm8,%xmm8
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	vpsllq	$7,%xmm9,%xmm9
+	xorq	%r8,%r14
+	addq	%r13,%rdx
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%r9,%rdi
+	shrdq	$28,%r14,%r14
+	vpsrlq	$6,%xmm5,%xmm11
+	addq	%rdx,%r11
+	addq	%rdi,%rdx
+	vpxor	%xmm9,%xmm8,%xmm8
+	movq	%r11,%r13
+	addq	%rdx,%r14
+	vpsllq	$3,%xmm5,%xmm10
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rdx
+	vpaddq	%xmm8,%xmm6,%xmm6
+	movq	%rax,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$19,%xmm5,%xmm9
+	xorq	%r11,%r13
+	xorq	%rbx,%r12
+	vpxor	%xmm10,%xmm11,%xmm11
+	shrdq	$4,%r13,%r13
+	xorq	%rdx,%r14
+	vpsllq	$42,%xmm10,%xmm10
+	andq	%r11,%r12
+	xorq	%r11,%r13
+	vpxor	%xmm9,%xmm11,%xmm11
+	addq	104(%rsp),%rcx
+	movq	%rdx,%rdi
+	vpsrlq	$42,%xmm9,%xmm9
+	xorq	%rbx,%r12
+	shrdq	$6,%r14,%r14
+	vpxor	%xmm10,%xmm11,%xmm11
+	xorq	%r8,%rdi
+	addq	%r12,%rcx
+	vpxor	%xmm9,%xmm11,%xmm11
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	vpaddq	%xmm11,%xmm6,%xmm6
+	xorq	%rdx,%r14
+	addq	%r13,%rcx
+	vpaddq	64(%rbp),%xmm6,%xmm10
+	xorq	%r8,%r15
+	shrdq	$28,%r14,%r14
+	addq	%rcx,%r10
+	addq	%r15,%rcx
+	movq	%r10,%r13
+	addq	%rcx,%r14
+	vmovdqa	%xmm10,96(%rsp)
+	vpalignr	$8,%xmm7,%xmm0,%xmm8	/* rounds 14-15 + schedule for xmm7 */
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rcx
+	vpalignr	$8,%xmm3,%xmm4,%xmm11
+	movq	%r11,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$1,%xmm8,%xmm10
+	xorq	%r10,%r13
+	xorq	%rax,%r12
+	vpaddq	%xmm11,%xmm7,%xmm7
+	shrdq	$4,%r13,%r13
+	xorq	%rcx,%r14
+	vpsrlq	$7,%xmm8,%xmm11
+	andq	%r10,%r12
+	xorq	%r10,%r13
+	vpsllq	$56,%xmm8,%xmm9
+	addq	112(%rsp),%rbx
+	movq	%rcx,%r15
+	vpxor	%xmm10,%xmm11,%xmm8
+	xorq	%rax,%r12
+	shrdq	$6,%r14,%r14
+	vpsrlq	$7,%xmm10,%xmm10
+	xorq	%rdx,%r15
+	addq	%r12,%rbx
+	vpxor	%xmm9,%xmm8,%xmm8
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	vpsllq	$7,%xmm9,%xmm9
+	xorq	%rcx,%r14
+	addq	%r13,%rbx
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%rdx,%rdi
+	shrdq	$28,%r14,%r14
+	vpsrlq	$6,%xmm6,%xmm11
+	addq	%rbx,%r9
+	addq	%rdi,%rbx
+	vpxor	%xmm9,%xmm8,%xmm8
+	movq	%r9,%r13
+	addq	%rbx,%r14
+	vpsllq	$3,%xmm6,%xmm10
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rbx
+	vpaddq	%xmm8,%xmm7,%xmm7
+	movq	%r10,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$19,%xmm6,%xmm9
+	xorq	%r9,%r13
+	xorq	%r11,%r12
+	vpxor	%xmm10,%xmm11,%xmm11
+	shrdq	$4,%r13,%r13
+	xorq	%rbx,%r14
+	vpsllq	$42,%xmm10,%xmm10
+	andq	%r9,%r12
+	xorq	%r9,%r13
+	vpxor	%xmm9,%xmm11,%xmm11
+	addq	120(%rsp),%rax
+	movq	%rbx,%rdi
+	vpsrlq	$42,%xmm9,%xmm9
+	xorq	%r11,%r12
+	shrdq	$6,%r14,%r14
+	vpxor	%xmm10,%xmm11,%xmm11
+	xorq	%rcx,%rdi
+	addq	%r12,%rax
+	vpxor	%xmm9,%xmm11,%xmm11
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	vpaddq	%xmm11,%xmm7,%xmm7
+	xorq	%rbx,%r14
+	addq	%r13,%rax
+	vpaddq	96(%rbp),%xmm7,%xmm10
+	xorq	%rcx,%r15
+	shrdq	$28,%r14,%r14
+	addq	%rax,%r8
+	addq	%r15,%rax
+	movq	%r8,%r13
+	addq	%rax,%r14
+	vmovdqa	%xmm10,112(%rsp)
+	cmpb	$0,135(%rbp)	/* sentinel: repeat while the byte at %rbp+135 is non-zero; presumably zero only past the last constant - confirm against the K512 layout */
+	jne	.Lavx_00_47
+	shrdq	$23,%r13,%r13	/* last 16 rounds: same scalar round as above, consuming the X+K slots already on the stack, no further schedule */
+	movq	%r14,%rax
+	movq	%r9,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%rax,%r14
+	andq	%r8,%r12
+	xorq	%r8,%r13
+	addq	0(%rsp),%r11	/* slot 0 */
+	movq	%rax,%r15
+	xorq	%r10,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%rbx,%r15
+	addq	%r12,%r11
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	xorq	%rax,%r14
+	addq	%r13,%r11
+	xorq	%rbx,%rdi
+	shrdq	$28,%r14,%r14
+	addq	%r11,%rdx
+	addq	%rdi,%r11
+	movq	%rdx,%r13
+	addq	%r11,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r11
+	movq	%r8,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%r11,%r14
+	andq	%rdx,%r12
+	xorq	%rdx,%r13
+	addq	8(%rsp),%r10	/* slot 1 */
+	movq	%r11,%rdi
+	xorq	%r9,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%rax,%rdi
+	addq	%r12,%r10
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	xorq	%r11,%r14
+	addq	%r13,%r10
+	xorq	%rax,%r15
+	shrdq	$28,%r14,%r14
+	addq	%r10,%rcx
+	addq	%r15,%r10
+	movq	%rcx,%r13
+	addq	%r10,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r10
+	movq	%rdx,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%r10,%r14
+	andq	%rcx,%r12
+	xorq	%rcx,%r13
+	addq	16(%rsp),%r9	/* slot 2 */
+	movq	%r10,%r15
+	xorq	%r8,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%r11,%r15
+	addq	%r12,%r9
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	xorq	%r10,%r14
+	addq	%r13,%r9
+	xorq	%r11,%rdi
+	shrdq	$28,%r14,%r14
+	addq	%r9,%rbx
+	addq	%rdi,%r9
+	movq	%rbx,%r13
+	addq	%r9,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r9
+	movq	%rcx,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%r9,%r14
+	andq	%rbx,%r12
+	xorq	%rbx,%r13
+	addq	24(%rsp),%r8	/* slot 3 */
+	movq	%r9,%rdi
+	xorq	%rdx,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%r10,%rdi
+	addq	%r12,%r8
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	xorq	%r9,%r14
+	addq	%r13,%r8
+	xorq	%r10,%r15
+	shrdq	$28,%r14,%r14
+	addq	%r8,%rax
+	addq	%r15,%r8
+	movq	%rax,%r13
+	addq	%r8,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r8
+	movq	%rbx,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%r8,%r14
+	andq	%rax,%r12
+	xorq	%rax,%r13
+	addq	32(%rsp),%rdx	/* slot 4 */
+	movq	%r8,%r15
+	xorq	%rcx,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%r9,%r15
+	addq	%r12,%rdx
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	xorq	%r8,%r14
+	addq	%r13,%rdx
+	xorq	%r9,%rdi
+	shrdq	$28,%r14,%r14
+	addq	%rdx,%r11
+	addq	%rdi,%rdx
+	movq	%r11,%r13
+	addq	%rdx,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rdx
+	movq	%rax,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%rdx,%r14
+	andq	%r11,%r12
+	xorq	%r11,%r13
+	addq	40(%rsp),%rcx	/* slot 5 */
+	movq	%rdx,%rdi
+	xorq	%rbx,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%r8,%rdi
+	addq	%r12,%rcx
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	xorq	%rdx,%r14
+	addq	%r13,%rcx
+	xorq	%r8,%r15
+	shrdq	$28,%r14,%r14
+	addq	%rcx,%r10
+	addq	%r15,%rcx
+	movq	%r10,%r13
+	addq	%rcx,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rcx
+	movq	%r11,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%rcx,%r14
+	andq	%r10,%r12
+	xorq	%r10,%r13
+	addq	48(%rsp),%rbx	/* slot 6 */
+	movq	%rcx,%r15
+	xorq	%rax,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%rdx,%r15
+	addq	%r12,%rbx
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	xorq	%rcx,%r14
+	addq	%r13,%rbx
+	xorq	%rdx,%rdi
+	shrdq	$28,%r14,%r14
+	addq	%rbx,%r9
+	addq	%rdi,%rbx
+	movq	%r9,%r13
+	addq	%rbx,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rbx
+	movq	%r10,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%rbx,%r14
+	andq	%r9,%r12
+	xorq	%r9,%r13
+	addq	56(%rsp),%rax	/* slot 7 */
+	movq	%rbx,%rdi
+	xorq	%r11,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%rcx,%rdi
+	addq	%r12,%rax
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	xorq	%rbx,%r14
+	addq	%r13,%rax
+	xorq	%rcx,%r15
+	shrdq	$28,%r14,%r14
+	addq	%rax,%r8
+	addq	%r15,%rax
+	movq	%r8,%r13
+	addq	%rax,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rax
+	movq	%r9,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%rax,%r14
+	andq	%r8,%r12
+	xorq	%r8,%r13
+	addq	64(%rsp),%r11	/* slot 8 */
+	movq	%rax,%r15
+	xorq	%r10,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%rbx,%r15
+	addq	%r12,%r11
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	xorq	%rax,%r14
+	addq	%r13,%r11
+	xorq	%rbx,%rdi
+	shrdq	$28,%r14,%r14
+	addq	%r11,%rdx
+	addq	%rdi,%r11
+	movq	%rdx,%r13
+	addq	%r11,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r11
+	movq	%r8,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%r11,%r14
+	andq	%rdx,%r12
+	xorq	%rdx,%r13
+	addq	72(%rsp),%r10	/* slot 9 */
+	movq	%r11,%rdi
+	xorq	%r9,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%rax,%rdi
+	addq	%r12,%r10
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	xorq	%r11,%r14
+	addq	%r13,%r10
+	xorq	%rax,%r15
+	shrdq	$28,%r14,%r14
+	addq	%r10,%rcx
+	addq	%r15,%r10
+	movq	%rcx,%r13
+	addq	%r10,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r10
+	movq	%rdx,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%r10,%r14
+	andq	%rcx,%r12
+	xorq	%rcx,%r13
+	addq	80(%rsp),%r9	/* slot 10 */
+	movq	%r10,%r15
+	xorq	%r8,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%r11,%r15
+	addq	%r12,%r9
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	xorq	%r10,%r14
+	addq	%r13,%r9
+	xorq	%r11,%rdi
+	shrdq	$28,%r14,%r14
+	addq	%r9,%rbx
+	addq	%rdi,%r9
+	movq	%rbx,%r13
+	addq	%r9,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r9
+	movq	%rcx,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%r9,%r14
+	andq	%rbx,%r12
+	xorq	%rbx,%r13
+	addq	88(%rsp),%r8	/* slot 11 */
+	movq	%r9,%rdi
+	xorq	%rdx,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%r10,%rdi
+	addq	%r12,%r8
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	xorq	%r9,%r14
+	addq	%r13,%r8
+	xorq	%r10,%r15
+	shrdq	$28,%r14,%r14
+	addq	%r8,%rax
+	addq	%r15,%r8
+	movq	%rax,%r13
+	addq	%r8,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r8
+	movq	%rbx,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%r8,%r14
+	andq	%rax,%r12
+	xorq	%rax,%r13
+	addq	96(%rsp),%rdx	/* slot 12 */
+	movq	%r8,%r15
+	xorq	%rcx,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%r9,%r15
+	addq	%r12,%rdx
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	xorq	%r8,%r14
+	addq	%r13,%rdx
+	xorq	%r9,%rdi
+	shrdq	$28,%r14,%r14
+	addq	%rdx,%r11
+	addq	%rdi,%rdx
+	movq	%r11,%r13
+	addq	%rdx,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rdx
+	movq	%rax,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%rdx,%r14
+	andq	%r11,%r12
+	xorq	%r11,%r13
+	addq	104(%rsp),%rcx	/* slot 13 */
+	movq	%rdx,%rdi
+	xorq	%rbx,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%r8,%rdi
+	addq	%r12,%rcx
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	xorq	%rdx,%r14
+	addq	%r13,%rcx
+	xorq	%r8,%r15
+	shrdq	$28,%r14,%r14
+	addq	%rcx,%r10
+	addq	%r15,%rcx
+	movq	%r10,%r13
+	addq	%rcx,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rcx
+	movq	%r11,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%rcx,%r14
+	andq	%r10,%r12
+	xorq	%r10,%r13
+	addq	112(%rsp),%rbx	/* slot 14 */
+	movq	%rcx,%r15
+	xorq	%rax,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%rdx,%r15
+	addq	%r12,%rbx
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	xorq	%rcx,%r14
+	addq	%r13,%rbx
+	xorq	%rdx,%rdi
+	shrdq	$28,%r14,%r14
+	addq	%rbx,%r9
+	addq	%rdi,%rbx
+	movq	%r9,%r13
+	addq	%rbx,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rbx
+	movq	%r10,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%rbx,%r14
+	andq	%r9,%r12
+	xorq	%r9,%r13
+	addq	120(%rsp),%rax	/* slot 15 */
+	movq	%rbx,%rdi
+	xorq	%r11,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%rcx,%rdi
+	addq	%r12,%rax
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	xorq	%rbx,%r14
+	addq	%r13,%rax
+	xorq	%rcx,%r15
+	shrdq	$28,%r14,%r14
+	addq	%rax,%r8
+	addq	%r15,%rax
+	movq	%r8,%r13
+	addq	%rax,%r14
+	movq	128+0(%rsp),%rdi	/* reload the state pointer (%rdi was used as scratch in the rounds) */
+	movq	%r14,%rax	/* a = final value accumulated in r14 */
+
+	addq	0(%rdi),%rax	/* add the previous state to the working variables */
+	leaq	128(%rsi),%rsi	/* advance the input pointer by one block; lea leaves flags untouched */
+	addq	8(%rdi),%rbx
+	addq	16(%rdi),%rcx
+	addq	24(%rdi),%rdx
+	addq	32(%rdi),%r8
+	addq	40(%rdi),%r9
+	addq	48(%rdi),%r10
+	addq	56(%rdi),%r11
+
+	cmpq	128+16(%rsp),%rsi	/* compare with the saved end-of-input pointer; the mov stores below do not alter flags */
+
+	movq	%rax,0(%rdi)	/* write the updated state back */
+	movq	%rbx,8(%rdi)
+	movq	%rcx,16(%rdi)
+	movq	%rdx,24(%rdi)
+	movq	%r8,32(%rdi)
+	movq	%r9,40(%rdi)
+	movq	%r10,48(%rdi)
+	movq	%r11,56(%rdi)
+	jb	.Lloop_avx	/* more input: process the next block */
+
+	movq	128+24(%rsp),%rsi	/* %rsi = caller's %rsp saved in the prologue */
+	vzeroupper
+	movq	-48(%rsi),%r15	/* restore the six registers pushed in the prologue */
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp	/* drop the aligned frame */
+.Lepilogue_avx:
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+.size	sha512_block_data_order_avx,.-sha512_block_data_order_avx
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S
new file mode 100644
index 0000000..f3a089d
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S
@@ -0,0 +1,834 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+
+
+
+
+
+
+
+
+
+/* _vpaes_encrypt_core: AES-encrypt the 16-byte block held in %xmm0. */
+/* In:  %xmm0 = block, %rdx = key schedule (loop count at 240(%rdx)), */
+/*      %xmm9-%xmm15 = tables as loaded by _vpaes_preheat. */
+/* Out: %xmm0 = result. */
+/* Writes %xmm1-%xmm5, %r9, %r10, %r11 and %rax; %xmm6-%xmm8 are */
+/* left untouched. */
+.type	_vpaes_encrypt_core,@function
+.align	16
+_vpaes_encrypt_core:
+	movq	%rdx,%r9	/* r9 walks the key schedule, 16 bytes per step */
+	movq	$16,%r11	/* r11 = table offset; stays in {0,16,32,48} */
+	movl	240(%rdx),%eax	/* loop counter */
+	movdqa	%xmm9,%xmm1
+	movdqa	.Lk_ipt(%rip),%xmm2
+	pandn	%xmm0,%xmm1	/* xmm1 = block & ~xmm9 */
+	movdqu	(%r9),%xmm5	/* first 16 bytes of the schedule */
+	psrld	$4,%xmm1
+	pand	%xmm9,%xmm0	/* xmm0 = block & xmm9 */
+.byte	102,15,56,0,208	/* pshufb %xmm0,%xmm2 */
+	movdqa	.Lk_ipt+16(%rip),%xmm0
+.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
+	pxor	%xmm5,%xmm2
+	addq	$16,%r9
+	pxor	%xmm2,%xmm0
+	leaq	.Lk_mc_backward(%rip),%r10
+	jmp	.Lenc_entry
+
+.align	16
+.Lenc_loop:
+/* Loop body: table lookups via xmm12-xmm15, then shuffles by the tables at r10+r11. */
+	movdqa	%xmm13,%xmm4
+	movdqa	%xmm12,%xmm0
+.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
+.byte	102,15,56,0,195	/* pshufb %xmm3,%xmm0 */
+	pxor	%xmm5,%xmm4	/* mix in the current 16 bytes of the schedule */
+	movdqa	%xmm15,%xmm5
+	pxor	%xmm4,%xmm0
+	movdqa	-64(%r11,%r10,1),%xmm1	/* .Lk_mc_forward + r11 (64 bytes below .Lk_mc_backward) */
+.byte	102,15,56,0,234	/* pshufb %xmm2,%xmm5 */
+	movdqa	(%r11,%r10,1),%xmm4	/* .Lk_mc_backward + r11 */
+	movdqa	%xmm14,%xmm2
+.byte	102,15,56,0,211	/* pshufb %xmm3,%xmm2 */
+	movdqa	%xmm0,%xmm3
+	pxor	%xmm5,%xmm2
+.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
+	addq	$16,%r9	/* advance to the next 16 bytes of the schedule */
+	pxor	%xmm2,%xmm0
+.byte	102,15,56,0,220	/* pshufb %xmm4,%xmm3 */
+	addq	$16,%r11
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
+	andq	$0x30,%r11	/* wrap the table offset to 0..48 */
+	subq	$1,%rax	/* sets ZF for the jnz at the end of .Lenc_entry */
+	pxor	%xmm3,%xmm0
+
+.Lenc_entry:
+/* Shared entry: split xmm0 with the xmm9 mask and do the xmm10/xmm11 lookups. */
+	movdqa	%xmm9,%xmm1
+	movdqa	%xmm11,%xmm5
+	pandn	%xmm0,%xmm1
+	psrld	$4,%xmm1
+	pand	%xmm9,%xmm0
+.byte	102,15,56,0,232	/* pshufb %xmm0,%xmm5 */
+	movdqa	%xmm10,%xmm3
+	pxor	%xmm1,%xmm0
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	movdqa	%xmm10,%xmm4
+	pxor	%xmm5,%xmm3
+.byte	102,15,56,0,224	/* pshufb %xmm0,%xmm4 */
+	movdqa	%xmm10,%xmm2
+	pxor	%xmm5,%xmm4
+.byte	102,15,56,0,211	/* pshufb %xmm3,%xmm2 */
+	movdqa	%xmm10,%xmm3
+	pxor	%xmm0,%xmm2
+.byte	102,15,56,0,220	/* pshufb %xmm4,%xmm3 */
+	movdqu	(%r9),%xmm5	/* next 16 bytes of the schedule */
+	pxor	%xmm1,%xmm3
+	jnz	.Lenc_loop	/* ZF from subq $1,%rax (addq $16,%r9 on first entry); SSE ops leave flags alone */
+
+/* Final step: .Lk_sbo lookups, last schedule block, then a .Lk_sr shuffle. */
+	movdqa	-96(%r10),%xmm4	/* .Lk_sbo (96 bytes below .Lk_mc_backward) */
+	movdqa	-80(%r10),%xmm0	/* .Lk_sbo+16 */
+.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
+	pxor	%xmm5,%xmm4
+.byte	102,15,56,0,195	/* pshufb %xmm3,%xmm0 */
+	movdqa	64(%r11,%r10,1),%xmm1	/* .Lk_sr + r11 */
+	pxor	%xmm4,%xmm0
+.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core
+
+
+/* _vpaes_decrypt_core: AES-decrypt the 16-byte block held in %xmm0. */
+/* Same register interface as _vpaes_encrypt_core: %rdx = key schedule, */
+/* %xmm9-%xmm11 from _vpaes_preheat, result in %xmm0. */
+/* Writes %xmm1-%xmm5, %r9, %r10, %r11 and %rax. */
+.type	_vpaes_decrypt_core,@function
+.align	16
+_vpaes_decrypt_core:
+	movq	%rdx,%r9	/* r9 walks the key schedule */
+	movl	240(%rdx),%eax	/* loop counter */
+	movdqa	%xmm9,%xmm1
+	movdqa	.Lk_dipt(%rip),%xmm2
+	pandn	%xmm0,%xmm1	/* xmm1 = block & ~xmm9 */
+	movq	%rax,%r11
+	psrld	$4,%xmm1
+	movdqu	(%r9),%xmm5	/* first 16 bytes of the schedule */
+	shlq	$4,%r11
+	pand	%xmm9,%xmm0	/* xmm0 = block & xmm9 */
+.byte	102,15,56,0,208	/* pshufb %xmm0,%xmm2 */
+	movdqa	.Lk_dipt+16(%rip),%xmm0
+	xorq	$0x30,%r11
+	leaq	.Lk_dsbd(%rip),%r10
+.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
+	andq	$0x30,%r11	/* r11 = ((count << 4) ^ 0x30) & 0x30 */
+	pxor	%xmm5,%xmm2
+	movdqa	.Lk_mc_forward+48(%rip),%xmm5	/* shuffle mask, rotated by palignr each iteration */
+	pxor	%xmm2,%xmm0
+	addq	$16,%r9
+	addq	%r10,%r11	/* r11 = .Lk_dsbd + offset; used for the final -352(%r11) load */
+	jmp	.Ldec_entry
+
+.align	16
+.Ldec_loop:
+/* Loop body: four table pairs laid out back to back around r10 = .Lk_dsbd: */
+/* .Lk_dsb9 (-32), .Lk_dsbd (0), .Lk_dsbb (+32), .Lk_dsbe (+64). */
+
+	movdqa	-32(%r10),%xmm4	/* .Lk_dsb9 */
+	movdqa	-16(%r10),%xmm1	/* .Lk_dsb9+16 */
+.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
+.byte	102,15,56,0,203	/* pshufb %xmm3,%xmm1 */
+	pxor	%xmm4,%xmm0
+	movdqa	0(%r10),%xmm4	/* .Lk_dsbd */
+	pxor	%xmm1,%xmm0
+	movdqa	16(%r10),%xmm1	/* .Lk_dsbd+16 */
+
+.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
+.byte	102,15,56,0,203	/* pshufb %xmm3,%xmm1 */
+	pxor	%xmm4,%xmm0
+	movdqa	32(%r10),%xmm4	/* .Lk_dsbb */
+	pxor	%xmm1,%xmm0
+	movdqa	48(%r10),%xmm1	/* .Lk_dsbb+16 */
+
+.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
+.byte	102,15,56,0,203	/* pshufb %xmm3,%xmm1 */
+	pxor	%xmm4,%xmm0
+	movdqa	64(%r10),%xmm4	/* .Lk_dsbe */
+	pxor	%xmm1,%xmm0
+	movdqa	80(%r10),%xmm1	/* .Lk_dsbe+16 */
+
+.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
+.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
+.byte	102,15,56,0,203	/* pshufb %xmm3,%xmm1 */
+	pxor	%xmm4,%xmm0
+	addq	$16,%r9	/* advance to the next 16 bytes of the schedule */
+.byte	102,15,58,15,237,12	/* palignr $12,%xmm5,%xmm5 (rotate the mask by 12 bytes) */
+	pxor	%xmm1,%xmm0
+	subq	$1,%rax	/* sets ZF for the jnz at the end of .Ldec_entry */
+
+.Ldec_entry:
+/* Shared entry: split xmm0 with the xmm9 mask and do the xmm10/xmm11 lookups. */
+	movdqa	%xmm9,%xmm1
+	pandn	%xmm0,%xmm1
+	movdqa	%xmm11,%xmm2
+	psrld	$4,%xmm1
+	pand	%xmm9,%xmm0
+.byte	102,15,56,0,208	/* pshufb %xmm0,%xmm2 */
+	movdqa	%xmm10,%xmm3
+	pxor	%xmm1,%xmm0
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	movdqa	%xmm10,%xmm4
+	pxor	%xmm2,%xmm3
+.byte	102,15,56,0,224	/* pshufb %xmm0,%xmm4 */
+	pxor	%xmm2,%xmm4
+	movdqa	%xmm10,%xmm2
+.byte	102,15,56,0,211	/* pshufb %xmm3,%xmm2 */
+	movdqa	%xmm10,%xmm3
+	pxor	%xmm0,%xmm2
+.byte	102,15,56,0,220	/* pshufb %xmm4,%xmm3 */
+	movdqu	(%r9),%xmm0	/* next 16 bytes of the schedule */
+	pxor	%xmm1,%xmm3
+	jnz	.Ldec_loop	/* ZF from subq $1,%rax (addq %r10,%r11 on first entry); SSE ops leave flags alone */
+
+/* Final step: .Lk_dsbo lookups, last schedule block, then a .Lk_sr shuffle. */
+	movdqa	96(%r10),%xmm4	/* .Lk_dsbo */
+.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
+	pxor	%xmm0,%xmm4
+	movdqa	112(%r10),%xmm0	/* .Lk_dsbo+16 */
+	movdqa	-352(%r11),%xmm2	/* .Lk_sr + offset (.Lk_sr sits 352 bytes below .Lk_dsbd) */
+.byte	102,15,56,0,195	/* pshufb %xmm3,%xmm0 */
+	pxor	%xmm4,%xmm0
+.byte	102,15,56,0,194	/* pshufb %xmm2,%xmm0 */
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_vpaes_decrypt_core,.-_vpaes_decrypt_core
+
+
+
+/* _vpaes_schedule_core: expand a user key into the vpaes key schedule. */
+/* Shared by vpaes_set_encrypt_key and vpaes_set_decrypt_key, which set */
+/* up %rcx, %r8 and %rdx before calling. */
+.type	_vpaes_schedule_core,@function
+.align	16
+_vpaes_schedule_core:
+/* In: %rdi = user key bytes, %esi = key size in bits (compared with 192), */
+/*     %rdx = where the first schedule block is stored, */
+/*     %rcx = zero for encryption, non-zero for decryption, */
+/*     %r8  = starting offset into .Lk_sr. */
+/* Clears %xmm0-%xmm7 before returning. */
+	call	_vpaes_preheat
+	movdqa	.Lk_rcon(%rip),%xmm8	/* xmm8 is consumed by _vpaes_schedule_round */
+	movdqu	(%rdi),%xmm0	/* first 16 key bytes */
+
+/* Keep a raw copy in xmm3, then transform xmm0 with the .Lk_ipt tables. */
+	movdqa	%xmm0,%xmm3
+	leaq	.Lk_ipt(%rip),%r11
+	call	_vpaes_schedule_transform
+	movdqa	%xmm0,%xmm7
+
+	leaq	.Lk_sr(%rip),%r10
+	testq	%rcx,%rcx
+	jnz	.Lschedule_am_decrypting
+
+/* Encrypting: store the transformed block as is. */
+	movdqu	%xmm0,(%rdx)
+	jmp	.Lschedule_go
+
+.Lschedule_am_decrypting:
+/* Decrypting: store the raw block shuffled by .Lk_sr + r8, then flip r8. */
+	movdqa	(%r8,%r10,1),%xmm1
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	movdqu	%xmm3,(%rdx)
+	xorq	$0x30,%r8
+
+.Lschedule_go:
+	cmpl	$192,%esi
+	ja	.Lschedule_256	/* more than 192 bits */
+	je	.Lschedule_192	/* exactly 192 bits */
+
+
+
+
+
+
+
+
+/* Anything below 192 bits falls through to the 128-bit schedule: */
+/* ten rounds, with a mangle-and-store after every round but the last. */
+.Lschedule_128:
+	movl	$10,%esi
+
+.Loop_schedule_128:
+	call	_vpaes_schedule_round
+	decq	%rsi
+	jz	.Lschedule_mangle_last
+	call	_vpaes_schedule_mangle
+	jmp	.Loop_schedule_128
+
+
+
+
+
+
+
+
+
+
+
+
+
+/* 192-bit schedule: key bytes 8..23 are transformed into xmm6 and its */
+/* low 64 bits cleared. Four passes, each with two rounds; every pass */
+/* but the last writes three blocks (three mangle calls). */
+
+.align	16
+.Lschedule_192:
+	movdqu	8(%rdi),%xmm0
+	call	_vpaes_schedule_transform
+	movdqa	%xmm0,%xmm6
+	pxor	%xmm4,%xmm4
+	movhlps	%xmm4,%xmm6	/* low 64 bits of xmm6 = 0 */
+	movl	$4,%esi
+
+.Loop_schedule_192:
+	call	_vpaes_schedule_round
+.byte	102,15,58,15,198,8	/* palignr $8,%xmm6,%xmm0 */
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_192_smear
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_round
+	decq	%rsi
+	jz	.Lschedule_mangle_last
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_192_smear
+	jmp	.Loop_schedule_192
+
+
+
+
+
+
+
+
+
+/* 256-bit schedule: key bytes 16..31 are transformed; seven passes that */
+/* alternate a full round with a low round on the other key half. */
+.align	16
+.Lschedule_256:
+	movdqu	16(%rdi),%xmm0
+	call	_vpaes_schedule_transform
+	movl	$7,%esi
+
+.Loop_schedule_256:
+	call	_vpaes_schedule_mangle
+	movdqa	%xmm0,%xmm6	/* keep this block for the low round below */
+
+
+	call	_vpaes_schedule_round
+	decq	%rsi
+	jz	.Lschedule_mangle_last
+	call	_vpaes_schedule_mangle
+
+/* Low round on the saved block: swap xmm7 out, run, swap it back. */
+	pshufd	$0xFF,%xmm0,%xmm0	/* broadcast dword 3 of xmm0 */
+	movdqa	%xmm7,%xmm5
+	movdqa	%xmm6,%xmm7
+	call	_vpaes_schedule_low_round
+	movdqa	%xmm5,%xmm7
+
+	jmp	.Loop_schedule_256
+
+
+
+
+
+
+
+
+
+
+/* Last block: encryption shuffles by .Lk_sr + r8 and uses the .Lk_opt */
+/* tables; decryption uses the .Lk_deskew tables. */
+.align	16
+.Lschedule_mangle_last:
+
+	leaq	.Lk_deskew(%rip),%r11
+	testq	%rcx,%rcx
+	jnz	.Lschedule_mangle_last_dec
+
+
+	movdqa	(%r8,%r10,1),%xmm1
+.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
+	leaq	.Lk_opt(%rip),%r11
+	addq	$32,%rdx	/* net +16 for encryption after the -16 below */
+
+.Lschedule_mangle_last_dec:
+	addq	$-16,%rdx
+	pxor	.Lk_s63(%rip),%xmm0
+	call	_vpaes_schedule_transform
+	movdqu	%xmm0,(%rdx)
+
+/* Clear the working registers (presumably so no key material is left behind). */
+	pxor	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_vpaes_schedule_core,.-_vpaes_schedule_core
+
+
+
+
+
+
+
+
+
+
+
+/* _vpaes_schedule_192_smear: helper for the 192-bit key schedule. */
+/* In:  %xmm6, %xmm7.  Out: %xmm0 = %xmm6 ^ shuffle(%xmm6) ^ shuffle(%xmm7); */
+/* %xmm6 = the same value with its low 64 bits cleared. */
+/* Writes %xmm1 (left as zero). */
+.type	_vpaes_schedule_192_smear,@function
+.align	16
+_vpaes_schedule_192_smear:
+	pshufd	$0x80,%xmm6,%xmm1	/* xmm1 dwords 0..3 = xmm6 dwords 0,0,0,2 */
+	pshufd	$0xFE,%xmm7,%xmm0	/* xmm0 dwords 0..3 = xmm7 dwords 2,3,3,3 */
+	pxor	%xmm1,%xmm6
+	pxor	%xmm1,%xmm1	/* xmm1 = 0, source for the movhlps below */
+	pxor	%xmm0,%xmm6
+	movdqa	%xmm6,%xmm0
+	movhlps	%xmm1,%xmm6	/* low 64 bits of xmm6 = 0 */
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/* _vpaes_schedule_round: one key-schedule round. */
+/* In:  %xmm0, %xmm7 = running schedule state, %xmm8 = round constants, */
+/*      %xmm9-%xmm13 = tables from _vpaes_preheat. */
+/* Out: %xmm0 = %xmm7 = new state. Writes %xmm1-%xmm4; rotates %xmm8. */
+.type	_vpaes_schedule_round,@function
+.align	16
+_vpaes_schedule_round:
+/* Take the top byte of xmm8 into byte 0 of xmm1, rotate xmm8, fold into xmm7. */
+	pxor	%xmm1,%xmm1
+.byte	102,65,15,58,15,200,15	/* palignr $15,%xmm8,%xmm1 */
+.byte	102,69,15,58,15,192,15	/* palignr $15,%xmm8,%xmm8 */
+	pxor	%xmm1,%xmm7
+
+/* Broadcast dword 3 of xmm0, then rotate the register by one byte. */
+	pshufd	$0xFF,%xmm0,%xmm0
+.byte	102,15,58,15,192,1	/* palignr $1,%xmm0,%xmm0 */
+
+
+/* _vpaes_schedule_low_round: second entry point that skips the xmm8 */
+/* and rotate steps above; called directly by the 256-bit schedule. */
+_vpaes_schedule_low_round:
+/* xmm7 ^= xmm7 << 4 bytes; xmm7 ^= xmm7 << 8 bytes; xmm7 ^= .Lk_s63. */
+	movdqa	%xmm7,%xmm1
+	pslldq	$4,%xmm7
+	pxor	%xmm1,%xmm7
+	movdqa	%xmm7,%xmm1
+	pslldq	$8,%xmm7
+	pxor	%xmm1,%xmm7
+	pxor	.Lk_s63(%rip),%xmm7
+
+/* Same nibble split and xmm10/xmm11 lookups as the cipher cores, then xmm13/xmm12. */
+	movdqa	%xmm9,%xmm1
+	pandn	%xmm0,%xmm1
+	psrld	$4,%xmm1
+	pand	%xmm9,%xmm0
+	movdqa	%xmm11,%xmm2
+.byte	102,15,56,0,208	/* pshufb %xmm0,%xmm2 */
+	pxor	%xmm1,%xmm0
+	movdqa	%xmm10,%xmm3
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	pxor	%xmm2,%xmm3
+	movdqa	%xmm10,%xmm4
+.byte	102,15,56,0,224	/* pshufb %xmm0,%xmm4 */
+	pxor	%xmm2,%xmm4
+	movdqa	%xmm10,%xmm2
+.byte	102,15,56,0,211	/* pshufb %xmm3,%xmm2 */
+	pxor	%xmm0,%xmm2
+	movdqa	%xmm10,%xmm3
+.byte	102,15,56,0,220	/* pshufb %xmm4,%xmm3 */
+	pxor	%xmm1,%xmm3
+	movdqa	%xmm13,%xmm4
+.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
+	movdqa	%xmm12,%xmm0
+.byte	102,15,56,0,195	/* pshufb %xmm3,%xmm0 */
+	pxor	%xmm4,%xmm0
+
+/* Combine with the smeared xmm7 and keep the result in both registers. */
+	pxor	%xmm7,%xmm0
+	movdqa	%xmm0,%xmm7
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_vpaes_schedule_round,.-_vpaes_schedule_round
+
+
+
+
+
+
+
+/* _vpaes_schedule_transform: xmm0 = T0[xmm0 & xmm9] ^ T1[(xmm0 & ~xmm9) >> 4], */
+/* looked up per byte with pshufb; T0 = 16 bytes at (%r11), T1 = 16 bytes at */
+/* 16(%r11). Writes %xmm1 and %xmm2; %r11 is not modified. */
+.type	_vpaes_schedule_transform,@function
+.align	16
+_vpaes_schedule_transform:
+	movdqa	%xmm9,%xmm1
+	pandn	%xmm0,%xmm1	/* xmm1 = xmm0 & ~xmm9 */
+	psrld	$4,%xmm1
+	pand	%xmm9,%xmm0	/* xmm0 = xmm0 & xmm9 */
+	movdqa	(%r11),%xmm2
+.byte	102,15,56,0,208	/* pshufb %xmm0,%xmm2 */
+	movdqa	16(%r11),%xmm0
+.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
+	pxor	%xmm2,%xmm0
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/* _vpaes_schedule_mangle: convert the schedule state in %xmm0 to its stored */
+/* form and write it at (%rdx). %rdx moves +16 when %rcx is zero (encrypt) */
+/* and -16 otherwise (decrypt); %r8 steps to (%r8 - 16) & 0x30. */
+/* %xmm0 is preserved. Writes %xmm1-%xmm5, and %r11 on the decrypt path. */
+.type	_vpaes_schedule_mangle,@function
+.align	16
+_vpaes_schedule_mangle:
+	movdqa	%xmm0,%xmm4	/* work on a copy so xmm0 survives */
+	movdqa	.Lk_mc_forward(%rip),%xmm5
+	testq	%rcx,%rcx
+	jnz	.Lschedule_mangle_dec
+
+/* Encrypt: xmm3 = XOR of one, two and three xmm5-shuffles of (xmm0 ^ .Lk_s63). */
+	addq	$16,%rdx
+	pxor	.Lk_s63(%rip),%xmm4
+.byte	102,15,56,0,229	/* pshufb %xmm5,%xmm4 */
+	movdqa	%xmm4,%xmm3
+.byte	102,15,56,0,229	/* pshufb %xmm5,%xmm4 */
+	pxor	%xmm4,%xmm3
+.byte	102,15,56,0,229	/* pshufb %xmm5,%xmm4 */
+	pxor	%xmm4,%xmm3
+
+	jmp	.Lschedule_mangle_both
+.align	16
+.Lschedule_mangle_dec:
+/* Decrypt: four table-pair lookups from .Lk_dksd, .Lk_dksb, .Lk_dkse, .Lk_dks9. */
+	leaq	.Lk_dksd(%rip),%r11
+	movdqa	%xmm9,%xmm1
+	pandn	%xmm4,%xmm1
+	psrld	$4,%xmm1
+	pand	%xmm9,%xmm4
+
+	movdqa	0(%r11),%xmm2	/* .Lk_dksd */
+.byte	102,15,56,0,212	/* pshufb %xmm4,%xmm2 */
+	movdqa	16(%r11),%xmm3
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	pxor	%xmm2,%xmm3
+.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
+
+	movdqa	32(%r11),%xmm2	/* .Lk_dksb */
+.byte	102,15,56,0,212	/* pshufb %xmm4,%xmm2 */
+	pxor	%xmm3,%xmm2
+	movdqa	48(%r11),%xmm3
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	pxor	%xmm2,%xmm3
+.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
+
+	movdqa	64(%r11),%xmm2	/* .Lk_dkse */
+.byte	102,15,56,0,212	/* pshufb %xmm4,%xmm2 */
+	pxor	%xmm3,%xmm2
+	movdqa	80(%r11),%xmm3
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	pxor	%xmm2,%xmm3
+.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
+
+	movdqa	96(%r11),%xmm2	/* .Lk_dks9 */
+.byte	102,15,56,0,212	/* pshufb %xmm4,%xmm2 */
+	pxor	%xmm3,%xmm2
+	movdqa	112(%r11),%xmm3
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	pxor	%xmm2,%xmm3
+
+	addq	$-16,%rdx
+
+.Lschedule_mangle_both:
+	movdqa	(%r8,%r10,1),%xmm1	/* r10 + r8; callers in this file set r10 = .Lk_sr */
+.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
+	addq	$-16,%r8
+	andq	$0x30,%r8
+	movdqu	%xmm3,(%rdx)
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle
+
+
+/* vpaes_set_encrypt_key: %rdi = user key, %esi = key size in bits, */
+/* %rdx = key structure to fill. Always returns 0 in %eax. */
+.globl	vpaes_set_encrypt_key
+.hidden vpaes_set_encrypt_key
+.type	vpaes_set_encrypt_key,@function
+.align	16
+vpaes_set_encrypt_key:
+	movl	%esi,%eax
+	shrl	$5,%eax
+	addl	$5,%eax	/* bits/32 + 5: 9, 11 or 13 for 128, 192 or 256 bits */
+	movl	%eax,240(%rdx)	/* loop count later read by the cipher cores */
+
+	movl	$0,%ecx	/* rcx = 0 selects the encrypt paths in the schedule code */
+	movl	$0x30,%r8d	/* starting offset into .Lk_sr */
+	call	_vpaes_schedule_core
+	xorl	%eax,%eax
+	.byte	0xf3,0xc3	/* rep ret */
+.size	vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
+/* vpaes_set_decrypt_key: same arguments as vpaes_set_encrypt_key; the schedule is written from its end downwards. Returns 0. */
+.globl	vpaes_set_decrypt_key
+.hidden vpaes_set_decrypt_key
+.type	vpaes_set_decrypt_key,@function
+.align	16
+vpaes_set_decrypt_key:
+	movl	%esi,%eax
+	shrl	$5,%eax
+	addl	$5,%eax	/* bits/32 + 5 */
+	movl	%eax,240(%rdx)	/* loop count later read by the cipher cores */
+	shll	$4,%eax
+	leaq	16(%rdx,%rax,1),%rdx	/* rdx = key + 16 + 16*count; the decrypt paths step rdx by -16 */
+
+	movl	$1,%ecx	/* rcx != 0 selects the decrypt paths in the schedule code */
+	movl	%esi,%r8d
+	shrl	$1,%r8d
+	andl	$32,%r8d
+	xorl	$32,%r8d	/* r8 = 32 for 128/256-bit keys, 0 for 192-bit keys */
+	call	_vpaes_schedule_core
+	xorl	%eax,%eax
+	.byte	0xf3,0xc3	/* rep ret */
+.size	vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
+/* vpaes_encrypt: encrypt 16 bytes from (%rdi) to (%rsi); %rdx = key, passed through to the core. */
+.globl	vpaes_encrypt
+.hidden vpaes_encrypt
+.type	vpaes_encrypt,@function
+.align	16
+vpaes_encrypt:
+	movdqu	(%rdi),%xmm0	/* unaligned load of the input block */
+	call	_vpaes_preheat	/* load the lookup tables into xmm9-xmm15 */
+	call	_vpaes_encrypt_core
+	movdqu	%xmm0,(%rsi)	/* unaligned store of the result */
+	.byte	0xf3,0xc3	/* rep ret */
+.size	vpaes_encrypt,.-vpaes_encrypt
+/* vpaes_decrypt: decrypt 16 bytes from (%rdi) to (%rsi); %rdx = key, passed through to the core. */
+.globl	vpaes_decrypt
+.hidden vpaes_decrypt
+.type	vpaes_decrypt,@function
+.align	16
+vpaes_decrypt:
+	movdqu	(%rdi),%xmm0	/* unaligned load of the input block */
+	call	_vpaes_preheat	/* load the lookup tables into xmm9-xmm15 */
+	call	_vpaes_decrypt_core
+	movdqu	%xmm0,(%rsi)	/* unaligned store of the result */
+	.byte	0xf3,0xc3	/* rep ret */
+.size	vpaes_decrypt,.-vpaes_decrypt
+.globl	vpaes_cbc_encrypt	/* CBC mode over whole 16-byte blocks, both directions */
+.hidden vpaes_cbc_encrypt	/* In: rdi = input, rsi = output, rdx = byte length, rcx = key, */
+.type	vpaes_cbc_encrypt,@function	/* r8 = 16-byte chaining value (updated on return), */
+.align	16	/* r9d = 0 to decrypt, non-zero to encrypt */
+vpaes_cbc_encrypt:
+	xchgq	%rcx,%rdx	/* the cores expect the key in rdx; rcx now holds the length */
+	subq	$16,%rcx
+	jc	.Lcbc_abort	/* fewer than 16 bytes: return without touching anything */
+	movdqu	(%r8),%xmm6	/* xmm6 = chaining value */
+	subq	%rdi,%rsi	/* rsi = out - in, so (%rsi,%rdi,1) addresses the output */
+	call	_vpaes_preheat
+	cmpl	$0,%r9d
+	je	.Lcbc_dec_loop
+	jmp	.Lcbc_enc_loop
+.align	16
+.Lcbc_enc_loop:
+	movdqu	(%rdi),%xmm0
+	pxor	%xmm6,%xmm0	/* XOR the input with the previous output block */
+	call	_vpaes_encrypt_core
+	movdqa	%xmm0,%xmm6	/* the output becomes the next chaining value */
+	movdqu	%xmm0,(%rsi,%rdi,1)
+	leaq	16(%rdi),%rdi
+	subq	$16,%rcx
+	jnc	.Lcbc_enc_loop	/* continue while a full block remains; a trailing partial block is ignored */
+	jmp	.Lcbc_done
+.align	16
+.Lcbc_dec_loop:
+	movdqu	(%rdi),%xmm0
+	movdqa	%xmm0,%xmm7	/* keep the input block; the core overwrites xmm0 */
+	call	_vpaes_decrypt_core
+	pxor	%xmm6,%xmm0	/* XOR with the previous input block */
+	movdqa	%xmm7,%xmm6	/* this input block becomes the next chaining value */
+	movdqu	%xmm0,(%rsi,%rdi,1)
+	leaq	16(%rdi),%rdi
+	subq	$16,%rcx
+	jnc	.Lcbc_dec_loop
+.Lcbc_done:
+	movdqu	%xmm6,(%r8)	/* write the final chaining value back */
+.Lcbc_abort:
+	.byte	0xf3,0xc3	/* rep ret */
+.size	vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
+
+
+
+/* _vpaes_preheat: load the shared lookup tables into %xmm9-%xmm15. */
+/* All loads are relative to .Lk_s0F, so they depend on the table order */
+/* in _vpaes_consts. Writes %r10. */
+.type	_vpaes_preheat,@function
+.align	16
+_vpaes_preheat:
+	leaq	.Lk_s0F(%rip),%r10
+	movdqa	-32(%r10),%xmm10	/* .Lk_inv */
+	movdqa	-16(%r10),%xmm11	/* .Lk_inv+16 */
+	movdqa	0(%r10),%xmm9	/* .Lk_s0F: 0x0F in every byte */
+	movdqa	48(%r10),%xmm13	/* .Lk_sb1 */
+	movdqa	64(%r10),%xmm12	/* .Lk_sb1+16 */
+	movdqa	80(%r10),%xmm15	/* .Lk_sb2 */
+	movdqa	96(%r10),%xmm14	/* .Lk_sb2+16 */
+	.byte	0xf3,0xc3	/* rep ret */
+.size	_vpaes_preheat,.-_vpaes_preheat
+
+
+/* _vpaes_consts: lookup tables for the vpaes routines. */
+/* Several routines reach tables by fixed offsets from a neighbouring */
+/* label, so the order and sizes below must not change. */
+.type	_vpaes_consts,@object
+.align	64
+_vpaes_consts:
+.Lk_inv:
+.quad	0x0E05060F0D080180, 0x040703090A0B0C02
+.quad	0x01040A060F0B0780, 0x030D0E0C02050809
+
+.Lk_s0F:
+.quad	0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
+
+.Lk_ipt:
+.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
+.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
+
+.Lk_sb1:
+.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
+.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
+.Lk_sb2:
+.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
+.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
+.Lk_sbo:
+.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
+.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
+
+.Lk_mc_forward:
+.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
+.quad	0x080B0A0904070605, 0x000302010C0F0E0D
+.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
+.quad	0x000302010C0F0E0D, 0x080B0A0904070605
+
+.Lk_mc_backward:
+.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
+.quad	0x020100030E0D0C0F, 0x0A09080B06050407
+.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
+.quad	0x0A09080B06050407, 0x020100030E0D0C0F
+
+.Lk_sr:
+.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
+.quad	0x030E09040F0A0500, 0x0B06010C07020D08
+.quad	0x0F060D040B020900, 0x070E050C030A0108
+.quad	0x0B0E0104070A0D00, 0x0306090C0F020508
+
+.Lk_rcon:
+.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
+
+.Lk_s63:
+.quad	0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
+
+.Lk_opt:
+.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
+.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
+
+.Lk_deskew:
+.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
+.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
+
+
+
+/* Tables read by the decrypt path of _vpaes_schedule_mangle, which */
+/* indexes all four from .Lk_dksd at offsets 0, 32, 64 and 96. */
+.Lk_dksd:
+.quad	0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
+.quad	0x41C277F4B5368300, 0x5FDC69EAAB289D1E
+.Lk_dksb:
+.quad	0x9A4FCA1F8550D500, 0x03D653861CC94C99
+.quad	0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
+.Lk_dkse:
+.quad	0xD5031CCA1FC9D600, 0x53859A4C994F5086
+.quad	0xA23196054FDC7BE8, 0xCD5EF96A20B31487
+.Lk_dks9:
+.quad	0xB6116FC87ED9A700, 0x4AED933482255BFC
+.quad	0x4576516227143300, 0x8BB89FACE9DAFDCE
+
+
+
+/* Tables read by _vpaes_decrypt_core, which indexes .Lk_dsb9 through */
+/* .Lk_dsbo from .Lk_dsbd at offsets -32 to +112. */
+.Lk_dipt:
+.quad	0x0F505B040B545F00, 0x154A411E114E451A
+.quad	0x86E383E660056500, 0x12771772F491F194
+
+.Lk_dsb9:
+.quad	0x851C03539A86D600, 0xCAD51F504F994CC9
+.quad	0xC03B1789ECD74900, 0x725E2C9EB2FBA565
+.Lk_dsbd:
+.quad	0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
+.quad	0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
+.Lk_dsbb:
+.quad	0xD022649296B44200, 0x602646F6B0F2D404
+.quad	0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
+.Lk_dsbe:
+.quad	0x46F2929626D4D000, 0x2242600464B4F6B0
+.quad	0x0C55A6CDFFAAC100, 0x9467F36B98593E32
+.Lk_dsbo:
+.quad	0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
+.quad	0x12D7560F93441D00, 0xCA4B8159D8C58E9C
+.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0	/* ASCII: "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" */
+.align	64
+.size	_vpaes_consts,.-_vpaes_consts
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/fipsmodule/x86_64-mont.S b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/x86_64-mont.S
new file mode 100644
index 0000000..b32e2f0
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/x86_64-mont.S
@@ -0,0 +1,866 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+.extern	OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
+
+.globl	bn_mul_mont	/* Montgomery multiplication, word-at-a-time path plus dispatch */
+.hidden bn_mul_mont	/* In: rdi = result, rsi = a, rdx = b, rcx = modulus, */
+.type	bn_mul_mont,@function	/* r8 = pointer to the n0 word, r9d = length in 64-bit words */
+.align	16	/* Returns 1 in rax */
+bn_mul_mont:
+.cfi_startproc	
+	movl	%r9d,%r9d	/* zero-extend the word count to 64 bits */
+	movq	%rsp,%rax	/* rax = caller's rsp, saved in the frame further down */
+.cfi_def_cfa_register	%rax
+	testl	$3,%r9d
+	jnz	.Lmul_enter	/* count not a multiple of 4: generic path */
+	cmpl	$8,%r9d
+	jb	.Lmul_enter	/* count below 8: generic path */
+	cmpq	%rsi,%rdx
+	jne	.Lmul4x_enter	/* a != b: 4x unrolled multiply */
+	testl	$7,%r9d
+	jz	.Lsqr8x_enter	/* a == b and count a multiple of 8: squaring path */
+	jmp	.Lmul4x_enter
+
+.align	16
+.Lmul_enter:
+	pushq	%rbx
+.cfi_offset	%rbx,-16
+	pushq	%rbp
+.cfi_offset	%rbp,-24
+	pushq	%r12
+.cfi_offset	%r12,-32
+	pushq	%r13
+.cfi_offset	%r13,-40
+	pushq	%r14
+.cfi_offset	%r14,-48
+	pushq	%r15
+.cfi_offset	%r15,-56
+
+	negq	%r9
+	movq	%rsp,%r11
+	leaq	-16(%rsp,%r9,8),%r10	/* room for count+2 words below rsp */
+	negq	%r9
+	andq	$-1024,%r10	/* round the frame base down to a 1024-byte boundary */
+
+
+
+
+
+
+/* Move rsp down to r10 one 4096-byte page at a time, reading a word */
+/* from each page on the way (presumably so guard pages are touched */
+/* in order). */
+	subq	%r10,%r11
+	andq	$-4096,%r11
+	leaq	(%r10,%r11,1),%rsp
+	movq	(%rsp),%r11
+	cmpq	%r10,%rsp
+	ja	.Lmul_page_walk
+	jmp	.Lmul_page_walk_done
+
+.align	16
+.Lmul_page_walk:
+	leaq	-4096(%rsp),%rsp
+	movq	(%rsp),%r11
+	cmpq	%r10,%rsp
+	ja	.Lmul_page_walk
+.Lmul_page_walk_done:
+
+	movq	%rax,8(%rsp,%r9,8)	/* save the caller's rsp at tp[count+1] */
+.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08	/* DWARF: CFA = *(rsp + 8 + r9*8) + 8 */
+.Lmul_body:
+	movq	%rdx,%r12	/* r12 = b, since mulq overwrites rdx */
+	movq	(%r8),%r8	/* r8 = n0 value */
+	movq	(%r12),%rbx	/* rbx = b[0] */
+	movq	(%rsi),%rax	/* rax = a[0] */
+
+	xorq	%r14,%r14	/* r14 = outer index i */
+	xorq	%r15,%r15	/* r15 = inner index j */
+
+	movq	%r8,%rbp
+	mulq	%rbx	/* rdx:rax = a[0] * b[0] */
+	movq	%rax,%r10
+	movq	(%rcx),%rax	/* rax = n[0] */
+
+	imulq	%r10,%rbp	/* rbp = low word of the product times n0 */
+	movq	%rdx,%r11
+
+	mulq	%rbp	/* rdx:rax = n[0] * rbp */
+	addq	%rax,%r10	/* low word is only needed for its carry */
+	movq	8(%rsi),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r13
+
+	leaq	1(%r15),%r15
+	jmp	.L1st_enter
+
+.align	16
+.L1st:
+	addq	%rax,%r13
+	movq	(%rsi,%r15,8),%rax	/* rax = a[j] */
+	adcq	$0,%rdx
+	addq	%r11,%r13
+	movq	%r10,%r11
+	adcq	$0,%rdx
+	movq	%r13,-16(%rsp,%r15,8)	/* tp[j-2] */
+	movq	%rdx,%r13
+
+.L1st_enter:
+	mulq	%rbx	/* a[j] * b[0] */
+	addq	%rax,%r11
+	movq	(%rcx,%r15,8),%rax	/* rax = n[j] */
+	adcq	$0,%rdx
+	leaq	1(%r15),%r15
+	movq	%rdx,%r10
+
+	mulq	%rbp	/* n[j] * rbp */
+	cmpq	%r9,%r15
+	jne	.L1st
+
+	addq	%rax,%r13
+	movq	(%rsi),%rax	/* reload a[0] for the next outer pass */
+	adcq	$0,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+	movq	%r13,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+	movq	%r10,%r11
+
+	xorq	%rdx,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+	movq	%r13,-8(%rsp,%r9,8)	/* tp[count-1] */
+	movq	%rdx,(%rsp,%r9,8)	/* tp[count] = carry out of the pass */
+
+	leaq	1(%r14),%r14
+	jmp	.Louter
+.align	16
+.Louter:
+	movq	(%r12,%r14,8),%rbx	/* rbx = b[i] */
+	xorq	%r15,%r15
+	movq	%r8,%rbp
+	movq	(%rsp),%r10	/* r10 = tp[0] */
+	mulq	%rbx	/* a[0] * b[i] */
+	addq	%rax,%r10
+	movq	(%rcx),%rax
+	adcq	$0,%rdx
+
+	imulq	%r10,%rbp	/* rbp = (tp[0] + low word) * n0 */
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r10
+	movq	8(%rsi),%rax
+	adcq	$0,%rdx
+	movq	8(%rsp),%r10	/* r10 = tp[1] */
+	movq	%rdx,%r13
+
+	leaq	1(%r15),%r15
+	jmp	.Linner_enter
+
+.align	16
+.Linner:
+	addq	%rax,%r13
+	movq	(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	movq	(%rsp,%r15,8),%r10	/* next tp word */
+	adcq	$0,%rdx
+	movq	%r13,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+
+.Linner_enter:
+	mulq	%rbx	/* a[j] * b[i] */
+	addq	%rax,%r11
+	movq	(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r11,%r10
+	movq	%rdx,%r11
+	adcq	$0,%r11
+	leaq	1(%r15),%r15
+
+	mulq	%rbp	/* n[j] * rbp */
+	cmpq	%r9,%r15
+	jne	.Linner
+
+	addq	%rax,%r13
+	movq	(%rsi),%rax	/* reload a[0] for the next outer pass */
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	movq	(%rsp,%r15,8),%r10	/* r10 = tp[count], the previous carry word */
+	adcq	$0,%rdx
+	movq	%r13,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+
+	xorq	%rdx,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-8(%rsp,%r9,8)
+	movq	%rdx,(%rsp,%r9,8)
+
+	leaq	1(%r14),%r14
+	cmpq	%r9,%r14
+	jb	.Louter
+
+	xorq	%r14,%r14	/* index = 0; also clears CF for the first sbbq */
+	movq	(%rsp),%rax
+	leaq	(%rsp),%rsi	/* rsi = tp */
+	movq	%r9,%r15
+	jmp	.Lsub
+.align	16
+.Lsub:
+	sbbq	(%rcx,%r14,8),%rax	/* result[i] = tp[i] - n[i] - borrow */
+	movq	%rax,(%rdi,%r14,8)
+	movq	8(%rsi,%r14,8),%rax
+	leaq	1(%r14),%r14
+	decq	%r15	/* decq and leaq leave CF intact, so the borrow carries over */
+	jnz	.Lsub
+
+	sbbq	$0,%rax	/* rax = tp[count] - borrow; used as a mask (presumably 0 or all-ones - TODO confirm) */
+	xorq	%r14,%r14
+	andq	%rax,%rsi	/* branch-free pointer select: tp if the mask is set, else the result buffer */
+	notq	%rax
+	movq	%rdi,%rcx
+	andq	%rax,%rcx
+	movq	%r9,%r15
+	orq	%rcx,%rsi
+.align	16
+.Lcopy:
+	movq	(%rsi,%r14,8),%rax
+	movq	%r14,(%rsp,%r14,8)	/* overwrite tp[i] with the index value, wiping the temporary */
+	movq	%rax,(%rdi,%r14,8)
+	leaq	1(%r14),%r14
+	subq	$1,%r15
+	jnz	.Lcopy
+
+	movq	8(%rsp,%r9,8),%rsi	/* rsi = caller's rsp saved in the prologue */
+.cfi_def_cfa	%rsi,8
+	movq	$1,%rax	/* return value */
+	movq	-48(%rsi),%r15
+.cfi_restore	%r15
+	movq	-40(%rsi),%r14
+.cfi_restore	%r14
+	movq	-32(%rsi),%r13
+.cfi_restore	%r13
+	movq	-24(%rsi),%r12
+.cfi_restore	%r12
+	movq	-16(%rsi),%rbp
+.cfi_restore	%rbp
+	movq	-8(%rsi),%rbx
+.cfi_restore	%rbx
+	leaq	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
+.Lmul_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+.cfi_endproc	
+.size	bn_mul_mont,.-bn_mul_mont
+.type	bn_mul4x_mont,@function	/* 4x unrolled variant; same arguments as bn_mul_mont */
+.align	16	/* reached from bn_mul_mont via .Lmul4x_enter with a word count that is a multiple of 4 and at least 8 */
+bn_mul4x_mont:
+.cfi_startproc	
+	movl	%r9d,%r9d	/* zero-extend the word count */
+	movq	%rsp,%rax	/* rax = caller's rsp */
+.cfi_def_cfa_register	%rax
+.Lmul4x_enter:
+	pushq	%rbx
+.cfi_offset	%rbx,-16
+	pushq	%rbp
+.cfi_offset	%rbp,-24
+	pushq	%r12
+.cfi_offset	%r12,-32
+	pushq	%r13
+.cfi_offset	%r13,-40
+	pushq	%r14
+.cfi_offset	%r14,-48
+	pushq	%r15
+.cfi_offset	%r15,-56
+
+	negq	%r9
+	movq	%rsp,%r11
+	leaq	-32(%rsp,%r9,8),%r10	/* room for count+4 words below rsp */
+	negq	%r9
+	andq	$-1024,%r10
+
+	subq	%r10,%r11	/* page-by-page descent to r10, as in bn_mul_mont */
+	andq	$-4096,%r11
+	leaq	(%r10,%r11,1),%rsp
+	movq	(%rsp),%r11
+	cmpq	%r10,%rsp
+	ja	.Lmul4x_page_walk
+	jmp	.Lmul4x_page_walk_done
+
+.Lmul4x_page_walk:
+	leaq	-4096(%rsp),%rsp
+	movq	(%rsp),%r11
+	cmpq	%r10,%rsp
+	ja	.Lmul4x_page_walk
+.Lmul4x_page_walk_done:
+
+	movq	%rax,8(%rsp,%r9,8)	/* save the caller's rsp at tp[count+1] */
+.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08	/* DWARF: CFA = *(rsp + 8 + r9*8) + 8 */
+.Lmul4x_body:
+	movq	%rdi,16(%rsp,%r9,8)	/* save the result pointer; rdi is used as scratch below */
+	movq	%rdx,%r12	/* r12 = b */
+	movq	(%r8),%r8	/* r8 = n0 value */
+	movq	(%r12),%rbx	/* rbx = b[0] */
+	movq	(%rsi),%rax	/* rax = a[0] */
+
+	xorq	%r14,%r14	/* r14 = outer index i */
+	xorq	%r15,%r15	/* r15 = inner index j */
+
+	movq	%r8,%rbp
+	mulq	%rbx
+	movq	%rax,%r10
+	movq	(%rcx),%rax
+
+	imulq	%r10,%rbp	/* rbp = low word of a[0]*b[0] times n0 */
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r10
+	movq	8(%rsi),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	8(%rcx),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	16(%rsi),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	leaq	4(%r15),%r15
+	adcq	$0,%rdx
+	movq	%rdi,(%rsp)	/* tp[0] */
+	movq	%rdx,%r13
+	jmp	.L1st4x
+.align	16
+.L1st4x:
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	-16(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	-8(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-24(%rsp,%r15,8)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	-8(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%rdi,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	8(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-8(%rsp,%r15,8)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	8(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	leaq	4(%r15),%r15	/* j += 4; offsets below use the new j */
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	-16(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%rdi,-32(%rsp,%r15,8)
+	movq	%rdx,%r13
+	cmpq	%r9,%r15
+	jb	.L1st4x
+
+	mulq	%rbx	/* tail of the first pass: the last two words */
+	addq	%rax,%r10
+	movq	-16(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	-8(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-24(%rsp,%r15,8)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	-8(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	(%rsi),%rax	/* reload a[0] for the next outer pass */
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%rdi,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+
+	xorq	%rdi,%rdi
+	addq	%r10,%r13
+	adcq	$0,%rdi
+	movq	%r13,-8(%rsp,%r15,8)	/* tp[count-1] */
+	movq	%rdi,(%rsp,%r15,8)	/* tp[count] = carry out of the pass */
+
+	leaq	1(%r14),%r14
+.align	4
+.Louter4x:
+	movq	(%r12,%r14,8),%rbx	/* rbx = b[i] */
+	xorq	%r15,%r15
+	movq	(%rsp),%r10	/* r10 = tp[0] */
+	movq	%r8,%rbp
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	(%rcx),%rax
+	adcq	$0,%rdx
+
+	imulq	%r10,%rbp	/* rbp = (tp[0] + low word) * n0 */
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r10
+	movq	8(%rsi),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	8(%rcx),%rax
+	adcq	$0,%rdx
+	addq	8(%rsp),%r11	/* add tp[1] */
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	16(%rsi),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	leaq	4(%r15),%r15
+	adcq	$0,%rdx
+	movq	%rdi,(%rsp)
+	movq	%rdx,%r13
+	jmp	.Linner4x
+.align	16
+.Linner4x:
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	-16(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	-16(%rsp,%r15,8),%r10	/* add the tp word from the previous pass */
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	-8(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-24(%rsp,%r15,8)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	-8(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	-8(%rsp,%r15,8),%r11
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%rdi,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	(%rsp,%r15,8),%r10
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	8(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-8(%rsp,%r15,8)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	8(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	8(%rsp,%r15,8),%r11
+	adcq	$0,%rdx
+	leaq	4(%r15),%r15	/* j += 4; offsets below use the new j */
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	-16(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%rdi,-32(%rsp,%r15,8)
+	movq	%rdx,%r13
+	cmpq	%r9,%r15
+	jb	.Linner4x
+
+	mulq	%rbx	/* tail of the pass: the last two words */
+	addq	%rax,%r10
+	movq	-16(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	-16(%rsp,%r15,8),%r10
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	-8(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-24(%rsp,%r15,8)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	-8(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	-8(%rsp,%r15,8),%r11
+	adcq	$0,%rdx
+	leaq	1(%r14),%r14	/* i += 1 */
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	(%rsi),%rax	/* reload a[0] for the next outer pass */
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%rdi,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+
+	xorq	%rdi,%rdi
+	addq	%r10,%r13
+	adcq	$0,%rdi
+	addq	(%rsp,%r9,8),%r13	/* add the previous carry word tp[count] */
+	adcq	$0,%rdi
+	movq	%r13,-8(%rsp,%r15,8)
+	movq	%rdi,(%rsp,%r15,8)
+
+	cmpq	%r9,%r14
+	jb	.Louter4x
+	movq	16(%rsp,%r9,8),%rdi	/* restore the result pointer saved at .Lmul4x_body */
+	leaq	-4(%r9),%r15
+	movq	0(%rsp),%rax
+	pxor	%xmm0,%xmm0	/* xmm0 = 0, used to wipe tp in the copy loop */
+	movq	8(%rsp),%rdx
+	shrq	$2,%r15	/* r15 = (count - 4) / 4 loop iterations */
+	leaq	(%rsp),%rsi	/* rsi = tp */
+	xorq	%r14,%r14
+
+	subq	0(%rcx),%rax	/* start the borrow chain: tp - n, four words per iteration */
+	movq	16(%rsi),%rbx
+	movq	24(%rsi),%rbp
+	sbbq	8(%rcx),%rdx
+	jmp	.Lsub4x
+.align	16
+.Lsub4x:
+	movq	%rax,0(%rdi,%r14,8)
+	movq	%rdx,8(%rdi,%r14,8)
+	sbbq	16(%rcx,%r14,8),%rbx
+	movq	32(%rsi,%r14,8),%rax
+	movq	40(%rsi,%r14,8),%rdx
+	sbbq	24(%rcx,%r14,8),%rbp
+	movq	%rbx,16(%rdi,%r14,8)
+	movq	%rbp,24(%rdi,%r14,8)
+	sbbq	32(%rcx,%r14,8),%rax
+	movq	48(%rsi,%r14,8),%rbx
+	movq	56(%rsi,%r14,8),%rbp
+	sbbq	40(%rcx,%r14,8),%rdx
+	leaq	4(%r14),%r14
+	decq	%r15	/* decq and leaq leave CF intact, so the borrow carries over */
+	jnz	.Lsub4x
+
+	movq	%rax,0(%rdi,%r14,8)
+	movq	32(%rsi,%r14,8),%rax	/* rax = tp[count], the top carry word */
+	sbbq	16(%rcx,%r14,8),%rbx
+	movq	%rdx,8(%rdi,%r14,8)
+	sbbq	24(%rcx,%r14,8),%rbp
+	movq	%rbx,16(%rdi,%r14,8)
+
+	sbbq	$0,%rax	/* rax = tp[count] - borrow; used as a mask (presumably 0 or all-ones - TODO confirm) */
+	movq	%rbp,24(%rdi,%r14,8)
+	xorq	%r14,%r14
+	andq	%rax,%rsi	/* branch-free pointer select: tp if the mask is set, else the result buffer */
+	notq	%rax
+	movq	%rdi,%rcx
+	andq	%rax,%rcx
+	leaq	-4(%r9),%r15
+	orq	%rcx,%rsi
+	shrq	$2,%r15
+
+	movdqu	(%rsi),%xmm1	/* copy the first 16 bytes and zero tp[0..1] */
+	movdqa	%xmm0,(%rsp)
+	movdqu	%xmm1,(%rdi)
+	jmp	.Lcopy4x
+.align	16
+.Lcopy4x:
+	movdqu	16(%rsi,%r14,1),%xmm2	/* r14 is a byte offset in this loop */
+	movdqu	32(%rsi,%r14,1),%xmm1
+	movdqa	%xmm0,16(%rsp,%r14,1)	/* zero the temporary as it is copied */
+	movdqu	%xmm2,16(%rdi,%r14,1)
+	movdqa	%xmm0,32(%rsp,%r14,1)
+	movdqu	%xmm1,32(%rdi,%r14,1)
+	leaq	32(%r14),%r14
+	decq	%r15
+	jnz	.Lcopy4x
+
+	movdqu	16(%rsi,%r14,1),%xmm2	/* final 16 bytes */
+	movdqa	%xmm0,16(%rsp,%r14,1)
+	movdqu	%xmm2,16(%rdi,%r14,1)
+	movq	8(%rsp,%r9,8),%rsi	/* rsi = caller's rsp saved in the prologue */
+.cfi_def_cfa	%rsi, 8
+	movq	$1,%rax	/* return value */
+	movq	-48(%rsi),%r15
+.cfi_restore	%r15
+	movq	-40(%rsi),%r14
+.cfi_restore	%r14
+	movq	-32(%rsi),%r13
+.cfi_restore	%r13
+	movq	-24(%rsi),%r12
+.cfi_restore	%r12
+	movq	-16(%rsi),%rbp
+.cfi_restore	%rbp
+	movq	-8(%rsi),%rbx
+.cfi_restore	%rbx
+	leaq	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
+.Lmul4x_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+.cfi_endproc	
+.size	bn_mul4x_mont,.-bn_mul4x_mont
+.extern	bn_sqr8x_internal
+.hidden bn_sqr8x_internal
+
+.type	bn_sqr8x_mont,@function	/* squaring path; same arguments as bn_mul_mont */
+.align	32	/* reached via .Lsqr8x_enter when a == b and the word count is a multiple of 8 */
+bn_sqr8x_mont:
+.cfi_startproc	
+	movq	%rsp,%rax	/* rax = caller's rsp */
+.cfi_def_cfa_register	%rax
+.Lsqr8x_enter:
+	pushq	%rbx
+.cfi_offset	%rbx,-16
+	pushq	%rbp
+.cfi_offset	%rbp,-24
+	pushq	%r12
+.cfi_offset	%r12,-32
+	pushq	%r13
+.cfi_offset	%r13,-40
+	pushq	%r14
+.cfi_offset	%r14,-48
+	pushq	%r15
+.cfi_offset	%r15,-56
+.Lsqr8x_prologue:
+
+	movl	%r9d,%r10d
+	shll	$3,%r9d	/* r9 = count * 8 (bytes) */
+	shlq	$3+2,%r10	/* r10 = count * 32 */
+	negq	%r9	/* r9 = -(count * 8) */
+
+
+
+
+/* Choose the frame address from (candidate frame - a) modulo 4096; */
+/* presumably this keeps the frame from aliasing the input - TODO confirm. */
+	leaq	-64(%rsp,%r9,2),%r11	/* candidate frame: 2*count words plus 64 bytes below rsp */
+	movq	%rsp,%rbp
+	movq	(%r8),%r8	/* r8 = n0 value */
+	subq	%rsi,%r11
+	andq	$4095,%r11
+	cmpq	%r11,%r10
+	jb	.Lsqr8x_sp_alt
+	subq	%r11,%rbp
+	leaq	-64(%rbp,%r9,2),%rbp
+	jmp	.Lsqr8x_sp_done
+
+.align	32
+.Lsqr8x_sp_alt:
+	leaq	4096-64(,%r9,2),%r10
+	leaq	-64(%rbp,%r9,2),%rbp
+	subq	%r10,%r11
+	movq	$0,%r10
+	cmovcq	%r10,%r11	/* clamp to 0 if the subtraction borrowed */
+	subq	%r11,%rbp
+.Lsqr8x_sp_done:
+	andq	$-64,%rbp	/* 64-byte align the frame */
+	movq	%rsp,%r11
+	subq	%rbp,%r11
+	andq	$-4096,%r11
+	leaq	(%r11,%rbp,1),%rsp
+	movq	(%rsp),%r10
+	cmpq	%rbp,%rsp
+	ja	.Lsqr8x_page_walk
+	jmp	.Lsqr8x_page_walk_done
+
+.align	16
+.Lsqr8x_page_walk:
+	leaq	-4096(%rsp),%rsp	/* step down one page at a time, reading a word from each */
+	movq	(%rsp),%r10
+	cmpq	%rbp,%rsp
+	ja	.Lsqr8x_page_walk
+.Lsqr8x_page_walk_done:
+
+	movq	%r9,%r10	/* r10 = -(count * 8) */
+	negq	%r9	/* r9 = count * 8 */
+
+	movq	%r8,32(%rsp)	/* n0 value at frame offset 32 */
+	movq	%rax,40(%rsp)	/* caller's rsp at frame offset 40 */
+.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08	/* DWARF: CFA = *(rsp + 40) + 8 */
+.Lsqr8x_body:
+
+.byte	102,72,15,110,209	/* movq %rcx,%xmm2 (modulus pointer) */
+	pxor	%xmm0,%xmm0
+.byte	102,72,15,110,207	/* movq %rdi,%xmm1 (result pointer) */
+.byte	102,73,15,110,218	/* movq %r10,%xmm3 (negated byte count) */
+	call	bn_sqr8x_internal
+
+/* NOTE(review): rax, rbp, rdi and r9 below are whatever bn_sqr8x_internal */
+/* left in them; that routine is not visible here. The incq/addq loop */
+/* counters only terminate if r9 comes back negative - verify against it. */
+
+	leaq	(%rdi,%r9,1),%rbx
+	movq	%r9,%rcx
+	movq	%r9,%rdx
+.byte	102,72,15,126,207	/* movq %xmm1,%rdi (restore the result pointer) */
+	sarq	$3+2,%rcx	/* rcx = r9 / 32: one count per 32-byte step */
+	jmp	.Lsqr8x_sub
+
+.align	32
+.Lsqr8x_sub:
+	movq	0(%rbx),%r12
+	movq	8(%rbx),%r13
+	movq	16(%rbx),%r14
+	movq	24(%rbx),%r15
+	leaq	32(%rbx),%rbx
+	sbbq	0(%rbp),%r12	/* borrow chain; leaq and incq do not modify CF */
+	sbbq	8(%rbp),%r13
+	sbbq	16(%rbp),%r14
+	sbbq	24(%rbp),%r15
+	leaq	32(%rbp),%rbp
+	movq	%r12,0(%rdi)
+	movq	%r13,8(%rdi)
+	movq	%r14,16(%rdi)
+	movq	%r15,24(%rdi)
+	leaq	32(%rdi),%rdi
+	incq	%rcx
+	jnz	.Lsqr8x_sub
+
+	sbbq	$0,%rax	/* fold the final borrow into rax to form the select mask */
+	leaq	(%rbx,%r9,1),%rbx	/* rewind rbx and rdi by adding r9 */
+	leaq	(%rdi,%r9,1),%rdi
+
+.byte	102,72,15,110,200	/* movq %rax,%xmm1 */
+	pxor	%xmm0,%xmm0
+	pshufd	$0,%xmm1,%xmm1	/* broadcast the low dword of the mask */
+	movq	40(%rsp),%rsi	/* rsi = caller's rsp saved in the prologue */
+.cfi_def_cfa	%rsi,8
+	jmp	.Lsqr8x_cond_copy
+
+.align	32
+.Lsqr8x_cond_copy:
+	movdqa	0(%rbx),%xmm2	/* value before subtraction */
+	movdqa	16(%rbx),%xmm3
+	leaq	32(%rbx),%rbx
+	movdqu	0(%rdi),%xmm4	/* value after subtraction */
+	movdqu	16(%rdi),%xmm5
+	leaq	32(%rdi),%rdi
+	movdqa	%xmm0,-32(%rbx)	/* zero the temporary just read ... */
+	movdqa	%xmm0,-16(%rbx)
+	movdqa	%xmm0,-32(%rbx,%rdx,1)	/* ... and the area at offset rdx from it */
+	movdqa	%xmm0,-16(%rbx,%rdx,1)
+	pcmpeqd	%xmm1,%xmm0	/* xmm0 = all-ones in each dword where xmm1 is 0 */
+	pand	%xmm1,%xmm2
+	pand	%xmm1,%xmm3
+	pand	%xmm0,%xmm4
+	pand	%xmm0,%xmm5
+	pxor	%xmm0,%xmm0	/* back to zero for the next iteration's wipes */
+	por	%xmm2,%xmm4	/* branch-free select between the two values */
+	por	%xmm3,%xmm5
+	movdqu	%xmm4,-32(%rdi)
+	movdqu	%xmm5,-16(%rdi)
+	addq	$32,%r9
+	jnz	.Lsqr8x_cond_copy
+
+	movq	$1,%rax	/* return value */
+	movq	-48(%rsi),%r15
+.cfi_restore	%r15
+	movq	-40(%rsi),%r14
+.cfi_restore	%r14
+	movq	-32(%rsi),%r13
+.cfi_restore	%r13
+	movq	-24(%rsi),%r12
+.cfi_restore	%r12
+	movq	-16(%rsi),%rbp
+.cfi_restore	%rbp
+	movq	-8(%rsi),%rbx
+.cfi_restore	%rbx
+	leaq	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
+.Lsqr8x_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+.cfi_endproc	
+.size	bn_sqr8x_mont,.-bn_sqr8x_mont
+.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align	16
+#endif
diff --git a/third_party/boringssl/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
new file mode 100644
index 0000000..208b1dc
--- /dev/null
+++ b/third_party/boringssl/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
@@ -0,0 +1,2393 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+.extern	OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
+
+.globl	bn_mul_mont_gather5	/* bn_mul_mont_gather5: word-serial Montgomery-style multiply of the number at (%rsi) by one table entry, reduced by the number at (%rcx). */
+.hidden bn_mul_mont_gather5	/* In: %rdi=result, %rsi=multiplicand, %rdx=table, %rcx=modulus, %r8=pointer to the constant n0, %r9d=word count, 8(%rsp)=32-bit table index. */
+.type	bn_mul_mont_gather5,@function	/* Out: %rax=1. NOTE(review): argument meanings are read off the register usage below - confirm against the C prototype. */
+.align	64
+bn_mul_mont_gather5:
+.cfi_startproc	
+	movl	%r9d,%r9d	/* zero-extend the 32-bit word count */
+	movq	%rsp,%rax	/* keep the entry %rsp; it is saved in the frame and used by the epilogue */
+.cfi_def_cfa_register	%rax
+	testl	$7,%r9d
+	jnz	.Lmul_enter	/* count is not a multiple of 8: use the one-word-per-step path below */
+	jmp	.Lmul4x_enter	/* otherwise continue inside bn_mul4x_mont_gather5, just after its own %rsp save */
+
+.align	16
+.Lmul_enter:
+	movd	8(%rsp),%xmm5	/* table index (first stack argument) */
+	pushq	%rbx
+.cfi_offset	%rbx,-16
+	pushq	%rbp
+.cfi_offset	%rbp,-24
+	pushq	%r12
+.cfi_offset	%r12,-32
+	pushq	%r13
+.cfi_offset	%r13,-40
+	pushq	%r14
+.cfi_offset	%r14,-48
+	pushq	%r15
+.cfi_offset	%r15,-56
+
+	negq	%r9
+	movq	%rsp,%r11
+	leaq	-280(%rsp,%r9,8),%r10	/* r10 = rsp - 280 - 8*count: proposed bottom of the new frame */
+	negq	%r9
+	andq	$-1024,%r10	/* round the frame bottom down to a 1024-byte boundary */
+/* Move %rsp down to %r10 in steps of at most 4096 bytes, reading one word */
+/* at every step so that each page of the new frame is touched in order. */
+
+
+
+
+
+
+
+	subq	%r10,%r11
+	andq	$-4096,%r11
+	leaq	(%r10,%r11,1),%rsp
+	movq	(%rsp),%r11
+	cmpq	%r10,%rsp
+	ja	.Lmul_page_walk
+	jmp	.Lmul_page_walk_done
+
+.Lmul_page_walk:
+	leaq	-4096(%rsp),%rsp
+	movq	(%rsp),%r11
+	cmpq	%r10,%rsp
+	ja	.Lmul_page_walk
+.Lmul_page_walk_done:
+
+	leaq	.Linc(%rip),%r10	/* .Linc is defined outside this view; it seeds the counter vectors used for the masks */
+	movq	%rax,8(%rsp,%r9,8)	/* save the entry %rsp above the temporaries; the CFI expression below computes the CFA as *(rsp+8+r9*8)+8 */
+.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
+.Lmul_body:
+
+	leaq	128(%rdx),%r12	/* r12 = table + 128, so one 256-byte row spans -128(%r12)..127(%r12) */
+	movdqa	0(%r10),%xmm0
+	movdqa	16(%r10),%xmm1
+	leaq	24-112(%rsp,%r9,8),%r10	/* r10+112 is the start of the 256-byte mask area above the temporaries */
+	andq	$-16,%r10
+
+	pshufd	$0,%xmm5,%xmm5	/* broadcast the index to all four 32-bit lanes */
+	movdqa	%xmm1,%xmm4	/* xmm4 keeps the second .Linc vector; it is the base for every later counter */
+	movdqa	%xmm1,%xmm2
+	paddd	%xmm0,%xmm1	/* next counter vector = previous counter + base */
+	pcmpeqd	%xmm5,%xmm0	/* lanes equal to the index become all-ones, others zero */
+.byte	0x67
+	movdqa	%xmm4,%xmm3
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,112(%r10)	/* 16 such masks are stored at 112(%r10)..352(%r10) */
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,128(%r10)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,144(%r10)
+	movdqa	%xmm4,%xmm2
+
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,160(%r10)
+	movdqa	%xmm4,%xmm3
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,176(%r10)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,192(%r10)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,208(%r10)
+	movdqa	%xmm4,%xmm2
+
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,224(%r10)
+	movdqa	%xmm4,%xmm3
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,240(%r10)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,256(%r10)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,272(%r10)
+	movdqa	%xmm4,%xmm2
+
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,288(%r10)
+	movdqa	%xmm4,%xmm3
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,304(%r10)
+
+	paddd	%xmm2,%xmm3
+.byte	0x67
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,320(%r10)
+
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,336(%r10)
+	pand	64(%r12),%xmm0	/* gather: AND each 16-byte chunk of the row with its mask and OR the results; */
+
+	pand	80(%r12),%xmm1	/* all 16 chunks are read whatever the index is */
+	pand	96(%r12),%xmm2
+	movdqa	%xmm3,352(%r10)
+	pand	112(%r12),%xmm3
+	por	%xmm2,%xmm0
+	por	%xmm3,%xmm1
+	movdqa	-128(%r12),%xmm4
+	movdqa	-112(%r12),%xmm5
+	movdqa	-96(%r12),%xmm2
+	pand	112(%r10),%xmm4
+	movdqa	-80(%r12),%xmm3
+	pand	128(%r10),%xmm5
+	por	%xmm4,%xmm0
+	pand	144(%r10),%xmm2
+	por	%xmm5,%xmm1
+	pand	160(%r10),%xmm3
+	por	%xmm2,%xmm0
+	por	%xmm3,%xmm1
+	movdqa	-64(%r12),%xmm4
+	movdqa	-48(%r12),%xmm5
+	movdqa	-32(%r12),%xmm2
+	pand	176(%r10),%xmm4
+	movdqa	-16(%r12),%xmm3
+	pand	192(%r10),%xmm5
+	por	%xmm4,%xmm0
+	pand	208(%r10),%xmm2
+	por	%xmm5,%xmm1
+	pand	224(%r10),%xmm3
+	por	%xmm2,%xmm0
+	por	%xmm3,%xmm1
+	movdqa	0(%r12),%xmm4
+	movdqa	16(%r12),%xmm5
+	movdqa	32(%r12),%xmm2
+	pand	240(%r10),%xmm4
+	movdqa	48(%r12),%xmm3
+	pand	256(%r10),%xmm5
+	por	%xmm4,%xmm0
+	pand	272(%r10),%xmm2
+	por	%xmm5,%xmm1
+	pand	288(%r10),%xmm3
+	por	%xmm2,%xmm0
+	por	%xmm3,%xmm1
+	por	%xmm1,%xmm0
+	pshufd	$0x4e,%xmm0,%xmm1	/* swap the two 64-bit halves ... */
+	por	%xmm1,%xmm0	/* ... and fold them, leaving the selected word in the low half */
+	leaq	256(%r12),%r12	/* advance to the next 256-byte table row */
+.byte	102,72,15,126,195	/* movq %xmm0,%rbx: rbx = gathered multiplier word b */
+
+	movq	(%r8),%r8	/* load the constant n0 that %r8 pointed to */
+	movq	(%rsi),%rax	/* a[0] */
+
+	xorq	%r14,%r14	/* i = 0 (outer word counter) */
+	xorq	%r15,%r15	/* j = 0 (inner word counter) */
+
+	movq	%r8,%rbp
+	mulq	%rbx	/* rdx:rax = a[0] * b */
+	movq	%rax,%r10
+	movq	(%rcx),%rax	/* n[0] */
+
+	imulq	%r10,%rbp	/* m = low word of a[0]*b times n0 */
+	movq	%rdx,%r11
+
+	mulq	%rbp	/* rdx:rax = n[0] * m */
+	addq	%rax,%r10	/* only the carry of this sum is consumed; %r10 is overwritten before it is read again */
+	movq	8(%rsi),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r13
+
+	leaq	1(%r15),%r15
+	jmp	.L1st_enter
+
+.align	16
+.L1st:	/* first pass: t[j-1] = word j of a*b + n*m, with carries kept in %r11 (a*b) and %r13 (n*m) */
+	addq	%rax,%r13
+	movq	(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r11,%r13
+	movq	%r10,%r11
+	adcq	$0,%rdx
+	movq	%r13,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+
+.L1st_enter:
+	mulq	%rbx	/* a[j] * b */
+	addq	%rax,%r11
+	movq	(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	leaq	1(%r15),%r15
+	movq	%rdx,%r10
+
+	mulq	%rbp	/* n[j] * m */
+	cmpq	%r9,%r15
+	jne	.L1st
+
+
+	addq	%rax,%r13
+	adcq	$0,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+	movq	%r13,-16(%rsp,%r9,8)
+	movq	%rdx,%r13
+	movq	%r10,%r11
+
+	xorq	%rdx,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+	movq	%r13,-8(%rsp,%r9,8)	/* t[count-1] */
+	movq	%rdx,(%rsp,%r9,8)	/* t[count] = final carry */
+
+	leaq	1(%r14),%r14
+	jmp	.Louter
+.align	16
+.Louter:	/* one pass per remaining table row: gather the next word b, then t = (t + a*b + n*m) shifted down one word */
+	leaq	24+128(%rsp,%r9,8),%rdx	/* rdx = mask area + 128, so the masks are at -128(%rdx)..112(%rdx) */
+	andq	$-16,%rdx
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	movdqa	-128(%r12),%xmm0
+	movdqa	-112(%r12),%xmm1
+	movdqa	-96(%r12),%xmm2
+	movdqa	-80(%r12),%xmm3
+	pand	-128(%rdx),%xmm0
+	pand	-112(%rdx),%xmm1
+	por	%xmm0,%xmm4
+	pand	-96(%rdx),%xmm2
+	por	%xmm1,%xmm5
+	pand	-80(%rdx),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	-64(%r12),%xmm0
+	movdqa	-48(%r12),%xmm1
+	movdqa	-32(%r12),%xmm2
+	movdqa	-16(%r12),%xmm3
+	pand	-64(%rdx),%xmm0
+	pand	-48(%rdx),%xmm1
+	por	%xmm0,%xmm4
+	pand	-32(%rdx),%xmm2
+	por	%xmm1,%xmm5
+	pand	-16(%rdx),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	0(%r12),%xmm0
+	movdqa	16(%r12),%xmm1
+	movdqa	32(%r12),%xmm2
+	movdqa	48(%r12),%xmm3
+	pand	0(%rdx),%xmm0
+	pand	16(%rdx),%xmm1
+	por	%xmm0,%xmm4
+	pand	32(%rdx),%xmm2
+	por	%xmm1,%xmm5
+	pand	48(%rdx),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	64(%r12),%xmm0
+	movdqa	80(%r12),%xmm1
+	movdqa	96(%r12),%xmm2
+	movdqa	112(%r12),%xmm3
+	pand	64(%rdx),%xmm0
+	pand	80(%rdx),%xmm1
+	por	%xmm0,%xmm4
+	pand	96(%rdx),%xmm2
+	por	%xmm1,%xmm5
+	pand	112(%rdx),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	por	%xmm5,%xmm4
+	pshufd	$0x4e,%xmm4,%xmm0
+	por	%xmm4,%xmm0
+	leaq	256(%r12),%r12
+
+	movq	(%rsi),%rax	/* a[0] */
+.byte	102,72,15,126,195	/* movq %xmm0,%rbx: rbx = gathered multiplier word b */
+
+	xorq	%r15,%r15	/* j = 0 */
+	movq	%r8,%rbp
+	movq	(%rsp),%r10	/* t[0] */
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	(%rcx),%rax
+	adcq	$0,%rdx
+
+	imulq	%r10,%rbp	/* m = (t[0] + low word of a[0]*b) times n0 */
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r10	/* only the carry of this sum is consumed; %r10 is reloaded on the next line but one */
+	movq	8(%rsi),%rax
+	adcq	$0,%rdx
+	movq	8(%rsp),%r10	/* t[1] */
+	movq	%rdx,%r13
+
+	leaq	1(%r15),%r15
+	jmp	.Linner_enter
+
+.align	16
+.Linner:
+	addq	%rax,%r13
+	movq	(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	movq	(%rsp,%r15,8),%r10
+	adcq	$0,%rdx
+	movq	%r13,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+
+.Linner_enter:
+	mulq	%rbx	/* a[j] * b */
+	addq	%rax,%r11
+	movq	(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r11,%r10
+	movq	%rdx,%r11
+	adcq	$0,%r11
+	leaq	1(%r15),%r15
+
+	mulq	%rbp	/* n[j] * m */
+	cmpq	%r9,%r15
+	jne	.Linner
+
+	addq	%rax,%r13
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	movq	(%rsp,%r9,8),%r10	/* previous top carry word t[count] */
+	adcq	$0,%rdx
+	movq	%r13,-16(%rsp,%r9,8)
+	movq	%rdx,%r13
+
+	xorq	%rdx,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-8(%rsp,%r9,8)
+	movq	%rdx,(%rsp,%r9,8)
+
+	leaq	1(%r14),%r14
+	cmpq	%r9,%r14
+	jb	.Louter
+
+	xorq	%r14,%r14	/* index = 0; xor also clears CF for the sbb chain below */
+	movq	(%rsp),%rax
+	leaq	(%rsp),%rsi	/* rsi = t */
+	movq	%r9,%r15
+	jmp	.Lsub
+.align	16
+.Lsub:	/* result = t - n, word by word; leaq, movq and decq leave CF untouched between sbb steps */
+	sbbq	(%rcx,%r14,8),%rax
+	movq	%rax,(%rdi,%r14,8)
+	movq	8(%rsi,%r14,8),%rax
+	leaq	1(%r14),%r14
+	decq	%r15
+	jnz	.Lsub
+
+	sbbq	$0,%rax	/* rax = t[count] - borrow; used as the select mask below */
+	xorq	%r14,%r14
+	andq	%rax,%rsi	/* rsi = t AND mask */
+	notq	%rax
+	movq	%rdi,%rcx
+	andq	%rax,%rcx	/* rcx = result AND NOT mask */
+	movq	%r9,%r15
+	orq	%rcx,%rsi	/* rsi = whichever of t / result the mask selected as copy source */
+.align	16
+.Lcopy:
+	movq	(%rsi,%r14,8),%rax
+	movq	%r14,(%rsp,%r14,8)	/* overwrite the temporary t[j] with the loop index */
+	movq	%rax,(%rdi,%r14,8)
+	leaq	1(%r14),%r14
+	subq	$1,%r15
+	jnz	.Lcopy
+
+	movq	8(%rsp,%r9,8),%rsi	/* reload the entry %rsp saved after the page walk */
+.cfi_def_cfa	%rsi,8
+	movq	$1,%rax	/* return value 1 */
+
+	movq	-48(%rsi),%r15
+.cfi_restore	%r15
+	movq	-40(%rsi),%r14
+.cfi_restore	%r14
+	movq	-32(%rsi),%r13
+.cfi_restore	%r13
+	movq	-24(%rsi),%r12
+.cfi_restore	%r12
+	movq	-16(%rsi),%rbp
+.cfi_restore	%rbp
+	movq	-8(%rsi),%rbx
+.cfi_restore	%rbx
+	leaq	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
+.Lmul_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+.cfi_endproc	
+.size	bn_mul_mont_gather5,.-bn_mul_mont_gather5
+.type	bn_mul4x_mont_gather5,@function	/* bn_mul4x_mont_gather5: frame setup and teardown around mul4x_internal; same register arguments as bn_mul_mont_gather5. */
+.align	32
+bn_mul4x_mont_gather5:	/* bn_mul_mont_gather5 jumps to .Lmul4x_enter below when the word count is a multiple of 8. Returns %rax=1. */
+.cfi_startproc	
+.byte	0x67
+	movq	%rsp,%rax	/* keep the entry %rsp; mul4x_internal reads the stack argument through it */
+.cfi_def_cfa_register	%rax
+.Lmul4x_enter:
+	pushq	%rbx
+.cfi_offset	%rbx,-16
+	pushq	%rbp
+.cfi_offset	%rbp,-24
+	pushq	%r12
+.cfi_offset	%r12,-32
+	pushq	%r13
+.cfi_offset	%r13,-40
+	pushq	%r14
+.cfi_offset	%r14,-48
+	pushq	%r15
+.cfi_offset	%r15,-56
+.Lmul4x_prologue:
+
+.byte	0x67
+	shll	$3,%r9d	/* r9 = byte length (8 * word count) */
+	leaq	(%r9,%r9,2),%r10	/* r10 = 3 * byte length */
+	negq	%r9
+/* Choose the frame base in %rbp: start from rsp - 320 - 2*bytes and shift it */
+/* further down by an amount derived from its distance to %rdi modulo 4096. */
+/* NOTE(review): presumably avoids page-offset aliasing with the result buffer - confirm upstream. */
+
+
+
+
+
+
+
+	leaq	-320(%rsp,%r9,2),%r11
+	movq	%rsp,%rbp
+	subq	%rdi,%r11
+	andq	$4095,%r11	/* r11 = (candidate frame - result pointer) mod 4096 */
+	cmpq	%r11,%r10
+	jb	.Lmul4xsp_alt
+	subq	%r11,%rbp
+	leaq	-320(%rbp,%r9,2),%rbp
+	jmp	.Lmul4xsp_done
+
+.align	32
+.Lmul4xsp_alt:
+	leaq	4096-320(,%r9,2),%r10
+	leaq	-320(%rbp,%r9,2),%rbp
+	subq	%r10,%r11
+	movq	$0,%r10
+	cmovcq	%r10,%r11	/* clamp the adjustment to 0 if the subtraction borrowed */
+	subq	%r11,%rbp
+.Lmul4xsp_done:
+	andq	$-64,%rbp	/* align the frame base to 64 bytes */
+	movq	%rsp,%r11
+	subq	%rbp,%r11
+	andq	$-4096,%r11
+	leaq	(%r11,%rbp,1),%rsp	/* page walk: step %rsp down to %rbp at most 4096 bytes at a time, reading a word at each step */
+	movq	(%rsp),%r10
+	cmpq	%rbp,%rsp
+	ja	.Lmul4x_page_walk
+	jmp	.Lmul4x_page_walk_done
+
+.Lmul4x_page_walk:
+	leaq	-4096(%rsp),%rsp
+	movq	(%rsp),%r10
+	cmpq	%rbp,%rsp
+	ja	.Lmul4x_page_walk
+.Lmul4x_page_walk_done:
+
+	negq	%r9	/* back to the positive byte length expected by mul4x_internal */
+
+	movq	%rax,40(%rsp)	/* save the entry %rsp; the CFI expression below computes the CFA as *(rsp+40)+8 */
+.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
+.Lmul4x_body:
+
+	call	mul4x_internal
+
+	movq	40(%rsp),%rsi	/* reload the entry %rsp */
+.cfi_def_cfa	%rsi,8
+	movq	$1,%rax	/* return value 1 */
+
+	movq	-48(%rsi),%r15
+.cfi_restore	%r15
+	movq	-40(%rsi),%r14
+.cfi_restore	%r14
+	movq	-32(%rsi),%r13
+.cfi_restore	%r13
+	movq	-24(%rsi),%r12
+.cfi_restore	%r12
+	movq	-16(%rsi),%rbp
+.cfi_restore	%rbp
+	movq	-8(%rsi),%rbx
+.cfi_restore	%rbx
+	leaq	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
+.Lmul4x_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+.cfi_endproc	
+.size	bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
+
+.type	mul4x_internal,@function	/* mul4x_internal: four-words-per-step multiply-and-reduce core shared by bn_mul4x_mont_gather5 and bn_power5. Not C-callable: it uses the frame its caller allocated. */
+.align	32	/* In: %rax=caller entry %rsp (so 8(%rax) is the table index), %rdi=result, %rsi=multiplicand, %rdx=table, %rcx=modulus, %r8=pointer to n0, %r9=byte length. */
+mul4x_internal:	/* Frame slots (the +8 skips this call's return address): 16+8(%rsp)=table end, 56+8(%rsp)=result pointer, 64+8(%rsp)..=temporaries t. */
+	shlq	$5,%r9
+	movd	8(%rax),%xmm5	/* table index */
+	leaq	.Linc(%rip),%rax	/* .Linc is defined outside this view; it seeds the counter vectors used for the masks */
+	leaq	128(%rdx,%r9,1),%r13	/* r13 = table + 128 + 32*bytes: the value %r12 reaches after the last row */
+	shrq	$5,%r9
+	movdqa	0(%rax),%xmm0
+	movdqa	16(%rax),%xmm1
+	leaq	88-112(%rsp,%r9,1),%r10	/* r10+112 is the start of the 256-byte mask area */
+	leaq	128(%rdx),%r12	/* r12 = table + 128, so one 256-byte row spans -128(%r12)..127(%r12) */
+
+	pshufd	$0,%xmm5,%xmm5	/* broadcast the index to all four 32-bit lanes */
+	movdqa	%xmm1,%xmm4	/* xmm4 keeps the second .Linc vector; it is the base for every later counter */
+.byte	0x67,0x67
+	movdqa	%xmm1,%xmm2
+	paddd	%xmm0,%xmm1	/* next counter vector = previous counter + base */
+	pcmpeqd	%xmm5,%xmm0	/* lanes equal to the index become all-ones, others zero */
+.byte	0x67
+	movdqa	%xmm4,%xmm3
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,112(%r10)	/* 16 such masks are stored at 112(%r10)..352(%r10) */
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,128(%r10)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,144(%r10)
+	movdqa	%xmm4,%xmm2
+
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,160(%r10)
+	movdqa	%xmm4,%xmm3
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,176(%r10)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,192(%r10)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,208(%r10)
+	movdqa	%xmm4,%xmm2
+
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,224(%r10)
+	movdqa	%xmm4,%xmm3
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,240(%r10)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,256(%r10)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,272(%r10)
+	movdqa	%xmm4,%xmm2
+
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,288(%r10)
+	movdqa	%xmm4,%xmm3
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,304(%r10)
+
+	paddd	%xmm2,%xmm3
+.byte	0x67
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,320(%r10)
+
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,336(%r10)
+	pand	64(%r12),%xmm0	/* gather: AND each 16-byte chunk of the row with its mask and OR the results; */
+
+	pand	80(%r12),%xmm1	/* all 16 chunks are read whatever the index is */
+	pand	96(%r12),%xmm2
+	movdqa	%xmm3,352(%r10)
+	pand	112(%r12),%xmm3
+	por	%xmm2,%xmm0
+	por	%xmm3,%xmm1
+	movdqa	-128(%r12),%xmm4
+	movdqa	-112(%r12),%xmm5
+	movdqa	-96(%r12),%xmm2
+	pand	112(%r10),%xmm4
+	movdqa	-80(%r12),%xmm3
+	pand	128(%r10),%xmm5
+	por	%xmm4,%xmm0
+	pand	144(%r10),%xmm2
+	por	%xmm5,%xmm1
+	pand	160(%r10),%xmm3
+	por	%xmm2,%xmm0
+	por	%xmm3,%xmm1
+	movdqa	-64(%r12),%xmm4
+	movdqa	-48(%r12),%xmm5
+	movdqa	-32(%r12),%xmm2
+	pand	176(%r10),%xmm4
+	movdqa	-16(%r12),%xmm3
+	pand	192(%r10),%xmm5
+	por	%xmm4,%xmm0
+	pand	208(%r10),%xmm2
+	por	%xmm5,%xmm1
+	pand	224(%r10),%xmm3
+	por	%xmm2,%xmm0
+	por	%xmm3,%xmm1
+	movdqa	0(%r12),%xmm4
+	movdqa	16(%r12),%xmm5
+	movdqa	32(%r12),%xmm2
+	pand	240(%r10),%xmm4
+	movdqa	48(%r12),%xmm3
+	pand	256(%r10),%xmm5
+	por	%xmm4,%xmm0
+	pand	272(%r10),%xmm2
+	por	%xmm5,%xmm1
+	pand	288(%r10),%xmm3
+	por	%xmm2,%xmm0
+	por	%xmm3,%xmm1
+	por	%xmm1,%xmm0
+	pshufd	$0x4e,%xmm0,%xmm1	/* swap the two 64-bit halves ... */
+	por	%xmm1,%xmm0	/* ... and fold them, leaving the selected word in the low half */
+	leaq	256(%r12),%r12	/* advance to the next 256-byte table row */
+.byte	102,72,15,126,195	/* movq %xmm0,%rbx: rbx = gathered multiplier word b */
+
+	movq	%r13,16+8(%rsp)	/* remember the table end for the outer-loop test */
+	movq	%rdi,56+8(%rsp)	/* remember the result pointer; %rdi is reused as an accumulator */
+
+	movq	(%r8),%r8	/* load the constant n0 that %r8 pointed to */
+	movq	(%rsi),%rax	/* a[0] */
+	leaq	(%rsi,%r9,1),%rsi	/* rsi = end of a; words are addressed with negative offsets from here */
+	negq	%r9	/* r9 = -byte length */
+
+	movq	%r8,%rbp
+	mulq	%rbx	/* rdx:rax = a[0] * b */
+	movq	%rax,%r10
+	movq	(%rcx),%rax	/* n[0] */
+
+	imulq	%r10,%rbp	/* m = low word of a[0]*b times n0 */
+	leaq	64+8(%rsp),%r14	/* r14 = write cursor into the temporaries t */
+	movq	%rdx,%r11
+
+	mulq	%rbp	/* rdx:rax = n[0] * m */
+	addq	%rax,%r10	/* only the carry of this sum is consumed; %r10 is overwritten before it is read again */
+	movq	8(%rsi,%r9,1),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	8(%rcx),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	16(%rsi,%r9,1),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	leaq	32(%r9),%r15	/* r15 = negative byte index; the loop below ends when it reaches 0 */
+	leaq	32(%rcx),%rcx
+	adcq	$0,%rdx
+	movq	%rdi,(%r14)
+	movq	%rdx,%r13
+	jmp	.L1st4x
+
+.align	32
+.L1st4x:	/* first pass, four words per iteration: t = a*b + n*m shifted down one word; a*b and n*m carries alternate through %r10/%r11 and %r13/%rdi */
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	-16(%rcx),%rax
+	leaq	32(%r14),%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	-8(%rsi,%r15,1),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-24(%r14)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	-8(%rcx),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	(%rsi,%r15,1),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%rdi,-16(%r14)
+	movq	%rdx,%r13
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	0(%rcx),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	8(%rsi,%r15,1),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-8(%r14)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	8(%rcx),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	16(%rsi,%r15,1),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	leaq	32(%rcx),%rcx
+	adcq	$0,%rdx
+	movq	%rdi,(%r14)
+	movq	%rdx,%r13
+
+	addq	$32,%r15
+	jnz	.L1st4x
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	-16(%rcx),%rax
+	leaq	32(%r14),%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	-8(%rsi),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-24(%r14)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	-8(%rcx),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	(%rsi,%r9,1),%rax	/* reload a[0] for the next pass */
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%rdi,-16(%r14)
+	movq	%rdx,%r13
+
+	leaq	(%rcx,%r9,1),%rcx	/* rewind the modulus pointer to n[0] */
+
+	xorq	%rdi,%rdi
+	addq	%r10,%r13
+	adcq	$0,%rdi	/* rdi = top carry word of this pass */
+	movq	%r13,-8(%r14)
+
+	jmp	.Louter4x
+
+.align	32
+.Louter4x:	/* one pass per remaining table row: gather the next word b, then t = (t + a*b + n*m) shifted down one word */
+	leaq	16+128(%r14),%rdx	/* rdx = mask area + 128, so the masks are at -128(%rdx)..112(%rdx) */
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	movdqa	-128(%r12),%xmm0
+	movdqa	-112(%r12),%xmm1
+	movdqa	-96(%r12),%xmm2
+	movdqa	-80(%r12),%xmm3
+	pand	-128(%rdx),%xmm0
+	pand	-112(%rdx),%xmm1
+	por	%xmm0,%xmm4
+	pand	-96(%rdx),%xmm2
+	por	%xmm1,%xmm5
+	pand	-80(%rdx),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	-64(%r12),%xmm0
+	movdqa	-48(%r12),%xmm1
+	movdqa	-32(%r12),%xmm2
+	movdqa	-16(%r12),%xmm3
+	pand	-64(%rdx),%xmm0
+	pand	-48(%rdx),%xmm1
+	por	%xmm0,%xmm4
+	pand	-32(%rdx),%xmm2
+	por	%xmm1,%xmm5
+	pand	-16(%rdx),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	0(%r12),%xmm0
+	movdqa	16(%r12),%xmm1
+	movdqa	32(%r12),%xmm2
+	movdqa	48(%r12),%xmm3
+	pand	0(%rdx),%xmm0
+	pand	16(%rdx),%xmm1
+	por	%xmm0,%xmm4
+	pand	32(%rdx),%xmm2
+	por	%xmm1,%xmm5
+	pand	48(%rdx),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	64(%r12),%xmm0
+	movdqa	80(%r12),%xmm1
+	movdqa	96(%r12),%xmm2
+	movdqa	112(%r12),%xmm3
+	pand	64(%rdx),%xmm0
+	pand	80(%rdx),%xmm1
+	por	%xmm0,%xmm4
+	pand	96(%rdx),%xmm2
+	por	%xmm1,%xmm5
+	pand	112(%rdx),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	por	%xmm5,%xmm4
+	pshufd	$0x4e,%xmm4,%xmm0
+	por	%xmm4,%xmm0
+	leaq	256(%r12),%r12
+.byte	102,72,15,126,195	/* movq %xmm0,%rbx: rbx = gathered multiplier word b */
+
+	movq	(%r14,%r9,1),%r10	/* t[0] */
+	movq	%r8,%rbp
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	(%rcx),%rax
+	adcq	$0,%rdx
+
+	imulq	%r10,%rbp	/* m = (t[0] + low word of a[0]*b) times n0 */
+	movq	%rdx,%r11
+	movq	%rdi,(%r14)	/* store the previous pass's top carry word */
+
+	leaq	(%r14,%r9,1),%r14	/* rewind the cursor to the start of t */
+
+	mulq	%rbp
+	addq	%rax,%r10	/* only the carry of this sum is consumed; %r10 is overwritten before it is read again */
+	movq	8(%rsi,%r9,1),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	8(%rcx),%rax
+	adcq	$0,%rdx
+	addq	8(%r14),%r11
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	16(%rsi,%r9,1),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	leaq	32(%r9),%r15
+	leaq	32(%rcx),%rcx
+	adcq	$0,%rdx
+	movq	%rdx,%r13
+	jmp	.Linner4x
+
+.align	32
+.Linner4x:
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	-16(%rcx),%rax
+	adcq	$0,%rdx
+	addq	16(%r14),%r10
+	leaq	32(%r14),%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	-8(%rsi,%r15,1),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%rdi,-32(%r14)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	-8(%rcx),%rax
+	adcq	$0,%rdx
+	addq	-8(%r14),%r11
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	(%rsi,%r15,1),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%r13,-24(%r14)
+	movq	%rdx,%r13
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	0(%rcx),%rax
+	adcq	$0,%rdx
+	addq	(%r14),%r10
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	8(%rsi,%r15,1),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%rdi,-16(%r14)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	8(%rcx),%rax
+	adcq	$0,%rdx
+	addq	8(%r14),%r11
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	16(%rsi,%r15,1),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	leaq	32(%rcx),%rcx
+	adcq	$0,%rdx
+	movq	%r13,-8(%r14)
+	movq	%rdx,%r13
+
+	addq	$32,%r15
+	jnz	.Linner4x
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	-16(%rcx),%rax
+	adcq	$0,%rdx
+	addq	16(%r14),%r10
+	leaq	32(%r14),%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	-8(%rsi),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%rdi,-32(%r14)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	%rbp,%rax	/* operands swapped for the last product: rax = m ... */
+	movq	-8(%rcx),%rbp	/* ... and rbp = top modulus word, which the exit code below compares against */
+	adcq	$0,%rdx
+	addq	-8(%r14),%r11
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	(%rsi,%r9,1),%rax	/* reload a[0] for the next pass */
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%r13,-24(%r14)
+	movq	%rdx,%r13
+
+	movq	%rdi,-16(%r14)
+	leaq	(%rcx,%r9,1),%rcx	/* rewind the modulus pointer to n[0] */
+
+	xorq	%rdi,%rdi
+	addq	%r10,%r13
+	adcq	$0,%rdi
+	addq	(%r14),%r13
+	adcq	$0,%rdi	/* rdi = top carry word of this pass */
+	movq	%r13,-8(%r14)
+
+	cmpq	16+8(%rsp),%r12	/* loop until the table pointer reaches the saved table end */
+	jb	.Louter4x
+	xorq	%rax,%rax
+	subq	%r13,%rbp	/* CF = 1 if the top result word exceeds the top modulus word */
+	adcq	%r15,%r15	/* r15 is 0 after the loop, so r15 = CF */
+	orq	%r15,%rdi
+	subq	%rdi,%rax	/* rax = 0 - (carry word OR CF): the mask consumed at .Lsqr4x_sub_entry */
+	leaq	(%r14,%r9,1),%rbx	/* rbx = start of t */
+	movq	(%rcx),%r12	/* n[0] */
+	leaq	(%rcx),%rbp	/* rbp = modulus pointer */
+	movq	%r9,%rcx
+	sarq	$3+2,%rcx	/* rcx = -(byte length / 32): negative count of 4-word groups */
+	movq	56+8(%rsp),%rdi	/* rdi = saved result pointer */
+	decq	%r12	/* NOT(n[0]-1) equals -n[0], supplying the +1 of the two's-complement negation */
+	xorq	%r10,%r10	/* no incoming carry */
+	movq	8(%rbp),%r13
+	movq	16(%rbp),%r14
+	movq	24(%rbp),%r15
+	jmp	.Lsqr4x_sub_entry	/* tail-jump into __bn_post4x_internal; its ret returns to this routine's caller */
+.size	mul4x_internal,.-mul4x_internal
+.globl	bn_power5	/* bn_power5: squares the input five times (each square followed by __bn_post4x_internal), then multiplies by a gathered table entry via mul4x_internal. */
+.hidden bn_power5	/* In: %rdi=result, %rsi=input, %rdx=table, %rcx=modulus, %r8=pointer to n0, %r9d=word count; the table index is the first stack argument, read by mul4x_internal. */
+.type	bn_power5,@function	/* Out: %rax=1. NOTE(review): argument meanings are read off the register usage below - confirm against the C prototype. */
+.align	32
+bn_power5:
+.cfi_startproc	
+	movq	%rsp,%rax	/* keep the entry %rsp */
+.cfi_def_cfa_register	%rax
+	pushq	%rbx
+.cfi_offset	%rbx,-16
+	pushq	%rbp
+.cfi_offset	%rbp,-24
+	pushq	%r12
+.cfi_offset	%r12,-32
+	pushq	%r13
+.cfi_offset	%r13,-40
+	pushq	%r14
+.cfi_offset	%r14,-48
+	pushq	%r15
+.cfi_offset	%r15,-56
+.Lpower5_prologue:
+
+	shll	$3,%r9d	/* r9 = byte length (8 * word count) */
+	leal	(%r9,%r9,2),%r10d	/* r10 = 3 * byte length */
+	negq	%r9
+	movq	(%r8),%r8	/* load the constant n0 that %r8 pointed to */
+/* Choose the frame base in %rbp: start from rsp - 320 - 2*bytes and shift it */
+/* further down by an amount derived from its distance to %rdi modulo 4096. */
+
+
+
+
+
+
+	leaq	-320(%rsp,%r9,2),%r11
+	movq	%rsp,%rbp
+	subq	%rdi,%r11
+	andq	$4095,%r11	/* r11 = (candidate frame - result pointer) mod 4096 */
+	cmpq	%r11,%r10
+	jb	.Lpwr_sp_alt
+	subq	%r11,%rbp
+	leaq	-320(%rbp,%r9,2),%rbp
+	jmp	.Lpwr_sp_done
+
+.align	32
+.Lpwr_sp_alt:
+	leaq	4096-320(,%r9,2),%r10
+	leaq	-320(%rbp,%r9,2),%rbp
+	subq	%r10,%r11
+	movq	$0,%r10
+	cmovcq	%r10,%r11	/* clamp the adjustment to 0 if the subtraction borrowed */
+	subq	%r11,%rbp
+.Lpwr_sp_done:
+	andq	$-64,%rbp	/* align the frame base to 64 bytes */
+	movq	%rsp,%r11
+	subq	%rbp,%r11
+	andq	$-4096,%r11
+	leaq	(%r11,%rbp,1),%rsp	/* page walk: step %rsp down to %rbp at most 4096 bytes at a time, reading a word at each step */
+	movq	(%rsp),%r10
+	cmpq	%rbp,%rsp
+	ja	.Lpwr_page_walk
+	jmp	.Lpwr_page_walk_done
+
+.Lpwr_page_walk:
+	leaq	-4096(%rsp),%rsp
+	movq	(%rsp),%r10
+	cmpq	%rbp,%rsp
+	ja	.Lpwr_page_walk
+.Lpwr_page_walk_done:
+
+	movq	%r9,%r10	/* r10 = -byte length */
+	negq	%r9	/* r9 = +byte length */
+/* Frame slots written here: 32(%rsp) = n0 value, 40(%rsp) = entry %rsp. */
+/* The callees see them at 32+8(%rsp) and 40+8(%rsp) because of the return address. */
+
+
+
+
+
+
+
+
+	movq	%r8,32(%rsp)
+	movq	%rax,40(%rsp)	/* the CFI expression below computes the CFA as *(rsp+40)+8 */
+.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
+.Lpower5_body:
+.byte	102,72,15,110,207	/* movq %rdi,%xmm1: park the result pointer */
+.byte	102,72,15,110,209	/* movq %rcx,%xmm2: park the modulus pointer */
+.byte	102,73,15,110,218	/* movq %r10,%xmm3: park -byte length */
+.byte	102,72,15,110,226	/* movq %rdx,%xmm4: park the table pointer */
+
+	call	__bn_sqr8x_internal	/* five rounds of square + post-processing */
+	call	__bn_post4x_internal
+	call	__bn_sqr8x_internal
+	call	__bn_post4x_internal
+	call	__bn_sqr8x_internal
+	call	__bn_post4x_internal
+	call	__bn_sqr8x_internal
+	call	__bn_post4x_internal
+	call	__bn_sqr8x_internal
+	call	__bn_post4x_internal
+
+.byte	102,72,15,126,209	/* movq %xmm2,%rcx: modulus pointer */
+.byte	102,72,15,126,226	/* movq %xmm4,%rdx: table pointer */
+	movq	%rsi,%rdi	/* __bn_post4x_internal left the result pointer in %rsi; the product is written to the same buffer */
+	movq	40(%rsp),%rax	/* entry %rsp, through which mul4x_internal reads the table index */
+	leaq	32(%rsp),%r8	/* mul4x_internal dereferences %r8, so pass the address of the saved n0 */
+
+	call	mul4x_internal
+
+	movq	40(%rsp),%rsi	/* reload the entry %rsp */
+.cfi_def_cfa	%rsi,8
+	movq	$1,%rax	/* return value 1 */
+	movq	-48(%rsi),%r15
+.cfi_restore	%r15
+	movq	-40(%rsi),%r14
+.cfi_restore	%r14
+	movq	-32(%rsi),%r13
+.cfi_restore	%r13
+	movq	-24(%rsi),%r12
+.cfi_restore	%r12
+	movq	-16(%rsi),%rbp
+.cfi_restore	%rbp
+	movq	-8(%rsi),%rbx
+.cfi_restore	%rbx
+	leaq	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
+.Lpower5_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+.cfi_endproc	
+.size	bn_power5,.-bn_power5
+
+.globl	bn_sqr8x_internal
+.hidden bn_sqr8x_internal
+.hidden	bn_sqr8x_internal
+.type	bn_sqr8x_internal,@function
+.align	32
+bn_sqr8x_internal:
+__bn_sqr8x_internal:
+/* Squares the number at (%rsi) into the 2*%r9-byte scratch area at 48+8(%rsp), */
+/* then falls through to __bn_sqr8x_reduction, which reduces it by the modulus. */
+/* Register state expected here, as set up by bn_power5 in this file: */
+/*   %rsi = input words, %r9 = byte length, %r10 = -%r9, */
+/*   %xmm2 = modulus pointer, %xmm3 = -%r9, 32+8(%rsp) = n0 value. */
+/* Stage 1: accumulate the cross products a[i]*a[j] with i below j. */
+/* Stage 2 (.Lsqr4x_shift_n_add): double that sum and add the squares a[i]*a[i]. */
+/* Stage 3 (__bn_sqr8x_reduction): reduce by the modulus, eight words per pass. */
+/* NOTE(review): the symbol is .globl, so callers outside this view may exist - confirm they set up the same state. */
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+	leaq	32(%r10),%rbp	/* rbp = 32 - byte length (r10 holds -byte length on entry) */
+	leaq	(%rsi,%r9,1),%rsi	/* rsi = end of the input; words are addressed with negative offsets */
+
+	movq	%r9,%rcx
+
+
+	movq	-32(%rsi,%rbp,1),%r14	/* a[0] */
+	leaq	48+8(%rsp,%r9,2),%rdi	/* rdi = end of the 2*bytes scratch area */
+	movq	-24(%rsi,%rbp,1),%rax	/* a[1] */
+	leaq	-32(%rdi,%rbp,1),%rdi
+	movq	-16(%rsi,%rbp,1),%rbx	/* a[2] */
+	movq	%rax,%r15
+
+	mulq	%r14	/* a[1]*a[0] */
+	movq	%rax,%r10
+	movq	%rbx,%rax
+	movq	%rdx,%r11
+	movq	%r10,-24(%rdi,%rbp,1)
+
+	mulq	%r14	/* a[2]*a[0] */
+	addq	%rax,%r11
+	movq	%rbx,%rax
+	adcq	$0,%rdx
+	movq	%r11,-16(%rdi,%rbp,1)
+	movq	%rdx,%r10
+
+
+	movq	-8(%rsi,%rbp,1),%rbx	/* a[3] */
+	mulq	%r15	/* a[2]*a[1] */
+	movq	%rax,%r12
+	movq	%rbx,%rax
+	movq	%rdx,%r13
+
+	leaq	(%rbp),%rcx	/* rcx = negative byte index for the loop below */
+	mulq	%r14	/* a[3]*a[0] */
+	addq	%rax,%r10
+	movq	%rbx,%rax
+	movq	%rdx,%r11
+	adcq	$0,%r11
+	addq	%r12,%r10
+	adcq	$0,%r11
+	movq	%r10,-8(%rdi,%rcx,1)
+	jmp	.Lsqr4x_1st
+
+.align	32
+.Lsqr4x_1st:	/* multiply the remaining words by a[0] (%r14) and a[1] (%r15), storing the sums; exits when %rcx reaches 0 */
+	movq	(%rsi,%rcx,1),%rbx
+	mulq	%r15
+	addq	%rax,%r13
+	movq	%rbx,%rax
+	movq	%rdx,%r12
+	adcq	$0,%r12
+
+	mulq	%r14
+	addq	%rax,%r11
+	movq	%rbx,%rax
+	movq	8(%rsi,%rcx,1),%rbx
+	movq	%rdx,%r10
+	adcq	$0,%r10
+	addq	%r13,%r11
+	adcq	$0,%r10
+
+
+	mulq	%r15
+	addq	%rax,%r12
+	movq	%rbx,%rax
+	movq	%r11,(%rdi,%rcx,1)
+	movq	%rdx,%r13
+	adcq	$0,%r13
+
+	mulq	%r14
+	addq	%rax,%r10
+	movq	%rbx,%rax
+	movq	16(%rsi,%rcx,1),%rbx
+	movq	%rdx,%r11
+	adcq	$0,%r11
+	addq	%r12,%r10
+	adcq	$0,%r11
+
+	mulq	%r15
+	addq	%rax,%r13
+	movq	%rbx,%rax
+	movq	%r10,8(%rdi,%rcx,1)
+	movq	%rdx,%r12
+	adcq	$0,%r12
+
+	mulq	%r14
+	addq	%rax,%r11
+	movq	%rbx,%rax
+	movq	24(%rsi,%rcx,1),%rbx
+	movq	%rdx,%r10
+	adcq	$0,%r10
+	addq	%r13,%r11
+	adcq	$0,%r10
+
+
+	mulq	%r15
+	addq	%rax,%r12
+	movq	%rbx,%rax
+	movq	%r11,16(%rdi,%rcx,1)
+	movq	%rdx,%r13
+	adcq	$0,%r13
+	leaq	32(%rcx),%rcx
+
+	mulq	%r14
+	addq	%rax,%r10
+	movq	%rbx,%rax
+	movq	%rdx,%r11
+	adcq	$0,%r11
+	addq	%r12,%r10
+	adcq	$0,%r11
+	movq	%r10,-8(%rdi,%rcx,1)
+
+	cmpq	$0,%rcx
+	jne	.Lsqr4x_1st
+
+	mulq	%r15
+	addq	%rax,%r13
+	leaq	16(%rbp),%rbp	/* advance two words for the next outer pass */
+	adcq	$0,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+
+	movq	%r13,(%rdi)
+	movq	%rdx,%r12
+	movq	%rdx,8(%rdi)
+	jmp	.Lsqr4x_outer
+
+.align	32
+.Lsqr4x_outer:	/* next pair of multipliers (%r14, %r15); products are added to what the scratch area already holds */
+	movq	-32(%rsi,%rbp,1),%r14
+	leaq	48+8(%rsp,%r9,2),%rdi
+	movq	-24(%rsi,%rbp,1),%rax
+	leaq	-32(%rdi,%rbp,1),%rdi
+	movq	-16(%rsi,%rbp,1),%rbx
+	movq	%rax,%r15
+
+	mulq	%r14
+	movq	-24(%rdi,%rbp,1),%r10
+	addq	%rax,%r10
+	movq	%rbx,%rax
+	adcq	$0,%rdx
+	movq	%r10,-24(%rdi,%rbp,1)
+	movq	%rdx,%r11
+
+	mulq	%r14
+	addq	%rax,%r11
+	movq	%rbx,%rax
+	adcq	$0,%rdx
+	addq	-16(%rdi,%rbp,1),%r11
+	movq	%rdx,%r10
+	adcq	$0,%r10
+	movq	%r11,-16(%rdi,%rbp,1)
+
+	xorq	%r12,%r12
+
+	movq	-8(%rsi,%rbp,1),%rbx
+	mulq	%r15
+	addq	%rax,%r12
+	movq	%rbx,%rax
+	adcq	$0,%rdx
+	addq	-8(%rdi,%rbp,1),%r12
+	movq	%rdx,%r13
+	adcq	$0,%r13
+
+	mulq	%r14
+	addq	%rax,%r10
+	movq	%rbx,%rax
+	adcq	$0,%rdx
+	addq	%r12,%r10
+	movq	%rdx,%r11
+	adcq	$0,%r11
+	movq	%r10,-8(%rdi,%rbp,1)
+
+	leaq	(%rbp),%rcx
+	jmp	.Lsqr4x_inner
+
+.align	32
+.Lsqr4x_inner:	/* two words per iteration; exits when %rcx reaches 0 */
+	movq	(%rsi,%rcx,1),%rbx
+	mulq	%r15
+	addq	%rax,%r13
+	movq	%rbx,%rax
+	movq	%rdx,%r12
+	adcq	$0,%r12
+	addq	(%rdi,%rcx,1),%r13
+	adcq	$0,%r12
+
+.byte	0x67
+	mulq	%r14
+	addq	%rax,%r11
+	movq	%rbx,%rax
+	movq	8(%rsi,%rcx,1),%rbx
+	movq	%rdx,%r10
+	adcq	$0,%r10
+	addq	%r13,%r11
+	adcq	$0,%r10
+
+	mulq	%r15
+	addq	%rax,%r12
+	movq	%r11,(%rdi,%rcx,1)
+	movq	%rbx,%rax
+	movq	%rdx,%r13
+	adcq	$0,%r13
+	addq	8(%rdi,%rcx,1),%r12
+	leaq	16(%rcx),%rcx
+	adcq	$0,%r13
+
+	mulq	%r14
+	addq	%rax,%r10
+	movq	%rbx,%rax
+	adcq	$0,%rdx
+	addq	%r12,%r10
+	movq	%rdx,%r11
+	adcq	$0,%r11
+	movq	%r10,-8(%rdi,%rcx,1)
+
+	cmpq	$0,%rcx
+	jne	.Lsqr4x_inner
+
+.byte	0x67
+	mulq	%r15
+	addq	%rax,%r13
+	adcq	$0,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+
+	movq	%r13,(%rdi)
+	movq	%rdx,%r12
+	movq	%rdx,8(%rdi)
+
+	addq	$16,%rbp
+	jnz	.Lsqr4x_outer
+
+
+	movq	-32(%rsi),%r14	/* final pass over the last four input words */
+	leaq	48+8(%rsp,%r9,2),%rdi
+	movq	-24(%rsi),%rax
+	leaq	-32(%rdi,%rbp,1),%rdi
+	movq	-16(%rsi),%rbx
+	movq	%rax,%r15
+
+	mulq	%r14
+	addq	%rax,%r10
+	movq	%rbx,%rax
+	movq	%rdx,%r11
+	adcq	$0,%r11
+
+	mulq	%r14
+	addq	%rax,%r11
+	movq	%rbx,%rax
+	movq	%r10,-24(%rdi)
+	movq	%rdx,%r10
+	adcq	$0,%r10
+	addq	%r13,%r11
+	movq	-8(%rsi),%rbx
+	adcq	$0,%r10
+
+	mulq	%r15
+	addq	%rax,%r12
+	movq	%rbx,%rax
+	movq	%r11,-16(%rdi)
+	movq	%rdx,%r13
+	adcq	$0,%r13
+
+	mulq	%r14
+	addq	%rax,%r10
+	movq	%rbx,%rax
+	movq	%rdx,%r11
+	adcq	$0,%r11
+	addq	%r12,%r10
+	adcq	$0,%r11
+	movq	%r10,-8(%rdi)
+
+	mulq	%r15
+	addq	%rax,%r13
+	movq	-16(%rsi),%rax
+	adcq	$0,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+
+	movq	%r13,(%rdi)
+	movq	%rdx,%r12
+	movq	%rdx,8(%rdi)
+
+	mulq	%rbx	/* product of the last two input words */
+	addq	$16,%rbp
+	xorq	%r14,%r14	/* r14 = bit shifted in at the bottom of the doubling (none) */
+	subq	%r9,%rbp	/* rbp becomes the negative byte index for the shift-and-add stage */
+	xorq	%r15,%r15	/* r15 = saved carry flag (none yet) */
+
+	addq	%r12,%rax
+	adcq	$0,%rdx
+	movq	%rax,8(%rdi)
+	movq	%rdx,16(%rdi)
+	movq	%r15,24(%rdi)	/* zero the top word of the scratch area */
+
+	movq	-16(%rsi,%rbp,1),%rax	/* a[0] */
+	leaq	48+8(%rsp),%rdi	/* rdi = start of the scratch area */
+	xorq	%r10,%r10
+	movq	8(%rdi),%r11
+
+	leaq	(%r14,%r10,2),%r12	/* doubled word = 2*word + bit carried from the word below */
+	shrq	$63,%r10	/* keep the bit shifted out of the top */
+	leaq	(%rcx,%r11,2),%r13	/* %rcx is 0 here (the loops above exit on %rcx == 0), so this is 2*%r11 */
+	shrq	$63,%r11
+	orq	%r10,%r13
+	movq	16(%rdi),%r10
+	movq	%r11,%r14
+	mulq	%rax	/* a[i] squared */
+	negq	%r15	/* turn the saved carry (0 or -1) back into CF */
+	movq	24(%rdi),%r11
+	adcq	%rax,%r12
+	movq	-8(%rsi,%rbp,1),%rax
+	movq	%r12,(%rdi)
+	adcq	%rdx,%r13
+
+	leaq	(%r14,%r10,2),%rbx
+	movq	%r13,8(%rdi)
+	sbbq	%r15,%r15	/* save CF as 0 or -1 across the flag-clobbering shifts */
+	shrq	$63,%r10
+	leaq	(%rcx,%r11,2),%r8
+	shrq	$63,%r11
+	orq	%r10,%r8
+	movq	32(%rdi),%r10
+	movq	%r11,%r14
+	mulq	%rax
+	negq	%r15
+	movq	40(%rdi),%r11
+	adcq	%rax,%rbx
+	movq	0(%rsi,%rbp,1),%rax
+	movq	%rbx,16(%rdi)
+	adcq	%rdx,%r8
+	leaq	16(%rbp),%rbp
+	movq	%r8,24(%rdi)
+	sbbq	%r15,%r15
+	leaq	64(%rdi),%rdi
+	jmp	.Lsqr4x_shift_n_add
+
+.align	32
+.Lsqr4x_shift_n_add:	/* four input words (eight output words) per iteration: double the stored cross products and add each a[i] squared */
+	leaq	(%r14,%r10,2),%r12
+	shrq	$63,%r10
+	leaq	(%rcx,%r11,2),%r13
+	shrq	$63,%r11
+	orq	%r10,%r13
+	movq	-16(%rdi),%r10
+	movq	%r11,%r14
+	mulq	%rax
+	negq	%r15
+	movq	-8(%rdi),%r11
+	adcq	%rax,%r12
+	movq	-8(%rsi,%rbp,1),%rax
+	movq	%r12,-32(%rdi)
+	adcq	%rdx,%r13
+
+	leaq	(%r14,%r10,2),%rbx
+	movq	%r13,-24(%rdi)
+	sbbq	%r15,%r15
+	shrq	$63,%r10
+	leaq	(%rcx,%r11,2),%r8
+	shrq	$63,%r11
+	orq	%r10,%r8
+	movq	0(%rdi),%r10
+	movq	%r11,%r14
+	mulq	%rax
+	negq	%r15
+	movq	8(%rdi),%r11
+	adcq	%rax,%rbx
+	movq	0(%rsi,%rbp,1),%rax
+	movq	%rbx,-16(%rdi)
+	adcq	%rdx,%r8
+
+	leaq	(%r14,%r10,2),%r12
+	movq	%r8,-8(%rdi)
+	sbbq	%r15,%r15
+	shrq	$63,%r10
+	leaq	(%rcx,%r11,2),%r13
+	shrq	$63,%r11
+	orq	%r10,%r13
+	movq	16(%rdi),%r10
+	movq	%r11,%r14
+	mulq	%rax
+	negq	%r15
+	movq	24(%rdi),%r11
+	adcq	%rax,%r12
+	movq	8(%rsi,%rbp,1),%rax
+	movq	%r12,0(%rdi)
+	adcq	%rdx,%r13
+
+	leaq	(%r14,%r10,2),%rbx
+	movq	%r13,8(%rdi)
+	sbbq	%r15,%r15
+	shrq	$63,%r10
+	leaq	(%rcx,%r11,2),%r8
+	shrq	$63,%r11
+	orq	%r10,%r8
+	movq	32(%rdi),%r10
+	movq	%r11,%r14
+	mulq	%rax
+	negq	%r15
+	movq	40(%rdi),%r11
+	adcq	%rax,%rbx
+	movq	16(%rsi,%rbp,1),%rax
+	movq	%rbx,16(%rdi)
+	adcq	%rdx,%r8
+	movq	%r8,24(%rdi)
+	sbbq	%r15,%r15
+	leaq	64(%rdi),%rdi
+	addq	$32,%rbp
+	jnz	.Lsqr4x_shift_n_add
+
+	leaq	(%r14,%r10,2),%r12	/* last two input words, handled outside the loop */
+.byte	0x67
+	shrq	$63,%r10
+	leaq	(%rcx,%r11,2),%r13
+	shrq	$63,%r11
+	orq	%r10,%r13
+	movq	-16(%rdi),%r10
+	movq	%r11,%r14
+	mulq	%rax
+	negq	%r15
+	movq	-8(%rdi),%r11
+	adcq	%rax,%r12
+	movq	-8(%rsi),%rax
+	movq	%r12,-32(%rdi)
+	adcq	%rdx,%r13
+
+	leaq	(%r14,%r10,2),%rbx
+	movq	%r13,-24(%rdi)
+	sbbq	%r15,%r15
+	shrq	$63,%r10
+	leaq	(%rcx,%r11,2),%r8
+	shrq	$63,%r11
+	orq	%r10,%r8
+	mulq	%rax
+	negq	%r15
+	adcq	%rax,%rbx
+	adcq	%rdx,%r8
+	movq	%rbx,-16(%rdi)
+	movq	%r8,-8(%rdi)
+.byte	102,72,15,126,213	/* movq %xmm2,%rbp: rbp = modulus pointer */
+__bn_sqr8x_reduction:	/* reduce the double-length value in the scratch area by the modulus at (%rbp), eight words per pass */
+	xorq	%rax,%rax	/* rax = carry word between passes (none yet) */
+	leaq	(%r9,%rbp,1),%rcx	/* rcx = end of the modulus */
+	leaq	48+8(%rsp,%r9,2),%rdx	/* rdx = end of the scratch area */
+	movq	%rcx,0+8(%rsp)	/* 0+8(%rsp) = end of the modulus */
+	leaq	48+8(%rsp,%r9,1),%rdi	/* rdi = middle of the scratch area */
+	movq	%rdx,8+8(%rsp)	/* 8+8(%rsp) = end of the scratch area */
+	negq	%r9
+	jmp	.L8x_reduction_loop
+
+.align	32
+.L8x_reduction_loop:
+	leaq	(%rdi,%r9,1),%rdi	/* step back by the byte length (%r9 is negative here) */
+.byte	0x66
+	movq	0(%rdi),%rbx	/* load the eight words this pass eliminates */
+	movq	8(%rdi),%r9
+	movq	16(%rdi),%r10
+	movq	24(%rdi),%r11
+	movq	32(%rdi),%r12
+	movq	40(%rdi),%r13
+	movq	48(%rdi),%r14
+	movq	56(%rdi),%r15
+	movq	%rax,(%rdx)	/* park the carry word at the end of the scratch area */
+	leaq	64(%rdi),%rdi
+
+.byte	0x67
+	movq	%rbx,%r8
+	imulq	32+8(%rsp),%rbx	/* m = lowest word times n0 */
+	movq	0(%rbp),%rax
+	movl	$8,%ecx
+	jmp	.L8x_reduce
+
+.align	32
+.L8x_reduce:	/* eight iterations: add m times the first eight modulus words and shift the window down one word */
+	mulq	%rbx
+	movq	8(%rbp),%rax
+	negq	%r8	/* CF = (%r8 != 0); NOTE(review): presumably stands in for the carry of adding the low product word - confirm */
+	movq	%rdx,%r8
+	adcq	$0,%r8
+
+	mulq	%rbx
+	addq	%rax,%r9
+	movq	16(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r9,%r8
+	movq	%rbx,48-8+8(%rsp,%rcx,8)	/* remember this m for the tail loop */
+	movq	%rdx,%r9
+	adcq	$0,%r9
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	24(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r9
+	movq	32+8(%rsp),%rsi	/* n0 */
+	movq	%rdx,%r10
+	adcq	$0,%r10
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	32(%rbp),%rax
+	adcq	$0,%rdx
+	imulq	%r8,%rsi	/* next m = new lowest word times n0 */
+	addq	%r11,%r10
+	movq	%rdx,%r11
+	adcq	$0,%r11
+
+	mulq	%rbx
+	addq	%rax,%r12
+	movq	40(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r12,%r11
+	movq	%rdx,%r12
+	adcq	$0,%r12
+
+	mulq	%rbx
+	addq	%rax,%r13
+	movq	48(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r13,%r12
+	movq	%rdx,%r13
+	adcq	$0,%r13
+
+	mulq	%rbx
+	addq	%rax,%r14
+	movq	56(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r14,%r13
+	movq	%rdx,%r14
+	adcq	$0,%r14
+
+	mulq	%rbx
+	movq	%rsi,%rbx
+	addq	%rax,%r15
+	movq	0(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r15,%r14
+	movq	%rdx,%r15
+	adcq	$0,%r15
+
+	decl	%ecx
+	jnz	.L8x_reduce
+
+	leaq	64(%rbp),%rbp	/* next eight modulus words */
+	xorq	%rax,%rax
+	movq	8+8(%rsp),%rdx
+	cmpq	0+8(%rsp),%rbp
+	jae	.L8x_no_tail	/* modulus is only eight words long: nothing left to multiply */
+
+.byte	0x66
+	addq	0(%rdi),%r8
+	adcq	8(%rdi),%r9
+	adcq	16(%rdi),%r10
+	adcq	24(%rdi),%r11
+	adcq	32(%rdi),%r12
+	adcq	40(%rdi),%r13
+	adcq	48(%rdi),%r14
+	adcq	56(%rdi),%r15
+	sbbq	%rsi,%rsi	/* save CF as 0 or -1 */
+
+	movq	48+56+8(%rsp),%rbx	/* first saved m */
+	movl	$8,%ecx
+	movq	0(%rbp),%rax
+	jmp	.L8x_tail
+
+.align	32
+.L8x_tail:	/* apply the eight saved m values to the next eight modulus words */
+	mulq	%rbx
+	addq	%rax,%r8
+	movq	8(%rbp),%rax
+	movq	%r8,(%rdi)
+	movq	%rdx,%r8
+	adcq	$0,%r8
+
+	mulq	%rbx
+	addq	%rax,%r9
+	movq	16(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r9,%r8
+	leaq	8(%rdi),%rdi
+	movq	%rdx,%r9
+	adcq	$0,%r9
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	24(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r9
+	movq	%rdx,%r10
+	adcq	$0,%r10
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	32(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r11,%r10
+	movq	%rdx,%r11
+	adcq	$0,%r11
+
+	mulq	%rbx
+	addq	%rax,%r12
+	movq	40(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r12,%r11
+	movq	%rdx,%r12
+	adcq	$0,%r12
+
+	mulq	%rbx
+	addq	%rax,%r13
+	movq	48(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r13,%r12
+	movq	%rdx,%r13
+	adcq	$0,%r13
+
+	mulq	%rbx
+	addq	%rax,%r14
+	movq	56(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r14,%r13
+	movq	%rdx,%r14
+	adcq	$0,%r14
+
+	mulq	%rbx
+	movq	48-16+8(%rsp,%rcx,8),%rbx	/* next saved m */
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	addq	%r15,%r14
+	movq	0(%rbp),%rax
+	movq	%rdx,%r15
+	adcq	$0,%r15
+
+	decl	%ecx
+	jnz	.L8x_tail
+
+	leaq	64(%rbp),%rbp
+	movq	8+8(%rsp),%rdx
+	cmpq	0+8(%rsp),%rbp
+	jae	.L8x_tail_done	/* reached the end of the modulus */
+
+	movq	48+56+8(%rsp),%rbx
+	negq	%rsi	/* turn the saved carry back into CF */
+	movq	0(%rbp),%rax
+	adcq	0(%rdi),%r8
+	adcq	8(%rdi),%r9
+	adcq	16(%rdi),%r10
+	adcq	24(%rdi),%r11
+	adcq	32(%rdi),%r12
+	adcq	40(%rdi),%r13
+	adcq	48(%rdi),%r14
+	adcq	56(%rdi),%r15
+	sbbq	%rsi,%rsi
+
+	movl	$8,%ecx
+	jmp	.L8x_tail
+
+.align	32
+.L8x_tail_done:
+	xorq	%rax,%rax
+	addq	(%rdx),%r8	/* add the carry word parked at the end of the scratch area */
+	adcq	$0,%r9
+	adcq	$0,%r10
+	adcq	$0,%r11
+	adcq	$0,%r12
+	adcq	$0,%r13
+	adcq	$0,%r14
+	adcq	$0,%r15
+	adcq	$0,%rax
+
+	negq	%rsi	/* turn the saved carry back into CF */
+.L8x_no_tail:
+	adcq	0(%rdi),%r8
+	adcq	8(%rdi),%r9
+	adcq	16(%rdi),%r10
+	adcq	24(%rdi),%r11
+	adcq	32(%rdi),%r12
+	adcq	40(%rdi),%r13
+	adcq	48(%rdi),%r14
+	adcq	56(%rdi),%r15
+	adcq	$0,%rax	/* rax = carry word into the next pass (and the value left for __bn_post4x_internal) */
+	movq	-8(%rbp),%rcx
+	xorq	%rsi,%rsi
+
+.byte	102,72,15,126,213	/* movq %xmm2,%rbp: rewind to the start of the modulus */
+
+	movq	%r8,0(%rdi)
+	movq	%r9,8(%rdi)
+.byte	102,73,15,126,217	/* movq %xmm3,%r9: r9 = -byte length (%r9 was used as a data register above) */
+	movq	%r10,16(%rdi)
+	movq	%r11,24(%rdi)
+	movq	%r12,32(%rdi)
+	movq	%r13,40(%rdi)
+	movq	%r14,48(%rdi)
+	movq	%r15,56(%rdi)
+	leaq	64(%rdi),%rdi
+
+	cmpq	%rdx,%rdi	/* repeat until the write cursor reaches the end of the scratch area */
+	jb	.L8x_reduction_loop
+	.byte	0xf3,0xc3	/* rep ret */
+.size	bn_sqr8x_internal,.-bn_sqr8x_internal
+.type	__bn_post4x_internal,@function
+.align	32
+__bn_post4x_internal:	/* Masked final pass: out[i] = src[i] + (~n[i] & mask) + carry, four qwords per iteration; n is read via rbp, src via rdi+r9, out via the pointer held in xmm1. */
+	movq	0(%rbp),%r12
+	leaq	(%rdi,%r9,1),%rbx	/* rbx = rdi + r9: address of the addends consumed by the adcq chain below */
+	movq	%r9,%rcx
+.byte	102,72,15,126,207	/* encodes movq %xmm1,%rdi: output pointer */
+	negq	%rax	/* rax becomes the AND mask; NOTE(review): presumably the caller passes 0 or 1 so the mask is 0 or all ones - confirm */
+.byte	102,72,15,126,206	/* encodes movq %xmm1,%rsi */
+	sarq	$3+2,%rcx	/* rcx = r9 >> 5 (32 bytes per iteration); the loop counts rcx UP to zero, so r9 is expected to be negative on entry */
+	decq	%r12	/* first word only: ~(n[0]-1) == -n[0], folding the +1 of a two's-complement negation into word 0 */
+	xorq	%r10,%r10	/* r10 = 0: no carry into the first iteration */
+	movq	8(%rbp),%r13
+	movq	16(%rbp),%r14
+	movq	24(%rbp),%r15
+	jmp	.Lsqr4x_sub_entry	/* first iteration skips the reload so the decremented r12 is used */
+
+.align	16
+.Lsqr4x_sub:
+	movq	0(%rbp),%r12
+	movq	8(%rbp),%r13
+	movq	16(%rbp),%r14
+	movq	24(%rbp),%r15
+.Lsqr4x_sub_entry:
+	leaq	32(%rbp),%rbp
+	notq	%r12
+	notq	%r13
+	notq	%r14
+	notq	%r15
+	andq	%rax,%r12	/* apply the mask: words become 0 when rax == 0 */
+	andq	%rax,%r13
+	andq	%rax,%r14
+	andq	%rax,%r15
+
+	negq	%r10	/* CF = (r10 != 0): restores the carry saved by sbbq in the previous iteration */
+	adcq	0(%rbx),%r12
+	adcq	8(%rbx),%r13
+	adcq	16(%rbx),%r14
+	adcq	24(%rbx),%r15
+	movq	%r12,0(%rdi)	/* movq/leaq do not modify flags, so CF from the adcq chain is still live at sbbq */
+	leaq	32(%rbx),%rbx
+	movq	%r13,8(%rdi)
+	sbbq	%r10,%r10	/* r10 = 0 or -1: save CF across the flag-clobbering incq */
+	movq	%r14,16(%rdi)
+	movq	%r15,24(%rdi)
+	leaq	32(%rdi),%rdi
+
+	incq	%rcx
+	jnz	.Lsqr4x_sub
+
+	movq	%r9,%r10	/* on exit r10 = the entry value of r9 ... */
+	negq	%r9	/* ... and r9 = its negation */
+	.byte	0xf3,0xc3	/* encodes rep ret */
+.size	__bn_post4x_internal,.-__bn_post4x_internal
+.globl	bn_from_montgomery
+.hidden bn_from_montgomery
+.type	bn_from_montgomery,@function
+.align	32
+bn_from_montgomery:	/* Entry point: dispatches to bn_from_mont8x when r9d is a multiple of 8, otherwise returns 0 in eax. */
+	testl	$7,%r9d	/* low three bits of r9d; NOTE(review): r9 is presumably the word count (6th integer argument) - confirm against the C prototype */
+	jz	bn_from_mont8x	/* tail jump: bn_from_mont8x returns directly to our caller */
+	xorl	%eax,%eax	/* eax = 0: size not handled by this implementation */
+	.byte	0xf3,0xc3	/* encodes rep ret */
+.size	bn_from_montgomery,.-bn_from_montgomery
+
+.type	bn_from_mont8x,@function
+.align	32
+bn_from_mont8x:	/* Copies the input at rsi into a stack frame, zero-extends it to twice the length, runs __bn_sqr8x_reduction and __bn_post4x_internal on it, wipes the frame and returns 1. */
+.cfi_startproc	
+.byte	0x67
+	movq	%rsp,%rax	/* rax = entry rsp; it is the CFA register until it is stored at 40(%rsp) */
+.cfi_def_cfa_register	%rax
+	pushq	%rbx
+.cfi_offset	%rbx,-16
+	pushq	%rbp
+.cfi_offset	%rbp,-24
+	pushq	%r12
+.cfi_offset	%r12,-32
+	pushq	%r13
+.cfi_offset	%r13,-40
+	pushq	%r14
+.cfi_offset	%r14,-48
+	pushq	%r15
+.cfi_offset	%r15,-56
+.Lfrom_prologue:
+
+	shll	$3,%r9d	/* r9 *= 8 (presumably words -> bytes - confirm) */
+	leaq	(%r9,%r9,2),%r10	/* r10 = 3 * r9 */
+	negq	%r9	/* r9 is negative from here until the negq after the page walk */
+	movq	(%r8),%r8	/* replace the pointer in r8 by the qword it points to */
+
+
+
+
+
+
+
+
+	leaq	-320(%rsp,%r9,2),%r11	/* candidate frame base: rsp - 320 - 2*|r9| */
+	movq	%rsp,%rbp
+	subq	%rdi,%r11
+	andq	$4095,%r11	/* r11 = (candidate - rdi) mod 4096 */
+	cmpq	%r11,%r10
+	jb	.Lfrom_sp_alt	/* taken when 3*|r9| < r11 */
+	subq	%r11,%rbp	/* otherwise slide the frame down by r11 so it has the same page offset as rdi */
+	leaq	-320(%rbp,%r9,2),%rbp
+	jmp	.Lfrom_sp_done
+
+.align	32
+.Lfrom_sp_alt:
+	leaq	4096-320(,%r9,2),%r10
+	leaq	-320(%rbp,%r9,2),%rbp
+	subq	%r10,%r11
+	movq	$0,%r10	/* movq (not xor) so CF from the subq above is preserved for cmovc */
+	cmovcq	%r10,%r11	/* clamp the adjustment to 0 if the subtraction borrowed */
+	subq	%r11,%rbp
+.Lfrom_sp_done:
+	andq	$-64,%rbp	/* align the frame base down to 64 bytes */
+	movq	%rsp,%r11
+	subq	%rbp,%r11
+	andq	$-4096,%r11
+	leaq	(%r11,%rbp,1),%rsp	/* rsp = rbp + whole pages still to be stepped over */
+	movq	(%rsp),%r10	/* touch the page (the loaded value is not used) */
+	cmpq	%rbp,%rsp
+	ja	.Lfrom_page_walk
+	jmp	.Lfrom_page_walk_done
+
+.Lfrom_page_walk:	/* step rsp down one 4096-byte page at a time, touching each, until it reaches rbp */
+	leaq	-4096(%rsp),%rsp
+	movq	(%rsp),%r10
+	cmpq	%rbp,%rsp
+	ja	.Lfrom_page_walk
+.Lfrom_page_walk_done:
+
+	movq	%r9,%r10	/* r10 = negative byte count */
+	negq	%r9	/* r9 = positive byte count again */
+
+
+
+
+
+
+
+
+
+
+	movq	%r8,32(%rsp)	/* frame slot 32: the qword loaded from (r8) above */
+	movq	%rax,40(%rsp)	/* frame slot 40: entry rsp, reloaded in the epilogue */
+.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08	/* DW_CFA_def_cfa_expression: CFA = *(rsp+40) + 8 */
+.Lfrom_body:
+	movq	%r9,%r11	/* r11 = bytes left to copy */
+	leaq	48(%rsp),%rax	/* rax = start of the working buffer */
+	pxor	%xmm0,%xmm0
+	jmp	.Lmul_by_1
+
+.align	32
+.Lmul_by_1:	/* per iteration: copy 64 bytes rsi -> rax and zero 64 bytes at rax + r9 (the upper half of the buffer) */
+	movdqu	(%rsi),%xmm1
+	movdqu	16(%rsi),%xmm2
+	movdqu	32(%rsi),%xmm3
+	movdqa	%xmm0,(%rax,%r9,1)
+	movdqu	48(%rsi),%xmm4
+	movdqa	%xmm0,16(%rax,%r9,1)
+.byte	0x48,0x8d,0xb6,0x40,0x00,0x00,0x00	/* encodes leaq 64(%rsi),%rsi with a 32-bit displacement */
+	movdqa	%xmm1,(%rax)
+	movdqa	%xmm0,32(%rax,%r9,1)
+	movdqa	%xmm2,16(%rax)
+	movdqa	%xmm0,48(%rax,%r9,1)
+	movdqa	%xmm3,32(%rax)
+	movdqa	%xmm4,48(%rax)
+	leaq	64(%rax),%rax
+	subq	$64,%r11
+	jnz	.Lmul_by_1
+
+.byte	102,72,15,110,207	/* encodes movq %rdi,%xmm1: result pointer, read back by __bn_post4x_internal */
+.byte	102,72,15,110,209	/* encodes movq %rcx,%xmm2 */
+.byte	0x67
+	movq	%rcx,%rbp	/* rbp = rcx (pointer read as n[] by the callees) */
+.byte	102,73,15,110,218	/* encodes movq %r10,%xmm3: the negative byte count */
+	call	__bn_sqr8x_reduction
+	call	__bn_post4x_internal
+
+	pxor	%xmm0,%xmm0
+	leaq	48(%rsp),%rax
+	jmp	.Lfrom_mont_zero
+
+.align	32
+.Lfrom_mont_zero:	/* wipe the working buffer: 64 bytes zeroed per 32 subtracted from r9, i.e. 2*r9 bytes in total */
+	movq	40(%rsp),%rsi	/* rsi = saved entry rsp (reloaded every iteration) */
+.cfi_def_cfa	%rsi,8
+	movdqa	%xmm0,0(%rax)
+	movdqa	%xmm0,16(%rax)
+	movdqa	%xmm0,32(%rax)
+	movdqa	%xmm0,48(%rax)
+	leaq	64(%rax),%rax
+	subq	$32,%r9
+	jnz	.Lfrom_mont_zero
+
+	movq	$1,%rax	/* return value 1 */
+	movq	-48(%rsi),%r15	/* restore callee-saved registers from below the entry rsp */
+.cfi_restore	%r15
+	movq	-40(%rsi),%r14
+.cfi_restore	%r14
+	movq	-32(%rsi),%r13
+.cfi_restore	%r13
+	movq	-24(%rsi),%r12
+.cfi_restore	%r12
+	movq	-16(%rsi),%rbp
+.cfi_restore	%rbp
+	movq	-8(%rsi),%rbx
+.cfi_restore	%rbx
+	leaq	(%rsi),%rsp	/* rsp = entry rsp */
+.cfi_def_cfa_register	%rsp
+.Lfrom_epilogue:
+	.byte	0xf3,0xc3	/* encodes rep ret */
+.cfi_endproc	
+.size	bn_from_mont8x,.-bn_from_mont8x
+.globl	bn_scatter5
+.hidden bn_scatter5
+.type	bn_scatter5,@function
+.align	16
+bn_scatter5:	/* Stores esi qwords read sequentially from rdi into the table at rdx, starting at qword index rcx and advancing 256 bytes per qword. */
+	cmpl	$0,%esi
+	jz	.Lscatter_epilogue	/* nothing to do for a zero count */
+	leaq	(%rdx,%rcx,8),%rdx	/* rdx = table + 8*rcx: first destination slot */
+.Lscatter:
+	movq	(%rdi),%rax
+	leaq	8(%rdi),%rdi
+	movq	%rax,(%rdx)
+	leaq	256(%rdx),%rdx	/* destination stride is 256 bytes, the same row size bn_gather5 reads per output qword */
+	subl	$1,%esi
+	jnz	.Lscatter
+.Lscatter_epilogue:
+	.byte	0xf3,0xc3	/* encodes rep ret */
+.size	bn_scatter5,.-bn_scatter5
+
+.globl	bn_gather5
+.hidden bn_gather5
+.type	bn_gather5,@function
+.align	32
+bn_gather5:	/* Writes esi qwords to rdi; each is selected from a 256-byte row of the table at rdx by AND-ing all 32 qwords of the row with equality masks for index ecx, so the addresses read do not depend on ecx. */
+.LSEH_begin_bn_gather5:
+
+.byte	0x4c,0x8d,0x14,0x24	/* encodes leaq (%rsp),%r10: save rsp */
+.byte	0x48,0x81,0xec,0x08,0x01,0x00,0x00	/* encodes subq $0x108,%rsp: room for sixteen 16-byte masks plus alignment slack */
+	leaq	.Linc(%rip),%rax
+	andq	$-16,%rsp	/* 16-byte alignment for the movdqa stores below */
+
+	movd	%ecx,%xmm5
+	movdqa	0(%rax),%xmm0	/* xmm0 = {0,0,1,1}: indices of the first two qwords */
+	movdqa	16(%rax),%xmm1	/* xmm1 = {2,2,2,2}: per-step increment */
+	leaq	128(%rdx),%r11	/* r11 and rax are biased by +128 so all offsets fit in a signed byte */
+	leaq	128(%rsp),%rax
+
+	pshufd	$0,%xmm5,%xmm5	/* broadcast the requested index to all four dwords */
+	movdqa	%xmm1,%xmm4	/* xmm4 keeps the {2,2,2,2} increment for the whole mask build */
+	movdqa	%xmm1,%xmm2
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0	/* each compare yields all-ones in the 64-bit lane whose index equals ecx */
+	movdqa	%xmm4,%xmm3
+
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,-128(%rax)	/* mask for qwords 0,1 */
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,-112(%rax)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,-96(%rax)
+	movdqa	%xmm4,%xmm2
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,-80(%rax)
+	movdqa	%xmm4,%xmm3
+
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,-64(%rax)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,-48(%rax)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,-32(%rax)
+	movdqa	%xmm4,%xmm2
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,-16(%rax)
+	movdqa	%xmm4,%xmm3
+
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,0(%rax)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,16(%rax)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,32(%rax)
+	movdqa	%xmm4,%xmm2
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,48(%rax)
+	movdqa	%xmm4,%xmm3
+
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,64(%rax)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,80(%rax)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,96(%rax)
+	movdqa	%xmm4,%xmm2
+	movdqa	%xmm3,112(%rax)	/* sixteenth mask: qwords 30,31 */
+	jmp	.Lgather
+
+.align	32
+.Lgather:	/* one output qword per iteration: OR together (row[i] AND mask[i]) over the whole 256-byte row */
+	pxor	%xmm4,%xmm4	/* xmm4 and xmm5 are two independent OR accumulators */
+	pxor	%xmm5,%xmm5
+	movdqa	-128(%r11),%xmm0
+	movdqa	-112(%r11),%xmm1
+	movdqa	-96(%r11),%xmm2
+	pand	-128(%rax),%xmm0
+	movdqa	-80(%r11),%xmm3
+	pand	-112(%rax),%xmm1
+	por	%xmm0,%xmm4
+	pand	-96(%rax),%xmm2
+	por	%xmm1,%xmm5
+	pand	-80(%rax),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	-64(%r11),%xmm0
+	movdqa	-48(%r11),%xmm1
+	movdqa	-32(%r11),%xmm2
+	pand	-64(%rax),%xmm0
+	movdqa	-16(%r11),%xmm3
+	pand	-48(%rax),%xmm1
+	por	%xmm0,%xmm4
+	pand	-32(%rax),%xmm2
+	por	%xmm1,%xmm5
+	pand	-16(%rax),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	0(%r11),%xmm0
+	movdqa	16(%r11),%xmm1
+	movdqa	32(%r11),%xmm2
+	pand	0(%rax),%xmm0
+	movdqa	48(%r11),%xmm3
+	pand	16(%rax),%xmm1
+	por	%xmm0,%xmm4
+	pand	32(%rax),%xmm2
+	por	%xmm1,%xmm5
+	pand	48(%rax),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	64(%r11),%xmm0
+	movdqa	80(%r11),%xmm1
+	movdqa	96(%r11),%xmm2
+	pand	64(%rax),%xmm0
+	movdqa	112(%r11),%xmm3
+	pand	80(%rax),%xmm1
+	por	%xmm0,%xmm4
+	pand	96(%rax),%xmm2
+	por	%xmm1,%xmm5
+	pand	112(%rax),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	por	%xmm5,%xmm4	/* merge the two accumulators */
+	leaq	256(%r11),%r11	/* next 256-byte row */
+	pshufd	$0x4e,%xmm4,%xmm0	/* 0x4e swaps the two qwords ... */
+	por	%xmm4,%xmm0	/* ... so the low qword now holds the selected value whichever lane it was in */
+	movq	%xmm0,(%rdi)
+	leaq	8(%rdi),%rdi
+	subl	$1,%esi
+	jnz	.Lgather
+
+	leaq	(%r10),%rsp	/* restore the rsp saved in r10 at entry */
+	.byte	0xf3,0xc3	/* encodes rep ret */
+.LSEH_end_bn_gather5:
+.size	bn_gather5,.-bn_gather5
+.align	64
+.Linc:	/* seed and step used by bn_gather5 to build its index-comparison masks */
+.long	0,0, 1,1	/* indices of the first two 64-bit lanes (each index duplicated across both dwords of its lane) */
+.long	2,2, 2,2	/* increment added to reach the next pair of indices */
+.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	/* ASCII: "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>" NUL-terminated */
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/chacha/chacha-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/chacha/chacha-x86_64.S
new file mode 100644
index 0000000..30edc7b
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/chacha/chacha-x86_64.S
@@ -0,0 +1,1585 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+
+
+.p2align	6
+L$zero:
+.long	0,0,0,0
+L$one:	/* counter increment for the one-block-at-a-time paths (adds 1 to dword 0 only) */
+.long	1,0,0,0
+L$inc:	/* per-lane counter offsets 0..3 for the 4-block path */
+.long	0,1,2,3
+L$four:	/* counter step between iterations of the 4-block path */
+.long	4,4,4,4
+L$incy:	/* per-lane counter offsets for the 8-block path */
+.long	0,2,4,6,1,3,5,7
+L$eight:	/* counter step between iterations of the 8-block path */
+.long	8,8,8,8,8,8,8,8
+L$rot16:	/* pshufb control: rotates every dword by 16 bits */
+.byte	0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd
+L$rot24:	/* pshufb control: rotates every dword left by 8 bits (equivalently right by 24) */
+.byte	0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe
+L$sigma:	/* ASCII "expand 32-byte k" plus a NUL; the same 16 bytes as the four immediates loaded in L$oop_outer */
+.byte	101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0
+.p2align	6
+L$zeroz:
+.long	0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0
+L$fourz:
+.long	4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0
+L$incz:
+.long	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+L$sixteen:
+.long	16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
+.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	/* ASCII: "ChaCha20 for x86_64, CRYPTOGAMS by <appro@openssl.org>" NUL-terminated */
+.globl	_ChaCha20_ctr32
+.private_extern _ChaCha20_ctr32
+
+.p2align	6
+_ChaCha20_ctr32:	/* Entry point. rdi = output, rsi = input, rdx = byte length, rcx = 32-byte key, r8 = 16-byte counter block. Dispatches to the SIMD paths or runs the integer-register implementation below. */
+	cmpq	$0,%rdx
+	je	L$no_data	/* zero length: return immediately */
+	movq	_OPENSSL_ia32cap_P+4(%rip),%r10	/* r10 = capability dwords 1 (low half) and 2 (high half) */
+	testl	$512,%r10d	/* bit 9 of dword 1; presumably the SSSE3 CPUID flag - confirm */
+	jnz	L$ChaCha20_ssse3
+
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	subq	$64+24,%rsp	/* 64 bytes of block state + 24 bytes for the saved len/in/out */
+L$ctr32_body:
+
+
+	movdqu	(%rcx),%xmm1	/* key bytes 0..15 */
+	movdqu	16(%rcx),%xmm2	/* key bytes 16..31 */
+	movdqu	(%r8),%xmm3	/* counter block */
+	movdqa	L$one(%rip),%xmm4
+
+
+	movdqa	%xmm1,16(%rsp)	/* stack 16..63 mirrors state words 4..15 */
+	movdqa	%xmm2,32(%rsp)
+	movdqa	%xmm3,48(%rsp)
+	movq	%rdx,%rbp	/* rbp = bytes remaining */
+	jmp	L$oop_outer
+
+.p2align	5
+L$oop_outer:	/* one 64-byte block per iteration; words 0..3 in eax,ebx,ecx,edx, 4..7 in r8d..r11d, 12..15 in r12d..r15d */
+	movl	$0x61707865,%eax	/* the four constants are the L$sigma bytes read as little-endian dwords */
+	movl	$0x3320646e,%ebx
+	movl	$0x79622d32,%ecx
+	movl	$0x6b206574,%edx
+	movl	16(%rsp),%r8d
+	movl	20(%rsp),%r9d
+	movl	24(%rsp),%r10d
+	movl	28(%rsp),%r11d
+	movd	%xmm3,%r12d	/* word 12 comes from the live counter in xmm3 */
+	movl	52(%rsp),%r13d
+	movl	56(%rsp),%r14d
+	movl	60(%rsp),%r15d
+
+	movq	%rbp,64+0(%rsp)	/* spill len/in/out: rbp, rsi and rdi are reused inside the round loop */
+	movl	$10,%ebp	/* 10 iterations, each a column round plus a diagonal round */
+	movq	%rsi,64+8(%rsp)
+.byte	102,72,15,126,214	/* encodes movq %xmm2,%rsi: esi = word 8, upper half = word 9 */
+	movq	%rdi,64+16(%rsp)
+	movq	%rsi,%rdi
+	shrq	$32,%rdi	/* edi = word 9 */
+	jmp	L$oop
+
+.p2align	5
+L$oop:	/* column quarter-rounds (0,4,8,12) and (1,5,9,13), interleaved */
+	addl	%r8d,%eax
+	xorl	%eax,%r12d
+	roll	$16,%r12d
+	addl	%r9d,%ebx
+	xorl	%ebx,%r13d
+	roll	$16,%r13d
+	addl	%r12d,%esi
+	xorl	%esi,%r8d
+	roll	$12,%r8d
+	addl	%r13d,%edi
+	xorl	%edi,%r9d
+	roll	$12,%r9d
+	addl	%r8d,%eax
+	xorl	%eax,%r12d
+	roll	$8,%r12d
+	addl	%r9d,%ebx
+	xorl	%ebx,%r13d
+	roll	$8,%r13d
+	addl	%r12d,%esi
+	xorl	%esi,%r8d
+	roll	$7,%r8d
+	addl	%r13d,%edi
+	xorl	%edi,%r9d
+	roll	$7,%r9d
+	movl	%esi,32(%rsp)	/* only two of words 8..11 fit in registers: park 8,9 and fetch 10,11 */
+	movl	%edi,36(%rsp)
+	movl	40(%rsp),%esi
+	movl	44(%rsp),%edi
+	addl	%r10d,%ecx	/* column quarter-rounds (2,6,10,14) and (3,7,11,15) */
+	xorl	%ecx,%r14d
+	roll	$16,%r14d
+	addl	%r11d,%edx
+	xorl	%edx,%r15d
+	roll	$16,%r15d
+	addl	%r14d,%esi
+	xorl	%esi,%r10d
+	roll	$12,%r10d
+	addl	%r15d,%edi
+	xorl	%edi,%r11d
+	roll	$12,%r11d
+	addl	%r10d,%ecx
+	xorl	%ecx,%r14d
+	roll	$8,%r14d
+	addl	%r11d,%edx
+	xorl	%edx,%r15d
+	roll	$8,%r15d
+	addl	%r14d,%esi
+	xorl	%esi,%r10d
+	roll	$7,%r10d
+	addl	%r15d,%edi
+	xorl	%edi,%r11d
+	roll	$7,%r11d
+	addl	%r9d,%eax	/* diagonal quarter-rounds (0,5,10,15) and (1,6,11,12); esi,edi still hold words 10,11 */
+	xorl	%eax,%r15d
+	roll	$16,%r15d
+	addl	%r10d,%ebx
+	xorl	%ebx,%r12d
+	roll	$16,%r12d
+	addl	%r15d,%esi
+	xorl	%esi,%r9d
+	roll	$12,%r9d
+	addl	%r12d,%edi
+	xorl	%edi,%r10d
+	roll	$12,%r10d
+	addl	%r9d,%eax
+	xorl	%eax,%r15d
+	roll	$8,%r15d
+	addl	%r10d,%ebx
+	xorl	%ebx,%r12d
+	roll	$8,%r12d
+	addl	%r15d,%esi
+	xorl	%esi,%r9d
+	roll	$7,%r9d
+	addl	%r12d,%edi
+	xorl	%edi,%r10d
+	roll	$7,%r10d
+	movl	%esi,40(%rsp)	/* park words 10,11 and fetch 8,9 back */
+	movl	%edi,44(%rsp)
+	movl	32(%rsp),%esi
+	movl	36(%rsp),%edi
+	addl	%r11d,%ecx	/* diagonal quarter-rounds (2,7,8,13) and (3,4,9,14) */
+	xorl	%ecx,%r13d
+	roll	$16,%r13d
+	addl	%r8d,%edx
+	xorl	%edx,%r14d
+	roll	$16,%r14d
+	addl	%r13d,%esi
+	xorl	%esi,%r11d
+	roll	$12,%r11d
+	addl	%r14d,%edi
+	xorl	%edi,%r8d
+	roll	$12,%r8d
+	addl	%r11d,%ecx
+	xorl	%ecx,%r13d
+	roll	$8,%r13d
+	addl	%r8d,%edx
+	xorl	%edx,%r14d
+	roll	$8,%r14d
+	addl	%r13d,%esi
+	xorl	%esi,%r11d
+	roll	$7,%r11d
+	addl	%r14d,%edi
+	xorl	%edi,%r8d
+	roll	$7,%r8d
+	decl	%ebp
+	jnz	L$oop
+	movl	%edi,36(%rsp)	/* flush words 8,9 so 32..47(%rsp) holds the final words 8..11 */
+	movl	%esi,32(%rsp)
+	movq	64(%rsp),%rbp	/* reload len/in/out */
+	movdqa	%xmm2,%xmm1	/* xmm1 = original words 8..11 (xmm2 is never modified) */
+	movq	64+8(%rsp),%rsi
+	paddd	%xmm4,%xmm3	/* advance the block counter for the next iteration */
+	movq	64+16(%rsp),%rdi
+
+	addl	$0x61707865,%eax	/* feed-forward: add the initial state to the permuted state */
+	addl	$0x3320646e,%ebx
+	addl	$0x79622d32,%ecx
+	addl	$0x6b206574,%edx
+	addl	16(%rsp),%r8d
+	addl	20(%rsp),%r9d
+	addl	24(%rsp),%r10d
+	addl	28(%rsp),%r11d
+	addl	48(%rsp),%r12d	/* 48(%rsp) still holds this block's counter; it is updated only after use below */
+	addl	52(%rsp),%r13d
+	addl	56(%rsp),%r14d
+	addl	60(%rsp),%r15d
+	paddd	32(%rsp),%xmm1	/* xmm1 = keystream words 8..11 */
+
+	cmpq	$64,%rbp
+	jb	L$tail	/* fewer than 64 bytes left: byte-wise tail */
+
+	xorl	0(%rsi),%eax	/* full block: keystream XOR input */
+	xorl	4(%rsi),%ebx
+	xorl	8(%rsi),%ecx
+	xorl	12(%rsi),%edx
+	xorl	16(%rsi),%r8d
+	xorl	20(%rsi),%r9d
+	xorl	24(%rsi),%r10d
+	xorl	28(%rsi),%r11d
+	movdqu	32(%rsi),%xmm0
+	xorl	48(%rsi),%r12d
+	xorl	52(%rsi),%r13d
+	xorl	56(%rsi),%r14d
+	xorl	60(%rsi),%r15d
+	leaq	64(%rsi),%rsi
+	pxor	%xmm1,%xmm0
+
+	movdqa	%xmm2,32(%rsp)	/* reset stack words 8..11 to the key for the next block */
+	movd	%xmm3,48(%rsp)	/* store the incremented counter (low dword only) */
+
+	movl	%eax,0(%rdi)
+	movl	%ebx,4(%rdi)
+	movl	%ecx,8(%rdi)
+	movl	%edx,12(%rdi)
+	movl	%r8d,16(%rdi)
+	movl	%r9d,20(%rdi)
+	movl	%r10d,24(%rdi)
+	movl	%r11d,28(%rdi)
+	movdqu	%xmm0,32(%rdi)
+	movl	%r12d,48(%rdi)
+	movl	%r13d,52(%rdi)
+	movl	%r14d,56(%rdi)
+	movl	%r15d,60(%rdi)
+	leaq	64(%rdi),%rdi
+
+	subq	$64,%rbp
+	jnz	L$oop_outer
+
+	jmp	L$done
+
+.p2align	4
+L$tail:	/* spill the whole 64-byte keystream block to 0..63(%rsp), then XOR rbp bytes one at a time */
+	movl	%eax,0(%rsp)
+	movl	%ebx,4(%rsp)
+	xorq	%rbx,%rbx	/* rbx = byte index (ebx was stored on the previous line) */
+	movl	%ecx,8(%rsp)
+	movl	%edx,12(%rsp)
+	movl	%r8d,16(%rsp)
+	movl	%r9d,20(%rsp)
+	movl	%r10d,24(%rsp)
+	movl	%r11d,28(%rsp)
+	movdqa	%xmm1,32(%rsp)
+	movl	%r12d,48(%rsp)
+	movl	%r13d,52(%rsp)
+	movl	%r14d,56(%rsp)
+	movl	%r15d,60(%rsp)
+
+L$oop_tail:
+	movzbl	(%rsi,%rbx,1),%eax
+	movzbl	(%rsp,%rbx,1),%edx
+	leaq	1(%rbx),%rbx
+	xorl	%edx,%eax
+	movb	%al,-1(%rdi,%rbx,1)	/* -1 because rbx was already incremented */
+	decq	%rbp
+	jnz	L$oop_tail
+
+L$done:
+	leaq	64+24+48(%rsp),%rsi	/* rsi = rsp as it was before the six pushes */
+	movq	-48(%rsi),%r15
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp
+L$no_data:
+	.byte	0xf3,0xc3	/* encodes rep ret */
+
+
+.p2align	5
+ChaCha20_ssse3:	/* One-block-at-a-time SIMD path: the four state rows live in xmm0..xmm3. Reached from _ChaCha20_ctr32 with the same argument registers and with r10 holding the capability dwords. */
+L$ChaCha20_ssse3:
+	movq	%rsp,%r9	/* r9 = entry rsp, restored at L$done_ssse3 */
+	cmpq	$128,%rdx
+	ja	L$ChaCha20_4x	/* more than 128 bytes: try the 4-block path */
+
+L$do_sse3_after_all:	/* also entered from ChaCha20_4x, which has set r9 the same way */
+	subq	$64+8,%rsp	/* 64 bytes of state; the extra 8 keeps rsp 16-byte aligned for movdqa (assuming the usual rsp = 8 mod 16 at function entry) */
+	movdqa	L$sigma(%rip),%xmm0
+	movdqu	(%rcx),%xmm1
+	movdqu	16(%rcx),%xmm2
+	movdqu	(%r8),%xmm3
+	movdqa	L$rot16(%rip),%xmm6	/* xmm6/xmm7 = pshufb controls for the 16-bit and 8-bit rotations */
+	movdqa	L$rot24(%rip),%xmm7
+
+	movdqa	%xmm0,0(%rsp)	/* keep the initial state on the stack for the feed-forward and the next block */
+	movdqa	%xmm1,16(%rsp)
+	movdqa	%xmm2,32(%rsp)
+	movdqa	%xmm3,48(%rsp)
+	movq	$10,%r8	/* r8 is free now that the counter block has been loaded: 10 double rounds */
+	jmp	L$oop_ssse3
+
+.p2align	5
+L$oop_outer_ssse3:	/* next block: reload the state and add 1 to the stored counter */
+	movdqa	L$one(%rip),%xmm3
+	movdqa	0(%rsp),%xmm0
+	movdqa	16(%rsp),%xmm1
+	movdqa	32(%rsp),%xmm2
+	paddd	48(%rsp),%xmm3
+	movq	$10,%r8
+	movdqa	%xmm3,48(%rsp)
+	jmp	L$oop_ssse3
+
+.p2align	5
+L$oop_ssse3:	/* first half: quarter-round on all four columns at once */
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,222	/* encodes pshufb %xmm6,%xmm3: rotate by 16 */
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$20,%xmm1
+	pslld	$12,%xmm4
+	por	%xmm4,%xmm1	/* rotate left by 12 via shift pair */
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,223	/* encodes pshufb %xmm7,%xmm3: rotate left by 8 */
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$25,%xmm1
+	pslld	$7,%xmm4
+	por	%xmm4,%xmm1	/* rotate left by 7 */
+	pshufd	$78,%xmm2,%xmm2	/* rotate rows 1..3 by 1,2,3 dwords so the diagonals line up as columns */
+	pshufd	$57,%xmm1,%xmm1
+	pshufd	$147,%xmm3,%xmm3
+	nop
+	paddd	%xmm1,%xmm0	/* second half: the same quarter-round, now acting on the diagonals */
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,222	/* encodes pshufb %xmm6,%xmm3 */
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$20,%xmm1
+	pslld	$12,%xmm4
+	por	%xmm4,%xmm1
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,223	/* encodes pshufb %xmm7,%xmm3 */
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$25,%xmm1
+	pslld	$7,%xmm4
+	por	%xmm4,%xmm1
+	pshufd	$78,%xmm2,%xmm2	/* undo the row rotation (57 and 147 swapped relative to above) */
+	pshufd	$147,%xmm1,%xmm1
+	pshufd	$57,%xmm3,%xmm3
+	decq	%r8
+	jnz	L$oop_ssse3
+	paddd	0(%rsp),%xmm0	/* feed-forward: add the initial state */
+	paddd	16(%rsp),%xmm1
+	paddd	32(%rsp),%xmm2
+	paddd	48(%rsp),%xmm3
+
+	cmpq	$64,%rdx
+	jb	L$tail_ssse3
+
+	movdqu	0(%rsi),%xmm4	/* full block: XOR 64 input bytes with the keystream */
+	movdqu	16(%rsi),%xmm5
+	pxor	%xmm4,%xmm0
+	movdqu	32(%rsi),%xmm4
+	pxor	%xmm5,%xmm1
+	movdqu	48(%rsi),%xmm5
+	leaq	64(%rsi),%rsi
+	pxor	%xmm4,%xmm2
+	pxor	%xmm5,%xmm3
+
+	movdqu	%xmm0,0(%rdi)
+	movdqu	%xmm1,16(%rdi)
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm3,48(%rdi)
+	leaq	64(%rdi),%rdi
+
+	subq	$64,%rdx
+	jnz	L$oop_outer_ssse3
+
+	jmp	L$done_ssse3
+
+.p2align	4
+L$tail_ssse3:	/* partial block: spill the keystream over the saved state and XOR rdx bytes one at a time */
+	movdqa	%xmm0,0(%rsp)
+	movdqa	%xmm1,16(%rsp)
+	movdqa	%xmm2,32(%rsp)
+	movdqa	%xmm3,48(%rsp)
+	xorq	%r8,%r8	/* r8 = byte index */
+
+L$oop_tail_ssse3:
+	movzbl	(%rsi,%r8,1),%eax
+	movzbl	(%rsp,%r8,1),%ecx
+	leaq	1(%r8),%r8
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r8,1)	/* -1 because r8 was already incremented */
+	decq	%rdx
+	jnz	L$oop_tail_ssse3
+
+L$done_ssse3:
+	leaq	(%r9),%rsp	/* restore the entry rsp saved in r9 */
+L$ssse3_epilogue:
+	.byte	0xf3,0xc3	/* encodes rep ret */
+
+
+.p2align	5
+ChaCha20_4x:	/* Four-blocks-in-parallel path: each xmm register holds the same state word of four consecutive blocks. Reached from ChaCha20_ssse3 with r10 holding the capability dwords. */
+L$ChaCha20_4x:
+	movq	%rsp,%r9	/* r9 = entry rsp, restored at L$done4x */
+	movq	%r10,%r11
+	shrq	$32,%r10	/* r10 = capability dword 2 */
+	testq	$32,%r10	/* bit 5 of dword 2; presumably the AVX2 flag - confirm */
+	jnz	L$ChaCha20_8x
+	cmpq	$192,%rdx
+	ja	L$proceed4x	/* more than 192 bytes: always use this path */
+
+	andq	$71303168,%r11	/* isolate bits 22 and 26 of dword 1 (71303168 = 0x4400000); presumably MOVBE and XSAVE - confirm */
+	cmpq	$4194304,%r11	/* bit 22 set, bit 26 clear */
+	je	L$do_sse3_after_all	/* on such CPUs short inputs go back to the one-block path */
+
+L$proceed4x:
+	subq	$0x140+8,%rsp
+	movdqa	L$sigma(%rip),%xmm11
+	movdqu	(%rcx),%xmm15
+	movdqu	16(%rcx),%xmm7
+	movdqu	(%r8),%xmm3
+	leaq	256(%rsp),%rcx	/* rcx = rsp+256: frame base biased so offsets 128..304 fit in a signed byte (the key pointer is no longer needed) */
+	leaq	L$rot16(%rip),%r10
+	leaq	L$rot24(%rip),%r11
+
+	pshufd	$0x00,%xmm11,%xmm8	/* broadcast each constant word across four lanes; saved at 64..127(%rsp) */
+	pshufd	$0x55,%xmm11,%xmm9
+	movdqa	%xmm8,64(%rsp)
+	pshufd	$0xaa,%xmm11,%xmm10
+	movdqa	%xmm9,80(%rsp)
+	pshufd	$0xff,%xmm11,%xmm11
+	movdqa	%xmm10,96(%rsp)
+	movdqa	%xmm11,112(%rsp)
+
+	pshufd	$0x00,%xmm15,%xmm12	/* state words 4..7 broadcast; saved at frame offsets 128..191 */
+	pshufd	$0x55,%xmm15,%xmm13
+	movdqa	%xmm12,128-256(%rcx)
+	pshufd	$0xaa,%xmm15,%xmm14
+	movdqa	%xmm13,144-256(%rcx)
+	pshufd	$0xff,%xmm15,%xmm15
+	movdqa	%xmm14,160-256(%rcx)
+	movdqa	%xmm15,176-256(%rcx)
+
+	pshufd	$0x00,%xmm7,%xmm4	/* state words 8..11 broadcast; saved at frame offsets 192..255 */
+	pshufd	$0x55,%xmm7,%xmm5
+	movdqa	%xmm4,192-256(%rcx)
+	pshufd	$0xaa,%xmm7,%xmm6
+	movdqa	%xmm5,208-256(%rcx)
+	pshufd	$0xff,%xmm7,%xmm7
+	movdqa	%xmm6,224-256(%rcx)
+	movdqa	%xmm7,240-256(%rcx)
+
+	pshufd	$0x00,%xmm3,%xmm0	/* state words 12..15 broadcast; saved at frame offsets 256..319 */
+	pshufd	$0x55,%xmm3,%xmm1
+	paddd	L$inc(%rip),%xmm0	/* lanes get counter+0..counter+3; stored at offset 256 in L$oop_enter4x */
+	pshufd	$0xaa,%xmm3,%xmm2
+	movdqa	%xmm1,272-256(%rcx)
+	pshufd	$0xff,%xmm3,%xmm3
+	movdqa	%xmm2,288-256(%rcx)
+	movdqa	%xmm3,304-256(%rcx)
+
+	jmp	L$oop_enter4x
+
+.p2align	5
+L$oop_outer4x:	/* next group of four blocks: reload the saved state and advance every lane counter by 4 */
+	movdqa	64(%rsp),%xmm8
+	movdqa	80(%rsp),%xmm9
+	movdqa	96(%rsp),%xmm10
+	movdqa	112(%rsp),%xmm11
+	movdqa	128-256(%rcx),%xmm12
+	movdqa	144-256(%rcx),%xmm13
+	movdqa	160-256(%rcx),%xmm14
+	movdqa	176-256(%rcx),%xmm15
+	movdqa	192-256(%rcx),%xmm4
+	movdqa	208-256(%rcx),%xmm5
+	movdqa	224-256(%rcx),%xmm6
+	movdqa	240-256(%rcx),%xmm7
+	movdqa	256-256(%rcx),%xmm0
+	movdqa	272-256(%rcx),%xmm1
+	movdqa	288-256(%rcx),%xmm2
+	movdqa	304-256(%rcx),%xmm3
+	paddd	L$four(%rip),%xmm0
+
+L$oop_enter4x:
+	movdqa	%xmm6,32(%rsp)	/* words 10,11 are parked at 32/48(%rsp): xmm6 and xmm7 are needed as temporaries */
+	movdqa	%xmm7,48(%rsp)
+	movdqa	(%r10),%xmm7	/* xmm7 = L$rot16 control */
+	movl	$10,%eax	/* 10 double rounds */
+	movdqa	%xmm0,256-256(%rcx)	/* save this group's lane counters for the feed-forward */
+	jmp	L$oop4x
+
+.p2align	5
+L$oop4x:	/* column quarter-rounds (0,4,8,12) and (1,5,9,13), interleaved */
+	paddd	%xmm12,%xmm8
+	paddd	%xmm13,%xmm9
+	pxor	%xmm8,%xmm0
+	pxor	%xmm9,%xmm1
+.byte	102,15,56,0,199	/* encodes pshufb %xmm7,%xmm0 */
+.byte	102,15,56,0,207	/* encodes pshufb %xmm7,%xmm1 */
+	paddd	%xmm0,%xmm4
+	paddd	%xmm1,%xmm5
+	pxor	%xmm4,%xmm12
+	pxor	%xmm5,%xmm13
+	movdqa	%xmm12,%xmm6
+	pslld	$12,%xmm12
+	psrld	$20,%xmm6
+	movdqa	%xmm13,%xmm7
+	pslld	$12,%xmm13
+	por	%xmm6,%xmm12
+	psrld	$20,%xmm7
+	movdqa	(%r11),%xmm6	/* xmm6 = L$rot24 control */
+	por	%xmm7,%xmm13
+	paddd	%xmm12,%xmm8
+	paddd	%xmm13,%xmm9
+	pxor	%xmm8,%xmm0
+	pxor	%xmm9,%xmm1
+.byte	102,15,56,0,198	/* encodes pshufb %xmm6,%xmm0 */
+.byte	102,15,56,0,206	/* encodes pshufb %xmm6,%xmm1 */
+	paddd	%xmm0,%xmm4
+	paddd	%xmm1,%xmm5
+	pxor	%xmm4,%xmm12
+	pxor	%xmm5,%xmm13
+	movdqa	%xmm12,%xmm7
+	pslld	$7,%xmm12
+	psrld	$25,%xmm7
+	movdqa	%xmm13,%xmm6
+	pslld	$7,%xmm13
+	por	%xmm7,%xmm12
+	psrld	$25,%xmm6
+	movdqa	(%r10),%xmm7	/* reload the L$rot16 control clobbered above */
+	por	%xmm6,%xmm13
+	movdqa	%xmm4,0(%rsp)	/* park words 8,9 and fetch words 10,11 into xmm4,xmm5 */
+	movdqa	%xmm5,16(%rsp)
+	movdqa	32(%rsp),%xmm4
+	movdqa	48(%rsp),%xmm5
+	paddd	%xmm14,%xmm10	/* column quarter-rounds (2,6,10,14) and (3,7,11,15) */
+	paddd	%xmm15,%xmm11
+	pxor	%xmm10,%xmm2
+	pxor	%xmm11,%xmm3
+.byte	102,15,56,0,215	/* encodes pshufb %xmm7,%xmm2 */
+.byte	102,15,56,0,223	/* encodes pshufb %xmm7,%xmm3 */
+	paddd	%xmm2,%xmm4
+	paddd	%xmm3,%xmm5
+	pxor	%xmm4,%xmm14
+	pxor	%xmm5,%xmm15
+	movdqa	%xmm14,%xmm6
+	pslld	$12,%xmm14
+	psrld	$20,%xmm6
+	movdqa	%xmm15,%xmm7
+	pslld	$12,%xmm15
+	por	%xmm6,%xmm14
+	psrld	$20,%xmm7
+	movdqa	(%r11),%xmm6
+	por	%xmm7,%xmm15
+	paddd	%xmm14,%xmm10
+	paddd	%xmm15,%xmm11
+	pxor	%xmm10,%xmm2
+	pxor	%xmm11,%xmm3
+.byte	102,15,56,0,214	/* encodes pshufb %xmm6,%xmm2 */
+.byte	102,15,56,0,222	/* encodes pshufb %xmm6,%xmm3 */
+	paddd	%xmm2,%xmm4
+	paddd	%xmm3,%xmm5
+	pxor	%xmm4,%xmm14
+	pxor	%xmm5,%xmm15
+	movdqa	%xmm14,%xmm7
+	pslld	$7,%xmm14
+	psrld	$25,%xmm7
+	movdqa	%xmm15,%xmm6
+	pslld	$7,%xmm15
+	por	%xmm7,%xmm14
+	psrld	$25,%xmm6
+	movdqa	(%r10),%xmm7
+	por	%xmm6,%xmm15
+	paddd	%xmm13,%xmm8	/* diagonal quarter-rounds (0,5,10,15) and (1,6,11,12); xmm4,xmm5 still hold words 10,11 */
+	paddd	%xmm14,%xmm9
+	pxor	%xmm8,%xmm3
+	pxor	%xmm9,%xmm0
+.byte	102,15,56,0,223	/* encodes pshufb %xmm7,%xmm3 */
+.byte	102,15,56,0,199	/* encodes pshufb %xmm7,%xmm0 */
+	paddd	%xmm3,%xmm4
+	paddd	%xmm0,%xmm5
+	pxor	%xmm4,%xmm13
+	pxor	%xmm5,%xmm14
+	movdqa	%xmm13,%xmm6
+	pslld	$12,%xmm13
+	psrld	$20,%xmm6
+	movdqa	%xmm14,%xmm7
+	pslld	$12,%xmm14
+	por	%xmm6,%xmm13
+	psrld	$20,%xmm7
+	movdqa	(%r11),%xmm6
+	por	%xmm7,%xmm14
+	paddd	%xmm13,%xmm8
+	paddd	%xmm14,%xmm9
+	pxor	%xmm8,%xmm3
+	pxor	%xmm9,%xmm0
+.byte	102,15,56,0,222	/* encodes pshufb %xmm6,%xmm3 */
+.byte	102,15,56,0,198	/* encodes pshufb %xmm6,%xmm0 */
+	paddd	%xmm3,%xmm4
+	paddd	%xmm0,%xmm5
+	pxor	%xmm4,%xmm13
+	pxor	%xmm5,%xmm14
+	movdqa	%xmm13,%xmm7
+	pslld	$7,%xmm13
+	psrld	$25,%xmm7
+	movdqa	%xmm14,%xmm6
+	pslld	$7,%xmm14
+	por	%xmm7,%xmm13
+	psrld	$25,%xmm6
+	movdqa	(%r10),%xmm7
+	por	%xmm6,%xmm14
+	movdqa	%xmm4,32(%rsp)	/* park words 10,11 and fetch words 8,9 back */
+	movdqa	%xmm5,48(%rsp)
+	movdqa	0(%rsp),%xmm4
+	movdqa	16(%rsp),%xmm5
+	paddd	%xmm15,%xmm10	/* diagonal quarter-rounds (2,7,8,13) and (3,4,9,14) */
+	paddd	%xmm12,%xmm11
+	pxor	%xmm10,%xmm1
+	pxor	%xmm11,%xmm2
+.byte	102,15,56,0,207	/* encodes pshufb %xmm7,%xmm1 */
+.byte	102,15,56,0,215	/* encodes pshufb %xmm7,%xmm2 */
+	paddd	%xmm1,%xmm4
+	paddd	%xmm2,%xmm5
+	pxor	%xmm4,%xmm15
+	pxor	%xmm5,%xmm12
+	movdqa	%xmm15,%xmm6
+	pslld	$12,%xmm15
+	psrld	$20,%xmm6
+	movdqa	%xmm12,%xmm7
+	pslld	$12,%xmm12
+	por	%xmm6,%xmm15
+	psrld	$20,%xmm7
+	movdqa	(%r11),%xmm6
+	por	%xmm7,%xmm12
+	paddd	%xmm15,%xmm10
+	paddd	%xmm12,%xmm11
+	pxor	%xmm10,%xmm1
+	pxor	%xmm11,%xmm2
+.byte	102,15,56,0,206	/* encodes pshufb %xmm6,%xmm1 */
+.byte	102,15,56,0,214	/* encodes pshufb %xmm6,%xmm2 */
+	paddd	%xmm1,%xmm4
+	paddd	%xmm2,%xmm5
+	pxor	%xmm4,%xmm15
+	pxor	%xmm5,%xmm12
+	movdqa	%xmm15,%xmm7
+	pslld	$7,%xmm15
+	psrld	$25,%xmm7
+	movdqa	%xmm12,%xmm6
+	pslld	$7,%xmm12
+	por	%xmm7,%xmm15
+	psrld	$25,%xmm6
+	movdqa	(%r10),%xmm7
+	por	%xmm6,%xmm12
+	decl	%eax
+	jnz	L$oop4x
+
+	paddd	64(%rsp),%xmm8	/* feed-forward for words 0..3 */
+	paddd	80(%rsp),%xmm9
+	paddd	96(%rsp),%xmm10
+	paddd	112(%rsp),%xmm11
+
+	movdqa	%xmm8,%xmm6	/* 4x4 dword transpose: from one-word-of-four-blocks to four-words-of-one-block */
+	punpckldq	%xmm9,%xmm8
+	movdqa	%xmm10,%xmm7
+	punpckldq	%xmm11,%xmm10
+	punpckhdq	%xmm9,%xmm6
+	punpckhdq	%xmm11,%xmm7
+	movdqa	%xmm8,%xmm9
+	punpcklqdq	%xmm10,%xmm8
+	movdqa	%xmm6,%xmm11
+	punpcklqdq	%xmm7,%xmm6
+	punpckhqdq	%xmm10,%xmm9
+	punpckhqdq	%xmm7,%xmm11
+	paddd	128-256(%rcx),%xmm12	/* feed-forward for words 4..7 */
+	paddd	144-256(%rcx),%xmm13
+	paddd	160-256(%rcx),%xmm14
+	paddd	176-256(%rcx),%xmm15
+
+	movdqa	%xmm8,0(%rsp)	/* words 0..3 of blocks 0 and 1 go to 0/16(%rsp) ... */
+	movdqa	%xmm9,16(%rsp)
+	movdqa	32(%rsp),%xmm8	/* ... freeing xmm8,xmm9 to fetch the parked words 10,11 */
+	movdqa	48(%rsp),%xmm9
+
+	movdqa	%xmm12,%xmm10
+	punpckldq	%xmm13,%xmm12
+	movdqa	%xmm14,%xmm7
+	punpckldq	%xmm15,%xmm14
+	punpckhdq	%xmm13,%xmm10
+	punpckhdq	%xmm15,%xmm7
+	movdqa	%xmm12,%xmm13
+	punpcklqdq	%xmm14,%xmm12
+	movdqa	%xmm10,%xmm15
+	punpcklqdq	%xmm7,%xmm10
+	punpckhqdq	%xmm14,%xmm13
+	punpckhqdq	%xmm7,%xmm15
+	paddd	192-256(%rcx),%xmm4	/* feed-forward for words 8..11 */
+	paddd	208-256(%rcx),%xmm5
+	paddd	224-256(%rcx),%xmm8
+	paddd	240-256(%rcx),%xmm9
+
+	movdqa	%xmm6,32(%rsp)	/* words 0..3 of blocks 2 and 3 go to 32/48(%rsp) */
+	movdqa	%xmm11,48(%rsp)
+
+	movdqa	%xmm4,%xmm14
+	punpckldq	%xmm5,%xmm4
+	movdqa	%xmm8,%xmm7
+	punpckldq	%xmm9,%xmm8
+	punpckhdq	%xmm5,%xmm14
+	punpckhdq	%xmm9,%xmm7
+	movdqa	%xmm4,%xmm5
+	punpcklqdq	%xmm8,%xmm4
+	movdqa	%xmm14,%xmm9
+	punpcklqdq	%xmm7,%xmm14
+	punpckhqdq	%xmm8,%xmm5
+	punpckhqdq	%xmm7,%xmm9
+	paddd	256-256(%rcx),%xmm0	/* feed-forward for words 12..15 (offset 256 holds this group's lane counters) */
+	paddd	272-256(%rcx),%xmm1
+	paddd	288-256(%rcx),%xmm2
+	paddd	304-256(%rcx),%xmm3
+
+	movdqa	%xmm0,%xmm8
+	punpckldq	%xmm1,%xmm0
+	movdqa	%xmm2,%xmm7
+	punpckldq	%xmm3,%xmm2
+	punpckhdq	%xmm1,%xmm8
+	punpckhdq	%xmm3,%xmm7
+	movdqa	%xmm0,%xmm1
+	punpcklqdq	%xmm2,%xmm0
+	movdqa	%xmm8,%xmm3
+	punpcklqdq	%xmm7,%xmm8
+	punpckhqdq	%xmm2,%xmm1
+	punpckhqdq	%xmm7,%xmm3
+	cmpq	$256,%rdx	/* keystream rows now: block0 = 0(%rsp),xmm12,xmm4,xmm0; block1 = 16(%rsp),xmm13,xmm5,xmm1; block2 = 32(%rsp),xmm10,xmm14,xmm8; block3 = 48(%rsp),xmm15,xmm9,xmm3 */
+	jb	L$tail4x
+
+	movdqu	0(%rsi),%xmm6	/* full 256 bytes: XOR and store, loads for the next 64 bytes interleaved with stores */
+	movdqu	16(%rsi),%xmm11
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm7
+	pxor	0(%rsp),%xmm6
+	pxor	%xmm12,%xmm11
+	pxor	%xmm4,%xmm2
+	pxor	%xmm0,%xmm7
+
+	movdqu	%xmm6,0(%rdi)
+	movdqu	64(%rsi),%xmm6
+	movdqu	%xmm11,16(%rdi)
+	movdqu	80(%rsi),%xmm11
+	movdqu	%xmm2,32(%rdi)
+	movdqu	96(%rsi),%xmm2
+	movdqu	%xmm7,48(%rdi)
+	movdqu	112(%rsi),%xmm7
+	leaq	128(%rsi),%rsi
+	pxor	16(%rsp),%xmm6
+	pxor	%xmm13,%xmm11
+	pxor	%xmm5,%xmm2
+	pxor	%xmm1,%xmm7
+
+	movdqu	%xmm6,64(%rdi)
+	movdqu	0(%rsi),%xmm6
+	movdqu	%xmm11,80(%rdi)
+	movdqu	16(%rsi),%xmm11
+	movdqu	%xmm2,96(%rdi)
+	movdqu	32(%rsi),%xmm2
+	movdqu	%xmm7,112(%rdi)
+	leaq	128(%rdi),%rdi
+	movdqu	48(%rsi),%xmm7
+	pxor	32(%rsp),%xmm6
+	pxor	%xmm10,%xmm11
+	pxor	%xmm14,%xmm2
+	pxor	%xmm8,%xmm7
+
+	movdqu	%xmm6,0(%rdi)
+	movdqu	64(%rsi),%xmm6
+	movdqu	%xmm11,16(%rdi)
+	movdqu	80(%rsi),%xmm11
+	movdqu	%xmm2,32(%rdi)
+	movdqu	96(%rsi),%xmm2
+	movdqu	%xmm7,48(%rdi)
+	movdqu	112(%rsi),%xmm7
+	leaq	128(%rsi),%rsi
+	pxor	48(%rsp),%xmm6
+	pxor	%xmm15,%xmm11
+	pxor	%xmm9,%xmm2
+	pxor	%xmm3,%xmm7
+	movdqu	%xmm6,64(%rdi)
+	movdqu	%xmm11,80(%rdi)
+	movdqu	%xmm2,96(%rdi)
+	movdqu	%xmm7,112(%rdi)
+	leaq	128(%rdi),%rdi
+
+	subq	$256,%rdx
+	jnz	L$oop_outer4x
+
+	jmp	L$done4x
+
+L$tail4x:	/* fewer than 256 bytes left: emit whole 64-byte blocks, then finish byte-wise from 0..63(%rsp) */
+	cmpq	$192,%rdx
+	jae	L$192_or_more4x
+	cmpq	$128,%rdx
+	jae	L$128_or_more4x
+	cmpq	$64,%rdx
+	jae	L$64_or_more4x
+
+
+	xorq	%r10,%r10	/* r10 = byte index for L$oop_tail4x */
+
+	movdqa	%xmm12,16(%rsp)	/* complete block 0 of the keystream at 0..63(%rsp); 0(%rsp) already holds its first row */
+	movdqa	%xmm4,32(%rsp)
+	movdqa	%xmm0,48(%rsp)
+	jmp	L$oop_tail4x
+
+.p2align	5
+L$64_or_more4x:
+	movdqu	0(%rsi),%xmm6
+	movdqu	16(%rsi),%xmm11
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm7
+	pxor	0(%rsp),%xmm6
+	pxor	%xmm12,%xmm11
+	pxor	%xmm4,%xmm2
+	pxor	%xmm0,%xmm7
+	movdqu	%xmm6,0(%rdi)
+	movdqu	%xmm11,16(%rdi)
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm7,48(%rdi)
+	je	L$done4x	/* ZF is still from cmpq $64,%rdx in L$tail4x (the SSE moves and pxor leave EFLAGS alone): exactly 64 bytes */
+
+	movdqa	16(%rsp),%xmm6	/* stage block 1 of the keystream at 0..63(%rsp) for the byte loop */
+	leaq	64(%rsi),%rsi
+	xorq	%r10,%r10
+	movdqa	%xmm6,0(%rsp)
+	movdqa	%xmm13,16(%rsp)
+	leaq	64(%rdi),%rdi
+	movdqa	%xmm5,32(%rsp)
+	subq	$64,%rdx
+	movdqa	%xmm1,48(%rsp)
+	jmp	L$oop_tail4x
+
+.p2align	5
+L$128_or_more4x:
+	movdqu	0(%rsi),%xmm6
+	movdqu	16(%rsi),%xmm11
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm7
+	pxor	0(%rsp),%xmm6
+	pxor	%xmm12,%xmm11
+	pxor	%xmm4,%xmm2
+	pxor	%xmm0,%xmm7
+
+	movdqu	%xmm6,0(%rdi)
+	movdqu	64(%rsi),%xmm6
+	movdqu	%xmm11,16(%rdi)
+	movdqu	80(%rsi),%xmm11
+	movdqu	%xmm2,32(%rdi)
+	movdqu	96(%rsi),%xmm2
+	movdqu	%xmm7,48(%rdi)
+	movdqu	112(%rsi),%xmm7
+	pxor	16(%rsp),%xmm6
+	pxor	%xmm13,%xmm11
+	pxor	%xmm5,%xmm2
+	pxor	%xmm1,%xmm7
+	movdqu	%xmm6,64(%rdi)
+	movdqu	%xmm11,80(%rdi)
+	movdqu	%xmm2,96(%rdi)
+	movdqu	%xmm7,112(%rdi)
+	je	L$done4x	/* ZF is still from cmpq $128,%rdx in L$tail4x: exactly 128 bytes */
+
+	movdqa	32(%rsp),%xmm6	/* stage block 2 of the keystream at 0..63(%rsp) */
+	leaq	128(%rsi),%rsi
+	xorq	%r10,%r10
+	movdqa	%xmm6,0(%rsp)
+	movdqa	%xmm10,16(%rsp)
+	leaq	128(%rdi),%rdi
+	movdqa	%xmm14,32(%rsp)
+	subq	$128,%rdx
+	movdqa	%xmm8,48(%rsp)
+	jmp	L$oop_tail4x
+
+.p2align	5
+L$192_or_more4x:
+	movdqu	0(%rsi),%xmm6
+	movdqu	16(%rsi),%xmm11
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm7
+	pxor	0(%rsp),%xmm6
+	pxor	%xmm12,%xmm11
+	pxor	%xmm4,%xmm2
+	pxor	%xmm0,%xmm7
+
+	movdqu	%xmm6,0(%rdi)
+	movdqu	64(%rsi),%xmm6
+	movdqu	%xmm11,16(%rdi)
+	movdqu	80(%rsi),%xmm11
+	movdqu	%xmm2,32(%rdi)
+	movdqu	96(%rsi),%xmm2
+	movdqu	%xmm7,48(%rdi)
+	movdqu	112(%rsi),%xmm7
+	leaq	128(%rsi),%rsi
+	pxor	16(%rsp),%xmm6
+	pxor	%xmm13,%xmm11
+	pxor	%xmm5,%xmm2
+	pxor	%xmm1,%xmm7
+
+	movdqu	%xmm6,64(%rdi)
+	movdqu	0(%rsi),%xmm6
+	movdqu	%xmm11,80(%rdi)
+	movdqu	16(%rsi),%xmm11
+	movdqu	%xmm2,96(%rdi)
+	movdqu	32(%rsi),%xmm2
+	movdqu	%xmm7,112(%rdi)
+	leaq	128(%rdi),%rdi
+	movdqu	48(%rsi),%xmm7
+	pxor	32(%rsp),%xmm6
+	pxor	%xmm10,%xmm11
+	pxor	%xmm14,%xmm2
+	pxor	%xmm8,%xmm7
+	movdqu	%xmm6,0(%rdi)
+	movdqu	%xmm11,16(%rdi)
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm7,48(%rdi)
+	je	L$done4x	/* ZF is still from cmpq $192,%rdx in L$tail4x: exactly 192 bytes */
+
+	movdqa	48(%rsp),%xmm6	/* stage block 3 of the keystream at 0..63(%rsp) */
+	leaq	64(%rsi),%rsi	/* +64 here on top of the +128 above */
+	xorq	%r10,%r10
+	movdqa	%xmm6,0(%rsp)
+	movdqa	%xmm15,16(%rsp)
+	leaq	64(%rdi),%rdi
+	movdqa	%xmm9,32(%rsp)
+	subq	$192,%rdx
+	movdqa	%xmm3,48(%rsp)
+
+L$oop_tail4x:	/* XOR the remaining rdx (< 64) bytes one at a time against the staged keystream block */
+	movzbl	(%rsi,%r10,1),%eax
+	movzbl	(%rsp,%r10,1),%ecx
+	leaq	1(%r10),%r10
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r10,1)	/* -1 because r10 was already incremented */
+	decq	%rdx
+	jnz	L$oop_tail4x
+
+L$done4x:
+	leaq	(%r9),%rsp	/* restore the entry rsp saved in r9 */
+L$4x_epilogue:
+	.byte	0xf3,0xc3	/* encodes rep ret */
+
+
+.p2align	5
+ChaCha20_8x:
+L$ChaCha20_8x:
+	movq	%rsp,%r9
+	subq	$0x280+8,%rsp
+	andq	$-32,%rsp
+	vzeroupper
+
+
+
+
+
+
+
+
+
+
+	vbroadcasti128	L$sigma(%rip),%ymm11
+	vbroadcasti128	(%rcx),%ymm3
+	vbroadcasti128	16(%rcx),%ymm15
+	vbroadcasti128	(%r8),%ymm7
+	leaq	256(%rsp),%rcx
+	leaq	512(%rsp),%rax
+	leaq	L$rot16(%rip),%r10
+	leaq	L$rot24(%rip),%r11
+
+	vpshufd	$0x00,%ymm11,%ymm8
+	vpshufd	$0x55,%ymm11,%ymm9
+	vmovdqa	%ymm8,128-256(%rcx)
+	vpshufd	$0xaa,%ymm11,%ymm10
+	vmovdqa	%ymm9,160-256(%rcx)
+	vpshufd	$0xff,%ymm11,%ymm11
+	vmovdqa	%ymm10,192-256(%rcx)
+	vmovdqa	%ymm11,224-256(%rcx)
+
+	vpshufd	$0x00,%ymm3,%ymm0
+	vpshufd	$0x55,%ymm3,%ymm1
+	vmovdqa	%ymm0,256-256(%rcx)
+	vpshufd	$0xaa,%ymm3,%ymm2
+	vmovdqa	%ymm1,288-256(%rcx)
+	vpshufd	$0xff,%ymm3,%ymm3
+	vmovdqa	%ymm2,320-256(%rcx)
+	vmovdqa	%ymm3,352-256(%rcx)
+
+	vpshufd	$0x00,%ymm15,%ymm12
+	vpshufd	$0x55,%ymm15,%ymm13
+	vmovdqa	%ymm12,384-512(%rax)
+	vpshufd	$0xaa,%ymm15,%ymm14
+	vmovdqa	%ymm13,416-512(%rax)
+	vpshufd	$0xff,%ymm15,%ymm15
+	vmovdqa	%ymm14,448-512(%rax)
+	vmovdqa	%ymm15,480-512(%rax)
+
+	vpshufd	$0x00,%ymm7,%ymm4
+	vpshufd	$0x55,%ymm7,%ymm5
+	vpaddd	L$incy(%rip),%ymm4,%ymm4
+	vpshufd	$0xaa,%ymm7,%ymm6
+	vmovdqa	%ymm5,544-512(%rax)
+	vpshufd	$0xff,%ymm7,%ymm7
+	vmovdqa	%ymm6,576-512(%rax)
+	vmovdqa	%ymm7,608-512(%rax)
+
+	jmp	L$oop_enter8x
+
+.p2align	5
+L$oop_outer8x:
+	vmovdqa	128-256(%rcx),%ymm8
+	vmovdqa	160-256(%rcx),%ymm9
+	vmovdqa	192-256(%rcx),%ymm10
+	vmovdqa	224-256(%rcx),%ymm11
+	vmovdqa	256-256(%rcx),%ymm0
+	vmovdqa	288-256(%rcx),%ymm1
+	vmovdqa	320-256(%rcx),%ymm2
+	vmovdqa	352-256(%rcx),%ymm3
+	vmovdqa	384-512(%rax),%ymm12
+	vmovdqa	416-512(%rax),%ymm13
+	vmovdqa	448-512(%rax),%ymm14
+	vmovdqa	480-512(%rax),%ymm15
+	vmovdqa	512-512(%rax),%ymm4
+	vmovdqa	544-512(%rax),%ymm5
+	vmovdqa	576-512(%rax),%ymm6
+	vmovdqa	608-512(%rax),%ymm7
+	vpaddd	L$eight(%rip),%ymm4,%ymm4
+
+L$oop_enter8x:
+	vmovdqa	%ymm14,64(%rsp)
+	vmovdqa	%ymm15,96(%rsp)
+	vbroadcasti128	(%r10),%ymm15
+	vmovdqa	%ymm4,512-512(%rax)
+	movl	$10,%eax
+	jmp	L$oop8x
+
+.p2align	5
+L$oop8x:
+	vpaddd	%ymm0,%ymm8,%ymm8
+	vpxor	%ymm4,%ymm8,%ymm4
+	vpshufb	%ymm15,%ymm4,%ymm4
+	vpaddd	%ymm1,%ymm9,%ymm9
+	vpxor	%ymm5,%ymm9,%ymm5
+	vpshufb	%ymm15,%ymm5,%ymm5
+	vpaddd	%ymm4,%ymm12,%ymm12
+	vpxor	%ymm0,%ymm12,%ymm0
+	vpslld	$12,%ymm0,%ymm14
+	vpsrld	$20,%ymm0,%ymm0
+	vpor	%ymm0,%ymm14,%ymm0
+	vbroadcasti128	(%r11),%ymm14
+	vpaddd	%ymm5,%ymm13,%ymm13
+	vpxor	%ymm1,%ymm13,%ymm1
+	vpslld	$12,%ymm1,%ymm15
+	vpsrld	$20,%ymm1,%ymm1
+	vpor	%ymm1,%ymm15,%ymm1
+	vpaddd	%ymm0,%ymm8,%ymm8
+	vpxor	%ymm4,%ymm8,%ymm4
+	vpshufb	%ymm14,%ymm4,%ymm4
+	vpaddd	%ymm1,%ymm9,%ymm9
+	vpxor	%ymm5,%ymm9,%ymm5
+	vpshufb	%ymm14,%ymm5,%ymm5
+	vpaddd	%ymm4,%ymm12,%ymm12
+	vpxor	%ymm0,%ymm12,%ymm0
+	vpslld	$7,%ymm0,%ymm15
+	vpsrld	$25,%ymm0,%ymm0
+	vpor	%ymm0,%ymm15,%ymm0
+	vbroadcasti128	(%r10),%ymm15
+	vpaddd	%ymm5,%ymm13,%ymm13
+	vpxor	%ymm1,%ymm13,%ymm1
+	vpslld	$7,%ymm1,%ymm14
+	vpsrld	$25,%ymm1,%ymm1
+	vpor	%ymm1,%ymm14,%ymm1
+	vmovdqa	%ymm12,0(%rsp)
+	vmovdqa	%ymm13,32(%rsp)
+	vmovdqa	64(%rsp),%ymm12
+	vmovdqa	96(%rsp),%ymm13
+	vpaddd	%ymm2,%ymm10,%ymm10
+	vpxor	%ymm6,%ymm10,%ymm6
+	vpshufb	%ymm15,%ymm6,%ymm6
+	vpaddd	%ymm3,%ymm11,%ymm11
+	vpxor	%ymm7,%ymm11,%ymm7
+	vpshufb	%ymm15,%ymm7,%ymm7
+	vpaddd	%ymm6,%ymm12,%ymm12
+	vpxor	%ymm2,%ymm12,%ymm2
+	vpslld	$12,%ymm2,%ymm14
+	vpsrld	$20,%ymm2,%ymm2
+	vpor	%ymm2,%ymm14,%ymm2
+	vbroadcasti128	(%r11),%ymm14
+	vpaddd	%ymm7,%ymm13,%ymm13
+	vpxor	%ymm3,%ymm13,%ymm3
+	vpslld	$12,%ymm3,%ymm15
+	vpsrld	$20,%ymm3,%ymm3
+	vpor	%ymm3,%ymm15,%ymm3
+	vpaddd	%ymm2,%ymm10,%ymm10
+	vpxor	%ymm6,%ymm10,%ymm6
+	vpshufb	%ymm14,%ymm6,%ymm6
+	vpaddd	%ymm3,%ymm11,%ymm11
+	vpxor	%ymm7,%ymm11,%ymm7
+	vpshufb	%ymm14,%ymm7,%ymm7
+	vpaddd	%ymm6,%ymm12,%ymm12
+	vpxor	%ymm2,%ymm12,%ymm2
+	vpslld	$7,%ymm2,%ymm15
+	vpsrld	$25,%ymm2,%ymm2
+	vpor	%ymm2,%ymm15,%ymm2
+	vbroadcasti128	(%r10),%ymm15
+	vpaddd	%ymm7,%ymm13,%ymm13
+	vpxor	%ymm3,%ymm13,%ymm3
+	vpslld	$7,%ymm3,%ymm14
+	vpsrld	$25,%ymm3,%ymm3
+	vpor	%ymm3,%ymm14,%ymm3
+	vpaddd	%ymm1,%ymm8,%ymm8
+	vpxor	%ymm7,%ymm8,%ymm7
+	vpshufb	%ymm15,%ymm7,%ymm7
+	vpaddd	%ymm2,%ymm9,%ymm9
+	vpxor	%ymm4,%ymm9,%ymm4
+	vpshufb	%ymm15,%ymm4,%ymm4
+	vpaddd	%ymm7,%ymm12,%ymm12
+	vpxor	%ymm1,%ymm12,%ymm1
+	vpslld	$12,%ymm1,%ymm14
+	vpsrld	$20,%ymm1,%ymm1
+	vpor	%ymm1,%ymm14,%ymm1
+	vbroadcasti128	(%r11),%ymm14
+	vpaddd	%ymm4,%ymm13,%ymm13
+	vpxor	%ymm2,%ymm13,%ymm2
+	vpslld	$12,%ymm2,%ymm15
+	vpsrld	$20,%ymm2,%ymm2
+	vpor	%ymm2,%ymm15,%ymm2
+	vpaddd	%ymm1,%ymm8,%ymm8
+	vpxor	%ymm7,%ymm8,%ymm7
+	vpshufb	%ymm14,%ymm7,%ymm7
+	vpaddd	%ymm2,%ymm9,%ymm9
+	vpxor	%ymm4,%ymm9,%ymm4
+	vpshufb	%ymm14,%ymm4,%ymm4
+	vpaddd	%ymm7,%ymm12,%ymm12
+	vpxor	%ymm1,%ymm12,%ymm1
+	vpslld	$7,%ymm1,%ymm15
+	vpsrld	$25,%ymm1,%ymm1
+	vpor	%ymm1,%ymm15,%ymm1
+	vbroadcasti128	(%r10),%ymm15
+	vpaddd	%ymm4,%ymm13,%ymm13
+	vpxor	%ymm2,%ymm13,%ymm2
+	vpslld	$7,%ymm2,%ymm14
+	vpsrld	$25,%ymm2,%ymm2
+	vpor	%ymm2,%ymm14,%ymm2
+	vmovdqa	%ymm12,64(%rsp)
+	vmovdqa	%ymm13,96(%rsp)
+	vmovdqa	0(%rsp),%ymm12
+	vmovdqa	32(%rsp),%ymm13
+	vpaddd	%ymm3,%ymm10,%ymm10
+	vpxor	%ymm5,%ymm10,%ymm5
+	vpshufb	%ymm15,%ymm5,%ymm5
+	vpaddd	%ymm0,%ymm11,%ymm11
+	vpxor	%ymm6,%ymm11,%ymm6
+	vpshufb	%ymm15,%ymm6,%ymm6
+	vpaddd	%ymm5,%ymm12,%ymm12
+	vpxor	%ymm3,%ymm12,%ymm3
+	vpslld	$12,%ymm3,%ymm14
+	vpsrld	$20,%ymm3,%ymm3
+	vpor	%ymm3,%ymm14,%ymm3
+	vbroadcasti128	(%r11),%ymm14
+	vpaddd	%ymm6,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm13,%ymm0
+	vpslld	$12,%ymm0,%ymm15
+	vpsrld	$20,%ymm0,%ymm0
+	vpor	%ymm0,%ymm15,%ymm0
+	vpaddd	%ymm3,%ymm10,%ymm10
+	vpxor	%ymm5,%ymm10,%ymm5
+	vpshufb	%ymm14,%ymm5,%ymm5
+	vpaddd	%ymm0,%ymm11,%ymm11
+	vpxor	%ymm6,%ymm11,%ymm6
+	vpshufb	%ymm14,%ymm6,%ymm6
+	vpaddd	%ymm5,%ymm12,%ymm12
+	vpxor	%ymm3,%ymm12,%ymm3
+	vpslld	$7,%ymm3,%ymm15
+	vpsrld	$25,%ymm3,%ymm3
+	vpor	%ymm3,%ymm15,%ymm3
+	vbroadcasti128	(%r10),%ymm15
+	vpaddd	%ymm6,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm13,%ymm0
+	vpslld	$7,%ymm0,%ymm14
+	vpsrld	$25,%ymm0,%ymm0
+	vpor	%ymm0,%ymm14,%ymm0
+	decl	%eax
+	jnz	L$oop8x
+
+	leaq	512(%rsp),%rax
+	vpaddd	128-256(%rcx),%ymm8,%ymm8
+	vpaddd	160-256(%rcx),%ymm9,%ymm9
+	vpaddd	192-256(%rcx),%ymm10,%ymm10
+	vpaddd	224-256(%rcx),%ymm11,%ymm11
+
+	vpunpckldq	%ymm9,%ymm8,%ymm14
+	vpunpckldq	%ymm11,%ymm10,%ymm15
+	vpunpckhdq	%ymm9,%ymm8,%ymm8
+	vpunpckhdq	%ymm11,%ymm10,%ymm10
+	vpunpcklqdq	%ymm15,%ymm14,%ymm9
+	vpunpckhqdq	%ymm15,%ymm14,%ymm14
+	vpunpcklqdq	%ymm10,%ymm8,%ymm11
+	vpunpckhqdq	%ymm10,%ymm8,%ymm8
+	vpaddd	256-256(%rcx),%ymm0,%ymm0
+	vpaddd	288-256(%rcx),%ymm1,%ymm1
+	vpaddd	320-256(%rcx),%ymm2,%ymm2
+	vpaddd	352-256(%rcx),%ymm3,%ymm3
+
+	vpunpckldq	%ymm1,%ymm0,%ymm10
+	vpunpckldq	%ymm3,%ymm2,%ymm15
+	vpunpckhdq	%ymm1,%ymm0,%ymm0
+	vpunpckhdq	%ymm3,%ymm2,%ymm2
+	vpunpcklqdq	%ymm15,%ymm10,%ymm1
+	vpunpckhqdq	%ymm15,%ymm10,%ymm10
+	vpunpcklqdq	%ymm2,%ymm0,%ymm3
+	vpunpckhqdq	%ymm2,%ymm0,%ymm0
+	vperm2i128	$0x20,%ymm1,%ymm9,%ymm15
+	vperm2i128	$0x31,%ymm1,%ymm9,%ymm1
+	vperm2i128	$0x20,%ymm10,%ymm14,%ymm9
+	vperm2i128	$0x31,%ymm10,%ymm14,%ymm10
+	vperm2i128	$0x20,%ymm3,%ymm11,%ymm14
+	vperm2i128	$0x31,%ymm3,%ymm11,%ymm3
+	vperm2i128	$0x20,%ymm0,%ymm8,%ymm11
+	vperm2i128	$0x31,%ymm0,%ymm8,%ymm0
+	vmovdqa	%ymm15,0(%rsp)
+	vmovdqa	%ymm9,32(%rsp)
+	vmovdqa	64(%rsp),%ymm15
+	vmovdqa	96(%rsp),%ymm9
+
+	vpaddd	384-512(%rax),%ymm12,%ymm12
+	vpaddd	416-512(%rax),%ymm13,%ymm13
+	vpaddd	448-512(%rax),%ymm15,%ymm15
+	vpaddd	480-512(%rax),%ymm9,%ymm9
+
+	vpunpckldq	%ymm13,%ymm12,%ymm2
+	vpunpckldq	%ymm9,%ymm15,%ymm8
+	vpunpckhdq	%ymm13,%ymm12,%ymm12
+	vpunpckhdq	%ymm9,%ymm15,%ymm15
+	vpunpcklqdq	%ymm8,%ymm2,%ymm13
+	vpunpckhqdq	%ymm8,%ymm2,%ymm2
+	vpunpcklqdq	%ymm15,%ymm12,%ymm9
+	vpunpckhqdq	%ymm15,%ymm12,%ymm12
+	vpaddd	512-512(%rax),%ymm4,%ymm4
+	vpaddd	544-512(%rax),%ymm5,%ymm5
+	vpaddd	576-512(%rax),%ymm6,%ymm6
+	vpaddd	608-512(%rax),%ymm7,%ymm7
+
+	vpunpckldq	%ymm5,%ymm4,%ymm15
+	vpunpckldq	%ymm7,%ymm6,%ymm8
+	vpunpckhdq	%ymm5,%ymm4,%ymm4
+	vpunpckhdq	%ymm7,%ymm6,%ymm6
+	vpunpcklqdq	%ymm8,%ymm15,%ymm5
+	vpunpckhqdq	%ymm8,%ymm15,%ymm15
+	vpunpcklqdq	%ymm6,%ymm4,%ymm7
+	vpunpckhqdq	%ymm6,%ymm4,%ymm4
+	vperm2i128	$0x20,%ymm5,%ymm13,%ymm8
+	vperm2i128	$0x31,%ymm5,%ymm13,%ymm5
+	vperm2i128	$0x20,%ymm15,%ymm2,%ymm13
+	vperm2i128	$0x31,%ymm15,%ymm2,%ymm15
+	vperm2i128	$0x20,%ymm7,%ymm9,%ymm2
+	vperm2i128	$0x31,%ymm7,%ymm9,%ymm7
+	vperm2i128	$0x20,%ymm4,%ymm12,%ymm9
+	vperm2i128	$0x31,%ymm4,%ymm12,%ymm4
+	vmovdqa	0(%rsp),%ymm6
+	vmovdqa	32(%rsp),%ymm12
+
+	cmpq	$512,%rdx
+	jb	L$tail8x
+
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	leaq	128(%rsi),%rsi
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	leaq	128(%rdi),%rdi
+
+	vpxor	0(%rsi),%ymm12,%ymm12
+	vpxor	32(%rsi),%ymm13,%ymm13
+	vpxor	64(%rsi),%ymm10,%ymm10
+	vpxor	96(%rsi),%ymm15,%ymm15
+	leaq	128(%rsi),%rsi
+	vmovdqu	%ymm12,0(%rdi)
+	vmovdqu	%ymm13,32(%rdi)
+	vmovdqu	%ymm10,64(%rdi)
+	vmovdqu	%ymm15,96(%rdi)
+	leaq	128(%rdi),%rdi
+
+	vpxor	0(%rsi),%ymm14,%ymm14
+	vpxor	32(%rsi),%ymm2,%ymm2
+	vpxor	64(%rsi),%ymm3,%ymm3
+	vpxor	96(%rsi),%ymm7,%ymm7
+	leaq	128(%rsi),%rsi
+	vmovdqu	%ymm14,0(%rdi)
+	vmovdqu	%ymm2,32(%rdi)
+	vmovdqu	%ymm3,64(%rdi)
+	vmovdqu	%ymm7,96(%rdi)
+	leaq	128(%rdi),%rdi
+
+	vpxor	0(%rsi),%ymm11,%ymm11
+	vpxor	32(%rsi),%ymm9,%ymm9
+	vpxor	64(%rsi),%ymm0,%ymm0
+	vpxor	96(%rsi),%ymm4,%ymm4
+	leaq	128(%rsi),%rsi
+	vmovdqu	%ymm11,0(%rdi)
+	vmovdqu	%ymm9,32(%rdi)
+	vmovdqu	%ymm0,64(%rdi)
+	vmovdqu	%ymm4,96(%rdi)
+	leaq	128(%rdi),%rdi
+
+	subq	$512,%rdx
+	jnz	L$oop_outer8x
+
+	jmp	L$done8x
+
+L$tail8x:
+	cmpq	$448,%rdx
+	jae	L$448_or_more8x
+	cmpq	$384,%rdx
+	jae	L$384_or_more8x
+	cmpq	$320,%rdx
+	jae	L$320_or_more8x
+	cmpq	$256,%rdx
+	jae	L$256_or_more8x
+	cmpq	$192,%rdx
+	jae	L$192_or_more8x
+	cmpq	$128,%rdx
+	jae	L$128_or_more8x
+	cmpq	$64,%rdx
+	jae	L$64_or_more8x
+
+	xorq	%r10,%r10
+	vmovdqa	%ymm6,0(%rsp)
+	vmovdqa	%ymm8,32(%rsp)
+	jmp	L$oop_tail8x
+
+.p2align	5
+L$64_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	je	L$done8x
+
+	leaq	64(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm1,0(%rsp)
+	leaq	64(%rdi),%rdi
+	subq	$64,%rdx
+	vmovdqa	%ymm5,32(%rsp)
+	jmp	L$oop_tail8x
+
+.p2align	5
+L$128_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	je	L$done8x
+
+	leaq	128(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm12,0(%rsp)
+	leaq	128(%rdi),%rdi
+	subq	$128,%rdx
+	vmovdqa	%ymm13,32(%rsp)
+	jmp	L$oop_tail8x
+
+.p2align	5
+L$192_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	je	L$done8x
+
+	leaq	192(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm10,0(%rsp)
+	leaq	192(%rdi),%rdi
+	subq	$192,%rdx
+	vmovdqa	%ymm15,32(%rsp)
+	jmp	L$oop_tail8x
+
+.p2align	5
+L$256_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vpxor	192(%rsi),%ymm10,%ymm10
+	vpxor	224(%rsi),%ymm15,%ymm15
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	vmovdqu	%ymm10,192(%rdi)
+	vmovdqu	%ymm15,224(%rdi)
+	je	L$done8x
+
+	leaq	256(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm14,0(%rsp)
+	leaq	256(%rdi),%rdi
+	subq	$256,%rdx
+	vmovdqa	%ymm2,32(%rsp)
+	jmp	L$oop_tail8x
+
+.p2align	5
+L$320_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vpxor	192(%rsi),%ymm10,%ymm10
+	vpxor	224(%rsi),%ymm15,%ymm15
+	vpxor	256(%rsi),%ymm14,%ymm14
+	vpxor	288(%rsi),%ymm2,%ymm2
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	vmovdqu	%ymm10,192(%rdi)
+	vmovdqu	%ymm15,224(%rdi)
+	vmovdqu	%ymm14,256(%rdi)
+	vmovdqu	%ymm2,288(%rdi)
+	je	L$done8x
+
+	leaq	320(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm3,0(%rsp)
+	leaq	320(%rdi),%rdi
+	subq	$320,%rdx
+	vmovdqa	%ymm7,32(%rsp)
+	jmp	L$oop_tail8x
+
+.p2align	5
+L$384_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vpxor	192(%rsi),%ymm10,%ymm10
+	vpxor	224(%rsi),%ymm15,%ymm15
+	vpxor	256(%rsi),%ymm14,%ymm14
+	vpxor	288(%rsi),%ymm2,%ymm2
+	vpxor	320(%rsi),%ymm3,%ymm3
+	vpxor	352(%rsi),%ymm7,%ymm7
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	vmovdqu	%ymm10,192(%rdi)
+	vmovdqu	%ymm15,224(%rdi)
+	vmovdqu	%ymm14,256(%rdi)
+	vmovdqu	%ymm2,288(%rdi)
+	vmovdqu	%ymm3,320(%rdi)
+	vmovdqu	%ymm7,352(%rdi)
+	je	L$done8x
+
+	leaq	384(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm11,0(%rsp)
+	leaq	384(%rdi),%rdi
+	subq	$384,%rdx
+	vmovdqa	%ymm9,32(%rsp)
+	jmp	L$oop_tail8x
+
+.p2align	5
+L$448_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vpxor	192(%rsi),%ymm10,%ymm10
+	vpxor	224(%rsi),%ymm15,%ymm15
+	vpxor	256(%rsi),%ymm14,%ymm14
+	vpxor	288(%rsi),%ymm2,%ymm2
+	vpxor	320(%rsi),%ymm3,%ymm3
+	vpxor	352(%rsi),%ymm7,%ymm7
+	vpxor	384(%rsi),%ymm11,%ymm11
+	vpxor	416(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	vmovdqu	%ymm10,192(%rdi)
+	vmovdqu	%ymm15,224(%rdi)
+	vmovdqu	%ymm14,256(%rdi)
+	vmovdqu	%ymm2,288(%rdi)
+	vmovdqu	%ymm3,320(%rdi)
+	vmovdqu	%ymm7,352(%rdi)
+	vmovdqu	%ymm11,384(%rdi)
+	vmovdqu	%ymm9,416(%rdi)
+	je	L$done8x
+
+	leaq	448(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm0,0(%rsp)
+	leaq	448(%rdi),%rdi
+	subq	$448,%rdx
+	vmovdqa	%ymm4,32(%rsp)
+
+L$oop_tail8x:
+	movzbl	(%rsi,%r10,1),%eax
+	movzbl	(%rsp,%r10,1),%ecx
+	leaq	1(%r10),%r10
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r10,1)
+	decq	%rdx
+	jnz	L$oop_tail8x
+
+L$done8x:
+	vzeroall
+	leaq	(%r9),%rsp
+L$8x_epilogue:
+	.byte	0xf3,0xc3
+
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
new file mode 100644
index 0000000..c8a5262
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
@@ -0,0 +1,3056 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.data	
+// one..eight: 16-byte constants whose low 64-bit lane holds the value and whose high lane is zero; the routines below add them to counter blocks with vpaddd.
+.p2align	4
+one:
+.quad	1,0
+two:
+.quad	2,0
+three:
+.quad	3,0
+four:
+.quad	4,0
+five:
+.quad	5,0
+six:
+.quad	6,0
+seven:
+.quad	7,0
+eight:
+.quad	8,0
+// Masks and round constants referenced RIP-relative by the routines in this file.
+OR_MASK:	// only the top bit of the last dword is set; OR-ed into the initial counter block by the enc_msg routines
+.long	0x00000000,0x00000000,0x00000000,0x80000000
+poly:	// folding constant for the vpclmulqdq reductions in GFMUL and htable_polyval (selector $0x10 picks the high quadword)
+.quad	0x1, 0xc200000000000000
+mask:	// vpshufb control for the key schedules: every dword receives source bytes 13,14,15,12
+.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
+con1:	// initial round constant, 1 in every dword; the key schedules double it with vpslld each round
+.long	1,1,1,1
+con2:	// round constant 0x1b, loaded for the last two round keys of the 128-bit schedules
+.long	0x1b,0x1b,0x1b,0x1b
+con3:	// vpshufb control: index -1 zeroes bytes 0-7, then source dword 1 is copied into dwords 2 and 3
+.byte	-1,-1,-1,-1,-1,-1,-1,-1,4,5,6,7,4,5,6,7
+and_mask:	// clears dword 0 and keeps dwords 1-3; applied by _aes128gcmsiv_kdf
+.long	0,0xffffffff, 0xffffffff, 0xffffffff
+.text	
+// GFMUL (file-local helper): xmm0 = xmm0 * xmm1 by carry-less multiplication, folded twice with the constant poly. Overwrites xmm2-xmm5; xmm1 is left unchanged.
+.p2align	4
+GFMUL:
+// Four 64x64 partial products: xmm2 = low*low, xmm5 = high*high, xmm3 and xmm4 = the two cross terms.
+	vpclmulqdq	$0x00,%xmm1,%xmm0,%xmm2
+	vpclmulqdq	$0x11,%xmm1,%xmm0,%xmm5
+	vpclmulqdq	$0x10,%xmm1,%xmm0,%xmm3
+	vpclmulqdq	$0x01,%xmm1,%xmm0,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$8,%xmm3,%xmm4
+	vpsrldq	$8,%xmm3,%xmm3
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpxor	%xmm3,%xmm5,%xmm5
+// xmm5:xmm2 now hold the 256-bit product (high:low). First folding step: multiply the low quadword of xmm2 by poly, swap the halves of xmm2, XOR.
+	vpclmulqdq	$0x10,poly(%rip),%xmm2,%xmm3
+	vpshufd	$78,%xmm2,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm2
+// Second folding step, same form as the first.
+	vpclmulqdq	$0x10,poly(%rip),%xmm2,%xmm3
+	vpshufd	$78,%xmm2,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm2
+// XOR in the high half to obtain the 128-bit result in xmm0.
+	vpxor	%xmm5,%xmm2,%xmm0
+	.byte	0xf3,0xc3	// rep ret
+// aesgcmsiv_htable_init: fill the table at (%rdi) with eight 16-byte entries derived from the 16-byte value H at (%rsi).
+// Entry 0 is H; each later entry is GFMUL(previous entry, H). Both pointers are accessed with vmovdqa, so they must be 16-byte aligned.
+.globl	_aesgcmsiv_htable_init
+.private_extern _aesgcmsiv_htable_init
+
+.p2align	4
+_aesgcmsiv_htable_init:
+// xmm1 keeps H as the fixed multiplier; xmm0 carries the running product between GFMUL calls.
+	vmovdqa	(%rsi),%xmm0
+	vmovdqa	%xmm0,%xmm1
+	vmovdqa	%xmm0,(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,16(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,32(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,48(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,64(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,80(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,96(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,112(%rdi)
+	.byte	0xf3,0xc3	// rep ret
+// aesgcmsiv_htable6_init: same construction as the eight-entry variant, but writes only six 16-byte entries to (%rdi).
+// Entry 0 is the value H at (%rsi); each later entry is GFMUL(previous entry, H). Both pointers are accessed with vmovdqa (16-byte aligned).
+.globl	_aesgcmsiv_htable6_init
+.private_extern _aesgcmsiv_htable6_init
+
+.p2align	4
+_aesgcmsiv_htable6_init:
+// xmm1 keeps H as the fixed multiplier; xmm0 carries the running product between GFMUL calls.
+	vmovdqa	(%rsi),%xmm0
+	vmovdqa	%xmm0,%xmm1
+	vmovdqa	%xmm0,(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,16(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,32(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,48(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,64(%rdi)
+	call	GFMUL
+	vmovdqa	%xmm0,80(%rdi)
+	.byte	0xf3,0xc3	// rep ret
+// aesgcmsiv_htable_polyval: fold %rdx bytes of input at (%rsi) into the 16-byte accumulator at (%rcx), using the table of eight 16-byte multipliers at (%rdi).
+// Returns at once when %rdx is zero. NOTE(review): the prefix loop steps by 16 and exits only at exactly zero, so %rdx is presumably always a multiple of 16 - confirm against callers.
+.globl	_aesgcmsiv_htable_polyval
+.private_extern _aesgcmsiv_htable_polyval
+// The accumulator is loaded with vmovdqa (16-byte aligned) and stored back with vmovdqu; the table is read through vpclmulqdq memory operands.
+.p2align	4
+_aesgcmsiv_htable_polyval:
+// Nothing to do for an empty input.
+	testq	%rdx,%rdx
+	jnz	L$htable_polyval_start
+	.byte	0xf3,0xc3
+
+L$htable_polyval_start:
+	vzeroall
+// Split the input: %r11 = length mod 128 is the prefix handled first; the rest is consumed 128 bytes at a time.
+// Register roles: xmm3 = sum of low products, xmm4 = sum of high products, xmm5 = sum of cross products,
+// xmm1 / xmm9 = low / high half of the not-yet-folded running value.
+	movq	%rdx,%r11
+	andq	$127,%r11
+
+	jz	L$htable_polyval_no_prefix
+
+	vpxor	%xmm9,%xmm9,%xmm9
+	vmovdqa	(%rcx),%xmm1
+	subq	%r11,%rdx
+// %r11 becomes the byte offset of the table entry for the first prefix block (prefix length - 16).
+	subq	$16,%r11
+
+// The first prefix block is XORed with the incoming accumulator before being multiplied.
+	vmovdqu	(%rsi),%xmm0
+	vpxor	%xmm1,%xmm0,%xmm0
+
+	vpclmulqdq	$0x01,(%rdi,%r11,1),%xmm0,%xmm5
+	vpclmulqdq	$0x00,(%rdi,%r11,1),%xmm0,%xmm3
+	vpclmulqdq	$0x11,(%rdi,%r11,1),%xmm0,%xmm4
+	vpclmulqdq	$0x10,(%rdi,%r11,1),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+	leaq	16(%rsi),%rsi
+	testq	%r11,%r11
+	jnz	L$htable_polyval_prefix_loop
+	jmp	L$htable_polyval_prefix_complete
+
+// Remaining prefix blocks: each one is multiplied by the next lower table entry and accumulated.
+.p2align	6
+L$htable_polyval_prefix_loop:
+	subq	$16,%r11
+
+	vmovdqu	(%rsi),%xmm0
+
+	vpclmulqdq	$0x00,(%rdi,%r11,1),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm3,%xmm3
+	vpclmulqdq	$0x11,(%rdi,%r11,1),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm4,%xmm4
+	vpclmulqdq	$0x01,(%rdi,%r11,1),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x10,(%rdi,%r11,1),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+	testq	%r11,%r11
+// leaq leaves the flags from testq intact for the jnz below.
+	leaq	16(%rsi),%rsi
+
+	jnz	L$htable_polyval_prefix_loop
+
+L$htable_polyval_prefix_complete:
+	vpsrldq	$8,%xmm5,%xmm6
+	vpslldq	$8,%xmm5,%xmm5
+// Spread the cross terms: xmm9 = high half, xmm1 = low half of the unreduced prefix result.
+	vpxor	%xmm6,%xmm4,%xmm9
+	vpxor	%xmm5,%xmm3,%xmm1
+
+	jmp	L$htable_polyval_main_loop
+
+L$htable_polyval_no_prefix:
+// No prefix: start with a zero low half and the incoming accumulator as the high half,
+// so the folding in the main loop (or at the exit) yields the accumulator unchanged.
+
+
+	vpxor	%xmm1,%xmm1,%xmm1
+	vmovdqa	(%rcx),%xmm9
+
+.p2align	6
+L$htable_polyval_main_loop:
+	subq	$0x80,%rdx
+	jb	L$htable_polyval_out
+// One 128-byte group: the last block (offset 112) pairs with table entry 0, the first block (offset 0) with entry 7.
+	vmovdqu	112(%rsi),%xmm0
+
+	vpclmulqdq	$0x01,(%rdi),%xmm0,%xmm5
+	vpclmulqdq	$0x00,(%rdi),%xmm0,%xmm3
+	vpclmulqdq	$0x11,(%rdi),%xmm0,%xmm4
+	vpclmulqdq	$0x10,(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+
+	vmovdqu	96(%rsi),%xmm0
+	vpclmulqdq	$0x01,16(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x00,16(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm3,%xmm3
+	vpclmulqdq	$0x11,16(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm4,%xmm4
+	vpclmulqdq	$0x10,16(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+
+// First folding step of the previous running value (xmm1), interleaved with the multiplies; completed by the vpxor with xmm7 below.
+	vmovdqu	80(%rsi),%xmm0
+
+	vpclmulqdq	$0x10,poly(%rip),%xmm1,%xmm7
+	vpalignr	$8,%xmm1,%xmm1,%xmm1
+
+	vpclmulqdq	$0x01,32(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x00,32(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm3,%xmm3
+	vpclmulqdq	$0x11,32(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm4,%xmm4
+	vpclmulqdq	$0x10,32(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+
+	vpxor	%xmm7,%xmm1,%xmm1
+
+	vmovdqu	64(%rsi),%xmm0
+
+	vpclmulqdq	$0x01,48(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x00,48(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm3,%xmm3
+	vpclmulqdq	$0x11,48(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm4,%xmm4
+	vpclmulqdq	$0x10,48(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+// Second folding step of the previous running value, again interleaved with the multiplies.
+	vmovdqu	48(%rsi),%xmm0
+
+	vpclmulqdq	$0x10,poly(%rip),%xmm1,%xmm7
+	vpalignr	$8,%xmm1,%xmm1,%xmm1
+
+	vpclmulqdq	$0x01,64(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x00,64(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm3,%xmm3
+	vpclmulqdq	$0x11,64(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm4,%xmm4
+	vpclmulqdq	$0x10,64(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+
+	vpxor	%xmm7,%xmm1,%xmm1
+
+	vmovdqu	32(%rsi),%xmm0
+
+	vpclmulqdq	$0x01,80(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x00,80(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm3,%xmm3
+	vpclmulqdq	$0x11,80(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm4,%xmm4
+	vpclmulqdq	$0x10,80(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+// Adding the saved high half completes the reduction of the previous running value into xmm1.
+	vpxor	%xmm9,%xmm1,%xmm1
+
+	vmovdqu	16(%rsi),%xmm0
+
+	vpclmulqdq	$0x01,96(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x00,96(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm3,%xmm3
+	vpclmulqdq	$0x11,96(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm4,%xmm4
+	vpclmulqdq	$0x10,96(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+// The first block of the group absorbs the reduced previous value before its multiply by table entry 7.
+	vmovdqu	0(%rsi),%xmm0
+	vpxor	%xmm1,%xmm0,%xmm0
+
+	vpclmulqdq	$0x01,112(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x00,112(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm3,%xmm3
+	vpclmulqdq	$0x11,112(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm4,%xmm4
+	vpclmulqdq	$0x10,112(%rdi),%xmm0,%xmm6
+	vpxor	%xmm6,%xmm5,%xmm5
+
+// Re-form the unreduced running value for the next iteration: xmm9 = high half, xmm1 = low half.
+	vpsrldq	$8,%xmm5,%xmm6
+	vpslldq	$8,%xmm5,%xmm5
+
+	vpxor	%xmm6,%xmm4,%xmm9
+	vpxor	%xmm5,%xmm3,%xmm1
+
+	leaq	128(%rsi),%rsi
+	jmp	L$htable_polyval_main_loop
+
+
+// Exit: two folding steps on the low half, add the high half, and write the result back to the accumulator.
+L$htable_polyval_out:
+	vpclmulqdq	$0x10,poly(%rip),%xmm1,%xmm6
+	vpalignr	$8,%xmm1,%xmm1,%xmm1
+	vpxor	%xmm6,%xmm1,%xmm1
+
+	vpclmulqdq	$0x10,poly(%rip),%xmm1,%xmm6
+	vpalignr	$8,%xmm1,%xmm1,%xmm1
+	vpxor	%xmm6,%xmm1,%xmm1
+	vpxor	%xmm9,%xmm1,%xmm1
+
+	vmovdqu	%xmm1,(%rcx)
+	vzeroupper
+	.byte	0xf3,0xc3	// rep ret
+// aesgcmsiv_polyval_horner: for each of the %rcx 16-byte blocks at (%rdx), XOR the block into the accumulator and multiply by H with GFMUL.
+// The accumulator is at (%rdi) (read and written back) and H is at (%rsi); both are accessed with vmovdqa (16-byte aligned). Returns at once when %rcx is zero.
+.globl	_aesgcmsiv_polyval_horner
+.private_extern _aesgcmsiv_polyval_horner
+
+.p2align	4
+_aesgcmsiv_polyval_horner:
+
+	testq	%rcx,%rcx
+	jnz	L$polyval_horner_start
+	.byte	0xf3,0xc3
+
+L$polyval_horner_start:
+// %r10 is the running byte offset into the input; %rcx is converted from a block count to a byte count.
+
+
+	xorq	%r10,%r10
+	shlq	$4,%rcx
+// xmm1 = H (fixed multiplier for GFMUL), xmm0 = accumulator.
+	vmovdqa	(%rsi),%xmm1
+	vmovdqa	(%rdi),%xmm0
+
+L$polyval_horner_loop:
+	vpxor	(%rdx,%r10,1),%xmm0,%xmm0
+	call	GFMUL
+
+	addq	$16,%r10
+	cmpq	%r10,%rcx
+	jne	L$polyval_horner_loop
+
+// Store the updated accumulator.
+	vmovdqa	%xmm0,(%rdi)
+	.byte	0xf3,0xc3	// rep ret
+// aes128gcmsiv_aes_ks: expand the 16-byte key at (%rdi) into eleven 16-byte round keys written to (%rsi) at offsets 0..160.
+// The key is read with vmovdqu; the schedule is written with vmovdqa, so (%rsi) must be 16-byte aligned. %rsi is advanced while the routine runs.
+.globl	_aes128gcmsiv_aes_ks
+.private_extern _aes128gcmsiv_aes_ks
+
+.p2align	4
+_aes128gcmsiv_aes_ks:
+// Round key 0 is the key itself.
+	vmovdqu	(%rdi),%xmm1
+	vmovdqa	%xmm1,(%rsi)
+// xmm0 = current round constant, xmm15 = byte-shuffle control for the last dword of the previous round key.
+	vmovdqa	con1(%rip),%xmm0
+	vmovdqa	mask(%rip),%xmm15
+
+	movq	$8,%rax
+// Round keys 1..8. The flags set by subq survive the vector instructions and are consumed by jne at the bottom.
+L$ks128_loop:
+	addq	$16,%rsi
+	subq	$1,%rax
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpslldq	$4,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpslldq	$4,%xmm3,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpslldq	$4,%xmm3,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vmovdqa	%xmm1,(%rsi)
+	jne	L$ks128_loop
+// Round key 9 uses round constant 0x1b (con2); %rsi now points at round key 8, hence the +16 offset.
+	vmovdqa	con2(%rip),%xmm0
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpslldq	$4,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpslldq	$4,%xmm3,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpslldq	$4,%xmm3,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vmovdqa	%xmm1,16(%rsi)
+// Round key 10 uses the doubled constant (0x36); no further doubling is needed afterwards.
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslldq	$4,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpslldq	$4,%xmm3,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpslldq	$4,%xmm3,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vmovdqa	%xmm1,32(%rsi)
+	.byte	0xf3,0xc3	// rep ret
+// aes256gcmsiv_aes_ks: expand the 32-byte key at (%rdi) into fifteen 16-byte round keys written to (%rsi) at offsets 0..224.
+// The key is read with vmovdqu; the schedule is written with vmovdqa, so (%rsi) must be 16-byte aligned. %rsi is advanced while the routine runs.
+.globl	_aes256gcmsiv_aes_ks
+.private_extern _aes256gcmsiv_aes_ks
+
+.p2align	4
+_aes256gcmsiv_aes_ks:
+// Round keys 0 and 1 are the two key halves. xmm0 = round constant, xmm15 = shuffle control, xmm14 = 0 (a zero key for the vaesenclast in the odd step).
+	vmovdqu	(%rdi),%xmm1
+	vmovdqu	16(%rdi),%xmm3
+	vmovdqa	%xmm1,(%rsi)
+	vmovdqa	%xmm3,16(%rsi)
+	vmovdqa	con1(%rip),%xmm0
+	vmovdqa	mask(%rip),%xmm15
+	vpxor	%xmm14,%xmm14,%xmm14
+	movq	$6,%rax
+// Six iterations, two round keys each (keys 2..13). The flags set by subq survive the vector instructions and are consumed by jne at the bottom.
+L$ks256_loop:
+	addq	$32,%rsi
+	subq	$1,%rax
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vmovdqa	%xmm1,(%rsi)
+	vpshufd	$0xff,%xmm1,%xmm2
+	vaesenclast	%xmm14,%xmm2,%xmm2
+	vpsllq	$32,%xmm3,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpshufb	con3(%rip),%xmm3,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpxor	%xmm2,%xmm3,%xmm3
+	vmovdqa	%xmm3,16(%rsi)
+	jne	L$ks256_loop
+// Final round key 14: only the even step is needed; %rsi points at round key 12, hence the +32 offset.
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpsllq	$32,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vmovdqa	%xmm1,32(%rsi)
+	.byte	0xf3,0xc3	// rep ret
+// aes128gcmsiv_aes_ks_enc_x1: expand the 16-byte key at (%rcx) into eleven round keys at (%rdx) while encrypting the single block at (%rdi) with them; the result is stored at (%rsi). All four pointers are accessed with vmovdqa (16-byte aligned).
+.globl	_aes128gcmsiv_aes_ks_enc_x1
+.private_extern _aes128gcmsiv_aes_ks_enc_x1
+
+.p2align	4
+_aes128gcmsiv_aes_ks_enc_x1:
+// xmm1 = current round key, xmm4 = cipher state.
+	vmovdqa	(%rcx),%xmm1
+	vmovdqa	0(%rdi),%xmm4
+// Round key 0 is the key itself; XOR it into the state.
+	vmovdqa	%xmm1,(%rdx)
+	vpxor	%xmm1,%xmm4,%xmm4
+// xmm0 = round constant (doubled after each use), xmm15 = shuffle control for the last dword.
+	vmovdqa	con1(%rip),%xmm0
+	vmovdqa	mask(%rip),%xmm15
+// Each group below derives the next round key in xmm1, applies it to the state, and stores it. Round key 1:
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,16(%rdx)
+// Round key 2.
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,32(%rdx)
+// Round key 3.
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,48(%rdx)
+// Round key 4.
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,64(%rdx)
+// Round key 5.
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,80(%rdx)
+// Round key 6.
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,96(%rdx)
+// Round key 7.
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,112(%rdx)
+// Round key 8.
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,128(%rdx)
+
+// Round key 9 switches to round constant 0x1b (con2).
+	vmovdqa	con2(%rip),%xmm0
+
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,144(%rdx)
+// Round key 10 (constant 0x36) is applied with vaesenclast as the final round.
+	vpshufb	%xmm15,%xmm1,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpsllq	$32,%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpshufb	con3(%rip),%xmm1,%xmm3
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+
+	vaesenclast	%xmm1,%xmm4,%xmm4
+	vmovdqa	%xmm1,160(%rdx)
+
+// Store the encrypted block.
+	vmovdqa	%xmm4,0(%rsi)
+	.byte	0xf3,0xc3	// rep ret
+// aes128gcmsiv_kdf: build four blocks from the 16 bytes at (%rdi), encrypt them with the eleven round keys at (%rdx) (offsets 0..160), and store the 64 bytes of output at (%rsi).
+// All three pointers are accessed with vmovdqa, so they must be 16-byte aligned.
+.globl	_aes128gcmsiv_kdf
+.private_extern _aes128gcmsiv_kdf
+
+.p2align	4
+_aes128gcmsiv_kdf:
+// Block layout: vpshufd $0x90 moves input dwords 0,1,2 into positions 1,2,3 and and_mask clears dword 0,
+// giving (0, in0, in1, in2). Adding the constant one then yields dword-0 counters 0, 1, 2, 3 in xmm9..xmm12.
+// NOTE(review): the input is presumably the nonce; input dword 3 is discarded - confirm against callers.
+
+
+	vmovdqa	(%rdx),%xmm1
+	vmovdqa	0(%rdi),%xmm9
+	vmovdqa	and_mask(%rip),%xmm12
+	vmovdqa	one(%rip),%xmm13
+	vpshufd	$0x90,%xmm9,%xmm9
+	vpand	%xmm12,%xmm9,%xmm9
+	vpaddd	%xmm13,%xmm9,%xmm10
+	vpaddd	%xmm13,%xmm10,%xmm11
+	vpaddd	%xmm13,%xmm11,%xmm12
+// Round key 0 (already in xmm1) is XORed into all four blocks.
+	vpxor	%xmm1,%xmm9,%xmm9
+	vpxor	%xmm1,%xmm10,%xmm10
+	vpxor	%xmm1,%xmm11,%xmm11
+	vpxor	%xmm1,%xmm12,%xmm12
+// Rounds 1..9: round keys are loaded alternately into xmm1 and xmm2 and applied to the four blocks.
+	vmovdqa	16(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm9,%xmm9
+	vaesenc	%xmm1,%xmm10,%xmm10
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+
+	vmovdqa	32(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm9,%xmm9
+	vaesenc	%xmm2,%xmm10,%xmm10
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+
+	vmovdqa	48(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm9,%xmm9
+	vaesenc	%xmm1,%xmm10,%xmm10
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+
+	vmovdqa	64(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm9,%xmm9
+	vaesenc	%xmm2,%xmm10,%xmm10
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+
+	vmovdqa	80(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm9,%xmm9
+	vaesenc	%xmm1,%xmm10,%xmm10
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+
+	vmovdqa	96(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm9,%xmm9
+	vaesenc	%xmm2,%xmm10,%xmm10
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+
+	vmovdqa	112(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm9,%xmm9
+	vaesenc	%xmm1,%xmm10,%xmm10
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+
+	vmovdqa	128(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm9,%xmm9
+	vaesenc	%xmm2,%xmm10,%xmm10
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+
+	vmovdqa	144(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm9,%xmm9
+	vaesenc	%xmm1,%xmm10,%xmm10
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+// Final round with round key 10.
+	vmovdqa	160(%rdx),%xmm2
+	vaesenclast	%xmm2,%xmm9,%xmm9
+	vaesenclast	%xmm2,%xmm10,%xmm10
+	vaesenclast	%xmm2,%xmm11,%xmm11
+	vaesenclast	%xmm2,%xmm12,%xmm12
+
+// Write the four encrypted blocks in counter order.
+	vmovdqa	%xmm9,0(%rsi)
+	vmovdqa	%xmm10,16(%rsi)
+	vmovdqa	%xmm11,32(%rsi)
+	vmovdqa	%xmm12,48(%rsi)
+	.byte	0xf3,0xc3	// rep ret
+// aes128gcmsiv_enc_msg_x4: counter-mode pass over %r8 bytes: XORs the input at (%rdi) with AES-128 keystream and writes the result to (%rsi), four blocks per iteration.
+// (%rdx) = 16-byte initial counter block (vmovdqa, 16-byte aligned); (%rcx) = eleven round keys at offsets 0..160. Returns at once when %r8 is zero.
+.globl	_aes128gcmsiv_enc_msg_x4
+.private_extern _aes128gcmsiv_enc_msg_x4
+// Only whole 16-byte blocks are processed: %r8 is shifted right by 4, so any trailing partial block is ignored here.
+.p2align	4
+_aes128gcmsiv_enc_msg_x4:
+
+	testq	%r8,%r8
+	jnz	L$128_enc_msg_x4_start
+	.byte	0xf3,0xc3
+
+L$128_enc_msg_x4_start:
+	pushq	%r12
+// %r12 and %r13 are saved and restored but not otherwise referenced in this routine.
+	pushq	%r13
+
+// %r8 = number of 16-byte blocks; %r10 = blocks mod 4 (left for the single-block loop).
+	shrq	$4,%r8
+	movq	%r8,%r10
+	shlq	$62,%r10
+	shrq	$62,%r10
+
+// Initial counter = block at (%rdx) with the OR_MASK bit forced on; xmm0..xmm3 = counter+0..+3, xmm4 = per-iteration increment of 4.
+	vmovdqa	(%rdx),%xmm15
+	vpor	OR_MASK(%rip),%xmm15,%xmm15
+
+	vmovdqu	four(%rip),%xmm4
+	vmovdqa	%xmm15,%xmm0
+	vpaddd	one(%rip),%xmm15,%xmm1
+	vpaddd	two(%rip),%xmm15,%xmm2
+	vpaddd	three(%rip),%xmm15,%xmm3
+// %r8 = number of four-block groups; skip the wide loop when there are none (je uses the flags from shrq).
+	shrq	$2,%r8
+	je	L$128_enc_msg_x4_check_remainder
+// Pre-bias both pointers because the loop advances them at its top.
+	subq	$64,%rsi
+	subq	$64,%rdi
+
+L$128_enc_msg_x4_loop1:
+	addq	$64,%rsi
+	addq	$64,%rdi
+// Working copies of the four counters go to xmm5..xmm8; xmm12 carries each round key in turn.
+	vmovdqa	%xmm0,%xmm5
+	vmovdqa	%xmm1,%xmm6
+	vmovdqa	%xmm2,%xmm7
+	vmovdqa	%xmm3,%xmm8
+
+	vpxor	(%rcx),%xmm5,%xmm5
+	vpxor	(%rcx),%xmm6,%xmm6
+	vpxor	(%rcx),%xmm7,%xmm7
+	vpxor	(%rcx),%xmm8,%xmm8
+
+	vmovdqu	16(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+// The counters for the next iteration are advanced between rounds (one vpaddd per round for the next four rounds).
+	vpaddd	%xmm4,%xmm0,%xmm0
+	vmovdqu	32(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vpaddd	%xmm4,%xmm1,%xmm1
+	vmovdqu	48(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vpaddd	%xmm4,%xmm2,%xmm2
+	vmovdqu	64(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vpaddd	%xmm4,%xmm3,%xmm3
+
+	vmovdqu	80(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	96(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	112(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	128(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	144(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	160(%rcx),%xmm12
+	vaesenclast	%xmm12,%xmm5,%xmm5
+	vaesenclast	%xmm12,%xmm6,%xmm6
+	vaesenclast	%xmm12,%xmm7,%xmm7
+	vaesenclast	%xmm12,%xmm8,%xmm8
+
+
+// XOR the keystream with the input blocks.
+	vpxor	0(%rdi),%xmm5,%xmm5
+	vpxor	16(%rdi),%xmm6,%xmm6
+	vpxor	32(%rdi),%xmm7,%xmm7
+	vpxor	48(%rdi),%xmm8,%xmm8
+// The flags set by subq survive the vector stores and are consumed by jne below.
+	subq	$1,%r8
+
+	vmovdqu	%xmm5,0(%rsi)
+	vmovdqu	%xmm6,16(%rsi)
+	vmovdqu	%xmm7,32(%rsi)
+	vmovdqu	%xmm8,48(%rsi)
+
+	jne	L$128_enc_msg_x4_loop1
+// Step past the last group so the remainder loop starts at the next unprocessed block.
+	addq	$64,%rsi
+	addq	$64,%rdi
+
+L$128_enc_msg_x4_check_remainder:
+	cmpq	$0,%r10
+	je	L$128_enc_msg_x4_out
+
+L$128_enc_msg_x4_loop2:
+// Leftover blocks (1..3), one at a time: xmm0 holds the next counter and is advanced by one per block.
+
+	vmovdqa	%xmm0,%xmm5
+	vpaddd	one(%rip),%xmm0,%xmm0
+
+	vpxor	(%rcx),%xmm5,%xmm5
+	vaesenc	16(%rcx),%xmm5,%xmm5
+	vaesenc	32(%rcx),%xmm5,%xmm5
+	vaesenc	48(%rcx),%xmm5,%xmm5
+	vaesenc	64(%rcx),%xmm5,%xmm5
+	vaesenc	80(%rcx),%xmm5,%xmm5
+	vaesenc	96(%rcx),%xmm5,%xmm5
+	vaesenc	112(%rcx),%xmm5,%xmm5
+	vaesenc	128(%rcx),%xmm5,%xmm5
+	vaesenc	144(%rcx),%xmm5,%xmm5
+	vaesenclast	160(%rcx),%xmm5,%xmm5
+
+
+	vpxor	(%rdi),%xmm5,%xmm5
+	vmovdqu	%xmm5,(%rsi)
+
+	addq	$16,%rdi
+	addq	$16,%rsi
+
+	subq	$1,%r10
+	jne	L$128_enc_msg_x4_loop2
+
+L$128_enc_msg_x4_out:
+	popq	%r13
+
+	popq	%r12
+
+	.byte	0xf3,0xc3	// rep ret
+
+
+.globl	_aes128gcmsiv_enc_msg_x8
+.private_extern _aes128gcmsiv_enc_msg_x8
+
+.p2align	4
+_aes128gcmsiv_enc_msg_x8:
+
+	testq	%r8,%r8
+	jnz	L$128_enc_msg_x8_start
+	.byte	0xf3,0xc3
+
+L$128_enc_msg_x8_start:
+	pushq	%r12
+
+	pushq	%r13
+
+	pushq	%rbp
+
+	movq	%rsp,%rbp
+
+
+
+	subq	$128,%rsp
+	andq	$-64,%rsp
+
+	shrq	$4,%r8
+	movq	%r8,%r10
+	shlq	$61,%r10
+	shrq	$61,%r10
+
+
+	vmovdqu	(%rdx),%xmm1
+	vpor	OR_MASK(%rip),%xmm1,%xmm1
+
+
+	vpaddd	seven(%rip),%xmm1,%xmm0
+	vmovdqu	%xmm0,(%rsp)
+	vpaddd	one(%rip),%xmm1,%xmm9
+	vpaddd	two(%rip),%xmm1,%xmm10
+	vpaddd	three(%rip),%xmm1,%xmm11
+	vpaddd	four(%rip),%xmm1,%xmm12
+	vpaddd	five(%rip),%xmm1,%xmm13
+	vpaddd	six(%rip),%xmm1,%xmm14
+	vmovdqa	%xmm1,%xmm0
+
+	shrq	$3,%r8
+	je	L$128_enc_msg_x8_check_remainder
+
+	subq	$128,%rsi
+	subq	$128,%rdi
+
+L$128_enc_msg_x8_loop1:
+	addq	$128,%rsi
+	addq	$128,%rdi
+
+	vmovdqa	%xmm0,%xmm1
+	vmovdqa	%xmm9,%xmm2
+	vmovdqa	%xmm10,%xmm3
+	vmovdqa	%xmm11,%xmm4
+	vmovdqa	%xmm12,%xmm5
+	vmovdqa	%xmm13,%xmm6
+	vmovdqa	%xmm14,%xmm7
+
+	vmovdqu	(%rsp),%xmm8
+
+	vpxor	(%rcx),%xmm1,%xmm1
+	vpxor	(%rcx),%xmm2,%xmm2
+	vpxor	(%rcx),%xmm3,%xmm3
+	vpxor	(%rcx),%xmm4,%xmm4
+	vpxor	(%rcx),%xmm5,%xmm5
+	vpxor	(%rcx),%xmm6,%xmm6
+	vpxor	(%rcx),%xmm7,%xmm7
+	vpxor	(%rcx),%xmm8,%xmm8
+
+	vmovdqu	16(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	(%rsp),%xmm14
+	vpaddd	eight(%rip),%xmm14,%xmm14
+	vmovdqu	%xmm14,(%rsp)
+	vmovdqu	32(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpsubd	one(%rip),%xmm14,%xmm14
+	vmovdqu	48(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm0,%xmm0
+	vmovdqu	64(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm9,%xmm9
+	vmovdqu	80(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm10,%xmm10
+	vmovdqu	96(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm11,%xmm11
+	vmovdqu	112(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm12,%xmm12
+	vmovdqu	128(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm13,%xmm13
+	vmovdqu	144(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	160(%rcx),%xmm15
+	vaesenclast	%xmm15,%xmm1,%xmm1
+	vaesenclast	%xmm15,%xmm2,%xmm2
+	vaesenclast	%xmm15,%xmm3,%xmm3
+	vaesenclast	%xmm15,%xmm4,%xmm4
+	vaesenclast	%xmm15,%xmm5,%xmm5
+	vaesenclast	%xmm15,%xmm6,%xmm6
+	vaesenclast	%xmm15,%xmm7,%xmm7
+	vaesenclast	%xmm15,%xmm8,%xmm8
+
+
+
+	vpxor	0(%rdi),%xmm1,%xmm1
+	vpxor	16(%rdi),%xmm2,%xmm2
+	vpxor	32(%rdi),%xmm3,%xmm3
+	vpxor	48(%rdi),%xmm4,%xmm4
+	vpxor	64(%rdi),%xmm5,%xmm5
+	vpxor	80(%rdi),%xmm6,%xmm6
+	vpxor	96(%rdi),%xmm7,%xmm7
+	vpxor	112(%rdi),%xmm8,%xmm8
+
+	decq	%r8
+
+	vmovdqu	%xmm1,0(%rsi)
+	vmovdqu	%xmm2,16(%rsi)
+	vmovdqu	%xmm3,32(%rsi)
+	vmovdqu	%xmm4,48(%rsi)
+	vmovdqu	%xmm5,64(%rsi)
+	vmovdqu	%xmm6,80(%rsi)
+	vmovdqu	%xmm7,96(%rsi)
+	vmovdqu	%xmm8,112(%rsi)
+
+	jne	L$128_enc_msg_x8_loop1
+
+	addq	$128,%rsi
+	addq	$128,%rdi
+
+L$128_enc_msg_x8_check_remainder:
+	cmpq	$0,%r10
+	je	L$128_enc_msg_x8_out
+
+L$128_enc_msg_x8_loop2:
+
+
+	vmovdqa	%xmm0,%xmm1
+	vpaddd	one(%rip),%xmm0,%xmm0
+
+	vpxor	(%rcx),%xmm1,%xmm1
+	vaesenc	16(%rcx),%xmm1,%xmm1
+	vaesenc	32(%rcx),%xmm1,%xmm1
+	vaesenc	48(%rcx),%xmm1,%xmm1
+	vaesenc	64(%rcx),%xmm1,%xmm1
+	vaesenc	80(%rcx),%xmm1,%xmm1
+	vaesenc	96(%rcx),%xmm1,%xmm1
+	vaesenc	112(%rcx),%xmm1,%xmm1
+	vaesenc	128(%rcx),%xmm1,%xmm1
+	vaesenc	144(%rcx),%xmm1,%xmm1
+	vaesenclast	160(%rcx),%xmm1,%xmm1
+
+
+	vpxor	(%rdi),%xmm1,%xmm1
+
+	vmovdqu	%xmm1,(%rsi)
+
+	addq	$16,%rdi
+	addq	$16,%rsi
+
+	decq	%r10
+	jne	L$128_enc_msg_x8_loop2
+
+L$128_enc_msg_x8_out:
+	movq	%rbp,%rsp
+
+	popq	%rbp
+
+	popq	%r13
+
+	popq	%r12
+
+	.byte	0xf3,0xc3
+
+
+.globl	_aes128gcmsiv_dec
+.private_extern _aes128gcmsiv_dec
+/* _aes128gcmsiv_dec: AES-128 counter-mode decryption fused with a carry-less-multiply hash of the produced output blocks. */
+.p2align	4
+_aes128gcmsiv_dec:
+/* Register use below: rdi=input, rsi=output, rdx=16-byte hash state followed by scratch, rcx=table of 16-byte hash multipliers (presumably precomputed key powers - confirm with caller), r8=11 round keys, r9=byte length. */
+	testq	$~15,%r9
+	jnz	L$128_dec_start
+	.byte	0xf3,0xc3	/* encoded rep ret: nothing to do when r9 < 16 */
+
+L$128_dec_start:
+	vzeroupper
+	vmovdqa	(%rdx),%xmm0
+	movq	%rdx,%rax
+/* xmm0 = running hash state. rax = rdx+32 and rcx += 32: later accesses use offsets relative to these biased pointers. */
+	leaq	32(%rax),%rax
+	leaq	32(%rcx),%rcx
+
+/* Initial counter block = the 16 bytes located right after the r9 input bytes, OR-ed with OR_MASK; r9 is then rounded down to whole blocks. */
+	vmovdqu	(%rdi,%r9,1),%xmm15
+	vpor	OR_MASK(%rip),%xmm15,%xmm15
+	andq	$~15,%r9
+
+/* Fewer than 96 bytes (6 blocks): skip the pipelined path and go block by block. */
+	cmpq	$96,%r9
+	jb	L$128_dec_loop2
+
+/* First 96 bytes: build six counter blocks in xmm7..xmm12 (via the one/two constants) and decrypt them with no hashing yet; xmm15 keeps the next counter. */
+	subq	$96,%r9
+	vmovdqa	%xmm15,%xmm7
+	vpaddd	one(%rip),%xmm7,%xmm8
+	vpaddd	two(%rip),%xmm7,%xmm9
+	vpaddd	one(%rip),%xmm9,%xmm10
+	vpaddd	two(%rip),%xmm9,%xmm11
+	vpaddd	one(%rip),%xmm11,%xmm12
+	vpaddd	two(%rip),%xmm11,%xmm15
+/* Round 0: XOR each counter block with the first round key. */
+	vpxor	(%r8),%xmm7,%xmm7
+	vpxor	(%r8),%xmm8,%xmm8
+	vpxor	(%r8),%xmm9,%xmm9
+	vpxor	(%r8),%xmm10,%xmm10
+	vpxor	(%r8),%xmm11,%xmm11
+	vpxor	(%r8),%xmm12,%xmm12
+/* Rounds 1-9: one round key at a time (loaded into xmm4) applied to all six blocks. */
+	vmovdqu	16(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	32(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	48(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	64(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	80(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	96(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	112(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	128(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	144(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+/* Final round with the last round key (offset 160). */
+	vmovdqu	160(%r8),%xmm4
+	vaesenclast	%xmm4,%xmm7,%xmm7
+	vaesenclast	%xmm4,%xmm8,%xmm8
+	vaesenclast	%xmm4,%xmm9,%xmm9
+	vaesenclast	%xmm4,%xmm10,%xmm10
+	vaesenclast	%xmm4,%xmm11,%xmm11
+	vaesenclast	%xmm4,%xmm12,%xmm12
+
+/* XOR the keystream with the input and store the six output blocks; they stay in xmm7..xmm12 for hashing on the next pass. */
+	vpxor	0(%rdi),%xmm7,%xmm7
+	vpxor	16(%rdi),%xmm8,%xmm8
+	vpxor	32(%rdi),%xmm9,%xmm9
+	vpxor	48(%rdi),%xmm10,%xmm10
+	vpxor	64(%rdi),%xmm11,%xmm11
+	vpxor	80(%rdi),%xmm12,%xmm12
+
+	vmovdqu	%xmm7,0(%rsi)
+	vmovdqu	%xmm8,16(%rsi)
+	vmovdqu	%xmm9,32(%rsi)
+	vmovdqu	%xmm10,48(%rsi)
+	vmovdqu	%xmm11,64(%rsi)
+	vmovdqu	%xmm12,80(%rsi)
+
+	addq	$96,%rdi
+	addq	$96,%rsi
+	jmp	L$128_dec_loop1
+
+/* Pipelined loop: each pass decrypts the next 96 bytes while hashing the six output blocks of the previous pass. */
+.p2align	6
+L$128_dec_loop1:
+	cmpq	$96,%r9
+	jb	L$128_dec_finish_96
+	subq	$96,%r9
+/* Stash the previous six output blocks: the newest (xmm12) stays in xmm6, the rest go to the scratch area (oldest, xmm7, at 80-32(%rax)). */
+	vmovdqa	%xmm12,%xmm6
+	vmovdqa	%xmm11,16-32(%rax)
+	vmovdqa	%xmm10,32-32(%rax)
+	vmovdqa	%xmm9,48-32(%rax)
+	vmovdqa	%xmm8,64-32(%rax)
+	vmovdqa	%xmm7,80-32(%rax)
+/* Next six counter blocks; xmm15 again ends up holding the counter for the following pass. */
+	vmovdqa	%xmm15,%xmm7
+	vpaddd	one(%rip),%xmm7,%xmm8
+	vpaddd	two(%rip),%xmm7,%xmm9
+	vpaddd	one(%rip),%xmm9,%xmm10
+	vpaddd	two(%rip),%xmm9,%xmm11
+	vpaddd	one(%rip),%xmm11,%xmm12
+	vpaddd	two(%rip),%xmm11,%xmm15
+
+	vmovdqa	(%r8),%xmm4
+	vpxor	%xmm4,%xmm7,%xmm7
+	vpxor	%xmm4,%xmm8,%xmm8
+	vpxor	%xmm4,%xmm9,%xmm9
+	vpxor	%xmm4,%xmm10,%xmm10
+	vpxor	%xmm4,%xmm11,%xmm11
+	vpxor	%xmm4,%xmm12,%xmm12
+/* Multiply the block in xmm6 by the first table entry: xmm2 = high x high, xmm3 = low x low, xmm1 = sum of the two cross products. */
+	vmovdqu	0-32(%rcx),%xmm4
+	vpclmulqdq	$0x11,%xmm4,%xmm6,%xmm2
+	vpclmulqdq	$0x00,%xmm4,%xmm6,%xmm3
+	vpclmulqdq	$0x01,%xmm4,%xmm6,%xmm1
+	vpclmulqdq	$0x10,%xmm4,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	16(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+/* Accumulate the product of the next stashed block with the next table entry into xmm1/xmm2/xmm3; repeated below, interleaved with AES rounds. */
+	vmovdqu	-16(%rax),%xmm6
+	vmovdqu	-16(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+
+	vmovdqu	32(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	0(%rax),%xmm6
+	vmovdqu	0(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+
+	vmovdqu	48(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	16(%rax),%xmm6
+	vmovdqu	16(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+
+	vmovdqu	64(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	32(%rax),%xmm6
+	vmovdqu	32(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+
+	vmovdqu	80(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	96(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	112(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+/* The oldest stashed block is XOR-ed with the running hash (xmm0) before being multiplied by the last table entry used here. */
+	vmovdqa	80-32(%rax),%xmm6
+	vpxor	%xmm0,%xmm6,%xmm6
+	vmovdqu	80-32(%rcx),%xmm5
+
+	vpclmulqdq	$0x01,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x10,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	128(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+/* Fold the cross terms into the 256-bit sum: xmm5 = upper 128 bits, xmm0 = lower 128 bits. */
+	vpsrldq	$8,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm5
+	vpslldq	$8,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm0
+
+	vmovdqa	poly(%rip),%xmm3
+/* Reduction uses the poly constant in two swap-multiply-XOR steps, interleaved with the last AES rounds. */
+	vmovdqu	144(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	160(%r8),%xmm6
+	vpalignr	$8,%xmm0,%xmm0,%xmm2
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm0
+	vpxor	%xmm0,%xmm2,%xmm0
+/* Final AES round keyed with (last round key XOR input block), so vaesenclast yields the output block directly. */
+	vpxor	0(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm7,%xmm7
+	vpxor	16(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm8,%xmm8
+	vpxor	32(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm9,%xmm9
+	vpxor	48(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm10,%xmm10
+	vpxor	64(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm11,%xmm11
+	vpxor	80(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm12,%xmm12
+
+	vpalignr	$8,%xmm0,%xmm0,%xmm2
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm0
+	vpxor	%xmm0,%xmm2,%xmm0
+
+	vmovdqu	%xmm7,0(%rsi)
+	vmovdqu	%xmm8,16(%rsi)
+	vmovdqu	%xmm9,32(%rsi)
+	vmovdqu	%xmm10,48(%rsi)
+	vmovdqu	%xmm11,64(%rsi)
+	vmovdqu	%xmm12,80(%rsi)
+/* xmm0 = reduced lower half XOR upper half = updated running hash. */
+	vpxor	%xmm5,%xmm0,%xmm0
+
+	leaq	96(%rdi),%rdi
+	leaq	96(%rsi),%rsi
+	jmp	L$128_dec_loop1
+/* Tail of the pipelined path: hash the last six output blocks (same multiply/reduce sequence as above) with no further AES work. */
+L$128_dec_finish_96:
+	vmovdqa	%xmm12,%xmm6
+	vmovdqa	%xmm11,16-32(%rax)
+	vmovdqa	%xmm10,32-32(%rax)
+	vmovdqa	%xmm9,48-32(%rax)
+	vmovdqa	%xmm8,64-32(%rax)
+	vmovdqa	%xmm7,80-32(%rax)
+
+	vmovdqu	0-32(%rcx),%xmm4
+	vpclmulqdq	$0x10,%xmm4,%xmm6,%xmm1
+	vpclmulqdq	$0x11,%xmm4,%xmm6,%xmm2
+	vpclmulqdq	$0x00,%xmm4,%xmm6,%xmm3
+	vpclmulqdq	$0x01,%xmm4,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	-16(%rax),%xmm6
+	vmovdqu	-16(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	0(%rax),%xmm6
+	vmovdqu	0(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	16(%rax),%xmm6
+	vmovdqu	16(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	32(%rax),%xmm6
+	vmovdqu	32(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+/* Oldest block: XOR with the running hash before its multiply, then fold and reduce as in the loop. */
+	vmovdqu	80-32(%rax),%xmm6
+	vpxor	%xmm0,%xmm6,%xmm6
+	vmovdqu	80-32(%rcx),%xmm5
+	vpclmulqdq	$0x11,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x10,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x01,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vpsrldq	$8,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm5
+	vpslldq	$8,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm0
+
+	vmovdqa	poly(%rip),%xmm3
+
+	vpalignr	$8,%xmm0,%xmm0,%xmm2
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm0
+	vpxor	%xmm0,%xmm2,%xmm0
+
+	vpalignr	$8,%xmm0,%xmm0,%xmm2
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm0
+	vpxor	%xmm0,%xmm2,%xmm0
+
+	vpxor	%xmm5,%xmm0,%xmm0
+
+L$128_dec_loop2:
+/* Remaining whole blocks, one per iteration: CTR-decrypt with counter xmm15, store, then fold the output block into the hash. */
+
+
+	cmpq	$16,%r9
+	jb	L$128_dec_out
+	subq	$16,%r9
+
+	vmovdqa	%xmm15,%xmm2
+	vpaddd	one(%rip),%xmm15,%xmm15
+
+	vpxor	0(%r8),%xmm2,%xmm2
+	vaesenc	16(%r8),%xmm2,%xmm2
+	vaesenc	32(%r8),%xmm2,%xmm2
+	vaesenc	48(%r8),%xmm2,%xmm2
+	vaesenc	64(%r8),%xmm2,%xmm2
+	vaesenc	80(%r8),%xmm2,%xmm2
+	vaesenc	96(%r8),%xmm2,%xmm2
+	vaesenc	112(%r8),%xmm2,%xmm2
+	vaesenc	128(%r8),%xmm2,%xmm2
+	vaesenc	144(%r8),%xmm2,%xmm2
+	vaesenclast	160(%r8),%xmm2,%xmm2
+	vpxor	(%rdi),%xmm2,%xmm2
+	vmovdqu	%xmm2,(%rsi)
+	addq	$16,%rdi
+	addq	$16,%rsi
+/* GFMUL is defined outside this block; presumably it multiplies xmm0 by xmm1 (first table entry) and leaves the result in xmm0 - confirm. */
+	vpxor	%xmm2,%xmm0,%xmm0
+	vmovdqa	-32(%rcx),%xmm1
+	call	GFMUL
+
+	jmp	L$128_dec_loop2
+
+L$128_dec_out:
+	vmovdqu	%xmm0,(%rdx)	/* write the updated hash state back */
+	.byte	0xf3,0xc3
+
+
+.globl	_aes128gcmsiv_ecb_enc_block
+.private_extern _aes128gcmsiv_ecb_enc_block
+/* _aes128gcmsiv_ecb_enc_block: encrypt a single 16-byte block using an 11-entry (AES-128) round-key schedule. */
+.p2align	4
+_aes128gcmsiv_ecb_enc_block:
+/* rdi=input block, rsi=output block, rdx=round keys; rdi and rsi are accessed with vmovdqa and so must be 16-byte aligned. */
+	vmovdqa	(%rdi),%xmm1
+/* Round 0 is the XOR with the first key, rounds 1-9 use vaesenc, the last round uses vaesenclast. */
+	vpxor	(%rdx),%xmm1,%xmm1
+	vaesenc	16(%rdx),%xmm1,%xmm1
+	vaesenc	32(%rdx),%xmm1,%xmm1
+	vaesenc	48(%rdx),%xmm1,%xmm1
+	vaesenc	64(%rdx),%xmm1,%xmm1
+	vaesenc	80(%rdx),%xmm1,%xmm1
+	vaesenc	96(%rdx),%xmm1,%xmm1
+	vaesenc	112(%rdx),%xmm1,%xmm1
+	vaesenc	128(%rdx),%xmm1,%xmm1
+	vaesenc	144(%rdx),%xmm1,%xmm1
+	vaesenclast	160(%rdx),%xmm1,%xmm1
+
+	vmovdqa	%xmm1,(%rsi)
+
+	.byte	0xf3,0xc3	/* encoded rep ret */
+
+
+.globl	_aes256gcmsiv_aes_ks_enc_x1
+.private_extern _aes256gcmsiv_aes_ks_enc_x1
+/* _aes256gcmsiv_aes_ks_enc_x1: expand a 32-byte key into 15 round keys while encrypting one block with them as they are produced. */
+.p2align	4
+_aes256gcmsiv_aes_ks_enc_x1:
+/* rdi=input block, rsi=output block, rdx=round-key output (15 x 16 bytes), rcx=32-byte key; rdi, rsi and rcx are accessed with vmovdqa (16-byte aligned). */
+	vmovdqa	con1(%rip),%xmm0
+	vmovdqa	mask(%rip),%xmm15
+	vmovdqa	(%rdi),%xmm8
+	vmovdqa	(%rcx),%xmm1
+	vmovdqa	16(%rcx),%xmm3
+	vpxor	%xmm1,%xmm8,%xmm8
+	vaesenc	%xmm3,%xmm8,%xmm8
+	vmovdqu	%xmm1,(%rdx)
+	vmovdqu	%xmm3,16(%rdx)
+	vpxor	%xmm14,%xmm14,%xmm14
+/* Even step: xmm2 = vaesenclast(shuffle(xmm3), round constant xmm0); xmm0 is doubled; xmm1 becomes the running XOR of its 32-bit words, XOR xmm2. */
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpslldq	$4,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vaesenc	%xmm1,%xmm8,%xmm8
+	vmovdqu	%xmm1,32(%rdx)
+/* Odd step: xmm2 = vaesenclast(broadcast of the top word of xmm1, zero key xmm14), no round constant; xmm3 gets the same word-wise running XOR, XOR xmm2. */
+	vpshufd	$0xff,%xmm1,%xmm2
+	vaesenclast	%xmm14,%xmm2,%xmm2
+	vpslldq	$4,%xmm3,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpxor	%xmm2,%xmm3,%xmm3
+	vaesenc	%xmm3,%xmm8,%xmm8
+	vmovdqu	%xmm3,48(%rdx)
+/* The even/odd pair repeats below for the round keys at offsets 64 through 208; each new key is applied to xmm8 and stored. */
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpslldq	$4,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vaesenc	%xmm1,%xmm8,%xmm8
+	vmovdqu	%xmm1,64(%rdx)
+
+	vpshufd	$0xff,%xmm1,%xmm2
+	vaesenclast	%xmm14,%xmm2,%xmm2
+	vpslldq	$4,%xmm3,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpxor	%xmm2,%xmm3,%xmm3
+	vaesenc	%xmm3,%xmm8,%xmm8
+	vmovdqu	%xmm3,80(%rdx)
+
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpslldq	$4,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vaesenc	%xmm1,%xmm8,%xmm8
+	vmovdqu	%xmm1,96(%rdx)
+
+	vpshufd	$0xff,%xmm1,%xmm2
+	vaesenclast	%xmm14,%xmm2,%xmm2
+	vpslldq	$4,%xmm3,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpxor	%xmm2,%xmm3,%xmm3
+	vaesenc	%xmm3,%xmm8,%xmm8
+	vmovdqu	%xmm3,112(%rdx)
+
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpslldq	$4,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vaesenc	%xmm1,%xmm8,%xmm8
+	vmovdqu	%xmm1,128(%rdx)
+
+	vpshufd	$0xff,%xmm1,%xmm2
+	vaesenclast	%xmm14,%xmm2,%xmm2
+	vpslldq	$4,%xmm3,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpxor	%xmm2,%xmm3,%xmm3
+	vaesenc	%xmm3,%xmm8,%xmm8
+	vmovdqu	%xmm3,144(%rdx)
+
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpslldq	$4,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vaesenc	%xmm1,%xmm8,%xmm8
+	vmovdqu	%xmm1,160(%rdx)
+
+	vpshufd	$0xff,%xmm1,%xmm2
+	vaesenclast	%xmm14,%xmm2,%xmm2
+	vpslldq	$4,%xmm3,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpxor	%xmm2,%xmm3,%xmm3
+	vaesenc	%xmm3,%xmm8,%xmm8
+	vmovdqu	%xmm3,176(%rdx)
+
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslld	$1,%xmm0,%xmm0
+	vpslldq	$4,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vaesenc	%xmm1,%xmm8,%xmm8
+	vmovdqu	%xmm1,192(%rdx)
+
+	vpshufd	$0xff,%xmm1,%xmm2
+	vaesenclast	%xmm14,%xmm2,%xmm2
+	vpslldq	$4,%xmm3,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpxor	%xmm2,%xmm3,%xmm3
+	vaesenc	%xmm3,%xmm8,%xmm8
+	vmovdqu	%xmm3,208(%rdx)
+/* Last round key (offset 224): the round constant is not advanced afterwards, and the block gets its final round via vaesenclast. */
+	vpshufb	%xmm15,%xmm3,%xmm2
+	vaesenclast	%xmm0,%xmm2,%xmm2
+	vpslldq	$4,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpslldq	$4,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm1,%xmm1
+	vaesenclast	%xmm1,%xmm8,%xmm8
+	vmovdqu	%xmm1,224(%rdx)
+
+	vmovdqa	%xmm8,(%rsi)
+	.byte	0xf3,0xc3	/* encoded rep ret */
+
+
+.globl	_aes256gcmsiv_ecb_enc_block
+.private_extern _aes256gcmsiv_ecb_enc_block
+/* _aes256gcmsiv_ecb_enc_block: encrypt a single 16-byte block using a 15-entry (AES-256) round-key schedule. */
+.p2align	4
+_aes256gcmsiv_ecb_enc_block:
+/* rdi=input block, rsi=output block, rdx=round keys; rdi and rsi are accessed with vmovdqa and so must be 16-byte aligned. */
+	vmovdqa	(%rdi),%xmm1
+	vpxor	(%rdx),%xmm1,%xmm1
+	vaesenc	16(%rdx),%xmm1,%xmm1
+	vaesenc	32(%rdx),%xmm1,%xmm1
+	vaesenc	48(%rdx),%xmm1,%xmm1
+	vaesenc	64(%rdx),%xmm1,%xmm1
+	vaesenc	80(%rdx),%xmm1,%xmm1
+	vaesenc	96(%rdx),%xmm1,%xmm1
+	vaesenc	112(%rdx),%xmm1,%xmm1
+	vaesenc	128(%rdx),%xmm1,%xmm1
+	vaesenc	144(%rdx),%xmm1,%xmm1
+	vaesenc	160(%rdx),%xmm1,%xmm1
+	vaesenc	176(%rdx),%xmm1,%xmm1
+	vaesenc	192(%rdx),%xmm1,%xmm1
+	vaesenc	208(%rdx),%xmm1,%xmm1
+	vaesenclast	224(%rdx),%xmm1,%xmm1
+	vmovdqa	%xmm1,(%rsi)
+	.byte	0xf3,0xc3	/* encoded rep ret */
+
+
+.globl	_aes256gcmsiv_enc_msg_x4
+.private_extern _aes256gcmsiv_enc_msg_x4
+/* _aes256gcmsiv_enc_msg_x4: AES-256 counter-mode encryption, four blocks per main-loop iteration. */
+.p2align	4
+_aes256gcmsiv_enc_msg_x4:
+/* rdi=input, rsi=output, rdx=16-byte initial counter source (vmovdqa, aligned), rcx=15 round keys, r8=byte length; returns at once when r8 is 0. */
+	testq	%r8,%r8
+	jnz	L$256_enc_msg_x4_start
+	.byte	0xf3,0xc3
+
+L$256_enc_msg_x4_start:
+	movq	%r8,%r10
+	shrq	$4,%r8
+	shlq	$60,%r10
+	jz	L$256_enc_msg_x4_start2
+	addq	$1,%r8
+/* r8 is now the block count rounded up (a trailing partial block is processed as a full 16 bytes); r10 = block count mod 4. */
+L$256_enc_msg_x4_start2:
+	movq	%r8,%r10
+	shlq	$62,%r10
+	shrq	$62,%r10
+
+/* Initial counter = 16 bytes at (%rdx) OR-ed with OR_MASK; xmm0..xmm3 = counter plus 0/one/two/three, xmm4 = per-iteration stride (four). */
+	vmovdqa	(%rdx),%xmm15
+	vpor	OR_MASK(%rip),%xmm15,%xmm15
+
+	vmovdqa	four(%rip),%xmm4
+	vmovdqa	%xmm15,%xmm0
+	vpaddd	one(%rip),%xmm15,%xmm1
+	vpaddd	two(%rip),%xmm15,%xmm2
+	vpaddd	three(%rip),%xmm15,%xmm3
+/* r8 = number of 4-block groups; skip the main loop when there are none. */
+	shrq	$2,%r8
+	je	L$256_enc_msg_x4_check_remainder
+/* Pre-bias the pointers by -64 because the loop body advances them by 64 on entry. */
+	subq	$64,%rsi
+	subq	$64,%rdi
+
+L$256_enc_msg_x4_loop1:
+	addq	$64,%rsi
+	addq	$64,%rdi
+
+	vmovdqa	%xmm0,%xmm5
+	vmovdqa	%xmm1,%xmm6
+	vmovdqa	%xmm2,%xmm7
+	vmovdqa	%xmm3,%xmm8
+/* Round 0, then rounds 1-13 with the round key staged in xmm12; counter increments for the next iteration are interleaved. */
+	vpxor	(%rcx),%xmm5,%xmm5
+	vpxor	(%rcx),%xmm6,%xmm6
+	vpxor	(%rcx),%xmm7,%xmm7
+	vpxor	(%rcx),%xmm8,%xmm8
+
+	vmovdqu	16(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vpaddd	%xmm4,%xmm0,%xmm0
+	vmovdqu	32(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vpaddd	%xmm4,%xmm1,%xmm1
+	vmovdqu	48(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vpaddd	%xmm4,%xmm2,%xmm2
+	vmovdqu	64(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vpaddd	%xmm4,%xmm3,%xmm3
+
+	vmovdqu	80(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	96(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	112(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	128(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	144(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	160(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	176(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	192(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	208(%rcx),%xmm12
+	vaesenc	%xmm12,%xmm5,%xmm5
+	vaesenc	%xmm12,%xmm6,%xmm6
+	vaesenc	%xmm12,%xmm7,%xmm7
+	vaesenc	%xmm12,%xmm8,%xmm8
+
+	vmovdqu	224(%rcx),%xmm12
+	vaesenclast	%xmm12,%xmm5,%xmm5
+	vaesenclast	%xmm12,%xmm6,%xmm6
+	vaesenclast	%xmm12,%xmm7,%xmm7
+	vaesenclast	%xmm12,%xmm8,%xmm8
+
+
+/* XOR keystream with the input and store. The subq sets the flags tested by jne; the vmovdqu stores in between leave them untouched. */
+	vpxor	0(%rdi),%xmm5,%xmm5
+	vpxor	16(%rdi),%xmm6,%xmm6
+	vpxor	32(%rdi),%xmm7,%xmm7
+	vpxor	48(%rdi),%xmm8,%xmm8
+
+	subq	$1,%r8
+
+	vmovdqu	%xmm5,0(%rsi)
+	vmovdqu	%xmm6,16(%rsi)
+	vmovdqu	%xmm7,32(%rsi)
+	vmovdqu	%xmm8,48(%rsi)
+
+	jne	L$256_enc_msg_x4_loop1
+/* Step past the final group so the leftover loop continues right after it. */
+	addq	$64,%rsi
+	addq	$64,%rdi
+
+L$256_enc_msg_x4_check_remainder:
+	cmpq	$0,%r10
+	je	L$256_enc_msg_x4_out
+
+L$256_enc_msg_x4_loop2:
+/* Leftover blocks (r10 of them), one per iteration, continuing from counter xmm0. */
+
+
+	vmovdqa	%xmm0,%xmm5
+	vpaddd	one(%rip),%xmm0,%xmm0
+	vpxor	(%rcx),%xmm5,%xmm5
+	vaesenc	16(%rcx),%xmm5,%xmm5
+	vaesenc	32(%rcx),%xmm5,%xmm5
+	vaesenc	48(%rcx),%xmm5,%xmm5
+	vaesenc	64(%rcx),%xmm5,%xmm5
+	vaesenc	80(%rcx),%xmm5,%xmm5
+	vaesenc	96(%rcx),%xmm5,%xmm5
+	vaesenc	112(%rcx),%xmm5,%xmm5
+	vaesenc	128(%rcx),%xmm5,%xmm5
+	vaesenc	144(%rcx),%xmm5,%xmm5
+	vaesenc	160(%rcx),%xmm5,%xmm5
+	vaesenc	176(%rcx),%xmm5,%xmm5
+	vaesenc	192(%rcx),%xmm5,%xmm5
+	vaesenc	208(%rcx),%xmm5,%xmm5
+	vaesenclast	224(%rcx),%xmm5,%xmm5
+
+
+	vpxor	(%rdi),%xmm5,%xmm5
+
+	vmovdqu	%xmm5,(%rsi)
+
+	addq	$16,%rdi
+	addq	$16,%rsi
+
+	subq	$1,%r10
+	jne	L$256_enc_msg_x4_loop2
+
+L$256_enc_msg_x4_out:
+	.byte	0xf3,0xc3	/* encoded rep ret */
+
+
+.globl	_aes256gcmsiv_enc_msg_x8
+.private_extern _aes256gcmsiv_enc_msg_x8
+/* _aes256gcmsiv_enc_msg_x8: AES-256 counter-mode encryption, eight blocks per main-loop iteration. */
+.p2align	4
+_aes256gcmsiv_enc_msg_x8:
+/* rdi=input, rsi=output, rdx=16-byte initial counter source (vmovdqa, aligned), rcx=15 round keys, r8=byte length; returns at once when r8 is 0. */
+	testq	%r8,%r8
+	jnz	L$256_enc_msg_x8_start
+	.byte	0xf3,0xc3
+
+L$256_enc_msg_x8_start:
+/* r11 = 64-byte-aligned scratch slot below %rsp for the eighth counter; %rsp itself is not moved (presumably relies on the ABI red zone - confirm). */
+	movq	%rsp,%r11
+	subq	$16,%r11
+	andq	$-64,%r11
+
+	movq	%r8,%r10
+	shrq	$4,%r8
+	shlq	$60,%r10
+	jz	L$256_enc_msg_x8_start2
+	addq	$1,%r8
+/* r8 is now the block count rounded up (a trailing partial block is processed as a full 16 bytes); r10 = block count mod 8. */
+L$256_enc_msg_x8_start2:
+	movq	%r8,%r10
+	shlq	$61,%r10
+	shrq	$61,%r10
+
+/* Initial counter = 16 bytes at (%rdx) OR-ed with OR_MASK. */
+	vmovdqa	(%rdx),%xmm1
+	vpor	OR_MASK(%rip),%xmm1,%xmm1
+
+/* Eight counters: xmm0 = base, xmm9..xmm14 = base plus one..six, and base plus seven is kept in memory at (%r11). */
+	vpaddd	seven(%rip),%xmm1,%xmm0
+	vmovdqa	%xmm0,(%r11)
+	vpaddd	one(%rip),%xmm1,%xmm9
+	vpaddd	two(%rip),%xmm1,%xmm10
+	vpaddd	three(%rip),%xmm1,%xmm11
+	vpaddd	four(%rip),%xmm1,%xmm12
+	vpaddd	five(%rip),%xmm1,%xmm13
+	vpaddd	six(%rip),%xmm1,%xmm14
+	vmovdqa	%xmm1,%xmm0
+/* r8 = number of 8-block groups; skip the main loop when there are none. */
+	shrq	$3,%r8
+	jz	L$256_enc_msg_x8_check_remainder
+/* Pre-bias the pointers by -128 because the loop body advances them by 128 on entry. */
+	subq	$128,%rsi
+	subq	$128,%rdi
+
+L$256_enc_msg_x8_loop1:
+	addq	$128,%rsi
+	addq	$128,%rdi
+
+	vmovdqa	%xmm0,%xmm1
+	vmovdqa	%xmm9,%xmm2
+	vmovdqa	%xmm10,%xmm3
+	vmovdqa	%xmm11,%xmm4
+	vmovdqa	%xmm12,%xmm5
+	vmovdqa	%xmm13,%xmm6
+	vmovdqa	%xmm14,%xmm7
+
+	vmovdqa	(%r11),%xmm8
+/* Round 0, then rounds 1-13 with the round key staged in xmm15; counter updates for the next iteration are interleaved. */
+	vpxor	(%rcx),%xmm1,%xmm1
+	vpxor	(%rcx),%xmm2,%xmm2
+	vpxor	(%rcx),%xmm3,%xmm3
+	vpxor	(%rcx),%xmm4,%xmm4
+	vpxor	(%rcx),%xmm5,%xmm5
+	vpxor	(%rcx),%xmm6,%xmm6
+	vpxor	(%rcx),%xmm7,%xmm7
+	vpxor	(%rcx),%xmm8,%xmm8
+
+	vmovdqu	16(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+/* xmm14 (already copied to xmm7) is borrowed to advance the in-memory eighth counter by eight. */
+	vmovdqa	(%r11),%xmm14
+	vpaddd	eight(%rip),%xmm14,%xmm14
+	vmovdqa	%xmm14,(%r11)
+	vmovdqu	32(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+/* Subtracting one from the new eighth counter restores xmm14 as the seventh counter for the next iteration. */
+	vpsubd	one(%rip),%xmm14,%xmm14
+	vmovdqu	48(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm0,%xmm0
+	vmovdqu	64(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm9,%xmm9
+	vmovdqu	80(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm10,%xmm10
+	vmovdqu	96(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm11,%xmm11
+	vmovdqu	112(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm12,%xmm12
+	vmovdqu	128(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vpaddd	eight(%rip),%xmm13,%xmm13
+	vmovdqu	144(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	160(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	176(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	192(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	208(%rcx),%xmm15
+	vaesenc	%xmm15,%xmm1,%xmm1
+	vaesenc	%xmm15,%xmm2,%xmm2
+	vaesenc	%xmm15,%xmm3,%xmm3
+	vaesenc	%xmm15,%xmm4,%xmm4
+	vaesenc	%xmm15,%xmm5,%xmm5
+	vaesenc	%xmm15,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	224(%rcx),%xmm15
+	vaesenclast	%xmm15,%xmm1,%xmm1
+	vaesenclast	%xmm15,%xmm2,%xmm2
+	vaesenclast	%xmm15,%xmm3,%xmm3
+	vaesenclast	%xmm15,%xmm4,%xmm4
+	vaesenclast	%xmm15,%xmm5,%xmm5
+	vaesenclast	%xmm15,%xmm6,%xmm6
+	vaesenclast	%xmm15,%xmm7,%xmm7
+	vaesenclast	%xmm15,%xmm8,%xmm8
+
+
+/* XOR keystream with the input and store. The subq sets the flags tested by jne; the vmovdqu stores in between leave them untouched. */
+	vpxor	0(%rdi),%xmm1,%xmm1
+	vpxor	16(%rdi),%xmm2,%xmm2
+	vpxor	32(%rdi),%xmm3,%xmm3
+	vpxor	48(%rdi),%xmm4,%xmm4
+	vpxor	64(%rdi),%xmm5,%xmm5
+	vpxor	80(%rdi),%xmm6,%xmm6
+	vpxor	96(%rdi),%xmm7,%xmm7
+	vpxor	112(%rdi),%xmm8,%xmm8
+
+	subq	$1,%r8
+
+	vmovdqu	%xmm1,0(%rsi)
+	vmovdqu	%xmm2,16(%rsi)
+	vmovdqu	%xmm3,32(%rsi)
+	vmovdqu	%xmm4,48(%rsi)
+	vmovdqu	%xmm5,64(%rsi)
+	vmovdqu	%xmm6,80(%rsi)
+	vmovdqu	%xmm7,96(%rsi)
+	vmovdqu	%xmm8,112(%rsi)
+
+	jne	L$256_enc_msg_x8_loop1
+/* Step past the final group so the leftover loop continues right after it. */
+	addq	$128,%rsi
+	addq	$128,%rdi
+
+L$256_enc_msg_x8_check_remainder:
+	cmpq	$0,%r10
+	je	L$256_enc_msg_x8_out
+
+L$256_enc_msg_x8_loop2:
+/* Leftover blocks (r10 of them), one per iteration, continuing from counter xmm0. */
+
+	vmovdqa	%xmm0,%xmm1
+	vpaddd	one(%rip),%xmm0,%xmm0
+
+	vpxor	(%rcx),%xmm1,%xmm1
+	vaesenc	16(%rcx),%xmm1,%xmm1
+	vaesenc	32(%rcx),%xmm1,%xmm1
+	vaesenc	48(%rcx),%xmm1,%xmm1
+	vaesenc	64(%rcx),%xmm1,%xmm1
+	vaesenc	80(%rcx),%xmm1,%xmm1
+	vaesenc	96(%rcx),%xmm1,%xmm1
+	vaesenc	112(%rcx),%xmm1,%xmm1
+	vaesenc	128(%rcx),%xmm1,%xmm1
+	vaesenc	144(%rcx),%xmm1,%xmm1
+	vaesenc	160(%rcx),%xmm1,%xmm1
+	vaesenc	176(%rcx),%xmm1,%xmm1
+	vaesenc	192(%rcx),%xmm1,%xmm1
+	vaesenc	208(%rcx),%xmm1,%xmm1
+	vaesenclast	224(%rcx),%xmm1,%xmm1
+
+
+	vpxor	(%rdi),%xmm1,%xmm1
+
+	vmovdqu	%xmm1,(%rsi)
+
+	addq	$16,%rdi
+	addq	$16,%rsi
+	subq	$1,%r10
+	jnz	L$256_enc_msg_x8_loop2
+
+L$256_enc_msg_x8_out:
+	.byte	0xf3,0xc3	/* encoded rep ret */
+
+
+
+.globl	_aes256gcmsiv_dec
+.private_extern _aes256gcmsiv_dec
+// _aes256gcmsiv_dec: AES-256 counter-style decryption of the buffer at %rdi into %rsi, folding every output block into the 16-byte accumulator at (%rdx).
+.p2align	4
+_aes256gcmsiv_dec:
+// Register use below: %rdi=input, %rsi=output, %rdx=accumulator plus scratch, %rcx=multiplier table, %r8=15 round keys, %r9=byte count.
+	testq	$~15,%r9
+	jnz	L$256_dec_start
+	.byte	0xf3,0xc3
+// The bytes 0xf3,0xc3 above encode "rep ret": a byte count below 16 returns without touching any buffer.
+L$256_dec_start:
+	vzeroupper
+	vmovdqa	(%rdx),%xmm0
+	movq	%rdx,%rax
+// Bias both pointers by 32 so later displacements are relative to the middle of each area; %rax now addresses scratch space inside the %rdx buffer.
+	leaq	32(%rax),%rax
+	leaq	32(%rcx),%rcx
+// Initial counter block: the 16 bytes located at input+count (read before the count is rounded down),
+// ORed with OR_MASK (defined outside this function; presumably forces the top bit - confirm).
+	vmovdqu	(%rdi,%r9,1),%xmm15
+	vpor	OR_MASK(%rip),%xmm15,%xmm15
+	andq	$~15,%r9
+// %r9 is now a multiple of 16. With fewer than 96 bytes the 6-block pipeline is skipped and
+// everything is handled by the one-block-at-a-time loop.
+	cmpq	$96,%r9
+	jb	L$256_dec_loop2
+// First 96 bytes: build six counter blocks in %xmm7-%xmm12 and leave the following counter in %xmm15
+// (assuming `one` and `two`, defined elsewhere, add 1 and 2 to the counter lane - confirm).
+	subq	$96,%r9
+	vmovdqa	%xmm15,%xmm7
+	vpaddd	one(%rip),%xmm7,%xmm8
+	vpaddd	two(%rip),%xmm7,%xmm9
+	vpaddd	one(%rip),%xmm9,%xmm10
+	vpaddd	two(%rip),%xmm9,%xmm11
+	vpaddd	one(%rip),%xmm11,%xmm12
+	vpaddd	two(%rip),%xmm11,%xmm15
+// Round 0: XOR each counter block with the first round key at (%r8).
+	vpxor	(%r8),%xmm7,%xmm7
+	vpxor	(%r8),%xmm8,%xmm8
+	vpxor	(%r8),%xmm9,%xmm9
+	vpxor	(%r8),%xmm10,%xmm10
+	vpxor	(%r8),%xmm11,%xmm11
+	vpxor	(%r8),%xmm12,%xmm12
+// Rounds 1-13: one round key per group (offsets 16..208 from %r8), applied to all six blocks.
+	vmovdqu	16(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	32(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	48(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	64(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	80(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	96(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	112(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	128(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	144(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	160(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	176(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	192(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	208(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+// Final round (14) with the key at 224(%r8).
+	vmovdqu	224(%r8),%xmm4
+	vaesenclast	%xmm4,%xmm7,%xmm7
+	vaesenclast	%xmm4,%xmm8,%xmm8
+	vaesenclast	%xmm4,%xmm9,%xmm9
+	vaesenclast	%xmm4,%xmm10,%xmm10
+	vaesenclast	%xmm4,%xmm11,%xmm11
+	vaesenclast	%xmm4,%xmm12,%xmm12
+
+// XOR the six encrypted counter blocks with 96 input bytes.
+	vpxor	0(%rdi),%xmm7,%xmm7
+	vpxor	16(%rdi),%xmm8,%xmm8
+	vpxor	32(%rdi),%xmm9,%xmm9
+	vpxor	48(%rdi),%xmm10,%xmm10
+	vpxor	64(%rdi),%xmm11,%xmm11
+	vpxor	80(%rdi),%xmm12,%xmm12
+// Store the six output blocks; they also stay live in %xmm7-%xmm12 for the code at L$256_dec_loop1.
+	vmovdqu	%xmm7,0(%rsi)
+	vmovdqu	%xmm8,16(%rsi)
+	vmovdqu	%xmm9,32(%rsi)
+	vmovdqu	%xmm10,48(%rsi)
+	vmovdqu	%xmm11,64(%rsi)
+	vmovdqu	%xmm12,80(%rsi)
+
+	addq	$96,%rdi
+	addq	$96,%rsi
+	jmp	L$256_dec_loop1
+
+
+.p2align	6
+L$256_dec_loop1:
+	cmpq	$96,%r9
+	jb	L$256_dec_finish_96
+	subq	$96,%r9
+// Save the six blocks produced by the previous pass: the newest (%xmm12) stays in %xmm6, the others go to scratch at -16..48(%rax).
+	vmovdqa	%xmm12,%xmm6
+	vmovdqa	%xmm11,16-32(%rax)
+	vmovdqa	%xmm10,32-32(%rax)
+	vmovdqa	%xmm9,48-32(%rax)
+	vmovdqa	%xmm8,64-32(%rax)
+	vmovdqa	%xmm7,80-32(%rax)
+// Next six counter blocks in %xmm7-%xmm12; %xmm15 again receives the counter for the pass after this one.
+	vmovdqa	%xmm15,%xmm7
+	vpaddd	one(%rip),%xmm7,%xmm8
+	vpaddd	two(%rip),%xmm7,%xmm9
+	vpaddd	one(%rip),%xmm9,%xmm10
+	vpaddd	two(%rip),%xmm9,%xmm11
+	vpaddd	one(%rip),%xmm11,%xmm12
+	vpaddd	two(%rip),%xmm11,%xmm15
+// Round 0 for all six blocks (this aligned load requires the key schedule at %r8 to be 16-byte aligned).
+	vmovdqa	(%r8),%xmm4
+	vpxor	%xmm4,%xmm7,%xmm7
+	vpxor	%xmm4,%xmm8,%xmm8
+	vpxor	%xmm4,%xmm9,%xmm9
+	vpxor	%xmm4,%xmm10,%xmm10
+	vpxor	%xmm4,%xmm11,%xmm11
+	vpxor	%xmm4,%xmm12,%xmm12
+// Carry-less multiply of the newest saved block by the table entry at -32(%rcx): %xmm2=high*high, %xmm3=low*low, %xmm1=sum of both cross products.
+	vmovdqu	0-32(%rcx),%xmm4
+	vpclmulqdq	$0x11,%xmm4,%xmm6,%xmm2
+	vpclmulqdq	$0x00,%xmm4,%xmm6,%xmm3
+	vpclmulqdq	$0x01,%xmm4,%xmm6,%xmm1
+	vpclmulqdq	$0x10,%xmm4,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+// From here on AES rounds for the new blocks alternate with multiplications of the saved blocks.
+	vmovdqu	16(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+// Next saved block times the next table entry, accumulated into %xmm1/%xmm2/%xmm3 (same pattern repeats for offsets 0, 16 and 32).
+	vmovdqu	-16(%rax),%xmm6
+	vmovdqu	-16(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+
+	vmovdqu	32(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	0(%rax),%xmm6
+	vmovdqu	0(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+
+	vmovdqu	48(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	16(%rax),%xmm6
+	vmovdqu	16(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+
+	vmovdqu	64(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	32(%rax),%xmm6
+	vmovdqu	32(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+
+	vmovdqu	80(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	96(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	112(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+// Oldest saved block: XOR in the running accumulator %xmm0 before multiplying by the table entry at 48(%rcx).
+	vmovdqa	80-32(%rax),%xmm6
+	vpxor	%xmm0,%xmm6,%xmm6
+	vmovdqu	80-32(%rcx),%xmm5
+
+	vpclmulqdq	$0x01,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x10,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	128(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+// Merge the cross products: %xmm5 = upper 128 bits of the 256-bit sum, %xmm0 = lower 128 bits.
+	vpsrldq	$8,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm5
+	vpslldq	$8,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm0
+// poly is defined outside this function; presumably the reduction constant for the field polynomial - confirm.
+	vmovdqa	poly(%rip),%xmm3
+
+	vmovdqu	144(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	160(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	176(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	192(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+
+	vmovdqu	208(%r8),%xmm4
+	vaesenc	%xmm4,%xmm7,%xmm7
+	vaesenc	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm4,%xmm9,%xmm9
+	vaesenc	%xmm4,%xmm10,%xmm10
+	vaesenc	%xmm4,%xmm11,%xmm11
+	vaesenc	%xmm4,%xmm12,%xmm12
+// Load the last round key and run the first fold of %xmm0 (swap halves, multiply by poly, XOR).
+	vmovdqu	224(%r8),%xmm6
+	vpalignr	$8,%xmm0,%xmm0,%xmm2
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm0
+	vpxor	%xmm0,%xmm2,%xmm0
+// Final round uses (last key XOR input block) as the key, so each vaesenclast directly yields the output block.
+	vpxor	0(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm7,%xmm7
+	vpxor	16(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm8,%xmm8
+	vpxor	32(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm9,%xmm9
+	vpxor	48(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm10,%xmm10
+	vpxor	64(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm11,%xmm11
+	vpxor	80(%rdi),%xmm6,%xmm4
+	vaesenclast	%xmm4,%xmm12,%xmm12
+// Second fold of %xmm0.
+	vpalignr	$8,%xmm0,%xmm0,%xmm2
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm0
+	vpxor	%xmm0,%xmm2,%xmm0
+// Store the six output blocks.
+	vmovdqu	%xmm7,0(%rsi)
+	vmovdqu	%xmm8,16(%rsi)
+	vmovdqu	%xmm9,32(%rsi)
+	vmovdqu	%xmm10,48(%rsi)
+	vmovdqu	%xmm11,64(%rsi)
+	vmovdqu	%xmm12,80(%rsi)
+// Updated accumulator = folded lower half XOR upper half.
+	vpxor	%xmm5,%xmm0,%xmm0
+
+	leaq	96(%rdi),%rdi
+	leaq	96(%rsi),%rsi
+	jmp	L$256_dec_loop1
+
+L$256_dec_finish_96:
+	vmovdqa	%xmm12,%xmm6
+	vmovdqa	%xmm11,16-32(%rax)
+	vmovdqa	%xmm10,32-32(%rax)
+	vmovdqa	%xmm9,48-32(%rax)
+	vmovdqa	%xmm8,64-32(%rax)
+	vmovdqa	%xmm7,80-32(%rax)
+// Drain step: no further 96-byte pass follows, so the six blocks saved above are only hashed. High, low and cross products collect in %xmm2, %xmm3 and %xmm1.
+	vmovdqu	0-32(%rcx),%xmm4
+	vpclmulqdq	$0x10,%xmm4,%xmm6,%xmm1
+	vpclmulqdq	$0x11,%xmm4,%xmm6,%xmm2
+	vpclmulqdq	$0x00,%xmm4,%xmm6,%xmm3
+	vpclmulqdq	$0x01,%xmm4,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+// Each following group multiplies the next saved block by the next table entry (offsets -16, 0, 16, 32).
+	vmovdqu	-16(%rax),%xmm6
+	vmovdqu	-16(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	0(%rax),%xmm6
+	vmovdqu	0(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	16(%rax),%xmm6
+	vmovdqu	16(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vmovdqu	32(%rax),%xmm6
+	vmovdqu	32(%rcx),%xmm13
+
+	vpclmulqdq	$0x10,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x11,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x01,%xmm13,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+
+// Oldest saved block is combined with the running accumulator %xmm0 before its multiplication by the entry at 48(%rcx).
+	vmovdqu	80-32(%rax),%xmm6
+	vpxor	%xmm0,%xmm6,%xmm6
+	vmovdqu	80-32(%rcx),%xmm5
+	vpclmulqdq	$0x11,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+	vpclmulqdq	$0x10,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x01,%xmm5,%xmm6,%xmm4
+	vpxor	%xmm4,%xmm1,%xmm1
+// Merge cross products: %xmm5 = upper 128 bits of the 256-bit sum, %xmm0 = lower 128 bits.
+	vpsrldq	$8,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm2,%xmm5
+	vpslldq	$8,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm0
+// poly is defined outside this function; presumably the reduction constant for the field polynomial - confirm.
+	vmovdqa	poly(%rip),%xmm3
+// Two folds of %xmm0 (swap halves, multiply by poly, XOR).
+	vpalignr	$8,%xmm0,%xmm0,%xmm2
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm0
+	vpxor	%xmm0,%xmm2,%xmm0
+
+	vpalignr	$8,%xmm0,%xmm0,%xmm2
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm0
+	vpxor	%xmm0,%xmm2,%xmm0
+// Updated accumulator; execution falls through into L$256_dec_loop2.
+	vpxor	%xmm5,%xmm0,%xmm0
+
+L$256_dec_loop2:
+// Tail loop: one 16-byte block per iteration until fewer than 16 bytes remain in %r9.
+// Each pass encrypts the counter in %xmm15, XORs it with the input block, stores the result and
+// folds that result into %xmm0 through GFMUL (defined outside this view; presumably %xmm0 = %xmm0 * %xmm1 - confirm).
+	cmpq	$16,%r9
+	jb	L$256_dec_out
+	subq	$16,%r9
+// Take the current counter and advance %xmm15 for the next pass (assuming `one` is the +1 increment - confirm).
+	vmovdqa	%xmm15,%xmm2
+	vpaddd	one(%rip),%xmm15,%xmm15
+// Full 14-round AES pass using memory operands for the round keys at 0..224(%r8).
+	vpxor	0(%r8),%xmm2,%xmm2
+	vaesenc	16(%r8),%xmm2,%xmm2
+	vaesenc	32(%r8),%xmm2,%xmm2
+	vaesenc	48(%r8),%xmm2,%xmm2
+	vaesenc	64(%r8),%xmm2,%xmm2
+	vaesenc	80(%r8),%xmm2,%xmm2
+	vaesenc	96(%r8),%xmm2,%xmm2
+	vaesenc	112(%r8),%xmm2,%xmm2
+	vaesenc	128(%r8),%xmm2,%xmm2
+	vaesenc	144(%r8),%xmm2,%xmm2
+	vaesenc	160(%r8),%xmm2,%xmm2
+	vaesenc	176(%r8),%xmm2,%xmm2
+	vaesenc	192(%r8),%xmm2,%xmm2
+	vaesenc	208(%r8),%xmm2,%xmm2
+	vaesenclast	224(%r8),%xmm2,%xmm2
+	vpxor	(%rdi),%xmm2,%xmm2
+	vmovdqu	%xmm2,(%rsi)
+	addq	$16,%rdi
+	addq	$16,%rsi
+// Hash the freshly written output block; the aligned load below requires the table entry at -32(%rcx) to be 16-byte aligned.
+	vpxor	%xmm2,%xmm0,%xmm0
+	vmovdqa	-32(%rcx),%xmm1
+	call	GFMUL
+
+	jmp	L$256_dec_loop2
+// Write the final accumulator back to (%rdx) and return (0xf3,0xc3 encodes rep ret).
+L$256_dec_out:
+	vmovdqu	%xmm0,(%rdx)
+	.byte	0xf3,0xc3
+
+
+.globl	_aes256gcmsiv_kdf
+.private_extern _aes256gcmsiv_kdf
+// _aes256gcmsiv_kdf: encrypts six counter blocks derived from the 16 bytes at (%rdi) with the 15 round keys at (%rdx) and stores the 96 bytes of output at (%rsi).
+.p2align	4
+_aes256gcmsiv_kdf:
+// All loads and stores here use vmovdqa, so %rdi, %rsi and %rdx must be 16-byte aligned.
+// Block construction: vpshufd $0x90 moves dwords 0,1,2 of the input up to lanes 1,2,3 (lane 0 keeps a copy of dword 0),
+// then and_mask (defined outside this function; presumably clears lane 0 - confirm) is applied and
+// `one` (also external; presumably adds 1 to lane 0 - confirm) is added repeatedly to form %xmm4,%xmm6,%xmm7,%xmm11,%xmm12,%xmm13.
+
+	vmovdqa	(%rdx),%xmm1
+	vmovdqa	0(%rdi),%xmm4
+	vmovdqa	and_mask(%rip),%xmm11
+	vmovdqa	one(%rip),%xmm8
+	vpshufd	$0x90,%xmm4,%xmm4
+	vpand	%xmm11,%xmm4,%xmm4
+	vpaddd	%xmm8,%xmm4,%xmm6
+	vpaddd	%xmm8,%xmm6,%xmm7
+	vpaddd	%xmm8,%xmm7,%xmm11
+	vpaddd	%xmm8,%xmm11,%xmm12
+	vpaddd	%xmm8,%xmm12,%xmm13
+// Round 0: XOR with the first round key (loaded into %xmm1 at function entry).
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpxor	%xmm1,%xmm6,%xmm6
+	vpxor	%xmm1,%xmm7,%xmm7
+	vpxor	%xmm1,%xmm11,%xmm11
+	vpxor	%xmm1,%xmm12,%xmm12
+	vpxor	%xmm1,%xmm13,%xmm13
+// Rounds 1-13: keys at 16..208(%rdx), alternating %xmm1 and %xmm2 as the key register.
+	vmovdqa	16(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vaesenc	%xmm1,%xmm6,%xmm6
+	vaesenc	%xmm1,%xmm7,%xmm7
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+
+	vmovdqa	32(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm4,%xmm4
+	vaesenc	%xmm2,%xmm6,%xmm6
+	vaesenc	%xmm2,%xmm7,%xmm7
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+	vaesenc	%xmm2,%xmm13,%xmm13
+
+	vmovdqa	48(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vaesenc	%xmm1,%xmm6,%xmm6
+	vaesenc	%xmm1,%xmm7,%xmm7
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+
+	vmovdqa	64(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm4,%xmm4
+	vaesenc	%xmm2,%xmm6,%xmm6
+	vaesenc	%xmm2,%xmm7,%xmm7
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+	vaesenc	%xmm2,%xmm13,%xmm13
+
+	vmovdqa	80(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vaesenc	%xmm1,%xmm6,%xmm6
+	vaesenc	%xmm1,%xmm7,%xmm7
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+
+	vmovdqa	96(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm4,%xmm4
+	vaesenc	%xmm2,%xmm6,%xmm6
+	vaesenc	%xmm2,%xmm7,%xmm7
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+	vaesenc	%xmm2,%xmm13,%xmm13
+
+	vmovdqa	112(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vaesenc	%xmm1,%xmm6,%xmm6
+	vaesenc	%xmm1,%xmm7,%xmm7
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+
+	vmovdqa	128(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm4,%xmm4
+	vaesenc	%xmm2,%xmm6,%xmm6
+	vaesenc	%xmm2,%xmm7,%xmm7
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+	vaesenc	%xmm2,%xmm13,%xmm13
+
+	vmovdqa	144(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vaesenc	%xmm1,%xmm6,%xmm6
+	vaesenc	%xmm1,%xmm7,%xmm7
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+
+	vmovdqa	160(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm4,%xmm4
+	vaesenc	%xmm2,%xmm6,%xmm6
+	vaesenc	%xmm2,%xmm7,%xmm7
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+	vaesenc	%xmm2,%xmm13,%xmm13
+
+	vmovdqa	176(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vaesenc	%xmm1,%xmm6,%xmm6
+	vaesenc	%xmm1,%xmm7,%xmm7
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+
+	vmovdqa	192(%rdx),%xmm2
+	vaesenc	%xmm2,%xmm4,%xmm4
+	vaesenc	%xmm2,%xmm6,%xmm6
+	vaesenc	%xmm2,%xmm7,%xmm7
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vaesenc	%xmm2,%xmm12,%xmm12
+	vaesenc	%xmm2,%xmm13,%xmm13
+
+	vmovdqa	208(%rdx),%xmm1
+	vaesenc	%xmm1,%xmm4,%xmm4
+	vaesenc	%xmm1,%xmm6,%xmm6
+	vaesenc	%xmm1,%xmm7,%xmm7
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+// Final round (14) with the key at 224(%rdx).
+	vmovdqa	224(%rdx),%xmm2
+	vaesenclast	%xmm2,%xmm4,%xmm4
+	vaesenclast	%xmm2,%xmm6,%xmm6
+	vaesenclast	%xmm2,%xmm7,%xmm7
+	vaesenclast	%xmm2,%xmm11,%xmm11
+	vaesenclast	%xmm2,%xmm12,%xmm12
+	vaesenclast	%xmm2,%xmm13,%xmm13
+
+// Write the six encrypted blocks to the output buffer and return (0xf3,0xc3 encodes rep ret).
+	vmovdqa	%xmm4,0(%rsi)
+	vmovdqa	%xmm6,16(%rsi)
+	vmovdqa	%xmm7,32(%rsi)
+	vmovdqa	%xmm11,48(%rsi)
+	vmovdqa	%xmm12,64(%rsi)
+	vmovdqa	%xmm13,80(%rsi)
+	.byte	0xf3,0xc3
+
+
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S b/third_party/boringssl/mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
new file mode 100644
index 0000000..c90447a
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
@@ -0,0 +1,8973 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+
+chacha20_poly1305_constants:
+// Constant pool: .chacha20_consts = the ASCII text expand 32-byte k, twice; .rol8/.rol16 = pshufb index vectors that rotate each 32-bit lane left by 8/16 bits; .sse_inc/.avx2_inc = increments of 1/2 in the first dword of each 128-bit lane; .clamp = a 128-bit AND mask followed by 16 bytes of 0xff; .and_masks = 16 rows where row n keeps the first n bytes.
+.p2align	6
+.chacha20_consts:
+.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
+.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
+.rol8:
+.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
+.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
+.rol16:
+.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
+.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
+.avx2_init:
+.long	0,0,0,0
+.sse_inc:
+.long	1,0,0,0
+.avx2_inc:
+.long	2,0,0,0,2,0,0,0
+.clamp:
+.quad	0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
+.quad	0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
+.p2align	4
+.and_masks:
+.byte	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
+.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+
+
+.p2align	6
+poly_hash_ad_internal:
+// poly_hash_ad_internal: absorb %r8 bytes at %rcx into a zeroed three-limb accumulator %r10:%r11:%r12, using the 16-byte multiplier at 0(%rbp). Modifies %rax, %rdx, %rcx, %r8, %r9 and %r13-%r15.
+	xorq	%r10,%r10
+	xorq	%r11,%r11
+	xorq	%r12,%r12
+	cmpq	$13,%r8
+	jne	hash_ad_loop
+poly_fast_tls_ad:
+// Fast path for exactly 13 bytes: load bytes 0-7 and bytes 8-12 (an overlapping load at offset 5 shifted right by 24 bits) and set the 2^128 padding bit in %r12.
+	movq	(%rcx),%r10
+	movq	5(%rcx),%r11
+	shrq	$24,%r11
+	movq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+// The sequence above multiplied the accumulator by the value at 0(%rbp)/8(%rbp), kept the low 130 bits and added back 5 times the bits above them (reduction modulo 2^130-5).
+	.byte	0xf3,0xc3
+hash_ad_loop:
+// Main loop: while at least 16 bytes remain, add the next 16-byte block plus the 2^128 bit, then multiply and reduce as described above.
+	cmpq	$16,%r8
+	jb	hash_ad_tail
+	addq	0(%rcx),%r10
+	adcq	8+0(%rcx),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+// Accumulator updated; advance to the next 16-byte block.
+	leaq	16(%rcx),%rcx
+	subq	$16,%r8
+	jmp	hash_ad_loop
+hash_ad_tail:
+	cmpq	$0,%r8
+	je	1f
+// 1-15 trailing bytes: gather them, last byte first, into %r13:%r14 as a zero-padded little-endian value.
+	xorq	%r13,%r13
+	xorq	%r14,%r14
+	xorq	%r15,%r15
+	addq	%r8,%rcx
+hash_ad_tail_loop:
+	shldq	$8,%r13,%r14
+	shlq	$8,%r13
+	movzbq	-1(%rcx),%r15
+	xorq	%r15,%r13
+	decq	%rcx
+	decq	%r8
+	jne	hash_ad_tail_loop
+// Add the padded block together with the 2^128 bit, then multiply and reduce exactly as in the main loop.
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+// The result is left in %r10:%r11:%r12 for the caller (0xf3,0xc3 below encodes rep ret).
+
+1:
+	.byte	0xf3,0xc3
+
+
+
+.globl	_chacha20_poly1305_open
+.private_extern _chacha20_poly1305_open
+
+.p2align	6
+_chacha20_poly1305_open:
+
+	pushq	%rbp
+
+	pushq	%rbx
+
+	pushq	%r12
+
+	pushq	%r13
+
+	pushq	%r14
+
+	pushq	%r15
+
+
+
+	pushq	%r9
+
+	subq	$288 + 32,%rsp
+
+
+
+
+
+
+
+	leaq	32(%rsp),%rbp
+	andq	$-32,%rbp
+	movq	%rdx,8+32(%rbp)
+	movq	%r8,0+32(%rbp)
+	movq	%rdx,%rbx
+
+	movl	_OPENSSL_ia32cap_P+8(%rip),%eax
+	andl	$288,%eax
+	xorl	$288,%eax
+	jz	chacha20_poly1305_open_avx2
+
+1:
+	cmpq	$128,%rbx
+	jbe	open_sse_128
+
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqu	0(%r9),%xmm4
+	movdqu	16(%r9),%xmm8
+	movdqu	32(%r9),%xmm12
+	movdqa	%xmm12,%xmm7
+
+	movdqa	%xmm4,48(%rbp)
+	movdqa	%xmm8,64(%rbp)
+	movdqa	%xmm12,96(%rbp)
+	movq	$10,%r10
+1:
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,4
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,12
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,4
+
+	decq	%r10
+	jne	1b
+
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+
+	pand	.clamp(%rip),%xmm0
+	movdqa	%xmm0,0(%rbp)
+	movdqa	%xmm4,16(%rbp)
+
+	movq	%r8,%r8
+	call	poly_hash_ad_internal
+open_sse_main_loop:
+	cmpq	$256,%rbx
+	jb	2f
+
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	48(%rbp),%xmm4
+	movdqa	64(%rbp),%xmm8
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm8,%xmm9
+	movdqa	%xmm0,%xmm2
+	movdqa	%xmm4,%xmm6
+	movdqa	%xmm8,%xmm10
+	movdqa	%xmm0,%xmm3
+	movdqa	%xmm4,%xmm7
+	movdqa	%xmm8,%xmm11
+	movdqa	96(%rbp),%xmm15
+	paddd	.sse_inc(%rip),%xmm15
+	movdqa	%xmm15,%xmm14
+	paddd	.sse_inc(%rip),%xmm14
+	movdqa	%xmm14,%xmm13
+	paddd	.sse_inc(%rip),%xmm13
+	movdqa	%xmm13,%xmm12
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,96(%rbp)
+	movdqa	%xmm13,112(%rbp)
+	movdqa	%xmm14,128(%rbp)
+	movdqa	%xmm15,144(%rbp)
+
+
+
+	movq	$4,%rcx
+	movq	%rsi,%r8
+1:
+	movdqa	%xmm8,80(%rbp)
+	movdqa	.rol16(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	addq	0(%r8),%r10
+	adcq	8+0(%r8),%r11
+	adcq	$1,%r12
+
+	leaq	16(%r8),%r8
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm4
+	pxor	%xmm8,%xmm4
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movdqa	.rol8(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	80(%rbp),%xmm8
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+.byte	102,15,58,15,255,4
+.byte	102,69,15,58,15,219,8
+.byte	102,69,15,58,15,255,12
+.byte	102,15,58,15,246,4
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,12
+.byte	102,15,58,15,237,4
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,12
+.byte	102,15,58,15,228,4
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,12
+	movdqa	%xmm8,80(%rbp)
+	movdqa	.rol16(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	.rol8(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	80(%rbp),%xmm8
+.byte	102,15,58,15,255,12
+.byte	102,69,15,58,15,219,8
+.byte	102,69,15,58,15,255,4
+.byte	102,15,58,15,246,12
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,4
+.byte	102,15,58,15,237,12
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,4
+.byte	102,15,58,15,228,12
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,4
+
+	decq	%rcx
+	jge	1b
+	addq	0(%r8),%r10
+	adcq	8+0(%r8),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%r8),%r8
+	cmpq	$-6,%rcx
+	jg	1b
+	paddd	.chacha20_consts(%rip),%xmm3
+	paddd	48(%rbp),%xmm7
+	paddd	64(%rbp),%xmm11
+	paddd	144(%rbp),%xmm15
+	paddd	.chacha20_consts(%rip),%xmm2
+	paddd	48(%rbp),%xmm6
+	paddd	64(%rbp),%xmm10
+	paddd	128(%rbp),%xmm14
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	48(%rbp),%xmm5
+	paddd	64(%rbp),%xmm9
+	paddd	112(%rbp),%xmm13
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+	movdqa	%xmm12,80(%rbp)
+	movdqu	0 + 0(%rsi),%xmm12
+	pxor	%xmm3,%xmm12
+	movdqu	%xmm12,0 + 0(%rdi)
+	movdqu	16 + 0(%rsi),%xmm12
+	pxor	%xmm7,%xmm12
+	movdqu	%xmm12,16 + 0(%rdi)
+	movdqu	32 + 0(%rsi),%xmm12
+	pxor	%xmm11,%xmm12
+	movdqu	%xmm12,32 + 0(%rdi)
+	movdqu	48 + 0(%rsi),%xmm12
+	pxor	%xmm15,%xmm12
+	movdqu	%xmm12,48 + 0(%rdi)
+	movdqu	0 + 64(%rsi),%xmm3
+	movdqu	16 + 64(%rsi),%xmm7
+	movdqu	32 + 64(%rsi),%xmm11
+	movdqu	48 + 64(%rsi),%xmm15
+	pxor	%xmm3,%xmm2
+	pxor	%xmm7,%xmm6
+	pxor	%xmm11,%xmm10
+	pxor	%xmm14,%xmm15
+	movdqu	%xmm2,0 + 64(%rdi)
+	movdqu	%xmm6,16 + 64(%rdi)
+	movdqu	%xmm10,32 + 64(%rdi)
+	movdqu	%xmm15,48 + 64(%rdi)
+	movdqu	0 + 128(%rsi),%xmm3
+	movdqu	16 + 128(%rsi),%xmm7
+	movdqu	32 + 128(%rsi),%xmm11
+	movdqu	48 + 128(%rsi),%xmm15
+	pxor	%xmm3,%xmm1
+	pxor	%xmm7,%xmm5
+	pxor	%xmm11,%xmm9
+	pxor	%xmm13,%xmm15
+	movdqu	%xmm1,0 + 128(%rdi)
+	movdqu	%xmm5,16 + 128(%rdi)
+	movdqu	%xmm9,32 + 128(%rdi)
+	movdqu	%xmm15,48 + 128(%rdi)
+	movdqu	0 + 192(%rsi),%xmm3
+	movdqu	16 + 192(%rsi),%xmm7
+	movdqu	32 + 192(%rsi),%xmm11
+	movdqu	48 + 192(%rsi),%xmm15
+	pxor	%xmm3,%xmm0
+	pxor	%xmm7,%xmm4
+	pxor	%xmm11,%xmm8
+	pxor	80(%rbp),%xmm15
+	movdqu	%xmm0,0 + 192(%rdi)
+	movdqu	%xmm4,16 + 192(%rdi)
+	movdqu	%xmm8,32 + 192(%rdi)
+	movdqu	%xmm15,48 + 192(%rdi)
+
+	leaq	256(%rsi),%rsi
+	leaq	256(%rdi),%rdi
+	subq	$256,%rbx
+	jmp	open_sse_main_loop
+2:
+
+	testq	%rbx,%rbx
+	jz	open_sse_finalize
+	cmpq	$64,%rbx
+	ja	3f
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	48(%rbp),%xmm4
+	movdqa	64(%rbp),%xmm8
+	movdqa	96(%rbp),%xmm12
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,96(%rbp)
+
+	xorq	%r8,%r8
+	movq	%rbx,%rcx
+	cmpq	$16,%rcx
+	jb	2f
+1:
+	addq	0(%rsi,%r8), %r10
+	adcq	8+0(%rsi,%r8), %r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	subq	$16,%rcx
+2:
+	addq	$16,%r8
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,4
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,12
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,4
+
+	cmpq	$16,%rcx
+	jae	1b
+	cmpq	$160,%r8
+	jne	2b
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+
+	jmp	open_sse_tail_64_dec_loop
+3:
+	cmpq	$128,%rbx
+	ja	3f
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	48(%rbp),%xmm4
+	movdqa	64(%rbp),%xmm8
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm8,%xmm9
+	movdqa	96(%rbp),%xmm13
+	paddd	.sse_inc(%rip),%xmm13
+	movdqa	%xmm13,%xmm12
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,96(%rbp)
+	movdqa	%xmm13,112(%rbp)
+
+	movq	%rbx,%rcx
+	andq	$-16,%rcx
+	xorq	%r8,%r8
+1:
+	addq	0(%rsi,%r8), %r10
+	adcq	8+0(%rsi,%r8), %r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+2:
+	addq	$16,%r8
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,4
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,12
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,4
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,12
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,4
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,12
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,4
+
+	cmpq	%rcx,%r8
+	jb	1b
+	cmpq	$160,%r8
+	jne	2b
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	48(%rbp),%xmm5
+	paddd	64(%rbp),%xmm9
+	paddd	112(%rbp),%xmm13
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+	movdqu	0 + 0(%rsi),%xmm3
+	movdqu	16 + 0(%rsi),%xmm7
+	movdqu	32 + 0(%rsi),%xmm11
+	movdqu	48 + 0(%rsi),%xmm15
+	pxor	%xmm3,%xmm1
+	pxor	%xmm7,%xmm5
+	pxor	%xmm11,%xmm9
+	pxor	%xmm13,%xmm15
+	movdqu	%xmm1,0 + 0(%rdi)
+	movdqu	%xmm5,16 + 0(%rdi)
+	movdqu	%xmm9,32 + 0(%rdi)
+	movdqu	%xmm15,48 + 0(%rdi)
+
+	subq	$64,%rbx
+	leaq	64(%rsi),%rsi
+	leaq	64(%rdi),%rdi
+	jmp	open_sse_tail_64_dec_loop
+3:
+	cmpq	$192,%rbx
+	ja	3f
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	48(%rbp),%xmm4
+	movdqa	64(%rbp),%xmm8
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm8,%xmm9
+	movdqa	%xmm0,%xmm2
+	movdqa	%xmm4,%xmm6
+	movdqa	%xmm8,%xmm10
+	movdqa	96(%rbp),%xmm14
+	paddd	.sse_inc(%rip),%xmm14
+	movdqa	%xmm14,%xmm13
+	paddd	.sse_inc(%rip),%xmm13
+	movdqa	%xmm13,%xmm12
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,96(%rbp)
+	movdqa	%xmm13,112(%rbp)
+	movdqa	%xmm14,128(%rbp)
+
+	movq	%rbx,%rcx
+	movq	$160,%r8
+	cmpq	$160,%rcx
+	cmovgq	%r8,%rcx
+	andq	$-16,%rcx
+	xorq	%r8,%r8
+1:
+	addq	0(%rsi,%r8), %r10
+	adcq	8+0(%rsi,%r8), %r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+2:
+	addq	$16,%r8
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,4
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,12
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,4
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,12
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm6
+	pxor	%xmm3,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm6
+	pxor	%xmm3,%xmm6
+.byte	102,15,58,15,246,4
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,12
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,4
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,12
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,4
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm6
+	pxor	%xmm3,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm6
+	pxor	%xmm3,%xmm6
+.byte	102,15,58,15,246,12
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,4
+
+	cmpq	%rcx,%r8
+	jb	1b
+	cmpq	$160,%r8
+	jne	2b
+	cmpq	$176,%rbx
+	jb	1f
+	addq	160(%rsi),%r10
+	adcq	8+160(%rsi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	cmpq	$192,%rbx
+	jb	1f
+	addq	176(%rsi),%r10
+	adcq	8+176(%rsi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+1:
+	paddd	.chacha20_consts(%rip),%xmm2
+	paddd	48(%rbp),%xmm6
+	paddd	64(%rbp),%xmm10
+	paddd	128(%rbp),%xmm14
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	48(%rbp),%xmm5
+	paddd	64(%rbp),%xmm9
+	paddd	112(%rbp),%xmm13
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+	movdqu	0 + 0(%rsi),%xmm3
+	movdqu	16 + 0(%rsi),%xmm7
+	movdqu	32 + 0(%rsi),%xmm11
+	movdqu	48 + 0(%rsi),%xmm15
+	pxor	%xmm3,%xmm2
+	pxor	%xmm7,%xmm6
+	pxor	%xmm11,%xmm10
+	pxor	%xmm14,%xmm15
+	movdqu	%xmm2,0 + 0(%rdi)
+	movdqu	%xmm6,16 + 0(%rdi)
+	movdqu	%xmm10,32 + 0(%rdi)
+	movdqu	%xmm15,48 + 0(%rdi)
+	movdqu	0 + 64(%rsi),%xmm3
+	movdqu	16 + 64(%rsi),%xmm7
+	movdqu	32 + 64(%rsi),%xmm11
+	movdqu	48 + 64(%rsi),%xmm15
+	pxor	%xmm3,%xmm1
+	pxor	%xmm7,%xmm5
+	pxor	%xmm11,%xmm9
+	pxor	%xmm13,%xmm15
+	movdqu	%xmm1,0 + 64(%rdi)
+	movdqu	%xmm5,16 + 64(%rdi)
+	movdqu	%xmm9,32 + 64(%rdi)
+	movdqu	%xmm15,48 + 64(%rdi)
+
+	subq	$128,%rbx
+	leaq	128(%rsi),%rsi
+	leaq	128(%rdi),%rdi
+	jmp	open_sse_tail_64_dec_loop
+3:
+
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	48(%rbp),%xmm4
+	movdqa	64(%rbp),%xmm8
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm8,%xmm9
+	movdqa	%xmm0,%xmm2
+	movdqa	%xmm4,%xmm6
+	movdqa	%xmm8,%xmm10
+	movdqa	%xmm0,%xmm3
+	movdqa	%xmm4,%xmm7
+	movdqa	%xmm8,%xmm11
+	movdqa	96(%rbp),%xmm15
+	paddd	.sse_inc(%rip),%xmm15
+	movdqa	%xmm15,%xmm14
+	paddd	.sse_inc(%rip),%xmm14
+	movdqa	%xmm14,%xmm13
+	paddd	.sse_inc(%rip),%xmm13
+	movdqa	%xmm13,%xmm12
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,96(%rbp)
+	movdqa	%xmm13,112(%rbp)
+	movdqa	%xmm14,128(%rbp)
+	movdqa	%xmm15,144(%rbp)
+
+	xorq	%r8,%r8
+1:
+	addq	0(%rsi,%r8), %r10
+	adcq	8+0(%rsi,%r8), %r11
+	adcq	$1,%r12
+	movdqa	%xmm11,80(%rbp)
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm11
+	pslld	$12,%xmm11
+	psrld	$20,%xmm4
+	pxor	%xmm11,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm11
+	pslld	$7,%xmm11
+	psrld	$25,%xmm4
+	pxor	%xmm11,%xmm4
+.byte	102,15,58,15,228,4
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,12
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm11
+	pslld	$12,%xmm11
+	psrld	$20,%xmm5
+	pxor	%xmm11,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm11
+	pslld	$7,%xmm11
+	psrld	$25,%xmm5
+	pxor	%xmm11,%xmm5
+.byte	102,15,58,15,237,4
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,12
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm11
+	pslld	$12,%xmm11
+	psrld	$20,%xmm6
+	pxor	%xmm11,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm11
+	pslld	$7,%xmm11
+	psrld	$25,%xmm6
+	pxor	%xmm11,%xmm6
+.byte	102,15,58,15,246,4
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,12
+	movdqa	80(%rbp),%xmm11
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movdqa	%xmm9,80(%rbp)
+	paddd	%xmm7,%xmm3
+	pxor	%xmm3,%xmm15
+	pshufb	.rol16(%rip),%xmm15
+	paddd	%xmm15,%xmm11
+	pxor	%xmm11,%xmm7
+	movdqa	%xmm7,%xmm9
+	pslld	$12,%xmm9
+	psrld	$20,%xmm7
+	pxor	%xmm9,%xmm7
+	paddd	%xmm7,%xmm3
+	pxor	%xmm3,%xmm15
+	pshufb	.rol8(%rip),%xmm15
+	paddd	%xmm15,%xmm11
+	pxor	%xmm11,%xmm7
+	movdqa	%xmm7,%xmm9
+	pslld	$7,%xmm9
+	psrld	$25,%xmm7
+	pxor	%xmm9,%xmm7
+.byte	102,15,58,15,255,4
+.byte	102,69,15,58,15,219,8
+.byte	102,69,15,58,15,255,12
+	movdqa	80(%rbp),%xmm9
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	movdqa	%xmm11,80(%rbp)
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm11
+	pslld	$12,%xmm11
+	psrld	$20,%xmm4
+	pxor	%xmm11,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm11
+	pslld	$7,%xmm11
+	psrld	$25,%xmm4
+	pxor	%xmm11,%xmm4
+.byte	102,15,58,15,228,12
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,4
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm11
+	pslld	$12,%xmm11
+	psrld	$20,%xmm5
+	pxor	%xmm11,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm11
+	pslld	$7,%xmm11
+	psrld	$25,%xmm5
+	pxor	%xmm11,%xmm5
+.byte	102,15,58,15,237,12
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,4
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm11
+	pslld	$12,%xmm11
+	psrld	$20,%xmm6
+	pxor	%xmm11,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm11
+	pslld	$7,%xmm11
+	psrld	$25,%xmm6
+	pxor	%xmm11,%xmm6
+.byte	102,15,58,15,246,12
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,4
+	movdqa	80(%rbp),%xmm11
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	movdqa	%xmm9,80(%rbp)
+	paddd	%xmm7,%xmm3
+	pxor	%xmm3,%xmm15
+	pshufb	.rol16(%rip),%xmm15
+	paddd	%xmm15,%xmm11
+	pxor	%xmm11,%xmm7
+	movdqa	%xmm7,%xmm9
+	pslld	$12,%xmm9
+	psrld	$20,%xmm7
+	pxor	%xmm9,%xmm7
+	paddd	%xmm7,%xmm3
+	pxor	%xmm3,%xmm15
+	pshufb	.rol8(%rip),%xmm15
+	paddd	%xmm15,%xmm11
+	pxor	%xmm11,%xmm7
+	movdqa	%xmm7,%xmm9
+	pslld	$7,%xmm9
+	psrld	$25,%xmm7
+	pxor	%xmm9,%xmm7
+.byte	102,15,58,15,255,12
+.byte	102,69,15,58,15,219,8
+.byte	102,69,15,58,15,255,4
+	movdqa	80(%rbp),%xmm9
+
+	addq	$16,%r8
+	cmpq	$160,%r8
+	jb	1b
+	movq	%rbx,%rcx
+	andq	$-16,%rcx
+1:
+	addq	0(%rsi,%r8), %r10
+	adcq	8+0(%rsi,%r8), %r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	addq	$16,%r8
+	cmpq	%rcx,%r8
+	jb	1b
+	paddd	.chacha20_consts(%rip),%xmm3
+	paddd	48(%rbp),%xmm7
+	paddd	64(%rbp),%xmm11
+	paddd	144(%rbp),%xmm15
+	paddd	.chacha20_consts(%rip),%xmm2
+	paddd	48(%rbp),%xmm6
+	paddd	64(%rbp),%xmm10
+	paddd	128(%rbp),%xmm14
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	48(%rbp),%xmm5
+	paddd	64(%rbp),%xmm9
+	paddd	112(%rbp),%xmm13
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+	movdqa	%xmm12,80(%rbp)
+	movdqu	0 + 0(%rsi),%xmm12
+	pxor	%xmm3,%xmm12
+	movdqu	%xmm12,0 + 0(%rdi)
+	movdqu	16 + 0(%rsi),%xmm12
+	pxor	%xmm7,%xmm12
+	movdqu	%xmm12,16 + 0(%rdi)
+	movdqu	32 + 0(%rsi),%xmm12
+	pxor	%xmm11,%xmm12
+	movdqu	%xmm12,32 + 0(%rdi)
+	movdqu	48 + 0(%rsi),%xmm12
+	pxor	%xmm15,%xmm12
+	movdqu	%xmm12,48 + 0(%rdi)
+	movdqu	0 + 64(%rsi),%xmm3
+	movdqu	16 + 64(%rsi),%xmm7
+	movdqu	32 + 64(%rsi),%xmm11
+	movdqu	48 + 64(%rsi),%xmm15
+	pxor	%xmm3,%xmm2
+	pxor	%xmm7,%xmm6
+	pxor	%xmm11,%xmm10
+	pxor	%xmm14,%xmm15
+	movdqu	%xmm2,0 + 64(%rdi)
+	movdqu	%xmm6,16 + 64(%rdi)
+	movdqu	%xmm10,32 + 64(%rdi)
+	movdqu	%xmm15,48 + 64(%rdi)
+	movdqu	0 + 128(%rsi),%xmm3
+	movdqu	16 + 128(%rsi),%xmm7
+	movdqu	32 + 128(%rsi),%xmm11
+	movdqu	48 + 128(%rsi),%xmm15
+	pxor	%xmm3,%xmm1
+	pxor	%xmm7,%xmm5
+	pxor	%xmm11,%xmm9
+	pxor	%xmm13,%xmm15
+	movdqu	%xmm1,0 + 128(%rdi)
+	movdqu	%xmm5,16 + 128(%rdi)
+	movdqu	%xmm9,32 + 128(%rdi)
+	movdqu	%xmm15,48 + 128(%rdi)
+
+	movdqa	80(%rbp),%xmm12
+	subq	$192,%rbx
+	leaq	192(%rsi),%rsi
+	leaq	192(%rdi),%rdi
+
+
+open_sse_tail_64_dec_loop:	# Tail decrypt loop: consume leftover input 16 bytes at a time with the keystream rows held in xmm0, xmm4, xmm8, xmm12
+	cmpq	$16,%rbx	# rbx = bytes still to process; leave the loop once fewer than 16 remain
+	jb	1f
+	subq	$16,%rbx
+	movdqu	(%rsi),%xmm3	# load 16 input bytes (unaligned)
+	pxor	%xmm3,%xmm0	# xmm0 = input XOR current keystream row
+	movdqu	%xmm0,(%rdi)	# store 16 output bytes
+	leaq	16(%rsi),%rsi	# advance input pointer
+	leaq	16(%rdi),%rdi	# advance output pointer
+	movdqa	%xmm4,%xmm0	# shift the remaining keystream rows down: xmm4 -> xmm0,
+	movdqa	%xmm8,%xmm4	# xmm8 -> xmm4,
+	movdqa	%xmm12,%xmm8	# xmm12 -> xmm8
+	jmp	open_sse_tail_64_dec_loop
+1:
+	movdqa	%xmm0,%xmm1	# open_sse_tail_16 (reached by fall-through) reads its keystream from xmm1
+
+
+open_sse_tail_16:	# Last partial block: decrypt and authenticate the final rbx bytes (both visible entry paths arrive with rbx < 16); keystream is taken from xmm1
+	testq	%rbx,%rbx
+	jz	open_sse_finalize	# nothing left: go straight to tag finalization
+
+
+
+	pxor	%xmm3,%xmm3	# xmm3 = 0; the input bytes are gathered here
+	leaq	-1(%rsi,%rbx), %rsi	# rsi = address of the last input byte
+	movq	%rbx,%r8	# r8 = countdown for the gather loop
+2:
+	pslldq	$1,%xmm3	# shift the bytes gathered so far up by one byte position
+	pinsrb	$0,(%rsi),%xmm3	# insert the next byte (walking backwards through the input) at position 0
+	subq	$1,%rsi
+	subq	$1,%r8
+	jnz	2b	# after rbx iterations xmm3 holds the input in memory order, zero-padded to 16 bytes; only the rbx in-bounds bytes are read
+
+3:
+.byte	102,73,15,126,221	# encoding of movq %xmm3,%r13: low 8 bytes of the padded input
+	pextrq	$1,%xmm3,%r14	# r14 = high 8 bytes of the padded input
+
+	pxor	%xmm1,%xmm3	# xmm3 = input XOR keystream, i.e. the output bytes
+
+
+2:
+	pextrb	$0,%xmm3,(%rdi)	# write one output byte
+	psrldq	$1,%xmm3	# bring the next output byte to position 0
+	addq	$1,%rdi
+	subq	$1,%rbx
+	jne	2b	# exactly rbx bytes are written
+
+	addq	%r13,%r10	# accumulator (r10 low, r11 middle, r12 high) += padded input block captured before the XOR
+	adcq	%r14,%r11
+	adcq	$1,%r12	# add the carry plus a 1 in the third limb (bit 128 of the block)
+	movq	0+0(%rbp),%rax	# Multiply the accumulator by the 128-bit value stored at 0(%rbp) (low qword) and 8(%rbp) (high qword)
+	movq	%rax,%r15
+	mulq	%r10	# rdx:rax = low qword * r10
+	movq	%rax,%r13	# product limb 0
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11	# rdx:rax = low qword * r11
+	imulq	%r12,%r15	# r15 = low qword * r12 (64-bit product)
+	addq	%rax,%r14	# product limb 1 (partial)
+	adcq	%rdx,%r15	# product limb 2 (partial)
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10	# rdx:rax = high qword * r10
+	addq	%rax,%r14	# product limb 1 complete
+	adcq	$0,%rdx
+	movq	%rdx,%r10	# hold the carry word destined for limb 2
+	movq	8+0(%rbp),%rax
+	mulq	%r11	# rdx:rax = high qword * r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9	# r9 = high qword * r12
+	addq	%r10,%r15	# product limb 2 complete
+	adcq	%rdx,%r9	# product limb 3
+	movq	%r13,%r10	# Reduce: keep the low 130 bits of the product in r10, r11, r12
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12	# r12 = bits 128-129 of the product
+	movq	%r15,%r13
+	andq	$-4,%r13	# r13:r14 = 4 * (product >> 130)
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15	# r15:r9 = product >> 130
+	shrq	$2,%r9
+	addq	%r13,%r10	# add 5 * (product >> 130) as 4x + 1x: a partial reduction modulo 2^130 - 5
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+
+open_sse_finalize:	# Finish the authenticator: absorb the 16 bytes at 32(%rbp), reduce fully, add the 128-bit value at 16(%rbp), store the 16-byte result, then unwind the stack and return
+	addq	32(%rbp),%r10	# accumulator (r10, r11, r12) += qwords at 32(%rbp) and 40(%rbp); NOTE(review): presumably the lengths block - the seal prologue in this file stores r8 and a byte count at these offsets; confirm for the open prologue
+	adcq	8+32(%rbp),%r11
+	adcq	$1,%r12	# add the carry plus a 1 in the third limb (bit 128 of the block)
+	movq	0+0(%rbp),%rax	# Multiply the accumulator by the 128-bit value stored at 0(%rbp) (low qword) and 8(%rbp) (high qword)
+	movq	%rax,%r15
+	mulq	%r10	# rdx:rax = low qword * r10
+	movq	%rax,%r13	# product limb 0
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11	# rdx:rax = low qword * r11
+	imulq	%r12,%r15	# r15 = low qword * r12 (64-bit product)
+	addq	%rax,%r14	# product limb 1 (partial)
+	adcq	%rdx,%r15	# product limb 2 (partial)
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10	# rdx:rax = high qword * r10
+	addq	%rax,%r14	# product limb 1 complete
+	adcq	$0,%rdx
+	movq	%rdx,%r10	# hold the carry word destined for limb 2
+	movq	8+0(%rbp),%rax
+	mulq	%r11	# rdx:rax = high qword * r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9	# r9 = high qword * r12
+	addq	%r10,%r15	# product limb 2 complete
+	adcq	%rdx,%r9	# product limb 3
+	movq	%r13,%r10	# Reduce: keep the low 130 bits of the product in r10, r11, r12
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12	# r12 = bits 128-129 of the product
+	movq	%r15,%r13
+	andq	$-4,%r13	# r13:r14 = 4 * (product >> 130)
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15	# r15:r9 = product >> 130
+	shrq	$2,%r9
+	addq	%r13,%r10	# add 5 * (product >> 130) as 4x + 1x: a partial reduction modulo 2^130 - 5
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+
+	movq	%r10,%r13	# keep a copy of the accumulator for the conditional select below
+	movq	%r11,%r14
+	movq	%r12,%r15
+	subq	$-5,%r10	# subtract 2^130 - 5, limb by limb (0xfffffffffffffffb, 0xffffffffffffffff, 3)
+	sbbq	$-1,%r11
+	sbbq	$3,%r12
+	cmovcq	%r13,%r10	# a borrow means the accumulator was already below 2^130 - 5: restore the copy (branch-free select)
+	cmovcq	%r14,%r11
+	cmovcq	%r15,%r12
+
+	addq	0+16(%rbp),%r10	# result = low 128 bits of the accumulator + 128-bit value at 16(%rbp); the carry out of bit 127 is discarded
+	adcq	8+16(%rbp),%r11
+
+	addq	$288 + 32,%rsp	# release the stack frame (the same size the seal prologue in this file reserves)
+
+	popq	%r9	# NOTE(review): presumably the pointer pushed last by the open prologue (not visible in this chunk); the result is written through it
+
+	movq	%r10,(%r9)	# store result bytes 0-7
+	movq	%r11,8(%r9)	# store result bytes 8-15
+
+	popq	%r15	# restore the registers saved by the prologue (prologue not visible in this chunk)
+
+	popq	%r14
+
+	popq	%r13
+
+	popq	%r12
+
+	popq	%rbx
+
+	popq	%rbp
+
+	.byte	0xf3,0xc3	# encoding of rep ret
+
+
+open_sse_128:	# Short-input open path: runs three ChaCha20 states side by side, derives the authenticator key from state 0 and 128 bytes of keystream from states 1 and 2. NOTE(review): presumably selected when the input is at most 128 bytes; the dispatch is not visible in this chunk
+	movdqu	.chacha20_consts(%rip),%xmm0	# row 0 of state 0 (constants); states 1 and 2 get copies below
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm0,%xmm2
+	movdqu	0(%r9),%xmm4	# row 1 from 0(%r9); NOTE(review): presumably the first half of the key - confirm the layout behind r9 against the caller
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm4,%xmm6
+	movdqu	16(%r9),%xmm8	# row 2 from 16(%r9)
+	movdqa	%xmm8,%xmm9
+	movdqa	%xmm8,%xmm10
+	movdqu	32(%r9),%xmm12	# row 3 from 32(%r9); NOTE(review): presumably counter and nonce
+	movdqa	%xmm12,%xmm13
+	paddd	.sse_inc(%rip),%xmm13	# state 1 row 3 = state 0 row 3 + .sse_inc (constant defined elsewhere; presumably the block-counter increment)
+	movdqa	%xmm13,%xmm14
+	paddd	.sse_inc(%rip),%xmm14	# state 2 row 3 = state 1 row 3 + .sse_inc
+	movdqa	%xmm4,%xmm7	# keep the initial rows 1 and 2 and the state 1 row 3 for the feed-forward after the rounds
+	movdqa	%xmm8,%xmm11
+	movdqa	%xmm13,%xmm15
+	movq	$10,%r10	# 10 iterations, each applying two rounds to every state
+1:
+	paddd	%xmm4,%xmm0	# state 0, first round of the pair: a += b
+	pxor	%xmm0,%xmm12	# d ^= a
+	pshufb	.rol16(%rip),%xmm12	# byte shuffle of d with the .rol16 mask (table defined elsewhere; presumably rotates each dword left by 16)
+	paddd	%xmm12,%xmm8	# c += d
+	pxor	%xmm8,%xmm4	# b ^= c
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4	# b = b rotated left by 12 (xmm3 is scratch)
+	paddd	%xmm4,%xmm0	# a += b
+	pxor	%xmm0,%xmm12	# d ^= a
+	pshufb	.rol8(%rip),%xmm12	# byte shuffle of d with the .rol8 mask (presumably rotates each dword left by 8)
+	paddd	%xmm12,%xmm8	# c += d
+	pxor	%xmm8,%xmm4	# b ^= c
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4	# b = b rotated left by 7
+.byte	102,15,58,15,228,4	# encoding of palignr $4,%xmm4,%xmm4: rotate row 1 by one dword lane
+.byte	102,69,15,58,15,192,8	# encoding of palignr $8,%xmm8,%xmm8: rotate row 2 by two lanes
+.byte	102,69,15,58,15,228,12	# encoding of palignr $12,%xmm12,%xmm12: rotate row 3 by three lanes
+	paddd	%xmm5,%xmm1	# state 1 (xmm1, xmm5, xmm9, xmm13): same first-round sequence
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,4	# encoding of palignr $4,%xmm5,%xmm5
+.byte	102,69,15,58,15,201,8	# encoding of palignr $8,%xmm9,%xmm9
+.byte	102,69,15,58,15,237,12	# encoding of palignr $12,%xmm13,%xmm13
+	paddd	%xmm6,%xmm2	# state 2 (xmm2, xmm6, xmm10, xmm14): same first-round sequence
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm6
+	pxor	%xmm3,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm6
+	pxor	%xmm3,%xmm6
+.byte	102,15,58,15,246,4	# encoding of palignr $4,%xmm6,%xmm6
+.byte	102,69,15,58,15,210,8	# encoding of palignr $8,%xmm10,%xmm10
+.byte	102,69,15,58,15,246,12	# encoding of palignr $12,%xmm14,%xmm14
+	paddd	%xmm4,%xmm0	# state 0, second round of the pair: same arithmetic on the lane-rotated rows
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12	# encoding of palignr $12,%xmm4,%xmm4: undo the lane rotation of row 1
+.byte	102,69,15,58,15,192,8	# encoding of palignr $8,%xmm8,%xmm8: undo row 2
+.byte	102,69,15,58,15,228,4	# encoding of palignr $4,%xmm12,%xmm12: undo row 3
+	paddd	%xmm5,%xmm1	# state 1, second round of the pair
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,12	# encoding of palignr $12,%xmm5,%xmm5
+.byte	102,69,15,58,15,201,8	# encoding of palignr $8,%xmm9,%xmm9
+.byte	102,69,15,58,15,237,4	# encoding of palignr $4,%xmm13,%xmm13
+	paddd	%xmm6,%xmm2	# state 2, second round of the pair
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm6
+	pxor	%xmm3,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm6
+	pxor	%xmm3,%xmm6
+.byte	102,15,58,15,246,12	# encoding of palignr $12,%xmm6,%xmm6
+.byte	102,69,15,58,15,210,8	# encoding of palignr $8,%xmm10,%xmm10
+.byte	102,69,15,58,15,246,4	# encoding of palignr $4,%xmm14,%xmm14
+
+	decq	%r10
+	jnz	1b	# repeat until all 10 iterations are done
+	paddd	.chacha20_consts(%rip),%xmm0	# feed-forward: add the initial state back into each state
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	.chacha20_consts(%rip),%xmm2
+	paddd	%xmm7,%xmm4	# initial row 1 (saved in xmm7)
+	paddd	%xmm7,%xmm5
+	paddd	%xmm7,%xmm6
+	paddd	%xmm11,%xmm9	# initial row 2 (saved in xmm11); xmm8 and xmm12 of state 0 are left unfinished because only xmm0 and xmm4 of that state are used below
+	paddd	%xmm11,%xmm10
+	paddd	%xmm15,%xmm13	# initial row 3 of state 1 (saved in xmm15)
+	paddd	.sse_inc(%rip),%xmm15	# advance the saved row 3 once more to obtain the initial row 3 of state 2
+	paddd	%xmm15,%xmm14
+
+	pand	.clamp(%rip),%xmm0	# mask the first 16 bytes of state 0 with .clamp (constant defined elsewhere)
+	movdqa	%xmm0,0(%rbp)	# 0(%rbp): the multiplier used by every multiply step below
+	movdqa	%xmm4,16(%rbp)	# 16(%rbp): the value open_sse_finalize adds to the accumulator at the very end
+
+	movq	%r8,%r8	# no-op (register moved onto itself); generator artifact
+	call	poly_hash_ad_internal	# defined elsewhere; NOTE(review): presumably absorbs the additional data into the accumulator r10, r11, r12 - confirm
+1:
+	cmpq	$16,%rbx	# rbx = input bytes remaining; fewer than 16 goes to the byte-wise tail
+	jb	open_sse_tail_16
+	subq	$16,%rbx
+	addq	0(%rsi),%r10	# accumulator += 16 input bytes, taken before they are decrypted
+	adcq	8+0(%rsi),%r11
+	adcq	$1,%r12	# add the carry plus a 1 in the third limb (bit 128 of the block)
+
+
+	movdqu	0(%rsi),%xmm3	# load the same 16 input bytes for decryption
+	pxor	%xmm3,%xmm1	# xmm1 = input XOR current keystream row
+	movdqu	%xmm1,0(%rdi)	# store 16 output bytes
+	leaq	16(%rsi),%rsi	# advance input pointer
+	leaq	16(%rdi),%rdi	# advance output pointer
+	movq	0+0(%rbp),%rax	# Multiply the accumulator by the 128-bit value stored at 0(%rbp) (low qword) and 8(%rbp) (high qword)
+	movq	%rax,%r15
+	mulq	%r10	# rdx:rax = low qword * r10
+	movq	%rax,%r13	# product limb 0
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11	# rdx:rax = low qword * r11
+	imulq	%r12,%r15	# r15 = low qword * r12 (64-bit product)
+	addq	%rax,%r14	# product limb 1 (partial)
+	adcq	%rdx,%r15	# product limb 2 (partial)
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9	# r9 is reused as scratch from here on
+	mulq	%r10	# rdx:rax = high qword * r10
+	addq	%rax,%r14	# product limb 1 complete
+	adcq	$0,%rdx
+	movq	%rdx,%r10	# hold the carry word destined for limb 2
+	movq	8+0(%rbp),%rax
+	mulq	%r11	# rdx:rax = high qword * r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9	# r9 = high qword * r12
+	addq	%r10,%r15	# product limb 2 complete
+	adcq	%rdx,%r9	# product limb 3
+	movq	%r13,%r10	# Reduce: keep the low 130 bits of the product in r10, r11, r12
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12	# r12 = bits 128-129 of the product
+	movq	%r15,%r13
+	andq	$-4,%r13	# r13:r14 = 4 * (product >> 130)
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15	# r15:r9 = product >> 130
+	shrq	$2,%r9
+	addq	%r13,%r10	# add 5 * (product >> 130) as 4x + 1x: a partial reduction modulo 2^130 - 5
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+
+	movdqa	%xmm5,%xmm1	# shift the keystream queue down one slot: xmm5 -> xmm1,
+	movdqa	%xmm9,%xmm5	# xmm9 -> xmm5,
+	movdqa	%xmm13,%xmm9	# xmm13 -> xmm9,
+	movdqa	%xmm2,%xmm13	# xmm2 -> xmm13 (state 2 follows state 1),
+	movdqa	%xmm6,%xmm2	# xmm6 -> xmm2,
+	movdqa	%xmm10,%xmm6	# xmm10 -> xmm6,
+	movdqa	%xmm14,%xmm10	# xmm14 -> xmm10
+	jmp	1b
+	jmp	open_sse_tail_16	# unreachable: the unconditional jmp above always branches; generator artifact
+
+
+
+
+
+
+.globl	_chacha20_poly1305_seal
+.private_extern _chacha20_poly1305_seal
+
+.p2align	6
+_chacha20_poly1305_seal:
+
+	pushq	%rbp
+
+	pushq	%rbx
+
+	pushq	%r12
+
+	pushq	%r13
+
+	pushq	%r14
+
+	pushq	%r15
+
+
+
+	pushq	%r9
+
+	subq	$288 + 32,%rsp
+
+
+
+
+
+
+
+	leaq	32(%rsp),%rbp
+	andq	$-32,%rbp
+	movq	56(%r9),%rbx
+	addq	%rdx,%rbx
+	movq	%rbx,8+32(%rbp)
+	movq	%r8,0+32(%rbp)
+	movq	%rdx,%rbx
+
+	movl	_OPENSSL_ia32cap_P+8(%rip),%eax
+	andl	$288,%eax
+	xorl	$288,%eax
+	jz	chacha20_poly1305_seal_avx2
+
+	cmpq	$128,%rbx
+	jbe	seal_sse_128
+
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqu	0(%r9),%xmm4
+	movdqu	16(%r9),%xmm8
+	movdqu	32(%r9),%xmm12
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm0,%xmm2
+	movdqa	%xmm0,%xmm3
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm4,%xmm6
+	movdqa	%xmm4,%xmm7
+	movdqa	%xmm8,%xmm9
+	movdqa	%xmm8,%xmm10
+	movdqa	%xmm8,%xmm11
+	movdqa	%xmm12,%xmm15
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,%xmm14
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,%xmm13
+	paddd	.sse_inc(%rip),%xmm12
+
+	movdqa	%xmm4,48(%rbp)
+	movdqa	%xmm8,64(%rbp)
+	movdqa	%xmm12,96(%rbp)
+	movdqa	%xmm13,112(%rbp)
+	movdqa	%xmm14,128(%rbp)
+	movdqa	%xmm15,144(%rbp)
+	movq	$10,%r10
+1:
+	movdqa	%xmm8,80(%rbp)
+	movdqa	.rol16(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	.rol8(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	80(%rbp),%xmm8
+.byte	102,15,58,15,255,4
+.byte	102,69,15,58,15,219,8
+.byte	102,69,15,58,15,255,12
+.byte	102,15,58,15,246,4
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,12
+.byte	102,15,58,15,237,4
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,12
+.byte	102,15,58,15,228,4
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,12
+	movdqa	%xmm8,80(%rbp)
+	movdqa	.rol16(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	.rol8(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	80(%rbp),%xmm8
+.byte	102,15,58,15,255,12
+.byte	102,69,15,58,15,219,8
+.byte	102,69,15,58,15,255,4
+.byte	102,15,58,15,246,12
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,4
+.byte	102,15,58,15,237,12
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,4
+.byte	102,15,58,15,228,12
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,4
+
+	decq	%r10
+	jnz	1b
+	paddd	.chacha20_consts(%rip),%xmm3
+	paddd	48(%rbp),%xmm7
+	paddd	64(%rbp),%xmm11
+	paddd	144(%rbp),%xmm15
+	paddd	.chacha20_consts(%rip),%xmm2
+	paddd	48(%rbp),%xmm6
+	paddd	64(%rbp),%xmm10
+	paddd	128(%rbp),%xmm14
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	48(%rbp),%xmm5
+	paddd	64(%rbp),%xmm9
+	paddd	112(%rbp),%xmm13
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+
+
+	pand	.clamp(%rip),%xmm3
+	movdqa	%xmm3,0(%rbp)
+	movdqa	%xmm7,16(%rbp)
+
+	movq	%r8,%r8
+	call	poly_hash_ad_internal
+	movdqu	0 + 0(%rsi),%xmm3
+	movdqu	16 + 0(%rsi),%xmm7
+	movdqu	32 + 0(%rsi),%xmm11
+	movdqu	48 + 0(%rsi),%xmm15
+	pxor	%xmm3,%xmm2
+	pxor	%xmm7,%xmm6
+	pxor	%xmm11,%xmm10
+	pxor	%xmm14,%xmm15
+	movdqu	%xmm2,0 + 0(%rdi)
+	movdqu	%xmm6,16 + 0(%rdi)
+	movdqu	%xmm10,32 + 0(%rdi)
+	movdqu	%xmm15,48 + 0(%rdi)
+	movdqu	0 + 64(%rsi),%xmm3
+	movdqu	16 + 64(%rsi),%xmm7
+	movdqu	32 + 64(%rsi),%xmm11
+	movdqu	48 + 64(%rsi),%xmm15
+	pxor	%xmm3,%xmm1
+	pxor	%xmm7,%xmm5
+	pxor	%xmm11,%xmm9
+	pxor	%xmm13,%xmm15
+	movdqu	%xmm1,0 + 64(%rdi)
+	movdqu	%xmm5,16 + 64(%rdi)
+	movdqu	%xmm9,32 + 64(%rdi)
+	movdqu	%xmm15,48 + 64(%rdi)
+
+	cmpq	$192,%rbx
+	ja	1f
+	movq	$128,%rcx
+	subq	$128,%rbx
+	leaq	128(%rsi),%rsi
+	jmp	seal_sse_128_seal_hash
+1:
+	movdqu	0 + 128(%rsi),%xmm3
+	movdqu	16 + 128(%rsi),%xmm7
+	movdqu	32 + 128(%rsi),%xmm11
+	movdqu	48 + 128(%rsi),%xmm15
+	pxor	%xmm3,%xmm0
+	pxor	%xmm7,%xmm4
+	pxor	%xmm11,%xmm8
+	pxor	%xmm12,%xmm15
+	movdqu	%xmm0,0 + 128(%rdi)
+	movdqu	%xmm4,16 + 128(%rdi)
+	movdqu	%xmm8,32 + 128(%rdi)
+	movdqu	%xmm15,48 + 128(%rdi)
+
+	movq	$192,%rcx
+	subq	$192,%rbx
+	leaq	192(%rsi),%rsi
+	movq	$2,%rcx
+	movq	$8,%r8
+	cmpq	$64,%rbx
+	jbe	seal_sse_tail_64
+	cmpq	$128,%rbx
+	jbe	seal_sse_tail_128
+	cmpq	$192,%rbx
+	jbe	seal_sse_tail_192
+
+1:
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	48(%rbp),%xmm4
+	movdqa	64(%rbp),%xmm8
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm8,%xmm9
+	movdqa	%xmm0,%xmm2
+	movdqa	%xmm4,%xmm6
+	movdqa	%xmm8,%xmm10
+	movdqa	%xmm0,%xmm3
+	movdqa	%xmm4,%xmm7
+	movdqa	%xmm8,%xmm11
+	movdqa	96(%rbp),%xmm15
+	paddd	.sse_inc(%rip),%xmm15
+	movdqa	%xmm15,%xmm14
+	paddd	.sse_inc(%rip),%xmm14
+	movdqa	%xmm14,%xmm13
+	paddd	.sse_inc(%rip),%xmm13
+	movdqa	%xmm13,%xmm12
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,96(%rbp)
+	movdqa	%xmm13,112(%rbp)
+	movdqa	%xmm14,128(%rbp)
+	movdqa	%xmm15,144(%rbp)
+
+2:
+	movdqa	%xmm8,80(%rbp)
+	movdqa	.rol16(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm4
+	pxor	%xmm8,%xmm4
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movdqa	.rol8(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	80(%rbp),%xmm8
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+.byte	102,15,58,15,255,4
+.byte	102,69,15,58,15,219,8
+.byte	102,69,15,58,15,255,12
+.byte	102,15,58,15,246,4
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,12
+.byte	102,15,58,15,237,4
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,12
+.byte	102,15,58,15,228,4
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,12
+	movdqa	%xmm8,80(%rbp)
+	movdqa	.rol16(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$20,%xmm8
+	pslld	$32-20,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	.rol8(%rip),%xmm8
+	paddd	%xmm7,%xmm3
+	paddd	%xmm6,%xmm2
+	paddd	%xmm5,%xmm1
+	paddd	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pxor	%xmm2,%xmm14
+	pxor	%xmm1,%xmm13
+	pxor	%xmm0,%xmm12
+.byte	102,69,15,56,0,248
+.byte	102,69,15,56,0,240
+.byte	102,69,15,56,0,232
+.byte	102,69,15,56,0,224
+	movdqa	80(%rbp),%xmm8
+	paddd	%xmm15,%xmm11
+	paddd	%xmm14,%xmm10
+	paddd	%xmm13,%xmm9
+	paddd	%xmm12,%xmm8
+	pxor	%xmm11,%xmm7
+	pxor	%xmm10,%xmm6
+	pxor	%xmm9,%xmm5
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm7,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm7
+	pxor	%xmm8,%xmm7
+	movdqa	%xmm6,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm6
+	pxor	%xmm8,%xmm6
+	movdqa	%xmm5,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm5
+	pxor	%xmm8,%xmm5
+	movdqa	%xmm4,%xmm8
+	psrld	$25,%xmm8
+	pslld	$32-25,%xmm4
+	pxor	%xmm8,%xmm4
+	movdqa	80(%rbp),%xmm8
+.byte	102,15,58,15,255,12
+.byte	102,69,15,58,15,219,8
+.byte	102,69,15,58,15,255,4
+.byte	102,15,58,15,246,12
+.byte	102,69,15,58,15,210,8
+.byte	102,69,15,58,15,246,4
+.byte	102,15,58,15,237,12
+.byte	102,69,15,58,15,201,8
+.byte	102,69,15,58,15,237,4
+.byte	102,15,58,15,228,12
+.byte	102,69,15,58,15,192,8
+.byte	102,69,15,58,15,228,4
+
+	leaq	16(%rdi),%rdi
+	decq	%r8
+	jge	2b
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rdi),%rdi
+	decq	%rcx
+	jg	2b
+	paddd	.chacha20_consts(%rip),%xmm3
+	paddd	48(%rbp),%xmm7
+	paddd	64(%rbp),%xmm11
+	paddd	144(%rbp),%xmm15
+	paddd	.chacha20_consts(%rip),%xmm2
+	paddd	48(%rbp),%xmm6
+	paddd	64(%rbp),%xmm10
+	paddd	128(%rbp),%xmm14
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	48(%rbp),%xmm5
+	paddd	64(%rbp),%xmm9
+	paddd	112(%rbp),%xmm13
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+
+	movdqa	%xmm14,80(%rbp)
+	movdqa	%xmm14,80(%rbp)
+	movdqu	0 + 0(%rsi),%xmm14
+	pxor	%xmm3,%xmm14
+	movdqu	%xmm14,0 + 0(%rdi)
+	movdqu	16 + 0(%rsi),%xmm14
+	pxor	%xmm7,%xmm14
+	movdqu	%xmm14,16 + 0(%rdi)
+	movdqu	32 + 0(%rsi),%xmm14
+	pxor	%xmm11,%xmm14
+	movdqu	%xmm14,32 + 0(%rdi)
+	movdqu	48 + 0(%rsi),%xmm14
+	pxor	%xmm15,%xmm14
+	movdqu	%xmm14,48 + 0(%rdi)
+
+	movdqa	80(%rbp),%xmm14
+	movdqu	0 + 64(%rsi),%xmm3
+	movdqu	16 + 64(%rsi),%xmm7
+	movdqu	32 + 64(%rsi),%xmm11
+	movdqu	48 + 64(%rsi),%xmm15
+	pxor	%xmm3,%xmm2
+	pxor	%xmm7,%xmm6
+	pxor	%xmm11,%xmm10
+	pxor	%xmm14,%xmm15
+	movdqu	%xmm2,0 + 64(%rdi)
+	movdqu	%xmm6,16 + 64(%rdi)
+	movdqu	%xmm10,32 + 64(%rdi)
+	movdqu	%xmm15,48 + 64(%rdi)
+	movdqu	0 + 128(%rsi),%xmm3
+	movdqu	16 + 128(%rsi),%xmm7
+	movdqu	32 + 128(%rsi),%xmm11
+	movdqu	48 + 128(%rsi),%xmm15
+	pxor	%xmm3,%xmm1
+	pxor	%xmm7,%xmm5
+	pxor	%xmm11,%xmm9
+	pxor	%xmm13,%xmm15
+	movdqu	%xmm1,0 + 128(%rdi)
+	movdqu	%xmm5,16 + 128(%rdi)
+	movdqu	%xmm9,32 + 128(%rdi)
+	movdqu	%xmm15,48 + 128(%rdi)
+
+	cmpq	$256,%rbx
+	ja	3f
+
+	movq	$192,%rcx
+	subq	$192,%rbx
+	leaq	192(%rsi),%rsi
+	jmp	seal_sse_128_seal_hash
+3:
+	movdqu	0 + 192(%rsi),%xmm3
+	movdqu	16 + 192(%rsi),%xmm7
+	movdqu	32 + 192(%rsi),%xmm11
+	movdqu	48 + 192(%rsi),%xmm15
+	pxor	%xmm3,%xmm0
+	pxor	%xmm7,%xmm4
+	pxor	%xmm11,%xmm8
+	pxor	%xmm12,%xmm15
+	movdqu	%xmm0,0 + 192(%rdi)
+	movdqu	%xmm4,16 + 192(%rdi)
+	movdqu	%xmm8,32 + 192(%rdi)
+	movdqu	%xmm15,48 + 192(%rdi)
+
+	leaq	256(%rsi),%rsi
+	subq	$256,%rbx
+	movq	$6,%rcx
+	movq	$4,%r8
+	cmpq	$192,%rbx
+	jg	1b
+	movq	%rbx,%rcx
+	testq	%rbx,%rbx
+	je	seal_sse_128_seal_hash
+	movq	$6,%rcx
+	cmpq	$64,%rbx
+	jg	3f
+
+seal_sse_tail_64:	/* Tail path taken when rbx <= 64: build one more ChaCha20 block in xmm0/xmm4/xmm8/xmm12 while hashing 16-byte blocks at (%rdi) */
+	movdqa	.chacha20_consts(%rip),%xmm0	/* row a = constants */
+	movdqa	48(%rbp),%xmm4	/* row b from frame slot 48(%rbp) */
+	movdqa	64(%rbp),%xmm8	/* row c from frame slot 64(%rbp) */
+	movdqa	96(%rbp),%xmm12	/* row d = saved counter row ... */
+	paddd	.sse_inc(%rip),%xmm12	/* ... plus .sse_inc (presumably the block-counter increment; table not shown here) */
+	movdqa	%xmm12,96(%rbp)	/* save the new row d for the feed-forward add after the rounds */
+	/* Pass entered at 1: two Poly1305 steps around one double round; pass entered at 2: one double round, then one step */
+1:
+	addq	0(%rdi),%r10	/* Poly1305 step: acc (r12:r11:r10) += 16 bytes at (%rdi) */
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12	/* carry plus the 2^128 pad bit */
+	movq	0+0(%rbp),%rax	/* acc *= 128-bit multiplier at 0(%rbp); product ends up in r9:r15:r14:r13 */
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	/* reduce mod 2^130-5: product bits 0..129 go back to r12:r11:r10 */
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	/* r14:r13 = 4 * (product >> 130) */
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	/* r9:r15 = product >> 130 */
+	addq	%r13,%r10	/* acc += 4*hi here and += hi below, i.e. 5*hi folded in */
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rdi),%rdi	/* advance the hash pointer past the block just absorbed */
+2:
+	paddd	%xmm4,%xmm0	/* column round, rows a=xmm0 b=xmm4 c=xmm8 d=xmm12: a += b */
+	pxor	%xmm0,%xmm12	/* d ^= a */
+	pshufb	.rol16(%rip),%xmm12	/* byte shuffle through .rol16 (presumably d <<<= 16; table not shown here) */
+	paddd	%xmm12,%xmm8	/* c += d */
+	pxor	%xmm8,%xmm4	/* b ^= c */
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4	/* b = rotl32(b, 12); xmm3 is scratch */
+	paddd	%xmm4,%xmm0	/* a += b */
+	pxor	%xmm0,%xmm12	/* d ^= a */
+	pshufb	.rol8(%rip),%xmm12	/* byte shuffle through .rol8 (presumably d <<<= 8) */
+	paddd	%xmm12,%xmm8	/* c += d */
+	pxor	%xmm8,%xmm4	/* b ^= c */
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4	/* b = rotl32(b, 7) */
+.byte	102,15,58,15,228,4	/* palignr $4,%xmm4,%xmm4: rotate row b by one dword */
+.byte	102,69,15,58,15,192,8	/* palignr $8,%xmm8,%xmm8: rotate row c by two dwords */
+.byte	102,69,15,58,15,228,12	/* palignr $12,%xmm12,%xmm12: rotate row d by three dwords */
+	paddd	%xmm4,%xmm0	/* diagonal round: same quarter-round on the rotated rows */
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12	/* palignr $12,%xmm4,%xmm4: undo the row rotation */
+.byte	102,69,15,58,15,192,8	/* palignr $8,%xmm8,%xmm8 */
+.byte	102,69,15,58,15,228,4	/* palignr $4,%xmm12,%xmm12 */
+	addq	0(%rdi),%r10	/* Poly1305 step on the next 16 bytes at (%rdi); same sequence as at label 1 */
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rdi),%rdi
+	decq	%rcx	/* rcx (set by the code that jumps here) counts passes that restart at label 1 */
+	jg	1b
+	decq	%r8	/* r8 (set by the code that jumps here) counts the remaining passes that restart at label 2 */
+	jge	2b
+	paddd	.chacha20_consts(%rip),%xmm0	/* feed-forward: add the pre-round state back into each row */
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+
+	jmp	seal_sse_128_seal	/* consume the keystream 16 bytes at a time */
+3:	/* dispatch: more than 64 bytes remain */
+	cmpq	$128,%rbx
+	jg	3f	/* more than 128 bytes left (signed compare): skip ahead to the next 3: label */
+
+seal_sse_tail_128:	/* Tail path for rbx <= 128: run two ChaCha20 states (xmm0/4/8/12 and xmm1/5/9/13) while hashing 16-byte blocks at (%rdi) */
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	48(%rbp),%xmm4
+	movdqa	64(%rbp),%xmm8
+	movdqa	%xmm0,%xmm1	/* state 1 starts with the same first three rows as state 0 */
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm8,%xmm9
+	movdqa	96(%rbp),%xmm13	/* state 1 row d = saved counter row + .sse_inc */
+	paddd	.sse_inc(%rip),%xmm13
+	movdqa	%xmm13,%xmm12	/* state 0 row d = state 1 row d + .sse_inc */
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,96(%rbp)	/* save both d rows for the feed-forward add after the rounds */
+	movdqa	%xmm13,112(%rbp)
+	/* Pass entered at 1: two Poly1305 steps and one double round; pass entered at 2: one double round with one step inside it */
+1:
+	addq	0(%rdi),%r10	/* Poly1305 step: acc (r12:r11:r10) += 16 bytes at (%rdi) */
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12	/* carry plus the 2^128 pad bit */
+	movq	0+0(%rbp),%rax	/* acc *= 128-bit multiplier at 0(%rbp); product ends up in r9:r15:r14:r13 */
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	/* reduce mod 2^130-5: product bits 0..129 go back to r12:r11:r10 */
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	/* r14:r13 = 4 * (product >> 130) */
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	/* r9:r15 = product >> 130 */
+	addq	%r13,%r10	/* acc += 4*hi here and += hi below, i.e. 5*hi folded in */
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rdi),%rdi	/* advance the hash pointer past the block just absorbed */
+2:
+	paddd	%xmm4,%xmm0	/* column round on state 0 (a=xmm0 b=xmm4 c=xmm8 d=xmm12): a += b */
+	pxor	%xmm0,%xmm12	/* d ^= a */
+	pshufb	.rol16(%rip),%xmm12	/* byte shuffle through .rol16 (presumably d <<<= 16; table not shown here) */
+	paddd	%xmm12,%xmm8	/* c += d */
+	pxor	%xmm8,%xmm4	/* b ^= c */
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4	/* b = rotl32(b, 12); xmm3 is scratch */
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12	/* byte shuffle through .rol8 (presumably d <<<= 8) */
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4	/* b = rotl32(b, 7) */
+.byte	102,15,58,15,228,4	/* palignr $4,%xmm4,%xmm4: rotate row b by one dword */
+.byte	102,69,15,58,15,192,8	/* palignr $8,%xmm8,%xmm8 */
+.byte	102,69,15,58,15,228,12	/* palignr $12,%xmm12,%xmm12 */
+	paddd	%xmm5,%xmm1	/* column round on state 1 (a=xmm1 b=xmm5 c=xmm9 d=xmm13) */
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,4	/* palignr $4,%xmm5,%xmm5 */
+.byte	102,69,15,58,15,201,8	/* palignr $8,%xmm9,%xmm9 */
+.byte	102,69,15,58,15,237,12	/* palignr $12,%xmm13,%xmm13 */
+	addq	0(%rdi),%r10	/* Poly1305 step placed between the two half rounds; same sequence as at label 1 */
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	paddd	%xmm4,%xmm0	/* diagonal round on state 0 (rows are rotated) */
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12	/* palignr $12,%xmm4,%xmm4: undo the row rotation */
+.byte	102,69,15,58,15,192,8	/* palignr $8,%xmm8,%xmm8 */
+.byte	102,69,15,58,15,228,4	/* palignr $4,%xmm12,%xmm12 */
+	paddd	%xmm5,%xmm1	/* diagonal round on state 1 */
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,12	/* palignr $12,%xmm5,%xmm5 */
+.byte	102,69,15,58,15,201,8	/* palignr $8,%xmm9,%xmm9 */
+.byte	102,69,15,58,15,237,4	/* palignr $4,%xmm13,%xmm13 */
+
+	leaq	16(%rdi),%rdi
+	decq	%rcx	/* rcx (set by the code that jumps here) counts passes that restart at label 1 */
+	jg	1b
+	decq	%r8	/* r8 (set by the code that jumps here) counts the remaining passes that restart at label 2 */
+	jge	2b
+	paddd	.chacha20_consts(%rip),%xmm1	/* feed-forward: add the pre-round state back into both states */
+	paddd	48(%rbp),%xmm5
+	paddd	64(%rbp),%xmm9
+	paddd	112(%rbp),%xmm13
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+	movdqu	0 + 0(%rsi),%xmm3	/* load 64 input bytes from (%rsi) */
+	movdqu	16 + 0(%rsi),%xmm7
+	movdqu	32 + 0(%rsi),%xmm11
+	movdqu	48 + 0(%rsi),%xmm15
+	pxor	%xmm3,%xmm1	/* XOR them with the state 1 keystream */
+	pxor	%xmm7,%xmm5
+	pxor	%xmm11,%xmm9
+	pxor	%xmm13,%xmm15	/* the fourth result is left in xmm15 */
+	movdqu	%xmm1,0 + 0(%rdi)	/* store the 64 output bytes at (%rdi) */
+	movdqu	%xmm5,16 + 0(%rdi)
+	movdqu	%xmm9,32 + 0(%rdi)
+	movdqu	%xmm15,48 + 0(%rdi)
+
+	movq	$64,%rcx	/* 64 bytes just written at (%rdi) are still to be hashed */
+	subq	$64,%rbx	/* 64 fewer bytes remain */
+	leaq	64(%rsi),%rsi
+	jmp	seal_sse_128_seal_hash
+3:	/* target of the preceding "jg 3f": more than 128 bytes remain */
+
+seal_sse_tail_192:	/* Tail path: run three ChaCha20 states (xmm0/4/8/12, xmm1/5/9/13, xmm2/6/10/14) while hashing 16-byte blocks at (%rdi) */
+	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	48(%rbp),%xmm4
+	movdqa	64(%rbp),%xmm8
+	movdqa	%xmm0,%xmm1	/* states 1 and 2 start with the same first three rows as state 0 */
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm8,%xmm9
+	movdqa	%xmm0,%xmm2
+	movdqa	%xmm4,%xmm6
+	movdqa	%xmm8,%xmm10
+	movdqa	96(%rbp),%xmm14	/* d rows: xmm14 = saved counter row + .sse_inc, xmm13 = xmm14 + .sse_inc, xmm12 = xmm13 + .sse_inc */
+	paddd	.sse_inc(%rip),%xmm14
+	movdqa	%xmm14,%xmm13
+	paddd	.sse_inc(%rip),%xmm13
+	movdqa	%xmm13,%xmm12
+	paddd	.sse_inc(%rip),%xmm12
+	movdqa	%xmm12,96(%rbp)	/* save the three d rows for the feed-forward add after the rounds */
+	movdqa	%xmm13,112(%rbp)
+	movdqa	%xmm14,128(%rbp)
+	/* Pass entered at 1: two Poly1305 steps and one double round; pass entered at 2: one double round with one step inside it */
+1:
+	addq	0(%rdi),%r10	/* Poly1305 step: acc (r12:r11:r10) += 16 bytes at (%rdi) */
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12	/* carry plus the 2^128 pad bit */
+	movq	0+0(%rbp),%rax	/* acc *= 128-bit multiplier at 0(%rbp); product ends up in r9:r15:r14:r13 */
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	/* reduce mod 2^130-5: product bits 0..129 go back to r12:r11:r10 */
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	/* r14:r13 = 4 * (product >> 130) */
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	/* r9:r15 = product >> 130 */
+	addq	%r13,%r10	/* acc += 4*hi here and += hi below, i.e. 5*hi folded in */
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rdi),%rdi	/* advance the hash pointer past the block just absorbed */
+2:
+	paddd	%xmm4,%xmm0	/* column round on state 0 (a=xmm0 b=xmm4 c=xmm8 d=xmm12): a += b */
+	pxor	%xmm0,%xmm12	/* d ^= a */
+	pshufb	.rol16(%rip),%xmm12	/* byte shuffle through .rol16 (presumably d <<<= 16; table not shown here) */
+	paddd	%xmm12,%xmm8	/* c += d */
+	pxor	%xmm8,%xmm4	/* b ^= c */
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4	/* b = rotl32(b, 12); xmm3 is scratch */
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12	/* byte shuffle through .rol8 (presumably d <<<= 8) */
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4	/* b = rotl32(b, 7) */
+.byte	102,15,58,15,228,4	/* palignr $4,%xmm4,%xmm4: rotate row b by one dword */
+.byte	102,69,15,58,15,192,8	/* palignr $8,%xmm8,%xmm8 */
+.byte	102,69,15,58,15,228,12	/* palignr $12,%xmm12,%xmm12 */
+	paddd	%xmm5,%xmm1	/* column round on state 1 (a=xmm1 b=xmm5 c=xmm9 d=xmm13) */
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,4	/* palignr $4,%xmm5,%xmm5 */
+.byte	102,69,15,58,15,201,8	/* palignr $8,%xmm9,%xmm9 */
+.byte	102,69,15,58,15,237,12	/* palignr $12,%xmm13,%xmm13 */
+	paddd	%xmm6,%xmm2	/* column round on state 2 (a=xmm2 b=xmm6 c=xmm10 d=xmm14) */
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm6
+	pxor	%xmm3,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm6
+	pxor	%xmm3,%xmm6
+.byte	102,15,58,15,246,4	/* palignr $4,%xmm6,%xmm6 */
+.byte	102,69,15,58,15,210,8	/* palignr $8,%xmm10,%xmm10 */
+.byte	102,69,15,58,15,246,12	/* palignr $12,%xmm14,%xmm14 */
+	addq	0(%rdi),%r10	/* Poly1305 step placed between the two half rounds; same sequence as at label 1 */
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	paddd	%xmm4,%xmm0	/* diagonal round on state 0 (rows are rotated) */
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12	/* palignr $12,%xmm4,%xmm4: undo the row rotation */
+.byte	102,69,15,58,15,192,8	/* palignr $8,%xmm8,%xmm8 */
+.byte	102,69,15,58,15,228,4	/* palignr $4,%xmm12,%xmm12 */
+	paddd	%xmm5,%xmm1	/* diagonal round on state 1 */
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,12	/* palignr $12,%xmm5,%xmm5 */
+.byte	102,69,15,58,15,201,8	/* palignr $8,%xmm9,%xmm9 */
+.byte	102,69,15,58,15,237,4	/* palignr $4,%xmm13,%xmm13 */
+	paddd	%xmm6,%xmm2	/* diagonal round on state 2 */
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm6
+	pxor	%xmm3,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm6
+	pxor	%xmm3,%xmm6
+.byte	102,15,58,15,246,12	/* palignr $12,%xmm6,%xmm6 */
+.byte	102,69,15,58,15,210,8	/* palignr $8,%xmm10,%xmm10 */
+.byte	102,69,15,58,15,246,4	/* palignr $4,%xmm14,%xmm14 */
+
+	leaq	16(%rdi),%rdi
+	decq	%rcx	/* rcx (set by the code that jumps here) counts passes that restart at label 1 */
+	jg	1b
+	decq	%r8	/* r8 (set by the code that jumps here) counts the remaining passes that restart at label 2 */
+	jge	2b
+	paddd	.chacha20_consts(%rip),%xmm2	/* feed-forward: add the pre-round state back into all three states */
+	paddd	48(%rbp),%xmm6
+	paddd	64(%rbp),%xmm10
+	paddd	128(%rbp),%xmm14
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	48(%rbp),%xmm5
+	paddd	64(%rbp),%xmm9
+	paddd	112(%rbp),%xmm13
+	paddd	.chacha20_consts(%rip),%xmm0
+	paddd	48(%rbp),%xmm4
+	paddd	64(%rbp),%xmm8
+	paddd	96(%rbp),%xmm12
+	movdqu	0 + 0(%rsi),%xmm3	/* first 64 input bytes XOR state 2 keystream -> (%rdi) */
+	movdqu	16 + 0(%rsi),%xmm7
+	movdqu	32 + 0(%rsi),%xmm11
+	movdqu	48 + 0(%rsi),%xmm15
+	pxor	%xmm3,%xmm2
+	pxor	%xmm7,%xmm6
+	pxor	%xmm11,%xmm10
+	pxor	%xmm14,%xmm15	/* the fourth result is left in xmm15 */
+	movdqu	%xmm2,0 + 0(%rdi)
+	movdqu	%xmm6,16 + 0(%rdi)
+	movdqu	%xmm10,32 + 0(%rdi)
+	movdqu	%xmm15,48 + 0(%rdi)
+	movdqu	0 + 64(%rsi),%xmm3	/* next 64 input bytes XOR state 1 keystream -> 64(%rdi) */
+	movdqu	16 + 64(%rsi),%xmm7
+	movdqu	32 + 64(%rsi),%xmm11
+	movdqu	48 + 64(%rsi),%xmm15
+	pxor	%xmm3,%xmm1
+	pxor	%xmm7,%xmm5
+	pxor	%xmm11,%xmm9
+	pxor	%xmm13,%xmm15
+	movdqu	%xmm1,0 + 64(%rdi)
+	movdqu	%xmm5,16 + 64(%rdi)
+	movdqu	%xmm9,32 + 64(%rdi)
+	movdqu	%xmm15,48 + 64(%rdi)
+
+	movq	$128,%rcx	/* 128 bytes just written at (%rdi) are still to be hashed */
+	subq	$128,%rbx	/* 128 fewer bytes remain */
+	leaq	128(%rsi),%rsi	/* execution falls through into seal_sse_128_seal_hash */
+
+seal_sse_128_seal_hash:	/* Loop: absorb rcx bytes at (%rdi) into the Poly1305 accumulator, 16 bytes per pass */
+	cmpq	$16,%rcx
+	jb	seal_sse_128_seal	/* fewer than 16 bytes left to hash: go on to the encrypt loop */
+	addq	0(%rdi),%r10	/* Poly1305 step: acc (r12:r11:r10) += 16 bytes at (%rdi) */
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12	/* carry plus the 2^128 pad bit */
+	movq	0+0(%rbp),%rax	/* acc *= 128-bit multiplier at 0(%rbp); product ends up in r9:r15:r14:r13 */
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	/* reduce mod 2^130-5: product bits 0..129 go back to r12:r11:r10 */
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	/* r14:r13 = 4 * (product >> 130) */
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	/* r9:r15 = product >> 130 */
+	addq	%r13,%r10	/* acc += 4*hi here and += hi below, i.e. 5*hi folded in */
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	subq	$16,%rcx	/* 16 fewer bytes left to hash */
+	leaq	16(%rdi),%rdi
+	jmp	seal_sse_128_seal_hash
+
+seal_sse_128_seal:	/* Loop: while rbx >= 16, XOR 16 input bytes with the keystream in xmm0, store them, and hash the stored bytes */
+	cmpq	$16,%rbx
+	jb	seal_sse_tail_16	/* fewer than 16 bytes remain */
+	subq	$16,%rbx
+
+	movdqu	0(%rsi),%xmm3	/* load 16 input bytes */
+	pxor	%xmm3,%xmm0	/* XOR with the current keystream register */
+	movdqu	%xmm0,0(%rdi)	/* store the result at (%rdi) */
+
+	addq	0(%rdi),%r10	/* Poly1305 step: acc (r12:r11:r10) += the 16 bytes just stored */
+	adcq	8(%rdi),%r11
+	adcq	$1,%r12	/* carry plus the 2^128 pad bit */
+	leaq	16(%rsi),%rsi	/* advance input and output pointers */
+	leaq	16(%rdi),%rdi
+	movq	0+0(%rbp),%rax	/* acc *= 128-bit multiplier at 0(%rbp); product ends up in r9:r15:r14:r13 */
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	/* reduce mod 2^130-5: product bits 0..129 go back to r12:r11:r10 */
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	/* r14:r13 = 4 * (product >> 130) */
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	/* r9:r15 = product >> 130 */
+	addq	%r13,%r10	/* acc += 4*hi here and += hi below, i.e. 5*hi folded in */
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	/* Shift the keystream queue down one register: xmm0 <- xmm4 <- xmm8 <- xmm12 <- xmm1 <- xmm5 <- xmm9 <- xmm13 */
+	movdqa	%xmm4,%xmm0
+	movdqa	%xmm8,%xmm4
+	movdqa	%xmm12,%xmm8
+	movdqa	%xmm1,%xmm12
+	movdqa	%xmm5,%xmm1
+	movdqa	%xmm9,%xmm5
+	movdqa	%xmm13,%xmm9
+	jmp	seal_sse_128_seal
+
+seal_sse_tail_16:	/* Final rbx (< 16) bytes: encrypt them byte-wise, then hash them, topping the block up from the buffer at 48(%r9) when it has bytes */
+	testq	%rbx,%rbx
+	jz	process_blocks_of_extra_in	/* nothing left to encrypt */
+
+	movq	%rbx,%r8	/* r8 tracks a byte total; no later instruction in this listing reads it before it is reloaded */
+	movq	%rbx,%rcx
+	leaq	-1(%rsi,%rbx), %rsi	/* point at the last input byte */
+	pxor	%xmm15,%xmm15
+1:
+	pslldq	$1,%xmm15	/* shift up one byte and insert the next byte at position 0, walking the input backwards */
+	pinsrb	$0,(%rsi),%xmm15
+	leaq	-1(%rsi),%rsi
+	decq	%rcx
+	jne	1b
+
+	/* xmm15 now holds the rbx input bytes in its low bytes, in memory order */
+	pxor	%xmm0,%xmm15	/* XOR with the keystream in xmm0 */
+
+	/* Write the low rbx bytes of the result to (%rdi), one byte per pass */
+	movq	%rbx,%rcx
+	movdqu	%xmm15,%xmm0
+2:
+	pextrb	$0,%xmm0,(%rdi)
+	psrldq	$1,%xmm0
+	addq	$1,%rdi
+	subq	$1,%rcx
+	jnz	2b
+
+
+
+
+
+
+
+	/* r9 = pointer saved on the stack just above the 288+32-byte frame; 48(%r9) is a byte pointer and 56(%r9) its remaining length */
+	movq	288+32(%rsp),%r9
+	movq	56(%r9),%r14
+	movq	48(%r9),%r13
+	testq	%r14,%r14
+	jz	process_partial_block	/* no extra bytes: hash the partial block alone */
+
+	movq	$16,%r15
+	subq	%rbx,%r15	/* r15 = free bytes left in the 16-byte block */
+	cmpq	%r15,%r14
+
+	jge	load_extra_in	/* signed compare: enough extra bytes to fill the block */
+	movq	%r14,%r15	/* otherwise take all the extra bytes there are */
+
+load_extra_in:
+
+	/* r15 = number of extra bytes to take; point rsi at the last of them */
+	leaq	-1(%r13,%r15), %rsi
+
+	/* Advance the stored pointer and shrink the stored length by r15 */
+	addq	%r15,%r13
+	subq	%r15,%r14
+	movq	%r13,48(%r9)
+	movq	%r14,56(%r9)
+
+
+
+	addq	%r15,%r8
+
+	/* Gather the r15 extra bytes into the low bytes of xmm11, reading backwards */
+	pxor	%xmm11,%xmm11
+3:
+	pslldq	$1,%xmm11
+	pinsrb	$0,(%rsi),%xmm11
+	leaq	-1(%rsi),%rsi
+	subq	$1,%r15
+	jnz	3b
+
+
+
+	/* Shift xmm11 up by rbx bytes so the extra bytes sit directly above the rbx output bytes */
+	movq	%rbx,%r15
+
+4:
+	pslldq	$1,%xmm11
+	subq	$1,%r15
+	jnz	4b
+
+
+
+	/* Mask xmm15 with the 16-byte .and_masks entry at index rbx-1 (presumably keeps the low rbx bytes; table not shown here) */
+	leaq	.and_masks(%rip),%r15
+	shlq	$4,%rbx	/* rbx *= 16 to index the 16-byte table entries */
+	pand	-16(%r15,%rbx), %xmm15
+
+	/* Merge the extra bytes into the block */
+	por	%xmm11,%xmm15
+
+
+
+.byte	102,77,15,126,253	/* movq %xmm15,%r13: low 64 bits of the block */
+	pextrq	$1,%xmm15,%r14	/* high 64 bits of the block */
+	addq	%r13,%r10	/* Poly1305 step: acc (r12:r11:r10) += the block in r14:r13 */
+	adcq	%r14,%r11
+	adcq	$1,%r12	/* carry plus the 2^128 pad bit */
+	movq	0+0(%rbp),%rax	/* acc *= 128-bit multiplier at 0(%rbp); product ends up in r9:r15:r14:r13 */
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	/* reduce mod 2^130-5: product bits 0..129 go back to r12:r11:r10 */
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	/* r14:r13 = 4 * (product >> 130) */
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	/* r9:r15 = product >> 130 */
+	addq	%r13,%r10	/* acc += 4*hi here and += hi below, i.e. 5*hi folded in */
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+
+process_blocks_of_extra_in:	/* Hash every whole 16-byte block of the buffer described by 48(%r9) (pointer) and 56(%r9) (length) */
+
+	movq	288+32(%rsp),%r9	/* r9 = pointer saved on the stack just above the 288+32-byte frame */
+	movq	48(%r9),%rsi	/* rsi = start of the remaining extra bytes */
+	movq	56(%r9),%r8
+	movq	%r8,%rcx	/* rcx keeps the full byte length for the trailer code */
+	shrq	$4,%r8	/* r8 = number of whole 16-byte blocks; sets ZF for the jz below */
+
+5:
+	jz	process_extra_in_trailer	/* ZF comes from the shrq above on entry and from the subq at the loop bottom afterwards */
+	addq	0(%rsi),%r10	/* Poly1305 step: acc (r12:r11:r10) += 16 bytes at (%rsi) */
+	adcq	8+0(%rsi),%r11
+	adcq	$1,%r12	/* carry plus the 2^128 pad bit */
+	movq	0+0(%rbp),%rax	/* acc *= 128-bit multiplier at 0(%rbp); product ends up in r9:r15:r14:r13 */
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9	/* r9 is reused as scratch from here on */
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	/* reduce mod 2^130-5: product bits 0..129 go back to r12:r11:r10 */
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	/* r14:r13 = 4 * (product >> 130) */
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	/* r9:r15 = product >> 130 */
+	addq	%r13,%r10	/* acc += 4*hi here and += hi below, i.e. 5*hi folded in */
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rsi),%rsi
+	subq	$1,%r8	/* one block done; ZF is tested by the jz at label 5 */
+	jmp	5b
+
+process_extra_in_trailer:	/* Gather the final (length mod 16) extra bytes into xmm15 for process_partial_block */
+	andq	$15,%rcx	/* rcx = leftover byte count; sets ZF */
+	movq	%rcx,%rbx	/* rbx = byte count used by process_partial_block (movq leaves the flags alone) */
+	jz	do_length_block	/* no leftover bytes */
+	leaq	-1(%rsi,%rcx), %rsi	/* point at the last leftover byte */
+	/* xmm15 is not cleared first; earlier contents are shifted upward as bytes are inserted */
+6:
+	pslldq	$1,%xmm15	/* shift up one byte and insert the next byte at position 0, reading backwards */
+	pinsrb	$0,(%rsi),%xmm15
+	leaq	-1(%rsi),%rsi
+	subq	$1,%rcx
+	jnz	6b
+
+process_partial_block:	/* Hash the partial block held in xmm15, where rbx is its byte count */
+
+	leaq	.and_masks(%rip),%r15
+	shlq	$4,%rbx	/* rbx *= 16 to index the 16-byte table entries */
+	pand	-16(%r15,%rbx), %xmm15	/* mask with the .and_masks entry at index count-1 (presumably keeps the low count bytes; table not shown here) */
+.byte	102,77,15,126,253	/* movq %xmm15,%r13: low 64 bits of the block */
+	pextrq	$1,%xmm15,%r14	/* high 64 bits of the block */
+	addq	%r13,%r10	/* Poly1305 step: acc (r12:r11:r10) += the block in r14:r13 */
+	adcq	%r14,%r11
+	adcq	$1,%r12	/* carry plus the 2^128 pad bit */
+	movq	0+0(%rbp),%rax	/* acc *= 128-bit multiplier at 0(%rbp); product ends up in r9:r15:r14:r13 */
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	/* reduce mod 2^130-5: product bits 0..129 go back to r12:r11:r10 */
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	/* r14:r13 = 4 * (product >> 130) */
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	/* r9:r15 = product >> 130 */
+	addq	%r13,%r10	/* acc += 4*hi here and += hi below, i.e. 5*hi folded in */
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+
+do_length_block:	/* Absorb the 16 bytes at 32(%rbp), finish the Poly1305 tag, store it, tear down the frame and return */
+	addq	32(%rbp),%r10	/* acc (r12:r11:r10) += 16 bytes at 32(%rbp) (presumably the length block; its setup is not shown here) */
+	adcq	8+32(%rbp),%r11
+	adcq	$1,%r12	/* carry plus the 2^128 pad bit */
+	movq	0+0(%rbp),%rax	/* acc *= 128-bit multiplier at 0(%rbp); product ends up in r9:r15:r14:r13 */
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	/* reduce mod 2^130-5: product bits 0..129 go back to r12:r11:r10 */
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14	/* r14:r13 = 4 * (product >> 130) */
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9	/* r9:r15 = product >> 130 */
+	addq	%r13,%r10	/* acc += 4*hi here and += hi below, i.e. 5*hi folded in */
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	/* Final reduction: compute acc - (2^130 - 5) and keep it unless the subtraction borrows */
+	movq	%r10,%r13	/* keep a copy of acc in r15:r14:r13 */
+	movq	%r11,%r14
+	movq	%r12,%r15
+	subq	$-5,%r10	/* subtract the limbs 2^64-5, 2^64-1, 3, which together are 2^130-5 */
+	sbbq	$-1,%r11
+	sbbq	$3,%r12
+	cmovcq	%r13,%r10	/* borrow means acc was already below 2^130-5: restore the copy */
+	cmovcq	%r14,%r11
+	cmovcq	%r15,%r12
+
+	addq	0+16(%rbp),%r10	/* tag = low 128 bits of acc + the 128-bit value at 16(%rbp); carry out is dropped */
+	adcq	8+16(%rbp),%r11
+
+	addq	$288 + 32,%rsp	/* release the 288+32-byte stack frame */
+
+	popq	%r9	/* r9 = the pointer that was read earlier through 288+32(%rsp) */
+
+	movq	%r10,0(%r9)	/* store the 16-byte tag at 0(%r9) */
+	movq	%r11,8(%r9)
+
+	popq	%r15	/* restore r15, r14, r13, r12, rbx, rbp from the stack */
+
+	popq	%r14
+
+	popq	%r13
+
+	popq	%r12
+
+	popq	%rbx
+
+	popq	%rbp
+
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+seal_sse_128:	/* Short-input seal path: run three ChaCha20 blocks at once, one supplying the Poly1305 key and two supplying keystream, then join seal_sse_128_seal */
+	movdqu	.chacha20_consts(%rip),%xmm0	/* row a (constants) for all three states */
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm0,%xmm2
+	movdqu	0(%r9),%xmm4	/* row b = 16 bytes at 0(%r9) */
+	movdqa	%xmm4,%xmm5
+	movdqa	%xmm4,%xmm6
+	movdqu	16(%r9),%xmm8	/* row c = 16 bytes at 16(%r9) */
+	movdqa	%xmm8,%xmm9
+	movdqa	%xmm8,%xmm10
+	movdqu	32(%r9),%xmm14	/* row d of state 2 = 16 bytes at 32(%r9), unmodified */
+	movdqa	%xmm14,%xmm12
+	paddd	.sse_inc(%rip),%xmm12	/* row d of state 0 = that value + .sse_inc (presumably the block-counter increment; table not shown here) */
+	movdqa	%xmm12,%xmm13
+	paddd	.sse_inc(%rip),%xmm13	/* row d of state 1 = state 0 row d + .sse_inc */
+	movdqa	%xmm4,%xmm7	/* keep copies of the initial rows b, c and state 0 row d for the feed-forward */
+	movdqa	%xmm8,%xmm11
+	movdqa	%xmm12,%xmm15
+	movq	$10,%r10	/* 10 double rounds */
+1:
+	paddd	%xmm4,%xmm0	/* column round on state 0 (a=xmm0 b=xmm4 c=xmm8 d=xmm12): a += b */
+	pxor	%xmm0,%xmm12	/* d ^= a */
+	pshufb	.rol16(%rip),%xmm12	/* byte shuffle through .rol16 (presumably d <<<= 16; table not shown here) */
+	paddd	%xmm12,%xmm8	/* c += d */
+	pxor	%xmm8,%xmm4	/* b ^= c */
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4	/* b = rotl32(b, 12); xmm3 is scratch */
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12	/* byte shuffle through .rol8 (presumably d <<<= 8) */
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4	/* b = rotl32(b, 7) */
+.byte	102,15,58,15,228,4	/* palignr $4,%xmm4,%xmm4: rotate row b by one dword */
+.byte	102,69,15,58,15,192,8	/* palignr $8,%xmm8,%xmm8 */
+.byte	102,69,15,58,15,228,12	/* palignr $12,%xmm12,%xmm12 */
+	paddd	%xmm5,%xmm1	/* column round on state 1 (a=xmm1 b=xmm5 c=xmm9 d=xmm13) */
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,4	/* palignr $4,%xmm5,%xmm5 */
+.byte	102,69,15,58,15,201,8	/* palignr $8,%xmm9,%xmm9 */
+.byte	102,69,15,58,15,237,12	/* palignr $12,%xmm13,%xmm13 */
+	paddd	%xmm6,%xmm2	/* column round on state 2 (a=xmm2 b=xmm6 c=xmm10 d=xmm14) */
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm6
+	pxor	%xmm3,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm6
+	pxor	%xmm3,%xmm6
+.byte	102,15,58,15,246,4	/* palignr $4,%xmm6,%xmm6 */
+.byte	102,69,15,58,15,210,8	/* palignr $8,%xmm10,%xmm10 */
+.byte	102,69,15,58,15,246,12	/* palignr $12,%xmm14,%xmm14 */
+	paddd	%xmm4,%xmm0	/* diagonal round on state 0 (rows are rotated) */
+	pxor	%xmm0,%xmm12
+	pshufb	.rol16(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm4
+	pxor	%xmm3,%xmm4
+	paddd	%xmm4,%xmm0
+	pxor	%xmm0,%xmm12
+	pshufb	.rol8(%rip),%xmm12
+	paddd	%xmm12,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,15,228,12	/* palignr $12,%xmm4,%xmm4: undo the row rotation */
+.byte	102,69,15,58,15,192,8	/* palignr $8,%xmm8,%xmm8 */
+.byte	102,69,15,58,15,228,4	/* palignr $4,%xmm12,%xmm12 */
+	paddd	%xmm5,%xmm1	/* diagonal round on state 1 */
+	pxor	%xmm1,%xmm13
+	pshufb	.rol16(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm5
+	pxor	%xmm3,%xmm5
+	paddd	%xmm5,%xmm1
+	pxor	%xmm1,%xmm13
+	pshufb	.rol8(%rip),%xmm13
+	paddd	%xmm13,%xmm9
+	pxor	%xmm9,%xmm5
+	movdqa	%xmm5,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm5
+	pxor	%xmm3,%xmm5
+.byte	102,15,58,15,237,12	/* palignr $12,%xmm5,%xmm5 */
+.byte	102,69,15,58,15,201,8	/* palignr $8,%xmm9,%xmm9 */
+.byte	102,69,15,58,15,237,4	/* palignr $4,%xmm13,%xmm13 */
+	paddd	%xmm6,%xmm2	/* diagonal round on state 2 */
+	pxor	%xmm2,%xmm14
+	pshufb	.rol16(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$12,%xmm3
+	psrld	$20,%xmm6
+	pxor	%xmm3,%xmm6
+	paddd	%xmm6,%xmm2
+	pxor	%xmm2,%xmm14
+	pshufb	.rol8(%rip),%xmm14
+	paddd	%xmm14,%xmm10
+	pxor	%xmm10,%xmm6
+	movdqa	%xmm6,%xmm3
+	pslld	$7,%xmm3
+	psrld	$25,%xmm6
+	pxor	%xmm3,%xmm6
+.byte	102,15,58,15,246,12	/* palignr $12,%xmm6,%xmm6 */
+.byte	102,69,15,58,15,210,8	/* palignr $8,%xmm10,%xmm10 */
+.byte	102,69,15,58,15,246,4	/* palignr $4,%xmm14,%xmm14 */
+
+	decq	%r10
+	jnz	1b
+	paddd	.chacha20_consts(%rip),%xmm0	/* feed-forward: add the initial rows back in */
+	paddd	.chacha20_consts(%rip),%xmm1
+	paddd	.chacha20_consts(%rip),%xmm2
+	paddd	%xmm7,%xmm4
+	paddd	%xmm7,%xmm5
+	paddd	%xmm7,%xmm6
+	paddd	%xmm11,%xmm8	/* rows c and d of state 2 (xmm10, xmm14) get no feed-forward; they are not used below */
+	paddd	%xmm11,%xmm9
+	paddd	%xmm15,%xmm12
+	paddd	.sse_inc(%rip),%xmm15	/* xmm15 becomes the initial row d of state 1 */
+	paddd	%xmm15,%xmm13
+
+	pand	.clamp(%rip),%xmm2	/* mask state 2 row a with .clamp and store it at 0(%rbp), the Poly1305 multiplier slot */
+	movdqa	%xmm2,0(%rbp)
+	movdqa	%xmm6,16(%rbp)	/* state 2 row b goes to 16(%rbp), the value added to the accumulator at the end */
+
+	movq	%r8,%r8	/* no-op move emitted by the generator */
+	call	poly_hash_ad_internal	/* defined elsewhere in the file; by its name, hashes the additional data */
+	jmp	seal_sse_128_seal	/* encrypt and hash using the keystream in xmm0,4,8,12 then xmm1,5,9,13 */
+
+
+
+
+.p2align	6
+chacha20_poly1305_open_avx2:
+	vzeroupper
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vbroadcasti128	0(%r9),%ymm4
+	vbroadcasti128	16(%r9),%ymm8
+	vbroadcasti128	32(%r9),%ymm12
+	vpaddd	.avx2_init(%rip),%ymm12,%ymm12
+	cmpq	$192,%rbx
+	jbe	open_avx2_192
+	cmpq	$320,%rbx
+	jbe	open_avx2_320
+
+	vmovdqa	%ymm4,64(%rbp)
+	vmovdqa	%ymm8,96(%rbp)
+	vmovdqa	%ymm12,160(%rbp)
+	movq	$10,%r10
+1:
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+
+	decq	%r10
+	jne	1b
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
+
+	vpand	.clamp(%rip),%ymm3,%ymm3
+	vmovdqa	%ymm3,0(%rbp)
+
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
+
+	movq	%r8,%r8
+	call	poly_hash_ad_internal
+	xorq	%rcx,%rcx
+
+1:
+	addq	0(%rsi,%rcx), %r10
+	adcq	8+0(%rsi,%rcx), %r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	addq	$16,%rcx
+	cmpq	$64,%rcx
+	jne	1b
+
+	vpxor	0(%rsi),%ymm0,%ymm0
+	vpxor	32(%rsi),%ymm4,%ymm4
+	vmovdqu	%ymm0,0(%rdi)
+	vmovdqu	%ymm4,32(%rdi)
+	leaq	64(%rsi),%rsi
+	leaq	64(%rdi),%rdi
+	subq	$64,%rbx
+1:
+
+	cmpq	$512,%rbx
+	jb	3f
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm10
+	vmovdqa	%ymm0,%ymm3
+	vmovdqa	%ymm4,%ymm7
+	vmovdqa	%ymm8,%ymm11
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm15
+	vpaddd	%ymm15,%ymm12,%ymm14
+	vpaddd	%ymm14,%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm15,256(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm12,160(%rbp)
+
+	xorq	%rcx,%rcx
+2:
+	addq	0*8(%rsi,%rcx), %r10
+	adcq	8+0*8(%rsi,%rcx), %r11
+	adcq	$1,%r12
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	addq	2*8(%rsi,%rcx), %r10
+	adcq	8+2*8(%rsi,%rcx), %r11
+	adcq	$1,%r12
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$4,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$12,%ymm15,%ymm15,%ymm15
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	addq	4*8(%rsi,%rcx), %r10
+	adcq	8+4*8(%rsi,%rcx), %r11
+	adcq	$1,%r12
+
+	leaq	48(%rcx),%rcx
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$12,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$4,%ymm15,%ymm15,%ymm15
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+
+	cmpq	$60*8,%rcx
+	jne	2b
+	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	64(%rbp),%ymm7,%ymm7
+	vpaddd	96(%rbp),%ymm11,%ymm11
+	vpaddd	256(%rbp),%ymm15,%ymm15
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	64(%rbp),%ymm6,%ymm6
+	vpaddd	96(%rbp),%ymm10,%ymm10
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+
+	vmovdqa	%ymm0,128(%rbp)
+	addq	60*8(%rsi),%r10
+	adcq	8+60*8(%rsi),%r11
+	adcq	$1,%r12
+	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
+	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
+	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
+	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
+	vpxor	0+0(%rsi),%ymm0,%ymm0
+	vpxor	32+0(%rsi),%ymm3,%ymm3
+	vpxor	64+0(%rsi),%ymm7,%ymm7
+	vpxor	96+0(%rsi),%ymm11,%ymm11
+	vmovdqu	%ymm0,0+0(%rdi)
+	vmovdqu	%ymm3,32+0(%rdi)
+	vmovdqu	%ymm7,64+0(%rdi)
+	vmovdqu	%ymm11,96+0(%rdi)
+
+	vmovdqa	128(%rbp),%ymm0
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
+	vpxor	0+128(%rsi),%ymm3,%ymm3
+	vpxor	32+128(%rsi),%ymm2,%ymm2
+	vpxor	64+128(%rsi),%ymm6,%ymm6
+	vpxor	96+128(%rsi),%ymm10,%ymm10
+	vmovdqu	%ymm3,0+128(%rdi)
+	vmovdqu	%ymm2,32+128(%rdi)
+	vmovdqu	%ymm6,64+128(%rdi)
+	vmovdqu	%ymm10,96+128(%rdi)
+	addq	60*8+16(%rsi),%r10
+	adcq	8+60*8+16(%rsi),%r11
+	adcq	$1,%r12
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+256(%rsi),%ymm3,%ymm3
+	vpxor	32+256(%rsi),%ymm1,%ymm1
+	vpxor	64+256(%rsi),%ymm5,%ymm5
+	vpxor	96+256(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm3,0+256(%rdi)
+	vmovdqu	%ymm1,32+256(%rdi)
+	vmovdqu	%ymm5,64+256(%rdi)
+	vmovdqu	%ymm9,96+256(%rdi)
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
+	vpxor	0+384(%rsi),%ymm3,%ymm3
+	vpxor	32+384(%rsi),%ymm0,%ymm0
+	vpxor	64+384(%rsi),%ymm4,%ymm4
+	vpxor	96+384(%rsi),%ymm8,%ymm8
+	vmovdqu	%ymm3,0+384(%rdi)
+	vmovdqu	%ymm0,32+384(%rdi)
+	vmovdqu	%ymm4,64+384(%rdi)
+	vmovdqu	%ymm8,96+384(%rdi)
+
+	leaq	512(%rsi),%rsi
+	leaq	512(%rdi),%rdi
+	subq	$512,%rbx
+	jmp	1b
+3:
+	testq	%rbx,%rbx
+	vzeroupper
+	je	open_sse_finalize
+3:
+	cmpq	$128,%rbx
+	ja	3f
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm12
+	vmovdqa	%ymm12,160(%rbp)
+
+	xorq	%r8,%r8
+	movq	%rbx,%rcx
+	andq	$-16,%rcx
+	testq	%rcx,%rcx
+	je	2f
+1:
+	addq	0*8(%rsi,%r8), %r10
+	adcq	8+0*8(%rsi,%r8), %r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+2:
+	addq	$16,%r8
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+
+	cmpq	%rcx,%r8
+	jb	1b
+	cmpq	$160,%r8
+	jne	2b
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
+	vmovdqa	%ymm3,%ymm8
+
+	jmp	open_avx2_tail_loop
+3:
+	cmpq	$256,%rbx
+	ja	3f
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+
+	movq	%rbx,128(%rbp)
+	movq	%rbx,%rcx
+	subq	$128,%rcx
+	shrq	$4,%rcx
+	movq	$10,%r8
+	cmpq	$10,%rcx
+	cmovgq	%r8,%rcx
+	movq	%rsi,%rbx
+	xorq	%r8,%r8
+1:
+	addq	0(%rbx),%r10
+	adcq	8+0(%rbx),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rbx),%rbx
+2:
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+
+	incq	%r8
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+
+	cmpq	%rcx,%r8
+	jb	1b
+	cmpq	$10,%r8
+	jne	2b
+	movq	%rbx,%r8
+	subq	%rsi,%rbx
+	movq	%rbx,%rcx
+	movq	128(%rbp),%rbx
+1:
+	addq	$16,%rcx
+	cmpq	%rbx,%rcx
+	jg	1f
+	addq	0(%r8),%r10
+	adcq	8+0(%r8),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%r8),%r8
+	jmp	1b
+1:
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+0(%rsi),%ymm3,%ymm3
+	vpxor	32+0(%rsi),%ymm1,%ymm1
+	vpxor	64+0(%rsi),%ymm5,%ymm5
+	vpxor	96+0(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm3,0+0(%rdi)
+	vmovdqu	%ymm1,32+0(%rdi)
+	vmovdqu	%ymm5,64+0(%rdi)
+	vmovdqu	%ymm9,96+0(%rdi)
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
+	vmovdqa	%ymm3,%ymm8
+
+	leaq	128(%rsi),%rsi
+	leaq	128(%rdi),%rdi
+	subq	$128,%rbx
+	jmp	open_avx2_tail_loop
+3:
+	cmpq	$384,%rbx
+	ja	3f
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm10
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm14
+	vpaddd	%ymm14,%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+
+	movq	%rbx,128(%rbp)
+	movq	%rbx,%rcx
+	subq	$256,%rcx
+	shrq	$4,%rcx
+	addq	$6,%rcx
+	movq	$10,%r8
+	cmpq	$10,%rcx
+	cmovgq	%r8,%rcx
+	movq	%rsi,%rbx
+	xorq	%r8,%r8
+1:
+	addq	0(%rbx),%r10
+	adcq	8+0(%rbx),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rbx),%rbx
+2:
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	addq	0(%rbx),%r10
+	adcq	8+0(%rbx),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rbx),%rbx
+	incq	%r8
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+
+	cmpq	%rcx,%r8
+	jb	1b
+	cmpq	$10,%r8
+	jne	2b
+	movq	%rbx,%r8
+	subq	%rsi,%rbx
+	movq	%rbx,%rcx
+	movq	128(%rbp),%rbx
+1:
+	addq	$16,%rcx
+	cmpq	%rbx,%rcx
+	jg	1f
+	addq	0(%r8),%r10
+	adcq	8+0(%r8),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%r8),%r8
+	jmp	1b
+1:
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	64(%rbp),%ymm6,%ymm6
+	vpaddd	96(%rbp),%ymm10,%ymm10
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
+	vpxor	0+0(%rsi),%ymm3,%ymm3
+	vpxor	32+0(%rsi),%ymm2,%ymm2
+	vpxor	64+0(%rsi),%ymm6,%ymm6
+	vpxor	96+0(%rsi),%ymm10,%ymm10
+	vmovdqu	%ymm3,0+0(%rdi)
+	vmovdqu	%ymm2,32+0(%rdi)
+	vmovdqu	%ymm6,64+0(%rdi)
+	vmovdqu	%ymm10,96+0(%rdi)
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+128(%rsi),%ymm3,%ymm3
+	vpxor	32+128(%rsi),%ymm1,%ymm1
+	vpxor	64+128(%rsi),%ymm5,%ymm5
+	vpxor	96+128(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm3,0+128(%rdi)
+	vmovdqu	%ymm1,32+128(%rdi)
+	vmovdqu	%ymm5,64+128(%rdi)
+	vmovdqu	%ymm9,96+128(%rdi)
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
+	vmovdqa	%ymm3,%ymm8
+
+	leaq	256(%rsi),%rsi
+	leaq	256(%rdi),%rdi
+	subq	$256,%rbx
+	jmp	open_avx2_tail_loop
+3:
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm10
+	vmovdqa	%ymm0,%ymm3
+	vmovdqa	%ymm4,%ymm7
+	vmovdqa	%ymm8,%ymm11
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm15
+	vpaddd	%ymm15,%ymm12,%ymm14
+	vpaddd	%ymm14,%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm15,256(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm12,160(%rbp)
+
+	xorq	%rcx,%rcx
+	movq	%rsi,%r8
+1:
+	addq	0(%r8),%r10
+	adcq	8+0(%r8),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%r8),%r8
+2:
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	addq	0(%r8),%r10
+	adcq	8+0(%r8),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$4,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$12,%ymm15,%ymm15,%ymm15
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vmovdqa	%ymm8,128(%rbp)
+	addq	16(%r8),%r10
+	adcq	8+16(%r8),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	32(%r8),%r8
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$12,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$4,%ymm15,%ymm15,%ymm15
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+
+	incq	%rcx
+	cmpq	$4,%rcx
+	jl	1b
+	cmpq	$10,%rcx
+	jne	2b
+	movq	%rbx,%rcx
+	subq	$384,%rcx
+	andq	$-16,%rcx
+1:
+	testq	%rcx,%rcx
+	je	1f
+	addq	0(%r8),%r10
+	adcq	8+0(%r8),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%r8),%r8
+	subq	$16,%rcx
+	jmp	1b
+1:
+	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	64(%rbp),%ymm7,%ymm7
+	vpaddd	96(%rbp),%ymm11,%ymm11
+	vpaddd	256(%rbp),%ymm15,%ymm15
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	64(%rbp),%ymm6,%ymm6
+	vpaddd	96(%rbp),%ymm10,%ymm10
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+
+	vmovdqa	%ymm0,128(%rbp)
+	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
+	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
+	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
+	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
+	vpxor	0+0(%rsi),%ymm0,%ymm0
+	vpxor	32+0(%rsi),%ymm3,%ymm3
+	vpxor	64+0(%rsi),%ymm7,%ymm7
+	vpxor	96+0(%rsi),%ymm11,%ymm11
+	vmovdqu	%ymm0,0+0(%rdi)
+	vmovdqu	%ymm3,32+0(%rdi)
+	vmovdqu	%ymm7,64+0(%rdi)
+	vmovdqu	%ymm11,96+0(%rdi)
+
+	vmovdqa	128(%rbp),%ymm0
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
+	vpxor	0+128(%rsi),%ymm3,%ymm3
+	vpxor	32+128(%rsi),%ymm2,%ymm2
+	vpxor	64+128(%rsi),%ymm6,%ymm6
+	vpxor	96+128(%rsi),%ymm10,%ymm10
+	vmovdqu	%ymm3,0+128(%rdi)
+	vmovdqu	%ymm2,32+128(%rdi)
+	vmovdqu	%ymm6,64+128(%rdi)
+	vmovdqu	%ymm10,96+128(%rdi)
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+256(%rsi),%ymm3,%ymm3
+	vpxor	32+256(%rsi),%ymm1,%ymm1
+	vpxor	64+256(%rsi),%ymm5,%ymm5
+	vpxor	96+256(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm3,0+256(%rdi)
+	vmovdqu	%ymm1,32+256(%rdi)
+	vmovdqu	%ymm5,64+256(%rdi)
+	vmovdqu	%ymm9,96+256(%rdi)
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
+	vmovdqa	%ymm3,%ymm8
+
+	leaq	384(%rsi),%rsi
+	leaq	384(%rdi),%rdi
+	subq	$384,%rbx
+open_avx2_tail_loop:	# XOR input with the vectors queued in ymm0, ymm4, ymm8, ymm12 (presumably ChaCha20 keystream), 32 bytes per pass
+	cmpq	$32,%rbx	# rbx is the remaining byte count; below 32 goes to the 16-byte tail
+	jb	open_avx2_tail
+	subq	$32,%rbx
+	vpxor	(%rsi),%ymm0,%ymm0	# ymm0 ^= 32 bytes read from (rsi)
+	vmovdqu	%ymm0,(%rdi)	# unaligned store of the result to (rdi)
+	leaq	32(%rsi),%rsi	# advance the read and write pointers by 32
+	leaq	32(%rdi),%rdi
+	vmovdqa	%ymm4,%ymm0	# shift the queue down: ymm4 -> ymm0, ymm8 -> ymm4, ymm12 -> ymm8
+	vmovdqa	%ymm8,%ymm4
+	vmovdqa	%ymm12,%ymm8
+	jmp	open_avx2_tail_loop
+open_avx2_tail:	# Fewer than 32 bytes remain: handle one optional 16-byte piece, then leave AVX2 code
+	cmpq	$16,%rbx
+	vmovdqa	%xmm0,%xmm1	# xmm1 = low 128 bits of ymm0; a vector move, so the flags from cmpq are still intact for jb
+	jb	1f	# under 16 bytes left: skip the 16-byte step
+	subq	$16,%rbx
+
+	vpxor	(%rsi),%xmm0,%xmm1	# xmm1 = 16 input bytes at (rsi) XOR low lane of ymm0
+	vmovdqu	%xmm1,(%rdi)	# store 16 result bytes at (rdi)
+	leaq	16(%rsi),%rsi
+	leaq	16(%rdi),%rdi
+	vperm2i128	$0x11,%ymm0,%ymm0,%ymm0	# copy the high 128-bit lane of ymm0 into both lanes
+	vmovdqa	%xmm0,%xmm1	# xmm1 = the next unused 16 bytes of ymm0
+1:
+	vzeroupper	# zero the upper halves of the ymm registers before jumping to open_sse_tail_16
+	jmp	open_sse_tail_16	# defined outside this view; presumably consumes xmm1 for the last partial block - TODO confirm
+
+open_avx2_192:	# Two-state path: runs the round loop on (ymm0,ymm4,ymm8,ymm12) and (ymm1,ymm5,ymm9,ymm13)
+	vmovdqa	%ymm0,%ymm1	# second state starts as a copy of the first
+	vmovdqa	%ymm0,%ymm2	# ymm2 keeps the initial ymm0 for the final add
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm4,%ymm6	# ymm6 keeps the initial ymm4
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm8,%ymm10	# ymm10 keeps the initial ymm8
+	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13	# ymm13 = ymm12 + .avx2_inc (presumably the block counter step - confirm against the table)
+	vmovdqa	%ymm12,%ymm11	# ymm11 keeps the initial ymm12
+	vmovdqa	%ymm13,%ymm15	# ymm15 keeps the initial ymm13
+	movq	$10,%r10	# loop counter: the body at 1: runs 10 times
+1:
+	vpaddd	%ymm4,%ymm0,%ymm0	# first pass, state 0: ymm0 += ymm4
+	vpxor	%ymm0,%ymm12,%ymm12	# ymm12 ^= ymm0
+	vpshufb	.rol16(%rip),%ymm12,%ymm12	# byte shuffle via the .rol16 table (by its name, a 16-bit rotate of each dword)
+	vpaddd	%ymm12,%ymm8,%ymm8	# ymm8 += ymm12
+	vpxor	%ymm8,%ymm4,%ymm4	# ymm4 ^= ymm8
+	vpsrld	$20,%ymm4,%ymm3	# the next three lines rotate each dword of ymm4 left by 12, using ymm3 as scratch
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12	# byte shuffle via the .rol8 table (by its name, an 8-bit rotate of each dword)
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3	# the next three lines rotate each dword of ymm4 left by 7
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12	# rotate the dwords inside each 128-bit lane (12, 8, 4 bytes) to realign rows for the second pass
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1	# first pass, state 1: same sequence on ymm1, ymm5, ymm9, ymm13
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm4,%ymm0,%ymm0	# second pass, state 0: same arithmetic on the realigned rows
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12	# inverse lane rotation (4, 8, 12 bytes) restores the layout used by the first pass
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1	# second pass, state 1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+
+	decq	%r10
+	jne	1b	# repeat until the counter in r10 reaches zero
+	vpaddd	%ymm2,%ymm0,%ymm0	# add the saved initial rows (ymm2, ymm6, ymm10, ymm11, ymm15) back into both states
+	vpaddd	%ymm2,%ymm1,%ymm1
+	vpaddd	%ymm6,%ymm4,%ymm4
+	vpaddd	%ymm6,%ymm5,%ymm5
+	vpaddd	%ymm10,%ymm8,%ymm8
+	vpaddd	%ymm10,%ymm9,%ymm9
+	vpaddd	%ymm11,%ymm12,%ymm12
+	vpaddd	%ymm15,%ymm13,%ymm13
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3	# ymm3 = low 128-bit lane of ymm0 (lower half) and low lane of ymm4 (upper half)
+
+	vpand	.clamp(%rip),%ymm3,%ymm3	# mask those 32 bytes with the .clamp constant
+	vmovdqa	%ymm3,0(%rbp)	# store at 0(%rbp); the multiply code in open_avx2_hash_and_xor_loop reads 0(%rbp) and 8(%rbp)
+
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0	# regroup the remaining lanes into the order ymm0, ymm4, ymm8, ymm12, ymm1, ymm5
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4	# ymm4 = high lanes of ymm8 and ymm12
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8	# ymm8 = low lanes of ymm1 and ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12	# ymm12 = low lanes of ymm9 and ymm13
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1	# ymm1 = high lanes of ymm1 and ymm5
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5	# ymm5 = high lanes of ymm9 and ymm13; execution continues at the open_avx2_short label that follows
+open_avx2_short:	# Reached by fallthrough from open_avx2_192 and by the jmp at the end of open_avx2_320
+	movq	%r8,%r8	# source and destination are the same register, so no value changes
+	call	poly_hash_ad_internal	# defined outside this view; presumably hashes the additional data - TODO confirm. Execution then falls into open_avx2_hash_and_xor_loop
+open_avx2_hash_and_xor_loop:	# Per 32 input bytes: fold two 16-byte blocks into the accumulator r10:r11:r12, then XOR the bytes with ymm0
+	cmpq	$32,%rbx	# rbx is the remaining byte count
+	jb	open_avx2_short_tail_32
+	subq	$32,%rbx
+	addq	0(%rsi),%r10	# block 1: accumulator += 16 input bytes at 0(rsi) ...
+	adcq	8+0(%rsi),%r11
+	adcq	$1,%r12	# ... plus 1 in the third limb (bit 128), with carry
+	movq	0+0(%rbp),%rax	# first 64-bit multiplier word, loaded from 0(%rbp)
+	movq	%rax,%r15
+	mulq	%r10	# rdx:rax = word0 * r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11	# rdx:rax = word0 * r11
+	imulq	%r12,%r15	# word0 * r12, low 64 bits only
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# second 64-bit multiplier word, loaded from 8(%rbp)
+	movq	%rax,%r9
+	mulq	%r10	# rdx:rax = word1 * r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11	# rdx:rax = word1 * r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9	# word1 * r12, low 64 bits only
+	addq	%r10,%r15
+	adcq	%rdx,%r9	# product is now in r13 (lowest), r14, r15, r9
+	movq	%r13,%r10	# reduction: keep r13, r14 and the low 2 bits of r15 as the new accumulator ...
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13	# ... then add the bits above bit 130 twice: once in place (r13:r14, i.e. times 4) ...
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15	# ... and once shifted right by 2 (r15:r9, i.e. times 1); total times 5, a partial reduction modulo 2^130 - 5
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	addq	16(%rsi),%r10	# block 2: identical add, multiply and reduce sequence for the 16 bytes at 16(rsi)
+	adcq	8+16(%rsi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+
+	vpxor	(%rsi),%ymm0,%ymm0	# the input bytes were hashed above, before this XOR; now ymm0 ^= the same 32 bytes
+	vmovdqu	%ymm0,(%rdi)	# store 32 result bytes at (rdi)
+	leaq	32(%rsi),%rsi
+	leaq	32(%rdi),%rdi
+
+	vmovdqa	%ymm4,%ymm0	# shift the register queue by one: ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13, ymm2, ymm6 each move down a slot
+	vmovdqa	%ymm8,%ymm4
+	vmovdqa	%ymm12,%ymm8
+	vmovdqa	%ymm1,%ymm12
+	vmovdqa	%ymm5,%ymm1
+	vmovdqa	%ymm9,%ymm5
+	vmovdqa	%ymm13,%ymm9
+	vmovdqa	%ymm2,%ymm13
+	vmovdqa	%ymm6,%ymm2
+	jmp	open_avx2_hash_and_xor_loop
+open_avx2_short_tail_32:	# Fewer than 32 bytes remain: hash and XOR one optional 16-byte block, then leave AVX2 code
+	cmpq	$16,%rbx
+	vmovdqa	%xmm0,%xmm1	# xmm1 = low 128 bits of ymm0; a vector move, so the flags from cmpq are still intact for jb
+	jb	1f	# under 16 bytes left: skip the 16-byte step
+	subq	$16,%rbx
+	addq	0(%rsi),%r10	# accumulator r10:r11:r12 += 16 input bytes at 0(rsi) ...
+	adcq	8+0(%rsi),%r11
+	adcq	$1,%r12	# ... plus 1 in the third limb (bit 128), with carry
+	movq	0+0(%rbp),%rax	# first 64-bit multiplier word, loaded from 0(%rbp)
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# second 64-bit multiplier word, loaded from 8(%rbp)
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9	# product is now in r13 (lowest), r14, r15, r9
+	movq	%r13,%r10	# reduction: keep the low 130 bits, then add 5 times the part above bit 130 (times 4 in place, times 1 shifted)
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	vpxor	(%rsi),%xmm0,%xmm3	# xmm3 = 16 input bytes at (rsi) XOR low lane of ymm0
+	vmovdqu	%xmm3,(%rdi)	# store 16 result bytes at (rdi)
+	leaq	16(%rsi),%rsi
+	leaq	16(%rdi),%rdi
+	vextracti128	$1,%ymm0,%xmm1	# xmm1 = high 128-bit lane of ymm0, the next unused 16 bytes
+1:
+	vzeroupper	# zero the upper halves of the ymm registers before jumping to open_sse_tail_16
+	jmp	open_sse_tail_16	# defined outside this view; presumably consumes xmm1 for the last partial block - TODO confirm
+
+open_avx2_320:	# Three-state path: runs the round loop on (ymm0,4,8,12), (ymm1,5,9,13) and (ymm2,6,10,14), then joins open_avx2_short
+	vmovdqa	%ymm0,%ymm1	# states 1 and 2 start as copies of state 0
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm8,%ymm10
+	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13	# ymm13 = ymm12 + .avx2_inc (presumably the block counter step - confirm against the table)
+	vpaddd	.avx2_inc(%rip),%ymm13,%ymm14	# ymm14 = ymm13 + .avx2_inc
+	vmovdqa	%ymm4,%ymm7	# ymm7 keeps the initial ymm4 for the final add
+	vmovdqa	%ymm8,%ymm11	# ymm11 keeps the initial ymm8
+	vmovdqa	%ymm12,160(%rbp)	# initial ymm12, ymm13, ymm14 are saved at 160, 192, 224(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+	movq	$10,%r10	# loop counter: the body at 1: runs 10 times
+1:
+	vpaddd	%ymm4,%ymm0,%ymm0	# first pass, state 0: ymm0 += ymm4
+	vpxor	%ymm0,%ymm12,%ymm12	# ymm12 ^= ymm0
+	vpshufb	.rol16(%rip),%ymm12,%ymm12	# byte shuffle via the .rol16 table (by its name, a 16-bit rotate of each dword)
+	vpaddd	%ymm12,%ymm8,%ymm8	# ymm8 += ymm12
+	vpxor	%ymm8,%ymm4,%ymm4	# ymm4 ^= ymm8
+	vpsrld	$20,%ymm4,%ymm3	# the next three lines rotate each dword of ymm4 left by 12, using ymm3 as scratch
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12	# byte shuffle via the .rol8 table (by its name, an 8-bit rotate of each dword)
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3	# the next three lines rotate each dword of ymm4 left by 7
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12	# rotate the dwords inside each 128-bit lane (12, 8, 4 bytes) to realign rows for the second pass
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1	# first pass, state 1: same sequence on ymm1, ymm5, ymm9, ymm13
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm6,%ymm2,%ymm2	# first pass, state 2: same sequence on ymm2, ymm6, ymm10, ymm14
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	vpaddd	%ymm4,%ymm0,%ymm0	# second pass, state 0: same arithmetic on the realigned rows
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12	# inverse lane rotation (4, 8, 12 bytes) restores the layout used by the first pass
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1	# second pass, state 1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm6,%ymm2,%ymm2	# second pass, state 2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+
+	decq	%r10
+	jne	1b	# repeat until the counter in r10 reaches zero
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0	# final add: .chacha20_consts into the first row of each state
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	%ymm7,%ymm4,%ymm4	# saved initial ymm4 (in ymm7) into the second rows
+	vpaddd	%ymm7,%ymm5,%ymm5
+	vpaddd	%ymm7,%ymm6,%ymm6
+	vpaddd	%ymm11,%ymm8,%ymm8	# saved initial ymm8 (in ymm11) into the third rows
+	vpaddd	%ymm11,%ymm9,%ymm9
+	vpaddd	%ymm11,%ymm10,%ymm10
+	vpaddd	160(%rbp),%ymm12,%ymm12	# per-state values saved at 160, 192, 224(%rbp) into the fourth rows
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3	# ymm3 = low 128-bit lane of ymm0 (lower half) and low lane of ymm4 (upper half)
+
+	vpand	.clamp(%rip),%ymm3,%ymm3	# mask those 32 bytes with the .clamp constant
+	vmovdqa	%ymm3,0(%rbp)	# store at 0(%rbp); the multiply code in open_avx2_hash_and_xor_loop reads 0(%rbp) and 8(%rbp)
+
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0	# regroup the remaining lanes into the order ymm0, ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13, ymm2, ymm6
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4	# ymm4 = high lanes of ymm8 and ymm12
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8	# ymm8 = low lanes of ymm1 and ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12	# ymm12 = low lanes of ymm9 and ymm13
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1	# ymm1 = high lanes of ymm1 and ymm5
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5	# ymm5 = high lanes of ymm9 and ymm13
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9	# ymm9 = low lanes of ymm2 and ymm6
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13	# ymm13 = low lanes of ymm10 and ymm14
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2	# ymm2 = high lanes of ymm2 and ymm6
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6	# ymm6 = high lanes of ymm10 and ymm14
+	jmp	open_avx2_short	# continue with the shared hash-and-XOR code
+
+
+
+
+.p2align	6
+chacha20_poly1305_seal_avx2:
+	vzeroupper
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vbroadcasti128	0(%r9),%ymm4
+	vbroadcasti128	16(%r9),%ymm8
+	vbroadcasti128	32(%r9),%ymm12
+	vpaddd	.avx2_init(%rip),%ymm12,%ymm12
+	cmpq	$192,%rbx
+	jbe	seal_avx2_192
+	cmpq	$320,%rbx
+	jbe	seal_avx2_320
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm0,%ymm3
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm4,%ymm7
+	vmovdqa	%ymm4,64(%rbp)
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm8,%ymm10
+	vmovdqa	%ymm8,%ymm11
+	vmovdqa	%ymm8,96(%rbp)
+	vmovdqa	%ymm12,%ymm15
+	vpaddd	.avx2_inc(%rip),%ymm15,%ymm14
+	vpaddd	.avx2_inc(%rip),%ymm14,%ymm13
+	vpaddd	.avx2_inc(%rip),%ymm13,%ymm12
+	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm15,256(%rbp)
+	movq	$10,%r10
+1:
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$4,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$12,%ymm15,%ymm15,%ymm15
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$12,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$4,%ymm15,%ymm15,%ymm15
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+
+	decq	%r10
+	jnz	1b
+	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	64(%rbp),%ymm7,%ymm7
+	vpaddd	96(%rbp),%ymm11,%ymm11
+	vpaddd	256(%rbp),%ymm15,%ymm15
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	64(%rbp),%ymm6,%ymm6
+	vpaddd	96(%rbp),%ymm10,%ymm10
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+
+	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
+	vperm2i128	$0x02,%ymm3,%ymm7,%ymm15
+	vperm2i128	$0x13,%ymm3,%ymm7,%ymm3
+	vpand	.clamp(%rip),%ymm15,%ymm15
+	vmovdqa	%ymm15,0(%rbp)
+	movq	%r8,%r8
+	call	poly_hash_ad_internal
+
+	vpxor	0(%rsi),%ymm3,%ymm3
+	vpxor	32(%rsi),%ymm11,%ymm11
+	vmovdqu	%ymm3,0(%rdi)
+	vmovdqu	%ymm11,32(%rdi)
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm15
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
+	vpxor	0+64(%rsi),%ymm15,%ymm15
+	vpxor	32+64(%rsi),%ymm2,%ymm2
+	vpxor	64+64(%rsi),%ymm6,%ymm6
+	vpxor	96+64(%rsi),%ymm10,%ymm10
+	vmovdqu	%ymm15,0+64(%rdi)
+	vmovdqu	%ymm2,32+64(%rdi)
+	vmovdqu	%ymm6,64+64(%rdi)
+	vmovdqu	%ymm10,96+64(%rdi)
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm15
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+192(%rsi),%ymm15,%ymm15
+	vpxor	32+192(%rsi),%ymm1,%ymm1
+	vpxor	64+192(%rsi),%ymm5,%ymm5
+	vpxor	96+192(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm15,0+192(%rdi)
+	vmovdqu	%ymm1,32+192(%rdi)
+	vmovdqu	%ymm5,64+192(%rdi)
+	vmovdqu	%ymm9,96+192(%rdi)
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm15
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
+	vmovdqa	%ymm15,%ymm8
+
+	leaq	320(%rsi),%rsi
+	subq	$320,%rbx
+	movq	$320,%rcx
+	cmpq	$128,%rbx
+	jbe	seal_avx2_hash
+	vpxor	0(%rsi),%ymm0,%ymm0
+	vpxor	32(%rsi),%ymm4,%ymm4
+	vpxor	64(%rsi),%ymm8,%ymm8
+	vpxor	96(%rsi),%ymm12,%ymm12
+	vmovdqu	%ymm0,320(%rdi)
+	vmovdqu	%ymm4,352(%rdi)
+	vmovdqu	%ymm8,384(%rdi)
+	vmovdqu	%ymm12,416(%rdi)
+	leaq	128(%rsi),%rsi
+	subq	$128,%rbx
+	movq	$8,%rcx
+	movq	$2,%r8
+	cmpq	$128,%rbx
+	jbe	seal_avx2_tail_128
+	cmpq	$256,%rbx
+	jbe	seal_avx2_tail_256
+	cmpq	$384,%rbx
+	jbe	seal_avx2_tail_384
+	cmpq	$512,%rbx
+	jbe	seal_avx2_tail_512
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm10
+	vmovdqa	%ymm0,%ymm3
+	vmovdqa	%ymm4,%ymm7
+	vmovdqa	%ymm8,%ymm11
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm15
+	vpaddd	%ymm15,%ymm12,%ymm14
+	vpaddd	%ymm14,%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm15,256(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$4,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$12,%ymm15,%ymm15,%ymm15
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$12,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$4,%ymm15,%ymm15,%ymm15
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+
+	subq	$16,%rdi
+	movq	$9,%rcx
+	jmp	4f
+1:
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm10
+	vmovdqa	%ymm0,%ymm3
+	vmovdqa	%ymm4,%ymm7
+	vmovdqa	%ymm8,%ymm11
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm15
+	vpaddd	%ymm15,%ymm12,%ymm14
+	vpaddd	%ymm14,%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm15,256(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm12,160(%rbp)
+
+	movq	$10,%rcx
+2:
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+4:
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	addq	16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$4,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$12,%ymm15,%ymm15,%ymm15
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	addq	32(%rdi),%r10
+	adcq	8+32(%rdi),%r11
+	adcq	$1,%r12
+
+	leaq	48(%rdi),%rdi
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$12,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$4,%ymm15,%ymm15,%ymm15
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+
+	decq	%rcx
+	jne	2b
+	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	64(%rbp),%ymm7,%ymm7
+	vpaddd	96(%rbp),%ymm11,%ymm11
+	vpaddd	256(%rbp),%ymm15,%ymm15
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	64(%rbp),%ymm6,%ymm6
+	vpaddd	96(%rbp),%ymm10,%ymm10
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+
+	leaq	32(%rdi),%rdi
+	vmovdqa	%ymm0,128(%rbp)
+	addq	-32(%rdi),%r10
+	adcq	8+-32(%rdi),%r11
+	adcq	$1,%r12
+	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
+	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
+	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
+	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
+	vpxor	0+0(%rsi),%ymm0,%ymm0
+	vpxor	32+0(%rsi),%ymm3,%ymm3
+	vpxor	64+0(%rsi),%ymm7,%ymm7
+	vpxor	96+0(%rsi),%ymm11,%ymm11
+	vmovdqu	%ymm0,0+0(%rdi)
+	vmovdqu	%ymm3,32+0(%rdi)
+	vmovdqu	%ymm7,64+0(%rdi)
+	vmovdqu	%ymm11,96+0(%rdi)
+
+	vmovdqa	128(%rbp),%ymm0
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
+	vpxor	0+128(%rsi),%ymm3,%ymm3
+	vpxor	32+128(%rsi),%ymm2,%ymm2
+	vpxor	64+128(%rsi),%ymm6,%ymm6
+	vpxor	96+128(%rsi),%ymm10,%ymm10
+	vmovdqu	%ymm3,0+128(%rdi)
+	vmovdqu	%ymm2,32+128(%rdi)
+	vmovdqu	%ymm6,64+128(%rdi)
+	vmovdqu	%ymm10,96+128(%rdi)
+	addq	-16(%rdi),%r10
+	adcq	8+-16(%rdi),%r11
+	adcq	$1,%r12
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+256(%rsi),%ymm3,%ymm3
+	vpxor	32+256(%rsi),%ymm1,%ymm1
+	vpxor	64+256(%rsi),%ymm5,%ymm5
+	vpxor	96+256(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm3,0+256(%rdi)
+	vmovdqu	%ymm1,32+256(%rdi)
+	vmovdqu	%ymm5,64+256(%rdi)
+	vmovdqu	%ymm9,96+256(%rdi)
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
+	vpxor	0+384(%rsi),%ymm3,%ymm3
+	vpxor	32+384(%rsi),%ymm0,%ymm0
+	vpxor	64+384(%rsi),%ymm4,%ymm4
+	vpxor	96+384(%rsi),%ymm8,%ymm8
+	vmovdqu	%ymm3,0+384(%rdi)
+	vmovdqu	%ymm0,32+384(%rdi)
+	vmovdqu	%ymm4,64+384(%rdi)
+	vmovdqu	%ymm8,96+384(%rdi)
+
+	leaq	512(%rsi),%rsi
+	subq	$512,%rbx
+	cmpq	$512,%rbx
+	jg	1b
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	addq	16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	32(%rdi),%rdi
+	movq	$10,%rcx
+	xorq	%r8,%r8
+	cmpq	$128,%rbx
+	ja	3f
+
+/* seal_avx2_tail_128: set up one ChaCha20 state in ymm0/ymm4/ymm8/ymm12 and run */
+/* the round loop below while absorbing 16-byte blocks at (%rdi) into the */
+/* accumulator %r10:%r11:%r12. On the fall-through path from the code just */
+/* above, %rbx <= 128 (unsigned), %rcx = 10 and %r8 = 0. */
+seal_avx2_tail_128:
+/* ymm0 = constants, ymm4/ymm8 = saved rows at 64(%rbp)/96(%rbp), */
+/* ymm12 = 160(%rbp) + .avx2_inc; the new ymm12 is written back to 160(%rbp). */
+/* NOTE(review): 160(%rbp) is presumably the block-counter row - confirm. */
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm12
+	vmovdqa	%ymm12,160(%rbp)
+
+1:
+/* Absorb one block: add the 16 bytes at 0(%rdi) into %r10:%r11 and 1 (plus */
+/* carry) into %r12. */
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+/* Multiply the accumulator by the two 64-bit words at 0(%rbp) and 8(%rbp); */
+/* the product limbs end up in %r13 (low), %r14, %r15 and %r9 (high). */
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+/* Fold back to three limbs: keep %r13, %r14 and the low 2 bits of %r15, then */
+/* add both (%r9:%r15 with the low 2 bits cleared) and (%r9:%r15 >> 2) to the */
+/* low limbs, i.e. 5 times the bits above bit 130. */
+/* NOTE(review): this matches reduction modulo 2^130-5 (Poly1305) - confirm. */
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rdi),%rdi
+2:
+/* Quarter-round on (ymm0, ymm4, ymm8, ymm12) with ymm3 as scratch: */
+/* add/xor/shuffle with .rol16, add/xor/rotate-left 12, add/xor/shuffle with */
+/* .rol8, add/xor/rotate-left 7. The .rol16/.rol8 tables are presumably byte */
+/* shuffles implementing rotate-left by 16 and 8 - defined outside this view. */
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+/* Rotate each 128-bit lane of ymm12/ymm8/ymm4 by 12/8/4 bytes so the next */
+/* quarter-round works on the diagonals. */
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+/* Absorb the 16 bytes at 0(%rdi): same add / multiply / fold sequence as at */
+/* label 1. */
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+/* Second quarter-round on the rotated rows. */
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+/* Undo the lane rotation (4/8/12 bytes complement the 12/8/4 used earlier). */
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+/* Absorb the 16 bytes at 16(%rdi): same add / multiply / fold sequence. */
+	addq	16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+/* Loop control: while %rcx stays > 0 after the decrement, restart at label 1 */
+/* (three 16-byte blocks absorbed per pass); after that, restart at label 2 */
+/* (two blocks per pass) while %r8 stays >= 0 after its decrement. */
+	leaq	32(%rdi),%rdi
+	decq	%rcx
+	jg	1b
+	decq	%r8
+	jge	2b
+/* Add the starting state (constants, 64/96/160(%rbp)) back into the result. */
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+/* Regroup 128-bit lanes: ymm0 = low lanes of old ymm0,ymm4; ymm4 = low lanes */
+/* of old ymm8,ymm12; ymm8 = high lanes of old ymm0,ymm4 (via ymm3); */
+/* ymm12 = high lanes of old ymm8,ymm12. */
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
+	vmovdqa	%ymm3,%ymm8
+
+/* Continue at seal_avx2_short_loop (defined outside this view). */
+	jmp	seal_avx2_short_loop
+/* Target of the preceding "ja 3f": if %rbx > 256 (unsigned) skip ahead to the */
+/* next numeric label 3, otherwise fall into seal_avx2_tail_256. */
+3:
+	cmpq	$256,%rbx
+	ja	3f
+
+/* seal_avx2_tail_256: set up two ChaCha20 states, (ymm0,ymm4,ymm8,ymm12) and */
+/* (ymm1,ymm5,ymm9,ymm13), and run the round loop below while absorbing */
+/* 16-byte blocks at (%rdi) into the accumulator %r10:%r11:%r12. %rcx and %r8 */
+/* hold the loop counts and are set before entry (10 and 0 on the path that */
+/* falls through from the code above). */
+seal_avx2_tail_256:
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+/* ymm13 = 160(%rbp) + .avx2_inc, ymm12 = ymm13 + .avx2_inc; both are saved */
+/* (160(%rbp) and 192(%rbp)) for the final add-back. */
+/* NOTE(review): 160(%rbp) is presumably the block-counter row - confirm. */
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+
+1:
+/* Absorb one block: add the 16 bytes at 0(%rdi) into %r10:%r11 and 1 (plus */
+/* carry) into %r12, then multiply by the two 64-bit words at 0(%rbp) and */
+/* 8(%rbp); product limbs end up in %r13 (low), %r14, %r15, %r9 (high). */
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+/* Fold back to three limbs: keep %r13, %r14 and the low 2 bits of %r15, then */
+/* add both (%r9:%r15 with the low 2 bits cleared) and (%r9:%r15 >> 2) to the */
+/* low limbs, i.e. 5 times the bits above bit 130. */
+/* NOTE(review): this matches reduction modulo 2^130-5 (Poly1305) - confirm. */
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rdi),%rdi
+2:
+/* Quarter-round on (ymm0, ymm4, ymm8, ymm12) with ymm3 as scratch: */
+/* add/xor/shuffle with .rol16, add/xor/rotate-left 12, add/xor/shuffle with */
+/* .rol8, add/xor/rotate-left 7; then rotate the 128-bit lanes of */
+/* ymm12/ymm8/ymm4 by 12/8/4 bytes. The .rol16/.rol8 tables are presumably */
+/* rotate-left-16/8 byte shuffles - defined outside this view. */
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+/* Same quarter-round and lane rotation on (ymm1, ymm5, ymm9, ymm13). */
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+/* Absorb the 16 bytes at 0(%rdi): same add / multiply / fold sequence as at */
+/* label 1. */
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+/* Second quarter-round on (ymm0, ymm4, ymm8, ymm12); the 4/8/12-byte lane */
+/* rotations afterwards undo the earlier 12/8/4 ones. */
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+/* Second quarter-round and inverse lane rotation on (ymm1, ymm5, ymm9, ymm13). */
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+/* Absorb the 16 bytes at 16(%rdi): same add / multiply / fold sequence. */
+	addq	16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+/* Loop control: while %rcx stays > 0 after the decrement, restart at label 1 */
+/* (three 16-byte blocks absorbed per pass); after that, restart at label 2 */
+/* (two blocks per pass) while %r8 stays >= 0 after its decrement. */
+	leaq	32(%rdi),%rdi
+	decq	%rcx
+	jg	1b
+	decq	%r8
+	jge	2b
+/* Add the starting state back into both results (192(%rbp) and 160(%rbp) */
+/* hold the per-state fourth rows saved during setup). */
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+/* Regroup the 128-bit lanes of the second state into ymm3, ymm1, ymm5, ymm9, */
+/* XOR them with the 128 bytes at 0(%rsi) and store the result at 0(%rdi). */
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+0(%rsi),%ymm3,%ymm3
+	vpxor	32+0(%rsi),%ymm1,%ymm1
+	vpxor	64+0(%rsi),%ymm5,%ymm5
+	vpxor	96+0(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm3,0+0(%rdi)
+	vmovdqu	%ymm1,32+0(%rdi)
+	vmovdqu	%ymm5,64+0(%rdi)
+	vmovdqu	%ymm9,96+0(%rdi)
+/* Regroup the first state and leave it in registers: ymm0 = low lanes of old */
+/* ymm0,ymm4; ymm4 = low lanes of old ymm8,ymm12; ymm8 = high lanes of old */
+/* ymm0,ymm4 (via ymm3); ymm12 = high lanes of old ymm8,ymm12. */
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
+	vmovdqa	%ymm3,%ymm8
+
+/* Account for the 128 bytes just written: %rcx = 128, %rsi += 128, */
+/* %rbx -= 128; continue at seal_avx2_hash (defined outside this view). */
+	movq	$128,%rcx
+	leaq	128(%rsi),%rsi
+	subq	$128,%rbx
+	jmp	seal_avx2_hash
+/* Target of the preceding "ja 3f": if %rbx > 384 (unsigned) go to */
+/* seal_avx2_tail_512, otherwise fall into seal_avx2_tail_384. */
+3:
+	cmpq	$384,%rbx
+	ja	seal_avx2_tail_512
+
+/* seal_avx2_tail_384: set up three ChaCha20 states, (ymm0,ymm4,ymm8,ymm12), */
+/* (ymm1,ymm5,ymm9,ymm13) and (ymm2,ymm6,ymm10,ymm14), and run the round loop */
+/* below while absorbing 16-byte blocks at (%rdi) into the accumulator */
+/* %r10:%r11:%r12. %rcx and %r8 hold the loop counts and are set before entry */
+/* (10 and 0 on the path that falls through from the code above). */
+seal_avx2_tail_384:
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm10
+/* ymm14 = 160(%rbp) + .avx2_inc, ymm13 = ymm14 + .avx2_inc, */
+/* ymm12 = ymm13 + .avx2_inc; saved at 224/192/160(%rbp) for the add-back. */
+/* NOTE(review): 160(%rbp) is presumably the block-counter row - confirm. */
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm14
+	vpaddd	%ymm14,%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+
+1:
+/* Absorb one block: add the 16 bytes at 0(%rdi) into %r10:%r11 and 1 (plus */
+/* carry) into %r12, then multiply by the two 64-bit words at 0(%rbp) and */
+/* 8(%rbp); product limbs end up in %r13 (low), %r14, %r15, %r9 (high). */
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+/* Fold back to three limbs: keep %r13, %r14 and the low 2 bits of %r15, then */
+/* add both (%r9:%r15 with the low 2 bits cleared) and (%r9:%r15 >> 2) to the */
+/* low limbs, i.e. 5 times the bits above bit 130. */
+/* NOTE(review): this matches reduction modulo 2^130-5 (Poly1305) - confirm. */
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rdi),%rdi
+2:
+/* Quarter-round on (ymm0, ymm4, ymm8, ymm12) with ymm3 as scratch: */
+/* add/xor/shuffle with .rol16, add/xor/rotate-left 12, add/xor/shuffle with */
+/* .rol8, add/xor/rotate-left 7; then rotate the 128-bit lanes of */
+/* ymm12/ymm8/ymm4 by 12/8/4 bytes. The .rol16/.rol8 tables are presumably */
+/* rotate-left-16/8 byte shuffles - defined outside this view. */
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+/* Same quarter-round and lane rotation on (ymm1, ymm5, ymm9, ymm13). */
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+/* Absorb the 16 bytes at 0(%rdi): same add / multiply / fold sequence as at */
+/* label 1. */
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+/* Same quarter-round and lane rotation on (ymm2, ymm6, ymm10, ymm14). */
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+/* Second quarter-round on (ymm0, ymm4, ymm8, ymm12); the 4/8/12-byte lane */
+/* rotations afterwards undo the earlier 12/8/4 ones. */
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+/* Absorb the 16 bytes at 16(%rdi): same add / multiply / fold sequence. */
+	addq	16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+/* Second quarter-round and inverse lane rotation on (ymm1, ymm5, ymm9, ymm13). */
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+/* Second quarter-round and inverse lane rotation on (ymm2, ymm6, ymm10, ymm14). */
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+
+/* Loop control: while %rcx stays > 0 after the decrement, restart at label 1 */
+/* (three 16-byte blocks absorbed per pass); after that, restart at label 2 */
+/* (two blocks per pass) while %r8 stays >= 0 after its decrement. */
+	leaq	32(%rdi),%rdi
+	decq	%rcx
+	jg	1b
+	decq	%r8
+	jge	2b
+/* Add the starting state back into all three results (224/192/160(%rbp) */
+/* hold the per-state fourth rows saved during setup). */
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	64(%rbp),%ymm6,%ymm6
+	vpaddd	96(%rbp),%ymm10,%ymm10
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+/* Regroup the 128-bit lanes of the third state, XOR with the 128 bytes at */
+/* 0(%rsi) and store at 0(%rdi). */
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
+	vpxor	0+0(%rsi),%ymm3,%ymm3
+	vpxor	32+0(%rsi),%ymm2,%ymm2
+	vpxor	64+0(%rsi),%ymm6,%ymm6
+	vpxor	96+0(%rsi),%ymm10,%ymm10
+	vmovdqu	%ymm3,0+0(%rdi)
+	vmovdqu	%ymm2,32+0(%rdi)
+	vmovdqu	%ymm6,64+0(%rdi)
+	vmovdqu	%ymm10,96+0(%rdi)
+/* Regroup the second state, XOR with the 128 bytes at 128(%rsi) and store at */
+/* 128(%rdi). */
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+128(%rsi),%ymm3,%ymm3
+	vpxor	32+128(%rsi),%ymm1,%ymm1
+	vpxor	64+128(%rsi),%ymm5,%ymm5
+	vpxor	96+128(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm3,0+128(%rdi)
+	vmovdqu	%ymm1,32+128(%rdi)
+	vmovdqu	%ymm5,64+128(%rdi)
+	vmovdqu	%ymm9,96+128(%rdi)
+/* Regroup the first state and leave it in registers: ymm0 = low lanes of old */
+/* ymm0,ymm4; ymm4 = low lanes of old ymm8,ymm12; ymm8 = high lanes of old */
+/* ymm0,ymm4 (via ymm3); ymm12 = high lanes of old ymm8,ymm12. */
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
+	vmovdqa	%ymm3,%ymm8
+
+/* Account for the 256 bytes just written: %rcx = 256, %rsi += 256, */
+/* %rbx -= 256; continue at seal_avx2_hash (defined outside this view). */
+	movq	$256,%rcx
+	leaq	256(%rsi),%rsi
+	subq	$256,%rbx
+	jmp	seal_avx2_hash
+
+seal_avx2_tail_512:
+	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	64(%rbp),%ymm4
+	vmovdqa	96(%rbp),%ymm8
+	vmovdqa	%ymm0,%ymm1
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm10
+	vmovdqa	%ymm0,%ymm3
+	vmovdqa	%ymm4,%ymm7
+	vmovdqa	%ymm8,%ymm11
+	vmovdqa	.avx2_inc(%rip),%ymm12
+	vpaddd	160(%rbp),%ymm12,%ymm15
+	vpaddd	%ymm15,%ymm12,%ymm14
+	vpaddd	%ymm14,%ymm12,%ymm13
+	vpaddd	%ymm13,%ymm12,%ymm12
+	vmovdqa	%ymm15,256(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm12,160(%rbp)
+
+1:
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rdi),%rdi
+2:
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	addq	0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$4,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$12,%ymm15,%ymm15,%ymm15
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	.rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$20,%ymm7,%ymm8
+	vpslld	$32-20,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$20,%ymm4,%ymm8
+	vpslld	$32-20,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	addq	16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
+	vmovdqa	.rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	vpshufb	%ymm8,%ymm12,%ymm12
+	vmovdqa	128(%rbp),%ymm8
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	movq	0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpsrld	$25,%ymm6,%ymm8
+	vpslld	$32-25,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	vpsrld	$25,%ymm5,%ymm8
+	vpslld	$32-25,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm5,%ymm5
+	vpsrld	$25,%ymm4,%ymm8
+	vpslld	$32-25,%ymm4,%ymm4
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	128(%rbp),%ymm8
+	vpalignr	$12,%ymm7,%ymm7,%ymm7
+	vpalignr	$8,%ymm11,%ymm11,%ymm11
+	vpalignr	$4,%ymm15,%ymm15,%ymm15
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+	movq	8+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+
+
+
+
+
+
+
+
+
+
+
+
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	32(%rdi),%rdi
+	decq	%rcx
+	jg	1b
+	decq	%r8
+	jge	2b
+	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	64(%rbp),%ymm7,%ymm7
+	vpaddd	96(%rbp),%ymm11,%ymm11
+	vpaddd	256(%rbp),%ymm15,%ymm15
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	64(%rbp),%ymm6,%ymm6
+	vpaddd	96(%rbp),%ymm10,%ymm10
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	64(%rbp),%ymm5,%ymm5
+	vpaddd	96(%rbp),%ymm9,%ymm9
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	64(%rbp),%ymm4,%ymm4
+	vpaddd	96(%rbp),%ymm8,%ymm8
+	vpaddd	160(%rbp),%ymm12,%ymm12
+
+	vmovdqa	%ymm0,128(%rbp)
+	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
+	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
+	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
+	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
+	vpxor	0+0(%rsi),%ymm0,%ymm0
+	vpxor	32+0(%rsi),%ymm3,%ymm3
+	vpxor	64+0(%rsi),%ymm7,%ymm7
+	vpxor	96+0(%rsi),%ymm11,%ymm11
+	vmovdqu	%ymm0,0+0(%rdi)
+	vmovdqu	%ymm3,32+0(%rdi)
+	vmovdqu	%ymm7,64+0(%rdi)
+	vmovdqu	%ymm11,96+0(%rdi)
+
+	vmovdqa	128(%rbp),%ymm0
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
+	vpxor	0+128(%rsi),%ymm3,%ymm3
+	vpxor	32+128(%rsi),%ymm2,%ymm2
+	vpxor	64+128(%rsi),%ymm6,%ymm6
+	vpxor	96+128(%rsi),%ymm10,%ymm10
+	vmovdqu	%ymm3,0+128(%rdi)
+	vmovdqu	%ymm2,32+128(%rdi)
+	vmovdqu	%ymm6,64+128(%rdi)
+	vmovdqu	%ymm10,96+128(%rdi)
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
+	vpxor	0+256(%rsi),%ymm3,%ymm3
+	vpxor	32+256(%rsi),%ymm1,%ymm1
+	vpxor	64+256(%rsi),%ymm5,%ymm5
+	vpxor	96+256(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm3,0+256(%rdi)
+	vmovdqu	%ymm1,32+256(%rdi)
+	vmovdqu	%ymm5,64+256(%rdi)
+	vmovdqu	%ymm9,96+256(%rdi)
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
+	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
+	vmovdqa	%ymm3,%ymm8
+
+	movq	$384,%rcx
+	leaq	384(%rsi),%rsi
+	subq	$384,%rbx
+	jmp	seal_avx2_hash
+
+seal_avx2_320:	# Builds three parallel copies of the state held in ymm0/ymm4/ymm8/ymm12, runs 10 loop iterations over them, stores a masked value at 0(%rbp) and jumps to seal_avx2_short.
+	vmovdqa	%ymm0,%ymm1	# replicate rows: ymm0 -> ymm1, ymm2
+	vmovdqa	%ymm0,%ymm2
+	vmovdqa	%ymm4,%ymm5	# ymm4 -> ymm5, ymm6
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm9	# ymm8 -> ymm9, ymm10
+	vmovdqa	%ymm8,%ymm10
+	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13	# ymm13 = ymm12 + .avx2_inc (presumably the next block counters)
+	vpaddd	.avx2_inc(%rip),%ymm13,%ymm14	# ymm14 = ymm13 + .avx2_inc
+	vmovdqa	%ymm4,%ymm7	# keep unmodified copies of ymm4 and ymm8 in ymm7 and ymm11 for the final add
+	vmovdqa	%ymm8,%ymm11
+	vmovdqa	%ymm12,160(%rbp)	# save the three starting ymm12/13/14 rows on the stack for the final add
+	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm14,224(%rbp)
+	movq	$10,%r10	# r10 = loop iteration count
+1:	# Loop body: each of the three states gets one pass ending in lane rotations 12/8/4, then one pass ending in 4/8/12; ymm3 is scratch
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm6,%ymm6,%ymm6
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpsrld	$20,%ymm6,%ymm3
+	vpslld	$12,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpslld	$7,%ymm6,%ymm3
+	vpsrld	$25,%ymm6,%ymm6
+	vpxor	%ymm3,%ymm6,%ymm6
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm6,%ymm6,%ymm6
+
+	decq	%r10	# repeat until the counter reaches zero
+	jne	1b
+	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0	# Add the starting values back in: the constants row, the saved ymm7/ymm11 rows and the rows saved at 160/192/224(%rbp)
+	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	%ymm7,%ymm4,%ymm4
+	vpaddd	%ymm7,%ymm5,%ymm5
+	vpaddd	%ymm7,%ymm6,%ymm6
+	vpaddd	%ymm11,%ymm8,%ymm8
+	vpaddd	%ymm11,%ymm9,%ymm9
+	vpaddd	%ymm11,%ymm10,%ymm10
+	vpaddd	160(%rbp),%ymm12,%ymm12
+	vpaddd	192(%rbp),%ymm13,%ymm13
+	vpaddd	224(%rbp),%ymm14,%ymm14
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3	# ymm3 = low 128-bit lanes of ymm0 and ymm4
+
+	vpand	.clamp(%rip),%ymm3,%ymm3	# mask with .clamp and store at 0(%rbp); the hashing code after seal_avx2_short reads its multiplier words from 0(%rbp) and 8(%rbp)
+	vmovdqa	%ymm3,0(%rbp)
+
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0	# Repack the remaining lanes into ymm0, ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13, ymm2, ymm6: the order in which seal_avx2_short_loop consumes them
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
+	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
+	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
+	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
+	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
+	jmp	seal_avx2_short
+
+seal_avx2_192:	# Builds two parallel copies of the state held in ymm0/ymm4/ymm8/ymm12, runs 10 loop iterations over them, stores a masked value at 0(%rbp) and falls through into seal_avx2_short.
+	vmovdqa	%ymm0,%ymm1	# second working copy of ymm0
+	vmovdqa	%ymm0,%ymm2	# ymm2, ymm6 and ymm10 are not modified by the loop; they hold the starting rows for the final add
+	vmovdqa	%ymm4,%ymm5
+	vmovdqa	%ymm4,%ymm6
+	vmovdqa	%ymm8,%ymm9
+	vmovdqa	%ymm8,%ymm10
+	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13	# ymm13 = ymm12 + .avx2_inc (presumably the next block counters)
+	vmovdqa	%ymm12,%ymm11	# keep the starting ymm12/ymm13 rows in ymm11/ymm15 for the final add
+	vmovdqa	%ymm13,%ymm15
+	movq	$10,%r10	# r10 = loop iteration count
+1:	# Loop body: each of the two states gets one pass ending in lane rotations 12/8/4, then one pass ending in 4/8/12; ymm3 is scratch
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$12,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$12,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm5,%ymm5,%ymm5
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpsrld	$20,%ymm4,%ymm3
+	vpslld	$12,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpslld	$7,%ymm4,%ymm3
+	vpsrld	$25,%ymm4,%ymm4
+	vpxor	%ymm3,%ymm4,%ymm4
+	vpalignr	$4,%ymm12,%ymm12,%ymm12
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpsrld	$20,%ymm5,%ymm3
+	vpslld	$12,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpslld	$7,%ymm5,%ymm3
+	vpsrld	$25,%ymm5,%ymm5
+	vpxor	%ymm3,%ymm5,%ymm5
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+
+	decq	%r10	# repeat until the counter reaches zero
+	jne	1b
+	vpaddd	%ymm2,%ymm0,%ymm0	# Add the starting rows (kept in ymm2, ymm6, ymm10, ymm11, ymm15) back into both states
+	vpaddd	%ymm2,%ymm1,%ymm1
+	vpaddd	%ymm6,%ymm4,%ymm4
+	vpaddd	%ymm6,%ymm5,%ymm5
+	vpaddd	%ymm10,%ymm8,%ymm8
+	vpaddd	%ymm10,%ymm9,%ymm9
+	vpaddd	%ymm11,%ymm12,%ymm12
+	vpaddd	%ymm15,%ymm13,%ymm13
+	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3	# ymm3 = low 128-bit lanes of ymm0 and ymm4
+
+	vpand	.clamp(%rip),%ymm3,%ymm3	# mask with .clamp and store at 0(%rbp); the hashing code after seal_avx2_short reads its multiplier words from 0(%rbp) and 8(%rbp)
+	vmovdqa	%ymm3,0(%rbp)
+
+	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0	# Repack the remaining lanes into ymm0, ymm4, ymm8, ymm12, ymm1, ymm5: the order in which seal_avx2_short_loop consumes them
+	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
+	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
+	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
+	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
+	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
+seal_avx2_short:	# Calls poly_hash_ad_internal (presumably hashing the associated data - confirm against its definition), clears rcx and falls through into seal_avx2_hash.
+	movq	%r8,%r8	# register moved onto itself: no effect on state
+	call	poly_hash_ad_internal
+	xorq	%rcx,%rcx	# rcx = 0: no pending output bytes to hash on this path
+seal_avx2_hash:	# Hashes rcx bytes at rdi, 16 bytes per pass, into the accumulator r10:r11:r12; once rcx drops below 16 control moves to seal_avx2_short_loop.
+	cmpq	$16,%rcx
+	jb	seal_avx2_short_loop
+	addq	0(%rdi),%r10	# accumulator += 16 bytes at rdi, with an extra 1 added into the top limb r12
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax	# partial products with the multiplier word at 0(%rbp); results collect in r13, r14, r15
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# partial products with the multiplier word at 8(%rbp); results extend into r9
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	# Reduction: keep the low two limbs plus 2 bits (r15 & 3) and add back (t & -4) + (t >> 2), where t is the 128-bit value r9:r15
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	subq	$16,%rcx	# advance to the next 16-byte block
+	addq	$16,%rdi
+	jmp	seal_avx2_hash
+seal_avx2_short_loop:	# While at least 32 bytes remain (rbx): XOR 32 input bytes at rsi with ymm0, write them to rdi, hash the 32 written bytes, then shift the next 32-byte vector into ymm0.
+	cmpq	$32,%rbx
+	jb	seal_avx2_short_tail
+	subq	$32,%rbx
+
+	vpxor	(%rsi),%ymm0,%ymm0	# output = input XOR ymm0
+	vmovdqu	%ymm0,(%rdi)
+	leaq	32(%rsi),%rsi
+
+	addq	0(%rdi),%r10	# hash the first 16 bytes just written: accumulator r10:r11:r12 += block, plus 1 in the top limb
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax	# partial products with the multiplier word at 0(%rbp)
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# partial products with the multiplier word at 8(%rbp)
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	# reduction: keep the low two limbs plus 2 bits and add back (t & -4) + (t >> 2) for t = r9:r15
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	addq	16(%rdi),%r10	# hash the second 16 bytes just written, same sequence as above
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	32(%rdi),%rdi
+
+	vmovdqa	%ymm4,%ymm0	# shift the vector queue down by one: ymm0 <- ymm4 <- ymm8 <- ymm12 <- ymm1 <- ymm5 <- ymm9 <- ymm13 <- ymm2 <- ymm6
+	vmovdqa	%ymm8,%ymm4
+	vmovdqa	%ymm12,%ymm8
+	vmovdqa	%ymm1,%ymm12
+	vmovdqa	%ymm5,%ymm1
+	vmovdqa	%ymm9,%ymm5
+	vmovdqa	%ymm13,%ymm9
+	vmovdqa	%ymm2,%ymm13
+	vmovdqa	%ymm6,%ymm2
+	jmp	seal_avx2_short_loop
+seal_avx2_short_tail:	# Handles one more 16-byte block when rbx >= 16 (XOR with xmm0, store, hash), then clears the upper ymm halves and jumps to seal_sse_tail_16.
+	cmpq	$16,%rbx
+	jb	1f
+	subq	$16,%rbx
+	vpxor	(%rsi),%xmm0,%xmm3	# output = input XOR low 128 bits of ymm0
+	vmovdqu	%xmm3,(%rdi)
+	leaq	16(%rsi),%rsi
+	addq	0(%rdi),%r10	# hash the 16 bytes just written: accumulator r10:r11:r12 += block, plus 1 in the top limb
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0(%rbp),%rax	# partial products with the multiplier word at 0(%rbp)
+	movq	%rax,%r15
+	mulq	%r10
+	movq	%rax,%r13
+	movq	%rdx,%r14
+	movq	0+0(%rbp),%rax
+	mulq	%r11
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0(%rbp),%rax	# partial products with the multiplier word at 8(%rbp)
+	movq	%rax,%r9
+	mulq	%r10
+	addq	%rax,%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+	movq	8+0(%rbp),%rax
+	mulq	%r11
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	imulq	%r12,%r9
+	addq	%r10,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10	# reduction: keep the low two limbs plus 2 bits and add back (t & -4) + (t >> 2) for t = r9:r15
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r10
+	adcq	%r14,%r11
+	adcq	$0,%r12
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+
+	leaq	16(%rdi),%rdi
+	vextracti128	$1,%ymm0,%xmm0	# move the unused upper 128 bits of ymm0 down into xmm0 for the code at seal_sse_tail_16
+1:
+	vzeroupper	# zero the upper ymm halves before continuing in the SSE-named tail code
+	jmp	seal_sse_tail_16
+
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/fipsmodule/aes-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/aes-x86_64.S
new file mode 100644
index 0000000..c7c4829
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/aes-x86_64.S
@@ -0,0 +1,2535 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+.p2align	4
+_x86_64_AES_encrypt:	# Table-driven block encryption core. In/out: state words in eax, ebx, ecx, edx. Reads: r15 = round-key pointer (round count at 240(%r15)), r14 = lookup table with 8-byte entries. Uses esi, edi, ebp, r8d, r10d-r13d as scratch and advances r15.
+	xorl	0(%r15),%eax	# XOR the first 16 bytes of key material into the state
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+
+	movl	240(%r15),%r13d	# r13d = value at key offset 240 minus 1 = number of passes through L$enc_loop
+	subl	$1,%r13d
+	jmp	L$enc_loop
+.p2align	4
+L$enc_loop:	# One full round per pass: each new word (r10d, r11d, r12d, r8d) is the XOR of four table words selected by state bytes
+
+	movzbl	%al,%esi
+	movzbl	%bl,%edi
+	movzbl	%cl,%ebp
+	movl	0(%r14,%rsi,8),%r10d
+	movl	0(%r14,%rdi,8),%r11d
+	movl	0(%r14,%rbp,8),%r12d
+
+	movzbl	%bh,%esi
+	movzbl	%ch,%edi
+	movzbl	%dl,%ebp
+	xorl	3(%r14,%rsi,8),%r10d	# loads at byte offsets 3, 2 and 1 inside an 8-byte entry (presumably rotated views of a word stored twice - confirm against the table)
+	xorl	3(%r14,%rdi,8),%r11d
+	movl	0(%r14,%rbp,8),%r8d
+
+	movzbl	%dh,%esi
+	shrl	$16,%ecx
+	movzbl	%ah,%ebp
+	xorl	3(%r14,%rsi,8),%r12d
+	shrl	$16,%edx
+	xorl	3(%r14,%rbp,8),%r8d
+
+	shrl	$16,%ebx
+	leaq	16(%r15),%r15	# advance to the next round key
+	shrl	$16,%eax
+
+	movzbl	%cl,%esi
+	movzbl	%dl,%edi
+	movzbl	%al,%ebp
+	xorl	2(%r14,%rsi,8),%r10d
+	xorl	2(%r14,%rdi,8),%r11d
+	xorl	2(%r14,%rbp,8),%r12d
+
+	movzbl	%dh,%esi
+	movzbl	%ah,%edi
+	movzbl	%bl,%ebp
+	xorl	1(%r14,%rsi,8),%r10d
+	xorl	1(%r14,%rdi,8),%r11d
+	xorl	2(%r14,%rbp,8),%r8d
+
+	movl	12(%r15),%edx	# reload the state registers with the round key, then XOR in the looked-up words
+	movzbl	%bh,%edi
+	movzbl	%ch,%ebp
+	movl	0(%r15),%eax
+	xorl	1(%r14,%rdi,8),%r12d
+	xorl	1(%r14,%rbp,8),%r8d
+
+	movl	4(%r15),%ebx
+	movl	8(%r15),%ecx
+	xorl	%r10d,%eax
+	xorl	%r11d,%ebx
+	xorl	%r12d,%ecx
+	xorl	%r8d,%edx
+	subl	$1,%r13d
+	jnz	L$enc_loop
+	movzbl	%al,%esi	# Last round: builds each output word one byte at a time from the same table (byte loads and masked word loads)
+	movzbl	%bl,%edi
+	movzbl	%cl,%ebp
+	movzbl	2(%r14,%rsi,8),%r10d
+	movzbl	2(%r14,%rdi,8),%r11d
+	movzbl	2(%r14,%rbp,8),%r12d
+
+	movzbl	%dl,%esi
+	movzbl	%bh,%edi
+	movzbl	%ch,%ebp
+	movzbl	2(%r14,%rsi,8),%r8d
+	movl	0(%r14,%rdi,8),%edi
+	movl	0(%r14,%rbp,8),%ebp
+
+	andl	$0x0000ff00,%edi	# keep only byte 1 of the looked-up words
+	andl	$0x0000ff00,%ebp
+
+	xorl	%edi,%r10d
+	xorl	%ebp,%r11d
+	shrl	$16,%ecx
+
+	movzbl	%dh,%esi
+	movzbl	%ah,%edi
+	shrl	$16,%edx
+	movl	0(%r14,%rsi,8),%esi
+	movl	0(%r14,%rdi,8),%edi
+
+	andl	$0x0000ff00,%esi
+	andl	$0x0000ff00,%edi
+	shrl	$16,%ebx
+	xorl	%esi,%r12d
+	xorl	%edi,%r8d
+	shrl	$16,%eax
+
+	movzbl	%cl,%esi
+	movzbl	%dl,%edi
+	movzbl	%al,%ebp
+	movl	0(%r14,%rsi,8),%esi
+	movl	0(%r14,%rdi,8),%edi
+	movl	0(%r14,%rbp,8),%ebp
+
+	andl	$0x00ff0000,%esi	# keep only byte 2 of the looked-up words
+	andl	$0x00ff0000,%edi
+	andl	$0x00ff0000,%ebp
+
+	xorl	%esi,%r10d
+	xorl	%edi,%r11d
+	xorl	%ebp,%r12d
+
+	movzbl	%bl,%esi
+	movzbl	%dh,%edi
+	movzbl	%ah,%ebp
+	movl	0(%r14,%rsi,8),%esi
+	movl	2(%r14,%rdi,8),%edi
+	movl	2(%r14,%rbp,8),%ebp
+
+	andl	$0x00ff0000,%esi
+	andl	$0xff000000,%edi	# keep only byte 3 of the words loaded at offset 2
+	andl	$0xff000000,%ebp
+
+	xorl	%esi,%r8d
+	xorl	%edi,%r10d
+	xorl	%ebp,%r11d
+
+	movzbl	%bh,%esi
+	movzbl	%ch,%edi
+	movl	16+12(%r15),%edx	# final key words are read from 16(%r15) onward
+	movl	2(%r14,%rsi,8),%esi
+	movl	2(%r14,%rdi,8),%edi
+	movl	16+0(%r15),%eax
+
+	andl	$0xff000000,%esi
+	andl	$0xff000000,%edi
+
+	xorl	%esi,%r12d
+	xorl	%edi,%r8d
+
+	movl	16+4(%r15),%ebx
+	movl	16+8(%r15),%ecx
+	xorl	%r10d,%eax
+	xorl	%r11d,%ebx
+	xorl	%r12d,%ecx
+	xorl	%r8d,%edx
+.byte	0xf3,0xc3	# return, emitted as raw bytes: 0xf3 (rep prefix) followed by 0xc3 (ret)
+
+
+.p2align	4
+_x86_64_AES_encrypt_compact:	# Block encryption core using a 256-entry byte table at r14. In/out: state words in eax, ebx, ecx, edx. Reads: r15 = round-key pointer; 16(%rsp) = end-of-key pointer placed on the stack by the caller. Scratch: esi, edi, ebp, r8d-r13d.
+	leaq	128(%r14),%r8
+	movl	0-128(%r8),%edi	# eight loads spaced 32 bytes apart across the 256-byte table; the loaded values are overwritten before use (presumably a cache prefetch - confirm)
+	movl	32-128(%r8),%ebp
+	movl	64-128(%r8),%r10d
+	movl	96-128(%r8),%r11d
+	movl	128-128(%r8),%edi
+	movl	160-128(%r8),%ebp
+	movl	192-128(%r8),%r10d
+	movl	224-128(%r8),%r11d
+	jmp	L$enc_loop_compact
+.p2align	4
+L$enc_loop_compact:	# One round per pass
+	xorl	0(%r15),%eax	# XOR the current round key into the state and advance r15
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+	leaq	16(%r15),%r15
+	movzbl	%al,%r10d	# substitute each state byte through the table at r14 and assemble the new words in r10d, r11d, r12d, r8d
+	movzbl	%bl,%r11d
+	movzbl	%cl,%r12d
+	movzbl	%dl,%r8d
+	movzbl	%bh,%esi
+	movzbl	%ch,%edi
+	shrl	$16,%ecx
+	movzbl	%dh,%ebp
+	movzbl	(%r14,%r10,1),%r10d
+	movzbl	(%r14,%r11,1),%r11d
+	movzbl	(%r14,%r12,1),%r12d
+	movzbl	(%r14,%r8,1),%r8d
+
+	movzbl	(%r14,%rsi,1),%r9d
+	movzbl	%ah,%esi
+	movzbl	(%r14,%rdi,1),%r13d
+	movzbl	%cl,%edi
+	movzbl	(%r14,%rbp,1),%ebp
+	movzbl	(%r14,%rsi,1),%esi
+
+	shll	$8,%r9d
+	shrl	$16,%edx
+	shll	$8,%r13d
+	xorl	%r9d,%r10d
+	shrl	$16,%eax
+	movzbl	%dl,%r9d
+	shrl	$16,%ebx
+	xorl	%r13d,%r11d
+	shll	$8,%ebp
+	movzbl	%al,%r13d
+	movzbl	(%r14,%rdi,1),%edi
+	xorl	%ebp,%r12d
+
+	shll	$8,%esi
+	movzbl	%bl,%ebp
+	shll	$16,%edi
+	xorl	%esi,%r8d
+	movzbl	(%r14,%r9,1),%r9d
+	movzbl	%dh,%esi
+	movzbl	(%r14,%r13,1),%r13d
+	xorl	%edi,%r10d
+
+	shrl	$8,%ecx
+	movzbl	%ah,%edi
+	shll	$16,%r9d
+	shrl	$8,%ebx
+	shll	$16,%r13d
+	xorl	%r9d,%r11d
+	movzbl	(%r14,%rbp,1),%ebp
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%edi
+	movzbl	(%r14,%rcx,1),%edx
+	movzbl	(%r14,%rbx,1),%ecx
+
+	shll	$16,%ebp
+	xorl	%r13d,%r12d
+	shll	$24,%esi
+	xorl	%ebp,%r8d
+	shll	$24,%edi
+	xorl	%esi,%r10d
+	shll	$24,%edx
+	xorl	%edi,%r11d
+	shll	$24,%ecx
+	movl	%r10d,%eax
+	movl	%r11d,%ebx
+	xorl	%r12d,%ecx
+	xorl	%r8d,%edx
+	cmpq	16(%rsp),%r15	# stop once r15 reaches the end-of-key pointer; the last round skips the mixing below
+	je	L$enc_compact_done
+	movl	$0x80808080,%r10d	# Mixing step for eax/ebx: r8d/r9d = each byte doubled, with 0x1b XORed into bytes whose top bit was set (looks like GF(2^8) doubling - confirm)
+	movl	$0x80808080,%r11d
+	andl	%eax,%r10d
+	andl	%ebx,%r11d
+	movl	%r10d,%esi
+	movl	%r11d,%edi
+	shrl	$7,%r10d
+	leal	(%rax,%rax,1),%r8d
+	shrl	$7,%r11d
+	leal	(%rbx,%rbx,1),%r9d
+	subl	%r10d,%esi
+	subl	%r11d,%edi
+	andl	$0xfefefefe,%r8d
+	andl	$0xfefefefe,%r9d
+	andl	$0x1b1b1b1b,%esi
+	andl	$0x1b1b1b1b,%edi
+	movl	%eax,%r10d
+	movl	%ebx,%r11d
+	xorl	%esi,%r8d
+	xorl	%edi,%r9d
+
+	xorl	%r8d,%eax	# combine the doubled words with rotated copies of the originals; the same doubling for ecx/edx is interleaved here
+	xorl	%r9d,%ebx
+	movl	$0x80808080,%r12d
+	roll	$24,%eax
+	movl	$0x80808080,%ebp
+	roll	$24,%ebx
+	andl	%ecx,%r12d
+	andl	%edx,%ebp
+	xorl	%r8d,%eax
+	xorl	%r9d,%ebx
+	movl	%r12d,%esi
+	rorl	$16,%r10d
+	movl	%ebp,%edi
+	rorl	$16,%r11d
+	leal	(%rcx,%rcx,1),%r8d
+	shrl	$7,%r12d
+	xorl	%r10d,%eax
+	shrl	$7,%ebp
+	xorl	%r11d,%ebx
+	rorl	$8,%r10d
+	leal	(%rdx,%rdx,1),%r9d
+	rorl	$8,%r11d
+	subl	%r12d,%esi
+	subl	%ebp,%edi
+	xorl	%r10d,%eax
+	xorl	%r11d,%ebx
+
+	andl	$0xfefefefe,%r8d
+	andl	$0xfefefefe,%r9d
+	andl	$0x1b1b1b1b,%esi
+	andl	$0x1b1b1b1b,%edi
+	movl	%ecx,%r12d
+	movl	%edx,%ebp
+	xorl	%esi,%r8d
+	xorl	%edi,%r9d
+
+	rorl	$16,%r12d
+	xorl	%r8d,%ecx
+	rorl	$16,%ebp
+	xorl	%r9d,%edx
+	roll	$24,%ecx
+	movl	0(%r14),%esi	# the loads from 0/64/128/192(%r14) land in registers that are overwritten at the top of the loop (presumably table prefetch - confirm)
+	roll	$24,%edx
+	xorl	%r8d,%ecx
+	movl	64(%r14),%edi
+	xorl	%r9d,%edx
+	movl	128(%r14),%r8d
+	xorl	%r12d,%ecx
+	rorl	$8,%r12d
+	xorl	%ebp,%edx
+	rorl	$8,%ebp
+	xorl	%r12d,%ecx
+	movl	192(%r14),%r9d
+	xorl	%ebp,%edx
+	jmp	L$enc_loop_compact
+.p2align	4
+L$enc_compact_done:	# XOR in the key words at the end pointer and return
+	xorl	0(%r15),%eax
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+.byte	0xf3,0xc3	# return, emitted as raw bytes: 0xf3 (rep prefix) followed by 0xc3 (ret)
+
+.p2align	4
+.globl	_asm_AES_encrypt	# Exported entry point. As used below: rdi = 16-byte input block, rsi = 16-byte output block, rdx = key schedule (round count at offset 240). Wraps _x86_64_AES_encrypt_compact.
+.private_extern _asm_AES_encrypt
+
+.private_extern	_asm_AES_encrypt
+_asm_AES_encrypt:
+	movq	%rsp,%rax	# remember the incoming stack pointer, then save rbx, rbp, r12-r15
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+
+
+	leaq	-63(%rdx),%rcx	# Choose a 64-byte aligned frame whose position depends on the key address in rdx (presumably to keep frame and key schedule from aliasing in cache - confirm)
+	andq	$-64,%rsp
+	subq	%rsp,%rcx
+	negq	%rcx
+	andq	$0x3c0,%rcx
+	subq	%rcx,%rsp
+	subq	$32,%rsp
+
+	movq	%rsi,16(%rsp)	# frame layout: 0 = key pointer, 8 = end-of-key pointer, 16 = output pointer, 24 = original rsp
+	movq	%rax,24(%rsp)
+L$enc_prologue:
+
+	movq	%rdx,%r15
+	movl	240(%r15),%r13d
+
+	movl	0(%rdi),%eax	# load the 16-byte input block into eax, ebx, ecx, edx
+	movl	4(%rdi),%ebx
+	movl	8(%rdi),%ecx
+	movl	12(%rdi),%edx
+
+	shll	$4,%r13d	# rbp = key pointer + 16 * round count: the address at which the compact loop stops
+	leaq	(%r15,%r13,1),%rbp
+	movq	%r15,(%rsp)
+	movq	%rbp,8(%rsp)
+
+
+	leaq	L$AES_Te+2048(%rip),%r14	# r14 = L$AES_Te+2048 plus 0x000, 0x100, 0x200 or 0x300, chosen from address bits of the frame (presumably selects one of several table copies - confirm)
+	leaq	768(%rsp),%rbp
+	subq	%r14,%rbp
+	andq	$0x300,%rbp
+	leaq	(%r14,%rbp,1),%r14
+
+	call	_x86_64_AES_encrypt_compact	# the call pushes a return address, so the callee sees the end-of-key pointer at 16(%rsp)
+
+	movq	16(%rsp),%r9	# r9 = output pointer, rsi = original rsp
+	movq	24(%rsp),%rsi
+	movl	%eax,0(%r9)	# store the result block
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	movq	-48(%rsi),%r15	# restore the saved registers from below the original stack pointer, then restore rsp itself
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp
+L$enc_epilogue:
+	.byte	0xf3,0xc3	# return, emitted as raw bytes: 0xf3 (rep prefix) followed by 0xc3 (ret)
+
+
+.p2align	4
+_x86_64_AES_decrypt:	# Table-driven block decryption core. In/out: state words in eax, ebx, ecx, edx. Reads: r15 = round-key pointer (round count at 240(%r15)), r14 = lookup table with 8-byte entries plus a byte table at r14+2048. Uses esi, edi, ebp, r8d, r10d-r13d as scratch and advances r15.
+	xorl	0(%r15),%eax	# XOR the first 16 bytes of key material into the state
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+
+	movl	240(%r15),%r13d	# r13d = value at key offset 240 minus 1 = number of passes through L$dec_loop
+	subl	$1,%r13d
+	jmp	L$dec_loop
+.p2align	4
+L$dec_loop:	# One full round per pass: each new word (r10d, r11d, r12d, r8d) is the XOR of four table words selected by state bytes
+
+	movzbl	%al,%esi
+	movzbl	%bl,%edi
+	movzbl	%cl,%ebp
+	movl	0(%r14,%rsi,8),%r10d
+	movl	0(%r14,%rdi,8),%r11d
+	movl	0(%r14,%rbp,8),%r12d
+
+	movzbl	%dh,%esi
+	movzbl	%ah,%edi
+	movzbl	%dl,%ebp
+	xorl	3(%r14,%rsi,8),%r10d	# loads at byte offsets 3, 2 and 1 inside an 8-byte entry (presumably rotated views of a word stored twice - confirm against the table)
+	xorl	3(%r14,%rdi,8),%r11d
+	movl	0(%r14,%rbp,8),%r8d
+
+	movzbl	%bh,%esi
+	shrl	$16,%eax
+	movzbl	%ch,%ebp
+	xorl	3(%r14,%rsi,8),%r12d
+	shrl	$16,%edx
+	xorl	3(%r14,%rbp,8),%r8d
+
+	shrl	$16,%ebx
+	leaq	16(%r15),%r15	# advance to the next round key
+	shrl	$16,%ecx
+
+	movzbl	%cl,%esi
+	movzbl	%dl,%edi
+	movzbl	%al,%ebp
+	xorl	2(%r14,%rsi,8),%r10d
+	xorl	2(%r14,%rdi,8),%r11d
+	xorl	2(%r14,%rbp,8),%r12d
+
+	movzbl	%bh,%esi
+	movzbl	%ch,%edi
+	movzbl	%bl,%ebp
+	xorl	1(%r14,%rsi,8),%r10d
+	xorl	1(%r14,%rdi,8),%r11d
+	xorl	2(%r14,%rbp,8),%r8d
+
+	movzbl	%dh,%esi
+	movl	12(%r15),%edx	# reload the state registers with the round key, then XOR in the looked-up words
+	movzbl	%ah,%ebp
+	xorl	1(%r14,%rsi,8),%r12d
+	movl	0(%r15),%eax
+	xorl	1(%r14,%rbp,8),%r8d
+
+	xorl	%r10d,%eax
+	movl	4(%r15),%ebx
+	movl	8(%r15),%ecx
+	xorl	%r12d,%ecx
+	xorl	%r11d,%ebx
+	xorl	%r8d,%edx
+	subl	$1,%r13d
+	jnz	L$dec_loop
+	leaq	2048(%r14),%r14	# Last round: switch r14 to the byte table 2048 bytes further on and build each output word from single-byte lookups
+	movzbl	%al,%esi
+	movzbl	%bl,%edi
+	movzbl	%cl,%ebp
+	movzbl	(%r14,%rsi,1),%r10d
+	movzbl	(%r14,%rdi,1),%r11d
+	movzbl	(%r14,%rbp,1),%r12d
+
+	movzbl	%dl,%esi
+	movzbl	%dh,%edi
+	movzbl	%ah,%ebp
+	movzbl	(%r14,%rsi,1),%r8d
+	movzbl	(%r14,%rdi,1),%edi
+	movzbl	(%r14,%rbp,1),%ebp
+
+	shll	$8,%edi	# place the substituted bytes at byte position 1
+	shll	$8,%ebp
+
+	xorl	%edi,%r10d
+	xorl	%ebp,%r11d
+	shrl	$16,%edx
+
+	movzbl	%bh,%esi
+	movzbl	%ch,%edi
+	shrl	$16,%eax
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%edi
+
+	shll	$8,%esi
+	shll	$8,%edi
+	shrl	$16,%ebx
+	xorl	%esi,%r12d
+	xorl	%edi,%r8d
+	shrl	$16,%ecx
+
+	movzbl	%cl,%esi
+	movzbl	%dl,%edi
+	movzbl	%al,%ebp
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%edi
+	movzbl	(%r14,%rbp,1),%ebp
+
+	shll	$16,%esi	# byte position 2
+	shll	$16,%edi
+	shll	$16,%ebp
+
+	xorl	%esi,%r10d
+	xorl	%edi,%r11d
+	xorl	%ebp,%r12d
+
+	movzbl	%bl,%esi
+	movzbl	%bh,%edi
+	movzbl	%ch,%ebp
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%edi
+	movzbl	(%r14,%rbp,1),%ebp
+
+	shll	$16,%esi
+	shll	$24,%edi	# byte position 3
+	shll	$24,%ebp
+
+	xorl	%esi,%r8d
+	xorl	%edi,%r10d
+	xorl	%ebp,%r11d
+
+	movzbl	%dh,%esi
+	movzbl	%ah,%edi
+	movl	16+12(%r15),%edx	# final key words are read from 16(%r15) onward
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%edi
+	movl	16+0(%r15),%eax
+
+	shll	$24,%esi
+	shll	$24,%edi
+
+	xorl	%esi,%r12d
+	xorl	%edi,%r8d
+
+	movl	16+4(%r15),%ebx
+	movl	16+8(%r15),%ecx
+	leaq	-2048(%r14),%r14	# undo the +2048 adjustment so r14 leaves with its entry value
+	xorl	%r10d,%eax
+	xorl	%r11d,%ebx
+	xorl	%r12d,%ecx
+	xorl	%r8d,%edx
+.byte	0xf3,0xc3	# return, emitted as raw bytes: 0xf3 (rep prefix) followed by 0xc3 (ret)
+
+
+.p2align	4
+_x86_64_AES_decrypt_compact:	# Block decryption core using a 256-entry byte table at r14 followed by three 64-bit mask constants at 256(%r14). In/out: state words in eax, ebx, ecx, edx. Reads: r15 = round-key pointer; 16(%rsp) = end-of-key pointer placed on the stack by the caller.
+	leaq	128(%r14),%r8
+	movl	0-128(%r8),%edi	# eight loads spaced 32 bytes apart across the 256-byte table; the loaded values are overwritten before use (presumably a cache prefetch - confirm)
+	movl	32-128(%r8),%ebp
+	movl	64-128(%r8),%r10d
+	movl	96-128(%r8),%r11d
+	movl	128-128(%r8),%edi
+	movl	160-128(%r8),%ebp
+	movl	192-128(%r8),%r10d
+	movl	224-128(%r8),%r11d
+	jmp	L$dec_loop_compact
+
+.p2align	4
+L$dec_loop_compact:	# One round per pass
+	xorl	0(%r15),%eax	# XOR the current round key into the state and advance r15
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+	leaq	16(%r15),%r15
+	movzbl	%al,%r10d	# substitute each state byte through the table at r14 and assemble the new words
+	movzbl	%bl,%r11d
+	movzbl	%cl,%r12d
+	movzbl	%dl,%r8d
+	movzbl	%dh,%esi
+	movzbl	%ah,%edi
+	shrl	$16,%edx
+	movzbl	%bh,%ebp
+	movzbl	(%r14,%r10,1),%r10d
+	movzbl	(%r14,%r11,1),%r11d
+	movzbl	(%r14,%r12,1),%r12d
+	movzbl	(%r14,%r8,1),%r8d
+
+	movzbl	(%r14,%rsi,1),%r9d
+	movzbl	%ch,%esi
+	movzbl	(%r14,%rdi,1),%r13d
+	movzbl	(%r14,%rbp,1),%ebp
+	movzbl	(%r14,%rsi,1),%esi
+
+	shrl	$16,%ecx
+	shll	$8,%r13d
+	shll	$8,%r9d
+	movzbl	%cl,%edi
+	shrl	$16,%eax
+	xorl	%r9d,%r10d
+	shrl	$16,%ebx
+	movzbl	%dl,%r9d
+
+	shll	$8,%ebp
+	xorl	%r13d,%r11d
+	shll	$8,%esi
+	movzbl	%al,%r13d
+	movzbl	(%r14,%rdi,1),%edi
+	xorl	%ebp,%r12d
+	movzbl	%bl,%ebp
+
+	shll	$16,%edi
+	xorl	%esi,%r8d
+	movzbl	(%r14,%r9,1),%r9d
+	movzbl	%bh,%esi
+	movzbl	(%r14,%rbp,1),%ebp
+	xorl	%edi,%r10d
+	movzbl	(%r14,%r13,1),%r13d
+	movzbl	%ch,%edi
+
+	shll	$16,%ebp
+	shll	$16,%r9d
+	shll	$16,%r13d
+	xorl	%ebp,%r8d
+	movzbl	%dh,%ebp
+	xorl	%r9d,%r11d
+	shrl	$8,%eax
+	xorl	%r13d,%r12d
+
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%ebx
+	movzbl	(%r14,%rbp,1),%ecx
+	movzbl	(%r14,%rax,1),%edx
+
+	movl	%r10d,%eax
+	shll	$24,%esi
+	shll	$24,%ebx
+	shll	$24,%ecx
+	xorl	%esi,%eax
+	shll	$24,%edx
+	xorl	%r11d,%ebx
+	xorl	%r12d,%ecx
+	xorl	%r8d,%edx
+	cmpq	16(%rsp),%r15	# stop once r15 reaches the end-of-key pointer; the last round skips the mixing below
+	je	L$dec_compact_done
+
+	movq	256+0(%r14),%rsi	# Mixing step: pack the state as rax = ebx:eax and rcx = edx:ecx; rsi, rdi, rbp = the three mask constants stored after the table
+	shlq	$32,%rbx
+	shlq	$32,%rdx
+	movq	256+8(%r14),%rdi
+	orq	%rbx,%rax
+	orq	%rdx,%rcx
+	movq	256+16(%r14),%rbp
+	movq	%rsi,%r9	# first doubling: r8/r11 derived from rax/rcx via (x & rsi), shift by 7, subtract, and the rdi/rbp masks (same shape as the 0x80/0xfe/0x1b sequence in the encrypt core; mask values not visible here)
+	movq	%rsi,%r12
+	andq	%rax,%r9
+	andq	%rcx,%r12
+	movq	%r9,%rbx
+	movq	%r12,%rdx
+	shrq	$7,%r9
+	leaq	(%rax,%rax,1),%r8
+	shrq	$7,%r12
+	leaq	(%rcx,%rcx,1),%r11
+	subq	%r9,%rbx
+	subq	%r12,%rdx
+	andq	%rdi,%r8
+	andq	%rdi,%r11
+	andq	%rbp,%rbx
+	andq	%rbp,%rdx
+	xorq	%rbx,%r8
+	xorq	%rdx,%r11
+	movq	%rsi,%r10	# second doubling: r9/r12 derived from r8/r11 the same way
+	movq	%rsi,%r13
+
+	andq	%r8,%r10
+	andq	%r11,%r13
+	movq	%r10,%rbx
+	movq	%r13,%rdx
+	shrq	$7,%r10
+	leaq	(%r8,%r8,1),%r9
+	shrq	$7,%r13
+	leaq	(%r11,%r11,1),%r12
+	subq	%r10,%rbx
+	subq	%r13,%rdx
+	andq	%rdi,%r9
+	andq	%rdi,%r12
+	andq	%rbp,%rbx
+	andq	%rbp,%rdx
+	xorq	%rbx,%r9
+	xorq	%rdx,%r12
+	movq	%rsi,%r10	# third doubling: r10/r13 derived from r9/r12, interleaved with XORs of the original state into r8/r11 and r9/r12
+	movq	%rsi,%r13
+
+	andq	%r9,%r10
+	andq	%r12,%r13
+	movq	%r10,%rbx
+	movq	%r13,%rdx
+	shrq	$7,%r10
+	xorq	%rax,%r8
+	shrq	$7,%r13
+	xorq	%rcx,%r11
+	subq	%r10,%rbx
+	subq	%r13,%rdx
+	leaq	(%r9,%r9,1),%r10
+	leaq	(%r12,%r12,1),%r13
+	xorq	%rax,%r9
+	xorq	%rcx,%r12
+	andq	%rdi,%r10
+	andq	%rdi,%r13
+	andq	%rbp,%rbx
+	andq	%rbp,%rdx
+	xorq	%rbx,%r10
+	xorq	%rdx,%r13
+
+	xorq	%r10,%rax	# combine the multiples, then unpack back into four 32-bit words (ebx and edx take the high halves) with 8-, 24- and 16-bit rotations
+	xorq	%r13,%rcx
+	xorq	%r10,%r8
+	xorq	%r13,%r11
+	movq	%rax,%rbx
+	movq	%rcx,%rdx
+	xorq	%r10,%r9
+	shrq	$32,%rbx
+	xorq	%r13,%r12
+	shrq	$32,%rdx
+	xorq	%r8,%r10
+	roll	$8,%eax
+	xorq	%r11,%r13
+	roll	$8,%ecx
+	xorq	%r9,%r10
+	roll	$8,%ebx
+	xorq	%r12,%r13
+
+	roll	$8,%edx
+	xorl	%r10d,%eax
+	shrq	$32,%r10
+	xorl	%r13d,%ecx
+	shrq	$32,%r13
+	xorl	%r10d,%ebx
+	xorl	%r13d,%edx
+
+	movq	%r8,%r10
+	roll	$24,%r8d
+	movq	%r11,%r13
+	roll	$24,%r11d
+	shrq	$32,%r10
+	xorl	%r8d,%eax
+	shrq	$32,%r13
+	xorl	%r11d,%ecx
+	roll	$24,%r10d
+	movq	%r9,%r8
+	roll	$24,%r13d
+	movq	%r12,%r11
+	shrq	$32,%r8
+	xorl	%r10d,%ebx
+	shrq	$32,%r11
+	xorl	%r13d,%edx
+
+	movq	0(%r14),%rsi	# the loads from 0/64/128/192/256(%r14) land in registers that are overwritten at the top of the loop (presumably table prefetch - confirm)
+	roll	$16,%r9d
+	movq	64(%r14),%rdi
+	roll	$16,%r12d
+	movq	128(%r14),%rbp
+	roll	$16,%r8d
+	movq	192(%r14),%r10
+	xorl	%r9d,%eax
+	roll	$16,%r11d
+	xorl	%r12d,%ecx
+	movq	256(%r14),%r13
+	xorl	%r8d,%ebx
+	xorl	%r11d,%edx
+	jmp	L$dec_loop_compact
+.p2align	4
+L$dec_compact_done:	# XOR in the key words at the end pointer and return
+	xorl	0(%r15),%eax
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+.byte	0xf3,0xc3	# return, emitted as raw bytes: 0xf3 (rep prefix) followed by 0xc3 (ret)
+
+.p2align	4
+.globl	_asm_AES_decrypt	# Exported entry point. As used below: rdi = 16-byte input block, rsi = 16-byte output block, rdx = key schedule (round count at offset 240). Wraps _x86_64_AES_decrypt_compact.
+.private_extern _asm_AES_decrypt
+
+.private_extern	_asm_AES_decrypt
+_asm_AES_decrypt:
+	movq	%rsp,%rax	# remember the incoming stack pointer, then save rbx, rbp, r12-r15
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+
+
+	leaq	-63(%rdx),%rcx	# Choose a 64-byte aligned frame whose position depends on the key address in rdx (presumably to keep frame and key schedule from aliasing in cache - confirm)
+	andq	$-64,%rsp
+	subq	%rsp,%rcx
+	negq	%rcx
+	andq	$0x3c0,%rcx
+	subq	%rcx,%rsp
+	subq	$32,%rsp
+
+	movq	%rsi,16(%rsp)	# frame layout: 0 = key pointer, 8 = end-of-key pointer, 16 = output pointer, 24 = original rsp
+	movq	%rax,24(%rsp)
+L$dec_prologue:
+
+	movq	%rdx,%r15
+	movl	240(%r15),%r13d
+
+	movl	0(%rdi),%eax	# load the 16-byte input block into eax, ebx, ecx, edx
+	movl	4(%rdi),%ebx
+	movl	8(%rdi),%ecx
+	movl	12(%rdi),%edx
+
+	shll	$4,%r13d	# rbp = key pointer + 16 * round count: the address at which the compact loop stops
+	leaq	(%r15,%r13,1),%rbp
+	movq	%r15,(%rsp)
+	movq	%rbp,8(%rsp)
+
+
+	leaq	L$AES_Td+2048(%rip),%r14	# r14 = L$AES_Td+2048 plus k*0x100 + k*0x20 for k in 0..3, chosen from address bits of the frame (presumably selects one of several table copies, each followed by 32 bytes of constants - confirm)
+	leaq	768(%rsp),%rbp
+	subq	%r14,%rbp
+	andq	$0x300,%rbp
+	leaq	(%r14,%rbp,1),%r14
+	shrq	$3,%rbp	# extra rbp/8 bytes on top of the 0x100 multiple
+	addq	%rbp,%r14
+
+	call	_x86_64_AES_decrypt_compact	# the call pushes a return address, so the callee sees the end-of-key pointer at 16(%rsp)
+
+	movq	16(%rsp),%r9	# r9 = output pointer, rsi = original rsp
+	movq	24(%rsp),%rsi
+	movl	%eax,0(%r9)	# store the result block
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	movq	-48(%rsi),%r15	# restore the saved registers from below the original stack pointer, then restore rsp itself
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp
+L$dec_epilogue:
+	.byte	0xf3,0xc3	# return, emitted as raw bytes: 0xf3 (rep prefix) followed by 0xc3 (ret)
+
+.p2align	4
+.globl	_asm_AES_set_encrypt_key
+.private_extern _asm_AES_set_encrypt_key
+/* asm_AES_set_encrypt_key: public wrapper. Saves the callee-saved registers and calls _x86_64_AES_set_encrypt_key with %rdi, %rsi, %rdx untouched; the helper's %rax is returned as is. */
+_asm_AES_set_encrypt_key:
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	subq	$8,%rsp	/* one pad slot: 7 slots = 56 bytes in total */
+L$enc_key_prologue:
+
+	call	_x86_64_AES_set_encrypt_key
+/* only %rbx and %rbp are reloaded: the helper in this file writes no other callee-saved register */
+	movq	40(%rsp),%rbp
+	movq	48(%rsp),%rbx
+	addq	$56,%rsp	/* release the pad and all six saved slots */
+L$enc_key_epilogue:
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+
+
+/* _x86_64_AES_set_encrypt_key: expand a user key into an encryption key schedule. In: %rdi = user key, %esi = key size in bits, %rdx = schedule. Out: %rax = 0 on success, -1 if either pointer is NULL, -2 if the size is not 128/192/256. Writes the round count (10/12/14) at schedule + 240. Overwrites %rbx, %rbp, %rcx, %rdx, %rsi, %rdi, %r8. */
+.p2align	4
+_x86_64_AES_set_encrypt_key:
+	movl	%esi,%ecx	/* %ecx = key size in bits */
+	movq	%rdi,%rsi	/* %rsi = user key */
+	movq	%rdx,%rdi	/* %rdi = schedule write pointer */
+
+	testq	$-1,%rsi
+	jz	L$badpointer
+	testq	$-1,%rdi
+	jz	L$badpointer
+/* %rbp = L$AES_Te + 2048 + 128, so the 256-byte S-box copy at L$AES_Te + 2048 is addressed as -128(%rbp,index) */
+	leaq	L$AES_Te(%rip),%rbp
+	leaq	2048+128(%rbp),%rbp
+/* read the S-box at 32-byte steps; every loaded value is overwritten unused */
+/* NOTE(review): presumably a cache warm-up of the table - confirm against the generator script */
+	movl	0-128(%rbp),%eax
+	movl	32-128(%rbp),%ebx
+	movl	64-128(%rbp),%r8d
+	movl	96-128(%rbp),%edx
+	movl	128-128(%rbp),%eax
+	movl	160-128(%rbp),%ebx
+	movl	192-128(%rbp),%r8d
+	movl	224-128(%rbp),%edx
+
+	cmpl	$128,%ecx
+	je	L$10rounds
+	cmpl	$192,%ecx
+	je	L$12rounds
+	cmpl	$256,%ecx
+	je	L$14rounds
+	movq	$-2,%rax	/* unsupported key size */
+	jmp	L$exit
+/* 128-bit key: 4 key words, 10 passes of 4 new words each */
+L$10rounds:
+	movq	0(%rsi),%rax
+	movq	8(%rsi),%rdx
+	movq	%rax,0(%rdi)
+	movq	%rdx,8(%rdi)
+
+	shrq	$32,%rdx	/* %edx = key word 3; %eax already holds word 0 */
+	xorl	%ecx,%ecx	/* %ecx = round-constant index */
+	jmp	L$10shortcut
+.p2align	2
+L$10loop:
+	movl	0(%rdi),%eax	/* first word of the previous group */
+	movl	12(%rdi),%edx	/* last word of the previous group */
+L$10shortcut:
+	movzbl	%dl,%esi
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$24,%ebx	/* S-box of byte 0 goes to byte position 3 */
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	shrl	$16,%edx
+	movzbl	%dl,%esi
+	xorl	%ebx,%eax	/* S-box of byte 1 goes to byte position 0 */
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$8,%ebx	/* S-box of byte 2 goes to byte position 1 */
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	shll	$16,%ebx	/* S-box of byte 3 goes to byte position 2 */
+	xorl	%ebx,%eax
+
+	xorl	1024-128(%rbp,%rcx,4),%eax	/* round constant: 32-bit entry %rcx of the list at L$AES_Te + 3072 */
+	movl	%eax,16(%rdi)	/* new word 0; each following word is the running value xor the old word */
+	xorl	4(%rdi),%eax
+	movl	%eax,20(%rdi)
+	xorl	8(%rdi),%eax
+	movl	%eax,24(%rdi)
+	xorl	12(%rdi),%eax
+	movl	%eax,28(%rdi)
+	addl	$1,%ecx
+	leaq	16(%rdi),%rdi
+	cmpl	$10,%ecx
+	jl	L$10loop
+
+	movl	$10,80(%rdi)	/* %rdi has advanced 160 bytes, so this is schedule + 240 */
+	xorq	%rax,%rax	/* return 0 */
+	jmp	L$exit
+/* 192-bit key: 6 key words, up to 6 new words per pass */
+L$12rounds:
+	movq	0(%rsi),%rax
+	movq	8(%rsi),%rbx
+	movq	16(%rsi),%rdx
+	movq	%rax,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%rdx,16(%rdi)
+
+	shrq	$32,%rdx	/* %edx = key word 5 */
+	xorl	%ecx,%ecx	/* %ecx = round-constant index */
+	jmp	L$12shortcut
+.p2align	2
+L$12loop:
+	movl	0(%rdi),%eax
+	movl	20(%rdi),%edx
+L$12shortcut:
+	movzbl	%dl,%esi
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$24,%ebx
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	shrl	$16,%edx
+	movzbl	%dl,%esi
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$8,%ebx
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%eax
+
+	xorl	1024-128(%rbp,%rcx,4),%eax	/* round constant */
+	movl	%eax,24(%rdi)
+	xorl	4(%rdi),%eax
+	movl	%eax,28(%rdi)
+	xorl	8(%rdi),%eax
+	movl	%eax,32(%rdi)
+	xorl	12(%rdi),%eax
+	movl	%eax,36(%rdi)
+
+	cmpl	$7,%ecx	/* pass index 7 stops after four words: 6 + 7*6 + 4 = 52 words = 13 round keys */
+	je	L$12break
+	addl	$1,%ecx
+
+	xorl	16(%rdi),%eax
+	movl	%eax,40(%rdi)
+	xorl	20(%rdi),%eax
+	movl	%eax,44(%rdi)
+
+	leaq	24(%rdi),%rdi
+	jmp	L$12loop
+L$12break:
+	movl	$12,72(%rdi)	/* %rdi has advanced 7*24 = 168 bytes, so this is schedule + 240 */
+	xorq	%rax,%rax	/* return 0 */
+	jmp	L$exit
+/* 256-bit key: 8 key words, up to 8 new words per pass */
+L$14rounds:
+	movq	0(%rsi),%rax
+	movq	8(%rsi),%rbx
+	movq	16(%rsi),%rcx
+	movq	24(%rsi),%rdx
+	movq	%rax,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%rcx,16(%rdi)
+	movq	%rdx,24(%rdi)
+
+	shrq	$32,%rdx	/* %edx = key word 7 */
+	xorl	%ecx,%ecx	/* %ecx = round-constant index */
+	jmp	L$14shortcut
+.p2align	2
+L$14loop:
+	movl	0(%rdi),%eax
+	movl	28(%rdi),%edx
+L$14shortcut:
+	movzbl	%dl,%esi
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$24,%ebx
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	shrl	$16,%edx
+	movzbl	%dl,%esi
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$8,%ebx
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%eax
+
+	xorl	1024-128(%rbp,%rcx,4),%eax	/* round constant */
+	movl	%eax,32(%rdi)
+	xorl	4(%rdi),%eax
+	movl	%eax,36(%rdi)
+	xorl	8(%rdi),%eax
+	movl	%eax,40(%rdi)
+	xorl	12(%rdi),%eax
+	movl	%eax,44(%rdi)
+
+	cmpl	$6,%ecx	/* pass index 6 stops after four words: 8 + 6*8 + 4 = 60 words = 15 round keys */
+	je	L$14break
+	addl	$1,%ecx
+/* second half of the pass: S-box on each byte of the word just produced, bytes kept in place, no round constant */
+	movl	%eax,%edx
+	movl	16(%rdi),%eax
+	movzbl	%dl,%esi
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	movzbl	%dh,%esi
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	shrl	$16,%edx
+	shll	$8,%ebx
+	movzbl	%dl,%esi
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$16,%ebx
+	xorl	%ebx,%eax
+
+	movzbl	-128(%rbp,%rsi,1),%ebx
+	shll	$24,%ebx
+	xorl	%ebx,%eax
+
+	movl	%eax,48(%rdi)
+	xorl	20(%rdi),%eax
+	movl	%eax,52(%rdi)
+	xorl	24(%rdi),%eax
+	movl	%eax,56(%rdi)
+	xorl	28(%rdi),%eax
+	movl	%eax,60(%rdi)
+
+	leaq	32(%rdi),%rdi
+	jmp	L$14loop
+L$14break:
+	movl	$14,48(%rdi)	/* %rdi has advanced 6*32 = 192 bytes, so this is schedule + 240 */
+	xorq	%rax,%rax	/* return 0 */
+	jmp	L$exit
+
+L$badpointer:
+	movq	$-1,%rax	/* NULL user key or schedule pointer */
+L$exit:
+.byte	0xf3,0xc3	/* encoded "rep ret" */
+
+.p2align	4
+.globl	_asm_AES_set_decrypt_key
+.private_extern _asm_AES_set_decrypt_key
+/* asm_AES_set_decrypt_key: build a decryption key schedule. Same arguments as the encrypt-key helper (%rdi = user key, %esi = bits, %rdx = schedule). Runs the encryption key expansion, reverses the order of the round keys, then transforms the inner round keys. Returns the helper's error code, or 0. */
+_asm_AES_set_decrypt_key:
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	pushq	%rdx	/* keep the schedule pointer across the call (the helper overwrites %rdx) */
+L$dec_key_prologue:
+
+	call	_x86_64_AES_set_encrypt_key
+	movq	(%rsp),%r8	/* %r8 = schedule */
+	cmpl	$0,%eax
+	jne	L$abort	/* nonzero: return the helper's error code unchanged */
+/* Reverse the order of the rounds+1 round keys (16 bytes each) in place. */
+	movl	240(%r8),%r14d	/* round count written by the helper */
+	xorq	%rdi,%rdi
+	leaq	(%rdi,%r14,4),%rcx	/* 4 * rounds */
+	movq	%r8,%rsi	/* %rsi walks up from the first round key */
+	leaq	(%r8,%rcx,4),%rdi	/* %rdi walks down from schedule + 16*rounds */
+.p2align	2
+L$invert:
+	movq	0(%rsi),%rax
+	movq	8(%rsi),%rbx
+	movq	0(%rdi),%rcx
+	movq	8(%rdi),%rdx
+	movq	%rax,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%rcx,0(%rsi)
+	movq	%rdx,8(%rsi)
+	leaq	16(%rsi),%rsi
+	leaq	-16(%rdi),%rdi
+	cmpq	%rsi,%rdi	/* the pointers meet on the middle round key, which stays where it is */
+	jne	L$invert
+
+	leaq	L$AES_Te+2048+1024(%rip),%rax	/* constants stored after the four S-box copies */
+
+	movq	40(%rax),%rsi	/* 0x8080808080808080 */
+	movq	48(%rax),%rdi	/* 0xfefefefefefefefe */
+	movq	56(%rax),%rbp	/* 0x1b1b1b1b1b1b1b1b */
+/* Transform round keys 1 .. rounds-1 (the first and last are left untouched), 16 bytes per pass. */
+	movq	%r8,%r15
+	subl	$1,%r14d	/* rounds - 1 round keys to transform */
+.p2align	2
+L$permute:
+	leaq	16(%r15),%r15	/* next round key */
+	movq	0(%r15),%rax	/* x, low 8 bytes */
+	movq	8(%r15),%rcx	/* x, high 8 bytes */
+	movq	%rsi,%r9
+	movq	%rsi,%r12
+	andq	%rax,%r9	/* top bit of every byte */
+	andq	%rcx,%r12
+	movq	%r9,%rbx
+	movq	%r12,%rdx
+	shrq	$7,%r9
+	leaq	(%rax,%rax,1),%r8
+	shrq	$7,%r12
+	leaq	(%rcx,%rcx,1),%r11
+	subq	%r9,%rbx	/* 0x80 - 0x01 = 0x7f in every byte whose top bit was set */
+	subq	%r12,%rdx
+	andq	%rdi,%r8	/* drop the bit shifted in from the neighbouring byte */
+	andq	%rdi,%r11
+	andq	%rbp,%rbx	/* 0x1b in every byte whose top bit was set */
+	andq	%rbp,%rdx
+	xorq	%rbx,%r8	/* %r8/%r11 = 2x: per-byte doubling in GF(2^8) */
+	xorq	%rdx,%r11
+	movq	%rsi,%r10
+	movq	%rsi,%r13
+/* second doubling, same steps: %r9/%r12 = 4x */
+	andq	%r8,%r10
+	andq	%r11,%r13
+	movq	%r10,%rbx
+	movq	%r13,%rdx
+	shrq	$7,%r10
+	leaq	(%r8,%r8,1),%r9
+	shrq	$7,%r13
+	leaq	(%r11,%r11,1),%r12
+	subq	%r10,%rbx
+	subq	%r13,%rdx
+	andq	%rdi,%r9
+	andq	%rdi,%r12
+	andq	%rbp,%rbx
+	andq	%rbp,%rdx
+	xorq	%rbx,%r9
+	xorq	%rdx,%r12
+	movq	%rsi,%r10
+	movq	%rsi,%r13
+/* third doubling: %r10/%r13 = 8x; interleaved with it, %r8/%r11 become 3x and %r9/%r12 become 5x */
+	andq	%r9,%r10
+	andq	%r12,%r13
+	movq	%r10,%rbx
+	movq	%r13,%rdx
+	shrq	$7,%r10
+	xorq	%rax,%r8
+	shrq	$7,%r13
+	xorq	%rcx,%r11
+	subq	%r10,%rbx
+	subq	%r13,%rdx
+	leaq	(%r9,%r9,1),%r10
+	leaq	(%r12,%r12,1),%r13
+	xorq	%rax,%r9
+	xorq	%rcx,%r12
+	andq	%rdi,%r10
+	andq	%rdi,%r13
+	andq	%rbp,%rbx
+	andq	%rbp,%rdx
+	xorq	%rbx,%r10
+	xorq	%rdx,%r13
+/* combine: %rax/%rcx = 9x, %r8/%r11 = 0xb*x, %r9/%r12 = 0xd*x, %r10/%r13 = 0xe*x; %rbx/%rdx take the upper 32-bit words of 9x */
+	xorq	%r10,%rax
+	xorq	%r13,%rcx
+	xorq	%r10,%r8
+	xorq	%r13,%r11
+	movq	%rax,%rbx
+	movq	%rcx,%rdx
+	xorq	%r10,%r9
+	shrq	$32,%rbx
+	xorq	%r13,%r12
+	shrq	$32,%rdx
+	xorq	%r8,%r10
+	roll	$8,%eax
+	xorq	%r11,%r13
+	roll	$8,%ecx
+	xorq	%r9,%r10
+	roll	$8,%ebx
+	xorq	%r12,%r13
+/* each 32-bit word becomes rol(9x,8) ^ 0xe*x ^ rol(0xb*x,24) ^ rol(0xd*x,16), i.e. the AES InvMixColumns step on that key word */
+	roll	$8,%edx
+	xorl	%r10d,%eax
+	shrq	$32,%r10
+	xorl	%r13d,%ecx
+	shrq	$32,%r13
+	xorl	%r10d,%ebx
+	xorl	%r13d,%edx
+
+	movq	%r8,%r10
+	roll	$24,%r8d
+	movq	%r11,%r13
+	roll	$24,%r11d
+	shrq	$32,%r10
+	xorl	%r8d,%eax
+	shrq	$32,%r13
+	xorl	%r11d,%ecx
+	roll	$24,%r10d
+	movq	%r9,%r8
+	roll	$24,%r13d
+	movq	%r12,%r11
+	shrq	$32,%r8
+	xorl	%r10d,%ebx
+	shrq	$32,%r11
+	xorl	%r13d,%edx
+
+
+	roll	$16,%r9d
+
+	roll	$16,%r12d
+
+	roll	$16,%r8d
+
+	xorl	%r9d,%eax
+	roll	$16,%r11d
+	xorl	%r12d,%ecx
+
+	xorl	%r8d,%ebx
+	xorl	%r11d,%edx
+	movl	%eax,0(%r15)	/* write the transformed round key back in place */
+	movl	%ebx,4(%r15)
+	movl	%ecx,8(%r15)
+	movl	%edx,12(%r15)
+	subl	$1,%r14d
+	jnz	L$permute
+
+	xorq	%rax,%rax	/* return 0 */
+L$abort:
+	movq	8(%rsp),%r15	/* slot 0 is the pushed %rdx; the saved registers start at 8 */
+	movq	16(%rsp),%r14
+	movq	24(%rsp),%r13
+	movq	32(%rsp),%r12
+	movq	40(%rsp),%rbp
+	movq	48(%rsp),%rbx
+	addq	$56,%rsp	/* seven slots */
+L$dec_key_epilogue:
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+
+.p2align	4
+.globl	_asm_AES_cbc_encrypt
+.private_extern _asm_AES_cbc_encrypt
+/* asm_AES_cbc_encrypt: CBC-chained encrypt or decrypt. In: %rdi = input, %rsi = output, %rdx = length in bytes, %rcx = key schedule, %r8 = 16-byte IV (updated before return), %r9d = nonzero to encrypt, zero to decrypt. */
+/* Returns at once for length 0. Lengths >= 512 that are a multiple of 16 take the fast path unless bit 28 of the first OPENSSL_ia32cap_P word is set; everything else takes the slow path built on the *_compact block routines. */
+.private_extern	_asm_AES_cbc_encrypt
+_asm_AES_cbc_encrypt:
+	cmpq	$0,%rdx
+	je	L$cbc_epilogue	/* nothing has been pushed yet, so return directly */
+	pushfq	/* flags are restored by popfq on exit; cld below changes the direction flag */
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+L$cbc_prologue:
+
+	cld	/* the string instructions below run forward */
+	movl	%r9d,%r9d	/* zero-extend the 32-bit enc flag */
+
+	leaq	L$AES_Te(%rip),%r14
+	leaq	L$AES_Td(%rip),%r10
+	cmpq	$0,%r9
+	cmoveq	%r10,%r14	/* %r14 = L$AES_Te when encrypting, L$AES_Td when decrypting */
+
+	leaq	_OPENSSL_ia32cap_P(%rip),%r10
+	movl	(%r10),%r10d	/* first 32-bit capability word */
+	cmpq	$512,%rdx
+	jb	L$cbc_slow_prologue	/* short input */
+	testq	$15,%rdx
+	jnz	L$cbc_slow_prologue	/* length is not a multiple of 16 */
+	btl	$28,%r10d
+	jc	L$cbc_slow_prologue	/* NOTE(review): bit 28 is presumably the hyper-threading flag - confirm against the OPENSSL_ia32cap documentation */
+/* ---- fast path: whole blocks only ---- */
+
+	leaq	-88-248(%rsp),%r15	/* candidate frame, 336 bytes below %rsp */
+	andq	$-64,%r15	/* 64-byte aligned */
+/* Slide the frame down according to its offset within a 4 KiB page relative to the table's 2304 bytes. */
+/* NOTE(review): presumably this avoids cache/4K aliasing between the frame and the table - confirm against the generator script. */
+	movq	%r14,%r10
+	leaq	2304(%r14),%r11
+	movq	%r15,%r12
+	andq	$0xFFF,%r10	/* page offset of the table start */
+	andq	$0xFFF,%r11	/* page offset of the table end */
+	andq	$0xFFF,%r12	/* page offset of the frame */
+
+	cmpq	%r11,%r12
+	jb	L$cbc_te_break_out
+	subq	%r11,%r12
+	subq	%r12,%r15	/* frame's page offset now equals that of the table end */
+	jmp	L$cbc_te_ok
+L$cbc_te_break_out:
+	subq	%r10,%r12
+	andq	$0xFFF,%r12
+	addq	$320,%r12
+	subq	%r12,%r15	/* frame's page offset is now (table start offset - 320) mod 4096 */
+.p2align	2
+L$cbc_te_ok:
+
+	xchgq	%rsp,%r15	/* switch to the new frame; %r15 = %rsp as it was after the pushes */
+/* frame slots: 0 = key pointer in use, 16 = old %rsp, 24 = in, 32 = out, 40 = bytes left, 48 = key, 56 = IV pointer, 64..79 = chaining data, 80.. = key copy, 80+240 = round count of the copy */
+	movq	%r15,16(%rsp)
+L$cbc_fast_body:
+	movq	%rdi,24(%rsp)
+	movq	%rsi,32(%rsp)
+	movq	%rdx,40(%rsp)
+	movq	%rcx,48(%rsp)
+	movq	%r8,56(%rsp)
+	movl	$0,80+240(%rsp)	/* 0 = no key copy in the frame (checked at cleanup) */
+	movq	%r8,%rbp	/* %rbp = IV pointer */
+	movq	%r9,%rbx	/* %rbx = enc flag */
+	movq	%rsi,%r9	/* %r9 = output pointer */
+	movq	%rdi,%r8	/* %r8 = input pointer */
+	movq	%rcx,%r15	/* %r15 = key schedule */
+
+	movl	240(%r15),%eax	/* round count */
+/* Copy the key schedule into the frame when (key - table) & 0xfff is below 2304 or at least 4096-248; otherwise use it in place. */
+	movq	%r15,%r10
+	subq	%r14,%r10
+	andq	$0xfff,%r10
+	cmpq	$2304,%r10
+	jb	L$cbc_do_ecopy
+	cmpq	$4096-248,%r10
+	jb	L$cbc_skip_ecopy
+.p2align	2
+L$cbc_do_ecopy:
+	movq	%r15,%rsi
+	leaq	80(%rsp),%rdi
+	leaq	80(%rsp),%r15	/* from here on the key pointer is the copy */
+	movl	$30,%ecx	/* 30 quadwords = 240 bytes */
+.long	0x90A548F3	/* bytes f3 48 a5 90: rep movsq; nop */
+	movl	%eax,(%rdi)	/* round count right after the copy, at 80+240(%rsp); also marks the copy as present */
+L$cbc_skip_ecopy:
+	movq	%r15,0(%rsp)
+
+	movl	$18,%ecx	/* 18 * 128 = 2304 bytes */
+.p2align	2
+L$cbc_prefetch_te:
+	movq	0(%r14),%r10	/* one read per 32 bytes of the table; the loaded values are not used */
+	movq	32(%r14),%r11
+	movq	64(%r14),%r12
+	movq	96(%r14),%r13
+	leaq	128(%r14),%r14
+	subl	$1,%ecx
+	jnz	L$cbc_prefetch_te
+	leaq	-2304(%r14),%r14	/* back to the table start */
+
+	cmpq	$0,%rbx
+	je	L$FAST_DECRYPT
+
+/* fast encrypt: the chaining value lives in %eax,%ebx,%ecx,%edx and starts as the IV */
+	movl	0(%rbp),%eax
+	movl	4(%rbp),%ebx
+	movl	8(%rbp),%ecx
+	movl	12(%rbp),%edx
+
+.p2align	2
+L$cbc_fast_enc_loop:
+	xorl	0(%r8),%eax	/* input block xor previous output block (or IV) */
+	xorl	4(%r8),%ebx
+	xorl	8(%r8),%ecx
+	xorl	12(%r8),%edx
+	movq	0(%rsp),%r15
+	movq	%r8,24(%rsp)	/* input pointer is spilled across the call; %r9 is not, so the callee is relied on to keep it */
+
+	call	_x86_64_AES_encrypt
+
+	movq	24(%rsp),%r8
+	movq	40(%rsp),%r10
+	movl	%eax,0(%r9)
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	leaq	16(%r8),%r8
+	leaq	16(%r9),%r9
+	subq	$16,%r10
+	testq	$-16,%r10	/* any whole block left? */
+	movq	%r10,40(%rsp)	/* mov keeps the flags from testq for the jnz */
+	jnz	L$cbc_fast_enc_loop
+	movq	56(%rsp),%rbp
+	movl	%eax,0(%rbp)	/* last output block becomes the new IV */
+	movl	%ebx,4(%rbp)
+	movl	%ecx,8(%rbp)
+	movl	%edx,12(%rbp)
+
+	jmp	L$cbc_fast_cleanup
+
+
+.p2align	4
+L$FAST_DECRYPT:
+	cmpq	%r8,%r9
+	je	L$cbc_fast_dec_in_place	/* in == out: each input block must be saved before it is overwritten */
+
+	movq	%rbp,64(%rsp)	/* slot 64 = pointer to the previous input block, initially the IV */
+.p2align	2
+L$cbc_fast_dec_loop:
+	movl	0(%r8),%eax
+	movl	4(%r8),%ebx
+	movl	8(%r8),%ecx
+	movl	12(%r8),%edx
+	movq	0(%rsp),%r15
+	movq	%r8,24(%rsp)
+
+	call	_x86_64_AES_decrypt
+
+	movq	64(%rsp),%rbp
+	movq	24(%rsp),%r8
+	movq	40(%rsp),%r10
+	xorl	0(%rbp),%eax	/* xor with the previous input block (or IV) */
+	xorl	4(%rbp),%ebx
+	xorl	8(%rbp),%ecx
+	xorl	12(%rbp),%edx
+	movq	%r8,%rbp	/* the current input block is the next chaining value */
+
+	subq	$16,%r10	/* sets ZF for the jnz below; the mov/lea in between leave flags alone */
+	movq	%r10,40(%rsp)
+	movq	%rbp,64(%rsp)
+
+	movl	%eax,0(%r9)
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	leaq	16(%r8),%r8
+	leaq	16(%r9),%r9
+	jnz	L$cbc_fast_dec_loop
+	movq	56(%rsp),%r12	/* IV pointer */
+	movq	0(%rbp),%r10	/* copy the last input block into the IV */
+	movq	8(%rbp),%r11
+	movq	%r10,0(%r12)
+	movq	%r11,8(%r12)
+	jmp	L$cbc_fast_cleanup
+
+.p2align	4
+L$cbc_fast_dec_in_place:
+	movq	0(%rbp),%r10
+	movq	8(%rbp),%r11
+	movq	%r10,0+64(%rsp)	/* slots 64..79 = copy of the chaining value, initially the IV */
+	movq	%r11,8+64(%rsp)
+.p2align	2
+L$cbc_fast_dec_in_place_loop:
+	movl	0(%r8),%eax
+	movl	4(%r8),%ebx
+	movl	8(%r8),%ecx
+	movl	12(%r8),%edx
+	movq	0(%rsp),%r15
+	movq	%r8,24(%rsp)
+
+	call	_x86_64_AES_decrypt
+
+	movq	24(%rsp),%r8
+	movq	40(%rsp),%r10
+	xorl	0+64(%rsp),%eax
+	xorl	4+64(%rsp),%ebx
+	xorl	8+64(%rsp),%ecx
+	xorl	12+64(%rsp),%edx
+
+	movq	0(%r8),%r11	/* grab the input block before the output store overwrites it */
+	movq	8(%r8),%r12
+	subq	$16,%r10
+	jz	L$cbc_fast_dec_in_place_done
+
+	movq	%r11,0+64(%rsp)	/* it becomes the next chaining value */
+	movq	%r12,8+64(%rsp)
+
+	movl	%eax,0(%r9)
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	leaq	16(%r8),%r8
+	leaq	16(%r9),%r9
+	movq	%r10,40(%rsp)
+	jmp	L$cbc_fast_dec_in_place_loop
+L$cbc_fast_dec_in_place_done:
+	movq	56(%rsp),%rdi
+	movq	%r11,0(%rdi)	/* last input block becomes the new IV */
+	movq	%r12,8(%rdi)
+
+	movl	%eax,0(%r9)
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+.p2align	2
+L$cbc_fast_cleanup:
+	cmpl	$0,80+240(%rsp)	/* was the key schedule copied into the frame? */
+	leaq	80(%rsp),%rdi	/* lea leaves the flags from cmpl intact */
+	je	L$cbc_exit
+	movl	$30,%ecx
+	xorq	%rax,%rax
+.long	0x90AB48F3	/* bytes f3 48 ab 90: rep stosq; nop - zero the 240-byte key copy */
+
+	jmp	L$cbc_exit
+
+
+.p2align	4
+L$cbc_slow_prologue:
+/* ---- slow path: any length; frame lowered by ((frame - (key - 151)) & 0x3c0) bytes ---- */
+	leaq	-88(%rsp),%rbp
+	andq	$-64,%rbp
+
+	leaq	-88-63(%rcx),%r10
+	subq	%rbp,%r10
+	negq	%r10
+	andq	$0x3c0,%r10
+	subq	%r10,%rbp
+
+	xchgq	%rsp,%rbp	/* switch to the new frame; %rbp = %rsp as it was after the pushes */
+
+	movq	%rbp,16(%rsp)
+L$cbc_slow_body:
+
+
+
+/* frame slots: 0 = key, 8 = key + 16*rounds, 16 = old %rsp, 24 = in, 32 = out, 40 = bytes left, 56 = IV pointer, 64..79 = chaining value / partial block */
+	movq	%r8,56(%rsp)
+	movq	%r8,%rbp	/* %rbp = IV pointer */
+	movq	%r9,%rbx	/* %rbx = enc flag */
+	movq	%rsi,%r9	/* %r9 = output pointer */
+	movq	%rdi,%r8	/* %r8 = input pointer */
+	movq	%rcx,%r15	/* %r15 = key schedule */
+	movq	%rdx,%r10	/* %r10 = bytes left */
+
+	movl	240(%r15),%eax	/* round count */
+	movq	%r15,0(%rsp)
+	shll	$4,%eax
+	leaq	(%r15,%rax,1),%rax
+	movq	%rax,8(%rsp)
+
+/* table base for the compact routines: table + 2048 + off, off = ((%rsp + 760) - (table + 2048)) & 0x300 */
+	leaq	2048(%r14),%r14
+	leaq	768-8(%rsp),%rax
+	subq	%r14,%rax
+	andq	$0x300,%rax
+	leaq	(%r14,%rax,1),%r14
+
+	cmpq	$0,%rbx
+	je	L$SLOW_DECRYPT
+
+/* slow encrypt */
+	testq	$-16,%r10	/* fewer than 16 bytes in total? (the loads below keep these flags for the jz) */
+	movl	0(%rbp),%eax
+	movl	4(%rbp),%ebx
+	movl	8(%rbp),%ecx
+	movl	12(%rbp),%edx
+	jz	L$cbc_slow_enc_tail
+
+.p2align	2
+L$cbc_slow_enc_loop:
+	xorl	0(%r8),%eax
+	xorl	4(%r8),%ebx
+	xorl	8(%r8),%ecx
+	xorl	12(%r8),%edx
+	movq	0(%rsp),%r15
+	movq	%r8,24(%rsp)
+	movq	%r9,32(%rsp)
+	movq	%r10,40(%rsp)
+
+	call	_x86_64_AES_encrypt_compact
+
+	movq	24(%rsp),%r8
+	movq	32(%rsp),%r9
+	movq	40(%rsp),%r10
+	movl	%eax,0(%r9)
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	leaq	16(%r8),%r8
+	leaq	16(%r9),%r9
+	subq	$16,%r10
+	testq	$-16,%r10
+	jnz	L$cbc_slow_enc_loop	/* another whole block */
+	testq	$15,%r10
+	jnz	L$cbc_slow_enc_tail	/* 1..15 bytes remain */
+	movq	56(%rsp),%rbp
+	movl	%eax,0(%rbp)	/* last output block becomes the new IV */
+	movl	%ebx,4(%rbp)
+	movl	%ecx,8(%rbp)
+	movl	%edx,12(%rbp)
+
+	jmp	L$cbc_exit
+
+.p2align	2
+L$cbc_slow_enc_tail:
+	movq	%rax,%r11	/* park two chaining words; %rax and %rcx are needed by the string instructions */
+	movq	%rcx,%r12
+	movq	%r10,%rcx
+	movq	%r8,%rsi
+	movq	%r9,%rdi
+.long	0x9066A4F3	/* bytes f3 a4 66 90: rep movsb + 2-byte nop - copy the remaining input bytes into the output buffer */
+	movq	$16,%rcx
+	subq	%r10,%rcx
+	xorq	%rax,%rax
+.long	0x9066AAF3	/* bytes f3 aa 66 90: rep stosb + 2-byte nop - zero-fill up to 16 bytes */
+	movq	%r9,%r8	/* the padded block in the output buffer is now the input */
+	movq	$16,%r10
+	movq	%r11,%rax
+	movq	%r12,%rcx
+	jmp	L$cbc_slow_enc_loop	/* encrypt the padded block in place as a final full block */
+
+.p2align	4
+L$SLOW_DECRYPT:
+	shrq	$3,%rax	/* %rax still holds off from the table-base computation */
+	addq	%rax,%r14	/* decrypt uses table + 2048 + off + off/8 */
+
+	movq	0(%rbp),%r11
+	movq	8(%rbp),%r12
+	movq	%r11,0+64(%rsp)	/* slots 64..79 = chaining value, initially the IV */
+	movq	%r12,8+64(%rsp)
+
+.p2align	2
+L$cbc_slow_dec_loop:
+	movl	0(%r8),%eax
+	movl	4(%r8),%ebx
+	movl	8(%r8),%ecx
+	movl	12(%r8),%edx
+	movq	0(%rsp),%r15
+	movq	%r8,24(%rsp)
+	movq	%r9,32(%rsp)
+	movq	%r10,40(%rsp)
+
+	call	_x86_64_AES_decrypt_compact
+
+	movq	24(%rsp),%r8
+	movq	32(%rsp),%r9
+	movq	40(%rsp),%r10
+	xorl	0+64(%rsp),%eax
+	xorl	4+64(%rsp),%ebx
+	xorl	8+64(%rsp),%ecx
+	xorl	12+64(%rsp),%edx
+
+	movq	0(%r8),%r11	/* current input block, read before any output store */
+	movq	8(%r8),%r12
+	subq	$16,%r10
+	jc	L$cbc_slow_dec_partial	/* fewer than 16 bytes were left */
+	jz	L$cbc_slow_dec_done	/* that was the last whole block */
+
+	movq	%r11,0+64(%rsp)
+	movq	%r12,8+64(%rsp)
+
+	movl	%eax,0(%r9)
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	leaq	16(%r8),%r8
+	leaq	16(%r9),%r9
+	jmp	L$cbc_slow_dec_loop
+L$cbc_slow_dec_done:
+	movq	56(%rsp),%rdi
+	movq	%r11,0(%rdi)	/* last input block becomes the new IV */
+	movq	%r12,8(%rdi)
+
+	movl	%eax,0(%r9)
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	jmp	L$cbc_exit
+
+.p2align	2
+L$cbc_slow_dec_partial:
+	movq	56(%rsp),%rdi
+	movq	%r11,0(%rdi)
+	movq	%r12,8(%rdi)
+
+	movl	%eax,0+64(%rsp)	/* stage the decrypted block in the frame */
+	movl	%ebx,4+64(%rsp)
+	movl	%ecx,8+64(%rsp)
+	movl	%edx,12+64(%rsp)
+
+	movq	%r9,%rdi
+	leaq	64(%rsp),%rsi
+	leaq	16(%r10),%rcx	/* undo the subtraction: number of bytes that were left */
+.long	0x9066A4F3	/* bytes f3 a4 66 90: rep movsb + 2-byte nop - copy only those bytes to the output */
+	jmp	L$cbc_exit
+
+.p2align	4
+L$cbc_exit:
+	movq	16(%rsp),%rsi	/* saved %rsp; it points at the pushed %r15 */
+	movq	(%rsi),%r15
+	movq	8(%rsi),%r14
+	movq	16(%rsi),%r13
+	movq	24(%rsi),%r12
+	movq	32(%rsi),%rbp
+	movq	40(%rsi),%rbx
+	leaq	48(%rsi),%rsp	/* %rsp now points at the flags saved by pushfq */
+L$cbc_popfq:
+	popfq
+L$cbc_epilogue:
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+/* L$AES_Te: encryption tables. Layout as used by the routines in this file: +0 = 256 eight-byte entries (2048 bytes), +2048 = four 256-byte copies of the S-box, +3072 = ten 32-bit round constants followed by three 8-byte masks. */
+.p2align	6
+L$AES_Te:
+.long	0xa56363c6,0xa56363c6	/* first of 256 entries; each entry is one 32-bit word stored twice */
+.long	0x847c7cf8,0x847c7cf8
+.long	0x997777ee,0x997777ee
+.long	0x8d7b7bf6,0x8d7b7bf6
+.long	0x0df2f2ff,0x0df2f2ff
+.long	0xbd6b6bd6,0xbd6b6bd6
+.long	0xb16f6fde,0xb16f6fde
+.long	0x54c5c591,0x54c5c591
+.long	0x50303060,0x50303060
+.long	0x03010102,0x03010102
+.long	0xa96767ce,0xa96767ce
+.long	0x7d2b2b56,0x7d2b2b56
+.long	0x19fefee7,0x19fefee7
+.long	0x62d7d7b5,0x62d7d7b5
+.long	0xe6abab4d,0xe6abab4d
+.long	0x9a7676ec,0x9a7676ec
+.long	0x45caca8f,0x45caca8f
+.long	0x9d82821f,0x9d82821f
+.long	0x40c9c989,0x40c9c989
+.long	0x877d7dfa,0x877d7dfa
+.long	0x15fafaef,0x15fafaef
+.long	0xeb5959b2,0xeb5959b2
+.long	0xc947478e,0xc947478e
+.long	0x0bf0f0fb,0x0bf0f0fb
+.long	0xecadad41,0xecadad41
+.long	0x67d4d4b3,0x67d4d4b3
+.long	0xfda2a25f,0xfda2a25f
+.long	0xeaafaf45,0xeaafaf45
+.long	0xbf9c9c23,0xbf9c9c23
+.long	0xf7a4a453,0xf7a4a453
+.long	0x967272e4,0x967272e4
+.long	0x5bc0c09b,0x5bc0c09b
+.long	0xc2b7b775,0xc2b7b775
+.long	0x1cfdfde1,0x1cfdfde1
+.long	0xae93933d,0xae93933d
+.long	0x6a26264c,0x6a26264c
+.long	0x5a36366c,0x5a36366c
+.long	0x413f3f7e,0x413f3f7e
+.long	0x02f7f7f5,0x02f7f7f5
+.long	0x4fcccc83,0x4fcccc83
+.long	0x5c343468,0x5c343468
+.long	0xf4a5a551,0xf4a5a551
+.long	0x34e5e5d1,0x34e5e5d1
+.long	0x08f1f1f9,0x08f1f1f9
+.long	0x937171e2,0x937171e2
+.long	0x73d8d8ab,0x73d8d8ab
+.long	0x53313162,0x53313162
+.long	0x3f15152a,0x3f15152a
+.long	0x0c040408,0x0c040408
+.long	0x52c7c795,0x52c7c795
+.long	0x65232346,0x65232346
+.long	0x5ec3c39d,0x5ec3c39d
+.long	0x28181830,0x28181830
+.long	0xa1969637,0xa1969637
+.long	0x0f05050a,0x0f05050a
+.long	0xb59a9a2f,0xb59a9a2f
+.long	0x0907070e,0x0907070e
+.long	0x36121224,0x36121224
+.long	0x9b80801b,0x9b80801b
+.long	0x3de2e2df,0x3de2e2df
+.long	0x26ebebcd,0x26ebebcd
+.long	0x6927274e,0x6927274e
+.long	0xcdb2b27f,0xcdb2b27f
+.long	0x9f7575ea,0x9f7575ea
+.long	0x1b090912,0x1b090912
+.long	0x9e83831d,0x9e83831d
+.long	0x742c2c58,0x742c2c58
+.long	0x2e1a1a34,0x2e1a1a34
+.long	0x2d1b1b36,0x2d1b1b36
+.long	0xb26e6edc,0xb26e6edc
+.long	0xee5a5ab4,0xee5a5ab4
+.long	0xfba0a05b,0xfba0a05b
+.long	0xf65252a4,0xf65252a4
+.long	0x4d3b3b76,0x4d3b3b76
+.long	0x61d6d6b7,0x61d6d6b7
+.long	0xceb3b37d,0xceb3b37d
+.long	0x7b292952,0x7b292952
+.long	0x3ee3e3dd,0x3ee3e3dd
+.long	0x712f2f5e,0x712f2f5e
+.long	0x97848413,0x97848413
+.long	0xf55353a6,0xf55353a6
+.long	0x68d1d1b9,0x68d1d1b9
+.long	0x00000000,0x00000000
+.long	0x2cededc1,0x2cededc1
+.long	0x60202040,0x60202040
+.long	0x1ffcfce3,0x1ffcfce3
+.long	0xc8b1b179,0xc8b1b179
+.long	0xed5b5bb6,0xed5b5bb6
+.long	0xbe6a6ad4,0xbe6a6ad4
+.long	0x46cbcb8d,0x46cbcb8d
+.long	0xd9bebe67,0xd9bebe67
+.long	0x4b393972,0x4b393972
+.long	0xde4a4a94,0xde4a4a94
+.long	0xd44c4c98,0xd44c4c98
+.long	0xe85858b0,0xe85858b0
+.long	0x4acfcf85,0x4acfcf85
+.long	0x6bd0d0bb,0x6bd0d0bb
+.long	0x2aefefc5,0x2aefefc5
+.long	0xe5aaaa4f,0xe5aaaa4f
+.long	0x16fbfbed,0x16fbfbed
+.long	0xc5434386,0xc5434386
+.long	0xd74d4d9a,0xd74d4d9a
+.long	0x55333366,0x55333366
+.long	0x94858511,0x94858511
+.long	0xcf45458a,0xcf45458a
+.long	0x10f9f9e9,0x10f9f9e9
+.long	0x06020204,0x06020204
+.long	0x817f7ffe,0x817f7ffe
+.long	0xf05050a0,0xf05050a0
+.long	0x443c3c78,0x443c3c78
+.long	0xba9f9f25,0xba9f9f25
+.long	0xe3a8a84b,0xe3a8a84b
+.long	0xf35151a2,0xf35151a2
+.long	0xfea3a35d,0xfea3a35d
+.long	0xc0404080,0xc0404080
+.long	0x8a8f8f05,0x8a8f8f05
+.long	0xad92923f,0xad92923f
+.long	0xbc9d9d21,0xbc9d9d21
+.long	0x48383870,0x48383870
+.long	0x04f5f5f1,0x04f5f5f1
+.long	0xdfbcbc63,0xdfbcbc63
+.long	0xc1b6b677,0xc1b6b677
+.long	0x75dadaaf,0x75dadaaf
+.long	0x63212142,0x63212142
+.long	0x30101020,0x30101020
+.long	0x1affffe5,0x1affffe5
+.long	0x0ef3f3fd,0x0ef3f3fd
+.long	0x6dd2d2bf,0x6dd2d2bf
+.long	0x4ccdcd81,0x4ccdcd81
+.long	0x140c0c18,0x140c0c18
+.long	0x35131326,0x35131326
+.long	0x2fececc3,0x2fececc3
+.long	0xe15f5fbe,0xe15f5fbe
+.long	0xa2979735,0xa2979735
+.long	0xcc444488,0xcc444488
+.long	0x3917172e,0x3917172e
+.long	0x57c4c493,0x57c4c493
+.long	0xf2a7a755,0xf2a7a755
+.long	0x827e7efc,0x827e7efc
+.long	0x473d3d7a,0x473d3d7a
+.long	0xac6464c8,0xac6464c8
+.long	0xe75d5dba,0xe75d5dba
+.long	0x2b191932,0x2b191932
+.long	0x957373e6,0x957373e6
+.long	0xa06060c0,0xa06060c0
+.long	0x98818119,0x98818119
+.long	0xd14f4f9e,0xd14f4f9e
+.long	0x7fdcdca3,0x7fdcdca3
+.long	0x66222244,0x66222244
+.long	0x7e2a2a54,0x7e2a2a54
+.long	0xab90903b,0xab90903b
+.long	0x8388880b,0x8388880b
+.long	0xca46468c,0xca46468c
+.long	0x29eeeec7,0x29eeeec7
+.long	0xd3b8b86b,0xd3b8b86b
+.long	0x3c141428,0x3c141428
+.long	0x79dedea7,0x79dedea7
+.long	0xe25e5ebc,0xe25e5ebc
+.long	0x1d0b0b16,0x1d0b0b16
+.long	0x76dbdbad,0x76dbdbad
+.long	0x3be0e0db,0x3be0e0db
+.long	0x56323264,0x56323264
+.long	0x4e3a3a74,0x4e3a3a74
+.long	0x1e0a0a14,0x1e0a0a14
+.long	0xdb494992,0xdb494992
+.long	0x0a06060c,0x0a06060c
+.long	0x6c242448,0x6c242448
+.long	0xe45c5cb8,0xe45c5cb8
+.long	0x5dc2c29f,0x5dc2c29f
+.long	0x6ed3d3bd,0x6ed3d3bd
+.long	0xefacac43,0xefacac43
+.long	0xa66262c4,0xa66262c4
+.long	0xa8919139,0xa8919139
+.long	0xa4959531,0xa4959531
+.long	0x37e4e4d3,0x37e4e4d3
+.long	0x8b7979f2,0x8b7979f2
+.long	0x32e7e7d5,0x32e7e7d5
+.long	0x43c8c88b,0x43c8c88b
+.long	0x5937376e,0x5937376e
+.long	0xb76d6dda,0xb76d6dda
+.long	0x8c8d8d01,0x8c8d8d01
+.long	0x64d5d5b1,0x64d5d5b1
+.long	0xd24e4e9c,0xd24e4e9c
+.long	0xe0a9a949,0xe0a9a949
+.long	0xb46c6cd8,0xb46c6cd8
+.long	0xfa5656ac,0xfa5656ac
+.long	0x07f4f4f3,0x07f4f4f3
+.long	0x25eaeacf,0x25eaeacf
+.long	0xaf6565ca,0xaf6565ca
+.long	0x8e7a7af4,0x8e7a7af4
+.long	0xe9aeae47,0xe9aeae47
+.long	0x18080810,0x18080810
+.long	0xd5baba6f,0xd5baba6f
+.long	0x887878f0,0x887878f0
+.long	0x6f25254a,0x6f25254a
+.long	0x722e2e5c,0x722e2e5c
+.long	0x241c1c38,0x241c1c38
+.long	0xf1a6a657,0xf1a6a657
+.long	0xc7b4b473,0xc7b4b473
+.long	0x51c6c697,0x51c6c697
+.long	0x23e8e8cb,0x23e8e8cb
+.long	0x7cdddda1,0x7cdddda1
+.long	0x9c7474e8,0x9c7474e8
+.long	0x211f1f3e,0x211f1f3e
+.long	0xdd4b4b96,0xdd4b4b96
+.long	0xdcbdbd61,0xdcbdbd61
+.long	0x868b8b0d,0x868b8b0d
+.long	0x858a8a0f,0x858a8a0f
+.long	0x907070e0,0x907070e0
+.long	0x423e3e7c,0x423e3e7c
+.long	0xc4b5b571,0xc4b5b571
+.long	0xaa6666cc,0xaa6666cc
+.long	0xd8484890,0xd8484890
+.long	0x05030306,0x05030306
+.long	0x01f6f6f7,0x01f6f6f7
+.long	0x120e0e1c,0x120e0e1c
+.long	0xa36161c2,0xa36161c2
+.long	0x5f35356a,0x5f35356a
+.long	0xf95757ae,0xf95757ae
+.long	0xd0b9b969,0xd0b9b969
+.long	0x91868617,0x91868617
+.long	0x58c1c199,0x58c1c199
+.long	0x271d1d3a,0x271d1d3a
+.long	0xb99e9e27,0xb99e9e27
+.long	0x38e1e1d9,0x38e1e1d9
+.long	0x13f8f8eb,0x13f8f8eb
+.long	0xb398982b,0xb398982b
+.long	0x33111122,0x33111122
+.long	0xbb6969d2,0xbb6969d2
+.long	0x70d9d9a9,0x70d9d9a9
+.long	0x898e8e07,0x898e8e07
+.long	0xa7949433,0xa7949433
+.long	0xb69b9b2d,0xb69b9b2d
+.long	0x221e1e3c,0x221e1e3c
+.long	0x92878715,0x92878715
+.long	0x20e9e9c9,0x20e9e9c9
+.long	0x49cece87,0x49cece87
+.long	0xff5555aa,0xff5555aa
+.long	0x78282850,0x78282850
+.long	0x7adfdfa5,0x7adfdfa5
+.long	0x8f8c8c03,0x8f8c8c03
+.long	0xf8a1a159,0xf8a1a159
+.long	0x80898909,0x80898909
+.long	0x170d0d1a,0x170d0d1a
+.long	0xdabfbf65,0xdabfbf65
+.long	0x31e6e6d7,0x31e6e6d7
+.long	0xc6424284,0xc6424284
+.long	0xb86868d0,0xb86868d0
+.long	0xc3414182,0xc3414182
+.long	0xb0999929,0xb0999929
+.long	0x772d2d5a,0x772d2d5a
+.long	0x110f0f1e,0x110f0f1e
+.long	0xcbb0b07b,0xcbb0b07b
+.long	0xfc5454a8,0xfc5454a8
+.long	0xd6bbbb6d,0xd6bbbb6d
+.long	0x3a16162c,0x3a16162c
+.byte	0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5	/* L$AES_Te + 2048: 256-byte S-box, copy 1 of 4 (the copy indexed by the key-expansion code) */
+.byte	0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte	0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte	0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte	0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte	0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte	0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte	0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte	0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte	0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte	0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte	0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte	0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte	0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte	0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte	0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte	0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte	0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte	0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte	0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte	0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte	0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte	0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte	0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte	0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte	0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte	0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte	0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte	0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte	0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte	0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte	0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte	0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5	/* S-box, copy 2 of 4 */
+.byte	0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte	0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte	0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte	0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte	0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte	0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte	0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte	0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte	0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte	0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte	0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte	0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte	0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte	0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte	0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte	0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte	0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte	0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte	0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte	0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte	0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte	0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte	0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte	0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte	0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte	0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte	0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte	0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte	0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte	0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte	0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte	0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5	/* S-box, copy 3 of 4 */
+.byte	0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte	0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte	0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte	0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte	0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte	0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte	0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte	0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte	0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte	0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte	0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte	0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte	0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte	0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte	0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte	0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte	0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte	0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte	0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte	0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte	0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte	0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte	0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte	0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte	0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte	0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte	0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte	0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte	0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte	0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte	0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte	0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5	/* S-box, copy 4 of 4 */
+.byte	0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte	0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte	0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte	0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte	0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte	0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte	0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte	0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte	0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte	0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte	0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte	0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte	0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte	0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte	0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte	0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte	0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte	0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte	0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte	0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte	0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte	0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte	0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte	0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte	0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte	0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte	0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte	0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte	0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte	0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte	0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.long	0x00000001, 0x00000002, 0x00000004, 0x00000008	/* L$AES_Te + 3072: round constants read by the key expansion, one 32-bit value per index */
+.long	0x00000010, 0x00000020, 0x00000040, 0x00000080
+.long	0x0000001b, 0x00000036, 0x80808080, 0x80808080	/* last two round constants, then the 8-byte mask at +40 (top bit of each byte) */
+.long	0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b	/* 8-byte masks at +48 and +56, loaded by the decrypt-key transform */
+.p2align	6
+L$AES_Td:
+.long	0x50a7f451,0x50a7f451
+.long	0x5365417e,0x5365417e
+.long	0xc3a4171a,0xc3a4171a
+.long	0x965e273a,0x965e273a
+.long	0xcb6bab3b,0xcb6bab3b
+.long	0xf1459d1f,0xf1459d1f
+.long	0xab58faac,0xab58faac
+.long	0x9303e34b,0x9303e34b
+.long	0x55fa3020,0x55fa3020
+.long	0xf66d76ad,0xf66d76ad
+.long	0x9176cc88,0x9176cc88
+.long	0x254c02f5,0x254c02f5
+.long	0xfcd7e54f,0xfcd7e54f
+.long	0xd7cb2ac5,0xd7cb2ac5
+.long	0x80443526,0x80443526
+.long	0x8fa362b5,0x8fa362b5
+.long	0x495ab1de,0x495ab1de
+.long	0x671bba25,0x671bba25
+.long	0x980eea45,0x980eea45
+.long	0xe1c0fe5d,0xe1c0fe5d
+.long	0x02752fc3,0x02752fc3
+.long	0x12f04c81,0x12f04c81
+.long	0xa397468d,0xa397468d
+.long	0xc6f9d36b,0xc6f9d36b
+.long	0xe75f8f03,0xe75f8f03
+.long	0x959c9215,0x959c9215
+.long	0xeb7a6dbf,0xeb7a6dbf
+.long	0xda595295,0xda595295
+.long	0x2d83bed4,0x2d83bed4
+.long	0xd3217458,0xd3217458
+.long	0x2969e049,0x2969e049
+.long	0x44c8c98e,0x44c8c98e
+.long	0x6a89c275,0x6a89c275
+.long	0x78798ef4,0x78798ef4
+.long	0x6b3e5899,0x6b3e5899
+.long	0xdd71b927,0xdd71b927
+.long	0xb64fe1be,0xb64fe1be
+.long	0x17ad88f0,0x17ad88f0
+.long	0x66ac20c9,0x66ac20c9
+.long	0xb43ace7d,0xb43ace7d
+.long	0x184adf63,0x184adf63
+.long	0x82311ae5,0x82311ae5
+.long	0x60335197,0x60335197
+.long	0x457f5362,0x457f5362
+.long	0xe07764b1,0xe07764b1
+.long	0x84ae6bbb,0x84ae6bbb
+.long	0x1ca081fe,0x1ca081fe
+.long	0x942b08f9,0x942b08f9
+.long	0x58684870,0x58684870
+.long	0x19fd458f,0x19fd458f
+.long	0x876cde94,0x876cde94
+.long	0xb7f87b52,0xb7f87b52
+.long	0x23d373ab,0x23d373ab
+.long	0xe2024b72,0xe2024b72
+.long	0x578f1fe3,0x578f1fe3
+.long	0x2aab5566,0x2aab5566
+.long	0x0728ebb2,0x0728ebb2
+.long	0x03c2b52f,0x03c2b52f
+.long	0x9a7bc586,0x9a7bc586
+.long	0xa50837d3,0xa50837d3
+.long	0xf2872830,0xf2872830
+.long	0xb2a5bf23,0xb2a5bf23
+.long	0xba6a0302,0xba6a0302
+.long	0x5c8216ed,0x5c8216ed
+.long	0x2b1ccf8a,0x2b1ccf8a
+.long	0x92b479a7,0x92b479a7
+.long	0xf0f207f3,0xf0f207f3
+.long	0xa1e2694e,0xa1e2694e
+.long	0xcdf4da65,0xcdf4da65
+.long	0xd5be0506,0xd5be0506
+.long	0x1f6234d1,0x1f6234d1
+.long	0x8afea6c4,0x8afea6c4
+.long	0x9d532e34,0x9d532e34
+.long	0xa055f3a2,0xa055f3a2
+.long	0x32e18a05,0x32e18a05
+.long	0x75ebf6a4,0x75ebf6a4
+.long	0x39ec830b,0x39ec830b
+.long	0xaaef6040,0xaaef6040
+.long	0x069f715e,0x069f715e
+.long	0x51106ebd,0x51106ebd
+.long	0xf98a213e,0xf98a213e
+.long	0x3d06dd96,0x3d06dd96
+.long	0xae053edd,0xae053edd
+.long	0x46bde64d,0x46bde64d
+.long	0xb58d5491,0xb58d5491
+.long	0x055dc471,0x055dc471
+.long	0x6fd40604,0x6fd40604
+.long	0xff155060,0xff155060
+.long	0x24fb9819,0x24fb9819
+.long	0x97e9bdd6,0x97e9bdd6
+.long	0xcc434089,0xcc434089
+.long	0x779ed967,0x779ed967
+.long	0xbd42e8b0,0xbd42e8b0
+.long	0x888b8907,0x888b8907
+.long	0x385b19e7,0x385b19e7
+.long	0xdbeec879,0xdbeec879
+.long	0x470a7ca1,0x470a7ca1
+.long	0xe90f427c,0xe90f427c
+.long	0xc91e84f8,0xc91e84f8
+.long	0x00000000,0x00000000
+.long	0x83868009,0x83868009
+.long	0x48ed2b32,0x48ed2b32
+.long	0xac70111e,0xac70111e
+.long	0x4e725a6c,0x4e725a6c
+.long	0xfbff0efd,0xfbff0efd
+.long	0x5638850f,0x5638850f
+.long	0x1ed5ae3d,0x1ed5ae3d
+.long	0x27392d36,0x27392d36
+.long	0x64d90f0a,0x64d90f0a
+.long	0x21a65c68,0x21a65c68
+.long	0xd1545b9b,0xd1545b9b
+.long	0x3a2e3624,0x3a2e3624
+.long	0xb1670a0c,0xb1670a0c
+.long	0x0fe75793,0x0fe75793
+.long	0xd296eeb4,0xd296eeb4
+.long	0x9e919b1b,0x9e919b1b
+.long	0x4fc5c080,0x4fc5c080
+.long	0xa220dc61,0xa220dc61
+.long	0x694b775a,0x694b775a
+.long	0x161a121c,0x161a121c
+.long	0x0aba93e2,0x0aba93e2
+.long	0xe52aa0c0,0xe52aa0c0
+.long	0x43e0223c,0x43e0223c
+.long	0x1d171b12,0x1d171b12
+.long	0x0b0d090e,0x0b0d090e
+.long	0xadc78bf2,0xadc78bf2
+.long	0xb9a8b62d,0xb9a8b62d
+.long	0xc8a91e14,0xc8a91e14
+.long	0x8519f157,0x8519f157
+.long	0x4c0775af,0x4c0775af
+.long	0xbbdd99ee,0xbbdd99ee
+.long	0xfd607fa3,0xfd607fa3
+.long	0x9f2601f7,0x9f2601f7
+.long	0xbcf5725c,0xbcf5725c
+.long	0xc53b6644,0xc53b6644
+.long	0x347efb5b,0x347efb5b
+.long	0x7629438b,0x7629438b
+.long	0xdcc623cb,0xdcc623cb
+.long	0x68fcedb6,0x68fcedb6
+.long	0x63f1e4b8,0x63f1e4b8
+.long	0xcadc31d7,0xcadc31d7
+.long	0x10856342,0x10856342
+.long	0x40229713,0x40229713
+.long	0x2011c684,0x2011c684
+.long	0x7d244a85,0x7d244a85
+.long	0xf83dbbd2,0xf83dbbd2
+.long	0x1132f9ae,0x1132f9ae
+.long	0x6da129c7,0x6da129c7
+.long	0x4b2f9e1d,0x4b2f9e1d
+.long	0xf330b2dc,0xf330b2dc
+.long	0xec52860d,0xec52860d
+.long	0xd0e3c177,0xd0e3c177
+.long	0x6c16b32b,0x6c16b32b
+.long	0x99b970a9,0x99b970a9
+.long	0xfa489411,0xfa489411
+.long	0x2264e947,0x2264e947
+.long	0xc48cfca8,0xc48cfca8
+.long	0x1a3ff0a0,0x1a3ff0a0
+.long	0xd82c7d56,0xd82c7d56
+.long	0xef903322,0xef903322
+.long	0xc74e4987,0xc74e4987
+.long	0xc1d138d9,0xc1d138d9
+.long	0xfea2ca8c,0xfea2ca8c
+.long	0x360bd498,0x360bd498
+.long	0xcf81f5a6,0xcf81f5a6
+.long	0x28de7aa5,0x28de7aa5
+.long	0x268eb7da,0x268eb7da
+.long	0xa4bfad3f,0xa4bfad3f
+.long	0xe49d3a2c,0xe49d3a2c
+.long	0x0d927850,0x0d927850
+.long	0x9bcc5f6a,0x9bcc5f6a
+.long	0x62467e54,0x62467e54
+.long	0xc2138df6,0xc2138df6
+.long	0xe8b8d890,0xe8b8d890
+.long	0x5ef7392e,0x5ef7392e
+.long	0xf5afc382,0xf5afc382
+.long	0xbe805d9f,0xbe805d9f
+.long	0x7c93d069,0x7c93d069
+.long	0xa92dd56f,0xa92dd56f
+.long	0xb31225cf,0xb31225cf
+.long	0x3b99acc8,0x3b99acc8
+.long	0xa77d1810,0xa77d1810
+.long	0x6e639ce8,0x6e639ce8
+.long	0x7bbb3bdb,0x7bbb3bdb
+.long	0x097826cd,0x097826cd
+.long	0xf418596e,0xf418596e
+.long	0x01b79aec,0x01b79aec
+.long	0xa89a4f83,0xa89a4f83
+.long	0x656e95e6,0x656e95e6
+.long	0x7ee6ffaa,0x7ee6ffaa
+.long	0x08cfbc21,0x08cfbc21
+.long	0xe6e815ef,0xe6e815ef
+.long	0xd99be7ba,0xd99be7ba
+.long	0xce366f4a,0xce366f4a
+.long	0xd4099fea,0xd4099fea
+.long	0xd67cb029,0xd67cb029
+.long	0xafb2a431,0xafb2a431
+.long	0x31233f2a,0x31233f2a
+.long	0x3094a5c6,0x3094a5c6
+.long	0xc066a235,0xc066a235
+.long	0x37bc4e74,0x37bc4e74
+.long	0xa6ca82fc,0xa6ca82fc
+.long	0xb0d090e0,0xb0d090e0
+.long	0x15d8a733,0x15d8a733
+.long	0x4a9804f1,0x4a9804f1
+.long	0xf7daec41,0xf7daec41
+.long	0x0e50cd7f,0x0e50cd7f
+.long	0x2ff69117,0x2ff69117
+.long	0x8dd64d76,0x8dd64d76
+.long	0x4db0ef43,0x4db0ef43
+.long	0x544daacc,0x544daacc
+.long	0xdf0496e4,0xdf0496e4
+.long	0xe3b5d19e,0xe3b5d19e
+.long	0x1b886a4c,0x1b886a4c
+.long	0xb81f2cc1,0xb81f2cc1
+.long	0x7f516546,0x7f516546
+.long	0x04ea5e9d,0x04ea5e9d
+.long	0x5d358c01,0x5d358c01
+.long	0x737487fa,0x737487fa
+.long	0x2e410bfb,0x2e410bfb
+.long	0x5a1d67b3,0x5a1d67b3
+.long	0x52d2db92,0x52d2db92
+.long	0x335610e9,0x335610e9
+.long	0x1347d66d,0x1347d66d
+.long	0x8c61d79a,0x8c61d79a
+.long	0x7a0ca137,0x7a0ca137
+.long	0x8e14f859,0x8e14f859
+.long	0x893c13eb,0x893c13eb
+.long	0xee27a9ce,0xee27a9ce
+.long	0x35c961b7,0x35c961b7
+.long	0xede51ce1,0xede51ce1
+.long	0x3cb1477a,0x3cb1477a
+.long	0x59dfd29c,0x59dfd29c
+.long	0x3f73f255,0x3f73f255
+.long	0x79ce1418,0x79ce1418
+.long	0xbf37c773,0xbf37c773
+.long	0xeacdf753,0xeacdf753
+.long	0x5baafd5f,0x5baafd5f
+.long	0x146f3ddf,0x146f3ddf
+.long	0x86db4478,0x86db4478
+.long	0x81f3afca,0x81f3afca
+.long	0x3ec468b9,0x3ec468b9
+.long	0x2c342438,0x2c342438
+.long	0x5f40a3c2,0x5f40a3c2
+.long	0x72c31d16,0x72c31d16
+.long	0x0c25e2bc,0x0c25e2bc
+.long	0x8b493c28,0x8b493c28
+.long	0x41950dff,0x41950dff
+.long	0x7101a839,0x7101a839
+.long	0xdeb30c08,0xdeb30c08
+.long	0x9ce4b4d8,0x9ce4b4d8
+.long	0x90c15664,0x90c15664
+.long	0x6184cb7b,0x6184cb7b
+.long	0x70b632d5,0x70b632d5
+.long	0x745c6c48,0x745c6c48
+.long	0x4257b8d0,0x4257b8d0
+.byte	0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte	0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte	0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte	0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte	0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte	0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte	0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte	0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte	0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte	0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte	0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte	0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte	0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte	0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte	0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte	0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte	0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte	0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte	0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte	0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte	0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte	0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte	0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte	0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte	0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte	0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte	0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte	0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte	0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte	0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte	0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte	0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long	0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long	0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte	0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte	0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte	0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte	0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte	0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte	0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte	0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte	0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte	0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte	0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte	0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte	0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte	0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte	0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte	0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte	0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte	0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte	0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte	0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte	0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte	0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte	0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte	0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte	0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte	0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte	0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte	0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte	0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte	0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte	0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte	0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte	0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long	0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long	0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte	0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte	0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte	0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte	0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte	0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte	0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte	0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte	0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte	0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte	0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte	0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte	0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte	0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte	0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte	0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte	0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte	0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte	0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte	0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte	0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte	0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte	0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte	0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte	0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte	0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte	0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte	0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte	0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte	0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte	0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte	0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte	0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long	0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long	0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte	0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte	0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte	0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte	0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte	0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte	0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte	0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte	0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte	0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte	0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte	0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte	0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte	0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte	0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte	0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte	0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte	0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte	0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte	0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte	0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte	0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte	0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte	0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte	0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte	0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte	0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte	0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte	0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte	0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte	0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte	0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte	0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long	0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long	0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte	65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align	6
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
new file mode 100644
index 0000000..2513904
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
@@ -0,0 +1,834 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+
+.p2align	5
+_aesni_ctr32_ghash_6x:
+/* Local helper called by _aesni_gcm_decrypt and _aesni_gcm_encrypt: each pass runs six counter blocks (xmm9-xmm14) through the AES rounds while vpclmulqdq work is interleaved between them. Register state is prepared by the callers. */
+	vmovdqu	32(%r11),%xmm2	/* callers point r11 at L$bswap_mask, so this is L$one_msb */
+	subq	$6,%rdx	/* rdx counts 16-byte blocks (callers shift the byte length right by 4) */
+	vpxor	%xmm4,%xmm4,%xmm4	/* xmm4 = 0 */
+	vmovdqu	0-128(%rcx),%xmm15	/* callers biased rcx by +128, so this reads key offset 0 */
+	vpaddb	%xmm2,%xmm1,%xmm10	/* xmm10..xmm14 = counter block xmm1 with byte 15 bumped by 1..5 */
+	vpaddb	%xmm2,%xmm10,%xmm11
+	vpaddb	%xmm2,%xmm11,%xmm12
+	vpaddb	%xmm2,%xmm12,%xmm13
+	vpaddb	%xmm2,%xmm13,%xmm14
+	vpxor	%xmm15,%xmm1,%xmm9	/* xmm9 = counter block ^ key offset 0 */
+	vmovdqu	%xmm4,16+8(%rsp)	/* zero the slot at rsp+24 (rsp+16 in the caller frame; +8 skips the return address) */
+	jmp	L$oop6x
+
+.p2align	5
+L$oop6x:
+	addl	$100663296,%ebx	/* add 6<<24 to the dword callers loaded from 12(%r8) */
+	jc	L$handle_ctr32	/* carry: the byte-wise vpaddb increments would wrap, take the slow path */
+	vmovdqu	0-32(%r9),%xmm3
+	vpaddb	%xmm2,%xmm14,%xmm1	/* xmm1 = counter block for the next pass */
+	vpxor	%xmm15,%xmm10,%xmm10
+	vpxor	%xmm15,%xmm11,%xmm11
+
+L$resume_ctr32:
+	vmovdqu	%xmm1,(%r8)	/* save the next counter block; reloaded in L$enc_tail */
+	vpclmulqdq	$0x10,%xmm3,%xmm7,%xmm5
+	vpxor	%xmm15,%xmm12,%xmm12
+	vmovups	16-128(%rcx),%xmm2
+	vpclmulqdq	$0x01,%xmm3,%xmm7,%xmm6
+/* From here the pass interleaves three streams of work: */
+/*  - vaesenc rounds on xmm9-xmm14, round keys read at N-128(%rcx); */
+/*  - vpclmulqdq products of xmm7/xmm0/xmm8 with values read at N-32(%r9), */
+/*    accumulated with vpxor into xmm4, xmm6 and xmm7; */
+/*  - movbe loads that byte-swap 8-byte words at r14 into stack slots. */
+
+
+
+
+
+
+
+
+
+
+
+
+	xorq	%r12,%r12
+	cmpq	%r14,%r15	/* CF = (r15 < r14), unsigned; consumed by setnc below */
+
+	vaesenc	%xmm2,%xmm9,%xmm9
+	vmovdqu	48+8(%rsp),%xmm0
+	vpxor	%xmm15,%xmm13,%xmm13
+	vpclmulqdq	$0x00,%xmm3,%xmm7,%xmm1
+	vaesenc	%xmm2,%xmm10,%xmm10
+	vpxor	%xmm15,%xmm14,%xmm14
+	setnc	%r12b	/* r12 = 1 when r15 >= r14; the AVX ops in between leave EFLAGS untouched */
+	vpclmulqdq	$0x11,%xmm3,%xmm7,%xmm7
+	vaesenc	%xmm2,%xmm11,%xmm11
+	vmovdqu	16-32(%r9),%xmm3
+	negq	%r12	/* r12 = 0 or -1 */
+	vaesenc	%xmm2,%xmm12,%xmm12
+	vpxor	%xmm5,%xmm6,%xmm6
+	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm5
+	vpxor	%xmm4,%xmm8,%xmm8
+	vaesenc	%xmm2,%xmm13,%xmm13
+	vpxor	%xmm5,%xmm1,%xmm4
+	andq	$0x60,%r12	/* r12 = 0 or 96 */
+	vmovups	32-128(%rcx),%xmm15
+	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm1
+	vaesenc	%xmm2,%xmm14,%xmm14
+
+	vpclmulqdq	$0x01,%xmm3,%xmm0,%xmm2
+	leaq	(%r14,%r12,1),%r14	/* advance r14 by 96 bytes unless it is already past r15 */
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vpxor	16+8(%rsp),%xmm8,%xmm8
+	vpclmulqdq	$0x11,%xmm3,%xmm0,%xmm3
+	vmovdqu	64+8(%rsp),%xmm0
+	vaesenc	%xmm15,%xmm10,%xmm10
+	movbeq	88(%r14),%r13	/* movbe: 8-byte load with byte order reversed */
+	vaesenc	%xmm15,%xmm11,%xmm11
+	movbeq	80(%r14),%r12
+	vaesenc	%xmm15,%xmm12,%xmm12
+	movq	%r13,32+8(%rsp)	/* the 16 bytes at 32+8(%rsp) are read back into xmm7 before the next pass */
+	vaesenc	%xmm15,%xmm13,%xmm13
+	movq	%r12,40+8(%rsp)
+	vmovdqu	48-32(%r9),%xmm5
+	vaesenc	%xmm15,%xmm14,%xmm14
+
+	vmovups	48-128(%rcx),%xmm15
+	vpxor	%xmm1,%xmm6,%xmm6
+	vpclmulqdq	$0x00,%xmm5,%xmm0,%xmm1
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vpxor	%xmm2,%xmm6,%xmm6
+	vpclmulqdq	$0x10,%xmm5,%xmm0,%xmm2
+	vaesenc	%xmm15,%xmm10,%xmm10
+	vpxor	%xmm3,%xmm7,%xmm7
+	vpclmulqdq	$0x01,%xmm5,%xmm0,%xmm3
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vpclmulqdq	$0x11,%xmm5,%xmm0,%xmm5
+	vmovdqu	80+8(%rsp),%xmm0
+	vaesenc	%xmm15,%xmm12,%xmm12
+	vaesenc	%xmm15,%xmm13,%xmm13
+	vpxor	%xmm1,%xmm4,%xmm4
+	vmovdqu	64-32(%r9),%xmm1
+	vaesenc	%xmm15,%xmm14,%xmm14
+
+	vmovups	64-128(%rcx),%xmm15
+	vpxor	%xmm2,%xmm6,%xmm6
+	vpclmulqdq	$0x00,%xmm1,%xmm0,%xmm2
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vpxor	%xmm3,%xmm6,%xmm6
+	vpclmulqdq	$0x10,%xmm1,%xmm0,%xmm3
+	vaesenc	%xmm15,%xmm10,%xmm10
+	movbeq	72(%r14),%r13
+	vpxor	%xmm5,%xmm7,%xmm7
+	vpclmulqdq	$0x01,%xmm1,%xmm0,%xmm5
+	vaesenc	%xmm15,%xmm11,%xmm11
+	movbeq	64(%r14),%r12
+	vpclmulqdq	$0x11,%xmm1,%xmm0,%xmm1
+	vmovdqu	96+8(%rsp),%xmm0
+	vaesenc	%xmm15,%xmm12,%xmm12
+	movq	%r13,48+8(%rsp)
+	vaesenc	%xmm15,%xmm13,%xmm13
+	movq	%r12,56+8(%rsp)
+	vpxor	%xmm2,%xmm4,%xmm4
+	vmovdqu	96-32(%r9),%xmm2
+	vaesenc	%xmm15,%xmm14,%xmm14
+
+	vmovups	80-128(%rcx),%xmm15
+	vpxor	%xmm3,%xmm6,%xmm6
+	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm3
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vpxor	%xmm5,%xmm6,%xmm6
+	vpclmulqdq	$0x10,%xmm2,%xmm0,%xmm5
+	vaesenc	%xmm15,%xmm10,%xmm10
+	movbeq	56(%r14),%r13
+	vpxor	%xmm1,%xmm7,%xmm7
+	vpclmulqdq	$0x01,%xmm2,%xmm0,%xmm1
+	vpxor	112+8(%rsp),%xmm8,%xmm8
+	vaesenc	%xmm15,%xmm11,%xmm11
+	movbeq	48(%r14),%r12
+	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm2
+	vaesenc	%xmm15,%xmm12,%xmm12
+	movq	%r13,64+8(%rsp)
+	vaesenc	%xmm15,%xmm13,%xmm13
+	movq	%r12,72+8(%rsp)
+	vpxor	%xmm3,%xmm4,%xmm4
+	vmovdqu	112-32(%r9),%xmm3
+	vaesenc	%xmm15,%xmm14,%xmm14
+
+	vmovups	96-128(%rcx),%xmm15
+	vpxor	%xmm5,%xmm6,%xmm6
+	vpclmulqdq	$0x10,%xmm3,%xmm8,%xmm5
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vpxor	%xmm1,%xmm6,%xmm6
+	vpclmulqdq	$0x01,%xmm3,%xmm8,%xmm1
+	vaesenc	%xmm15,%xmm10,%xmm10
+	movbeq	40(%r14),%r13
+	vpxor	%xmm2,%xmm7,%xmm7
+	vpclmulqdq	$0x00,%xmm3,%xmm8,%xmm2
+	vaesenc	%xmm15,%xmm11,%xmm11
+	movbeq	32(%r14),%r12
+	vpclmulqdq	$0x11,%xmm3,%xmm8,%xmm8
+	vaesenc	%xmm15,%xmm12,%xmm12
+	movq	%r13,80+8(%rsp)
+	vaesenc	%xmm15,%xmm13,%xmm13
+	movq	%r12,88+8(%rsp)
+	vpxor	%xmm5,%xmm6,%xmm6
+	vaesenc	%xmm15,%xmm14,%xmm14
+	vpxor	%xmm1,%xmm6,%xmm6
+
+	vmovups	112-128(%rcx),%xmm15
+	vpslldq	$8,%xmm6,%xmm5	/* split xmm6: low half shifted up here, high half shifted down further below */
+	vpxor	%xmm2,%xmm4,%xmm4
+	vmovdqu	16(%r11),%xmm3	/* xmm3 = L$poly */
+
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vpxor	%xmm8,%xmm7,%xmm7
+	vaesenc	%xmm15,%xmm10,%xmm10
+	vpxor	%xmm5,%xmm4,%xmm4
+	movbeq	24(%r14),%r13
+	vaesenc	%xmm15,%xmm11,%xmm11
+	movbeq	16(%r14),%r12
+	vpalignr	$8,%xmm4,%xmm4,%xmm0	/* xmm0 = xmm4 with its 64-bit halves swapped */
+	vpclmulqdq	$0x10,%xmm3,%xmm4,%xmm4	/* multiply by L$poly; presumably the first reduction step */
+	movq	%r13,96+8(%rsp)
+	vaesenc	%xmm15,%xmm12,%xmm12
+	movq	%r12,104+8(%rsp)
+	vaesenc	%xmm15,%xmm13,%xmm13
+	vmovups	128-128(%rcx),%xmm1
+	vaesenc	%xmm15,%xmm14,%xmm14
+
+	vaesenc	%xmm1,%xmm9,%xmm9
+	vmovups	144-128(%rcx),%xmm15
+	vaesenc	%xmm1,%xmm10,%xmm10
+	vpsrldq	$8,%xmm6,%xmm6
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vpxor	%xmm6,%xmm7,%xmm7
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vpxor	%xmm0,%xmm4,%xmm4
+	movbeq	8(%r14),%r13	/* r13/r12 from these two loads are stored in L$enc_tail */
+	vaesenc	%xmm1,%xmm13,%xmm13
+	movbeq	0(%r14),%r12
+	vaesenc	%xmm1,%xmm14,%xmm14
+	vmovups	160-128(%rcx),%xmm1
+	cmpl	$11,%ebp	/* ebp = dword the callers read from key offset 240 (presumably the round count) */
+	jb	L$enc_tail	/* below 11: no extra rounds */
+
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vaesenc	%xmm15,%xmm10,%xmm10
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vaesenc	%xmm15,%xmm12,%xmm12
+	vaesenc	%xmm15,%xmm13,%xmm13
+	vaesenc	%xmm15,%xmm14,%xmm14
+
+	vaesenc	%xmm1,%xmm9,%xmm9
+	vaesenc	%xmm1,%xmm10,%xmm10
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+	vmovups	176-128(%rcx),%xmm15
+	vaesenc	%xmm1,%xmm14,%xmm14
+	vmovups	192-128(%rcx),%xmm1
+	je	L$enc_tail	/* flags are still those of the cmpl above: ebp == 11 */
+
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vaesenc	%xmm15,%xmm10,%xmm10
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vaesenc	%xmm15,%xmm12,%xmm12
+	vaesenc	%xmm15,%xmm13,%xmm13
+	vaesenc	%xmm15,%xmm14,%xmm14
+
+	vaesenc	%xmm1,%xmm9,%xmm9
+	vaesenc	%xmm1,%xmm10,%xmm10
+	vaesenc	%xmm1,%xmm11,%xmm11
+	vaesenc	%xmm1,%xmm12,%xmm12
+	vaesenc	%xmm1,%xmm13,%xmm13
+	vmovups	208-128(%rcx),%xmm15
+	vaesenc	%xmm1,%xmm14,%xmm14
+	vmovups	224-128(%rcx),%xmm1
+	jmp	L$enc_tail
+
+.p2align	5
+L$handle_ctr32:	/* slow path: build the next counters with 32-bit adds on the byte-swapped block */
+	vmovdqu	(%r11),%xmm0	/* xmm0 = L$bswap_mask */
+	vpshufb	%xmm0,%xmm1,%xmm6	/* xmm6 = byte-reversed counter block */
+	vmovdqu	48(%r11),%xmm5	/* xmm5 = L$two_lsb */
+	vpaddd	64(%r11),%xmm6,%xmm10	/* +1 (L$one_lsb) */
+	vpaddd	%xmm5,%xmm6,%xmm11	/* +2 */
+	vmovdqu	0-32(%r9),%xmm3
+	vpaddd	%xmm5,%xmm10,%xmm12	/* +3 */
+	vpshufb	%xmm0,%xmm10,%xmm10	/* each result is byte-reversed back before use */
+	vpaddd	%xmm5,%xmm11,%xmm13	/* +4 */
+	vpshufb	%xmm0,%xmm11,%xmm11
+	vpxor	%xmm15,%xmm10,%xmm10
+	vpaddd	%xmm5,%xmm12,%xmm14	/* +5 */
+	vpshufb	%xmm0,%xmm12,%xmm12
+	vpxor	%xmm15,%xmm11,%xmm11
+	vpaddd	%xmm5,%xmm13,%xmm1	/* +6: counter block for the next pass */
+	vpshufb	%xmm0,%xmm13,%xmm13
+	vpshufb	%xmm0,%xmm14,%xmm14
+	vpshufb	%xmm0,%xmm1,%xmm1
+	jmp	L$resume_ctr32
+
+.p2align	5
+L$enc_tail:	/* last two AES rounds, output XOR, and loop bookkeeping */
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vmovdqu	%xmm7,16+8(%rsp)
+	vpalignr	$8,%xmm4,%xmm4,%xmm8
+	vaesenc	%xmm15,%xmm10,%xmm10
+	vpclmulqdq	$0x10,%xmm3,%xmm4,%xmm4	/* second multiply by L$poly (xmm3) */
+	vpxor	0(%rdi),%xmm1,%xmm2	/* fold each input block into the last round key (xmm1) ... */
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vpxor	16(%rdi),%xmm1,%xmm0
+	vaesenc	%xmm15,%xmm12,%xmm12
+	vpxor	32(%rdi),%xmm1,%xmm5
+	vaesenc	%xmm15,%xmm13,%xmm13
+	vpxor	48(%rdi),%xmm1,%xmm6
+	vaesenc	%xmm15,%xmm14,%xmm14
+	vpxor	64(%rdi),%xmm1,%xmm7
+	vpxor	80(%rdi),%xmm1,%xmm3
+	vmovdqu	(%r8),%xmm1	/* reload the counter block saved at L$resume_ctr32 */
+
+	vaesenclast	%xmm2,%xmm9,%xmm9	/* ... so vaesenclast yields keystream ^ input in one step */
+	vmovdqu	32(%r11),%xmm2	/* xmm2 = L$one_msb again */
+	vaesenclast	%xmm0,%xmm10,%xmm10
+	vpaddb	%xmm2,%xmm1,%xmm0	/* xmm0, xmm5, xmm6, xmm7, xmm3 = next counters +1..+5 */
+	movq	%r13,112+8(%rsp)
+	leaq	96(%rdi),%rdi	/* input pointer += 6 blocks */
+	vaesenclast	%xmm5,%xmm11,%xmm11
+	vpaddb	%xmm2,%xmm0,%xmm5
+	movq	%r12,120+8(%rsp)
+	leaq	96(%rsi),%rsi	/* output pointer += 6 blocks */
+	vmovdqu	0-128(%rcx),%xmm15	/* key offset 0 for the next pass */
+	vaesenclast	%xmm6,%xmm12,%xmm12
+	vpaddb	%xmm2,%xmm5,%xmm6
+	vaesenclast	%xmm7,%xmm13,%xmm13
+	vpaddb	%xmm2,%xmm6,%xmm7
+	vaesenclast	%xmm3,%xmm14,%xmm14
+	vpaddb	%xmm2,%xmm7,%xmm3
+
+	addq	$0x60,%r10	/* r10 += 96; the callers return r10 in rax */
+	subq	$0x6,%rdx
+	jc	L$6x_done	/* block count went below zero: this was the last pass */
+
+	vmovups	%xmm9,-96(%rsi)	/* store the six result blocks and move the next counters into place */
+	vpxor	%xmm15,%xmm1,%xmm9
+	vmovups	%xmm10,-80(%rsi)
+	vmovdqa	%xmm0,%xmm10
+	vmovups	%xmm11,-64(%rsi)
+	vmovdqa	%xmm5,%xmm11
+	vmovups	%xmm12,-48(%rsi)
+	vmovdqa	%xmm6,%xmm12
+	vmovups	%xmm13,-32(%rsi)
+	vmovdqa	%xmm7,%xmm13
+	vmovups	%xmm14,-16(%rsi)
+	vmovdqa	%xmm3,%xmm14
+	vmovdqu	32+8(%rsp),%xmm7
+	jmp	L$oop6x
+
+L$6x_done:	/* the final six result blocks stay in xmm9-xmm14; the callers store them */
+	vpxor	16+8(%rsp),%xmm8,%xmm8
+	vpxor	%xmm4,%xmm8,%xmm8
+
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+.globl	_aesni_gcm_decrypt
+.private_extern _aesni_gcm_decrypt
+
+.p2align	5
+_aesni_gcm_decrypt:
+/* Register use visible below: rdi = input, rsi = output, rdx = byte length, rcx = key, r8 = 16-byte counter block, r9 = 16-byte hash state followed by a table read at offset 32 and up (presumably hash-key powers). Returns in rax the byte count accumulated in r10, or 0 when rdx < 0x60. */
+	xorq	%r10,%r10	/* r10 = bytes processed so far = 0 */
+
+
+
+	cmpq	$0x60,%rdx
+	jb	L$gcm_dec_abort	/* fewer than 96 bytes (6 blocks): do nothing, return 0 */
+
+	leaq	(%rsp),%rax	/* rax = entry rsp; the epilogue restores registers relative to it */
+
+	pushq	%rbx
+
+	pushq	%rbp
+
+	pushq	%r12
+
+	pushq	%r13
+
+	pushq	%r14
+
+	pushq	%r15
+
+	vzeroupper
+
+	vmovdqu	(%r8),%xmm1	/* xmm1 = counter block */
+	addq	$-128,%rsp	/* reserve 128 bytes of scratch */
+	movl	12(%r8),%ebx	/* ebx = last 4 bytes of the counter block, loaded little-endian */
+	leaq	L$bswap_mask(%rip),%r11	/* r11 = base of the constant table */
+	leaq	-128(%rcx),%r14
+	movq	$0xf80,%r15
+	vmovdqu	(%r9),%xmm8	/* xmm8 = hash state */
+	andq	$-128,%rsp	/* align the scratch area to 128 bytes */
+	vmovdqu	(%r11),%xmm0	/* xmm0 = L$bswap_mask */
+	leaq	128(%rcx),%rcx	/* bias the key pointer: key offset N is addressed as N-128(%rcx) */
+	leaq	32+32(%r9),%r9	/* bias r9 by 64: table entries are addressed as N-32(%r9) */
+	movl	240-128(%rcx),%ebp	/* ebp = dword at key offset 240 (presumably the round count) */
+	vpshufb	%xmm0,%xmm8,%xmm8	/* byte-reverse the hash state */
+
+	andq	%r15,%r14	/* compare bits 7..11 of (key - 128) and of rsp ... */
+	andq	%rsp,%r15
+	subq	%r14,%r15
+	jc	L$dec_no_key_aliasing
+	cmpq	$768,%r15
+	jnc	L$dec_no_key_aliasing
+	subq	%r15,%rsp	/* ... and lower rsp when 0 <= difference < 768; presumably keeps the scratch area from aliasing the key schedule in cache */
+L$dec_no_key_aliasing:
+
+	vmovdqu	80(%rdi),%xmm7	/* preload the first six input blocks, byte-reversed below */
+	leaq	(%rdi),%r14	/* r14 = stream the helper reads with movbe (the input) */
+	vmovdqu	64(%rdi),%xmm4
+
+
+
+
+
+
+
+	leaq	-192(%rdi,%rdx,1),%r15	/* r15 = input + length - 192: limit compared against r14 in the helper */
+
+	vmovdqu	48(%rdi),%xmm5
+	shrq	$4,%rdx	/* byte length -> 16-byte block count */
+	xorq	%r10,%r10
+	vmovdqu	32(%rdi),%xmm6
+	vpshufb	%xmm0,%xmm7,%xmm7
+	vmovdqu	16(%rdi),%xmm2
+	vpshufb	%xmm0,%xmm4,%xmm4
+	vmovdqu	(%rdi),%xmm3
+	vpshufb	%xmm0,%xmm5,%xmm5
+	vmovdqu	%xmm4,48(%rsp)	/* blocks at input offsets 64,48,32,16,0 go to 48..112(%rsp); the block at 80 stays in xmm7 */
+	vpshufb	%xmm0,%xmm6,%xmm6
+	vmovdqu	%xmm5,64(%rsp)
+	vpshufb	%xmm0,%xmm2,%xmm2
+	vmovdqu	%xmm6,80(%rsp)
+	vpshufb	%xmm0,%xmm3,%xmm3
+	vmovdqu	%xmm2,96(%rsp)
+	vmovdqu	%xmm3,112(%rsp)
+
+	call	_aesni_ctr32_ghash_6x
+
+	vmovups	%xmm9,-96(%rsi)	/* the helper leaves its last six result blocks in xmm9-xmm14 */
+	vmovups	%xmm10,-80(%rsi)
+	vmovups	%xmm11,-64(%rsi)
+	vmovups	%xmm12,-48(%rsi)
+	vmovups	%xmm13,-32(%rsi)
+	vmovups	%xmm14,-16(%rsi)
+
+	vpshufb	(%r11),%xmm8,%xmm8	/* byte-reverse the hash state back */
+	vmovdqu	%xmm8,-64(%r9)	/* store it at the original r9 (r9 was biased by 64) */
+
+	vzeroupper
+	movq	-48(%rax),%r15	/* restore saved registers from below the entry rsp */
+
+	movq	-40(%rax),%r14
+
+	movq	-32(%rax),%r13
+
+	movq	-24(%rax),%r12
+
+	movq	-16(%rax),%rbp
+
+	movq	-8(%rax),%rbx
+
+	leaq	(%rax),%rsp
+
+L$gcm_dec_abort:
+	movq	%r10,%rax	/* return value = bytes processed */
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+
+.p2align	5
+_aesni_ctr32_6x:
+/* Local helper called twice by _aesni_gcm_encrypt: encrypts six counter blocks derived from xmm1, XORs them with 96 bytes at rdi, stores the result at rsi and advances both pointers by 96. Expects rcx biased by +128, r11 = L$bswap_mask, xmm0 = byte-swap mask, ebp/ebx as set by the caller. Uses xmm2-xmm6 and xmm8 as scratch. */
+	vmovdqu	0-128(%rcx),%xmm4	/* xmm4 = key offset 0 */
+	vmovdqu	32(%r11),%xmm2	/* xmm2 = L$one_msb */
+	leaq	-1(%rbp),%r13	/* loop count for L$oop_ctr32 = ebp - 1 */
+	vmovups	16-128(%rcx),%xmm15	/* xmm15 = key offset 16 */
+	leaq	32-128(%rcx),%r12	/* r12 walks the key schedule from offset 32 */
+	vpxor	%xmm4,%xmm1,%xmm9
+	addl	$100663296,%ebx	/* add 6<<24 to the counter dword */
+	jc	L$handle_ctr32_2	/* carry: the byte-wise increments below would wrap */
+	vpaddb	%xmm2,%xmm1,%xmm10	/* xmm10..xmm14 = counter with byte 15 bumped by 1..5, each XORed with key offset 0 */
+	vpaddb	%xmm2,%xmm10,%xmm11
+	vpxor	%xmm4,%xmm10,%xmm10
+	vpaddb	%xmm2,%xmm11,%xmm12
+	vpxor	%xmm4,%xmm11,%xmm11
+	vpaddb	%xmm2,%xmm12,%xmm13
+	vpxor	%xmm4,%xmm12,%xmm12
+	vpaddb	%xmm2,%xmm13,%xmm14
+	vpxor	%xmm4,%xmm13,%xmm13
+	vpaddb	%xmm2,%xmm14,%xmm1	/* xmm1 = counter for the next call (+6) */
+	vpxor	%xmm4,%xmm14,%xmm14
+	jmp	L$oop_ctr32
+
+.p2align	4
+L$oop_ctr32:	/* one AES round on all six blocks per iteration */
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vaesenc	%xmm15,%xmm10,%xmm10
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vaesenc	%xmm15,%xmm12,%xmm12
+	vaesenc	%xmm15,%xmm13,%xmm13
+	vaesenc	%xmm15,%xmm14,%xmm14
+	vmovups	(%r12),%xmm15	/* next round key */
+	leaq	16(%r12),%r12
+	decl	%r13d
+	jnz	L$oop_ctr32
+
+	vmovdqu	(%r12),%xmm3	/* xmm3 = last round key */
+	vaesenc	%xmm15,%xmm9,%xmm9
+	vpxor	0(%rdi),%xmm3,%xmm4	/* fold each input block into the last round key ... */
+	vaesenc	%xmm15,%xmm10,%xmm10
+	vpxor	16(%rdi),%xmm3,%xmm5
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vpxor	32(%rdi),%xmm3,%xmm6
+	vaesenc	%xmm15,%xmm12,%xmm12
+	vpxor	48(%rdi),%xmm3,%xmm8
+	vaesenc	%xmm15,%xmm13,%xmm13
+	vpxor	64(%rdi),%xmm3,%xmm2
+	vaesenc	%xmm15,%xmm14,%xmm14
+	vpxor	80(%rdi),%xmm3,%xmm3
+	leaq	96(%rdi),%rdi
+
+	vaesenclast	%xmm4,%xmm9,%xmm9	/* ... so the last round also applies the XOR with the input */
+	vaesenclast	%xmm5,%xmm10,%xmm10
+	vaesenclast	%xmm6,%xmm11,%xmm11
+	vaesenclast	%xmm8,%xmm12,%xmm12
+	vaesenclast	%xmm2,%xmm13,%xmm13
+	vaesenclast	%xmm3,%xmm14,%xmm14
+	vmovups	%xmm9,0(%rsi)
+	vmovups	%xmm10,16(%rsi)
+	vmovups	%xmm11,32(%rsi)
+	vmovups	%xmm12,48(%rsi)
+	vmovups	%xmm13,64(%rsi)
+	vmovups	%xmm14,80(%rsi)
+	leaq	96(%rsi),%rsi
+
+	.byte	0xf3,0xc3	/* rep ret */
+.p2align	5
+L$handle_ctr32_2:	/* slow path: 32-bit adds on the byte-reversed counter; xmm0 must hold the byte-swap mask */
+	vpshufb	%xmm0,%xmm1,%xmm6
+	vmovdqu	48(%r11),%xmm5	/* xmm5 = L$two_lsb */
+	vpaddd	64(%r11),%xmm6,%xmm10	/* +1 (L$one_lsb) */
+	vpaddd	%xmm5,%xmm6,%xmm11	/* +2 */
+	vpaddd	%xmm5,%xmm10,%xmm12	/* +3 */
+	vpshufb	%xmm0,%xmm10,%xmm10
+	vpaddd	%xmm5,%xmm11,%xmm13	/* +4 */
+	vpshufb	%xmm0,%xmm11,%xmm11
+	vpxor	%xmm4,%xmm10,%xmm10
+	vpaddd	%xmm5,%xmm12,%xmm14	/* +5 */
+	vpshufb	%xmm0,%xmm12,%xmm12
+	vpxor	%xmm4,%xmm11,%xmm11
+	vpaddd	%xmm5,%xmm13,%xmm1	/* +6: counter for the next call */
+	vpshufb	%xmm0,%xmm13,%xmm13
+	vpxor	%xmm4,%xmm12,%xmm12
+	vpshufb	%xmm0,%xmm14,%xmm14
+	vpxor	%xmm4,%xmm13,%xmm13
+	vpshufb	%xmm0,%xmm1,%xmm1
+	vpxor	%xmm4,%xmm14,%xmm14
+	jmp	L$oop_ctr32
+
+
+
+.globl	_aesni_gcm_encrypt
+.private_extern _aesni_gcm_encrypt
+
+.p2align	5
+_aesni_gcm_encrypt:
+/* Register use visible below: rdi = input, rsi = output, rdx = byte length, rcx = key, r8 = 16-byte counter block, r9 = 16-byte hash state followed by a table read at offset 32 and up (presumably hash-key powers). Returns in rax the byte count accumulated in r10, or 0 when rdx < 288. */
+	xorq	%r10,%r10	/* r10 = bytes processed so far = 0 */
+
+
+
+
+	cmpq	$288,%rdx
+	jb	L$gcm_enc_abort	/* fewer than 288 bytes (18 blocks): do nothing, return 0 */
+
+	leaq	(%rsp),%rax	/* rax = entry rsp; the epilogue restores registers relative to it */
+
+	pushq	%rbx
+
+	pushq	%rbp
+
+	pushq	%r12
+
+	pushq	%r13
+
+	pushq	%r14
+
+	pushq	%r15
+
+	vzeroupper
+
+	vmovdqu	(%r8),%xmm1	/* xmm1 = counter block */
+	addq	$-128,%rsp	/* reserve 128 bytes of scratch */
+	movl	12(%r8),%ebx	/* ebx = last 4 bytes of the counter block, loaded little-endian */
+	leaq	L$bswap_mask(%rip),%r11	/* r11 = base of the constant table */
+	leaq	-128(%rcx),%r14
+	movq	$0xf80,%r15
+	leaq	128(%rcx),%rcx	/* bias the key pointer: key offset N is addressed as N-128(%rcx) */
+	vmovdqu	(%r11),%xmm0	/* xmm0 = L$bswap_mask */
+	andq	$-128,%rsp	/* align the scratch area to 128 bytes */
+	movl	240-128(%rcx),%ebp	/* ebp = dword at key offset 240 (presumably the round count) */
+
+	andq	%r15,%r14	/* compare bits 7..11 of (key - 128) and of rsp ... */
+	andq	%rsp,%r15
+	subq	%r14,%r15
+	jc	L$enc_no_key_aliasing
+	cmpq	$768,%r15
+	jnc	L$enc_no_key_aliasing
+	subq	%r15,%rsp	/* ... and lower rsp when 0 <= difference < 768; presumably keeps the scratch area from aliasing the key schedule in cache */
+L$enc_no_key_aliasing:
+
+	leaq	(%rsi),%r14	/* r14 = stream the ghash helper reads with movbe (here the output buffer) */
+
+
+
+
+
+
+
+
+	leaq	-192(%rsi,%rdx,1),%r15	/* r15 = output + length - 192: limit compared against r14 in the helper */
+
+	shrq	$4,%rdx	/* byte length -> 16-byte block count */
+
+	call	_aesni_ctr32_6x	/* encrypt and store blocks 0..5 */
+	vpshufb	%xmm0,%xmm9,%xmm8	/* byte-reverse the six results: five go to 48..112(%rsp), the sixth stays in xmm7 */
+	vpshufb	%xmm0,%xmm10,%xmm2
+	vmovdqu	%xmm8,112(%rsp)
+	vpshufb	%xmm0,%xmm11,%xmm4
+	vmovdqu	%xmm2,96(%rsp)
+	vpshufb	%xmm0,%xmm12,%xmm5
+	vmovdqu	%xmm4,80(%rsp)
+	vpshufb	%xmm0,%xmm13,%xmm6
+	vmovdqu	%xmm5,64(%rsp)
+	vpshufb	%xmm0,%xmm14,%xmm7
+	vmovdqu	%xmm6,48(%rsp)
+
+	call	_aesni_ctr32_6x	/* encrypt and store blocks 6..11 */
+
+	vmovdqu	(%r9),%xmm8	/* load the hash state only now: _aesni_ctr32_6x uses xmm8 as scratch */
+	leaq	32+32(%r9),%r9	/* bias r9 by 64: table entries are addressed as N-32(%r9) */
+	subq	$12,%rdx	/* 12 blocks already encrypted */
+	movq	$192,%r10	/* = 12 * 16 bytes processed */
+	vpshufb	%xmm0,%xmm8,%xmm8	/* byte-reverse the hash state */
+
+	call	_aesni_ctr32_ghash_6x
+/* Tail: store the last six result blocks and fold the blocks still pending on the stack and in xmm9-xmm14 into xmm8 with vpclmulqdq. */
+	vmovdqu	32(%rsp),%xmm7
+	vmovdqu	(%r11),%xmm0	/* xmm0 = L$bswap_mask */
+	vmovdqu	0-32(%r9),%xmm3
+	vpunpckhqdq	%xmm7,%xmm7,%xmm1
+	vmovdqu	32-32(%r9),%xmm15
+	vmovups	%xmm9,-96(%rsi)	/* each of xmm9-xmm14 is stored, then byte-reversed in place */
+	vpshufb	%xmm0,%xmm9,%xmm9
+	vpxor	%xmm7,%xmm1,%xmm1	/* xmm1 = high half ^ low half of xmm7 in its low qword; same pattern repeats per block below */
+	vmovups	%xmm10,-80(%rsi)
+	vpshufb	%xmm0,%xmm10,%xmm10
+	vmovups	%xmm11,-64(%rsi)
+	vpshufb	%xmm0,%xmm11,%xmm11
+	vmovups	%xmm12,-48(%rsi)
+	vpshufb	%xmm0,%xmm12,%xmm12
+	vmovups	%xmm13,-32(%rsi)
+	vpshufb	%xmm0,%xmm13,%xmm13
+	vmovups	%xmm14,-16(%rsi)
+	vpshufb	%xmm0,%xmm14,%xmm14
+	vmovdqu	%xmm9,16(%rsp)	/* park byte-reversed xmm9; XORed back into xmm7 further below */
+	vmovdqu	48(%rsp),%xmm6
+	vmovdqu	16-32(%r9),%xmm0
+	vpunpckhqdq	%xmm6,%xmm6,%xmm2
+	vpclmulqdq	$0x00,%xmm3,%xmm7,%xmm5
+	vpxor	%xmm6,%xmm2,%xmm2
+	vpclmulqdq	$0x11,%xmm3,%xmm7,%xmm7
+	vpclmulqdq	$0x00,%xmm15,%xmm1,%xmm1
+
+	vmovdqu	64(%rsp),%xmm9
+	vpclmulqdq	$0x00,%xmm0,%xmm6,%xmm4
+	vmovdqu	48-32(%r9),%xmm3
+	vpxor	%xmm5,%xmm4,%xmm4
+	vpunpckhqdq	%xmm9,%xmm9,%xmm5
+	vpclmulqdq	$0x11,%xmm0,%xmm6,%xmm6
+	vpxor	%xmm9,%xmm5,%xmm5
+	vpxor	%xmm7,%xmm6,%xmm6
+	vpclmulqdq	$0x10,%xmm15,%xmm2,%xmm2
+	vmovdqu	80-32(%r9),%xmm15
+	vpxor	%xmm1,%xmm2,%xmm2
+
+	vmovdqu	80(%rsp),%xmm1
+	vpclmulqdq	$0x00,%xmm3,%xmm9,%xmm7
+	vmovdqu	64-32(%r9),%xmm0
+	vpxor	%xmm4,%xmm7,%xmm7
+	vpunpckhqdq	%xmm1,%xmm1,%xmm4
+	vpclmulqdq	$0x11,%xmm3,%xmm9,%xmm9
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpxor	%xmm6,%xmm9,%xmm9
+	vpclmulqdq	$0x00,%xmm15,%xmm5,%xmm5
+	vpxor	%xmm2,%xmm5,%xmm5
+
+	vmovdqu	96(%rsp),%xmm2
+	vpclmulqdq	$0x00,%xmm0,%xmm1,%xmm6
+	vmovdqu	96-32(%r9),%xmm3
+	vpxor	%xmm7,%xmm6,%xmm6
+	vpunpckhqdq	%xmm2,%xmm2,%xmm7
+	vpclmulqdq	$0x11,%xmm0,%xmm1,%xmm1
+	vpxor	%xmm2,%xmm7,%xmm7
+	vpxor	%xmm9,%xmm1,%xmm1
+	vpclmulqdq	$0x10,%xmm15,%xmm4,%xmm4
+	vmovdqu	128-32(%r9),%xmm15
+	vpxor	%xmm5,%xmm4,%xmm4
+
+	vpxor	112(%rsp),%xmm8,%xmm8	/* fold the hash state into the block taken from 112(%rsp) */
+	vpclmulqdq	$0x00,%xmm3,%xmm2,%xmm5
+	vmovdqu	112-32(%r9),%xmm0
+	vpunpckhqdq	%xmm8,%xmm8,%xmm9
+	vpxor	%xmm6,%xmm5,%xmm5
+	vpclmulqdq	$0x11,%xmm3,%xmm2,%xmm2
+	vpxor	%xmm8,%xmm9,%xmm9
+	vpxor	%xmm1,%xmm2,%xmm2
+	vpclmulqdq	$0x00,%xmm15,%xmm7,%xmm7
+	vpxor	%xmm4,%xmm7,%xmm4
+
+	vpclmulqdq	$0x00,%xmm0,%xmm8,%xmm6
+	vmovdqu	0-32(%r9),%xmm3	/* table restarts at offset 0 for the blocks held in xmm14..xmm10 */
+	vpunpckhqdq	%xmm14,%xmm14,%xmm1
+	vpclmulqdq	$0x11,%xmm0,%xmm8,%xmm8
+	vpxor	%xmm14,%xmm1,%xmm1
+	vpxor	%xmm5,%xmm6,%xmm5
+	vpclmulqdq	$0x10,%xmm15,%xmm9,%xmm9
+	vmovdqu	32-32(%r9),%xmm15
+	vpxor	%xmm2,%xmm8,%xmm7
+	vpxor	%xmm4,%xmm9,%xmm6
+
+	vmovdqu	16-32(%r9),%xmm0
+	vpxor	%xmm5,%xmm7,%xmm9
+	vpclmulqdq	$0x00,%xmm3,%xmm14,%xmm4
+	vpxor	%xmm9,%xmm6,%xmm6
+	vpunpckhqdq	%xmm13,%xmm13,%xmm2
+	vpclmulqdq	$0x11,%xmm3,%xmm14,%xmm14
+	vpxor	%xmm13,%xmm2,%xmm2
+	vpslldq	$8,%xmm6,%xmm9
+	vpclmulqdq	$0x00,%xmm15,%xmm1,%xmm1
+	vpxor	%xmm9,%xmm5,%xmm8
+	vpsrldq	$8,%xmm6,%xmm6
+	vpxor	%xmm6,%xmm7,%xmm7
+
+	vpclmulqdq	$0x00,%xmm0,%xmm13,%xmm5
+	vmovdqu	48-32(%r9),%xmm3
+	vpxor	%xmm4,%xmm5,%xmm5
+	vpunpckhqdq	%xmm12,%xmm12,%xmm9
+	vpclmulqdq	$0x11,%xmm0,%xmm13,%xmm13
+	vpxor	%xmm12,%xmm9,%xmm9
+	vpxor	%xmm14,%xmm13,%xmm13
+	vpalignr	$8,%xmm8,%xmm8,%xmm14
+	vpclmulqdq	$0x10,%xmm15,%xmm2,%xmm2
+	vmovdqu	80-32(%r9),%xmm15
+	vpxor	%xmm1,%xmm2,%xmm2
+
+	vpclmulqdq	$0x00,%xmm3,%xmm12,%xmm4
+	vmovdqu	64-32(%r9),%xmm0
+	vpxor	%xmm5,%xmm4,%xmm4
+	vpunpckhqdq	%xmm11,%xmm11,%xmm1
+	vpclmulqdq	$0x11,%xmm3,%xmm12,%xmm12
+	vpxor	%xmm11,%xmm1,%xmm1
+	vpxor	%xmm13,%xmm12,%xmm12
+	vxorps	16(%rsp),%xmm7,%xmm7	/* XOR in the block parked at 16(%rsp) above */
+	vpclmulqdq	$0x00,%xmm15,%xmm9,%xmm9
+	vpxor	%xmm2,%xmm9,%xmm9
+
+	vpclmulqdq	$0x10,16(%r11),%xmm8,%xmm8	/* multiply by L$poly; presumably a reduction step */
+	vxorps	%xmm14,%xmm8,%xmm8
+
+	vpclmulqdq	$0x00,%xmm0,%xmm11,%xmm5
+	vmovdqu	96-32(%r9),%xmm3
+	vpxor	%xmm4,%xmm5,%xmm5
+	vpunpckhqdq	%xmm10,%xmm10,%xmm2
+	vpclmulqdq	$0x11,%xmm0,%xmm11,%xmm11
+	vpxor	%xmm10,%xmm2,%xmm2
+	vpalignr	$8,%xmm8,%xmm8,%xmm14
+	vpxor	%xmm12,%xmm11,%xmm11
+	vpclmulqdq	$0x10,%xmm15,%xmm1,%xmm1
+	vmovdqu	128-32(%r9),%xmm15
+	vpxor	%xmm9,%xmm1,%xmm1
+
+	vxorps	%xmm7,%xmm14,%xmm14
+	vpclmulqdq	$0x10,16(%r11),%xmm8,%xmm8	/* second multiply by L$poly */
+	vxorps	%xmm14,%xmm8,%xmm8
+
+	vpclmulqdq	$0x00,%xmm3,%xmm10,%xmm4
+	vmovdqu	112-32(%r9),%xmm0
+	vpxor	%xmm5,%xmm4,%xmm4
+	vpunpckhqdq	%xmm8,%xmm8,%xmm9
+	vpclmulqdq	$0x11,%xmm3,%xmm10,%xmm10
+	vpxor	%xmm8,%xmm9,%xmm9
+	vpxor	%xmm11,%xmm10,%xmm10
+	vpclmulqdq	$0x00,%xmm15,%xmm2,%xmm2
+	vpxor	%xmm1,%xmm2,%xmm2
+
+	vpclmulqdq	$0x00,%xmm0,%xmm8,%xmm5
+	vpclmulqdq	$0x11,%xmm0,%xmm8,%xmm7
+	vpxor	%xmm4,%xmm5,%xmm5
+	vpclmulqdq	$0x10,%xmm15,%xmm9,%xmm6
+	vpxor	%xmm10,%xmm7,%xmm7
+	vpxor	%xmm2,%xmm6,%xmm6
+
+	vpxor	%xmm5,%xmm7,%xmm4
+	vpxor	%xmm4,%xmm6,%xmm6
+	vpslldq	$8,%xmm6,%xmm1
+	vmovdqu	16(%r11),%xmm3	/* xmm3 = L$poly for the final two multiply steps */
+	vpsrldq	$8,%xmm6,%xmm6
+	vpxor	%xmm1,%xmm5,%xmm8
+	vpxor	%xmm6,%xmm7,%xmm7
+
+	vpalignr	$8,%xmm8,%xmm8,%xmm2
+	vpclmulqdq	$0x10,%xmm3,%xmm8,%xmm8
+	vpxor	%xmm2,%xmm8,%xmm8
+
+	vpalignr	$8,%xmm8,%xmm8,%xmm2
+	vpclmulqdq	$0x10,%xmm3,%xmm8,%xmm8
+	vpxor	%xmm7,%xmm2,%xmm2
+	vpxor	%xmm2,%xmm8,%xmm8
+	vpshufb	(%r11),%xmm8,%xmm8	/* byte-reverse the hash state back */
+	vmovdqu	%xmm8,-64(%r9)	/* store it at the original r9 (r9 was biased by 64) */
+
+	vzeroupper
+	movq	-48(%rax),%r15	/* restore saved registers from below the entry rsp */
+
+	movq	-40(%rax),%r14
+
+	movq	-32(%rax),%r13
+
+	movq	-24(%rax),%r12
+
+	movq	-16(%rax),%rbp
+
+	movq	-8(%rax),%rbx
+
+	leaq	(%rax),%rsp
+
+L$gcm_enc_abort:
+	movq	%r10,%rax	/* return value = bytes processed */
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+.p2align	6
+L$bswap_mask:	/* offset 0: vpshufb mask that reverses the 16 bytes of a register */
+.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+L$poly:	/* offset 16, read as 16(%r11): vpclmulqdq multiplier (0xc2 in the top byte) */
+.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
+L$one_msb:	/* offset 32, read as 32(%r11): vpaddb increment of byte 15 by one */
+.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
+L$two_lsb:	/* offset 48, read as 48(%r11): vpaddd increment by two on a byte-reversed counter */
+.byte	2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+L$one_lsb:	/* offset 64, read as 64(%r11): vpaddd increment by one on a byte-reversed counter */
+.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+.byte	65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	/* NUL-terminated ASCII: "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
+.p2align	6
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/fipsmodule/aesni-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/aesni-x86_64.S
new file mode 100644
index 0000000..4ee0dc4
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/aesni-x86_64.S
@@ -0,0 +1,4380 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+.globl	_aesni_encrypt
+.private_extern _aesni_encrypt
+
+.p2align	4
+_aesni_encrypt:	// Encrypt the 16-byte block at (%rdi) using the key schedule at %rdx and store the result at (%rsi).
+	movups	(%rdi),%xmm2	// xmm2 = input block
+	movl	240(%rdx),%eax	// eax = loop count read from offset 240 of the key structure (presumably the round count field - confirm against AES_KEY)
+	movups	(%rdx),%xmm0	// xmm0 = round key 0
+	movups	16(%rdx),%xmm1	// xmm1 = round key 1
+	leaq	32(%rdx),%rdx	// rdx -> round key 2
+	xorps	%xmm0,%xmm2	// whitening: state ^= round key 0
+L$oop_enc1_1:
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+	decl	%eax	// sets ZF for the jnz below; movups and leaq leave the flags alone
+	movups	(%rdx),%xmm1	// fetch the next round key
+	leaq	16(%rdx),%rdx
+	jnz	L$oop_enc1_1
+.byte	102,15,56,221,209	// aesenclast %xmm1,%xmm2
+	pxor	%xmm0,%xmm0	// clear key material from xmm0 and xmm1
+	pxor	%xmm1,%xmm1
+	movups	%xmm2,(%rsi)	// store the output block
+	pxor	%xmm2,%xmm2	// clear the result register
+	.byte	0xf3,0xc3	// rep ret
+
+
+.globl	_aesni_decrypt
+.private_extern _aesni_decrypt
+
+.p2align	4
+_aesni_decrypt:	// Decrypt the 16-byte block at (%rdi) using the key schedule at %rdx and store the result at (%rsi).
+	movups	(%rdi),%xmm2	// xmm2 = input block
+	movl	240(%rdx),%eax	// eax = loop count read from offset 240 of the key structure (presumably the round count field - confirm against AES_KEY)
+	movups	(%rdx),%xmm0	// xmm0 = first round key
+	movups	16(%rdx),%xmm1	// xmm1 = second round key
+	leaq	32(%rdx),%rdx	// rdx -> third round key
+	xorps	%xmm0,%xmm2	// whitening: state ^= first round key
+L$oop_dec1_2:
+.byte	102,15,56,222,209	// aesdec %xmm1,%xmm2
+	decl	%eax	// sets ZF for the jnz below; movups and leaq leave the flags alone
+	movups	(%rdx),%xmm1	// fetch the next round key
+	leaq	16(%rdx),%rdx
+	jnz	L$oop_dec1_2
+.byte	102,15,56,223,209	// aesdeclast %xmm1,%xmm2
+	pxor	%xmm0,%xmm0	// clear key material from xmm0 and xmm1
+	pxor	%xmm1,%xmm1
+	movups	%xmm2,(%rsi)	// store the output block
+	pxor	%xmm2,%xmm2	// clear the result register
+	.byte	0xf3,0xc3	// rep ret
+
+
+.p2align	4
+_aesni_encrypt2:	// Internal helper: encrypt xmm2 and xmm3 in parallel. In: %rcx = key schedule, %eax = loop count. Modifies %rax, %rcx, %xmm0, %xmm1.
+	movups	(%rcx),%xmm0	// round key 0
+	shll	$4,%eax	// loop count -> byte length (x16)
+	movups	16(%rcx),%xmm1	// round key 1
+	xorps	%xmm0,%xmm2	// whitening of both blocks
+	xorps	%xmm0,%xmm3
+	movups	32(%rcx),%xmm0	// round key 2
+	leaq	32(%rcx,%rax,1),%rcx	// rcx = key + 32 + 16*count, the base for the negative index below
+	negq	%rax
+	addq	$16,%rax	// rax = 16 - 16*count; climbs by 32 per iteration until it reaches zero
+
+L$enc_loop2:	// two rounds per iteration, alternating xmm1 and xmm0 as the round key
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	// sets ZF for the jnz below; the SSE instructions in between do not touch flags
+.byte	102,15,56,220,208	// aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	// aesenc %xmm0,%xmm3
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$enc_loop2
+
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+.byte	102,15,56,221,208	// aesenclast %xmm0,%xmm2
+.byte	102,15,56,221,216	// aesenclast %xmm0,%xmm3
+	.byte	0xf3,0xc3	// rep ret
+
+
+.p2align	4
+_aesni_decrypt2:	// Internal helper: decrypt xmm2 and xmm3 in parallel. In: %rcx = key schedule, %eax = loop count. Modifies %rax, %rcx, %xmm0, %xmm1.
+	movups	(%rcx),%xmm0	// first round key
+	shll	$4,%eax	// loop count -> byte length (x16)
+	movups	16(%rcx),%xmm1	// second round key
+	xorps	%xmm0,%xmm2	// whitening of both blocks
+	xorps	%xmm0,%xmm3
+	movups	32(%rcx),%xmm0	// third round key
+	leaq	32(%rcx,%rax,1),%rcx	// rcx = key + 32 + 16*count, the base for the negative index below
+	negq	%rax
+	addq	$16,%rax	// rax = 16 - 16*count; climbs by 32 per iteration until it reaches zero
+
+L$dec_loop2:	// two rounds per iteration, alternating xmm1 and xmm0 as the round key
+.byte	102,15,56,222,209	// aesdec %xmm1,%xmm2
+.byte	102,15,56,222,217	// aesdec %xmm1,%xmm3
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	// sets ZF for the jnz below; the SSE instructions in between do not touch flags
+.byte	102,15,56,222,208	// aesdec %xmm0,%xmm2
+.byte	102,15,56,222,216	// aesdec %xmm0,%xmm3
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$dec_loop2
+
+.byte	102,15,56,222,209	// aesdec %xmm1,%xmm2
+.byte	102,15,56,222,217	// aesdec %xmm1,%xmm3
+.byte	102,15,56,223,208	// aesdeclast %xmm0,%xmm2
+.byte	102,15,56,223,216	// aesdeclast %xmm0,%xmm3
+	.byte	0xf3,0xc3	// rep ret
+
+
+.p2align	4
+_aesni_encrypt3:	// Internal helper: encrypt xmm2..xmm4 in parallel. In: %rcx = key schedule, %eax = loop count. Modifies %rax, %rcx, %xmm0, %xmm1.
+	movups	(%rcx),%xmm0	// round key 0
+	shll	$4,%eax	// loop count -> byte length (x16)
+	movups	16(%rcx),%xmm1	// round key 1
+	xorps	%xmm0,%xmm2	// whitening of the three blocks
+	xorps	%xmm0,%xmm3
+	xorps	%xmm0,%xmm4
+	movups	32(%rcx),%xmm0	// round key 2
+	leaq	32(%rcx,%rax,1),%rcx	// rcx = key + 32 + 16*count, the base for the negative index below
+	negq	%rax
+	addq	$16,%rax	// rax = 16 - 16*count; climbs by 32 per iteration until it reaches zero
+
+L$enc_loop3:	// two rounds per iteration, alternating xmm1 and xmm0 as the round key
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	// sets ZF for the jnz below
+.byte	102,15,56,220,208	// aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	// aesenc %xmm0,%xmm3
+.byte	102,15,56,220,224	// aesenc %xmm0,%xmm4
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$enc_loop3
+
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+.byte	102,15,56,221,208	// aesenclast %xmm0,%xmm2
+.byte	102,15,56,221,216	// aesenclast %xmm0,%xmm3
+.byte	102,15,56,221,224	// aesenclast %xmm0,%xmm4
+	.byte	0xf3,0xc3	// rep ret
+
+
+.p2align	4
+_aesni_decrypt3:	// Internal helper: decrypt xmm2..xmm4 in parallel. In: %rcx = key schedule, %eax = loop count. Modifies %rax, %rcx, %xmm0, %xmm1.
+	movups	(%rcx),%xmm0	// first round key
+	shll	$4,%eax	// loop count -> byte length (x16)
+	movups	16(%rcx),%xmm1	// second round key
+	xorps	%xmm0,%xmm2	// whitening of the three blocks
+	xorps	%xmm0,%xmm3
+	xorps	%xmm0,%xmm4
+	movups	32(%rcx),%xmm0	// third round key
+	leaq	32(%rcx,%rax,1),%rcx	// rcx = key + 32 + 16*count, the base for the negative index below
+	negq	%rax
+	addq	$16,%rax	// rax = 16 - 16*count; climbs by 32 per iteration until it reaches zero
+
+L$dec_loop3:	// two rounds per iteration, alternating xmm1 and xmm0 as the round key
+.byte	102,15,56,222,209	// aesdec %xmm1,%xmm2
+.byte	102,15,56,222,217	// aesdec %xmm1,%xmm3
+.byte	102,15,56,222,225	// aesdec %xmm1,%xmm4
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	// sets ZF for the jnz below
+.byte	102,15,56,222,208	// aesdec %xmm0,%xmm2
+.byte	102,15,56,222,216	// aesdec %xmm0,%xmm3
+.byte	102,15,56,222,224	// aesdec %xmm0,%xmm4
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$dec_loop3
+
+.byte	102,15,56,222,209	// aesdec %xmm1,%xmm2
+.byte	102,15,56,222,217	// aesdec %xmm1,%xmm3
+.byte	102,15,56,222,225	// aesdec %xmm1,%xmm4
+.byte	102,15,56,223,208	// aesdeclast %xmm0,%xmm2
+.byte	102,15,56,223,216	// aesdeclast %xmm0,%xmm3
+.byte	102,15,56,223,224	// aesdeclast %xmm0,%xmm4
+	.byte	0xf3,0xc3	// rep ret
+
+
+.p2align	4
+_aesni_encrypt4:	// Internal helper: encrypt xmm2..xmm5 in parallel. In: %rcx = key schedule, %eax = loop count. Modifies %rax, %rcx, %xmm0, %xmm1.
+	movups	(%rcx),%xmm0	// round key 0
+	shll	$4,%eax	// loop count -> byte length (x16)
+	movups	16(%rcx),%xmm1	// round key 1
+	xorps	%xmm0,%xmm2	// whitening of the four blocks
+	xorps	%xmm0,%xmm3
+	xorps	%xmm0,%xmm4
+	xorps	%xmm0,%xmm5
+	movups	32(%rcx),%xmm0	// round key 2
+	leaq	32(%rcx,%rax,1),%rcx	// rcx = key + 32 + 16*count, the base for the negative index below
+	negq	%rax
+.byte	0x0f,0x1f,0x00	// nopl (%rax): 3-byte padding NOP
+	addq	$16,%rax	// rax = 16 - 16*count; climbs by 32 per iteration until it reaches zero
+
+L$enc_loop4:	// two rounds per iteration, alternating xmm1 and xmm0 as the round key
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	// aesenc %xmm1,%xmm5
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	// sets ZF for the jnz below
+.byte	102,15,56,220,208	// aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	// aesenc %xmm0,%xmm3
+.byte	102,15,56,220,224	// aesenc %xmm0,%xmm4
+.byte	102,15,56,220,232	// aesenc %xmm0,%xmm5
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$enc_loop4
+
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	// aesenc %xmm1,%xmm5
+.byte	102,15,56,221,208	// aesenclast %xmm0,%xmm2
+.byte	102,15,56,221,216	// aesenclast %xmm0,%xmm3
+.byte	102,15,56,221,224	// aesenclast %xmm0,%xmm4
+.byte	102,15,56,221,232	// aesenclast %xmm0,%xmm5
+	.byte	0xf3,0xc3	// rep ret
+
+
+.p2align	4
+_aesni_decrypt4:	// Internal helper: decrypt xmm2..xmm5 in parallel. In: %rcx = key schedule, %eax = loop count. Modifies %rax, %rcx, %xmm0, %xmm1.
+	movups	(%rcx),%xmm0	// first round key
+	shll	$4,%eax	// loop count -> byte length (x16)
+	movups	16(%rcx),%xmm1	// second round key
+	xorps	%xmm0,%xmm2	// whitening of the four blocks
+	xorps	%xmm0,%xmm3
+	xorps	%xmm0,%xmm4
+	xorps	%xmm0,%xmm5
+	movups	32(%rcx),%xmm0	// third round key
+	leaq	32(%rcx,%rax,1),%rcx	// rcx = key + 32 + 16*count, the base for the negative index below
+	negq	%rax
+.byte	0x0f,0x1f,0x00	// nopl (%rax): 3-byte padding NOP
+	addq	$16,%rax	// rax = 16 - 16*count; climbs by 32 per iteration until it reaches zero
+
+L$dec_loop4:	// two rounds per iteration, alternating xmm1 and xmm0 as the round key
+.byte	102,15,56,222,209	// aesdec %xmm1,%xmm2
+.byte	102,15,56,222,217	// aesdec %xmm1,%xmm3
+.byte	102,15,56,222,225	// aesdec %xmm1,%xmm4
+.byte	102,15,56,222,233	// aesdec %xmm1,%xmm5
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	// sets ZF for the jnz below
+.byte	102,15,56,222,208	// aesdec %xmm0,%xmm2
+.byte	102,15,56,222,216	// aesdec %xmm0,%xmm3
+.byte	102,15,56,222,224	// aesdec %xmm0,%xmm4
+.byte	102,15,56,222,232	// aesdec %xmm0,%xmm5
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$dec_loop4
+
+.byte	102,15,56,222,209	// aesdec %xmm1,%xmm2
+.byte	102,15,56,222,217	// aesdec %xmm1,%xmm3
+.byte	102,15,56,222,225	// aesdec %xmm1,%xmm4
+.byte	102,15,56,222,233	// aesdec %xmm1,%xmm5
+.byte	102,15,56,223,208	// aesdeclast %xmm0,%xmm2
+.byte	102,15,56,223,216	// aesdeclast %xmm0,%xmm3
+.byte	102,15,56,223,224	// aesdeclast %xmm0,%xmm4
+.byte	102,15,56,223,232	// aesdeclast %xmm0,%xmm5
+	.byte	0xf3,0xc3	// rep ret
+
+
+.p2align	4
+_aesni_encrypt6:	// Internal helper: encrypt xmm2..xmm7 in parallel. In: %rcx = key schedule, %eax = loop count. Modifies %rax, %rcx, %xmm0, %xmm1.
+	movups	(%rcx),%xmm0	// round key 0
+	shll	$4,%eax	// loop count -> byte length (x16)
+	movups	16(%rcx),%xmm1	// round key 1
+	xorps	%xmm0,%xmm2	// whitening is interleaved with the first aesenc round below
+	pxor	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+	leaq	32(%rcx,%rax,1),%rcx	// rcx = key + 32 + 16*count, the base for the negative index below
+	negq	%rax
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+	pxor	%xmm0,%xmm5
+	pxor	%xmm0,%xmm6
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+	pxor	%xmm0,%xmm7
+	movups	(%rcx,%rax,1),%xmm0	// round key 2 (rax = -16*count here)
+	addq	$16,%rax
+	jmp	L$enc_loop6_enter	// xmm2..xmm4 already had their first round; join the loop for xmm5..xmm7
+.p2align	4
+L$enc_loop6:	// also entered directly via call from L$ctr32_loop6 in this file
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+L$enc_loop6_enter:
+.byte	102,15,56,220,233	// aesenc %xmm1,%xmm5
+.byte	102,15,56,220,241	// aesenc %xmm1,%xmm6
+.byte	102,15,56,220,249	// aesenc %xmm1,%xmm7
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	// sets ZF for the jnz below
+.byte	102,15,56,220,208	// aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	// aesenc %xmm0,%xmm3
+.byte	102,15,56,220,224	// aesenc %xmm0,%xmm4
+.byte	102,15,56,220,232	// aesenc %xmm0,%xmm5
+.byte	102,15,56,220,240	// aesenc %xmm0,%xmm6
+.byte	102,15,56,220,248	// aesenc %xmm0,%xmm7
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$enc_loop6
+
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	// aesenc %xmm1,%xmm5
+.byte	102,15,56,220,241	// aesenc %xmm1,%xmm6
+.byte	102,15,56,220,249	// aesenc %xmm1,%xmm7
+.byte	102,15,56,221,208	// aesenclast %xmm0,%xmm2
+.byte	102,15,56,221,216	// aesenclast %xmm0,%xmm3
+.byte	102,15,56,221,224	// aesenclast %xmm0,%xmm4
+.byte	102,15,56,221,232	// aesenclast %xmm0,%xmm5
+.byte	102,15,56,221,240	// aesenclast %xmm0,%xmm6
+.byte	102,15,56,221,248	// aesenclast %xmm0,%xmm7
+	.byte	0xf3,0xc3	// rep ret
+
+
+.p2align	4
+_aesni_decrypt6:	// Internal helper: decrypt xmm2..xmm7 in parallel. In: %rcx = key schedule, %eax = loop count. Modifies %rax, %rcx, %xmm0, %xmm1.
+	movups	(%rcx),%xmm0	// first round key
+	shll	$4,%eax	// loop count -> byte length (x16)
+	movups	16(%rcx),%xmm1	// second round key
+	xorps	%xmm0,%xmm2	// whitening is interleaved with the first aesdec round below
+	pxor	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+.byte	102,15,56,222,209	// aesdec %xmm1,%xmm2
+	leaq	32(%rcx,%rax,1),%rcx	// rcx = key + 32 + 16*count, the base for the negative index below
+	negq	%rax
+.byte	102,15,56,222,217	// aesdec %xmm1,%xmm3
+	pxor	%xmm0,%xmm5
+	pxor	%xmm0,%xmm6
+.byte	102,15,56,222,225	// aesdec %xmm1,%xmm4
+	pxor	%xmm0,%xmm7
+	movups	(%rcx,%rax,1),%xmm0	// third round key (rax = -16*count here)
+	addq	$16,%rax
+	jmp	L$dec_loop6_enter	// xmm2..xmm4 already had their first round; join the loop for xmm5..xmm7
+.p2align	4
+L$dec_loop6:
+.byte	102,15,56,222,209	// aesdec %xmm1,%xmm2
+.byte	102,15,56,222,217	// aesdec %xmm1,%xmm3
+.byte	102,15,56,222,225	// aesdec %xmm1,%xmm4
+L$dec_loop6_enter:
+.byte	102,15,56,222,233	// aesdec %xmm1,%xmm5
+.byte	102,15,56,222,241	// aesdec %xmm1,%xmm6
+.byte	102,15,56,222,249	// aesdec %xmm1,%xmm7
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	// sets ZF for the jnz below
+.byte	102,15,56,222,208	// aesdec %xmm0,%xmm2
+.byte	102,15,56,222,216	// aesdec %xmm0,%xmm3
+.byte	102,15,56,222,224	// aesdec %xmm0,%xmm4
+.byte	102,15,56,222,232	// aesdec %xmm0,%xmm5
+.byte	102,15,56,222,240	// aesdec %xmm0,%xmm6
+.byte	102,15,56,222,248	// aesdec %xmm0,%xmm7
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$dec_loop6
+
+.byte	102,15,56,222,209	// aesdec %xmm1,%xmm2
+.byte	102,15,56,222,217	// aesdec %xmm1,%xmm3
+.byte	102,15,56,222,225	// aesdec %xmm1,%xmm4
+.byte	102,15,56,222,233	// aesdec %xmm1,%xmm5
+.byte	102,15,56,222,241	// aesdec %xmm1,%xmm6
+.byte	102,15,56,222,249	// aesdec %xmm1,%xmm7
+.byte	102,15,56,223,208	// aesdeclast %xmm0,%xmm2
+.byte	102,15,56,223,216	// aesdeclast %xmm0,%xmm3
+.byte	102,15,56,223,224	// aesdeclast %xmm0,%xmm4
+.byte	102,15,56,223,232	// aesdeclast %xmm0,%xmm5
+.byte	102,15,56,223,240	// aesdeclast %xmm0,%xmm6
+.byte	102,15,56,223,248	// aesdeclast %xmm0,%xmm7
+	.byte	0xf3,0xc3	// rep ret
+
+
+.p2align	4
+_aesni_encrypt8:	// Internal helper: encrypt xmm2..xmm9 in parallel. In: %rcx = key schedule, %eax = loop count. Modifies %rax, %rcx, %xmm0, %xmm1.
+	movups	(%rcx),%xmm0	// round key 0
+	shll	$4,%eax	// loop count -> byte length (x16)
+	movups	16(%rcx),%xmm1	// round key 1
+	xorps	%xmm0,%xmm2	// whitening is interleaved with the first aesenc round below
+	xorps	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+	pxor	%xmm0,%xmm5
+	pxor	%xmm0,%xmm6
+	leaq	32(%rcx,%rax,1),%rcx	// rcx = key + 32 + 16*count, the base for the negative index below
+	negq	%rax
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+	pxor	%xmm0,%xmm7
+	pxor	%xmm0,%xmm8
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+	pxor	%xmm0,%xmm9
+	movups	(%rcx,%rax,1),%xmm0	// round key 2 (rax = -16*count here)
+	addq	$16,%rax
+	jmp	L$enc_loop8_inner	// xmm2 and xmm3 already had their first round
+.p2align	4
+L$enc_loop8:
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+L$enc_loop8_inner:
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	// aesenc %xmm1,%xmm5
+.byte	102,15,56,220,241	// aesenc %xmm1,%xmm6
+.byte	102,15,56,220,249	// aesenc %xmm1,%xmm7
+.byte	102,68,15,56,220,193	// aesenc %xmm1,%xmm8
+.byte	102,68,15,56,220,201	// aesenc %xmm1,%xmm9
+L$enc_loop8_enter:	// also entered directly via call from L$ctr32_tail in this file
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	// sets ZF for the jnz below
+.byte	102,15,56,220,208	// aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	// aesenc %xmm0,%xmm3
+.byte	102,15,56,220,224	// aesenc %xmm0,%xmm4
+.byte	102,15,56,220,232	// aesenc %xmm0,%xmm5
+.byte	102,15,56,220,240	// aesenc %xmm0,%xmm6
+.byte	102,15,56,220,248	// aesenc %xmm0,%xmm7
+.byte	102,68,15,56,220,192	// aesenc %xmm0,%xmm8
+.byte	102,68,15,56,220,200	// aesenc %xmm0,%xmm9
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$enc_loop8
+
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	// aesenc %xmm1,%xmm5
+.byte	102,15,56,220,241	// aesenc %xmm1,%xmm6
+.byte	102,15,56,220,249	// aesenc %xmm1,%xmm7
+.byte	102,68,15,56,220,193	// aesenc %xmm1,%xmm8
+.byte	102,68,15,56,220,201	// aesenc %xmm1,%xmm9
+.byte	102,15,56,221,208	// aesenclast %xmm0,%xmm2
+.byte	102,15,56,221,216	// aesenclast %xmm0,%xmm3
+.byte	102,15,56,221,224	// aesenclast %xmm0,%xmm4
+.byte	102,15,56,221,232	// aesenclast %xmm0,%xmm5
+.byte	102,15,56,221,240	// aesenclast %xmm0,%xmm6
+.byte	102,15,56,221,248	// aesenclast %xmm0,%xmm7
+.byte	102,68,15,56,221,192	// aesenclast %xmm0,%xmm8
+.byte	102,68,15,56,221,200	// aesenclast %xmm0,%xmm9
+	.byte	0xf3,0xc3	// rep ret
+
+
+.p2align	4
+_aesni_decrypt8:	// Internal helper: decrypt xmm2..xmm9 in parallel. In: %rcx = key schedule, %eax = loop count. Modifies %rax, %rcx, %xmm0, %xmm1.
+	movups	(%rcx),%xmm0	// first round key
+	shll	$4,%eax	// loop count -> byte length (x16)
+	movups	16(%rcx),%xmm1	// second round key
+	xorps	%xmm0,%xmm2	// whitening is interleaved with the first aesdec round below
+	xorps	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+	pxor	%xmm0,%xmm5
+	pxor	%xmm0,%xmm6
+	leaq	32(%rcx,%rax,1),%rcx	// rcx = key + 32 + 16*count, the base for the negative index below
+	negq	%rax
+.byte	102,15,56,222,209	// aesdec %xmm1,%xmm2
+	pxor	%xmm0,%xmm7
+	pxor	%xmm0,%xmm8
+.byte	102,15,56,222,217	// aesdec %xmm1,%xmm3
+	pxor	%xmm0,%xmm9
+	movups	(%rcx,%rax,1),%xmm0	// third round key (rax = -16*count here)
+	addq	$16,%rax
+	jmp	L$dec_loop8_inner	// xmm2 and xmm3 already had their first round
+.p2align	4
+L$dec_loop8:
+.byte	102,15,56,222,209	// aesdec %xmm1,%xmm2
+.byte	102,15,56,222,217	// aesdec %xmm1,%xmm3
+L$dec_loop8_inner:
+.byte	102,15,56,222,225	// aesdec %xmm1,%xmm4
+.byte	102,15,56,222,233	// aesdec %xmm1,%xmm5
+.byte	102,15,56,222,241	// aesdec %xmm1,%xmm6
+.byte	102,15,56,222,249	// aesdec %xmm1,%xmm7
+.byte	102,68,15,56,222,193	// aesdec %xmm1,%xmm8
+.byte	102,68,15,56,222,201	// aesdec %xmm1,%xmm9
+L$dec_loop8_enter:
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	// sets ZF for the jnz below
+.byte	102,15,56,222,208	// aesdec %xmm0,%xmm2
+.byte	102,15,56,222,216	// aesdec %xmm0,%xmm3
+.byte	102,15,56,222,224	// aesdec %xmm0,%xmm4
+.byte	102,15,56,222,232	// aesdec %xmm0,%xmm5
+.byte	102,15,56,222,240	// aesdec %xmm0,%xmm6
+.byte	102,15,56,222,248	// aesdec %xmm0,%xmm7
+.byte	102,68,15,56,222,192	// aesdec %xmm0,%xmm8
+.byte	102,68,15,56,222,200	// aesdec %xmm0,%xmm9
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$dec_loop8
+
+.byte	102,15,56,222,209	// aesdec %xmm1,%xmm2
+.byte	102,15,56,222,217	// aesdec %xmm1,%xmm3
+.byte	102,15,56,222,225	// aesdec %xmm1,%xmm4
+.byte	102,15,56,222,233	// aesdec %xmm1,%xmm5
+.byte	102,15,56,222,241	// aesdec %xmm1,%xmm6
+.byte	102,15,56,222,249	// aesdec %xmm1,%xmm7
+.byte	102,68,15,56,222,193	// aesdec %xmm1,%xmm8
+.byte	102,68,15,56,222,201	// aesdec %xmm1,%xmm9
+.byte	102,15,56,223,208	// aesdeclast %xmm0,%xmm2
+.byte	102,15,56,223,216	// aesdeclast %xmm0,%xmm3
+.byte	102,15,56,223,224	// aesdeclast %xmm0,%xmm4
+.byte	102,15,56,223,232	// aesdeclast %xmm0,%xmm5
+.byte	102,15,56,223,240	// aesdeclast %xmm0,%xmm6
+.byte	102,15,56,223,248	// aesdeclast %xmm0,%xmm7
+.byte	102,68,15,56,223,192	// aesdeclast %xmm0,%xmm8
+.byte	102,68,15,56,223,200	// aesdeclast %xmm0,%xmm9
+	.byte	0xf3,0xc3	// rep ret
+
+.globl	_aesni_ecb_encrypt
+.private_extern _aesni_ecb_encrypt
+
+.p2align	4
+_aesni_ecb_encrypt:	// ECB over whole 16-byte blocks. In: %rdi = input, %rsi = output, %rdx = byte length, %rcx = key schedule, %r8d = direction (nonzero encrypts, zero decrypts).
+	andq	$-16,%rdx	// round the length down to a multiple of 16
+	jz	L$ecb_ret	// no complete block: nothing to do
+
+	movl	240(%rcx),%eax	// loop count from offset 240 of the key structure
+	movups	(%rcx),%xmm0
+	movq	%rcx,%r11	// r11 and r10d preserve the key pointer and loop count, which the helpers modify
+	movl	%eax,%r10d
+	testl	%r8d,%r8d
+	jz	L$ecb_decrypt
+
+	cmpq	$0x80,%rdx	// fewer than 8 blocks? go straight to the tail
+	jb	L$ecb_enc_tail
+
+	movdqu	(%rdi),%xmm2	// load the first 8 input blocks
+	movdqu	16(%rdi),%xmm3
+	movdqu	32(%rdi),%xmm4
+	movdqu	48(%rdi),%xmm5
+	movdqu	64(%rdi),%xmm6
+	movdqu	80(%rdi),%xmm7
+	movdqu	96(%rdi),%xmm8
+	movdqu	112(%rdi),%xmm9
+	leaq	128(%rdi),%rdi
+	subq	$0x80,%rdx
+	jmp	L$ecb_enc_loop8_enter
+.p2align	4
+L$ecb_enc_loop8:	// store the previous 8 results while loading the next 8 inputs
+	movups	%xmm2,(%rsi)
+	movq	%r11,%rcx	// restore the key pointer for the next helper call
+	movdqu	(%rdi),%xmm2
+	movl	%r10d,%eax	// restore the loop count for the next helper call
+	movups	%xmm3,16(%rsi)
+	movdqu	16(%rdi),%xmm3
+	movups	%xmm4,32(%rsi)
+	movdqu	32(%rdi),%xmm4
+	movups	%xmm5,48(%rsi)
+	movdqu	48(%rdi),%xmm5
+	movups	%xmm6,64(%rsi)
+	movdqu	64(%rdi),%xmm6
+	movups	%xmm7,80(%rsi)
+	movdqu	80(%rdi),%xmm7
+	movups	%xmm8,96(%rsi)
+	movdqu	96(%rdi),%xmm8
+	movups	%xmm9,112(%rsi)
+	leaq	128(%rsi),%rsi
+	movdqu	112(%rdi),%xmm9
+	leaq	128(%rdi),%rdi
+L$ecb_enc_loop8_enter:
+
+	call	_aesni_encrypt8
+
+	subq	$0x80,%rdx
+	jnc	L$ecb_enc_loop8	// no borrow: at least 8 more blocks remain
+
+	movups	%xmm2,(%rsi)	// store the final group of 8 results
+	movq	%r11,%rcx	// restore key pointer and loop count for the tail
+	movups	%xmm3,16(%rsi)
+	movl	%r10d,%eax
+	movups	%xmm4,32(%rsi)
+	movups	%xmm5,48(%rsi)
+	movups	%xmm6,64(%rsi)
+	movups	%xmm7,80(%rsi)
+	movups	%xmm8,96(%rsi)
+	movups	%xmm9,112(%rsi)
+	leaq	128(%rsi),%rsi
+	addq	$0x80,%rdx	// undo the last subtraction: rdx = remaining bytes (0..0x70)
+	jz	L$ecb_ret
+
+L$ecb_enc_tail:	// 1..7 blocks left; each cmpq feeds both a jb and a je because the movups in between leaves the flags intact
+	movups	(%rdi),%xmm2
+	cmpq	$0x20,%rdx
+	jb	L$ecb_enc_one	// 0x10 bytes
+	movups	16(%rdi),%xmm3
+	je	L$ecb_enc_two	// 0x20 bytes
+	movups	32(%rdi),%xmm4
+	cmpq	$0x40,%rdx
+	jb	L$ecb_enc_three	// 0x30 bytes
+	movups	48(%rdi),%xmm5
+	je	L$ecb_enc_four	// 0x40 bytes
+	movups	64(%rdi),%xmm6
+	cmpq	$0x60,%rdx
+	jb	L$ecb_enc_five	// 0x50 bytes
+	movups	80(%rdi),%xmm7
+	je	L$ecb_enc_six	// 0x60 bytes
+	movdqu	96(%rdi),%xmm8	// 0x70 bytes: seven blocks through the 8-way helper
+	xorps	%xmm9,%xmm9	// zero the unused eighth lane
+	call	_aesni_encrypt8
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	movups	%xmm5,48(%rsi)
+	movups	%xmm6,64(%rsi)
+	movups	%xmm7,80(%rsi)
+	movups	%xmm8,96(%rsi)
+	jmp	L$ecb_ret
+.p2align	4
+L$ecb_enc_one:	// single block, encrypted inline
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+L$oop_enc1_3:
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	L$oop_enc1_3
+.byte	102,15,56,221,209	// aesenclast %xmm1,%xmm2
+	movups	%xmm2,(%rsi)
+	jmp	L$ecb_ret
+.p2align	4
+L$ecb_enc_two:
+	call	_aesni_encrypt2
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	jmp	L$ecb_ret
+.p2align	4
+L$ecb_enc_three:
+	call	_aesni_encrypt3
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	jmp	L$ecb_ret
+.p2align	4
+L$ecb_enc_four:
+	call	_aesni_encrypt4
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	movups	%xmm5,48(%rsi)
+	jmp	L$ecb_ret
+.p2align	4
+L$ecb_enc_five:
+	xorps	%xmm7,%xmm7	// zero the unused sixth lane of the 6-way helper
+	call	_aesni_encrypt6
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	movups	%xmm5,48(%rsi)
+	movups	%xmm6,64(%rsi)
+	jmp	L$ecb_ret
+.p2align	4
+L$ecb_enc_six:
+	call	_aesni_encrypt6
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	movups	%xmm5,48(%rsi)
+	movups	%xmm6,64(%rsi)
+	movups	%xmm7,80(%rsi)
+	jmp	L$ecb_ret
+
+.p2align	4
+L$ecb_decrypt:	// decrypt path: same structure as above, and each data register is zeroed after its final store
+	cmpq	$0x80,%rdx	// fewer than 8 blocks? go straight to the tail
+	jb	L$ecb_dec_tail
+
+	movdqu	(%rdi),%xmm2	// load the first 8 input blocks
+	movdqu	16(%rdi),%xmm3
+	movdqu	32(%rdi),%xmm4
+	movdqu	48(%rdi),%xmm5
+	movdqu	64(%rdi),%xmm6
+	movdqu	80(%rdi),%xmm7
+	movdqu	96(%rdi),%xmm8
+	movdqu	112(%rdi),%xmm9
+	leaq	128(%rdi),%rdi
+	subq	$0x80,%rdx
+	jmp	L$ecb_dec_loop8_enter
+.p2align	4
+L$ecb_dec_loop8:	// store the previous 8 results while loading the next 8 inputs
+	movups	%xmm2,(%rsi)
+	movq	%r11,%rcx	// restore the key pointer for the next helper call
+	movdqu	(%rdi),%xmm2
+	movl	%r10d,%eax	// restore the loop count for the next helper call
+	movups	%xmm3,16(%rsi)
+	movdqu	16(%rdi),%xmm3
+	movups	%xmm4,32(%rsi)
+	movdqu	32(%rdi),%xmm4
+	movups	%xmm5,48(%rsi)
+	movdqu	48(%rdi),%xmm5
+	movups	%xmm6,64(%rsi)
+	movdqu	64(%rdi),%xmm6
+	movups	%xmm7,80(%rsi)
+	movdqu	80(%rdi),%xmm7
+	movups	%xmm8,96(%rsi)
+	movdqu	96(%rdi),%xmm8
+	movups	%xmm9,112(%rsi)
+	leaq	128(%rsi),%rsi
+	movdqu	112(%rdi),%xmm9
+	leaq	128(%rdi),%rdi
+L$ecb_dec_loop8_enter:
+
+	call	_aesni_decrypt8
+
+	movups	(%r11),%xmm0	// reload the first round key from the saved key pointer
+	subq	$0x80,%rdx
+	jnc	L$ecb_dec_loop8	// no borrow: at least 8 more blocks remain
+
+	movups	%xmm2,(%rsi)	// store the final group of 8 results, clearing each register afterwards
+	pxor	%xmm2,%xmm2
+	movq	%r11,%rcx	// restore key pointer and loop count for the tail
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	movl	%r10d,%eax
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	movups	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm5
+	movups	%xmm6,64(%rsi)
+	pxor	%xmm6,%xmm6
+	movups	%xmm7,80(%rsi)
+	pxor	%xmm7,%xmm7
+	movups	%xmm8,96(%rsi)
+	pxor	%xmm8,%xmm8
+	movups	%xmm9,112(%rsi)
+	pxor	%xmm9,%xmm9
+	leaq	128(%rsi),%rsi
+	addq	$0x80,%rdx	// undo the last subtraction: rdx = remaining bytes (0..0x70)
+	jz	L$ecb_ret
+
+L$ecb_dec_tail:	// 1..7 blocks left; each cmpq feeds both a jb and a je because the movups in between leaves the flags intact
+	movups	(%rdi),%xmm2
+	cmpq	$0x20,%rdx
+	jb	L$ecb_dec_one	// 0x10 bytes
+	movups	16(%rdi),%xmm3
+	je	L$ecb_dec_two	// 0x20 bytes
+	movups	32(%rdi),%xmm4
+	cmpq	$0x40,%rdx
+	jb	L$ecb_dec_three	// 0x30 bytes
+	movups	48(%rdi),%xmm5
+	je	L$ecb_dec_four	// 0x40 bytes
+	movups	64(%rdi),%xmm6
+	cmpq	$0x60,%rdx
+	jb	L$ecb_dec_five	// 0x50 bytes
+	movups	80(%rdi),%xmm7
+	je	L$ecb_dec_six	// 0x60 bytes
+	movups	96(%rdi),%xmm8	// 0x70 bytes: seven blocks through the 8-way helper
+	movups	(%rcx),%xmm0
+	xorps	%xmm9,%xmm9	// zero the unused eighth lane
+	call	_aesni_decrypt8
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	movups	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm5
+	movups	%xmm6,64(%rsi)
+	pxor	%xmm6,%xmm6
+	movups	%xmm7,80(%rsi)
+	pxor	%xmm7,%xmm7
+	movups	%xmm8,96(%rsi)
+	pxor	%xmm8,%xmm8
+	pxor	%xmm9,%xmm9
+	jmp	L$ecb_ret
+.p2align	4
+L$ecb_dec_one:	// single block, decrypted inline
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+L$oop_dec1_4:
+.byte	102,15,56,222,209	// aesdec %xmm1,%xmm2
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	L$oop_dec1_4
+.byte	102,15,56,223,209	// aesdeclast %xmm1,%xmm2
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	jmp	L$ecb_ret
+.p2align	4
+L$ecb_dec_two:
+	call	_aesni_decrypt2
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	jmp	L$ecb_ret
+.p2align	4
+L$ecb_dec_three:
+	call	_aesni_decrypt3
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	jmp	L$ecb_ret
+.p2align	4
+L$ecb_dec_four:
+	call	_aesni_decrypt4
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	movups	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm5
+	jmp	L$ecb_ret
+.p2align	4
+L$ecb_dec_five:
+	xorps	%xmm7,%xmm7	// zero the unused sixth lane of the 6-way helper
+	call	_aesni_decrypt6
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	movups	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm5
+	movups	%xmm6,64(%rsi)
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	jmp	L$ecb_ret
+.p2align	4
+L$ecb_dec_six:
+	call	_aesni_decrypt6
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	movups	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm5
+	movups	%xmm6,64(%rsi)
+	pxor	%xmm6,%xmm6
+	movups	%xmm7,80(%rsi)
+	pxor	%xmm7,%xmm7
+
+L$ecb_ret:	// common exit: clear the round-key registers before returning
+	xorps	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	.byte	0xf3,0xc3	// rep ret
+
+.globl	_aesni_ccm64_encrypt_blocks
+.private_extern _aesni_ccm64_encrypt_blocks
+
+.p2align	4
+_aesni_ccm64_encrypt_blocks:	// CCM encrypt loop. In: %rdi = input, %rsi = output, %rdx = block count, %rcx = key schedule, %r8 = 16-byte counter block, %r9 = 16-byte MAC state (read at entry, written back at exit).
+	movl	240(%rcx),%eax	// loop count from offset 240 of the key structure
+	movdqu	(%r8),%xmm6	// xmm6 = counter block
+	movdqa	L$increment64(%rip),%xmm9	// constant defined elsewhere in this file; added to the counter with paddq below
+	movdqa	L$bswap_mask(%rip),%xmm7	// pshufb mask defined elsewhere in this file (presumably a byte reversal - confirm)
+
+	shll	$4,%eax	// loop count -> byte length (x16)
+	movl	$16,%r10d
+	leaq	0(%rcx),%r11	// r11 = key schedule base
+	movdqu	(%r9),%xmm3	// xmm3 = running MAC state
+	movdqa	%xmm6,%xmm2	// xmm2 = counter block to encrypt first
+	leaq	32(%rcx,%rax,1),%rcx	// rcx = key + 32 + 16*count, the base for the negative index
+.byte	102,15,56,0,247	// pshufb %xmm7,%xmm6: keep the counter in shuffled form for paddq
+	subq	%rax,%r10	// r10 = 16 - 16*count, the initial negative index for each block
+	jmp	L$ccm64_enc_outer
+.p2align	4
+L$ccm64_enc_outer:	// one input block per iteration; the counter (xmm2) and MAC (xmm3) are encrypted side by side
+	movups	(%r11),%xmm0	// round key 0
+	movq	%r10,%rax
+	movups	(%rdi),%xmm8	// xmm8 = input block
+
+	xorps	%xmm0,%xmm2	// counter ^= round key 0
+	movups	16(%r11),%xmm1
+	xorps	%xmm8,%xmm0	// xmm0 = round key 0 ^ input
+	xorps	%xmm0,%xmm3	// MAC ^= input ^ round key 0
+	movups	32(%r11),%xmm0
+
+L$ccm64_enc2_loop:
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	// sets ZF for the jnz below
+.byte	102,15,56,220,208	// aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	// aesenc %xmm0,%xmm3
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$ccm64_enc2_loop
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+	paddq	%xmm9,%xmm6	// advance the counter
+	decq	%rdx	// sets ZF for the jnz at the end of the iteration; nothing in between writes flags
+.byte	102,15,56,221,208	// aesenclast %xmm0,%xmm2
+.byte	102,15,56,221,216	// aesenclast %xmm0,%xmm3
+
+	leaq	16(%rdi),%rdi
+	xorps	%xmm2,%xmm8	// output = input ^ encrypted counter
+	movdqa	%xmm6,%xmm2	// next counter value
+	movups	%xmm8,(%rsi)
+.byte	102,15,56,0,215	// pshufb %xmm7,%xmm2: shuffle the next counter back into block form
+	leaq	16(%rsi),%rsi
+	jnz	L$ccm64_enc_outer
+
+	pxor	%xmm0,%xmm0	// clear working registers; the MAC state is written back to (%r9)
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	movups	%xmm3,(%r9)
+	pxor	%xmm3,%xmm3
+	pxor	%xmm8,%xmm8
+	pxor	%xmm6,%xmm6
+	.byte	0xf3,0xc3	// rep ret
+
+.globl	_aesni_ccm64_decrypt_blocks
+.private_extern _aesni_ccm64_decrypt_blocks
+
+.p2align	4
+_aesni_ccm64_decrypt_blocks:	// CCM decrypt loop. In: %rdi = input, %rsi = output, %rdx = block count, %rcx = key schedule, %r8 = 16-byte counter block, %r9 = 16-byte MAC state (read at entry, written back at exit).
+	movl	240(%rcx),%eax	// loop count from offset 240 of the key structure
+	movups	(%r8),%xmm6	// xmm6 = counter block
+	movdqu	(%r9),%xmm3	// xmm3 = running MAC state
+	movdqa	L$increment64(%rip),%xmm9	// constant defined elsewhere in this file; added to the counter with paddq below
+	movdqa	L$bswap_mask(%rip),%xmm7	// pshufb mask defined elsewhere in this file (presumably a byte reversal - confirm)
+
+	movaps	%xmm6,%xmm2	// xmm2 = first counter block to encrypt
+	movl	%eax,%r10d	// keep the loop count
+	movq	%rcx,%r11	// r11 = key schedule base
+.byte	102,15,56,0,247	// pshufb %xmm7,%xmm6: keep the counter in shuffled form for paddq
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+L$oop_enc1_5:	// encrypt the first counter block on its own
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	L$oop_enc1_5
+.byte	102,15,56,221,209	// aesenclast %xmm1,%xmm2
+	shll	$4,%r10d	// loop count -> byte length (x16)
+	movl	$16,%eax
+	movups	(%rdi),%xmm8	// first input block
+	paddq	%xmm9,%xmm6	// advance the counter
+	leaq	16(%rdi),%rdi
+	subq	%r10,%rax	// rax = 16 - 16*count
+	leaq	32(%r11,%r10,1),%rcx	// rcx = key + 32 + 16*count, the base for the negative index
+	movq	%rax,%r10	// r10 = initial negative index reused for every block
+	jmp	L$ccm64_dec_outer
+.p2align	4
+L$ccm64_dec_outer:
+	xorps	%xmm2,%xmm8	// output = input ^ encrypted counter
+	movdqa	%xmm6,%xmm2	// next counter value
+	movups	%xmm8,(%rsi)
+	leaq	16(%rsi),%rsi
+.byte	102,15,56,0,215	// pshufb %xmm7,%xmm2: shuffle the next counter back into block form
+
+	subq	$1,%rdx
+	jz	L$ccm64_dec_break	// last block: only the MAC still needs updating
+
+	movups	(%r11),%xmm0	// round key 0
+	movq	%r10,%rax
+	movups	16(%r11),%xmm1
+	xorps	%xmm0,%xmm8	// output block ^ round key 0
+	xorps	%xmm0,%xmm2	// counter ^= round key 0
+	xorps	%xmm8,%xmm3	// MAC ^= output block ^ round key 0
+	movups	32(%r11),%xmm0
+	jmp	L$ccm64_dec2_loop
+.p2align	4
+L$ccm64_dec2_loop:	// encrypt the next counter (xmm2) and the MAC (xmm3) side by side
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	// sets ZF for the jnz below
+.byte	102,15,56,220,208	// aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	// aesenc %xmm0,%xmm3
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$ccm64_dec2_loop
+	movups	(%rdi),%xmm8	// next input block
+	paddq	%xmm9,%xmm6	// advance the counter
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+.byte	102,15,56,221,208	// aesenclast %xmm0,%xmm2
+.byte	102,15,56,221,216	// aesenclast %xmm0,%xmm3
+	leaq	16(%rdi),%rdi
+	jmp	L$ccm64_dec_outer
+
+.p2align	4
+L$ccm64_dec_break:	// fold the final output block into the MAC and encrypt the MAC alone
+
+	movl	240(%r11),%eax	// reload the loop count
+	movups	(%r11),%xmm0
+	movups	16(%r11),%xmm1
+	xorps	%xmm0,%xmm8
+	leaq	32(%r11),%r11
+	xorps	%xmm8,%xmm3	// MAC ^= output block ^ round key 0
+L$oop_enc1_6:
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+	decl	%eax
+	movups	(%r11),%xmm1
+	leaq	16(%r11),%r11
+	jnz	L$oop_enc1_6
+.byte	102,15,56,221,217	// aesenclast %xmm1,%xmm3
+	pxor	%xmm0,%xmm0	// clear working registers; the MAC state is written back to (%r9)
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	movups	%xmm3,(%r9)
+	pxor	%xmm3,%xmm3
+	pxor	%xmm8,%xmm8
+	pxor	%xmm6,%xmm6
+	.byte	0xf3,0xc3	// rep ret
+
+.globl	_aesni_ctr32_encrypt_blocks
+.private_extern _aesni_ctr32_encrypt_blocks
+
+.p2align	4
+_aesni_ctr32_encrypt_blocks:	// CTR mode with a 32-bit counter in the last dword of the counter block. In: %rdi = input, %rsi = output, %rdx = block count, %rcx = key schedule, %r8 = 16-byte counter block.
+	cmpq	$1,%rdx
+	jne	L$ctr32_bulk	// any count other than exactly 1 takes the bulk path
+
+
+
+	movups	(%r8),%xmm2	// single block: xmm2 = counter block, xmm3 = input
+	movups	(%rdi),%xmm3
+	movl	240(%rcx),%edx	// loop count from offset 240 of the key structure
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+L$oop_enc1_7:
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+	decl	%edx
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	L$oop_enc1_7
+.byte	102,15,56,221,209	// aesenclast %xmm1,%xmm2
+	pxor	%xmm0,%xmm0	// clear key material
+	pxor	%xmm1,%xmm1
+	xorps	%xmm3,%xmm2	// output = input ^ encrypted counter
+	pxor	%xmm3,%xmm3
+	movups	%xmm2,(%rsi)
+	xorps	%xmm2,%xmm2
+	jmp	L$ctr32_epilogue
+
+.p2align	4
+L$ctr32_bulk:
+	leaq	(%rsp),%r11	// r11 = stack pointer at entry, restored at L$ctr32_done
+	pushq	%rbp
+	subq	$128,%rsp	// room for eight 16-byte counter blocks
+	andq	$-16,%rsp	// 16-byte alignment for the movdqa/movaps accesses below
+
+
+
+
+	movdqu	(%r8),%xmm2	// counter block
+	movdqu	(%rcx),%xmm0	// round key 0
+	movl	12(%r8),%r8d	// r8d = last dword of the counter block
+	pxor	%xmm0,%xmm2	// counter block ^ round key 0 (whitening applied up front)
+	movl	12(%rcx),%ebp	// ebp = last dword of round key 0, XORed into every generated counter word
+	movdqa	%xmm2,0(%rsp)	// stack slot 0
+	bswapl	%r8d	// counter word byte-swapped so it can be incremented with integer adds
+	movdqa	%xmm2,%xmm3
+	movdqa	%xmm2,%xmm4
+	movdqa	%xmm2,%xmm5
+	movdqa	%xmm2,64(%rsp)	// slots 4..7 start as copies; their last dwords are patched below
+	movdqa	%xmm2,80(%rsp)
+	movdqa	%xmm2,96(%rsp)
+	movq	%rdx,%r10	// park the block count while rdx is used as scratch
+	movdqa	%xmm2,112(%rsp)
+
+	leaq	1(%r8),%rax	// counter+1 and counter+2
+	leaq	2(%r8),%rdx
+	bswapl	%eax
+	bswapl	%edx
+	xorl	%ebp,%eax
+	xorl	%ebp,%edx
+.byte	102,15,58,34,216,3	// pinsrd $3,%eax,%xmm3
+	leaq	3(%r8),%rax
+	movdqa	%xmm3,16(%rsp)	// slot 1
+.byte	102,15,58,34,226,3	// pinsrd $3,%edx,%xmm4
+	bswapl	%eax
+	movq	%r10,%rdx	// restore the block count
+	leaq	4(%r8),%r10
+	movdqa	%xmm4,32(%rsp)	// slot 2
+	xorl	%ebp,%eax
+	bswapl	%r10d
+.byte	102,15,58,34,232,3	// pinsrd $3,%eax,%xmm5
+	xorl	%ebp,%r10d
+	movdqa	%xmm5,48(%rsp)	// slot 3
+	leaq	5(%r8),%r9
+	movl	%r10d,64+12(%rsp)	// slot 4 counter word
+	bswapl	%r9d
+	leaq	6(%r8),%r10
+	movl	240(%rcx),%eax	// eax = loop count from offset 240 of the key structure
+	xorl	%ebp,%r9d
+	bswapl	%r10d
+	movl	%r9d,80+12(%rsp)	// slot 5 counter word
+	xorl	%ebp,%r10d
+	leaq	7(%r8),%r9
+	movl	%r10d,96+12(%rsp)	// slot 6 counter word
+	bswapl	%r9d
+	leaq	_OPENSSL_ia32cap_P(%rip),%r10
+	movl	4(%r10),%r10d	// second dword of the capability vector
+	xorl	%ebp,%r9d
+	andl	$71303168,%r10d	// mask 0x04400000 (bits 22 and 26); presumably the MOVBE and XSAVE CPUID bits - confirm
+	movl	%r9d,112+12(%rsp)	// slot 7 counter word
+
+	movups	16(%rcx),%xmm1	// round key 1
+
+	movdqa	64(%rsp),%xmm6
+	movdqa	80(%rsp),%xmm7
+
+	cmpq	$8,%rdx
+	jb	L$ctr32_tail	// fewer than 8 blocks
+
+	subq	$6,%rdx
+	cmpl	$4194304,%r10d	// 0x00400000: only bit 22 of the masked value is set
+	je	L$ctr32_6x	// take the 6-block loop that uses movbe
+
+	leaq	128(%rcx),%rcx	// bias the key pointer by 128 for the N-128(%rcx) addressing in L$ctr32_loop8
+	subq	$2,%rdx	// 8 subtracted in total before entering the 8-block loop
+	jmp	L$ctr32_loop8
+
+.p2align	4
+L$ctr32_6x:
+	shll	$4,%eax	// loop count -> byte length (x16)
+	movl	$48,%r10d
+	bswapl	%ebp	// pre-swap the key word: the loop stores counters with movbe, which swaps bytes on store
+	leaq	32(%rcx,%rax,1),%rcx	// rcx = key + 32 + 16*count
+	subq	%rax,%r10	// r10 = 48 - 16*count
+	jmp	L$ctr32_loop6
+
+.p2align	4
+L$ctr32_loop6:	// 6 blocks per iteration; counter words for the next iteration are written to the stack between rounds
+	addl	$6,%r8d
+	movups	-48(%rcx,%r10,1),%xmm0	// round key 2
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+	movl	%r8d,%eax
+	xorl	%ebp,%eax
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+.byte	0x0f,0x38,0xf1,0x44,0x24,12	// movbe %eax,12(%rsp)
+	leal	1(%r8),%eax
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+	xorl	%ebp,%eax
+.byte	0x0f,0x38,0xf1,0x44,0x24,28	// movbe %eax,28(%rsp)
+.byte	102,15,56,220,233	// aesenc %xmm1,%xmm5
+	leal	2(%r8),%eax
+	xorl	%ebp,%eax
+.byte	102,15,56,220,241	// aesenc %xmm1,%xmm6
+.byte	0x0f,0x38,0xf1,0x44,0x24,44	// movbe %eax,44(%rsp)
+	leal	3(%r8),%eax
+.byte	102,15,56,220,249	// aesenc %xmm1,%xmm7
+	movups	-32(%rcx,%r10,1),%xmm1	// round key 3
+	xorl	%ebp,%eax
+
+.byte	102,15,56,220,208	// aesenc %xmm0,%xmm2
+.byte	0x0f,0x38,0xf1,0x44,0x24,60	// movbe %eax,60(%rsp)
+	leal	4(%r8),%eax
+.byte	102,15,56,220,216	// aesenc %xmm0,%xmm3
+	xorl	%ebp,%eax
+.byte	0x0f,0x38,0xf1,0x44,0x24,76	// movbe %eax,76(%rsp)
+.byte	102,15,56,220,224	// aesenc %xmm0,%xmm4
+	leal	5(%r8),%eax
+	xorl	%ebp,%eax
+.byte	102,15,56,220,232	// aesenc %xmm0,%xmm5
+.byte	0x0f,0x38,0xf1,0x44,0x24,92	// movbe %eax,92(%rsp)
+	movq	%r10,%rax	// rax = negative index expected by L$enc_loop6
+.byte	102,15,56,220,240	// aesenc %xmm0,%xmm6
+.byte	102,15,56,220,248	// aesenc %xmm0,%xmm7
+	movups	-16(%rcx,%r10,1),%xmm0	// round key 4
+
+	call	L$enc_loop6	// finish the remaining rounds inside _aesni_encrypt6
+
+	movdqu	(%rdi),%xmm8	// load 6 input blocks
+	movdqu	16(%rdi),%xmm9
+	movdqu	32(%rdi),%xmm10
+	movdqu	48(%rdi),%xmm11
+	movdqu	64(%rdi),%xmm12
+	movdqu	80(%rdi),%xmm13
+	leaq	96(%rdi),%rdi
+	movups	-64(%rcx,%r10,1),%xmm1	// reload round key 1 for the next iteration
+	pxor	%xmm2,%xmm8	// input ^ keystream, while reloading the next counter blocks from the stack
+	movaps	0(%rsp),%xmm2
+	pxor	%xmm3,%xmm9
+	movaps	16(%rsp),%xmm3
+	pxor	%xmm4,%xmm10
+	movaps	32(%rsp),%xmm4
+	pxor	%xmm5,%xmm11
+	movaps	48(%rsp),%xmm5
+	pxor	%xmm6,%xmm12
+	movaps	64(%rsp),%xmm6
+	pxor	%xmm7,%xmm13
+	movaps	80(%rsp),%xmm7
+	movdqu	%xmm8,(%rsi)
+	movdqu	%xmm9,16(%rsi)
+	movdqu	%xmm10,32(%rsi)
+	movdqu	%xmm11,48(%rsi)
+	movdqu	%xmm12,64(%rsi)
+	movdqu	%xmm13,80(%rsi)
+	leaq	96(%rsi),%rsi
+
+	subq	$6,%rdx
+	jnc	L$ctr32_loop6	// no borrow: at least 6 more blocks remain
+
+	addq	$6,%rdx	// rdx = leftover blocks (0..5)
+	jz	L$ctr32_done
+
+	leal	-48(%r10),%eax	// eax = -16*count
+	leaq	-80(%rcx,%r10,1),%rcx	// rcx = key schedule base again
+	negl	%eax
+	shrl	$4,%eax	// eax = loop count again
+	jmp	L$ctr32_tail
+
+.p2align	5
+L$ctr32_loop8:	// 8 blocks per iteration; rounds are unrolled with fixed key offsets and interleaved with next-iteration counter updates
+	addl	$8,%r8d
+	movdqa	96(%rsp),%xmm8
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+	movl	%r8d,%r9d
+	movdqa	112(%rsp),%xmm9
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+	bswapl	%r9d
+	movups	32-128(%rcx),%xmm0
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+	xorl	%ebp,%r9d
+	nop
+.byte	102,15,56,220,233	// aesenc %xmm1,%xmm5
+	movl	%r9d,0+12(%rsp)	// next counter word for slot 0
+	leaq	1(%r8),%r9
+.byte	102,15,56,220,241	// aesenc %xmm1,%xmm6
+.byte	102,15,56,220,249	// aesenc %xmm1,%xmm7
+.byte	102,68,15,56,220,193	// aesenc %xmm1,%xmm8
+.byte	102,68,15,56,220,201	// aesenc %xmm1,%xmm9
+	movups	48-128(%rcx),%xmm1
+	bswapl	%r9d
+.byte	102,15,56,220,208	// aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	// aesenc %xmm0,%xmm3
+	xorl	%ebp,%r9d
+.byte	0x66,0x90	// 2-byte NOP
+.byte	102,15,56,220,224	// aesenc %xmm0,%xmm4
+.byte	102,15,56,220,232	// aesenc %xmm0,%xmm5
+	movl	%r9d,16+12(%rsp)	// next counter word for slot 1
+	leaq	2(%r8),%r9
+.byte	102,15,56,220,240	// aesenc %xmm0,%xmm6
+.byte	102,15,56,220,248	// aesenc %xmm0,%xmm7
+.byte	102,68,15,56,220,192	// aesenc %xmm0,%xmm8
+.byte	102,68,15,56,220,200	// aesenc %xmm0,%xmm9
+	movups	64-128(%rcx),%xmm0
+	bswapl	%r9d
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+	xorl	%ebp,%r9d
+.byte	0x66,0x90	// 2-byte NOP
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	// aesenc %xmm1,%xmm5
+	movl	%r9d,32+12(%rsp)	// next counter word for slot 2
+	leaq	3(%r8),%r9
+.byte	102,15,56,220,241	// aesenc %xmm1,%xmm6
+.byte	102,15,56,220,249	// aesenc %xmm1,%xmm7
+.byte	102,68,15,56,220,193	// aesenc %xmm1,%xmm8
+.byte	102,68,15,56,220,201	// aesenc %xmm1,%xmm9
+	movups	80-128(%rcx),%xmm1
+	bswapl	%r9d
+.byte	102,15,56,220,208	// aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	// aesenc %xmm0,%xmm3
+	xorl	%ebp,%r9d
+.byte	0x66,0x90	// 2-byte NOP
+.byte	102,15,56,220,224	// aesenc %xmm0,%xmm4
+.byte	102,15,56,220,232	// aesenc %xmm0,%xmm5
+	movl	%r9d,48+12(%rsp)	// next counter word for slot 3
+	leaq	4(%r8),%r9
+.byte	102,15,56,220,240	// aesenc %xmm0,%xmm6
+.byte	102,15,56,220,248	// aesenc %xmm0,%xmm7
+.byte	102,68,15,56,220,192	// aesenc %xmm0,%xmm8
+.byte	102,68,15,56,220,200	// aesenc %xmm0,%xmm9
+	movups	96-128(%rcx),%xmm0
+	bswapl	%r9d
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+	xorl	%ebp,%r9d
+.byte	0x66,0x90	// 2-byte NOP
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	// aesenc %xmm1,%xmm5
+	movl	%r9d,64+12(%rsp)	// next counter word for slot 4
+	leaq	5(%r8),%r9
+.byte	102,15,56,220,241	// aesenc %xmm1,%xmm6
+.byte	102,15,56,220,249	// aesenc %xmm1,%xmm7
+.byte	102,68,15,56,220,193	// aesenc %xmm1,%xmm8
+.byte	102,68,15,56,220,201	// aesenc %xmm1,%xmm9
+	movups	112-128(%rcx),%xmm1
+	bswapl	%r9d
+.byte	102,15,56,220,208	// aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	// aesenc %xmm0,%xmm3
+	xorl	%ebp,%r9d
+.byte	0x66,0x90	// 2-byte NOP
+.byte	102,15,56,220,224	// aesenc %xmm0,%xmm4
+.byte	102,15,56,220,232	// aesenc %xmm0,%xmm5
+	movl	%r9d,80+12(%rsp)	// next counter word for slot 5
+	leaq	6(%r8),%r9
+.byte	102,15,56,220,240	// aesenc %xmm0,%xmm6
+.byte	102,15,56,220,248	// aesenc %xmm0,%xmm7
+.byte	102,68,15,56,220,192	// aesenc %xmm0,%xmm8
+.byte	102,68,15,56,220,200	// aesenc %xmm0,%xmm9
+	movups	128-128(%rcx),%xmm0
+	bswapl	%r9d
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+	xorl	%ebp,%r9d
+.byte	0x66,0x90	// 2-byte NOP
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	// aesenc %xmm1,%xmm5
+	movl	%r9d,96+12(%rsp)	// next counter word for slot 6
+	leaq	7(%r8),%r9
+.byte	102,15,56,220,241	// aesenc %xmm1,%xmm6
+.byte	102,15,56,220,249	// aesenc %xmm1,%xmm7
+.byte	102,68,15,56,220,193	// aesenc %xmm1,%xmm8
+.byte	102,68,15,56,220,201	// aesenc %xmm1,%xmm9
+	movups	144-128(%rcx),%xmm1
+	bswapl	%r9d
+.byte	102,15,56,220,208	// aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	// aesenc %xmm0,%xmm3
+.byte	102,15,56,220,224	// aesenc %xmm0,%xmm4
+	xorl	%ebp,%r9d
+	movdqu	0(%rdi),%xmm10	// start loading input while the rounds finish
+.byte	102,15,56,220,232	// aesenc %xmm0,%xmm5
+	movl	%r9d,112+12(%rsp)	// next counter word for slot 7
+	cmpl	$11,%eax	// compare the loop count with 11; the flags stay live across the SSE/AES instructions for the jb and je below
+.byte	102,15,56,220,240	// aesenc %xmm0,%xmm6
+.byte	102,15,56,220,248	// aesenc %xmm0,%xmm7
+.byte	102,68,15,56,220,192	// aesenc %xmm0,%xmm8
+.byte	102,68,15,56,220,200	// aesenc %xmm0,%xmm9
+	movups	160-128(%rcx),%xmm0
+
+	jb	L$ctr32_enc_done	// loop count below 11: no extra rounds
+
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	// aesenc %xmm1,%xmm5
+.byte	102,15,56,220,241	// aesenc %xmm1,%xmm6
+.byte	102,15,56,220,249	// aesenc %xmm1,%xmm7
+.byte	102,68,15,56,220,193	// aesenc %xmm1,%xmm8
+.byte	102,68,15,56,220,201	// aesenc %xmm1,%xmm9
+	movups	176-128(%rcx),%xmm1
+
+.byte	102,15,56,220,208	// aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	// aesenc %xmm0,%xmm3
+.byte	102,15,56,220,224	// aesenc %xmm0,%xmm4
+.byte	102,15,56,220,232	// aesenc %xmm0,%xmm5
+.byte	102,15,56,220,240	// aesenc %xmm0,%xmm6
+.byte	102,15,56,220,248	// aesenc %xmm0,%xmm7
+.byte	102,68,15,56,220,192	// aesenc %xmm0,%xmm8
+.byte	102,68,15,56,220,200	// aesenc %xmm0,%xmm9
+	movups	192-128(%rcx),%xmm0
+	je	L$ctr32_enc_done	// loop count equal to 11: two extra rounds were enough
+
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	// aesenc %xmm1,%xmm5
+.byte	102,15,56,220,241	// aesenc %xmm1,%xmm6
+.byte	102,15,56,220,249	// aesenc %xmm1,%xmm7
+.byte	102,68,15,56,220,193	// aesenc %xmm1,%xmm8
+.byte	102,68,15,56,220,201	// aesenc %xmm1,%xmm9
+	movups	208-128(%rcx),%xmm1
+
+.byte	102,15,56,220,208	// aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	// aesenc %xmm0,%xmm3
+.byte	102,15,56,220,224	// aesenc %xmm0,%xmm4
+.byte	102,15,56,220,232	// aesenc %xmm0,%xmm5
+.byte	102,15,56,220,240	// aesenc %xmm0,%xmm6
+.byte	102,15,56,220,248	// aesenc %xmm0,%xmm7
+.byte	102,68,15,56,220,192	// aesenc %xmm0,%xmm8
+.byte	102,68,15,56,220,200	// aesenc %xmm0,%xmm9
+	movups	224-128(%rcx),%xmm0
+	jmp	L$ctr32_enc_done
+
+.p2align	4
+L$ctr32_enc_done:	// xmm0 holds the last round key; it is XORed into the input blocks so that aesenclast yields keystream ^ input directly
+	movdqu	16(%rdi),%xmm11
+	pxor	%xmm0,%xmm10
+	movdqu	32(%rdi),%xmm12
+	pxor	%xmm0,%xmm11
+	movdqu	48(%rdi),%xmm13
+	pxor	%xmm0,%xmm12
+	movdqu	64(%rdi),%xmm14
+	pxor	%xmm0,%xmm13
+	movdqu	80(%rdi),%xmm15
+	pxor	%xmm0,%xmm14
+	pxor	%xmm0,%xmm15
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	// aesenc %xmm1,%xmm5
+.byte	102,15,56,220,241	// aesenc %xmm1,%xmm6
+.byte	102,15,56,220,249	// aesenc %xmm1,%xmm7
+.byte	102,68,15,56,220,193	// aesenc %xmm1,%xmm8
+.byte	102,68,15,56,220,201	// aesenc %xmm1,%xmm9
+	movdqu	96(%rdi),%xmm1	// seventh input block (xmm1 is free once the last aesenc round is issued)
+	leaq	128(%rdi),%rdi
+
+.byte	102,65,15,56,221,210	// aesenclast %xmm10,%xmm2
+	pxor	%xmm0,%xmm1
+	movdqu	112-128(%rdi),%xmm10	// eighth input block
+.byte	102,65,15,56,221,219	// aesenclast %xmm11,%xmm3
+	pxor	%xmm0,%xmm10
+	movdqa	0(%rsp),%xmm11	// begin reloading next-iteration counter blocks from the stack
+.byte	102,65,15,56,221,228	// aesenclast %xmm12,%xmm4
+.byte	102,65,15,56,221,237	// aesenclast %xmm13,%xmm5
+	movdqa	16(%rsp),%xmm12
+	movdqa	32(%rsp),%xmm13
+.byte	102,65,15,56,221,246	// aesenclast %xmm14,%xmm6
+.byte	102,65,15,56,221,255	// aesenclast %xmm15,%xmm7
+	movdqa	48(%rsp),%xmm14
+	movdqa	64(%rsp),%xmm15
+.byte	102,68,15,56,221,193	// aesenclast %xmm1,%xmm8
+	movdqa	80(%rsp),%xmm0
+	movups	16-128(%rcx),%xmm1	// reload round key 1 for the next iteration
+.byte	102,69,15,56,221,202	// aesenclast %xmm10,%xmm9
+
+	movups	%xmm2,(%rsi)	// store 8 output blocks and move the next counters into xmm2..xmm7
+	movdqa	%xmm11,%xmm2
+	movups	%xmm3,16(%rsi)
+	movdqa	%xmm12,%xmm3
+	movups	%xmm4,32(%rsi)
+	movdqa	%xmm13,%xmm4
+	movups	%xmm5,48(%rsi)
+	movdqa	%xmm14,%xmm5
+	movups	%xmm6,64(%rsi)
+	movdqa	%xmm15,%xmm6
+	movups	%xmm7,80(%rsi)
+	movdqa	%xmm0,%xmm7
+	movups	%xmm8,96(%rsi)
+	movups	%xmm9,112(%rsi)
+	leaq	128(%rsi),%rsi
+
+	subq	$8,%rdx
+	jnc	L$ctr32_loop8	// no borrow: at least 8 more blocks remain
+
+	addq	$8,%rdx	// rdx = leftover blocks (0..7)
+	jz	L$ctr32_done
+	leaq	-128(%rcx),%rcx	// remove the +128 bias: rcx = key schedule base again
+
+L$ctr32_tail:	// 1..7 blocks left; xmm2..xmm7 hold whitened counters, xmm1 = round key 1, eax = loop count
+
+
+	leaq	16(%rcx),%rcx
+	cmpq	$4,%rdx
+	jb	L$ctr32_loop3	// 1..3 blocks
+	je	L$ctr32_loop4	// exactly 4 blocks
+
+
+	shll	$4,%eax	// 5..7 blocks: run them through the 8-way loop of _aesni_encrypt8
+	movdqa	96(%rsp),%xmm8
+	pxor	%xmm9,%xmm9	// zero the unused eighth lane
+
+	movups	16(%rcx),%xmm0
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+	leaq	32-16(%rcx,%rax,1),%rcx
+	negq	%rax
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+	addq	$16,%rax
+	movups	(%rdi),%xmm10
+.byte	102,15,56,220,233	// aesenc %xmm1,%xmm5
+.byte	102,15,56,220,241	// aesenc %xmm1,%xmm6
+	movups	16(%rdi),%xmm11
+	movups	32(%rdi),%xmm12
+.byte	102,15,56,220,249	// aesenc %xmm1,%xmm7
+.byte	102,68,15,56,220,193	// aesenc %xmm1,%xmm8
+
+	call	L$enc_loop8_enter	// finish the remaining rounds inside _aesni_encrypt8
+
+	movdqu	48(%rdi),%xmm13
+	pxor	%xmm10,%xmm2
+	movdqu	64(%rdi),%xmm10
+	pxor	%xmm11,%xmm3
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm3,16(%rsi)
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm4,32(%rsi)
+	pxor	%xmm10,%xmm6
+	movdqu	%xmm5,48(%rsi)
+	movdqu	%xmm6,64(%rsi)
+	cmpq	$6,%rdx	// flags stay live across the movups/xorps below for the je
+	jb	L$ctr32_done	// exactly 5 blocks
+
+	movups	80(%rdi),%xmm11
+	xorps	%xmm11,%xmm7
+	movups	%xmm7,80(%rsi)
+	je	L$ctr32_done	// exactly 6 blocks
+
+	movups	96(%rdi),%xmm12	// seventh block
+	xorps	%xmm12,%xmm8
+	movups	%xmm8,96(%rsi)
+	jmp	L$ctr32_done
+
+.p2align	5
+L$ctr32_loop4:	// exactly 4 blocks: one round per iteration, counted down in eax
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+	leaq	16(%rcx),%rcx
+	decl	%eax	// sets ZF for the jnz below
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	// aesenc %xmm1,%xmm5
+	movups	(%rcx),%xmm1
+	jnz	L$ctr32_loop4
+.byte	102,15,56,221,209	// aesenclast %xmm1,%xmm2
+.byte	102,15,56,221,217	// aesenclast %xmm1,%xmm3
+	movups	(%rdi),%xmm10
+	movups	16(%rdi),%xmm11
+.byte	102,15,56,221,225	// aesenclast %xmm1,%xmm4
+.byte	102,15,56,221,233	// aesenclast %xmm1,%xmm5
+	movups	32(%rdi),%xmm12
+	movups	48(%rdi),%xmm13
+
+	xorps	%xmm10,%xmm2
+	movups	%xmm2,(%rsi)
+	xorps	%xmm11,%xmm3
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm4,32(%rsi)
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm5,48(%rsi)
+	jmp	L$ctr32_done
+
+.p2align	5
+L$ctr32_loop3:	// 1..3 blocks: three lanes are always computed, and only rdx of them are stored
+.byte	102,15,56,220,209	// aesenc %xmm1,%xmm2
+	leaq	16(%rcx),%rcx
+	decl	%eax	// sets ZF for the jnz below
+.byte	102,15,56,220,217	// aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	// aesenc %xmm1,%xmm4
+	movups	(%rcx),%xmm1
+	jnz	L$ctr32_loop3
+.byte	102,15,56,221,209	// aesenclast %xmm1,%xmm2
+.byte	102,15,56,221,217	// aesenclast %xmm1,%xmm3
+.byte	102,15,56,221,225	// aesenclast %xmm1,%xmm4
+
+	movups	(%rdi),%xmm10
+	xorps	%xmm10,%xmm2
+	movups	%xmm2,(%rsi)
+	cmpq	$2,%rdx	// flags stay live across the movups/xorps below for the je
+	jb	L$ctr32_done	// exactly 1 block
+
+	movups	16(%rdi),%xmm11
+	xorps	%xmm11,%xmm3
+	movups	%xmm3,16(%rsi)
+	je	L$ctr32_done	// exactly 2 blocks
+
+	movups	32(%rdi),%xmm12	// third block
+	xorps	%xmm12,%xmm4
+	movups	%xmm4,32(%rsi)
+
+L$ctr32_done:	// zero every xmm register and the 128-byte stack scratch area, then restore rbp and rsp
+	xorps	%xmm0,%xmm0
+	xorl	%ebp,%ebp
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	movaps	%xmm0,0(%rsp)
+	pxor	%xmm8,%xmm8
+	movaps	%xmm0,16(%rsp)
+	pxor	%xmm9,%xmm9
+	movaps	%xmm0,32(%rsp)
+	pxor	%xmm10,%xmm10
+	movaps	%xmm0,48(%rsp)
+	pxor	%xmm11,%xmm11
+	movaps	%xmm0,64(%rsp)
+	pxor	%xmm12,%xmm12
+	movaps	%xmm0,80(%rsp)
+	pxor	%xmm13,%xmm13
+	movaps	%xmm0,96(%rsp)
+	pxor	%xmm14,%xmm14
+	movaps	%xmm0,112(%rsp)
+	pxor	%xmm15,%xmm15
+	movq	-8(%r11),%rbp	// rbp was pushed just below the entry stack pointer saved in r11
+	leaq	(%r11),%rsp
+L$ctr32_epilogue:
+	.byte	0xf3,0xc3	// rep ret
+
+.globl	_aesni_xts_encrypt	# AES-NI XTS encryption of a byte buffer, six blocks per main-loop pass
+.private_extern _aesni_xts_encrypt
+
+.p2align	4
+_aesni_xts_encrypt:	# roles inferred from use: rdi=input, rsi=output, rdx=byte length, rcx=data key schedule, r8=tweak key schedule, r9=16-byte tweak input
+	leaq	(%rsp),%r11	# r11 = entry rsp; the epilogue unwinds through it
+	pushq	%rbp
+	subq	$112,%rsp	# seven 16-byte scratch slots
+	andq	$-16,%rsp	# align scratch for movdqa/movaps
+	movups	(%r9),%xmm2	# xmm2 = tweak input block
+	movl	240(%r8),%eax	# eax = loop count stored at offset 240 of the tweak key schedule
+	movl	240(%rcx),%r10d	# r10d = loop count stored at offset 240 of the data key schedule
+	movups	(%r8),%xmm0
+	movups	16(%r8),%xmm1
+	leaq	32(%r8),%r8
+	xorps	%xmm0,%xmm2	# whitening with the first tweak-schedule key
+L$oop_enc1_8:	# encrypt the tweak input with the schedule at r8
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+	decl	%eax
+	movups	(%r8),%xmm1
+	leaq	16(%r8),%r8
+	jnz	L$oop_enc1_8
+.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2 -> xmm2 = first tweak
+	movups	(%rcx),%xmm0	# xmm0 = first key of the data schedule
+	movq	%rcx,%rbp	# rbp keeps the data schedule base
+	movl	%r10d,%eax
+	shll	$4,%r10d	# r10 = loop count * 16
+	movq	%rdx,%r9	# r9 keeps the full byte length (low 4 bits = tail size)
+	andq	$-16,%rdx	# rdx = bytes in whole blocks
+
+	movups	16(%rcx,%r10,1),%xmm1	# xmm1 = last key of the data schedule
+
+	movdqa	L$xts_magic(%rip),%xmm8	# mask constant defined outside this view
+	movdqa	%xmm2,%xmm15	# xmm15 = running tweak
+	pshufd	$0x5f,%xmm2,%xmm9	# xmm9 = dwords 3,3,1,1 of the tweak; sign bits drive the carry mask
+	pxor	%xmm0,%xmm1	# xmm1 = first key ^ last key
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+	movdqa	%xmm15,%xmm10
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15	# double both 64-bit halves of the tweak
+	pand	%xmm8,%xmm14
+	pxor	%xmm0,%xmm10	# xmm10 = tweak 0 ^ first key
+	pxor	%xmm14,%xmm15	# apply masked carry -> next tweak
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+	movdqa	%xmm15,%xmm11
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+	pxor	%xmm0,%xmm11	# xmm11 = tweak 1 ^ first key
+	pxor	%xmm14,%xmm15
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+	movdqa	%xmm15,%xmm12
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+	pxor	%xmm0,%xmm12	# xmm12 = tweak 2 ^ first key
+	pxor	%xmm14,%xmm15
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+	movdqa	%xmm15,%xmm13
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+	pxor	%xmm0,%xmm13	# xmm13 = tweak 3 ^ first key
+	pxor	%xmm14,%xmm15
+	movdqa	%xmm15,%xmm14
+	psrad	$31,%xmm9
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm9
+	pxor	%xmm0,%xmm14	# xmm14 = tweak 4 ^ first key
+	pxor	%xmm9,%xmm15	# xmm15 = tweak 5 (no key folded in)
+	movaps	%xmm1,96(%rsp)	# 96(%rsp) = first key ^ last key
+
+	subq	$96,%rdx
+	jc	L$xts_enc_short	# fewer than six whole blocks
+
+	movl	$16+96,%eax
+	leaq	32(%rbp,%r10,1),%rcx	# rcx points past the end of the schedule; indexed with negative rax
+	subq	%r10,%rax
+	movups	16(%rbp),%xmm1
+	movq	%rax,%r10	# r10 = negative start index reloaded into rax each pass
+	leaq	L$xts_magic(%rip),%r8
+	jmp	L$xts_enc_grandloop
+
+.p2align	5
+L$xts_enc_grandloop:	# six blocks per pass; slots 0..80(%rsp) receive tweak ^ last key for the final round
+	movdqu	0(%rdi),%xmm2
+	movdqa	%xmm0,%xmm8
+	movdqu	16(%rdi),%xmm3
+	pxor	%xmm10,%xmm2
+	movdqu	32(%rdi),%xmm4
+	pxor	%xmm11,%xmm3
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+	movdqu	48(%rdi),%xmm5
+	pxor	%xmm12,%xmm4
+.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
+	movdqu	64(%rdi),%xmm6
+	pxor	%xmm13,%xmm5
+.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
+	movdqu	80(%rdi),%xmm7
+	pxor	%xmm15,%xmm8	# xmm8 = tweak 5 ^ first key
+	movdqa	96(%rsp),%xmm9	# xmm9 = first key ^ last key
+	pxor	%xmm14,%xmm6
+.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
+	movups	32(%rbp),%xmm0
+	leaq	96(%rdi),%rdi
+	pxor	%xmm8,%xmm7
+
+	pxor	%xmm9,%xmm10	# xmm10 = tweak 0 ^ last key
+.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
+	pxor	%xmm9,%xmm11
+	movdqa	%xmm10,0(%rsp)
+.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
+	movups	48(%rbp),%xmm1
+	pxor	%xmm9,%xmm12
+
+.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
+	pxor	%xmm9,%xmm13
+	movdqa	%xmm11,16(%rsp)
+.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
+	pxor	%xmm9,%xmm14
+	movdqa	%xmm12,32(%rsp)
+.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
+.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
+	pxor	%xmm9,%xmm8
+	movdqa	%xmm14,64(%rsp)
+.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
+.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
+	movups	64(%rbp),%xmm0
+	movdqa	%xmm8,80(%rsp)
+	pshufd	$0x5f,%xmm15,%xmm9	# carry source for the next six tweaks
+	jmp	L$xts_enc_loop6
+.p2align	5
+L$xts_enc_loop6:	# two rounds per iteration; rax counts up by 32 until it reaches zero
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
+.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
+.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
+	movups	-64(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	# sets ZF consumed by the jnz below (movups leaves flags alone)
+
+.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
+.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
+.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
+.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
+.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
+	movups	-80(%rcx,%rax,1),%xmm0
+	jnz	L$xts_enc_loop6
+
+	movdqa	(%r8),%xmm8	# reload the L$xts_magic mask; remaining rounds are interleaved with next-tweak generation
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+	paddq	%xmm15,%xmm15
+	psrad	$31,%xmm14
+.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
+	pand	%xmm8,%xmm14
+	movups	(%rbp),%xmm10	# xmm10 = first key
+.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
+.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
+	pxor	%xmm14,%xmm15
+	movaps	%xmm10,%xmm11
+.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
+	movups	-64(%rcx),%xmm1
+
+	movdqa	%xmm9,%xmm14
+.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
+	paddd	%xmm9,%xmm9
+	pxor	%xmm15,%xmm10	# xmm10 = next tweak 0 ^ first key
+.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15
+.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
+.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
+	pand	%xmm8,%xmm14
+	movaps	%xmm11,%xmm12
+.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
+	pxor	%xmm14,%xmm15
+	movdqa	%xmm9,%xmm14
+.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
+	movups	-48(%rcx),%xmm0
+
+	paddd	%xmm9,%xmm9
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+	pxor	%xmm15,%xmm11	# xmm11 = next tweak 1 ^ first key
+	psrad	$31,%xmm14
+.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
+	movdqa	%xmm13,48(%rsp)	# slot 3 is written here, before xmm13 is recycled
+	pxor	%xmm14,%xmm15
+.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
+	movaps	%xmm12,%xmm13
+	movdqa	%xmm9,%xmm14
+.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
+	movups	-32(%rcx),%xmm1
+
+	paddd	%xmm9,%xmm9
+.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
+	pxor	%xmm15,%xmm12	# xmm12 = next tweak 2 ^ first key
+	psrad	$31,%xmm14
+.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
+.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
+.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
+	pxor	%xmm14,%xmm15
+	movaps	%xmm13,%xmm14
+.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
+
+	movdqa	%xmm9,%xmm0
+	paddd	%xmm9,%xmm9
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+	pxor	%xmm15,%xmm13	# xmm13 = next tweak 3 ^ first key
+	psrad	$31,%xmm0
+.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm0
+.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
+	pxor	%xmm0,%xmm15
+	movups	(%rbp),%xmm0	# xmm0 = first key again for the next pass
+.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
+.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
+	movups	16(%rbp),%xmm1
+
+	pxor	%xmm15,%xmm14	# xmm14 = next tweak 4 ^ first key
+.byte	102,15,56,221,84,36,0	# aesenclast 0(%rsp),%xmm2
+	psrad	$31,%xmm9
+	paddq	%xmm15,%xmm15
+.byte	102,15,56,221,92,36,16	# aesenclast 16(%rsp),%xmm3
+.byte	102,15,56,221,100,36,32	# aesenclast 32(%rsp),%xmm4
+	pand	%xmm8,%xmm9
+	movq	%r10,%rax	# reset the round index
+.byte	102,15,56,221,108,36,48	# aesenclast 48(%rsp),%xmm5
+.byte	102,15,56,221,116,36,64	# aesenclast 64(%rsp),%xmm6
+.byte	102,15,56,221,124,36,80	# aesenclast 80(%rsp),%xmm7
+	pxor	%xmm9,%xmm15	# xmm15 = next tweak 5
+
+	leaq	96(%rsi),%rsi
+	movups	%xmm2,-96(%rsi)
+	movups	%xmm3,-80(%rsi)
+	movups	%xmm4,-64(%rsi)
+	movups	%xmm5,-48(%rsi)
+	movups	%xmm6,-32(%rsi)
+	movups	%xmm7,-16(%rsi)
+	subq	$96,%rdx
+	jnc	L$xts_enc_grandloop
+
+	movl	$16+96,%eax
+	subl	%r10d,%eax
+	movq	%rbp,%rcx	# rcx = schedule base again
+	shrl	$4,%eax	# eax = loop count recovered from the negative index
+
+L$xts_enc_short:	# 0..5 whole blocks remain; tweaks still have the first key folded in
+
+	movl	%eax,%r10d	# r10d keeps the loop count for the tail code
+	pxor	%xmm0,%xmm10	# strip the first key from tweak 0
+	addq	$96,%rdx	# rdx = remaining whole-block bytes
+	jz	L$xts_enc_done
+
+	pxor	%xmm0,%xmm11
+	cmpq	$0x20,%rdx
+	jb	L$xts_enc_one
+	pxor	%xmm0,%xmm12	# pxor leaves the cmpq flags intact for the je
+	je	L$xts_enc_two
+
+	pxor	%xmm0,%xmm13
+	cmpq	$0x40,%rdx
+	jb	L$xts_enc_three
+	pxor	%xmm0,%xmm14
+	je	L$xts_enc_four
+
+	movdqu	(%rdi),%xmm2	# five blocks remain
+	movdqu	16(%rdi),%xmm3
+	movdqu	32(%rdi),%xmm4
+	pxor	%xmm10,%xmm2
+	movdqu	48(%rdi),%xmm5
+	pxor	%xmm11,%xmm3
+	movdqu	64(%rdi),%xmm6
+	leaq	80(%rdi),%rdi
+	pxor	%xmm12,%xmm4
+	pxor	%xmm13,%xmm5
+	pxor	%xmm14,%xmm6
+	pxor	%xmm7,%xmm7	# sixth lane is unused; give it a zero input
+
+	call	_aesni_encrypt6
+
+	xorps	%xmm10,%xmm2
+	movdqa	%xmm15,%xmm10	# xmm10 = tweak for a possible stolen block
+	xorps	%xmm11,%xmm3
+	xorps	%xmm12,%xmm4
+	movdqu	%xmm2,(%rsi)
+	xorps	%xmm13,%xmm5
+	movdqu	%xmm3,16(%rsi)
+	xorps	%xmm14,%xmm6
+	movdqu	%xmm4,32(%rsi)
+	movdqu	%xmm5,48(%rsi)
+	movdqu	%xmm6,64(%rsi)
+	leaq	80(%rsi),%rsi
+	jmp	L$xts_enc_done
+
+.p2align	4
+L$xts_enc_one:
+	movups	(%rdi),%xmm2
+	leaq	16(%rdi),%rdi
+	xorps	%xmm10,%xmm2
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+L$oop_enc1_9:
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	L$oop_enc1_9
+.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
+	xorps	%xmm10,%xmm2
+	movdqa	%xmm11,%xmm10	# xmm10 = next tweak
+	movups	%xmm2,(%rsi)
+	leaq	16(%rsi),%rsi
+	jmp	L$xts_enc_done
+
+.p2align	4
+L$xts_enc_two:
+	movups	(%rdi),%xmm2
+	movups	16(%rdi),%xmm3
+	leaq	32(%rdi),%rdi
+	xorps	%xmm10,%xmm2
+	xorps	%xmm11,%xmm3
+
+	call	_aesni_encrypt2
+
+	xorps	%xmm10,%xmm2
+	movdqa	%xmm12,%xmm10	# xmm10 = next tweak
+	xorps	%xmm11,%xmm3
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	leaq	32(%rsi),%rsi
+	jmp	L$xts_enc_done
+
+.p2align	4
+L$xts_enc_three:
+	movups	(%rdi),%xmm2
+	movups	16(%rdi),%xmm3
+	movups	32(%rdi),%xmm4
+	leaq	48(%rdi),%rdi
+	xorps	%xmm10,%xmm2
+	xorps	%xmm11,%xmm3
+	xorps	%xmm12,%xmm4
+
+	call	_aesni_encrypt3
+
+	xorps	%xmm10,%xmm2
+	movdqa	%xmm13,%xmm10	# xmm10 = next tweak
+	xorps	%xmm11,%xmm3
+	xorps	%xmm12,%xmm4
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	leaq	48(%rsi),%rsi
+	jmp	L$xts_enc_done
+
+.p2align	4
+L$xts_enc_four:
+	movups	(%rdi),%xmm2
+	movups	16(%rdi),%xmm3
+	movups	32(%rdi),%xmm4
+	xorps	%xmm10,%xmm2
+	movups	48(%rdi),%xmm5
+	leaq	64(%rdi),%rdi
+	xorps	%xmm11,%xmm3
+	xorps	%xmm12,%xmm4
+	xorps	%xmm13,%xmm5
+
+	call	_aesni_encrypt4
+
+	pxor	%xmm10,%xmm2
+	movdqa	%xmm14,%xmm10	# xmm10 = next tweak
+	pxor	%xmm11,%xmm3
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm3,16(%rsi)
+	movdqu	%xmm4,32(%rsi)
+	movdqu	%xmm5,48(%rsi)
+	leaq	64(%rsi),%rsi
+	jmp	L$xts_enc_done
+
+.p2align	4
+L$xts_enc_done:
+	andq	$15,%r9	# r9 = trailing bytes beyond the last whole block
+	jz	L$xts_enc_ret
+	movq	%r9,%rdx
+
+L$xts_enc_steal:	# swap tail input bytes with the leading bytes of the last output block
+	movzbl	(%rdi),%eax
+	movzbl	-16(%rsi),%ecx
+	leaq	1(%rdi),%rdi
+	movb	%al,-16(%rsi)
+	movb	%cl,0(%rsi)
+	leaq	1(%rsi),%rsi
+	subq	$1,%rdx
+	jnz	L$xts_enc_steal
+
+	subq	%r9,%rsi	# rewind rsi to just past the patched block
+	movq	%rbp,%rcx
+	movl	%r10d,%eax
+
+	movups	-16(%rsi),%xmm2	# encrypt the patched block again with the tweak in xmm10
+	xorps	%xmm10,%xmm2
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+L$oop_enc1_10:
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	L$oop_enc1_10
+.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
+	xorps	%xmm10,%xmm2
+	movups	%xmm2,-16(%rsi)
+
+L$xts_enc_ret:	# zero every xmm register and the stack scratch before returning
+	xorps	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	movaps	%xmm0,0(%rsp)
+	pxor	%xmm8,%xmm8
+	movaps	%xmm0,16(%rsp)
+	pxor	%xmm9,%xmm9
+	movaps	%xmm0,32(%rsp)
+	pxor	%xmm10,%xmm10
+	movaps	%xmm0,48(%rsp)
+	pxor	%xmm11,%xmm11
+	movaps	%xmm0,64(%rsp)
+	pxor	%xmm12,%xmm12
+	movaps	%xmm0,80(%rsp)
+	pxor	%xmm13,%xmm13
+	movaps	%xmm0,96(%rsp)
+	pxor	%xmm14,%xmm14
+	pxor	%xmm15,%xmm15
+	movq	-8(%r11),%rbp	# restore the rbp pushed at entry
+	leaq	(%r11),%rsp	# restore the entry stack pointer
+L$xts_enc_epilogue:
+	.byte	0xf3,0xc3	# rep ret
+
+.globl	_aesni_xts_decrypt	# AES-NI XTS decryption of a byte buffer, six blocks per main-loop pass
+.private_extern _aesni_xts_decrypt
+
+.p2align	4
+_aesni_xts_decrypt:	# roles inferred from use: rdi=input, rsi=output, rdx=byte length, rcx=data key schedule, r8=tweak key schedule, r9=16-byte tweak input
+	leaq	(%rsp),%r11	# r11 = entry rsp; the epilogue unwinds through it
+	pushq	%rbp
+	subq	$112,%rsp	# seven 16-byte scratch slots
+	andq	$-16,%rsp	# align scratch for movdqa/movaps
+	movups	(%r9),%xmm2	# xmm2 = tweak input block
+	movl	240(%r8),%eax	# eax = loop count stored at offset 240 of the tweak key schedule
+	movl	240(%rcx),%r10d	# r10d = loop count stored at offset 240 of the data key schedule
+	movups	(%r8),%xmm0
+	movups	16(%r8),%xmm1
+	leaq	32(%r8),%r8
+	xorps	%xmm0,%xmm2
+L$oop_enc1_11:	# the tweak is produced with aesenc even though the data path uses aesdec
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+	decl	%eax
+	movups	(%r8),%xmm1
+	leaq	16(%r8),%r8
+	jnz	L$oop_enc1_11
+.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2 -> xmm2 = first tweak
+	xorl	%eax,%eax
+	testq	$15,%rdx
+	setnz	%al
+	shlq	$4,%rax
+	subq	%rax,%rdx	# when a partial tail exists, hold back one whole block for the steal code
+
+	movups	(%rcx),%xmm0	# xmm0 = first key of the data schedule
+	movq	%rcx,%rbp	# rbp keeps the data schedule base
+	movl	%r10d,%eax
+	shll	$4,%r10d	# r10 = loop count * 16
+	movq	%rdx,%r9	# r9 keeps the adjusted length (low 4 bits = tail size)
+	andq	$-16,%rdx	# rdx = bytes in whole blocks handled by the bulk code
+
+	movups	16(%rcx,%r10,1),%xmm1	# xmm1 = last key of the data schedule
+
+	movdqa	L$xts_magic(%rip),%xmm8	# mask constant defined outside this view
+	movdqa	%xmm2,%xmm15	# xmm15 = running tweak
+	pshufd	$0x5f,%xmm2,%xmm9	# xmm9 = dwords 3,3,1,1 of the tweak; sign bits drive the carry mask
+	pxor	%xmm0,%xmm1	# xmm1 = first key ^ last key
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+	movdqa	%xmm15,%xmm10
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15	# double both 64-bit halves of the tweak
+	pand	%xmm8,%xmm14
+	pxor	%xmm0,%xmm10	# xmm10 = tweak 0 ^ first key
+	pxor	%xmm14,%xmm15	# apply masked carry -> next tweak
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+	movdqa	%xmm15,%xmm11
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+	pxor	%xmm0,%xmm11	# xmm11 = tweak 1 ^ first key
+	pxor	%xmm14,%xmm15
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+	movdqa	%xmm15,%xmm12
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+	pxor	%xmm0,%xmm12	# xmm12 = tweak 2 ^ first key
+	pxor	%xmm14,%xmm15
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+	movdqa	%xmm15,%xmm13
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+	pxor	%xmm0,%xmm13	# xmm13 = tweak 3 ^ first key
+	pxor	%xmm14,%xmm15
+	movdqa	%xmm15,%xmm14
+	psrad	$31,%xmm9
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm9
+	pxor	%xmm0,%xmm14	# xmm14 = tweak 4 ^ first key
+	pxor	%xmm9,%xmm15	# xmm15 = tweak 5 (no key folded in)
+	movaps	%xmm1,96(%rsp)	# 96(%rsp) = first key ^ last key
+
+	subq	$96,%rdx
+	jc	L$xts_dec_short	# fewer than six whole blocks
+
+	movl	$16+96,%eax
+	leaq	32(%rbp,%r10,1),%rcx	# rcx points past the end of the schedule; indexed with negative rax
+	subq	%r10,%rax
+	movups	16(%rbp),%xmm1
+	movq	%rax,%r10	# r10 = negative start index reloaded into rax each pass
+	leaq	L$xts_magic(%rip),%r8
+	jmp	L$xts_dec_grandloop
+
+.p2align	5
+L$xts_dec_grandloop:	# six blocks per pass; slots 0..80(%rsp) receive tweak ^ last key for the final round
+	movdqu	0(%rdi),%xmm2
+	movdqa	%xmm0,%xmm8
+	movdqu	16(%rdi),%xmm3
+	pxor	%xmm10,%xmm2
+	movdqu	32(%rdi),%xmm4
+	pxor	%xmm11,%xmm3
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
+	movdqu	48(%rdi),%xmm5
+	pxor	%xmm12,%xmm4
+.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
+	movdqu	64(%rdi),%xmm6
+	pxor	%xmm13,%xmm5
+.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
+	movdqu	80(%rdi),%xmm7
+	pxor	%xmm15,%xmm8	# xmm8 = tweak 5 ^ first key
+	movdqa	96(%rsp),%xmm9	# xmm9 = first key ^ last key
+	pxor	%xmm14,%xmm6
+.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
+	movups	32(%rbp),%xmm0
+	leaq	96(%rdi),%rdi
+	pxor	%xmm8,%xmm7
+
+	pxor	%xmm9,%xmm10	# xmm10 = tweak 0 ^ last key
+.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
+	pxor	%xmm9,%xmm11
+	movdqa	%xmm10,0(%rsp)
+.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
+	movups	48(%rbp),%xmm1
+	pxor	%xmm9,%xmm12
+
+.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
+	pxor	%xmm9,%xmm13
+	movdqa	%xmm11,16(%rsp)
+.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
+	pxor	%xmm9,%xmm14
+	movdqa	%xmm12,32(%rsp)
+.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
+.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
+	pxor	%xmm9,%xmm8
+	movdqa	%xmm14,64(%rsp)
+.byte	102,15,56,222,240	# aesdec %xmm0,%xmm6
+.byte	102,15,56,222,248	# aesdec %xmm0,%xmm7
+	movups	64(%rbp),%xmm0
+	movdqa	%xmm8,80(%rsp)
+	pshufd	$0x5f,%xmm15,%xmm9	# carry source for the next six tweaks
+	jmp	L$xts_dec_loop6
+.p2align	5
+L$xts_dec_loop6:	# two rounds per iteration; rax counts up by 32 until it reaches zero
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
+.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
+.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
+.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
+.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
+.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
+	movups	-64(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	# sets ZF consumed by the jnz below (movups leaves flags alone)
+
+.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
+.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
+.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
+.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
+.byte	102,15,56,222,240	# aesdec %xmm0,%xmm6
+.byte	102,15,56,222,248	# aesdec %xmm0,%xmm7
+	movups	-80(%rcx,%rax,1),%xmm0
+	jnz	L$xts_dec_loop6
+
+	movdqa	(%r8),%xmm8	# reload the L$xts_magic mask; remaining rounds are interleaved with next-tweak generation
+	movdqa	%xmm9,%xmm14
+	paddd	%xmm9,%xmm9
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
+	paddq	%xmm15,%xmm15
+	psrad	$31,%xmm14
+.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
+	pand	%xmm8,%xmm14
+	movups	(%rbp),%xmm10	# xmm10 = first key
+.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
+.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
+.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
+	pxor	%xmm14,%xmm15
+	movaps	%xmm10,%xmm11
+.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
+	movups	-64(%rcx),%xmm1
+
+	movdqa	%xmm9,%xmm14
+.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
+	paddd	%xmm9,%xmm9
+	pxor	%xmm15,%xmm10	# xmm10 = next tweak 0 ^ first key
+.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
+	psrad	$31,%xmm14
+	paddq	%xmm15,%xmm15
+.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
+.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
+	pand	%xmm8,%xmm14
+	movaps	%xmm11,%xmm12
+.byte	102,15,56,222,240	# aesdec %xmm0,%xmm6
+	pxor	%xmm14,%xmm15
+	movdqa	%xmm9,%xmm14
+.byte	102,15,56,222,248	# aesdec %xmm0,%xmm7
+	movups	-48(%rcx),%xmm0
+
+	paddd	%xmm9,%xmm9
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
+	pxor	%xmm15,%xmm11	# xmm11 = next tweak 1 ^ first key
+	psrad	$31,%xmm14
+.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
+.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
+	movdqa	%xmm13,48(%rsp)	# slot 3 is written here, before xmm13 is recycled
+	pxor	%xmm14,%xmm15
+.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
+	movaps	%xmm12,%xmm13
+	movdqa	%xmm9,%xmm14
+.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
+	movups	-32(%rcx),%xmm1
+
+	paddd	%xmm9,%xmm9
+.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
+	pxor	%xmm15,%xmm12	# xmm12 = next tweak 2 ^ first key
+	psrad	$31,%xmm14
+.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm14
+.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
+.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
+.byte	102,15,56,222,240	# aesdec %xmm0,%xmm6
+	pxor	%xmm14,%xmm15
+	movaps	%xmm13,%xmm14
+.byte	102,15,56,222,248	# aesdec %xmm0,%xmm7
+
+	movdqa	%xmm9,%xmm0
+	paddd	%xmm9,%xmm9
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
+	pxor	%xmm15,%xmm13	# xmm13 = next tweak 3 ^ first key
+	psrad	$31,%xmm0
+.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm0
+.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
+.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
+	pxor	%xmm0,%xmm15
+	movups	(%rbp),%xmm0	# xmm0 = first key again for the next pass
+.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
+.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
+	movups	16(%rbp),%xmm1
+
+	pxor	%xmm15,%xmm14	# xmm14 = next tweak 4 ^ first key
+.byte	102,15,56,223,84,36,0	# aesdeclast 0(%rsp),%xmm2
+	psrad	$31,%xmm9
+	paddq	%xmm15,%xmm15
+.byte	102,15,56,223,92,36,16	# aesdeclast 16(%rsp),%xmm3
+.byte	102,15,56,223,100,36,32	# aesdeclast 32(%rsp),%xmm4
+	pand	%xmm8,%xmm9
+	movq	%r10,%rax	# reset the round index
+.byte	102,15,56,223,108,36,48	# aesdeclast 48(%rsp),%xmm5
+.byte	102,15,56,223,116,36,64	# aesdeclast 64(%rsp),%xmm6
+.byte	102,15,56,223,124,36,80	# aesdeclast 80(%rsp),%xmm7
+	pxor	%xmm9,%xmm15	# xmm15 = next tweak 5
+
+	leaq	96(%rsi),%rsi
+	movups	%xmm2,-96(%rsi)
+	movups	%xmm3,-80(%rsi)
+	movups	%xmm4,-64(%rsi)
+	movups	%xmm5,-48(%rsi)
+	movups	%xmm6,-32(%rsi)
+	movups	%xmm7,-16(%rsi)
+	subq	$96,%rdx
+	jnc	L$xts_dec_grandloop
+
+	movl	$16+96,%eax
+	subl	%r10d,%eax
+	movq	%rbp,%rcx	# rcx = schedule base again
+	shrl	$4,%eax	# eax = loop count recovered from the negative index
+
+L$xts_dec_short:	# 0..5 whole blocks remain; tweaks still have the first key folded in
+
+	movl	%eax,%r10d	# r10d keeps the loop count for the tail code
+	pxor	%xmm0,%xmm10	# strip the first key from tweak 0
+	pxor	%xmm0,%xmm11	# and from tweak 1, which the steal path needs even with zero blocks left
+	addq	$96,%rdx	# rdx = remaining whole-block bytes
+	jz	L$xts_dec_done
+
+	pxor	%xmm0,%xmm12
+	cmpq	$0x20,%rdx
+	jb	L$xts_dec_one
+	pxor	%xmm0,%xmm13	# pxor leaves the cmpq flags intact for the je
+	je	L$xts_dec_two
+
+	pxor	%xmm0,%xmm14
+	cmpq	$0x40,%rdx
+	jb	L$xts_dec_three
+	je	L$xts_dec_four
+
+	movdqu	(%rdi),%xmm2	# five blocks remain
+	movdqu	16(%rdi),%xmm3
+	movdqu	32(%rdi),%xmm4
+	pxor	%xmm10,%xmm2
+	movdqu	48(%rdi),%xmm5
+	pxor	%xmm11,%xmm3
+	movdqu	64(%rdi),%xmm6
+	leaq	80(%rdi),%rdi
+	pxor	%xmm12,%xmm4
+	pxor	%xmm13,%xmm5
+	pxor	%xmm14,%xmm6
+
+	call	_aesni_decrypt6
+
+	xorps	%xmm10,%xmm2
+	xorps	%xmm11,%xmm3
+	xorps	%xmm12,%xmm4
+	movdqu	%xmm2,(%rsi)
+	xorps	%xmm13,%xmm5
+	movdqu	%xmm3,16(%rsi)
+	xorps	%xmm14,%xmm6
+	movdqu	%xmm4,32(%rsi)
+	pxor	%xmm14,%xmm14
+	movdqu	%xmm5,48(%rsi)
+	pcmpgtd	%xmm15,%xmm14	# all-ones in each dword of xmm15 that is negative
+	movdqu	%xmm6,64(%rsi)
+	leaq	80(%rsi),%rsi
+	pshufd	$0x13,%xmm14,%xmm11	# arrange the sign masks as the carry mask for one more doubling
+	andq	$15,%r9	# r9 = trailing bytes
+	jz	L$xts_dec_ret
+
+	movdqa	%xmm15,%xmm10	# xmm10 = current tweak, used for the final stolen block
+	paddq	%xmm15,%xmm15
+	pand	%xmm8,%xmm11
+	pxor	%xmm15,%xmm11	# xmm11 = following tweak, used first
+	jmp	L$xts_dec_done2
+
+.p2align	4
+L$xts_dec_one:
+	movups	(%rdi),%xmm2
+	leaq	16(%rdi),%rdi
+	xorps	%xmm10,%xmm2
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+L$oop_dec1_12:
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	L$oop_dec1_12
+.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
+	xorps	%xmm10,%xmm2
+	movdqa	%xmm11,%xmm10	# shift the next two tweaks into xmm10/xmm11 for the steal path
+	movups	%xmm2,(%rsi)
+	movdqa	%xmm12,%xmm11
+	leaq	16(%rsi),%rsi
+	jmp	L$xts_dec_done
+
+.p2align	4
+L$xts_dec_two:
+	movups	(%rdi),%xmm2
+	movups	16(%rdi),%xmm3
+	leaq	32(%rdi),%rdi
+	xorps	%xmm10,%xmm2
+	xorps	%xmm11,%xmm3
+
+	call	_aesni_decrypt2
+
+	xorps	%xmm10,%xmm2
+	movdqa	%xmm12,%xmm10	# next two tweaks -> xmm10/xmm11
+	xorps	%xmm11,%xmm3
+	movdqa	%xmm13,%xmm11
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	leaq	32(%rsi),%rsi
+	jmp	L$xts_dec_done
+
+.p2align	4
+L$xts_dec_three:
+	movups	(%rdi),%xmm2
+	movups	16(%rdi),%xmm3
+	movups	32(%rdi),%xmm4
+	leaq	48(%rdi),%rdi
+	xorps	%xmm10,%xmm2
+	xorps	%xmm11,%xmm3
+	xorps	%xmm12,%xmm4
+
+	call	_aesni_decrypt3
+
+	xorps	%xmm10,%xmm2
+	movdqa	%xmm13,%xmm10	# next two tweaks -> xmm10/xmm11
+	xorps	%xmm11,%xmm3
+	movdqa	%xmm14,%xmm11
+	xorps	%xmm12,%xmm4
+	movups	%xmm2,(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	leaq	48(%rsi),%rsi
+	jmp	L$xts_dec_done
+
+.p2align	4
+L$xts_dec_four:
+	movups	(%rdi),%xmm2
+	movups	16(%rdi),%xmm3
+	movups	32(%rdi),%xmm4
+	xorps	%xmm10,%xmm2
+	movups	48(%rdi),%xmm5
+	leaq	64(%rdi),%rdi
+	xorps	%xmm11,%xmm3
+	xorps	%xmm12,%xmm4
+	xorps	%xmm13,%xmm5
+
+	call	_aesni_decrypt4
+
+	pxor	%xmm10,%xmm2
+	movdqa	%xmm14,%xmm10	# next two tweaks -> xmm10/xmm11
+	pxor	%xmm11,%xmm3
+	movdqa	%xmm15,%xmm11
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm3,16(%rsi)
+	movdqu	%xmm4,32(%rsi)
+	movdqu	%xmm5,48(%rsi)
+	leaq	64(%rsi),%rsi
+	jmp	L$xts_dec_done
+
+.p2align	4
+L$xts_dec_done:
+	andq	$15,%r9	# r9 = trailing bytes beyond the last whole block
+	jz	L$xts_dec_ret
+L$xts_dec_done2:	# tail handling: the held-back block is decrypted with the later tweak (xmm11) first
+	movq	%r9,%rdx
+	movq	%rbp,%rcx
+	movl	%r10d,%eax
+
+	movups	(%rdi),%xmm2
+	xorps	%xmm11,%xmm2
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+L$oop_dec1_13:
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	L$oop_dec1_13
+.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
+	xorps	%xmm11,%xmm2
+	movups	%xmm2,(%rsi)
+
+L$xts_dec_steal:	# swap tail input bytes at 16(%rdi) with the leading bytes of the block just written
+	movzbl	16(%rdi),%eax
+	movzbl	(%rsi),%ecx
+	leaq	1(%rdi),%rdi
+	movb	%al,(%rsi)
+	movb	%cl,16(%rsi)
+	leaq	1(%rsi),%rsi
+	subq	$1,%rdx
+	jnz	L$xts_dec_steal
+
+	subq	%r9,%rsi	# rewind rsi to the start of the patched block
+	movq	%rbp,%rcx
+	movl	%r10d,%eax
+
+	movups	(%rsi),%xmm2	# decrypt the patched block with the earlier tweak (xmm10)
+	xorps	%xmm10,%xmm2
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+L$oop_dec1_14:
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
+	decl	%eax
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	L$oop_dec1_14
+.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
+	xorps	%xmm10,%xmm2
+	movups	%xmm2,(%rsi)
+
+L$xts_dec_ret:	# zero every xmm register and the stack scratch before returning
+	xorps	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	movaps	%xmm0,0(%rsp)
+	pxor	%xmm8,%xmm8
+	movaps	%xmm0,16(%rsp)
+	pxor	%xmm9,%xmm9
+	movaps	%xmm0,32(%rsp)
+	pxor	%xmm10,%xmm10
+	movaps	%xmm0,48(%rsp)
+	pxor	%xmm11,%xmm11
+	movaps	%xmm0,64(%rsp)
+	pxor	%xmm12,%xmm12
+	movaps	%xmm0,80(%rsp)
+	pxor	%xmm13,%xmm13
+	movaps	%xmm0,96(%rsp)
+	pxor	%xmm14,%xmm14
+	pxor	%xmm15,%xmm15
+	movq	-8(%r11),%rbp	# restore the rbp pushed at entry
+	leaq	(%r11),%rsp	# restore the entry stack pointer
+L$xts_dec_epilogue:
+	.byte	0xf3,0xc3	# rep ret
+
+.globl	_aesni_ocb_encrypt	# AES-NI OCB bulk encryption driver; block work is done by __ocb_encrypt1/4/6
+.private_extern _aesni_ocb_encrypt
+
+.p2align	5
+_aesni_ocb_encrypt:	# roles inferred from use: rdi=input, rsi=output, rdx=block count, rcx=key schedule, r8=starting block number, r9=16-byte offset (read and written back)
+	leaq	(%rsp),%rax
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	movq	8(%rax),%rbx	# rbx = first stack argument: base of a table of 16-byte entries indexed by bsf(block number)
+	movq	8+8(%rax),%rbp	# rbp = second stack argument: 16-byte checksum (read and written back)
+
+	movl	240(%rcx),%r10d	# r10d = value at offset 240 of the key schedule; sizes the round loops
+	movq	%rcx,%r11	# r11 keeps the key schedule base
+	shll	$4,%r10d
+	movups	(%rcx),%xmm9	# xmm9 = first key
+	movups	16(%rcx,%r10,1),%xmm1	# xmm1 = last key
+
+	movdqu	(%r9),%xmm15	# xmm15 = running offset
+	pxor	%xmm1,%xmm9	# xmm9 = first key ^ last key
+	pxor	%xmm1,%xmm15	# running offset is carried with the last key folded in
+
+	movl	$16+32,%eax
+	leaq	32(%r11,%r10,1),%rcx	# rcx points past the end of the schedule; helpers index it with negative rax
+	movups	16(%r11),%xmm1
+	subq	%r10,%rax
+	movq	%rax,%r10	# r10 = negative start index restored into rax by each helper
+
+	movdqu	(%rbx),%xmm10	# xmm10 = table entry 0
+	movdqu	(%rbp),%xmm8	# xmm8 = checksum accumulator
+
+	testq	$1,%r8
+	jnz	L$ocb_enc_odd
+
+	bsfq	%r8,%r12	# even block number: handle one block alone so the six-block path starts on an odd number
+	addq	$1,%r8
+	shlq	$4,%r12
+	movdqu	(%rbx,%r12,1),%xmm7	# xmm7 = table entry selected by bsf(block number)
+	movdqu	(%rdi),%xmm2
+	leaq	16(%rdi),%rdi
+
+	call	__ocb_encrypt1
+
+	movdqa	%xmm7,%xmm15	# helper leaves the new running offset in xmm7
+	movups	%xmm2,(%rsi)
+	leaq	16(%rsi),%rsi
+	subq	$1,%rdx
+	jz	L$ocb_enc_done
+
+L$ocb_enc_odd:	# precompute table byte offsets for block numbers r8+1, r8+3, r8+5
+	leaq	1(%r8),%r12
+	leaq	3(%r8),%r13
+	leaq	5(%r8),%r14
+	leaq	6(%r8),%r8
+	bsfq	%r12,%r12
+	bsfq	%r13,%r13
+	bsfq	%r14,%r14
+	shlq	$4,%r12
+	shlq	$4,%r13
+	shlq	$4,%r14
+
+	subq	$6,%rdx
+	jc	L$ocb_enc_short
+	jmp	L$ocb_enc_grandloop
+
+.p2align	5
+L$ocb_enc_grandloop:	# six blocks per pass
+	movdqu	0(%rdi),%xmm2
+	movdqu	16(%rdi),%xmm3
+	movdqu	32(%rdi),%xmm4
+	movdqu	48(%rdi),%xmm5
+	movdqu	64(%rdi),%xmm6
+	movdqu	80(%rdi),%xmm7
+	leaq	96(%rdi),%rdi
+
+	call	__ocb_encrypt6
+
+	movups	%xmm2,0(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	movups	%xmm5,48(%rsi)
+	movups	%xmm6,64(%rsi)
+	movups	%xmm7,80(%rsi)
+	leaq	96(%rsi),%rsi
+	subq	$6,%rdx
+	jnc	L$ocb_enc_grandloop
+
+L$ocb_enc_short:	# 0..5 blocks remain
+	addq	$6,%rdx
+	jz	L$ocb_enc_done
+
+	movdqu	0(%rdi),%xmm2
+	cmpq	$2,%rdx
+	jb	L$ocb_enc_one
+	movdqu	16(%rdi),%xmm3	# movdqu leaves the cmpq flags intact for the je
+	je	L$ocb_enc_two
+
+	movdqu	32(%rdi),%xmm4
+	cmpq	$4,%rdx
+	jb	L$ocb_enc_three
+	movdqu	48(%rdi),%xmm5
+	je	L$ocb_enc_four
+
+	movdqu	64(%rdi),%xmm6	# five blocks remain
+	pxor	%xmm7,%xmm7	# zero the unused sixth lane so it does not disturb the checksum
+
+	call	__ocb_encrypt6
+
+	movdqa	%xmm14,%xmm15	# running offset = offset of the fifth block
+	movups	%xmm2,0(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	movups	%xmm5,48(%rsi)
+	movups	%xmm6,64(%rsi)
+
+	jmp	L$ocb_enc_done
+
+.p2align	4
+L$ocb_enc_one:
+	movdqa	%xmm10,%xmm7	# odd block number: table entry 0
+
+	call	__ocb_encrypt1
+
+	movdqa	%xmm7,%xmm15
+	movups	%xmm2,0(%rsi)
+	jmp	L$ocb_enc_done
+
+.p2align	4
+L$ocb_enc_two:
+	pxor	%xmm4,%xmm4	# zero the unused lanes
+	pxor	%xmm5,%xmm5
+
+	call	__ocb_encrypt4
+
+	movdqa	%xmm11,%xmm15	# running offset = offset of the second block
+	movups	%xmm2,0(%rsi)
+	movups	%xmm3,16(%rsi)
+
+	jmp	L$ocb_enc_done
+
+.p2align	4
+L$ocb_enc_three:
+	pxor	%xmm5,%xmm5	# zero the unused lane
+
+	call	__ocb_encrypt4
+
+	movdqa	%xmm12,%xmm15	# running offset = offset of the third block
+	movups	%xmm2,0(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+
+	jmp	L$ocb_enc_done
+
+.p2align	4
+L$ocb_enc_four:
+	call	__ocb_encrypt4
+
+	movdqa	%xmm13,%xmm15	# running offset = offset of the fourth block
+	movups	%xmm2,0(%rsi)
+	movups	%xmm3,16(%rsi)
+	movups	%xmm4,32(%rsi)
+	movups	%xmm5,48(%rsi)
+
+L$ocb_enc_done:
+	pxor	%xmm0,%xmm15	# NOTE(review): presumably xmm0 is the last key left by the helper, unfolding it from the offset - confirm
+	movdqu	%xmm8,(%rbp)	# write back the checksum
+	movdqu	%xmm15,(%r9)	# write back the offset
+
+	xorps	%xmm0,%xmm0	# zero every xmm register before returning
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	pxor	%xmm8,%xmm8
+	pxor	%xmm9,%xmm9
+	pxor	%xmm10,%xmm10
+	pxor	%xmm11,%xmm11
+	pxor	%xmm12,%xmm12
+	pxor	%xmm13,%xmm13
+	pxor	%xmm14,%xmm14
+	pxor	%xmm15,%xmm15
+	leaq	40(%rsp),%rax	# rax = address just above the five pushed registers
+	movq	-40(%rax),%r14
+	movq	-32(%rax),%r13
+	movq	-24(%rax),%r12
+	movq	-16(%rax),%rbp
+	movq	-8(%rax),%rbx
+	leaq	(%rax),%rsp
+L$ocb_enc_epilogue:
+	.byte	0xf3,0xc3	# rep ret
+
+
+
+.p2align	5
+__ocb_encrypt6:	# encrypt xmm2..xmm7 in place; uses xmm8 checksum, xmm9 first^last key, xmm10 table entry 0, xmm15 offset^last key, r12/r13/r14 table byte offsets
+	pxor	%xmm9,%xmm15	# xmm15 = running offset ^ first key
+	movdqu	(%rbx,%r12,1),%xmm11
+	movdqa	%xmm10,%xmm12
+	movdqu	(%rbx,%r13,1),%xmm13
+	movdqa	%xmm10,%xmm14
+	pxor	%xmm15,%xmm10	# xmm10 = offset of block 1 (with first key)
+	movdqu	(%rbx,%r14,1),%xmm15
+	pxor	%xmm10,%xmm11	# each offset chains from the previous one
+	pxor	%xmm2,%xmm8	# checksum ^= input block
+	pxor	%xmm10,%xmm2	# input ^= offset ^ first key
+	pxor	%xmm11,%xmm12
+	pxor	%xmm3,%xmm8
+	pxor	%xmm11,%xmm3
+	pxor	%xmm12,%xmm13
+	pxor	%xmm4,%xmm8
+	pxor	%xmm12,%xmm4
+	pxor	%xmm13,%xmm14
+	pxor	%xmm5,%xmm8
+	pxor	%xmm13,%xmm5
+	pxor	%xmm14,%xmm15
+	pxor	%xmm6,%xmm8
+	pxor	%xmm14,%xmm6
+	pxor	%xmm7,%xmm8
+	pxor	%xmm15,%xmm7
+	movups	32(%r11),%xmm0
+
+	leaq	1(%r8),%r12	# start computing table offsets for the next six blocks
+	leaq	3(%r8),%r13
+	leaq	5(%r8),%r14
+	addq	$6,%r8
+	pxor	%xmm9,%xmm10	# convert offsets to offset ^ last key for the final round
+	bsfq	%r12,%r12
+	bsfq	%r13,%r13
+	bsfq	%r14,%r14
+
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
+	pxor	%xmm9,%xmm11
+	pxor	%xmm9,%xmm12
+.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
+	pxor	%xmm9,%xmm13
+	pxor	%xmm9,%xmm14
+.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
+	movups	48(%r11),%xmm1
+	pxor	%xmm9,%xmm15
+
+.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
+.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
+.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
+.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
+.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
+	movups	64(%r11),%xmm0
+	shlq	$4,%r12
+	shlq	$4,%r13
+	jmp	L$ocb_enc_loop6
+
+.p2align	5
+L$ocb_enc_loop6:	# two rounds per iteration; rax counts up by 32 until it reaches zero
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
+.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
+.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	# sets ZF consumed by the jnz below
+
+.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
+.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
+.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
+.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
+.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$ocb_enc_loop6
+
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
+.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
+.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
+	movups	16(%r11),%xmm1	# reload the second schedule entry for the next call
+	shlq	$4,%r14
+
+.byte	102,65,15,56,221,210	# aesenclast %xmm10,%xmm2 (last round and offset XOR in one step)
+	movdqu	(%rbx),%xmm10	# restore table entry 0 for the next call
+	movq	%r10,%rax	# reset the round index
+.byte	102,65,15,56,221,219	# aesenclast %xmm11,%xmm3
+.byte	102,65,15,56,221,228	# aesenclast %xmm12,%xmm4
+.byte	102,65,15,56,221,237	# aesenclast %xmm13,%xmm5
+.byte	102,65,15,56,221,246	# aesenclast %xmm14,%xmm6
+.byte	102,65,15,56,221,255	# aesenclast %xmm15,%xmm7
+	.byte	0xf3,0xc3	# rep ret
+
+
+
+.p2align	5
+__ocb_encrypt4:	# encrypt xmm2..xmm5 in place; uses xmm8 checksum, xmm9 first^last key, xmm10 table entry 0, xmm15 offset^last key, r12/r13 table byte offsets
+	pxor	%xmm9,%xmm15	# xmm15 = running offset ^ first key
+	movdqu	(%rbx,%r12,1),%xmm11
+	movdqa	%xmm10,%xmm12
+	movdqu	(%rbx,%r13,1),%xmm13
+	pxor	%xmm15,%xmm10	# xmm10 = offset of block 1 (with first key)
+	pxor	%xmm10,%xmm11	# each offset chains from the previous one
+	pxor	%xmm2,%xmm8	# checksum ^= input block
+	pxor	%xmm10,%xmm2	# input ^= offset ^ first key
+	pxor	%xmm11,%xmm12
+	pxor	%xmm3,%xmm8
+	pxor	%xmm11,%xmm3
+	pxor	%xmm12,%xmm13
+	pxor	%xmm4,%xmm8
+	pxor	%xmm12,%xmm4
+	pxor	%xmm5,%xmm8
+	pxor	%xmm13,%xmm5
+	movups	32(%r11),%xmm0
+
+	pxor	%xmm9,%xmm10	# convert offsets to offset ^ last key for the final round
+	pxor	%xmm9,%xmm11
+	pxor	%xmm9,%xmm12
+	pxor	%xmm9,%xmm13
+
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
+	movups	48(%r11),%xmm1
+
+.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
+.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
+.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
+	movups	64(%r11),%xmm0
+	jmp	L$ocb_enc_loop4
+
+.p2align	5
+L$ocb_enc_loop4:	# two rounds per iteration; rax counts up by 32 until it reaches zero
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	# sets ZF consumed by the jnz below
+
+.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
+.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
+.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
+.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$ocb_enc_loop4
+
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
+.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
+.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
+	movups	16(%r11),%xmm1	# reload the second schedule entry for the next call
+	movq	%r10,%rax	# reset the round index
+
+.byte	102,65,15,56,221,210	# aesenclast %xmm10,%xmm2 (last round and offset XOR in one step)
+.byte	102,65,15,56,221,219	# aesenclast %xmm11,%xmm3
+.byte	102,65,15,56,221,228	# aesenclast %xmm12,%xmm4
+.byte	102,65,15,56,221,237	# aesenclast %xmm13,%xmm5
+	.byte	0xf3,0xc3	# rep ret
+
+
+
+.p2align	5
+__ocb_encrypt1:	# encrypt xmm2 in place; xmm7 = table entry on entry and new offset ^ last key on exit; xmm8 checksum, xmm9 first^last key, xmm15 offset^last key
+	pxor	%xmm15,%xmm7	# xmm7 = new offset ^ last key
+	pxor	%xmm9,%xmm7	# xmm7 = new offset ^ first key
+	pxor	%xmm2,%xmm8	# checksum ^= input block
+	pxor	%xmm7,%xmm2	# input ^= offset ^ first key
+	movups	32(%r11),%xmm0
+
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+	movups	48(%r11),%xmm1
+	pxor	%xmm9,%xmm7	# back to new offset ^ last key for the final round
+
+.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
+	movups	64(%r11),%xmm0
+	jmp	L$ocb_enc_loop1
+
+.p2align	5
+L$ocb_enc_loop1:	# two rounds per iteration; rax counts up by 32 until it reaches zero
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	# sets ZF consumed by the jnz below
+
+.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$ocb_enc_loop1
+
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+	movups	16(%r11),%xmm1	# reload the second schedule entry for the next call
+	movq	%r10,%rax	# reset the round index
+
+.byte	102,15,56,221,215	# aesenclast %xmm7,%xmm2 (last round and offset XOR in one step)
+	.byte	0xf3,0xc3	# rep ret
+
+
+.globl	_aesni_ocb_decrypt	# AES-NI OCB bulk decryption driver; block work is done by __ocb_decrypt1/4/6
+.private_extern _aesni_ocb_decrypt
+
+.p2align	5
+_aesni_ocb_decrypt:	# roles inferred from use: rdi=input, rsi=output, rdx=block count, rcx=key schedule, r8=starting block number, r9=16-byte offset (read and written back)
+	leaq	(%rsp),%rax
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	movq	8(%rax),%rbx	# rbx = first stack argument: base of a table of 16-byte entries indexed by bsf(block number)
+	movq	8+8(%rax),%rbp	# rbp = second stack argument: 16-byte checksum (read and written back)
+
+	movl	240(%rcx),%r10d	# r10d = value at offset 240 of the key schedule; sizes the round loops
+	movq	%rcx,%r11	# r11 keeps the key schedule base
+	shll	$4,%r10d
+	movups	(%rcx),%xmm9	# xmm9 = first key
+	movups	16(%rcx,%r10,1),%xmm1	# xmm1 = last key
+
+	movdqu	(%r9),%xmm15	# xmm15 = running offset
+	pxor	%xmm1,%xmm9	# xmm9 = first key ^ last key
+	pxor	%xmm1,%xmm15	# running offset is carried with the last key folded in
+
+	movl	$16+32,%eax
+	leaq	32(%r11,%r10,1),%rcx	# rcx points past the end of the schedule; helpers index it with negative rax
+	movups	16(%r11),%xmm1
+	subq	%r10,%rax
+	movq	%rax,%r10	# r10 = negative start index restored into rax by each helper
+
+	movdqu	(%rbx),%xmm10	# xmm10 = table entry 0
+	movdqu	(%rbp),%xmm8	# xmm8 = checksum accumulator
+
+	testq	$1,%r8
+	jnz	L$ocb_dec_odd
+
+	bsfq	%r8,%r12	# even block number: handle one block alone so the six-block path starts on an odd number
+	addq	$1,%r8
+	shlq	$4,%r12
+	movdqu	(%rbx,%r12,1),%xmm7	# xmm7 = table entry selected by bsf(block number)
+	movdqu	(%rdi),%xmm2
+	leaq	16(%rdi),%rdi
+
+	call	__ocb_decrypt1
+
+	movdqa	%xmm7,%xmm15	# helper leaves the new running offset in xmm7
+	movups	%xmm2,(%rsi)
+	xorps	%xmm2,%xmm8	# checksum ^= decrypted block (done here, after decryption)
+	leaq	16(%rsi),%rsi
+	subq	$1,%rdx
+	jz	L$ocb_dec_done
+
+L$ocb_dec_odd:	# precompute table byte offsets for block numbers r8+1, r8+3, r8+5
+	leaq	1(%r8),%r12
+	leaq	3(%r8),%r13
+	leaq	5(%r8),%r14
+	leaq	6(%r8),%r8
+	bsfq	%r12,%r12
+	bsfq	%r13,%r13
+	bsfq	%r14,%r14
+	shlq	$4,%r12
+	shlq	$4,%r13
+	shlq	$4,%r14
+
+	subq	$6,%rdx
+	jc	L$ocb_dec_short
+	jmp	L$ocb_dec_grandloop
+
+.p2align	5
+L$ocb_dec_grandloop:	# six blocks per pass
+	movdqu	0(%rdi),%xmm2
+	movdqu	16(%rdi),%xmm3
+	movdqu	32(%rdi),%xmm4
+	movdqu	48(%rdi),%xmm5
+	movdqu	64(%rdi),%xmm6
+	movdqu	80(%rdi),%xmm7
+	leaq	96(%rdi),%rdi
+
+	call	__ocb_decrypt6
+
+	movups	%xmm2,0(%rsi)
+	pxor	%xmm2,%xmm8	# checksum ^= each decrypted block
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm8
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm8
+	movups	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm8
+	movups	%xmm6,64(%rsi)
+	pxor	%xmm6,%xmm8
+	movups	%xmm7,80(%rsi)
+	pxor	%xmm7,%xmm8
+	leaq	96(%rsi),%rsi
+	subq	$6,%rdx
+	jnc	L$ocb_dec_grandloop
+
+L$ocb_dec_short:	# 0..5 blocks remain
+	addq	$6,%rdx
+	jz	L$ocb_dec_done
+
+	movdqu	0(%rdi),%xmm2
+	cmpq	$2,%rdx
+	jb	L$ocb_dec_one
+	movdqu	16(%rdi),%xmm3	# movdqu leaves the cmpq flags intact for the je
+	je	L$ocb_dec_two
+
+	movdqu	32(%rdi),%xmm4
+	cmpq	$4,%rdx
+	jb	L$ocb_dec_three
+	movdqu	48(%rdi),%xmm5
+	je	L$ocb_dec_four
+
+	movdqu	64(%rdi),%xmm6	# five blocks remain
+	pxor	%xmm7,%xmm7	# sixth lane is unused; its result is neither stored nor added to the checksum
+
+	call	__ocb_decrypt6
+
+	movdqa	%xmm14,%xmm15	# running offset = offset of the fifth block
+	movups	%xmm2,0(%rsi)
+	pxor	%xmm2,%xmm8
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm8
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm8
+	movups	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm8
+	movups	%xmm6,64(%rsi)
+	pxor	%xmm6,%xmm8
+
+	jmp	L$ocb_dec_done
+
+.p2align	4
+L$ocb_dec_one:
+	movdqa	%xmm10,%xmm7	# odd block number: table entry 0
+
+	call	__ocb_decrypt1
+
+	movdqa	%xmm7,%xmm15
+	movups	%xmm2,0(%rsi)
+	xorps	%xmm2,%xmm8
+	jmp	L$ocb_dec_done
+
+.p2align	4
+L$ocb_dec_two:
+	pxor	%xmm4,%xmm4	# zero the unused lanes
+	pxor	%xmm5,%xmm5
+
+	call	__ocb_decrypt4
+
+	movdqa	%xmm11,%xmm15	# running offset = offset of the second block
+	movups	%xmm2,0(%rsi)
+	xorps	%xmm2,%xmm8
+	movups	%xmm3,16(%rsi)
+	xorps	%xmm3,%xmm8
+
+	jmp	L$ocb_dec_done
+
+.p2align	4
+L$ocb_dec_three:
+	pxor	%xmm5,%xmm5	# zero the unused lane
+
+	call	__ocb_decrypt4
+
+	movdqa	%xmm12,%xmm15	# running offset = offset of the third block
+	movups	%xmm2,0(%rsi)
+	xorps	%xmm2,%xmm8
+	movups	%xmm3,16(%rsi)
+	xorps	%xmm3,%xmm8
+	movups	%xmm4,32(%rsi)
+	xorps	%xmm4,%xmm8
+
+	jmp	L$ocb_dec_done
+
+.p2align	4
+L$ocb_dec_four:
+	call	__ocb_decrypt4
+
+	movdqa	%xmm13,%xmm15	# running offset = offset of the fourth block
+	movups	%xmm2,0(%rsi)
+	pxor	%xmm2,%xmm8
+	movups	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm8
+	movups	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm8
+	movups	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm8
+
+L$ocb_dec_done:
+	pxor	%xmm0,%xmm15	# NOTE(review): presumably xmm0 is the last key left by the helper, unfolding it from the offset - confirm
+	movdqu	%xmm8,(%rbp)	# write back the checksum
+	movdqu	%xmm15,(%r9)	# write back the offset
+
+	xorps	%xmm0,%xmm0	# zero every xmm register before returning
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	pxor	%xmm8,%xmm8
+	pxor	%xmm9,%xmm9
+	pxor	%xmm10,%xmm10
+	pxor	%xmm11,%xmm11
+	pxor	%xmm12,%xmm12
+	pxor	%xmm13,%xmm13
+	pxor	%xmm14,%xmm14
+	pxor	%xmm15,%xmm15
+	leaq	40(%rsp),%rax	# rax = address just above the five pushed registers
+	movq	-40(%rax),%r14
+	movq	-32(%rax),%r13
+	movq	-24(%rax),%r12
+	movq	-16(%rax),%rbp
+	movq	-8(%rax),%rbx
+	leaq	(%rax),%rsp
+L$ocb_dec_epilogue:
+	.byte	0xf3,0xc3	# rep ret
+
+
+
+.p2align	5
+__ocb_decrypt6:	# decrypt xmm2..xmm7 in place; uses xmm9 first^last key, xmm10 table entry 0, xmm15 offset^last key, r12/r13/r14 table byte offsets; checksum is left to the caller
+	pxor	%xmm9,%xmm15	# xmm15 = running offset ^ first key
+	movdqu	(%rbx,%r12,1),%xmm11
+	movdqa	%xmm10,%xmm12
+	movdqu	(%rbx,%r13,1),%xmm13
+	movdqa	%xmm10,%xmm14
+	pxor	%xmm15,%xmm10	# xmm10 = offset of block 1 (with first key)
+	movdqu	(%rbx,%r14,1),%xmm15
+	pxor	%xmm10,%xmm11	# each offset chains from the previous one
+	pxor	%xmm10,%xmm2	# input ^= offset ^ first key
+	pxor	%xmm11,%xmm12
+	pxor	%xmm11,%xmm3
+	pxor	%xmm12,%xmm13
+	pxor	%xmm12,%xmm4
+	pxor	%xmm13,%xmm14
+	pxor	%xmm13,%xmm5
+	pxor	%xmm14,%xmm15
+	pxor	%xmm14,%xmm6
+	pxor	%xmm15,%xmm7
+	movups	32(%r11),%xmm0
+
+	leaq	1(%r8),%r12	# start computing table offsets for the next six blocks
+	leaq	3(%r8),%r13
+	leaq	5(%r8),%r14
+	addq	$6,%r8
+	pxor	%xmm9,%xmm10	# convert offsets to offset ^ last key for the final round
+	bsfq	%r12,%r12
+	bsfq	%r13,%r13
+	bsfq	%r14,%r14
+
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
+.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
+.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
+.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
+	pxor	%xmm9,%xmm11
+	pxor	%xmm9,%xmm12
+.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
+	pxor	%xmm9,%xmm13
+	pxor	%xmm9,%xmm14
+.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
+	movups	48(%r11),%xmm1
+	pxor	%xmm9,%xmm15
+
+.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
+.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
+.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
+.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
+.byte	102,15,56,222,240	# aesdec %xmm0,%xmm6
+.byte	102,15,56,222,248	# aesdec %xmm0,%xmm7
+	movups	64(%r11),%xmm0
+	shlq	$4,%r12
+	shlq	$4,%r13
+	jmp	L$ocb_dec_loop6
+
+.p2align	5
+L$ocb_dec_loop6:	# two rounds per iteration; rax counts up by 32 until it reaches zero
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
+.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
+.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
+.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
+.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
+.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	# sets ZF consumed by the jnz below
+
+.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
+.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
+.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
+.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
+.byte	102,15,56,222,240	# aesdec %xmm0,%xmm6
+.byte	102,15,56,222,248	# aesdec %xmm0,%xmm7
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$ocb_dec_loop6
+
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
+.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
+.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
+.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
+.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
+.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
+	movups	16(%r11),%xmm1	# reload the second schedule entry for the next call
+	shlq	$4,%r14
+
+.byte	102,65,15,56,223,210	# aesdeclast %xmm10,%xmm2 (last round and offset XOR in one step)
+	movdqu	(%rbx),%xmm10	# restore table entry 0 for the next call
+	movq	%r10,%rax	# reset the round index
+.byte	102,65,15,56,223,219	# aesdeclast %xmm11,%xmm3
+.byte	102,65,15,56,223,228	# aesdeclast %xmm12,%xmm4
+.byte	102,65,15,56,223,237	# aesdeclast %xmm13,%xmm5
+.byte	102,65,15,56,223,246	# aesdeclast %xmm14,%xmm6
+.byte	102,65,15,56,223,255	# aesdeclast %xmm15,%xmm7
+	.byte	0xf3,0xc3	# rep ret
+
+
+
+.p2align	5
+__ocb_decrypt4:	# Helper: four-block variant; decrypts xmm2..xmm5 using chained masks xmm10..xmm13 built from xmm10 and table entries at (%rbx)
+	pxor	%xmm9,%xmm15
+	movdqu	(%rbx,%r12,1),%xmm11	# table entries at byte offsets r12 and r13 from rbx
+	movdqa	%xmm10,%xmm12
+	movdqu	(%rbx,%r13,1),%xmm13
+	pxor	%xmm15,%xmm10
+	pxor	%xmm10,%xmm11
+	pxor	%xmm10,%xmm2	# XOR mask i into block i before the rounds
+	pxor	%xmm11,%xmm12
+	pxor	%xmm11,%xmm3
+	pxor	%xmm12,%xmm13
+	pxor	%xmm12,%xmm4
+	pxor	%xmm13,%xmm5
+	movups	32(%r11),%xmm0	# xmm1 is expected to be loaded already (see reload at the end)
+
+	pxor	%xmm9,%xmm10	# fold xmm9 into each mask before it is used by aesdeclast
+	pxor	%xmm9,%xmm11
+	pxor	%xmm9,%xmm12
+	pxor	%xmm9,%xmm13
+
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2 (following lines: xmm3..xmm5)
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+	movups	48(%r11),%xmm1
+
+.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2 (following lines: xmm3..xmm5)
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+	movups	64(%r11),%xmm0
+	jmp	L$ocb_dec_loop4
+
+.p2align	5
+L$ocb_dec_loop4:	# two rounds per iteration, alternating round keys in xmm1 and xmm0
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	# rax counts up toward zero; its flags feed the jnz below
+
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$ocb_dec_loop4
+
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+	movups	16(%r11),%xmm1	# reload xmm1 for the next call
+	movq	%r10,%rax	# restore the round-loop counter from r10
+
+.byte	102,65,15,56,223,210	# aesdeclast %xmm10,%xmm2
+.byte	102,65,15,56,223,219	# aesdeclast %xmm11,%xmm3
+.byte	102,65,15,56,223,228	# aesdeclast %xmm12,%xmm4
+.byte	102,65,15,56,223,237	# aesdeclast %xmm13,%xmm5
+	.byte	0xf3,0xc3	# rep ret
+
+
+
+.p2align	5
+__ocb_decrypt1:	# Helper: single-block variant; decrypts xmm2 using the mask formed from xmm7 ^ xmm15
+	pxor	%xmm15,%xmm7
+	pxor	%xmm9,%xmm7
+	pxor	%xmm7,%xmm2	# mask the block before the rounds
+	movups	32(%r11),%xmm0	# xmm1 is expected to be loaded already (see reload at the end)
+
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
+	movups	48(%r11),%xmm1
+	pxor	%xmm9,%xmm7	# second XOR with xmm9 cancels the first one above
+
+.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
+	movups	64(%r11),%xmm0
+	jmp	L$ocb_dec_loop1
+
+.p2align	5
+L$ocb_dec_loop1:	# two rounds per iteration, alternating round keys in xmm1 and xmm0
+.byte	102,15,56,222,209
+	movups	(%rcx,%rax,1),%xmm1
+	addq	$32,%rax	# rax counts up toward zero; its flags feed the jnz below
+
+.byte	102,15,56,222,208
+	movups	-16(%rcx,%rax,1),%xmm0
+	jnz	L$ocb_dec_loop1
+
+.byte	102,15,56,222,209
+	movups	16(%r11),%xmm1	# reload xmm1 for the next call
+	movq	%r10,%rax	# restore the round-loop counter from r10
+
+.byte	102,15,56,223,215	# aesdeclast %xmm7,%xmm2
+	.byte	0xf3,0xc3	# rep ret
+
+.globl	_aesni_cbc_encrypt
+.private_extern _aesni_cbc_encrypt
+
+.p2align	4
+_aesni_cbc_encrypt:	# AES-CBC. As used below: rdi = input, rsi = output, rdx = length in bytes, rcx = key schedule, r8 = 16-byte IV (updated on return), r9d = nonzero to encrypt, zero to decrypt
+	testq	%rdx,%rdx
+	jz	L$cbc_ret	# nothing to do for zero length
+
+	movl	240(%rcx),%r10d	# round-loop count stored at key+240
+	movq	%rcx,%r11	# keep the key pointer; rcx is advanced inside the round loops
+	testl	%r9d,%r9d
+	jz	L$cbc_decrypt
+
+	movups	(%r8),%xmm2	# xmm2 = chaining value, initially the IV
+	movl	%r10d,%eax
+	cmpq	$16,%rdx
+	jb	L$cbc_enc_tail	# less than one full block: pad first
+	subq	$16,%rdx
+	jmp	L$cbc_enc_loop
+.p2align	4
+L$cbc_enc_loop:	# encrypt one block per iteration (CBC encryption is inherently serial)
+	movups	(%rdi),%xmm3
+	leaq	16(%rdi),%rdi
+
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	xorps	%xmm0,%xmm3	# plaintext ^ round key 0
+	leaq	32(%rcx),%rcx
+	xorps	%xmm3,%xmm2	# ... ^ previous ciphertext (or IV)
+L$oop_enc1_15:
+.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
+	decl	%eax	# sets ZF for the jnz below; movups and leaq leave flags alone
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	L$oop_enc1_15
+.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
+	movl	%r10d,%eax	# reset round counter and key pointer for the next block
+	movq	%r11,%rcx
+	movups	%xmm2,0(%rsi)
+	leaq	16(%rsi),%rsi
+	subq	$16,%rdx
+	jnc	L$cbc_enc_loop	# no borrow: at least one more full block
+	addq	$16,%rdx
+	jnz	L$cbc_enc_tail	# 1..15 bytes left over
+	pxor	%xmm0,%xmm0	# scrub key material from registers
+	pxor	%xmm1,%xmm1
+	movups	%xmm2,(%r8)	# write the last ciphertext block back as the new IV
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	jmp	L$cbc_ret
+
+L$cbc_enc_tail:	# partial final block: copy it to the output buffer, zero-pad to 16 bytes, then encrypt in place
+	movq	%rdx,%rcx	# rcx = number of leftover bytes
+	xchgq	%rdi,%rsi	# string ops need rsi = source (input), rdi = destination (output)
+.long	0x9066A4F3	# bytes F3 A4 66 90: rep movsb followed by a two-byte nop
+	movl	$16,%ecx
+	subq	%rdx,%rcx	# rcx = 16 - leftover = number of pad bytes
+	xorl	%eax,%eax	# al = 0 is the fill byte
+.long	0x9066AAF3	# bytes F3 AA 66 90: rep stosb followed by a two-byte nop
+	leaq	-16(%rdi),%rdi	# back to the start of the padded block
+	movl	%r10d,%eax
+	movq	%rdi,%rsi	# input = output: encrypt the padded block in place
+	movq	%r11,%rcx
+	xorq	%rdx,%rdx	# rdx = 0 so the loop exits after this one block
+	jmp	L$cbc_enc_loop
+
+.p2align	4
+L$cbc_decrypt:	# decrypt path of _aesni_cbc_encrypt; a length of exactly 16 bytes is handled here without touching the stack
+	cmpq	$16,%rdx
+	jne	L$cbc_decrypt_bulk
+
+
+
+	movdqu	(%rdi),%xmm2	# xmm2 = ciphertext block
+	movdqu	(%r8),%xmm3	# xmm3 = IV
+	movdqa	%xmm2,%xmm4	# keep the ciphertext: it becomes the next IV
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+L$oop_dec1_16:
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
+	decl	%r10d	# sets ZF for the jnz below; movups and leaq leave flags alone
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	L$oop_dec1_16
+.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
+	pxor	%xmm0,%xmm0	# scrub key material
+	pxor	%xmm1,%xmm1
+	movdqu	%xmm4,(%r8)	# new IV = this ciphertext block
+	xorps	%xmm3,%xmm2	# plaintext = decrypted block ^ old IV
+	pxor	%xmm3,%xmm3
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	jmp	L$cbc_ret
+.p2align	4
+L$cbc_decrypt_bulk:	# multi-block CBC decrypt: sets up a 16-byte aligned scratch slot, preloads six blocks and picks the 8-wide loop, 6-wide loop or tail
+	leaq	(%rsp),%r11	# r11 = original rsp, restored at L$cbc_dec_ret
+	pushq	%rbp
+	subq	$16,%rsp
+	andq	$-16,%rsp	# 16-byte aligned scratch, used by the partial-block tail
+	movq	%rcx,%rbp	# rbp = key schedule (rcx is modified by the called helpers; restored from rbp in the 6-wide loop)
+	movups	(%r8),%xmm10	# xmm10 = IV / previous ciphertext block
+	movl	%r10d,%eax
+	cmpq	$0x50,%rdx
+	jbe	L$cbc_dec_tail	# at most 80 bytes: tail code handles it
+
+	movups	(%rcx),%xmm0
+	movdqu	0(%rdi),%xmm2	# load six ciphertext blocks into xmm2..xmm7 ...
+	movdqu	16(%rdi),%xmm3
+	movdqa	%xmm2,%xmm11	# ... keeping copies of the first five in xmm11..xmm15 for the CBC XOR
+	movdqu	32(%rdi),%xmm4
+	movdqa	%xmm3,%xmm12
+	movdqu	48(%rdi),%xmm5
+	movdqa	%xmm4,%xmm13
+	movdqu	64(%rdi),%xmm6
+	movdqa	%xmm5,%xmm14
+	movdqu	80(%rdi),%xmm7
+	movdqa	%xmm6,%xmm15
+	leaq	_OPENSSL_ia32cap_P(%rip),%r9
+	movl	4(%r9),%r9d	# second 32-bit word of the capability vector
+	cmpq	$0x70,%rdx
+	jbe	L$cbc_dec_six_or_seven	# 81..112 bytes
+
+	andl	$71303168,%r9d	# 0x04400000: keep bits 26 and 22
+	subq	$0x50,%rdx
+	cmpl	$4194304,%r9d	# 0x00400000: bit 22 set and bit 26 clear selects the 6-wide loop; NOTE(review): presumably a check for a specific CPU family - confirm against upstream perlasm
+	je	L$cbc_dec_loop6_enter
+	subq	$0x20,%rdx	# 8-wide loop keeps rdx biased by 0x70 in total
+	leaq	112(%rcx),%rcx	# bias the key pointer by 112; the loop addresses round keys as N-112(%rcx)
+	jmp	L$cbc_dec_loop8_enter
+.p2align	4
+L$cbc_dec_loop8:	# 8-wide CBC decrypt pipeline: blocks in xmm2..xmm9, round keys alternate between xmm1 and xmm0, addressed relative to rcx = key+112
+	movups	%xmm9,(%rsi)	# store the 8th block of the previous iteration
+	leaq	16(%rsi),%rsi
+L$cbc_dec_loop8_enter:
+	movdqu	96(%rdi),%xmm8	# blocks 7 and 8; blocks 1..6 are already in xmm2..xmm7
+	pxor	%xmm0,%xmm2	# XOR round key 0 into every block
+	movdqu	112(%rdi),%xmm9
+	pxor	%xmm0,%xmm3
+	movups	16-112(%rcx),%xmm1
+	pxor	%xmm0,%xmm4
+	movq	$-1,%rbp
+	cmpq	$0x70,%rdx	# CF = 1 when fewer than 0x70 bytes remain; consumed by the adcq below (nothing in between writes flags)
+	pxor	%xmm0,%xmm5
+	pxor	%xmm0,%xmm6
+	pxor	%xmm0,%xmm7
+	pxor	%xmm0,%xmm8
+
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
+	pxor	%xmm0,%xmm9
+	movups	32-112(%rcx),%xmm0
+.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3 (and so on through xmm7)
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.byte	102,68,15,56,222,193	# aesdec %xmm1,%xmm8
+	adcq	$0,%rbp	# rbp = -1 + CF: 0 if the data is running out, else -1
+	andq	$128,%rbp	# -> 0 or 128
+.byte	102,68,15,56,222,201	# aesdec %xmm1,%xmm9
+	addq	%rdi,%rbp	# rbp = where the next group is preloaded from: rdi+128, or rdi itself near the end of input
+	movups	48-112(%rcx),%xmm1
+.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2 (and so on through xmm7)
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+.byte	102,68,15,56,222,192	# aesdec %xmm0,%xmm8
+.byte	102,68,15,56,222,200	# aesdec %xmm0,%xmm9
+	movups	64-112(%rcx),%xmm0
+	nop
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.byte	102,68,15,56,222,193
+.byte	102,68,15,56,222,201
+	movups	80-112(%rcx),%xmm1
+	nop
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+.byte	102,68,15,56,222,192
+.byte	102,68,15,56,222,200
+	movups	96-112(%rcx),%xmm0
+	nop
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.byte	102,68,15,56,222,193
+.byte	102,68,15,56,222,201
+	movups	112-112(%rcx),%xmm1
+	nop
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+.byte	102,68,15,56,222,192
+.byte	102,68,15,56,222,200
+	movups	128-112(%rcx),%xmm0
+	nop
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.byte	102,68,15,56,222,193
+.byte	102,68,15,56,222,201
+	movups	144-112(%rcx),%xmm1
+	cmpl	$11,%eax	# eax is the count from key+240 (9, 11 or 13 as written by _aesni_set_encrypt_key); flags stay live across the AES ops for jb and je below
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+.byte	102,68,15,56,222,192
+.byte	102,68,15,56,222,200
+	movups	160-112(%rcx),%xmm0
+	jb	L$cbc_dec_done	# count below 11: no extra rounds
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.byte	102,68,15,56,222,193
+.byte	102,68,15,56,222,201
+	movups	176-112(%rcx),%xmm1
+	nop
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+.byte	102,68,15,56,222,192
+.byte	102,68,15,56,222,200
+	movups	192-112(%rcx),%xmm0
+	je	L$cbc_dec_done	# count equal to 11: two extra rounds were enough
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.byte	102,68,15,56,222,193
+.byte	102,68,15,56,222,201
+	movups	208-112(%rcx),%xmm1
+	nop
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+.byte	102,68,15,56,222,192
+.byte	102,68,15,56,222,200
+	movups	224-112(%rcx),%xmm0
+	jmp	L$cbc_dec_done
+.p2align	4
+L$cbc_dec_done:	# finish the 8-wide group: last two rounds, CBC XOR, store seven blocks, preload the next group from (%rbp)
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
+.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
+	pxor	%xmm0,%xmm10	# fold the last round key (xmm0) into the IV and ciphertext copies, so that aesdeclast below performs the final round and the CBC XOR in one step
+	pxor	%xmm0,%xmm11
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+	pxor	%xmm0,%xmm12
+	pxor	%xmm0,%xmm13
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+	pxor	%xmm0,%xmm14
+	pxor	%xmm0,%xmm15
+.byte	102,68,15,56,222,193	# aesdec %xmm1,%xmm8
+.byte	102,68,15,56,222,201	# aesdec %xmm1,%xmm9
+	movdqu	80(%rdi),%xmm1	# ciphertext blocks 6, 7, 8 are re-read from the input for chaining
+
+.byte	102,65,15,56,223,210	# aesdeclast %xmm10,%xmm2
+	movdqu	96(%rdi),%xmm10
+	pxor	%xmm0,%xmm1
+.byte	102,65,15,56,223,219	# aesdeclast %xmm11,%xmm3
+	pxor	%xmm0,%xmm10
+	movdqu	112(%rdi),%xmm0	# 8th ciphertext block: chaining value for the next group
+.byte	102,65,15,56,223,228	# aesdeclast %xmm12,%xmm4
+	leaq	128(%rdi),%rdi
+	movdqu	0(%rbp),%xmm11	# preload the next six blocks from rbp (computed in the loop body)
+.byte	102,65,15,56,223,237	# aesdeclast %xmm13,%xmm5
+.byte	102,65,15,56,223,246	# aesdeclast %xmm14,%xmm6
+	movdqu	16(%rbp),%xmm12
+	movdqu	32(%rbp),%xmm13
+.byte	102,65,15,56,223,255	# aesdeclast %xmm15,%xmm7
+.byte	102,68,15,56,223,193	# aesdeclast %xmm1,%xmm8
+	movdqu	48(%rbp),%xmm14
+	movdqu	64(%rbp),%xmm15
+.byte	102,69,15,56,223,202	# aesdeclast %xmm10,%xmm9
+	movdqa	%xmm0,%xmm10	# xmm10 = new chaining value
+	movdqu	80(%rbp),%xmm1
+	movups	-112(%rcx),%xmm0	# reload round key 0 (rcx is biased by +112)
+
+	movups	%xmm2,(%rsi)	# store plaintext i while moving next-group block i into its working register
+	movdqa	%xmm11,%xmm2
+	movups	%xmm3,16(%rsi)
+	movdqa	%xmm12,%xmm3
+	movups	%xmm4,32(%rsi)
+	movdqa	%xmm13,%xmm4
+	movups	%xmm5,48(%rsi)
+	movdqa	%xmm14,%xmm5
+	movups	%xmm6,64(%rsi)
+	movdqa	%xmm15,%xmm6
+	movups	%xmm7,80(%rsi)
+	movdqa	%xmm1,%xmm7
+	movups	%xmm8,96(%rsi)
+	leaq	112(%rsi),%rsi	# the 8th block (xmm9) is stored later, either at the loop top or below
+
+	subq	$0x80,%rdx
+	ja	L$cbc_dec_loop8
+
+	movaps	%xmm9,%xmm2	# xmm2 = pending last block, as L$cbc_dec_tail_collected expects
+	leaq	-112(%rcx),%rcx	# remove the +112 bias from the key pointer
+	addq	$0x70,%rdx
+	jle	L$cbc_dec_clear_tail_collected
+	movups	%xmm9,(%rsi)	# more input follows: store the 8th block now
+	leaq	16(%rsi),%rsi
+	cmpq	$0x50,%rdx
+	jbe	L$cbc_dec_tail
+
+	movaps	%xmm11,%xmm2	# restore the first preloaded block, then fall through to six_or_seven
+L$cbc_dec_six_or_seven:	# final six blocks (rdx <= 0x60) or seven blocks; blocks 1..6 are in xmm2..xmm7 with ciphertext copies in xmm11..xmm15
+	cmpq	$0x60,%rdx
+	ja	L$cbc_dec_seven
+
+	movaps	%xmm7,%xmm8	# keep 6th ciphertext block: it becomes the new IV
+	call	_aesni_decrypt6	# helper defined outside this excerpt
+	pxor	%xmm10,%xmm2	# CBC XOR with the IV / preceding ciphertext block
+	movaps	%xmm8,%xmm10
+	pxor	%xmm11,%xmm3
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3	# scrub each register right after it has been stored
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	pxor	%xmm14,%xmm6
+	movdqu	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm5
+	pxor	%xmm15,%xmm7
+	movdqu	%xmm6,64(%rsi)
+	pxor	%xmm6,%xmm6
+	leaq	80(%rsi),%rsi
+	movdqa	%xmm7,%xmm2	# last block is handed to L$cbc_dec_tail_collected in xmm2
+	pxor	%xmm7,%xmm7
+	jmp	L$cbc_dec_tail_collected
+
+.p2align	4
+L$cbc_dec_seven:	# seven blocks: run the 8-wide helper with a zeroed 8th lane
+	movups	96(%rdi),%xmm8
+	xorps	%xmm9,%xmm9
+	call	_aesni_decrypt8	# helper defined outside this excerpt
+	movups	80(%rdi),%xmm9	# re-read 6th ciphertext block for the CBC XOR of block 7
+	pxor	%xmm10,%xmm2
+	movups	96(%rdi),%xmm10	# 7th ciphertext block becomes the new IV
+	pxor	%xmm11,%xmm3
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	pxor	%xmm14,%xmm6
+	movdqu	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm5
+	pxor	%xmm15,%xmm7
+	movdqu	%xmm6,64(%rsi)
+	pxor	%xmm6,%xmm6
+	pxor	%xmm9,%xmm8
+	movdqu	%xmm7,80(%rsi)
+	pxor	%xmm7,%xmm7
+	leaq	96(%rsi),%rsi
+	movdqa	%xmm8,%xmm2	# last block is handed to L$cbc_dec_tail_collected in xmm2
+	pxor	%xmm8,%xmm8
+	pxor	%xmm9,%xmm9
+	jmp	L$cbc_dec_tail_collected
+
+.p2align	4
+L$cbc_dec_loop6:	# 6-wide CBC decrypt loop, used instead of the 8-wide loop when the capability check selects it
+	movups	%xmm7,(%rsi)	# store the 6th block of the previous iteration
+	leaq	16(%rsi),%rsi
+	movdqu	0(%rdi),%xmm2	# load six blocks, keeping ciphertext copies of the first five in xmm11..xmm15
+	movdqu	16(%rdi),%xmm3
+	movdqa	%xmm2,%xmm11
+	movdqu	32(%rdi),%xmm4
+	movdqa	%xmm3,%xmm12
+	movdqu	48(%rdi),%xmm5
+	movdqa	%xmm4,%xmm13
+	movdqu	64(%rdi),%xmm6
+	movdqa	%xmm5,%xmm14
+	movdqu	80(%rdi),%xmm7
+	movdqa	%xmm6,%xmm15
+L$cbc_dec_loop6_enter:
+	leaq	96(%rdi),%rdi
+	movdqa	%xmm7,%xmm8	# 6th ciphertext block: chaining value for the next group
+
+	call	_aesni_decrypt6	# helper defined outside this excerpt
+
+	pxor	%xmm10,%xmm2	# CBC XOR with the IV / preceding ciphertext block
+	movdqa	%xmm8,%xmm10
+	pxor	%xmm11,%xmm3
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm3,16(%rsi)
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm4,32(%rsi)
+	pxor	%xmm14,%xmm6
+	movq	%rbp,%rcx	# restore key pointer and round counter after the helper call
+	movdqu	%xmm5,48(%rsi)
+	pxor	%xmm15,%xmm7
+	movl	%r10d,%eax
+	movdqu	%xmm6,64(%rsi)
+	leaq	80(%rsi),%rsi
+	subq	$0x60,%rdx
+	ja	L$cbc_dec_loop6
+
+	movdqa	%xmm7,%xmm2	# xmm2 = pending last block, as L$cbc_dec_tail_collected expects
+	addq	$0x50,%rdx
+	jle	L$cbc_dec_clear_tail_collected
+	movups	%xmm7,(%rsi)	# more input follows: store the 6th block and fall into the tail
+	leaq	16(%rsi),%rsi
+
+L$cbc_dec_tail:	# one to five remaining blocks: load them one at a time, dispatching as soon as rdx runs out
+	movups	(%rdi),%xmm2
+	subq	$0x10,%rdx
+	jbe	L$cbc_dec_one
+
+	movups	16(%rdi),%xmm3
+	movaps	%xmm2,%xmm11	# keep ciphertext copies for the CBC XOR
+	subq	$0x10,%rdx
+	jbe	L$cbc_dec_two
+
+	movups	32(%rdi),%xmm4
+	movaps	%xmm3,%xmm12
+	subq	$0x10,%rdx
+	jbe	L$cbc_dec_three
+
+	movups	48(%rdi),%xmm5
+	movaps	%xmm4,%xmm13
+	subq	$0x10,%rdx
+	jbe	L$cbc_dec_four
+
+	movups	64(%rdi),%xmm6	# five blocks: use the 6-wide helper with a zeroed 6th lane
+	movaps	%xmm5,%xmm14
+	movaps	%xmm6,%xmm15
+	xorps	%xmm7,%xmm7
+	call	_aesni_decrypt6	# helper defined outside this excerpt
+	pxor	%xmm10,%xmm2
+	movaps	%xmm15,%xmm10	# 5th ciphertext block becomes the new IV
+	pxor	%xmm11,%xmm3
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	pxor	%xmm14,%xmm6
+	movdqu	%xmm5,48(%rsi)
+	pxor	%xmm5,%xmm5
+	leaq	64(%rsi),%rsi
+	movdqa	%xmm6,%xmm2	# last block is handed to L$cbc_dec_tail_collected in xmm2
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	subq	$0x10,%rdx
+	jmp	L$cbc_dec_tail_collected
+
+.p2align	4
+L$cbc_dec_one:	# exactly one block left: inline single-block decrypt
+	movaps	%xmm2,%xmm11	# keep the ciphertext: it becomes the new IV
+	movups	(%rcx),%xmm0
+	movups	16(%rcx),%xmm1
+	leaq	32(%rcx),%rcx
+	xorps	%xmm0,%xmm2
+L$oop_dec1_17:
+.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
+	decl	%eax	# sets ZF for the jnz below; movups and leaq leave flags alone
+	movups	(%rcx),%xmm1
+	leaq	16(%rcx),%rcx
+	jnz	L$oop_dec1_17
+.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
+	xorps	%xmm10,%xmm2	# CBC XOR with the IV
+	movaps	%xmm11,%xmm10
+	jmp	L$cbc_dec_tail_collected
+.p2align	4
+L$cbc_dec_two:	# two blocks left
+	movaps	%xmm3,%xmm12
+	call	_aesni_decrypt2	# helper defined outside this excerpt
+	pxor	%xmm10,%xmm2
+	movaps	%xmm12,%xmm10	# last ciphertext block becomes the new IV
+	pxor	%xmm11,%xmm3
+	movdqu	%xmm2,(%rsi)
+	movdqa	%xmm3,%xmm2	# last block is handed over in xmm2
+	pxor	%xmm3,%xmm3
+	leaq	16(%rsi),%rsi
+	jmp	L$cbc_dec_tail_collected
+.p2align	4
+L$cbc_dec_three:	# three blocks left
+	movaps	%xmm4,%xmm13
+	call	_aesni_decrypt3	# helper defined outside this excerpt
+	pxor	%xmm10,%xmm2
+	movaps	%xmm13,%xmm10	# last ciphertext block becomes the new IV
+	pxor	%xmm11,%xmm3
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	movdqa	%xmm4,%xmm2	# last block is handed over in xmm2
+	pxor	%xmm4,%xmm4
+	leaq	32(%rsi),%rsi
+	jmp	L$cbc_dec_tail_collected
+.p2align	4
+L$cbc_dec_four:	# four blocks left
+	movaps	%xmm5,%xmm14
+	call	_aesni_decrypt4	# helper defined outside this excerpt
+	pxor	%xmm10,%xmm2
+	movaps	%xmm14,%xmm10	# last ciphertext block becomes the new IV
+	pxor	%xmm11,%xmm3
+	movdqu	%xmm2,(%rsi)
+	pxor	%xmm12,%xmm4
+	movdqu	%xmm3,16(%rsi)
+	pxor	%xmm3,%xmm3
+	pxor	%xmm13,%xmm5
+	movdqu	%xmm4,32(%rsi)
+	pxor	%xmm4,%xmm4
+	movdqa	%xmm5,%xmm2	# last block is handed over in xmm2
+	pxor	%xmm5,%xmm5
+	leaq	48(%rsi),%rsi
+	jmp	L$cbc_dec_tail_collected
+
+.p2align	4
+L$cbc_dec_clear_tail_collected:	# entry used by the wide loops: scrub the working registers first
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	pxor	%xmm8,%xmm8
+	pxor	%xmm9,%xmm9
+L$cbc_dec_tail_collected:	# common exit: xmm2 = last decrypted block (not yet stored), xmm10 = new IV
+	movups	%xmm10,(%r8)	# write the new IV back
+	andq	$15,%rdx
+	jnz	L$cbc_dec_tail_partial	# nonzero low bits: last block is only partially written
+	movups	%xmm2,(%rsi)
+	pxor	%xmm2,%xmm2
+	jmp	L$cbc_dec_ret
+.p2align	4
+L$cbc_dec_tail_partial:	# write part of the last block via the aligned stack scratch slot
+	movaps	%xmm2,(%rsp)
+	pxor	%xmm2,%xmm2
+	movq	$16,%rcx
+	movq	%rsi,%rdi	# destination = output pointer
+	subq	%rdx,%rcx	# byte count = 16 - rdx
+	leaq	(%rsp),%rsi	# source = scratch slot
+.long	0x9066A4F3	# bytes F3 A4 66 90: rep movsb followed by a two-byte nop
+	movdqa	%xmm2,(%rsp)	# xmm2 is zero here: wipe the plaintext from the stack
+
+L$cbc_dec_ret:
+	xorps	%xmm0,%xmm0	# scrub key material
+	pxor	%xmm1,%xmm1
+	movq	-8(%r11),%rbp	# r11 = rsp saved at L$cbc_decrypt_bulk; the pushed rbp sits just below it
+	leaq	(%r11),%rsp
+L$cbc_ret:
+	.byte	0xf3,0xc3	# rep ret
+
+.globl	_aesni_set_decrypt_key
+.private_extern _aesni_set_decrypt_key
+
+.p2align	4
+_aesni_set_decrypt_key:	# Builds a decryption key schedule: expands the encryption schedule, then reverses the round-key order in place and applies aesimc to the inner keys. Arguments are passed straight through to __aesni_set_encrypt_key; returns its status in eax/rax
+.byte	0x48,0x83,0xEC,0x08	# sub $8,%rsp
+	call	__aesni_set_encrypt_key
+	shll	$4,%esi	# esi comes back as the stored round count (9/11/13); * 16 = byte span
+	testl	%eax,%eax
+	jnz	L$dec_key_ret	# propagate failure unchanged
+	leaq	16(%rdx,%rsi,1),%rdi	# rdi = address of the last round key, rdx = first
+
+	movups	(%rdx),%xmm0	# swap the first and last round keys (no aesimc on these two)
+	movups	(%rdi),%xmm1
+	movups	%xmm0,(%rdi)
+	movups	%xmm1,(%rdx)
+	leaq	16(%rdx),%rdx
+	leaq	-16(%rdi),%rdi
+
+L$dec_key_inverse:	# walk inward from both ends, swapping pairs and applying aesimc to each
+	movups	(%rdx),%xmm0
+	movups	(%rdi),%xmm1
+.byte	102,15,56,219,192	# aesimc %xmm0,%xmm0
+.byte	102,15,56,219,201	# aesimc %xmm1,%xmm1
+	leaq	16(%rdx),%rdx
+	leaq	-16(%rdi),%rdi
+	movups	%xmm0,16(%rdi)
+	movups	%xmm1,-16(%rdx)
+	cmpq	%rdx,%rdi
+	ja	L$dec_key_inverse
+
+	movups	(%rdx),%xmm0	# middle round key: transformed in place
+.byte	102,15,56,219,192	# aesimc %xmm0,%xmm0
+	pxor	%xmm1,%xmm1	# scrub key material
+	movups	%xmm0,(%rdi)
+	pxor	%xmm0,%xmm0
+L$dec_key_ret:
+	addq	$8,%rsp
+	.byte	0xf3,0xc3	# rep ret
+L$SEH_end_set_decrypt_key:
+
+.globl	_aesni_set_encrypt_key
+.private_extern _aesni_set_encrypt_key
+
+.p2align	4
+_aesni_set_encrypt_key:	# AES key expansion. As used below: rdi = user key bytes, esi = key length in bits (128/192/256), rdx = output key schedule. Returns rax = 0 on success, -1 if rdi or rdx is null, -2 for any other bit length. On success esi holds the stored round count and rdx is unchanged
+__aesni_set_encrypt_key:
+.byte	0x48,0x83,0xEC,0x08	# sub $8,%rsp
+	movq	$-1,%rax
+	testq	%rdi,%rdi
+	jz	L$enc_key_ret
+	testq	%rdx,%rdx
+	jz	L$enc_key_ret
+
+	movups	(%rdi),%xmm0	# first 16 key bytes
+	xorps	%xmm4,%xmm4	# xmm4 must be zero on entry to the key_expansion helpers
+	leaq	_OPENSSL_ia32cap_P(%rip),%r10
+	movl	4(%r10),%r10d	# second 32-bit word of the capability vector
+	andl	$268437504,%r10d	# 0x10000800: keep bits 28 and 11; the *_alt paths run when only bit 28 is set. NOTE(review): presumably a CPU-feature heuristic - confirm against upstream perlasm
+	leaq	16(%rdx),%rax	# rax = write cursor for round keys, starting at the second slot
+	cmpl	$256,%esi
+	je	L$14rounds
+	cmpl	$192,%esi
+	je	L$12rounds
+	cmpl	$128,%esi
+	jne	L$bad_keybits
+
+L$10rounds:	# 128-bit key, aeskeygenassist-based expansion
+	movl	$9,%esi	# value later stored at key+240
+	cmpl	$268435456,%r10d	# 0x10000000
+	je	L$10rounds_alt
+
+	movups	%xmm0,(%rdx)	# round key 0 is the user key itself
+.byte	102,15,58,223,200,1	# aeskeygenassist $1,%xmm0,%xmm1 (the immediate is the round constant)
+	call	L$key_expansion_128_cold
+.byte	102,15,58,223,200,2
+	call	L$key_expansion_128
+.byte	102,15,58,223,200,4
+	call	L$key_expansion_128
+.byte	102,15,58,223,200,8
+	call	L$key_expansion_128
+.byte	102,15,58,223,200,16
+	call	L$key_expansion_128
+.byte	102,15,58,223,200,32
+	call	L$key_expansion_128
+.byte	102,15,58,223,200,64
+	call	L$key_expansion_128
+.byte	102,15,58,223,200,128
+	call	L$key_expansion_128
+.byte	102,15,58,223,200,27
+	call	L$key_expansion_128
+.byte	102,15,58,223,200,54
+	call	L$key_expansion_128
+	movups	%xmm0,(%rax)	# final round key; rax = key+160 at this point
+	movl	%esi,80(%rax)	# round count at key+240
+	xorl	%eax,%eax	# success
+	jmp	L$enc_key_ret
+
+.p2align	4
+L$10rounds_alt:	# 128-bit key, alternative expansion using pshufb + aesenclast instead of aeskeygenassist
+	movdqa	L$key_rotate(%rip),%xmm5	# byte-shuffle mask
+	movl	$8,%r10d	# eight loop iterations, then two unrolled rounds below
+	movdqa	L$key_rcon1(%rip),%xmm4	# round constant, doubled each iteration
+	movdqa	%xmm0,%xmm2
+	movdqu	%xmm0,(%rdx)	# round key 0 is the user key itself
+	jmp	L$oop_key128
+
+.p2align	4
+L$oop_key128:
+.byte	102,15,56,0,197	# pshufb %xmm5,%xmm0
+.byte	102,15,56,221,196	# aesenclast %xmm4,%xmm0 (round constant as the key operand)
+	pslld	$1,%xmm4	# next round constant
+	leaq	16(%rax),%rax
+
+	movdqa	%xmm2,%xmm3	# xmm2 = prev ^ prev<<32 ^ prev<<64 ^ prev<<96 (running XOR of the four words)
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm3,%xmm2
+
+	pxor	%xmm2,%xmm0
+	movdqu	%xmm0,-16(%rax)
+	movdqa	%xmm0,%xmm2
+
+	decl	%r10d
+	jnz	L$oop_key128
+
+	movdqa	L$key_rcon1b(%rip),%xmm4	# switch to constant 0x1b for the last two round keys
+
+.byte	102,15,56,0,197	# pshufb %xmm5,%xmm0
+.byte	102,15,56,221,196	# aesenclast %xmm4,%xmm0
+	pslld	$1,%xmm4
+
+	movdqa	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm3,%xmm2
+
+	pxor	%xmm2,%xmm0
+	movdqu	%xmm0,(%rax)
+
+	movdqa	%xmm0,%xmm2
+.byte	102,15,56,0,197	# pshufb %xmm5,%xmm0
+.byte	102,15,56,221,196	# aesenclast %xmm4,%xmm0
+
+	movdqa	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm2,%xmm3
+	pslldq	$4,%xmm2
+	pxor	%xmm3,%xmm2
+
+	pxor	%xmm2,%xmm0
+	movdqu	%xmm0,16(%rax)
+
+	movl	%esi,96(%rax)	# rax = key+144 here, so this writes the round count at key+240
+	xorl	%eax,%eax	# success
+	jmp	L$enc_key_ret
+
+.p2align	4
+L$12rounds:	# 192-bit key, aeskeygenassist-based expansion
+	movq	16(%rdi),%xmm2	# remaining 8 key bytes
+	movl	$11,%esi	# value later stored at key+240
+	cmpl	$268435456,%r10d	# 0x10000000
+	je	L$12rounds_alt
+
+	movups	%xmm0,(%rdx)
+.byte	102,15,58,223,202,1	# aeskeygenassist $1,%xmm2,%xmm1 (the immediate is the round constant)
+	call	L$key_expansion_192a_cold
+.byte	102,15,58,223,202,2
+	call	L$key_expansion_192b
+.byte	102,15,58,223,202,4
+	call	L$key_expansion_192a
+.byte	102,15,58,223,202,8
+	call	L$key_expansion_192b
+.byte	102,15,58,223,202,16
+	call	L$key_expansion_192a
+.byte	102,15,58,223,202,32
+	call	L$key_expansion_192b
+.byte	102,15,58,223,202,64
+	call	L$key_expansion_192a
+.byte	102,15,58,223,202,128
+	call	L$key_expansion_192b
+	movups	%xmm0,(%rax)	# final round key; rax = key+192 at this point
+	movl	%esi,48(%rax)	# round count at key+240
+	xorq	%rax,%rax	# success
+	jmp	L$enc_key_ret
+
+.p2align	4
+L$12rounds_alt:	# 192-bit key, alternative expansion using pshufb + aesenclast; emits 24 bytes of schedule per iteration
+	movdqa	L$key_rotate192(%rip),%xmm5	# byte-shuffle mask
+	movdqa	L$key_rcon1(%rip),%xmm4	# round constant, doubled each iteration
+	movl	$8,%r10d
+	movdqu	%xmm0,(%rdx)
+	jmp	L$oop_key192
+
+.p2align	4
+L$oop_key192:
+	movq	%xmm2,0(%rax)	# store the low 8 bytes of xmm2
+	movdqa	%xmm2,%xmm1
+.byte	102,15,56,0,213	# pshufb %xmm5,%xmm2
+.byte	102,15,56,221,212	# aesenclast %xmm4,%xmm2
+	pslld	$1,%xmm4	# next round constant
+	leaq	24(%rax),%rax
+
+	movdqa	%xmm0,%xmm3	# xmm0 = running XOR of its four words (x ^ x<<32 ^ x<<64 ^ x<<96)
+	pslldq	$4,%xmm0
+	pxor	%xmm0,%xmm3
+	pslldq	$4,%xmm0
+	pxor	%xmm0,%xmm3
+	pslldq	$4,%xmm0
+	pxor	%xmm3,%xmm0
+
+	pshufd	$0xff,%xmm0,%xmm3	# broadcast the top word of xmm0
+	pxor	%xmm1,%xmm3
+	pslldq	$4,%xmm1
+	pxor	%xmm1,%xmm3
+
+	pxor	%xmm2,%xmm0
+	pxor	%xmm3,%xmm2
+	movdqu	%xmm0,-16(%rax)
+
+	decl	%r10d
+	jnz	L$oop_key192
+
+	movl	%esi,32(%rax)	# rax = key+208 here, so this writes the round count at key+240
+	xorl	%eax,%eax	# success
+	jmp	L$enc_key_ret
+
+.p2align	4
+L$14rounds:	# 256-bit key, aeskeygenassist-based expansion
+	movups	16(%rdi),%xmm2	# second 16 key bytes
+	movl	$13,%esi	# value later stored at key+240
+	leaq	16(%rax),%rax	# write cursor now at key+32: the first two slots hold the user key
+	cmpl	$268435456,%r10d	# 0x10000000
+	je	L$14rounds_alt
+
+	movups	%xmm0,(%rdx)
+	movups	%xmm2,16(%rdx)
+.byte	102,15,58,223,202,1	# aeskeygenassist $1,%xmm2,%xmm1 (source xmm2, for the "a" steps)
+	call	L$key_expansion_256a_cold
+.byte	102,15,58,223,200,1	# aeskeygenassist $1,%xmm0,%xmm1 (source xmm0, for the "b" steps)
+	call	L$key_expansion_256b
+.byte	102,15,58,223,202,2
+	call	L$key_expansion_256a
+.byte	102,15,58,223,200,2
+	call	L$key_expansion_256b
+.byte	102,15,58,223,202,4
+	call	L$key_expansion_256a
+.byte	102,15,58,223,200,4
+	call	L$key_expansion_256b
+.byte	102,15,58,223,202,8
+	call	L$key_expansion_256a
+.byte	102,15,58,223,200,8
+	call	L$key_expansion_256b
+.byte	102,15,58,223,202,16
+	call	L$key_expansion_256a
+.byte	102,15,58,223,200,16
+	call	L$key_expansion_256b
+.byte	102,15,58,223,202,32
+	call	L$key_expansion_256a
+.byte	102,15,58,223,200,32
+	call	L$key_expansion_256b
+.byte	102,15,58,223,202,64
+	call	L$key_expansion_256a
+	movups	%xmm0,(%rax)	# final round key; rax = key+224 at this point
+	movl	%esi,16(%rax)	# round count at key+240
+	xorq	%rax,%rax	# success
+	jmp	L$enc_key_ret
+
+.p2align	4
+L$14rounds_alt:	# 256-bit key, alternative expansion using pshufb + aesenclast; two round keys per full iteration
+	movdqa	L$key_rotate(%rip),%xmm5	# byte-shuffle mask
+	movdqa	L$key_rcon1(%rip),%xmm4	# round constant, doubled each iteration
+	movl	$7,%r10d
+	movdqu	%xmm0,0(%rdx)
+	movdqa	%xmm2,%xmm1
+	movdqu	%xmm2,16(%rdx)
+	jmp	L$oop_key256
+
+.p2align	4
+L$oop_key256:
+.byte	102,15,56,0,213	# pshufb %xmm5,%xmm2
+.byte	102,15,56,221,212	# aesenclast %xmm4,%xmm2
+
+	movdqa	%xmm0,%xmm3	# xmm0 = running XOR of its four words
+	pslldq	$4,%xmm0
+	pxor	%xmm0,%xmm3
+	pslldq	$4,%xmm0
+	pxor	%xmm0,%xmm3
+	pslldq	$4,%xmm0
+	pxor	%xmm3,%xmm0
+	pslld	$1,%xmm4	# next round constant
+
+	pxor	%xmm2,%xmm0
+	movdqu	%xmm0,(%rax)
+
+	decl	%r10d
+	jz	L$done_key256	# the seventh pass emits only the first key of the pair
+
+	pshufd	$0xff,%xmm0,%xmm2	# broadcast the top word of xmm0
+	pxor	%xmm3,%xmm3
+.byte	102,15,56,221,211	# aesenclast %xmm3,%xmm2 with xmm3 = 0 (no round constant, no byte shuffle on this half)
+
+	movdqa	%xmm1,%xmm3	# xmm1 = running XOR of its four words
+	pslldq	$4,%xmm1
+	pxor	%xmm1,%xmm3
+	pslldq	$4,%xmm1
+	pxor	%xmm1,%xmm3
+	pslldq	$4,%xmm1
+	pxor	%xmm3,%xmm1
+
+	pxor	%xmm1,%xmm2
+	movdqu	%xmm2,16(%rax)
+	leaq	32(%rax),%rax
+	movdqa	%xmm2,%xmm1
+
+	jmp	L$oop_key256
+
+L$done_key256:
+	movl	%esi,16(%rax)	# rax = key+224 here, so this writes the round count at key+240
+	xorl	%eax,%eax	# success
+	jmp	L$enc_key_ret
+
+.p2align	4
+L$bad_keybits:	# bit length was not 128, 192 or 256
+	movq	$-2,%rax
+L$enc_key_ret:	# common exit of _aesni_set_encrypt_key; rax already holds the status
+	pxor	%xmm0,%xmm0	# scrub key material from every register used above
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	addq	$8,%rsp	# undo the sub $8,%rsp from entry
+	.byte	0xf3,0xc3	# rep ret
+L$SEH_end_set_encrypt_key:
+
+.p2align	4
+L$key_expansion_128:	# store the current round key (xmm0) at (%rax), advance rax, then derive the next key; xmm1 = aeskeygenassist result, xmm4 = scratch
+	movups	%xmm0,(%rax)
+	leaq	16(%rax),%rax
+L$key_expansion_128_cold:	# entry without the store, used for the first step
+	shufps	$16,%xmm0,%xmm4	# the two shufps/xorps pairs accumulate shifted copies of the key words into xmm0
+	xorps	%xmm4,%xmm0
+	shufps	$140,%xmm0,%xmm4
+	xorps	%xmm4,%xmm0
+	shufps	$255,%xmm1,%xmm1	# broadcast dword 3 of the aeskeygenassist result
+	xorps	%xmm1,%xmm0
+	.byte	0xf3,0xc3	# rep ret
+
+.p2align	4
+L$key_expansion_192a:	# 192-bit schedule step "a": store xmm0 at (%rax), advance rax by 16, then derive the next 24 bytes of key material in xmm0/xmm2
+	movups	%xmm0,(%rax)
+	leaq	16(%rax),%rax
+L$key_expansion_192a_cold:	# entry without the store, used for the first step
+	movaps	%xmm2,%xmm5	# save xmm2; step "b" reads it back from xmm5
+L$key_expansion_192b_warm:	# shared tail, jumped to from L$key_expansion_192b
+	shufps	$16,%xmm0,%xmm4
+	movdqa	%xmm2,%xmm3
+	xorps	%xmm4,%xmm0
+	shufps	$140,%xmm0,%xmm4
+	pslldq	$4,%xmm3
+	xorps	%xmm4,%xmm0
+	pshufd	$85,%xmm1,%xmm1	# broadcast dword 1 of the aeskeygenassist result
+	pxor	%xmm3,%xmm2
+	pxor	%xmm1,%xmm0
+	pshufd	$255,%xmm0,%xmm3	# broadcast dword 3 of the new xmm0
+	pxor	%xmm3,%xmm2
+	.byte	0xf3,0xc3	# rep ret
+
+.p2align	4
+L$key_expansion_192b:	# 192-bit schedule step "b": repack xmm5/xmm0/xmm2 into two 16-byte round keys, store them, advance rax by 32, then share the tail of step "a"
+	movaps	%xmm0,%xmm3
+	shufps	$68,%xmm0,%xmm5	# xmm5 = low half of xmm5 followed by low half of xmm0
+	movups	%xmm5,(%rax)
+	shufps	$78,%xmm2,%xmm3	# xmm3 = high half of xmm0 followed by low half of xmm2
+	movups	%xmm3,16(%rax)
+	leaq	32(%rax),%rax
+	jmp	L$key_expansion_192b_warm
+
+.p2align	4
+L$key_expansion_256a:	# 256-bit schedule step "a": store xmm2 at (%rax), advance rax, then update xmm0 from the aeskeygenassist result in xmm1
+	movups	%xmm2,(%rax)
+	leaq	16(%rax),%rax
+L$key_expansion_256a_cold:	# entry without the store, used for the first step
+	shufps	$16,%xmm0,%xmm4
+	xorps	%xmm4,%xmm0
+	shufps	$140,%xmm0,%xmm4
+	xorps	%xmm4,%xmm0
+	shufps	$255,%xmm1,%xmm1	# broadcast dword 3 of the aeskeygenassist result
+	xorps	%xmm1,%xmm0
+	.byte	0xf3,0xc3	# rep ret
+
+.p2align	4
+L$key_expansion_256b:	# 256-bit schedule step "b": store xmm0 at (%rax), advance rax, then update xmm2 from the aeskeygenassist result in xmm1
+	movups	%xmm0,(%rax)
+	leaq	16(%rax),%rax
+
+	shufps	$16,%xmm2,%xmm4
+	xorps	%xmm4,%xmm2
+	shufps	$140,%xmm2,%xmm4
+	xorps	%xmm4,%xmm2
+	shufps	$170,%xmm1,%xmm1	# broadcast dword 2 of the aeskeygenassist result (step "a" uses dword 3)
+	xorps	%xmm1,%xmm2
+	.byte	0xf3,0xc3	# rep ret
+
+
+.p2align	6
+L$bswap_mask:	# 16-byte shuffle mask that reverses byte order
+.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+L$increment32:	# dwords 6,6,6,0 (users are outside this excerpt)
+.long	6,6,6,0
+L$increment64:	# low dword = 1
+.long	1,0,0,0
+L$xts_magic:	# dwords 0x87,0,1,0
+.long	0x87,0,1,0
+L$increment1:	# byte 15 = 1, all others 0
+.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
+L$key_rotate:	# pshufb mask used by L$10rounds_alt and L$14rounds_alt
+.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
+L$key_rotate192:	# pshufb mask used by L$12rounds_alt
+.long	0x04070605,0x04070605,0x04070605,0x04070605
+L$key_rcon1:	# initial round constant 1 in every dword, doubled per round by the alt key-schedule loops
+.long	1,1,1,1
+L$key_rcon1b:	# round constant 0x1b, loaded by L$10rounds_alt for its last two rounds
+.long	0x1b,0x1b,0x1b,0x1b
+
+.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	# ASCII: AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>, NUL-terminated
+.p2align	6
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/fipsmodule/bsaes-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/bsaes-x86_64.S
new file mode 100644
index 0000000..195abd3
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/bsaes-x86_64.S
@@ -0,0 +1,2500 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+
+
+
+
+.p2align	6
+_bsaes_encrypt8:	# Bit-sliced AES encryption of eight blocks held in xmm15 and xmm0..xmm6. As used below: rax = key schedule pointer, r10d = round count; r11 is set to the constant table L$BS0 (defined outside this excerpt)
+	leaq	L$BS0(%rip),%r11
+
+	movdqa	(%rax),%xmm8	# first round key
+	leaq	16(%rax),%rax
+	movdqa	80(%r11),%xmm7	# byte-shuffle mask applied to every block below
+	pxor	%xmm8,%xmm15	# XOR the first round key into all eight blocks
+	pxor	%xmm8,%xmm0
+	pxor	%xmm8,%xmm1
+	pxor	%xmm8,%xmm2
+.byte	102,68,15,56,0,255	# pshufb %xmm7,%xmm15
+.byte	102,15,56,0,199	# pshufb %xmm7,%xmm0
+	pxor	%xmm8,%xmm3
+	pxor	%xmm8,%xmm4
+.byte	102,15,56,0,207	# pshufb %xmm7,%xmm1
+.byte	102,15,56,0,215	# pshufb %xmm7,%xmm2
+	pxor	%xmm8,%xmm5
+	pxor	%xmm8,%xmm6
+.byte	102,15,56,0,223	# pshufb %xmm7,%xmm3
+.byte	102,15,56,0,231	# pshufb %xmm7,%xmm4
+.byte	102,15,56,0,239	# pshufb %xmm7,%xmm5
+.byte	102,15,56,0,247	# pshufb %xmm7,%xmm6
+_bsaes_encrypt8_bitslice:	# transpose the eight registers into bit-sliced form with swap-move steps at distances 1, 2 and 4
+	movdqa	0(%r11),%xmm7	# mask for the distance-1 steps
+	movdqa	16(%r11),%xmm8	# mask for the distance-2 steps
+	movdqa	%xmm5,%xmm9	# swap-move pattern: t = ((a >> n) ^ b) & mask; b ^= t; a ^= t << n
+	psrlq	$1,%xmm5
+	movdqa	%xmm3,%xmm10
+	psrlq	$1,%xmm3
+	pxor	%xmm6,%xmm5
+	pxor	%xmm4,%xmm3
+	pand	%xmm7,%xmm5
+	pand	%xmm7,%xmm3
+	pxor	%xmm5,%xmm6
+	psllq	$1,%xmm5
+	pxor	%xmm3,%xmm4
+	psllq	$1,%xmm3
+	pxor	%xmm9,%xmm5
+	pxor	%xmm10,%xmm3
+	movdqa	%xmm1,%xmm9
+	psrlq	$1,%xmm1
+	movdqa	%xmm15,%xmm10
+	psrlq	$1,%xmm15
+	pxor	%xmm2,%xmm1
+	pxor	%xmm0,%xmm15
+	pand	%xmm7,%xmm1
+	pand	%xmm7,%xmm15
+	pxor	%xmm1,%xmm2
+	psllq	$1,%xmm1
+	pxor	%xmm15,%xmm0
+	psllq	$1,%xmm15
+	pxor	%xmm9,%xmm1
+	pxor	%xmm10,%xmm15
+	movdqa	32(%r11),%xmm7	# mask for the distance-4 steps
+	movdqa	%xmm4,%xmm9
+	psrlq	$2,%xmm4
+	movdqa	%xmm3,%xmm10
+	psrlq	$2,%xmm3
+	pxor	%xmm6,%xmm4
+	pxor	%xmm5,%xmm3
+	pand	%xmm8,%xmm4
+	pand	%xmm8,%xmm3
+	pxor	%xmm4,%xmm6
+	psllq	$2,%xmm4
+	pxor	%xmm3,%xmm5
+	psllq	$2,%xmm3
+	pxor	%xmm9,%xmm4
+	pxor	%xmm10,%xmm3
+	movdqa	%xmm0,%xmm9
+	psrlq	$2,%xmm0
+	movdqa	%xmm15,%xmm10
+	psrlq	$2,%xmm15
+	pxor	%xmm2,%xmm0
+	pxor	%xmm1,%xmm15
+	pand	%xmm8,%xmm0
+	pand	%xmm8,%xmm15
+	pxor	%xmm0,%xmm2
+	psllq	$2,%xmm0
+	pxor	%xmm15,%xmm1
+	psllq	$2,%xmm15
+	pxor	%xmm9,%xmm0
+	pxor	%xmm10,%xmm15
+	movdqa	%xmm2,%xmm9
+	psrlq	$4,%xmm2
+	movdqa	%xmm1,%xmm10
+	psrlq	$4,%xmm1
+	pxor	%xmm6,%xmm2
+	pxor	%xmm5,%xmm1
+	pand	%xmm7,%xmm2
+	pand	%xmm7,%xmm1
+	pxor	%xmm2,%xmm6
+	psllq	$4,%xmm2
+	pxor	%xmm1,%xmm5
+	psllq	$4,%xmm1
+	pxor	%xmm9,%xmm2
+	pxor	%xmm10,%xmm1
+	movdqa	%xmm0,%xmm9
+	psrlq	$4,%xmm0
+	movdqa	%xmm15,%xmm10
+	psrlq	$4,%xmm15
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pand	%xmm7,%xmm0
+	pand	%xmm7,%xmm15
+	pxor	%xmm0,%xmm4
+	psllq	$4,%xmm0
+	pxor	%xmm15,%xmm3
+	psllq	$4,%xmm15
+	pxor	%xmm9,%xmm0
+	pxor	%xmm10,%xmm15
+	decl	%r10d
+	jmp	L$enc_sbox	# the first round skips the key XOR and shuffle at L$enc_loop (already done at entry)
+.p2align	4
+L$enc_loop:	# one bit-sliced round of _bsaes_encrypt8: XOR 128 bytes of round key, byte shuffle by xmm7, S-box network, then the linear mixing step
+	pxor	0(%rax),%xmm15	# eight 16-byte round-key slices, one per register
+	pxor	16(%rax),%xmm0
+	pxor	32(%rax),%xmm1
+	pxor	48(%rax),%xmm2
+.byte	102,68,15,56,0,255	# pshufb %xmm7,%xmm15
+.byte	102,15,56,0,199	# pshufb %xmm7,%xmm0
+	pxor	64(%rax),%xmm3
+	pxor	80(%rax),%xmm4
+.byte	102,15,56,0,207	# pshufb %xmm7,%xmm1
+.byte	102,15,56,0,215	# pshufb %xmm7,%xmm2
+	pxor	96(%rax),%xmm5
+	pxor	112(%rax),%xmm6
+.byte	102,15,56,0,223	# pshufb %xmm7,%xmm3
+.byte	102,15,56,0,231	# pshufb %xmm7,%xmm4
+.byte	102,15,56,0,239	# pshufb %xmm7,%xmm5
+.byte	102,15,56,0,247	# pshufb %xmm7,%xmm6
+	leaq	128(%rax),%rax
+L$enc_sbox:	# boolean network (pxor/pand/por only) over the eight slices; xmm7..xmm14 are temporaries
+	pxor	%xmm5,%xmm4
+	pxor	%xmm0,%xmm1
+	pxor	%xmm15,%xmm2
+	pxor	%xmm1,%xmm5
+	pxor	%xmm15,%xmm4
+
+	pxor	%xmm2,%xmm5
+	pxor	%xmm6,%xmm2
+	pxor	%xmm4,%xmm6
+	pxor	%xmm3,%xmm2
+	pxor	%xmm4,%xmm3
+	pxor	%xmm0,%xmm2
+
+	pxor	%xmm6,%xmm1
+	pxor	%xmm4,%xmm0
+	movdqa	%xmm6,%xmm10
+	movdqa	%xmm0,%xmm9
+	movdqa	%xmm4,%xmm8
+	movdqa	%xmm1,%xmm12
+	movdqa	%xmm5,%xmm11
+
+	pxor	%xmm3,%xmm10
+	pxor	%xmm1,%xmm9
+	pxor	%xmm2,%xmm8
+	movdqa	%xmm10,%xmm13
+	pxor	%xmm3,%xmm12
+	movdqa	%xmm9,%xmm7
+	pxor	%xmm15,%xmm11
+	movdqa	%xmm10,%xmm14
+
+	por	%xmm8,%xmm9
+	por	%xmm11,%xmm10
+	pxor	%xmm7,%xmm14
+	pand	%xmm11,%xmm13
+	pxor	%xmm8,%xmm11
+	pand	%xmm8,%xmm7
+	pand	%xmm11,%xmm14
+	movdqa	%xmm2,%xmm11
+	pxor	%xmm15,%xmm11
+	pand	%xmm11,%xmm12
+	pxor	%xmm12,%xmm10
+	pxor	%xmm12,%xmm9
+	movdqa	%xmm6,%xmm12
+	movdqa	%xmm4,%xmm11
+	pxor	%xmm0,%xmm12
+	pxor	%xmm5,%xmm11
+	movdqa	%xmm12,%xmm8
+	pand	%xmm11,%xmm12
+	por	%xmm11,%xmm8
+	pxor	%xmm12,%xmm7
+	pxor	%xmm14,%xmm10
+	pxor	%xmm13,%xmm9
+	pxor	%xmm14,%xmm8
+	movdqa	%xmm1,%xmm11
+	pxor	%xmm13,%xmm7
+	movdqa	%xmm3,%xmm12
+	pxor	%xmm13,%xmm8
+	movdqa	%xmm0,%xmm13
+	pand	%xmm2,%xmm11
+	movdqa	%xmm6,%xmm14
+	pand	%xmm15,%xmm12
+	pand	%xmm4,%xmm13
+	por	%xmm5,%xmm14
+	pxor	%xmm11,%xmm10
+	pxor	%xmm12,%xmm9
+	pxor	%xmm13,%xmm8
+	pxor	%xmm14,%xmm7
+
+
+
+
+
+	movdqa	%xmm10,%xmm11
+	pand	%xmm8,%xmm10
+	pxor	%xmm9,%xmm11
+
+	movdqa	%xmm7,%xmm13
+	movdqa	%xmm11,%xmm14
+	pxor	%xmm10,%xmm13
+	pand	%xmm13,%xmm14
+
+	movdqa	%xmm8,%xmm12
+	pxor	%xmm9,%xmm14
+	pxor	%xmm7,%xmm12
+
+	pxor	%xmm9,%xmm10
+
+	pand	%xmm10,%xmm12
+
+	movdqa	%xmm13,%xmm9
+	pxor	%xmm7,%xmm12
+
+	pxor	%xmm12,%xmm9
+	pxor	%xmm12,%xmm8
+
+	pand	%xmm7,%xmm9
+
+	pxor	%xmm9,%xmm13
+	pxor	%xmm9,%xmm8
+
+	pand	%xmm14,%xmm13
+
+	pxor	%xmm11,%xmm13
+	movdqa	%xmm5,%xmm11
+	movdqa	%xmm4,%xmm7
+	movdqa	%xmm14,%xmm9
+	pxor	%xmm13,%xmm9
+	pand	%xmm5,%xmm9
+	pxor	%xmm4,%xmm5
+	pand	%xmm14,%xmm4
+	pand	%xmm13,%xmm5
+	pxor	%xmm4,%xmm5
+	pxor	%xmm9,%xmm4
+	pxor	%xmm15,%xmm11
+	pxor	%xmm2,%xmm7
+	pxor	%xmm12,%xmm14
+	pxor	%xmm8,%xmm13
+	movdqa	%xmm14,%xmm10
+	movdqa	%xmm12,%xmm9
+	pxor	%xmm13,%xmm10
+	pxor	%xmm8,%xmm9
+	pand	%xmm11,%xmm10
+	pand	%xmm15,%xmm9
+	pxor	%xmm7,%xmm11
+	pxor	%xmm2,%xmm15
+	pand	%xmm14,%xmm7
+	pand	%xmm12,%xmm2
+	pand	%xmm13,%xmm11
+	pand	%xmm8,%xmm15
+	pxor	%xmm11,%xmm7
+	pxor	%xmm2,%xmm15
+	pxor	%xmm10,%xmm11
+	pxor	%xmm9,%xmm2
+	pxor	%xmm11,%xmm5
+	pxor	%xmm11,%xmm15
+	pxor	%xmm7,%xmm4
+	pxor	%xmm7,%xmm2
+
+	movdqa	%xmm6,%xmm11
+	movdqa	%xmm0,%xmm7
+	pxor	%xmm3,%xmm11
+	pxor	%xmm1,%xmm7
+	movdqa	%xmm14,%xmm10
+	movdqa	%xmm12,%xmm9
+	pxor	%xmm13,%xmm10
+	pxor	%xmm8,%xmm9
+	pand	%xmm11,%xmm10
+	pand	%xmm3,%xmm9
+	pxor	%xmm7,%xmm11
+	pxor	%xmm1,%xmm3
+	pand	%xmm14,%xmm7
+	pand	%xmm12,%xmm1
+	pand	%xmm13,%xmm11
+	pand	%xmm8,%xmm3
+	pxor	%xmm11,%xmm7
+	pxor	%xmm1,%xmm3
+	pxor	%xmm10,%xmm11
+	pxor	%xmm9,%xmm1
+	pxor	%xmm12,%xmm14
+	pxor	%xmm8,%xmm13
+	movdqa	%xmm14,%xmm10
+	pxor	%xmm13,%xmm10
+	pand	%xmm6,%xmm10
+	pxor	%xmm0,%xmm6
+	pand	%xmm14,%xmm0
+	pand	%xmm13,%xmm6
+	pxor	%xmm0,%xmm6
+	pxor	%xmm10,%xmm0
+	pxor	%xmm11,%xmm6
+	pxor	%xmm11,%xmm3
+	pxor	%xmm7,%xmm0
+	pxor	%xmm7,%xmm1
+	pxor	%xmm15,%xmm6
+	pxor	%xmm5,%xmm0
+	pxor	%xmm6,%xmm3
+	pxor	%xmm15,%xmm5
+	pxor	%xmm0,%xmm15
+
+	pxor	%xmm4,%xmm0
+	pxor	%xmm1,%xmm4
+	pxor	%xmm2,%xmm1
+	pxor	%xmm4,%xmm2
+	pxor	%xmm4,%xmm3
+
+	pxor	%xmm2,%xmm5
+	decl	%r10d	# round counter; the flags set here are also consumed by the jnz at the end of the loop body (SSE ops in between leave flags alone)
+	jl	L$enc_done	# counter went negative: last round done, skip the mixing step
+	pshufd	$0x93,%xmm15,%xmm7	# linear mixing via dword rotations (0x93) and half swaps (0x4E); NOTE(review): presumably the bit-sliced MixColumns - confirm against upstream perlasm
+	pshufd	$0x93,%xmm0,%xmm8
+	pxor	%xmm7,%xmm15
+	pshufd	$0x93,%xmm3,%xmm9
+	pxor	%xmm8,%xmm0
+	pshufd	$0x93,%xmm5,%xmm10
+	pxor	%xmm9,%xmm3
+	pshufd	$0x93,%xmm2,%xmm11
+	pxor	%xmm10,%xmm5
+	pshufd	$0x93,%xmm6,%xmm12
+	pxor	%xmm11,%xmm2
+	pshufd	$0x93,%xmm1,%xmm13
+	pxor	%xmm12,%xmm6
+	pshufd	$0x93,%xmm4,%xmm14
+	pxor	%xmm13,%xmm1
+	pxor	%xmm14,%xmm4
+
+	pxor	%xmm15,%xmm8
+	pxor	%xmm4,%xmm7
+	pxor	%xmm4,%xmm8
+	pshufd	$0x4E,%xmm15,%xmm15
+	pxor	%xmm0,%xmm9
+	pshufd	$0x4E,%xmm0,%xmm0
+	pxor	%xmm2,%xmm12
+	pxor	%xmm7,%xmm15
+	pxor	%xmm6,%xmm13
+	pxor	%xmm8,%xmm0
+	pxor	%xmm5,%xmm11
+	pshufd	$0x4E,%xmm2,%xmm7
+	pxor	%xmm1,%xmm14
+	pshufd	$0x4E,%xmm6,%xmm8
+	pxor	%xmm3,%xmm10
+	pshufd	$0x4E,%xmm5,%xmm2
+	pxor	%xmm4,%xmm10
+	pshufd	$0x4E,%xmm4,%xmm6
+	pxor	%xmm4,%xmm11
+	pshufd	$0x4E,%xmm1,%xmm5
+	pxor	%xmm11,%xmm7
+	pshufd	$0x4E,%xmm3,%xmm1
+	pxor	%xmm12,%xmm8
+	pxor	%xmm10,%xmm2
+	pxor	%xmm14,%xmm6
+	pxor	%xmm13,%xmm5
+	movdqa	%xmm7,%xmm3
+	pxor	%xmm9,%xmm1
+	movdqa	%xmm8,%xmm4
+	movdqa	48(%r11),%xmm7	# shuffle mask for an ordinary round
+	jnz	L$enc_loop	# counter not yet zero
+	movdqa	64(%r11),%xmm7	# counter is zero: the final round uses a different shuffle mask
+	jmp	L$enc_loop
+.p2align	4
+L$enc_done:	# exit of _bsaes_encrypt8: undo the bit-slicing with swap-move steps at distances 1, 2 and 4, then XOR in the last round key
+	movdqa	0(%r11),%xmm7	# mask for the distance-1 steps
+	movdqa	16(%r11),%xmm8	# mask for the distance-2 steps
+	movdqa	%xmm1,%xmm9	# swap-move pattern: t = ((a >> n) ^ b) & mask; b ^= t; a ^= t << n
+	psrlq	$1,%xmm1
+	movdqa	%xmm2,%xmm10
+	psrlq	$1,%xmm2
+	pxor	%xmm4,%xmm1
+	pxor	%xmm6,%xmm2
+	pand	%xmm7,%xmm1
+	pand	%xmm7,%xmm2
+	pxor	%xmm1,%xmm4
+	psllq	$1,%xmm1
+	pxor	%xmm2,%xmm6
+	psllq	$1,%xmm2
+	pxor	%xmm9,%xmm1
+	pxor	%xmm10,%xmm2
+	movdqa	%xmm3,%xmm9
+	psrlq	$1,%xmm3
+	movdqa	%xmm15,%xmm10
+	psrlq	$1,%xmm15
+	pxor	%xmm5,%xmm3
+	pxor	%xmm0,%xmm15
+	pand	%xmm7,%xmm3
+	pand	%xmm7,%xmm15
+	pxor	%xmm3,%xmm5
+	psllq	$1,%xmm3
+	pxor	%xmm15,%xmm0
+	psllq	$1,%xmm15
+	pxor	%xmm9,%xmm3
+	pxor	%xmm10,%xmm15
+	movdqa	32(%r11),%xmm7	# mask for the distance-4 steps
+	movdqa	%xmm6,%xmm9
+	psrlq	$2,%xmm6
+	movdqa	%xmm2,%xmm10
+	psrlq	$2,%xmm2
+	pxor	%xmm4,%xmm6
+	pxor	%xmm1,%xmm2
+	pand	%xmm8,%xmm6
+	pand	%xmm8,%xmm2
+	pxor	%xmm6,%xmm4
+	psllq	$2,%xmm6
+	pxor	%xmm2,%xmm1
+	psllq	$2,%xmm2
+	pxor	%xmm9,%xmm6
+	pxor	%xmm10,%xmm2
+	movdqa	%xmm0,%xmm9
+	psrlq	$2,%xmm0
+	movdqa	%xmm15,%xmm10
+	psrlq	$2,%xmm15
+	pxor	%xmm5,%xmm0
+	pxor	%xmm3,%xmm15
+	pand	%xmm8,%xmm0
+	pand	%xmm8,%xmm15
+	pxor	%xmm0,%xmm5
+	psllq	$2,%xmm0
+	pxor	%xmm15,%xmm3
+	psllq	$2,%xmm15
+	pxor	%xmm9,%xmm0
+	pxor	%xmm10,%xmm15
+	movdqa	%xmm5,%xmm9
+	psrlq	$4,%xmm5
+	movdqa	%xmm3,%xmm10
+	psrlq	$4,%xmm3
+	pxor	%xmm4,%xmm5
+	pxor	%xmm1,%xmm3
+	pand	%xmm7,%xmm5
+	pand	%xmm7,%xmm3
+	pxor	%xmm5,%xmm4
+	psllq	$4,%xmm5
+	pxor	%xmm3,%xmm1
+	psllq	$4,%xmm3
+	pxor	%xmm9,%xmm5
+	pxor	%xmm10,%xmm3
+	movdqa	%xmm0,%xmm9
+	psrlq	$4,%xmm0
+	movdqa	%xmm15,%xmm10
+	psrlq	$4,%xmm15
+	pxor	%xmm6,%xmm0
+	pxor	%xmm2,%xmm15
+	pand	%xmm7,%xmm0
+	pand	%xmm7,%xmm15
+	pxor	%xmm0,%xmm6
+	psllq	$4,%xmm0
+	pxor	%xmm15,%xmm2
+	psllq	$4,%xmm15
+	pxor	%xmm9,%xmm0
+	pxor	%xmm10,%xmm15
+	movdqa	(%rax),%xmm7	# last round key, XORed into all eight result registers
+	pxor	%xmm7,%xmm3
+	pxor	%xmm7,%xmm5
+	pxor	%xmm7,%xmm2
+	pxor	%xmm7,%xmm6
+	pxor	%xmm7,%xmm1
+	pxor	%xmm7,%xmm4
+	pxor	%xmm7,%xmm15
+	pxor	%xmm7,%xmm0
+	.byte	0xf3,0xc3	# rep ret
+
+
+
+.p2align	6
+_bsaes_decrypt8:	## Transform eight 16-byte blocks held in xmm15,xmm0..xmm6. In: %rax -> converted key schedule, %r10d = iteration count. Clobbers xmm7-xmm14, %rax, %r10d, %r11.
+	leaq	L$BS0(%rip),%r11	## %r11 -> constant table; this routine reads entries at -48, -32, -16, 0, 16 and 32(%r11)
+
+	movdqa	(%rax),%xmm8	## first 16 bytes of the schedule, XORed into all eight blocks below
+	leaq	16(%rax),%rax	## step past it; %rax now points at the first 128-byte schedule entry
+	movdqa	-48(%r11),%xmm7	## byte-shuffle control for the pshufb group below
+	pxor	%xmm8,%xmm15
+	pxor	%xmm8,%xmm0
+	pxor	%xmm8,%xmm1
+	pxor	%xmm8,%xmm2
+.byte	102,68,15,56,0,255	## pshufb %xmm7,%xmm15
+.byte	102,15,56,0,199	## pshufb %xmm7,%xmm0
+	pxor	%xmm8,%xmm3
+	pxor	%xmm8,%xmm4
+.byte	102,15,56,0,207	## pshufb %xmm7,%xmm1
+.byte	102,15,56,0,215	## pshufb %xmm7,%xmm2
+	pxor	%xmm8,%xmm5
+	pxor	%xmm8,%xmm6
+.byte	102,15,56,0,223	## pshufb %xmm7,%xmm3
+.byte	102,15,56,0,231	## pshufb %xmm7,%xmm4
+.byte	102,15,56,0,239	## pshufb %xmm7,%xmm5
+.byte	102,15,56,0,247	## pshufb %xmm7,%xmm6
+	movdqa	0(%r11),%xmm7	## mask used with the shift-by-1 exchange steps
+	movdqa	16(%r11),%xmm8	## mask used with the shift-by-2 exchange steps
+	movdqa	%xmm5,%xmm9	## exchange step (two interleaved): t=((a>>1)^b)&mask; b^=t; a^=t<<1 for (a,b)=(xmm5,xmm6) and (xmm3,xmm4)
+	psrlq	$1,%xmm5
+	movdqa	%xmm3,%xmm10
+	psrlq	$1,%xmm3
+	pxor	%xmm6,%xmm5
+	pxor	%xmm4,%xmm3
+	pand	%xmm7,%xmm5
+	pand	%xmm7,%xmm3
+	pxor	%xmm5,%xmm6
+	psllq	$1,%xmm5
+	pxor	%xmm3,%xmm4
+	psllq	$1,%xmm3
+	pxor	%xmm9,%xmm5
+	pxor	%xmm10,%xmm3
+	movdqa	%xmm1,%xmm9	## same shift-by-1 exchange for (xmm1,xmm2) and (xmm15,xmm0)
+	psrlq	$1,%xmm1
+	movdqa	%xmm15,%xmm10
+	psrlq	$1,%xmm15
+	pxor	%xmm2,%xmm1
+	pxor	%xmm0,%xmm15
+	pand	%xmm7,%xmm1
+	pand	%xmm7,%xmm15
+	pxor	%xmm1,%xmm2
+	psllq	$1,%xmm1
+	pxor	%xmm15,%xmm0
+	psllq	$1,%xmm15
+	pxor	%xmm9,%xmm1
+	pxor	%xmm10,%xmm15
+	movdqa	32(%r11),%xmm7	## mask used with the shift-by-4 exchange steps
+	movdqa	%xmm4,%xmm9	## shift-by-2 exchange (mask xmm8) for (xmm4,xmm6) and (xmm3,xmm5)
+	psrlq	$2,%xmm4
+	movdqa	%xmm3,%xmm10
+	psrlq	$2,%xmm3
+	pxor	%xmm6,%xmm4
+	pxor	%xmm5,%xmm3
+	pand	%xmm8,%xmm4
+	pand	%xmm8,%xmm3
+	pxor	%xmm4,%xmm6
+	psllq	$2,%xmm4
+	pxor	%xmm3,%xmm5
+	psllq	$2,%xmm3
+	pxor	%xmm9,%xmm4
+	pxor	%xmm10,%xmm3
+	movdqa	%xmm0,%xmm9	## shift-by-2 exchange for (xmm0,xmm2) and (xmm15,xmm1)
+	psrlq	$2,%xmm0
+	movdqa	%xmm15,%xmm10
+	psrlq	$2,%xmm15
+	pxor	%xmm2,%xmm0
+	pxor	%xmm1,%xmm15
+	pand	%xmm8,%xmm0
+	pand	%xmm8,%xmm15
+	pxor	%xmm0,%xmm2
+	psllq	$2,%xmm0
+	pxor	%xmm15,%xmm1
+	psllq	$2,%xmm15
+	pxor	%xmm9,%xmm0
+	pxor	%xmm10,%xmm15
+	movdqa	%xmm2,%xmm9	## shift-by-4 exchange (mask xmm7) for (xmm2,xmm6) and (xmm1,xmm5)
+	psrlq	$4,%xmm2
+	movdqa	%xmm1,%xmm10
+	psrlq	$4,%xmm1
+	pxor	%xmm6,%xmm2
+	pxor	%xmm5,%xmm1
+	pand	%xmm7,%xmm2
+	pand	%xmm7,%xmm1
+	pxor	%xmm2,%xmm6
+	psllq	$4,%xmm2
+	pxor	%xmm1,%xmm5
+	psllq	$4,%xmm1
+	pxor	%xmm9,%xmm2
+	pxor	%xmm10,%xmm1
+	movdqa	%xmm0,%xmm9	## shift-by-4 exchange for (xmm0,xmm4) and (xmm15,xmm3)
+	psrlq	$4,%xmm0
+	movdqa	%xmm15,%xmm10
+	psrlq	$4,%xmm15
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm15
+	pand	%xmm7,%xmm0
+	pand	%xmm7,%xmm15
+	pxor	%xmm0,%xmm4
+	psllq	$4,%xmm0
+	pxor	%xmm15,%xmm3
+	psllq	$4,%xmm15
+	pxor	%xmm9,%xmm0
+	pxor	%xmm10,%xmm15
+	decl	%r10d	## pre-decrement the iteration counter
+	jmp	L$dec_sbox	## first pass skips the schedule XOR + pshufb at L$dec_loop
+.p2align	4
+L$dec_loop:
+	pxor	0(%rax),%xmm15	## XOR each register with its own 16-byte slice of the current 128-byte schedule entry
+	pxor	16(%rax),%xmm0
+	pxor	32(%rax),%xmm1
+	pxor	48(%rax),%xmm2
+.byte	102,68,15,56,0,255	## pshufb %xmm7,%xmm15 (xmm7 was chosen at the bottom of the previous iteration)
+.byte	102,15,56,0,199	## pshufb %xmm7,%xmm0
+	pxor	64(%rax),%xmm3
+	pxor	80(%rax),%xmm4
+.byte	102,15,56,0,207	## pshufb %xmm7,%xmm1
+.byte	102,15,56,0,215	## pshufb %xmm7,%xmm2
+	pxor	96(%rax),%xmm5
+	pxor	112(%rax),%xmm6
+.byte	102,15,56,0,223	## pshufb %xmm7,%xmm3
+.byte	102,15,56,0,231	## pshufb %xmm7,%xmm4
+.byte	102,15,56,0,239	## pshufb %xmm7,%xmm5
+.byte	102,15,56,0,247	## pshufb %xmm7,%xmm6
+	leaq	128(%rax),%rax	## advance to the next 128-byte schedule entry
+L$dec_sbox:	## straight-line XOR/AND/OR network over the eight registers, xmm7-xmm14 as temporaries (presumably the inverse S-box, per the label name)
+	pxor	%xmm3,%xmm2
+
+	pxor	%xmm6,%xmm3
+	pxor	%xmm6,%xmm1
+	pxor	%xmm3,%xmm5
+	pxor	%xmm5,%xmm6
+	pxor	%xmm6,%xmm0
+
+	pxor	%xmm0,%xmm15
+	pxor	%xmm4,%xmm1
+	pxor	%xmm15,%xmm2
+	pxor	%xmm15,%xmm4
+	pxor	%xmm2,%xmm0
+	movdqa	%xmm2,%xmm10
+	movdqa	%xmm6,%xmm9
+	movdqa	%xmm0,%xmm8
+	movdqa	%xmm3,%xmm12
+	movdqa	%xmm4,%xmm11
+
+	pxor	%xmm15,%xmm10
+	pxor	%xmm3,%xmm9
+	pxor	%xmm5,%xmm8
+	movdqa	%xmm10,%xmm13
+	pxor	%xmm15,%xmm12
+	movdqa	%xmm9,%xmm7
+	pxor	%xmm1,%xmm11
+	movdqa	%xmm10,%xmm14
+
+	por	%xmm8,%xmm9
+	por	%xmm11,%xmm10
+	pxor	%xmm7,%xmm14
+	pand	%xmm11,%xmm13
+	pxor	%xmm8,%xmm11
+	pand	%xmm8,%xmm7
+	pand	%xmm11,%xmm14
+	movdqa	%xmm5,%xmm11
+	pxor	%xmm1,%xmm11
+	pand	%xmm11,%xmm12
+	pxor	%xmm12,%xmm10
+	pxor	%xmm12,%xmm9
+	movdqa	%xmm2,%xmm12
+	movdqa	%xmm0,%xmm11
+	pxor	%xmm6,%xmm12
+	pxor	%xmm4,%xmm11
+	movdqa	%xmm12,%xmm8
+	pand	%xmm11,%xmm12
+	por	%xmm11,%xmm8
+	pxor	%xmm12,%xmm7
+	pxor	%xmm14,%xmm10
+	pxor	%xmm13,%xmm9
+	pxor	%xmm14,%xmm8
+	movdqa	%xmm3,%xmm11
+	pxor	%xmm13,%xmm7
+	movdqa	%xmm15,%xmm12
+	pxor	%xmm13,%xmm8
+	movdqa	%xmm6,%xmm13
+	pand	%xmm5,%xmm11
+	movdqa	%xmm2,%xmm14
+	pand	%xmm1,%xmm12
+	pand	%xmm0,%xmm13
+	por	%xmm4,%xmm14
+	pxor	%xmm11,%xmm10
+	pxor	%xmm12,%xmm9
+	pxor	%xmm13,%xmm8
+	pxor	%xmm14,%xmm7
+
+
+
+
+
+	movdqa	%xmm10,%xmm11	## from here the network works only on the four temporaries xmm7-xmm10 (plus xmm11-xmm14 scratch)
+	pand	%xmm8,%xmm10
+	pxor	%xmm9,%xmm11
+
+	movdqa	%xmm7,%xmm13
+	movdqa	%xmm11,%xmm14
+	pxor	%xmm10,%xmm13
+	pand	%xmm13,%xmm14
+
+	movdqa	%xmm8,%xmm12
+	pxor	%xmm9,%xmm14
+	pxor	%xmm7,%xmm12
+
+	pxor	%xmm9,%xmm10
+
+	pand	%xmm10,%xmm12
+
+	movdqa	%xmm13,%xmm9
+	pxor	%xmm7,%xmm12
+
+	pxor	%xmm12,%xmm9
+	pxor	%xmm12,%xmm8
+
+	pand	%xmm7,%xmm9
+
+	pxor	%xmm9,%xmm13
+	pxor	%xmm9,%xmm8
+
+	pand	%xmm14,%xmm13
+
+	pxor	%xmm11,%xmm13
+	movdqa	%xmm4,%xmm11	## results in xmm8,xmm12,xmm13,xmm14 are now combined back into the eight state registers
+	movdqa	%xmm0,%xmm7
+	movdqa	%xmm14,%xmm9
+	pxor	%xmm13,%xmm9
+	pand	%xmm4,%xmm9
+	pxor	%xmm0,%xmm4
+	pand	%xmm14,%xmm0
+	pand	%xmm13,%xmm4
+	pxor	%xmm0,%xmm4
+	pxor	%xmm9,%xmm0
+	pxor	%xmm1,%xmm11
+	pxor	%xmm5,%xmm7
+	pxor	%xmm12,%xmm14
+	pxor	%xmm8,%xmm13
+	movdqa	%xmm14,%xmm10
+	movdqa	%xmm12,%xmm9
+	pxor	%xmm13,%xmm10
+	pxor	%xmm8,%xmm9
+	pand	%xmm11,%xmm10
+	pand	%xmm1,%xmm9
+	pxor	%xmm7,%xmm11
+	pxor	%xmm5,%xmm1
+	pand	%xmm14,%xmm7
+	pand	%xmm12,%xmm5
+	pand	%xmm13,%xmm11
+	pand	%xmm8,%xmm1
+	pxor	%xmm11,%xmm7
+	pxor	%xmm5,%xmm1
+	pxor	%xmm10,%xmm11
+	pxor	%xmm9,%xmm5
+	pxor	%xmm11,%xmm4
+	pxor	%xmm11,%xmm1
+	pxor	%xmm7,%xmm0
+	pxor	%xmm7,%xmm5
+
+	movdqa	%xmm2,%xmm11
+	movdqa	%xmm6,%xmm7
+	pxor	%xmm15,%xmm11
+	pxor	%xmm3,%xmm7
+	movdqa	%xmm14,%xmm10
+	movdqa	%xmm12,%xmm9
+	pxor	%xmm13,%xmm10
+	pxor	%xmm8,%xmm9
+	pand	%xmm11,%xmm10
+	pand	%xmm15,%xmm9
+	pxor	%xmm7,%xmm11
+	pxor	%xmm3,%xmm15
+	pand	%xmm14,%xmm7
+	pand	%xmm12,%xmm3
+	pand	%xmm13,%xmm11
+	pand	%xmm8,%xmm15
+	pxor	%xmm11,%xmm7
+	pxor	%xmm3,%xmm15
+	pxor	%xmm10,%xmm11
+	pxor	%xmm9,%xmm3
+	pxor	%xmm12,%xmm14
+	pxor	%xmm8,%xmm13
+	movdqa	%xmm14,%xmm10
+	pxor	%xmm13,%xmm10
+	pand	%xmm2,%xmm10
+	pxor	%xmm6,%xmm2
+	pand	%xmm14,%xmm6
+	pand	%xmm13,%xmm2
+	pxor	%xmm6,%xmm2
+	pxor	%xmm10,%xmm6
+	pxor	%xmm11,%xmm2
+	pxor	%xmm11,%xmm15
+	pxor	%xmm7,%xmm6
+	pxor	%xmm7,%xmm3
+	pxor	%xmm6,%xmm0
+	pxor	%xmm4,%xmm5
+
+	pxor	%xmm0,%xmm3
+	pxor	%xmm6,%xmm1
+	pxor	%xmm6,%xmm4
+	pxor	%xmm1,%xmm3
+	pxor	%xmm15,%xmm6
+	pxor	%xmm4,%xmm3
+	pxor	%xmm5,%xmm2
+	pxor	%xmm0,%xmm5
+	pxor	%xmm3,%xmm2
+
+	pxor	%xmm15,%xmm3
+	pxor	%xmm2,%xmm6
+	decl	%r10d	## sets the flags consumed by jl here AND by jnz at the bottom of the loop
+	jl	L$dec_done	## counter went negative: leave the loop
+
+	pshufd	$0x4E,%xmm15,%xmm7	## pshufd 0x4E swaps the two 64-bit halves; XOR/shuffle mixing stage (presumably the inverse MixColumns step)
+	pshufd	$0x4E,%xmm2,%xmm13
+	pxor	%xmm15,%xmm7
+	pshufd	$0x4E,%xmm4,%xmm14
+	pxor	%xmm2,%xmm13
+	pshufd	$0x4E,%xmm0,%xmm8
+	pxor	%xmm4,%xmm14
+	pshufd	$0x4E,%xmm5,%xmm9
+	pxor	%xmm0,%xmm8
+	pshufd	$0x4E,%xmm3,%xmm10
+	pxor	%xmm5,%xmm9
+	pxor	%xmm13,%xmm15
+	pxor	%xmm13,%xmm0
+	pshufd	$0x4E,%xmm1,%xmm11
+	pxor	%xmm3,%xmm10
+	pxor	%xmm7,%xmm5
+	pxor	%xmm8,%xmm3
+	pshufd	$0x4E,%xmm6,%xmm12
+	pxor	%xmm1,%xmm11
+	pxor	%xmm14,%xmm0
+	pxor	%xmm9,%xmm1
+	pxor	%xmm6,%xmm12
+
+	pxor	%xmm14,%xmm5
+	pxor	%xmm13,%xmm3
+	pxor	%xmm13,%xmm1
+	pxor	%xmm10,%xmm6
+	pxor	%xmm11,%xmm2
+	pxor	%xmm14,%xmm1
+	pxor	%xmm14,%xmm6
+	pxor	%xmm12,%xmm4
+	pshufd	$0x93,%xmm15,%xmm7	## pshufd 0x93 rotates the four 32-bit lanes by one position
+	pshufd	$0x93,%xmm0,%xmm8
+	pxor	%xmm7,%xmm15
+	pshufd	$0x93,%xmm5,%xmm9
+	pxor	%xmm8,%xmm0
+	pshufd	$0x93,%xmm3,%xmm10
+	pxor	%xmm9,%xmm5
+	pshufd	$0x93,%xmm1,%xmm11
+	pxor	%xmm10,%xmm3
+	pshufd	$0x93,%xmm6,%xmm12
+	pxor	%xmm11,%xmm1
+	pshufd	$0x93,%xmm2,%xmm13
+	pxor	%xmm12,%xmm6
+	pshufd	$0x93,%xmm4,%xmm14
+	pxor	%xmm13,%xmm2
+	pxor	%xmm14,%xmm4
+
+	pxor	%xmm15,%xmm8
+	pxor	%xmm4,%xmm7
+	pxor	%xmm4,%xmm8
+	pshufd	$0x4E,%xmm15,%xmm15
+	pxor	%xmm0,%xmm9
+	pshufd	$0x4E,%xmm0,%xmm0
+	pxor	%xmm1,%xmm12
+	pxor	%xmm7,%xmm15
+	pxor	%xmm6,%xmm13
+	pxor	%xmm8,%xmm0
+	pxor	%xmm3,%xmm11
+	pshufd	$0x4E,%xmm1,%xmm7
+	pxor	%xmm2,%xmm14
+	pshufd	$0x4E,%xmm6,%xmm8
+	pxor	%xmm5,%xmm10
+	pshufd	$0x4E,%xmm3,%xmm1
+	pxor	%xmm4,%xmm10
+	pshufd	$0x4E,%xmm4,%xmm6
+	pxor	%xmm4,%xmm11
+	pshufd	$0x4E,%xmm2,%xmm3
+	pxor	%xmm11,%xmm7
+	pshufd	$0x4E,%xmm5,%xmm2
+	pxor	%xmm12,%xmm8
+	pxor	%xmm1,%xmm10
+	pxor	%xmm14,%xmm6
+	pxor	%xmm3,%xmm13
+	movdqa	%xmm7,%xmm3	## move results back into the register assignment L$dec_loop expects
+	pxor	%xmm9,%xmm2
+	movdqa	%xmm13,%xmm5
+	movdqa	%xmm8,%xmm4
+	movdqa	%xmm2,%xmm1
+	movdqa	%xmm10,%xmm2
+	movdqa	-16(%r11),%xmm7	## shuffle control for a non-final iteration
+	jnz	L$dec_loop	## flags still come from the decl above (SSE instructions leave EFLAGS untouched)
+	movdqa	-32(%r11),%xmm7	## counter is exactly zero: use the alternate shuffle control for the last pass
+	jmp	L$dec_loop
+.p2align	4
+L$dec_done:	## undo the input exchange steps (shift 1, then 2, then 4) on the permuted register set, then apply the final schedule XOR
+	movdqa	0(%r11),%xmm7	## shift-by-1 mask
+	movdqa	16(%r11),%xmm8	## shift-by-2 mask
+	movdqa	%xmm2,%xmm9	## shift-by-1 exchange for (xmm2,xmm4) and (xmm1,xmm6)
+	psrlq	$1,%xmm2
+	movdqa	%xmm1,%xmm10
+	psrlq	$1,%xmm1
+	pxor	%xmm4,%xmm2
+	pxor	%xmm6,%xmm1
+	pand	%xmm7,%xmm2
+	pand	%xmm7,%xmm1
+	pxor	%xmm2,%xmm4
+	psllq	$1,%xmm2
+	pxor	%xmm1,%xmm6
+	psllq	$1,%xmm1
+	pxor	%xmm9,%xmm2
+	pxor	%xmm10,%xmm1
+	movdqa	%xmm5,%xmm9	## shift-by-1 exchange for (xmm5,xmm3) and (xmm15,xmm0)
+	psrlq	$1,%xmm5
+	movdqa	%xmm15,%xmm10
+	psrlq	$1,%xmm15
+	pxor	%xmm3,%xmm5
+	pxor	%xmm0,%xmm15
+	pand	%xmm7,%xmm5
+	pand	%xmm7,%xmm15
+	pxor	%xmm5,%xmm3
+	psllq	$1,%xmm5
+	pxor	%xmm15,%xmm0
+	psllq	$1,%xmm15
+	pxor	%xmm9,%xmm5
+	pxor	%xmm10,%xmm15
+	movdqa	32(%r11),%xmm7	## shift-by-4 mask
+	movdqa	%xmm6,%xmm9	## shift-by-2 exchange for (xmm6,xmm4) and (xmm1,xmm2)
+	psrlq	$2,%xmm6
+	movdqa	%xmm1,%xmm10
+	psrlq	$2,%xmm1
+	pxor	%xmm4,%xmm6
+	pxor	%xmm2,%xmm1
+	pand	%xmm8,%xmm6
+	pand	%xmm8,%xmm1
+	pxor	%xmm6,%xmm4
+	psllq	$2,%xmm6
+	pxor	%xmm1,%xmm2
+	psllq	$2,%xmm1
+	pxor	%xmm9,%xmm6
+	pxor	%xmm10,%xmm1
+	movdqa	%xmm0,%xmm9	## shift-by-2 exchange for (xmm0,xmm3) and (xmm15,xmm5)
+	psrlq	$2,%xmm0
+	movdqa	%xmm15,%xmm10
+	psrlq	$2,%xmm15
+	pxor	%xmm3,%xmm0
+	pxor	%xmm5,%xmm15
+	pand	%xmm8,%xmm0
+	pand	%xmm8,%xmm15
+	pxor	%xmm0,%xmm3
+	psllq	$2,%xmm0
+	pxor	%xmm15,%xmm5
+	psllq	$2,%xmm15
+	pxor	%xmm9,%xmm0
+	pxor	%xmm10,%xmm15
+	movdqa	%xmm3,%xmm9	## shift-by-4 exchange for (xmm3,xmm4) and (xmm5,xmm2)
+	psrlq	$4,%xmm3
+	movdqa	%xmm5,%xmm10
+	psrlq	$4,%xmm5
+	pxor	%xmm4,%xmm3
+	pxor	%xmm2,%xmm5
+	pand	%xmm7,%xmm3
+	pand	%xmm7,%xmm5
+	pxor	%xmm3,%xmm4
+	psllq	$4,%xmm3
+	pxor	%xmm5,%xmm2
+	psllq	$4,%xmm5
+	pxor	%xmm9,%xmm3
+	pxor	%xmm10,%xmm5
+	movdqa	%xmm0,%xmm9	## shift-by-4 exchange for (xmm0,xmm6) and (xmm15,xmm1)
+	psrlq	$4,%xmm0
+	movdqa	%xmm15,%xmm10
+	psrlq	$4,%xmm15
+	pxor	%xmm6,%xmm0
+	pxor	%xmm1,%xmm15
+	pand	%xmm7,%xmm0
+	pand	%xmm7,%xmm15
+	pxor	%xmm0,%xmm6
+	psllq	$4,%xmm0
+	pxor	%xmm15,%xmm1
+	psllq	$4,%xmm15
+	pxor	%xmm9,%xmm0
+	pxor	%xmm10,%xmm15
+	movdqa	(%rax),%xmm7	## final 16-byte schedule word, XORed into every output block
+	pxor	%xmm7,%xmm5
+	pxor	%xmm7,%xmm3
+	pxor	%xmm7,%xmm1
+	pxor	%xmm7,%xmm6
+	pxor	%xmm7,%xmm2
+	pxor	%xmm7,%xmm4
+	pxor	%xmm7,%xmm15
+	pxor	%xmm7,%xmm0
+	.byte	0xf3,0xc3	## rep ret; the CBC caller in this file reads outputs in the order xmm15,xmm0,xmm5,xmm3,xmm1,xmm6,xmm2,xmm4
+
+
+.p2align	4
+_bsaes_key_convert:	## Expand a key schedule into mask form. In: %rcx -> source schedule (16-byte entries), %rax -> 16-byte-aligned output, %r10d = count. Out: %xmm6 = last entry read, %xmm7 = 80(L$masks), %rax -> next free output slot.
+	leaq	L$masks(%rip),%r11	## %r11 -> constant table (entries at 0..80 are read here)
+	movdqu	(%rcx),%xmm7	## source entry 0 (unaligned load)
+	leaq	16(%rcx),%rcx
+	movdqa	0(%r11),%xmm0	## xmm0..xmm3: four bit-select masks
+	movdqa	16(%r11),%xmm1
+	movdqa	32(%r11),%xmm2
+	movdqa	48(%r11),%xmm3
+	movdqa	64(%r11),%xmm4	## byte-shuffle control applied to every entry in the loop
+	pcmpeqd	%xmm5,%xmm5	## xmm5 = all ones, used to invert selected outputs
+
+	movdqu	(%rcx),%xmm6	## source entry 1
+	movdqa	%xmm7,(%rax)	## entry 0 is copied through unchanged
+	leaq	16(%rax),%rax
+	decl	%r10d	## loop body runs (count - 1) times; NOTE(review): no guard for count <= 1, which would wrap the counter
+	jmp	L$key_loop
+.p2align	4
+L$key_loop:	## each iteration turns one 16-byte entry (xmm6) into eight 16-byte masks = 128 output bytes
+.byte	102,15,56,0,244	## pshufb %xmm4,%xmm6
+
+	movdqa	%xmm0,%xmm8
+	movdqa	%xmm1,%xmm9
+
+	pand	%xmm6,%xmm8	## isolate the bits selected by mask 0 / mask 1
+	pand	%xmm6,%xmm9
+	movdqa	%xmm2,%xmm10
+	pcmpeqb	%xmm0,%xmm8	## byte becomes 0xFF where the masked value equals the mask, else 0x00
+	psllq	$4,%xmm0	## masks are temporarily shifted left by 4 to produce outputs 4..7
+	movdqa	%xmm3,%xmm11
+	pcmpeqb	%xmm1,%xmm9
+	psllq	$4,%xmm1
+
+	pand	%xmm6,%xmm10
+	pand	%xmm6,%xmm11
+	movdqa	%xmm0,%xmm12
+	pcmpeqb	%xmm2,%xmm10
+	psllq	$4,%xmm2
+	movdqa	%xmm1,%xmm13
+	pcmpeqb	%xmm3,%xmm11
+	psllq	$4,%xmm3
+
+	movdqa	%xmm2,%xmm14
+	movdqa	%xmm3,%xmm15
+	pxor	%xmm5,%xmm8	## outputs 0 and 1 are stored inverted
+	pxor	%xmm5,%xmm9
+
+	pand	%xmm6,%xmm12
+	pand	%xmm6,%xmm13
+	movdqa	%xmm8,0(%rax)
+	pcmpeqb	%xmm0,%xmm12
+	psrlq	$4,%xmm0	## restore mask 0 for the next iteration
+	movdqa	%xmm9,16(%rax)
+	pcmpeqb	%xmm1,%xmm13
+	psrlq	$4,%xmm1	## restore mask 1
+	leaq	16(%rcx),%rcx	## advance the source pointer
+
+	pand	%xmm6,%xmm14
+	pand	%xmm6,%xmm15
+	movdqa	%xmm10,32(%rax)
+	pcmpeqb	%xmm2,%xmm14
+	psrlq	$4,%xmm2	## restore mask 2
+	movdqa	%xmm11,48(%rax)
+	pcmpeqb	%xmm3,%xmm15
+	psrlq	$4,%xmm3	## restore mask 3
+	movdqu	(%rcx),%xmm6	## preload the next source entry; after the final iteration it is left in xmm6 for the caller
+
+	pxor	%xmm5,%xmm13	## outputs 5 and 6 are stored inverted
+	pxor	%xmm5,%xmm14
+	movdqa	%xmm12,64(%rax)
+	movdqa	%xmm13,80(%rax)
+	movdqa	%xmm14,96(%rax)
+	movdqa	%xmm15,112(%rax)
+	leaq	128(%rax),%rax
+	decl	%r10d
+	jnz	L$key_loop
+
+	movdqa	80(%r11),%xmm7	## constant returned to the caller, which XORs it into the first or last schedule word
+
+	.byte	0xf3,0xc3	## rep ret
+
+
+.globl	_bsaes_cbc_encrypt
+.private_extern _bsaes_cbc_encrypt
+
+.p2align	4
+_bsaes_cbc_encrypt:	## In: %rdi = input, %rsi = output, %rdx = length in bytes, %rcx = key (count read from offset 240), %r8 = 16-byte chaining value (updated on exit), %r9d = mode flag.
+	cmpl	$0,%r9d
+	jne	_asm_AES_cbc_encrypt	## nonzero flag: tail-jump to the generic routine with the arguments untouched
+	cmpq	$128,%rdx
+	jb	_asm_AES_cbc_encrypt	## fewer than 8 blocks: also handled by the generic routine
+
+	movq	%rsp,%rax
+L$cbc_dec_prologue:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	leaq	-72(%rsp),%rsp	## 72-byte fixed frame; with the six pushes %rbp ends up 120 bytes below the entry %rsp
+	movq	%rsp,%rbp
+	movl	240(%rcx),%eax	## iteration count stored in the key structure (presumably the AES round count)
+	movq	%rdi,%r12	## r12 = input pointer
+	movq	%rsi,%r13	## r13 = output pointer
+	movq	%rdx,%r14
+	movq	%rcx,%r15	## r15 = key
+	movq	%r8,%rbx	## rbx = chaining-value pointer
+	shrq	$4,%r14	## r14 = number of 16-byte blocks
+
+	movl	%eax,%edx	## keep the count in %edx for every _bsaes_decrypt8 call
+	shlq	$7,%rax
+	subq	$96,%rax
+	subq	%rax,%rsp	## reserve 128*count - 96 bytes for the converted key schedule
+
+	movq	%rsp,%rax
+	movq	%r15,%rcx
+	movl	%edx,%r10d
+	call	_bsaes_key_convert
+	pxor	(%rsp),%xmm7	## fold the returned constant into schedule word 0 ...
+	movdqa	%xmm6,(%rax)	## ... store the last source entry unmodified at the end of the schedule ...
+	movdqa	%xmm7,(%rsp)	## ... and write the adjusted word 0 back
+
+	movdqu	(%rbx),%xmm14	## xmm14 = current chaining value
+	subq	$8,%r14
+L$cbc_dec_loop:	## main loop: eight blocks per iteration
+	movdqu	0(%r12),%xmm15
+	movdqu	16(%r12),%xmm0
+	movdqu	32(%r12),%xmm1
+	movdqu	48(%r12),%xmm2
+	movdqu	64(%r12),%xmm3
+	movdqu	80(%r12),%xmm4
+	movq	%rsp,%rax	## %rax -> converted schedule
+	movdqu	96(%r12),%xmm5
+	movl	%edx,%r10d
+	movdqu	112(%r12),%xmm6
+	movdqa	%xmm14,32(%rbp)	## spill the chaining value: _bsaes_decrypt8 clobbers xmm14
+
+	call	_bsaes_decrypt8
+
+	pxor	32(%rbp),%xmm15	## block 0 ^= saved chaining value
+	movdqu	0(%r12),%xmm7	## reload the inputs; each later block is XORed with the preceding input block
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm7,%xmm0
+	movdqu	32(%r12),%xmm9
+	pxor	%xmm8,%xmm5
+	movdqu	48(%r12),%xmm10
+	pxor	%xmm9,%xmm3
+	movdqu	64(%r12),%xmm11
+	pxor	%xmm10,%xmm1
+	movdqu	80(%r12),%xmm12
+	pxor	%xmm11,%xmm6
+	movdqu	96(%r12),%xmm13
+	pxor	%xmm12,%xmm2
+	movdqu	112(%r12),%xmm14	## last input block becomes the next chaining value
+	pxor	%xmm13,%xmm4
+	movdqu	%xmm15,0(%r13)	## outputs are stored in _bsaes_decrypt8's register order: xmm15,xmm0,xmm5,xmm3,xmm1,xmm6,xmm2,xmm4
+	leaq	128(%r12),%r12
+	movdqu	%xmm0,16(%r13)
+	movdqu	%xmm5,32(%r13)
+	movdqu	%xmm3,48(%r13)
+	movdqu	%xmm1,64(%r13)
+	movdqu	%xmm6,80(%r13)
+	movdqu	%xmm2,96(%r13)
+	movdqu	%xmm4,112(%r13)
+	leaq	128(%r13),%r13
+	subq	$8,%r14
+	jnc	L$cbc_dec_loop	## no borrow: at least eight blocks were still pending
+
+	addq	$8,%r14	## r14 = leftover blocks, 0..7
+	jz	L$cbc_dec_done
+
+	movdqu	0(%r12),%xmm15	## tail: load only the blocks that exist, then dispatch on the count
+	movq	%rsp,%rax
+	movl	%edx,%r10d
+	cmpq	$2,%r14
+	jb	L$cbc_dec_one
+	movdqu	16(%r12),%xmm0	## movdqu leaves the cmp flags intact for the je below
+	je	L$cbc_dec_two
+	movdqu	32(%r12),%xmm1
+	cmpq	$4,%r14
+	jb	L$cbc_dec_three
+	movdqu	48(%r12),%xmm2
+	je	L$cbc_dec_four
+	movdqu	64(%r12),%xmm3
+	cmpq	$6,%r14
+	jb	L$cbc_dec_five
+	movdqu	80(%r12),%xmm4
+	je	L$cbc_dec_six
+	movdqu	96(%r12),%xmm5	## seven blocks left; xmm6 keeps stale data and its output is not stored
+	movdqa	%xmm14,32(%rbp)
+	call	_bsaes_decrypt8
+	pxor	32(%rbp),%xmm15
+	movdqu	0(%r12),%xmm7
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm7,%xmm0
+	movdqu	32(%r12),%xmm9
+	pxor	%xmm8,%xmm5
+	movdqu	48(%r12),%xmm10
+	pxor	%xmm9,%xmm3
+	movdqu	64(%r12),%xmm11
+	pxor	%xmm10,%xmm1
+	movdqu	80(%r12),%xmm12
+	pxor	%xmm11,%xmm6
+	movdqu	96(%r12),%xmm14	## last input block -> next chaining value
+	pxor	%xmm12,%xmm2
+	movdqu	%xmm15,0(%r13)
+	movdqu	%xmm0,16(%r13)
+	movdqu	%xmm5,32(%r13)
+	movdqu	%xmm3,48(%r13)
+	movdqu	%xmm1,64(%r13)
+	movdqu	%xmm6,80(%r13)
+	movdqu	%xmm2,96(%r13)
+	jmp	L$cbc_dec_done
+.p2align	4
+L$cbc_dec_six:	## six blocks left
+	movdqa	%xmm14,32(%rbp)
+	call	_bsaes_decrypt8
+	pxor	32(%rbp),%xmm15
+	movdqu	0(%r12),%xmm7
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm7,%xmm0
+	movdqu	32(%r12),%xmm9
+	pxor	%xmm8,%xmm5
+	movdqu	48(%r12),%xmm10
+	pxor	%xmm9,%xmm3
+	movdqu	64(%r12),%xmm11
+	pxor	%xmm10,%xmm1
+	movdqu	80(%r12),%xmm14	## last input block -> next chaining value
+	pxor	%xmm11,%xmm6
+	movdqu	%xmm15,0(%r13)
+	movdqu	%xmm0,16(%r13)
+	movdqu	%xmm5,32(%r13)
+	movdqu	%xmm3,48(%r13)
+	movdqu	%xmm1,64(%r13)
+	movdqu	%xmm6,80(%r13)
+	jmp	L$cbc_dec_done
+.p2align	4
+L$cbc_dec_five:	## five blocks left
+	movdqa	%xmm14,32(%rbp)
+	call	_bsaes_decrypt8
+	pxor	32(%rbp),%xmm15
+	movdqu	0(%r12),%xmm7
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm7,%xmm0
+	movdqu	32(%r12),%xmm9
+	pxor	%xmm8,%xmm5
+	movdqu	48(%r12),%xmm10
+	pxor	%xmm9,%xmm3
+	movdqu	64(%r12),%xmm14	## last input block -> next chaining value
+	pxor	%xmm10,%xmm1
+	movdqu	%xmm15,0(%r13)
+	movdqu	%xmm0,16(%r13)
+	movdqu	%xmm5,32(%r13)
+	movdqu	%xmm3,48(%r13)
+	movdqu	%xmm1,64(%r13)
+	jmp	L$cbc_dec_done
+.p2align	4
+L$cbc_dec_four:	## four blocks left
+	movdqa	%xmm14,32(%rbp)
+	call	_bsaes_decrypt8
+	pxor	32(%rbp),%xmm15
+	movdqu	0(%r12),%xmm7
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm7,%xmm0
+	movdqu	32(%r12),%xmm9
+	pxor	%xmm8,%xmm5
+	movdqu	48(%r12),%xmm14	## last input block -> next chaining value
+	pxor	%xmm9,%xmm3
+	movdqu	%xmm15,0(%r13)
+	movdqu	%xmm0,16(%r13)
+	movdqu	%xmm5,32(%r13)
+	movdqu	%xmm3,48(%r13)
+	jmp	L$cbc_dec_done
+.p2align	4
+L$cbc_dec_three:	## three blocks left
+	movdqa	%xmm14,32(%rbp)
+	call	_bsaes_decrypt8
+	pxor	32(%rbp),%xmm15
+	movdqu	0(%r12),%xmm7
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm7,%xmm0
+	movdqu	32(%r12),%xmm14	## last input block -> next chaining value
+	pxor	%xmm8,%xmm5
+	movdqu	%xmm15,0(%r13)
+	movdqu	%xmm0,16(%r13)
+	movdqu	%xmm5,32(%r13)
+	jmp	L$cbc_dec_done
+.p2align	4
+L$cbc_dec_two:	## two blocks left
+	movdqa	%xmm14,32(%rbp)
+	call	_bsaes_decrypt8
+	pxor	32(%rbp),%xmm15
+	movdqu	0(%r12),%xmm7
+	movdqu	16(%r12),%xmm14	## last input block -> next chaining value
+	pxor	%xmm7,%xmm0
+	movdqu	%xmm15,0(%r13)
+	movdqu	%xmm0,16(%r13)
+	jmp	L$cbc_dec_done
+.p2align	4
+L$cbc_dec_one:	## one block left: use the scalar routine on the original (unconverted) key
+	leaq	(%r12),%rdi	## arg1 = input block
+	leaq	32(%rbp),%rsi	## arg2 = scratch output slot
+	leaq	(%r15),%rdx	## arg3 = key
+	call	_asm_AES_decrypt	## NOTE(review): relies on xmm14/xmm15 surviving this call - callee is outside this chunk, confirm
+	pxor	32(%rbp),%xmm14	## chaining value ^ decrypted block
+	movdqu	%xmm14,(%r13)
+	movdqa	%xmm15,%xmm14	## the input block loaded before the dispatch becomes the next chaining value
+
+L$cbc_dec_done:
+	movdqu	%xmm14,(%rbx)	## write the updated chaining value back through the caller's pointer
+	leaq	(%rsp),%rax
+	pxor	%xmm0,%xmm0
+L$cbc_dec_bzero:	## wipe the stack between %rsp and %rbp (converted key schedule), 32 bytes per pass
+	movdqa	%xmm0,0(%rax)
+	movdqa	%xmm0,16(%rax)
+	leaq	32(%rax),%rax
+	cmpq	%rax,%rbp
+	ja	L$cbc_dec_bzero
+
+	leaq	120(%rbp),%rax	## %rax = %rsp value at function entry (72-byte frame + 6 pushes)
+	movq	-48(%rax),%r15
+	movq	-40(%rax),%r14
+	movq	-32(%rax),%r13
+	movq	-24(%rax),%r12
+	movq	-16(%rax),%rbx
+	movq	-8(%rax),%rbp
+	leaq	(%rax),%rsp
+L$cbc_dec_epilogue:
+	.byte	0xf3,0xc3	## rep ret
+
+
+.globl	_bsaes_ctr32_encrypt_blocks
+.private_extern _bsaes_ctr32_encrypt_blocks
+
+.p2align	4
+_bsaes_ctr32_encrypt_blocks:	## In: %rdi = input, %rsi = output, %rdx = number of 16-byte blocks, %rcx = key (count read from offset 240), %r8 = 16-byte counter block (read only; not written back here).
+	movq	%rsp,%rax
+L$ctr_enc_prologue:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	leaq	-72(%rsp),%rsp	## 72-byte fixed frame; %rbp ends up 120 bytes below the entry %rsp
+	movq	%rsp,%rbp
+	movdqu	(%r8),%xmm0	## load the caller's counter block
+	movl	240(%rcx),%eax	## iteration count stored in the key structure (presumably the AES round count)
+	movq	%rdi,%r12	## r12 = input pointer
+	movq	%rsi,%r13	## r13 = output pointer
+	movq	%rdx,%r14	## r14 = blocks remaining
+	movq	%rcx,%r15	## r15 = key
+	movdqa	%xmm0,32(%rbp)	## working copy of the counter block lives at 32(%rbp)
+	cmpq	$8,%rdx
+	jb	L$ctr_enc_short	## fewer than 8 blocks: one-block-at-a-time path, no schedule conversion
+
+	movl	%eax,%ebx	## keep the count in %ebx for every encrypt call
+	shlq	$7,%rax
+	subq	$96,%rax
+	subq	%rax,%rsp	## reserve 128*count - 96 bytes for the converted key schedule
+
+	movq	%rsp,%rax
+	movq	%r15,%rcx
+	movl	%ebx,%r10d
+	call	_bsaes_key_convert
+	pxor	%xmm6,%xmm7	## fold the returned constant into the last source entry ...
+	movdqa	%xmm7,(%rax)	## ... and store it as the final schedule word
+
+	movdqa	(%rsp),%xmm8	## schedule word 0
+	leaq	L$ADD1(%rip),%r11	## %r11 -> counter-increment constants; shuffle controls sit at negative offsets
+	movdqa	32(%rbp),%xmm15	## counter block
+	movdqa	-32(%r11),%xmm7	## byte-shuffle control applied to both word 0 and the counter
+.byte	102,68,15,56,0,199	## pshufb %xmm7,%xmm8
+.byte	102,68,15,56,0,255	## pshufb %xmm7,%xmm15
+	movdqa	%xmm8,(%rsp)	## keep schedule word 0 in shuffled form
+	jmp	L$ctr_enc_loop
+.p2align	4
+L$ctr_enc_loop:	## build eight counter blocks and encrypt them into a 128-byte keystream batch
+	movdqa	%xmm15,32(%rbp)	## remember the base counter for this batch
+	movdqa	%xmm15,%xmm0
+	movdqa	%xmm15,%xmm1
+	paddd	0(%r11),%xmm0	## xmm0..xmm6 = base + table entries 0..6 (32-bit lane adds; presumably +1..+7, table is outside this chunk)
+	movdqa	%xmm15,%xmm2
+	paddd	16(%r11),%xmm1
+	movdqa	%xmm15,%xmm3
+	paddd	32(%r11),%xmm2
+	movdqa	%xmm15,%xmm4
+	paddd	48(%r11),%xmm3
+	movdqa	%xmm15,%xmm5
+	paddd	64(%r11),%xmm4
+	movdqa	%xmm15,%xmm6
+	paddd	80(%r11),%xmm5
+	paddd	96(%r11),%xmm6
+
+
+
+	movdqa	(%rsp),%xmm8	## shuffled schedule word 0
+	leaq	16(%rsp),%rax	## %rax -> first 128-byte schedule entry
+	movdqa	-16(%r11),%xmm7	## shuffle control for the eight blocks
+	pxor	%xmm8,%xmm15	## initial schedule XOR is done here because the call below enters after that step
+	pxor	%xmm8,%xmm0
+	pxor	%xmm8,%xmm1
+	pxor	%xmm8,%xmm2
+.byte	102,68,15,56,0,255	## pshufb %xmm7,%xmm15
+.byte	102,15,56,0,199	## pshufb %xmm7,%xmm0
+	pxor	%xmm8,%xmm3
+	pxor	%xmm8,%xmm4
+.byte	102,15,56,0,207	## pshufb %xmm7,%xmm1
+.byte	102,15,56,0,215	## pshufb %xmm7,%xmm2
+	pxor	%xmm8,%xmm5
+	pxor	%xmm8,%xmm6
+.byte	102,15,56,0,223	## pshufb %xmm7,%xmm3
+.byte	102,15,56,0,231	## pshufb %xmm7,%xmm4
+.byte	102,15,56,0,239	## pshufb %xmm7,%xmm5
+.byte	102,15,56,0,247	## pshufb %xmm7,%xmm6
+	leaq	L$BS0(%rip),%r11	## the callee expects %r11 -> L$BS0 when entered at this label
+	movl	%ebx,%r10d
+
+	call	_bsaes_encrypt8_bitslice	## label defined outside this chunk (presumably an inner entry point of _bsaes_encrypt8)
+
+	subq	$8,%r14
+	jc	L$ctr_enc_loop_done	## borrow: fewer than eight blocks were left, emit a partial batch
+
+	movdqu	0(%r12),%xmm7
+	movdqu	16(%r12),%xmm8
+	movdqu	32(%r12),%xmm9
+	movdqu	48(%r12),%xmm10
+	movdqu	64(%r12),%xmm11
+	movdqu	80(%r12),%xmm12
+	movdqu	96(%r12),%xmm13
+	movdqu	112(%r12),%xmm14
+	leaq	128(%r12),%r12
+	pxor	%xmm15,%xmm7	## keystream registers are consumed in the order xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,xmm1,xmm4
+	movdqa	32(%rbp),%xmm15	## reload the base counter
+	pxor	%xmm8,%xmm0
+	movdqu	%xmm7,0(%r13)
+	pxor	%xmm9,%xmm3
+	movdqu	%xmm0,16(%r13)
+	pxor	%xmm10,%xmm5
+	movdqu	%xmm3,32(%r13)
+	pxor	%xmm11,%xmm2
+	movdqu	%xmm5,48(%r13)
+	pxor	%xmm12,%xmm6
+	movdqu	%xmm2,64(%r13)
+	pxor	%xmm13,%xmm1
+	movdqu	%xmm6,80(%r13)
+	pxor	%xmm14,%xmm4
+	movdqu	%xmm1,96(%r13)
+	leaq	L$ADD1(%rip),%r11	## restore %r11 for the next batch
+	movdqu	%xmm4,112(%r13)
+	leaq	128(%r13),%r13
+	paddd	112(%r11),%xmm15	## advance the base counter by table entry 7 (presumably +8)
+	jnz	L$ctr_enc_loop	## flags still come from subq $8,%r14 (SSE moves and lea leave EFLAGS untouched)
+
+	jmp	L$ctr_enc_done
+.p2align	4
+L$ctr_enc_loop_done:	## partial batch: store only the first %r14 (1..7) blocks of keystream
+	addq	$8,%r14
+	movdqu	0(%r12),%xmm7
+	pxor	%xmm7,%xmm15
+	movdqu	%xmm15,0(%r13)
+	cmpq	$2,%r14
+	jb	L$ctr_enc_done
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm8,%xmm0
+	movdqu	%xmm0,16(%r13)
+	je	L$ctr_enc_done	## still testing the cmpq $2 result
+	movdqu	32(%r12),%xmm9
+	pxor	%xmm9,%xmm3
+	movdqu	%xmm3,32(%r13)
+	cmpq	$4,%r14
+	jb	L$ctr_enc_done
+	movdqu	48(%r12),%xmm10
+	pxor	%xmm10,%xmm5
+	movdqu	%xmm5,48(%r13)
+	je	L$ctr_enc_done
+	movdqu	64(%r12),%xmm11
+	pxor	%xmm11,%xmm2
+	movdqu	%xmm2,64(%r13)
+	cmpq	$6,%r14
+	jb	L$ctr_enc_done
+	movdqu	80(%r12),%xmm12
+	pxor	%xmm12,%xmm6
+	movdqu	%xmm6,80(%r13)
+	je	L$ctr_enc_done
+	movdqu	96(%r12),%xmm13
+	pxor	%xmm13,%xmm1
+	movdqu	%xmm1,96(%r13)
+	jmp	L$ctr_enc_done
+
+.p2align	4
+L$ctr_enc_short:	## short path: encrypt the counter block with the scalar routine, one block per iteration
+	leaq	32(%rbp),%rdi	## arg1 = counter block
+	leaq	48(%rbp),%rsi	## arg2 = keystream output slot
+	leaq	(%r15),%rdx	## arg3 = key
+	call	_asm_AES_encrypt
+	movdqu	(%r12),%xmm0
+	leaq	16(%r12),%r12
+	movl	44(%rbp),%eax	## last 4 bytes of the counter block
+	bswapl	%eax	## byte-swap so the increment treats them as a big-endian 32-bit value
+	pxor	48(%rbp),%xmm0	## input ^ keystream
+	incl	%eax
+	movdqu	%xmm0,(%r13)
+	bswapl	%eax
+	leaq	16(%r13),%r13
+	movl	%eax,44(%rsp)	## %rsp == %rbp on this path (no schedule area was reserved), so this is the word read via 44(%rbp) above
+	decq	%r14
+	jnz	L$ctr_enc_short
+
+L$ctr_enc_done:
+	leaq	(%rsp),%rax
+	pxor	%xmm0,%xmm0
+L$ctr_enc_bzero:	## wipe the stack from %rsp up to %rbp, 32 bytes per pass (runs once even when %rsp == %rbp)
+	movdqa	%xmm0,0(%rax)
+	movdqa	%xmm0,16(%rax)
+	leaq	32(%rax),%rax
+	cmpq	%rax,%rbp
+	ja	L$ctr_enc_bzero
+
+	leaq	120(%rbp),%rax	## %rax = %rsp value at function entry
+	movq	-48(%rax),%r15
+	movq	-40(%rax),%r14
+	movq	-32(%rax),%r13
+	movq	-24(%rax),%r12
+	movq	-16(%rax),%rbx
+	movq	-8(%rax),%rbp
+	leaq	(%rax),%rsp
+L$ctr_enc_epilogue:
+	.byte	0xf3,0xc3	## rep ret
+
+.globl	_bsaes_xts_encrypt
+.private_extern _bsaes_xts_encrypt
+
+.p2align	4
+_bsaes_xts_encrypt:	## In: %rdi = input, %rsi = output, %rdx = length in bytes, %rcx = data key, %r8 = key used to encrypt the initial tweak, %r9 = 16-byte initial tweak input.
+	movq	%rsp,%rax
+L$xts_enc_prologue:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	leaq	-72(%rsp),%rsp	## 72-byte fixed frame; %rbp ends up 120 bytes below the entry %rsp
+	movq	%rsp,%rbp
+	movq	%rdi,%r12	## r12 = input pointer
+	movq	%rsi,%r13	## r13 = output pointer
+	movq	%rdx,%r14	## r14 = length in bytes
+	movq	%rcx,%r15	## r15 = data key
+
+	leaq	(%r9),%rdi	## arg1 = block at %r9
+	leaq	32(%rbp),%rsi	## arg2 = 32(%rbp), which receives the first tweak
+	leaq	(%r8),%rdx	## arg3 = key at %r8
+	call	_asm_AES_encrypt
+
+	movl	240(%r15),%eax	## iteration count stored in the data key (presumably the AES round count)
+	movq	%r14,%rbx	## keep the full byte length; its low 4 bits drive the stealing step at L$xts_enc_done
+
+	movl	%eax,%edx	## keep the count in %edx for every _bsaes_encrypt8 call
+	shlq	$7,%rax
+	subq	$96,%rax
+	subq	%rax,%rsp	## reserve 128*count - 96 bytes for the converted key schedule
+
+	movq	%rsp,%rax
+	movq	%r15,%rcx
+	movl	%edx,%r10d
+	call	_bsaes_key_convert
+	pxor	%xmm6,%xmm7	## fold the returned constant into the last source entry ...
+	movdqa	%xmm7,(%rax)	## ... and store it as the final schedule word
+
+	andq	$-16,%r14	## r14 = bytes in whole 16-byte blocks
+	subq	$0x80,%rsp	## 128-byte scratch area for eight tweaks, placed below the schedule
+	movdqa	32(%rbp),%xmm6	## xmm6 = current tweak
+
+	pxor	%xmm14,%xmm14
+	movdqa	L$xts_magic(%rip),%xmm12	## constant used in the tweak update (defined outside this chunk)
+	pcmpgtd	%xmm6,%xmm14	## xmm14 = per-32-bit-lane sign mask of the tweak
+
+	subq	$0x80,%r14
+	jc	L$xts_enc_short	## fewer than 128 bytes of whole blocks
+	jmp	L$xts_enc_loop
+
+.p2align	4
+L$xts_enc_loop:	## main loop: eight blocks per iteration
+	pshufd	$0x13,%xmm14,%xmm13	## tweak update: shuffle the sign mask into position for the carry/feedback term
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm15	## tweak 0 -> block register and scratch slot 0
+	movdqa	%xmm6,0(%rsp)
+	paddq	%xmm6,%xmm6	## double each 64-bit half of the tweak
+	pand	%xmm12,%xmm13	## keep only the bits selected by L$xts_magic
+	pcmpgtd	%xmm6,%xmm14	## sign mask of the doubled value, for the next update
+	pxor	%xmm13,%xmm6	## apply the feedback term: xmm6 = next tweak
+	pshufd	$0x13,%xmm14,%xmm13	## tweak 1
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm0
+	movdqa	%xmm6,16(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	0(%r12),%xmm7
+	pshufd	$0x13,%xmm14,%xmm13	## tweak 2
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm1
+	movdqa	%xmm6,32(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm7,%xmm15	## block 0 = input 0 ^ tweak 0
+	pshufd	$0x13,%xmm14,%xmm13	## tweak 3
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm2
+	movdqa	%xmm6,48(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	32(%r12),%xmm9
+	pxor	%xmm8,%xmm0
+	pshufd	$0x13,%xmm14,%xmm13	## tweak 4
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm3
+	movdqa	%xmm6,64(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	48(%r12),%xmm10
+	pxor	%xmm9,%xmm1
+	pshufd	$0x13,%xmm14,%xmm13	## tweak 5
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm4
+	movdqa	%xmm6,80(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	64(%r12),%xmm11
+	pxor	%xmm10,%xmm2
+	pshufd	$0x13,%xmm14,%xmm13	## tweak 6
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm5
+	movdqa	%xmm6,96(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	80(%r12),%xmm12	## xmm12 (the magic constant) is reused for input data; it is reloaded after the call
+	pxor	%xmm11,%xmm3
+	movdqu	96(%r12),%xmm13
+	pxor	%xmm12,%xmm4
+	movdqu	112(%r12),%xmm14
+	leaq	128(%r12),%r12
+	movdqa	%xmm6,112(%rsp)	## tweak 7 -> scratch slot 7
+	pxor	%xmm13,%xmm5
+	leaq	128(%rsp),%rax	## %rax -> converted schedule (just above the tweak scratch)
+	pxor	%xmm14,%xmm6	## block 7 = input 7 ^ tweak 7
+	movl	%edx,%r10d
+
+	call	_bsaes_encrypt8
+
+	pxor	0(%rsp),%xmm15	## XOR each result with its tweak; results arrive in the order xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,xmm1,xmm4
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm3
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm5
+	movdqu	%xmm3,32(%r13)
+	pxor	64(%rsp),%xmm2
+	movdqu	%xmm5,48(%r13)
+	pxor	80(%rsp),%xmm6
+	movdqu	%xmm2,64(%r13)
+	pxor	96(%rsp),%xmm1
+	movdqu	%xmm6,80(%r13)
+	pxor	112(%rsp),%xmm4
+	movdqu	%xmm1,96(%r13)
+	movdqu	%xmm4,112(%r13)
+	leaq	128(%r13),%r13
+
+	movdqa	112(%rsp),%xmm6	## resume from tweak 7 and advance it once for the next batch
+	pxor	%xmm14,%xmm14
+	movdqa	L$xts_magic(%rip),%xmm12	## reload the constant clobbered above
+	pcmpgtd	%xmm6,%xmm14
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+
+	subq	$0x80,%r14
+	jnc	L$xts_enc_loop	## no borrow: at least 128 more bytes of whole blocks
+
+L$xts_enc_short:	## 0..7 whole blocks remain; tweaks are generated one by one while dispatching on the byte count
+	addq	$0x80,%r14
+	jz	L$xts_enc_done
+	pshufd	$0x13,%xmm14,%xmm13	## tweak 0
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm15
+	movdqa	%xmm6,0(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	pshufd	$0x13,%xmm14,%xmm13	## tweak 1
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm0
+	movdqa	%xmm6,16(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	0(%r12),%xmm7
+	cmpq	$16,%r14
+	je	L$xts_enc_1
+	pshufd	$0x13,%xmm14,%xmm13	## tweak 2
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm1
+	movdqa	%xmm6,32(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	16(%r12),%xmm8
+	cmpq	$32,%r14
+	je	L$xts_enc_2
+	pxor	%xmm7,%xmm15
+	pshufd	$0x13,%xmm14,%xmm13	## tweak 3
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm2
+	movdqa	%xmm6,48(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	32(%r12),%xmm9
+	cmpq	$48,%r14
+	je	L$xts_enc_3
+	pxor	%xmm8,%xmm0
+	pshufd	$0x13,%xmm14,%xmm13	## tweak 4
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm3
+	movdqa	%xmm6,64(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	48(%r12),%xmm10
+	cmpq	$64,%r14
+	je	L$xts_enc_4
+	pxor	%xmm9,%xmm1
+	pshufd	$0x13,%xmm14,%xmm13	## tweak 5
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm4
+	movdqa	%xmm6,80(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	64(%r12),%xmm11
+	cmpq	$80,%r14
+	je	L$xts_enc_5
+	pxor	%xmm10,%xmm2
+	pshufd	$0x13,%xmm14,%xmm13	## tweak 6
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm5
+	movdqa	%xmm6,96(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	80(%r12),%xmm12
+	cmpq	$96,%r14
+	je	L$xts_enc_6
+	pxor	%xmm11,%xmm3	## seven blocks remain
+	movdqu	96(%r12),%xmm13
+	pxor	%xmm12,%xmm4
+	movdqa	%xmm6,112(%rsp)	## tweak 7 is saved for the stealing step
+	leaq	112(%r12),%r12
+	pxor	%xmm13,%xmm5
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_encrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm3
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm5
+	movdqu	%xmm3,32(%r13)
+	pxor	64(%rsp),%xmm2
+	movdqu	%xmm5,48(%r13)
+	pxor	80(%rsp),%xmm6
+	movdqu	%xmm2,64(%r13)
+	pxor	96(%rsp),%xmm1
+	movdqu	%xmm6,80(%r13)
+	movdqu	%xmm1,96(%r13)
+	leaq	112(%r13),%r13
+
+	movdqa	112(%rsp),%xmm6	## xmm6 = first unused tweak
+	jmp	L$xts_enc_done
+.p2align	4
+L$xts_enc_6:	## six blocks remain
+	pxor	%xmm11,%xmm3
+	leaq	96(%r12),%r12
+	pxor	%xmm12,%xmm4
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_encrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm3
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm5
+	movdqu	%xmm3,32(%r13)
+	pxor	64(%rsp),%xmm2
+	movdqu	%xmm5,48(%r13)
+	pxor	80(%rsp),%xmm6
+	movdqu	%xmm2,64(%r13)
+	movdqu	%xmm6,80(%r13)
+	leaq	96(%r13),%r13
+
+	movdqa	96(%rsp),%xmm6	## xmm6 = first unused tweak
+	jmp	L$xts_enc_done
+.p2align	4
+L$xts_enc_5:	## five blocks remain
+	pxor	%xmm10,%xmm2
+	leaq	80(%r12),%r12
+	pxor	%xmm11,%xmm3
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_encrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm3
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm5
+	movdqu	%xmm3,32(%r13)
+	pxor	64(%rsp),%xmm2
+	movdqu	%xmm5,48(%r13)
+	movdqu	%xmm2,64(%r13)
+	leaq	80(%r13),%r13
+
+	movdqa	80(%rsp),%xmm6	## xmm6 = first unused tweak
+	jmp	L$xts_enc_done
+.p2align	4
+L$xts_enc_4:	## four blocks remain
+	pxor	%xmm9,%xmm1
+	leaq	64(%r12),%r12
+	pxor	%xmm10,%xmm2
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_encrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm3
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm5
+	movdqu	%xmm3,32(%r13)
+	movdqu	%xmm5,48(%r13)
+	leaq	64(%r13),%r13
+
+	movdqa	64(%rsp),%xmm6	## xmm6 = first unused tweak
+	jmp	L$xts_enc_done
+.p2align	4
+L$xts_enc_3:	## three blocks remain
+	pxor	%xmm8,%xmm0
+	leaq	48(%r12),%r12
+	pxor	%xmm9,%xmm1
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_encrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm3
+	movdqu	%xmm0,16(%r13)
+	movdqu	%xmm3,32(%r13)
+	leaq	48(%r13),%r13
+
+	movdqa	48(%rsp),%xmm6	## xmm6 = first unused tweak
+	jmp	L$xts_enc_done
+.p2align	4
+L$xts_enc_2:	## two blocks remain
+	pxor	%xmm7,%xmm15
+	leaq	32(%r12),%r12
+	pxor	%xmm8,%xmm0
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_encrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	movdqu	%xmm0,16(%r13)
+	leaq	32(%r13),%r13
+
+	movdqa	32(%rsp),%xmm6	## xmm6 = first unused tweak
+	jmp	L$xts_enc_done
+.p2align	4
+L$xts_enc_1:	## one block remains: use the scalar routine on the original (unconverted) key
+	pxor	%xmm15,%xmm7	## input ^ tweak 0
+	leaq	16(%r12),%r12
+	movdqa	%xmm7,32(%rbp)
+	leaq	32(%rbp),%rdi	## encrypt in place at 32(%rbp)
+	leaq	32(%rbp),%rsi
+	leaq	(%r15),%rdx
+	call	_asm_AES_encrypt	## NOTE(review): relies on xmm15 surviving this call - callee is outside this chunk, confirm
+	pxor	32(%rbp),%xmm15	## result ^ tweak 0
+
+
+
+
+
+	movdqu	%xmm15,0(%r13)
+	leaq	16(%r13),%r13
+
+	movdqa	16(%rsp),%xmm6	## xmm6 = first unused tweak
+
+L$xts_enc_done:
+	andl	$15,%ebx	## trailing bytes beyond the last whole block
+	jz	L$xts_enc_ret
+	movq	%r13,%rdx
+
+L$xts_enc_steal:	## per trailing byte: move the matching byte of the last output block forward and put the input byte in its place
+	movzbl	(%r12),%eax
+	movzbl	-16(%rdx),%ecx
+	leaq	1(%r12),%r12
+	movb	%al,-16(%rdx)
+	movb	%cl,0(%rdx)
+	leaq	1(%rdx),%rdx
+	subl	$1,%ebx
+	jnz	L$xts_enc_steal
+
+	movdqu	-16(%r13),%xmm15	## re-encrypt the patched last full block with the next tweak (xmm6)
+	leaq	32(%rbp),%rdi
+	pxor	%xmm6,%xmm15
+	leaq	32(%rbp),%rsi
+	movdqa	%xmm15,32(%rbp)
+	leaq	(%r15),%rdx
+	call	_asm_AES_encrypt	## NOTE(review): relies on xmm6 surviving this call - confirm against the callee
+	pxor	32(%rbp),%xmm6
+	movdqu	%xmm6,-16(%r13)
+
+L$xts_enc_ret:
+	leaq	(%rsp),%rax
+	pxor	%xmm0,%xmm0
+L$xts_enc_bzero:	## wipe the tweak scratch and converted schedule (everything from %rsp up to %rbp), 32 bytes per pass
+	movdqa	%xmm0,0(%rax)
+	movdqa	%xmm0,16(%rax)
+	leaq	32(%rax),%rax
+	cmpq	%rax,%rbp
+	ja	L$xts_enc_bzero
+
+	leaq	120(%rbp),%rax	## %rax = %rsp value at function entry
+	movq	-48(%rax),%r15
+	movq	-40(%rax),%r14
+	movq	-32(%rax),%r13
+	movq	-24(%rax),%r12
+	movq	-16(%rax),%rbx
+	movq	-8(%rax),%rbp
+	leaq	(%rax),%rsp
+L$xts_enc_epilogue:
+	.byte	0xf3,0xc3	## rep ret
+
+
+.globl	_bsaes_xts_decrypt
+.private_extern _bsaes_xts_decrypt
+
+.p2align	4
+_bsaes_xts_decrypt:
+	movq	%rsp,%rax
+L$xts_dec_prologue:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	leaq	-72(%rsp),%rsp
+	movq	%rsp,%rbp
+	movq	%rdi,%r12
+	movq	%rsi,%r13
+	movq	%rdx,%r14
+	movq	%rcx,%r15
+
+	leaq	(%r9),%rdi
+	leaq	32(%rbp),%rsi
+	leaq	(%r8),%rdx
+	call	_asm_AES_encrypt
+
+	movl	240(%r15),%eax
+	movq	%r14,%rbx
+
+	movl	%eax,%edx
+	shlq	$7,%rax
+	subq	$96,%rax
+	subq	%rax,%rsp
+
+	movq	%rsp,%rax
+	movq	%r15,%rcx
+	movl	%edx,%r10d
+	call	_bsaes_key_convert
+	pxor	(%rsp),%xmm7
+	movdqa	%xmm6,(%rax)
+	movdqa	%xmm7,(%rsp)
+
+	xorl	%eax,%eax
+	andq	$-16,%r14
+	testl	$15,%ebx
+	setnz	%al
+	shlq	$4,%rax
+	subq	%rax,%r14
+
+	subq	$0x80,%rsp
+	movdqa	32(%rbp),%xmm6
+
+	pxor	%xmm14,%xmm14
+	movdqa	L$xts_magic(%rip),%xmm12
+	pcmpgtd	%xmm6,%xmm14
+
+	subq	$0x80,%r14
+	jc	L$xts_dec_short
+	jmp	L$xts_dec_loop
+
+.p2align	4
+L$xts_dec_loop:
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm15
+	movdqa	%xmm6,0(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm0
+	movdqa	%xmm6,16(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	0(%r12),%xmm7
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm1
+	movdqa	%xmm6,32(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	16(%r12),%xmm8
+	pxor	%xmm7,%xmm15
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm2
+	movdqa	%xmm6,48(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	32(%r12),%xmm9
+	pxor	%xmm8,%xmm0
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm3
+	movdqa	%xmm6,64(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	48(%r12),%xmm10
+	pxor	%xmm9,%xmm1
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm4
+	movdqa	%xmm6,80(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	64(%r12),%xmm11
+	pxor	%xmm10,%xmm2
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm5
+	movdqa	%xmm6,96(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	80(%r12),%xmm12
+	pxor	%xmm11,%xmm3
+	movdqu	96(%r12),%xmm13
+	pxor	%xmm12,%xmm4
+	movdqu	112(%r12),%xmm14
+	leaq	128(%r12),%r12
+	movdqa	%xmm6,112(%rsp)
+	pxor	%xmm13,%xmm5
+	leaq	128(%rsp),%rax
+	pxor	%xmm14,%xmm6
+	movl	%edx,%r10d
+
+	call	_bsaes_decrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm5
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm3
+	movdqu	%xmm5,32(%r13)
+	pxor	64(%rsp),%xmm1
+	movdqu	%xmm3,48(%r13)
+	pxor	80(%rsp),%xmm6
+	movdqu	%xmm1,64(%r13)
+	pxor	96(%rsp),%xmm2
+	movdqu	%xmm6,80(%r13)
+	pxor	112(%rsp),%xmm4
+	movdqu	%xmm2,96(%r13)
+	movdqu	%xmm4,112(%r13)
+	leaq	128(%r13),%r13
+
+	movdqa	112(%rsp),%xmm6
+	pxor	%xmm14,%xmm14
+	movdqa	L$xts_magic(%rip),%xmm12
+	pcmpgtd	%xmm6,%xmm14
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+
+	subq	$0x80,%r14
+	jnc	L$xts_dec_loop
+
+L$xts_dec_short:
+	addq	$0x80,%r14
+	jz	L$xts_dec_done
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm15
+	movdqa	%xmm6,0(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm0
+	movdqa	%xmm6,16(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	0(%r12),%xmm7
+	cmpq	$16,%r14
+	je	L$xts_dec_1
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm1
+	movdqa	%xmm6,32(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	16(%r12),%xmm8
+	cmpq	$32,%r14
+	je	L$xts_dec_2
+	pxor	%xmm7,%xmm15
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm2
+	movdqa	%xmm6,48(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	32(%r12),%xmm9
+	cmpq	$48,%r14
+	je	L$xts_dec_3
+	pxor	%xmm8,%xmm0
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm3
+	movdqa	%xmm6,64(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	48(%r12),%xmm10
+	cmpq	$64,%r14
+	je	L$xts_dec_4
+	pxor	%xmm9,%xmm1
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm4
+	movdqa	%xmm6,80(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	64(%r12),%xmm11
+	cmpq	$80,%r14
+	je	L$xts_dec_5
+	pxor	%xmm10,%xmm2
+	pshufd	$0x13,%xmm14,%xmm13
+	pxor	%xmm14,%xmm14
+	movdqa	%xmm6,%xmm5
+	movdqa	%xmm6,96(%rsp)
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	pcmpgtd	%xmm6,%xmm14
+	pxor	%xmm13,%xmm6
+	movdqu	80(%r12),%xmm12
+	cmpq	$96,%r14
+	je	L$xts_dec_6
+	pxor	%xmm11,%xmm3
+	movdqu	96(%r12),%xmm13
+	pxor	%xmm12,%xmm4
+	movdqa	%xmm6,112(%rsp)
+	leaq	112(%r12),%r12
+	pxor	%xmm13,%xmm5
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_decrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm5
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm3
+	movdqu	%xmm5,32(%r13)
+	pxor	64(%rsp),%xmm1
+	movdqu	%xmm3,48(%r13)
+	pxor	80(%rsp),%xmm6
+	movdqu	%xmm1,64(%r13)
+	pxor	96(%rsp),%xmm2
+	movdqu	%xmm6,80(%r13)
+	movdqu	%xmm2,96(%r13)
+	leaq	112(%r13),%r13
+
+	movdqa	112(%rsp),%xmm6
+	jmp	L$xts_dec_done
+.p2align	4
+L$xts_dec_6:
+	pxor	%xmm11,%xmm3
+	leaq	96(%r12),%r12
+	pxor	%xmm12,%xmm4
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_decrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm5
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm3
+	movdqu	%xmm5,32(%r13)
+	pxor	64(%rsp),%xmm1
+	movdqu	%xmm3,48(%r13)
+	pxor	80(%rsp),%xmm6
+	movdqu	%xmm1,64(%r13)
+	movdqu	%xmm6,80(%r13)
+	leaq	96(%r13),%r13
+
+	movdqa	96(%rsp),%xmm6
+	jmp	L$xts_dec_done
+.p2align	4
+L$xts_dec_5:
+	pxor	%xmm10,%xmm2
+	leaq	80(%r12),%r12
+	pxor	%xmm11,%xmm3
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_decrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm5
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm3
+	movdqu	%xmm5,32(%r13)
+	pxor	64(%rsp),%xmm1
+	movdqu	%xmm3,48(%r13)
+	movdqu	%xmm1,64(%r13)
+	leaq	80(%r13),%r13
+
+	movdqa	80(%rsp),%xmm6
+	jmp	L$xts_dec_done
+.p2align	4
+L$xts_dec_4:
+	pxor	%xmm9,%xmm1
+	leaq	64(%r12),%r12
+	pxor	%xmm10,%xmm2
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_decrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm5
+	movdqu	%xmm0,16(%r13)
+	pxor	48(%rsp),%xmm3
+	movdqu	%xmm5,32(%r13)
+	movdqu	%xmm3,48(%r13)
+	leaq	64(%r13),%r13
+
+	movdqa	64(%rsp),%xmm6
+	jmp	L$xts_dec_done
+.p2align	4
+L$xts_dec_3:
+	pxor	%xmm8,%xmm0
+	leaq	48(%r12),%r12
+	pxor	%xmm9,%xmm1
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_decrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	pxor	32(%rsp),%xmm5
+	movdqu	%xmm0,16(%r13)
+	movdqu	%xmm5,32(%r13)
+	leaq	48(%r13),%r13
+
+	movdqa	48(%rsp),%xmm6
+	jmp	L$xts_dec_done
+.p2align	4
+L$xts_dec_2:
+	pxor	%xmm7,%xmm15
+	leaq	32(%r12),%r12
+	pxor	%xmm8,%xmm0
+	leaq	128(%rsp),%rax
+	movl	%edx,%r10d
+
+	call	_bsaes_decrypt8
+
+	pxor	0(%rsp),%xmm15
+	pxor	16(%rsp),%xmm0
+	movdqu	%xmm15,0(%r13)
+	movdqu	%xmm0,16(%r13)
+	leaq	32(%r13),%r13
+
+	movdqa	32(%rsp),%xmm6
+	jmp	L$xts_dec_done
+.p2align	4
+L$xts_dec_1:
+	pxor	%xmm15,%xmm7
+	leaq	16(%r12),%r12
+	movdqa	%xmm7,32(%rbp)
+	leaq	32(%rbp),%rdi
+	leaq	32(%rbp),%rsi
+	leaq	(%r15),%rdx
+	call	_asm_AES_decrypt
+	pxor	32(%rbp),%xmm15
+
+
+
+
+
+	movdqu	%xmm15,0(%r13)
+	leaq	16(%r13),%r13
+
+	movdqa	16(%rsp),%xmm6
+
+L$xts_dec_done:
+	andl	$15,%ebx
+	jz	L$xts_dec_ret
+
+	pxor	%xmm14,%xmm14
+	movdqa	L$xts_magic(%rip),%xmm12
+	pcmpgtd	%xmm6,%xmm14
+	pshufd	$0x13,%xmm14,%xmm13
+	movdqa	%xmm6,%xmm5
+	paddq	%xmm6,%xmm6
+	pand	%xmm12,%xmm13
+	movdqu	(%r12),%xmm15
+	pxor	%xmm13,%xmm6
+
+	leaq	32(%rbp),%rdi
+	pxor	%xmm6,%xmm15
+	leaq	32(%rbp),%rsi
+	movdqa	%xmm15,32(%rbp)
+	leaq	(%r15),%rdx
+	call	_asm_AES_decrypt
+	pxor	32(%rbp),%xmm6
+	movq	%r13,%rdx
+	movdqu	%xmm6,(%r13)
+
+L$xts_dec_steal:
+	movzbl	16(%r12),%eax
+	movzbl	(%rdx),%ecx
+	leaq	1(%r12),%r12
+	movb	%al,(%rdx)
+	movb	%cl,16(%rdx)
+	leaq	1(%rdx),%rdx
+	subl	$1,%ebx
+	jnz	L$xts_dec_steal
+
+	movdqu	(%r13),%xmm15
+	leaq	32(%rbp),%rdi
+	pxor	%xmm5,%xmm15
+	leaq	32(%rbp),%rsi
+	movdqa	%xmm15,32(%rbp)
+	leaq	(%r15),%rdx
+	call	_asm_AES_decrypt
+	pxor	32(%rbp),%xmm5
+	movdqu	%xmm5,(%r13)
+
+L$xts_dec_ret:
+	leaq	(%rsp),%rax
+	pxor	%xmm0,%xmm0
+L$xts_dec_bzero:
+	movdqa	%xmm0,0(%rax)
+	movdqa	%xmm0,16(%rax)
+	leaq	32(%rax),%rax
+	cmpq	%rax,%rbp
+	ja	L$xts_dec_bzero
+
+	leaq	120(%rbp),%rax
+	movq	-48(%rax),%r15
+	movq	-40(%rax),%r14
+	movq	-32(%rax),%r13
+	movq	-24(%rax),%r12
+	movq	-16(%rax),%rbx
+	movq	-8(%rax),%rbp
+	leaq	(%rax),%rsp
+L$xts_dec_epilogue:
+	.byte	0xf3,0xc3
+
+
+.p2align	6
+_bsaes_const:	// 16-byte constant pool for the bsaes code; entries are read RIP-relative, e.g. L$xts_magic(%rip)
+L$M0ISR:
+.quad	0x0a0e0206070b0f03, 0x0004080c0d010509
+L$ISRM0:
+.quad	0x01040b0e0205080f, 0x0306090c00070a0d
+L$ISR:
+.quad	0x0504070602010003, 0x0f0e0d0c080b0a09
+L$BS0:	// every byte 0x55
+.quad	0x5555555555555555, 0x5555555555555555
+L$BS1:	// every byte 0x33
+.quad	0x3333333333333333, 0x3333333333333333
+L$BS2:	// every byte 0x0f
+.quad	0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+L$SR:
+.quad	0x0504070600030201, 0x0f0e0d0c0a09080b
+L$SRM0:
+.quad	0x0304090e00050a0f, 0x01060b0c0207080d
+L$M0SR:
+.quad	0x0a0e02060f03070b, 0x0004080c05090d01
+L$SWPUP:
+.quad	0x0706050403020100, 0x0c0d0e0f0b0a0908
+L$SWPUPM0SR:
+.quad	0x0a0d02060c03070b, 0x0004080f05090e01
+L$ADD1:	// L$ADD1..L$ADD8: value N in the top 32-bit lane, other lanes zero (presumably counter increments -- confirm at the use sites)
+.quad	0x0000000000000000, 0x0000000100000000
+L$ADD2:
+.quad	0x0000000000000000, 0x0000000200000000
+L$ADD3:
+.quad	0x0000000000000000, 0x0000000300000000
+L$ADD4:
+.quad	0x0000000000000000, 0x0000000400000000
+L$ADD5:
+.quad	0x0000000000000000, 0x0000000500000000
+L$ADD6:
+.quad	0x0000000000000000, 0x0000000600000000
+L$ADD7:
+.quad	0x0000000000000000, 0x0000000700000000
+L$ADD8:
+.quad	0x0000000000000000, 0x0000000800000000
+L$xts_magic:	// loaded into %xmm12 by the XTS code and ANDed with the carry mask when the tweak in %xmm6 is doubled
+.long	0x87,0,1,0
+L$masks:	// four vectors with a single bit set in every byte: 0x01, 0x02, 0x04, 0x08
+.quad	0x0101010101010101, 0x0101010101010101
+.quad	0x0202020202020202, 0x0202020202020202
+.quad	0x0404040404040404, 0x0404040404040404
+.quad	0x0808080808080808, 0x0808080808080808
+L$M0:
+.quad	0x02060a0e03070b0f, 0x0004080c0105090d
+L$63:	// every byte 0x63
+.quad	0x6363636363636363, 0x6363636363636363
+.byte	66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0	// NUL-terminated UTF-8 banner: "Bit-sliced AES for x86_64/SSSE3, Emilia Kasper, Peter Schwabe, Andy Polyakov" (bytes 195,164 encode a-umlaut)
+.p2align	6
+
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/fipsmodule/ghash-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/ghash-x86_64.S
new file mode 100644
index 0000000..78b88cc
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/ghash-x86_64.S
@@ -0,0 +1,1805 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+
+.globl	_gcm_gmult_4bit
+.private_extern _gcm_gmult_4bit
+
+.p2align	4
+_gcm_gmult_4bit:	// table-driven multiply, one nibble at a time: rewrites the 16 bytes at (%rdi) using the 16-byte-entry table at (%rsi) and L$rem_4bit
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	subq	$280,%rsp	// 280-byte frame; the body below never addresses it, only the epilogue unwinds it
+L$gmult_prologue:
+
+	movzbq	15(%rdi),%r8	// start with the last byte of the 16-byte block
+	leaq	L$rem_4bit(%rip),%r11
+	xorq	%rax,%rax
+	xorq	%rbx,%rbx
+	movb	%r8b,%al
+	movb	%r8b,%bl
+	shlb	$4,%al	// low nibble * 16 = byte offset of a 16-byte table entry
+	movq	$14,%rcx	// index of the next input byte; counts down
+	movq	8(%rsi,%rax,1),%r8
+	movq	(%rsi,%rax,1),%r9
+	andb	$0xf0,%bl	// high nibble, already scaled by 16
+	movq	%r8,%rdx
+	jmp	L$oop1
+
+.p2align	4
+L$oop1:	// r9:r8 is the 128-bit accumulator; each pass shifts it right 4 bits twice (one table lookup per nibble)
+	shrq	$4,%r8
+	andq	$0xf,%rdx	// the 4 bits about to be shifted out select an 8-byte L$rem_4bit entry
+	movq	%r9,%r10
+	movb	(%rdi,%rcx,1),%al	// fetch the next input byte
+	shrq	$4,%r9
+	xorq	8(%rsi,%rbx,1),%r8
+	shlq	$60,%r10	// carry the low 4 bits of r9 into the top of r8
+	xorq	(%rsi,%rbx,1),%r9
+	movb	%al,%bl
+	xorq	(%r11,%rdx,8),%r9
+	movq	%r8,%rdx
+	shlb	$4,%al
+	xorq	%r10,%r8
+	decq	%rcx
+	js	L$break1	// index went negative: byte 0 has been fetched
+
+	shrq	$4,%r8
+	andq	$0xf,%rdx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	xorq	8(%rsi,%rax,1),%r8
+	shlq	$60,%r10
+	xorq	(%rsi,%rax,1),%r9
+	andb	$0xf0,%bl
+	xorq	(%r11,%rdx,8),%r9
+	movq	%r8,%rdx
+	xorq	%r10,%r8
+	jmp	L$oop1
+
+.p2align	4
+L$break1:	// finish the two nibbles of the last fetched byte
+	shrq	$4,%r8
+	andq	$0xf,%rdx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	xorq	8(%rsi,%rax,1),%r8
+	shlq	$60,%r10
+	xorq	(%rsi,%rax,1),%r9
+	andb	$0xf0,%bl
+	xorq	(%r11,%rdx,8),%r9
+	movq	%r8,%rdx
+	xorq	%r10,%r8
+
+	shrq	$4,%r8
+	andq	$0xf,%rdx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	xorq	8(%rsi,%rbx,1),%r8
+	shlq	$60,%r10
+	xorq	(%rsi,%rbx,1),%r9
+	xorq	%r10,%r8
+	xorq	(%r11,%rdx,8),%r9
+
+	bswapq	%r8	// byte-reverse both halves before storing the result
+	bswapq	%r9
+	movq	%r8,8(%rdi)
+	movq	%r9,(%rdi)
+
+	leaq	280+48(%rsp),%rsi	// address just above the six saved registers
+	movq	-8(%rsi),%rbx	// only %rbx is reloaded; the body does not write %rbp or %r12-%r15
+	leaq	(%rsi),%rsp
+L$gmult_epilogue:
+	.byte	0xf3,0xc3	// encoded "rep ret"
+
+.globl	_gcm_ghash_4bit
+.private_extern _gcm_ghash_4bit
+
+.p2align	4
+_gcm_ghash_4bit:	// table-driven hash update: folds %rcx bytes at (%rdx), 16 per pass, into the 16-byte state at (%rdi) using the table at (%rsi) and L$rem_8bit
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	subq	$280,%rsp	// frame: 16 bytes at (%rsp) plus two 128-byte arrays addressed through %rbp
+L$ghash_prologue:
+	movq	%rdx,%r14	// r14 = input cursor
+	movq	%rcx,%r15	// r15 = byte count (turned into an end pointer below)
+	subq	$-128,%rsi	// bias the table pointer by +128; undone by the addq $-128 after the setup
+	leaq	16+128(%rsp),%rbp	// rbp = rsp+144; arrays live at -128(%rbp) and 0(%rbp)
+	xorl	%edx,%edx
+	movq	0+0-128(%rsi),%r8	// setup, repeated for table entries 0..15: shift the 16-byte entry right 4 bits,
+	movq	0+8-128(%rsi),%rax	// store its halves at i*8(%rbp) and i*8-128(%rbp), and keep the shifted-out nibble (<<4) at i(%rsp)
+	movb	%al,%dl
+	shrq	$4,%rax
+	movq	%r8,%r10
+	shrq	$4,%r8
+	movq	16+0-128(%rsi),%r9
+	shlb	$4,%dl
+	movq	16+8-128(%rsi),%rbx
+	shlq	$60,%r10
+	movb	%dl,0(%rsp)
+	orq	%r10,%rax
+	movb	%bl,%dl
+	shrq	$4,%rbx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	movq	%r8,0(%rbp)
+	movq	32+0-128(%rsi),%r8
+	shlb	$4,%dl
+	movq	%rax,0-128(%rbp)
+	movq	32+8-128(%rsi),%rax
+	shlq	$60,%r10
+	movb	%dl,1(%rsp)
+	orq	%r10,%rbx
+	movb	%al,%dl
+	shrq	$4,%rax
+	movq	%r8,%r10
+	shrq	$4,%r8
+	movq	%r9,8(%rbp)
+	movq	48+0-128(%rsi),%r9
+	shlb	$4,%dl
+	movq	%rbx,8-128(%rbp)
+	movq	48+8-128(%rsi),%rbx
+	shlq	$60,%r10
+	movb	%dl,2(%rsp)
+	orq	%r10,%rax
+	movb	%bl,%dl
+	shrq	$4,%rbx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	movq	%r8,16(%rbp)
+	movq	64+0-128(%rsi),%r8
+	shlb	$4,%dl
+	movq	%rax,16-128(%rbp)
+	movq	64+8-128(%rsi),%rax
+	shlq	$60,%r10
+	movb	%dl,3(%rsp)
+	orq	%r10,%rbx
+	movb	%al,%dl
+	shrq	$4,%rax
+	movq	%r8,%r10
+	shrq	$4,%r8
+	movq	%r9,24(%rbp)
+	movq	80+0-128(%rsi),%r9
+	shlb	$4,%dl
+	movq	%rbx,24-128(%rbp)
+	movq	80+8-128(%rsi),%rbx
+	shlq	$60,%r10
+	movb	%dl,4(%rsp)
+	orq	%r10,%rax
+	movb	%bl,%dl
+	shrq	$4,%rbx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	movq	%r8,32(%rbp)
+	movq	96+0-128(%rsi),%r8
+	shlb	$4,%dl
+	movq	%rax,32-128(%rbp)
+	movq	96+8-128(%rsi),%rax
+	shlq	$60,%r10
+	movb	%dl,5(%rsp)
+	orq	%r10,%rbx
+	movb	%al,%dl
+	shrq	$4,%rax
+	movq	%r8,%r10
+	shrq	$4,%r8
+	movq	%r9,40(%rbp)
+	movq	112+0-128(%rsi),%r9
+	shlb	$4,%dl
+	movq	%rbx,40-128(%rbp)
+	movq	112+8-128(%rsi),%rbx
+	shlq	$60,%r10
+	movb	%dl,6(%rsp)
+	orq	%r10,%rax
+	movb	%bl,%dl
+	shrq	$4,%rbx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	movq	%r8,48(%rbp)
+	movq	128+0-128(%rsi),%r8
+	shlb	$4,%dl
+	movq	%rax,48-128(%rbp)
+	movq	128+8-128(%rsi),%rax
+	shlq	$60,%r10
+	movb	%dl,7(%rsp)
+	orq	%r10,%rbx
+	movb	%al,%dl
+	shrq	$4,%rax
+	movq	%r8,%r10
+	shrq	$4,%r8
+	movq	%r9,56(%rbp)
+	movq	144+0-128(%rsi),%r9
+	shlb	$4,%dl
+	movq	%rbx,56-128(%rbp)
+	movq	144+8-128(%rsi),%rbx
+	shlq	$60,%r10
+	movb	%dl,8(%rsp)
+	orq	%r10,%rax
+	movb	%bl,%dl
+	shrq	$4,%rbx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	movq	%r8,64(%rbp)
+	movq	160+0-128(%rsi),%r8
+	shlb	$4,%dl
+	movq	%rax,64-128(%rbp)
+	movq	160+8-128(%rsi),%rax
+	shlq	$60,%r10
+	movb	%dl,9(%rsp)
+	orq	%r10,%rbx
+	movb	%al,%dl
+	shrq	$4,%rax
+	movq	%r8,%r10
+	shrq	$4,%r8
+	movq	%r9,72(%rbp)
+	movq	176+0-128(%rsi),%r9
+	shlb	$4,%dl
+	movq	%rbx,72-128(%rbp)
+	movq	176+8-128(%rsi),%rbx
+	shlq	$60,%r10
+	movb	%dl,10(%rsp)
+	orq	%r10,%rax
+	movb	%bl,%dl
+	shrq	$4,%rbx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	movq	%r8,80(%rbp)
+	movq	192+0-128(%rsi),%r8
+	shlb	$4,%dl
+	movq	%rax,80-128(%rbp)
+	movq	192+8-128(%rsi),%rax
+	shlq	$60,%r10
+	movb	%dl,11(%rsp)
+	orq	%r10,%rbx
+	movb	%al,%dl
+	shrq	$4,%rax
+	movq	%r8,%r10
+	shrq	$4,%r8
+	movq	%r9,88(%rbp)
+	movq	208+0-128(%rsi),%r9
+	shlb	$4,%dl
+	movq	%rbx,88-128(%rbp)
+	movq	208+8-128(%rsi),%rbx
+	shlq	$60,%r10
+	movb	%dl,12(%rsp)
+	orq	%r10,%rax
+	movb	%bl,%dl
+	shrq	$4,%rbx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	movq	%r8,96(%rbp)
+	movq	224+0-128(%rsi),%r8
+	shlb	$4,%dl
+	movq	%rax,96-128(%rbp)
+	movq	224+8-128(%rsi),%rax
+	shlq	$60,%r10
+	movb	%dl,13(%rsp)
+	orq	%r10,%rbx
+	movb	%al,%dl
+	shrq	$4,%rax
+	movq	%r8,%r10
+	shrq	$4,%r8
+	movq	%r9,104(%rbp)
+	movq	240+0-128(%rsi),%r9
+	shlb	$4,%dl
+	movq	%rbx,104-128(%rbp)
+	movq	240+8-128(%rsi),%rbx
+	shlq	$60,%r10
+	movb	%dl,14(%rsp)
+	orq	%r10,%rax
+	movb	%bl,%dl
+	shrq	$4,%rbx
+	movq	%r9,%r10
+	shrq	$4,%r9
+	movq	%r8,112(%rbp)
+	shlb	$4,%dl
+	movq	%rax,112-128(%rbp)
+	shlq	$60,%r10
+	movb	%dl,15(%rsp)
+	orq	%r10,%rbx
+	movq	%r9,120(%rbp)
+	movq	%rbx,120-128(%rbp)
+	addq	$-128,%rsi	// remove the +128 bias applied at entry
+	movq	8(%rdi),%r8	// r9:r8 = current 16-byte state
+	movq	0(%rdi),%r9
+	addq	%r14,%r15	// r15 = end-of-input pointer
+	leaq	L$rem_8bit(%rip),%r11	// table of 16-bit entries, indexed by a byte below
+	jmp	L$outer_loop
+.p2align	4
+L$outer_loop:	// one pass per 16-byte input block
+	xorq	(%r14),%r9	// XOR the next input block into the state
+	movq	8(%r14),%rdx
+	leaq	16(%r14),%r14
+	xorq	%r8,%rdx
+	movq	%r9,(%rdi)	// spill the XORed block to (%rdi) so 32-bit pieces can be reloaded further down
+	movq	%rdx,8(%rdi)
+	shrq	$32,%rdx	// begin with the top 32 bits of the second qword
+	xorq	%rax,%rax
+	roll	$8,%edx	// rotate so the next byte to process lands in %dl
+	movb	%dl,%al
+	movzbl	%dl,%ebx
+	shlb	$4,%al	// low nibble * 16 = offset into the caller table at (%rsi)
+	shrl	$4,%ebx	// high nibble = index into the stack tables built above
+	roll	$8,%edx
+	movq	8(%rsi,%rax,1),%r8
+	movq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	movzbl	%dl,%ecx
+	shlb	$4,%al
+	movzbq	(%rsp,%rbx,1),%r12
+	shrl	$4,%ecx
+	xorq	%r8,%r12
+	movq	%r9,%r10
+	shrq	$8,%r8	// accumulator r9:r8 shifts right by a whole byte per step
+	movzbq	%r12b,%r12
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rbx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rbx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r12,2),%r12	// 16-bit L$rem_8bit entry for the byte shifted out
+	movzbl	%dl,%ebx
+	shlb	$4,%al
+	movzbq	(%rsp,%rcx,1),%r13
+	shrl	$4,%ebx
+	shlq	$48,%r12	// move the 16-bit entry to the top of the qword before folding it into r9
+	xorq	%r8,%r13
+	movq	%r9,%r10
+	xorq	%r12,%r9
+	shrq	$8,%r8
+	movzbq	%r13b,%r13
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rcx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rcx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r13,2),%r13
+	movzbl	%dl,%ecx
+	shlb	$4,%al
+	movzbq	(%rsp,%rbx,1),%r12
+	shrl	$4,%ecx
+	shlq	$48,%r13
+	xorq	%r8,%r12
+	movq	%r9,%r10
+	xorq	%r13,%r9
+	shrq	$8,%r8
+	movzbq	%r12b,%r12
+	movl	8(%rdi),%edx	// reload: low 32 bits of the second qword of the spilled block
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rbx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rbx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r12,2),%r12
+	movzbl	%dl,%ebx
+	shlb	$4,%al
+	movzbq	(%rsp,%rcx,1),%r13
+	shrl	$4,%ebx
+	shlq	$48,%r12
+	xorq	%r8,%r13
+	movq	%r9,%r10
+	xorq	%r12,%r9
+	shrq	$8,%r8
+	movzbq	%r13b,%r13
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rcx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rcx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r13,2),%r13
+	movzbl	%dl,%ecx
+	shlb	$4,%al
+	movzbq	(%rsp,%rbx,1),%r12
+	shrl	$4,%ecx
+	shlq	$48,%r13
+	xorq	%r8,%r12
+	movq	%r9,%r10
+	xorq	%r13,%r9
+	shrq	$8,%r8
+	movzbq	%r12b,%r12
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rbx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rbx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r12,2),%r12
+	movzbl	%dl,%ebx
+	shlb	$4,%al
+	movzbq	(%rsp,%rcx,1),%r13
+	shrl	$4,%ebx
+	shlq	$48,%r12
+	xorq	%r8,%r13
+	movq	%r9,%r10
+	xorq	%r12,%r9
+	shrq	$8,%r8
+	movzbq	%r13b,%r13
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rcx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rcx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r13,2),%r13
+	movzbl	%dl,%ecx
+	shlb	$4,%al
+	movzbq	(%rsp,%rbx,1),%r12
+	shrl	$4,%ecx
+	shlq	$48,%r13
+	xorq	%r8,%r12
+	movq	%r9,%r10
+	xorq	%r13,%r9
+	shrq	$8,%r8
+	movzbq	%r12b,%r12
+	movl	4(%rdi),%edx	// reload: bytes 4..7 of the spilled block
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rbx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rbx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r12,2),%r12
+	movzbl	%dl,%ebx
+	shlb	$4,%al
+	movzbq	(%rsp,%rcx,1),%r13
+	shrl	$4,%ebx
+	shlq	$48,%r12
+	xorq	%r8,%r13
+	movq	%r9,%r10
+	xorq	%r12,%r9
+	shrq	$8,%r8
+	movzbq	%r13b,%r13
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rcx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rcx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r13,2),%r13
+	movzbl	%dl,%ecx
+	shlb	$4,%al
+	movzbq	(%rsp,%rbx,1),%r12
+	shrl	$4,%ecx
+	shlq	$48,%r13
+	xorq	%r8,%r12
+	movq	%r9,%r10
+	xorq	%r13,%r9
+	shrq	$8,%r8
+	movzbq	%r12b,%r12
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rbx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rbx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r12,2),%r12
+	movzbl	%dl,%ebx
+	shlb	$4,%al
+	movzbq	(%rsp,%rcx,1),%r13
+	shrl	$4,%ebx
+	shlq	$48,%r12
+	xorq	%r8,%r13
+	movq	%r9,%r10
+	xorq	%r12,%r9
+	shrq	$8,%r8
+	movzbq	%r13b,%r13
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rcx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rcx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r13,2),%r13
+	movzbl	%dl,%ecx
+	shlb	$4,%al
+	movzbq	(%rsp,%rbx,1),%r12
+	shrl	$4,%ecx
+	shlq	$48,%r13
+	xorq	%r8,%r12
+	movq	%r9,%r10
+	xorq	%r13,%r9
+	shrq	$8,%r8
+	movzbq	%r12b,%r12
+	movl	0(%rdi),%edx	// reload: bytes 0..3 of the spilled block
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rbx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rbx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r12,2),%r12
+	movzbl	%dl,%ebx
+	shlb	$4,%al
+	movzbq	(%rsp,%rcx,1),%r13
+	shrl	$4,%ebx
+	shlq	$48,%r12
+	xorq	%r8,%r13
+	movq	%r9,%r10
+	xorq	%r12,%r9
+	shrq	$8,%r8
+	movzbq	%r13b,%r13
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rcx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rcx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r13,2),%r13
+	movzbl	%dl,%ecx
+	shlb	$4,%al
+	movzbq	(%rsp,%rbx,1),%r12
+	shrl	$4,%ecx
+	shlq	$48,%r13
+	xorq	%r8,%r12
+	movq	%r9,%r10
+	xorq	%r13,%r9
+	shrq	$8,%r8
+	movzbq	%r12b,%r12
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rbx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rbx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r12,2),%r12
+	movzbl	%dl,%ebx
+	shlb	$4,%al
+	movzbq	(%rsp,%rcx,1),%r13
+	shrl	$4,%ebx
+	shlq	$48,%r12
+	xorq	%r8,%r13
+	movq	%r9,%r10
+	xorq	%r12,%r9
+	shrq	$8,%r8
+	movzbq	%r13b,%r13
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rcx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rcx,8),%r9
+	roll	$8,%edx
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	movb	%dl,%al
+	xorq	%r10,%r8
+	movzwq	(%r11,%r13,2),%r13
+	movzbl	%dl,%ecx
+	shlb	$4,%al
+	movzbq	(%rsp,%rbx,1),%r12
+	andl	$240,%ecx	// final byte: keep the high nibble in place (already scaled by 16) for the 4-bit step below
+	shlq	$48,%r13
+	xorq	%r8,%r12
+	movq	%r9,%r10
+	xorq	%r13,%r9
+	shrq	$8,%r8
+	movzbq	%r12b,%r12
+	movl	-4(%rdi),%edx	// NOTE(review): reads 4 bytes below (%rdi); %edx is not read again before L$outer_loop reloads %rdx, so the value is unused
+	shrq	$8,%r9
+	xorq	-128(%rbp,%rbx,8),%r8
+	shlq	$56,%r10
+	xorq	(%rbp,%rbx,8),%r9
+	movzwq	(%r11,%r12,2),%r12
+	xorq	8(%rsi,%rax,1),%r8
+	xorq	(%rsi,%rax,1),%r9
+	shlq	$48,%r12
+	xorq	%r10,%r8
+	xorq	%r12,%r9
+	movzbq	%r8b,%r13
+	shrq	$4,%r8	// last step shifts by 4 bits only and indexes the caller table directly
+	movq	%r9,%r10
+	shlb	$4,%r13b
+	shrq	$4,%r9
+	xorq	8(%rsi,%rcx,1),%r8
+	movzwq	(%r11,%r13,2),%r13
+	shlq	$60,%r10
+	xorq	(%rsi,%rcx,1),%r9
+	xorq	%r10,%r8
+	shlq	$48,%r13
+	bswapq	%r8	// byte-reverse both halves of the new state
+	xorq	%r13,%r9
+	bswapq	%r9
+	cmpq	%r15,%r14
+	jb	L$outer_loop	// continue while the cursor is below the end pointer
+	movq	%r8,8(%rdi)	// write the final state back
+	movq	%r9,(%rdi)
+
+	leaq	280+48(%rsp),%rsi	// address just above the six saved registers
+	movq	-48(%rsi),%r15
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	0(%rsi),%rsp
+L$ghash_epilogue:
+	.byte	0xf3,0xc3	// encoded "rep ret"
+
+.globl	_gcm_init_clmul
+.private_extern _gcm_init_clmul
+
+.p2align	4
+_gcm_init_clmul:	// builds a 96-byte table at (%rdi) from the 16-byte value at (%rsi), using carry-less multiplies (PCLMULQDQ emitted as .byte)
+L$_init_clmul:
+	movdqu	(%rsi),%xmm2
+	pshufd	$78,%xmm2,%xmm2	// swap the two 64-bit halves
+
+
+	pshufd	$255,%xmm2,%xmm4	// broadcast the top dword; its sign decides the conditional XOR below
+	movdqa	%xmm2,%xmm3
+	psllq	$1,%xmm2	// 128-bit shift left by one: shift both halves ...
+	pxor	%xmm5,%xmm5
+	psrlq	$63,%xmm3
+	pcmpgtd	%xmm4,%xmm5	// all-ones mask if the top bit was set, else zero
+	pslldq	$8,%xmm3	// ... and carry bit 63 of the low half into the high half
+	por	%xmm3,%xmm2
+
+
+	pand	L$0x1c2_polynomial(%rip),%xmm5	// select the constant only when the mask is set
+	pxor	%xmm5,%xmm2
+
+
+	pshufd	$78,%xmm2,%xmm6
+	movdqa	%xmm2,%xmm0
+	pxor	%xmm2,%xmm6	// xmm6 = high half XOR low half of xmm2 (operand for the middle product)
+	movdqa	%xmm0,%xmm1
+	pshufd	$78,%xmm0,%xmm3
+	pxor	%xmm0,%xmm3
+.byte	102,15,58,68,194,0	// pclmulqdq $0x00,%xmm2,%xmm0
+.byte	102,15,58,68,202,17	// pclmulqdq $0x11,%xmm2,%xmm1
+.byte	102,15,58,68,222,0	// pclmulqdq $0x00,%xmm6,%xmm3
+	pxor	%xmm0,%xmm3
+	pxor	%xmm1,%xmm3
+
+	movdqa	%xmm3,%xmm4
+	psrldq	$8,%xmm3
+	pslldq	$8,%xmm4
+	pxor	%xmm3,%xmm1
+	pxor	%xmm4,%xmm0
+
+	movdqa	%xmm0,%xmm4	// shift/XOR sequence folding the 256-bit product xmm1:xmm0 back to 128 bits in xmm0
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+
+
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+	pshufd	$78,%xmm2,%xmm3
+	pshufd	$78,%xmm0,%xmm4
+	pxor	%xmm2,%xmm3
+	movdqu	%xmm2,0(%rdi)	// table[0] = adjusted input value
+	pxor	%xmm0,%xmm4
+	movdqu	%xmm0,16(%rdi)	// table[1] = first product (value times itself)
+.byte	102,15,58,15,227,8	// palignr $8,%xmm3,%xmm4
+	movdqu	%xmm4,32(%rdi)	// table[2] = packed half-XORs of the two entries above
+	movdqa	%xmm0,%xmm1
+	pshufd	$78,%xmm0,%xmm3
+	pxor	%xmm0,%xmm3
+.byte	102,15,58,68,194,0	// pclmulqdq $0x00,%xmm2,%xmm0
+.byte	102,15,58,68,202,17	// pclmulqdq $0x11,%xmm2,%xmm1
+.byte	102,15,58,68,222,0	// pclmulqdq $0x00,%xmm6,%xmm3
+	pxor	%xmm0,%xmm3
+	pxor	%xmm1,%xmm3
+
+	movdqa	%xmm3,%xmm4
+	psrldq	$8,%xmm3
+	pslldq	$8,%xmm4
+	pxor	%xmm3,%xmm1
+	pxor	%xmm4,%xmm0
+
+	movdqa	%xmm0,%xmm4
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+
+
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+	movdqa	%xmm0,%xmm5	// keep this product; it is stored at 48(%rdi) below
+	movdqa	%xmm0,%xmm1
+	pshufd	$78,%xmm0,%xmm3
+	pxor	%xmm0,%xmm3
+.byte	102,15,58,68,194,0	// pclmulqdq $0x00,%xmm2,%xmm0
+.byte	102,15,58,68,202,17	// pclmulqdq $0x11,%xmm2,%xmm1
+.byte	102,15,58,68,222,0	// pclmulqdq $0x00,%xmm6,%xmm3
+	pxor	%xmm0,%xmm3
+	pxor	%xmm1,%xmm3
+
+	movdqa	%xmm3,%xmm4
+	psrldq	$8,%xmm3
+	pslldq	$8,%xmm4
+	pxor	%xmm3,%xmm1
+	pxor	%xmm4,%xmm0
+
+	movdqa	%xmm0,%xmm4
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+
+
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+	pshufd	$78,%xmm5,%xmm3
+	pshufd	$78,%xmm0,%xmm4
+	pxor	%xmm5,%xmm3
+	movdqu	%xmm5,48(%rdi)	// table[3]
+	pxor	%xmm0,%xmm4
+	movdqu	%xmm0,64(%rdi)	// table[4]
+.byte	102,15,58,15,227,8	// palignr $8,%xmm3,%xmm4
+	movdqu	%xmm4,80(%rdi)	// table[5] = packed half-XORs of table[3] and table[4]
+	.byte	0xf3,0xc3	// encoded "rep ret"
+
+.globl	_gcm_gmult_clmul
+.private_extern _gcm_gmult_clmul
+
+.p2align	4
+_gcm_gmult_clmul:	// multiplies the 16-byte value at (%rdi) by the table entry at (%rsi) with PCLMULQDQ and stores the result back to (%rdi)
+L$_gmult_clmul:
+	movdqu	(%rdi),%xmm0
+	movdqa	L$bswap_mask(%rip),%xmm5
+	movdqu	(%rsi),%xmm2
+	movdqu	32(%rsi),%xmm4
+.byte	102,15,56,0,197	// pshufb %xmm5,%xmm0 (byte shuffle by L$bswap_mask)
+	movdqa	%xmm0,%xmm1
+	pshufd	$78,%xmm0,%xmm3
+	pxor	%xmm0,%xmm3	// xmm3 = high half XOR low half (operand for the middle product)
+.byte	102,15,58,68,194,0	// pclmulqdq $0x00,%xmm2,%xmm0
+.byte	102,15,58,68,202,17	// pclmulqdq $0x11,%xmm2,%xmm1
+.byte	102,15,58,68,220,0	// pclmulqdq $0x00,%xmm4,%xmm3
+	pxor	%xmm0,%xmm3
+	pxor	%xmm1,%xmm3
+
+	movdqa	%xmm3,%xmm4
+	psrldq	$8,%xmm3
+	pslldq	$8,%xmm4
+	pxor	%xmm3,%xmm1
+	pxor	%xmm4,%xmm0
+
+	movdqa	%xmm0,%xmm4	// shift/XOR sequence folding the 256-bit product xmm1:xmm0 back to 128 bits in xmm0
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+
+
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+.byte	102,15,56,0,197	// pshufb %xmm5,%xmm0 (shuffle back before the store)
+	movdqu	%xmm0,(%rdi)
+	.byte	0xf3,0xc3	// encoded "rep ret"
+
+.globl	_gcm_ghash_clmul
+.private_extern _gcm_ghash_clmul
+
+.p2align	5
+_gcm_ghash_clmul:	// PCLMULQDQ hash update: folds %rcx bytes at (%rdx) into the 16-byte state at (%rdi) using the table at (%rsi); 4-, 2- and 1-block paths
+L$_ghash_clmul:
+	movdqa	L$bswap_mask(%rip),%xmm10	// shuffle mask applied to the state and to every input block
+
+	movdqu	(%rdi),%xmm0
+	movdqu	(%rsi),%xmm2
+	movdqu	32(%rsi),%xmm7
+.byte	102,65,15,56,0,194	// pshufb %xmm10,%xmm0
+
+	subq	$0x10,%rcx	// from here on %rcx is biased by -16
+	jz	L$odd_tail	// exactly one block of input
+
+	movdqu	16(%rsi),%xmm6
+	leaq	_OPENSSL_ia32cap_P(%rip),%rax
+	movl	4(%rax),%eax	// second 32-bit word of the capability vector
+	cmpq	$0x30,%rcx
+	jb	L$skip4x	// fewer than four blocks in total
+
+	andl	$71303168,%eax	// mask 0x04400000 = bits 22 and 26 (presumably the MOVBE and XSAVE flags -- confirm against the ia32cap layout)
+	cmpl	$4194304,%eax	// 0x00400000: bit 22 set and bit 26 clear
+	je	L$skip4x	// in that case use the 2-block loop instead
+
+	subq	$0x30,%rcx
+	movq	$0xA040608020C0E000,%rax	// constant moved into %xmm9 inside L$mod4_loop and used as a pshufb lookup
+	movdqu	48(%rsi),%xmm14
+	movdqu	64(%rsi),%xmm15
+
+
+
+
+	movdqu	48(%rdx),%xmm3
+	movdqu	32(%rdx),%xmm11
+.byte	102,65,15,56,0,218	// pshufb %xmm10,%xmm3
+.byte	102,69,15,56,0,218	// pshufb %xmm10,%xmm11
+	movdqa	%xmm3,%xmm5
+	pshufd	$78,%xmm3,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,68,218,0	// pclmulqdq $0x00,%xmm2,%xmm3
+.byte	102,15,58,68,234,17	// pclmulqdq $0x11,%xmm2,%xmm5
+.byte	102,15,58,68,231,0	// pclmulqdq $0x00,%xmm7,%xmm4
+
+	movdqa	%xmm11,%xmm13
+	pshufd	$78,%xmm11,%xmm12
+	pxor	%xmm11,%xmm12
+.byte	102,68,15,58,68,222,0	// pclmulqdq $0x00,%xmm6,%xmm11
+.byte	102,68,15,58,68,238,17	// pclmulqdq $0x11,%xmm6,%xmm13
+.byte	102,68,15,58,68,231,16	// pclmulqdq $0x10,%xmm7,%xmm12
+	xorps	%xmm11,%xmm3
+	xorps	%xmm13,%xmm5
+	movups	80(%rsi),%xmm7
+	xorps	%xmm12,%xmm4
+
+	movdqu	16(%rdx),%xmm11
+	movdqu	0(%rdx),%xmm8
+.byte	102,69,15,56,0,218	// pshufb %xmm10,%xmm11
+.byte	102,69,15,56,0,194	// pshufb %xmm10,%xmm8
+	movdqa	%xmm11,%xmm13
+	pshufd	$78,%xmm11,%xmm12
+	pxor	%xmm8,%xmm0	// fold the first input block into the running state
+	pxor	%xmm11,%xmm12
+.byte	102,69,15,58,68,222,0	// pclmulqdq $0x00,%xmm14,%xmm11
+	movdqa	%xmm0,%xmm1
+	pshufd	$78,%xmm0,%xmm8
+	pxor	%xmm0,%xmm8
+.byte	102,69,15,58,68,238,17	// pclmulqdq $0x11,%xmm14,%xmm13
+.byte	102,68,15,58,68,231,0	// pclmulqdq $0x00,%xmm7,%xmm12
+	xorps	%xmm11,%xmm3
+	xorps	%xmm13,%xmm5
+
+	leaq	64(%rdx),%rdx
+	subq	$0x40,%rcx
+	jc	L$tail4x	// no further full group of four blocks
+
+	jmp	L$mod4_loop
+.p2align	5
+L$mod4_loop:	// per pass: finish the previous group of four blocks while starting the products for the next 64 input bytes
+.byte	102,65,15,58,68,199,0	// pclmulqdq $0x00,%xmm15,%xmm0
+	xorps	%xmm12,%xmm4
+	movdqu	48(%rdx),%xmm11
+.byte	102,69,15,56,0,218	// pshufb %xmm10,%xmm11
+.byte	102,65,15,58,68,207,17	// pclmulqdq $0x11,%xmm15,%xmm1
+	xorps	%xmm3,%xmm0
+	movdqu	32(%rdx),%xmm3
+	movdqa	%xmm11,%xmm13
+.byte	102,68,15,58,68,199,16	// pclmulqdq $0x10,%xmm7,%xmm8
+	pshufd	$78,%xmm11,%xmm12
+	xorps	%xmm5,%xmm1
+	pxor	%xmm11,%xmm12
+.byte	102,65,15,56,0,218	// pshufb %xmm10,%xmm3
+	movups	32(%rsi),%xmm7
+	xorps	%xmm4,%xmm8
+.byte	102,68,15,58,68,218,0	// pclmulqdq $0x00,%xmm2,%xmm11
+	pshufd	$78,%xmm3,%xmm4
+
+	pxor	%xmm0,%xmm8
+	movdqa	%xmm3,%xmm5
+	pxor	%xmm1,%xmm8
+	pxor	%xmm3,%xmm4
+	movdqa	%xmm8,%xmm9
+.byte	102,68,15,58,68,234,17	// pclmulqdq $0x11,%xmm2,%xmm13
+	pslldq	$8,%xmm8
+	psrldq	$8,%xmm9
+	pxor	%xmm8,%xmm0
+	movdqa	L$7_mask(%rip),%xmm8
+	pxor	%xmm9,%xmm1
+.byte	102,76,15,110,200	// movq %rax,%xmm9
+
+	pand	%xmm0,%xmm8
+.byte	102,69,15,56,0,200	// pshufb %xmm8,%xmm9
+	pxor	%xmm0,%xmm9
+.byte	102,68,15,58,68,231,0	// pclmulqdq $0x00,%xmm7,%xmm12
+	psllq	$57,%xmm9
+	movdqa	%xmm9,%xmm8
+	pslldq	$8,%xmm9
+.byte	102,15,58,68,222,0	// pclmulqdq $0x00,%xmm6,%xmm3
+	psrldq	$8,%xmm8
+	pxor	%xmm9,%xmm0
+	pxor	%xmm8,%xmm1
+	movdqu	0(%rdx),%xmm8
+
+	movdqa	%xmm0,%xmm9
+	psrlq	$1,%xmm0
+.byte	102,15,58,68,238,17	// pclmulqdq $0x11,%xmm6,%xmm5
+	xorps	%xmm11,%xmm3
+	movdqu	16(%rdx),%xmm11
+.byte	102,69,15,56,0,218	// pshufb %xmm10,%xmm11
+.byte	102,15,58,68,231,16	// pclmulqdq $0x10,%xmm7,%xmm4
+	xorps	%xmm13,%xmm5
+	movups	80(%rsi),%xmm7
+.byte	102,69,15,56,0,194	// pshufb %xmm10,%xmm8
+	pxor	%xmm9,%xmm1
+	pxor	%xmm0,%xmm9
+	psrlq	$5,%xmm0
+
+	movdqa	%xmm11,%xmm13
+	pxor	%xmm12,%xmm4
+	pshufd	$78,%xmm11,%xmm12
+	pxor	%xmm9,%xmm0
+	pxor	%xmm8,%xmm1
+	pxor	%xmm11,%xmm12
+.byte	102,69,15,58,68,222,0	// pclmulqdq $0x00,%xmm14,%xmm11
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+	movdqa	%xmm0,%xmm1
+.byte	102,69,15,58,68,238,17	// pclmulqdq $0x11,%xmm14,%xmm13
+	xorps	%xmm11,%xmm3
+	pshufd	$78,%xmm0,%xmm8
+	pxor	%xmm0,%xmm8
+
+.byte	102,68,15,58,68,231,0	// pclmulqdq $0x00,%xmm7,%xmm12
+	xorps	%xmm13,%xmm5
+
+	leaq	64(%rdx),%rdx
+	subq	$0x40,%rcx
+	jnc	L$mod4_loop
+
+L$tail4x:	// finish the last pending group of four blocks
+.byte	102,65,15,58,68,199,0	// pclmulqdq $0x00,%xmm15,%xmm0
+.byte	102,65,15,58,68,207,17	// pclmulqdq $0x11,%xmm15,%xmm1
+.byte	102,68,15,58,68,199,16	// pclmulqdq $0x10,%xmm7,%xmm8
+	xorps	%xmm12,%xmm4
+	xorps	%xmm3,%xmm0
+	xorps	%xmm5,%xmm1
+	pxor	%xmm0,%xmm1
+	pxor	%xmm4,%xmm8
+
+	pxor	%xmm1,%xmm8
+	pxor	%xmm0,%xmm1
+
+	movdqa	%xmm8,%xmm9
+	psrldq	$8,%xmm8
+	pslldq	$8,%xmm9
+	pxor	%xmm8,%xmm1
+	pxor	%xmm9,%xmm0
+
+	movdqa	%xmm0,%xmm4	// shift/XOR sequence folding xmm1:xmm0 back to 128 bits in xmm0
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+
+
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+	addq	$0x40,%rcx	// undo the last subtraction; zero means all input is consumed
+	jz	L$done
+	movdqu	32(%rsi),%xmm7
+	subq	$0x10,%rcx
+	jz	L$odd_tail	// exactly one block left
+L$skip4x:	// 2-blocks-per-pass path
+
+
+
+
+
+	movdqu	(%rdx),%xmm8
+	movdqu	16(%rdx),%xmm3
+.byte	102,69,15,56,0,194	// pshufb %xmm10,%xmm8
+.byte	102,65,15,56,0,218	// pshufb %xmm10,%xmm3
+	pxor	%xmm8,%xmm0
+
+	movdqa	%xmm3,%xmm5
+	pshufd	$78,%xmm3,%xmm4
+	pxor	%xmm3,%xmm4
+.byte	102,15,58,68,218,0	// pclmulqdq $0x00,%xmm2,%xmm3
+.byte	102,15,58,68,234,17	// pclmulqdq $0x11,%xmm2,%xmm5
+.byte	102,15,58,68,231,0	// pclmulqdq $0x00,%xmm7,%xmm4
+
+	leaq	32(%rdx),%rdx
+	nop
+	subq	$0x20,%rcx
+	jbe	L$even_tail
+	nop
+	jmp	L$mod_loop
+
+.p2align	5
+L$mod_loop:	// per pass: combine the pending pair of blocks and load the next 32 input bytes
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm4,%xmm8
+	pshufd	$78,%xmm0,%xmm4
+	pxor	%xmm0,%xmm4
+
+.byte	102,15,58,68,198,0	// pclmulqdq $0x00,%xmm6,%xmm0
+.byte	102,15,58,68,206,17	// pclmulqdq $0x11,%xmm6,%xmm1
+.byte	102,15,58,68,231,16	// pclmulqdq $0x10,%xmm7,%xmm4
+
+	pxor	%xmm3,%xmm0
+	pxor	%xmm5,%xmm1
+	movdqu	(%rdx),%xmm9
+	pxor	%xmm0,%xmm8
+.byte	102,69,15,56,0,202	// pshufb %xmm10,%xmm9
+	movdqu	16(%rdx),%xmm3
+
+	pxor	%xmm1,%xmm8
+	pxor	%xmm9,%xmm1
+	pxor	%xmm8,%xmm4
+.byte	102,65,15,56,0,218	// pshufb %xmm10,%xmm3
+	movdqa	%xmm4,%xmm8
+	psrldq	$8,%xmm8
+	pslldq	$8,%xmm4
+	pxor	%xmm8,%xmm1
+	pxor	%xmm4,%xmm0
+
+	movdqa	%xmm3,%xmm5
+
+	movdqa	%xmm0,%xmm9
+	movdqa	%xmm0,%xmm8
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm8
+.byte	102,15,58,68,218,0	// pclmulqdq $0x00,%xmm2,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm8,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm8
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm8
+	pxor	%xmm9,%xmm0
+	pshufd	$78,%xmm5,%xmm4
+	pxor	%xmm8,%xmm1
+	pxor	%xmm5,%xmm4
+
+	movdqa	%xmm0,%xmm9
+	psrlq	$1,%xmm0
+.byte	102,15,58,68,234,17	// pclmulqdq $0x11,%xmm2,%xmm5
+	pxor	%xmm9,%xmm1
+	pxor	%xmm0,%xmm9
+	psrlq	$5,%xmm0
+	pxor	%xmm9,%xmm0
+	leaq	32(%rdx),%rdx
+	psrlq	$1,%xmm0
+.byte	102,15,58,68,231,0	// pclmulqdq $0x00,%xmm7,%xmm4
+	pxor	%xmm1,%xmm0
+
+	subq	$0x20,%rcx
+	ja	L$mod_loop
+
+L$even_tail:	// finish the pending pair of blocks
+	movdqa	%xmm0,%xmm1
+	movdqa	%xmm4,%xmm8
+	pshufd	$78,%xmm0,%xmm4
+	pxor	%xmm0,%xmm4
+
+.byte	102,15,58,68,198,0	// pclmulqdq $0x00,%xmm6,%xmm0
+.byte	102,15,58,68,206,17	// pclmulqdq $0x11,%xmm6,%xmm1
+.byte	102,15,58,68,231,16	// pclmulqdq $0x10,%xmm7,%xmm4
+
+	pxor	%xmm3,%xmm0
+	pxor	%xmm5,%xmm1
+	pxor	%xmm0,%xmm8
+	pxor	%xmm1,%xmm8
+	pxor	%xmm8,%xmm4
+	movdqa	%xmm4,%xmm8
+	psrldq	$8,%xmm8
+	pslldq	$8,%xmm4
+	pxor	%xmm8,%xmm1
+	pxor	%xmm4,%xmm0
+
+	movdqa	%xmm0,%xmm4
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+
+
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+	testq	%rcx,%rcx
+	jnz	L$done	// non-zero biased count: nothing left; zero falls through to one final block
+
+L$odd_tail:	// single remaining 16-byte block
+	movdqu	(%rdx),%xmm8
+.byte	102,69,15,56,0,194	// pshufb %xmm10,%xmm8
+	pxor	%xmm8,%xmm0
+	movdqa	%xmm0,%xmm1
+	pshufd	$78,%xmm0,%xmm3
+	pxor	%xmm0,%xmm3
+.byte	102,15,58,68,194,0	// pclmulqdq $0x00,%xmm2,%xmm0
+.byte	102,15,58,68,202,17	// pclmulqdq $0x11,%xmm2,%xmm1
+.byte	102,15,58,68,223,0	// pclmulqdq $0x00,%xmm7,%xmm3
+	pxor	%xmm0,%xmm3
+	pxor	%xmm1,%xmm3
+
+	movdqa	%xmm3,%xmm4
+	psrldq	$8,%xmm3
+	pslldq	$8,%xmm4
+	pxor	%xmm3,%xmm1
+	pxor	%xmm4,%xmm0
+
+	movdqa	%xmm0,%xmm4
+	movdqa	%xmm0,%xmm3
+	psllq	$5,%xmm0
+	pxor	%xmm0,%xmm3
+	psllq	$1,%xmm0
+	pxor	%xmm3,%xmm0
+	psllq	$57,%xmm0
+	movdqa	%xmm0,%xmm3
+	pslldq	$8,%xmm0
+	psrldq	$8,%xmm3
+	pxor	%xmm4,%xmm0
+	pxor	%xmm3,%xmm1
+
+
+	movdqa	%xmm0,%xmm4
+	psrlq	$1,%xmm0
+	pxor	%xmm4,%xmm1
+	pxor	%xmm0,%xmm4
+	psrlq	$5,%xmm0
+	pxor	%xmm4,%xmm0
+	psrlq	$1,%xmm0
+	pxor	%xmm1,%xmm0
+L$done:
+.byte	102,65,15,56,0,194	// pshufb %xmm10,%xmm0 (shuffle back before the store)
+	movdqu	%xmm0,(%rdi)
+	.byte	0xf3,0xc3	// encoded "rep ret"
+
+.globl	_gcm_init_avx
+.private_extern _gcm_init_avx
+
+.p2align	5
+_gcm_init_avx:	// AVX table setup: from the 16-byte value at (%rsi), writes four 48-byte groups (192 bytes) starting at (%rdi)
+	vzeroupper
+
+	vmovdqu	(%rsi),%xmm2
+	vpshufd	$78,%xmm2,%xmm2	// swap the two 64-bit halves
+
+
+	vpshufd	$255,%xmm2,%xmm4	// broadcast the top dword; its sign decides the conditional XOR below
+	vpsrlq	$63,%xmm2,%xmm3
+	vpsllq	$1,%xmm2,%xmm2	// 128-bit shift left by one, carry handled via xmm3
+	vpxor	%xmm5,%xmm5,%xmm5
+	vpcmpgtd	%xmm4,%xmm5,%xmm5	// all-ones mask if the top bit was set, else zero
+	vpslldq	$8,%xmm3,%xmm3
+	vpor	%xmm3,%xmm2,%xmm2
+
+
+	vpand	L$0x1c2_polynomial(%rip),%xmm5,%xmm5	// select the constant only when the mask is set
+	vpxor	%xmm5,%xmm2,%xmm2
+
+	vpunpckhqdq	%xmm2,%xmm2,%xmm6
+	vmovdqa	%xmm2,%xmm0
+	vpxor	%xmm2,%xmm6,%xmm6	// xmm6 = high half XOR low half of xmm2 (operand for the middle product)
+	movq	$4,%r10	// four passes, two table entries each
+	jmp	L$init_start_avx	// the first pass skips the extra multiply at the loop head
+.p2align	5
+L$init_loop_avx:
+	vpalignr	$8,%xmm3,%xmm4,%xmm5
+	vmovdqu	%xmm5,-16(%rdi)	// third slot of the previous group (%rdi was already advanced by 48)
+	vpunpckhqdq	%xmm0,%xmm0,%xmm3
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
+	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
+	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
+	vpxor	%xmm0,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+
+	vpslldq	$8,%xmm3,%xmm4
+	vpsrldq	$8,%xmm3,%xmm3
+	vpxor	%xmm4,%xmm0,%xmm0
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpsllq	$57,%xmm0,%xmm3	// shift/XOR sequence folding xmm1:xmm0 back to 128 bits in xmm0
+	vpsllq	$62,%xmm0,%xmm4
+	vpxor	%xmm3,%xmm4,%xmm4
+	vpsllq	$63,%xmm0,%xmm3
+	vpxor	%xmm3,%xmm4,%xmm4
+	vpslldq	$8,%xmm4,%xmm3
+	vpsrldq	$8,%xmm4,%xmm4
+	vpxor	%xmm3,%xmm0,%xmm0
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vpsrlq	$1,%xmm0,%xmm4
+	vpxor	%xmm0,%xmm1,%xmm1
+	vpxor	%xmm4,%xmm0,%xmm0
+	vpsrlq	$5,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm0,%xmm0
+	vpsrlq	$1,%xmm0,%xmm0
+	vpxor	%xmm1,%xmm0,%xmm0
+L$init_start_avx:
+	vmovdqa	%xmm0,%xmm5	// keep the current value; it is stored at 0(%rdi) below
+	vpunpckhqdq	%xmm0,%xmm0,%xmm3
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
+	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
+	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
+	vpxor	%xmm0,%xmm1,%xmm4
+	vpxor	%xmm4,%xmm3,%xmm3
+
+	vpslldq	$8,%xmm3,%xmm4
+	vpsrldq	$8,%xmm3,%xmm3
+	vpxor	%xmm4,%xmm0,%xmm0
+	vpxor	%xmm3,%xmm1,%xmm1
+	vpsllq	$57,%xmm0,%xmm3
+	vpsllq	$62,%xmm0,%xmm4
+	vpxor	%xmm3,%xmm4,%xmm4
+	vpsllq	$63,%xmm0,%xmm3
+	vpxor	%xmm3,%xmm4,%xmm4
+	vpslldq	$8,%xmm4,%xmm3
+	vpsrldq	$8,%xmm4,%xmm4
+	vpxor	%xmm3,%xmm0,%xmm0
+	vpxor	%xmm4,%xmm1,%xmm1
+
+	vpsrlq	$1,%xmm0,%xmm4
+	vpxor	%xmm0,%xmm1,%xmm1
+	vpxor	%xmm4,%xmm0,%xmm0
+	vpsrlq	$5,%xmm4,%xmm4
+	vpxor	%xmm4,%xmm0,%xmm0
+	vpsrlq	$1,%xmm0,%xmm0
+	vpxor	%xmm1,%xmm0,%xmm0
+	vpshufd	$78,%xmm5,%xmm3
+	vpshufd	$78,%xmm0,%xmm4
+	vpxor	%xmm5,%xmm3,%xmm3
+	vmovdqu	%xmm5,0(%rdi)	// first slot of this group
+	vpxor	%xmm0,%xmm4,%xmm4
+	vmovdqu	%xmm0,16(%rdi)	// second slot of this group
+	leaq	48(%rdi),%rdi	// advance one 48-byte group; the third slot is written at -16(%rdi) afterwards
+	subq	$1,%r10
+	jnz	L$init_loop_avx
+
+	vpalignr	$8,%xmm4,%xmm3,%xmm5
+	vmovdqu	%xmm5,-16(%rdi)	// third slot of the final group
+
+	vzeroupper
+	.byte	0xf3,0xc3	// encoded "rep ret"
+
+.globl	_gcm_gmult_avx
+.private_extern _gcm_gmult_avx
+/* _gcm_gmult_avx: has no body of its own; it forwards to L$_gmult_clmul with all registers untouched. */
+.p2align	5
+_gcm_gmult_avx:
+	jmp	L$_gmult_clmul	/* target label is defined earlier in this file, outside this section */
+
+.globl	_gcm_ghash_avx
+.private_extern _gcm_ghash_avx
+/* _gcm_ghash_avx: %rdi = 16-byte accumulator (loaded at entry, stored at exit), %rsi = table pointer, %rdx = input pointer, %rcx = input length in bytes. NOTE(review): the 16-byte stepping suggests %rcx must be a nonzero multiple of 16, and the table layout looks like the one written by _gcm_init_avx - confirm both with callers. */
+.p2align	5
+_gcm_ghash_avx:
+	vzeroupper
+
+	vmovdqu	(%rdi),%xmm10	/* xmm10 = accumulator */
+	leaq	L$0x1c2_polynomial(%rip),%r10	/* r10 = address of the reduction constant */
+	leaq	64(%rsi),%rsi	/* bias the table pointer; later table offsets are written as N-64(%rsi) */
+	vmovdqu	L$bswap_mask(%rip),%xmm13	/* xmm13 = byte-reversal shuffle mask */
+	vpshufb	%xmm13,%xmm10,%xmm10
+	cmpq	$0x80,%rcx
+	jb	L$short_avx	/* fewer than 128 bytes: handle 16-byte blocks individually */
+	subq	$0x80,%rcx
+/* First 128-byte chunk: blocks are loaded from offset 112 downwards, byte-reversed, and multiplied against successive table entries; partial products accumulate in xmm0-xmm5. */
+	vmovdqu	112(%rdx),%xmm14
+	vmovdqu	0-64(%rsi),%xmm6
+	vpshufb	%xmm13,%xmm14,%xmm14
+	vmovdqu	32-64(%rsi),%xmm7
+
+	vpunpckhqdq	%xmm14,%xmm14,%xmm9
+	vmovdqu	96(%rdx),%xmm15
+	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
+	vpxor	%xmm14,%xmm9,%xmm9
+	vpshufb	%xmm13,%xmm15,%xmm15
+	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
+	vmovdqu	16-64(%rsi),%xmm6
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vmovdqu	80(%rdx),%xmm14
+	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
+	vpxor	%xmm15,%xmm8,%xmm8
+
+	vpshufb	%xmm13,%xmm14,%xmm14
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
+	vpunpckhqdq	%xmm14,%xmm14,%xmm9
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
+	vmovdqu	48-64(%rsi),%xmm6
+	vpxor	%xmm14,%xmm9,%xmm9
+	vmovdqu	64(%rdx),%xmm15
+	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
+	vmovdqu	80-64(%rsi),%xmm7
+
+	vpshufb	%xmm13,%xmm15,%xmm15
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
+	vmovdqu	64-64(%rsi),%xmm6
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
+	vpxor	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	48(%rdx),%xmm14
+	vpxor	%xmm3,%xmm0,%xmm0
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpshufb	%xmm13,%xmm14,%xmm14
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
+	vmovdqu	96-64(%rsi),%xmm6
+	vpxor	%xmm5,%xmm2,%xmm2
+	vpunpckhqdq	%xmm14,%xmm14,%xmm9
+	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
+	vmovdqu	128-64(%rsi),%xmm7
+	vpxor	%xmm14,%xmm9,%xmm9
+
+	vmovdqu	32(%rdx),%xmm15
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpshufb	%xmm13,%xmm15,%xmm15
+	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
+	vmovdqu	112-64(%rsi),%xmm6
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
+	vpxor	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	16(%rdx),%xmm14
+	vpxor	%xmm3,%xmm0,%xmm0
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpshufb	%xmm13,%xmm14,%xmm14
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
+	vmovdqu	144-64(%rsi),%xmm6
+	vpxor	%xmm5,%xmm2,%xmm2
+	vpunpckhqdq	%xmm14,%xmm14,%xmm9
+	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
+	vmovdqu	176-64(%rsi),%xmm7
+	vpxor	%xmm14,%xmm9,%xmm9
+
+	vmovdqu	(%rdx),%xmm15
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpshufb	%xmm13,%xmm15,%xmm15
+	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
+	vmovdqu	160-64(%rsi),%xmm6
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2
+
+	leaq	128(%rdx),%rdx	/* advance the input pointer past this chunk */
+	cmpq	$0x80,%rcx
+	jb	L$tail_avx	/* no further full 128-byte chunk: finish this one in the tail */
+
+	vpxor	%xmm10,%xmm15,%xmm15	/* fold the accumulator into the first block of the chunk */
+	subq	$0x80,%rcx
+	jmp	L$oop8x_avx
+
+.p2align	5
+L$oop8x_avx:	/* main loop: finishes the previous chunk (including its reduction via (%r10)) while starting the next 128 bytes */
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vmovdqu	112(%rdx),%xmm14
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpxor	%xmm15,%xmm8,%xmm8
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm10
+	vpshufb	%xmm13,%xmm14,%xmm14
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm11
+	vmovdqu	0-64(%rsi),%xmm6
+	vpunpckhqdq	%xmm14,%xmm14,%xmm9
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm12
+	vmovdqu	32-64(%rsi),%xmm7
+	vpxor	%xmm14,%xmm9,%xmm9
+
+	vmovdqu	96(%rdx),%xmm15
+	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
+	vpxor	%xmm3,%xmm10,%xmm10
+	vpshufb	%xmm13,%xmm15,%xmm15
+	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
+	vxorps	%xmm4,%xmm11,%xmm11
+	vmovdqu	16-64(%rsi),%xmm6
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
+	vpxor	%xmm5,%xmm12,%xmm12
+	vxorps	%xmm15,%xmm8,%xmm8
+
+	vmovdqu	80(%rdx),%xmm14
+	vpxor	%xmm10,%xmm12,%xmm12
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
+	vpxor	%xmm11,%xmm12,%xmm12
+	vpslldq	$8,%xmm12,%xmm9
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
+	vpsrldq	$8,%xmm12,%xmm12
+	vpxor	%xmm9,%xmm10,%xmm10
+	vmovdqu	48-64(%rsi),%xmm6
+	vpshufb	%xmm13,%xmm14,%xmm14
+	vxorps	%xmm12,%xmm11,%xmm11
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpunpckhqdq	%xmm14,%xmm14,%xmm9
+	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
+	vmovdqu	80-64(%rsi),%xmm7
+	vpxor	%xmm14,%xmm9,%xmm9
+	vpxor	%xmm2,%xmm5,%xmm5
+
+	vmovdqu	64(%rdx),%xmm15
+	vpalignr	$8,%xmm10,%xmm10,%xmm12
+	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
+	vpshufb	%xmm13,%xmm15,%xmm15
+	vpxor	%xmm3,%xmm0,%xmm0
+	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
+	vmovdqu	64-64(%rsi),%xmm6
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
+	vxorps	%xmm15,%xmm8,%xmm8
+	vpxor	%xmm5,%xmm2,%xmm2
+
+	vmovdqu	48(%rdx),%xmm14
+	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10	/* first reduction multiply against L$0x1c2_polynomial */
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
+	vpshufb	%xmm13,%xmm14,%xmm14
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
+	vmovdqu	96-64(%rsi),%xmm6
+	vpunpckhqdq	%xmm14,%xmm14,%xmm9
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
+	vmovdqu	128-64(%rsi),%xmm7
+	vpxor	%xmm14,%xmm9,%xmm9
+	vpxor	%xmm2,%xmm5,%xmm5
+
+	vmovdqu	32(%rdx),%xmm15
+	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
+	vpshufb	%xmm13,%xmm15,%xmm15
+	vpxor	%xmm3,%xmm0,%xmm0
+	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
+	vmovdqu	112-64(%rsi),%xmm6
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm4,%xmm1,%xmm1
+	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
+	vpxor	%xmm15,%xmm8,%xmm8
+	vpxor	%xmm5,%xmm2,%xmm2
+	vxorps	%xmm12,%xmm10,%xmm10
+
+	vmovdqu	16(%rdx),%xmm14
+	vpalignr	$8,%xmm10,%xmm10,%xmm12
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
+	vpshufb	%xmm13,%xmm14,%xmm14
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
+	vmovdqu	144-64(%rsi),%xmm6
+	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10	/* second reduction multiply */
+	vxorps	%xmm11,%xmm12,%xmm12
+	vpunpckhqdq	%xmm14,%xmm14,%xmm9
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
+	vmovdqu	176-64(%rsi),%xmm7
+	vpxor	%xmm14,%xmm9,%xmm9
+	vpxor	%xmm2,%xmm5,%xmm5
+
+	vmovdqu	(%rdx),%xmm15
+	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
+	vpshufb	%xmm13,%xmm15,%xmm15
+	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
+	vmovdqu	160-64(%rsi),%xmm6
+	vpxor	%xmm12,%xmm15,%xmm15	/* the reduced result of the previous chunk is folded into this chunk's first block */
+	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2
+	vpxor	%xmm10,%xmm15,%xmm15
+
+	leaq	128(%rdx),%rdx
+	subq	$0x80,%rcx
+	jnc	L$oop8x_avx	/* keep looping while at least 128 more bytes were available */
+
+	addq	$0x80,%rcx	/* undo the last subtraction: rcx = bytes still to process (0..127) */
+	jmp	L$tail_no_xor_avx	/* accumulator already folded into xmm15 above */
+
+.p2align	5
+L$short_avx:	/* short path: walks 16-byte blocks backwards from the end of the remaining input, one table entry per block */
+	vmovdqu	-16(%rdx,%rcx,1),%xmm14	/* last 16-byte block of the remaining input */
+	leaq	(%rdx,%rcx,1),%rdx	/* rdx = end of input; earlier blocks are addressed at negative offsets */
+	vmovdqu	0-64(%rsi),%xmm6
+	vmovdqu	32-64(%rsi),%xmm7
+	vpshufb	%xmm13,%xmm14,%xmm15
+
+	vmovdqa	%xmm0,%xmm3
+	vmovdqa	%xmm1,%xmm4
+	vmovdqa	%xmm2,%xmm5
+	subq	$0x10,%rcx
+	jz	L$tail_avx	/* that was the only block */
+
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
+	vpxor	%xmm15,%xmm8,%xmm8
+	vmovdqu	-32(%rdx),%xmm14
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
+	vmovdqu	16-64(%rsi),%xmm6
+	vpshufb	%xmm13,%xmm14,%xmm15
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
+	vpsrldq	$8,%xmm7,%xmm7	/* move the high half of the packed table entry into the low half for the next multiply */
+	subq	$0x10,%rcx
+	jz	L$tail_avx
+
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
+	vpxor	%xmm15,%xmm8,%xmm8
+	vmovdqu	-48(%rdx),%xmm14
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
+	vmovdqu	48-64(%rsi),%xmm6
+	vpshufb	%xmm13,%xmm14,%xmm15
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
+	vmovdqu	80-64(%rsi),%xmm7
+	subq	$0x10,%rcx
+	jz	L$tail_avx
+
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
+	vpxor	%xmm15,%xmm8,%xmm8
+	vmovdqu	-64(%rdx),%xmm14
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
+	vmovdqu	64-64(%rsi),%xmm6
+	vpshufb	%xmm13,%xmm14,%xmm15
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
+	vpsrldq	$8,%xmm7,%xmm7
+	subq	$0x10,%rcx
+	jz	L$tail_avx
+
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
+	vpxor	%xmm15,%xmm8,%xmm8
+	vmovdqu	-80(%rdx),%xmm14
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
+	vmovdqu	96-64(%rsi),%xmm6
+	vpshufb	%xmm13,%xmm14,%xmm15
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
+	vmovdqu	128-64(%rsi),%xmm7
+	subq	$0x10,%rcx
+	jz	L$tail_avx
+
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
+	vpxor	%xmm15,%xmm8,%xmm8
+	vmovdqu	-96(%rdx),%xmm14
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
+	vmovdqu	112-64(%rsi),%xmm6
+	vpshufb	%xmm13,%xmm14,%xmm15
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
+	vpsrldq	$8,%xmm7,%xmm7
+	subq	$0x10,%rcx
+	jz	L$tail_avx
+
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
+	vpxor	%xmm15,%xmm8,%xmm8
+	vmovdqu	-112(%rdx),%xmm14
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
+	vmovdqu	144-64(%rsi),%xmm6
+	vpshufb	%xmm13,%xmm14,%xmm15
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
+	vmovq	184-64(%rsi),%xmm7	/* loads only the upper 8 bytes of the packed entry at 176-64(%rsi) */
+	subq	$0x10,%rcx
+	jmp	L$tail_avx
+
+.p2align	5
+L$tail_avx:
+	vpxor	%xmm10,%xmm15,%xmm15	/* fold the accumulator into the block multiplied last */
+L$tail_no_xor_avx:	/* final multiply of the pending block, then combine all partial products and reduce */
+	vpunpckhqdq	%xmm15,%xmm15,%xmm8
+	vpxor	%xmm0,%xmm3,%xmm3
+	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
+	vpxor	%xmm15,%xmm8,%xmm8
+	vpxor	%xmm1,%xmm4,%xmm4
+	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
+	vpxor	%xmm2,%xmm5,%xmm5
+	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
+
+	vmovdqu	(%r10),%xmm12	/* xmm12 = L$0x1c2_polynomial */
+
+	vpxor	%xmm0,%xmm3,%xmm10	/* xmm10 = sum of low products */
+	vpxor	%xmm1,%xmm4,%xmm11	/* xmm11 = sum of high products */
+	vpxor	%xmm2,%xmm5,%xmm5	/* xmm5 = sum of middle products */
+
+	vpxor	%xmm10,%xmm5,%xmm5
+	vpxor	%xmm11,%xmm5,%xmm5
+	vpslldq	$8,%xmm5,%xmm9
+	vpsrldq	$8,%xmm5,%xmm5
+	vpxor	%xmm9,%xmm10,%xmm10
+	vpxor	%xmm5,%xmm11,%xmm11
+
+	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9	/* two-step reduction of the 256-bit value in xmm11:xmm10 */
+	vpalignr	$8,%xmm10,%xmm10,%xmm10
+	vpxor	%xmm9,%xmm10,%xmm10
+
+	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
+	vpalignr	$8,%xmm10,%xmm10,%xmm10
+	vpxor	%xmm11,%xmm10,%xmm10
+	vpxor	%xmm9,%xmm10,%xmm10
+
+	cmpq	$0,%rcx
+	jne	L$short_avx	/* bytes remain: process them through the short path */
+
+	vpshufb	%xmm13,%xmm10,%xmm10	/* restore the original byte order */
+	vmovdqu	%xmm10,(%rdi)	/* write the updated accumulator back */
+	vzeroupper
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+
+.p2align	6
+L$bswap_mask:	/* 16-byte shuffle control that reverses byte order (indices 15 down to 0); loaded by _gcm_ghash_avx for vpshufb */
+.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+L$0x1c2_polynomial:	/* 16 bytes: 0x01 in the lowest byte, 0xc2 in the highest; used as the reduction constant by the AVX GHASH routines */
+.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
+L$7_mask:	/* consumers are outside this section of the file */
+.long	7,0,7,0
+L$7_mask_poly:	/* 450 == 0x1c2; consumers are outside this section of the file */
+.long	7,0,450,0
+.p2align	6
+/* Lookup tables below: 16 eight-byte entries (L$rem_4bit) and 256 two-byte entries (L$rem_8bit); their consumers are outside this section - presumably the table-driven GHASH fallbacks. */
+L$rem_4bit:
+.long	0,0,0,471859200,0,943718400,0,610271232
+.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
+.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
+.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
+
+L$rem_8bit:
+.value	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
+.value	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
+.value	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
+.value	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
+.value	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
+.value	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
+.value	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
+.value	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
+.value	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
+.value	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
+.value	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
+.value	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
+.value	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
+.value	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
+.value	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
+.value	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
+.value	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
+.value	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
+.value	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
+.value	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
+.value	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
+.value	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
+.value	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
+.value	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
+.value	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
+.value	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
+.value	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
+.value	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
+.value	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
+.value	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
+.value	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
+.value	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
+/* The bytes below are the NUL-terminated ASCII string "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>". */
+.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align	6
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/fipsmodule/md5-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/md5-x86_64.S
new file mode 100644
index 0000000..776c116
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/md5-x86_64.S
@@ -0,0 +1,671 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+.p2align	4
+
+.globl	_md5_block_asm_data_order
+.private_extern _md5_block_asm_data_order
+/* _md5_block_asm_data_order: %rdi = pointer to four 32-bit state words, %rsi = input data, %rdx = number of 64-byte blocks. Updates the state in place; saves and restores %rbp, %rbx, %r12, %r14, %r15. */
+_md5_block_asm_data_order:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r14
+	pushq	%r15
+L$prologue:
+
+
+
+
+	movq	%rdi,%rbp	/* rbp = state pointer (rdi is reused as the end-of-data pointer) */
+	shlq	$6,%rdx	/* block count -> byte count */
+	leaq	(%rsi,%rdx,1),%rdi	/* rdi = end of input */
+	movl	0(%rbp),%eax	/* working state words in eax, ebx, ecx, edx */
+	movl	4(%rbp),%ebx
+	movl	8(%rbp),%ecx
+	movl	12(%rbp),%edx
+
+
+
+
+
+
+
+	cmpq	%rdi,%rsi
+	je	L$end	/* zero blocks: nothing to hash */
+
+/* Per block: 64 steps in four groups of 16. Each step adds a message word and a constant via leal, mixes in a boolean function of the other three state words, rotates, and adds the next state word. */
+L$loop:
+	movl	%eax,%r8d	/* save the incoming state in r8d, r9d, r14d, r15d for the final add-back */
+	movl	%ebx,%r9d
+	movl	%ecx,%r14d
+	movl	%edx,%r15d
+	movl	0(%rsi),%r10d	/* r10d = message word for the next step */
+	movl	%edx,%r11d
+	xorl	%ecx,%r11d	/* steps 1-16: r11d = ((c ^ d) & b) ^ d; rotates 7, 12, 17, 22; message words in order 0..15 */
+	leal	-680876936(%rax,%r10,1),%eax
+	andl	%ebx,%r11d
+	xorl	%edx,%r11d
+	movl	4(%rsi),%r10d
+	addl	%r11d,%eax
+	roll	$7,%eax
+	movl	%ecx,%r11d
+	addl	%ebx,%eax
+	xorl	%ebx,%r11d
+	leal	-389564586(%rdx,%r10,1),%edx
+	andl	%eax,%r11d
+	xorl	%ecx,%r11d
+	movl	8(%rsi),%r10d
+	addl	%r11d,%edx
+	roll	$12,%edx
+	movl	%ebx,%r11d
+	addl	%eax,%edx
+	xorl	%eax,%r11d
+	leal	606105819(%rcx,%r10,1),%ecx
+	andl	%edx,%r11d
+	xorl	%ebx,%r11d
+	movl	12(%rsi),%r10d
+	addl	%r11d,%ecx
+	roll	$17,%ecx
+	movl	%eax,%r11d
+	addl	%edx,%ecx
+	xorl	%edx,%r11d
+	leal	-1044525330(%rbx,%r10,1),%ebx
+	andl	%ecx,%r11d
+	xorl	%eax,%r11d
+	movl	16(%rsi),%r10d
+	addl	%r11d,%ebx
+	roll	$22,%ebx
+	movl	%edx,%r11d
+	addl	%ecx,%ebx
+	xorl	%ecx,%r11d
+	leal	-176418897(%rax,%r10,1),%eax
+	andl	%ebx,%r11d
+	xorl	%edx,%r11d
+	movl	20(%rsi),%r10d
+	addl	%r11d,%eax
+	roll	$7,%eax
+	movl	%ecx,%r11d
+	addl	%ebx,%eax
+	xorl	%ebx,%r11d
+	leal	1200080426(%rdx,%r10,1),%edx
+	andl	%eax,%r11d
+	xorl	%ecx,%r11d
+	movl	24(%rsi),%r10d
+	addl	%r11d,%edx
+	roll	$12,%edx
+	movl	%ebx,%r11d
+	addl	%eax,%edx
+	xorl	%eax,%r11d
+	leal	-1473231341(%rcx,%r10,1),%ecx
+	andl	%edx,%r11d
+	xorl	%ebx,%r11d
+	movl	28(%rsi),%r10d
+	addl	%r11d,%ecx
+	roll	$17,%ecx
+	movl	%eax,%r11d
+	addl	%edx,%ecx
+	xorl	%edx,%r11d
+	leal	-45705983(%rbx,%r10,1),%ebx
+	andl	%ecx,%r11d
+	xorl	%eax,%r11d
+	movl	32(%rsi),%r10d
+	addl	%r11d,%ebx
+	roll	$22,%ebx
+	movl	%edx,%r11d
+	addl	%ecx,%ebx
+	xorl	%ecx,%r11d
+	leal	1770035416(%rax,%r10,1),%eax
+	andl	%ebx,%r11d
+	xorl	%edx,%r11d
+	movl	36(%rsi),%r10d
+	addl	%r11d,%eax
+	roll	$7,%eax
+	movl	%ecx,%r11d
+	addl	%ebx,%eax
+	xorl	%ebx,%r11d
+	leal	-1958414417(%rdx,%r10,1),%edx
+	andl	%eax,%r11d
+	xorl	%ecx,%r11d
+	movl	40(%rsi),%r10d
+	addl	%r11d,%edx
+	roll	$12,%edx
+	movl	%ebx,%r11d
+	addl	%eax,%edx
+	xorl	%eax,%r11d
+	leal	-42063(%rcx,%r10,1),%ecx
+	andl	%edx,%r11d
+	xorl	%ebx,%r11d
+	movl	44(%rsi),%r10d
+	addl	%r11d,%ecx
+	roll	$17,%ecx
+	movl	%eax,%r11d
+	addl	%edx,%ecx
+	xorl	%edx,%r11d
+	leal	-1990404162(%rbx,%r10,1),%ebx
+	andl	%ecx,%r11d
+	xorl	%eax,%r11d
+	movl	48(%rsi),%r10d
+	addl	%r11d,%ebx
+	roll	$22,%ebx
+	movl	%edx,%r11d
+	addl	%ecx,%ebx
+	xorl	%ecx,%r11d
+	leal	1804603682(%rax,%r10,1),%eax
+	andl	%ebx,%r11d
+	xorl	%edx,%r11d
+	movl	52(%rsi),%r10d
+	addl	%r11d,%eax
+	roll	$7,%eax
+	movl	%ecx,%r11d
+	addl	%ebx,%eax
+	xorl	%ebx,%r11d
+	leal	-40341101(%rdx,%r10,1),%edx
+	andl	%eax,%r11d
+	xorl	%ecx,%r11d
+	movl	56(%rsi),%r10d
+	addl	%r11d,%edx
+	roll	$12,%edx
+	movl	%ebx,%r11d
+	addl	%eax,%edx
+	xorl	%eax,%r11d
+	leal	-1502002290(%rcx,%r10,1),%ecx
+	andl	%edx,%r11d
+	xorl	%ebx,%r11d
+	movl	60(%rsi),%r10d
+	addl	%r11d,%ecx
+	roll	$17,%ecx
+	movl	%eax,%r11d
+	addl	%edx,%ecx
+	xorl	%edx,%r11d
+	leal	1236535329(%rbx,%r10,1),%ebx
+	andl	%ecx,%r11d
+	xorl	%eax,%r11d
+	movl	0(%rsi),%r10d	/* this load is overwritten by the 4(%rsi) load below before being used */
+	addl	%r11d,%ebx
+	roll	$22,%ebx
+	movl	%edx,%r11d
+	addl	%ecx,%ebx
+	movl	4(%rsi),%r10d	/* steps 17-32: r12d = (b & d) | (c & ~d); rotates 5, 9, 14, 20; message word offset advances by 20 bytes (mod 64) per step */
+	movl	%edx,%r11d
+	movl	%edx,%r12d
+	notl	%r11d
+	leal	-165796510(%rax,%r10,1),%eax
+	andl	%ebx,%r12d
+	andl	%ecx,%r11d
+	movl	24(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ecx,%r11d
+	addl	%r12d,%eax
+	movl	%ecx,%r12d
+	roll	$5,%eax
+	addl	%ebx,%eax
+	notl	%r11d
+	leal	-1069501632(%rdx,%r10,1),%edx
+	andl	%eax,%r12d
+	andl	%ebx,%r11d
+	movl	44(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ebx,%r11d
+	addl	%r12d,%edx
+	movl	%ebx,%r12d
+	roll	$9,%edx
+	addl	%eax,%edx
+	notl	%r11d
+	leal	643717713(%rcx,%r10,1),%ecx
+	andl	%edx,%r12d
+	andl	%eax,%r11d
+	movl	0(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%eax,%r11d
+	addl	%r12d,%ecx
+	movl	%eax,%r12d
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	notl	%r11d
+	leal	-373897302(%rbx,%r10,1),%ebx
+	andl	%ecx,%r12d
+	andl	%edx,%r11d
+	movl	20(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%edx,%r11d
+	addl	%r12d,%ebx
+	movl	%edx,%r12d
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	notl	%r11d
+	leal	-701558691(%rax,%r10,1),%eax
+	andl	%ebx,%r12d
+	andl	%ecx,%r11d
+	movl	40(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ecx,%r11d
+	addl	%r12d,%eax
+	movl	%ecx,%r12d
+	roll	$5,%eax
+	addl	%ebx,%eax
+	notl	%r11d
+	leal	38016083(%rdx,%r10,1),%edx
+	andl	%eax,%r12d
+	andl	%ebx,%r11d
+	movl	60(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ebx,%r11d
+	addl	%r12d,%edx
+	movl	%ebx,%r12d
+	roll	$9,%edx
+	addl	%eax,%edx
+	notl	%r11d
+	leal	-660478335(%rcx,%r10,1),%ecx
+	andl	%edx,%r12d
+	andl	%eax,%r11d
+	movl	16(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%eax,%r11d
+	addl	%r12d,%ecx
+	movl	%eax,%r12d
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	notl	%r11d
+	leal	-405537848(%rbx,%r10,1),%ebx
+	andl	%ecx,%r12d
+	andl	%edx,%r11d
+	movl	36(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%edx,%r11d
+	addl	%r12d,%ebx
+	movl	%edx,%r12d
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	notl	%r11d
+	leal	568446438(%rax,%r10,1),%eax
+	andl	%ebx,%r12d
+	andl	%ecx,%r11d
+	movl	56(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ecx,%r11d
+	addl	%r12d,%eax
+	movl	%ecx,%r12d
+	roll	$5,%eax
+	addl	%ebx,%eax
+	notl	%r11d
+	leal	-1019803690(%rdx,%r10,1),%edx
+	andl	%eax,%r12d
+	andl	%ebx,%r11d
+	movl	12(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ebx,%r11d
+	addl	%r12d,%edx
+	movl	%ebx,%r12d
+	roll	$9,%edx
+	addl	%eax,%edx
+	notl	%r11d
+	leal	-187363961(%rcx,%r10,1),%ecx
+	andl	%edx,%r12d
+	andl	%eax,%r11d
+	movl	32(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%eax,%r11d
+	addl	%r12d,%ecx
+	movl	%eax,%r12d
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	notl	%r11d
+	leal	1163531501(%rbx,%r10,1),%ebx
+	andl	%ecx,%r12d
+	andl	%edx,%r11d
+	movl	52(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%edx,%r11d
+	addl	%r12d,%ebx
+	movl	%edx,%r12d
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	notl	%r11d
+	leal	-1444681467(%rax,%r10,1),%eax
+	andl	%ebx,%r12d
+	andl	%ecx,%r11d
+	movl	8(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ecx,%r11d
+	addl	%r12d,%eax
+	movl	%ecx,%r12d
+	roll	$5,%eax
+	addl	%ebx,%eax
+	notl	%r11d
+	leal	-51403784(%rdx,%r10,1),%edx
+	andl	%eax,%r12d
+	andl	%ebx,%r11d
+	movl	28(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ebx,%r11d
+	addl	%r12d,%edx
+	movl	%ebx,%r12d
+	roll	$9,%edx
+	addl	%eax,%edx
+	notl	%r11d
+	leal	1735328473(%rcx,%r10,1),%ecx
+	andl	%edx,%r12d
+	andl	%eax,%r11d
+	movl	48(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%eax,%r11d
+	addl	%r12d,%ecx
+	movl	%eax,%r12d
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	notl	%r11d
+	leal	-1926607734(%rbx,%r10,1),%ebx
+	andl	%ecx,%r12d
+	andl	%edx,%r11d
+	movl	0(%rsi),%r10d	/* overwritten by the 20(%rsi) load below before being used */
+	orl	%r11d,%r12d
+	movl	%edx,%r11d
+	addl	%r12d,%ebx
+	movl	%edx,%r12d
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	movl	20(%rsi),%r10d	/* steps 33-48: r11d = b ^ c ^ d; rotates 4, 11, 16, 23; message word offset advances by 12 bytes (mod 64) per step */
+	movl	%ecx,%r11d
+	leal	-378558(%rax,%r10,1),%eax
+	movl	32(%rsi),%r10d
+	xorl	%edx,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%eax
+	roll	$4,%eax
+	movl	%ebx,%r11d
+	addl	%ebx,%eax
+	leal	-2022574463(%rdx,%r10,1),%edx
+	movl	44(%rsi),%r10d
+	xorl	%ecx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%edx
+	roll	$11,%edx
+	movl	%eax,%r11d
+	addl	%eax,%edx
+	leal	1839030562(%rcx,%r10,1),%ecx
+	movl	56(%rsi),%r10d
+	xorl	%ebx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ecx
+	roll	$16,%ecx
+	movl	%edx,%r11d
+	addl	%edx,%ecx
+	leal	-35309556(%rbx,%r10,1),%ebx
+	movl	4(%rsi),%r10d
+	xorl	%eax,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%ebx
+	roll	$23,%ebx
+	movl	%ecx,%r11d
+	addl	%ecx,%ebx
+	leal	-1530992060(%rax,%r10,1),%eax
+	movl	16(%rsi),%r10d
+	xorl	%edx,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%eax
+	roll	$4,%eax
+	movl	%ebx,%r11d
+	addl	%ebx,%eax
+	leal	1272893353(%rdx,%r10,1),%edx
+	movl	28(%rsi),%r10d
+	xorl	%ecx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%edx
+	roll	$11,%edx
+	movl	%eax,%r11d
+	addl	%eax,%edx
+	leal	-155497632(%rcx,%r10,1),%ecx
+	movl	40(%rsi),%r10d
+	xorl	%ebx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ecx
+	roll	$16,%ecx
+	movl	%edx,%r11d
+	addl	%edx,%ecx
+	leal	-1094730640(%rbx,%r10,1),%ebx
+	movl	52(%rsi),%r10d
+	xorl	%eax,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%ebx
+	roll	$23,%ebx
+	movl	%ecx,%r11d
+	addl	%ecx,%ebx
+	leal	681279174(%rax,%r10,1),%eax
+	movl	0(%rsi),%r10d
+	xorl	%edx,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%eax
+	roll	$4,%eax
+	movl	%ebx,%r11d
+	addl	%ebx,%eax
+	leal	-358537222(%rdx,%r10,1),%edx
+	movl	12(%rsi),%r10d
+	xorl	%ecx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%edx
+	roll	$11,%edx
+	movl	%eax,%r11d
+	addl	%eax,%edx
+	leal	-722521979(%rcx,%r10,1),%ecx
+	movl	24(%rsi),%r10d
+	xorl	%ebx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ecx
+	roll	$16,%ecx
+	movl	%edx,%r11d
+	addl	%edx,%ecx
+	leal	76029189(%rbx,%r10,1),%ebx
+	movl	36(%rsi),%r10d
+	xorl	%eax,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%ebx
+	roll	$23,%ebx
+	movl	%ecx,%r11d
+	addl	%ecx,%ebx
+	leal	-640364487(%rax,%r10,1),%eax
+	movl	48(%rsi),%r10d
+	xorl	%edx,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%eax
+	roll	$4,%eax
+	movl	%ebx,%r11d
+	addl	%ebx,%eax
+	leal	-421815835(%rdx,%r10,1),%edx
+	movl	60(%rsi),%r10d
+	xorl	%ecx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%edx
+	roll	$11,%edx
+	movl	%eax,%r11d
+	addl	%eax,%edx
+	leal	530742520(%rcx,%r10,1),%ecx
+	movl	8(%rsi),%r10d
+	xorl	%ebx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ecx
+	roll	$16,%ecx
+	movl	%edx,%r11d
+	addl	%edx,%ecx
+	leal	-995338651(%rbx,%r10,1),%ebx
+	movl	0(%rsi),%r10d
+	xorl	%eax,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%ebx
+	roll	$23,%ebx
+	movl	%ecx,%r11d
+	addl	%ecx,%ebx
+	movl	0(%rsi),%r10d	/* steps 49-64: r11d = c ^ (b | ~d); rotates 6, 10, 15, 21; message word offset advances by 28 bytes (mod 64) per step */
+	movl	$0xffffffff,%r11d
+	xorl	%edx,%r11d
+	leal	-198630844(%rax,%r10,1),%eax
+	orl	%ebx,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%eax
+	movl	28(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$6,%eax
+	xorl	%ecx,%r11d
+	addl	%ebx,%eax
+	leal	1126891415(%rdx,%r10,1),%edx
+	orl	%eax,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%edx
+	movl	56(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$10,%edx
+	xorl	%ebx,%r11d
+	addl	%eax,%edx
+	leal	-1416354905(%rcx,%r10,1),%ecx
+	orl	%edx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%ecx
+	movl	20(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$15,%ecx
+	xorl	%eax,%r11d
+	addl	%edx,%ecx
+	leal	-57434055(%rbx,%r10,1),%ebx
+	orl	%ecx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ebx
+	movl	48(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$21,%ebx
+	xorl	%edx,%r11d
+	addl	%ecx,%ebx
+	leal	1700485571(%rax,%r10,1),%eax
+	orl	%ebx,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%eax
+	movl	12(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$6,%eax
+	xorl	%ecx,%r11d
+	addl	%ebx,%eax
+	leal	-1894986606(%rdx,%r10,1),%edx
+	orl	%eax,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%edx
+	movl	40(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$10,%edx
+	xorl	%ebx,%r11d
+	addl	%eax,%edx
+	leal	-1051523(%rcx,%r10,1),%ecx
+	orl	%edx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%ecx
+	movl	4(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$15,%ecx
+	xorl	%eax,%r11d
+	addl	%edx,%ecx
+	leal	-2054922799(%rbx,%r10,1),%ebx
+	orl	%ecx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ebx
+	movl	32(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$21,%ebx
+	xorl	%edx,%r11d
+	addl	%ecx,%ebx
+	leal	1873313359(%rax,%r10,1),%eax
+	orl	%ebx,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%eax
+	movl	60(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$6,%eax
+	xorl	%ecx,%r11d
+	addl	%ebx,%eax
+	leal	-30611744(%rdx,%r10,1),%edx
+	orl	%eax,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%edx
+	movl	24(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$10,%edx
+	xorl	%ebx,%r11d
+	addl	%eax,%edx
+	leal	-1560198380(%rcx,%r10,1),%ecx
+	orl	%edx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%ecx
+	movl	52(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$15,%ecx
+	xorl	%eax,%r11d
+	addl	%edx,%ecx
+	leal	1309151649(%rbx,%r10,1),%ebx
+	orl	%ecx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ebx
+	movl	16(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$21,%ebx
+	xorl	%edx,%r11d
+	addl	%ecx,%ebx
+	leal	-145523070(%rax,%r10,1),%eax
+	orl	%ebx,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%eax
+	movl	44(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$6,%eax
+	xorl	%ecx,%r11d
+	addl	%ebx,%eax
+	leal	-1120210379(%rdx,%r10,1),%edx
+	orl	%eax,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%edx
+	movl	8(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$10,%edx
+	xorl	%ebx,%r11d
+	addl	%eax,%edx
+	leal	718787259(%rcx,%r10,1),%ecx
+	orl	%edx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%ecx
+	movl	36(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$15,%ecx
+	xorl	%eax,%r11d
+	addl	%edx,%ecx
+	leal	-343485551(%rbx,%r10,1),%ebx
+	orl	%ecx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ebx
+	movl	0(%rsi),%r10d	/* r10d/r11d set up here are not used again: the loop top reloads both and L$end reads neither */
+	movl	$0xffffffff,%r11d
+	roll	$21,%ebx
+	xorl	%edx,%r11d
+	addl	%ecx,%ebx
+
+	addl	%r8d,%eax	/* add the state saved at the top of the loop back into the working words */
+	addl	%r9d,%ebx
+	addl	%r14d,%ecx
+	addl	%r15d,%edx
+
+
+	addq	$64,%rsi	/* next 64-byte block */
+	cmpq	%rdi,%rsi
+	jb	L$loop	/* continue while rsi is below the end pointer (unsigned compare) */
+
+
+L$end:
+	movl	%eax,0(%rbp)	/* write the four state words back */
+	movl	%ebx,4(%rbp)
+	movl	%ecx,8(%rbp)
+	movl	%edx,12(%rbp)
+
+	movq	(%rsp),%r15	/* restore saved registers with loads (reverse of the push order), then release the 40 bytes */
+	movq	8(%rsp),%r14
+	movq	16(%rsp),%r12
+	movq	24(%rsp),%rbx
+	movq	32(%rsp),%rbp
+	addq	$40,%rsp
+L$epilogue:
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
new file mode 100644
index 0000000..f787577
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
@@ -0,0 +1,1790 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+
+
+.p2align	6
+L$poly:	/* modulus as four 64-bit limbs, least-significant limb first; the routines below read it whole (neg) or as limbs +8 and +24 (mul/sqr) */
+.quad	0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
+/* L$One, L$Two and L$Three each hold eight identical 32-bit lanes; their consumers are outside this section of the file. */
+L$One:
+.long	1,1,1,1,1,1,1,1
+L$Two:
+.long	2,2,2,2,2,2,2,2
+L$Three:
+.long	3,3,3,3,3,3,3,3
+L$ONE_mont:	/* presumably the value 1 in Montgomery form (name-based; not referenced in this section) */
+.quad	0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
+
+
+
+.globl	_ecp_nistz256_neg
+.private_extern _ecp_nistz256_neg
+/* _ecp_nistz256_neg: %rdi = result (four 64-bit limbs), %rsi = input a (four limbs). Stores 0 when a is 0, otherwise L$poly - a. Selection uses cmov rather than a branch. */
+.p2align	5
+_ecp_nistz256_neg:
+	pushq	%r12
+	pushq	%r13
+
+	xorq	%r8,%r8
+	xorq	%r9,%r9
+	xorq	%r10,%r10
+	xorq	%r11,%r11
+	xorq	%r13,%r13
+
+	subq	0(%rsi),%r8	/* r11:r10:r9:r8 = 0 - a, borrow chained through sbb */
+	sbbq	8(%rsi),%r9
+	sbbq	16(%rsi),%r10
+	movq	%r8,%rax	/* rax, rdx, rcx, r12 keep copies of 0 - a before the modulus is added */
+	sbbq	24(%rsi),%r11
+	leaq	L$poly(%rip),%rsi
+	movq	%r9,%rdx
+	sbbq	$0,%r13	/* r13 = 0 if there was no borrow (a == 0), all-ones otherwise */
+
+	addq	0(%rsi),%r8	/* r11:r10:r9:r8 = (0 - a) + modulus */
+	movq	%r10,%rcx
+	adcq	8(%rsi),%r9
+	adcq	16(%rsi),%r10
+	movq	%r11,%r12
+	adcq	24(%rsi),%r11
+	testq	%r13,%r13
+
+	cmovzq	%rax,%r8	/* no borrow: keep the un-adjusted difference (which is zero) */
+	cmovzq	%rdx,%r9
+	movq	%r8,0(%rdi)
+	cmovzq	%rcx,%r10
+	movq	%r9,8(%rdi)
+	cmovzq	%r12,%r11
+	movq	%r10,16(%rdi)
+	movq	%r11,24(%rdi)
+
+	popq	%r13
+	popq	%r12
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+
+
+
+
+
+
+
+.globl	_ecp_nistz256_mul_mont
+.private_extern _ecp_nistz256_mul_mont
+/* _ecp_nistz256_mul_mont: %rdi = result, %rsi = operand a, %rdx = operand b (four 64-bit limbs each). Saves callee-saved registers, loads the inputs the worker expects, and calls __ecp_nistz256_mul_montq. */
+.p2align	5
+_ecp_nistz256_mul_mont:
+L$mul_mont:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	movq	%rdx,%rbx	/* rbx = b pointer (rdx is clobbered by mulq in the worker) */
+	movq	0(%rdx),%rax	/* rax = b[0] */
+	movq	0(%rsi),%r9	/* r9..r12 = a[0..3] */
+	movq	8(%rsi),%r10
+	movq	16(%rsi),%r11
+	movq	24(%rsi),%r12
+
+	call	__ecp_nistz256_mul_montq
+L$mul_mont_done:
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbx
+	popq	%rbp
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+
+
+
+.p2align	5
+__ecp_nistz256_mul_montq:
+/* Worker for _ecp_nistz256_mul_mont. On entry: %rax = b[0], %rbx = b pointer, %rsi = a pointer, %r9..%r12 = a[0..3], %rdi = result pointer. */
+/* Alternates one "multiply a by b[i] and accumulate" pass with one reduction step, four times, then conditionally subtracts the modulus. Clobbers rax, rbx, rcx, rdx, rbp, r8-r15. */
+	movq	%rax,%rbp	/* rbp = b[0]; rax/rdx are overwritten by every mulq */
+	mulq	%r9
+	movq	L$poly+8(%rip),%r14	/* r14 = modulus limb 1 */
+	movq	%rax,%r8
+	movq	%rbp,%rax
+	movq	%rdx,%r9
+
+	mulq	%r10
+	movq	L$poly+24(%rip),%r15	/* r15 = modulus limb 3 */
+	addq	%rax,%r9
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%r11
+	addq	%rax,%r10
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%r12
+	addq	%rax,%r11
+	movq	%r8,%rax
+	adcq	$0,%rdx
+	xorq	%r13,%r13
+	movq	%rdx,%r12	/* r12:r11:r10:r9:r8 = a * b[0], r13 = 0 */
+
+
+
+
+
+
+
+
+
+/* Reduction step 1: fold the lowest limb (r8) into the higher limbs using its 32-bit shifts and its product with modulus limb 3. */
+	movq	%r8,%rbp
+	shlq	$32,%r8
+	mulq	%r15
+	shrq	$32,%rbp
+	addq	%r8,%r9
+	adcq	%rbp,%r10
+	adcq	%rax,%r11
+	movq	8(%rbx),%rax	/* rax = b[1] for the next pass */
+	adcq	%rdx,%r12
+	adcq	$0,%r13
+	xorq	%r8,%r8
+
+
+/* Pass 2: accumulate a * b[1] into r13:r12:r11:r10:r9, carry into r8. */
+	movq	%rax,%rbp
+	mulq	0(%rsi)
+	addq	%rax,%r9
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	8(%rsi)
+	addq	%rcx,%r10
+	adcq	$0,%rdx
+	addq	%rax,%r10
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	16(%rsi)
+	addq	%rcx,%r11
+	adcq	$0,%rdx
+	addq	%rax,%r11
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	24(%rsi)
+	addq	%rcx,%r12
+	adcq	$0,%rdx
+	addq	%rax,%r12
+	movq	%r9,%rax
+	adcq	%rdx,%r13
+	adcq	$0,%r8
+
+
+/* Reduction step 2: fold r9 upwards. */
+	movq	%r9,%rbp
+	shlq	$32,%r9
+	mulq	%r15
+	shrq	$32,%rbp
+	addq	%r9,%r10
+	adcq	%rbp,%r11
+	adcq	%rax,%r12
+	movq	16(%rbx),%rax	/* rax = b[2] */
+	adcq	%rdx,%r13
+	adcq	$0,%r8
+	xorq	%r9,%r9
+
+
+/* Pass 3: accumulate a * b[2] into r8:r13:r12:r11:r10, carry into r9. */
+	movq	%rax,%rbp
+	mulq	0(%rsi)
+	addq	%rax,%r10
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	8(%rsi)
+	addq	%rcx,%r11
+	adcq	$0,%rdx
+	addq	%rax,%r11
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	16(%rsi)
+	addq	%rcx,%r12
+	adcq	$0,%rdx
+	addq	%rax,%r12
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	24(%rsi)
+	addq	%rcx,%r13
+	adcq	$0,%rdx
+	addq	%rax,%r13
+	movq	%r10,%rax
+	adcq	%rdx,%r8
+	adcq	$0,%r9
+
+
+/* Reduction step 3: fold r10 upwards. */
+	movq	%r10,%rbp
+	shlq	$32,%r10
+	mulq	%r15
+	shrq	$32,%rbp
+	addq	%r10,%r11
+	adcq	%rbp,%r12
+	adcq	%rax,%r13
+	movq	24(%rbx),%rax	/* rax = b[3] */
+	adcq	%rdx,%r8
+	adcq	$0,%r9
+	xorq	%r10,%r10
+
+
+/* Pass 4: accumulate a * b[3] into r9:r8:r13:r12:r11, carry into r10. */
+	movq	%rax,%rbp
+	mulq	0(%rsi)
+	addq	%rax,%r11
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	8(%rsi)
+	addq	%rcx,%r12
+	adcq	$0,%rdx
+	addq	%rax,%r12
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	16(%rsi)
+	addq	%rcx,%r13
+	adcq	$0,%rdx
+	addq	%rax,%r13
+	movq	%rbp,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	24(%rsi)
+	addq	%rcx,%r8
+	adcq	$0,%rdx
+	addq	%rax,%r8
+	movq	%r11,%rax
+	adcq	%rdx,%r9
+	adcq	$0,%r10
+
+
+/* Reduction step 4: fold r11 upwards; copies of the unreduced limbs are kept in rcx, rbp (and rbx, rdx below) for the final select. */
+	movq	%r11,%rbp
+	shlq	$32,%r11
+	mulq	%r15
+	shrq	$32,%rbp
+	addq	%r11,%r12
+	adcq	%rbp,%r13
+	movq	%r12,%rcx
+	adcq	%rax,%r8
+	adcq	%rdx,%r9
+	movq	%r13,%rbp
+	adcq	$0,%r10
+
+
+/* Final step: subtract the modulus (limbs -1, r14, 0, r15) from r9:r8:r13:r12 with r10 as the top word; on borrow, restore the saved copies. */
+	subq	$-1,%r12
+	movq	%r8,%rbx
+	sbbq	%r14,%r13
+	sbbq	$0,%r8
+	movq	%r9,%rdx
+	sbbq	%r15,%r9
+	sbbq	$0,%r10
+
+	cmovcq	%rcx,%r12	/* borrow set: the value was already below the modulus, keep it unchanged */
+	cmovcq	%rbp,%r13
+	movq	%r12,0(%rdi)
+	cmovcq	%rbx,%r8
+	movq	%r13,8(%rdi)
+	cmovcq	%rdx,%r9
+	movq	%r8,16(%rdi)
+	movq	%r9,24(%rdi)
+
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+
+
+
+
+
+
+// _ecp_nistz256_sqr_mont(%rdi = result, %rsi = a): public entry point that
+// loads the four 64-bit limbs of a and calls __ecp_nistz256_sqr_montq, with
+// rbp, rbx and r12-r15 pushed around the call and popped afterwards.
+.globl	_ecp_nistz256_sqr_mont
+.private_extern _ecp_nistz256_sqr_mont
+
+.p2align	5
+_ecp_nistz256_sqr_mont:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	movq	0(%rsi),%rax	// a[0]
+	movq	8(%rsi),%r14	// a[1]
+	movq	16(%rsi),%r15	// a[2]
+	movq	24(%rsi),%r8	// a[3]
+
+	call	__ecp_nistz256_sqr_montq
+L$sqr_mont_done:
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbx
+	popq	%rbp
+	.byte	0xf3,0xc3	// rep ret
+// __ecp_nistz256_sqr_montq: internal helper; squares a 4-limb value and reduces it.
+// In:  %rax,%r14,%r15,%r8 = a[0..3], %rsi -> a, %rdi -> 4-limb result buffer.
+// Out: result in %r12..%r15 and at (%rdi); %rsi,%rbp = L$poly+8, L$poly+24.
+.p2align	5
+__ecp_nistz256_sqr_montq:
+	movq	%rax,%r13	// r13 = a[0]
+	mulq	%r14	// a[0]*a[1]
+	movq	%rax,%r9
+	movq	%r15,%rax
+	movq	%rdx,%r10
+
+	mulq	%r13	// a[2]*a[0]
+	addq	%rax,%r10
+	movq	%r8,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%r13	// a[3]*a[0]
+	addq	%rax,%r11
+	movq	%r15,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r12
+
+
+	mulq	%r14	// a[2]*a[1]
+	addq	%rax,%r11
+	movq	%r8,%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rbp
+
+	mulq	%r14	// a[3]*a[1]
+	addq	%rax,%r12
+	movq	%r8,%rax
+	adcq	$0,%rdx
+	addq	%rbp,%r12
+	movq	%rdx,%r13
+	adcq	$0,%r13
+
+
+	mulq	%r15	// a[3]*a[2]
+	xorq	%r15,%r15
+	addq	%rax,%r13
+	movq	0(%rsi),%rax	// reload a[0] for the diagonal squares
+	movq	%rdx,%r14
+	adcq	$0,%r14
+// Double the cross products r9..r14; the carry out lands in r15.
+	addq	%r9,%r9
+	adcq	%r10,%r10
+	adcq	%r11,%r11
+	adcq	%r12,%r12
+	adcq	%r13,%r13
+	adcq	%r14,%r14
+	adcq	$0,%r15
+// Add the diagonal squares a[i]^2; r8 receives the lowest limb.
+	mulq	%rax
+	movq	%rax,%r8
+	movq	8(%rsi),%rax
+	movq	%rdx,%rcx
+
+	mulq	%rax
+	addq	%rcx,%r9
+	adcq	%rax,%r10
+	movq	16(%rsi),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	%rax
+	addq	%rcx,%r11
+	adcq	%rax,%r12
+	movq	24(%rsi),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rcx
+
+	mulq	%rax
+	addq	%rcx,%r13
+	adcq	%rax,%r14
+	movq	%r8,%rax
+	adcq	%rdx,%r15
+
+	movq	L$poly+8(%rip),%rsi	// overwrites the input pointer
+	movq	L$poly+24(%rip),%rbp
+// Reduction: four steps, each folding the lowest live limb x into the limbs
+// above it as (x << 32) across the next two limbs plus x * %rbp across the
+// two after that; x itself is then dropped.
+
+	movq	%r8,%rcx
+	shlq	$32,%r8
+	mulq	%rbp
+	shrq	$32,%rcx
+	addq	%r8,%r9
+	adcq	%rcx,%r10
+	adcq	%rax,%r11
+	movq	%r9,%rax
+	adcq	$0,%rdx
+
+
+
+	movq	%r9,%rcx
+	shlq	$32,%r9
+	movq	%rdx,%r8
+	mulq	%rbp
+	shrq	$32,%rcx
+	addq	%r9,%r10
+	adcq	%rcx,%r11
+	adcq	%rax,%r8
+	movq	%r10,%rax
+	adcq	$0,%rdx
+
+
+
+	movq	%r10,%rcx
+	shlq	$32,%r10
+	movq	%rdx,%r9
+	mulq	%rbp
+	shrq	$32,%rcx
+	addq	%r10,%r11
+	adcq	%rcx,%r8
+	adcq	%rax,%r9
+	movq	%r11,%rax
+	adcq	$0,%rdx
+
+
+
+	movq	%r11,%rcx
+	shlq	$32,%r11
+	movq	%rdx,%r10
+	mulq	%rbp
+	shrq	$32,%rcx
+	addq	%r11,%r8
+	adcq	%rcx,%r9
+	adcq	%rax,%r10
+	adcq	$0,%rdx
+	xorq	%r11,%r11
+// Add the folded low half (r8,r9,r10,rdx) into the high half r12..r15 with
+// the carry in r11; copies of the sum are kept in r8,r9 (and r10,rcx below).
+
+	addq	%r8,%r12
+	adcq	%r9,%r13
+	movq	%r12,%r8
+	adcq	%r10,%r14
+	adcq	%rdx,%r15
+	movq	%r13,%r9
+	adcq	$0,%r11
+// Trial-subtract the 4-limb constant {-1, %rsi, 0, %rbp}.
+	subq	$-1,%r12
+	movq	%r14,%r10
+	sbbq	%rsi,%r13
+	sbbq	$0,%r14
+	movq	%r15,%rcx
+	sbbq	%rbp,%r15
+	sbbq	$0,%r11
+// A borrow means the subtraction overshot: restore the saved limbs.
+	cmovcq	%r8,%r12
+	cmovcq	%r9,%r13
+	movq	%r12,0(%rdi)
+	cmovcq	%r10,%r14
+	movq	%r13,8(%rdi)
+	cmovcq	%rcx,%r15
+	movq	%r14,16(%rdi)
+	movq	%r15,24(%rdi)
+
+	.byte	0xf3,0xc3	// rep ret
+// _ecp_nistz256_select_w5(%rdi = out, %rsi = table, %edx = index): copies one
+// 96-byte entry out of a 16-entry table. Every entry is read on each call;
+// the index only decides which entry survives the mask (SSE path below).
+.globl	_ecp_nistz256_select_w5
+.private_extern _ecp_nistz256_select_w5
+
+.p2align	5
+_ecp_nistz256_select_w5:
+	leaq	_OPENSSL_ia32cap_P(%rip),%rax
+	movq	8(%rax),%rax
+	testl	$32,%eax	// bit 5 of the dword at _OPENSSL_ia32cap_P+8
+	jnz	L$avx2_select_w5	// set: use the AVX2 implementation instead
+	movdqa	L$One(%rip),%xmm0
+	movd	%edx,%xmm1
+// xmm2..xmm7 accumulate the selected entry; they start out zero.
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+// xmm8 = running entry number, starting at L$One (presumably 1 per dword; defined outside this chunk).
+	movdqa	%xmm0,%xmm8
+	pshufd	$0,%xmm1,%xmm1	// index replicated into all four dwords
+
+	movq	$16,%rax	// 16 table entries
+L$select_loop_sse_w5:
+// xmm15 = all-ones where the current entry number equals the index.
+	movdqa	%xmm8,%xmm15
+	paddd	%xmm0,%xmm8
+	pcmpeqd	%xmm1,%xmm15
+
+	movdqa	0(%rsi),%xmm9	// movdqa: table must be 16-byte aligned
+	movdqa	16(%rsi),%xmm10
+	movdqa	32(%rsi),%xmm11
+	movdqa	48(%rsi),%xmm12
+	movdqa	64(%rsi),%xmm13
+	movdqa	80(%rsi),%xmm14
+	leaq	96(%rsi),%rsi	// advance to the next 96-byte entry
+
+	pand	%xmm15,%xmm9
+	pand	%xmm15,%xmm10
+	por	%xmm9,%xmm2
+	pand	%xmm15,%xmm11
+	por	%xmm10,%xmm3
+	pand	%xmm15,%xmm12
+	por	%xmm11,%xmm4
+	pand	%xmm15,%xmm13
+	por	%xmm12,%xmm5
+	pand	%xmm15,%xmm14
+	por	%xmm13,%xmm6
+	por	%xmm14,%xmm7
+
+	decq	%rax
+	jnz	L$select_loop_sse_w5
+
+	movdqu	%xmm2,0(%rdi)
+	movdqu	%xmm3,16(%rdi)
+	movdqu	%xmm4,32(%rdi)
+	movdqu	%xmm5,48(%rdi)
+	movdqu	%xmm6,64(%rdi)
+	movdqu	%xmm7,80(%rdi)
+	.byte	0xf3,0xc3	// rep ret
+
+// _ecp_nistz256_select_w7(%rdi = out, %rsi = table, %edx = index): copies one
+// 64-byte entry out of a 64-entry table. Every entry is read on each call;
+// the index only decides which entry survives the mask (SSE path below).
+.globl	_ecp_nistz256_select_w7
+.private_extern _ecp_nistz256_select_w7
+
+.p2align	5
+_ecp_nistz256_select_w7:
+	leaq	_OPENSSL_ia32cap_P(%rip),%rax
+	movq	8(%rax),%rax
+	testl	$32,%eax	// bit 5 of the dword at _OPENSSL_ia32cap_P+8
+	jnz	L$avx2_select_w7	// set: use the AVX2 implementation instead
+	movdqa	L$One(%rip),%xmm8	// running entry number
+	movd	%edx,%xmm1
+
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+
+	movdqa	%xmm8,%xmm0	// per-iteration increment (copy of L$One)
+	pshufd	$0,%xmm1,%xmm1	// index replicated into all four dwords
+	movq	$64,%rax	// 64 table entries
+
+L$select_loop_sse_w7:
+	movdqa	%xmm8,%xmm15
+	paddd	%xmm0,%xmm8
+	movdqa	0(%rsi),%xmm9	// movdqa: table must be 16-byte aligned
+	movdqa	16(%rsi),%xmm10
+	pcmpeqd	%xmm1,%xmm15	// all-ones where entry number == index
+	movdqa	32(%rsi),%xmm11
+	movdqa	48(%rsi),%xmm12
+	leaq	64(%rsi),%rsi	// advance to the next 64-byte entry
+
+	pand	%xmm15,%xmm9
+	pand	%xmm15,%xmm10
+	por	%xmm9,%xmm2
+	pand	%xmm15,%xmm11
+	por	%xmm10,%xmm3
+	pand	%xmm15,%xmm12
+	por	%xmm11,%xmm4
+	prefetcht0	255(%rsi)
+	por	%xmm12,%xmm5
+
+	decq	%rax
+	jnz	L$select_loop_sse_w7
+
+	movdqu	%xmm2,0(%rdi)
+	movdqu	%xmm3,16(%rdi)
+	movdqu	%xmm4,32(%rdi)
+	movdqu	%xmm5,48(%rdi)
+	.byte	0xf3,0xc3	// rep ret
+
+// AVX2 path of select_w5, entered via L$avx2_select_w5 with %rdi = out,
+// %rsi = table, %edx = index. Scans 16 entries of 96 bytes, two per
+// iteration, XOR-accumulating the masked entries into ymm2..ymm4.
+.p2align	5
+ecp_nistz256_avx2_select_w5:
+L$avx2_select_w5:
+	vzeroupper
+	vmovdqa	L$Two(%rip),%ymm0	// per-iteration step of both entry counters
+
+	vpxor	%ymm2,%ymm2,%ymm2
+	vpxor	%ymm3,%ymm3,%ymm3
+	vpxor	%ymm4,%ymm4,%ymm4
+// ymm5 / ymm10 = entry numbers of the first / second entry handled per iteration.
+	vmovdqa	L$One(%rip),%ymm5
+	vmovdqa	L$Two(%rip),%ymm10
+
+	vmovd	%edx,%xmm1
+	vpermd	%ymm1,%ymm2,%ymm1	// selector ymm2 is all zero: replicate dword 0 (the index)
+
+	movq	$8,%rax	// 8 iterations x 2 entries
+L$select_loop_avx2_w5:
+
+	vmovdqa	0(%rsi),%ymm6	// vmovdqa: table must be 32-byte aligned
+	vmovdqa	32(%rsi),%ymm7
+	vmovdqa	64(%rsi),%ymm8
+
+	vmovdqa	96(%rsi),%ymm11
+	vmovdqa	128(%rsi),%ymm12
+	vmovdqa	160(%rsi),%ymm13
+
+	vpcmpeqd	%ymm1,%ymm5,%ymm9	// mask for the first entry
+	vpcmpeqd	%ymm1,%ymm10,%ymm14	// mask for the second entry
+
+	vpaddd	%ymm0,%ymm5,%ymm5
+	vpaddd	%ymm0,%ymm10,%ymm10
+	leaq	192(%rsi),%rsi	// advance past two 96-byte entries
+
+	vpand	%ymm9,%ymm6,%ymm6
+	vpand	%ymm9,%ymm7,%ymm7
+	vpand	%ymm9,%ymm8,%ymm8
+	vpand	%ymm14,%ymm11,%ymm11
+	vpand	%ymm14,%ymm12,%ymm12
+	vpand	%ymm14,%ymm13,%ymm13
+
+	vpxor	%ymm6,%ymm2,%ymm2
+	vpxor	%ymm7,%ymm3,%ymm3
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpxor	%ymm11,%ymm2,%ymm2
+	vpxor	%ymm12,%ymm3,%ymm3
+	vpxor	%ymm13,%ymm4,%ymm4
+
+	decq	%rax
+	jnz	L$select_loop_avx2_w5
+
+	vmovdqu	%ymm2,0(%rdi)
+	vmovdqu	%ymm3,32(%rdi)
+	vmovdqu	%ymm4,64(%rdi)
+	vzeroupper
+	.byte	0xf3,0xc3	// rep ret
+
+// _ecp_nistz256_avx2_select_w7(%rdi = out, %rsi = table, %edx = index): AVX2
+// scan of 64 entries of 64 bytes: 21 iterations of three entries, then one
+// trailing entry; masked entries are XOR-accumulated into ymm2/ymm3.
+.globl	_ecp_nistz256_avx2_select_w7
+.private_extern _ecp_nistz256_avx2_select_w7
+
+.p2align	5
+_ecp_nistz256_avx2_select_w7:
+L$avx2_select_w7:
+	vzeroupper
+	vmovdqa	L$Three(%rip),%ymm0	// per-iteration step of the three entry counters
+
+	vpxor	%ymm2,%ymm2,%ymm2
+	vpxor	%ymm3,%ymm3,%ymm3
+
+	vmovdqa	L$One(%rip),%ymm4
+	vmovdqa	L$Two(%rip),%ymm8
+	vmovdqa	L$Three(%rip),%ymm12
+
+	vmovd	%edx,%xmm1
+	vpermd	%ymm1,%ymm2,%ymm1	// selector ymm2 is all zero: replicate dword 0 (the index)
+
+
+	movq	$21,%rax	// 21 iterations x 3 entries = 63 entries
+L$select_loop_avx2_w7:
+
+	vmovdqa	0(%rsi),%ymm5	// vmovdqa: table must be 32-byte aligned
+	vmovdqa	32(%rsi),%ymm6
+
+	vmovdqa	64(%rsi),%ymm9
+	vmovdqa	96(%rsi),%ymm10
+
+	vmovdqa	128(%rsi),%ymm13
+	vmovdqa	160(%rsi),%ymm14
+
+	vpcmpeqd	%ymm1,%ymm4,%ymm7
+	vpcmpeqd	%ymm1,%ymm8,%ymm11
+	vpcmpeqd	%ymm1,%ymm12,%ymm15
+
+	vpaddd	%ymm0,%ymm4,%ymm4
+	vpaddd	%ymm0,%ymm8,%ymm8
+	vpaddd	%ymm0,%ymm12,%ymm12
+	leaq	192(%rsi),%rsi	// advance past three 64-byte entries
+
+	vpand	%ymm7,%ymm5,%ymm5
+	vpand	%ymm7,%ymm6,%ymm6
+	vpand	%ymm11,%ymm9,%ymm9
+	vpand	%ymm11,%ymm10,%ymm10
+	vpand	%ymm15,%ymm13,%ymm13
+	vpand	%ymm15,%ymm14,%ymm14
+
+	vpxor	%ymm5,%ymm2,%ymm2
+	vpxor	%ymm6,%ymm3,%ymm3
+	vpxor	%ymm9,%ymm2,%ymm2
+	vpxor	%ymm10,%ymm3,%ymm3
+	vpxor	%ymm13,%ymm2,%ymm2
+	vpxor	%ymm14,%ymm3,%ymm3
+
+	decq	%rax
+	jnz	L$select_loop_avx2_w7
+
+// Trailing entry after the loop; ymm4 has been stepped 21 times and is its entry number.
+	vmovdqa	0(%rsi),%ymm5
+	vmovdqa	32(%rsi),%ymm6
+
+	vpcmpeqd	%ymm1,%ymm4,%ymm7
+
+	vpand	%ymm7,%ymm5,%ymm5
+	vpand	%ymm7,%ymm6,%ymm6
+
+	vpxor	%ymm5,%ymm2,%ymm2
+	vpxor	%ymm6,%ymm3,%ymm3
+
+	vmovdqu	%ymm2,0(%rdi)
+	vmovdqu	%ymm3,32(%rdi)
+	vzeroupper
+	.byte	0xf3,0xc3	// rep ret
+// __ecp_nistz256_add_toq: (r12,r13,r8,r9) += the 4 limbs at (%rbx), reduced by a trial
+// subtraction of {-1, %r14, 0, %r15}. Result stays in those registers and is stored at (%rdi).
+.p2align	5
+__ecp_nistz256_add_toq:
+	xorq	%r11,%r11
+	addq	0(%rbx),%r12
+	adcq	8(%rbx),%r13
+	movq	%r12,%rax
+	adcq	16(%rbx),%r8
+	adcq	24(%rbx),%r9
+	movq	%r13,%rbp
+	adcq	$0,%r11	// r11 = carry out of the addition
+// Trial subtraction; rax,rbp,rcx,r10 keep the unsubtracted sum.
+	subq	$-1,%r12
+	movq	%r8,%rcx
+	sbbq	%r14,%r13
+	sbbq	$0,%r8
+	movq	%r9,%r10
+	sbbq	%r15,%r9
+	sbbq	$0,%r11
+// Borrow out of the five-word subtraction: keep the unsubtracted sum.
+	cmovcq	%rax,%r12
+	cmovcq	%rbp,%r13
+	movq	%r12,0(%rdi)
+	cmovcq	%rcx,%r8
+	movq	%r13,8(%rdi)
+	cmovcq	%r10,%r9
+	movq	%r8,16(%rdi)
+	movq	%r9,24(%rdi)
+
+	.byte	0xf3,0xc3	// rep ret
+// __ecp_nistz256_sub_fromq: (r12,r13,r8,r9) -= the 4 limbs at (%rbx); if that borrowed,
+// {-1, %r14, 0, %r15} is added back. Result stays in those registers and is stored at (%rdi).
+// Clobbers rax, rbp, rcx, r10, r11.
+.p2align	5
+__ecp_nistz256_sub_fromq:
+	subq	0(%rbx),%r12
+	sbbq	8(%rbx),%r13
+	movq	%r12,%rax
+	sbbq	16(%rbx),%r8
+	sbbq	24(%rbx),%r9
+	movq	%r13,%rbp
+	sbbq	%r11,%r11	// r11 = 0, or all-ones if the subtraction borrowed
+// Add the constant speculatively; rax,rbp,rcx,r10 keep the raw difference.
+	addq	$-1,%r12
+	movq	%r8,%rcx
+	adcq	%r14,%r13
+	adcq	$0,%r8
+	movq	%r9,%r10
+	adcq	%r15,%r9
+	testq	%r11,%r11
+// No borrow (r11 == 0): take back the raw difference.
+	cmovzq	%rax,%r12
+	cmovzq	%rbp,%r13
+	movq	%r12,0(%rdi)
+	cmovzq	%rcx,%r8
+	movq	%r13,8(%rdi)
+	cmovzq	%r10,%r9
+	movq	%r8,16(%rdi)
+	movq	%r9,24(%rdi)
+
+	.byte	0xf3,0xc3	// rep ret
+// __ecp_nistz256_subq: (r12,r13,r8,r9) = (rax,rbp,rcx,r10) - (r12,r13,r8,r9), with
+// {-1, %r14, 0, %r15} added back if the subtraction borrowed. The result is left
+// in registers only; this routine performs no memory access.
+.p2align	5
+__ecp_nistz256_subq:
+	subq	%r12,%rax
+	sbbq	%r13,%rbp
+	movq	%rax,%r12
+	sbbq	%r8,%rcx
+	sbbq	%r9,%r10
+	movq	%rbp,%r13
+	sbbq	%r11,%r11	// r11 = 0, or all-ones if the subtraction borrowed
+// r12,r13,r8,r9 hold the raw difference; rax,rbp,rcx,r10 get the corrected one.
+	addq	$-1,%rax
+	movq	%rcx,%r8
+	adcq	%r14,%rbp
+	adcq	$0,%rcx
+	movq	%r10,%r9
+	adcq	%r15,%r10
+	testq	%r11,%r11
+// Borrow (r11 != 0): take the corrected limbs.
+	cmovnzq	%rax,%r12
+	cmovnzq	%rbp,%r13
+	cmovnzq	%rcx,%r8
+	cmovnzq	%r10,%r9
+
+	.byte	0xf3,0xc3	// rep ret
+// __ecp_nistz256_mul_by_2q: doubles (r12,r13,r8,r9), then reduces by a trial
+// subtraction of {-1, %r14, 0, %r15}. Result stays in those registers and is
+// stored at (%rdi). Clobbers rax, rbp, rcx, r10, r11.
+.p2align	5
+__ecp_nistz256_mul_by_2q:
+	xorq	%r11,%r11
+	addq	%r12,%r12
+	adcq	%r13,%r13
+	movq	%r12,%rax
+	adcq	%r8,%r8
+	adcq	%r9,%r9
+	movq	%r13,%rbp
+	adcq	$0,%r11	// r11 = bit shifted out of the top limb
+// Trial subtraction; rax,rbp,rcx,r10 keep the unsubtracted value.
+	subq	$-1,%r12
+	movq	%r8,%rcx
+	sbbq	%r14,%r13
+	sbbq	$0,%r8
+	movq	%r9,%r10
+	sbbq	%r15,%r9
+	sbbq	$0,%r11
+// Borrow out of the five-word subtraction: keep the unsubtracted value.
+	cmovcq	%rax,%r12
+	cmovcq	%rbp,%r13
+	movq	%r12,0(%rdi)
+	cmovcq	%rcx,%r8
+	movq	%r13,8(%rdi)
+	cmovcq	%r10,%r9
+	movq	%r8,16(%rdi)
+	movq	%r9,24(%rdi)
+
+	.byte	0xf3,0xc3	// rep ret
+// _ecp_nistz256_point_double(%rdi = out, %rsi = in): in and out are 96 bytes, handled as three 32-byte fields.
+.globl	_ecp_nistz256_point_double
+.private_extern _ecp_nistz256_point_double
+// Scratch frame: five 32-byte slots at 0/32/64/96/128(%rsp); 96(%rsp) receives a copy of in[0..31].
+.p2align	5
+_ecp_nistz256_point_double:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	subq	$160+8,%rsp
+// L$point_double_shortcutq is also jumped to from _ecp_nistz256_point_add once it has resized its frame.
+L$point_double_shortcutq:
+	movdqu	0(%rsi),%xmm0
+	movq	%rsi,%rbx	// keep in; the squaring helper overwrites %rsi
+	movdqu	16(%rsi),%xmm1
+	movq	32+0(%rsi),%r12
+	movq	32+8(%rsi),%r13
+	movq	32+16(%rsi),%r8
+	movq	32+24(%rsi),%r9
+	movq	L$poly+8(%rip),%r14
+	movq	L$poly+24(%rip),%r15
+	movdqa	%xmm0,96(%rsp)
+	movdqa	%xmm1,96+16(%rsp)
+	leaq	32(%rdi),%r10
+	leaq	64(%rdi),%r11
+.byte	102,72,15,110,199	// movq %rdi,%xmm0 (save out)
+.byte	102,73,15,110,202	// movq %r10,%xmm1 (save out+32)
+.byte	102,73,15,110,211	// movq %r11,%xmm2 (save out+64)
+// 0(%rsp) = 2 * in[32..63]
+	leaq	0(%rsp),%rdi
+	call	__ecp_nistz256_mul_by_2q
+// 64(%rsp) = in[64..95]^2
+	movq	64+0(%rsi),%rax
+	movq	64+8(%rsi),%r14
+	movq	64+16(%rsi),%r15
+	movq	64+24(%rsi),%r8
+	leaq	64-0(%rsi),%rsi
+	leaq	64(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq
+// 0(%rsp) = 0(%rsp)^2
+	movq	0+0(%rsp),%rax
+	movq	8+0(%rsp),%r14
+	leaq	0+0(%rsp),%rsi
+	movq	16+0(%rsp),%r15
+	movq	24+0(%rsp),%r8
+	leaq	0(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq
+// out[64..95] = 2 * (in[64..95] * in[32..63])
+	movq	32(%rbx),%rax
+	movq	64+0(%rbx),%r9
+	movq	64+8(%rbx),%r10
+	movq	64+16(%rbx),%r11
+	movq	64+24(%rbx),%r12
+	leaq	64-0(%rbx),%rsi
+	leaq	32(%rbx),%rbx
+.byte	102,72,15,126,215	// movq %xmm2,%rdi (out+64)
+	call	__ecp_nistz256_mul_montq
+	call	__ecp_nistz256_mul_by_2q
+// 32(%rsp) = in[0..31] + 64(%rsp)
+	movq	96+0(%rsp),%r12
+	movq	96+8(%rsp),%r13
+	leaq	64(%rsp),%rbx
+	movq	96+16(%rsp),%r8
+	movq	96+24(%rsp),%r9
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_add_toq
+// 64(%rsp) = in[0..31] - 64(%rsp)
+	movq	96+0(%rsp),%r12
+	movq	96+8(%rsp),%r13
+	leaq	64(%rsp),%rbx
+	movq	96+16(%rsp),%r8
+	movq	96+24(%rsp),%r9
+	leaq	64(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq
+// out[32..63] = 0(%rsp)^2, which the code after the call then halves in place
+	movq	0+0(%rsp),%rax
+	movq	8+0(%rsp),%r14
+	leaq	0+0(%rsp),%rsi
+	movq	16+0(%rsp),%r15
+	movq	24+0(%rsp),%r8
+.byte	102,72,15,126,207	// movq %xmm1,%rdi (out+32)
+	call	__ecp_nistz256_sqr_montq
+	xorq	%r9,%r9
+	movq	%r12,%rax
+	addq	$-1,%r12	// add {-1, %rsi, 0, %rbp} (left there by the helper); carry goes to r9
+	movq	%r13,%r10
+	adcq	%rsi,%r13
+	movq	%r14,%rcx
+	adcq	$0,%r14
+	movq	%r15,%r8
+	adcq	%rbp,%r15
+	adcq	$0,%r9
+	xorq	%rsi,%rsi
+	testq	$1,%rax
+// Low bit of the square clear: take back the unmodified limbs and a zero top word.
+	cmovzq	%rax,%r12
+	cmovzq	%r10,%r13
+	cmovzq	%rcx,%r14
+	cmovzq	%r8,%r15
+	cmovzq	%rsi,%r9
+// Shift the five-word value r9:r15:r14:r13:r12 right by one bit and store it at (%rdi).
+	movq	%r13,%rax
+	shrq	$1,%r12
+	shlq	$63,%rax
+	movq	%r14,%r10
+	shrq	$1,%r13
+	orq	%rax,%r12
+	shlq	$63,%r10
+	movq	%r15,%rcx
+	shrq	$1,%r14
+	orq	%r10,%r13
+	shlq	$63,%rcx
+	movq	%r12,0(%rdi)
+	shrq	$1,%r15
+	movq	%r13,8(%rdi)
+	shlq	$63,%r9
+	orq	%rcx,%r14
+	orq	%r9,%r15
+	movq	%r14,16(%rdi)
+	movq	%r15,24(%rdi)
+	movq	64(%rsp),%rax
+	leaq	64(%rsp),%rbx
+	movq	0+32(%rsp),%r9
+	movq	8+32(%rsp),%r10
+	leaq	0+32(%rsp),%rsi
+	movq	16+32(%rsp),%r11
+	movq	24+32(%rsp),%r12
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	// 32(%rsp) = 32(%rsp) * 64(%rsp)
+// 128(%rsp) = 2 * 32(%rsp)
+	leaq	128(%rsp),%rdi
+	call	__ecp_nistz256_mul_by_2q
+// 32(%rsp) = 128(%rsp) + 32(%rsp), i.e. three times the product
+	leaq	32(%rsp),%rbx
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_add_toq
+// 0(%rsp) = 0(%rsp) * in[0..31] (the copy at 96(%rsp))
+	movq	96(%rsp),%rax
+	leaq	96(%rsp),%rbx
+	movq	0+0(%rsp),%r9
+	movq	8+0(%rsp),%r10
+	leaq	0+0(%rsp),%rsi
+	movq	16+0(%rsp),%r11
+	movq	24+0(%rsp),%r12
+	leaq	0(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 128(%rsp) = 2 * 0(%rsp)
+	leaq	128(%rsp),%rdi
+	call	__ecp_nistz256_mul_by_2q
+// out[0..31] = 32(%rsp)^2 - 128(%rsp)
+	movq	0+32(%rsp),%rax
+	movq	8+32(%rsp),%r14
+	leaq	0+32(%rsp),%rsi
+	movq	16+32(%rsp),%r15
+	movq	24+32(%rsp),%r8
+.byte	102,72,15,126,199	// movq %xmm0,%rdi (out)
+	call	__ecp_nistz256_sqr_montq
+// Move the square from r12..r15 into r12,r13,r8,r9 and put L$poly+8/+24 back in r14/r15.
+	leaq	128(%rsp),%rbx
+	movq	%r14,%r8
+	movq	%r15,%r9
+	movq	%rsi,%r14
+	movq	%rbp,%r15
+	call	__ecp_nistz256_sub_fromq
+// (r12,r13,r8,r9) = 0(%rsp) - out[0..31]
+	movq	0+0(%rsp),%rax
+	movq	0+8(%rsp),%rbp
+	movq	0+16(%rsp),%rcx
+	movq	0+24(%rsp),%r10
+	leaq	0(%rsp),%rdi
+	call	__ecp_nistz256_subq
+// Save that difference at 0(%rsp), then 0(%rsp) = 0(%rsp) * 32(%rsp)
+	movq	32(%rsp),%rax
+	leaq	32(%rsp),%rbx
+	movq	%r12,%r14
+	xorl	%ecx,%ecx	// also sets ZF, so both cmovz below always move
+	movq	%r12,0+0(%rsp)
+	movq	%r13,%r10
+	movq	%r13,0+8(%rsp)
+	cmovzq	%r8,%r11
+	movq	%r8,0+16(%rsp)
+	leaq	0-0(%rsp),%rsi
+	cmovzq	%r9,%r12
+	movq	%r9,0+24(%rsp)
+	movq	%r14,%r9
+	leaq	0(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// out[32..63] = 0(%rsp) - out[32..63]
+.byte	102,72,15,126,203	// movq %xmm1,%rbx (out+32)
+.byte	102,72,15,126,207	// movq %xmm1,%rdi (out+32)
+	call	__ecp_nistz256_sub_fromq
+
+	addq	$160+8,%rsp
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbx
+	popq	%rbp
+	.byte	0xf3,0xc3	// rep ret
+// _ecp_nistz256_point_add(%rdi = out, %rsi = a, %rdx = b): a, b and out are 96 bytes, handled as three 32-byte fields.
+.globl	_ecp_nistz256_point_add
+.private_extern _ecp_nistz256_point_add
+// Frame: 384..479(%rsp) = copy of a, 480..575(%rsp) = copy of b, 0..383(%rsp) = 32-byte scratch slots.
+.p2align	5
+_ecp_nistz256_point_add:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	subq	$576+8,%rsp
+// xmm5 / xmm4 are built into all-ones masks iff a[64..95] / b[64..95] are entirely zero.
+	movdqu	0(%rsi),%xmm0
+	movdqu	16(%rsi),%xmm1
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm3
+	movdqu	64(%rsi),%xmm4
+	movdqu	80(%rsi),%xmm5
+	movq	%rsi,%rbx
+	movq	%rdx,%rsi
+	movdqa	%xmm0,384(%rsp)
+	movdqa	%xmm1,384+16(%rsp)
+	movdqa	%xmm2,416(%rsp)
+	movdqa	%xmm3,416+16(%rsp)
+	movdqa	%xmm4,448(%rsp)
+	movdqa	%xmm5,448+16(%rsp)
+	por	%xmm4,%xmm5
+
+	movdqu	0(%rsi),%xmm0
+	pshufd	$0xb1,%xmm5,%xmm3
+	movdqu	16(%rsi),%xmm1
+	movdqu	32(%rsi),%xmm2
+	por	%xmm3,%xmm5
+	movdqu	48(%rsi),%xmm3
+	movq	64+0(%rsi),%rax
+	movq	64+8(%rsi),%r14
+	movq	64+16(%rsi),%r15
+	movq	64+24(%rsi),%r8
+	movdqa	%xmm0,480(%rsp)
+	pshufd	$0x1e,%xmm5,%xmm4
+	movdqa	%xmm1,480+16(%rsp)
+	movdqu	64(%rsi),%xmm0
+	movdqu	80(%rsi),%xmm1
+	movdqa	%xmm2,512(%rsp)
+	movdqa	%xmm3,512+16(%rsp)
+	por	%xmm4,%xmm5
+	pxor	%xmm4,%xmm4
+	por	%xmm0,%xmm1
+.byte	102,72,15,110,199	// movq %rdi,%xmm0 (save out)
+// 96(%rsp) = b[64..95]^2
+	leaq	64-0(%rsi),%rsi
+	movq	%rax,544+0(%rsp)
+	movq	%r14,544+8(%rsp)
+	movq	%r15,544+16(%rsp)
+	movq	%r8,544+24(%rsp)
+	leaq	96(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq
+// Finish the two zero masks: xmm5 for a[64..95], xmm4 for b[64..95].
+	pcmpeqd	%xmm4,%xmm5
+	pshufd	$0xb1,%xmm1,%xmm4
+	por	%xmm1,%xmm4
+	pshufd	$0,%xmm5,%xmm5
+	pshufd	$0x1e,%xmm4,%xmm3
+	por	%xmm3,%xmm4
+	pxor	%xmm3,%xmm3
+	pcmpeqd	%xmm3,%xmm4
+	pshufd	$0,%xmm4,%xmm4
+	movq	64+0(%rbx),%rax
+	movq	64+8(%rbx),%r14
+	movq	64+16(%rbx),%r15
+	movq	64+24(%rbx),%r8
+.byte	102,72,15,110,203	// movq %rbx,%xmm1 (save a)
+// 32(%rsp) = a[64..95]^2
+	leaq	64-0(%rbx),%rsi
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq
+// 224(%rsp) = 96(%rsp) * b[64..95]
+	movq	544(%rsp),%rax
+	leaq	544(%rsp),%rbx
+	movq	0+96(%rsp),%r9
+	movq	8+96(%rsp),%r10
+	leaq	0+96(%rsp),%rsi
+	movq	16+96(%rsp),%r11
+	movq	24+96(%rsp),%r12
+	leaq	224(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 256(%rsp) = 32(%rsp) * a[64..95]
+	movq	448(%rsp),%rax
+	leaq	448(%rsp),%rbx
+	movq	0+32(%rsp),%r9
+	movq	8+32(%rsp),%r10
+	leaq	0+32(%rsp),%rsi
+	movq	16+32(%rsp),%r11
+	movq	24+32(%rsp),%r12
+	leaq	256(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 224(%rsp) = 224(%rsp) * a[32..63]
+	movq	416(%rsp),%rax
+	leaq	416(%rsp),%rbx
+	movq	0+224(%rsp),%r9
+	movq	8+224(%rsp),%r10
+	leaq	0+224(%rsp),%rsi
+	movq	16+224(%rsp),%r11
+	movq	24+224(%rsp),%r12
+	leaq	224(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 256(%rsp) = 256(%rsp) * b[32..63]
+	movq	512(%rsp),%rax
+	leaq	512(%rsp),%rbx
+	movq	0+256(%rsp),%r9
+	movq	8+256(%rsp),%r10
+	leaq	0+256(%rsp),%rsi
+	movq	16+256(%rsp),%r11
+	movq	24+256(%rsp),%r12
+	leaq	256(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 64(%rsp) = 256(%rsp) - 224(%rsp)
+	leaq	224(%rsp),%rbx
+	leaq	64(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq
+// r12 = OR of the four difference limbs (zero iff 64(%rsp) == 0); xmm2 = xmm4 | xmm5.
+	orq	%r13,%r12
+	movdqa	%xmm4,%xmm2
+	orq	%r8,%r12
+	orq	%r9,%r12
+	por	%xmm5,%xmm2
+.byte	102,73,15,110,220	// movq %r12,%xmm3
+// 160(%rsp) = 96(%rsp) * a[0..31]
+	movq	384(%rsp),%rax
+	leaq	384(%rsp),%rbx
+	movq	0+96(%rsp),%r9
+	movq	8+96(%rsp),%r10
+	leaq	0+96(%rsp),%rsi
+	movq	16+96(%rsp),%r11
+	movq	24+96(%rsp),%r12
+	leaq	160(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 192(%rsp) = 32(%rsp) * b[0..31]
+	movq	480(%rsp),%rax
+	leaq	480(%rsp),%rbx
+	movq	0+32(%rsp),%r9
+	movq	8+32(%rsp),%r10
+	leaq	0+32(%rsp),%rsi
+	movq	16+32(%rsp),%r11
+	movq	24+32(%rsp),%r12
+	leaq	192(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 0(%rsp) = 192(%rsp) - 160(%rsp)
+	leaq	160(%rsp),%rbx
+	leaq	0(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq
+// ZF after these ORs is set iff 0(%rsp) == 0.
+	orq	%r13,%r12
+	orq	%r8,%r12
+	orq	%r9,%r12
+// Dispatch on the special cases that arise when 0(%rsp) == 0.
+.byte	0x3e	// DS-segment prefix on the following jnz (branch hint encoding)
+	jnz	L$add_proceedq	// 0(%rsp) != 0: general case
+.byte	102,73,15,126,208	// movq %xmm2,%r8 (nonzero iff a[64..95] or b[64..95] is all zero)
+.byte	102,73,15,126,217	// movq %xmm3,%r9 (OR of the 64(%rsp) limbs)
+	testq	%r8,%r8
+	jnz	L$add_proceedq	// general path; the masks pick the output at the end
+	testq	%r9,%r9
+	jz	L$add_doubleq	// both differences are zero: double a instead
+// 0(%rsp) == 0 but 64(%rsp) != 0: the output is 96 zero bytes.
+.byte	102,72,15,126,199	// movq %xmm0,%rdi (out)
+	pxor	%xmm0,%xmm0
+	movdqu	%xmm0,0(%rdi)
+	movdqu	%xmm0,16(%rdi)
+	movdqu	%xmm0,32(%rdi)
+	movdqu	%xmm0,48(%rdi)
+	movdqu	%xmm0,64(%rdi)
+	movdqu	%xmm0,80(%rdi)
+	jmp	L$add_doneq
+
+.p2align	5
+L$add_doubleq:
+.byte	102,72,15,126,206	// movq %xmm1,%rsi (a)
+.byte	102,72,15,126,199	// movq %xmm0,%rdi (out)
+	addq	$416,%rsp	// shrink the frame from 576+8 to the 160+8 used by point_double
+	jmp	L$point_double_shortcutq
+
+.p2align	5
+L$add_proceedq:
+	movq	0+64(%rsp),%rax
+	movq	8+64(%rsp),%r14
+	leaq	0+64(%rsp),%rsi
+	movq	16+64(%rsp),%r15
+	movq	24+64(%rsp),%r8
+	leaq	96(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq	// 96(%rsp) = 64(%rsp)^2
+// 352(%rsp) = 0(%rsp) * a[64..95]
+	movq	448(%rsp),%rax
+	leaq	448(%rsp),%rbx
+	movq	0+0(%rsp),%r9
+	movq	8+0(%rsp),%r10
+	leaq	0+0(%rsp),%rsi
+	movq	16+0(%rsp),%r11
+	movq	24+0(%rsp),%r12
+	leaq	352(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 32(%rsp) = 0(%rsp)^2
+	movq	0+0(%rsp),%rax
+	movq	8+0(%rsp),%r14
+	leaq	0+0(%rsp),%rsi
+	movq	16+0(%rsp),%r15
+	movq	24+0(%rsp),%r8
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq
+// 352(%rsp) = 352(%rsp) * b[64..95]
+	movq	544(%rsp),%rax
+	leaq	544(%rsp),%rbx
+	movq	0+352(%rsp),%r9
+	movq	8+352(%rsp),%r10
+	leaq	0+352(%rsp),%rsi
+	movq	16+352(%rsp),%r11
+	movq	24+352(%rsp),%r12
+	leaq	352(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 128(%rsp) = 32(%rsp) * 0(%rsp)
+	movq	0(%rsp),%rax
+	leaq	0(%rsp),%rbx
+	movq	0+32(%rsp),%r9
+	movq	8+32(%rsp),%r10
+	leaq	0+32(%rsp),%rsi
+	movq	16+32(%rsp),%r11
+	movq	24+32(%rsp),%r12
+	leaq	128(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 192(%rsp) = 32(%rsp) * 160(%rsp)
+	movq	160(%rsp),%rax
+	leaq	160(%rsp),%rbx
+	movq	0+32(%rsp),%r9
+	movq	8+32(%rsp),%r10
+	leaq	0+32(%rsp),%rsi
+	movq	16+32(%rsp),%r11
+	movq	24+32(%rsp),%r12
+	leaq	192(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// Inline doubling of the product just computed (the same instruction sequence
+// as __ecp_nistz256_mul_by_2q, minus the stores), interleaved with loading
+// 96(%rsp) into rax,rbp,rcx,r10 for the subtraction that follows.
+
+	xorq	%r11,%r11
+	addq	%r12,%r12
+	leaq	96(%rsp),%rsi
+	adcq	%r13,%r13
+	movq	%r12,%rax
+	adcq	%r8,%r8
+	adcq	%r9,%r9
+	movq	%r13,%rbp
+	adcq	$0,%r11
+
+	subq	$-1,%r12
+	movq	%r8,%rcx
+	sbbq	%r14,%r13
+	sbbq	$0,%r8
+	movq	%r9,%r10
+	sbbq	%r15,%r9
+	sbbq	$0,%r11
+
+	cmovcq	%rax,%r12
+	movq	0(%rsi),%rax
+	cmovcq	%rbp,%r13
+	movq	8(%rsi),%rbp
+	cmovcq	%rcx,%r8
+	movq	16(%rsi),%rcx
+	cmovcq	%r10,%r9
+	movq	24(%rsi),%r10
+// (r12,r13,r8,r9) = 96(%rsp) - 2 * 192(%rsp)
+	call	__ecp_nistz256_subq
+// 288(%rsp) = that difference - 128(%rsp)
+	leaq	128(%rsp),%rbx
+	leaq	288(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq
+// 320(%rsp) = 192(%rsp) - 288(%rsp); the helper leaves it in registers, stored just below
+	movq	192+0(%rsp),%rax
+	movq	192+8(%rsp),%rbp
+	movq	192+16(%rsp),%rcx
+	movq	192+24(%rsp),%r10
+	leaq	320(%rsp),%rdi
+
+	call	__ecp_nistz256_subq
+
+	movq	%r12,0(%rdi)
+	movq	%r13,8(%rdi)
+	movq	%r8,16(%rdi)
+	movq	%r9,24(%rdi)
+	movq	128(%rsp),%rax
+	leaq	128(%rsp),%rbx
+	movq	0+224(%rsp),%r9
+	movq	8+224(%rsp),%r10
+	leaq	0+224(%rsp),%rsi
+	movq	16+224(%rsp),%r11
+	movq	24+224(%rsp),%r12
+	leaq	256(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	// 256(%rsp) = 224(%rsp) * 128(%rsp)
+// 320(%rsp) = 320(%rsp) * 64(%rsp)
+	movq	320(%rsp),%rax
+	leaq	320(%rsp),%rbx
+	movq	0+64(%rsp),%r9
+	movq	8+64(%rsp),%r10
+	leaq	0+64(%rsp),%rsi
+	movq	16+64(%rsp),%r11
+	movq	24+64(%rsp),%r12
+	leaq	320(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 320(%rsp) = 320(%rsp) - 256(%rsp)
+	leaq	256(%rsp),%rbx
+	leaq	320(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq
+
+.byte	102,72,15,126,199	// movq %xmm0,%rdi (out)
+// out[64..95] = xmm4 ? a[64..95] : (xmm5 ? b[64..95] : 352(%rsp))
+	movdqa	%xmm5,%xmm0
+	movdqa	%xmm5,%xmm1
+	pandn	352(%rsp),%xmm0
+	movdqa	%xmm5,%xmm2
+	pandn	352+16(%rsp),%xmm1
+	movdqa	%xmm5,%xmm3
+	pand	544(%rsp),%xmm2
+	pand	544+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+
+	movdqa	%xmm4,%xmm0
+	movdqa	%xmm4,%xmm1
+	pandn	%xmm2,%xmm0
+	movdqa	%xmm4,%xmm2
+	pandn	%xmm3,%xmm1
+	movdqa	%xmm4,%xmm3
+	pand	448(%rsp),%xmm2
+	pand	448+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+	movdqu	%xmm2,64(%rdi)
+	movdqu	%xmm3,80(%rdi)
+// out[0..31] = xmm4 ? a[0..31] : (xmm5 ? b[0..31] : 288(%rsp))
+	movdqa	%xmm5,%xmm0
+	movdqa	%xmm5,%xmm1
+	pandn	288(%rsp),%xmm0
+	movdqa	%xmm5,%xmm2
+	pandn	288+16(%rsp),%xmm1
+	movdqa	%xmm5,%xmm3
+	pand	480(%rsp),%xmm2
+	pand	480+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+
+	movdqa	%xmm4,%xmm0
+	movdqa	%xmm4,%xmm1
+	pandn	%xmm2,%xmm0
+	movdqa	%xmm4,%xmm2
+	pandn	%xmm3,%xmm1
+	movdqa	%xmm4,%xmm3
+	pand	384(%rsp),%xmm2
+	pand	384+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+	movdqu	%xmm2,0(%rdi)
+	movdqu	%xmm3,16(%rdi)
+// out[32..63] = xmm4 ? a[32..63] : (xmm5 ? b[32..63] : 320(%rsp))
+	movdqa	%xmm5,%xmm0
+	movdqa	%xmm5,%xmm1
+	pandn	320(%rsp),%xmm0
+	movdqa	%xmm5,%xmm2
+	pandn	320+16(%rsp),%xmm1
+	movdqa	%xmm5,%xmm3
+	pand	512(%rsp),%xmm2
+	pand	512+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+
+	movdqa	%xmm4,%xmm0
+	movdqa	%xmm4,%xmm1
+	pandn	%xmm2,%xmm0
+	movdqa	%xmm4,%xmm2
+	pandn	%xmm3,%xmm1
+	movdqa	%xmm4,%xmm3
+	pand	416(%rsp),%xmm2
+	pand	416+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm3,48(%rdi)
+
+L$add_doneq:
+	addq	$576+8,%rsp
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbx
+	popq	%rbp
+	.byte	0xf3,0xc3	// rep ret
+// _ecp_nistz256_point_add_affine(%rdi = out, %rsi = a, %rdx = b): a and out are 96 bytes (three 32-byte fields); only 64 bytes of b are read.
+.globl	_ecp_nistz256_point_add_affine
+.private_extern _ecp_nistz256_point_add_affine
+// Frame: 320..415(%rsp) = copy of a, 416..479(%rsp) = copy of b, 0..319(%rsp) = 32-byte scratch slots.
+.p2align	5
+_ecp_nistz256_point_add_affine:
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	subq	$480+8,%rsp
+// xmm5 / xmm4 are built into all-ones masks iff a[64..95] / all 64 bytes of b are zero.
+	movdqu	0(%rsi),%xmm0
+	movq	%rdx,%rbx
+	movdqu	16(%rsi),%xmm1
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm3
+	movdqu	64(%rsi),%xmm4
+	movdqu	80(%rsi),%xmm5
+	movq	64+0(%rsi),%rax
+	movq	64+8(%rsi),%r14
+	movq	64+16(%rsi),%r15
+	movq	64+24(%rsi),%r8
+	movdqa	%xmm0,320(%rsp)
+	movdqa	%xmm1,320+16(%rsp)
+	movdqa	%xmm2,352(%rsp)
+	movdqa	%xmm3,352+16(%rsp)
+	movdqa	%xmm4,384(%rsp)
+	movdqa	%xmm5,384+16(%rsp)
+	por	%xmm4,%xmm5
+
+	movdqu	0(%rbx),%xmm0
+	pshufd	$0xb1,%xmm5,%xmm3
+	movdqu	16(%rbx),%xmm1
+	movdqu	32(%rbx),%xmm2
+	por	%xmm3,%xmm5
+	movdqu	48(%rbx),%xmm3
+	movdqa	%xmm0,416(%rsp)
+	pshufd	$0x1e,%xmm5,%xmm4
+	movdqa	%xmm1,416+16(%rsp)
+	por	%xmm0,%xmm1
+.byte	102,72,15,110,199	// movq %rdi,%xmm0 (save out)
+	movdqa	%xmm2,448(%rsp)
+	movdqa	%xmm3,448+16(%rsp)
+	por	%xmm2,%xmm3
+	por	%xmm4,%xmm5
+	pxor	%xmm4,%xmm4
+	por	%xmm1,%xmm3
+// 32(%rsp) = a[64..95]^2
+	leaq	64-0(%rsi),%rsi
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq
+// Finish the masks while moving the square (r12..r15) into r9..r12 for the multiply.
+	pcmpeqd	%xmm4,%xmm5
+	pshufd	$0xb1,%xmm3,%xmm4
+	movq	0(%rbx),%rax
+
+	movq	%r12,%r9
+	por	%xmm3,%xmm4
+	pshufd	$0,%xmm5,%xmm5
+	pshufd	$0x1e,%xmm4,%xmm3
+	movq	%r13,%r10
+	por	%xmm3,%xmm4
+	pxor	%xmm3,%xmm3
+	movq	%r14,%r11
+	pcmpeqd	%xmm3,%xmm4
+	pshufd	$0,%xmm4,%xmm4
+// 0(%rsp) = 32(%rsp) * b[0..31]
+	leaq	32-0(%rsp),%rsi
+	movq	%r15,%r12
+	leaq	0(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 64(%rsp) = 0(%rsp) - a[0..31]
+	leaq	320(%rsp),%rbx
+	leaq	64(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq
+// 32(%rsp) = 32(%rsp) * a[64..95]
+	movq	384(%rsp),%rax
+	leaq	384(%rsp),%rbx
+	movq	0+32(%rsp),%r9
+	movq	8+32(%rsp),%r10
+	leaq	0+32(%rsp),%rsi
+	movq	16+32(%rsp),%r11
+	movq	24+32(%rsp),%r12
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 288(%rsp) = 64(%rsp) * a[64..95]
+	movq	384(%rsp),%rax
+	leaq	384(%rsp),%rbx
+	movq	0+64(%rsp),%r9
+	movq	8+64(%rsp),%r10
+	leaq	0+64(%rsp),%rsi
+	movq	16+64(%rsp),%r11
+	movq	24+64(%rsp),%r12
+	leaq	288(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 32(%rsp) = 32(%rsp) * b[32..63]
+	movq	448(%rsp),%rax
+	leaq	448(%rsp),%rbx
+	movq	0+32(%rsp),%r9
+	movq	8+32(%rsp),%r10
+	leaq	0+32(%rsp),%rsi
+	movq	16+32(%rsp),%r11
+	movq	24+32(%rsp),%r12
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 96(%rsp) = 32(%rsp) - a[32..63]
+	leaq	352(%rsp),%rbx
+	leaq	96(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq
+// 128(%rsp) = 64(%rsp)^2
+	movq	0+64(%rsp),%rax
+	movq	8+64(%rsp),%r14
+	leaq	0+64(%rsp),%rsi
+	movq	16+64(%rsp),%r15
+	movq	24+64(%rsp),%r8
+	leaq	128(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq
+// 192(%rsp) = 96(%rsp)^2
+	movq	0+96(%rsp),%rax
+	movq	8+96(%rsp),%r14
+	leaq	0+96(%rsp),%rsi
+	movq	16+96(%rsp),%r15
+	movq	24+96(%rsp),%r8
+	leaq	192(%rsp),%rdi
+	call	__ecp_nistz256_sqr_montq
+// 160(%rsp) = 64(%rsp) * 128(%rsp)
+	movq	128(%rsp),%rax
+	leaq	128(%rsp),%rbx
+	movq	0+64(%rsp),%r9
+	movq	8+64(%rsp),%r10
+	leaq	0+64(%rsp),%rsi
+	movq	16+64(%rsp),%r11
+	movq	24+64(%rsp),%r12
+	leaq	160(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 0(%rsp) = 128(%rsp) * a[0..31]
+	movq	320(%rsp),%rax
+	leaq	320(%rsp),%rbx
+	movq	0+128(%rsp),%r9
+	movq	8+128(%rsp),%r10
+	leaq	0+128(%rsp),%rsi
+	movq	16+128(%rsp),%r11
+	movq	24+128(%rsp),%r12
+	leaq	0(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// Inline doubling of the product just computed (the same instruction sequence
+// as __ecp_nistz256_mul_by_2q, minus the stores), interleaved with loading
+// 192(%rsp) into rax,rbp,rcx,r10 for the subtraction that follows.
+
+	xorq	%r11,%r11
+	addq	%r12,%r12
+	leaq	192(%rsp),%rsi
+	adcq	%r13,%r13
+	movq	%r12,%rax
+	adcq	%r8,%r8
+	adcq	%r9,%r9
+	movq	%r13,%rbp
+	adcq	$0,%r11
+
+	subq	$-1,%r12
+	movq	%r8,%rcx
+	sbbq	%r14,%r13
+	sbbq	$0,%r8
+	movq	%r9,%r10
+	sbbq	%r15,%r9
+	sbbq	$0,%r11
+
+	cmovcq	%rax,%r12
+	movq	0(%rsi),%rax
+	cmovcq	%rbp,%r13
+	movq	8(%rsi),%rbp
+	cmovcq	%rcx,%r8
+	movq	16(%rsi),%rcx
+	cmovcq	%r10,%r9
+	movq	24(%rsi),%r10
+// (r12,r13,r8,r9) = 192(%rsp) - 2 * 0(%rsp)
+	call	__ecp_nistz256_subq
+// 224(%rsp) = that difference - 160(%rsp)
+	leaq	160(%rsp),%rbx
+	leaq	224(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq
+// 64(%rsp) = 0(%rsp) - 224(%rsp); the helper leaves it in registers, stored just below
+	movq	0+0(%rsp),%rax
+	movq	0+8(%rsp),%rbp
+	movq	0+16(%rsp),%rcx
+	movq	0+24(%rsp),%r10
+	leaq	64(%rsp),%rdi
+
+	call	__ecp_nistz256_subq
+
+	movq	%r12,0(%rdi)
+	movq	%r13,8(%rdi)
+	movq	%r8,16(%rdi)
+	movq	%r9,24(%rdi)
+	movq	352(%rsp),%rax
+	leaq	352(%rsp),%rbx
+	movq	0+160(%rsp),%r9
+	movq	8+160(%rsp),%r10
+	leaq	0+160(%rsp),%rsi
+	movq	16+160(%rsp),%r11
+	movq	24+160(%rsp),%r12
+	leaq	32(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq	// 32(%rsp) = 160(%rsp) * a[32..63]
+// 64(%rsp) = 64(%rsp) * 96(%rsp)
+	movq	96(%rsp),%rax
+	leaq	96(%rsp),%rbx
+	movq	0+64(%rsp),%r9
+	movq	8+64(%rsp),%r10
+	leaq	0+64(%rsp),%rsi
+	movq	16+64(%rsp),%r11
+	movq	24+64(%rsp),%r12
+	leaq	64(%rsp),%rdi
+	call	__ecp_nistz256_mul_montq
+// 256(%rsp) = 64(%rsp) - 32(%rsp)
+	leaq	32(%rsp),%rbx
+	leaq	256(%rsp),%rdi
+	call	__ecp_nistz256_sub_fromq
+
+.byte	102,72,15,126,199	// movq %xmm0,%rdi (out)
+// out[64..95] = xmm4 ? a[64..95] : (xmm5 ? L$ONE_mont : 288(%rsp))
+	movdqa	%xmm5,%xmm0
+	movdqa	%xmm5,%xmm1
+	pandn	288(%rsp),%xmm0
+	movdqa	%xmm5,%xmm2
+	pandn	288+16(%rsp),%xmm1
+	movdqa	%xmm5,%xmm3
+	pand	L$ONE_mont(%rip),%xmm2
+	pand	L$ONE_mont+16(%rip),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+
+	movdqa	%xmm4,%xmm0
+	movdqa	%xmm4,%xmm1
+	pandn	%xmm2,%xmm0
+	movdqa	%xmm4,%xmm2
+	pandn	%xmm3,%xmm1
+	movdqa	%xmm4,%xmm3
+	pand	384(%rsp),%xmm2
+	pand	384+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+	movdqu	%xmm2,64(%rdi)
+	movdqu	%xmm3,80(%rdi)
+// out[0..31] = xmm4 ? a[0..31] : (xmm5 ? b[0..31] : 224(%rsp))
+	movdqa	%xmm5,%xmm0
+	movdqa	%xmm5,%xmm1
+	pandn	224(%rsp),%xmm0
+	movdqa	%xmm5,%xmm2
+	pandn	224+16(%rsp),%xmm1
+	movdqa	%xmm5,%xmm3
+	pand	416(%rsp),%xmm2
+	pand	416+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+
+	movdqa	%xmm4,%xmm0
+	movdqa	%xmm4,%xmm1
+	pandn	%xmm2,%xmm0
+	movdqa	%xmm4,%xmm2
+	pandn	%xmm3,%xmm1
+	movdqa	%xmm4,%xmm3
+	pand	320(%rsp),%xmm2
+	pand	320+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+	movdqu	%xmm2,0(%rdi)
+	movdqu	%xmm3,16(%rdi)
+// out[32..63] = xmm4 ? a[32..63] : (xmm5 ? b[32..63] : 256(%rsp))
+	movdqa	%xmm5,%xmm0
+	movdqa	%xmm5,%xmm1
+	pandn	256(%rsp),%xmm0
+	movdqa	%xmm5,%xmm2
+	pandn	256+16(%rsp),%xmm1
+	movdqa	%xmm5,%xmm3
+	pand	448(%rsp),%xmm2
+	pand	448+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+
+	movdqa	%xmm4,%xmm0
+	movdqa	%xmm4,%xmm1
+	pandn	%xmm2,%xmm0
+	movdqa	%xmm4,%xmm2
+	pandn	%xmm3,%xmm1
+	movdqa	%xmm4,%xmm3
+	pand	352(%rsp),%xmm2
+	pand	352+16(%rsp),%xmm3
+	por	%xmm0,%xmm2
+	por	%xmm1,%xmm3
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm3,48(%rdi)
+
+	addq	$480+8,%rsp
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbx
+	popq	%rbp
+	.byte	0xf3,0xc3	// rep ret
+
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S
new file mode 100644
index 0000000..b259286
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S
@@ -0,0 +1,48 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+// _CRYPTO_rdrand(%rdi = out): executes RDRAND once and stores the 64-bit
+// register value at (%rdi) whether or not the instruction succeeded.
+// Returns %rax = 1 if RDRAND set the carry flag (success), 0 otherwise.
+.globl	_CRYPTO_rdrand
+.private_extern _CRYPTO_rdrand
+
+.p2align	4
+_CRYPTO_rdrand:
+	xorq	%rax,%rax
+
+// The bytes below encode "rdrand %rcx".
+.byte	0x48, 0x0f, 0xc7, 0xf1
+
+	adcq	%rax,%rax	// rax = 0 + 0 + CF
+	movq	%rcx,0(%rdi)
+	.byte	0xf3,0xc3	// rep ret
+
+
+// _CRYPTO_rdrand_multiple8_buf(%rdi = buf, %rsi = len): fills buf with RDRAND
+// output, 8 bytes per iteration. Returns %rax = 1 on success (also for len == 0)
+// and 0 as soon as one RDRAND fails; bytes already written stay in place.
+.globl	_CRYPTO_rdrand_multiple8_buf
+.private_extern _CRYPTO_rdrand_multiple8_buf
+
+.p2align	4
+_CRYPTO_rdrand_multiple8_buf:
+	testq	%rsi,%rsi
+	jz	L$out
+	movq	$8,%rdx	// step: bytes produced per RDRAND
+L$loop:
+
+// The bytes below encode "rdrand %rcx".
+.byte	0x48, 0x0f, 0xc7, 0xf1
+	jnc	L$err	// CF clear: no random value was delivered
+	movq	%rcx,0(%rdi)
+	addq	%rdx,%rdi
+	subq	%rdx,%rsi
+	jnz	L$loop	// exits only when len reaches exactly 0, so len must be a multiple of 8
+L$out:
+	movq	$1,%rax
+	.byte	0xf3,0xc3	// rep ret
+L$err:
+	xorq	%rax,%rax
+	.byte	0xf3,0xc3	// rep ret
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/fipsmodule/rsaz-avx2.S b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/rsaz-avx2.S
new file mode 100644
index 0000000..6dd50af
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/rsaz-avx2.S
@@ -0,0 +1,1743 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+.globl	_rsaz_1024_sqr_avx2
+.private_extern _rsaz_1024_sqr_avx2
+/* _rsaz_1024_sqr_avx2: %rdi = result, %rsi = input, %rdx = modulus, %ecx = per-digit reduction multiplier, %r8d = number of squarings. Operands are vectors of 29-bit digits, one per 64-bit lane. NOTE(review): argument roles are read off register usage below; confirm against the C prototype. */
+.p2align	6
+_rsaz_1024_sqr_avx2:
+/* Keep the entry %rsp: it is copied to %rbp below and used by the epilogue to locate the saved registers and unwind the stack. */
+	leaq	(%rsp),%rax
+
+	pushq	%rbx
+
+	pushq	%rbp
+
+	pushq	%r12
+
+	pushq	%r13
+
+	pushq	%r14
+
+	pushq	%r15
+
+	vzeroupper
+	movq	%rax,%rbp
+/* %r13 = modulus pointer; reserve 832 bytes of scratch. %rdi, %rsi and %r13 are biased by +128 and addressed with N-128 displacements from here on. */
+	movq	%rdx,%r13
+	subq	$832,%rsp
+	movq	%r13,%r15
+	subq	$-128,%rdi
+	subq	$-128,%rsi
+	subq	$-128,%r13
+/* r15 = ((modulus & 4095) + 320) >> 12: non-zero iff the 320 bytes starting at the modulus cross a 4096-byte boundary. vpxor leaves flags alone, so jz tests the shrq result. */
+	andq	$4095,%r15
+	addq	$320,%r15
+	shrq	$12,%r15
+	vpxor	%ymm9,%ymm9,%ymm9
+	jz	L$sqr_1024_no_n_copy
+/* Slow path: copy the modulus (9 vectors of 32 bytes plus one zero vector) to 832(%rsp) after rounding %rsp down to a 2048-byte boundary, */
+/* and repoint %r13 at the copy; presumably so the offset vector loads in the reduction loop never straddle a page - confirm. */
+
+
+
+	subq	$320,%rsp
+	vmovdqu	0-128(%r13),%ymm0
+	andq	$-2048,%rsp
+	vmovdqu	32-128(%r13),%ymm1
+	vmovdqu	64-128(%r13),%ymm2
+	vmovdqu	96-128(%r13),%ymm3
+	vmovdqu	128-128(%r13),%ymm4
+	vmovdqu	160-128(%r13),%ymm5
+	vmovdqu	192-128(%r13),%ymm6
+	vmovdqu	224-128(%r13),%ymm7
+	vmovdqu	256-128(%r13),%ymm8
+	leaq	832+128(%rsp),%r13
+	vmovdqu	%ymm0,0-128(%r13)
+	vmovdqu	%ymm1,32-128(%r13)
+	vmovdqu	%ymm2,64-128(%r13)
+	vmovdqu	%ymm3,96-128(%r13)
+	vmovdqu	%ymm4,128-128(%r13)
+	vmovdqu	%ymm5,160-128(%r13)
+	vmovdqu	%ymm6,192-128(%r13)
+	vmovdqu	%ymm7,224-128(%r13)
+	vmovdqu	%ymm8,256-128(%r13)
+	vmovdqu	%ymm9,288-128(%r13)	/* ymm9 is zero here: pad the copy with a zero vector */
+
+L$sqr_1024_no_n_copy:
+	andq	$-1024,%rsp
+/* Load input vectors 1..8 into ymm1..8; vector 0 is used straight from memory inside the loop. */
+	vmovdqu	32-128(%rsi),%ymm1
+	vmovdqu	64-128(%rsi),%ymm2
+	vmovdqu	96-128(%rsi),%ymm3
+	vmovdqu	128-128(%rsi),%ymm4
+	vmovdqu	160-128(%rsi),%ymm5
+	vmovdqu	192-128(%rsi),%ymm6
+	vmovdqu	224-128(%rsi),%ymm7
+	vmovdqu	256-128(%rsi),%ymm8
+/* %rbx -> 192(%rsp), base of the low accumulator window; ymm15 = digit mask from L$and_mask (defined outside this chunk; presumably 0x1fffffff per lane - confirm). */
+	leaq	192(%rsp),%rbx
+	vmovdqu	L$and_mask(%rip),%ymm15
+	jmp	L$OOP_GRANDE_SQR_1024
+
+.p2align	5
+L$OOP_GRANDE_SQR_1024:
+	leaq	576+128(%rsp),%r9
+	leaq	448(%rsp),%r12
+/* One outer iteration = one squaring followed by reduction. Input vectors 1..8 are doubled (vpaddq x,x) and saved at 576(%rsp) via %r9; */
+/* NOTE(review): the doubled copies look like the source of the cross terms of the square - confirm. */
+
+
+	vpaddq	%ymm1,%ymm1,%ymm1
+	vpbroadcastq	0-128(%rsi),%ymm10
+	vpaddq	%ymm2,%ymm2,%ymm2
+	vmovdqa	%ymm1,0-128(%r9)
+	vpaddq	%ymm3,%ymm3,%ymm3
+	vmovdqa	%ymm2,32-128(%r9)
+	vpaddq	%ymm4,%ymm4,%ymm4
+	vmovdqa	%ymm3,64-128(%r9)
+	vpaddq	%ymm5,%ymm5,%ymm5
+	vmovdqa	%ymm4,96-128(%r9)
+	vpaddq	%ymm6,%ymm6,%ymm6
+	vmovdqa	%ymm5,128-128(%r9)
+	vpaddq	%ymm7,%ymm7,%ymm7
+	vmovdqa	%ymm6,160-128(%r9)
+	vpaddq	%ymm8,%ymm8,%ymm8
+	vmovdqa	%ymm7,192-128(%r9)
+	vpxor	%ymm9,%ymm9,%ymm9
+	vmovdqa	%ymm8,224-128(%r9)
+/* First column: ymm0..8 = (first input digit, broadcast in ymm10) times the vectors; meanwhile zero the accumulators at 288-192(%rbx) and 320..544-448(%r12). */
+	vpmuludq	0-128(%rsi),%ymm10,%ymm0
+	vpbroadcastq	32-128(%rsi),%ymm11
+	vmovdqu	%ymm9,288-192(%rbx)
+	vpmuludq	%ymm10,%ymm1,%ymm1
+	vmovdqu	%ymm9,320-448(%r12)
+	vpmuludq	%ymm10,%ymm2,%ymm2
+	vmovdqu	%ymm9,352-448(%r12)
+	vpmuludq	%ymm10,%ymm3,%ymm3
+	vmovdqu	%ymm9,384-448(%r12)
+	vpmuludq	%ymm10,%ymm4,%ymm4
+	vmovdqu	%ymm9,416-448(%r12)
+	vpmuludq	%ymm10,%ymm5,%ymm5
+	vmovdqu	%ymm9,448-448(%r12)
+	vpmuludq	%ymm10,%ymm6,%ymm6
+	vmovdqu	%ymm9,480-448(%r12)
+	vpmuludq	%ymm10,%ymm7,%ymm7
+	vmovdqu	%ymm9,512-448(%r12)
+	vpmuludq	%ymm10,%ymm8,%ymm8
+	vpbroadcastq	64-128(%rsi),%ymm10
+	vmovdqu	%ymm9,544-448(%r12)
+/* %r15 walks the input one qword per inner iteration (it supplies the broadcast digits); %r14d = 4 inner iterations. The first pass enters at L$sqr_entry_1024. */
+	movq	%rsi,%r15
+	movl	$4,%r14d
+	jmp	L$sqr_entry_1024
+.p2align	5
+L$OOP_SQR_1024:
+	vpbroadcastq	32-128(%r15),%ymm11
+	vpmuludq	0-128(%rsi),%ymm10,%ymm0
+	vpaddq	0-192(%rbx),%ymm0,%ymm0
+	vpmuludq	0-128(%r9),%ymm10,%ymm1
+	vpaddq	32-192(%rbx),%ymm1,%ymm1
+	vpmuludq	32-128(%r9),%ymm10,%ymm2
+	vpaddq	64-192(%rbx),%ymm2,%ymm2
+	vpmuludq	64-128(%r9),%ymm10,%ymm3
+	vpaddq	96-192(%rbx),%ymm3,%ymm3
+	vpmuludq	96-128(%r9),%ymm10,%ymm4
+	vpaddq	128-192(%rbx),%ymm4,%ymm4
+	vpmuludq	128-128(%r9),%ymm10,%ymm5
+	vpaddq	160-192(%rbx),%ymm5,%ymm5
+	vpmuludq	160-128(%r9),%ymm10,%ymm6
+	vpaddq	192-192(%rbx),%ymm6,%ymm6
+	vpmuludq	192-128(%r9),%ymm10,%ymm7
+	vpaddq	224-192(%rbx),%ymm7,%ymm7
+	vpmuludq	224-128(%r9),%ymm10,%ymm8
+	vpbroadcastq	64-128(%r15),%ymm10
+	vpaddq	256-192(%rbx),%ymm8,%ymm8
+L$sqr_entry_1024:
+	vmovdqu	%ymm0,0-192(%rbx)
+	vmovdqu	%ymm1,32-192(%rbx)
+/* Each group below multiplies by the next broadcast digit (alternating ymm11 / ymm10), accumulates, and then spills the two lowest live accumulators to the stack. */
+	vpmuludq	32-128(%rsi),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm2,%ymm2
+	vpmuludq	32-128(%r9),%ymm11,%ymm14
+	vpaddq	%ymm14,%ymm3,%ymm3
+	vpmuludq	64-128(%r9),%ymm11,%ymm13
+	vpaddq	%ymm13,%ymm4,%ymm4
+	vpmuludq	96-128(%r9),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpmuludq	128-128(%r9),%ymm11,%ymm14
+	vpaddq	%ymm14,%ymm6,%ymm6
+	vpmuludq	160-128(%r9),%ymm11,%ymm13
+	vpaddq	%ymm13,%ymm7,%ymm7
+	vpmuludq	192-128(%r9),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm8,%ymm8
+	vpmuludq	224-128(%r9),%ymm11,%ymm0
+	vpbroadcastq	96-128(%r15),%ymm11
+	vpaddq	288-192(%rbx),%ymm0,%ymm0
+
+	vmovdqu	%ymm2,64-192(%rbx)
+	vmovdqu	%ymm3,96-192(%rbx)
+
+	vpmuludq	64-128(%rsi),%ymm10,%ymm13
+	vpaddq	%ymm13,%ymm4,%ymm4
+	vpmuludq	64-128(%r9),%ymm10,%ymm12
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpmuludq	96-128(%r9),%ymm10,%ymm14
+	vpaddq	%ymm14,%ymm6,%ymm6
+	vpmuludq	128-128(%r9),%ymm10,%ymm13
+	vpaddq	%ymm13,%ymm7,%ymm7
+	vpmuludq	160-128(%r9),%ymm10,%ymm12
+	vpaddq	%ymm12,%ymm8,%ymm8
+	vpmuludq	192-128(%r9),%ymm10,%ymm14
+	vpaddq	%ymm14,%ymm0,%ymm0
+	vpmuludq	224-128(%r9),%ymm10,%ymm1
+	vpbroadcastq	128-128(%r15),%ymm10
+	vpaddq	320-448(%r12),%ymm1,%ymm1
+
+	vmovdqu	%ymm4,128-192(%rbx)
+	vmovdqu	%ymm5,160-192(%rbx)
+
+	vpmuludq	96-128(%rsi),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm6,%ymm6
+	vpmuludq	96-128(%r9),%ymm11,%ymm14
+	vpaddq	%ymm14,%ymm7,%ymm7
+	vpmuludq	128-128(%r9),%ymm11,%ymm13
+	vpaddq	%ymm13,%ymm8,%ymm8
+	vpmuludq	160-128(%r9),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm0,%ymm0
+	vpmuludq	192-128(%r9),%ymm11,%ymm14
+	vpaddq	%ymm14,%ymm1,%ymm1
+	vpmuludq	224-128(%r9),%ymm11,%ymm2
+	vpbroadcastq	160-128(%r15),%ymm11
+	vpaddq	352-448(%r12),%ymm2,%ymm2
+
+	vmovdqu	%ymm6,192-192(%rbx)
+	vmovdqu	%ymm7,224-192(%rbx)
+
+	vpmuludq	128-128(%rsi),%ymm10,%ymm12
+	vpaddq	%ymm12,%ymm8,%ymm8
+	vpmuludq	128-128(%r9),%ymm10,%ymm14
+	vpaddq	%ymm14,%ymm0,%ymm0
+	vpmuludq	160-128(%r9),%ymm10,%ymm13
+	vpaddq	%ymm13,%ymm1,%ymm1
+	vpmuludq	192-128(%r9),%ymm10,%ymm12
+	vpaddq	%ymm12,%ymm2,%ymm2
+	vpmuludq	224-128(%r9),%ymm10,%ymm3
+	vpbroadcastq	192-128(%r15),%ymm10
+	vpaddq	384-448(%r12),%ymm3,%ymm3
+
+	vmovdqu	%ymm8,256-192(%rbx)
+	vmovdqu	%ymm0,288-192(%rbx)
+	leaq	8(%rbx),%rbx	/* slide the low accumulator window up by one qword (one digit) */
+
+	vpmuludq	160-128(%rsi),%ymm11,%ymm13
+	vpaddq	%ymm13,%ymm1,%ymm1
+	vpmuludq	160-128(%r9),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm2,%ymm2
+	vpmuludq	192-128(%r9),%ymm11,%ymm14
+	vpaddq	%ymm14,%ymm3,%ymm3
+	vpmuludq	224-128(%r9),%ymm11,%ymm4
+	vpbroadcastq	224-128(%r15),%ymm11
+	vpaddq	416-448(%r12),%ymm4,%ymm4
+
+	vmovdqu	%ymm1,320-448(%r12)
+	vmovdqu	%ymm2,352-448(%r12)
+
+	vpmuludq	192-128(%rsi),%ymm10,%ymm12
+	vpaddq	%ymm12,%ymm3,%ymm3
+	vpmuludq	192-128(%r9),%ymm10,%ymm14
+	vpbroadcastq	256-128(%r15),%ymm0
+	vpaddq	%ymm14,%ymm4,%ymm4
+	vpmuludq	224-128(%r9),%ymm10,%ymm5
+	vpbroadcastq	0+8-128(%r15),%ymm10
+	vpaddq	448-448(%r12),%ymm5,%ymm5
+
+	vmovdqu	%ymm3,384-448(%r12)
+	vmovdqu	%ymm4,416-448(%r12)
+	leaq	8(%r15),%r15	/* advance to the next input digit */
+
+	vpmuludq	224-128(%rsi),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpmuludq	224-128(%r9),%ymm11,%ymm6
+	vpaddq	480-448(%r12),%ymm6,%ymm6
+
+	vpmuludq	256-128(%rsi),%ymm0,%ymm7
+	vmovdqu	%ymm5,448-448(%r12)
+	vpaddq	512-448(%r12),%ymm7,%ymm7
+	vmovdqu	%ymm6,480-448(%r12)
+	vmovdqu	%ymm7,512-448(%r12)
+	leaq	8(%r12),%r12	/* slide the high accumulator window up by one qword */
+
+	decl	%r14d
+	jnz	L$OOP_SQR_1024
+/* Inner loop done. Move the bits above bit 29 of the accumulators at 256(%rsp) and 288(%rsp) up into the next lane / next vector; %rbx is reset to 192(%rsp). */
+	vmovdqu	256(%rsp),%ymm8
+	vmovdqu	288(%rsp),%ymm1
+	vmovdqu	320(%rsp),%ymm2
+	leaq	192(%rsp),%rbx
+
+	vpsrlq	$29,%ymm8,%ymm14
+	vpand	%ymm15,%ymm8,%ymm8
+	vpsrlq	$29,%ymm1,%ymm11
+	vpand	%ymm15,%ymm1,%ymm1
+
+	vpermq	$0x93,%ymm14,%ymm14	/* rotate qword lanes up by one (lane 3 wraps to lane 0) so each carry lines up with the next digit */
+	vpxor	%ymm9,%ymm9,%ymm9
+	vpermq	$0x93,%ymm11,%ymm11
+
+	vpblendd	$3,%ymm9,%ymm14,%ymm10
+	vpblendd	$3,%ymm14,%ymm11,%ymm14
+	vpaddq	%ymm10,%ymm8,%ymm8
+	vpblendd	$3,%ymm11,%ymm9,%ymm11
+	vpaddq	%ymm14,%ymm1,%ymm1
+	vpaddq	%ymm11,%ymm2,%ymm2
+	vmovdqu	%ymm1,288-192(%rbx)
+	vmovdqu	%ymm2,320-192(%rbx)
+/* Reduction setup: the four lowest qwords go to scalar registers (%rax, %r10, %r11, %r12); accumulator vectors 1..7 are reloaded into ymm1..7 (ymm8 was prepared above). */
+	movq	(%rsp),%rax
+	movq	8(%rsp),%r10
+	movq	16(%rsp),%r11
+	movq	24(%rsp),%r12
+	vmovdqu	32(%rsp),%ymm1
+	vmovdqu	64-192(%rbx),%ymm2
+	vmovdqu	96-192(%rbx),%ymm3
+	vmovdqu	128-192(%rbx),%ymm4
+	vmovdqu	160-192(%rbx),%ymm5
+	vmovdqu	192-192(%rbx),%ymm6
+	vmovdqu	224-192(%rbx),%ymm7
+/* q = (lowest digit * %ecx) & 0x1fffffff; q * modulus is then added and the low digit shifted out (>> 29). Presumably %ecx holds the Montgomery constant for a 2^29 radix - confirm against the caller. */
+	movq	%rax,%r9
+	imull	%ecx,%eax
+	andl	$0x1fffffff,%eax
+	vmovd	%eax,%xmm12
+
+	movq	%rax,%rdx
+	imulq	-128(%r13),%rax
+	vpbroadcastq	%xmm12,%ymm12
+	addq	%rax,%r9
+	movq	%rdx,%rax
+	imulq	8-128(%r13),%rax
+	shrq	$29,%r9
+	addq	%rax,%r10
+	movq	%rdx,%rax
+	imulq	16-128(%r13),%rax
+	addq	%r9,%r10
+	addq	%rax,%r11
+	imulq	24-128(%r13),%rdx
+	addq	%rdx,%r12
+
+	movq	%r10,%rax
+	imull	%ecx,%eax
+	andl	$0x1fffffff,%eax
+/* %r14d = 9 iterations; each iteration applies four reduction multipliers (ymm12, ymm13, the new ymm12, ymm0), reading the modulus vectors at byte offsets 0, -8, -16 and -24. */
+	movl	$9,%r14d
+	jmp	L$OOP_REDUCE_1024
+
+.p2align	5
+L$OOP_REDUCE_1024:
+	vmovd	%eax,%xmm13
+	vpbroadcastq	%xmm13,%ymm13
+
+	vpmuludq	32-128(%r13),%ymm12,%ymm10
+	movq	%rax,%rdx
+	imulq	-128(%r13),%rax
+	vpaddq	%ymm10,%ymm1,%ymm1
+	addq	%rax,%r10
+	vpmuludq	64-128(%r13),%ymm12,%ymm14
+	movq	%rdx,%rax
+	imulq	8-128(%r13),%rax
+	vpaddq	%ymm14,%ymm2,%ymm2
+	vpmuludq	96-128(%r13),%ymm12,%ymm11
+.byte	0x67	/* 0x67 address-size prefix in front of a register-only instruction: no architectural effect, presumably padding */
+	addq	%rax,%r11
+.byte	0x67
+	movq	%rdx,%rax
+	imulq	16-128(%r13),%rax
+	shrq	$29,%r10
+	vpaddq	%ymm11,%ymm3,%ymm3
+	vpmuludq	128-128(%r13),%ymm12,%ymm10
+	addq	%rax,%r12
+	addq	%r10,%r11
+	vpaddq	%ymm10,%ymm4,%ymm4
+	vpmuludq	160-128(%r13),%ymm12,%ymm14
+	movq	%r11,%rax
+	imull	%ecx,%eax
+	vpaddq	%ymm14,%ymm5,%ymm5
+	vpmuludq	192-128(%r13),%ymm12,%ymm11
+	andl	$0x1fffffff,%eax
+	vpaddq	%ymm11,%ymm6,%ymm6
+	vpmuludq	224-128(%r13),%ymm12,%ymm10
+	vpaddq	%ymm10,%ymm7,%ymm7
+	vpmuludq	256-128(%r13),%ymm12,%ymm14
+	vmovd	%eax,%xmm12
+
+	vpaddq	%ymm14,%ymm8,%ymm8
+
+	vpbroadcastq	%xmm12,%ymm12
+
+	vpmuludq	32-8-128(%r13),%ymm13,%ymm11
+	vmovdqu	96-8-128(%r13),%ymm14
+	movq	%rax,%rdx
+	imulq	-128(%r13),%rax
+	vpaddq	%ymm11,%ymm1,%ymm1
+	vpmuludq	64-8-128(%r13),%ymm13,%ymm10
+	vmovdqu	128-8-128(%r13),%ymm11
+	addq	%rax,%r11
+	movq	%rdx,%rax
+	imulq	8-128(%r13),%rax
+	vpaddq	%ymm10,%ymm2,%ymm2
+	addq	%r12,%rax
+	shrq	$29,%r11
+	vpmuludq	%ymm13,%ymm14,%ymm14
+	vmovdqu	160-8-128(%r13),%ymm10
+	addq	%r11,%rax
+	vpaddq	%ymm14,%ymm3,%ymm3
+	vpmuludq	%ymm13,%ymm11,%ymm11
+	vmovdqu	192-8-128(%r13),%ymm14
+.byte	0x67
+	movq	%rax,%r12
+	imull	%ecx,%eax
+	vpaddq	%ymm11,%ymm4,%ymm4
+	vpmuludq	%ymm13,%ymm10,%ymm10
+.byte	0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00	/* hand-encoded vmovdqu 224-8-128(%r13),%ymm11 using a 32-bit displacement (0x58 = 88) */
+	andl	$0x1fffffff,%eax
+	vpaddq	%ymm10,%ymm5,%ymm5
+	vpmuludq	%ymm13,%ymm14,%ymm14
+	vmovdqu	256-8-128(%r13),%ymm10
+	vpaddq	%ymm14,%ymm6,%ymm6
+	vpmuludq	%ymm13,%ymm11,%ymm11
+	vmovdqu	288-8-128(%r13),%ymm9
+	vmovd	%eax,%xmm0
+	imulq	-128(%r13),%rax
+	vpaddq	%ymm11,%ymm7,%ymm7
+	vpmuludq	%ymm13,%ymm10,%ymm10
+	vmovdqu	32-16-128(%r13),%ymm14
+	vpbroadcastq	%xmm0,%ymm0
+	vpaddq	%ymm10,%ymm8,%ymm8
+	vpmuludq	%ymm13,%ymm9,%ymm9
+	vmovdqu	64-16-128(%r13),%ymm11
+	addq	%rax,%r12
+
+	vmovdqu	32-24-128(%r13),%ymm13
+	vpmuludq	%ymm12,%ymm14,%ymm14
+	vmovdqu	96-16-128(%r13),%ymm10
+	vpaddq	%ymm14,%ymm1,%ymm1
+	vpmuludq	%ymm0,%ymm13,%ymm13
+	vpmuludq	%ymm12,%ymm11,%ymm11
+.byte	0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff	/* hand-encoded vmovdqu 128-16-128(%r13),%ymm14 using a 32-bit displacement (-16) */
+	vpaddq	%ymm1,%ymm13,%ymm13
+	vpaddq	%ymm11,%ymm2,%ymm2
+	vpmuludq	%ymm12,%ymm10,%ymm10
+	vmovdqu	160-16-128(%r13),%ymm11
+.byte	0x67
+	vmovq	%xmm13,%rax
+	vmovdqu	%ymm13,(%rsp)
+	vpaddq	%ymm10,%ymm3,%ymm3
+	vpmuludq	%ymm12,%ymm14,%ymm14
+	vmovdqu	192-16-128(%r13),%ymm10
+	vpaddq	%ymm14,%ymm4,%ymm4
+	vpmuludq	%ymm12,%ymm11,%ymm11
+	vmovdqu	224-16-128(%r13),%ymm14
+	vpaddq	%ymm11,%ymm5,%ymm5
+	vpmuludq	%ymm12,%ymm10,%ymm10
+	vmovdqu	256-16-128(%r13),%ymm11
+	vpaddq	%ymm10,%ymm6,%ymm6
+	vpmuludq	%ymm12,%ymm14,%ymm14
+	shrq	$29,%r12
+	vmovdqu	288-16-128(%r13),%ymm10
+	addq	%r12,%rax
+	vpaddq	%ymm14,%ymm7,%ymm7
+	vpmuludq	%ymm12,%ymm11,%ymm11
+
+	movq	%rax,%r9
+	imull	%ecx,%eax
+	vpaddq	%ymm11,%ymm8,%ymm8
+	vpmuludq	%ymm12,%ymm10,%ymm10
+	andl	$0x1fffffff,%eax
+	vmovd	%eax,%xmm12
+	vmovdqu	96-24-128(%r13),%ymm11
+.byte	0x67
+	vpaddq	%ymm10,%ymm9,%ymm9
+	vpbroadcastq	%xmm12,%ymm12
+
+	vpmuludq	64-24-128(%r13),%ymm0,%ymm14
+	vmovdqu	128-24-128(%r13),%ymm10
+	movq	%rax,%rdx
+	imulq	-128(%r13),%rax
+	movq	8(%rsp),%r10
+	vpaddq	%ymm14,%ymm2,%ymm1
+	vpmuludq	%ymm0,%ymm11,%ymm11
+	vmovdqu	160-24-128(%r13),%ymm14
+	addq	%rax,%r9
+	movq	%rdx,%rax
+	imulq	8-128(%r13),%rax
+.byte	0x67
+	shrq	$29,%r9
+	movq	16(%rsp),%r11
+	vpaddq	%ymm11,%ymm3,%ymm2
+	vpmuludq	%ymm0,%ymm10,%ymm10
+	vmovdqu	192-24-128(%r13),%ymm11
+	addq	%rax,%r10
+	movq	%rdx,%rax
+	imulq	16-128(%r13),%rax
+	vpaddq	%ymm10,%ymm4,%ymm3
+	vpmuludq	%ymm0,%ymm14,%ymm14
+	vmovdqu	224-24-128(%r13),%ymm10
+	imulq	24-128(%r13),%rdx
+	addq	%rax,%r11
+	leaq	(%r9,%r10,1),%rax
+	vpaddq	%ymm14,%ymm5,%ymm4
+	vpmuludq	%ymm0,%ymm11,%ymm11
+	vmovdqu	256-24-128(%r13),%ymm14
+	movq	%rax,%r10
+	imull	%ecx,%eax
+	vpmuludq	%ymm0,%ymm10,%ymm10
+	vpaddq	%ymm11,%ymm6,%ymm5
+	vmovdqu	288-24-128(%r13),%ymm11
+	andl	$0x1fffffff,%eax
+	vpaddq	%ymm10,%ymm7,%ymm6
+	vpmuludq	%ymm0,%ymm14,%ymm14
+	addq	24(%rsp),%rdx
+	vpaddq	%ymm14,%ymm8,%ymm7
+	vpmuludq	%ymm0,%ymm11,%ymm11
+	vpaddq	%ymm11,%ymm9,%ymm8
+	vmovq	%r12,%xmm9
+	movq	%rdx,%r12
+
+	decl	%r14d
+	jnz	L$OOP_REDUCE_1024
+	leaq	448(%rsp),%r12
+	vpaddq	%ymm9,%ymm13,%ymm0
+	vpxor	%ymm9,%ymm9,%ymm9
+/* Add the accumulators left on the stack by the squaring phase (288-192(%rbx) and 320..544-448(%r12)) to the reduced values in ymm0..8. */
+	vpaddq	288-192(%rbx),%ymm0,%ymm0
+	vpaddq	320-448(%r12),%ymm1,%ymm1
+	vpaddq	352-448(%r12),%ymm2,%ymm2
+	vpaddq	384-448(%r12),%ymm3,%ymm3
+	vpaddq	416-448(%r12),%ymm4,%ymm4
+	vpaddq	448-448(%r12),%ymm5,%ymm5
+	vpaddq	480-448(%r12),%ymm6,%ymm6
+	vpaddq	512-448(%r12),%ymm7,%ymm7
+	vpaddq	544-448(%r12),%ymm8,%ymm8
+/* Carry normalisation, pass 1 of 2 for ymm0..3: split each lane at bit 29, rotate the carries up one lane, add them to the next digit; the carry out of ymm3 goes into ymm4. */
+	vpsrlq	$29,%ymm0,%ymm14
+	vpand	%ymm15,%ymm0,%ymm0
+	vpsrlq	$29,%ymm1,%ymm11
+	vpand	%ymm15,%ymm1,%ymm1
+	vpsrlq	$29,%ymm2,%ymm12
+	vpermq	$0x93,%ymm14,%ymm14
+	vpand	%ymm15,%ymm2,%ymm2
+	vpsrlq	$29,%ymm3,%ymm13
+	vpermq	$0x93,%ymm11,%ymm11
+	vpand	%ymm15,%ymm3,%ymm3
+	vpermq	$0x93,%ymm12,%ymm12
+
+	vpblendd	$3,%ymm9,%ymm14,%ymm10
+	vpermq	$0x93,%ymm13,%ymm13
+	vpblendd	$3,%ymm14,%ymm11,%ymm14
+	vpaddq	%ymm10,%ymm0,%ymm0
+	vpblendd	$3,%ymm11,%ymm12,%ymm11
+	vpaddq	%ymm14,%ymm1,%ymm1
+	vpblendd	$3,%ymm12,%ymm13,%ymm12
+	vpaddq	%ymm11,%ymm2,%ymm2
+	vpblendd	$3,%ymm13,%ymm9,%ymm13
+	vpaddq	%ymm12,%ymm3,%ymm3
+	vpaddq	%ymm13,%ymm4,%ymm4
+/* Pass 2 of 2 for ymm0..3, interleaved with the stores of result vectors 0..3 to %rdi. */
+	vpsrlq	$29,%ymm0,%ymm14
+	vpand	%ymm15,%ymm0,%ymm0
+	vpsrlq	$29,%ymm1,%ymm11
+	vpand	%ymm15,%ymm1,%ymm1
+	vpsrlq	$29,%ymm2,%ymm12
+	vpermq	$0x93,%ymm14,%ymm14
+	vpand	%ymm15,%ymm2,%ymm2
+	vpsrlq	$29,%ymm3,%ymm13
+	vpermq	$0x93,%ymm11,%ymm11
+	vpand	%ymm15,%ymm3,%ymm3
+	vpermq	$0x93,%ymm12,%ymm12
+
+	vpblendd	$3,%ymm9,%ymm14,%ymm10
+	vpermq	$0x93,%ymm13,%ymm13
+	vpblendd	$3,%ymm14,%ymm11,%ymm14
+	vpaddq	%ymm10,%ymm0,%ymm0
+	vpblendd	$3,%ymm11,%ymm12,%ymm11
+	vpaddq	%ymm14,%ymm1,%ymm1
+	vmovdqu	%ymm0,0-128(%rdi)
+	vpblendd	$3,%ymm12,%ymm13,%ymm12
+	vpaddq	%ymm11,%ymm2,%ymm2
+	vmovdqu	%ymm1,32-128(%rdi)
+	vpblendd	$3,%ymm13,%ymm9,%ymm13
+	vpaddq	%ymm12,%ymm3,%ymm3
+	vmovdqu	%ymm2,64-128(%rdi)
+	vpaddq	%ymm13,%ymm4,%ymm4
+	vmovdqu	%ymm3,96-128(%rdi)
+	vpsrlq	$29,%ymm4,%ymm14	/* the same two-pass carry normalisation now runs over ymm4..8 */
+	vpand	%ymm15,%ymm4,%ymm4
+	vpsrlq	$29,%ymm5,%ymm11
+	vpand	%ymm15,%ymm5,%ymm5
+	vpsrlq	$29,%ymm6,%ymm12
+	vpermq	$0x93,%ymm14,%ymm14
+	vpand	%ymm15,%ymm6,%ymm6
+	vpsrlq	$29,%ymm7,%ymm13
+	vpermq	$0x93,%ymm11,%ymm11
+	vpand	%ymm15,%ymm7,%ymm7
+	vpsrlq	$29,%ymm8,%ymm0
+	vpermq	$0x93,%ymm12,%ymm12
+	vpand	%ymm15,%ymm8,%ymm8
+	vpermq	$0x93,%ymm13,%ymm13
+
+	vpblendd	$3,%ymm9,%ymm14,%ymm10
+	vpermq	$0x93,%ymm0,%ymm0
+	vpblendd	$3,%ymm14,%ymm11,%ymm14
+	vpaddq	%ymm10,%ymm4,%ymm4
+	vpblendd	$3,%ymm11,%ymm12,%ymm11
+	vpaddq	%ymm14,%ymm5,%ymm5
+	vpblendd	$3,%ymm12,%ymm13,%ymm12
+	vpaddq	%ymm11,%ymm6,%ymm6
+	vpblendd	$3,%ymm13,%ymm0,%ymm13
+	vpaddq	%ymm12,%ymm7,%ymm7
+	vpaddq	%ymm13,%ymm8,%ymm8
+/* Pass 2 of 2 for ymm4..8, interleaved with the stores of result vectors 4..8 to %rdi. */
+	vpsrlq	$29,%ymm4,%ymm14
+	vpand	%ymm15,%ymm4,%ymm4
+	vpsrlq	$29,%ymm5,%ymm11
+	vpand	%ymm15,%ymm5,%ymm5
+	vpsrlq	$29,%ymm6,%ymm12
+	vpermq	$0x93,%ymm14,%ymm14
+	vpand	%ymm15,%ymm6,%ymm6
+	vpsrlq	$29,%ymm7,%ymm13
+	vpermq	$0x93,%ymm11,%ymm11
+	vpand	%ymm15,%ymm7,%ymm7
+	vpsrlq	$29,%ymm8,%ymm0
+	vpermq	$0x93,%ymm12,%ymm12
+	vpand	%ymm15,%ymm8,%ymm8
+	vpermq	$0x93,%ymm13,%ymm13
+
+	vpblendd	$3,%ymm9,%ymm14,%ymm10
+	vpermq	$0x93,%ymm0,%ymm0
+	vpblendd	$3,%ymm14,%ymm11,%ymm14
+	vpaddq	%ymm10,%ymm4,%ymm4
+	vpblendd	$3,%ymm11,%ymm12,%ymm11
+	vpaddq	%ymm14,%ymm5,%ymm5
+	vmovdqu	%ymm4,128-128(%rdi)
+	vpblendd	$3,%ymm12,%ymm13,%ymm12
+	vpaddq	%ymm11,%ymm6,%ymm6
+	vmovdqu	%ymm5,160-128(%rdi)
+	vpblendd	$3,%ymm13,%ymm0,%ymm13
+	vpaddq	%ymm12,%ymm7,%ymm7
+	vmovdqu	%ymm6,192-128(%rdi)
+	vpaddq	%ymm13,%ymm8,%ymm8
+	vmovdqu	%ymm7,224-128(%rdi)
+	vmovdqu	%ymm8,256-128(%rdi)
+/* The result just stored becomes the input of the next squaring (%rsi = %rdi; ymm1..8 still hold vectors 1..8); repeat until %r8d reaches zero. */
+	movq	%rdi,%rsi
+	decl	%r8d
+	jne	L$OOP_GRANDE_SQR_1024
+/* Epilogue: vzeroall clears every ymm register (presumably so no operand data is left behind - confirm); callee-saved registers are reloaded relative to the entry %rsp kept in %rbp. */
+	vzeroall
+	movq	%rbp,%rax
+
+	movq	-48(%rax),%r15
+
+	movq	-40(%rax),%r14
+
+	movq	-32(%rax),%r13
+
+	movq	-24(%rax),%r12
+
+	movq	-16(%rax),%rbp
+
+	movq	-8(%rax),%rbx
+
+	leaq	(%rax),%rsp
+
+L$sqr_1024_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+.globl	_rsaz_1024_mul_avx2
+.private_extern _rsaz_1024_mul_avx2
+/* _rsaz_1024_mul_avx2: %rdi = result, %rsi and %rdx = the two operands, %rcx = modulus, %r8d = per-digit reduction multiplier. Multiplication with interleaved reduction on 29-bit digits held one per 64-bit lane. NOTE(review): argument roles are read off register usage below; confirm against the C prototype. */
+.p2align	6
+_rsaz_1024_mul_avx2:
+/* Keep the entry %rsp: it is copied to %rbp below and used by the epilogue to locate the saved registers and unwind the stack. */
+	leaq	(%rsp),%rax
+
+	pushq	%rbx
+
+	pushq	%rbp
+
+	pushq	%r12
+
+	pushq	%r13
+
+	pushq	%r14
+
+	pushq	%r15
+
+	movq	%rax,%rbp
+/* vzeroall: every ymm register starts at zero. ymm0..8 are the accumulators; ymm14 is never written in this function and serves as the zero source for the vpblendd carry steps. */
+	vzeroall
+	movq	%rdx,%r13
+	subq	$64,%rsp
+/* If the 320 bytes starting at the first operand (%rsi) cross a 4096-byte boundary, swap the operand pointers (%rsi <-> %r13). */
+/* The intervening movq does not alter flags, so both cmovnz test the result of the shrq. */
+
+
+
+
+.byte	0x67,0x67	/* 0x67 address-size prefixes in front of a register-only instruction: no architectural effect, presumably padding */
+	movq	%rsi,%r15
+	andq	$4095,%r15
+	addq	$320,%r15
+	shrq	$12,%r15
+	movq	%rsi,%r15
+	cmovnzq	%r13,%rsi
+	cmovnzq	%r15,%r13
+/* Bias %rsi, %rcx and %rdi by +128 (later accesses use N-128 displacements), then run the same boundary test on the modulus pointer. */
+	movq	%rcx,%r15
+	subq	$-128,%rsi
+	subq	$-128,%rcx
+	subq	$-128,%rdi
+
+	andq	$4095,%r15
+	addq	$320,%r15
+.byte	0x67,0x67
+	shrq	$12,%r15
+	jz	L$mul_1024_no_n_copy
+/* Slow path: copy the modulus to 64(%rsp) after rounding %rsp down to a 512-byte boundary and repoint %rcx at the copy. */
+/* ymm0..8 carry the copy, so each is cleared again right after it is stored. */
+
+
+
+	subq	$320,%rsp
+	vmovdqu	0-128(%rcx),%ymm0
+	andq	$-512,%rsp
+	vmovdqu	32-128(%rcx),%ymm1
+	vmovdqu	64-128(%rcx),%ymm2
+	vmovdqu	96-128(%rcx),%ymm3
+	vmovdqu	128-128(%rcx),%ymm4
+	vmovdqu	160-128(%rcx),%ymm5
+	vmovdqu	192-128(%rcx),%ymm6
+	vmovdqu	224-128(%rcx),%ymm7
+	vmovdqu	256-128(%rcx),%ymm8
+	leaq	64+128(%rsp),%rcx
+	vmovdqu	%ymm0,0-128(%rcx)
+	vpxor	%ymm0,%ymm0,%ymm0
+	vmovdqu	%ymm1,32-128(%rcx)
+	vpxor	%ymm1,%ymm1,%ymm1
+	vmovdqu	%ymm2,64-128(%rcx)
+	vpxor	%ymm2,%ymm2,%ymm2
+	vmovdqu	%ymm3,96-128(%rcx)
+	vpxor	%ymm3,%ymm3,%ymm3
+	vmovdqu	%ymm4,128-128(%rcx)
+	vpxor	%ymm4,%ymm4,%ymm4
+	vmovdqu	%ymm5,160-128(%rcx)
+	vpxor	%ymm5,%ymm5,%ymm5
+	vmovdqu	%ymm6,192-128(%rcx)
+	vpxor	%ymm6,%ymm6,%ymm6
+	vmovdqu	%ymm7,224-128(%rcx)
+	vpxor	%ymm7,%ymm7,%ymm7
+	vmovdqu	%ymm8,256-128(%rcx)
+	vmovdqa	%ymm0,%ymm8
+	vmovdqu	%ymm9,288-128(%rcx)	/* ymm9 is still zero from vzeroall: pad the copy with a zero vector */
+L$mul_1024_no_n_copy:
+	andq	$-64,%rsp
+/* %rbx = first multiplier qword from (%r13), also broadcast into ymm10; %r9..%r12 are cleared as scalar accumulators for the four lowest digits; (%rsp) holds a zeroed 32-byte scratch vector. */
+	movq	(%r13),%rbx
+	vpbroadcastq	(%r13),%ymm10
+	vmovdqu	%ymm0,(%rsp)
+	xorq	%r9,%r9
+.byte	0x67
+	xorq	%r10,%r10
+	xorq	%r11,%r11
+	xorq	%r12,%r12
+
+	vmovdqu	L$and_mask(%rip),%ymm15
+	movl	$9,%r14d
+	vmovdqu	%ymm9,288-128(%rdi)	/* store a zero vector at result offset 288 */
+	jmp	L$oop_mul_1024
+
+.p2align	5
+L$oop_mul_1024:
+	vpsrlq	$29,%ymm3,%ymm9
+	movq	%rbx,%rax
+	imulq	-128(%rsi),%rax
+	addq	%r9,%rax
+	movq	%rbx,%r10
+	imulq	8-128(%rsi),%r10
+	addq	8(%rsp),%r10
+/* q = (lowest digit * %r8d) & 0x1fffffff, broadcast into ymm11 below. Presumably %r8d holds the Montgomery constant for a 2^29 radix - confirm against the caller. */
+	movq	%rax,%r9
+	imull	%r8d,%eax
+	andl	$0x1fffffff,%eax
+
+	movq	%rbx,%r11
+	imulq	16-128(%rsi),%r11
+	addq	16(%rsp),%r11
+
+	movq	%rbx,%r12
+	imulq	24-128(%rsi),%r12
+	addq	24(%rsp),%r12
+	vpmuludq	32-128(%rsi),%ymm10,%ymm0
+	vmovd	%eax,%xmm11
+	vpaddq	%ymm0,%ymm1,%ymm1
+	vpmuludq	64-128(%rsi),%ymm10,%ymm12
+	vpbroadcastq	%xmm11,%ymm11
+	vpaddq	%ymm12,%ymm2,%ymm2
+	vpmuludq	96-128(%rsi),%ymm10,%ymm13
+	vpand	%ymm15,%ymm3,%ymm3
+	vpaddq	%ymm13,%ymm3,%ymm3
+	vpmuludq	128-128(%rsi),%ymm10,%ymm0
+	vpaddq	%ymm0,%ymm4,%ymm4
+	vpmuludq	160-128(%rsi),%ymm10,%ymm12
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpmuludq	192-128(%rsi),%ymm10,%ymm13
+	vpaddq	%ymm13,%ymm6,%ymm6
+	vpmuludq	224-128(%rsi),%ymm10,%ymm0
+	vpermq	$0x93,%ymm9,%ymm9
+	vpaddq	%ymm0,%ymm7,%ymm7
+	vpmuludq	256-128(%rsi),%ymm10,%ymm12
+	vpbroadcastq	8(%r13),%ymm10
+	vpaddq	%ymm12,%ymm8,%ymm8
+/* Scalar half of the reduction: add q * modulus[0..3] to %r9..%r12, shift the low digit out (>> 29) and carry it into %r10. */
+	movq	%rax,%rdx
+	imulq	-128(%rcx),%rax
+	addq	%rax,%r9
+	movq	%rdx,%rax
+	imulq	8-128(%rcx),%rax
+	addq	%rax,%r10
+	movq	%rdx,%rax
+	imulq	16-128(%rcx),%rax
+	addq	%rax,%r11
+	shrq	$29,%r9
+	imulq	24-128(%rcx),%rdx
+	addq	%rdx,%r12
+	addq	%r9,%r10
+
+	vpmuludq	32-128(%rcx),%ymm11,%ymm13
+	vmovq	%xmm10,%rbx
+	vpaddq	%ymm13,%ymm1,%ymm1
+	vpmuludq	64-128(%rcx),%ymm11,%ymm0
+	vpaddq	%ymm0,%ymm2,%ymm2
+	vpmuludq	96-128(%rcx),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm3,%ymm3
+	vpmuludq	128-128(%rcx),%ymm11,%ymm13
+	vpaddq	%ymm13,%ymm4,%ymm4
+	vpmuludq	160-128(%rcx),%ymm11,%ymm0
+	vpaddq	%ymm0,%ymm5,%ymm5
+	vpmuludq	192-128(%rcx),%ymm11,%ymm12
+	vpaddq	%ymm12,%ymm6,%ymm6
+	vpmuludq	224-128(%rcx),%ymm11,%ymm13
+	vpblendd	$3,%ymm14,%ymm9,%ymm12
+	vpaddq	%ymm13,%ymm7,%ymm7
+	vpmuludq	256-128(%rcx),%ymm11,%ymm0
+	vpaddq	%ymm12,%ymm3,%ymm3
+	vpaddq	%ymm0,%ymm8,%ymm8
+/* Second multiplier qword (8(%r13), now in %rbx / ymm10): operand and modulus vectors are read at a -8 byte offset, i.e. shifted by one digit. */
+	movq	%rbx,%rax
+	imulq	-128(%rsi),%rax
+	addq	%rax,%r10
+	vmovdqu	-8+32-128(%rsi),%ymm12
+	movq	%rbx,%rax
+	imulq	8-128(%rsi),%rax
+	addq	%rax,%r11
+	vmovdqu	-8+64-128(%rsi),%ymm13
+
+	movq	%r10,%rax
+	vpblendd	$0xfc,%ymm14,%ymm9,%ymm9
+	imull	%r8d,%eax
+	vpaddq	%ymm9,%ymm4,%ymm4
+	andl	$0x1fffffff,%eax
+
+	imulq	16-128(%rsi),%rbx
+	addq	%rbx,%r12
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vmovd	%eax,%xmm11
+	vmovdqu	-8+96-128(%rsi),%ymm0
+	vpaddq	%ymm12,%ymm1,%ymm1
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vpbroadcastq	%xmm11,%ymm11
+	vmovdqu	-8+128-128(%rsi),%ymm12
+	vpaddq	%ymm13,%ymm2,%ymm2
+	vpmuludq	%ymm10,%ymm0,%ymm0
+	vmovdqu	-8+160-128(%rsi),%ymm13
+	vpaddq	%ymm0,%ymm3,%ymm3
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vmovdqu	-8+192-128(%rsi),%ymm0
+	vpaddq	%ymm12,%ymm4,%ymm4
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vmovdqu	-8+224-128(%rsi),%ymm12
+	vpaddq	%ymm13,%ymm5,%ymm5
+	vpmuludq	%ymm10,%ymm0,%ymm0
+	vmovdqu	-8+256-128(%rsi),%ymm13
+	vpaddq	%ymm0,%ymm6,%ymm6
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vmovdqu	-8+288-128(%rsi),%ymm9
+	vpaddq	%ymm12,%ymm7,%ymm7
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vpaddq	%ymm13,%ymm8,%ymm8
+	vpmuludq	%ymm10,%ymm9,%ymm9
+	vpbroadcastq	16(%r13),%ymm10
+
+	movq	%rax,%rdx
+	imulq	-128(%rcx),%rax
+	addq	%rax,%r10
+	vmovdqu	-8+32-128(%rcx),%ymm0
+	movq	%rdx,%rax
+	imulq	8-128(%rcx),%rax
+	addq	%rax,%r11
+	vmovdqu	-8+64-128(%rcx),%ymm12
+	shrq	$29,%r10
+	imulq	16-128(%rcx),%rdx
+	addq	%rdx,%r12
+	addq	%r10,%r11
+
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovq	%xmm10,%rbx
+	vmovdqu	-8+96-128(%rcx),%ymm13
+	vpaddq	%ymm0,%ymm1,%ymm1
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	vmovdqu	-8+128-128(%rcx),%ymm0
+	vpaddq	%ymm12,%ymm2,%ymm2
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vmovdqu	-8+160-128(%rcx),%ymm12
+	vpaddq	%ymm13,%ymm3,%ymm3
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovdqu	-8+192-128(%rcx),%ymm13
+	vpaddq	%ymm0,%ymm4,%ymm4
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	vmovdqu	-8+224-128(%rcx),%ymm0
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vmovdqu	-8+256-128(%rcx),%ymm12
+	vpaddq	%ymm13,%ymm6,%ymm6
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovdqu	-8+288-128(%rcx),%ymm13
+	vpaddq	%ymm0,%ymm7,%ymm7
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	vpaddq	%ymm12,%ymm8,%ymm8
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vpaddq	%ymm13,%ymm9,%ymm9
+/* Third multiplier qword (16(%r13)): vectors are read at a -16 byte offset. */
+	vmovdqu	-16+32-128(%rsi),%ymm0
+	movq	%rbx,%rax
+	imulq	-128(%rsi),%rax
+	addq	%r11,%rax
+
+	vmovdqu	-16+64-128(%rsi),%ymm12
+	movq	%rax,%r11
+	imull	%r8d,%eax
+	andl	$0x1fffffff,%eax
+
+	imulq	8-128(%rsi),%rbx
+	addq	%rbx,%r12
+	vpmuludq	%ymm10,%ymm0,%ymm0
+	vmovd	%eax,%xmm11
+	vmovdqu	-16+96-128(%rsi),%ymm13
+	vpaddq	%ymm0,%ymm1,%ymm1
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vpbroadcastq	%xmm11,%ymm11
+	vmovdqu	-16+128-128(%rsi),%ymm0
+	vpaddq	%ymm12,%ymm2,%ymm2
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vmovdqu	-16+160-128(%rsi),%ymm12
+	vpaddq	%ymm13,%ymm3,%ymm3
+	vpmuludq	%ymm10,%ymm0,%ymm0
+	vmovdqu	-16+192-128(%rsi),%ymm13
+	vpaddq	%ymm0,%ymm4,%ymm4
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vmovdqu	-16+224-128(%rsi),%ymm0
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vmovdqu	-16+256-128(%rsi),%ymm12
+	vpaddq	%ymm13,%ymm6,%ymm6
+	vpmuludq	%ymm10,%ymm0,%ymm0
+	vmovdqu	-16+288-128(%rsi),%ymm13
+	vpaddq	%ymm0,%ymm7,%ymm7
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vpaddq	%ymm12,%ymm8,%ymm8
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vpbroadcastq	24(%r13),%ymm10
+	vpaddq	%ymm13,%ymm9,%ymm9
+
+	vmovdqu	-16+32-128(%rcx),%ymm0
+	movq	%rax,%rdx
+	imulq	-128(%rcx),%rax
+	addq	%rax,%r11
+	vmovdqu	-16+64-128(%rcx),%ymm12
+	imulq	8-128(%rcx),%rdx
+	addq	%rdx,%r12
+	shrq	$29,%r11
+
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovq	%xmm10,%rbx
+	vmovdqu	-16+96-128(%rcx),%ymm13
+	vpaddq	%ymm0,%ymm1,%ymm1
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	vmovdqu	-16+128-128(%rcx),%ymm0
+	vpaddq	%ymm12,%ymm2,%ymm2
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vmovdqu	-16+160-128(%rcx),%ymm12
+	vpaddq	%ymm13,%ymm3,%ymm3
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovdqu	-16+192-128(%rcx),%ymm13
+	vpaddq	%ymm0,%ymm4,%ymm4
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	vmovdqu	-16+224-128(%rcx),%ymm0
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vmovdqu	-16+256-128(%rcx),%ymm12
+	vpaddq	%ymm13,%ymm6,%ymm6
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovdqu	-16+288-128(%rcx),%ymm13
+	vpaddq	%ymm0,%ymm7,%ymm7
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	vmovdqu	-24+32-128(%rsi),%ymm0
+	vpaddq	%ymm12,%ymm8,%ymm8
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vmovdqu	-24+64-128(%rsi),%ymm12
+	vpaddq	%ymm13,%ymm9,%ymm9
+/* Fourth multiplier qword (24(%r13)): vectors are read at a -24 byte offset. */
+	addq	%r11,%r12
+	imulq	-128(%rsi),%rbx
+	addq	%rbx,%r12
+
+	movq	%r12,%rax
+	imull	%r8d,%eax
+	andl	$0x1fffffff,%eax
+
+	vpmuludq	%ymm10,%ymm0,%ymm0
+	vmovd	%eax,%xmm11
+	vmovdqu	-24+96-128(%rsi),%ymm13
+	vpaddq	%ymm0,%ymm1,%ymm1
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vpbroadcastq	%xmm11,%ymm11
+	vmovdqu	-24+128-128(%rsi),%ymm0
+	vpaddq	%ymm12,%ymm2,%ymm2
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vmovdqu	-24+160-128(%rsi),%ymm12
+	vpaddq	%ymm13,%ymm3,%ymm3
+	vpmuludq	%ymm10,%ymm0,%ymm0
+	vmovdqu	-24+192-128(%rsi),%ymm13
+	vpaddq	%ymm0,%ymm4,%ymm4
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vmovdqu	-24+224-128(%rsi),%ymm0
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vmovdqu	-24+256-128(%rsi),%ymm12
+	vpaddq	%ymm13,%ymm6,%ymm6
+	vpmuludq	%ymm10,%ymm0,%ymm0
+	vmovdqu	-24+288-128(%rsi),%ymm13
+	vpaddq	%ymm0,%ymm7,%ymm7
+	vpmuludq	%ymm10,%ymm12,%ymm12
+	vpaddq	%ymm12,%ymm8,%ymm8
+	vpmuludq	%ymm10,%ymm13,%ymm13
+	vpbroadcastq	32(%r13),%ymm10
+	vpaddq	%ymm13,%ymm9,%ymm9
+	addq	$32,%r13	/* advance the multiplier pointer by four qwords */
+
+	vmovdqu	-24+32-128(%rcx),%ymm0
+	imulq	-128(%rcx),%rax
+	addq	%rax,%r12
+	shrq	$29,%r12
+
+	vmovdqu	-24+64-128(%rcx),%ymm12
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovq	%xmm10,%rbx
+	vmovdqu	-24+96-128(%rcx),%ymm13
+	vpaddq	%ymm0,%ymm1,%ymm0
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	vmovdqu	%ymm0,(%rsp)
+	vpaddq	%ymm12,%ymm2,%ymm1
+	vmovdqu	-24+128-128(%rcx),%ymm0
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vmovdqu	-24+160-128(%rcx),%ymm12
+	vpaddq	%ymm13,%ymm3,%ymm2
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovdqu	-24+192-128(%rcx),%ymm13
+	vpaddq	%ymm0,%ymm4,%ymm3
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	vmovdqu	-24+224-128(%rcx),%ymm0
+	vpaddq	%ymm12,%ymm5,%ymm4
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vmovdqu	-24+256-128(%rcx),%ymm12
+	vpaddq	%ymm13,%ymm6,%ymm5
+	vpmuludq	%ymm11,%ymm0,%ymm0
+	vmovdqu	-24+288-128(%rcx),%ymm13
+	movq	%r12,%r9
+	vpaddq	%ymm0,%ymm7,%ymm6
+	vpmuludq	%ymm11,%ymm12,%ymm12
+	addq	(%rsp),%r9
+	vpaddq	%ymm12,%ymm8,%ymm7
+	vpmuludq	%ymm11,%ymm13,%ymm13
+	vmovq	%r12,%xmm12
+	vpaddq	%ymm13,%ymm9,%ymm8
+
+	decl	%r14d
+	jnz	L$oop_mul_1024
+	vpaddq	(%rsp),%ymm12,%ymm0
+/* Carry normalisation: two passes of (shift right 29, mask, rotate lanes up, blend, add) over ymm0..3, store, then the same two passes over ymm4..8. */
+	vpsrlq	$29,%ymm0,%ymm12
+	vpand	%ymm15,%ymm0,%ymm0
+	vpsrlq	$29,%ymm1,%ymm13
+	vpand	%ymm15,%ymm1,%ymm1
+	vpsrlq	$29,%ymm2,%ymm10
+	vpermq	$0x93,%ymm12,%ymm12
+	vpand	%ymm15,%ymm2,%ymm2
+	vpsrlq	$29,%ymm3,%ymm11
+	vpermq	$0x93,%ymm13,%ymm13
+	vpand	%ymm15,%ymm3,%ymm3
+
+	vpblendd	$3,%ymm14,%ymm12,%ymm9
+	vpermq	$0x93,%ymm10,%ymm10
+	vpblendd	$3,%ymm12,%ymm13,%ymm12
+	vpermq	$0x93,%ymm11,%ymm11
+	vpaddq	%ymm9,%ymm0,%ymm0
+	vpblendd	$3,%ymm13,%ymm10,%ymm13
+	vpaddq	%ymm12,%ymm1,%ymm1
+	vpblendd	$3,%ymm10,%ymm11,%ymm10
+	vpaddq	%ymm13,%ymm2,%ymm2
+	vpblendd	$3,%ymm11,%ymm14,%ymm11
+	vpaddq	%ymm10,%ymm3,%ymm3
+	vpaddq	%ymm11,%ymm4,%ymm4
+
+	vpsrlq	$29,%ymm0,%ymm12
+	vpand	%ymm15,%ymm0,%ymm0
+	vpsrlq	$29,%ymm1,%ymm13
+	vpand	%ymm15,%ymm1,%ymm1
+	vpsrlq	$29,%ymm2,%ymm10
+	vpermq	$0x93,%ymm12,%ymm12
+	vpand	%ymm15,%ymm2,%ymm2
+	vpsrlq	$29,%ymm3,%ymm11
+	vpermq	$0x93,%ymm13,%ymm13
+	vpand	%ymm15,%ymm3,%ymm3
+	vpermq	$0x93,%ymm10,%ymm10
+
+	vpblendd	$3,%ymm14,%ymm12,%ymm9
+	vpermq	$0x93,%ymm11,%ymm11
+	vpblendd	$3,%ymm12,%ymm13,%ymm12
+	vpaddq	%ymm9,%ymm0,%ymm0
+	vpblendd	$3,%ymm13,%ymm10,%ymm13
+	vpaddq	%ymm12,%ymm1,%ymm1
+	vpblendd	$3,%ymm10,%ymm11,%ymm10
+	vpaddq	%ymm13,%ymm2,%ymm2
+	vpblendd	$3,%ymm11,%ymm14,%ymm11
+	vpaddq	%ymm10,%ymm3,%ymm3
+	vpaddq	%ymm11,%ymm4,%ymm4
+
+	vmovdqu	%ymm0,0-128(%rdi)
+	vmovdqu	%ymm1,32-128(%rdi)
+	vmovdqu	%ymm2,64-128(%rdi)
+	vmovdqu	%ymm3,96-128(%rdi)
+	vpsrlq	$29,%ymm4,%ymm12
+	vpand	%ymm15,%ymm4,%ymm4
+	vpsrlq	$29,%ymm5,%ymm13
+	vpand	%ymm15,%ymm5,%ymm5
+	vpsrlq	$29,%ymm6,%ymm10
+	vpermq	$0x93,%ymm12,%ymm12
+	vpand	%ymm15,%ymm6,%ymm6
+	vpsrlq	$29,%ymm7,%ymm11
+	vpermq	$0x93,%ymm13,%ymm13
+	vpand	%ymm15,%ymm7,%ymm7
+	vpsrlq	$29,%ymm8,%ymm0
+	vpermq	$0x93,%ymm10,%ymm10
+	vpand	%ymm15,%ymm8,%ymm8
+	vpermq	$0x93,%ymm11,%ymm11
+
+	vpblendd	$3,%ymm14,%ymm12,%ymm9
+	vpermq	$0x93,%ymm0,%ymm0
+	vpblendd	$3,%ymm12,%ymm13,%ymm12
+	vpaddq	%ymm9,%ymm4,%ymm4
+	vpblendd	$3,%ymm13,%ymm10,%ymm13
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpblendd	$3,%ymm10,%ymm11,%ymm10
+	vpaddq	%ymm13,%ymm6,%ymm6
+	vpblendd	$3,%ymm11,%ymm0,%ymm11
+	vpaddq	%ymm10,%ymm7,%ymm7
+	vpaddq	%ymm11,%ymm8,%ymm8
+
+	vpsrlq	$29,%ymm4,%ymm12
+	vpand	%ymm15,%ymm4,%ymm4
+	vpsrlq	$29,%ymm5,%ymm13
+	vpand	%ymm15,%ymm5,%ymm5
+	vpsrlq	$29,%ymm6,%ymm10
+	vpermq	$0x93,%ymm12,%ymm12
+	vpand	%ymm15,%ymm6,%ymm6
+	vpsrlq	$29,%ymm7,%ymm11
+	vpermq	$0x93,%ymm13,%ymm13
+	vpand	%ymm15,%ymm7,%ymm7
+	vpsrlq	$29,%ymm8,%ymm0
+	vpermq	$0x93,%ymm10,%ymm10
+	vpand	%ymm15,%ymm8,%ymm8
+	vpermq	$0x93,%ymm11,%ymm11
+
+	vpblendd	$3,%ymm14,%ymm12,%ymm9
+	vpermq	$0x93,%ymm0,%ymm0
+	vpblendd	$3,%ymm12,%ymm13,%ymm12
+	vpaddq	%ymm9,%ymm4,%ymm4
+	vpblendd	$3,%ymm13,%ymm10,%ymm13
+	vpaddq	%ymm12,%ymm5,%ymm5
+	vpblendd	$3,%ymm10,%ymm11,%ymm10
+	vpaddq	%ymm13,%ymm6,%ymm6
+	vpblendd	$3,%ymm11,%ymm0,%ymm11
+	vpaddq	%ymm10,%ymm7,%ymm7
+	vpaddq	%ymm11,%ymm8,%ymm8
+
+	vmovdqu	%ymm4,128-128(%rdi)
+	vmovdqu	%ymm5,160-128(%rdi)
+	vmovdqu	%ymm6,192-128(%rdi)
+	vmovdqu	%ymm7,224-128(%rdi)
+	vmovdqu	%ymm8,256-128(%rdi)
+	vzeroupper
+/* Epilogue: reload the callee-saved registers relative to the entry %rsp kept in %rbp, then restore %rsp. */
+	movq	%rbp,%rax
+
+	movq	-48(%rax),%r15
+
+	movq	-40(%rax),%r14
+
+	movq	-32(%rax),%r13
+
+	movq	-24(%rax),%r12
+
+	movq	-16(%rax),%rbp
+
+	movq	-8(%rax),%rbx
+
+	leaq	(%rax),%rsp
+
+L$mul_1024_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+.globl	_rsaz_1024_red2norm_avx2
+.private_extern _rsaz_1024_red2norm_avx2
+/* _rsaz_1024_red2norm_avx2: %rdi = output (16 qwords), %rsi = input (36 qwords, digit k weighted 2^(29k)). Each output word is the sum of the digits overlapping it, shifted into place; a digit that straddles a word boundary is split, and its high part plus the carry of the last add seeds the next word. */
+.p2align	5
+_rsaz_1024_red2norm_avx2:
+	subq	$-128,%rsi	/* bias the input pointer by +128; digits are then read at -128..152(%rsi) */
+	xorq	%rax,%rax
+	movq	-128(%rsi),%r8
+	movq	-120(%rsi),%r9
+	movq	-112(%rsi),%r10
+	shlq	$0,%r8
+	shlq	$29,%r9
+	movq	%r10,%r11
+	shlq	$58,%r10
+	shrq	$6,%r11
+	addq	%r8,%rax
+	addq	%r9,%rax
+	addq	%r10,%rax
+	adcq	$0,%r11
+	movq	%rax,0(%rdi)	/* output word 0 */
+	movq	%r11,%rax
+	movq	-104(%rsi),%r8
+	movq	-96(%rsi),%r9
+	shlq	$23,%r8
+	movq	%r9,%r10
+	shlq	$52,%r9
+	shrq	$12,%r10
+	addq	%r8,%rax
+	addq	%r9,%rax
+	adcq	$0,%r10
+	movq	%rax,8(%rdi)	/* output word 1 */
+	movq	%r10,%rax
+	movq	-88(%rsi),%r11
+	movq	-80(%rsi),%r8
+	shlq	$17,%r11
+	movq	%r8,%r9
+	shlq	$46,%r8
+	shrq	$18,%r9
+	addq	%r11,%rax
+	addq	%r8,%rax
+	adcq	$0,%r9
+	movq	%rax,16(%rdi)	/* output word 2 */
+	movq	%r9,%rax
+	movq	-72(%rsi),%r10
+	movq	-64(%rsi),%r11
+	shlq	$11,%r10
+	movq	%r11,%r8
+	shlq	$40,%r11
+	shrq	$24,%r8
+	addq	%r10,%rax
+	addq	%r11,%rax
+	adcq	$0,%r8
+	movq	%rax,24(%rdi)	/* output word 3 */
+	movq	%r8,%rax
+	movq	-56(%rsi),%r9
+	movq	-48(%rsi),%r10
+	movq	-40(%rsi),%r11
+	shlq	$5,%r9
+	shlq	$34,%r10
+	movq	%r11,%r8
+	shlq	$63,%r11
+	shrq	$1,%r8
+	addq	%r9,%rax
+	addq	%r10,%rax
+	addq	%r11,%rax
+	adcq	$0,%r8
+	movq	%rax,32(%rdi)	/* output word 4 */
+	movq	%r8,%rax
+	movq	-32(%rsi),%r9
+	movq	-24(%rsi),%r10
+	shlq	$28,%r9
+	movq	%r10,%r11
+	shlq	$57,%r10
+	shrq	$7,%r11
+	addq	%r9,%rax
+	addq	%r10,%rax
+	adcq	$0,%r11
+	movq	%rax,40(%rdi)	/* output word 5 */
+	movq	%r11,%rax
+	movq	-16(%rsi),%r8
+	movq	-8(%rsi),%r9
+	shlq	$22,%r8
+	movq	%r9,%r10
+	shlq	$51,%r9
+	shrq	$13,%r10
+	addq	%r8,%rax
+	addq	%r9,%rax
+	adcq	$0,%r10
+	movq	%rax,48(%rdi)	/* output word 6 */
+	movq	%r10,%rax
+	movq	0(%rsi),%r11
+	movq	8(%rsi),%r8
+	shlq	$16,%r11
+	movq	%r8,%r9
+	shlq	$45,%r8
+	shrq	$19,%r9
+	addq	%r11,%rax
+	addq	%r8,%rax
+	adcq	$0,%r9
+	movq	%rax,56(%rdi)	/* output word 7 */
+	movq	%r9,%rax
+	movq	16(%rsi),%r10
+	movq	24(%rsi),%r11
+	shlq	$10,%r10
+	movq	%r11,%r8
+	shlq	$39,%r11
+	shrq	$25,%r8
+	addq	%r10,%rax
+	addq	%r11,%rax
+	adcq	$0,%r8
+	movq	%rax,64(%rdi)	/* output word 8 */
+	movq	%r8,%rax
+	movq	32(%rsi),%r9
+	movq	40(%rsi),%r10
+	movq	48(%rsi),%r11
+	shlq	$4,%r9
+	shlq	$33,%r10
+	movq	%r11,%r8
+	shlq	$62,%r11
+	shrq	$2,%r8
+	addq	%r9,%rax
+	addq	%r10,%rax
+	addq	%r11,%rax
+	adcq	$0,%r8
+	movq	%rax,72(%rdi)	/* output word 9 */
+	movq	%r8,%rax
+	movq	56(%rsi),%r9
+	movq	64(%rsi),%r10
+	shlq	$27,%r9
+	movq	%r10,%r11
+	shlq	$56,%r10
+	shrq	$8,%r11
+	addq	%r9,%rax
+	addq	%r10,%rax
+	adcq	$0,%r11
+	movq	%rax,80(%rdi)	/* output word 10 */
+	movq	%r11,%rax
+	movq	72(%rsi),%r8
+	movq	80(%rsi),%r9
+	shlq	$21,%r8
+	movq	%r9,%r10
+	shlq	$50,%r9
+	shrq	$14,%r10
+	addq	%r8,%rax
+	addq	%r9,%rax
+	adcq	$0,%r10
+	movq	%rax,88(%rdi)	/* output word 11 */
+	movq	%r10,%rax
+	movq	88(%rsi),%r11
+	movq	96(%rsi),%r8
+	shlq	$15,%r11
+	movq	%r8,%r9
+	shlq	$44,%r8
+	shrq	$20,%r9
+	addq	%r11,%rax
+	addq	%r8,%rax
+	adcq	$0,%r9
+	movq	%rax,96(%rdi)	/* output word 12 */
+	movq	%r9,%rax
+	movq	104(%rsi),%r10
+	movq	112(%rsi),%r11
+	shlq	$9,%r10
+	movq	%r11,%r8
+	shlq	$38,%r11
+	shrq	$26,%r8
+	addq	%r10,%rax
+	addq	%r11,%rax
+	adcq	$0,%r8
+	movq	%rax,104(%rdi)	/* output word 13 */
+	movq	%r8,%rax
+	movq	120(%rsi),%r9
+	movq	128(%rsi),%r10
+	movq	136(%rsi),%r11
+	shlq	$3,%r9
+	shlq	$32,%r10
+	movq	%r11,%r8
+	shlq	$61,%r11
+	shrq	$3,%r8
+	addq	%r9,%rax
+	addq	%r10,%rax
+	addq	%r11,%rax
+	adcq	$0,%r8
+	movq	%rax,112(%rdi)	/* output word 14 */
+	movq	%r8,%rax
+	movq	144(%rsi),%r9
+	movq	152(%rsi),%r10
+	shlq	$26,%r9
+	movq	%r10,%r11
+	shlq	$55,%r10
+	shrq	$9,%r11
+	addq	%r9,%rax
+	addq	%r10,%rax
+	adcq	$0,%r11
+	movq	%rax,120(%rdi)	/* output word 15 (last) */
+	movq	%r11,%rax	/* bits left over above word 15 remain in %rax on return */
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+.globl	_rsaz_1024_norm2red_avx2
+.private_extern _rsaz_1024_norm2red_avx2
+/* rsaz_1024_norm2red_avx2: reads sixteen 64-bit words from (%rsi) and writes them to (%rdi) as 36 limbs of 29 bits, one limb per 64-bit slot, followed by four zero slots (40 slots in total). Uses rax and r8-r11 as scratch. */
+.p2align	5
+_rsaz_1024_norm2red_avx2:
+	subq	$-128,%rdi	/* rdi += 128, so the stores below use displacements -128..184 */
+	movq	(%rsi),%r8
+	movl	$0x1fffffff,%eax	/* 29-bit limb mask; the 32-bit move clears the upper half of rax */
+	movq	8(%rsi),%r9
+	movq	%r8,%r11
+	shrq	$0,%r11	/* limb 0: input bits 0..28 */
+	andq	%rax,%r11
+	movq	%r11,-128(%rdi)
+	movq	%r8,%r10
+	shrq	$29,%r10	/* limb 1: input bits 29..57 */
+	andq	%rax,%r10
+	movq	%r10,-120(%rdi)
+	shrdq	$58,%r9,%r8	/* limb 2 straddles two input words: low bits from r8, high bits shifted in from r9 */
+	andq	%rax,%r8
+	movq	%r8,-112(%rdi)
+	movq	16(%rsi),%r10
+	movq	%r9,%r8
+	shrq	$23,%r8
+	andq	%rax,%r8
+	movq	%r8,-104(%rdi)
+	shrdq	$52,%r10,%r9
+	andq	%rax,%r9
+	movq	%r9,-96(%rdi)
+	movq	24(%rsi),%r11
+	movq	%r10,%r9
+	shrq	$17,%r9
+	andq	%rax,%r9
+	movq	%r9,-88(%rdi)
+	shrdq	$46,%r11,%r10
+	andq	%rax,%r10
+	movq	%r10,-80(%rdi)
+	movq	32(%rsi),%r8
+	movq	%r11,%r10
+	shrq	$11,%r10
+	andq	%rax,%r10
+	movq	%r10,-72(%rdi)
+	shrdq	$40,%r8,%r11
+	andq	%rax,%r11
+	movq	%r11,-64(%rdi)
+	movq	40(%rsi),%r9
+	movq	%r8,%r11
+	shrq	$5,%r11
+	andq	%rax,%r11
+	movq	%r11,-56(%rdi)
+	movq	%r8,%r10
+	shrq	$34,%r10
+	andq	%rax,%r10
+	movq	%r10,-48(%rdi)
+	shrdq	$63,%r9,%r8
+	andq	%rax,%r8
+	movq	%r8,-40(%rdi)
+	movq	48(%rsi),%r10
+	movq	%r9,%r8
+	shrq	$28,%r8
+	andq	%rax,%r8
+	movq	%r8,-32(%rdi)
+	shrdq	$57,%r10,%r9
+	andq	%rax,%r9
+	movq	%r9,-24(%rdi)
+	movq	56(%rsi),%r11
+	movq	%r10,%r9
+	shrq	$22,%r9
+	andq	%rax,%r9
+	movq	%r9,-16(%rdi)
+	shrdq	$51,%r11,%r10
+	andq	%rax,%r10
+	movq	%r10,-8(%rdi)
+	movq	64(%rsi),%r8
+	movq	%r11,%r10
+	shrq	$16,%r10
+	andq	%rax,%r10
+	movq	%r10,0(%rdi)
+	shrdq	$45,%r8,%r11
+	andq	%rax,%r11
+	movq	%r11,8(%rdi)
+	movq	72(%rsi),%r9
+	movq	%r8,%r11
+	shrq	$10,%r11
+	andq	%rax,%r11
+	movq	%r11,16(%rdi)
+	shrdq	$39,%r9,%r8
+	andq	%rax,%r8
+	movq	%r8,24(%rdi)
+	movq	80(%rsi),%r10
+	movq	%r9,%r8
+	shrq	$4,%r8
+	andq	%rax,%r8
+	movq	%r8,32(%rdi)
+	movq	%r9,%r11
+	shrq	$33,%r11
+	andq	%rax,%r11
+	movq	%r11,40(%rdi)
+	shrdq	$62,%r10,%r9
+	andq	%rax,%r9
+	movq	%r9,48(%rdi)
+	movq	88(%rsi),%r11
+	movq	%r10,%r9
+	shrq	$27,%r9
+	andq	%rax,%r9
+	movq	%r9,56(%rdi)
+	shrdq	$56,%r11,%r10
+	andq	%rax,%r10
+	movq	%r10,64(%rdi)
+	movq	96(%rsi),%r8
+	movq	%r11,%r10
+	shrq	$21,%r10
+	andq	%rax,%r10
+	movq	%r10,72(%rdi)
+	shrdq	$50,%r8,%r11
+	andq	%rax,%r11
+	movq	%r11,80(%rdi)
+	movq	104(%rsi),%r9
+	movq	%r8,%r11
+	shrq	$15,%r11
+	andq	%rax,%r11
+	movq	%r11,88(%rdi)
+	shrdq	$44,%r9,%r8
+	andq	%rax,%r8
+	movq	%r8,96(%rdi)
+	movq	112(%rsi),%r10
+	movq	%r9,%r8
+	shrq	$9,%r8
+	andq	%rax,%r8
+	movq	%r8,104(%rdi)
+	shrdq	$38,%r10,%r9
+	andq	%rax,%r9
+	movq	%r9,112(%rdi)
+	movq	120(%rsi),%r11
+	movq	%r10,%r9
+	shrq	$3,%r9
+	andq	%rax,%r9
+	movq	%r9,120(%rdi)
+	movq	%r10,%r8
+	shrq	$32,%r8
+	andq	%rax,%r8
+	movq	%r8,128(%rdi)
+	shrdq	$61,%r11,%r10
+	andq	%rax,%r10
+	movq	%r10,136(%rdi)
+	xorq	%r8,%r8	/* r8 = 0: zero high word for the last shrd and source of the zero padding */
+	movq	%r11,%r10
+	shrq	$26,%r10
+	andq	%rax,%r10
+	movq	%r10,144(%rdi)
+	shrdq	$55,%r8,%r11	/* last limb: only input bits 1015..1023 remain, zeros are shifted in above them */
+	andq	%rax,%r11
+	movq	%r11,152(%rdi)
+	movq	%r8,160(%rdi)	/* four trailing slots are written as zero */
+	movq	%r8,168(%rdi)
+	movq	%r8,176(%rdi)
+	movq	%r8,184(%rdi)
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+
+.globl	_rsaz_1024_scatter5_avx2
+.private_extern _rsaz_1024_scatter5_avx2
+/* rsaz_1024_scatter5_avx2: rdi = table base, rsi = source, edx = entry index. Reads nine 32-byte groups from (%rsi), keeps dwords 0,2,4,6 of each group (16 bytes) and stores them at table offset 16*edx + 512*k for k = 0..8. */
+.p2align	5
+_rsaz_1024_scatter5_avx2:
+	vzeroupper
+	vmovdqu	L$scatter_permd(%rip),%ymm5	/* dword indices used by vpermd below */
+	shll	$4,%edx	/* index * 16 = byte offset of the entry within each 512-byte row */
+	leaq	(%rdi,%rdx,1),%rdi
+	movl	$9,%eax	/* loop count: nine 32-byte source groups */
+	jmp	L$oop_scatter_1024
+
+.p2align	5
+L$oop_scatter_1024:
+	vmovdqu	(%rsi),%ymm0
+	leaq	32(%rsi),%rsi
+	vpermd	%ymm0,%ymm5,%ymm0	/* gather the even-numbered dwords into the low 128 bits */
+	vmovdqu	%xmm0,(%rdi)	/* only the low 16 bytes are stored */
+	leaq	512(%rdi),%rdi	/* advance to the same entry in the next 512-byte row */
+	decl	%eax
+	jnz	L$oop_scatter_1024
+
+	vzeroupper
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+
+
+.globl	_rsaz_1024_gather5_avx2
+.private_extern _rsaz_1024_gather5_avx2
+/* rsaz_1024_gather5_avx2: rdi = output, rsi = table, edx = entry index. For each of nine 512-byte table rows it loads the whole row, ANDs every 32-byte chunk with an index-equality mask and ORs the results, so the addresses read do not depend on edx. Writes nine 32-byte groups plus one zero group to (%rdi). */
+.p2align	5
+_rsaz_1024_gather5_avx2:
+
+	vzeroupper
+	movq	%rsp,%r11	/* keep the caller stack pointer; restored before returning */
+
+	leaq	-256(%rsp),%rsp	/* 256 bytes of scratch for eight 32-byte masks */
+	andq	$-32,%rsp	/* 32-byte alignment, required by the vmovdqa stores below */
+	leaq	L$inc(%rip),%r10
+	leaq	-128(%rsp),%rax	/* biased base: N+128(%rax) addresses rsp+N */
+
+	vmovd	%edx,%xmm4
+	vmovdqa	(%r10),%ymm0	/* candidate indices 0,0,0,0,1,1,1,1 */
+	vmovdqa	32(%r10),%ymm1	/* candidate indices 2,2,2,2,3,3,3,3 */
+	vmovdqa	64(%r10),%ymm5	/* step of 4 added to produce the following candidates */
+	vpbroadcastd	%xmm4,%ymm4	/* requested index in every dword lane */
+/* Build 16 masks (two candidate indices per ymm, 32 candidates): the first eight go to the stack, the last eight stay in ymm8-ymm15. */
+	vpaddd	%ymm5,%ymm0,%ymm2
+	vpcmpeqd	%ymm4,%ymm0,%ymm0
+	vpaddd	%ymm5,%ymm1,%ymm3
+	vpcmpeqd	%ymm4,%ymm1,%ymm1
+	vmovdqa	%ymm0,0+128(%rax)
+	vpaddd	%ymm5,%ymm2,%ymm0
+	vpcmpeqd	%ymm4,%ymm2,%ymm2
+	vmovdqa	%ymm1,32+128(%rax)
+	vpaddd	%ymm5,%ymm3,%ymm1
+	vpcmpeqd	%ymm4,%ymm3,%ymm3
+	vmovdqa	%ymm2,64+128(%rax)
+	vpaddd	%ymm5,%ymm0,%ymm2
+	vpcmpeqd	%ymm4,%ymm0,%ymm0
+	vmovdqa	%ymm3,96+128(%rax)
+	vpaddd	%ymm5,%ymm1,%ymm3
+	vpcmpeqd	%ymm4,%ymm1,%ymm1
+	vmovdqa	%ymm0,128+128(%rax)
+	vpaddd	%ymm5,%ymm2,%ymm8
+	vpcmpeqd	%ymm4,%ymm2,%ymm2
+	vmovdqa	%ymm1,160+128(%rax)
+	vpaddd	%ymm5,%ymm3,%ymm9
+	vpcmpeqd	%ymm4,%ymm3,%ymm3
+	vmovdqa	%ymm2,192+128(%rax)
+	vpaddd	%ymm5,%ymm8,%ymm10
+	vpcmpeqd	%ymm4,%ymm8,%ymm8
+	vmovdqa	%ymm3,224+128(%rax)
+	vpaddd	%ymm5,%ymm9,%ymm11
+	vpcmpeqd	%ymm4,%ymm9,%ymm9
+	vpaddd	%ymm5,%ymm10,%ymm12
+	vpcmpeqd	%ymm4,%ymm10,%ymm10
+	vpaddd	%ymm5,%ymm11,%ymm13
+	vpcmpeqd	%ymm4,%ymm11,%ymm11
+	vpaddd	%ymm5,%ymm12,%ymm14
+	vpcmpeqd	%ymm4,%ymm12,%ymm12
+	vpaddd	%ymm5,%ymm13,%ymm15
+	vpcmpeqd	%ymm4,%ymm13,%ymm13
+	vpcmpeqd	%ymm4,%ymm14,%ymm14
+	vpcmpeqd	%ymm4,%ymm15,%ymm15
+
+	vmovdqa	-32(%r10),%ymm7	/* the 32 bytes just before L$inc, i.e. L$gather_permd */
+	leaq	128(%rsi),%rsi	/* biased table pointer: N-128(%rsi) addresses table+N */
+	movl	$9,%edx	/* loop count: nine 512-byte rows */
+
+L$oop_gather_1024:
+	vmovdqa	0-128(%rsi),%ymm0
+	vmovdqa	32-128(%rsi),%ymm1
+	vmovdqa	64-128(%rsi),%ymm2
+	vmovdqa	96-128(%rsi),%ymm3
+	vpand	0+128(%rax),%ymm0,%ymm0
+	vpand	32+128(%rax),%ymm1,%ymm1
+	vpand	64+128(%rax),%ymm2,%ymm2
+	vpor	%ymm0,%ymm1,%ymm4	/* ymm4 and ymm5 are two independent OR accumulators */
+	vpand	96+128(%rax),%ymm3,%ymm3
+	vmovdqa	128-128(%rsi),%ymm0
+	vmovdqa	160-128(%rsi),%ymm1
+	vpor	%ymm2,%ymm3,%ymm5
+	vmovdqa	192-128(%rsi),%ymm2
+	vmovdqa	224-128(%rsi),%ymm3
+	vpand	128+128(%rax),%ymm0,%ymm0
+	vpand	160+128(%rax),%ymm1,%ymm1
+	vpand	192+128(%rax),%ymm2,%ymm2
+	vpor	%ymm0,%ymm4,%ymm4
+	vpand	224+128(%rax),%ymm3,%ymm3
+	vpand	256-128(%rsi),%ymm8,%ymm0
+	vpor	%ymm1,%ymm5,%ymm5
+	vpand	288-128(%rsi),%ymm9,%ymm1
+	vpor	%ymm2,%ymm4,%ymm4
+	vpand	320-128(%rsi),%ymm10,%ymm2
+	vpor	%ymm3,%ymm5,%ymm5
+	vpand	352-128(%rsi),%ymm11,%ymm3
+	vpor	%ymm0,%ymm4,%ymm4
+	vpand	384-128(%rsi),%ymm12,%ymm0
+	vpor	%ymm1,%ymm5,%ymm5
+	vpand	416-128(%rsi),%ymm13,%ymm1
+	vpor	%ymm2,%ymm4,%ymm4
+	vpand	448-128(%rsi),%ymm14,%ymm2
+	vpor	%ymm3,%ymm5,%ymm5
+	vpand	480-128(%rsi),%ymm15,%ymm3
+	leaq	512(%rsi),%rsi	/* next 512-byte row */
+	vpor	%ymm0,%ymm4,%ymm4
+	vpor	%ymm1,%ymm5,%ymm5
+	vpor	%ymm2,%ymm4,%ymm4
+	vpor	%ymm3,%ymm5,%ymm5
+
+	vpor	%ymm5,%ymm4,%ymm4
+	vextracti128	$1,%ymm4,%xmm5	/* fold the two 128-bit halves together */
+	vpor	%xmm4,%xmm5,%xmm5	/* VEX 128-bit form: the upper half of ymm5 becomes zero */
+	vpermd	%ymm5,%ymm7,%ymm5	/* dwords 0..3 go to even lanes, odd lanes take dword 7 (zero) */
+	vmovdqu	%ymm5,(%rdi)
+	leaq	32(%rdi),%rdi
+	decl	%edx
+	jnz	L$oop_gather_1024
+
+	vpxor	%ymm0,%ymm0,%ymm0
+	vmovdqu	%ymm0,(%rdi)	/* one extra 32-byte group of zeros after the nine data groups */
+	vzeroupper
+	leaq	(%r11),%rsp	/* restore the stack pointer saved on entry */
+
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+
+L$SEH_end_rsaz_1024_gather5:
+
+
+.globl	_rsaz_avx2_eligible
+.private_extern _rsaz_avx2_eligible
+/* rsaz_avx2_eligible: returns in eax bit 5 of the third 32-bit word of OPENSSL_ia32cap_P as 0 or 1. NOTE(review): presumably the AVX2 feature flag - confirm against the OPENSSL_ia32cap word layout. */
+.p2align	5
+_rsaz_avx2_eligible:
+	leaq	_OPENSSL_ia32cap_P(%rip),%rax
+	movl	8(%rax),%eax	/* third dword of the capability vector */
+	andl	$32,%eax	/* isolate bit 5 */
+	shrl	$5,%eax	/* normalise to 0 or 1 */
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+
+
+.p2align	6
+L$and_mask:	/* four copies of the 29-bit limb mask; not referenced in the code visible after this point */
+.quad	0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
+L$scatter_permd:	/* vpermd indices used by rsaz_1024_scatter5_avx2: pack dwords 0,2,4,6 into the low half */
+.long	0,2,4,6,7,7,7,7
+L$gather_permd:	/* vpermd indices read by rsaz_1024_gather5_avx2 as -32 bytes from L$inc; must stay immediately before L$inc */
+.long	0,7,1,7,2,7,3,7
+L$inc:	/* rows 0 and 1: starting candidate indices 0..3 for the gather masks; row 2: the increment of 4 */
+.long	0,0,0,0, 1,1,1,1
+.long	2,2,2,2, 3,3,3,3
+.long	4,4,4,4, 4,4,4,4
+.p2align	6
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/fipsmodule/sha1-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/sha1-x86_64.S
new file mode 100644
index 0000000..c22431c
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/sha1-x86_64.S
@@ -0,0 +1,3543 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+
+.globl	_sha1_block_data_order
+.private_extern _sha1_block_data_order
+/* sha1_block_data_order: entry point that selects an implementation from OPENSSL_ia32cap_P. The integer path at L$ialu treats rdi as a pointer to five 32-bit state words, rsi as the input pointer and rdx as the number of 64-byte blocks. */
+.p2align	4
+_sha1_block_data_order:
+	leaq	_OPENSSL_ia32cap_P(%rip),%r10
+	movl	0(%r10),%r9d
+	movl	4(%r10),%r8d
+	movl	8(%r10),%r10d
+	testl	$512,%r8d	/* bit 9 of word 1; NOTE(review): presumably SSSE3 - confirm against the OPENSSL_ia32cap layout */
+	jz	L$ialu	/* bit clear: use the integer-only code */
+	andl	$268435456,%r8d	/* keep bit 28 of word 1; NOTE(review): presumably AVX - confirm */
+	andl	$1073741824,%r9d	/* keep bit 30 of word 0; NOTE(review): presumably the Intel-CPU marker - confirm */
+	orl	%r9d,%r8d
+	cmpl	$1342177280,%r8d	/* 0x50000000: both of the bits kept above are set */
+	je	_avx_shortcut
+	jmp	_ssse3_shortcut
+
+.p2align	4
+L$ialu:	/* integer-only path: save registers, set up a 64-byte aligned frame and load the five state words */
+	movq	%rsp,%rax	/* entry stack pointer, stored at 64(%rsp) below for the epilogue */
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	movq	%rdi,%r8	/* r8 = state pointer */
+	subq	$72,%rsp	/* room for 16 message dwords plus the saved stack pointer */
+	movq	%rsi,%r9	/* r9 = input pointer */
+	andq	$-64,%rsp
+	movq	%rdx,%r10	/* r10 = remaining block count */
+	movq	%rax,64(%rsp)
+L$prologue:
+
+	movl	0(%r8),%esi	/* state words 0..4 live in esi, edi, r11d, r12d, r13d */
+	movl	4(%r8),%edi
+	movl	8(%r8),%r11d
+	movl	12(%r8),%r12d
+	movl	16(%r8),%r13d
+	jmp	L$loop
+
+.p2align	4
+L$loop:	/* one iteration per 64-byte block; this part is rounds 0-19, all using constant 1518500249 = 0x5A827999 */
+	movl	0(%r9),%edx
+	bswapl	%edx	/* each input dword is byte-swapped after loading */
+	movl	4(%r9),%ebp
+	movl	%r12d,%eax
+	movl	%edx,0(%rsp)	/* message words are kept in a 16-dword area at 0..60(%rsp) */
+	movl	%esi,%ecx
+	bswapl	%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	andl	%edi,%eax	/* eax = ((d ^ c) & b) ^ d once the xor two lines below has run */
+	leal	1518500249(%rdx,%r13,1),%r13d	/* e += constant + message word */
+	addl	%ecx,%r13d	/* e += a rotated left by 5 */
+	xorl	%r12d,%eax
+	roll	$30,%edi	/* b is rotated left by 30 in place; register roles shift by one each round */
+	addl	%eax,%r13d
+	movl	8(%r9),%r14d
+	movl	%r11d,%eax
+	movl	%ebp,4(%rsp)
+	movl	%r13d,%ecx
+	bswapl	%r14d
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	andl	%esi,%eax
+	leal	1518500249(%rbp,%r12,1),%r12d
+	addl	%ecx,%r12d
+	xorl	%r11d,%eax
+	roll	$30,%esi
+	addl	%eax,%r12d
+	movl	12(%r9),%edx
+	movl	%edi,%eax
+	movl	%r14d,8(%rsp)
+	movl	%r12d,%ecx
+	bswapl	%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	andl	%r13d,%eax
+	leal	1518500249(%r14,%r11,1),%r11d
+	addl	%ecx,%r11d
+	xorl	%edi,%eax
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	movl	16(%r9),%ebp
+	movl	%esi,%eax
+	movl	%edx,12(%rsp)
+	movl	%r11d,%ecx
+	bswapl	%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	andl	%r12d,%eax
+	leal	1518500249(%rdx,%rdi,1),%edi
+	addl	%ecx,%edi
+	xorl	%esi,%eax
+	roll	$30,%r12d
+	addl	%eax,%edi
+	movl	20(%r9),%r14d
+	movl	%r13d,%eax
+	movl	%ebp,16(%rsp)
+	movl	%edi,%ecx
+	bswapl	%r14d
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	andl	%r11d,%eax
+	leal	1518500249(%rbp,%rsi,1),%esi
+	addl	%ecx,%esi
+	xorl	%r13d,%eax
+	roll	$30,%r11d
+	addl	%eax,%esi
+	movl	24(%r9),%edx
+	movl	%r12d,%eax
+	movl	%r14d,20(%rsp)
+	movl	%esi,%ecx
+	bswapl	%edx
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	andl	%edi,%eax
+	leal	1518500249(%r14,%r13,1),%r13d
+	addl	%ecx,%r13d
+	xorl	%r12d,%eax
+	roll	$30,%edi
+	addl	%eax,%r13d
+	movl	28(%r9),%ebp
+	movl	%r11d,%eax
+	movl	%edx,24(%rsp)
+	movl	%r13d,%ecx
+	bswapl	%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	andl	%esi,%eax
+	leal	1518500249(%rdx,%r12,1),%r12d
+	addl	%ecx,%r12d
+	xorl	%r11d,%eax
+	roll	$30,%esi
+	addl	%eax,%r12d
+	movl	32(%r9),%r14d
+	movl	%edi,%eax
+	movl	%ebp,28(%rsp)
+	movl	%r12d,%ecx
+	bswapl	%r14d
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	andl	%r13d,%eax
+	leal	1518500249(%rbp,%r11,1),%r11d
+	addl	%ecx,%r11d
+	xorl	%edi,%eax
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	movl	36(%r9),%edx
+	movl	%esi,%eax
+	movl	%r14d,32(%rsp)
+	movl	%r11d,%ecx
+	bswapl	%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	andl	%r12d,%eax
+	leal	1518500249(%r14,%rdi,1),%edi
+	addl	%ecx,%edi
+	xorl	%esi,%eax
+	roll	$30,%r12d
+	addl	%eax,%edi
+	movl	40(%r9),%ebp
+	movl	%r13d,%eax
+	movl	%edx,36(%rsp)
+	movl	%edi,%ecx
+	bswapl	%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	andl	%r11d,%eax
+	leal	1518500249(%rdx,%rsi,1),%esi
+	addl	%ecx,%esi
+	xorl	%r13d,%eax
+	roll	$30,%r11d
+	addl	%eax,%esi
+	movl	44(%r9),%r14d
+	movl	%r12d,%eax
+	movl	%ebp,40(%rsp)
+	movl	%esi,%ecx
+	bswapl	%r14d
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	andl	%edi,%eax
+	leal	1518500249(%rbp,%r13,1),%r13d
+	addl	%ecx,%r13d
+	xorl	%r12d,%eax
+	roll	$30,%edi
+	addl	%eax,%r13d
+	movl	48(%r9),%edx
+	movl	%r11d,%eax
+	movl	%r14d,44(%rsp)
+	movl	%r13d,%ecx
+	bswapl	%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	andl	%esi,%eax
+	leal	1518500249(%r14,%r12,1),%r12d
+	addl	%ecx,%r12d
+	xorl	%r11d,%eax
+	roll	$30,%esi
+	addl	%eax,%r12d
+	movl	52(%r9),%ebp
+	movl	%edi,%eax
+	movl	%edx,48(%rsp)
+	movl	%r12d,%ecx
+	bswapl	%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	andl	%r13d,%eax
+	leal	1518500249(%rdx,%r11,1),%r11d
+	addl	%ecx,%r11d
+	xorl	%edi,%eax
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	movl	56(%r9),%r14d
+	movl	%esi,%eax
+	movl	%ebp,52(%rsp)
+	movl	%r11d,%ecx
+	bswapl	%r14d
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	andl	%r12d,%eax
+	leal	1518500249(%rbp,%rdi,1),%edi
+	addl	%ecx,%edi
+	xorl	%esi,%eax
+	roll	$30,%r12d
+	addl	%eax,%edi
+	movl	60(%r9),%edx
+	movl	%r13d,%eax
+	movl	%r14d,56(%rsp)
+	movl	%edi,%ecx
+	bswapl	%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	andl	%r11d,%eax
+	leal	1518500249(%r14,%rsi,1),%esi
+	addl	%ecx,%esi
+	xorl	%r13d,%eax
+	roll	$30,%r11d
+	addl	%eax,%esi
+	xorl	0(%rsp),%ebp	/* from here on no input is loaded: the next word is the register holding an earlier word xored with three stack slots, then rotated left by 1 */
+	movl	%r12d,%eax
+	movl	%edx,60(%rsp)
+	movl	%esi,%ecx
+	xorl	8(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	32(%rsp),%ebp
+	andl	%edi,%eax
+	leal	1518500249(%rdx,%r13,1),%r13d
+	roll	$30,%edi
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	roll	$1,%ebp
+	addl	%eax,%r13d
+	xorl	4(%rsp),%r14d
+	movl	%r11d,%eax
+	movl	%ebp,0(%rsp)	/* the new word overwrites the oldest slot of the 16-dword area */
+	movl	%r13d,%ecx
+	xorl	12(%rsp),%r14d
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	36(%rsp),%r14d
+	andl	%esi,%eax
+	leal	1518500249(%rbp,%r12,1),%r12d
+	roll	$30,%esi
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	roll	$1,%r14d
+	addl	%eax,%r12d
+	xorl	8(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r14d,4(%rsp)
+	movl	%r12d,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	40(%rsp),%edx
+	andl	%r13d,%eax
+	leal	1518500249(%r14,%r11,1),%r11d
+	roll	$30,%r13d
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	roll	$1,%edx
+	addl	%eax,%r11d
+	xorl	12(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%edx,8(%rsp)
+	movl	%r11d,%ecx
+	xorl	20(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	44(%rsp),%ebp
+	andl	%r12d,%eax
+	leal	1518500249(%rdx,%rdi,1),%edi
+	roll	$30,%r12d
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	roll	$1,%ebp
+	addl	%eax,%edi
+	xorl	16(%rsp),%r14d
+	movl	%r13d,%eax
+	movl	%ebp,12(%rsp)
+	movl	%edi,%ecx
+	xorl	24(%rsp),%r14d
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	48(%rsp),%r14d
+	andl	%r11d,%eax
+	leal	1518500249(%rbp,%rsi,1),%esi
+	roll	$30,%r11d
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	roll	$1,%r14d
+	addl	%eax,%esi
+	xorl	20(%rsp),%edx	/* rounds 20-39: the mixing function is the three-way xor b ^ c ^ d, constant 1859775393 = 0x6ED9EBA1 */
+	movl	%edi,%eax
+	movl	%r14d,16(%rsp)
+	movl	%esi,%ecx
+	xorl	28(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	52(%rsp),%edx
+	leal	1859775393(%r14,%r13,1),%r13d	/* e += constant + message word */
+	xorl	%r11d,%eax	/* eax now holds the xor of the three middle state words */
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	xorl	24(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%edx,20(%rsp)
+	movl	%r13d,%ecx
+	xorl	32(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	56(%rsp),%ebp
+	leal	1859775393(%rdx,%r12,1),%r12d
+	xorl	%edi,%eax
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	xorl	28(%rsp),%r14d
+	movl	%r13d,%eax
+	movl	%ebp,24(%rsp)
+	movl	%r12d,%ecx
+	xorl	36(%rsp),%r14d
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	60(%rsp),%r14d
+	leal	1859775393(%rbp,%r11,1),%r11d
+	xorl	%esi,%eax
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%r14d
+	xorl	32(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%r14d,28(%rsp)
+	movl	%r11d,%ecx
+	xorl	40(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	0(%rsp),%edx
+	leal	1859775393(%r14,%rdi,1),%edi
+	xorl	%r13d,%eax
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	xorl	36(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%edx,32(%rsp)
+	movl	%edi,%ecx
+	xorl	44(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	4(%rsp),%ebp
+	leal	1859775393(%rdx,%rsi,1),%esi
+	xorl	%r12d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	xorl	40(%rsp),%r14d
+	movl	%edi,%eax
+	movl	%ebp,36(%rsp)
+	movl	%esi,%ecx
+	xorl	48(%rsp),%r14d
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	8(%rsp),%r14d
+	leal	1859775393(%rbp,%r13,1),%r13d
+	xorl	%r11d,%eax
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%r14d
+	xorl	44(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r14d,40(%rsp)
+	movl	%r13d,%ecx
+	xorl	52(%rsp),%edx
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	12(%rsp),%edx
+	leal	1859775393(%r14,%r12,1),%r12d
+	xorl	%edi,%eax
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%edx
+	xorl	48(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%edx,44(%rsp)
+	movl	%r12d,%ecx
+	xorl	56(%rsp),%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	16(%rsp),%ebp
+	leal	1859775393(%rdx,%r11,1),%r11d
+	xorl	%esi,%eax
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	xorl	52(%rsp),%r14d
+	movl	%r12d,%eax
+	movl	%ebp,48(%rsp)
+	movl	%r11d,%ecx
+	xorl	60(%rsp),%r14d
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	20(%rsp),%r14d
+	leal	1859775393(%rbp,%rdi,1),%edi
+	xorl	%r13d,%eax
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%r14d
+	xorl	56(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%r14d,52(%rsp)
+	movl	%edi,%ecx
+	xorl	0(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	24(%rsp),%edx
+	leal	1859775393(%r14,%rsi,1),%esi
+	xorl	%r12d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%edx
+	xorl	60(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%edx,56(%rsp)
+	movl	%esi,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	28(%rsp),%ebp
+	leal	1859775393(%rdx,%r13,1),%r13d
+	xorl	%r11d,%eax
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	xorl	0(%rsp),%r14d
+	movl	%esi,%eax
+	movl	%ebp,60(%rsp)
+	movl	%r13d,%ecx
+	xorl	8(%rsp),%r14d
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	32(%rsp),%r14d
+	leal	1859775393(%rbp,%r12,1),%r12d
+	xorl	%edi,%eax
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%r14d
+	xorl	4(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r14d,0(%rsp)
+	movl	%r12d,%ecx
+	xorl	12(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	36(%rsp),%edx
+	leal	1859775393(%r14,%r11,1),%r11d
+	xorl	%esi,%eax
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	xorl	8(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edx,4(%rsp)
+	movl	%r11d,%ecx
+	xorl	16(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	40(%rsp),%ebp
+	leal	1859775393(%rdx,%rdi,1),%edi
+	xorl	%r13d,%eax
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	xorl	12(%rsp),%r14d
+	movl	%r11d,%eax
+	movl	%ebp,8(%rsp)
+	movl	%edi,%ecx
+	xorl	20(%rsp),%r14d
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	44(%rsp),%r14d
+	leal	1859775393(%rbp,%rsi,1),%esi
+	xorl	%r12d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%r14d
+	xorl	16(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r14d,12(%rsp)
+	movl	%esi,%ecx
+	xorl	24(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	48(%rsp),%edx
+	leal	1859775393(%r14,%r13,1),%r13d
+	xorl	%r11d,%eax
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	xorl	20(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%edx,16(%rsp)
+	movl	%r13d,%ecx
+	xorl	28(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	52(%rsp),%ebp
+	leal	1859775393(%rdx,%r12,1),%r12d
+	xorl	%edi,%eax
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	xorl	24(%rsp),%r14d
+	movl	%r13d,%eax
+	movl	%ebp,20(%rsp)
+	movl	%r12d,%ecx
+	xorl	32(%rsp),%r14d
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	56(%rsp),%r14d
+	leal	1859775393(%rbp,%r11,1),%r11d
+	xorl	%esi,%eax
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%r14d
+	xorl	28(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%r14d,24(%rsp)
+	movl	%r11d,%ecx
+	xorl	36(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	60(%rsp),%edx
+	leal	1859775393(%r14,%rdi,1),%edi
+	xorl	%r13d,%eax
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	xorl	32(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%edx,28(%rsp)
+	movl	%edi,%ecx
+	xorl	40(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	0(%rsp),%ebp
+	leal	1859775393(%rdx,%rsi,1),%esi
+	xorl	%r12d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	xorl	36(%rsp),%r14d	/* rounds 40-59: constant -1894007588 = 0x8F1BBCDC; the mixing value is added as two separate terms */
+	movl	%r12d,%eax
+	movl	%ebp,32(%rsp)
+	movl	%r12d,%ebx
+	xorl	44(%rsp),%r14d
+	andl	%r11d,%eax	/* first term: c & d */
+	movl	%esi,%ecx
+	xorl	4(%rsp),%r14d
+	leal	-1894007588(%rbp,%r13,1),%r13d
+	xorl	%r11d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r13d
+	roll	$1,%r14d
+	andl	%edi,%ebx	/* second term: (c ^ d) & b; it shares no set bits with c & d, so the two adds equal the bitwise majority of b, c, d */
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%ebx,%r13d
+	xorl	40(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%r14d,36(%rsp)
+	movl	%r11d,%ebx
+	xorl	48(%rsp),%edx
+	andl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	8(%rsp),%edx
+	leal	-1894007588(%r14,%r12,1),%r12d
+	xorl	%edi,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r12d
+	roll	$1,%edx
+	andl	%esi,%ebx
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%ebx,%r12d
+	xorl	44(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%edx,40(%rsp)
+	movl	%edi,%ebx
+	xorl	52(%rsp),%ebp
+	andl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	12(%rsp),%ebp
+	leal	-1894007588(%rdx,%r11,1),%r11d
+	xorl	%esi,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	andl	%r13d,%ebx
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%ebx,%r11d
+	xorl	48(%rsp),%r14d
+	movl	%esi,%eax
+	movl	%ebp,44(%rsp)
+	movl	%esi,%ebx
+	xorl	56(%rsp),%r14d
+	andl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	16(%rsp),%r14d
+	leal	-1894007588(%rbp,%rdi,1),%edi
+	xorl	%r13d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%edi
+	roll	$1,%r14d
+	andl	%r12d,%ebx
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%ebx,%edi
+	xorl	52(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r14d,48(%rsp)
+	movl	%r13d,%ebx
+	xorl	60(%rsp),%edx
+	andl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	20(%rsp),%edx
+	leal	-1894007588(%r14,%rsi,1),%esi
+	xorl	%r12d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%esi
+	roll	$1,%edx
+	andl	%r11d,%ebx
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%ebx,%esi
+	xorl	56(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edx,52(%rsp)
+	movl	%r12d,%ebx
+	xorl	0(%rsp),%ebp
+	andl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	24(%rsp),%ebp
+	leal	-1894007588(%rdx,%r13,1),%r13d
+	xorl	%r11d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	andl	%edi,%ebx
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%ebx,%r13d
+	xorl	60(%rsp),%r14d
+	movl	%r11d,%eax
+	movl	%ebp,56(%rsp)
+	movl	%r11d,%ebx
+	xorl	4(%rsp),%r14d
+	andl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	28(%rsp),%r14d
+	leal	-1894007588(%rbp,%r12,1),%r12d
+	xorl	%edi,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r12d
+	roll	$1,%r14d
+	andl	%esi,%ebx
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%ebx,%r12d
+	xorl	0(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r14d,60(%rsp)
+	movl	%edi,%ebx
+	xorl	8(%rsp),%edx
+	andl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	32(%rsp),%edx
+	leal	-1894007588(%r14,%r11,1),%r11d
+	xorl	%esi,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r11d
+	roll	$1,%edx
+	andl	%r13d,%ebx
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%ebx,%r11d
+	xorl	4(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%edx,0(%rsp)
+	movl	%esi,%ebx
+	xorl	12(%rsp),%ebp
+	andl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	36(%rsp),%ebp
+	leal	-1894007588(%rdx,%rdi,1),%edi
+	xorl	%r13d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%edi
+	roll	$1,%ebp
+	andl	%r12d,%ebx
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%ebx,%edi
+	xorl	8(%rsp),%r14d
+	movl	%r13d,%eax
+	movl	%ebp,4(%rsp)
+	movl	%r13d,%ebx
+	xorl	16(%rsp),%r14d
+	andl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	40(%rsp),%r14d
+	leal	-1894007588(%rbp,%rsi,1),%esi
+	xorl	%r12d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%esi
+	roll	$1,%r14d
+	andl	%r11d,%ebx
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%ebx,%esi
+	xorl	12(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%r14d,8(%rsp)
+	movl	%r12d,%ebx
+	xorl	20(%rsp),%edx
+	andl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	44(%rsp),%edx
+	leal	-1894007588(%r14,%r13,1),%r13d
+	xorl	%r11d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r13d
+	roll	$1,%edx
+	andl	%edi,%ebx
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%ebx,%r13d
+	xorl	16(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%edx,12(%rsp)
+	movl	%r11d,%ebx
+	xorl	24(%rsp),%ebp
+	andl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	48(%rsp),%ebp
+	leal	-1894007588(%rdx,%r12,1),%r12d
+	xorl	%edi,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	andl	%esi,%ebx
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%ebx,%r12d
+	xorl	20(%rsp),%r14d
+	movl	%edi,%eax
+	movl	%ebp,16(%rsp)
+	movl	%edi,%ebx
+	xorl	28(%rsp),%r14d
+	andl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	52(%rsp),%r14d
+	leal	-1894007588(%rbp,%r11,1),%r11d
+	xorl	%esi,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r11d
+	roll	$1,%r14d
+	andl	%r13d,%ebx
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%ebx,%r11d
+	xorl	24(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r14d,20(%rsp)
+	movl	%esi,%ebx
+	xorl	32(%rsp),%edx
+	andl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	56(%rsp),%edx
+	leal	-1894007588(%r14,%rdi,1),%edi
+	xorl	%r13d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%edi
+	roll	$1,%edx
+	andl	%r12d,%ebx
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%ebx,%edi
+	xorl	28(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%edx,24(%rsp)
+	movl	%r13d,%ebx
+	xorl	36(%rsp),%ebp
+	andl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	60(%rsp),%ebp
+	leal	-1894007588(%rdx,%rsi,1),%esi
+	xorl	%r12d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%esi
+	roll	$1,%ebp
+	andl	%r11d,%ebx
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%ebx,%esi
+	xorl	32(%rsp),%r14d
+	movl	%r12d,%eax
+	movl	%ebp,28(%rsp)
+	movl	%r12d,%ebx
+	xorl	40(%rsp),%r14d
+	andl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	0(%rsp),%r14d
+	leal	-1894007588(%rbp,%r13,1),%r13d
+	xorl	%r11d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r13d
+	roll	$1,%r14d
+	andl	%edi,%ebx
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%ebx,%r13d
+	xorl	36(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%r14d,32(%rsp)
+	movl	%r11d,%ebx
+	xorl	44(%rsp),%edx
+	andl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	4(%rsp),%edx
+	leal	-1894007588(%r14,%r12,1),%r12d
+	xorl	%edi,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r12d
+	roll	$1,%edx
+	andl	%esi,%ebx
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%ebx,%r12d
+	xorl	40(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%edx,36(%rsp)
+	movl	%edi,%ebx
+	xorl	48(%rsp),%ebp
+	andl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	8(%rsp),%ebp
+	leal	-1894007588(%rdx,%r11,1),%r11d
+	xorl	%esi,%ebx
+	roll	$5,%ecx
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	andl	%r13d,%ebx
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%ebx,%r11d
+	xorl	44(%rsp),%r14d
+	movl	%esi,%eax
+	movl	%ebp,40(%rsp)
+	movl	%esi,%ebx
+	xorl	52(%rsp),%r14d
+	andl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	12(%rsp),%r14d
+	leal	-1894007588(%rbp,%rdi,1),%edi
+	xorl	%r13d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%edi
+	roll	$1,%r14d
+	andl	%r12d,%ebx
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%ebx,%edi
+	xorl	48(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r14d,44(%rsp)
+	movl	%r13d,%ebx
+	xorl	56(%rsp),%edx
+	andl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	16(%rsp),%edx
+	leal	-1894007588(%r14,%rsi,1),%esi
+	xorl	%r12d,%ebx
+	roll	$5,%ecx
+	addl	%eax,%esi
+	roll	$1,%edx
+	andl	%r11d,%ebx
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%ebx,%esi
+	xorl	52(%rsp),%ebp	/* rounds 60-79: three-way xor b ^ c ^ d again, constant -899497514 = 0xCA62C1D6 */
+	movl	%edi,%eax
+	movl	%edx,48(%rsp)
+	movl	%esi,%ecx
+	xorl	60(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	20(%rsp),%ebp
+	leal	-899497514(%rdx,%r13,1),%r13d	/* e += constant + message word */
+	xorl	%r11d,%eax
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	xorl	56(%rsp),%r14d
+	movl	%esi,%eax
+	movl	%ebp,52(%rsp)
+	movl	%r13d,%ecx
+	xorl	0(%rsp),%r14d
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	24(%rsp),%r14d
+	leal	-899497514(%rbp,%r12,1),%r12d
+	xorl	%edi,%eax
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%r14d
+	xorl	60(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r14d,56(%rsp)
+	movl	%r12d,%ecx
+	xorl	4(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	28(%rsp),%edx
+	leal	-899497514(%r14,%r11,1),%r11d
+	xorl	%esi,%eax
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	xorl	0(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edx,60(%rsp)
+	movl	%r11d,%ecx
+	xorl	8(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	32(%rsp),%ebp
+	leal	-899497514(%rdx,%rdi,1),%edi
+	xorl	%r13d,%eax
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	xorl	4(%rsp),%r14d
+	movl	%r11d,%eax
+	movl	%ebp,0(%rsp)
+	movl	%edi,%ecx
+	xorl	12(%rsp),%r14d
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	36(%rsp),%r14d
+	leal	-899497514(%rbp,%rsi,1),%esi
+	xorl	%r12d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%r14d
+	xorl	8(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r14d,4(%rsp)
+	movl	%esi,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	40(%rsp),%edx
+	leal	-899497514(%r14,%r13,1),%r13d
+	xorl	%r11d,%eax
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	xorl	12(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%edx,8(%rsp)
+	movl	%r13d,%ecx
+	xorl	20(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	44(%rsp),%ebp
+	leal	-899497514(%rdx,%r12,1),%r12d
+	xorl	%edi,%eax
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	xorl	16(%rsp),%r14d
+	movl	%r13d,%eax
+	movl	%ebp,12(%rsp)
+	movl	%r12d,%ecx
+	xorl	24(%rsp),%r14d
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	48(%rsp),%r14d
+	leal	-899497514(%rbp,%r11,1),%r11d
+	xorl	%esi,%eax
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%r14d
+	xorl	20(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%r14d,16(%rsp)
+	movl	%r11d,%ecx
+	xorl	28(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	52(%rsp),%edx
+	leal	-899497514(%r14,%rdi,1),%edi
+	xorl	%r13d,%eax
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	xorl	24(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%edx,20(%rsp)
+	movl	%edi,%ecx
+	xorl	32(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	56(%rsp),%ebp
+	leal	-899497514(%rdx,%rsi,1),%esi
+	xorl	%r12d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	xorl	28(%rsp),%r14d
+	movl	%edi,%eax
+	movl	%ebp,24(%rsp)
+	movl	%esi,%ecx
+	xorl	36(%rsp),%r14d
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	60(%rsp),%r14d
+	leal	-899497514(%rbp,%r13,1),%r13d
+	xorl	%r11d,%eax
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%r14d
+	xorl	32(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r14d,28(%rsp)	/* last store into the message area; the remaining rounds keep new words in registers only */
+	movl	%r13d,%ecx
+	xorl	40(%rsp),%edx
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	0(%rsp),%edx
+	leal	-899497514(%r14,%r12,1),%r12d
+	xorl	%edi,%eax
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%edx
+	xorl	36(%rsp),%ebp
+	movl	%r13d,%eax
+
+	movl	%r12d,%ecx
+	xorl	44(%rsp),%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	4(%rsp),%ebp
+	leal	-899497514(%rdx,%r11,1),%r11d
+	xorl	%esi,%eax
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	xorl	40(%rsp),%r14d
+	movl	%r12d,%eax
+
+	movl	%r11d,%ecx
+	xorl	48(%rsp),%r14d
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	8(%rsp),%r14d
+	leal	-899497514(%rbp,%rdi,1),%edi
+	xorl	%r13d,%eax
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%r14d
+	xorl	44(%rsp),%edx
+	movl	%r11d,%eax
+
+	movl	%edi,%ecx
+	xorl	52(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	12(%rsp),%edx
+	leal	-899497514(%r14,%rsi,1),%esi
+	xorl	%r12d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%edx
+	xorl	48(%rsp),%ebp
+	movl	%edi,%eax
+
+	movl	%esi,%ecx
+	xorl	56(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	16(%rsp),%ebp
+	leal	-899497514(%rdx,%r13,1),%r13d
+	xorl	%r11d,%eax
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	xorl	52(%rsp),%r14d
+	movl	%esi,%eax
+
+	movl	%r13d,%ecx
+	xorl	60(%rsp),%r14d
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	20(%rsp),%r14d
+	leal	-899497514(%rbp,%r12,1),%r12d
+	xorl	%edi,%eax
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%r14d
+	xorl	56(%rsp),%edx
+	movl	%r13d,%eax
+
+	movl	%r12d,%ecx
+	xorl	0(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	24(%rsp),%edx
+	leal	-899497514(%r14,%r11,1),%r11d
+	xorl	%esi,%eax
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	xorl	60(%rsp),%ebp
+	movl	%r12d,%eax
+
+	movl	%r11d,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	28(%rsp),%ebp
+	leal	-899497514(%rdx,%rdi,1),%edi
+	xorl	%r13d,%eax
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	movl	%r11d,%eax	/* final round: no further message word is computed */
+	movl	%edi,%ecx
+	xorl	%r13d,%eax
+	leal	-899497514(%rbp,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	%r12d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	addl	0(%r8),%esi	/* add the state words from before this block to the five working registers */
+	addl	4(%r8),%edi
+	addl	8(%r8),%r11d
+	addl	12(%r8),%r12d
+	addl	16(%r8),%r13d
+	movl	%esi,0(%r8)	/* write the updated state back; the registers also carry it into the next iteration */
+	movl	%edi,4(%r8)
+	movl	%r11d,8(%r8)
+	movl	%r12d,12(%r8)
+	movl	%r13d,16(%r8)
+
+	subq	$1,%r10	/* one block done; sets ZF when the count reaches zero */
+	leaq	64(%r9),%r9	/* advance the input pointer; lea leaves the flags from subq intact */
+	jnz	L$loop
+
+	movq	64(%rsp),%rsi	/* stack pointer value saved in the L$ialu prologue */
+	movq	-40(%rsi),%r14	/* reload the five pushed registers from just below that address */
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp
+L$epilogue:
+	.byte	0xf3,0xc3	/* encoded "rep ret" */
+
+
+.p2align	4
+sha1_block_data_order_ssse3:	/* SSSE3 path of the SHA-1 block routine (per the symbol name). In: %rdi = state words, %rsi = input, %rdx = count of 64-byte blocks */
+_ssse3_shortcut:	/* second label on the same entry point */
+	movq	%rsp,%r11	/* keep the entry %rsp; the epilogue restores registers and %rsp relative to %r11 */
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	leaq	-64(%rsp),%rsp	/* reserve 64 bytes of scratch: 16 dwords read by the rounds as N(%rsp) */
+	andq	$-64,%rsp	/* round %rsp down to a 64-byte boundary; the movdqa stores to the scratch need an aligned address */
+	movq	%rdi,%r8	/* %r8 = pointer to the five 32-bit state words */
+	movq	%rsi,%r9	/* %r9 = input cursor, advanced 64 bytes per block */
+	movq	%rdx,%r10	/* %r10 = block count. NOTE(review): the loop body runs before the end check, so a count of 0 is not handled - callers presumably never pass 0 */
+
+	shlq	$6,%r10	/* count * 64 bytes */
+	addq	%r9,%r10	/* %r10 = end-of-input pointer */
+	leaq	K_XX_XX+64(%rip),%r14	/* %r14 = K_XX_XX+64; vectors are fetched at -64, -32, 0, 32 and 64(%r14). The table itself is defined outside this excerpt */
+
+	movl	0(%r8),%eax	/* a */
+	movl	4(%r8),%ebx	/* b */
+	movl	8(%r8),%ecx	/* c */
+	movl	12(%r8),%edx	/* d */
+	movl	%ebx,%esi
+	movl	16(%r8),%ebp	/* e */
+	movl	%ecx,%edi
+	xorl	%edx,%edi
+	andl	%edi,%esi	/* %esi = b & (c ^ d); the first round finishes this with an xor of d */
+
+	movdqa	64(%r14),%xmm6	/* shuffle control for the pshufb encodings below (presumably a byte-swap mask - table not visible here) */
+	movdqa	-64(%r14),%xmm9	/* constant vector added to the message words (presumably the first SHA-1 round constant) */
+	movdqu	0(%r9),%xmm0	/* load the first 64-byte block; the input may be unaligned */
+	movdqu	16(%r9),%xmm1
+	movdqu	32(%r9),%xmm2
+	movdqu	48(%r9),%xmm3
+.byte	102,15,56,0,198	/* pshufb %xmm6,%xmm0 */
+.byte	102,15,56,0,206	/* pshufb %xmm6,%xmm1 */
+.byte	102,15,56,0,214	/* pshufb %xmm6,%xmm2 */
+	addq	$64,%r9
+	paddd	%xmm9,%xmm0	/* words 0-3 plus constant */
+.byte	102,15,56,0,222	/* pshufb %xmm6,%xmm3 */
+	paddd	%xmm9,%xmm1
+	paddd	%xmm9,%xmm2
+	movdqa	%xmm0,0(%rsp)	/* scratch holds word+constant for the scalar rounds */
+	psubd	%xmm9,%xmm0	/* undo the add so %xmm0 holds the plain words again */
+	movdqa	%xmm1,16(%rsp)
+	psubd	%xmm9,%xmm1
+	movdqa	%xmm2,32(%rsp)
+	psubd	%xmm9,%xmm2
+	jmp	L$oop_ssse3	/* words 12-15 (%xmm3) get their constant and are stored at the top of the loop */
+.p2align	4
+L$oop_ssse3:	/* one pass = 80 scalar rounds on one 64-byte block; each round adds a precomputed dword from N(%rsp) while SSE code builds later schedule words */
+	rorl	$2,%ebx	/* rotate right 2 == rotate left 30 */
+	pshufd	$238,%xmm0,%xmm4
+	xorl	%edx,%esi	/* %esi = (b & (c ^ d)) ^ d, completing the value left in %esi on entry */
+	movdqa	%xmm3,%xmm8
+	paddd	%xmm3,%xmm9	/* %xmm9 = words 12-15 plus constant, stored to 48(%rsp) below */
+	movl	%eax,%edi
+	addl	0(%rsp),%ebp	/* round 0 */
+	punpcklqdq	%xmm1,%xmm4
+	xorl	%ecx,%ebx
+	roll	$5,%eax
+	addl	%esi,%ebp
+	psrldq	$4,%xmm8
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	pxor	%xmm0,%xmm4
+	addl	%eax,%ebp
+	rorl	$7,%eax	/* undoes the rol 5 and rotates right 2 more (net rotate left 30) */
+	pxor	%xmm2,%xmm8
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	addl	4(%rsp),%edx	/* round 1 */
+	pxor	%xmm8,%xmm4
+	xorl	%ebx,%eax
+	roll	$5,%ebp
+	movdqa	%xmm9,48(%rsp)	/* fill scratch slot 48..60 before rounds 12-15 read it */
+	addl	%edi,%edx
+	andl	%eax,%esi
+	movdqa	%xmm4,%xmm10
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	rorl	$7,%ebp
+	movdqa	%xmm4,%xmm8
+	xorl	%ebx,%esi
+	pslldq	$12,%xmm10
+	paddd	%xmm4,%xmm4	/* x+x = x<<1; OR-ed with x>>31 below this is a per-lane rotate left by 1 */
+	movl	%edx,%edi
+	addl	8(%rsp),%ecx
+	psrld	$31,%xmm8
+	xorl	%eax,%ebp
+	roll	$5,%edx
+	addl	%esi,%ecx
+	movdqa	%xmm10,%xmm9
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	psrld	$30,%xmm10
+	addl	%edx,%ecx
+	rorl	$7,%edx
+	por	%xmm8,%xmm4
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	addl	12(%rsp),%ebx
+	pslld	$2,%xmm9
+	pxor	%xmm10,%xmm4
+	xorl	%ebp,%edx
+	movdqa	-64(%r14),%xmm10	/* constant vector at K_XX_XX+0 again, for the new words in %xmm4 */
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	andl	%edx,%esi
+	pxor	%xmm9,%xmm4
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	rorl	$7,%ecx
+	pshufd	$238,%xmm1,%xmm5
+	xorl	%ebp,%esi
+	movdqa	%xmm4,%xmm9
+	paddd	%xmm4,%xmm10
+	movl	%ebx,%edi
+	addl	16(%rsp),%eax
+	punpcklqdq	%xmm2,%xmm5
+	xorl	%edx,%ecx
+	roll	$5,%ebx
+	addl	%esi,%eax
+	psrldq	$4,%xmm9
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	pxor	%xmm1,%xmm5
+	addl	%ebx,%eax
+	rorl	$7,%ebx
+	pxor	%xmm3,%xmm9
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	addl	20(%rsp),%ebp
+	pxor	%xmm9,%xmm5
+	xorl	%ecx,%ebx
+	roll	$5,%eax
+	movdqa	%xmm10,0(%rsp)	/* slot 0..12 was consumed by rounds 0-3; refill it with %xmm4 + constant */
+	addl	%edi,%ebp
+	andl	%ebx,%esi
+	movdqa	%xmm5,%xmm8
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	rorl	$7,%eax
+	movdqa	%xmm5,%xmm9
+	xorl	%ecx,%esi
+	pslldq	$12,%xmm8
+	paddd	%xmm5,%xmm5
+	movl	%ebp,%edi
+	addl	24(%rsp),%edx
+	psrld	$31,%xmm9
+	xorl	%ebx,%eax
+	roll	$5,%ebp
+	addl	%esi,%edx
+	movdqa	%xmm8,%xmm10
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	psrld	$30,%xmm8
+	addl	%ebp,%edx
+	rorl	$7,%ebp
+	por	%xmm9,%xmm5
+	xorl	%ebx,%edi
+	movl	%edx,%esi
+	addl	28(%rsp),%ecx
+	pslld	$2,%xmm10
+	pxor	%xmm8,%xmm5
+	xorl	%eax,%ebp
+	movdqa	-32(%r14),%xmm8	/* constant vector at K_XX_XX+32 (presumably the second round constant) */
+	roll	$5,%edx
+	addl	%edi,%ecx
+	andl	%ebp,%esi
+	pxor	%xmm10,%xmm5
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	rorl	$7,%edx
+	pshufd	$238,%xmm2,%xmm6
+	xorl	%eax,%esi
+	movdqa	%xmm5,%xmm10
+	paddd	%xmm5,%xmm8
+	movl	%ecx,%edi
+	addl	32(%rsp),%ebx
+	punpcklqdq	%xmm3,%xmm6
+	xorl	%ebp,%edx
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	psrldq	$4,%xmm10
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	pxor	%xmm2,%xmm6
+	addl	%ecx,%ebx
+	rorl	$7,%ecx
+	pxor	%xmm4,%xmm10
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	addl	36(%rsp),%eax
+	pxor	%xmm10,%xmm6
+	xorl	%edx,%ecx
+	roll	$5,%ebx
+	movdqa	%xmm8,16(%rsp)
+	addl	%edi,%eax
+	andl	%ecx,%esi
+	movdqa	%xmm6,%xmm9
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	rorl	$7,%ebx
+	movdqa	%xmm6,%xmm10
+	xorl	%edx,%esi
+	pslldq	$12,%xmm9
+	paddd	%xmm6,%xmm6
+	movl	%eax,%edi
+	addl	40(%rsp),%ebp
+	psrld	$31,%xmm10
+	xorl	%ecx,%ebx
+	roll	$5,%eax
+	addl	%esi,%ebp
+	movdqa	%xmm9,%xmm8
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	psrld	$30,%xmm9
+	addl	%eax,%ebp
+	rorl	$7,%eax
+	por	%xmm10,%xmm6
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	addl	44(%rsp),%edx
+	pslld	$2,%xmm8
+	pxor	%xmm9,%xmm6
+	xorl	%ebx,%eax
+	movdqa	-32(%r14),%xmm9
+	roll	$5,%ebp
+	addl	%edi,%edx
+	andl	%eax,%esi
+	pxor	%xmm8,%xmm6
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	rorl	$7,%ebp
+	pshufd	$238,%xmm3,%xmm7
+	xorl	%ebx,%esi
+	movdqa	%xmm6,%xmm8
+	paddd	%xmm6,%xmm9
+	movl	%edx,%edi
+	addl	48(%rsp),%ecx
+	punpcklqdq	%xmm4,%xmm7
+	xorl	%eax,%ebp
+	roll	$5,%edx
+	addl	%esi,%ecx
+	psrldq	$4,%xmm8
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	pxor	%xmm3,%xmm7
+	addl	%edx,%ecx
+	rorl	$7,%edx
+	pxor	%xmm5,%xmm8
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	addl	52(%rsp),%ebx
+	pxor	%xmm8,%xmm7
+	xorl	%ebp,%edx
+	roll	$5,%ecx
+	movdqa	%xmm9,32(%rsp)
+	addl	%edi,%ebx
+	andl	%edx,%esi
+	movdqa	%xmm7,%xmm10
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	rorl	$7,%ecx
+	movdqa	%xmm7,%xmm8
+	xorl	%ebp,%esi
+	pslldq	$12,%xmm10
+	paddd	%xmm7,%xmm7
+	movl	%ebx,%edi
+	addl	56(%rsp),%eax
+	psrld	$31,%xmm8
+	xorl	%edx,%ecx
+	roll	$5,%ebx
+	addl	%esi,%eax
+	movdqa	%xmm10,%xmm9
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	psrld	$30,%xmm10
+	addl	%ebx,%eax
+	rorl	$7,%ebx
+	por	%xmm8,%xmm7
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	addl	60(%rsp),%ebp	/* round 15 */
+	pslld	$2,%xmm9
+	pxor	%xmm10,%xmm7
+	xorl	%ecx,%ebx
+	movdqa	-32(%r14),%xmm10
+	roll	$5,%eax
+	addl	%edi,%ebp
+	andl	%ebx,%esi
+	pxor	%xmm9,%xmm7
+	pshufd	$238,%xmm6,%xmm9
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	rorl	$7,%eax
+	pxor	%xmm4,%xmm0
+	xorl	%ecx,%esi
+	movl	%ebp,%edi
+	addl	0(%rsp),%edx	/* round 16 */
+	punpcklqdq	%xmm7,%xmm9
+	xorl	%ebx,%eax
+	roll	$5,%ebp
+	pxor	%xmm1,%xmm0
+	addl	%esi,%edx
+	andl	%eax,%edi
+	movdqa	%xmm10,%xmm8
+	xorl	%ebx,%eax
+	paddd	%xmm7,%xmm10
+	addl	%ebp,%edx
+	pxor	%xmm9,%xmm0
+	rorl	$7,%ebp
+	xorl	%ebx,%edi
+	movl	%edx,%esi
+	addl	4(%rsp),%ecx
+	movdqa	%xmm0,%xmm9
+	xorl	%eax,%ebp
+	roll	$5,%edx
+	movdqa	%xmm10,48(%rsp)
+	addl	%edi,%ecx
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	pslld	$2,%xmm0
+	addl	%edx,%ecx
+	rorl	$7,%edx
+	psrld	$30,%xmm9
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	addl	8(%rsp),%ebx
+	por	%xmm9,%xmm0	/* (x<<2) | (x>>30): per-lane rotate left by 2 */
+	xorl	%ebp,%edx
+	roll	$5,%ecx
+	pshufd	$238,%xmm7,%xmm10
+	addl	%esi,%ebx
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	addl	12(%rsp),%eax	/* round 19: last round whose mixing term uses andl in this stretch */
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	pxor	%xmm5,%xmm1
+	addl	16(%rsp),%ebp	/* round 20: from here the mixing term is a three-way xor of the b, c, d words */
+	xorl	%ecx,%esi
+	punpcklqdq	%xmm0,%xmm10
+	movl	%eax,%edi
+	roll	$5,%eax
+	pxor	%xmm2,%xmm1
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	movdqa	%xmm8,%xmm9
+	rorl	$7,%ebx
+	paddd	%xmm0,%xmm8
+	addl	%eax,%ebp
+	pxor	%xmm10,%xmm1
+	addl	20(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	movdqa	%xmm1,%xmm10
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	movdqa	%xmm8,0(%rsp)
+	rorl	$7,%eax
+	addl	%ebp,%edx
+	addl	24(%rsp),%ecx
+	pslld	$2,%xmm1
+	xorl	%eax,%esi
+	movl	%edx,%edi
+	psrld	$30,%xmm10
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	rorl	$7,%ebp
+	por	%xmm10,%xmm1
+	addl	%edx,%ecx
+	addl	28(%rsp),%ebx
+	pshufd	$238,%xmm0,%xmm8
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	pxor	%xmm6,%xmm2
+	addl	32(%rsp),%eax
+	xorl	%edx,%esi
+	punpcklqdq	%xmm1,%xmm8
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	pxor	%xmm3,%xmm2
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	movdqa	0(%r14),%xmm10	/* constant vector at K_XX_XX+64 (presumably the third round constant) */
+	rorl	$7,%ecx
+	paddd	%xmm1,%xmm9
+	addl	%ebx,%eax
+	pxor	%xmm8,%xmm2
+	addl	36(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	movdqa	%xmm2,%xmm8
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	movdqa	%xmm9,16(%rsp)
+	rorl	$7,%ebx
+	addl	%eax,%ebp
+	addl	40(%rsp),%edx
+	pslld	$2,%xmm2
+	xorl	%ebx,%esi
+	movl	%ebp,%edi
+	psrld	$30,%xmm8
+	roll	$5,%ebp
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	rorl	$7,%eax
+	por	%xmm8,%xmm2
+	addl	%ebp,%edx
+	addl	44(%rsp),%ecx
+	pshufd	$238,%xmm1,%xmm9
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	rorl	$7,%ebp
+	addl	%edx,%ecx
+	pxor	%xmm7,%xmm3
+	addl	48(%rsp),%ebx
+	xorl	%ebp,%esi
+	punpcklqdq	%xmm2,%xmm9
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	pxor	%xmm4,%xmm3
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	movdqa	%xmm10,%xmm8
+	rorl	$7,%edx
+	paddd	%xmm2,%xmm10
+	addl	%ecx,%ebx
+	pxor	%xmm9,%xmm3
+	addl	52(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	movdqa	%xmm3,%xmm9
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	movdqa	%xmm10,32(%rsp)
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	56(%rsp),%ebp
+	pslld	$2,%xmm3
+	xorl	%ecx,%esi
+	movl	%eax,%edi
+	psrld	$30,%xmm9
+	roll	$5,%eax
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	rorl	$7,%ebx
+	por	%xmm9,%xmm3
+	addl	%eax,%ebp
+	addl	60(%rsp),%edx	/* round 31 */
+	pshufd	$238,%xmm2,%xmm10
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	rorl	$7,%eax
+	addl	%ebp,%edx
+	pxor	%xmm0,%xmm4
+	addl	0(%rsp),%ecx	/* round 32 */
+	xorl	%eax,%esi
+	punpcklqdq	%xmm3,%xmm10
+	movl	%edx,%edi
+	roll	$5,%edx
+	pxor	%xmm5,%xmm4
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	movdqa	%xmm8,%xmm9
+	rorl	$7,%ebp
+	paddd	%xmm3,%xmm8
+	addl	%edx,%ecx
+	pxor	%xmm10,%xmm4
+	addl	4(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	movdqa	%xmm4,%xmm10
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	movdqa	%xmm8,48(%rsp)
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	addl	8(%rsp),%eax
+	pslld	$2,%xmm4
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	psrld	$30,%xmm10
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	rorl	$7,%ecx
+	por	%xmm10,%xmm4
+	addl	%ebx,%eax
+	addl	12(%rsp),%ebp
+	pshufd	$238,%xmm3,%xmm8
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	rorl	$7,%ebx
+	addl	%eax,%ebp
+	pxor	%xmm1,%xmm5
+	addl	16(%rsp),%edx
+	xorl	%ebx,%esi
+	punpcklqdq	%xmm4,%xmm8
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	pxor	%xmm6,%xmm5
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	movdqa	%xmm9,%xmm10
+	rorl	$7,%eax
+	paddd	%xmm4,%xmm9
+	addl	%ebp,%edx
+	pxor	%xmm8,%xmm5
+	addl	20(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	movdqa	%xmm5,%xmm8
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	movdqa	%xmm9,0(%rsp)
+	rorl	$7,%ebp
+	addl	%edx,%ecx
+	addl	24(%rsp),%ebx
+	pslld	$2,%xmm5
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	psrld	$30,%xmm8
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	rorl	$7,%edx
+	por	%xmm8,%xmm5
+	addl	%ecx,%ebx
+	addl	28(%rsp),%eax	/* round 39 */
+	pshufd	$238,%xmm4,%xmm9
+	rorl	$7,%ecx
+	movl	%ebx,%esi
+	xorl	%edx,%edi
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%ecx,%esi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	pxor	%xmm2,%xmm6
+	addl	32(%rsp),%ebp	/* round 40: the mixing term combines andl and xorl again from here */
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	rorl	$7,%ebx
+	punpcklqdq	%xmm5,%xmm9
+	movl	%eax,%edi
+	xorl	%ecx,%esi
+	pxor	%xmm7,%xmm6
+	roll	$5,%eax
+	addl	%esi,%ebp
+	movdqa	%xmm10,%xmm8
+	xorl	%ebx,%edi
+	paddd	%xmm5,%xmm10
+	xorl	%ecx,%ebx
+	pxor	%xmm9,%xmm6
+	addl	%eax,%ebp
+	addl	36(%rsp),%edx
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	rorl	$7,%eax
+	movdqa	%xmm6,%xmm9
+	movl	%ebp,%esi
+	xorl	%ebx,%edi
+	movdqa	%xmm10,16(%rsp)
+	roll	$5,%ebp
+	addl	%edi,%edx
+	xorl	%eax,%esi
+	pslld	$2,%xmm6
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	psrld	$30,%xmm9
+	addl	40(%rsp),%ecx
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	por	%xmm9,%xmm6
+	rorl	$7,%ebp
+	movl	%edx,%edi
+	xorl	%eax,%esi
+	roll	$5,%edx
+	pshufd	$238,%xmm5,%xmm10
+	addl	%esi,%ecx
+	xorl	%ebp,%edi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	addl	44(%rsp),%ebx
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	rorl	$7,%edx
+	movl	%ecx,%esi
+	xorl	%ebp,%edi
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%edx,%esi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	pxor	%xmm3,%xmm7
+	addl	48(%rsp),%eax
+	andl	%edx,%esi
+	xorl	%ebp,%edx
+	rorl	$7,%ecx
+	punpcklqdq	%xmm6,%xmm10
+	movl	%ebx,%edi
+	xorl	%edx,%esi
+	pxor	%xmm0,%xmm7
+	roll	$5,%ebx
+	addl	%esi,%eax
+	movdqa	32(%r14),%xmm9	/* constant vector at K_XX_XX+96 (presumably the fourth round constant) */
+	xorl	%ecx,%edi
+	paddd	%xmm6,%xmm8
+	xorl	%edx,%ecx
+	pxor	%xmm10,%xmm7
+	addl	%ebx,%eax
+	addl	52(%rsp),%ebp
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	rorl	$7,%ebx
+	movdqa	%xmm7,%xmm10
+	movl	%eax,%esi
+	xorl	%ecx,%edi
+	movdqa	%xmm8,32(%rsp)
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%ebx,%esi
+	pslld	$2,%xmm7
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	psrld	$30,%xmm10
+	addl	56(%rsp),%edx
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	por	%xmm10,%xmm7
+	rorl	$7,%eax
+	movl	%ebp,%edi
+	xorl	%ebx,%esi
+	roll	$5,%ebp
+	pshufd	$238,%xmm6,%xmm8
+	addl	%esi,%edx
+	xorl	%eax,%edi
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	addl	60(%rsp),%ecx	/* round 47 */
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	rorl	$7,%ebp
+	movl	%edx,%esi
+	xorl	%eax,%edi
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	pxor	%xmm4,%xmm0
+	addl	0(%rsp),%ebx	/* round 48 */
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	rorl	$7,%edx
+	punpcklqdq	%xmm7,%xmm8
+	movl	%ecx,%edi
+	xorl	%ebp,%esi
+	pxor	%xmm1,%xmm0
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	movdqa	%xmm9,%xmm10
+	xorl	%edx,%edi
+	paddd	%xmm7,%xmm9
+	xorl	%ebp,%edx
+	pxor	%xmm8,%xmm0
+	addl	%ecx,%ebx
+	addl	4(%rsp),%eax
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	rorl	$7,%ecx
+	movdqa	%xmm0,%xmm8
+	movl	%ebx,%esi
+	xorl	%edx,%edi
+	movdqa	%xmm9,48(%rsp)
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%ecx,%esi
+	pslld	$2,%xmm0
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	psrld	$30,%xmm8
+	addl	8(%rsp),%ebp
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	por	%xmm8,%xmm0
+	rorl	$7,%ebx
+	movl	%eax,%edi
+	xorl	%ecx,%esi
+	roll	$5,%eax
+	pshufd	$238,%xmm7,%xmm9
+	addl	%esi,%ebp
+	xorl	%ebx,%edi
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	addl	12(%rsp),%edx
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	rorl	$7,%eax
+	movl	%ebp,%esi
+	xorl	%ebx,%edi
+	roll	$5,%ebp
+	addl	%edi,%edx
+	xorl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	pxor	%xmm5,%xmm1
+	addl	16(%rsp),%ecx
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	rorl	$7,%ebp
+	punpcklqdq	%xmm0,%xmm9
+	movl	%edx,%edi
+	xorl	%eax,%esi
+	pxor	%xmm2,%xmm1
+	roll	$5,%edx
+	addl	%esi,%ecx
+	movdqa	%xmm10,%xmm8
+	xorl	%ebp,%edi
+	paddd	%xmm0,%xmm10
+	xorl	%eax,%ebp
+	pxor	%xmm9,%xmm1
+	addl	%edx,%ecx
+	addl	20(%rsp),%ebx
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	rorl	$7,%edx
+	movdqa	%xmm1,%xmm9
+	movl	%ecx,%esi
+	xorl	%ebp,%edi
+	movdqa	%xmm10,0(%rsp)
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%edx,%esi
+	pslld	$2,%xmm1
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	psrld	$30,%xmm9
+	addl	24(%rsp),%eax
+	andl	%edx,%esi
+	xorl	%ebp,%edx
+	por	%xmm9,%xmm1
+	rorl	$7,%ecx
+	movl	%ebx,%edi
+	xorl	%edx,%esi
+	roll	$5,%ebx
+	pshufd	$238,%xmm0,%xmm10
+	addl	%esi,%eax
+	xorl	%ecx,%edi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	addl	28(%rsp),%ebp
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	rorl	$7,%ebx
+	movl	%eax,%esi
+	xorl	%ecx,%edi
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	pxor	%xmm6,%xmm2
+	addl	32(%rsp),%edx
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	rorl	$7,%eax
+	punpcklqdq	%xmm1,%xmm10
+	movl	%ebp,%edi
+	xorl	%ebx,%esi
+	pxor	%xmm3,%xmm2
+	roll	$5,%ebp
+	addl	%esi,%edx
+	movdqa	%xmm8,%xmm9
+	xorl	%eax,%edi
+	paddd	%xmm1,%xmm8
+	xorl	%ebx,%eax
+	pxor	%xmm10,%xmm2
+	addl	%ebp,%edx
+	addl	36(%rsp),%ecx
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	rorl	$7,%ebp
+	movdqa	%xmm2,%xmm10
+	movl	%edx,%esi
+	xorl	%eax,%edi
+	movdqa	%xmm8,16(%rsp)
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%ebp,%esi
+	pslld	$2,%xmm2
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	psrld	$30,%xmm10
+	addl	40(%rsp),%ebx
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	por	%xmm10,%xmm2
+	rorl	$7,%edx
+	movl	%ecx,%edi
+	xorl	%ebp,%esi
+	roll	$5,%ecx
+	pshufd	$238,%xmm1,%xmm8
+	addl	%esi,%ebx
+	xorl	%edx,%edi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	addl	44(%rsp),%eax	/* round 59: last round of the andl-based stretch */
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	rorl	$7,%ecx
+	movl	%ebx,%esi
+	xorl	%edx,%edi
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	pxor	%xmm7,%xmm3
+	addl	48(%rsp),%ebp	/* round 60: back to the three-way xor mixing term */
+	xorl	%ecx,%esi
+	punpcklqdq	%xmm2,%xmm8
+	movl	%eax,%edi
+	roll	$5,%eax
+	pxor	%xmm4,%xmm3
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	movdqa	%xmm9,%xmm10
+	rorl	$7,%ebx
+	paddd	%xmm2,%xmm9
+	addl	%eax,%ebp
+	pxor	%xmm8,%xmm3
+	addl	52(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	movdqa	%xmm3,%xmm8
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	movdqa	%xmm9,32(%rsp)
+	rorl	$7,%eax
+	addl	%ebp,%edx
+	addl	56(%rsp),%ecx
+	pslld	$2,%xmm3
+	xorl	%eax,%esi
+	movl	%edx,%edi
+	psrld	$30,%xmm8
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	rorl	$7,%ebp
+	por	%xmm8,%xmm3
+	addl	%edx,%ecx
+	addl	60(%rsp),%ebx	/* round 63 */
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	addl	0(%rsp),%eax	/* round 64 */
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	paddd	%xmm3,%xmm10
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	movdqa	%xmm10,48(%rsp)	/* last schedule store of this block: words read by rounds 76-79 */
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	4(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	rorl	$7,%ebx
+	addl	%eax,%ebp
+	addl	8(%rsp),%edx
+	xorl	%ebx,%esi
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	rorl	$7,%eax
+	addl	%ebp,%edx
+	addl	12(%rsp),%ecx	/* round 67 */
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	rorl	$7,%ebp
+	addl	%edx,%ecx
+	cmpq	%r10,%r9	/* input cursor at the end pointer? */
+	je	L$done_ssse3	/* yes: finish rounds 68-79 without touching more input */
+	movdqa	64(%r14),%xmm6	/* reload the shuffle control; %xmm6 was reused as a schedule register above */
+	movdqa	-64(%r14),%xmm9	/* constant vector at K_XX_XX+0 for the next block's first words */
+	movdqu	0(%r9),%xmm0	/* fetch the next 64-byte block while rounds 68-79 of this one complete */
+	movdqu	16(%r9),%xmm1
+	movdqu	32(%r9),%xmm2
+	movdqu	48(%r9),%xmm3
+.byte	102,15,56,0,198	/* pshufb %xmm6,%xmm0 */
+	addq	$64,%r9
+	addl	16(%rsp),%ebx	/* round 68 */
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+.byte	102,15,56,0,206	/* pshufb %xmm6,%xmm1 */
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	rorl	$7,%edx
+	paddd	%xmm9,%xmm0
+	addl	%ecx,%ebx
+	addl	20(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	movdqa	%xmm0,0(%rsp)	/* slot 0..12 was last read by rounds 64-67, so it can take the next block's words + constant */
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	rorl	$7,%ecx
+	psubd	%xmm9,%xmm0	/* restore the plain words in %xmm0 */
+	addl	%ebx,%eax
+	addl	24(%rsp),%ebp
+	xorl	%ecx,%esi
+	movl	%eax,%edi
+	roll	$5,%eax
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	rorl	$7,%ebx
+	addl	%eax,%ebp
+	addl	28(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	rorl	$7,%eax
+	addl	%ebp,%edx
+	addl	32(%rsp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%edi
+.byte	102,15,56,0,214	/* pshufb %xmm6,%xmm2 */
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	rorl	$7,%ebp
+	paddd	%xmm9,%xmm1
+	addl	%edx,%ecx
+	addl	36(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	movdqa	%xmm1,16(%rsp)	/* slot 16..28 was consumed by rounds 68-71 */
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	rorl	$7,%edx
+	psubd	%xmm9,%xmm1
+	addl	%ecx,%ebx
+	addl	40(%rsp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	44(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	rorl	$7,%ebx
+	addl	%eax,%ebp
+	addl	48(%rsp),%edx
+	xorl	%ebx,%esi
+	movl	%ebp,%edi
+.byte	102,15,56,0,222	/* pshufb %xmm6,%xmm3 */
+	roll	$5,%ebp
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	rorl	$7,%eax
+	paddd	%xmm9,%xmm2
+	addl	%ebp,%edx
+	addl	52(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	movdqa	%xmm2,32(%rsp)	/* slot 32..44 was consumed by rounds 72-75 */
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	rorl	$7,%ebp
+	psubd	%xmm9,%xmm2
+	addl	%edx,%ecx
+	addl	56(%rsp),%ebx
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	addl	60(%rsp),%eax	/* round 79 */
+	xorl	%edx,%edi
+	movl	%ebx,%esi	/* keep an unrotated copy of %ebx; the roll below clobbers it */
+	roll	$5,%ebx
+	addl	%edi,%eax
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	0(%r8),%eax	/* add the five working values into the stored state ... */
+	addl	4(%r8),%esi
+	addl	8(%r8),%ecx
+	addl	12(%r8),%edx
+	movl	%eax,0(%r8)	/* ... and write the updated state back */
+	addl	16(%r8),%ebp
+	movl	%esi,4(%r8)
+	movl	%esi,%ebx	/* b for the next block */
+	movl	%ecx,8(%r8)
+	movl	%ecx,%edi
+	movl	%edx,12(%r8)
+	xorl	%edx,%edi
+	movl	%ebp,16(%r8)
+	andl	%edi,%esi	/* re-establish the loop-entry value %esi = b & (c ^ d) */
+	jmp	L$oop_ssse3
+
+.p2align	4
+L$done_ssse3:	/* reached when the input cursor equals the end pointer: run rounds 68-79 of the last block with no further loads from the input */
+	addl	16(%rsp),%ebx	/* round 68 */
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	rorl	$7,%edx	/* undoes an earlier rol 5 and rotates right 2 more (net rotate left 30) */
+	addl	%ecx,%ebx
+	addl	20(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	24(%rsp),%ebp
+	xorl	%ecx,%esi
+	movl	%eax,%edi
+	roll	$5,%eax
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	rorl	$7,%ebx
+	addl	%eax,%ebp
+	addl	28(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	rorl	$7,%eax
+	addl	%ebp,%edx
+	addl	32(%rsp),%ecx	/* round 72 */
+	xorl	%eax,%esi
+	movl	%edx,%edi
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	rorl	$7,%ebp
+	addl	%edx,%ecx
+	addl	36(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	addl	40(%rsp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	44(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	rorl	$7,%ebx
+	addl	%eax,%ebp
+	addl	48(%rsp),%edx	/* round 76 */
+	xorl	%ebx,%esi
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	rorl	$7,%eax
+	addl	%ebp,%edx
+	addl	52(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	rorl	$7,%ebp
+	addl	%edx,%ecx
+	addl	56(%rsp),%ebx
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	rorl	$7,%edx
+	addl	%ecx,%ebx
+	addl	60(%rsp),%eax	/* round 79 */
+	xorl	%edx,%edi
+	movl	%ebx,%esi	/* unrotated copy of %ebx; it is the value folded into state word 1 below */
+	roll	$5,%ebx
+	addl	%edi,%eax
+	rorl	$7,%ecx
+	addl	%ebx,%eax
+	addl	0(%r8),%eax	/* fold the working values into the five state words at (%r8) */
+	addl	4(%r8),%esi
+	addl	8(%r8),%ecx
+	movl	%eax,0(%r8)
+	addl	12(%r8),%edx
+	movl	%esi,4(%r8)
+	addl	16(%r8),%ebp
+	movl	%ecx,8(%r8)
+	movl	%edx,12(%r8)
+	movl	%ebp,16(%r8)
+	movq	-40(%r11),%r14	/* reload the pushed registers; %r11 still holds the entry %rsp, so the pushes sit at -8..-40(%r11) */
+	movq	-32(%r11),%r13
+	movq	-24(%r11),%r12
+	movq	-16(%r11),%rbp
+	movq	-8(%r11),%rbx
+	leaq	(%r11),%rsp	/* drop the aligned scratch area and the pushes in one step */
+L$epilogue_ssse3:
+	.byte	0xf3,0xc3	/* encoding of "rep ret" */
+
+
+.p2align	4
+sha1_block_data_order_avx:
+_avx_shortcut:
+	movq	%rsp,%r11
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	leaq	-64(%rsp),%rsp
+	vzeroupper
+	andq	$-64,%rsp
+	movq	%rdi,%r8
+	movq	%rsi,%r9
+	movq	%rdx,%r10
+
+	shlq	$6,%r10
+	addq	%r9,%r10
+	leaq	K_XX_XX+64(%rip),%r14
+
+	movl	0(%r8),%eax
+	movl	4(%r8),%ebx
+	movl	8(%r8),%ecx
+	movl	12(%r8),%edx
+	movl	%ebx,%esi
+	movl	16(%r8),%ebp
+	movl	%ecx,%edi
+	xorl	%edx,%edi
+	andl	%edi,%esi
+
+	vmovdqa	64(%r14),%xmm6
+	vmovdqa	-64(%r14),%xmm11
+	vmovdqu	0(%r9),%xmm0
+	vmovdqu	16(%r9),%xmm1
+	vmovdqu	32(%r9),%xmm2
+	vmovdqu	48(%r9),%xmm3
+	vpshufb	%xmm6,%xmm0,%xmm0
+	addq	$64,%r9
+	vpshufb	%xmm6,%xmm1,%xmm1
+	vpshufb	%xmm6,%xmm2,%xmm2
+	vpshufb	%xmm6,%xmm3,%xmm3
+	vpaddd	%xmm11,%xmm0,%xmm4
+	vpaddd	%xmm11,%xmm1,%xmm5
+	vpaddd	%xmm11,%xmm2,%xmm6
+	vmovdqa	%xmm4,0(%rsp)
+	vmovdqa	%xmm5,16(%rsp)
+	vmovdqa	%xmm6,32(%rsp)
+	jmp	L$oop_avx
+.p2align	4
+L$oop_avx:
+	shrdl	$2,%ebx,%ebx
+	xorl	%edx,%esi
+	vpalignr	$8,%xmm0,%xmm1,%xmm4
+	movl	%eax,%edi
+	addl	0(%rsp),%ebp
+	vpaddd	%xmm3,%xmm11,%xmm9
+	xorl	%ecx,%ebx
+	shldl	$5,%eax,%eax
+	vpsrldq	$4,%xmm3,%xmm8
+	addl	%esi,%ebp
+	andl	%ebx,%edi
+	vpxor	%xmm0,%xmm4,%xmm4
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	vpxor	%xmm2,%xmm8,%xmm8
+	shrdl	$7,%eax,%eax
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	addl	4(%rsp),%edx
+	vpxor	%xmm8,%xmm4,%xmm4
+	xorl	%ebx,%eax
+	shldl	$5,%ebp,%ebp
+	vmovdqa	%xmm9,48(%rsp)
+	addl	%edi,%edx
+	andl	%eax,%esi
+	vpsrld	$31,%xmm4,%xmm8
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	shrdl	$7,%ebp,%ebp
+	xorl	%ebx,%esi
+	vpslldq	$12,%xmm4,%xmm10
+	vpaddd	%xmm4,%xmm4,%xmm4
+	movl	%edx,%edi
+	addl	8(%rsp),%ecx
+	xorl	%eax,%ebp
+	shldl	$5,%edx,%edx
+	vpsrld	$30,%xmm10,%xmm9
+	vpor	%xmm8,%xmm4,%xmm4
+	addl	%esi,%ecx
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	vpslld	$2,%xmm10,%xmm10
+	vpxor	%xmm9,%xmm4,%xmm4
+	shrdl	$7,%edx,%edx
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	addl	12(%rsp),%ebx
+	vpxor	%xmm10,%xmm4,%xmm4
+	xorl	%ebp,%edx
+	shldl	$5,%ecx,%ecx
+	addl	%edi,%ebx
+	andl	%edx,%esi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	shrdl	$7,%ecx,%ecx
+	xorl	%ebp,%esi
+	vpalignr	$8,%xmm1,%xmm2,%xmm5
+	movl	%ebx,%edi
+	addl	16(%rsp),%eax
+	vpaddd	%xmm4,%xmm11,%xmm9
+	xorl	%edx,%ecx
+	shldl	$5,%ebx,%ebx
+	vpsrldq	$4,%xmm4,%xmm8
+	addl	%esi,%eax
+	andl	%ecx,%edi
+	vpxor	%xmm1,%xmm5,%xmm5
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	vpxor	%xmm3,%xmm8,%xmm8
+	shrdl	$7,%ebx,%ebx
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	addl	20(%rsp),%ebp
+	vpxor	%xmm8,%xmm5,%xmm5
+	xorl	%ecx,%ebx
+	shldl	$5,%eax,%eax
+	vmovdqa	%xmm9,0(%rsp)
+	addl	%edi,%ebp
+	andl	%ebx,%esi
+	vpsrld	$31,%xmm5,%xmm8
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	shrdl	$7,%eax,%eax
+	xorl	%ecx,%esi
+	vpslldq	$12,%xmm5,%xmm10
+	vpaddd	%xmm5,%xmm5,%xmm5
+	movl	%ebp,%edi
+	addl	24(%rsp),%edx
+	xorl	%ebx,%eax
+	shldl	$5,%ebp,%ebp
+	vpsrld	$30,%xmm10,%xmm9
+	vpor	%xmm8,%xmm5,%xmm5
+	addl	%esi,%edx
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	vpslld	$2,%xmm10,%xmm10
+	vpxor	%xmm9,%xmm5,%xmm5
+	shrdl	$7,%ebp,%ebp
+	xorl	%ebx,%edi
+	movl	%edx,%esi
+	addl	28(%rsp),%ecx
+	vpxor	%xmm10,%xmm5,%xmm5
+	xorl	%eax,%ebp
+	shldl	$5,%edx,%edx
+	vmovdqa	-32(%r14),%xmm11
+	addl	%edi,%ecx
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	shrdl	$7,%edx,%edx
+	xorl	%eax,%esi
+	vpalignr	$8,%xmm2,%xmm3,%xmm6
+	movl	%ecx,%edi
+	addl	32(%rsp),%ebx
+	vpaddd	%xmm5,%xmm11,%xmm9
+	xorl	%ebp,%edx
+	shldl	$5,%ecx,%ecx
+	vpsrldq	$4,%xmm5,%xmm8
+	addl	%esi,%ebx
+	andl	%edx,%edi
+	vpxor	%xmm2,%xmm6,%xmm6
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	vpxor	%xmm4,%xmm8,%xmm8
+	shrdl	$7,%ecx,%ecx
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	addl	36(%rsp),%eax
+	vpxor	%xmm8,%xmm6,%xmm6
+	xorl	%edx,%ecx
+	shldl	$5,%ebx,%ebx
+	vmovdqa	%xmm9,16(%rsp)
+	addl	%edi,%eax
+	andl	%ecx,%esi
+	vpsrld	$31,%xmm6,%xmm8
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	shrdl	$7,%ebx,%ebx
+	xorl	%edx,%esi
+	vpslldq	$12,%xmm6,%xmm10
+	vpaddd	%xmm6,%xmm6,%xmm6
+	movl	%eax,%edi
+	addl	40(%rsp),%ebp
+	xorl	%ecx,%ebx
+	shldl	$5,%eax,%eax
+	vpsrld	$30,%xmm10,%xmm9
+	vpor	%xmm8,%xmm6,%xmm6
+	addl	%esi,%ebp
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	vpslld	$2,%xmm10,%xmm10
+	vpxor	%xmm9,%xmm6,%xmm6
+	shrdl	$7,%eax,%eax
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	addl	44(%rsp),%edx
+	vpxor	%xmm10,%xmm6,%xmm6
+	xorl	%ebx,%eax
+	shldl	$5,%ebp,%ebp
+	addl	%edi,%edx
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	shrdl	$7,%ebp,%ebp
+	xorl	%ebx,%esi
+	vpalignr	$8,%xmm3,%xmm4,%xmm7
+	movl	%edx,%edi
+	addl	48(%rsp),%ecx
+	vpaddd	%xmm6,%xmm11,%xmm9
+	xorl	%eax,%ebp
+	shldl	$5,%edx,%edx
+	vpsrldq	$4,%xmm6,%xmm8
+	addl	%esi,%ecx
+	andl	%ebp,%edi
+	vpxor	%xmm3,%xmm7,%xmm7
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	vpxor	%xmm5,%xmm8,%xmm8
+	shrdl	$7,%edx,%edx
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	addl	52(%rsp),%ebx
+	vpxor	%xmm8,%xmm7,%xmm7
+	xorl	%ebp,%edx
+	shldl	$5,%ecx,%ecx
+	vmovdqa	%xmm9,32(%rsp)
+	addl	%edi,%ebx
+	andl	%edx,%esi
+	vpsrld	$31,%xmm7,%xmm8
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	shrdl	$7,%ecx,%ecx
+	xorl	%ebp,%esi
+	vpslldq	$12,%xmm7,%xmm10
+	vpaddd	%xmm7,%xmm7,%xmm7
+	movl	%ebx,%edi
+	addl	56(%rsp),%eax
+	xorl	%edx,%ecx
+	shldl	$5,%ebx,%ebx
+	vpsrld	$30,%xmm10,%xmm9
+	vpor	%xmm8,%xmm7,%xmm7
+	addl	%esi,%eax
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	vpslld	$2,%xmm10,%xmm10
+	vpxor	%xmm9,%xmm7,%xmm7
+	shrdl	$7,%ebx,%ebx
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	addl	60(%rsp),%ebp
+	vpxor	%xmm10,%xmm7,%xmm7
+	xorl	%ecx,%ebx
+	shldl	$5,%eax,%eax
+	addl	%edi,%ebp
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	vpalignr	$8,%xmm6,%xmm7,%xmm8
+	vpxor	%xmm4,%xmm0,%xmm0
+	shrdl	$7,%eax,%eax
+	xorl	%ecx,%esi
+	movl	%ebp,%edi
+	addl	0(%rsp),%edx
+	vpxor	%xmm1,%xmm0,%xmm0
+	xorl	%ebx,%eax
+	shldl	$5,%ebp,%ebp
+	vpaddd	%xmm7,%xmm11,%xmm9
+	addl	%esi,%edx
+	andl	%eax,%edi
+	vpxor	%xmm8,%xmm0,%xmm0
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	shrdl	$7,%ebp,%ebp
+	xorl	%ebx,%edi
+	vpsrld	$30,%xmm0,%xmm8
+	vmovdqa	%xmm9,48(%rsp)
+	movl	%edx,%esi
+	addl	4(%rsp),%ecx
+	xorl	%eax,%ebp
+	shldl	$5,%edx,%edx
+	vpslld	$2,%xmm0,%xmm0
+	addl	%edi,%ecx
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	shrdl	$7,%edx,%edx
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	addl	8(%rsp),%ebx
+	vpor	%xmm8,%xmm0,%xmm0
+	xorl	%ebp,%edx
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	addl	12(%rsp),%eax
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	vpalignr	$8,%xmm7,%xmm0,%xmm8
+	vpxor	%xmm5,%xmm1,%xmm1
+	addl	16(%rsp),%ebp
+	xorl	%ecx,%esi
+	movl	%eax,%edi
+	shldl	$5,%eax,%eax
+	vpxor	%xmm2,%xmm1,%xmm1
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	vpaddd	%xmm0,%xmm11,%xmm9
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	vpxor	%xmm8,%xmm1,%xmm1
+	addl	20(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	shldl	$5,%ebp,%ebp
+	vpsrld	$30,%xmm1,%xmm8
+	vmovdqa	%xmm9,0(%rsp)
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	vpslld	$2,%xmm1,%xmm1
+	addl	24(%rsp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%edi
+	shldl	$5,%edx,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	vpor	%xmm8,%xmm1,%xmm1
+	addl	28(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	vpalignr	$8,%xmm0,%xmm1,%xmm8
+	vpxor	%xmm6,%xmm2,%xmm2
+	addl	32(%rsp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	shldl	$5,%ebx,%ebx
+	vpxor	%xmm3,%xmm2,%xmm2
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	vpaddd	%xmm1,%xmm11,%xmm9
+	vmovdqa	0(%r14),%xmm11
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	vpxor	%xmm8,%xmm2,%xmm2
+	addl	36(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	vpsrld	$30,%xmm2,%xmm8
+	vmovdqa	%xmm9,16(%rsp)
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	vpslld	$2,%xmm2,%xmm2
+	addl	40(%rsp),%edx
+	xorl	%ebx,%esi
+	movl	%ebp,%edi
+	shldl	$5,%ebp,%ebp
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	vpor	%xmm8,%xmm2,%xmm2
+	addl	44(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	vpalignr	$8,%xmm1,%xmm2,%xmm8
+	vpxor	%xmm7,%xmm3,%xmm3
+	addl	48(%rsp),%ebx
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	shldl	$5,%ecx,%ecx
+	vpxor	%xmm4,%xmm3,%xmm3
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	vpaddd	%xmm2,%xmm11,%xmm9
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	vpxor	%xmm8,%xmm3,%xmm3
+	addl	52(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	vpsrld	$30,%xmm3,%xmm8
+	vmovdqa	%xmm9,32(%rsp)
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	vpslld	$2,%xmm3,%xmm3
+	addl	56(%rsp),%ebp
+	xorl	%ecx,%esi
+	movl	%eax,%edi
+	shldl	$5,%eax,%eax
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	vpor	%xmm8,%xmm3,%xmm3
+	addl	60(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	shldl	$5,%ebp,%ebp
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	vpalignr	$8,%xmm2,%xmm3,%xmm8
+	vpxor	%xmm0,%xmm4,%xmm4
+	addl	0(%rsp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%edi
+	shldl	$5,%edx,%edx
+	vpxor	%xmm5,%xmm4,%xmm4
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	vpaddd	%xmm3,%xmm11,%xmm9
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	vpxor	%xmm8,%xmm4,%xmm4
+	addl	4(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	vpsrld	$30,%xmm4,%xmm8
+	vmovdqa	%xmm9,48(%rsp)
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	vpslld	$2,%xmm4,%xmm4
+	addl	8(%rsp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	vpor	%xmm8,%xmm4,%xmm4
+	addl	12(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	vpalignr	$8,%xmm3,%xmm4,%xmm8
+	vpxor	%xmm1,%xmm5,%xmm5
+	addl	16(%rsp),%edx
+	xorl	%ebx,%esi
+	movl	%ebp,%edi
+	shldl	$5,%ebp,%ebp
+	vpxor	%xmm6,%xmm5,%xmm5
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	vpaddd	%xmm4,%xmm11,%xmm9
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	vpxor	%xmm8,%xmm5,%xmm5
+	addl	20(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	vpsrld	$30,%xmm5,%xmm8
+	vmovdqa	%xmm9,0(%rsp)
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	vpslld	$2,%xmm5,%xmm5
+	addl	24(%rsp),%ebx
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	vpor	%xmm8,%xmm5,%xmm5
+	addl	28(%rsp),%eax
+	shrdl	$7,%ecx,%ecx
+	movl	%ebx,%esi
+	xorl	%edx,%edi
+	shldl	$5,%ebx,%ebx
+	addl	%edi,%eax
+	xorl	%ecx,%esi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	vpalignr	$8,%xmm4,%xmm5,%xmm8
+	vpxor	%xmm2,%xmm6,%xmm6
+	addl	32(%rsp),%ebp
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	shrdl	$7,%ebx,%ebx
+	vpxor	%xmm7,%xmm6,%xmm6
+	movl	%eax,%edi
+	xorl	%ecx,%esi
+	vpaddd	%xmm5,%xmm11,%xmm9
+	shldl	$5,%eax,%eax
+	addl	%esi,%ebp
+	vpxor	%xmm8,%xmm6,%xmm6
+	xorl	%ebx,%edi
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	addl	36(%rsp),%edx
+	vpsrld	$30,%xmm6,%xmm8
+	vmovdqa	%xmm9,16(%rsp)
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	shrdl	$7,%eax,%eax
+	movl	%ebp,%esi
+	vpslld	$2,%xmm6,%xmm6
+	xorl	%ebx,%edi
+	shldl	$5,%ebp,%ebp
+	addl	%edi,%edx
+	xorl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	addl	40(%rsp),%ecx
+	andl	%eax,%esi
+	vpor	%xmm8,%xmm6,%xmm6
+	xorl	%ebx,%eax
+	shrdl	$7,%ebp,%ebp
+	movl	%edx,%edi
+	xorl	%eax,%esi
+	shldl	$5,%edx,%edx
+	addl	%esi,%ecx
+	xorl	%ebp,%edi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	addl	44(%rsp),%ebx
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	shrdl	$7,%edx,%edx
+	movl	%ecx,%esi
+	xorl	%ebp,%edi
+	shldl	$5,%ecx,%ecx
+	addl	%edi,%ebx
+	xorl	%edx,%esi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	vpalignr	$8,%xmm5,%xmm6,%xmm8
+	vpxor	%xmm3,%xmm7,%xmm7
+	addl	48(%rsp),%eax
+	andl	%edx,%esi
+	xorl	%ebp,%edx
+	shrdl	$7,%ecx,%ecx
+	vpxor	%xmm0,%xmm7,%xmm7
+	movl	%ebx,%edi
+	xorl	%edx,%esi
+	vpaddd	%xmm6,%xmm11,%xmm9
+	vmovdqa	32(%r14),%xmm11
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	vpxor	%xmm8,%xmm7,%xmm7
+	xorl	%ecx,%edi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	addl	52(%rsp),%ebp
+	vpsrld	$30,%xmm7,%xmm8
+	vmovdqa	%xmm9,32(%rsp)
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	shrdl	$7,%ebx,%ebx
+	movl	%eax,%esi
+	vpslld	$2,%xmm7,%xmm7
+	xorl	%ecx,%edi
+	shldl	$5,%eax,%eax
+	addl	%edi,%ebp
+	xorl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	addl	56(%rsp),%edx
+	andl	%ebx,%esi
+	vpor	%xmm8,%xmm7,%xmm7
+	xorl	%ecx,%ebx
+	shrdl	$7,%eax,%eax
+	movl	%ebp,%edi
+	xorl	%ebx,%esi
+	shldl	$5,%ebp,%ebp
+	addl	%esi,%edx
+	xorl	%eax,%edi
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	addl	60(%rsp),%ecx
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	shrdl	$7,%ebp,%ebp
+	movl	%edx,%esi
+	xorl	%eax,%edi
+	shldl	$5,%edx,%edx
+	addl	%edi,%ecx
+	xorl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	vpalignr	$8,%xmm6,%xmm7,%xmm8
+	vpxor	%xmm4,%xmm0,%xmm0
+	addl	0(%rsp),%ebx
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	shrdl	$7,%edx,%edx
+	vpxor	%xmm1,%xmm0,%xmm0
+	movl	%ecx,%edi
+	xorl	%ebp,%esi
+	vpaddd	%xmm7,%xmm11,%xmm9
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	vpxor	%xmm8,%xmm0,%xmm0
+	xorl	%edx,%edi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	addl	4(%rsp),%eax
+	vpsrld	$30,%xmm0,%xmm8
+	vmovdqa	%xmm9,48(%rsp)
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	shrdl	$7,%ecx,%ecx
+	movl	%ebx,%esi
+	vpslld	$2,%xmm0,%xmm0
+	xorl	%edx,%edi
+	shldl	$5,%ebx,%ebx
+	addl	%edi,%eax
+	xorl	%ecx,%esi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	addl	8(%rsp),%ebp
+	andl	%ecx,%esi
+	vpor	%xmm8,%xmm0,%xmm0
+	xorl	%edx,%ecx
+	shrdl	$7,%ebx,%ebx
+	movl	%eax,%edi
+	xorl	%ecx,%esi
+	shldl	$5,%eax,%eax
+	addl	%esi,%ebp
+	xorl	%ebx,%edi
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	addl	12(%rsp),%edx
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	shrdl	$7,%eax,%eax
+	movl	%ebp,%esi
+	xorl	%ebx,%edi
+	shldl	$5,%ebp,%ebp
+	addl	%edi,%edx
+	xorl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	vpalignr	$8,%xmm7,%xmm0,%xmm8
+	vpxor	%xmm5,%xmm1,%xmm1
+	addl	16(%rsp),%ecx
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	shrdl	$7,%ebp,%ebp
+	vpxor	%xmm2,%xmm1,%xmm1
+	movl	%edx,%edi
+	xorl	%eax,%esi
+	vpaddd	%xmm0,%xmm11,%xmm9
+	shldl	$5,%edx,%edx
+	addl	%esi,%ecx
+	vpxor	%xmm8,%xmm1,%xmm1
+	xorl	%ebp,%edi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	addl	20(%rsp),%ebx
+	vpsrld	$30,%xmm1,%xmm8
+	vmovdqa	%xmm9,0(%rsp)
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	shrdl	$7,%edx,%edx
+	movl	%ecx,%esi
+	vpslld	$2,%xmm1,%xmm1
+	xorl	%ebp,%edi
+	shldl	$5,%ecx,%ecx
+	addl	%edi,%ebx
+	xorl	%edx,%esi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	addl	24(%rsp),%eax
+	andl	%edx,%esi
+	vpor	%xmm8,%xmm1,%xmm1
+	xorl	%ebp,%edx
+	shrdl	$7,%ecx,%ecx
+	movl	%ebx,%edi
+	xorl	%edx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	xorl	%ecx,%edi
+	xorl	%edx,%ecx
+	addl	%ebx,%eax
+	addl	28(%rsp),%ebp
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	shrdl	$7,%ebx,%ebx
+	movl	%eax,%esi
+	xorl	%ecx,%edi
+	shldl	$5,%eax,%eax
+	addl	%edi,%ebp
+	xorl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	%eax,%ebp
+	vpalignr	$8,%xmm0,%xmm1,%xmm8
+	vpxor	%xmm6,%xmm2,%xmm2
+	addl	32(%rsp),%edx
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	shrdl	$7,%eax,%eax
+	vpxor	%xmm3,%xmm2,%xmm2
+	movl	%ebp,%edi
+	xorl	%ebx,%esi
+	vpaddd	%xmm1,%xmm11,%xmm9
+	shldl	$5,%ebp,%ebp
+	addl	%esi,%edx
+	vpxor	%xmm8,%xmm2,%xmm2
+	xorl	%eax,%edi
+	xorl	%ebx,%eax
+	addl	%ebp,%edx
+	addl	36(%rsp),%ecx
+	vpsrld	$30,%xmm2,%xmm8
+	vmovdqa	%xmm9,16(%rsp)
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	shrdl	$7,%ebp,%ebp
+	movl	%edx,%esi
+	vpslld	$2,%xmm2,%xmm2
+	xorl	%eax,%edi
+	shldl	$5,%edx,%edx
+	addl	%edi,%ecx
+	xorl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	%edx,%ecx
+	addl	40(%rsp),%ebx
+	andl	%ebp,%esi
+	vpor	%xmm8,%xmm2,%xmm2
+	xorl	%eax,%ebp
+	shrdl	$7,%edx,%edx
+	movl	%ecx,%edi
+	xorl	%ebp,%esi
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	xorl	%edx,%edi
+	xorl	%ebp,%edx
+	addl	%ecx,%ebx
+	addl	44(%rsp),%eax
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	shrdl	$7,%ecx,%ecx
+	movl	%ebx,%esi
+	xorl	%edx,%edi
+	shldl	$5,%ebx,%ebx
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	vpalignr	$8,%xmm1,%xmm2,%xmm8
+	vpxor	%xmm7,%xmm3,%xmm3
+	addl	48(%rsp),%ebp
+	xorl	%ecx,%esi
+	movl	%eax,%edi
+	shldl	$5,%eax,%eax
+	vpxor	%xmm4,%xmm3,%xmm3
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	vpaddd	%xmm2,%xmm11,%xmm9
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	vpxor	%xmm8,%xmm3,%xmm3
+	addl	52(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	shldl	$5,%ebp,%ebp
+	vpsrld	$30,%xmm3,%xmm8
+	vmovdqa	%xmm9,32(%rsp)
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	vpslld	$2,%xmm3,%xmm3
+	addl	56(%rsp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%edi
+	shldl	$5,%edx,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	vpor	%xmm8,%xmm3,%xmm3
+	addl	60(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	0(%rsp),%eax
+	vpaddd	%xmm3,%xmm11,%xmm9
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	vmovdqa	%xmm9,48(%rsp)
+	xorl	%edx,%edi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	addl	4(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	addl	8(%rsp),%edx
+	xorl	%ebx,%esi
+	movl	%ebp,%edi
+	shldl	$5,%ebp,%ebp
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	addl	12(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	cmpq	%r10,%r9
+	je	L$done_avx
+	vmovdqa	64(%r14),%xmm6
+	vmovdqa	-64(%r14),%xmm11
+	vmovdqu	0(%r9),%xmm0
+	vmovdqu	16(%r9),%xmm1
+	vmovdqu	32(%r9),%xmm2
+	vmovdqu	48(%r9),%xmm3
+	vpshufb	%xmm6,%xmm0,%xmm0
+	addq	$64,%r9
+	addl	16(%rsp),%ebx
+	xorl	%ebp,%esi
+	vpshufb	%xmm6,%xmm1,%xmm1
+	movl	%ecx,%edi
+	shldl	$5,%ecx,%ecx
+	vpaddd	%xmm11,%xmm0,%xmm4
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	vmovdqa	%xmm4,0(%rsp)
+	addl	20(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	addl	24(%rsp),%ebp
+	xorl	%ecx,%esi
+	movl	%eax,%edi
+	shldl	$5,%eax,%eax
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	addl	28(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	shldl	$5,%ebp,%ebp
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	addl	32(%rsp),%ecx
+	xorl	%eax,%esi
+	vpshufb	%xmm6,%xmm2,%xmm2
+	movl	%edx,%edi
+	shldl	$5,%edx,%edx
+	vpaddd	%xmm11,%xmm1,%xmm5
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	vmovdqa	%xmm5,16(%rsp)
+	addl	36(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	40(%rsp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	addl	44(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	addl	48(%rsp),%edx
+	xorl	%ebx,%esi
+	vpshufb	%xmm6,%xmm3,%xmm3
+	movl	%ebp,%edi
+	shldl	$5,%ebp,%ebp
+	vpaddd	%xmm11,%xmm2,%xmm6
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	vmovdqa	%xmm6,32(%rsp)
+	addl	52(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	addl	56(%rsp),%ebx
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	60(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%edi,%eax
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	addl	0(%r8),%eax
+	addl	4(%r8),%esi
+	addl	8(%r8),%ecx
+	addl	12(%r8),%edx
+	movl	%eax,0(%r8)
+	addl	16(%r8),%ebp
+	movl	%esi,4(%r8)
+	movl	%esi,%ebx
+	movl	%ecx,8(%r8)
+	movl	%ecx,%edi
+	movl	%edx,12(%r8)
+	xorl	%edx,%edi
+	movl	%ebp,16(%r8)
+	andl	%edi,%esi
+	jmp	L$oop_avx
+
+.p2align	4
+L$done_avx:
+	addl	16(%rsp),%ebx
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	20(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%edi,%eax
+	xorl	%edx,%esi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	addl	24(%rsp),%ebp
+	xorl	%ecx,%esi
+	movl	%eax,%edi
+	shldl	$5,%eax,%eax
+	addl	%esi,%ebp
+	xorl	%ecx,%edi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	addl	28(%rsp),%edx
+	xorl	%ebx,%edi
+	movl	%ebp,%esi
+	shldl	$5,%ebp,%ebp
+	addl	%edi,%edx
+	xorl	%ebx,%esi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	addl	32(%rsp),%ecx
+	xorl	%eax,%esi
+	movl	%edx,%edi
+	shldl	$5,%edx,%edx
+	addl	%esi,%ecx
+	xorl	%eax,%edi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	addl	36(%rsp),%ebx
+	xorl	%ebp,%edi
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	addl	%edi,%ebx
+	xorl	%ebp,%esi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	40(%rsp),%eax
+	xorl	%edx,%esi
+	movl	%ebx,%edi
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	xorl	%edx,%edi
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	addl	44(%rsp),%ebp
+	xorl	%ecx,%edi
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	addl	%edi,%ebp
+	xorl	%ecx,%esi
+	shrdl	$7,%ebx,%ebx
+	addl	%eax,%ebp
+	addl	48(%rsp),%edx
+	xorl	%ebx,%esi
+	movl	%ebp,%edi
+	shldl	$5,%ebp,%ebp
+	addl	%esi,%edx
+	xorl	%ebx,%edi
+	shrdl	$7,%eax,%eax
+	addl	%ebp,%edx
+	addl	52(%rsp),%ecx
+	xorl	%eax,%edi
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	addl	%edi,%ecx
+	xorl	%eax,%esi
+	shrdl	$7,%ebp,%ebp
+	addl	%edx,%ecx
+	addl	56(%rsp),%ebx
+	xorl	%ebp,%esi
+	movl	%ecx,%edi
+	shldl	$5,%ecx,%ecx
+	addl	%esi,%ebx
+	xorl	%ebp,%edi
+	shrdl	$7,%edx,%edx
+	addl	%ecx,%ebx
+	addl	60(%rsp),%eax
+	xorl	%edx,%edi
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	addl	%edi,%eax
+	shrdl	$7,%ecx,%ecx
+	addl	%ebx,%eax
+	vzeroupper
+
+	addl	0(%r8),%eax
+	addl	4(%r8),%esi
+	addl	8(%r8),%ecx
+	movl	%eax,0(%r8)
+	addl	12(%r8),%edx
+	movl	%esi,4(%r8)
+	addl	16(%r8),%ebp
+	movl	%ecx,8(%r8)
+	movl	%edx,12(%r8)
+	movl	%ebp,16(%r8)
+	movq	-40(%r11),%r14
+	movq	-32(%r11),%r13
+	movq	-24(%r11),%r12
+	movq	-16(%r11),%rbp
+	movq	-8(%r11),%rbx
+	leaq	(%r11),%rsp
+L$epilogue_avx:
+	.byte	0xf3,0xc3
+
+.p2align	6  /* start the table on a 64-byte (2^6) boundary */
+K_XX_XX:  /* SHA-1 constant table: each .long row is 16 bytes, each round constant fills two rows (32 bytes) */
+.long	0x5a827999,0x5a827999,0x5a827999,0x5a827999  /* offset 0: SHA-1 K for rounds 0-19 (FIPS 180-4), one copy per 32-bit lane */
+.long	0x5a827999,0x5a827999,0x5a827999,0x5a827999  /* offset 16: same constant repeated */
+.long	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1  /* offset 32: SHA-1 K for rounds 20-39 */
+.long	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1  /* offset 48: same constant repeated */
+.long	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc  /* offset 64: SHA-1 K for rounds 40-59 */
+.long	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc  /* offset 80: same constant repeated */
+.long	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6  /* offset 96: SHA-1 K for rounds 60-79 */
+.long	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6  /* offset 112: same constant repeated */
+.long	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f  /* offset 128: shuffle mask that reverses the bytes inside each 32-bit word; NOTE(review): presumably the operand of the vpshufb on loaded input via 64(%r14), assuming %r14 = K_XX_XX+64 (set outside this table) - confirm */
+.long	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f  /* offset 144: same mask repeated */
+.byte	0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0  /* offset 160: byte indices 15..0, i.e. a full 16-byte reversal pattern; its consumer is not visible here */
+.byte	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0  /* NUL-terminated ASCII: "SHA1 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
+.p2align	6  /* pad to the next 64-byte boundary after the string */
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/fipsmodule/sha256-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/sha256-x86_64.S
new file mode 100644
index 0000000..ac6559e
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/sha256-x86_64.S
@@ -0,0 +1,3905 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+
+.globl	_sha256_block_data_order
+.private_extern _sha256_block_data_order
+
+.p2align	4
+_sha256_block_data_order:
+	leaq	_OPENSSL_ia32cap_P(%rip),%r11
+	movl	0(%r11),%r9d
+	movl	4(%r11),%r10d
+	movl	8(%r11),%r11d
+	andl	$1073741824,%r9d
+	andl	$268435968,%r10d
+	orl	%r9d,%r10d
+	cmpl	$1342177792,%r10d
+	je	L$avx_shortcut
+	testl	$512,%r10d
+	jnz	L$ssse3_shortcut
+	movq	%rsp,%rax
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	shlq	$4,%rdx
+	subq	$64+32,%rsp
+	leaq	(%rsi,%rdx,4),%rdx
+	andq	$-64,%rsp
+	movq	%rdi,64+0(%rsp)
+	movq	%rsi,64+8(%rsp)
+	movq	%rdx,64+16(%rsp)
+	movq	%rax,64+24(%rsp)
+L$prologue:
+
+	movl	0(%rdi),%eax
+	movl	4(%rdi),%ebx
+	movl	8(%rdi),%ecx
+	movl	12(%rdi),%edx
+	movl	16(%rdi),%r8d
+	movl	20(%rdi),%r9d
+	movl	24(%rdi),%r10d
+	movl	28(%rdi),%r11d
+	jmp	L$loop
+
+.p2align	4
+L$loop:
+	movl	%ebx,%edi
+	leaq	K256(%rip),%rbp
+	xorl	%ecx,%edi
+	movl	0(%rsi),%r12d
+	movl	%r8d,%r13d
+	movl	%eax,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+
+	xorl	%r8d,%r13d
+	rorl	$9,%r14d
+	xorl	%r10d,%r15d
+
+	movl	%r12d,0(%rsp)
+	xorl	%eax,%r14d
+	andl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%r10d,%r15d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	addl	%r15d,%r12d
+
+	movl	%eax,%r15d
+	addl	(%rbp),%r12d
+	xorl	%eax,%r14d
+
+	xorl	%ebx,%r15d
+	rorl	$6,%r13d
+	movl	%ebx,%r11d
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%r11d
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%r11d
+	movl	4(%rsi),%r12d
+	movl	%edx,%r13d
+	movl	%r11d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r8d,%edi
+
+	xorl	%edx,%r13d
+	rorl	$9,%r14d
+	xorl	%r9d,%edi
+
+	movl	%r12d,4(%rsp)
+	xorl	%r11d,%r14d
+	andl	%edx,%edi
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r9d,%edi
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	addl	%edi,%r12d
+
+	movl	%r11d,%edi
+	addl	(%rbp),%r12d
+	xorl	%r11d,%r14d
+
+	xorl	%eax,%edi
+	rorl	$6,%r13d
+	movl	%eax,%r10d
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%r10d
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%r10d
+	movl	8(%rsi),%r12d
+	movl	%ecx,%r13d
+	movl	%r10d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+
+	xorl	%ecx,%r13d
+	rorl	$9,%r14d
+	xorl	%r8d,%r15d
+
+	movl	%r12d,8(%rsp)
+	xorl	%r10d,%r14d
+	andl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r8d,%r15d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	addl	%r15d,%r12d
+
+	movl	%r10d,%r15d
+	addl	(%rbp),%r12d
+	xorl	%r10d,%r14d
+
+	xorl	%r11d,%r15d
+	rorl	$6,%r13d
+	movl	%r11d,%r9d
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%r9d
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%r9d
+	movl	12(%rsi),%r12d
+	movl	%ebx,%r13d
+	movl	%r9d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ecx,%edi
+
+	xorl	%ebx,%r13d
+	rorl	$9,%r14d
+	xorl	%edx,%edi
+
+	movl	%r12d,12(%rsp)
+	xorl	%r9d,%r14d
+	andl	%ebx,%edi
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%edx,%edi
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	addl	%edi,%r12d
+
+	movl	%r9d,%edi
+	addl	(%rbp),%r12d
+	xorl	%r9d,%r14d
+
+	xorl	%r10d,%edi
+	rorl	$6,%r13d
+	movl	%r10d,%r8d
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%r8d
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+
+	leaq	20(%rbp),%rbp
+	addl	%r14d,%r8d
+	movl	16(%rsi),%r12d
+	movl	%eax,%r13d
+	movl	%r8d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+
+	xorl	%eax,%r13d
+	rorl	$9,%r14d
+	xorl	%ecx,%r15d
+
+	movl	%r12d,16(%rsp)
+	xorl	%r8d,%r14d
+	andl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%ecx,%r15d
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	addl	%r15d,%r12d
+
+	movl	%r8d,%r15d
+	addl	(%rbp),%r12d
+	xorl	%r8d,%r14d
+
+	xorl	%r9d,%r15d
+	rorl	$6,%r13d
+	movl	%r9d,%edx
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%edx
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%edx
+	movl	20(%rsi),%r12d
+	movl	%r11d,%r13d
+	movl	%edx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%eax,%edi
+
+	xorl	%r11d,%r13d
+	rorl	$9,%r14d
+	xorl	%ebx,%edi
+
+	movl	%r12d,20(%rsp)
+	xorl	%edx,%r14d
+	andl	%r11d,%edi
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%ebx,%edi
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	addl	%edi,%r12d
+
+	movl	%edx,%edi
+	addl	(%rbp),%r12d
+	xorl	%edx,%r14d
+
+	xorl	%r8d,%edi
+	rorl	$6,%r13d
+	movl	%r8d,%ecx
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%ecx
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%ecx
+	movl	24(%rsi),%r12d
+	movl	%r10d,%r13d
+	movl	%ecx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+
+	xorl	%r10d,%r13d
+	rorl	$9,%r14d
+	xorl	%eax,%r15d
+
+	movl	%r12d,24(%rsp)
+	xorl	%ecx,%r14d
+	andl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%eax,%r15d
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	addl	%r15d,%r12d
+
+	movl	%ecx,%r15d
+	addl	(%rbp),%r12d
+	xorl	%ecx,%r14d
+
+	xorl	%edx,%r15d
+	rorl	$6,%r13d
+	movl	%edx,%ebx
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%ebx
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%ebx
+	movl	28(%rsi),%r12d
+	movl	%r9d,%r13d
+	movl	%ebx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r10d,%edi
+
+	xorl	%r9d,%r13d
+	rorl	$9,%r14d
+	xorl	%r11d,%edi
+
+	movl	%r12d,28(%rsp)
+	xorl	%ebx,%r14d
+	andl	%r9d,%edi
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%r11d,%edi
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	addl	%edi,%r12d
+
+	movl	%ebx,%edi
+	addl	(%rbp),%r12d
+	xorl	%ebx,%r14d
+
+	xorl	%ecx,%edi
+	rorl	$6,%r13d
+	movl	%ecx,%eax
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%eax
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+
+	leaq	20(%rbp),%rbp
+	addl	%r14d,%eax
+	movl	32(%rsi),%r12d
+	movl	%r8d,%r13d
+	movl	%eax,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+
+	xorl	%r8d,%r13d
+	rorl	$9,%r14d
+	xorl	%r10d,%r15d
+
+	movl	%r12d,32(%rsp)
+	xorl	%eax,%r14d
+	andl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%r10d,%r15d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	addl	%r15d,%r12d
+
+	movl	%eax,%r15d
+	addl	(%rbp),%r12d
+	xorl	%eax,%r14d
+
+	xorl	%ebx,%r15d
+	rorl	$6,%r13d
+	movl	%ebx,%r11d
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%r11d
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%r11d
+	movl	36(%rsi),%r12d
+	movl	%edx,%r13d
+	movl	%r11d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r8d,%edi
+
+	xorl	%edx,%r13d
+	rorl	$9,%r14d
+	xorl	%r9d,%edi
+
+	movl	%r12d,36(%rsp)
+	xorl	%r11d,%r14d
+	andl	%edx,%edi
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r9d,%edi
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	addl	%edi,%r12d
+
+	movl	%r11d,%edi
+	addl	(%rbp),%r12d
+	xorl	%r11d,%r14d
+
+	xorl	%eax,%edi
+	rorl	$6,%r13d
+	movl	%eax,%r10d
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%r10d
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%r10d
+	movl	40(%rsi),%r12d
+	movl	%ecx,%r13d
+	movl	%r10d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+
+	xorl	%ecx,%r13d
+	rorl	$9,%r14d
+	xorl	%r8d,%r15d
+
+	movl	%r12d,40(%rsp)
+	xorl	%r10d,%r14d
+	andl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r8d,%r15d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	addl	%r15d,%r12d
+
+	movl	%r10d,%r15d
+	addl	(%rbp),%r12d
+	xorl	%r10d,%r14d
+
+	xorl	%r11d,%r15d
+	rorl	$6,%r13d
+	movl	%r11d,%r9d
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%r9d
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%r9d
+	movl	44(%rsi),%r12d
+	movl	%ebx,%r13d
+	movl	%r9d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ecx,%edi
+
+	xorl	%ebx,%r13d
+	rorl	$9,%r14d
+	xorl	%edx,%edi
+
+	movl	%r12d,44(%rsp)
+	xorl	%r9d,%r14d
+	andl	%ebx,%edi
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%edx,%edi
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	addl	%edi,%r12d
+
+	movl	%r9d,%edi
+	addl	(%rbp),%r12d
+	xorl	%r9d,%r14d
+
+	xorl	%r10d,%edi
+	rorl	$6,%r13d
+	movl	%r10d,%r8d
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%r8d
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+
+	leaq	20(%rbp),%rbp
+	addl	%r14d,%r8d
+	movl	48(%rsi),%r12d
+	movl	%eax,%r13d
+	movl	%r8d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+
+	xorl	%eax,%r13d
+	rorl	$9,%r14d
+	xorl	%ecx,%r15d
+
+	movl	%r12d,48(%rsp)
+	xorl	%r8d,%r14d
+	andl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%ecx,%r15d
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	addl	%r15d,%r12d
+
+	movl	%r8d,%r15d
+	addl	(%rbp),%r12d
+	xorl	%r8d,%r14d
+
+	xorl	%r9d,%r15d
+	rorl	$6,%r13d
+	movl	%r9d,%edx
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%edx
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%edx
+	movl	52(%rsi),%r12d
+	movl	%r11d,%r13d
+	movl	%edx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%eax,%edi
+
+	xorl	%r11d,%r13d
+	rorl	$9,%r14d
+	xorl	%ebx,%edi
+
+	movl	%r12d,52(%rsp)
+	xorl	%edx,%r14d
+	andl	%r11d,%edi
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%ebx,%edi
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	addl	%edi,%r12d
+
+	movl	%edx,%edi
+	addl	(%rbp),%r12d
+	xorl	%edx,%r14d
+
+	xorl	%r8d,%edi
+	rorl	$6,%r13d
+	movl	%r8d,%ecx
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%ecx
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%ecx
+	movl	56(%rsi),%r12d
+	movl	%r10d,%r13d
+	movl	%ecx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+
+	xorl	%r10d,%r13d
+	rorl	$9,%r14d
+	xorl	%eax,%r15d
+
+	movl	%r12d,56(%rsp)
+	xorl	%ecx,%r14d
+	andl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%eax,%r15d
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	addl	%r15d,%r12d
+
+	movl	%ecx,%r15d
+	addl	(%rbp),%r12d
+	xorl	%ecx,%r14d
+
+	xorl	%edx,%r15d
+	rorl	$6,%r13d
+	movl	%edx,%ebx
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%ebx
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+
+	leaq	4(%rbp),%rbp
+	addl	%r14d,%ebx
+	movl	60(%rsi),%r12d
+	movl	%r9d,%r13d
+	movl	%ebx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r10d,%edi
+
+	xorl	%r9d,%r13d
+	rorl	$9,%r14d
+	xorl	%r11d,%edi
+
+	movl	%r12d,60(%rsp)
+	xorl	%ebx,%r14d
+	andl	%r9d,%edi
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%r11d,%edi
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	addl	%edi,%r12d
+
+	movl	%ebx,%edi
+	addl	(%rbp),%r12d
+	xorl	%ebx,%r14d
+
+	xorl	%ecx,%edi
+	rorl	$6,%r13d
+	movl	%ecx,%eax
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%eax
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+
+	leaq	20(%rbp),%rbp
+	jmp	L$rounds_16_xx
+.p2align	4
+L$rounds_16_xx:
+	movl	4(%rsp),%r13d
+	movl	56(%rsp),%r15d
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%eax
+	movl	%r15d,%r14d
+	rorl	$2,%r15d
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	xorl	%r13d,%r12d
+	xorl	%r14d,%r15d
+	addl	36(%rsp),%r12d
+
+	addl	0(%rsp),%r12d
+	movl	%r8d,%r13d
+	addl	%r15d,%r12d
+	movl	%eax,%r14d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+
+	xorl	%r8d,%r13d
+	rorl	$9,%r14d
+	xorl	%r10d,%r15d
+
+	movl	%r12d,0(%rsp)
+	xorl	%eax,%r14d
+	andl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%r10d,%r15d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	addl	%r15d,%r12d
+
+	movl	%eax,%r15d
+	addl	(%rbp),%r12d
+	xorl	%eax,%r14d
+
+	xorl	%ebx,%r15d
+	rorl	$6,%r13d
+	movl	%ebx,%r11d
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%r11d
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+
+	leaq	4(%rbp),%rbp
+	movl	8(%rsp),%r13d
+	movl	60(%rsp),%edi
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%r11d
+	movl	%edi,%r14d
+	rorl	$2,%edi
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%edi
+	shrl	$10,%r14d
+
+	rorl	$17,%edi
+	xorl	%r13d,%r12d
+	xorl	%r14d,%edi
+	addl	40(%rsp),%r12d
+
+	addl	4(%rsp),%r12d
+	movl	%edx,%r13d
+	addl	%edi,%r12d
+	movl	%r11d,%r14d
+	rorl	$14,%r13d
+	movl	%r8d,%edi
+
+	xorl	%edx,%r13d
+	rorl	$9,%r14d
+	xorl	%r9d,%edi
+
+	movl	%r12d,4(%rsp)
+	xorl	%r11d,%r14d
+	andl	%edx,%edi
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r9d,%edi
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	addl	%edi,%r12d
+
+	movl	%r11d,%edi
+	addl	(%rbp),%r12d
+	xorl	%r11d,%r14d
+
+	xorl	%eax,%edi
+	rorl	$6,%r13d
+	movl	%eax,%r10d
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%r10d
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+
+	leaq	4(%rbp),%rbp
+	movl	12(%rsp),%r13d
+	movl	0(%rsp),%r15d
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%r10d
+	movl	%r15d,%r14d
+	rorl	$2,%r15d
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	xorl	%r13d,%r12d
+	xorl	%r14d,%r15d
+	addl	44(%rsp),%r12d
+
+	addl	8(%rsp),%r12d
+	movl	%ecx,%r13d
+	addl	%r15d,%r12d
+	movl	%r10d,%r14d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+
+	xorl	%ecx,%r13d
+	rorl	$9,%r14d
+	xorl	%r8d,%r15d
+
+	movl	%r12d,8(%rsp)
+	xorl	%r10d,%r14d
+	andl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r8d,%r15d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	addl	%r15d,%r12d
+
+	movl	%r10d,%r15d
+	addl	(%rbp),%r12d
+	xorl	%r10d,%r14d
+
+	xorl	%r11d,%r15d
+	rorl	$6,%r13d
+	movl	%r11d,%r9d
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%r9d
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+
+	leaq	4(%rbp),%rbp
+	movl	16(%rsp),%r13d
+	movl	4(%rsp),%edi
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%r9d
+	movl	%edi,%r14d
+	rorl	$2,%edi
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%edi
+	shrl	$10,%r14d
+
+	rorl	$17,%edi
+	xorl	%r13d,%r12d
+	xorl	%r14d,%edi
+	addl	48(%rsp),%r12d
+
+	addl	12(%rsp),%r12d
+	movl	%ebx,%r13d
+	addl	%edi,%r12d
+	movl	%r9d,%r14d
+	rorl	$14,%r13d
+	movl	%ecx,%edi
+
+	xorl	%ebx,%r13d
+	rorl	$9,%r14d
+	xorl	%edx,%edi
+
+	movl	%r12d,12(%rsp)
+	xorl	%r9d,%r14d
+	andl	%ebx,%edi
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%edx,%edi
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	addl	%edi,%r12d
+
+	movl	%r9d,%edi
+	addl	(%rbp),%r12d
+	xorl	%r9d,%r14d
+
+	xorl	%r10d,%edi
+	rorl	$6,%r13d
+	movl	%r10d,%r8d
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%r8d
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+
+	leaq	20(%rbp),%rbp
+	movl	20(%rsp),%r13d
+	movl	8(%rsp),%r15d
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%r8d
+	movl	%r15d,%r14d
+	rorl	$2,%r15d
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	xorl	%r13d,%r12d
+	xorl	%r14d,%r15d
+	addl	52(%rsp),%r12d
+
+	addl	16(%rsp),%r12d
+	movl	%eax,%r13d
+	addl	%r15d,%r12d
+	movl	%r8d,%r14d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+
+	xorl	%eax,%r13d
+	rorl	$9,%r14d
+	xorl	%ecx,%r15d
+
+	movl	%r12d,16(%rsp)
+	xorl	%r8d,%r14d
+	andl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%ecx,%r15d
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	addl	%r15d,%r12d
+
+	movl	%r8d,%r15d
+	addl	(%rbp),%r12d
+	xorl	%r8d,%r14d
+
+	xorl	%r9d,%r15d
+	rorl	$6,%r13d
+	movl	%r9d,%edx
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%edx
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+
+	leaq	4(%rbp),%rbp
+	movl	24(%rsp),%r13d
+	movl	12(%rsp),%edi
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%edx
+	movl	%edi,%r14d
+	rorl	$2,%edi
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%edi
+	shrl	$10,%r14d
+
+	rorl	$17,%edi
+	xorl	%r13d,%r12d
+	xorl	%r14d,%edi
+	addl	56(%rsp),%r12d
+
+	addl	20(%rsp),%r12d
+	movl	%r11d,%r13d
+	addl	%edi,%r12d
+	movl	%edx,%r14d
+	rorl	$14,%r13d
+	movl	%eax,%edi
+
+	xorl	%r11d,%r13d
+	rorl	$9,%r14d
+	xorl	%ebx,%edi
+
+	movl	%r12d,20(%rsp)
+	xorl	%edx,%r14d
+	andl	%r11d,%edi
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%ebx,%edi
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	addl	%edi,%r12d
+
+	movl	%edx,%edi
+	addl	(%rbp),%r12d
+	xorl	%edx,%r14d
+
+	xorl	%r8d,%edi
+	rorl	$6,%r13d
+	movl	%r8d,%ecx
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%ecx
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+
+	leaq	4(%rbp),%rbp
+	movl	28(%rsp),%r13d
+	movl	16(%rsp),%r15d
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%ecx
+	movl	%r15d,%r14d
+	rorl	$2,%r15d
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	xorl	%r13d,%r12d
+	xorl	%r14d,%r15d
+	addl	60(%rsp),%r12d
+
+	addl	24(%rsp),%r12d
+	movl	%r10d,%r13d
+	addl	%r15d,%r12d
+	movl	%ecx,%r14d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+
+	xorl	%r10d,%r13d
+	rorl	$9,%r14d
+	xorl	%eax,%r15d
+
+	movl	%r12d,24(%rsp)
+	xorl	%ecx,%r14d
+	andl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%eax,%r15d
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	addl	%r15d,%r12d
+
+	movl	%ecx,%r15d
+	addl	(%rbp),%r12d
+	xorl	%ecx,%r14d
+
+	xorl	%edx,%r15d
+	rorl	$6,%r13d
+	movl	%edx,%ebx
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%ebx
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+
+	leaq	4(%rbp),%rbp
+	movl	32(%rsp),%r13d
+	movl	20(%rsp),%edi
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%ebx
+	movl	%edi,%r14d
+	rorl	$2,%edi
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%edi
+	shrl	$10,%r14d
+
+	rorl	$17,%edi
+	xorl	%r13d,%r12d
+	xorl	%r14d,%edi
+	addl	0(%rsp),%r12d
+
+	addl	28(%rsp),%r12d
+	movl	%r9d,%r13d
+	addl	%edi,%r12d
+	movl	%ebx,%r14d
+	rorl	$14,%r13d
+	movl	%r10d,%edi
+
+	xorl	%r9d,%r13d
+	rorl	$9,%r14d
+	xorl	%r11d,%edi
+
+	movl	%r12d,28(%rsp)
+	xorl	%ebx,%r14d
+	andl	%r9d,%edi
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%r11d,%edi
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	addl	%edi,%r12d
+
+	movl	%ebx,%edi
+	addl	(%rbp),%r12d
+	xorl	%ebx,%r14d
+
+	xorl	%ecx,%edi
+	rorl	$6,%r13d
+	movl	%ecx,%eax
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%eax
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+
+	leaq	20(%rbp),%rbp
+	movl	36(%rsp),%r13d
+	movl	24(%rsp),%r15d
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%eax
+	movl	%r15d,%r14d
+	rorl	$2,%r15d
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	xorl	%r13d,%r12d
+	xorl	%r14d,%r15d
+	addl	4(%rsp),%r12d
+
+	addl	32(%rsp),%r12d
+	movl	%r8d,%r13d
+	addl	%r15d,%r12d
+	movl	%eax,%r14d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+
+	xorl	%r8d,%r13d
+	rorl	$9,%r14d
+	xorl	%r10d,%r15d
+
+	movl	%r12d,32(%rsp)
+	xorl	%eax,%r14d
+	andl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%r10d,%r15d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	addl	%r15d,%r12d
+
+	movl	%eax,%r15d
+	addl	(%rbp),%r12d
+	xorl	%eax,%r14d
+
+	xorl	%ebx,%r15d
+	rorl	$6,%r13d
+	movl	%ebx,%r11d
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%r11d
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+
+	leaq	4(%rbp),%rbp
+	movl	40(%rsp),%r13d
+	movl	28(%rsp),%edi
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%r11d
+	movl	%edi,%r14d
+	rorl	$2,%edi
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%edi
+	shrl	$10,%r14d
+
+	rorl	$17,%edi
+	xorl	%r13d,%r12d
+	xorl	%r14d,%edi
+	addl	8(%rsp),%r12d
+
+	addl	36(%rsp),%r12d
+	movl	%edx,%r13d
+	addl	%edi,%r12d
+	movl	%r11d,%r14d
+	rorl	$14,%r13d
+	movl	%r8d,%edi
+
+	xorl	%edx,%r13d
+	rorl	$9,%r14d
+	xorl	%r9d,%edi
+
+	movl	%r12d,36(%rsp)
+	xorl	%r11d,%r14d
+	andl	%edx,%edi
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r9d,%edi
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	addl	%edi,%r12d
+
+	movl	%r11d,%edi
+	addl	(%rbp),%r12d
+	xorl	%r11d,%r14d
+
+	xorl	%eax,%edi
+	rorl	$6,%r13d
+	movl	%eax,%r10d
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%r10d
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+
+	leaq	4(%rbp),%rbp
+	movl	44(%rsp),%r13d
+	movl	32(%rsp),%r15d
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%r10d
+	movl	%r15d,%r14d
+	rorl	$2,%r15d
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	xorl	%r13d,%r12d
+	xorl	%r14d,%r15d
+	addl	12(%rsp),%r12d
+
+	addl	40(%rsp),%r12d
+	movl	%ecx,%r13d
+	addl	%r15d,%r12d
+	movl	%r10d,%r14d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+
+	xorl	%ecx,%r13d
+	rorl	$9,%r14d
+	xorl	%r8d,%r15d
+
+	movl	%r12d,40(%rsp)
+	xorl	%r10d,%r14d
+	andl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r8d,%r15d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	addl	%r15d,%r12d
+
+	movl	%r10d,%r15d
+	addl	(%rbp),%r12d
+	xorl	%r10d,%r14d
+
+	xorl	%r11d,%r15d
+	rorl	$6,%r13d
+	movl	%r11d,%r9d
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%r9d
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+
+	leaq	4(%rbp),%rbp
+	movl	48(%rsp),%r13d
+	movl	36(%rsp),%edi
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%r9d
+	movl	%edi,%r14d
+	rorl	$2,%edi
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%edi
+	shrl	$10,%r14d
+
+	rorl	$17,%edi
+	xorl	%r13d,%r12d
+	xorl	%r14d,%edi
+	addl	16(%rsp),%r12d
+
+	addl	44(%rsp),%r12d
+	movl	%ebx,%r13d
+	addl	%edi,%r12d
+	movl	%r9d,%r14d
+	rorl	$14,%r13d
+	movl	%ecx,%edi
+
+	xorl	%ebx,%r13d
+	rorl	$9,%r14d
+	xorl	%edx,%edi
+
+	movl	%r12d,44(%rsp)
+	xorl	%r9d,%r14d
+	andl	%ebx,%edi
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%edx,%edi
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	addl	%edi,%r12d
+
+	movl	%r9d,%edi
+	addl	(%rbp),%r12d
+	xorl	%r9d,%r14d
+
+	xorl	%r10d,%edi
+	rorl	$6,%r13d
+	movl	%r10d,%r8d
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%r8d
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+
+	leaq	20(%rbp),%rbp
+	movl	52(%rsp),%r13d
+	movl	40(%rsp),%r15d
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%r8d
+	movl	%r15d,%r14d
+	rorl	$2,%r15d
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	xorl	%r13d,%r12d
+	xorl	%r14d,%r15d
+	addl	20(%rsp),%r12d
+
+	addl	48(%rsp),%r12d
+	movl	%eax,%r13d
+	addl	%r15d,%r12d
+	movl	%r8d,%r14d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+
+	xorl	%eax,%r13d
+	rorl	$9,%r14d
+	xorl	%ecx,%r15d
+
+	movl	%r12d,48(%rsp)
+	xorl	%r8d,%r14d
+	andl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%ecx,%r15d
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	addl	%r15d,%r12d
+
+	movl	%r8d,%r15d
+	addl	(%rbp),%r12d
+	xorl	%r8d,%r14d
+
+	xorl	%r9d,%r15d
+	rorl	$6,%r13d
+	movl	%r9d,%edx
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%edx
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+
+	leaq	4(%rbp),%rbp
+	movl	56(%rsp),%r13d
+	movl	44(%rsp),%edi
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%edx
+	movl	%edi,%r14d
+	rorl	$2,%edi
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%edi
+	shrl	$10,%r14d
+
+	rorl	$17,%edi
+	xorl	%r13d,%r12d
+	xorl	%r14d,%edi
+	addl	24(%rsp),%r12d
+
+	addl	52(%rsp),%r12d
+	movl	%r11d,%r13d
+	addl	%edi,%r12d
+	movl	%edx,%r14d
+	rorl	$14,%r13d
+	movl	%eax,%edi
+
+	xorl	%r11d,%r13d
+	rorl	$9,%r14d
+	xorl	%ebx,%edi
+
+	movl	%r12d,52(%rsp)
+	xorl	%edx,%r14d
+	andl	%r11d,%edi
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%ebx,%edi
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	addl	%edi,%r12d
+
+	movl	%edx,%edi
+	addl	(%rbp),%r12d
+	xorl	%edx,%r14d
+
+	xorl	%r8d,%edi
+	rorl	$6,%r13d
+	movl	%r8d,%ecx
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%ecx
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+
+	leaq	4(%rbp),%rbp
+	movl	60(%rsp),%r13d
+	movl	48(%rsp),%r15d
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%ecx
+	movl	%r15d,%r14d
+	rorl	$2,%r15d
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	xorl	%r13d,%r12d
+	xorl	%r14d,%r15d
+	addl	28(%rsp),%r12d
+
+	addl	56(%rsp),%r12d
+	movl	%r10d,%r13d
+	addl	%r15d,%r12d
+	movl	%ecx,%r14d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+
+	xorl	%r10d,%r13d
+	rorl	$9,%r14d
+	xorl	%eax,%r15d
+
+	movl	%r12d,56(%rsp)
+	xorl	%ecx,%r14d
+	andl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%eax,%r15d
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	addl	%r15d,%r12d
+
+	movl	%ecx,%r15d
+	addl	(%rbp),%r12d
+	xorl	%ecx,%r14d
+
+	xorl	%edx,%r15d
+	rorl	$6,%r13d
+	movl	%edx,%ebx
+
+	andl	%r15d,%edi
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%edi,%ebx
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+
+	leaq	4(%rbp),%rbp
+	movl	0(%rsp),%r13d
+	movl	52(%rsp),%edi
+
+	movl	%r13d,%r12d
+	rorl	$11,%r13d
+	addl	%r14d,%ebx
+	movl	%edi,%r14d
+	rorl	$2,%edi
+
+	xorl	%r12d,%r13d
+	shrl	$3,%r12d
+	rorl	$7,%r13d
+	xorl	%r14d,%edi
+	shrl	$10,%r14d
+
+	rorl	$17,%edi
+	xorl	%r13d,%r12d
+	xorl	%r14d,%edi
+	addl	32(%rsp),%r12d
+
+	addl	60(%rsp),%r12d
+	movl	%r9d,%r13d
+	addl	%edi,%r12d
+	movl	%ebx,%r14d
+	rorl	$14,%r13d
+	movl	%r10d,%edi
+
+	xorl	%r9d,%r13d
+	rorl	$9,%r14d
+	xorl	%r11d,%edi
+
+	movl	%r12d,60(%rsp)
+	xorl	%ebx,%r14d
+	andl	%r9d,%edi
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%r11d,%edi
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	addl	%edi,%r12d
+
+	movl	%ebx,%edi
+	addl	(%rbp),%r12d
+	xorl	%ebx,%r14d
+
+	xorl	%ecx,%edi
+	rorl	$6,%r13d
+	movl	%ecx,%eax
+
+	andl	%edi,%r15d
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+
+	xorl	%r15d,%eax
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+
+	leaq	20(%rbp),%rbp
+	cmpb	$0,3(%rbp)
+	jnz	L$rounds_16_xx
+
+	movq	64+0(%rsp),%rdi
+	addl	%r14d,%eax
+	leaq	64(%rsi),%rsi
+
+	addl	0(%rdi),%eax
+	addl	4(%rdi),%ebx
+	addl	8(%rdi),%ecx
+	addl	12(%rdi),%edx
+	addl	16(%rdi),%r8d
+	addl	20(%rdi),%r9d
+	addl	24(%rdi),%r10d
+	addl	28(%rdi),%r11d
+
+	cmpq	64+16(%rsp),%rsi
+
+	movl	%eax,0(%rdi)
+	movl	%ebx,4(%rdi)
+	movl	%ecx,8(%rdi)
+	movl	%edx,12(%rdi)
+	movl	%r8d,16(%rdi)
+	movl	%r9d,20(%rdi)
+	movl	%r10d,24(%rdi)
+	movl	%r11d,28(%rdi)
+	jb	L$loop
+
+	movq	64+24(%rsp),%rsi
+	movq	-48(%rsi),%r15
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp
+L$epilogue:
+	.byte	0xf3,0xc3
+
+.p2align	6
+
+K256:	# SHA-256 round-constant table; every 16-byte row of four constants is stored twice, so consecutive rows are 32 bytes apart
+.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5	# K[0..3]
+.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5	# duplicate of the row above (same pattern for every row below)
+.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5	# K[4..7]
+.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3	# K[8..11]
+.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174	# K[12..15]
+.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc	# K[16..19]
+.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da	# K[20..23]
+.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7	# K[24..27]
+.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967	# K[28..31]
+.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13	# K[32..35]
+.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85	# K[36..39]
+.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3	# K[40..43]
+.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070	# K[44..47]
+.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5	# K[48..51]
+.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3	# K[52..55]
+.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208	# K[56..59]
+.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2	# K[60..63]
+.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+.long	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f	# K256+512: pshufb mask that reverses the bytes of each 32-bit word; its zero top byte (offset 515) also ends the SSSE3 round loop
+.long	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long	0x03020100,0x0b0a0908,0xffffffff,0xffffffff	# K256+512+32: shuffle mask loaded into xmm8 by the AVX code
+.long	0x03020100,0x0b0a0908,0xffffffff,0xffffffff
+.long	0xffffffff,0xffffffff,0x03020100,0x0b0a0908	# K256+512+64: shuffle mask loaded into xmm9 by the AVX code
+.long	0xffffffff,0xffffffff,0x03020100,0x0b0a0908
+.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	# NUL-terminated ASCII banner: SHA256 block transform for x86_64, CRYPTOGAMS attribution
+
+.p2align	6
+sha256_block_data_order_ssse3:	# SSSE3 SHA-256 block function: rdi = pointer to eight 32-bit state words, rsi = input, rdx = count of 64-byte blocks
+L$ssse3_shortcut:
+	movq	%rsp,%rax	# keep the entry rsp so the epilogue can find the pushed registers
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	shlq	$4,%rdx	# rdx = block count * 16
+	subq	$96,%rsp	# reserve the local frame
+	leaq	(%rsi,%rdx,4),%rdx	# rdx = rsi + count*64 = end of input
+	andq	$-64,%rsp	# align the frame to 64 bytes
+	movq	%rdi,64+0(%rsp)	# save state pointer
+	movq	%rsi,64+8(%rsp)	# save input pointer
+	movq	%rdx,64+16(%rsp)	# save end-of-input pointer
+	movq	%rax,64+24(%rsp)	# save entry rsp
+L$prologue_ssse3:
+
+	movl	0(%rdi),%eax	# load the eight state words into eax,ebx,ecx,edx,r8d..r11d
+	movl	4(%rdi),%ebx
+	movl	8(%rdi),%ecx
+	movl	12(%rdi),%edx
+	movl	16(%rdi),%r8d
+	movl	20(%rdi),%r9d
+	movl	24(%rdi),%r10d
+	movl	28(%rdi),%r11d
+
+
+	jmp	L$loop_ssse3
+.p2align	4
+L$loop_ssse3:	# per-block setup: load 64 input bytes, byte-swap them, and stage W[0..15]+K[0..15] on the stack
+	movdqa	K256+512(%rip),%xmm7	# xmm7 = byte-reversal shuffle mask
+	movdqu	0(%rsi),%xmm0	# xmm0..xmm3 = the 16 message words of this block (unaligned loads)
+	movdqu	16(%rsi),%xmm1
+	movdqu	32(%rsi),%xmm2
+.byte	102,15,56,0,199	# pshufb %xmm7,%xmm0
+	movdqu	48(%rsi),%xmm3
+	leaq	K256(%rip),%rbp	# rbp = start of the round-constant table
+.byte	102,15,56,0,207	# pshufb %xmm7,%xmm1
+	movdqa	0(%rbp),%xmm4	# K rows sit 32 bytes apart because each row is duplicated in the table
+	movdqa	32(%rbp),%xmm5
+.byte	102,15,56,0,215	# pshufb %xmm7,%xmm2
+	paddd	%xmm0,%xmm4
+	movdqa	64(%rbp),%xmm6
+.byte	102,15,56,0,223	# pshufb %xmm7,%xmm3
+	movdqa	96(%rbp),%xmm7	# the mask in xmm7 is no longer needed; reuse the register for the fourth K row
+	paddd	%xmm1,%xmm5
+	paddd	%xmm2,%xmm6
+	paddd	%xmm3,%xmm7
+	movdqa	%xmm4,0(%rsp)	# stage W+K for rounds 0-3
+	movl	%eax,%r14d
+	movdqa	%xmm5,16(%rsp)	# stage W+K for rounds 4-7
+	movl	%ebx,%edi
+	movdqa	%xmm6,32(%rsp)	# stage W+K for rounds 8-11
+	xorl	%ecx,%edi
+	movdqa	%xmm7,48(%rsp)	# stage W+K for rounds 12-15
+	movl	%r8d,%r13d
+	jmp	L$ssse3_00_47
+
+.p2align	4
+L$ssse3_00_47:	# one pass = 16 scalar rounds interleaved with SIMD computation of the next 16 schedule words; runs three times (rounds 0-47)
+	subq	$-128,%rbp	# rbp += 128: step to the next four K256 rows
+	rorl	$14,%r13d
+	movdqa	%xmm1,%xmm4
+	movl	%r14d,%eax
+	movl	%r9d,%r12d
+	movdqa	%xmm3,%xmm7
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r12d
+	rorl	$5,%r13d
+	xorl	%eax,%r14d
+.byte	102,15,58,15,224,4	# palignr $4,%xmm0,%xmm4
+	andl	%r8d,%r12d
+	xorl	%r8d,%r13d
+.byte	102,15,58,15,250,4	# palignr $4,%xmm2,%xmm7
+	addl	0(%rsp),%r11d	# round 0 of this pass: add staged W+K
+	movl	%eax,%r15d
+	xorl	%r10d,%r12d
+	rorl	$11,%r14d
+	movdqa	%xmm4,%xmm5
+	xorl	%ebx,%r15d
+	addl	%r12d,%r11d
+	movdqa	%xmm4,%xmm6
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	psrld	$3,%xmm4
+	xorl	%eax,%r14d
+	addl	%r13d,%r11d
+	xorl	%ebx,%edi
+	paddd	%xmm7,%xmm0
+	rorl	$2,%r14d
+	addl	%r11d,%edx
+	psrld	$7,%xmm6
+	addl	%edi,%r11d
+	movl	%edx,%r13d
+	pshufd	$250,%xmm3,%xmm7
+	addl	%r11d,%r14d
+	rorl	$14,%r13d
+	pslld	$14,%xmm5
+	movl	%r14d,%r11d
+	movl	%r8d,%r12d
+	pxor	%xmm6,%xmm4
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r12d
+	rorl	$5,%r13d
+	psrld	$11,%xmm6
+	xorl	%r11d,%r14d
+	pxor	%xmm5,%xmm4
+	andl	%edx,%r12d
+	xorl	%edx,%r13d
+	pslld	$11,%xmm5
+	addl	4(%rsp),%r10d	# round 1: add staged W+K
+	movl	%r11d,%edi
+	pxor	%xmm6,%xmm4
+	xorl	%r9d,%r12d
+	rorl	$11,%r14d
+	movdqa	%xmm7,%xmm6
+	xorl	%eax,%edi
+	addl	%r12d,%r10d
+	pxor	%xmm5,%xmm4
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%r11d,%r14d
+	psrld	$10,%xmm7
+	addl	%r13d,%r10d
+	xorl	%eax,%r15d
+	paddd	%xmm4,%xmm0
+	rorl	$2,%r14d
+	addl	%r10d,%ecx
+	psrlq	$17,%xmm6
+	addl	%r15d,%r10d
+	movl	%ecx,%r13d
+	addl	%r10d,%r14d
+	pxor	%xmm6,%xmm7
+	rorl	$14,%r13d
+	movl	%r14d,%r10d
+	movl	%edx,%r12d
+	rorl	$9,%r14d
+	psrlq	$2,%xmm6
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r12d
+	pxor	%xmm6,%xmm7
+	rorl	$5,%r13d
+	xorl	%r10d,%r14d
+	andl	%ecx,%r12d
+	pshufd	$128,%xmm7,%xmm7
+	xorl	%ecx,%r13d
+	addl	8(%rsp),%r9d	# round 2: add staged W+K
+	movl	%r10d,%r15d
+	psrldq	$8,%xmm7
+	xorl	%r8d,%r12d
+	rorl	$11,%r14d
+	xorl	%r11d,%r15d
+	addl	%r12d,%r9d
+	rorl	$6,%r13d
+	paddd	%xmm7,%xmm0
+	andl	%r15d,%edi
+	xorl	%r10d,%r14d
+	addl	%r13d,%r9d
+	pshufd	$80,%xmm0,%xmm7
+	xorl	%r11d,%edi
+	rorl	$2,%r14d
+	addl	%r9d,%ebx
+	movdqa	%xmm7,%xmm6
+	addl	%edi,%r9d
+	movl	%ebx,%r13d
+	psrld	$10,%xmm7
+	addl	%r9d,%r14d
+	rorl	$14,%r13d
+	psrlq	$17,%xmm6
+	movl	%r14d,%r9d
+	movl	%ecx,%r12d
+	pxor	%xmm6,%xmm7
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r12d
+	rorl	$5,%r13d
+	xorl	%r9d,%r14d
+	psrlq	$2,%xmm6
+	andl	%ebx,%r12d
+	xorl	%ebx,%r13d
+	addl	12(%rsp),%r8d	# round 3: add staged W+K
+	pxor	%xmm6,%xmm7
+	movl	%r9d,%edi
+	xorl	%edx,%r12d
+	rorl	$11,%r14d
+	pshufd	$8,%xmm7,%xmm7
+	xorl	%r10d,%edi
+	addl	%r12d,%r8d
+	movdqa	0(%rbp),%xmm6	# K row matching the four new schedule words in xmm0
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	pslldq	$8,%xmm7
+	xorl	%r9d,%r14d
+	addl	%r13d,%r8d
+	xorl	%r10d,%r15d
+	paddd	%xmm7,%xmm0
+	rorl	$2,%r14d
+	addl	%r8d,%eax
+	addl	%r15d,%r8d
+	paddd	%xmm0,%xmm6
+	movl	%eax,%r13d
+	addl	%r8d,%r14d
+	movdqa	%xmm6,0(%rsp)	# restage W+K slot 0-3 for the following 16 rounds
+	rorl	$14,%r13d
+	movdqa	%xmm2,%xmm4
+	movl	%r14d,%r8d
+	movl	%ebx,%r12d
+	movdqa	%xmm0,%xmm7
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r12d
+	rorl	$5,%r13d
+	xorl	%r8d,%r14d
+.byte	102,15,58,15,225,4	# palignr $4,%xmm1,%xmm4
+	andl	%eax,%r12d
+	xorl	%eax,%r13d
+.byte	102,15,58,15,251,4	# palignr $4,%xmm3,%xmm7
+	addl	16(%rsp),%edx	# round 4: add staged W+K
+	movl	%r8d,%r15d
+	xorl	%ecx,%r12d
+	rorl	$11,%r14d
+	movdqa	%xmm4,%xmm5
+	xorl	%r9d,%r15d
+	addl	%r12d,%edx
+	movdqa	%xmm4,%xmm6
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	psrld	$3,%xmm4
+	xorl	%r8d,%r14d
+	addl	%r13d,%edx
+	xorl	%r9d,%edi
+	paddd	%xmm7,%xmm1
+	rorl	$2,%r14d
+	addl	%edx,%r11d
+	psrld	$7,%xmm6
+	addl	%edi,%edx
+	movl	%r11d,%r13d
+	pshufd	$250,%xmm0,%xmm7
+	addl	%edx,%r14d
+	rorl	$14,%r13d
+	pslld	$14,%xmm5
+	movl	%r14d,%edx
+	movl	%eax,%r12d
+	pxor	%xmm6,%xmm4
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r12d
+	rorl	$5,%r13d
+	psrld	$11,%xmm6
+	xorl	%edx,%r14d
+	pxor	%xmm5,%xmm4
+	andl	%r11d,%r12d
+	xorl	%r11d,%r13d
+	pslld	$11,%xmm5
+	addl	20(%rsp),%ecx	# round 5: add staged W+K
+	movl	%edx,%edi
+	pxor	%xmm6,%xmm4
+	xorl	%ebx,%r12d
+	rorl	$11,%r14d
+	movdqa	%xmm7,%xmm6
+	xorl	%r8d,%edi
+	addl	%r12d,%ecx
+	pxor	%xmm5,%xmm4
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%edx,%r14d
+	psrld	$10,%xmm7
+	addl	%r13d,%ecx
+	xorl	%r8d,%r15d
+	paddd	%xmm4,%xmm1
+	rorl	$2,%r14d
+	addl	%ecx,%r10d
+	psrlq	$17,%xmm6
+	addl	%r15d,%ecx
+	movl	%r10d,%r13d
+	addl	%ecx,%r14d
+	pxor	%xmm6,%xmm7
+	rorl	$14,%r13d
+	movl	%r14d,%ecx
+	movl	%r11d,%r12d
+	rorl	$9,%r14d
+	psrlq	$2,%xmm6
+	xorl	%r10d,%r13d
+	xorl	%eax,%r12d
+	pxor	%xmm6,%xmm7
+	rorl	$5,%r13d
+	xorl	%ecx,%r14d
+	andl	%r10d,%r12d
+	pshufd	$128,%xmm7,%xmm7
+	xorl	%r10d,%r13d
+	addl	24(%rsp),%ebx	# round 6: add staged W+K
+	movl	%ecx,%r15d
+	psrldq	$8,%xmm7
+	xorl	%eax,%r12d
+	rorl	$11,%r14d
+	xorl	%edx,%r15d
+	addl	%r12d,%ebx
+	rorl	$6,%r13d
+	paddd	%xmm7,%xmm1
+	andl	%r15d,%edi
+	xorl	%ecx,%r14d
+	addl	%r13d,%ebx
+	pshufd	$80,%xmm1,%xmm7
+	xorl	%edx,%edi
+	rorl	$2,%r14d
+	addl	%ebx,%r9d
+	movdqa	%xmm7,%xmm6
+	addl	%edi,%ebx
+	movl	%r9d,%r13d
+	psrld	$10,%xmm7
+	addl	%ebx,%r14d
+	rorl	$14,%r13d
+	psrlq	$17,%xmm6
+	movl	%r14d,%ebx
+	movl	%r10d,%r12d
+	pxor	%xmm6,%xmm7
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r12d
+	rorl	$5,%r13d
+	xorl	%ebx,%r14d
+	psrlq	$2,%xmm6
+	andl	%r9d,%r12d
+	xorl	%r9d,%r13d
+	addl	28(%rsp),%eax	# round 7: add staged W+K
+	pxor	%xmm6,%xmm7
+	movl	%ebx,%edi
+	xorl	%r11d,%r12d
+	rorl	$11,%r14d
+	pshufd	$8,%xmm7,%xmm7
+	xorl	%ecx,%edi
+	addl	%r12d,%eax
+	movdqa	32(%rbp),%xmm6	# K row matching the four new schedule words in xmm1
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	pslldq	$8,%xmm7
+	xorl	%ebx,%r14d
+	addl	%r13d,%eax
+	xorl	%ecx,%r15d
+	paddd	%xmm7,%xmm1
+	rorl	$2,%r14d
+	addl	%eax,%r8d
+	addl	%r15d,%eax
+	paddd	%xmm1,%xmm6
+	movl	%r8d,%r13d
+	addl	%eax,%r14d
+	movdqa	%xmm6,16(%rsp)	# restage W+K slot 4-7 for the following 16 rounds
+	rorl	$14,%r13d
+	movdqa	%xmm3,%xmm4
+	movl	%r14d,%eax
+	movl	%r9d,%r12d
+	movdqa	%xmm1,%xmm7
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r12d
+	rorl	$5,%r13d
+	xorl	%eax,%r14d
+.byte	102,15,58,15,226,4	# palignr $4,%xmm2,%xmm4
+	andl	%r8d,%r12d
+	xorl	%r8d,%r13d
+.byte	102,15,58,15,248,4	# palignr $4,%xmm0,%xmm7
+	addl	32(%rsp),%r11d	# round 8: add staged W+K
+	movl	%eax,%r15d
+	xorl	%r10d,%r12d
+	rorl	$11,%r14d
+	movdqa	%xmm4,%xmm5
+	xorl	%ebx,%r15d
+	addl	%r12d,%r11d
+	movdqa	%xmm4,%xmm6
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	psrld	$3,%xmm4
+	xorl	%eax,%r14d
+	addl	%r13d,%r11d
+	xorl	%ebx,%edi
+	paddd	%xmm7,%xmm2
+	rorl	$2,%r14d
+	addl	%r11d,%edx
+	psrld	$7,%xmm6
+	addl	%edi,%r11d
+	movl	%edx,%r13d
+	pshufd	$250,%xmm1,%xmm7
+	addl	%r11d,%r14d
+	rorl	$14,%r13d
+	pslld	$14,%xmm5
+	movl	%r14d,%r11d
+	movl	%r8d,%r12d
+	pxor	%xmm6,%xmm4
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r12d
+	rorl	$5,%r13d
+	psrld	$11,%xmm6
+	xorl	%r11d,%r14d
+	pxor	%xmm5,%xmm4
+	andl	%edx,%r12d
+	xorl	%edx,%r13d
+	pslld	$11,%xmm5
+	addl	36(%rsp),%r10d	# round 9: add staged W+K
+	movl	%r11d,%edi
+	pxor	%xmm6,%xmm4
+	xorl	%r9d,%r12d
+	rorl	$11,%r14d
+	movdqa	%xmm7,%xmm6
+	xorl	%eax,%edi
+	addl	%r12d,%r10d
+	pxor	%xmm5,%xmm4
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%r11d,%r14d
+	psrld	$10,%xmm7
+	addl	%r13d,%r10d
+	xorl	%eax,%r15d
+	paddd	%xmm4,%xmm2
+	rorl	$2,%r14d
+	addl	%r10d,%ecx
+	psrlq	$17,%xmm6
+	addl	%r15d,%r10d
+	movl	%ecx,%r13d
+	addl	%r10d,%r14d
+	pxor	%xmm6,%xmm7
+	rorl	$14,%r13d
+	movl	%r14d,%r10d
+	movl	%edx,%r12d
+	rorl	$9,%r14d
+	psrlq	$2,%xmm6
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r12d
+	pxor	%xmm6,%xmm7
+	rorl	$5,%r13d
+	xorl	%r10d,%r14d
+	andl	%ecx,%r12d
+	pshufd	$128,%xmm7,%xmm7
+	xorl	%ecx,%r13d
+	addl	40(%rsp),%r9d	# round 10: add staged W+K
+	movl	%r10d,%r15d
+	psrldq	$8,%xmm7
+	xorl	%r8d,%r12d
+	rorl	$11,%r14d
+	xorl	%r11d,%r15d
+	addl	%r12d,%r9d
+	rorl	$6,%r13d
+	paddd	%xmm7,%xmm2
+	andl	%r15d,%edi
+	xorl	%r10d,%r14d
+	addl	%r13d,%r9d
+	pshufd	$80,%xmm2,%xmm7
+	xorl	%r11d,%edi
+	rorl	$2,%r14d
+	addl	%r9d,%ebx
+	movdqa	%xmm7,%xmm6
+	addl	%edi,%r9d
+	movl	%ebx,%r13d
+	psrld	$10,%xmm7
+	addl	%r9d,%r14d
+	rorl	$14,%r13d
+	psrlq	$17,%xmm6
+	movl	%r14d,%r9d
+	movl	%ecx,%r12d
+	pxor	%xmm6,%xmm7
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r12d
+	rorl	$5,%r13d
+	xorl	%r9d,%r14d
+	psrlq	$2,%xmm6
+	andl	%ebx,%r12d
+	xorl	%ebx,%r13d
+	addl	44(%rsp),%r8d	# round 11: add staged W+K
+	pxor	%xmm6,%xmm7
+	movl	%r9d,%edi
+	xorl	%edx,%r12d
+	rorl	$11,%r14d
+	pshufd	$8,%xmm7,%xmm7
+	xorl	%r10d,%edi
+	addl	%r12d,%r8d
+	movdqa	64(%rbp),%xmm6	# K row matching the four new schedule words in xmm2
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	pslldq	$8,%xmm7
+	xorl	%r9d,%r14d
+	addl	%r13d,%r8d
+	xorl	%r10d,%r15d
+	paddd	%xmm7,%xmm2
+	rorl	$2,%r14d
+	addl	%r8d,%eax
+	addl	%r15d,%r8d
+	paddd	%xmm2,%xmm6
+	movl	%eax,%r13d
+	addl	%r8d,%r14d
+	movdqa	%xmm6,32(%rsp)	# restage W+K slot 8-11 for the following 16 rounds
+	rorl	$14,%r13d
+	movdqa	%xmm0,%xmm4
+	movl	%r14d,%r8d
+	movl	%ebx,%r12d
+	movdqa	%xmm2,%xmm7
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r12d
+	rorl	$5,%r13d
+	xorl	%r8d,%r14d
+.byte	102,15,58,15,227,4	# palignr $4,%xmm3,%xmm4
+	andl	%eax,%r12d
+	xorl	%eax,%r13d
+.byte	102,15,58,15,249,4	# palignr $4,%xmm1,%xmm7
+	addl	48(%rsp),%edx	# round 12: add staged W+K
+	movl	%r8d,%r15d
+	xorl	%ecx,%r12d
+	rorl	$11,%r14d
+	movdqa	%xmm4,%xmm5
+	xorl	%r9d,%r15d
+	addl	%r12d,%edx
+	movdqa	%xmm4,%xmm6
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	psrld	$3,%xmm4
+	xorl	%r8d,%r14d
+	addl	%r13d,%edx
+	xorl	%r9d,%edi
+	paddd	%xmm7,%xmm3
+	rorl	$2,%r14d
+	addl	%edx,%r11d
+	psrld	$7,%xmm6
+	addl	%edi,%edx
+	movl	%r11d,%r13d
+	pshufd	$250,%xmm2,%xmm7
+	addl	%edx,%r14d
+	rorl	$14,%r13d
+	pslld	$14,%xmm5
+	movl	%r14d,%edx
+	movl	%eax,%r12d
+	pxor	%xmm6,%xmm4
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r12d
+	rorl	$5,%r13d
+	psrld	$11,%xmm6
+	xorl	%edx,%r14d
+	pxor	%xmm5,%xmm4
+	andl	%r11d,%r12d
+	xorl	%r11d,%r13d
+	pslld	$11,%xmm5
+	addl	52(%rsp),%ecx	# round 13: add staged W+K
+	movl	%edx,%edi
+	pxor	%xmm6,%xmm4
+	xorl	%ebx,%r12d
+	rorl	$11,%r14d
+	movdqa	%xmm7,%xmm6
+	xorl	%r8d,%edi
+	addl	%r12d,%ecx
+	pxor	%xmm5,%xmm4
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%edx,%r14d
+	psrld	$10,%xmm7
+	addl	%r13d,%ecx
+	xorl	%r8d,%r15d
+	paddd	%xmm4,%xmm3
+	rorl	$2,%r14d
+	addl	%ecx,%r10d
+	psrlq	$17,%xmm6
+	addl	%r15d,%ecx
+	movl	%r10d,%r13d
+	addl	%ecx,%r14d
+	pxor	%xmm6,%xmm7
+	rorl	$14,%r13d
+	movl	%r14d,%ecx
+	movl	%r11d,%r12d
+	rorl	$9,%r14d
+	psrlq	$2,%xmm6
+	xorl	%r10d,%r13d
+	xorl	%eax,%r12d
+	pxor	%xmm6,%xmm7
+	rorl	$5,%r13d
+	xorl	%ecx,%r14d
+	andl	%r10d,%r12d
+	pshufd	$128,%xmm7,%xmm7
+	xorl	%r10d,%r13d
+	addl	56(%rsp),%ebx	# round 14: add staged W+K
+	movl	%ecx,%r15d
+	psrldq	$8,%xmm7
+	xorl	%eax,%r12d
+	rorl	$11,%r14d
+	xorl	%edx,%r15d
+	addl	%r12d,%ebx
+	rorl	$6,%r13d
+	paddd	%xmm7,%xmm3
+	andl	%r15d,%edi
+	xorl	%ecx,%r14d
+	addl	%r13d,%ebx
+	pshufd	$80,%xmm3,%xmm7
+	xorl	%edx,%edi
+	rorl	$2,%r14d
+	addl	%ebx,%r9d
+	movdqa	%xmm7,%xmm6
+	addl	%edi,%ebx
+	movl	%r9d,%r13d
+	psrld	$10,%xmm7
+	addl	%ebx,%r14d
+	rorl	$14,%r13d
+	psrlq	$17,%xmm6
+	movl	%r14d,%ebx
+	movl	%r10d,%r12d
+	pxor	%xmm6,%xmm7
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r12d
+	rorl	$5,%r13d
+	xorl	%ebx,%r14d
+	psrlq	$2,%xmm6
+	andl	%r9d,%r12d
+	xorl	%r9d,%r13d
+	addl	60(%rsp),%eax	# round 15: add staged W+K
+	pxor	%xmm6,%xmm7
+	movl	%ebx,%edi
+	xorl	%r11d,%r12d
+	rorl	$11,%r14d
+	pshufd	$8,%xmm7,%xmm7
+	xorl	%ecx,%edi
+	addl	%r12d,%eax
+	movdqa	96(%rbp),%xmm6	# K row matching the four new schedule words in xmm3
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	pslldq	$8,%xmm7
+	xorl	%ebx,%r14d
+	addl	%r13d,%eax
+	xorl	%ecx,%r15d
+	paddd	%xmm7,%xmm3
+	rorl	$2,%r14d
+	addl	%eax,%r8d
+	addl	%r15d,%eax
+	paddd	%xmm3,%xmm6
+	movl	%r8d,%r13d
+	addl	%eax,%r14d
+	movdqa	%xmm6,48(%rsp)	# restage W+K slot 12-15 for the following 16 rounds
+	cmpb	$0,131(%rbp)	# top byte of the first dword of the next 128-byte group; it is zero only once that group is the mask row at K256+512
+	jne	L$ssse3_00_47	# repeat until the constants are exhausted
+	rorl	$14,%r13d	# fall-through from L$ssse3_00_47: last 16 rounds, scalar only, consuming the W+K words already staged on the stack
+	movl	%r14d,%eax
+	movl	%r9d,%r12d
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r12d
+	rorl	$5,%r13d
+	xorl	%eax,%r14d
+	andl	%r8d,%r12d
+	xorl	%r8d,%r13d
+	addl	0(%rsp),%r11d	# round 0 of the last 16: add staged W+K
+	movl	%eax,%r15d
+	xorl	%r10d,%r12d
+	rorl	$11,%r14d
+	xorl	%ebx,%r15d
+	addl	%r12d,%r11d
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	xorl	%eax,%r14d
+	addl	%r13d,%r11d
+	xorl	%ebx,%edi
+	rorl	$2,%r14d
+	addl	%r11d,%edx
+	addl	%edi,%r11d
+	movl	%edx,%r13d
+	addl	%r11d,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%r11d
+	movl	%r8d,%r12d
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r12d
+	rorl	$5,%r13d
+	xorl	%r11d,%r14d
+	andl	%edx,%r12d
+	xorl	%edx,%r13d
+	addl	4(%rsp),%r10d	# round 1: add staged W+K
+	movl	%r11d,%edi
+	xorl	%r9d,%r12d
+	rorl	$11,%r14d
+	xorl	%eax,%edi
+	addl	%r12d,%r10d
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%r11d,%r14d
+	addl	%r13d,%r10d
+	xorl	%eax,%r15d
+	rorl	$2,%r14d
+	addl	%r10d,%ecx
+	addl	%r15d,%r10d
+	movl	%ecx,%r13d
+	addl	%r10d,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%r10d
+	movl	%edx,%r12d
+	rorl	$9,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r12d
+	rorl	$5,%r13d
+	xorl	%r10d,%r14d
+	andl	%ecx,%r12d
+	xorl	%ecx,%r13d
+	addl	8(%rsp),%r9d	# round 2: add staged W+K
+	movl	%r10d,%r15d
+	xorl	%r8d,%r12d
+	rorl	$11,%r14d
+	xorl	%r11d,%r15d
+	addl	%r12d,%r9d
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	xorl	%r10d,%r14d
+	addl	%r13d,%r9d
+	xorl	%r11d,%edi
+	rorl	$2,%r14d
+	addl	%r9d,%ebx
+	addl	%edi,%r9d
+	movl	%ebx,%r13d
+	addl	%r9d,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%r9d
+	movl	%ecx,%r12d
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r12d
+	rorl	$5,%r13d
+	xorl	%r9d,%r14d
+	andl	%ebx,%r12d
+	xorl	%ebx,%r13d
+	addl	12(%rsp),%r8d	# round 3: add staged W+K
+	movl	%r9d,%edi
+	xorl	%edx,%r12d
+	rorl	$11,%r14d
+	xorl	%r10d,%edi
+	addl	%r12d,%r8d
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%r9d,%r14d
+	addl	%r13d,%r8d
+	xorl	%r10d,%r15d
+	rorl	$2,%r14d
+	addl	%r8d,%eax
+	addl	%r15d,%r8d
+	movl	%eax,%r13d
+	addl	%r8d,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%r8d
+	movl	%ebx,%r12d
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r12d
+	rorl	$5,%r13d
+	xorl	%r8d,%r14d
+	andl	%eax,%r12d
+	xorl	%eax,%r13d
+	addl	16(%rsp),%edx	# round 4: add staged W+K
+	movl	%r8d,%r15d
+	xorl	%ecx,%r12d
+	rorl	$11,%r14d
+	xorl	%r9d,%r15d
+	addl	%r12d,%edx
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	xorl	%r8d,%r14d
+	addl	%r13d,%edx
+	xorl	%r9d,%edi
+	rorl	$2,%r14d
+	addl	%edx,%r11d
+	addl	%edi,%edx
+	movl	%r11d,%r13d
+	addl	%edx,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%edx
+	movl	%eax,%r12d
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r12d
+	rorl	$5,%r13d
+	xorl	%edx,%r14d
+	andl	%r11d,%r12d
+	xorl	%r11d,%r13d
+	addl	20(%rsp),%ecx	# round 5: add staged W+K
+	movl	%edx,%edi
+	xorl	%ebx,%r12d
+	rorl	$11,%r14d
+	xorl	%r8d,%edi
+	addl	%r12d,%ecx
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%edx,%r14d
+	addl	%r13d,%ecx
+	xorl	%r8d,%r15d
+	rorl	$2,%r14d
+	addl	%ecx,%r10d
+	addl	%r15d,%ecx
+	movl	%r10d,%r13d
+	addl	%ecx,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%ecx
+	movl	%r11d,%r12d
+	rorl	$9,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r12d
+	rorl	$5,%r13d
+	xorl	%ecx,%r14d
+	andl	%r10d,%r12d
+	xorl	%r10d,%r13d
+	addl	24(%rsp),%ebx	# round 6: add staged W+K
+	movl	%ecx,%r15d
+	xorl	%eax,%r12d
+	rorl	$11,%r14d
+	xorl	%edx,%r15d
+	addl	%r12d,%ebx
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	xorl	%ecx,%r14d
+	addl	%r13d,%ebx
+	xorl	%edx,%edi
+	rorl	$2,%r14d
+	addl	%ebx,%r9d
+	addl	%edi,%ebx
+	movl	%r9d,%r13d
+	addl	%ebx,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%ebx
+	movl	%r10d,%r12d
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r12d
+	rorl	$5,%r13d
+	xorl	%ebx,%r14d
+	andl	%r9d,%r12d
+	xorl	%r9d,%r13d
+	addl	28(%rsp),%eax	# round 7: add staged W+K
+	movl	%ebx,%edi
+	xorl	%r11d,%r12d
+	rorl	$11,%r14d
+	xorl	%ecx,%edi
+	addl	%r12d,%eax
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%ebx,%r14d
+	addl	%r13d,%eax
+	xorl	%ecx,%r15d
+	rorl	$2,%r14d
+	addl	%eax,%r8d
+	addl	%r15d,%eax
+	movl	%r8d,%r13d
+	addl	%eax,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%eax
+	movl	%r9d,%r12d
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r12d
+	rorl	$5,%r13d
+	xorl	%eax,%r14d
+	andl	%r8d,%r12d
+	xorl	%r8d,%r13d
+	addl	32(%rsp),%r11d	# round 8: add staged W+K
+	movl	%eax,%r15d
+	xorl	%r10d,%r12d
+	rorl	$11,%r14d
+	xorl	%ebx,%r15d
+	addl	%r12d,%r11d
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	xorl	%eax,%r14d
+	addl	%r13d,%r11d
+	xorl	%ebx,%edi
+	rorl	$2,%r14d
+	addl	%r11d,%edx
+	addl	%edi,%r11d
+	movl	%edx,%r13d
+	addl	%r11d,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%r11d
+	movl	%r8d,%r12d
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r12d
+	rorl	$5,%r13d
+	xorl	%r11d,%r14d
+	andl	%edx,%r12d
+	xorl	%edx,%r13d
+	addl	36(%rsp),%r10d	# round 9: add staged W+K
+	movl	%r11d,%edi
+	xorl	%r9d,%r12d
+	rorl	$11,%r14d
+	xorl	%eax,%edi
+	addl	%r12d,%r10d
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%r11d,%r14d
+	addl	%r13d,%r10d
+	xorl	%eax,%r15d
+	rorl	$2,%r14d
+	addl	%r10d,%ecx
+	addl	%r15d,%r10d
+	movl	%ecx,%r13d
+	addl	%r10d,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%r10d
+	movl	%edx,%r12d
+	rorl	$9,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r12d
+	rorl	$5,%r13d
+	xorl	%r10d,%r14d
+	andl	%ecx,%r12d
+	xorl	%ecx,%r13d
+	addl	40(%rsp),%r9d	# round 10: add staged W+K
+	movl	%r10d,%r15d
+	xorl	%r8d,%r12d
+	rorl	$11,%r14d
+	xorl	%r11d,%r15d
+	addl	%r12d,%r9d
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	xorl	%r10d,%r14d
+	addl	%r13d,%r9d
+	xorl	%r11d,%edi
+	rorl	$2,%r14d
+	addl	%r9d,%ebx
+	addl	%edi,%r9d
+	movl	%ebx,%r13d
+	addl	%r9d,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%r9d
+	movl	%ecx,%r12d
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r12d
+	rorl	$5,%r13d
+	xorl	%r9d,%r14d
+	andl	%ebx,%r12d
+	xorl	%ebx,%r13d
+	addl	44(%rsp),%r8d	# round 11: add staged W+K
+	movl	%r9d,%edi
+	xorl	%edx,%r12d
+	rorl	$11,%r14d
+	xorl	%r10d,%edi
+	addl	%r12d,%r8d
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%r9d,%r14d
+	addl	%r13d,%r8d
+	xorl	%r10d,%r15d
+	rorl	$2,%r14d
+	addl	%r8d,%eax
+	addl	%r15d,%r8d
+	movl	%eax,%r13d
+	addl	%r8d,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%r8d
+	movl	%ebx,%r12d
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r12d
+	rorl	$5,%r13d
+	xorl	%r8d,%r14d
+	andl	%eax,%r12d
+	xorl	%eax,%r13d
+	addl	48(%rsp),%edx	# round 12: add staged W+K
+	movl	%r8d,%r15d
+	xorl	%ecx,%r12d
+	rorl	$11,%r14d
+	xorl	%r9d,%r15d
+	addl	%r12d,%edx
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	xorl	%r8d,%r14d
+	addl	%r13d,%edx
+	xorl	%r9d,%edi
+	rorl	$2,%r14d
+	addl	%edx,%r11d
+	addl	%edi,%edx
+	movl	%r11d,%r13d
+	addl	%edx,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%edx
+	movl	%eax,%r12d
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r12d
+	rorl	$5,%r13d
+	xorl	%edx,%r14d
+	andl	%r11d,%r12d
+	xorl	%r11d,%r13d
+	addl	52(%rsp),%ecx	# round 13: add staged W+K
+	movl	%edx,%edi
+	xorl	%ebx,%r12d
+	rorl	$11,%r14d
+	xorl	%r8d,%edi
+	addl	%r12d,%ecx
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%edx,%r14d
+	addl	%r13d,%ecx
+	xorl	%r8d,%r15d
+	rorl	$2,%r14d
+	addl	%ecx,%r10d
+	addl	%r15d,%ecx
+	movl	%r10d,%r13d
+	addl	%ecx,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%ecx
+	movl	%r11d,%r12d
+	rorl	$9,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r12d
+	rorl	$5,%r13d
+	xorl	%ecx,%r14d
+	andl	%r10d,%r12d
+	xorl	%r10d,%r13d
+	addl	56(%rsp),%ebx	# round 14: add staged W+K
+	movl	%ecx,%r15d
+	xorl	%eax,%r12d
+	rorl	$11,%r14d
+	xorl	%edx,%r15d
+	addl	%r12d,%ebx
+	rorl	$6,%r13d
+	andl	%r15d,%edi
+	xorl	%ecx,%r14d
+	addl	%r13d,%ebx
+	xorl	%edx,%edi
+	rorl	$2,%r14d
+	addl	%ebx,%r9d
+	addl	%edi,%ebx
+	movl	%r9d,%r13d
+	addl	%ebx,%r14d
+	rorl	$14,%r13d
+	movl	%r14d,%ebx
+	movl	%r10d,%r12d
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r12d
+	rorl	$5,%r13d
+	xorl	%ebx,%r14d
+	andl	%r9d,%r12d
+	xorl	%r9d,%r13d
+	addl	60(%rsp),%eax	# round 15: add staged W+K
+	movl	%ebx,%edi
+	xorl	%r11d,%r12d
+	rorl	$11,%r14d
+	xorl	%ecx,%edi
+	addl	%r12d,%eax
+	rorl	$6,%r13d
+	andl	%edi,%r15d
+	xorl	%ebx,%r14d
+	addl	%r13d,%eax
+	xorl	%ecx,%r15d
+	rorl	$2,%r14d
+	addl	%eax,%r8d
+	addl	%r15d,%eax
+	movl	%r8d,%r13d
+	addl	%eax,%r14d
+	movq	64+0(%rsp),%rdi	# rdi = saved state pointer (rdi was used as a scratch register during the rounds)
+	movl	%r14d,%eax	# move the pending sum held in r14d into eax
+
+	addl	0(%rdi),%eax	# add the previous state words into the working registers
+	leaq	64(%rsi),%rsi	# advance the input pointer by one 64-byte block
+	addl	4(%rdi),%ebx
+	addl	8(%rdi),%ecx
+	addl	12(%rdi),%edx
+	addl	16(%rdi),%r8d
+	addl	20(%rdi),%r9d
+	addl	24(%rdi),%r10d
+	addl	28(%rdi),%r11d
+
+	cmpq	64+16(%rsp),%rsi	# compare with the saved end-of-input pointer; the movl stores below leave the flags untouched
+
+	movl	%eax,0(%rdi)	# write the updated state back
+	movl	%ebx,4(%rdi)
+	movl	%ecx,8(%rdi)
+	movl	%edx,12(%rdi)
+	movl	%r8d,16(%rdi)
+	movl	%r9d,20(%rdi)
+	movl	%r10d,24(%rdi)
+	movl	%r11d,28(%rdi)
+	jb	L$loop_ssse3	# more input remains (rsi below end): process the next block
+
+	movq	64+24(%rsp),%rsi	# rsi = rsp value saved at function entry
+	movq	-48(%rsi),%r15	# reload the six registers pushed in the prologue
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp	# restore the entry stack pointer
+L$epilogue_ssse3:
+	.byte	0xf3,0xc3	# rep ret
+
+
+.p2align	6
+sha256_block_data_order_avx:
+L$avx_shortcut:
+	movq	%rsp,%rax
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	shlq	$4,%rdx
+	subq	$96,%rsp
+	leaq	(%rsi,%rdx,4),%rdx
+	andq	$-64,%rsp
+	movq	%rdi,64+0(%rsp)
+	movq	%rsi,64+8(%rsp)
+	movq	%rdx,64+16(%rsp)
+	movq	%rax,64+24(%rsp)
+L$prologue_avx:
+
+	vzeroupper
+	movl	0(%rdi),%eax
+	movl	4(%rdi),%ebx
+	movl	8(%rdi),%ecx
+	movl	12(%rdi),%edx
+	movl	16(%rdi),%r8d
+	movl	20(%rdi),%r9d
+	movl	24(%rdi),%r10d
+	movl	28(%rdi),%r11d
+	vmovdqa	K256+512+32(%rip),%xmm8
+	vmovdqa	K256+512+64(%rip),%xmm9
+	jmp	L$loop_avx
+.p2align	4
+L$loop_avx:
+	vmovdqa	K256+512(%rip),%xmm7
+	vmovdqu	0(%rsi),%xmm0
+	vmovdqu	16(%rsi),%xmm1
+	vmovdqu	32(%rsi),%xmm2
+	vmovdqu	48(%rsi),%xmm3
+	vpshufb	%xmm7,%xmm0,%xmm0
+	leaq	K256(%rip),%rbp
+	vpshufb	%xmm7,%xmm1,%xmm1
+	vpshufb	%xmm7,%xmm2,%xmm2
+	vpaddd	0(%rbp),%xmm0,%xmm4
+	vpshufb	%xmm7,%xmm3,%xmm3
+	vpaddd	32(%rbp),%xmm1,%xmm5
+	vpaddd	64(%rbp),%xmm2,%xmm6
+	vpaddd	96(%rbp),%xmm3,%xmm7
+	vmovdqa	%xmm4,0(%rsp)
+	movl	%eax,%r14d
+	vmovdqa	%xmm5,16(%rsp)
+	movl	%ebx,%edi
+	vmovdqa	%xmm6,32(%rsp)
+	xorl	%ecx,%edi
+	vmovdqa	%xmm7,48(%rsp)
+	movl	%r8d,%r13d
+	jmp	L$avx_00_47
+
+.p2align	4
+L$avx_00_47:
+	subq	$-128,%rbp
+	vpalignr	$4,%xmm0,%xmm1,%xmm4
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%eax
+	movl	%r9d,%r12d
+	vpalignr	$4,%xmm2,%xmm3,%xmm7
+	shrdl	$9,%r14d,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r12d
+	vpsrld	$7,%xmm4,%xmm6
+	shrdl	$5,%r13d,%r13d
+	xorl	%eax,%r14d
+	andl	%r8d,%r12d
+	vpaddd	%xmm7,%xmm0,%xmm0
+	xorl	%r8d,%r13d
+	addl	0(%rsp),%r11d
+	movl	%eax,%r15d
+	vpsrld	$3,%xmm4,%xmm7
+	xorl	%r10d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%ebx,%r15d
+	vpslld	$14,%xmm4,%xmm5
+	addl	%r12d,%r11d
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	vpxor	%xmm6,%xmm7,%xmm4
+	xorl	%eax,%r14d
+	addl	%r13d,%r11d
+	xorl	%ebx,%edi
+	vpshufd	$250,%xmm3,%xmm7
+	shrdl	$2,%r14d,%r14d
+	addl	%r11d,%edx
+	addl	%edi,%r11d
+	vpsrld	$11,%xmm6,%xmm6
+	movl	%edx,%r13d
+	addl	%r11d,%r14d
+	shrdl	$14,%r13d,%r13d
+	vpxor	%xmm5,%xmm4,%xmm4
+	movl	%r14d,%r11d
+	movl	%r8d,%r12d
+	shrdl	$9,%r14d,%r14d
+	vpslld	$11,%xmm5,%xmm5
+	xorl	%edx,%r13d
+	xorl	%r9d,%r12d
+	shrdl	$5,%r13d,%r13d
+	vpxor	%xmm6,%xmm4,%xmm4
+	xorl	%r11d,%r14d
+	andl	%edx,%r12d
+	xorl	%edx,%r13d
+	vpsrld	$10,%xmm7,%xmm6
+	addl	4(%rsp),%r10d
+	movl	%r11d,%edi
+	xorl	%r9d,%r12d
+	vpxor	%xmm5,%xmm4,%xmm4
+	shrdl	$11,%r14d,%r14d
+	xorl	%eax,%edi
+	addl	%r12d,%r10d
+	vpsrlq	$17,%xmm7,%xmm7
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%r11d,%r14d
+	vpaddd	%xmm4,%xmm0,%xmm0
+	addl	%r13d,%r10d
+	xorl	%eax,%r15d
+	shrdl	$2,%r14d,%r14d
+	vpxor	%xmm7,%xmm6,%xmm6
+	addl	%r10d,%ecx
+	addl	%r15d,%r10d
+	movl	%ecx,%r13d
+	vpsrlq	$2,%xmm7,%xmm7
+	addl	%r10d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r10d
+	vpxor	%xmm7,%xmm6,%xmm6
+	movl	%edx,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%ecx,%r13d
+	vpshufb	%xmm8,%xmm6,%xmm6
+	xorl	%r8d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r10d,%r14d
+	vpaddd	%xmm6,%xmm0,%xmm0
+	andl	%ecx,%r12d
+	xorl	%ecx,%r13d
+	addl	8(%rsp),%r9d
+	vpshufd	$80,%xmm0,%xmm7
+	movl	%r10d,%r15d
+	xorl	%r8d,%r12d
+	shrdl	$11,%r14d,%r14d
+	vpsrld	$10,%xmm7,%xmm6
+	xorl	%r11d,%r15d
+	addl	%r12d,%r9d
+	shrdl	$6,%r13d,%r13d
+	vpsrlq	$17,%xmm7,%xmm7
+	andl	%r15d,%edi
+	xorl	%r10d,%r14d
+	addl	%r13d,%r9d
+	vpxor	%xmm7,%xmm6,%xmm6
+	xorl	%r11d,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%r9d,%ebx
+	vpsrlq	$2,%xmm7,%xmm7
+	addl	%edi,%r9d
+	movl	%ebx,%r13d
+	addl	%r9d,%r14d
+	vpxor	%xmm7,%xmm6,%xmm6
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r9d
+	movl	%ecx,%r12d
+	vpshufb	%xmm9,%xmm6,%xmm6
+	shrdl	$9,%r14d,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r12d
+	vpaddd	%xmm6,%xmm0,%xmm0
+	shrdl	$5,%r13d,%r13d
+	xorl	%r9d,%r14d
+	andl	%ebx,%r12d
+	vpaddd	0(%rbp),%xmm0,%xmm6
+	xorl	%ebx,%r13d
+	addl	12(%rsp),%r8d
+	movl	%r9d,%edi
+	xorl	%edx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r10d,%edi
+	addl	%r12d,%r8d
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%r9d,%r14d
+	addl	%r13d,%r8d
+	xorl	%r10d,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%r8d,%eax
+	addl	%r15d,%r8d
+	movl	%eax,%r13d
+	addl	%r8d,%r14d
+	vmovdqa	%xmm6,0(%rsp)
+	vpalignr	$4,%xmm1,%xmm2,%xmm4
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r8d
+	movl	%ebx,%r12d
+	vpalignr	$4,%xmm3,%xmm0,%xmm7
+	shrdl	$9,%r14d,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r12d
+	vpsrld	$7,%xmm4,%xmm6
+	shrdl	$5,%r13d,%r13d
+	xorl	%r8d,%r14d
+	andl	%eax,%r12d
+	vpaddd	%xmm7,%xmm1,%xmm1
+	xorl	%eax,%r13d
+	addl	16(%rsp),%edx
+	movl	%r8d,%r15d
+	vpsrld	$3,%xmm4,%xmm7
+	xorl	%ecx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r9d,%r15d
+	vpslld	$14,%xmm4,%xmm5
+	addl	%r12d,%edx
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	vpxor	%xmm6,%xmm7,%xmm4
+	xorl	%r8d,%r14d
+	addl	%r13d,%edx
+	xorl	%r9d,%edi
+	vpshufd	$250,%xmm0,%xmm7
+	shrdl	$2,%r14d,%r14d
+	addl	%edx,%r11d
+	addl	%edi,%edx
+	vpsrld	$11,%xmm6,%xmm6
+	movl	%r11d,%r13d
+	addl	%edx,%r14d
+	shrdl	$14,%r13d,%r13d
+	vpxor	%xmm5,%xmm4,%xmm4
+	movl	%r14d,%edx
+	movl	%eax,%r12d
+	shrdl	$9,%r14d,%r14d
+	vpslld	$11,%xmm5,%xmm5
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r12d
+	shrdl	$5,%r13d,%r13d
+	vpxor	%xmm6,%xmm4,%xmm4
+	xorl	%edx,%r14d
+	andl	%r11d,%r12d
+	xorl	%r11d,%r13d
+	vpsrld	$10,%xmm7,%xmm6
+	addl	20(%rsp),%ecx
+	movl	%edx,%edi
+	xorl	%ebx,%r12d
+	vpxor	%xmm5,%xmm4,%xmm4
+	shrdl	$11,%r14d,%r14d
+	xorl	%r8d,%edi
+	addl	%r12d,%ecx
+	vpsrlq	$17,%xmm7,%xmm7
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%edx,%r14d
+	vpaddd	%xmm4,%xmm1,%xmm1
+	addl	%r13d,%ecx
+	xorl	%r8d,%r15d
+	shrdl	$2,%r14d,%r14d
+	vpxor	%xmm7,%xmm6,%xmm6
+	addl	%ecx,%r10d
+	addl	%r15d,%ecx
+	movl	%r10d,%r13d
+	vpsrlq	$2,%xmm7,%xmm7
+	addl	%ecx,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%ecx
+	vpxor	%xmm7,%xmm6,%xmm6
+	movl	%r11d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r10d,%r13d
+	vpshufb	%xmm8,%xmm6,%xmm6
+	xorl	%eax,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%ecx,%r14d
+	vpaddd	%xmm6,%xmm1,%xmm1
+	andl	%r10d,%r12d
+	xorl	%r10d,%r13d
+	addl	24(%rsp),%ebx
+	vpshufd	$80,%xmm1,%xmm7
+	movl	%ecx,%r15d
+	xorl	%eax,%r12d
+	shrdl	$11,%r14d,%r14d
+	vpsrld	$10,%xmm7,%xmm6
+	xorl	%edx,%r15d
+	addl	%r12d,%ebx
+	shrdl	$6,%r13d,%r13d
+	vpsrlq	$17,%xmm7,%xmm7
+	andl	%r15d,%edi
+	xorl	%ecx,%r14d
+	addl	%r13d,%ebx
+	vpxor	%xmm7,%xmm6,%xmm6
+	xorl	%edx,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%ebx,%r9d
+	vpsrlq	$2,%xmm7,%xmm7
+	addl	%edi,%ebx
+	movl	%r9d,%r13d
+	addl	%ebx,%r14d
+	vpxor	%xmm7,%xmm6,%xmm6
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%ebx
+	movl	%r10d,%r12d
+	vpshufb	%xmm9,%xmm6,%xmm6
+	shrdl	$9,%r14d,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r12d
+	vpaddd	%xmm6,%xmm1,%xmm1
+	shrdl	$5,%r13d,%r13d
+	xorl	%ebx,%r14d
+	andl	%r9d,%r12d
+	vpaddd	32(%rbp),%xmm1,%xmm6
+	xorl	%r9d,%r13d
+	addl	28(%rsp),%eax
+	movl	%ebx,%edi
+	xorl	%r11d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%ecx,%edi
+	addl	%r12d,%eax
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%ebx,%r14d
+	addl	%r13d,%eax
+	xorl	%ecx,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%eax,%r8d
+	addl	%r15d,%eax
+	movl	%r8d,%r13d
+	addl	%eax,%r14d
+	vmovdqa	%xmm6,16(%rsp)
+	vpalignr	$4,%xmm2,%xmm3,%xmm4
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%eax
+	movl	%r9d,%r12d
+	vpalignr	$4,%xmm0,%xmm1,%xmm7
+	shrdl	$9,%r14d,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r12d
+	vpsrld	$7,%xmm4,%xmm6
+	shrdl	$5,%r13d,%r13d
+	xorl	%eax,%r14d
+	andl	%r8d,%r12d
+	vpaddd	%xmm7,%xmm2,%xmm2
+	xorl	%r8d,%r13d
+	addl	32(%rsp),%r11d
+	movl	%eax,%r15d
+	vpsrld	$3,%xmm4,%xmm7
+	xorl	%r10d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%ebx,%r15d
+	vpslld	$14,%xmm4,%xmm5
+	addl	%r12d,%r11d
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	vpxor	%xmm6,%xmm7,%xmm4
+	xorl	%eax,%r14d
+	addl	%r13d,%r11d
+	xorl	%ebx,%edi
+	vpshufd	$250,%xmm1,%xmm7
+	shrdl	$2,%r14d,%r14d
+	addl	%r11d,%edx
+	addl	%edi,%r11d
+	vpsrld	$11,%xmm6,%xmm6
+	movl	%edx,%r13d
+	addl	%r11d,%r14d
+	shrdl	$14,%r13d,%r13d
+	vpxor	%xmm5,%xmm4,%xmm4
+	movl	%r14d,%r11d
+	movl	%r8d,%r12d
+	shrdl	$9,%r14d,%r14d
+	vpslld	$11,%xmm5,%xmm5
+	xorl	%edx,%r13d
+	xorl	%r9d,%r12d
+	shrdl	$5,%r13d,%r13d
+	vpxor	%xmm6,%xmm4,%xmm4
+	xorl	%r11d,%r14d
+	andl	%edx,%r12d
+	xorl	%edx,%r13d
+	vpsrld	$10,%xmm7,%xmm6
+	addl	36(%rsp),%r10d
+	movl	%r11d,%edi
+	xorl	%r9d,%r12d
+	vpxor	%xmm5,%xmm4,%xmm4
+	shrdl	$11,%r14d,%r14d
+	xorl	%eax,%edi
+	addl	%r12d,%r10d
+	vpsrlq	$17,%xmm7,%xmm7
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%r11d,%r14d
+	vpaddd	%xmm4,%xmm2,%xmm2
+	addl	%r13d,%r10d
+	xorl	%eax,%r15d
+	shrdl	$2,%r14d,%r14d
+	vpxor	%xmm7,%xmm6,%xmm6
+	addl	%r10d,%ecx
+	addl	%r15d,%r10d
+	movl	%ecx,%r13d
+	vpsrlq	$2,%xmm7,%xmm7
+	addl	%r10d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r10d
+	vpxor	%xmm7,%xmm6,%xmm6
+	movl	%edx,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%ecx,%r13d
+	vpshufb	%xmm8,%xmm6,%xmm6
+	xorl	%r8d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r10d,%r14d
+	vpaddd	%xmm6,%xmm2,%xmm2
+	andl	%ecx,%r12d
+	xorl	%ecx,%r13d
+	addl	40(%rsp),%r9d
+	vpshufd	$80,%xmm2,%xmm7
+	movl	%r10d,%r15d
+	xorl	%r8d,%r12d
+	shrdl	$11,%r14d,%r14d
+	vpsrld	$10,%xmm7,%xmm6
+	xorl	%r11d,%r15d
+	addl	%r12d,%r9d
+	shrdl	$6,%r13d,%r13d
+	vpsrlq	$17,%xmm7,%xmm7
+	andl	%r15d,%edi
+	xorl	%r10d,%r14d
+	addl	%r13d,%r9d
+	vpxor	%xmm7,%xmm6,%xmm6
+	xorl	%r11d,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%r9d,%ebx
+	vpsrlq	$2,%xmm7,%xmm7
+	addl	%edi,%r9d
+	movl	%ebx,%r13d
+	addl	%r9d,%r14d
+	vpxor	%xmm7,%xmm6,%xmm6
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r9d
+	movl	%ecx,%r12d
+	vpshufb	%xmm9,%xmm6,%xmm6
+	shrdl	$9,%r14d,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r12d
+	vpaddd	%xmm6,%xmm2,%xmm2
+	shrdl	$5,%r13d,%r13d
+	xorl	%r9d,%r14d
+	andl	%ebx,%r12d
+	vpaddd	64(%rbp),%xmm2,%xmm6
+	xorl	%ebx,%r13d
+	addl	44(%rsp),%r8d
+	movl	%r9d,%edi
+	xorl	%edx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r10d,%edi
+	addl	%r12d,%r8d
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%r9d,%r14d
+	addl	%r13d,%r8d
+	xorl	%r10d,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%r8d,%eax
+	addl	%r15d,%r8d
+	movl	%eax,%r13d
+	addl	%r8d,%r14d
+	vmovdqa	%xmm6,32(%rsp)
+	vpalignr	$4,%xmm3,%xmm0,%xmm4
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r8d
+	movl	%ebx,%r12d
+	vpalignr	$4,%xmm1,%xmm2,%xmm7
+	shrdl	$9,%r14d,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r12d
+	vpsrld	$7,%xmm4,%xmm6
+	shrdl	$5,%r13d,%r13d
+	xorl	%r8d,%r14d
+	andl	%eax,%r12d
+	vpaddd	%xmm7,%xmm3,%xmm3
+	xorl	%eax,%r13d
+	addl	48(%rsp),%edx
+	movl	%r8d,%r15d
+	vpsrld	$3,%xmm4,%xmm7
+	xorl	%ecx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r9d,%r15d
+	vpslld	$14,%xmm4,%xmm5
+	addl	%r12d,%edx
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	vpxor	%xmm6,%xmm7,%xmm4
+	xorl	%r8d,%r14d
+	addl	%r13d,%edx
+	xorl	%r9d,%edi
+	vpshufd	$250,%xmm2,%xmm7
+	shrdl	$2,%r14d,%r14d
+	addl	%edx,%r11d
+	addl	%edi,%edx
+	vpsrld	$11,%xmm6,%xmm6
+	movl	%r11d,%r13d
+	addl	%edx,%r14d
+	shrdl	$14,%r13d,%r13d
+	vpxor	%xmm5,%xmm4,%xmm4
+	movl	%r14d,%edx
+	movl	%eax,%r12d
+	shrdl	$9,%r14d,%r14d
+	vpslld	$11,%xmm5,%xmm5
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r12d
+	shrdl	$5,%r13d,%r13d
+	vpxor	%xmm6,%xmm4,%xmm4
+	xorl	%edx,%r14d
+	andl	%r11d,%r12d
+	xorl	%r11d,%r13d
+	vpsrld	$10,%xmm7,%xmm6
+	addl	52(%rsp),%ecx
+	movl	%edx,%edi
+	xorl	%ebx,%r12d
+	vpxor	%xmm5,%xmm4,%xmm4
+	shrdl	$11,%r14d,%r14d
+	xorl	%r8d,%edi
+	addl	%r12d,%ecx
+	vpsrlq	$17,%xmm7,%xmm7
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%edx,%r14d
+	vpaddd	%xmm4,%xmm3,%xmm3
+	addl	%r13d,%ecx
+	xorl	%r8d,%r15d
+	shrdl	$2,%r14d,%r14d
+	vpxor	%xmm7,%xmm6,%xmm6
+	addl	%ecx,%r10d
+	addl	%r15d,%ecx
+	movl	%r10d,%r13d
+	vpsrlq	$2,%xmm7,%xmm7
+	addl	%ecx,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%ecx
+	vpxor	%xmm7,%xmm6,%xmm6
+	movl	%r11d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r10d,%r13d
+	vpshufb	%xmm8,%xmm6,%xmm6
+	xorl	%eax,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%ecx,%r14d
+	vpaddd	%xmm6,%xmm3,%xmm3
+	andl	%r10d,%r12d
+	xorl	%r10d,%r13d
+	addl	56(%rsp),%ebx
+	vpshufd	$80,%xmm3,%xmm7
+	movl	%ecx,%r15d
+	xorl	%eax,%r12d
+	shrdl	$11,%r14d,%r14d
+	vpsrld	$10,%xmm7,%xmm6
+	xorl	%edx,%r15d
+	addl	%r12d,%ebx
+	shrdl	$6,%r13d,%r13d
+	vpsrlq	$17,%xmm7,%xmm7
+	andl	%r15d,%edi
+	xorl	%ecx,%r14d
+	addl	%r13d,%ebx
+	vpxor	%xmm7,%xmm6,%xmm6
+	xorl	%edx,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%ebx,%r9d
+	vpsrlq	$2,%xmm7,%xmm7
+	addl	%edi,%ebx
+	movl	%r9d,%r13d
+	addl	%ebx,%r14d
+	vpxor	%xmm7,%xmm6,%xmm6
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%ebx
+	movl	%r10d,%r12d
+	vpshufb	%xmm9,%xmm6,%xmm6
+	shrdl	$9,%r14d,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r12d
+	vpaddd	%xmm6,%xmm3,%xmm3
+	shrdl	$5,%r13d,%r13d
+	xorl	%ebx,%r14d
+	andl	%r9d,%r12d
+	vpaddd	96(%rbp),%xmm3,%xmm6
+	xorl	%r9d,%r13d
+	addl	60(%rsp),%eax
+	movl	%ebx,%edi
+	xorl	%r11d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%ecx,%edi
+	addl	%r12d,%eax
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%ebx,%r14d
+	addl	%r13d,%eax
+	xorl	%ecx,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%eax,%r8d
+	addl	%r15d,%eax
+	movl	%r8d,%r13d
+	addl	%eax,%r14d
+	vmovdqa	%xmm6,48(%rsp)
+	cmpb	$0,131(%rbp)
+	jne	L$avx_00_47
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%eax
+	movl	%r9d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%eax,%r14d
+	andl	%r8d,%r12d
+	xorl	%r8d,%r13d
+	addl	0(%rsp),%r11d
+	movl	%eax,%r15d
+	xorl	%r10d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%ebx,%r15d
+	addl	%r12d,%r11d
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	xorl	%eax,%r14d
+	addl	%r13d,%r11d
+	xorl	%ebx,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%r11d,%edx
+	addl	%edi,%r11d
+	movl	%edx,%r13d
+	addl	%r11d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r11d
+	movl	%r8d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r11d,%r14d
+	andl	%edx,%r12d
+	xorl	%edx,%r13d
+	addl	4(%rsp),%r10d
+	movl	%r11d,%edi
+	xorl	%r9d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%eax,%edi
+	addl	%r12d,%r10d
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%r11d,%r14d
+	addl	%r13d,%r10d
+	xorl	%eax,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%r10d,%ecx
+	addl	%r15d,%r10d
+	movl	%ecx,%r13d
+	addl	%r10d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r10d
+	movl	%edx,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r10d,%r14d
+	andl	%ecx,%r12d
+	xorl	%ecx,%r13d
+	addl	8(%rsp),%r9d
+	movl	%r10d,%r15d
+	xorl	%r8d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r11d,%r15d
+	addl	%r12d,%r9d
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	xorl	%r10d,%r14d
+	addl	%r13d,%r9d
+	xorl	%r11d,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%r9d,%ebx
+	addl	%edi,%r9d
+	movl	%ebx,%r13d
+	addl	%r9d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r9d
+	movl	%ecx,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r9d,%r14d
+	andl	%ebx,%r12d
+	xorl	%ebx,%r13d
+	addl	12(%rsp),%r8d
+	movl	%r9d,%edi
+	xorl	%edx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r10d,%edi
+	addl	%r12d,%r8d
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%r9d,%r14d
+	addl	%r13d,%r8d
+	xorl	%r10d,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%r8d,%eax
+	addl	%r15d,%r8d
+	movl	%eax,%r13d
+	addl	%r8d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r8d
+	movl	%ebx,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r8d,%r14d
+	andl	%eax,%r12d
+	xorl	%eax,%r13d
+	addl	16(%rsp),%edx
+	movl	%r8d,%r15d
+	xorl	%ecx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r9d,%r15d
+	addl	%r12d,%edx
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	xorl	%r8d,%r14d
+	addl	%r13d,%edx
+	xorl	%r9d,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%edx,%r11d
+	addl	%edi,%edx
+	movl	%r11d,%r13d
+	addl	%edx,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%edx
+	movl	%eax,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%edx,%r14d
+	andl	%r11d,%r12d
+	xorl	%r11d,%r13d
+	addl	20(%rsp),%ecx
+	movl	%edx,%edi
+	xorl	%ebx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r8d,%edi
+	addl	%r12d,%ecx
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%edx,%r14d
+	addl	%r13d,%ecx
+	xorl	%r8d,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%ecx,%r10d
+	addl	%r15d,%ecx
+	movl	%r10d,%r13d
+	addl	%ecx,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%ecx
+	movl	%r11d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%ecx,%r14d
+	andl	%r10d,%r12d
+	xorl	%r10d,%r13d
+	addl	24(%rsp),%ebx
+	movl	%ecx,%r15d
+	xorl	%eax,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%edx,%r15d
+	addl	%r12d,%ebx
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	xorl	%ecx,%r14d
+	addl	%r13d,%ebx
+	xorl	%edx,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%ebx,%r9d
+	addl	%edi,%ebx
+	movl	%r9d,%r13d
+	addl	%ebx,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%ebx
+	movl	%r10d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%ebx,%r14d
+	andl	%r9d,%r12d
+	xorl	%r9d,%r13d
+	addl	28(%rsp),%eax
+	movl	%ebx,%edi
+	xorl	%r11d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%ecx,%edi
+	addl	%r12d,%eax
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%ebx,%r14d
+	addl	%r13d,%eax
+	xorl	%ecx,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%eax,%r8d
+	addl	%r15d,%eax
+	movl	%r8d,%r13d
+	addl	%eax,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%eax
+	movl	%r9d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%eax,%r14d
+	andl	%r8d,%r12d
+	xorl	%r8d,%r13d
+	addl	32(%rsp),%r11d
+	movl	%eax,%r15d
+	xorl	%r10d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%ebx,%r15d
+	addl	%r12d,%r11d
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	xorl	%eax,%r14d
+	addl	%r13d,%r11d
+	xorl	%ebx,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%r11d,%edx
+	addl	%edi,%r11d
+	movl	%edx,%r13d
+	addl	%r11d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r11d
+	movl	%r8d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r11d,%r14d
+	andl	%edx,%r12d
+	xorl	%edx,%r13d
+	addl	36(%rsp),%r10d
+	movl	%r11d,%edi
+	xorl	%r9d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%eax,%edi
+	addl	%r12d,%r10d
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%r11d,%r14d
+	addl	%r13d,%r10d
+	xorl	%eax,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%r10d,%ecx
+	addl	%r15d,%r10d
+	movl	%ecx,%r13d
+	addl	%r10d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r10d
+	movl	%edx,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r10d,%r14d
+	andl	%ecx,%r12d
+	xorl	%ecx,%r13d
+	addl	40(%rsp),%r9d
+	movl	%r10d,%r15d
+	xorl	%r8d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r11d,%r15d
+	addl	%r12d,%r9d
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	xorl	%r10d,%r14d
+	addl	%r13d,%r9d
+	xorl	%r11d,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%r9d,%ebx
+	addl	%edi,%r9d
+	movl	%ebx,%r13d
+	addl	%r9d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r9d
+	movl	%ecx,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r9d,%r14d
+	andl	%ebx,%r12d
+	xorl	%ebx,%r13d
+	addl	44(%rsp),%r8d
+	movl	%r9d,%edi
+	xorl	%edx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r10d,%edi
+	addl	%r12d,%r8d
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%r9d,%r14d
+	addl	%r13d,%r8d
+	xorl	%r10d,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%r8d,%eax
+	addl	%r15d,%r8d
+	movl	%eax,%r13d
+	addl	%r8d,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%r8d
+	movl	%ebx,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%r8d,%r14d
+	andl	%eax,%r12d
+	xorl	%eax,%r13d
+	addl	48(%rsp),%edx
+	movl	%r8d,%r15d
+	xorl	%ecx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r9d,%r15d
+	addl	%r12d,%edx
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	xorl	%r8d,%r14d
+	addl	%r13d,%edx
+	xorl	%r9d,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%edx,%r11d
+	addl	%edi,%edx
+	movl	%r11d,%r13d
+	addl	%edx,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%edx
+	movl	%eax,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%edx,%r14d
+	andl	%r11d,%r12d
+	xorl	%r11d,%r13d
+	addl	52(%rsp),%ecx
+	movl	%edx,%edi
+	xorl	%ebx,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%r8d,%edi
+	addl	%r12d,%ecx
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%edx,%r14d
+	addl	%r13d,%ecx
+	xorl	%r8d,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%ecx,%r10d
+	addl	%r15d,%ecx
+	movl	%r10d,%r13d
+	addl	%ecx,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%ecx
+	movl	%r11d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%ecx,%r14d
+	andl	%r10d,%r12d
+	xorl	%r10d,%r13d
+	addl	56(%rsp),%ebx
+	movl	%ecx,%r15d
+	xorl	%eax,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%edx,%r15d
+	addl	%r12d,%ebx
+	shrdl	$6,%r13d,%r13d
+	andl	%r15d,%edi
+	xorl	%ecx,%r14d
+	addl	%r13d,%ebx
+	xorl	%edx,%edi
+	shrdl	$2,%r14d,%r14d
+	addl	%ebx,%r9d
+	addl	%edi,%ebx
+	movl	%r9d,%r13d
+	addl	%ebx,%r14d
+	shrdl	$14,%r13d,%r13d
+	movl	%r14d,%ebx
+	movl	%r10d,%r12d
+	shrdl	$9,%r14d,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r12d
+	shrdl	$5,%r13d,%r13d
+	xorl	%ebx,%r14d
+	andl	%r9d,%r12d
+	xorl	%r9d,%r13d
+	addl	60(%rsp),%eax
+	movl	%ebx,%edi
+	xorl	%r11d,%r12d
+	shrdl	$11,%r14d,%r14d
+	xorl	%ecx,%edi
+	addl	%r12d,%eax
+	shrdl	$6,%r13d,%r13d
+	andl	%edi,%r15d
+	xorl	%ebx,%r14d
+	addl	%r13d,%eax
+	xorl	%ecx,%r15d
+	shrdl	$2,%r14d,%r14d
+	addl	%eax,%r8d
+	addl	%r15d,%eax
+	movl	%r8d,%r13d
+	addl	%eax,%r14d
+	movq	64+0(%rsp),%rdi	// rdi = pointer reloaded from the frame; it addresses the eight 32-bit words updated below (presumably the hash state saved by the prologue, which is outside this view)
+	movl	%r14d,%eax	// move the pending value in r14d into eax before accumulating
+
+	addl	0(%rdi),%eax	// add the previous eight 32-bit words at rdi to the working registers
+	leaq	64(%rsi),%rsi	// advance rsi by 64 bytes
+	addl	4(%rdi),%ebx
+	addl	8(%rdi),%ecx
+	addl	12(%rdi),%edx
+	addl	16(%rdi),%r8d
+	addl	20(%rdi),%r9d
+	addl	24(%rdi),%r10d
+	addl	28(%rdi),%r11d
+
+	cmpq	64+16(%rsp),%rsi	// compare rsi with the limit stored at 64+16(%rsp) (presumably the end-of-input pointer); the movl stores below leave the flags intact for jb
+
+	movl	%eax,0(%rdi)	// write the eight updated words back through rdi
+	movl	%ebx,4(%rdi)
+	movl	%ecx,8(%rdi)
+	movl	%edx,12(%rdi)
+	movl	%r8d,16(%rdi)
+	movl	%r9d,20(%rdi)
+	movl	%r10d,24(%rdi)
+	movl	%r11d,28(%rdi)
+	jb	L$loop_avx	// loop again while rsi is below the stored limit (unsigned compare)
+
+	movq	64+24(%rsp),%rsi	// rsi = value stored at 64+24(%rsp); used below as the base for the register restore and as the new rsp
+	vzeroupper	// clear the upper halves of the YMM registers before returning
+	movq	-48(%rsi),%r15	// restore r15, r14, r13, r12, rbp, rbx from the six 8-byte slots just below rsi
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp	// rsp = rsi, discarding the local frame
+L$epilogue_avx:
+	.byte	0xf3,0xc3	// hand-encoded return: bytes F3 C3 are "rep ret"
+
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/fipsmodule/sha512-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/sha512-x86_64.S
new file mode 100644
index 0000000..0b738e6
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/sha512-x86_64.S
@@ -0,0 +1,4027 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+
+.globl	_sha512_block_data_order
+.private_extern _sha512_block_data_order
+
+.p2align	4
+_sha512_block_data_order:	// Entry point: tests CPU capability words, then either jumps to the XOP or AVX variant or builds the frame for the scalar loop at L$loop
+	leaq	_OPENSSL_ia32cap_P(%rip),%r11	// r11 = address of the capability vector
+	movl	0(%r11),%r9d	// r9d = capability word 0
+	movl	4(%r11),%r10d	// r10d = capability word 1
+	movl	8(%r11),%r11d	// r11d = capability word 2; not read again on the scalar path before r11 is reloaded below
+	testl	$2048,%r10d	// 2048 = 1<<11 of word 1 (presumably the XOP feature bit; confirm against the capability vector layout)
+	jnz	L$xop_shortcut
+	andl	$1073741824,%r9d	// keep only bit 30 of word 0
+	andl	$268435968,%r10d	// keep only bits 28 and 9 of word 1
+	orl	%r9d,%r10d
+	cmpl	$1342177792,%r10d	// 1342177792 = bits 30, 28 and 9: take the AVX path only when all three are set
+	je	L$avx_shortcut
+	movq	%rsp,%rax	// rax = stack pointer on entry, stored in the frame below
+	pushq	%rbx	// save the six callee-saved registers used by the scalar code
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	shlq	$4,%rdx	// rdx *= 16; combined with the scale of 8 below, the third argument counts 128-byte units
+	subq	$128+32,%rsp	// reserve 128 bytes (L$loop stores message words at 0..120(%rsp)) plus 32 bytes for the four values saved below
+	leaq	(%rsi,%rdx,8),%rdx	// rdx = rsi + count*128, the end-of-input pointer
+	andq	$-64,%rsp	// round rsp down to a 64-byte boundary
+	movq	%rdi,128+0(%rsp)	// save the first argument (pointer to the eight 64-bit words loaded after L$prologue)
+	movq	%rsi,128+8(%rsp)	// save the input pointer
+	movq	%rdx,128+16(%rsp)	// save the end-of-input pointer
+	movq	%rax,128+24(%rsp)	// save the entry stack pointer
+L$prologue:
+
+	movq	0(%rdi),%rax	// load the eight 64-bit words at rdi into rax, rbx, rcx, rdx, r8, r9, r10, r11
+	movq	8(%rdi),%rbx
+	movq	16(%rdi),%rcx
+	movq	24(%rdi),%rdx	// rdx is reused as a working register; the end pointer stays in the frame
+	movq	32(%rdi),%r8
+	movq	40(%rdi),%r9
+	movq	48(%rdi),%r10
+	movq	56(%rdi),%r11
+	jmp	L$loop
+
+.p2align	4
+L$loop:
+	movq	%rbx,%rdi
+	leaq	K512(%rip),%rbp
+	xorq	%rcx,%rdi
+	movq	0(%rsi),%r12
+	movq	%r8,%r13
+	movq	%rax,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r9,%r15
+
+	xorq	%r8,%r13
+	rorq	$5,%r14
+	xorq	%r10,%r15
+
+	movq	%r12,0(%rsp)
+	xorq	%rax,%r14
+	andq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%r10,%r15
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	addq	%r15,%r12
+
+	movq	%rax,%r15
+	addq	(%rbp),%r12
+	xorq	%rax,%r14
+
+	xorq	%rbx,%r15
+	rorq	$14,%r13
+	movq	%rbx,%r11
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%r11
+	addq	%r12,%rdx
+	addq	%r12,%r11
+
+	leaq	8(%rbp),%rbp
+	addq	%r14,%r11
+	movq	8(%rsi),%r12
+	movq	%rdx,%r13
+	movq	%r11,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r8,%rdi
+
+	xorq	%rdx,%r13
+	rorq	$5,%r14
+	xorq	%r9,%rdi
+
+	movq	%r12,8(%rsp)
+	xorq	%r11,%r14
+	andq	%rdx,%rdi
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r9,%rdi
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	addq	%rdi,%r12
+
+	movq	%r11,%rdi
+	addq	(%rbp),%r12
+	xorq	%r11,%r14
+
+	xorq	%rax,%rdi
+	rorq	$14,%r13
+	movq	%rax,%r10
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%r10
+	addq	%r12,%rcx
+	addq	%r12,%r10
+
+	leaq	24(%rbp),%rbp
+	addq	%r14,%r10
+	movq	16(%rsi),%r12
+	movq	%rcx,%r13
+	movq	%r10,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rdx,%r15
+
+	xorq	%rcx,%r13
+	rorq	$5,%r14
+	xorq	%r8,%r15
+
+	movq	%r12,16(%rsp)
+	xorq	%r10,%r14
+	andq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r8,%r15
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	addq	%r15,%r12
+
+	movq	%r10,%r15
+	addq	(%rbp),%r12
+	xorq	%r10,%r14
+
+	xorq	%r11,%r15
+	rorq	$14,%r13
+	movq	%r11,%r9
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%r9
+	addq	%r12,%rbx
+	addq	%r12,%r9
+
+	leaq	8(%rbp),%rbp
+	addq	%r14,%r9
+	movq	24(%rsi),%r12
+	movq	%rbx,%r13
+	movq	%r9,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rcx,%rdi
+
+	xorq	%rbx,%r13
+	rorq	$5,%r14
+	xorq	%rdx,%rdi
+
+	movq	%r12,24(%rsp)
+	xorq	%r9,%r14
+	andq	%rbx,%rdi
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%rdx,%rdi
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	addq	%rdi,%r12
+
+	movq	%r9,%rdi
+	addq	(%rbp),%r12
+	xorq	%r9,%r14
+
+	xorq	%r10,%rdi
+	rorq	$14,%r13
+	movq	%r10,%r8
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%r8
+	addq	%r12,%rax
+	addq	%r12,%r8
+
+	leaq	24(%rbp),%rbp
+	addq	%r14,%r8
+	movq	32(%rsi),%r12
+	movq	%rax,%r13
+	movq	%r8,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rbx,%r15
+
+	xorq	%rax,%r13
+	rorq	$5,%r14
+	xorq	%rcx,%r15
+
+	movq	%r12,32(%rsp)
+	xorq	%r8,%r14
+	andq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%rcx,%r15
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	addq	%r15,%r12
+
+	movq	%r8,%r15
+	addq	(%rbp),%r12
+	xorq	%r8,%r14
+
+	xorq	%r9,%r15
+	rorq	$14,%r13
+	movq	%r9,%rdx
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%rdx
+	addq	%r12,%r11
+	addq	%r12,%rdx
+
+	leaq	8(%rbp),%rbp
+	addq	%r14,%rdx
+	movq	40(%rsi),%r12
+	movq	%r11,%r13
+	movq	%rdx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rax,%rdi
+
+	xorq	%r11,%r13
+	rorq	$5,%r14
+	xorq	%rbx,%rdi
+
+	movq	%r12,40(%rsp)
+	xorq	%rdx,%r14
+	andq	%r11,%rdi
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rbx,%rdi
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	addq	%rdi,%r12
+
+	movq	%rdx,%rdi
+	addq	(%rbp),%r12
+	xorq	%rdx,%r14
+
+	xorq	%r8,%rdi
+	rorq	$14,%r13
+	movq	%r8,%rcx
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%rcx
+	addq	%r12,%r10
+	addq	%r12,%rcx
+
+	leaq	24(%rbp),%rbp
+	addq	%r14,%rcx
+	movq	48(%rsi),%r12
+	movq	%r10,%r13
+	movq	%rcx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r11,%r15
+
+	xorq	%r10,%r13
+	rorq	$5,%r14
+	xorq	%rax,%r15
+
+	movq	%r12,48(%rsp)
+	xorq	%rcx,%r14
+	andq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rax,%r15
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	addq	%r15,%r12
+
+	movq	%rcx,%r15
+	addq	(%rbp),%r12
+	xorq	%rcx,%r14
+
+	xorq	%rdx,%r15
+	rorq	$14,%r13
+	movq	%rdx,%rbx
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%rbx
+	addq	%r12,%r9
+	addq	%r12,%rbx
+
+	leaq	8(%rbp),%rbp
+	addq	%r14,%rbx
+	movq	56(%rsi),%r12
+	movq	%r9,%r13
+	movq	%rbx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r10,%rdi
+
+	xorq	%r9,%r13
+	rorq	$5,%r14
+	xorq	%r11,%rdi
+
+	movq	%r12,56(%rsp)
+	xorq	%rbx,%r14
+	andq	%r9,%rdi
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%r11,%rdi
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	addq	%rdi,%r12
+
+	movq	%rbx,%rdi
+	addq	(%rbp),%r12
+	xorq	%rbx,%r14
+
+	xorq	%rcx,%rdi
+	rorq	$14,%r13
+	movq	%rcx,%rax
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%rax
+	addq	%r12,%r8
+	addq	%r12,%rax
+
+	leaq	24(%rbp),%rbp
+	addq	%r14,%rax
+	movq	64(%rsi),%r12
+	movq	%r8,%r13
+	movq	%rax,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r9,%r15
+
+	xorq	%r8,%r13
+	rorq	$5,%r14
+	xorq	%r10,%r15
+
+	movq	%r12,64(%rsp)
+	xorq	%rax,%r14
+	andq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%r10,%r15
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	addq	%r15,%r12
+
+	movq	%rax,%r15
+	addq	(%rbp),%r12
+	xorq	%rax,%r14
+
+	xorq	%rbx,%r15
+	rorq	$14,%r13
+	movq	%rbx,%r11
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%r11
+	addq	%r12,%rdx
+	addq	%r12,%r11
+
+	leaq	8(%rbp),%rbp
+	addq	%r14,%r11
+	movq	72(%rsi),%r12
+	movq	%rdx,%r13
+	movq	%r11,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r8,%rdi
+
+	xorq	%rdx,%r13
+	rorq	$5,%r14
+	xorq	%r9,%rdi
+
+	movq	%r12,72(%rsp)
+	xorq	%r11,%r14
+	andq	%rdx,%rdi
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r9,%rdi
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	addq	%rdi,%r12
+
+	movq	%r11,%rdi
+	addq	(%rbp),%r12
+	xorq	%r11,%r14
+
+	xorq	%rax,%rdi
+	rorq	$14,%r13
+	movq	%rax,%r10
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%r10
+	addq	%r12,%rcx
+	addq	%r12,%r10
+
+	leaq	24(%rbp),%rbp
+	addq	%r14,%r10
+	movq	80(%rsi),%r12
+	movq	%rcx,%r13
+	movq	%r10,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rdx,%r15
+
+	xorq	%rcx,%r13
+	rorq	$5,%r14
+	xorq	%r8,%r15
+
+	movq	%r12,80(%rsp)
+	xorq	%r10,%r14
+	andq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r8,%r15
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	addq	%r15,%r12
+
+	movq	%r10,%r15
+	addq	(%rbp),%r12
+	xorq	%r10,%r14
+
+	xorq	%r11,%r15
+	rorq	$14,%r13
+	movq	%r11,%r9
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%r9
+	addq	%r12,%rbx
+	addq	%r12,%r9
+
+	leaq	8(%rbp),%rbp
+	addq	%r14,%r9
+	movq	88(%rsi),%r12
+	movq	%rbx,%r13
+	movq	%r9,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rcx,%rdi
+
+	xorq	%rbx,%r13
+	rorq	$5,%r14
+	xorq	%rdx,%rdi
+
+	movq	%r12,88(%rsp)
+	xorq	%r9,%r14
+	andq	%rbx,%rdi
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%rdx,%rdi
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	addq	%rdi,%r12
+
+	movq	%r9,%rdi
+	addq	(%rbp),%r12
+	xorq	%r9,%r14
+
+	xorq	%r10,%rdi
+	rorq	$14,%r13
+	movq	%r10,%r8
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%r8
+	addq	%r12,%rax
+	addq	%r12,%r8
+
+	leaq	24(%rbp),%rbp
+	addq	%r14,%r8
+	movq	96(%rsi),%r12
+	movq	%rax,%r13
+	movq	%r8,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rbx,%r15
+
+	xorq	%rax,%r13
+	rorq	$5,%r14
+	xorq	%rcx,%r15
+
+	movq	%r12,96(%rsp)
+	xorq	%r8,%r14
+	andq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%rcx,%r15
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	addq	%r15,%r12
+
+	movq	%r8,%r15
+	addq	(%rbp),%r12
+	xorq	%r8,%r14
+
+	xorq	%r9,%r15
+	rorq	$14,%r13
+	movq	%r9,%rdx
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%rdx
+	addq	%r12,%r11
+	addq	%r12,%rdx
+
+	leaq	8(%rbp),%rbp
+	addq	%r14,%rdx
+	movq	104(%rsi),%r12
+	movq	%r11,%r13
+	movq	%rdx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rax,%rdi
+
+	xorq	%r11,%r13
+	rorq	$5,%r14
+	xorq	%rbx,%rdi
+
+	movq	%r12,104(%rsp)
+	xorq	%rdx,%r14
+	andq	%r11,%rdi
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rbx,%rdi
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	addq	%rdi,%r12
+
+	movq	%rdx,%rdi
+	addq	(%rbp),%r12
+	xorq	%rdx,%r14
+
+	xorq	%r8,%rdi
+	rorq	$14,%r13
+	movq	%r8,%rcx
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%rcx
+	addq	%r12,%r10
+	addq	%r12,%rcx
+
+	leaq	24(%rbp),%rbp
+	addq	%r14,%rcx
+	movq	112(%rsi),%r12
+	movq	%r10,%r13
+	movq	%rcx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r11,%r15
+
+	xorq	%r10,%r13
+	rorq	$5,%r14
+	xorq	%rax,%r15
+
+	movq	%r12,112(%rsp)
+	xorq	%rcx,%r14
+	andq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rax,%r15
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	addq	%r15,%r12
+
+	movq	%rcx,%r15
+	addq	(%rbp),%r12
+	xorq	%rcx,%r14
+
+	xorq	%rdx,%r15
+	rorq	$14,%r13
+	movq	%rdx,%rbx
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%rbx
+	addq	%r12,%r9
+	addq	%r12,%rbx
+
+	leaq	8(%rbp),%rbp
+	addq	%r14,%rbx
+	movq	120(%rsi),%r12
+	movq	%r9,%r13
+	movq	%rbx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r10,%rdi
+
+	xorq	%r9,%r13
+	rorq	$5,%r14
+	xorq	%r11,%rdi
+
+	movq	%r12,120(%rsp)
+	xorq	%rbx,%r14
+	andq	%r9,%rdi
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%r11,%rdi
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	addq	%rdi,%r12
+
+	movq	%rbx,%rdi
+	addq	(%rbp),%r12
+	xorq	%rbx,%r14
+
+	xorq	%rcx,%rdi
+	rorq	$14,%r13
+	movq	%rcx,%rax
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%rax
+	addq	%r12,%r8
+	addq	%r12,%rax
+
+	leaq	24(%rbp),%rbp
+	jmp	L$rounds_16_xx
+.p2align	4
+L$rounds_16_xx:
+	movq	8(%rsp),%r13
+	movq	112(%rsp),%r15
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%rax
+	movq	%r15,%r14
+	rorq	$42,%r15
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	xorq	%r13,%r12
+	xorq	%r14,%r15
+	addq	72(%rsp),%r12
+
+	addq	0(%rsp),%r12
+	movq	%r8,%r13
+	addq	%r15,%r12
+	movq	%rax,%r14
+	rorq	$23,%r13
+	movq	%r9,%r15
+
+	xorq	%r8,%r13
+	rorq	$5,%r14
+	xorq	%r10,%r15
+
+	movq	%r12,0(%rsp)
+	xorq	%rax,%r14
+	andq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%r10,%r15
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	addq	%r15,%r12
+
+	movq	%rax,%r15
+	addq	(%rbp),%r12
+	xorq	%rax,%r14
+
+	xorq	%rbx,%r15
+	rorq	$14,%r13
+	movq	%rbx,%r11
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%r11
+	addq	%r12,%rdx
+	addq	%r12,%r11
+
+	leaq	8(%rbp),%rbp
+	movq	16(%rsp),%r13
+	movq	120(%rsp),%rdi
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%r11
+	movq	%rdi,%r14
+	rorq	$42,%rdi
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%rdi
+	shrq	$6,%r14
+
+	rorq	$19,%rdi
+	xorq	%r13,%r12
+	xorq	%r14,%rdi
+	addq	80(%rsp),%r12
+
+	addq	8(%rsp),%r12
+	movq	%rdx,%r13
+	addq	%rdi,%r12
+	movq	%r11,%r14
+	rorq	$23,%r13
+	movq	%r8,%rdi
+
+	xorq	%rdx,%r13
+	rorq	$5,%r14
+	xorq	%r9,%rdi
+
+	movq	%r12,8(%rsp)
+	xorq	%r11,%r14
+	andq	%rdx,%rdi
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r9,%rdi
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	addq	%rdi,%r12
+
+	movq	%r11,%rdi
+	addq	(%rbp),%r12
+	xorq	%r11,%r14
+
+	xorq	%rax,%rdi
+	rorq	$14,%r13
+	movq	%rax,%r10
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%r10
+	addq	%r12,%rcx
+	addq	%r12,%r10
+
+	leaq	24(%rbp),%rbp
+	movq	24(%rsp),%r13
+	movq	0(%rsp),%r15
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%r10
+	movq	%r15,%r14
+	rorq	$42,%r15
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	xorq	%r13,%r12
+	xorq	%r14,%r15
+	addq	88(%rsp),%r12
+
+	addq	16(%rsp),%r12
+	movq	%rcx,%r13
+	addq	%r15,%r12
+	movq	%r10,%r14
+	rorq	$23,%r13
+	movq	%rdx,%r15
+
+	xorq	%rcx,%r13
+	rorq	$5,%r14
+	xorq	%r8,%r15
+
+	movq	%r12,16(%rsp)
+	xorq	%r10,%r14
+	andq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r8,%r15
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	addq	%r15,%r12
+
+	movq	%r10,%r15
+	addq	(%rbp),%r12
+	xorq	%r10,%r14
+
+	xorq	%r11,%r15
+	rorq	$14,%r13
+	movq	%r11,%r9
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%r9
+	addq	%r12,%rbx
+	addq	%r12,%r9
+
+	leaq	8(%rbp),%rbp
+	movq	32(%rsp),%r13
+	movq	8(%rsp),%rdi
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%r9
+	movq	%rdi,%r14
+	rorq	$42,%rdi
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%rdi
+	shrq	$6,%r14
+
+	rorq	$19,%rdi
+	xorq	%r13,%r12
+	xorq	%r14,%rdi
+	addq	96(%rsp),%r12
+
+	addq	24(%rsp),%r12
+	movq	%rbx,%r13
+	addq	%rdi,%r12
+	movq	%r9,%r14
+	rorq	$23,%r13
+	movq	%rcx,%rdi
+
+	xorq	%rbx,%r13
+	rorq	$5,%r14
+	xorq	%rdx,%rdi
+
+	movq	%r12,24(%rsp)
+	xorq	%r9,%r14
+	andq	%rbx,%rdi
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%rdx,%rdi
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	addq	%rdi,%r12
+
+	movq	%r9,%rdi
+	addq	(%rbp),%r12
+	xorq	%r9,%r14
+
+	xorq	%r10,%rdi
+	rorq	$14,%r13
+	movq	%r10,%r8
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%r8
+	addq	%r12,%rax
+	addq	%r12,%r8
+
+	leaq	24(%rbp),%rbp
+	movq	40(%rsp),%r13
+	movq	16(%rsp),%r15
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%r8
+	movq	%r15,%r14
+	rorq	$42,%r15
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	xorq	%r13,%r12
+	xorq	%r14,%r15
+	addq	104(%rsp),%r12
+
+	addq	32(%rsp),%r12
+	movq	%rax,%r13
+	addq	%r15,%r12
+	movq	%r8,%r14
+	rorq	$23,%r13
+	movq	%rbx,%r15
+
+	xorq	%rax,%r13
+	rorq	$5,%r14
+	xorq	%rcx,%r15
+
+	movq	%r12,32(%rsp)
+	xorq	%r8,%r14
+	andq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%rcx,%r15
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	addq	%r15,%r12
+
+	movq	%r8,%r15
+	addq	(%rbp),%r12
+	xorq	%r8,%r14
+
+	xorq	%r9,%r15
+	rorq	$14,%r13
+	movq	%r9,%rdx
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%rdx
+	addq	%r12,%r11
+	addq	%r12,%rdx
+
+	leaq	8(%rbp),%rbp
+	movq	48(%rsp),%r13
+	movq	24(%rsp),%rdi
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%rdx
+	movq	%rdi,%r14
+	rorq	$42,%rdi
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%rdi
+	shrq	$6,%r14
+
+	rorq	$19,%rdi
+	xorq	%r13,%r12
+	xorq	%r14,%rdi
+	addq	112(%rsp),%r12
+
+	addq	40(%rsp),%r12
+	movq	%r11,%r13
+	addq	%rdi,%r12
+	movq	%rdx,%r14
+	rorq	$23,%r13
+	movq	%rax,%rdi
+
+	xorq	%r11,%r13
+	rorq	$5,%r14
+	xorq	%rbx,%rdi
+
+	movq	%r12,40(%rsp)
+	xorq	%rdx,%r14
+	andq	%r11,%rdi
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rbx,%rdi
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	addq	%rdi,%r12
+
+	movq	%rdx,%rdi
+	addq	(%rbp),%r12
+	xorq	%rdx,%r14
+
+	xorq	%r8,%rdi
+	rorq	$14,%r13
+	movq	%r8,%rcx
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%rcx
+	addq	%r12,%r10
+	addq	%r12,%rcx
+
+	leaq	24(%rbp),%rbp
+	movq	56(%rsp),%r13
+	movq	32(%rsp),%r15
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%rcx
+	movq	%r15,%r14
+	rorq	$42,%r15
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	xorq	%r13,%r12
+	xorq	%r14,%r15
+	addq	120(%rsp),%r12
+
+	addq	48(%rsp),%r12
+	movq	%r10,%r13
+	addq	%r15,%r12
+	movq	%rcx,%r14
+	rorq	$23,%r13
+	movq	%r11,%r15
+
+	xorq	%r10,%r13
+	rorq	$5,%r14
+	xorq	%rax,%r15
+
+	movq	%r12,48(%rsp)
+	xorq	%rcx,%r14
+	andq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rax,%r15
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	addq	%r15,%r12
+
+	movq	%rcx,%r15
+	addq	(%rbp),%r12
+	xorq	%rcx,%r14
+
+	xorq	%rdx,%r15
+	rorq	$14,%r13
+	movq	%rdx,%rbx
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%rbx
+	addq	%r12,%r9
+	addq	%r12,%rbx
+
+	leaq	8(%rbp),%rbp
+	movq	64(%rsp),%r13
+	movq	40(%rsp),%rdi
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%rbx
+	movq	%rdi,%r14
+	rorq	$42,%rdi
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%rdi
+	shrq	$6,%r14
+
+	rorq	$19,%rdi
+	xorq	%r13,%r12
+	xorq	%r14,%rdi
+	addq	0(%rsp),%r12
+
+	addq	56(%rsp),%r12
+	movq	%r9,%r13
+	addq	%rdi,%r12
+	movq	%rbx,%r14
+	rorq	$23,%r13
+	movq	%r10,%rdi
+
+	xorq	%r9,%r13
+	rorq	$5,%r14
+	xorq	%r11,%rdi
+
+	movq	%r12,56(%rsp)
+	xorq	%rbx,%r14
+	andq	%r9,%rdi
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%r11,%rdi
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	addq	%rdi,%r12
+
+	movq	%rbx,%rdi
+	addq	(%rbp),%r12
+	xorq	%rbx,%r14
+
+	xorq	%rcx,%rdi
+	rorq	$14,%r13
+	movq	%rcx,%rax
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%rax
+	addq	%r12,%r8
+	addq	%r12,%rax
+
+	leaq	24(%rbp),%rbp
+	movq	72(%rsp),%r13
+	movq	48(%rsp),%r15
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%rax
+	movq	%r15,%r14
+	rorq	$42,%r15
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	xorq	%r13,%r12
+	xorq	%r14,%r15
+	addq	8(%rsp),%r12
+
+	addq	64(%rsp),%r12
+	movq	%r8,%r13
+	addq	%r15,%r12
+	movq	%rax,%r14
+	rorq	$23,%r13
+	movq	%r9,%r15
+
+	xorq	%r8,%r13
+	rorq	$5,%r14
+	xorq	%r10,%r15
+
+	movq	%r12,64(%rsp)
+	xorq	%rax,%r14
+	andq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%r10,%r15
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	addq	%r15,%r12
+
+	movq	%rax,%r15
+	addq	(%rbp),%r12
+	xorq	%rax,%r14
+
+	xorq	%rbx,%r15
+	rorq	$14,%r13
+	movq	%rbx,%r11
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%r11
+	addq	%r12,%rdx
+	addq	%r12,%r11
+
+	leaq	8(%rbp),%rbp
+	movq	80(%rsp),%r13
+	movq	56(%rsp),%rdi
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%r11
+	movq	%rdi,%r14
+	rorq	$42,%rdi
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%rdi
+	shrq	$6,%r14
+
+	rorq	$19,%rdi
+	xorq	%r13,%r12
+	xorq	%r14,%rdi
+	addq	16(%rsp),%r12
+
+	addq	72(%rsp),%r12
+	movq	%rdx,%r13
+	addq	%rdi,%r12
+	movq	%r11,%r14
+	rorq	$23,%r13
+	movq	%r8,%rdi
+
+	xorq	%rdx,%r13
+	rorq	$5,%r14
+	xorq	%r9,%rdi
+
+	movq	%r12,72(%rsp)
+	xorq	%r11,%r14
+	andq	%rdx,%rdi
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r9,%rdi
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	addq	%rdi,%r12
+
+	movq	%r11,%rdi
+	addq	(%rbp),%r12
+	xorq	%r11,%r14
+
+	xorq	%rax,%rdi
+	rorq	$14,%r13
+	movq	%rax,%r10
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%r10
+	addq	%r12,%rcx
+	addq	%r12,%r10
+
+	leaq	24(%rbp),%rbp
+	movq	88(%rsp),%r13
+	movq	64(%rsp),%r15
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%r10
+	movq	%r15,%r14
+	rorq	$42,%r15
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	xorq	%r13,%r12
+	xorq	%r14,%r15
+	addq	24(%rsp),%r12
+
+	addq	80(%rsp),%r12
+	movq	%rcx,%r13
+	addq	%r15,%r12
+	movq	%r10,%r14
+	rorq	$23,%r13
+	movq	%rdx,%r15
+
+	xorq	%rcx,%r13
+	rorq	$5,%r14
+	xorq	%r8,%r15
+
+	movq	%r12,80(%rsp)
+	xorq	%r10,%r14
+	andq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r8,%r15
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	addq	%r15,%r12
+
+	movq	%r10,%r15
+	addq	(%rbp),%r12
+	xorq	%r10,%r14
+
+	xorq	%r11,%r15
+	rorq	$14,%r13
+	movq	%r11,%r9
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%r9
+	addq	%r12,%rbx
+	addq	%r12,%r9
+
+	leaq	8(%rbp),%rbp
+	movq	96(%rsp),%r13
+	movq	72(%rsp),%rdi
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%r9
+	movq	%rdi,%r14
+	rorq	$42,%rdi
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%rdi
+	shrq	$6,%r14
+
+	rorq	$19,%rdi
+	xorq	%r13,%r12
+	xorq	%r14,%rdi
+	addq	32(%rsp),%r12
+
+	addq	88(%rsp),%r12
+	movq	%rbx,%r13
+	addq	%rdi,%r12
+	movq	%r9,%r14
+	rorq	$23,%r13
+	movq	%rcx,%rdi
+
+	xorq	%rbx,%r13
+	rorq	$5,%r14
+	xorq	%rdx,%rdi
+
+	movq	%r12,88(%rsp)
+	xorq	%r9,%r14
+	andq	%rbx,%rdi
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%rdx,%rdi
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	addq	%rdi,%r12
+
+	movq	%r9,%rdi
+	addq	(%rbp),%r12
+	xorq	%r9,%r14
+
+	xorq	%r10,%rdi
+	rorq	$14,%r13
+	movq	%r10,%r8
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%r8
+	addq	%r12,%rax
+	addq	%r12,%r8
+
+	leaq	24(%rbp),%rbp
+	movq	104(%rsp),%r13
+	movq	80(%rsp),%r15
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%r8
+	movq	%r15,%r14
+	rorq	$42,%r15
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	xorq	%r13,%r12
+	xorq	%r14,%r15
+	addq	40(%rsp),%r12
+
+	addq	96(%rsp),%r12
+	movq	%rax,%r13
+	addq	%r15,%r12
+	movq	%r8,%r14
+	rorq	$23,%r13
+	movq	%rbx,%r15
+
+	xorq	%rax,%r13
+	rorq	$5,%r14
+	xorq	%rcx,%r15
+
+	movq	%r12,96(%rsp)
+	xorq	%r8,%r14
+	andq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%rcx,%r15
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	addq	%r15,%r12
+
+	movq	%r8,%r15
+	addq	(%rbp),%r12
+	xorq	%r8,%r14
+
+	xorq	%r9,%r15
+	rorq	$14,%r13
+	movq	%r9,%rdx
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%rdx
+	addq	%r12,%r11
+	addq	%r12,%rdx
+
+	leaq	8(%rbp),%rbp
+	movq	112(%rsp),%r13
+	movq	88(%rsp),%rdi
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%rdx
+	movq	%rdi,%r14
+	rorq	$42,%rdi
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%rdi
+	shrq	$6,%r14
+
+	rorq	$19,%rdi
+	xorq	%r13,%r12
+	xorq	%r14,%rdi
+	addq	48(%rsp),%r12
+
+	addq	104(%rsp),%r12
+	movq	%r11,%r13
+	addq	%rdi,%r12
+	movq	%rdx,%r14
+	rorq	$23,%r13
+	movq	%rax,%rdi
+
+	xorq	%r11,%r13
+	rorq	$5,%r14
+	xorq	%rbx,%rdi
+
+	movq	%r12,104(%rsp)
+	xorq	%rdx,%r14
+	andq	%r11,%rdi
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rbx,%rdi
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	addq	%rdi,%r12
+
+	movq	%rdx,%rdi
+	addq	(%rbp),%r12
+	xorq	%rdx,%r14
+
+	xorq	%r8,%rdi
+	rorq	$14,%r13
+	movq	%r8,%rcx
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%rcx
+	addq	%r12,%r10
+	addq	%r12,%rcx
+
+	leaq	24(%rbp),%rbp
+	movq	120(%rsp),%r13
+	movq	96(%rsp),%r15
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%rcx
+	movq	%r15,%r14
+	rorq	$42,%r15
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	xorq	%r13,%r12
+	xorq	%r14,%r15
+	addq	56(%rsp),%r12
+
+	addq	112(%rsp),%r12
+	movq	%r10,%r13
+	addq	%r15,%r12
+	movq	%rcx,%r14
+	rorq	$23,%r13
+	movq	%r11,%r15
+
+	xorq	%r10,%r13
+	rorq	$5,%r14
+	xorq	%rax,%r15
+
+	movq	%r12,112(%rsp)
+	xorq	%rcx,%r14
+	andq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rax,%r15
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	addq	%r15,%r12
+
+	movq	%rcx,%r15
+	addq	(%rbp),%r12
+	xorq	%rcx,%r14
+
+	xorq	%rdx,%r15
+	rorq	$14,%r13
+	movq	%rdx,%rbx
+
+	andq	%r15,%rdi
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%rdi,%rbx
+	addq	%r12,%r9
+	addq	%r12,%rbx
+
+	leaq	8(%rbp),%rbp
+	movq	0(%rsp),%r13
+	movq	104(%rsp),%rdi
+
+	movq	%r13,%r12
+	rorq	$7,%r13
+	addq	%r14,%rbx
+	movq	%rdi,%r14
+	rorq	$42,%rdi
+
+	xorq	%r12,%r13
+	shrq	$7,%r12
+	rorq	$1,%r13
+	xorq	%r14,%rdi
+	shrq	$6,%r14
+
+	rorq	$19,%rdi
+	xorq	%r13,%r12
+	xorq	%r14,%rdi
+	addq	64(%rsp),%r12
+
+	addq	120(%rsp),%r12
+	movq	%r9,%r13
+	addq	%rdi,%r12
+	movq	%rbx,%r14
+	rorq	$23,%r13
+	movq	%r10,%rdi
+
+	xorq	%r9,%r13
+	rorq	$5,%r14
+	xorq	%r11,%rdi
+
+	movq	%r12,120(%rsp)
+	xorq	%rbx,%r14
+	andq	%r9,%rdi
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%r11,%rdi
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	addq	%rdi,%r12
+
+	movq	%rbx,%rdi
+	addq	(%rbp),%r12
+	xorq	%rbx,%r14
+
+	xorq	%rcx,%rdi
+	rorq	$14,%r13
+	movq	%rcx,%rax
+
+	andq	%rdi,%r15
+	rorq	$28,%r14
+	addq	%r13,%r12
+
+	xorq	%r15,%rax
+	addq	%r12,%r8
+	addq	%r12,%rax
+
+	leaq	24(%rbp),%rbp
+	cmpb	$0,7(%rbp)
+	jnz	L$rounds_16_xx
+
+	movq	128+0(%rsp),%rdi
+	addq	%r14,%rax
+	leaq	128(%rsi),%rsi
+
+	addq	0(%rdi),%rax
+	addq	8(%rdi),%rbx
+	addq	16(%rdi),%rcx
+	addq	24(%rdi),%rdx
+	addq	32(%rdi),%r8
+	addq	40(%rdi),%r9
+	addq	48(%rdi),%r10
+	addq	56(%rdi),%r11
+
+	cmpq	128+16(%rsp),%rsi
+
+	movq	%rax,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%rcx,16(%rdi)
+	movq	%rdx,24(%rdi)
+	movq	%r8,32(%rdi)
+	movq	%r9,40(%rdi)
+	movq	%r10,48(%rdi)
+	movq	%r11,56(%rdi)
+	jb	L$loop
+
+	movq	128+24(%rsp),%rsi
+	movq	-48(%rsi),%r15
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp
+L$epilogue:
+	.byte	0xf3,0xc3
+
+.p2align	6
+
+K512:	# SHA-512 round-constant table; every 16-byte row of two constants is stored twice in a row
+.quad	0x428a2f98d728ae22,0x7137449123ef65cd	# first copy of constants 0 and 1
+.quad	0x428a2f98d728ae22,0x7137449123ef65cd	# second copy; unique rows are therefore 32 bytes apart
+.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad	0x3956c25bf348b538,0x59f111f1b605d019
+.quad	0x3956c25bf348b538,0x59f111f1b605d019
+.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad	0xd807aa98a3030242,0x12835b0145706fbe
+.quad	0xd807aa98a3030242,0x12835b0145706fbe
+.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad	0x9bdc06a725c71235,0xc19bf174cf692694
+.quad	0x9bdc06a725c71235,0xc19bf174cf692694
+.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad	0x983e5152ee66dfab,0xa831c66d2db43210
+.quad	0x983e5152ee66dfab,0xa831c66d2db43210
+.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad	0x06ca6351e003826f,0x142929670a0e6e70
+.quad	0x06ca6351e003826f,0x142929670a0e6e70
+.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad	0x81c2c92e47edaee6,0x92722c851482353b
+.quad	0x81c2c92e47edaee6,0x92722c851482353b
+.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad	0xd192e819d6ef5218,0xd69906245565a910
+.quad	0xd192e819d6ef5218,0xd69906245565a910
+.quad	0xf40e35855771202a,0x106aa07032bbd1b8
+.quad	0xf40e35855771202a,0x106aa07032bbd1b8
+.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad	0x90befffa23631e28,0xa4506cebde82bde9
+.quad	0x90befffa23631e28,0xa4506cebde82bde9
+.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad	0xca273eceea26619c,0xd186b8c721c0c207
+.quad	0xca273eceea26619c,0xd186b8c721c0c207
+.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad	0x113f9804bef90dae,0x1b710b35131c471b
+.quad	0x113f9804bef90dae,0x1b710b35131c471b
+.quad	0x28db77f523047d84,0x32caab7b40c72493
+.quad	0x28db77f523047d84,0x32caab7b40c72493
+.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
+.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817	# end of the 80 doubled rows (1280 bytes)
+
+.quad	0x0001020304050607,0x08090a0b0c0d0e0f	# offset K512+1280: vpshufb mask that reverses the bytes of each 64-bit lane
+.quad	0x0001020304050607,0x08090a0b0c0d0e0f	# mask stored twice, like the constant rows
+.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	# NUL-terminated ASCII: SHA512 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>
+
+.p2align	6
+sha512_block_data_order_xop:	# XOP code path: rdi = eight-word state, rsi = input, rdx = number of 128-byte blocks
+L$xop_shortcut:
+	movq	%rsp,%rax	# capture the entry stack pointer; it is stored in the frame below
+	pushq	%rbx	# rbx, rbp and r12-r15 are pushed here and reloaded in the epilogue
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	shlq	$4,%rdx	# rdx = block count * 16
+	subq	$160,%rsp	# 128 bytes of W+K scratch plus four 8-byte slots
+	leaq	(%rsi,%rdx,8),%rdx	# rdx = rsi + count*128, one past the last input block
+	andq	$-64,%rsp	# round the frame down to a 64-byte boundary
+	movq	%rdi,128+0(%rsp)	# frame slot: state pointer
+	movq	%rsi,128+8(%rsp)	# frame slot: input pointer as passed in
+	movq	%rdx,128+16(%rsp)	# frame slot: end-of-input pointer
+	movq	%rax,128+24(%rsp)	# frame slot: entry stack pointer
+L$prologue_xop:
+
+	vzeroupper
+	movq	0(%rdi),%rax	# load the eight 64-bit state words into rax, rbx, rcx, rdx, r8-r11
+	movq	8(%rdi),%rbx
+	movq	16(%rdi),%rcx
+	movq	24(%rdi),%rdx
+	movq	32(%rdi),%r8
+	movq	40(%rdi),%r9
+	movq	48(%rdi),%r10
+	movq	56(%rdi),%r11
+	jmp	L$loop_xop
+.p2align	4
+L$loop_xop:	# top of the per-block loop; rsi points at the current 128-byte input block
+	vmovdqa	K512+1280(%rip),%xmm11	# byte-shuffle mask stored right after the 1280 bytes of constants
+	vmovdqu	0(%rsi),%xmm0	# xmm0-xmm7 receive the sixteen 64-bit message words, two per register
+	leaq	K512+128(%rip),%rbp	# bias rbp by 128 so displacements -128..96 reach the first eight unique table rows
+	vmovdqu	16(%rsi),%xmm1
+	vmovdqu	32(%rsi),%xmm2
+	vpshufb	%xmm11,%xmm0,%xmm0	# reverse the bytes inside each 64-bit lane
+	vmovdqu	48(%rsi),%xmm3
+	vpshufb	%xmm11,%xmm1,%xmm1
+	vmovdqu	64(%rsi),%xmm4
+	vpshufb	%xmm11,%xmm2,%xmm2
+	vmovdqu	80(%rsi),%xmm5
+	vpshufb	%xmm11,%xmm3,%xmm3
+	vmovdqu	96(%rsi),%xmm6
+	vpshufb	%xmm11,%xmm4,%xmm4
+	vmovdqu	112(%rsi),%xmm7
+	vpshufb	%xmm11,%xmm5,%xmm5
+	vpaddq	-128(%rbp),%xmm0,%xmm8	# message words + round constants; rows are read 32 bytes apart because each is stored twice
+	vpshufb	%xmm11,%xmm6,%xmm6
+	vpaddq	-96(%rbp),%xmm1,%xmm9
+	vpshufb	%xmm11,%xmm7,%xmm7	# last use of the mask; xmm11 is reused as a temporary below
+	vpaddq	-64(%rbp),%xmm2,%xmm10
+	vpaddq	-32(%rbp),%xmm3,%xmm11
+	vmovdqa	%xmm8,0(%rsp)	# park the W+K sums in the frame, where the scalar rounds read them
+	vpaddq	0(%rbp),%xmm4,%xmm8
+	vmovdqa	%xmm9,16(%rsp)
+	vpaddq	32(%rbp),%xmm5,%xmm9
+	vmovdqa	%xmm10,32(%rsp)
+	vpaddq	64(%rbp),%xmm6,%xmm10
+	vmovdqa	%xmm11,48(%rsp)
+	vpaddq	96(%rbp),%xmm7,%xmm11
+	vmovdqa	%xmm8,64(%rsp)
+	movq	%rax,%r14	# the round code starts each round by copying r14 into a state register
+	vmovdqa	%xmm9,80(%rsp)
+	movq	%rbx,%rdi
+	vmovdqa	%xmm10,96(%rsp)
+	xorq	%rcx,%rdi	# rdi = rbx ^ rcx, consumed by the first round
+	vmovdqa	%xmm11,112(%rsp)
+	movq	%r8,%r13	# r13 seeds the rotate chain of the first round
+	jmp	L$xop_00_47
+
+.p2align	4
+L$xop_00_47:	# loop body: eight sections, each updating one schedule register (two words) while running two scalar rounds
+	addq	$256,%rbp	# step to the next eight unique table rows (8 rows * 32 bytes)
+	vpalignr	$8,%xmm0,%xmm1,%xmm8	# section 1: schedule update for xmm0
+	rorq	$23,%r13
+	movq	%r14,%rax
+	vpalignr	$8,%xmm4,%xmm5,%xmm11
+	movq	%r9,%r12
+	rorq	$5,%r14
+.byte	143,72,120,195,200,56	# raw XOP encoding of vprotq $56,%xmm8,%xmm9
+	xorq	%r8,%r13
+	xorq	%r10,%r12
+	vpsrlq	$7,%xmm8,%xmm8
+	rorq	$4,%r13
+	xorq	%rax,%r14
+	vpaddq	%xmm11,%xmm0,%xmm0
+	andq	%r8,%r12
+	xorq	%r8,%r13
+	addq	0(%rsp),%r11	# scalar round consumes the W+K value stored in the frame
+	movq	%rax,%r15
+.byte	143,72,120,195,209,7	# raw XOP encoding of vprotq $7,%xmm9,%xmm10
+	xorq	%r10,%r12
+	rorq	$6,%r14
+	vpxor	%xmm9,%xmm8,%xmm8
+	xorq	%rbx,%r15
+	addq	%r12,%r11
+	rorq	$14,%r13
+	andq	%r15,%rdi
+.byte	143,104,120,195,223,3	# raw XOP encoding of vprotq $3,%xmm7,%xmm11
+	xorq	%rax,%r14
+	addq	%r13,%r11
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%rbx,%rdi
+	rorq	$28,%r14
+	vpsrlq	$6,%xmm7,%xmm10
+	addq	%r11,%rdx
+	addq	%rdi,%r11
+	vpaddq	%xmm8,%xmm0,%xmm0
+	movq	%rdx,%r13
+	addq	%r11,%r14
+.byte	143,72,120,195,203,42	# raw XOP encoding of vprotq $42,%xmm11,%xmm9
+	rorq	$23,%r13
+	movq	%r14,%r11
+	vpxor	%xmm10,%xmm11,%xmm11
+	movq	%r8,%r12
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r12
+	vpxor	%xmm9,%xmm11,%xmm11
+	rorq	$4,%r13
+	xorq	%r11,%r14
+	andq	%rdx,%r12
+	xorq	%rdx,%r13
+	vpaddq	%xmm11,%xmm0,%xmm0
+	addq	8(%rsp),%r10
+	movq	%r11,%rdi
+	xorq	%r9,%r12
+	rorq	$6,%r14
+	vpaddq	-128(%rbp),%xmm0,%xmm10	# new schedule words + their round constants
+	xorq	%rax,%rdi
+	addq	%r12,%r10
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%r11,%r14
+	addq	%r13,%r10
+	xorq	%rax,%r15
+	rorq	$28,%r14
+	addq	%r10,%rcx
+	addq	%r15,%r10
+	movq	%rcx,%r13
+	addq	%r10,%r14
+	vmovdqa	%xmm10,0(%rsp)	# overwrite the slot just consumed; it is read again sixteen rounds later
+	vpalignr	$8,%xmm1,%xmm2,%xmm8	# section 2: schedule update for xmm1
+	rorq	$23,%r13
+	movq	%r14,%r10
+	vpalignr	$8,%xmm5,%xmm6,%xmm11
+	movq	%rdx,%r12
+	rorq	$5,%r14
+.byte	143,72,120,195,200,56	# vprotq $56,%xmm8,%xmm9
+	xorq	%rcx,%r13
+	xorq	%r8,%r12
+	vpsrlq	$7,%xmm8,%xmm8
+	rorq	$4,%r13
+	xorq	%r10,%r14
+	vpaddq	%xmm11,%xmm1,%xmm1
+	andq	%rcx,%r12
+	xorq	%rcx,%r13
+	addq	16(%rsp),%r9
+	movq	%r10,%r15
+.byte	143,72,120,195,209,7	# vprotq $7,%xmm9,%xmm10
+	xorq	%r8,%r12
+	rorq	$6,%r14
+	vpxor	%xmm9,%xmm8,%xmm8
+	xorq	%r11,%r15
+	addq	%r12,%r9
+	rorq	$14,%r13
+	andq	%r15,%rdi
+.byte	143,104,120,195,216,3	# vprotq $3,%xmm0,%xmm11
+	xorq	%r10,%r14
+	addq	%r13,%r9
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%r11,%rdi
+	rorq	$28,%r14
+	vpsrlq	$6,%xmm0,%xmm10
+	addq	%r9,%rbx
+	addq	%rdi,%r9
+	vpaddq	%xmm8,%xmm1,%xmm1
+	movq	%rbx,%r13
+	addq	%r9,%r14
+.byte	143,72,120,195,203,42	# vprotq $42,%xmm11,%xmm9
+	rorq	$23,%r13
+	movq	%r14,%r9
+	vpxor	%xmm10,%xmm11,%xmm11
+	movq	%rcx,%r12
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r12
+	vpxor	%xmm9,%xmm11,%xmm11
+	rorq	$4,%r13
+	xorq	%r9,%r14
+	andq	%rbx,%r12
+	xorq	%rbx,%r13
+	vpaddq	%xmm11,%xmm1,%xmm1
+	addq	24(%rsp),%r8
+	movq	%r9,%rdi
+	xorq	%rdx,%r12
+	rorq	$6,%r14
+	vpaddq	-96(%rbp),%xmm1,%xmm10
+	xorq	%r10,%rdi
+	addq	%r12,%r8
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%r9,%r14
+	addq	%r13,%r8
+	xorq	%r10,%r15
+	rorq	$28,%r14
+	addq	%r8,%rax
+	addq	%r15,%r8
+	movq	%rax,%r13
+	addq	%r8,%r14
+	vmovdqa	%xmm10,16(%rsp)
+	vpalignr	$8,%xmm2,%xmm3,%xmm8	# section 3: schedule update for xmm2
+	rorq	$23,%r13
+	movq	%r14,%r8
+	vpalignr	$8,%xmm6,%xmm7,%xmm11
+	movq	%rbx,%r12
+	rorq	$5,%r14
+.byte	143,72,120,195,200,56	# vprotq $56,%xmm8,%xmm9
+	xorq	%rax,%r13
+	xorq	%rcx,%r12
+	vpsrlq	$7,%xmm8,%xmm8
+	rorq	$4,%r13
+	xorq	%r8,%r14
+	vpaddq	%xmm11,%xmm2,%xmm2
+	andq	%rax,%r12
+	xorq	%rax,%r13
+	addq	32(%rsp),%rdx
+	movq	%r8,%r15
+.byte	143,72,120,195,209,7	# vprotq $7,%xmm9,%xmm10
+	xorq	%rcx,%r12
+	rorq	$6,%r14
+	vpxor	%xmm9,%xmm8,%xmm8
+	xorq	%r9,%r15
+	addq	%r12,%rdx
+	rorq	$14,%r13
+	andq	%r15,%rdi
+.byte	143,104,120,195,217,3	# vprotq $3,%xmm1,%xmm11
+	xorq	%r8,%r14
+	addq	%r13,%rdx
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%r9,%rdi
+	rorq	$28,%r14
+	vpsrlq	$6,%xmm1,%xmm10
+	addq	%rdx,%r11
+	addq	%rdi,%rdx
+	vpaddq	%xmm8,%xmm2,%xmm2
+	movq	%r11,%r13
+	addq	%rdx,%r14
+.byte	143,72,120,195,203,42	# vprotq $42,%xmm11,%xmm9
+	rorq	$23,%r13
+	movq	%r14,%rdx
+	vpxor	%xmm10,%xmm11,%xmm11
+	movq	%rax,%r12
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r12
+	vpxor	%xmm9,%xmm11,%xmm11
+	rorq	$4,%r13
+	xorq	%rdx,%r14
+	andq	%r11,%r12
+	xorq	%r11,%r13
+	vpaddq	%xmm11,%xmm2,%xmm2
+	addq	40(%rsp),%rcx
+	movq	%rdx,%rdi
+	xorq	%rbx,%r12
+	rorq	$6,%r14
+	vpaddq	-64(%rbp),%xmm2,%xmm10
+	xorq	%r8,%rdi
+	addq	%r12,%rcx
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%rdx,%r14
+	addq	%r13,%rcx
+	xorq	%r8,%r15
+	rorq	$28,%r14
+	addq	%rcx,%r10
+	addq	%r15,%rcx
+	movq	%r10,%r13
+	addq	%rcx,%r14
+	vmovdqa	%xmm10,32(%rsp)
+	vpalignr	$8,%xmm3,%xmm4,%xmm8	# section 4: schedule update for xmm3
+	rorq	$23,%r13
+	movq	%r14,%rcx
+	vpalignr	$8,%xmm7,%xmm0,%xmm11
+	movq	%r11,%r12
+	rorq	$5,%r14
+.byte	143,72,120,195,200,56	# vprotq $56,%xmm8,%xmm9
+	xorq	%r10,%r13
+	xorq	%rax,%r12
+	vpsrlq	$7,%xmm8,%xmm8
+	rorq	$4,%r13
+	xorq	%rcx,%r14
+	vpaddq	%xmm11,%xmm3,%xmm3
+	andq	%r10,%r12
+	xorq	%r10,%r13
+	addq	48(%rsp),%rbx
+	movq	%rcx,%r15
+.byte	143,72,120,195,209,7	# vprotq $7,%xmm9,%xmm10
+	xorq	%rax,%r12
+	rorq	$6,%r14
+	vpxor	%xmm9,%xmm8,%xmm8
+	xorq	%rdx,%r15
+	addq	%r12,%rbx
+	rorq	$14,%r13
+	andq	%r15,%rdi
+.byte	143,104,120,195,218,3	# vprotq $3,%xmm2,%xmm11
+	xorq	%rcx,%r14
+	addq	%r13,%rbx
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%rdx,%rdi
+	rorq	$28,%r14
+	vpsrlq	$6,%xmm2,%xmm10
+	addq	%rbx,%r9
+	addq	%rdi,%rbx
+	vpaddq	%xmm8,%xmm3,%xmm3
+	movq	%r9,%r13
+	addq	%rbx,%r14
+.byte	143,72,120,195,203,42	# vprotq $42,%xmm11,%xmm9
+	rorq	$23,%r13
+	movq	%r14,%rbx
+	vpxor	%xmm10,%xmm11,%xmm11
+	movq	%r10,%r12
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r12
+	vpxor	%xmm9,%xmm11,%xmm11
+	rorq	$4,%r13
+	xorq	%rbx,%r14
+	andq	%r9,%r12
+	xorq	%r9,%r13
+	vpaddq	%xmm11,%xmm3,%xmm3
+	addq	56(%rsp),%rax
+	movq	%rbx,%rdi
+	xorq	%r11,%r12
+	rorq	$6,%r14
+	vpaddq	-32(%rbp),%xmm3,%xmm10
+	xorq	%rcx,%rdi
+	addq	%r12,%rax
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%rbx,%r14
+	addq	%r13,%rax
+	xorq	%rcx,%r15
+	rorq	$28,%r14
+	addq	%rax,%r8
+	addq	%r15,%rax
+	movq	%r8,%r13
+	addq	%rax,%r14
+	vmovdqa	%xmm10,48(%rsp)
+	vpalignr	$8,%xmm4,%xmm5,%xmm8	# section 5: schedule update for xmm4
+	rorq	$23,%r13
+	movq	%r14,%rax
+	vpalignr	$8,%xmm0,%xmm1,%xmm11
+	movq	%r9,%r12
+	rorq	$5,%r14
+.byte	143,72,120,195,200,56	# vprotq $56,%xmm8,%xmm9
+	xorq	%r8,%r13
+	xorq	%r10,%r12
+	vpsrlq	$7,%xmm8,%xmm8
+	rorq	$4,%r13
+	xorq	%rax,%r14
+	vpaddq	%xmm11,%xmm4,%xmm4
+	andq	%r8,%r12
+	xorq	%r8,%r13
+	addq	64(%rsp),%r11
+	movq	%rax,%r15
+.byte	143,72,120,195,209,7	# vprotq $7,%xmm9,%xmm10
+	xorq	%r10,%r12
+	rorq	$6,%r14
+	vpxor	%xmm9,%xmm8,%xmm8
+	xorq	%rbx,%r15
+	addq	%r12,%r11
+	rorq	$14,%r13
+	andq	%r15,%rdi
+.byte	143,104,120,195,219,3	# vprotq $3,%xmm3,%xmm11
+	xorq	%rax,%r14
+	addq	%r13,%r11
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%rbx,%rdi
+	rorq	$28,%r14
+	vpsrlq	$6,%xmm3,%xmm10
+	addq	%r11,%rdx
+	addq	%rdi,%r11
+	vpaddq	%xmm8,%xmm4,%xmm4
+	movq	%rdx,%r13
+	addq	%r11,%r14
+.byte	143,72,120,195,203,42	# vprotq $42,%xmm11,%xmm9
+	rorq	$23,%r13
+	movq	%r14,%r11
+	vpxor	%xmm10,%xmm11,%xmm11
+	movq	%r8,%r12
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r12
+	vpxor	%xmm9,%xmm11,%xmm11
+	rorq	$4,%r13
+	xorq	%r11,%r14
+	andq	%rdx,%r12
+	xorq	%rdx,%r13
+	vpaddq	%xmm11,%xmm4,%xmm4
+	addq	72(%rsp),%r10
+	movq	%r11,%rdi
+	xorq	%r9,%r12
+	rorq	$6,%r14
+	vpaddq	0(%rbp),%xmm4,%xmm10
+	xorq	%rax,%rdi
+	addq	%r12,%r10
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%r11,%r14
+	addq	%r13,%r10
+	xorq	%rax,%r15
+	rorq	$28,%r14
+	addq	%r10,%rcx
+	addq	%r15,%r10
+	movq	%rcx,%r13
+	addq	%r10,%r14
+	vmovdqa	%xmm10,64(%rsp)
+	vpalignr	$8,%xmm5,%xmm6,%xmm8	# section 6: schedule update for xmm5
+	rorq	$23,%r13
+	movq	%r14,%r10
+	vpalignr	$8,%xmm1,%xmm2,%xmm11
+	movq	%rdx,%r12
+	rorq	$5,%r14
+.byte	143,72,120,195,200,56	# vprotq $56,%xmm8,%xmm9
+	xorq	%rcx,%r13
+	xorq	%r8,%r12
+	vpsrlq	$7,%xmm8,%xmm8
+	rorq	$4,%r13
+	xorq	%r10,%r14
+	vpaddq	%xmm11,%xmm5,%xmm5
+	andq	%rcx,%r12
+	xorq	%rcx,%r13
+	addq	80(%rsp),%r9
+	movq	%r10,%r15
+.byte	143,72,120,195,209,7	# vprotq $7,%xmm9,%xmm10
+	xorq	%r8,%r12
+	rorq	$6,%r14
+	vpxor	%xmm9,%xmm8,%xmm8
+	xorq	%r11,%r15
+	addq	%r12,%r9
+	rorq	$14,%r13
+	andq	%r15,%rdi
+.byte	143,104,120,195,220,3	# vprotq $3,%xmm4,%xmm11
+	xorq	%r10,%r14
+	addq	%r13,%r9
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%r11,%rdi
+	rorq	$28,%r14
+	vpsrlq	$6,%xmm4,%xmm10
+	addq	%r9,%rbx
+	addq	%rdi,%r9
+	vpaddq	%xmm8,%xmm5,%xmm5
+	movq	%rbx,%r13
+	addq	%r9,%r14
+.byte	143,72,120,195,203,42	# vprotq $42,%xmm11,%xmm9
+	rorq	$23,%r13
+	movq	%r14,%r9
+	vpxor	%xmm10,%xmm11,%xmm11
+	movq	%rcx,%r12
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r12
+	vpxor	%xmm9,%xmm11,%xmm11
+	rorq	$4,%r13
+	xorq	%r9,%r14
+	andq	%rbx,%r12
+	xorq	%rbx,%r13
+	vpaddq	%xmm11,%xmm5,%xmm5
+	addq	88(%rsp),%r8
+	movq	%r9,%rdi
+	xorq	%rdx,%r12
+	rorq	$6,%r14
+	vpaddq	32(%rbp),%xmm5,%xmm10
+	xorq	%r10,%rdi
+	addq	%r12,%r8
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%r9,%r14
+	addq	%r13,%r8
+	xorq	%r10,%r15
+	rorq	$28,%r14
+	addq	%r8,%rax
+	addq	%r15,%r8
+	movq	%rax,%r13
+	addq	%r8,%r14
+	vmovdqa	%xmm10,80(%rsp)
+	vpalignr	$8,%xmm6,%xmm7,%xmm8	# section 7: schedule update for xmm6
+	rorq	$23,%r13
+	movq	%r14,%r8
+	vpalignr	$8,%xmm2,%xmm3,%xmm11
+	movq	%rbx,%r12
+	rorq	$5,%r14
+.byte	143,72,120,195,200,56	# vprotq $56,%xmm8,%xmm9
+	xorq	%rax,%r13
+	xorq	%rcx,%r12
+	vpsrlq	$7,%xmm8,%xmm8
+	rorq	$4,%r13
+	xorq	%r8,%r14
+	vpaddq	%xmm11,%xmm6,%xmm6
+	andq	%rax,%r12
+	xorq	%rax,%r13
+	addq	96(%rsp),%rdx
+	movq	%r8,%r15
+.byte	143,72,120,195,209,7	# vprotq $7,%xmm9,%xmm10
+	xorq	%rcx,%r12
+	rorq	$6,%r14
+	vpxor	%xmm9,%xmm8,%xmm8
+	xorq	%r9,%r15
+	addq	%r12,%rdx
+	rorq	$14,%r13
+	andq	%r15,%rdi
+.byte	143,104,120,195,221,3	# vprotq $3,%xmm5,%xmm11
+	xorq	%r8,%r14
+	addq	%r13,%rdx
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%r9,%rdi
+	rorq	$28,%r14
+	vpsrlq	$6,%xmm5,%xmm10
+	addq	%rdx,%r11
+	addq	%rdi,%rdx
+	vpaddq	%xmm8,%xmm6,%xmm6
+	movq	%r11,%r13
+	addq	%rdx,%r14
+.byte	143,72,120,195,203,42	# vprotq $42,%xmm11,%xmm9
+	rorq	$23,%r13
+	movq	%r14,%rdx
+	vpxor	%xmm10,%xmm11,%xmm11
+	movq	%rax,%r12
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r12
+	vpxor	%xmm9,%xmm11,%xmm11
+	rorq	$4,%r13
+	xorq	%rdx,%r14
+	andq	%r11,%r12
+	xorq	%r11,%r13
+	vpaddq	%xmm11,%xmm6,%xmm6
+	addq	104(%rsp),%rcx
+	movq	%rdx,%rdi
+	xorq	%rbx,%r12
+	rorq	$6,%r14
+	vpaddq	64(%rbp),%xmm6,%xmm10
+	xorq	%r8,%rdi
+	addq	%r12,%rcx
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%rdx,%r14
+	addq	%r13,%rcx
+	xorq	%r8,%r15
+	rorq	$28,%r14
+	addq	%rcx,%r10
+	addq	%r15,%rcx
+	movq	%r10,%r13
+	addq	%rcx,%r14
+	vmovdqa	%xmm10,96(%rsp)
+	vpalignr	$8,%xmm7,%xmm0,%xmm8	# section 8: schedule update for xmm7
+	rorq	$23,%r13
+	movq	%r14,%rcx
+	vpalignr	$8,%xmm3,%xmm4,%xmm11
+	movq	%r11,%r12
+	rorq	$5,%r14
+.byte	143,72,120,195,200,56	# vprotq $56,%xmm8,%xmm9
+	xorq	%r10,%r13
+	xorq	%rax,%r12
+	vpsrlq	$7,%xmm8,%xmm8
+	rorq	$4,%r13
+	xorq	%rcx,%r14
+	vpaddq	%xmm11,%xmm7,%xmm7
+	andq	%r10,%r12
+	xorq	%r10,%r13
+	addq	112(%rsp),%rbx
+	movq	%rcx,%r15
+.byte	143,72,120,195,209,7	# vprotq $7,%xmm9,%xmm10
+	xorq	%rax,%r12
+	rorq	$6,%r14
+	vpxor	%xmm9,%xmm8,%xmm8
+	xorq	%rdx,%r15
+	addq	%r12,%rbx
+	rorq	$14,%r13
+	andq	%r15,%rdi
+.byte	143,104,120,195,222,3	# vprotq $3,%xmm6,%xmm11
+	xorq	%rcx,%r14
+	addq	%r13,%rbx
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%rdx,%rdi
+	rorq	$28,%r14
+	vpsrlq	$6,%xmm6,%xmm10
+	addq	%rbx,%r9
+	addq	%rdi,%rbx
+	vpaddq	%xmm8,%xmm7,%xmm7
+	movq	%r9,%r13
+	addq	%rbx,%r14
+.byte	143,72,120,195,203,42	# vprotq $42,%xmm11,%xmm9
+	rorq	$23,%r13
+	movq	%r14,%rbx
+	vpxor	%xmm10,%xmm11,%xmm11
+	movq	%r10,%r12
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r12
+	vpxor	%xmm9,%xmm11,%xmm11
+	rorq	$4,%r13
+	xorq	%rbx,%r14
+	andq	%r9,%r12
+	xorq	%r9,%r13
+	vpaddq	%xmm11,%xmm7,%xmm7
+	addq	120(%rsp),%rax
+	movq	%rbx,%rdi
+	xorq	%r11,%r12
+	rorq	$6,%r14
+	vpaddq	96(%rbp),%xmm7,%xmm10
+	xorq	%rcx,%rdi
+	addq	%r12,%rax
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%rbx,%r14
+	addq	%r13,%rax
+	xorq	%rcx,%r15
+	rorq	$28,%r14
+	addq	%rax,%r8
+	addq	%r15,%rax
+	movq	%r8,%r13
+	addq	%rax,%r14
+	vmovdqa	%xmm10,112(%rsp)
+	cmpb	$0,135(%rbp)	# after the fourth pass rbp+135 lands on the zero top byte of the shuffle mask at K512+1287; earlier passes hit nonzero constant bytes
+	jne	L$xop_00_47	# repeat while round constants remain
+	rorq	$23,%r13	# final sixteen scalar rounds: no schedule updates, only the W+K values already in the frame are consumed
+	movq	%r14,%rax
+	movq	%r9,%r12
+	rorq	$5,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r12
+	rorq	$4,%r13
+	xorq	%rax,%r14
+	andq	%r8,%r12
+	xorq	%r8,%r13
+	addq	0(%rsp),%r11	# first of sixteen frame reads, offsets 0 through 120
+	movq	%rax,%r15
+	xorq	%r10,%r12
+	rorq	$6,%r14
+	xorq	%rbx,%r15
+	addq	%r12,%r11
+	rorq	$14,%r13
+	andq	%r15,%rdi
+	xorq	%rax,%r14
+	addq	%r13,%r11
+	xorq	%rbx,%rdi
+	rorq	$28,%r14
+	addq	%r11,%rdx
+	addq	%rdi,%r11
+	movq	%rdx,%r13
+	addq	%r11,%r14
+	rorq	$23,%r13
+	movq	%r14,%r11
+	movq	%r8,%r12
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r12
+	rorq	$4,%r13
+	xorq	%r11,%r14
+	andq	%rdx,%r12
+	xorq	%rdx,%r13
+	addq	8(%rsp),%r10
+	movq	%r11,%rdi
+	xorq	%r9,%r12
+	rorq	$6,%r14
+	xorq	%rax,%rdi
+	addq	%r12,%r10
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%r11,%r14
+	addq	%r13,%r10
+	xorq	%rax,%r15
+	rorq	$28,%r14
+	addq	%r10,%rcx
+	addq	%r15,%r10
+	movq	%rcx,%r13
+	addq	%r10,%r14
+	rorq	$23,%r13
+	movq	%r14,%r10
+	movq	%rdx,%r12
+	rorq	$5,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r12
+	rorq	$4,%r13
+	xorq	%r10,%r14
+	andq	%rcx,%r12
+	xorq	%rcx,%r13
+	addq	16(%rsp),%r9
+	movq	%r10,%r15
+	xorq	%r8,%r12
+	rorq	$6,%r14
+	xorq	%r11,%r15
+	addq	%r12,%r9
+	rorq	$14,%r13
+	andq	%r15,%rdi
+	xorq	%r10,%r14
+	addq	%r13,%r9
+	xorq	%r11,%rdi
+	rorq	$28,%r14
+	addq	%r9,%rbx
+	addq	%rdi,%r9
+	movq	%rbx,%r13
+	addq	%r9,%r14
+	rorq	$23,%r13
+	movq	%r14,%r9
+	movq	%rcx,%r12
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r12
+	rorq	$4,%r13
+	xorq	%r9,%r14
+	andq	%rbx,%r12
+	xorq	%rbx,%r13
+	addq	24(%rsp),%r8
+	movq	%r9,%rdi
+	xorq	%rdx,%r12
+	rorq	$6,%r14
+	xorq	%r10,%rdi
+	addq	%r12,%r8
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%r9,%r14
+	addq	%r13,%r8
+	xorq	%r10,%r15
+	rorq	$28,%r14
+	addq	%r8,%rax
+	addq	%r15,%r8
+	movq	%rax,%r13
+	addq	%r8,%r14
+	rorq	$23,%r13
+	movq	%r14,%r8
+	movq	%rbx,%r12
+	rorq	$5,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r12
+	rorq	$4,%r13
+	xorq	%r8,%r14
+	andq	%rax,%r12
+	xorq	%rax,%r13
+	addq	32(%rsp),%rdx
+	movq	%r8,%r15
+	xorq	%rcx,%r12
+	rorq	$6,%r14
+	xorq	%r9,%r15
+	addq	%r12,%rdx
+	rorq	$14,%r13
+	andq	%r15,%rdi
+	xorq	%r8,%r14
+	addq	%r13,%rdx
+	xorq	%r9,%rdi
+	rorq	$28,%r14
+	addq	%rdx,%r11
+	addq	%rdi,%rdx
+	movq	%r11,%r13
+	addq	%rdx,%r14
+	rorq	$23,%r13
+	movq	%r14,%rdx
+	movq	%rax,%r12
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r12
+	rorq	$4,%r13
+	xorq	%rdx,%r14
+	andq	%r11,%r12
+	xorq	%r11,%r13
+	addq	40(%rsp),%rcx
+	movq	%rdx,%rdi
+	xorq	%rbx,%r12
+	rorq	$6,%r14
+	xorq	%r8,%rdi
+	addq	%r12,%rcx
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%rdx,%r14
+	addq	%r13,%rcx
+	xorq	%r8,%r15
+	rorq	$28,%r14
+	addq	%rcx,%r10
+	addq	%r15,%rcx
+	movq	%r10,%r13
+	addq	%rcx,%r14
+	rorq	$23,%r13
+	movq	%r14,%rcx
+	movq	%r11,%r12
+	rorq	$5,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r12
+	rorq	$4,%r13
+	xorq	%rcx,%r14
+	andq	%r10,%r12
+	xorq	%r10,%r13
+	addq	48(%rsp),%rbx
+	movq	%rcx,%r15
+	xorq	%rax,%r12
+	rorq	$6,%r14
+	xorq	%rdx,%r15
+	addq	%r12,%rbx
+	rorq	$14,%r13
+	andq	%r15,%rdi
+	xorq	%rcx,%r14
+	addq	%r13,%rbx
+	xorq	%rdx,%rdi
+	rorq	$28,%r14
+	addq	%rbx,%r9
+	addq	%rdi,%rbx
+	movq	%r9,%r13
+	addq	%rbx,%r14
+	rorq	$23,%r13
+	movq	%r14,%rbx
+	movq	%r10,%r12
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r12
+	rorq	$4,%r13
+	xorq	%rbx,%r14
+	andq	%r9,%r12
+	xorq	%r9,%r13
+	addq	56(%rsp),%rax
+	movq	%rbx,%rdi
+	xorq	%r11,%r12
+	rorq	$6,%r14
+	xorq	%rcx,%rdi
+	addq	%r12,%rax
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%rbx,%r14
+	addq	%r13,%rax
+	xorq	%rcx,%r15
+	rorq	$28,%r14
+	addq	%rax,%r8
+	addq	%r15,%rax
+	movq	%r8,%r13
+	addq	%rax,%r14
+	rorq	$23,%r13
+	movq	%r14,%rax
+	movq	%r9,%r12
+	rorq	$5,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r12
+	rorq	$4,%r13
+	xorq	%rax,%r14
+	andq	%r8,%r12
+	xorq	%r8,%r13
+	addq	64(%rsp),%r11
+	movq	%rax,%r15
+	xorq	%r10,%r12
+	rorq	$6,%r14
+	xorq	%rbx,%r15
+	addq	%r12,%r11
+	rorq	$14,%r13
+	andq	%r15,%rdi
+	xorq	%rax,%r14
+	addq	%r13,%r11
+	xorq	%rbx,%rdi
+	rorq	$28,%r14
+	addq	%r11,%rdx
+	addq	%rdi,%r11
+	movq	%rdx,%r13
+	addq	%r11,%r14
+	rorq	$23,%r13
+	movq	%r14,%r11
+	movq	%r8,%r12
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r12
+	rorq	$4,%r13
+	xorq	%r11,%r14
+	andq	%rdx,%r12
+	xorq	%rdx,%r13
+	addq	72(%rsp),%r10
+	movq	%r11,%rdi
+	xorq	%r9,%r12
+	rorq	$6,%r14
+	xorq	%rax,%rdi
+	addq	%r12,%r10
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%r11,%r14
+	addq	%r13,%r10
+	xorq	%rax,%r15
+	rorq	$28,%r14
+	addq	%r10,%rcx
+	addq	%r15,%r10
+	movq	%rcx,%r13
+	addq	%r10,%r14
+	rorq	$23,%r13
+	movq	%r14,%r10
+	movq	%rdx,%r12
+	rorq	$5,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r12
+	rorq	$4,%r13
+	xorq	%r10,%r14
+	andq	%rcx,%r12
+	xorq	%rcx,%r13
+	addq	80(%rsp),%r9
+	movq	%r10,%r15
+	xorq	%r8,%r12
+	rorq	$6,%r14
+	xorq	%r11,%r15
+	addq	%r12,%r9
+	rorq	$14,%r13
+	andq	%r15,%rdi
+	xorq	%r10,%r14
+	addq	%r13,%r9
+	xorq	%r11,%rdi
+	rorq	$28,%r14
+	addq	%r9,%rbx
+	addq	%rdi,%r9
+	movq	%rbx,%r13
+	addq	%r9,%r14
+	rorq	$23,%r13
+	movq	%r14,%r9
+	movq	%rcx,%r12
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r12
+	rorq	$4,%r13
+	xorq	%r9,%r14
+	andq	%rbx,%r12
+	xorq	%rbx,%r13
+	addq	88(%rsp),%r8
+	movq	%r9,%rdi
+	xorq	%rdx,%r12
+	rorq	$6,%r14
+	xorq	%r10,%rdi
+	addq	%r12,%r8
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%r9,%r14
+	addq	%r13,%r8
+	xorq	%r10,%r15
+	rorq	$28,%r14
+	addq	%r8,%rax
+	addq	%r15,%r8
+	movq	%rax,%r13
+	addq	%r8,%r14
+	rorq	$23,%r13
+	movq	%r14,%r8
+	movq	%rbx,%r12
+	rorq	$5,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r12
+	rorq	$4,%r13
+	xorq	%r8,%r14
+	andq	%rax,%r12
+	xorq	%rax,%r13
+	addq	96(%rsp),%rdx
+	movq	%r8,%r15
+	xorq	%rcx,%r12
+	rorq	$6,%r14
+	xorq	%r9,%r15
+	addq	%r12,%rdx
+	rorq	$14,%r13
+	andq	%r15,%rdi
+	xorq	%r8,%r14
+	addq	%r13,%rdx
+	xorq	%r9,%rdi
+	rorq	$28,%r14
+	addq	%rdx,%r11
+	addq	%rdi,%rdx
+	movq	%r11,%r13
+	addq	%rdx,%r14
+	rorq	$23,%r13
+	movq	%r14,%rdx
+	movq	%rax,%r12
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r12
+	rorq	$4,%r13
+	xorq	%rdx,%r14
+	andq	%r11,%r12
+	xorq	%r11,%r13
+	addq	104(%rsp),%rcx
+	movq	%rdx,%rdi
+	xorq	%rbx,%r12
+	rorq	$6,%r14
+	xorq	%r8,%rdi
+	addq	%r12,%rcx
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%rdx,%r14
+	addq	%r13,%rcx
+	xorq	%r8,%r15
+	rorq	$28,%r14
+	addq	%rcx,%r10
+	addq	%r15,%rcx
+	movq	%r10,%r13
+	addq	%rcx,%r14
+	rorq	$23,%r13
+	movq	%r14,%rcx
+	movq	%r11,%r12
+	rorq	$5,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r12
+	rorq	$4,%r13
+	xorq	%rcx,%r14
+	andq	%r10,%r12
+	xorq	%r10,%r13
+	addq	112(%rsp),%rbx
+	movq	%rcx,%r15
+	xorq	%rax,%r12
+	rorq	$6,%r14
+	xorq	%rdx,%r15
+	addq	%r12,%rbx
+	rorq	$14,%r13
+	andq	%r15,%rdi
+	xorq	%rcx,%r14
+	addq	%r13,%rbx
+	xorq	%rdx,%rdi
+	rorq	$28,%r14
+	addq	%rbx,%r9
+	addq	%rdi,%rbx
+	movq	%r9,%r13
+	addq	%rbx,%r14
+	rorq	$23,%r13
+	movq	%r14,%rbx
+	movq	%r10,%r12
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r12
+	rorq	$4,%r13
+	xorq	%rbx,%r14
+	andq	%r9,%r12
+	xorq	%r9,%r13
+	addq	120(%rsp),%rax	# last of the sixteen frame reads
+	movq	%rbx,%rdi
+	xorq	%r11,%r12
+	rorq	$6,%r14
+	xorq	%rcx,%rdi
+	addq	%r12,%rax
+	rorq	$14,%r13
+	andq	%rdi,%r15
+	xorq	%rbx,%r14
+	addq	%r13,%rax
+	xorq	%rcx,%r15
+	rorq	$28,%r14
+	addq	%rax,%r8
+	addq	%r15,%rax
+	movq	%r8,%r13
+	addq	%rax,%r14	# r14 now holds the finished value that the following code moves into rax
+	movq	128+0(%rsp),%rdi
+	movq	%r14,%rax
+
+	addq	0(%rdi),%rax
+	leaq	128(%rsi),%rsi
+	addq	8(%rdi),%rbx
+	addq	16(%rdi),%rcx
+	addq	24(%rdi),%rdx
+	addq	32(%rdi),%r8
+	addq	40(%rdi),%r9
+	addq	48(%rdi),%r10
+	addq	56(%rdi),%r11
+
+	cmpq	128+16(%rsp),%rsi
+
+	movq	%rax,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%rcx,16(%rdi)
+	movq	%rdx,24(%rdi)
+	movq	%r8,32(%rdi)
+	movq	%r9,40(%rdi)
+	movq	%r10,48(%rdi)
+	movq	%r11,56(%rdi)
+	jb	L$loop_xop
+
+	movq	128+24(%rsp),%rsi
+	vzeroupper
+	movq	-48(%rsi),%r15
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp
+L$epilogue_xop:
+	.byte	0xf3,0xc3
+// sha512_block_data_order_avx: AVX code path.  %rdi -> eight 64-bit state words (updated in
+// place), %rsi -> input, %rdx = number of 128-byte blocks (turned into an end pointer below).
+.p2align	6
+sha512_block_data_order_avx:
+L$avx_shortcut:
+	movq	%rsp,%rax	// remember the caller's %rsp for the epilogue
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	shlq	$4,%rdx
+	subq	$160,%rsp	// 128 bytes of staged W+K values plus four saved quadwords
+	leaq	(%rsi,%rdx,8),%rdx	// %rdx = %rsi + 128*blocks = end of input
+	andq	$-64,%rsp	// align the frame to 64 bytes
+	movq	%rdi,128+0(%rsp)	// save state pointer (%rdi is reused as scratch in the rounds)
+	movq	%rsi,128+8(%rsp)	// save input pointer
+	movq	%rdx,128+16(%rsp)	// save end-of-input pointer
+	movq	%rax,128+24(%rsp)	// save original %rsp
+L$prologue_avx:
+// Load the eight state words into %rax,%rbx,%rcx,%rdx,%r8,%r9,%r10,%r11 (a..h below).
+	vzeroupper
+	movq	0(%rdi),%rax
+	movq	8(%rdi),%rbx
+	movq	16(%rdi),%rcx
+	movq	24(%rdi),%rdx
+	movq	32(%rdi),%r8
+	movq	40(%rdi),%r9
+	movq	48(%rdi),%r10
+	movq	56(%rdi),%r11
+	jmp	L$loop_avx
+.p2align	4
+L$loop_avx:	// one iteration per 128-byte input block
+	vmovdqa	K512+1280(%rip),%xmm11	// vpshufb control mask (presumably a big-endian byte swap; K512 is outside this chunk -- confirm)
+	vmovdqu	0(%rsi),%xmm0
+	leaq	K512+128(%rip),%rbp	// %rbp walks the constant table, biased by 128
+	vmovdqu	16(%rsi),%xmm1
+	vmovdqu	32(%rsi),%xmm2
+	vpshufb	%xmm11,%xmm0,%xmm0
+	vmovdqu	48(%rsi),%xmm3
+	vpshufb	%xmm11,%xmm1,%xmm1
+	vmovdqu	64(%rsi),%xmm4
+	vpshufb	%xmm11,%xmm2,%xmm2
+	vmovdqu	80(%rsi),%xmm5
+	vpshufb	%xmm11,%xmm3,%xmm3
+	vmovdqu	96(%rsi),%xmm6
+	vpshufb	%xmm11,%xmm4,%xmm4
+	vmovdqu	112(%rsi),%xmm7
+	vpshufb	%xmm11,%xmm5,%xmm5
+	vpaddq	-128(%rbp),%xmm0,%xmm8	// message words + table constants (table read at a 32-byte stride)
+	vpshufb	%xmm11,%xmm6,%xmm6
+	vpaddq	-96(%rbp),%xmm1,%xmm9
+	vpshufb	%xmm11,%xmm7,%xmm7
+	vpaddq	-64(%rbp),%xmm2,%xmm10
+	vpaddq	-32(%rbp),%xmm3,%xmm11	// the shuffle mask in %xmm11 is no longer needed
+	vmovdqa	%xmm8,0(%rsp)	// stage W+K on the stack for the scalar rounds
+	vpaddq	0(%rbp),%xmm4,%xmm8
+	vmovdqa	%xmm9,16(%rsp)
+	vpaddq	32(%rbp),%xmm5,%xmm9
+	vmovdqa	%xmm10,32(%rsp)
+	vpaddq	64(%rbp),%xmm6,%xmm10
+	vmovdqa	%xmm11,48(%rsp)
+	vpaddq	96(%rbp),%xmm7,%xmm11
+	vmovdqa	%xmm8,64(%rsp)
+	movq	%rax,%r14	// %r14 carries the value of a into the first round
+	vmovdqa	%xmm9,80(%rsp)
+	movq	%rbx,%rdi
+	vmovdqa	%xmm10,96(%rsp)
+	xorq	%rcx,%rdi	// %rdi = b^c, seed for the first Maj computation
+	vmovdqa	%xmm11,112(%rsp)
+	movq	%r8,%r13	// %r13 = e, input to the first Sigma1 computation
+	jmp	L$avx_00_47
+// L$avx_00_47: 16 scalar rounds per pass, interleaved with the AVX message-schedule update (sigma0: ror 1, ror 8, shr 7; sigma1: ror 19, ror 61, shr 6) that restages W+K for the next 16 rounds.
+.p2align	4
+L$avx_00_47:
+	addq	$256,%rbp	// advance to the next 16 table constants (eight 16-byte reads at a 32-byte stride)
+	vpalignr	$8,%xmm0,%xmm1,%xmm8
+	shrdq	$23,%r13,%r13	// shrd with identical operands is a rotate right
+	movq	%r14,%rax	// commit the value of a carried in %r14
+	vpalignr	$8,%xmm4,%xmm5,%xmm11
+	movq	%r9,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$1,%xmm8,%xmm10
+	xorq	%r8,%r13
+	xorq	%r10,%r12
+	vpaddq	%xmm11,%xmm0,%xmm0
+	shrdq	$4,%r13,%r13
+	xorq	%rax,%r14
+	vpsrlq	$7,%xmm8,%xmm11
+	andq	%r8,%r12
+	xorq	%r8,%r13
+	vpsllq	$56,%xmm8,%xmm9
+	addq	0(%rsp),%r11	// round slot 0: h += staged W+K
+	movq	%rax,%r15
+	vpxor	%xmm10,%xmm11,%xmm8
+	xorq	%r10,%r12	// %r12 = ((f^g)&e)^g = Ch(e,f,g)
+	shrdq	$6,%r14,%r14
+	vpsrlq	$7,%xmm10,%xmm10
+	xorq	%rbx,%r15	// %r15 = a^b, reused as b^c by the next round
+	addq	%r12,%r11
+	vpxor	%xmm9,%xmm8,%xmm8
+	shrdq	$14,%r13,%r13	// %r13 = e ror 14 ^ e ror 18 ^ e ror 41 (Sigma1)
+	andq	%r15,%rdi
+	vpsllq	$7,%xmm9,%xmm9
+	xorq	%rax,%r14
+	addq	%r13,%r11
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%rbx,%rdi	// %rdi = ((a^b)&(b^c))^b = Maj(a,b,c)
+	shrdq	$28,%r14,%r14	// %r14 = a ror 28 ^ a ror 34 ^ a ror 39 (Sigma0)
+	vpsrlq	$6,%xmm7,%xmm11
+	addq	%r11,%rdx	// d += T1
+	addq	%rdi,%r11
+	vpxor	%xmm9,%xmm8,%xmm8
+	movq	%rdx,%r13
+	addq	%r11,%r14	// %r14 = next a; committed at the top of the next round
+	vpsllq	$3,%xmm7,%xmm10
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r11
+	vpaddq	%xmm8,%xmm0,%xmm0
+	movq	%r8,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$19,%xmm7,%xmm9
+	xorq	%rdx,%r13
+	xorq	%r9,%r12
+	vpxor	%xmm10,%xmm11,%xmm11
+	shrdq	$4,%r13,%r13
+	xorq	%r11,%r14
+	vpsllq	$42,%xmm10,%xmm10
+	andq	%rdx,%r12
+	xorq	%rdx,%r13
+	vpxor	%xmm9,%xmm11,%xmm11
+	addq	8(%rsp),%r10	// round slot 1
+	movq	%r11,%rdi
+	vpsrlq	$42,%xmm9,%xmm9
+	xorq	%r9,%r12
+	shrdq	$6,%r14,%r14
+	vpxor	%xmm10,%xmm11,%xmm11
+	xorq	%rax,%rdi
+	addq	%r12,%r10
+	vpxor	%xmm9,%xmm11,%xmm11
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	vpaddq	%xmm11,%xmm0,%xmm0
+	xorq	%r11,%r14
+	addq	%r13,%r10
+	vpaddq	-128(%rbp),%xmm0,%xmm10
+	xorq	%rax,%r15
+	shrdq	$28,%r14,%r14
+	addq	%r10,%rcx
+	addq	%r15,%r10
+	movq	%rcx,%r13
+	addq	%r10,%r14
+	vmovdqa	%xmm10,0(%rsp)	// restage W+K for slots 0-1
+	vpalignr	$8,%xmm1,%xmm2,%xmm8
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r10
+	vpalignr	$8,%xmm5,%xmm6,%xmm11
+	movq	%rdx,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$1,%xmm8,%xmm10
+	xorq	%rcx,%r13
+	xorq	%r8,%r12
+	vpaddq	%xmm11,%xmm1,%xmm1
+	shrdq	$4,%r13,%r13
+	xorq	%r10,%r14
+	vpsrlq	$7,%xmm8,%xmm11
+	andq	%rcx,%r12
+	xorq	%rcx,%r13
+	vpsllq	$56,%xmm8,%xmm9
+	addq	16(%rsp),%r9	// round slot 2
+	movq	%r10,%r15
+	vpxor	%xmm10,%xmm11,%xmm8
+	xorq	%r8,%r12
+	shrdq	$6,%r14,%r14
+	vpsrlq	$7,%xmm10,%xmm10
+	xorq	%r11,%r15
+	addq	%r12,%r9
+	vpxor	%xmm9,%xmm8,%xmm8
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	vpsllq	$7,%xmm9,%xmm9
+	xorq	%r10,%r14
+	addq	%r13,%r9
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%r11,%rdi
+	shrdq	$28,%r14,%r14
+	vpsrlq	$6,%xmm0,%xmm11
+	addq	%r9,%rbx
+	addq	%rdi,%r9
+	vpxor	%xmm9,%xmm8,%xmm8
+	movq	%rbx,%r13
+	addq	%r9,%r14
+	vpsllq	$3,%xmm0,%xmm10
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r9
+	vpaddq	%xmm8,%xmm1,%xmm1
+	movq	%rcx,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$19,%xmm0,%xmm9
+	xorq	%rbx,%r13
+	xorq	%rdx,%r12
+	vpxor	%xmm10,%xmm11,%xmm11
+	shrdq	$4,%r13,%r13
+	xorq	%r9,%r14
+	vpsllq	$42,%xmm10,%xmm10
+	andq	%rbx,%r12
+	xorq	%rbx,%r13
+	vpxor	%xmm9,%xmm11,%xmm11
+	addq	24(%rsp),%r8	// round slot 3
+	movq	%r9,%rdi
+	vpsrlq	$42,%xmm9,%xmm9
+	xorq	%rdx,%r12
+	shrdq	$6,%r14,%r14
+	vpxor	%xmm10,%xmm11,%xmm11
+	xorq	%r10,%rdi
+	addq	%r12,%r8
+	vpxor	%xmm9,%xmm11,%xmm11
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	vpaddq	%xmm11,%xmm1,%xmm1
+	xorq	%r9,%r14
+	addq	%r13,%r8
+	vpaddq	-96(%rbp),%xmm1,%xmm10
+	xorq	%r10,%r15
+	shrdq	$28,%r14,%r14
+	addq	%r8,%rax
+	addq	%r15,%r8
+	movq	%rax,%r13
+	addq	%r8,%r14
+	vmovdqa	%xmm10,16(%rsp)	// restage W+K for slots 2-3
+	vpalignr	$8,%xmm2,%xmm3,%xmm8
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r8
+	vpalignr	$8,%xmm6,%xmm7,%xmm11
+	movq	%rbx,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$1,%xmm8,%xmm10
+	xorq	%rax,%r13
+	xorq	%rcx,%r12
+	vpaddq	%xmm11,%xmm2,%xmm2
+	shrdq	$4,%r13,%r13
+	xorq	%r8,%r14
+	vpsrlq	$7,%xmm8,%xmm11
+	andq	%rax,%r12
+	xorq	%rax,%r13
+	vpsllq	$56,%xmm8,%xmm9
+	addq	32(%rsp),%rdx	// round slot 4
+	movq	%r8,%r15
+	vpxor	%xmm10,%xmm11,%xmm8
+	xorq	%rcx,%r12
+	shrdq	$6,%r14,%r14
+	vpsrlq	$7,%xmm10,%xmm10
+	xorq	%r9,%r15
+	addq	%r12,%rdx
+	vpxor	%xmm9,%xmm8,%xmm8
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	vpsllq	$7,%xmm9,%xmm9
+	xorq	%r8,%r14
+	addq	%r13,%rdx
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%r9,%rdi
+	shrdq	$28,%r14,%r14
+	vpsrlq	$6,%xmm1,%xmm11
+	addq	%rdx,%r11
+	addq	%rdi,%rdx
+	vpxor	%xmm9,%xmm8,%xmm8
+	movq	%r11,%r13
+	addq	%rdx,%r14
+	vpsllq	$3,%xmm1,%xmm10
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rdx
+	vpaddq	%xmm8,%xmm2,%xmm2
+	movq	%rax,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$19,%xmm1,%xmm9
+	xorq	%r11,%r13
+	xorq	%rbx,%r12
+	vpxor	%xmm10,%xmm11,%xmm11
+	shrdq	$4,%r13,%r13
+	xorq	%rdx,%r14
+	vpsllq	$42,%xmm10,%xmm10
+	andq	%r11,%r12
+	xorq	%r11,%r13
+	vpxor	%xmm9,%xmm11,%xmm11
+	addq	40(%rsp),%rcx	// round slot 5
+	movq	%rdx,%rdi
+	vpsrlq	$42,%xmm9,%xmm9
+	xorq	%rbx,%r12
+	shrdq	$6,%r14,%r14
+	vpxor	%xmm10,%xmm11,%xmm11
+	xorq	%r8,%rdi
+	addq	%r12,%rcx
+	vpxor	%xmm9,%xmm11,%xmm11
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	vpaddq	%xmm11,%xmm2,%xmm2
+	xorq	%rdx,%r14
+	addq	%r13,%rcx
+	vpaddq	-64(%rbp),%xmm2,%xmm10
+	xorq	%r8,%r15
+	shrdq	$28,%r14,%r14
+	addq	%rcx,%r10
+	addq	%r15,%rcx
+	movq	%r10,%r13
+	addq	%rcx,%r14
+	vmovdqa	%xmm10,32(%rsp)	// restage W+K for slots 4-5
+	vpalignr	$8,%xmm3,%xmm4,%xmm8
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rcx
+	vpalignr	$8,%xmm7,%xmm0,%xmm11
+	movq	%r11,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$1,%xmm8,%xmm10
+	xorq	%r10,%r13
+	xorq	%rax,%r12
+	vpaddq	%xmm11,%xmm3,%xmm3
+	shrdq	$4,%r13,%r13
+	xorq	%rcx,%r14
+	vpsrlq	$7,%xmm8,%xmm11
+	andq	%r10,%r12
+	xorq	%r10,%r13
+	vpsllq	$56,%xmm8,%xmm9
+	addq	48(%rsp),%rbx	// round slot 6
+	movq	%rcx,%r15
+	vpxor	%xmm10,%xmm11,%xmm8
+	xorq	%rax,%r12
+	shrdq	$6,%r14,%r14
+	vpsrlq	$7,%xmm10,%xmm10
+	xorq	%rdx,%r15
+	addq	%r12,%rbx
+	vpxor	%xmm9,%xmm8,%xmm8
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	vpsllq	$7,%xmm9,%xmm9
+	xorq	%rcx,%r14
+	addq	%r13,%rbx
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%rdx,%rdi
+	shrdq	$28,%r14,%r14
+	vpsrlq	$6,%xmm2,%xmm11
+	addq	%rbx,%r9
+	addq	%rdi,%rbx
+	vpxor	%xmm9,%xmm8,%xmm8
+	movq	%r9,%r13
+	addq	%rbx,%r14
+	vpsllq	$3,%xmm2,%xmm10
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rbx
+	vpaddq	%xmm8,%xmm3,%xmm3
+	movq	%r10,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$19,%xmm2,%xmm9
+	xorq	%r9,%r13
+	xorq	%r11,%r12
+	vpxor	%xmm10,%xmm11,%xmm11
+	shrdq	$4,%r13,%r13
+	xorq	%rbx,%r14
+	vpsllq	$42,%xmm10,%xmm10
+	andq	%r9,%r12
+	xorq	%r9,%r13
+	vpxor	%xmm9,%xmm11,%xmm11
+	addq	56(%rsp),%rax	// round slot 7
+	movq	%rbx,%rdi
+	vpsrlq	$42,%xmm9,%xmm9
+	xorq	%r11,%r12
+	shrdq	$6,%r14,%r14
+	vpxor	%xmm10,%xmm11,%xmm11
+	xorq	%rcx,%rdi
+	addq	%r12,%rax
+	vpxor	%xmm9,%xmm11,%xmm11
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	vpaddq	%xmm11,%xmm3,%xmm3
+	xorq	%rbx,%r14
+	addq	%r13,%rax
+	vpaddq	-32(%rbp),%xmm3,%xmm10
+	xorq	%rcx,%r15
+	shrdq	$28,%r14,%r14
+	addq	%rax,%r8
+	addq	%r15,%rax
+	movq	%r8,%r13
+	addq	%rax,%r14
+	vmovdqa	%xmm10,48(%rsp)	// restage W+K for slots 6-7
+	vpalignr	$8,%xmm4,%xmm5,%xmm8
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rax
+	vpalignr	$8,%xmm0,%xmm1,%xmm11
+	movq	%r9,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$1,%xmm8,%xmm10
+	xorq	%r8,%r13
+	xorq	%r10,%r12
+	vpaddq	%xmm11,%xmm4,%xmm4
+	shrdq	$4,%r13,%r13
+	xorq	%rax,%r14
+	vpsrlq	$7,%xmm8,%xmm11
+	andq	%r8,%r12
+	xorq	%r8,%r13
+	vpsllq	$56,%xmm8,%xmm9
+	addq	64(%rsp),%r11	// round slot 8
+	movq	%rax,%r15
+	vpxor	%xmm10,%xmm11,%xmm8
+	xorq	%r10,%r12
+	shrdq	$6,%r14,%r14
+	vpsrlq	$7,%xmm10,%xmm10
+	xorq	%rbx,%r15
+	addq	%r12,%r11
+	vpxor	%xmm9,%xmm8,%xmm8
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	vpsllq	$7,%xmm9,%xmm9
+	xorq	%rax,%r14
+	addq	%r13,%r11
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%rbx,%rdi
+	shrdq	$28,%r14,%r14
+	vpsrlq	$6,%xmm3,%xmm11
+	addq	%r11,%rdx
+	addq	%rdi,%r11
+	vpxor	%xmm9,%xmm8,%xmm8
+	movq	%rdx,%r13
+	addq	%r11,%r14
+	vpsllq	$3,%xmm3,%xmm10
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r11
+	vpaddq	%xmm8,%xmm4,%xmm4
+	movq	%r8,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$19,%xmm3,%xmm9
+	xorq	%rdx,%r13
+	xorq	%r9,%r12
+	vpxor	%xmm10,%xmm11,%xmm11
+	shrdq	$4,%r13,%r13
+	xorq	%r11,%r14
+	vpsllq	$42,%xmm10,%xmm10
+	andq	%rdx,%r12
+	xorq	%rdx,%r13
+	vpxor	%xmm9,%xmm11,%xmm11
+	addq	72(%rsp),%r10	// round slot 9
+	movq	%r11,%rdi
+	vpsrlq	$42,%xmm9,%xmm9
+	xorq	%r9,%r12
+	shrdq	$6,%r14,%r14
+	vpxor	%xmm10,%xmm11,%xmm11
+	xorq	%rax,%rdi
+	addq	%r12,%r10
+	vpxor	%xmm9,%xmm11,%xmm11
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	vpaddq	%xmm11,%xmm4,%xmm4
+	xorq	%r11,%r14
+	addq	%r13,%r10
+	vpaddq	0(%rbp),%xmm4,%xmm10
+	xorq	%rax,%r15
+	shrdq	$28,%r14,%r14
+	addq	%r10,%rcx
+	addq	%r15,%r10
+	movq	%rcx,%r13
+	addq	%r10,%r14
+	vmovdqa	%xmm10,64(%rsp)	// restage W+K for slots 8-9
+	vpalignr	$8,%xmm5,%xmm6,%xmm8
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r10
+	vpalignr	$8,%xmm1,%xmm2,%xmm11
+	movq	%rdx,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$1,%xmm8,%xmm10
+	xorq	%rcx,%r13
+	xorq	%r8,%r12
+	vpaddq	%xmm11,%xmm5,%xmm5
+	shrdq	$4,%r13,%r13
+	xorq	%r10,%r14
+	vpsrlq	$7,%xmm8,%xmm11
+	andq	%rcx,%r12
+	xorq	%rcx,%r13
+	vpsllq	$56,%xmm8,%xmm9
+	addq	80(%rsp),%r9	// round slot 10
+	movq	%r10,%r15
+	vpxor	%xmm10,%xmm11,%xmm8
+	xorq	%r8,%r12
+	shrdq	$6,%r14,%r14
+	vpsrlq	$7,%xmm10,%xmm10
+	xorq	%r11,%r15
+	addq	%r12,%r9
+	vpxor	%xmm9,%xmm8,%xmm8
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	vpsllq	$7,%xmm9,%xmm9
+	xorq	%r10,%r14
+	addq	%r13,%r9
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%r11,%rdi
+	shrdq	$28,%r14,%r14
+	vpsrlq	$6,%xmm4,%xmm11
+	addq	%r9,%rbx
+	addq	%rdi,%r9
+	vpxor	%xmm9,%xmm8,%xmm8
+	movq	%rbx,%r13
+	addq	%r9,%r14
+	vpsllq	$3,%xmm4,%xmm10
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r9
+	vpaddq	%xmm8,%xmm5,%xmm5
+	movq	%rcx,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$19,%xmm4,%xmm9
+	xorq	%rbx,%r13
+	xorq	%rdx,%r12
+	vpxor	%xmm10,%xmm11,%xmm11
+	shrdq	$4,%r13,%r13
+	xorq	%r9,%r14
+	vpsllq	$42,%xmm10,%xmm10
+	andq	%rbx,%r12
+	xorq	%rbx,%r13
+	vpxor	%xmm9,%xmm11,%xmm11
+	addq	88(%rsp),%r8	// round slot 11
+	movq	%r9,%rdi
+	vpsrlq	$42,%xmm9,%xmm9
+	xorq	%rdx,%r12
+	shrdq	$6,%r14,%r14
+	vpxor	%xmm10,%xmm11,%xmm11
+	xorq	%r10,%rdi
+	addq	%r12,%r8
+	vpxor	%xmm9,%xmm11,%xmm11
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	vpaddq	%xmm11,%xmm5,%xmm5
+	xorq	%r9,%r14
+	addq	%r13,%r8
+	vpaddq	32(%rbp),%xmm5,%xmm10
+	xorq	%r10,%r15
+	shrdq	$28,%r14,%r14
+	addq	%r8,%rax
+	addq	%r15,%r8
+	movq	%rax,%r13
+	addq	%r8,%r14
+	vmovdqa	%xmm10,80(%rsp)	// restage W+K for slots 10-11
+	vpalignr	$8,%xmm6,%xmm7,%xmm8
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r8
+	vpalignr	$8,%xmm2,%xmm3,%xmm11
+	movq	%rbx,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$1,%xmm8,%xmm10
+	xorq	%rax,%r13
+	xorq	%rcx,%r12
+	vpaddq	%xmm11,%xmm6,%xmm6
+	shrdq	$4,%r13,%r13
+	xorq	%r8,%r14
+	vpsrlq	$7,%xmm8,%xmm11
+	andq	%rax,%r12
+	xorq	%rax,%r13
+	vpsllq	$56,%xmm8,%xmm9
+	addq	96(%rsp),%rdx	// round slot 12
+	movq	%r8,%r15
+	vpxor	%xmm10,%xmm11,%xmm8
+	xorq	%rcx,%r12
+	shrdq	$6,%r14,%r14
+	vpsrlq	$7,%xmm10,%xmm10
+	xorq	%r9,%r15
+	addq	%r12,%rdx
+	vpxor	%xmm9,%xmm8,%xmm8
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	vpsllq	$7,%xmm9,%xmm9
+	xorq	%r8,%r14
+	addq	%r13,%rdx
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%r9,%rdi
+	shrdq	$28,%r14,%r14
+	vpsrlq	$6,%xmm5,%xmm11
+	addq	%rdx,%r11
+	addq	%rdi,%rdx
+	vpxor	%xmm9,%xmm8,%xmm8
+	movq	%r11,%r13
+	addq	%rdx,%r14
+	vpsllq	$3,%xmm5,%xmm10
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rdx
+	vpaddq	%xmm8,%xmm6,%xmm6
+	movq	%rax,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$19,%xmm5,%xmm9
+	xorq	%r11,%r13
+	xorq	%rbx,%r12
+	vpxor	%xmm10,%xmm11,%xmm11
+	shrdq	$4,%r13,%r13
+	xorq	%rdx,%r14
+	vpsllq	$42,%xmm10,%xmm10
+	andq	%r11,%r12
+	xorq	%r11,%r13
+	vpxor	%xmm9,%xmm11,%xmm11
+	addq	104(%rsp),%rcx	// round slot 13
+	movq	%rdx,%rdi
+	vpsrlq	$42,%xmm9,%xmm9
+	xorq	%rbx,%r12
+	shrdq	$6,%r14,%r14
+	vpxor	%xmm10,%xmm11,%xmm11
+	xorq	%r8,%rdi
+	addq	%r12,%rcx
+	vpxor	%xmm9,%xmm11,%xmm11
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	vpaddq	%xmm11,%xmm6,%xmm6
+	xorq	%rdx,%r14
+	addq	%r13,%rcx
+	vpaddq	64(%rbp),%xmm6,%xmm10
+	xorq	%r8,%r15
+	shrdq	$28,%r14,%r14
+	addq	%rcx,%r10
+	addq	%r15,%rcx
+	movq	%r10,%r13
+	addq	%rcx,%r14
+	vmovdqa	%xmm10,96(%rsp)	// restage W+K for slots 12-13
+	vpalignr	$8,%xmm7,%xmm0,%xmm8
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rcx
+	vpalignr	$8,%xmm3,%xmm4,%xmm11
+	movq	%r11,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$1,%xmm8,%xmm10
+	xorq	%r10,%r13
+	xorq	%rax,%r12
+	vpaddq	%xmm11,%xmm7,%xmm7
+	shrdq	$4,%r13,%r13
+	xorq	%rcx,%r14
+	vpsrlq	$7,%xmm8,%xmm11
+	andq	%r10,%r12
+	xorq	%r10,%r13
+	vpsllq	$56,%xmm8,%xmm9
+	addq	112(%rsp),%rbx	// round slot 14
+	movq	%rcx,%r15
+	vpxor	%xmm10,%xmm11,%xmm8
+	xorq	%rax,%r12
+	shrdq	$6,%r14,%r14
+	vpsrlq	$7,%xmm10,%xmm10
+	xorq	%rdx,%r15
+	addq	%r12,%rbx
+	vpxor	%xmm9,%xmm8,%xmm8
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	vpsllq	$7,%xmm9,%xmm9
+	xorq	%rcx,%r14
+	addq	%r13,%rbx
+	vpxor	%xmm10,%xmm8,%xmm8
+	xorq	%rdx,%rdi
+	shrdq	$28,%r14,%r14
+	vpsrlq	$6,%xmm6,%xmm11
+	addq	%rbx,%r9
+	addq	%rdi,%rbx
+	vpxor	%xmm9,%xmm8,%xmm8
+	movq	%r9,%r13
+	addq	%rbx,%r14
+	vpsllq	$3,%xmm6,%xmm10
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rbx
+	vpaddq	%xmm8,%xmm7,%xmm7
+	movq	%r10,%r12
+	shrdq	$5,%r14,%r14
+	vpsrlq	$19,%xmm6,%xmm9
+	xorq	%r9,%r13
+	xorq	%r11,%r12
+	vpxor	%xmm10,%xmm11,%xmm11
+	shrdq	$4,%r13,%r13
+	xorq	%rbx,%r14
+	vpsllq	$42,%xmm10,%xmm10
+	andq	%r9,%r12
+	xorq	%r9,%r13
+	vpxor	%xmm9,%xmm11,%xmm11
+	addq	120(%rsp),%rax	// round slot 15
+	movq	%rbx,%rdi
+	vpsrlq	$42,%xmm9,%xmm9
+	xorq	%r11,%r12
+	shrdq	$6,%r14,%r14
+	vpxor	%xmm10,%xmm11,%xmm11
+	xorq	%rcx,%rdi
+	addq	%r12,%rax
+	vpxor	%xmm9,%xmm11,%xmm11
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	vpaddq	%xmm11,%xmm7,%xmm7
+	xorq	%rbx,%r14
+	addq	%r13,%rax
+	vpaddq	96(%rbp),%xmm7,%xmm10
+	xorq	%rcx,%r15
+	shrdq	$28,%r14,%r14
+	addq	%rax,%r8
+	addq	%r15,%rax
+	movq	%r8,%r13
+	addq	%rax,%r14
+	vmovdqa	%xmm10,112(%rsp)	// restage W+K for slots 14-15
+	cmpb	$0,135(%rbp)	// repeat until this table byte is zero (presumably a marker past the last constants -- confirm against K512)
+	jne	L$avx_00_47
+	shrdq	$23,%r13,%r13	// final 16 rounds: scalar only, consuming the W+K values already staged on the stack
+	movq	%r14,%rax
+	movq	%r9,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%rax,%r14
+	andq	%r8,%r12
+	xorq	%r8,%r13
+	addq	0(%rsp),%r11	// round slot 0
+	movq	%rax,%r15
+	xorq	%r10,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%rbx,%r15
+	addq	%r12,%r11
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	xorq	%rax,%r14
+	addq	%r13,%r11
+	xorq	%rbx,%rdi
+	shrdq	$28,%r14,%r14
+	addq	%r11,%rdx
+	addq	%rdi,%r11
+	movq	%rdx,%r13
+	addq	%r11,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r11
+	movq	%r8,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%r11,%r14
+	andq	%rdx,%r12
+	xorq	%rdx,%r13
+	addq	8(%rsp),%r10	// round slot 1
+	movq	%r11,%rdi
+	xorq	%r9,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%rax,%rdi
+	addq	%r12,%r10
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	xorq	%r11,%r14
+	addq	%r13,%r10
+	xorq	%rax,%r15
+	shrdq	$28,%r14,%r14
+	addq	%r10,%rcx
+	addq	%r15,%r10
+	movq	%rcx,%r13
+	addq	%r10,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r10
+	movq	%rdx,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%r10,%r14
+	andq	%rcx,%r12
+	xorq	%rcx,%r13
+	addq	16(%rsp),%r9	// round slot 2
+	movq	%r10,%r15
+	xorq	%r8,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%r11,%r15
+	addq	%r12,%r9
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	xorq	%r10,%r14
+	addq	%r13,%r9
+	xorq	%r11,%rdi
+	shrdq	$28,%r14,%r14
+	addq	%r9,%rbx
+	addq	%rdi,%r9
+	movq	%rbx,%r13
+	addq	%r9,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r9
+	movq	%rcx,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%r9,%r14
+	andq	%rbx,%r12
+	xorq	%rbx,%r13
+	addq	24(%rsp),%r8	// round slot 3
+	movq	%r9,%rdi
+	xorq	%rdx,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%r10,%rdi
+	addq	%r12,%r8
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	xorq	%r9,%r14
+	addq	%r13,%r8
+	xorq	%r10,%r15
+	shrdq	$28,%r14,%r14
+	addq	%r8,%rax
+	addq	%r15,%r8
+	movq	%rax,%r13
+	addq	%r8,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r8
+	movq	%rbx,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%r8,%r14
+	andq	%rax,%r12
+	xorq	%rax,%r13
+	addq	32(%rsp),%rdx	// round slot 4
+	movq	%r8,%r15
+	xorq	%rcx,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%r9,%r15
+	addq	%r12,%rdx
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	xorq	%r8,%r14
+	addq	%r13,%rdx
+	xorq	%r9,%rdi
+	shrdq	$28,%r14,%r14
+	addq	%rdx,%r11
+	addq	%rdi,%rdx
+	movq	%r11,%r13
+	addq	%rdx,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rdx
+	movq	%rax,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%rdx,%r14
+	andq	%r11,%r12
+	xorq	%r11,%r13
+	addq	40(%rsp),%rcx	// round slot 5
+	movq	%rdx,%rdi
+	xorq	%rbx,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%r8,%rdi
+	addq	%r12,%rcx
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	xorq	%rdx,%r14
+	addq	%r13,%rcx
+	xorq	%r8,%r15
+	shrdq	$28,%r14,%r14
+	addq	%rcx,%r10
+	addq	%r15,%rcx
+	movq	%r10,%r13
+	addq	%rcx,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rcx
+	movq	%r11,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%rcx,%r14
+	andq	%r10,%r12
+	xorq	%r10,%r13
+	addq	48(%rsp),%rbx	// round slot 6
+	movq	%rcx,%r15
+	xorq	%rax,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%rdx,%r15
+	addq	%r12,%rbx
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	xorq	%rcx,%r14
+	addq	%r13,%rbx
+	xorq	%rdx,%rdi
+	shrdq	$28,%r14,%r14
+	addq	%rbx,%r9
+	addq	%rdi,%rbx
+	movq	%r9,%r13
+	addq	%rbx,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rbx
+	movq	%r10,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%rbx,%r14
+	andq	%r9,%r12
+	xorq	%r9,%r13
+	addq	56(%rsp),%rax	// round slot 7
+	movq	%rbx,%rdi
+	xorq	%r11,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%rcx,%rdi
+	addq	%r12,%rax
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	xorq	%rbx,%r14
+	addq	%r13,%rax
+	xorq	%rcx,%r15
+	shrdq	$28,%r14,%r14
+	addq	%rax,%r8
+	addq	%r15,%rax
+	movq	%r8,%r13
+	addq	%rax,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rax
+	movq	%r9,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%rax,%r14
+	andq	%r8,%r12
+	xorq	%r8,%r13
+	addq	64(%rsp),%r11	// round slot 8
+	movq	%rax,%r15
+	xorq	%r10,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%rbx,%r15
+	addq	%r12,%r11
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	xorq	%rax,%r14
+	addq	%r13,%r11
+	xorq	%rbx,%rdi
+	shrdq	$28,%r14,%r14
+	addq	%r11,%rdx
+	addq	%rdi,%r11
+	movq	%rdx,%r13
+	addq	%r11,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r11
+	movq	%r8,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%r11,%r14
+	andq	%rdx,%r12
+	xorq	%rdx,%r13
+	addq	72(%rsp),%r10	// round slot 9
+	movq	%r11,%rdi
+	xorq	%r9,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%rax,%rdi
+	addq	%r12,%r10
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	xorq	%r11,%r14
+	addq	%r13,%r10
+	xorq	%rax,%r15
+	shrdq	$28,%r14,%r14
+	addq	%r10,%rcx
+	addq	%r15,%r10
+	movq	%rcx,%r13
+	addq	%r10,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r10
+	movq	%rdx,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%r10,%r14
+	andq	%rcx,%r12
+	xorq	%rcx,%r13
+	addq	80(%rsp),%r9	// round slot 10
+	movq	%r10,%r15
+	xorq	%r8,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%r11,%r15
+	addq	%r12,%r9
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	xorq	%r10,%r14
+	addq	%r13,%r9
+	xorq	%r11,%rdi
+	shrdq	$28,%r14,%r14
+	addq	%r9,%rbx
+	addq	%rdi,%r9
+	movq	%rbx,%r13
+	addq	%r9,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r9
+	movq	%rcx,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%r9,%r14
+	andq	%rbx,%r12
+	xorq	%rbx,%r13
+	addq	88(%rsp),%r8	// round slot 11
+	movq	%r9,%rdi
+	xorq	%rdx,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%r10,%rdi
+	addq	%r12,%r8
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	xorq	%r9,%r14
+	addq	%r13,%r8
+	xorq	%r10,%r15
+	shrdq	$28,%r14,%r14
+	addq	%r8,%rax
+	addq	%r15,%r8
+	movq	%rax,%r13
+	addq	%r8,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%r8
+	movq	%rbx,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%r8,%r14
+	andq	%rax,%r12
+	xorq	%rax,%r13
+	addq	96(%rsp),%rdx	// round slot 12
+	movq	%r8,%r15
+	xorq	%rcx,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%r9,%r15
+	addq	%r12,%rdx
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	xorq	%r8,%r14
+	addq	%r13,%rdx
+	xorq	%r9,%rdi
+	shrdq	$28,%r14,%r14
+	addq	%rdx,%r11
+	addq	%rdi,%rdx
+	movq	%r11,%r13
+	addq	%rdx,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rdx
+	movq	%rax,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%rdx,%r14
+	andq	%r11,%r12
+	xorq	%r11,%r13
+	addq	104(%rsp),%rcx	// round slot 13
+	movq	%rdx,%rdi
+	xorq	%rbx,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%r8,%rdi
+	addq	%r12,%rcx
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	xorq	%rdx,%r14
+	addq	%r13,%rcx
+	xorq	%r8,%r15
+	shrdq	$28,%r14,%r14
+	addq	%rcx,%r10
+	addq	%r15,%rcx
+	movq	%r10,%r13
+	addq	%rcx,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rcx
+	movq	%r11,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%rcx,%r14
+	andq	%r10,%r12
+	xorq	%r10,%r13
+	addq	112(%rsp),%rbx	// round slot 14
+	movq	%rcx,%r15
+	xorq	%rax,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%rdx,%r15
+	addq	%r12,%rbx
+	shrdq	$14,%r13,%r13
+	andq	%r15,%rdi
+	xorq	%rcx,%r14
+	addq	%r13,%rbx
+	xorq	%rdx,%rdi
+	shrdq	$28,%r14,%r14
+	addq	%rbx,%r9
+	addq	%rdi,%rbx
+	movq	%r9,%r13
+	addq	%rbx,%r14
+	shrdq	$23,%r13,%r13
+	movq	%r14,%rbx
+	movq	%r10,%r12
+	shrdq	$5,%r14,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r12
+	shrdq	$4,%r13,%r13
+	xorq	%rbx,%r14
+	andq	%r9,%r12
+	xorq	%r9,%r13
+	addq	120(%rsp),%rax	// round slot 15
+	movq	%rbx,%rdi
+	xorq	%r11,%r12
+	shrdq	$6,%r14,%r14
+	xorq	%rcx,%rdi
+	addq	%r12,%rax
+	shrdq	$14,%r13,%r13
+	andq	%rdi,%r15
+	xorq	%rbx,%r14
+	addq	%r13,%rax
+	xorq	%rcx,%r15
+	shrdq	$28,%r14,%r14
+	addq	%rax,%r8
+	addq	%r15,%rax
+	movq	%r8,%r13
+	addq	%rax,%r14
+	movq	128+0(%rsp),%rdi	// reload the state pointer (%rdi was scratch during the rounds)
+	movq	%r14,%rax	// commit the value of a still carried in %r14
+// Add the previous state words into the working variables.
+	addq	0(%rdi),%rax
+	leaq	128(%rsi),%rsi	// advance to the next 128-byte input block
+	addq	8(%rdi),%rbx
+	addq	16(%rdi),%rcx
+	addq	24(%rdi),%rdx
+	addq	32(%rdi),%r8
+	addq	40(%rdi),%r9
+	addq	48(%rdi),%r10
+	addq	56(%rdi),%r11
+// Compare against the saved end pointer now; the stores below leave the flags intact for the jb.
+	cmpq	128+16(%rsp),%rsi
+
+	movq	%rax,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%rcx,16(%rdi)
+	movq	%rdx,24(%rdi)
+	movq	%r8,32(%rdi)
+	movq	%r9,40(%rdi)
+	movq	%r10,48(%rdi)
+	movq	%r11,56(%rdi)
+	jb	L$loop_avx
+// Epilogue: reload the registers pushed in the prologue, addressed relative to the saved %rsp.
+	movq	128+24(%rsp),%rsi
+	vzeroupper
+	movq	-48(%rsi),%r15
+	movq	-40(%rsi),%r14
+	movq	-32(%rsi),%r13
+	movq	-24(%rsi),%r12
+	movq	-16(%rsi),%rbp
+	movq	-8(%rsi),%rbx
+	leaq	(%rsi),%rsp
+L$epilogue_avx:
+	.byte	0xf3,0xc3	// rep ret
+
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S
new file mode 100644
index 0000000..867df68
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S
@@ -0,0 +1,834 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+
+
+
+
+//
+// _vpaes_encrypt_core: encrypt the single 16-byte block held in %xmm0.
+//
+// Inputs:
+//   %xmm0        = input block
+//   %rdx         = key schedule; the round counter is read from 240(%rdx)
+//   %xmm9-%xmm15 = constants, presumably loaded by _vpaes_preheat (defined
+//                  outside this chunk) -- confirm against that routine
+//
+// Output in %xmm0.  Writes %xmm1-%xmm5, %rax, %r9, %r10, %r11.
+// The .byte sequences are hand-encoded SSSE3 pshufb instructions.
+//
+.p2align	4
+_vpaes_encrypt_core:
+	movq	%rdx,%r9	// %r9 walks the round keys
+	movq	$16,%r11
+	movl	240(%rdx),%eax	// loop counter
+	movdqa	%xmm9,%xmm1
+	movdqa	L$k_ipt(%rip),%xmm2
+	pandn	%xmm0,%xmm1	// %xmm1 = ~%xmm9 & input
+	movdqu	(%r9),%xmm5	// first round key
+	psrld	$4,%xmm1	// high nibbles, if %xmm9 is a 0x0F byte mask -- TODO confirm
+	pand	%xmm9,%xmm0
+.byte	102,15,56,0,208	// pshufb %xmm0,%xmm2
+	movdqa	L$k_ipt+16(%rip),%xmm0
+.byte	102,15,56,0,193	// pshufb %xmm1,%xmm0
+	pxor	%xmm5,%xmm2
+	addq	$16,%r9	// last flag-setting instruction before the first jnz at L$enc_entry
+	pxor	%xmm2,%xmm0
+	leaq	L$k_mc_backward(%rip),%r10
+	jmp	L$enc_entry
+
+.p2align	4
+L$enc_loop:
+// Middle of a round: table lookups on %xmm2/%xmm3, add the round key in %xmm5, then mix.
+	movdqa	%xmm13,%xmm4
+	movdqa	%xmm12,%xmm0
+.byte	102,15,56,0,226	// pshufb %xmm2,%xmm4
+.byte	102,15,56,0,195	// pshufb %xmm3,%xmm0
+	pxor	%xmm5,%xmm4
+	movdqa	%xmm15,%xmm5
+	pxor	%xmm4,%xmm0
+	movdqa	-64(%r11,%r10,1),%xmm1
+.byte	102,15,56,0,234	// pshufb %xmm2,%xmm5
+	movdqa	(%r11,%r10,1),%xmm4
+	movdqa	%xmm14,%xmm2
+.byte	102,15,56,0,211	// pshufb %xmm3,%xmm2
+	movdqa	%xmm0,%xmm3
+	pxor	%xmm5,%xmm2
+.byte	102,15,56,0,193	// pshufb %xmm1,%xmm0
+	addq	$16,%r9	// next round key
+	pxor	%xmm2,%xmm0
+.byte	102,15,56,0,220	// pshufb %xmm4,%xmm3
+	addq	$16,%r11
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,193	// pshufb %xmm1,%xmm0
+	andq	$0x30,%r11	// %r11 cycles through 0x00,0x10,0x20,0x30
+	subq	$1,%rax	// sets ZF for the jnz below; the SSE instructions in between leave flags alone
+	pxor	%xmm3,%xmm0
+
+L$enc_entry:
+// Top of a round: split %xmm0 by the %xmm9 mask and run the %xmm10/%xmm11 table lookups.
+	movdqa	%xmm9,%xmm1
+	movdqa	%xmm11,%xmm5
+	pandn	%xmm0,%xmm1
+	psrld	$4,%xmm1
+	pand	%xmm9,%xmm0
+.byte	102,15,56,0,232	// pshufb %xmm0,%xmm5
+	movdqa	%xmm10,%xmm3
+	pxor	%xmm1,%xmm0
+.byte	102,15,56,0,217	// pshufb %xmm1,%xmm3
+	movdqa	%xmm10,%xmm4
+	pxor	%xmm5,%xmm3
+.byte	102,15,56,0,224	// pshufb %xmm0,%xmm4
+	movdqa	%xmm10,%xmm2
+	pxor	%xmm5,%xmm4
+.byte	102,15,56,0,211	// pshufb %xmm3,%xmm2
+	movdqa	%xmm10,%xmm3
+	pxor	%xmm0,%xmm2
+.byte	102,15,56,0,220	// pshufb %xmm4,%xmm3
+	movdqu	(%r9),%xmm5	// round key for this round
+	pxor	%xmm1,%xmm3
+	jnz	L$enc_loop
+
+// Last round: uses the tables at -96(%r10) and -80(%r10), then a final permutation.
+	movdqa	-96(%r10),%xmm4
+	movdqa	-80(%r10),%xmm0
+.byte	102,15,56,0,226	// pshufb %xmm2,%xmm4
+	pxor	%xmm5,%xmm4
+.byte	102,15,56,0,195	// pshufb %xmm3,%xmm0
+	movdqa	64(%r11,%r10,1),%xmm1
+	pxor	%xmm4,%xmm0
+.byte	102,15,56,0,193	// pshufb %xmm1,%xmm0
+	.byte	0xf3,0xc3	// rep ret
+
+
+//
+// _vpaes_decrypt_core: decrypt the single 16-byte block held in %xmm0.
+// Inputs: %xmm0 = block, %rdx = key schedule (round counter read from 240(%rdx)),
+// %xmm9-%xmm11 = constants (presumably from _vpaes_preheat, outside this chunk -- confirm).
+// Output in %xmm0.  Writes %xmm1-%xmm5, %rax, %r9, %r10, %r11.
+//
+.p2align	4
+_vpaes_decrypt_core:
+	movq	%rdx,%r9	// %r9 walks the round keys
+	movl	240(%rdx),%eax	// loop counter
+	movdqa	%xmm9,%xmm1
+	movdqa	L$k_dipt(%rip),%xmm2
+	pandn	%xmm0,%xmm1
+	movq	%rax,%r11
+	psrld	$4,%xmm1
+	movdqu	(%r9),%xmm5	// first round key
+	shlq	$4,%r11
+	pand	%xmm9,%xmm0
+.byte	102,15,56,0,208	// pshufb %xmm0,%xmm2
+	movdqa	L$k_dipt+16(%rip),%xmm0
+	xorq	$0x30,%r11
+	leaq	L$k_dsbd(%rip),%r10
+.byte	102,15,56,0,193	// pshufb %xmm1,%xmm0
+	andq	$0x30,%r11	// %r11 = ((16*counter) ^ 0x30) & 0x30
+	pxor	%xmm5,%xmm2
+	movdqa	L$k_mc_forward+48(%rip),%xmm5
+	pxor	%xmm2,%xmm0
+	addq	$16,%r9
+	addq	%r10,%r11	// last flag-setting instruction before the first jnz at L$dec_entry
+	jmp	L$dec_entry
+
+.p2align	4
+L$dec_loop:
+// Four lookup steps, each using a pair of 16-byte tables at increasing offsets from
+// %r10 (L$k_dsbd); between steps the accumulator is permuted by %xmm5.
+
+	movdqa	-32(%r10),%xmm4
+	movdqa	-16(%r10),%xmm1
+.byte	102,15,56,0,226	// pshufb %xmm2,%xmm4
+.byte	102,15,56,0,203	// pshufb %xmm3,%xmm1
+	pxor	%xmm4,%xmm0
+	movdqa	0(%r10),%xmm4
+	pxor	%xmm1,%xmm0
+	movdqa	16(%r10),%xmm1
+
+.byte	102,15,56,0,226	// pshufb %xmm2,%xmm4
+.byte	102,15,56,0,197	// pshufb %xmm5,%xmm0
+.byte	102,15,56,0,203	// pshufb %xmm3,%xmm1
+	pxor	%xmm4,%xmm0
+	movdqa	32(%r10),%xmm4
+	pxor	%xmm1,%xmm0
+	movdqa	48(%r10),%xmm1
+
+.byte	102,15,56,0,226	// pshufb %xmm2,%xmm4
+.byte	102,15,56,0,197	// pshufb %xmm5,%xmm0
+.byte	102,15,56,0,203	// pshufb %xmm3,%xmm1
+	pxor	%xmm4,%xmm0
+	movdqa	64(%r10),%xmm4
+	pxor	%xmm1,%xmm0
+	movdqa	80(%r10),%xmm1
+
+.byte	102,15,56,0,226	// pshufb %xmm2,%xmm4
+.byte	102,15,56,0,197	// pshufb %xmm5,%xmm0
+.byte	102,15,56,0,203	// pshufb %xmm3,%xmm1
+	pxor	%xmm4,%xmm0
+	addq	$16,%r9	// next round key
+.byte	102,15,58,15,237,12	// palignr $12,%xmm5,%xmm5
+	pxor	%xmm1,%xmm0
+	subq	$1,%rax	// sets ZF for the jnz below; the SSE instructions in between leave flags alone
+
+L$dec_entry:
+// Top of a round: split %xmm0 by the %xmm9 mask and run the %xmm10/%xmm11 table lookups.
+	movdqa	%xmm9,%xmm1
+	pandn	%xmm0,%xmm1
+	movdqa	%xmm11,%xmm2
+	psrld	$4,%xmm1
+	pand	%xmm9,%xmm0
+.byte	102,15,56,0,208	// pshufb %xmm0,%xmm2
+	movdqa	%xmm10,%xmm3
+	pxor	%xmm1,%xmm0
+.byte	102,15,56,0,217	// pshufb %xmm1,%xmm3
+	movdqa	%xmm10,%xmm4
+	pxor	%xmm2,%xmm3
+.byte	102,15,56,0,224	// pshufb %xmm0,%xmm4
+	pxor	%xmm2,%xmm4
+	movdqa	%xmm10,%xmm2
+.byte	102,15,56,0,211	// pshufb %xmm3,%xmm2
+	movdqa	%xmm10,%xmm3
+	pxor	%xmm0,%xmm2
+.byte	102,15,56,0,220	// pshufb %xmm4,%xmm3
+	movdqu	(%r9),%xmm0	// round key for this round
+	pxor	%xmm1,%xmm3
+	jnz	L$dec_loop
+
+// Last round: tables at 96(%r10) and 112(%r10), then a final permutation loaded via %r11.
+	movdqa	96(%r10),%xmm4
+.byte	102,15,56,0,226	// pshufb %xmm2,%xmm4
+	pxor	%xmm0,%xmm4
+	movdqa	112(%r10),%xmm0
+	movdqa	-352(%r11),%xmm2	// NOTE(review): offset is relative to L$k_dsbd plus the index computed above; target table is outside this chunk
+.byte	102,15,56,0,195	// pshufb %xmm3,%xmm0
+	pxor	%xmm4,%xmm0
+.byte	102,15,56,0,194	// pshufb %xmm2,%xmm0
+	.byte	0xf3,0xc3	// rep ret
+
+
+
+
+
+
+
+
+.p2align	4
+_vpaes_schedule_core:
+// Expand the user key at (%rdi) into the schedule at (%rdx).  %esi = key size in bits,
+// %rcx = 0 when building an encryption schedule and non-zero for decryption,
+// %r8 = byte offset into L$k_sr used to permute each stored round key.
+// Dispatches on %esi: below 192 -> L$schedule_128, equal -> L$schedule_192, above -> L$schedule_256.
+// Zeroes %xmm0-%xmm7 before returning.
+	call	_vpaes_preheat
+	movdqa	L$k_rcon(%rip),%xmm8
+	movdqu	(%rdi),%xmm0	// first 16 bytes of the user key
+
+// Keep an untransformed copy in %xmm3; the transformed key becomes the running state in %xmm7.
+	movdqa	%xmm0,%xmm3
+	leaq	L$k_ipt(%rip),%r11
+	call	_vpaes_schedule_transform
+	movdqa	%xmm0,%xmm7
+
+	leaq	L$k_sr(%rip),%r10
+	testq	%rcx,%rcx
+	jnz	L$schedule_am_decrypting
+
+// Encrypting: the first stored round key is the transformed key.
+	movdqu	%xmm0,(%rdx)
+	jmp	L$schedule_go
+
+L$schedule_am_decrypting:
+// Decrypting: store the untransformed key, permuted by the L$k_sr entry at offset %r8.
+	movdqa	(%r8,%r10,1),%xmm1
+.byte	102,15,56,0,217	// pshufb %xmm1,%xmm3
+	movdqu	%xmm3,(%rdx)
+	xorq	$0x30,%r8
+
+L$schedule_go:
+	cmpl	$192,%esi
+	ja	L$schedule_256
+	je	L$schedule_192
+
+
+
+
+
+
+
+
+
+// 128-bit keys: ten rounds; every round but the last is followed by a mangle.
+L$schedule_128:
+	movl	$10,%esi
+
+L$oop_schedule_128:
+	call	_vpaes_schedule_round
+	decq	%rsi
+	jz	L$schedule_mangle_last
+	call	_vpaes_schedule_mangle
+	jmp	L$oop_schedule_128
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// 192-bit keys: four passes; each pass runs two rounds and stores three round keys (the final pass exits early).
+.p2align	4
+L$schedule_192:
+	movdqu	8(%rdi),%xmm0	// key bytes 8..23
+	call	_vpaes_schedule_transform
+	movdqa	%xmm0,%xmm6
+	pxor	%xmm4,%xmm4
+	movhlps	%xmm4,%xmm6	// clear the low 64 bits of %xmm6
+	movl	$4,%esi
+
+L$oop_schedule_192:
+	call	_vpaes_schedule_round
+.byte	102,15,58,15,198,8	// palignr $8,%xmm6,%xmm0
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_192_smear
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_round
+	decq	%rsi
+	jz	L$schedule_mangle_last
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_192_smear
+	jmp	L$oop_schedule_192
+
+
+
+
+
+
+
+
+
+
+// 256-bit keys: seven passes alternating a full round with a low round on the other key half.
+.p2align	4
+L$schedule_256:
+	movdqu	16(%rdi),%xmm0	// key bytes 16..31
+	call	_vpaes_schedule_transform
+	movl	$7,%esi
+
+L$oop_schedule_256:
+	call	_vpaes_schedule_mangle
+	movdqa	%xmm0,%xmm6	// save this key half in %xmm6
+
+
+	call	_vpaes_schedule_round
+	decq	%rsi
+	jz	L$schedule_mangle_last
+	call	_vpaes_schedule_mangle
+
+// Low round on the saved half: swap it into %xmm7 for the call, then restore %xmm7.
+	pshufd	$0xFF,%xmm0,%xmm0
+	movdqa	%xmm7,%xmm5
+	movdqa	%xmm6,%xmm7
+	call	_vpaes_schedule_low_round
+	movdqa	%xmm5,%xmm7
+
+	jmp	L$oop_schedule_256
+
+
+
+
+
+
+
+
+
+
+
+// Final round key: transformed with L$k_opt (encrypting) or L$k_deskew (decrypting) after xor with L$k_s63.
+.p2align	4
+L$schedule_mangle_last:
+
+	leaq	L$k_deskew(%rip),%r11
+	testq	%rcx,%rcx
+	jnz	L$schedule_mangle_last_dec
+
+// Encrypting: permute by the L$k_sr entry and use L$k_opt; +32 here and -16 below nets %rdx += 16.
+	movdqa	(%r8,%r10,1),%xmm1
+.byte	102,15,56,0,193	// pshufb %xmm1,%xmm0
+	leaq	L$k_opt(%rip),%r11
+	addq	$32,%rdx
+
+L$schedule_mangle_last_dec:
+	addq	$-16,%rdx
+	pxor	L$k_s63(%rip),%xmm0
+	call	_vpaes_schedule_transform
+	movdqu	%xmm0,(%rdx)
+
+// Clear the registers that held key material.
+	pxor	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	.byte	0xf3,0xc3	// rep ret
+
+
+
+
+
+
+
+
+
+
+
+//
+// _vpaes_schedule_192_smear: combine dwords of %xmm6 and %xmm7 for the 192-bit schedule.
+// Reads %xmm6 and %xmm7.  Returns the combined value in %xmm0 and leaves it in
+// %xmm6 with the low 64 bits cleared.  %xmm1 is zero on return.
+// In this chunk it is called only from L$oop_schedule_192.
+//
+.p2align	4
+_vpaes_schedule_192_smear:
+	pshufd	$0x80,%xmm6,%xmm1	// %xmm1 = dwords (0,0,0,2) of %xmm6, listed low to high
+	pshufd	$0xFE,%xmm7,%xmm0	// %xmm0 = dwords (2,3,3,3) of %xmm7, listed low to high
+	pxor	%xmm1,%xmm6
+	pxor	%xmm1,%xmm1
+	pxor	%xmm0,%xmm6
+	movdqa	%xmm6,%xmm0
+	movhlps	%xmm1,%xmm6	// low 64 bits of %xmm6 = high 64 bits of %xmm1 = 0
+	.byte	0xf3,0xc3	// rep ret
+
+
+
+
+
+
+
+
+
+
+
+
+
+//
+// _vpaes_schedule_round: one key-schedule round.  Falls through into
+// _vpaes_schedule_low_round, which the 256-bit schedule also calls directly.
+// Inputs: %xmm0 = value to substitute, %xmm7 = running key state, %xmm8 = round
+// constants from L$k_rcon (one byte consumed per call), %xmm9-%xmm13 = lookup constants
+// (presumably set by _vpaes_preheat, outside this chunk -- confirm).
+// Output: result in both %xmm0 and %xmm7.  Writes %xmm1-%xmm4; the full round also rotates %xmm8.
+//
+.p2align	4
+_vpaes_schedule_round:
+// Move the top byte of %xmm8 into the low byte of %xmm1, rotate %xmm8, and xor it into %xmm7.
+	pxor	%xmm1,%xmm1
+.byte	102,65,15,58,15,200,15	// palignr $15,%xmm8,%xmm1
+.byte	102,69,15,58,15,192,15	// palignr $15,%xmm8,%xmm8
+	pxor	%xmm1,%xmm7
+
+// Broadcast the top dword of %xmm0, then rotate the register right by one byte.
+	pshufd	$0xFF,%xmm0,%xmm0
+.byte	102,15,58,15,192,1	// palignr $1,%xmm0,%xmm0
+
+
+
+// Entry point without the round-constant and rotate steps above.
+_vpaes_schedule_low_round:
+// Smear %xmm7: xor in copies shifted left by 4 and then 8 bytes, then xor with L$k_s63.
+	movdqa	%xmm7,%xmm1
+	pslldq	$4,%xmm7
+	pxor	%xmm1,%xmm7
+	movdqa	%xmm7,%xmm1
+	pslldq	$8,%xmm7
+	pxor	%xmm1,%xmm7
+	pxor	L$k_s63(%rip),%xmm7
+
+// Table-lookup substitution of %xmm0 (same instruction pattern as L$enc_entry in the encrypt core).
+	movdqa	%xmm9,%xmm1
+	pandn	%xmm0,%xmm1
+	psrld	$4,%xmm1
+	pand	%xmm9,%xmm0
+	movdqa	%xmm11,%xmm2
+.byte	102,15,56,0,208	// pshufb %xmm0,%xmm2
+	pxor	%xmm1,%xmm0
+	movdqa	%xmm10,%xmm3
+.byte	102,15,56,0,217	// pshufb %xmm1,%xmm3
+	pxor	%xmm2,%xmm3
+	movdqa	%xmm10,%xmm4
+.byte	102,15,56,0,224	// pshufb %xmm0,%xmm4
+	pxor	%xmm2,%xmm4
+	movdqa	%xmm10,%xmm2
+.byte	102,15,56,0,211	// pshufb %xmm3,%xmm2
+	pxor	%xmm0,%xmm2
+	movdqa	%xmm10,%xmm3
+.byte	102,15,56,0,220	// pshufb %xmm4,%xmm3
+	pxor	%xmm1,%xmm3
+	movdqa	%xmm13,%xmm4
+.byte	102,15,56,0,226	// pshufb %xmm2,%xmm4
+	movdqa	%xmm12,%xmm0
+.byte	102,15,56,0,195	// pshufb %xmm3,%xmm0
+	pxor	%xmm4,%xmm0
+
+// Combine with the smeared state and publish the result in both registers.
+	pxor	%xmm7,%xmm0
+	movdqa	%xmm0,%xmm7
+	.byte	0xf3,0xc3	// rep ret
+
+
+
+
+//
+// _vpaes_schedule_transform: apply a byte-wise table transform to %xmm0.
+// Each byte is split using the mask in %xmm9: the bits selected by the mask index
+// the 16-byte table at (%r11), the remaining bits shifted right by 4 index the table
+// at 16(%r11), and the two lookups are xored.
+// Inputs: %xmm0, %r11 = table pair, %xmm9 = mask (presumably 0x0F in every byte -- confirm).
+// Output in %xmm0.  Writes %xmm1 and %xmm2.
+//
+.p2align	4
+_vpaes_schedule_transform:
+	movdqa	%xmm9,%xmm1
+	pandn	%xmm0,%xmm1	// %xmm1 = ~%xmm9 & %xmm0
+	psrld	$4,%xmm1
+	pand	%xmm9,%xmm0
+	movdqa	(%r11),%xmm2
+.byte	102,15,56,0,208	// pshufb %xmm0,%xmm2
+	movdqa	16(%r11),%xmm0
+.byte	102,15,56,0,193	// pshufb %xmm1,%xmm0
+	pxor	%xmm2,%xmm0
+	.byte	0xf3,0xc3	// rep ret
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+//
+// _vpaes_schedule_mangle: convert the round key in %xmm0 to its stored form and
+// write it to the schedule.
+// Encrypting (%rcx == 0): advance %rdx by 16, xor with L$k_s63, then xor together
+// three successive pshufb permutations by L$k_mc_forward.
+// Decrypting (%rcx != 0): run four table-pair lookups from L$k_dksd, then step
+// %rdx back by 16.
+// Both paths permute by the 16 bytes at (%r8,%r10), store to (%rdx) and set
+// %r8 = (%r8 - 16) & 0x30.  Preserves %xmm0; writes %xmm1-%xmm5 (and %r11 when decrypting).
+//
+.p2align	4
+_vpaes_schedule_mangle:
+	movdqa	%xmm0,%xmm4	// work on a copy so %xmm0 survives
+	movdqa	L$k_mc_forward(%rip),%xmm5
+	testq	%rcx,%rcx
+	jnz	L$schedule_mangle_dec
+
+// Encrypting path.
+	addq	$16,%rdx
+	pxor	L$k_s63(%rip),%xmm4
+.byte	102,15,56,0,229	// pshufb %xmm5,%xmm4
+	movdqa	%xmm4,%xmm3
+.byte	102,15,56,0,229	// pshufb %xmm5,%xmm4
+	pxor	%xmm4,%xmm3
+.byte	102,15,56,0,229	// pshufb %xmm5,%xmm4
+	pxor	%xmm4,%xmm3
+
+	jmp	L$schedule_mangle_both
+.p2align	4
+L$schedule_mangle_dec:
+// Decrypting path: split the key by the %xmm9 mask, then four lookup steps over the L$k_dksd tables.
+	leaq	L$k_dksd(%rip),%r11
+	movdqa	%xmm9,%xmm1
+	pandn	%xmm4,%xmm1
+	psrld	$4,%xmm1
+	pand	%xmm9,%xmm4
+
+	movdqa	0(%r11),%xmm2
+.byte	102,15,56,0,212	// pshufb %xmm4,%xmm2
+	movdqa	16(%r11),%xmm3
+.byte	102,15,56,0,217	// pshufb %xmm1,%xmm3
+	pxor	%xmm2,%xmm3
+.byte	102,15,56,0,221	// pshufb %xmm5,%xmm3
+
+	movdqa	32(%r11),%xmm2
+.byte	102,15,56,0,212	// pshufb %xmm4,%xmm2
+	pxor	%xmm3,%xmm2
+	movdqa	48(%r11),%xmm3
+.byte	102,15,56,0,217	// pshufb %xmm1,%xmm3
+	pxor	%xmm2,%xmm3
+.byte	102,15,56,0,221	// pshufb %xmm5,%xmm3
+
+	movdqa	64(%r11),%xmm2
+.byte	102,15,56,0,212	// pshufb %xmm4,%xmm2
+	pxor	%xmm3,%xmm2
+	movdqa	80(%r11),%xmm3
+.byte	102,15,56,0,217	// pshufb %xmm1,%xmm3
+	pxor	%xmm2,%xmm3
+.byte	102,15,56,0,221	// pshufb %xmm5,%xmm3
+
+	movdqa	96(%r11),%xmm2
+.byte	102,15,56,0,212	// pshufb %xmm4,%xmm2
+	pxor	%xmm3,%xmm2
+	movdqa	112(%r11),%xmm3
+.byte	102,15,56,0,217	// pshufb %xmm1,%xmm3
+	pxor	%xmm2,%xmm3
+
+	addq	$-16,%rdx	// the decryption schedule is filled towards lower addresses
+
+L$schedule_mangle_both:
+	movdqa	(%r8,%r10,1),%xmm1
+.byte	102,15,56,0,217	// pshufb %xmm1,%xmm3
+	addq	$-16,%r8
+	andq	$0x30,%r8
+	movdqu	%xmm3,(%rdx)
+	.byte	0xf3,0xc3	// rep ret
+
+
+
+
+
+.globl	_vpaes_set_encrypt_key
+.private_extern _vpaes_set_encrypt_key
+/* Uses %esi (presumably the key size in bits) and %rdx (presumably the key-schedule struct); always returns 0 in %eax. */
+.p2align	4
+_vpaes_set_encrypt_key:
+	movl	%esi,%eax
+	shrl	$5,%eax
+	addl	$5,%eax	/* eax = (esi >> 5) + 5 */
+	movl	%eax,240(%rdx)	/* stored at byte offset 240 of the structure at %rdx */
+
+	movl	$0,%ecx	/* %ecx = 0 for the schedule core (the flag tested by _vpaes_schedule_mangle) */
+	movl	$0x30,%r8d
+	call	_vpaes_schedule_core
+	xorl	%eax,%eax	/* return 0 */
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+.globl	_vpaes_set_decrypt_key
+.private_extern _vpaes_set_decrypt_key
+/* Same register inputs as _vpaes_set_encrypt_key, but hands the schedule core %ecx = 1 and a pointer past the end of the schedule; returns 0. */
+.p2align	4
+_vpaes_set_decrypt_key:
+	movl	%esi,%eax
+	shrl	$5,%eax
+	addl	$5,%eax	/* eax = (esi >> 5) + 5 */
+	movl	%eax,240(%rdx)	/* stored at byte offset 240 of the structure at %rdx */
+	shll	$4,%eax	/* eax *= 16 */
+	leaq	16(%rdx,%rax,1),%rdx	/* rdx += 16 * stored count + 16 */
+
+	movl	$1,%ecx	/* %ecx = 1 for the schedule core */
+	movl	%esi,%r8d
+	shrl	$1,%r8d
+	andl	$32,%r8d
+	xorl	$32,%r8d	/* r8d = ((esi >> 1) & 32) ^ 32, i.e. 0 or 32 */
+	call	_vpaes_schedule_core
+	xorl	%eax,%eax	/* return 0 */
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+.globl	_vpaes_encrypt
+.private_extern _vpaes_encrypt
+/* Transforms one 16-byte block: reads it from (%rdi), writes the result to (%rsi). */
+.p2align	4
+_vpaes_encrypt:
+	movdqu	(%rdi),%xmm0	/* unaligned load of the input block */
+	call	_vpaes_preheat	/* load the constant tables into %xmm9-%xmm15 */
+	call	_vpaes_encrypt_core	/* defined elsewhere in this file; result is taken from %xmm0 */
+	movdqu	%xmm0,(%rsi)	/* unaligned store of the output block */
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+.globl	_vpaes_decrypt
+.private_extern _vpaes_decrypt
+/* Transforms one 16-byte block with the decrypt core: reads it from (%rdi), writes the result to (%rsi). */
+.p2align	4
+_vpaes_decrypt:
+	movdqu	(%rdi),%xmm0	/* unaligned load of the input block */
+	call	_vpaes_preheat	/* load the constant tables into %xmm9-%xmm15 */
+	call	_vpaes_decrypt_core	/* defined elsewhere in this file; result is taken from %xmm0 */
+	movdqu	%xmm0,(%rsi)	/* unaligned store of the output block */
+	.byte	0xf3,0xc3	/* rep ret */
+
+.globl	_vpaes_cbc_encrypt
+.private_extern _vpaes_cbc_encrypt
+/* CBC over whole 16-byte blocks: %rdi in, %rsi out, %rdx byte count, %r8 16-byte chaining value (read and updated), %r9d != 0 selects encrypt. */
+.p2align	4
+_vpaes_cbc_encrypt:
+	xchgq	%rcx,%rdx	/* byte count moves to %rcx; the old %rcx (presumably the key pointer) moves to %rdx */
+	subq	$16,%rcx
+	jc	L$cbc_abort	/* fewer than 16 bytes: return without touching the buffers or the chaining value */
+	movdqu	(%r8),%xmm6	/* xmm6 = current chaining value */
+	subq	%rdi,%rsi	/* keep out-in so the output is addressed as (%rsi,%rdi) */
+	call	_vpaes_preheat
+	cmpl	$0,%r9d
+	je	L$cbc_dec_loop
+	jmp	L$cbc_enc_loop
+.p2align	4
+L$cbc_enc_loop:
+	movdqu	(%rdi),%xmm0
+	pxor	%xmm6,%xmm0	/* xor the input block with the chaining value */
+	call	_vpaes_encrypt_core
+	movdqa	%xmm0,%xmm6	/* the output block becomes the next chaining value */
+	movdqu	%xmm0,(%rsi,%rdi,1)
+	leaq	16(%rdi),%rdi
+	subq	$16,%rcx
+	jnc	L$cbc_enc_loop	/* continue while a full block remains; a trailing partial block is ignored */
+	jmp	L$cbc_done
+.p2align	4
+L$cbc_dec_loop:
+	movdqu	(%rdi),%xmm0
+	movdqa	%xmm0,%xmm7	/* keep the input block: it is the next chaining value */
+	call	_vpaes_decrypt_core
+	pxor	%xmm6,%xmm0	/* xor the core output with the chaining value */
+	movdqa	%xmm7,%xmm6
+	movdqu	%xmm0,(%rsi,%rdi,1)
+	leaq	16(%rdi),%rdi
+	subq	$16,%rcx
+	jnc	L$cbc_dec_loop	/* continue while a full block remains; a trailing partial block is ignored */
+L$cbc_done:
+	movdqu	%xmm6,(%r8)	/* write the final chaining value back */
+L$cbc_abort:
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+
+
+
+
+
+
+.p2align	4
+_vpaes_preheat:	/* Loads constant tables into %xmm9-%xmm15 and leaves %r10 = address of L$k_s0F. Offsets depend on the layout of _vpaes_consts. */
+	leaq	L$k_s0F(%rip),%r10
+	movdqa	-32(%r10),%xmm10	/* L$k_inv, first row */
+	movdqa	-16(%r10),%xmm11	/* L$k_inv, second row */
+	movdqa	0(%r10),%xmm9	/* L$k_s0F: 0x0F in every byte */
+	movdqa	48(%r10),%xmm13	/* L$k_sb1, first row */
+	movdqa	64(%r10),%xmm12	/* L$k_sb1, second row */
+	movdqa	80(%r10),%xmm15	/* L$k_sb2, first row */
+	movdqa	96(%r10),%xmm14	/* L$k_sb2, second row */
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+
+
+
+
+
+.p2align	6
+_vpaes_consts:	/* Constant tables, 16 bytes per .quad line. Order matters: _vpaes_preheat addresses L$k_inv, L$k_sb1 and L$k_sb2 relative to L$k_s0F. */
+L$k_inv:
+.quad	0x0E05060F0D080180, 0x040703090A0B0C02
+.quad	0x01040A060F0B0780, 0x030D0E0C02050809
+
+L$k_s0F:	/* 0x0F in every byte */
+.quad	0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
+
+L$k_ipt:
+.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
+.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
+
+L$k_sb1:
+.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
+.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
+L$k_sb2:
+.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
+.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
+L$k_sbo:
+.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
+.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
+
+L$k_mc_forward:	/* four 16-byte byte-permutation rows */
+.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
+.quad	0x080B0A0904070605, 0x000302010C0F0E0D
+.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
+.quad	0x000302010C0F0E0D, 0x080B0A0904070605
+
+L$k_mc_backward:	/* four 16-byte byte-permutation rows */
+.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
+.quad	0x020100030E0D0C0F, 0x0A09080B06050407
+.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
+.quad	0x0A09080B06050407, 0x020100030E0D0C0F
+
+L$k_sr:	/* four 16-byte byte-permutation rows; the first row is the identity permutation */
+.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
+.quad	0x030E09040F0A0500, 0x0B06010C07020D08
+.quad	0x0F060D040B020900, 0x070E050C030A0108
+.quad	0x0B0E0104070A0D00, 0x0306090C0F020508
+
+L$k_rcon:
+.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
+
+L$k_s63:	/* note: the stored value is 0x5B in every byte */
+.quad	0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
+
+L$k_opt:
+.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
+.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
+
+L$k_deskew:
+.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
+.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
+
+
+
+
+/* L$k_dksd..L$k_dks9 must stay contiguous: _vpaes_schedule_mangle reads them as one 128-byte run starting at L$k_dksd. */
+L$k_dksd:
+.quad	0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
+.quad	0x41C277F4B5368300, 0x5FDC69EAAB289D1E
+L$k_dksb:
+.quad	0x9A4FCA1F8550D500, 0x03D653861CC94C99
+.quad	0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
+L$k_dkse:
+.quad	0xD5031CCA1FC9D600, 0x53859A4C994F5086
+.quad	0xA23196054FDC7BE8, 0xCD5EF96A20B31487
+L$k_dks9:
+.quad	0xB6116FC87ED9A700, 0x4AED933482255BFC
+.quad	0x4576516227143300, 0x8BB89FACE9DAFDCE
+
+
+
+
+/* Tables below are presumably used by the decrypt core, which is outside this view -- confirm. */
+L$k_dipt:
+.quad	0x0F505B040B545F00, 0x154A411E114E451A
+.quad	0x86E383E660056500, 0x12771772F491F194
+
+L$k_dsb9:
+.quad	0x851C03539A86D600, 0xCAD51F504F994CC9
+.quad	0xC03B1789ECD74900, 0x725E2C9EB2FBA565
+L$k_dsbd:
+.quad	0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
+.quad	0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
+L$k_dsbb:
+.quad	0xD022649296B44200, 0x602646F6B0F2D404
+.quad	0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
+L$k_dsbe:
+.quad	0x46F2929626D4D000, 0x2242600464B4F6B0
+.quad	0x0C55A6CDFFAAC100, 0x9467F36B98593E32
+L$k_dsbo:
+.quad	0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
+.quad	0x12D7560F93441D00, 0xCA4B8159D8C58E9C
+.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0	/* ASCII: "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" NUL */
+.p2align	6
+
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/fipsmodule/x86_64-mont.S b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/x86_64-mont.S
new file mode 100644
index 0000000..4904417
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/x86_64-mont.S
@@ -0,0 +1,864 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+
+
+.globl	_bn_mul_mont
+.private_extern _bn_mul_mont
+
+.p2align	4
+_bn_mul_mont:	/* Word-by-word multiply/reduce loop. %rdi result, %rsi and %rdx operands, %rcx modulus, %r8 pointer to the reduction constant, %r9d word count; returns 1. */
+/* Dispatch on the word count: the generic loop below, the 4x-unrolled path, or the 8x squaring path. */
+	movl	%r9d,%r9d	/* zero-extend the 32-bit word count */
+	movq	%rsp,%rax	/* remember the entry %rsp; it is stored in the frame below */
+
+	testl	$3,%r9d
+	jnz	L$mul_enter	/* count not a multiple of 4: generic path */
+	cmpl	$8,%r9d
+	jb	L$mul_enter	/* count below 8: generic path */
+	cmpq	%rsi,%rdx
+	jne	L$mul4x_enter	/* operand pointers differ: 4x path */
+	testl	$7,%r9d
+	jz	L$sqr8x_enter	/* same operand pointer and count a multiple of 8: squaring path */
+	jmp	L$mul4x_enter
+
+.p2align	4
+L$mul_enter:
+	pushq	%rbx
+
+	pushq	%rbp
+
+	pushq	%r12
+
+	pushq	%r13
+
+	pushq	%r14
+
+	pushq	%r15
+
+
+	negq	%r9
+	movq	%rsp,%r11
+	leaq	-16(%rsp,%r9,8),%r10	/* room for count+2 eight-byte words below the pushed registers */
+	negq	%r9
+	andq	$-1024,%r10	/* round the new stack top down to a 1024-byte boundary */
+
+
+
+
+
+
+
+
+/* Move %rsp down to %r10 in steps of at most 4096 bytes, reading one word at each step so every intervening page is touched in order. */
+	subq	%r10,%r11
+	andq	$-4096,%r11
+	leaq	(%r10,%r11,1),%rsp
+	movq	(%rsp),%r11
+	cmpq	%r10,%rsp
+	ja	L$mul_page_walk
+	jmp	L$mul_page_walk_done
+
+.p2align	4
+L$mul_page_walk:
+	leaq	-4096(%rsp),%rsp
+	movq	(%rsp),%r11
+	cmpq	%r10,%rsp
+	ja	L$mul_page_walk
+L$mul_page_walk_done:
+
+	movq	%rax,8(%rsp,%r9,8)	/* save the entry %rsp just above the count+1 scratch words */
+
+L$mul_body:
+	movq	%rdx,%r12	/* free %rdx: mulq writes its high half there */
+	movq	(%r8),%r8	/* load the reduction constant itself */
+	movq	(%r12),%rbx	/* first word of the second operand */
+	movq	(%rsi),%rax
+
+	xorq	%r14,%r14	/* outer index (words of the second operand) */
+	xorq	%r15,%r15	/* inner index (words of the first operand and modulus) */
+
+	movq	%r8,%rbp
+	mulq	%rbx
+	movq	%rax,%r10
+	movq	(%rcx),%rax
+
+	imulq	%r10,%rbp	/* per-row multiplier for the modulus: low word times the reduction constant */
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r10	/* low word sum is discarded; only its carry is kept */
+	movq	8(%rsi),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r13
+
+	leaq	1(%r15),%r15
+	jmp	L$1st_enter
+
+.p2align	4
+L$1st:
+	addq	%rax,%r13
+	movq	(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r11,%r13
+	movq	%r10,%r11
+	adcq	$0,%rdx
+	movq	%r13,-16(%rsp,%r15,8)	/* scratch word, stored one position lower than the index just processed */
+	movq	%rdx,%r13
+
+L$1st_enter:
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	leaq	1(%r15),%r15
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	cmpq	%r9,%r15
+	jne	L$1st
+
+	addq	%rax,%r13
+	movq	(%rsi),%rax
+	adcq	$0,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+	movq	%r13,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+	movq	%r10,%r11
+
+	xorq	%rdx,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+	movq	%r13,-8(%rsp,%r9,8)	/* top scratch word */
+	movq	%rdx,(%rsp,%r9,8)	/* carry word above it */
+
+	leaq	1(%r14),%r14
+	jmp	L$outer
+.p2align	4
+L$outer:
+	movq	(%r12,%r14,8),%rbx	/* next word of the second operand */
+	xorq	%r15,%r15
+	movq	%r8,%rbp
+	movq	(%rsp),%r10
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	(%rcx),%rax
+	adcq	$0,%rdx
+
+	imulq	%r10,%rbp	/* per-row multiplier for the modulus */
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r10
+	movq	8(%rsi),%rax
+	adcq	$0,%rdx
+	movq	8(%rsp),%r10
+	movq	%rdx,%r13
+
+	leaq	1(%r15),%r15
+	jmp	L$inner_enter
+
+.p2align	4
+L$inner:
+	addq	%rax,%r13
+	movq	(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	movq	(%rsp,%r15,8),%r10	/* previous row's scratch word to accumulate */
+	adcq	$0,%rdx
+	movq	%r13,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+
+L$inner_enter:
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r11,%r10
+	movq	%rdx,%r11
+	adcq	$0,%r11
+	leaq	1(%r15),%r15
+
+	mulq	%rbp
+	cmpq	%r9,%r15
+	jne	L$inner
+
+	addq	%rax,%r13
+	movq	(%rsi),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	movq	(%rsp,%r15,8),%r10
+	adcq	$0,%rdx
+	movq	%r13,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+
+	xorq	%rdx,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-8(%rsp,%r9,8)
+	movq	%rdx,(%rsp,%r9,8)
+
+	leaq	1(%r14),%r14
+	cmpq	%r9,%r14
+	jb	L$outer
+
+	xorq	%r14,%r14	/* index = 0; xor also clears CF, which the first sbbq in L$sub consumes */
+	movq	(%rsp),%rax
+	leaq	(%rsp),%rsi	/* %rsi now points at the scratch array */
+	movq	%r9,%r15
+	jmp	L$sub
+.p2align	4
+L$sub:
+	sbbq	(%rcx,%r14,8),%rax	/* result[i] = scratch[i] - modulus[i] - borrow */
+	movq	%rax,(%rdi,%r14,8)
+	movq	8(%rsi,%r14,8),%rax
+	leaq	1(%r14),%r14	/* lea and dec leave CF intact, so the borrow survives the loop */
+	decq	%r15
+	jnz	L$sub
+
+	sbbq	$0,%rax	/* fold the final borrow into the top scratch word; %rax is then used as a select mask */
+	xorq	%r14,%r14
+	andq	%rax,%rsi
+	notq	%rax
+	movq	%rdi,%rcx
+	andq	%rax,%rcx
+	movq	%r9,%r15
+	orq	%rcx,%rsi	/* branch-free pointer select: %rsi = (mask & scratch) | (~mask & result) */
+.p2align	4
+L$copy:
+	movq	(%rsi,%r14,8),%rax
+	movq	%r14,(%rsp,%r14,8)	/* overwrite each scratch word (with the loop index) as it is consumed */
+	movq	%rax,(%rdi,%r14,8)
+	leaq	1(%r14),%r14
+	subq	$1,%r15
+	jnz	L$copy
+
+	movq	8(%rsp,%r9,8),%rsi	/* reload the entry %rsp saved after the page walk */
+
+	movq	$1,%rax	/* return value 1 */
+	movq	-48(%rsi),%r15
+
+	movq	-40(%rsi),%r14
+
+	movq	-32(%rsi),%r13
+
+	movq	-24(%rsi),%r12
+
+	movq	-16(%rsi),%rbp
+
+	movq	-8(%rsi),%rbx
+
+	leaq	(%rsi),%rsp	/* restore the caller's stack pointer */
+
+L$mul_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+
+.p2align	4
+bn_mul4x_mont:	/* 4x-unrolled variant of the _bn_mul_mont loop, same registers in; entered from _bn_mul_mont via L$mul4x_enter; returns 1. */
+
+	movl	%r9d,%r9d	/* zero-extend the 32-bit word count */
+	movq	%rsp,%rax	/* remember the entry %rsp */
+
+L$mul4x_enter:
+	pushq	%rbx
+
+	pushq	%rbp
+
+	pushq	%r12
+
+	pushq	%r13
+
+	pushq	%r14
+
+	pushq	%r15
+
+
+	negq	%r9
+	movq	%rsp,%r11
+	leaq	-32(%rsp,%r9,8),%r10	/* room for count+4 eight-byte words below the pushed registers */
+	negq	%r9
+	andq	$-1024,%r10	/* round the new stack top down to a 1024-byte boundary */
+/* Move %rsp down to %r10 in steps of at most 4096 bytes, reading one word at each step. */
+	subq	%r10,%r11
+	andq	$-4096,%r11
+	leaq	(%r10,%r11,1),%rsp
+	movq	(%rsp),%r11
+	cmpq	%r10,%rsp
+	ja	L$mul4x_page_walk
+	jmp	L$mul4x_page_walk_done
+
+L$mul4x_page_walk:
+	leaq	-4096(%rsp),%rsp
+	movq	(%rsp),%r11
+	cmpq	%r10,%rsp
+	ja	L$mul4x_page_walk
+L$mul4x_page_walk_done:
+
+	movq	%rax,8(%rsp,%r9,8)	/* save the entry %rsp above the scratch words */
+
+L$mul4x_body:
+	movq	%rdi,16(%rsp,%r9,8)	/* save the result pointer: %rdi is reused as an accumulator below */
+	movq	%rdx,%r12	/* free %rdx: mulq writes its high half there */
+	movq	(%r8),%r8	/* load the reduction constant itself */
+	movq	(%r12),%rbx	/* first word of the second operand */
+	movq	(%rsi),%rax
+
+	xorq	%r14,%r14	/* outer index */
+	xorq	%r15,%r15	/* inner index */
+
+	movq	%r8,%rbp
+	mulq	%rbx
+	movq	%rax,%r10
+	movq	(%rcx),%rax
+
+	imulq	%r10,%rbp	/* per-row multiplier for the modulus */
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r10
+	movq	8(%rsi),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	8(%rcx),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	16(%rsi),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	leaq	4(%r15),%r15
+	adcq	$0,%rdx
+	movq	%rdi,(%rsp)
+	movq	%rdx,%r13
+	jmp	L$1st4x
+.p2align	4
+L$1st4x:	/* first row: four words per iteration, no previous scratch contents to add */
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	-16(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	-8(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-24(%rsp,%r15,8)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	-8(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%rdi,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	8(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-8(%rsp,%r15,8)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	8(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	leaq	4(%r15),%r15
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	-16(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%rdi,-32(%rsp,%r15,8)
+	movq	%rdx,%r13
+	cmpq	%r9,%r15
+	jb	L$1st4x
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	-16(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	-8(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-24(%rsp,%r15,8)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	-8(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	(%rsi),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%rdi,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+
+	xorq	%rdi,%rdi
+	addq	%r10,%r13
+	adcq	$0,%rdi
+	movq	%r13,-8(%rsp,%r15,8)	/* top scratch word */
+	movq	%rdi,(%rsp,%r15,8)	/* carry word above it */
+
+	leaq	1(%r14),%r14
+.p2align	2
+L$outer4x:
+	movq	(%r12,%r14,8),%rbx	/* next word of the second operand */
+	xorq	%r15,%r15
+	movq	(%rsp),%r10
+	movq	%r8,%rbp
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	(%rcx),%rax
+	adcq	$0,%rdx
+
+	imulq	%r10,%rbp	/* per-row multiplier for the modulus */
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r10
+	movq	8(%rsi),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	8(%rcx),%rax
+	adcq	$0,%rdx
+	addq	8(%rsp),%r11
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	16(%rsi),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	leaq	4(%r15),%r15
+	adcq	$0,%rdx
+	movq	%rdi,(%rsp)
+	movq	%rdx,%r13
+	jmp	L$inner4x
+.p2align	4
+L$inner4x:	/* later rows: as L$1st4x, but each product also accumulates the previous scratch word */
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	-16(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	-16(%rsp,%r15,8),%r10
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	-8(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-24(%rsp,%r15,8)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	-8(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	-8(%rsp,%r15,8),%r11
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%rdi,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	(%rsp,%r15,8),%r10
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	8(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-8(%rsp,%r15,8)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	8(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	8(%rsp,%r15,8),%r11
+	adcq	$0,%rdx
+	leaq	4(%r15),%r15
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	-16(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%rdi,-32(%rsp,%r15,8)
+	movq	%rdx,%r13
+	cmpq	%r9,%r15
+	jb	L$inner4x
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	-16(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	-16(%rsp,%r15,8),%r10
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	-8(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-24(%rsp,%r15,8)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	-8(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	-8(%rsp,%r15,8),%r11
+	adcq	$0,%rdx
+	leaq	1(%r14),%r14	/* advance the outer index */
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	(%rsi),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%rdi,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+
+	xorq	%rdi,%rdi
+	addq	%r10,%r13
+	adcq	$0,%rdi
+	addq	(%rsp,%r9,8),%r13	/* add the carry word left by the previous row */
+	adcq	$0,%rdi
+	movq	%r13,-8(%rsp,%r15,8)
+	movq	%rdi,(%rsp,%r15,8)
+
+	cmpq	%r9,%r14
+	jb	L$outer4x
+	movq	16(%rsp,%r9,8),%rdi	/* reload the result pointer saved at L$mul4x_body */
+	leaq	-4(%r9),%r15
+	movq	0(%rsp),%rax
+	pxor	%xmm0,%xmm0	/* zero vector, used below to wipe the scratch area */
+	movq	8(%rsp),%rdx
+	shrq	$2,%r15	/* loop count = (count - 4) / 4 */
+	leaq	(%rsp),%rsi	/* %rsi now points at the scratch array */
+	xorq	%r14,%r14
+
+	subq	0(%rcx),%rax	/* start the borrow chain: scratch - modulus */
+	movq	16(%rsi),%rbx
+	movq	24(%rsi),%rbp
+	sbbq	8(%rcx),%rdx
+	jmp	L$sub4x
+.p2align	4
+L$sub4x:	/* result = scratch - modulus, four words per iteration; mov/lea/dec leave CF intact between the sbbq steps */
+	movq	%rax,0(%rdi,%r14,8)
+	movq	%rdx,8(%rdi,%r14,8)
+	sbbq	16(%rcx,%r14,8),%rbx
+	movq	32(%rsi,%r14,8),%rax
+	movq	40(%rsi,%r14,8),%rdx
+	sbbq	24(%rcx,%r14,8),%rbp
+	movq	%rbx,16(%rdi,%r14,8)
+	movq	%rbp,24(%rdi,%r14,8)
+	sbbq	32(%rcx,%r14,8),%rax
+	movq	48(%rsi,%r14,8),%rbx
+	movq	56(%rsi,%r14,8),%rbp
+	sbbq	40(%rcx,%r14,8),%rdx
+	leaq	4(%r14),%r14
+	decq	%r15
+	jnz	L$sub4x
+
+	movq	%rax,0(%rdi,%r14,8)
+	movq	32(%rsi,%r14,8),%rax	/* carry word above the scratch array */
+	sbbq	16(%rcx,%r14,8),%rbx
+	movq	%rdx,8(%rdi,%r14,8)
+	sbbq	24(%rcx,%r14,8),%rbp
+	movq	%rbx,16(%rdi,%r14,8)
+
+	sbbq	$0,%rax	/* fold the final borrow into the carry word; %rax is then used as a select mask */
+	movq	%rbp,24(%rdi,%r14,8)
+	xorq	%r14,%r14
+	andq	%rax,%rsi
+	notq	%rax
+	movq	%rdi,%rcx
+	andq	%rax,%rcx
+	leaq	-4(%r9),%r15
+	orq	%rcx,%rsi	/* branch-free pointer select: %rsi = (mask & scratch) | (~mask & result) */
+	shrq	$2,%r15
+
+	movdqu	(%rsi),%xmm1
+	movdqa	%xmm0,(%rsp)	/* zero the scratch words as they are consumed */
+	movdqu	%xmm1,(%rdi)
+	jmp	L$copy4x
+.p2align	4
+L$copy4x:	/* copy the selected source to the result 32 bytes per iteration while zeroing the scratch area */
+	movdqu	16(%rsi,%r14,1),%xmm2
+	movdqu	32(%rsi,%r14,1),%xmm1
+	movdqa	%xmm0,16(%rsp,%r14,1)
+	movdqu	%xmm2,16(%rdi,%r14,1)
+	movdqa	%xmm0,32(%rsp,%r14,1)
+	movdqu	%xmm1,32(%rdi,%r14,1)
+	leaq	32(%r14),%r14
+	decq	%r15
+	jnz	L$copy4x
+
+	movdqu	16(%rsi,%r14,1),%xmm2
+	movdqa	%xmm0,16(%rsp,%r14,1)
+	movdqu	%xmm2,16(%rdi,%r14,1)
+	movq	8(%rsp,%r9,8),%rsi	/* reload the entry %rsp saved after the page walk */
+
+	movq	$1,%rax	/* return value 1 */
+	movq	-48(%rsi),%r15
+
+	movq	-40(%rsi),%r14
+
+	movq	-32(%rsi),%r13
+
+	movq	-24(%rsi),%r12
+
+	movq	-16(%rsi),%rbp
+
+	movq	-8(%rsi),%rbx
+
+	leaq	(%rsi),%rsp	/* restore the caller's stack pointer */
+
+L$mul4x_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+
+
+
+.p2align	5
+bn_sqr8x_mont:	/* Squaring path; entered from _bn_mul_mont via L$sqr8x_enter. Builds a frame, calls _bn_sqr8x_internal, then does a final subtract and masked copy; returns 1. */
+
+	movq	%rsp,%rax	/* remember the entry %rsp */
+
+L$sqr8x_enter:
+	pushq	%rbx
+
+	pushq	%rbp
+
+	pushq	%r12
+
+	pushq	%r13
+
+	pushq	%r14
+
+	pushq	%r15
+
+L$sqr8x_prologue:
+
+	movl	%r9d,%r10d
+	shll	$3,%r9d	/* %r9 = count * 8 (bytes) */
+	shlq	$3+2,%r10	/* %r10 = count * 32 */
+	negq	%r9
+
+
+
+
+
+/* Pick the frame address from the low 12 bits of its distance to the %rsi buffer (presumably to control 4096-byte aliasing -- confirm against the generator). */
+	leaq	-64(%rsp,%r9,2),%r11
+	movq	%rsp,%rbp
+	movq	(%r8),%r8	/* load the reduction constant itself */
+	subq	%rsi,%r11
+	andq	$4095,%r11
+	cmpq	%r11,%r10
+	jb	L$sqr8x_sp_alt
+	subq	%r11,%rbp
+	leaq	-64(%rbp,%r9,2),%rbp
+	jmp	L$sqr8x_sp_done
+
+.p2align	5
+L$sqr8x_sp_alt:
+	leaq	4096-64(,%r9,2),%r10
+	leaq	-64(%rbp,%r9,2),%rbp
+	subq	%r10,%r11
+	movq	$0,%r10
+	cmovcq	%r10,%r11	/* clamp to 0 when the subtraction borrowed */
+	subq	%r11,%rbp
+L$sqr8x_sp_done:
+	andq	$-64,%rbp	/* 64-byte align the frame */
+	movq	%rsp,%r11
+	subq	%rbp,%r11
+	andq	$-4096,%r11
+	leaq	(%r11,%rbp,1),%rsp
+	movq	(%rsp),%r10
+	cmpq	%rbp,%rsp
+	ja	L$sqr8x_page_walk
+	jmp	L$sqr8x_page_walk_done
+
+.p2align	4
+L$sqr8x_page_walk:	/* step %rsp down 4096 bytes at a time, reading one word per step, until it reaches %rbp */
+	leaq	-4096(%rsp),%rsp
+	movq	(%rsp),%r10
+	cmpq	%rbp,%rsp
+	ja	L$sqr8x_page_walk
+L$sqr8x_page_walk_done:
+
+	movq	%r9,%r10	/* %r10 = -(count * 8) */
+	negq	%r9	/* %r9 = count * 8 again */
+
+	movq	%r8,32(%rsp)	/* reduction constant at frame offset 32 */
+	movq	%rax,40(%rsp)	/* entry %rsp at frame offset 40 */
+
+L$sqr8x_body:
+
+.byte	102,72,15,110,209	/* movq %rcx,%xmm2 : stash the modulus pointer */
+	pxor	%xmm0,%xmm0
+.byte	102,72,15,110,207	/* movq %rdi,%xmm1 : stash the result pointer */
+.byte	102,73,15,110,218	/* movq %r10,%xmm3 */
+	call	_bn_sqr8x_internal	/* defined outside this view; the code below relies on the %rdi, %r9, %rbp and %rax it leaves behind */
+
+
+
+
+	leaq	(%rdi,%r9,1),%rbx
+	movq	%r9,%rcx
+	movq	%r9,%rdx
+.byte	102,72,15,126,207	/* movq %xmm1,%rdi : restore the result pointer */
+	sarq	$3+2,%rcx	/* loop counter in 32-byte units; counted up to zero below, so %r9 is presumably negative here */
+	jmp	L$sqr8x_sub
+
+.p2align	5
+L$sqr8x_sub:	/* result = (%rbx) - (%rbp) with borrow, 32 bytes per iteration */
+	movq	0(%rbx),%r12
+	movq	8(%rbx),%r13
+	movq	16(%rbx),%r14
+	movq	24(%rbx),%r15
+	leaq	32(%rbx),%rbx
+	sbbq	0(%rbp),%r12
+	sbbq	8(%rbp),%r13
+	sbbq	16(%rbp),%r14
+	sbbq	24(%rbp),%r15
+	leaq	32(%rbp),%rbp
+	movq	%r12,0(%rdi)
+	movq	%r13,8(%rdi)
+	movq	%r14,16(%rdi)
+	movq	%r15,24(%rdi)
+	leaq	32(%rdi),%rdi
+	incq	%rcx	/* inc leaves CF intact, so the borrow carries into the next iteration */
+	jnz	L$sqr8x_sub
+
+	sbbq	$0,%rax	/* fold the final borrow into %rax; it is broadcast as the select mask below */
+	leaq	(%rbx,%r9,1),%rbx	/* rewind both pointers by adding %r9 */
+	leaq	(%rdi,%r9,1),%rdi
+
+.byte	102,72,15,110,200	/* movq %rax,%xmm1 */
+	pxor	%xmm0,%xmm0
+	pshufd	$0,%xmm1,%xmm1	/* broadcast the low dword of the mask to all four lanes */
+	movq	40(%rsp),%rsi	/* reload the entry %rsp saved in the frame */
+
+	jmp	L$sqr8x_cond_copy
+
+.p2align	5
+L$sqr8x_cond_copy:	/* masked select between the (%rbx) buffer and the result already in (%rdi), zeroing temporaries as it goes */
+	movdqa	0(%rbx),%xmm2
+	movdqa	16(%rbx),%xmm3
+	leaq	32(%rbx),%rbx
+	movdqu	0(%rdi),%xmm4
+	movdqu	16(%rdi),%xmm5
+	leaq	32(%rdi),%rdi
+	movdqa	%xmm0,-32(%rbx)	/* zero the 32 bytes just read ... */
+	movdqa	%xmm0,-16(%rbx)
+	movdqa	%xmm0,-32(%rbx,%rdx,1)	/* ... and the 32 bytes at the same position offset by %rdx */
+	movdqa	%xmm0,-16(%rbx,%rdx,1)
+	pcmpeqd	%xmm1,%xmm0	/* %xmm0 was zero, so this yields all-ones exactly where the mask is zero */
+	pand	%xmm1,%xmm2
+	pand	%xmm1,%xmm3
+	pand	%xmm0,%xmm4
+	pand	%xmm0,%xmm5
+	pxor	%xmm0,%xmm0	/* back to zero for the next iteration */
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqu	%xmm4,-32(%rdi)
+	movdqu	%xmm5,-16(%rdi)
+	addq	$32,%r9
+	jnz	L$sqr8x_cond_copy
+
+	movq	$1,%rax	/* return value 1 */
+	movq	-48(%rsi),%r15
+
+	movq	-40(%rsi),%r14
+
+	movq	-32(%rsi),%r13
+
+	movq	-24(%rsi),%r12
+
+	movq	-16(%rsi),%rbp
+
+	movq	-8(%rsi),%rbx
+
+	leaq	(%rsi),%rsp	/* restore the caller's stack pointer */
+
+L$sqr8x_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align	4
+#endif
diff --git a/third_party/boringssl/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S
new file mode 100644
index 0000000..abc65f1
--- /dev/null
+++ b/third_party/boringssl/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S
@@ -0,0 +1,2392 @@
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+.text	
+
+
+
+.globl	_bn_mul_mont_gather5
+.private_extern _bn_mul_mont_gather5
+
+.p2align	6
+_bn_mul_mont_gather5:	/* Multiply/reduce loop whose second operand is gathered from a table at %rdx: every row is read and the wanted one is selected with masks built from the stack argument; returns 1. */
+
+	movl	%r9d,%r9d	/* zero-extend the 32-bit word count */
+	movq	%rsp,%rax	/* remember the entry %rsp */
+
+	testl	$7,%r9d
+	jnz	L$mul_enter	/* count not a multiple of 8: generic loop below */
+	jmp	L$mul4x_enter
+
+.p2align	4
+L$mul_enter:
+	movd	8(%rsp),%xmm5	/* 32-bit stack argument just above the return address: the table index */
+	pushq	%rbx
+
+	pushq	%rbp
+
+	pushq	%r12
+
+	pushq	%r13
+
+	pushq	%r14
+
+	pushq	%r15
+
+
+	negq	%r9
+	movq	%rsp,%r11
+	leaq	-280(%rsp,%r9,8),%r10	/* room for the scratch words plus the 256-byte mask array */
+	negq	%r9
+	andq	$-1024,%r10	/* round the new stack top down to a 1024-byte boundary */
+
+
+
+
+
+
+
+
+/* Move %rsp down to %r10 in steps of at most 4096 bytes, reading one word at each step. */
+	subq	%r10,%r11
+	andq	$-4096,%r11
+	leaq	(%r10,%r11,1),%rsp
+	movq	(%rsp),%r11
+	cmpq	%r10,%rsp
+	ja	L$mul_page_walk
+	jmp	L$mul_page_walk_done
+
+L$mul_page_walk:
+	leaq	-4096(%rsp),%rsp
+	movq	(%rsp),%r11
+	cmpq	%r10,%rsp
+	ja	L$mul_page_walk
+L$mul_page_walk_done:
+
+	leaq	L$inc(%rip),%r10	/* L$inc is defined outside this view; presumably the counter seed and increment -- confirm */
+	movq	%rax,8(%rsp,%r9,8)	/* save the entry %rsp above the scratch words */
+
+L$mul_body:
+/* Build sixteen 16-byte equality masks at 112(%r10)..352(%r10): a running counter vector is compared against the broadcast index. */
+	leaq	128(%rdx),%r12	/* table pointer biased by 128 so rows are addressed as -128..112(%r12) */
+	movdqa	0(%r10),%xmm0
+	movdqa	16(%r10),%xmm1
+	leaq	24-112(%rsp,%r9,8),%r10
+	andq	$-16,%r10	/* 16-byte align the mask array base */
+
+	pshufd	$0,%xmm5,%xmm5	/* broadcast the index to all four dwords */
+	movdqa	%xmm1,%xmm4
+	movdqa	%xmm1,%xmm2
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+.byte	0x67
+	movdqa	%xmm4,%xmm3
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,112(%r10)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,128(%r10)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,144(%r10)
+	movdqa	%xmm4,%xmm2
+
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,160(%r10)
+	movdqa	%xmm4,%xmm3
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,176(%r10)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,192(%r10)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,208(%r10)
+	movdqa	%xmm4,%xmm2
+
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,224(%r10)
+	movdqa	%xmm4,%xmm3
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,240(%r10)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,256(%r10)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,272(%r10)
+	movdqa	%xmm4,%xmm2
+
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,288(%r10)
+	movdqa	%xmm4,%xmm3
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,304(%r10)
+
+	paddd	%xmm2,%xmm3
+.byte	0x67
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,320(%r10)
+
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,336(%r10)
+	pand	64(%r12),%xmm0	/* start the gather: AND each 16-byte table row with its mask ... */
+
+	pand	80(%r12),%xmm1
+	pand	96(%r12),%xmm2
+	movdqa	%xmm3,352(%r10)
+	pand	112(%r12),%xmm3
+	por	%xmm2,%xmm0	/* ... and OR the results together */
+	por	%xmm3,%xmm1
+	movdqa	-128(%r12),%xmm4
+	movdqa	-112(%r12),%xmm5
+	movdqa	-96(%r12),%xmm2
+	pand	112(%r10),%xmm4
+	movdqa	-80(%r12),%xmm3
+	pand	128(%r10),%xmm5
+	por	%xmm4,%xmm0
+	pand	144(%r10),%xmm2
+	por	%xmm5,%xmm1
+	pand	160(%r10),%xmm3
+	por	%xmm2,%xmm0
+	por	%xmm3,%xmm1
+	movdqa	-64(%r12),%xmm4
+	movdqa	-48(%r12),%xmm5
+	movdqa	-32(%r12),%xmm2
+	pand	176(%r10),%xmm4
+	movdqa	-16(%r12),%xmm3
+	pand	192(%r10),%xmm5
+	por	%xmm4,%xmm0
+	pand	208(%r10),%xmm2
+	por	%xmm5,%xmm1
+	pand	224(%r10),%xmm3
+	por	%xmm2,%xmm0
+	por	%xmm3,%xmm1
+	movdqa	0(%r12),%xmm4
+	movdqa	16(%r12),%xmm5
+	movdqa	32(%r12),%xmm2
+	pand	240(%r10),%xmm4
+	movdqa	48(%r12),%xmm3
+	pand	256(%r10),%xmm5
+	por	%xmm4,%xmm0
+	pand	272(%r10),%xmm2
+	por	%xmm5,%xmm1
+	pand	288(%r10),%xmm3
+	por	%xmm2,%xmm0
+	por	%xmm3,%xmm1
+	por	%xmm1,%xmm0
+	pshufd	$0x4e,%xmm0,%xmm1	/* swap the two 64-bit halves ... */
+	por	%xmm1,%xmm0	/* ... and OR them so the selected word lands in the low half */
+	leaq	256(%r12),%r12	/* advance to the next 256-byte group of the table */
+.byte	102,72,15,126,195	/* movq %xmm0,%rbx : the gathered operand word */
+
+	movq	(%r8),%r8	/* load the reduction constant itself */
+	movq	(%rsi),%rax
+
+	xorq	%r14,%r14	/* outer index */
+	xorq	%r15,%r15	/* inner index */
+
+	movq	%r8,%rbp
+	mulq	%rbx
+	movq	%rax,%r10
+	movq	(%rcx),%rax
+
+	imulq	%r10,%rbp	/* per-row multiplier for the modulus */
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r10
+	movq	8(%rsi),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r13
+
+	leaq	1(%r15),%r15
+	jmp	L$1st_enter
+
+.p2align	4
+L$1st:
+	addq	%rax,%r13
+	movq	(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r11,%r13
+	movq	%r10,%r11
+	adcq	$0,%rdx
+	movq	%r13,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+
+L$1st_enter:
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	leaq	1(%r15),%r15
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	cmpq	%r9,%r15
+	jne	L$1st
+
+
+	addq	%rax,%r13
+	adcq	$0,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+	movq	%r13,-16(%rsp,%r9,8)
+	movq	%rdx,%r13
+	movq	%r10,%r11
+
+	xorq	%rdx,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+	movq	%r13,-8(%rsp,%r9,8)	/* top scratch word */
+	movq	%rdx,(%rsp,%r9,8)	/* carry word above it */
+
+	leaq	1(%r14),%r14
+	jmp	L$outer
+.p2align	4
+L$outer:
+	leaq	24+128(%rsp,%r9,8),%rdx
+	andq	$-16,%rdx	/* re-derive the mask array address (biased by 128) that the prologue filled */
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	movdqa	-128(%r12),%xmm0	/* masked gather of the next operand word: all 16 rows are read */
+	movdqa	-112(%r12),%xmm1
+	movdqa	-96(%r12),%xmm2
+	movdqa	-80(%r12),%xmm3
+	pand	-128(%rdx),%xmm0
+	pand	-112(%rdx),%xmm1
+	por	%xmm0,%xmm4
+	pand	-96(%rdx),%xmm2
+	por	%xmm1,%xmm5
+	pand	-80(%rdx),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	-64(%r12),%xmm0
+	movdqa	-48(%r12),%xmm1
+	movdqa	-32(%r12),%xmm2
+	movdqa	-16(%r12),%xmm3
+	pand	-64(%rdx),%xmm0
+	pand	-48(%rdx),%xmm1
+	por	%xmm0,%xmm4
+	pand	-32(%rdx),%xmm2
+	por	%xmm1,%xmm5
+	pand	-16(%rdx),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	0(%r12),%xmm0
+	movdqa	16(%r12),%xmm1
+	movdqa	32(%r12),%xmm2
+	movdqa	48(%r12),%xmm3
+	pand	0(%rdx),%xmm0
+	pand	16(%rdx),%xmm1
+	por	%xmm0,%xmm4
+	pand	32(%rdx),%xmm2
+	por	%xmm1,%xmm5
+	pand	48(%rdx),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	64(%r12),%xmm0
+	movdqa	80(%r12),%xmm1
+	movdqa	96(%r12),%xmm2
+	movdqa	112(%r12),%xmm3
+	pand	64(%rdx),%xmm0
+	pand	80(%rdx),%xmm1
+	por	%xmm0,%xmm4
+	pand	96(%rdx),%xmm2
+	por	%xmm1,%xmm5
+	pand	112(%rdx),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	por	%xmm5,%xmm4
+	pshufd	$0x4e,%xmm4,%xmm0	/* swap the two 64-bit halves ... */
+	por	%xmm4,%xmm0	/* ... and OR them so the selected word lands in the low half */
+	leaq	256(%r12),%r12	/* advance to the next 256-byte group of the table */
+
+	movq	(%rsi),%rax
+.byte	102,72,15,126,195	/* movq %xmm0,%rbx : the gathered operand word */
+
+	xorq	%r15,%r15
+	movq	%r8,%rbp
+	movq	(%rsp),%r10
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	(%rcx),%rax
+	adcq	$0,%rdx
+
+	imulq	%r10,%rbp	/* per-row multiplier for the modulus */
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r10
+	movq	8(%rsi),%rax
+	adcq	$0,%rdx
+	movq	8(%rsp),%r10
+	movq	%rdx,%r13
+
+	leaq	1(%r15),%r15
+	jmp	L$inner_enter
+
+.p2align	4
+L$inner:
+	addq	%rax,%r13
+	movq	(%rsi,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	movq	(%rsp,%r15,8),%r10	/* previous row's scratch word to accumulate */
+	adcq	$0,%rdx
+	movq	%r13,-16(%rsp,%r15,8)
+	movq	%rdx,%r13
+
+L$inner_enter:
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	(%rcx,%r15,8),%rax
+	adcq	$0,%rdx
+	addq	%r11,%r10
+	movq	%rdx,%r11
+	adcq	$0,%r11
+	leaq	1(%r15),%r15
+
+	mulq	%rbp
+	cmpq	%r9,%r15
+	jne	L$inner
+
+	addq	%rax,%r13
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	movq	(%rsp,%r9,8),%r10
+	adcq	$0,%rdx
+	movq	%r13,-16(%rsp,%r9,8)
+	movq	%rdx,%r13
+
+	xorq	%rdx,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-8(%rsp,%r9,8)
+	movq	%rdx,(%rsp,%r9,8)
+
+	leaq	1(%r14),%r14
+	cmpq	%r9,%r14
+	jb	L$outer
+
+	xorq	%r14,%r14	/* index = 0; xor also clears CF, which the first sbbq in L$sub consumes */
+	movq	(%rsp),%rax
+	leaq	(%rsp),%rsi	/* %rsi now points at the scratch array */
+	movq	%r9,%r15
+	jmp	L$sub
+.p2align	4
+L$sub:
+	sbbq	(%rcx,%r14,8),%rax	/* result[i] = scratch[i] - modulus[i] - borrow */
+	movq	%rax,(%rdi,%r14,8)
+	movq	8(%rsi,%r14,8),%rax
+	leaq	1(%r14),%r14	/* lea and dec leave CF intact, so the borrow survives the loop */
+	decq	%r15
+	jnz	L$sub
+
+	sbbq	$0,%rax	/* fold the final borrow into the top scratch word; %rax is then used as a select mask */
+	xorq	%r14,%r14
+	andq	%rax,%rsi
+	notq	%rax
+	movq	%rdi,%rcx
+	andq	%rax,%rcx
+	movq	%r9,%r15
+	orq	%rcx,%rsi	/* branch-free pointer select: %rsi = (mask & scratch) | (~mask & result) */
+.p2align	4
+L$copy:
+	movq	(%rsi,%r14,8),%rax
+	movq	%r14,(%rsp,%r14,8)	/* overwrite each scratch word (with the loop index) as it is consumed */
+	movq	%rax,(%rdi,%r14,8)
+	leaq	1(%r14),%r14
+	subq	$1,%r15
+	jnz	L$copy
+
+	movq	8(%rsp,%r9,8),%rsi	/* reload the entry %rsp saved after the page walk */
+
+	movq	$1,%rax	/* return value 1 */
+
+	movq	-48(%rsi),%r15
+
+	movq	-40(%rsi),%r14
+
+	movq	-32(%rsi),%r13
+
+	movq	-24(%rsi),%r12
+
+	movq	-16(%rsi),%rbp
+
+	movq	-8(%rsi),%rbx
+
+	leaq	(%rsi),%rsp	/* restore the caller's stack pointer */
+
+L$mul_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+
+.p2align	5
+bn_mul4x_mont_gather5:	/* Frame setup and teardown around mul4x_internal; entered from _bn_mul_mont_gather5 via L$mul4x_enter; returns 1. */
+
+.byte	0x67
+	movq	%rsp,%rax	/* remember the entry %rsp */
+
+L$mul4x_enter:
+	pushq	%rbx
+
+	pushq	%rbp
+
+	pushq	%r12
+
+	pushq	%r13
+
+	pushq	%r14
+
+	pushq	%r15
+
+L$mul4x_prologue:
+
+.byte	0x67
+	shll	$3,%r9d	/* %r9 = count * 8 (bytes) */
+	leaq	(%r9,%r9,2),%r10	/* %r10 = 3 * %r9 */
+	negq	%r9
+
+
+
+
+
+
+
+
+
+/* Pick the frame address from the low 12 bits of its distance to the %rdi buffer (presumably to control 4096-byte aliasing -- confirm against the generator). */
+	leaq	-320(%rsp,%r9,2),%r11
+	movq	%rsp,%rbp
+	subq	%rdi,%r11
+	andq	$4095,%r11
+	cmpq	%r11,%r10
+	jb	L$mul4xsp_alt
+	subq	%r11,%rbp
+	leaq	-320(%rbp,%r9,2),%rbp
+	jmp	L$mul4xsp_done
+
+.p2align	5
+L$mul4xsp_alt:
+	leaq	4096-320(,%r9,2),%r10
+	leaq	-320(%rbp,%r9,2),%rbp
+	subq	%r10,%r11
+	movq	$0,%r10
+	cmovcq	%r10,%r11	/* clamp to 0 when the subtraction borrowed */
+	subq	%r11,%rbp
+L$mul4xsp_done:
+	andq	$-64,%rbp	/* 64-byte align the frame */
+	movq	%rsp,%r11
+	subq	%rbp,%r11
+	andq	$-4096,%r11
+	leaq	(%r11,%rbp,1),%rsp
+	movq	(%rsp),%r10
+	cmpq	%rbp,%rsp
+	ja	L$mul4x_page_walk
+	jmp	L$mul4x_page_walk_done
+
+L$mul4x_page_walk:	/* step %rsp down 4096 bytes at a time, reading one word per step, until it reaches %rbp */
+	leaq	-4096(%rsp),%rsp
+	movq	(%rsp),%r10
+	cmpq	%rbp,%rsp
+	ja	L$mul4x_page_walk
+L$mul4x_page_walk_done:
+
+	negq	%r9	/* %r9 = count * 8 again */
+
+	movq	%rax,40(%rsp)	/* entry %rsp at frame offset 40 */
+
+L$mul4x_body:
+
+	call	mul4x_internal	/* %rax still holds the entry %rsp; mul4x_internal reads the stack argument through it */
+
+	movq	40(%rsp),%rsi	/* reload the entry %rsp saved in the frame */
+
+	movq	$1,%rax	/* return value 1 */
+
+	movq	-48(%rsi),%r15
+
+	movq	-40(%rsi),%r14
+
+	movq	-32(%rsi),%r13
+
+	movq	-24(%rsi),%r12
+
+	movq	-16(%rsi),%rbp
+
+	movq	-8(%rsi),%rbx
+
+	leaq	(%rsi),%rsp	/* restore the caller's stack pointer */
+
+L$mul4x_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+
+
+.p2align	5
+mul4x_internal:
+	shlq	$5,%r9
+	movd	8(%rax),%xmm5
+	leaq	L$inc(%rip),%rax
+	leaq	128(%rdx,%r9,1),%r13
+	shrq	$5,%r9
+	movdqa	0(%rax),%xmm0
+	movdqa	16(%rax),%xmm1
+	leaq	88-112(%rsp,%r9,1),%r10
+	leaq	128(%rdx),%r12
+
+	pshufd	$0,%xmm5,%xmm5
+	movdqa	%xmm1,%xmm4
+.byte	0x67,0x67
+	movdqa	%xmm1,%xmm2
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+.byte	0x67
+	movdqa	%xmm4,%xmm3
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,112(%r10)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,128(%r10)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,144(%r10)
+	movdqa	%xmm4,%xmm2
+
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,160(%r10)
+	movdqa	%xmm4,%xmm3
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,176(%r10)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,192(%r10)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,208(%r10)
+	movdqa	%xmm4,%xmm2
+
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,224(%r10)
+	movdqa	%xmm4,%xmm3
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,240(%r10)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,256(%r10)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,272(%r10)
+	movdqa	%xmm4,%xmm2
+
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,288(%r10)
+	movdqa	%xmm4,%xmm3
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,304(%r10)
+
+	paddd	%xmm2,%xmm3
+.byte	0x67
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,320(%r10)
+
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,336(%r10)
+	pand	64(%r12),%xmm0
+
+	pand	80(%r12),%xmm1
+	pand	96(%r12),%xmm2
+	movdqa	%xmm3,352(%r10)
+	pand	112(%r12),%xmm3
+	por	%xmm2,%xmm0
+	por	%xmm3,%xmm1
+	movdqa	-128(%r12),%xmm4
+	movdqa	-112(%r12),%xmm5
+	movdqa	-96(%r12),%xmm2
+	pand	112(%r10),%xmm4
+	movdqa	-80(%r12),%xmm3
+	pand	128(%r10),%xmm5
+	por	%xmm4,%xmm0
+	pand	144(%r10),%xmm2
+	por	%xmm5,%xmm1
+	pand	160(%r10),%xmm3
+	por	%xmm2,%xmm0
+	por	%xmm3,%xmm1
+	movdqa	-64(%r12),%xmm4
+	movdqa	-48(%r12),%xmm5
+	movdqa	-32(%r12),%xmm2
+	pand	176(%r10),%xmm4
+	movdqa	-16(%r12),%xmm3
+	pand	192(%r10),%xmm5
+	por	%xmm4,%xmm0
+	pand	208(%r10),%xmm2
+	por	%xmm5,%xmm1
+	pand	224(%r10),%xmm3
+	por	%xmm2,%xmm0
+	por	%xmm3,%xmm1
+	movdqa	0(%r12),%xmm4
+	movdqa	16(%r12),%xmm5
+	movdqa	32(%r12),%xmm2
+	pand	240(%r10),%xmm4
+	movdqa	48(%r12),%xmm3
+	pand	256(%r10),%xmm5
+	por	%xmm4,%xmm0
+	pand	272(%r10),%xmm2
+	por	%xmm5,%xmm1
+	pand	288(%r10),%xmm3
+	por	%xmm2,%xmm0
+	por	%xmm3,%xmm1
+	por	%xmm1,%xmm0
+	pshufd	$0x4e,%xmm0,%xmm1
+	por	%xmm1,%xmm0
+	leaq	256(%r12),%r12
+.byte	102,72,15,126,195
+
+	movq	%r13,16+8(%rsp)
+	movq	%rdi,56+8(%rsp)
+
+	movq	(%r8),%r8
+	movq	(%rsi),%rax
+	leaq	(%rsi,%r9,1),%rsi
+	negq	%r9
+
+	movq	%r8,%rbp
+	mulq	%rbx
+	movq	%rax,%r10
+	movq	(%rcx),%rax
+
+	imulq	%r10,%rbp
+	leaq	64+8(%rsp),%r14
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r10
+	movq	8(%rsi,%r9,1),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	8(%rcx),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	16(%rsi,%r9,1),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	leaq	32(%r9),%r15
+	leaq	32(%rcx),%rcx
+	adcq	$0,%rdx
+	movq	%rdi,(%r14)
+	movq	%rdx,%r13
+	jmp	L$1st4x
+
+.p2align	5
+L$1st4x:
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	-16(%rcx),%rax
+	leaq	32(%r14),%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	-8(%rsi,%r15,1),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-24(%r14)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	-8(%rcx),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	(%rsi,%r15,1),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%rdi,-16(%r14)
+	movq	%rdx,%r13
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	0(%rcx),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	8(%rsi,%r15,1),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-8(%r14)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	8(%rcx),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	16(%rsi,%r15,1),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	leaq	32(%rcx),%rcx
+	adcq	$0,%rdx
+	movq	%rdi,(%r14)
+	movq	%rdx,%r13
+
+	addq	$32,%r15
+	jnz	L$1st4x
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	-16(%rcx),%rax
+	leaq	32(%r14),%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	-8(%rsi),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%r13,-24(%r14)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	-8(%rcx),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	(%rsi,%r9,1),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%rdi,-16(%r14)
+	movq	%rdx,%r13
+
+	leaq	(%rcx,%r9,1),%rcx
+
+	xorq	%rdi,%rdi
+	addq	%r10,%r13
+	adcq	$0,%rdi
+	movq	%r13,-8(%r14)
+
+	jmp	L$outer4x
+
+.p2align	5
+L$outer4x:
+	leaq	16+128(%r14),%rdx
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	movdqa	-128(%r12),%xmm0
+	movdqa	-112(%r12),%xmm1
+	movdqa	-96(%r12),%xmm2
+	movdqa	-80(%r12),%xmm3
+	pand	-128(%rdx),%xmm0
+	pand	-112(%rdx),%xmm1
+	por	%xmm0,%xmm4
+	pand	-96(%rdx),%xmm2
+	por	%xmm1,%xmm5
+	pand	-80(%rdx),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	-64(%r12),%xmm0
+	movdqa	-48(%r12),%xmm1
+	movdqa	-32(%r12),%xmm2
+	movdqa	-16(%r12),%xmm3
+	pand	-64(%rdx),%xmm0
+	pand	-48(%rdx),%xmm1
+	por	%xmm0,%xmm4
+	pand	-32(%rdx),%xmm2
+	por	%xmm1,%xmm5
+	pand	-16(%rdx),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	0(%r12),%xmm0
+	movdqa	16(%r12),%xmm1
+	movdqa	32(%r12),%xmm2
+	movdqa	48(%r12),%xmm3
+	pand	0(%rdx),%xmm0
+	pand	16(%rdx),%xmm1
+	por	%xmm0,%xmm4
+	pand	32(%rdx),%xmm2
+	por	%xmm1,%xmm5
+	pand	48(%rdx),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	64(%r12),%xmm0
+	movdqa	80(%r12),%xmm1
+	movdqa	96(%r12),%xmm2
+	movdqa	112(%r12),%xmm3
+	pand	64(%rdx),%xmm0
+	pand	80(%rdx),%xmm1
+	por	%xmm0,%xmm4
+	pand	96(%rdx),%xmm2
+	por	%xmm1,%xmm5
+	pand	112(%rdx),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	por	%xmm5,%xmm4
+	pshufd	$0x4e,%xmm4,%xmm0
+	por	%xmm4,%xmm0
+	leaq	256(%r12),%r12
+.byte	102,72,15,126,195
+
+	movq	(%r14,%r9,1),%r10
+	movq	%r8,%rbp
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	(%rcx),%rax
+	adcq	$0,%rdx
+
+	imulq	%r10,%rbp
+	movq	%rdx,%r11
+	movq	%rdi,(%r14)
+
+	leaq	(%r14,%r9,1),%r14
+
+	mulq	%rbp
+	addq	%rax,%r10
+	movq	8(%rsi,%r9,1),%rax
+	adcq	$0,%rdx
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	8(%rcx),%rax
+	adcq	$0,%rdx
+	addq	8(%r14),%r11
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	16(%rsi,%r9,1),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	leaq	32(%r9),%r15
+	leaq	32(%rcx),%rcx
+	adcq	$0,%rdx
+	movq	%rdx,%r13
+	jmp	L$inner4x
+
+.p2align	5
+L$inner4x:
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	-16(%rcx),%rax
+	adcq	$0,%rdx
+	addq	16(%r14),%r10
+	leaq	32(%r14),%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	-8(%rsi,%r15,1),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%rdi,-32(%r14)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	-8(%rcx),%rax
+	adcq	$0,%rdx
+	addq	-8(%r14),%r11
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	(%rsi,%r15,1),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%r13,-24(%r14)
+	movq	%rdx,%r13
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	0(%rcx),%rax
+	adcq	$0,%rdx
+	addq	(%r14),%r10
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	8(%rsi,%r15,1),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%rdi,-16(%r14)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	8(%rcx),%rax
+	adcq	$0,%rdx
+	addq	8(%r14),%r11
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	16(%rsi,%r15,1),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	leaq	32(%rcx),%rcx
+	adcq	$0,%rdx
+	movq	%r13,-8(%r14)
+	movq	%rdx,%r13
+
+	addq	$32,%r15
+	jnz	L$inner4x
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	-16(%rcx),%rax
+	adcq	$0,%rdx
+	addq	16(%r14),%r10
+	leaq	32(%r14),%r14
+	adcq	$0,%rdx
+	movq	%rdx,%r11
+
+	mulq	%rbp
+	addq	%rax,%r13
+	movq	-8(%rsi),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r13
+	adcq	$0,%rdx
+	movq	%rdi,-32(%r14)
+	movq	%rdx,%rdi
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	%rbp,%rax
+	movq	-8(%rcx),%rbp
+	adcq	$0,%rdx
+	addq	-8(%r14),%r11
+	adcq	$0,%rdx
+	movq	%rdx,%r10
+
+	mulq	%rbp
+	addq	%rax,%rdi
+	movq	(%rsi,%r9,1),%rax
+	adcq	$0,%rdx
+	addq	%r11,%rdi
+	adcq	$0,%rdx
+	movq	%r13,-24(%r14)
+	movq	%rdx,%r13
+
+	movq	%rdi,-16(%r14)
+	leaq	(%rcx,%r9,1),%rcx
+
+	xorq	%rdi,%rdi
+	addq	%r10,%r13
+	adcq	$0,%rdi
+	addq	(%r14),%r13
+	adcq	$0,%rdi
+	movq	%r13,-8(%r14)
+
+	cmpq	16+8(%rsp),%r12
+	jb	L$outer4x
+	xorq	%rax,%rax
+	subq	%r13,%rbp
+	adcq	%r15,%r15
+	orq	%r15,%rdi
+	subq	%rdi,%rax
+	leaq	(%r14,%r9,1),%rbx
+	movq	(%rcx),%r12
+	leaq	(%rcx),%rbp
+	movq	%r9,%rcx
+	sarq	$3+2,%rcx
+	movq	56+8(%rsp),%rdi
+	decq	%r12
+	xorq	%r10,%r10
+	movq	8(%rbp),%r13
+	movq	16(%rbp),%r14
+	movq	24(%rbp),%r15
+	jmp	L$sqr4x_sub_entry
+
+.globl	_bn_power5
+.private_extern _bn_power5
+/* _bn_power5: runs five __bn_sqr8x_internal/__bn_post4x_internal passes followed by one mul4x_internal call, then returns 1 in rax. */
+.p2align	5
+_bn_power5:
+/* rax keeps the entry rsp; it is stored in the frame below and used by the epilogue to locate the saved registers. */
+	movq	%rsp,%rax
+
+	pushq	%rbx
+
+	pushq	%rbp
+
+	pushq	%r12
+
+	pushq	%r13
+
+	pushq	%r14
+
+	pushq	%r15
+
+L$power5_prologue:
+/* Scale r9d by 8 (presumably a word count to bytes -- confirm), set r10d = 3 * that, negate r9, and replace r8 by the qword it points to. */
+	shll	$3,%r9d
+	leal	(%r9,%r9,2),%r10d
+	negq	%r9
+	movq	(%r8),%r8
+
+/* Choose the frame base (rbp): 320 + 2*|r9| bytes below rsp, adjusted by the low 12 bits */
+/* of the distance between that address and rdi. */
+/* NOTE(review): presumably this keeps the frame from aliasing the rdi buffer modulo 4096 -- confirm against the generator source. */
+
+
+
+
+	leaq	-320(%rsp,%r9,2),%r11
+	movq	%rsp,%rbp
+	subq	%rdi,%r11
+	andq	$4095,%r11
+	cmpq	%r11,%r10
+	jb	L$pwr_sp_alt
+	subq	%r11,%rbp
+	leaq	-320(%rbp,%r9,2),%rbp
+	jmp	L$pwr_sp_done
+
+.p2align	5
+L$pwr_sp_alt:
+	leaq	4096-320(,%r9,2),%r10
+	leaq	-320(%rbp,%r9,2),%rbp
+	subq	%r10,%r11
+	movq	$0,%r10
+	cmovcq	%r10,%r11
+	subq	%r11,%rbp
+L$pwr_sp_done:
+	andq	$-64,%rbp	/* align the frame base down to 64 bytes */
+	movq	%rsp,%r11
+	subq	%rbp,%r11
+	andq	$-4096,%r11
+	leaq	(%r11,%rbp,1),%rsp
+	movq	(%rsp),%r10
+	cmpq	%rbp,%rsp
+	ja	L$pwr_page_walk
+	jmp	L$pwr_page_walk_done
+/* Lower rsp to rbp in 4096-byte steps, reading one qword at each step. */
+L$pwr_page_walk:
+	leaq	-4096(%rsp),%rsp
+	movq	(%rsp),%r10
+	cmpq	%rbp,%rsp
+	ja	L$pwr_page_walk
+L$pwr_page_walk_done:
+
+	movq	%r9,%r10
+	negq	%r9
+/* From here r10 holds the negated scaled count and r9 the positive one. */
+/* Frame slots written below: 32(%rsp) = value loaded from (r8), 40(%rsp) = rsp at entry. */
+
+
+
+
+
+
+
+
+	movq	%r8,32(%rsp)
+	movq	%rax,40(%rsp)
+
+L$power5_body:
+.byte	102,72,15,110,207	/* movq %rdi,%xmm1 */
+.byte	102,72,15,110,209	/* movq %rcx,%xmm2 */
+.byte	102,73,15,110,218	/* movq %r10,%xmm3 */
+.byte	102,72,15,110,226	/* movq %rdx,%xmm4 */
+
+	call	__bn_sqr8x_internal
+	call	__bn_post4x_internal
+	call	__bn_sqr8x_internal
+	call	__bn_post4x_internal
+	call	__bn_sqr8x_internal
+	call	__bn_post4x_internal
+	call	__bn_sqr8x_internal
+	call	__bn_post4x_internal
+	call	__bn_sqr8x_internal
+	call	__bn_post4x_internal
+
+.byte	102,72,15,126,209	/* movq %xmm2,%rcx */
+.byte	102,72,15,126,226	/* movq %xmm4,%rdx */
+	movq	%rsi,%rdi
+	movq	40(%rsp),%rax
+	leaq	32(%rsp),%r8	/* r8 = address of the frame slot holding the value loaded from (r8) in the prologue */
+
+	call	mul4x_internal
+/* Epilogue: rsi = entry rsp; the six pushed registers sit just below it. */
+	movq	40(%rsp),%rsi
+
+	movq	$1,%rax
+	movq	-48(%rsi),%r15
+
+	movq	-40(%rsi),%r14
+
+	movq	-32(%rsi),%r13
+
+	movq	-24(%rsi),%r12
+
+	movq	-16(%rsi),%rbp
+
+	movq	-8(%rsi),%rbx
+
+	leaq	(%rsi),%rsp
+
+L$power5_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+
+.globl	_bn_sqr8x_internal
+.private_extern _bn_sqr8x_internal
+.private_extern	_bn_sqr8x_internal
+
+.p2align	5
+_bn_sqr8x_internal:
+__bn_sqr8x_internal:
+/* __bn_sqr8x_internal: squaring step called five times by _bn_power5. */
+/* There is no ret in this routine: control falls through into */
+/* __bn_sqr8x_reduction, which returns to the caller. */
+/* Registers read on entry: rsi (input words), r9 and r10. The _bn_power5 */
+/* call site passes r9 = positive scaled count and r10 = -r9 (presumably the */
+/* length in bytes -- confirm for any other caller). */
+/* Phase 1 (L$sqr4x_1st .. L$sqr4x_inner and the code after the outer loop): */
+/*   accumulate products of pairs of input words into the scratch area */
+/*   addressed from 48+8(%rsp). */
+/* Phase 2 (L$sqr4x_shift_n_add): double the scratch words (lea x2 plus the */
+/*   bit shifted out of the previous word) and add one squared input word */
+/*   (mulq %rax) per pair of scratch words. */
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+	leaq	32(%r10),%rbp
+	leaq	(%rsi,%r9,1),%rsi
+/* rbp = r10 + 32 is used below as a byte offset relative to the advanced rsi. */
+	movq	%r9,%rcx
+
+
+	movq	-32(%rsi,%rbp,1),%r14
+	leaq	48+8(%rsp,%r9,2),%rdi
+	movq	-24(%rsi,%rbp,1),%rax
+	leaq	-32(%rdi,%rbp,1),%rdi
+	movq	-16(%rsi,%rbp,1),%rbx
+	movq	%rax,%r15
+
+	mulq	%r14
+	movq	%rax,%r10
+	movq	%rbx,%rax
+	movq	%rdx,%r11
+	movq	%r10,-24(%rdi,%rbp,1)
+
+	mulq	%r14
+	addq	%rax,%r11
+	movq	%rbx,%rax
+	adcq	$0,%rdx
+	movq	%r11,-16(%rdi,%rbp,1)
+	movq	%rdx,%r10
+
+
+	movq	-8(%rsi,%rbp,1),%rbx
+	mulq	%r15
+	movq	%rax,%r12
+	movq	%rbx,%rax
+	movq	%rdx,%r13
+
+	leaq	(%rbp),%rcx
+	mulq	%r14
+	addq	%rax,%r10
+	movq	%rbx,%rax
+	movq	%rdx,%r11
+	adcq	$0,%r11
+	addq	%r12,%r10
+	adcq	$0,%r11
+	movq	%r10,-8(%rdi,%rcx,1)
+	jmp	L$sqr4x_1st
+
+.p2align	5
+L$sqr4x_1st:
+	movq	(%rsi,%rcx,1),%rbx
+	mulq	%r15
+	addq	%rax,%r13
+	movq	%rbx,%rax
+	movq	%rdx,%r12
+	adcq	$0,%r12
+
+	mulq	%r14
+	addq	%rax,%r11
+	movq	%rbx,%rax
+	movq	8(%rsi,%rcx,1),%rbx
+	movq	%rdx,%r10
+	adcq	$0,%r10
+	addq	%r13,%r11
+	adcq	$0,%r10
+
+
+	mulq	%r15
+	addq	%rax,%r12
+	movq	%rbx,%rax
+	movq	%r11,(%rdi,%rcx,1)
+	movq	%rdx,%r13
+	adcq	$0,%r13
+
+	mulq	%r14
+	addq	%rax,%r10
+	movq	%rbx,%rax
+	movq	16(%rsi,%rcx,1),%rbx
+	movq	%rdx,%r11
+	adcq	$0,%r11
+	addq	%r12,%r10
+	adcq	$0,%r11
+
+	mulq	%r15
+	addq	%rax,%r13
+	movq	%rbx,%rax
+	movq	%r10,8(%rdi,%rcx,1)
+	movq	%rdx,%r12
+	adcq	$0,%r12
+
+	mulq	%r14
+	addq	%rax,%r11
+	movq	%rbx,%rax
+	movq	24(%rsi,%rcx,1),%rbx
+	movq	%rdx,%r10
+	adcq	$0,%r10
+	addq	%r13,%r11
+	adcq	$0,%r10
+
+
+	mulq	%r15
+	addq	%rax,%r12
+	movq	%rbx,%rax
+	movq	%r11,16(%rdi,%rcx,1)
+	movq	%rdx,%r13
+	adcq	$0,%r13
+	leaq	32(%rcx),%rcx
+
+	mulq	%r14
+	addq	%rax,%r10
+	movq	%rbx,%rax
+	movq	%rdx,%r11
+	adcq	$0,%r11
+	addq	%r12,%r10
+	adcq	$0,%r11
+	movq	%r10,-8(%rdi,%rcx,1)
+
+	cmpq	$0,%rcx
+	jne	L$sqr4x_1st
+
+	mulq	%r15
+	addq	%rax,%r13
+	leaq	16(%rbp),%rbp
+	adcq	$0,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+
+	movq	%r13,(%rdi)
+	movq	%rdx,%r12
+	movq	%rdx,8(%rdi)
+	jmp	L$sqr4x_outer
+/* Outer loop: rbp advances by 16 bytes (two input words) per pass until it reaches zero. */
+.p2align	5
+L$sqr4x_outer:
+	movq	-32(%rsi,%rbp,1),%r14
+	leaq	48+8(%rsp,%r9,2),%rdi
+	movq	-24(%rsi,%rbp,1),%rax
+	leaq	-32(%rdi,%rbp,1),%rdi
+	movq	-16(%rsi,%rbp,1),%rbx
+	movq	%rax,%r15
+
+	mulq	%r14
+	movq	-24(%rdi,%rbp,1),%r10
+	addq	%rax,%r10
+	movq	%rbx,%rax
+	adcq	$0,%rdx
+	movq	%r10,-24(%rdi,%rbp,1)
+	movq	%rdx,%r11
+
+	mulq	%r14
+	addq	%rax,%r11
+	movq	%rbx,%rax
+	adcq	$0,%rdx
+	addq	-16(%rdi,%rbp,1),%r11
+	movq	%rdx,%r10
+	adcq	$0,%r10
+	movq	%r11,-16(%rdi,%rbp,1)
+
+	xorq	%r12,%r12
+
+	movq	-8(%rsi,%rbp,1),%rbx
+	mulq	%r15
+	addq	%rax,%r12
+	movq	%rbx,%rax
+	adcq	$0,%rdx
+	addq	-8(%rdi,%rbp,1),%r12
+	movq	%rdx,%r13
+	adcq	$0,%r13
+
+	mulq	%r14
+	addq	%rax,%r10
+	movq	%rbx,%rax
+	adcq	$0,%rdx
+	addq	%r12,%r10
+	movq	%rdx,%r11
+	adcq	$0,%r11
+	movq	%r10,-8(%rdi,%rbp,1)
+
+	leaq	(%rbp),%rcx
+	jmp	L$sqr4x_inner
+
+.p2align	5
+L$sqr4x_inner:
+	movq	(%rsi,%rcx,1),%rbx
+	mulq	%r15
+	addq	%rax,%r13
+	movq	%rbx,%rax
+	movq	%rdx,%r12
+	adcq	$0,%r12
+	addq	(%rdi,%rcx,1),%r13
+	adcq	$0,%r12
+
+.byte	0x67
+	mulq	%r14
+	addq	%rax,%r11
+	movq	%rbx,%rax
+	movq	8(%rsi,%rcx,1),%rbx
+	movq	%rdx,%r10
+	adcq	$0,%r10
+	addq	%r13,%r11
+	adcq	$0,%r10
+
+	mulq	%r15
+	addq	%rax,%r12
+	movq	%r11,(%rdi,%rcx,1)
+	movq	%rbx,%rax
+	movq	%rdx,%r13
+	adcq	$0,%r13
+	addq	8(%rdi,%rcx,1),%r12
+	leaq	16(%rcx),%rcx
+	adcq	$0,%r13
+
+	mulq	%r14
+	addq	%rax,%r10
+	movq	%rbx,%rax
+	adcq	$0,%rdx
+	addq	%r12,%r10
+	movq	%rdx,%r11
+	adcq	$0,%r11
+	movq	%r10,-8(%rdi,%rcx,1)
+
+	cmpq	$0,%rcx
+	jne	L$sqr4x_inner
+
+.byte	0x67
+	mulq	%r15
+	addq	%rax,%r13
+	adcq	$0,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+
+	movq	%r13,(%rdi)
+	movq	%rdx,%r12
+	movq	%rdx,8(%rdi)
+
+	addq	$16,%rbp
+	jnz	L$sqr4x_outer
+
+/* Outer loop finished (rbp == 0): handle the last four input words at -32..-8(%rsi). */
+	movq	-32(%rsi),%r14
+	leaq	48+8(%rsp,%r9,2),%rdi
+	movq	-24(%rsi),%rax
+	leaq	-32(%rdi,%rbp,1),%rdi
+	movq	-16(%rsi),%rbx
+	movq	%rax,%r15
+
+	mulq	%r14
+	addq	%rax,%r10
+	movq	%rbx,%rax
+	movq	%rdx,%r11
+	adcq	$0,%r11
+
+	mulq	%r14
+	addq	%rax,%r11
+	movq	%rbx,%rax
+	movq	%r10,-24(%rdi)
+	movq	%rdx,%r10
+	adcq	$0,%r10
+	addq	%r13,%r11
+	movq	-8(%rsi),%rbx
+	adcq	$0,%r10
+
+	mulq	%r15
+	addq	%rax,%r12
+	movq	%rbx,%rax
+	movq	%r11,-16(%rdi)
+	movq	%rdx,%r13
+	adcq	$0,%r13
+
+	mulq	%r14
+	addq	%rax,%r10
+	movq	%rbx,%rax
+	movq	%rdx,%r11
+	adcq	$0,%r11
+	addq	%r12,%r10
+	adcq	$0,%r11
+	movq	%r10,-8(%rdi)
+
+	mulq	%r15
+	addq	%rax,%r13
+	movq	-16(%rsi),%rax
+	adcq	$0,%rdx
+	addq	%r11,%r13
+	adcq	$0,%rdx
+
+	movq	%r13,(%rdi)
+	movq	%rdx,%r12
+	movq	%rdx,8(%rdi)
+/* Product of the last two input words: rax = -16(%rsi), rbx = -8(%rsi). */
+	mulq	%rbx
+	addq	$16,%rbp
+	xorq	%r14,%r14
+	subq	%r9,%rbp
+	xorq	%r15,%r15
+
+	addq	%r12,%rax
+	adcq	$0,%rdx
+	movq	%rax,8(%rdi)
+	movq	%rdx,16(%rdi)
+	movq	%r15,24(%rdi)
+
+	movq	-16(%rsi,%rbp,1),%rax
+	leaq	48+8(%rsp),%rdi
+	xorq	%r10,%r10
+	movq	8(%rdi),%r11
+
+	leaq	(%r14,%r10,2),%r12
+	shrq	$63,%r10
+	leaq	(%rcx,%r11,2),%r13
+	shrq	$63,%r11
+	orq	%r10,%r13
+	movq	16(%rdi),%r10
+	movq	%r11,%r14
+	mulq	%rax
+	negq	%r15
+	movq	24(%rdi),%r11
+	adcq	%rax,%r12
+	movq	-8(%rsi,%rbp,1),%rax
+	movq	%r12,(%rdi)
+	adcq	%rdx,%r13
+
+	leaq	(%r14,%r10,2),%rbx
+	movq	%r13,8(%rdi)
+	sbbq	%r15,%r15
+	shrq	$63,%r10
+	leaq	(%rcx,%r11,2),%r8
+	shrq	$63,%r11
+	orq	%r10,%r8
+	movq	32(%rdi),%r10
+	movq	%r11,%r14
+	mulq	%rax
+	negq	%r15
+	movq	40(%rdi),%r11
+	adcq	%rax,%rbx
+	movq	0(%rsi,%rbp,1),%rax
+	movq	%rbx,16(%rdi)
+	adcq	%rdx,%r8
+	leaq	16(%rbp),%rbp
+	movq	%r8,24(%rdi)
+	sbbq	%r15,%r15
+	leaq	64(%rdi),%rdi
+	jmp	L$sqr4x_shift_n_add
+/* Doubling loop: each pass rewrites eight scratch words (-32..24 relative to rdi) and squares four input words; r15 carries CF between steps. */
+.p2align	5
+L$sqr4x_shift_n_add:
+	leaq	(%r14,%r10,2),%r12
+	shrq	$63,%r10
+	leaq	(%rcx,%r11,2),%r13
+	shrq	$63,%r11
+	orq	%r10,%r13
+	movq	-16(%rdi),%r10
+	movq	%r11,%r14
+	mulq	%rax
+	negq	%r15
+	movq	-8(%rdi),%r11
+	adcq	%rax,%r12
+	movq	-8(%rsi,%rbp,1),%rax
+	movq	%r12,-32(%rdi)
+	adcq	%rdx,%r13
+
+	leaq	(%r14,%r10,2),%rbx
+	movq	%r13,-24(%rdi)
+	sbbq	%r15,%r15
+	shrq	$63,%r10
+	leaq	(%rcx,%r11,2),%r8
+	shrq	$63,%r11
+	orq	%r10,%r8
+	movq	0(%rdi),%r10
+	movq	%r11,%r14
+	mulq	%rax
+	negq	%r15
+	movq	8(%rdi),%r11
+	adcq	%rax,%rbx
+	movq	0(%rsi,%rbp,1),%rax
+	movq	%rbx,-16(%rdi)
+	adcq	%rdx,%r8
+
+	leaq	(%r14,%r10,2),%r12
+	movq	%r8,-8(%rdi)
+	sbbq	%r15,%r15
+	shrq	$63,%r10
+	leaq	(%rcx,%r11,2),%r13
+	shrq	$63,%r11
+	orq	%r10,%r13
+	movq	16(%rdi),%r10
+	movq	%r11,%r14
+	mulq	%rax
+	negq	%r15
+	movq	24(%rdi),%r11
+	adcq	%rax,%r12
+	movq	8(%rsi,%rbp,1),%rax
+	movq	%r12,0(%rdi)
+	adcq	%rdx,%r13
+
+	leaq	(%r14,%r10,2),%rbx
+	movq	%r13,8(%rdi)
+	sbbq	%r15,%r15
+	shrq	$63,%r10
+	leaq	(%rcx,%r11,2),%r8
+	shrq	$63,%r11
+	orq	%r10,%r8
+	movq	32(%rdi),%r10
+	movq	%r11,%r14
+	mulq	%rax
+	negq	%r15
+	movq	40(%rdi),%r11
+	adcq	%rax,%rbx
+	movq	16(%rsi,%rbp,1),%rax
+	movq	%rbx,16(%rdi)
+	adcq	%rdx,%r8
+	movq	%r8,24(%rdi)
+	sbbq	%r15,%r15
+	leaq	64(%rdi),%rdi
+	addq	$32,%rbp
+	jnz	L$sqr4x_shift_n_add
+
+	leaq	(%r14,%r10,2),%r12
+.byte	0x67
+	shrq	$63,%r10
+	leaq	(%rcx,%r11,2),%r13
+	shrq	$63,%r11
+	orq	%r10,%r13
+	movq	-16(%rdi),%r10
+	movq	%r11,%r14
+	mulq	%rax
+	negq	%r15
+	movq	-8(%rdi),%r11
+	adcq	%rax,%r12
+	movq	-8(%rsi),%rax
+	movq	%r12,-32(%rdi)
+	adcq	%rdx,%r13
+
+	leaq	(%r14,%r10,2),%rbx
+	movq	%r13,-24(%rdi)
+	sbbq	%r15,%r15
+	shrq	$63,%r10
+	leaq	(%rcx,%r11,2),%r8
+	shrq	$63,%r11
+	orq	%r10,%r8
+	mulq	%rax
+	negq	%r15
+	adcq	%rax,%rbx
+	adcq	%rdx,%r8
+	movq	%rbx,-16(%rdi)
+	movq	%r8,-8(%rdi)
+.byte	102,72,15,126,213	/* movq %xmm2,%rbp */
+__bn_sqr8x_reduction:	/* Reduction pass over the scratch words, eight at a time (presumably the Montgomery reduction -- confirm); reached by fall-through from __bn_sqr8x_internal and by call from bn_from_mont8x. */
+	xorq	%rax,%rax
+	leaq	(%r9,%rbp,1),%rcx	/* rcx = rbp + r9, saved at 0+8(%rsp) as the end marker for rbp */
+	leaq	48+8(%rsp,%r9,2),%rdx	/* rdx = end pointer for rdi, saved at 8+8(%rsp) */
+	movq	%rcx,0+8(%rsp)
+	leaq	48+8(%rsp,%r9,1),%rdi
+	movq	%rdx,8+8(%rsp)
+	negq	%r9
+	jmp	L$8x_reduction_loop
+/* Outer loop: load eight scratch words into rbx, r9..r15 and derive the first multiplier from the low word and the qword at 32+8(%rsp). */
+.p2align	5
+L$8x_reduction_loop:
+	leaq	(%rdi,%r9,1),%rdi
+.byte	0x66
+	movq	0(%rdi),%rbx
+	movq	8(%rdi),%r9
+	movq	16(%rdi),%r10
+	movq	24(%rdi),%r11
+	movq	32(%rdi),%r12
+	movq	40(%rdi),%r13
+	movq	48(%rdi),%r14
+	movq	56(%rdi),%r15
+	movq	%rax,(%rdx)
+	leaq	64(%rdi),%rdi
+
+.byte	0x67
+	movq	%rbx,%r8
+	imulq	32+8(%rsp),%rbx
+	movq	0(%rbp),%rax
+	movl	$8,%ecx
+	jmp	L$8x_reduce
+/* Eight passes: multiply the eight words at 0..56(%rbp) by rbx and fold them into r8..r15; each multiplier is saved on the stack for L$8x_tail. */
+.p2align	5
+L$8x_reduce:
+	mulq	%rbx
+	movq	8(%rbp),%rax
+	negq	%r8
+	movq	%rdx,%r8
+	adcq	$0,%r8
+
+	mulq	%rbx
+	addq	%rax,%r9
+	movq	16(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r9,%r8
+	movq	%rbx,48-8+8(%rsp,%rcx,8)	/* save this multiplier; L$8x_tail reloads it */
+	movq	%rdx,%r9
+	adcq	$0,%r9
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	24(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r9
+	movq	32+8(%rsp),%rsi
+	movq	%rdx,%r10
+	adcq	$0,%r10
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	32(%rbp),%rax
+	adcq	$0,%rdx
+	imulq	%r8,%rsi	/* rsi = next multiplier, moved into rbx at the end of this pass */
+	addq	%r11,%r10
+	movq	%rdx,%r11
+	adcq	$0,%r11
+
+	mulq	%rbx
+	addq	%rax,%r12
+	movq	40(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r12,%r11
+	movq	%rdx,%r12
+	adcq	$0,%r12
+
+	mulq	%rbx
+	addq	%rax,%r13
+	movq	48(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r13,%r12
+	movq	%rdx,%r13
+	adcq	$0,%r13
+
+	mulq	%rbx
+	addq	%rax,%r14
+	movq	56(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r14,%r13
+	movq	%rdx,%r14
+	adcq	$0,%r14
+
+	mulq	%rbx
+	movq	%rsi,%rbx
+	addq	%rax,%r15
+	movq	0(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r15,%r14
+	movq	%rdx,%r15
+	adcq	$0,%r15
+
+	decl	%ecx
+	jnz	L$8x_reduce
+/* Advance rbp by eight words; if it reaches the end marker saved at 0+8(%rsp) there is no tail to process. */
+	leaq	64(%rbp),%rbp
+	xorq	%rax,%rax
+	movq	8+8(%rsp),%rdx
+	cmpq	0+8(%rsp),%rbp
+	jae	L$8x_no_tail
+
+.byte	0x66
+	addq	0(%rdi),%r8
+	adcq	8(%rdi),%r9
+	adcq	16(%rdi),%r10
+	adcq	24(%rdi),%r11
+	adcq	32(%rdi),%r12
+	adcq	40(%rdi),%r13
+	adcq	48(%rdi),%r14
+	adcq	56(%rdi),%r15
+	sbbq	%rsi,%rsi	/* rsi = 0 or -1: the carry out of the additions above */
+
+	movq	48+56+8(%rsp),%rbx
+	movl	$8,%ecx
+	movq	0(%rbp),%rax
+	jmp	L$8x_tail
+/* Tail: apply the eight saved multipliers to the next eight words at rbp, storing one finished scratch word per pass. */
+.p2align	5
+L$8x_tail:
+	mulq	%rbx
+	addq	%rax,%r8
+	movq	8(%rbp),%rax
+	movq	%r8,(%rdi)
+	movq	%rdx,%r8
+	adcq	$0,%r8
+
+	mulq	%rbx
+	addq	%rax,%r9
+	movq	16(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r9,%r8
+	leaq	8(%rdi),%rdi
+	movq	%rdx,%r9
+	adcq	$0,%r9
+
+	mulq	%rbx
+	addq	%rax,%r10
+	movq	24(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r10,%r9
+	movq	%rdx,%r10
+	adcq	$0,%r10
+
+	mulq	%rbx
+	addq	%rax,%r11
+	movq	32(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r11,%r10
+	movq	%rdx,%r11
+	adcq	$0,%r11
+
+	mulq	%rbx
+	addq	%rax,%r12
+	movq	40(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r12,%r11
+	movq	%rdx,%r12
+	adcq	$0,%r12
+
+	mulq	%rbx
+	addq	%rax,%r13
+	movq	48(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r13,%r12
+	movq	%rdx,%r13
+	adcq	$0,%r13
+
+	mulq	%rbx
+	addq	%rax,%r14
+	movq	56(%rbp),%rax
+	adcq	$0,%rdx
+	addq	%r14,%r13
+	movq	%rdx,%r14
+	adcq	$0,%r14
+
+	mulq	%rbx
+	movq	48-16+8(%rsp,%rcx,8),%rbx	/* next saved multiplier */
+	addq	%rax,%r15
+	adcq	$0,%rdx
+	addq	%r15,%r14
+	movq	0(%rbp),%rax
+	movq	%rdx,%r15
+	adcq	$0,%r15
+
+	decl	%ecx
+	jnz	L$8x_tail
+
+	leaq	64(%rbp),%rbp
+	movq	8+8(%rsp),%rdx
+	cmpq	0+8(%rsp),%rbp
+	jae	L$8x_tail_done
+
+	movq	48+56+8(%rsp),%rbx
+	negq	%rsi	/* turn the saved carry in rsi back into CF */
+	movq	0(%rbp),%rax
+	adcq	0(%rdi),%r8
+	adcq	8(%rdi),%r9
+	adcq	16(%rdi),%r10
+	adcq	24(%rdi),%r11
+	adcq	32(%rdi),%r12
+	adcq	40(%rdi),%r13
+	adcq	48(%rdi),%r14
+	adcq	56(%rdi),%r15
+	sbbq	%rsi,%rsi
+
+	movl	$8,%ecx
+	jmp	L$8x_tail
+
+.p2align	5
+L$8x_tail_done:
+	xorq	%rax,%rax
+	addq	(%rdx),%r8
+	adcq	$0,%r9
+	adcq	$0,%r10
+	adcq	$0,%r11
+	adcq	$0,%r12
+	adcq	$0,%r13
+	adcq	$0,%r14
+	adcq	$0,%r15
+	adcq	$0,%rax
+
+	negq	%rsi
+L$8x_no_tail:
+	adcq	0(%rdi),%r8
+	adcq	8(%rdi),%r9
+	adcq	16(%rdi),%r10
+	adcq	24(%rdi),%r11
+	adcq	32(%rdi),%r12
+	adcq	40(%rdi),%r13
+	adcq	48(%rdi),%r14
+	adcq	56(%rdi),%r15
+	adcq	$0,%rax
+	movq	-8(%rbp),%rcx
+	xorq	%rsi,%rsi
+
+.byte	102,72,15,126,213	/* movq %xmm2,%rbp */
+
+	movq	%r8,0(%rdi)
+	movq	%r9,8(%rdi)
+.byte	102,73,15,126,217	/* movq %xmm3,%r9 */
+	movq	%r10,16(%rdi)
+	movq	%r11,24(%rdi)
+	movq	%r12,32(%rdi)
+	movq	%r13,40(%rdi)
+	movq	%r14,48(%rdi)
+	movq	%r15,56(%rdi)
+	leaq	64(%rdi),%rdi
+/* Repeat until rdi reaches the end pointer in rdx (reloaded from 8+8(%rsp) above). */
+	cmpq	%rdx,%rdi
+	jb	L$8x_reduction_loop
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+.p2align	5
+__bn_post4x_internal:	/* Masked subtraction, four words per pass: rdi[] = rbx[] - (rbp[] AND mask), with mask = -rax. L$sqr4x_sub_entry is also a jump target of the mul4x code earlier in this file. */
+	movq	0(%rbp),%r12
+	leaq	(%rdi,%r9,1),%rbx	/* rbx = source words at rdi + r9 (rdi as it is on entry) */
+	movq	%r9,%rcx
+.byte	102,72,15,126,207	/* movq %xmm1,%rdi */
+	negq	%rax	/* rax becomes the select mask (-rax) */
+.byte	102,72,15,126,206	/* movq %xmm1,%rsi */
+	sarq	$3+2,%rcx	/* rcx = r9 / 32, counted up to zero by incq below */
+	decq	%r12	/* combined with notq below this negates the first word (supplies the +1 of NOT x + 1) */
+	xorq	%r10,%r10
+	movq	8(%rbp),%r13
+	movq	16(%rbp),%r14
+	movq	24(%rbp),%r15
+	jmp	L$sqr4x_sub_entry
+/* Per pass: r12..r15 = NOT(four words at rbp) AND mask, added with carry to four words at rbx, stored at rdi. */
+.p2align	4
+L$sqr4x_sub:
+	movq	0(%rbp),%r12
+	movq	8(%rbp),%r13
+	movq	16(%rbp),%r14
+	movq	24(%rbp),%r15
+L$sqr4x_sub_entry:
+	leaq	32(%rbp),%rbp
+	notq	%r12
+	notq	%r13
+	notq	%r14
+	notq	%r15
+	andq	%rax,%r12
+	andq	%rax,%r13
+	andq	%rax,%r14
+	andq	%rax,%r15
+/* r10 carries the carry flag between passes: negq re-creates CF, sbbq saves it again. */
+	negq	%r10
+	adcq	0(%rbx),%r12
+	adcq	8(%rbx),%r13
+	adcq	16(%rbx),%r14
+	adcq	24(%rbx),%r15
+	movq	%r12,0(%rdi)
+	leaq	32(%rbx),%rbx
+	movq	%r13,8(%rdi)
+	sbbq	%r10,%r10
+	movq	%r14,16(%rdi)
+	movq	%r15,24(%rdi)
+	leaq	32(%rdi),%rdi
+
+	incq	%rcx
+	jnz	L$sqr4x_sub
+/* Leave r10 = entry value of r9 and r9 = its negation for the caller. */
+	movq	%r9,%r10
+	negq	%r9
+	.byte	0xf3,0xc3	/* rep ret */
+
+.globl	_bn_from_montgomery
+.private_extern _bn_from_montgomery
+/* _bn_from_montgomery: jumps to bn_from_mont8x when the low three bits of r9d are zero; otherwise returns 0 in eax. */
+.p2align	5
+_bn_from_montgomery:
+	testl	$7,%r9d
+	jz	bn_from_mont8x
+	xorl	%eax,%eax
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+
+.p2align	5
+bn_from_mont8x:
+/* bn_from_mont8x: copies the input at rsi into a stack buffer followed by an equal-sized zeroed area, runs __bn_sqr8x_reduction and __bn_post4x_internal, wipes the buffer and returns 1 in rax. */
+.byte	0x67
+	movq	%rsp,%rax
+
+	pushq	%rbx
+
+	pushq	%rbp
+
+	pushq	%r12
+
+	pushq	%r13
+
+	pushq	%r14
+
+	pushq	%r15
+
+L$from_prologue:
+/* Scale r9d by 8 (presumably a word count to bytes -- confirm), r10 = 3 * that, negate r9, and replace r8 by the qword it points to. */
+	shll	$3,%r9d
+	leaq	(%r9,%r9,2),%r10
+	negq	%r9
+	movq	(%r8),%r8
+
+/* Frame placement below follows the same instruction sequence as the _bn_power5 prologue in this file: */
+/* rbp ends up 320 + 2*|r9| bytes below rsp, adjusted by the low 12 bits of the distance to rdi. */
+
+
+
+
+
+	leaq	-320(%rsp,%r9,2),%r11
+	movq	%rsp,%rbp
+	subq	%rdi,%r11
+	andq	$4095,%r11
+	cmpq	%r11,%r10
+	jb	L$from_sp_alt
+	subq	%r11,%rbp
+	leaq	-320(%rbp,%r9,2),%rbp
+	jmp	L$from_sp_done
+
+.p2align	5
+L$from_sp_alt:
+	leaq	4096-320(,%r9,2),%r10
+	leaq	-320(%rbp,%r9,2),%rbp
+	subq	%r10,%r11
+	movq	$0,%r10
+	cmovcq	%r10,%r11
+	subq	%r11,%rbp
+L$from_sp_done:
+	andq	$-64,%rbp	/* align the frame base down to 64 bytes */
+	movq	%rsp,%r11
+	subq	%rbp,%r11
+	andq	$-4096,%r11
+	leaq	(%r11,%rbp,1),%rsp
+	movq	(%rsp),%r10
+	cmpq	%rbp,%rsp
+	ja	L$from_page_walk
+	jmp	L$from_page_walk_done
+/* Lower rsp to rbp in 4096-byte steps, reading one qword at each step. */
+L$from_page_walk:
+	leaq	-4096(%rsp),%rsp
+	movq	(%rsp),%r10
+	cmpq	%rbp,%rsp
+	ja	L$from_page_walk
+L$from_page_walk_done:
+
+	movq	%r9,%r10
+	negq	%r9
+/* From here r10 holds the negated scaled count and r9 the positive one. */
+/* Frame slots written below: 32(%rsp) = value loaded from (r8), 40(%rsp) = rsp at entry. */
+
+
+
+
+
+
+
+
+	movq	%r8,32(%rsp)
+	movq	%rax,40(%rsp)
+
+L$from_body:
+	movq	%r9,%r11
+	leaq	48(%rsp),%rax
+	pxor	%xmm0,%xmm0
+	jmp	L$mul_by_1
+/* Copy loop: 64 bytes per pass from rsi to rax, while zeroing the 64 bytes at rax + r9; r11 counts the remaining bytes. */
+.p2align	5
+L$mul_by_1:
+	movdqu	(%rsi),%xmm1
+	movdqu	16(%rsi),%xmm2
+	movdqu	32(%rsi),%xmm3
+	movdqa	%xmm0,(%rax,%r9,1)
+	movdqu	48(%rsi),%xmm4
+	movdqa	%xmm0,16(%rax,%r9,1)
+.byte	0x48,0x8d,0xb6,0x40,0x00,0x00,0x00	/* leaq 64(%rsi),%rsi */
+	movdqa	%xmm1,(%rax)
+	movdqa	%xmm0,32(%rax,%r9,1)
+	movdqa	%xmm2,16(%rax)
+	movdqa	%xmm0,48(%rax,%r9,1)
+	movdqa	%xmm3,32(%rax)
+	movdqa	%xmm4,48(%rax)
+	leaq	64(%rax),%rax
+	subq	$64,%r11
+	jnz	L$mul_by_1
+
+.byte	102,72,15,110,207	/* movq %rdi,%xmm1 */
+.byte	102,72,15,110,209	/* movq %rcx,%xmm2 */
+.byte	0x67
+	movq	%rcx,%rbp
+.byte	102,73,15,110,218	/* movq %r10,%xmm3 */
+	call	__bn_sqr8x_reduction
+	call	__bn_post4x_internal
+/* Wipe the stack buffer: 64 bytes per pass while r9 drops by 32, i.e. 2 * r9 bytes starting at 48(%rsp). */
+	pxor	%xmm0,%xmm0
+	leaq	48(%rsp),%rax
+	jmp	L$from_mont_zero
+
+.p2align	5
+L$from_mont_zero:
+	movq	40(%rsp),%rsi
+
+	movdqa	%xmm0,0(%rax)
+	movdqa	%xmm0,16(%rax)
+	movdqa	%xmm0,32(%rax)
+	movdqa	%xmm0,48(%rax)
+	leaq	64(%rax),%rax
+	subq	$32,%r9
+	jnz	L$from_mont_zero
+/* Epilogue: rsi = entry rsp (loaded inside the loop above); restore the six pushed registers from just below it. */
+	movq	$1,%rax
+	movq	-48(%rsi),%r15
+
+	movq	-40(%rsi),%r14
+
+	movq	-32(%rsi),%r13
+
+	movq	-24(%rsi),%r12
+
+	movq	-16(%rsi),%rbp
+
+	movq	-8(%rsi),%rbx
+
+	leaq	(%rsi),%rsp
+
+L$from_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+.globl	_bn_scatter5
+.private_extern _bn_scatter5
+/* _bn_scatter5: copies esi qwords from rdi into the table at rdx, placing word i at rdx + 8*rcx + 256*i; returns immediately when esi is 0. */
+.p2align	4
+_bn_scatter5:
+	cmpl	$0,%esi
+	jz	L$scatter_epilogue
+	leaq	(%rdx,%rcx,8),%rdx	/* rdx = address of slot rcx in the first 256-byte row */
+L$scatter:
+	movq	(%rdi),%rax
+	leaq	8(%rdi),%rdi
+	movq	%rax,(%rdx)
+	leaq	256(%rdx),%rdx	/* same slot in the next row */
+	subl	$1,%esi
+	jnz	L$scatter
+L$scatter_epilogue:
+	.byte	0xf3,0xc3	/* rep ret */
+
+
+.globl	_bn_gather5
+.private_extern _bn_gather5
+/* _bn_gather5: for each of esi output qwords at rdi, reads a whole 256-byte row starting at rdx and keeps only the qword whose index equals ecx, using mask-and-OR so that no load address depends on the index. */
+.p2align	5
+_bn_gather5:
+L$SEH_begin_bn_gather5:
+/* r10 keeps the entry rsp; 264 bytes of stack are reserved and rsp is then aligned to 16 bytes for the mask table. */
+.byte	0x4c,0x8d,0x14,0x24	/* leaq (%rsp),%r10 */
+.byte	0x48,0x81,0xec,0x08,0x01,0x00,0x00	/* subq $0x108,%rsp */
+	leaq	L$inc(%rip),%rax
+	andq	$-16,%rsp
+/* Build sixteen 16-byte masks at -128..112(%rax): dword counters starting from L$inc are compared for equality with the index broadcast into xmm5. */
+	movd	%ecx,%xmm5
+	movdqa	0(%rax),%xmm0
+	movdqa	16(%rax),%xmm1
+	leaq	128(%rdx),%r11
+	leaq	128(%rsp),%rax
+
+	pshufd	$0,%xmm5,%xmm5
+	movdqa	%xmm1,%xmm4
+	movdqa	%xmm1,%xmm2
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm4,%xmm3
+
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,-128(%rax)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,-112(%rax)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,-96(%rax)
+	movdqa	%xmm4,%xmm2
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,-80(%rax)
+	movdqa	%xmm4,%xmm3
+
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,-64(%rax)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,-48(%rax)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,-32(%rax)
+	movdqa	%xmm4,%xmm2
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,-16(%rax)
+	movdqa	%xmm4,%xmm3
+
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,0(%rax)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,16(%rax)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,32(%rax)
+	movdqa	%xmm4,%xmm2
+	paddd	%xmm0,%xmm1
+	pcmpeqd	%xmm5,%xmm0
+	movdqa	%xmm3,48(%rax)
+	movdqa	%xmm4,%xmm3
+
+	paddd	%xmm1,%xmm2
+	pcmpeqd	%xmm5,%xmm1
+	movdqa	%xmm0,64(%rax)
+	movdqa	%xmm4,%xmm0
+
+	paddd	%xmm2,%xmm3
+	pcmpeqd	%xmm5,%xmm2
+	movdqa	%xmm1,80(%rax)
+	movdqa	%xmm4,%xmm1
+
+	paddd	%xmm3,%xmm0
+	pcmpeqd	%xmm5,%xmm3
+	movdqa	%xmm2,96(%rax)
+	movdqa	%xmm4,%xmm2
+	movdqa	%xmm3,112(%rax)
+	jmp	L$gather
+/* One output qword per pass. NOTE(review): the loop tests esi only after the first pass, so esi = 0 is not guarded here (unlike _bn_scatter5). */
+.p2align	5
+L$gather:
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	movdqa	-128(%r11),%xmm0
+	movdqa	-112(%r11),%xmm1
+	movdqa	-96(%r11),%xmm2
+	pand	-128(%rax),%xmm0
+	movdqa	-80(%r11),%xmm3
+	pand	-112(%rax),%xmm1
+	por	%xmm0,%xmm4
+	pand	-96(%rax),%xmm2
+	por	%xmm1,%xmm5
+	pand	-80(%rax),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	-64(%r11),%xmm0
+	movdqa	-48(%r11),%xmm1
+	movdqa	-32(%r11),%xmm2
+	pand	-64(%rax),%xmm0
+	movdqa	-16(%r11),%xmm3
+	pand	-48(%rax),%xmm1
+	por	%xmm0,%xmm4
+	pand	-32(%rax),%xmm2
+	por	%xmm1,%xmm5
+	pand	-16(%rax),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	0(%r11),%xmm0
+	movdqa	16(%r11),%xmm1
+	movdqa	32(%r11),%xmm2
+	pand	0(%rax),%xmm0
+	movdqa	48(%r11),%xmm3
+	pand	16(%rax),%xmm1
+	por	%xmm0,%xmm4
+	pand	32(%rax),%xmm2
+	por	%xmm1,%xmm5
+	pand	48(%rax),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	movdqa	64(%r11),%xmm0
+	movdqa	80(%r11),%xmm1
+	movdqa	96(%r11),%xmm2
+	pand	64(%rax),%xmm0
+	movdqa	112(%r11),%xmm3
+	pand	80(%rax),%xmm1
+	por	%xmm0,%xmm4
+	pand	96(%rax),%xmm2
+	por	%xmm1,%xmm5
+	pand	112(%rax),%xmm3
+	por	%xmm2,%xmm4
+	por	%xmm3,%xmm5
+	por	%xmm5,%xmm4
+	leaq	256(%r11),%r11	/* next 256-byte row */
+	pshufd	$0x4e,%xmm4,%xmm0	/* swap the two qword halves so the OR below folds them together */
+	por	%xmm4,%xmm0
+	movq	%xmm0,(%rdi)
+	leaq	8(%rdi),%rdi
+	subl	$1,%esi
+	jnz	L$gather
+
+	leaq	(%r10),%rsp	/* restore the entry rsp */
+	.byte	0xf3,0xc3	/* rep ret */
+L$SEH_end_bn_gather5:
+
+.p2align	6
+L$inc:	/* dword lane constants loaded by _bn_gather5: start counters (0,0,1,1), then the step (2,2,2,2) */
+.long	0,0, 1,1
+.long	2,2, 2,2
+.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	/* NUL-terminated ASCII: "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
+#endif
diff --git a/third_party/boringssl/win-x86_64/crypto/chacha/chacha-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/chacha/chacha-x86_64.asm
new file mode 100644
index 0000000..cb36246
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/chacha/chacha-x86_64.asm
@@ -0,0 +1,1891 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.text code align=64
+
+
+EXTERN	OPENSSL_ia32cap_P
+
+ALIGN	64	; constant tables for the ChaCha20 routines in this file
+$L$zero:
+	DD	0,0,0,0
+$L$one:	; added to the counter block once per 64-byte block (xmm4 in ChaCha20_ctr32)
+	DD	1,0,0,0
+$L$inc:
+	DD	0,1,2,3
+$L$four:
+	DD	4,4,4,4
+$L$incy:
+	DD	0,2,4,6,1,3,5,7
+$L$eight:
+	DD	8,8,8,8,8,8,8,8
+$L$rot16:	; byte-shuffle mask loaded into xmm6 by the SSSE3 path: swaps the 16-bit halves of each dword
+DB	0x2,0x3,0x0,0x1,0x6,0x7,0x4,0x5,0xa,0xb,0x8,0x9,0xe,0xf,0xc,0xd
+$L$rot24:	; byte-shuffle mask loaded into xmm7 by the SSSE3 path: moves each byte of a dword up one position (top byte wraps to the bottom)
+DB	0x3,0x0,0x1,0x2,0x7,0x4,0x5,0x6,0xb,0x8,0x9,0xa,0xf,0xc,0xd,0xe
+$L$sigma:	; ASCII "expand 32-byte k", the same four dwords the scalar path loads as immediates
+DB	101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107
+DB	0
+ALIGN	64
+$L$zeroz:
+	DD	0,0,0,0,1,0,0,0,2,0,0,0,3,0,0,0
+$L$fourz:
+	DD	4,0,0,0,4,0,0,0,4,0,0,0,4,0,0,0
+$L$incz:
+	DD	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+$L$sixteen:
+	DD	16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
+DB	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54	; NUL-terminated ASCII over these four DB lines: "ChaCha20 for x86_64, CRYPTOGAMS by <appro@openssl.org>"
+DB	95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32
+DB	98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115
+DB	108,46,111,114,103,62,0
+global	ChaCha20_ctr32
+; ChaCha20_ctr32: XORs rdx bytes read from rsi with generated keystream and writes them to rdi, using 32 key bytes at rcx and a 16-byte counter block at r8 (register names as they are after the argument remap below).
+ALIGN	64
+ChaCha20_ctr32:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ChaCha20_ctr32:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+; Arguments arrived in rcx, rdx, r8, r9 and [rsp+40]; rdi and rsi were first saved at [rsp+8] and [rsp+16].
+; A zero length returns at once; if bit 9 of the dword at OPENSSL_ia32cap_P+4 is set, control goes to $L$ChaCha20_ssse3 instead of the scalar code.
+	cmp	rdx,0
+	je	NEAR $L$no_data
+	mov	r10,QWORD[((OPENSSL_ia32cap_P+4))]
+	test	r10d,512
+	jnz	NEAR $L$ChaCha20_ssse3
+; Scalar path: save the callee-saved registers and reserve 88 bytes of locals.
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	sub	rsp,64+24
+$L$ctr32_body:
+; xmm1/xmm2 = key bytes 0-15/16-31, xmm3 = counter block, xmm4 = $L$one (per-block counter increment).
+
+	movdqu	xmm1,XMMWORD[rcx]
+	movdqu	xmm2,XMMWORD[16+rcx]
+	movdqu	xmm3,XMMWORD[r8]
+	movdqa	xmm4,XMMWORD[$L$one]
+; Keep copies at [rsp+16], [rsp+32] and [rsp+48]; rbp = remaining byte count.
+
+	movdqa	XMMWORD[16+rsp],xmm1
+	movdqa	XMMWORD[32+rsp],xmm2
+	movdqa	XMMWORD[48+rsp],xmm3
+	mov	rbp,rdx
+	jmp	NEAR $L$oop_outer
+; One pass of $L$oop_outer produces one 64-byte keystream block.
+ALIGN	32
+$L$oop_outer:
+	mov	eax,0x61707865
+	mov	ebx,0x3320646e
+	mov	ecx,0x79622d32
+	mov	edx,0x6b206574
+	mov	r8d,DWORD[16+rsp]
+	mov	r9d,DWORD[20+rsp]
+	mov	r10d,DWORD[24+rsp]
+	mov	r11d,DWORD[28+rsp]
+	movd	r12d,xmm3
+	mov	r13d,DWORD[52+rsp]
+	mov	r14d,DWORD[56+rsp]
+	mov	r15d,DWORD[60+rsp]
+; Stash length, input and output pointers at [rsp+64..87]; esi/edi are then reused for the two state words in the low qword of xmm2.
+	mov	QWORD[((64+0))+rsp],rbp
+	mov	ebp,10	; 10 passes of $L$oop per block
+	mov	QWORD[((64+8))+rsp],rsi
+DB	102,72,15,126,214	; movq rsi,xmm2
+	mov	QWORD[((64+16))+rsp],rdi
+	mov	rdi,rsi
+	shr	rdi,32
+	jmp	NEAR $L$oop
+; Each pass applies the add/xor/rotate sequence (rotations 16, 12, 8, 7) eight times; esi/edi are swapped with the [rsp+32..47] slots to cover four state words.
+ALIGN	32
+$L$oop:
+	add	eax,r8d
+	xor	r12d,eax
+	rol	r12d,16
+	add	ebx,r9d
+	xor	r13d,ebx
+	rol	r13d,16
+	add	esi,r12d
+	xor	r8d,esi
+	rol	r8d,12
+	add	edi,r13d
+	xor	r9d,edi
+	rol	r9d,12
+	add	eax,r8d
+	xor	r12d,eax
+	rol	r12d,8
+	add	ebx,r9d
+	xor	r13d,ebx
+	rol	r13d,8
+	add	esi,r12d
+	xor	r8d,esi
+	rol	r8d,7
+	add	edi,r13d
+	xor	r9d,edi
+	rol	r9d,7
+	mov	DWORD[32+rsp],esi
+	mov	DWORD[36+rsp],edi
+	mov	esi,DWORD[40+rsp]
+	mov	edi,DWORD[44+rsp]
+	add	ecx,r10d
+	xor	r14d,ecx
+	rol	r14d,16
+	add	edx,r11d
+	xor	r15d,edx
+	rol	r15d,16
+	add	esi,r14d
+	xor	r10d,esi
+	rol	r10d,12
+	add	edi,r15d
+	xor	r11d,edi
+	rol	r11d,12
+	add	ecx,r10d
+	xor	r14d,ecx
+	rol	r14d,8
+	add	edx,r11d
+	xor	r15d,edx
+	rol	r15d,8
+	add	esi,r14d
+	xor	r10d,esi
+	rol	r10d,7
+	add	edi,r15d
+	xor	r11d,edi
+	rol	r11d,7
+	add	eax,r9d
+	xor	r15d,eax
+	rol	r15d,16
+	add	ebx,r10d
+	xor	r12d,ebx
+	rol	r12d,16
+	add	esi,r15d
+	xor	r9d,esi
+	rol	r9d,12
+	add	edi,r12d
+	xor	r10d,edi
+	rol	r10d,12
+	add	eax,r9d
+	xor	r15d,eax
+	rol	r15d,8
+	add	ebx,r10d
+	xor	r12d,ebx
+	rol	r12d,8
+	add	esi,r15d
+	xor	r9d,esi
+	rol	r9d,7
+	add	edi,r12d
+	xor	r10d,edi
+	rol	r10d,7
+	mov	DWORD[40+rsp],esi
+	mov	DWORD[44+rsp],edi
+	mov	esi,DWORD[32+rsp]
+	mov	edi,DWORD[36+rsp]
+	add	ecx,r11d
+	xor	r13d,ecx
+	rol	r13d,16
+	add	edx,r8d
+	xor	r14d,edx
+	rol	r14d,16
+	add	esi,r13d
+	xor	r11d,esi
+	rol	r11d,12
+	add	edi,r14d
+	xor	r8d,edi
+	rol	r8d,12
+	add	ecx,r11d
+	xor	r13d,ecx
+	rol	r13d,8
+	add	edx,r8d
+	xor	r14d,edx
+	rol	r14d,8
+	add	esi,r13d
+	xor	r11d,esi
+	rol	r11d,7
+	add	edi,r14d
+	xor	r8d,edi
+	rol	r8d,7
+	dec	ebp
+	jnz	NEAR $L$oop
+	mov	DWORD[36+rsp],edi
+	mov	DWORD[32+rsp],esi
+	mov	rbp,QWORD[64+rsp]
+	movdqa	xmm1,xmm2
+	mov	rsi,QWORD[((64+8))+rsp]
+	paddd	xmm3,xmm4	; advance the counter block for the next 64-byte block
+	mov	rdi,QWORD[((64+16))+rsp]
+; Add the initial state back into the working state.
+	add	eax,0x61707865
+	add	ebx,0x3320646e
+	add	ecx,0x79622d32
+	add	edx,0x6b206574
+	add	r8d,DWORD[16+rsp]
+	add	r9d,DWORD[20+rsp]
+	add	r10d,DWORD[24+rsp]
+	add	r11d,DWORD[28+rsp]
+	add	r12d,DWORD[48+rsp]
+	add	r13d,DWORD[52+rsp]
+	add	r14d,DWORD[56+rsp]
+	add	r15d,DWORD[60+rsp]
+	paddd	xmm1,XMMWORD[32+rsp]
+; Fewer than 64 bytes left: finish through the byte-wise tail.
+	cmp	rbp,64
+	jb	NEAR $L$tail
+; Full block: XOR 64 input bytes with the keystream.
+	xor	eax,DWORD[rsi]
+	xor	ebx,DWORD[4+rsi]
+	xor	ecx,DWORD[8+rsi]
+	xor	edx,DWORD[12+rsi]
+	xor	r8d,DWORD[16+rsi]
+	xor	r9d,DWORD[20+rsi]
+	xor	r10d,DWORD[24+rsi]
+	xor	r11d,DWORD[28+rsi]
+	movdqu	xmm0,XMMWORD[32+rsi]
+	xor	r12d,DWORD[48+rsi]
+	xor	r13d,DWORD[52+rsi]
+	xor	r14d,DWORD[56+rsi]
+	xor	r15d,DWORD[60+rsi]
+	lea	rsi,[64+rsi]
+	pxor	xmm0,xmm1
+; Restore the key copy at [rsp+32] (the slot was used as scratch) and store the incremented counter word.
+	movdqa	XMMWORD[32+rsp],xmm2
+	movd	DWORD[48+rsp],xmm3
+
+	mov	DWORD[rdi],eax
+	mov	DWORD[4+rdi],ebx
+	mov	DWORD[8+rdi],ecx
+	mov	DWORD[12+rdi],edx
+	mov	DWORD[16+rdi],r8d
+	mov	DWORD[20+rdi],r9d
+	mov	DWORD[24+rdi],r10d
+	mov	DWORD[28+rdi],r11d
+	movdqu	XMMWORD[32+rdi],xmm0
+	mov	DWORD[48+rdi],r12d
+	mov	DWORD[52+rdi],r13d
+	mov	DWORD[56+rdi],r14d
+	mov	DWORD[60+rdi],r15d
+	lea	rdi,[64+rdi]
+
+	sub	rbp,64
+	jnz	NEAR $L$oop_outer
+
+	jmp	NEAR $L$done
+; Tail: write the keystream block to [rsp..rsp+63]; rbx becomes the byte index.
+ALIGN	16
+$L$tail:
+	mov	DWORD[rsp],eax
+	mov	DWORD[4+rsp],ebx
+	xor	rbx,rbx
+	mov	DWORD[8+rsp],ecx
+	mov	DWORD[12+rsp],edx
+	mov	DWORD[16+rsp],r8d
+	mov	DWORD[20+rsp],r9d
+	mov	DWORD[24+rsp],r10d
+	mov	DWORD[28+rsp],r11d
+	movdqa	XMMWORD[32+rsp],xmm1
+	mov	DWORD[48+rsp],r12d
+	mov	DWORD[52+rsp],r13d
+	mov	DWORD[56+rsp],r14d
+	mov	DWORD[60+rsp],r15d
+; XOR the remaining rbp bytes one at a time against that block.
+$L$oop_tail:
+	movzx	eax,BYTE[rbx*1+rsi]
+	movzx	edx,BYTE[rbx*1+rsp]
+	lea	rbx,[1+rbx]
+	xor	eax,edx
+	mov	BYTE[((-1))+rbx*1+rdi],al
+	dec	rbp
+	jnz	NEAR $L$oop_tail
+; rsi = address just above the six pushed registers; restore them and rsp.
+$L$done:
+	lea	rsi,[((64+24+48))+rsp]
+	mov	r15,QWORD[((-48))+rsi]
+	mov	r14,QWORD[((-40))+rsi]
+	mov	r13,QWORD[((-32))+rsi]
+	mov	r12,QWORD[((-24))+rsi]
+	mov	rbp,QWORD[((-16))+rsi]
+	mov	rbx,QWORD[((-8))+rsi]
+	lea	rsp,[rsi]
+$L$no_data:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_ChaCha20_ctr32:
+
+ALIGN	32
+ChaCha20_ssse3:	; 1x SSE path: produces one 64-byte keystream block per outer iteration and XORs it into the data
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ChaCha20_ssse3:
+	mov	rdi,rcx	; rdi = arg 1, used below as the store (output) pointer
+	mov	rsi,rdx	; rsi = arg 2, used below as the load (input) pointer
+	mov	rdx,r8	; rdx = arg 3, remaining byte count
+	mov	rcx,r9	; rcx = arg 4, 32 bytes are read from it into state rows 1-2
+	mov	r8,QWORD[40+rsp]	; r8 = arg 5 (on stack), 16 bytes are read from it into state row 3
+
+
+$L$ChaCha20_ssse3:
+	mov	r9,rsp	; r9 anchors the frame; the epilogue restores rsp from it
+	cmp	rdx,128
+	ja	NEAR $L$ChaCha20_4x	; more than 128 bytes: hand over to the 4-block path
+
+$L$do_sse3_after_all:	; also entered from ChaCha20_4x (with r9 already set) when it falls back to this path
+	sub	rsp,64+40	; 64 bytes for the state copy + 40 bytes holding the xmm6/xmm7 save slots
+	movaps	XMMWORD[(-40)+r9],xmm6
+	movaps	XMMWORD[(-24)+r9],xmm7
+$L$ssse3_body:
+	movdqa	xmm0,XMMWORD[$L$sigma]	; state row 0
+	movdqu	xmm1,XMMWORD[rcx]	; state row 1
+	movdqu	xmm2,XMMWORD[16+rcx]	; state row 2
+	movdqu	xmm3,XMMWORD[r8]	; state row 3
+	movdqa	xmm6,XMMWORD[$L$rot16]	; pshufb mask (table not visible here; presumably rotate-left-16 per dword)
+	movdqa	xmm7,XMMWORD[$L$rot24]	; pshufb mask (table not visible here; presumably rotate-left-8 per dword)
+
+	movdqa	XMMWORD[rsp],xmm0	; keep the input state on the stack for the final feed-forward add
+	movdqa	XMMWORD[16+rsp],xmm1
+	movdqa	XMMWORD[32+rsp],xmm2
+	movdqa	XMMWORD[48+rsp],xmm3
+	mov	r8,10	; 10 passes of $L$oop_ssse3, each pass containing two rounds
+	jmp	NEAR $L$oop_ssse3
+
+ALIGN	32
+$L$oop_outer_ssse3:	; set up the next 64-byte block
+	movdqa	xmm3,XMMWORD[$L$one]
+	movdqa	xmm0,XMMWORD[rsp]
+	movdqa	xmm1,XMMWORD[16+rsp]
+	movdqa	xmm2,XMMWORD[32+rsp]
+	paddd	xmm3,XMMWORD[48+rsp]	; row 3 += $L$one (presumably bumps the block counter - constant not visible here)
+	mov	r8,10
+	movdqa	XMMWORD[48+rsp],xmm3	; persist the updated row 3
+	jmp	NEAR $L$oop_ssse3
+
+ALIGN	32
+$L$oop_ssse3:	; first half: rounds on the rows as loaded
+	paddd	xmm0,xmm1
+	pxor	xmm3,xmm0
+DB	102,15,56,0,222	; pshufb xmm3,xmm6
+	paddd	xmm2,xmm3
+	pxor	xmm1,xmm2
+	movdqa	xmm4,xmm1
+	psrld	xmm1,20
+	pslld	xmm4,12
+	por	xmm1,xmm4	; xmm1 = rotate-left by 12 of each dword
+	paddd	xmm0,xmm1
+	pxor	xmm3,xmm0
+DB	102,15,56,0,223	; pshufb xmm3,xmm7
+	paddd	xmm2,xmm3
+	pxor	xmm1,xmm2
+	movdqa	xmm4,xmm1
+	psrld	xmm1,25
+	pslld	xmm4,7
+	por	xmm1,xmm4	; xmm1 = rotate-left by 7 of each dword
+	pshufd	xmm2,xmm2,78	; rotate row 2 by two dword lanes
+	pshufd	xmm1,xmm1,57	; rotate row 1 by one dword lane
+	pshufd	xmm3,xmm3,147	; rotate row 3 by one dword lane, opposite direction to row 1
+	nop
+	paddd	xmm0,xmm1	; second half: same sequence on the lane-rotated rows
+	pxor	xmm3,xmm0
+DB	102,15,56,0,222	; pshufb xmm3,xmm6
+	paddd	xmm2,xmm3
+	pxor	xmm1,xmm2
+	movdqa	xmm4,xmm1
+	psrld	xmm1,20
+	pslld	xmm4,12
+	por	xmm1,xmm4
+	paddd	xmm0,xmm1
+	pxor	xmm3,xmm0
+DB	102,15,56,0,223	; pshufb xmm3,xmm7
+	paddd	xmm2,xmm3
+	pxor	xmm1,xmm2
+	movdqa	xmm4,xmm1
+	psrld	xmm1,25
+	pslld	xmm4,7
+	por	xmm1,xmm4
+	pshufd	xmm2,xmm2,78	; undo the lane rotations applied before the second half
+	pshufd	xmm1,xmm1,147
+	pshufd	xmm3,xmm3,57
+	dec	r8
+	jnz	NEAR $L$oop_ssse3
+	paddd	xmm0,XMMWORD[rsp]	; feed-forward: add the saved input state
+	paddd	xmm1,XMMWORD[16+rsp]
+	paddd	xmm2,XMMWORD[32+rsp]
+	paddd	xmm3,XMMWORD[48+rsp]
+
+	cmp	rdx,64
+	jb	NEAR $L$tail_ssse3	; fewer than 64 bytes left: finish byte by byte
+
+	movdqu	xmm4,XMMWORD[rsi]	; full block: XOR 64 input bytes with the keystream
+	movdqu	xmm5,XMMWORD[16+rsi]
+	pxor	xmm0,xmm4
+	movdqu	xmm4,XMMWORD[32+rsi]
+	pxor	xmm1,xmm5
+	movdqu	xmm5,XMMWORD[48+rsi]
+	lea	rsi,[64+rsi]
+	pxor	xmm2,xmm4
+	pxor	xmm3,xmm5
+
+	movdqu	XMMWORD[rdi],xmm0
+	movdqu	XMMWORD[16+rdi],xmm1
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	XMMWORD[48+rdi],xmm3
+	lea	rdi,[64+rdi]
+
+	sub	rdx,64
+	jnz	NEAR $L$oop_outer_ssse3
+
+	jmp	NEAR $L$done_ssse3
+
+ALIGN	16
+$L$tail_ssse3:
+	movdqa	XMMWORD[rsp],xmm0	; spill the keystream block so it can be indexed per byte (overwrites the saved state)
+	movdqa	XMMWORD[16+rsp],xmm1
+	movdqa	XMMWORD[32+rsp],xmm2
+	movdqa	XMMWORD[48+rsp],xmm3
+	xor	r8,r8	; r8 = byte index
+
+$L$oop_tail_ssse3:
+	movzx	eax,BYTE[r8*1+rsi]	; input byte
+	movzx	ecx,BYTE[r8*1+rsp]	; keystream byte
+	lea	r8,[1+r8]	; advance index without touching flags
+	xor	eax,ecx
+	mov	BYTE[((-1))+r8*1+rdi],al	; -1 compensates for the index increment above
+	dec	rdx
+	jnz	NEAR $L$oop_tail_ssse3
+
+$L$done_ssse3:
+	movaps	xmm6,XMMWORD[((-40))+r9]	; restore the xmm registers saved on entry
+	movaps	xmm7,XMMWORD[((-24))+r9]
+	lea	rsp,[r9]
+$L$ssse3_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_ChaCha20_ssse3:
+
+ALIGN	32
+ChaCha20_4x:	; 4-way SSE path: computes four 64-byte keystream blocks per outer iteration, one block per dword lane
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ChaCha20_4x:
+	mov	rdi,rcx	; rdi = arg 1, output pointer
+	mov	rsi,rdx	; rsi = arg 2, input pointer
+	mov	rdx,r8	; rdx = arg 3, remaining byte count
+	mov	rcx,r9	; rcx = arg 4, source of state rows 1-2
+	mov	r8,QWORD[40+rsp]	; r8 = arg 5 (on stack), source of state row 3
+
+
+$L$ChaCha20_4x:
+	mov	r9,rsp	; r9 anchors the frame; the epilogue restores rsp from it
+	mov	r11,r10	; r10 is not set in this function; presumably CPU capability bits left by the caller - confirm
+	shr	r10,32
+	test	r10,32	; bit 5 of the upper dword selects the 8-way AVX2 path
+	jnz	NEAR $L$ChaCha20_8x
+	cmp	rdx,192
+	ja	NEAR $L$proceed4x	; more than 192 bytes: always use the 4-way code
+
+	and	r11,71303168	; keep bits 22 and 26 (0x4400000) of the low dword
+	cmp	r11,4194304	; only bit 22 set (0x400000)?
+	je	NEAR $L$do_sse3_after_all	; then short inputs use the 1x body in ChaCha20_ssse3 (presumably a CPU-model heuristic)
+
+$L$proceed4x:
+	sub	rsp,0x140+168	; 0x140 bytes of scratch/state + 168 bytes holding the xmm6-xmm15 save slots
+	movaps	XMMWORD[(-168)+r9],xmm6
+	movaps	XMMWORD[(-152)+r9],xmm7
+	movaps	XMMWORD[(-136)+r9],xmm8
+	movaps	XMMWORD[(-120)+r9],xmm9
+	movaps	XMMWORD[(-104)+r9],xmm10
+	movaps	XMMWORD[(-88)+r9],xmm11
+	movaps	XMMWORD[(-72)+r9],xmm12
+	movaps	XMMWORD[(-56)+r9],xmm13
+	movaps	XMMWORD[(-40)+r9],xmm14
+	movaps	XMMWORD[(-24)+r9],xmm15
+$L$4x_body:
+	movdqa	xmm11,XMMWORD[$L$sigma]	; state row 0
+	movdqu	xmm15,XMMWORD[rcx]	; state row 1
+	movdqu	xmm7,XMMWORD[16+rcx]	; state row 2
+	movdqu	xmm3,XMMWORD[r8]	; state row 3
+	lea	rcx,[256+rsp]	; biased base pointer for the (N-256)+rcx slots below
+	lea	r10,[$L$rot16]	; r10 -> pshufb mask used after the first add/xor of each quarter-round
+	lea	r11,[$L$rot24]	; r11 -> pshufb mask used after the third add/xor
+
+	pshufd	xmm8,xmm11,0x00	; broadcast each dword of row 0 into its own register (words 0-3 -> xmm8-xmm11)
+	pshufd	xmm9,xmm11,0x55
+	movdqa	XMMWORD[64+rsp],xmm8
+	pshufd	xmm10,xmm11,0xaa
+	movdqa	XMMWORD[80+rsp],xmm9
+	pshufd	xmm11,xmm11,0xff
+	movdqa	XMMWORD[96+rsp],xmm10
+	movdqa	XMMWORD[112+rsp],xmm11
+
+	pshufd	xmm12,xmm15,0x00	; words 4-7 -> xmm12-xmm15
+	pshufd	xmm13,xmm15,0x55
+	movdqa	XMMWORD[(128-256)+rcx],xmm12
+	pshufd	xmm14,xmm15,0xaa
+	movdqa	XMMWORD[(144-256)+rcx],xmm13
+	pshufd	xmm15,xmm15,0xff
+	movdqa	XMMWORD[(160-256)+rcx],xmm14
+	movdqa	XMMWORD[(176-256)+rcx],xmm15
+
+	pshufd	xmm4,xmm7,0x00	; words 8-11 -> xmm4-xmm7
+	pshufd	xmm5,xmm7,0x55
+	movdqa	XMMWORD[(192-256)+rcx],xmm4
+	pshufd	xmm6,xmm7,0xaa
+	movdqa	XMMWORD[(208-256)+rcx],xmm5
+	pshufd	xmm7,xmm7,0xff
+	movdqa	XMMWORD[(224-256)+rcx],xmm6
+	movdqa	XMMWORD[(240-256)+rcx],xmm7
+
+	pshufd	xmm0,xmm3,0x00	; words 12-15 -> xmm0-xmm3
+	pshufd	xmm1,xmm3,0x55
+	paddd	xmm0,XMMWORD[$L$inc]	; per-lane offsets added to word 12 (constant not visible here; presumably 0,1,2,3)
+	pshufd	xmm2,xmm3,0xaa
+	movdqa	XMMWORD[(272-256)+rcx],xmm1
+	pshufd	xmm3,xmm3,0xff
+	movdqa	XMMWORD[(288-256)+rcx],xmm2
+	movdqa	XMMWORD[(304-256)+rcx],xmm3
+
+	jmp	NEAR $L$oop_enter4x	; word 12 is stored at $L$oop_enter4x
+
+ALIGN	32
+$L$oop_outer4x:	; reload the broadcast state for the next group of four blocks
+	movdqa	xmm8,XMMWORD[64+rsp]
+	movdqa	xmm9,XMMWORD[80+rsp]
+	movdqa	xmm10,XMMWORD[96+rsp]
+	movdqa	xmm11,XMMWORD[112+rsp]
+	movdqa	xmm12,XMMWORD[((128-256))+rcx]
+	movdqa	xmm13,XMMWORD[((144-256))+rcx]
+	movdqa	xmm14,XMMWORD[((160-256))+rcx]
+	movdqa	xmm15,XMMWORD[((176-256))+rcx]
+	movdqa	xmm4,XMMWORD[((192-256))+rcx]
+	movdqa	xmm5,XMMWORD[((208-256))+rcx]
+	movdqa	xmm6,XMMWORD[((224-256))+rcx]
+	movdqa	xmm7,XMMWORD[((240-256))+rcx]
+	movdqa	xmm0,XMMWORD[((256-256))+rcx]
+	movdqa	xmm1,XMMWORD[((272-256))+rcx]
+	movdqa	xmm2,XMMWORD[((288-256))+rcx]
+	movdqa	xmm3,XMMWORD[((304-256))+rcx]
+	paddd	xmm0,XMMWORD[$L$four]	; advance word 12 in every lane by $L$four (constant not visible here)
+
+$L$oop_enter4x:
+	movdqa	XMMWORD[32+rsp],xmm6	; spill words 10 and 11 so xmm6/xmm7 can serve as temporaries and masks
+	movdqa	XMMWORD[48+rsp],xmm7
+	movdqa	xmm7,XMMWORD[r10]	; xmm7 = mask at $L$rot16
+	mov	eax,10	; 10 passes of $L$oop4x, each pass containing two rounds
+	movdqa	XMMWORD[(256-256)+rcx],xmm0	; save this iteration's word 12 for the feed-forward add and the next reload
+	jmp	NEAR $L$oop4x
+
+ALIGN	32
+$L$oop4x:	; first round, quarter-rounds on words (0,4,8,12) and (1,5,9,13), interleaved
+	paddd	xmm8,xmm12
+	paddd	xmm9,xmm13
+	pxor	xmm0,xmm8
+	pxor	xmm1,xmm9
+DB	102,15,56,0,199	; pshufb xmm0,xmm7
+DB	102,15,56,0,207	; pshufb xmm1,xmm7
+	paddd	xmm4,xmm0
+	paddd	xmm5,xmm1
+	pxor	xmm12,xmm4
+	pxor	xmm13,xmm5
+	movdqa	xmm6,xmm12
+	pslld	xmm12,12
+	psrld	xmm6,20
+	movdqa	xmm7,xmm13
+	pslld	xmm13,12
+	por	xmm12,xmm6	; xmm12 = rotate-left by 12
+	psrld	xmm7,20
+	movdqa	xmm6,XMMWORD[r11]	; xmm6 = mask at $L$rot24
+	por	xmm13,xmm7	; xmm13 = rotate-left by 12
+	paddd	xmm8,xmm12
+	paddd	xmm9,xmm13
+	pxor	xmm0,xmm8
+	pxor	xmm1,xmm9
+DB	102,15,56,0,198	; pshufb xmm0,xmm6
+DB	102,15,56,0,206	; pshufb xmm1,xmm6
+	paddd	xmm4,xmm0
+	paddd	xmm5,xmm1
+	pxor	xmm12,xmm4
+	pxor	xmm13,xmm5
+	movdqa	xmm7,xmm12
+	pslld	xmm12,7
+	psrld	xmm7,25
+	movdqa	xmm6,xmm13
+	pslld	xmm13,7
+	por	xmm12,xmm7	; xmm12 = rotate-left by 7
+	psrld	xmm6,25
+	movdqa	xmm7,XMMWORD[r10]	; reload mask at $L$rot16
+	por	xmm13,xmm6	; xmm13 = rotate-left by 7
+	movdqa	XMMWORD[rsp],xmm4	; park words 8 and 9 ...
+	movdqa	XMMWORD[16+rsp],xmm5
+	movdqa	xmm4,XMMWORD[32+rsp]	; ... and bring words 10 and 11 into xmm4/xmm5
+	movdqa	xmm5,XMMWORD[48+rsp]
+	paddd	xmm10,xmm14	; quarter-rounds on words (2,6,10,14) and (3,7,11,15)
+	paddd	xmm11,xmm15
+	pxor	xmm2,xmm10
+	pxor	xmm3,xmm11
+DB	102,15,56,0,215	; pshufb xmm2,xmm7
+DB	102,15,56,0,223	; pshufb xmm3,xmm7
+	paddd	xmm4,xmm2
+	paddd	xmm5,xmm3
+	pxor	xmm14,xmm4
+	pxor	xmm15,xmm5
+	movdqa	xmm6,xmm14
+	pslld	xmm14,12
+	psrld	xmm6,20
+	movdqa	xmm7,xmm15
+	pslld	xmm15,12
+	por	xmm14,xmm6
+	psrld	xmm7,20
+	movdqa	xmm6,XMMWORD[r11]
+	por	xmm15,xmm7
+	paddd	xmm10,xmm14
+	paddd	xmm11,xmm15
+	pxor	xmm2,xmm10
+	pxor	xmm3,xmm11
+DB	102,15,56,0,214	; pshufb xmm2,xmm6
+DB	102,15,56,0,222	; pshufb xmm3,xmm6
+	paddd	xmm4,xmm2
+	paddd	xmm5,xmm3
+	pxor	xmm14,xmm4
+	pxor	xmm15,xmm5
+	movdqa	xmm7,xmm14
+	pslld	xmm14,7
+	psrld	xmm7,25
+	movdqa	xmm6,xmm15
+	pslld	xmm15,7
+	por	xmm14,xmm7
+	psrld	xmm6,25
+	movdqa	xmm7,XMMWORD[r10]
+	por	xmm15,xmm6
+	paddd	xmm8,xmm13	; second round, quarter-rounds on words (0,5,10,15) and (1,6,11,12); xmm4/xmm5 still hold words 10/11
+	paddd	xmm9,xmm14
+	pxor	xmm3,xmm8
+	pxor	xmm0,xmm9
+DB	102,15,56,0,223	; pshufb xmm3,xmm7
+DB	102,15,56,0,199	; pshufb xmm0,xmm7
+	paddd	xmm4,xmm3
+	paddd	xmm5,xmm0
+	pxor	xmm13,xmm4
+	pxor	xmm14,xmm5
+	movdqa	xmm6,xmm13
+	pslld	xmm13,12
+	psrld	xmm6,20
+	movdqa	xmm7,xmm14
+	pslld	xmm14,12
+	por	xmm13,xmm6
+	psrld	xmm7,20
+	movdqa	xmm6,XMMWORD[r11]
+	por	xmm14,xmm7
+	paddd	xmm8,xmm13
+	paddd	xmm9,xmm14
+	pxor	xmm3,xmm8
+	pxor	xmm0,xmm9
+DB	102,15,56,0,222	; pshufb xmm3,xmm6
+DB	102,15,56,0,198	; pshufb xmm0,xmm6
+	paddd	xmm4,xmm3
+	paddd	xmm5,xmm0
+	pxor	xmm13,xmm4
+	pxor	xmm14,xmm5
+	movdqa	xmm7,xmm13
+	pslld	xmm13,7
+	psrld	xmm7,25
+	movdqa	xmm6,xmm14
+	pslld	xmm14,7
+	por	xmm13,xmm7
+	psrld	xmm6,25
+	movdqa	xmm7,XMMWORD[r10]
+	por	xmm14,xmm6
+	movdqa	XMMWORD[32+rsp],xmm4	; park words 10 and 11 ...
+	movdqa	XMMWORD[48+rsp],xmm5
+	movdqa	xmm4,XMMWORD[rsp]	; ... and bring words 8 and 9 back
+	movdqa	xmm5,XMMWORD[16+rsp]
+	paddd	xmm10,xmm15	; quarter-rounds on words (2,7,8,13) and (3,4,9,14)
+	paddd	xmm11,xmm12
+	pxor	xmm1,xmm10
+	pxor	xmm2,xmm11
+DB	102,15,56,0,207	; pshufb xmm1,xmm7
+DB	102,15,56,0,215	; pshufb xmm2,xmm7
+	paddd	xmm4,xmm1
+	paddd	xmm5,xmm2
+	pxor	xmm15,xmm4
+	pxor	xmm12,xmm5
+	movdqa	xmm6,xmm15
+	pslld	xmm15,12
+	psrld	xmm6,20
+	movdqa	xmm7,xmm12
+	pslld	xmm12,12
+	por	xmm15,xmm6
+	psrld	xmm7,20
+	movdqa	xmm6,XMMWORD[r11]
+	por	xmm12,xmm7
+	paddd	xmm10,xmm15
+	paddd	xmm11,xmm12
+	pxor	xmm1,xmm10
+	pxor	xmm2,xmm11
+DB	102,15,56,0,206	; pshufb xmm1,xmm6
+DB	102,15,56,0,214	; pshufb xmm2,xmm6
+	paddd	xmm4,xmm1
+	paddd	xmm5,xmm2
+	pxor	xmm15,xmm4
+	pxor	xmm12,xmm5
+	movdqa	xmm7,xmm15
+	pslld	xmm15,7
+	psrld	xmm7,25
+	movdqa	xmm6,xmm12
+	pslld	xmm12,7
+	por	xmm15,xmm7
+	psrld	xmm6,25
+	movdqa	xmm7,XMMWORD[r10]
+	por	xmm12,xmm6
+	dec	eax
+	jnz	NEAR $L$oop4x
+
+	paddd	xmm8,XMMWORD[64+rsp]	; feed-forward: add the saved input words 0-3
+	paddd	xmm9,XMMWORD[80+rsp]
+	paddd	xmm10,XMMWORD[96+rsp]
+	paddd	xmm11,XMMWORD[112+rsp]
+
+	movdqa	xmm6,xmm8	; 4x4 dword transpose: turn "one word, four blocks" registers into "four words, one block"
+	punpckldq	xmm8,xmm9
+	movdqa	xmm7,xmm10
+	punpckldq	xmm10,xmm11
+	punpckhdq	xmm6,xmm9
+	punpckhdq	xmm7,xmm11
+	movdqa	xmm9,xmm8
+	punpcklqdq	xmm8,xmm10
+	movdqa	xmm11,xmm6
+	punpcklqdq	xmm6,xmm7
+	punpckhqdq	xmm9,xmm10
+	punpckhqdq	xmm11,xmm7
+	paddd	xmm12,XMMWORD[((128-256))+rcx]	; feed-forward for words 4-7
+	paddd	xmm13,XMMWORD[((144-256))+rcx]
+	paddd	xmm14,XMMWORD[((160-256))+rcx]
+	paddd	xmm15,XMMWORD[((176-256))+rcx]
+
+	movdqa	XMMWORD[rsp],xmm8	; row 0 of blocks 0 and 1 goes to the stack ...
+	movdqa	XMMWORD[16+rsp],xmm9
+	movdqa	xmm8,XMMWORD[32+rsp]	; ... freeing xmm8/xmm9 to pick up words 10 and 11 parked by the loop
+	movdqa	xmm9,XMMWORD[48+rsp]
+
+	movdqa	xmm10,xmm12	; transpose words 4-7
+	punpckldq	xmm12,xmm13
+	movdqa	xmm7,xmm14
+	punpckldq	xmm14,xmm15
+	punpckhdq	xmm10,xmm13
+	punpckhdq	xmm7,xmm15
+	movdqa	xmm13,xmm12
+	punpcklqdq	xmm12,xmm14
+	movdqa	xmm15,xmm10
+	punpcklqdq	xmm10,xmm7
+	punpckhqdq	xmm13,xmm14
+	punpckhqdq	xmm15,xmm7
+	paddd	xmm4,XMMWORD[((192-256))+rcx]	; feed-forward for words 8-11
+	paddd	xmm5,XMMWORD[((208-256))+rcx]
+	paddd	xmm8,XMMWORD[((224-256))+rcx]
+	paddd	xmm9,XMMWORD[((240-256))+rcx]
+
+	movdqa	XMMWORD[32+rsp],xmm6	; row 0 of blocks 2 and 3 goes to the stack
+	movdqa	XMMWORD[48+rsp],xmm11
+
+	movdqa	xmm14,xmm4	; transpose words 8-11
+	punpckldq	xmm4,xmm5
+	movdqa	xmm7,xmm8
+	punpckldq	xmm8,xmm9
+	punpckhdq	xmm14,xmm5
+	punpckhdq	xmm7,xmm9
+	movdqa	xmm5,xmm4
+	punpcklqdq	xmm4,xmm8
+	movdqa	xmm9,xmm14
+	punpcklqdq	xmm14,xmm7
+	punpckhqdq	xmm5,xmm8
+	punpckhqdq	xmm9,xmm7
+	paddd	xmm0,XMMWORD[((256-256))+rcx]	; feed-forward for words 12-15
+	paddd	xmm1,XMMWORD[((272-256))+rcx]
+	paddd	xmm2,XMMWORD[((288-256))+rcx]
+	paddd	xmm3,XMMWORD[((304-256))+rcx]
+
+	movdqa	xmm8,xmm0	; transpose words 12-15
+	punpckldq	xmm0,xmm1
+	movdqa	xmm7,xmm2
+	punpckldq	xmm2,xmm3
+	punpckhdq	xmm8,xmm1
+	punpckhdq	xmm7,xmm3
+	movdqa	xmm1,xmm0
+	punpcklqdq	xmm0,xmm2
+	movdqa	xmm3,xmm8
+	punpcklqdq	xmm8,xmm7
+	punpckhqdq	xmm1,xmm2
+	punpckhqdq	xmm3,xmm7
+	cmp	rdx,64*4
+	jb	NEAR $L$tail4x	; fewer than 256 bytes left: partial output
+
+	movdqu	xmm6,XMMWORD[rsi]	; block 0 keystream = [rsp], xmm12, xmm4, xmm0
+	movdqu	xmm11,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	xmm7,XMMWORD[48+rsi]
+	pxor	xmm6,XMMWORD[rsp]
+	pxor	xmm11,xmm12
+	pxor	xmm2,xmm4
+	pxor	xmm7,xmm0
+
+	movdqu	XMMWORD[rdi],xmm6
+	movdqu	xmm6,XMMWORD[64+rsi]	; block 1 keystream = [16+rsp], xmm13, xmm5, xmm1
+	movdqu	XMMWORD[16+rdi],xmm11
+	movdqu	xmm11,XMMWORD[80+rsi]
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	xmm2,XMMWORD[96+rsi]
+	movdqu	XMMWORD[48+rdi],xmm7
+	movdqu	xmm7,XMMWORD[112+rsi]
+	lea	rsi,[128+rsi]
+	pxor	xmm6,XMMWORD[16+rsp]
+	pxor	xmm11,xmm13
+	pxor	xmm2,xmm5
+	pxor	xmm7,xmm1
+
+	movdqu	XMMWORD[64+rdi],xmm6
+	movdqu	xmm6,XMMWORD[rsi]	; block 2 keystream = [32+rsp], xmm10, xmm14, xmm8
+	movdqu	XMMWORD[80+rdi],xmm11
+	movdqu	xmm11,XMMWORD[16+rsi]
+	movdqu	XMMWORD[96+rdi],xmm2
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	XMMWORD[112+rdi],xmm7
+	lea	rdi,[128+rdi]
+	movdqu	xmm7,XMMWORD[48+rsi]
+	pxor	xmm6,XMMWORD[32+rsp]
+	pxor	xmm11,xmm10
+	pxor	xmm2,xmm14
+	pxor	xmm7,xmm8
+
+	movdqu	XMMWORD[rdi],xmm6
+	movdqu	xmm6,XMMWORD[64+rsi]	; block 3 keystream = [48+rsp], xmm15, xmm9, xmm3
+	movdqu	XMMWORD[16+rdi],xmm11
+	movdqu	xmm11,XMMWORD[80+rsi]
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	xmm2,XMMWORD[96+rsi]
+	movdqu	XMMWORD[48+rdi],xmm7
+	movdqu	xmm7,XMMWORD[112+rsi]
+	lea	rsi,[128+rsi]
+	pxor	xmm6,XMMWORD[48+rsp]
+	pxor	xmm11,xmm15
+	pxor	xmm2,xmm9
+	pxor	xmm7,xmm3
+	movdqu	XMMWORD[64+rdi],xmm6
+	movdqu	XMMWORD[80+rdi],xmm11
+	movdqu	XMMWORD[96+rdi],xmm2
+	movdqu	XMMWORD[112+rdi],xmm7
+	lea	rdi,[128+rdi]
+
+	sub	rdx,64*4
+	jnz	NEAR $L$oop_outer4x
+
+	jmp	NEAR $L$done4x
+
+$L$tail4x:	; fewer than 256 bytes remain: emit whole 64-byte blocks, then finish byte by byte
+	cmp	rdx,192
+	jae	NEAR $L$192_or_more4x
+	cmp	rdx,128
+	jae	NEAR $L$128_or_more4x
+	cmp	rdx,64
+	jae	NEAR $L$64_or_more4x
+
+
+	xor	r10,r10	; r10 = byte index for $L$oop_tail4x
+
+	movdqa	XMMWORD[16+rsp],xmm12	; complete block 0 at rsp ([rsp] already holds its first 16 bytes)
+	movdqa	XMMWORD[32+rsp],xmm4
+	movdqa	XMMWORD[48+rsp],xmm0
+	jmp	NEAR $L$oop_tail4x
+
+ALIGN	32
+$L$64_or_more4x:
+	movdqu	xmm6,XMMWORD[rsi]
+	movdqu	xmm11,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	xmm7,XMMWORD[48+rsi]
+	pxor	xmm6,XMMWORD[rsp]
+	pxor	xmm11,xmm12
+	pxor	xmm2,xmm4
+	pxor	xmm7,xmm0
+	movdqu	XMMWORD[rdi],xmm6
+	movdqu	XMMWORD[16+rdi],xmm11
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	XMMWORD[48+rdi],xmm7
+	je	NEAR $L$done4x	; ZF still comes from "cmp rdx,64" at $L$tail4x: exactly 64 bytes, nothing left
+
+	movdqa	xmm6,XMMWORD[16+rsp]
+	lea	rsi,[64+rsi]
+	xor	r10,r10
+	movdqa	XMMWORD[rsp],xmm6	; stage block 1 at rsp for the byte loop
+	movdqa	XMMWORD[16+rsp],xmm13
+	lea	rdi,[64+rdi]
+	movdqa	XMMWORD[32+rsp],xmm5
+	sub	rdx,64
+	movdqa	XMMWORD[48+rsp],xmm1
+	jmp	NEAR $L$oop_tail4x
+
+ALIGN	32
+$L$128_or_more4x:
+	movdqu	xmm6,XMMWORD[rsi]
+	movdqu	xmm11,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	xmm7,XMMWORD[48+rsi]
+	pxor	xmm6,XMMWORD[rsp]
+	pxor	xmm11,xmm12
+	pxor	xmm2,xmm4
+	pxor	xmm7,xmm0
+
+	movdqu	XMMWORD[rdi],xmm6
+	movdqu	xmm6,XMMWORD[64+rsi]
+	movdqu	XMMWORD[16+rdi],xmm11
+	movdqu	xmm11,XMMWORD[80+rsi]
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	xmm2,XMMWORD[96+rsi]
+	movdqu	XMMWORD[48+rdi],xmm7
+	movdqu	xmm7,XMMWORD[112+rsi]
+	pxor	xmm6,XMMWORD[16+rsp]
+	pxor	xmm11,xmm13
+	pxor	xmm2,xmm5
+	pxor	xmm7,xmm1
+	movdqu	XMMWORD[64+rdi],xmm6
+	movdqu	XMMWORD[80+rdi],xmm11
+	movdqu	XMMWORD[96+rdi],xmm2
+	movdqu	XMMWORD[112+rdi],xmm7
+	je	NEAR $L$done4x	; ZF still comes from "cmp rdx,128" at $L$tail4x
+
+	movdqa	xmm6,XMMWORD[32+rsp]
+	lea	rsi,[128+rsi]
+	xor	r10,r10
+	movdqa	XMMWORD[rsp],xmm6	; stage block 2 at rsp for the byte loop
+	movdqa	XMMWORD[16+rsp],xmm10
+	lea	rdi,[128+rdi]
+	movdqa	XMMWORD[32+rsp],xmm14
+	sub	rdx,128
+	movdqa	XMMWORD[48+rsp],xmm8
+	jmp	NEAR $L$oop_tail4x
+
+ALIGN	32
+$L$192_or_more4x:
+	movdqu	xmm6,XMMWORD[rsi]
+	movdqu	xmm11,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	xmm7,XMMWORD[48+rsi]
+	pxor	xmm6,XMMWORD[rsp]
+	pxor	xmm11,xmm12
+	pxor	xmm2,xmm4
+	pxor	xmm7,xmm0
+
+	movdqu	XMMWORD[rdi],xmm6
+	movdqu	xmm6,XMMWORD[64+rsi]
+	movdqu	XMMWORD[16+rdi],xmm11
+	movdqu	xmm11,XMMWORD[80+rsi]
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	xmm2,XMMWORD[96+rsi]
+	movdqu	XMMWORD[48+rdi],xmm7
+	movdqu	xmm7,XMMWORD[112+rsi]
+	lea	rsi,[128+rsi]
+	pxor	xmm6,XMMWORD[16+rsp]
+	pxor	xmm11,xmm13
+	pxor	xmm2,xmm5
+	pxor	xmm7,xmm1
+
+	movdqu	XMMWORD[64+rdi],xmm6
+	movdqu	xmm6,XMMWORD[rsi]
+	movdqu	XMMWORD[80+rdi],xmm11
+	movdqu	xmm11,XMMWORD[16+rsi]
+	movdqu	XMMWORD[96+rdi],xmm2
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	XMMWORD[112+rdi],xmm7
+	lea	rdi,[128+rdi]
+	movdqu	xmm7,XMMWORD[48+rsi]
+	pxor	xmm6,XMMWORD[32+rsp]
+	pxor	xmm11,xmm10
+	pxor	xmm2,xmm14
+	pxor	xmm7,xmm8
+	movdqu	XMMWORD[rdi],xmm6
+	movdqu	XMMWORD[16+rdi],xmm11
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	XMMWORD[48+rdi],xmm7
+	je	NEAR $L$done4x	; ZF still comes from "cmp rdx,192" at $L$tail4x
+
+	movdqa	xmm6,XMMWORD[48+rsp]
+	lea	rsi,[64+rsi]	; rsi/rdi were already advanced by 128 above, so +64 makes 192 in total
+	xor	r10,r10
+	movdqa	XMMWORD[rsp],xmm6	; stage block 3 at rsp for the byte loop
+	movdqa	XMMWORD[16+rsp],xmm15
+	lea	rdi,[64+rdi]
+	movdqa	XMMWORD[32+rsp],xmm9
+	sub	rdx,192
+	movdqa	XMMWORD[48+rsp],xmm3
+
+$L$oop_tail4x:	; XOR the remaining bytes (fewer than 64) with the keystream block staged at rsp
+	movzx	eax,BYTE[r10*1+rsi]
+	movzx	ecx,BYTE[r10*1+rsp]
+	lea	r10,[1+r10]
+	xor	eax,ecx
+	mov	BYTE[((-1))+r10*1+rdi],al	; -1 compensates for the index increment above
+	dec	rdx
+	jnz	NEAR $L$oop_tail4x
+
+$L$done4x:
+	movaps	xmm6,XMMWORD[((-168))+r9]	; restore the xmm registers saved on entry
+	movaps	xmm7,XMMWORD[((-152))+r9]
+	movaps	xmm8,XMMWORD[((-136))+r9]
+	movaps	xmm9,XMMWORD[((-120))+r9]
+	movaps	xmm10,XMMWORD[((-104))+r9]
+	movaps	xmm11,XMMWORD[((-88))+r9]
+	movaps	xmm12,XMMWORD[((-72))+r9]
+	movaps	xmm13,XMMWORD[((-56))+r9]
+	movaps	xmm14,XMMWORD[((-40))+r9]
+	movaps	xmm15,XMMWORD[((-24))+r9]
+	lea	rsp,[r9]
+$L$4x_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_ChaCha20_4x:
+
+ALIGN	32
+ChaCha20_8x:	; 8-way AVX2 path: computes eight 64-byte keystream blocks per outer iteration, one block per dword lane
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ChaCha20_8x:
+	mov	rdi,rcx	; rdi = arg 1, output pointer
+	mov	rsi,rdx	; rsi = arg 2, input pointer
+	mov	rdx,r8	; rdx = arg 3, remaining byte count
+	mov	rcx,r9	; rcx = arg 4, source of state rows 1-2
+	mov	r8,QWORD[40+rsp]	; r8 = arg 5 (on stack), source of state row 3
+
+
+$L$ChaCha20_8x:
+	mov	r9,rsp	; r9 anchors the frame; the epilogue restores rsp from it
+	sub	rsp,0x280+168	; 0x280 bytes of scratch/state + 168 bytes holding the xmm6-xmm15 save slots
+	and	rsp,-32	; 32-byte alignment required by the vmovdqa ymm accesses below
+	movaps	XMMWORD[(-168)+r9],xmm6
+	movaps	XMMWORD[(-152)+r9],xmm7
+	movaps	XMMWORD[(-136)+r9],xmm8
+	movaps	XMMWORD[(-120)+r9],xmm9
+	movaps	XMMWORD[(-104)+r9],xmm10
+	movaps	XMMWORD[(-88)+r9],xmm11
+	movaps	XMMWORD[(-72)+r9],xmm12
+	movaps	XMMWORD[(-56)+r9],xmm13
+	movaps	XMMWORD[(-40)+r9],xmm14
+	movaps	XMMWORD[(-24)+r9],xmm15
+$L$8x_body:
+	vzeroupper
+
+
+
+
+
+
+
+
+
+
+	vbroadcasti128	ymm11,XMMWORD[$L$sigma]	; state row 0, duplicated into both 128-bit halves
+	vbroadcasti128	ymm3,XMMWORD[rcx]	; state row 1
+	vbroadcasti128	ymm15,XMMWORD[16+rcx]	; state row 2
+	vbroadcasti128	ymm7,XMMWORD[r8]	; state row 3
+	lea	rcx,[256+rsp]	; biased base pointer for the (N-256)+rcx slots
+	lea	rax,[512+rsp]	; biased base pointer for the (N-512)+rax slots
+	lea	r10,[$L$rot16]	; r10 -> vpshufb mask used after the first add/xor of each quarter-round
+	lea	r11,[$L$rot24]	; r11 -> vpshufb mask used after the third add/xor
+
+	vpshufd	ymm8,ymm11,0x00	; broadcast each dword of row 0 into its own register (words 0-3 -> ymm8-ymm11)
+	vpshufd	ymm9,ymm11,0x55
+	vmovdqa	YMMWORD[(128-256)+rcx],ymm8
+	vpshufd	ymm10,ymm11,0xaa
+	vmovdqa	YMMWORD[(160-256)+rcx],ymm9
+	vpshufd	ymm11,ymm11,0xff
+	vmovdqa	YMMWORD[(192-256)+rcx],ymm10
+	vmovdqa	YMMWORD[(224-256)+rcx],ymm11
+
+	vpshufd	ymm0,ymm3,0x00	; words 4-7 -> ymm0-ymm3
+	vpshufd	ymm1,ymm3,0x55
+	vmovdqa	YMMWORD[(256-256)+rcx],ymm0
+	vpshufd	ymm2,ymm3,0xaa
+	vmovdqa	YMMWORD[(288-256)+rcx],ymm1
+	vpshufd	ymm3,ymm3,0xff
+	vmovdqa	YMMWORD[(320-256)+rcx],ymm2
+	vmovdqa	YMMWORD[(352-256)+rcx],ymm3
+
+	vpshufd	ymm12,ymm15,0x00	; words 8-11 -> ymm12-ymm15
+	vpshufd	ymm13,ymm15,0x55
+	vmovdqa	YMMWORD[(384-512)+rax],ymm12
+	vpshufd	ymm14,ymm15,0xaa
+	vmovdqa	YMMWORD[(416-512)+rax],ymm13
+	vpshufd	ymm15,ymm15,0xff
+	vmovdqa	YMMWORD[(448-512)+rax],ymm14
+	vmovdqa	YMMWORD[(480-512)+rax],ymm15
+
+	vpshufd	ymm4,ymm7,0x00	; words 12-15 -> ymm4-ymm7
+	vpshufd	ymm5,ymm7,0x55
+	vpaddd	ymm4,ymm4,YMMWORD[$L$incy]	; per-lane offsets added to word 12 (constant not visible here)
+	vpshufd	ymm6,ymm7,0xaa
+	vmovdqa	YMMWORD[(544-512)+rax],ymm5
+	vpshufd	ymm7,ymm7,0xff
+	vmovdqa	YMMWORD[(576-512)+rax],ymm6
+	vmovdqa	YMMWORD[(608-512)+rax],ymm7
+
+	jmp	NEAR $L$oop_enter8x	; word 12 is stored at $L$oop_enter8x
+
+ALIGN	32
+$L$oop_outer8x:	; reload the broadcast state for the next group of eight blocks
+	vmovdqa	ymm8,YMMWORD[((128-256))+rcx]
+	vmovdqa	ymm9,YMMWORD[((160-256))+rcx]
+	vmovdqa	ymm10,YMMWORD[((192-256))+rcx]
+	vmovdqa	ymm11,YMMWORD[((224-256))+rcx]
+	vmovdqa	ymm0,YMMWORD[((256-256))+rcx]
+	vmovdqa	ymm1,YMMWORD[((288-256))+rcx]
+	vmovdqa	ymm2,YMMWORD[((320-256))+rcx]
+	vmovdqa	ymm3,YMMWORD[((352-256))+rcx]
+	vmovdqa	ymm12,YMMWORD[((384-512))+rax]
+	vmovdqa	ymm13,YMMWORD[((416-512))+rax]
+	vmovdqa	ymm14,YMMWORD[((448-512))+rax]
+	vmovdqa	ymm15,YMMWORD[((480-512))+rax]
+	vmovdqa	ymm4,YMMWORD[((512-512))+rax]
+	vmovdqa	ymm5,YMMWORD[((544-512))+rax]
+	vmovdqa	ymm6,YMMWORD[((576-512))+rax]
+	vmovdqa	ymm7,YMMWORD[((608-512))+rax]
+	vpaddd	ymm4,ymm4,YMMWORD[$L$eight]	; advance word 12 in every lane by $L$eight (constant not visible here)
+
+$L$oop_enter8x:
+	vmovdqa	YMMWORD[64+rsp],ymm14	; spill words 10 and 11 so ymm14/ymm15 can serve as temporaries and masks
+	vmovdqa	YMMWORD[96+rsp],ymm15
+	vbroadcasti128	ymm15,XMMWORD[r10]	; ymm15 = mask at $L$rot16
+	vmovdqa	YMMWORD[(512-512)+rax],ymm4	; save this iteration's word 12 (must happen before eax is reused below)
+	mov	eax,10	; 10 passes of $L$oop8x; this overwrites rax, which is re-derived after the loop
+	jmp	NEAR $L$oop8x
+
+ALIGN	32
+$L$oop8x:	; first round, quarter-rounds on words (0,4,8,12) and (1,5,9,13), interleaved
+	vpaddd	ymm8,ymm8,ymm0
+	vpxor	ymm4,ymm8,ymm4
+	vpshufb	ymm4,ymm4,ymm15
+	vpaddd	ymm9,ymm9,ymm1
+	vpxor	ymm5,ymm9,ymm5
+	vpshufb	ymm5,ymm5,ymm15
+	vpaddd	ymm12,ymm12,ymm4
+	vpxor	ymm0,ymm12,ymm0
+	vpslld	ymm14,ymm0,12
+	vpsrld	ymm0,ymm0,20
+	vpor	ymm0,ymm14,ymm0	; ymm0 = rotate-left by 12
+	vbroadcasti128	ymm14,XMMWORD[r11]	; ymm14 = mask at $L$rot24
+	vpaddd	ymm13,ymm13,ymm5
+	vpxor	ymm1,ymm13,ymm1
+	vpslld	ymm15,ymm1,12
+	vpsrld	ymm1,ymm1,20
+	vpor	ymm1,ymm15,ymm1	; ymm1 = rotate-left by 12
+	vpaddd	ymm8,ymm8,ymm0
+	vpxor	ymm4,ymm8,ymm4
+	vpshufb	ymm4,ymm4,ymm14
+	vpaddd	ymm9,ymm9,ymm1
+	vpxor	ymm5,ymm9,ymm5
+	vpshufb	ymm5,ymm5,ymm14
+	vpaddd	ymm12,ymm12,ymm4
+	vpxor	ymm0,ymm12,ymm0
+	vpslld	ymm15,ymm0,7
+	vpsrld	ymm0,ymm0,25
+	vpor	ymm0,ymm15,ymm0	; ymm0 = rotate-left by 7
+	vbroadcasti128	ymm15,XMMWORD[r10]	; reload mask at $L$rot16
+	vpaddd	ymm13,ymm13,ymm5
+	vpxor	ymm1,ymm13,ymm1
+	vpslld	ymm14,ymm1,7
+	vpsrld	ymm1,ymm1,25
+	vpor	ymm1,ymm14,ymm1	; ymm1 = rotate-left by 7
+	vmovdqa	YMMWORD[rsp],ymm12	; park words 8 and 9 ...
+	vmovdqa	YMMWORD[32+rsp],ymm13
+	vmovdqa	ymm12,YMMWORD[64+rsp]	; ... and bring words 10 and 11 into ymm12/ymm13
+	vmovdqa	ymm13,YMMWORD[96+rsp]
+	vpaddd	ymm10,ymm10,ymm2	; quarter-rounds on words (2,6,10,14) and (3,7,11,15)
+	vpxor	ymm6,ymm10,ymm6
+	vpshufb	ymm6,ymm6,ymm15
+	vpaddd	ymm11,ymm11,ymm3
+	vpxor	ymm7,ymm11,ymm7
+	vpshufb	ymm7,ymm7,ymm15
+	vpaddd	ymm12,ymm12,ymm6
+	vpxor	ymm2,ymm12,ymm2
+	vpslld	ymm14,ymm2,12
+	vpsrld	ymm2,ymm2,20
+	vpor	ymm2,ymm14,ymm2
+	vbroadcasti128	ymm14,XMMWORD[r11]
+	vpaddd	ymm13,ymm13,ymm7
+	vpxor	ymm3,ymm13,ymm3
+	vpslld	ymm15,ymm3,12
+	vpsrld	ymm3,ymm3,20
+	vpor	ymm3,ymm15,ymm3
+	vpaddd	ymm10,ymm10,ymm2
+	vpxor	ymm6,ymm10,ymm6
+	vpshufb	ymm6,ymm6,ymm14
+	vpaddd	ymm11,ymm11,ymm3
+	vpxor	ymm7,ymm11,ymm7
+	vpshufb	ymm7,ymm7,ymm14
+	vpaddd	ymm12,ymm12,ymm6
+	vpxor	ymm2,ymm12,ymm2
+	vpslld	ymm15,ymm2,7
+	vpsrld	ymm2,ymm2,25
+	vpor	ymm2,ymm15,ymm2
+	vbroadcasti128	ymm15,XMMWORD[r10]
+	vpaddd	ymm13,ymm13,ymm7
+	vpxor	ymm3,ymm13,ymm3
+	vpslld	ymm14,ymm3,7
+	vpsrld	ymm3,ymm3,25
+	vpor	ymm3,ymm14,ymm3
+	vpaddd	ymm8,ymm8,ymm1	; second round, quarter-rounds on words (0,5,10,15) and (1,6,11,12); ymm12/ymm13 still hold words 10/11
+	vpxor	ymm7,ymm8,ymm7
+	vpshufb	ymm7,ymm7,ymm15
+	vpaddd	ymm9,ymm9,ymm2
+	vpxor	ymm4,ymm9,ymm4
+	vpshufb	ymm4,ymm4,ymm15
+	vpaddd	ymm12,ymm12,ymm7
+	vpxor	ymm1,ymm12,ymm1
+	vpslld	ymm14,ymm1,12
+	vpsrld	ymm1,ymm1,20
+	vpor	ymm1,ymm14,ymm1
+	vbroadcasti128	ymm14,XMMWORD[r11]
+	vpaddd	ymm13,ymm13,ymm4
+	vpxor	ymm2,ymm13,ymm2
+	vpslld	ymm15,ymm2,12
+	vpsrld	ymm2,ymm2,20
+	vpor	ymm2,ymm15,ymm2
+	vpaddd	ymm8,ymm8,ymm1
+	vpxor	ymm7,ymm8,ymm7
+	vpshufb	ymm7,ymm7,ymm14
+	vpaddd	ymm9,ymm9,ymm2
+	vpxor	ymm4,ymm9,ymm4
+	vpshufb	ymm4,ymm4,ymm14
+	vpaddd	ymm12,ymm12,ymm7
+	vpxor	ymm1,ymm12,ymm1
+	vpslld	ymm15,ymm1,7
+	vpsrld	ymm1,ymm1,25
+	vpor	ymm1,ymm15,ymm1
+	vbroadcasti128	ymm15,XMMWORD[r10]
+	vpaddd	ymm13,ymm13,ymm4
+	vpxor	ymm2,ymm13,ymm2
+	vpslld	ymm14,ymm2,7
+	vpsrld	ymm2,ymm2,25
+	vpor	ymm2,ymm14,ymm2
+	vmovdqa	YMMWORD[64+rsp],ymm12	; park words 10 and 11 ...
+	vmovdqa	YMMWORD[96+rsp],ymm13
+	vmovdqa	ymm12,YMMWORD[rsp]	; ... and bring words 8 and 9 back
+	vmovdqa	ymm13,YMMWORD[32+rsp]
+	vpaddd	ymm10,ymm10,ymm3	; quarter-rounds on words (2,7,8,13) and (3,4,9,14)
+	vpxor	ymm5,ymm10,ymm5
+	vpshufb	ymm5,ymm5,ymm15
+	vpaddd	ymm11,ymm11,ymm0
+	vpxor	ymm6,ymm11,ymm6
+	vpshufb	ymm6,ymm6,ymm15
+	vpaddd	ymm12,ymm12,ymm5
+	vpxor	ymm3,ymm12,ymm3
+	vpslld	ymm14,ymm3,12
+	vpsrld	ymm3,ymm3,20
+	vpor	ymm3,ymm14,ymm3
+	vbroadcasti128	ymm14,XMMWORD[r11]
+	vpaddd	ymm13,ymm13,ymm6
+	vpxor	ymm0,ymm13,ymm0
+	vpslld	ymm15,ymm0,12
+	vpsrld	ymm0,ymm0,20
+	vpor	ymm0,ymm15,ymm0
+	vpaddd	ymm10,ymm10,ymm3
+	vpxor	ymm5,ymm10,ymm5
+	vpshufb	ymm5,ymm5,ymm14
+	vpaddd	ymm11,ymm11,ymm0
+	vpxor	ymm6,ymm11,ymm6
+	vpshufb	ymm6,ymm6,ymm14
+	vpaddd	ymm12,ymm12,ymm5
+	vpxor	ymm3,ymm12,ymm3
+	vpslld	ymm15,ymm3,7
+	vpsrld	ymm3,ymm3,25
+	vpor	ymm3,ymm15,ymm3
+	vbroadcasti128	ymm15,XMMWORD[r10]
+	vpaddd	ymm13,ymm13,ymm6
+	vpxor	ymm0,ymm13,ymm0
+	vpslld	ymm14,ymm0,7
+	vpsrld	ymm0,ymm0,25
+	vpor	ymm0,ymm14,ymm0
+	dec	eax
+	jnz	NEAR $L$oop8x
+
+	lea	rax,[512+rsp]	; re-derive the base pointer that the loop counter overwrote
+	vpaddd	ymm8,ymm8,YMMWORD[((128-256))+rcx]	; feed-forward: add the saved input words 0-3
+	vpaddd	ymm9,ymm9,YMMWORD[((160-256))+rcx]
+	vpaddd	ymm10,ymm10,YMMWORD[((192-256))+rcx]
+	vpaddd	ymm11,ymm11,YMMWORD[((224-256))+rcx]
+
+	vpunpckldq	ymm14,ymm8,ymm9	; dword transpose within each 128-bit half (words 0-3)
+	vpunpckldq	ymm15,ymm10,ymm11
+	vpunpckhdq	ymm8,ymm8,ymm9
+	vpunpckhdq	ymm10,ymm10,ymm11
+	vpunpcklqdq	ymm9,ymm14,ymm15
+	vpunpckhqdq	ymm14,ymm14,ymm15
+	vpunpcklqdq	ymm11,ymm8,ymm10
+	vpunpckhqdq	ymm8,ymm8,ymm10
+	vpaddd	ymm0,ymm0,YMMWORD[((256-256))+rcx]	; feed-forward for words 4-7
+	vpaddd	ymm1,ymm1,YMMWORD[((288-256))+rcx]
+	vpaddd	ymm2,ymm2,YMMWORD[((320-256))+rcx]
+	vpaddd	ymm3,ymm3,YMMWORD[((352-256))+rcx]
+
+	vpunpckldq	ymm10,ymm0,ymm1	; dword transpose within each 128-bit half (words 4-7)
+	vpunpckldq	ymm15,ymm2,ymm3
+	vpunpckhdq	ymm0,ymm0,ymm1
+	vpunpckhdq	ymm2,ymm2,ymm3
+	vpunpcklqdq	ymm1,ymm10,ymm15
+	vpunpckhqdq	ymm10,ymm10,ymm15
+	vpunpcklqdq	ymm3,ymm0,ymm2
+	vpunpckhqdq	ymm0,ymm0,ymm2
+	vperm2i128	ymm15,ymm9,ymm1,0x20	; pair up 128-bit halves so each ymm holds 32 contiguous bytes of one block
+	vperm2i128	ymm1,ymm9,ymm1,0x31
+	vperm2i128	ymm9,ymm14,ymm10,0x20
+	vperm2i128	ymm10,ymm14,ymm10,0x31
+	vperm2i128	ymm14,ymm11,ymm3,0x20
+	vperm2i128	ymm3,ymm11,ymm3,0x31
+	vperm2i128	ymm11,ymm8,ymm0,0x20
+	vperm2i128	ymm0,ymm8,ymm0,0x31
+	vmovdqa	YMMWORD[rsp],ymm15	; park two results ...
+	vmovdqa	YMMWORD[32+rsp],ymm9
+	vmovdqa	ymm15,YMMWORD[64+rsp]	; ... freeing ymm15/ymm9 to pick up words 10 and 11 parked by the loop
+	vmovdqa	ymm9,YMMWORD[96+rsp]
+
+	vpaddd	ymm12,ymm12,YMMWORD[((384-512))+rax]	; feed-forward for words 8-11
+	vpaddd	ymm13,ymm13,YMMWORD[((416-512))+rax]
+	vpaddd	ymm15,ymm15,YMMWORD[((448-512))+rax]
+	vpaddd	ymm9,ymm9,YMMWORD[((480-512))+rax]
+
+	vpunpckldq	ymm2,ymm12,ymm13	; dword transpose within each 128-bit half (words 8-11)
+	vpunpckldq	ymm8,ymm15,ymm9
+	vpunpckhdq	ymm12,ymm12,ymm13
+	vpunpckhdq	ymm15,ymm15,ymm9
+	vpunpcklqdq	ymm13,ymm2,ymm8
+	vpunpckhqdq	ymm2,ymm2,ymm8
+	vpunpcklqdq	ymm9,ymm12,ymm15
+	vpunpckhqdq	ymm12,ymm12,ymm15
+	vpaddd	ymm4,ymm4,YMMWORD[((512-512))+rax]	; feed-forward for words 12-15
+	vpaddd	ymm5,ymm5,YMMWORD[((544-512))+rax]
+	vpaddd	ymm6,ymm6,YMMWORD[((576-512))+rax]
+	vpaddd	ymm7,ymm7,YMMWORD[((608-512))+rax]
+
+	vpunpckldq	ymm15,ymm4,ymm5	; dword transpose within each 128-bit half (words 12-15)
+	vpunpckldq	ymm8,ymm6,ymm7
+	vpunpckhdq	ymm4,ymm4,ymm5
+	vpunpckhdq	ymm6,ymm6,ymm7
+	vpunpcklqdq	ymm5,ymm15,ymm8
+	vpunpckhqdq	ymm15,ymm15,ymm8
+	vpunpcklqdq	ymm7,ymm4,ymm6
+	vpunpckhqdq	ymm4,ymm4,ymm6
+	vperm2i128	ymm8,ymm13,ymm5,0x20
+	vperm2i128	ymm5,ymm13,ymm5,0x31
+	vperm2i128	ymm13,ymm2,ymm15,0x20
+	vperm2i128	ymm15,ymm2,ymm15,0x31
+	vperm2i128	ymm2,ymm9,ymm7,0x20
+	vperm2i128	ymm7,ymm9,ymm7,0x31
+	vperm2i128	ymm9,ymm12,ymm4,0x20
+	vperm2i128	ymm4,ymm12,ymm4,0x31
+	vmovdqa	ymm6,YMMWORD[rsp]	; reload the two results parked above
+	vmovdqa	ymm12,YMMWORD[32+rsp]
+
+	cmp	rdx,64*8
+	jb	NEAR $L$tail8x	; fewer than 512 bytes left: partial output
+
+	vpxor	ymm6,ymm6,YMMWORD[rsi]	; keystream order: ymm6,8 | 1,5 | 12,13 | 10,15 | 14,2 | 3,7 | 11,9 | 0,4 (64 bytes per pair)
+	vpxor	ymm8,ymm8,YMMWORD[32+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[64+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[96+rsi]
+	lea	rsi,[128+rsi]
+	vmovdqu	YMMWORD[rdi],ymm6
+	vmovdqu	YMMWORD[32+rdi],ymm8
+	vmovdqu	YMMWORD[64+rdi],ymm1
+	vmovdqu	YMMWORD[96+rdi],ymm5
+	lea	rdi,[128+rdi]
+
+	vpxor	ymm12,ymm12,YMMWORD[rsi]
+	vpxor	ymm13,ymm13,YMMWORD[32+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[64+rsi]
+	vpxor	ymm15,ymm15,YMMWORD[96+rsi]
+	lea	rsi,[128+rsi]
+	vmovdqu	YMMWORD[rdi],ymm12
+	vmovdqu	YMMWORD[32+rdi],ymm13
+	vmovdqu	YMMWORD[64+rdi],ymm10
+	vmovdqu	YMMWORD[96+rdi],ymm15
+	lea	rdi,[128+rdi]
+
+	vpxor	ymm14,ymm14,YMMWORD[rsi]
+	vpxor	ymm2,ymm2,YMMWORD[32+rsi]
+	vpxor	ymm3,ymm3,YMMWORD[64+rsi]
+	vpxor	ymm7,ymm7,YMMWORD[96+rsi]
+	lea	rsi,[128+rsi]
+	vmovdqu	YMMWORD[rdi],ymm14
+	vmovdqu	YMMWORD[32+rdi],ymm2
+	vmovdqu	YMMWORD[64+rdi],ymm3
+	vmovdqu	YMMWORD[96+rdi],ymm7
+	lea	rdi,[128+rdi]
+
+	vpxor	ymm11,ymm11,YMMWORD[rsi]
+	vpxor	ymm9,ymm9,YMMWORD[32+rsi]
+	vpxor	ymm0,ymm0,YMMWORD[64+rsi]
+	vpxor	ymm4,ymm4,YMMWORD[96+rsi]
+	lea	rsi,[128+rsi]
+	vmovdqu	YMMWORD[rdi],ymm11
+	vmovdqu	YMMWORD[32+rdi],ymm9
+	vmovdqu	YMMWORD[64+rdi],ymm0
+	vmovdqu	YMMWORD[96+rdi],ymm4
+	lea	rdi,[128+rdi]
+
+	sub	rdx,64*8
+	jnz	NEAR $L$oop_outer8x
+
+	jmp	NEAR $L$done8x
+
+$L$tail8x:	; fewer than 512 bytes remain: emit whole 64-byte blocks, then finish byte by byte
+	cmp	rdx,448
+	jae	NEAR $L$448_or_more8x
+	cmp	rdx,384
+	jae	NEAR $L$384_or_more8x
+	cmp	rdx,320
+	jae	NEAR $L$320_or_more8x
+	cmp	rdx,256
+	jae	NEAR $L$256_or_more8x
+	cmp	rdx,192
+	jae	NEAR $L$192_or_more8x
+	cmp	rdx,128
+	jae	NEAR $L$128_or_more8x
+	cmp	rdx,64
+	jae	NEAR $L$64_or_more8x
+
+	xor	r10,r10	; r10 = byte index for $L$oop_tail8x
+	vmovdqa	YMMWORD[rsp],ymm6	; stage the first 64 keystream bytes at rsp
+	vmovdqa	YMMWORD[32+rsp],ymm8
+	jmp	NEAR $L$oop_tail8x
+
+ALIGN	32
+$L$64_or_more8x:
+	vpxor	ymm6,ymm6,YMMWORD[rsi]
+	vpxor	ymm8,ymm8,YMMWORD[32+rsi]
+	vmovdqu	YMMWORD[rdi],ymm6
+	vmovdqu	YMMWORD[32+rdi],ymm8
+	je	NEAR $L$done8x	; ZF still comes from "cmp rdx,64" at $L$tail8x: exactly 64 bytes, nothing left
+
+	lea	rsi,[64+rsi]
+	xor	r10,r10
+	vmovdqa	YMMWORD[rsp],ymm1	; stage the next 64 keystream bytes for the byte loop
+	lea	rdi,[64+rdi]
+	sub	rdx,64
+	vmovdqa	YMMWORD[32+rsp],ymm5
+	jmp	NEAR $L$oop_tail8x
+
+ALIGN	32
+$L$128_or_more8x:
+	vpxor	ymm6,ymm6,YMMWORD[rsi]
+	vpxor	ymm8,ymm8,YMMWORD[32+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[64+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[96+rsi]
+	vmovdqu	YMMWORD[rdi],ymm6
+	vmovdqu	YMMWORD[32+rdi],ymm8
+	vmovdqu	YMMWORD[64+rdi],ymm1
+	vmovdqu	YMMWORD[96+rdi],ymm5
+	je	NEAR $L$done8x	; ZF still comes from "cmp rdx,128" at $L$tail8x
+
+	lea	rsi,[128+rsi]
+	xor	r10,r10
+	vmovdqa	YMMWORD[rsp],ymm12
+	lea	rdi,[128+rdi]
+	sub	rdx,128
+	vmovdqa	YMMWORD[32+rsp],ymm13
+	jmp	NEAR $L$oop_tail8x
+
+ALIGN	32
+$L$192_or_more8x:
+	vpxor	ymm6,ymm6,YMMWORD[rsi]
+	vpxor	ymm8,ymm8,YMMWORD[32+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[64+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[96+rsi]
+	vpxor	ymm12,ymm12,YMMWORD[128+rsi]
+	vpxor	ymm13,ymm13,YMMWORD[160+rsi]
+	vmovdqu	YMMWORD[rdi],ymm6
+	vmovdqu	YMMWORD[32+rdi],ymm8
+	vmovdqu	YMMWORD[64+rdi],ymm1
+	vmovdqu	YMMWORD[96+rdi],ymm5
+	vmovdqu	YMMWORD[128+rdi],ymm12
+	vmovdqu	YMMWORD[160+rdi],ymm13
+	je	NEAR $L$done8x	; ZF still comes from "cmp rdx,192" at $L$tail8x
+
+	lea	rsi,[192+rsi]
+	xor	r10,r10
+	vmovdqa	YMMWORD[rsp],ymm10
+	lea	rdi,[192+rdi]
+	sub	rdx,192
+	vmovdqa	YMMWORD[32+rsp],ymm15
+	jmp	NEAR $L$oop_tail8x
+
+ALIGN	32
+$L$256_or_more8x:
+	vpxor	ymm6,ymm6,YMMWORD[rsi]
+	vpxor	ymm8,ymm8,YMMWORD[32+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[64+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[96+rsi]
+	vpxor	ymm12,ymm12,YMMWORD[128+rsi]
+	vpxor	ymm13,ymm13,YMMWORD[160+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[192+rsi]
+	vpxor	ymm15,ymm15,YMMWORD[224+rsi]
+	vmovdqu	YMMWORD[rdi],ymm6
+	vmovdqu	YMMWORD[32+rdi],ymm8
+	vmovdqu	YMMWORD[64+rdi],ymm1
+	vmovdqu	YMMWORD[96+rdi],ymm5
+	vmovdqu	YMMWORD[128+rdi],ymm12
+	vmovdqu	YMMWORD[160+rdi],ymm13
+	vmovdqu	YMMWORD[192+rdi],ymm10
+	vmovdqu	YMMWORD[224+rdi],ymm15
+	je	NEAR $L$done8x	; ZF still comes from "cmp rdx,256" at $L$tail8x
+
+	lea	rsi,[256+rsi]
+	xor	r10,r10
+	vmovdqa	YMMWORD[rsp],ymm14
+	lea	rdi,[256+rdi]
+	sub	rdx,256
+	vmovdqa	YMMWORD[32+rsp],ymm2
+	jmp	NEAR $L$oop_tail8x
+
+ALIGN	32
+$L$320_or_more8x:
+	vpxor	ymm6,ymm6,YMMWORD[rsi]
+	vpxor	ymm8,ymm8,YMMWORD[32+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[64+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[96+rsi]
+	vpxor	ymm12,ymm12,YMMWORD[128+rsi]
+	vpxor	ymm13,ymm13,YMMWORD[160+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[192+rsi]
+	vpxor	ymm15,ymm15,YMMWORD[224+rsi]
+	vpxor	ymm14,ymm14,YMMWORD[256+rsi]
+	vpxor	ymm2,ymm2,YMMWORD[288+rsi]
+	vmovdqu	YMMWORD[rdi],ymm6
+	vmovdqu	YMMWORD[32+rdi],ymm8
+	vmovdqu	YMMWORD[64+rdi],ymm1
+	vmovdqu	YMMWORD[96+rdi],ymm5
+	vmovdqu	YMMWORD[128+rdi],ymm12
+	vmovdqu	YMMWORD[160+rdi],ymm13
+	vmovdqu	YMMWORD[192+rdi],ymm10
+	vmovdqu	YMMWORD[224+rdi],ymm15
+	vmovdqu	YMMWORD[256+rdi],ymm14
+	vmovdqu	YMMWORD[288+rdi],ymm2
+	je	NEAR $L$done8x	; ZF still comes from "cmp rdx,320" at $L$tail8x
+
+	lea	rsi,[320+rsi]
+	xor	r10,r10
+	vmovdqa	YMMWORD[rsp],ymm3
+	lea	rdi,[320+rdi]
+	sub	rdx,320
+	vmovdqa	YMMWORD[32+rsp],ymm7
+	jmp	NEAR $L$oop_tail8x
+
+ALIGN	32
+$L$384_or_more8x:
+	vpxor	ymm6,ymm6,YMMWORD[rsi]
+	vpxor	ymm8,ymm8,YMMWORD[32+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[64+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[96+rsi]
+	vpxor	ymm12,ymm12,YMMWORD[128+rsi]
+	vpxor	ymm13,ymm13,YMMWORD[160+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[192+rsi]
+	vpxor	ymm15,ymm15,YMMWORD[224+rsi]
+	vpxor	ymm14,ymm14,YMMWORD[256+rsi]
+	vpxor	ymm2,ymm2,YMMWORD[288+rsi]
+	vpxor	ymm3,ymm3,YMMWORD[320+rsi]
+	vpxor	ymm7,ymm7,YMMWORD[352+rsi]
+	vmovdqu	YMMWORD[rdi],ymm6
+	vmovdqu	YMMWORD[32+rdi],ymm8
+	vmovdqu	YMMWORD[64+rdi],ymm1
+	vmovdqu	YMMWORD[96+rdi],ymm5
+	vmovdqu	YMMWORD[128+rdi],ymm12
+	vmovdqu	YMMWORD[160+rdi],ymm13
+	vmovdqu	YMMWORD[192+rdi],ymm10
+	vmovdqu	YMMWORD[224+rdi],ymm15
+	vmovdqu	YMMWORD[256+rdi],ymm14
+	vmovdqu	YMMWORD[288+rdi],ymm2
+	vmovdqu	YMMWORD[320+rdi],ymm3
+	vmovdqu	YMMWORD[352+rdi],ymm7
+	je	NEAR $L$done8x	; ZF still comes from "cmp rdx,384" at $L$tail8x
+
+	lea	rsi,[384+rsi]
+	xor	r10,r10
+	vmovdqa	YMMWORD[rsp],ymm11
+	lea	rdi,[384+rdi]
+	sub	rdx,384
+	vmovdqa	YMMWORD[32+rsp],ymm9
+	jmp	NEAR $L$oop_tail8x
+
+ALIGN	32
+$L$448_or_more8x:
+	vpxor	ymm6,ymm6,YMMWORD[rsi]
+	vpxor	ymm8,ymm8,YMMWORD[32+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[64+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[96+rsi]
+	vpxor	ymm12,ymm12,YMMWORD[128+rsi]
+	vpxor	ymm13,ymm13,YMMWORD[160+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[192+rsi]
+	vpxor	ymm15,ymm15,YMMWORD[224+rsi]
+	vpxor	ymm14,ymm14,YMMWORD[256+rsi]
+	vpxor	ymm2,ymm2,YMMWORD[288+rsi]
+	vpxor	ymm3,ymm3,YMMWORD[320+rsi]
+	vpxor	ymm7,ymm7,YMMWORD[352+rsi]
+	vpxor	ymm11,ymm11,YMMWORD[384+rsi]
+	vpxor	ymm9,ymm9,YMMWORD[416+rsi]
+	vmovdqu	YMMWORD[rdi],ymm6
+	vmovdqu	YMMWORD[32+rdi],ymm8
+	vmovdqu	YMMWORD[64+rdi],ymm1
+	vmovdqu	YMMWORD[96+rdi],ymm5
+	vmovdqu	YMMWORD[128+rdi],ymm12
+	vmovdqu	YMMWORD[160+rdi],ymm13
+	vmovdqu	YMMWORD[192+rdi],ymm10
+	vmovdqu	YMMWORD[224+rdi],ymm15
+	vmovdqu	YMMWORD[256+rdi],ymm14
+	vmovdqu	YMMWORD[288+rdi],ymm2
+	vmovdqu	YMMWORD[320+rdi],ymm3
+	vmovdqu	YMMWORD[352+rdi],ymm7
+	vmovdqu	YMMWORD[384+rdi],ymm11
+	vmovdqu	YMMWORD[416+rdi],ymm9
+	je	NEAR $L$done8x	; ZF still comes from "cmp rdx,448" at $L$tail8x
+
+	lea	rsi,[448+rsi]
+	xor	r10,r10
+	vmovdqa	YMMWORD[rsp],ymm0
+	lea	rdi,[448+rdi]
+	sub	rdx,448
+	vmovdqa	YMMWORD[32+rsp],ymm4
+
+$L$oop_tail8x:	; XOR the remaining bytes (fewer than 64) with the keystream staged at rsp
+	movzx	eax,BYTE[r10*1+rsi]
+	movzx	ecx,BYTE[r10*1+rsp]
+	lea	r10,[1+r10]
+	xor	eax,ecx
+	mov	BYTE[((-1))+r10*1+rdi],al	; -1 compensates for the index increment above
+	dec	rdx
+	jnz	NEAR $L$oop_tail8x
+
+$L$done8x:
+	vzeroall	; clears every ymm register; the saved xmm6-xmm15 values are reloaded right after
+	movaps	xmm6,XMMWORD[((-168))+r9]
+	movaps	xmm7,XMMWORD[((-152))+r9]
+	movaps	xmm8,XMMWORD[((-136))+r9]
+	movaps	xmm9,XMMWORD[((-120))+r9]
+	movaps	xmm10,XMMWORD[((-104))+r9]
+	movaps	xmm11,XMMWORD[((-88))+r9]
+	movaps	xmm12,XMMWORD[((-72))+r9]
+	movaps	xmm13,XMMWORD[((-56))+r9]
+	movaps	xmm14,XMMWORD[((-40))+r9]
+	movaps	xmm15,XMMWORD[((-24))+r9]
+	lea	rsp,[r9]
+$L$8x_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_ChaCha20_8x:
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+se_handler:	;Win64 language-specific exception handler (r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*); offsets follow the winnt.h x64 layouts
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	;room for the register home area plus the four stack arguments of RtlVirtualUnwind
+
+	mov	rax,QWORD[120+r8]	;context->Rax
+	mov	rbx,QWORD[248+r8]	;context->Rip
+
+	mov	rsi,QWORD[8+r9]	;disp->ImageBase (overwritten before any use in this handler)
+	mov	r11,QWORD[56+r9]	;disp->HandlerData (overwritten before any use in this handler)
+
+	lea	r10,[$L$ctr32_body]
+	cmp	rbx,r10	;Rip below the body label?
+	jb	NEAR $L$common_seh_tail	;yes: rax (context->Rax) is taken as the frame base
+
+	mov	rax,QWORD[152+r8]	;context->Rsp
+
+	lea	r10,[$L$no_data]
+	cmp	rbx,r10	;Rip at or past $L$no_data?
+	jae	NEAR $L$common_seh_tail	;yes: context->Rsp is taken as the frame base
+
+	lea	rax,[((64+24+48))+rax]	;skip 136 bytes so rax points just above the six saved registers
+
+	mov	rbx,QWORD[((-8))+rax]
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	r15,QWORD[((-48))+rax]
+	mov	QWORD[144+r8],rbx	;context->Rbx
+	mov	QWORD[160+r8],rbp	;context->Rbp
+	mov	QWORD[216+r8],r12	;context->R12
+	mov	QWORD[224+r8],r13	;context->R13
+	mov	QWORD[232+r8],r14	;context->R14
+	mov	QWORD[240+r8],r15	;context->R15
+
+$L$common_seh_tail:	;shared exit, also reached from ssse3_handler and full_handler with rax = frame base
+	mov	rdi,QWORD[8+rax]	;slot written by the ";WIN64 prologue" store of rdi
+	mov	rsi,QWORD[16+rax]	;slot written by the ";WIN64 prologue" store of rsi
+	mov	QWORD[152+r8],rax	;context->Rsp
+	mov	QWORD[168+r8],rsi	;context->Rsi
+	mov	QWORD[176+r8],rdi	;context->Rdi
+
+	mov	rdi,QWORD[40+r9]	;destination: disp->ContextRecord
+	mov	rsi,r8	;source: the CONTEXT just patched
+	mov	ecx,154	;154 qwords = 1232 bytes
+	DD	0xa548f3fc	;encoded bytes fc f3 48 a5 = cld; rep movsq
+
+	mov	rsi,r9	;keep the DISPATCHER_CONTEXT pointer while r8/r9 are reloaded as arguments
+	xor	rcx,rcx	;arg1 = 0
+	mov	rdx,QWORD[8+rsi]	;arg2 = disp->ImageBase
+	mov	r8,QWORD[rsi]	;arg3 = disp->ControlPc
+	mov	r9,QWORD[16+rsi]	;arg4 = disp->FunctionEntry
+	mov	r10,QWORD[40+rsi]	;disp->ContextRecord
+	lea	r11,[56+rsi]	;&disp->HandlerData
+	lea	r12,[24+rsi]	;&disp->EstablisherFrame
+	mov	QWORD[32+rsp],r10	;arg5
+	mov	QWORD[40+rsp],r11	;arg6
+	mov	QWORD[48+rsp],r12	;arg7
+	mov	QWORD[56+rsp],rcx	;arg8 = 0
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	;return value 1 (ExceptionContinueSearch in the Win32 EXCEPTION_DISPOSITION enum)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	16
+ssse3_handler:	;Win64 exception handler whose HandlerData is a pair of RVAs {body label, epilogue label}; restores 2 XMM registers
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	;same frame shape as se_handler, whose tail this handler jumps into
+
+	mov	rax,QWORD[120+r8]	;context->Rax
+	mov	rbx,QWORD[248+r8]	;context->Rip
+
+	mov	rsi,QWORD[8+r9]	;disp->ImageBase
+	mov	r11,QWORD[56+r9]	;disp->HandlerData
+
+	mov	r10d,DWORD[r11]	;HandlerData[0]: RVA of the body label
+	lea	r10,[r10*1+rsi]	;RVA + ImageBase = absolute address
+	cmp	rbx,r10	;Rip below the body label?
+	jb	NEAR $L$common_seh_tail	;yes: keep rax = context->Rax as the frame base
+
+	mov	rax,QWORD[192+r8]	;context->R9 (presumably the function's saved entry rsp, as in the 8x epilogue - confirm for ssse3)
+
+	mov	r10d,DWORD[4+r11]	;HandlerData[1]: RVA of the epilogue label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10	;Rip at or past the epilogue label?
+	jae	NEAR $L$common_seh_tail	;yes: nothing more to restore here
+
+	lea	rsi,[((-40))+rax]	;source: 32 bytes starting 40 below the frame base
+	lea	rdi,[512+r8]	;destination: context->Xmm6
+	mov	ecx,4	;4 qwords = two XMM registers
+	DD	0xa548f3fc	;encoded bytes fc f3 48 a5 = cld; rep movsq
+
+	jmp	NEAR $L$common_seh_tail
+
+
+
+ALIGN	16
+full_handler:	;Win64 exception handler whose HandlerData is a pair of RVAs {body label, epilogue label}; restores 10 XMM registers
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	;same frame shape as se_handler, whose tail this handler jumps into
+
+	mov	rax,QWORD[120+r8]	;context->Rax
+	mov	rbx,QWORD[248+r8]	;context->Rip
+
+	mov	rsi,QWORD[8+r9]	;disp->ImageBase
+	mov	r11,QWORD[56+r9]	;disp->HandlerData
+
+	mov	r10d,DWORD[r11]	;HandlerData[0]: RVA of the body label
+	lea	r10,[r10*1+rsi]	;RVA + ImageBase = absolute address
+	cmp	rbx,r10	;Rip below the body label?
+	jb	NEAR $L$common_seh_tail	;yes: keep rax = context->Rax as the frame base
+
+	mov	rax,QWORD[192+r8]	;context->R9; the 8x epilogue restores rsp from r9, so r9 holds that function's entry rsp
+
+	mov	r10d,DWORD[4+r11]	;HandlerData[1]: RVA of the epilogue label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10	;Rip at or past the epilogue label?
+	jae	NEAR $L$common_seh_tail	;yes: nothing more to restore here
+
+	lea	rsi,[((-168))+rax]	;source: save area starting 168 below the frame base (same offsets the 8x epilogue reloads xmm6-xmm15 from)
+	lea	rdi,[512+r8]	;destination: context->Xmm6
+	mov	ecx,20	;20 qwords = ten XMM registers
+	DD	0xa548f3fc	;encoded bytes fc f3 48 a5 = cld; rep movsq
+
+	jmp	NEAR $L$common_seh_tail
+
+
+section	.pdata rdata align=4
+ALIGN	4	;each entry below is three image-relative addresses: function begin, function end, unwind info
+	DD	$L$SEH_begin_ChaCha20_ctr32 wrt ..imagebase
+	DD	$L$SEH_end_ChaCha20_ctr32 wrt ..imagebase
+	DD	$L$SEH_info_ChaCha20_ctr32 wrt ..imagebase
+
+	DD	$L$SEH_begin_ChaCha20_ssse3 wrt ..imagebase
+	DD	$L$SEH_end_ChaCha20_ssse3 wrt ..imagebase
+	DD	$L$SEH_info_ChaCha20_ssse3 wrt ..imagebase
+
+	DD	$L$SEH_begin_ChaCha20_4x wrt ..imagebase
+	DD	$L$SEH_end_ChaCha20_4x wrt ..imagebase
+	DD	$L$SEH_info_ChaCha20_4x wrt ..imagebase
+	DD	$L$SEH_begin_ChaCha20_8x wrt ..imagebase
+	DD	$L$SEH_end_ChaCha20_8x wrt ..imagebase
+	DD	$L$SEH_info_ChaCha20_8x wrt ..imagebase
+section	.xdata rdata align=8
+ALIGN	8	;unwind info records referenced by the third field of each .pdata entry
+$L$SEH_info_ChaCha20_ctr32:
+DB	9,0,0,0	;first byte 9 = version 1 with flags 1 (handler present); no prologue bytes, no unwind codes, no frame register
+	DD	se_handler wrt ..imagebase
+;se_handler takes no handler data: it compares against $L$ctr32_body and $L$no_data directly
+$L$SEH_info_ChaCha20_ssse3:
+DB	9,0,0,0
+	DD	ssse3_handler wrt ..imagebase
+	DD	$L$ssse3_body wrt ..imagebase,$L$ssse3_epilogue wrt ..imagebase	;handler data: the two RVAs ssse3_handler reads as HandlerData[0] and [1]
+
+$L$SEH_info_ChaCha20_4x:
+DB	9,0,0,0
+	DD	full_handler wrt ..imagebase
+	DD	$L$4x_body wrt ..imagebase,$L$4x_epilogue wrt ..imagebase	;handler data for full_handler
+$L$SEH_info_ChaCha20_8x:
+DB	9,0,0,0
+	DD	full_handler wrt ..imagebase
+	DD	$L$8x_body wrt ..imagebase,$L$8x_epilogue wrt ..imagebase	;handler data for full_handler
diff --git a/third_party/boringssl/win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
new file mode 100644
index 0000000..56dc206
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
@@ -0,0 +1,3270 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.data data align=8
+;128-bit constants shared by the routines in this file; all are addressed RIP-relative (default rel)
+;one..eight: 128-bit values whose low qword is the small integer and whose high qword is zero
+ALIGN	16
+one:
+	DQ	1,0
+two:
+	DQ	2,0
+three:
+	DQ	3,0
+four:
+	DQ	4,0
+five:
+	DQ	5,0
+six:
+	DQ	6,0
+seven:
+	DQ	7,0
+eight:
+	DQ	8,0
+;OR_MASK: only the most significant bit of the 128-bit value is set (top bit of dword 3)
+OR_MASK:
+	DD	0x00000000,0x00000000,0x00000000,0x80000000
+poly:	;reduction constant; the code multiplies by its high qword (vpclmulqdq selector 0x10)
+	DQ	0x1,0xc200000000000000
+mask:	;vpshufb control: every dword of the result is source bytes 13,14,15,12
+	DD	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
+con1:	;initial round-constant vector (1 in every dword); doubled per round with vpslld
+	DD	1,1,1,1
+con2:	;round-constant vector 0x1b, loaded for the last key-schedule rounds
+	DD	0x1b,0x1b,0x1b,0x1b
+con3:	;vpshufb control: zero the low 8 bytes, copy source dword 1 into dwords 2 and 3
+DB	-1,-1,-1,-1,-1,-1,-1,-1,4,5,6,7,4,5,6,7
+and_mask:	;clears dword 0 and keeps dwords 1-3
+	DD	0,0xffffffff,0xffffffff,0xffffffff
+section	.text code align=64
+
+
+ALIGN	16
+GFMUL:
+;Internal helper: xmm0 = reduced carry-less product of xmm0 and xmm1. Overwrites xmm2-xmm5; xmm1 is preserved.
+	vpclmulqdq	xmm2,xmm0,xmm1,0x00	;xmm0.lo x xmm1.lo -> low 128 bits of the product
+	vpclmulqdq	xmm5,xmm0,xmm1,0x11	;xmm0.hi x xmm1.hi -> high 128 bits of the product
+	vpclmulqdq	xmm3,xmm0,xmm1,0x10	;xmm0.lo x xmm1.hi
+	vpclmulqdq	xmm4,xmm0,xmm1,0x01	;xmm0.hi x xmm1.lo
+	vpxor	xmm3,xmm3,xmm4	;sum of the two cross terms
+	vpslldq	xmm4,xmm3,8	;low half of the cross terms, moved up 64 bits
+	vpsrldq	xmm3,xmm3,8	;high half of the cross terms, moved down 64 bits
+	vpxor	xmm2,xmm2,xmm4	;xmm2 = low 128 bits of the 256-bit product
+	vpxor	xmm5,xmm5,xmm3	;xmm5 = high 128 bits of the 256-bit product
+;first folding step: multiply the low qword by the high qword of poly, xor with the qword-swapped value
+	vpclmulqdq	xmm3,xmm2,XMMWORD[poly],0x10
+	vpshufd	xmm4,xmm2,78	;78 = 01001110b swaps the two qwords
+	vpxor	xmm2,xmm3,xmm4
+;second folding step, identical to the first
+	vpclmulqdq	xmm3,xmm2,XMMWORD[poly],0x10
+	vpshufd	xmm4,xmm2,78
+	vpxor	xmm2,xmm3,xmm4
+
+	vpxor	xmm0,xmm2,xmm5	;combine the folded low half with the high half
+	DB	0F3h,0C3h		;repret
+
+
+global	aesgcmsiv_htable_init
+
+ALIGN	16
+aesgcmsiv_htable_init:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesgcmsiv_htable_init:
+	mov	rdi,rcx	;first Win64 argument -> rdi
+	mov	rsi,rdx	;second Win64 argument -> rsi
+;Fills an 8-entry table (128 bytes) at rdi from the 16-byte value H at rsi.
+;Entry 0 is H; each later entry is GFMUL(previous entry, H).
+;Both pointers are accessed with vmovdqa, so they must be 16-byte aligned.
+	vmovdqa	xmm0,XMMWORD[rsi]	;running product, starts as H
+	vmovdqa	xmm1,xmm0	;constant multiplier H (GFMUL leaves xmm1 intact)
+	vmovdqa	XMMWORD[rdi],xmm0	;entry 0
+	call	GFMUL
+	vmovdqa	XMMWORD[16+rdi],xmm0	;entry 1
+	call	GFMUL
+	vmovdqa	XMMWORD[32+rdi],xmm0	;entry 2
+	call	GFMUL
+	vmovdqa	XMMWORD[48+rdi],xmm0	;entry 3
+	call	GFMUL
+	vmovdqa	XMMWORD[64+rdi],xmm0	;entry 4
+	call	GFMUL
+	vmovdqa	XMMWORD[80+rdi],xmm0	;entry 5
+	call	GFMUL
+	vmovdqa	XMMWORD[96+rdi],xmm0	;entry 6
+	call	GFMUL
+	vmovdqa	XMMWORD[112+rdi],xmm0	;entry 7
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aesgcmsiv_htable_init:
+global	aesgcmsiv_htable6_init
+
+ALIGN	16
+aesgcmsiv_htable6_init:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesgcmsiv_htable6_init:
+	mov	rdi,rcx	;first Win64 argument -> rdi
+	mov	rsi,rdx	;second Win64 argument -> rsi
+;Six-entry variant of aesgcmsiv_htable_init: writes 96 bytes at rdi from the 16-byte value H at rsi.
+;Entry 0 is H; each later entry is GFMUL(previous entry, H).
+;Both pointers are accessed with vmovdqa, so they must be 16-byte aligned.
+	vmovdqa	xmm0,XMMWORD[rsi]	;running product, starts as H
+	vmovdqa	xmm1,xmm0	;constant multiplier H (GFMUL leaves xmm1 intact)
+	vmovdqa	XMMWORD[rdi],xmm0	;entry 0
+	call	GFMUL
+	vmovdqa	XMMWORD[16+rdi],xmm0	;entry 1
+	call	GFMUL
+	vmovdqa	XMMWORD[32+rdi],xmm0	;entry 2
+	call	GFMUL
+	vmovdqa	XMMWORD[48+rdi],xmm0	;entry 3
+	call	GFMUL
+	vmovdqa	XMMWORD[64+rdi],xmm0	;entry 4
+	call	GFMUL
+	vmovdqa	XMMWORD[80+rdi],xmm0	;entry 5
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aesgcmsiv_htable6_init:
+global	aesgcmsiv_htable_polyval
+
+ALIGN	16
+aesgcmsiv_htable_polyval:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesgcmsiv_htable_polyval:
+	mov	rdi,rcx	;arg1 -> rdi: multiplier table (entries read at offsets 0..112)
+	mov	rsi,rdx	;arg2 -> rsi: input bytes
+	mov	rdx,r8	;arg3 -> rdx: input length in bytes
+	mov	rcx,r9	;arg4 -> rcx: 16-byte accumulator, read at start and written at the end
+;Folds the input into the accumulator at rcx using the table at rdi; returns immediately when the length is 0.
+;NOTE(review): xmm6-xmm15 are callee-saved in the Microsoft x64 convention, yet this routine runs vzeroall and
+;overwrites xmm6/xmm7/xmm9 without saving them - confirm this is acceptable for the Windows callers.
+	test	rdx,rdx
+	jnz	NEAR $L$htable_polyval_start
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$htable_polyval_start:
+	vzeroall
+
+;r11 = length mod 128: bytes handled by the prefix path before the 128-byte main loop
+
+	mov	r11,rdx
+	and	r11,127
+
+	jz	NEAR $L$htable_polyval_no_prefix
+
+	vpxor	xmm9,xmm9,xmm9
+	vmovdqa	xmm1,XMMWORD[rcx]	;current accumulator (aligned load)
+	sub	rdx,r11	;rdx = bytes left for the main loop
+
+	sub	r11,16	;table offset for the first prefix block
+
+;first prefix block: xor in the accumulator, then start the three partial sums
+	vmovdqu	xmm0,XMMWORD[rsi]
+	vpxor	xmm0,xmm0,xmm1
+
+	vpclmulqdq	xmm5,xmm0,XMMWORD[r11*1+rdi],0x01	;xmm5 = cross terms
+	vpclmulqdq	xmm3,xmm0,XMMWORD[r11*1+rdi],0x00	;xmm3 = low x low
+	vpclmulqdq	xmm4,xmm0,XMMWORD[r11*1+rdi],0x11	;xmm4 = high x high
+	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+	lea	rsi,[16+rsi]
+	test	r11,r11	;was that the only prefix block?
+	jnz	NEAR $L$htable_polyval_prefix_loop
+	jmp	NEAR $L$htable_polyval_prefix_complete
+
+;remaining prefix blocks, 16 bytes each, with the table offset walking down to 0
+ALIGN	64
+$L$htable_polyval_prefix_loop:
+	sub	r11,16
+
+	vmovdqu	xmm0,XMMWORD[rsi]
+
+	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x00
+	vpxor	xmm3,xmm3,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x11
+	vpxor	xmm4,xmm4,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x01
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+	test	r11,r11	;sets ZF for the jnz below (lea does not touch flags)
+;the loop only ends when r11 is exactly 0, so the length is presumably a multiple of 16 - verify against callers
+	lea	rsi,[16+rsi]
+
+	jnz	NEAR $L$htable_polyval_prefix_loop
+
+$L$htable_polyval_prefix_complete:
+	vpsrldq	xmm6,xmm5,8	;high half of the cross terms
+	vpslldq	xmm5,xmm5,8	;low half of the cross terms
+
+	vpxor	xmm9,xmm4,xmm6	;xmm9 = high 128 bits of the unreduced sum
+	vpxor	xmm1,xmm3,xmm5	;xmm1 = low 128 bits of the unreduced sum
+
+	jmp	NEAR $L$htable_polyval_main_loop
+
+$L$htable_polyval_no_prefix:
+
+;no prefix: the pending low half is zero and the accumulator is carried in xmm9,
+;so the first main-loop pass xors it into block 0 unchanged
+
+	vpxor	xmm1,xmm1,xmm1
+	vmovdqa	xmm9,XMMWORD[rcx]
+
+ALIGN	64
+$L$htable_polyval_main_loop:
+	sub	rdx,0x80	;one pass consumes 128 bytes
+	jb	NEAR $L$htable_polyval_out	;fewer than 128 bytes left: finish
+
+	vmovdqu	xmm0,XMMWORD[112+rsi]	;block 7 pairs with table entry 0
+
+	vpclmulqdq	xmm5,xmm0,XMMWORD[rdi],0x01
+	vpclmulqdq	xmm3,xmm0,XMMWORD[rdi],0x00
+	vpclmulqdq	xmm4,xmm0,XMMWORD[rdi],0x11
+	vpclmulqdq	xmm6,xmm0,XMMWORD[rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+
+	vmovdqu	xmm0,XMMWORD[96+rsi]	;block 6 pairs with table entry 1
+	vpclmulqdq	xmm6,xmm0,XMMWORD[16+rdi],0x01
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[16+rdi],0x00
+	vpxor	xmm3,xmm3,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[16+rdi],0x11
+	vpxor	xmm4,xmm4,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[16+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+
+
+	vmovdqu	xmm0,XMMWORD[80+rsi]	;block 5 pairs with table entry 2
+;first folding step of the pending value (xmm1 low, xmm9 high), interleaved with the multiplies
+	vpclmulqdq	xmm7,xmm1,XMMWORD[poly],0x10
+	vpalignr	xmm1,xmm1,xmm1,8	;swap the qwords of xmm1
+
+	vpclmulqdq	xmm6,xmm0,XMMWORD[32+rdi],0x01
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[32+rdi],0x00
+	vpxor	xmm3,xmm3,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[32+rdi],0x11
+	vpxor	xmm4,xmm4,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[32+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+
+	vpxor	xmm1,xmm1,xmm7	;completes the first folding step
+
+	vmovdqu	xmm0,XMMWORD[64+rsi]	;block 4 pairs with table entry 3
+
+	vpclmulqdq	xmm6,xmm0,XMMWORD[48+rdi],0x01
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[48+rdi],0x00
+	vpxor	xmm3,xmm3,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[48+rdi],0x11
+	vpxor	xmm4,xmm4,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[48+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+
+	vmovdqu	xmm0,XMMWORD[48+rsi]	;block 3 pairs with table entry 4
+;second folding step of the pending value
+	vpclmulqdq	xmm7,xmm1,XMMWORD[poly],0x10
+	vpalignr	xmm1,xmm1,xmm1,8
+
+	vpclmulqdq	xmm6,xmm0,XMMWORD[64+rdi],0x01
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[64+rdi],0x00
+	vpxor	xmm3,xmm3,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[64+rdi],0x11
+	vpxor	xmm4,xmm4,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[64+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+
+	vpxor	xmm1,xmm1,xmm7	;completes the second folding step
+
+	vmovdqu	xmm0,XMMWORD[32+rsi]	;block 2 pairs with table entry 5
+
+	vpclmulqdq	xmm6,xmm0,XMMWORD[80+rdi],0x01
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[80+rdi],0x00
+	vpxor	xmm3,xmm3,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[80+rdi],0x11
+	vpxor	xmm4,xmm4,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[80+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+
+	vpxor	xmm1,xmm1,xmm9	;xmm1 = fully reduced pending value
+
+	vmovdqu	xmm0,XMMWORD[16+rsi]	;block 1 pairs with table entry 6
+
+	vpclmulqdq	xmm6,xmm0,XMMWORD[96+rdi],0x01
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[96+rdi],0x00
+	vpxor	xmm3,xmm3,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[96+rdi],0x11
+	vpxor	xmm4,xmm4,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[96+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+
+	vmovdqu	xmm0,XMMWORD[rsi]	;block 0 pairs with table entry 7
+	vpxor	xmm0,xmm0,xmm1	;the reduced pending value joins block 0
+
+	vpclmulqdq	xmm6,xmm0,XMMWORD[112+rdi],0x01
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[112+rdi],0x00
+	vpxor	xmm3,xmm3,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[112+rdi],0x11
+	vpxor	xmm4,xmm4,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[112+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+;split the cross terms and form the new unreduced pending value
+	vpsrldq	xmm6,xmm5,8
+	vpslldq	xmm5,xmm5,8
+
+	vpxor	xmm9,xmm4,xmm6	;new high half
+	vpxor	xmm1,xmm3,xmm5	;new low half
+
+	lea	rsi,[128+rsi]
+	jmp	NEAR $L$htable_polyval_main_loop
+
+
+;final reduction of the pending value: two folding steps, then xor in the high half
+$L$htable_polyval_out:
+	vpclmulqdq	xmm6,xmm1,XMMWORD[poly],0x10
+	vpalignr	xmm1,xmm1,xmm1,8
+	vpxor	xmm1,xmm1,xmm6
+
+	vpclmulqdq	xmm6,xmm1,XMMWORD[poly],0x10
+	vpalignr	xmm1,xmm1,xmm1,8
+	vpxor	xmm1,xmm1,xmm6
+	vpxor	xmm1,xmm1,xmm9
+
+	vmovdqu	XMMWORD[rcx],xmm1	;write the updated accumulator
+	vzeroupper
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aesgcmsiv_htable_polyval:
+global	aesgcmsiv_polyval_horner
+
+ALIGN	16
+aesgcmsiv_polyval_horner:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesgcmsiv_polyval_horner:
+	mov	rdi,rcx	;arg1 -> rdi: 16-byte accumulator (aligned, read and written)
+	mov	rsi,rdx	;arg2 -> rsi: 16-byte multiplier (aligned)
+	mov	rdx,r8	;arg3 -> rdx: input blocks
+	mov	rcx,r9	;arg4 -> rcx: number of 16-byte blocks
+;For each input block: accumulator = GFMUL(accumulator xor block, multiplier).
+;Returns immediately when the block count is 0.
+
+	test	rcx,rcx
+	jnz	NEAR $L$polyval_horner_start
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$polyval_horner_start:
+
+
+
+	xor	r10,r10	;byte offset into the input
+	shl	rcx,4	;block count -> byte count
+
+	vmovdqa	xmm1,XMMWORD[rsi]	;multiplier, kept in xmm1 for every GFMUL call
+	vmovdqa	xmm0,XMMWORD[rdi]	;running accumulator
+
+$L$polyval_horner_loop:
+	vpxor	xmm0,xmm0,XMMWORD[r10*1+rdx]
+	call	GFMUL
+
+	add	r10,16
+	cmp	rcx,r10
+	jne	NEAR $L$polyval_horner_loop
+
+
+	vmovdqa	XMMWORD[rdi],xmm0	;store the updated accumulator
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aesgcmsiv_polyval_horner:
+global	aes128gcmsiv_aes_ks
+
+ALIGN	16
+aes128gcmsiv_aes_ks:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes128gcmsiv_aes_ks:
+	mov	rdi,rcx	;arg1 -> rdi: 16-byte key (unaligned load)
+	mov	rsi,rdx	;arg2 -> rsi: output schedule, 11 x 16 bytes, written with aligned stores
+;Expands a 16-byte key into 11 round keys: the key itself, 8 from the loop, 2 from the unrolled tail.
+;NOTE(review): xmm15 is callee-saved in the Microsoft x64 convention and is overwritten here without
+;being saved - confirm this is acceptable for the Windows callers.
+	vmovdqu	xmm1,XMMWORD[rdi]
+	vmovdqa	XMMWORD[rsi],xmm1	;round key 0 is the key itself
+
+	vmovdqa	xmm0,XMMWORD[con1]	;round constant, starts at 1
+	vmovdqa	xmm15,XMMWORD[mask]	;byte shuffle that broadcasts bytes 13,14,15,12
+
+	mov	rax,8	;loop count
+
+$L$ks128_loop:
+	add	rsi,16	;advance to the next round-key slot
+	sub	rax,1	;sets ZF for the jne at the bottom; the AVX instructions in between leave flags alone
+	vpshufb	xmm2,xmm1,xmm15	;last word, byte-rotated, copied into all four dwords
+	vaesenclast	xmm2,xmm2,xmm0	;S-box substitution and xor with the round constant
+	vpslld	xmm0,xmm0,1	;double the round constant
+	vpslldq	xmm3,xmm1,4	;next six lines: each word becomes the xor of itself and all lower words
+	vpxor	xmm1,xmm1,xmm3
+	vpslldq	xmm3,xmm3,4
+	vpxor	xmm1,xmm1,xmm3
+	vpslldq	xmm3,xmm3,4
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2	;fold in the substituted word
+	vmovdqa	XMMWORD[rsi],xmm1
+	jne	NEAR $L$ks128_loop
+;rsi now points at round key 8; the last two rounds restart the constant at 0x1b
+	vmovdqa	xmm0,XMMWORD[con2]
+	vpshufb	xmm2,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1	;0x1b -> 0x36 for the final round
+	vpslldq	xmm3,xmm1,4
+	vpxor	xmm1,xmm1,xmm3
+	vpslldq	xmm3,xmm3,4
+	vpxor	xmm1,xmm1,xmm3
+	vpslldq	xmm3,xmm3,4
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+	vmovdqa	XMMWORD[16+rsi],xmm1	;round key 9
+
+	vpshufb	xmm2,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslldq	xmm3,xmm1,4
+	vpxor	xmm1,xmm1,xmm3
+	vpslldq	xmm3,xmm3,4
+	vpxor	xmm1,xmm1,xmm3
+	vpslldq	xmm3,xmm3,4
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+	vmovdqa	XMMWORD[32+rsi],xmm1	;round key 10
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes128gcmsiv_aes_ks:
+global	aes256gcmsiv_aes_ks
+
+ALIGN	16
+aes256gcmsiv_aes_ks:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes256gcmsiv_aes_ks:
+	mov	rdi,rcx	;arg1 -> rdi: 32-byte key (unaligned loads)
+	mov	rsi,rdx	;arg2 -> rsi: output schedule, 15 x 16 bytes, written with aligned stores
+;Expands a 32-byte key into 15 round keys: the two key halves, 12 from the loop, 1 from the tail.
+;NOTE(review): xmm14/xmm15 are callee-saved in the Microsoft x64 convention and are overwritten here unsaved;
+;also, unlike its siblings, this routine is not followed by a $L$SEH_end_ label - confirm both are intended.
+	vmovdqu	xmm1,XMMWORD[rdi]
+	vmovdqu	xmm3,XMMWORD[16+rdi]
+	vmovdqa	XMMWORD[rsi],xmm1	;round key 0
+	vmovdqa	XMMWORD[16+rsi],xmm3	;round key 1
+	vmovdqa	xmm0,XMMWORD[con1]	;round constant, starts at 1
+	vmovdqa	xmm15,XMMWORD[mask]	;byte shuffle that broadcasts bytes 13,14,15,12
+	vpxor	xmm14,xmm14,xmm14	;all-zero round key, used for substitution without a constant
+	mov	rax,6	;loop count; each pass emits two round keys
+
+$L$ks256_loop:
+	add	rsi,32
+	sub	rax,1	;sets ZF for the jne at the bottom; the AVX instructions in between leave flags alone
+	vpshufb	xmm2,xmm3,xmm15	;last word of the odd key, byte-rotated, in all four dwords
+	vaesenclast	xmm2,xmm2,xmm0	;S-box substitution and xor with the round constant
+	vpslld	xmm0,xmm0,1	;double the round constant
+	vpsllq	xmm4,xmm1,32	;next four lines: each word of xmm1 becomes the xor of itself and all lower words
+	vpxor	xmm1,xmm1,xmm4
+	vpshufb	xmm4,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vmovdqa	XMMWORD[rsi],xmm1	;even round key
+	vpshufd	xmm2,xmm1,0xff	;broadcast the top dword, no byte rotation
+	vaesenclast	xmm2,xmm2,xmm14	;S-box substitution only (zero key)
+	vpsllq	xmm4,xmm3,32	;same word-wise running xor, applied to xmm3
+	vpxor	xmm3,xmm3,xmm4
+	vpshufb	xmm4,xmm3,XMMWORD[con3]
+	vpxor	xmm3,xmm3,xmm4
+	vpxor	xmm3,xmm3,xmm2
+	vmovdqa	XMMWORD[16+rsi],xmm3	;odd round key
+	jne	NEAR $L$ks256_loop
+;one more even-style step produces the final round key
+	vpshufb	xmm2,xmm3,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpsllq	xmm4,xmm1,32
+	vpxor	xmm1,xmm1,xmm4
+	vpshufb	xmm4,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vmovdqa	XMMWORD[32+rsi],xmm1	;round key 14
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+global	aes128gcmsiv_aes_ks_enc_x1
+
+ALIGN	16
+aes128gcmsiv_aes_ks_enc_x1:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes128gcmsiv_aes_ks_enc_x1:
+	mov	rdi,rcx	;arg1 -> rdi: 16-byte input block
+	mov	rsi,rdx	;arg2 -> rsi: 16-byte output block
+	mov	rdx,r8	;arg3 -> rdx: output key schedule, 11 x 16 bytes
+	mov	rcx,r9	;arg4 -> rcx: 16-byte key
+;Expands the key at rcx into the schedule at rdx and encrypts the block at rdi with each round key as it is produced.
+;All four pointers are accessed with vmovdqa, so they must be 16-byte aligned.
+;NOTE(review): xmm15 is callee-saved in the Microsoft x64 convention and is overwritten unsaved - confirm for Windows callers.
+	vmovdqa	xmm1,XMMWORD[rcx]
+	vmovdqa	xmm4,XMMWORD[rdi]
+
+	vmovdqa	XMMWORD[rdx],xmm1	;round key 0 is the key itself
+	vpxor	xmm4,xmm4,xmm1	;initial key addition
+
+	vmovdqa	xmm0,XMMWORD[con1]	;round constant, starts at 1
+	vmovdqa	xmm15,XMMWORD[mask]	;byte shuffle that broadcasts bytes 13,14,15,12
+
+	vpshufb	xmm2,xmm1,xmm15	;last word, byte-rotated, in all four dwords
+	vaesenclast	xmm2,xmm2,xmm0	;S-box substitution and xor with the round constant
+	vpslld	xmm0,xmm0,1	;double the round constant
+	vpsllq	xmm3,xmm1,32	;next four lines: each word becomes the xor of itself and all lower words
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1	;encryption round 1
+	vmovdqa	XMMWORD[16+rdx],xmm1	;round key 1
+
+	vpshufb	xmm2,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1	;encryption round 2
+	vmovdqa	XMMWORD[32+rdx],xmm1	;round key 2
+
+	vpshufb	xmm2,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1	;encryption round 3
+	vmovdqa	XMMWORD[48+rdx],xmm1	;round key 3
+
+	vpshufb	xmm2,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1	;encryption round 4
+	vmovdqa	XMMWORD[64+rdx],xmm1	;round key 4
+
+	vpshufb	xmm2,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1	;encryption round 5
+	vmovdqa	XMMWORD[80+rdx],xmm1	;round key 5
+
+	vpshufb	xmm2,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1	;encryption round 6
+	vmovdqa	XMMWORD[96+rdx],xmm1	;round key 6
+
+	vpshufb	xmm2,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1	;encryption round 7
+	vmovdqa	XMMWORD[112+rdx],xmm1	;round key 7
+
+	vpshufb	xmm2,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1	;encryption round 8
+	vmovdqa	XMMWORD[128+rdx],xmm1	;round key 8
+
+;the last two rounds restart the round constant at 0x1b
+	vmovdqa	xmm0,XMMWORD[con2]
+
+	vpshufb	xmm2,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1	;0x1b -> 0x36 for the final round
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1	;encryption round 9
+	vmovdqa	XMMWORD[144+rdx],xmm1	;round key 9
+
+	vpshufb	xmm2,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenclast	xmm4,xmm4,xmm1	;final encryption round
+	vmovdqa	XMMWORD[160+rdx],xmm1	;round key 10
+
+
+	vmovdqa	XMMWORD[rsi],xmm4	;store the encrypted block
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes128gcmsiv_aes_ks_enc_x1:
+global	aes128gcmsiv_kdf
+
+ALIGN	16
+aes128gcmsiv_kdf:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes128gcmsiv_kdf:
+	mov	rdi,rcx	;arg1 -> rdi: 16-byte input block (aligned); only its low three dwords are used
+	mov	rsi,rdx	;arg2 -> rsi: 64-byte output (aligned)
+	mov	rdx,r8	;arg3 -> rdx: key schedule, 11 round keys (aligned)
+;Builds four blocks that differ only in dword 0 (values 0,1,2,3), with dwords 1-3 taken from the low
+;three dwords of the input, encrypts each with the 11 round keys at rdx and writes the 64 bytes to rsi.
+;NOTE(review): xmm9-xmm13 are callee-saved in the Microsoft x64 convention and are overwritten unsaved -
+;confirm this is acceptable for the Windows callers.
+
+
+
+	vmovdqa	xmm1,XMMWORD[rdx]	;round key 0
+	vmovdqa	xmm9,XMMWORD[rdi]
+	vmovdqa	xmm12,XMMWORD[and_mask]
+	vmovdqa	xmm13,XMMWORD[one]
+	vpshufd	xmm9,xmm9,0x90	;dwords become (d0,d0,d1,d2): the input moves up one dword
+	vpand	xmm9,xmm9,xmm12	;clear dword 0 -> counter 0
+	vpaddd	xmm10,xmm9,xmm13	;counter 1
+	vpaddd	xmm11,xmm10,xmm13	;counter 2
+	vpaddd	xmm12,xmm11,xmm13	;counter 3 (and_mask is no longer needed)
+
+	vpxor	xmm9,xmm9,xmm1	;initial key addition for all four blocks
+	vpxor	xmm10,xmm10,xmm1
+	vpxor	xmm11,xmm11,xmm1
+	vpxor	xmm12,xmm12,xmm1
+
+	vmovdqa	xmm1,XMMWORD[16+rdx]	;round 1
+	vaesenc	xmm9,xmm9,xmm1
+	vaesenc	xmm10,xmm10,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+
+	vmovdqa	xmm2,XMMWORD[32+rdx]	;round 2
+	vaesenc	xmm9,xmm9,xmm2
+	vaesenc	xmm10,xmm10,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+
+	vmovdqa	xmm1,XMMWORD[48+rdx]	;round 3
+	vaesenc	xmm9,xmm9,xmm1
+	vaesenc	xmm10,xmm10,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+
+	vmovdqa	xmm2,XMMWORD[64+rdx]	;round 4
+	vaesenc	xmm9,xmm9,xmm2
+	vaesenc	xmm10,xmm10,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+
+	vmovdqa	xmm1,XMMWORD[80+rdx]	;round 5
+	vaesenc	xmm9,xmm9,xmm1
+	vaesenc	xmm10,xmm10,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+
+	vmovdqa	xmm2,XMMWORD[96+rdx]	;round 6
+	vaesenc	xmm9,xmm9,xmm2
+	vaesenc	xmm10,xmm10,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+
+	vmovdqa	xmm1,XMMWORD[112+rdx]	;round 7
+	vaesenc	xmm9,xmm9,xmm1
+	vaesenc	xmm10,xmm10,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+
+	vmovdqa	xmm2,XMMWORD[128+rdx]	;round 8
+	vaesenc	xmm9,xmm9,xmm2
+	vaesenc	xmm10,xmm10,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+
+	vmovdqa	xmm1,XMMWORD[144+rdx]	;round 9
+	vaesenc	xmm9,xmm9,xmm1
+	vaesenc	xmm10,xmm10,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+
+	vmovdqa	xmm2,XMMWORD[160+rdx]	;final round
+	vaesenclast	xmm9,xmm9,xmm2
+	vaesenclast	xmm10,xmm10,xmm2
+	vaesenclast	xmm11,xmm11,xmm2
+	vaesenclast	xmm12,xmm12,xmm2
+
+
+	vmovdqa	XMMWORD[rsi],xmm9	;encrypted block for counter 0
+	vmovdqa	XMMWORD[16+rsi],xmm10	;counter 1
+	vmovdqa	XMMWORD[32+rsi],xmm11	;counter 2
+	vmovdqa	XMMWORD[48+rsi],xmm12	;counter 3
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes128gcmsiv_kdf:
+global	aes128gcmsiv_enc_msg_x4
+
+ALIGN	16
+aes128gcmsiv_enc_msg_x4:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes128gcmsiv_enc_msg_x4:
+	mov	rdi,rcx	;arg1 -> rdi: input
+	mov	rsi,rdx	;arg2 -> rsi: output
+	mov	rdx,r8	;arg3 -> rdx: 16-byte initial counter block (aligned load)
+	mov	rcx,r9	;arg4 -> rcx: key schedule, 11 round keys
+	mov	r8,QWORD[40+rsp]	;arg5 (first stack argument) -> r8: length in bytes
+;Counter-mode encryption, four blocks per pass and then one block at a time; only whole 16-byte blocks are processed.
+;NOTE(review): xmm6-xmm8, xmm12 and xmm15 are callee-saved in the Microsoft x64 convention and are overwritten
+;unsaved - confirm this is acceptable for the Windows callers.
+	test	r8,r8
+	jnz	NEAR $L$128_enc_msg_x4_start
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$128_enc_msg_x4_start:
+	push	r12	;r12/r13 are saved and restored, but nothing in this routine modifies them
+
+	push	r13
+
+
+	shr	r8,4	;bytes -> 16-byte blocks
+	mov	r10,r8
+	shl	r10,62
+	shr	r10,62	;r10 = block count mod 4, handled by the single-block loop
+
+
+	vmovdqa	xmm15,XMMWORD[rdx]
+	vpor	xmm15,xmm15,XMMWORD[OR_MASK]	;force the top bit of the counter block
+
+	vmovdqu	xmm4,XMMWORD[four]	;per-pass counter increment
+	vmovdqa	xmm0,xmm15	;counter + 0
+	vpaddd	xmm1,xmm15,XMMWORD[one]	;counter + 1
+	vpaddd	xmm2,xmm15,XMMWORD[two]	;counter + 2
+	vpaddd	xmm3,xmm15,XMMWORD[three]	;counter + 3
+
+	shr	r8,2	;r8 = number of four-block passes; ZF set when there are none
+	je	NEAR $L$128_enc_msg_x4_check_remainder
+
+	sub	rsi,64	;pre-bias both pointers: the loop advances them at its top
+	sub	rdi,64
+
+$L$128_enc_msg_x4_loop1:
+	add	rsi,64
+	add	rdi,64
+
+	vmovdqa	xmm5,xmm0	;working copies of the four counters
+	vmovdqa	xmm6,xmm1
+	vmovdqa	xmm7,xmm2
+	vmovdqa	xmm8,xmm3
+
+	vpxor	xmm5,xmm5,XMMWORD[rcx]	;initial key addition
+	vpxor	xmm6,xmm6,XMMWORD[rcx]
+	vpxor	xmm7,xmm7,XMMWORD[rcx]
+	vpxor	xmm8,xmm8,XMMWORD[rcx]
+
+	vmovdqu	xmm12,XMMWORD[16+rcx]	;round 1
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vpaddd	xmm0,xmm0,xmm4	;advance counter + 0 for the next pass
+	vmovdqu	xmm12,XMMWORD[32+rcx]	;round 2
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vpaddd	xmm1,xmm1,xmm4	;advance counter + 1
+	vmovdqu	xmm12,XMMWORD[48+rcx]	;round 3
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vpaddd	xmm2,xmm2,xmm4	;advance counter + 2
+	vmovdqu	xmm12,XMMWORD[64+rcx]	;round 4
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vpaddd	xmm3,xmm3,xmm4	;advance counter + 3
+
+	vmovdqu	xmm12,XMMWORD[80+rcx]	;round 5
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[96+rcx]	;round 6
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[112+rcx]	;round 7
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[128+rcx]	;round 8
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[144+rcx]	;round 9
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[160+rcx]	;final round
+	vaesenclast	xmm5,xmm5,xmm12
+	vaesenclast	xmm6,xmm6,xmm12
+	vaesenclast	xmm7,xmm7,xmm12
+	vaesenclast	xmm8,xmm8,xmm12
+
+
+;xor the encrypted counters with 64 bytes of input
+	vpxor	xmm5,xmm5,XMMWORD[rdi]
+	vpxor	xmm6,xmm6,XMMWORD[16+rdi]
+	vpxor	xmm7,xmm7,XMMWORD[32+rdi]
+	vpxor	xmm8,xmm8,XMMWORD[48+rdi]
+
+	sub	r8,1	;sets ZF for the jne below; the stores in between leave flags alone
+
+	vmovdqu	XMMWORD[rsi],xmm5
+	vmovdqu	XMMWORD[16+rsi],xmm6
+	vmovdqu	XMMWORD[32+rsi],xmm7
+	vmovdqu	XMMWORD[48+rsi],xmm8
+
+	jne	NEAR $L$128_enc_msg_x4_loop1
+
+	add	rsi,64	;step past the last four-block group
+	add	rdi,64
+
+$L$128_enc_msg_x4_check_remainder:
+	cmp	r10,0
+	je	NEAR $L$128_enc_msg_x4_out
+
+$L$128_enc_msg_x4_loop2:
+
+;leftover blocks, one per pass, continuing from the counter in xmm0
+	vmovdqa	xmm5,xmm0
+	vpaddd	xmm0,xmm0,XMMWORD[one]
+
+	vpxor	xmm5,xmm5,XMMWORD[rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[16+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[32+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[48+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[64+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[80+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[96+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[112+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[128+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[144+rcx]
+	vaesenclast	xmm5,xmm5,XMMWORD[160+rcx]
+
+
+	vpxor	xmm5,xmm5,XMMWORD[rdi]
+	vmovdqu	XMMWORD[rsi],xmm5
+
+	add	rdi,16
+	add	rsi,16
+
+	sub	r10,1
+	jne	NEAR $L$128_enc_msg_x4_loop2
+
+$L$128_enc_msg_x4_out:
+	pop	r13
+
+	pop	r12
+
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes128gcmsiv_enc_msg_x4:
+global	aes128gcmsiv_enc_msg_x8
+
+ALIGN	16
+aes128gcmsiv_enc_msg_x8:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes128gcmsiv_enc_msg_x8:
+	mov	rdi,rcx	;arg1 -> rdi: input
+	mov	rsi,rdx	;arg2 -> rsi: output
+	mov	rdx,r8	;arg3 -> rdx: 16-byte initial counter block (unaligned load)
+	mov	rcx,r9	;arg4 -> rcx: key schedule, 11 round keys
+	mov	r8,QWORD[40+rsp]	;arg5 (first stack argument) -> r8: length in bytes
+;Counter-mode encryption, eight blocks per pass and then one block at a time; only whole 16-byte blocks are processed.
+;NOTE(review): xmm6-xmm15 are callee-saved in the Microsoft x64 convention and are overwritten unsaved -
+;confirm this is acceptable for the Windows callers.
+	test	r8,r8
+	jnz	NEAR $L$128_enc_msg_x8_start
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$128_enc_msg_x8_start:
+	push	r12	;r12/r13 are saved and restored, but nothing in this routine modifies them
+
+	push	r13
+
+	push	rbp
+
+	mov	rbp,rsp	;remember rsp so the aligned scratch area can be released at exit
+
+
+;reserve scratch space and align it to 64 bytes; [rsp] holds the eighth counter
+	sub	rsp,128
+	and	rsp,-64
+
+	shr	r8,4	;bytes -> 16-byte blocks
+	mov	r10,r8
+	shl	r10,61
+	shr	r10,61	;r10 = block count mod 8, handled by the single-block loop
+
+
+	vmovdqu	xmm1,XMMWORD[rdx]
+	vpor	xmm1,xmm1,XMMWORD[OR_MASK]	;force the top bit of the counter block
+
+
+	vpaddd	xmm0,xmm1,XMMWORD[seven]
+	vmovdqu	XMMWORD[rsp],xmm0	;counter + 7 lives on the stack
+	vpaddd	xmm9,xmm1,XMMWORD[one]	;counter + 1
+	vpaddd	xmm10,xmm1,XMMWORD[two]	;counter + 2
+	vpaddd	xmm11,xmm1,XMMWORD[three]	;counter + 3
+	vpaddd	xmm12,xmm1,XMMWORD[four]	;counter + 4
+	vpaddd	xmm13,xmm1,XMMWORD[five]	;counter + 5
+	vpaddd	xmm14,xmm1,XMMWORD[six]	;counter + 6
+	vmovdqa	xmm0,xmm1	;counter + 0
+
+	shr	r8,3	;r8 = number of eight-block passes; ZF set when there are none
+	je	NEAR $L$128_enc_msg_x8_check_remainder
+
+	sub	rsi,128	;pre-bias both pointers: the loop advances them at its top
+	sub	rdi,128
+
+$L$128_enc_msg_x8_loop1:
+	add	rsi,128
+	add	rdi,128
+
+	vmovdqa	xmm1,xmm0	;working copies of counters 0..6
+	vmovdqa	xmm2,xmm9
+	vmovdqa	xmm3,xmm10
+	vmovdqa	xmm4,xmm11
+	vmovdqa	xmm5,xmm12
+	vmovdqa	xmm6,xmm13
+	vmovdqa	xmm7,xmm14
+
+	vmovdqu	xmm8,XMMWORD[rsp]	;counter 7 from the stack
+
+	vpxor	xmm1,xmm1,XMMWORD[rcx]	;initial key addition
+	vpxor	xmm2,xmm2,XMMWORD[rcx]
+	vpxor	xmm3,xmm3,XMMWORD[rcx]
+	vpxor	xmm4,xmm4,XMMWORD[rcx]
+	vpxor	xmm5,xmm5,XMMWORD[rcx]
+	vpxor	xmm6,xmm6,XMMWORD[rcx]
+	vpxor	xmm7,xmm7,XMMWORD[rcx]
+	vpxor	xmm8,xmm8,XMMWORD[rcx]
+
+	vmovdqu	xmm15,XMMWORD[16+rcx]	;round 1
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm14,XMMWORD[rsp]	;xmm14 was already copied to xmm7; reuse it to advance the stack counter
+	vpaddd	xmm14,xmm14,XMMWORD[eight]
+	vmovdqu	XMMWORD[rsp],xmm14	;next pass's counter + 7
+	vmovdqu	xmm15,XMMWORD[32+rcx]	;round 2
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpsubd	xmm14,xmm14,XMMWORD[one]	;(counter + 7) - 1 = next pass's counter + 6
+	vmovdqu	xmm15,XMMWORD[48+rcx]	;round 3
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm0,xmm0,XMMWORD[eight]	;advance counter + 0
+	vmovdqu	xmm15,XMMWORD[64+rcx]	;round 4
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm9,xmm9,XMMWORD[eight]	;advance counter + 1
+	vmovdqu	xmm15,XMMWORD[80+rcx]	;round 5
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm10,xmm10,XMMWORD[eight]	;advance counter + 2
+	vmovdqu	xmm15,XMMWORD[96+rcx]	;round 6
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm11,xmm11,XMMWORD[eight]	;advance counter + 3
+	vmovdqu	xmm15,XMMWORD[112+rcx]	;round 7
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm12,xmm12,XMMWORD[eight]	;advance counter + 4
+	vmovdqu	xmm15,XMMWORD[128+rcx]	;round 8
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm13,xmm13,XMMWORD[eight]	;advance counter + 5
+	vmovdqu	xmm15,XMMWORD[144+rcx]	;round 9
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm15,XMMWORD[160+rcx]	;final round
+	vaesenclast	xmm1,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm15
+	vaesenclast	xmm3,xmm3,xmm15
+	vaesenclast	xmm4,xmm4,xmm15
+	vaesenclast	xmm5,xmm5,xmm15
+	vaesenclast	xmm6,xmm6,xmm15
+	vaesenclast	xmm7,xmm7,xmm15
+	vaesenclast	xmm8,xmm8,xmm15
+
+
+;xor the encrypted counters with 128 bytes of input
+	vpxor	xmm1,xmm1,XMMWORD[rdi]
+	vpxor	xmm2,xmm2,XMMWORD[16+rdi]
+	vpxor	xmm3,xmm3,XMMWORD[32+rdi]
+	vpxor	xmm4,xmm4,XMMWORD[48+rdi]
+	vpxor	xmm5,xmm5,XMMWORD[64+rdi]
+	vpxor	xmm6,xmm6,XMMWORD[80+rdi]
+	vpxor	xmm7,xmm7,XMMWORD[96+rdi]
+	vpxor	xmm8,xmm8,XMMWORD[112+rdi]
+
+	dec	r8	;sets ZF for the jne below; the stores in between leave flags alone
+
+	vmovdqu	XMMWORD[rsi],xmm1
+	vmovdqu	XMMWORD[16+rsi],xmm2
+	vmovdqu	XMMWORD[32+rsi],xmm3
+	vmovdqu	XMMWORD[48+rsi],xmm4
+	vmovdqu	XMMWORD[64+rsi],xmm5
+	vmovdqu	XMMWORD[80+rsi],xmm6
+	vmovdqu	XMMWORD[96+rsi],xmm7
+	vmovdqu	XMMWORD[112+rsi],xmm8
+
+	jne	NEAR $L$128_enc_msg_x8_loop1
+
+	add	rsi,128	;step past the last eight-block group
+	add	rdi,128
+
+$L$128_enc_msg_x8_check_remainder:
+	cmp	r10,0
+	je	NEAR $L$128_enc_msg_x8_out
+
+$L$128_enc_msg_x8_loop2:
+
+;leftover blocks, one per pass, continuing from the counter in xmm0
+	vmovdqa	xmm1,xmm0
+	vpaddd	xmm0,xmm0,XMMWORD[one]
+
+	vpxor	xmm1,xmm1,XMMWORD[rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[16+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[32+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[48+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[64+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[80+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[96+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[112+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[128+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[144+rcx]
+	vaesenclast	xmm1,xmm1,XMMWORD[160+rcx]
+
+
+	vpxor	xmm1,xmm1,XMMWORD[rdi]
+
+	vmovdqu	XMMWORD[rsi],xmm1
+
+	add	rdi,16
+	add	rsi,16
+
+	dec	r10
+	jne	NEAR $L$128_enc_msg_x8_loop2
+
+$L$128_enc_msg_x8_out:
+	mov	rsp,rbp	;release the aligned scratch area
+
+	pop	rbp
+
+	pop	r13
+
+	pop	r12
+
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes128gcmsiv_enc_msg_x8:
+global	aes128gcmsiv_dec
+
+ALIGN	16
+aes128gcmsiv_dec:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes128gcmsiv_dec:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+
+
+
+	test	r9,~15
+	jnz	NEAR $L$128_dec_start
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$128_dec_start:	; setup, then the first six blocks are processed with no hashing yet
+	vzeroupper
+	vmovdqa	xmm0,XMMWORD[rdx]	; xmm0 = running accumulator (aligned load)
+	mov	rax,rdx
+
+	lea	rax,[32+rax]	; rax = rdx+32, base used for the scratch area
+	lea	rcx,[32+rcx]	; rcx biased by 32 so table entries sit at -32 .. +48
+
+; Initial counter block = 16 bytes at input+length (presumably the tag - confirm), OR'ed with OR_MASK.
+	vmovdqu	xmm15,XMMWORD[r9*1+rdi]
+	vpor	xmm15,xmm15,XMMWORD[OR_MASK]
+	and	r9,~15	; round the length down to whole 16-byte blocks
+
+; Fewer than six whole blocks: go straight to the single-block loop.
+	cmp	r9,96
+	jb	NEAR $L$128_dec_loop2
+
+; Six counters in xmm7..xmm12; xmm15 becomes the counter for the next group (assumes one/two add 1/2 to the counter dword).
+	sub	r9,96
+	vmovdqa	xmm7,xmm15
+	vpaddd	xmm8,xmm7,XMMWORD[one]
+	vpaddd	xmm9,xmm7,XMMWORD[two]
+	vpaddd	xmm10,xmm9,XMMWORD[one]
+	vpaddd	xmm11,xmm9,XMMWORD[two]
+	vpaddd	xmm12,xmm11,XMMWORD[one]
+	vpaddd	xmm15,xmm11,XMMWORD[two]
+
+	vpxor	xmm7,xmm7,XMMWORD[r8]	; round key 0 (whitening)
+	vpxor	xmm8,xmm8,XMMWORD[r8]
+	vpxor	xmm9,xmm9,XMMWORD[r8]
+	vpxor	xmm10,xmm10,XMMWORD[r8]
+	vpxor	xmm11,xmm11,XMMWORD[r8]
+	vpxor	xmm12,xmm12,XMMWORD[r8]
+
+	vmovdqu	xmm4,XMMWORD[16+r8]	; round key 1
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[32+r8]	; round key 2
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[48+r8]	; round key 3
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[64+r8]	; round key 4
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[80+r8]	; round key 5
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[96+r8]	; round key 6
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[112+r8]	; round key 7
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[128+r8]	; round key 8
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[144+r8]	; round key 9
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[160+r8]	; round key 10 (final round)
+	vaesenclast	xmm7,xmm7,xmm4
+	vaesenclast	xmm8,xmm8,xmm4
+	vaesenclast	xmm9,xmm9,xmm4
+	vaesenclast	xmm10,xmm10,xmm4
+	vaesenclast	xmm11,xmm11,xmm4
+	vaesenclast	xmm12,xmm12,xmm4
+
+; Keystream XOR input -> output; the six results stay in xmm7..xmm12 for hashing by the code that follows.
+	vpxor	xmm7,xmm7,XMMWORD[rdi]
+	vpxor	xmm8,xmm8,XMMWORD[16+rdi]
+	vpxor	xmm9,xmm9,XMMWORD[32+rdi]
+	vpxor	xmm10,xmm10,XMMWORD[48+rdi]
+	vpxor	xmm11,xmm11,XMMWORD[64+rdi]
+	vpxor	xmm12,xmm12,XMMWORD[80+rdi]
+
+	vmovdqu	XMMWORD[rsi],xmm7
+	vmovdqu	XMMWORD[16+rsi],xmm8
+	vmovdqu	XMMWORD[32+rsi],xmm9
+	vmovdqu	XMMWORD[48+rsi],xmm10
+	vmovdqu	XMMWORD[64+rsi],xmm11
+	vmovdqu	XMMWORD[80+rsi],xmm12
+
+	add	rdi,96
+	add	rsi,96
+	jmp	NEAR $L$128_dec_loop1
+
+
+ALIGN	64
+$L$128_dec_loop1:	; per iteration: encrypt six new counters while hashing the previous six output blocks
+	cmp	r9,96
+	jb	NEAR $L$128_dec_finish_96
+	sub	r9,96
+; Previous outputs: the newest (xmm12) stays in xmm6, the other five are spilled to rax-16 .. rax+48.
+	vmovdqa	xmm6,xmm12
+	vmovdqa	XMMWORD[(16-32)+rax],xmm11
+	vmovdqa	XMMWORD[(32-32)+rax],xmm10
+	vmovdqa	XMMWORD[(48-32)+rax],xmm9
+	vmovdqa	XMMWORD[(64-32)+rax],xmm8
+	vmovdqa	XMMWORD[(80-32)+rax],xmm7
+; Next six counters in xmm7..xmm12; xmm15 advances to the group after that.
+	vmovdqa	xmm7,xmm15
+	vpaddd	xmm8,xmm7,XMMWORD[one]
+	vpaddd	xmm9,xmm7,XMMWORD[two]
+	vpaddd	xmm10,xmm9,XMMWORD[one]
+	vpaddd	xmm11,xmm9,XMMWORD[two]
+	vpaddd	xmm12,xmm11,XMMWORD[one]
+	vpaddd	xmm15,xmm11,XMMWORD[two]
+
+	vmovdqa	xmm4,XMMWORD[r8]	; round key 0 (whitening)
+	vpxor	xmm7,xmm7,xmm4
+	vpxor	xmm8,xmm8,xmm4
+	vpxor	xmm9,xmm9,xmm4
+	vpxor	xmm10,xmm10,xmm4
+	vpxor	xmm11,xmm11,xmm4
+	vpxor	xmm12,xmm12,xmm4
+; Newest block x table[rcx-32]. Partial products: xmm2 = high*high, xmm3 = low*low, xmm1 = cross terms.
+	vmovdqu	xmm4,XMMWORD[((0-32))+rcx]
+	vpclmulqdq	xmm2,xmm6,xmm4,0x11
+	vpclmulqdq	xmm3,xmm6,xmm4,0x00
+	vpclmulqdq	xmm1,xmm6,xmm4,0x01
+	vpclmulqdq	xmm4,xmm6,xmm4,0x10
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm4,XMMWORD[16+r8]	; round key 1
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[((-16))+rax]	; spilled previous xmm11
+	vmovdqu	xmm13,XMMWORD[((-16))+rcx]	; next table entry
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm4,XMMWORD[32+r8]	; round key 2
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[rax]	; spilled previous xmm10
+	vmovdqu	xmm13,XMMWORD[rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm4,XMMWORD[48+r8]	; round key 3
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[16+rax]	; spilled previous xmm9
+	vmovdqu	xmm13,XMMWORD[16+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm4,XMMWORD[64+r8]	; round key 4
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[32+rax]	; spilled previous xmm8
+	vmovdqu	xmm13,XMMWORD[32+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm4,XMMWORD[80+r8]	; round key 5
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[96+r8]	; round key 6
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[112+r8]	; round key 7
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+; Oldest block of the group (previous xmm7) absorbs the running accumulator before its multiply.
+	vmovdqa	xmm6,XMMWORD[((80-32))+rax]
+	vpxor	xmm6,xmm6,xmm0
+	vmovdqu	xmm5,XMMWORD[((80-32))+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm5,0x01
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x10
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm4,XMMWORD[128+r8]	; round key 8
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+; Split the cross terms: xmm5 = high 128 bits of the product, xmm0 = low 128 bits.
+	vpsrldq	xmm4,xmm1,8
+	vpxor	xmm5,xmm2,xmm4
+	vpslldq	xmm4,xmm1,8
+	vpxor	xmm0,xmm3,xmm4
+
+	vmovdqa	xmm3,XMMWORD[poly]	; reduction constant (defined elsewhere in the file)
+
+	vmovdqu	xmm4,XMMWORD[144+r8]	; round key 9
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[160+r8]	; round key 10, merged with the input below
+	vpalignr	xmm2,xmm0,xmm0,8	; reduction step 1: swap qword halves, multiply by poly, xor
+	vpclmulqdq	xmm0,xmm0,xmm3,0x10
+	vpxor	xmm0,xmm2,xmm0
+; Final round: XOR the input into the last round key first, so vaesenclast yields the output block directly.
+	vpxor	xmm4,xmm6,XMMWORD[rdi]
+	vaesenclast	xmm7,xmm7,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[16+rdi]
+	vaesenclast	xmm8,xmm8,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[32+rdi]
+	vaesenclast	xmm9,xmm9,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[48+rdi]
+	vaesenclast	xmm10,xmm10,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[64+rdi]
+	vaesenclast	xmm11,xmm11,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[80+rdi]
+	vaesenclast	xmm12,xmm12,xmm4
+
+	vpalignr	xmm2,xmm0,xmm0,8	; reduction step 2
+	vpclmulqdq	xmm0,xmm0,xmm3,0x10
+	vpxor	xmm0,xmm2,xmm0
+
+	vmovdqu	XMMWORD[rsi],xmm7
+	vmovdqu	XMMWORD[16+rsi],xmm8
+	vmovdqu	XMMWORD[32+rsi],xmm9
+	vmovdqu	XMMWORD[48+rsi],xmm10
+	vmovdqu	XMMWORD[64+rsi],xmm11
+	vmovdqu	XMMWORD[80+rsi],xmm12
+
+	vpxor	xmm0,xmm0,xmm5	; add the high half: xmm0 is the updated accumulator
+
+	lea	rdi,[96+rdi]
+	lea	rsi,[96+rsi]
+	jmp	NEAR $L$128_dec_loop1
+
+$L$128_dec_finish_96:	; hash the last six output blocks (xmm7..xmm12); no further AES work here
+	vmovdqa	xmm6,xmm12	; newest block stays in a register
+	vmovdqa	XMMWORD[(16-32)+rax],xmm11	; the other five go to scratch at rax-16 .. rax+48
+	vmovdqa	XMMWORD[(32-32)+rax],xmm10
+	vmovdqa	XMMWORD[(48-32)+rax],xmm9
+	vmovdqa	XMMWORD[(64-32)+rax],xmm8
+	vmovdqa	XMMWORD[(80-32)+rax],xmm7
+; Partial products: xmm1 = cross terms, xmm2 = high*high, xmm3 = low*low.
+	vmovdqu	xmm4,XMMWORD[((0-32))+rcx]
+	vpclmulqdq	xmm1,xmm6,xmm4,0x10
+	vpclmulqdq	xmm2,xmm6,xmm4,0x11
+	vpclmulqdq	xmm3,xmm6,xmm4,0x00
+	vpclmulqdq	xmm4,xmm6,xmm4,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm6,XMMWORD[((-16))+rax]	; block from xmm11
+	vmovdqu	xmm13,XMMWORD[((-16))+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm6,XMMWORD[rax]	; block from xmm10
+	vmovdqu	xmm13,XMMWORD[rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm6,XMMWORD[16+rax]	; block from xmm9
+	vmovdqu	xmm13,XMMWORD[16+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm6,XMMWORD[32+rax]	; block from xmm8
+	vmovdqu	xmm13,XMMWORD[32+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+; Oldest block (from xmm7) absorbs the running accumulator xmm0 before its multiply.
+	vmovdqu	xmm6,XMMWORD[((80-32))+rax]
+	vpxor	xmm6,xmm6,xmm0
+	vmovdqu	xmm5,XMMWORD[((80-32))+rcx]
+	vpclmulqdq	xmm4,xmm6,xmm5,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vpsrldq	xmm4,xmm1,8	; fold cross terms: xmm5 = high 128 bits, xmm0 = low 128 bits
+	vpxor	xmm5,xmm2,xmm4
+	vpslldq	xmm4,xmm1,8
+	vpxor	xmm0,xmm3,xmm4
+
+	vmovdqa	xmm3,XMMWORD[poly]	; reduction constant (defined elsewhere in the file)
+
+	vpalignr	xmm2,xmm0,xmm0,8	; reduction step 1: swap qword halves, multiply by poly, xor
+	vpclmulqdq	xmm0,xmm0,xmm3,0x10
+	vpxor	xmm0,xmm2,xmm0
+
+	vpalignr	xmm2,xmm0,xmm0,8	; reduction step 2
+	vpclmulqdq	xmm0,xmm0,xmm3,0x10
+	vpxor	xmm0,xmm2,xmm0
+
+	vpxor	xmm0,xmm0,xmm5	; xmm0 = updated accumulator; execution falls through to the next label
+
+$L$128_dec_loop2:
+; Single-block tail loop: one 16-byte block per iteration while r9 >= 16.
+; xmm15 is the current counter block, xmm0 the running accumulator.
+
+	cmp	r9,16
+	jb	NEAR $L$128_dec_out
+	sub	r9,16
+
+	vmovdqa	xmm2,xmm15
+	vpaddd	xmm15,xmm15,XMMWORD[one]	; advance the counter for the next iteration
+
+	vpxor	xmm2,xmm2,XMMWORD[r8]	; AES-128: whitening, nine vaesenc rounds, vaesenclast
+	vaesenc	xmm2,xmm2,XMMWORD[16+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[32+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[48+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[64+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[80+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[96+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[112+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[128+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[144+r8]
+	vaesenclast	xmm2,xmm2,XMMWORD[160+r8]
+	vpxor	xmm2,xmm2,XMMWORD[rdi]	; keystream XOR input
+	vmovdqu	XMMWORD[rsi],xmm2
+	add	rdi,16
+	add	rsi,16
+
+	vpxor	xmm0,xmm0,xmm2	; fold the output block into the accumulator
+	vmovdqa	xmm1,XMMWORD[((-32))+rcx]	; first table entry (aligned load)
+	call	GFMUL	; defined elsewhere; presumably xmm0 = xmm0 * xmm1 reduced - confirm
+
+	jmp	NEAR $L$128_dec_loop2
+
+$L$128_dec_out:	; common exit once fewer than 16 bytes remain
+	vmovdqu	XMMWORD[rdx],xmm0	; write the accumulator back to the caller's block at [rdx]
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes128gcmsiv_dec:
+global	aes128gcmsiv_ecb_enc_block
+; aes128gcmsiv_ecb_enc_block: encrypt one 16-byte block with AES-128 using a pre-expanded key schedule.
+ALIGN	16
+aes128gcmsiv_ecb_enc_block:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes128gcmsiv_ecb_enc_block:
+	mov	rdi,rcx	; arg1 -> rdi: input block
+	mov	rsi,rdx	; arg2 -> rsi: output block
+	mov	rdx,r8	; arg3 -> rdx: eleven 16-byte round keys
+
+; Input and output use vmovdqa, so both pointers must be 16-byte aligned.
+
+	vmovdqa	xmm1,XMMWORD[rdi]
+
+	vpxor	xmm1,xmm1,XMMWORD[rdx]	; round key 0 (whitening)
+	vaesenc	xmm1,xmm1,XMMWORD[16+rdx]	; rounds 1-9
+	vaesenc	xmm1,xmm1,XMMWORD[32+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[48+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[64+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[80+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[96+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[112+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[128+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[144+rdx]
+	vaesenclast	xmm1,xmm1,XMMWORD[160+rdx]	; round 10 (final)
+
+	vmovdqa	XMMWORD[rsi],xmm1
+
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes128gcmsiv_ecb_enc_block:
+global	aes256gcmsiv_aes_ks_enc_x1
+; aes256gcmsiv_aes_ks_enc_x1: expand a 32-byte AES-256 key into 15 round keys while encrypting one block with them.
+ALIGN	16
+aes256gcmsiv_aes_ks_enc_x1:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes256gcmsiv_aes_ks_enc_x1:
+	mov	rdi,rcx	; arg1 -> rdi: input block (aligned load)
+	mov	rsi,rdx	; arg2 -> rsi: output block (aligned store)
+	mov	rdx,r8	; arg3 -> rdx: receives 15 round keys at offsets 0 .. 224
+	mov	rcx,r9	; arg4 -> rcx: 32-byte key (two aligned loads)
+; NOTE(review): xmm8, xmm14 and xmm15 are written without being saved; the Win64 ABI
+; lists xmm6-xmm15 as callee-saved - confirm callers tolerate this.
+; Each generated round key is fed to the block cipher state (xmm8) as soon as it is computed.
+	vmovdqa	xmm0,XMMWORD[con1]	; round constant (con1 is defined elsewhere in the file)
+	vmovdqa	xmm15,XMMWORD[mask]	; byte-shuffle mask (defined elsewhere in the file)
+	vmovdqa	xmm8,XMMWORD[rdi]	; block being encrypted
+	vmovdqa	xmm1,XMMWORD[rcx]	; first key half = round key 0
+	vmovdqa	xmm3,XMMWORD[16+rcx]	; second key half = round key 1
+	vpxor	xmm8,xmm8,xmm1
+	vaesenc	xmm8,xmm8,xmm3
+	vmovdqu	XMMWORD[rdx],xmm1
+	vmovdqu	XMMWORD[16+rdx],xmm3
+	vpxor	xmm14,xmm14,xmm14	; xmm14 = 0, used as an all-zero round key below
+; Even round key: shuffle the previous key with mask, vaesenclast against the round constant, then XOR into the running dword prefix-XOR of xmm1.
+	vpshufb	xmm2,xmm3,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1	; shift the round constant left by one bit for the next even key
+	vpslldq	xmm4,xmm1,4	; three shift-by-4-bytes/XOR steps: each dword becomes the XOR of itself and all lower dwords
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vaesenc	xmm8,xmm8,xmm1
+	vmovdqu	XMMWORD[32+rdx],xmm1	; round key 2
+; Odd round key: broadcast the top dword of the new key, vaesenclast with the zero key, then the same prefix-XOR on xmm3.
+	vpshufd	xmm2,xmm1,0xff
+	vaesenclast	xmm2,xmm2,xmm14
+	vpslldq	xmm4,xmm3,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpxor	xmm3,xmm3,xmm2
+	vaesenc	xmm8,xmm8,xmm3
+	vmovdqu	XMMWORD[48+rdx],xmm3	; round key 3
+
+	vpshufb	xmm2,xmm3,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpslldq	xmm4,xmm1,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vaesenc	xmm8,xmm8,xmm1
+	vmovdqu	XMMWORD[64+rdx],xmm1	; round key 4
+
+	vpshufd	xmm2,xmm1,0xff
+	vaesenclast	xmm2,xmm2,xmm14
+	vpslldq	xmm4,xmm3,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpxor	xmm3,xmm3,xmm2
+	vaesenc	xmm8,xmm8,xmm3
+	vmovdqu	XMMWORD[80+rdx],xmm3	; round key 5
+
+	vpshufb	xmm2,xmm3,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpslldq	xmm4,xmm1,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vaesenc	xmm8,xmm8,xmm1
+	vmovdqu	XMMWORD[96+rdx],xmm1	; round key 6
+
+	vpshufd	xmm2,xmm1,0xff
+	vaesenclast	xmm2,xmm2,xmm14
+	vpslldq	xmm4,xmm3,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpxor	xmm3,xmm3,xmm2
+	vaesenc	xmm8,xmm8,xmm3
+	vmovdqu	XMMWORD[112+rdx],xmm3	; round key 7
+
+	vpshufb	xmm2,xmm3,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpslldq	xmm4,xmm1,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vaesenc	xmm8,xmm8,xmm1
+	vmovdqu	XMMWORD[128+rdx],xmm1	; round key 8
+
+	vpshufd	xmm2,xmm1,0xff
+	vaesenclast	xmm2,xmm2,xmm14
+	vpslldq	xmm4,xmm3,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpxor	xmm3,xmm3,xmm2
+	vaesenc	xmm8,xmm8,xmm3
+	vmovdqu	XMMWORD[144+rdx],xmm3	; round key 9
+
+	vpshufb	xmm2,xmm3,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpslldq	xmm4,xmm1,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vaesenc	xmm8,xmm8,xmm1
+	vmovdqu	XMMWORD[160+rdx],xmm1	; round key 10
+
+	vpshufd	xmm2,xmm1,0xff
+	vaesenclast	xmm2,xmm2,xmm14
+	vpslldq	xmm4,xmm3,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpxor	xmm3,xmm3,xmm2
+	vaesenc	xmm8,xmm8,xmm3
+	vmovdqu	XMMWORD[176+rdx],xmm3	; round key 11
+
+	vpshufb	xmm2,xmm3,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpslldq	xmm4,xmm1,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vaesenc	xmm8,xmm8,xmm1
+	vmovdqu	XMMWORD[192+rdx],xmm1	; round key 12
+
+	vpshufd	xmm2,xmm1,0xff
+	vaesenclast	xmm2,xmm2,xmm14
+	vpslldq	xmm4,xmm3,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpxor	xmm3,xmm3,xmm2
+	vaesenc	xmm8,xmm8,xmm3
+	vmovdqu	XMMWORD[208+rdx],xmm3	; round key 13
+; Last even key: the round constant is not shifted again because no further key follows.
+	vpshufb	xmm2,xmm3,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslldq	xmm4,xmm1,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vaesenclast	xmm8,xmm8,xmm1	; final AES round of the block encryption
+	vmovdqu	XMMWORD[224+rdx],xmm1	; round key 14
+
+	vmovdqa	XMMWORD[rsi],xmm8	; store the encrypted block
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes256gcmsiv_aes_ks_enc_x1:
+global	aes256gcmsiv_ecb_enc_block
+; aes256gcmsiv_ecb_enc_block: encrypt one 16-byte block with AES-256 using a pre-expanded key schedule.
+ALIGN	16
+aes256gcmsiv_ecb_enc_block:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes256gcmsiv_ecb_enc_block:
+	mov	rdi,rcx	; arg1 -> rdi: input block
+	mov	rsi,rdx	; arg2 -> rsi: output block
+	mov	rdx,r8	; arg3 -> rdx: fifteen 16-byte round keys
+
+; Input and output use vmovdqa, so both pointers must be 16-byte aligned.
+
+	vmovdqa	xmm1,XMMWORD[rdi]
+	vpxor	xmm1,xmm1,XMMWORD[rdx]	; round key 0 (whitening)
+	vaesenc	xmm1,xmm1,XMMWORD[16+rdx]	; rounds 1-13
+	vaesenc	xmm1,xmm1,XMMWORD[32+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[48+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[64+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[80+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[96+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[112+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[128+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[144+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[160+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[176+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[192+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[208+rdx]
+	vaesenclast	xmm1,xmm1,XMMWORD[224+rdx]	; round 14 (final)
+	vmovdqa	XMMWORD[rsi],xmm1
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes256gcmsiv_ecb_enc_block:
+global	aes256gcmsiv_enc_msg_x4
+; aes256gcmsiv_enc_msg_x4: XOR a message with an AES-256 counter keystream, four blocks per main-loop iteration.
+ALIGN	16
+aes256gcmsiv_enc_msg_x4:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes256gcmsiv_enc_msg_x4:
+	mov	rdi,rcx	; arg1 -> rdi: input pointer
+	mov	rsi,rdx	; arg2 -> rsi: output pointer
+	mov	rdx,r8	; arg3 -> rdx: 16-byte initial counter block (aligned load)
+	mov	rcx,r9	; arg4 -> rcx: fifteen 16-byte round keys
+	mov	r8,QWORD[40+rsp]	; arg5 -> r8: length in bytes
+; NOTE(review): xmm6-xmm8, xmm12 and xmm15 are written without being saved; the Win64
+; ABI lists xmm6-xmm15 as callee-saved - confirm callers tolerate this.
+; A zero length returns immediately.
+	test	r8,r8
+	jnz	NEAR $L$256_enc_msg_x4_start
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$256_enc_msg_x4_start:	; r8 = number of 16-byte blocks, rounded up
+	mov	r10,r8
+	shr	r8,4
+	shl	r10,60	; ZF is clear when any of the low four length bits is set
+	jz	NEAR $L$256_enc_msg_x4_start2
+	add	r8,1	; a partial trailing block counts as a whole block (16 bytes are read and written for it)
+
+$L$256_enc_msg_x4_start2:
+	mov	r10,r8
+	shl	r10,62
+	shr	r10,62	; r10 = block count mod 4, handled one block at a time after the main loop
+
+; Counter block = [rdx] OR OR_MASK; xmm0..xmm3 hold four consecutive counters (assumes one/two/three add 1/2/3).
+	vmovdqa	xmm15,XMMWORD[rdx]
+	vpor	xmm15,xmm15,XMMWORD[OR_MASK]
+
+	vmovdqa	xmm4,XMMWORD[four]	; per-iteration counter increment
+	vmovdqa	xmm0,xmm15
+	vpaddd	xmm1,xmm15,XMMWORD[one]
+	vpaddd	xmm2,xmm15,XMMWORD[two]
+	vpaddd	xmm3,xmm15,XMMWORD[three]
+
+	shr	r8,2	; r8 = number of four-block groups; ZF set when there are none
+	je	NEAR $L$256_enc_msg_x4_check_remainder
+; Pre-bias both pointers by -64 because the loop body starts by adding 64.
+	sub	rsi,64
+	sub	rdi,64
+
+$L$256_enc_msg_x4_loop1:	; four blocks per iteration; r8 counts the remaining four-block groups
+	add	rsi,64
+	add	rdi,64
+
+	vmovdqa	xmm5,xmm0	; working copies of the four counters
+	vmovdqa	xmm6,xmm1
+	vmovdqa	xmm7,xmm2
+	vmovdqa	xmm8,xmm3
+
+	vpxor	xmm5,xmm5,XMMWORD[rcx]	; round key 0 (whitening)
+	vpxor	xmm6,xmm6,XMMWORD[rcx]
+	vpxor	xmm7,xmm7,XMMWORD[rcx]
+	vpxor	xmm8,xmm8,XMMWORD[rcx]
+
+	vmovdqu	xmm12,XMMWORD[16+rcx]	; round key 1
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vpaddd	xmm0,xmm0,xmm4	; counters advance by xmm4 (loaded from `four`), interleaved with the AES rounds
+	vmovdqu	xmm12,XMMWORD[32+rcx]	; round key 2
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vpaddd	xmm1,xmm1,xmm4
+	vmovdqu	xmm12,XMMWORD[48+rcx]	; round key 3
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vpaddd	xmm2,xmm2,xmm4
+	vmovdqu	xmm12,XMMWORD[64+rcx]	; round key 4
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vpaddd	xmm3,xmm3,xmm4
+
+	vmovdqu	xmm12,XMMWORD[80+rcx]	; round key 5
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[96+rcx]	; round key 6
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[112+rcx]	; round key 7
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[128+rcx]	; round key 8
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[144+rcx]	; round key 9
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[160+rcx]	; round key 10
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[176+rcx]	; round key 11
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[192+rcx]	; round key 12
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[208+rcx]	; round key 13
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[224+rcx]	; round key 14 (final round)
+	vaesenclast	xmm5,xmm5,xmm12
+	vaesenclast	xmm6,xmm6,xmm12
+	vaesenclast	xmm7,xmm7,xmm12
+	vaesenclast	xmm8,xmm8,xmm12
+
+
+; Keystream XOR input.
+	vpxor	xmm5,xmm5,XMMWORD[rdi]
+	vpxor	xmm6,xmm6,XMMWORD[16+rdi]
+	vpxor	xmm7,xmm7,XMMWORD[32+rdi]
+	vpxor	xmm8,xmm8,XMMWORD[48+rdi]
+
+	sub	r8,1	; sets ZF for the jne below; the vector stores in between leave the flags untouched
+
+	vmovdqu	XMMWORD[rsi],xmm5
+	vmovdqu	XMMWORD[16+rsi],xmm6
+	vmovdqu	XMMWORD[32+rsi],xmm7
+	vmovdqu	XMMWORD[48+rsi],xmm8
+
+	jne	NEAR $L$256_enc_msg_x4_loop1
+
+	add	rsi,64	; step past the last group processed
+	add	rdi,64
+
+$L$256_enc_msg_x4_check_remainder:	; r10 = number of leftover blocks (0-3)
+	cmp	r10,0
+	je	NEAR $L$256_enc_msg_x4_out
+
+$L$256_enc_msg_x4_loop2:
+; One block per iteration; xmm0 holds the next unused counter.
+
+
+	vmovdqa	xmm5,xmm0
+	vpaddd	xmm0,xmm0,XMMWORD[one]	; advance the counter
+	vpxor	xmm5,xmm5,XMMWORD[rcx]	; AES-256: whitening, thirteen vaesenc rounds, vaesenclast
+	vaesenc	xmm5,xmm5,XMMWORD[16+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[32+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[48+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[64+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[80+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[96+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[112+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[128+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[144+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[160+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[176+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[192+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[208+rcx]
+	vaesenclast	xmm5,xmm5,XMMWORD[224+rcx]
+
+
+	vpxor	xmm5,xmm5,XMMWORD[rdi]	; keystream XOR input
+
+	vmovdqu	XMMWORD[rsi],xmm5
+
+	add	rdi,16
+	add	rsi,16
+
+	sub	r10,1
+	jne	NEAR $L$256_enc_msg_x4_loop2
+
+$L$256_enc_msg_x4_out:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes256gcmsiv_enc_msg_x4:
+global	aes256gcmsiv_enc_msg_x8
+; aes256gcmsiv_enc_msg_x8: XOR a message with an AES-256 counter keystream, eight blocks per main-loop iteration.
+ALIGN	16
+aes256gcmsiv_enc_msg_x8:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes256gcmsiv_enc_msg_x8:
+	mov	rdi,rcx	; arg1 -> rdi: input pointer
+	mov	rsi,rdx	; arg2 -> rsi: output pointer
+	mov	rdx,r8	; arg3 -> rdx: 16-byte initial counter block (aligned load)
+	mov	rcx,r9	; arg4 -> rcx: fifteen 16-byte round keys
+	mov	r8,QWORD[40+rsp]	; arg5 -> r8: length in bytes
+; NOTE(review): xmm6-xmm15 are written without being saved; the Win64 ABI lists them
+; as callee-saved - confirm callers tolerate this.
+; A zero length returns immediately.
+	test	r8,r8
+	jnz	NEAR $L$256_enc_msg_x8_start
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$256_enc_msg_x8_start:
+; NOTE(review): r11 is a 64-byte-aligned scratch slot below rsp, but rsp itself is never lowered - confirm this is safe on Win64.
+	mov	r11,rsp
+	sub	r11,16
+	and	r11,-64
+
+	mov	r10,r8
+	shr	r8,4
+	shl	r10,60	; ZF is clear when any of the low four length bits is set
+	jz	NEAR $L$256_enc_msg_x8_start2
+	add	r8,1	; r8 = block count rounded up; a partial trailing block is processed as 16 bytes
+
+$L$256_enc_msg_x8_start2:
+	mov	r10,r8
+	shl	r10,61
+	shr	r10,61	; r10 = block count mod 8, handled one block at a time after the main loop
+
+; Counter block = [rdx] OR OR_MASK.
+	vmovdqa	xmm1,XMMWORD[rdx]
+	vpor	xmm1,xmm1,XMMWORD[OR_MASK]
+
+; Eight counters: xmm0, xmm9..xmm14 in registers and the eighth (counter + seven) in the scratch slot at [r11].
+	vpaddd	xmm0,xmm1,XMMWORD[seven]
+	vmovdqa	XMMWORD[r11],xmm0
+	vpaddd	xmm9,xmm1,XMMWORD[one]
+	vpaddd	xmm10,xmm1,XMMWORD[two]
+	vpaddd	xmm11,xmm1,XMMWORD[three]
+	vpaddd	xmm12,xmm1,XMMWORD[four]
+	vpaddd	xmm13,xmm1,XMMWORD[five]
+	vpaddd	xmm14,xmm1,XMMWORD[six]
+	vmovdqa	xmm0,xmm1
+
+	shr	r8,3	; r8 = number of eight-block groups; ZF set when there are none
+	jz	NEAR $L$256_enc_msg_x8_check_remainder
+; Pre-bias both pointers by -128 because the loop body starts by adding 128.
+	sub	rsi,128
+	sub	rdi,128
+
+$L$256_enc_msg_x8_loop1:	; eight blocks per iteration; r8 counts the remaining eight-block groups
+	add	rsi,128
+	add	rdi,128
+
+	vmovdqa	xmm1,xmm0	; working copies of the seven register-held counters
+	vmovdqa	xmm2,xmm9
+	vmovdqa	xmm3,xmm10
+	vmovdqa	xmm4,xmm11
+	vmovdqa	xmm5,xmm12
+	vmovdqa	xmm6,xmm13
+	vmovdqa	xmm7,xmm14
+
+	vmovdqa	xmm8,XMMWORD[r11]	; eighth counter comes from the scratch slot
+
+	vpxor	xmm1,xmm1,XMMWORD[rcx]	; round key 0 (whitening)
+	vpxor	xmm2,xmm2,XMMWORD[rcx]
+	vpxor	xmm3,xmm3,XMMWORD[rcx]
+	vpxor	xmm4,xmm4,XMMWORD[rcx]
+	vpxor	xmm5,xmm5,XMMWORD[rcx]
+	vpxor	xmm6,xmm6,XMMWORD[rcx]
+	vpxor	xmm7,xmm7,XMMWORD[rcx]
+	vpxor	xmm8,xmm8,XMMWORD[rcx]
+
+	vmovdqu	xmm15,XMMWORD[16+rcx]	; round key 1
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vmovdqa	xmm14,XMMWORD[r11]	; xmm14 is borrowed to advance the eighth counter by `eight` in place
+	vpaddd	xmm14,xmm14,XMMWORD[eight]
+	vmovdqa	XMMWORD[r11],xmm14
+	vmovdqu	xmm15,XMMWORD[32+rcx]	; round key 2
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpsubd	xmm14,xmm14,XMMWORD[one]	; new eighth counter minus `one` becomes the new seventh counter
+	vmovdqu	xmm15,XMMWORD[48+rcx]	; round key 3
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm0,xmm0,XMMWORD[eight]	; remaining counters advance by `eight`, interleaved with the AES rounds
+	vmovdqu	xmm15,XMMWORD[64+rcx]	; round key 4
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm9,xmm9,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[80+rcx]	; round key 5
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm10,xmm10,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[96+rcx]	; round key 6
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm11,xmm11,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[112+rcx]	; round key 7
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm12,xmm12,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[128+rcx]	; round key 8
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm13,xmm13,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[144+rcx]	; round key 9
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm15,XMMWORD[160+rcx]	; round key 10
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm15,XMMWORD[176+rcx]	; round key 11
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm15,XMMWORD[192+rcx]	; round key 12
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm15,XMMWORD[208+rcx]	; round key 13
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm15,XMMWORD[224+rcx]	; round key 14 (final round)
+	vaesenclast	xmm1,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm15
+	vaesenclast	xmm3,xmm3,xmm15
+	vaesenclast	xmm4,xmm4,xmm15
+	vaesenclast	xmm5,xmm5,xmm15
+	vaesenclast	xmm6,xmm6,xmm15
+	vaesenclast	xmm7,xmm7,xmm15
+	vaesenclast	xmm8,xmm8,xmm15
+
+
+; Keystream XOR input.
+	vpxor	xmm1,xmm1,XMMWORD[rdi]
+	vpxor	xmm2,xmm2,XMMWORD[16+rdi]
+	vpxor	xmm3,xmm3,XMMWORD[32+rdi]
+	vpxor	xmm4,xmm4,XMMWORD[48+rdi]
+	vpxor	xmm5,xmm5,XMMWORD[64+rdi]
+	vpxor	xmm6,xmm6,XMMWORD[80+rdi]
+	vpxor	xmm7,xmm7,XMMWORD[96+rdi]
+	vpxor	xmm8,xmm8,XMMWORD[112+rdi]
+
+	sub	r8,1	; sets ZF for the jne below; the vector stores in between leave the flags untouched
+
+	vmovdqu	XMMWORD[rsi],xmm1
+	vmovdqu	XMMWORD[16+rsi],xmm2
+	vmovdqu	XMMWORD[32+rsi],xmm3
+	vmovdqu	XMMWORD[48+rsi],xmm4
+	vmovdqu	XMMWORD[64+rsi],xmm5
+	vmovdqu	XMMWORD[80+rsi],xmm6
+	vmovdqu	XMMWORD[96+rsi],xmm7
+	vmovdqu	XMMWORD[112+rsi],xmm8
+
+	jne	NEAR $L$256_enc_msg_x8_loop1
+
+	add	rsi,128	; step past the last group processed
+	add	rdi,128
+
+$L$256_enc_msg_x8_check_remainder:	; r10 = number of leftover blocks (0-7)
+	cmp	r10,0
+	je	NEAR $L$256_enc_msg_x8_out
+
+$L$256_enc_msg_x8_loop2:
+; One block per iteration; xmm0 holds the next unused counter.
+
+	vmovdqa	xmm1,xmm0
+	vpaddd	xmm0,xmm0,XMMWORD[one]	; advance the counter
+
+	vpxor	xmm1,xmm1,XMMWORD[rcx]	; AES-256: whitening, thirteen vaesenc rounds, vaesenclast
+	vaesenc	xmm1,xmm1,XMMWORD[16+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[32+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[48+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[64+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[80+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[96+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[112+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[128+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[144+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[160+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[176+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[192+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[208+rcx]
+	vaesenclast	xmm1,xmm1,XMMWORD[224+rcx]
+
+
+	vpxor	xmm1,xmm1,XMMWORD[rdi]	; keystream XOR input
+
+	vmovdqu	XMMWORD[rsi],xmm1
+
+	add	rdi,16
+	add	rsi,16
+	sub	r10,1
+	jnz	NEAR $L$256_enc_msg_x8_loop2
+
+$L$256_enc_msg_x8_out:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+
+$L$SEH_end_aes256gcmsiv_enc_msg_x8:
+global	aes256gcmsiv_dec
+
+ALIGN	16
+aes256gcmsiv_dec:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes256gcmsiv_dec:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+
+
+
+	test	r9,~15
+	jnz	NEAR $L$256_dec_start
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$256_dec_start:
+	vzeroupper
+	vmovdqa	xmm0,XMMWORD[rdx]
+	mov	rax,rdx
+
+	lea	rax,[32+rax]
+	lea	rcx,[32+rcx]
+
+
+	vmovdqu	xmm15,XMMWORD[r9*1+rdi]
+	vpor	xmm15,xmm15,XMMWORD[OR_MASK]
+	and	r9,~15
+
+
+	cmp	r9,96
+	jb	NEAR $L$256_dec_loop2
+
+
+	sub	r9,96
+	vmovdqa	xmm7,xmm15
+	vpaddd	xmm8,xmm7,XMMWORD[one]
+	vpaddd	xmm9,xmm7,XMMWORD[two]
+	vpaddd	xmm10,xmm9,XMMWORD[one]
+	vpaddd	xmm11,xmm9,XMMWORD[two]
+	vpaddd	xmm12,xmm11,XMMWORD[one]
+	vpaddd	xmm15,xmm11,XMMWORD[two]
+
+	vpxor	xmm7,xmm7,XMMWORD[r8]
+	vpxor	xmm8,xmm8,XMMWORD[r8]
+	vpxor	xmm9,xmm9,XMMWORD[r8]
+	vpxor	xmm10,xmm10,XMMWORD[r8]
+	vpxor	xmm11,xmm11,XMMWORD[r8]
+	vpxor	xmm12,xmm12,XMMWORD[r8]
+
+	vmovdqu	xmm4,XMMWORD[16+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[32+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[48+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[64+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[80+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[96+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[112+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[128+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[144+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[160+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[176+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[192+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[208+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[224+r8]
+	vaesenclast	xmm7,xmm7,xmm4
+	vaesenclast	xmm8,xmm8,xmm4
+	vaesenclast	xmm9,xmm9,xmm4
+	vaesenclast	xmm10,xmm10,xmm4
+	vaesenclast	xmm11,xmm11,xmm4
+	vaesenclast	xmm12,xmm12,xmm4
+
+
+	vpxor	xmm7,xmm7,XMMWORD[rdi]
+	vpxor	xmm8,xmm8,XMMWORD[16+rdi]
+	vpxor	xmm9,xmm9,XMMWORD[32+rdi]
+	vpxor	xmm10,xmm10,XMMWORD[48+rdi]
+	vpxor	xmm11,xmm11,XMMWORD[64+rdi]
+	vpxor	xmm12,xmm12,XMMWORD[80+rdi]
+
+	vmovdqu	XMMWORD[rsi],xmm7
+	vmovdqu	XMMWORD[16+rsi],xmm8
+	vmovdqu	XMMWORD[32+rsi],xmm9
+	vmovdqu	XMMWORD[48+rsi],xmm10
+	vmovdqu	XMMWORD[64+rsi],xmm11
+	vmovdqu	XMMWORD[80+rsi],xmm12
+
+	add	rdi,96
+	add	rsi,96
+	jmp	NEAR $L$256_dec_loop1
+
+
+ALIGN	64
+$L$256_dec_loop1:
+	cmp	r9,96
+	jb	NEAR $L$256_dec_finish_96
+	sub	r9,96
+
+	vmovdqa	xmm6,xmm12
+	vmovdqa	XMMWORD[(16-32)+rax],xmm11
+	vmovdqa	XMMWORD[(32-32)+rax],xmm10
+	vmovdqa	XMMWORD[(48-32)+rax],xmm9
+	vmovdqa	XMMWORD[(64-32)+rax],xmm8
+	vmovdqa	XMMWORD[(80-32)+rax],xmm7
+
+	vmovdqa	xmm7,xmm15
+	vpaddd	xmm8,xmm7,XMMWORD[one]
+	vpaddd	xmm9,xmm7,XMMWORD[two]
+	vpaddd	xmm10,xmm9,XMMWORD[one]
+	vpaddd	xmm11,xmm9,XMMWORD[two]
+	vpaddd	xmm12,xmm11,XMMWORD[one]
+	vpaddd	xmm15,xmm11,XMMWORD[two]
+
+	vmovdqa	xmm4,XMMWORD[r8]
+	vpxor	xmm7,xmm7,xmm4
+	vpxor	xmm8,xmm8,xmm4
+	vpxor	xmm9,xmm9,xmm4
+	vpxor	xmm10,xmm10,xmm4
+	vpxor	xmm11,xmm11,xmm4
+	vpxor	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[((0-32))+rcx]
+	vpclmulqdq	xmm2,xmm6,xmm4,0x11
+	vpclmulqdq	xmm3,xmm6,xmm4,0x00
+	vpclmulqdq	xmm1,xmm6,xmm4,0x01
+	vpclmulqdq	xmm4,xmm6,xmm4,0x10
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm4,XMMWORD[16+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[((-16))+rax]
+	vmovdqu	xmm13,XMMWORD[((-16))+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm4,XMMWORD[32+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[rax]
+	vmovdqu	xmm13,XMMWORD[rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm4,XMMWORD[48+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[16+rax]
+	vmovdqu	xmm13,XMMWORD[16+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm4,XMMWORD[64+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[32+rax]
+	vmovdqu	xmm13,XMMWORD[32+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm4,XMMWORD[80+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[96+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[112+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+
+	vmovdqa	xmm6,XMMWORD[((80-32))+rax]
+	vpxor	xmm6,xmm6,xmm0
+	vmovdqu	xmm5,XMMWORD[((80-32))+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm5,0x01
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x10
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm4,XMMWORD[128+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+
+	vpsrldq	xmm4,xmm1,8
+	vpxor	xmm5,xmm2,xmm4
+	vpslldq	xmm4,xmm1,8
+	vpxor	xmm0,xmm3,xmm4
+
+	vmovdqa	xmm3,XMMWORD[poly]
+
+	vmovdqu	xmm4,XMMWORD[144+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[160+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[176+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[192+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[208+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[224+r8]
+	vpalignr	xmm2,xmm0,xmm0,8
+	vpclmulqdq	xmm0,xmm0,xmm3,0x10
+	vpxor	xmm0,xmm2,xmm0
+
+	vpxor	xmm4,xmm6,XMMWORD[rdi]
+	vaesenclast	xmm7,xmm7,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[16+rdi]
+	vaesenclast	xmm8,xmm8,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[32+rdi]
+	vaesenclast	xmm9,xmm9,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[48+rdi]
+	vaesenclast	xmm10,xmm10,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[64+rdi]
+	vaesenclast	xmm11,xmm11,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[80+rdi]
+	vaesenclast	xmm12,xmm12,xmm4
+
+	vpalignr	xmm2,xmm0,xmm0,8
+	vpclmulqdq	xmm0,xmm0,xmm3,0x10
+	vpxor	xmm0,xmm2,xmm0
+
+	vmovdqu	XMMWORD[rsi],xmm7
+	vmovdqu	XMMWORD[16+rsi],xmm8
+	vmovdqu	XMMWORD[32+rsi],xmm9
+	vmovdqu	XMMWORD[48+rsi],xmm10
+	vmovdqu	XMMWORD[64+rsi],xmm11
+	vmovdqu	XMMWORD[80+rsi],xmm12
+
+	vpxor	xmm0,xmm0,xmm5
+
+	lea	rdi,[96+rdi]
+	lea	rsi,[96+rsi]
+	jmp	NEAR $L$256_dec_loop1
+
+$L$256_dec_finish_96:
+	vmovdqa	xmm6,xmm12
+	vmovdqa	XMMWORD[(16-32)+rax],xmm11
+	vmovdqa	XMMWORD[(32-32)+rax],xmm10
+	vmovdqa	XMMWORD[(48-32)+rax],xmm9
+	vmovdqa	XMMWORD[(64-32)+rax],xmm8
+	vmovdqa	XMMWORD[(80-32)+rax],xmm7
+
+	vmovdqu	xmm4,XMMWORD[((0-32))+rcx]
+	vpclmulqdq	xmm1,xmm6,xmm4,0x10
+	vpclmulqdq	xmm2,xmm6,xmm4,0x11
+	vpclmulqdq	xmm3,xmm6,xmm4,0x00
+	vpclmulqdq	xmm4,xmm6,xmm4,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm6,XMMWORD[((-16))+rax]
+	vmovdqu	xmm13,XMMWORD[((-16))+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm6,XMMWORD[rax]
+	vmovdqu	xmm13,XMMWORD[rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm6,XMMWORD[16+rax]
+	vmovdqu	xmm13,XMMWORD[16+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm6,XMMWORD[32+rax]
+	vmovdqu	xmm13,XMMWORD[32+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm6,XMMWORD[((80-32))+rax]
+	vpxor	xmm6,xmm6,xmm0
+	vmovdqu	xmm5,XMMWORD[((80-32))+rcx]
+	vpclmulqdq	xmm4,xmm6,xmm5,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vpsrldq	xmm4,xmm1,8
+	vpxor	xmm5,xmm2,xmm4
+	vpslldq	xmm4,xmm1,8
+	vpxor	xmm0,xmm3,xmm4
+
+	vmovdqa	xmm3,XMMWORD[poly]
+
+	vpalignr	xmm2,xmm0,xmm0,8
+	vpclmulqdq	xmm0,xmm0,xmm3,0x10
+	vpxor	xmm0,xmm2,xmm0
+
+	vpalignr	xmm2,xmm0,xmm0,8
+	vpclmulqdq	xmm0,xmm0,xmm3,0x10
+	vpxor	xmm0,xmm2,xmm0
+
+	vpxor	xmm0,xmm0,xmm5
+
+$L$256_dec_loop2:
+
+
+
+	cmp	r9,16
+	jb	NEAR $L$256_dec_out
+	sub	r9,16
+
+	vmovdqa	xmm2,xmm15
+	vpaddd	xmm15,xmm15,XMMWORD[one]
+
+	vpxor	xmm2,xmm2,XMMWORD[r8]
+	vaesenc	xmm2,xmm2,XMMWORD[16+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[32+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[48+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[64+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[80+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[96+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[112+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[128+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[144+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[160+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[176+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[192+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[208+r8]
+	vaesenclast	xmm2,xmm2,XMMWORD[224+r8]
+	vpxor	xmm2,xmm2,XMMWORD[rdi]
+	vmovdqu	XMMWORD[rsi],xmm2
+	add	rdi,16
+	add	rsi,16
+
+	vpxor	xmm0,xmm0,xmm2
+	vmovdqa	xmm1,XMMWORD[((-32))+rcx]
+	call	GFMUL
+
+	jmp	NEAR $L$256_dec_loop2
+
+$L$256_dec_out:
+	vmovdqu	XMMWORD[rdx],xmm0
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes256gcmsiv_dec:
+global	aes256gcmsiv_kdf
+; aes256gcmsiv_kdf(arg1=rcx, arg2=rdx, arg3=r8): AES-encrypts six counter blocks built from arg1.
+ALIGN	16
+aes256gcmsiv_kdf:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes256gcmsiv_kdf:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+; After the register shuffle above: rdi = arg1 (16-byte input), rsi = arg2
+; (96-byte output), rdx = arg3 (round keys at offsets 0..224, 16 bytes each).
+; All three are accessed with vmovdqa, which faults on addresses that are not
+; 16-byte aligned.
+; NOTE(review): xmm6-xmm8 and xmm11-xmm13 are overwritten below and never
+; restored; the Microsoft x64 convention treats xmm6-xmm15 as callee-saved.
+; Confirm no Win64 caller depends on them.
+	vmovdqa	xmm1,XMMWORD[rdx]	; round key 0
+	vmovdqa	xmm4,XMMWORD[rdi]	; 16-byte input
+	vmovdqa	xmm11,XMMWORD[and_mask]
+	vmovdqa	xmm8,XMMWORD[one]
+	vpshufd	xmm4,xmm4,0x90	; dword lanes become in0,in0,in1,in2
+	vpand	xmm4,xmm4,xmm11
+	vpaddd	xmm6,xmm4,xmm8	; each later block adds the `one` constant once more
+	vpaddd	xmm7,xmm6,xmm8
+	vpaddd	xmm11,xmm7,xmm8
+	vpaddd	xmm12,xmm11,xmm8
+	vpaddd	xmm13,xmm12,xmm8
+; XOR all six blocks with round key 0.
+	vpxor	xmm4,xmm4,xmm1
+	vpxor	xmm6,xmm6,xmm1
+	vpxor	xmm7,xmm7,xmm1
+	vpxor	xmm11,xmm11,xmm1
+	vpxor	xmm12,xmm12,xmm1
+	vpxor	xmm13,xmm13,xmm1
+; Rounds 1-13: vaesenc with the keys at [16+rdx]..[208+rdx], alternating xmm1/xmm2 as the key register.
+	vmovdqa	xmm1,XMMWORD[16+rdx]
+	vaesenc	xmm4,xmm4,xmm1
+	vaesenc	xmm6,xmm6,xmm1
+	vaesenc	xmm7,xmm7,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+
+	vmovdqa	xmm2,XMMWORD[32+rdx]
+	vaesenc	xmm4,xmm4,xmm2
+	vaesenc	xmm6,xmm6,xmm2
+	vaesenc	xmm7,xmm7,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+	vaesenc	xmm13,xmm13,xmm2
+
+	vmovdqa	xmm1,XMMWORD[48+rdx]
+	vaesenc	xmm4,xmm4,xmm1
+	vaesenc	xmm6,xmm6,xmm1
+	vaesenc	xmm7,xmm7,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+
+	vmovdqa	xmm2,XMMWORD[64+rdx]
+	vaesenc	xmm4,xmm4,xmm2
+	vaesenc	xmm6,xmm6,xmm2
+	vaesenc	xmm7,xmm7,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+	vaesenc	xmm13,xmm13,xmm2
+
+	vmovdqa	xmm1,XMMWORD[80+rdx]
+	vaesenc	xmm4,xmm4,xmm1
+	vaesenc	xmm6,xmm6,xmm1
+	vaesenc	xmm7,xmm7,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+
+	vmovdqa	xmm2,XMMWORD[96+rdx]
+	vaesenc	xmm4,xmm4,xmm2
+	vaesenc	xmm6,xmm6,xmm2
+	vaesenc	xmm7,xmm7,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+	vaesenc	xmm13,xmm13,xmm2
+
+	vmovdqa	xmm1,XMMWORD[112+rdx]
+	vaesenc	xmm4,xmm4,xmm1
+	vaesenc	xmm6,xmm6,xmm1
+	vaesenc	xmm7,xmm7,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+
+	vmovdqa	xmm2,XMMWORD[128+rdx]
+	vaesenc	xmm4,xmm4,xmm2
+	vaesenc	xmm6,xmm6,xmm2
+	vaesenc	xmm7,xmm7,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+	vaesenc	xmm13,xmm13,xmm2
+
+	vmovdqa	xmm1,XMMWORD[144+rdx]
+	vaesenc	xmm4,xmm4,xmm1
+	vaesenc	xmm6,xmm6,xmm1
+	vaesenc	xmm7,xmm7,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+
+	vmovdqa	xmm2,XMMWORD[160+rdx]
+	vaesenc	xmm4,xmm4,xmm2
+	vaesenc	xmm6,xmm6,xmm2
+	vaesenc	xmm7,xmm7,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+	vaesenc	xmm13,xmm13,xmm2
+
+	vmovdqa	xmm1,XMMWORD[176+rdx]
+	vaesenc	xmm4,xmm4,xmm1
+	vaesenc	xmm6,xmm6,xmm1
+	vaesenc	xmm7,xmm7,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+
+	vmovdqa	xmm2,XMMWORD[192+rdx]
+	vaesenc	xmm4,xmm4,xmm2
+	vaesenc	xmm6,xmm6,xmm2
+	vaesenc	xmm7,xmm7,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+	vaesenc	xmm13,xmm13,xmm2
+
+	vmovdqa	xmm1,XMMWORD[208+rdx]
+	vaesenc	xmm4,xmm4,xmm1
+	vaesenc	xmm6,xmm6,xmm1
+	vaesenc	xmm7,xmm7,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+; Final round with the key at [224+rdx].
+	vmovdqa	xmm2,XMMWORD[224+rdx]
+	vaesenclast	xmm4,xmm4,xmm2
+	vaesenclast	xmm6,xmm6,xmm2
+	vaesenclast	xmm7,xmm7,xmm2
+	vaesenclast	xmm11,xmm11,xmm2
+	vaesenclast	xmm12,xmm12,xmm2
+	vaesenclast	xmm13,xmm13,xmm2
+
+; Store the six results contiguously at [rsi].
+	vmovdqa	XMMWORD[rsi],xmm4
+	vmovdqa	XMMWORD[16+rsi],xmm6
+	vmovdqa	XMMWORD[32+rsi],xmm7
+	vmovdqa	XMMWORD[48+rsi],xmm11
+	vmovdqa	XMMWORD[64+rsi],xmm12
+	vmovdqa	XMMWORD[80+rsi],xmm13
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes256gcmsiv_kdf:
diff --git a/third_party/boringssl/win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm b/third_party/boringssl/win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
new file mode 100644
index 0000000..ab8cf92
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
@@ -0,0 +1,8 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+global	dummy_chacha20_poly1305_asm
+; Placeholder symbol: the body is only the two-byte return sequence below.
+dummy_chacha20_poly1305_asm:
+	DB	0F3h,0C3h		;repret
diff --git a/third_party/boringssl/win-x86_64/crypto/fipsmodule/aes-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/fipsmodule/aes-x86_64.asm
new file mode 100644
index 0000000..f6a4edf
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/fipsmodule/aes-x86_64.asm
@@ -0,0 +1,2857 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.text code align=64
+; _x86_64_AES_encrypt: table-driven rounds. In: eax/ebx/ecx/edx = state words, r15 = round keys, r14 = lookup table base.
+; Out: eax/ebx/ecx/edx. Overwrites esi, edi, ebp, r8d, r10d-r13d; r15 advances by 16 per loop pass.
+ALIGN	16
+_x86_64_AES_encrypt:
+	xor	eax,DWORD[r15]
+	xor	ebx,DWORD[4+r15]
+	xor	ecx,DWORD[8+r15]
+	xor	edx,DWORD[12+r15]
+; r13d = dword at [240+r15] minus one = number of passes through $L$enc_loop.
+	mov	r13d,DWORD[240+r15]
+	sub	r13d,1
+	jmp	NEAR $L$enc_loop
+ALIGN	16
+$L$enc_loop:
+; Entries are indexed at an 8-byte stride; loads at +1/+2/+3 presumably pick up rotated copies of each word (table layout not visible here).
+	movzx	esi,al
+	movzx	edi,bl
+	movzx	ebp,cl
+	mov	r10d,DWORD[rsi*8+r14]
+	mov	r11d,DWORD[rdi*8+r14]
+	mov	r12d,DWORD[rbp*8+r14]
+
+	movzx	esi,bh
+	movzx	edi,ch
+	movzx	ebp,dl
+	xor	r10d,DWORD[3+rsi*8+r14]
+	xor	r11d,DWORD[3+rdi*8+r14]
+	mov	r8d,DWORD[rbp*8+r14]
+
+	movzx	esi,dh
+	shr	ecx,16
+	movzx	ebp,ah
+	xor	r12d,DWORD[3+rsi*8+r14]
+	shr	edx,16
+	xor	r8d,DWORD[3+rbp*8+r14]
+
+	shr	ebx,16
+	lea	r15,[16+r15]
+	shr	eax,16
+
+	movzx	esi,cl
+	movzx	edi,dl
+	movzx	ebp,al
+	xor	r10d,DWORD[2+rsi*8+r14]
+	xor	r11d,DWORD[2+rdi*8+r14]
+	xor	r12d,DWORD[2+rbp*8+r14]
+
+	movzx	esi,dh
+	movzx	edi,ah
+	movzx	ebp,bl
+	xor	r10d,DWORD[1+rsi*8+r14]
+	xor	r11d,DWORD[1+rdi*8+r14]
+	xor	r8d,DWORD[2+rbp*8+r14]
+; Fold in the next round key (r15 was already advanced above).
+	mov	edx,DWORD[12+r15]
+	movzx	edi,bh
+	movzx	ebp,ch
+	mov	eax,DWORD[r15]
+	xor	r12d,DWORD[1+rdi*8+r14]
+	xor	r8d,DWORD[1+rbp*8+r14]
+
+	mov	ebx,DWORD[4+r15]
+	mov	ecx,DWORD[8+r15]
+	xor	eax,r10d
+	xor	ebx,r11d
+	xor	ecx,r12d
+	xor	edx,r8d
+	sub	r13d,1
+	jnz	NEAR $L$enc_loop
+	movzx	esi,al	; last round: single bytes are taken via byte loads or masks instead of whole-word lookups
+	movzx	edi,bl
+	movzx	ebp,cl
+	movzx	r10d,BYTE[2+rsi*8+r14]
+	movzx	r11d,BYTE[2+rdi*8+r14]
+	movzx	r12d,BYTE[2+rbp*8+r14]
+
+	movzx	esi,dl
+	movzx	edi,bh
+	movzx	ebp,ch
+	movzx	r8d,BYTE[2+rsi*8+r14]
+	mov	edi,DWORD[rdi*8+r14]
+	mov	ebp,DWORD[rbp*8+r14]
+
+	and	edi,0x0000ff00
+	and	ebp,0x0000ff00
+
+	xor	r10d,edi
+	xor	r11d,ebp
+	shr	ecx,16
+
+	movzx	esi,dh
+	movzx	edi,ah
+	shr	edx,16
+	mov	esi,DWORD[rsi*8+r14]
+	mov	edi,DWORD[rdi*8+r14]
+
+	and	esi,0x0000ff00
+	and	edi,0x0000ff00
+	shr	ebx,16
+	xor	r12d,esi
+	xor	r8d,edi
+	shr	eax,16
+
+	movzx	esi,cl
+	movzx	edi,dl
+	movzx	ebp,al
+	mov	esi,DWORD[rsi*8+r14]
+	mov	edi,DWORD[rdi*8+r14]
+	mov	ebp,DWORD[rbp*8+r14]
+
+	and	esi,0x00ff0000
+	and	edi,0x00ff0000
+	and	ebp,0x00ff0000
+
+	xor	r10d,esi
+	xor	r11d,edi
+	xor	r12d,ebp
+
+	movzx	esi,bl
+	movzx	edi,dh
+	movzx	ebp,ah
+	mov	esi,DWORD[rsi*8+r14]
+	mov	edi,DWORD[2+rdi*8+r14]
+	mov	ebp,DWORD[2+rbp*8+r14]
+
+	and	esi,0x00ff0000
+	and	edi,0xff000000
+	and	ebp,0xff000000
+
+	xor	r8d,esi
+	xor	r10d,edi
+	xor	r11d,ebp
+
+	movzx	esi,bh
+	movzx	edi,ch
+	mov	edx,DWORD[((16+12))+r15]
+	mov	esi,DWORD[2+rsi*8+r14]
+	mov	edi,DWORD[2+rdi*8+r14]
+	mov	eax,DWORD[((16+0))+r15]
+
+	and	esi,0xff000000
+	and	edi,0xff000000
+
+	xor	r12d,esi
+	xor	r8d,edi
+; Final key addition uses the 16 bytes at [16+r15].
+	mov	ebx,DWORD[((16+4))+r15]
+	mov	ecx,DWORD[((16+8))+r15]
+	xor	eax,r10d
+	xor	ebx,r11d
+	xor	ecx,r12d
+	xor	edx,r8d
+DB	0xf3,0xc3	; rep ret
+; _x86_64_AES_encrypt_compact: In/out eax/ebx/ecx/edx = state, r15 = round keys, r14 = byte lookup table indexed as [index+r14].
+; The loop ends when r15 equals the qword at [16+rsp] (slot [8+rsp] in the caller's frame, below the return address).
+ALIGN	16
+_x86_64_AES_encrypt_compact:
+	lea	r8,[128+r14]
+	mov	edi,DWORD[((0-128))+r8]	; these eight loads are overwritten before use; they only touch the table every 32 bytes
+	mov	ebp,DWORD[((32-128))+r8]
+	mov	r10d,DWORD[((64-128))+r8]
+	mov	r11d,DWORD[((96-128))+r8]
+	mov	edi,DWORD[((128-128))+r8]
+	mov	ebp,DWORD[((160-128))+r8]
+	mov	r10d,DWORD[((192-128))+r8]
+	mov	r11d,DWORD[((224-128))+r8]
+	jmp	NEAR $L$enc_loop_compact
+ALIGN	16
+$L$enc_loop_compact:
+	xor	eax,DWORD[r15]
+	xor	ebx,DWORD[4+r15]
+	xor	ecx,DWORD[8+r15]
+	xor	edx,DWORD[12+r15]
+	lea	r15,[16+r15]
+	movzx	r10d,al
+	movzx	r11d,bl
+	movzx	r12d,cl
+	movzx	r8d,dl
+	movzx	esi,bh
+	movzx	edi,ch
+	shr	ecx,16
+	movzx	ebp,dh
+	movzx	r10d,BYTE[r10*1+r14]
+	movzx	r11d,BYTE[r11*1+r14]
+	movzx	r12d,BYTE[r12*1+r14]
+	movzx	r8d,BYTE[r8*1+r14]
+
+	movzx	r9d,BYTE[rsi*1+r14]
+	movzx	esi,ah
+	movzx	r13d,BYTE[rdi*1+r14]
+	movzx	edi,cl
+	movzx	ebp,BYTE[rbp*1+r14]
+	movzx	esi,BYTE[rsi*1+r14]
+
+	shl	r9d,8
+	shr	edx,16
+	shl	r13d,8
+	xor	r10d,r9d
+	shr	eax,16
+	movzx	r9d,dl
+	shr	ebx,16
+	xor	r11d,r13d
+	shl	ebp,8
+	movzx	r13d,al
+	movzx	edi,BYTE[rdi*1+r14]
+	xor	r12d,ebp
+
+	shl	esi,8
+	movzx	ebp,bl
+	shl	edi,16
+	xor	r8d,esi
+	movzx	r9d,BYTE[r9*1+r14]
+	movzx	esi,dh
+	movzx	r13d,BYTE[r13*1+r14]
+	xor	r10d,edi
+
+	shr	ecx,8
+	movzx	edi,ah
+	shl	r9d,16
+	shr	ebx,8
+	shl	r13d,16
+	xor	r11d,r9d
+	movzx	ebp,BYTE[rbp*1+r14]
+	movzx	esi,BYTE[rsi*1+r14]
+	movzx	edi,BYTE[rdi*1+r14]
+	movzx	edx,BYTE[rcx*1+r14]
+	movzx	ecx,BYTE[rbx*1+r14]
+
+	shl	ebp,16
+	xor	r12d,r13d
+	shl	esi,24
+	xor	r8d,ebp
+	shl	edi,24
+	xor	r10d,esi
+	shl	edx,24
+	xor	r11d,edi
+	shl	ecx,24
+	mov	eax,r10d
+	mov	ebx,r11d
+	xor	ecx,r12d
+	xor	edx,r8d
+	cmp	r15,QWORD[16+rsp]
+	je	NEAR $L$enc_compact_done
+	mov	r10d,0x80808080	; from here: packed per-byte GF(2^8) doubling (0x1b reduction) combined with rotates
+	mov	r11d,0x80808080
+	and	r10d,eax
+	and	r11d,ebx
+	mov	esi,r10d
+	mov	edi,r11d
+	shr	r10d,7
+	lea	r8d,[rax*1+rax]
+	shr	r11d,7
+	lea	r9d,[rbx*1+rbx]
+	sub	esi,r10d
+	sub	edi,r11d
+	and	r8d,0xfefefefe
+	and	r9d,0xfefefefe
+	and	esi,0x1b1b1b1b
+	and	edi,0x1b1b1b1b
+	mov	r10d,eax
+	mov	r11d,ebx
+	xor	r8d,esi
+	xor	r9d,edi
+; r8d/r9d now hold the doubled bytes of eax/ebx; the same sequence is started for ecx/edx below.
+	xor	eax,r8d
+	xor	ebx,r9d
+	mov	r12d,0x80808080
+	rol	eax,24
+	mov	ebp,0x80808080
+	rol	ebx,24
+	and	r12d,ecx
+	and	ebp,edx
+	xor	eax,r8d
+	xor	ebx,r9d
+	mov	esi,r12d
+	ror	r10d,16
+	mov	edi,ebp
+	ror	r11d,16
+	lea	r8d,[rcx*1+rcx]
+	shr	r12d,7
+	xor	eax,r10d
+	shr	ebp,7
+	xor	ebx,r11d
+	ror	r10d,8
+	lea	r9d,[rdx*1+rdx]
+	ror	r11d,8
+	sub	esi,r12d
+	sub	edi,ebp
+	xor	eax,r10d
+	xor	ebx,r11d
+
+	and	r8d,0xfefefefe
+	and	r9d,0xfefefefe
+	and	esi,0x1b1b1b1b
+	and	edi,0x1b1b1b1b
+	mov	r12d,ecx
+	mov	ebp,edx
+	xor	r8d,esi
+	xor	r9d,edi
+
+	ror	r12d,16
+	xor	ecx,r8d
+	ror	ebp,16
+	xor	edx,r9d
+	rol	ecx,24
+	mov	esi,DWORD[r14]	; table touch only: esi/edi/r8d/r9d are rewritten at the top of the loop
+	rol	edx,24
+	xor	ecx,r8d
+	mov	edi,DWORD[64+r14]
+	xor	edx,r9d
+	mov	r8d,DWORD[128+r14]
+	xor	ecx,r12d
+	ror	r12d,8
+	xor	edx,ebp
+	ror	ebp,8
+	xor	ecx,r12d
+	mov	r9d,DWORD[192+r14]
+	xor	edx,ebp
+	jmp	NEAR $L$enc_loop_compact
+ALIGN	16
+$L$enc_compact_done:
+	xor	eax,DWORD[r15]
+	xor	ebx,DWORD[4+r15]
+	xor	ecx,DWORD[8+r15]
+	xor	edx,DWORD[12+r15]
+DB	0xf3,0xc3	; rep ret
+; asm_AES_encrypt(arg1=rcx, arg2=rdx, arg3=r8): encrypts the 16 bytes at arg1 into arg2 using the key schedule at arg3.
+ALIGN	16
+global	asm_AES_encrypt
+; Saves rbx, rbp, r12-r15, builds a 64-byte-aligned frame and calls _x86_64_AES_encrypt_compact.
+; The round count is read from offset 240 of the key schedule.
+asm_AES_encrypt:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_asm_AES_encrypt:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+; From here on: rdi = input block, rsi = output block, rdx = key schedule.
+	mov	rax,rsp
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+; Lower rsp to a 64-byte boundary, then by a further multiple of 64 (mask 0x3c0)
+; derived from the key address, and reserve 32 bytes of locals.
+	lea	rcx,[((-63))+rdx]
+	and	rsp,-64
+	sub	rcx,rsp
+	neg	rcx
+	and	rcx,0x3c0
+	sub	rsp,rcx
+	sub	rsp,32
+; Frame slots: [rsp]=key start, [8+rsp]=key end marker, [16+rsp]=output pointer, [24+rsp]=entry rsp.
+	mov	QWORD[16+rsp],rsi
+	mov	QWORD[24+rsp],rax
+$L$enc_prologue:
+
+	mov	r15,rdx
+	mov	r13d,DWORD[240+r15]
+
+	mov	eax,DWORD[rdi]
+	mov	ebx,DWORD[4+rdi]
+	mov	ecx,DWORD[8+rdi]
+	mov	edx,DWORD[12+rdi]
+
+	shl	r13d,4
+	lea	rbp,[r13*1+r15]
+	mov	QWORD[rsp],r15
+	mov	QWORD[8+rsp],rbp
+; r14 = $L$AES_Te+2048 plus 0/256/512/768, chosen from the frame address;
+; presumably selects one of several copies of the byte table (table not visible here).
+	lea	r14,[(($L$AES_Te+2048))]
+	lea	rbp,[768+rsp]
+	sub	rbp,r14
+	and	rbp,0x300
+	lea	r14,[rbp*1+r14]
+
+	call	_x86_64_AES_encrypt_compact
+
+	mov	r9,QWORD[16+rsp]
+	mov	rsi,QWORD[24+rsp]
+	mov	DWORD[r9],eax
+	mov	DWORD[4+r9],ebx
+	mov	DWORD[8+r9],ecx
+	mov	DWORD[12+r9],edx
+; Reload the callee-saved registers from just below the entry rsp held in rsi.
+	mov	r15,QWORD[((-48))+rsi]
+	mov	r14,QWORD[((-40))+rsi]
+	mov	r13,QWORD[((-32))+rsi]
+	mov	r12,QWORD[((-24))+rsi]
+	mov	rbp,QWORD[((-16))+rsi]
+	mov	rbx,QWORD[((-8))+rsi]
+	lea	rsp,[rsi]
+$L$enc_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_asm_AES_encrypt:
+; _x86_64_AES_decrypt: table-driven rounds. In/out: eax/ebx/ecx/edx = state, r15 = round keys, r14 = table base (restored on exit).
+ALIGN	16
+_x86_64_AES_decrypt:
+	xor	eax,DWORD[r15]
+	xor	ebx,DWORD[4+r15]
+	xor	ecx,DWORD[8+r15]
+	xor	edx,DWORD[12+r15]
+; r13d = dword at [240+r15] minus one = number of passes through $L$dec_loop.
+	mov	r13d,DWORD[240+r15]
+	sub	r13d,1
+	jmp	NEAR $L$dec_loop
+ALIGN	16
+$L$dec_loop:
+; Overwrites esi, edi, ebp, r8d, r10d-r13d; r15 advances by 16 per pass.
+	movzx	esi,al
+	movzx	edi,bl
+	movzx	ebp,cl
+	mov	r10d,DWORD[rsi*8+r14]
+	mov	r11d,DWORD[rdi*8+r14]
+	mov	r12d,DWORD[rbp*8+r14]
+
+	movzx	esi,dh
+	movzx	edi,ah
+	movzx	ebp,dl
+	xor	r10d,DWORD[3+rsi*8+r14]
+	xor	r11d,DWORD[3+rdi*8+r14]
+	mov	r8d,DWORD[rbp*8+r14]
+
+	movzx	esi,bh
+	shr	eax,16
+	movzx	ebp,ch
+	xor	r12d,DWORD[3+rsi*8+r14]
+	shr	edx,16
+	xor	r8d,DWORD[3+rbp*8+r14]
+
+	shr	ebx,16
+	lea	r15,[16+r15]
+	shr	ecx,16
+
+	movzx	esi,cl
+	movzx	edi,dl
+	movzx	ebp,al
+	xor	r10d,DWORD[2+rsi*8+r14]
+	xor	r11d,DWORD[2+rdi*8+r14]
+	xor	r12d,DWORD[2+rbp*8+r14]
+
+	movzx	esi,bh
+	movzx	edi,ch
+	movzx	ebp,bl
+	xor	r10d,DWORD[1+rsi*8+r14]
+	xor	r11d,DWORD[1+rdi*8+r14]
+	xor	r8d,DWORD[2+rbp*8+r14]
+
+	movzx	esi,dh
+	mov	edx,DWORD[12+r15]
+	movzx	ebp,ah
+	xor	r12d,DWORD[1+rsi*8+r14]
+	mov	eax,DWORD[r15]
+	xor	r8d,DWORD[1+rbp*8+r14]
+
+	xor	eax,r10d
+	mov	ebx,DWORD[4+r15]
+	mov	ecx,DWORD[8+r15]
+	xor	ecx,r12d
+	xor	ebx,r11d
+	xor	edx,r8d
+	sub	r13d,1
+	jnz	NEAR $L$dec_loop
+	lea	r14,[2048+r14]	; last round uses the byte table 2048 bytes past r14
+	movzx	esi,al
+	movzx	edi,bl
+	movzx	ebp,cl
+	movzx	r10d,BYTE[rsi*1+r14]
+	movzx	r11d,BYTE[rdi*1+r14]
+	movzx	r12d,BYTE[rbp*1+r14]
+
+	movzx	esi,dl
+	movzx	edi,dh
+	movzx	ebp,ah
+	movzx	r8d,BYTE[rsi*1+r14]
+	movzx	edi,BYTE[rdi*1+r14]
+	movzx	ebp,BYTE[rbp*1+r14]
+
+	shl	edi,8
+	shl	ebp,8
+
+	xor	r10d,edi
+	xor	r11d,ebp
+	shr	edx,16
+
+	movzx	esi,bh
+	movzx	edi,ch
+	shr	eax,16
+	movzx	esi,BYTE[rsi*1+r14]
+	movzx	edi,BYTE[rdi*1+r14]
+
+	shl	esi,8
+	shl	edi,8
+	shr	ebx,16
+	xor	r12d,esi
+	xor	r8d,edi
+	shr	ecx,16
+
+	movzx	esi,cl
+	movzx	edi,dl
+	movzx	ebp,al
+	movzx	esi,BYTE[rsi*1+r14]
+	movzx	edi,BYTE[rdi*1+r14]
+	movzx	ebp,BYTE[rbp*1+r14]
+
+	shl	esi,16
+	shl	edi,16
+	shl	ebp,16
+
+	xor	r10d,esi
+	xor	r11d,edi
+	xor	r12d,ebp
+
+	movzx	esi,bl
+	movzx	edi,bh
+	movzx	ebp,ch
+	movzx	esi,BYTE[rsi*1+r14]
+	movzx	edi,BYTE[rdi*1+r14]
+	movzx	ebp,BYTE[rbp*1+r14]
+
+	shl	esi,16
+	shl	edi,24
+	shl	ebp,24
+
+	xor	r8d,esi
+	xor	r10d,edi
+	xor	r11d,ebp
+
+	movzx	esi,dh
+	movzx	edi,ah
+	mov	edx,DWORD[((16+12))+r15]
+	movzx	esi,BYTE[rsi*1+r14]
+	movzx	edi,BYTE[rdi*1+r14]
+	mov	eax,DWORD[((16+0))+r15]
+
+	shl	esi,24
+	shl	edi,24
+
+	xor	r12d,esi
+	xor	r8d,edi
+; Final key addition uses the 16 bytes at [16+r15].
+	mov	ebx,DWORD[((16+4))+r15]
+	mov	ecx,DWORD[((16+8))+r15]
+	lea	r14,[((-2048))+r14]	; undo the +2048 applied before the last round
+	xor	eax,r10d
+	xor	ebx,r11d
+	xor	ecx,r12d
+	xor	edx,r8d
+DB	0xf3,0xc3	; rep ret
+; _x86_64_AES_decrypt_compact: In/out eax/ebx/ecx/edx = state, r15 = round keys, r14 = 256-byte lookup table followed by three qword masks.
+; The loop ends when r15 equals the qword at [16+rsp] (slot [8+rsp] in the caller's frame, below the return address).
+ALIGN	16
+_x86_64_AES_decrypt_compact:
+	lea	r8,[128+r14]
+	mov	edi,DWORD[((0-128))+r8]	; these eight loads are overwritten before use; they only touch the table every 32 bytes
+	mov	ebp,DWORD[((32-128))+r8]
+	mov	r10d,DWORD[((64-128))+r8]
+	mov	r11d,DWORD[((96-128))+r8]
+	mov	edi,DWORD[((128-128))+r8]
+	mov	ebp,DWORD[((160-128))+r8]
+	mov	r10d,DWORD[((192-128))+r8]
+	mov	r11d,DWORD[((224-128))+r8]
+	jmp	NEAR $L$dec_loop_compact
+
+ALIGN	16
+$L$dec_loop_compact:
+	xor	eax,DWORD[r15]
+	xor	ebx,DWORD[4+r15]
+	xor	ecx,DWORD[8+r15]
+	xor	edx,DWORD[12+r15]
+	lea	r15,[16+r15]
+	movzx	r10d,al
+	movzx	r11d,bl
+	movzx	r12d,cl
+	movzx	r8d,dl
+	movzx	esi,dh
+	movzx	edi,ah
+	shr	edx,16
+	movzx	ebp,bh
+	movzx	r10d,BYTE[r10*1+r14]
+	movzx	r11d,BYTE[r11*1+r14]
+	movzx	r12d,BYTE[r12*1+r14]
+	movzx	r8d,BYTE[r8*1+r14]
+
+	movzx	r9d,BYTE[rsi*1+r14]
+	movzx	esi,ch
+	movzx	r13d,BYTE[rdi*1+r14]
+	movzx	ebp,BYTE[rbp*1+r14]
+	movzx	esi,BYTE[rsi*1+r14]
+
+	shr	ecx,16
+	shl	r13d,8
+	shl	r9d,8
+	movzx	edi,cl
+	shr	eax,16
+	xor	r10d,r9d
+	shr	ebx,16
+	movzx	r9d,dl
+
+	shl	ebp,8
+	xor	r11d,r13d
+	shl	esi,8
+	movzx	r13d,al
+	movzx	edi,BYTE[rdi*1+r14]
+	xor	r12d,ebp
+	movzx	ebp,bl
+
+	shl	edi,16
+	xor	r8d,esi
+	movzx	r9d,BYTE[r9*1+r14]
+	movzx	esi,bh
+	movzx	ebp,BYTE[rbp*1+r14]
+	xor	r10d,edi
+	movzx	r13d,BYTE[r13*1+r14]
+	movzx	edi,ch
+
+	shl	ebp,16
+	shl	r9d,16
+	shl	r13d,16
+	xor	r8d,ebp
+	movzx	ebp,dh
+	xor	r11d,r9d
+	shr	eax,8
+	xor	r12d,r13d
+
+	movzx	esi,BYTE[rsi*1+r14]
+	movzx	ebx,BYTE[rdi*1+r14]
+	movzx	ecx,BYTE[rbp*1+r14]
+	movzx	edx,BYTE[rax*1+r14]
+
+	mov	eax,r10d
+	shl	esi,24
+	shl	ebx,24
+	shl	ecx,24
+	xor	eax,esi
+	shl	edx,24
+	xor	ebx,r11d
+	xor	ecx,r12d
+	xor	edx,r8d
+	cmp	r15,QWORD[16+rsp]
+	je	NEAR $L$dec_compact_done
+; Pack the four words into rax (ebx:eax) and rcx (edx:ecx); rsi/rdi/rbp = masks loaded from [256..279+r14].
+	mov	rsi,QWORD[((256+0))+r14]
+	shl	rbx,32
+	shl	rdx,32
+	mov	rdi,QWORD[((256+8))+r14]
+	or	rax,rbx
+	or	rcx,rdx
+	mov	rbp,QWORD[((256+16))+r14]
+	mov	r9,rsi
+	mov	r12,rsi
+	and	r9,rax
+	and	r12,rcx
+	mov	rbx,r9
+	mov	rdx,r12
+	shr	r9,7
+	lea	r8,[rax*1+rax]
+	shr	r12,7
+	lea	r11,[rcx*1+rcx]
+	sub	rbx,r9
+	sub	rdx,r12
+	and	r8,rdi
+	and	r11,rdi
+	and	rbx,rbp
+	and	rdx,rbp
+	xor	r8,rbx
+	xor	r11,rdx
+	mov	r10,rsi
+	mov	r13,rsi
+; Same mask pattern applied to r8/r11; presumably packed GF(2^8) doubling, assuming the masks are 0x80../0xfe../0x1b.. (values not visible here).
+	and	r10,r8
+	and	r13,r11
+	mov	rbx,r10
+	mov	rdx,r13
+	shr	r10,7
+	lea	r9,[r8*1+r8]
+	shr	r13,7
+	lea	r12,[r11*1+r11]
+	sub	rbx,r10
+	sub	rdx,r13
+	and	r9,rdi
+	and	r12,rdi
+	and	rbx,rbp
+	and	rdx,rbp
+	xor	r9,rbx
+	xor	r12,rdx
+	mov	r10,rsi
+	mov	r13,rsi
+; Third application, this time on r9/r12, producing r10/r13.
+	and	r10,r9
+	and	r13,r12
+	mov	rbx,r10
+	mov	rdx,r13
+	shr	r10,7
+	xor	r8,rax
+	shr	r13,7
+	xor	r11,rcx
+	sub	rbx,r10
+	sub	rdx,r13
+	lea	r10,[r9*1+r9]
+	lea	r13,[r12*1+r12]
+	xor	r9,rax
+	xor	r12,rcx
+	and	r10,rdi
+	and	r13,rdi
+	and	rbx,rbp
+	and	rdx,rbp
+	xor	r10,rbx
+	xor	r13,rdx
+
+	xor	rax,r10
+	xor	rcx,r13
+	xor	r8,r10
+	xor	r11,r13
+	mov	rbx,rax
+	mov	rdx,rcx
+	xor	r9,r10
+	shr	rbx,32
+	xor	r12,r13
+	shr	rdx,32
+	xor	r10,r8
+	rol	eax,8
+	xor	r13,r11
+	rol	ecx,8
+	xor	r10,r9
+	rol	ebx,8
+	xor	r13,r12
+
+	rol	edx,8
+	xor	eax,r10d
+	shr	r10,32
+	xor	ecx,r13d
+	shr	r13,32
+	xor	ebx,r10d
+	xor	edx,r13d
+
+	mov	r10,r8
+	rol	r8d,24
+	mov	r13,r11
+	rol	r11d,24
+	shr	r10,32
+	xor	eax,r8d
+	shr	r13,32
+	xor	ecx,r11d
+	rol	r10d,24
+	mov	r8,r9
+	rol	r13d,24
+	mov	r11,r12
+	shr	r8,32
+	xor	ebx,r10d
+	shr	r11,32
+	xor	edx,r13d
+; The loads into rsi, rdi, rbp, r10 and r13 below are all overwritten before use on the next pass.
+	mov	rsi,QWORD[r14]
+	rol	r9d,16
+	mov	rdi,QWORD[64+r14]
+	rol	r12d,16
+	mov	rbp,QWORD[128+r14]
+	rol	r8d,16
+	mov	r10,QWORD[192+r14]
+	xor	eax,r9d
+	rol	r11d,16
+	xor	ecx,r12d
+	mov	r13,QWORD[256+r14]
+	xor	ebx,r8d
+	xor	edx,r11d
+	jmp	NEAR $L$dec_loop_compact
+ALIGN	16
+$L$dec_compact_done:
+	xor	eax,DWORD[r15]
+	xor	ebx,DWORD[4+r15]
+	xor	ecx,DWORD[8+r15]
+	xor	edx,DWORD[12+r15]
+DB	0xf3,0xc3	; rep ret
+; asm_AES_decrypt(arg1=rcx, arg2=rdx, arg3=r8): decrypts the 16 bytes at arg1 into arg2 using the key schedule at arg3.
+ALIGN	16
+global	asm_AES_decrypt
+; Saves rbx, rbp, r12-r15, builds a 64-byte-aligned frame and calls _x86_64_AES_decrypt_compact.
+; The round count is read from offset 240 of the key schedule.
+asm_AES_decrypt:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_asm_AES_decrypt:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+; From here on: rdi = input block, rsi = output block, rdx = key schedule.
+	mov	rax,rsp
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+; Lower rsp to a 64-byte boundary, then by a further multiple of 64 (mask 0x3c0)
+; derived from the key address, and reserve 32 bytes of locals.
+	lea	rcx,[((-63))+rdx]
+	and	rsp,-64
+	sub	rcx,rsp
+	neg	rcx
+	and	rcx,0x3c0
+	sub	rsp,rcx
+	sub	rsp,32
+; Frame slots: [rsp]=key start, [8+rsp]=key end marker, [16+rsp]=output pointer, [24+rsp]=entry rsp.
+	mov	QWORD[16+rsp],rsi
+	mov	QWORD[24+rsp],rax
+$L$dec_prologue:
+
+	mov	r15,rdx
+	mov	r13d,DWORD[240+r15]
+
+	mov	eax,DWORD[rdi]
+	mov	ebx,DWORD[4+rdi]
+	mov	ecx,DWORD[8+rdi]
+	mov	edx,DWORD[12+rdi]
+
+	shl	r13d,4
+	lea	rbp,[r13*1+r15]
+	mov	QWORD[rsp],r15
+	mov	QWORD[8+rsp],rbp
+; r14 = $L$AES_Td+2048 plus an offset chosen from the frame address;
+; presumably selects one of several copies of the byte table (table not visible here).
+	lea	r14,[(($L$AES_Td+2048))]
+	lea	rbp,[768+rsp]
+	sub	rbp,r14
+	and	rbp,0x300
+	lea	r14,[rbp*1+r14]
+	shr	rbp,3	; adds rbp/8 on top of rbp, so the total offset is a multiple of 288 (256 + 32)
+	add	r14,rbp
+
+	call	_x86_64_AES_decrypt_compact
+
+	mov	r9,QWORD[16+rsp]
+	mov	rsi,QWORD[24+rsp]
+	mov	DWORD[r9],eax
+	mov	DWORD[4+r9],ebx
+	mov	DWORD[8+r9],ecx
+	mov	DWORD[12+r9],edx
+; Reload the callee-saved registers from just below the entry rsp held in rsi.
+	mov	r15,QWORD[((-48))+rsi]
+	mov	r14,QWORD[((-40))+rsi]
+	mov	r13,QWORD[((-32))+rsi]
+	mov	r12,QWORD[((-24))+rsi]
+	mov	rbp,QWORD[((-16))+rsi]
+	mov	rbx,QWORD[((-8))+rsi]
+	lea	rsp,[rsi]
+$L$dec_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_asm_AES_decrypt:
+ALIGN	16
+global	asm_AES_set_encrypt_key
+; asm_AES_set_encrypt_key(arg1=rcx, arg2=edx, arg3=r8): Win64 wrapper around _x86_64_AES_set_encrypt_key; result in rax.
+asm_AES_set_encrypt_key:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_asm_AES_set_encrypt_key:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+; rdi = arg1, rsi = arg2, rdx = arg3, which is where the helper reads them.
+; Six pushes plus 8 bytes of padding = 56 bytes, released by the add after the call.
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	sub	rsp,8
+$L$enc_key_prologue:
+
+	call	_x86_64_AES_set_encrypt_key
+; Only rbp and rbx are reloaded; the helper as written does not modify r12-r15.
+	mov	rbp,QWORD[40+rsp]
+	mov	rbx,QWORD[48+rsp]
+	add	rsp,56
+$L$enc_key_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_asm_AES_set_encrypt_key:
+; _x86_64_AES_set_encrypt_key: In: rdi = source key bytes, esi = key size in bits, rdx = destination schedule.
+; Out: rax = 0 on success, -1 if either pointer is zero, -2 if the size is not 128/192/256. Overwrites rbx, rcx, rdx, rsi, rdi, rbp, r8.
+ALIGN	16
+_x86_64_AES_set_encrypt_key:
+	mov	ecx,esi
+	mov	rsi,rdi
+	mov	rdi,rdx
+; After the moves above: ecx = bits, rsi = source, rdi = destination.
+	test	rsi,-1
+	jz	NEAR $L$badpointer
+	test	rdi,-1
+	jz	NEAR $L$badpointer
+
+	lea	rbp,[$L$AES_Te]
+	lea	rbp,[((2048+128))+rbp]
+; rbp = $L$AES_Te+2048+128, so [-128+idx+rbp] addresses byte idx of the table at $L$AES_Te+2048.
+; The values loaded by the eight moves below are never consumed; they only touch that table every 32 bytes.
+	mov	eax,DWORD[((0-128))+rbp]
+	mov	ebx,DWORD[((32-128))+rbp]
+	mov	r8d,DWORD[((64-128))+rbp]
+	mov	edx,DWORD[((96-128))+rbp]
+	mov	eax,DWORD[((128-128))+rbp]
+	mov	ebx,DWORD[((160-128))+rbp]
+	mov	r8d,DWORD[((192-128))+rbp]
+	mov	edx,DWORD[((224-128))+rbp]
+
+	cmp	ecx,128
+	je	NEAR $L$10rounds
+	cmp	ecx,192
+	je	NEAR $L$12rounds
+	cmp	ecx,256
+	je	NEAR $L$14rounds
+	mov	rax,-2
+	jmp	NEAR $L$exit
+
+$L$10rounds:
+	mov	rax,QWORD[rsi]
+	mov	rdx,QWORD[8+rsi]
+	mov	QWORD[rdi],rax
+	mov	QWORD[8+rdi],rdx
+
+	shr	rdx,32
+	xor	ecx,ecx
+	jmp	NEAR $L$10shortcut
+ALIGN	4
+$L$10loop:
+	mov	eax,DWORD[rdi]
+	mov	edx,DWORD[12+rdi]
+$L$10shortcut:
+	movzx	esi,dl
+	movzx	ebx,BYTE[((-128))+rsi*1+rbp]
+	movzx	esi,dh
+	shl	ebx,24
+	xor	eax,ebx
+
+	movzx	ebx,BYTE[((-128))+rsi*1+rbp]
+	shr	edx,16
+	movzx	esi,dl
+	xor	eax,ebx
+
+	movzx	ebx,BYTE[((-128))+rsi*1+rbp]
+	movzx	esi,dh
+	shl	ebx,8
+	xor	eax,ebx
+
+	movzx	ebx,BYTE[((-128))+rsi*1+rbp]
+	shl	ebx,16
+	xor	eax,ebx
+; [(1024-128)+rcx*4+rbp] = dword rcx of the table at $L$AES_Te+2048+1024 (presumably round constants).
+	xor	eax,DWORD[((1024-128))+rcx*4+rbp]
+	mov	DWORD[16+rdi],eax
+	xor	eax,DWORD[4+rdi]
+	mov	DWORD[20+rdi],eax
+	xor	eax,DWORD[8+rdi]
+	mov	DWORD[24+rdi],eax
+	xor	eax,DWORD[12+rdi]
+	mov	DWORD[28+rdi],eax
+	add	ecx,1
+	lea	rdi,[16+rdi]
+	cmp	ecx,10
+	jl	NEAR $L$10loop
+; rdi has advanced 10*16 = 160 bytes, so [80+rdi] is offset 240 of the schedule.
+	mov	DWORD[80+rdi],10
+	xor	rax,rax
+	jmp	NEAR $L$exit
+
+$L$12rounds:
+	mov	rax,QWORD[rsi]
+	mov	rbx,QWORD[8+rsi]
+	mov	rdx,QWORD[16+rsi]
+	mov	QWORD[rdi],rax
+	mov	QWORD[8+rdi],rbx
+	mov	QWORD[16+rdi],rdx
+
+	shr	rdx,32
+	xor	ecx,ecx
+	jmp	NEAR $L$12shortcut
+ALIGN	4
+$L$12loop:
+	mov	eax,DWORD[rdi]
+	mov	edx,DWORD[20+rdi]
+$L$12shortcut:
+	movzx	esi,dl
+	movzx	ebx,BYTE[((-128))+rsi*1+rbp]
+	movzx	esi,dh
+	shl	ebx,24
+	xor	eax,ebx
+
+	movzx	ebx,BYTE[((-128))+rsi*1+rbp]
+	shr	edx,16
+	movzx	esi,dl
+	xor	eax,ebx
+
+	movzx	ebx,BYTE[((-128))+rsi*1+rbp]
+	movzx	esi,dh
+	shl	ebx,8
+	xor	eax,ebx
+
+	movzx	ebx,BYTE[((-128))+rsi*1+rbp]
+	shl	ebx,16
+	xor	eax,ebx
+
+	xor	eax,DWORD[((1024-128))+rcx*4+rbp]
+	mov	DWORD[24+rdi],eax
+	xor	eax,DWORD[4+rdi]
+	mov	DWORD[28+rdi],eax
+	xor	eax,DWORD[8+rdi]
+	mov	DWORD[32+rdi],eax
+	xor	eax,DWORD[12+rdi]
+	mov	DWORD[36+rdi],eax
+
+	cmp	ecx,7
+	je	NEAR $L$12break
+	add	ecx,1
+
+	xor	eax,DWORD[16+rdi]
+	mov	DWORD[40+rdi],eax
+	xor	eax,DWORD[20+rdi]
+	mov	DWORD[44+rdi],eax
+
+	lea	rdi,[24+rdi]
+	jmp	NEAR $L$12loop
+$L$12break:
+	mov	DWORD[72+rdi],12	; rdi advanced 7*24 = 168 bytes, so this is offset 240 of the schedule
+	xor	rax,rax
+	jmp	NEAR $L$exit
+
+$L$14rounds:
+	mov	rax,QWORD[rsi]
+	mov	rbx,QWORD[8+rsi]
+	mov	rcx,QWORD[16+rsi]
+	mov	rdx,QWORD[24+rsi]
+	mov	QWORD[rdi],rax
+	mov	QWORD[8+rdi],rbx
+	mov	QWORD[16+rdi],rcx
+	mov	QWORD[24+rdi],rdx
+
+	shr	rdx,32
+	xor	ecx,ecx
+	jmp	NEAR $L$14shortcut
+ALIGN	4
+$L$14loop:
+	mov	eax,DWORD[rdi]
+	mov	edx,DWORD[28+rdi]
+$L$14shortcut:
+	movzx	esi,dl
+	movzx	ebx,BYTE[((-128))+rsi*1+rbp]
+	movzx	esi,dh
+	shl	ebx,24
+	xor	eax,ebx
+
+	movzx	ebx,BYTE[((-128))+rsi*1+rbp]
+	shr	edx,16
+	movzx	esi,dl
+	xor	eax,ebx
+
+	movzx	ebx,BYTE[((-128))+rsi*1+rbp]
+	movzx	esi,dh
+	shl	ebx,8
+	xor	eax,ebx
+
+	movzx	ebx,BYTE[((-128))+rsi*1+rbp]
+	shl	ebx,16
+	xor	eax,ebx
+
+	xor	eax,DWORD[((1024-128))+rcx*4+rbp]
+	mov	DWORD[32+rdi],eax
+	xor	eax,DWORD[4+rdi]
+	mov	DWORD[36+rdi],eax
+	xor	eax,DWORD[8+rdi]
+	mov	DWORD[40+rdi],eax
+	xor	eax,DWORD[12+rdi]
+	mov	DWORD[44+rdi],eax
+
+	cmp	ecx,6
+	je	NEAR $L$14break
+	add	ecx,1
+; Second half of the pass: byte substitution of the word just written, with no byte rotation and no table constant.
+	mov	edx,eax
+	mov	eax,DWORD[16+rdi]
+	movzx	esi,dl
+	movzx	ebx,BYTE[((-128))+rsi*1+rbp]
+	movzx	esi,dh
+	xor	eax,ebx
+
+	movzx	ebx,BYTE[((-128))+rsi*1+rbp]
+	shr	edx,16
+	shl	ebx,8
+	movzx	esi,dl
+	xor	eax,ebx
+
+	movzx	ebx,BYTE[((-128))+rsi*1+rbp]
+	movzx	esi,dh
+	shl	ebx,16
+	xor	eax,ebx
+
+	movzx	ebx,BYTE[((-128))+rsi*1+rbp]
+	shl	ebx,24
+	xor	eax,ebx
+
+	mov	DWORD[48+rdi],eax
+	xor	eax,DWORD[20+rdi]
+	mov	DWORD[52+rdi],eax
+	xor	eax,DWORD[24+rdi]
+	mov	DWORD[56+rdi],eax
+	xor	eax,DWORD[28+rdi]
+	mov	DWORD[60+rdi],eax
+
+	lea	rdi,[32+rdi]
+	jmp	NEAR $L$14loop
+$L$14break:
+	mov	DWORD[48+rdi],14	; rdi advanced 6*32 = 192 bytes, so this is offset 240 of the schedule
+	xor	rax,rax
+	jmp	NEAR $L$exit
+
+$L$badpointer:
+	mov	rax,-1
+$L$exit:
+DB	0xf3,0xc3	; rep ret
+
+ALIGN	16
+global	asm_AES_set_decrypt_key
+
+asm_AES_set_decrypt_key:	; build a decryption key schedule: expand via _x86_64_AES_set_encrypt_key, reverse the round-key order, then transform the inner round keys; returns 0 in rax on success, otherwise the helper's non-zero result
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_asm_AES_set_decrypt_key:
+	mov	rdi,rcx	; remap the incoming rcx/rdx/r8 arguments into rdi/rsi/rdx for the shared body
+	mov	rsi,rdx
+	mov	rdx,r8	; third argument: pointer to the key schedule being filled in
+
+
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	push	rdx	; keep the key schedule pointer at [rsp] across the call
+$L$dec_key_prologue:
+
+	call	_x86_64_AES_set_encrypt_key
+	mov	r8,QWORD[rsp]	; r8 = key schedule pointer saved above
+	cmp	eax,0
+	jne	NEAR $L$abort	; helper failed: return its non-zero eax unchanged
+
+	mov	r14d,DWORD[240+r8]	; r14d = round count stored at offset 240 of the schedule
+	xor	rdi,rdi
+	lea	rcx,[r14*4+rdi]	; rcx = rounds*4
+	mov	rsi,r8	; rsi -> first round key
+	lea	rdi,[rcx*4+r8]	; rdi -> last round key (schedule + rounds*16)
+ALIGN	4
+$L$invert:	; swap 16-byte round keys from both ends until the pointers meet
+	mov	rax,QWORD[rsi]
+	mov	rbx,QWORD[8+rsi]
+	mov	rcx,QWORD[rdi]
+	mov	rdx,QWORD[8+rdi]
+	mov	QWORD[rdi],rax
+	mov	QWORD[8+rdi],rbx
+	mov	QWORD[rsi],rcx
+	mov	QWORD[8+rsi],rdx
+	lea	rsi,[16+rsi]
+	lea	rdi,[((-16))+rdi]
+	cmp	rdi,rsi
+	jne	NEAR $L$invert
+
+	lea	rax,[(($L$AES_Te+2048+1024))]	; constant words that follow the 2048-byte table and the 1024 bytes of S-box copies
+
+	mov	rsi,QWORD[40+rax]	; rsi = 0x8080808080808080 (high bit of every byte)
+	mov	rdi,QWORD[48+rax]	; rdi = 0xfefefefefefefefe (clears bit 0 of every byte after doubling)
+	mov	rbp,QWORD[56+rax]	; rbp = 0x1b1b1b1b1b1b1b1b (per-byte reduction constant)
+
+	mov	r15,r8
+	sub	r14d,1	; rounds-1 iterations: the first and last round keys are left as they are
+ALIGN	4
+$L$permute:	; transform one 16-byte round key in place (looks like InvMixColumns on each 32-bit word -- confirm against the AES spec)
+	lea	r15,[16+r15]
+	mov	rax,QWORD[r15]	; rax = words 0-1 of the round key
+	mov	rcx,QWORD[8+r15]	; rcx = words 2-3 of the round key
+	mov	r9,rsi
+	mov	r12,rsi
+	and	r9,rax	; isolate the high bit of each byte
+	and	r12,rcx
+	mov	rbx,r9
+	mov	rdx,r12
+	shr	r9,7
+	lea	r8,[rax*1+rax]	; double all eight bytes at once (carries between bytes are masked off below)
+	shr	r12,7
+	lea	r11,[rcx*1+rcx]
+	sub	rbx,r9	; 0x80-0x01 = 0x7f in every byte whose high bit was set
+	sub	rdx,r12
+	and	r8,rdi
+	and	r11,rdi
+	and	rbx,rbp	; 0x1b in every byte that overflowed
+	and	rdx,rbp
+	xor	r8,rbx	; r8/r11 = input bytes doubled in GF(2^8) (x2)
+	xor	r11,rdx
+	mov	r10,rsi
+	mov	r13,rsi
+
+	and	r10,r8	; same doubling step applied to the x2 values
+	and	r13,r11
+	mov	rbx,r10
+	mov	rdx,r13
+	shr	r10,7
+	lea	r9,[r8*1+r8]
+	shr	r13,7
+	lea	r12,[r11*1+r11]
+	sub	rbx,r10
+	sub	rdx,r13
+	and	r9,rdi
+	and	r12,rdi
+	and	rbx,rbp
+	and	rdx,rbp
+	xor	r9,rbx	; r9/r12 = x4 values
+	xor	r12,rdx
+	mov	r10,rsi
+	mov	r13,rsi
+
+	and	r10,r9	; third doubling, interleaved with folding the original bytes into x2 and x4
+	and	r13,r12
+	mov	rbx,r10
+	mov	rdx,r13
+	shr	r10,7
+	xor	r8,rax	; r8 = x2 ^ x1
+	shr	r13,7
+	xor	r11,rcx
+	sub	rbx,r10
+	sub	rdx,r13
+	lea	r10,[r9*1+r9]
+	lea	r13,[r12*1+r12]
+	xor	r9,rax	; r9 = x4 ^ x1
+	xor	r12,rcx
+	and	r10,rdi
+	and	r13,rdi
+	and	rbx,rbp
+	and	rdx,rbp
+	xor	r10,rbx	; r10/r13 = x8 values
+	xor	r13,rdx
+
+	xor	rax,r10	; combine the multiples; each 64-bit register holds two 32-bit words
+	xor	rcx,r13
+	xor	r8,r10
+	xor	r11,r13
+	mov	rbx,rax
+	mov	rdx,rcx
+	xor	r9,r10
+	shr	rbx,32	; split off the upper word so each 32-bit word can be rotated on its own
+	xor	r12,r13
+	shr	rdx,32
+	xor	r10,r8
+	rol	eax,8
+	xor	r13,r11
+	rol	ecx,8
+	xor	r10,r9
+	rol	ebx,8
+	xor	r13,r12
+
+	rol	edx,8
+	xor	eax,r10d
+	shr	r10,32
+	xor	ecx,r13d
+	shr	r13,32
+	xor	ebx,r10d
+	xor	edx,r13d
+
+	mov	r10,r8
+	rol	r8d,24	; fold in the next term rotated by 24 bits
+	mov	r13,r11
+	rol	r11d,24
+	shr	r10,32
+	xor	eax,r8d
+	shr	r13,32
+	xor	ecx,r11d
+	rol	r10d,24
+	mov	r8,r9
+	rol	r13d,24
+	mov	r11,r12
+	shr	r8,32
+	xor	ebx,r10d
+	shr	r11,32
+	xor	edx,r13d
+
+
+	rol	r9d,16	; fold in the last term rotated by 16 bits
+
+	rol	r12d,16
+
+	rol	r8d,16
+
+	xor	eax,r9d
+	rol	r11d,16
+	xor	ecx,r12d
+
+	xor	ebx,r8d
+	xor	edx,r11d
+	mov	DWORD[r15],eax	; write the four transformed words back over the round key
+	mov	DWORD[4+r15],ebx
+	mov	DWORD[8+r15],ecx
+	mov	DWORD[12+r15],edx
+	sub	r14d,1
+	jnz	NEAR $L$permute
+
+	xor	rax,rax	; success: return 0
+$L$abort:
+	mov	r15,QWORD[8+rsp]	; restore callee-saved registers; [rsp] still holds the pushed rdx and is skipped
+	mov	r14,QWORD[16+rsp]
+	mov	r13,QWORD[24+rsp]
+	mov	r12,QWORD[32+rsp]
+	mov	rbp,QWORD[40+rsp]
+	mov	rbx,QWORD[48+rsp]
+	add	rsp,56	; drop all seven pushed qwords
+$L$dec_key_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_asm_AES_set_decrypt_key:
+ALIGN	16
+global	asm_AES_cbc_encrypt
+
+EXTERN	OPENSSL_ia32cap_P
+
+asm_AES_cbc_encrypt:	; AES-CBC over a buffer; after argument remapping: rdi=in, rsi=out, rdx=length in bytes, rcx=key schedule, r8=ivec (updated on return), r9=non-zero to encrypt / zero to decrypt
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_asm_AES_cbc_encrypt:
+	mov	rdi,rcx	; remap the four register arguments ...
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]	; ... and fetch arguments five and six from the caller's stack
+	mov	r9,QWORD[48+rsp]
+
+
+	cmp	rdx,0
+	je	NEAR $L$cbc_epilogue	; zero length: nothing to do and nothing pushed yet
+	pushfq
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+$L$cbc_prologue:
+
+	cld	; string instructions below must run forward
+	mov	r9d,r9d	; zero-extend the 32-bit direction flag to 64 bits
+
+	lea	r14,[$L$AES_Te]
+	lea	r10,[$L$AES_Td]
+	cmp	r9,0
+	cmove	r14,r10	; r14 = decryption table when the direction flag is zero
+
+	lea	r10,[OPENSSL_ia32cap_P]
+	mov	r10d,DWORD[r10]	; r10d = first capability word
+	cmp	rdx,512
+	jb	NEAR $L$cbc_slow_prologue	; short inputs use the compact routines
+	test	rdx,15
+	jnz	NEAR $L$cbc_slow_prologue	; so do lengths that are not a multiple of 16
+	bt	r10d,28
+	jc	NEAR $L$cbc_slow_prologue	; and CPUs with capability bit 28 set (presumably the CPUID HTT flag -- confirm)
+
+
+	lea	r15,[((-88-248))+rsp]	; fast path: reserve the frame header plus room for a key schedule copy
+	and	r15,-64	; align the new frame to 64 bytes
+
+
+	mov	r10,r14	; compare page offsets (low 12 bits) of the table's first 2304 bytes and of the frame
+	lea	r11,[2304+r14]
+	mov	r12,r15
+	and	r10,0xFFF
+	and	r11,0xFFF
+	and	r12,0xFFF
+
+	cmp	r12,r11
+	jb	NEAR $L$cbc_te_break_out
+	sub	r12,r11	; frame offset at or above the table's end offset: lower the frame onto that end offset
+	sub	r15,r12
+	jmp	NEAR $L$cbc_te_ok
+$L$cbc_te_break_out:
+	sub	r12,r10	; otherwise lower the frame by its distance from the table start (mod 4096) plus 320 bytes
+	and	r12,0xFFF
+	add	r12,320
+	sub	r15,r12
+ALIGN	4
+$L$cbc_te_ok:
+
+	xchg	r15,rsp	; switch to the new frame; r15 = original stack pointer
+
+	mov	QWORD[16+rsp],r15	; [16+rsp] = original rsp, used by $L$cbc_exit
+$L$cbc_fast_body:
+	mov	QWORD[24+rsp],rdi	; [24+rsp] = input pointer
+	mov	QWORD[32+rsp],rsi	; [32+rsp] = output pointer
+	mov	QWORD[40+rsp],rdx	; [40+rsp] = remaining length
+	mov	QWORD[48+rsp],rcx	; [48+rsp] = key schedule pointer
+	mov	QWORD[56+rsp],r8	; [56+rsp] = ivec pointer
+	mov	DWORD[((80+240))+rsp],0	; zero means "no key copy on the stack"; overwritten with the round count if a copy is made
+	mov	rbp,r8	; rbp = ivec
+	mov	rbx,r9	; rbx = direction flag
+	mov	r9,rsi	; r9 = output cursor
+	mov	r8,rdi	; r8 = input cursor
+	mov	r15,rcx	; r15 = key schedule
+
+	mov	eax,DWORD[240+r15]	; eax = round count stored at offset 240 of the schedule
+
+	mov	r10,r15	; decide from (key - table) mod 4096 whether to copy the key schedule onto the stack
+	sub	r10,r14
+	and	r10,0xfff
+	cmp	r10,2304
+	jb	NEAR $L$cbc_do_ecopy
+	cmp	r10,4096-248
+	jb	NEAR $L$cbc_skip_ecopy
+ALIGN	4
+$L$cbc_do_ecopy:
+	mov	rsi,r15
+	lea	rdi,[80+rsp]
+	lea	r15,[80+rsp]	; from here on the key schedule is the stack copy
+	mov	ecx,240/8
+	DD	0x90A548F3	; bytes F3 48 A5 90 = rep movsq; nop -- copy 240 bytes of round keys
+	mov	DWORD[rdi],eax	; rdi is now 80+240+rsp: store the round count, which also marks the copy as present
+$L$cbc_skip_ecopy:
+	mov	QWORD[rsp],r15	; [rsp] = key schedule pointer actually used
+
+	mov	ecx,18	; 18 iterations x 128 bytes = 2304 bytes of table
+ALIGN	4
+$L$cbc_prefetch_te:	; touch the table at 32-byte intervals; the loaded values are discarded
+	mov	r10,QWORD[r14]
+	mov	r11,QWORD[32+r14]
+	mov	r12,QWORD[64+r14]
+	mov	r13,QWORD[96+r14]
+	lea	r14,[128+r14]
+	sub	ecx,1
+	jnz	NEAR $L$cbc_prefetch_te
+	lea	r14,[((-2304))+r14]	; rewind r14 to the table start
+
+	cmp	rbx,0
+	je	NEAR $L$FAST_DECRYPT
+
+
+	mov	eax,DWORD[rbp]	; load the IV as the initial chaining value
+	mov	ebx,DWORD[4+rbp]
+	mov	ecx,DWORD[8+rbp]
+	mov	edx,DWORD[12+rbp]
+
+ALIGN	4
+$L$cbc_fast_enc_loop:
+	xor	eax,DWORD[r8]	; chaining value ^= plaintext block
+	xor	ebx,DWORD[4+r8]
+	xor	ecx,DWORD[8+r8]
+	xor	edx,DWORD[12+r8]
+	mov	r15,QWORD[rsp]	; reload the key schedule pointer
+	mov	QWORD[24+rsp],r8	; save the input cursor across the call
+
+	call	_x86_64_AES_encrypt
+
+	mov	r8,QWORD[24+rsp]
+	mov	r10,QWORD[40+rsp]
+	mov	DWORD[r9],eax	; write the ciphertext block; it stays in eax..edx as the next chaining value
+	mov	DWORD[4+r9],ebx
+	mov	DWORD[8+r9],ecx
+	mov	DWORD[12+r9],edx
+
+	lea	r8,[16+r8]
+	lea	r9,[16+r9]
+	sub	r10,16
+	test	r10,-16	; any whole block left? (the mov below does not change flags)
+	mov	QWORD[40+rsp],r10
+	jnz	NEAR $L$cbc_fast_enc_loop
+	mov	rbp,QWORD[56+rsp]
+	mov	DWORD[rbp],eax	; store the last ciphertext block back to ivec
+	mov	DWORD[4+rbp],ebx
+	mov	DWORD[8+rbp],ecx
+	mov	DWORD[12+rbp],edx
+
+	jmp	NEAR $L$cbc_fast_cleanup
+
+
+ALIGN	16
+$L$FAST_DECRYPT:
+	cmp	r9,r8
+	je	NEAR $L$cbc_fast_dec_in_place	; out == in needs the previous ciphertext buffered
+
+	mov	QWORD[64+rsp],rbp	; [64+rsp] = pointer to the previous ciphertext block (initially ivec)
+ALIGN	4
+$L$cbc_fast_dec_loop:
+	mov	eax,DWORD[r8]	; load a ciphertext block
+	mov	ebx,DWORD[4+r8]
+	mov	ecx,DWORD[8+r8]
+	mov	edx,DWORD[12+r8]
+	mov	r15,QWORD[rsp]
+	mov	QWORD[24+rsp],r8
+
+	call	_x86_64_AES_decrypt
+
+	mov	rbp,QWORD[64+rsp]
+	mov	r8,QWORD[24+rsp]
+	mov	r10,QWORD[40+rsp]
+	xor	eax,DWORD[rbp]	; plaintext = decrypted block ^ previous ciphertext (or IV)
+	xor	ebx,DWORD[4+rbp]
+	xor	ecx,DWORD[8+rbp]
+	xor	edx,DWORD[12+rbp]
+	mov	rbp,r8	; the current ciphertext block becomes the next chaining block
+
+	sub	r10,16	; sets ZF for the jnz below; the mov/lea in between leave flags alone
+	mov	QWORD[40+rsp],r10
+	mov	QWORD[64+rsp],rbp
+
+	mov	DWORD[r9],eax
+	mov	DWORD[4+r9],ebx
+	mov	DWORD[8+r9],ecx
+	mov	DWORD[12+r9],edx
+
+	lea	r8,[16+r8]
+	lea	r9,[16+r9]
+	jnz	NEAR $L$cbc_fast_dec_loop
+	mov	r12,QWORD[56+rsp]
+	mov	r10,QWORD[rbp]	; copy the last ciphertext block to ivec
+	mov	r11,QWORD[8+rbp]
+	mov	QWORD[r12],r10
+	mov	QWORD[8+r12],r11
+	jmp	NEAR $L$cbc_fast_cleanup
+
+ALIGN	16
+$L$cbc_fast_dec_in_place:
+	mov	r10,QWORD[rbp]	; buffer the IV at [64+rsp], since the input is overwritten as we go
+	mov	r11,QWORD[8+rbp]
+	mov	QWORD[((0+64))+rsp],r10
+	mov	QWORD[((8+64))+rsp],r11
+ALIGN	4
+$L$cbc_fast_dec_in_place_loop:
+	mov	eax,DWORD[r8]
+	mov	ebx,DWORD[4+r8]
+	mov	ecx,DWORD[8+r8]
+	mov	edx,DWORD[12+r8]
+	mov	r15,QWORD[rsp]
+	mov	QWORD[24+rsp],r8
+
+	call	_x86_64_AES_decrypt
+
+	mov	r8,QWORD[24+rsp]
+	mov	r10,QWORD[40+rsp]
+	xor	eax,DWORD[((0+64))+rsp]	; plaintext = decrypted block ^ buffered chaining value
+	xor	ebx,DWORD[((4+64))+rsp]
+	xor	ecx,DWORD[((8+64))+rsp]
+	xor	edx,DWORD[((12+64))+rsp]
+
+	mov	r11,QWORD[r8]	; grab this ciphertext block before the output store overwrites it
+	mov	r12,QWORD[8+r8]
+	sub	r10,16
+	jz	NEAR $L$cbc_fast_dec_in_place_done
+
+	mov	QWORD[((0+64))+rsp],r11	; it becomes the chaining value for the next block
+	mov	QWORD[((8+64))+rsp],r12
+
+	mov	DWORD[r9],eax
+	mov	DWORD[4+r9],ebx
+	mov	DWORD[8+r9],ecx
+	mov	DWORD[12+r9],edx
+
+	lea	r8,[16+r8]
+	lea	r9,[16+r9]
+	mov	QWORD[40+rsp],r10
+	jmp	NEAR $L$cbc_fast_dec_in_place_loop
+$L$cbc_fast_dec_in_place_done:
+	mov	rdi,QWORD[56+rsp]
+	mov	QWORD[rdi],r11	; the last ciphertext block goes back to ivec
+	mov	QWORD[8+rdi],r12
+
+	mov	DWORD[r9],eax	; write the final plaintext block
+	mov	DWORD[4+r9],ebx
+	mov	DWORD[8+r9],ecx
+	mov	DWORD[12+r9],edx
+
+ALIGN	4
+$L$cbc_fast_cleanup:
+	cmp	DWORD[((80+240))+rsp],0	; was a key copy placed on the stack?
+	lea	rdi,[80+rsp]	; lea does not change flags, so je still sees the cmp result
+	je	NEAR $L$cbc_exit
+	mov	ecx,240/8
+	xor	rax,rax
+	DD	0x90AB48F3	; bytes F3 48 AB 90 = rep stosq; nop -- wipe the 240-byte key copy
+
+	jmp	NEAR $L$cbc_exit
+
+
+ALIGN	16
+$L$cbc_slow_prologue:
+
+	lea	rbp,[((-88))+rsp]	; slow path: small frame, no key copy
+	and	rbp,-64
+
+	lea	r10,[((-88-63))+rcx]	; lower the frame by ((rbp + 88 + 63 - key) & 0x3c0), a multiple of 64 bytes
+	sub	r10,rbp
+	neg	r10
+	and	r10,0x3c0
+	sub	rbp,r10
+
+	xchg	rbp,rsp	; switch to the new frame; rbp = original stack pointer
+
+	mov	QWORD[16+rsp],rbp	; [16+rsp] = original rsp, used by $L$cbc_exit
+$L$cbc_slow_body:
+
+
+
+
+	mov	QWORD[56+rsp],r8	; [56+rsp] = ivec pointer
+	mov	rbp,r8	; rbp = ivec
+	mov	rbx,r9	; rbx = direction flag
+	mov	r9,rsi	; r9 = output cursor
+	mov	r8,rdi	; r8 = input cursor
+	mov	r15,rcx	; r15 = key schedule
+	mov	r10,rdx	; r10 = remaining length
+
+	mov	eax,DWORD[240+r15]	; eax = round count
+	mov	QWORD[rsp],r15	; [rsp] = key schedule pointer
+	shl	eax,4
+	lea	rax,[rax*1+r15]
+	mov	QWORD[8+rsp],rax	; [8+rsp] = key schedule + rounds*16
+
+
+	lea	r14,[2048+r14]	; skip the 2048-byte word table to reach the byte tables
+	lea	rax,[((768-8))+rsp]
+	sub	rax,r14
+	and	rax,0x300	; pick a multiple of 256 (0, 256, 512 or 768) from the stack/table distance
+	lea	r14,[rax*1+r14]	; r14 -> the chosen 256-byte table copy
+
+	cmp	rbx,0
+	je	NEAR $L$SLOW_DECRYPT
+
+
+	test	r10,-16	; fewer than 16 bytes in total? (flags survive the loads below)
+	mov	eax,DWORD[rbp]	; load the IV as the initial chaining value
+	mov	ebx,DWORD[4+rbp]
+	mov	ecx,DWORD[8+rbp]
+	mov	edx,DWORD[12+rbp]
+	jz	NEAR $L$cbc_slow_enc_tail
+
+ALIGN	4
+$L$cbc_slow_enc_loop:
+	xor	eax,DWORD[r8]	; chaining value ^= plaintext block
+	xor	ebx,DWORD[4+r8]
+	xor	ecx,DWORD[8+r8]
+	xor	edx,DWORD[12+r8]
+	mov	r15,QWORD[rsp]
+	mov	QWORD[24+rsp],r8	; save cursors and remaining length across the call
+	mov	QWORD[32+rsp],r9
+	mov	QWORD[40+rsp],r10
+
+	call	_x86_64_AES_encrypt_compact
+
+	mov	r8,QWORD[24+rsp]
+	mov	r9,QWORD[32+rsp]
+	mov	r10,QWORD[40+rsp]
+	mov	DWORD[r9],eax	; write the ciphertext block
+	mov	DWORD[4+r9],ebx
+	mov	DWORD[8+r9],ecx
+	mov	DWORD[12+r9],edx
+
+	lea	r8,[16+r8]
+	lea	r9,[16+r9]
+	sub	r10,16
+	test	r10,-16
+	jnz	NEAR $L$cbc_slow_enc_loop	; another whole block remains
+	test	r10,15
+	jnz	NEAR $L$cbc_slow_enc_tail	; 1..15 trailing bytes remain
+	mov	rbp,QWORD[56+rsp]
+	mov	DWORD[rbp],eax	; store the last ciphertext block back to ivec
+	mov	DWORD[4+rbp],ebx
+	mov	DWORD[8+rbp],ecx
+	mov	DWORD[12+rbp],edx
+
+	jmp	NEAR $L$cbc_exit
+
+ALIGN	4
+$L$cbc_slow_enc_tail:	; partial final block: copy it to the output, zero-pad to 16 bytes, then encrypt it in place
+	mov	r11,rax	; preserve chaining words held in rax/rcx, which the string instructions clobber
+	mov	r12,rcx
+	mov	rcx,r10
+	mov	rsi,r8
+	mov	rdi,r9
+	DD	0x9066A4F3	; bytes F3 A4 66 90 = rep movsb; 2-byte nop -- copy the r10 remaining input bytes
+	mov	rcx,16
+	sub	rcx,r10
+	xor	rax,rax
+	DD	0x9066AAF3	; bytes F3 AA 66 90 = rep stosb; 2-byte nop -- zero-fill the rest of the block
+	mov	r8,r9	; the padded block in the output buffer is now the input
+	mov	r10,16	; exactly one block left
+	mov	rax,r11
+	mov	rcx,r12
+	jmp	NEAR $L$cbc_slow_enc_loop
+
+ALIGN	16
+$L$SLOW_DECRYPT:
+	shr	rax,3	; rax still holds the 0x300-masked offset: add 32 extra bytes per selected copy,
+	add	r14,rax	; since each 256-byte copy in the decryption table is followed by 32 bytes of constants
+
+	mov	r11,QWORD[rbp]	; buffer the IV at [64+rsp]
+	mov	r12,QWORD[8+rbp]
+	mov	QWORD[((0+64))+rsp],r11
+	mov	QWORD[((8+64))+rsp],r12
+
+ALIGN	4
+$L$cbc_slow_dec_loop:
+	mov	eax,DWORD[r8]	; load a ciphertext block
+	mov	ebx,DWORD[4+r8]
+	mov	ecx,DWORD[8+r8]
+	mov	edx,DWORD[12+r8]
+	mov	r15,QWORD[rsp]
+	mov	QWORD[24+rsp],r8
+	mov	QWORD[32+rsp],r9
+	mov	QWORD[40+rsp],r10
+
+	call	_x86_64_AES_decrypt_compact
+
+	mov	r8,QWORD[24+rsp]
+	mov	r9,QWORD[32+rsp]
+	mov	r10,QWORD[40+rsp]
+	xor	eax,DWORD[((0+64))+rsp]	; plaintext = decrypted block ^ buffered chaining value
+	xor	ebx,DWORD[((4+64))+rsp]
+	xor	ecx,DWORD[((8+64))+rsp]
+	xor	edx,DWORD[((12+64))+rsp]
+
+	mov	r11,QWORD[r8]	; keep this ciphertext block: it is the next chaining value
+	mov	r12,QWORD[8+r8]
+	sub	r10,16
+	jc	NEAR $L$cbc_slow_dec_partial	; fewer than 16 bytes were left: emit only part of the block
+	jz	NEAR $L$cbc_slow_dec_done	; that was exactly the last block
+
+	mov	QWORD[((0+64))+rsp],r11
+	mov	QWORD[((8+64))+rsp],r12
+
+	mov	DWORD[r9],eax
+	mov	DWORD[4+r9],ebx
+	mov	DWORD[8+r9],ecx
+	mov	DWORD[12+r9],edx
+
+	lea	r8,[16+r8]
+	lea	r9,[16+r9]
+	jmp	NEAR $L$cbc_slow_dec_loop
+$L$cbc_slow_dec_done:
+	mov	rdi,QWORD[56+rsp]
+	mov	QWORD[rdi],r11	; the last ciphertext block goes back to ivec
+	mov	QWORD[8+rdi],r12
+
+	mov	DWORD[r9],eax	; write the final plaintext block
+	mov	DWORD[4+r9],ebx
+	mov	DWORD[8+r9],ecx
+	mov	DWORD[12+r9],edx
+
+	jmp	NEAR $L$cbc_exit
+
+ALIGN	4
+$L$cbc_slow_dec_partial:
+	mov	rdi,QWORD[56+rsp]
+	mov	QWORD[rdi],r11	; update ivec with the last ciphertext block read
+	mov	QWORD[8+rdi],r12
+
+	mov	DWORD[((0+64))+rsp],eax	; stage the full plaintext block on the stack
+	mov	DWORD[((4+64))+rsp],ebx
+	mov	DWORD[((8+64))+rsp],ecx
+	mov	DWORD[((12+64))+rsp],edx
+
+	mov	rdi,r9
+	lea	rsi,[64+rsp]
+	lea	rcx,[16+r10]	; r10 went negative above, so 16+r10 is the number of bytes still wanted
+	DD	0x9066A4F3	; bytes F3 A4 66 90 = rep movsb; 2-byte nop -- copy only those bytes to the output
+	jmp	NEAR $L$cbc_exit
+
+ALIGN	16
+$L$cbc_exit:
+	mov	rsi,QWORD[16+rsp]	; rsi = stack pointer as it was right after the pushes
+	mov	r15,QWORD[rsi]	; restore callee-saved registers in reverse push order
+	mov	r14,QWORD[8+rsi]
+	mov	r13,QWORD[16+rsi]
+	mov	r12,QWORD[24+rsi]
+	mov	rbp,QWORD[32+rsi]
+	mov	rbx,QWORD[40+rsi]
+	lea	rsp,[48+rsi]	; rsp now points at the flags saved by pushfq
+$L$cbc_popfq:
+	popfq
+$L$cbc_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_asm_AES_cbc_encrypt:
+ALIGN	64
+$L$AES_Te:
+	DD	0xa56363c6,0xa56363c6
+	DD	0x847c7cf8,0x847c7cf8
+	DD	0x997777ee,0x997777ee
+	DD	0x8d7b7bf6,0x8d7b7bf6
+	DD	0x0df2f2ff,0x0df2f2ff
+	DD	0xbd6b6bd6,0xbd6b6bd6
+	DD	0xb16f6fde,0xb16f6fde
+	DD	0x54c5c591,0x54c5c591
+	DD	0x50303060,0x50303060
+	DD	0x03010102,0x03010102
+	DD	0xa96767ce,0xa96767ce
+	DD	0x7d2b2b56,0x7d2b2b56
+	DD	0x19fefee7,0x19fefee7
+	DD	0x62d7d7b5,0x62d7d7b5
+	DD	0xe6abab4d,0xe6abab4d
+	DD	0x9a7676ec,0x9a7676ec
+	DD	0x45caca8f,0x45caca8f
+	DD	0x9d82821f,0x9d82821f
+	DD	0x40c9c989,0x40c9c989
+	DD	0x877d7dfa,0x877d7dfa
+	DD	0x15fafaef,0x15fafaef
+	DD	0xeb5959b2,0xeb5959b2
+	DD	0xc947478e,0xc947478e
+	DD	0x0bf0f0fb,0x0bf0f0fb
+	DD	0xecadad41,0xecadad41
+	DD	0x67d4d4b3,0x67d4d4b3
+	DD	0xfda2a25f,0xfda2a25f
+	DD	0xeaafaf45,0xeaafaf45
+	DD	0xbf9c9c23,0xbf9c9c23
+	DD	0xf7a4a453,0xf7a4a453
+	DD	0x967272e4,0x967272e4
+	DD	0x5bc0c09b,0x5bc0c09b
+	DD	0xc2b7b775,0xc2b7b775
+	DD	0x1cfdfde1,0x1cfdfde1
+	DD	0xae93933d,0xae93933d
+	DD	0x6a26264c,0x6a26264c
+	DD	0x5a36366c,0x5a36366c
+	DD	0x413f3f7e,0x413f3f7e
+	DD	0x02f7f7f5,0x02f7f7f5
+	DD	0x4fcccc83,0x4fcccc83
+	DD	0x5c343468,0x5c343468
+	DD	0xf4a5a551,0xf4a5a551
+	DD	0x34e5e5d1,0x34e5e5d1
+	DD	0x08f1f1f9,0x08f1f1f9
+	DD	0x937171e2,0x937171e2
+	DD	0x73d8d8ab,0x73d8d8ab
+	DD	0x53313162,0x53313162
+	DD	0x3f15152a,0x3f15152a
+	DD	0x0c040408,0x0c040408
+	DD	0x52c7c795,0x52c7c795
+	DD	0x65232346,0x65232346
+	DD	0x5ec3c39d,0x5ec3c39d
+	DD	0x28181830,0x28181830
+	DD	0xa1969637,0xa1969637
+	DD	0x0f05050a,0x0f05050a
+	DD	0xb59a9a2f,0xb59a9a2f
+	DD	0x0907070e,0x0907070e
+	DD	0x36121224,0x36121224
+	DD	0x9b80801b,0x9b80801b
+	DD	0x3de2e2df,0x3de2e2df
+	DD	0x26ebebcd,0x26ebebcd
+	DD	0x6927274e,0x6927274e
+	DD	0xcdb2b27f,0xcdb2b27f
+	DD	0x9f7575ea,0x9f7575ea
+	DD	0x1b090912,0x1b090912
+	DD	0x9e83831d,0x9e83831d
+	DD	0x742c2c58,0x742c2c58
+	DD	0x2e1a1a34,0x2e1a1a34
+	DD	0x2d1b1b36,0x2d1b1b36
+	DD	0xb26e6edc,0xb26e6edc
+	DD	0xee5a5ab4,0xee5a5ab4
+	DD	0xfba0a05b,0xfba0a05b
+	DD	0xf65252a4,0xf65252a4
+	DD	0x4d3b3b76,0x4d3b3b76
+	DD	0x61d6d6b7,0x61d6d6b7
+	DD	0xceb3b37d,0xceb3b37d
+	DD	0x7b292952,0x7b292952
+	DD	0x3ee3e3dd,0x3ee3e3dd
+	DD	0x712f2f5e,0x712f2f5e
+	DD	0x97848413,0x97848413
+	DD	0xf55353a6,0xf55353a6
+	DD	0x68d1d1b9,0x68d1d1b9
+	DD	0x00000000,0x00000000
+	DD	0x2cededc1,0x2cededc1
+	DD	0x60202040,0x60202040
+	DD	0x1ffcfce3,0x1ffcfce3
+	DD	0xc8b1b179,0xc8b1b179
+	DD	0xed5b5bb6,0xed5b5bb6
+	DD	0xbe6a6ad4,0xbe6a6ad4
+	DD	0x46cbcb8d,0x46cbcb8d
+	DD	0xd9bebe67,0xd9bebe67
+	DD	0x4b393972,0x4b393972
+	DD	0xde4a4a94,0xde4a4a94
+	DD	0xd44c4c98,0xd44c4c98
+	DD	0xe85858b0,0xe85858b0
+	DD	0x4acfcf85,0x4acfcf85
+	DD	0x6bd0d0bb,0x6bd0d0bb
+	DD	0x2aefefc5,0x2aefefc5
+	DD	0xe5aaaa4f,0xe5aaaa4f
+	DD	0x16fbfbed,0x16fbfbed
+	DD	0xc5434386,0xc5434386
+	DD	0xd74d4d9a,0xd74d4d9a
+	DD	0x55333366,0x55333366
+	DD	0x94858511,0x94858511
+	DD	0xcf45458a,0xcf45458a
+	DD	0x10f9f9e9,0x10f9f9e9
+	DD	0x06020204,0x06020204
+	DD	0x817f7ffe,0x817f7ffe
+	DD	0xf05050a0,0xf05050a0
+	DD	0x443c3c78,0x443c3c78
+	DD	0xba9f9f25,0xba9f9f25
+	DD	0xe3a8a84b,0xe3a8a84b
+	DD	0xf35151a2,0xf35151a2
+	DD	0xfea3a35d,0xfea3a35d
+	DD	0xc0404080,0xc0404080
+	DD	0x8a8f8f05,0x8a8f8f05
+	DD	0xad92923f,0xad92923f
+	DD	0xbc9d9d21,0xbc9d9d21
+	DD	0x48383870,0x48383870
+	DD	0x04f5f5f1,0x04f5f5f1
+	DD	0xdfbcbc63,0xdfbcbc63
+	DD	0xc1b6b677,0xc1b6b677
+	DD	0x75dadaaf,0x75dadaaf
+	DD	0x63212142,0x63212142
+	DD	0x30101020,0x30101020
+	DD	0x1affffe5,0x1affffe5
+	DD	0x0ef3f3fd,0x0ef3f3fd
+	DD	0x6dd2d2bf,0x6dd2d2bf
+	DD	0x4ccdcd81,0x4ccdcd81
+	DD	0x140c0c18,0x140c0c18
+	DD	0x35131326,0x35131326
+	DD	0x2fececc3,0x2fececc3
+	DD	0xe15f5fbe,0xe15f5fbe
+	DD	0xa2979735,0xa2979735
+	DD	0xcc444488,0xcc444488
+	DD	0x3917172e,0x3917172e
+	DD	0x57c4c493,0x57c4c493
+	DD	0xf2a7a755,0xf2a7a755
+	DD	0x827e7efc,0x827e7efc
+	DD	0x473d3d7a,0x473d3d7a
+	DD	0xac6464c8,0xac6464c8
+	DD	0xe75d5dba,0xe75d5dba
+	DD	0x2b191932,0x2b191932
+	DD	0x957373e6,0x957373e6
+	DD	0xa06060c0,0xa06060c0
+	DD	0x98818119,0x98818119
+	DD	0xd14f4f9e,0xd14f4f9e
+	DD	0x7fdcdca3,0x7fdcdca3
+	DD	0x66222244,0x66222244
+	DD	0x7e2a2a54,0x7e2a2a54
+	DD	0xab90903b,0xab90903b
+	DD	0x8388880b,0x8388880b
+	DD	0xca46468c,0xca46468c
+	DD	0x29eeeec7,0x29eeeec7
+	DD	0xd3b8b86b,0xd3b8b86b
+	DD	0x3c141428,0x3c141428
+	DD	0x79dedea7,0x79dedea7
+	DD	0xe25e5ebc,0xe25e5ebc
+	DD	0x1d0b0b16,0x1d0b0b16
+	DD	0x76dbdbad,0x76dbdbad
+	DD	0x3be0e0db,0x3be0e0db
+	DD	0x56323264,0x56323264
+	DD	0x4e3a3a74,0x4e3a3a74
+	DD	0x1e0a0a14,0x1e0a0a14
+	DD	0xdb494992,0xdb494992
+	DD	0x0a06060c,0x0a06060c
+	DD	0x6c242448,0x6c242448
+	DD	0xe45c5cb8,0xe45c5cb8
+	DD	0x5dc2c29f,0x5dc2c29f
+	DD	0x6ed3d3bd,0x6ed3d3bd
+	DD	0xefacac43,0xefacac43
+	DD	0xa66262c4,0xa66262c4
+	DD	0xa8919139,0xa8919139
+	DD	0xa4959531,0xa4959531
+	DD	0x37e4e4d3,0x37e4e4d3
+	DD	0x8b7979f2,0x8b7979f2
+	DD	0x32e7e7d5,0x32e7e7d5
+	DD	0x43c8c88b,0x43c8c88b
+	DD	0x5937376e,0x5937376e
+	DD	0xb76d6dda,0xb76d6dda
+	DD	0x8c8d8d01,0x8c8d8d01
+	DD	0x64d5d5b1,0x64d5d5b1
+	DD	0xd24e4e9c,0xd24e4e9c
+	DD	0xe0a9a949,0xe0a9a949
+	DD	0xb46c6cd8,0xb46c6cd8
+	DD	0xfa5656ac,0xfa5656ac
+	DD	0x07f4f4f3,0x07f4f4f3
+	DD	0x25eaeacf,0x25eaeacf
+	DD	0xaf6565ca,0xaf6565ca
+	DD	0x8e7a7af4,0x8e7a7af4
+	DD	0xe9aeae47,0xe9aeae47
+	DD	0x18080810,0x18080810
+	DD	0xd5baba6f,0xd5baba6f
+	DD	0x887878f0,0x887878f0
+	DD	0x6f25254a,0x6f25254a
+	DD	0x722e2e5c,0x722e2e5c
+	DD	0x241c1c38,0x241c1c38
+	DD	0xf1a6a657,0xf1a6a657
+	DD	0xc7b4b473,0xc7b4b473
+	DD	0x51c6c697,0x51c6c697
+	DD	0x23e8e8cb,0x23e8e8cb
+	DD	0x7cdddda1,0x7cdddda1
+	DD	0x9c7474e8,0x9c7474e8
+	DD	0x211f1f3e,0x211f1f3e
+	DD	0xdd4b4b96,0xdd4b4b96
+	DD	0xdcbdbd61,0xdcbdbd61
+	DD	0x868b8b0d,0x868b8b0d
+	DD	0x858a8a0f,0x858a8a0f
+	DD	0x907070e0,0x907070e0
+	DD	0x423e3e7c,0x423e3e7c
+	DD	0xc4b5b571,0xc4b5b571
+	DD	0xaa6666cc,0xaa6666cc
+	DD	0xd8484890,0xd8484890
+	DD	0x05030306,0x05030306
+	DD	0x01f6f6f7,0x01f6f6f7
+	DD	0x120e0e1c,0x120e0e1c
+	DD	0xa36161c2,0xa36161c2
+	DD	0x5f35356a,0x5f35356a
+	DD	0xf95757ae,0xf95757ae
+	DD	0xd0b9b969,0xd0b9b969
+	DD	0x91868617,0x91868617
+	DD	0x58c1c199,0x58c1c199
+	DD	0x271d1d3a,0x271d1d3a
+	DD	0xb99e9e27,0xb99e9e27
+	DD	0x38e1e1d9,0x38e1e1d9
+	DD	0x13f8f8eb,0x13f8f8eb
+	DD	0xb398982b,0xb398982b
+	DD	0x33111122,0x33111122
+	DD	0xbb6969d2,0xbb6969d2
+	DD	0x70d9d9a9,0x70d9d9a9
+	DD	0x898e8e07,0x898e8e07
+	DD	0xa7949433,0xa7949433
+	DD	0xb69b9b2d,0xb69b9b2d
+	DD	0x221e1e3c,0x221e1e3c
+	DD	0x92878715,0x92878715
+	DD	0x20e9e9c9,0x20e9e9c9
+	DD	0x49cece87,0x49cece87
+	DD	0xff5555aa,0xff5555aa
+	DD	0x78282850,0x78282850
+	DD	0x7adfdfa5,0x7adfdfa5
+	DD	0x8f8c8c03,0x8f8c8c03
+	DD	0xf8a1a159,0xf8a1a159
+	DD	0x80898909,0x80898909
+	DD	0x170d0d1a,0x170d0d1a
+	DD	0xdabfbf65,0xdabfbf65
+	DD	0x31e6e6d7,0x31e6e6d7
+	DD	0xc6424284,0xc6424284
+	DD	0xb86868d0,0xb86868d0
+	DD	0xc3414182,0xc3414182
+	DD	0xb0999929,0xb0999929
+	DD	0x772d2d5a,0x772d2d5a
+	DD	0x110f0f1e,0x110f0f1e
+	DD	0xcbb0b07b,0xcbb0b07b
+	DD	0xfc5454a8,0xfc5454a8
+	DD	0xd6bbbb6d,0xd6bbbb6d
+	DD	0x3a16162c,0x3a16162c
+DB	0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+DB	0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+DB	0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+DB	0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+DB	0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+DB	0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+DB	0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+DB	0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+DB	0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+DB	0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+DB	0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+DB	0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+DB	0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+DB	0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+DB	0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+DB	0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+DB	0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+DB	0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+DB	0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+DB	0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+DB	0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+DB	0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+DB	0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+DB	0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+DB	0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+DB	0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+DB	0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+DB	0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+DB	0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+DB	0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+DB	0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+DB	0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+DB	0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+DB	0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+DB	0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+DB	0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+DB	0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+DB	0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+DB	0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+DB	0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+DB	0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+DB	0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+DB	0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+DB	0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+DB	0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+DB	0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+DB	0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+DB	0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+DB	0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+DB	0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+DB	0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+DB	0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+DB	0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+DB	0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+DB	0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+DB	0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+DB	0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+DB	0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+DB	0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+DB	0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+DB	0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+DB	0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+DB	0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+DB	0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+DB	0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+DB	0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+DB	0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+DB	0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+DB	0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+DB	0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+DB	0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+DB	0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+DB	0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+DB	0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+DB	0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+DB	0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+DB	0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+DB	0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+DB	0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+DB	0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+DB	0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+DB	0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+DB	0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+DB	0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+DB	0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+DB	0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+DB	0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+DB	0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+DB	0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+DB	0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+DB	0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+DB	0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+DB	0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+DB	0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+DB	0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+DB	0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+DB	0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+DB	0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+DB	0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+DB	0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+DB	0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+DB	0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+DB	0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+DB	0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+DB	0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+DB	0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+DB	0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+DB	0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+DB	0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+DB	0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+DB	0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+DB	0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+DB	0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+DB	0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+DB	0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+DB	0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+DB	0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+DB	0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+DB	0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+DB	0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+DB	0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+DB	0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+DB	0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+DB	0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+DB	0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+DB	0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+DB	0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+DB	0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+	DD	0x00000001,0x00000002,0x00000004,0x00000008	; constant words at $L$AES_Te+2048+1024; presumably the AES round constants used by key expansion -- confirm against the key-setup code
+	DD	0x00000010,0x00000020,0x00000040,0x00000080
+	DD	0x0000001b,0x00000036,0x80808080,0x80808080	; the 0x80808080 pair sits at byte offset 40 and is loaded as one qword mask by asm_AES_set_decrypt_key
+	DD	0xfefefefe,0xfefefefe,0x1b1b1b1b,0x1b1b1b1b	; qword masks at byte offsets 48 and 56, loaded alongside the one above
+ALIGN	64
+$L$AES_Td:
+	DD	0x50a7f451,0x50a7f451
+	DD	0x5365417e,0x5365417e
+	DD	0xc3a4171a,0xc3a4171a
+	DD	0x965e273a,0x965e273a
+	DD	0xcb6bab3b,0xcb6bab3b
+	DD	0xf1459d1f,0xf1459d1f
+	DD	0xab58faac,0xab58faac
+	DD	0x9303e34b,0x9303e34b
+	DD	0x55fa3020,0x55fa3020
+	DD	0xf66d76ad,0xf66d76ad
+	DD	0x9176cc88,0x9176cc88
+	DD	0x254c02f5,0x254c02f5
+	DD	0xfcd7e54f,0xfcd7e54f
+	DD	0xd7cb2ac5,0xd7cb2ac5
+	DD	0x80443526,0x80443526
+	DD	0x8fa362b5,0x8fa362b5
+	DD	0x495ab1de,0x495ab1de
+	DD	0x671bba25,0x671bba25
+	DD	0x980eea45,0x980eea45
+	DD	0xe1c0fe5d,0xe1c0fe5d
+	DD	0x02752fc3,0x02752fc3
+	DD	0x12f04c81,0x12f04c81
+	DD	0xa397468d,0xa397468d
+	DD	0xc6f9d36b,0xc6f9d36b
+	DD	0xe75f8f03,0xe75f8f03
+	DD	0x959c9215,0x959c9215
+	DD	0xeb7a6dbf,0xeb7a6dbf
+	DD	0xda595295,0xda595295
+	DD	0x2d83bed4,0x2d83bed4
+	DD	0xd3217458,0xd3217458
+	DD	0x2969e049,0x2969e049
+	DD	0x44c8c98e,0x44c8c98e
+	DD	0x6a89c275,0x6a89c275
+	DD	0x78798ef4,0x78798ef4
+	DD	0x6b3e5899,0x6b3e5899
+	DD	0xdd71b927,0xdd71b927
+	DD	0xb64fe1be,0xb64fe1be
+	DD	0x17ad88f0,0x17ad88f0
+	DD	0x66ac20c9,0x66ac20c9
+	DD	0xb43ace7d,0xb43ace7d
+	DD	0x184adf63,0x184adf63
+	DD	0x82311ae5,0x82311ae5
+	DD	0x60335197,0x60335197
+	DD	0x457f5362,0x457f5362
+	DD	0xe07764b1,0xe07764b1
+	DD	0x84ae6bbb,0x84ae6bbb
+	DD	0x1ca081fe,0x1ca081fe
+	DD	0x942b08f9,0x942b08f9
+	DD	0x58684870,0x58684870
+	DD	0x19fd458f,0x19fd458f
+	DD	0x876cde94,0x876cde94
+	DD	0xb7f87b52,0xb7f87b52
+	DD	0x23d373ab,0x23d373ab
+	DD	0xe2024b72,0xe2024b72
+	DD	0x578f1fe3,0x578f1fe3
+	DD	0x2aab5566,0x2aab5566
+	DD	0x0728ebb2,0x0728ebb2
+	DD	0x03c2b52f,0x03c2b52f
+	DD	0x9a7bc586,0x9a7bc586
+	DD	0xa50837d3,0xa50837d3
+	DD	0xf2872830,0xf2872830
+	DD	0xb2a5bf23,0xb2a5bf23
+	DD	0xba6a0302,0xba6a0302
+	DD	0x5c8216ed,0x5c8216ed
+	DD	0x2b1ccf8a,0x2b1ccf8a
+	DD	0x92b479a7,0x92b479a7
+	DD	0xf0f207f3,0xf0f207f3
+	DD	0xa1e2694e,0xa1e2694e
+	DD	0xcdf4da65,0xcdf4da65
+	DD	0xd5be0506,0xd5be0506
+	DD	0x1f6234d1,0x1f6234d1
+	DD	0x8afea6c4,0x8afea6c4
+	DD	0x9d532e34,0x9d532e34
+	DD	0xa055f3a2,0xa055f3a2
+	DD	0x32e18a05,0x32e18a05
+	DD	0x75ebf6a4,0x75ebf6a4
+	DD	0x39ec830b,0x39ec830b
+	DD	0xaaef6040,0xaaef6040
+	DD	0x069f715e,0x069f715e
+	DD	0x51106ebd,0x51106ebd
+	DD	0xf98a213e,0xf98a213e
+	DD	0x3d06dd96,0x3d06dd96
+	DD	0xae053edd,0xae053edd
+	DD	0x46bde64d,0x46bde64d
+	DD	0xb58d5491,0xb58d5491
+	DD	0x055dc471,0x055dc471
+	DD	0x6fd40604,0x6fd40604
+	DD	0xff155060,0xff155060
+	DD	0x24fb9819,0x24fb9819
+	DD	0x97e9bdd6,0x97e9bdd6
+	DD	0xcc434089,0xcc434089
+	DD	0x779ed967,0x779ed967
+	DD	0xbd42e8b0,0xbd42e8b0
+	DD	0x888b8907,0x888b8907
+	DD	0x385b19e7,0x385b19e7
+	DD	0xdbeec879,0xdbeec879
+	DD	0x470a7ca1,0x470a7ca1
+	DD	0xe90f427c,0xe90f427c
+	DD	0xc91e84f8,0xc91e84f8
+	DD	0x00000000,0x00000000
+	DD	0x83868009,0x83868009
+	DD	0x48ed2b32,0x48ed2b32
+	DD	0xac70111e,0xac70111e
+	DD	0x4e725a6c,0x4e725a6c
+	DD	0xfbff0efd,0xfbff0efd
+	DD	0x5638850f,0x5638850f
+	DD	0x1ed5ae3d,0x1ed5ae3d
+	DD	0x27392d36,0x27392d36
+	DD	0x64d90f0a,0x64d90f0a
+	DD	0x21a65c68,0x21a65c68
+	DD	0xd1545b9b,0xd1545b9b
+	DD	0x3a2e3624,0x3a2e3624
+	DD	0xb1670a0c,0xb1670a0c
+	DD	0x0fe75793,0x0fe75793
+	DD	0xd296eeb4,0xd296eeb4
+	DD	0x9e919b1b,0x9e919b1b
+	DD	0x4fc5c080,0x4fc5c080
+	DD	0xa220dc61,0xa220dc61
+	DD	0x694b775a,0x694b775a
+	DD	0x161a121c,0x161a121c
+	DD	0x0aba93e2,0x0aba93e2
+	DD	0xe52aa0c0,0xe52aa0c0
+	DD	0x43e0223c,0x43e0223c
+	DD	0x1d171b12,0x1d171b12
+	DD	0x0b0d090e,0x0b0d090e
+	DD	0xadc78bf2,0xadc78bf2
+	DD	0xb9a8b62d,0xb9a8b62d
+	DD	0xc8a91e14,0xc8a91e14
+	DD	0x8519f157,0x8519f157
+	DD	0x4c0775af,0x4c0775af
+	DD	0xbbdd99ee,0xbbdd99ee
+	DD	0xfd607fa3,0xfd607fa3
+	DD	0x9f2601f7,0x9f2601f7
+	DD	0xbcf5725c,0xbcf5725c
+	DD	0xc53b6644,0xc53b6644
+	DD	0x347efb5b,0x347efb5b
+	DD	0x7629438b,0x7629438b
+	DD	0xdcc623cb,0xdcc623cb
+	DD	0x68fcedb6,0x68fcedb6
+	DD	0x63f1e4b8,0x63f1e4b8
+	DD	0xcadc31d7,0xcadc31d7
+	DD	0x10856342,0x10856342
+	DD	0x40229713,0x40229713
+	DD	0x2011c684,0x2011c684
+	DD	0x7d244a85,0x7d244a85
+	DD	0xf83dbbd2,0xf83dbbd2
+	DD	0x1132f9ae,0x1132f9ae
+	DD	0x6da129c7,0x6da129c7
+	DD	0x4b2f9e1d,0x4b2f9e1d
+	DD	0xf330b2dc,0xf330b2dc
+	DD	0xec52860d,0xec52860d
+	DD	0xd0e3c177,0xd0e3c177
+	DD	0x6c16b32b,0x6c16b32b
+	DD	0x99b970a9,0x99b970a9
+	DD	0xfa489411,0xfa489411
+	DD	0x2264e947,0x2264e947
+	DD	0xc48cfca8,0xc48cfca8
+	DD	0x1a3ff0a0,0x1a3ff0a0
+	DD	0xd82c7d56,0xd82c7d56
+	DD	0xef903322,0xef903322
+	DD	0xc74e4987,0xc74e4987
+	DD	0xc1d138d9,0xc1d138d9
+	DD	0xfea2ca8c,0xfea2ca8c
+	DD	0x360bd498,0x360bd498
+	DD	0xcf81f5a6,0xcf81f5a6
+	DD	0x28de7aa5,0x28de7aa5
+	DD	0x268eb7da,0x268eb7da
+	DD	0xa4bfad3f,0xa4bfad3f
+	DD	0xe49d3a2c,0xe49d3a2c
+	DD	0x0d927850,0x0d927850
+	DD	0x9bcc5f6a,0x9bcc5f6a
+	DD	0x62467e54,0x62467e54
+	DD	0xc2138df6,0xc2138df6
+	DD	0xe8b8d890,0xe8b8d890
+	DD	0x5ef7392e,0x5ef7392e
+	DD	0xf5afc382,0xf5afc382
+	DD	0xbe805d9f,0xbe805d9f
+	DD	0x7c93d069,0x7c93d069
+	DD	0xa92dd56f,0xa92dd56f
+	DD	0xb31225cf,0xb31225cf
+	DD	0x3b99acc8,0x3b99acc8
+	DD	0xa77d1810,0xa77d1810
+	DD	0x6e639ce8,0x6e639ce8
+	DD	0x7bbb3bdb,0x7bbb3bdb
+	DD	0x097826cd,0x097826cd
+	DD	0xf418596e,0xf418596e
+	DD	0x01b79aec,0x01b79aec
+	DD	0xa89a4f83,0xa89a4f83
+	DD	0x656e95e6,0x656e95e6
+	DD	0x7ee6ffaa,0x7ee6ffaa
+	DD	0x08cfbc21,0x08cfbc21
+	DD	0xe6e815ef,0xe6e815ef
+	DD	0xd99be7ba,0xd99be7ba
+	DD	0xce366f4a,0xce366f4a
+	DD	0xd4099fea,0xd4099fea
+	DD	0xd67cb029,0xd67cb029
+	DD	0xafb2a431,0xafb2a431
+	DD	0x31233f2a,0x31233f2a
+	DD	0x3094a5c6,0x3094a5c6
+	DD	0xc066a235,0xc066a235
+	DD	0x37bc4e74,0x37bc4e74
+	DD	0xa6ca82fc,0xa6ca82fc
+	DD	0xb0d090e0,0xb0d090e0
+	DD	0x15d8a733,0x15d8a733
+	DD	0x4a9804f1,0x4a9804f1
+	DD	0xf7daec41,0xf7daec41
+	DD	0x0e50cd7f,0x0e50cd7f
+	DD	0x2ff69117,0x2ff69117
+	DD	0x8dd64d76,0x8dd64d76
+	DD	0x4db0ef43,0x4db0ef43
+	DD	0x544daacc,0x544daacc
+	DD	0xdf0496e4,0xdf0496e4
+	DD	0xe3b5d19e,0xe3b5d19e
+	DD	0x1b886a4c,0x1b886a4c
+	DD	0xb81f2cc1,0xb81f2cc1
+	DD	0x7f516546,0x7f516546
+	DD	0x04ea5e9d,0x04ea5e9d
+	DD	0x5d358c01,0x5d358c01
+	DD	0x737487fa,0x737487fa
+	DD	0x2e410bfb,0x2e410bfb
+	DD	0x5a1d67b3,0x5a1d67b3
+	DD	0x52d2db92,0x52d2db92
+	DD	0x335610e9,0x335610e9
+	DD	0x1347d66d,0x1347d66d
+	DD	0x8c61d79a,0x8c61d79a
+	DD	0x7a0ca137,0x7a0ca137
+	DD	0x8e14f859,0x8e14f859
+	DD	0x893c13eb,0x893c13eb
+	DD	0xee27a9ce,0xee27a9ce
+	DD	0x35c961b7,0x35c961b7
+	DD	0xede51ce1,0xede51ce1
+	DD	0x3cb1477a,0x3cb1477a
+	DD	0x59dfd29c,0x59dfd29c
+	DD	0x3f73f255,0x3f73f255
+	DD	0x79ce1418,0x79ce1418
+	DD	0xbf37c773,0xbf37c773
+	DD	0xeacdf753,0xeacdf753
+	DD	0x5baafd5f,0x5baafd5f
+	DD	0x146f3ddf,0x146f3ddf
+	DD	0x86db4478,0x86db4478
+	DD	0x81f3afca,0x81f3afca
+	DD	0x3ec468b9,0x3ec468b9
+	DD	0x2c342438,0x2c342438
+	DD	0x5f40a3c2,0x5f40a3c2
+	DD	0x72c31d16,0x72c31d16
+	DD	0x0c25e2bc,0x0c25e2bc
+	DD	0x8b493c28,0x8b493c28
+	DD	0x41950dff,0x41950dff
+	DD	0x7101a839,0x7101a839
+	DD	0xdeb30c08,0xdeb30c08
+	DD	0x9ce4b4d8,0x9ce4b4d8
+	DD	0x90c15664,0x90c15664
+	DD	0x6184cb7b,0x6184cb7b
+	DD	0x70b632d5,0x70b632d5
+	DD	0x745c6c48,0x745c6c48
+	DD	0x4257b8d0,0x4257b8d0
+DB	0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+DB	0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+DB	0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+DB	0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+DB	0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+DB	0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+DB	0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+DB	0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+DB	0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+DB	0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+DB	0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+DB	0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+DB	0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+DB	0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+DB	0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+DB	0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+DB	0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+DB	0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+DB	0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+DB	0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+DB	0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+DB	0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+DB	0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+DB	0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+DB	0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+DB	0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+DB	0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+DB	0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+DB	0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+DB	0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+DB	0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+DB	0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+	DD	0x80808080,0x80808080,0xfefefefe,0xfefefefe
+	DD	0x1b1b1b1b,0x1b1b1b1b,0,0
+DB	0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+DB	0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+DB	0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+DB	0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+DB	0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+DB	0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+DB	0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+DB	0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+DB	0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+DB	0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+DB	0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+DB	0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+DB	0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+DB	0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+DB	0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+DB	0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+DB	0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+DB	0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+DB	0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+DB	0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+DB	0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+DB	0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+DB	0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+DB	0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+DB	0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+DB	0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+DB	0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+DB	0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+DB	0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+DB	0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+DB	0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+DB	0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+	DD	0x80808080,0x80808080,0xfefefefe,0xfefefefe
+	DD	0x1b1b1b1b,0x1b1b1b1b,0,0
+DB	0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+DB	0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+DB	0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+DB	0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+DB	0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+DB	0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+DB	0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+DB	0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+DB	0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+DB	0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+DB	0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+DB	0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+DB	0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+DB	0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+DB	0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+DB	0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+DB	0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+DB	0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+DB	0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+DB	0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+DB	0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+DB	0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+DB	0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+DB	0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+DB	0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+DB	0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+DB	0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+DB	0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+DB	0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+DB	0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+DB	0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+DB	0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+	DD	0x80808080,0x80808080,0xfefefefe,0xfefefefe
+	DD	0x1b1b1b1b,0x1b1b1b1b,0,0
+DB	0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+DB	0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+DB	0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+DB	0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+DB	0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+DB	0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+DB	0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+DB	0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+DB	0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+DB	0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+DB	0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+DB	0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+DB	0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+DB	0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+DB	0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+DB	0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+DB	0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+DB	0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+DB	0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+DB	0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+DB	0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+DB	0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+DB	0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+DB	0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+DB	0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+DB	0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+DB	0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+DB	0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+DB	0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+DB	0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+DB	0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+DB	0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+	DD	0x80808080,0x80808080,0xfefefefe,0xfefefefe
+	DD	0x1b1b1b1b,0x1b1b1b1b,0,0
+DB	65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32	; NUL-terminated ASCII credit string; these 16 bytes spell "AES for x86_64, "
+DB	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97	; "CRYPTOGAMS by " followed by an opening angle bracket and "a"
+DB	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103	; "ppro@openssl.org"
+DB	62,0	; closing angle bracket, then the terminating NUL
+ALIGN	64
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+block_se_handler:	; Win64 SEH language-specific handler named by the asm_AES_encrypt/asm_AES_decrypt unwind info below; r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (offsets follow the Win64 layouts of those structures)
+	push	rsi	; preserve every register this handler modifies, plus the flags
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; 32 bytes of home space + four stack arguments for the RtlVirtualUnwind call made at $L$common_seh_exit
+
+	mov	rax,QWORD[120+r8]	; rax = context->Rax
+	mov	rbx,QWORD[248+r8]	; rbx = context->Rip (faulting instruction address)
+
+	mov	rsi,QWORD[8+r9]	; rsi = disp->ImageBase
+	mov	r11,QWORD[56+r9]	; r11 = disp->HandlerData (the two label RVAs stored after the handler RVA in .xdata)
+
+	mov	r10d,DWORD[r11]	; HandlerData[0]: RVA of the function's prologue label
+	lea	r10,[r10*1+rsi]	; convert RVA to an absolute address
+	cmp	rbx,r10
+	jb	NEAR $L$in_block_prologue	; fault before the prologue label: use context->Rax as the frame base
+
+	mov	rax,QWORD[152+r8]	; rax = context->Rsp
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1]: RVA of the function's epilogue label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$in_block_prologue	; fault at or after the epilogue label: use context->Rsp as the frame base
+
+	mov	rax,QWORD[24+rax]	; fault inside the body: fetch the pointer kept at 24(rsp); presumably the stack pointer saved by the function prologue (function body is outside this view - confirm)
+
+	mov	rbx,QWORD[((-8))+rax]	; reload the six non-volatile GPRs from the slots just below that pointer
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	r15,QWORD[((-48))+rax]
+	mov	QWORD[144+r8],rbx	; context->Rbx
+	mov	QWORD[160+r8],rbp	; context->Rbp
+	mov	QWORD[216+r8],r12	; context->R12
+	mov	QWORD[224+r8],r13	; context->R13
+	mov	QWORD[232+r8],r14	; context->R14
+	mov	QWORD[240+r8],r15	; context->R15
+
+$L$in_block_prologue:
+	mov	rdi,QWORD[8+rax]	; values at 8(rax)/16(rax); presumably the rdi/rsi copies stored by the WIN64 prologue of the covered function (not visible here - confirm)
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context->Rsp = rax
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+
+	jmp	NEAR $L$common_seh_exit	; shared tail located inside cbc_se_handler
+
+
+
+ALIGN	16
+key_se_handler:	; Win64 SEH language-specific handler named by the asm_AES_set_encrypt_key/asm_AES_set_decrypt_key unwind info; r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*
+	push	rsi	; preserve every register this handler modifies, plus the flags
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; home space + stack arguments for the RtlVirtualUnwind call made at $L$common_seh_exit
+
+	mov	rax,QWORD[120+r8]	; rax = context->Rax
+	mov	rbx,QWORD[248+r8]	; rbx = context->Rip
+
+	mov	rsi,QWORD[8+r9]	; rsi = disp->ImageBase
+	mov	r11,QWORD[56+r9]	; r11 = disp->HandlerData
+
+	mov	r10d,DWORD[r11]	; HandlerData[0]: RVA of the prologue label
+	lea	r10,[r10*1+rsi]	; RVA -> absolute address
+	cmp	rbx,r10
+	jb	NEAR $L$in_key_prologue	; fault before the prologue label
+
+	mov	rax,QWORD[152+r8]	; rax = context->Rsp
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1]: RVA of the epilogue label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$in_key_prologue	; fault at or after the epilogue label
+
+	lea	rax,[56+rax]	; fault inside the body: frame base is Rsp+56 (unlike block_se_handler, no pointer is loaded from the stack)
+
+	mov	rbx,QWORD[((-8))+rax]	; reload the six non-volatile GPRs from the slots just below the frame base
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	r15,QWORD[((-48))+rax]
+	mov	QWORD[144+r8],rbx	; context->Rbx
+	mov	QWORD[160+r8],rbp	; context->Rbp
+	mov	QWORD[216+r8],r12	; context->R12
+	mov	QWORD[224+r8],r13	; context->R13
+	mov	QWORD[232+r8],r14	; context->R14
+	mov	QWORD[240+r8],r15	; context->R15
+
+$L$in_key_prologue:
+	mov	rdi,QWORD[8+rax]	; values at 8(rax)/16(rax); presumably the rdi/rsi copies stored by the covered function's WIN64 prologue (not visible here - confirm)
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context->Rsp = rax
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+
+	jmp	NEAR $L$common_seh_exit	; shared tail located inside cbc_se_handler
+
+
+
+ALIGN	16
+cbc_se_handler:	; Win64 SEH language-specific handler named by the asm_AES_cbc_encrypt unwind info; compares Rip against code labels directly instead of reading HandlerData; also hosts $L$common_seh_exit used by block_se_handler and key_se_handler
+	push	rsi	; preserve every register this handler modifies, plus the flags
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; 32 bytes of home space + four stack arguments for RtlVirtualUnwind
+
+	mov	rax,QWORD[120+r8]	; rax = context->Rax
+	mov	rbx,QWORD[248+r8]	; rbx = context->Rip
+
+	lea	r10,[$L$cbc_prologue]
+	cmp	rbx,r10
+	jb	NEAR $L$in_cbc_prologue	; Rip below $L$cbc_prologue: only rdi/rsi/rsp need recovering
+
+	lea	r10,[$L$cbc_fast_body]
+	cmp	rbx,r10
+	jb	NEAR $L$in_cbc_frame_setup	; Rip in [$L$cbc_prologue, $L$cbc_fast_body): recover GPRs relative to context->Rax
+
+	lea	r10,[$L$cbc_slow_prologue]
+	cmp	rbx,r10
+	jb	NEAR $L$in_cbc_body	; Rip in [$L$cbc_fast_body, $L$cbc_slow_prologue)
+
+	lea	r10,[$L$cbc_slow_body]
+	cmp	rbx,r10
+	jb	NEAR $L$in_cbc_frame_setup	; Rip in [$L$cbc_slow_prologue, $L$cbc_slow_body): recover GPRs relative to context->Rax
+
+$L$in_cbc_body:
+	mov	rax,QWORD[152+r8]	; rax = context->Rsp
+
+	lea	r10,[$L$cbc_epilogue]
+	cmp	rbx,r10
+	jae	NEAR $L$in_cbc_prologue	; at or past $L$cbc_epilogue: frame base is Rsp itself
+
+	lea	rax,[8+rax]	; skip one 8-byte stack slot
+
+	lea	r10,[$L$cbc_popfq]
+	cmp	rbx,r10
+	jae	NEAR $L$in_cbc_prologue	; in [$L$cbc_popfq, $L$cbc_epilogue): frame base is Rsp+8
+
+	mov	rax,QWORD[8+rax]	; otherwise load the pointer kept at 16(Rsp); presumably the stack pointer saved by the function body (not visible here - confirm)
+	lea	rax,[56+rax]	; and step 56 bytes past it to reach the frame base
+
+$L$in_cbc_frame_setup:
+	mov	rbx,QWORD[((-16))+rax]	; reload the six non-volatile GPRs; note the slots start at -16 here, not -8 as in the other two handlers
+	mov	rbp,QWORD[((-24))+rax]
+	mov	r12,QWORD[((-32))+rax]
+	mov	r13,QWORD[((-40))+rax]
+	mov	r14,QWORD[((-48))+rax]
+	mov	r15,QWORD[((-56))+rax]
+	mov	QWORD[144+r8],rbx	; context->Rbx
+	mov	QWORD[160+r8],rbp	; context->Rbp
+	mov	QWORD[216+r8],r12	; context->R12
+	mov	QWORD[224+r8],r13	; context->R13
+	mov	QWORD[232+r8],r14	; context->R14
+	mov	QWORD[240+r8],r15	; context->R15
+
+$L$in_cbc_prologue:
+	mov	rdi,QWORD[8+rax]	; values at 8(rax)/16(rax); presumably the rdi/rsi copies stored by the covered function's WIN64 prologue (not visible here - confirm)
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context->Rsp = rax
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+
+$L$common_seh_exit:	; shared tail: block_se_handler and key_se_handler jump here with r8/r9 still holding CONTEXT*/DISPATCHER_CONTEXT*
+
+	mov	rdi,QWORD[40+r9]	; destination = disp->ContextRecord
+	mov	rsi,r8	; source = the patched CONTEXT
+	mov	ecx,154	; 154 quadwords = 1232 bytes copied
+	DD	0xa548f3fc	; raw bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rsi,r9	; rsi = DISPATCHER_CONTEXT*
+	xor	rcx,rcx	; arg1 (HandlerType) = 0
+	mov	rdx,QWORD[8+rsi]	; arg2 = disp->ImageBase
+	mov	r8,QWORD[rsi]	; arg3 = disp->ControlPc
+	mov	r9,QWORD[16+rsi]	; arg4 = disp->FunctionEntry
+	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
+	lea	r11,[56+rsi]	; &disp->HandlerData
+	lea	r12,[24+rsi]	; &disp->EstablisherFrame
+	mov	QWORD[32+rsp],r10	; arg5 = ContextRecord
+	mov	QWORD[40+rsp],r11	; arg6 = &HandlerData
+	mov	QWORD[48+rsp],r12	; arg7 = &EstablisherFrame
+	mov	QWORD[56+rsp],rcx	; arg8 = NULL (rcx was zeroed above)
+	call	QWORD[__imp_RtlVirtualUnwind]	; indirect call through the import-table slot declared EXTERN above
+
+	mov	eax,1	; handler return value 1 (ExceptionContinueSearch in the Win64 SEH convention)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4
+ALIGN	4
+	DD	$L$SEH_begin_asm_AES_encrypt wrt ..imagebase	; one three-DWORD record per covered function: begin RVA, end RVA, RVA of its unwind info in .xdata
+	DD	$L$SEH_end_asm_AES_encrypt wrt ..imagebase
+	DD	$L$SEH_info_asm_AES_encrypt wrt ..imagebase
+
+	DD	$L$SEH_begin_asm_AES_decrypt wrt ..imagebase	; record for asm_AES_decrypt
+	DD	$L$SEH_end_asm_AES_decrypt wrt ..imagebase
+	DD	$L$SEH_info_asm_AES_decrypt wrt ..imagebase
+
+	DD	$L$SEH_begin_asm_AES_set_encrypt_key wrt ..imagebase	; record for asm_AES_set_encrypt_key
+	DD	$L$SEH_end_asm_AES_set_encrypt_key wrt ..imagebase
+	DD	$L$SEH_info_asm_AES_set_encrypt_key wrt ..imagebase
+
+	DD	$L$SEH_begin_asm_AES_set_decrypt_key wrt ..imagebase	; record for asm_AES_set_decrypt_key
+	DD	$L$SEH_end_asm_AES_set_decrypt_key wrt ..imagebase
+	DD	$L$SEH_info_asm_AES_set_decrypt_key wrt ..imagebase
+
+	DD	$L$SEH_begin_asm_AES_cbc_encrypt wrt ..imagebase	; record for asm_AES_cbc_encrypt
+	DD	$L$SEH_end_asm_AES_cbc_encrypt wrt ..imagebase
+	DD	$L$SEH_info_asm_AES_cbc_encrypt wrt ..imagebase
+
+section	.xdata rdata align=8
+ALIGN	8
+$L$SEH_info_asm_AES_encrypt:
+DB	9,0,0,0	; 4-byte unwind-info header: first byte 9 = version 1 with the exception-handler flag (1 shl 3); prologue size, unwind-code count and frame register are all 0
+	DD	block_se_handler wrt ..imagebase	; RVA of the language-specific handler
+	DD	$L$enc_prologue wrt ..imagebase,$L$enc_epilogue wrt ..imagebase	; handler data: the two label RVAs block_se_handler reads as HandlerData[0] and HandlerData[1]
+$L$SEH_info_asm_AES_decrypt:
+DB	9,0,0,0	; same header as above
+	DD	block_se_handler wrt ..imagebase
+	DD	$L$dec_prologue wrt ..imagebase,$L$dec_epilogue wrt ..imagebase	; prologue/epilogue label RVAs for block_se_handler
+$L$SEH_info_asm_AES_set_encrypt_key:
+DB	9,0,0,0	; same header as above
+	DD	key_se_handler wrt ..imagebase
+	DD	$L$enc_key_prologue wrt ..imagebase,$L$enc_key_epilogue wrt ..imagebase	; prologue/epilogue label RVAs for key_se_handler
+$L$SEH_info_asm_AES_set_decrypt_key:
+DB	9,0,0,0	; same header as above
+	DD	key_se_handler wrt ..imagebase
+	DD	$L$dec_key_prologue wrt ..imagebase,$L$dec_key_epilogue wrt ..imagebase	; prologue/epilogue label RVAs for key_se_handler
+$L$SEH_info_asm_AES_cbc_encrypt:
+DB	9,0,0,0	; same header as above
+	DD	cbc_se_handler wrt ..imagebase	; no handler data follows: cbc_se_handler compares Rip against its labels directly
diff --git a/third_party/boringssl/win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm
new file mode 100644
index 0000000..63bcd48
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm
@@ -0,0 +1,1022 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.text code align=64
+
+
+
+ALIGN	32
+_aesni_ctr32_ghash_6x:	; internal helper called by aesni_gcm_decrypt and aesni_gcm_encrypt: per iteration it runs AES rounds (vaesenc) over six counter blocks interleaved with carry-less multiplies (vpclmulqdq) on data staged at 8+N(rsp); all registers are set up by the callers
+
+	vmovdqu	xmm2,XMMWORD[32+r11]	; callers point r11 at $L$bswap_mask, so r11+32 is $L$one_msb (1 in the last byte)
+	sub	rdx,6	; rdx counts 16-byte blocks (callers shift the byte length right by 4); six are consumed per iteration
+	vpxor	xmm4,xmm4,xmm4	; xmm4 = 0
+	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]	; first round key; callers bias rcx by +128
+	vpaddb	xmm10,xmm1,xmm2	; counter blocks +1 .. +5, bumping only the last byte
+	vpaddb	xmm11,xmm10,xmm2
+	vpaddb	xmm12,xmm11,xmm2
+	vpaddb	xmm13,xmm12,xmm2
+	vpaddb	xmm14,xmm13,xmm2
+	vpxor	xmm9,xmm1,xmm15	; first block: counter XOR round key 0
+	vmovdqu	XMMWORD[(16+8)+rsp],xmm4	; zero the 16-byte slot at 24(rsp)
+	jmp	NEAR $L$oop6x
+
+ALIGN	32
+$L$oop6x:
+	add	ebx,100663296	; 100663296 = 0x06000000: add 6 to the top byte of ebx (callers load ebx from counter bytes 12..15, so this is the last counter byte)
+	jc	NEAR $L$handle_ctr32	; carry out of that byte: the byte-wise vpaddb increments would wrap, take the 32-bit add path
+	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
+	vpaddb	xmm1,xmm14,xmm2	; counter for the start of the next batch
+	vpxor	xmm10,xmm10,xmm15
+	vpxor	xmm11,xmm11,xmm15
+
+$L$resume_ctr32:
+	vmovdqu	XMMWORD[r8],xmm1	; write the next-batch counter back to the counter block at r8
+	vpclmulqdq	xmm5,xmm7,xmm3,0x10
+	vpxor	xmm12,xmm12,xmm15
+	vmovups	xmm2,XMMWORD[((16-128))+rcx]
+	vpclmulqdq	xmm6,xmm7,xmm3,0x01
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+	xor	r12,r12
+	cmp	r15,r14	; together with setnc/neg/and/lea below: r14 += 0x60 when r15 >= r14 (unsigned), otherwise r14 stays put
+
+	vaesenc	xmm9,xmm9,xmm2
+	vmovdqu	xmm0,XMMWORD[((48+8))+rsp]
+	vpxor	xmm13,xmm13,xmm15
+	vpclmulqdq	xmm1,xmm7,xmm3,0x00
+	vaesenc	xmm10,xmm10,xmm2
+	vpxor	xmm14,xmm14,xmm15
+	setnc	r12b	; consumes the carry flag from cmp r15,r14 (the vector instructions in between do not touch flags)
+	vpclmulqdq	xmm7,xmm7,xmm3,0x11
+	vaesenc	xmm11,xmm11,xmm2
+	vmovdqu	xmm3,XMMWORD[((16-32))+r9]
+	neg	r12	; 0 or all-ones
+	vaesenc	xmm12,xmm12,xmm2
+	vpxor	xmm6,xmm6,xmm5
+	vpclmulqdq	xmm5,xmm0,xmm3,0x00
+	vpxor	xmm8,xmm8,xmm4
+	vaesenc	xmm13,xmm13,xmm2
+	vpxor	xmm4,xmm1,xmm5
+	and	r12,0x60	; 0 or 96
+	vmovups	xmm15,XMMWORD[((32-128))+rcx]
+	vpclmulqdq	xmm1,xmm0,xmm3,0x10
+	vaesenc	xmm14,xmm14,xmm2
+
+	vpclmulqdq	xmm2,xmm0,xmm3,0x01
+	lea	r14,[r12*1+r14]	; conditionally advance r14 by 96 bytes
+	vaesenc	xmm9,xmm9,xmm15
+	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
+	vpclmulqdq	xmm3,xmm0,xmm3,0x11
+	vmovdqu	xmm0,XMMWORD[((64+8))+rsp]
+	vaesenc	xmm10,xmm10,xmm15
+	movbe	r13,QWORD[88+r14]	; movbe = byte-swapping load; the 96 bytes at r14 are fetched 8 bytes at a time, highest offset first, and parked on the stack below
+	vaesenc	xmm11,xmm11,xmm15
+	movbe	r12,QWORD[80+r14]
+	vaesenc	xmm12,xmm12,xmm15
+	mov	QWORD[((32+8))+rsp],r13
+	vaesenc	xmm13,xmm13,xmm15
+	mov	QWORD[((40+8))+rsp],r12
+	vmovdqu	xmm5,XMMWORD[((48-32))+r9]
+	vaesenc	xmm14,xmm14,xmm15
+
+	vmovups	xmm15,XMMWORD[((48-128))+rcx]
+	vpxor	xmm6,xmm6,xmm1
+	vpclmulqdq	xmm1,xmm0,xmm5,0x00
+	vaesenc	xmm9,xmm9,xmm15
+	vpxor	xmm6,xmm6,xmm2
+	vpclmulqdq	xmm2,xmm0,xmm5,0x10
+	vaesenc	xmm10,xmm10,xmm15
+	vpxor	xmm7,xmm7,xmm3
+	vpclmulqdq	xmm3,xmm0,xmm5,0x01
+	vaesenc	xmm11,xmm11,xmm15
+	vpclmulqdq	xmm5,xmm0,xmm5,0x11
+	vmovdqu	xmm0,XMMWORD[((80+8))+rsp]
+	vaesenc	xmm12,xmm12,xmm15
+	vaesenc	xmm13,xmm13,xmm15
+	vpxor	xmm4,xmm4,xmm1
+	vmovdqu	xmm1,XMMWORD[((64-32))+r9]
+	vaesenc	xmm14,xmm14,xmm15
+
+	vmovups	xmm15,XMMWORD[((64-128))+rcx]
+	vpxor	xmm6,xmm6,xmm2
+	vpclmulqdq	xmm2,xmm0,xmm1,0x00
+	vaesenc	xmm9,xmm9,xmm15
+	vpxor	xmm6,xmm6,xmm3
+	vpclmulqdq	xmm3,xmm0,xmm1,0x10
+	vaesenc	xmm10,xmm10,xmm15
+	movbe	r13,QWORD[72+r14]
+	vpxor	xmm7,xmm7,xmm5
+	vpclmulqdq	xmm5,xmm0,xmm1,0x01
+	vaesenc	xmm11,xmm11,xmm15
+	movbe	r12,QWORD[64+r14]
+	vpclmulqdq	xmm1,xmm0,xmm1,0x11
+	vmovdqu	xmm0,XMMWORD[((96+8))+rsp]
+	vaesenc	xmm12,xmm12,xmm15
+	mov	QWORD[((48+8))+rsp],r13
+	vaesenc	xmm13,xmm13,xmm15
+	mov	QWORD[((56+8))+rsp],r12
+	vpxor	xmm4,xmm4,xmm2
+	vmovdqu	xmm2,XMMWORD[((96-32))+r9]
+	vaesenc	xmm14,xmm14,xmm15
+
+	vmovups	xmm15,XMMWORD[((80-128))+rcx]
+	vpxor	xmm6,xmm6,xmm3
+	vpclmulqdq	xmm3,xmm0,xmm2,0x00
+	vaesenc	xmm9,xmm9,xmm15
+	vpxor	xmm6,xmm6,xmm5
+	vpclmulqdq	xmm5,xmm0,xmm2,0x10
+	vaesenc	xmm10,xmm10,xmm15
+	movbe	r13,QWORD[56+r14]
+	vpxor	xmm7,xmm7,xmm1
+	vpclmulqdq	xmm1,xmm0,xmm2,0x01
+	vpxor	xmm8,xmm8,XMMWORD[((112+8))+rsp]
+	vaesenc	xmm11,xmm11,xmm15
+	movbe	r12,QWORD[48+r14]
+	vpclmulqdq	xmm2,xmm0,xmm2,0x11
+	vaesenc	xmm12,xmm12,xmm15
+	mov	QWORD[((64+8))+rsp],r13
+	vaesenc	xmm13,xmm13,xmm15
+	mov	QWORD[((72+8))+rsp],r12
+	vpxor	xmm4,xmm4,xmm3
+	vmovdqu	xmm3,XMMWORD[((112-32))+r9]
+	vaesenc	xmm14,xmm14,xmm15
+
+	vmovups	xmm15,XMMWORD[((96-128))+rcx]
+	vpxor	xmm6,xmm6,xmm5
+	vpclmulqdq	xmm5,xmm8,xmm3,0x10
+	vaesenc	xmm9,xmm9,xmm15
+	vpxor	xmm6,xmm6,xmm1
+	vpclmulqdq	xmm1,xmm8,xmm3,0x01
+	vaesenc	xmm10,xmm10,xmm15
+	movbe	r13,QWORD[40+r14]
+	vpxor	xmm7,xmm7,xmm2
+	vpclmulqdq	xmm2,xmm8,xmm3,0x00
+	vaesenc	xmm11,xmm11,xmm15
+	movbe	r12,QWORD[32+r14]
+	vpclmulqdq	xmm8,xmm8,xmm3,0x11
+	vaesenc	xmm12,xmm12,xmm15
+	mov	QWORD[((80+8))+rsp],r13
+	vaesenc	xmm13,xmm13,xmm15
+	mov	QWORD[((88+8))+rsp],r12
+	vpxor	xmm6,xmm6,xmm5
+	vaesenc	xmm14,xmm14,xmm15
+	vpxor	xmm6,xmm6,xmm1
+
+	vmovups	xmm15,XMMWORD[((112-128))+rcx]
+	vpslldq	xmm5,xmm6,8
+	vpxor	xmm4,xmm4,xmm2
+	vmovdqu	xmm3,XMMWORD[16+r11]	; r11+16 is $L$poly when r11 = $L$bswap_mask (as set by the callers)
+
+	vaesenc	xmm9,xmm9,xmm15
+	vpxor	xmm7,xmm7,xmm8
+	vaesenc	xmm10,xmm10,xmm15
+	vpxor	xmm4,xmm4,xmm5
+	movbe	r13,QWORD[24+r14]
+	vaesenc	xmm11,xmm11,xmm15
+	movbe	r12,QWORD[16+r14]
+	vpalignr	xmm0,xmm4,xmm4,8
+	vpclmulqdq	xmm4,xmm4,xmm3,0x10
+	mov	QWORD[((96+8))+rsp],r13
+	vaesenc	xmm12,xmm12,xmm15
+	mov	QWORD[((104+8))+rsp],r12
+	vaesenc	xmm13,xmm13,xmm15
+	vmovups	xmm1,XMMWORD[((128-128))+rcx]
+	vaesenc	xmm14,xmm14,xmm15
+
+	vaesenc	xmm9,xmm9,xmm1
+	vmovups	xmm15,XMMWORD[((144-128))+rcx]
+	vaesenc	xmm10,xmm10,xmm1
+	vpsrldq	xmm6,xmm6,8
+	vaesenc	xmm11,xmm11,xmm1
+	vpxor	xmm7,xmm7,xmm6
+	vaesenc	xmm12,xmm12,xmm1
+	vpxor	xmm4,xmm4,xmm0
+	movbe	r13,QWORD[8+r14]
+	vaesenc	xmm13,xmm13,xmm1
+	movbe	r12,QWORD[r14]
+	vaesenc	xmm14,xmm14,xmm1
+	vmovups	xmm1,XMMWORD[((160-128))+rcx]
+	cmp	ebp,11	; ebp = DWORD at key offset 240 (loaded by the callers); its relation to 11 selects how many further round keys are applied
+	jb	NEAR $L$enc_tail	; ebp < 11: go straight to the final rounds
+
+	vaesenc	xmm9,xmm9,xmm15
+	vaesenc	xmm10,xmm10,xmm15
+	vaesenc	xmm11,xmm11,xmm15
+	vaesenc	xmm12,xmm12,xmm15
+	vaesenc	xmm13,xmm13,xmm15
+	vaesenc	xmm14,xmm14,xmm15
+
+	vaesenc	xmm9,xmm9,xmm1
+	vaesenc	xmm10,xmm10,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+	vmovups	xmm15,XMMWORD[((176-128))+rcx]
+	vaesenc	xmm14,xmm14,xmm1
+	vmovups	xmm1,XMMWORD[((192-128))+rcx]
+	je	NEAR $L$enc_tail	; still the flags from cmp ebp,11 (vector ops leave flags alone): ebp == 11 needs two extra rounds only
+
+	vaesenc	xmm9,xmm9,xmm15
+	vaesenc	xmm10,xmm10,xmm15
+	vaesenc	xmm11,xmm11,xmm15
+	vaesenc	xmm12,xmm12,xmm15
+	vaesenc	xmm13,xmm13,xmm15
+	vaesenc	xmm14,xmm14,xmm15
+
+	vaesenc	xmm9,xmm9,xmm1
+	vaesenc	xmm10,xmm10,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+	vmovups	xmm15,XMMWORD[((208-128))+rcx]
+	vaesenc	xmm14,xmm14,xmm1
+	vmovups	xmm1,XMMWORD[((224-128))+rcx]
+	jmp	NEAR $L$enc_tail	; ebp > 11: four extra rounds applied
+
+ALIGN	32
+$L$handle_ctr32:	; slow path for counter increments that carry out of the last byte
+	vmovdqu	xmm0,XMMWORD[r11]	; byte-reversal mask ($L$bswap_mask when r11 is set as the callers do)
+	vpshufb	xmm6,xmm1,xmm0	; byte-reverse the counter block so 32-bit adds apply
+	vmovdqu	xmm5,XMMWORD[48+r11]	; r11+48 = $L$two_lsb
+	vpaddd	xmm10,xmm6,XMMWORD[64+r11]	; r11+64 = $L$one_lsb: counter + 1
+	vpaddd	xmm11,xmm6,xmm5	; counter + 2; the remaining values follow in steps of two
+	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
+	vpaddd	xmm12,xmm10,xmm5
+	vpshufb	xmm10,xmm10,xmm0	; reverse each value back to its original byte order
+	vpaddd	xmm13,xmm11,xmm5
+	vpshufb	xmm11,xmm11,xmm0
+	vpxor	xmm10,xmm10,xmm15
+	vpaddd	xmm14,xmm12,xmm5
+	vpshufb	xmm12,xmm12,xmm0
+	vpxor	xmm11,xmm11,xmm15
+	vpaddd	xmm1,xmm13,xmm5
+	vpshufb	xmm13,xmm13,xmm0
+	vpshufb	xmm14,xmm14,xmm0
+	vpshufb	xmm1,xmm1,xmm0
+	jmp	NEAR $L$resume_ctr32
+
+ALIGN	32
+$L$enc_tail:
+	vaesenc	xmm9,xmm9,xmm15
+	vmovdqu	XMMWORD[(16+8)+rsp],xmm7
+	vpalignr	xmm8,xmm4,xmm4,8
+	vaesenc	xmm10,xmm10,xmm15
+	vpclmulqdq	xmm4,xmm4,xmm3,0x10
+	vpxor	xmm2,xmm1,XMMWORD[rdi]	; fold the last round key (xmm1) into the six 16-byte input blocks at rdi so vaesenclast performs the final XOR with the data
+	vaesenc	xmm11,xmm11,xmm15
+	vpxor	xmm0,xmm1,XMMWORD[16+rdi]
+	vaesenc	xmm12,xmm12,xmm15
+	vpxor	xmm5,xmm1,XMMWORD[32+rdi]
+	vaesenc	xmm13,xmm13,xmm15
+	vpxor	xmm6,xmm1,XMMWORD[48+rdi]
+	vaesenc	xmm14,xmm14,xmm15
+	vpxor	xmm7,xmm1,XMMWORD[64+rdi]
+	vpxor	xmm3,xmm1,XMMWORD[80+rdi]
+	vmovdqu	xmm1,XMMWORD[r8]	; reload the counter stored at $L$resume_ctr32
+
+	vaesenclast	xmm9,xmm9,xmm2
+	vmovdqu	xmm2,XMMWORD[32+r11]
+	vaesenclast	xmm10,xmm10,xmm0
+	vpaddb	xmm0,xmm1,xmm2	; pre-compute the next batch of counter blocks
+	mov	QWORD[((112+8))+rsp],r13
+	lea	rdi,[96+rdi]	; input pointer += 96
+	vaesenclast	xmm11,xmm11,xmm5
+	vpaddb	xmm5,xmm0,xmm2
+	mov	QWORD[((120+8))+rsp],r12
+	lea	rsi,[96+rsi]	; output pointer += 96 (stores below and in the callers use negative offsets)
+	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]
+	vaesenclast	xmm12,xmm12,xmm6
+	vpaddb	xmm6,xmm5,xmm2
+	vaesenclast	xmm13,xmm13,xmm7
+	vpaddb	xmm7,xmm6,xmm2
+	vaesenclast	xmm14,xmm14,xmm3
+	vpaddb	xmm3,xmm7,xmm2
+
+	add	r10,0x60	; r10 accumulates bytes processed; the callers return it in rax
+	sub	rdx,0x6	; six more blocks consumed
+	jc	NEAR $L$6x_done	; borrow: fewer than six blocks were left, so this was the last batch
+
+	vmovups	XMMWORD[(-96)+rsi],xmm9	; store the six result blocks and rotate the pre-computed counters into place
+	vpxor	xmm9,xmm1,xmm15
+	vmovups	XMMWORD[(-80)+rsi],xmm10
+	vmovdqa	xmm10,xmm0
+	vmovups	XMMWORD[(-64)+rsi],xmm11
+	vmovdqa	xmm11,xmm5
+	vmovups	XMMWORD[(-48)+rsi],xmm12
+	vmovdqa	xmm12,xmm6
+	vmovups	XMMWORD[(-32)+rsi],xmm13
+	vmovdqa	xmm13,xmm7
+	vmovups	XMMWORD[(-16)+rsi],xmm14
+	vmovdqa	xmm14,xmm3
+	vmovdqu	xmm7,XMMWORD[((32+8))+rsp]
+	jmp	NEAR $L$oop6x
+
+$L$6x_done:	; the final six result blocks stay in xmm9..xmm14 for the caller to store
+	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
+	vpxor	xmm8,xmm8,xmm4
+
+	DB	0F3h,0C3h		;repret
+
+
+global	aesni_gcm_decrypt
+
+ALIGN	32
+aesni_gcm_decrypt:	; exported Win64 entry point taking six arguments (rcx, rdx, r8, r9 and two on the stack); returns in rax the byte count accumulated in r10, which is 0 when the length argument is below 0x60
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesni_gcm_decrypt:
+	mov	rdi,rcx	; remap the Win64 argument registers: arg1 -> rdi (input pointer, read below)
+	mov	rsi,rdx	; arg2 -> rsi (output pointer, written below)
+	mov	rdx,r8	; arg3 -> rdx (length in bytes)
+	mov	rcx,r9	; arg4 -> rcx (key schedule; the DWORD at offset 240 is read below)
+	mov	r8,QWORD[40+rsp]	; arg5 from the stack -> r8 (16-byte counter block)
+	mov	r9,QWORD[48+rsp]	; arg6 from the stack -> r9 (16-byte hash value followed by further table data)
+
+
+
+	xor	r10,r10	; bytes processed so far = 0
+
+
+
+	cmp	rdx,0x60
+	jb	NEAR $L$gcm_dec_abort	; fewer than 96 bytes: return 0 without touching anything
+
+	lea	rax,[rsp]	; rax = entry stack pointer; the epilogue and gcm_se_handler address the save area relative to it
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	lea	rsp,[((-168))+rsp]	; room for the ten non-volatile XMM registers saved below
+	movaps	XMMWORD[(-216)+rax],xmm6
+	movaps	XMMWORD[(-200)+rax],xmm7
+	movaps	XMMWORD[(-184)+rax],xmm8
+	movaps	XMMWORD[(-168)+rax],xmm9
+	movaps	XMMWORD[(-152)+rax],xmm10
+	movaps	XMMWORD[(-136)+rax],xmm11
+	movaps	XMMWORD[(-120)+rax],xmm12
+	movaps	XMMWORD[(-104)+rax],xmm13
+	movaps	XMMWORD[(-88)+rax],xmm14
+	movaps	XMMWORD[(-72)+rax],xmm15
+$L$gcm_dec_body:	; start of the range in which gcm_se_handler restores the saved registers
+	vzeroupper
+
+	vmovdqu	xmm1,XMMWORD[r8]	; xmm1 = counter block
+	add	rsp,-128	; reserve 128 bytes of scratch
+	mov	ebx,DWORD[12+r8]	; ebx = last four bytes of the counter block
+	lea	r11,[$L$bswap_mask]	; r11 = base of the constant table
+	lea	r14,[((-128))+rcx]
+	mov	r15,0xf80
+	vmovdqu	xmm8,XMMWORD[r9]	; xmm8 = current 16-byte hash value
+	and	rsp,-128	; align the scratch area to 128 bytes
+	vmovdqu	xmm0,XMMWORD[r11]	; xmm0 = byte-reversal mask
+	lea	rcx,[128+rcx]	; bias the key pointer by +128; round keys are addressed as (N-128)+rcx from here on
+	lea	r9,[((32+32))+r9]	; bias r9 by +64
+	mov	ebp,DWORD[((240-128))+rcx]	; ebp = DWORD at offset 240 of the key schedule
+	vpshufb	xmm8,xmm8,xmm0	; byte-reverse the hash value
+
+	and	r14,r15	; compare bits 7..11 of (key-128) and of rsp
+	and	r15,rsp
+	sub	r15,r14
+	jc	NEAR $L$dec_no_key_aliasing	; stack offset below the key offset: leave rsp alone
+	cmp	r15,768
+	jnc	NEAR $L$dec_no_key_aliasing	; 768 or more above it: leave rsp alone
+	sub	rsp,r15	; otherwise lower rsp by the difference so both share the same offset bits
+$L$dec_no_key_aliasing:
+
+	vmovdqu	xmm7,XMMWORD[80+rdi]	; load the first six input blocks, highest offset first
+	lea	r14,[rdi]	; r14 = read pointer used by the helper's movbe loads
+	vmovdqu	xmm4,XMMWORD[64+rdi]
+
+
+
+
+
+
+
+	lea	r15,[((-192))+rdx*1+rdi]	; r15 = input + length - 192, the bound the helper compares r14 against
+
+	vmovdqu	xmm5,XMMWORD[48+rdi]
+	shr	rdx,4	; length in bytes -> number of 16-byte blocks
+	xor	r10,r10
+	vmovdqu	xmm6,XMMWORD[32+rdi]
+	vpshufb	xmm7,xmm7,xmm0	; byte-reverse each block and stage five of them on the stack (xmm7 stays in a register)
+	vmovdqu	xmm2,XMMWORD[16+rdi]
+	vpshufb	xmm4,xmm4,xmm0
+	vmovdqu	xmm3,XMMWORD[rdi]
+	vpshufb	xmm5,xmm5,xmm0
+	vmovdqu	XMMWORD[48+rsp],xmm4
+	vpshufb	xmm6,xmm6,xmm0
+	vmovdqu	XMMWORD[64+rsp],xmm5
+	vpshufb	xmm2,xmm2,xmm0
+	vmovdqu	XMMWORD[80+rsp],xmm6
+	vpshufb	xmm3,xmm3,xmm0
+	vmovdqu	XMMWORD[96+rsp],xmm2
+	vmovdqu	XMMWORD[112+rsp],xmm3
+
+	call	_aesni_ctr32_ghash_6x	; the return address occupies 8 bytes, hence the helper's 8+N(rsp) addressing of the slots staged above
+
+	vmovups	XMMWORD[(-96)+rsi],xmm9	; store the last six result blocks left in registers by the helper
+	vmovups	XMMWORD[(-80)+rsi],xmm10
+	vmovups	XMMWORD[(-64)+rsi],xmm11
+	vmovups	XMMWORD[(-48)+rsi],xmm12
+	vmovups	XMMWORD[(-32)+rsi],xmm13
+	vmovups	XMMWORD[(-16)+rsi],xmm14
+
+	vpshufb	xmm8,xmm8,XMMWORD[r11]	; byte-reverse the hash value back
+	vmovdqu	XMMWORD[(-64)+r9],xmm8	; and store it where it was loaded from (r9 was biased by +64)
+
+	vzeroupper
+	movaps	xmm6,XMMWORD[((-216))+rax]	; restore the non-volatile XMM registers
+	movaps	xmm7,XMMWORD[((-200))+rax]
+	movaps	xmm8,XMMWORD[((-184))+rax]
+	movaps	xmm9,XMMWORD[((-168))+rax]
+	movaps	xmm10,XMMWORD[((-152))+rax]
+	movaps	xmm11,XMMWORD[((-136))+rax]
+	movaps	xmm12,XMMWORD[((-120))+rax]
+	movaps	xmm13,XMMWORD[((-104))+rax]
+	movaps	xmm14,XMMWORD[((-88))+rax]
+	movaps	xmm15,XMMWORD[((-72))+rax]
+	mov	r15,QWORD[((-48))+rax]	; restore the pushed GPRs relative to the entry stack pointer
+
+	mov	r14,QWORD[((-40))+rax]
+
+	mov	r13,QWORD[((-32))+rax]
+
+	mov	r12,QWORD[((-24))+rax]
+
+	mov	rbp,QWORD[((-16))+rax]
+
+	mov	rbx,QWORD[((-8))+rax]
+
+	lea	rsp,[rax]	; drop the whole frame in one step
+
+$L$gcm_dec_abort:
+	mov	rax,r10	; return value = bytes processed
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aesni_gcm_decrypt:
+
+ALIGN	32
+_aesni_ctr32_6x:	; internal helper called twice by aesni_gcm_encrypt: encrypts six consecutive counter blocks, XORs them with the 96 bytes at rdi, writes 96 bytes at rsi and advances both pointers; the next counter is left in xmm1
+
+	vmovdqu	xmm4,XMMWORD[((0-128))+rcx]	; round key 0 (caller biases rcx by +128)
+	vmovdqu	xmm2,XMMWORD[32+r11]	; r11+32 = $L$one_msb when r11 = $L$bswap_mask, as the caller sets it
+	lea	r13,[((-1))+rbp]	; loop count = ebp - 1, where ebp holds the DWORD from key offset 240
+	vmovups	xmm15,XMMWORD[((16-128))+rcx]	; round key 1
+	lea	r12,[((32-128))+rcx]	; r12 -> round key 2
+	vpxor	xmm9,xmm1,xmm4	; block 0 = counter XOR round key 0
+	add	ebx,100663296	; 100663296 = 0x06000000: add 6 to the top byte of ebx (the last counter byte as loaded by the caller)
+	jc	NEAR $L$handle_ctr32_2	; that byte would wrap: use the 32-bit add path
+	vpaddb	xmm10,xmm1,xmm2	; counters +1 .. +5 via last-byte increments, each XORed with round key 0
+	vpaddb	xmm11,xmm10,xmm2
+	vpxor	xmm10,xmm10,xmm4
+	vpaddb	xmm12,xmm11,xmm2
+	vpxor	xmm11,xmm11,xmm4
+	vpaddb	xmm13,xmm12,xmm2
+	vpxor	xmm12,xmm12,xmm4
+	vpaddb	xmm14,xmm13,xmm2
+	vpxor	xmm13,xmm13,xmm4
+	vpaddb	xmm1,xmm14,xmm2	; xmm1 = counter for the next call
+	vpxor	xmm14,xmm14,xmm4
+	jmp	NEAR $L$oop_ctr32
+
+ALIGN	16
+$L$oop_ctr32:	; one AES round on all six blocks per iteration
+	vaesenc	xmm9,xmm9,xmm15
+	vaesenc	xmm10,xmm10,xmm15
+	vaesenc	xmm11,xmm11,xmm15
+	vaesenc	xmm12,xmm12,xmm15
+	vaesenc	xmm13,xmm13,xmm15
+	vaesenc	xmm14,xmm14,xmm15
+	vmovups	xmm15,XMMWORD[r12]	; fetch the next round key
+	lea	r12,[16+r12]	; lea leaves the flags untouched
+	dec	r13d
+	jnz	NEAR $L$oop_ctr32
+
+	vmovdqu	xmm3,XMMWORD[r12]	; last round key
+	vaesenc	xmm9,xmm9,xmm15	; penultimate round, interleaved with folding the last round key into the input blocks
+	vpxor	xmm4,xmm3,XMMWORD[rdi]
+	vaesenc	xmm10,xmm10,xmm15
+	vpxor	xmm5,xmm3,XMMWORD[16+rdi]
+	vaesenc	xmm11,xmm11,xmm15
+	vpxor	xmm6,xmm3,XMMWORD[32+rdi]
+	vaesenc	xmm12,xmm12,xmm15
+	vpxor	xmm8,xmm3,XMMWORD[48+rdi]
+	vaesenc	xmm13,xmm13,xmm15
+	vpxor	xmm2,xmm3,XMMWORD[64+rdi]
+	vaesenc	xmm14,xmm14,xmm15
+	vpxor	xmm3,xmm3,XMMWORD[80+rdi]
+	lea	rdi,[96+rdi]	; input pointer += 96
+
+	vaesenclast	xmm9,xmm9,xmm4	; final round; its XOR operand already contains the input data
+	vaesenclast	xmm10,xmm10,xmm5
+	vaesenclast	xmm11,xmm11,xmm6
+	vaesenclast	xmm12,xmm12,xmm8
+	vaesenclast	xmm13,xmm13,xmm2
+	vaesenclast	xmm14,xmm14,xmm3
+	vmovups	XMMWORD[rsi],xmm9	; write the six result blocks
+	vmovups	XMMWORD[16+rsi],xmm10
+	vmovups	XMMWORD[32+rsi],xmm11
+	vmovups	XMMWORD[48+rsi],xmm12
+	vmovups	XMMWORD[64+rsi],xmm13
+	vmovups	XMMWORD[80+rsi],xmm14
+	lea	rsi,[96+rsi]	; output pointer += 96
+
+	DB	0F3h,0C3h		;repret
+ALIGN	32
+$L$handle_ctr32_2:	; slow path: byte-reverse the counter (mask expected in xmm0), do 32-bit adds, reverse back
+	vpshufb	xmm6,xmm1,xmm0
+	vmovdqu	xmm5,XMMWORD[48+r11]	; r11+48 = $L$two_lsb
+	vpaddd	xmm10,xmm6,XMMWORD[64+r11]	; r11+64 = $L$one_lsb: counter + 1
+	vpaddd	xmm11,xmm6,xmm5	; counter + 2; the rest follow in steps of two
+	vpaddd	xmm12,xmm10,xmm5
+	vpshufb	xmm10,xmm10,xmm0
+	vpaddd	xmm13,xmm11,xmm5
+	vpshufb	xmm11,xmm11,xmm0
+	vpxor	xmm10,xmm10,xmm4
+	vpaddd	xmm14,xmm12,xmm5
+	vpshufb	xmm12,xmm12,xmm0
+	vpxor	xmm11,xmm11,xmm4
+	vpaddd	xmm1,xmm13,xmm5	; counter for the next call
+	vpshufb	xmm13,xmm13,xmm0
+	vpxor	xmm12,xmm12,xmm4
+	vpshufb	xmm14,xmm14,xmm0
+	vpxor	xmm13,xmm13,xmm4
+	vpshufb	xmm1,xmm1,xmm0
+	vpxor	xmm14,xmm14,xmm4
+	jmp	NEAR $L$oop_ctr32
+
+
+
+global	aesni_gcm_encrypt
+
+ALIGN	32
+aesni_gcm_encrypt:	; exported Win64 entry point with the same six-argument layout as aesni_gcm_decrypt; returns in rax the byte count accumulated in r10, which is 0 when the length argument is below 0x60*3
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesni_gcm_encrypt:
+	mov	rdi,rcx	; remap the Win64 argument registers: arg1 -> rdi (input pointer)
+	mov	rsi,rdx	; arg2 -> rsi (output pointer)
+	mov	rdx,r8	; arg3 -> rdx (length in bytes)
+	mov	rcx,r9	; arg4 -> rcx (key schedule)
+	mov	r8,QWORD[40+rsp]	; arg5 from the stack -> r8 (16-byte counter block)
+	mov	r9,QWORD[48+rsp]	; arg6 from the stack -> r9 (16-byte hash value followed by further table data)
+
+
+
+	xor	r10,r10	; bytes processed so far = 0
+
+
+
+
+	cmp	rdx,0x60*3
+	jb	NEAR $L$gcm_enc_abort	; fewer than 288 bytes: return 0 without touching anything
+
+	lea	rax,[rsp]	; rax = entry stack pointer; the epilogue and gcm_se_handler address the save area relative to it
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	lea	rsp,[((-168))+rsp]	; room for the ten non-volatile XMM registers saved below
+	movaps	XMMWORD[(-216)+rax],xmm6
+	movaps	XMMWORD[(-200)+rax],xmm7
+	movaps	XMMWORD[(-184)+rax],xmm8
+	movaps	XMMWORD[(-168)+rax],xmm9
+	movaps	XMMWORD[(-152)+rax],xmm10
+	movaps	XMMWORD[(-136)+rax],xmm11
+	movaps	XMMWORD[(-120)+rax],xmm12
+	movaps	XMMWORD[(-104)+rax],xmm13
+	movaps	XMMWORD[(-88)+rax],xmm14
+	movaps	XMMWORD[(-72)+rax],xmm15
+$L$gcm_enc_body:	; start of the range in which gcm_se_handler restores the saved registers
+	vzeroupper
+
+	vmovdqu	xmm1,XMMWORD[r8]	; xmm1 = counter block
+	add	rsp,-128	; reserve 128 bytes of scratch
+	mov	ebx,DWORD[12+r8]	; ebx = last four bytes of the counter block
+	lea	r11,[$L$bswap_mask]	; r11 = base of the constant table
+	lea	r14,[((-128))+rcx]
+	mov	r15,0xf80
+	lea	rcx,[128+rcx]	; bias the key pointer by +128
+	vmovdqu	xmm0,XMMWORD[r11]	; xmm0 = byte-reversal mask
+	and	rsp,-128	; align the scratch area to 128 bytes
+	mov	ebp,DWORD[((240-128))+rcx]	; ebp = DWORD at offset 240 of the key schedule
+
+	and	r14,r15	; compare bits 7..11 of (key-128) and of rsp
+	and	r15,rsp
+	sub	r15,r14
+	jc	NEAR $L$enc_no_key_aliasing	; stack offset below the key offset: leave rsp alone
+	cmp	r15,768
+	jnc	NEAR $L$enc_no_key_aliasing	; 768 or more above it: leave rsp alone
+	sub	rsp,r15	; otherwise lower rsp by the difference so both share the same offset bits
+$L$enc_no_key_aliasing:
+
+	lea	r14,[rsi]	; r14 = start of the output buffer; the hashing helper reads back from here via movbe
+
+
+
+
+
+
+
+
+	lea	r15,[((-192))+rdx*1+rsi]	; r15 = output + length - 192, the bound the helper compares r14 against
+
+	shr	rdx,4	; length in bytes -> number of 16-byte blocks
+
+	call	_aesni_ctr32_6x	; first batch of six blocks: encrypt only
+	vpshufb	xmm8,xmm9,xmm0	; byte-reverse the six result blocks and stage five on the stack (the sixth stays in xmm7)
+	vpshufb	xmm2,xmm10,xmm0
+	vmovdqu	XMMWORD[112+rsp],xmm8
+	vpshufb	xmm4,xmm11,xmm0
+	vmovdqu	XMMWORD[96+rsp],xmm2
+	vpshufb	xmm5,xmm12,xmm0
+	vmovdqu	XMMWORD[80+rsp],xmm4
+	vpshufb	xmm6,xmm13,xmm0
+	vmovdqu	XMMWORD[64+rsp],xmm5
+	vpshufb	xmm7,xmm14,xmm0
+	vmovdqu	XMMWORD[48+rsp],xmm6
+
+	call	_aesni_ctr32_6x	; second batch of six blocks: encrypt only
+
+	vmovdqu	xmm8,XMMWORD[r9]	; xmm8 = current 16-byte hash value
+	lea	r9,[((32+32))+r9]	; bias r9 by +64
+	sub	rdx,12	; account for the twelve blocks already produced
+	mov	r10,0x60*2	; bytes processed so far = 192
+	vpshufb	xmm8,xmm8,xmm0	; byte-reverse the hash value
+
+	call	_aesni_ctr32_ghash_6x	; main loop: encrypt further batches while hashing earlier output
+	vmovdqu	xmm7,XMMWORD[32+rsp]	; from here on: store the last six result blocks and hash the output blocks the loop has not yet folded in (NOTE(review): exact pending count inferred from the two passes below - confirm against the generator source)
+	vmovdqu	xmm0,XMMWORD[r11]
+	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
+	vpunpckhqdq	xmm1,xmm7,xmm7
+	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
+	vmovups	XMMWORD[(-96)+rsi],xmm9	; store each final result block, then byte-reverse the register copy for hashing
+	vpshufb	xmm9,xmm9,xmm0
+	vpxor	xmm1,xmm1,xmm7
+	vmovups	XMMWORD[(-80)+rsi],xmm10
+	vpshufb	xmm10,xmm10,xmm0
+	vmovups	XMMWORD[(-64)+rsi],xmm11
+	vpshufb	xmm11,xmm11,xmm0
+	vmovups	XMMWORD[(-48)+rsi],xmm12
+	vpshufb	xmm12,xmm12,xmm0
+	vmovups	XMMWORD[(-32)+rsi],xmm13
+	vpshufb	xmm13,xmm13,xmm0
+	vmovups	XMMWORD[(-16)+rsi],xmm14
+	vpshufb	xmm14,xmm14,xmm0
+	vmovdqu	XMMWORD[16+rsp],xmm9
+	vmovdqu	xmm6,XMMWORD[48+rsp]
+	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
+	vpunpckhqdq	xmm2,xmm6,xmm6
+	vpclmulqdq	xmm5,xmm7,xmm3,0x00
+	vpxor	xmm2,xmm2,xmm6
+	vpclmulqdq	xmm7,xmm7,xmm3,0x11
+	vpclmulqdq	xmm1,xmm1,xmm15,0x00
+
+	vmovdqu	xmm9,XMMWORD[64+rsp]
+	vpclmulqdq	xmm4,xmm6,xmm0,0x00
+	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
+	vpxor	xmm4,xmm4,xmm5
+	vpunpckhqdq	xmm5,xmm9,xmm9
+	vpclmulqdq	xmm6,xmm6,xmm0,0x11
+	vpxor	xmm5,xmm5,xmm9
+	vpxor	xmm6,xmm6,xmm7
+	vpclmulqdq	xmm2,xmm2,xmm15,0x10
+	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
+	vpxor	xmm2,xmm2,xmm1
+
+	vmovdqu	xmm1,XMMWORD[80+rsp]
+	vpclmulqdq	xmm7,xmm9,xmm3,0x00
+	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
+	vpxor	xmm7,xmm7,xmm4
+	vpunpckhqdq	xmm4,xmm1,xmm1
+	vpclmulqdq	xmm9,xmm9,xmm3,0x11
+	vpxor	xmm4,xmm4,xmm1
+	vpxor	xmm9,xmm9,xmm6
+	vpclmulqdq	xmm5,xmm5,xmm15,0x00
+	vpxor	xmm5,xmm5,xmm2
+
+	vmovdqu	xmm2,XMMWORD[96+rsp]
+	vpclmulqdq	xmm6,xmm1,xmm0,0x00
+	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
+	vpxor	xmm6,xmm6,xmm7
+	vpunpckhqdq	xmm7,xmm2,xmm2
+	vpclmulqdq	xmm1,xmm1,xmm0,0x11
+	vpxor	xmm7,xmm7,xmm2
+	vpxor	xmm1,xmm1,xmm9
+	vpclmulqdq	xmm4,xmm4,xmm15,0x10
+	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
+	vpxor	xmm4,xmm4,xmm5
+
+	vpxor	xmm8,xmm8,XMMWORD[112+rsp]
+	vpclmulqdq	xmm5,xmm2,xmm3,0x00
+	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
+	vpunpckhqdq	xmm9,xmm8,xmm8
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm2,xmm2,xmm3,0x11
+	vpxor	xmm9,xmm9,xmm8
+	vpxor	xmm2,xmm2,xmm1
+	vpclmulqdq	xmm7,xmm7,xmm15,0x00
+	vpxor	xmm4,xmm7,xmm4
+
+	vpclmulqdq	xmm6,xmm8,xmm0,0x00
+	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
+	vpunpckhqdq	xmm1,xmm14,xmm14
+	vpclmulqdq	xmm8,xmm8,xmm0,0x11
+	vpxor	xmm1,xmm1,xmm14
+	vpxor	xmm5,xmm6,xmm5
+	vpclmulqdq	xmm9,xmm9,xmm15,0x10
+	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
+	vpxor	xmm7,xmm8,xmm2
+	vpxor	xmm6,xmm9,xmm4
+
+	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
+	vpxor	xmm9,xmm7,xmm5
+	vpclmulqdq	xmm4,xmm14,xmm3,0x00
+	vpxor	xmm6,xmm6,xmm9
+	vpunpckhqdq	xmm2,xmm13,xmm13
+	vpclmulqdq	xmm14,xmm14,xmm3,0x11
+	vpxor	xmm2,xmm2,xmm13
+	vpslldq	xmm9,xmm6,8
+	vpclmulqdq	xmm1,xmm1,xmm15,0x00
+	vpxor	xmm8,xmm5,xmm9
+	vpsrldq	xmm6,xmm6,8
+	vpxor	xmm7,xmm7,xmm6
+
+	vpclmulqdq	xmm5,xmm13,xmm0,0x00
+	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
+	vpxor	xmm5,xmm5,xmm4
+	vpunpckhqdq	xmm9,xmm12,xmm12
+	vpclmulqdq	xmm13,xmm13,xmm0,0x11
+	vpxor	xmm9,xmm9,xmm12
+	vpxor	xmm13,xmm13,xmm14
+	vpalignr	xmm14,xmm8,xmm8,8
+	vpclmulqdq	xmm2,xmm2,xmm15,0x10
+	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
+	vpxor	xmm2,xmm2,xmm1
+
+	vpclmulqdq	xmm4,xmm12,xmm3,0x00
+	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
+	vpxor	xmm4,xmm4,xmm5
+	vpunpckhqdq	xmm1,xmm11,xmm11
+	vpclmulqdq	xmm12,xmm12,xmm3,0x11
+	vpxor	xmm1,xmm1,xmm11
+	vpxor	xmm12,xmm12,xmm13
+	vxorps	xmm7,xmm7,XMMWORD[16+rsp]
+	vpclmulqdq	xmm9,xmm9,xmm15,0x00
+	vpxor	xmm9,xmm9,xmm2
+
+	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10	; multiply by the constant at r11+16 ($L$poly)
+	vxorps	xmm8,xmm8,xmm14
+
+	vpclmulqdq	xmm5,xmm11,xmm0,0x00
+	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
+	vpxor	xmm5,xmm5,xmm4
+	vpunpckhqdq	xmm2,xmm10,xmm10
+	vpclmulqdq	xmm11,xmm11,xmm0,0x11
+	vpxor	xmm2,xmm2,xmm10
+	vpalignr	xmm14,xmm8,xmm8,8
+	vpxor	xmm11,xmm11,xmm12
+	vpclmulqdq	xmm1,xmm1,xmm15,0x10
+	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
+	vpxor	xmm1,xmm1,xmm9
+
+	vxorps	xmm14,xmm14,xmm7
+	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10	; second multiply by $L$poly
+	vxorps	xmm8,xmm8,xmm14
+
+	vpclmulqdq	xmm4,xmm10,xmm3,0x00
+	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
+	vpxor	xmm4,xmm4,xmm5
+	vpunpckhqdq	xmm9,xmm8,xmm8
+	vpclmulqdq	xmm10,xmm10,xmm3,0x11
+	vpxor	xmm9,xmm9,xmm8
+	vpxor	xmm10,xmm10,xmm11
+	vpclmulqdq	xmm2,xmm2,xmm15,0x00
+	vpxor	xmm2,xmm2,xmm1
+
+	vpclmulqdq	xmm5,xmm8,xmm0,0x00
+	vpclmulqdq	xmm7,xmm8,xmm0,0x11
+	vpxor	xmm5,xmm5,xmm4
+	vpclmulqdq	xmm6,xmm9,xmm15,0x10
+	vpxor	xmm7,xmm7,xmm10
+	vpxor	xmm6,xmm6,xmm2
+
+	vpxor	xmm4,xmm7,xmm5
+	vpxor	xmm6,xmm6,xmm4
+	vpslldq	xmm1,xmm6,8
+	vmovdqu	xmm3,XMMWORD[16+r11]	; xmm3 = $L$poly for the final two multiply steps
+	vpsrldq	xmm6,xmm6,8
+	vpxor	xmm8,xmm5,xmm1
+	vpxor	xmm7,xmm7,xmm6
+
+	vpalignr	xmm2,xmm8,xmm8,8
+	vpclmulqdq	xmm8,xmm8,xmm3,0x10
+	vpxor	xmm8,xmm8,xmm2
+
+	vpalignr	xmm2,xmm8,xmm8,8
+	vpclmulqdq	xmm8,xmm8,xmm3,0x10
+	vpxor	xmm2,xmm2,xmm7
+	vpxor	xmm8,xmm8,xmm2
+	vpshufb	xmm8,xmm8,XMMWORD[r11]	; byte-reverse the hash value back
+	vmovdqu	XMMWORD[(-64)+r9],xmm8	; and store it where it was loaded from (r9 was biased by +64)
+
+	vzeroupper
+	movaps	xmm6,XMMWORD[((-216))+rax]	; restore the non-volatile XMM registers
+	movaps	xmm7,XMMWORD[((-200))+rax]
+	movaps	xmm8,XMMWORD[((-184))+rax]
+	movaps	xmm9,XMMWORD[((-168))+rax]
+	movaps	xmm10,XMMWORD[((-152))+rax]
+	movaps	xmm11,XMMWORD[((-136))+rax]
+	movaps	xmm12,XMMWORD[((-120))+rax]
+	movaps	xmm13,XMMWORD[((-104))+rax]
+	movaps	xmm14,XMMWORD[((-88))+rax]
+	movaps	xmm15,XMMWORD[((-72))+rax]
+	mov	r15,QWORD[((-48))+rax]	; restore the pushed GPRs relative to the entry stack pointer
+
+	mov	r14,QWORD[((-40))+rax]
+
+	mov	r13,QWORD[((-32))+rax]
+
+	mov	r12,QWORD[((-24))+rax]
+
+	mov	rbp,QWORD[((-16))+rax]
+
+	mov	rbx,QWORD[((-8))+rax]
+
+	lea	rsp,[rax]	; drop the whole frame in one step
+
+$L$gcm_enc_abort:
+	mov	rax,r10	; return value = bytes processed
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aesni_gcm_encrypt:
+ALIGN	64
+$L$bswap_mask:	; 16-byte constants laid out back to back; the code reaches the later ones as fixed offsets 16/32/48/64 from this label
+DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0	; vpshufb mask that reverses the 16 bytes of a block
+$L$poly:	; offset +16
+DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2	; only the last byte is set (0xc2); used as the vpclmulqdq operand in the reduction steps
+$L$one_msb:	; offset +32
+DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1	; 1 in the last byte: vpaddb increment for a counter kept in its stored byte order
+$L$two_lsb:	; offset +48
+DB	2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0	; 2 in the first byte: vpaddd increment for a byte-reversed counter
+$L$one_lsb:	; offset +64
+DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0	; 1 in the first byte: vpaddd increment for a byte-reversed counter
+DB	65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108	; NUL-terminated ASCII credit string starts here: "AES-NI GCM modul"
+DB	101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82	; "e for x86_64, CR"
+DB	89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112	; "YPTOGAMS by ", an opening angle bracket, then "app"
+DB	114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	; "ro@openssl.org", a closing angle bracket, then the terminating NUL
+ALIGN	64
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+gcm_se_handler:	; Win64 SEH language-specific handler for aesni_gcm_decrypt and aesni_gcm_encrypt; r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (offsets follow the Win64 layouts of those structures)
+	push	rsi	; preserve every register this handler modifies, plus the flags
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; 32 bytes of home space + four stack arguments for RtlVirtualUnwind
+
+	mov	rax,QWORD[120+r8]	; rax = context->Rax
+	mov	rbx,QWORD[248+r8]	; rbx = context->Rip
+
+	mov	rsi,QWORD[8+r9]	; rsi = disp->ImageBase
+	mov	r11,QWORD[56+r9]	; r11 = disp->HandlerData (the two label RVAs from .xdata)
+
+	mov	r10d,DWORD[r11]	; HandlerData[0]: RVA of the body label
+	lea	r10,[r10*1+rsi]	; RVA -> absolute address
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; fault before the body label: nothing beyond rdi/rsi/rsp to recover, frame base = context->Rax
+
+	mov	rax,QWORD[152+r8]	; rax = context->Rsp
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1]: RVA of the abort label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; fault at or after the abort label: frame base = context->Rsp
+
+	mov	rax,QWORD[120+r8]	; fault inside the body: both covered functions keep their entry stack pointer in rax, so reload context->Rax
+
+	mov	r15,QWORD[((-48))+rax]	; reload the six pushed GPRs from below the entry stack pointer
+	mov	r14,QWORD[((-40))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	rbp,QWORD[((-16))+rax]
+	mov	rbx,QWORD[((-8))+rax]
+	mov	QWORD[240+r8],r15	; context->R15
+	mov	QWORD[232+r8],r14	; context->R14
+	mov	QWORD[224+r8],r13	; context->R13
+	mov	QWORD[216+r8],r12	; context->R12
+	mov	QWORD[160+r8],rbp	; context->Rbp
+	mov	QWORD[144+r8],rbx	; context->Rbx
+
+	lea	rsi,[((-216))+rax]	; source: the xmm6..xmm15 save area written by the function prologues
+	lea	rdi,[512+r8]	; destination: context->Xmm6
+	mov	ecx,20	; 20 quadwords = 160 bytes = ten XMM registers
+	DD	0xa548f3fc	; raw bytes FC F3 48 A5 = cld; rep movsq
+
+$L$common_seh_tail:
+	mov	rdi,QWORD[8+rax]	; rdi/rsi copies stored by the WIN64 prologue at 8 and 16 above the entry stack pointer
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context->Rsp = rax
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+
+	mov	rdi,QWORD[40+r9]	; destination = disp->ContextRecord
+	mov	rsi,r8	; source = the patched CONTEXT
+	mov	ecx,154	; 154 quadwords = 1232 bytes copied
+	DD	0xa548f3fc	; raw bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rsi,r9	; rsi = DISPATCHER_CONTEXT*
+	xor	rcx,rcx	; arg1 (HandlerType) = 0
+	mov	rdx,QWORD[8+rsi]	; arg2 = disp->ImageBase
+	mov	r8,QWORD[rsi]	; arg3 = disp->ControlPc
+	mov	r9,QWORD[16+rsi]	; arg4 = disp->FunctionEntry
+	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
+	lea	r11,[56+rsi]	; &disp->HandlerData
+	lea	r12,[24+rsi]	; &disp->EstablisherFrame
+	mov	QWORD[32+rsp],r10	; arg5 = ContextRecord
+	mov	QWORD[40+rsp],r11	; arg6 = &HandlerData
+	mov	QWORD[48+rsp],r12	; arg7 = &EstablisherFrame
+	mov	QWORD[56+rsp],rcx	; arg8 = NULL (rcx was zeroed above)
+	call	QWORD[__imp_RtlVirtualUnwind]	; indirect call through the import-table slot declared EXTERN above
+
+	mov	eax,1	; handler return value 1 (ExceptionContinueSearch in the Win64 SEH convention)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4
+ALIGN	4
+	DD	$L$SEH_begin_aesni_gcm_decrypt wrt ..imagebase	; function-table record for aesni_gcm_decrypt: begin RVA, end RVA, RVA of its unwind info
+	DD	$L$SEH_end_aesni_gcm_decrypt wrt ..imagebase
+	DD	$L$SEH_gcm_dec_info wrt ..imagebase
+
+	DD	$L$SEH_begin_aesni_gcm_encrypt wrt ..imagebase	; function-table record for aesni_gcm_encrypt
+	DD	$L$SEH_end_aesni_gcm_encrypt wrt ..imagebase
+	DD	$L$SEH_gcm_enc_info wrt ..imagebase
+section	.xdata rdata align=8
+ALIGN	8
+$L$SEH_gcm_dec_info:
+DB	9,0,0,0	; 4-byte unwind-info header: first byte 9 = version 1 with the exception-handler flag (1 shl 3); prologue size, unwind-code count and frame register are all 0
+	DD	gcm_se_handler wrt ..imagebase	; RVA of the language-specific handler
+	DD	$L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase	; handler data: the label RVAs gcm_se_handler reads as HandlerData[0] and HandlerData[1]
+$L$SEH_gcm_enc_info:
+DB	9,0,0,0	; same header as above
+	DD	gcm_se_handler wrt ..imagebase
+	DD	$L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase	; body/abort label RVAs for gcm_se_handler
diff --git a/third_party/boringssl/win-x86_64/crypto/fipsmodule/aesni-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
new file mode 100644
index 0000000..13e9c5e
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
@@ -0,0 +1,4996 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.text code align=64
+
+EXTERN	OPENSSL_ia32cap_P
+global	aesni_encrypt
+; aesni_encrypt: encrypt the 16-byte block at [rcx] with the key schedule at [r8]; the result is stored at [rdx].
+ALIGN	16
+aesni_encrypt:
+	movups	xmm2,XMMWORD[rcx]	; xmm2 = input block
+	mov	eax,DWORD[240+r8]	; eax = loop count read from offset 240 of the key structure
+	movups	xmm0,XMMWORD[r8]	; xmm0 = round key 0
+	movups	xmm1,XMMWORD[16+r8]	; xmm1 = round key 1
+	lea	r8,[32+r8]	; r8 -> round key 2
+	xorps	xmm2,xmm0	; initial whitening with round key 0
+$L$oop_enc1_1:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	eax	; sets ZF for the jnz below (movups/lea leave flags untouched)
+	movups	xmm1,XMMWORD[r8]	; fetch the next round key
+	lea	r8,[16+r8]
+	jnz	NEAR $L$oop_enc1_1
+DB	102,15,56,221,209	; aesenclast xmm2,xmm1
+	pxor	xmm0,xmm0	; zero the registers that held round keys
+	pxor	xmm1,xmm1
+	movups	XMMWORD[rdx],xmm2	; store the result block
+	pxor	xmm2,xmm2	; zero the register that held the result
+	DB	0F3h,0C3h		;repret
+
+
+global	aesni_decrypt
+; aesni_decrypt: decrypt the 16-byte block at [rcx] with the key schedule at [r8]; the result is stored at [rdx].
+ALIGN	16
+aesni_decrypt:
+	movups	xmm2,XMMWORD[rcx]	; xmm2 = input block
+	mov	eax,DWORD[240+r8]	; eax = loop count read from offset 240 of the key structure
+	movups	xmm0,XMMWORD[r8]	; xmm0 = round key 0
+	movups	xmm1,XMMWORD[16+r8]	; xmm1 = round key 1
+	lea	r8,[32+r8]	; r8 -> round key 2
+	xorps	xmm2,xmm0	; initial whitening with round key 0
+$L$oop_dec1_2:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+	dec	eax	; sets ZF for the jnz below (movups/lea leave flags untouched)
+	movups	xmm1,XMMWORD[r8]	; fetch the next round key
+	lea	r8,[16+r8]
+	jnz	NEAR $L$oop_dec1_2
+DB	102,15,56,223,209	; aesdeclast xmm2,xmm1
+	pxor	xmm0,xmm0	; zero the registers that held round keys
+	pxor	xmm1,xmm1
+	movups	XMMWORD[rdx],xmm2	; store the result block
+	pxor	xmm2,xmm2	; zero the register that held the result
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	16
+_aesni_encrypt2:	; internal helper: encrypts xmm2,xmm3 in place; in: rcx = key schedule, eax = round-loop count; clobbers rax, rcx, xmm0, xmm1
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; count -> byte offset (16 bytes per round key)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0	; whiten both blocks with round key 0
+	xorps	xmm3,xmm0
+	movups	xmm0,XMMWORD[32+rcx]	; round key 2
+	lea	rcx,[32+rax*1+rcx]	; rcx -> 16 bytes past the last round key used
+	neg	rax
+	add	rax,16	; rax = negative index that counts up to zero
+
+$L$enc_loop2:	; two rounds per iteration, alternating xmm1/xmm0 as the round key
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; ZF is set when the index reaches zero; consumed by the jnz below
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$enc_loop2
+
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,221,208	; aesenclast xmm2,xmm0
+DB	102,15,56,221,216	; aesenclast xmm3,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	16
+_aesni_decrypt2:	; internal helper: decrypts xmm2,xmm3 in place; in: rcx = key schedule, eax = round-loop count; clobbers rax, rcx, xmm0, xmm1
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; count -> byte offset (16 bytes per round key)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0	; whiten both blocks with round key 0
+	xorps	xmm3,xmm0
+	movups	xmm0,XMMWORD[32+rcx]	; round key 2
+	lea	rcx,[32+rax*1+rcx]	; rcx -> 16 bytes past the last round key used
+	neg	rax
+	add	rax,16	; rax = negative index that counts up to zero
+
+$L$dec_loop2:	; two rounds per iteration, alternating xmm1/xmm0 as the round key
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; ZF is set when the index reaches zero; consumed by the jnz below
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$dec_loop2
+
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,223,208	; aesdeclast xmm2,xmm0
+DB	102,15,56,223,216	; aesdeclast xmm3,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	16
+_aesni_encrypt3:	; internal helper: encrypts xmm2..xmm4 in place; in: rcx = key schedule, eax = round-loop count; clobbers rax, rcx, xmm0, xmm1
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; count -> byte offset (16 bytes per round key)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0	; whiten all three blocks with round key 0
+	xorps	xmm3,xmm0
+	xorps	xmm4,xmm0
+	movups	xmm0,XMMWORD[32+rcx]	; round key 2
+	lea	rcx,[32+rax*1+rcx]	; rcx -> 16 bytes past the last round key used
+	neg	rax
+	add	rax,16	; rax = negative index that counts up to zero
+
+$L$enc_loop3:	; two rounds per iteration, alternating xmm1/xmm0 as the round key
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; ZF is set when the index reaches zero; consumed by the jnz below
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+DB	102,15,56,220,224	; aesenc xmm4,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$enc_loop3
+
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,221,208	; aesenclast xmm2,xmm0
+DB	102,15,56,221,216	; aesenclast xmm3,xmm0
+DB	102,15,56,221,224	; aesenclast xmm4,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	16
+_aesni_decrypt3:	; internal helper: decrypts xmm2..xmm4 in place; in: rcx = key schedule, eax = round-loop count; clobbers rax, rcx, xmm0, xmm1
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; count -> byte offset (16 bytes per round key)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0	; whiten all three blocks with round key 0
+	xorps	xmm3,xmm0
+	xorps	xmm4,xmm0
+	movups	xmm0,XMMWORD[32+rcx]	; round key 2
+	lea	rcx,[32+rax*1+rcx]	; rcx -> 16 bytes past the last round key used
+	neg	rax
+	add	rax,16	; rax = negative index that counts up to zero
+
+$L$dec_loop3:	; two rounds per iteration, alternating xmm1/xmm0 as the round key
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; ZF is set when the index reaches zero; consumed by the jnz below
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$dec_loop3
+
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,223,208	; aesdeclast xmm2,xmm0
+DB	102,15,56,223,216	; aesdeclast xmm3,xmm0
+DB	102,15,56,223,224	; aesdeclast xmm4,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	16
+_aesni_encrypt4:	; internal helper: encrypts xmm2..xmm5 in place; in: rcx = key schedule, eax = round-loop count; clobbers rax, rcx, xmm0, xmm1
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; count -> byte offset (16 bytes per round key)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0	; whiten all four blocks with round key 0
+	xorps	xmm3,xmm0
+	xorps	xmm4,xmm0
+	xorps	xmm5,xmm0
+	movups	xmm0,XMMWORD[32+rcx]	; round key 2
+	lea	rcx,[32+rax*1+rcx]	; rcx -> 16 bytes past the last round key used
+	neg	rax
+DB	0x0f,0x1f,0x00	; 3-byte NOP (nop dword [rax]); presumably padding for code alignment
+	add	rax,16	; rax = negative index that counts up to zero
+
+$L$enc_loop4:	; two rounds per iteration, alternating xmm1/xmm0 as the round key
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; ZF is set when the index reaches zero; consumed by the jnz below
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+DB	102,15,56,220,224	; aesenc xmm4,xmm0
+DB	102,15,56,220,232	; aesenc xmm5,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$enc_loop4
+
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+DB	102,15,56,221,208	; aesenclast xmm2,xmm0
+DB	102,15,56,221,216	; aesenclast xmm3,xmm0
+DB	102,15,56,221,224	; aesenclast xmm4,xmm0
+DB	102,15,56,221,232	; aesenclast xmm5,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	16
+_aesni_decrypt4:	; internal helper: decrypts xmm2..xmm5 in place; in: rcx = key schedule, eax = round-loop count; clobbers rax, rcx, xmm0, xmm1
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; count -> byte offset (16 bytes per round key)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0	; whiten all four blocks with round key 0
+	xorps	xmm3,xmm0
+	xorps	xmm4,xmm0
+	xorps	xmm5,xmm0
+	movups	xmm0,XMMWORD[32+rcx]	; round key 2
+	lea	rcx,[32+rax*1+rcx]	; rcx -> 16 bytes past the last round key used
+	neg	rax
+DB	0x0f,0x1f,0x00	; 3-byte NOP (nop dword [rax]); presumably padding for code alignment
+	add	rax,16	; rax = negative index that counts up to zero
+
+$L$dec_loop4:	; two rounds per iteration, alternating xmm1/xmm0 as the round key
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; ZF is set when the index reaches zero; consumed by the jnz below
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+DB	102,15,56,222,232	; aesdec xmm5,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$dec_loop4
+
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,223,208	; aesdeclast xmm2,xmm0
+DB	102,15,56,223,216	; aesdeclast xmm3,xmm0
+DB	102,15,56,223,224	; aesdeclast xmm4,xmm0
+DB	102,15,56,223,232	; aesdeclast xmm5,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	16
+_aesni_encrypt6:	; internal helper: encrypts xmm2..xmm7 in place; in: rcx = key schedule, eax = round-loop count; clobbers rax, rcx, xmm0, xmm1
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; count -> byte offset (16 bytes per round key)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0	; whitening with round key 0 is interleaved with the first round below
+	pxor	xmm3,xmm0
+	pxor	xmm4,xmm0
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	lea	rcx,[32+rax*1+rcx]	; rcx -> 16 bytes past the last round key used
+	neg	rax	; rax = negative index that counts up to zero
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+	pxor	xmm5,xmm0
+	pxor	xmm6,xmm0
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+	pxor	xmm7,xmm0
+	movups	xmm0,XMMWORD[rax*1+rcx]	; round key 2
+	add	rax,16
+	jmp	NEAR $L$enc_loop6_enter	; xmm2..xmm4 already had round 1; join the loop for xmm5..xmm7
+ALIGN	16
+$L$enc_loop6:	; also entered directly via "call $L$enc_loop6" from the ctr32 code later in this file
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+$L$enc_loop6_enter:
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; ZF is set when the index reaches zero; consumed by the jnz below
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+DB	102,15,56,220,224	; aesenc xmm4,xmm0
+DB	102,15,56,220,232	; aesenc xmm5,xmm0
+DB	102,15,56,220,240	; aesenc xmm6,xmm0
+DB	102,15,56,220,248	; aesenc xmm7,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$enc_loop6
+
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+DB	102,15,56,221,208	; aesenclast xmm2,xmm0
+DB	102,15,56,221,216	; aesenclast xmm3,xmm0
+DB	102,15,56,221,224	; aesenclast xmm4,xmm0
+DB	102,15,56,221,232	; aesenclast xmm5,xmm0
+DB	102,15,56,221,240	; aesenclast xmm6,xmm0
+DB	102,15,56,221,248	; aesenclast xmm7,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	16
+_aesni_decrypt6:	; internal helper: decrypts xmm2..xmm7 in place; in: rcx = key schedule, eax = round-loop count; clobbers rax, rcx, xmm0, xmm1
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; count -> byte offset (16 bytes per round key)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0	; whitening with round key 0 is interleaved with the first round below
+	pxor	xmm3,xmm0
+	pxor	xmm4,xmm0
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+	lea	rcx,[32+rax*1+rcx]	; rcx -> 16 bytes past the last round key used
+	neg	rax	; rax = negative index that counts up to zero
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+	pxor	xmm5,xmm0
+	pxor	xmm6,xmm0
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+	pxor	xmm7,xmm0
+	movups	xmm0,XMMWORD[rax*1+rcx]	; round key 2
+	add	rax,16
+	jmp	NEAR $L$dec_loop6_enter	; xmm2..xmm4 already had round 1; join the loop for xmm5..xmm7
+ALIGN	16
+$L$dec_loop6:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+$L$dec_loop6_enter:
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; ZF is set when the index reaches zero; consumed by the jnz below
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+DB	102,15,56,222,232	; aesdec xmm5,xmm0
+DB	102,15,56,222,240	; aesdec xmm6,xmm0
+DB	102,15,56,222,248	; aesdec xmm7,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$dec_loop6
+
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+DB	102,15,56,223,208	; aesdeclast xmm2,xmm0
+DB	102,15,56,223,216	; aesdeclast xmm3,xmm0
+DB	102,15,56,223,224	; aesdeclast xmm4,xmm0
+DB	102,15,56,223,232	; aesdeclast xmm5,xmm0
+DB	102,15,56,223,240	; aesdeclast xmm6,xmm0
+DB	102,15,56,223,248	; aesdeclast xmm7,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	16
+_aesni_encrypt8:	; internal helper: encrypts xmm2..xmm9 in place; in: rcx = key schedule, eax = round-loop count; clobbers rax, rcx, xmm0, xmm1
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; count -> byte offset (16 bytes per round key)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0	; whitening with round key 0 is interleaved with the first round below
+	xorps	xmm3,xmm0
+	pxor	xmm4,xmm0
+	pxor	xmm5,xmm0
+	pxor	xmm6,xmm0
+	lea	rcx,[32+rax*1+rcx]	; rcx -> 16 bytes past the last round key used
+	neg	rax	; rax = negative index that counts up to zero
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	pxor	xmm7,xmm0
+	pxor	xmm8,xmm0
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+	pxor	xmm9,xmm0
+	movups	xmm0,XMMWORD[rax*1+rcx]	; round key 2
+	add	rax,16
+	jmp	NEAR $L$enc_loop8_inner	; xmm2,xmm3 already had round 1; join the loop for xmm4..xmm9
+ALIGN	16
+$L$enc_loop8:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+$L$enc_loop8_inner:
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
+DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
+$L$enc_loop8_enter:	; also entered directly via "call $L$enc_loop8_enter" from the ctr32 tail later in this file
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; ZF is set when the index reaches zero; consumed by the jnz below
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+DB	102,15,56,220,224	; aesenc xmm4,xmm0
+DB	102,15,56,220,232	; aesenc xmm5,xmm0
+DB	102,15,56,220,240	; aesenc xmm6,xmm0
+DB	102,15,56,220,248	; aesenc xmm7,xmm0
+DB	102,68,15,56,220,192	; aesenc xmm8,xmm0
+DB	102,68,15,56,220,200	; aesenc xmm9,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$enc_loop8
+
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
+DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
+DB	102,15,56,221,208	; aesenclast xmm2,xmm0
+DB	102,15,56,221,216	; aesenclast xmm3,xmm0
+DB	102,15,56,221,224	; aesenclast xmm4,xmm0
+DB	102,15,56,221,232	; aesenclast xmm5,xmm0
+DB	102,15,56,221,240	; aesenclast xmm6,xmm0
+DB	102,15,56,221,248	; aesenclast xmm7,xmm0
+DB	102,68,15,56,221,192	; aesenclast xmm8,xmm0
+DB	102,68,15,56,221,200	; aesenclast xmm9,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	16
+_aesni_decrypt8:	; internal helper: decrypts xmm2..xmm9 in place; in: rcx = key schedule, eax = round-loop count; clobbers rax, rcx, xmm0, xmm1
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; count -> byte offset (16 bytes per round key)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0	; whitening with round key 0 is interleaved with the first round below
+	xorps	xmm3,xmm0
+	pxor	xmm4,xmm0
+	pxor	xmm5,xmm0
+	pxor	xmm6,xmm0
+	lea	rcx,[32+rax*1+rcx]	; rcx -> 16 bytes past the last round key used
+	neg	rax	; rax = negative index that counts up to zero
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+	pxor	xmm7,xmm0
+	pxor	xmm8,xmm0
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+	pxor	xmm9,xmm0
+	movups	xmm0,XMMWORD[rax*1+rcx]	; round key 2
+	add	rax,16
+	jmp	NEAR $L$dec_loop8_inner	; xmm2,xmm3 already had round 1; join the loop for xmm4..xmm9
+ALIGN	16
+$L$dec_loop8:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+$L$dec_loop8_inner:
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+DB	102,68,15,56,222,193	; aesdec xmm8,xmm1
+DB	102,68,15,56,222,201	; aesdec xmm9,xmm1
+$L$dec_loop8_enter:
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; ZF is set when the index reaches zero; consumed by the jnz below
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+DB	102,15,56,222,232	; aesdec xmm5,xmm0
+DB	102,15,56,222,240	; aesdec xmm6,xmm0
+DB	102,15,56,222,248	; aesdec xmm7,xmm0
+DB	102,68,15,56,222,192	; aesdec xmm8,xmm0
+DB	102,68,15,56,222,200	; aesdec xmm9,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$dec_loop8
+
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+DB	102,68,15,56,222,193	; aesdec xmm8,xmm1
+DB	102,68,15,56,222,201	; aesdec xmm9,xmm1
+DB	102,15,56,223,208	; aesdeclast xmm2,xmm0
+DB	102,15,56,223,216	; aesdeclast xmm3,xmm0
+DB	102,15,56,223,224	; aesdeclast xmm4,xmm0
+DB	102,15,56,223,232	; aesdeclast xmm5,xmm0
+DB	102,15,56,223,240	; aesdeclast xmm6,xmm0
+DB	102,15,56,223,248	; aesdeclast xmm7,xmm0
+DB	102,68,15,56,223,192	; aesdeclast xmm8,xmm0
+DB	102,68,15,56,223,200	; aesdeclast xmm9,xmm0
+	DB	0F3h,0C3h		;repret
+
+global	aesni_ecb_encrypt
+; aesni_ecb_encrypt(arg1 = in, arg2 = out, arg3 = length in bytes, arg4 = key schedule, arg5 = nonzero to encrypt / zero to decrypt): processes whole 16-byte blocks independently.
+ALIGN	16
+aesni_ecb_encrypt:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesni_ecb_encrypt:
+	mov	rdi,rcx	; rdi = in (arg1)
+	mov	rsi,rdx	; rsi = out (arg2)
+	mov	rdx,r8	; rdx = length in bytes (arg3)
+	mov	rcx,r9	; rcx = key schedule (arg4)
+	mov	r8,QWORD[40+rsp]	; r8 = arg5, read from the stack
+
+
+	lea	rsp,[((-88))+rsp]	; reserve 88 bytes of stack
+	movaps	XMMWORD[rsp],xmm6	; save xmm6..xmm9 (non-volatile in the Microsoft x64 convention)
+	movaps	XMMWORD[16+rsp],xmm7
+	movaps	XMMWORD[32+rsp],xmm8
+	movaps	XMMWORD[48+rsp],xmm9
+$L$ecb_enc_body:
+	and	rdx,-16	; round the length down to a multiple of 16
+	jz	NEAR $L$ecb_ret	; nothing to do for less than one block
+
+	mov	eax,DWORD[240+rcx]	; eax = round-loop count from offset 240 of the key structure
+	movups	xmm0,XMMWORD[rcx]	; xmm0 = round key 0
+	mov	r11,rcx	; r11/r10d keep copies of key pointer and count; the helpers clobber rcx/rax
+	mov	r10d,eax
+	test	r8d,r8d
+	jz	NEAR $L$ecb_decrypt	; arg5 == 0 selects the decrypt path
+
+	cmp	rdx,0x80
+	jb	NEAR $L$ecb_enc_tail	; fewer than 8 blocks: go straight to the tail
+
+	movdqu	xmm2,XMMWORD[rdi]	; load the first 8 input blocks
+	movdqu	xmm3,XMMWORD[16+rdi]
+	movdqu	xmm4,XMMWORD[32+rdi]
+	movdqu	xmm5,XMMWORD[48+rdi]
+	movdqu	xmm6,XMMWORD[64+rdi]
+	movdqu	xmm7,XMMWORD[80+rdi]
+	movdqu	xmm8,XMMWORD[96+rdi]
+	movdqu	xmm9,XMMWORD[112+rdi]
+	lea	rdi,[128+rdi]
+	sub	rdx,0x80
+	jmp	NEAR $L$ecb_enc_loop8_enter
+ALIGN	16
+$L$ecb_enc_loop8:	; store the previous 8 results while loading the next 8 inputs
+	movups	XMMWORD[rsi],xmm2
+	mov	rcx,r11	; restore the key pointer for the next helper call
+	movdqu	xmm2,XMMWORD[rdi]
+	mov	eax,r10d	; restore the round-loop count
+	movups	XMMWORD[16+rsi],xmm3
+	movdqu	xmm3,XMMWORD[16+rdi]
+	movups	XMMWORD[32+rsi],xmm4
+	movdqu	xmm4,XMMWORD[32+rdi]
+	movups	XMMWORD[48+rsi],xmm5
+	movdqu	xmm5,XMMWORD[48+rdi]
+	movups	XMMWORD[64+rsi],xmm6
+	movdqu	xmm6,XMMWORD[64+rdi]
+	movups	XMMWORD[80+rsi],xmm7
+	movdqu	xmm7,XMMWORD[80+rdi]
+	movups	XMMWORD[96+rsi],xmm8
+	movdqu	xmm8,XMMWORD[96+rdi]
+	movups	XMMWORD[112+rsi],xmm9
+	lea	rsi,[128+rsi]
+	movdqu	xmm9,XMMWORD[112+rdi]
+	lea	rdi,[128+rdi]
+$L$ecb_enc_loop8_enter:
+
+	call	_aesni_encrypt8
+
+	sub	rdx,0x80
+	jnc	NEAR $L$ecb_enc_loop8	; no borrow: at least 8 more blocks remain
+
+	movups	XMMWORD[rsi],xmm2	; store the last full group of 8 results
+	mov	rcx,r11
+	movups	XMMWORD[16+rsi],xmm3
+	mov	eax,r10d
+	movups	XMMWORD[32+rsi],xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	movups	XMMWORD[80+rsi],xmm7
+	movups	XMMWORD[96+rsi],xmm8
+	movups	XMMWORD[112+rsi],xmm9
+	lea	rsi,[128+rsi]
+	add	rdx,0x80	; undo the last subtraction: rdx = remaining bytes (0..0x70)
+	jz	NEAR $L$ecb_ret
+
+$L$ecb_enc_tail:	; 1..7 blocks remain; the movups loads between cmp and je leave the flags intact
+	movups	xmm2,XMMWORD[rdi]
+	cmp	rdx,0x20
+	jb	NEAR $L$ecb_enc_one	; 1 block
+	movups	xmm3,XMMWORD[16+rdi]
+	je	NEAR $L$ecb_enc_two	; 2 blocks
+	movups	xmm4,XMMWORD[32+rdi]
+	cmp	rdx,0x40
+	jb	NEAR $L$ecb_enc_three	; 3 blocks
+	movups	xmm5,XMMWORD[48+rdi]
+	je	NEAR $L$ecb_enc_four	; 4 blocks
+	movups	xmm6,XMMWORD[64+rdi]
+	cmp	rdx,0x60
+	jb	NEAR $L$ecb_enc_five	; 5 blocks
+	movups	xmm7,XMMWORD[80+rdi]
+	je	NEAR $L$ecb_enc_six	; 6 blocks
+	movdqu	xmm8,XMMWORD[96+rdi]	; 7 blocks: use the 8-way helper
+	xorps	xmm9,xmm9	; zero the unused eighth lane
+	call	_aesni_encrypt8
+	movups	XMMWORD[rsi],xmm2	; only seven results are stored
+	movups	XMMWORD[16+rsi],xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	movups	XMMWORD[80+rsi],xmm7
+	movups	XMMWORD[96+rsi],xmm8
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_enc_one:	; single block: inline one-block encrypt loop
+	movups	xmm0,XMMWORD[rcx]
+	movups	xmm1,XMMWORD[16+rcx]
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm0
+$L$oop_enc1_3:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	eax	; sets ZF for the jnz below
+	movups	xmm1,XMMWORD[rcx]
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_enc1_3
+DB	102,15,56,221,209	; aesenclast xmm2,xmm1
+	movups	XMMWORD[rsi],xmm2
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_enc_two:
+	call	_aesni_encrypt2
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_enc_three:
+	call	_aesni_encrypt3
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_enc_four:
+	call	_aesni_encrypt4
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_enc_five:	; 5 blocks go through the 6-way helper
+	xorps	xmm7,xmm7	; zero the unused sixth lane
+	call	_aesni_encrypt6
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_enc_six:
+	call	_aesni_encrypt6
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	movups	XMMWORD[80+rsi],xmm7
+	jmp	NEAR $L$ecb_ret
+; decrypt path: same structure as above; result registers are additionally zeroed after each final store
+ALIGN	16
+$L$ecb_decrypt:
+	cmp	rdx,0x80
+	jb	NEAR $L$ecb_dec_tail	; fewer than 8 blocks: go straight to the tail
+
+	movdqu	xmm2,XMMWORD[rdi]	; load the first 8 input blocks
+	movdqu	xmm3,XMMWORD[16+rdi]
+	movdqu	xmm4,XMMWORD[32+rdi]
+	movdqu	xmm5,XMMWORD[48+rdi]
+	movdqu	xmm6,XMMWORD[64+rdi]
+	movdqu	xmm7,XMMWORD[80+rdi]
+	movdqu	xmm8,XMMWORD[96+rdi]
+	movdqu	xmm9,XMMWORD[112+rdi]
+	lea	rdi,[128+rdi]
+	sub	rdx,0x80
+	jmp	NEAR $L$ecb_dec_loop8_enter
+ALIGN	16
+$L$ecb_dec_loop8:	; store the previous 8 results while loading the next 8 inputs
+	movups	XMMWORD[rsi],xmm2
+	mov	rcx,r11	; restore the key pointer for the next helper call
+	movdqu	xmm2,XMMWORD[rdi]
+	mov	eax,r10d	; restore the round-loop count
+	movups	XMMWORD[16+rsi],xmm3
+	movdqu	xmm3,XMMWORD[16+rdi]
+	movups	XMMWORD[32+rsi],xmm4
+	movdqu	xmm4,XMMWORD[32+rdi]
+	movups	XMMWORD[48+rsi],xmm5
+	movdqu	xmm5,XMMWORD[48+rdi]
+	movups	XMMWORD[64+rsi],xmm6
+	movdqu	xmm6,XMMWORD[64+rdi]
+	movups	XMMWORD[80+rsi],xmm7
+	movdqu	xmm7,XMMWORD[80+rdi]
+	movups	XMMWORD[96+rsi],xmm8
+	movdqu	xmm8,XMMWORD[96+rdi]
+	movups	XMMWORD[112+rsi],xmm9
+	lea	rsi,[128+rsi]
+	movdqu	xmm9,XMMWORD[112+rdi]
+	lea	rdi,[128+rdi]
+$L$ecb_dec_loop8_enter:
+
+	call	_aesni_decrypt8
+
+	movups	xmm0,XMMWORD[r11]	; reload the first 16 bytes of the key schedule into xmm0
+	sub	rdx,0x80
+	jnc	NEAR $L$ecb_dec_loop8	; no borrow: at least 8 more blocks remain
+
+	movups	XMMWORD[rsi],xmm2	; store the last full group, zeroing each register after its store
+	pxor	xmm2,xmm2
+	mov	rcx,r11
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	mov	eax,r10d
+	movups	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	pxor	xmm5,xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	pxor	xmm6,xmm6
+	movups	XMMWORD[80+rsi],xmm7
+	pxor	xmm7,xmm7
+	movups	XMMWORD[96+rsi],xmm8
+	pxor	xmm8,xmm8
+	movups	XMMWORD[112+rsi],xmm9
+	pxor	xmm9,xmm9
+	lea	rsi,[128+rsi]
+	add	rdx,0x80	; undo the last subtraction: rdx = remaining bytes (0..0x70)
+	jz	NEAR $L$ecb_ret
+
+$L$ecb_dec_tail:	; 1..7 blocks remain; the movups loads between cmp and je leave the flags intact
+	movups	xmm2,XMMWORD[rdi]
+	cmp	rdx,0x20
+	jb	NEAR $L$ecb_dec_one	; 1 block
+	movups	xmm3,XMMWORD[16+rdi]
+	je	NEAR $L$ecb_dec_two	; 2 blocks
+	movups	xmm4,XMMWORD[32+rdi]
+	cmp	rdx,0x40
+	jb	NEAR $L$ecb_dec_three	; 3 blocks
+	movups	xmm5,XMMWORD[48+rdi]
+	je	NEAR $L$ecb_dec_four	; 4 blocks
+	movups	xmm6,XMMWORD[64+rdi]
+	cmp	rdx,0x60
+	jb	NEAR $L$ecb_dec_five	; 5 blocks
+	movups	xmm7,XMMWORD[80+rdi]
+	je	NEAR $L$ecb_dec_six	; 6 blocks
+	movups	xmm8,XMMWORD[96+rdi]	; 7 blocks: use the 8-way helper
+	movups	xmm0,XMMWORD[rcx]	; xmm0 = round key 0 (the helper loads it again itself)
+	xorps	xmm9,xmm9	; zero the unused eighth lane
+	call	_aesni_decrypt8
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	pxor	xmm5,xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	pxor	xmm6,xmm6
+	movups	XMMWORD[80+rsi],xmm7
+	pxor	xmm7,xmm7
+	movups	XMMWORD[96+rsi],xmm8
+	pxor	xmm8,xmm8
+	pxor	xmm9,xmm9	; the eighth lane's output is discarded
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_dec_one:	; single block: inline one-block decrypt loop
+	movups	xmm0,XMMWORD[rcx]
+	movups	xmm1,XMMWORD[16+rcx]
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm0
+$L$oop_dec1_4:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+	dec	eax	; sets ZF for the jnz below
+	movups	xmm1,XMMWORD[rcx]
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_dec1_4
+DB	102,15,56,223,209	; aesdeclast xmm2,xmm1
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_dec_two:
+	call	_aesni_decrypt2
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_dec_three:
+	call	_aesni_decrypt3
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_dec_four:
+	call	_aesni_decrypt4
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	pxor	xmm5,xmm5
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_dec_five:	; 5 blocks go through the 6-way helper
+	xorps	xmm7,xmm7	; zero the unused sixth lane
+	call	_aesni_decrypt6
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	pxor	xmm5,xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	pxor	xmm6,xmm6
+	pxor	xmm7,xmm7	; the sixth lane's output is discarded
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_dec_six:	; falls through into $L$ecb_ret
+	call	_aesni_decrypt6
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	pxor	xmm5,xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	pxor	xmm6,xmm6
+	movups	XMMWORD[80+rsi],xmm7
+	pxor	xmm7,xmm7
+
+$L$ecb_ret:	; common exit: restore xmm6..xmm9 and overwrite their stack save slots with zeros
+	xorps	xmm0,xmm0
+	pxor	xmm1,xmm1
+	movaps	xmm6,XMMWORD[rsp]
+	movaps	XMMWORD[rsp],xmm0
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	XMMWORD[16+rsp],xmm0
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	XMMWORD[32+rsp],xmm0
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	XMMWORD[48+rsp],xmm0
+	lea	rsp,[88+rsp]	; release the 88-byte frame
+$L$ecb_enc_ret:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_aesni_ecb_encrypt:
+global	aesni_ccm64_encrypt_blocks
+; aesni_ccm64_encrypt_blocks(arg1 = in, arg2 = out, arg3 = block count, arg4 = key schedule, arg5 = 16-byte counter block, arg6 = 16-byte state updated in place, presumably the CCM MAC).
+ALIGN	16
+aesni_ccm64_encrypt_blocks:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesni_ccm64_encrypt_blocks:
+	mov	rdi,rcx	; rdi = in (arg1)
+	mov	rsi,rdx	; rsi = out (arg2)
+	mov	rdx,r8	; rdx = number of 16-byte blocks (arg3)
+	mov	rcx,r9	; rcx = key schedule (arg4)
+	mov	r8,QWORD[40+rsp]	; r8 = arg5, read from the stack
+	mov	r9,QWORD[48+rsp]	; r9 = arg6, read from the stack
+
+
+	lea	rsp,[((-88))+rsp]	; reserve 88 bytes of stack
+	movaps	XMMWORD[rsp],xmm6	; save xmm6..xmm9 (non-volatile in the Microsoft x64 convention)
+	movaps	XMMWORD[16+rsp],xmm7
+	movaps	XMMWORD[32+rsp],xmm8
+	movaps	XMMWORD[48+rsp],xmm9
+$L$ccm64_enc_body:
+	mov	eax,DWORD[240+rcx]	; eax = round-loop count from offset 240 of the key structure
+	movdqu	xmm6,XMMWORD[r8]	; xmm6 = counter block
+	movdqa	xmm9,XMMWORD[$L$increment64]	; constant defined elsewhere in this file; added to the counter with paddq
+	movdqa	xmm7,XMMWORD[$L$bswap_mask]	; constant defined elsewhere in this file; used as the pshufb shuffle mask
+
+	shl	eax,4	; count -> byte offset (16 bytes per round key)
+	mov	r10d,16
+	lea	r11,[rcx]	; r11 = start of the key schedule
+	movdqu	xmm3,XMMWORD[r9]	; xmm3 = 16-byte state from [r9]
+	movdqa	xmm2,xmm6	; xmm2 = counter block for the first iteration
+	lea	rcx,[32+rax*1+rcx]	; rcx -> 16 bytes past the last round key used
+DB	102,15,56,0,247	; pshufb xmm6,xmm7
+	sub	r10,rax	; r10 = initial (negative) round-key index, reloaded into rax for each block
+	jmp	NEAR $L$ccm64_enc_outer
+ALIGN	16
+$L$ccm64_enc_outer:	; one input block per iteration; xmm2 and xmm3 are encrypted side by side
+	movups	xmm0,XMMWORD[r11]	; round key 0
+	mov	rax,r10
+	movups	xmm8,XMMWORD[rdi]	; xmm8 = input block
+
+	xorps	xmm2,xmm0	; whiten the counter block
+	movups	xmm1,XMMWORD[16+r11]	; round key 1
+	xorps	xmm0,xmm8	; xmm0 = round key 0 ^ input
+	xorps	xmm3,xmm0	; fold the input into xmm3 and whiten it in one step
+	movups	xmm0,XMMWORD[32+r11]	; round key 2
+
+$L$ccm64_enc2_loop:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; ZF is set when the index reaches zero; consumed by the jnz below
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$ccm64_enc2_loop
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+	paddq	xmm6,xmm9	; advance the counter
+	dec	rdx	; sets ZF for the jnz at the end of the outer loop; nothing in between touches the flags
+DB	102,15,56,221,208	; aesenclast xmm2,xmm0
+DB	102,15,56,221,216	; aesenclast xmm3,xmm0
+
+	lea	rdi,[16+rdi]
+	xorps	xmm8,xmm2	; output = input ^ encrypted counter block
+	movdqa	xmm2,xmm6	; next counter block
+	movups	XMMWORD[rsi],xmm8
+DB	102,15,56,0,215	; pshufb xmm2,xmm7
+	lea	rsi,[16+rsi]
+	jnz	NEAR $L$ccm64_enc_outer
+
+	pxor	xmm0,xmm0	; zero the working registers; the updated state is written back to [r9]
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	movups	XMMWORD[r9],xmm3
+	pxor	xmm3,xmm3
+	pxor	xmm8,xmm8
+	pxor	xmm6,xmm6
+	movaps	xmm6,XMMWORD[rsp]	; restore xmm6..xmm9 and overwrite their stack save slots with zeros
+	movaps	XMMWORD[rsp],xmm0
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	XMMWORD[16+rsp],xmm0
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	XMMWORD[32+rsp],xmm0
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	XMMWORD[48+rsp],xmm0
+	lea	rsp,[88+rsp]	; release the 88-byte frame
+$L$ccm64_enc_ret:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_aesni_ccm64_encrypt_blocks:
+global	aesni_ccm64_decrypt_blocks
+; aesni_ccm64_decrypt_blocks(arg1 = in, arg2 = out, arg3 = block count, arg4 = key schedule, arg5 = 16-byte counter block, arg6 = 16-byte state updated in place, presumably the CCM MAC).
+ALIGN	16
+aesni_ccm64_decrypt_blocks:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesni_ccm64_decrypt_blocks:
+	mov	rdi,rcx	; rdi = in (arg1)
+	mov	rsi,rdx	; rsi = out (arg2)
+	mov	rdx,r8	; rdx = number of 16-byte blocks (arg3)
+	mov	rcx,r9	; rcx = key schedule (arg4)
+	mov	r8,QWORD[40+rsp]	; r8 = arg5, read from the stack
+	mov	r9,QWORD[48+rsp]	; r9 = arg6, read from the stack
+
+
+	lea	rsp,[((-88))+rsp]	; reserve 88 bytes of stack
+	movaps	XMMWORD[rsp],xmm6	; save xmm6..xmm9 (non-volatile in the Microsoft x64 convention)
+	movaps	XMMWORD[16+rsp],xmm7
+	movaps	XMMWORD[32+rsp],xmm8
+	movaps	XMMWORD[48+rsp],xmm9
+$L$ccm64_dec_body:
+	mov	eax,DWORD[240+rcx]	; eax = round-loop count from offset 240 of the key structure
+	movups	xmm6,XMMWORD[r8]	; xmm6 = counter block
+	movdqu	xmm3,XMMWORD[r9]	; xmm3 = 16-byte state from [r9]
+	movdqa	xmm9,XMMWORD[$L$increment64]	; constant defined elsewhere in this file; added to the counter with paddq
+	movdqa	xmm7,XMMWORD[$L$bswap_mask]	; constant defined elsewhere in this file; used as the pshufb shuffle mask
+
+	movaps	xmm2,xmm6	; xmm2 = first counter block
+	mov	r10d,eax	; keep a copy of the count
+	mov	r11,rcx	; r11 = start of the key schedule
+DB	102,15,56,0,247	; pshufb xmm6,xmm7
+	movups	xmm0,XMMWORD[rcx]	; encrypt the first counter block on its own
+	movups	xmm1,XMMWORD[16+rcx]
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm0
+$L$oop_enc1_5:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	eax	; sets ZF for the jnz below
+	movups	xmm1,XMMWORD[rcx]
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_enc1_5
+DB	102,15,56,221,209	; aesenclast xmm2,xmm1
+	shl	r10d,4	; count -> byte offset (16 bytes per round key)
+	mov	eax,16
+	movups	xmm8,XMMWORD[rdi]	; xmm8 = first input block
+	paddq	xmm6,xmm9	; advance the counter
+	lea	rdi,[16+rdi]
+	sub	rax,r10	; rax = initial (negative) round-key index
+	lea	rcx,[32+r10*1+r11]	; rcx -> 16 bytes past the last round key used
+	mov	r10,rax	; r10 keeps the initial index for reloading each iteration
+	jmp	NEAR $L$ccm64_dec_outer
+ALIGN	16
+$L$ccm64_dec_outer:
+	xorps	xmm8,xmm2	; output = input ^ encrypted counter block
+	movdqa	xmm2,xmm6	; next counter block
+	movups	XMMWORD[rsi],xmm8
+	lea	rsi,[16+rsi]
+DB	102,15,56,0,215	; pshufb xmm2,xmm7
+
+	sub	rdx,1
+	jz	NEAR $L$ccm64_dec_break	; last block: finish the state update separately
+
+	movups	xmm0,XMMWORD[r11]	; round key 0
+	mov	rax,r10
+	movups	xmm1,XMMWORD[16+r11]	; round key 1
+	xorps	xmm8,xmm0	; xmm8 = output block ^ round key 0
+	xorps	xmm2,xmm0	; whiten the next counter block
+	xorps	xmm3,xmm8	; fold the output block into xmm3 and whiten it in one step
+	movups	xmm0,XMMWORD[32+r11]	; round key 2
+	jmp	NEAR $L$ccm64_dec2_loop
+ALIGN	16
+$L$ccm64_dec2_loop:	; xmm2 and xmm3 are encrypted side by side
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; ZF is set when the index reaches zero; consumed by the jnz below
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$ccm64_dec2_loop
+	movups	xmm8,XMMWORD[rdi]	; load the next input block
+	paddq	xmm6,xmm9	; advance the counter
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,221,208	; aesenclast xmm2,xmm0
+DB	102,15,56,221,216	; aesenclast xmm3,xmm0
+	lea	rdi,[16+rdi]
+	jmp	NEAR $L$ccm64_dec_outer
+
+ALIGN	16
+$L$ccm64_dec_break:
+; final state update: fold the last output block into xmm3 and encrypt xmm3 alone
+	mov	eax,DWORD[240+r11]
+	movups	xmm0,XMMWORD[r11]
+	movups	xmm1,XMMWORD[16+r11]
+	xorps	xmm8,xmm0
+	lea	r11,[32+r11]
+	xorps	xmm3,xmm8
+$L$oop_enc1_6:
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+	dec	eax	; sets ZF for the jnz below
+	movups	xmm1,XMMWORD[r11]
+	lea	r11,[16+r11]
+	jnz	NEAR $L$oop_enc1_6
+DB	102,15,56,221,217	; aesenclast xmm3,xmm1
+	pxor	xmm0,xmm0	; zero the working registers; the updated state is written back to [r9]
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	movups	XMMWORD[r9],xmm3
+	pxor	xmm3,xmm3
+	pxor	xmm8,xmm8
+	pxor	xmm6,xmm6
+	movaps	xmm6,XMMWORD[rsp]	; restore xmm6..xmm9 and overwrite their stack save slots with zeros
+	movaps	XMMWORD[rsp],xmm0
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	XMMWORD[16+rsp],xmm0
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	XMMWORD[32+rsp],xmm0
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	XMMWORD[48+rsp],xmm0
+	lea	rsp,[88+rsp]	; release the 88-byte frame
+$L$ccm64_dec_ret:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_aesni_ccm64_decrypt_blocks:
+global	aesni_ctr32_encrypt_blocks
+
+ALIGN	16
+aesni_ctr32_encrypt_blocks:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesni_ctr32_encrypt_blocks:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+
+
+	cmp	rdx,1
+	jne	NEAR $L$ctr32_bulk
+
+
+
+	movups	xmm2,XMMWORD[r8]
+	movups	xmm3,XMMWORD[rdi]
+	mov	edx,DWORD[240+rcx]
+	movups	xmm0,XMMWORD[rcx]
+	movups	xmm1,XMMWORD[16+rcx]
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm0
+$L$oop_enc1_7:
+DB	102,15,56,220,209
+	dec	edx
+	movups	xmm1,XMMWORD[rcx]
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_enc1_7
+DB	102,15,56,221,209
+	pxor	xmm0,xmm0
+	pxor	xmm1,xmm1
+	xorps	xmm2,xmm3
+	pxor	xmm3,xmm3
+	movups	XMMWORD[rsi],xmm2
+	xorps	xmm2,xmm2
+	jmp	NEAR $L$ctr32_epilogue
+
+ALIGN	16
+$L$ctr32_bulk:
+	lea	r11,[rsp]
+	push	rbp
+	sub	rsp,288
+	and	rsp,-16
+	movaps	XMMWORD[(-168)+r11],xmm6
+	movaps	XMMWORD[(-152)+r11],xmm7
+	movaps	XMMWORD[(-136)+r11],xmm8
+	movaps	XMMWORD[(-120)+r11],xmm9
+	movaps	XMMWORD[(-104)+r11],xmm10
+	movaps	XMMWORD[(-88)+r11],xmm11
+	movaps	XMMWORD[(-72)+r11],xmm12
+	movaps	XMMWORD[(-56)+r11],xmm13
+	movaps	XMMWORD[(-40)+r11],xmm14
+	movaps	XMMWORD[(-24)+r11],xmm15
+$L$ctr32_body:
+
+
+
+
+	movdqu	xmm2,XMMWORD[r8]
+	movdqu	xmm0,XMMWORD[rcx]
+	mov	r8d,DWORD[12+r8]
+	pxor	xmm2,xmm0
+	mov	ebp,DWORD[12+rcx]
+	movdqa	XMMWORD[rsp],xmm2
+	bswap	r8d
+	movdqa	xmm3,xmm2
+	movdqa	xmm4,xmm2
+	movdqa	xmm5,xmm2
+	movdqa	XMMWORD[64+rsp],xmm2
+	movdqa	XMMWORD[80+rsp],xmm2
+	movdqa	XMMWORD[96+rsp],xmm2
+	mov	r10,rdx
+	movdqa	XMMWORD[112+rsp],xmm2
+
+	lea	rax,[1+r8]
+	lea	rdx,[2+r8]
+	bswap	eax
+	bswap	edx
+	xor	eax,ebp
+	xor	edx,ebp
+DB	102,15,58,34,216,3
+	lea	rax,[3+r8]
+	movdqa	XMMWORD[16+rsp],xmm3
+DB	102,15,58,34,226,3
+	bswap	eax
+	mov	rdx,r10
+	lea	r10,[4+r8]
+	movdqa	XMMWORD[32+rsp],xmm4
+	xor	eax,ebp
+	bswap	r10d
+DB	102,15,58,34,232,3
+	xor	r10d,ebp
+	movdqa	XMMWORD[48+rsp],xmm5
+	lea	r9,[5+r8]
+	mov	DWORD[((64+12))+rsp],r10d
+	bswap	r9d
+	lea	r10,[6+r8]
+	mov	eax,DWORD[240+rcx]
+	xor	r9d,ebp
+	bswap	r10d
+	mov	DWORD[((80+12))+rsp],r9d
+	xor	r10d,ebp
+	lea	r9,[7+r8]
+	mov	DWORD[((96+12))+rsp],r10d
+	bswap	r9d
+	lea	r10,[OPENSSL_ia32cap_P]
+	mov	r10d,DWORD[4+r10]
+	xor	r9d,ebp
+	and	r10d,71303168
+	mov	DWORD[((112+12))+rsp],r9d
+
+	movups	xmm1,XMMWORD[16+rcx]
+
+	movdqa	xmm6,XMMWORD[64+rsp]
+	movdqa	xmm7,XMMWORD[80+rsp]
+
+	cmp	rdx,8
+	jb	NEAR $L$ctr32_tail
+
+	sub	rdx,6
+	cmp	r10d,4194304
+	je	NEAR $L$ctr32_6x
+
+	lea	rcx,[128+rcx]
+	sub	rdx,2
+	jmp	NEAR $L$ctr32_loop8
+
+ALIGN	16
+$L$ctr32_6x:
+	shl	eax,4
+	mov	r10d,48
+	bswap	ebp
+	lea	rcx,[32+rax*1+rcx]
+	sub	r10,rax
+	jmp	NEAR $L$ctr32_loop6
+
+ALIGN	16
+$L$ctr32_loop6:
+	add	r8d,6
+	movups	xmm0,XMMWORD[((-48))+r10*1+rcx]
+DB	102,15,56,220,209
+	mov	eax,r8d
+	xor	eax,ebp
+DB	102,15,56,220,217
+DB	0x0f,0x38,0xf1,0x44,0x24,12
+	lea	eax,[1+r8]
+DB	102,15,56,220,225
+	xor	eax,ebp
+DB	0x0f,0x38,0xf1,0x44,0x24,28
+DB	102,15,56,220,233
+	lea	eax,[2+r8]
+	xor	eax,ebp
+DB	102,15,56,220,241
+DB	0x0f,0x38,0xf1,0x44,0x24,44
+	lea	eax,[3+r8]
+DB	102,15,56,220,249
+	movups	xmm1,XMMWORD[((-32))+r10*1+rcx]
+	xor	eax,ebp
+
+DB	102,15,56,220,208
+DB	0x0f,0x38,0xf1,0x44,0x24,60
+	lea	eax,[4+r8]
+DB	102,15,56,220,216
+	xor	eax,ebp
+DB	0x0f,0x38,0xf1,0x44,0x24,76
+DB	102,15,56,220,224
+	lea	eax,[5+r8]
+	xor	eax,ebp
+DB	102,15,56,220,232
+DB	0x0f,0x38,0xf1,0x44,0x24,92
+	mov	rax,r10
+DB	102,15,56,220,240
+DB	102,15,56,220,248
+	movups	xmm0,XMMWORD[((-16))+r10*1+rcx]
+
+	call	$L$enc_loop6
+
+	movdqu	xmm8,XMMWORD[rdi]
+	movdqu	xmm9,XMMWORD[16+rdi]
+	movdqu	xmm10,XMMWORD[32+rdi]
+	movdqu	xmm11,XMMWORD[48+rdi]
+	movdqu	xmm12,XMMWORD[64+rdi]
+	movdqu	xmm13,XMMWORD[80+rdi]
+	lea	rdi,[96+rdi]
+	movups	xmm1,XMMWORD[((-64))+r10*1+rcx]
+	pxor	xmm8,xmm2
+	movaps	xmm2,XMMWORD[rsp]
+	pxor	xmm9,xmm3
+	movaps	xmm3,XMMWORD[16+rsp]
+	pxor	xmm10,xmm4
+	movaps	xmm4,XMMWORD[32+rsp]
+	pxor	xmm11,xmm5
+	movaps	xmm5,XMMWORD[48+rsp]
+	pxor	xmm12,xmm6
+	movaps	xmm6,XMMWORD[64+rsp]
+	pxor	xmm13,xmm7
+	movaps	xmm7,XMMWORD[80+rsp]
+	movdqu	XMMWORD[rsi],xmm8
+	movdqu	XMMWORD[16+rsi],xmm9
+	movdqu	XMMWORD[32+rsi],xmm10
+	movdqu	XMMWORD[48+rsi],xmm11
+	movdqu	XMMWORD[64+rsi],xmm12
+	movdqu	XMMWORD[80+rsi],xmm13
+	lea	rsi,[96+rsi]
+
+	sub	rdx,6
+	jnc	NEAR $L$ctr32_loop6
+
+	add	rdx,6
+	jz	NEAR $L$ctr32_done
+
+	lea	eax,[((-48))+r10]
+	lea	rcx,[((-80))+r10*1+rcx]
+	neg	eax
+	shr	eax,4
+	jmp	NEAR $L$ctr32_tail
+
+ALIGN	32
+$L$ctr32_loop8:
+	add	r8d,8
+	movdqa	xmm8,XMMWORD[96+rsp]
+DB	102,15,56,220,209
+	mov	r9d,r8d
+	movdqa	xmm9,XMMWORD[112+rsp]
+DB	102,15,56,220,217
+	bswap	r9d
+	movups	xmm0,XMMWORD[((32-128))+rcx]
+DB	102,15,56,220,225
+	xor	r9d,ebp
+	nop
+DB	102,15,56,220,233
+	mov	DWORD[((0+12))+rsp],r9d
+	lea	r9,[1+r8]
+DB	102,15,56,220,241
+DB	102,15,56,220,249
+DB	102,68,15,56,220,193
+DB	102,68,15,56,220,201
+	movups	xmm1,XMMWORD[((48-128))+rcx]
+	bswap	r9d
+DB	102,15,56,220,208
+DB	102,15,56,220,216
+	xor	r9d,ebp
+DB	0x66,0x90
+DB	102,15,56,220,224
+DB	102,15,56,220,232
+	mov	DWORD[((16+12))+rsp],r9d
+	lea	r9,[2+r8]
+DB	102,15,56,220,240
+DB	102,15,56,220,248
+DB	102,68,15,56,220,192
+DB	102,68,15,56,220,200
+	movups	xmm0,XMMWORD[((64-128))+rcx]
+	bswap	r9d
+DB	102,15,56,220,209
+DB	102,15,56,220,217
+	xor	r9d,ebp
+DB	0x66,0x90
+DB	102,15,56,220,225
+DB	102,15,56,220,233
+	mov	DWORD[((32+12))+rsp],r9d
+	lea	r9,[3+r8]
+DB	102,15,56,220,241
+DB	102,15,56,220,249
+DB	102,68,15,56,220,193
+DB	102,68,15,56,220,201
+	movups	xmm1,XMMWORD[((80-128))+rcx]
+	bswap	r9d
+DB	102,15,56,220,208
+DB	102,15,56,220,216
+	xor	r9d,ebp
+DB	0x66,0x90
+DB	102,15,56,220,224
+DB	102,15,56,220,232
+	mov	DWORD[((48+12))+rsp],r9d
+	lea	r9,[4+r8]
+DB	102,15,56,220,240
+DB	102,15,56,220,248
+DB	102,68,15,56,220,192
+DB	102,68,15,56,220,200
+	movups	xmm0,XMMWORD[((96-128))+rcx]
+	bswap	r9d
+DB	102,15,56,220,209
+DB	102,15,56,220,217
+	xor	r9d,ebp
+DB	0x66,0x90
+DB	102,15,56,220,225
+DB	102,15,56,220,233
+	mov	DWORD[((64+12))+rsp],r9d
+	lea	r9,[5+r8]
+DB	102,15,56,220,241
+DB	102,15,56,220,249
+DB	102,68,15,56,220,193
+DB	102,68,15,56,220,201
+	movups	xmm1,XMMWORD[((112-128))+rcx]
+	bswap	r9d
+DB	102,15,56,220,208
+DB	102,15,56,220,216
+	xor	r9d,ebp
+DB	0x66,0x90
+DB	102,15,56,220,224
+DB	102,15,56,220,232
+	mov	DWORD[((80+12))+rsp],r9d
+	lea	r9,[6+r8]
+DB	102,15,56,220,240
+DB	102,15,56,220,248
+DB	102,68,15,56,220,192
+DB	102,68,15,56,220,200
+	movups	xmm0,XMMWORD[((128-128))+rcx]
+	bswap	r9d
+DB	102,15,56,220,209
+DB	102,15,56,220,217
+	xor	r9d,ebp
+DB	0x66,0x90
+DB	102,15,56,220,225
+DB	102,15,56,220,233
+	mov	DWORD[((96+12))+rsp],r9d
+	lea	r9,[7+r8]
+DB	102,15,56,220,241
+DB	102,15,56,220,249
+DB	102,68,15,56,220,193
+DB	102,68,15,56,220,201
+	movups	xmm1,XMMWORD[((144-128))+rcx]
+	bswap	r9d
+DB	102,15,56,220,208
+DB	102,15,56,220,216
+DB	102,15,56,220,224
+	xor	r9d,ebp
+	movdqu	xmm10,XMMWORD[rdi]
+DB	102,15,56,220,232
+	mov	DWORD[((112+12))+rsp],r9d
+	cmp	eax,11
+DB	102,15,56,220,240
+DB	102,15,56,220,248
+DB	102,68,15,56,220,192
+DB	102,68,15,56,220,200
+	movups	xmm0,XMMWORD[((160-128))+rcx]
+
+	jb	NEAR $L$ctr32_enc_done
+
+DB	102,15,56,220,209
+DB	102,15,56,220,217
+DB	102,15,56,220,225
+DB	102,15,56,220,233
+DB	102,15,56,220,241
+DB	102,15,56,220,249
+DB	102,68,15,56,220,193
+DB	102,68,15,56,220,201
+	movups	xmm1,XMMWORD[((176-128))+rcx]
+
+DB	102,15,56,220,208
+DB	102,15,56,220,216
+DB	102,15,56,220,224
+DB	102,15,56,220,232
+DB	102,15,56,220,240
+DB	102,15,56,220,248
+DB	102,68,15,56,220,192
+DB	102,68,15,56,220,200
+	movups	xmm0,XMMWORD[((192-128))+rcx]
+	je	NEAR $L$ctr32_enc_done
+
+DB	102,15,56,220,209
+DB	102,15,56,220,217
+DB	102,15,56,220,225
+DB	102,15,56,220,233
+DB	102,15,56,220,241
+DB	102,15,56,220,249
+DB	102,68,15,56,220,193
+DB	102,68,15,56,220,201
+	movups	xmm1,XMMWORD[((208-128))+rcx]
+
+DB	102,15,56,220,208
+DB	102,15,56,220,216
+DB	102,15,56,220,224
+DB	102,15,56,220,232
+DB	102,15,56,220,240
+DB	102,15,56,220,248
+DB	102,68,15,56,220,192
+DB	102,68,15,56,220,200
+	movups	xmm0,XMMWORD[((224-128))+rcx]
+	jmp	NEAR $L$ctr32_enc_done
+
+ALIGN	16
+$L$ctr32_enc_done:
+	movdqu	xmm11,XMMWORD[16+rdi]
+	pxor	xmm10,xmm0
+	movdqu	xmm12,XMMWORD[32+rdi]
+	pxor	xmm11,xmm0
+	movdqu	xmm13,XMMWORD[48+rdi]
+	pxor	xmm12,xmm0
+	movdqu	xmm14,XMMWORD[64+rdi]
+	pxor	xmm13,xmm0
+	movdqu	xmm15,XMMWORD[80+rdi]
+	pxor	xmm14,xmm0
+	pxor	xmm15,xmm0
+DB	102,15,56,220,209
+DB	102,15,56,220,217
+DB	102,15,56,220,225
+DB	102,15,56,220,233
+DB	102,15,56,220,241
+DB	102,15,56,220,249
+DB	102,68,15,56,220,193
+DB	102,68,15,56,220,201
+	movdqu	xmm1,XMMWORD[96+rdi]
+	lea	rdi,[128+rdi]
+
+DB	102,65,15,56,221,210
+	pxor	xmm1,xmm0
+	movdqu	xmm10,XMMWORD[((112-128))+rdi]
+DB	102,65,15,56,221,219
+	pxor	xmm10,xmm0
+	movdqa	xmm11,XMMWORD[rsp]
+DB	102,65,15,56,221,228
+DB	102,65,15,56,221,237
+	movdqa	xmm12,XMMWORD[16+rsp]
+	movdqa	xmm13,XMMWORD[32+rsp]
+DB	102,65,15,56,221,246
+DB	102,65,15,56,221,255
+	movdqa	xmm14,XMMWORD[48+rsp]
+	movdqa	xmm15,XMMWORD[64+rsp]
+DB	102,68,15,56,221,193
+	movdqa	xmm0,XMMWORD[80+rsp]
+	movups	xmm1,XMMWORD[((16-128))+rcx]
+DB	102,69,15,56,221,202
+
+	movups	XMMWORD[rsi],xmm2
+	movdqa	xmm2,xmm11
+	movups	XMMWORD[16+rsi],xmm3
+	movdqa	xmm3,xmm12
+	movups	XMMWORD[32+rsi],xmm4
+	movdqa	xmm4,xmm13
+	movups	XMMWORD[48+rsi],xmm5
+	movdqa	xmm5,xmm14
+	movups	XMMWORD[64+rsi],xmm6
+	movdqa	xmm6,xmm15
+	movups	XMMWORD[80+rsi],xmm7
+	movdqa	xmm7,xmm0
+	movups	XMMWORD[96+rsi],xmm8
+	movups	XMMWORD[112+rsi],xmm9
+	lea	rsi,[128+rsi]
+
+	sub	rdx,8
+	jnc	NEAR $L$ctr32_loop8
+
+	add	rdx,8
+	jz	NEAR $L$ctr32_done
+	lea	rcx,[((-128))+rcx]
+
+$L$ctr32_tail:
+
+
+	lea	rcx,[16+rcx]
+	cmp	rdx,4
+	jb	NEAR $L$ctr32_loop3
+	je	NEAR $L$ctr32_loop4
+
+
+	shl	eax,4
+	movdqa	xmm8,XMMWORD[96+rsp]
+	pxor	xmm9,xmm9
+
+	movups	xmm0,XMMWORD[16+rcx]
+DB	102,15,56,220,209
+DB	102,15,56,220,217
+	lea	rcx,[((32-16))+rax*1+rcx]
+	neg	rax
+DB	102,15,56,220,225
+	add	rax,16
+	movups	xmm10,XMMWORD[rdi]
+DB	102,15,56,220,233
+DB	102,15,56,220,241
+	movups	xmm11,XMMWORD[16+rdi]
+	movups	xmm12,XMMWORD[32+rdi]
+DB	102,15,56,220,249
+DB	102,68,15,56,220,193
+
+	call	$L$enc_loop8_enter
+
+	movdqu	xmm13,XMMWORD[48+rdi]
+	pxor	xmm2,xmm10
+	movdqu	xmm10,XMMWORD[64+rdi]
+	pxor	xmm3,xmm11
+	movdqu	XMMWORD[rsi],xmm2
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[16+rsi],xmm3
+	pxor	xmm5,xmm13
+	movdqu	XMMWORD[32+rsi],xmm4
+	pxor	xmm6,xmm10
+	movdqu	XMMWORD[48+rsi],xmm5
+	movdqu	XMMWORD[64+rsi],xmm6
+	cmp	rdx,6
+	jb	NEAR $L$ctr32_done
+
+	movups	xmm11,XMMWORD[80+rdi]
+	xorps	xmm7,xmm11
+	movups	XMMWORD[80+rsi],xmm7
+	je	NEAR $L$ctr32_done
+
+	movups	xmm12,XMMWORD[96+rdi]
+	xorps	xmm8,xmm12
+	movups	XMMWORD[96+rsi],xmm8
+	jmp	NEAR $L$ctr32_done
+
+ALIGN	32
+$L$ctr32_loop4:
+DB	102,15,56,220,209
+	lea	rcx,[16+rcx]
+	dec	eax
+DB	102,15,56,220,217
+DB	102,15,56,220,225
+DB	102,15,56,220,233
+	movups	xmm1,XMMWORD[rcx]
+	jnz	NEAR $L$ctr32_loop4
+DB	102,15,56,221,209
+DB	102,15,56,221,217
+	movups	xmm10,XMMWORD[rdi]
+	movups	xmm11,XMMWORD[16+rdi]
+DB	102,15,56,221,225
+DB	102,15,56,221,233
+	movups	xmm12,XMMWORD[32+rdi]
+	movups	xmm13,XMMWORD[48+rdi]
+
+	xorps	xmm2,xmm10
+	movups	XMMWORD[rsi],xmm2
+	xorps	xmm3,xmm11
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[32+rsi],xmm4
+	pxor	xmm5,xmm13
+	movdqu	XMMWORD[48+rsi],xmm5
+	jmp	NEAR $L$ctr32_done
+
+ALIGN	32
+$L$ctr32_loop3:
+DB	102,15,56,220,209
+	lea	rcx,[16+rcx]
+	dec	eax
+DB	102,15,56,220,217
+DB	102,15,56,220,225
+	movups	xmm1,XMMWORD[rcx]
+	jnz	NEAR $L$ctr32_loop3
+DB	102,15,56,221,209
+DB	102,15,56,221,217
+DB	102,15,56,221,225
+
+	movups	xmm10,XMMWORD[rdi]
+	xorps	xmm2,xmm10
+	movups	XMMWORD[rsi],xmm2
+	cmp	rdx,2
+	jb	NEAR $L$ctr32_done
+
+	movups	xmm11,XMMWORD[16+rdi]
+	xorps	xmm3,xmm11
+	movups	XMMWORD[16+rsi],xmm3
+	je	NEAR $L$ctr32_done
+
+	movups	xmm12,XMMWORD[32+rdi]
+	xorps	xmm4,xmm12
+	movups	XMMWORD[32+rsi],xmm4
+
+$L$ctr32_done:
+	xorps	xmm0,xmm0
+	xor	ebp,ebp
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	movaps	xmm6,XMMWORD[((-168))+r11]
+	movaps	XMMWORD[(-168)+r11],xmm0
+	movaps	xmm7,XMMWORD[((-152))+r11]
+	movaps	XMMWORD[(-152)+r11],xmm0
+	movaps	xmm8,XMMWORD[((-136))+r11]
+	movaps	XMMWORD[(-136)+r11],xmm0
+	movaps	xmm9,XMMWORD[((-120))+r11]
+	movaps	XMMWORD[(-120)+r11],xmm0
+	movaps	xmm10,XMMWORD[((-104))+r11]
+	movaps	XMMWORD[(-104)+r11],xmm0
+	movaps	xmm11,XMMWORD[((-88))+r11]
+	movaps	XMMWORD[(-88)+r11],xmm0
+	movaps	xmm12,XMMWORD[((-72))+r11]
+	movaps	XMMWORD[(-72)+r11],xmm0
+	movaps	xmm13,XMMWORD[((-56))+r11]
+	movaps	XMMWORD[(-56)+r11],xmm0
+	movaps	xmm14,XMMWORD[((-40))+r11]
+	movaps	XMMWORD[(-40)+r11],xmm0
+	movaps	xmm15,XMMWORD[((-24))+r11]
+	movaps	XMMWORD[(-24)+r11],xmm0
+	movaps	XMMWORD[rsp],xmm0
+	movaps	XMMWORD[16+rsp],xmm0
+	movaps	XMMWORD[32+rsp],xmm0
+	movaps	XMMWORD[48+rsp],xmm0
+	movaps	XMMWORD[64+rsp],xmm0
+	movaps	XMMWORD[80+rsp],xmm0
+	movaps	XMMWORD[96+rsp],xmm0
+	movaps	XMMWORD[112+rsp],xmm0
+	mov	rbp,QWORD[((-8))+r11]
+	lea	rsp,[r11]
+$L$ctr32_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_aesni_ctr32_encrypt_blocks:
+global	aesni_xts_encrypt
+
+ALIGN	16
+aesni_xts_encrypt:	; Win64 entry; the six arguments are remapped to rdi,rsi,rdx,rcx,r8,r9 below
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesni_xts_encrypt:
+	mov	rdi,rcx	; arg1 -> rdi: input pointer (blocks are loaded from [rdi])
+	mov	rsi,rdx	; arg2 -> rsi: output pointer (results are stored to [rsi])
+	mov	rdx,r8	; arg3 -> rdx: length in bytes
+	mov	rcx,r9	; arg4 -> rcx: key schedule applied to the data blocks
+	mov	r8,QWORD[40+rsp]	; arg5 -> r8: key schedule applied to the tweak seed
+	mov	r9,QWORD[48+rsp]	; arg6 -> r9: pointer to the 16-byte tweak seed
+
+
+	lea	r11,[rsp]	; r11 = entry rsp; anchors the xmm save slots and the epilogue
+	push	rbp
+	sub	rsp,272
+	and	rsp,-16	; 16-byte align the rsp-relative scratch area
+	movaps	XMMWORD[(-168)+r11],xmm6	; save xmm6-xmm15; they are reloaded at $L$xts_enc_ret
+	movaps	XMMWORD[(-152)+r11],xmm7
+	movaps	XMMWORD[(-136)+r11],xmm8
+	movaps	XMMWORD[(-120)+r11],xmm9
+	movaps	XMMWORD[(-104)+r11],xmm10
+	movaps	XMMWORD[(-88)+r11],xmm11
+	movaps	XMMWORD[(-72)+r11],xmm12
+	movaps	XMMWORD[(-56)+r11],xmm13
+	movaps	XMMWORD[(-40)+r11],xmm14
+	movaps	XMMWORD[(-24)+r11],xmm15
+$L$xts_enc_body:
+	movups	xmm2,XMMWORD[r9]	; xmm2 = 16-byte tweak seed
+	mov	eax,DWORD[240+r8]	; eax = loop count stored at offset 240 of the tweak key
+	mov	r10d,DWORD[240+rcx]	; r10d = loop count stored at offset 240 of the data key
+	movups	xmm0,XMMWORD[r8]
+	movups	xmm1,XMMWORD[16+r8]
+	lea	r8,[32+r8]
+	xorps	xmm2,xmm0	; xor in the first round key of the tweak key
+$L$oop_enc1_8:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	eax
+	movups	xmm1,XMMWORD[r8]
+	lea	r8,[16+r8]
+	jnz	NEAR $L$oop_enc1_8
+DB	102,15,56,221,209	; aesenclast xmm2,xmm1 -> xmm2 = encrypted seed, the first tweak
+	movups	xmm0,XMMWORD[rcx]	; xmm0 = round key 0 of the data key
+	mov	rbp,rcx	; rbp keeps the data key base for the rest of the function
+	mov	eax,r10d
+	shl	r10d,4	; r10 = 16 * loop count, a byte offset into the key schedule
+	mov	r9,rdx	; r9 keeps the full byte length; its low 4 bits are tested at $L$xts_enc_done
+	and	rdx,-16	; rdx = length rounded down to whole 16-byte blocks
+
+	movups	xmm1,XMMWORD[16+r10*1+rcx]	; round key at offset 16+16*count (later folded into the aesenclast operands)
+
+	movdqa	xmm8,XMMWORD[$L$xts_magic]	; constant defined outside this section
+	movdqa	xmm15,xmm2	; xmm15 = running tweak
+	pshufd	xmm9,xmm2,0x5f	; dword shuffle of the tweak; its sign bits feed the masks below
+	pxor	xmm1,xmm0	; xmm1 = that round key ^ round key 0
+	movdqa	xmm14,xmm9
+	paddd	xmm9,xmm9
+	movdqa	xmm10,xmm15	; xmm10 = tweak for block 0
+	psrad	xmm14,31	; spread each dword's sign bit into a full mask
+	paddq	xmm15,xmm15	; double the tweak within each 64-bit half
+	pand	xmm14,xmm8
+	pxor	xmm10,xmm0	; pre-xor round key 0 into the tweak
+	pxor	xmm15,xmm14	; fold the masked constant into the doubled tweak
+	movdqa	xmm14,xmm9
+	paddd	xmm9,xmm9
+	movdqa	xmm11,xmm15	; xmm11 = tweak for block 1
+	psrad	xmm14,31
+	paddq	xmm15,xmm15
+	pand	xmm14,xmm8
+	pxor	xmm11,xmm0
+	pxor	xmm15,xmm14
+	movdqa	xmm14,xmm9
+	paddd	xmm9,xmm9
+	movdqa	xmm12,xmm15	; xmm12 = tweak for block 2
+	psrad	xmm14,31
+	paddq	xmm15,xmm15
+	pand	xmm14,xmm8
+	pxor	xmm12,xmm0
+	pxor	xmm15,xmm14
+	movdqa	xmm14,xmm9
+	paddd	xmm9,xmm9
+	movdqa	xmm13,xmm15	; xmm13 = tweak for block 3
+	psrad	xmm14,31
+	paddq	xmm15,xmm15
+	pand	xmm14,xmm8
+	pxor	xmm13,xmm0
+	pxor	xmm15,xmm14
+	movdqa	xmm14,xmm15	; xmm14 = tweak for block 4
+	psrad	xmm9,31
+	paddq	xmm15,xmm15
+	pand	xmm9,xmm8
+	pxor	xmm14,xmm0
+	pxor	xmm15,xmm9	; xmm15 = tweak for block 5 (round key 0 not xor-ed in)
+	movaps	XMMWORD[96+rsp],xmm1	; stash the xor of the two round keys for the main loop
+
+	sub	rdx,16*6
+	jc	NEAR $L$xts_enc_short	; fewer than six whole blocks
+
+	mov	eax,16+96
+	lea	rcx,[32+r10*1+rbp]	; rcx = key base + 32 + 16*count
+	sub	rax,r10	; rax = 112 - 16*count, the round-key index the inner loop counts up to zero
+	movups	xmm1,XMMWORD[16+rbp]	; xmm1 = round key 1
+	mov	r10,rax	; r10 keeps the start index so rax can be reloaded each pass
+	lea	r8,[$L$xts_magic]
+	jmp	NEAR $L$xts_enc_grandloop
+
+ALIGN	32
+$L$xts_enc_grandloop:	; main loop: six 16-byte blocks per pass
+	movdqu	xmm2,XMMWORD[rdi]
+	movdqa	xmm8,xmm0	; xmm8 = round key 0 (xmm0 holds [rbp] on every entry)
+	movdqu	xmm3,XMMWORD[16+rdi]
+	pxor	xmm2,xmm10	; block ^ (tweak ^ round key 0)
+	movdqu	xmm4,XMMWORD[32+rdi]
+	pxor	xmm3,xmm11
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	movdqu	xmm5,XMMWORD[48+rdi]
+	pxor	xmm4,xmm12
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+	movdqu	xmm6,XMMWORD[64+rdi]
+	pxor	xmm5,xmm13
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+	movdqu	xmm7,XMMWORD[80+rdi]
+	pxor	xmm8,xmm15	; xmm8 = round key 0 ^ sixth tweak
+	movdqa	xmm9,XMMWORD[96+rsp]	; value stored at [96+rsp] before the loop
+	pxor	xmm6,xmm14
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+	movups	xmm0,XMMWORD[32+rbp]	; xmm0 = round key 2
+	lea	rdi,[96+rdi]
+	pxor	xmm7,xmm8
+
+	pxor	xmm10,xmm9	; combine each tweak with xmm9; the results become aesenclast operands
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+	pxor	xmm11,xmm9
+	movdqa	XMMWORD[rsp],xmm10	; [rsp+0..80] hold the six aesenclast operands
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+	movups	xmm1,XMMWORD[48+rbp]	; xmm1 = round key 3
+	pxor	xmm12,xmm9
+
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+	pxor	xmm13,xmm9
+	movdqa	XMMWORD[16+rsp],xmm11
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+	pxor	xmm14,xmm9
+	movdqa	XMMWORD[32+rsp],xmm12
+DB	102,15,56,220,224	; aesenc xmm4,xmm0
+DB	102,15,56,220,232	; aesenc xmm5,xmm0
+	pxor	xmm8,xmm9
+	movdqa	XMMWORD[64+rsp],xmm14
+DB	102,15,56,220,240	; aesenc xmm6,xmm0
+DB	102,15,56,220,248	; aesenc xmm7,xmm0
+	movups	xmm0,XMMWORD[64+rbp]	; xmm0 = round key 4
+	movdqa	XMMWORD[80+rsp],xmm8
+	pshufd	xmm9,xmm15,0x5f	; start deriving the next set of tweaks from xmm15
+	jmp	NEAR $L$xts_enc_loop6
+ALIGN	32
+$L$xts_enc_loop6:	; two rounds per iteration on all six lanes; rax counts up to zero
+DB	102,15,56,220,209	; aesenc xmm2..xmm7,xmm1 (six lines)
+DB	102,15,56,220,217
+DB	102,15,56,220,225
+DB	102,15,56,220,233
+DB	102,15,56,220,241
+DB	102,15,56,220,249
+	movups	xmm1,XMMWORD[((-64))+rax*1+rcx]
+	add	rax,32	; sets ZF for the jnz below (the SSE instructions in between leave flags alone)
+
+DB	102,15,56,220,208	; aesenc xmm2..xmm7,xmm0 (six lines)
+DB	102,15,56,220,216
+DB	102,15,56,220,224
+DB	102,15,56,220,232
+DB	102,15,56,220,240
+DB	102,15,56,220,248
+	movups	xmm0,XMMWORD[((-80))+rax*1+rcx]
+	jnz	NEAR $L$xts_enc_loop6
+
+	movdqa	xmm8,XMMWORD[r8]	; xmm8 = constant at $L$xts_magic (r8 was set before the loop)
+	movdqa	xmm14,xmm9
+	paddd	xmm9,xmm9
+DB	102,15,56,220,209	; remaining rounds are interleaved with the next tweak computation
+	paddq	xmm15,xmm15
+	psrad	xmm14,31
+DB	102,15,56,220,217
+	pand	xmm14,xmm8
+	movups	xmm10,XMMWORD[rbp]	; xmm10 = round key 0
+DB	102,15,56,220,225
+DB	102,15,56,220,233
+DB	102,15,56,220,241
+	pxor	xmm15,xmm14
+	movaps	xmm11,xmm10
+DB	102,15,56,220,249
+	movups	xmm1,XMMWORD[((-64))+rcx]
+
+	movdqa	xmm14,xmm9
+DB	102,15,56,220,208
+	paddd	xmm9,xmm9
+	pxor	xmm10,xmm15	; xmm10 = next block-0 tweak ^ round key 0
+DB	102,15,56,220,216
+	psrad	xmm14,31
+	paddq	xmm15,xmm15
+DB	102,15,56,220,224
+DB	102,15,56,220,232
+	pand	xmm14,xmm8
+	movaps	xmm12,xmm11
+DB	102,15,56,220,240
+	pxor	xmm15,xmm14
+	movdqa	xmm14,xmm9
+DB	102,15,56,220,248
+	movups	xmm0,XMMWORD[((-48))+rcx]
+
+	paddd	xmm9,xmm9
+DB	102,15,56,220,209
+	pxor	xmm11,xmm15	; xmm11 = next block-1 tweak ^ round key 0
+	psrad	xmm14,31
+DB	102,15,56,220,217
+	paddq	xmm15,xmm15
+	pand	xmm14,xmm8
+DB	102,15,56,220,225
+DB	102,15,56,220,233
+	movdqa	XMMWORD[48+rsp],xmm13	; fourth aesenclast operand, saved before xmm13 is reused
+	pxor	xmm15,xmm14
+DB	102,15,56,220,241
+	movaps	xmm13,xmm12
+	movdqa	xmm14,xmm9
+DB	102,15,56,220,249
+	movups	xmm1,XMMWORD[((-32))+rcx]
+
+	paddd	xmm9,xmm9
+DB	102,15,56,220,208
+	pxor	xmm12,xmm15	; xmm12 = next block-2 tweak ^ round key 0
+	psrad	xmm14,31
+DB	102,15,56,220,216
+	paddq	xmm15,xmm15
+	pand	xmm14,xmm8
+DB	102,15,56,220,224
+DB	102,15,56,220,232
+DB	102,15,56,220,240
+	pxor	xmm15,xmm14
+	movaps	xmm14,xmm13
+DB	102,15,56,220,248
+
+	movdqa	xmm0,xmm9
+	paddd	xmm9,xmm9
+DB	102,15,56,220,209
+	pxor	xmm13,xmm15	; xmm13 = next block-3 tweak ^ round key 0
+	psrad	xmm0,31
+DB	102,15,56,220,217
+	paddq	xmm15,xmm15
+	pand	xmm0,xmm8
+DB	102,15,56,220,225
+DB	102,15,56,220,233
+	pxor	xmm15,xmm0
+	movups	xmm0,XMMWORD[rbp]	; reload round key 0 for the next pass
+DB	102,15,56,220,241
+DB	102,15,56,220,249
+	movups	xmm1,XMMWORD[16+rbp]	; reload round key 1 for the next pass
+
+	pxor	xmm14,xmm15	; xmm14 = next block-4 tweak ^ round key 0
+DB	102,15,56,221,84,36,0	; aesenclast xmm2,[rsp+0]
+	psrad	xmm9,31
+	paddq	xmm15,xmm15
+DB	102,15,56,221,92,36,16	; aesenclast xmm3,[rsp+16]
+DB	102,15,56,221,100,36,32	; aesenclast xmm4,[rsp+32]
+	pand	xmm9,xmm8
+	mov	rax,r10	; reset the round-key index for the next pass
+DB	102,15,56,221,108,36,48	; aesenclast xmm5,[rsp+48]
+DB	102,15,56,221,116,36,64	; aesenclast xmm6,[rsp+64]
+DB	102,15,56,221,124,36,80	; aesenclast xmm7,[rsp+80]
+	pxor	xmm15,xmm9	; xmm15 = next block-5 tweak
+
+	lea	rsi,[96+rsi]
+	movups	XMMWORD[(-96)+rsi],xmm2	; store the six output blocks
+	movups	XMMWORD[(-80)+rsi],xmm3
+	movups	XMMWORD[(-64)+rsi],xmm4
+	movups	XMMWORD[(-48)+rsi],xmm5
+	movups	XMMWORD[(-32)+rsi],xmm6
+	movups	XMMWORD[(-16)+rsi],xmm7
+	sub	rdx,16*6
+	jnc	NEAR $L$xts_enc_grandloop	; at least six more whole blocks remain
+
+	mov	eax,16+96
+	sub	eax,r10d	; r10 was 112 - 16*count, so eax = 16*count
+	mov	rcx,rbp	; rcx = key schedule base again
+	shr	eax,4	; eax = loop count, as the tail code expects
+
+$L$xts_enc_short:	; tail dispatch: fewer than six whole blocks remain
+
+	mov	r10d,eax	; r10d = loop count, reloaded into eax by the stealing path
+	pxor	xmm10,xmm0	; xmm0 is round key 0 here; this strips it from the pre-xored tweak
+	add	rdx,16*6	; undo the subtraction that led here
+	jz	NEAR $L$xts_enc_done
+
+	pxor	xmm11,xmm0
+	cmp	rdx,0x20
+	jb	NEAR $L$xts_enc_one
+	pxor	xmm12,xmm0	; pxor leaves the flags from cmp intact for the je below
+	je	NEAR $L$xts_enc_two
+
+	pxor	xmm13,xmm0
+	cmp	rdx,0x40
+	jb	NEAR $L$xts_enc_three
+	pxor	xmm14,xmm0
+	je	NEAR $L$xts_enc_four
+
+	movdqu	xmm2,XMMWORD[rdi]	; five-block case
+	movdqu	xmm3,XMMWORD[16+rdi]
+	movdqu	xmm4,XMMWORD[32+rdi]
+	pxor	xmm2,xmm10
+	movdqu	xmm5,XMMWORD[48+rdi]
+	pxor	xmm3,xmm11
+	movdqu	xmm6,XMMWORD[64+rdi]
+	lea	rdi,[80+rdi]
+	pxor	xmm4,xmm12
+	pxor	xmm5,xmm13
+	pxor	xmm6,xmm14
+	pxor	xmm7,xmm7	; sixth lane is unused; zero it
+
+	call	_aesni_encrypt6	; defined outside this section; presumably encrypts xmm2-xmm7 with key rcx / count eax
+
+	xorps	xmm2,xmm10	; xor the tweaks back in after encryption
+	movdqa	xmm10,xmm15	; xmm10 = next tweak, used if a partial block follows
+	xorps	xmm3,xmm11
+	xorps	xmm4,xmm12
+	movdqu	XMMWORD[rsi],xmm2
+	xorps	xmm5,xmm13
+	movdqu	XMMWORD[16+rsi],xmm3
+	xorps	xmm6,xmm14
+	movdqu	XMMWORD[32+rsi],xmm4
+	movdqu	XMMWORD[48+rsi],xmm5
+	movdqu	XMMWORD[64+rsi],xmm6
+	lea	rsi,[80+rsi]
+	jmp	NEAR $L$xts_enc_done
+
+ALIGN	16
+$L$xts_enc_one:	; exactly one whole block left
+	movups	xmm2,XMMWORD[rdi]
+	lea	rdi,[16+rdi]
+	xorps	xmm2,xmm10	; xor with the tweak before encryption
+	movups	xmm0,XMMWORD[rcx]
+	movups	xmm1,XMMWORD[16+rcx]
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm0
+$L$oop_enc1_9:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	eax
+	movups	xmm1,XMMWORD[rcx]
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_enc1_9
+DB	102,15,56,221,209	; aesenclast xmm2,xmm1
+	xorps	xmm2,xmm10	; xor with the tweak after encryption
+	movdqa	xmm10,xmm11	; xmm10 = next tweak, used if a partial block follows
+	movups	XMMWORD[rsi],xmm2
+	lea	rsi,[16+rsi]
+	jmp	NEAR $L$xts_enc_done
+
+ALIGN	16
+$L$xts_enc_two:	; exactly two whole blocks left
+	movups	xmm2,XMMWORD[rdi]
+	movups	xmm3,XMMWORD[16+rdi]
+	lea	rdi,[32+rdi]
+	xorps	xmm2,xmm10
+	xorps	xmm3,xmm11
+
+	call	_aesni_encrypt2	; defined outside this section
+
+	xorps	xmm2,xmm10
+	movdqa	xmm10,xmm12	; next tweak for a possible partial block
+	xorps	xmm3,xmm11
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	lea	rsi,[32+rsi]
+	jmp	NEAR $L$xts_enc_done
+
+ALIGN	16
+$L$xts_enc_three:	; exactly three whole blocks left
+	movups	xmm2,XMMWORD[rdi]
+	movups	xmm3,XMMWORD[16+rdi]
+	movups	xmm4,XMMWORD[32+rdi]
+	lea	rdi,[48+rdi]
+	xorps	xmm2,xmm10
+	xorps	xmm3,xmm11
+	xorps	xmm4,xmm12
+
+	call	_aesni_encrypt3	; defined outside this section
+
+	xorps	xmm2,xmm10
+	movdqa	xmm10,xmm13	; next tweak for a possible partial block
+	xorps	xmm3,xmm11
+	xorps	xmm4,xmm12
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	lea	rsi,[48+rsi]
+	jmp	NEAR $L$xts_enc_done
+
+ALIGN	16
+$L$xts_enc_four:	; exactly four whole blocks left
+	movups	xmm2,XMMWORD[rdi]
+	movups	xmm3,XMMWORD[16+rdi]
+	movups	xmm4,XMMWORD[32+rdi]
+	xorps	xmm2,xmm10
+	movups	xmm5,XMMWORD[48+rdi]
+	lea	rdi,[64+rdi]
+	xorps	xmm3,xmm11
+	xorps	xmm4,xmm12
+	xorps	xmm5,xmm13
+
+	call	_aesni_encrypt4	; defined outside this section
+
+	pxor	xmm2,xmm10
+	movdqa	xmm10,xmm14	; next tweak for a possible partial block
+	pxor	xmm3,xmm11
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[rsi],xmm2
+	pxor	xmm5,xmm13
+	movdqu	XMMWORD[16+rsi],xmm3
+	movdqu	XMMWORD[32+rsi],xmm4
+	movdqu	XMMWORD[48+rsi],xmm5
+	lea	rsi,[64+rsi]
+	jmp	NEAR $L$xts_enc_done
+
+ALIGN	16
+$L$xts_enc_done:
+	and	r9,15	; r9 = byte length mod 16
+	jz	NEAR $L$xts_enc_ret	; no partial block to handle
+	mov	rdx,r9
+
+$L$xts_enc_steal:	; byte-by-byte exchange between the trailing input and the last output block
+	movzx	eax,BYTE[rdi]	; next trailing input byte
+	movzx	ecx,BYTE[((-16))+rsi]	; matching byte of the last full output block
+	lea	rdi,[1+rdi]
+	mov	BYTE[((-16))+rsi],al	; input byte takes its place in the last block
+	mov	BYTE[rsi],cl	; displaced byte becomes the partial output
+	lea	rsi,[1+rsi]
+	sub	rdx,1
+	jnz	NEAR $L$xts_enc_steal
+
+	sub	rsi,r9	; rewind rsi to just past the last full block
+	mov	rcx,rbp	; key schedule base
+	mov	eax,r10d	; loop count
+
+	movups	xmm2,XMMWORD[((-16))+rsi]	; re-encrypt the patched block with the tweak in xmm10
+	xorps	xmm2,xmm10
+	movups	xmm0,XMMWORD[rcx]
+	movups	xmm1,XMMWORD[16+rcx]
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm0
+$L$oop_enc1_10:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	eax
+	movups	xmm1,XMMWORD[rcx]
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_enc1_10
+DB	102,15,56,221,209	; aesenclast xmm2,xmm1
+	xorps	xmm2,xmm10
+	movups	XMMWORD[(-16)+rsi],xmm2
+
+$L$xts_enc_ret:
+	xorps	xmm0,xmm0	; clear xmm0-xmm5 before returning
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	movaps	xmm6,XMMWORD[((-168))+r11]	; reload each saved register, then overwrite its slot with zero
+	movaps	XMMWORD[(-168)+r11],xmm0
+	movaps	xmm7,XMMWORD[((-152))+r11]
+	movaps	XMMWORD[(-152)+r11],xmm0
+	movaps	xmm8,XMMWORD[((-136))+r11]
+	movaps	XMMWORD[(-136)+r11],xmm0
+	movaps	xmm9,XMMWORD[((-120))+r11]
+	movaps	XMMWORD[(-120)+r11],xmm0
+	movaps	xmm10,XMMWORD[((-104))+r11]
+	movaps	XMMWORD[(-104)+r11],xmm0
+	movaps	xmm11,XMMWORD[((-88))+r11]
+	movaps	XMMWORD[(-88)+r11],xmm0
+	movaps	xmm12,XMMWORD[((-72))+r11]
+	movaps	XMMWORD[(-72)+r11],xmm0
+	movaps	xmm13,XMMWORD[((-56))+r11]
+	movaps	XMMWORD[(-56)+r11],xmm0
+	movaps	xmm14,XMMWORD[((-40))+r11]
+	movaps	XMMWORD[(-40)+r11],xmm0
+	movaps	xmm15,XMMWORD[((-24))+r11]
+	movaps	XMMWORD[(-24)+r11],xmm0
+	movaps	XMMWORD[rsp],xmm0	; zero the rsp-relative scratch (tweak and key material)
+	movaps	XMMWORD[16+rsp],xmm0
+	movaps	XMMWORD[32+rsp],xmm0
+	movaps	XMMWORD[48+rsp],xmm0
+	movaps	XMMWORD[64+rsp],xmm0
+	movaps	XMMWORD[80+rsp],xmm0
+	movaps	XMMWORD[96+rsp],xmm0
+	mov	rbp,QWORD[((-8))+r11]	; rbp as pushed in the prologue
+	lea	rsp,[r11]	; back to the entry stack pointer
+$L$xts_enc_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_aesni_xts_encrypt:
+global	aesni_xts_decrypt
+
+ALIGN	16
+aesni_xts_decrypt:	; Win64 entry; the six arguments are remapped to rdi,rsi,rdx,rcx,r8,r9 below
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesni_xts_decrypt:
+	mov	rdi,rcx	; arg1 -> rdi: input pointer (blocks are loaded from [rdi])
+	mov	rsi,rdx	; arg2 -> rsi: output pointer (results are stored to [rsi])
+	mov	rdx,r8	; arg3 -> rdx: length in bytes
+	mov	rcx,r9	; arg4 -> rcx: key schedule applied to the data blocks
+	mov	r8,QWORD[40+rsp]	; arg5 -> r8: key schedule applied to the tweak seed
+	mov	r9,QWORD[48+rsp]	; arg6 -> r9: pointer to the 16-byte tweak seed
+
+
+	lea	r11,[rsp]	; r11 = entry rsp; anchors the xmm save slots and the epilogue
+	push	rbp
+	sub	rsp,272
+	and	rsp,-16	; 16-byte align the rsp-relative scratch area
+	movaps	XMMWORD[(-168)+r11],xmm6	; save xmm6-xmm15; they are reloaded at $L$xts_dec_ret
+	movaps	XMMWORD[(-152)+r11],xmm7
+	movaps	XMMWORD[(-136)+r11],xmm8
+	movaps	XMMWORD[(-120)+r11],xmm9
+	movaps	XMMWORD[(-104)+r11],xmm10
+	movaps	XMMWORD[(-88)+r11],xmm11
+	movaps	XMMWORD[(-72)+r11],xmm12
+	movaps	XMMWORD[(-56)+r11],xmm13
+	movaps	XMMWORD[(-40)+r11],xmm14
+	movaps	XMMWORD[(-24)+r11],xmm15
+$L$xts_dec_body:
+	movups	xmm2,XMMWORD[r9]	; xmm2 = 16-byte tweak seed
+	mov	eax,DWORD[240+r8]	; eax = loop count stored at offset 240 of the tweak key
+	mov	r10d,DWORD[240+rcx]	; r10d = loop count stored at offset 240 of the data key
+	movups	xmm0,XMMWORD[r8]
+	movups	xmm1,XMMWORD[16+r8]
+	lea	r8,[32+r8]
+	xorps	xmm2,xmm0	; xor in the first round key of the tweak key
+$L$oop_enc1_11:	; the seed is encrypted (aesenc) even though the data is decrypted
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	eax
+	movups	xmm1,XMMWORD[r8]
+	lea	r8,[16+r8]
+	jnz	NEAR $L$oop_enc1_11
+DB	102,15,56,221,209	; aesenclast xmm2,xmm1 -> xmm2 = first tweak
+	xor	eax,eax
+	test	rdx,15
+	setnz	al	; al = 1 when the length is not a multiple of 16
+	shl	rax,4
+	sub	rdx,rax	; in that case hold 16 bytes back from the bulk processing
+
+	movups	xmm0,XMMWORD[rcx]	; xmm0 = round key 0 of the data key
+	mov	rbp,rcx	; rbp keeps the data key base for the rest of the function
+	mov	eax,r10d
+	shl	r10d,4	; r10 = 16 * loop count, a byte offset into the key schedule
+	mov	r9,rdx	; r9 keeps the adjusted byte length; its low 4 bits are tested later
+	and	rdx,-16	; rdx = whole 16-byte blocks to process in bulk
+
+	movups	xmm1,XMMWORD[16+r10*1+rcx]	; round key at offset 16+16*count (later folded into the aesdeclast operands)
+
+	movdqa	xmm8,XMMWORD[$L$xts_magic]	; constant defined outside this section
+	movdqa	xmm15,xmm2	; xmm15 = running tweak
+	pshufd	xmm9,xmm2,0x5f	; dword shuffle of the tweak; its sign bits feed the masks below
+	pxor	xmm1,xmm0	; xmm1 = that round key ^ round key 0
+	movdqa	xmm14,xmm9
+	paddd	xmm9,xmm9
+	movdqa	xmm10,xmm15	; xmm10 = tweak for block 0
+	psrad	xmm14,31	; spread each dword's sign bit into a full mask
+	paddq	xmm15,xmm15	; double the tweak within each 64-bit half
+	pand	xmm14,xmm8
+	pxor	xmm10,xmm0	; pre-xor round key 0 into the tweak
+	pxor	xmm15,xmm14	; fold the masked constant into the doubled tweak
+	movdqa	xmm14,xmm9
+	paddd	xmm9,xmm9
+	movdqa	xmm11,xmm15	; xmm11 = tweak for block 1
+	psrad	xmm14,31
+	paddq	xmm15,xmm15
+	pand	xmm14,xmm8
+	pxor	xmm11,xmm0
+	pxor	xmm15,xmm14
+	movdqa	xmm14,xmm9
+	paddd	xmm9,xmm9
+	movdqa	xmm12,xmm15	; xmm12 = tweak for block 2
+	psrad	xmm14,31
+	paddq	xmm15,xmm15
+	pand	xmm14,xmm8
+	pxor	xmm12,xmm0
+	pxor	xmm15,xmm14
+	movdqa	xmm14,xmm9
+	paddd	xmm9,xmm9
+	movdqa	xmm13,xmm15	; xmm13 = tweak for block 3
+	psrad	xmm14,31
+	paddq	xmm15,xmm15
+	pand	xmm14,xmm8
+	pxor	xmm13,xmm0
+	pxor	xmm15,xmm14
+	movdqa	xmm14,xmm15	; xmm14 = tweak for block 4
+	psrad	xmm9,31
+	paddq	xmm15,xmm15
+	pand	xmm9,xmm8
+	pxor	xmm14,xmm0
+	pxor	xmm15,xmm9	; xmm15 = tweak for block 5 (round key 0 not xor-ed in)
+	movaps	XMMWORD[96+rsp],xmm1	; stash the xor of the two round keys for the main loop
+
+	sub	rdx,16*6
+	jc	NEAR $L$xts_dec_short	; fewer than six whole blocks
+
+	mov	eax,16+96
+	lea	rcx,[32+r10*1+rbp]	; rcx = key base + 32 + 16*count
+	sub	rax,r10	; rax = 112 - 16*count, the round-key index the inner loop counts up to zero
+	movups	xmm1,XMMWORD[16+rbp]	; xmm1 = round key 1
+	mov	r10,rax	; r10 keeps the start index so rax can be reloaded each pass
+	lea	r8,[$L$xts_magic]
+	jmp	NEAR $L$xts_dec_grandloop
+
+ALIGN	32
+$L$xts_dec_grandloop:	; main loop: six 16-byte blocks per pass
+	movdqu	xmm2,XMMWORD[rdi]
+	movdqa	xmm8,xmm0	; xmm8 = round key 0 (xmm0 holds [rbp] on every entry)
+	movdqu	xmm3,XMMWORD[16+rdi]
+	pxor	xmm2,xmm10	; block ^ (tweak ^ round key 0)
+	movdqu	xmm4,XMMWORD[32+rdi]
+	pxor	xmm3,xmm11
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+	movdqu	xmm5,XMMWORD[48+rdi]
+	pxor	xmm4,xmm12
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+	movdqu	xmm6,XMMWORD[64+rdi]
+	pxor	xmm5,xmm13
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+	movdqu	xmm7,XMMWORD[80+rdi]
+	pxor	xmm8,xmm15	; xmm8 = round key 0 ^ sixth tweak
+	movdqa	xmm9,XMMWORD[96+rsp]	; value stored at [96+rsp] before the loop
+	pxor	xmm6,xmm14
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+	movups	xmm0,XMMWORD[32+rbp]	; xmm0 = round key 2
+	lea	rdi,[96+rdi]
+	pxor	xmm7,xmm8
+
+	pxor	xmm10,xmm9	; combine each tweak with xmm9; the results become aesdeclast operands
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+	pxor	xmm11,xmm9
+	movdqa	XMMWORD[rsp],xmm10	; [rsp+0..80] hold the six aesdeclast operands
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+	movups	xmm1,XMMWORD[48+rbp]	; xmm1 = round key 3
+	pxor	xmm12,xmm9
+
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+	pxor	xmm13,xmm9
+	movdqa	XMMWORD[16+rsp],xmm11
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+	pxor	xmm14,xmm9
+	movdqa	XMMWORD[32+rsp],xmm12
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+DB	102,15,56,222,232	; aesdec xmm5,xmm0
+	pxor	xmm8,xmm9
+	movdqa	XMMWORD[64+rsp],xmm14
+DB	102,15,56,222,240	; aesdec xmm6,xmm0
+DB	102,15,56,222,248	; aesdec xmm7,xmm0
+	movups	xmm0,XMMWORD[64+rbp]	; xmm0 = round key 4
+	movdqa	XMMWORD[80+rsp],xmm8
+	pshufd	xmm9,xmm15,0x5f	; start deriving the next set of tweaks from xmm15
+	jmp	NEAR $L$xts_dec_loop6
+ALIGN	32
+$L$xts_dec_loop6:	; two rounds per iteration on all six lanes; rax counts up to zero
+DB	102,15,56,222,209	; aesdec xmm2..xmm7,xmm1 (six lines)
+DB	102,15,56,222,217
+DB	102,15,56,222,225
+DB	102,15,56,222,233
+DB	102,15,56,222,241
+DB	102,15,56,222,249
+	movups	xmm1,XMMWORD[((-64))+rax*1+rcx]
+	add	rax,32	; sets ZF for the jnz below (the SSE instructions in between leave flags alone)
+
+DB	102,15,56,222,208	; aesdec xmm2..xmm7,xmm0 (six lines)
+DB	102,15,56,222,216
+DB	102,15,56,222,224
+DB	102,15,56,222,232
+DB	102,15,56,222,240
+DB	102,15,56,222,248
+	movups	xmm0,XMMWORD[((-80))+rax*1+rcx]
+	jnz	NEAR $L$xts_dec_loop6
+
+	movdqa	xmm8,XMMWORD[r8]	; xmm8 = constant at $L$xts_magic (r8 was set before the loop)
+	movdqa	xmm14,xmm9
+	paddd	xmm9,xmm9
+DB	102,15,56,222,209	; remaining rounds are interleaved with the next tweak computation
+	paddq	xmm15,xmm15
+	psrad	xmm14,31
+DB	102,15,56,222,217
+	pand	xmm14,xmm8
+	movups	xmm10,XMMWORD[rbp]	; xmm10 = round key 0
+DB	102,15,56,222,225
+DB	102,15,56,222,233
+DB	102,15,56,222,241
+	pxor	xmm15,xmm14
+	movaps	xmm11,xmm10
+DB	102,15,56,222,249
+	movups	xmm1,XMMWORD[((-64))+rcx]
+
+	movdqa	xmm14,xmm9
+DB	102,15,56,222,208
+	paddd	xmm9,xmm9
+	pxor	xmm10,xmm15	; xmm10 = next block-0 tweak ^ round key 0
+DB	102,15,56,222,216
+	psrad	xmm14,31
+	paddq	xmm15,xmm15
+DB	102,15,56,222,224
+DB	102,15,56,222,232
+	pand	xmm14,xmm8
+	movaps	xmm12,xmm11
+DB	102,15,56,222,240
+	pxor	xmm15,xmm14
+	movdqa	xmm14,xmm9
+DB	102,15,56,222,248
+	movups	xmm0,XMMWORD[((-48))+rcx]
+
+	paddd	xmm9,xmm9
+DB	102,15,56,222,209
+	pxor	xmm11,xmm15	; xmm11 = next block-1 tweak ^ round key 0
+	psrad	xmm14,31
+DB	102,15,56,222,217
+	paddq	xmm15,xmm15
+	pand	xmm14,xmm8
+DB	102,15,56,222,225
+DB	102,15,56,222,233
+	movdqa	XMMWORD[48+rsp],xmm13	; fourth aesdeclast operand, saved before xmm13 is reused
+	pxor	xmm15,xmm14
+DB	102,15,56,222,241
+	movaps	xmm13,xmm12
+	movdqa	xmm14,xmm9
+DB	102,15,56,222,249
+	movups	xmm1,XMMWORD[((-32))+rcx]
+
+	paddd	xmm9,xmm9
+DB	102,15,56,222,208
+	pxor	xmm12,xmm15	; xmm12 = next block-2 tweak ^ round key 0
+	psrad	xmm14,31
+DB	102,15,56,222,216
+	paddq	xmm15,xmm15
+	pand	xmm14,xmm8
+DB	102,15,56,222,224
+DB	102,15,56,222,232
+DB	102,15,56,222,240
+	pxor	xmm15,xmm14
+	movaps	xmm14,xmm13
+DB	102,15,56,222,248
+
+	movdqa	xmm0,xmm9
+	paddd	xmm9,xmm9
+DB	102,15,56,222,209
+	pxor	xmm13,xmm15	; xmm13 = next block-3 tweak ^ round key 0
+	psrad	xmm0,31
+DB	102,15,56,222,217
+	paddq	xmm15,xmm15
+	pand	xmm0,xmm8
+DB	102,15,56,222,225
+DB	102,15,56,222,233
+	pxor	xmm15,xmm0
+	movups	xmm0,XMMWORD[rbp]	; reload round key 0 for the next pass
+DB	102,15,56,222,241
+DB	102,15,56,222,249
+	movups	xmm1,XMMWORD[16+rbp]	; reload round key 1 for the next pass
+
+	pxor	xmm14,xmm15	; xmm14 = next block-4 tweak ^ round key 0
+DB	102,15,56,223,84,36,0	; aesdeclast xmm2,[rsp+0]
+	psrad	xmm9,31
+	paddq	xmm15,xmm15
+DB	102,15,56,223,92,36,16	; aesdeclast xmm3,[rsp+16]
+DB	102,15,56,223,100,36,32	; aesdeclast xmm4,[rsp+32]
+	pand	xmm9,xmm8
+	mov	rax,r10	; reset the round-key index for the next pass
+DB	102,15,56,223,108,36,48	; aesdeclast xmm5,[rsp+48]
+DB	102,15,56,223,116,36,64	; aesdeclast xmm6,[rsp+64]
+DB	102,15,56,223,124,36,80	; aesdeclast xmm7,[rsp+80]
+	pxor	xmm15,xmm9	; xmm15 = next block-5 tweak
+
+	lea	rsi,[96+rsi]
+	movups	XMMWORD[(-96)+rsi],xmm2	; store the six output blocks
+	movups	XMMWORD[(-80)+rsi],xmm3
+	movups	XMMWORD[(-64)+rsi],xmm4
+	movups	XMMWORD[(-48)+rsi],xmm5
+	movups	XMMWORD[(-32)+rsi],xmm6
+	movups	XMMWORD[(-16)+rsi],xmm7
+	sub	rdx,16*6
+	jnc	NEAR $L$xts_dec_grandloop	; at least six more whole blocks remain
+
+	mov	eax,16+96
+	sub	eax,r10d	; r10 was 112 - 16*count, so eax = 16*count
+	mov	rcx,rbp	; rcx = key schedule base again
+	shr	eax,4	; eax = loop count, as the tail code expects
+
+$L$xts_dec_short:	; tail dispatch: fewer than six whole blocks remain
+
+	mov	r10d,eax	; r10d = loop count, reloaded into eax by the stealing path
+	pxor	xmm10,xmm0	; xmm0 is round key 0 here; strip it from the pre-xored tweaks
+	pxor	xmm11,xmm0	; the second tweak is needed too: $L$xts_dec_done2 uses both xmm10 and xmm11
+	add	rdx,16*6	; undo the subtraction that led here
+	jz	NEAR $L$xts_dec_done
+
+	pxor	xmm12,xmm0
+	cmp	rdx,0x20
+	jb	NEAR $L$xts_dec_one
+	pxor	xmm13,xmm0	; pxor leaves the flags from cmp intact for the je below
+	je	NEAR $L$xts_dec_two
+
+	pxor	xmm14,xmm0
+	cmp	rdx,0x40
+	jb	NEAR $L$xts_dec_three
+	je	NEAR $L$xts_dec_four
+
+	movdqu	xmm2,XMMWORD[rdi]	; five-block case
+	movdqu	xmm3,XMMWORD[16+rdi]
+	movdqu	xmm4,XMMWORD[32+rdi]
+	pxor	xmm2,xmm10
+	movdqu	xmm5,XMMWORD[48+rdi]
+	pxor	xmm3,xmm11
+	movdqu	xmm6,XMMWORD[64+rdi]
+	lea	rdi,[80+rdi]
+	pxor	xmm4,xmm12
+	pxor	xmm5,xmm13
+	pxor	xmm6,xmm14
+
+	call	_aesni_decrypt6	; defined outside this section; presumably decrypts xmm2-xmm7 with key rcx / count eax
+
+	xorps	xmm2,xmm10	; xor the tweaks back in after decryption
+	xorps	xmm3,xmm11
+	xorps	xmm4,xmm12
+	movdqu	XMMWORD[rsi],xmm2
+	xorps	xmm5,xmm13
+	movdqu	XMMWORD[16+rsi],xmm3
+	xorps	xmm6,xmm14
+	movdqu	XMMWORD[32+rsi],xmm4
+	pxor	xmm14,xmm14
+	movdqu	XMMWORD[48+rsi],xmm5
+	pcmpgtd	xmm14,xmm15	; per-dword mask: all ones where that dword of xmm15 is negative
+	movdqu	XMMWORD[64+rsi],xmm6
+	lea	rsi,[80+rsi]
+	pshufd	xmm11,xmm14,0x13
+	and	r9,15	; r9 = byte length mod 16
+	jz	NEAR $L$xts_dec_ret	; no partial block to handle
+
+	movdqa	xmm10,xmm15	; xmm10 = current tweak
+	paddq	xmm15,xmm15
+	pand	xmm11,xmm8	; NOTE(review): relies on xmm8 still holding the $L$xts_magic constant after the call - confirm
+	pxor	xmm11,xmm15	; xmm11 = the tweak after xmm10
+	jmp	NEAR $L$xts_dec_done2
+
+ALIGN	16
+$L$xts_dec_one:	; exactly one whole block left
+	movups	xmm2,XMMWORD[rdi]
+	lea	rdi,[16+rdi]
+	xorps	xmm2,xmm10	; xor with the tweak before decryption
+	movups	xmm0,XMMWORD[rcx]
+	movups	xmm1,XMMWORD[16+rcx]
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm0
+$L$oop_dec1_12:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+	dec	eax
+	movups	xmm1,XMMWORD[rcx]
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_dec1_12
+DB	102,15,56,223,209	; aesdeclast xmm2,xmm1
+	xorps	xmm2,xmm10	; xor with the tweak after decryption
+	movdqa	xmm10,xmm11	; xmm10/xmm11 = next two tweaks, used if a partial block follows
+	movups	XMMWORD[rsi],xmm2
+	movdqa	xmm11,xmm12
+	lea	rsi,[16+rsi]
+	jmp	NEAR $L$xts_dec_done
+
+ALIGN	16
+$L$xts_dec_two:	; exactly two whole blocks left
+	movups	xmm2,XMMWORD[rdi]
+	movups	xmm3,XMMWORD[16+rdi]
+	lea	rdi,[32+rdi]
+	xorps	xmm2,xmm10
+	xorps	xmm3,xmm11
+
+	call	_aesni_decrypt2	; defined outside this section
+
+	xorps	xmm2,xmm10
+	movdqa	xmm10,xmm12	; next two tweaks for a possible partial block
+	xorps	xmm3,xmm11
+	movdqa	xmm11,xmm13
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	lea	rsi,[32+rsi]
+	jmp	NEAR $L$xts_dec_done
+
+ALIGN	16
+$L$xts_dec_three:	; exactly three whole blocks left
+	movups	xmm2,XMMWORD[rdi]
+	movups	xmm3,XMMWORD[16+rdi]
+	movups	xmm4,XMMWORD[32+rdi]
+	lea	rdi,[48+rdi]
+	xorps	xmm2,xmm10
+	xorps	xmm3,xmm11
+	xorps	xmm4,xmm12
+
+	call	_aesni_decrypt3	; defined outside this section
+
+	xorps	xmm2,xmm10
+	movdqa	xmm10,xmm13	; next two tweaks for a possible partial block
+	xorps	xmm3,xmm11
+	movdqa	xmm11,xmm14
+	xorps	xmm4,xmm12
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	lea	rsi,[48+rsi]
+	jmp	NEAR $L$xts_dec_done
+
+ALIGN	16
+$L$xts_dec_four:	; exactly four whole blocks left
+	movups	xmm2,XMMWORD[rdi]
+	movups	xmm3,XMMWORD[16+rdi]
+	movups	xmm4,XMMWORD[32+rdi]
+	xorps	xmm2,xmm10
+	movups	xmm5,XMMWORD[48+rdi]
+	lea	rdi,[64+rdi]
+	xorps	xmm3,xmm11
+	xorps	xmm4,xmm12
+	xorps	xmm5,xmm13
+
+	call	_aesni_decrypt4	; defined outside this section
+
+	pxor	xmm2,xmm10
+	movdqa	xmm10,xmm14	; next two tweaks for a possible partial block
+	pxor	xmm3,xmm11
+	movdqa	xmm11,xmm15
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[rsi],xmm2
+	pxor	xmm5,xmm13
+	movdqu	XMMWORD[16+rsi],xmm3
+	movdqu	XMMWORD[32+rsi],xmm4
+	movdqu	XMMWORD[48+rsi],xmm5
+	lea	rsi,[64+rsi]
+	jmp	NEAR $L$xts_dec_done
+
+ALIGN	16
+$L$xts_dec_done:
+	and	r9,15	; r9 = byte length mod 16
+	jz	NEAR $L$xts_dec_ret	; no partial block to handle
+$L$xts_dec_done2:	; partial block present: the held-back full block is decrypted first, with the later tweak
+	mov	rdx,r9
+	mov	rcx,rbp	; key schedule base
+	mov	eax,r10d	; loop count
+
+	movups	xmm2,XMMWORD[rdi]	; last full input block
+	xorps	xmm2,xmm11	; uses the later of the two tweaks
+	movups	xmm0,XMMWORD[rcx]
+	movups	xmm1,XMMWORD[16+rcx]
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm0
+$L$oop_dec1_13:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+	dec	eax
+	movups	xmm1,XMMWORD[rcx]
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_dec1_13
+DB	102,15,56,223,209	; aesdeclast xmm2,xmm1
+	xorps	xmm2,xmm11
+	movups	XMMWORD[rsi],xmm2
+
+$L$xts_dec_steal:	; byte-by-byte exchange between the trailing input and the block just written
+	movzx	eax,BYTE[16+rdi]	; next trailing input byte
+	movzx	ecx,BYTE[rsi]	; matching byte of the block just decrypted
+	lea	rdi,[1+rdi]
+	mov	BYTE[rsi],al	; input byte takes its place
+	mov	BYTE[16+rsi],cl	; displaced byte becomes the partial output
+	lea	rsi,[1+rsi]
+	sub	rdx,1
+	jnz	NEAR $L$xts_dec_steal
+
+	sub	rsi,r9	; rewind rsi to the start of the patched block
+	mov	rcx,rbp
+	mov	eax,r10d
+
+	movups	xmm2,XMMWORD[rsi]	; decrypt the patched block with the earlier tweak
+	xorps	xmm2,xmm10
+	movups	xmm0,XMMWORD[rcx]
+	movups	xmm1,XMMWORD[16+rcx]
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm0
+$L$oop_dec1_14:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+	dec	eax
+	movups	xmm1,XMMWORD[rcx]
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_dec1_14
+DB	102,15,56,223,209	; aesdeclast xmm2,xmm1
+	xorps	xmm2,xmm10
+	movups	XMMWORD[rsi],xmm2
+
+$L$xts_dec_ret:
+	xorps	xmm0,xmm0	; clear xmm0-xmm5 before returning
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	movaps	xmm6,XMMWORD[((-168))+r11]	; reload each saved register, then overwrite its slot with zero
+	movaps	XMMWORD[(-168)+r11],xmm0
+	movaps	xmm7,XMMWORD[((-152))+r11]
+	movaps	XMMWORD[(-152)+r11],xmm0
+	movaps	xmm8,XMMWORD[((-136))+r11]
+	movaps	XMMWORD[(-136)+r11],xmm0
+	movaps	xmm9,XMMWORD[((-120))+r11]
+	movaps	XMMWORD[(-120)+r11],xmm0
+	movaps	xmm10,XMMWORD[((-104))+r11]
+	movaps	XMMWORD[(-104)+r11],xmm0
+	movaps	xmm11,XMMWORD[((-88))+r11]
+	movaps	XMMWORD[(-88)+r11],xmm0
+	movaps	xmm12,XMMWORD[((-72))+r11]
+	movaps	XMMWORD[(-72)+r11],xmm0
+	movaps	xmm13,XMMWORD[((-56))+r11]
+	movaps	XMMWORD[(-56)+r11],xmm0
+	movaps	xmm14,XMMWORD[((-40))+r11]
+	movaps	XMMWORD[(-40)+r11],xmm0
+	movaps	xmm15,XMMWORD[((-24))+r11]
+	movaps	XMMWORD[(-24)+r11],xmm0
+	movaps	XMMWORD[rsp],xmm0	; zero the rsp-relative scratch (tweak and key material)
+	movaps	XMMWORD[16+rsp],xmm0
+	movaps	XMMWORD[32+rsp],xmm0
+	movaps	XMMWORD[48+rsp],xmm0
+	movaps	XMMWORD[64+rsp],xmm0
+	movaps	XMMWORD[80+rsp],xmm0
+	movaps	XMMWORD[96+rsp],xmm0
+	mov	rbp,QWORD[((-8))+r11]	; rbp as pushed in the prologue
+	lea	rsp,[r11]	; back to the entry stack pointer
+$L$xts_dec_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_aesni_xts_decrypt:
+global	aesni_ocb_encrypt
+
+ALIGN	32
+aesni_ocb_encrypt:	; Win64 entry; takes eight arguments, the last two are read from the stack at $L$ocb_enc_body
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesni_ocb_encrypt:
+	mov	rdi,rcx	; arg1 -> rdi: input pointer
+	mov	rsi,rdx	; arg2 -> rsi: output pointer
+	mov	rdx,r8	; arg3 -> rdx: number of 16-byte blocks
+	mov	rcx,r9	; arg4 -> rcx: key schedule
+	mov	r8,QWORD[40+rsp]	; arg5 -> r8: block counter; bsf of it selects table entries
+	mov	r9,QWORD[48+rsp]	; arg6 -> r9: pointer to the 16-byte running offset (loaded here, written back at the end)
+
+
+	lea	rax,[rsp]	; rax = entry rsp, used to reach the remaining stack arguments
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	lea	rsp,[((-160))+rsp]	; room for xmm6-xmm15
+	movaps	XMMWORD[rsp],xmm6
+	movaps	XMMWORD[16+rsp],xmm7
+	movaps	XMMWORD[32+rsp],xmm8
+	movaps	XMMWORD[48+rsp],xmm9
+	movaps	XMMWORD[64+rsp],xmm10
+	movaps	XMMWORD[80+rsp],xmm11
+	movaps	XMMWORD[96+rsp],xmm12
+	movaps	XMMWORD[112+rsp],xmm13
+	movaps	XMMWORD[128+rsp],xmm14
+	movaps	XMMWORD[144+rsp],xmm15
+$L$ocb_enc_body:
+	mov	rbx,QWORD[56+rax]	; arg7 -> rbx: base of a table of 16-byte entries
+	mov	rbp,QWORD[((56+8))+rax]	; arg8 -> rbp: pointer to the 16-byte checksum (loaded here, written back at the end)
+
+	mov	r10d,DWORD[240+rcx]	; loop count stored at offset 240 of the key schedule
+	mov	r11,rcx	; r11 = key schedule base, used by the helpers
+	shl	r10d,4
+	movups	xmm9,XMMWORD[rcx]	; round key 0
+	movups	xmm1,XMMWORD[16+r10*1+rcx]	; round key at offset 16+16*count
+
+	movdqu	xmm15,XMMWORD[r9]	; running offset
+	pxor	xmm9,xmm1	; xmm9 = round key 0 ^ that round key
+	pxor	xmm15,xmm1	; the offset is carried xor-ed with that round key
+
+	mov	eax,16+32
+	lea	rcx,[32+r10*1+r11]	; rcx = key base + 32 + 16*count
+	movups	xmm1,XMMWORD[16+r11]	; xmm1 = round key 1
+	sub	rax,r10	; rax = 48 - 16*count, the index the helper loops count up to zero
+	mov	r10,rax	; r10 keeps the start index; the helpers reload rax from it
+
+	movdqu	xmm10,XMMWORD[rbx]	; xmm10 = table entry 0
+	movdqu	xmm8,XMMWORD[rbp]	; xmm8 = running checksum
+
+	test	r8,1
+	jnz	NEAR $L$ocb_enc_odd	; counter already odd: go straight to the multi-block path
+
+	bsf	r12,r8	; even counter: handle a single block first
+	add	r8,1
+	shl	r12,4	; scale the bit index to a 16-byte table offset
+	movdqu	xmm7,XMMWORD[r12*1+rbx]
+	movdqu	xmm2,XMMWORD[rdi]
+	lea	rdi,[16+rdi]
+
+	call	__ocb_encrypt1
+
+	movdqa	xmm15,xmm7	; the helper leaves the updated offset in xmm7
+	movups	XMMWORD[rsi],xmm2
+	lea	rsi,[16+rsi]
+	sub	rdx,1
+	jz	NEAR $L$ocb_enc_done
+
+$L$ocb_enc_odd:
+	lea	r12,[1+r8]	; r12/r13/r14 = table offsets for counter+1, +3 and +5
+	lea	r13,[3+r8]
+	lea	r14,[5+r8]
+	lea	r8,[6+r8]
+	bsf	r12,r12
+	bsf	r13,r13
+	bsf	r14,r14
+	shl	r12,4
+	shl	r13,4
+	shl	r14,4
+
+	sub	rdx,6
+	jc	NEAR $L$ocb_enc_short	; fewer than six blocks
+	jmp	NEAR $L$ocb_enc_grandloop
+
+ALIGN	32
+$L$ocb_enc_grandloop:	; six blocks per pass
+	movdqu	xmm2,XMMWORD[rdi]
+	movdqu	xmm3,XMMWORD[16+rdi]
+	movdqu	xmm4,XMMWORD[32+rdi]
+	movdqu	xmm5,XMMWORD[48+rdi]
+	movdqu	xmm6,XMMWORD[64+rdi]
+	movdqu	xmm7,XMMWORD[80+rdi]
+	lea	rdi,[96+rdi]
+
+	call	__ocb_encrypt6
+
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	movups	XMMWORD[80+rsi],xmm7
+	lea	rsi,[96+rsi]
+	sub	rdx,6
+	jnc	NEAR $L$ocb_enc_grandloop
+
+$L$ocb_enc_short:
+	add	rdx,6	; undo the subtraction; rdx = 0..5 blocks left
+	jz	NEAR $L$ocb_enc_done
+
+	movdqu	xmm2,XMMWORD[rdi]
+	cmp	rdx,2
+	jb	NEAR $L$ocb_enc_one
+	movdqu	xmm3,XMMWORD[16+rdi]	; movdqu leaves the flags from cmp intact for the je below
+	je	NEAR $L$ocb_enc_two
+
+	movdqu	xmm4,XMMWORD[32+rdi]
+	cmp	rdx,4
+	jb	NEAR $L$ocb_enc_three
+	movdqu	xmm5,XMMWORD[48+rdi]
+	je	NEAR $L$ocb_enc_four
+
+	movdqu	xmm6,XMMWORD[64+rdi]	; five-block case
+	pxor	xmm7,xmm7	; sixth lane is unused; zero adds nothing to the checksum
+
+	call	__ocb_encrypt6
+
+	movdqa	xmm15,xmm14	; offset after the fifth block
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	movups	XMMWORD[64+rsi],xmm6
+
+	jmp	NEAR $L$ocb_enc_done
+
+ALIGN	16
+$L$ocb_enc_one:
+	movdqa	xmm7,xmm10	; table entry 0
+
+	call	__ocb_encrypt1
+
+	movdqa	xmm15,xmm7	; updated offset
+	movups	XMMWORD[rsi],xmm2
+	jmp	NEAR $L$ocb_enc_done
+
+ALIGN	16
+$L$ocb_enc_two:
+	pxor	xmm4,xmm4	; unused lanes are zeroed
+	pxor	xmm5,xmm5
+
+	call	__ocb_encrypt4
+
+	movdqa	xmm15,xmm11	; offset after the second block
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+
+	jmp	NEAR $L$ocb_enc_done
+
+ALIGN	16
+$L$ocb_enc_three:
+	pxor	xmm5,xmm5	; unused lane is zeroed
+
+	call	__ocb_encrypt4
+
+	movdqa	xmm15,xmm12	; offset after the third block
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	movups	XMMWORD[32+rsi],xmm4
+
+	jmp	NEAR $L$ocb_enc_done
+
+ALIGN	16
+$L$ocb_enc_four:
+	call	__ocb_encrypt4
+
+	movdqa	xmm15,xmm13	; offset after the fourth block
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	movups	XMMWORD[48+rsi],xmm5
+
+$L$ocb_enc_done:
+	pxor	xmm15,xmm0	; strip the round key carried in the offset; NOTE(review): xmm0 is only set by the helpers (last load from [rcx-16]), so this assumes at least one block was processed - confirm callers never pass zero blocks
+	movdqu	XMMWORD[rbp],xmm8	; write back the checksum
+	movdqu	XMMWORD[r9],xmm15	; write back the offset
+
+	xorps	xmm0,xmm0	; clear xmm0-xmm5 before returning
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	movaps	xmm6,XMMWORD[rsp]	; reload each saved register, then overwrite its slot with zero
+	movaps	XMMWORD[rsp],xmm0
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	XMMWORD[16+rsp],xmm0
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	XMMWORD[32+rsp],xmm0
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	XMMWORD[48+rsp],xmm0
+	movaps	xmm10,XMMWORD[64+rsp]
+	movaps	XMMWORD[64+rsp],xmm0
+	movaps	xmm11,XMMWORD[80+rsp]
+	movaps	XMMWORD[80+rsp],xmm0
+	movaps	xmm12,XMMWORD[96+rsp]
+	movaps	XMMWORD[96+rsp],xmm0
+	movaps	xmm13,XMMWORD[112+rsp]
+	movaps	XMMWORD[112+rsp],xmm0
+	movaps	xmm14,XMMWORD[128+rsp]
+	movaps	XMMWORD[128+rsp],xmm0
+	movaps	xmm15,XMMWORD[144+rsp]
+	movaps	XMMWORD[144+rsp],xmm0
+	lea	rax,[((160+40))+rsp]	; rax = entry rsp (160 bytes of xmm slots + five pushed registers)
+$L$ocb_enc_pop:
+	mov	r14,QWORD[((-40))+rax]	; restore the registers pushed in the prologue
+	mov	r13,QWORD[((-32))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	rbp,QWORD[((-16))+rax]
+	mov	rbx,QWORD[((-8))+rax]
+	lea	rsp,[rax]
+$L$ocb_enc_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_aesni_ocb_encrypt:
+
+
+ALIGN	32
+__ocb_encrypt6:	; internal helper: encrypts xmm2-xmm7 in place; updates checksum xmm8 and leaves per-block offsets in xmm10-xmm15
+	pxor	xmm15,xmm9	; fold xmm9 into the incoming offset
+	movdqu	xmm11,XMMWORD[r12*1+rbx]	; table entries selected by r12, r13, r14
+	movdqa	xmm12,xmm10	; xmm10 holds table entry 0 on entry
+	movdqu	xmm13,XMMWORD[r13*1+rbx]
+	movdqa	xmm14,xmm10
+	pxor	xmm10,xmm15	; offset for block 0
+	movdqu	xmm15,XMMWORD[r14*1+rbx]
+	pxor	xmm11,xmm10	; each offset is the previous one xor a table entry
+	pxor	xmm8,xmm2	; checksum accumulates the input block before it is masked
+	pxor	xmm2,xmm10	; input block ^ offset
+	pxor	xmm12,xmm11
+	pxor	xmm8,xmm3
+	pxor	xmm3,xmm11
+	pxor	xmm13,xmm12
+	pxor	xmm8,xmm4
+	pxor	xmm4,xmm12
+	pxor	xmm14,xmm13
+	pxor	xmm8,xmm5
+	pxor	xmm5,xmm13
+	pxor	xmm15,xmm14
+	pxor	xmm8,xmm6
+	pxor	xmm6,xmm14
+	pxor	xmm8,xmm7
+	pxor	xmm7,xmm15
+	movups	xmm0,XMMWORD[32+r11]	; round key 2
+
+	lea	r12,[1+r8]	; precompute the table offsets for the next call
+	lea	r13,[3+r8]
+	lea	r14,[5+r8]
+	add	r8,6
+	pxor	xmm10,xmm9	; xor xmm9 into each offset; the results are the aesenclast operands
+	bsf	r12,r12
+	bsf	r13,r13
+	bsf	r14,r14
+
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+	pxor	xmm11,xmm9
+	pxor	xmm12,xmm9
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+	pxor	xmm13,xmm9
+	pxor	xmm14,xmm9
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+	movups	xmm1,XMMWORD[48+r11]	; round key 3
+	pxor	xmm15,xmm9
+
+DB	102,15,56,220,208	; aesenc xmm2..xmm7,xmm0 (six lines)
+DB	102,15,56,220,216
+DB	102,15,56,220,224
+DB	102,15,56,220,232
+DB	102,15,56,220,240
+DB	102,15,56,220,248
+	movups	xmm0,XMMWORD[64+r11]	; round key 4
+	shl	r12,4	; scale bit indices to 16-byte table offsets
+	shl	r13,4
+	jmp	NEAR $L$ocb_enc_loop6
+
+ALIGN	32
+$L$ocb_enc_loop6:	; two rounds per iteration; rax counts up to zero
+DB	102,15,56,220,209	; aesenc xmm2..xmm7,xmm1 (six lines)
+DB	102,15,56,220,217
+DB	102,15,56,220,225
+DB	102,15,56,220,233
+DB	102,15,56,220,241
+DB	102,15,56,220,249
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; sets ZF for the jnz below
+
+DB	102,15,56,220,208	; aesenc xmm2..xmm7,xmm0 (six lines)
+DB	102,15,56,220,216
+DB	102,15,56,220,224
+DB	102,15,56,220,232
+DB	102,15,56,220,240
+DB	102,15,56,220,248
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]	; on exit (rax = 0) this is the key at [rcx-16]
+	jnz	NEAR $L$ocb_enc_loop6
+
+DB	102,15,56,220,209	; aesenc xmm2..xmm7,xmm1 (six lines)
+DB	102,15,56,220,217
+DB	102,15,56,220,225
+DB	102,15,56,220,233
+DB	102,15,56,220,241
+DB	102,15,56,220,249
+	movups	xmm1,XMMWORD[16+r11]	; reload round key 1 for the next call
+	shl	r14,4
+
+DB	102,65,15,56,221,210	; aesenclast xmm2,xmm10
+	movdqu	xmm10,XMMWORD[rbx]	; reload table entry 0 for the next call
+	mov	rax,r10	; reset the round-key index
+DB	102,65,15,56,221,219	; aesenclast xmm3,xmm11
+DB	102,65,15,56,221,228	; aesenclast xmm4,xmm12
+DB	102,65,15,56,221,237	; aesenclast xmm5,xmm13
+DB	102,65,15,56,221,246	; aesenclast xmm6,xmm14
+DB	102,65,15,56,221,255	; aesenclast xmm7,xmm15
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	32
+__ocb_encrypt4:	; internal helper: encrypts xmm2-xmm5 in place; updates checksum xmm8 and leaves per-block offsets in xmm10-xmm13
+	pxor	xmm15,xmm9	; fold xmm9 into the incoming offset
+	movdqu	xmm11,XMMWORD[r12*1+rbx]	; table entries selected by r12 and r13
+	movdqa	xmm12,xmm10	; xmm10 holds table entry 0 on entry
+	movdqu	xmm13,XMMWORD[r13*1+rbx]
+	pxor	xmm10,xmm15	; offset for block 0
+	pxor	xmm11,xmm10	; each offset is the previous one xor a table entry
+	pxor	xmm8,xmm2	; checksum accumulates the input block before it is masked
+	pxor	xmm2,xmm10	; input block ^ offset
+	pxor	xmm12,xmm11
+	pxor	xmm8,xmm3
+	pxor	xmm3,xmm11
+	pxor	xmm13,xmm12
+	pxor	xmm8,xmm4
+	pxor	xmm4,xmm12
+	pxor	xmm8,xmm5
+	pxor	xmm5,xmm13
+	movups	xmm0,XMMWORD[32+r11]	; round key 2
+
+	pxor	xmm10,xmm9	; xor xmm9 into each offset; the results are the aesenclast operands
+	pxor	xmm11,xmm9
+	pxor	xmm12,xmm9
+	pxor	xmm13,xmm9
+
+DB	102,15,56,220,209	; aesenc xmm2..xmm5,xmm1 (four lines)
+DB	102,15,56,220,217
+DB	102,15,56,220,225
+DB	102,15,56,220,233
+	movups	xmm1,XMMWORD[48+r11]	; round key 3
+
+DB	102,15,56,220,208	; aesenc xmm2..xmm5,xmm0 (four lines)
+DB	102,15,56,220,216
+DB	102,15,56,220,224
+DB	102,15,56,220,232
+	movups	xmm0,XMMWORD[64+r11]	; round key 4
+	jmp	NEAR $L$ocb_enc_loop4
+
+ALIGN	32
+$L$ocb_enc_loop4:	; two rounds per iteration; rax counts up to zero
+DB	102,15,56,220,209	; aesenc xmm2..xmm5,xmm1 (four lines)
+DB	102,15,56,220,217
+DB	102,15,56,220,225
+DB	102,15,56,220,233
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; sets ZF for the jnz below
+
+DB	102,15,56,220,208	; aesenc xmm2..xmm5,xmm0 (four lines)
+DB	102,15,56,220,216
+DB	102,15,56,220,224
+DB	102,15,56,220,232
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]	; on exit (rax = 0) this is the key at [rcx-16]
+	jnz	NEAR $L$ocb_enc_loop4
+
+DB	102,15,56,220,209	; aesenc xmm2..xmm5,xmm1 (four lines)
+DB	102,15,56,220,217
+DB	102,15,56,220,225
+DB	102,15,56,220,233
+	movups	xmm1,XMMWORD[16+r11]	; reload round key 1 for the next call
+	mov	rax,r10	; reset the round-key index
+
+DB	102,65,15,56,221,210	; aesenclast xmm2,xmm10
+DB	102,65,15,56,221,219	; aesenclast xmm3,xmm11
+DB	102,65,15,56,221,228	; aesenclast xmm4,xmm12
+DB	102,65,15,56,221,237	; aesenclast xmm5,xmm13
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	32
+__ocb_encrypt1:	; local helper: mask, AES-encrypt and re-mask the single block in xmm2
+	pxor	xmm7,xmm15	; xmm7 (caller-loaded table entry) ^= running mask xmm15
+	pxor	xmm7,xmm9	; fold in xmm9 (caller-prepared, outside this view)
+	pxor	xmm8,xmm2	; xmm8 accumulates the XOR of the unmasked input
+	pxor	xmm2,xmm7
+	movups	xmm0,XMMWORD[32+r11]	; next round key; r11 is the key schedule base
+
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	movups	xmm1,XMMWORD[48+r11]
+	pxor	xmm7,xmm9	; re-mix xmm9 so the mask can be applied by the final aesenclast
+
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+	movups	xmm0,XMMWORD[64+r11]
+	jmp	NEAR $L$ocb_enc_loop1
+
+ALIGN	32
+$L$ocb_enc_loop1:	; two rounds per iteration, alternating xmm1/xmm0 as round key
+DB	102,15,56,220,209
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; sets ZF for the jnz below
+
+DB	102,15,56,220,208
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$ocb_enc_loop1	; loop until rax reaches 0
+
+DB	102,15,56,220,209
+	movups	xmm1,XMMWORD[16+r11]	; reload the [16+r11] round key for the next call
+	mov	rax,r10	; restore the loop index from r10 for the next call
+
+DB	102,15,56,221,215	; aesenclast xmm2,xmm7
+	DB	0F3h,0C3h		;repret
+
+
+global	aesni_ocb_decrypt
+
+ALIGN	32
+aesni_ocb_decrypt:	; OCB bulk decrypt; Win64 entry that remaps its arguments to rdi,rsi,rdx,rcx,r8,r9
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesni_ocb_decrypt:
+	mov	rdi,rcx	; rdi = input pointer (read 16 bytes per block)
+	mov	rsi,rdx	; rsi = output pointer
+	mov	rdx,r8	; rdx = number of 16-byte blocks
+	mov	rcx,r9	; rcx = key schedule (32-bit round count read from offset 240)
+	mov	r8,QWORD[40+rsp]	; r8 = starting block number (drives the bsf table lookups)
+	mov	r9,QWORD[48+rsp]	; r9 = pointer to 16 bytes of running-offset state, read and written back
+
+
+	lea	rax,[rsp]
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	lea	rsp,[((-160))+rsp]	; room for the ten non-volatile registers xmm6..xmm15
+	movaps	XMMWORD[rsp],xmm6
+	movaps	XMMWORD[16+rsp],xmm7
+	movaps	XMMWORD[32+rsp],xmm8
+	movaps	XMMWORD[48+rsp],xmm9
+	movaps	XMMWORD[64+rsp],xmm10
+	movaps	XMMWORD[80+rsp],xmm11
+	movaps	XMMWORD[96+rsp],xmm12
+	movaps	XMMWORD[112+rsp],xmm13
+	movaps	XMMWORD[128+rsp],xmm14
+	movaps	XMMWORD[144+rsp],xmm15
+$L$ocb_dec_body:
+	mov	rbx,QWORD[56+rax]	; 7th argument: base of the 16-byte-entry table indexed below
+	mov	rbp,QWORD[((56+8))+rax]	; 8th argument: pointer to the 16-byte checksum, read and written back
+
+	mov	r10d,DWORD[240+rcx]	; round count stored in the key schedule
+	mov	r11,rcx
+	shl	r10d,4	; r10 = 16*rounds
+	movups	xmm9,XMMWORD[rcx]	; first round key
+	movups	xmm1,XMMWORD[16+r10*1+rcx]	; last round key
+
+	movdqu	xmm15,XMMWORD[r9]	; current offset
+	pxor	xmm9,xmm1	; xmm9 = first key ^ last key
+	pxor	xmm15,xmm1	; xmm15 = offset ^ last key
+
+	mov	eax,16+32
+	lea	rcx,[32+r10*1+r11]	; rcx = end of key schedule; the helpers index it with negative rax
+	movups	xmm1,XMMWORD[16+r11]	; round key consumed by the helpers' first aesdec
+	sub	rax,r10	; rax = 48 - 16*rounds
+	mov	r10,rax	; r10 keeps the initial index so the helpers can reset rax
+
+	movdqu	xmm10,XMMWORD[rbx]	; table entry 0
+	movdqu	xmm8,XMMWORD[rbp]	; running checksum
+
+	test	r8,1
+	jnz	NEAR $L$ocb_dec_odd
+; even starting block number: handle one block alone before the 6-block loop
+	bsf	r12,r8	; index of lowest set bit of the block number
+	add	r8,1
+	shl	r12,4	; scale to a 16-byte table entry
+	movdqu	xmm7,XMMWORD[r12*1+rbx]
+	movdqu	xmm2,XMMWORD[rdi]
+	lea	rdi,[16+rdi]
+
+	call	__ocb_decrypt1
+
+	movdqa	xmm15,xmm7	; the block's mask becomes the new running offset
+	movups	XMMWORD[rsi],xmm2
+	xorps	xmm8,xmm2	; fold the decrypted block into the checksum
+	lea	rsi,[16+rsi]
+	sub	rdx,1
+	jz	NEAR $L$ocb_dec_done
+
+$L$ocb_dec_odd:	; precompute table offsets for the 2nd, 4th and 6th blocks of the next group
+	lea	r12,[1+r8]
+	lea	r13,[3+r8]
+	lea	r14,[5+r8]
+	lea	r8,[6+r8]
+	bsf	r12,r12
+	bsf	r13,r13
+	bsf	r14,r14
+	shl	r12,4
+	shl	r13,4
+	shl	r14,4
+
+	sub	rdx,6
+	jc	NEAR $L$ocb_dec_short	; fewer than six blocks left
+	jmp	NEAR $L$ocb_dec_grandloop
+
+ALIGN	32
+$L$ocb_dec_grandloop:	; six blocks per iteration
+	movdqu	xmm2,XMMWORD[rdi]
+	movdqu	xmm3,XMMWORD[16+rdi]
+	movdqu	xmm4,XMMWORD[32+rdi]
+	movdqu	xmm5,XMMWORD[48+rdi]
+	movdqu	xmm6,XMMWORD[64+rdi]
+	movdqu	xmm7,XMMWORD[80+rdi]
+	lea	rdi,[96+rdi]
+
+	call	__ocb_decrypt6
+
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm8,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm8,xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	pxor	xmm8,xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	pxor	xmm8,xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	pxor	xmm8,xmm6
+	movups	XMMWORD[80+rsi],xmm7
+	pxor	xmm8,xmm7
+	lea	rsi,[96+rsi]
+	sub	rdx,6
+	jnc	NEAR $L$ocb_dec_grandloop
+
+$L$ocb_dec_short:	; 0..5 blocks remain once the subtraction is undone
+	add	rdx,6
+	jz	NEAR $L$ocb_dec_done	; NOTE(review): if reached with no helper call made (rdx==0 on entry), xmm0 below is not a key -- confirm callers pass >=1 block
+
+	movdqu	xmm2,XMMWORD[rdi]
+	cmp	rdx,2
+	jb	NEAR $L$ocb_dec_one
+	movdqu	xmm3,XMMWORD[16+rdi]
+	je	NEAR $L$ocb_dec_two	; flags still come from cmp rdx,2
+
+	movdqu	xmm4,XMMWORD[32+rdi]
+	cmp	rdx,4
+	jb	NEAR $L$ocb_dec_three
+	movdqu	xmm5,XMMWORD[48+rdi]
+	je	NEAR $L$ocb_dec_four	; flags still come from cmp rdx,4
+
+	movdqu	xmm6,XMMWORD[64+rdi]
+	pxor	xmm7,xmm7	; five blocks: the sixth lane is a zero dummy
+
+	call	__ocb_decrypt6
+
+	movdqa	xmm15,xmm14	; mask of the 5th block becomes the running offset
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm8,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm8,xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	pxor	xmm8,xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	pxor	xmm8,xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	pxor	xmm8,xmm6
+
+	jmp	NEAR $L$ocb_dec_done
+
+ALIGN	16
+$L$ocb_dec_one:
+	movdqa	xmm7,xmm10	; use table entry 0 for the single block
+
+	call	__ocb_decrypt1
+
+	movdqa	xmm15,xmm7
+	movups	XMMWORD[rsi],xmm2
+	xorps	xmm8,xmm2
+	jmp	NEAR $L$ocb_dec_done
+
+ALIGN	16
+$L$ocb_dec_two:
+	pxor	xmm4,xmm4	; unused lanes are zeroed before the 4-block helper
+	pxor	xmm5,xmm5
+
+	call	__ocb_decrypt4
+
+	movdqa	xmm15,xmm11	; mask of the 2nd block becomes the running offset
+	movups	XMMWORD[rsi],xmm2
+	xorps	xmm8,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	xorps	xmm8,xmm3
+
+	jmp	NEAR $L$ocb_dec_done
+
+ALIGN	16
+$L$ocb_dec_three:
+	pxor	xmm5,xmm5
+
+	call	__ocb_decrypt4
+
+	movdqa	xmm15,xmm12	; mask of the 3rd block becomes the running offset
+	movups	XMMWORD[rsi],xmm2
+	xorps	xmm8,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	xorps	xmm8,xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	xorps	xmm8,xmm4
+
+	jmp	NEAR $L$ocb_dec_done
+
+ALIGN	16
+$L$ocb_dec_four:
+	call	__ocb_decrypt4
+
+	movdqa	xmm15,xmm13	; mask of the 4th block becomes the running offset
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm8,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm8,xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	pxor	xmm8,xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	pxor	xmm8,xmm5
+
+$L$ocb_dec_done:
+	pxor	xmm15,xmm0	; xmm0 is the last key a helper loaded ([rcx-16]); removes the last-key term from the offset
+	movdqu	XMMWORD[rbp],xmm8	; write back the checksum
+	movdqu	XMMWORD[r9],xmm15	; write back the offset
+
+	xorps	xmm0,xmm0	; from here on: zero the volatile xmm registers
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	movaps	xmm6,XMMWORD[rsp]	; restore xmm6..xmm15 and overwrite each save slot with zero
+	movaps	XMMWORD[rsp],xmm0
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	XMMWORD[16+rsp],xmm0
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	XMMWORD[32+rsp],xmm0
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	XMMWORD[48+rsp],xmm0
+	movaps	xmm10,XMMWORD[64+rsp]
+	movaps	XMMWORD[64+rsp],xmm0
+	movaps	xmm11,XMMWORD[80+rsp]
+	movaps	XMMWORD[80+rsp],xmm0
+	movaps	xmm12,XMMWORD[96+rsp]
+	movaps	XMMWORD[96+rsp],xmm0
+	movaps	xmm13,XMMWORD[112+rsp]
+	movaps	XMMWORD[112+rsp],xmm0
+	movaps	xmm14,XMMWORD[128+rsp]
+	movaps	XMMWORD[128+rsp],xmm0
+	movaps	xmm15,XMMWORD[144+rsp]
+	movaps	XMMWORD[144+rsp],xmm0
+	lea	rax,[((160+40))+rsp]	; rax = rsp as it was before the five pushes
+$L$ocb_dec_pop:
+	mov	r14,QWORD[((-40))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	rbp,QWORD[((-16))+rax]
+	mov	rbx,QWORD[((-8))+rax]
+	lea	rsp,[rax]
+$L$ocb_dec_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_aesni_ocb_decrypt:
+
+
+ALIGN	32
+__ocb_decrypt6:	; local helper of aesni_ocb_decrypt: mask, AES-decrypt and re-mask the six blocks in xmm2..xmm7
+	pxor	xmm15,xmm9	; (offset ^ last key) ^ (first key ^ last key) = offset ^ first key
+	movdqu	xmm11,XMMWORD[r12*1+rbx]	; table entries selected by the caller-computed r12/r13/r14
+	movdqa	xmm12,xmm10	; xmm10 = table entry 0, reused for the 1st, 3rd and 5th masks
+	movdqu	xmm13,XMMWORD[r13*1+rbx]
+	movdqa	xmm14,xmm10
+	pxor	xmm10,xmm15	; mask 1
+	movdqu	xmm15,XMMWORD[r14*1+rbx]
+	pxor	xmm11,xmm10	; mask 2, each mask chains from the previous one
+	pxor	xmm2,xmm10
+	pxor	xmm12,xmm11	; mask 3
+	pxor	xmm3,xmm11
+	pxor	xmm13,xmm12	; mask 4
+	pxor	xmm4,xmm12
+	pxor	xmm14,xmm13	; mask 5
+	pxor	xmm5,xmm13
+	pxor	xmm15,xmm14	; mask 6, also the running offset for the next call
+	pxor	xmm6,xmm14
+	pxor	xmm7,xmm15
+	movups	xmm0,XMMWORD[32+r11]
+; precompute the table offsets for the next group of six while the rounds run
+	lea	r12,[1+r8]
+	lea	r13,[3+r8]
+	lea	r14,[5+r8]
+	add	r8,6
+	pxor	xmm10,xmm9	; masks become (offset_i ^ last key), applied by aesdeclast below
+	bsf	r12,r12
+	bsf	r13,r13
+	bsf	r14,r14
+
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+	pxor	xmm11,xmm9
+	pxor	xmm12,xmm9
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+	pxor	xmm13,xmm9
+	pxor	xmm14,xmm9
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+	movups	xmm1,XMMWORD[48+r11]
+	pxor	xmm15,xmm9
+
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+DB	102,15,56,222,232	; aesdec xmm5,xmm0
+DB	102,15,56,222,240	; aesdec xmm6,xmm0
+DB	102,15,56,222,248	; aesdec xmm7,xmm0
+	movups	xmm0,XMMWORD[64+r11]
+	shl	r12,4	; scale bit indices to 16-byte table entries
+	shl	r13,4
+	jmp	NEAR $L$ocb_dec_loop6
+
+ALIGN	32
+$L$ocb_dec_loop6:	; two rounds per iteration, alternating xmm1/xmm0 as round key
+DB	102,15,56,222,209
+DB	102,15,56,222,217
+DB	102,15,56,222,225
+DB	102,15,56,222,233
+DB	102,15,56,222,241
+DB	102,15,56,222,249
+	movups	xmm1,XMMWORD[rax*1+rcx]	; rax is a negative index from rcx (end of key schedule)
+	add	rax,32	; sets ZF for the jnz below
+
+DB	102,15,56,222,208
+DB	102,15,56,222,216
+DB	102,15,56,222,224
+DB	102,15,56,222,232
+DB	102,15,56,222,240
+DB	102,15,56,222,248
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$ocb_dec_loop6	; loop until rax reaches 0
+
+DB	102,15,56,222,209
+DB	102,15,56,222,217
+DB	102,15,56,222,225
+DB	102,15,56,222,233
+DB	102,15,56,222,241
+DB	102,15,56,222,249
+	movups	xmm1,XMMWORD[16+r11]	; reload the [16+r11] round key for the next call
+	shl	r14,4
+
+DB	102,65,15,56,223,210	; aesdeclast xmm2,xmm10
+	movdqu	xmm10,XMMWORD[rbx]	; reload table entry 0 for the next call
+	mov	rax,r10	; reset the round index
+DB	102,65,15,56,223,219	; aesdeclast xmm3,xmm11
+DB	102,65,15,56,223,228	; aesdeclast xmm4,xmm12
+DB	102,65,15,56,223,237	; aesdeclast xmm5,xmm13
+DB	102,65,15,56,223,246	; aesdeclast xmm6,xmm14
+DB	102,65,15,56,223,255	; aesdeclast xmm7,xmm15
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	32
+__ocb_decrypt4:	; local helper of aesni_ocb_decrypt: mask, AES-decrypt and re-mask the four blocks in xmm2..xmm5
+	pxor	xmm15,xmm9	; (offset ^ last key) ^ (first key ^ last key) = offset ^ first key
+	movdqu	xmm11,XMMWORD[r12*1+rbx]	; table entries selected by the caller-computed r12/r13
+	movdqa	xmm12,xmm10	; xmm10 = table entry 0, reused for the 1st and 3rd masks
+	movdqu	xmm13,XMMWORD[r13*1+rbx]
+	pxor	xmm10,xmm15	; mask 1
+	pxor	xmm11,xmm10	; mask 2
+	pxor	xmm2,xmm10
+	pxor	xmm12,xmm11	; mask 3
+	pxor	xmm3,xmm11
+	pxor	xmm13,xmm12	; mask 4
+	pxor	xmm4,xmm12
+	pxor	xmm5,xmm13
+	movups	xmm0,XMMWORD[32+r11]
+
+	pxor	xmm10,xmm9	; masks become (offset_i ^ last key), applied by aesdeclast below
+	pxor	xmm11,xmm9
+	pxor	xmm12,xmm9
+	pxor	xmm13,xmm9
+
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+	movups	xmm1,XMMWORD[48+r11]
+
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+DB	102,15,56,222,232	; aesdec xmm5,xmm0
+	movups	xmm0,XMMWORD[64+r11]
+	jmp	NEAR $L$ocb_dec_loop4
+
+ALIGN	32
+$L$ocb_dec_loop4:	; two rounds per iteration, alternating xmm1/xmm0 as round key
+DB	102,15,56,222,209
+DB	102,15,56,222,217
+DB	102,15,56,222,225
+DB	102,15,56,222,233
+	movups	xmm1,XMMWORD[rax*1+rcx]	; rax is a negative index from rcx (end of key schedule)
+	add	rax,32	; sets ZF for the jnz below
+
+DB	102,15,56,222,208
+DB	102,15,56,222,216
+DB	102,15,56,222,224
+DB	102,15,56,222,232
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$ocb_dec_loop4	; loop until rax reaches 0
+
+DB	102,15,56,222,209
+DB	102,15,56,222,217
+DB	102,15,56,222,225
+DB	102,15,56,222,233
+	movups	xmm1,XMMWORD[16+r11]	; reload the [16+r11] round key for the next call
+	mov	rax,r10	; reset the round index
+
+DB	102,65,15,56,223,210	; aesdeclast xmm2,xmm10
+DB	102,65,15,56,223,219	; aesdeclast xmm3,xmm11
+DB	102,65,15,56,223,228	; aesdeclast xmm4,xmm12
+DB	102,65,15,56,223,237	; aesdeclast xmm5,xmm13
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	32
+__ocb_decrypt1:	; local helper of aesni_ocb_decrypt: mask, AES-decrypt and re-mask the single block in xmm2
+	pxor	xmm7,xmm15	; xmm7 (table entry loaded by the caller) ^= (offset ^ last key)
+	pxor	xmm7,xmm9	; ^= (first key ^ last key): mask now carries the first round key
+	pxor	xmm2,xmm7
+	movups	xmm0,XMMWORD[32+r11]
+
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+	movups	xmm1,XMMWORD[48+r11]
+	pxor	xmm7,xmm9	; mask becomes (new offset ^ last key), applied by aesdeclast below
+
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+	movups	xmm0,XMMWORD[64+r11]
+	jmp	NEAR $L$ocb_dec_loop1
+
+ALIGN	32
+$L$ocb_dec_loop1:	; two rounds per iteration, alternating xmm1/xmm0 as round key
+DB	102,15,56,222,209
+	movups	xmm1,XMMWORD[rax*1+rcx]	; rax is a negative index from rcx (end of key schedule)
+	add	rax,32	; sets ZF for the jnz below
+
+DB	102,15,56,222,208
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$ocb_dec_loop1	; loop until rax reaches 0
+
+DB	102,15,56,222,209
+	movups	xmm1,XMMWORD[16+r11]	; reload the [16+r11] round key for the next call
+	mov	rax,r10	; reset the round index
+
+DB	102,15,56,223,215	; aesdeclast xmm2,xmm7
+	DB	0F3h,0C3h		;repret
+
+global	aesni_cbc_encrypt
+
+ALIGN	16
+aesni_cbc_encrypt:	; CBC encrypt/decrypt; Win64 entry that remaps its arguments to rdi,rsi,rdx,rcx,r8,r9
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesni_cbc_encrypt:
+	mov	rdi,rcx	; rdi = input pointer
+	mov	rsi,rdx	; rsi = output pointer
+	mov	rdx,r8	; rdx = length in bytes
+	mov	rcx,r9	; rcx = key schedule (32-bit round count read from offset 240)
+	mov	r8,QWORD[40+rsp]	; r8 = pointer to the 16-byte IV, updated on return
+	mov	r9,QWORD[48+rsp]	; r9d = direction flag: non-zero encrypts, zero decrypts
+
+
+	test	rdx,rdx
+	jz	NEAR $L$cbc_ret	; nothing to do for zero length
+
+	mov	r10d,DWORD[240+rcx]	; r10d = round count
+	mov	r11,rcx	; r11 = key schedule base, kept for resetting rcx
+	test	r9d,r9d
+	jz	NEAR $L$cbc_decrypt
+
+	movups	xmm2,XMMWORD[r8]	; xmm2 = IV / previous ciphertext block
+	mov	eax,r10d
+	cmp	rdx,16
+	jb	NEAR $L$cbc_enc_tail
+	sub	rdx,16
+	jmp	NEAR $L$cbc_enc_loop
+ALIGN	16
+$L$cbc_enc_loop:	; one block per iteration
+	movups	xmm3,XMMWORD[rdi]
+	lea	rdi,[16+rdi]
+
+	movups	xmm0,XMMWORD[rcx]
+	movups	xmm1,XMMWORD[16+rcx]
+	xorps	xmm3,xmm0	; input ^ first round key
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm3	; ^ previous ciphertext (CBC chaining)
+$L$oop_enc1_15:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	eax
+	movups	xmm1,XMMWORD[rcx]
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_enc1_15	; flags from dec eax (movups/lea leave flags alone)
+DB	102,15,56,221,209	; aesenclast xmm2,xmm1
+	mov	eax,r10d
+	mov	rcx,r11
+	movups	XMMWORD[rsi],xmm2
+	lea	rsi,[16+rsi]
+	sub	rdx,16
+	jnc	NEAR $L$cbc_enc_loop
+	add	rdx,16
+	jnz	NEAR $L$cbc_enc_tail	; 1..15 trailing bytes remain
+	pxor	xmm0,xmm0	; scrub key material from registers
+	pxor	xmm1,xmm1
+	movups	XMMWORD[r8],xmm2	; last ciphertext block becomes the new IV
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	jmp	NEAR $L$cbc_ret
+
+$L$cbc_enc_tail:	; partial final block: copy it to the output, zero-pad to 16 bytes, encrypt in place
+	mov	rcx,rdx
+	xchg	rsi,rdi
+	DD	0x9066A4F3	; bytes F3 A4 66 90: rep movsb + 2-byte nop
+	mov	ecx,16
+	sub	rcx,rdx
+	xor	eax,eax
+	DD	0x9066AAF3	; bytes F3 AA 66 90: rep stosb + 2-byte nop
+	lea	rdi,[((-16))+rdi]	; rewind to the start of the padded block
+	mov	eax,r10d
+	mov	rsi,rdi	; input and output both point at the padded block
+	mov	rcx,r11
+	xor	rdx,rdx	; after this block the loop's sub/add leaves rdx == 0 and exits
+	jmp	NEAR $L$cbc_enc_loop
+
+ALIGN	16
+$L$cbc_decrypt:
+	cmp	rdx,16
+	jne	NEAR $L$cbc_decrypt_bulk
+
+; exactly one block: decrypt without setting up a stack frame
+
+	movdqu	xmm2,XMMWORD[rdi]
+	movdqu	xmm3,XMMWORD[r8]
+	movdqa	xmm4,xmm2	; keep the ciphertext: it becomes the new IV
+	movups	xmm0,XMMWORD[rcx]
+	movups	xmm1,XMMWORD[16+rcx]
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm0
+$L$oop_dec1_16:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+	dec	r10d
+	movups	xmm1,XMMWORD[rcx]
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_dec1_16
+DB	102,15,56,223,209	; aesdeclast xmm2,xmm1
+	pxor	xmm0,xmm0
+	pxor	xmm1,xmm1
+	movdqu	XMMWORD[r8],xmm4
+	xorps	xmm2,xmm3	; ^ old IV
+	pxor	xmm3,xmm3
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	jmp	NEAR $L$cbc_ret
+ALIGN	16
+$L$cbc_decrypt_bulk:
+	lea	r11,[rsp]	; r11 = frame pointer used by the epilogue
+	push	rbp
+	sub	rsp,176
+	and	rsp,-16	; 16-byte alignment required by the movaps saves
+	movaps	XMMWORD[16+rsp],xmm6	; [rsp..rsp+15] stays free as scratch for the partial tail
+	movaps	XMMWORD[32+rsp],xmm7
+	movaps	XMMWORD[48+rsp],xmm8
+	movaps	XMMWORD[64+rsp],xmm9
+	movaps	XMMWORD[80+rsp],xmm10
+	movaps	XMMWORD[96+rsp],xmm11
+	movaps	XMMWORD[112+rsp],xmm12
+	movaps	XMMWORD[128+rsp],xmm13
+	movaps	XMMWORD[144+rsp],xmm14
+	movaps	XMMWORD[160+rsp],xmm15
+$L$cbc_decrypt_body:
+	mov	rbp,rcx	; rbp = key schedule base
+	movups	xmm10,XMMWORD[r8]	; xmm10 = IV / previous ciphertext
+	mov	eax,r10d
+	cmp	rdx,0x50
+	jbe	NEAR $L$cbc_dec_tail	; five blocks or fewer
+
+	movups	xmm0,XMMWORD[rcx]
+	movdqu	xmm2,XMMWORD[rdi]	; load six blocks; xmm11..xmm15 keep ciphertext copies for chaining
+	movdqu	xmm3,XMMWORD[16+rdi]
+	movdqa	xmm11,xmm2
+	movdqu	xmm4,XMMWORD[32+rdi]
+	movdqa	xmm12,xmm3
+	movdqu	xmm5,XMMWORD[48+rdi]
+	movdqa	xmm13,xmm4
+	movdqu	xmm6,XMMWORD[64+rdi]
+	movdqa	xmm14,xmm5
+	movdqu	xmm7,XMMWORD[80+rdi]
+	movdqa	xmm15,xmm6
+	lea	r9,[OPENSSL_ia32cap_P]
+	mov	r9d,DWORD[4+r9]	; second dword of the capability vector
+	cmp	rdx,0x70
+	jbe	NEAR $L$cbc_dec_six_or_seven
+
+	and	r9d,71303168	; 0x04400000: keep bits 22 and 26
+	sub	rdx,0x50
+	cmp	r9d,4194304	; 0x00400000: bit 22 set, bit 26 clear (presumably a CPU-model heuristic -- confirm)
+	je	NEAR $L$cbc_dec_loop6_enter	; such CPUs take the 6-block loop
+	sub	rdx,0x20
+	lea	rcx,[112+rcx]	; bias rcx so the round keys below are addressed as [(k-112)+rcx]
+	jmp	NEAR $L$cbc_dec_loop8_enter
+ALIGN	16
+$L$cbc_dec_loop8:	; eight blocks per iteration, rounds fully unrolled
+	movups	XMMWORD[rsi],xmm9	; store the 8th block of the previous iteration
+	lea	rsi,[16+rsi]
+$L$cbc_dec_loop8_enter:
+	movdqu	xmm8,XMMWORD[96+rdi]
+	pxor	xmm2,xmm0
+	movdqu	xmm9,XMMWORD[112+rdi]
+	pxor	xmm3,xmm0
+	movups	xmm1,XMMWORD[((16-112))+rcx]
+	pxor	xmm4,xmm0
+	mov	rbp,-1
+	cmp	rdx,0x70	; CF is consumed by the adc below; nothing in between touches flags
+	pxor	xmm5,xmm0
+	pxor	xmm6,xmm0
+	pxor	xmm7,xmm0
+	pxor	xmm8,xmm0
+
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+	pxor	xmm9,xmm0
+	movups	xmm0,XMMWORD[((32-112))+rcx]
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+DB	102,68,15,56,222,193	; aesdec xmm8,xmm1
+	adc	rbp,0	; rbp = 0 if rdx < 0x70, else -1
+	and	rbp,128	; rbp = 0 or 128
+DB	102,68,15,56,222,201	; aesdec xmm9,xmm1
+	add	rbp,rdi	; rbp = where the next six blocks are preloaded from (current input when < 0x70 bytes remain)
+	movups	xmm1,XMMWORD[((48-112))+rcx]
+DB	102,15,56,222,208	; aesdec xmm2,xmm0 (same eight-register pattern repeats with xmm0/xmm1 alternating)
+DB	102,15,56,222,216
+DB	102,15,56,222,224
+DB	102,15,56,222,232
+DB	102,15,56,222,240
+DB	102,15,56,222,248
+DB	102,68,15,56,222,192	; aesdec xmm8,xmm0
+DB	102,68,15,56,222,200	; aesdec xmm9,xmm0
+	movups	xmm0,XMMWORD[((64-112))+rcx]
+	nop
+DB	102,15,56,222,209
+DB	102,15,56,222,217
+DB	102,15,56,222,225
+DB	102,15,56,222,233
+DB	102,15,56,222,241
+DB	102,15,56,222,249
+DB	102,68,15,56,222,193
+DB	102,68,15,56,222,201
+	movups	xmm1,XMMWORD[((80-112))+rcx]
+	nop
+DB	102,15,56,222,208
+DB	102,15,56,222,216
+DB	102,15,56,222,224
+DB	102,15,56,222,232
+DB	102,15,56,222,240
+DB	102,15,56,222,248
+DB	102,68,15,56,222,192
+DB	102,68,15,56,222,200
+	movups	xmm0,XMMWORD[((96-112))+rcx]
+	nop
+DB	102,15,56,222,209
+DB	102,15,56,222,217
+DB	102,15,56,222,225
+DB	102,15,56,222,233
+DB	102,15,56,222,241
+DB	102,15,56,222,249
+DB	102,68,15,56,222,193
+DB	102,68,15,56,222,201
+	movups	xmm1,XMMWORD[((112-112))+rcx]
+	nop
+DB	102,15,56,222,208
+DB	102,15,56,222,216
+DB	102,15,56,222,224
+DB	102,15,56,222,232
+DB	102,15,56,222,240
+DB	102,15,56,222,248
+DB	102,68,15,56,222,192
+DB	102,68,15,56,222,200
+	movups	xmm0,XMMWORD[((128-112))+rcx]
+	nop
+DB	102,15,56,222,209
+DB	102,15,56,222,217
+DB	102,15,56,222,225
+DB	102,15,56,222,233
+DB	102,15,56,222,241
+DB	102,15,56,222,249
+DB	102,68,15,56,222,193
+DB	102,68,15,56,222,201
+	movups	xmm1,XMMWORD[((144-112))+rcx]
+	cmp	eax,11	; round count; flags are consumed by the jb and je further down
+DB	102,15,56,222,208
+DB	102,15,56,222,216
+DB	102,15,56,222,224
+DB	102,15,56,222,232
+DB	102,15,56,222,240
+DB	102,15,56,222,248
+DB	102,68,15,56,222,192
+DB	102,68,15,56,222,200
+	movups	xmm0,XMMWORD[((160-112))+rcx]
+	jb	NEAR $L$cbc_dec_done	; round count below 11: no extra rounds
+DB	102,15,56,222,209
+DB	102,15,56,222,217
+DB	102,15,56,222,225
+DB	102,15,56,222,233
+DB	102,15,56,222,241
+DB	102,15,56,222,249
+DB	102,68,15,56,222,193
+DB	102,68,15,56,222,201
+	movups	xmm1,XMMWORD[((176-112))+rcx]
+	nop
+DB	102,15,56,222,208
+DB	102,15,56,222,216
+DB	102,15,56,222,224
+DB	102,15,56,222,232
+DB	102,15,56,222,240
+DB	102,15,56,222,248
+DB	102,68,15,56,222,192
+DB	102,68,15,56,222,200
+	movups	xmm0,XMMWORD[((192-112))+rcx]
+	je	NEAR $L$cbc_dec_done	; round count == 11: two extra rounds were enough
+DB	102,15,56,222,209
+DB	102,15,56,222,217
+DB	102,15,56,222,225
+DB	102,15,56,222,233
+DB	102,15,56,222,241
+DB	102,15,56,222,249
+DB	102,68,15,56,222,193
+DB	102,68,15,56,222,201
+	movups	xmm1,XMMWORD[((208-112))+rcx]
+	nop
+DB	102,15,56,222,208
+DB	102,15,56,222,216
+DB	102,15,56,222,224
+DB	102,15,56,222,232
+DB	102,15,56,222,240
+DB	102,15,56,222,248
+DB	102,68,15,56,222,192
+DB	102,68,15,56,222,200
+	movups	xmm0,XMMWORD[((224-112))+rcx]
+	jmp	NEAR $L$cbc_dec_done
+ALIGN	16
+$L$cbc_dec_done:	; xmm0 = last round key; it is pre-XORed into the chaining values used by aesdeclast
+DB	102,15,56,222,209
+DB	102,15,56,222,217
+	pxor	xmm10,xmm0
+	pxor	xmm11,xmm0
+DB	102,15,56,222,225
+DB	102,15,56,222,233
+	pxor	xmm12,xmm0
+	pxor	xmm13,xmm0
+DB	102,15,56,222,241
+DB	102,15,56,222,249
+	pxor	xmm14,xmm0
+	pxor	xmm15,xmm0
+DB	102,68,15,56,222,193
+DB	102,68,15,56,222,201
+	movdqu	xmm1,XMMWORD[80+rdi]
+
+DB	102,65,15,56,223,210	; aesdeclast xmm2,xmm10
+	movdqu	xmm10,XMMWORD[96+rdi]
+	pxor	xmm1,xmm0
+DB	102,65,15,56,223,219	; aesdeclast xmm3,xmm11
+	pxor	xmm10,xmm0
+	movdqu	xmm0,XMMWORD[112+rdi]	; 8th ciphertext block: the next iteration's chaining value
+DB	102,65,15,56,223,228	; aesdeclast xmm4,xmm12
+	lea	rdi,[128+rdi]
+	movdqu	xmm11,XMMWORD[rbp]	; preload ciphertext for the next iteration via rbp
+DB	102,65,15,56,223,237	; aesdeclast xmm5,xmm13
+DB	102,65,15,56,223,246	; aesdeclast xmm6,xmm14
+	movdqu	xmm12,XMMWORD[16+rbp]
+	movdqu	xmm13,XMMWORD[32+rbp]
+DB	102,65,15,56,223,255	; aesdeclast xmm7,xmm15
+DB	102,68,15,56,223,193	; aesdeclast xmm8,xmm1
+	movdqu	xmm14,XMMWORD[48+rbp]
+	movdqu	xmm15,XMMWORD[64+rbp]
+DB	102,69,15,56,223,202	; aesdeclast xmm9,xmm10
+	movdqa	xmm10,xmm0
+	movdqu	xmm1,XMMWORD[80+rbp]
+	movups	xmm0,XMMWORD[((-112))+rcx]	; first round key again (rcx is biased by 112)
+
+	movups	XMMWORD[rsi],xmm2
+	movdqa	xmm2,xmm11
+	movups	XMMWORD[16+rsi],xmm3
+	movdqa	xmm3,xmm12
+	movups	XMMWORD[32+rsi],xmm4
+	movdqa	xmm4,xmm13
+	movups	XMMWORD[48+rsi],xmm5
+	movdqa	xmm5,xmm14
+	movups	XMMWORD[64+rsi],xmm6
+	movdqa	xmm6,xmm15
+	movups	XMMWORD[80+rsi],xmm7
+	movdqa	xmm7,xmm1
+	movups	XMMWORD[96+rsi],xmm8
+	lea	rsi,[112+rsi]	; the 8th block (xmm9) is stored at the top of the next iteration or below
+
+	sub	rdx,0x80
+	ja	NEAR $L$cbc_dec_loop8
+
+	movaps	xmm2,xmm9
+	lea	rcx,[((-112))+rcx]	; undo the rcx bias
+	add	rdx,0x70
+	jle	NEAR $L$cbc_dec_clear_tail_collected
+	movups	XMMWORD[rsi],xmm9
+	lea	rsi,[16+rsi]
+	cmp	rdx,0x50
+	jbe	NEAR $L$cbc_dec_tail
+
+	movaps	xmm2,xmm11
+$L$cbc_dec_six_or_seven:
+	cmp	rdx,0x60
+	ja	NEAR $L$cbc_dec_seven
+
+	movaps	xmm8,xmm7	; keep the 6th ciphertext block: it becomes the new IV
+	call	_aesni_decrypt6
+	pxor	xmm2,xmm10
+	movaps	xmm10,xmm8
+	pxor	xmm3,xmm11
+	movdqu	XMMWORD[rsi],xmm2
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3	; registers are zeroed as soon as their plaintext is stored
+	pxor	xmm5,xmm13
+	movdqu	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	pxor	xmm6,xmm14
+	movdqu	XMMWORD[48+rsi],xmm5
+	pxor	xmm5,xmm5
+	pxor	xmm7,xmm15
+	movdqu	XMMWORD[64+rsi],xmm6
+	pxor	xmm6,xmm6
+	lea	rsi,[80+rsi]
+	movdqa	xmm2,xmm7	; the last block is handed to the tail code in xmm2
+	pxor	xmm7,xmm7
+	jmp	NEAR $L$cbc_dec_tail_collected
+
+ALIGN	16
+$L$cbc_dec_seven:
+	movups	xmm8,XMMWORD[96+rdi]
+	xorps	xmm9,xmm9	; eighth lane is a zero dummy
+	call	_aesni_decrypt8
+	movups	xmm9,XMMWORD[80+rdi]
+	pxor	xmm2,xmm10
+	movups	xmm10,XMMWORD[96+rdi]	; 7th ciphertext block becomes the new IV
+	pxor	xmm3,xmm11
+	movdqu	XMMWORD[rsi],xmm2
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	pxor	xmm5,xmm13
+	movdqu	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	pxor	xmm6,xmm14
+	movdqu	XMMWORD[48+rsi],xmm5
+	pxor	xmm5,xmm5
+	pxor	xmm7,xmm15
+	movdqu	XMMWORD[64+rsi],xmm6
+	pxor	xmm6,xmm6
+	pxor	xmm8,xmm9
+	movdqu	XMMWORD[80+rsi],xmm7
+	pxor	xmm7,xmm7
+	lea	rsi,[96+rsi]
+	movdqa	xmm2,xmm8
+	pxor	xmm8,xmm8
+	pxor	xmm9,xmm9
+	jmp	NEAR $L$cbc_dec_tail_collected
+
+ALIGN	16
+$L$cbc_dec_loop6:	; six blocks per iteration via _aesni_decrypt6
+	movups	XMMWORD[rsi],xmm7	; store the 6th block of the previous iteration
+	lea	rsi,[16+rsi]
+	movdqu	xmm2,XMMWORD[rdi]
+	movdqu	xmm3,XMMWORD[16+rdi]
+	movdqa	xmm11,xmm2
+	movdqu	xmm4,XMMWORD[32+rdi]
+	movdqa	xmm12,xmm3
+	movdqu	xmm5,XMMWORD[48+rdi]
+	movdqa	xmm13,xmm4
+	movdqu	xmm6,XMMWORD[64+rdi]
+	movdqa	xmm14,xmm5
+	movdqu	xmm7,XMMWORD[80+rdi]
+	movdqa	xmm15,xmm6
+$L$cbc_dec_loop6_enter:
+	lea	rdi,[96+rdi]
+	movdqa	xmm8,xmm7
+
+	call	_aesni_decrypt6
+
+	pxor	xmm2,xmm10
+	movdqa	xmm10,xmm8
+	pxor	xmm3,xmm11
+	movdqu	XMMWORD[rsi],xmm2
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[16+rsi],xmm3
+	pxor	xmm5,xmm13
+	movdqu	XMMWORD[32+rsi],xmm4
+	pxor	xmm6,xmm14
+	mov	rcx,rbp	; reset key pointer and round count for the next call
+	movdqu	XMMWORD[48+rsi],xmm5
+	pxor	xmm7,xmm15
+	mov	eax,r10d
+	movdqu	XMMWORD[64+rsi],xmm6
+	lea	rsi,[80+rsi]
+	sub	rdx,0x60
+	ja	NEAR $L$cbc_dec_loop6
+
+	movdqa	xmm2,xmm7
+	add	rdx,0x50
+	jle	NEAR $L$cbc_dec_clear_tail_collected
+	movups	XMMWORD[rsi],xmm7
+	lea	rsi,[16+rsi]
+
+$L$cbc_dec_tail:	; up to five remaining blocks (the last one may be partial)
+	movups	xmm2,XMMWORD[rdi]
+	sub	rdx,0x10
+	jbe	NEAR $L$cbc_dec_one
+
+	movups	xmm3,XMMWORD[16+rdi]
+	movaps	xmm11,xmm2
+	sub	rdx,0x10
+	jbe	NEAR $L$cbc_dec_two
+
+	movups	xmm4,XMMWORD[32+rdi]
+	movaps	xmm12,xmm3
+	sub	rdx,0x10
+	jbe	NEAR $L$cbc_dec_three
+
+	movups	xmm5,XMMWORD[48+rdi]
+	movaps	xmm13,xmm4
+	sub	rdx,0x10
+	jbe	NEAR $L$cbc_dec_four
+
+	movups	xmm6,XMMWORD[64+rdi]
+	movaps	xmm14,xmm5
+	movaps	xmm15,xmm6
+	xorps	xmm7,xmm7	; sixth lane is a zero dummy
+	call	_aesni_decrypt6
+	pxor	xmm2,xmm10
+	movaps	xmm10,xmm15
+	pxor	xmm3,xmm11
+	movdqu	XMMWORD[rsi],xmm2
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	pxor	xmm5,xmm13
+	movdqu	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	pxor	xmm6,xmm14
+	movdqu	XMMWORD[48+rsi],xmm5
+	pxor	xmm5,xmm5
+	lea	rsi,[64+rsi]
+	movdqa	xmm2,xmm6
+	pxor	xmm6,xmm6
+	pxor	xmm7,xmm7
+	sub	rdx,0x10
+	jmp	NEAR $L$cbc_dec_tail_collected
+
+ALIGN	16
+$L$cbc_dec_one:
+	movaps	xmm11,xmm2
+	movups	xmm0,XMMWORD[rcx]
+	movups	xmm1,XMMWORD[16+rcx]
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm0
+$L$oop_dec1_17:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+	dec	eax
+	movups	xmm1,XMMWORD[rcx]
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_dec1_17
+DB	102,15,56,223,209	; aesdeclast xmm2,xmm1
+	xorps	xmm2,xmm10
+	movaps	xmm10,xmm11
+	jmp	NEAR $L$cbc_dec_tail_collected
+ALIGN	16
+$L$cbc_dec_two:
+	movaps	xmm12,xmm3
+	call	_aesni_decrypt2
+	pxor	xmm2,xmm10
+	movaps	xmm10,xmm12
+	pxor	xmm3,xmm11
+	movdqu	XMMWORD[rsi],xmm2
+	movdqa	xmm2,xmm3
+	pxor	xmm3,xmm3
+	lea	rsi,[16+rsi]
+	jmp	NEAR $L$cbc_dec_tail_collected
+ALIGN	16
+$L$cbc_dec_three:
+	movaps	xmm13,xmm4
+	call	_aesni_decrypt3
+	pxor	xmm2,xmm10
+	movaps	xmm10,xmm13
+	pxor	xmm3,xmm11
+	movdqu	XMMWORD[rsi],xmm2
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	movdqa	xmm2,xmm4
+	pxor	xmm4,xmm4
+	lea	rsi,[32+rsi]
+	jmp	NEAR $L$cbc_dec_tail_collected
+ALIGN	16
+$L$cbc_dec_four:
+	movaps	xmm14,xmm5
+	call	_aesni_decrypt4
+	pxor	xmm2,xmm10
+	movaps	xmm10,xmm14
+	pxor	xmm3,xmm11
+	movdqu	XMMWORD[rsi],xmm2
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	pxor	xmm5,xmm13
+	movdqu	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	movdqa	xmm2,xmm5
+	pxor	xmm5,xmm5
+	lea	rsi,[48+rsi]
+	jmp	NEAR $L$cbc_dec_tail_collected
+
+ALIGN	16
+$L$cbc_dec_clear_tail_collected:	; entry used by the bulk loops, which have not yet zeroed xmm3..xmm5
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+$L$cbc_dec_tail_collected:	; xmm2 = last plaintext block, xmm10 = new IV
+	movups	XMMWORD[r8],xmm10
+	and	rdx,15
+	jnz	NEAR $L$cbc_dec_tail_partial
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	jmp	NEAR $L$cbc_dec_ret
+ALIGN	16
+$L$cbc_dec_tail_partial:	; length not a multiple of 16: emit part of the last block via the stack scratch slot
+	movaps	XMMWORD[rsp],xmm2
+	pxor	xmm2,xmm2
+	mov	rcx,16
+	mov	rdi,rsi
+	sub	rcx,rdx	; rcx = 16 - (remaining length & 15) bytes to copy
+	lea	rsi,[rsp]
+	DD	0x9066A4F3	; bytes F3 A4 66 90: rep movsb + 2-byte nop
+	movdqa	XMMWORD[rsp],xmm2	; wipe the scratch slot
+
+$L$cbc_dec_ret:
+	xorps	xmm0,xmm0
+	pxor	xmm1,xmm1
+	movaps	xmm6,XMMWORD[16+rsp]	; restore xmm6..xmm15 and overwrite each save slot with zero
+	movaps	XMMWORD[16+rsp],xmm0
+	movaps	xmm7,XMMWORD[32+rsp]
+	movaps	XMMWORD[32+rsp],xmm0
+	movaps	xmm8,XMMWORD[48+rsp]
+	movaps	XMMWORD[48+rsp],xmm0
+	movaps	xmm9,XMMWORD[64+rsp]
+	movaps	XMMWORD[64+rsp],xmm0
+	movaps	xmm10,XMMWORD[80+rsp]
+	movaps	XMMWORD[80+rsp],xmm0
+	movaps	xmm11,XMMWORD[96+rsp]
+	movaps	XMMWORD[96+rsp],xmm0
+	movaps	xmm12,XMMWORD[112+rsp]
+	movaps	XMMWORD[112+rsp],xmm0
+	movaps	xmm13,XMMWORD[128+rsp]
+	movaps	XMMWORD[128+rsp],xmm0
+	movaps	xmm14,XMMWORD[144+rsp]
+	movaps	XMMWORD[144+rsp],xmm0
+	movaps	xmm15,XMMWORD[160+rsp]
+	movaps	XMMWORD[160+rsp],xmm0
+	mov	rbp,QWORD[((-8))+r11]	; rbp was pushed just below the frame pointer held in r11
+	lea	rsp,[r11]
+$L$cbc_ret:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_aesni_cbc_encrypt:
+global	aesni_set_decrypt_key
+
+ALIGN	16
+aesni_set_decrypt_key:	; builds the encryption schedule at r8, then converts it in place into a decryption schedule
+DB	0x48,0x83,0xEC,0x08	; sub rsp,8
+	call	__aesni_set_encrypt_key	; leaves the status in eax and the stored round count in edx
+	shl	edx,4	; edx = 16 * round count
+	test	eax,eax
+	jnz	NEAR $L$dec_key_ret	; non-zero status is passed straight back to the caller
+	lea	rcx,[16+rdx*1+r8]	; rcx = last round key, r8 = first round key
+
+	movups	xmm0,XMMWORD[r8]	; swap the first and last round keys unchanged
+	movups	xmm1,XMMWORD[rcx]
+	movups	XMMWORD[rcx],xmm0
+	movups	XMMWORD[r8],xmm1
+	lea	r8,[16+r8]
+	lea	rcx,[((-16))+rcx]
+
+$L$dec_key_inverse:	; walk inward from both ends, swapping keys and applying aesimc to each
+	movups	xmm0,XMMWORD[r8]
+	movups	xmm1,XMMWORD[rcx]
+DB	102,15,56,219,192	; aesimc xmm0,xmm0
+DB	102,15,56,219,201	; aesimc xmm1,xmm1
+	lea	r8,[16+r8]
+	lea	rcx,[((-16))+rcx]
+	movups	XMMWORD[16+rcx],xmm0
+	movups	XMMWORD[(-16)+r8],xmm1
+	cmp	rcx,r8
+	ja	NEAR $L$dec_key_inverse
+
+	movups	xmm0,XMMWORD[r8]	; middle round key: transformed in place (rcx == r8 here)
+DB	102,15,56,219,192	; aesimc xmm0,xmm0
+	pxor	xmm1,xmm1	; scrub key material from registers
+	movups	XMMWORD[rcx],xmm0
+	pxor	xmm0,xmm0
+$L$dec_key_ret:
+	add	rsp,8
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_set_decrypt_key:
+
+global	aesni_set_encrypt_key
+
+ALIGN	16
+aesni_set_encrypt_key:	; rcx = user key, edx = key size in bits (128/192/256), r8 = output schedule; returns 0, -1 or -2 in rax
+__aesni_set_encrypt_key:
+DB	0x48,0x83,0xEC,0x08	; sub rsp,8
+	mov	rax,-1	; -1 is returned when either pointer is NULL
+	test	rcx,rcx
+	jz	NEAR $L$enc_key_ret
+	test	r8,r8
+	jz	NEAR $L$enc_key_ret
+
+	movups	xmm0,XMMWORD[rcx]	; first 16 bytes of the user key
+	xorps	xmm4,xmm4
+	lea	r10,[OPENSSL_ia32cap_P]
+	mov	r10d,DWORD[4+r10]	; second dword of the capability vector
+	and	r10d,268437504	; 0x10000800: keep bits 28 and 11
+	lea	rax,[16+r8]	; rax = write cursor, starting at the second round key
+	cmp	edx,256
+	je	NEAR $L$14rounds
+	cmp	edx,192
+	je	NEAR $L$12rounds
+	cmp	edx,128
+	jne	NEAR $L$bad_keybits
+
+$L$10rounds:
+	mov	edx,9	; round count value stored in the schedule
+	cmp	r10d,268435456	; 0x10000000: bit 28 set, bit 11 clear -> aeskeygenassist-free path (presumably a CPU heuristic -- confirm)
+	je	NEAR $L$10rounds_alt
+
+	movups	XMMWORD[r8],xmm0
+DB	102,15,58,223,200,1	; aeskeygenassist xmm1,xmm0,1 (the last byte is the round constant)
+	call	$L$key_expansion_128_cold
+DB	102,15,58,223,200,2
+	call	$L$key_expansion_128
+DB	102,15,58,223,200,4
+	call	$L$key_expansion_128
+DB	102,15,58,223,200,8
+	call	$L$key_expansion_128
+DB	102,15,58,223,200,16
+	call	$L$key_expansion_128
+DB	102,15,58,223,200,32
+	call	$L$key_expansion_128
+DB	102,15,58,223,200,64
+	call	$L$key_expansion_128
+DB	102,15,58,223,200,128
+	call	$L$key_expansion_128
+DB	102,15,58,223,200,27
+	call	$L$key_expansion_128
+DB	102,15,58,223,200,54
+	call	$L$key_expansion_128
+	movups	XMMWORD[rax],xmm0	; final round key
+	mov	DWORD[80+rax],edx	; rax = r8+160 here, so this is offset 240 of the schedule
+	xor	eax,eax	; return 0
+	jmp	NEAR $L$enc_key_ret
+
+ALIGN	16
+$L$10rounds_alt:	; same schedule computed with pshufb + aesenclast
+	movdqa	xmm5,XMMWORD[$L$key_rotate]
+	mov	r10d,8
+	movdqa	xmm4,XMMWORD[$L$key_rcon1]
+	movdqa	xmm2,xmm0
+	movdqu	XMMWORD[r8],xmm0
+	jmp	NEAR $L$oop_key128
+
+ALIGN	16
+$L$oop_key128:	; eight iterations, one round key each
+DB	102,15,56,0,197	; pshufb xmm0,xmm5
+DB	102,15,56,221,196	; aesenclast xmm0,xmm4
+	pslld	xmm4,1	; double the round constant
+	lea	rax,[16+rax]
+
+	movdqa	xmm3,xmm2	; xmm2 = prefix XOR of the previous key's four dwords
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm2,xmm3
+
+	pxor	xmm0,xmm2
+	movdqu	XMMWORD[(-16)+rax],xmm0
+	movdqa	xmm2,xmm0
+
+	dec	r10d
+	jnz	NEAR $L$oop_key128
+
+	movdqa	xmm4,XMMWORD[$L$key_rcon1b]	; switch to round constant 0x1b for the last two keys
+
+DB	102,15,56,0,197	; pshufb xmm0,xmm5
+DB	102,15,56,221,196	; aesenclast xmm0,xmm4
+	pslld	xmm4,1
+
+	movdqa	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm2,xmm3
+
+	pxor	xmm0,xmm2
+	movdqu	XMMWORD[rax],xmm0
+
+	movdqa	xmm2,xmm0
+DB	102,15,56,0,197	; pshufb xmm0,xmm5
+DB	102,15,56,221,196	; aesenclast xmm0,xmm4
+
+	movdqa	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm2,xmm3
+
+	pxor	xmm0,xmm2
+	movdqu	XMMWORD[16+rax],xmm0
+
+	mov	DWORD[96+rax],edx	; rax = r8+144 here, so this is offset 240 of the schedule
+	xor	eax,eax	; return 0
+	jmp	NEAR $L$enc_key_ret
+
+ALIGN	16
+$L$12rounds:
+	movq	xmm2,QWORD[16+rcx]	; remaining 8 bytes of the 192-bit key
+	mov	edx,11	; round count value stored in the schedule
+	cmp	r10d,268435456
+	je	NEAR $L$12rounds_alt
+
+	movups	XMMWORD[r8],xmm0
+DB	102,15,58,223,202,1	; aeskeygenassist xmm1,xmm2,1 (the last byte is the round constant)
+	call	$L$key_expansion_192a_cold
+DB	102,15,58,223,202,2
+	call	$L$key_expansion_192b
+DB	102,15,58,223,202,4
+	call	$L$key_expansion_192a
+DB	102,15,58,223,202,8
+	call	$L$key_expansion_192b
+DB	102,15,58,223,202,16
+	call	$L$key_expansion_192a
+DB	102,15,58,223,202,32
+	call	$L$key_expansion_192b
+DB	102,15,58,223,202,64
+	call	$L$key_expansion_192a
+DB	102,15,58,223,202,128
+	call	$L$key_expansion_192b
+	movups	XMMWORD[rax],xmm0
+	mov	DWORD[48+rax],edx	; rax = r8+192 here, so this is offset 240 of the schedule
+	xor	rax,rax	; return 0
+	jmp	NEAR $L$enc_key_ret
+
+ALIGN	16
+$L$12rounds_alt:
+	movdqa	xmm5,XMMWORD[$L$key_rotate192]
+	movdqa	xmm4,XMMWORD[$L$key_rcon1]
+	mov	r10d,8
+	movdqu	XMMWORD[r8],xmm0
+	jmp	NEAR $L$oop_key192
+
+ALIGN	16
+$L$oop_key192:	; eight iterations, 24 bytes of schedule each
+	movq	QWORD[rax],xmm2
+	movdqa	xmm1,xmm2
+DB	102,15,56,0,213	; pshufb xmm2,xmm5
+DB	102,15,56,221,212	; aesenclast xmm2,xmm4
+	pslld	xmm4,1	; double the round constant
+	lea	rax,[24+rax]
+
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm0,xmm3
+
+	pshufd	xmm3,xmm0,0xff
+	pxor	xmm3,xmm1
+	pslldq	xmm1,4
+	pxor	xmm3,xmm1
+
+	pxor	xmm0,xmm2
+	pxor	xmm2,xmm3
+	movdqu	XMMWORD[(-16)+rax],xmm0
+
+	dec	r10d
+	jnz	NEAR $L$oop_key192
+
+	mov	DWORD[32+rax],edx	; rax = r8+208 here, so this is offset 240 of the schedule
+	xor	eax,eax	; return 0
+	jmp	NEAR $L$enc_key_ret
+
+ALIGN	16
+$L$14rounds:
+	movups	xmm2,XMMWORD[16+rcx]	; second 16 bytes of the 256-bit key
+	mov	edx,13	; round count value stored in the schedule
+	lea	rax,[16+rax]	; the first two round keys are the raw key halves
+	cmp	r10d,268435456
+	je	NEAR $L$14rounds_alt
+
+	movups	XMMWORD[r8],xmm0
+	movups	XMMWORD[16+r8],xmm2
+DB	102,15,58,223,202,1	; aeskeygenassist xmm1,xmm2,1 (202 -> source xmm2, 200 -> source xmm0)
+	call	$L$key_expansion_256a_cold
+DB	102,15,58,223,200,1	; aeskeygenassist xmm1,xmm0,1
+	call	$L$key_expansion_256b
+DB	102,15,58,223,202,2
+	call	$L$key_expansion_256a
+DB	102,15,58,223,200,2
+	call	$L$key_expansion_256b
+DB	102,15,58,223,202,4
+	call	$L$key_expansion_256a
+DB	102,15,58,223,200,4
+	call	$L$key_expansion_256b
+DB	102,15,58,223,202,8
+	call	$L$key_expansion_256a
+DB	102,15,58,223,200,8
+	call	$L$key_expansion_256b
+DB	102,15,58,223,202,16
+	call	$L$key_expansion_256a
+DB	102,15,58,223,200,16
+	call	$L$key_expansion_256b
+DB	102,15,58,223,202,32
+	call	$L$key_expansion_256a
+DB	102,15,58,223,200,32
+	call	$L$key_expansion_256b
+DB	102,15,58,223,202,64
+	call	$L$key_expansion_256a
+	movups	XMMWORD[rax],xmm0
+	mov	DWORD[16+rax],edx	; rax = r8+224 here, so this is offset 240 of the schedule
+	xor	rax,rax	; return 0
+	jmp	NEAR $L$enc_key_ret
+
+ALIGN	16
+$L$14rounds_alt:
+	movdqa	xmm5,XMMWORD[$L$key_rotate]
+	movdqa	xmm4,XMMWORD[$L$key_rcon1]
+	mov	r10d,7
+	movdqu	XMMWORD[r8],xmm0
+	movdqa	xmm1,xmm2
+	movdqu	XMMWORD[16+r8],xmm2
+	jmp	NEAR $L$oop_key256
+
+ALIGN	16
+$L$oop_key256:	; seven iterations; every one but the last emits two round keys
+DB	102,15,56,0,213	; pshufb xmm2,xmm5
+DB	102,15,56,221,212	; aesenclast xmm2,xmm4
+
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm0,xmm3
+	pslld	xmm4,1	; double the round constant
+
+	pxor	xmm0,xmm2
+	movdqu	XMMWORD[rax],xmm0
+
+	dec	r10d
+	jz	NEAR $L$done_key256
+
+	pshufd	xmm2,xmm0,0xff
+	pxor	xmm3,xmm3
+DB	102,15,56,221,211	; aesenclast xmm2,xmm3 (xmm3 is zero here)
+
+	movdqa	xmm3,xmm1
+	pslldq	xmm1,4
+	pxor	xmm3,xmm1
+	pslldq	xmm1,4
+	pxor	xmm3,xmm1
+	pslldq	xmm1,4
+	pxor	xmm1,xmm3
+
+	pxor	xmm2,xmm1
+	movdqu	XMMWORD[16+rax],xmm2
+	lea	rax,[32+rax]
+	movdqa	xmm1,xmm2
+
+	jmp	NEAR $L$oop_key256
+
+$L$done_key256:
+	mov	DWORD[16+rax],edx	; rax = r8+224 here, so this is offset 240 of the schedule
+	xor	eax,eax	; return 0
+	jmp	NEAR $L$enc_key_ret
+
+ALIGN	16
+$L$bad_keybits:
+	mov	rax,-2	; key size was not 128, 192 or 256
+$L$enc_key_ret:
+	pxor	xmm0,xmm0	; scrub key material from the registers used above
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	add	rsp,8
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_set_encrypt_key:
+
+ALIGN	16
+$L$key_expansion_128:	; store the current round key (xmm0) at [rax], advance rax, derive the next key into xmm0
+	movups	XMMWORD[rax],xmm0
+	lea	rax,[16+rax]
+$L$key_expansion_128_cold:	; entry for the first call, which skips the store
+	shufps	xmm4,xmm0,16	; with the next three instructions: XOR each dword of xmm0 with all lower dwords (xmm4 is zeroed by the caller)
+	xorps	xmm0,xmm4
+	shufps	xmm4,xmm0,140
+	xorps	xmm0,xmm4
+	shufps	xmm1,xmm1,255	; broadcast dword 3 of the aeskeygenassist result in xmm1
+	xorps	xmm0,xmm1
+	DB	0F3h,0C3h		;repret
+
+ALIGN	16
+$L$key_expansion_192a:	; store xmm0 at [rax], advance rax by 16, then derive the next 192-bit key state in xmm0/xmm2
+	movups	XMMWORD[rax],xmm0
+	lea	rax,[16+rax]
+$L$key_expansion_192a_cold:	; entry for the first call, which skips the store
+	movaps	xmm5,xmm2	; keep xmm2 for the shufps in $L$key_expansion_192b
+$L$key_expansion_192b_warm:	; shared tail; $L$key_expansion_192b jumps here after its stores
+	shufps	xmm4,xmm0,16
+	movdqa	xmm3,xmm2
+	xorps	xmm0,xmm4
+	shufps	xmm4,xmm0,140
+	pslldq	xmm3,4
+	xorps	xmm0,xmm4
+	pshufd	xmm1,xmm1,85	; broadcast dword 1 of the aeskeygenassist result in xmm1
+	pxor	xmm2,xmm3
+	pxor	xmm0,xmm1
+	pshufd	xmm3,xmm0,255	; broadcast dword 3 of the new xmm0
+	pxor	xmm2,xmm3
+	DB	0F3h,0C3h		;repret
+
+ALIGN	16
+$L$key_expansion_192b:	; emit 32 bytes of schedule assembled from xmm5, xmm0 and xmm2, then run the shared expansion tail
+	movaps	xmm3,xmm0
+	shufps	xmm5,xmm0,68	; low qword of xmm5 followed by low qword of xmm0
+	movups	XMMWORD[rax],xmm5
+	shufps	xmm3,xmm2,78	; high qword of xmm0 followed by low qword of xmm2
+	movups	XMMWORD[16+rax],xmm3
+	lea	rax,[32+rax]
+	jmp	NEAR $L$key_expansion_192b_warm	; the tail's ret returns to this helper's caller
+
+ALIGN	16
+$L$key_expansion_256a:	; store xmm2 at [rax], advance rax, derive the next even-numbered round key into xmm0
+	movups	XMMWORD[rax],xmm2
+	lea	rax,[16+rax]
+$L$key_expansion_256a_cold:	; entry for the first call, which skips the store
+	shufps	xmm4,xmm0,16	; with the next three instructions: XOR each dword of xmm0 with all lower dwords (xmm4 is zeroed by the caller)
+	xorps	xmm0,xmm4
+	shufps	xmm4,xmm0,140
+	xorps	xmm0,xmm4
+	shufps	xmm1,xmm1,255	; broadcast dword 3 of the aeskeygenassist result in xmm1
+	xorps	xmm0,xmm1
+	DB	0F3h,0C3h		;repret
+
+ALIGN	16
+$L$key_expansion_256b:	; store xmm0 at [rax], advance rax, derive the next odd-numbered round key into xmm2
+	movups	XMMWORD[rax],xmm0
+	lea	rax,[16+rax]
+
+	shufps	xmm4,xmm2,16	; with the next three instructions: XOR each dword of xmm2 with all lower dwords
+	xorps	xmm2,xmm4
+	shufps	xmm4,xmm2,140
+	xorps	xmm2,xmm4
+	shufps	xmm1,xmm1,170	; broadcast dword 2 of the aeskeygenassist result in xmm1
+	xorps	xmm2,xmm1
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	64
+$L$bswap_mask:	; byte-shuffle control that reverses all 16 bytes
+DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+$L$increment32:	; dwords 6,6,6,0 (used outside this view)
+	DD	6,6,6,0
+$L$increment64:	; 1 in the low dword only
+	DD	1,0,0,0
+$L$xts_magic:	; dwords 0x87,0,1,0 (used outside this view)
+	DD	0x87,0,1,0
+$L$increment1:	; 1 in the highest byte only
+DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
+$L$key_rotate:	; pshufb control used by the 128/256-bit alternative key schedules
+	DD	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
+$L$key_rotate192:	; pshufb control used by the 192-bit alternative key schedule
+	DD	0x04070605,0x04070605,0x04070605,0x04070605
+$L$key_rcon1:	; initial round constant 1 in every dword (doubled with pslld per key)
+	DD	1,1,1,1
+$L$key_rcon1b:	; round constant 0x1b in every dword, for the last two 128-bit round keys
+	DD	0x1b,0x1b,0x1b,0x1b
+; the next four DB lines are the NUL-terminated ASCII string "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
+DB	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+DB	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+DB	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+DB	115,108,46,111,114,103,62,0
+ALIGN	64
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+ecb_ccm64_se_handler:	; SEH handler referenced by $L$SEH_info_ecb/ccm64_enc/ccm64_dec; r8/r9 presumably CONTEXT*/DISPATCHER_CONTEXT* (confirm offsets against winnt.h)
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; scratch/outgoing-argument area used by the call in $L$common_seh_tail
+
+	mov	rax,QWORD[120+r8]	; rax = [r8+120] (presumably CONTEXT.Rax)
+	mov	rbx,QWORD[248+r8]	; rbx = [r8+248] (presumably CONTEXT.Rip, the faulting address)
+
+	mov	rsi,QWORD[8+r9]	; rsi = [r9+8] (presumably DISPATCHER_CONTEXT.ImageBase)
+	mov	r11,QWORD[56+r9]	; r11 = [r9+56] (presumably HandlerData: the two DWORDs that follow the handler in .xdata)
+
+	mov	r10d,DWORD[r11]	; first handler-data DWORD (image-relative body label)
+	lea	r10,[r10*1+rsi]	; convert to an absolute address
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; fault before the body label: keep rax as loaded above
+
+	mov	rax,QWORD[152+r8]	; rax = [r8+152] (presumably CONTEXT.Rsp)
+
+	mov	r10d,DWORD[4+r11]	; second handler-data DWORD (image-relative ret/epilogue label)
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; fault at/after that label: nothing further to restore
+
+	lea	rsi,[rax]	; source = frame at rax
+	lea	rdi,[512+r8]	; destination = r8+512 (presumably CONTEXT.Xmm6)
+	mov	ecx,8	; 8 qwords = 64 bytes (four 16-byte registers)
+	DD	0xa548f3fc	; encoded bytes FC F3 48 A5 = cld; rep movsq
+	lea	rax,[88+rax]	; step rax past the 88-byte frame
+
+	jmp	NEAR $L$common_seh_tail
+
+
+
+ALIGN	16
+ctr_xts_se_handler:	; SEH handler referenced by $L$SEH_info_ctr32/xts_enc/xts_dec; r8/r9 presumably CONTEXT*/DISPATCHER_CONTEXT* (confirm)
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; scratch/outgoing-argument area used by the call in $L$common_seh_tail
+
+	mov	rax,QWORD[120+r8]	; rax = [r8+120] (presumably CONTEXT.Rax)
+	mov	rbx,QWORD[248+r8]	; rbx = [r8+248] (presumably CONTEXT.Rip)
+
+	mov	rsi,QWORD[8+r9]	; rsi = [r9+8] (presumably ImageBase)
+	mov	r11,QWORD[56+r9]	; r11 = [r9+56] (presumably HandlerData)
+
+	mov	r10d,DWORD[r11]	; first handler-data DWORD (body label, image-relative)
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; fault before the body label
+
+	mov	rax,QWORD[152+r8]	; rax = [r8+152] (presumably CONTEXT.Rsp)
+
+	mov	r10d,DWORD[4+r11]	; second handler-data DWORD (epilogue label, image-relative)
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; fault at/after the epilogue label
+
+	mov	rax,QWORD[208+r8]	; rax = [r8+208] (presumably CONTEXT.R11, which the covered functions use as frame pointer; confirm)
+
+	lea	rsi,[((-168))+rax]	; source = 168 bytes below that frame pointer
+	lea	rdi,[512+r8]	; destination = r8+512 (presumably CONTEXT.Xmm6)
+	mov	ecx,20	; 20 qwords = 160 bytes (ten 16-byte registers)
+	DD	0xa548f3fc	; encoded bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rbp,QWORD[((-8))+rax]	; reload the qword saved just below the frame pointer
+	mov	QWORD[160+r8],rbp	; and write it to [r8+160] (presumably CONTEXT.Rbp)
+	jmp	NEAR $L$common_seh_tail
+
+
+
+ALIGN	16
+ocb_se_handler:	; SEH handler referenced by $L$SEH_info_ocb_enc/ocb_dec; handler data has three labels (body, epilogue, pop) plus a zero
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; scratch/outgoing-argument area used by the call in $L$common_seh_tail
+
+	mov	rax,QWORD[120+r8]	; rax = [r8+120] (presumably CONTEXT.Rax)
+	mov	rbx,QWORD[248+r8]	; rbx = [r8+248] (presumably CONTEXT.Rip)
+
+	mov	rsi,QWORD[8+r9]	; rsi = [r9+8] (presumably ImageBase)
+	mov	r11,QWORD[56+r9]	; r11 = [r9+56] (presumably HandlerData)
+
+	mov	r10d,DWORD[r11]	; HandlerData[0]: body label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; fault before the body label
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1]: epilogue label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; fault at/after the epilogue label
+
+	mov	r10d,DWORD[8+r11]	; HandlerData[2]: "pop" label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$ocb_no_xmm	; at/after the pop label: skip the xmm copy, only the GPRs remain on the frame
+
+	mov	rax,QWORD[152+r8]	; rax = [r8+152] (presumably CONTEXT.Rsp)
+
+	lea	rsi,[rax]	; source = frame at rax
+	lea	rdi,[512+r8]	; destination = r8+512 (presumably CONTEXT.Xmm6)
+	mov	ecx,20	; 20 qwords = 160 bytes (ten 16-byte registers)
+	DD	0xa548f3fc	; encoded bytes FC F3 48 A5 = cld; rep movsq
+	lea	rax,[((160+40))+rax]	; step past the 160-byte xmm area plus 40 more bytes (five qwords read below)
+
+$L$ocb_no_xmm:
+	mov	rbx,QWORD[((-8))+rax]	; reload five saved qwords from just below rax
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+
+	mov	QWORD[144+r8],rbx	; write them to [r8+144/160/216/224/232] (presumably CONTEXT.Rbx/Rbp/R12/R13/R14)
+	mov	QWORD[160+r8],rbp
+	mov	QWORD[216+r8],r12
+	mov	QWORD[224+r8],r13
+	mov	QWORD[232+r8],r14
+
+	jmp	NEAR $L$common_seh_tail
+
+
+ALIGN	16
+cbc_se_handler:	; SEH handler referenced by $L$SEH_info_cbc; compares the fault address against absolute labels (no handler data)
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; scratch/outgoing-argument area for the RtlVirtualUnwind call below
+
+	mov	rax,QWORD[152+r8]	; rax = [r8+152] (presumably CONTEXT.Rsp)
+	mov	rbx,QWORD[248+r8]	; rbx = [r8+248] (presumably CONTEXT.Rip)
+
+	lea	r10,[$L$cbc_decrypt_bulk]	; label defined outside this chunk
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; fault before $L$cbc_decrypt_bulk: use rax as loaded from [r8+152]
+
+	mov	rax,QWORD[120+r8]	; rax = [r8+120] (presumably CONTEXT.Rax)
+
+	lea	r10,[$L$cbc_decrypt_body]	; label defined outside this chunk
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; fault between bulk and body labels: use rax as loaded from [r8+120]
+
+	mov	rax,QWORD[152+r8]	; back to [r8+152]
+
+	lea	r10,[$L$cbc_ret]	; label defined outside this chunk
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; fault at/after $L$cbc_ret
+
+	lea	rsi,[16+rax]	; source = 16 bytes above rax
+	lea	rdi,[512+r8]	; destination = r8+512 (presumably CONTEXT.Xmm6)
+	mov	ecx,20	; 20 qwords = 160 bytes (ten 16-byte registers)
+	DD	0xa548f3fc	; encoded bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rax,QWORD[208+r8]	; rax = [r8+208] (presumably CONTEXT.R11; confirm against the covered function)
+
+	mov	rbp,QWORD[((-8))+rax]	; reload the qword saved just below it
+	mov	QWORD[160+r8],rbp	; and write it to [r8+160] (presumably CONTEXT.Rbp)
+
+$L$common_seh_tail:	; shared exit for all four handlers above; expects rax = caller-frame pointer, r8/r9 untouched
+	mov	rdi,QWORD[8+rax]	; two qwords at rax+8 / rax+16
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; [r8+152] = rax (presumably CONTEXT.Rsp)
+	mov	QWORD[168+r8],rsi	; [r8+168] (presumably CONTEXT.Rsi)
+	mov	QWORD[176+r8],rdi	; [r8+176] (presumably CONTEXT.Rdi)
+
+	mov	rdi,QWORD[40+r9]	; destination = [r9+40] (presumably DISPATCHER_CONTEXT.ContextRecord)
+	mov	rsi,r8	; source = the context we just patched
+	mov	ecx,154	; 154 qwords = 1232 bytes
+	DD	0xa548f3fc	; encoded bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rsi,r9	; keep r9 in rsi; r8/r9 are reloaded as call arguments below
+	xor	rcx,rcx	; arg1 = 0
+	mov	rdx,QWORD[8+rsi]	; arg2 = [r9+8]
+	mov	r8,QWORD[rsi]	; arg3 = [r9+0]
+	mov	r9,QWORD[16+rsi]	; arg4 = [r9+16]
+	mov	r10,QWORD[40+rsi]
+	lea	r11,[56+rsi]
+	lea	r12,[24+rsi]
+	mov	QWORD[32+rsp],r10	; arg5 = [r9+40]
+	mov	QWORD[40+rsp],r11	; arg6 = address of r9+56
+	mov	QWORD[48+rsp],r12	; arg7 = address of r9+24
+	mov	QWORD[56+rsp],rcx	; arg8 = 0
+	call	QWORD[__imp_RtlVirtualUnwind]	; indirect call through the import declared with EXTERN
+
+	mov	eax,1	; return value 1 (presumably ExceptionContinueSearch)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4	; function table: one 3-DWORD entry per routine (begin, end, unwind info), all image-relative
+ALIGN	4
+	DD	$L$SEH_begin_aesni_ecb_encrypt wrt ..imagebase	; entry: aesni_ecb_encrypt
+	DD	$L$SEH_end_aesni_ecb_encrypt wrt ..imagebase
+	DD	$L$SEH_info_ecb wrt ..imagebase
+
+	DD	$L$SEH_begin_aesni_ccm64_encrypt_blocks wrt ..imagebase	; entry: aesni_ccm64_encrypt_blocks
+	DD	$L$SEH_end_aesni_ccm64_encrypt_blocks wrt ..imagebase
+	DD	$L$SEH_info_ccm64_enc wrt ..imagebase
+
+	DD	$L$SEH_begin_aesni_ccm64_decrypt_blocks wrt ..imagebase	; entry: aesni_ccm64_decrypt_blocks
+	DD	$L$SEH_end_aesni_ccm64_decrypt_blocks wrt ..imagebase
+	DD	$L$SEH_info_ccm64_dec wrt ..imagebase
+
+	DD	$L$SEH_begin_aesni_ctr32_encrypt_blocks wrt ..imagebase	; entry: aesni_ctr32_encrypt_blocks
+	DD	$L$SEH_end_aesni_ctr32_encrypt_blocks wrt ..imagebase
+	DD	$L$SEH_info_ctr32 wrt ..imagebase
+
+	DD	$L$SEH_begin_aesni_xts_encrypt wrt ..imagebase	; entry: aesni_xts_encrypt
+	DD	$L$SEH_end_aesni_xts_encrypt wrt ..imagebase
+	DD	$L$SEH_info_xts_enc wrt ..imagebase
+
+	DD	$L$SEH_begin_aesni_xts_decrypt wrt ..imagebase	; entry: aesni_xts_decrypt
+	DD	$L$SEH_end_aesni_xts_decrypt wrt ..imagebase
+	DD	$L$SEH_info_xts_dec wrt ..imagebase
+
+	DD	$L$SEH_begin_aesni_ocb_encrypt wrt ..imagebase	; entry: aesni_ocb_encrypt
+	DD	$L$SEH_end_aesni_ocb_encrypt wrt ..imagebase
+	DD	$L$SEH_info_ocb_enc wrt ..imagebase
+
+	DD	$L$SEH_begin_aesni_ocb_decrypt wrt ..imagebase	; entry: aesni_ocb_decrypt
+	DD	$L$SEH_end_aesni_ocb_decrypt wrt ..imagebase
+	DD	$L$SEH_info_ocb_dec wrt ..imagebase
+	DD	$L$SEH_begin_aesni_cbc_encrypt wrt ..imagebase	; entry: aesni_cbc_encrypt
+	DD	$L$SEH_end_aesni_cbc_encrypt wrt ..imagebase
+	DD	$L$SEH_info_cbc wrt ..imagebase
+
+	DD	aesni_set_decrypt_key wrt ..imagebase	; entry: aesni_set_decrypt_key (begin is the function symbol itself)
+	DD	$L$SEH_end_set_decrypt_key wrt ..imagebase
+	DD	$L$SEH_info_key wrt ..imagebase	; shares unwind info with set_encrypt_key
+
+	DD	aesni_set_encrypt_key wrt ..imagebase	; entry: aesni_set_encrypt_key
+	DD	$L$SEH_end_set_encrypt_key wrt ..imagebase
+	DD	$L$SEH_info_key wrt ..imagebase
+section	.xdata rdata align=8	; unwind-info records referenced by the third DWORD of each .pdata entry
+ALIGN	8
+$L$SEH_info_ecb:
+DB	9,0,0,0	; header byte 9 (presumably UNWIND_INFO version 1 with the exception-handler flag), no unwind codes
+	DD	ecb_ccm64_se_handler wrt ..imagebase	; handler routine
+	DD	$L$ecb_enc_body wrt ..imagebase,$L$ecb_enc_ret wrt ..imagebase	; handler data: the two labels the handler compares against
+$L$SEH_info_ccm64_enc:
+DB	9,0,0,0
+	DD	ecb_ccm64_se_handler wrt ..imagebase
+	DD	$L$ccm64_enc_body wrt ..imagebase,$L$ccm64_enc_ret wrt ..imagebase
+$L$SEH_info_ccm64_dec:
+DB	9,0,0,0
+	DD	ecb_ccm64_se_handler wrt ..imagebase
+	DD	$L$ccm64_dec_body wrt ..imagebase,$L$ccm64_dec_ret wrt ..imagebase
+$L$SEH_info_ctr32:
+DB	9,0,0,0
+	DD	ctr_xts_se_handler wrt ..imagebase
+	DD	$L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase
+$L$SEH_info_xts_enc:
+DB	9,0,0,0
+	DD	ctr_xts_se_handler wrt ..imagebase
+	DD	$L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase
+$L$SEH_info_xts_dec:
+DB	9,0,0,0
+	DD	ctr_xts_se_handler wrt ..imagebase
+	DD	$L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase
+$L$SEH_info_ocb_enc:
+DB	9,0,0,0
+	DD	ocb_se_handler wrt ..imagebase
+	DD	$L$ocb_enc_body wrt ..imagebase,$L$ocb_enc_epilogue wrt ..imagebase
+	DD	$L$ocb_enc_pop wrt ..imagebase	; third label, read by ocb_se_handler at handler-data offset 8
+	DD	0
+$L$SEH_info_ocb_dec:
+DB	9,0,0,0
+	DD	ocb_se_handler wrt ..imagebase
+	DD	$L$ocb_dec_body wrt ..imagebase,$L$ocb_dec_epilogue wrt ..imagebase
+	DD	$L$ocb_dec_pop wrt ..imagebase	; third label, read by ocb_se_handler at handler-data offset 8
+	DD	0
+$L$SEH_info_cbc:
+DB	9,0,0,0
+	DD	cbc_se_handler wrt ..imagebase	; no handler data: cbc_se_handler uses absolute labels instead
+$L$SEH_info_key:	; shared by aesni_set_encrypt_key / aesni_set_decrypt_key; no handler routine
+DB	0x01,0x04,0x01,0x00	; presumably UNWIND_INFO: version 1, prologue size 4, one unwind code, no frame register
+DB	0x04,0x02,0x00,0x00	; presumably one UWOP_ALLOC_SMALL code at offset 4 (8-byte allocation, matching "add rsp,8" in the key-setup exit path)
diff --git a/third_party/boringssl/win-x86_64/crypto/fipsmodule/bsaes-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/fipsmodule/bsaes-x86_64.asm
new file mode 100644
index 0000000..9c6d129
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/fipsmodule/bsaes-x86_64.asm
@@ -0,0 +1,2744 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.text code align=64
+
+
+EXTERN	asm_AES_encrypt
+EXTERN	asm_AES_decrypt
+
+
+ALIGN	64
+_bsaes_encrypt8:	; internal routine: transforms eight 128-bit blocks held in xmm15,xmm0-xmm6; rax = key schedule, r10d = round count; clobbers xmm7-xmm14, r11
+	lea	r11,[$L$BS0]	; r11 = constant table base ($L$BS0 is defined outside this chunk)
+
+	movdqa	xmm8,XMMWORD[rax]	; first 16 bytes of the key schedule
+	lea	rax,[16+rax]	; rax now points at the per-round 128-byte entries
+	movdqa	xmm7,XMMWORD[80+r11]	; byte-shuffle mask at $L$BS0+80
+	pxor	xmm15,xmm8	; XOR the first key into all eight blocks (interleaved with the shuffles)
+	pxor	xmm0,xmm8
+	pxor	xmm1,xmm8
+	pxor	xmm2,xmm8
+DB	102,68,15,56,0,255	; encoded: pshufb xmm15,xmm7
+DB	102,15,56,0,199	; encoded: pshufb xmm0,xmm7
+	pxor	xmm3,xmm8
+	pxor	xmm4,xmm8
+DB	102,15,56,0,207	; encoded: pshufb xmm1,xmm7
+DB	102,15,56,0,215	; encoded: pshufb xmm2,xmm7
+	pxor	xmm5,xmm8
+	pxor	xmm6,xmm8
+DB	102,15,56,0,223	; encoded: pshufb xmm3,xmm7
+DB	102,15,56,0,231	; encoded: pshufb xmm4,xmm7
+DB	102,15,56,0,239	; encoded: pshufb xmm5,xmm7
+DB	102,15,56,0,247	; encoded: pshufb xmm6,xmm7
+_bsaes_encrypt8_bitslice:	; secondary entry: skips the key XOR / byte shuffle above (expects r11 already set)
+	movdqa	xmm7,XMMWORD[r11]	; mask for the shift-by-1 swaps ($L$BS0+0)
+	movdqa	xmm8,XMMWORD[16+r11]	; mask for the shift-by-2 swaps ($L$BS0+16)
+	movdqa	xmm9,xmm5	; masked bit-swap pattern: t=((a>>n)^b)&mask; b^=t; a^=t<<n, two register pairs at a time
+	psrlq	xmm5,1
+	movdqa	xmm10,xmm3
+	psrlq	xmm3,1
+	pxor	xmm5,xmm6
+	pxor	xmm3,xmm4
+	pand	xmm5,xmm7
+	pand	xmm3,xmm7
+	pxor	xmm6,xmm5
+	psllq	xmm5,1
+	pxor	xmm4,xmm3
+	psllq	xmm3,1
+	pxor	xmm5,xmm9
+	pxor	xmm3,xmm10
+	movdqa	xmm9,xmm1
+	psrlq	xmm1,1
+	movdqa	xmm10,xmm15
+	psrlq	xmm15,1
+	pxor	xmm1,xmm2
+	pxor	xmm15,xmm0
+	pand	xmm1,xmm7
+	pand	xmm15,xmm7
+	pxor	xmm2,xmm1
+	psllq	xmm1,1
+	pxor	xmm0,xmm15
+	psllq	xmm15,1
+	pxor	xmm1,xmm9
+	pxor	xmm15,xmm10
+	movdqa	xmm7,XMMWORD[32+r11]	; mask for the shift-by-4 swaps ($L$BS0+32)
+	movdqa	xmm9,xmm4	; shift-by-2 swaps (mask in xmm8)
+	psrlq	xmm4,2
+	movdqa	xmm10,xmm3
+	psrlq	xmm3,2
+	pxor	xmm4,xmm6
+	pxor	xmm3,xmm5
+	pand	xmm4,xmm8
+	pand	xmm3,xmm8
+	pxor	xmm6,xmm4
+	psllq	xmm4,2
+	pxor	xmm5,xmm3
+	psllq	xmm3,2
+	pxor	xmm4,xmm9
+	pxor	xmm3,xmm10
+	movdqa	xmm9,xmm0
+	psrlq	xmm0,2
+	movdqa	xmm10,xmm15
+	psrlq	xmm15,2
+	pxor	xmm0,xmm2
+	pxor	xmm15,xmm1
+	pand	xmm0,xmm8
+	pand	xmm15,xmm8
+	pxor	xmm2,xmm0
+	psllq	xmm0,2
+	pxor	xmm1,xmm15
+	psllq	xmm15,2
+	pxor	xmm0,xmm9
+	pxor	xmm15,xmm10
+	movdqa	xmm9,xmm2	; shift-by-4 swaps (mask in xmm7)
+	psrlq	xmm2,4
+	movdqa	xmm10,xmm1
+	psrlq	xmm1,4
+	pxor	xmm2,xmm6
+	pxor	xmm1,xmm5
+	pand	xmm2,xmm7
+	pand	xmm1,xmm7
+	pxor	xmm6,xmm2
+	psllq	xmm2,4
+	pxor	xmm5,xmm1
+	psllq	xmm1,4
+	pxor	xmm2,xmm9
+	pxor	xmm1,xmm10
+	movdqa	xmm9,xmm0
+	psrlq	xmm0,4
+	movdqa	xmm10,xmm15
+	psrlq	xmm15,4
+	pxor	xmm0,xmm4
+	pxor	xmm15,xmm3
+	pand	xmm0,xmm7
+	pand	xmm15,xmm7
+	pxor	xmm4,xmm0
+	psllq	xmm0,4
+	pxor	xmm3,xmm15
+	psllq	xmm15,4
+	pxor	xmm0,xmm9
+	pxor	xmm15,xmm10
+	dec	r10d	; count the first pass
+	jmp	NEAR $L$enc_sbox	; first pass skips the per-round key XOR at $L$enc_loop
+ALIGN	16
+$L$enc_loop:	; top of each later round: XOR in 128 bytes of round key and byte-shuffle with the mask in xmm7
+	pxor	xmm15,XMMWORD[rax]
+	pxor	xmm0,XMMWORD[16+rax]
+	pxor	xmm1,XMMWORD[32+rax]
+	pxor	xmm2,XMMWORD[48+rax]
+DB	102,68,15,56,0,255	; encoded: pshufb xmm15,xmm7
+DB	102,15,56,0,199	; encoded: pshufb xmm0,xmm7
+	pxor	xmm3,XMMWORD[64+rax]
+	pxor	xmm4,XMMWORD[80+rax]
+DB	102,15,56,0,207	; encoded: pshufb xmm1,xmm7
+DB	102,15,56,0,215	; encoded: pshufb xmm2,xmm7
+	pxor	xmm5,XMMWORD[96+rax]
+	pxor	xmm6,XMMWORD[112+rax]
+DB	102,15,56,0,223	; encoded: pshufb xmm3,xmm7
+DB	102,15,56,0,231	; encoded: pshufb xmm4,xmm7
+DB	102,15,56,0,239	; encoded: pshufb xmm5,xmm7
+DB	102,15,56,0,247	; encoded: pshufb xmm6,xmm7
+	lea	rax,[128+rax]	; advance to the next 128-byte round-key entry
+$L$enc_sbox:	; pure pxor/pand/por network over xmm0-6,xmm15 with xmm7-14 as temporaries (label suggests the bit-sliced S-box)
+	pxor	xmm4,xmm5
+	pxor	xmm1,xmm0
+	pxor	xmm2,xmm15
+	pxor	xmm5,xmm1
+	pxor	xmm4,xmm15
+
+	pxor	xmm5,xmm2
+	pxor	xmm2,xmm6
+	pxor	xmm6,xmm4
+	pxor	xmm2,xmm3
+	pxor	xmm3,xmm4
+	pxor	xmm2,xmm0
+
+	pxor	xmm1,xmm6
+	pxor	xmm0,xmm4
+	movdqa	xmm10,xmm6
+	movdqa	xmm9,xmm0
+	movdqa	xmm8,xmm4
+	movdqa	xmm12,xmm1
+	movdqa	xmm11,xmm5
+
+	pxor	xmm10,xmm3
+	pxor	xmm9,xmm1
+	pxor	xmm8,xmm2
+	movdqa	xmm13,xmm10
+	pxor	xmm12,xmm3
+	movdqa	xmm7,xmm9
+	pxor	xmm11,xmm15
+	movdqa	xmm14,xmm10
+
+	por	xmm9,xmm8
+	por	xmm10,xmm11
+	pxor	xmm14,xmm7
+	pand	xmm13,xmm11
+	pxor	xmm11,xmm8
+	pand	xmm7,xmm8
+	pand	xmm14,xmm11
+	movdqa	xmm11,xmm2
+	pxor	xmm11,xmm15
+	pand	xmm12,xmm11
+	pxor	xmm10,xmm12
+	pxor	xmm9,xmm12
+	movdqa	xmm12,xmm6
+	movdqa	xmm11,xmm4
+	pxor	xmm12,xmm0
+	pxor	xmm11,xmm5
+	movdqa	xmm8,xmm12
+	pand	xmm12,xmm11
+	por	xmm8,xmm11
+	pxor	xmm7,xmm12
+	pxor	xmm10,xmm14
+	pxor	xmm9,xmm13
+	pxor	xmm8,xmm14
+	movdqa	xmm11,xmm1
+	pxor	xmm7,xmm13
+	movdqa	xmm12,xmm3
+	pxor	xmm8,xmm13
+	movdqa	xmm13,xmm0
+	pand	xmm11,xmm2
+	movdqa	xmm14,xmm6
+	pand	xmm12,xmm15
+	pand	xmm13,xmm4
+	por	xmm14,xmm5
+	pxor	xmm10,xmm11
+	pxor	xmm9,xmm12
+	pxor	xmm8,xmm13
+	pxor	xmm7,xmm14
+
+
+
+
+
+	movdqa	xmm11,xmm10
+	pand	xmm10,xmm8
+	pxor	xmm11,xmm9
+
+	movdqa	xmm13,xmm7
+	movdqa	xmm14,xmm11
+	pxor	xmm13,xmm10
+	pand	xmm14,xmm13
+
+	movdqa	xmm12,xmm8
+	pxor	xmm14,xmm9
+	pxor	xmm12,xmm7
+
+	pxor	xmm10,xmm9
+
+	pand	xmm12,xmm10
+
+	movdqa	xmm9,xmm13
+	pxor	xmm12,xmm7
+
+	pxor	xmm9,xmm12
+	pxor	xmm8,xmm12
+
+	pand	xmm9,xmm7
+
+	pxor	xmm13,xmm9
+	pxor	xmm8,xmm9
+
+	pand	xmm13,xmm14
+
+	pxor	xmm13,xmm11
+	movdqa	xmm11,xmm5
+	movdqa	xmm7,xmm4
+	movdqa	xmm9,xmm14
+	pxor	xmm9,xmm13
+	pand	xmm9,xmm5
+	pxor	xmm5,xmm4
+	pand	xmm4,xmm14
+	pand	xmm5,xmm13
+	pxor	xmm5,xmm4
+	pxor	xmm4,xmm9
+	pxor	xmm11,xmm15
+	pxor	xmm7,xmm2
+	pxor	xmm14,xmm12
+	pxor	xmm13,xmm8
+	movdqa	xmm10,xmm14
+	movdqa	xmm9,xmm12
+	pxor	xmm10,xmm13
+	pxor	xmm9,xmm8
+	pand	xmm10,xmm11
+	pand	xmm9,xmm15
+	pxor	xmm11,xmm7
+	pxor	xmm15,xmm2
+	pand	xmm7,xmm14
+	pand	xmm2,xmm12
+	pand	xmm11,xmm13
+	pand	xmm15,xmm8
+	pxor	xmm7,xmm11
+	pxor	xmm15,xmm2
+	pxor	xmm11,xmm10
+	pxor	xmm2,xmm9
+	pxor	xmm5,xmm11
+	pxor	xmm15,xmm11
+	pxor	xmm4,xmm7
+	pxor	xmm2,xmm7
+
+	movdqa	xmm11,xmm6
+	movdqa	xmm7,xmm0
+	pxor	xmm11,xmm3
+	pxor	xmm7,xmm1
+	movdqa	xmm10,xmm14
+	movdqa	xmm9,xmm12
+	pxor	xmm10,xmm13
+	pxor	xmm9,xmm8
+	pand	xmm10,xmm11
+	pand	xmm9,xmm3
+	pxor	xmm11,xmm7
+	pxor	xmm3,xmm1
+	pand	xmm7,xmm14
+	pand	xmm1,xmm12
+	pand	xmm11,xmm13
+	pand	xmm3,xmm8
+	pxor	xmm7,xmm11
+	pxor	xmm3,xmm1
+	pxor	xmm11,xmm10
+	pxor	xmm1,xmm9
+	pxor	xmm14,xmm12
+	pxor	xmm13,xmm8
+	movdqa	xmm10,xmm14
+	pxor	xmm10,xmm13
+	pand	xmm10,xmm6
+	pxor	xmm6,xmm0
+	pand	xmm0,xmm14
+	pand	xmm6,xmm13
+	pxor	xmm6,xmm0
+	pxor	xmm0,xmm10
+	pxor	xmm6,xmm11
+	pxor	xmm3,xmm11
+	pxor	xmm0,xmm7
+	pxor	xmm1,xmm7
+	pxor	xmm6,xmm15
+	pxor	xmm0,xmm5
+	pxor	xmm3,xmm6
+	pxor	xmm5,xmm15
+	pxor	xmm15,xmm0
+
+	pxor	xmm0,xmm4
+	pxor	xmm4,xmm1
+	pxor	xmm1,xmm2
+	pxor	xmm2,xmm4
+	pxor	xmm3,xmm4
+
+	pxor	xmm5,xmm2
+	dec	r10d	; one more round consumed; the flags set here also drive the jnz further down
+	jl	NEAR $L$enc_done	; counter went negative: leave the loop without the mixing step below
+	pshufd	xmm7,xmm15,0x93	; 0x93 rotates the four dwords by one lane; each copy is XORed with its source
+	pshufd	xmm8,xmm0,0x93
+	pxor	xmm15,xmm7
+	pshufd	xmm9,xmm3,0x93
+	pxor	xmm0,xmm8
+	pshufd	xmm10,xmm5,0x93
+	pxor	xmm3,xmm9
+	pshufd	xmm11,xmm2,0x93
+	pxor	xmm5,xmm10
+	pshufd	xmm12,xmm6,0x93
+	pxor	xmm2,xmm11
+	pshufd	xmm13,xmm1,0x93
+	pxor	xmm6,xmm12
+	pshufd	xmm14,xmm4,0x93
+	pxor	xmm1,xmm13
+	pxor	xmm4,xmm14
+
+	pxor	xmm8,xmm15
+	pxor	xmm7,xmm4
+	pxor	xmm8,xmm4
+	pshufd	xmm15,xmm15,0x4E	; 0x4E swaps the two 64-bit halves
+	pxor	xmm9,xmm0
+	pshufd	xmm0,xmm0,0x4E
+	pxor	xmm12,xmm2
+	pxor	xmm15,xmm7
+	pxor	xmm13,xmm6
+	pxor	xmm0,xmm8
+	pxor	xmm11,xmm5
+	pshufd	xmm7,xmm2,0x4E
+	pxor	xmm14,xmm1
+	pshufd	xmm8,xmm6,0x4E
+	pxor	xmm10,xmm3
+	pshufd	xmm2,xmm5,0x4E
+	pxor	xmm10,xmm4
+	pshufd	xmm6,xmm4,0x4E
+	pxor	xmm11,xmm4
+	pshufd	xmm5,xmm1,0x4E
+	pxor	xmm7,xmm11
+	pshufd	xmm1,xmm3,0x4E
+	pxor	xmm8,xmm12
+	pxor	xmm2,xmm10
+	pxor	xmm6,xmm14
+	pxor	xmm5,xmm13
+	movdqa	xmm3,xmm7
+	pxor	xmm1,xmm9
+	movdqa	xmm4,xmm8
+	movdqa	xmm7,XMMWORD[48+r11]	; shuffle mask for an ordinary next round ($L$BS0+48)
+	jnz	NEAR $L$enc_loop	; flags still come from "dec r10d" above (SSE ops leave EFLAGS alone)
+	movdqa	xmm7,XMMWORD[64+r11]	; counter hit zero: the final round uses the mask at $L$BS0+64 instead
+	jmp	NEAR $L$enc_loop
+ALIGN	16
+$L$enc_done:	; undo the bit-slicing with the same masked swaps (shift 1, 2, 4), then XOR the last key
+	movdqa	xmm7,XMMWORD[r11]	; shift-by-1 mask
+	movdqa	xmm8,XMMWORD[16+r11]	; shift-by-2 mask
+	movdqa	xmm9,xmm1
+	psrlq	xmm1,1
+	movdqa	xmm10,xmm2
+	psrlq	xmm2,1
+	pxor	xmm1,xmm4
+	pxor	xmm2,xmm6
+	pand	xmm1,xmm7
+	pand	xmm2,xmm7
+	pxor	xmm4,xmm1
+	psllq	xmm1,1
+	pxor	xmm6,xmm2
+	psllq	xmm2,1
+	pxor	xmm1,xmm9
+	pxor	xmm2,xmm10
+	movdqa	xmm9,xmm3
+	psrlq	xmm3,1
+	movdqa	xmm10,xmm15
+	psrlq	xmm15,1
+	pxor	xmm3,xmm5
+	pxor	xmm15,xmm0
+	pand	xmm3,xmm7
+	pand	xmm15,xmm7
+	pxor	xmm5,xmm3
+	psllq	xmm3,1
+	pxor	xmm0,xmm15
+	psllq	xmm15,1
+	pxor	xmm3,xmm9
+	pxor	xmm15,xmm10
+	movdqa	xmm7,XMMWORD[32+r11]	; shift-by-4 mask
+	movdqa	xmm9,xmm6
+	psrlq	xmm6,2
+	movdqa	xmm10,xmm2
+	psrlq	xmm2,2
+	pxor	xmm6,xmm4
+	pxor	xmm2,xmm1
+	pand	xmm6,xmm8
+	pand	xmm2,xmm8
+	pxor	xmm4,xmm6
+	psllq	xmm6,2
+	pxor	xmm1,xmm2
+	psllq	xmm2,2
+	pxor	xmm6,xmm9
+	pxor	xmm2,xmm10
+	movdqa	xmm9,xmm0
+	psrlq	xmm0,2
+	movdqa	xmm10,xmm15
+	psrlq	xmm15,2
+	pxor	xmm0,xmm5
+	pxor	xmm15,xmm3
+	pand	xmm0,xmm8
+	pand	xmm15,xmm8
+	pxor	xmm5,xmm0
+	psllq	xmm0,2
+	pxor	xmm3,xmm15
+	psllq	xmm15,2
+	pxor	xmm0,xmm9
+	pxor	xmm15,xmm10
+	movdqa	xmm9,xmm5
+	psrlq	xmm5,4
+	movdqa	xmm10,xmm3
+	psrlq	xmm3,4
+	pxor	xmm5,xmm4
+	pxor	xmm3,xmm1
+	pand	xmm5,xmm7
+	pand	xmm3,xmm7
+	pxor	xmm4,xmm5
+	psllq	xmm5,4
+	pxor	xmm1,xmm3
+	psllq	xmm3,4
+	pxor	xmm5,xmm9
+	pxor	xmm3,xmm10
+	movdqa	xmm9,xmm0
+	psrlq	xmm0,4
+	movdqa	xmm10,xmm15
+	psrlq	xmm15,4
+	pxor	xmm0,xmm6
+	pxor	xmm15,xmm2
+	pand	xmm0,xmm7
+	pand	xmm15,xmm7
+	pxor	xmm6,xmm0
+	psllq	xmm0,4
+	pxor	xmm2,xmm15
+	psllq	xmm15,4
+	pxor	xmm0,xmm9
+	pxor	xmm15,xmm10
+	movdqa	xmm7,XMMWORD[rax]	; final 16-byte key at the current schedule position
+	pxor	xmm3,xmm7	; XOR it into all eight result registers
+	pxor	xmm5,xmm7
+	pxor	xmm2,xmm7
+	pxor	xmm6,xmm7
+	pxor	xmm1,xmm7
+	pxor	xmm4,xmm7
+	pxor	xmm15,xmm7
+	pxor	xmm0,xmm7
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	64
+_bsaes_decrypt8:	; internal routine: inverse transform of eight blocks in xmm15,xmm0-xmm6; rax = key schedule, r10d = round count; bsaes_cbc_encrypt reads results from xmm15,0,5,3,1,6,2,4
+	lea	r11,[$L$BS0]	; r11 = constant table base ($L$BS0 is defined outside this chunk)
+
+	movdqa	xmm8,XMMWORD[rax]	; first 16 bytes of the key schedule
+	lea	rax,[16+rax]	; rax now points at the per-round 128-byte entries
+	movdqa	xmm7,XMMWORD[((-48))+r11]	; byte-shuffle mask at $L$BS0-48
+	pxor	xmm15,xmm8	; XOR the first key into all eight blocks (interleaved with the shuffles)
+	pxor	xmm0,xmm8
+	pxor	xmm1,xmm8
+	pxor	xmm2,xmm8
+DB	102,68,15,56,0,255	; encoded: pshufb xmm15,xmm7
+DB	102,15,56,0,199	; encoded: pshufb xmm0,xmm7
+	pxor	xmm3,xmm8
+	pxor	xmm4,xmm8
+DB	102,15,56,0,207	; encoded: pshufb xmm1,xmm7
+DB	102,15,56,0,215	; encoded: pshufb xmm2,xmm7
+	pxor	xmm5,xmm8
+	pxor	xmm6,xmm8
+DB	102,15,56,0,223	; encoded: pshufb xmm3,xmm7
+DB	102,15,56,0,231	; encoded: pshufb xmm4,xmm7
+DB	102,15,56,0,239	; encoded: pshufb xmm5,xmm7
+DB	102,15,56,0,247	; encoded: pshufb xmm6,xmm7
+	movdqa	xmm7,XMMWORD[r11]	; mask for the shift-by-1 swaps ($L$BS0+0)
+	movdqa	xmm8,XMMWORD[16+r11]	; mask for the shift-by-2 swaps ($L$BS0+16)
+	movdqa	xmm9,xmm5	; masked bit-swap pattern: t=((a>>n)^b)&mask; b^=t; a^=t<<n, two register pairs at a time
+	psrlq	xmm5,1
+	movdqa	xmm10,xmm3
+	psrlq	xmm3,1
+	pxor	xmm5,xmm6
+	pxor	xmm3,xmm4
+	pand	xmm5,xmm7
+	pand	xmm3,xmm7
+	pxor	xmm6,xmm5
+	psllq	xmm5,1
+	pxor	xmm4,xmm3
+	psllq	xmm3,1
+	pxor	xmm5,xmm9
+	pxor	xmm3,xmm10
+	movdqa	xmm9,xmm1
+	psrlq	xmm1,1
+	movdqa	xmm10,xmm15
+	psrlq	xmm15,1
+	pxor	xmm1,xmm2
+	pxor	xmm15,xmm0
+	pand	xmm1,xmm7
+	pand	xmm15,xmm7
+	pxor	xmm2,xmm1
+	psllq	xmm1,1
+	pxor	xmm0,xmm15
+	psllq	xmm15,1
+	pxor	xmm1,xmm9
+	pxor	xmm15,xmm10
+	movdqa	xmm7,XMMWORD[32+r11]	; mask for the shift-by-4 swaps ($L$BS0+32)
+	movdqa	xmm9,xmm4	; shift-by-2 swaps (mask in xmm8)
+	psrlq	xmm4,2
+	movdqa	xmm10,xmm3
+	psrlq	xmm3,2
+	pxor	xmm4,xmm6
+	pxor	xmm3,xmm5
+	pand	xmm4,xmm8
+	pand	xmm3,xmm8
+	pxor	xmm6,xmm4
+	psllq	xmm4,2
+	pxor	xmm5,xmm3
+	psllq	xmm3,2
+	pxor	xmm4,xmm9
+	pxor	xmm3,xmm10
+	movdqa	xmm9,xmm0
+	psrlq	xmm0,2
+	movdqa	xmm10,xmm15
+	psrlq	xmm15,2
+	pxor	xmm0,xmm2
+	pxor	xmm15,xmm1
+	pand	xmm0,xmm8
+	pand	xmm15,xmm8
+	pxor	xmm2,xmm0
+	psllq	xmm0,2
+	pxor	xmm1,xmm15
+	psllq	xmm15,2
+	pxor	xmm0,xmm9
+	pxor	xmm15,xmm10
+	movdqa	xmm9,xmm2	; shift-by-4 swaps (mask in xmm7)
+	psrlq	xmm2,4
+	movdqa	xmm10,xmm1
+	psrlq	xmm1,4
+	pxor	xmm2,xmm6
+	pxor	xmm1,xmm5
+	pand	xmm2,xmm7
+	pand	xmm1,xmm7
+	pxor	xmm6,xmm2
+	psllq	xmm2,4
+	pxor	xmm5,xmm1
+	psllq	xmm1,4
+	pxor	xmm2,xmm9
+	pxor	xmm1,xmm10
+	movdqa	xmm9,xmm0
+	psrlq	xmm0,4
+	movdqa	xmm10,xmm15
+	psrlq	xmm15,4
+	pxor	xmm0,xmm4
+	pxor	xmm15,xmm3
+	pand	xmm0,xmm7
+	pand	xmm15,xmm7
+	pxor	xmm4,xmm0
+	psllq	xmm0,4
+	pxor	xmm3,xmm15
+	psllq	xmm15,4
+	pxor	xmm0,xmm9
+	pxor	xmm15,xmm10
+	dec	r10d	; count the first pass
+	jmp	NEAR $L$dec_sbox	; first pass skips the per-round key XOR at $L$dec_loop
+ALIGN	16
+$L$dec_loop:	; top of each later round: XOR in 128 bytes of round key and byte-shuffle with the mask in xmm7
+	pxor	xmm15,XMMWORD[rax]
+	pxor	xmm0,XMMWORD[16+rax]
+	pxor	xmm1,XMMWORD[32+rax]
+	pxor	xmm2,XMMWORD[48+rax]
+DB	102,68,15,56,0,255	; encoded: pshufb xmm15,xmm7
+DB	102,15,56,0,199	; encoded: pshufb xmm0,xmm7
+	pxor	xmm3,XMMWORD[64+rax]
+	pxor	xmm4,XMMWORD[80+rax]
+DB	102,15,56,0,207	; encoded: pshufb xmm1,xmm7
+DB	102,15,56,0,215	; encoded: pshufb xmm2,xmm7
+	pxor	xmm5,XMMWORD[96+rax]
+	pxor	xmm6,XMMWORD[112+rax]
+DB	102,15,56,0,223	; encoded: pshufb xmm3,xmm7
+DB	102,15,56,0,231	; encoded: pshufb xmm4,xmm7
+DB	102,15,56,0,239	; encoded: pshufb xmm5,xmm7
+DB	102,15,56,0,247	; encoded: pshufb xmm6,xmm7
+	lea	rax,[128+rax]	; advance to the next 128-byte round-key entry
+$L$dec_sbox:	; pure pxor/pand/por network over xmm0-6,xmm15 with xmm7-14 as temporaries (label suggests the bit-sliced inverse S-box)
+	pxor	xmm2,xmm3
+
+	pxor	xmm3,xmm6
+	pxor	xmm1,xmm6
+	pxor	xmm5,xmm3
+	pxor	xmm6,xmm5
+	pxor	xmm0,xmm6
+
+	pxor	xmm15,xmm0
+	pxor	xmm1,xmm4
+	pxor	xmm2,xmm15
+	pxor	xmm4,xmm15
+	pxor	xmm0,xmm2
+	movdqa	xmm10,xmm2
+	movdqa	xmm9,xmm6
+	movdqa	xmm8,xmm0
+	movdqa	xmm12,xmm3
+	movdqa	xmm11,xmm4
+
+	pxor	xmm10,xmm15
+	pxor	xmm9,xmm3
+	pxor	xmm8,xmm5
+	movdqa	xmm13,xmm10
+	pxor	xmm12,xmm15
+	movdqa	xmm7,xmm9
+	pxor	xmm11,xmm1
+	movdqa	xmm14,xmm10
+
+	por	xmm9,xmm8
+	por	xmm10,xmm11
+	pxor	xmm14,xmm7
+	pand	xmm13,xmm11
+	pxor	xmm11,xmm8
+	pand	xmm7,xmm8
+	pand	xmm14,xmm11
+	movdqa	xmm11,xmm5
+	pxor	xmm11,xmm1
+	pand	xmm12,xmm11
+	pxor	xmm10,xmm12
+	pxor	xmm9,xmm12
+	movdqa	xmm12,xmm2
+	movdqa	xmm11,xmm0
+	pxor	xmm12,xmm6
+	pxor	xmm11,xmm4
+	movdqa	xmm8,xmm12
+	pand	xmm12,xmm11
+	por	xmm8,xmm11
+	pxor	xmm7,xmm12
+	pxor	xmm10,xmm14
+	pxor	xmm9,xmm13
+	pxor	xmm8,xmm14
+	movdqa	xmm11,xmm3
+	pxor	xmm7,xmm13
+	movdqa	xmm12,xmm15
+	pxor	xmm8,xmm13
+	movdqa	xmm13,xmm6
+	pand	xmm11,xmm5
+	movdqa	xmm14,xmm2
+	pand	xmm12,xmm1
+	pand	xmm13,xmm0
+	por	xmm14,xmm4
+	pxor	xmm10,xmm11
+	pxor	xmm9,xmm12
+	pxor	xmm8,xmm13
+	pxor	xmm7,xmm14
+
+
+
+
+
+	movdqa	xmm11,xmm10
+	pand	xmm10,xmm8
+	pxor	xmm11,xmm9
+
+	movdqa	xmm13,xmm7
+	movdqa	xmm14,xmm11
+	pxor	xmm13,xmm10
+	pand	xmm14,xmm13
+
+	movdqa	xmm12,xmm8
+	pxor	xmm14,xmm9
+	pxor	xmm12,xmm7
+
+	pxor	xmm10,xmm9
+
+	pand	xmm12,xmm10
+
+	movdqa	xmm9,xmm13
+	pxor	xmm12,xmm7
+
+	pxor	xmm9,xmm12
+	pxor	xmm8,xmm12
+
+	pand	xmm9,xmm7
+
+	pxor	xmm13,xmm9
+	pxor	xmm8,xmm9
+
+	pand	xmm13,xmm14
+
+	pxor	xmm13,xmm11
+	movdqa	xmm11,xmm4
+	movdqa	xmm7,xmm0
+	movdqa	xmm9,xmm14
+	pxor	xmm9,xmm13
+	pand	xmm9,xmm4
+	pxor	xmm4,xmm0
+	pand	xmm0,xmm14
+	pand	xmm4,xmm13
+	pxor	xmm4,xmm0
+	pxor	xmm0,xmm9
+	pxor	xmm11,xmm1
+	pxor	xmm7,xmm5
+	pxor	xmm14,xmm12
+	pxor	xmm13,xmm8
+	movdqa	xmm10,xmm14
+	movdqa	xmm9,xmm12
+	pxor	xmm10,xmm13
+	pxor	xmm9,xmm8
+	pand	xmm10,xmm11
+	pand	xmm9,xmm1
+	pxor	xmm11,xmm7
+	pxor	xmm1,xmm5
+	pand	xmm7,xmm14
+	pand	xmm5,xmm12
+	pand	xmm11,xmm13
+	pand	xmm1,xmm8
+	pxor	xmm7,xmm11
+	pxor	xmm1,xmm5
+	pxor	xmm11,xmm10
+	pxor	xmm5,xmm9
+	pxor	xmm4,xmm11
+	pxor	xmm1,xmm11
+	pxor	xmm0,xmm7
+	pxor	xmm5,xmm7
+
+	movdqa	xmm11,xmm2
+	movdqa	xmm7,xmm6
+	pxor	xmm11,xmm15
+	pxor	xmm7,xmm3
+	movdqa	xmm10,xmm14
+	movdqa	xmm9,xmm12
+	pxor	xmm10,xmm13
+	pxor	xmm9,xmm8
+	pand	xmm10,xmm11
+	pand	xmm9,xmm15
+	pxor	xmm11,xmm7
+	pxor	xmm15,xmm3
+	pand	xmm7,xmm14
+	pand	xmm3,xmm12
+	pand	xmm11,xmm13
+	pand	xmm15,xmm8
+	pxor	xmm7,xmm11
+	pxor	xmm15,xmm3
+	pxor	xmm11,xmm10
+	pxor	xmm3,xmm9
+	pxor	xmm14,xmm12
+	pxor	xmm13,xmm8
+	movdqa	xmm10,xmm14
+	pxor	xmm10,xmm13
+	pand	xmm10,xmm2
+	pxor	xmm2,xmm6
+	pand	xmm6,xmm14
+	pand	xmm2,xmm13
+	pxor	xmm2,xmm6
+	pxor	xmm6,xmm10
+	pxor	xmm2,xmm11
+	pxor	xmm15,xmm11
+	pxor	xmm6,xmm7
+	pxor	xmm3,xmm7
+	pxor	xmm0,xmm6
+	pxor	xmm5,xmm4
+
+	pxor	xmm3,xmm0
+	pxor	xmm1,xmm6
+	pxor	xmm4,xmm6
+	pxor	xmm3,xmm1
+	pxor	xmm6,xmm15
+	pxor	xmm3,xmm4
+	pxor	xmm2,xmm5
+	pxor	xmm5,xmm0
+	pxor	xmm2,xmm3
+
+	pxor	xmm3,xmm15
+	pxor	xmm6,xmm2
+	dec	r10d	; one more round consumed; the flags set here also drive the jnz further down
+	jl	NEAR $L$dec_done	; counter went negative: leave the loop without the mixing step below
+
+	pshufd	xmm7,xmm15,0x4E	; 0x4E swaps the two 64-bit halves; each copy is XORed with its source
+	pshufd	xmm13,xmm2,0x4E
+	pxor	xmm7,xmm15
+	pshufd	xmm14,xmm4,0x4E
+	pxor	xmm13,xmm2
+	pshufd	xmm8,xmm0,0x4E
+	pxor	xmm14,xmm4
+	pshufd	xmm9,xmm5,0x4E
+	pxor	xmm8,xmm0
+	pshufd	xmm10,xmm3,0x4E
+	pxor	xmm9,xmm5
+	pxor	xmm15,xmm13
+	pxor	xmm0,xmm13
+	pshufd	xmm11,xmm1,0x4E
+	pxor	xmm10,xmm3
+	pxor	xmm5,xmm7
+	pxor	xmm3,xmm8
+	pshufd	xmm12,xmm6,0x4E
+	pxor	xmm11,xmm1
+	pxor	xmm0,xmm14
+	pxor	xmm1,xmm9
+	pxor	xmm12,xmm6
+
+	pxor	xmm5,xmm14
+	pxor	xmm3,xmm13
+	pxor	xmm1,xmm13
+	pxor	xmm6,xmm10
+	pxor	xmm2,xmm11
+	pxor	xmm1,xmm14
+	pxor	xmm6,xmm14
+	pxor	xmm4,xmm12
+	pshufd	xmm7,xmm15,0x93	; 0x93 rotates the four dwords by one lane
+	pshufd	xmm8,xmm0,0x93
+	pxor	xmm15,xmm7
+	pshufd	xmm9,xmm5,0x93
+	pxor	xmm0,xmm8
+	pshufd	xmm10,xmm3,0x93
+	pxor	xmm5,xmm9
+	pshufd	xmm11,xmm1,0x93
+	pxor	xmm3,xmm10
+	pshufd	xmm12,xmm6,0x93
+	pxor	xmm1,xmm11
+	pshufd	xmm13,xmm2,0x93
+	pxor	xmm6,xmm12
+	pshufd	xmm14,xmm4,0x93
+	pxor	xmm2,xmm13
+	pxor	xmm4,xmm14
+
+	pxor	xmm8,xmm15
+	pxor	xmm7,xmm4
+	pxor	xmm8,xmm4
+	pshufd	xmm15,xmm15,0x4E
+	pxor	xmm9,xmm0
+	pshufd	xmm0,xmm0,0x4E
+	pxor	xmm12,xmm1
+	pxor	xmm15,xmm7
+	pxor	xmm13,xmm6
+	pxor	xmm0,xmm8
+	pxor	xmm11,xmm3
+	pshufd	xmm7,xmm1,0x4E
+	pxor	xmm14,xmm2
+	pshufd	xmm8,xmm6,0x4E
+	pxor	xmm10,xmm5
+	pshufd	xmm1,xmm3,0x4E
+	pxor	xmm10,xmm4
+	pshufd	xmm6,xmm4,0x4E
+	pxor	xmm11,xmm4
+	pshufd	xmm3,xmm2,0x4E
+	pxor	xmm7,xmm11
+	pshufd	xmm2,xmm5,0x4E
+	pxor	xmm8,xmm12
+	pxor	xmm10,xmm1
+	pxor	xmm6,xmm14
+	pxor	xmm13,xmm3
+	movdqa	xmm3,xmm7
+	pxor	xmm2,xmm9
+	movdqa	xmm5,xmm13
+	movdqa	xmm4,xmm8
+	movdqa	xmm1,xmm2
+	movdqa	xmm2,xmm10
+	movdqa	xmm7,XMMWORD[((-16))+r11]	; shuffle mask for an ordinary next round ($L$BS0-16)
+	jnz	NEAR $L$dec_loop	; flags still come from "dec r10d" above (SSE ops leave EFLAGS alone)
+	movdqa	xmm7,XMMWORD[((-32))+r11]	; counter hit zero: the final round uses the mask at $L$BS0-32 instead
+	jmp	NEAR $L$dec_loop
+ALIGN	16
+$L$dec_done:	; undo the bit-slicing with the same masked swaps (shift 1, 2, 4), then XOR the last key
+	movdqa	xmm7,XMMWORD[r11]	; shift-by-1 mask
+	movdqa	xmm8,XMMWORD[16+r11]	; shift-by-2 mask
+	movdqa	xmm9,xmm2
+	psrlq	xmm2,1
+	movdqa	xmm10,xmm1
+	psrlq	xmm1,1
+	pxor	xmm2,xmm4
+	pxor	xmm1,xmm6
+	pand	xmm2,xmm7
+	pand	xmm1,xmm7
+	pxor	xmm4,xmm2
+	psllq	xmm2,1
+	pxor	xmm6,xmm1
+	psllq	xmm1,1
+	pxor	xmm2,xmm9
+	pxor	xmm1,xmm10
+	movdqa	xmm9,xmm5
+	psrlq	xmm5,1
+	movdqa	xmm10,xmm15
+	psrlq	xmm15,1
+	pxor	xmm5,xmm3
+	pxor	xmm15,xmm0
+	pand	xmm5,xmm7
+	pand	xmm15,xmm7
+	pxor	xmm3,xmm5
+	psllq	xmm5,1
+	pxor	xmm0,xmm15
+	psllq	xmm15,1
+	pxor	xmm5,xmm9
+	pxor	xmm15,xmm10
+	movdqa	xmm7,XMMWORD[32+r11]	; shift-by-4 mask
+	movdqa	xmm9,xmm6
+	psrlq	xmm6,2
+	movdqa	xmm10,xmm1
+	psrlq	xmm1,2
+	pxor	xmm6,xmm4
+	pxor	xmm1,xmm2
+	pand	xmm6,xmm8
+	pand	xmm1,xmm8
+	pxor	xmm4,xmm6
+	psllq	xmm6,2
+	pxor	xmm2,xmm1
+	psllq	xmm1,2
+	pxor	xmm6,xmm9
+	pxor	xmm1,xmm10
+	movdqa	xmm9,xmm0
+	psrlq	xmm0,2
+	movdqa	xmm10,xmm15
+	psrlq	xmm15,2
+	pxor	xmm0,xmm3
+	pxor	xmm15,xmm5
+	pand	xmm0,xmm8
+	pand	xmm15,xmm8
+	pxor	xmm3,xmm0
+	psllq	xmm0,2
+	pxor	xmm5,xmm15
+	psllq	xmm15,2
+	pxor	xmm0,xmm9
+	pxor	xmm15,xmm10
+	movdqa	xmm9,xmm3
+	psrlq	xmm3,4
+	movdqa	xmm10,xmm5
+	psrlq	xmm5,4
+	pxor	xmm3,xmm4
+	pxor	xmm5,xmm2
+	pand	xmm3,xmm7
+	pand	xmm5,xmm7
+	pxor	xmm4,xmm3
+	psllq	xmm3,4
+	pxor	xmm2,xmm5
+	psllq	xmm5,4
+	pxor	xmm3,xmm9
+	pxor	xmm5,xmm10
+	movdqa	xmm9,xmm0
+	psrlq	xmm0,4
+	movdqa	xmm10,xmm15
+	psrlq	xmm15,4
+	pxor	xmm0,xmm6
+	pxor	xmm15,xmm1
+	pand	xmm0,xmm7
+	pand	xmm15,xmm7
+	pxor	xmm6,xmm0
+	psllq	xmm0,4
+	pxor	xmm1,xmm15
+	psllq	xmm15,4
+	pxor	xmm0,xmm9
+	pxor	xmm15,xmm10
+	movdqa	xmm7,XMMWORD[rax]	; final 16-byte key at the current schedule position
+	pxor	xmm5,xmm7	; XOR it into all eight result registers
+	pxor	xmm3,xmm7
+	pxor	xmm1,xmm7
+	pxor	xmm6,xmm7
+	pxor	xmm2,xmm7
+	pxor	xmm4,xmm7
+	pxor	xmm15,xmm7
+	pxor	xmm0,xmm7
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	16
+_bsaes_key_convert:	; internal routine: rcx = source round keys (16 bytes each), rax = destination, r10d = round count; returns the last loaded key in xmm6 and [$L$masks+80] in xmm7
+	lea	r11,[$L$masks]	; constant table defined outside this chunk
+	movdqu	xmm7,XMMWORD[rcx]	; first source key, copied through unchanged below
+	lea	rcx,[16+rcx]
+	movdqa	xmm0,XMMWORD[r11]	; xmm0-xmm3: four bit-select masks (presumably 0x01/0x02/0x04/0x08 per byte; table not in view)
+	movdqa	xmm1,XMMWORD[16+r11]
+	movdqa	xmm2,XMMWORD[32+r11]
+	movdqa	xmm3,XMMWORD[48+r11]
+	movdqa	xmm4,XMMWORD[64+r11]	; byte-shuffle mask applied to every key in the loop
+	pcmpeqd	xmm5,xmm5	; xmm5 = all ones, used below as an XOR "not"
+
+	movdqu	xmm6,XMMWORD[rcx]	; preload the second source key
+	movdqa	XMMWORD[rax],xmm7	; store the first key as-is (16 bytes)
+	lea	rax,[16+rax]
+	dec	r10d	; the loop below runs (rounds - 1) times
+	jmp	NEAR $L$key_loop
+ALIGN	16
+$L$key_loop:	; each iteration expands the 16-byte key in xmm6 into eight 16-byte bit-plane masks (128 bytes)
+DB	102,15,56,0,244	; encoded: pshufb xmm6,xmm4
+
+	movdqa	xmm8,xmm0
+	movdqa	xmm9,xmm1
+
+	pand	xmm8,xmm6	; isolate one bit per byte ...
+	pand	xmm9,xmm6
+	movdqa	xmm10,xmm2
+	pcmpeqb	xmm8,xmm0	; ... and widen it to 0xFF/0x00 per byte
+	psllq	xmm0,4	; move masks xmm0-xmm3 up by four bits for the second half
+	movdqa	xmm11,xmm3
+	pcmpeqb	xmm9,xmm1
+	psllq	xmm1,4
+
+	pand	xmm10,xmm6
+	pand	xmm11,xmm6
+	movdqa	xmm12,xmm0
+	pcmpeqb	xmm10,xmm2
+	psllq	xmm2,4
+	movdqa	xmm13,xmm1
+	pcmpeqb	xmm11,xmm3
+	psllq	xmm3,4
+
+	movdqa	xmm14,xmm2
+	movdqa	xmm15,xmm3
+	pxor	xmm8,xmm5	; invert planes 0 and 1 (XOR with all ones)
+	pxor	xmm9,xmm5
+
+	pand	xmm12,xmm6
+	pand	xmm13,xmm6
+	movdqa	XMMWORD[rax],xmm8	; write plane 0
+	pcmpeqb	xmm12,xmm0
+	psrlq	xmm0,4	; restore masks xmm0-xmm3 for the next iteration
+	movdqa	XMMWORD[16+rax],xmm9	; write plane 1
+	pcmpeqb	xmm13,xmm1
+	psrlq	xmm1,4
+	lea	rcx,[16+rcx]	; advance the source pointer
+
+	pand	xmm14,xmm6
+	pand	xmm15,xmm6
+	movdqa	XMMWORD[32+rax],xmm10	; write plane 2
+	pcmpeqb	xmm14,xmm2
+	psrlq	xmm2,4
+	movdqa	XMMWORD[48+rax],xmm11	; write plane 3
+	pcmpeqb	xmm15,xmm3
+	psrlq	xmm3,4
+	movdqu	xmm6,XMMWORD[rcx]	; load the next source key (after the final iteration this is the last key, left in xmm6)
+
+	pxor	xmm13,xmm5	; invert planes 5 and 6
+	pxor	xmm14,xmm5
+	movdqa	XMMWORD[64+rax],xmm12	; write planes 4-7
+	movdqa	XMMWORD[80+rax],xmm13
+	movdqa	XMMWORD[96+rax],xmm14
+	movdqa	XMMWORD[112+rax],xmm15
+	lea	rax,[128+rax]	; advance the destination by one 128-byte entry
+	dec	r10d
+	jnz	NEAR $L$key_loop
+
+	movdqa	xmm7,XMMWORD[80+r11]	; return the constant at $L$masks+80 in xmm7; callers combine it with the first/last key
+
+	DB	0F3h,0C3h		;repret
+
+EXTERN	asm_AES_cbc_encrypt
+global	bsaes_cbc_encrypt
+
+ALIGN	16
+bsaes_cbc_encrypt:	; exported; presumably Win64 args rcx=in, rdx=out, r8=byte length, r9=key, [rsp+40]=iv pointer, [rsp+48]=enc flag (confirm against the C prototype)
+	mov	r11d,DWORD[48+rsp]	; sixth argument
+	cmp	r11d,0
+	jne	NEAR asm_AES_cbc_encrypt	; non-zero flag: tail-jump to asm_AES_cbc_encrypt with arguments untouched
+	cmp	r8,128
+	jb	NEAR asm_AES_cbc_encrypt	; fewer than 128 bytes (eight blocks): also delegate
+
+	mov	rax,rsp	; remember the entry stack pointer
+$L$cbc_dec_prologue:
+	push	rbp
+	push	rbx
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	lea	rsp,[((-72))+rsp]
+	mov	r10,QWORD[160+rsp]	; rsp is now entry-120, so this reads [entry+40]: the fifth argument
+	lea	rsp,[((-160))+rsp]	; room for the ten xmm saves below
+	movaps	XMMWORD[64+rsp],xmm6	; save xmm6-xmm15 at [rsp+64 .. rsp+208]
+	movaps	XMMWORD[80+rsp],xmm7
+	movaps	XMMWORD[96+rsp],xmm8
+	movaps	XMMWORD[112+rsp],xmm9
+	movaps	XMMWORD[128+rsp],xmm10
+	movaps	XMMWORD[144+rsp],xmm11
+	movaps	XMMWORD[160+rsp],xmm12
+	movaps	XMMWORD[176+rsp],xmm13
+	movaps	XMMWORD[192+rsp],xmm14
+	movaps	XMMWORD[208+rsp],xmm15
+$L$cbc_dec_body:
+	mov	rbp,rsp	; rbp = frame base (entry rsp - 280)
+	mov	eax,DWORD[240+r9]	; 32-bit field at key+240 (presumably the round count)
+	mov	r12,rcx	; r12 = input pointer
+	mov	r13,rdx	; r13 = output pointer
+	mov	r14,r8	; r14 = length
+	mov	r15,r9	; r15 = key pointer
+	mov	rbx,r10	; rbx = fifth argument (iv pointer)
+	shr	r14,4	; r14 = number of 16-byte blocks
+
+	mov	edx,eax	; keep the round count in edx
+	shl	rax,7
+	sub	rax,96
+	sub	rsp,rax	; reserve rounds*128 - 96 bytes for the converted key schedule
+
+	mov	rax,rsp	; destination for _bsaes_key_convert
+	mov	rcx,r15	; source key
+	mov	r10d,edx	; round count
+	call	_bsaes_key_convert
+	pxor	xmm7,XMMWORD[rsp]	; combine the returned constant with the first stored key ...
+	movdqa	XMMWORD[rax],xmm6	; ... store the last source key at the end of the schedule ...
+	movdqa	XMMWORD[rsp],xmm7	; ... and write the adjusted first key back
+
+	movdqu	xmm14,XMMWORD[rbx]	; xmm14 = current IV
+	sub	r14,8
+$L$cbc_dec_loop:	; main loop: eight blocks per iteration
+	movdqu	xmm15,XMMWORD[r12]	; load eight input blocks into xmm15,xmm0-xmm6
+	movdqu	xmm0,XMMWORD[16+r12]
+	movdqu	xmm1,XMMWORD[32+r12]
+	movdqu	xmm2,XMMWORD[48+r12]
+	movdqu	xmm3,XMMWORD[64+r12]
+	movdqu	xmm4,XMMWORD[80+r12]
+	mov	rax,rsp	; key schedule for _bsaes_decrypt8
+	movdqu	xmm5,XMMWORD[96+r12]
+	mov	r10d,edx	; round count for _bsaes_decrypt8
+	movdqu	xmm6,XMMWORD[112+r12]
+	movdqa	XMMWORD[32+rbp],xmm14	; park the IV on the frame (xmm14 is clobbered by the call)
+
+	call	_bsaes_decrypt8
+
+	pxor	xmm15,XMMWORD[32+rbp]	; block 0 ^= IV
+	movdqu	xmm7,XMMWORD[r12]	; reload input blocks: each later result is XORed with the preceding input block
+	movdqu	xmm8,XMMWORD[16+r12]
+	pxor	xmm0,xmm7
+	movdqu	xmm9,XMMWORD[32+r12]
+	pxor	xmm5,xmm8
+	movdqu	xmm10,XMMWORD[48+r12]
+	pxor	xmm3,xmm9
+	movdqu	xmm11,XMMWORD[64+r12]
+	pxor	xmm1,xmm10
+	movdqu	xmm12,XMMWORD[80+r12]
+	pxor	xmm6,xmm11
+	movdqu	xmm13,XMMWORD[96+r12]
+	pxor	xmm2,xmm12
+	movdqu	xmm14,XMMWORD[112+r12]	; last input block becomes the next IV
+	pxor	xmm4,xmm13
+	movdqu	XMMWORD[r13],xmm15	; store results in register order 15,0,5,3,1,6,2,4
+	lea	r12,[128+r12]
+	movdqu	XMMWORD[16+r13],xmm0
+	movdqu	XMMWORD[32+r13],xmm5
+	movdqu	XMMWORD[48+r13],xmm3
+	movdqu	XMMWORD[64+r13],xmm1
+	movdqu	XMMWORD[80+r13],xmm6
+	movdqu	XMMWORD[96+r13],xmm2
+	movdqu	XMMWORD[112+r13],xmm4
+	lea	r13,[128+r13]
+	sub	r14,8
+	jnc	NEAR $L$cbc_dec_loop	; loop while at least eight blocks remained
+
+	add	r14,8	; r14 = leftover block count (0..7)
+	jz	NEAR $L$cbc_dec_done
+
+	movdqu	xmm15,XMMWORD[r12]	; tail: load only as many blocks as remain, dispatching on r14
+	mov	rax,rsp
+	mov	r10d,edx
+	cmp	r14,2
+	jb	NEAR $L$cbc_dec_one
+	movdqu	xmm0,XMMWORD[16+r12]	; movdqu leaves the flags from the preceding cmp intact for the je
+	je	NEAR $L$cbc_dec_two
+	movdqu	xmm1,XMMWORD[32+r12]
+	cmp	r14,4
+	jb	NEAR $L$cbc_dec_three
+	movdqu	xmm2,XMMWORD[48+r12]
+	je	NEAR $L$cbc_dec_four
+	movdqu	xmm3,XMMWORD[64+r12]
+	cmp	r14,6
+	jb	NEAR $L$cbc_dec_five
+	movdqu	xmm4,XMMWORD[80+r12]
+	je	NEAR $L$cbc_dec_six
+	movdqu	xmm5,XMMWORD[96+r12]	; fall through: seven blocks remain
+	movdqa	XMMWORD[32+rbp],xmm14
+	call	_bsaes_decrypt8
+	pxor	xmm15,XMMWORD[32+rbp]
+	movdqu	xmm7,XMMWORD[r12]
+	movdqu	xmm8,XMMWORD[16+r12]
+	pxor	xmm0,xmm7
+	movdqu	xmm9,XMMWORD[32+r12]
+	pxor	xmm5,xmm8
+	movdqu	xmm10,XMMWORD[48+r12]
+	pxor	xmm3,xmm9
+	movdqu	xmm11,XMMWORD[64+r12]
+	pxor	xmm1,xmm10
+	movdqu	xmm12,XMMWORD[80+r12]
+	pxor	xmm6,xmm11
+	movdqu	xmm14,XMMWORD[96+r12]	; seventh input block becomes the final IV
+	pxor	xmm2,xmm12
+	movdqu	XMMWORD[r13],xmm15
+	movdqu	XMMWORD[16+r13],xmm0
+	movdqu	XMMWORD[32+r13],xmm5
+	movdqu	XMMWORD[48+r13],xmm3
+	movdqu	XMMWORD[64+r13],xmm1
+	movdqu	XMMWORD[80+r13],xmm6
+	movdqu	XMMWORD[96+r13],xmm2
+	jmp	NEAR $L$cbc_dec_done
+ALIGN	16
+$L$cbc_dec_six:	; six blocks remain
+	movdqa	XMMWORD[32+rbp],xmm14
+	call	_bsaes_decrypt8
+	pxor	xmm15,XMMWORD[32+rbp]
+	movdqu	xmm7,XMMWORD[r12]
+	movdqu	xmm8,XMMWORD[16+r12]
+	pxor	xmm0,xmm7
+	movdqu	xmm9,XMMWORD[32+r12]
+	pxor	xmm5,xmm8
+	movdqu	xmm10,XMMWORD[48+r12]
+	pxor	xmm3,xmm9
+	movdqu	xmm11,XMMWORD[64+r12]
+	pxor	xmm1,xmm10
+	movdqu	xmm14,XMMWORD[80+r12]	; sixth input block becomes the final IV
+	pxor	xmm6,xmm11
+	movdqu	XMMWORD[r13],xmm15
+	movdqu	XMMWORD[16+r13],xmm0
+	movdqu	XMMWORD[32+r13],xmm5
+	movdqu	XMMWORD[48+r13],xmm3
+	movdqu	XMMWORD[64+r13],xmm1
+	movdqu	XMMWORD[80+r13],xmm6
+	jmp	NEAR $L$cbc_dec_done
+ALIGN	16
+$L$cbc_dec_five:	; five blocks remain
+	movdqa	XMMWORD[32+rbp],xmm14
+	call	_bsaes_decrypt8
+	pxor	xmm15,XMMWORD[32+rbp]
+	movdqu	xmm7,XMMWORD[r12]
+	movdqu	xmm8,XMMWORD[16+r12]
+	pxor	xmm0,xmm7
+	movdqu	xmm9,XMMWORD[32+r12]
+	pxor	xmm5,xmm8
+	movdqu	xmm10,XMMWORD[48+r12]
+	pxor	xmm3,xmm9
+	movdqu	xmm14,XMMWORD[64+r12]	; fifth input block becomes the final IV
+	pxor	xmm1,xmm10
+	movdqu	XMMWORD[r13],xmm15
+	movdqu	XMMWORD[16+r13],xmm0
+	movdqu	XMMWORD[32+r13],xmm5
+	movdqu	XMMWORD[48+r13],xmm3
+	movdqu	XMMWORD[64+r13],xmm1
+	jmp	NEAR $L$cbc_dec_done
+ALIGN	16
+$L$cbc_dec_four:	; four blocks remain
+	movdqa	XMMWORD[32+rbp],xmm14
+	call	_bsaes_decrypt8
+	pxor	xmm15,XMMWORD[32+rbp]
+	movdqu	xmm7,XMMWORD[r12]
+	movdqu	xmm8,XMMWORD[16+r12]
+	pxor	xmm0,xmm7
+	movdqu	xmm9,XMMWORD[32+r12]
+	pxor	xmm5,xmm8
+	movdqu	xmm14,XMMWORD[48+r12]	; fourth input block becomes the final IV
+	pxor	xmm3,xmm9
+	movdqu	XMMWORD[r13],xmm15
+	movdqu	XMMWORD[16+r13],xmm0
+	movdqu	XMMWORD[32+r13],xmm5
+	movdqu	XMMWORD[48+r13],xmm3
+	jmp	NEAR $L$cbc_dec_done
+ALIGN	16
+$L$cbc_dec_three:	; three blocks remain
+	movdqa	XMMWORD[32+rbp],xmm14
+	call	_bsaes_decrypt8
+	pxor	xmm15,XMMWORD[32+rbp]
+	movdqu	xmm7,XMMWORD[r12]
+	movdqu	xmm8,XMMWORD[16+r12]
+	pxor	xmm0,xmm7
+	movdqu	xmm14,XMMWORD[32+r12]	; third input block becomes the final IV
+	pxor	xmm5,xmm8
+	movdqu	XMMWORD[r13],xmm15
+	movdqu	XMMWORD[16+r13],xmm0
+	movdqu	XMMWORD[32+r13],xmm5
+	jmp	NEAR $L$cbc_dec_done
+ALIGN	16
+$L$cbc_dec_two:	; two blocks remain
+	movdqa	XMMWORD[32+rbp],xmm14
+	call	_bsaes_decrypt8
+	pxor	xmm15,XMMWORD[32+rbp]
+	movdqu	xmm7,XMMWORD[r12]
+	movdqu	xmm14,XMMWORD[16+r12]	; second input block becomes the final IV
+	pxor	xmm0,xmm7
+	movdqu	XMMWORD[r13],xmm15
+	movdqu	XMMWORD[16+r13],xmm0
+	jmp	NEAR $L$cbc_dec_done
+ALIGN	16
+$L$cbc_dec_one:	; single block: use the external asm_AES_decrypt instead of the 8-way core
+	lea	rcx,[r12]	; arg1 = input block
+	lea	rdx,[32+rbp]	; arg2 = scratch output slot on the frame
+	lea	r8,[r15]	; arg3 = original (unconverted) key
+	call	asm_AES_decrypt	; NOTE(review): relies on xmm14/xmm15 surviving this call; confirm against asm_AES_decrypt
+	pxor	xmm14,XMMWORD[32+rbp]	; IV ^ decrypted block
+	movdqu	XMMWORD[r13],xmm14	; store the single output block
+	movdqa	xmm14,xmm15	; the input block (loaded before the dispatch) becomes the final IV
+
+$L$cbc_dec_done:
+	movdqu	XMMWORD[rbx],xmm14	; write the final IV back through the iv pointer
+	lea	rax,[rsp]
+	pxor	xmm0,xmm0
+$L$cbc_dec_bzero:	; zero the stack from rsp up to rbp (converted key schedule), 32 bytes per iteration
+	movdqa	XMMWORD[rax],xmm0
+	movdqa	XMMWORD[16+rax],xmm0
+	lea	rax,[32+rax]
+	cmp	rbp,rax
+	ja	NEAR $L$cbc_dec_bzero
+
+	lea	rax,[120+rbp]
+	movaps	xmm6,XMMWORD[64+rbp]	; restore xmm6-xmm15 from the slots written in the prologue
+	movaps	xmm7,XMMWORD[80+rbp]
+	movaps	xmm8,XMMWORD[96+rbp]
+	movaps	xmm9,XMMWORD[112+rbp]
+	movaps	xmm10,XMMWORD[128+rbp]
+	movaps	xmm11,XMMWORD[144+rbp]
+	movaps	xmm12,XMMWORD[160+rbp]
+	movaps	xmm13,XMMWORD[176+rbp]
+	movaps	xmm14,XMMWORD[192+rbp]
+	movaps	xmm15,XMMWORD[208+rbp]
+	lea	rax,[160+rax]	; rax = rbp + 280 = stack pointer at function entry
+$L$cbc_dec_tail:
+	mov	r15,QWORD[((-48))+rax]	; restore the six pushed registers (reverse push order)
+	mov	r14,QWORD[((-40))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	rbx,QWORD[((-16))+rax]
+	mov	rbp,QWORD[((-8))+rax]
+	lea	rsp,[rax]	; drop the whole frame
+$L$cbc_dec_epilogue:
+	DB	0F3h,0C3h		;repret
+
+
+global	bsaes_ctr32_encrypt_blocks
+;Win64 entry: rcx=input, rdx=output, r8=number of 16-byte blocks, r9=key, stack argument=pointer to the 16-byte counter block.
+ALIGN	16
+bsaes_ctr32_encrypt_blocks:
+	mov	rax,rsp
+$L$ctr_enc_prologue:
+	push	rbp
+	push	rbx
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	lea	rsp,[((-72))+rsp]
+	mov	r10,QWORD[160+rsp]		;stack argument: 48 pushed + 72 reserved = 120, so this is [entry rsp+40]
+	lea	rsp,[((-160))+rsp]		;room for the xmm6-xmm15 save area
+	movaps	XMMWORD[64+rsp],xmm6
+	movaps	XMMWORD[80+rsp],xmm7
+	movaps	XMMWORD[96+rsp],xmm8
+	movaps	XMMWORD[112+rsp],xmm9
+	movaps	XMMWORD[128+rsp],xmm10
+	movaps	XMMWORD[144+rsp],xmm11
+	movaps	XMMWORD[160+rsp],xmm12
+	movaps	XMMWORD[176+rsp],xmm13
+	movaps	XMMWORD[192+rsp],xmm14
+	movaps	XMMWORD[208+rsp],xmm15
+$L$ctr_enc_body:
+	mov	rbp,rsp		;rbp=frame base; everything below it is wiped on exit
+	movdqu	xmm0,XMMWORD[r10]		;load the caller's counter block
+	mov	eax,DWORD[240+r9]		;dword at key+240 (presumably the round count -- confirm against the key structure)
+	mov	r12,rcx		;r12=input pointer
+	mov	r13,rdx		;r13=output pointer
+	mov	r14,r8		;r14=blocks remaining
+	mov	r15,r9		;r15=key pointer
+	movdqa	XMMWORD[32+rbp],xmm0		;working copy of the counter block lives at [32+rbp]
+	cmp	r8,8
+	jb	NEAR $L$ctr_enc_short		;fewer than 8 blocks: one block at a time
+
+	mov	ebx,eax		;ebx keeps the value for the r10d reload in the loop
+	shl	rax,7
+	sub	rax,96
+	sub	rsp,rax		;reserve eax*128-96 bytes below the frame
+
+	mov	rax,rsp
+	mov	rcx,r15
+	mov	r10d,ebx
+	call	_bsaes_key_convert
+	pxor	xmm7,xmm6
+	movdqa	XMMWORD[rax],xmm7
+
+	movdqa	xmm8,XMMWORD[rsp]
+	lea	r11,[$L$ADD1]
+	movdqa	xmm15,XMMWORD[32+rbp]		;reload the counter block
+	movdqa	xmm7,XMMWORD[((-32))+r11]		;table entry two slots before $L$ADD1, i.e. $L$SWPUP
+DB	102,68,15,56,0,199		;pshufb xmm8,xmm7
+DB	102,68,15,56,0,255		;pshufb xmm15,xmm7
+	movdqa	XMMWORD[rsp],xmm8
+	jmp	NEAR $L$ctr_enc_loop
+ALIGN	16
+$L$ctr_enc_loop:
+	movdqa	XMMWORD[32+rbp],xmm15		;remember this batch's base counter
+	movdqa	xmm0,xmm15
+	movdqa	xmm1,xmm15
+	paddd	xmm0,XMMWORD[r11]		;r11=$L$ADD1 here: counter+1
+	movdqa	xmm2,xmm15
+	paddd	xmm1,XMMWORD[16+r11]		;counter+2
+	movdqa	xmm3,xmm15
+	paddd	xmm2,XMMWORD[32+r11]		;counter+3
+	movdqa	xmm4,xmm15
+	paddd	xmm3,XMMWORD[48+r11]		;counter+4
+	movdqa	xmm5,xmm15
+	paddd	xmm4,XMMWORD[64+r11]		;counter+5
+	movdqa	xmm6,xmm15
+	paddd	xmm5,XMMWORD[80+r11]		;counter+6
+	paddd	xmm6,XMMWORD[96+r11]		;counter+7
+
+
+
+	movdqa	xmm8,XMMWORD[rsp]
+	lea	rax,[16+rsp]
+	movdqa	xmm7,XMMWORD[((-16))+r11]		;table entry just before $L$ADD1, i.e. $L$SWPUPM0SR
+	pxor	xmm15,xmm8
+	pxor	xmm0,xmm8
+	pxor	xmm1,xmm8
+	pxor	xmm2,xmm8
+DB	102,68,15,56,0,255		;pshufb xmm15,xmm7
+DB	102,15,56,0,199		;pshufb xmm0,xmm7
+	pxor	xmm3,xmm8
+	pxor	xmm4,xmm8
+DB	102,15,56,0,207		;pshufb xmm1,xmm7
+DB	102,15,56,0,215		;pshufb xmm2,xmm7
+	pxor	xmm5,xmm8
+	pxor	xmm6,xmm8
+DB	102,15,56,0,223		;pshufb xmm3,xmm7
+DB	102,15,56,0,231		;pshufb xmm4,xmm7
+DB	102,15,56,0,239		;pshufb xmm5,xmm7
+DB	102,15,56,0,247		;pshufb xmm6,xmm7
+	lea	r11,[$L$BS0]
+	mov	r10d,ebx
+
+	call	_bsaes_encrypt8_bitslice
+
+	sub	r14,8
+	jc	NEAR $L$ctr_enc_loop_done		;fewer than 8 blocks were left: partial batch
+
+	movdqu	xmm7,XMMWORD[r12]
+	movdqu	xmm8,XMMWORD[16+r12]
+	movdqu	xmm9,XMMWORD[32+r12]
+	movdqu	xmm10,XMMWORD[48+r12]
+	movdqu	xmm11,XMMWORD[64+r12]
+	movdqu	xmm12,XMMWORD[80+r12]
+	movdqu	xmm13,XMMWORD[96+r12]
+	movdqu	xmm14,XMMWORD[112+r12]
+	lea	r12,[128+r12]
+	pxor	xmm7,xmm15		;results are consumed in the order xmm15,0,3,5,2,6,1,4
+	movdqa	xmm15,XMMWORD[32+rbp]		;reload the batch's base counter
+	pxor	xmm0,xmm8
+	movdqu	XMMWORD[r13],xmm7
+	pxor	xmm3,xmm9
+	movdqu	XMMWORD[16+r13],xmm0
+	pxor	xmm5,xmm10
+	movdqu	XMMWORD[32+r13],xmm3
+	pxor	xmm2,xmm11
+	movdqu	XMMWORD[48+r13],xmm5
+	pxor	xmm6,xmm12
+	movdqu	XMMWORD[64+r13],xmm2
+	pxor	xmm1,xmm13
+	movdqu	XMMWORD[80+r13],xmm6
+	pxor	xmm4,xmm14
+	movdqu	XMMWORD[96+r13],xmm1
+	lea	r11,[$L$ADD1]
+	movdqu	XMMWORD[112+r13],xmm4
+	lea	r13,[128+r13]
+	paddd	xmm15,XMMWORD[112+r11]		;$L$ADD8: advance the base counter by 8
+	jnz	NEAR $L$ctr_enc_loop		;flags are still those of "sub r14,8"; only lea/SSE instructions ran since
+
+	jmp	NEAR $L$ctr_enc_done
+ALIGN	16
+$L$ctr_enc_loop_done:
+	add	r14,8		;undo the failed "sub r14,8": r14=blocks left (1..7)
+	movdqu	xmm7,XMMWORD[r12]
+	pxor	xmm15,xmm7
+	movdqu	XMMWORD[r13],xmm15
+	cmp	r14,2
+	jb	NEAR $L$ctr_enc_done		;exactly 1 block
+	movdqu	xmm8,XMMWORD[16+r12]
+	pxor	xmm0,xmm8
+	movdqu	XMMWORD[16+r13],xmm0
+	je	NEAR $L$ctr_enc_done		;exactly 2 blocks (flags still from "cmp r14,2")
+	movdqu	xmm9,XMMWORD[32+r12]
+	pxor	xmm3,xmm9
+	movdqu	XMMWORD[32+r13],xmm3
+	cmp	r14,4
+	jb	NEAR $L$ctr_enc_done		;exactly 3 blocks
+	movdqu	xmm10,XMMWORD[48+r12]
+	pxor	xmm5,xmm10
+	movdqu	XMMWORD[48+r13],xmm5
+	je	NEAR $L$ctr_enc_done		;exactly 4 blocks
+	movdqu	xmm11,XMMWORD[64+r12]
+	pxor	xmm2,xmm11
+	movdqu	XMMWORD[64+r13],xmm2
+	cmp	r14,6
+	jb	NEAR $L$ctr_enc_done		;exactly 5 blocks
+	movdqu	xmm12,XMMWORD[80+r12]
+	pxor	xmm6,xmm12
+	movdqu	XMMWORD[80+r13],xmm6
+	je	NEAR $L$ctr_enc_done		;exactly 6 blocks
+	movdqu	xmm13,XMMWORD[96+r12]
+	pxor	xmm1,xmm13
+	movdqu	XMMWORD[96+r13],xmm1
+	jmp	NEAR $L$ctr_enc_done		;7 blocks
+
+ALIGN	16
+$L$ctr_enc_short:
+	lea	rcx,[32+rbp]		;arg1: counter block
+	lea	rdx,[48+rbp]		;arg2: scratch output at [48+rbp]
+	lea	r8,[r15]		;arg3: key
+	call	asm_AES_encrypt
+	movdqu	xmm0,XMMWORD[r12]
+	lea	r12,[16+r12]
+	mov	eax,DWORD[44+rbp]		;last 4 bytes of the counter block
+	bswap	eax		;byte-swap so "inc" acts on the big-endian value
+	pxor	xmm0,XMMWORD[48+rbp]		;input block ^ encrypted counter
+	inc	eax
+	movdqu	XMMWORD[r13],xmm0
+	bswap	eax
+	lea	r13,[16+r13]
+	mov	DWORD[44+rsp],eax		;rsp==rbp on this path (no extra stack was reserved), so this updates [44+rbp]
+	dec	r14
+	jnz	NEAR $L$ctr_enc_short
+
+$L$ctr_enc_done:
+	lea	rax,[rsp]
+	pxor	xmm0,xmm0
+$L$ctr_enc_bzero:		;zero the stack from rsp upward, 32 bytes per pass, while rax is below rbp
+	movdqa	XMMWORD[rax],xmm0
+	movdqa	XMMWORD[16+rax],xmm0
+	lea	rax,[32+rax]
+	cmp	rbp,rax
+	ja	NEAR $L$ctr_enc_bzero
+
+	lea	rax,[120+rbp]
+	movaps	xmm6,XMMWORD[64+rbp]
+	movaps	xmm7,XMMWORD[80+rbp]
+	movaps	xmm8,XMMWORD[96+rbp]
+	movaps	xmm9,XMMWORD[112+rbp]
+	movaps	xmm10,XMMWORD[128+rbp]
+	movaps	xmm11,XMMWORD[144+rbp]
+	movaps	xmm12,XMMWORD[160+rbp]
+	movaps	xmm13,XMMWORD[176+rbp]
+	movaps	xmm14,XMMWORD[192+rbp]
+	movaps	xmm15,XMMWORD[208+rbp]
+	lea	rax,[160+rax]		;rax=rbp+280=rsp at function entry
+$L$ctr_enc_tail:
+	mov	r15,QWORD[((-48))+rax]		;pushed last
+	mov	r14,QWORD[((-40))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	rbx,QWORD[((-16))+rax]
+	mov	rbp,QWORD[((-8))+rax]		;pushed first
+	lea	rsp,[rax]
+$L$ctr_enc_epilogue:
+	DB	0F3h,0C3h		;repret
+
+global	bsaes_xts_encrypt
+;Win64 entry: rcx=input, rdx=output, r8=length in bytes, r9=data key; two stack arguments: a second key and a pointer to a 16-byte block that is encrypted with it to form the first tweak.
+ALIGN	16
+bsaes_xts_encrypt:
+	mov	rax,rsp
+$L$xts_enc_prologue:
+	push	rbp
+	push	rbx
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	lea	rsp,[((-72))+rsp]
+	mov	r10,QWORD[160+rsp]		;first stack argument ([entry rsp+40]): key passed to asm_AES_encrypt below
+	mov	r11,QWORD[168+rsp]		;second stack argument ([entry rsp+48]): block encrypted below
+	lea	rsp,[((-160))+rsp]
+	movaps	XMMWORD[64+rsp],xmm6
+	movaps	XMMWORD[80+rsp],xmm7
+	movaps	XMMWORD[96+rsp],xmm8
+	movaps	XMMWORD[112+rsp],xmm9
+	movaps	XMMWORD[128+rsp],xmm10
+	movaps	XMMWORD[144+rsp],xmm11
+	movaps	XMMWORD[160+rsp],xmm12
+	movaps	XMMWORD[176+rsp],xmm13
+	movaps	XMMWORD[192+rsp],xmm14
+	movaps	XMMWORD[208+rsp],xmm15
+$L$xts_enc_body:
+	mov	rbp,rsp
+	mov	r12,rcx		;r12=input pointer
+	mov	r13,rdx		;r13=output pointer
+	mov	r14,r8		;r14=length in bytes
+	mov	r15,r9		;r15=data key pointer
+
+	lea	rcx,[r11]
+	lea	rdx,[32+rbp]		;result (the initial tweak) is written to [32+rbp]
+	lea	r8,[r10]
+	call	asm_AES_encrypt
+
+	mov	eax,DWORD[240+r15]		;dword at key+240 (presumably the round count -- confirm against the key structure)
+	mov	rbx,r14		;rbx keeps the original length for the tail check
+
+	mov	edx,eax		;edx keeps the value for the later r10d reloads
+	shl	rax,7
+	sub	rax,96
+	sub	rsp,rax		;reserve eax*128-96 bytes below the frame
+
+	mov	rax,rsp
+	mov	rcx,r15
+	mov	r10d,edx
+	call	_bsaes_key_convert
+	pxor	xmm7,xmm6
+	movdqa	XMMWORD[rax],xmm7
+
+	and	r14,-16		;whole 16-byte blocks only
+	sub	rsp,0x80		;128 bytes at [rsp]: storage for eight tweaks
+	movdqa	xmm6,XMMWORD[32+rbp]		;xmm6=current tweak
+
+	pxor	xmm14,xmm14
+	movdqa	xmm12,XMMWORD[$L$xts_magic]
+	pcmpgtd	xmm14,xmm6		;xmm14=per-dword sign masks of the tweak (0>x)
+
+	sub	r14,0x80
+	jc	NEAR $L$xts_enc_short		;fewer than 128 bytes of whole blocks
+	jmp	NEAR $L$xts_enc_loop
+
+ALIGN	16
+$L$xts_enc_loop:		;8 blocks per pass; each 8-instruction group stores one tweak at [rsp+16*i] and derives the next
+	pshufd	xmm13,xmm14,0x13		;move the sign masks to the dwords selected by $L$xts_magic
+	pxor	xmm14,xmm14
+	movdqa	xmm15,xmm6		;tweak 0
+	movdqa	XMMWORD[rsp],xmm6
+	paddq	xmm6,xmm6		;shift both 64-bit halves left by one
+	pand	xmm13,xmm12		;keep 0x87 in dword 0 and 1 in dword 2 where the shifted-out bits were set
+	pcmpgtd	xmm14,xmm6		;sign masks for the following step
+	pxor	xmm6,xmm13		;xmm6=next tweak
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm0,xmm6		;tweak 1
+	movdqa	XMMWORD[16+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm7,XMMWORD[r12]
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm1,xmm6		;tweak 2
+	movdqa	XMMWORD[32+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm8,XMMWORD[16+r12]
+	pxor	xmm15,xmm7		;input block 0 ^ tweak 0
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm2,xmm6		;tweak 3
+	movdqa	XMMWORD[48+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm9,XMMWORD[32+r12]
+	pxor	xmm0,xmm8
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm3,xmm6		;tweak 4
+	movdqa	XMMWORD[64+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm10,XMMWORD[48+r12]
+	pxor	xmm1,xmm9
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm4,xmm6		;tweak 5
+	movdqa	XMMWORD[80+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm11,XMMWORD[64+r12]
+	pxor	xmm2,xmm10
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm5,xmm6		;tweak 6
+	movdqa	XMMWORD[96+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13		;xmm6=tweak 7
+	movdqu	xmm12,XMMWORD[80+r12]		;xmm12 ($L$xts_magic) is overwritten here and reloaded after the call
+	pxor	xmm3,xmm11
+	movdqu	xmm13,XMMWORD[96+r12]
+	pxor	xmm4,xmm12
+	movdqu	xmm14,XMMWORD[112+r12]
+	lea	r12,[128+r12]
+	movdqa	XMMWORD[112+rsp],xmm6
+	pxor	xmm5,xmm13
+	lea	rax,[128+rsp]		;rax=address just above the tweak storage
+	pxor	xmm6,xmm14
+	mov	r10d,edx
+
+	call	_bsaes_encrypt8
+
+	pxor	xmm15,XMMWORD[rsp]		;results are consumed in the order xmm15,0,3,5,2,6,1,4, each XORed with its tweak
+	pxor	xmm0,XMMWORD[16+rsp]
+	movdqu	XMMWORD[r13],xmm15
+	pxor	xmm3,XMMWORD[32+rsp]
+	movdqu	XMMWORD[16+r13],xmm0
+	pxor	xmm5,XMMWORD[48+rsp]
+	movdqu	XMMWORD[32+r13],xmm3
+	pxor	xmm2,XMMWORD[64+rsp]
+	movdqu	XMMWORD[48+r13],xmm5
+	pxor	xmm6,XMMWORD[80+rsp]
+	movdqu	XMMWORD[64+r13],xmm2
+	pxor	xmm1,XMMWORD[96+rsp]
+	movdqu	XMMWORD[80+r13],xmm6
+	pxor	xmm4,XMMWORD[112+rsp]
+	movdqu	XMMWORD[96+r13],xmm1
+	movdqu	XMMWORD[112+r13],xmm4
+	lea	r13,[128+r13]
+
+	movdqa	xmm6,XMMWORD[112+rsp]		;last tweak used; the steps below derive the one after it
+	pxor	xmm14,xmm14
+	movdqa	xmm12,XMMWORD[$L$xts_magic]
+	pcmpgtd	xmm14,xmm6
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+
+	sub	r14,0x80
+	jnc	NEAR $L$xts_enc_loop		;at least 128 more bytes of whole blocks
+
+$L$xts_enc_short:
+	add	r14,0x80		;r14=bytes of whole blocks still to do (0..112)
+	jz	NEAR $L$xts_enc_done
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm15,xmm6		;tweak 0
+	movdqa	XMMWORD[rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm0,xmm6		;tweak 1
+	movdqa	XMMWORD[16+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm7,XMMWORD[r12]
+	cmp	r14,16
+	je	NEAR $L$xts_enc_1		;one block left
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm1,xmm6		;tweak 2
+	movdqa	XMMWORD[32+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm8,XMMWORD[16+r12]
+	cmp	r14,32
+	je	NEAR $L$xts_enc_2		;two blocks left
+	pxor	xmm15,xmm7
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm2,xmm6		;tweak 3
+	movdqa	XMMWORD[48+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm9,XMMWORD[32+r12]
+	cmp	r14,48
+	je	NEAR $L$xts_enc_3		;three blocks left
+	pxor	xmm0,xmm8
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm3,xmm6		;tweak 4
+	movdqa	XMMWORD[64+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm10,XMMWORD[48+r12]
+	cmp	r14,64
+	je	NEAR $L$xts_enc_4		;four blocks left
+	pxor	xmm1,xmm9
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm4,xmm6		;tweak 5
+	movdqa	XMMWORD[80+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm11,XMMWORD[64+r12]
+	cmp	r14,80
+	je	NEAR $L$xts_enc_5		;five blocks left
+	pxor	xmm2,xmm10
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm5,xmm6		;tweak 6
+	movdqa	XMMWORD[96+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm12,XMMWORD[80+r12]
+	cmp	r14,96
+	je	NEAR $L$xts_enc_6		;six blocks left
+	pxor	xmm3,xmm11		;fall through: seven blocks left
+	movdqu	xmm13,XMMWORD[96+r12]
+	pxor	xmm4,xmm12
+	movdqa	XMMWORD[112+rsp],xmm6		;tweak 7 is kept for a possible partial final block
+	lea	r12,[112+r12]
+	pxor	xmm5,xmm13
+	lea	rax,[128+rsp]
+	mov	r10d,edx
+
+	call	_bsaes_encrypt8
+
+	pxor	xmm15,XMMWORD[rsp]
+	pxor	xmm0,XMMWORD[16+rsp]
+	movdqu	XMMWORD[r13],xmm15
+	pxor	xmm3,XMMWORD[32+rsp]
+	movdqu	XMMWORD[16+r13],xmm0
+	pxor	xmm5,XMMWORD[48+rsp]
+	movdqu	XMMWORD[32+r13],xmm3
+	pxor	xmm2,XMMWORD[64+rsp]
+	movdqu	XMMWORD[48+r13],xmm5
+	pxor	xmm6,XMMWORD[80+rsp]
+	movdqu	XMMWORD[64+r13],xmm2
+	pxor	xmm1,XMMWORD[96+rsp]
+	movdqu	XMMWORD[80+r13],xmm6
+	movdqu	XMMWORD[96+r13],xmm1
+	lea	r13,[112+r13]
+
+	movdqa	xmm6,XMMWORD[112+rsp]		;xmm6=first unused tweak
+	jmp	NEAR $L$xts_enc_done
+ALIGN	16
+$L$xts_enc_6:		;six blocks left; each tail below finishes the pending input^tweak XORs, encrypts, and leaves the first unused tweak in xmm6
+	pxor	xmm3,xmm11
+	lea	r12,[96+r12]
+	pxor	xmm4,xmm12
+	lea	rax,[128+rsp]
+	mov	r10d,edx
+
+	call	_bsaes_encrypt8
+
+	pxor	xmm15,XMMWORD[rsp]
+	pxor	xmm0,XMMWORD[16+rsp]
+	movdqu	XMMWORD[r13],xmm15
+	pxor	xmm3,XMMWORD[32+rsp]
+	movdqu	XMMWORD[16+r13],xmm0
+	pxor	xmm5,XMMWORD[48+rsp]
+	movdqu	XMMWORD[32+r13],xmm3
+	pxor	xmm2,XMMWORD[64+rsp]
+	movdqu	XMMWORD[48+r13],xmm5
+	pxor	xmm6,XMMWORD[80+rsp]
+	movdqu	XMMWORD[64+r13],xmm2
+	movdqu	XMMWORD[80+r13],xmm6
+	lea	r13,[96+r13]
+
+	movdqa	xmm6,XMMWORD[96+rsp]
+	jmp	NEAR $L$xts_enc_done
+ALIGN	16
+$L$xts_enc_5:		;five blocks left
+	pxor	xmm2,xmm10
+	lea	r12,[80+r12]
+	pxor	xmm3,xmm11
+	lea	rax,[128+rsp]
+	mov	r10d,edx
+
+	call	_bsaes_encrypt8
+
+	pxor	xmm15,XMMWORD[rsp]
+	pxor	xmm0,XMMWORD[16+rsp]
+	movdqu	XMMWORD[r13],xmm15
+	pxor	xmm3,XMMWORD[32+rsp]
+	movdqu	XMMWORD[16+r13],xmm0
+	pxor	xmm5,XMMWORD[48+rsp]
+	movdqu	XMMWORD[32+r13],xmm3
+	pxor	xmm2,XMMWORD[64+rsp]
+	movdqu	XMMWORD[48+r13],xmm5
+	movdqu	XMMWORD[64+r13],xmm2
+	lea	r13,[80+r13]
+
+	movdqa	xmm6,XMMWORD[80+rsp]
+	jmp	NEAR $L$xts_enc_done
+ALIGN	16
+$L$xts_enc_4:		;four blocks left
+	pxor	xmm1,xmm9
+	lea	r12,[64+r12]
+	pxor	xmm2,xmm10
+	lea	rax,[128+rsp]
+	mov	r10d,edx
+
+	call	_bsaes_encrypt8
+
+	pxor	xmm15,XMMWORD[rsp]
+	pxor	xmm0,XMMWORD[16+rsp]
+	movdqu	XMMWORD[r13],xmm15
+	pxor	xmm3,XMMWORD[32+rsp]
+	movdqu	XMMWORD[16+r13],xmm0
+	pxor	xmm5,XMMWORD[48+rsp]
+	movdqu	XMMWORD[32+r13],xmm3
+	movdqu	XMMWORD[48+r13],xmm5
+	lea	r13,[64+r13]
+
+	movdqa	xmm6,XMMWORD[64+rsp]
+	jmp	NEAR $L$xts_enc_done
+ALIGN	16
+$L$xts_enc_3:		;three blocks left
+	pxor	xmm0,xmm8
+	lea	r12,[48+r12]
+	pxor	xmm1,xmm9
+	lea	rax,[128+rsp]
+	mov	r10d,edx
+
+	call	_bsaes_encrypt8
+
+	pxor	xmm15,XMMWORD[rsp]
+	pxor	xmm0,XMMWORD[16+rsp]
+	movdqu	XMMWORD[r13],xmm15
+	pxor	xmm3,XMMWORD[32+rsp]
+	movdqu	XMMWORD[16+r13],xmm0
+	movdqu	XMMWORD[32+r13],xmm3
+	lea	r13,[48+r13]
+
+	movdqa	xmm6,XMMWORD[48+rsp]
+	jmp	NEAR $L$xts_enc_done
+ALIGN	16
+$L$xts_enc_2:		;two blocks left
+	pxor	xmm15,xmm7
+	lea	r12,[32+r12]
+	pxor	xmm0,xmm8
+	lea	rax,[128+rsp]
+	mov	r10d,edx
+
+	call	_bsaes_encrypt8
+
+	pxor	xmm15,XMMWORD[rsp]
+	pxor	xmm0,XMMWORD[16+rsp]
+	movdqu	XMMWORD[r13],xmm15
+	movdqu	XMMWORD[16+r13],xmm0
+	lea	r13,[32+r13]
+
+	movdqa	xmm6,XMMWORD[32+rsp]
+	jmp	NEAR $L$xts_enc_done
+ALIGN	16
+$L$xts_enc_1:		;one block left: uses asm_AES_encrypt instead of the 8-way routine
+	pxor	xmm7,xmm15		;input block ^ tweak 0
+	lea	r12,[16+r12]
+	movdqa	XMMWORD[32+rbp],xmm7
+	lea	rcx,[32+rbp]		;encrypt in place at [32+rbp]
+	lea	rdx,[32+rbp]
+	lea	r8,[r15]
+	call	asm_AES_encrypt
+	pxor	xmm15,XMMWORD[32+rbp]		;tweak 0 ^ encrypted block
+
+
+
+
+
+	movdqu	XMMWORD[r13],xmm15
+	lea	r13,[16+r13]
+
+	movdqa	xmm6,XMMWORD[16+rsp]		;xmm6=tweak 1, the first unused one
+
+$L$xts_enc_done:
+	and	ebx,15		;ebx=length mod 16: bytes in a partial final block
+	jz	NEAR $L$xts_enc_ret
+	mov	rdx,r13
+
+$L$xts_enc_steal:		;per tail byte: swap it into the last full output block and move the displaced byte to the partial block
+	movzx	eax,BYTE[r12]		;next leftover input byte
+	movzx	ecx,BYTE[((-16))+rdx]		;matching byte of the last full output block
+	lea	r12,[1+r12]
+	mov	BYTE[((-16))+rdx],al
+	mov	BYTE[rdx],cl
+	lea	rdx,[1+rdx]
+	sub	ebx,1
+	jnz	NEAR $L$xts_enc_steal
+
+	movdqu	xmm15,XMMWORD[((-16))+r13]		;re-encrypt the patched block with the tweak in xmm6
+	lea	rcx,[32+rbp]
+	pxor	xmm15,xmm6
+	lea	rdx,[32+rbp]
+	movdqa	XMMWORD[32+rbp],xmm15
+	lea	r8,[r15]
+	call	asm_AES_encrypt
+	pxor	xmm6,XMMWORD[32+rbp]
+	movdqu	XMMWORD[(-16)+r13],xmm6
+
+$L$xts_enc_ret:
+	lea	rax,[rsp]
+	pxor	xmm0,xmm0
+$L$xts_enc_bzero:		;zero the stack from rsp upward, 32 bytes per pass, while rax is below rbp
+	movdqa	XMMWORD[rax],xmm0
+	movdqa	XMMWORD[16+rax],xmm0
+	lea	rax,[32+rax]
+	cmp	rbp,rax
+	ja	NEAR $L$xts_enc_bzero
+
+	lea	rax,[120+rbp]
+	movaps	xmm6,XMMWORD[64+rbp]
+	movaps	xmm7,XMMWORD[80+rbp]
+	movaps	xmm8,XMMWORD[96+rbp]
+	movaps	xmm9,XMMWORD[112+rbp]
+	movaps	xmm10,XMMWORD[128+rbp]
+	movaps	xmm11,XMMWORD[144+rbp]
+	movaps	xmm12,XMMWORD[160+rbp]
+	movaps	xmm13,XMMWORD[176+rbp]
+	movaps	xmm14,XMMWORD[192+rbp]
+	movaps	xmm15,XMMWORD[208+rbp]
+	lea	rax,[160+rax]		;rax=rbp+280=rsp at function entry
+$L$xts_enc_tail:
+	mov	r15,QWORD[((-48))+rax]		;pushed last
+	mov	r14,QWORD[((-40))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	rbx,QWORD[((-16))+rax]
+	mov	rbp,QWORD[((-8))+rax]		;pushed first
+	lea	rsp,[rax]
+$L$xts_enc_epilogue:
+	DB	0F3h,0C3h		;repret
+
+
+global	bsaes_xts_decrypt
+;Win64 entry: rcx=input, rdx=output, r8=length in bytes, r9=data key; two stack arguments: a second key and a pointer to a 16-byte block that is encrypted with it to form the first tweak.
+ALIGN	16
+bsaes_xts_decrypt:
+	mov	rax,rsp
+$L$xts_dec_prologue:
+	push	rbp
+	push	rbx
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	lea	rsp,[((-72))+rsp]
+	mov	r10,QWORD[160+rsp]		;first stack argument ([entry rsp+40]): key passed to asm_AES_encrypt below
+	mov	r11,QWORD[168+rsp]		;second stack argument ([entry rsp+48]): block encrypted below
+	lea	rsp,[((-160))+rsp]
+	movaps	XMMWORD[64+rsp],xmm6
+	movaps	XMMWORD[80+rsp],xmm7
+	movaps	XMMWORD[96+rsp],xmm8
+	movaps	XMMWORD[112+rsp],xmm9
+	movaps	XMMWORD[128+rsp],xmm10
+	movaps	XMMWORD[144+rsp],xmm11
+	movaps	XMMWORD[160+rsp],xmm12
+	movaps	XMMWORD[176+rsp],xmm13
+	movaps	XMMWORD[192+rsp],xmm14
+	movaps	XMMWORD[208+rsp],xmm15
+$L$xts_dec_body:
+	mov	rbp,rsp
+	mov	r12,rcx		;r12=input pointer
+	mov	r13,rdx		;r13=output pointer
+	mov	r14,r8		;r14=length in bytes
+	mov	r15,r9		;r15=data key pointer
+
+	lea	rcx,[r11]
+	lea	rdx,[32+rbp]		;result (the initial tweak) is written to [32+rbp]
+	lea	r8,[r10]
+	call	asm_AES_encrypt		;the tweak is produced with the encrypt routine even when decrypting
+
+	mov	eax,DWORD[240+r15]		;dword at key+240 (presumably the round count -- confirm against the key structure)
+	mov	rbx,r14		;rbx keeps the original length for the tail check
+
+	mov	edx,eax		;edx keeps the value for the later r10d reloads
+	shl	rax,7
+	sub	rax,96
+	sub	rsp,rax		;reserve eax*128-96 bytes below the frame
+
+	mov	rax,rsp
+	mov	rcx,r15
+	mov	r10d,edx
+	call	_bsaes_key_convert
+	pxor	xmm7,XMMWORD[rsp]
+	movdqa	XMMWORD[rax],xmm6
+	movdqa	XMMWORD[rsp],xmm7
+
+	xor	eax,eax
+	and	r14,-16		;whole 16-byte blocks only
+	test	ebx,15
+	setnz	al
+	shl	rax,4		;rax=16 if the length has a partial final block, else 0
+	sub	r14,rax		;hold the last whole block back for $L$xts_dec_done in that case
+
+	sub	rsp,0x80		;128 bytes at [rsp]: storage for eight tweaks
+	movdqa	xmm6,XMMWORD[32+rbp]		;xmm6=current tweak
+
+	pxor	xmm14,xmm14
+	movdqa	xmm12,XMMWORD[$L$xts_magic]
+	pcmpgtd	xmm14,xmm6		;xmm14=per-dword sign masks of the tweak (0>x)
+
+	sub	r14,0x80
+	jc	NEAR $L$xts_dec_short		;fewer than 128 bytes of whole blocks
+	jmp	NEAR $L$xts_dec_loop
+
+ALIGN	16
+$L$xts_dec_loop:		;8 blocks per pass; each 8-instruction group stores one tweak at [rsp+16*i] and derives the next
+	pshufd	xmm13,xmm14,0x13		;move the sign masks to the dwords selected by $L$xts_magic
+	pxor	xmm14,xmm14
+	movdqa	xmm15,xmm6		;tweak 0
+	movdqa	XMMWORD[rsp],xmm6
+	paddq	xmm6,xmm6		;shift both 64-bit halves left by one
+	pand	xmm13,xmm12		;keep 0x87 in dword 0 and 1 in dword 2 where the shifted-out bits were set
+	pcmpgtd	xmm14,xmm6		;sign masks for the following step
+	pxor	xmm6,xmm13		;xmm6=next tweak
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm0,xmm6		;tweak 1
+	movdqa	XMMWORD[16+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm7,XMMWORD[r12]
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm1,xmm6		;tweak 2
+	movdqa	XMMWORD[32+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm8,XMMWORD[16+r12]
+	pxor	xmm15,xmm7		;input block 0 ^ tweak 0
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm2,xmm6		;tweak 3
+	movdqa	XMMWORD[48+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm9,XMMWORD[32+r12]
+	pxor	xmm0,xmm8
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm3,xmm6		;tweak 4
+	movdqa	XMMWORD[64+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm10,XMMWORD[48+r12]
+	pxor	xmm1,xmm9
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm4,xmm6		;tweak 5
+	movdqa	XMMWORD[80+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm11,XMMWORD[64+r12]
+	pxor	xmm2,xmm10
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm5,xmm6		;tweak 6
+	movdqa	XMMWORD[96+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13		;xmm6=tweak 7
+	movdqu	xmm12,XMMWORD[80+r12]		;xmm12 ($L$xts_magic) is overwritten here and reloaded after the call
+	pxor	xmm3,xmm11
+	movdqu	xmm13,XMMWORD[96+r12]
+	pxor	xmm4,xmm12
+	movdqu	xmm14,XMMWORD[112+r12]
+	lea	r12,[128+r12]
+	movdqa	XMMWORD[112+rsp],xmm6
+	pxor	xmm5,xmm13
+	lea	rax,[128+rsp]		;rax=address just above the tweak storage
+	pxor	xmm6,xmm14
+	mov	r10d,edx
+
+	call	_bsaes_decrypt8
+
+	pxor	xmm15,XMMWORD[rsp]		;results are consumed in the order xmm15,0,5,3,1,6,2,4, each XORed with its tweak
+	pxor	xmm0,XMMWORD[16+rsp]
+	movdqu	XMMWORD[r13],xmm15
+	pxor	xmm5,XMMWORD[32+rsp]
+	movdqu	XMMWORD[16+r13],xmm0
+	pxor	xmm3,XMMWORD[48+rsp]
+	movdqu	XMMWORD[32+r13],xmm5
+	pxor	xmm1,XMMWORD[64+rsp]
+	movdqu	XMMWORD[48+r13],xmm3
+	pxor	xmm6,XMMWORD[80+rsp]
+	movdqu	XMMWORD[64+r13],xmm1
+	pxor	xmm2,XMMWORD[96+rsp]
+	movdqu	XMMWORD[80+r13],xmm6
+	pxor	xmm4,XMMWORD[112+rsp]
+	movdqu	XMMWORD[96+r13],xmm2
+	movdqu	XMMWORD[112+r13],xmm4
+	lea	r13,[128+r13]
+
+	movdqa	xmm6,XMMWORD[112+rsp]		;last tweak used; the steps below derive the one after it
+	pxor	xmm14,xmm14
+	movdqa	xmm12,XMMWORD[$L$xts_magic]
+	pcmpgtd	xmm14,xmm6
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+
+	sub	r14,0x80
+	jnc	NEAR $L$xts_dec_loop		;at least 128 more bytes of whole blocks
+
+$L$xts_dec_short:
+	add	r14,0x80		;r14=bytes of whole blocks still to do here (0..112)
+	jz	NEAR $L$xts_dec_done
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm15,xmm6		;tweak 0
+	movdqa	XMMWORD[rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm0,xmm6		;tweak 1
+	movdqa	XMMWORD[16+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm7,XMMWORD[r12]
+	cmp	r14,16
+	je	NEAR $L$xts_dec_1		;one block left
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm1,xmm6		;tweak 2
+	movdqa	XMMWORD[32+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm8,XMMWORD[16+r12]
+	cmp	r14,32
+	je	NEAR $L$xts_dec_2		;two blocks left
+	pxor	xmm15,xmm7
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm2,xmm6		;tweak 3
+	movdqa	XMMWORD[48+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm9,XMMWORD[32+r12]
+	cmp	r14,48
+	je	NEAR $L$xts_dec_3		;three blocks left
+	pxor	xmm0,xmm8
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm3,xmm6		;tweak 4
+	movdqa	XMMWORD[64+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm10,XMMWORD[48+r12]
+	cmp	r14,64
+	je	NEAR $L$xts_dec_4		;four blocks left
+	pxor	xmm1,xmm9
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm4,xmm6		;tweak 5
+	movdqa	XMMWORD[80+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm11,XMMWORD[64+r12]
+	cmp	r14,80
+	je	NEAR $L$xts_dec_5		;five blocks left
+	pxor	xmm2,xmm10
+	pshufd	xmm13,xmm14,0x13
+	pxor	xmm14,xmm14
+	movdqa	xmm5,xmm6		;tweak 6
+	movdqa	XMMWORD[96+rsp],xmm6
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	pcmpgtd	xmm14,xmm6
+	pxor	xmm6,xmm13
+	movdqu	xmm12,XMMWORD[80+r12]
+	cmp	r14,96
+	je	NEAR $L$xts_dec_6		;six blocks left
+	pxor	xmm3,xmm11		;fall through: seven blocks left
+	movdqu	xmm13,XMMWORD[96+r12]
+	pxor	xmm4,xmm12
+	movdqa	XMMWORD[112+rsp],xmm6		;tweak 7 is kept for a possible partial final block
+	lea	r12,[112+r12]
+	pxor	xmm5,xmm13
+	lea	rax,[128+rsp]
+	mov	r10d,edx
+
+	call	_bsaes_decrypt8
+
+	pxor	xmm15,XMMWORD[rsp]
+	pxor	xmm0,XMMWORD[16+rsp]
+	movdqu	XMMWORD[r13],xmm15
+	pxor	xmm5,XMMWORD[32+rsp]
+	movdqu	XMMWORD[16+r13],xmm0
+	pxor	xmm3,XMMWORD[48+rsp]
+	movdqu	XMMWORD[32+r13],xmm5
+	pxor	xmm1,XMMWORD[64+rsp]
+	movdqu	XMMWORD[48+r13],xmm3
+	pxor	xmm6,XMMWORD[80+rsp]
+	movdqu	XMMWORD[64+r13],xmm1
+	pxor	xmm2,XMMWORD[96+rsp]
+	movdqu	XMMWORD[80+r13],xmm6
+	movdqu	XMMWORD[96+r13],xmm2
+	lea	r13,[112+r13]
+
+	movdqa	xmm6,XMMWORD[112+rsp]		;xmm6=first unused tweak
+	jmp	NEAR $L$xts_dec_done
+ALIGN	16
+$L$xts_dec_6:		;six blocks left; each tail below finishes the pending input^tweak XORs, decrypts, and leaves the first unused tweak in xmm6
+	pxor	xmm3,xmm11
+	lea	r12,[96+r12]
+	pxor	xmm4,xmm12
+	lea	rax,[128+rsp]
+	mov	r10d,edx
+
+	call	_bsaes_decrypt8
+
+	pxor	xmm15,XMMWORD[rsp]
+	pxor	xmm0,XMMWORD[16+rsp]
+	movdqu	XMMWORD[r13],xmm15
+	pxor	xmm5,XMMWORD[32+rsp]
+	movdqu	XMMWORD[16+r13],xmm0
+	pxor	xmm3,XMMWORD[48+rsp]
+	movdqu	XMMWORD[32+r13],xmm5
+	pxor	xmm1,XMMWORD[64+rsp]
+	movdqu	XMMWORD[48+r13],xmm3
+	pxor	xmm6,XMMWORD[80+rsp]
+	movdqu	XMMWORD[64+r13],xmm1
+	movdqu	XMMWORD[80+r13],xmm6
+	lea	r13,[96+r13]
+
+	movdqa	xmm6,XMMWORD[96+rsp]
+	jmp	NEAR $L$xts_dec_done
+ALIGN	16
+$L$xts_dec_5:		;five blocks left
+	pxor	xmm2,xmm10
+	lea	r12,[80+r12]
+	pxor	xmm3,xmm11
+	lea	rax,[128+rsp]
+	mov	r10d,edx
+
+	call	_bsaes_decrypt8
+
+	pxor	xmm15,XMMWORD[rsp]
+	pxor	xmm0,XMMWORD[16+rsp]
+	movdqu	XMMWORD[r13],xmm15
+	pxor	xmm5,XMMWORD[32+rsp]
+	movdqu	XMMWORD[16+r13],xmm0
+	pxor	xmm3,XMMWORD[48+rsp]
+	movdqu	XMMWORD[32+r13],xmm5
+	pxor	xmm1,XMMWORD[64+rsp]
+	movdqu	XMMWORD[48+r13],xmm3
+	movdqu	XMMWORD[64+r13],xmm1
+	lea	r13,[80+r13]
+
+	movdqa	xmm6,XMMWORD[80+rsp]
+	jmp	NEAR $L$xts_dec_done
+ALIGN	16
+$L$xts_dec_4:		;four blocks left
+	pxor	xmm1,xmm9
+	lea	r12,[64+r12]
+	pxor	xmm2,xmm10
+	lea	rax,[128+rsp]
+	mov	r10d,edx
+
+	call	_bsaes_decrypt8
+
+	pxor	xmm15,XMMWORD[rsp]
+	pxor	xmm0,XMMWORD[16+rsp]
+	movdqu	XMMWORD[r13],xmm15
+	pxor	xmm5,XMMWORD[32+rsp]
+	movdqu	XMMWORD[16+r13],xmm0
+	pxor	xmm3,XMMWORD[48+rsp]
+	movdqu	XMMWORD[32+r13],xmm5
+	movdqu	XMMWORD[48+r13],xmm3
+	lea	r13,[64+r13]
+
+	movdqa	xmm6,XMMWORD[64+rsp]
+	jmp	NEAR $L$xts_dec_done
+ALIGN	16
+$L$xts_dec_3:		;three blocks left
+	pxor	xmm0,xmm8
+	lea	r12,[48+r12]
+	pxor	xmm1,xmm9
+	lea	rax,[128+rsp]
+	mov	r10d,edx
+
+	call	_bsaes_decrypt8
+
+	pxor	xmm15,XMMWORD[rsp]
+	pxor	xmm0,XMMWORD[16+rsp]
+	movdqu	XMMWORD[r13],xmm15
+	pxor	xmm5,XMMWORD[32+rsp]
+	movdqu	XMMWORD[16+r13],xmm0
+	movdqu	XMMWORD[32+r13],xmm5
+	lea	r13,[48+r13]
+
+	movdqa	xmm6,XMMWORD[48+rsp]
+	jmp	NEAR $L$xts_dec_done
+ALIGN	16
+$L$xts_dec_2:		;two blocks left
+	pxor	xmm15,xmm7
+	lea	r12,[32+r12]
+	pxor	xmm0,xmm8
+	lea	rax,[128+rsp]
+	mov	r10d,edx
+
+	call	_bsaes_decrypt8
+
+	pxor	xmm15,XMMWORD[rsp]
+	pxor	xmm0,XMMWORD[16+rsp]
+	movdqu	XMMWORD[r13],xmm15
+	movdqu	XMMWORD[16+r13],xmm0
+	lea	r13,[32+r13]
+
+	movdqa	xmm6,XMMWORD[32+rsp]
+	jmp	NEAR $L$xts_dec_done
+ALIGN	16
+$L$xts_dec_1:		;one block left: uses asm_AES_decrypt instead of the 8-way routine
+	pxor	xmm7,xmm15		;input block ^ tweak 0
+	lea	r12,[16+r12]
+	movdqa	XMMWORD[32+rbp],xmm7
+	lea	rcx,[32+rbp]		;decrypt in place at [32+rbp]
+	lea	rdx,[32+rbp]
+	lea	r8,[r15]
+	call	asm_AES_decrypt
+	pxor	xmm15,XMMWORD[32+rbp]		;tweak 0 ^ decrypted block
+
+
+
+
+
+	movdqu	XMMWORD[r13],xmm15
+	lea	r13,[16+r13]
+
+	movdqa	xmm6,XMMWORD[16+rsp]		;xmm6=tweak 1, the first unused one
+
+$L$xts_dec_done:
+	and	ebx,15		;ebx=length mod 16: bytes in a partial final block
+	jz	NEAR $L$xts_dec_ret
+;Partial final block: the held-back whole block is decrypted with the NEXT tweak (xmm6); the current one is kept in xmm5 for the second pass.
+	pxor	xmm14,xmm14
+	movdqa	xmm12,XMMWORD[$L$xts_magic]
+	pcmpgtd	xmm14,xmm6
+	pshufd	xmm13,xmm14,0x13
+	movdqa	xmm5,xmm6		;xmm5=current tweak
+	paddq	xmm6,xmm6
+	pand	xmm13,xmm12
+	movdqu	xmm15,XMMWORD[r12]		;the held-back whole input block
+	pxor	xmm6,xmm13		;xmm6=following tweak
+
+	lea	rcx,[32+rbp]
+	pxor	xmm15,xmm6
+	lea	rdx,[32+rbp]
+	movdqa	XMMWORD[32+rbp],xmm15
+	lea	r8,[r15]
+	call	asm_AES_decrypt
+	pxor	xmm6,XMMWORD[32+rbp]
+	mov	rdx,r13
+	movdqu	XMMWORD[r13],xmm6
+
+$L$xts_dec_steal:		;per tail byte: swap it into the block just written and move the displaced byte to the partial output block
+	movzx	eax,BYTE[16+r12]		;next leftover input byte (after the whole block at [r12])
+	movzx	ecx,BYTE[rdx]		;matching byte of the block just written
+	lea	r12,[1+r12]
+	mov	BYTE[rdx],al
+	mov	BYTE[16+rdx],cl
+	lea	rdx,[1+rdx]
+	sub	ebx,1
+	jnz	NEAR $L$xts_dec_steal
+
+	movdqu	xmm15,XMMWORD[r13]		;decrypt the patched block with the tweak saved in xmm5
+	lea	rcx,[32+rbp]
+	pxor	xmm15,xmm5
+	lea	rdx,[32+rbp]
+	movdqa	XMMWORD[32+rbp],xmm15
+	lea	r8,[r15]
+	call	asm_AES_decrypt
+	pxor	xmm5,XMMWORD[32+rbp]
+	movdqu	XMMWORD[r13],xmm5
+
+$L$xts_dec_ret:
+	lea	rax,[rsp]
+	pxor	xmm0,xmm0
+$L$xts_dec_bzero:		;zero the stack from rsp upward, 32 bytes per pass, while rax is below rbp
+	movdqa	XMMWORD[rax],xmm0
+	movdqa	XMMWORD[16+rax],xmm0
+	lea	rax,[32+rax]
+	cmp	rbp,rax
+	ja	NEAR $L$xts_dec_bzero
+
+	lea	rax,[120+rbp]
+	movaps	xmm6,XMMWORD[64+rbp]
+	movaps	xmm7,XMMWORD[80+rbp]
+	movaps	xmm8,XMMWORD[96+rbp]
+	movaps	xmm9,XMMWORD[112+rbp]
+	movaps	xmm10,XMMWORD[128+rbp]
+	movaps	xmm11,XMMWORD[144+rbp]
+	movaps	xmm12,XMMWORD[160+rbp]
+	movaps	xmm13,XMMWORD[176+rbp]
+	movaps	xmm14,XMMWORD[192+rbp]
+	movaps	xmm15,XMMWORD[208+rbp]
+	lea	rax,[160+rax]		;rax=rbp+280=rsp at function entry
+$L$xts_dec_tail:
+	mov	r15,QWORD[((-48))+rax]		;pushed last
+	mov	r14,QWORD[((-40))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	rbx,QWORD[((-16))+rax]
+	mov	rbp,QWORD[((-8))+rax]		;pushed first
+	lea	rsp,[rax]
+$L$xts_dec_epilogue:
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	64
+_bsaes_const:		;constant table; every labelled entry up to $L$xts_magic is 16 bytes, and code reaches neighbours by offset from a label
+$L$M0ISR:
+	DQ	0x0a0e0206070b0f03,0x0004080c0d010509
+$L$ISRM0:
+	DQ	0x01040b0e0205080f,0x0306090c00070a0d
+$L$ISR:
+	DQ	0x0504070602010003,0x0f0e0d0c080b0a09
+$L$BS0:
+	DQ	0x5555555555555555,0x5555555555555555
+$L$BS1:
+	DQ	0x3333333333333333,0x3333333333333333
+$L$BS2:
+	DQ	0x0f0f0f0f0f0f0f0f,0x0f0f0f0f0f0f0f0f
+$L$SR:
+	DQ	0x0504070600030201,0x0f0e0d0c0a09080b
+$L$SRM0:
+	DQ	0x0304090e00050a0f,0x01060b0c0207080d
+$L$M0SR:
+	DQ	0x0a0e02060f03070b,0x0004080c05090d01
+$L$SWPUP:		;read by the CTR code as [$L$ADD1-32]
+	DQ	0x0706050403020100,0x0c0d0e0f0b0a0908
+$L$SWPUPM0SR:		;read by the CTR code as [$L$ADD1-16]
+	DQ	0x0a0d02060c03070b,0x0004080f05090e01
+$L$ADD1:		;$L$ADD1..$L$ADD8: value n in the top dword, zero elsewhere; applied with paddd as [$L$ADD1+16*(n-1)]
+	DQ	0x0000000000000000,0x0000000100000000
+$L$ADD2:
+	DQ	0x0000000000000000,0x0000000200000000
+$L$ADD3:
+	DQ	0x0000000000000000,0x0000000300000000
+$L$ADD4:
+	DQ	0x0000000000000000,0x0000000400000000
+$L$ADD5:
+	DQ	0x0000000000000000,0x0000000500000000
+$L$ADD6:
+	DQ	0x0000000000000000,0x0000000600000000
+$L$ADD7:
+	DQ	0x0000000000000000,0x0000000700000000
+$L$ADD8:
+	DQ	0x0000000000000000,0x0000000800000000
+$L$xts_magic:		;dword mask (0x87,0,1,0) ANDed with the shuffled sign masks when the XTS code advances its tweak
+	DD	0x87,0,1,0
+$L$masks:
+	DQ	0x0101010101010101,0x0101010101010101
+	DQ	0x0202020202020202,0x0202020202020202
+	DQ	0x0404040404040404,0x0404040404040404
+	DQ	0x0808080808080808,0x0808080808080808
+$L$M0:
+	DQ	0x02060a0e03070b0f,0x0004080c0105090d
+$L$63:
+	DQ	0x6363636363636363,0x6363636363636363
+DB	66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102		;NUL-terminated UTF-8 text: "Bit-sliced AES for x86_64/SSSE3,
+DB	111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44		; Emilia Kasper (a-umlaut in the bytes), Peter Schwabe, Andy Polyakov"
+DB	32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44
+DB	32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32
+DB	65,110,100,121,32,80,111,108,121,97,107,111,118,0
+ALIGN	64
+
+EXTERN	__imp_RtlVirtualUnwind
+;se_handler: Win64 language-specific exception handler shared by the bsaes entry points. r8=CONTEXT record, r9=DISPATCHER_CONTEXT. It rebuilds the caller's non-volatile registers in the CONTEXT, continues the unwind via RtlVirtualUnwind and returns 1.
+ALIGN	16
+se_handler:
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64
+
+	mov	rax,QWORD[120+r8]		;CONTEXT.Rax
+	mov	rbx,QWORD[248+r8]		;CONTEXT.Rip
+
+	mov	rsi,QWORD[8+r9]		;DISPATCHER_CONTEXT.ImageBase
+	mov	r11,QWORD[56+r9]		;DISPATCHER_CONTEXT.HandlerData: three RVAs (body, epilogue, tail)
+
+	mov	r10d,DWORD[r11]		;HandlerData[0]: body label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jbe	NEAR $L$in_prologue		;Rip at or before the body label: rax still holds the entry rsp
+
+	mov	r10d,DWORD[4+r11]		;HandlerData[1]: epilogue label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$in_prologue		;Rip at or past the epilogue label: everything is already restored
+
+	mov	r10d,DWORD[8+r11]		;HandlerData[2]: tail label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$in_tail		;in the tail rax already equals the entry rsp
+
+	mov	rax,QWORD[160+r8]		;CONTEXT.Rbp: the function's frame base
+
+	lea	rsi,[64+rax]		;xmm6-xmm15 save area in the frame
+	lea	rdi,[512+r8]		;CONTEXT.Xmm6
+	mov	ecx,20		;10 registers * 16 bytes / 8
+	DD	0xa548f3fc		;cld; rep movsq
+	lea	rax,[((160+120))+rax]		;rax=frame base+280=rsp at function entry
+
+$L$in_tail:		;the prologues push rbp,rbx,r12,r13,r14,r15 in that order, so rbp sits at [-8] and r15 at [-48]
+	mov	rbp,QWORD[((-8))+rax]
+	mov	rbx,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	r15,QWORD[((-48))+rax]
+	mov	QWORD[144+r8],rbx		;CONTEXT.Rbx
+	mov	QWORD[160+r8],rbp		;CONTEXT.Rbp
+	mov	QWORD[216+r8],r12		;CONTEXT.R12
+	mov	QWORD[224+r8],r13		;CONTEXT.R13
+	mov	QWORD[232+r8],r14		;CONTEXT.R14
+	mov	QWORD[240+r8],r15		;CONTEXT.R15
+
+$L$in_prologue:
+	mov	QWORD[152+r8],rax		;CONTEXT.Rsp
+
+	mov	rdi,QWORD[40+r9]		;DISPATCHER_CONTEXT.ContextRecord
+	mov	rsi,r8
+	mov	ecx,154		;154 qwords = 1232 bytes, the size of CONTEXT
+	DD	0xa548f3fc		;cld; rep movsq
+
+	mov	rsi,r9
+	xor	rcx,rcx		;arg1: handler type 0
+	mov	rdx,QWORD[8+rsi]		;arg2: ImageBase
+	mov	r8,QWORD[rsi]		;arg3: ControlPc
+	mov	r9,QWORD[16+rsi]		;arg4: FunctionEntry
+	mov	r10,QWORD[40+rsi]		;ContextRecord
+	lea	r11,[56+rsi]		;&HandlerData
+	lea	r12,[24+rsi]		;&EstablisherFrame
+	mov	QWORD[32+rsp],r10		;arg5
+	mov	QWORD[40+rsp],r11		;arg6
+	mov	QWORD[48+rsp],r12		;arg7
+	mov	QWORD[56+rsp],rcx		;arg8: NULL
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1		;return 1 (ExceptionContinueSearch)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4
+ALIGN	4
+	DD	$L$cbc_dec_prologue wrt ..imagebase		;each entry: start RVA, end RVA, RVA of its unwind info in .xdata
+	DD	$L$cbc_dec_epilogue wrt ..imagebase
+	DD	$L$cbc_dec_info wrt ..imagebase
+
+	DD	$L$ctr_enc_prologue wrt ..imagebase
+	DD	$L$ctr_enc_epilogue wrt ..imagebase
+	DD	$L$ctr_enc_info wrt ..imagebase
+
+	DD	$L$xts_enc_prologue wrt ..imagebase
+	DD	$L$xts_enc_epilogue wrt ..imagebase
+	DD	$L$xts_enc_info wrt ..imagebase
+
+	DD	$L$xts_dec_prologue wrt ..imagebase
+	DD	$L$xts_dec_epilogue wrt ..imagebase
+	DD	$L$xts_dec_info wrt ..imagebase
+
+section	.xdata rdata align=8
+ALIGN	8
+$L$cbc_dec_info:
+DB	9,0,0,0		;header byte 9 = version 1 with the exception-handler flag; no prolog bytes or unwind codes
+	DD	se_handler wrt ..imagebase
+	DD	$L$cbc_dec_body wrt ..imagebase,$L$cbc_dec_epilogue wrt ..imagebase		;handler data read by se_handler: body, epilogue,
+	DD	$L$cbc_dec_tail wrt ..imagebase		;and tail RVAs
+	DD	0
+$L$ctr_enc_info:
+DB	9,0,0,0
+	DD	se_handler wrt ..imagebase
+	DD	$L$ctr_enc_body wrt ..imagebase,$L$ctr_enc_epilogue wrt ..imagebase
+	DD	$L$ctr_enc_tail wrt ..imagebase
+	DD	0
+$L$xts_enc_info:
+DB	9,0,0,0
+	DD	se_handler wrt ..imagebase
+	DD	$L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase
+	DD	$L$xts_enc_tail wrt ..imagebase
+	DD	0
+$L$xts_dec_info:
+DB	9,0,0,0
+	DD	se_handler wrt ..imagebase
+	DD	$L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase
+	DD	$L$xts_dec_tail wrt ..imagebase
+	DD	0
diff --git a/third_party/boringssl/win-x86_64/crypto/fipsmodule/ghash-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/fipsmodule/ghash-x86_64.asm
new file mode 100644
index 0000000..8ef16f5
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/fipsmodule/ghash-x86_64.asm
@@ -0,0 +1,2030 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.text code align=64
+
+EXTERN	OPENSSL_ia32cap_P
+
+global	gcm_gmult_4bit	; table-driven update of the 16-byte block at arg1 (rcx) using the 16-byte-entry table at arg2 (rdx), one nibble at a time
+
+ALIGN	16
+gcm_gmult_4bit:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_gcm_gmult_4bit:
+	mov	rdi,rcx	; first Win64 argument -> rdi (block pointer)
+	mov	rsi,rdx	; second Win64 argument -> rsi (table pointer)
+
+
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	sub	rsp,280	; same frame size as gcm_ghash_4bit; no stack slot is written in this routine
+$L$gmult_prologue:
+
+	movzx	r8,BYTE[15+rdi]	; start from the last byte of the block
+	lea	r11,[$L$rem_4bit]	; r11 = table of 8-byte entries indexed by the nibble shifted out
+	xor	rax,rax
+	xor	rbx,rbx
+	mov	al,r8b
+	mov	bl,r8b
+	shl	al,4	; rax = (low nibble) * 16 = byte offset of a table entry
+	mov	rcx,14	; index of the next block byte to fetch; counts down to 0
+	mov	r8,QWORD[8+rax*1+rsi]	; accumulator r9:r8 starts as the entry for the low nibble
+	mov	r9,QWORD[rax*1+rsi]
+	and	bl,0xf0	; rbx = (high nibble) * 16
+	mov	rdx,r8
+	jmp	NEAR $L$oop1
+
+ALIGN	16
+$L$oop1:
+	shr	r8,4	; shift the 128-bit accumulator right by four bits ...
+	and	rdx,0xf	; ... rdx = the four bits that fall off the low end
+	mov	r10,r9
+	mov	al,BYTE[rcx*1+rdi]	; fetch the next block byte
+	shr	r9,4
+	xor	r8,QWORD[8+rbx*1+rsi]	; add the table entry for the pending high nibble
+	shl	r10,60	; low four bits of r9 carry into the top of r8
+	xor	r9,QWORD[rbx*1+rsi]
+	mov	bl,al
+	xor	r9,QWORD[rdx*8+r11]	; fold in the rem_4bit entry for the shifted-out bits
+	mov	rdx,r8
+	shl	al,4
+	xor	r8,r10
+	dec	rcx
+	js	NEAR $L$break1	; byte index went below zero: finish outside the loop
+
+	shr	r8,4	; second half of the iteration: same step for the low nibble of the new byte
+	and	rdx,0xf
+	mov	r10,r9
+	shr	r9,4
+	xor	r8,QWORD[8+rax*1+rsi]
+	shl	r10,60
+	xor	r9,QWORD[rax*1+rsi]
+	and	bl,0xf0
+	xor	r9,QWORD[rdx*8+r11]
+	mov	rdx,r8
+	xor	r8,r10
+	jmp	NEAR $L$oop1
+
+ALIGN	16
+$L$break1:
+	shr	r8,4	; last two nibble steps (byte 0 of the block)
+	and	rdx,0xf
+	mov	r10,r9
+	shr	r9,4
+	xor	r8,QWORD[8+rax*1+rsi]
+	shl	r10,60
+	xor	r9,QWORD[rax*1+rsi]
+	and	bl,0xf0
+	xor	r9,QWORD[rdx*8+r11]
+	mov	rdx,r8
+	xor	r8,r10
+
+	shr	r8,4
+	and	rdx,0xf
+	mov	r10,r9
+	shr	r9,4
+	xor	r8,QWORD[8+rbx*1+rsi]
+	shl	r10,60
+	xor	r9,QWORD[rbx*1+rsi]
+	xor	r8,r10
+	xor	r9,QWORD[rdx*8+r11]
+
+	bswap	r8	; byte-reverse both halves before storing the result
+	bswap	r9
+	mov	QWORD[8+rdi],r8
+	mov	QWORD[rdi],r9
+
+	lea	rsi,[((280+48))+rsp]	; rsi = stack pointer value before the six pushes (280 + 6*8)
+	mov	rbx,QWORD[((-8))+rsi]	; only rbx is reloaded: no instruction in this routine writes rbp or r12-r15
+	lea	rsp,[rsi]
+$L$gmult_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_gcm_gmult_4bit:
+global	gcm_ghash_4bit	; args: rcx = 16-byte state, rdx = table of 16-byte entries, r8 = input pointer, r9 = input length in bytes
+
+ALIGN	16
+gcm_ghash_4bit:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_gcm_ghash_4bit:
+	mov	rdi,rcx	; Win64 argument registers are copied to rdi, rsi, rdx, rcx in order
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+
+
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	sub	rsp,280	; room for the 16-byte table at [rsp] and two 128-byte tables at [rsp+16] and [rsp+144]
+$L$ghash_prologue:
+	mov	r14,rdx	; r14 = input pointer
+	mov	r15,rcx	; r15 = length (turned into an end pointer below)
+	sub	rsi,-128	; rsi += 128; table entry i is addressed as (16*i-128)+rsi until the matching add below
+	lea	rbp,[((16+128))+rsp]	; rbp = rsp+144: the two qword tables are [rbp-128+8*i] and [rbp+8*i]
+	xor	edx,edx
+	mov	r8,QWORD[((0+0-128))+rsi]	; entry 0: first qword
+	mov	rax,QWORD[((0+8-128))+rsi]	; entry 0: second qword
+	mov	dl,al
+	shr	rax,4
+	mov	r10,r8
+	shr	r8,4
+	mov	r9,QWORD[((16+0-128))+rsi]	; loads for entry 1 are interleaved with the stores for entry 0
+	shl	dl,4
+	mov	rbx,QWORD[((16+8-128))+rsi]
+	shl	r10,60
+	mov	BYTE[rsp],dl	; [rsp+i] = (low byte of entry i's second qword) << 4
+	or	rax,r10
+	mov	dl,bl
+	shr	rbx,4
+	mov	r10,r9
+	shr	r9,4
+	mov	QWORD[rbp],r8	; [rbp+8*i] = entry i's first qword >> 4
+	mov	r8,QWORD[((32+0-128))+rsi]
+	shl	dl,4
+	mov	QWORD[((0-128))+rbp],rax	; [rbp-128+8*i] = (second qword >> 4) | (first qword << 60)
+	mov	rax,QWORD[((32+8-128))+rsi]
+	shl	r10,60
+	mov	BYTE[1+rsp],dl
+	or	rbx,r10
+	mov	dl,al
+	shr	rax,4
+	mov	r10,r8
+	shr	r8,4
+	mov	QWORD[8+rbp],r9
+	mov	r9,QWORD[((48+0-128))+rsi]
+	shl	dl,4
+	mov	QWORD[((8-128))+rbp],rbx
+	mov	rbx,QWORD[((48+8-128))+rsi]
+	shl	r10,60
+	mov	BYTE[2+rsp],dl
+	or	rax,r10
+	mov	dl,bl
+	shr	rbx,4
+	mov	r10,r9
+	shr	r9,4
+	mov	QWORD[16+rbp],r8
+	mov	r8,QWORD[((64+0-128))+rsi]
+	shl	dl,4
+	mov	QWORD[((16-128))+rbp],rax
+	mov	rax,QWORD[((64+8-128))+rsi]
+	shl	r10,60
+	mov	BYTE[3+rsp],dl
+	or	rbx,r10
+	mov	dl,al
+	shr	rax,4
+	mov	r10,r8
+	shr	r8,4
+	mov	QWORD[24+rbp],r9
+	mov	r9,QWORD[((80+0-128))+rsi]
+	shl	dl,4
+	mov	QWORD[((24-128))+rbp],rbx
+	mov	rbx,QWORD[((80+8-128))+rsi]
+	shl	r10,60
+	mov	BYTE[4+rsp],dl
+	or	rax,r10
+	mov	dl,bl
+	shr	rbx,4
+	mov	r10,r9
+	shr	r9,4
+	mov	QWORD[32+rbp],r8
+	mov	r8,QWORD[((96+0-128))+rsi]
+	shl	dl,4
+	mov	QWORD[((32-128))+rbp],rax
+	mov	rax,QWORD[((96+8-128))+rsi]
+	shl	r10,60
+	mov	BYTE[5+rsp],dl
+	or	rbx,r10
+	mov	dl,al
+	shr	rax,4
+	mov	r10,r8
+	shr	r8,4
+	mov	QWORD[40+rbp],r9
+	mov	r9,QWORD[((112+0-128))+rsi]
+	shl	dl,4
+	mov	QWORD[((40-128))+rbp],rbx
+	mov	rbx,QWORD[((112+8-128))+rsi]
+	shl	r10,60
+	mov	BYTE[6+rsp],dl
+	or	rax,r10
+	mov	dl,bl
+	shr	rbx,4
+	mov	r10,r9
+	shr	r9,4
+	mov	QWORD[48+rbp],r8
+	mov	r8,QWORD[((128+0-128))+rsi]
+	shl	dl,4
+	mov	QWORD[((48-128))+rbp],rax
+	mov	rax,QWORD[((128+8-128))+rsi]
+	shl	r10,60
+	mov	BYTE[7+rsp],dl
+	or	rbx,r10
+	mov	dl,al
+	shr	rax,4
+	mov	r10,r8
+	shr	r8,4
+	mov	QWORD[56+rbp],r9
+	mov	r9,QWORD[((144+0-128))+rsi]
+	shl	dl,4
+	mov	QWORD[((56-128))+rbp],rbx
+	mov	rbx,QWORD[((144+8-128))+rsi]
+	shl	r10,60
+	mov	BYTE[8+rsp],dl
+	or	rax,r10
+	mov	dl,bl
+	shr	rbx,4
+	mov	r10,r9
+	shr	r9,4
+	mov	QWORD[64+rbp],r8
+	mov	r8,QWORD[((160+0-128))+rsi]
+	shl	dl,4
+	mov	QWORD[((64-128))+rbp],rax
+	mov	rax,QWORD[((160+8-128))+rsi]
+	shl	r10,60
+	mov	BYTE[9+rsp],dl
+	or	rbx,r10
+	mov	dl,al
+	shr	rax,4
+	mov	r10,r8
+	shr	r8,4
+	mov	QWORD[72+rbp],r9
+	mov	r9,QWORD[((176+0-128))+rsi]
+	shl	dl,4
+	mov	QWORD[((72-128))+rbp],rbx
+	mov	rbx,QWORD[((176+8-128))+rsi]
+	shl	r10,60
+	mov	BYTE[10+rsp],dl
+	or	rax,r10
+	mov	dl,bl
+	shr	rbx,4
+	mov	r10,r9
+	shr	r9,4
+	mov	QWORD[80+rbp],r8
+	mov	r8,QWORD[((192+0-128))+rsi]
+	shl	dl,4
+	mov	QWORD[((80-128))+rbp],rax
+	mov	rax,QWORD[((192+8-128))+rsi]
+	shl	r10,60
+	mov	BYTE[11+rsp],dl
+	or	rbx,r10
+	mov	dl,al
+	shr	rax,4
+	mov	r10,r8
+	shr	r8,4
+	mov	QWORD[88+rbp],r9
+	mov	r9,QWORD[((208+0-128))+rsi]
+	shl	dl,4
+	mov	QWORD[((88-128))+rbp],rbx
+	mov	rbx,QWORD[((208+8-128))+rsi]
+	shl	r10,60
+	mov	BYTE[12+rsp],dl
+	or	rax,r10
+	mov	dl,bl
+	shr	rbx,4
+	mov	r10,r9
+	shr	r9,4
+	mov	QWORD[96+rbp],r8
+	mov	r8,QWORD[((224+0-128))+rsi]
+	shl	dl,4
+	mov	QWORD[((96-128))+rbp],rax
+	mov	rax,QWORD[((224+8-128))+rsi]
+	shl	r10,60
+	mov	BYTE[13+rsp],dl
+	or	rbx,r10
+	mov	dl,al
+	shr	rax,4
+	mov	r10,r8
+	shr	r8,4
+	mov	QWORD[104+rbp],r9
+	mov	r9,QWORD[((240+0-128))+rsi]
+	shl	dl,4
+	mov	QWORD[((104-128))+rbp],rbx
+	mov	rbx,QWORD[((240+8-128))+rsi]
+	shl	r10,60
+	mov	BYTE[14+rsp],dl
+	or	rax,r10
+	mov	dl,bl
+	shr	rbx,4
+	mov	r10,r9
+	shr	r9,4
+	mov	QWORD[112+rbp],r8
+	shl	dl,4
+	mov	QWORD[((112-128))+rbp],rax
+	shl	r10,60
+	mov	BYTE[15+rsp],dl
+	or	rbx,r10
+	mov	QWORD[120+rbp],r9
+	mov	QWORD[((120-128))+rbp],rbx
+	add	rsi,-128	; undo the +128 bias: rsi points at table entry 0 again
+	mov	r8,QWORD[8+rdi]	; r9:r8 = current 16-byte state
+	mov	r9,QWORD[rdi]
+	add	r15,r14	; r15 = input pointer + length = end of input
+	lea	r11,[$L$rem_8bit]	; r11 = table of 2-byte entries indexed by a byte value
+	jmp	NEAR $L$outer_loop
+ALIGN	16
+$L$outer_loop:
+	xor	r9,QWORD[r14]	; fold the next 16 input bytes into the state ...
+	mov	rdx,QWORD[8+r14]
+	lea	r14,[16+r14]
+	xor	rdx,r8
+	mov	QWORD[rdi],r9	; ... and write it back: later steps reload it four bytes at a time
+	mov	QWORD[8+rdi],rdx
+	shr	rdx,32	; edx = state bytes 12..15
+	xor	rax,rax
+	rol	edx,8	; rotate so the next byte to process lands in dl
+	mov	al,dl
+	movzx	ebx,dl
+	shl	al,4	; rax = (low nibble) * 16: byte offset into the table at rsi
+	shr	ebx,4	; rbx = high nibble: index into the three stack tables
+	rol	edx,8
+	mov	r8,QWORD[8+rax*1+rsi]
+	mov	r9,QWORD[rax*1+rsi]
+	mov	al,dl
+	movzx	ecx,dl
+	shl	al,4
+	movzx	r12,BYTE[rbx*1+rsp]
+	shr	ecx,4
+	xor	r12,r8
+	mov	r10,r9
+	shr	r8,8	; accumulator r9:r8 moves right by a whole byte per step
+	movzx	r12,r12b	; r12 = byte used to index rem_8bit
+	shr	r9,8
+	xor	r8,QWORD[((-128))+rbx*8+rbp]
+	shl	r10,56
+	xor	r9,QWORD[rbx*8+rbp]
+	rol	edx,8
+	xor	r8,QWORD[8+rax*1+rsi]
+	xor	r9,QWORD[rax*1+rsi]
+	mov	al,dl
+	xor	r8,r10
+	movzx	r12,WORD[r12*2+r11]
+	movzx	ebx,dl
+	shl	al,4
+	movzx	r13,BYTE[rcx*1+rsp]
+	shr	ebx,4
+	shl	r12,48	; rem_8bit entry is applied to the top 16 bits of r9
+	xor	r13,r8
+	mov	r10,r9
+	xor	r9,r12
+	shr	r8,8
+	movzx	r13,r13b
+	shr	r9,8
+	xor	r8,QWORD[((-128))+rcx*8+rbp]
+	shl	r10,56
+	xor	r9,QWORD[rcx*8+rbp]
+	rol	edx,8
+	xor	r8,QWORD[8+rax*1+rsi]
+	xor	r9,QWORD[rax*1+rsi]
+	mov	al,dl
+	xor	r8,r10
+	movzx	r13,WORD[r13*2+r11]
+	movzx	ecx,dl
+	shl	al,4
+	movzx	r12,BYTE[rbx*1+rsp]
+	shr	ecx,4
+	shl	r13,48
+	xor	r12,r8
+	mov	r10,r9
+	xor	r9,r13
+	shr	r8,8
+	movzx	r12,r12b
+	mov	edx,DWORD[8+rdi]	; reload state bytes 8..11
+	shr	r9,8
+	xor	r8,QWORD[((-128))+rbx*8+rbp]
+	shl	r10,56
+	xor	r9,QWORD[rbx*8+rbp]
+	rol	edx,8
+	xor	r8,QWORD[8+rax*1+rsi]
+	xor	r9,QWORD[rax*1+rsi]
+	mov	al,dl
+	xor	r8,r10
+	movzx	r12,WORD[r12*2+r11]
+	movzx	ebx,dl
+	shl	al,4
+	movzx	r13,BYTE[rcx*1+rsp]
+	shr	ebx,4
+	shl	r12,48
+	xor	r13,r8
+	mov	r10,r9
+	xor	r9,r12
+	shr	r8,8
+	movzx	r13,r13b
+	shr	r9,8
+	xor	r8,QWORD[((-128))+rcx*8+rbp]
+	shl	r10,56
+	xor	r9,QWORD[rcx*8+rbp]
+	rol	edx,8
+	xor	r8,QWORD[8+rax*1+rsi]
+	xor	r9,QWORD[rax*1+rsi]
+	mov	al,dl
+	xor	r8,r10
+	movzx	r13,WORD[r13*2+r11]
+	movzx	ecx,dl
+	shl	al,4
+	movzx	r12,BYTE[rbx*1+rsp]
+	shr	ecx,4
+	shl	r13,48
+	xor	r12,r8
+	mov	r10,r9
+	xor	r9,r13
+	shr	r8,8
+	movzx	r12,r12b
+	shr	r9,8
+	xor	r8,QWORD[((-128))+rbx*8+rbp]
+	shl	r10,56
+	xor	r9,QWORD[rbx*8+rbp]
+	rol	edx,8
+	xor	r8,QWORD[8+rax*1+rsi]
+	xor	r9,QWORD[rax*1+rsi]
+	mov	al,dl
+	xor	r8,r10
+	movzx	r12,WORD[r12*2+r11]
+	movzx	ebx,dl
+	shl	al,4
+	movzx	r13,BYTE[rcx*1+rsp]
+	shr	ebx,4
+	shl	r12,48
+	xor	r13,r8
+	mov	r10,r9
+	xor	r9,r12
+	shr	r8,8
+	movzx	r13,r13b
+	shr	r9,8
+	xor	r8,QWORD[((-128))+rcx*8+rbp]
+	shl	r10,56
+	xor	r9,QWORD[rcx*8+rbp]
+	rol	edx,8
+	xor	r8,QWORD[8+rax*1+rsi]
+	xor	r9,QWORD[rax*1+rsi]
+	mov	al,dl
+	xor	r8,r10
+	movzx	r13,WORD[r13*2+r11]
+	movzx	ecx,dl
+	shl	al,4
+	movzx	r12,BYTE[rbx*1+rsp]
+	shr	ecx,4
+	shl	r13,48
+	xor	r12,r8
+	mov	r10,r9
+	xor	r9,r13
+	shr	r8,8
+	movzx	r12,r12b
+	mov	edx,DWORD[4+rdi]	; reload state bytes 4..7
+	shr	r9,8
+	xor	r8,QWORD[((-128))+rbx*8+rbp]
+	shl	r10,56
+	xor	r9,QWORD[rbx*8+rbp]
+	rol	edx,8
+	xor	r8,QWORD[8+rax*1+rsi]
+	xor	r9,QWORD[rax*1+rsi]
+	mov	al,dl
+	xor	r8,r10
+	movzx	r12,WORD[r12*2+r11]
+	movzx	ebx,dl
+	shl	al,4
+	movzx	r13,BYTE[rcx*1+rsp]
+	shr	ebx,4
+	shl	r12,48
+	xor	r13,r8
+	mov	r10,r9
+	xor	r9,r12
+	shr	r8,8
+	movzx	r13,r13b
+	shr	r9,8
+	xor	r8,QWORD[((-128))+rcx*8+rbp]
+	shl	r10,56
+	xor	r9,QWORD[rcx*8+rbp]
+	rol	edx,8
+	xor	r8,QWORD[8+rax*1+rsi]
+	xor	r9,QWORD[rax*1+rsi]
+	mov	al,dl
+	xor	r8,r10
+	movzx	r13,WORD[r13*2+r11]
+	movzx	ecx,dl
+	shl	al,4
+	movzx	r12,BYTE[rbx*1+rsp]
+	shr	ecx,4
+	shl	r13,48
+	xor	r12,r8
+	mov	r10,r9
+	xor	r9,r13
+	shr	r8,8
+	movzx	r12,r12b
+	shr	r9,8
+	xor	r8,QWORD[((-128))+rbx*8+rbp]
+	shl	r10,56
+	xor	r9,QWORD[rbx*8+rbp]
+	rol	edx,8
+	xor	r8,QWORD[8+rax*1+rsi]
+	xor	r9,QWORD[rax*1+rsi]
+	mov	al,dl
+	xor	r8,r10
+	movzx	r12,WORD[r12*2+r11]
+	movzx	ebx,dl
+	shl	al,4
+	movzx	r13,BYTE[rcx*1+rsp]
+	shr	ebx,4
+	shl	r12,48
+	xor	r13,r8
+	mov	r10,r9
+	xor	r9,r12
+	shr	r8,8
+	movzx	r13,r13b
+	shr	r9,8
+	xor	r8,QWORD[((-128))+rcx*8+rbp]
+	shl	r10,56
+	xor	r9,QWORD[rcx*8+rbp]
+	rol	edx,8
+	xor	r8,QWORD[8+rax*1+rsi]
+	xor	r9,QWORD[rax*1+rsi]
+	mov	al,dl
+	xor	r8,r10
+	movzx	r13,WORD[r13*2+r11]
+	movzx	ecx,dl
+	shl	al,4
+	movzx	r12,BYTE[rbx*1+rsp]
+	shr	ecx,4
+	shl	r13,48
+	xor	r12,r8
+	mov	r10,r9
+	xor	r9,r13
+	shr	r8,8
+	movzx	r12,r12b
+	mov	edx,DWORD[rdi]	; reload state bytes 0..3
+	shr	r9,8
+	xor	r8,QWORD[((-128))+rbx*8+rbp]
+	shl	r10,56
+	xor	r9,QWORD[rbx*8+rbp]
+	rol	edx,8
+	xor	r8,QWORD[8+rax*1+rsi]
+	xor	r9,QWORD[rax*1+rsi]
+	mov	al,dl
+	xor	r8,r10
+	movzx	r12,WORD[r12*2+r11]
+	movzx	ebx,dl
+	shl	al,4
+	movzx	r13,BYTE[rcx*1+rsp]
+	shr	ebx,4
+	shl	r12,48
+	xor	r13,r8
+	mov	r10,r9
+	xor	r9,r12
+	shr	r8,8
+	movzx	r13,r13b
+	shr	r9,8
+	xor	r8,QWORD[((-128))+rcx*8+rbp]
+	shl	r10,56
+	xor	r9,QWORD[rcx*8+rbp]
+	rol	edx,8
+	xor	r8,QWORD[8+rax*1+rsi]
+	xor	r9,QWORD[rax*1+rsi]
+	mov	al,dl
+	xor	r8,r10
+	movzx	r13,WORD[r13*2+r11]
+	movzx	ecx,dl
+	shl	al,4
+	movzx	r12,BYTE[rbx*1+rsp]
+	shr	ecx,4
+	shl	r13,48
+	xor	r12,r8
+	mov	r10,r9
+	xor	r9,r13
+	shr	r8,8
+	movzx	r12,r12b
+	shr	r9,8
+	xor	r8,QWORD[((-128))+rbx*8+rbp]
+	shl	r10,56
+	xor	r9,QWORD[rbx*8+rbp]
+	rol	edx,8
+	xor	r8,QWORD[8+rax*1+rsi]
+	xor	r9,QWORD[rax*1+rsi]
+	mov	al,dl
+	xor	r8,r10
+	movzx	r12,WORD[r12*2+r11]
+	movzx	ebx,dl
+	shl	al,4
+	movzx	r13,BYTE[rcx*1+rsp]
+	shr	ebx,4
+	shl	r12,48
+	xor	r13,r8
+	mov	r10,r9
+	xor	r9,r12
+	shr	r8,8
+	movzx	r13,r13b
+	shr	r9,8
+	xor	r8,QWORD[((-128))+rcx*8+rbp]
+	shl	r10,56
+	xor	r9,QWORD[rcx*8+rbp]
+	rol	edx,8
+	xor	r8,QWORD[8+rax*1+rsi]
+	xor	r9,QWORD[rax*1+rsi]
+	mov	al,dl
+	xor	r8,r10
+	movzx	r13,WORD[r13*2+r11]
+	movzx	ecx,dl
+	shl	al,4
+	movzx	r12,BYTE[rbx*1+rsp]
+	and	ecx,240	; final byte: keep the high nibble in place (nibble*16) for a direct lookup in the table at rsi
+	shl	r13,48
+	xor	r12,r8
+	mov	r10,r9
+	xor	r9,r13
+	shr	r8,8
+	movzx	r12,r12b
+	mov	edx,DWORD[((-4))+rdi]	; NOTE(review): reads the 4 bytes just below the state; edx is not used again before being overwritten - confirm this load is harmless for all callers
+	shr	r9,8
+	xor	r8,QWORD[((-128))+rbx*8+rbp]
+	shl	r10,56
+	xor	r9,QWORD[rbx*8+rbp]
+	movzx	r12,WORD[r12*2+r11]
+	xor	r8,QWORD[8+rax*1+rsi]
+	xor	r9,QWORD[rax*1+rsi]
+	shl	r12,48
+	xor	r8,r10
+	xor	r9,r12
+	movzx	r13,r8b
+	shr	r8,4	; last step shifts by a single nibble
+	mov	r10,r9
+	shl	r13b,4
+	shr	r9,4
+	xor	r8,QWORD[8+rcx*1+rsi]
+	movzx	r13,WORD[r13*2+r11]
+	shl	r10,60
+	xor	r9,QWORD[rcx*1+rsi]
+	xor	r8,r10
+	shl	r13,48
+	bswap	r8	; byte-reverse both halves of the new state
+	xor	r9,r13
+	bswap	r9
+	cmp	r14,r15	; the only length check is here, so the loop body always runs at least once
+	jb	NEAR $L$outer_loop
+	mov	QWORD[8+rdi],r8	; store the final state
+	mov	QWORD[rdi],r9
+
+	lea	rsi,[((280+48))+rsp]	; rsi = stack pointer value before the six pushes
+	mov	r15,QWORD[((-48))+rsi]	; reload all six pushed registers
+	mov	r14,QWORD[((-40))+rsi]
+	mov	r13,QWORD[((-32))+rsi]
+	mov	r12,QWORD[((-24))+rsi]
+	mov	rbp,QWORD[((-16))+rsi]
+	mov	rbx,QWORD[((-8))+rsi]
+	lea	rsp,[rsi]
+$L$ghash_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_gcm_ghash_4bit:
+global	gcm_init_clmul	; args: rcx = output table (96 bytes written), rdx = 16-byte input value
+
+ALIGN	16
+gcm_init_clmul:
+$L$_init_clmul:
+$L$SEH_begin_gcm_init_clmul:
+
+DB	0x48,0x83,0xec,0x18	; sub rsp,0x18
+DB	0x0f,0x29,0x34,0x24	; movaps [rsp],xmm6 (reloaded before returning)
+	movdqu	xmm2,XMMWORD[rdx]	; load the 16-byte input
+	pshufd	xmm2,xmm2,78	; swap its two 64-bit halves
+
+
+	pshufd	xmm4,xmm2,255	; broadcast the top dword
+	movdqa	xmm3,xmm2
+	psllq	xmm2,1
+	pxor	xmm5,xmm5
+	psrlq	xmm3,63
+	pcmpgtd	xmm5,xmm4	; all-ones if the top bit of the value was set, else zero
+	pslldq	xmm3,8
+	por	xmm2,xmm3	; xmm2 = value shifted left by one bit across the qword boundary
+
+
+	pand	xmm5,XMMWORD[$L$0x1c2_polynomial]
+	pxor	xmm2,xmm5	; fold in the polynomial constant only when the shifted-out bit was set
+
+
+	pshufd	xmm6,xmm2,78
+	movdqa	xmm0,xmm2
+	pxor	xmm6,xmm2	; xmm6 = high half ^ low half of xmm2 (third Karatsuba operand)
+	movdqa	xmm1,xmm0
+	pshufd	xmm3,xmm0,78
+	pxor	xmm3,xmm0
+DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00 (low halves)
+DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11 (high halves)
+DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00 (sums of halves)
+	pxor	xmm3,xmm0
+	pxor	xmm3,xmm1
+
+	movdqa	xmm4,xmm3	; split the middle term across the 256-bit product xmm1:xmm0
+	psrldq	xmm3,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm3
+	pxor	xmm0,xmm4
+
+	movdqa	xmm4,xmm0	; shift/xor sequence that reduces xmm1:xmm0 to 128 bits in xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+
+
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1	; xmm0 = reduced product of xmm2 with itself
+	pshufd	xmm3,xmm2,78
+	pshufd	xmm4,xmm0,78
+	pxor	xmm3,xmm2
+	movdqu	XMMWORD[rcx],xmm2	; table[0] = xmm2
+	pxor	xmm4,xmm0
+	movdqu	XMMWORD[16+rcx],xmm0	; table[1] = its square
+DB	102,15,58,15,227,8	; palignr xmm4,xmm3,8: pack the half-sums of both entries into one register
+	movdqu	XMMWORD[32+rcx],xmm4	; table[2] = packed half-sums for entries 0 and 1
+	movdqa	xmm1,xmm0
+	pshufd	xmm3,xmm0,78
+	pxor	xmm3,xmm0
+DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
+DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
+DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
+	pxor	xmm3,xmm0
+	pxor	xmm3,xmm1
+
+	movdqa	xmm4,xmm3
+	psrldq	xmm3,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm3
+	pxor	xmm0,xmm4
+
+	movdqa	xmm4,xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+
+
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+	movdqa	xmm5,xmm0	; keep the third power in xmm5 while the fourth is computed
+	movdqa	xmm1,xmm0
+	pshufd	xmm3,xmm0,78
+	pxor	xmm3,xmm0
+DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
+DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
+DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
+	pxor	xmm3,xmm0
+	pxor	xmm3,xmm1
+
+	movdqa	xmm4,xmm3
+	psrldq	xmm3,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm3
+	pxor	xmm0,xmm4
+
+	movdqa	xmm4,xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+
+
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+	pshufd	xmm3,xmm5,78
+	pshufd	xmm4,xmm0,78
+	pxor	xmm3,xmm5
+	movdqu	XMMWORD[48+rcx],xmm5	; table[3] = third power
+	pxor	xmm4,xmm0
+	movdqu	XMMWORD[64+rcx],xmm0	; table[4] = fourth power
+DB	102,15,58,15,227,8	; palignr xmm4,xmm3,8
+	movdqu	XMMWORD[80+rcx],xmm4	; table[5] = packed half-sums for entries 3 and 4
+	movaps	xmm6,XMMWORD[rsp]	; reload xmm6 saved on entry
+	lea	rsp,[24+rsp]
+$L$SEH_end_gcm_init_clmul:
+	DB	0F3h,0C3h		;repret
+
+global	gcm_gmult_clmul	; args: rcx = 16-byte state (read and overwritten), rdx = table; only entries at +0 and +32 are read
+
+ALIGN	16
+gcm_gmult_clmul:
+$L$_gmult_clmul:
+	movdqu	xmm0,XMMWORD[rcx]	; load the state
+	movdqa	xmm5,XMMWORD[$L$bswap_mask]
+	movdqu	xmm2,XMMWORD[rdx]	; table entry 0: the multiplier
+	movdqu	xmm4,XMMWORD[32+rdx]	; table entry 2: multiplier operand for the middle product
+DB	102,15,56,0,197	; pshufb xmm0,xmm5: reorder state bytes with the bswap mask
+	movdqa	xmm1,xmm0
+	pshufd	xmm3,xmm0,78
+	pxor	xmm3,xmm0	; xmm3 = high half ^ low half of the state
+DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00 (low halves)
+DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11 (high halves)
+DB	102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00 (middle product)
+	pxor	xmm3,xmm0
+	pxor	xmm3,xmm1
+
+	movdqa	xmm4,xmm3	; split the middle term across the 256-bit product xmm1:xmm0
+	psrldq	xmm3,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm3
+	pxor	xmm0,xmm4
+
+	movdqa	xmm4,xmm0	; shift/xor sequence that reduces xmm1:xmm0 to 128 bits in xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+
+
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+DB	102,15,56,0,197	; pshufb xmm0,xmm5: restore the stored byte order
+	movdqu	XMMWORD[rcx],xmm0	; write the new state back
+	DB	0F3h,0C3h		;repret
+
+global	gcm_ghash_clmul	; args: rcx = 16-byte state, rdx = table, r8 = input pointer, r9 = input length in bytes
+
+ALIGN	32
+gcm_ghash_clmul:
+$L$_ghash_clmul:
+	lea	rax,[((-136))+rsp]	; rax = base for the xmm save area written by the encoded instructions below
+$L$SEH_begin_gcm_ghash_clmul:
+
+DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20]: rsp drops by 168 bytes in total
+DB	0x0f,0x29,0x70,0xe0	; movaps [rax-0x20],xmm6
+DB	0x0f,0x29,0x78,0xf0	; movaps [rax-0x10],xmm7
+DB	0x44,0x0f,0x29,0x00	; movaps [rax],xmm8
+DB	0x44,0x0f,0x29,0x48,0x10	; movaps [rax+0x10],xmm9
+DB	0x44,0x0f,0x29,0x50,0x20	; movaps [rax+0x20],xmm10
+DB	0x44,0x0f,0x29,0x58,0x30	; movaps [rax+0x30],xmm11
+DB	0x44,0x0f,0x29,0x60,0x40	; movaps [rax+0x40],xmm12
+DB	0x44,0x0f,0x29,0x68,0x50	; movaps [rax+0x50],xmm13
+DB	0x44,0x0f,0x29,0x70,0x60	; movaps [rax+0x60],xmm14
+DB	0x44,0x0f,0x29,0x78,0x70	; movaps [rax+0x70],xmm15
+	movdqa	xmm10,XMMWORD[$L$bswap_mask]	; xmm10 = byte-shuffle mask used for every load and the final store
+
+	movdqu	xmm0,XMMWORD[rcx]	; xmm0 = state
+	movdqu	xmm2,XMMWORD[rdx]	; table entry at +0
+	movdqu	xmm7,XMMWORD[32+rdx]	; table entry at +32
+DB	102,65,15,56,0,194	; pshufb xmm0,xmm10
+
+	sub	r9,0x10
+	jz	NEAR $L$odd_tail	; exactly 16 bytes of input: single-block path
+
+	movdqu	xmm6,XMMWORD[16+rdx]	; table entry at +16
+	lea	rax,[OPENSSL_ia32cap_P]
+	mov	eax,DWORD[4+rax]	; eax = second dword of the capability vector
+	cmp	r9,0x30
+	jb	NEAR $L$skip4x	; fewer than 64 bytes in total: use the two-block loop
+
+	and	eax,71303168	; keep bits 22 and 26 (0x4400000)
+	cmp	eax,4194304	; bit 22 set with bit 26 clear ...
+	je	NEAR $L$skip4x	; ... also selects the two-block loop
+
+	sub	r9,0x30
+	mov	rax,0xA040608020C0E000	; constant moved into xmm9 inside the four-block loop
+	movdqu	xmm14,XMMWORD[48+rdx]	; table entries at +48 and +64
+	movdqu	xmm15,XMMWORD[64+rdx]
+
+
+
+
+	movdqu	xmm3,XMMWORD[48+r8]	; input blocks 3 and 2 of the first group of four
+	movdqu	xmm11,XMMWORD[32+r8]
+DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
+DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
+	movdqa	xmm5,xmm3
+	pshufd	xmm4,xmm3,78
+	pxor	xmm4,xmm3
+DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
+DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
+DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
+
+	movdqa	xmm13,xmm11
+	pshufd	xmm12,xmm11,78
+	pxor	xmm12,xmm11
+DB	102,68,15,58,68,222,0	; pclmulqdq xmm11,xmm6,0x00
+DB	102,68,15,58,68,238,17	; pclmulqdq xmm13,xmm6,0x11
+DB	102,68,15,58,68,231,16	; pclmulqdq xmm12,xmm7,0x10
+	xorps	xmm3,xmm11	; accumulate low, high and middle partial products
+	xorps	xmm5,xmm13
+	movups	xmm7,XMMWORD[80+rdx]	; table entry at +80
+	xorps	xmm4,xmm12
+
+	movdqu	xmm11,XMMWORD[16+r8]	; input blocks 1 and 0
+	movdqu	xmm8,XMMWORD[r8]
+DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
+DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
+	movdqa	xmm13,xmm11
+	pshufd	xmm12,xmm11,78
+	pxor	xmm0,xmm8	; fold block 0 into the running state
+	pxor	xmm12,xmm11
+DB	102,69,15,58,68,222,0	; pclmulqdq xmm11,xmm14,0x00
+	movdqa	xmm1,xmm0
+	pshufd	xmm8,xmm0,78
+	pxor	xmm8,xmm0
+DB	102,69,15,58,68,238,17	; pclmulqdq xmm13,xmm14,0x11
+DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
+	xorps	xmm3,xmm11
+	xorps	xmm5,xmm13
+
+	lea	r8,[64+r8]	; consumed 64 input bytes
+	sub	r9,0x40
+	jc	NEAR $L$tail4x	; no further full group of four blocks
+
+	jmp	NEAR $L$mod4_loop
+ALIGN	32
+$L$mod4_loop:
+DB	102,65,15,58,68,199,0	; pclmulqdq xmm0,xmm15,0x00
+	xorps	xmm4,xmm12
+	movdqu	xmm11,XMMWORD[48+r8]
+DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
+DB	102,65,15,58,68,207,17	; pclmulqdq xmm1,xmm15,0x11
+	xorps	xmm0,xmm3
+	movdqu	xmm3,XMMWORD[32+r8]
+	movdqa	xmm13,xmm11
+DB	102,68,15,58,68,199,16	; pclmulqdq xmm8,xmm7,0x10
+	pshufd	xmm12,xmm11,78
+	xorps	xmm1,xmm5
+	pxor	xmm12,xmm11
+DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
+	movups	xmm7,XMMWORD[32+rdx]
+	xorps	xmm8,xmm4
+DB	102,68,15,58,68,218,0	; pclmulqdq xmm11,xmm2,0x00
+	pshufd	xmm4,xmm3,78
+
+	pxor	xmm8,xmm0
+	movdqa	xmm5,xmm3
+	pxor	xmm8,xmm1
+	pxor	xmm4,xmm3
+	movdqa	xmm9,xmm8
+DB	102,68,15,58,68,234,17	; pclmulqdq xmm13,xmm2,0x11
+	pslldq	xmm8,8
+	psrldq	xmm9,8
+	pxor	xmm0,xmm8
+	movdqa	xmm8,XMMWORD[$L$7_mask]
+	pxor	xmm1,xmm9
+DB	102,76,15,110,200	; movq xmm9,rax: the 64-bit constant loaded before the loop
+
+	pand	xmm8,xmm0
+DB	102,69,15,56,0,200	; pshufb xmm9,xmm8: byte lookup in that constant, indexed by the masked bits of xmm0
+	pxor	xmm9,xmm0
+DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
+	psllq	xmm9,57
+	movdqa	xmm8,xmm9
+	pslldq	xmm9,8
+DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
+	psrldq	xmm8,8
+	pxor	xmm0,xmm9
+	pxor	xmm1,xmm8
+	movdqu	xmm8,XMMWORD[r8]
+
+	movdqa	xmm9,xmm0
+	psrlq	xmm0,1
+DB	102,15,58,68,238,17	; pclmulqdq xmm5,xmm6,0x11
+	xorps	xmm3,xmm11
+	movdqu	xmm11,XMMWORD[16+r8]
+DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
+DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
+	xorps	xmm5,xmm13
+	movups	xmm7,XMMWORD[80+rdx]
+DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
+	pxor	xmm1,xmm9
+	pxor	xmm9,xmm0
+	psrlq	xmm0,5
+
+	movdqa	xmm13,xmm11
+	pxor	xmm4,xmm12
+	pshufd	xmm12,xmm11,78
+	pxor	xmm0,xmm9
+	pxor	xmm1,xmm8
+	pxor	xmm12,xmm11
+DB	102,69,15,58,68,222,0	; pclmulqdq xmm11,xmm14,0x00
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+	movdqa	xmm1,xmm0
+DB	102,69,15,58,68,238,17	; pclmulqdq xmm13,xmm14,0x11
+	xorps	xmm3,xmm11
+	pshufd	xmm8,xmm0,78
+	pxor	xmm8,xmm0
+
+DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
+	xorps	xmm5,xmm13
+
+	lea	r8,[64+r8]
+	sub	r9,0x40
+	jnc	NEAR $L$mod4_loop	; repeat while another 64 bytes were available
+
+$L$tail4x:
+DB	102,65,15,58,68,199,0	; pclmulqdq xmm0,xmm15,0x00
+DB	102,65,15,58,68,207,17	; pclmulqdq xmm1,xmm15,0x11
+DB	102,68,15,58,68,199,16	; pclmulqdq xmm8,xmm7,0x10
+	xorps	xmm4,xmm12
+	xorps	xmm0,xmm3
+	xorps	xmm1,xmm5
+	pxor	xmm1,xmm0
+	pxor	xmm8,xmm4
+
+	pxor	xmm8,xmm1
+	pxor	xmm1,xmm0
+
+	movdqa	xmm9,xmm8
+	psrldq	xmm8,8
+	pslldq	xmm9,8
+	pxor	xmm1,xmm8
+	pxor	xmm0,xmm9
+
+	movdqa	xmm4,xmm0	; shift/xor sequence that reduces xmm1:xmm0 to 128 bits in xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+
+
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+	add	r9,0x40	; undo the last subtraction: r9 = bytes still unprocessed
+	jz	NEAR $L$done
+	movdqu	xmm7,XMMWORD[32+rdx]	; restore the +32 table entry expected by the paths below
+	sub	r9,0x10
+	jz	NEAR $L$odd_tail	; exactly one block left
+$L$skip4x:
+
+
+
+
+
+	movdqu	xmm8,XMMWORD[r8]	; two-block path: load the next pair of input blocks
+	movdqu	xmm3,XMMWORD[16+r8]
+DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
+DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
+	pxor	xmm0,xmm8	; fold the first block of the pair into the state
+
+	movdqa	xmm5,xmm3
+	pshufd	xmm4,xmm3,78
+	pxor	xmm4,xmm3
+DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
+DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
+DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
+
+	lea	r8,[32+r8]
+	nop
+	sub	r9,0x20
+	jbe	NEAR $L$even_tail	; no further complete pair
+	nop
+	jmp	NEAR $L$mod_loop
+
+ALIGN	32
+$L$mod_loop:
+	movdqa	xmm1,xmm0
+	movdqa	xmm8,xmm4
+	pshufd	xmm4,xmm0,78
+	pxor	xmm4,xmm0
+
+DB	102,15,58,68,198,0	; pclmulqdq xmm0,xmm6,0x00
+DB	102,15,58,68,206,17	; pclmulqdq xmm1,xmm6,0x11
+DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
+
+	pxor	xmm0,xmm3
+	pxor	xmm1,xmm5
+	movdqu	xmm9,XMMWORD[r8]	; next pair of input blocks is loaded while the current product is reduced
+	pxor	xmm8,xmm0
+DB	102,69,15,56,0,202	; pshufb xmm9,xmm10
+	movdqu	xmm3,XMMWORD[16+r8]
+
+	pxor	xmm8,xmm1
+	pxor	xmm1,xmm9
+	pxor	xmm4,xmm8
+DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
+	movdqa	xmm8,xmm4
+	psrldq	xmm8,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm8
+	pxor	xmm0,xmm4
+
+	movdqa	xmm5,xmm3
+
+	movdqa	xmm9,xmm0
+	movdqa	xmm8,xmm0
+	psllq	xmm0,5
+	pxor	xmm8,xmm0
+DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
+	psllq	xmm0,1
+	pxor	xmm0,xmm8
+	psllq	xmm0,57
+	movdqa	xmm8,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm8,8
+	pxor	xmm0,xmm9
+	pshufd	xmm4,xmm5,78
+	pxor	xmm1,xmm8
+	pxor	xmm4,xmm5
+
+	movdqa	xmm9,xmm0
+	psrlq	xmm0,1
+DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
+	pxor	xmm1,xmm9
+	pxor	xmm9,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm9
+	lea	r8,[32+r8]
+	psrlq	xmm0,1
+DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
+	pxor	xmm0,xmm1
+
+	sub	r9,0x20
+	ja	NEAR $L$mod_loop	; loop while more than one pair's worth remained
+
+$L$even_tail:
+	movdqa	xmm1,xmm0
+	movdqa	xmm8,xmm4
+	pshufd	xmm4,xmm0,78
+	pxor	xmm4,xmm0
+
+DB	102,15,58,68,198,0	; pclmulqdq xmm0,xmm6,0x00
+DB	102,15,58,68,206,17	; pclmulqdq xmm1,xmm6,0x11
+DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
+
+	pxor	xmm0,xmm3
+	pxor	xmm1,xmm5
+	pxor	xmm8,xmm0
+	pxor	xmm8,xmm1
+	pxor	xmm4,xmm8
+	movdqa	xmm8,xmm4
+	psrldq	xmm8,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm8
+	pxor	xmm0,xmm4
+
+	movdqa	xmm4,xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+
+
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+	test	r9,r9
+	jnz	NEAR $L$done	; r9 non-zero here means no block is left; zero means one more block follows
+
+$L$odd_tail:
+	movdqu	xmm8,XMMWORD[r8]	; single remaining input block
+DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
+	pxor	xmm0,xmm8
+	movdqa	xmm1,xmm0
+	pshufd	xmm3,xmm0,78
+	pxor	xmm3,xmm0
+DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
+DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
+DB	102,15,58,68,223,0	; pclmulqdq xmm3,xmm7,0x00
+	pxor	xmm3,xmm0
+	pxor	xmm3,xmm1
+
+	movdqa	xmm4,xmm3
+	psrldq	xmm3,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm3
+	pxor	xmm0,xmm4
+
+	movdqa	xmm4,xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+
+
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+$L$done:
+DB	102,65,15,56,0,194	; pshufb xmm0,xmm10: restore the stored byte order
+	movdqu	XMMWORD[rcx],xmm0	; write the new state back
+	movaps	xmm6,XMMWORD[rsp]	; reload xmm6-xmm15 from the save area filled on entry
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	xmm10,XMMWORD[64+rsp]
+	movaps	xmm11,XMMWORD[80+rsp]
+	movaps	xmm12,XMMWORD[96+rsp]
+	movaps	xmm13,XMMWORD[112+rsp]
+	movaps	xmm14,XMMWORD[128+rsp]
+	movaps	xmm15,XMMWORD[144+rsp]
+	lea	rsp,[168+rsp]	; release the 168-byte frame
+$L$SEH_end_gcm_ghash_clmul:
+	DB	0F3h,0C3h		;repret
+
+global	gcm_init_avx	; args: rcx = output table (four groups of 48 bytes written), rdx = 16-byte input value
+
+ALIGN	32
+gcm_init_avx:
+$L$SEH_begin_gcm_init_avx:
+
+DB	0x48,0x83,0xec,0x18	; sub rsp,0x18
+DB	0x0f,0x29,0x34,0x24	; movaps [rsp],xmm6 (reloaded before returning)
+	vzeroupper
+
+	vmovdqu	xmm2,XMMWORD[rdx]	; load the 16-byte input
+	vpshufd	xmm2,xmm2,78	; swap its two 64-bit halves
+
+
+	vpshufd	xmm4,xmm2,255	; broadcast the top dword
+	vpsrlq	xmm3,xmm2,63
+	vpsllq	xmm2,xmm2,1
+	vpxor	xmm5,xmm5,xmm5
+	vpcmpgtd	xmm5,xmm5,xmm4	; all-ones if the top bit of the value was set, else zero
+	vpslldq	xmm3,xmm3,8
+	vpor	xmm2,xmm2,xmm3	; xmm2 = value shifted left by one bit across the qword boundary
+
+
+	vpand	xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial]
+	vpxor	xmm2,xmm2,xmm5	; fold in the polynomial constant only when the shifted-out bit was set
+
+	vpunpckhqdq	xmm6,xmm2,xmm2
+	vmovdqa	xmm0,xmm2	; xmm0 = running power, starting at the first
+	vpxor	xmm6,xmm6,xmm2	; xmm6 = high half ^ low half of xmm2 (operand of every middle product)
+	mov	r10,4	; four passes, each writing one 48-byte group
+	jmp	NEAR $L$init_start_avx
+ALIGN	32
+$L$init_loop_avx:
+	vpalignr	xmm5,xmm4,xmm3,8	; pack the half-sums of the two powers stored by the previous pass
+	vmovdqu	XMMWORD[(-16)+rcx],xmm5	; third slot of the previous group (rcx was already advanced by 48)
+	vpunpckhqdq	xmm3,xmm0,xmm0
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm1,xmm0,xmm2,0x11
+	vpclmulqdq	xmm0,xmm0,xmm2,0x00
+	vpclmulqdq	xmm3,xmm3,xmm6,0x00
+	vpxor	xmm4,xmm1,xmm0
+	vpxor	xmm3,xmm3,xmm4
+
+	vpslldq	xmm4,xmm3,8
+	vpsrldq	xmm3,xmm3,8
+	vpxor	xmm0,xmm0,xmm4
+	vpxor	xmm1,xmm1,xmm3
+	vpsllq	xmm3,xmm0,57	; shift/xor sequence that reduces xmm1:xmm0 to 128 bits in xmm0
+	vpsllq	xmm4,xmm0,62
+	vpxor	xmm4,xmm4,xmm3
+	vpsllq	xmm3,xmm0,63
+	vpxor	xmm4,xmm4,xmm3
+	vpslldq	xmm3,xmm4,8
+	vpsrldq	xmm4,xmm4,8
+	vpxor	xmm0,xmm0,xmm3
+	vpxor	xmm1,xmm1,xmm4
+
+	vpsrlq	xmm4,xmm0,1
+	vpxor	xmm1,xmm1,xmm0
+	vpxor	xmm0,xmm0,xmm4
+	vpsrlq	xmm4,xmm4,5
+	vpxor	xmm0,xmm0,xmm4
+	vpsrlq	xmm0,xmm0,1
+	vpxor	xmm0,xmm0,xmm1	; xmm0 = next power
+$L$init_start_avx:
+	vmovdqa	xmm5,xmm0	; xmm5 = power stored in the first slot of this group
+	vpunpckhqdq	xmm3,xmm0,xmm0
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm1,xmm0,xmm2,0x11
+	vpclmulqdq	xmm0,xmm0,xmm2,0x00
+	vpclmulqdq	xmm3,xmm3,xmm6,0x00
+	vpxor	xmm4,xmm1,xmm0
+	vpxor	xmm3,xmm3,xmm4
+
+	vpslldq	xmm4,xmm3,8
+	vpsrldq	xmm3,xmm3,8
+	vpxor	xmm0,xmm0,xmm4
+	vpxor	xmm1,xmm1,xmm3
+	vpsllq	xmm3,xmm0,57
+	vpsllq	xmm4,xmm0,62
+	vpxor	xmm4,xmm4,xmm3
+	vpsllq	xmm3,xmm0,63
+	vpxor	xmm4,xmm4,xmm3
+	vpslldq	xmm3,xmm4,8
+	vpsrldq	xmm4,xmm4,8
+	vpxor	xmm0,xmm0,xmm3
+	vpxor	xmm1,xmm1,xmm4
+
+	vpsrlq	xmm4,xmm0,1
+	vpxor	xmm1,xmm1,xmm0
+	vpxor	xmm0,xmm0,xmm4
+	vpsrlq	xmm4,xmm4,5
+	vpxor	xmm0,xmm0,xmm4
+	vpsrlq	xmm0,xmm0,1
+	vpxor	xmm0,xmm0,xmm1	; xmm0 = following power, stored in the second slot
+	vpshufd	xmm3,xmm5,78
+	vpshufd	xmm4,xmm0,78
+	vpxor	xmm3,xmm3,xmm5	; half-sums of both powers, packed later by vpalignr
+	vmovdqu	XMMWORD[rcx],xmm5
+	vpxor	xmm4,xmm4,xmm0
+	vmovdqu	XMMWORD[16+rcx],xmm0
+	lea	rcx,[48+rcx]	; advance to the next 48-byte group
+	sub	r10,1
+	jnz	NEAR $L$init_loop_avx
+
+	vpalignr	xmm5,xmm3,xmm4,8	; NOTE(review): source operands are in the opposite order to the vpalignr inside the loop - presumably intentional for the last group; confirm against the consumer
+	vmovdqu	XMMWORD[(-16)+rcx],xmm5	; third slot of the last group
+
+	vzeroupper
+	movaps	xmm6,XMMWORD[rsp]	; reload xmm6 saved on entry
+	lea	rsp,[24+rsp]
+$L$SEH_end_gcm_init_avx:
+	DB	0F3h,0C3h		;repret
+
+global	gcm_gmult_avx	; exported alias: has no body of its own
+
+ALIGN	32
+gcm_gmult_avx:
+	jmp	NEAR $L$_gmult_clmul	; tail-jump into gcm_gmult_clmul with the argument registers untouched
+
+global	gcm_ghash_avx
+
+ALIGN	32
+gcm_ghash_avx:
+	lea	rax,[((-136))+rsp]
+$L$SEH_begin_gcm_ghash_avx:
+
+DB	0x48,0x8d,0x60,0xe0
+DB	0x0f,0x29,0x70,0xe0
+DB	0x0f,0x29,0x78,0xf0
+DB	0x44,0x0f,0x29,0x00
+DB	0x44,0x0f,0x29,0x48,0x10
+DB	0x44,0x0f,0x29,0x50,0x20
+DB	0x44,0x0f,0x29,0x58,0x30
+DB	0x44,0x0f,0x29,0x60,0x40
+DB	0x44,0x0f,0x29,0x68,0x50
+DB	0x44,0x0f,0x29,0x70,0x60
+DB	0x44,0x0f,0x29,0x78,0x70
+	vzeroupper
+
+	vmovdqu	xmm10,XMMWORD[rcx]
+	lea	r10,[$L$0x1c2_polynomial]
+	lea	rdx,[64+rdx]
+	vmovdqu	xmm13,XMMWORD[$L$bswap_mask]
+	vpshufb	xmm10,xmm10,xmm13
+	cmp	r9,0x80
+	jb	NEAR $L$short_avx
+	sub	r9,0x80
+
+	vmovdqu	xmm14,XMMWORD[112+r8]
+	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
+	vpshufb	xmm14,xmm14,xmm13
+	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
+
+	vpunpckhqdq	xmm9,xmm14,xmm14
+	vmovdqu	xmm15,XMMWORD[96+r8]
+	vpclmulqdq	xmm0,xmm14,xmm6,0x00
+	vpxor	xmm9,xmm9,xmm14
+	vpshufb	xmm15,xmm15,xmm13
+	vpclmulqdq	xmm1,xmm14,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vmovdqu	xmm14,XMMWORD[80+r8]
+	vpclmulqdq	xmm2,xmm9,xmm7,0x00
+	vpxor	xmm8,xmm8,xmm15
+
+	vpshufb	xmm14,xmm14,xmm13
+	vpclmulqdq	xmm3,xmm15,xmm6,0x00
+	vpunpckhqdq	xmm9,xmm14,xmm14
+	vpclmulqdq	xmm4,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
+	vpxor	xmm9,xmm9,xmm14
+	vmovdqu	xmm15,XMMWORD[64+r8]
+	vpclmulqdq	xmm5,xmm8,xmm7,0x10
+	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
+
+	vpshufb	xmm15,xmm15,xmm13
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm14,xmm6,0x00
+	vpxor	xmm4,xmm4,xmm1
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpclmulqdq	xmm1,xmm14,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm9,xmm7,0x00
+	vpxor	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm14,XMMWORD[48+r8]
+	vpxor	xmm0,xmm0,xmm3
+	vpclmulqdq	xmm3,xmm15,xmm6,0x00
+	vpxor	xmm1,xmm1,xmm4
+	vpshufb	xmm14,xmm14,xmm13
+	vpclmulqdq	xmm4,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
+	vpxor	xmm2,xmm2,xmm5
+	vpunpckhqdq	xmm9,xmm14,xmm14
+	vpclmulqdq	xmm5,xmm8,xmm7,0x10
+	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
+	vpxor	xmm9,xmm9,xmm14
+
+	vmovdqu	xmm15,XMMWORD[32+r8]
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm14,xmm6,0x00
+	vpxor	xmm4,xmm4,xmm1
+	vpshufb	xmm15,xmm15,xmm13
+	vpclmulqdq	xmm1,xmm14,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
+	vpxor	xmm5,xmm5,xmm2
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpclmulqdq	xmm2,xmm9,xmm7,0x00
+	vpxor	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm14,XMMWORD[16+r8]
+	vpxor	xmm0,xmm0,xmm3
+	vpclmulqdq	xmm3,xmm15,xmm6,0x00
+	vpxor	xmm1,xmm1,xmm4
+	vpshufb	xmm14,xmm14,xmm13
+	vpclmulqdq	xmm4,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
+	vpxor	xmm2,xmm2,xmm5
+	vpunpckhqdq	xmm9,xmm14,xmm14
+	vpclmulqdq	xmm5,xmm8,xmm7,0x10
+	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
+	vpxor	xmm9,xmm9,xmm14
+
+	vmovdqu	xmm15,XMMWORD[r8]
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm14,xmm6,0x00
+	vpxor	xmm4,xmm4,xmm1
+	vpshufb	xmm15,xmm15,xmm13
+	vpclmulqdq	xmm1,xmm14,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm9,xmm7,0x10
+
+	lea	r8,[128+r8]
+	cmp	r9,0x80
+	jb	NEAR $L$tail_avx
+
+	vpxor	xmm15,xmm15,xmm10
+	sub	r9,0x80
+	jmp	NEAR $L$oop8x_avx
+
+ALIGN	32
+$L$oop8x_avx:
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vmovdqu	xmm14,XMMWORD[112+r8]
+	vpxor	xmm3,xmm3,xmm0
+	vpxor	xmm8,xmm8,xmm15
+	vpclmulqdq	xmm10,xmm15,xmm6,0x00
+	vpshufb	xmm14,xmm14,xmm13
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm11,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
+	vpunpckhqdq	xmm9,xmm14,xmm14
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm12,xmm8,xmm7,0x00
+	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
+	vpxor	xmm9,xmm9,xmm14
+
+	vmovdqu	xmm15,XMMWORD[96+r8]
+	vpclmulqdq	xmm0,xmm14,xmm6,0x00
+	vpxor	xmm10,xmm10,xmm3
+	vpshufb	xmm15,xmm15,xmm13
+	vpclmulqdq	xmm1,xmm14,xmm6,0x11
+	vxorps	xmm11,xmm11,xmm4
+	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpclmulqdq	xmm2,xmm9,xmm7,0x00
+	vpxor	xmm12,xmm12,xmm5
+	vxorps	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm14,XMMWORD[80+r8]
+	vpxor	xmm12,xmm12,xmm10
+	vpclmulqdq	xmm3,xmm15,xmm6,0x00
+	vpxor	xmm12,xmm12,xmm11
+	vpslldq	xmm9,xmm12,8
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm4,xmm15,xmm6,0x11
+	vpsrldq	xmm12,xmm12,8
+	vpxor	xmm10,xmm10,xmm9
+	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
+	vpshufb	xmm14,xmm14,xmm13
+	vxorps	xmm11,xmm11,xmm12
+	vpxor	xmm4,xmm4,xmm1
+	vpunpckhqdq	xmm9,xmm14,xmm14
+	vpclmulqdq	xmm5,xmm8,xmm7,0x10
+	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
+	vpxor	xmm9,xmm9,xmm14
+	vpxor	xmm5,xmm5,xmm2
+
+	vmovdqu	xmm15,XMMWORD[64+r8]
+	vpalignr	xmm12,xmm10,xmm10,8
+	vpclmulqdq	xmm0,xmm14,xmm6,0x00
+	vpshufb	xmm15,xmm15,xmm13
+	vpxor	xmm0,xmm0,xmm3
+	vpclmulqdq	xmm1,xmm14,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm2,xmm9,xmm7,0x00
+	vxorps	xmm8,xmm8,xmm15
+	vpxor	xmm2,xmm2,xmm5
+
+	vmovdqu	xmm14,XMMWORD[48+r8]
+	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
+	vpclmulqdq	xmm3,xmm15,xmm6,0x00
+	vpshufb	xmm14,xmm14,xmm13
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm4,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
+	vpunpckhqdq	xmm9,xmm14,xmm14
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm5,xmm8,xmm7,0x10
+	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
+	vpxor	xmm9,xmm9,xmm14
+	vpxor	xmm5,xmm5,xmm2
+
+	vmovdqu	xmm15,XMMWORD[32+r8]
+	vpclmulqdq	xmm0,xmm14,xmm6,0x00
+	vpshufb	xmm15,xmm15,xmm13
+	vpxor	xmm0,xmm0,xmm3
+	vpclmulqdq	xmm1,xmm14,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm2,xmm9,xmm7,0x00
+	vpxor	xmm8,xmm8,xmm15
+	vpxor	xmm2,xmm2,xmm5
+	vxorps	xmm10,xmm10,xmm12
+
+	vmovdqu	xmm14,XMMWORD[16+r8]
+	vpalignr	xmm12,xmm10,xmm10,8
+	vpclmulqdq	xmm3,xmm15,xmm6,0x00
+	vpshufb	xmm14,xmm14,xmm13
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm4,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
+	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
+	vxorps	xmm12,xmm12,xmm11
+	vpunpckhqdq	xmm9,xmm14,xmm14
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm5,xmm8,xmm7,0x10
+	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
+	vpxor	xmm9,xmm9,xmm14
+	vpxor	xmm5,xmm5,xmm2
+
+	vmovdqu	xmm15,XMMWORD[r8]
+	vpclmulqdq	xmm0,xmm14,xmm6,0x00
+	vpshufb	xmm15,xmm15,xmm13
+	vpclmulqdq	xmm1,xmm14,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
+	vpxor	xmm15,xmm15,xmm12
+	vpclmulqdq	xmm2,xmm9,xmm7,0x10
+	vpxor	xmm15,xmm15,xmm10
+
+	lea	r8,[128+r8]
+	sub	r9,0x80
+	jnc	NEAR $L$oop8x_avx
+
+	add	r9,0x80
+	jmp	NEAR $L$tail_no_xor_avx
+
+ALIGN	32
+$L$short_avx:
+	vmovdqu	xmm14,XMMWORD[((-16))+r9*1+r8]
+	lea	r8,[r9*1+r8]
+	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
+	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
+	vpshufb	xmm15,xmm14,xmm13
+
+	vmovdqa	xmm3,xmm0
+	vmovdqa	xmm4,xmm1
+	vmovdqa	xmm5,xmm2
+	sub	r9,0x10
+	jz	NEAR $L$tail_avx
+
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm15,xmm6,0x00
+	vpxor	xmm8,xmm8,xmm15
+	vmovdqu	xmm14,XMMWORD[((-32))+r8]
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm1,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
+	vpshufb	xmm15,xmm14,xmm13
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm8,xmm7,0x00
+	vpsrldq	xmm7,xmm7,8
+	sub	r9,0x10
+	jz	NEAR $L$tail_avx
+
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm15,xmm6,0x00
+	vpxor	xmm8,xmm8,xmm15
+	vmovdqu	xmm14,XMMWORD[((-48))+r8]
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm1,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
+	vpshufb	xmm15,xmm14,xmm13
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm8,xmm7,0x00
+	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
+	sub	r9,0x10
+	jz	NEAR $L$tail_avx
+
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm15,xmm6,0x00
+	vpxor	xmm8,xmm8,xmm15
+	vmovdqu	xmm14,XMMWORD[((-64))+r8]
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm1,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
+	vpshufb	xmm15,xmm14,xmm13
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm8,xmm7,0x00
+	vpsrldq	xmm7,xmm7,8
+	sub	r9,0x10
+	jz	NEAR $L$tail_avx
+
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm15,xmm6,0x00
+	vpxor	xmm8,xmm8,xmm15
+	vmovdqu	xmm14,XMMWORD[((-80))+r8]
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm1,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
+	vpshufb	xmm15,xmm14,xmm13
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm8,xmm7,0x00
+	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
+	sub	r9,0x10
+	jz	NEAR $L$tail_avx
+
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm15,xmm6,0x00
+	vpxor	xmm8,xmm8,xmm15
+	vmovdqu	xmm14,XMMWORD[((-96))+r8]
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm1,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
+	vpshufb	xmm15,xmm14,xmm13
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm8,xmm7,0x00
+	vpsrldq	xmm7,xmm7,8
+	sub	r9,0x10
+	jz	NEAR $L$tail_avx
+
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm15,xmm6,0x00
+	vpxor	xmm8,xmm8,xmm15
+	vmovdqu	xmm14,XMMWORD[((-112))+r8]
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm1,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
+	vpshufb	xmm15,xmm14,xmm13
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm8,xmm7,0x00
+	vmovq	xmm7,QWORD[((184-64))+rdx]
+	sub	r9,0x10
+	jmp	NEAR $L$tail_avx
+
+ALIGN	32
+$L$tail_avx:
+	vpxor	xmm15,xmm15,xmm10
+$L$tail_no_xor_avx:
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm15,xmm6,0x00
+	vpxor	xmm8,xmm8,xmm15
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm1,xmm15,xmm6,0x11
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm8,xmm7,0x00
+
+	vmovdqu	xmm12,XMMWORD[r10]
+
+	vpxor	xmm10,xmm3,xmm0
+	vpxor	xmm11,xmm4,xmm1
+	vpxor	xmm5,xmm5,xmm2
+
+	vpxor	xmm5,xmm5,xmm10
+	vpxor	xmm5,xmm5,xmm11
+	vpslldq	xmm9,xmm5,8
+	vpsrldq	xmm5,xmm5,8
+	vpxor	xmm10,xmm10,xmm9
+	vpxor	xmm11,xmm11,xmm5
+
+	vpclmulqdq	xmm9,xmm10,xmm12,0x10
+	vpalignr	xmm10,xmm10,xmm10,8
+	vpxor	xmm10,xmm10,xmm9
+
+	vpclmulqdq	xmm9,xmm10,xmm12,0x10
+	vpalignr	xmm10,xmm10,xmm10,8
+	vpxor	xmm10,xmm10,xmm11
+	vpxor	xmm10,xmm10,xmm9
+
+	cmp	r9,0
+	jne	NEAR $L$short_avx
+
+	vpshufb	xmm10,xmm10,xmm13
+	vmovdqu	XMMWORD[rcx],xmm10
+	vzeroupper
+	movaps	xmm6,XMMWORD[rsp]
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	xmm10,XMMWORD[64+rsp]
+	movaps	xmm11,XMMWORD[80+rsp]
+	movaps	xmm12,XMMWORD[96+rsp]
+	movaps	xmm13,XMMWORD[112+rsp]
+	movaps	xmm14,XMMWORD[128+rsp]
+	movaps	xmm15,XMMWORD[144+rsp]
+	lea	rsp,[168+rsp]
+$L$SEH_end_gcm_ghash_avx:
+	DB	0F3h,0C3h		;repret
+
+ALIGN	64
+$L$bswap_mask:	; byte indices 15..0: reverses all 16 bytes when used as a (v)pshufb control
+DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+$L$0x1c2_polynomial:	; 128-bit constant: lowest byte 1, highest byte 0xc2, everything between is 0
+DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
+$L$7_mask:	; two qwords, each holding the value 7
+	DD	7,0,7,0
+$L$7_mask_poly:	; low qword 7, high qword 450 (= 0x1c2)
+	DD	7,0,450,0
+ALIGN	64
+
+$L$rem_4bit:
+	DD	0,0,0,471859200,0,943718400,0,610271232
+	DD	0,1887436800,0,1822425088,0,1220542464,0,1423966208
+	DD	0,3774873600,0,4246732800,0,3644850176,0,3311403008
+	DD	0,2441084928,0,2376073216,0,2847932416,0,3051356160
+
+$L$rem_8bit:
+	DW	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
+	DW	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
+	DW	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
+	DW	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
+	DW	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
+	DW	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
+	DW	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
+	DW	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
+	DW	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
+	DW	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
+	DW	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
+	DW	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
+	DW	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
+	DW	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
+	DW	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
+	DW	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
+	DW	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
+	DW	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
+	DW	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
+	DW	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
+	DW	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
+	DW	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
+	DW	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
+	DW	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
+	DW	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
+	DW	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
+	DW	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
+	DW	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
+	DW	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
+	DW	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
+	DW	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
+	DW	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
+
+DB	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
+DB	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
+DB	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
+DB	114,103,62,0
+ALIGN	64
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+se_handler:	; Win64 exception handler named by the .xdata records of gcm_gmult_4bit/gcm_ghash_4bit; r8 is used as the CONTEXT pointer, r9 as the DISPATCHER_CONTEXT pointer
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; scratch area; offsets 32..56 carry the stack arguments of the call below
+
+	mov	rax,QWORD[120+r8]	; context->Rax (field names follow the Win64 CONTEXT layout)
+	mov	rbx,QWORD[248+r8]	; context->Rip
+
+	mov	rsi,QWORD[8+r9]	; disp->ImageBase
+	mov	r11,QWORD[56+r9]	; disp->HandlerData
+
+	mov	r10d,DWORD[r11]	; HandlerData[0]: image-relative prologue label from .xdata
+	lea	r10,[r10*1+rsi]	; make it an absolute address
+	cmp	rbx,r10
+	jb	NEAR $L$in_prologue	; Rip below the prologue label: keep context->Rax as the frame base (presumably the entry rsp copied by the Win64 prologue - confirm)
+
+	mov	rax,QWORD[152+r8]	; context->Rsp
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1]: image-relative epilogue label from .xdata
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$in_prologue	; Rip at or past the epilogue label: use Rsp unchanged
+
+	lea	rax,[((48+280))+rax]	; step over 328 bytes of frame (presumably 280 bytes of locals plus six 8-byte pushes - confirm against the 4-bit routines)
+
+	mov	rbx,QWORD[((-8))+rax]	; reload the six saved registers stored just below that point
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	r15,QWORD[((-48))+rax]
+	mov	QWORD[144+r8],rbx	; context->Rbx
+	mov	QWORD[160+r8],rbp	; context->Rbp
+	mov	QWORD[216+r8],r12	; context->R12
+	mov	QWORD[224+r8],r13	; context->R13
+	mov	QWORD[232+r8],r14	; context->R14
+	mov	QWORD[240+r8],r15	; context->R15
+
+$L$in_prologue:
+	mov	rdi,QWORD[8+rax]	; rdi/rsi values stored at 8 and 16 above the frame base held in rax
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context->Rsp
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+
+	mov	rdi,QWORD[40+r9]	; copy destination: disp->ContextRecord
+	mov	rsi,r8	; copy source: the context updated above
+	mov	ecx,154	; 154 qwords = 1232 bytes
+	DD	0xa548f3fc	; raw bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rsi,r9
+	xor	rcx,rcx	; arg1 = 0
+	mov	rdx,QWORD[8+rsi]	; arg2 = disp->ImageBase
+	mov	r8,QWORD[rsi]	; arg3 = disp->ControlPc
+	mov	r9,QWORD[16+rsi]	; arg4 = disp->FunctionEntry
+	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
+	lea	r11,[56+rsi]	; &disp->HandlerData
+	lea	r12,[24+rsi]	; &disp->EstablisherFrame
+	mov	QWORD[32+rsp],r10	; arg5
+	mov	QWORD[40+rsp],r11	; arg6
+	mov	QWORD[48+rsp],r12	; arg7
+	mov	QWORD[56+rsp],rcx	; arg8 = 0
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; return 1 (ExceptionContinueSearch in the Win64 handler ABI)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4
+ALIGN	4
+	DD	$L$SEH_begin_gcm_gmult_4bit wrt ..imagebase	; each record below is three image-relative dwords: begin, end, unwind info
+	DD	$L$SEH_end_gcm_gmult_4bit wrt ..imagebase
+	DD	$L$SEH_info_gcm_gmult_4bit wrt ..imagebase
+
+	DD	$L$SEH_begin_gcm_ghash_4bit wrt ..imagebase
+	DD	$L$SEH_end_gcm_ghash_4bit wrt ..imagebase
+	DD	$L$SEH_info_gcm_ghash_4bit wrt ..imagebase
+
+	DD	$L$SEH_begin_gcm_init_clmul wrt ..imagebase
+	DD	$L$SEH_end_gcm_init_clmul wrt ..imagebase
+	DD	$L$SEH_info_gcm_init_clmul wrt ..imagebase
+
+	DD	$L$SEH_begin_gcm_ghash_clmul wrt ..imagebase
+	DD	$L$SEH_end_gcm_ghash_clmul wrt ..imagebase
+	DD	$L$SEH_info_gcm_ghash_clmul wrt ..imagebase
+	DD	$L$SEH_begin_gcm_init_avx wrt ..imagebase
+	DD	$L$SEH_end_gcm_init_avx wrt ..imagebase
+	DD	$L$SEH_info_gcm_init_clmul wrt ..imagebase	; gcm_init_avx reuses the gcm_init_clmul unwind info
+
+	DD	$L$SEH_begin_gcm_ghash_avx wrt ..imagebase
+	DD	$L$SEH_end_gcm_ghash_avx wrt ..imagebase
+	DD	$L$SEH_info_gcm_ghash_clmul wrt ..imagebase	; gcm_ghash_avx reuses the gcm_ghash_clmul unwind info
+section	.xdata rdata align=8
+ALIGN	8
+$L$SEH_info_gcm_gmult_4bit:
+DB	9,0,0,0	; header byte 9 (version 1 with the exception-handler flag in the UNWIND_INFO encoding), no unwind codes
+	DD	se_handler wrt ..imagebase
+	DD	$L$gmult_prologue wrt ..imagebase,$L$gmult_epilogue wrt ..imagebase	; HandlerData[0..1], read by se_handler
+$L$SEH_info_gcm_ghash_4bit:
+DB	9,0,0,0
+	DD	se_handler wrt ..imagebase
+	DD	$L$ghash_prologue wrt ..imagebase,$L$ghash_epilogue wrt ..imagebase	; HandlerData[0..1], read by se_handler
+$L$SEH_info_gcm_init_clmul:
+DB	0x01,0x08,0x03,0x00	; version 1, prologue size 8 bytes, 3 unwind-code slots, no frame register
+DB	0x08,0x68,0x00,0x00	; at prologue offset 8: xmm6 saved at [rsp+0x00]
+DB	0x04,0x22,0x00,0x00	; at prologue offset 4: rsp lowered by 0x18; trailing 0,0 is padding
+$L$SEH_info_gcm_ghash_clmul:
+DB	0x01,0x33,0x16,0x00	; version 1, prologue size 0x33 bytes, 0x16 unwind-code slots, no frame register
+DB	0x33,0xf8,0x09,0x00	; xmm15 saved at [rsp+0x90]
+DB	0x2e,0xe8,0x08,0x00	; xmm14 saved at [rsp+0x80]
+DB	0x29,0xd8,0x07,0x00	; xmm13 saved at [rsp+0x70]
+DB	0x24,0xc8,0x06,0x00	; xmm12 saved at [rsp+0x60]
+DB	0x1f,0xb8,0x05,0x00	; xmm11 saved at [rsp+0x50]
+DB	0x1a,0xa8,0x04,0x00	; xmm10 saved at [rsp+0x40]
+DB	0x15,0x98,0x03,0x00	; xmm9 saved at [rsp+0x30]
+DB	0x10,0x88,0x02,0x00	; xmm8 saved at [rsp+0x20]
+DB	0x0c,0x78,0x01,0x00	; xmm7 saved at [rsp+0x10]
+DB	0x08,0x68,0x00,0x00	; xmm6 saved at [rsp+0x00]
+DB	0x04,0x01,0x15,0x00	; rsp lowered by 0x15*8 = 168 bytes
diff --git a/third_party/boringssl/win-x86_64/crypto/fipsmodule/md5-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/fipsmodule/md5-x86_64.asm
new file mode 100644
index 0000000..0e9d2c6
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/fipsmodule/md5-x86_64.asm
@@ -0,0 +1,776 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.text code align=64
+
+ALIGN	16
+
+global	md5_block_asm_data_order	; MD5 compression over a run of 64-byte blocks; Win64 args: rcx = 16-byte state, rdx = input, r8 = number of blocks
+
+md5_block_asm_data_order:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_md5_block_asm_data_order:
+	mov	rdi,rcx	; remap the three Win64 argument registers to rdi/rsi/rdx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+	push	rbp
+	push	rbx
+	push	r12
+	push	r14
+	push	r15
+$L$prologue:
+
+
+
+
+	mov	rbp,rdi	; rbp = state pointer
+	shl	rdx,6	; block count * 64 = byte length
+	lea	rdi,[rdx*1+rsi]	; rdi = end of input
+	mov	eax,DWORD[rbp]	; A
+	mov	ebx,DWORD[4+rbp]	; B
+	mov	ecx,DWORD[8+rbp]	; C
+	mov	edx,DWORD[12+rbp]	; D
+
+
+
+
+
+
+
+	cmp	rsi,rdi
+	je	NEAR $L$end	; zero blocks: state is written back unchanged
+
+
+$L$loop:
+	mov	r8d,eax	; keep the incoming A,B,C,D for the feed-forward add after round 4
+	mov	r9d,ebx
+	mov	r14d,ecx
+	mov	r15d,edx
+	mov	r10d,DWORD[rsi]	; round 1 begins: 16 steps using ((c^d)&b)^d, rotates 7/12/17/22
+	mov	r11d,edx
+	xor	r11d,ecx
+	lea	eax,[((-680876936))+r10*1+rax]
+	and	r11d,ebx
+	xor	r11d,edx
+	mov	r10d,DWORD[4+rsi]
+	add	eax,r11d
+	rol	eax,7
+	mov	r11d,ecx
+	add	eax,ebx
+	xor	r11d,ebx
+	lea	edx,[((-389564586))+r10*1+rdx]
+	and	r11d,eax
+	xor	r11d,ecx
+	mov	r10d,DWORD[8+rsi]
+	add	edx,r11d
+	rol	edx,12
+	mov	r11d,ebx
+	add	edx,eax
+	xor	r11d,eax
+	lea	ecx,[606105819+r10*1+rcx]
+	and	r11d,edx
+	xor	r11d,ebx
+	mov	r10d,DWORD[12+rsi]
+	add	ecx,r11d
+	rol	ecx,17
+	mov	r11d,eax
+	add	ecx,edx
+	xor	r11d,edx
+	lea	ebx,[((-1044525330))+r10*1+rbx]
+	and	r11d,ecx
+	xor	r11d,eax
+	mov	r10d,DWORD[16+rsi]
+	add	ebx,r11d
+	rol	ebx,22
+	mov	r11d,edx
+	add	ebx,ecx
+	xor	r11d,ecx
+	lea	eax,[((-176418897))+r10*1+rax]
+	and	r11d,ebx
+	xor	r11d,edx
+	mov	r10d,DWORD[20+rsi]
+	add	eax,r11d
+	rol	eax,7
+	mov	r11d,ecx
+	add	eax,ebx
+	xor	r11d,ebx
+	lea	edx,[1200080426+r10*1+rdx]
+	and	r11d,eax
+	xor	r11d,ecx
+	mov	r10d,DWORD[24+rsi]
+	add	edx,r11d
+	rol	edx,12
+	mov	r11d,ebx
+	add	edx,eax
+	xor	r11d,eax
+	lea	ecx,[((-1473231341))+r10*1+rcx]
+	and	r11d,edx
+	xor	r11d,ebx
+	mov	r10d,DWORD[28+rsi]
+	add	ecx,r11d
+	rol	ecx,17
+	mov	r11d,eax
+	add	ecx,edx
+	xor	r11d,edx
+	lea	ebx,[((-45705983))+r10*1+rbx]
+	and	r11d,ecx
+	xor	r11d,eax
+	mov	r10d,DWORD[32+rsi]
+	add	ebx,r11d
+	rol	ebx,22
+	mov	r11d,edx
+	add	ebx,ecx
+	xor	r11d,ecx
+	lea	eax,[1770035416+r10*1+rax]
+	and	r11d,ebx
+	xor	r11d,edx
+	mov	r10d,DWORD[36+rsi]
+	add	eax,r11d
+	rol	eax,7
+	mov	r11d,ecx
+	add	eax,ebx
+	xor	r11d,ebx
+	lea	edx,[((-1958414417))+r10*1+rdx]
+	and	r11d,eax
+	xor	r11d,ecx
+	mov	r10d,DWORD[40+rsi]
+	add	edx,r11d
+	rol	edx,12
+	mov	r11d,ebx
+	add	edx,eax
+	xor	r11d,eax
+	lea	ecx,[((-42063))+r10*1+rcx]
+	and	r11d,edx
+	xor	r11d,ebx
+	mov	r10d,DWORD[44+rsi]
+	add	ecx,r11d
+	rol	ecx,17
+	mov	r11d,eax
+	add	ecx,edx
+	xor	r11d,edx
+	lea	ebx,[((-1990404162))+r10*1+rbx]
+	and	r11d,ecx
+	xor	r11d,eax
+	mov	r10d,DWORD[48+rsi]
+	add	ebx,r11d
+	rol	ebx,22
+	mov	r11d,edx
+	add	ebx,ecx
+	xor	r11d,ecx
+	lea	eax,[1804603682+r10*1+rax]
+	and	r11d,ebx
+	xor	r11d,edx
+	mov	r10d,DWORD[52+rsi]
+	add	eax,r11d
+	rol	eax,7
+	mov	r11d,ecx
+	add	eax,ebx
+	xor	r11d,ebx
+	lea	edx,[((-40341101))+r10*1+rdx]
+	and	r11d,eax
+	xor	r11d,ecx
+	mov	r10d,DWORD[56+rsi]
+	add	edx,r11d
+	rol	edx,12
+	mov	r11d,ebx
+	add	edx,eax
+	xor	r11d,eax
+	lea	ecx,[((-1502002290))+r10*1+rcx]
+	and	r11d,edx
+	xor	r11d,ebx
+	mov	r10d,DWORD[60+rsi]
+	add	ecx,r11d
+	rol	ecx,17
+	mov	r11d,eax
+	add	ecx,edx
+	xor	r11d,edx
+	lea	ebx,[1236535329+r10*1+rbx]
+	and	r11d,ecx
+	xor	r11d,eax
+	mov	r10d,DWORD[rsi]	; value never consumed: r10d is reloaded before its next use
+	add	ebx,r11d
+	rol	ebx,22
+	mov	r11d,edx
+	add	ebx,ecx
+	mov	r10d,DWORD[4+rsi]	; round 2 begins: 16 steps using (b&d)|(c&~d), rotates 5/9/14/20
+	mov	r11d,edx
+	mov	r12d,edx
+	not	r11d
+	lea	eax,[((-165796510))+r10*1+rax]
+	and	r12d,ebx
+	and	r11d,ecx
+	mov	r10d,DWORD[24+rsi]
+	or	r12d,r11d
+	mov	r11d,ecx
+	add	eax,r12d
+	mov	r12d,ecx
+	rol	eax,5
+	add	eax,ebx
+	not	r11d
+	lea	edx,[((-1069501632))+r10*1+rdx]
+	and	r12d,eax
+	and	r11d,ebx
+	mov	r10d,DWORD[44+rsi]
+	or	r12d,r11d
+	mov	r11d,ebx
+	add	edx,r12d
+	mov	r12d,ebx
+	rol	edx,9
+	add	edx,eax
+	not	r11d
+	lea	ecx,[643717713+r10*1+rcx]
+	and	r12d,edx
+	and	r11d,eax
+	mov	r10d,DWORD[rsi]
+	or	r12d,r11d
+	mov	r11d,eax
+	add	ecx,r12d
+	mov	r12d,eax
+	rol	ecx,14
+	add	ecx,edx
+	not	r11d
+	lea	ebx,[((-373897302))+r10*1+rbx]
+	and	r12d,ecx
+	and	r11d,edx
+	mov	r10d,DWORD[20+rsi]
+	or	r12d,r11d
+	mov	r11d,edx
+	add	ebx,r12d
+	mov	r12d,edx
+	rol	ebx,20
+	add	ebx,ecx
+	not	r11d
+	lea	eax,[((-701558691))+r10*1+rax]
+	and	r12d,ebx
+	and	r11d,ecx
+	mov	r10d,DWORD[40+rsi]
+	or	r12d,r11d
+	mov	r11d,ecx
+	add	eax,r12d
+	mov	r12d,ecx
+	rol	eax,5
+	add	eax,ebx
+	not	r11d
+	lea	edx,[38016083+r10*1+rdx]
+	and	r12d,eax
+	and	r11d,ebx
+	mov	r10d,DWORD[60+rsi]
+	or	r12d,r11d
+	mov	r11d,ebx
+	add	edx,r12d
+	mov	r12d,ebx
+	rol	edx,9
+	add	edx,eax
+	not	r11d
+	lea	ecx,[((-660478335))+r10*1+rcx]
+	and	r12d,edx
+	and	r11d,eax
+	mov	r10d,DWORD[16+rsi]
+	or	r12d,r11d
+	mov	r11d,eax
+	add	ecx,r12d
+	mov	r12d,eax
+	rol	ecx,14
+	add	ecx,edx
+	not	r11d
+	lea	ebx,[((-405537848))+r10*1+rbx]
+	and	r12d,ecx
+	and	r11d,edx
+	mov	r10d,DWORD[36+rsi]
+	or	r12d,r11d
+	mov	r11d,edx
+	add	ebx,r12d
+	mov	r12d,edx
+	rol	ebx,20
+	add	ebx,ecx
+	not	r11d
+	lea	eax,[568446438+r10*1+rax]
+	and	r12d,ebx
+	and	r11d,ecx
+	mov	r10d,DWORD[56+rsi]
+	or	r12d,r11d
+	mov	r11d,ecx
+	add	eax,r12d
+	mov	r12d,ecx
+	rol	eax,5
+	add	eax,ebx
+	not	r11d
+	lea	edx,[((-1019803690))+r10*1+rdx]
+	and	r12d,eax
+	and	r11d,ebx
+	mov	r10d,DWORD[12+rsi]
+	or	r12d,r11d
+	mov	r11d,ebx
+	add	edx,r12d
+	mov	r12d,ebx
+	rol	edx,9
+	add	edx,eax
+	not	r11d
+	lea	ecx,[((-187363961))+r10*1+rcx]
+	and	r12d,edx
+	and	r11d,eax
+	mov	r10d,DWORD[32+rsi]
+	or	r12d,r11d
+	mov	r11d,eax
+	add	ecx,r12d
+	mov	r12d,eax
+	rol	ecx,14
+	add	ecx,edx
+	not	r11d
+	lea	ebx,[1163531501+r10*1+rbx]
+	and	r12d,ecx
+	and	r11d,edx
+	mov	r10d,DWORD[52+rsi]
+	or	r12d,r11d
+	mov	r11d,edx
+	add	ebx,r12d
+	mov	r12d,edx
+	rol	ebx,20
+	add	ebx,ecx
+	not	r11d
+	lea	eax,[((-1444681467))+r10*1+rax]
+	and	r12d,ebx
+	and	r11d,ecx
+	mov	r10d,DWORD[8+rsi]
+	or	r12d,r11d
+	mov	r11d,ecx
+	add	eax,r12d
+	mov	r12d,ecx
+	rol	eax,5
+	add	eax,ebx
+	not	r11d
+	lea	edx,[((-51403784))+r10*1+rdx]
+	and	r12d,eax
+	and	r11d,ebx
+	mov	r10d,DWORD[28+rsi]
+	or	r12d,r11d
+	mov	r11d,ebx
+	add	edx,r12d
+	mov	r12d,ebx
+	rol	edx,9
+	add	edx,eax
+	not	r11d
+	lea	ecx,[1735328473+r10*1+rcx]
+	and	r12d,edx
+	and	r11d,eax
+	mov	r10d,DWORD[48+rsi]
+	or	r12d,r11d
+	mov	r11d,eax
+	add	ecx,r12d
+	mov	r12d,eax
+	rol	ecx,14
+	add	ecx,edx
+	not	r11d
+	lea	ebx,[((-1926607734))+r10*1+rbx]
+	and	r12d,ecx
+	and	r11d,edx
+	mov	r10d,DWORD[rsi]	; value never consumed: r10d is reloaded before its next use
+	or	r12d,r11d
+	mov	r11d,edx
+	add	ebx,r12d
+	mov	r12d,edx
+	rol	ebx,20
+	add	ebx,ecx
+	mov	r10d,DWORD[20+rsi]	; round 3 begins: 16 steps using b^c^d, rotates 4/11/16/23
+	mov	r11d,ecx
+	lea	eax,[((-378558))+r10*1+rax]
+	mov	r10d,DWORD[32+rsi]
+	xor	r11d,edx
+	xor	r11d,ebx
+	add	eax,r11d
+	rol	eax,4
+	mov	r11d,ebx
+	add	eax,ebx
+	lea	edx,[((-2022574463))+r10*1+rdx]
+	mov	r10d,DWORD[44+rsi]
+	xor	r11d,ecx
+	xor	r11d,eax
+	add	edx,r11d
+	rol	edx,11
+	mov	r11d,eax
+	add	edx,eax
+	lea	ecx,[1839030562+r10*1+rcx]
+	mov	r10d,DWORD[56+rsi]
+	xor	r11d,ebx
+	xor	r11d,edx
+	add	ecx,r11d
+	rol	ecx,16
+	mov	r11d,edx
+	add	ecx,edx
+	lea	ebx,[((-35309556))+r10*1+rbx]
+	mov	r10d,DWORD[4+rsi]
+	xor	r11d,eax
+	xor	r11d,ecx
+	add	ebx,r11d
+	rol	ebx,23
+	mov	r11d,ecx
+	add	ebx,ecx
+	lea	eax,[((-1530992060))+r10*1+rax]
+	mov	r10d,DWORD[16+rsi]
+	xor	r11d,edx
+	xor	r11d,ebx
+	add	eax,r11d
+	rol	eax,4
+	mov	r11d,ebx
+	add	eax,ebx
+	lea	edx,[1272893353+r10*1+rdx]
+	mov	r10d,DWORD[28+rsi]
+	xor	r11d,ecx
+	xor	r11d,eax
+	add	edx,r11d
+	rol	edx,11
+	mov	r11d,eax
+	add	edx,eax
+	lea	ecx,[((-155497632))+r10*1+rcx]
+	mov	r10d,DWORD[40+rsi]
+	xor	r11d,ebx
+	xor	r11d,edx
+	add	ecx,r11d
+	rol	ecx,16
+	mov	r11d,edx
+	add	ecx,edx
+	lea	ebx,[((-1094730640))+r10*1+rbx]
+	mov	r10d,DWORD[52+rsi]
+	xor	r11d,eax
+	xor	r11d,ecx
+	add	ebx,r11d
+	rol	ebx,23
+	mov	r11d,ecx
+	add	ebx,ecx
+	lea	eax,[681279174+r10*1+rax]
+	mov	r10d,DWORD[rsi]
+	xor	r11d,edx
+	xor	r11d,ebx
+	add	eax,r11d
+	rol	eax,4
+	mov	r11d,ebx
+	add	eax,ebx
+	lea	edx,[((-358537222))+r10*1+rdx]
+	mov	r10d,DWORD[12+rsi]
+	xor	r11d,ecx
+	xor	r11d,eax
+	add	edx,r11d
+	rol	edx,11
+	mov	r11d,eax
+	add	edx,eax
+	lea	ecx,[((-722521979))+r10*1+rcx]
+	mov	r10d,DWORD[24+rsi]
+	xor	r11d,ebx
+	xor	r11d,edx
+	add	ecx,r11d
+	rol	ecx,16
+	mov	r11d,edx
+	add	ecx,edx
+	lea	ebx,[76029189+r10*1+rbx]
+	mov	r10d,DWORD[36+rsi]
+	xor	r11d,eax
+	xor	r11d,ecx
+	add	ebx,r11d
+	rol	ebx,23
+	mov	r11d,ecx
+	add	ebx,ecx
+	lea	eax,[((-640364487))+r10*1+rax]
+	mov	r10d,DWORD[48+rsi]
+	xor	r11d,edx
+	xor	r11d,ebx
+	add	eax,r11d
+	rol	eax,4
+	mov	r11d,ebx
+	add	eax,ebx
+	lea	edx,[((-421815835))+r10*1+rdx]
+	mov	r10d,DWORD[60+rsi]
+	xor	r11d,ecx
+	xor	r11d,eax
+	add	edx,r11d
+	rol	edx,11
+	mov	r11d,eax
+	add	edx,eax
+	lea	ecx,[530742520+r10*1+rcx]
+	mov	r10d,DWORD[8+rsi]
+	xor	r11d,ebx
+	xor	r11d,edx
+	add	ecx,r11d
+	rol	ecx,16
+	mov	r11d,edx
+	add	ecx,edx
+	lea	ebx,[((-995338651))+r10*1+rbx]
+	mov	r10d,DWORD[rsi]	; value never consumed: r10d is reloaded before its next use
+	xor	r11d,eax
+	xor	r11d,ecx
+	add	ebx,r11d
+	rol	ebx,23
+	mov	r11d,ecx
+	add	ebx,ecx
+	mov	r10d,DWORD[rsi]	; round 4 begins: 16 steps using c^(b|~d), rotates 6/10/15/21
+	mov	r11d,0xffffffff
+	xor	r11d,edx
+	lea	eax,[((-198630844))+r10*1+rax]
+	or	r11d,ebx
+	xor	r11d,ecx
+	add	eax,r11d
+	mov	r10d,DWORD[28+rsi]
+	mov	r11d,0xffffffff
+	rol	eax,6
+	xor	r11d,ecx
+	add	eax,ebx
+	lea	edx,[1126891415+r10*1+rdx]
+	or	r11d,eax
+	xor	r11d,ebx
+	add	edx,r11d
+	mov	r10d,DWORD[56+rsi]
+	mov	r11d,0xffffffff
+	rol	edx,10
+	xor	r11d,ebx
+	add	edx,eax
+	lea	ecx,[((-1416354905))+r10*1+rcx]
+	or	r11d,edx
+	xor	r11d,eax
+	add	ecx,r11d
+	mov	r10d,DWORD[20+rsi]
+	mov	r11d,0xffffffff
+	rol	ecx,15
+	xor	r11d,eax
+	add	ecx,edx
+	lea	ebx,[((-57434055))+r10*1+rbx]
+	or	r11d,ecx
+	xor	r11d,edx
+	add	ebx,r11d
+	mov	r10d,DWORD[48+rsi]
+	mov	r11d,0xffffffff
+	rol	ebx,21
+	xor	r11d,edx
+	add	ebx,ecx
+	lea	eax,[1700485571+r10*1+rax]
+	or	r11d,ebx
+	xor	r11d,ecx
+	add	eax,r11d
+	mov	r10d,DWORD[12+rsi]
+	mov	r11d,0xffffffff
+	rol	eax,6
+	xor	r11d,ecx
+	add	eax,ebx
+	lea	edx,[((-1894986606))+r10*1+rdx]
+	or	r11d,eax
+	xor	r11d,ebx
+	add	edx,r11d
+	mov	r10d,DWORD[40+rsi]
+	mov	r11d,0xffffffff
+	rol	edx,10
+	xor	r11d,ebx
+	add	edx,eax
+	lea	ecx,[((-1051523))+r10*1+rcx]
+	or	r11d,edx
+	xor	r11d,eax
+	add	ecx,r11d
+	mov	r10d,DWORD[4+rsi]
+	mov	r11d,0xffffffff
+	rol	ecx,15
+	xor	r11d,eax
+	add	ecx,edx
+	lea	ebx,[((-2054922799))+r10*1+rbx]
+	or	r11d,ecx
+	xor	r11d,edx
+	add	ebx,r11d
+	mov	r10d,DWORD[32+rsi]
+	mov	r11d,0xffffffff
+	rol	ebx,21
+	xor	r11d,edx
+	add	ebx,ecx
+	lea	eax,[1873313359+r10*1+rax]
+	or	r11d,ebx
+	xor	r11d,ecx
+	add	eax,r11d
+	mov	r10d,DWORD[60+rsi]
+	mov	r11d,0xffffffff
+	rol	eax,6
+	xor	r11d,ecx
+	add	eax,ebx
+	lea	edx,[((-30611744))+r10*1+rdx]
+	or	r11d,eax
+	xor	r11d,ebx
+	add	edx,r11d
+	mov	r10d,DWORD[24+rsi]
+	mov	r11d,0xffffffff
+	rol	edx,10
+	xor	r11d,ebx
+	add	edx,eax
+	lea	ecx,[((-1560198380))+r10*1+rcx]
+	or	r11d,edx
+	xor	r11d,eax
+	add	ecx,r11d
+	mov	r10d,DWORD[52+rsi]
+	mov	r11d,0xffffffff
+	rol	ecx,15
+	xor	r11d,eax
+	add	ecx,edx
+	lea	ebx,[1309151649+r10*1+rbx]
+	or	r11d,ecx
+	xor	r11d,edx
+	add	ebx,r11d
+	mov	r10d,DWORD[16+rsi]
+	mov	r11d,0xffffffff
+	rol	ebx,21
+	xor	r11d,edx
+	add	ebx,ecx
+	lea	eax,[((-145523070))+r10*1+rax]
+	or	r11d,ebx
+	xor	r11d,ecx
+	add	eax,r11d
+	mov	r10d,DWORD[44+rsi]
+	mov	r11d,0xffffffff
+	rol	eax,6
+	xor	r11d,ecx
+	add	eax,ebx
+	lea	edx,[((-1120210379))+r10*1+rdx]
+	or	r11d,eax
+	xor	r11d,ebx
+	add	edx,r11d
+	mov	r10d,DWORD[8+rsi]
+	mov	r11d,0xffffffff
+	rol	edx,10
+	xor	r11d,ebx
+	add	edx,eax
+	lea	ecx,[718787259+r10*1+rcx]
+	or	r11d,edx
+	xor	r11d,eax
+	add	ecx,r11d
+	mov	r10d,DWORD[36+rsi]
+	mov	r11d,0xffffffff
+	rol	ecx,15
+	xor	r11d,eax
+	add	ecx,edx
+	lea	ebx,[((-343485551))+r10*1+rbx]
+	or	r11d,ecx
+	xor	r11d,edx
+	add	ebx,r11d
+	mov	r10d,DWORD[rsi]	; r10d/r11d set from here on are never consumed: both are rewritten at the top of the loop
+	mov	r11d,0xffffffff
+	rol	ebx,21
+	xor	r11d,edx
+	add	ebx,ecx
+
+	add	eax,r8d	; feed-forward: add the state saved at the top of the loop
+	add	ebx,r9d
+	add	ecx,r14d
+	add	edx,r15d
+
+
+	add	rsi,64	; advance to the next 64-byte block
+	cmp	rsi,rdi
+	jb	NEAR $L$loop	; continue while input pointer < end pointer
+
+
+$L$end:
+	mov	DWORD[rbp],eax	; write A,B,C,D back to the state
+	mov	DWORD[4+rbp],ebx
+	mov	DWORD[8+rbp],ecx
+	mov	DWORD[12+rbp],edx
+
+	mov	r15,QWORD[rsp]	; restore the five pushed registers by load, then drop 5*8 = 40 bytes
+	mov	r14,QWORD[8+rsp]
+	mov	r12,QWORD[16+rsp]
+	mov	rbx,QWORD[24+rsp]
+	mov	rbp,QWORD[32+rsp]
+	add	rsp,40
+$L$epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_md5_block_asm_data_order:
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+se_handler:	; Win64 exception handler for md5_block_asm_data_order; r8 is used as the CONTEXT pointer, r9 as the DISPATCHER_CONTEXT pointer
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; scratch area; offsets 32..56 carry the stack arguments of the call below
+
+	mov	rax,QWORD[120+r8]	; context->Rax (field names follow the Win64 CONTEXT layout); the function prologue copies the entry rsp into rax
+	mov	rbx,QWORD[248+r8]	; context->Rip
+
+	lea	r10,[$L$prologue]
+	cmp	rbx,r10
+	jb	NEAR $L$in_prologue	; Rip below $L$prologue: keep context->Rax as the frame base
+
+	mov	rax,QWORD[152+r8]	; context->Rsp
+
+	lea	r10,[$L$epilogue]
+	cmp	rbx,r10
+	jae	NEAR $L$in_prologue	; Rip at or past $L$epilogue: registers already restored, use Rsp unchanged
+
+	lea	rax,[40+rax]	; step over the five 8-byte pushes of the function prologue
+
+	mov	rbp,QWORD[((-8))+rax]	; reload in push order: rbp, rbx, r12, r14, r15
+	mov	rbx,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r14,QWORD[((-32))+rax]
+	mov	r15,QWORD[((-40))+rax]
+	mov	QWORD[144+r8],rbx	; context->Rbx
+	mov	QWORD[160+r8],rbp	; context->Rbp
+	mov	QWORD[216+r8],r12	; context->R12
+	mov	QWORD[232+r8],r14	; context->R14
+	mov	QWORD[240+r8],r15	; context->R15
+
+$L$in_prologue:
+	mov	rdi,QWORD[8+rax]	; rdi/rsi were stored at 8 and 16 above the entry rsp by the WIN64 prologue
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context->Rsp
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+
+	mov	rdi,QWORD[40+r9]	; copy destination: disp->ContextRecord
+	mov	rsi,r8	; copy source: the context updated above
+	mov	ecx,154	; 154 qwords = 1232 bytes
+	DD	0xa548f3fc	; raw bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rsi,r9
+	xor	rcx,rcx	; arg1 = 0
+	mov	rdx,QWORD[8+rsi]	; arg2 = disp->ImageBase
+	mov	r8,QWORD[rsi]	; arg3 = disp->ControlPc
+	mov	r9,QWORD[16+rsi]	; arg4 = disp->FunctionEntry
+	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
+	lea	r11,[56+rsi]	; &disp->HandlerData
+	lea	r12,[24+rsi]	; &disp->EstablisherFrame
+	mov	QWORD[32+rsp],r10	; arg5
+	mov	QWORD[40+rsp],r11	; arg6
+	mov	QWORD[48+rsp],r12	; arg7
+	mov	QWORD[56+rsp],rcx	; arg8 = 0
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; return 1 (ExceptionContinueSearch in the Win64 handler ABI)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4
+ALIGN	4
+	DD	$L$SEH_begin_md5_block_asm_data_order wrt ..imagebase	; function-table record: image-relative begin, end, unwind info
+	DD	$L$SEH_end_md5_block_asm_data_order wrt ..imagebase
+	DD	$L$SEH_info_md5_block_asm_data_order wrt ..imagebase
+
+section	.xdata rdata align=8
+ALIGN	8
+$L$SEH_info_md5_block_asm_data_order:
+DB	9,0,0,0	; header byte 9 (version 1 with the exception-handler flag in the UNWIND_INFO encoding), no unwind codes
+	DD	se_handler wrt ..imagebase	; no HandlerData follows: this file's se_handler compares Rip against $L$prologue/$L$epilogue directly
diff --git a/third_party/boringssl/win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm b/third_party/boringssl/win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm
new file mode 100644
index 0000000..64db9d9
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm
@@ -0,0 +1,1947 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.text code align=64
+
+EXTERN	OPENSSL_ia32cap_P
+
+
+ALIGN	64
+$L$poly:	; P-256 field prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1 as four 64-bit limbs, least significant first
+	DQ	0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001
+
+$L$One:	; eight dwords of 1
+	DD	1,1,1,1,1,1,1,1
+$L$Two:	; eight dwords of 2
+	DD	2,2,2,2,2,2,2,2
+$L$Three:	; eight dwords of 3
+	DD	3,3,3,3,3,3,3,3
+$L$ONE_mont:	; 2^256 mod p (the value 1 in Montgomery form with R = 2^256), least significant limb first
+	DQ	0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe
+
+
+
+global	ecp_nistz256_neg	; res = (0 - a) mod p for a 4-limb value; Win64 args: rcx = res, rdx = a
+
+ALIGN	32
+ecp_nistz256_neg:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_neg:
+	mov	rdi,rcx	; rdi = res pointer
+	mov	rsi,rdx	; rsi = a pointer
+
+
+	push	r12
+	push	r13
+
+	xor	r8,r8	; r8..r11 = 0 (four-limb accumulator), r13 = 0 (borrow word)
+	xor	r9,r9
+	xor	r10,r10
+	xor	r11,r11
+	xor	r13,r13
+
+	sub	r8,QWORD[rsi]	; r11:r8 = 0 - a, borrow propagated limb by limb
+	sbb	r9,QWORD[8+rsi]
+	sbb	r10,QWORD[16+rsi]
+	mov	rax,r8	; rax/rdx/rcx/r12 keep the raw difference for the select below
+	sbb	r11,QWORD[24+rsi]
+	lea	rsi,[$L$poly]	; rsi now points at the modulus
+	mov	rdx,r9
+	sbb	r13,0	; r13 = 0 if no borrow (a == 0), all-ones otherwise
+
+	add	r8,QWORD[rsi]	; r11:r8 = (0 - a) + p
+	mov	rcx,r10
+	adc	r9,QWORD[8+rsi]
+	adc	r10,QWORD[16+rsi]
+	mov	r12,r11
+	adc	r11,QWORD[24+rsi]
+	test	r13,r13	; ZF set when the subtraction did not borrow
+
+	cmovz	r8,rax	; no borrow: keep the raw difference instead of the +p result (branch-free select)
+	cmovz	r9,rdx
+	mov	QWORD[rdi],r8
+	cmovz	r10,rcx
+	mov	QWORD[8+rdi],r9
+	cmovz	r11,r12
+	mov	QWORD[16+rdi],r10
+	mov	QWORD[24+rdi],r11
+
+	pop	r13
+	pop	r12
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_ecp_nistz256_neg:
+
+
+
+
+
+
+global	ecp_nistz256_mul_mont	; public wrapper around __ecp_nistz256_mul_montq; Win64 args: rcx = res, rdx = a, r8 = b
+
+ALIGN	32
+ecp_nistz256_mul_mont:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_mul_mont:
+	mov	rdi,rcx	; rdi = res pointer
+	mov	rsi,rdx	; rsi = a pointer
+	mov	rdx,r8	; rdx = b pointer
+
+
+$L$mul_mont:
+	push	rbp
+	push	rbx
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	mov	rbx,rdx	; helper reads b[1..3] through rbx
+	mov	rax,QWORD[rdx]	; rax = b[0]
+	mov	r9,QWORD[rsi]	; r9..r12 = a[0..3]
+	mov	r10,QWORD[8+rsi]
+	mov	r11,QWORD[16+rsi]
+	mov	r12,QWORD[24+rsi]
+
+	call	__ecp_nistz256_mul_montq
+$L$mul_mont_done:
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbx
+	pop	rbp
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_ecp_nistz256_mul_mont:
+
+
+ALIGN	32
+__ecp_nistz256_mul_montq:	; internal helper: in rax = b[0], r9..r12 = a[0..3], rsi = a, rbx = b, rdi = res; stores 4 limbs at [rdi]; overwrites rax, rbx, rcx, rdx, rbp, r8..r15
+
+
+	mov	rbp,rax	; rbp = b[0], reused for each of the four a[i]*b[0] products
+	mul	r9
+	mov	r14,QWORD[(($L$poly+8))]	; r14 = second limb of $L$poly, used by the final subtraction
+	mov	r8,rax
+	mov	rax,rbp
+	mov	r9,rdx
+
+	mul	r10
+	mov	r15,QWORD[(($L$poly+24))]	; r15 = top limb of $L$poly, multiplier in every reduction step
+	add	r9,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	r11
+	add	r10,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	r12
+	add	r11,rax
+	mov	rax,r8
+	adc	rdx,0
+	xor	r13,r13
+	mov	r12,rdx
+
+
+
+
+
+
+
+
+
+
+	mov	rbp,r8	; reduction step 1: fold the lowest limb (r8) into the limbs above it
+	shl	r8,32
+	mul	r15	; rdx:rax = lowest limb * top limb of $L$poly
+	shr	rbp,32
+	add	r9,r8	; add (limb << 32) one limb up ...
+	adc	r10,rbp	; ... and (limb >> 32) two limbs up
+	adc	r11,rax
+	mov	rax,QWORD[8+rbx]	; rax = b[1]
+	adc	r12,rdx
+	adc	r13,0
+	xor	r8,r8	; r8 is recycled as the new top carry limb
+
+
+
+	mov	rbp,rax	; accumulate a[0..3] * b[1]
+	mul	QWORD[rsi]
+	add	r9,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[8+rsi]
+	add	r10,rcx
+	adc	rdx,0
+	add	r10,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[16+rsi]
+	add	r11,rcx
+	adc	rdx,0
+	add	r11,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[24+rsi]
+	add	r12,rcx
+	adc	rdx,0
+	add	r12,rax
+	mov	rax,r9
+	adc	r13,rdx
+	adc	r8,0
+
+
+
+	mov	rbp,r9	; reduction step 2: same folding pattern applied to r9
+	shl	r9,32
+	mul	r15
+	shr	rbp,32
+	add	r10,r9
+	adc	r11,rbp
+	adc	r12,rax
+	mov	rax,QWORD[16+rbx]	; rax = b[2]
+	adc	r13,rdx
+	adc	r8,0
+	xor	r9,r9
+
+
+
+	mov	rbp,rax	; accumulate a[0..3] * b[2]
+	mul	QWORD[rsi]
+	add	r10,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[8+rsi]
+	add	r11,rcx
+	adc	rdx,0
+	add	r11,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[16+rsi]
+	add	r12,rcx
+	adc	rdx,0
+	add	r12,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[24+rsi]
+	add	r13,rcx
+	adc	rdx,0
+	add	r13,rax
+	mov	rax,r10
+	adc	r8,rdx
+	adc	r9,0
+
+
+
+	mov	rbp,r10	; reduction step 3: same folding pattern applied to r10
+	shl	r10,32
+	mul	r15
+	shr	rbp,32
+	add	r11,r10
+	adc	r12,rbp
+	adc	r13,rax
+	mov	rax,QWORD[24+rbx]	; rax = b[3]
+	adc	r8,rdx
+	adc	r9,0
+	xor	r10,r10
+
+
+
+	mov	rbp,rax	; accumulate a[0..3] * b[3]
+	mul	QWORD[rsi]
+	add	r11,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[8+rsi]
+	add	r12,rcx
+	adc	rdx,0
+	add	r12,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[16+rsi]
+	add	r13,rcx
+	adc	rdx,0
+	add	r13,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[24+rsi]
+	add	r8,rcx
+	adc	rdx,0
+	add	r8,rax
+	mov	rax,r11
+	adc	r9,rdx
+	adc	r10,0
+
+
+
+	mov	rbp,r11	; reduction step 4: same folding pattern applied to r11
+	shl	r11,32
+	mul	r15
+	shr	rbp,32
+	add	r12,r11
+	adc	r13,rbp
+	mov	rcx,r12	; rcx/rbp/rbx/rdx keep the unreduced limbs for the select below
+	adc	r8,rax
+	adc	r9,rdx
+	mov	rbp,r13
+	adc	r10,0
+
+
+
+	sub	r12,-1	; trial subtraction of p: -1 is limb 0 of $L$poly (0xffffffffffffffff)
+	mov	rbx,r8
+	sbb	r13,r14	; limb 1 of $L$poly
+	sbb	r8,0	; limb 2 of $L$poly is zero
+	mov	rdx,r9
+	sbb	r9,r15	; limb 3 of $L$poly
+	sbb	r10,0	; include the carry limb; CF set means the value was below p
+
+	cmovc	r12,rcx	; borrow: keep the unreduced limbs (branch-free select)
+	cmovc	r13,rbp
+	mov	QWORD[rdi],r12
+	cmovc	r8,rbx
+	mov	QWORD[8+rdi],r13
+	cmovc	r9,rdx
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+
+
+global	ecp_nistz256_sqr_mont	; public wrapper around __ecp_nistz256_sqr_montq; Win64 args: rcx = res, rdx = a
+
+ALIGN	32
+ecp_nistz256_sqr_mont:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_sqr_mont:
+	mov	rdi,rcx	; rdi = res pointer
+	mov	rsi,rdx	; rsi = a pointer
+
+
+	push	rbp
+	push	rbx
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	mov	rax,QWORD[rsi]	; rax = a[0], r14 = a[1], r15 = a[2], r8 = a[3]
+	mov	r14,QWORD[8+rsi]
+	mov	r15,QWORD[16+rsi]
+	mov	r8,QWORD[24+rsi]
+
+	call	__ecp_nistz256_sqr_montq
+$L$sqr_mont_done:
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbx
+	pop	rbp
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_ecp_nistz256_sqr_mont:
+
+
+ALIGN	32
+__ecp_nistz256_sqr_montq:	; internal helper: in rax = a[0], r14 = a[1], r15 = a[2], r8 = a[3], rsi = a, rdi = res; stores 4 limbs at [rdi]; overwrites rax, rcx, rdx, rbp, rsi, r8..r15
+	mov	r13,rax	; r13 = a[0]
+	mul	r14	; a[0]*a[1]
+	mov	r9,rax
+	mov	rax,r15
+	mov	r10,rdx
+
+	mul	r13	; a[2]*a[0]
+	add	r10,rax
+	mov	rax,r8
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	r13	; a[3]*a[0]
+	add	r11,rax
+	mov	rax,r15
+	adc	rdx,0
+	mov	r12,rdx
+
+
+	mul	r14	; a[2]*a[1]
+	add	r11,rax
+	mov	rax,r8
+	adc	rdx,0
+	mov	rbp,rdx
+
+	mul	r14	; a[3]*a[1]
+	add	r12,rax
+	mov	rax,r8
+	adc	rdx,0
+	add	r12,rbp
+	mov	r13,rdx
+	adc	r13,0
+
+
+	mul	r15	; a[3]*a[2]
+	xor	r15,r15
+	add	r13,rax
+	mov	rax,QWORD[rsi]	; rax = a[0] for the first diagonal term
+	mov	r14,rdx
+	adc	r14,0
+
+	add	r9,r9	; double the off-diagonal sum held in r9..r14, top carry into r15
+	adc	r10,r10
+	adc	r11,r11
+	adc	r12,r12
+	adc	r13,r13
+	adc	r14,r14
+	adc	r15,0
+
+	mul	rax	; a[0]^2
+	mov	r8,rax
+	mov	rax,QWORD[8+rsi]
+	mov	rcx,rdx
+
+	mul	rax	; a[1]^2
+	add	r9,rcx
+	adc	r10,rax
+	mov	rax,QWORD[16+rsi]
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	rax	; a[2]^2
+	add	r11,rcx
+	adc	r12,rax
+	mov	rax,QWORD[24+rsi]
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	rax	; a[3]^2
+	add	r13,rcx
+	adc	r14,rax
+	mov	rax,r8
+	adc	r15,rdx
+
+	mov	rsi,QWORD[(($L$poly+8))]	; rsi is reused: second limb of $L$poly (the a pointer is no longer needed)
+	mov	rbp,QWORD[(($L$poly+24))]	; rbp = top limb of $L$poly, multiplier in every reduction step
+
+
+
+
+	mov	rcx,r8	; reduction step 1: fold the lowest limb (r8) into the limbs above it
+	shl	r8,32
+	mul	rbp
+	shr	rcx,32
+	add	r9,r8	; add (limb << 32) one limb up ...
+	adc	r10,rcx	; ... and (limb >> 32) two limbs up
+	adc	r11,rax
+	mov	rax,r9
+	adc	rdx,0
+
+
+
+	mov	rcx,r9	; reduction step 2 (r8 now carries the previous step's high word)
+	shl	r9,32
+	mov	r8,rdx
+	mul	rbp
+	shr	rcx,32
+	add	r10,r9
+	adc	r11,rcx
+	adc	r8,rax
+	mov	rax,r10
+	adc	rdx,0
+
+
+
+	mov	rcx,r10	; reduction step 3
+	shl	r10,32
+	mov	r9,rdx
+	mul	rbp
+	shr	rcx,32
+	add	r11,r10
+	adc	r8,rcx
+	adc	r9,rax
+	mov	rax,r11
+	adc	rdx,0
+
+
+
+	mov	rcx,r11	; reduction step 4
+	shl	r11,32
+	mov	r10,rdx
+	mul	rbp
+	shr	rcx,32
+	add	r8,r11
+	adc	r9,rcx
+	adc	r10,rax
+	adc	rdx,0
+	xor	r11,r11
+
+
+
+	add	r12,r8	; add the reduction result (r8, r9, r10, rdx) to the upper half of the square
+	adc	r13,r9
+	mov	r8,r12	; r8/r9/r10/rcx keep the unreduced limbs for the select below
+	adc	r14,r10
+	adc	r15,rdx
+	mov	r9,r13
+	adc	r11,0	; r11 = carry limb
+
+	sub	r12,-1	; trial subtraction of p: -1 is limb 0 of $L$poly (0xffffffffffffffff)
+	mov	r10,r14
+	sbb	r13,rsi	; limb 1 of $L$poly
+	sbb	r14,0	; limb 2 of $L$poly is zero
+	mov	rcx,r15
+	sbb	r15,rbp	; limb 3 of $L$poly
+	sbb	r11,0	; include the carry limb; CF set means the value was below p
+
+	cmovc	r12,r8	; borrow: keep the unreduced limbs (branch-free select)
+	cmovc	r13,r9
+	mov	QWORD[rdi],r12
+	cmovc	r14,r10
+	mov	QWORD[8+rdi],r13
+	cmovc	r15,rcx
+	mov	QWORD[16+rdi],r14
+	mov	QWORD[24+rdi],r15
+
+	DB	0F3h,0C3h		;repret
+
+
+
+global	ecp_nistz256_select_w5	; copies the 96-byte table entry whose 1-based position equals r8d from the 16-entry table at rdx to rcx, reading every entry; position 0 yields all zeros
+
+ALIGN	32
+ecp_nistz256_select_w5:
+	lea	rax,[OPENSSL_ia32cap_P]
+	mov	rax,QWORD[8+rax]
+	test	eax,32	; bit 5 of the capability dword at offset 8 (presumably the AVX2 feature bit - confirm against the OPENSSL_ia32cap_P layout)
+	jnz	NEAR $L$avx2_select_w5
+	lea	rax,[((-136))+rsp]
+$L$SEH_begin_ecp_nistz256_select_w5:
+DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20]  (rsp drops by 168 in total)
+DB	0x0f,0x29,0x70,0xe0	; movaps [rax-0x20],xmm6
+DB	0x0f,0x29,0x78,0xf0	; movaps [rax-0x10],xmm7
+DB	0x44,0x0f,0x29,0x00	; movaps [rax],xmm8
+DB	0x44,0x0f,0x29,0x48,0x10	; movaps [rax+0x10],xmm9
+DB	0x44,0x0f,0x29,0x50,0x20	; movaps [rax+0x20],xmm10
+DB	0x44,0x0f,0x29,0x58,0x30	; movaps [rax+0x30],xmm11
+DB	0x44,0x0f,0x29,0x60,0x40	; movaps [rax+0x40],xmm12
+DB	0x44,0x0f,0x29,0x68,0x50	; movaps [rax+0x50],xmm13
+DB	0x44,0x0f,0x29,0x70,0x60	; movaps [rax+0x60],xmm14
+DB	0x44,0x0f,0x29,0x78,0x70	; movaps [rax+0x70],xmm15
+	movdqa	xmm0,XMMWORD[$L$One]	; xmm0 = per-lane increment of 1
+	movd	xmm1,r8d	; requested position
+
+	pxor	xmm2,xmm2	; xmm2..xmm7 = 96-byte result accumulator, starts at zero
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	pxor	xmm6,xmm6
+	pxor	xmm7,xmm7
+
+	movdqa	xmm8,xmm0	; xmm8 = running position counter, starts at 1
+	pshufd	xmm1,xmm1,0	; broadcast the requested position to all four lanes
+
+	mov	rax,16	; 16 table entries
+$L$select_loop_sse_w5:
+
+	movdqa	xmm15,xmm8
+	paddd	xmm8,xmm0
+	pcmpeqd	xmm15,xmm1	; xmm15 = all-ones when this entry is the requested one, else zero
+
+	movdqa	xmm9,XMMWORD[rdx]	; load the whole 96-byte entry (aligned loads)
+	movdqa	xmm10,XMMWORD[16+rdx]
+	movdqa	xmm11,XMMWORD[32+rdx]
+	movdqa	xmm12,XMMWORD[48+rdx]
+	movdqa	xmm13,XMMWORD[64+rdx]
+	movdqa	xmm14,XMMWORD[80+rdx]
+	lea	rdx,[96+rdx]
+
+	pand	xmm9,xmm15	; mask the entry and OR it into the accumulator
+	pand	xmm10,xmm15
+	por	xmm2,xmm9
+	pand	xmm11,xmm15
+	por	xmm3,xmm10
+	pand	xmm12,xmm15
+	por	xmm4,xmm11
+	pand	xmm13,xmm15
+	por	xmm5,xmm12
+	pand	xmm14,xmm15
+	por	xmm6,xmm13
+	por	xmm7,xmm14
+
+	dec	rax
+	jnz	NEAR $L$select_loop_sse_w5
+
+	movdqu	XMMWORD[rcx],xmm2	; store the selected 96 bytes (unaligned stores)
+	movdqu	XMMWORD[16+rcx],xmm3
+	movdqu	XMMWORD[32+rcx],xmm4
+	movdqu	XMMWORD[48+rcx],xmm5
+	movdqu	XMMWORD[64+rcx],xmm6
+	movdqu	XMMWORD[80+rcx],xmm7
+	movaps	xmm6,XMMWORD[rsp]	; restore xmm6..xmm15 saved by the encoded prologue
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	xmm10,XMMWORD[64+rsp]
+	movaps	xmm11,XMMWORD[80+rsp]
+	movaps	xmm12,XMMWORD[96+rsp]
+	movaps	xmm13,XMMWORD[112+rsp]
+	movaps	xmm14,XMMWORD[128+rsp]
+	movaps	xmm15,XMMWORD[144+rsp]
+	lea	rsp,[168+rsp]
+$L$SEH_end_ecp_nistz256_select_w5:
+	DB	0F3h,0C3h		;repret
+
+
+
+
+global	ecp_nistz256_select_w7
+;Scan 64 entries of 64 bytes at [rdx] and write to [rcx] the entry whose counter value equals r8d; every entry is loaded and masked.
+ALIGN	32
+ecp_nistz256_select_w7:
+	lea	rax,[OPENSSL_ia32cap_P]
+	mov	rax,QWORD[8+rax]
+	test	eax,32	;bit 5 of the capability dword at byte offset 8 diverts to the AVX2 variant
+	jnz	NEAR $L$avx2_select_w7
+	lea	rax,[((-136))+rsp]
+$L$SEH_begin_ecp_nistz256_select_w7:
+DB	0x48,0x8d,0x60,0xe0	;lea rsp,[rax-0x20] (168-byte frame)
+DB	0x0f,0x29,0x70,0xe0	;movaps [rax-0x20],xmm6
+DB	0x0f,0x29,0x78,0xf0	;movaps [rax-0x10],xmm7
+DB	0x44,0x0f,0x29,0x00	;movaps [rax],xmm8
+DB	0x44,0x0f,0x29,0x48,0x10	;movaps [rax+0x10],xmm9
+DB	0x44,0x0f,0x29,0x50,0x20	;movaps [rax+0x20],xmm10
+DB	0x44,0x0f,0x29,0x58,0x30	;movaps [rax+0x30],xmm11
+DB	0x44,0x0f,0x29,0x60,0x40	;movaps [rax+0x40],xmm12
+DB	0x44,0x0f,0x29,0x68,0x50	;movaps [rax+0x50],xmm13
+DB	0x44,0x0f,0x29,0x70,0x60	;movaps [rax+0x60],xmm14
+DB	0x44,0x0f,0x29,0x78,0x70	;movaps [rax+0x70],xmm15
+	movdqa	xmm8,XMMWORD[$L$One]	;running counter; NOTE(review): $L$One is defined outside this view, presumably 1 in each dword
+	movd	xmm1,r8d
+
+	pxor	xmm2,xmm2	;xmm2..xmm5 accumulate the 64-byte result, starting from zero
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+
+	movdqa	xmm0,xmm8	;xmm0 = per-iteration counter step
+	pshufd	xmm1,xmm1,0	;broadcast the requested index to all four dwords
+	mov	rax,64	;number of table entries scanned
+
+$L$select_loop_sse_w7:
+	movdqa	xmm15,xmm8
+	paddd	xmm8,xmm0
+	movdqa	xmm9,XMMWORD[rdx]
+	movdqa	xmm10,XMMWORD[16+rdx]
+	pcmpeqd	xmm15,xmm1	;xmm15 = all-ones when this entry's counter equals the index, else zero
+	movdqa	xmm11,XMMWORD[32+rdx]
+	movdqa	xmm12,XMMWORD[48+rdx]
+	lea	rdx,[64+rdx]	;advance to the next 64-byte entry
+
+	pand	xmm9,xmm15
+	pand	xmm10,xmm15
+	por	xmm2,xmm9
+	pand	xmm11,xmm15
+	por	xmm3,xmm10
+	pand	xmm12,xmm15
+	por	xmm4,xmm11
+	prefetcht0	[255+rdx]	;prefetch hint for table data ahead of the current position
+	por	xmm5,xmm12
+
+	dec	rax
+	jnz	NEAR $L$select_loop_sse_w7
+
+	movdqu	XMMWORD[rcx],xmm2	;output stays all zero if no counter value matched
+	movdqu	XMMWORD[16+rcx],xmm3
+	movdqu	XMMWORD[32+rcx],xmm4
+	movdqu	XMMWORD[48+rcx],xmm5
+	movaps	xmm6,XMMWORD[rsp]	;restore xmm6..xmm15 saved by the DB-encoded prologue
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	xmm10,XMMWORD[64+rsp]
+	movaps	xmm11,XMMWORD[80+rsp]
+	movaps	xmm12,XMMWORD[96+rsp]
+	movaps	xmm13,XMMWORD[112+rsp]
+	movaps	xmm14,XMMWORD[128+rsp]
+	movaps	xmm15,XMMWORD[144+rsp]
+	lea	rsp,[168+rsp]
+$L$SEH_end_ecp_nistz256_select_w7:
+	DB	0F3h,0C3h		;repret
+
+
+
+
+ALIGN	32
+ecp_nistz256_avx2_select_w5:	;AVX2 form of select_w5: 8 iterations, two 96-byte entries each, from [rdx] to [rcx], index in r8d
+$L$avx2_select_w5:	;entry point used by ecp_nistz256_select_w5 when its capability test succeeds
+	vzeroupper
+	lea	rax,[((-136))+rsp]
+$L$SEH_begin_ecp_nistz256_avx2_select_w5:
+DB	0x48,0x8d,0x60,0xe0	;lea rsp,[rax-0x20] (168-byte frame)
+DB	0xc5,0xf8,0x29,0x70,0xe0	;vmovaps [rax-0x20],xmm6
+DB	0xc5,0xf8,0x29,0x78,0xf0	;vmovaps [rax-0x10],xmm7
+DB	0xc5,0x78,0x29,0x40,0x00	;vmovaps [rax],xmm8
+DB	0xc5,0x78,0x29,0x48,0x10	;vmovaps [rax+0x10],xmm9
+DB	0xc5,0x78,0x29,0x50,0x20	;vmovaps [rax+0x20],xmm10
+DB	0xc5,0x78,0x29,0x58,0x30	;vmovaps [rax+0x30],xmm11
+DB	0xc5,0x78,0x29,0x60,0x40	;vmovaps [rax+0x40],xmm12
+DB	0xc5,0x78,0x29,0x68,0x50	;vmovaps [rax+0x50],xmm13
+DB	0xc5,0x78,0x29,0x70,0x60	;vmovaps [rax+0x60],xmm14
+DB	0xc5,0x78,0x29,0x78,0x70	;vmovaps [rax+0x70],xmm15
+	vmovdqa	ymm0,YMMWORD[$L$Two]	;counter step; NOTE(review): $L$One/$L$Two are defined outside this view
+
+	vpxor	ymm2,ymm2,ymm2	;ymm2..ymm4 accumulate the 96-byte result, starting from zero
+	vpxor	ymm3,ymm3,ymm3
+	vpxor	ymm4,ymm4,ymm4
+
+	vmovdqa	ymm5,YMMWORD[$L$One]	;counter for the first entry of each pair
+	vmovdqa	ymm10,YMMWORD[$L$Two]	;counter for the second entry of each pair
+
+	vmovd	xmm1,r8d
+	vpermd	ymm1,ymm2,ymm1	;ymm2 is all zero here, so every dword of ymm1 becomes the index
+
+	mov	rax,8	;8 iterations x 2 entries
+$L$select_loop_avx2_w5:
+
+	vmovdqa	ymm6,YMMWORD[rdx]
+	vmovdqa	ymm7,YMMWORD[32+rdx]
+	vmovdqa	ymm8,YMMWORD[64+rdx]
+
+	vmovdqa	ymm11,YMMWORD[96+rdx]
+	vmovdqa	ymm12,YMMWORD[128+rdx]
+	vmovdqa	ymm13,YMMWORD[160+rdx]
+
+	vpcmpeqd	ymm9,ymm5,ymm1	;mask for the first entry of the pair
+	vpcmpeqd	ymm14,ymm10,ymm1	;mask for the second entry of the pair
+
+	vpaddd	ymm5,ymm5,ymm0
+	vpaddd	ymm10,ymm10,ymm0
+	lea	rdx,[192+rdx]	;advance past both 96-byte entries
+
+	vpand	ymm6,ymm6,ymm9
+	vpand	ymm7,ymm7,ymm9
+	vpand	ymm8,ymm8,ymm9
+	vpand	ymm11,ymm11,ymm14
+	vpand	ymm12,ymm12,ymm14
+	vpand	ymm13,ymm13,ymm14
+
+	vpxor	ymm2,ymm2,ymm6	;masked entries are folded into the accumulators with xor
+	vpxor	ymm3,ymm3,ymm7
+	vpxor	ymm4,ymm4,ymm8
+	vpxor	ymm2,ymm2,ymm11
+	vpxor	ymm3,ymm3,ymm12
+	vpxor	ymm4,ymm4,ymm13
+
+	dec	rax
+	jnz	NEAR $L$select_loop_avx2_w5
+
+	vmovdqu	YMMWORD[rcx],ymm2
+	vmovdqu	YMMWORD[32+rcx],ymm3
+	vmovdqu	YMMWORD[64+rcx],ymm4
+	vzeroupper
+	movaps	xmm6,XMMWORD[rsp]	;restore xmm6..xmm15 saved by the DB-encoded prologue
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	xmm10,XMMWORD[64+rsp]
+	movaps	xmm11,XMMWORD[80+rsp]
+	movaps	xmm12,XMMWORD[96+rsp]
+	movaps	xmm13,XMMWORD[112+rsp]
+	movaps	xmm14,XMMWORD[128+rsp]
+	movaps	xmm15,XMMWORD[144+rsp]
+	lea	rsp,[168+rsp]
+$L$SEH_end_ecp_nistz256_avx2_select_w5:
+	DB	0F3h,0C3h		;repret
+
+
+
+
+global	ecp_nistz256_avx2_select_w7
+;AVX2 form of select_w7: 21 iterations of three 64-byte entries plus one trailing entry (64 total), from [rdx] to [rcx], index in r8d.
+ALIGN	32
+ecp_nistz256_avx2_select_w7:
+$L$avx2_select_w7:	;entry point used by ecp_nistz256_select_w7 when its capability test succeeds
+	vzeroupper
+	lea	rax,[((-136))+rsp]
+$L$SEH_begin_ecp_nistz256_avx2_select_w7:
+DB	0x48,0x8d,0x60,0xe0	;lea rsp,[rax-0x20] (168-byte frame)
+DB	0xc5,0xf8,0x29,0x70,0xe0	;vmovaps [rax-0x20],xmm6
+DB	0xc5,0xf8,0x29,0x78,0xf0	;vmovaps [rax-0x10],xmm7
+DB	0xc5,0x78,0x29,0x40,0x00	;vmovaps [rax],xmm8
+DB	0xc5,0x78,0x29,0x48,0x10	;vmovaps [rax+0x10],xmm9
+DB	0xc5,0x78,0x29,0x50,0x20	;vmovaps [rax+0x20],xmm10
+DB	0xc5,0x78,0x29,0x58,0x30	;vmovaps [rax+0x30],xmm11
+DB	0xc5,0x78,0x29,0x60,0x40	;vmovaps [rax+0x40],xmm12
+DB	0xc5,0x78,0x29,0x68,0x50	;vmovaps [rax+0x50],xmm13
+DB	0xc5,0x78,0x29,0x70,0x60	;vmovaps [rax+0x60],xmm14
+DB	0xc5,0x78,0x29,0x78,0x70	;vmovaps [rax+0x70],xmm15
+	vmovdqa	ymm0,YMMWORD[$L$Three]	;counter step; NOTE(review): $L$One/$L$Two/$L$Three are defined outside this view
+
+	vpxor	ymm2,ymm2,ymm2	;ymm2/ymm3 accumulate the 64-byte result, starting from zero
+	vpxor	ymm3,ymm3,ymm3
+
+	vmovdqa	ymm4,YMMWORD[$L$One]	;counters for the three entries handled per iteration
+	vmovdqa	ymm8,YMMWORD[$L$Two]
+	vmovdqa	ymm12,YMMWORD[$L$Three]
+
+	vmovd	xmm1,r8d
+	vpermd	ymm1,ymm2,ymm1	;ymm2 is all zero here, so every dword of ymm1 becomes the index
+
+
+	mov	rax,21	;21 iterations x 3 entries = 63; the 64th is handled after the loop
+$L$select_loop_avx2_w7:
+
+	vmovdqa	ymm5,YMMWORD[rdx]
+	vmovdqa	ymm6,YMMWORD[32+rdx]
+
+	vmovdqa	ymm9,YMMWORD[64+rdx]
+	vmovdqa	ymm10,YMMWORD[96+rdx]
+
+	vmovdqa	ymm13,YMMWORD[128+rdx]
+	vmovdqa	ymm14,YMMWORD[160+rdx]
+
+	vpcmpeqd	ymm7,ymm4,ymm1	;one equality mask per entry of the triple
+	vpcmpeqd	ymm11,ymm8,ymm1
+	vpcmpeqd	ymm15,ymm12,ymm1
+
+	vpaddd	ymm4,ymm4,ymm0
+	vpaddd	ymm8,ymm8,ymm0
+	vpaddd	ymm12,ymm12,ymm0
+	lea	rdx,[192+rdx]	;advance past three 64-byte entries
+
+	vpand	ymm5,ymm5,ymm7
+	vpand	ymm6,ymm6,ymm7
+	vpand	ymm9,ymm9,ymm11
+	vpand	ymm10,ymm10,ymm11
+	vpand	ymm13,ymm13,ymm15
+	vpand	ymm14,ymm14,ymm15
+
+	vpxor	ymm2,ymm2,ymm5	;masked entries are folded into the accumulators with xor
+	vpxor	ymm3,ymm3,ymm6
+	vpxor	ymm2,ymm2,ymm9
+	vpxor	ymm3,ymm3,ymm10
+	vpxor	ymm2,ymm2,ymm13
+	vpxor	ymm3,ymm3,ymm14
+
+	dec	rax
+	jnz	NEAR $L$select_loop_avx2_w7
+
+
+	vmovdqa	ymm5,YMMWORD[rdx]	;trailing entry, compared against the first counter (ymm4)
+	vmovdqa	ymm6,YMMWORD[32+rdx]
+
+	vpcmpeqd	ymm7,ymm4,ymm1
+
+	vpand	ymm5,ymm5,ymm7
+	vpand	ymm6,ymm6,ymm7
+
+	vpxor	ymm2,ymm2,ymm5
+	vpxor	ymm3,ymm3,ymm6
+
+	vmovdqu	YMMWORD[rcx],ymm2
+	vmovdqu	YMMWORD[32+rcx],ymm3
+	vzeroupper
+	movaps	xmm6,XMMWORD[rsp]	;restore xmm6..xmm15 saved by the DB-encoded prologue
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	xmm10,XMMWORD[64+rsp]
+	movaps	xmm11,XMMWORD[80+rsp]
+	movaps	xmm12,XMMWORD[96+rsp]
+	movaps	xmm13,XMMWORD[112+rsp]
+	movaps	xmm14,XMMWORD[128+rsp]
+	movaps	xmm15,XMMWORD[144+rsp]
+	lea	rsp,[168+rsp]
+$L$SEH_end_ecp_nistz256_avx2_select_w7:
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	32
+__ecp_nistz256_add_toq:	;(r12,r13,r8,r9) += 256-bit value at [rbx], then one conditional subtraction of (-1,r14,0,r15); result kept in r12,r13,r8,r9 and stored to [rdi]
+	xor	r11,r11
+	add	r12,QWORD[rbx]
+	adc	r13,QWORD[8+rbx]
+	mov	rax,r12	;rax,rbp,rcx,r10 keep the unreduced sum for the final select
+	adc	r8,QWORD[16+rbx]
+	adc	r9,QWORD[24+rbx]
+	mov	rbp,r13
+	adc	r11,0	;r11 = carry out of the 256-bit addition
+
+	sub	r12,-1	;trial subtraction of the four limbs (-1, r14, 0, r15); callers supply limbs 1 and 3 in r14/r15
+	mov	rcx,r8
+	sbb	r13,r14
+	sbb	r8,0
+	mov	r10,r9
+	sbb	r9,r15
+	sbb	r11,0	;CF set here means the trial subtraction went negative
+
+	cmovc	r12,rax	;on borrow, fall back to the unreduced sum
+	cmovc	r13,rbp
+	mov	QWORD[rdi],r12
+	cmovc	r8,rcx
+	mov	QWORD[8+rdi],r13
+	cmovc	r9,r10
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	32
+__ecp_nistz256_sub_fromq:	;(r12,r13,r8,r9) -= 256-bit value at [rbx]; on borrow (-1,r14,0,r15) is added back; result kept in r12,r13,r8,r9 and stored to [rdi]
+	sub	r12,QWORD[rbx]
+	sbb	r13,QWORD[8+rbx]
+	mov	rax,r12	;rax,rbp,rcx,r10 keep the raw difference for the final select
+	sbb	r8,QWORD[16+rbx]
+	sbb	r9,QWORD[24+rbx]
+	mov	rbp,r13
+	sbb	r11,r11	;r11 = all-ones if the subtraction borrowed, else 0
+
+	add	r12,-1	;trial addition of the four limbs (-1, r14, 0, r15)
+	mov	rcx,r8
+	adc	r13,r14
+	adc	r8,0
+	mov	r10,r9
+	adc	r9,r15
+	test	r11,r11
+
+	cmovz	r12,rax	;no borrow: keep the raw difference
+	cmovz	r13,rbp
+	mov	QWORD[rdi],r12
+	cmovz	r8,rcx
+	mov	QWORD[8+rdi],r13
+	cmovz	r9,r10
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	32
+__ecp_nistz256_subq:	;(r12,r13,r8,r9) = (rax,rbp,rcx,r10) - (r12,r13,r8,r9); on borrow (-1,r14,0,r15) is added back; nothing is written to memory
+	sub	rax,r12
+	sbb	rbp,r13
+	mov	r12,rax	;r12,r13,r8,r9 first receive the raw difference
+	sbb	rcx,r8
+	sbb	r10,r9
+	mov	r13,rbp
+	sbb	r11,r11	;r11 = all-ones if the subtraction borrowed, else 0
+
+	add	rax,-1	;trial addition of the four limbs (-1, r14, 0, r15)
+	mov	r8,rcx
+	adc	rbp,r14
+	adc	rcx,0
+	mov	r9,r10
+	adc	r10,r15
+	test	r11,r11
+
+	cmovnz	r12,rax	;borrow: take the corrected value instead
+	cmovnz	r13,rbp
+	cmovnz	r8,rcx
+	cmovnz	r9,r10
+
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	32
+__ecp_nistz256_mul_by_2q:	;doubles (r12,r13,r8,r9), then one conditional subtraction of (-1,r14,0,r15); result kept in r12,r13,r8,r9 and stored to [rdi]
+	xor	r11,r11
+	add	r12,r12
+	adc	r13,r13
+	mov	rax,r12	;rax,rbp,rcx,r10 keep the unreduced double for the final select
+	adc	r8,r8
+	adc	r9,r9
+	mov	rbp,r13
+	adc	r11,0	;r11 = bit shifted out of the top limb
+
+	sub	r12,-1	;trial subtraction of the four limbs (-1, r14, 0, r15)
+	mov	rcx,r8
+	sbb	r13,r14
+	sbb	r8,0
+	mov	r10,r9
+	sbb	r9,r15
+	sbb	r11,0	;CF set here means the trial subtraction went negative
+
+	cmovc	r12,rax	;on borrow, fall back to the unreduced double
+	cmovc	r13,rbp
+	mov	QWORD[rdi],r12
+	cmovc	r8,rcx
+	mov	QWORD[8+rdi],r13
+	cmovc	r9,r10
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+
+	DB	0F3h,0C3h		;repret
+
+global	ecp_nistz256_point_double
+;Win64 entry: rcx = output pointer, rdx = input pointer (96 bytes are read from the input and 96 bytes written to the output).
+ALIGN	32
+ecp_nistz256_point_double:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_point_double:
+	mov	rdi,rcx
+	mov	rsi,rdx
+
+
+	push	rbp
+	push	rbx
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	sub	rsp,32*5+8	;168-byte frame: five 32-byte slots plus 8
+
+$L$point_double_shortcutq:	;also entered from $L$add_doubleq in ecp_nistz256_point_add with rdi/rsi already set
+	movdqu	xmm0,XMMWORD[rsi]
+	mov	rbx,rsi	;rbx keeps the input pointer across the helper calls
+	movdqu	xmm1,XMMWORD[16+rsi]
+	mov	r12,QWORD[((32+0))+rsi]
+	mov	r13,QWORD[((32+8))+rsi]
+	mov	r8,QWORD[((32+16))+rsi]
+	mov	r9,QWORD[((32+24))+rsi]
+	mov	r14,QWORD[(($L$poly+8))]	;limbs 1 and 3 of $L$poly, as consumed by the add/sub helpers
+	mov	r15,QWORD[(($L$poly+24))]
+	movdqa	XMMWORD[96+rsp],xmm0	;copy of input bytes 0..31 at rsp+96
+	movdqa	XMMWORD[(96+16)+rsp],xmm1
+	lea	r10,[32+rdi]
+	lea	r11,[64+rdi]
+DB	102,72,15,110,199	;movq xmm0,rdi (output pointer)
+DB	102,73,15,110,202	;movq xmm1,r10 (output+32)
+DB	102,73,15,110,211	;movq xmm2,r11 (output+64)
+
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_mul_by_2q	;rsp+0 = 2 * (input limbs at offset 32)
+
+	mov	rax,QWORD[((64+0))+rsi]
+	mov	r14,QWORD[((64+8))+rsi]
+	mov	r15,QWORD[((64+16))+rsi]
+	mov	r8,QWORD[((64+24))+rsi]
+	lea	rsi,[((64-0))+rsi]
+	lea	rdi,[64+rsp]
+	call	__ecp_nistz256_sqr_montq
+
+	mov	rax,QWORD[((0+0))+rsp]
+	mov	r14,QWORD[((8+0))+rsp]
+	lea	rsi,[((0+0))+rsp]
+	mov	r15,QWORD[((16+0))+rsp]
+	mov	r8,QWORD[((24+0))+rsp]
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_sqr_montq
+
+	mov	rax,QWORD[32+rbx]
+	mov	r9,QWORD[((64+0))+rbx]
+	mov	r10,QWORD[((64+8))+rbx]
+	mov	r11,QWORD[((64+16))+rbx]
+	mov	r12,QWORD[((64+24))+rbx]
+	lea	rsi,[((64-0))+rbx]
+	lea	rbx,[32+rbx]
+DB	102,72,15,126,215	;movq rdi,xmm2 (output+64)
+	call	__ecp_nistz256_mul_montq
+	call	__ecp_nistz256_mul_by_2q	;doubles the product and stores it at output+64
+
+	mov	r12,QWORD[((96+0))+rsp]
+	mov	r13,QWORD[((96+8))+rsp]
+	lea	rbx,[64+rsp]
+	mov	r8,QWORD[((96+16))+rsp]
+	mov	r9,QWORD[((96+24))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_add_toq	;rsp+32 = [rsp+96] + [rsp+64]
+
+	mov	r12,QWORD[((96+0))+rsp]
+	mov	r13,QWORD[((96+8))+rsp]
+	lea	rbx,[64+rsp]
+	mov	r8,QWORD[((96+16))+rsp]
+	mov	r9,QWORD[((96+24))+rsp]
+	lea	rdi,[64+rsp]
+	call	__ecp_nistz256_sub_fromq	;rsp+64 = [rsp+96] - [rsp+64]
+
+	mov	rax,QWORD[((0+0))+rsp]
+	mov	r14,QWORD[((8+0))+rsp]
+	lea	rsi,[((0+0))+rsp]
+	mov	r15,QWORD[((16+0))+rsp]
+	mov	r8,QWORD[((24+0))+rsp]
+DB	102,72,15,126,207	;movq rdi,xmm1 (output+32)
+	call	__ecp_nistz256_sqr_montq
+	xor	r9,r9	;inline halving of (r12,r13,r14,r15): add the odd modulus when the value is odd, then shift right by one
+	mov	rax,r12
+	add	r12,-1	;NOTE(review): rsi/rbp are presumably modulus limbs 1 and 3 left by sqr_montq - confirm against that routine
+	mov	r10,r13
+	adc	r13,rsi
+	mov	rcx,r14
+	adc	r14,0
+	mov	r8,r15
+	adc	r15,rbp
+	adc	r9,0	;r9 = carry out of the 256-bit addition
+	xor	rsi,rsi
+	test	rax,1	;ZF set when the original value is even
+
+	cmovz	r12,rax	;even: keep the original limbs and a zero top carry
+	cmovz	r13,r10
+	cmovz	r14,rcx
+	cmovz	r15,r8
+	cmovz	r9,rsi
+
+	mov	rax,r13
+	shr	r12,1
+	shl	rax,63
+	mov	r10,r14
+	shr	r13,1
+	or	r12,rax
+	shl	r10,63
+	mov	rcx,r15
+	shr	r14,1
+	or	r13,r10
+	shl	rcx,63
+	mov	QWORD[rdi],r12
+	shr	r15,1
+	mov	QWORD[8+rdi],r13
+	shl	r9,63
+	or	r14,rcx
+	or	r15,r9
+	mov	QWORD[16+rdi],r14
+	mov	QWORD[24+rdi],r15	;halved value stored at output+32
+	mov	rax,QWORD[64+rsp]
+	lea	rbx,[64+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((0+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	lea	rdi,[128+rsp]
+	call	__ecp_nistz256_mul_by_2q
+
+	lea	rbx,[32+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_add_toq
+
+	mov	rax,QWORD[96+rsp]
+	lea	rbx,[96+rsp]
+	mov	r9,QWORD[((0+0))+rsp]
+	mov	r10,QWORD[((8+0))+rsp]
+	lea	rsi,[((0+0))+rsp]
+	mov	r11,QWORD[((16+0))+rsp]
+	mov	r12,QWORD[((24+0))+rsp]
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_mul_montq
+
+	lea	rdi,[128+rsp]
+	call	__ecp_nistz256_mul_by_2q
+
+	mov	rax,QWORD[((0+32))+rsp]
+	mov	r14,QWORD[((8+32))+rsp]
+	lea	rsi,[((0+32))+rsp]
+	mov	r15,QWORD[((16+32))+rsp]
+	mov	r8,QWORD[((24+32))+rsp]
+DB	102,72,15,126,199	;movq rdi,xmm0 (output pointer)
+	call	__ecp_nistz256_sqr_montq
+
+	lea	rbx,[128+rsp]
+	mov	r8,r14	;rearrange into the (r12,r13,r8,r9) / r14,r15 layout that sub_fromq uses
+	mov	r9,r15
+	mov	r14,rsi
+	mov	r15,rbp
+	call	__ecp_nistz256_sub_fromq	;result stored at output+0
+
+	mov	rax,QWORD[((0+0))+rsp]
+	mov	rbp,QWORD[((0+8))+rsp]
+	mov	rcx,QWORD[((0+16))+rsp]
+	mov	r10,QWORD[((0+24))+rsp]
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_subq	;(r12,r13,r8,r9) = [rsp+0] - previous result
+
+	mov	rax,QWORD[32+rsp]
+	lea	rbx,[32+rsp]
+	mov	r14,r12
+	xor	ecx,ecx	;sets ZF, so both cmovz below always move
+	mov	QWORD[((0+0))+rsp],r12
+	mov	r10,r13
+	mov	QWORD[((0+8))+rsp],r13
+	cmovz	r11,r8
+	mov	QWORD[((0+16))+rsp],r8
+	lea	rsi,[((0-0))+rsp]
+	cmovz	r12,r9
+	mov	QWORD[((0+24))+rsp],r9
+	mov	r9,r14
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_mul_montq
+
+DB	102,72,15,126,203	;movq rbx,xmm1 (output+32)
+DB	102,72,15,126,207	;movq rdi,xmm1 (output+32)
+	call	__ecp_nistz256_sub_fromq	;subtracts the value at output+32 and stores the result back there
+
+	add	rsp,32*5+8
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbx
+	pop	rbp
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_ecp_nistz256_point_double:
+global	ecp_nistz256_point_add
+;Win64 entry: rcx = output pointer, rdx = first 96-byte input, r8 = second 96-byte input; the result is written as 96 bytes at the output.
+ALIGN	32
+ecp_nistz256_point_add:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_point_add:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+	push	rbp
+	push	rbx
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	sub	rsp,32*18+8	;584-byte frame: eighteen 32-byte slots plus 8
+
+	movdqu	xmm0,XMMWORD[rsi]
+	movdqu	xmm1,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	xmm3,XMMWORD[48+rsi]
+	movdqu	xmm4,XMMWORD[64+rsi]
+	movdqu	xmm5,XMMWORD[80+rsi]
+	mov	rbx,rsi	;rbx = first input pointer
+	mov	rsi,rdx	;rsi = second input pointer
+	movdqa	XMMWORD[384+rsp],xmm0	;first input copied to rsp+384..479
+	movdqa	XMMWORD[(384+16)+rsp],xmm1
+	movdqa	XMMWORD[416+rsp],xmm2
+	movdqa	XMMWORD[(416+16)+rsp],xmm3
+	movdqa	XMMWORD[448+rsp],xmm4
+	movdqa	XMMWORD[(448+16)+rsp],xmm5
+	por	xmm5,xmm4	;start OR-folding bytes 64..95 of the first input
+
+	movdqu	xmm0,XMMWORD[rsi]
+	pshufd	xmm3,xmm5,0xb1
+	movdqu	xmm1,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	por	xmm5,xmm3
+	movdqu	xmm3,XMMWORD[48+rsi]
+	mov	rax,QWORD[((64+0))+rsi]
+	mov	r14,QWORD[((64+8))+rsi]
+	mov	r15,QWORD[((64+16))+rsi]
+	mov	r8,QWORD[((64+24))+rsi]
+	movdqa	XMMWORD[480+rsp],xmm0	;second input copied to rsp+480..575
+	pshufd	xmm4,xmm5,0x1e
+	movdqa	XMMWORD[(480+16)+rsp],xmm1
+	movdqu	xmm0,XMMWORD[64+rsi]
+	movdqu	xmm1,XMMWORD[80+rsi]
+	movdqa	XMMWORD[512+rsp],xmm2
+	movdqa	XMMWORD[(512+16)+rsp],xmm3
+	por	xmm5,xmm4
+	pxor	xmm4,xmm4
+	por	xmm1,xmm0	;start OR-folding bytes 64..95 of the second input
+DB	102,72,15,110,199	;movq xmm0,rdi (save output pointer)
+
+	lea	rsi,[((64-0))+rsi]
+	mov	QWORD[((544+0))+rsp],rax
+	mov	QWORD[((544+8))+rsp],r14
+	mov	QWORD[((544+16))+rsp],r15
+	mov	QWORD[((544+24))+rsp],r8
+	lea	rdi,[96+rsp]
+	call	__ecp_nistz256_sqr_montq
+
+	pcmpeqd	xmm5,xmm4
+	pshufd	xmm4,xmm1,0xb1
+	por	xmm4,xmm1
+	pshufd	xmm5,xmm5,0	;xmm5 = all-ones iff bytes 64..95 of the first input are all zero
+	pshufd	xmm3,xmm4,0x1e
+	por	xmm4,xmm3
+	pxor	xmm3,xmm3
+	pcmpeqd	xmm4,xmm3
+	pshufd	xmm4,xmm4,0	;xmm4 = all-ones iff bytes 64..95 of the second input are all zero
+	mov	rax,QWORD[((64+0))+rbx]
+	mov	r14,QWORD[((64+8))+rbx]
+	mov	r15,QWORD[((64+16))+rbx]
+	mov	r8,QWORD[((64+24))+rbx]
+DB	102,72,15,110,203	;movq xmm1,rbx (save first input pointer)
+
+	lea	rsi,[((64-0))+rbx]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_sqr_montq
+
+	mov	rax,QWORD[544+rsp]
+	lea	rbx,[544+rsp]
+	mov	r9,QWORD[((0+96))+rsp]
+	mov	r10,QWORD[((8+96))+rsp]
+	lea	rsi,[((0+96))+rsp]
+	mov	r11,QWORD[((16+96))+rsp]
+	mov	r12,QWORD[((24+96))+rsp]
+	lea	rdi,[224+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	mov	rax,QWORD[448+rsp]
+	lea	rbx,[448+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((0+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[256+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	mov	rax,QWORD[416+rsp]
+	lea	rbx,[416+rsp]
+	mov	r9,QWORD[((0+224))+rsp]
+	mov	r10,QWORD[((8+224))+rsp]
+	lea	rsi,[((0+224))+rsp]
+	mov	r11,QWORD[((16+224))+rsp]
+	mov	r12,QWORD[((24+224))+rsp]
+	lea	rdi,[224+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	mov	rax,QWORD[512+rsp]
+	lea	rbx,[512+rsp]
+	mov	r9,QWORD[((0+256))+rsp]
+	mov	r10,QWORD[((8+256))+rsp]
+	lea	rsi,[((0+256))+rsp]
+	mov	r11,QWORD[((16+256))+rsp]
+	mov	r12,QWORD[((24+256))+rsp]
+	lea	rdi,[256+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	lea	rbx,[224+rsp]
+	lea	rdi,[64+rsp]
+	call	__ecp_nistz256_sub_fromq	;rsp+64 = last product - [rsp+224]
+
+	or	r12,r13	;r12 = OR of the four difference limbs (zero iff the difference is zero)
+	movdqa	xmm2,xmm4
+	or	r12,r8
+	or	r12,r9
+	por	xmm2,xmm5	;xmm2 = OR of the two all-zero masks
+DB	102,73,15,110,220	;movq xmm3,r12 (save the zero test of the first difference)
+
+	mov	rax,QWORD[384+rsp]
+	lea	rbx,[384+rsp]
+	mov	r9,QWORD[((0+96))+rsp]
+	mov	r10,QWORD[((8+96))+rsp]
+	lea	rsi,[((0+96))+rsp]
+	mov	r11,QWORD[((16+96))+rsp]
+	mov	r12,QWORD[((24+96))+rsp]
+	lea	rdi,[160+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	mov	rax,QWORD[480+rsp]
+	lea	rbx,[480+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((0+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[192+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	lea	rbx,[160+rsp]
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_sub_fromq	;rsp+0 = last product - [rsp+160]
+
+	or	r12,r13	;ZF after the last OR tells whether this second difference is zero
+	or	r12,r8
+	or	r12,r9
+
+DB	0x3e	;DS segment-override prefix byte placed in front of the jnz below
+	jnz	NEAR $L$add_proceedq	;second difference non-zero: general case
+DB	102,73,15,126,208	;movq r8,xmm2 (low half of the combined all-zero mask)
+DB	102,73,15,126,217	;movq r9,xmm3 (saved zero test of the first difference)
+	test	r8,r8
+	jnz	NEAR $L$add_proceedq	;an input had all-zero bytes 64..95: the final masks choose the output
+	test	r9,r9
+	jz	NEAR $L$add_doubleq	;both differences zero: reuse the doubling code
+
+DB	102,72,15,126,199	;movq rdi,xmm0 (output pointer)
+	pxor	xmm0,xmm0	;remaining case: write 96 zero bytes to the output
+	movdqu	XMMWORD[rdi],xmm0
+	movdqu	XMMWORD[16+rdi],xmm0
+	movdqu	XMMWORD[32+rdi],xmm0
+	movdqu	XMMWORD[48+rdi],xmm0
+	movdqu	XMMWORD[64+rdi],xmm0
+	movdqu	XMMWORD[80+rdi],xmm0
+	jmp	NEAR $L$add_doneq
+
+ALIGN	32
+$L$add_doubleq:
+DB	102,72,15,126,206	;movq rsi,xmm1 (first input pointer)
+DB	102,72,15,126,199	;movq rdi,xmm0 (output pointer)
+	add	rsp,416	;584 - 416 = 168, the frame size point_double allocates before its shortcut label
+	jmp	NEAR $L$point_double_shortcutq
+
+ALIGN	32
+$L$add_proceedq:
+	mov	rax,QWORD[((0+64))+rsp]
+	mov	r14,QWORD[((8+64))+rsp]
+	lea	rsi,[((0+64))+rsp]
+	mov	r15,QWORD[((16+64))+rsp]
+	mov	r8,QWORD[((24+64))+rsp]
+	lea	rdi,[96+rsp]
+	call	__ecp_nistz256_sqr_montq
+
+	mov	rax,QWORD[448+rsp]
+	lea	rbx,[448+rsp]
+	mov	r9,QWORD[((0+0))+rsp]
+	mov	r10,QWORD[((8+0))+rsp]
+	lea	rsi,[((0+0))+rsp]
+	mov	r11,QWORD[((16+0))+rsp]
+	mov	r12,QWORD[((24+0))+rsp]
+	lea	rdi,[352+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	mov	rax,QWORD[((0+0))+rsp]
+	mov	r14,QWORD[((8+0))+rsp]
+	lea	rsi,[((0+0))+rsp]
+	mov	r15,QWORD[((16+0))+rsp]
+	mov	r8,QWORD[((24+0))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_sqr_montq
+
+	mov	rax,QWORD[544+rsp]
+	lea	rbx,[544+rsp]
+	mov	r9,QWORD[((0+352))+rsp]
+	mov	r10,QWORD[((8+352))+rsp]
+	lea	rsi,[((0+352))+rsp]
+	mov	r11,QWORD[((16+352))+rsp]
+	mov	r12,QWORD[((24+352))+rsp]
+	lea	rdi,[352+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	mov	rax,QWORD[rsp]
+	lea	rbx,[rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((0+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[128+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	mov	rax,QWORD[160+rsp]
+	lea	rbx,[160+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((0+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[192+rsp]
+	call	__ecp_nistz256_mul_montq
+
+
+
+
+	xor	r11,r11	;inline copy of the mul_by_2q sequence on (r12,r13,r8,r9), without the store
+	add	r12,r12
+	lea	rsi,[96+rsp]
+	adc	r13,r13
+	mov	rax,r12
+	adc	r8,r8
+	adc	r9,r9
+	mov	rbp,r13
+	adc	r11,0
+
+	sub	r12,-1
+	mov	rcx,r8
+	sbb	r13,r14
+	sbb	r8,0
+	mov	r10,r9
+	sbb	r9,r15
+	sbb	r11,0
+
+	cmovc	r12,rax
+	mov	rax,QWORD[rsi]	;interleaved: load [rsp+96] as the minuend for subq
+	cmovc	r13,rbp
+	mov	rbp,QWORD[8+rsi]
+	cmovc	r8,rcx
+	mov	rcx,QWORD[16+rsi]
+	cmovc	r9,r10
+	mov	r10,QWORD[24+rsi]
+
+	call	__ecp_nistz256_subq	;(r12,r13,r8,r9) = [rsp+96] - doubled value
+
+	lea	rbx,[128+rsp]
+	lea	rdi,[288+rsp]
+	call	__ecp_nistz256_sub_fromq	;rsp+288 = previous result - [rsp+128]
+
+	mov	rax,QWORD[((192+0))+rsp]
+	mov	rbp,QWORD[((192+8))+rsp]
+	mov	rcx,QWORD[((192+16))+rsp]
+	mov	r10,QWORD[((192+24))+rsp]
+	lea	rdi,[320+rsp]
+
+	call	__ecp_nistz256_subq	;(r12,r13,r8,r9) = [rsp+192] - [rsp+288]
+
+	mov	QWORD[rdi],r12	;subq does not store, so write the result to rsp+320 here
+	mov	QWORD[8+rdi],r13
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+	mov	rax,QWORD[128+rsp]
+	lea	rbx,[128+rsp]
+	mov	r9,QWORD[((0+224))+rsp]
+	mov	r10,QWORD[((8+224))+rsp]
+	lea	rsi,[((0+224))+rsp]
+	mov	r11,QWORD[((16+224))+rsp]
+	mov	r12,QWORD[((24+224))+rsp]
+	lea	rdi,[256+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	mov	rax,QWORD[320+rsp]
+	lea	rbx,[320+rsp]
+	mov	r9,QWORD[((0+64))+rsp]
+	mov	r10,QWORD[((8+64))+rsp]
+	lea	rsi,[((0+64))+rsp]
+	mov	r11,QWORD[((16+64))+rsp]
+	mov	r12,QWORD[((24+64))+rsp]
+	lea	rdi,[320+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	lea	rbx,[256+rsp]
+	lea	rdi,[320+rsp]
+	call	__ecp_nistz256_sub_fromq
+
+DB	102,72,15,126,199	;movq rdi,xmm0 (output pointer)
+
+	movdqa	xmm0,xmm5	;output bytes 64..95: computed value at rsp+352, or rsp+544 where xmm5 is set, or rsp+448 where xmm4 is set
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[352+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((352+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[544+rsp]
+	pand	xmm3,XMMWORD[((544+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4	;the xmm4 mask is applied last, so it takes priority over xmm5
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[448+rsp]
+	pand	xmm3,XMMWORD[((448+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[64+rdi],xmm2
+	movdqu	XMMWORD[80+rdi],xmm3
+
+	movdqa	xmm0,xmm5	;output bytes 0..31: computed value at rsp+288, or rsp+480 where xmm5 is set, or rsp+384 where xmm4 is set
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[288+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((288+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[480+rsp]
+	pand	xmm3,XMMWORD[((480+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[384+rsp]
+	pand	xmm3,XMMWORD[((384+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[rdi],xmm2
+	movdqu	XMMWORD[16+rdi],xmm3
+
+	movdqa	xmm0,xmm5	;output bytes 32..63: computed value at rsp+320, or rsp+512 where xmm5 is set, or rsp+416 where xmm4 is set
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[320+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((320+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[512+rsp]
+	pand	xmm3,XMMWORD[((512+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[416+rsp]
+	pand	xmm3,XMMWORD[((416+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	XMMWORD[48+rdi],xmm3
+
+$L$add_doneq:
+	add	rsp,32*18+8
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbx
+	pop	rbp
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_ecp_nistz256_point_add:
+global	ecp_nistz256_point_add_affine
+;Win64 entry: rcx = output pointer, rdx = first input (96 bytes read), r8 = second input (64 bytes read); the result is written as 96 bytes at the output.
+ALIGN	32
+ecp_nistz256_point_add_affine:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_point_add_affine:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+	push	rbp
+	push	rbx
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	sub	rsp,32*15+8	;488-byte frame: fifteen 32-byte slots plus 8
+
+	movdqu	xmm0,XMMWORD[rsi]
+	mov	rbx,rdx	;rbx = second input pointer
+	movdqu	xmm1,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	xmm3,XMMWORD[48+rsi]
+	movdqu	xmm4,XMMWORD[64+rsi]
+	movdqu	xmm5,XMMWORD[80+rsi]
+	mov	rax,QWORD[((64+0))+rsi]
+	mov	r14,QWORD[((64+8))+rsi]
+	mov	r15,QWORD[((64+16))+rsi]
+	mov	r8,QWORD[((64+24))+rsi]
+	movdqa	XMMWORD[320+rsp],xmm0	;first input copied to rsp+320..415
+	movdqa	XMMWORD[(320+16)+rsp],xmm1
+	movdqa	XMMWORD[352+rsp],xmm2
+	movdqa	XMMWORD[(352+16)+rsp],xmm3
+	movdqa	XMMWORD[384+rsp],xmm4
+	movdqa	XMMWORD[(384+16)+rsp],xmm5
+	por	xmm5,xmm4	;start OR-folding bytes 64..95 of the first input
+
+	movdqu	xmm0,XMMWORD[rbx]
+	pshufd	xmm3,xmm5,0xb1
+	movdqu	xmm1,XMMWORD[16+rbx]
+	movdqu	xmm2,XMMWORD[32+rbx]
+	por	xmm5,xmm3
+	movdqu	xmm3,XMMWORD[48+rbx]
+	movdqa	XMMWORD[416+rsp],xmm0	;second input copied to rsp+416..479
+	pshufd	xmm4,xmm5,0x1e
+	movdqa	XMMWORD[(416+16)+rsp],xmm1
+	por	xmm1,xmm0
+DB	102,72,15,110,199	;movq xmm0,rdi (save output pointer)
+	movdqa	XMMWORD[448+rsp],xmm2
+	movdqa	XMMWORD[(448+16)+rsp],xmm3
+	por	xmm3,xmm2
+	por	xmm5,xmm4
+	pxor	xmm4,xmm4
+	por	xmm3,xmm1	;xmm3 = OR of all 64 bytes of the second input
+
+	lea	rsi,[((64-0))+rsi]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_sqr_montq
+
+	pcmpeqd	xmm5,xmm4
+	pshufd	xmm4,xmm3,0xb1
+	mov	rax,QWORD[rbx]
+
+	mov	r9,r12	;hand the squaring result (r12,r13,r14,r15) to mul_montq in r9..r12
+	por	xmm4,xmm3
+	pshufd	xmm5,xmm5,0	;xmm5 = all-ones iff bytes 64..95 of the first input are all zero
+	pshufd	xmm3,xmm4,0x1e
+	mov	r10,r13
+	por	xmm4,xmm3
+	pxor	xmm3,xmm3
+	mov	r11,r14
+	pcmpeqd	xmm4,xmm3
+	pshufd	xmm4,xmm4,0	;xmm4 = all-ones iff all 64 bytes of the second input are zero
+
+	lea	rsi,[((32-0))+rsp]
+	mov	r12,r15
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_mul_montq
+
+	lea	rbx,[320+rsp]
+	lea	rdi,[64+rsp]
+	call	__ecp_nistz256_sub_fromq	;rsp+64 = product - [rsp+320]
+
+	mov	rax,QWORD[384+rsp]
+	lea	rbx,[384+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((0+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	mov	rax,QWORD[384+rsp]
+	lea	rbx,[384+rsp]
+	mov	r9,QWORD[((0+64))+rsp]
+	mov	r10,QWORD[((8+64))+rsp]
+	lea	rsi,[((0+64))+rsp]
+	mov	r11,QWORD[((16+64))+rsp]
+	mov	r12,QWORD[((24+64))+rsp]
+	lea	rdi,[288+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	mov	rax,QWORD[448+rsp]
+	lea	rbx,[448+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((0+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	lea	rbx,[352+rsp]
+	lea	rdi,[96+rsp]
+	call	__ecp_nistz256_sub_fromq	;rsp+96 = product - [rsp+352]
+
+	mov	rax,QWORD[((0+64))+rsp]
+	mov	r14,QWORD[((8+64))+rsp]
+	lea	rsi,[((0+64))+rsp]
+	mov	r15,QWORD[((16+64))+rsp]
+	mov	r8,QWORD[((24+64))+rsp]
+	lea	rdi,[128+rsp]
+	call	__ecp_nistz256_sqr_montq
+
+	mov	rax,QWORD[((0+96))+rsp]
+	mov	r14,QWORD[((8+96))+rsp]
+	lea	rsi,[((0+96))+rsp]
+	mov	r15,QWORD[((16+96))+rsp]
+	mov	r8,QWORD[((24+96))+rsp]
+	lea	rdi,[192+rsp]
+	call	__ecp_nistz256_sqr_montq
+
+	mov	rax,QWORD[128+rsp]
+	lea	rbx,[128+rsp]
+	mov	r9,QWORD[((0+64))+rsp]
+	mov	r10,QWORD[((8+64))+rsp]
+	lea	rsi,[((0+64))+rsp]
+	mov	r11,QWORD[((16+64))+rsp]
+	mov	r12,QWORD[((24+64))+rsp]
+	lea	rdi,[160+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	mov	rax,QWORD[320+rsp]
+	lea	rbx,[320+rsp]
+	mov	r9,QWORD[((0+128))+rsp]
+	mov	r10,QWORD[((8+128))+rsp]
+	lea	rsi,[((0+128))+rsp]
+	mov	r11,QWORD[((16+128))+rsp]
+	mov	r12,QWORD[((24+128))+rsp]
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_mul_montq
+
+
+
+
+	xor	r11,r11	;inline copy of the mul_by_2q sequence on (r12,r13,r8,r9), without the store
+	add	r12,r12
+	lea	rsi,[192+rsp]
+	adc	r13,r13
+	mov	rax,r12
+	adc	r8,r8
+	adc	r9,r9
+	mov	rbp,r13
+	adc	r11,0
+
+	sub	r12,-1
+	mov	rcx,r8
+	sbb	r13,r14
+	sbb	r8,0
+	mov	r10,r9
+	sbb	r9,r15
+	sbb	r11,0
+
+	cmovc	r12,rax
+	mov	rax,QWORD[rsi]	;interleaved: load [rsp+192] as the minuend for subq
+	cmovc	r13,rbp
+	mov	rbp,QWORD[8+rsi]
+	cmovc	r8,rcx
+	mov	rcx,QWORD[16+rsi]
+	cmovc	r9,r10
+	mov	r10,QWORD[24+rsi]
+
+	call	__ecp_nistz256_subq	;(r12,r13,r8,r9) = [rsp+192] - doubled value
+
+	lea	rbx,[160+rsp]
+	lea	rdi,[224+rsp]
+	call	__ecp_nistz256_sub_fromq	;rsp+224 = previous result - [rsp+160]
+
+	mov	rax,QWORD[((0+0))+rsp]
+	mov	rbp,QWORD[((0+8))+rsp]
+	mov	rcx,QWORD[((0+16))+rsp]
+	mov	r10,QWORD[((0+24))+rsp]
+	lea	rdi,[64+rsp]
+
+	call	__ecp_nistz256_subq	;(r12,r13,r8,r9) = [rsp+0] - [rsp+224]
+
+	mov	QWORD[rdi],r12	;subq does not store, so write the result to rsp+64 here
+	mov	QWORD[8+rdi],r13
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+	mov	rax,QWORD[352+rsp]
+	lea	rbx,[352+rsp]
+	mov	r9,QWORD[((0+160))+rsp]
+	mov	r10,QWORD[((8+160))+rsp]
+	lea	rsi,[((0+160))+rsp]
+	mov	r11,QWORD[((16+160))+rsp]
+	mov	r12,QWORD[((24+160))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	mov	rax,QWORD[96+rsp]
+	lea	rbx,[96+rsp]
+	mov	r9,QWORD[((0+64))+rsp]
+	mov	r10,QWORD[((8+64))+rsp]
+	lea	rsi,[((0+64))+rsp]
+	mov	r11,QWORD[((16+64))+rsp]
+	mov	r12,QWORD[((24+64))+rsp]
+	lea	rdi,[64+rsp]
+	call	__ecp_nistz256_mul_montq
+
+	lea	rbx,[32+rsp]
+	lea	rdi,[256+rsp]
+	call	__ecp_nistz256_sub_fromq	;rsp+256 = product - [rsp+32]
+
+DB	102,72,15,126,199	;movq rdi,xmm0 (output pointer)
+
+	movdqa	xmm0,xmm5	;output bytes 64..95: computed value at rsp+288, or $L$ONE_mont where xmm5 is set, or rsp+384 where xmm4 is set
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[288+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((288+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[$L$ONE_mont]
+	pand	xmm3,XMMWORD[(($L$ONE_mont+16))]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4	;the xmm4 mask is applied last, so it takes priority over xmm5
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[384+rsp]
+	pand	xmm3,XMMWORD[((384+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[64+rdi],xmm2
+	movdqu	XMMWORD[80+rdi],xmm3
+
+	movdqa	xmm0,xmm5	;output bytes 0..31: computed value at rsp+224, or rsp+416 where xmm5 is set, or rsp+320 where xmm4 is set
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[224+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((224+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[416+rsp]
+	pand	xmm3,XMMWORD[((416+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[320+rsp]
+	pand	xmm3,XMMWORD[((320+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[rdi],xmm2
+	movdqu	XMMWORD[16+rdi],xmm3
+
+	movdqa	xmm0,xmm5	;output bytes 32..63: computed value at rsp+256, or rsp+448 where xmm5 is set, or rsp+352 where xmm4 is set
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[256+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((256+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[448+rsp]
+	pand	xmm3,XMMWORD[((448+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[352+rsp]
+	pand	xmm3,XMMWORD[((352+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	XMMWORD[48+rdi],xmm3
+
+	add	rsp,32*15+8
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbx
+	pop	rbp
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_ecp_nistz256_point_add_affine:
diff --git a/third_party/boringssl/win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm
new file mode 100644
index 0000000..4c03791
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm
@@ -0,0 +1,70 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.text code align=64
+
+
+
+
+
+global	CRYPTO_rdrand
+;Win64 entry: rcx = pointer to an 8-byte output. Stores one RDRAND result there; rax = 1 if RDRAND set CF, else 0.
+ALIGN	16
+CRYPTO_rdrand:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_CRYPTO_rdrand:
+	mov	rdi,rcx	;keep the output pointer in rdi because rdrand overwrites rcx
+
+
+	xor	rax,rax	;rax = 0 and CF = 0 ahead of the instruction
+
+
+DB	0x48,0x0f,0xc7,0xf1	;rdrand rcx
+
+	adc	rax,rax	;rax = 0 + 0 + CF, i.e. the CF value left by rdrand
+	mov	QWORD[rdi],rcx	;stored unconditionally, whatever CF was
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+global	CRYPTO_rdrand_multiple8_buf
+;Win64 entry: rcx = buffer, rdx = length in bytes. Fills the buffer 8 bytes per RDRAND; rax = 1 on success (also for length 0), 0 if any RDRAND leaves CF clear.
+ALIGN	16
+CRYPTO_rdrand_multiple8_buf:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_CRYPTO_rdrand_multiple8_buf:
+	mov	rdi,rcx	;rdi = write cursor
+	mov	rsi,rdx	;rsi = bytes remaining
+
+
+	test	rsi,rsi
+	jz	NEAR $L$out	;empty buffer: report success without executing rdrand
+	mov	rdx,8	;step size in bytes
+$L$loop:
+
+
+DB	0x48,0x0f,0xc7,0xf1	;rdrand rcx
+	jnc	NEAR $L$err	;CF clear: stop and return 0; bytes already written stay in the buffer
+	mov	QWORD[rdi],rcx
+	add	rdi,rdx
+	sub	rsi,rdx
+	jnz	NEAR $L$loop	;exits only when the remaining count reaches exactly 0, so the length must be a multiple of 8
+$L$out:
+	mov	rax,1
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$err:
+	xor	rax,rax
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
diff --git a/third_party/boringssl/win-x86_64/crypto/fipsmodule/rsaz-avx2.asm b/third_party/boringssl/win-x86_64/crypto/fipsmodule/rsaz-avx2.asm
new file mode 100644
index 0000000..32624ba
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/fipsmodule/rsaz-avx2.asm
@@ -0,0 +1,1971 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.text code align=64
+
+
+global	rsaz_1024_sqr_avx2	;exported entry point; five arguments (four in registers, one on the stack)
+
+ALIGN	64
+rsaz_1024_sqr_avx2:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	;rdi/rsi are parked at [rsp+8]/[rsp+16] and reloaded at $L$sqr_1024_epilogue
+	mov	rax,rsp
+$L$SEH_begin_rsaz_1024_sqr_avx2:
+	mov	rdi,rcx	;arg1 -> rdi: result pointer (stored to at the end of every outer pass)
+	mov	rsi,rdx	;arg2 -> rsi: input operand pointer
+	mov	rdx,r8	;arg3 -> rdx: copied to r13 in the body and used by the reduce loop
+	mov	rcx,r9	;arg4 -> rcx: ecx is the 32-bit factor in "imul eax,ecx" in the reduce code
+	mov	r8,QWORD[40+rsp]	;arg5 (on the stack) -> r8: r8d counts outer-loop passes
+
+
+
+	lea	rax,[rsp]	;rax = entry rsp; everything saved below is addressed relative to it
+
+	push	rbx	;saved at [rax-8]
+
+	push	rbp	;saved at [rax-16]
+
+	push	r12	;saved at [rax-24]
+
+	push	r13	;saved at [rax-32]
+
+	push	r14	;saved at [rax-40]
+
+	push	r15	;saved at [rax-48]
+
+	vzeroupper
+	lea	rsp,[((-168))+rsp]	;reserve 168 bytes: ten 16-byte xmm slots plus 8 bytes between them and the pushed r15
+	vmovaps	XMMWORD[(-216)+rax],xmm6	;save xmm6..xmm15 (reloaded at $L$sqr_1024_in_tail)
+	vmovaps	XMMWORD[(-200)+rax],xmm7
+	vmovaps	XMMWORD[(-184)+rax],xmm8
+	vmovaps	XMMWORD[(-168)+rax],xmm9
+	vmovaps	XMMWORD[(-152)+rax],xmm10
+	vmovaps	XMMWORD[(-136)+rax],xmm11
+	vmovaps	XMMWORD[(-120)+rax],xmm12
+	vmovaps	XMMWORD[(-104)+rax],xmm13
+	vmovaps	XMMWORD[(-88)+rax],xmm14
+	vmovaps	XMMWORD[(-72)+rax],xmm15
+$L$sqr_1024_body:
+	mov	rbp,rax	;rbp = entry rsp, kept for the tail because rsp is realigned below
+
+	mov	r13,rdx	;r13 = arg3 pointer (read by the reduce loop)
+	sub	rsp,832	;832 bytes of scratch
+	mov	r15,r13	;unbiased copy of arg3 for the boundary test below
+	sub	rdi,-128	;bias rdi/rsi/r13 by +128; later accesses are written as (offset-128)
+	sub	rsi,-128
+	sub	r13,-128
+
+	and	r15,4095	;offset of arg3 inside its 4096-byte block
+	add	r15,32*10
+	shr	r15,12	;nonzero iff offset+320 >= 4096, i.e. the 320-byte window reaches a 4096-byte boundary
+	vpxor	ymm9,ymm9,ymm9	;ymm9 = 0 (AVX op, leaves the flags from shr intact)
+	jz	NEAR $L$sqr_1024_no_n_copy	;window stays inside the block: use arg3 in place
+
+
+
+
+
+	sub	rsp,32*10	;room for a ten-vector private copy
+	vmovdqu	ymm0,YMMWORD[((0-128))+r13]	;read the nine 32-byte vectors of arg3
+	and	rsp,-2048	;round rsp down to a 2048-byte boundary
+	vmovdqu	ymm1,YMMWORD[((32-128))+r13]
+	vmovdqu	ymm2,YMMWORD[((64-128))+r13]
+	vmovdqu	ymm3,YMMWORD[((96-128))+r13]
+	vmovdqu	ymm4,YMMWORD[((128-128))+r13]
+	vmovdqu	ymm5,YMMWORD[((160-128))+r13]
+	vmovdqu	ymm6,YMMWORD[((192-128))+r13]
+	vmovdqu	ymm7,YMMWORD[((224-128))+r13]
+	vmovdqu	ymm8,YMMWORD[((256-128))+r13]
+	lea	r13,[((832+128))+rsp]	;repoint r13 (still biased by 128) at the copy, just above the 832-byte scratch
+	vmovdqu	YMMWORD[(0-128)+r13],ymm0	;write the copy
+	vmovdqu	YMMWORD[(32-128)+r13],ymm1
+	vmovdqu	YMMWORD[(64-128)+r13],ymm2
+	vmovdqu	YMMWORD[(96-128)+r13],ymm3
+	vmovdqu	YMMWORD[(128-128)+r13],ymm4
+	vmovdqu	YMMWORD[(160-128)+r13],ymm5
+	vmovdqu	YMMWORD[(192-128)+r13],ymm6
+	vmovdqu	YMMWORD[(224-128)+r13],ymm7
+	vmovdqu	YMMWORD[(256-128)+r13],ymm8
+	vmovdqu	YMMWORD[(288-128)+r13],ymm9	;tenth vector of the copy is zero
+$L$sqr_1024_no_n_copy:
+	and	rsp,-1024	;align the scratch area down to 1024 bytes
+
+	vmovdqu	ymm1,YMMWORD[((32-128))+rsi]	;ymm1..ymm8 = input vectors 1..8 (vector 0 is read straight from memory in the loops)
+	vmovdqu	ymm2,YMMWORD[((64-128))+rsi]
+	vmovdqu	ymm3,YMMWORD[((96-128))+rsi]
+	vmovdqu	ymm4,YMMWORD[((128-128))+rsi]
+	vmovdqu	ymm5,YMMWORD[((160-128))+rsi]
+	vmovdqu	ymm6,YMMWORD[((192-128))+rsi]
+	vmovdqu	ymm7,YMMWORD[((224-128))+rsi]
+	vmovdqu	ymm8,YMMWORD[((256-128))+rsi]
+
+	lea	rbx,[192+rsp]	;rbx = rsp+192; accumulator slots are addressed as (offset-192)+rbx
+	vmovdqu	ymm15,YMMWORD[$L$and_mask]	;ymm15 = constant defined outside this view; ANDed with limbs after each 29-bit shift (presumably a 29-bit mask -- confirm)
+	jmp	NEAR $L$OOP_GRANDE_SQR_1024
+
+ALIGN	32
+$L$OOP_GRANDE_SQR_1024:	;outer loop: one full pass per iteration, repeated r8d times
+	lea	r9,[((576+128))+rsp]	;r9 = base (biased by 128) of the stack area that receives the doubled input vectors
+	lea	r12,[448+rsp]	;r12 = rsp+448; upper accumulator slots are addressed as (offset-448)+r12
+
+
+
+
+	vpaddq	ymm1,ymm1,ymm1	;double input vectors 1..8 and park them at r9
+	vpbroadcastq	ymm10,QWORD[((0-128))+rsi]	;ymm10 = first input qword in every lane
+	vpaddq	ymm2,ymm2,ymm2
+	vmovdqa	YMMWORD[(0-128)+r9],ymm1
+	vpaddq	ymm3,ymm3,ymm3
+	vmovdqa	YMMWORD[(32-128)+r9],ymm2
+	vpaddq	ymm4,ymm4,ymm4
+	vmovdqa	YMMWORD[(64-128)+r9],ymm3
+	vpaddq	ymm5,ymm5,ymm5
+	vmovdqa	YMMWORD[(96-128)+r9],ymm4
+	vpaddq	ymm6,ymm6,ymm6
+	vmovdqa	YMMWORD[(128-128)+r9],ymm5
+	vpaddq	ymm7,ymm7,ymm7
+	vmovdqa	YMMWORD[(160-128)+r9],ymm6
+	vpaddq	ymm8,ymm8,ymm8
+	vmovdqa	YMMWORD[(192-128)+r9],ymm7
+	vpxor	ymm9,ymm9,ymm9	;ymm9 = 0, used to clear the upper accumulator slots below
+	vmovdqa	YMMWORD[(224-128)+r9],ymm8
+
+	vpmuludq	ymm0,ymm10,YMMWORD[((0-128))+rsi]	;first row of products: ymm0..ymm8 = ymm10 * (vector 0, doubled vectors 1..8)
+	vpbroadcastq	ymm11,QWORD[((32-128))+rsi]	;ymm11 = multiplier for the next row
+	vmovdqu	YMMWORD[(288-192)+rbx],ymm9	;zero accumulator slots 288..544
+	vpmuludq	ymm1,ymm1,ymm10
+	vmovdqu	YMMWORD[(320-448)+r12],ymm9
+	vpmuludq	ymm2,ymm2,ymm10
+	vmovdqu	YMMWORD[(352-448)+r12],ymm9
+	vpmuludq	ymm3,ymm3,ymm10
+	vmovdqu	YMMWORD[(384-448)+r12],ymm9
+	vpmuludq	ymm4,ymm4,ymm10
+	vmovdqu	YMMWORD[(416-448)+r12],ymm9
+	vpmuludq	ymm5,ymm5,ymm10
+	vmovdqu	YMMWORD[(448-448)+r12],ymm9
+	vpmuludq	ymm6,ymm6,ymm10
+	vmovdqu	YMMWORD[(480-448)+r12],ymm9
+	vpmuludq	ymm7,ymm7,ymm10
+	vmovdqu	YMMWORD[(512-448)+r12],ymm9
+	vpmuludq	ymm8,ymm8,ymm10
+	vpbroadcastq	ymm10,QWORD[((64-128))+rsi]
+	vmovdqu	YMMWORD[(544-448)+r12],ymm9
+
+	mov	r15,rsi	;r15 = cursor for the broadcast loads; advances 8 bytes per inner pass
+	mov	r14d,4	;inner loop runs 4 passes
+	jmp	NEAR $L$sqr_entry_1024	;first pass skips the reload-and-accumulate head at $L$OOP_SQR_1024
+ALIGN	32
+$L$OOP_SQR_1024:	;loop-back head for inner passes 2..4 (pass 1 enters at $L$sqr_entry_1024)
+	vpbroadcastq	ymm11,QWORD[((32-128))+r15]	;multiplier for the second row of this pass
+	vpmuludq	ymm0,ymm10,YMMWORD[((0-128))+rsi]	;ymm10 * input vector 0 ...
+	vpaddq	ymm0,ymm0,YMMWORD[((0-192))+rbx]	;... added to the running sum stored at rbx
+	vpmuludq	ymm1,ymm10,YMMWORD[((0-128))+r9]	;remaining products use the doubled vectors at r9
+	vpaddq	ymm1,ymm1,YMMWORD[((32-192))+rbx]
+	vpmuludq	ymm2,ymm10,YMMWORD[((32-128))+r9]
+	vpaddq	ymm2,ymm2,YMMWORD[((64-192))+rbx]
+	vpmuludq	ymm3,ymm10,YMMWORD[((64-128))+r9]
+	vpaddq	ymm3,ymm3,YMMWORD[((96-192))+rbx]
+	vpmuludq	ymm4,ymm10,YMMWORD[((96-128))+r9]
+	vpaddq	ymm4,ymm4,YMMWORD[((128-192))+rbx]
+	vpmuludq	ymm5,ymm10,YMMWORD[((128-128))+r9]
+	vpaddq	ymm5,ymm5,YMMWORD[((160-192))+rbx]
+	vpmuludq	ymm6,ymm10,YMMWORD[((160-128))+r9]
+	vpaddq	ymm6,ymm6,YMMWORD[((192-192))+rbx]
+	vpmuludq	ymm7,ymm10,YMMWORD[((192-128))+r9]
+	vpaddq	ymm7,ymm7,YMMWORD[((224-192))+rbx]
+	vpmuludq	ymm8,ymm10,YMMWORD[((224-128))+r9]
+	vpbroadcastq	ymm10,QWORD[((64-128))+r15]	;ymm10 is free now: load the multiplier for the third row
+	vpaddq	ymm8,ymm8,YMMWORD[((256-192))+rbx]
+$L$sqr_entry_1024:	;common part of every inner pass; ymm0..ymm8 hold the first row's sums
+	vmovdqu	YMMWORD[(0-192)+rbx],ymm0	;slots 0 and 32 are complete for this pass
+	vmovdqu	YMMWORD[(32-192)+rbx],ymm1
+
+	vpmuludq	ymm12,ymm11,YMMWORD[((32-128))+rsi]	;row 2: ymm11 * (vector 1, doubled vectors 2..8)
+	vpaddq	ymm2,ymm2,ymm12
+	vpmuludq	ymm14,ymm11,YMMWORD[((32-128))+r9]
+	vpaddq	ymm3,ymm3,ymm14
+	vpmuludq	ymm13,ymm11,YMMWORD[((64-128))+r9]
+	vpaddq	ymm4,ymm4,ymm13
+	vpmuludq	ymm12,ymm11,YMMWORD[((96-128))+r9]
+	vpaddq	ymm5,ymm5,ymm12
+	vpmuludq	ymm14,ymm11,YMMWORD[((128-128))+r9]
+	vpaddq	ymm6,ymm6,ymm14
+	vpmuludq	ymm13,ymm11,YMMWORD[((160-128))+r9]
+	vpaddq	ymm7,ymm7,ymm13
+	vpmuludq	ymm12,ymm11,YMMWORD[((192-128))+r9]
+	vpaddq	ymm8,ymm8,ymm12
+	vpmuludq	ymm0,ymm11,YMMWORD[((224-128))+r9]
+	vpbroadcastq	ymm11,QWORD[((96-128))+r15]
+	vpaddq	ymm0,ymm0,YMMWORD[((288-192))+rbx]	;ymm0 is reused as the accumulator for slot 288
+
+	vmovdqu	YMMWORD[(64-192)+rbx],ymm2
+	vmovdqu	YMMWORD[(96-192)+rbx],ymm3
+
+	vpmuludq	ymm13,ymm10,YMMWORD[((64-128))+rsi]	;row 3
+	vpaddq	ymm4,ymm4,ymm13
+	vpmuludq	ymm12,ymm10,YMMWORD[((64-128))+r9]
+	vpaddq	ymm5,ymm5,ymm12
+	vpmuludq	ymm14,ymm10,YMMWORD[((96-128))+r9]
+	vpaddq	ymm6,ymm6,ymm14
+	vpmuludq	ymm13,ymm10,YMMWORD[((128-128))+r9]
+	vpaddq	ymm7,ymm7,ymm13
+	vpmuludq	ymm12,ymm10,YMMWORD[((160-128))+r9]
+	vpaddq	ymm8,ymm8,ymm12
+	vpmuludq	ymm14,ymm10,YMMWORD[((192-128))+r9]
+	vpaddq	ymm0,ymm0,ymm14
+	vpmuludq	ymm1,ymm10,YMMWORD[((224-128))+r9]
+	vpbroadcastq	ymm10,QWORD[((128-128))+r15]
+	vpaddq	ymm1,ymm1,YMMWORD[((320-448))+r12]
+
+	vmovdqu	YMMWORD[(128-192)+rbx],ymm4
+	vmovdqu	YMMWORD[(160-192)+rbx],ymm5
+
+	vpmuludq	ymm12,ymm11,YMMWORD[((96-128))+rsi]	;row 4
+	vpaddq	ymm6,ymm6,ymm12
+	vpmuludq	ymm14,ymm11,YMMWORD[((96-128))+r9]
+	vpaddq	ymm7,ymm7,ymm14
+	vpmuludq	ymm13,ymm11,YMMWORD[((128-128))+r9]
+	vpaddq	ymm8,ymm8,ymm13
+	vpmuludq	ymm12,ymm11,YMMWORD[((160-128))+r9]
+	vpaddq	ymm0,ymm0,ymm12
+	vpmuludq	ymm14,ymm11,YMMWORD[((192-128))+r9]
+	vpaddq	ymm1,ymm1,ymm14
+	vpmuludq	ymm2,ymm11,YMMWORD[((224-128))+r9]
+	vpbroadcastq	ymm11,QWORD[((160-128))+r15]
+	vpaddq	ymm2,ymm2,YMMWORD[((352-448))+r12]
+
+	vmovdqu	YMMWORD[(192-192)+rbx],ymm6
+	vmovdqu	YMMWORD[(224-192)+rbx],ymm7
+
+	vpmuludq	ymm12,ymm10,YMMWORD[((128-128))+rsi]	;row 5
+	vpaddq	ymm8,ymm8,ymm12
+	vpmuludq	ymm14,ymm10,YMMWORD[((128-128))+r9]
+	vpaddq	ymm0,ymm0,ymm14
+	vpmuludq	ymm13,ymm10,YMMWORD[((160-128))+r9]
+	vpaddq	ymm1,ymm1,ymm13
+	vpmuludq	ymm12,ymm10,YMMWORD[((192-128))+r9]
+	vpaddq	ymm2,ymm2,ymm12
+	vpmuludq	ymm3,ymm10,YMMWORD[((224-128))+r9]
+	vpbroadcastq	ymm10,QWORD[((192-128))+r15]
+	vpaddq	ymm3,ymm3,YMMWORD[((384-448))+r12]
+
+	vmovdqu	YMMWORD[(256-192)+rbx],ymm8
+	vmovdqu	YMMWORD[(288-192)+rbx],ymm0
+	lea	rbx,[8+rbx]	;slide the low accumulator window up by one qword for the next pass
+
+	vpmuludq	ymm13,ymm11,YMMWORD[((160-128))+rsi]	;row 6
+	vpaddq	ymm1,ymm1,ymm13
+	vpmuludq	ymm12,ymm11,YMMWORD[((160-128))+r9]
+	vpaddq	ymm2,ymm2,ymm12
+	vpmuludq	ymm14,ymm11,YMMWORD[((192-128))+r9]
+	vpaddq	ymm3,ymm3,ymm14
+	vpmuludq	ymm4,ymm11,YMMWORD[((224-128))+r9]
+	vpbroadcastq	ymm11,QWORD[((224-128))+r15]
+	vpaddq	ymm4,ymm4,YMMWORD[((416-448))+r12]
+
+	vmovdqu	YMMWORD[(320-448)+r12],ymm1
+	vmovdqu	YMMWORD[(352-448)+r12],ymm2
+
+	vpmuludq	ymm12,ymm10,YMMWORD[((192-128))+rsi]	;row 7
+	vpaddq	ymm3,ymm3,ymm12
+	vpmuludq	ymm14,ymm10,YMMWORD[((192-128))+r9]
+	vpbroadcastq	ymm0,QWORD[((256-128))+r15]	;ymm0 = multiplier for the last row
+	vpaddq	ymm4,ymm4,ymm14
+	vpmuludq	ymm5,ymm10,YMMWORD[((224-128))+r9]
+	vpbroadcastq	ymm10,QWORD[((0+8-128))+r15]	;first-row multiplier for the NEXT pass (offset +8)
+	vpaddq	ymm5,ymm5,YMMWORD[((448-448))+r12]
+
+	vmovdqu	YMMWORD[(384-448)+r12],ymm3
+	vmovdqu	YMMWORD[(416-448)+r12],ymm4
+	lea	r15,[8+r15]	;advance the multiplier cursor by one qword
+
+	vpmuludq	ymm12,ymm11,YMMWORD[((224-128))+rsi]	;row 8
+	vpaddq	ymm5,ymm5,ymm12
+	vpmuludq	ymm6,ymm11,YMMWORD[((224-128))+r9]
+	vpaddq	ymm6,ymm6,YMMWORD[((480-448))+r12]
+
+	vpmuludq	ymm7,ymm0,YMMWORD[((256-128))+rsi]	;row 9: only the vector-8 term remains
+	vmovdqu	YMMWORD[(448-448)+r12],ymm5
+	vpaddq	ymm7,ymm7,YMMWORD[((512-448))+r12]
+	vmovdqu	YMMWORD[(480-448)+r12],ymm6
+	vmovdqu	YMMWORD[(512-448)+r12],ymm7
+	lea	r12,[8+r12]	;slide the upper accumulator window up by one qword
+
+	dec	r14d
+	jnz	NEAR $L$OOP_SQR_1024	;4 passes in total (r14d was set to 4 before entry)
+
+	vmovdqu	ymm8,YMMWORD[256+rsp]	;reload three accumulator vectors from their unshifted stack addresses
+	vmovdqu	ymm1,YMMWORD[288+rsp]
+	vmovdqu	ymm2,YMMWORD[320+rsp]
+	lea	rbx,[192+rsp]	;reset rbx to its pre-loop value
+
+	vpsrlq	ymm14,ymm8,29	;carries = bits 29 and up of each qword
+	vpand	ymm8,ymm8,ymm15	;keep the bits selected by the and_mask constant
+	vpsrlq	ymm11,ymm1,29
+	vpand	ymm1,ymm1,ymm15
+
+	vpermq	ymm14,ymm14,0x93	;rotate the carries up one qword lane (lane 3 wraps to lane 0)
+	vpxor	ymm9,ymm9,ymm9
+	vpermq	ymm11,ymm11,0x93
+
+	vpblendd	ymm10,ymm14,ymm9,3	;lane 0 <- 0: the wrapped carry belongs to the next vector
+	vpblendd	ymm14,ymm11,ymm14,3	;lane 0 <- carry wrapped out of the previous vector
+	vpaddq	ymm8,ymm8,ymm10
+	vpblendd	ymm11,ymm9,ymm11,3
+	vpaddq	ymm1,ymm1,ymm14
+	vpaddq	ymm2,ymm2,ymm11
+	vmovdqu	YMMWORD[(288-192)+rbx],ymm1
+	vmovdqu	YMMWORD[(320-192)+rbx],ymm2
+
+	mov	rax,QWORD[rsp]	;lowest four accumulator qwords go to scalar registers rax/r10/r11/r12
+	mov	r10,QWORD[8+rsp]
+	mov	r11,QWORD[16+rsp]
+	mov	r12,QWORD[24+rsp]
+	vmovdqu	ymm1,YMMWORD[32+rsp]	;remaining low accumulator vectors go to ymm1..ymm7 (ymm8 was set above)
+	vmovdqu	ymm2,YMMWORD[((64-192))+rbx]
+	vmovdqu	ymm3,YMMWORD[((96-192))+rbx]
+	vmovdqu	ymm4,YMMWORD[((128-192))+rbx]
+	vmovdqu	ymm5,YMMWORD[((160-192))+rbx]
+	vmovdqu	ymm6,YMMWORD[((192-192))+rbx]
+	vmovdqu	ymm7,YMMWORD[((224-192))+rbx]
+
+	mov	r9,rax
+	imul	eax,ecx	;eax = low 32 bits of (lowest limb * arg4)
+	and	eax,0x1fffffff	;keep 29 bits: multiplier for the first reduction step
+	vmovd	xmm12,eax
+
+	mov	rdx,rax
+	imul	rax,QWORD[((-128))+r13]	;multiplier * qwords 0..3 at r13, folded into r9/r10/r11/r12
+	vpbroadcastq	ymm12,xmm12	;the same multiplier in every lane for the vector part
+	add	r9,rax
+	mov	rax,rdx
+	imul	rax,QWORD[((8-128))+r13]
+	shr	r9,29	;carry out of the lowest limb
+	add	r10,rax
+	mov	rax,rdx
+	imul	rax,QWORD[((16-128))+r13]
+	add	r10,r9
+	add	r11,rax
+	imul	rdx,QWORD[((24-128))+r13]
+	add	r12,rdx
+
+	mov	rax,r10
+	imul	eax,ecx	;multiplier for the next step, derived from the new lowest limb
+	and	eax,0x1fffffff
+
+	mov	r14d,9	;reduce loop runs 9 passes
+	jmp	NEAR $L$OOP_REDUCE_1024
+
+ALIGN	32
+$L$OOP_REDUCE_1024:	;reduction loop, 9 passes; each pass applies multipliers ymm12, ymm13, ymm12, ymm0 in turn
+	vmovd	xmm13,eax	;second multiplier of this pass (computed just before entry / at the loop bottom)
+	vpbroadcastq	ymm13,xmm13
+
+	vpmuludq	ymm10,ymm12,YMMWORD[((32-128))+r13]	;ymm12 * vectors 1..8 at r13, added to ymm1..ymm8
+	mov	rdx,rax
+	imul	rax,QWORD[((-128))+r13]	;scalar part: second multiplier * qwords 0..2 at r13
+	vpaddq	ymm1,ymm1,ymm10
+	add	r10,rax
+	vpmuludq	ymm14,ymm12,YMMWORD[((64-128))+r13]
+	mov	rax,rdx
+	imul	rax,QWORD[((8-128))+r13]
+	vpaddq	ymm2,ymm2,ymm14
+	vpmuludq	ymm11,ymm12,YMMWORD[((96-128))+r13]
+DB	0x67	;raw 0x67 prefix byte in front of the next instruction (presumably code-size padding -- confirm against the generator)
+	add	r11,rax
+DB	0x67
+	mov	rax,rdx
+	imul	rax,QWORD[((16-128))+r13]
+	shr	r10,29	;carry out of the limb that was just reduced
+	vpaddq	ymm3,ymm3,ymm11
+	vpmuludq	ymm10,ymm12,YMMWORD[((128-128))+r13]
+	add	r12,rax
+	add	r11,r10
+	vpaddq	ymm4,ymm4,ymm10
+	vpmuludq	ymm14,ymm12,YMMWORD[((160-128))+r13]
+	mov	rax,r11
+	imul	eax,ecx	;third multiplier: (new lowest limb * arg4) masked to 29 bits below
+	vpaddq	ymm5,ymm5,ymm14
+	vpmuludq	ymm11,ymm12,YMMWORD[((192-128))+r13]
+	and	eax,0x1fffffff
+	vpaddq	ymm6,ymm6,ymm11
+	vpmuludq	ymm10,ymm12,YMMWORD[((224-128))+r13]
+	vpaddq	ymm7,ymm7,ymm10
+	vpmuludq	ymm14,ymm12,YMMWORD[((256-128))+r13]
+	vmovd	xmm12,eax
+
+	vpaddq	ymm8,ymm8,ymm14
+
+	vpbroadcastq	ymm12,xmm12
+
+	vpmuludq	ymm11,ymm13,YMMWORD[((32-8-128))+r13]	;ymm13 * vectors read 8 bytes lower (one qword of shift)
+	vmovdqu	ymm14,YMMWORD[((96-8-128))+r13]
+	mov	rdx,rax
+	imul	rax,QWORD[((-128))+r13]
+	vpaddq	ymm1,ymm1,ymm11
+	vpmuludq	ymm10,ymm13,YMMWORD[((64-8-128))+r13]
+	vmovdqu	ymm11,YMMWORD[((128-8-128))+r13]
+	add	r11,rax
+	mov	rax,rdx
+	imul	rax,QWORD[((8-128))+r13]
+	vpaddq	ymm2,ymm2,ymm10
+	add	rax,r12
+	shr	r11,29
+	vpmuludq	ymm14,ymm14,ymm13
+	vmovdqu	ymm10,YMMWORD[((160-8-128))+r13]
+	add	rax,r11
+	vpaddq	ymm3,ymm3,ymm14
+	vpmuludq	ymm11,ymm11,ymm13
+	vmovdqu	ymm14,YMMWORD[((192-8-128))+r13]
+DB	0x67
+	mov	r12,rax
+	imul	eax,ecx	;fourth multiplier (ends up in ymm0)
+	vpaddq	ymm4,ymm4,ymm11
+	vpmuludq	ymm10,ymm10,ymm13
+DB	0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00	;hand-encoded "vmovdqu ymm11,[r13+0x58]" (0x58 = 224-8-128) with a 32-bit displacement
+	and	eax,0x1fffffff
+	vpaddq	ymm5,ymm5,ymm10
+	vpmuludq	ymm14,ymm14,ymm13
+	vmovdqu	ymm10,YMMWORD[((256-8-128))+r13]
+	vpaddq	ymm6,ymm6,ymm14
+	vpmuludq	ymm11,ymm11,ymm13
+	vmovdqu	ymm9,YMMWORD[((288-8-128))+r13]
+	vmovd	xmm0,eax
+	imul	rax,QWORD[((-128))+r13]
+	vpaddq	ymm7,ymm7,ymm11
+	vpmuludq	ymm10,ymm10,ymm13
+	vmovdqu	ymm14,YMMWORD[((32-16-128))+r13]
+	vpbroadcastq	ymm0,xmm0
+	vpaddq	ymm8,ymm8,ymm10
+	vpmuludq	ymm9,ymm9,ymm13	;ymm9 starts a ninth accumulator for the shifted products
+	vmovdqu	ymm11,YMMWORD[((64-16-128))+r13]
+	add	r12,rax
+
+	vmovdqu	ymm13,YMMWORD[((32-24-128))+r13]	;ymm13 is reused as a data register from here on
+	vpmuludq	ymm14,ymm14,ymm12	;ymm12 * vectors read 16 bytes lower
+	vmovdqu	ymm10,YMMWORD[((96-16-128))+r13]
+	vpaddq	ymm1,ymm1,ymm14
+	vpmuludq	ymm13,ymm13,ymm0	;ymm0 * vector read 24 bytes lower
+	vpmuludq	ymm11,ymm11,ymm12
+DB	0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff	;hand-encoded "vmovdqu ymm14,[r13-16]" (-16 = 128-16-128) with a 32-bit displacement
+	vpaddq	ymm13,ymm13,ymm1	;ymm13 = lowest accumulator vector after all four multipliers
+	vpaddq	ymm2,ymm2,ymm11
+	vpmuludq	ymm10,ymm10,ymm12
+	vmovdqu	ymm11,YMMWORD[((160-16-128))+r13]
+DB	0x67
+	vmovq	rax,xmm13	;rax = lowest qword of that vector
+	vmovdqu	YMMWORD[rsp],ymm13	;spill it so qwords 1..3 can be read back as scalars below
+	vpaddq	ymm3,ymm3,ymm10
+	vpmuludq	ymm14,ymm14,ymm12
+	vmovdqu	ymm10,YMMWORD[((192-16-128))+r13]
+	vpaddq	ymm4,ymm4,ymm14
+	vpmuludq	ymm11,ymm11,ymm12
+	vmovdqu	ymm14,YMMWORD[((224-16-128))+r13]
+	vpaddq	ymm5,ymm5,ymm11
+	vpmuludq	ymm10,ymm10,ymm12
+	vmovdqu	ymm11,YMMWORD[((256-16-128))+r13]
+	vpaddq	ymm6,ymm6,ymm10
+	vpmuludq	ymm14,ymm14,ymm12
+	shr	r12,29
+	vmovdqu	ymm10,YMMWORD[((288-16-128))+r13]
+	add	rax,r12
+	vpaddq	ymm7,ymm7,ymm14
+	vpmuludq	ymm11,ymm11,ymm12
+
+	mov	r9,rax
+	imul	eax,ecx	;first multiplier of the NEXT pass
+	vpaddq	ymm8,ymm8,ymm11
+	vpmuludq	ymm10,ymm10,ymm12
+	and	eax,0x1fffffff
+	vmovd	xmm12,eax
+	vmovdqu	ymm11,YMMWORD[((96-24-128))+r13]
+DB	0x67
+	vpaddq	ymm9,ymm9,ymm10
+	vpbroadcastq	ymm12,xmm12
+
+	vpmuludq	ymm14,ymm0,YMMWORD[((64-24-128))+r13]	;ymm0 * vectors read 24 bytes lower; the sums also shift down one register
+	vmovdqu	ymm10,YMMWORD[((128-24-128))+r13]
+	mov	rdx,rax
+	imul	rax,QWORD[((-128))+r13]
+	mov	r10,QWORD[8+rsp]	;qword 1 of the spilled vector
+	vpaddq	ymm1,ymm2,ymm14	;ymm1 <- ymm2 + product
+	vpmuludq	ymm11,ymm11,ymm0
+	vmovdqu	ymm14,YMMWORD[((160-24-128))+r13]
+	add	r9,rax
+	mov	rax,rdx
+	imul	rax,QWORD[((8-128))+r13]
+DB	0x67
+	shr	r9,29
+	mov	r11,QWORD[16+rsp]	;qword 2 of the spilled vector
+	vpaddq	ymm2,ymm3,ymm11
+	vpmuludq	ymm10,ymm10,ymm0
+	vmovdqu	ymm11,YMMWORD[((192-24-128))+r13]
+	add	r10,rax
+	mov	rax,rdx
+	imul	rax,QWORD[((16-128))+r13]
+	vpaddq	ymm3,ymm4,ymm10
+	vpmuludq	ymm14,ymm14,ymm0
+	vmovdqu	ymm10,YMMWORD[((224-24-128))+r13]
+	imul	rdx,QWORD[((24-128))+r13]
+	add	r11,rax
+	lea	rax,[r10*1+r9]	;rax = r10 + r9 without touching the flags
+	vpaddq	ymm4,ymm5,ymm14
+	vpmuludq	ymm11,ymm11,ymm0
+	vmovdqu	ymm14,YMMWORD[((256-24-128))+r13]
+	mov	r10,rax
+	imul	eax,ecx	;second multiplier of the next pass (moved into xmm13 at the loop top)
+	vpmuludq	ymm10,ymm10,ymm0
+	vpaddq	ymm5,ymm6,ymm11
+	vmovdqu	ymm11,YMMWORD[((288-24-128))+r13]
+	and	eax,0x1fffffff
+	vpaddq	ymm6,ymm7,ymm10
+	vpmuludq	ymm14,ymm14,ymm0
+	add	rdx,QWORD[24+rsp]	;qword 3 of the spilled vector
+	vpaddq	ymm7,ymm8,ymm14
+	vpmuludq	ymm11,ymm11,ymm0
+	vpaddq	ymm8,ymm9,ymm11
+	vmovq	xmm9,r12	;ymm9 = the shifted r12 carry in lane 0, upper lanes zeroed by vmovq
+	mov	r12,rdx
+
+	dec	r14d
+	jnz	NEAR $L$OOP_REDUCE_1024
+	lea	r12,[448+rsp]	;r12 = base of the upper accumulator slots again
+	vpaddq	ymm0,ymm13,ymm9	;fold the pending scalar carry into the lowest vector
+	vpxor	ymm9,ymm9,ymm9	;ymm9 = 0 for the blends below
+
+	vpaddq	ymm0,ymm0,YMMWORD[((288-192))+rbx]	;add the stored accumulator slots 288..544 to ymm0..ymm8
+	vpaddq	ymm1,ymm1,YMMWORD[((320-448))+r12]
+	vpaddq	ymm2,ymm2,YMMWORD[((352-448))+r12]
+	vpaddq	ymm3,ymm3,YMMWORD[((384-448))+r12]
+	vpaddq	ymm4,ymm4,YMMWORD[((416-448))+r12]
+	vpaddq	ymm5,ymm5,YMMWORD[((448-448))+r12]
+	vpaddq	ymm6,ymm6,YMMWORD[((480-448))+r12]
+	vpaddq	ymm7,ymm7,YMMWORD[((512-448))+r12]
+	vpaddq	ymm8,ymm8,YMMWORD[((544-448))+r12]
+
+	vpsrlq	ymm14,ymm0,29	;carry pass 1 over ymm0..ymm3: split at bit 29, rotate carries up one lane, re-add
+	vpand	ymm0,ymm0,ymm15
+	vpsrlq	ymm11,ymm1,29
+	vpand	ymm1,ymm1,ymm15
+	vpsrlq	ymm12,ymm2,29
+	vpermq	ymm14,ymm14,0x93
+	vpand	ymm2,ymm2,ymm15
+	vpsrlq	ymm13,ymm3,29
+	vpermq	ymm11,ymm11,0x93
+	vpand	ymm3,ymm3,ymm15
+	vpermq	ymm12,ymm12,0x93
+
+	vpblendd	ymm10,ymm14,ymm9,3	;lane 0 <- 0 for the lowest vector
+	vpermq	ymm13,ymm13,0x93
+	vpblendd	ymm14,ymm11,ymm14,3	;lane 0 <- carry wrapped out of the vector below
+	vpaddq	ymm0,ymm0,ymm10
+	vpblendd	ymm11,ymm12,ymm11,3
+	vpaddq	ymm1,ymm1,ymm14
+	vpblendd	ymm12,ymm13,ymm12,3
+	vpaddq	ymm2,ymm2,ymm11
+	vpblendd	ymm13,ymm9,ymm13,3	;only the wrapped carry of ymm3 moves on into ymm4
+	vpaddq	ymm3,ymm3,ymm12
+	vpaddq	ymm4,ymm4,ymm13
+
+	vpsrlq	ymm14,ymm0,29	;carry pass 2 over ymm0..ymm3, interleaved with the stores to the result
+	vpand	ymm0,ymm0,ymm15
+	vpsrlq	ymm11,ymm1,29
+	vpand	ymm1,ymm1,ymm15
+	vpsrlq	ymm12,ymm2,29
+	vpermq	ymm14,ymm14,0x93
+	vpand	ymm2,ymm2,ymm15
+	vpsrlq	ymm13,ymm3,29
+	vpermq	ymm11,ymm11,0x93
+	vpand	ymm3,ymm3,ymm15
+	vpermq	ymm12,ymm12,0x93
+
+	vpblendd	ymm10,ymm14,ymm9,3
+	vpermq	ymm13,ymm13,0x93
+	vpblendd	ymm14,ymm11,ymm14,3
+	vpaddq	ymm0,ymm0,ymm10
+	vpblendd	ymm11,ymm12,ymm11,3
+	vpaddq	ymm1,ymm1,ymm14
+	vmovdqu	YMMWORD[(0-128)+rdi],ymm0	;result vector 0
+	vpblendd	ymm12,ymm13,ymm12,3
+	vpaddq	ymm2,ymm2,ymm11
+	vmovdqu	YMMWORD[(32-128)+rdi],ymm1
+	vpblendd	ymm13,ymm9,ymm13,3
+	vpaddq	ymm3,ymm3,ymm12
+	vmovdqu	YMMWORD[(64-128)+rdi],ymm2
+	vpaddq	ymm4,ymm4,ymm13
+	vmovdqu	YMMWORD[(96-128)+rdi],ymm3
+	vpsrlq	ymm14,ymm4,29	;carry pass 1 over ymm4..ymm8
+	vpand	ymm4,ymm4,ymm15
+	vpsrlq	ymm11,ymm5,29
+	vpand	ymm5,ymm5,ymm15
+	vpsrlq	ymm12,ymm6,29
+	vpermq	ymm14,ymm14,0x93
+	vpand	ymm6,ymm6,ymm15
+	vpsrlq	ymm13,ymm7,29
+	vpermq	ymm11,ymm11,0x93
+	vpand	ymm7,ymm7,ymm15
+	vpsrlq	ymm0,ymm8,29
+	vpermq	ymm12,ymm12,0x93
+	vpand	ymm8,ymm8,ymm15
+	vpermq	ymm13,ymm13,0x93
+
+	vpblendd	ymm10,ymm14,ymm9,3
+	vpermq	ymm0,ymm0,0x93
+	vpblendd	ymm14,ymm11,ymm14,3
+	vpaddq	ymm4,ymm4,ymm10
+	vpblendd	ymm11,ymm12,ymm11,3
+	vpaddq	ymm5,ymm5,ymm14
+	vpblendd	ymm12,ymm13,ymm12,3
+	vpaddq	ymm6,ymm6,ymm11
+	vpblendd	ymm13,ymm0,ymm13,3
+	vpaddq	ymm7,ymm7,ymm12
+	vpaddq	ymm8,ymm8,ymm13
+
+	vpsrlq	ymm14,ymm4,29	;carry pass 2 over ymm4..ymm8, interleaved with the stores to the result
+	vpand	ymm4,ymm4,ymm15
+	vpsrlq	ymm11,ymm5,29
+	vpand	ymm5,ymm5,ymm15
+	vpsrlq	ymm12,ymm6,29
+	vpermq	ymm14,ymm14,0x93
+	vpand	ymm6,ymm6,ymm15
+	vpsrlq	ymm13,ymm7,29
+	vpermq	ymm11,ymm11,0x93
+	vpand	ymm7,ymm7,ymm15
+	vpsrlq	ymm0,ymm8,29
+	vpermq	ymm12,ymm12,0x93
+	vpand	ymm8,ymm8,ymm15
+	vpermq	ymm13,ymm13,0x93
+
+	vpblendd	ymm10,ymm14,ymm9,3
+	vpermq	ymm0,ymm0,0x93
+	vpblendd	ymm14,ymm11,ymm14,3
+	vpaddq	ymm4,ymm4,ymm10
+	vpblendd	ymm11,ymm12,ymm11,3
+	vpaddq	ymm5,ymm5,ymm14
+	vmovdqu	YMMWORD[(128-128)+rdi],ymm4
+	vpblendd	ymm12,ymm13,ymm12,3
+	vpaddq	ymm6,ymm6,ymm11
+	vmovdqu	YMMWORD[(160-128)+rdi],ymm5
+	vpblendd	ymm13,ymm0,ymm13,3
+	vpaddq	ymm7,ymm7,ymm12
+	vmovdqu	YMMWORD[(192-128)+rdi],ymm6
+	vpaddq	ymm8,ymm8,ymm13
+	vmovdqu	YMMWORD[(224-128)+rdi],ymm7
+	vmovdqu	YMMWORD[(256-128)+rdi],ymm8
+
+	mov	rsi,rdi	;the next outer pass takes the result just written as its input
+	dec	r8d
+	jne	NEAR $L$OOP_GRANDE_SQR_1024	;repeat until the arg5 count is used up
+
+	vzeroall	;clear every ymm register before leaving
+	mov	rax,rbp	;rax = entry rsp for the register restore that follows
+
+$L$sqr_1024_in_tail:	;restore path; rax holds the entry rsp here
+	movaps	xmm6,XMMWORD[((-216))+rax]	;reload xmm6..xmm15 from the slots filled in the prologue
+	movaps	xmm7,XMMWORD[((-200))+rax]
+	movaps	xmm8,XMMWORD[((-184))+rax]
+	movaps	xmm9,XMMWORD[((-168))+rax]
+	movaps	xmm10,XMMWORD[((-152))+rax]
+	movaps	xmm11,XMMWORD[((-136))+rax]
+	movaps	xmm12,XMMWORD[((-120))+rax]
+	movaps	xmm13,XMMWORD[((-104))+rax]
+	movaps	xmm14,XMMWORD[((-88))+rax]
+	movaps	xmm15,XMMWORD[((-72))+rax]
+	mov	r15,QWORD[((-48))+rax]	;reload the six pushed registers by offset instead of popping
+
+	mov	r14,QWORD[((-40))+rax]
+
+	mov	r13,QWORD[((-32))+rax]
+
+	mov	r12,QWORD[((-24))+rax]
+
+	mov	rbp,QWORD[((-16))+rax]
+
+	mov	rbx,QWORD[((-8))+rax]
+
+	lea	rsp,[rax]	;drop the whole frame: rsp = entry rsp
+
+$L$sqr_1024_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]	;rdi/rsi come back from the slots written at function entry
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_rsaz_1024_sqr_avx2:	;end-of-function marker label
+global	rsaz_1024_mul_avx2	;exported entry point; five arguments (four in registers, one on the stack)
+
+ALIGN	64
+rsaz_1024_mul_avx2:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	;rdi/rsi are parked at [rsp+8]/[rsp+16] and reloaded at $L$mul_1024_epilogue
+	mov	rax,rsp
+$L$SEH_begin_rsaz_1024_mul_avx2:
+	mov	rdi,rcx	;arg1 -> rdi: result pointer
+	mov	rsi,rdx	;arg2 -> rsi: first operand pointer
+	mov	rdx,r8	;arg3 -> rdx: second operand pointer (kept in r13 by the body)
+	mov	rcx,r9	;arg4 -> rcx: pointer read by the reduction steps
+	mov	r8,QWORD[40+rsp]	;arg5 (on the stack) -> r8: r8d is the 32-bit factor in "imul eax,r8d"
+
+
+
+	lea	rax,[rsp]	;rax = entry rsp; everything saved below is addressed relative to it
+
+	push	rbx	;saved at [rax-8]
+
+	push	rbp	;saved at [rax-16]
+
+	push	r12	;saved at [rax-24]
+
+	push	r13	;saved at [rax-32]
+
+	push	r14	;saved at [rax-40]
+
+	push	r15	;saved at [rax-48]
+
+	vzeroupper
+	lea	rsp,[((-168))+rsp]	;reserve 168 bytes: ten 16-byte xmm slots plus 8 bytes between them and the pushed r15
+	vmovaps	XMMWORD[(-216)+rax],xmm6	;save xmm6..xmm15 (reloaded at $L$mul_1024_in_tail)
+	vmovaps	XMMWORD[(-200)+rax],xmm7
+	vmovaps	XMMWORD[(-184)+rax],xmm8
+	vmovaps	XMMWORD[(-168)+rax],xmm9
+	vmovaps	XMMWORD[(-152)+rax],xmm10
+	vmovaps	XMMWORD[(-136)+rax],xmm11
+	vmovaps	XMMWORD[(-120)+rax],xmm12
+	vmovaps	XMMWORD[(-104)+rax],xmm13
+	vmovaps	XMMWORD[(-88)+rax],xmm14
+	vmovaps	XMMWORD[(-72)+rax],xmm15
+$L$mul_1024_body:
+	mov	rbp,rax	;rbp = entry rsp, kept for the tail because rsp is realigned below
+
+	vzeroall	;all ymm registers = 0, so the accumulators start from zero
+	mov	r13,rdx	;r13 = arg3 (second operand)
+	sub	rsp,64	;64 bytes of scratch at the bottom of the frame
+
+
+
+
+
+
+DB	0x67,0x67	;two raw 0x67 prefix bytes in front of the next instruction (presumably padding -- confirm against the generator)
+	mov	r15,rsi
+	and	r15,4095	;offset of arg2 inside its 4096-byte block
+	add	r15,32*10
+	shr	r15,12	;ZF=0 iff arg2's 320-byte window reaches a 4096-byte boundary
+	mov	r15,rsi	;mov leaves the flags alone; r15 = arg2 for the swap
+	cmovnz	rsi,r13	;in that case exchange the two operand pointers
+	cmovnz	r13,r15
+
+	mov	r15,rcx	;unbiased copy of arg4 for the same boundary test
+	sub	rsi,-128	;bias rsi/rcx/rdi by +128; later accesses are written as (offset-128)
+	sub	rcx,-128
+	sub	rdi,-128
+
+	and	r15,4095
+	add	r15,32*10
+DB	0x67,0x67
+	shr	r15,12	;nonzero iff arg4's 320-byte window reaches a 4096-byte boundary
+	jz	NEAR $L$mul_1024_no_n_copy	;otherwise arg4 is used in place
+
+
+
+
+
+	sub	rsp,32*10	;room for a ten-vector private copy
+	vmovdqu	ymm0,YMMWORD[((0-128))+rcx]	;read the nine 32-byte vectors of arg4
+	and	rsp,-512	;round rsp down to a 512-byte boundary
+	vmovdqu	ymm1,YMMWORD[((32-128))+rcx]
+	vmovdqu	ymm2,YMMWORD[((64-128))+rcx]
+	vmovdqu	ymm3,YMMWORD[((96-128))+rcx]
+	vmovdqu	ymm4,YMMWORD[((128-128))+rcx]
+	vmovdqu	ymm5,YMMWORD[((160-128))+rcx]
+	vmovdqu	ymm6,YMMWORD[((192-128))+rcx]
+	vmovdqu	ymm7,YMMWORD[((224-128))+rcx]
+	vmovdqu	ymm8,YMMWORD[((256-128))+rcx]
+	lea	rcx,[((64+128))+rsp]	;repoint rcx (still biased by 128) at the copy, just above the 64-byte scratch
+	vmovdqu	YMMWORD[(0-128)+rcx],ymm0	;write each vector, then re-zero the register that carried it
+	vpxor	ymm0,ymm0,ymm0
+	vmovdqu	YMMWORD[(32-128)+rcx],ymm1
+	vpxor	ymm1,ymm1,ymm1
+	vmovdqu	YMMWORD[(64-128)+rcx],ymm2
+	vpxor	ymm2,ymm2,ymm2
+	vmovdqu	YMMWORD[(96-128)+rcx],ymm3
+	vpxor	ymm3,ymm3,ymm3
+	vmovdqu	YMMWORD[(128-128)+rcx],ymm4
+	vpxor	ymm4,ymm4,ymm4
+	vmovdqu	YMMWORD[(160-128)+rcx],ymm5
+	vpxor	ymm5,ymm5,ymm5
+	vmovdqu	YMMWORD[(192-128)+rcx],ymm6
+	vpxor	ymm6,ymm6,ymm6
+	vmovdqu	YMMWORD[(224-128)+rcx],ymm7
+	vpxor	ymm7,ymm7,ymm7
+	vmovdqu	YMMWORD[(256-128)+rcx],ymm8
+	vmovdqa	ymm8,ymm0	;ymm8 = 0 (ymm0 was cleared above)
+	vmovdqu	YMMWORD[(288-128)+rcx],ymm9	;tenth vector of the copy is zero (ymm9 untouched since vzeroall)
+$L$mul_1024_no_n_copy:
+	and	rsp,-64	;align the scratch area down to 64 bytes
+
+	mov	rbx,QWORD[r13]	;rbx = first qword of the r13 operand (scalar copy)
+	vpbroadcastq	ymm10,QWORD[r13]	;ymm10 = the same qword in every lane
+	vmovdqu	YMMWORD[rsp],ymm0	;zero the 32-byte slot at rsp; the loop reads [8+rsp],[16+rsp],[24+rsp]
+	xor	r9,r9	;clear the four scalar accumulators r9..r12
+DB	0x67	;raw 0x67 prefix byte in front of the next instruction (presumably padding -- confirm)
+	xor	r10,r10
+	xor	r11,r11
+	xor	r12,r12
+
+	vmovdqu	ymm15,YMMWORD[$L$and_mask]	;ymm15 = constant defined outside this view; ANDed with limbs after each 29-bit shift (presumably a 29-bit mask -- confirm)
+	mov	r14d,9	;main loop runs 9 passes
+	vmovdqu	YMMWORD[(288-128)+rdi],ymm9	;zero the tenth 32-byte vector of the result buffer
+	jmp	NEAR $L$oop_mul_1024
+
+ALIGN	32
+$L$oop_mul_1024:	;main loop, 9 passes; each pass consumes four qwords from r13 (r13 advances by 32)
+	vpsrlq	ymm9,ymm3,29	;carries out of ymm3, folded back in further down
+	mov	rax,rbx	;scalar part: rbx (current r13 qword) * qwords 0..3 at rsi
+	imul	rax,QWORD[((-128))+rsi]
+	add	rax,r9
+	mov	r10,rbx
+	imul	r10,QWORD[((8-128))+rsi]
+	add	r10,QWORD[8+rsp]	;plus the low accumulator qwords spilled at rsp
+
+	mov	r9,rax
+	imul	eax,r8d	;eax = low 32 bits of (lowest limb * arg5)
+	and	eax,0x1fffffff	;keep 29 bits: multiplier for the reduction step
+
+	mov	r11,rbx
+	imul	r11,QWORD[((16-128))+rsi]
+	add	r11,QWORD[16+rsp]
+
+	mov	r12,rbx
+	imul	r12,QWORD[((24-128))+rsi]
+	add	r12,QWORD[24+rsp]
+	vpmuludq	ymm0,ymm10,YMMWORD[((32-128))+rsi]	;vector part: ymm10 * vectors 1..8 at rsi, added to ymm1..ymm8
+	vmovd	xmm11,eax
+	vpaddq	ymm1,ymm1,ymm0
+	vpmuludq	ymm12,ymm10,YMMWORD[((64-128))+rsi]
+	vpbroadcastq	ymm11,xmm11	;ymm11 = reduction multiplier in every lane
+	vpaddq	ymm2,ymm2,ymm12
+	vpmuludq	ymm13,ymm10,YMMWORD[((96-128))+rsi]
+	vpand	ymm3,ymm3,ymm15	;drop the bits of ymm3 already captured as carries in ymm9
+	vpaddq	ymm3,ymm3,ymm13
+	vpmuludq	ymm0,ymm10,YMMWORD[((128-128))+rsi]
+	vpaddq	ymm4,ymm4,ymm0
+	vpmuludq	ymm12,ymm10,YMMWORD[((160-128))+rsi]
+	vpaddq	ymm5,ymm5,ymm12
+	vpmuludq	ymm13,ymm10,YMMWORD[((192-128))+rsi]
+	vpaddq	ymm6,ymm6,ymm13
+	vpmuludq	ymm0,ymm10,YMMWORD[((224-128))+rsi]
+	vpermq	ymm9,ymm9,0x93	;rotate the carries up one qword lane (lane 3 wraps to lane 0)
+	vpaddq	ymm7,ymm7,ymm0
+	vpmuludq	ymm12,ymm10,YMMWORD[((256-128))+rsi]
+	vpbroadcastq	ymm10,QWORD[8+r13]	;second r13 qword of this pass
+	vpaddq	ymm8,ymm8,ymm12
+
+	mov	rdx,rax
+	imul	rax,QWORD[((-128))+rcx]	;scalar part of the reduction: multiplier * qwords 0..3 at rcx
+	add	r9,rax
+	mov	rax,rdx
+	imul	rax,QWORD[((8-128))+rcx]
+	add	r10,rax
+	mov	rax,rdx
+	imul	rax,QWORD[((16-128))+rcx]
+	add	r11,rax
+	shr	r9,29	;carry out of the lowest limb
+	imul	rdx,QWORD[((24-128))+rcx]
+	add	r12,rdx
+	add	r10,r9
+
+	vpmuludq	ymm13,ymm11,YMMWORD[((32-128))+rcx]	;vector part of the reduction: ymm11 * vectors 1..8 at rcx
+	vmovq	rbx,xmm10	;rbx = scalar copy of the second r13 qword
+	vpaddq	ymm1,ymm1,ymm13
+	vpmuludq	ymm0,ymm11,YMMWORD[((64-128))+rcx]
+	vpaddq	ymm2,ymm2,ymm0
+	vpmuludq	ymm12,ymm11,YMMWORD[((96-128))+rcx]
+	vpaddq	ymm3,ymm3,ymm12
+	vpmuludq	ymm13,ymm11,YMMWORD[((128-128))+rcx]
+	vpaddq	ymm4,ymm4,ymm13
+	vpmuludq	ymm0,ymm11,YMMWORD[((160-128))+rcx]
+	vpaddq	ymm5,ymm5,ymm0
+	vpmuludq	ymm12,ymm11,YMMWORD[((192-128))+rcx]
+	vpaddq	ymm6,ymm6,ymm12
+	vpmuludq	ymm13,ymm11,YMMWORD[((224-128))+rcx]
+	vpblendd	ymm12,ymm9,ymm14,3	;lane 0 <- ymm14, which is never written in this function after vzeroall (i.e. zero)
+	vpaddq	ymm7,ymm7,ymm13
+	vpmuludq	ymm0,ymm11,YMMWORD[((256-128))+rcx]
+	vpaddq	ymm3,ymm3,ymm12	;carries for lanes 1..3 go back into ymm3
+	vpaddq	ymm8,ymm8,ymm0
+
+	mov	rax,rbx
+	imul	rax,QWORD[((-128))+rsi]
+	add	r10,rax
+	vmovdqu	ymm12,YMMWORD[((-8+32-128))+rsi]	;second qword: vectors are read 8 bytes lower (one qword of shift)
+	mov	rax,rbx
+	imul	rax,QWORD[((8-128))+rsi]
+	add	r11,rax
+	vmovdqu	ymm13,YMMWORD[((-8+64-128))+rsi]
+
+	mov	rax,r10
+	vpblendd	ymm9,ymm9,ymm14,0xfc	;keep only lane 0: the carry that wrapped out of ymm3
+	imul	eax,r8d
+	vpaddq	ymm4,ymm4,ymm9	;the wrapped carry lands in lane 0 of ymm4
+	and	eax,0x1fffffff
+
+	imul	rbx,QWORD[((16-128))+rsi]
+	add	r12,rbx
+	vpmuludq	ymm12,ymm12,ymm10
+	vmovd	xmm11,eax
+	vmovdqu	ymm0,YMMWORD[((-8+96-128))+rsi]
+	vpaddq	ymm1,ymm1,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vpbroadcastq	ymm11,xmm11
+	vmovdqu	ymm12,YMMWORD[((-8+128-128))+rsi]
+	vpaddq	ymm2,ymm2,ymm13
+	vpmuludq	ymm0,ymm0,ymm10
+	vmovdqu	ymm13,YMMWORD[((-8+160-128))+rsi]
+	vpaddq	ymm3,ymm3,ymm0
+	vpmuludq	ymm12,ymm12,ymm10
+	vmovdqu	ymm0,YMMWORD[((-8+192-128))+rsi]
+	vpaddq	ymm4,ymm4,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vmovdqu	ymm12,YMMWORD[((-8+224-128))+rsi]
+	vpaddq	ymm5,ymm5,ymm13
+	vpmuludq	ymm0,ymm0,ymm10
+	vmovdqu	ymm13,YMMWORD[((-8+256-128))+rsi]
+	vpaddq	ymm6,ymm6,ymm0
+	vpmuludq	ymm12,ymm12,ymm10
+	vmovdqu	ymm9,YMMWORD[((-8+288-128))+rsi]
+	vpaddq	ymm7,ymm7,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vpaddq	ymm8,ymm8,ymm13
+	vpmuludq	ymm9,ymm9,ymm10	;ymm9 starts a ninth accumulator for the shifted products
+	vpbroadcastq	ymm10,QWORD[16+r13]	;third r13 qword of this pass
+
+	mov	rdx,rax
+	imul	rax,QWORD[((-128))+rcx]
+	add	r10,rax
+	vmovdqu	ymm0,YMMWORD[((-8+32-128))+rcx]
+	mov	rax,rdx
+	imul	rax,QWORD[((8-128))+rcx]
+	add	r11,rax
+	vmovdqu	ymm12,YMMWORD[((-8+64-128))+rcx]
+	shr	r10,29
+	imul	rdx,QWORD[((16-128))+rcx]
+	add	r12,rdx
+	add	r11,r10
+
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovq	rbx,xmm10
+	vmovdqu	ymm13,YMMWORD[((-8+96-128))+rcx]
+	vpaddq	ymm1,ymm1,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	vmovdqu	ymm0,YMMWORD[((-8+128-128))+rcx]
+	vpaddq	ymm2,ymm2,ymm12
+	vpmuludq	ymm13,ymm13,ymm11
+	vmovdqu	ymm12,YMMWORD[((-8+160-128))+rcx]
+	vpaddq	ymm3,ymm3,ymm13
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovdqu	ymm13,YMMWORD[((-8+192-128))+rcx]
+	vpaddq	ymm4,ymm4,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	vmovdqu	ymm0,YMMWORD[((-8+224-128))+rcx]
+	vpaddq	ymm5,ymm5,ymm12
+	vpmuludq	ymm13,ymm13,ymm11
+	vmovdqu	ymm12,YMMWORD[((-8+256-128))+rcx]
+	vpaddq	ymm6,ymm6,ymm13
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovdqu	ymm13,YMMWORD[((-8+288-128))+rcx]
+	vpaddq	ymm7,ymm7,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	vpaddq	ymm8,ymm8,ymm12
+	vpmuludq	ymm13,ymm13,ymm11
+	vpaddq	ymm9,ymm9,ymm13
+
+	vmovdqu	ymm0,YMMWORD[((-16+32-128))+rsi]	;third qword: vectors are read 16 bytes lower
+	mov	rax,rbx
+	imul	rax,QWORD[((-128))+rsi]
+	add	rax,r11
+
+	vmovdqu	ymm12,YMMWORD[((-16+64-128))+rsi]
+	mov	r11,rax
+	imul	eax,r8d
+	and	eax,0x1fffffff
+
+	imul	rbx,QWORD[((8-128))+rsi]
+	add	r12,rbx
+	vpmuludq	ymm0,ymm0,ymm10
+	vmovd	xmm11,eax
+	vmovdqu	ymm13,YMMWORD[((-16+96-128))+rsi]
+	vpaddq	ymm1,ymm1,ymm0
+	vpmuludq	ymm12,ymm12,ymm10
+	vpbroadcastq	ymm11,xmm11
+	vmovdqu	ymm0,YMMWORD[((-16+128-128))+rsi]
+	vpaddq	ymm2,ymm2,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vmovdqu	ymm12,YMMWORD[((-16+160-128))+rsi]
+	vpaddq	ymm3,ymm3,ymm13
+	vpmuludq	ymm0,ymm0,ymm10
+	vmovdqu	ymm13,YMMWORD[((-16+192-128))+rsi]
+	vpaddq	ymm4,ymm4,ymm0
+	vpmuludq	ymm12,ymm12,ymm10
+	vmovdqu	ymm0,YMMWORD[((-16+224-128))+rsi]
+	vpaddq	ymm5,ymm5,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vmovdqu	ymm12,YMMWORD[((-16+256-128))+rsi]
+	vpaddq	ymm6,ymm6,ymm13
+	vpmuludq	ymm0,ymm0,ymm10
+	vmovdqu	ymm13,YMMWORD[((-16+288-128))+rsi]
+	vpaddq	ymm7,ymm7,ymm0
+	vpmuludq	ymm12,ymm12,ymm10
+	vpaddq	ymm8,ymm8,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vpbroadcastq	ymm10,QWORD[24+r13]	;fourth r13 qword of this pass
+	vpaddq	ymm9,ymm9,ymm13
+
+	vmovdqu	ymm0,YMMWORD[((-16+32-128))+rcx]
+	mov	rdx,rax
+	imul	rax,QWORD[((-128))+rcx]
+	add	r11,rax
+	vmovdqu	ymm12,YMMWORD[((-16+64-128))+rcx]
+	imul	rdx,QWORD[((8-128))+rcx]
+	add	r12,rdx
+	shr	r11,29
+
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovq	rbx,xmm10
+	vmovdqu	ymm13,YMMWORD[((-16+96-128))+rcx]
+	vpaddq	ymm1,ymm1,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	vmovdqu	ymm0,YMMWORD[((-16+128-128))+rcx]
+	vpaddq	ymm2,ymm2,ymm12
+	vpmuludq	ymm13,ymm13,ymm11
+	vmovdqu	ymm12,YMMWORD[((-16+160-128))+rcx]
+	vpaddq	ymm3,ymm3,ymm13
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovdqu	ymm13,YMMWORD[((-16+192-128))+rcx]
+	vpaddq	ymm4,ymm4,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	vmovdqu	ymm0,YMMWORD[((-16+224-128))+rcx]
+	vpaddq	ymm5,ymm5,ymm12
+	vpmuludq	ymm13,ymm13,ymm11
+	vmovdqu	ymm12,YMMWORD[((-16+256-128))+rcx]
+	vpaddq	ymm6,ymm6,ymm13
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovdqu	ymm13,YMMWORD[((-16+288-128))+rcx]
+	vpaddq	ymm7,ymm7,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	vmovdqu	ymm0,YMMWORD[((-24+32-128))+rsi]	;fourth qword: vectors are read 24 bytes lower
+	vpaddq	ymm8,ymm8,ymm12
+	vpmuludq	ymm13,ymm13,ymm11
+	vmovdqu	ymm12,YMMWORD[((-24+64-128))+rsi]
+	vpaddq	ymm9,ymm9,ymm13
+
+	add	r12,r11
+	imul	rbx,QWORD[((-128))+rsi]
+	add	r12,rbx
+
+	mov	rax,r12
+	imul	eax,r8d
+	and	eax,0x1fffffff
+
+	vpmuludq	ymm0,ymm0,ymm10
+	vmovd	xmm11,eax
+	vmovdqu	ymm13,YMMWORD[((-24+96-128))+rsi]
+	vpaddq	ymm1,ymm1,ymm0
+	vpmuludq	ymm12,ymm12,ymm10
+	vpbroadcastq	ymm11,xmm11
+	vmovdqu	ymm0,YMMWORD[((-24+128-128))+rsi]
+	vpaddq	ymm2,ymm2,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vmovdqu	ymm12,YMMWORD[((-24+160-128))+rsi]
+	vpaddq	ymm3,ymm3,ymm13
+	vpmuludq	ymm0,ymm0,ymm10
+	vmovdqu	ymm13,YMMWORD[((-24+192-128))+rsi]
+	vpaddq	ymm4,ymm4,ymm0
+	vpmuludq	ymm12,ymm12,ymm10
+	vmovdqu	ymm0,YMMWORD[((-24+224-128))+rsi]
+	vpaddq	ymm5,ymm5,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vmovdqu	ymm12,YMMWORD[((-24+256-128))+rsi]
+	vpaddq	ymm6,ymm6,ymm13
+	vpmuludq	ymm0,ymm0,ymm10
+	vmovdqu	ymm13,YMMWORD[((-24+288-128))+rsi]
+	vpaddq	ymm7,ymm7,ymm0
+	vpmuludq	ymm12,ymm12,ymm10
+	vpaddq	ymm8,ymm8,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vpbroadcastq	ymm10,QWORD[32+r13]	;first r13 qword for the NEXT pass
+	vpaddq	ymm9,ymm9,ymm13
+	add	r13,32	;four qwords of the r13 operand consumed
+
+	vmovdqu	ymm0,YMMWORD[((-24+32-128))+rcx]
+	imul	rax,QWORD[((-128))+rcx]
+	add	r12,rax
+	shr	r12,29	;scalar carry out of this pass
+
+	vmovdqu	ymm12,YMMWORD[((-24+64-128))+rcx]
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovq	rbx,xmm10
+	vmovdqu	ymm13,YMMWORD[((-24+96-128))+rcx]
+	vpaddq	ymm0,ymm1,ymm0	;from here on each sum is written one register lower (ymm1 -> ymm0, ymm2 -> ymm1, ...)
+	vpmuludq	ymm12,ymm12,ymm11
+	vmovdqu	YMMWORD[rsp],ymm0	;spill the lowest vector; qwords 1..3 are read back as scalars at the loop top
+	vpaddq	ymm1,ymm2,ymm12
+	vmovdqu	ymm0,YMMWORD[((-24+128-128))+rcx]
+	vpmuludq	ymm13,ymm13,ymm11
+	vmovdqu	ymm12,YMMWORD[((-24+160-128))+rcx]
+	vpaddq	ymm2,ymm3,ymm13
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovdqu	ymm13,YMMWORD[((-24+192-128))+rcx]
+	vpaddq	ymm3,ymm4,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	vmovdqu	ymm0,YMMWORD[((-24+224-128))+rcx]
+	vpaddq	ymm4,ymm5,ymm12
+	vpmuludq	ymm13,ymm13,ymm11
+	vmovdqu	ymm12,YMMWORD[((-24+256-128))+rcx]
+	vpaddq	ymm5,ymm6,ymm13
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovdqu	ymm13,YMMWORD[((-24+288-128))+rcx]
+	mov	r9,r12
+	vpaddq	ymm6,ymm7,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	add	r9,QWORD[rsp]	;r9 = carry + lowest spilled qword, the starting value for the next pass
+	vpaddq	ymm7,ymm8,ymm12
+	vpmuludq	ymm13,ymm13,ymm11
+	vmovq	xmm12,r12	;ymm12 = the scalar carry in lane 0, upper lanes zeroed by vmovq
+	vpaddq	ymm8,ymm9,ymm13
+
+	dec	r14d
+	jnz	NEAR $L$oop_mul_1024
+	vpaddq	ymm0,ymm12,YMMWORD[rsp]	;lowest vector = spilled sums + pending scalar carry
+
+	vpsrlq	ymm12,ymm0,29	;carry pass 1 over ymm0..ymm3: split at bit 29, rotate carries up one lane, re-add
+	vpand	ymm0,ymm0,ymm15
+	vpsrlq	ymm13,ymm1,29
+	vpand	ymm1,ymm1,ymm15
+	vpsrlq	ymm10,ymm2,29
+	vpermq	ymm12,ymm12,0x93
+	vpand	ymm2,ymm2,ymm15
+	vpsrlq	ymm11,ymm3,29
+	vpermq	ymm13,ymm13,0x93
+	vpand	ymm3,ymm3,ymm15
+
+	vpblendd	ymm9,ymm12,ymm14,3	;lane 0 <- ymm14 (zero) for the lowest vector
+	vpermq	ymm10,ymm10,0x93
+	vpblendd	ymm12,ymm13,ymm12,3	;lane 0 <- carry wrapped out of the vector below
+	vpermq	ymm11,ymm11,0x93
+	vpaddq	ymm0,ymm0,ymm9
+	vpblendd	ymm13,ymm10,ymm13,3
+	vpaddq	ymm1,ymm1,ymm12
+	vpblendd	ymm10,ymm11,ymm10,3
+	vpaddq	ymm2,ymm2,ymm13
+	vpblendd	ymm11,ymm14,ymm11,3	;only the wrapped carry of ymm3 moves on into ymm4
+	vpaddq	ymm3,ymm3,ymm10
+	vpaddq	ymm4,ymm4,ymm11
+
+	vpsrlq	ymm12,ymm0,29	;carry pass 2 over ymm0..ymm3
+	vpand	ymm0,ymm0,ymm15
+	vpsrlq	ymm13,ymm1,29
+	vpand	ymm1,ymm1,ymm15
+	vpsrlq	ymm10,ymm2,29
+	vpermq	ymm12,ymm12,0x93
+	vpand	ymm2,ymm2,ymm15
+	vpsrlq	ymm11,ymm3,29
+	vpermq	ymm13,ymm13,0x93
+	vpand	ymm3,ymm3,ymm15
+	vpermq	ymm10,ymm10,0x93
+
+	vpblendd	ymm9,ymm12,ymm14,3
+	vpermq	ymm11,ymm11,0x93
+	vpblendd	ymm12,ymm13,ymm12,3
+	vpaddq	ymm0,ymm0,ymm9
+	vpblendd	ymm13,ymm10,ymm13,3
+	vpaddq	ymm1,ymm1,ymm12
+	vpblendd	ymm10,ymm11,ymm10,3
+	vpaddq	ymm2,ymm2,ymm13
+	vpblendd	ymm11,ymm14,ymm11,3
+	vpaddq	ymm3,ymm3,ymm10
+	vpaddq	ymm4,ymm4,ymm11
+
+	vmovdqu	YMMWORD[(0-128)+rdi],ymm0	;result vectors 0..3
+	vmovdqu	YMMWORD[(32-128)+rdi],ymm1
+	vmovdqu	YMMWORD[(64-128)+rdi],ymm2
+	vmovdqu	YMMWORD[(96-128)+rdi],ymm3
+	vpsrlq	ymm12,ymm4,29	;carry pass 1 over ymm4..ymm8
+	vpand	ymm4,ymm4,ymm15
+	vpsrlq	ymm13,ymm5,29
+	vpand	ymm5,ymm5,ymm15
+	vpsrlq	ymm10,ymm6,29
+	vpermq	ymm12,ymm12,0x93
+	vpand	ymm6,ymm6,ymm15
+	vpsrlq	ymm11,ymm7,29
+	vpermq	ymm13,ymm13,0x93
+	vpand	ymm7,ymm7,ymm15
+	vpsrlq	ymm0,ymm8,29
+	vpermq	ymm10,ymm10,0x93
+	vpand	ymm8,ymm8,ymm15
+	vpermq	ymm11,ymm11,0x93
+
+	vpblendd	ymm9,ymm12,ymm14,3
+	vpermq	ymm0,ymm0,0x93
+	vpblendd	ymm12,ymm13,ymm12,3
+	vpaddq	ymm4,ymm4,ymm9
+	vpblendd	ymm13,ymm10,ymm13,3
+	vpaddq	ymm5,ymm5,ymm12
+	vpblendd	ymm10,ymm11,ymm10,3
+	vpaddq	ymm6,ymm6,ymm13
+	vpblendd	ymm11,ymm0,ymm11,3
+	vpaddq	ymm7,ymm7,ymm10
+	vpaddq	ymm8,ymm8,ymm11
+
+	vpsrlq	ymm12,ymm4,29	;carry pass 2 over ymm4..ymm8
+	vpand	ymm4,ymm4,ymm15
+	vpsrlq	ymm13,ymm5,29
+	vpand	ymm5,ymm5,ymm15
+	vpsrlq	ymm10,ymm6,29
+	vpermq	ymm12,ymm12,0x93
+	vpand	ymm6,ymm6,ymm15
+	vpsrlq	ymm11,ymm7,29
+	vpermq	ymm13,ymm13,0x93
+	vpand	ymm7,ymm7,ymm15
+	vpsrlq	ymm0,ymm8,29
+	vpermq	ymm10,ymm10,0x93
+	vpand	ymm8,ymm8,ymm15
+	vpermq	ymm11,ymm11,0x93
+
+	vpblendd	ymm9,ymm12,ymm14,3
+	vpermq	ymm0,ymm0,0x93
+	vpblendd	ymm12,ymm13,ymm12,3
+	vpaddq	ymm4,ymm4,ymm9
+	vpblendd	ymm13,ymm10,ymm13,3
+	vpaddq	ymm5,ymm5,ymm12
+	vpblendd	ymm10,ymm11,ymm10,3
+	vpaddq	ymm6,ymm6,ymm13
+	vpblendd	ymm11,ymm0,ymm11,3
+	vpaddq	ymm7,ymm7,ymm10
+	vpaddq	ymm8,ymm8,ymm11
+
+	vmovdqu	YMMWORD[(128-128)+rdi],ymm4	;result vectors 4..8
+	vmovdqu	YMMWORD[(160-128)+rdi],ymm5
+	vmovdqu	YMMWORD[(192-128)+rdi],ymm6
+	vmovdqu	YMMWORD[(224-128)+rdi],ymm7
+	vmovdqu	YMMWORD[(256-128)+rdi],ymm8
+	vzeroupper	;clear the upper ymm halves before the non-VEX movaps restores that follow
+
+	mov	rax,rbp	;rax = entry rsp for the register restore that follows
+
+$L$mul_1024_in_tail:
+	movaps	xmm6,XMMWORD[((-216))+rax]
+	movaps	xmm7,XMMWORD[((-200))+rax]
+	movaps	xmm8,XMMWORD[((-184))+rax]
+	movaps	xmm9,XMMWORD[((-168))+rax]
+	movaps	xmm10,XMMWORD[((-152))+rax]
+	movaps	xmm11,XMMWORD[((-136))+rax]
+	movaps	xmm12,XMMWORD[((-120))+rax]
+	movaps	xmm13,XMMWORD[((-104))+rax]
+	movaps	xmm14,XMMWORD[((-88))+rax]
+	movaps	xmm15,XMMWORD[((-72))+rax]
+	mov	r15,QWORD[((-48))+rax]
+
+	mov	r14,QWORD[((-40))+rax]
+
+	mov	r13,QWORD[((-32))+rax]
+
+	mov	r12,QWORD[((-24))+rax]
+
+	mov	rbp,QWORD[((-16))+rax]
+
+	mov	rbx,QWORD[((-8))+rax]
+
+	lea	rsp,[rax]
+
+$L$mul_1024_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_rsaz_1024_mul_avx2:
+global	rsaz_1024_red2norm_avx2	; pack 36 qword digits at [rdx] (digit k weighted 2^(29*k)) into 16 qwords at [rcx]
+
+ALIGN	32
+rsaz_1024_red2norm_avx2:
+	sub	rdx,-128	; rdx += 128, so the 36 source qwords are addressed as [-128..152 + rdx]
+	xor	rax,rax	; rax = accumulator for the current output word
+	mov	r8,QWORD[((-128))+rdx]
+	mov	r9,QWORD[((-120))+rdx]
+	mov	r10,QWORD[((-112))+rdx]
+	shl	r8,0	; digit 0 sits at bit 0 (shift by zero is a no-op)
+	shl	r9,29	; digit 1 sits at bit 29
+	mov	r11,r10
+	shl	r10,58	; digit 2 starts at bit 58: its low 6 bits belong to this word
+	shr	r11,6	; ...the rest of digit 2 seeds the next output word
+	add	rax,r8
+	add	rax,r9
+	add	rax,r10
+	adc	r11,0	; only the carry of the last add is propagated; NOTE(review): earlier adds presumably cannot overflow - confirm digit bounds
+	mov	QWORD[rcx],rax	; output word 0 (digits 0-2)
+	mov	rax,r11	; start the next word from the spilled high bits
+	mov	r8,QWORD[((-104))+rdx]
+	mov	r9,QWORD[((-96))+rdx]
+	shl	r8,23
+	mov	r10,r9
+	shl	r9,52
+	shr	r10,12
+	add	rax,r8
+	add	rax,r9
+	adc	r10,0
+	mov	QWORD[8+rcx],rax	; output word 1 (digits 3-4)
+	mov	rax,r10
+	mov	r11,QWORD[((-88))+rdx]
+	mov	r8,QWORD[((-80))+rdx]
+	shl	r11,17
+	mov	r9,r8
+	shl	r8,46
+	shr	r9,18
+	add	rax,r11
+	add	rax,r8
+	adc	r9,0
+	mov	QWORD[16+rcx],rax	; output word 2 (digits 5-6)
+	mov	rax,r9
+	mov	r10,QWORD[((-72))+rdx]
+	mov	r11,QWORD[((-64))+rdx]
+	shl	r10,11
+	mov	r8,r11
+	shl	r11,40
+	shr	r8,24
+	add	rax,r10
+	add	rax,r11
+	adc	r8,0
+	mov	QWORD[24+rcx],rax	; output word 3 (digits 7-8)
+	mov	rax,r8
+	mov	r9,QWORD[((-56))+rdx]
+	mov	r10,QWORD[((-48))+rdx]
+	mov	r11,QWORD[((-40))+rdx]
+	shl	r9,5
+	shl	r10,34
+	mov	r8,r11
+	shl	r11,63
+	shr	r8,1
+	add	rax,r9
+	add	rax,r10
+	add	rax,r11
+	adc	r8,0
+	mov	QWORD[32+rcx],rax	; output word 4 (digits 9-11)
+	mov	rax,r8
+	mov	r9,QWORD[((-32))+rdx]
+	mov	r10,QWORD[((-24))+rdx]
+	shl	r9,28
+	mov	r11,r10
+	shl	r10,57
+	shr	r11,7
+	add	rax,r9
+	add	rax,r10
+	adc	r11,0
+	mov	QWORD[40+rcx],rax	; output word 5 (digits 12-13)
+	mov	rax,r11
+	mov	r8,QWORD[((-16))+rdx]
+	mov	r9,QWORD[((-8))+rdx]
+	shl	r8,22
+	mov	r10,r9
+	shl	r9,51
+	shr	r10,13
+	add	rax,r8
+	add	rax,r9
+	adc	r10,0
+	mov	QWORD[48+rcx],rax	; output word 6 (digits 14-15)
+	mov	rax,r10
+	mov	r11,QWORD[rdx]
+	mov	r8,QWORD[8+rdx]
+	shl	r11,16
+	mov	r9,r8
+	shl	r8,45
+	shr	r9,19
+	add	rax,r11
+	add	rax,r8
+	adc	r9,0
+	mov	QWORD[56+rcx],rax	; output word 7 (digits 16-17)
+	mov	rax,r9
+	mov	r10,QWORD[16+rdx]
+	mov	r11,QWORD[24+rdx]
+	shl	r10,10
+	mov	r8,r11
+	shl	r11,39
+	shr	r8,25
+	add	rax,r10
+	add	rax,r11
+	adc	r8,0
+	mov	QWORD[64+rcx],rax	; output word 8 (digits 18-19)
+	mov	rax,r8
+	mov	r9,QWORD[32+rdx]
+	mov	r10,QWORD[40+rdx]
+	mov	r11,QWORD[48+rdx]
+	shl	r9,4
+	shl	r10,33
+	mov	r8,r11
+	shl	r11,62
+	shr	r8,2
+	add	rax,r9
+	add	rax,r10
+	add	rax,r11
+	adc	r8,0
+	mov	QWORD[72+rcx],rax	; output word 9 (digits 20-22)
+	mov	rax,r8
+	mov	r9,QWORD[56+rdx]
+	mov	r10,QWORD[64+rdx]
+	shl	r9,27
+	mov	r11,r10
+	shl	r10,56
+	shr	r11,8
+	add	rax,r9
+	add	rax,r10
+	adc	r11,0
+	mov	QWORD[80+rcx],rax	; output word 10 (digits 23-24)
+	mov	rax,r11
+	mov	r8,QWORD[72+rdx]
+	mov	r9,QWORD[80+rdx]
+	shl	r8,21
+	mov	r10,r9
+	shl	r9,50
+	shr	r10,14
+	add	rax,r8
+	add	rax,r9
+	adc	r10,0
+	mov	QWORD[88+rcx],rax	; output word 11 (digits 25-26)
+	mov	rax,r10
+	mov	r11,QWORD[88+rdx]
+	mov	r8,QWORD[96+rdx]
+	shl	r11,15
+	mov	r9,r8
+	shl	r8,44
+	shr	r9,20
+	add	rax,r11
+	add	rax,r8
+	adc	r9,0
+	mov	QWORD[96+rcx],rax	; output word 12 (digits 27-28)
+	mov	rax,r9
+	mov	r10,QWORD[104+rdx]
+	mov	r11,QWORD[112+rdx]
+	shl	r10,9
+	mov	r8,r11
+	shl	r11,38
+	shr	r8,26
+	add	rax,r10
+	add	rax,r11
+	adc	r8,0
+	mov	QWORD[104+rcx],rax	; output word 13 (digits 29-30)
+	mov	rax,r8
+	mov	r9,QWORD[120+rdx]
+	mov	r10,QWORD[128+rdx]
+	mov	r11,QWORD[136+rdx]
+	shl	r9,3
+	shl	r10,32
+	mov	r8,r11
+	shl	r11,61
+	shr	r8,3
+	add	rax,r9
+	add	rax,r10
+	add	rax,r11
+	adc	r8,0
+	mov	QWORD[112+rcx],rax	; output word 14 (digits 31-33)
+	mov	rax,r8
+	mov	r9,QWORD[144+rdx]
+	mov	r10,QWORD[152+rdx]
+	shl	r9,26
+	mov	r11,r10
+	shl	r10,55
+	shr	r11,9
+	add	rax,r9
+	add	rax,r10
+	adc	r11,0
+	mov	QWORD[120+rcx],rax	; output word 15 (digits 34-35), the last word stored
+	mov	rax,r11	; bits above word 15 are left in rax; nothing in this routine stores them
+	DB	0F3h,0C3h		;repret
+
+
+global	rsaz_1024_norm2red_avx2	; split 16 qwords at [rdx] into 36 qword digits of 29 bits each at [rcx], then append 4 zero qwords
+
+ALIGN	32
+rsaz_1024_norm2red_avx2:
+	sub	rcx,-128	; rcx += 128, so the 40 destination qwords are addressed as [-128..184 + rcx]
+	mov	r8,QWORD[rdx]
+	mov	eax,0x1fffffff	; rax = 2^29-1 digit mask (32-bit write zero-extends into rax)
+	mov	r9,QWORD[8+rdx]
+	mov	r11,r8
+	shr	r11,0	; digit 0 starts at bit 0 (shift by zero is a no-op)
+	and	r11,rax
+	mov	QWORD[((-128))+rcx],r11	; digit 0
+	mov	r10,r8
+	shr	r10,29
+	and	r10,rax
+	mov	QWORD[((-120))+rcx],r10	; digit 1
+	shrd	r8,r9,58	; digit 2 straddles input words 0 and 1: shrd pulls the missing high bits from r9
+	and	r8,rax
+	mov	QWORD[((-112))+rcx],r8	; digit 2
+	mov	r10,QWORD[16+rdx]
+	mov	r8,r9
+	shr	r8,23
+	and	r8,rax
+	mov	QWORD[((-104))+rcx],r8	; digit 3
+	shrd	r9,r10,52	; digit 4 straddles input words 1 and 2
+	and	r9,rax
+	mov	QWORD[((-96))+rcx],r9
+	mov	r11,QWORD[24+rdx]
+	mov	r9,r10
+	shr	r9,17
+	and	r9,rax
+	mov	QWORD[((-88))+rcx],r9
+	shrd	r10,r11,46	; digit 6 straddles input words 2 and 3
+	and	r10,rax
+	mov	QWORD[((-80))+rcx],r10
+	mov	r8,QWORD[32+rdx]
+	mov	r10,r11
+	shr	r10,11
+	and	r10,rax
+	mov	QWORD[((-72))+rcx],r10
+	shrd	r11,r8,40	; digit 8 straddles input words 3 and 4
+	and	r11,rax
+	mov	QWORD[((-64))+rcx],r11
+	mov	r9,QWORD[40+rdx]
+	mov	r11,r8
+	shr	r11,5
+	and	r11,rax
+	mov	QWORD[((-56))+rcx],r11
+	mov	r10,r8
+	shr	r10,34
+	and	r10,rax
+	mov	QWORD[((-48))+rcx],r10
+	shrd	r8,r9,63	; digit 11 straddles input words 4 and 5
+	and	r8,rax
+	mov	QWORD[((-40))+rcx],r8
+	mov	r10,QWORD[48+rdx]
+	mov	r8,r9
+	shr	r8,28
+	and	r8,rax
+	mov	QWORD[((-32))+rcx],r8
+	shrd	r9,r10,57	; digit 13 straddles input words 5 and 6
+	and	r9,rax
+	mov	QWORD[((-24))+rcx],r9
+	mov	r11,QWORD[56+rdx]
+	mov	r9,r10
+	shr	r9,22
+	and	r9,rax
+	mov	QWORD[((-16))+rcx],r9
+	shrd	r10,r11,51	; digit 15 straddles input words 6 and 7
+	and	r10,rax
+	mov	QWORD[((-8))+rcx],r10
+	mov	r8,QWORD[64+rdx]
+	mov	r10,r11
+	shr	r10,16
+	and	r10,rax
+	mov	QWORD[rcx],r10	; digit 16
+	shrd	r11,r8,45	; digit 17 straddles input words 7 and 8
+	and	r11,rax
+	mov	QWORD[8+rcx],r11
+	mov	r9,QWORD[72+rdx]
+	mov	r11,r8
+	shr	r11,10
+	and	r11,rax
+	mov	QWORD[16+rcx],r11
+	shrd	r8,r9,39	; digit 19 straddles input words 8 and 9
+	and	r8,rax
+	mov	QWORD[24+rcx],r8
+	mov	r10,QWORD[80+rdx]
+	mov	r8,r9
+	shr	r8,4
+	and	r8,rax
+	mov	QWORD[32+rcx],r8
+	mov	r11,r9
+	shr	r11,33
+	and	r11,rax
+	mov	QWORD[40+rcx],r11
+	shrd	r9,r10,62	; digit 22 straddles input words 9 and 10
+	and	r9,rax
+	mov	QWORD[48+rcx],r9
+	mov	r11,QWORD[88+rdx]
+	mov	r9,r10
+	shr	r9,27
+	and	r9,rax
+	mov	QWORD[56+rcx],r9
+	shrd	r10,r11,56	; digit 24 straddles input words 10 and 11
+	and	r10,rax
+	mov	QWORD[64+rcx],r10
+	mov	r8,QWORD[96+rdx]
+	mov	r10,r11
+	shr	r10,21
+	and	r10,rax
+	mov	QWORD[72+rcx],r10
+	shrd	r11,r8,50	; digit 26 straddles input words 11 and 12
+	and	r11,rax
+	mov	QWORD[80+rcx],r11
+	mov	r9,QWORD[104+rdx]
+	mov	r11,r8
+	shr	r11,15
+	and	r11,rax
+	mov	QWORD[88+rcx],r11
+	shrd	r8,r9,44	; digit 28 straddles input words 12 and 13
+	and	r8,rax
+	mov	QWORD[96+rcx],r8
+	mov	r10,QWORD[112+rdx]
+	mov	r8,r9
+	shr	r8,9
+	and	r8,rax
+	mov	QWORD[104+rcx],r8
+	shrd	r9,r10,38	; digit 30 straddles input words 13 and 14
+	and	r9,rax
+	mov	QWORD[112+rcx],r9
+	mov	r11,QWORD[120+rdx]
+	mov	r9,r10
+	shr	r9,3
+	and	r9,rax
+	mov	QWORD[120+rcx],r9
+	mov	r8,r10
+	shr	r8,32
+	and	r8,rax
+	mov	QWORD[128+rcx],r8
+	shrd	r10,r11,61	; digit 33 straddles input words 14 and 15
+	and	r10,rax
+	mov	QWORD[136+rcx],r10
+	xor	r8,r8	; r8 = 0: stands in for the non-existent input word 16 and is the zero stored below
+	mov	r10,r11
+	shr	r10,26
+	and	r10,rax
+	mov	QWORD[144+rcx],r10	; digit 34
+	shrd	r11,r8,55	; digit 35 = top 9 bits of input word 15, zero-filled above
+	and	r11,rax
+	mov	QWORD[152+rcx],r11	; digit 35, the last real digit
+	mov	QWORD[160+rcx],r8	; qwords 36-39 of the output are cleared
+	mov	QWORD[168+rcx],r8
+	mov	QWORD[176+rcx],r8
+	mov	QWORD[184+rcx],r8
+	DB	0F3h,0C3h		;repret
+
+global	rsaz_1024_scatter5_avx2	; store 36 qwords from [rdx] as 9 rows of 4 dwords into table [rcx], slot index r8d
+
+ALIGN	32
+rsaz_1024_scatter5_avx2:
+	vzeroupper
+	vmovdqu	ymm5,YMMWORD[$L$scatter_permd]	; dword selector 0,2,4,6,7,7,7,7 (low half of each qword first)
+	shl	r8d,4	; slot index * 16 bytes
+	lea	rcx,[r8*1+rcx]	; rcx -> this slot within the first 512-byte row
+	mov	eax,9	; 9 iterations * 4 qwords = 36 qwords consumed
+	jmp	NEAR $L$oop_scatter_1024
+
+ALIGN	32
+$L$oop_scatter_1024:
+	vmovdqu	ymm0,YMMWORD[rdx]	; load 4 source qwords
+	lea	rdx,[32+rdx]
+	vpermd	ymm0,ymm5,ymm0	; gather dwords 0,2,4,6 into the low 128 bits
+	vmovdqu	XMMWORD[rcx],xmm0	; only the low 32 bits of each qword are written (16 bytes per row)
+	lea	rcx,[512+rcx]	; next row: rows are 512 bytes apart, i.e. 32 slots of 16 bytes
+	dec	eax
+	jnz	NEAR $L$oop_scatter_1024
+
+	vzeroupper
+	DB	0F3h,0C3h		;repret
+
+
+global	rsaz_1024_gather5_avx2	; read slot r8d from the 9x512-byte table at [rdx] and write 40 qwords to [rcx]; every table byte is loaded whatever the index
+
+ALIGN	32
+rsaz_1024_gather5_avx2:
+
+	vzeroupper
+	mov	r11,rsp	; keep the entry rsp; the epilogue restores xmm6-15 and rsp relative to r11
+
+	lea	rax,[((-136))+rsp]	; rax = base used by the hand-encoded xmm saves below
+$L$SEH_begin_rsaz_1024_gather5:
+
+DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20] (rsp = entry rsp - 168); bytes are fixed, presumably so prologue offsets match the unwind codes - confirm
+DB	0xc5,0xf8,0x29,0x70,0xe0	; vmovaps [rax-0x20],xmm6
+DB	0xc5,0xf8,0x29,0x78,0xf0	; vmovaps [rax-0x10],xmm7
+DB	0xc5,0x78,0x29,0x40,0x00	; vmovaps [rax+0x00],xmm8
+DB	0xc5,0x78,0x29,0x48,0x10	; vmovaps [rax+0x10],xmm9
+DB	0xc5,0x78,0x29,0x50,0x20	; vmovaps [rax+0x20],xmm10
+DB	0xc5,0x78,0x29,0x58,0x30	; vmovaps [rax+0x30],xmm11
+DB	0xc5,0x78,0x29,0x60,0x40	; vmovaps [rax+0x40],xmm12
+DB	0xc5,0x78,0x29,0x68,0x50	; vmovaps [rax+0x50],xmm13
+DB	0xc5,0x78,0x29,0x70,0x60	; vmovaps [rax+0x60],xmm14
+DB	0xc5,0x78,0x29,0x78,0x70	; vmovaps [rax+0x70],xmm15
+	lea	rsp,[((-256))+rsp]	; room for the eight 32-byte masks spilled below
+	and	rsp,-32	; 32-byte align so the vmovdqa spills are legal
+	lea	r10,[$L$inc]
+	lea	rax,[((-128))+rsp]	; bias: [(k+128)+rax] below is [k+rsp]
+
+	vmovd	xmm4,r8d
+	vmovdqa	ymm0,YMMWORD[r10]	; slot counters {0,1}, one per 128-bit half
+	vmovdqa	ymm1,YMMWORD[32+r10]	; slot counters {2,3}
+	vmovdqa	ymm5,YMMWORD[64+r10]	; step of 4 slots
+	vpbroadcastd	ymm4,xmm4	; requested index in every dword lane
+
+	vpaddd	ymm2,ymm0,ymm5	; counters advance by 4 while earlier ones are compared with the index
+	vpcmpeqd	ymm0,ymm0,ymm4	; all-ones in the half whose slot number equals the index
+	vpaddd	ymm3,ymm1,ymm5
+	vpcmpeqd	ymm1,ymm1,ymm4
+	vmovdqa	YMMWORD[(0+128)+rax],ymm0	; masks for slots 0-15 are spilled to the stack...
+	vpaddd	ymm0,ymm2,ymm5
+	vpcmpeqd	ymm2,ymm2,ymm4
+	vmovdqa	YMMWORD[(32+128)+rax],ymm1
+	vpaddd	ymm1,ymm3,ymm5
+	vpcmpeqd	ymm3,ymm3,ymm4
+	vmovdqa	YMMWORD[(64+128)+rax],ymm2
+	vpaddd	ymm2,ymm0,ymm5
+	vpcmpeqd	ymm0,ymm0,ymm4
+	vmovdqa	YMMWORD[(96+128)+rax],ymm3
+	vpaddd	ymm3,ymm1,ymm5
+	vpcmpeqd	ymm1,ymm1,ymm4
+	vmovdqa	YMMWORD[(128+128)+rax],ymm0
+	vpaddd	ymm8,ymm2,ymm5
+	vpcmpeqd	ymm2,ymm2,ymm4
+	vmovdqa	YMMWORD[(160+128)+rax],ymm1
+	vpaddd	ymm9,ymm3,ymm5
+	vpcmpeqd	ymm3,ymm3,ymm4
+	vmovdqa	YMMWORD[(192+128)+rax],ymm2
+	vpaddd	ymm10,ymm8,ymm5
+	vpcmpeqd	ymm8,ymm8,ymm4	; ...masks for slots 16-31 stay resident in ymm8-ymm15
+	vmovdqa	YMMWORD[(224+128)+rax],ymm3
+	vpaddd	ymm11,ymm9,ymm5
+	vpcmpeqd	ymm9,ymm9,ymm4
+	vpaddd	ymm12,ymm10,ymm5
+	vpcmpeqd	ymm10,ymm10,ymm4
+	vpaddd	ymm13,ymm11,ymm5
+	vpcmpeqd	ymm11,ymm11,ymm4
+	vpaddd	ymm14,ymm12,ymm5
+	vpcmpeqd	ymm12,ymm12,ymm4
+	vpaddd	ymm15,ymm13,ymm5
+	vpcmpeqd	ymm13,ymm13,ymm4
+	vpcmpeqd	ymm14,ymm14,ymm4
+	vpcmpeqd	ymm15,ymm15,ymm4
+
+	vmovdqa	ymm7,YMMWORD[((-32))+r10]	; $L$inc-32 is $L$gather_permd: relies on the two tables being adjacent
+	lea	rdx,[128+rdx]	; bias the table pointer; rows are addressed as [(k-128)+rdx]
+	mov	r8d,9	; 9 rows, 4 qwords produced per row
+
+$L$oop_gather_1024:
+	vmovdqa	ymm0,YMMWORD[((0-128))+rdx]	; aligned loads: the table must be 32-byte aligned
+	vmovdqa	ymm1,YMMWORD[((32-128))+rdx]
+	vmovdqa	ymm2,YMMWORD[((64-128))+rdx]
+	vmovdqa	ymm3,YMMWORD[((96-128))+rdx]
+	vpand	ymm0,ymm0,YMMWORD[((0+128))+rax]	; keep a chunk only where its mask is all-ones
+	vpand	ymm1,ymm1,YMMWORD[((32+128))+rax]
+	vpand	ymm2,ymm2,YMMWORD[((64+128))+rax]
+	vpor	ymm4,ymm1,ymm0	; ymm4 and ymm5 are two independent OR accumulators
+	vpand	ymm3,ymm3,YMMWORD[((96+128))+rax]
+	vmovdqa	ymm0,YMMWORD[((128-128))+rdx]
+	vmovdqa	ymm1,YMMWORD[((160-128))+rdx]
+	vpor	ymm5,ymm3,ymm2
+	vmovdqa	ymm2,YMMWORD[((192-128))+rdx]
+	vmovdqa	ymm3,YMMWORD[((224-128))+rdx]
+	vpand	ymm0,ymm0,YMMWORD[((128+128))+rax]
+	vpand	ymm1,ymm1,YMMWORD[((160+128))+rax]
+	vpand	ymm2,ymm2,YMMWORD[((192+128))+rax]
+	vpor	ymm4,ymm4,ymm0
+	vpand	ymm3,ymm3,YMMWORD[((224+128))+rax]
+	vpand	ymm0,ymm8,YMMWORD[((256-128))+rdx]	; second half of the row uses the register-resident masks
+	vpor	ymm5,ymm5,ymm1
+	vpand	ymm1,ymm9,YMMWORD[((288-128))+rdx]
+	vpor	ymm4,ymm4,ymm2
+	vpand	ymm2,ymm10,YMMWORD[((320-128))+rdx]
+	vpor	ymm5,ymm5,ymm3
+	vpand	ymm3,ymm11,YMMWORD[((352-128))+rdx]
+	vpor	ymm4,ymm4,ymm0
+	vpand	ymm0,ymm12,YMMWORD[((384-128))+rdx]
+	vpor	ymm5,ymm5,ymm1
+	vpand	ymm1,ymm13,YMMWORD[((416-128))+rdx]
+	vpor	ymm4,ymm4,ymm2
+	vpand	ymm2,ymm14,YMMWORD[((448-128))+rdx]
+	vpor	ymm5,ymm5,ymm3
+	vpand	ymm3,ymm15,YMMWORD[((480-128))+rdx]
+	lea	rdx,[512+rdx]	; advance to the next 512-byte row
+	vpor	ymm4,ymm4,ymm0
+	vpor	ymm5,ymm5,ymm1
+	vpor	ymm4,ymm4,ymm2
+	vpor	ymm5,ymm5,ymm3
+
+	vpor	ymm4,ymm4,ymm5	; the selected 16-byte slot is in one 128-bit half, the other half is zero
+	vextracti128	xmm5,ymm4,1
+	vpor	xmm5,xmm5,xmm4	; fold the halves; the 128-bit VEX form clears the upper half of ymm5
+	vpermd	ymm5,ymm7,ymm5	; selector 0,7,1,7,2,7,3,7: each dword followed by the zero dword 7, i.e. 4 zero-extended qwords
+	vmovdqu	YMMWORD[rcx],ymm5
+	lea	rcx,[32+rcx]
+	dec	r8d
+	jnz	NEAR $L$oop_gather_1024
+
+	vpxor	ymm0,ymm0,ymm0
+	vmovdqu	YMMWORD[rcx],ymm0	; qwords 36-39 of the output are zeroed
+	vzeroupper
+	movaps	xmm6,XMMWORD[((-168))+r11]	; restore xmm6-xmm15 from where the prologue bytes saved them
+	movaps	xmm7,XMMWORD[((-152))+r11]
+	movaps	xmm8,XMMWORD[((-136))+r11]
+	movaps	xmm9,XMMWORD[((-120))+r11]
+	movaps	xmm10,XMMWORD[((-104))+r11]
+	movaps	xmm11,XMMWORD[((-88))+r11]
+	movaps	xmm12,XMMWORD[((-72))+r11]
+	movaps	xmm13,XMMWORD[((-56))+r11]
+	movaps	xmm14,XMMWORD[((-40))+r11]
+	movaps	xmm15,XMMWORD[((-24))+r11]
+	lea	rsp,[r11]	; back to the entry stack pointer
+
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_rsaz_1024_gather5:
+
+EXTERN	OPENSSL_ia32cap_P
+global	rsaz_avx2_eligible	; returns 1 in eax if bit 5 of the dword at OPENSSL_ia32cap_P+8 is set, else 0
+
+ALIGN	32
+rsaz_avx2_eligible:
+	lea	rax,[OPENSSL_ia32cap_P]
+	mov	eax,DWORD[8+rax]	; third dword of the capability vector
+	and	eax,32	; isolate bit 5 (presumably the AVX2 feature flag - confirm against the capability vector layout)
+	shr	eax,5	; normalise to 0 or 1
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	64
+$L$and_mask:	; 2^29-1 replicated in four qword lanes
+	DQ	0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
+$L$scatter_permd:	; vpermd selector used by rsaz_1024_scatter5_avx2: even dwords 0,2,4,6 into the low 128 bits
+	DD	0,2,4,6,7,7,7,7
+$L$gather_permd:	; vpermd selector used by rsaz_1024_gather5_avx2, which loads it as [$L$inc-32]: must stay directly before $L$inc
+	DD	0,7,1,7,2,7,3,7
+$L$inc:	; gather5 slot counters: rows 0-1 are the initial values, row 2 is the per-step increment
+	DD	0,0,0,0,1,1,1,1
+	DD	2,2,2,2,3,3,3,3
+	DD	4,4,4,4,4,4,4,4
+ALIGN	64
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+rsaz_se_handler:	; Win64 language-specific handler referenced by the sqr/mul .xdata entries; r8 = CONTEXT, r9 = DISPATCHER_CONTEXT
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; outgoing-argument area for the RtlVirtualUnwind call below
+
+	mov	rax,QWORD[120+r8]	; context->Rax
+	mov	rbx,QWORD[248+r8]	; context->Rip
+
+	mov	rsi,QWORD[8+r9]	; disp->ImageBase
+	mov	r11,QWORD[56+r9]	; disp->HandlerData: the three RVAs (body, epilogue, in_tail) from .xdata
+
+	mov	r10d,DWORD[r11]	; HandlerData[0] = body label RVA
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; Rip before the body label: nothing to restore, rax is used as the frame value as-is
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1] = epilogue label RVA
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; Rip at or past the epilogue label: registers already restored
+
+	mov	rbp,QWORD[160+r8]	; context->Rbp
+
+	mov	r10d,DWORD[8+r11]	; HandlerData[2] = in_tail label RVA
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	cmovc	rax,rbp	; before in_tail the frame value is still in rbp (cf. "mov rax,rbp" ahead of $L$mul_1024_in_tail)
+
+	mov	r15,QWORD[((-48))+rax]	; reload callee-saved GPRs from the same frame offsets the function tail uses
+	mov	r14,QWORD[((-40))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	rbp,QWORD[((-16))+rax]
+	mov	rbx,QWORD[((-8))+rax]
+	mov	QWORD[240+r8],r15	; context->R15
+	mov	QWORD[232+r8],r14	; context->R14
+	mov	QWORD[224+r8],r13	; context->R13
+	mov	QWORD[216+r8],r12	; context->R12
+	mov	QWORD[160+r8],rbp	; context->Rbp
+	mov	QWORD[144+r8],rbx	; context->Rbx
+
+	lea	rsi,[((-216))+rax]	; saved xmm6-xmm15 area in the frame
+	lea	rdi,[512+r8]	; context->Xmm6
+	mov	ecx,20	; 20 qwords = 10 xmm registers
+	DD	0xa548f3fc	; bytes fc f3 48 a5 = cld; rep movsq
+
+$L$common_seh_tail:
+	mov	rdi,QWORD[8+rax]	; rdi/rsi were stashed at 8/16 above the frame value (cf. the WIN64 epilogue loads)
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context->Rsp
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+
+	mov	rdi,QWORD[40+r9]	; disp->ContextRecord
+	mov	rsi,r8
+	mov	ecx,154	; 154 qwords = 1232 bytes, the size of CONTEXT
+	DD	0xa548f3fc	; cld; rep movsq - copy the patched context into the dispatcher's record
+
+	mov	rsi,r9
+	xor	rcx,rcx	; arg1 = 0 (UNW_FLAG_NHANDLER)
+	mov	rdx,QWORD[8+rsi]	; arg2 = disp->ImageBase
+	mov	r8,QWORD[rsi]	; arg3 = disp->ControlPc
+	mov	r9,QWORD[16+rsi]	; arg4 = disp->FunctionEntry
+	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
+	lea	r11,[56+rsi]	; &disp->HandlerData
+	lea	r12,[24+rsi]	; &disp->EstablisherFrame
+	mov	QWORD[32+rsp],r10	; arg5
+	mov	QWORD[40+rsp],r11	; arg6
+	mov	QWORD[48+rsp],r12	; arg7
+	mov	QWORD[56+rsp],rcx	; arg8 = NULL
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; ExceptionContinueSearch
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4	; function table: one (begin, end, unwind-info) RVA triple per covered routine
+ALIGN	4
+	DD	$L$SEH_begin_rsaz_1024_sqr_avx2 wrt ..imagebase
+	DD	$L$SEH_end_rsaz_1024_sqr_avx2 wrt ..imagebase
+	DD	$L$SEH_info_rsaz_1024_sqr_avx2 wrt ..imagebase
+
+	DD	$L$SEH_begin_rsaz_1024_mul_avx2 wrt ..imagebase
+	DD	$L$SEH_end_rsaz_1024_mul_avx2 wrt ..imagebase
+	DD	$L$SEH_info_rsaz_1024_mul_avx2 wrt ..imagebase
+
+	DD	$L$SEH_begin_rsaz_1024_gather5 wrt ..imagebase	; range starts after "lea rax,[-136+rsp]", at the hand-encoded prologue bytes
+	DD	$L$SEH_end_rsaz_1024_gather5 wrt ..imagebase
+	DD	$L$SEH_info_rsaz_1024_gather5 wrt ..imagebase
+section	.xdata rdata align=8	; unwind descriptors referenced from .pdata
+ALIGN	8
+$L$SEH_info_rsaz_1024_sqr_avx2:
+DB	9,0,0,0	; version 1 with the exception-handler flag, no prologue unwind codes
+	DD	rsaz_se_handler wrt ..imagebase
+	DD	$L$sqr_1024_body wrt ..imagebase,$L$sqr_1024_epilogue wrt ..imagebase,$L$sqr_1024_in_tail wrt ..imagebase	; HandlerData[0..2] read by rsaz_se_handler
+	DD	0
+$L$SEH_info_rsaz_1024_mul_avx2:
+DB	9,0,0,0	; version 1 with the exception-handler flag, no prologue unwind codes
+	DD	rsaz_se_handler wrt ..imagebase
+	DD	$L$mul_1024_body wrt ..imagebase,$L$mul_1024_epilogue wrt ..imagebase,$L$mul_1024_in_tail wrt ..imagebase	; HandlerData[0..2] read by rsaz_se_handler
+	DD	0
+$L$SEH_info_rsaz_1024_gather5:
+DB	0x01,0x36,0x17,0x0b	; version 1, prologue size 0x36 bytes (4 + 10*5 encoded bytes), 0x17 code slots, frame register r11
+DB	0x36,0xf8,0x09,0x00	; at 0x36: save xmm15 at 9*16 from the allocated frame
+DB	0x31,0xe8,0x08,0x00	; at 0x31: save xmm14 at 8*16
+DB	0x2c,0xd8,0x07,0x00	; at 0x2c: save xmm13 at 7*16
+DB	0x27,0xc8,0x06,0x00	; at 0x27: save xmm12 at 6*16
+DB	0x22,0xb8,0x05,0x00	; at 0x22: save xmm11 at 5*16
+DB	0x1d,0xa8,0x04,0x00	; at 0x1d: save xmm10 at 4*16
+DB	0x18,0x98,0x03,0x00	; at 0x18: save xmm9 at 3*16
+DB	0x13,0x88,0x02,0x00	; at 0x13: save xmm8 at 2*16
+DB	0x0e,0x78,0x01,0x00	; at 0x0e: save xmm7 at 1*16
+DB	0x09,0x68,0x00,0x00	; at 0x09: save xmm6 at 0*16
+DB	0x04,0x01,0x15,0x00	; at 0x04: large stack allocation of 0x15*8 = 168 bytes
+DB	0x00,0xb3,0x00,0x00	; at 0x00: set-frame-register code, then one unused slot of padding
diff --git a/third_party/boringssl/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm
new file mode 100644
index 0000000..65b040f
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm
@@ -0,0 +1,3763 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.text code align=64
+
+EXTERN	OPENSSL_ia32cap_P
+
+global	sha1_block_data_order
+
+ALIGN	16
+sha1_block_data_order:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha1_block_data_order:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+	lea	r10,[OPENSSL_ia32cap_P]
+	mov	r9d,DWORD[r10]
+	mov	r8d,DWORD[4+r10]
+	mov	r10d,DWORD[8+r10]
+	test	r8d,512
+	jz	NEAR $L$ialu
+	and	r8d,268435456
+	and	r9d,1073741824
+	or	r8d,r9d
+	cmp	r8d,1342177280
+	je	NEAR _avx_shortcut
+	jmp	NEAR _ssse3_shortcut
+
+ALIGN	16
+$L$ialu:
+	mov	rax,rsp
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	mov	r8,rdi
+	sub	rsp,72
+	mov	r9,rsi
+	and	rsp,-64
+	mov	r10,rdx
+	mov	QWORD[64+rsp],rax
+$L$prologue:
+
+	mov	esi,DWORD[r8]
+	mov	edi,DWORD[4+r8]
+	mov	r11d,DWORD[8+r8]
+	mov	r12d,DWORD[12+r8]
+	mov	r13d,DWORD[16+r8]
+	jmp	NEAR $L$loop
+
+ALIGN	16
+$L$loop:
+	mov	edx,DWORD[r9]
+	bswap	edx
+	mov	ebp,DWORD[4+r9]
+	mov	eax,r12d
+	mov	DWORD[rsp],edx
+	mov	ecx,esi
+	bswap	ebp
+	xor	eax,r11d
+	rol	ecx,5
+	and	eax,edi
+	lea	r13d,[1518500249+r13*1+rdx]
+	add	r13d,ecx
+	xor	eax,r12d
+	rol	edi,30
+	add	r13d,eax
+	mov	r14d,DWORD[8+r9]
+	mov	eax,r11d
+	mov	DWORD[4+rsp],ebp
+	mov	ecx,r13d
+	bswap	r14d
+	xor	eax,edi
+	rol	ecx,5
+	and	eax,esi
+	lea	r12d,[1518500249+r12*1+rbp]
+	add	r12d,ecx
+	xor	eax,r11d
+	rol	esi,30
+	add	r12d,eax
+	mov	edx,DWORD[12+r9]
+	mov	eax,edi
+	mov	DWORD[8+rsp],r14d
+	mov	ecx,r12d
+	bswap	edx
+	xor	eax,esi
+	rol	ecx,5
+	and	eax,r13d
+	lea	r11d,[1518500249+r11*1+r14]
+	add	r11d,ecx
+	xor	eax,edi
+	rol	r13d,30
+	add	r11d,eax
+	mov	ebp,DWORD[16+r9]
+	mov	eax,esi
+	mov	DWORD[12+rsp],edx
+	mov	ecx,r11d
+	bswap	ebp
+	xor	eax,r13d
+	rol	ecx,5
+	and	eax,r12d
+	lea	edi,[1518500249+rdi*1+rdx]
+	add	edi,ecx
+	xor	eax,esi
+	rol	r12d,30
+	add	edi,eax
+	mov	r14d,DWORD[20+r9]
+	mov	eax,r13d
+	mov	DWORD[16+rsp],ebp
+	mov	ecx,edi
+	bswap	r14d
+	xor	eax,r12d
+	rol	ecx,5
+	and	eax,r11d
+	lea	esi,[1518500249+rsi*1+rbp]
+	add	esi,ecx
+	xor	eax,r13d
+	rol	r11d,30
+	add	esi,eax
+	mov	edx,DWORD[24+r9]
+	mov	eax,r12d
+	mov	DWORD[20+rsp],r14d
+	mov	ecx,esi
+	bswap	edx
+	xor	eax,r11d
+	rol	ecx,5
+	and	eax,edi
+	lea	r13d,[1518500249+r13*1+r14]
+	add	r13d,ecx
+	xor	eax,r12d
+	rol	edi,30
+	add	r13d,eax
+	mov	ebp,DWORD[28+r9]
+	mov	eax,r11d
+	mov	DWORD[24+rsp],edx
+	mov	ecx,r13d
+	bswap	ebp
+	xor	eax,edi
+	rol	ecx,5
+	and	eax,esi
+	lea	r12d,[1518500249+r12*1+rdx]
+	add	r12d,ecx
+	xor	eax,r11d
+	rol	esi,30
+	add	r12d,eax
+	mov	r14d,DWORD[32+r9]
+	mov	eax,edi
+	mov	DWORD[28+rsp],ebp
+	mov	ecx,r12d
+	bswap	r14d
+	xor	eax,esi
+	rol	ecx,5
+	and	eax,r13d
+	lea	r11d,[1518500249+r11*1+rbp]
+	add	r11d,ecx
+	xor	eax,edi
+	rol	r13d,30
+	add	r11d,eax
+	mov	edx,DWORD[36+r9]
+	mov	eax,esi
+	mov	DWORD[32+rsp],r14d
+	mov	ecx,r11d
+	bswap	edx
+	xor	eax,r13d
+	rol	ecx,5
+	and	eax,r12d
+	lea	edi,[1518500249+rdi*1+r14]
+	add	edi,ecx
+	xor	eax,esi
+	rol	r12d,30
+	add	edi,eax
+	mov	ebp,DWORD[40+r9]
+	mov	eax,r13d
+	mov	DWORD[36+rsp],edx
+	mov	ecx,edi
+	bswap	ebp
+	xor	eax,r12d
+	rol	ecx,5
+	and	eax,r11d
+	lea	esi,[1518500249+rsi*1+rdx]
+	add	esi,ecx
+	xor	eax,r13d
+	rol	r11d,30
+	add	esi,eax
+	mov	r14d,DWORD[44+r9]
+	mov	eax,r12d
+	mov	DWORD[40+rsp],ebp
+	mov	ecx,esi
+	bswap	r14d
+	xor	eax,r11d
+	rol	ecx,5
+	and	eax,edi
+	lea	r13d,[1518500249+r13*1+rbp]
+	add	r13d,ecx
+	xor	eax,r12d
+	rol	edi,30
+	add	r13d,eax
+	mov	edx,DWORD[48+r9]
+	mov	eax,r11d
+	mov	DWORD[44+rsp],r14d
+	mov	ecx,r13d
+	bswap	edx
+	xor	eax,edi
+	rol	ecx,5
+	and	eax,esi
+	lea	r12d,[1518500249+r12*1+r14]
+	add	r12d,ecx
+	xor	eax,r11d
+	rol	esi,30
+	add	r12d,eax
+	mov	ebp,DWORD[52+r9]
+	mov	eax,edi
+	mov	DWORD[48+rsp],edx
+	mov	ecx,r12d
+	bswap	ebp
+	xor	eax,esi
+	rol	ecx,5
+	and	eax,r13d
+	lea	r11d,[1518500249+r11*1+rdx]
+	add	r11d,ecx
+	xor	eax,edi
+	rol	r13d,30
+	add	r11d,eax
+	mov	r14d,DWORD[56+r9]
+	mov	eax,esi
+	mov	DWORD[52+rsp],ebp
+	mov	ecx,r11d
+	bswap	r14d
+	xor	eax,r13d
+	rol	ecx,5
+	and	eax,r12d
+	lea	edi,[1518500249+rdi*1+rbp]
+	add	edi,ecx
+	xor	eax,esi
+	rol	r12d,30
+	add	edi,eax
+	mov	edx,DWORD[60+r9]
+	mov	eax,r13d
+	mov	DWORD[56+rsp],r14d
+	mov	ecx,edi
+	bswap	edx
+	xor	eax,r12d
+	rol	ecx,5
+	and	eax,r11d
+	lea	esi,[1518500249+rsi*1+r14]
+	add	esi,ecx
+	xor	eax,r13d
+	rol	r11d,30
+	add	esi,eax
+	xor	ebp,DWORD[rsp]
+	mov	eax,r12d
+	mov	DWORD[60+rsp],edx
+	mov	ecx,esi
+	xor	ebp,DWORD[8+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	ebp,DWORD[32+rsp]
+	and	eax,edi
+	lea	r13d,[1518500249+r13*1+rdx]
+	rol	edi,30
+	xor	eax,r12d
+	add	r13d,ecx
+	rol	ebp,1
+	add	r13d,eax
+	xor	r14d,DWORD[4+rsp]
+	mov	eax,r11d
+	mov	DWORD[rsp],ebp
+	mov	ecx,r13d
+	xor	r14d,DWORD[12+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	r14d,DWORD[36+rsp]
+	and	eax,esi
+	lea	r12d,[1518500249+r12*1+rbp]
+	rol	esi,30
+	xor	eax,r11d
+	add	r12d,ecx
+	rol	r14d,1
+	add	r12d,eax
+	xor	edx,DWORD[8+rsp]
+	mov	eax,edi
+	mov	DWORD[4+rsp],r14d
+	mov	ecx,r12d
+	xor	edx,DWORD[16+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	edx,DWORD[40+rsp]
+	and	eax,r13d
+	lea	r11d,[1518500249+r11*1+r14]
+	rol	r13d,30
+	xor	eax,edi
+	add	r11d,ecx
+	rol	edx,1
+	add	r11d,eax
+	xor	ebp,DWORD[12+rsp]
+	mov	eax,esi
+	mov	DWORD[8+rsp],edx
+	mov	ecx,r11d
+	xor	ebp,DWORD[20+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	ebp,DWORD[44+rsp]
+	and	eax,r12d
+	lea	edi,[1518500249+rdi*1+rdx]
+	rol	r12d,30
+	xor	eax,esi
+	add	edi,ecx
+	rol	ebp,1
+	add	edi,eax
+	xor	r14d,DWORD[16+rsp]
+	mov	eax,r13d
+	mov	DWORD[12+rsp],ebp
+	mov	ecx,edi
+	xor	r14d,DWORD[24+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	r14d,DWORD[48+rsp]
+	and	eax,r11d
+	lea	esi,[1518500249+rsi*1+rbp]
+	rol	r11d,30
+	xor	eax,r13d
+	add	esi,ecx
+	rol	r14d,1
+	add	esi,eax
+	xor	edx,DWORD[20+rsp]
+	mov	eax,edi
+	mov	DWORD[16+rsp],r14d
+	mov	ecx,esi
+	xor	edx,DWORD[28+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	edx,DWORD[52+rsp]
+	lea	r13d,[1859775393+r13*1+r14]
+	xor	eax,r11d
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,eax
+	rol	edx,1
+	xor	ebp,DWORD[24+rsp]
+	mov	eax,esi
+	mov	DWORD[20+rsp],edx
+	mov	ecx,r13d
+	xor	ebp,DWORD[32+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	ebp,DWORD[56+rsp]
+	lea	r12d,[1859775393+r12*1+rdx]
+	xor	eax,edi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,eax
+	rol	ebp,1
+	xor	r14d,DWORD[28+rsp]
+	mov	eax,r13d
+	mov	DWORD[24+rsp],ebp
+	mov	ecx,r12d
+	xor	r14d,DWORD[36+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	r14d,DWORD[60+rsp]
+	lea	r11d,[1859775393+r11*1+rbp]
+	xor	eax,esi
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,eax
+	rol	r14d,1
+	xor	edx,DWORD[32+rsp]
+	mov	eax,r12d
+	mov	DWORD[28+rsp],r14d
+	mov	ecx,r11d
+	xor	edx,DWORD[40+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	edx,DWORD[rsp]
+	lea	edi,[1859775393+rdi*1+r14]
+	xor	eax,r13d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,eax
+	rol	edx,1
+	xor	ebp,DWORD[36+rsp]
+	mov	eax,r11d
+	mov	DWORD[32+rsp],edx
+	mov	ecx,edi
+	xor	ebp,DWORD[44+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	ebp,DWORD[4+rsp]
+	lea	esi,[1859775393+rsi*1+rdx]
+	xor	eax,r12d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	rol	ebp,1
+	xor	r14d,DWORD[40+rsp]
+	mov	eax,edi
+	mov	DWORD[36+rsp],ebp
+	mov	ecx,esi
+	xor	r14d,DWORD[48+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	r14d,DWORD[8+rsp]
+	lea	r13d,[1859775393+r13*1+rbp]
+	xor	eax,r11d
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,eax
+	rol	r14d,1
+	xor	edx,DWORD[44+rsp]
+	mov	eax,esi
+	mov	DWORD[40+rsp],r14d
+	mov	ecx,r13d
+	xor	edx,DWORD[52+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	edx,DWORD[12+rsp]
+	lea	r12d,[1859775393+r12*1+r14]
+	xor	eax,edi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,eax
+	rol	edx,1
+	xor	ebp,DWORD[48+rsp]
+	mov	eax,r13d
+	mov	DWORD[44+rsp],edx
+	mov	ecx,r12d
+	xor	ebp,DWORD[56+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	ebp,DWORD[16+rsp]
+	lea	r11d,[1859775393+r11*1+rdx]
+	xor	eax,esi
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,eax
+	rol	ebp,1
+	xor	r14d,DWORD[52+rsp]
+	mov	eax,r12d
+	mov	DWORD[48+rsp],ebp
+	mov	ecx,r11d
+	xor	r14d,DWORD[60+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	r14d,DWORD[20+rsp]
+	lea	edi,[1859775393+rdi*1+rbp]
+	xor	eax,r13d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,eax
+	rol	r14d,1
+	xor	edx,DWORD[56+rsp]
+	mov	eax,r11d
+	mov	DWORD[52+rsp],r14d
+	mov	ecx,edi
+	xor	edx,DWORD[rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	edx,DWORD[24+rsp]
+	lea	esi,[1859775393+rsi*1+r14]
+	xor	eax,r12d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	rol	edx,1
+	xor	ebp,DWORD[60+rsp]
+	mov	eax,edi
+	mov	DWORD[56+rsp],edx
+	mov	ecx,esi
+	xor	ebp,DWORD[4+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	ebp,DWORD[28+rsp]
+	lea	r13d,[1859775393+r13*1+rdx]
+	xor	eax,r11d
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,eax
+	rol	ebp,1
+	xor	r14d,DWORD[rsp]
+	mov	eax,esi
+	mov	DWORD[60+rsp],ebp
+	mov	ecx,r13d
+	xor	r14d,DWORD[8+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	r14d,DWORD[32+rsp]
+	lea	r12d,[1859775393+r12*1+rbp]
+	xor	eax,edi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,eax
+	rol	r14d,1
+	xor	edx,DWORD[4+rsp]
+	mov	eax,r13d
+	mov	DWORD[rsp],r14d
+	mov	ecx,r12d
+	xor	edx,DWORD[12+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	edx,DWORD[36+rsp]
+	lea	r11d,[1859775393+r11*1+r14]
+	xor	eax,esi
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,eax
+	rol	edx,1
+	xor	ebp,DWORD[8+rsp]
+	mov	eax,r12d
+	mov	DWORD[4+rsp],edx
+	mov	ecx,r11d
+	xor	ebp,DWORD[16+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	ebp,DWORD[40+rsp]
+	lea	edi,[1859775393+rdi*1+rdx]
+	xor	eax,r13d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,eax
+	rol	ebp,1
+	xor	r14d,DWORD[12+rsp]
+	mov	eax,r11d
+	mov	DWORD[8+rsp],ebp
+	mov	ecx,edi
+	xor	r14d,DWORD[20+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	r14d,DWORD[44+rsp]
+	lea	esi,[1859775393+rsi*1+rbp]
+	xor	eax,r12d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	rol	r14d,1
+	xor	edx,DWORD[16+rsp]
+	mov	eax,edi
+	mov	DWORD[12+rsp],r14d
+	mov	ecx,esi
+	xor	edx,DWORD[24+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	edx,DWORD[48+rsp]
+	lea	r13d,[1859775393+r13*1+r14]
+	xor	eax,r11d
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,eax
+	rol	edx,1
+	xor	ebp,DWORD[20+rsp]
+	mov	eax,esi
+	mov	DWORD[16+rsp],edx
+	mov	ecx,r13d
+	xor	ebp,DWORD[28+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	ebp,DWORD[52+rsp]
+	lea	r12d,[1859775393+r12*1+rdx]
+	xor	eax,edi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,eax
+	rol	ebp,1
+	xor	r14d,DWORD[24+rsp]
+	mov	eax,r13d
+	mov	DWORD[20+rsp],ebp
+	mov	ecx,r12d
+	xor	r14d,DWORD[32+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	r14d,DWORD[56+rsp]
+	lea	r11d,[1859775393+r11*1+rbp]
+	xor	eax,esi
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,eax
+	rol	r14d,1
+	xor	edx,DWORD[28+rsp]
+	mov	eax,r12d
+	mov	DWORD[24+rsp],r14d
+	mov	ecx,r11d
+	xor	edx,DWORD[36+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	edx,DWORD[60+rsp]
+	lea	edi,[1859775393+rdi*1+r14]
+	xor	eax,r13d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,eax
+	rol	edx,1
+	xor	ebp,DWORD[32+rsp]
+	mov	eax,r11d
+	mov	DWORD[28+rsp],edx
+	mov	ecx,edi
+	xor	ebp,DWORD[40+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	ebp,DWORD[rsp]
+	lea	esi,[1859775393+rsi*1+rdx]
+	xor	eax,r12d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	rol	ebp,1
+	xor	r14d,DWORD[36+rsp]
+	mov	eax,r12d
+	mov	DWORD[32+rsp],ebp
+	mov	ebx,r12d
+	xor	r14d,DWORD[44+rsp]
+	and	eax,r11d
+	mov	ecx,esi
+	xor	r14d,DWORD[4+rsp]
+	lea	r13d,[((-1894007588))+r13*1+rbp]
+	xor	ebx,r11d
+	rol	ecx,5
+	add	r13d,eax
+	rol	r14d,1
+	and	ebx,edi
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,ebx
+	xor	edx,DWORD[40+rsp]
+	mov	eax,r11d
+	mov	DWORD[36+rsp],r14d
+	mov	ebx,r11d
+	xor	edx,DWORD[48+rsp]
+	and	eax,edi
+	mov	ecx,r13d
+	xor	edx,DWORD[8+rsp]
+	lea	r12d,[((-1894007588))+r12*1+r14]
+	xor	ebx,edi
+	rol	ecx,5
+	add	r12d,eax
+	rol	edx,1
+	and	ebx,esi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,ebx
+	xor	ebp,DWORD[44+rsp]
+	mov	eax,edi
+	mov	DWORD[40+rsp],edx
+	mov	ebx,edi
+	xor	ebp,DWORD[52+rsp]
+	and	eax,esi
+	mov	ecx,r12d
+	xor	ebp,DWORD[12+rsp]
+	lea	r11d,[((-1894007588))+r11*1+rdx]
+	xor	ebx,esi
+	rol	ecx,5
+	add	r11d,eax
+	rol	ebp,1
+	and	ebx,r13d
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,ebx
+	xor	r14d,DWORD[48+rsp]
+	mov	eax,esi
+	mov	DWORD[44+rsp],ebp
+	mov	ebx,esi
+	xor	r14d,DWORD[56+rsp]
+	and	eax,r13d
+	mov	ecx,r11d
+	xor	r14d,DWORD[16+rsp]
+	lea	edi,[((-1894007588))+rdi*1+rbp]
+	xor	ebx,r13d
+	rol	ecx,5
+	add	edi,eax
+	rol	r14d,1
+	and	ebx,r12d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,ebx
+	xor	edx,DWORD[52+rsp]
+	mov	eax,r13d
+	mov	DWORD[48+rsp],r14d
+	mov	ebx,r13d
+	xor	edx,DWORD[60+rsp]
+	and	eax,r12d
+	mov	ecx,edi
+	xor	edx,DWORD[20+rsp]
+	lea	esi,[((-1894007588))+rsi*1+r14]
+	xor	ebx,r12d
+	rol	ecx,5
+	add	esi,eax
+	rol	edx,1
+	and	ebx,r11d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,ebx
+	xor	ebp,DWORD[56+rsp]
+	mov	eax,r12d
+	mov	DWORD[52+rsp],edx
+	mov	ebx,r12d
+	xor	ebp,DWORD[rsp]
+	and	eax,r11d
+	mov	ecx,esi
+	xor	ebp,DWORD[24+rsp]
+	lea	r13d,[((-1894007588))+r13*1+rdx]
+	xor	ebx,r11d
+	rol	ecx,5
+	add	r13d,eax
+	rol	ebp,1
+	and	ebx,edi
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,ebx
+	xor	r14d,DWORD[60+rsp]
+	mov	eax,r11d
+	mov	DWORD[56+rsp],ebp
+	mov	ebx,r11d
+	xor	r14d,DWORD[4+rsp]
+	and	eax,edi
+	mov	ecx,r13d
+	xor	r14d,DWORD[28+rsp]
+	lea	r12d,[((-1894007588))+r12*1+rbp]
+	xor	ebx,edi
+	rol	ecx,5
+	add	r12d,eax
+	rol	r14d,1
+	and	ebx,esi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,ebx
+	xor	edx,DWORD[rsp]
+	mov	eax,edi
+	mov	DWORD[60+rsp],r14d
+	mov	ebx,edi
+	xor	edx,DWORD[8+rsp]
+	and	eax,esi
+	mov	ecx,r12d
+	xor	edx,DWORD[32+rsp]
+	lea	r11d,[((-1894007588))+r11*1+r14]
+	xor	ebx,esi
+	rol	ecx,5
+	add	r11d,eax
+	rol	edx,1
+	and	ebx,r13d
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,ebx
+	xor	ebp,DWORD[4+rsp]
+	mov	eax,esi
+	mov	DWORD[rsp],edx
+	mov	ebx,esi
+	xor	ebp,DWORD[12+rsp]
+	and	eax,r13d
+	mov	ecx,r11d
+	xor	ebp,DWORD[36+rsp]
+	lea	edi,[((-1894007588))+rdi*1+rdx]
+	xor	ebx,r13d
+	rol	ecx,5
+	add	edi,eax
+	rol	ebp,1
+	and	ebx,r12d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,ebx
+	xor	r14d,DWORD[8+rsp]
+	mov	eax,r13d
+	mov	DWORD[4+rsp],ebp
+	mov	ebx,r13d
+	xor	r14d,DWORD[16+rsp]
+	and	eax,r12d
+	mov	ecx,edi
+	xor	r14d,DWORD[40+rsp]
+	lea	esi,[((-1894007588))+rsi*1+rbp]
+	xor	ebx,r12d
+	rol	ecx,5
+	add	esi,eax
+	rol	r14d,1
+	and	ebx,r11d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,ebx
+	xor	edx,DWORD[12+rsp]
+	mov	eax,r12d
+	mov	DWORD[8+rsp],r14d
+	mov	ebx,r12d
+	xor	edx,DWORD[20+rsp]
+	and	eax,r11d
+	mov	ecx,esi
+	xor	edx,DWORD[44+rsp]
+	lea	r13d,[((-1894007588))+r13*1+r14]
+	xor	ebx,r11d
+	rol	ecx,5
+	add	r13d,eax
+	rol	edx,1
+	and	ebx,edi
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,ebx
+	xor	ebp,DWORD[16+rsp]
+	mov	eax,r11d
+	mov	DWORD[12+rsp],edx
+	mov	ebx,r11d
+	xor	ebp,DWORD[24+rsp]
+	and	eax,edi
+	mov	ecx,r13d
+	xor	ebp,DWORD[48+rsp]
+	lea	r12d,[((-1894007588))+r12*1+rdx]
+	xor	ebx,edi
+	rol	ecx,5
+	add	r12d,eax
+	rol	ebp,1
+	and	ebx,esi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,ebx
+	xor	r14d,DWORD[20+rsp]
+	mov	eax,edi
+	mov	DWORD[16+rsp],ebp
+	mov	ebx,edi
+	xor	r14d,DWORD[28+rsp]
+	and	eax,esi
+	mov	ecx,r12d
+	xor	r14d,DWORD[52+rsp]
+	lea	r11d,[((-1894007588))+r11*1+rbp]
+	xor	ebx,esi
+	rol	ecx,5
+	add	r11d,eax
+	rol	r14d,1
+	and	ebx,r13d
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,ebx
+	xor	edx,DWORD[24+rsp]
+	mov	eax,esi
+	mov	DWORD[20+rsp],r14d
+	mov	ebx,esi
+	xor	edx,DWORD[32+rsp]
+	and	eax,r13d
+	mov	ecx,r11d
+	xor	edx,DWORD[56+rsp]
+	lea	edi,[((-1894007588))+rdi*1+r14]
+	xor	ebx,r13d
+	rol	ecx,5
+	add	edi,eax
+	rol	edx,1
+	and	ebx,r12d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,ebx
+	xor	ebp,DWORD[28+rsp]
+	mov	eax,r13d
+	mov	DWORD[24+rsp],edx
+	mov	ebx,r13d
+	xor	ebp,DWORD[36+rsp]
+	and	eax,r12d
+	mov	ecx,edi
+	xor	ebp,DWORD[60+rsp]
+	lea	esi,[((-1894007588))+rsi*1+rdx]
+	xor	ebx,r12d
+	rol	ecx,5
+	add	esi,eax
+	rol	ebp,1
+	and	ebx,r11d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,ebx
+	xor	r14d,DWORD[32+rsp]
+	mov	eax,r12d
+	mov	DWORD[28+rsp],ebp
+	mov	ebx,r12d
+	xor	r14d,DWORD[40+rsp]
+	and	eax,r11d
+	mov	ecx,esi
+	xor	r14d,DWORD[rsp]
+	lea	r13d,[((-1894007588))+r13*1+rbp]
+	xor	ebx,r11d
+	rol	ecx,5
+	add	r13d,eax
+	rol	r14d,1
+	and	ebx,edi
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,ebx
+	xor	edx,DWORD[36+rsp]
+	mov	eax,r11d
+	mov	DWORD[32+rsp],r14d
+	mov	ebx,r11d
+	xor	edx,DWORD[44+rsp]
+	and	eax,edi
+	mov	ecx,r13d
+	xor	edx,DWORD[4+rsp]
+	lea	r12d,[((-1894007588))+r12*1+r14]
+	xor	ebx,edi
+	rol	ecx,5
+	add	r12d,eax
+	rol	edx,1
+	and	ebx,esi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,ebx
+	xor	ebp,DWORD[40+rsp]
+	mov	eax,edi
+	mov	DWORD[36+rsp],edx
+	mov	ebx,edi
+	xor	ebp,DWORD[48+rsp]
+	and	eax,esi
+	mov	ecx,r12d
+	xor	ebp,DWORD[8+rsp]
+	lea	r11d,[((-1894007588))+r11*1+rdx]
+	xor	ebx,esi
+	rol	ecx,5
+	add	r11d,eax
+	rol	ebp,1
+	and	ebx,r13d
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,ebx
+	xor	r14d,DWORD[44+rsp]
+	mov	eax,esi
+	mov	DWORD[40+rsp],ebp
+	mov	ebx,esi
+	xor	r14d,DWORD[52+rsp]
+	and	eax,r13d
+	mov	ecx,r11d
+	xor	r14d,DWORD[12+rsp]
+	lea	edi,[((-1894007588))+rdi*1+rbp]
+	xor	ebx,r13d
+	rol	ecx,5
+	add	edi,eax
+	rol	r14d,1
+	and	ebx,r12d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,ebx
+	xor	edx,DWORD[48+rsp]
+	mov	eax,r13d
+	mov	DWORD[44+rsp],r14d
+	mov	ebx,r13d
+	xor	edx,DWORD[56+rsp]
+	and	eax,r12d
+	mov	ecx,edi
+	xor	edx,DWORD[16+rsp]
+	lea	esi,[((-1894007588))+rsi*1+r14]
+	xor	ebx,r12d
+	rol	ecx,5
+	add	esi,eax
+	rol	edx,1
+	and	ebx,r11d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,ebx
+	xor	ebp,DWORD[52+rsp]
+	mov	eax,edi
+	mov	DWORD[48+rsp],edx
+	mov	ecx,esi
+	xor	ebp,DWORD[60+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	ebp,DWORD[20+rsp]
+	lea	r13d,[((-899497514))+r13*1+rdx]
+	xor	eax,r11d
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,eax
+	rol	ebp,1
+	xor	r14d,DWORD[56+rsp]
+	mov	eax,esi
+	mov	DWORD[52+rsp],ebp
+	mov	ecx,r13d
+	xor	r14d,DWORD[rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	r14d,DWORD[24+rsp]
+	lea	r12d,[((-899497514))+r12*1+rbp]
+	xor	eax,edi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,eax
+	rol	r14d,1
+	xor	edx,DWORD[60+rsp]
+	mov	eax,r13d
+	mov	DWORD[56+rsp],r14d
+	mov	ecx,r12d
+	xor	edx,DWORD[4+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	edx,DWORD[28+rsp]
+	lea	r11d,[((-899497514))+r11*1+r14]
+	xor	eax,esi
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,eax
+	rol	edx,1
+	xor	ebp,DWORD[rsp]
+	mov	eax,r12d
+	mov	DWORD[60+rsp],edx
+	mov	ecx,r11d
+	xor	ebp,DWORD[8+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	ebp,DWORD[32+rsp]
+	lea	edi,[((-899497514))+rdi*1+rdx]
+	xor	eax,r13d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,eax
+	rol	ebp,1
+	xor	r14d,DWORD[4+rsp]
+	mov	eax,r11d
+	mov	DWORD[rsp],ebp
+	mov	ecx,edi
+	xor	r14d,DWORD[12+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	r14d,DWORD[36+rsp]
+	lea	esi,[((-899497514))+rsi*1+rbp]
+	xor	eax,r12d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	rol	r14d,1
+	xor	edx,DWORD[8+rsp]
+	mov	eax,edi
+	mov	DWORD[4+rsp],r14d
+	mov	ecx,esi
+	xor	edx,DWORD[16+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	edx,DWORD[40+rsp]
+	lea	r13d,[((-899497514))+r13*1+r14]
+	xor	eax,r11d
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,eax
+	rol	edx,1
+	xor	ebp,DWORD[12+rsp]
+	mov	eax,esi
+	mov	DWORD[8+rsp],edx
+	mov	ecx,r13d
+	xor	ebp,DWORD[20+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	ebp,DWORD[44+rsp]
+	lea	r12d,[((-899497514))+r12*1+rdx]
+	xor	eax,edi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,eax
+	rol	ebp,1
+	xor	r14d,DWORD[16+rsp]
+	mov	eax,r13d
+	mov	DWORD[12+rsp],ebp
+	mov	ecx,r12d
+	xor	r14d,DWORD[24+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	r14d,DWORD[48+rsp]
+	lea	r11d,[((-899497514))+r11*1+rbp]
+	xor	eax,esi
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,eax
+	rol	r14d,1
+	xor	edx,DWORD[20+rsp]
+	mov	eax,r12d
+	mov	DWORD[16+rsp],r14d
+	mov	ecx,r11d
+	xor	edx,DWORD[28+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	edx,DWORD[52+rsp]
+	lea	edi,[((-899497514))+rdi*1+r14]
+	xor	eax,r13d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,eax
+	rol	edx,1
+	xor	ebp,DWORD[24+rsp]
+	mov	eax,r11d
+	mov	DWORD[20+rsp],edx
+	mov	ecx,edi
+	xor	ebp,DWORD[32+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	ebp,DWORD[56+rsp]
+	lea	esi,[((-899497514))+rsi*1+rdx]
+	xor	eax,r12d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	rol	ebp,1
+	xor	r14d,DWORD[28+rsp]
+	mov	eax,edi
+	mov	DWORD[24+rsp],ebp
+	mov	ecx,esi
+	xor	r14d,DWORD[36+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	r14d,DWORD[60+rsp]
+	lea	r13d,[((-899497514))+r13*1+rbp]
+	xor	eax,r11d
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,eax
+	rol	r14d,1
+	xor	edx,DWORD[32+rsp]
+	mov	eax,esi
+	mov	DWORD[28+rsp],r14d
+	mov	ecx,r13d
+	xor	edx,DWORD[40+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	edx,DWORD[rsp]
+	lea	r12d,[((-899497514))+r12*1+r14]
+	xor	eax,edi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,eax
+	rol	edx,1
+	xor	ebp,DWORD[36+rsp]
+	mov	eax,r13d
+
+	mov	ecx,r12d
+	xor	ebp,DWORD[44+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	ebp,DWORD[4+rsp]
+	lea	r11d,[((-899497514))+r11*1+rdx]
+	xor	eax,esi
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,eax
+	rol	ebp,1
+	xor	r14d,DWORD[40+rsp]
+	mov	eax,r12d
+
+	mov	ecx,r11d
+	xor	r14d,DWORD[48+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	r14d,DWORD[8+rsp]
+	lea	edi,[((-899497514))+rdi*1+rbp]
+	xor	eax,r13d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,eax
+	rol	r14d,1
+	xor	edx,DWORD[44+rsp]
+	mov	eax,r11d
+
+	mov	ecx,edi
+	xor	edx,DWORD[52+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	edx,DWORD[12+rsp]
+	lea	esi,[((-899497514))+rsi*1+r14]
+	xor	eax,r12d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	rol	edx,1
+	xor	ebp,DWORD[48+rsp]
+	mov	eax,edi
+
+	mov	ecx,esi
+	xor	ebp,DWORD[56+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	ebp,DWORD[16+rsp]
+	lea	r13d,[((-899497514))+r13*1+rdx]
+	xor	eax,r11d
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,eax
+	rol	ebp,1
+	xor	r14d,DWORD[52+rsp]
+	mov	eax,esi
+
+	mov	ecx,r13d
+	xor	r14d,DWORD[60+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	r14d,DWORD[20+rsp]
+	lea	r12d,[((-899497514))+r12*1+rbp]
+	xor	eax,edi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,eax
+	rol	r14d,1
+	xor	edx,DWORD[56+rsp]
+	mov	eax,r13d
+
+	mov	ecx,r12d
+	xor	edx,DWORD[rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	edx,DWORD[24+rsp]
+	lea	r11d,[((-899497514))+r11*1+r14]
+	xor	eax,esi
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,eax
+	rol	edx,1
+	xor	ebp,DWORD[60+rsp]
+	mov	eax,r12d
+
+	mov	ecx,r11d
+	xor	ebp,DWORD[4+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	ebp,DWORD[28+rsp]
+	lea	edi,[((-899497514))+rdi*1+rdx]
+	xor	eax,r13d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,eax
+	rol	ebp,1
+	mov	eax,r11d
+	mov	ecx,edi
+	xor	eax,r13d
+	lea	esi,[((-899497514))+rsi*1+rbp]
+	rol	ecx,5
+	xor	eax,r12d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	add	esi,DWORD[r8]
+	add	edi,DWORD[4+r8]
+	add	r11d,DWORD[8+r8]
+	add	r12d,DWORD[12+r8]
+	add	r13d,DWORD[16+r8]
+	mov	DWORD[r8],esi
+	mov	DWORD[4+r8],edi
+	mov	DWORD[8+r8],r11d
+	mov	DWORD[12+r8],r12d
+	mov	DWORD[16+r8],r13d
+
+	sub	r10,1
+	lea	r9,[64+r9]
+	jnz	NEAR $L$loop
+
+	mov	rsi,QWORD[64+rsp]
+	mov	r14,QWORD[((-40))+rsi]
+	mov	r13,QWORD[((-32))+rsi]
+	mov	r12,QWORD[((-24))+rsi]
+	mov	rbp,QWORD[((-16))+rsi]
+	mov	rbx,QWORD[((-8))+rsi]
+	lea	rsp,[rsi]
+$L$epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_sha1_block_data_order:
+
+ALIGN	16
+sha1_block_data_order_ssse3:	; Win64 entry stub for the SSSE3 SHA-1 block routine
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue: stash rdi above the return address (reloaded in the epilogue)
+	mov	QWORD[16+rsp],rsi	; stash rsi the same way
+	mov	rax,rsp	; rax = rsp at entry; not read again before eax is reloaded from the state (presumably for the unwind handler - confirm)
+$L$SEH_begin_sha1_block_data_order_ssse3:
+	mov	rdi,rcx	; Win64 arg 1 (rcx) -> rdi; becomes the state pointer (r8) below
+	mov	rsi,rdx	; Win64 arg 2 (rdx) -> rsi; becomes the input pointer (r9) below
+	mov	rdx,r8	; Win64 arg 3 (r8) -> rdx; becomes the 64-byte block count (r10) below
+
+
+_ssse3_shortcut:	; frame setup; presumably also a jump target from the generic entry point - confirm
+	mov	r11,rsp	; r11 = rsp before the pushes; base for every save-slot offset and for restoring rsp
+	push	rbx	; lands at [-8+r11]
+	push	rbp	; lands at [-16+r11]
+	push	r12	; lands at [-24+r11]
+	push	r13	; lands at [-32+r11]
+	push	r14	; lands at [-40+r11]
+	lea	rsp,[((-160))+rsp]	; reserve 160 bytes: 96 for xmm6-xmm11 plus 64 for the scratch area at [rsp]
+	movaps	XMMWORD[(-40-96)+r11],xmm6	; save xmm6-xmm11 directly below the pushed GPRs
+	movaps	XMMWORD[(-40-80)+r11],xmm7
+	movaps	XMMWORD[(-40-64)+r11],xmm8
+	movaps	XMMWORD[(-40-48)+r11],xmm9
+	movaps	XMMWORD[(-40-32)+r11],xmm10
+	movaps	XMMWORD[(-40-16)+r11],xmm11	; xmm11 is saved and restored although this SSSE3 body never uses it
+$L$prologue_ssse3:
+	and	rsp,-64	; align rsp down to 64 bytes; [rsp..63+rsp] is the W+K scratch area
+	mov	r8,rdi	; r8 = pointer to the five 32-bit state words
+	mov	r9,rsi	; r9 = input pointer
+	mov	r10,rdx	; r10 = block count
+
+	shl	r10,6	; count * 64 bytes per block
+	add	r10,r9	; r10 = end-of-input pointer, compared with r9 inside the loop
+	lea	r14,[((K_XX_XX+64))]	; r14 = K_XX_XX+64 (table defined elsewhere); entries are addressed at -64..+64 from r14
+
+	mov	eax,DWORD[r8]	; load the state into eax, ebx, ecx, edx, ebp
+	mov	ebx,DWORD[4+r8]
+	mov	ecx,DWORD[8+r8]
+	mov	edx,DWORD[12+r8]
+	mov	esi,ebx
+	mov	ebp,DWORD[16+r8]
+	mov	edi,ecx
+	xor	edi,edx
+	and	esi,edi	; esi = ebx & (ecx ^ edx); the first round finishes it with xor esi,edx
+
+	movdqa	xmm6,XMMWORD[64+r14]	; xmm6 = pshufb control mask (presumably the byte-swap mask - confirm against K_XX_XX)
+	movdqa	xmm9,XMMWORD[((-64))+r14]	; xmm9 = first 16 bytes of K_XX_XX (presumably the first round constant - confirm)
+	movdqu	xmm0,XMMWORD[r9]	; unaligned load of the first 64-byte block into xmm0-xmm3
+	movdqu	xmm1,XMMWORD[16+r9]
+	movdqu	xmm2,XMMWORD[32+r9]
+	movdqu	xmm3,XMMWORD[48+r9]
+DB	102,15,56,0,198	; pshufb xmm0,xmm6 (emitted as raw bytes)
+DB	102,15,56,0,206	; pshufb xmm1,xmm6
+DB	102,15,56,0,214	; pshufb xmm2,xmm6
+	add	r9,64	; advance the input pointer past this block
+	paddd	xmm0,xmm9	; add the constant to the message words ...
+DB	102,15,56,0,222	; pshufb xmm3,xmm6
+	paddd	xmm1,xmm9
+	paddd	xmm2,xmm9
+	movdqa	XMMWORD[rsp],xmm0	; ... park the sum in the scratch area for the scalar rounds ...
+	psubd	xmm0,xmm9	; ... then subtract it again so xmm0 keeps the plain words
+	movdqa	XMMWORD[16+rsp],xmm1
+	psubd	xmm1,xmm9
+	movdqa	XMMWORD[32+rsp],xmm2
+	psubd	xmm2,xmm9	; xmm3 gets its constant added and is stored to [48+rsp] inside the loop
+	jmp	NEAR $L$oop_ssse3
+ALIGN	16
+$L$oop_ssse3:	; one pass per 64-byte block: scalar rounds consume W+K dwords from [rsp..60+rsp] while interleaved SSE code refills those slots
+	ror	ebx,2
+	pshufd	xmm4,xmm0,238
+	xor	esi,edx	; finishes (ebx & (ecx ^ edx)) ^ edx; the AND part was computed before jumping here
+	movdqa	xmm8,xmm3
+	paddd	xmm9,xmm3
+	mov	edi,eax
+	add	ebp,DWORD[rsp]
+	punpcklqdq	xmm4,xmm1
+	xor	ebx,ecx
+	rol	eax,5
+	add	ebp,esi
+	psrldq	xmm8,4
+	and	edi,ebx
+	xor	ebx,ecx
+	pxor	xmm4,xmm0
+	add	ebp,eax
+	ror	eax,7
+	pxor	xmm8,xmm2
+	xor	edi,ecx
+	mov	esi,ebp
+	add	edx,DWORD[4+rsp]
+	pxor	xmm4,xmm8
+	xor	eax,ebx
+	rol	ebp,5
+	movdqa	XMMWORD[48+rsp],xmm9
+	add	edx,edi
+	and	esi,eax
+	movdqa	xmm10,xmm4
+	xor	eax,ebx
+	add	edx,ebp
+	ror	ebp,7
+	movdqa	xmm8,xmm4
+	xor	esi,ebx
+	pslldq	xmm10,12
+	paddd	xmm4,xmm4
+	mov	edi,edx
+	add	ecx,DWORD[8+rsp]
+	psrld	xmm8,31
+	xor	ebp,eax
+	rol	edx,5
+	add	ecx,esi
+	movdqa	xmm9,xmm10
+	and	edi,ebp
+	xor	ebp,eax
+	psrld	xmm10,30
+	add	ecx,edx
+	ror	edx,7
+	por	xmm4,xmm8
+	xor	edi,eax
+	mov	esi,ecx
+	add	ebx,DWORD[12+rsp]
+	pslld	xmm9,2
+	pxor	xmm4,xmm10
+	xor	edx,ebp
+	movdqa	xmm10,XMMWORD[((-64))+r14]
+	rol	ecx,5
+	add	ebx,edi
+	and	esi,edx
+	pxor	xmm4,xmm9
+	xor	edx,ebp
+	add	ebx,ecx
+	ror	ecx,7
+	pshufd	xmm5,xmm1,238
+	xor	esi,ebp
+	movdqa	xmm9,xmm4
+	paddd	xmm10,xmm4
+	mov	edi,ebx
+	add	eax,DWORD[16+rsp]
+	punpcklqdq	xmm5,xmm2
+	xor	ecx,edx
+	rol	ebx,5
+	add	eax,esi
+	psrldq	xmm9,4
+	and	edi,ecx
+	xor	ecx,edx
+	pxor	xmm5,xmm1
+	add	eax,ebx
+	ror	ebx,7
+	pxor	xmm9,xmm3
+	xor	edi,edx
+	mov	esi,eax
+	add	ebp,DWORD[20+rsp]
+	pxor	xmm5,xmm9
+	xor	ebx,ecx
+	rol	eax,5
+	movdqa	XMMWORD[rsp],xmm10
+	add	ebp,edi
+	and	esi,ebx
+	movdqa	xmm8,xmm5
+	xor	ebx,ecx
+	add	ebp,eax
+	ror	eax,7
+	movdqa	xmm9,xmm5
+	xor	esi,ecx
+	pslldq	xmm8,12
+	paddd	xmm5,xmm5
+	mov	edi,ebp
+	add	edx,DWORD[24+rsp]
+	psrld	xmm9,31
+	xor	eax,ebx
+	rol	ebp,5
+	add	edx,esi
+	movdqa	xmm10,xmm8
+	and	edi,eax
+	xor	eax,ebx
+	psrld	xmm8,30
+	add	edx,ebp
+	ror	ebp,7
+	por	xmm5,xmm9
+	xor	edi,ebx
+	mov	esi,edx
+	add	ecx,DWORD[28+rsp]
+	pslld	xmm10,2
+	pxor	xmm5,xmm8
+	xor	ebp,eax
+	movdqa	xmm8,XMMWORD[((-32))+r14]
+	rol	edx,5
+	add	ecx,edi
+	and	esi,ebp
+	pxor	xmm5,xmm10
+	xor	ebp,eax
+	add	ecx,edx
+	ror	edx,7
+	pshufd	xmm6,xmm2,238
+	xor	esi,eax
+	movdqa	xmm10,xmm5
+	paddd	xmm8,xmm5
+	mov	edi,ecx
+	add	ebx,DWORD[32+rsp]
+	punpcklqdq	xmm6,xmm3
+	xor	edx,ebp
+	rol	ecx,5
+	add	ebx,esi
+	psrldq	xmm10,4
+	and	edi,edx
+	xor	edx,ebp
+	pxor	xmm6,xmm2
+	add	ebx,ecx
+	ror	ecx,7
+	pxor	xmm10,xmm4
+	xor	edi,ebp
+	mov	esi,ebx
+	add	eax,DWORD[36+rsp]
+	pxor	xmm6,xmm10
+	xor	ecx,edx
+	rol	ebx,5
+	movdqa	XMMWORD[16+rsp],xmm8
+	add	eax,edi
+	and	esi,ecx
+	movdqa	xmm9,xmm6
+	xor	ecx,edx
+	add	eax,ebx
+	ror	ebx,7
+	movdqa	xmm10,xmm6
+	xor	esi,edx
+	pslldq	xmm9,12
+	paddd	xmm6,xmm6
+	mov	edi,eax
+	add	ebp,DWORD[40+rsp]
+	psrld	xmm10,31
+	xor	ebx,ecx
+	rol	eax,5
+	add	ebp,esi
+	movdqa	xmm8,xmm9
+	and	edi,ebx
+	xor	ebx,ecx
+	psrld	xmm9,30
+	add	ebp,eax
+	ror	eax,7
+	por	xmm6,xmm10
+	xor	edi,ecx
+	mov	esi,ebp
+	add	edx,DWORD[44+rsp]
+	pslld	xmm8,2
+	pxor	xmm6,xmm9
+	xor	eax,ebx
+	movdqa	xmm9,XMMWORD[((-32))+r14]
+	rol	ebp,5
+	add	edx,edi
+	and	esi,eax
+	pxor	xmm6,xmm8
+	xor	eax,ebx
+	add	edx,ebp
+	ror	ebp,7
+	pshufd	xmm7,xmm3,238
+	xor	esi,ebx
+	movdqa	xmm8,xmm6
+	paddd	xmm9,xmm6
+	mov	edi,edx
+	add	ecx,DWORD[48+rsp]
+	punpcklqdq	xmm7,xmm4
+	xor	ebp,eax
+	rol	edx,5
+	add	ecx,esi
+	psrldq	xmm8,4
+	and	edi,ebp
+	xor	ebp,eax
+	pxor	xmm7,xmm3
+	add	ecx,edx
+	ror	edx,7
+	pxor	xmm8,xmm5
+	xor	edi,eax
+	mov	esi,ecx
+	add	ebx,DWORD[52+rsp]
+	pxor	xmm7,xmm8
+	xor	edx,ebp
+	rol	ecx,5
+	movdqa	XMMWORD[32+rsp],xmm9
+	add	ebx,edi
+	and	esi,edx
+	movdqa	xmm10,xmm7
+	xor	edx,ebp
+	add	ebx,ecx
+	ror	ecx,7
+	movdqa	xmm8,xmm7
+	xor	esi,ebp
+	pslldq	xmm10,12
+	paddd	xmm7,xmm7
+	mov	edi,ebx
+	add	eax,DWORD[56+rsp]
+	psrld	xmm8,31
+	xor	ecx,edx
+	rol	ebx,5
+	add	eax,esi
+	movdqa	xmm9,xmm10
+	and	edi,ecx
+	xor	ecx,edx
+	psrld	xmm10,30
+	add	eax,ebx
+	ror	ebx,7
+	por	xmm7,xmm8
+	xor	edi,edx
+	mov	esi,eax
+	add	ebp,DWORD[60+rsp]
+	pslld	xmm9,2
+	pxor	xmm7,xmm10
+	xor	ebx,ecx
+	movdqa	xmm10,XMMWORD[((-32))+r14]
+	rol	eax,5
+	add	ebp,edi
+	and	esi,ebx
+	pxor	xmm7,xmm9
+	pshufd	xmm9,xmm6,238
+	xor	ebx,ecx
+	add	ebp,eax
+	ror	eax,7
+	pxor	xmm0,xmm4
+	xor	esi,ecx
+	mov	edi,ebp
+	add	edx,DWORD[rsp]
+	punpcklqdq	xmm9,xmm7
+	xor	eax,ebx
+	rol	ebp,5
+	pxor	xmm0,xmm1
+	add	edx,esi
+	and	edi,eax
+	movdqa	xmm8,xmm10
+	xor	eax,ebx
+	paddd	xmm10,xmm7
+	add	edx,ebp
+	pxor	xmm0,xmm9
+	ror	ebp,7
+	xor	edi,ebx
+	mov	esi,edx
+	add	ecx,DWORD[4+rsp]
+	movdqa	xmm9,xmm0
+	xor	ebp,eax
+	rol	edx,5
+	movdqa	XMMWORD[48+rsp],xmm10
+	add	ecx,edi
+	and	esi,ebp
+	xor	ebp,eax
+	pslld	xmm0,2
+	add	ecx,edx
+	ror	edx,7
+	psrld	xmm9,30
+	xor	esi,eax
+	mov	edi,ecx
+	add	ebx,DWORD[8+rsp]
+	por	xmm0,xmm9
+	xor	edx,ebp
+	rol	ecx,5
+	pshufd	xmm10,xmm7,238
+	add	ebx,esi
+	and	edi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	add	eax,DWORD[12+rsp]
+	xor	edi,ebp
+	mov	esi,ebx
+	rol	ebx,5
+	add	eax,edi
+	xor	esi,edx
+	ror	ecx,7
+	add	eax,ebx
+	pxor	xmm1,xmm5
+	add	ebp,DWORD[16+rsp]
+	xor	esi,ecx
+	punpcklqdq	xmm10,xmm0
+	mov	edi,eax
+	rol	eax,5
+	pxor	xmm1,xmm2
+	add	ebp,esi
+	xor	edi,ecx
+	movdqa	xmm9,xmm8
+	ror	ebx,7
+	paddd	xmm8,xmm0
+	add	ebp,eax
+	pxor	xmm1,xmm10
+	add	edx,DWORD[20+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	rol	ebp,5
+	movdqa	xmm10,xmm1
+	add	edx,edi
+	xor	esi,ebx
+	movdqa	XMMWORD[rsp],xmm8
+	ror	eax,7
+	add	edx,ebp
+	add	ecx,DWORD[24+rsp]
+	pslld	xmm1,2
+	xor	esi,eax
+	mov	edi,edx
+	psrld	xmm10,30
+	rol	edx,5
+	add	ecx,esi
+	xor	edi,eax
+	ror	ebp,7
+	por	xmm1,xmm10
+	add	ecx,edx
+	add	ebx,DWORD[28+rsp]
+	pshufd	xmm8,xmm0,238
+	xor	edi,ebp
+	mov	esi,ecx
+	rol	ecx,5
+	add	ebx,edi
+	xor	esi,ebp
+	ror	edx,7
+	add	ebx,ecx
+	pxor	xmm2,xmm6
+	add	eax,DWORD[32+rsp]
+	xor	esi,edx
+	punpcklqdq	xmm8,xmm1
+	mov	edi,ebx
+	rol	ebx,5
+	pxor	xmm2,xmm3
+	add	eax,esi
+	xor	edi,edx
+	movdqa	xmm10,XMMWORD[r14]
+	ror	ecx,7
+	paddd	xmm9,xmm1
+	add	eax,ebx
+	pxor	xmm2,xmm8
+	add	ebp,DWORD[36+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	rol	eax,5
+	movdqa	xmm8,xmm2
+	add	ebp,edi
+	xor	esi,ecx
+	movdqa	XMMWORD[16+rsp],xmm9
+	ror	ebx,7
+	add	ebp,eax
+	add	edx,DWORD[40+rsp]
+	pslld	xmm2,2
+	xor	esi,ebx
+	mov	edi,ebp
+	psrld	xmm8,30
+	rol	ebp,5
+	add	edx,esi
+	xor	edi,ebx
+	ror	eax,7
+	por	xmm2,xmm8
+	add	edx,ebp
+	add	ecx,DWORD[44+rsp]
+	pshufd	xmm9,xmm1,238
+	xor	edi,eax
+	mov	esi,edx
+	rol	edx,5
+	add	ecx,edi
+	xor	esi,eax
+	ror	ebp,7
+	add	ecx,edx
+	pxor	xmm3,xmm7
+	add	ebx,DWORD[48+rsp]
+	xor	esi,ebp
+	punpcklqdq	xmm9,xmm2
+	mov	edi,ecx
+	rol	ecx,5
+	pxor	xmm3,xmm4
+	add	ebx,esi
+	xor	edi,ebp
+	movdqa	xmm8,xmm10
+	ror	edx,7
+	paddd	xmm10,xmm2
+	add	ebx,ecx
+	pxor	xmm3,xmm9
+	add	eax,DWORD[52+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	rol	ebx,5
+	movdqa	xmm9,xmm3
+	add	eax,edi
+	xor	esi,edx
+	movdqa	XMMWORD[32+rsp],xmm10
+	ror	ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[56+rsp]
+	pslld	xmm3,2
+	xor	esi,ecx
+	mov	edi,eax
+	psrld	xmm9,30
+	rol	eax,5
+	add	ebp,esi
+	xor	edi,ecx
+	ror	ebx,7
+	por	xmm3,xmm9
+	add	ebp,eax
+	add	edx,DWORD[60+rsp]
+	pshufd	xmm10,xmm2,238
+	xor	edi,ebx
+	mov	esi,ebp
+	rol	ebp,5
+	add	edx,edi
+	xor	esi,ebx
+	ror	eax,7
+	add	edx,ebp
+	pxor	xmm4,xmm0
+	add	ecx,DWORD[rsp]
+	xor	esi,eax
+	punpcklqdq	xmm10,xmm3
+	mov	edi,edx
+	rol	edx,5
+	pxor	xmm4,xmm5
+	add	ecx,esi
+	xor	edi,eax
+	movdqa	xmm9,xmm8
+	ror	ebp,7
+	paddd	xmm8,xmm3
+	add	ecx,edx
+	pxor	xmm4,xmm10
+	add	ebx,DWORD[4+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	rol	ecx,5
+	movdqa	xmm10,xmm4
+	add	ebx,edi
+	xor	esi,ebp
+	movdqa	XMMWORD[48+rsp],xmm8
+	ror	edx,7
+	add	ebx,ecx
+	add	eax,DWORD[8+rsp]
+	pslld	xmm4,2
+	xor	esi,edx
+	mov	edi,ebx
+	psrld	xmm10,30
+	rol	ebx,5
+	add	eax,esi
+	xor	edi,edx
+	ror	ecx,7
+	por	xmm4,xmm10
+	add	eax,ebx
+	add	ebp,DWORD[12+rsp]
+	pshufd	xmm8,xmm3,238
+	xor	edi,ecx
+	mov	esi,eax
+	rol	eax,5
+	add	ebp,edi
+	xor	esi,ecx
+	ror	ebx,7
+	add	ebp,eax
+	pxor	xmm5,xmm1
+	add	edx,DWORD[16+rsp]
+	xor	esi,ebx
+	punpcklqdq	xmm8,xmm4
+	mov	edi,ebp
+	rol	ebp,5
+	pxor	xmm5,xmm6
+	add	edx,esi
+	xor	edi,ebx
+	movdqa	xmm10,xmm9
+	ror	eax,7
+	paddd	xmm9,xmm4
+	add	edx,ebp
+	pxor	xmm5,xmm8
+	add	ecx,DWORD[20+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	rol	edx,5
+	movdqa	xmm8,xmm5
+	add	ecx,edi
+	xor	esi,eax
+	movdqa	XMMWORD[rsp],xmm9
+	ror	ebp,7
+	add	ecx,edx
+	add	ebx,DWORD[24+rsp]
+	pslld	xmm5,2
+	xor	esi,ebp
+	mov	edi,ecx
+	psrld	xmm8,30
+	rol	ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	ror	edx,7
+	por	xmm5,xmm8
+	add	ebx,ecx
+	add	eax,DWORD[28+rsp]
+	pshufd	xmm9,xmm4,238
+	ror	ecx,7
+	mov	esi,ebx
+	xor	edi,edx
+	rol	ebx,5
+	add	eax,edi
+	xor	esi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	pxor	xmm6,xmm2
+	add	ebp,DWORD[32+rsp]
+	and	esi,ecx
+	xor	ecx,edx
+	ror	ebx,7
+	punpcklqdq	xmm9,xmm5
+	mov	edi,eax
+	xor	esi,ecx
+	pxor	xmm6,xmm7
+	rol	eax,5
+	add	ebp,esi
+	movdqa	xmm8,xmm10
+	xor	edi,ebx
+	paddd	xmm10,xmm5
+	xor	ebx,ecx
+	pxor	xmm6,xmm9
+	add	ebp,eax
+	add	edx,DWORD[36+rsp]
+	and	edi,ebx
+	xor	ebx,ecx
+	ror	eax,7
+	movdqa	xmm9,xmm6
+	mov	esi,ebp
+	xor	edi,ebx
+	movdqa	XMMWORD[16+rsp],xmm10
+	rol	ebp,5
+	add	edx,edi
+	xor	esi,eax
+	pslld	xmm6,2
+	xor	eax,ebx
+	add	edx,ebp
+	psrld	xmm9,30
+	add	ecx,DWORD[40+rsp]
+	and	esi,eax
+	xor	eax,ebx
+	por	xmm6,xmm9
+	ror	ebp,7
+	mov	edi,edx
+	xor	esi,eax
+	rol	edx,5
+	pshufd	xmm10,xmm5,238
+	add	ecx,esi
+	xor	edi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	add	ebx,DWORD[44+rsp]
+	and	edi,ebp
+	xor	ebp,eax
+	ror	edx,7
+	mov	esi,ecx
+	xor	edi,ebp
+	rol	ecx,5
+	add	ebx,edi
+	xor	esi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	pxor	xmm7,xmm3
+	add	eax,DWORD[48+rsp]
+	and	esi,edx
+	xor	edx,ebp
+	ror	ecx,7
+	punpcklqdq	xmm10,xmm6
+	mov	edi,ebx
+	xor	esi,edx
+	pxor	xmm7,xmm0
+	rol	ebx,5
+	add	eax,esi
+	movdqa	xmm9,XMMWORD[32+r14]
+	xor	edi,ecx
+	paddd	xmm8,xmm6
+	xor	ecx,edx
+	pxor	xmm7,xmm10
+	add	eax,ebx
+	add	ebp,DWORD[52+rsp]
+	and	edi,ecx
+	xor	ecx,edx
+	ror	ebx,7
+	movdqa	xmm10,xmm7
+	mov	esi,eax
+	xor	edi,ecx
+	movdqa	XMMWORD[32+rsp],xmm8
+	rol	eax,5
+	add	ebp,edi
+	xor	esi,ebx
+	pslld	xmm7,2
+	xor	ebx,ecx
+	add	ebp,eax
+	psrld	xmm10,30
+	add	edx,DWORD[56+rsp]
+	and	esi,ebx
+	xor	ebx,ecx
+	por	xmm7,xmm10
+	ror	eax,7
+	mov	edi,ebp
+	xor	esi,ebx
+	rol	ebp,5
+	pshufd	xmm8,xmm6,238
+	add	edx,esi
+	xor	edi,eax
+	xor	eax,ebx
+	add	edx,ebp
+	add	ecx,DWORD[60+rsp]
+	and	edi,eax
+	xor	eax,ebx
+	ror	ebp,7
+	mov	esi,edx
+	xor	edi,eax
+	rol	edx,5
+	add	ecx,edi
+	xor	esi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	pxor	xmm0,xmm4
+	add	ebx,DWORD[rsp]
+	and	esi,ebp
+	xor	ebp,eax
+	ror	edx,7
+	punpcklqdq	xmm8,xmm7
+	mov	edi,ecx
+	xor	esi,ebp
+	pxor	xmm0,xmm1
+	rol	ecx,5
+	add	ebx,esi
+	movdqa	xmm10,xmm9
+	xor	edi,edx
+	paddd	xmm9,xmm7
+	xor	edx,ebp
+	pxor	xmm0,xmm8
+	add	ebx,ecx
+	add	eax,DWORD[4+rsp]
+	and	edi,edx
+	xor	edx,ebp
+	ror	ecx,7
+	movdqa	xmm8,xmm0
+	mov	esi,ebx
+	xor	edi,edx
+	movdqa	XMMWORD[48+rsp],xmm9
+	rol	ebx,5
+	add	eax,edi
+	xor	esi,ecx
+	pslld	xmm0,2
+	xor	ecx,edx
+	add	eax,ebx
+	psrld	xmm8,30
+	add	ebp,DWORD[8+rsp]
+	and	esi,ecx
+	xor	ecx,edx
+	por	xmm0,xmm8
+	ror	ebx,7
+	mov	edi,eax
+	xor	esi,ecx
+	rol	eax,5
+	pshufd	xmm9,xmm7,238
+	add	ebp,esi
+	xor	edi,ebx
+	xor	ebx,ecx
+	add	ebp,eax
+	add	edx,DWORD[12+rsp]
+	and	edi,ebx
+	xor	ebx,ecx
+	ror	eax,7
+	mov	esi,ebp
+	xor	edi,ebx
+	rol	ebp,5
+	add	edx,edi
+	xor	esi,eax
+	xor	eax,ebx
+	add	edx,ebp
+	pxor	xmm1,xmm5
+	add	ecx,DWORD[16+rsp]
+	and	esi,eax
+	xor	eax,ebx
+	ror	ebp,7
+	punpcklqdq	xmm9,xmm0
+	mov	edi,edx
+	xor	esi,eax
+	pxor	xmm1,xmm2
+	rol	edx,5
+	add	ecx,esi
+	movdqa	xmm8,xmm10
+	xor	edi,ebp
+	paddd	xmm10,xmm0
+	xor	ebp,eax
+	pxor	xmm1,xmm9
+	add	ecx,edx
+	add	ebx,DWORD[20+rsp]
+	and	edi,ebp
+	xor	ebp,eax
+	ror	edx,7
+	movdqa	xmm9,xmm1
+	mov	esi,ecx
+	xor	edi,ebp
+	movdqa	XMMWORD[rsp],xmm10
+	rol	ecx,5
+	add	ebx,edi
+	xor	esi,edx
+	pslld	xmm1,2
+	xor	edx,ebp
+	add	ebx,ecx
+	psrld	xmm9,30
+	add	eax,DWORD[24+rsp]
+	and	esi,edx
+	xor	edx,ebp
+	por	xmm1,xmm9
+	ror	ecx,7
+	mov	edi,ebx
+	xor	esi,edx
+	rol	ebx,5
+	pshufd	xmm10,xmm0,238
+	add	eax,esi
+	xor	edi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	add	ebp,DWORD[28+rsp]
+	and	edi,ecx
+	xor	ecx,edx
+	ror	ebx,7
+	mov	esi,eax
+	xor	edi,ecx
+	rol	eax,5
+	add	ebp,edi
+	xor	esi,ebx
+	xor	ebx,ecx
+	add	ebp,eax
+	pxor	xmm2,xmm6
+	add	edx,DWORD[32+rsp]
+	and	esi,ebx
+	xor	ebx,ecx
+	ror	eax,7
+	punpcklqdq	xmm10,xmm1
+	mov	edi,ebp
+	xor	esi,ebx
+	pxor	xmm2,xmm3
+	rol	ebp,5
+	add	edx,esi
+	movdqa	xmm9,xmm8
+	xor	edi,eax
+	paddd	xmm8,xmm1
+	xor	eax,ebx
+	pxor	xmm2,xmm10
+	add	edx,ebp
+	add	ecx,DWORD[36+rsp]
+	and	edi,eax
+	xor	eax,ebx
+	ror	ebp,7
+	movdqa	xmm10,xmm2
+	mov	esi,edx
+	xor	edi,eax
+	movdqa	XMMWORD[16+rsp],xmm8
+	rol	edx,5
+	add	ecx,edi
+	xor	esi,ebp
+	pslld	xmm2,2
+	xor	ebp,eax
+	add	ecx,edx
+	psrld	xmm10,30
+	add	ebx,DWORD[40+rsp]
+	and	esi,ebp
+	xor	ebp,eax
+	por	xmm2,xmm10
+	ror	edx,7
+	mov	edi,ecx
+	xor	esi,ebp
+	rol	ecx,5
+	pshufd	xmm8,xmm1,238
+	add	ebx,esi
+	xor	edi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	add	eax,DWORD[44+rsp]
+	and	edi,edx
+	xor	edx,ebp
+	ror	ecx,7
+	mov	esi,ebx
+	xor	edi,edx
+	rol	ebx,5
+	add	eax,edi
+	xor	esi,edx
+	add	eax,ebx
+	pxor	xmm3,xmm7
+	add	ebp,DWORD[48+rsp]
+	xor	esi,ecx
+	punpcklqdq	xmm8,xmm2
+	mov	edi,eax
+	rol	eax,5
+	pxor	xmm3,xmm4
+	add	ebp,esi
+	xor	edi,ecx
+	movdqa	xmm10,xmm9
+	ror	ebx,7
+	paddd	xmm9,xmm2
+	add	ebp,eax
+	pxor	xmm3,xmm8
+	add	edx,DWORD[52+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	rol	ebp,5
+	movdqa	xmm8,xmm3
+	add	edx,edi
+	xor	esi,ebx
+	movdqa	XMMWORD[32+rsp],xmm9
+	ror	eax,7
+	add	edx,ebp
+	add	ecx,DWORD[56+rsp]
+	pslld	xmm3,2
+	xor	esi,eax
+	mov	edi,edx
+	psrld	xmm8,30
+	rol	edx,5
+	add	ecx,esi
+	xor	edi,eax
+	ror	ebp,7
+	por	xmm3,xmm8
+	add	ecx,edx
+	add	ebx,DWORD[60+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	rol	ecx,5
+	add	ebx,edi
+	xor	esi,ebp
+	ror	edx,7
+	add	ebx,ecx
+	add	eax,DWORD[rsp]
+	xor	esi,edx
+	mov	edi,ebx
+	rol	ebx,5
+	paddd	xmm10,xmm3
+	add	eax,esi
+	xor	edi,edx
+	movdqa	XMMWORD[48+rsp],xmm10
+	ror	ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[4+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	rol	eax,5
+	add	ebp,edi
+	xor	esi,ecx
+	ror	ebx,7
+	add	ebp,eax
+	add	edx,DWORD[8+rsp]
+	xor	esi,ebx
+	mov	edi,ebp
+	rol	ebp,5
+	add	edx,esi
+	xor	edi,ebx
+	ror	eax,7
+	add	edx,ebp
+	add	ecx,DWORD[12+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	rol	edx,5
+	add	ecx,edi
+	xor	esi,eax
+	ror	ebp,7
+	add	ecx,edx
+	cmp	r9,r10	; has the input pointer reached the end pointer?
+	je	NEAR $L$done_ssse3	; yes: finish the remaining rounds without loading another block
+	movdqa	xmm6,XMMWORD[64+r14]	; reload the pshufb mask; xmm6 was reused as a schedule register earlier in the loop
+	movdqa	xmm9,XMMWORD[((-64))+r14]	; reload the constant at K_XX_XX+0
+	movdqu	xmm0,XMMWORD[r9]	; unaligned load of the next 64-byte block into xmm0-xmm3
+	movdqu	xmm1,XMMWORD[16+r9]
+	movdqu	xmm2,XMMWORD[32+r9]
+	movdqu	xmm3,XMMWORD[48+r9]
+DB	102,15,56,0,198	; pshufb xmm0,xmm6 (emitted as raw bytes)
+	add	r9,64	; advance the input pointer past the block just loaded
+	add	ebx,DWORD[16+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+DB	102,15,56,0,206
+	rol	ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	ror	edx,7
+	paddd	xmm0,xmm9
+	add	ebx,ecx
+	add	eax,DWORD[20+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	movdqa	XMMWORD[rsp],xmm0
+	rol	ebx,5
+	add	eax,edi
+	xor	esi,edx
+	ror	ecx,7
+	psubd	xmm0,xmm9
+	add	eax,ebx
+	add	ebp,DWORD[24+rsp]
+	xor	esi,ecx
+	mov	edi,eax
+	rol	eax,5
+	add	ebp,esi
+	xor	edi,ecx
+	ror	ebx,7
+	add	ebp,eax
+	add	edx,DWORD[28+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	rol	ebp,5
+	add	edx,edi
+	xor	esi,ebx
+	ror	eax,7
+	add	edx,ebp
+	add	ecx,DWORD[32+rsp]
+	xor	esi,eax
+	mov	edi,edx
+DB	102,15,56,0,214
+	rol	edx,5
+	add	ecx,esi
+	xor	edi,eax
+	ror	ebp,7
+	paddd	xmm1,xmm9
+	add	ecx,edx
+	add	ebx,DWORD[36+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	movdqa	XMMWORD[16+rsp],xmm1
+	rol	ecx,5
+	add	ebx,edi
+	xor	esi,ebp
+	ror	edx,7
+	psubd	xmm1,xmm9
+	add	ebx,ecx
+	add	eax,DWORD[40+rsp]
+	xor	esi,edx
+	mov	edi,ebx
+	rol	ebx,5
+	add	eax,esi
+	xor	edi,edx
+	ror	ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[44+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	rol	eax,5
+	add	ebp,edi
+	xor	esi,ecx
+	ror	ebx,7
+	add	ebp,eax
+	add	edx,DWORD[48+rsp]
+	xor	esi,ebx
+	mov	edi,ebp
+DB	102,15,56,0,222
+	rol	ebp,5
+	add	edx,esi
+	xor	edi,ebx
+	ror	eax,7
+	paddd	xmm2,xmm9
+	add	edx,ebp
+	add	ecx,DWORD[52+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	movdqa	XMMWORD[32+rsp],xmm2
+	rol	edx,5
+	add	ecx,edi
+	xor	esi,eax
+	ror	ebp,7
+	psubd	xmm2,xmm9
+	add	ecx,edx
+	add	ebx,DWORD[56+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+	rol	ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	ror	edx,7
+	add	ebx,ecx
+	add	eax,DWORD[60+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	rol	ebx,5
+	add	eax,edi
+	ror	ecx,7
+	add	eax,ebx
+	add	eax,DWORD[r8]	; fold the working variables into the state at r8 and write it back
+	add	esi,DWORD[4+r8]	; esi holds the copy of ebx taken before its final rol
+	add	ecx,DWORD[8+r8]
+	add	edx,DWORD[12+r8]
+	mov	DWORD[r8],eax
+	add	ebp,DWORD[16+r8]
+	mov	DWORD[4+r8],esi
+	mov	ebx,esi	; ebx = updated second state word for the next block
+	mov	DWORD[8+r8],ecx
+	mov	edi,ecx
+	mov	DWORD[12+r8],edx
+	xor	edi,edx
+	mov	DWORD[16+r8],ebp
+	and	esi,edi	; esi = ebx & (ecx ^ edx), the same precomputation done before first entering the loop
+	jmp	NEAR $L$oop_ssse3	; process the block that was loaded above
+
+ALIGN	16
+$L$done_ssse3:	; last block: run the 12 rounds fed from [16+rsp]..[60+rsp] with no next-block load, then store state and unwind
+	add	ebx,DWORD[16+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+	rol	ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	ror	edx,7
+	add	ebx,ecx
+	add	eax,DWORD[20+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	rol	ebx,5
+	add	eax,edi
+	xor	esi,edx
+	ror	ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[24+rsp]
+	xor	esi,ecx
+	mov	edi,eax
+	rol	eax,5
+	add	ebp,esi
+	xor	edi,ecx
+	ror	ebx,7
+	add	ebp,eax
+	add	edx,DWORD[28+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	rol	ebp,5
+	add	edx,edi
+	xor	esi,ebx
+	ror	eax,7
+	add	edx,ebp
+	add	ecx,DWORD[32+rsp]
+	xor	esi,eax
+	mov	edi,edx
+	rol	edx,5
+	add	ecx,esi
+	xor	edi,eax
+	ror	ebp,7
+	add	ecx,edx
+	add	ebx,DWORD[36+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	rol	ecx,5
+	add	ebx,edi
+	xor	esi,ebp
+	ror	edx,7
+	add	ebx,ecx
+	add	eax,DWORD[40+rsp]
+	xor	esi,edx
+	mov	edi,ebx
+	rol	ebx,5
+	add	eax,esi
+	xor	edi,edx
+	ror	ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[44+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	rol	eax,5
+	add	ebp,edi
+	xor	esi,ecx
+	ror	ebx,7
+	add	ebp,eax
+	add	edx,DWORD[48+rsp]
+	xor	esi,ebx
+	mov	edi,ebp
+	rol	ebp,5
+	add	edx,esi
+	xor	edi,ebx
+	ror	eax,7
+	add	edx,ebp
+	add	ecx,DWORD[52+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	rol	edx,5
+	add	ecx,edi
+	xor	esi,eax
+	ror	ebp,7
+	add	ecx,edx
+	add	ebx,DWORD[56+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+	rol	ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	ror	edx,7
+	add	ebx,ecx
+	add	eax,DWORD[60+rsp]
+	xor	edi,edx
+	mov	esi,ebx	; keep the pre-rotation ebx; it is the value folded into the second state word
+	rol	ebx,5
+	add	eax,edi
+	ror	ecx,7
+	add	eax,ebx
+	add	eax,DWORD[r8]	; fold the working variables into the state at r8 and write it back
+	add	esi,DWORD[4+r8]
+	add	ecx,DWORD[8+r8]
+	mov	DWORD[r8],eax
+	add	edx,DWORD[12+r8]
+	mov	DWORD[4+r8],esi
+	add	ebp,DWORD[16+r8]
+	mov	DWORD[8+r8],ecx
+	mov	DWORD[12+r8],edx
+	mov	DWORD[16+r8],ebp
+	movaps	xmm6,XMMWORD[((-40-96))+r11]	; restore xmm6-xmm11 from the slots filled at _ssse3_shortcut
+	movaps	xmm7,XMMWORD[((-40-80))+r11]
+	movaps	xmm8,XMMWORD[((-40-64))+r11]
+	movaps	xmm9,XMMWORD[((-40-48))+r11]
+	movaps	xmm10,XMMWORD[((-40-32))+r11]
+	movaps	xmm11,XMMWORD[((-40-16))+r11]
+	mov	r14,QWORD[((-40))+r11]	; reload the pushed GPRs by offset from r11 instead of popping
+	mov	r13,QWORD[((-32))+r11]
+	mov	r12,QWORD[((-24))+r11]
+	mov	rbp,QWORD[((-16))+r11]
+	mov	rbx,QWORD[((-8))+r11]
+	lea	rsp,[r11]	; rsp = value at entry, discarding the aligned scratch frame
+$L$epilogue_ssse3:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue: reload the rdi value stashed at entry
+	mov	rsi,QWORD[16+rsp]	; reload the rsi value stashed at entry
+	DB	0F3h,0C3h		;repret (bytes F3 C3 = rep ret)
+$L$SEH_end_sha1_block_data_order_ssse3:
+
+ALIGN	16
+sha1_block_data_order_avx:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha1_block_data_order_avx:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+_avx_shortcut:
+	mov	r11,rsp
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	lea	rsp,[((-160))+rsp]
+	vzeroupper
+	vmovaps	XMMWORD[(-40-96)+r11],xmm6
+	vmovaps	XMMWORD[(-40-80)+r11],xmm7
+	vmovaps	XMMWORD[(-40-64)+r11],xmm8
+	vmovaps	XMMWORD[(-40-48)+r11],xmm9
+	vmovaps	XMMWORD[(-40-32)+r11],xmm10
+	vmovaps	XMMWORD[(-40-16)+r11],xmm11
+$L$prologue_avx:
+	and	rsp,-64
+	mov	r8,rdi
+	mov	r9,rsi
+	mov	r10,rdx
+
+	shl	r10,6
+	add	r10,r9
+	lea	r14,[((K_XX_XX+64))]
+
+	mov	eax,DWORD[r8]
+	mov	ebx,DWORD[4+r8]
+	mov	ecx,DWORD[8+r8]
+	mov	edx,DWORD[12+r8]
+	mov	esi,ebx
+	mov	ebp,DWORD[16+r8]
+	mov	edi,ecx
+	xor	edi,edx
+	and	esi,edi
+
+	vmovdqa	xmm6,XMMWORD[64+r14]
+	vmovdqa	xmm11,XMMWORD[((-64))+r14]
+	vmovdqu	xmm0,XMMWORD[r9]
+	vmovdqu	xmm1,XMMWORD[16+r9]
+	vmovdqu	xmm2,XMMWORD[32+r9]
+	vmovdqu	xmm3,XMMWORD[48+r9]
+	vpshufb	xmm0,xmm0,xmm6
+	add	r9,64
+	vpshufb	xmm1,xmm1,xmm6
+	vpshufb	xmm2,xmm2,xmm6
+	vpshufb	xmm3,xmm3,xmm6
+	vpaddd	xmm4,xmm0,xmm11
+	vpaddd	xmm5,xmm1,xmm11
+	vpaddd	xmm6,xmm2,xmm11
+	vmovdqa	XMMWORD[rsp],xmm4
+	vmovdqa	XMMWORD[16+rsp],xmm5
+	vmovdqa	XMMWORD[32+rsp],xmm6
+	jmp	NEAR $L$oop_avx
+ALIGN	16
+$L$oop_avx:
+	shrd	ebx,ebx,2
+	xor	esi,edx
+	vpalignr	xmm4,xmm1,xmm0,8
+	mov	edi,eax
+	add	ebp,DWORD[rsp]
+	vpaddd	xmm9,xmm11,xmm3
+	xor	ebx,ecx
+	shld	eax,eax,5
+	vpsrldq	xmm8,xmm3,4
+	add	ebp,esi
+	and	edi,ebx
+	vpxor	xmm4,xmm4,xmm0
+	xor	ebx,ecx
+	add	ebp,eax
+	vpxor	xmm8,xmm8,xmm2
+	shrd	eax,eax,7
+	xor	edi,ecx
+	mov	esi,ebp
+	add	edx,DWORD[4+rsp]
+	vpxor	xmm4,xmm4,xmm8
+	xor	eax,ebx
+	shld	ebp,ebp,5
+	vmovdqa	XMMWORD[48+rsp],xmm9
+	add	edx,edi
+	and	esi,eax
+	vpsrld	xmm8,xmm4,31
+	xor	eax,ebx
+	add	edx,ebp
+	shrd	ebp,ebp,7
+	xor	esi,ebx
+	vpslldq	xmm10,xmm4,12
+	vpaddd	xmm4,xmm4,xmm4
+	mov	edi,edx
+	add	ecx,DWORD[8+rsp]
+	xor	ebp,eax
+	shld	edx,edx,5
+	vpsrld	xmm9,xmm10,30
+	vpor	xmm4,xmm4,xmm8
+	add	ecx,esi
+	and	edi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	vpslld	xmm10,xmm10,2
+	vpxor	xmm4,xmm4,xmm9
+	shrd	edx,edx,7
+	xor	edi,eax
+	mov	esi,ecx
+	add	ebx,DWORD[12+rsp]
+	vpxor	xmm4,xmm4,xmm10
+	xor	edx,ebp
+	shld	ecx,ecx,5
+	add	ebx,edi
+	and	esi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	shrd	ecx,ecx,7
+	xor	esi,ebp
+	vpalignr	xmm5,xmm2,xmm1,8
+	mov	edi,ebx
+	add	eax,DWORD[16+rsp]
+	vpaddd	xmm9,xmm11,xmm4
+	xor	ecx,edx
+	shld	ebx,ebx,5
+	vpsrldq	xmm8,xmm4,4
+	add	eax,esi
+	and	edi,ecx
+	vpxor	xmm5,xmm5,xmm1
+	xor	ecx,edx
+	add	eax,ebx
+	vpxor	xmm8,xmm8,xmm3
+	shrd	ebx,ebx,7
+	xor	edi,edx
+	mov	esi,eax
+	add	ebp,DWORD[20+rsp]
+	vpxor	xmm5,xmm5,xmm8
+	xor	ebx,ecx
+	shld	eax,eax,5
+	vmovdqa	XMMWORD[rsp],xmm9
+	add	ebp,edi
+	and	esi,ebx
+	vpsrld	xmm8,xmm5,31
+	xor	ebx,ecx
+	add	ebp,eax
+	shrd	eax,eax,7
+	xor	esi,ecx
+	vpslldq	xmm10,xmm5,12
+	vpaddd	xmm5,xmm5,xmm5
+	mov	edi,ebp
+	add	edx,DWORD[24+rsp]
+	xor	eax,ebx
+	shld	ebp,ebp,5
+	vpsrld	xmm9,xmm10,30
+	vpor	xmm5,xmm5,xmm8
+	add	edx,esi
+	and	edi,eax
+	xor	eax,ebx
+	add	edx,ebp
+	vpslld	xmm10,xmm10,2
+	vpxor	xmm5,xmm5,xmm9
+	shrd	ebp,ebp,7
+	xor	edi,ebx
+	mov	esi,edx
+	add	ecx,DWORD[28+rsp]
+	vpxor	xmm5,xmm5,xmm10
+	xor	ebp,eax
+	shld	edx,edx,5
+	vmovdqa	xmm11,XMMWORD[((-32))+r14]
+	add	ecx,edi
+	and	esi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	shrd	edx,edx,7
+	xor	esi,eax
+	vpalignr	xmm6,xmm3,xmm2,8
+	mov	edi,ecx
+	add	ebx,DWORD[32+rsp]
+	vpaddd	xmm9,xmm11,xmm5
+	xor	edx,ebp
+	shld	ecx,ecx,5
+	vpsrldq	xmm8,xmm5,4
+	add	ebx,esi
+	and	edi,edx
+	vpxor	xmm6,xmm6,xmm2
+	xor	edx,ebp
+	add	ebx,ecx
+	vpxor	xmm8,xmm8,xmm4
+	shrd	ecx,ecx,7
+	xor	edi,ebp
+	mov	esi,ebx
+	add	eax,DWORD[36+rsp]
+	vpxor	xmm6,xmm6,xmm8
+	xor	ecx,edx
+	shld	ebx,ebx,5
+	vmovdqa	XMMWORD[16+rsp],xmm9
+	add	eax,edi
+	and	esi,ecx
+	vpsrld	xmm8,xmm6,31
+	xor	ecx,edx
+	add	eax,ebx
+	shrd	ebx,ebx,7
+	xor	esi,edx
+	vpslldq	xmm10,xmm6,12
+	vpaddd	xmm6,xmm6,xmm6
+	mov	edi,eax
+	add	ebp,DWORD[40+rsp]
+	xor	ebx,ecx
+	shld	eax,eax,5
+	vpsrld	xmm9,xmm10,30
+	vpor	xmm6,xmm6,xmm8
+	add	ebp,esi
+	and	edi,ebx
+	xor	ebx,ecx
+	add	ebp,eax
+	vpslld	xmm10,xmm10,2
+	vpxor	xmm6,xmm6,xmm9
+	shrd	eax,eax,7
+	xor	edi,ecx
+	mov	esi,ebp
+	add	edx,DWORD[44+rsp]
+	vpxor	xmm6,xmm6,xmm10
+	xor	eax,ebx
+	shld	ebp,ebp,5
+	add	edx,edi
+	and	esi,eax
+	xor	eax,ebx
+	add	edx,ebp
+	shrd	ebp,ebp,7
+	xor	esi,ebx
+	vpalignr	xmm7,xmm4,xmm3,8
+	mov	edi,edx
+	add	ecx,DWORD[48+rsp]
+	vpaddd	xmm9,xmm11,xmm6
+	xor	ebp,eax
+	shld	edx,edx,5
+	vpsrldq	xmm8,xmm6,4
+	add	ecx,esi
+	and	edi,ebp
+	vpxor	xmm7,xmm7,xmm3
+	xor	ebp,eax
+	add	ecx,edx
+	vpxor	xmm8,xmm8,xmm5
+	shrd	edx,edx,7
+	xor	edi,eax
+	mov	esi,ecx
+	add	ebx,DWORD[52+rsp]
+	vpxor	xmm7,xmm7,xmm8
+	xor	edx,ebp
+	shld	ecx,ecx,5
+	vmovdqa	XMMWORD[32+rsp],xmm9
+	add	ebx,edi
+	and	esi,edx
+	vpsrld	xmm8,xmm7,31
+	xor	edx,ebp
+	add	ebx,ecx
+	shrd	ecx,ecx,7
+	xor	esi,ebp
+	vpslldq	xmm10,xmm7,12
+	vpaddd	xmm7,xmm7,xmm7
+	mov	edi,ebx
+	add	eax,DWORD[56+rsp]
+	xor	ecx,edx
+	shld	ebx,ebx,5
+	vpsrld	xmm9,xmm10,30
+	vpor	xmm7,xmm7,xmm8
+	add	eax,esi
+	and	edi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	vpslld	xmm10,xmm10,2
+	vpxor	xmm7,xmm7,xmm9
+	shrd	ebx,ebx,7
+	xor	edi,edx
+	mov	esi,eax
+	add	ebp,DWORD[60+rsp]
+	vpxor	xmm7,xmm7,xmm10
+	xor	ebx,ecx
+	shld	eax,eax,5
+	add	ebp,edi
+	and	esi,ebx
+	xor	ebx,ecx
+	add	ebp,eax
+	vpalignr	xmm8,xmm7,xmm6,8
+	vpxor	xmm0,xmm0,xmm4
+	shrd	eax,eax,7
+	xor	esi,ecx
+	mov	edi,ebp
+	add	edx,DWORD[rsp]
+	vpxor	xmm0,xmm0,xmm1
+	xor	eax,ebx
+	shld	ebp,ebp,5
+	vpaddd	xmm9,xmm11,xmm7
+	add	edx,esi
+	and	edi,eax
+	vpxor	xmm0,xmm0,xmm8
+	xor	eax,ebx
+	add	edx,ebp
+	shrd	ebp,ebp,7
+	xor	edi,ebx
+	vpsrld	xmm8,xmm0,30
+	vmovdqa	XMMWORD[48+rsp],xmm9
+	mov	esi,edx
+	add	ecx,DWORD[4+rsp]
+	xor	ebp,eax
+	shld	edx,edx,5
+	vpslld	xmm0,xmm0,2
+	add	ecx,edi
+	and	esi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	shrd	edx,edx,7
+	xor	esi,eax
+	mov	edi,ecx
+	add	ebx,DWORD[8+rsp]
+	vpor	xmm0,xmm0,xmm8
+	xor	edx,ebp
+	shld	ecx,ecx,5
+	add	ebx,esi
+	and	edi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	add	eax,DWORD[12+rsp]
+	xor	edi,ebp
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	add	eax,edi
+	xor	esi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	vpalignr	xmm8,xmm0,xmm7,8
+	vpxor	xmm1,xmm1,xmm5
+	add	ebp,DWORD[16+rsp]
+	xor	esi,ecx
+	mov	edi,eax
+	shld	eax,eax,5
+	vpxor	xmm1,xmm1,xmm2
+	add	ebp,esi
+	xor	edi,ecx
+	vpaddd	xmm9,xmm11,xmm0
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	vpxor	xmm1,xmm1,xmm8
+	add	edx,DWORD[20+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	shld	ebp,ebp,5
+	vpsrld	xmm8,xmm1,30
+	vmovdqa	XMMWORD[rsp],xmm9
+	add	edx,edi
+	xor	esi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	vpslld	xmm1,xmm1,2
+	add	ecx,DWORD[24+rsp]
+	xor	esi,eax
+	mov	edi,edx
+	shld	edx,edx,5
+	add	ecx,esi
+	xor	edi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	vpor	xmm1,xmm1,xmm8
+	add	ebx,DWORD[28+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	shld	ecx,ecx,5
+	add	ebx,edi
+	xor	esi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	vpalignr	xmm8,xmm1,xmm0,8
+	vpxor	xmm2,xmm2,xmm6
+	add	eax,DWORD[32+rsp]
+	xor	esi,edx
+	mov	edi,ebx
+	shld	ebx,ebx,5
+	vpxor	xmm2,xmm2,xmm3
+	add	eax,esi
+	xor	edi,edx
+	vpaddd	xmm9,xmm11,xmm1
+	vmovdqa	xmm11,XMMWORD[r14]
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	vpxor	xmm2,xmm2,xmm8
+	add	ebp,DWORD[36+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	shld	eax,eax,5
+	vpsrld	xmm8,xmm2,30
+	vmovdqa	XMMWORD[16+rsp],xmm9
+	add	ebp,edi
+	xor	esi,ecx
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	vpslld	xmm2,xmm2,2
+	add	edx,DWORD[40+rsp]
+	xor	esi,ebx
+	mov	edi,ebp
+	shld	ebp,ebp,5
+	add	edx,esi
+	xor	edi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	vpor	xmm2,xmm2,xmm8
+	add	ecx,DWORD[44+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	shld	edx,edx,5
+	add	ecx,edi
+	xor	esi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	vpalignr	xmm8,xmm2,xmm1,8
+	vpxor	xmm3,xmm3,xmm7
+	add	ebx,DWORD[48+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+	shld	ecx,ecx,5
+	vpxor	xmm3,xmm3,xmm4
+	add	ebx,esi
+	xor	edi,ebp
+	vpaddd	xmm9,xmm11,xmm2
+	shrd	edx,edx,7
+	add	ebx,ecx
+	vpxor	xmm3,xmm3,xmm8
+	add	eax,DWORD[52+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	vpsrld	xmm8,xmm3,30
+	vmovdqa	XMMWORD[32+rsp],xmm9
+	add	eax,edi
+	xor	esi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	vpslld	xmm3,xmm3,2
+	add	ebp,DWORD[56+rsp]
+	xor	esi,ecx
+	mov	edi,eax
+	shld	eax,eax,5
+	add	ebp,esi
+	xor	edi,ecx
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	vpor	xmm3,xmm3,xmm8
+	add	edx,DWORD[60+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	shld	ebp,ebp,5
+	add	edx,edi
+	xor	esi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	vpalignr	xmm8,xmm3,xmm2,8
+	vpxor	xmm4,xmm4,xmm0
+	add	ecx,DWORD[rsp]
+	xor	esi,eax
+	mov	edi,edx
+	shld	edx,edx,5
+	vpxor	xmm4,xmm4,xmm5
+	add	ecx,esi
+	xor	edi,eax
+	vpaddd	xmm9,xmm11,xmm3
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	vpxor	xmm4,xmm4,xmm8
+	add	ebx,DWORD[4+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	shld	ecx,ecx,5
+	vpsrld	xmm8,xmm4,30
+	vmovdqa	XMMWORD[48+rsp],xmm9
+	add	ebx,edi
+	xor	esi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	vpslld	xmm4,xmm4,2
+	add	eax,DWORD[8+rsp]
+	xor	esi,edx
+	mov	edi,ebx
+	shld	ebx,ebx,5
+	add	eax,esi
+	xor	edi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	vpor	xmm4,xmm4,xmm8
+	add	ebp,DWORD[12+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	shld	eax,eax,5
+	add	ebp,edi
+	xor	esi,ecx
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	vpalignr	xmm8,xmm4,xmm3,8
+	vpxor	xmm5,xmm5,xmm1
+	add	edx,DWORD[16+rsp]
+	xor	esi,ebx
+	mov	edi,ebp
+	shld	ebp,ebp,5
+	vpxor	xmm5,xmm5,xmm6
+	add	edx,esi
+	xor	edi,ebx
+	vpaddd	xmm9,xmm11,xmm4
+	shrd	eax,eax,7
+	add	edx,ebp
+	vpxor	xmm5,xmm5,xmm8
+	add	ecx,DWORD[20+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	shld	edx,edx,5
+	vpsrld	xmm8,xmm5,30
+	vmovdqa	XMMWORD[rsp],xmm9
+	add	ecx,edi
+	xor	esi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	vpslld	xmm5,xmm5,2
+	add	ebx,DWORD[24+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+	shld	ecx,ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	vpor	xmm5,xmm5,xmm8
+	add	eax,DWORD[28+rsp]
+	shrd	ecx,ecx,7
+	mov	esi,ebx
+	xor	edi,edx
+	shld	ebx,ebx,5
+	add	eax,edi
+	xor	esi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	vpalignr	xmm8,xmm5,xmm4,8
+	vpxor	xmm6,xmm6,xmm2
+	add	ebp,DWORD[32+rsp]
+	and	esi,ecx
+	xor	ecx,edx
+	shrd	ebx,ebx,7
+	vpxor	xmm6,xmm6,xmm7
+	mov	edi,eax
+	xor	esi,ecx
+	vpaddd	xmm9,xmm11,xmm5
+	shld	eax,eax,5
+	add	ebp,esi
+	vpxor	xmm6,xmm6,xmm8
+	xor	edi,ebx
+	xor	ebx,ecx
+	add	ebp,eax
+	add	edx,DWORD[36+rsp]
+	vpsrld	xmm8,xmm6,30
+	vmovdqa	XMMWORD[16+rsp],xmm9
+	and	edi,ebx
+	xor	ebx,ecx
+	shrd	eax,eax,7
+	mov	esi,ebp
+	vpslld	xmm6,xmm6,2
+	xor	edi,ebx
+	shld	ebp,ebp,5
+	add	edx,edi
+	xor	esi,eax
+	xor	eax,ebx
+	add	edx,ebp
+	add	ecx,DWORD[40+rsp]
+	and	esi,eax
+	vpor	xmm6,xmm6,xmm8
+	xor	eax,ebx
+	shrd	ebp,ebp,7
+	mov	edi,edx
+	xor	esi,eax
+	shld	edx,edx,5
+	add	ecx,esi
+	xor	edi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	add	ebx,DWORD[44+rsp]
+	and	edi,ebp
+	xor	ebp,eax
+	shrd	edx,edx,7
+	mov	esi,ecx
+	xor	edi,ebp
+	shld	ecx,ecx,5
+	add	ebx,edi
+	xor	esi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	vpalignr	xmm8,xmm6,xmm5,8
+	vpxor	xmm7,xmm7,xmm3
+	add	eax,DWORD[48+rsp]
+	and	esi,edx
+	xor	edx,ebp
+	shrd	ecx,ecx,7
+	vpxor	xmm7,xmm7,xmm0
+	mov	edi,ebx
+	xor	esi,edx
+	vpaddd	xmm9,xmm11,xmm6
+	vmovdqa	xmm11,XMMWORD[32+r14]
+	shld	ebx,ebx,5
+	add	eax,esi
+	vpxor	xmm7,xmm7,xmm8
+	xor	edi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	add	ebp,DWORD[52+rsp]
+	vpsrld	xmm8,xmm7,30
+	vmovdqa	XMMWORD[32+rsp],xmm9
+	and	edi,ecx
+	xor	ecx,edx
+	shrd	ebx,ebx,7
+	mov	esi,eax
+	vpslld	xmm7,xmm7,2
+	xor	edi,ecx
+	shld	eax,eax,5
+	add	ebp,edi
+	xor	esi,ebx
+	xor	ebx,ecx
+	add	ebp,eax
+	add	edx,DWORD[56+rsp]
+	and	esi,ebx
+	vpor	xmm7,xmm7,xmm8
+	xor	ebx,ecx
+	shrd	eax,eax,7
+	mov	edi,ebp
+	xor	esi,ebx
+	shld	ebp,ebp,5
+	add	edx,esi
+	xor	edi,eax
+	xor	eax,ebx
+	add	edx,ebp
+	add	ecx,DWORD[60+rsp]
+	and	edi,eax
+	xor	eax,ebx
+	shrd	ebp,ebp,7
+	mov	esi,edx
+	xor	edi,eax
+	shld	edx,edx,5
+	add	ecx,edi
+	xor	esi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	vpalignr	xmm8,xmm7,xmm6,8
+	vpxor	xmm0,xmm0,xmm4
+	add	ebx,DWORD[rsp]
+	and	esi,ebp
+	xor	ebp,eax
+	shrd	edx,edx,7
+	vpxor	xmm0,xmm0,xmm1
+	mov	edi,ecx
+	xor	esi,ebp
+	vpaddd	xmm9,xmm11,xmm7
+	shld	ecx,ecx,5
+	add	ebx,esi
+	vpxor	xmm0,xmm0,xmm8
+	xor	edi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	add	eax,DWORD[4+rsp]
+	vpsrld	xmm8,xmm0,30
+	vmovdqa	XMMWORD[48+rsp],xmm9
+	and	edi,edx
+	xor	edx,ebp
+	shrd	ecx,ecx,7
+	mov	esi,ebx
+	vpslld	xmm0,xmm0,2
+	xor	edi,edx
+	shld	ebx,ebx,5
+	add	eax,edi
+	xor	esi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	add	ebp,DWORD[8+rsp]
+	and	esi,ecx
+	vpor	xmm0,xmm0,xmm8
+	xor	ecx,edx
+	shrd	ebx,ebx,7
+	mov	edi,eax
+	xor	esi,ecx
+	shld	eax,eax,5
+	add	ebp,esi
+	xor	edi,ebx
+	xor	ebx,ecx
+	add	ebp,eax
+	add	edx,DWORD[12+rsp]
+	and	edi,ebx
+	xor	ebx,ecx
+	shrd	eax,eax,7
+	mov	esi,ebp
+	xor	edi,ebx
+	shld	ebp,ebp,5
+	add	edx,edi
+	xor	esi,eax
+	xor	eax,ebx
+	add	edx,ebp
+	vpalignr	xmm8,xmm0,xmm7,8
+	vpxor	xmm1,xmm1,xmm5
+	add	ecx,DWORD[16+rsp]
+	and	esi,eax
+	xor	eax,ebx
+	shrd	ebp,ebp,7
+	vpxor	xmm1,xmm1,xmm2
+	mov	edi,edx
+	xor	esi,eax
+	vpaddd	xmm9,xmm11,xmm0
+	shld	edx,edx,5
+	add	ecx,esi
+	vpxor	xmm1,xmm1,xmm8
+	xor	edi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	add	ebx,DWORD[20+rsp]
+	vpsrld	xmm8,xmm1,30
+	vmovdqa	XMMWORD[rsp],xmm9
+	and	edi,ebp
+	xor	ebp,eax
+	shrd	edx,edx,7
+	mov	esi,ecx
+	vpslld	xmm1,xmm1,2
+	xor	edi,ebp
+	shld	ecx,ecx,5
+	add	ebx,edi
+	xor	esi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	add	eax,DWORD[24+rsp]
+	and	esi,edx
+	vpor	xmm1,xmm1,xmm8
+	xor	edx,ebp
+	shrd	ecx,ecx,7
+	mov	edi,ebx
+	xor	esi,edx
+	shld	ebx,ebx,5
+	add	eax,esi
+	xor	edi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	add	ebp,DWORD[28+rsp]
+	and	edi,ecx
+	xor	ecx,edx
+	shrd	ebx,ebx,7
+	mov	esi,eax
+	xor	edi,ecx
+	shld	eax,eax,5
+	add	ebp,edi
+	xor	esi,ebx
+	xor	ebx,ecx
+	add	ebp,eax
+	vpalignr	xmm8,xmm1,xmm0,8
+	vpxor	xmm2,xmm2,xmm6
+	add	edx,DWORD[32+rsp]
+	and	esi,ebx
+	xor	ebx,ecx
+	shrd	eax,eax,7
+	vpxor	xmm2,xmm2,xmm3
+	mov	edi,ebp
+	xor	esi,ebx
+	vpaddd	xmm9,xmm11,xmm1
+	shld	ebp,ebp,5
+	add	edx,esi
+	vpxor	xmm2,xmm2,xmm8
+	xor	edi,eax
+	xor	eax,ebx
+	add	edx,ebp
+	add	ecx,DWORD[36+rsp]
+	vpsrld	xmm8,xmm2,30
+	vmovdqa	XMMWORD[16+rsp],xmm9
+	and	edi,eax
+	xor	eax,ebx
+	shrd	ebp,ebp,7
+	mov	esi,edx
+	vpslld	xmm2,xmm2,2
+	xor	edi,eax
+	shld	edx,edx,5
+	add	ecx,edi
+	xor	esi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	add	ebx,DWORD[40+rsp]
+	and	esi,ebp
+	vpor	xmm2,xmm2,xmm8
+	xor	ebp,eax
+	shrd	edx,edx,7
+	mov	edi,ecx
+	xor	esi,ebp
+	shld	ecx,ecx,5
+	add	ebx,esi
+	xor	edi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	add	eax,DWORD[44+rsp]
+	and	edi,edx
+	xor	edx,ebp
+	shrd	ecx,ecx,7
+	mov	esi,ebx
+	xor	edi,edx
+	shld	ebx,ebx,5
+	add	eax,edi
+	xor	esi,edx
+	add	eax,ebx
+	vpalignr	xmm8,xmm2,xmm1,8
+	vpxor	xmm3,xmm3,xmm7
+	add	ebp,DWORD[48+rsp]
+	xor	esi,ecx
+	mov	edi,eax
+	shld	eax,eax,5
+	vpxor	xmm3,xmm3,xmm4
+	add	ebp,esi
+	xor	edi,ecx
+	vpaddd	xmm9,xmm11,xmm2
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	vpxor	xmm3,xmm3,xmm8
+	add	edx,DWORD[52+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	shld	ebp,ebp,5
+	vpsrld	xmm8,xmm3,30
+	vmovdqa	XMMWORD[32+rsp],xmm9
+	add	edx,edi
+	xor	esi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	vpslld	xmm3,xmm3,2
+	add	ecx,DWORD[56+rsp]
+	xor	esi,eax
+	mov	edi,edx
+	shld	edx,edx,5
+	add	ecx,esi
+	xor	edi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	vpor	xmm3,xmm3,xmm8
+	add	ebx,DWORD[60+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	shld	ecx,ecx,5
+	add	ebx,edi
+	xor	esi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD[rsp]
+	vpaddd	xmm9,xmm11,xmm3
+	xor	esi,edx
+	mov	edi,ebx
+	shld	ebx,ebx,5
+	add	eax,esi
+	vmovdqa	XMMWORD[48+rsp],xmm9
+	xor	edi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[4+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	shld	eax,eax,5
+	add	ebp,edi
+	xor	esi,ecx
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	add	edx,DWORD[8+rsp]
+	xor	esi,ebx
+	mov	edi,ebp
+	shld	ebp,ebp,5
+	add	edx,esi
+	xor	edi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	add	ecx,DWORD[12+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	shld	edx,edx,5
+	add	ecx,edi
+	xor	esi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	cmp	r9,r10
+	je	NEAR $L$done_avx
+	vmovdqa	xmm6,XMMWORD[64+r14]
+	vmovdqa	xmm11,XMMWORD[((-64))+r14]
+	vmovdqu	xmm0,XMMWORD[r9]
+	vmovdqu	xmm1,XMMWORD[16+r9]
+	vmovdqu	xmm2,XMMWORD[32+r9]
+	vmovdqu	xmm3,XMMWORD[48+r9]
+	vpshufb	xmm0,xmm0,xmm6
+	add	r9,64
+	add	ebx,DWORD[16+rsp]
+	xor	esi,ebp
+	vpshufb	xmm1,xmm1,xmm6
+	mov	edi,ecx
+	shld	ecx,ecx,5
+	vpaddd	xmm4,xmm0,xmm11
+	add	ebx,esi
+	xor	edi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	vmovdqa	XMMWORD[rsp],xmm4
+	add	eax,DWORD[20+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	add	eax,edi
+	xor	esi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[24+rsp]
+	xor	esi,ecx
+	mov	edi,eax
+	shld	eax,eax,5
+	add	ebp,esi
+	xor	edi,ecx
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	add	edx,DWORD[28+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	shld	ebp,ebp,5
+	add	edx,edi
+	xor	esi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	add	ecx,DWORD[32+rsp]
+	xor	esi,eax
+	vpshufb	xmm2,xmm2,xmm6
+	mov	edi,edx
+	shld	edx,edx,5
+	vpaddd	xmm5,xmm1,xmm11
+	add	ecx,esi
+	xor	edi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	vmovdqa	XMMWORD[16+rsp],xmm5
+	add	ebx,DWORD[36+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	shld	ecx,ecx,5
+	add	ebx,edi
+	xor	esi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD[40+rsp]
+	xor	esi,edx
+	mov	edi,ebx
+	shld	ebx,ebx,5
+	add	eax,esi
+	xor	edi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[44+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	shld	eax,eax,5
+	add	ebp,edi
+	xor	esi,ecx
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	add	edx,DWORD[48+rsp]
+	xor	esi,ebx
+	vpshufb	xmm3,xmm3,xmm6
+	mov	edi,ebp
+	shld	ebp,ebp,5
+	vpaddd	xmm6,xmm2,xmm11
+	add	edx,esi
+	xor	edi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	vmovdqa	XMMWORD[32+rsp],xmm6
+	add	ecx,DWORD[52+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	shld	edx,edx,5
+	add	ecx,edi
+	xor	esi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	add	ebx,DWORD[56+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+	shld	ecx,ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD[60+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	add	eax,edi
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	eax,DWORD[r8]
+	add	esi,DWORD[4+r8]
+	add	ecx,DWORD[8+r8]
+	add	edx,DWORD[12+r8]
+	mov	DWORD[r8],eax
+	add	ebp,DWORD[16+r8]
+	mov	DWORD[4+r8],esi
+	mov	ebx,esi
+	mov	DWORD[8+r8],ecx
+	mov	edi,ecx
+	mov	DWORD[12+r8],edx
+	xor	edi,edx
+	mov	DWORD[16+r8],ebp
+	and	esi,edi
+	jmp	NEAR $L$oop_avx
+
+ALIGN	16
+$L$done_avx:
+	add	ebx,DWORD[16+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+	shld	ecx,ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD[20+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	add	eax,edi
+	xor	esi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[24+rsp]
+	xor	esi,ecx
+	mov	edi,eax
+	shld	eax,eax,5
+	add	ebp,esi
+	xor	edi,ecx
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	add	edx,DWORD[28+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	shld	ebp,ebp,5
+	add	edx,edi
+	xor	esi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	add	ecx,DWORD[32+rsp]
+	xor	esi,eax
+	mov	edi,edx
+	shld	edx,edx,5
+	add	ecx,esi
+	xor	edi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	add	ebx,DWORD[36+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	shld	ecx,ecx,5
+	add	ebx,edi
+	xor	esi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD[40+rsp]
+	xor	esi,edx
+	mov	edi,ebx
+	shld	ebx,ebx,5
+	add	eax,esi
+	xor	edi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[44+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	shld	eax,eax,5
+	add	ebp,edi
+	xor	esi,ecx
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	add	edx,DWORD[48+rsp]
+	xor	esi,ebx
+	mov	edi,ebp
+	shld	ebp,ebp,5
+	add	edx,esi
+	xor	edi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	add	ecx,DWORD[52+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	shld	edx,edx,5
+	add	ecx,edi
+	xor	esi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	add	ebx,DWORD[56+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+	shld	ecx,ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD[60+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	add	eax,edi
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	vzeroupper
+
+	add	eax,DWORD[r8]
+	add	esi,DWORD[4+r8]
+	add	ecx,DWORD[8+r8]
+	mov	DWORD[r8],eax
+	add	edx,DWORD[12+r8]
+	mov	DWORD[4+r8],esi
+	add	ebp,DWORD[16+r8]
+	mov	DWORD[8+r8],ecx
+	mov	DWORD[12+r8],edx
+	mov	DWORD[16+r8],ebp
+	movaps	xmm6,XMMWORD[((-40-96))+r11]
+	movaps	xmm7,XMMWORD[((-40-80))+r11]
+	movaps	xmm8,XMMWORD[((-40-64))+r11]
+	movaps	xmm9,XMMWORD[((-40-48))+r11]
+	movaps	xmm10,XMMWORD[((-40-32))+r11]
+	movaps	xmm11,XMMWORD[((-40-16))+r11]
+	mov	r14,QWORD[((-40))+r11]
+	mov	r13,QWORD[((-32))+r11]
+	mov	r12,QWORD[((-24))+r11]
+	mov	rbp,QWORD[((-16))+r11]
+	mov	rbx,QWORD[((-8))+r11]
+	lea	rsp,[r11]
+$L$epilogue_avx:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_sha1_block_data_order_avx:
+ALIGN	64
+K_XX_XX:	;64-byte-aligned constant pool: SHA-1 round constants, pshufb masks, ident string
+	DD	0x5a827999,0x5a827999,0x5a827999,0x5a827999	;SHA-1 K for rounds 0-19, one copy per 32-bit lane
+	DD	0x5a827999,0x5a827999,0x5a827999,0x5a827999	;second 16 bytes: each constant occupies 32 bytes in total
+	DD	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1	;SHA-1 K for rounds 20-39
+	DD	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+	DD	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc	;SHA-1 K for rounds 40-59
+	DD	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+	DD	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6	;SHA-1 K for rounds 60-79
+	DD	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+	DD	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f	;shuffle mask (bytes 3,2,1,0,7,6,5,4,...): reverses bytes within each dword
+	DD	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f	;NOTE(review): presumably the mask the AVX loop loads from XMMWORD[64+r14] for vpshufb - confirm r14 base
+DB	0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0	;full 16-byte reversal mask; its consumer is not visible in this part of the file
+DB	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115	;ASCII "SHA1 block trans"
+DB	102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44	;ASCII "form for x86_64,"
+DB	32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60	;ASCII " CRYPTOGAMS by <"
+DB	97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114	;ASCII "appro@openssl.or"
+DB	103,62,0	;ASCII "g>" followed by the NUL terminator
+ALIGN	64
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+se_handler:	;exception handler named by $L$SEH_info_sha1_block_data_order; assumes r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (Win64 handler ABI) - struct field names below rely on that layout
+	push	rsi	;save every register this handler modifies
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq	;flags as well: the copy further down executes cld
+	sub	rsp,64	;room for the four stack arguments stored at 32..56+rsp before the RtlVirtualUnwind call
+
+	mov	rax,QWORD[120+r8]	;rax = context->Rax
+	mov	rbx,QWORD[248+r8]	;rbx = context->Rip (address being unwound)
+
+	lea	r10,[$L$prologue]
+	cmp	rbx,r10	;Rip below $L$prologue?
+	jb	NEAR $L$common_seh_tail	;yes: skip callee-saved recovery, tail treats context->Rax as the frame base
+
+	mov	rax,QWORD[152+r8]	;rax = context->Rsp
+
+	lea	r10,[$L$epilogue]
+	cmp	rbx,r10	;Rip at or beyond $L$epilogue?
+	jae	NEAR $L$common_seh_tail	;yes: skip callee-saved recovery, tail uses context->Rsp as is
+
+	mov	rax,QWORD[64+rax]	;reload rax from frame slot 64+rsp - presumably the entry rsp stashed by the function body (TODO confirm against its prologue)
+
+	mov	rbx,QWORD[((-8))+rax]	;fetch the five saved registers from the slots just below rax
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	QWORD[144+r8],rbx	;context->Rbx
+	mov	QWORD[160+r8],rbp	;context->Rbp
+	mov	QWORD[216+r8],r12	;context->R12
+	mov	QWORD[224+r8],r13	;context->R13
+	mov	QWORD[232+r8],r14	;context->R14
+
+	jmp	NEAR $L$common_seh_tail	;finish in the tail shared with ssse3_handler
+
+
+ALIGN	16
+ssse3_handler:	;exception handler named by the _ssse3 and _avx .xdata records; assumes r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (Win64 handler ABI) - struct field names below rely on that layout
+	push	rsi	;save every register this handler modifies
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq	;flags as well: cld is executed by the copies below
+	sub	rsp,64	;room for the four stack arguments stored at 32..56+rsp before the RtlVirtualUnwind call
+
+	mov	rax,QWORD[120+r8]	;rax = context->Rax
+	mov	rbx,QWORD[248+r8]	;rbx = context->Rip (address being unwound)
+
+	mov	rsi,QWORD[8+r9]	;rsi = disp->ImageBase
+	mov	r11,QWORD[56+r9]	;r11 = disp->HandlerData (the two RVAs that follow the handler RVA in .xdata)
+
+	mov	r10d,DWORD[r11]	;HandlerData[0]: RVA of the prologue label
+	lea	r10,[r10*1+rsi]	;RVA + image base = absolute address
+	cmp	rbx,r10	;Rip below the prologue label?
+	jb	NEAR $L$common_seh_tail	;yes: skip register recovery, tail treats context->Rax as the frame base
+
+	mov	rax,QWORD[208+r8]	;rax = context->R11; the AVX body restores its frame from r11 (cf. lea rsp,[r11] ahead of $L$epilogue_avx)
+
+	mov	r10d,DWORD[4+r11]	;HandlerData[1]: RVA of the epilogue label
+	lea	r10,[r10*1+rsi]	;RVA + image base = absolute address
+	cmp	rbx,r10	;Rip at or beyond the epilogue label?
+	jae	NEAR $L$common_seh_tail	;yes: skip register recovery
+
+	lea	rsi,[((-40-96))+rax]	;source: XMM save area (xmm6 slot, matching the movaps restores in the function epilogue)
+	lea	rdi,[512+r8]	;destination: context->Xmm6
+	mov	ecx,12	;12 qwords = 96 bytes = six XMM registers (xmm6..xmm11)
+	DD	0xa548f3fc	;encoded bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rbx,QWORD[((-8))+rax]	;fetch the five saved GPRs from the slots just below rax
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	QWORD[144+r8],rbx	;context->Rbx
+	mov	QWORD[160+r8],rbp	;context->Rbp
+	mov	QWORD[216+r8],r12	;context->R12
+	mov	QWORD[224+r8],r13	;context->R13
+	mov	QWORD[232+r8],r14	;context->R14
+
+$L$common_seh_tail:	;shared by se_handler and ssse3_handler; expects rax = frame base, r8 = context, r9 = disp
+	mov	rdi,QWORD[8+rax]	;rdi as stored by the WIN64 prologue (mirrors the epilogue's mov rdi,QWORD[8+rsp])
+	mov	rsi,QWORD[16+rax]	;rsi as stored by the WIN64 prologue
+	mov	QWORD[152+r8],rax	;context->Rsp = rax
+	mov	QWORD[168+r8],rsi	;context->Rsi
+	mov	QWORD[176+r8],rdi	;context->Rdi
+
+	mov	rdi,QWORD[40+r9]	;destination: disp->ContextRecord
+	mov	rsi,r8	;source: the context patched above
+	mov	ecx,154	;154 qwords = 1232 bytes, presumably sizeof(CONTEXT)
+	DD	0xa548f3fc	;encoded bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rsi,r9	;rsi = disp, freeing r9 for an argument
+	xor	rcx,rcx	;arg1 = 0 (HandlerType; UNW_FLAG_NHANDLER)
+	mov	rdx,QWORD[8+rsi]	;arg2 = disp->ImageBase
+	mov	r8,QWORD[rsi]	;arg3 = disp->ControlPc
+	mov	r9,QWORD[16+rsi]	;arg4 = disp->FunctionEntry
+	mov	r10,QWORD[40+rsi]	;disp->ContextRecord
+	lea	r11,[56+rsi]	;&disp->HandlerData
+	lea	r12,[24+rsi]	;&disp->EstablisherFrame
+	mov	QWORD[32+rsp],r10	;arg5 = ContextRecord
+	mov	QWORD[40+rsp],r11	;arg6 = &HandlerData
+	mov	QWORD[48+rsp],r12	;arg7 = &EstablisherFrame
+	mov	QWORD[56+rsp],rcx	;arg8 = NULL (rcx is still zero)
+	call	QWORD[__imp_RtlVirtualUnwind]	;indirect call through the import declared with EXTERN
+
+	mov	eax,1	;return value 1 (ExceptionContinueSearch)
+	add	rsp,64	;drop the argument area
+	popfq	;restore flags and registers in reverse push order
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4	;function table: one {begin RVA, end RVA, unwind-info RVA} triple per routine
+ALIGN	4
+	DD	$L$SEH_begin_sha1_block_data_order wrt ..imagebase	;entry 1: sha1_block_data_order
+	DD	$L$SEH_end_sha1_block_data_order wrt ..imagebase
+	DD	$L$SEH_info_sha1_block_data_order wrt ..imagebase
+	DD	$L$SEH_begin_sha1_block_data_order_ssse3 wrt ..imagebase	;entry 2: SSSE3 variant
+	DD	$L$SEH_end_sha1_block_data_order_ssse3 wrt ..imagebase
+	DD	$L$SEH_info_sha1_block_data_order_ssse3 wrt ..imagebase
+	DD	$L$SEH_begin_sha1_block_data_order_avx wrt ..imagebase	;entry 3: AVX variant
+	DD	$L$SEH_end_sha1_block_data_order_avx wrt ..imagebase
+	DD	$L$SEH_info_sha1_block_data_order_avx wrt ..imagebase
+section	.xdata rdata align=8	;unwind-info records pointed to by the third field of each .pdata entry
+ALIGN	8
+$L$SEH_info_sha1_block_data_order:
+DB	9,0,0,0	;byte 0 = 9: version 1 with the exception-handler flag (1 | 1<<3); prologue size, code count and frame register all zero
+	DD	se_handler wrt ..imagebase	;handler RVA
+$L$SEH_info_sha1_block_data_order_ssse3:
+DB	9,0,0,0	;same header: version 1, handler flag, no unwind codes
+	DD	ssse3_handler wrt ..imagebase	;handler RVA
+	DD	$L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase	;HandlerData[0],[1]: range bounds ssse3_handler compares Rip against
+$L$SEH_info_sha1_block_data_order_avx:
+DB	9,0,0,0	;same header: version 1, handler flag, no unwind codes
+	DD	ssse3_handler wrt ..imagebase	;AVX variant reuses ssse3_handler
+	DD	$L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase	;HandlerData[0],[1]: range bounds for the AVX routine
diff --git a/third_party/boringssl/win-x86_64/crypto/fipsmodule/sha256-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/fipsmodule/sha256-x86_64.asm
new file mode 100644
index 0000000..6e3d154
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/fipsmodule/sha256-x86_64.asm
@@ -0,0 +1,4081 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.text code align=64
+
+
+EXTERN	OPENSSL_ia32cap_P
+global	sha256_block_data_order
+
+ALIGN	16
+sha256_block_data_order:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha256_block_data_order:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+	lea	r11,[OPENSSL_ia32cap_P]
+	mov	r9d,DWORD[r11]
+	mov	r10d,DWORD[4+r11]
+	mov	r11d,DWORD[8+r11]
+	and	r9d,1073741824
+	and	r10d,268435968
+	or	r10d,r9d
+	cmp	r10d,1342177792
+	je	NEAR $L$avx_shortcut
+	test	r10d,512
+	jnz	NEAR $L$ssse3_shortcut
+	mov	rax,rsp
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	shl	rdx,4
+	sub	rsp,16*4+4*8
+	lea	rdx,[rdx*4+rsi]
+	and	rsp,-64
+	mov	QWORD[((64+0))+rsp],rdi
+	mov	QWORD[((64+8))+rsp],rsi
+	mov	QWORD[((64+16))+rsp],rdx
+	mov	QWORD[((64+24))+rsp],rax
+$L$prologue:
+
+	mov	eax,DWORD[rdi]
+	mov	ebx,DWORD[4+rdi]
+	mov	ecx,DWORD[8+rdi]
+	mov	edx,DWORD[12+rdi]
+	mov	r8d,DWORD[16+rdi]
+	mov	r9d,DWORD[20+rdi]
+	mov	r10d,DWORD[24+rdi]
+	mov	r11d,DWORD[28+rdi]
+	jmp	NEAR $L$loop
+
+ALIGN	16
+$L$loop:
+	mov	edi,ebx
+	lea	rbp,[K256]
+	xor	edi,ecx
+	mov	r12d,DWORD[rsi]
+	mov	r13d,r8d
+	mov	r14d,eax
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r9d
+
+	xor	r13d,r8d
+	ror	r14d,9
+	xor	r15d,r10d
+
+	mov	DWORD[rsp],r12d
+	xor	r14d,eax
+	and	r15d,r8d
+
+	ror	r13d,5
+	add	r12d,r11d
+	xor	r15d,r10d
+
+	ror	r14d,11
+	xor	r13d,r8d
+	add	r12d,r15d
+
+	mov	r15d,eax
+	add	r12d,DWORD[rbp]
+	xor	r14d,eax
+
+	xor	r15d,ebx
+	ror	r13d,6
+	mov	r11d,ebx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r11d,edi
+	add	edx,r12d
+	add	r11d,r12d
+
+	lea	rbp,[4+rbp]
+	add	r11d,r14d
+	mov	r12d,DWORD[4+rsi]
+	mov	r13d,edx
+	mov	r14d,r11d
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,r8d
+
+	xor	r13d,edx
+	ror	r14d,9
+	xor	edi,r9d
+
+	mov	DWORD[4+rsp],r12d
+	xor	r14d,r11d
+	and	edi,edx
+
+	ror	r13d,5
+	add	r12d,r10d
+	xor	edi,r9d
+
+	ror	r14d,11
+	xor	r13d,edx
+	add	r12d,edi
+
+	mov	edi,r11d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r11d
+
+	xor	edi,eax
+	ror	r13d,6
+	mov	r10d,eax
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r10d,r15d
+	add	ecx,r12d
+	add	r10d,r12d
+
+	lea	rbp,[4+rbp]
+	add	r10d,r14d
+	mov	r12d,DWORD[8+rsi]
+	mov	r13d,ecx
+	mov	r14d,r10d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,edx
+
+	xor	r13d,ecx
+	ror	r14d,9
+	xor	r15d,r8d
+
+	mov	DWORD[8+rsp],r12d
+	xor	r14d,r10d
+	and	r15d,ecx
+
+	ror	r13d,5
+	add	r12d,r9d
+	xor	r15d,r8d
+
+	ror	r14d,11
+	xor	r13d,ecx
+	add	r12d,r15d
+
+	mov	r15d,r10d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r10d
+
+	xor	r15d,r11d
+	ror	r13d,6
+	mov	r9d,r11d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r9d,edi
+	add	ebx,r12d
+	add	r9d,r12d
+
+	lea	rbp,[4+rbp]
+	add	r9d,r14d
+	mov	r12d,DWORD[12+rsi]
+	mov	r13d,ebx
+	mov	r14d,r9d
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,ecx
+
+	xor	r13d,ebx
+	ror	r14d,9
+	xor	edi,edx
+
+	mov	DWORD[12+rsp],r12d
+	xor	r14d,r9d
+	and	edi,ebx
+
+	ror	r13d,5
+	add	r12d,r8d
+	xor	edi,edx
+
+	ror	r14d,11
+	xor	r13d,ebx
+	add	r12d,edi
+
+	mov	edi,r9d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r9d
+
+	xor	edi,r10d
+	ror	r13d,6
+	mov	r8d,r10d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r8d,r15d
+	add	eax,r12d
+	add	r8d,r12d
+
+	lea	rbp,[20+rbp]
+	add	r8d,r14d
+	mov	r12d,DWORD[16+rsi]
+	mov	r13d,eax
+	mov	r14d,r8d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,ebx
+
+	xor	r13d,eax
+	ror	r14d,9
+	xor	r15d,ecx
+
+	mov	DWORD[16+rsp],r12d
+	xor	r14d,r8d
+	and	r15d,eax
+
+	ror	r13d,5
+	add	r12d,edx
+	xor	r15d,ecx
+
+	ror	r14d,11
+	xor	r13d,eax
+	add	r12d,r15d
+
+	mov	r15d,r8d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r8d
+
+	xor	r15d,r9d
+	ror	r13d,6
+	mov	edx,r9d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	edx,edi
+	add	r11d,r12d
+	add	edx,r12d
+
+	lea	rbp,[4+rbp]
+	add	edx,r14d
+	mov	r12d,DWORD[20+rsi]
+	mov	r13d,r11d
+	mov	r14d,edx
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,eax
+
+	xor	r13d,r11d
+	ror	r14d,9
+	xor	edi,ebx
+
+	mov	DWORD[20+rsp],r12d
+	xor	r14d,edx
+	and	edi,r11d
+
+	ror	r13d,5
+	add	r12d,ecx
+	xor	edi,ebx
+
+	ror	r14d,11
+	xor	r13d,r11d
+	add	r12d,edi
+
+	mov	edi,edx
+	add	r12d,DWORD[rbp]
+	xor	r14d,edx
+
+	xor	edi,r8d
+	ror	r13d,6
+	mov	ecx,r8d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ecx,r15d
+	add	r10d,r12d
+	add	ecx,r12d
+
+	lea	rbp,[4+rbp]
+	add	ecx,r14d
+	mov	r12d,DWORD[24+rsi]
+	mov	r13d,r10d
+	mov	r14d,ecx
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r11d
+
+	xor	r13d,r10d
+	ror	r14d,9
+	xor	r15d,eax
+
+	mov	DWORD[24+rsp],r12d
+	xor	r14d,ecx
+	and	r15d,r10d
+
+	ror	r13d,5
+	add	r12d,ebx
+	xor	r15d,eax
+
+	ror	r14d,11
+	xor	r13d,r10d
+	add	r12d,r15d
+
+	mov	r15d,ecx
+	add	r12d,DWORD[rbp]
+	xor	r14d,ecx
+
+	xor	r15d,edx
+	ror	r13d,6
+	mov	ebx,edx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ebx,edi
+	add	r9d,r12d
+	add	ebx,r12d
+
+	lea	rbp,[4+rbp]
+	add	ebx,r14d
+	mov	r12d,DWORD[28+rsi]
+	mov	r13d,r9d
+	mov	r14d,ebx
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,r10d
+
+	xor	r13d,r9d
+	ror	r14d,9
+	xor	edi,r11d
+
+	mov	DWORD[28+rsp],r12d
+	xor	r14d,ebx
+	and	edi,r9d
+
+	ror	r13d,5
+	add	r12d,eax
+	xor	edi,r11d
+
+	ror	r14d,11
+	xor	r13d,r9d
+	add	r12d,edi
+
+	mov	edi,ebx
+	add	r12d,DWORD[rbp]
+	xor	r14d,ebx
+
+	xor	edi,ecx
+	ror	r13d,6
+	mov	eax,ecx
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	eax,r15d
+	add	r8d,r12d
+	add	eax,r12d
+
+	lea	rbp,[20+rbp]
+	add	eax,r14d
+	mov	r12d,DWORD[32+rsi]
+	mov	r13d,r8d
+	mov	r14d,eax
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r9d
+
+	xor	r13d,r8d
+	ror	r14d,9
+	xor	r15d,r10d
+
+	mov	DWORD[32+rsp],r12d
+	xor	r14d,eax
+	and	r15d,r8d
+
+	ror	r13d,5
+	add	r12d,r11d
+	xor	r15d,r10d
+
+	ror	r14d,11
+	xor	r13d,r8d
+	add	r12d,r15d
+
+	mov	r15d,eax
+	add	r12d,DWORD[rbp]
+	xor	r14d,eax
+
+	xor	r15d,ebx
+	ror	r13d,6
+	mov	r11d,ebx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r11d,edi
+	add	edx,r12d
+	add	r11d,r12d
+
+	lea	rbp,[4+rbp]
+	add	r11d,r14d
+	mov	r12d,DWORD[36+rsi]
+	mov	r13d,edx
+	mov	r14d,r11d
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,r8d
+
+	xor	r13d,edx
+	ror	r14d,9
+	xor	edi,r9d
+
+	mov	DWORD[36+rsp],r12d
+	xor	r14d,r11d
+	and	edi,edx
+
+	ror	r13d,5
+	add	r12d,r10d
+	xor	edi,r9d
+
+	ror	r14d,11
+	xor	r13d,edx
+	add	r12d,edi
+
+	mov	edi,r11d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r11d
+
+	xor	edi,eax
+	ror	r13d,6
+	mov	r10d,eax
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r10d,r15d
+	add	ecx,r12d
+	add	r10d,r12d
+
+	lea	rbp,[4+rbp]
+	add	r10d,r14d
+	mov	r12d,DWORD[40+rsi]
+	mov	r13d,ecx
+	mov	r14d,r10d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,edx
+
+	xor	r13d,ecx
+	ror	r14d,9
+	xor	r15d,r8d
+
+	mov	DWORD[40+rsp],r12d
+	xor	r14d,r10d
+	and	r15d,ecx
+
+	ror	r13d,5
+	add	r12d,r9d
+	xor	r15d,r8d
+
+	ror	r14d,11
+	xor	r13d,ecx
+	add	r12d,r15d
+
+	mov	r15d,r10d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r10d
+
+	xor	r15d,r11d
+	ror	r13d,6
+	mov	r9d,r11d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r9d,edi
+	add	ebx,r12d
+	add	r9d,r12d
+
+	lea	rbp,[4+rbp]
+	add	r9d,r14d
+	mov	r12d,DWORD[44+rsi]
+	mov	r13d,ebx
+	mov	r14d,r9d
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,ecx
+
+	xor	r13d,ebx
+	ror	r14d,9
+	xor	edi,edx
+
+	mov	DWORD[44+rsp],r12d
+	xor	r14d,r9d
+	and	edi,ebx
+
+	ror	r13d,5
+	add	r12d,r8d
+	xor	edi,edx
+
+	ror	r14d,11
+	xor	r13d,ebx
+	add	r12d,edi
+
+	mov	edi,r9d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r9d
+
+	xor	edi,r10d
+	ror	r13d,6
+	mov	r8d,r10d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r8d,r15d
+	add	eax,r12d
+	add	r8d,r12d
+
+	lea	rbp,[20+rbp]
+	add	r8d,r14d
+	mov	r12d,DWORD[48+rsi]
+	mov	r13d,eax
+	mov	r14d,r8d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,ebx
+
+	xor	r13d,eax
+	ror	r14d,9
+	xor	r15d,ecx
+
+	mov	DWORD[48+rsp],r12d
+	xor	r14d,r8d
+	and	r15d,eax
+
+	ror	r13d,5
+	add	r12d,edx
+	xor	r15d,ecx
+
+	ror	r14d,11
+	xor	r13d,eax
+	add	r12d,r15d
+
+	mov	r15d,r8d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r8d
+
+	xor	r15d,r9d
+	ror	r13d,6
+	mov	edx,r9d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	edx,edi
+	add	r11d,r12d
+	add	edx,r12d
+
+	lea	rbp,[4+rbp]
+	add	edx,r14d
+	mov	r12d,DWORD[52+rsi]
+	mov	r13d,r11d
+	mov	r14d,edx
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,eax
+
+	xor	r13d,r11d
+	ror	r14d,9
+	xor	edi,ebx
+
+	mov	DWORD[52+rsp],r12d
+	xor	r14d,edx
+	and	edi,r11d
+
+	ror	r13d,5
+	add	r12d,ecx
+	xor	edi,ebx
+
+	ror	r14d,11
+	xor	r13d,r11d
+	add	r12d,edi
+
+	mov	edi,edx
+	add	r12d,DWORD[rbp]
+	xor	r14d,edx
+
+	xor	edi,r8d
+	ror	r13d,6
+	mov	ecx,r8d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ecx,r15d
+	add	r10d,r12d
+	add	ecx,r12d
+
+	lea	rbp,[4+rbp]
+	add	ecx,r14d
+	mov	r12d,DWORD[56+rsi]
+	mov	r13d,r10d
+	mov	r14d,ecx
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r11d
+
+	xor	r13d,r10d
+	ror	r14d,9
+	xor	r15d,eax
+
+	mov	DWORD[56+rsp],r12d
+	xor	r14d,ecx
+	and	r15d,r10d
+
+	ror	r13d,5
+	add	r12d,ebx
+	xor	r15d,eax
+
+	ror	r14d,11
+	xor	r13d,r10d
+	add	r12d,r15d
+
+	mov	r15d,ecx
+	add	r12d,DWORD[rbp]
+	xor	r14d,ecx
+
+	xor	r15d,edx
+	ror	r13d,6
+	mov	ebx,edx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ebx,edi
+	add	r9d,r12d
+	add	ebx,r12d
+
+	lea	rbp,[4+rbp]
+	add	ebx,r14d
+	mov	r12d,DWORD[60+rsi]
+	mov	r13d,r9d
+	mov	r14d,ebx
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,r10d
+
+	xor	r13d,r9d
+	ror	r14d,9
+	xor	edi,r11d
+
+	mov	DWORD[60+rsp],r12d
+	xor	r14d,ebx
+	and	edi,r9d
+
+	ror	r13d,5
+	add	r12d,eax
+	xor	edi,r11d
+
+	ror	r14d,11
+	xor	r13d,r9d
+	add	r12d,edi
+
+	mov	edi,ebx
+	add	r12d,DWORD[rbp]
+	xor	r14d,ebx
+
+	xor	edi,ecx
+	ror	r13d,6
+	mov	eax,ecx
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	eax,r15d
+	add	r8d,r12d
+	add	eax,r12d
+
+	lea	rbp,[20+rbp]
+	jmp	NEAR $L$rounds_16_xx
+ALIGN	16
+$L$rounds_16_xx:
+	mov	r13d,DWORD[4+rsp]
+	mov	r15d,DWORD[56+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	eax,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD[36+rsp]
+
+	add	r12d,DWORD[rsp]
+	mov	r13d,r8d
+	add	r12d,r15d
+	mov	r14d,eax
+	ror	r13d,14
+	mov	r15d,r9d
+
+	xor	r13d,r8d
+	ror	r14d,9
+	xor	r15d,r10d
+
+	mov	DWORD[rsp],r12d
+	xor	r14d,eax
+	and	r15d,r8d
+
+	ror	r13d,5
+	add	r12d,r11d
+	xor	r15d,r10d
+
+	ror	r14d,11
+	xor	r13d,r8d
+	add	r12d,r15d
+
+	mov	r15d,eax
+	add	r12d,DWORD[rbp]
+	xor	r14d,eax
+
+	xor	r15d,ebx
+	ror	r13d,6
+	mov	r11d,ebx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r11d,edi
+	add	edx,r12d
+	add	r11d,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[8+rsp]
+	mov	edi,DWORD[60+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r11d,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD[40+rsp]
+
+	add	r12d,DWORD[4+rsp]
+	mov	r13d,edx
+	add	r12d,edi
+	mov	r14d,r11d
+	ror	r13d,14
+	mov	edi,r8d
+
+	xor	r13d,edx
+	ror	r14d,9
+	xor	edi,r9d
+
+	mov	DWORD[4+rsp],r12d
+	xor	r14d,r11d
+	and	edi,edx
+
+	ror	r13d,5
+	add	r12d,r10d
+	xor	edi,r9d
+
+	ror	r14d,11
+	xor	r13d,edx
+	add	r12d,edi
+
+	mov	edi,r11d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r11d
+
+	xor	edi,eax
+	ror	r13d,6
+	mov	r10d,eax
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r10d,r15d
+	add	ecx,r12d
+	add	r10d,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[12+rsp]
+	mov	r15d,DWORD[rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r10d,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD[44+rsp]
+
+	add	r12d,DWORD[8+rsp]
+	mov	r13d,ecx
+	add	r12d,r15d
+	mov	r14d,r10d
+	ror	r13d,14
+	mov	r15d,edx
+
+	xor	r13d,ecx
+	ror	r14d,9
+	xor	r15d,r8d
+
+	mov	DWORD[8+rsp],r12d
+	xor	r14d,r10d
+	and	r15d,ecx
+
+	ror	r13d,5
+	add	r12d,r9d
+	xor	r15d,r8d
+
+	ror	r14d,11
+	xor	r13d,ecx
+	add	r12d,r15d
+
+	mov	r15d,r10d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r10d
+
+	xor	r15d,r11d
+	ror	r13d,6
+	mov	r9d,r11d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r9d,edi
+	add	ebx,r12d
+	add	r9d,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[16+rsp]
+	mov	edi,DWORD[4+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r9d,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD[48+rsp]
+
+	add	r12d,DWORD[12+rsp]
+	mov	r13d,ebx
+	add	r12d,edi
+	mov	r14d,r9d
+	ror	r13d,14
+	mov	edi,ecx
+
+	xor	r13d,ebx
+	ror	r14d,9
+	xor	edi,edx
+
+	mov	DWORD[12+rsp],r12d
+	xor	r14d,r9d
+	and	edi,ebx
+
+	ror	r13d,5
+	add	r12d,r8d
+	xor	edi,edx
+
+	ror	r14d,11
+	xor	r13d,ebx
+	add	r12d,edi
+
+	mov	edi,r9d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r9d
+
+	xor	edi,r10d
+	ror	r13d,6
+	mov	r8d,r10d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r8d,r15d
+	add	eax,r12d
+	add	r8d,r12d
+
+	lea	rbp,[20+rbp]
+	mov	r13d,DWORD[20+rsp]
+	mov	r15d,DWORD[8+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r8d,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD[52+rsp]
+
+	add	r12d,DWORD[16+rsp]
+	mov	r13d,eax
+	add	r12d,r15d
+	mov	r14d,r8d
+	ror	r13d,14
+	mov	r15d,ebx
+
+	xor	r13d,eax
+	ror	r14d,9
+	xor	r15d,ecx
+
+	mov	DWORD[16+rsp],r12d
+	xor	r14d,r8d
+	and	r15d,eax
+
+	ror	r13d,5
+	add	r12d,edx
+	xor	r15d,ecx
+
+	ror	r14d,11
+	xor	r13d,eax
+	add	r12d,r15d
+
+	mov	r15d,r8d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r8d
+
+	xor	r15d,r9d
+	ror	r13d,6
+	mov	edx,r9d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	edx,edi
+	add	r11d,r12d
+	add	edx,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[24+rsp]
+	mov	edi,DWORD[12+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	edx,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD[56+rsp]
+
+	add	r12d,DWORD[20+rsp]
+	mov	r13d,r11d
+	add	r12d,edi
+	mov	r14d,edx
+	ror	r13d,14
+	mov	edi,eax
+
+	xor	r13d,r11d
+	ror	r14d,9
+	xor	edi,ebx
+
+	mov	DWORD[20+rsp],r12d
+	xor	r14d,edx
+	and	edi,r11d
+
+	ror	r13d,5
+	add	r12d,ecx
+	xor	edi,ebx
+
+	ror	r14d,11
+	xor	r13d,r11d
+	add	r12d,edi
+
+	mov	edi,edx
+	add	r12d,DWORD[rbp]
+	xor	r14d,edx
+
+	xor	edi,r8d
+	ror	r13d,6
+	mov	ecx,r8d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ecx,r15d
+	add	r10d,r12d
+	add	ecx,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[28+rsp]
+	mov	r15d,DWORD[16+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	ecx,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD[60+rsp]
+
+	add	r12d,DWORD[24+rsp]
+	mov	r13d,r10d
+	add	r12d,r15d
+	mov	r14d,ecx
+	ror	r13d,14
+	mov	r15d,r11d
+
+	xor	r13d,r10d
+	ror	r14d,9
+	xor	r15d,eax
+
+	mov	DWORD[24+rsp],r12d
+	xor	r14d,ecx
+	and	r15d,r10d
+
+	ror	r13d,5
+	add	r12d,ebx
+	xor	r15d,eax
+
+	ror	r14d,11
+	xor	r13d,r10d
+	add	r12d,r15d
+
+	mov	r15d,ecx
+	add	r12d,DWORD[rbp]
+	xor	r14d,ecx
+
+	xor	r15d,edx
+	ror	r13d,6
+	mov	ebx,edx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ebx,edi
+	add	r9d,r12d
+	add	ebx,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[32+rsp]
+	mov	edi,DWORD[20+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	ebx,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD[rsp]
+
+	add	r12d,DWORD[28+rsp]
+	mov	r13d,r9d
+	add	r12d,edi
+	mov	r14d,ebx
+	ror	r13d,14
+	mov	edi,r10d
+
+	xor	r13d,r9d
+	ror	r14d,9
+	xor	edi,r11d
+
+	mov	DWORD[28+rsp],r12d
+	xor	r14d,ebx
+	and	edi,r9d
+
+	ror	r13d,5
+	add	r12d,eax
+	xor	edi,r11d
+
+	ror	r14d,11
+	xor	r13d,r9d
+	add	r12d,edi
+
+	mov	edi,ebx
+	add	r12d,DWORD[rbp]
+	xor	r14d,ebx
+
+	xor	edi,ecx
+	ror	r13d,6
+	mov	eax,ecx
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	eax,r15d
+	add	r8d,r12d
+	add	eax,r12d
+
+	lea	rbp,[20+rbp]
+	mov	r13d,DWORD[36+rsp]
+	mov	r15d,DWORD[24+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	eax,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD[4+rsp]
+
+	add	r12d,DWORD[32+rsp]
+	mov	r13d,r8d
+	add	r12d,r15d
+	mov	r14d,eax
+	ror	r13d,14
+	mov	r15d,r9d
+
+	xor	r13d,r8d
+	ror	r14d,9
+	xor	r15d,r10d
+
+	mov	DWORD[32+rsp],r12d
+	xor	r14d,eax
+	and	r15d,r8d
+
+	ror	r13d,5
+	add	r12d,r11d
+	xor	r15d,r10d
+
+	ror	r14d,11
+	xor	r13d,r8d
+	add	r12d,r15d
+
+	mov	r15d,eax
+	add	r12d,DWORD[rbp]
+	xor	r14d,eax
+
+	xor	r15d,ebx
+	ror	r13d,6
+	mov	r11d,ebx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r11d,edi
+	add	edx,r12d
+	add	r11d,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[40+rsp]
+	mov	edi,DWORD[28+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r11d,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD[8+rsp]
+
+	add	r12d,DWORD[36+rsp]
+	mov	r13d,edx
+	add	r12d,edi
+	mov	r14d,r11d
+	ror	r13d,14
+	mov	edi,r8d
+
+	xor	r13d,edx
+	ror	r14d,9
+	xor	edi,r9d
+
+	mov	DWORD[36+rsp],r12d
+	xor	r14d,r11d
+	and	edi,edx
+
+	ror	r13d,5
+	add	r12d,r10d
+	xor	edi,r9d
+
+	ror	r14d,11
+	xor	r13d,edx
+	add	r12d,edi
+
+	mov	edi,r11d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r11d
+
+	xor	edi,eax
+	ror	r13d,6
+	mov	r10d,eax
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r10d,r15d
+	add	ecx,r12d
+	add	r10d,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[44+rsp]
+	mov	r15d,DWORD[32+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r10d,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD[12+rsp]
+
+	add	r12d,DWORD[40+rsp]
+	mov	r13d,ecx
+	add	r12d,r15d
+	mov	r14d,r10d
+	ror	r13d,14
+	mov	r15d,edx
+
+	xor	r13d,ecx
+	ror	r14d,9
+	xor	r15d,r8d
+
+	mov	DWORD[40+rsp],r12d
+	xor	r14d,r10d
+	and	r15d,ecx
+
+	ror	r13d,5
+	add	r12d,r9d
+	xor	r15d,r8d
+
+	ror	r14d,11
+	xor	r13d,ecx
+	add	r12d,r15d
+
+	mov	r15d,r10d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r10d
+
+	xor	r15d,r11d
+	ror	r13d,6
+	mov	r9d,r11d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r9d,edi
+	add	ebx,r12d
+	add	r9d,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[48+rsp]
+	mov	edi,DWORD[36+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r9d,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD[16+rsp]
+
+	add	r12d,DWORD[44+rsp]
+	mov	r13d,ebx
+	add	r12d,edi
+	mov	r14d,r9d
+	ror	r13d,14
+	mov	edi,ecx
+
+	xor	r13d,ebx
+	ror	r14d,9
+	xor	edi,edx
+
+	mov	DWORD[44+rsp],r12d
+	xor	r14d,r9d
+	and	edi,ebx
+
+	ror	r13d,5
+	add	r12d,r8d
+	xor	edi,edx
+
+	ror	r14d,11
+	xor	r13d,ebx
+	add	r12d,edi
+
+	mov	edi,r9d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r9d
+
+	xor	edi,r10d
+	ror	r13d,6
+	mov	r8d,r10d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r8d,r15d
+	add	eax,r12d
+	add	r8d,r12d
+
+	lea	rbp,[20+rbp]
+	mov	r13d,DWORD[52+rsp]
+	mov	r15d,DWORD[40+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r8d,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD[20+rsp]
+
+	add	r12d,DWORD[48+rsp]
+	mov	r13d,eax
+	add	r12d,r15d
+	mov	r14d,r8d
+	ror	r13d,14
+	mov	r15d,ebx
+
+	xor	r13d,eax
+	ror	r14d,9
+	xor	r15d,ecx
+
+	mov	DWORD[48+rsp],r12d
+	xor	r14d,r8d
+	and	r15d,eax
+
+	ror	r13d,5
+	add	r12d,edx
+	xor	r15d,ecx
+
+	ror	r14d,11
+	xor	r13d,eax
+	add	r12d,r15d
+
+	mov	r15d,r8d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r8d
+
+	xor	r15d,r9d
+	ror	r13d,6
+	mov	edx,r9d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	edx,edi
+	add	r11d,r12d
+	add	edx,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[56+rsp]
+	mov	edi,DWORD[44+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	edx,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD[24+rsp]
+
+	add	r12d,DWORD[52+rsp]
+	mov	r13d,r11d
+	add	r12d,edi
+	mov	r14d,edx
+	ror	r13d,14
+	mov	edi,eax
+
+	xor	r13d,r11d
+	ror	r14d,9
+	xor	edi,ebx
+
+	mov	DWORD[52+rsp],r12d
+	xor	r14d,edx
+	and	edi,r11d
+
+	ror	r13d,5
+	add	r12d,ecx
+	xor	edi,ebx
+
+	ror	r14d,11
+	xor	r13d,r11d
+	add	r12d,edi
+
+	mov	edi,edx
+	add	r12d,DWORD[rbp]
+	xor	r14d,edx
+
+	xor	edi,r8d
+	ror	r13d,6
+	mov	ecx,r8d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ecx,r15d
+	add	r10d,r12d
+	add	ecx,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[60+rsp]
+	mov	r15d,DWORD[48+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	ecx,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD[28+rsp]
+
+	add	r12d,DWORD[56+rsp]
+	mov	r13d,r10d
+	add	r12d,r15d
+	mov	r14d,ecx
+	ror	r13d,14
+	mov	r15d,r11d
+
+	xor	r13d,r10d
+	ror	r14d,9
+	xor	r15d,eax
+
+	mov	DWORD[56+rsp],r12d
+	xor	r14d,ecx
+	and	r15d,r10d
+
+	ror	r13d,5
+	add	r12d,ebx
+	xor	r15d,eax
+
+	ror	r14d,11
+	xor	r13d,r10d
+	add	r12d,r15d
+
+	mov	r15d,ecx
+	add	r12d,DWORD[rbp]
+	xor	r14d,ecx
+
+	xor	r15d,edx
+	ror	r13d,6
+	mov	ebx,edx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ebx,edi
+	add	r9d,r12d
+	add	ebx,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[rsp]
+	mov	edi,DWORD[52+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	ebx,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD[32+rsp]
+
+	add	r12d,DWORD[60+rsp]
+	mov	r13d,r9d
+	add	r12d,edi
+	mov	r14d,ebx
+	ror	r13d,14
+	mov	edi,r10d
+
+	xor	r13d,r9d
+	ror	r14d,9
+	xor	edi,r11d
+
+	mov	DWORD[60+rsp],r12d
+	xor	r14d,ebx
+	and	edi,r9d
+
+	ror	r13d,5
+	add	r12d,eax
+	xor	edi,r11d
+
+	ror	r14d,11
+	xor	r13d,r9d
+	add	r12d,edi
+
+	mov	edi,ebx
+	add	r12d,DWORD[rbp]
+	xor	r14d,ebx
+
+	xor	edi,ecx
+	ror	r13d,6
+	mov	eax,ecx
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	eax,r15d
+	add	r8d,r12d
+	add	eax,r12d
+
+	lea	rbp,[20+rbp]
+	cmp	BYTE[3+rbp],0
+	jnz	NEAR $L$rounds_16_xx
+
+	mov	rdi,QWORD[((64+0))+rsp]
+	add	eax,r14d
+	lea	rsi,[64+rsi]
+
+	add	eax,DWORD[rdi]
+	add	ebx,DWORD[4+rdi]
+	add	ecx,DWORD[8+rdi]
+	add	edx,DWORD[12+rdi]
+	add	r8d,DWORD[16+rdi]
+	add	r9d,DWORD[20+rdi]
+	add	r10d,DWORD[24+rdi]
+	add	r11d,DWORD[28+rdi]
+
+	cmp	rsi,QWORD[((64+16))+rsp]
+
+	mov	DWORD[rdi],eax
+	mov	DWORD[4+rdi],ebx
+	mov	DWORD[8+rdi],ecx
+	mov	DWORD[12+rdi],edx
+	mov	DWORD[16+rdi],r8d
+	mov	DWORD[20+rdi],r9d
+	mov	DWORD[24+rdi],r10d
+	mov	DWORD[28+rdi],r11d
+	jb	NEAR $L$loop
+
+	mov	rsi,QWORD[((64+24))+rsp]
+	mov	r15,QWORD[((-48))+rsi]
+	mov	r14,QWORD[((-40))+rsi]
+	mov	r13,QWORD[((-32))+rsi]
+	mov	r12,QWORD[((-24))+rsi]
+	mov	rbp,QWORD[((-16))+rsi]
+	mov	rbx,QWORD[((-8))+rsi]
+	lea	rsp,[rsi]
+$L$epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_sha256_block_data_order:
+ALIGN	64
+
+K256:	;SHA-256 round constants; every 16-byte row is stored twice, so the 64 constants occupy 512 bytes
+	DD	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5	;K[0..3]
+	DD	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5	;second copy of the row above (same for every pair below)
+	DD	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5	;K[4..7]
+	DD	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+	DD	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3	;K[8..11]
+	DD	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+	DD	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174	;K[12..15]
+	DD	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+	DD	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc	;K[16..19]
+	DD	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+	DD	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da	;K[20..23]
+	DD	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+	DD	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7	;K[24..27]
+	DD	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+	DD	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967	;K[28..31]
+	DD	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+	DD	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13	;K[32..35]
+	DD	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+	DD	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85	;K[36..39]
+	DD	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+	DD	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3	;K[40..43]
+	DD	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+	DD	0xd192e819,0xd6990624,0xf40e3585,0x106aa070	;K[44..47]
+	DD	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+	DD	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5	;K[48..51]
+	DD	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+	DD	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3	;K[52..55]
+	DD	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+	DD	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208	;K[56..59]
+	DD	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+	DD	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2	;K[60..63]
+	DD	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+	DD	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f	;K256+512: pshufb mask that byte-swaps each dword; its zero byte at K256+515 is what the BYTE[..+rbp] end-of-table tests hit
+	DD	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+	DD	0x03020100,0x0b0a0908,0xffffffff,0xffffffff	;K256+544: shuffle mask loaded into xmm8 by the AVX code path
+	DD	0x03020100,0x0b0a0908,0xffffffff,0xffffffff
+	DD	0xffffffff,0xffffffff,0x03020100,0x0b0a0908	;K256+576: shuffle mask loaded into xmm9 by the AVX code path
+	DD	0xffffffff,0xffffffff,0x03020100,0x0b0a0908
+DB	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97	;NUL-terminated ASCII: "SHA256 block transform for x86_64, CRYPTOGAMS by" + author e-mail
+DB	110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54
+DB	52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
+DB	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
+DB	111,114,103,62,0
+
+ALIGN	64
+sha256_block_data_order_ssse3:	;SSSE3 code path; Win64 entry, arguments arrive in rcx, rdx, r8
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	;rdi/rsi are parked just above the return address and reloaded in the epilogue
+	mov	rax,rsp
+$L$SEH_begin_sha256_block_data_order_ssse3:
+	mov	rdi,rcx	;rdi = pointer to the eight-dword hash state
+	mov	rsi,rdx	;rsi = input pointer
+	mov	rdx,r8	;rdx = number of 64-byte blocks (scaled by 64 below)
+
+
+$L$ssse3_shortcut:
+	mov	rax,rsp	;rax = rsp before the pushes; stored in the frame for the epilogue
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	shl	rdx,4
+	sub	rsp,160	;frame: 64 bytes K+W scratch, 32 bytes saved pointers, 64 bytes for xmm6-xmm9
+	lea	rdx,[rdx*4+rsi]	;together with the shl: rdx = rsi + 64*blocks = end of input
+	and	rsp,-64	;align the frame to 64 bytes
+	mov	QWORD[((64+0))+rsp],rdi	;save state pointer
+	mov	QWORD[((64+8))+rsp],rsi	;save input pointer
+	mov	QWORD[((64+16))+rsp],rdx	;save end-of-input pointer
+	mov	QWORD[((64+24))+rsp],rax	;save pre-push rsp
+	movaps	XMMWORD[(64+32)+rsp],xmm6	;xmm6-xmm9 are saved here and restored before returning
+	movaps	XMMWORD[(64+48)+rsp],xmm7
+	movaps	XMMWORD[(64+64)+rsp],xmm8
+	movaps	XMMWORD[(64+80)+rsp],xmm9
+$L$prologue_ssse3:
+
+	mov	eax,DWORD[rdi]	;load the eight state words into eax,ebx,ecx,edx,r8d,r9d,r10d,r11d
+	mov	ebx,DWORD[4+rdi]
+	mov	ecx,DWORD[8+rdi]
+	mov	edx,DWORD[12+rdi]
+	mov	r8d,DWORD[16+rdi]
+	mov	r9d,DWORD[20+rdi]
+	mov	r10d,DWORD[24+rdi]
+	mov	r11d,DWORD[28+rdi]
+
+
+	jmp	NEAR $L$loop_ssse3
+ALIGN	16
+$L$loop_ssse3:	;per-block setup: load 64 input bytes, byte-swap them, pre-add the first 16 constants
+	movdqa	xmm7,XMMWORD[((K256+512))]	;xmm7 = byte-swap shuffle mask stored after the constants
+	movdqu	xmm0,XMMWORD[rsi]	;unaligned loads of the 64-byte input block into xmm0-xmm3
+	movdqu	xmm1,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+DB	102,15,56,0,199	;pshufb xmm0,xmm7
+	movdqu	xmm3,XMMWORD[48+rsi]
+	lea	rbp,[K256]	;rbp walks the constant table
+DB	102,15,56,0,207	;pshufb xmm1,xmm7
+	movdqa	xmm4,XMMWORD[rbp]	;K[0..3]
+	movdqa	xmm5,XMMWORD[32+rbp]	;K[4..7] (stride 32 because each row is stored twice)
+DB	102,15,56,0,215	;pshufb xmm2,xmm7
+	paddd	xmm4,xmm0
+	movdqa	xmm6,XMMWORD[64+rbp]	;K[8..11]
+DB	102,15,56,0,223	;pshufb xmm3,xmm7
+	movdqa	xmm7,XMMWORD[96+rbp]	;mask no longer needed; xmm7 now holds K[12..15]
+	paddd	xmm5,xmm1
+	paddd	xmm6,xmm2
+	paddd	xmm7,xmm3
+	movdqa	XMMWORD[rsp],xmm4	;[rsp+0..63] = constant + message word for the next 16 rounds
+	mov	r14d,eax	;r14d starts as a copy of the first state word
+	movdqa	XMMWORD[16+rsp],xmm5
+	mov	edi,ebx
+	movdqa	XMMWORD[32+rsp],xmm6
+	xor	edi,ecx	;edi = ebx xor ecx, consumed by the first round's and/xor on edi
+	movdqa	XMMWORD[48+rsp],xmm7
+	mov	r13d,r8d	;r13d starts as a copy of the fifth state word
+	jmp	NEAR $L$ssse3_00_47
+
+ALIGN	16
+$L$ssse3_00_47:	;loop body: 16 scalar rounds interleaved with SIMD expansion of the next 16 message words
+	sub	rbp,-128	;rbp += 128: advance to the next 16 constants (four doubled rows)
+	ror	r13d,14	;r13d chain (ror 14, xor, ror 5, xor, ror 6) yields ror6^ror11^ror25 of r8d
+	movdqa	xmm4,xmm1
+	mov	eax,r14d	;commit the value accumulated in r14d at the end of the previous round
+	mov	r12d,r9d
+	movdqa	xmm7,xmm3
+	ror	r14d,9	;r14d chain (ror 9, xor, ror 11, xor, ror 2) yields ror2^ror13^ror22 of eax
+	xor	r13d,r8d
+	xor	r12d,r10d
+	ror	r13d,5
+	xor	r14d,eax
+DB	102,15,58,15,224,4	;palignr xmm4,xmm0,4 -> message words 1..4
+	and	r12d,r8d
+	xor	r13d,r8d
+DB	102,15,58,15,250,4	;palignr xmm7,xmm2,4 -> message words 9..12
+	add	r11d,DWORD[rsp]	;add the precomputed constant+message word for this round
+	mov	r15d,eax
+	xor	r12d,r10d	;r12d = ((r9d xor r10d) and r8d) xor r10d
+	ror	r14d,11
+	movdqa	xmm5,xmm4
+	xor	r15d,ebx
+	add	r11d,r12d
+	movdqa	xmm6,xmm4
+	ror	r13d,6
+	and	edi,r15d	;edi held ebx xor ecx; with the xor below it becomes the majority of eax,ebx,ecx
+	psrld	xmm4,3	;begin x>>3 ^ ror7(x) ^ ror18(x) on words 1..4, built from shift pairs
+	xor	r14d,eax
+	add	r11d,r13d
+	xor	edi,ebx
+	paddd	xmm0,xmm7	;words 0..3 += words 9..12
+	ror	r14d,2
+	add	edx,r11d
+	psrld	xmm6,7
+	add	r11d,edi
+	mov	r13d,edx
+	pshufd	xmm7,xmm3,250	;xmm7 = {w14,w14,w15,w15}
+	add	r14d,r11d
+	ror	r13d,14
+	pslld	xmm5,14
+	mov	r11d,r14d
+	mov	r12d,r8d
+	pxor	xmm4,xmm6
+	ror	r14d,9
+	xor	r13d,edx
+	xor	r12d,r9d
+	ror	r13d,5
+	psrld	xmm6,11	;total right shift now 18
+	xor	r14d,r11d
+	pxor	xmm4,xmm5
+	and	r12d,edx
+	xor	r13d,edx
+	pslld	xmm5,11	;total left shift now 25
+	add	r10d,DWORD[4+rsp]
+	mov	edi,r11d
+	pxor	xmm4,xmm6
+	xor	r12d,r9d
+	ror	r14d,11
+	movdqa	xmm6,xmm7
+	xor	edi,eax
+	add	r10d,r12d
+	pxor	xmm4,xmm5
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,r11d
+	psrld	xmm7,10	;begin x>>10 ^ ror17(x) ^ ror19(x); psrlq on duplicated dwords acts as a rotate
+	add	r10d,r13d
+	xor	r15d,eax
+	paddd	xmm0,xmm4	;add the shift/rotate mix of words 1..4
+	ror	r14d,2
+	add	ecx,r10d
+	psrlq	xmm6,17
+	add	r10d,r15d
+	mov	r13d,ecx
+	add	r14d,r10d
+	pxor	xmm7,xmm6
+	ror	r13d,14
+	mov	r10d,r14d
+	mov	r12d,edx
+	ror	r14d,9
+	psrlq	xmm6,2	;total shift 19
+	xor	r13d,ecx
+	xor	r12d,r8d
+	pxor	xmm7,xmm6
+	ror	r13d,5
+	xor	r14d,r10d
+	and	r12d,ecx
+	pshufd	xmm7,xmm7,128	;gather the two valid results (dwords 0 and 2)...
+	xor	r13d,ecx
+	add	r9d,DWORD[8+rsp]
+	mov	r15d,r10d
+	psrldq	xmm7,8	;...into the low two lanes, upper lanes zeroed
+	xor	r12d,r8d
+	ror	r14d,11
+	xor	r15d,r11d
+	add	r9d,r12d
+	ror	r13d,6
+	paddd	xmm0,xmm7	;new words 0 and 1 complete
+	and	edi,r15d
+	xor	r14d,r10d
+	add	r9d,r13d
+	pshufd	xmm7,xmm0,80	;xmm7 = {new0,new0,new1,new1}, input for the upper two lanes
+	xor	edi,r11d
+	ror	r14d,2
+	add	ebx,r9d
+	movdqa	xmm6,xmm7
+	add	r9d,edi
+	mov	r13d,ebx
+	psrld	xmm7,10
+	add	r14d,r9d
+	ror	r13d,14
+	psrlq	xmm6,17
+	mov	r9d,r14d
+	mov	r12d,ecx
+	pxor	xmm7,xmm6
+	ror	r14d,9
+	xor	r13d,ebx
+	xor	r12d,edx
+	ror	r13d,5
+	xor	r14d,r9d
+	psrlq	xmm6,2
+	and	r12d,ebx
+	xor	r13d,ebx
+	add	r8d,DWORD[12+rsp]
+	pxor	xmm7,xmm6
+	mov	edi,r9d
+	xor	r12d,edx
+	ror	r14d,11
+	pshufd	xmm7,xmm7,8	;gather dwords 0 and 2...
+	xor	edi,r10d
+	add	r8d,r12d
+	movdqa	xmm6,XMMWORD[rbp]	;constants for the four words just produced
+	ror	r13d,6
+	and	r15d,edi
+	pslldq	xmm7,8	;...and move them to the upper two lanes
+	xor	r14d,r9d
+	add	r8d,r13d
+	xor	r15d,r10d
+	paddd	xmm0,xmm7	;xmm0 now holds four new message words
+	ror	r14d,2
+	add	eax,r8d
+	add	r8d,r15d
+	paddd	xmm6,xmm0
+	mov	r13d,eax
+	add	r14d,r8d
+	movdqa	XMMWORD[rsp],xmm6	;store constant+word for use 16 rounds later
+	ror	r13d,14	;second group of four rounds; SIMD side rebuilds xmm1 from xmm1,xmm2,xmm3,xmm0
+	movdqa	xmm4,xmm2
+	mov	r8d,r14d	;commit the value accumulated in r14d by the previous round
+	mov	r12d,ebx
+	movdqa	xmm7,xmm0
+	ror	r14d,9
+	xor	r13d,eax
+	xor	r12d,ecx
+	ror	r13d,5
+	xor	r14d,r8d
+DB	102,15,58,15,225,4	;palignr xmm4,xmm1,4
+	and	r12d,eax
+	xor	r13d,eax
+DB	102,15,58,15,251,4	;palignr xmm7,xmm3,4
+	add	edx,DWORD[16+rsp]	;add the precomputed constant+message word for this round
+	mov	r15d,r8d
+	xor	r12d,ecx
+	ror	r14d,11
+	movdqa	xmm5,xmm4
+	xor	r15d,r9d
+	add	edx,r12d
+	movdqa	xmm6,xmm4
+	ror	r13d,6
+	and	edi,r15d
+	psrld	xmm4,3	;x>>3 ^ ror7(x) ^ ror18(x), assembled from the shifts 3, 7, 14, 11(+7), 11(+14)
+	xor	r14d,r8d
+	add	edx,r13d
+	xor	edi,r9d
+	paddd	xmm1,xmm7
+	ror	r14d,2
+	add	r11d,edx
+	psrld	xmm6,7
+	add	edx,edi
+	mov	r13d,r11d
+	pshufd	xmm7,xmm0,250	;duplicate the top two dwords of xmm0: {d2,d2,d3,d3}
+	add	r14d,edx
+	ror	r13d,14
+	pslld	xmm5,14
+	mov	edx,r14d
+	mov	r12d,eax
+	pxor	xmm4,xmm6
+	ror	r14d,9
+	xor	r13d,r11d
+	xor	r12d,ebx
+	ror	r13d,5
+	psrld	xmm6,11
+	xor	r14d,edx
+	pxor	xmm4,xmm5
+	and	r12d,r11d
+	xor	r13d,r11d
+	pslld	xmm5,11
+	add	ecx,DWORD[20+rsp]
+	mov	edi,edx
+	pxor	xmm4,xmm6
+	xor	r12d,ebx
+	ror	r14d,11
+	movdqa	xmm6,xmm7
+	xor	edi,r8d
+	add	ecx,r12d
+	pxor	xmm4,xmm5
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,edx
+	psrld	xmm7,10	;x>>10 ^ ror17(x) ^ ror19(x) on the duplicated dwords
+	add	ecx,r13d
+	xor	r15d,r8d
+	paddd	xmm1,xmm4
+	ror	r14d,2
+	add	r10d,ecx
+	psrlq	xmm6,17
+	add	ecx,r15d
+	mov	r13d,r10d
+	add	r14d,ecx
+	pxor	xmm7,xmm6
+	ror	r13d,14
+	mov	ecx,r14d
+	mov	r12d,r11d
+	ror	r14d,9
+	psrlq	xmm6,2
+	xor	r13d,r10d
+	xor	r12d,eax
+	pxor	xmm7,xmm6
+	ror	r13d,5
+	xor	r14d,ecx
+	and	r12d,r10d
+	pshufd	xmm7,xmm7,128
+	xor	r13d,r10d
+	add	ebx,DWORD[24+rsp]
+	mov	r15d,ecx
+	psrldq	xmm7,8	;results land in the low two lanes
+	xor	r12d,eax
+	ror	r14d,11
+	xor	r15d,edx
+	add	ebx,r12d
+	ror	r13d,6
+	paddd	xmm1,xmm7
+	and	edi,r15d
+	xor	r14d,ecx
+	add	ebx,r13d
+	pshufd	xmm7,xmm1,80	;duplicate the two freshly completed low dwords of xmm1
+	xor	edi,edx
+	ror	r14d,2
+	add	r9d,ebx
+	movdqa	xmm6,xmm7
+	add	ebx,edi
+	mov	r13d,r9d
+	psrld	xmm7,10
+	add	r14d,ebx
+	ror	r13d,14
+	psrlq	xmm6,17
+	mov	ebx,r14d
+	mov	r12d,r10d
+	pxor	xmm7,xmm6
+	ror	r14d,9
+	xor	r13d,r9d
+	xor	r12d,r11d
+	ror	r13d,5
+	xor	r14d,ebx
+	psrlq	xmm6,2
+	and	r12d,r9d
+	xor	r13d,r9d
+	add	eax,DWORD[28+rsp]
+	pxor	xmm7,xmm6
+	mov	edi,ebx
+	xor	r12d,r11d
+	ror	r14d,11
+	pshufd	xmm7,xmm7,8
+	xor	edi,ecx
+	add	eax,r12d
+	movdqa	xmm6,XMMWORD[32+rbp]	;constants matching the four words now in xmm1
+	ror	r13d,6
+	and	r15d,edi
+	pslldq	xmm7,8	;results land in the upper two lanes
+	xor	r14d,ebx
+	add	eax,r13d
+	xor	r15d,ecx
+	paddd	xmm1,xmm7
+	ror	r14d,2
+	add	r8d,eax
+	add	eax,r15d
+	paddd	xmm6,xmm1
+	mov	r13d,r8d
+	add	r14d,eax
+	movdqa	XMMWORD[16+rsp],xmm6	;store constant+word for use 16 rounds later
+	ror	r13d,14	;third group of four rounds; SIMD side rebuilds xmm2 from xmm2,xmm3,xmm0,xmm1
+	movdqa	xmm4,xmm3
+	mov	eax,r14d	;commit the value accumulated in r14d by the previous round
+	mov	r12d,r9d
+	movdqa	xmm7,xmm1
+	ror	r14d,9
+	xor	r13d,r8d
+	xor	r12d,r10d
+	ror	r13d,5
+	xor	r14d,eax
+DB	102,15,58,15,226,4	;palignr xmm4,xmm2,4
+	and	r12d,r8d
+	xor	r13d,r8d
+DB	102,15,58,15,248,4	;palignr xmm7,xmm0,4
+	add	r11d,DWORD[32+rsp]	;add the precomputed constant+message word for this round
+	mov	r15d,eax
+	xor	r12d,r10d
+	ror	r14d,11
+	movdqa	xmm5,xmm4
+	xor	r15d,ebx
+	add	r11d,r12d
+	movdqa	xmm6,xmm4
+	ror	r13d,6
+	and	edi,r15d
+	psrld	xmm4,3	;x>>3 ^ ror7(x) ^ ror18(x), assembled from the shifts 3, 7, 14, 11(+7), 11(+14)
+	xor	r14d,eax
+	add	r11d,r13d
+	xor	edi,ebx
+	paddd	xmm2,xmm7
+	ror	r14d,2
+	add	edx,r11d
+	psrld	xmm6,7
+	add	r11d,edi
+	mov	r13d,edx
+	pshufd	xmm7,xmm1,250	;duplicate the top two dwords of xmm1: {d2,d2,d3,d3}
+	add	r14d,r11d
+	ror	r13d,14
+	pslld	xmm5,14
+	mov	r11d,r14d
+	mov	r12d,r8d
+	pxor	xmm4,xmm6
+	ror	r14d,9
+	xor	r13d,edx
+	xor	r12d,r9d
+	ror	r13d,5
+	psrld	xmm6,11
+	xor	r14d,r11d
+	pxor	xmm4,xmm5
+	and	r12d,edx
+	xor	r13d,edx
+	pslld	xmm5,11
+	add	r10d,DWORD[36+rsp]
+	mov	edi,r11d
+	pxor	xmm4,xmm6
+	xor	r12d,r9d
+	ror	r14d,11
+	movdqa	xmm6,xmm7
+	xor	edi,eax
+	add	r10d,r12d
+	pxor	xmm4,xmm5
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,r11d
+	psrld	xmm7,10	;x>>10 ^ ror17(x) ^ ror19(x) on the duplicated dwords
+	add	r10d,r13d
+	xor	r15d,eax
+	paddd	xmm2,xmm4
+	ror	r14d,2
+	add	ecx,r10d
+	psrlq	xmm6,17
+	add	r10d,r15d
+	mov	r13d,ecx
+	add	r14d,r10d
+	pxor	xmm7,xmm6
+	ror	r13d,14
+	mov	r10d,r14d
+	mov	r12d,edx
+	ror	r14d,9
+	psrlq	xmm6,2
+	xor	r13d,ecx
+	xor	r12d,r8d
+	pxor	xmm7,xmm6
+	ror	r13d,5
+	xor	r14d,r10d
+	and	r12d,ecx
+	pshufd	xmm7,xmm7,128
+	xor	r13d,ecx
+	add	r9d,DWORD[40+rsp]
+	mov	r15d,r10d
+	psrldq	xmm7,8	;results land in the low two lanes
+	xor	r12d,r8d
+	ror	r14d,11
+	xor	r15d,r11d
+	add	r9d,r12d
+	ror	r13d,6
+	paddd	xmm2,xmm7
+	and	edi,r15d
+	xor	r14d,r10d
+	add	r9d,r13d
+	pshufd	xmm7,xmm2,80	;duplicate the two freshly completed low dwords of xmm2
+	xor	edi,r11d
+	ror	r14d,2
+	add	ebx,r9d
+	movdqa	xmm6,xmm7
+	add	r9d,edi
+	mov	r13d,ebx
+	psrld	xmm7,10
+	add	r14d,r9d
+	ror	r13d,14
+	psrlq	xmm6,17
+	mov	r9d,r14d
+	mov	r12d,ecx
+	pxor	xmm7,xmm6
+	ror	r14d,9
+	xor	r13d,ebx
+	xor	r12d,edx
+	ror	r13d,5
+	xor	r14d,r9d
+	psrlq	xmm6,2
+	and	r12d,ebx
+	xor	r13d,ebx
+	add	r8d,DWORD[44+rsp]
+	pxor	xmm7,xmm6
+	mov	edi,r9d
+	xor	r12d,edx
+	ror	r14d,11
+	pshufd	xmm7,xmm7,8
+	xor	edi,r10d
+	add	r8d,r12d
+	movdqa	xmm6,XMMWORD[64+rbp]	;constants matching the four words now in xmm2
+	ror	r13d,6
+	and	r15d,edi
+	pslldq	xmm7,8	;results land in the upper two lanes
+	xor	r14d,r9d
+	add	r8d,r13d
+	xor	r15d,r10d
+	paddd	xmm2,xmm7
+	ror	r14d,2
+	add	eax,r8d
+	add	r8d,r15d
+	paddd	xmm6,xmm2
+	mov	r13d,eax
+	add	r14d,r8d
+	movdqa	XMMWORD[32+rsp],xmm6	;store constant+word for use 16 rounds later
+	ror	r13d,14	;fourth group of four rounds; SIMD side rebuilds xmm3 from xmm3,xmm0,xmm1,xmm2
+	movdqa	xmm4,xmm0
+	mov	r8d,r14d	;commit the value accumulated in r14d by the previous round
+	mov	r12d,ebx
+	movdqa	xmm7,xmm2
+	ror	r14d,9
+	xor	r13d,eax
+	xor	r12d,ecx
+	ror	r13d,5
+	xor	r14d,r8d
+DB	102,15,58,15,227,4	;palignr xmm4,xmm3,4
+	and	r12d,eax
+	xor	r13d,eax
+DB	102,15,58,15,249,4	;palignr xmm7,xmm1,4
+	add	edx,DWORD[48+rsp]	;add the precomputed constant+message word for this round
+	mov	r15d,r8d
+	xor	r12d,ecx
+	ror	r14d,11
+	movdqa	xmm5,xmm4
+	xor	r15d,r9d
+	add	edx,r12d
+	movdqa	xmm6,xmm4
+	ror	r13d,6
+	and	edi,r15d
+	psrld	xmm4,3	;x>>3 ^ ror7(x) ^ ror18(x), assembled from the shifts 3, 7, 14, 11(+7), 11(+14)
+	xor	r14d,r8d
+	add	edx,r13d
+	xor	edi,r9d
+	paddd	xmm3,xmm7
+	ror	r14d,2
+	add	r11d,edx
+	psrld	xmm6,7
+	add	edx,edi
+	mov	r13d,r11d
+	pshufd	xmm7,xmm2,250	;duplicate the top two dwords of xmm2: {d2,d2,d3,d3}
+	add	r14d,edx
+	ror	r13d,14
+	pslld	xmm5,14
+	mov	edx,r14d
+	mov	r12d,eax
+	pxor	xmm4,xmm6
+	ror	r14d,9
+	xor	r13d,r11d
+	xor	r12d,ebx
+	ror	r13d,5
+	psrld	xmm6,11
+	xor	r14d,edx
+	pxor	xmm4,xmm5
+	and	r12d,r11d
+	xor	r13d,r11d
+	pslld	xmm5,11
+	add	ecx,DWORD[52+rsp]
+	mov	edi,edx
+	pxor	xmm4,xmm6
+	xor	r12d,ebx
+	ror	r14d,11
+	movdqa	xmm6,xmm7
+	xor	edi,r8d
+	add	ecx,r12d
+	pxor	xmm4,xmm5
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,edx
+	psrld	xmm7,10	;x>>10 ^ ror17(x) ^ ror19(x) on the duplicated dwords
+	add	ecx,r13d
+	xor	r15d,r8d
+	paddd	xmm3,xmm4
+	ror	r14d,2
+	add	r10d,ecx
+	psrlq	xmm6,17
+	add	ecx,r15d
+	mov	r13d,r10d
+	add	r14d,ecx
+	pxor	xmm7,xmm6
+	ror	r13d,14
+	mov	ecx,r14d
+	mov	r12d,r11d
+	ror	r14d,9
+	psrlq	xmm6,2
+	xor	r13d,r10d
+	xor	r12d,eax
+	pxor	xmm7,xmm6
+	ror	r13d,5
+	xor	r14d,ecx
+	and	r12d,r10d
+	pshufd	xmm7,xmm7,128
+	xor	r13d,r10d
+	add	ebx,DWORD[56+rsp]
+	mov	r15d,ecx
+	psrldq	xmm7,8	;results land in the low two lanes
+	xor	r12d,eax
+	ror	r14d,11
+	xor	r15d,edx
+	add	ebx,r12d
+	ror	r13d,6
+	paddd	xmm3,xmm7
+	and	edi,r15d
+	xor	r14d,ecx
+	add	ebx,r13d
+	pshufd	xmm7,xmm3,80	;duplicate the two freshly completed low dwords of xmm3
+	xor	edi,edx
+	ror	r14d,2
+	add	r9d,ebx
+	movdqa	xmm6,xmm7
+	add	ebx,edi
+	mov	r13d,r9d
+	psrld	xmm7,10
+	add	r14d,ebx
+	ror	r13d,14
+	psrlq	xmm6,17
+	mov	ebx,r14d
+	mov	r12d,r10d
+	pxor	xmm7,xmm6
+	ror	r14d,9
+	xor	r13d,r9d
+	xor	r12d,r11d
+	ror	r13d,5
+	xor	r14d,ebx
+	psrlq	xmm6,2
+	and	r12d,r9d
+	xor	r13d,r9d
+	add	eax,DWORD[60+rsp]
+	pxor	xmm7,xmm6
+	mov	edi,ebx
+	xor	r12d,r11d
+	ror	r14d,11
+	pshufd	xmm7,xmm7,8
+	xor	edi,ecx
+	add	eax,r12d
+	movdqa	xmm6,XMMWORD[96+rbp]	;constants matching the four words now in xmm3
+	ror	r13d,6
+	and	r15d,edi
+	pslldq	xmm7,8	;results land in the upper two lanes
+	xor	r14d,ebx
+	add	eax,r13d
+	xor	r15d,ecx
+	paddd	xmm3,xmm7
+	ror	r14d,2
+	add	r8d,eax
+	add	eax,r15d
+	paddd	xmm6,xmm3
+	mov	r13d,r8d
+	add	r14d,eax
+	movdqa	XMMWORD[48+rsp],xmm6	;store constant+word for use 16 rounds later
+	cmp	BYTE[131+rbp],0	;top byte of the dword 128 bytes ahead: nonzero inside the constants, zero at the mask that follows them
+	jne	NEAR $L$ssse3_00_47	;more constants left: run another 16 rounds with expansion
+	ror	r13d,14	;scalar-only rounds (no SIMD expansion) consuming the words stored at [rsp+0..12]
+	mov	eax,r14d	;commit the value accumulated in r14d by the previous round
+	mov	r12d,r9d
+	ror	r14d,9
+	xor	r13d,r8d
+	xor	r12d,r10d
+	ror	r13d,5
+	xor	r14d,eax
+	and	r12d,r8d
+	xor	r13d,r8d
+	add	r11d,DWORD[rsp]	;add the stored constant+message word
+	mov	r15d,eax
+	xor	r12d,r10d	;r12d = ((r9d xor r10d) and r8d) xor r10d
+	ror	r14d,11
+	xor	r15d,ebx
+	add	r11d,r12d
+	ror	r13d,6	;r13d = ror6^ror11^ror25 of r8d
+	and	edi,r15d
+	xor	r14d,eax
+	add	r11d,r13d
+	xor	edi,ebx	;edi = ((eax xor ebx) and previous edi) xor ebx
+	ror	r14d,2	;r14d = ror2^ror13^ror22 of eax
+	add	edx,r11d
+	add	r11d,edi
+	mov	r13d,edx
+	add	r14d,r11d	;r14d carries the new value into the next round's first mov
+	ror	r13d,14	;next round, same pattern with register roles rotated by one
+	mov	r11d,r14d
+	mov	r12d,r8d
+	ror	r14d,9
+	xor	r13d,edx
+	xor	r12d,r9d
+	ror	r13d,5
+	xor	r14d,r11d
+	and	r12d,edx
+	xor	r13d,edx
+	add	r10d,DWORD[4+rsp]
+	mov	edi,r11d
+	xor	r12d,r9d
+	ror	r14d,11
+	xor	edi,eax
+	add	r10d,r12d
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,r11d
+	add	r10d,r13d
+	xor	r15d,eax
+	ror	r14d,2
+	add	ecx,r10d
+	add	r10d,r15d
+	mov	r13d,ecx
+	add	r14d,r10d
+	ror	r13d,14
+	mov	r10d,r14d
+	mov	r12d,edx
+	ror	r14d,9
+	xor	r13d,ecx
+	xor	r12d,r8d
+	ror	r13d,5
+	xor	r14d,r10d
+	and	r12d,ecx
+	xor	r13d,ecx
+	add	r9d,DWORD[8+rsp]
+	mov	r15d,r10d
+	xor	r12d,r8d
+	ror	r14d,11
+	xor	r15d,r11d
+	add	r9d,r12d
+	ror	r13d,6
+	and	edi,r15d
+	xor	r14d,r10d
+	add	r9d,r13d
+	xor	edi,r11d
+	ror	r14d,2
+	add	ebx,r9d
+	add	r9d,edi
+	mov	r13d,ebx
+	add	r14d,r9d
+	ror	r13d,14
+	mov	r9d,r14d
+	mov	r12d,ecx
+	ror	r14d,9
+	xor	r13d,ebx
+	xor	r12d,edx
+	ror	r13d,5
+	xor	r14d,r9d
+	and	r12d,ebx
+	xor	r13d,ebx
+	add	r8d,DWORD[12+rsp]
+	mov	edi,r9d
+	xor	r12d,edx
+	ror	r14d,11
+	xor	edi,r10d
+	add	r8d,r12d
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,r9d
+	add	r8d,r13d
+	xor	r15d,r10d
+	ror	r14d,2
+	add	eax,r8d
+	add	r8d,r15d
+	mov	r13d,eax
+	add	r14d,r8d
+	ror	r13d,14	;scalar-only rounds consuming the words stored at [rsp+16..28]
+	mov	r8d,r14d	;commit the value accumulated in r14d by the previous round
+	mov	r12d,ebx
+	ror	r14d,9
+	xor	r13d,eax
+	xor	r12d,ecx
+	ror	r13d,5
+	xor	r14d,r8d
+	and	r12d,eax
+	xor	r13d,eax
+	add	edx,DWORD[16+rsp]	;add the stored constant+message word
+	mov	r15d,r8d
+	xor	r12d,ecx	;r12d = ((ebx xor ecx) and eax) xor ecx
+	ror	r14d,11
+	xor	r15d,r9d
+	add	edx,r12d
+	ror	r13d,6	;r13d = ror6^ror11^ror25 of eax
+	and	edi,r15d
+	xor	r14d,r8d
+	add	edx,r13d
+	xor	edi,r9d
+	ror	r14d,2	;r14d = ror2^ror13^ror22 of r8d
+	add	r11d,edx
+	add	edx,edi
+	mov	r13d,r11d
+	add	r14d,edx	;r14d carries the new value into the next round's first mov
+	ror	r13d,14	;next round, same pattern with register roles rotated by one
+	mov	edx,r14d
+	mov	r12d,eax
+	ror	r14d,9
+	xor	r13d,r11d
+	xor	r12d,ebx
+	ror	r13d,5
+	xor	r14d,edx
+	and	r12d,r11d
+	xor	r13d,r11d
+	add	ecx,DWORD[20+rsp]
+	mov	edi,edx
+	xor	r12d,ebx
+	ror	r14d,11
+	xor	edi,r8d
+	add	ecx,r12d
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,edx
+	add	ecx,r13d
+	xor	r15d,r8d
+	ror	r14d,2
+	add	r10d,ecx
+	add	ecx,r15d
+	mov	r13d,r10d
+	add	r14d,ecx
+	ror	r13d,14
+	mov	ecx,r14d
+	mov	r12d,r11d
+	ror	r14d,9
+	xor	r13d,r10d
+	xor	r12d,eax
+	ror	r13d,5
+	xor	r14d,ecx
+	and	r12d,r10d
+	xor	r13d,r10d
+	add	ebx,DWORD[24+rsp]
+	mov	r15d,ecx
+	xor	r12d,eax
+	ror	r14d,11
+	xor	r15d,edx
+	add	ebx,r12d
+	ror	r13d,6
+	and	edi,r15d
+	xor	r14d,ecx
+	add	ebx,r13d
+	xor	edi,edx
+	ror	r14d,2
+	add	r9d,ebx
+	add	ebx,edi
+	mov	r13d,r9d
+	add	r14d,ebx
+	ror	r13d,14
+	mov	ebx,r14d
+	mov	r12d,r10d
+	ror	r14d,9
+	xor	r13d,r9d
+	xor	r12d,r11d
+	ror	r13d,5
+	xor	r14d,ebx
+	and	r12d,r9d
+	xor	r13d,r9d
+	add	eax,DWORD[28+rsp]
+	mov	edi,ebx
+	xor	r12d,r11d
+	ror	r14d,11
+	xor	edi,ecx
+	add	eax,r12d
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,ebx
+	add	eax,r13d
+	xor	r15d,ecx
+	ror	r14d,2
+	add	r8d,eax
+	add	eax,r15d
+	mov	r13d,r8d
+	add	r14d,eax
+	ror	r13d,14	;scalar-only rounds consuming the words stored at [rsp+32..44]
+	mov	eax,r14d	;commit the value accumulated in r14d by the previous round
+	mov	r12d,r9d
+	ror	r14d,9
+	xor	r13d,r8d
+	xor	r12d,r10d
+	ror	r13d,5
+	xor	r14d,eax
+	and	r12d,r8d
+	xor	r13d,r8d
+	add	r11d,DWORD[32+rsp]	;add the stored constant+message word
+	mov	r15d,eax
+	xor	r12d,r10d	;r12d = ((r9d xor r10d) and r8d) xor r10d
+	ror	r14d,11
+	xor	r15d,ebx
+	add	r11d,r12d
+	ror	r13d,6	;r13d = ror6^ror11^ror25 of r8d
+	and	edi,r15d
+	xor	r14d,eax
+	add	r11d,r13d
+	xor	edi,ebx
+	ror	r14d,2	;r14d = ror2^ror13^ror22 of eax
+	add	edx,r11d
+	add	r11d,edi
+	mov	r13d,edx
+	add	r14d,r11d	;r14d carries the new value into the next round's first mov
+	ror	r13d,14	;next round, same pattern with register roles rotated by one
+	mov	r11d,r14d
+	mov	r12d,r8d
+	ror	r14d,9
+	xor	r13d,edx
+	xor	r12d,r9d
+	ror	r13d,5
+	xor	r14d,r11d
+	and	r12d,edx
+	xor	r13d,edx
+	add	r10d,DWORD[36+rsp]
+	mov	edi,r11d
+	xor	r12d,r9d
+	ror	r14d,11
+	xor	edi,eax
+	add	r10d,r12d
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,r11d
+	add	r10d,r13d
+	xor	r15d,eax
+	ror	r14d,2
+	add	ecx,r10d
+	add	r10d,r15d
+	mov	r13d,ecx
+	add	r14d,r10d
+	ror	r13d,14
+	mov	r10d,r14d
+	mov	r12d,edx
+	ror	r14d,9
+	xor	r13d,ecx
+	xor	r12d,r8d
+	ror	r13d,5
+	xor	r14d,r10d
+	and	r12d,ecx
+	xor	r13d,ecx
+	add	r9d,DWORD[40+rsp]
+	mov	r15d,r10d
+	xor	r12d,r8d
+	ror	r14d,11
+	xor	r15d,r11d
+	add	r9d,r12d
+	ror	r13d,6
+	and	edi,r15d
+	xor	r14d,r10d
+	add	r9d,r13d
+	xor	edi,r11d
+	ror	r14d,2
+	add	ebx,r9d
+	add	r9d,edi
+	mov	r13d,ebx
+	add	r14d,r9d
+	ror	r13d,14
+	mov	r9d,r14d
+	mov	r12d,ecx
+	ror	r14d,9
+	xor	r13d,ebx
+	xor	r12d,edx
+	ror	r13d,5
+	xor	r14d,r9d
+	and	r12d,ebx
+	xor	r13d,ebx
+	add	r8d,DWORD[44+rsp]
+	mov	edi,r9d
+	xor	r12d,edx
+	ror	r14d,11
+	xor	edi,r10d
+	add	r8d,r12d
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,r9d
+	add	r8d,r13d
+	xor	r15d,r10d
+	ror	r14d,2
+	add	eax,r8d
+	add	r8d,r15d
+	mov	r13d,eax
+	add	r14d,r8d
+	ror	r13d,14	;scalar-only rounds consuming the words stored at [rsp+48..60]
+	mov	r8d,r14d	;commit the value accumulated in r14d by the previous round
+	mov	r12d,ebx
+	ror	r14d,9
+	xor	r13d,eax
+	xor	r12d,ecx
+	ror	r13d,5
+	xor	r14d,r8d
+	and	r12d,eax
+	xor	r13d,eax
+	add	edx,DWORD[48+rsp]	;add the stored constant+message word
+	mov	r15d,r8d
+	xor	r12d,ecx	;r12d = ((ebx xor ecx) and eax) xor ecx
+	ror	r14d,11
+	xor	r15d,r9d
+	add	edx,r12d
+	ror	r13d,6	;r13d = ror6^ror11^ror25 of eax
+	and	edi,r15d
+	xor	r14d,r8d
+	add	edx,r13d
+	xor	edi,r9d
+	ror	r14d,2	;r14d = ror2^ror13^ror22 of r8d
+	add	r11d,edx
+	add	edx,edi
+	mov	r13d,r11d
+	add	r14d,edx	;r14d carries the new value into the next round's first mov
+	ror	r13d,14	;next round, same pattern with register roles rotated by one
+	mov	edx,r14d
+	mov	r12d,eax
+	ror	r14d,9
+	xor	r13d,r11d
+	xor	r12d,ebx
+	ror	r13d,5
+	xor	r14d,edx
+	and	r12d,r11d
+	xor	r13d,r11d
+	add	ecx,DWORD[52+rsp]
+	mov	edi,edx
+	xor	r12d,ebx
+	ror	r14d,11
+	xor	edi,r8d
+	add	ecx,r12d
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,edx
+	add	ecx,r13d
+	xor	r15d,r8d
+	ror	r14d,2
+	add	r10d,ecx
+	add	ecx,r15d
+	mov	r13d,r10d
+	add	r14d,ecx
+	ror	r13d,14
+	mov	ecx,r14d
+	mov	r12d,r11d
+	ror	r14d,9
+	xor	r13d,r10d
+	xor	r12d,eax
+	ror	r13d,5
+	xor	r14d,ecx
+	and	r12d,r10d
+	xor	r13d,r10d
+	add	ebx,DWORD[56+rsp]
+	mov	r15d,ecx
+	xor	r12d,eax
+	ror	r14d,11
+	xor	r15d,edx
+	add	ebx,r12d
+	ror	r13d,6
+	and	edi,r15d
+	xor	r14d,ecx
+	add	ebx,r13d
+	xor	edi,edx
+	ror	r14d,2
+	add	r9d,ebx
+	add	ebx,edi
+	mov	r13d,r9d
+	add	r14d,ebx
+	ror	r13d,14
+	mov	ebx,r14d
+	mov	r12d,r10d
+	ror	r14d,9
+	xor	r13d,r9d
+	xor	r12d,r11d
+	ror	r13d,5
+	xor	r14d,ebx
+	and	r12d,r9d
+	xor	r13d,r9d
+	add	eax,DWORD[60+rsp]	;last stored word of this block
+	mov	edi,ebx
+	xor	r12d,r11d
+	ror	r14d,11
+	xor	edi,ecx
+	add	eax,r12d
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,ebx
+	add	eax,r13d
+	xor	r15d,ecx
+	ror	r14d,2
+	add	r8d,eax
+	add	eax,r15d
+	mov	r13d,r8d
+	add	r14d,eax	;final value stays in r14d until the code that follows copies it to eax
+	mov	rdi,QWORD[((64+0))+rsp]	;reload the state pointer saved in the frame (edi was used as scratch)
+	mov	eax,r14d	;commit the value left in r14d by the last round
+
+	add	eax,DWORD[rdi]	;add the stored state to the eight working registers
+	lea	rsi,[64+rsi]	;advance the input pointer by one 64-byte block (lea leaves flags alone)
+	add	ebx,DWORD[4+rdi]
+	add	ecx,DWORD[8+rdi]
+	add	edx,DWORD[12+rdi]
+	add	r8d,DWORD[16+rdi]
+	add	r9d,DWORD[20+rdi]
+	add	r10d,DWORD[24+rdi]
+	add	r11d,DWORD[28+rdi]
+
+	cmp	rsi,QWORD[((64+16))+rsp]	;compare with the saved end-of-input pointer; the mov stores below do not change flags
+
+	mov	DWORD[rdi],eax	;write the updated state back
+	mov	DWORD[4+rdi],ebx
+	mov	DWORD[8+rdi],ecx
+	mov	DWORD[12+rdi],edx
+	mov	DWORD[16+rdi],r8d
+	mov	DWORD[20+rdi],r9d
+	mov	DWORD[24+rdi],r10d
+	mov	DWORD[28+rdi],r11d
+	jb	NEAR $L$loop_ssse3	;input pointer still below the end: process the next block
+
+	mov	rsi,QWORD[((64+24))+rsp]	;rsi = rsp value recorded at $L$ssse3_shortcut, before the six pushes
+	movaps	xmm6,XMMWORD[((64+32))+rsp]	;restore xmm6-xmm9 from the frame slots filled by the prologue
+	movaps	xmm7,XMMWORD[((64+48))+rsp]
+	movaps	xmm8,XMMWORD[((64+64))+rsp]
+	movaps	xmm9,XMMWORD[((64+80))+rsp]
+	mov	r15,QWORD[((-48))+rsi]	;reload the pushed registers by offset from the original rsp (last pushed = lowest address)
+	mov	r14,QWORD[((-40))+rsi]
+	mov	r13,QWORD[((-32))+rsi]
+	mov	r12,QWORD[((-24))+rsi]
+	mov	rbp,QWORD[((-16))+rsi]
+	mov	rbx,QWORD[((-8))+rsi]
+	lea	rsp,[rsi]	;drop the whole frame; rsp points at the return address again
+$L$epilogue_ssse3:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]	;rdi/rsi come back from the slots written at function entry
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_sha256_block_data_order_ssse3:
+
+ALIGN	64
+sha256_block_data_order_avx:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha256_block_data_order_avx:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+$L$avx_shortcut:
+	mov	rax,rsp
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	shl	rdx,4
+	sub	rsp,160
+	lea	rdx,[rdx*4+rsi]
+	and	rsp,-64
+	mov	QWORD[((64+0))+rsp],rdi
+	mov	QWORD[((64+8))+rsp],rsi
+	mov	QWORD[((64+16))+rsp],rdx
+	mov	QWORD[((64+24))+rsp],rax
+	movaps	XMMWORD[(64+32)+rsp],xmm6
+	movaps	XMMWORD[(64+48)+rsp],xmm7
+	movaps	XMMWORD[(64+64)+rsp],xmm8
+	movaps	XMMWORD[(64+80)+rsp],xmm9
+$L$prologue_avx:
+
+	vzeroupper
+	mov	eax,DWORD[rdi]
+	mov	ebx,DWORD[4+rdi]
+	mov	ecx,DWORD[8+rdi]
+	mov	edx,DWORD[12+rdi]
+	mov	r8d,DWORD[16+rdi]
+	mov	r9d,DWORD[20+rdi]
+	mov	r10d,DWORD[24+rdi]
+	mov	r11d,DWORD[28+rdi]
+	vmovdqa	xmm8,XMMWORD[((K256+512+32))]
+	vmovdqa	xmm9,XMMWORD[((K256+512+64))]
+	jmp	NEAR $L$loop_avx
+ALIGN	16
+$L$loop_avx:
+	vmovdqa	xmm7,XMMWORD[((K256+512))]
+	vmovdqu	xmm0,XMMWORD[rsi]
+	vmovdqu	xmm1,XMMWORD[16+rsi]
+	vmovdqu	xmm2,XMMWORD[32+rsi]
+	vmovdqu	xmm3,XMMWORD[48+rsi]
+	vpshufb	xmm0,xmm0,xmm7
+	lea	rbp,[K256]
+	vpshufb	xmm1,xmm1,xmm7
+	vpshufb	xmm2,xmm2,xmm7
+	vpaddd	xmm4,xmm0,XMMWORD[rbp]
+	vpshufb	xmm3,xmm3,xmm7
+	vpaddd	xmm5,xmm1,XMMWORD[32+rbp]
+	vpaddd	xmm6,xmm2,XMMWORD[64+rbp]
+	vpaddd	xmm7,xmm3,XMMWORD[96+rbp]
+	vmovdqa	XMMWORD[rsp],xmm4
+	mov	r14d,eax
+	vmovdqa	XMMWORD[16+rsp],xmm5
+	mov	edi,ebx
+	vmovdqa	XMMWORD[32+rsp],xmm6
+	xor	edi,ecx
+	vmovdqa	XMMWORD[48+rsp],xmm7
+	mov	r13d,r8d
+	jmp	NEAR $L$avx_00_47
+
+ALIGN	16
+$L$avx_00_47:
+	sub	rbp,-128
+	vpalignr	xmm4,xmm1,xmm0,4
+	shrd	r13d,r13d,14
+	mov	eax,r14d
+	mov	r12d,r9d
+	vpalignr	xmm7,xmm3,xmm2,4
+	shrd	r14d,r14d,9
+	xor	r13d,r8d
+	xor	r12d,r10d
+	vpsrld	xmm6,xmm4,7
+	shrd	r13d,r13d,5
+	xor	r14d,eax
+	and	r12d,r8d
+	vpaddd	xmm0,xmm0,xmm7
+	xor	r13d,r8d
+	add	r11d,DWORD[rsp]
+	mov	r15d,eax
+	vpsrld	xmm7,xmm4,3
+	xor	r12d,r10d
+	shrd	r14d,r14d,11
+	xor	r15d,ebx
+	vpslld	xmm5,xmm4,14
+	add	r11d,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	vpxor	xmm4,xmm7,xmm6
+	xor	r14d,eax
+	add	r11d,r13d
+	xor	edi,ebx
+	vpshufd	xmm7,xmm3,250
+	shrd	r14d,r14d,2
+	add	edx,r11d
+	add	r11d,edi
+	vpsrld	xmm6,xmm6,11
+	mov	r13d,edx
+	add	r14d,r11d
+	shrd	r13d,r13d,14
+	vpxor	xmm4,xmm4,xmm5
+	mov	r11d,r14d
+	mov	r12d,r8d
+	shrd	r14d,r14d,9
+	vpslld	xmm5,xmm5,11
+	xor	r13d,edx
+	xor	r12d,r9d
+	shrd	r13d,r13d,5
+	vpxor	xmm4,xmm4,xmm6
+	xor	r14d,r11d
+	and	r12d,edx
+	xor	r13d,edx
+	vpsrld	xmm6,xmm7,10
+	add	r10d,DWORD[4+rsp]
+	mov	edi,r11d
+	xor	r12d,r9d
+	vpxor	xmm4,xmm4,xmm5
+	shrd	r14d,r14d,11
+	xor	edi,eax
+	add	r10d,r12d
+	vpsrlq	xmm7,xmm7,17
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,r11d
+	vpaddd	xmm0,xmm0,xmm4
+	add	r10d,r13d
+	xor	r15d,eax
+	shrd	r14d,r14d,2
+	vpxor	xmm6,xmm6,xmm7
+	add	ecx,r10d
+	add	r10d,r15d
+	mov	r13d,ecx
+	vpsrlq	xmm7,xmm7,2
+	add	r14d,r10d
+	shrd	r13d,r13d,14
+	mov	r10d,r14d
+	vpxor	xmm6,xmm6,xmm7
+	mov	r12d,edx
+	shrd	r14d,r14d,9
+	xor	r13d,ecx
+	vpshufb	xmm6,xmm6,xmm8
+	xor	r12d,r8d
+	shrd	r13d,r13d,5
+	xor	r14d,r10d
+	vpaddd	xmm0,xmm0,xmm6
+	and	r12d,ecx
+	xor	r13d,ecx
+	add	r9d,DWORD[8+rsp]
+	vpshufd	xmm7,xmm0,80
+	mov	r15d,r10d
+	xor	r12d,r8d
+	shrd	r14d,r14d,11
+	vpsrld	xmm6,xmm7,10
+	xor	r15d,r11d
+	add	r9d,r12d
+	shrd	r13d,r13d,6
+	vpsrlq	xmm7,xmm7,17
+	and	edi,r15d
+	xor	r14d,r10d
+	add	r9d,r13d
+	vpxor	xmm6,xmm6,xmm7
+	xor	edi,r11d
+	shrd	r14d,r14d,2
+	add	ebx,r9d
+	vpsrlq	xmm7,xmm7,2
+	add	r9d,edi
+	mov	r13d,ebx
+	add	r14d,r9d
+	vpxor	xmm6,xmm6,xmm7
+	shrd	r13d,r13d,14
+	mov	r9d,r14d
+	mov	r12d,ecx
+	vpshufb	xmm6,xmm6,xmm9
+	shrd	r14d,r14d,9
+	xor	r13d,ebx
+	xor	r12d,edx
+	vpaddd	xmm0,xmm0,xmm6
+	shrd	r13d,r13d,5
+	xor	r14d,r9d
+	and	r12d,ebx
+	vpaddd	xmm6,xmm0,XMMWORD[rbp]
+	xor	r13d,ebx
+	add	r8d,DWORD[12+rsp]
+	mov	edi,r9d
+	xor	r12d,edx
+	shrd	r14d,r14d,11
+	xor	edi,r10d
+	add	r8d,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,r9d
+	add	r8d,r13d
+	xor	r15d,r10d
+	shrd	r14d,r14d,2
+	add	eax,r8d
+	add	r8d,r15d
+	mov	r13d,eax
+	add	r14d,r8d
+	vmovdqa	XMMWORD[rsp],xmm6
+	vpalignr	xmm4,xmm2,xmm1,4
+	shrd	r13d,r13d,14
+	mov	r8d,r14d
+	mov	r12d,ebx
+	vpalignr	xmm7,xmm0,xmm3,4
+	shrd	r14d,r14d,9
+	xor	r13d,eax
+	xor	r12d,ecx
+	vpsrld	xmm6,xmm4,7
+	shrd	r13d,r13d,5
+	xor	r14d,r8d
+	and	r12d,eax
+	vpaddd	xmm1,xmm1,xmm7
+	xor	r13d,eax
+	add	edx,DWORD[16+rsp]
+	mov	r15d,r8d
+	vpsrld	xmm7,xmm4,3
+	xor	r12d,ecx
+	shrd	r14d,r14d,11
+	xor	r15d,r9d
+	vpslld	xmm5,xmm4,14
+	add	edx,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	vpxor	xmm4,xmm7,xmm6
+	xor	r14d,r8d
+	add	edx,r13d
+	xor	edi,r9d
+	vpshufd	xmm7,xmm0,250
+	shrd	r14d,r14d,2
+	add	r11d,edx
+	add	edx,edi
+	vpsrld	xmm6,xmm6,11
+	mov	r13d,r11d
+	add	r14d,edx
+	shrd	r13d,r13d,14
+	vpxor	xmm4,xmm4,xmm5
+	mov	edx,r14d
+	mov	r12d,eax
+	shrd	r14d,r14d,9
+	vpslld	xmm5,xmm5,11
+	xor	r13d,r11d
+	xor	r12d,ebx
+	shrd	r13d,r13d,5
+	vpxor	xmm4,xmm4,xmm6
+	xor	r14d,edx
+	and	r12d,r11d
+	xor	r13d,r11d
+	vpsrld	xmm6,xmm7,10
+	add	ecx,DWORD[20+rsp]
+	mov	edi,edx
+	xor	r12d,ebx
+	vpxor	xmm4,xmm4,xmm5
+	shrd	r14d,r14d,11
+	xor	edi,r8d
+	add	ecx,r12d
+	vpsrlq	xmm7,xmm7,17
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,edx
+	vpaddd	xmm1,xmm1,xmm4
+	add	ecx,r13d
+	xor	r15d,r8d
+	shrd	r14d,r14d,2
+	vpxor	xmm6,xmm6,xmm7
+	add	r10d,ecx
+	add	ecx,r15d
+	mov	r13d,r10d
+	vpsrlq	xmm7,xmm7,2
+	add	r14d,ecx
+	shrd	r13d,r13d,14
+	mov	ecx,r14d
+	vpxor	xmm6,xmm6,xmm7
+	mov	r12d,r11d
+	shrd	r14d,r14d,9
+	xor	r13d,r10d
+	vpshufb	xmm6,xmm6,xmm8
+	xor	r12d,eax
+	shrd	r13d,r13d,5
+	xor	r14d,ecx
+	vpaddd	xmm1,xmm1,xmm6
+	and	r12d,r10d
+	xor	r13d,r10d
+	add	ebx,DWORD[24+rsp]
+	vpshufd	xmm7,xmm1,80
+	mov	r15d,ecx
+	xor	r12d,eax
+	shrd	r14d,r14d,11
+	vpsrld	xmm6,xmm7,10
+	xor	r15d,edx
+	add	ebx,r12d
+	shrd	r13d,r13d,6
+	vpsrlq	xmm7,xmm7,17
+	and	edi,r15d
+	xor	r14d,ecx
+	add	ebx,r13d
+	vpxor	xmm6,xmm6,xmm7
+	xor	edi,edx
+	shrd	r14d,r14d,2
+	add	r9d,ebx
+	vpsrlq	xmm7,xmm7,2
+	add	ebx,edi
+	mov	r13d,r9d
+	add	r14d,ebx
+	vpxor	xmm6,xmm6,xmm7
+	shrd	r13d,r13d,14
+	mov	ebx,r14d
+	mov	r12d,r10d
+	vpshufb	xmm6,xmm6,xmm9
+	shrd	r14d,r14d,9
+	xor	r13d,r9d
+	xor	r12d,r11d
+	vpaddd	xmm1,xmm1,xmm6
+	shrd	r13d,r13d,5
+	xor	r14d,ebx
+	and	r12d,r9d
+	vpaddd	xmm6,xmm1,XMMWORD[32+rbp]
+	xor	r13d,r9d
+	add	eax,DWORD[28+rsp]
+	mov	edi,ebx
+	xor	r12d,r11d
+	shrd	r14d,r14d,11
+	xor	edi,ecx
+	add	eax,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,ebx
+	add	eax,r13d
+	xor	r15d,ecx
+	shrd	r14d,r14d,2
+	add	r8d,eax
+	add	eax,r15d
+	mov	r13d,r8d
+	add	r14d,eax
+	vmovdqa	XMMWORD[16+rsp],xmm6
+	vpalignr	xmm4,xmm3,xmm2,4
+	shrd	r13d,r13d,14
+	mov	eax,r14d
+	mov	r12d,r9d
+	vpalignr	xmm7,xmm1,xmm0,4
+	shrd	r14d,r14d,9
+	xor	r13d,r8d
+	xor	r12d,r10d
+	vpsrld	xmm6,xmm4,7
+	shrd	r13d,r13d,5
+	xor	r14d,eax
+	and	r12d,r8d
+	vpaddd	xmm2,xmm2,xmm7
+	xor	r13d,r8d
+	add	r11d,DWORD[32+rsp]
+	mov	r15d,eax
+	vpsrld	xmm7,xmm4,3
+	xor	r12d,r10d
+	shrd	r14d,r14d,11
+	xor	r15d,ebx
+	vpslld	xmm5,xmm4,14
+	add	r11d,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	vpxor	xmm4,xmm7,xmm6
+	xor	r14d,eax
+	add	r11d,r13d
+	xor	edi,ebx
+	vpshufd	xmm7,xmm1,250
+	shrd	r14d,r14d,2
+	add	edx,r11d
+	add	r11d,edi
+	vpsrld	xmm6,xmm6,11
+	mov	r13d,edx
+	add	r14d,r11d
+	shrd	r13d,r13d,14
+	vpxor	xmm4,xmm4,xmm5
+	mov	r11d,r14d
+	mov	r12d,r8d
+	shrd	r14d,r14d,9
+	vpslld	xmm5,xmm5,11
+	xor	r13d,edx
+	xor	r12d,r9d
+	shrd	r13d,r13d,5
+	vpxor	xmm4,xmm4,xmm6
+	xor	r14d,r11d
+	and	r12d,edx
+	xor	r13d,edx
+	vpsrld	xmm6,xmm7,10
+	add	r10d,DWORD[36+rsp]
+	mov	edi,r11d
+	xor	r12d,r9d
+	vpxor	xmm4,xmm4,xmm5
+	shrd	r14d,r14d,11
+	xor	edi,eax
+	add	r10d,r12d
+	vpsrlq	xmm7,xmm7,17
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,r11d
+	vpaddd	xmm2,xmm2,xmm4
+	add	r10d,r13d
+	xor	r15d,eax
+	shrd	r14d,r14d,2
+	vpxor	xmm6,xmm6,xmm7
+	add	ecx,r10d
+	add	r10d,r15d
+	mov	r13d,ecx
+	vpsrlq	xmm7,xmm7,2
+	add	r14d,r10d
+	shrd	r13d,r13d,14
+	mov	r10d,r14d
+	vpxor	xmm6,xmm6,xmm7
+	mov	r12d,edx
+	shrd	r14d,r14d,9
+	xor	r13d,ecx
+	vpshufb	xmm6,xmm6,xmm8
+	xor	r12d,r8d
+	shrd	r13d,r13d,5
+	xor	r14d,r10d
+	vpaddd	xmm2,xmm2,xmm6
+	and	r12d,ecx
+	xor	r13d,ecx
+	add	r9d,DWORD[40+rsp]
+	vpshufd	xmm7,xmm2,80
+	mov	r15d,r10d
+	xor	r12d,r8d
+	shrd	r14d,r14d,11
+	vpsrld	xmm6,xmm7,10
+	xor	r15d,r11d
+	add	r9d,r12d
+	shrd	r13d,r13d,6
+	vpsrlq	xmm7,xmm7,17
+	and	edi,r15d
+	xor	r14d,r10d
+	add	r9d,r13d
+	vpxor	xmm6,xmm6,xmm7
+	xor	edi,r11d
+	shrd	r14d,r14d,2
+	add	ebx,r9d
+	vpsrlq	xmm7,xmm7,2
+	add	r9d,edi
+	mov	r13d,ebx
+	add	r14d,r9d
+	vpxor	xmm6,xmm6,xmm7
+	shrd	r13d,r13d,14
+	mov	r9d,r14d
+	mov	r12d,ecx
+	vpshufb	xmm6,xmm6,xmm9
+	shrd	r14d,r14d,9
+	xor	r13d,ebx
+	xor	r12d,edx
+	vpaddd	xmm2,xmm2,xmm6
+	shrd	r13d,r13d,5
+	xor	r14d,r9d
+	and	r12d,ebx
+	vpaddd	xmm6,xmm2,XMMWORD[64+rbp]
+	xor	r13d,ebx
+	add	r8d,DWORD[44+rsp]
+	mov	edi,r9d
+	xor	r12d,edx
+	shrd	r14d,r14d,11
+	xor	edi,r10d
+	add	r8d,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,r9d
+	add	r8d,r13d
+	xor	r15d,r10d
+	shrd	r14d,r14d,2
+	add	eax,r8d
+	add	r8d,r15d
+	mov	r13d,eax
+	add	r14d,r8d
+	vmovdqa	XMMWORD[32+rsp],xmm6
+	vpalignr	xmm4,xmm0,xmm3,4
+	shrd	r13d,r13d,14
+	mov	r8d,r14d
+	mov	r12d,ebx
+	vpalignr	xmm7,xmm2,xmm1,4
+	shrd	r14d,r14d,9
+	xor	r13d,eax
+	xor	r12d,ecx
+	vpsrld	xmm6,xmm4,7
+	shrd	r13d,r13d,5
+	xor	r14d,r8d
+	and	r12d,eax
+	vpaddd	xmm3,xmm3,xmm7
+	xor	r13d,eax
+	add	edx,DWORD[48+rsp]
+	mov	r15d,r8d
+	vpsrld	xmm7,xmm4,3
+	xor	r12d,ecx
+	shrd	r14d,r14d,11
+	xor	r15d,r9d
+	vpslld	xmm5,xmm4,14
+	add	edx,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	vpxor	xmm4,xmm7,xmm6
+	xor	r14d,r8d
+	add	edx,r13d
+	xor	edi,r9d
+	vpshufd	xmm7,xmm2,250
+	shrd	r14d,r14d,2
+	add	r11d,edx
+	add	edx,edi
+	vpsrld	xmm6,xmm6,11
+	mov	r13d,r11d
+	add	r14d,edx
+	shrd	r13d,r13d,14
+	vpxor	xmm4,xmm4,xmm5
+	mov	edx,r14d
+	mov	r12d,eax
+	shrd	r14d,r14d,9
+	vpslld	xmm5,xmm5,11
+	xor	r13d,r11d
+	xor	r12d,ebx
+	shrd	r13d,r13d,5
+	vpxor	xmm4,xmm4,xmm6
+	xor	r14d,edx
+	and	r12d,r11d
+	xor	r13d,r11d
+	vpsrld	xmm6,xmm7,10
+	add	ecx,DWORD[52+rsp]
+	mov	edi,edx
+	xor	r12d,ebx
+	vpxor	xmm4,xmm4,xmm5
+	shrd	r14d,r14d,11
+	xor	edi,r8d
+	add	ecx,r12d
+	vpsrlq	xmm7,xmm7,17
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,edx
+	vpaddd	xmm3,xmm3,xmm4
+	add	ecx,r13d
+	xor	r15d,r8d
+	shrd	r14d,r14d,2
+	vpxor	xmm6,xmm6,xmm7
+	add	r10d,ecx
+	add	ecx,r15d
+	mov	r13d,r10d
+	vpsrlq	xmm7,xmm7,2
+	add	r14d,ecx
+	shrd	r13d,r13d,14
+	mov	ecx,r14d
+	vpxor	xmm6,xmm6,xmm7
+	mov	r12d,r11d
+	shrd	r14d,r14d,9
+	xor	r13d,r10d
+	vpshufb	xmm6,xmm6,xmm8
+	xor	r12d,eax
+	shrd	r13d,r13d,5
+	xor	r14d,ecx
+	vpaddd	xmm3,xmm3,xmm6
+	and	r12d,r10d
+	xor	r13d,r10d
+	add	ebx,DWORD[56+rsp]
+	vpshufd	xmm7,xmm3,80
+	mov	r15d,ecx
+	xor	r12d,eax
+	shrd	r14d,r14d,11
+	vpsrld	xmm6,xmm7,10
+	xor	r15d,edx
+	add	ebx,r12d
+	shrd	r13d,r13d,6
+	vpsrlq	xmm7,xmm7,17
+	and	edi,r15d
+	xor	r14d,ecx
+	add	ebx,r13d
+	vpxor	xmm6,xmm6,xmm7
+	xor	edi,edx
+	shrd	r14d,r14d,2
+	add	r9d,ebx
+	vpsrlq	xmm7,xmm7,2
+	add	ebx,edi
+	mov	r13d,r9d
+	add	r14d,ebx
+	vpxor	xmm6,xmm6,xmm7
+	shrd	r13d,r13d,14
+	mov	ebx,r14d
+	mov	r12d,r10d
+	vpshufb	xmm6,xmm6,xmm9
+	shrd	r14d,r14d,9
+	xor	r13d,r9d
+	xor	r12d,r11d
+	vpaddd	xmm3,xmm3,xmm6
+	shrd	r13d,r13d,5
+	xor	r14d,ebx
+	and	r12d,r9d
+	vpaddd	xmm6,xmm3,XMMWORD[96+rbp]
+	xor	r13d,r9d
+	add	eax,DWORD[60+rsp]
+	mov	edi,ebx
+	xor	r12d,r11d
+	shrd	r14d,r14d,11
+	xor	edi,ecx
+	add	eax,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,ebx
+	add	eax,r13d
+	xor	r15d,ecx
+	shrd	r14d,r14d,2
+	add	r8d,eax
+	add	eax,r15d
+	mov	r13d,r8d
+	add	r14d,eax
+	vmovdqa	XMMWORD[48+rsp],xmm6
+	cmp	BYTE[131+rbp],0
+	jne	NEAR $L$avx_00_47
+	shrd	r13d,r13d,14
+	mov	eax,r14d
+	mov	r12d,r9d
+	shrd	r14d,r14d,9
+	xor	r13d,r8d
+	xor	r12d,r10d
+	shrd	r13d,r13d,5
+	xor	r14d,eax
+	and	r12d,r8d
+	xor	r13d,r8d
+	add	r11d,DWORD[rsp]
+	mov	r15d,eax
+	xor	r12d,r10d
+	shrd	r14d,r14d,11
+	xor	r15d,ebx
+	add	r11d,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	xor	r14d,eax
+	add	r11d,r13d
+	xor	edi,ebx
+	shrd	r14d,r14d,2
+	add	edx,r11d
+	add	r11d,edi
+	mov	r13d,edx
+	add	r14d,r11d
+	shrd	r13d,r13d,14
+	mov	r11d,r14d
+	mov	r12d,r8d
+	shrd	r14d,r14d,9
+	xor	r13d,edx
+	xor	r12d,r9d
+	shrd	r13d,r13d,5
+	xor	r14d,r11d
+	and	r12d,edx
+	xor	r13d,edx
+	add	r10d,DWORD[4+rsp]
+	mov	edi,r11d
+	xor	r12d,r9d
+	shrd	r14d,r14d,11
+	xor	edi,eax
+	add	r10d,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,r11d
+	add	r10d,r13d
+	xor	r15d,eax
+	shrd	r14d,r14d,2
+	add	ecx,r10d
+	add	r10d,r15d
+	mov	r13d,ecx
+	add	r14d,r10d
+	shrd	r13d,r13d,14
+	mov	r10d,r14d
+	mov	r12d,edx
+	shrd	r14d,r14d,9
+	xor	r13d,ecx
+	xor	r12d,r8d
+	shrd	r13d,r13d,5
+	xor	r14d,r10d
+	and	r12d,ecx
+	xor	r13d,ecx
+	add	r9d,DWORD[8+rsp]
+	mov	r15d,r10d
+	xor	r12d,r8d
+	shrd	r14d,r14d,11
+	xor	r15d,r11d
+	add	r9d,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	xor	r14d,r10d
+	add	r9d,r13d
+	xor	edi,r11d
+	shrd	r14d,r14d,2
+	add	ebx,r9d
+	add	r9d,edi
+	mov	r13d,ebx
+	add	r14d,r9d
+	shrd	r13d,r13d,14
+	mov	r9d,r14d
+	mov	r12d,ecx
+	shrd	r14d,r14d,9
+	xor	r13d,ebx
+	xor	r12d,edx
+	shrd	r13d,r13d,5
+	xor	r14d,r9d
+	and	r12d,ebx
+	xor	r13d,ebx
+	add	r8d,DWORD[12+rsp]
+	mov	edi,r9d
+	xor	r12d,edx
+	shrd	r14d,r14d,11
+	xor	edi,r10d
+	add	r8d,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,r9d
+	add	r8d,r13d
+	xor	r15d,r10d
+	shrd	r14d,r14d,2
+	add	eax,r8d
+	add	r8d,r15d
+	mov	r13d,eax
+	add	r14d,r8d
+	shrd	r13d,r13d,14
+	mov	r8d,r14d
+	mov	r12d,ebx
+	shrd	r14d,r14d,9
+	xor	r13d,eax
+	xor	r12d,ecx
+	shrd	r13d,r13d,5
+	xor	r14d,r8d
+	and	r12d,eax
+	xor	r13d,eax
+	add	edx,DWORD[16+rsp]
+	mov	r15d,r8d
+	xor	r12d,ecx
+	shrd	r14d,r14d,11
+	xor	r15d,r9d
+	add	edx,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	xor	r14d,r8d
+	add	edx,r13d
+	xor	edi,r9d
+	shrd	r14d,r14d,2
+	add	r11d,edx
+	add	edx,edi
+	mov	r13d,r11d
+	add	r14d,edx
+	shrd	r13d,r13d,14
+	mov	edx,r14d
+	mov	r12d,eax
+	shrd	r14d,r14d,9
+	xor	r13d,r11d
+	xor	r12d,ebx
+	shrd	r13d,r13d,5
+	xor	r14d,edx
+	and	r12d,r11d
+	xor	r13d,r11d
+	add	ecx,DWORD[20+rsp]
+	mov	edi,edx
+	xor	r12d,ebx
+	shrd	r14d,r14d,11
+	xor	edi,r8d
+	add	ecx,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,edx
+	add	ecx,r13d
+	xor	r15d,r8d
+	shrd	r14d,r14d,2
+	add	r10d,ecx
+	add	ecx,r15d
+	mov	r13d,r10d
+	add	r14d,ecx
+	shrd	r13d,r13d,14
+	mov	ecx,r14d
+	mov	r12d,r11d
+	shrd	r14d,r14d,9
+	xor	r13d,r10d
+	xor	r12d,eax
+	shrd	r13d,r13d,5
+	xor	r14d,ecx
+	and	r12d,r10d
+	xor	r13d,r10d
+	add	ebx,DWORD[24+rsp]
+	mov	r15d,ecx
+	xor	r12d,eax
+	shrd	r14d,r14d,11
+	xor	r15d,edx
+	add	ebx,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	xor	r14d,ecx
+	add	ebx,r13d
+	xor	edi,edx
+	shrd	r14d,r14d,2
+	add	r9d,ebx
+	add	ebx,edi
+	mov	r13d,r9d
+	add	r14d,ebx
+	shrd	r13d,r13d,14
+	mov	ebx,r14d
+	mov	r12d,r10d
+	shrd	r14d,r14d,9
+	xor	r13d,r9d
+	xor	r12d,r11d
+	shrd	r13d,r13d,5
+	xor	r14d,ebx
+	and	r12d,r9d
+	xor	r13d,r9d
+	add	eax,DWORD[28+rsp]
+	mov	edi,ebx
+	xor	r12d,r11d
+	shrd	r14d,r14d,11
+	xor	edi,ecx
+	add	eax,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,ebx
+	add	eax,r13d
+	xor	r15d,ecx
+	shrd	r14d,r14d,2
+	add	r8d,eax
+	add	eax,r15d
+	mov	r13d,r8d
+	add	r14d,eax
+	shrd	r13d,r13d,14
+	mov	eax,r14d
+	mov	r12d,r9d
+	shrd	r14d,r14d,9
+	xor	r13d,r8d
+	xor	r12d,r10d
+	shrd	r13d,r13d,5
+	xor	r14d,eax
+	and	r12d,r8d
+	xor	r13d,r8d
+	add	r11d,DWORD[32+rsp]
+	mov	r15d,eax
+	xor	r12d,r10d
+	shrd	r14d,r14d,11
+	xor	r15d,ebx
+	add	r11d,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	xor	r14d,eax
+	add	r11d,r13d
+	xor	edi,ebx
+	shrd	r14d,r14d,2
+	add	edx,r11d
+	add	r11d,edi
+	mov	r13d,edx
+	add	r14d,r11d
+	shrd	r13d,r13d,14
+	mov	r11d,r14d
+	mov	r12d,r8d
+	shrd	r14d,r14d,9
+	xor	r13d,edx
+	xor	r12d,r9d
+	shrd	r13d,r13d,5
+	xor	r14d,r11d
+	and	r12d,edx
+	xor	r13d,edx
+	add	r10d,DWORD[36+rsp]
+	mov	edi,r11d
+	xor	r12d,r9d
+	shrd	r14d,r14d,11
+	xor	edi,eax
+	add	r10d,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,r11d
+	add	r10d,r13d
+	xor	r15d,eax
+	shrd	r14d,r14d,2
+	add	ecx,r10d
+	add	r10d,r15d
+	mov	r13d,ecx
+	add	r14d,r10d
+	shrd	r13d,r13d,14
+	mov	r10d,r14d
+	mov	r12d,edx
+	shrd	r14d,r14d,9
+	xor	r13d,ecx
+	xor	r12d,r8d
+	shrd	r13d,r13d,5
+	xor	r14d,r10d
+	and	r12d,ecx
+	xor	r13d,ecx
+	add	r9d,DWORD[40+rsp]
+	mov	r15d,r10d
+	xor	r12d,r8d
+	shrd	r14d,r14d,11
+	xor	r15d,r11d
+	add	r9d,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	xor	r14d,r10d
+	add	r9d,r13d
+	xor	edi,r11d
+	shrd	r14d,r14d,2
+	add	ebx,r9d
+	add	r9d,edi
+	mov	r13d,ebx
+	add	r14d,r9d
+	shrd	r13d,r13d,14
+	mov	r9d,r14d
+	mov	r12d,ecx
+	shrd	r14d,r14d,9
+	xor	r13d,ebx
+	xor	r12d,edx
+	shrd	r13d,r13d,5
+	xor	r14d,r9d
+	and	r12d,ebx
+	xor	r13d,ebx
+	add	r8d,DWORD[44+rsp]
+	mov	edi,r9d
+	xor	r12d,edx
+	shrd	r14d,r14d,11
+	xor	edi,r10d
+	add	r8d,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,r9d
+	add	r8d,r13d
+	xor	r15d,r10d
+	shrd	r14d,r14d,2
+	add	eax,r8d
+	add	r8d,r15d
+	mov	r13d,eax
+	add	r14d,r8d
+	shrd	r13d,r13d,14
+	mov	r8d,r14d
+	mov	r12d,ebx
+	shrd	r14d,r14d,9
+	xor	r13d,eax
+	xor	r12d,ecx
+	shrd	r13d,r13d,5
+	xor	r14d,r8d
+	and	r12d,eax
+	xor	r13d,eax
+	add	edx,DWORD[48+rsp]
+	mov	r15d,r8d
+	xor	r12d,ecx
+	shrd	r14d,r14d,11
+	xor	r15d,r9d
+	add	edx,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	xor	r14d,r8d
+	add	edx,r13d
+	xor	edi,r9d
+	shrd	r14d,r14d,2
+	add	r11d,edx
+	add	edx,edi
+	mov	r13d,r11d
+	add	r14d,edx
+	shrd	r13d,r13d,14
+	mov	edx,r14d
+	mov	r12d,eax
+	shrd	r14d,r14d,9
+	xor	r13d,r11d
+	xor	r12d,ebx
+	shrd	r13d,r13d,5
+	xor	r14d,edx
+	and	r12d,r11d
+	xor	r13d,r11d
+	add	ecx,DWORD[52+rsp]
+	mov	edi,edx
+	xor	r12d,ebx
+	shrd	r14d,r14d,11
+	xor	edi,r8d
+	add	ecx,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,edx
+	add	ecx,r13d
+	xor	r15d,r8d
+	shrd	r14d,r14d,2
+	add	r10d,ecx
+	add	ecx,r15d
+	mov	r13d,r10d
+	add	r14d,ecx
+	shrd	r13d,r13d,14
+	mov	ecx,r14d
+	mov	r12d,r11d
+	shrd	r14d,r14d,9
+	xor	r13d,r10d
+	xor	r12d,eax
+	shrd	r13d,r13d,5
+	xor	r14d,ecx
+	and	r12d,r10d
+	xor	r13d,r10d
+	add	ebx,DWORD[56+rsp]
+	mov	r15d,ecx
+	xor	r12d,eax
+	shrd	r14d,r14d,11
+	xor	r15d,edx
+	add	ebx,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	xor	r14d,ecx
+	add	ebx,r13d
+	xor	edi,edx
+	shrd	r14d,r14d,2
+	add	r9d,ebx
+	add	ebx,edi
+	mov	r13d,r9d
+	add	r14d,ebx
+	shrd	r13d,r13d,14
+	mov	ebx,r14d
+	mov	r12d,r10d
+	shrd	r14d,r14d,9
+	xor	r13d,r9d
+	xor	r12d,r11d
+	shrd	r13d,r13d,5
+	xor	r14d,ebx
+	and	r12d,r9d
+	xor	r13d,r9d
+	add	eax,DWORD[60+rsp]
+	mov	edi,ebx
+	xor	r12d,r11d
+	shrd	r14d,r14d,11
+	xor	edi,ecx
+	add	eax,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,ebx
+	add	eax,r13d
+	xor	r15d,ecx
+	shrd	r14d,r14d,2
+	add	r8d,eax
+	add	eax,r15d
+	mov	r13d,r8d
+	add	r14d,eax
+	mov	rdi,QWORD[((64+0))+rsp]
+	mov	eax,r14d
+
+	add	eax,DWORD[rdi]
+	lea	rsi,[64+rsi]
+	add	ebx,DWORD[4+rdi]
+	add	ecx,DWORD[8+rdi]
+	add	edx,DWORD[12+rdi]
+	add	r8d,DWORD[16+rdi]
+	add	r9d,DWORD[20+rdi]
+	add	r10d,DWORD[24+rdi]
+	add	r11d,DWORD[28+rdi]
+
+	cmp	rsi,QWORD[((64+16))+rsp]
+
+	mov	DWORD[rdi],eax
+	mov	DWORD[4+rdi],ebx
+	mov	DWORD[8+rdi],ecx
+	mov	DWORD[12+rdi],edx
+	mov	DWORD[16+rdi],r8d
+	mov	DWORD[20+rdi],r9d
+	mov	DWORD[24+rdi],r10d
+	mov	DWORD[28+rdi],r11d
+	jb	NEAR $L$loop_avx
+
+	mov	rsi,QWORD[((64+24))+rsp]
+	vzeroupper
+	movaps	xmm6,XMMWORD[((64+32))+rsp]
+	movaps	xmm7,XMMWORD[((64+48))+rsp]
+	movaps	xmm8,XMMWORD[((64+64))+rsp]
+	movaps	xmm9,XMMWORD[((64+80))+rsp]
+	mov	r15,QWORD[((-48))+rsi]
+	mov	r14,QWORD[((-40))+rsi]
+	mov	r13,QWORD[((-32))+rsi]
+	mov	r12,QWORD[((-24))+rsi]
+	mov	rbp,QWORD[((-16))+rsi]
+	mov	rbx,QWORD[((-8))+rsi]
+	lea	rsp,[rsi]
+$L$epilogue_avx:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_sha256_block_data_order_avx:
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+se_handler:	; exception handler named by the .xdata records below; field names in these comments follow the Win64 CONTEXT (r8) and DISPATCHER_CONTEXT (r9) layouts - TODO confirm offsets against winnt.h
+	push	rsi	; save every register this handler modifies (popped in reverse order before returning)
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq	; flags are saved too: the DD-encoded copies below begin with cld
+	sub	rsp,64	; scratch area for the call below; its stack arguments are stored at [32+rsp]..[56+rsp]
+
+	mov	rax,QWORD[120+r8]	; presumably context->Rax; used as the frame base when Rip is before the prologue label
+	mov	rbx,QWORD[248+r8]	; presumably context->Rip
+
+	mov	rsi,QWORD[8+r9]	; presumably disp->ImageBase
+	mov	r11,QWORD[56+r9]	; presumably disp->HandlerData (pointer to two DWORD RVAs)
+
+	mov	r10d,DWORD[r11]	; first RVA: the prologue label
+	lea	r10,[r10*1+rsi]	; RVA + image base = absolute address
+	cmp	rbx,r10
+	jb	NEAR $L$in_prologue	; Rip before the prologue label: frame not built yet, continue with rax as loaded above
+
+	mov	rax,QWORD[152+r8]	; presumably context->Rsp
+
+	mov	r10d,DWORD[4+r11]	; second RVA: the epilogue label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$in_prologue	; Rip at or after the epilogue label: rsp is already restored there, so it is used as-is
+	mov	rsi,rax	; keep the in-function rsp; the xmm save area is addressed from it below
+	mov	rax,QWORD[((64+24))+rax]	; caller-side rsp, read from the same frame slot (64+24) the function epilogue reloads
+
+	mov	rbx,QWORD[((-8))+rax]	; callee-saved registers, at the same negative offsets the function epilogue pops them from
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	r15,QWORD[((-48))+rax]
+	mov	QWORD[144+r8],rbx	; write them back into the context record (presumably Rbx)
+	mov	QWORD[160+r8],rbp	; presumably Rbp
+	mov	QWORD[216+r8],r12	; presumably R12
+	mov	QWORD[224+r8],r13	; presumably R13
+	mov	QWORD[232+r8],r14	; presumably R14
+	mov	QWORD[240+r8],r15	; presumably R15
+
+	lea	r10,[$L$epilogue]
+	cmp	rbx,r10	; NOTE(review): rbx was reloaded from [-8+rax] above, so this no longer compares the Rip value loaded at the top - confirm intent
+	jb	NEAR $L$in_prologue	; below the first routine's epilogue label: skip the xmm copy (presumably the non-SIMD code path)
+
+	lea	rsi,[((64+32))+rsi]	; source: xmm save area at frame offset 64+32, the offset the SIMD epilogue reloads xmm6 from
+	lea	rdi,[512+r8]	; destination: presumably &context->Xmm6
+	mov	ecx,8	; 8 qwords = 64 bytes = four 16-byte registers
+	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
+
+$L$in_prologue:
+	mov	rdi,QWORD[8+rax]	; caller's rdi/rsi, from the [8+rsp]/[16+rsp] slots the WIN64 epilogue reloads them from
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; presumably context->Rsp
+	mov	QWORD[168+r8],rsi	; presumably context->Rsi
+	mov	QWORD[176+r8],rdi	; presumably context->Rdi
+
+	mov	rdi,QWORD[40+r9]	; copy destination: presumably disp->ContextRecord
+	mov	rsi,r8	; copy source: the context record patched above
+	mov	ecx,154	; 154 qwords = 1232 bytes, presumably sizeof(CONTEXT)
+	DD	0xa548f3fc	; cld; rep movsq
+
+	mov	rsi,r9	; keep the dispatcher pointer; r9 is overwritten as an argument below
+	xor	rcx,rcx	; arg1 = 0 (presumably UNW_FLAG_NHANDLER)
+	mov	rdx,QWORD[8+rsi]	; arg2, presumably ImageBase
+	mov	r8,QWORD[rsi]	; arg3, presumably ControlPc
+	mov	r9,QWORD[16+rsi]	; arg4, presumably FunctionEntry
+	mov	r10,QWORD[40+rsi]	; presumably ContextRecord
+	lea	r11,[56+rsi]	; address of the HandlerData field
+	lea	r12,[24+rsi]	; address of the field at offset 24, presumably EstablisherFrame
+	mov	QWORD[32+rsp],r10	; arg5
+	mov	QWORD[40+rsp],r11	; arg6
+	mov	QWORD[48+rsp],r12	; arg7
+	mov	QWORD[56+rsp],rcx	; arg8 = 0
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; return value 1, presumably ExceptionContinueSearch
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+section	.pdata rdata align=4
+ALIGN	4
+	DD	$L$SEH_begin_sha256_block_data_order wrt ..imagebase	; base routine: image-relative start address (presumably one RUNTIME_FUNCTION triple per routine)
+	DD	$L$SEH_end_sha256_block_data_order wrt ..imagebase	; image-relative end address
+	DD	$L$SEH_info_sha256_block_data_order wrt ..imagebase	; image-relative address of its .xdata record
+	DD	$L$SEH_begin_sha256_block_data_order_ssse3 wrt ..imagebase	; ssse3 routine: start
+	DD	$L$SEH_end_sha256_block_data_order_ssse3 wrt ..imagebase	; end
+	DD	$L$SEH_info_sha256_block_data_order_ssse3 wrt ..imagebase	; .xdata record
+	DD	$L$SEH_begin_sha256_block_data_order_avx wrt ..imagebase	; avx routine: start
+	DD	$L$SEH_end_sha256_block_data_order_avx wrt ..imagebase	; end
+	DD	$L$SEH_info_sha256_block_data_order_avx wrt ..imagebase	; .xdata record
+section	.xdata rdata align=8
+ALIGN	8
+$L$SEH_info_sha256_block_data_order:	; record for the base routine
+DB	9,0,0,0	; presumably the UNWIND_INFO header: 9 = version 1 plus the exception-handler flag (1<<3); remaining bytes zero (no prologue size, no unwind codes)
+	DD	se_handler wrt ..imagebase	; image-relative address of the shared handler
+	DD	$L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase	; label pair; presumably the two DWORDs se_handler reads at [r11] and [4+r11]
+$L$SEH_info_sha256_block_data_order_ssse3:	; record for the ssse3 routine
+DB	9,0,0,0	; same header bytes as the first record
+	DD	se_handler wrt ..imagebase	; same handler
+	DD	$L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase	; ssse3 prologue/epilogue label pair
+$L$SEH_info_sha256_block_data_order_avx:	; record for the avx routine
+DB	9,0,0,0	; same header bytes as the first record
+	DD	se_handler wrt ..imagebase	; same handler
+	DD	$L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase	; avx prologue/epilogue label pair
diff --git a/third_party/boringssl/win-x86_64/crypto/fipsmodule/sha512-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/fipsmodule/sha512-x86_64.asm
new file mode 100644
index 0000000..d0d7a43
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/fipsmodule/sha512-x86_64.asm
@@ -0,0 +1,4211 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.text code align=64
+
+
+EXTERN	OPENSSL_ia32cap_P
+global	sha512_block_data_order
+
+ALIGN	16
+sha512_block_data_order:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha512_block_data_order:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+	lea	r11,[OPENSSL_ia32cap_P]
+	mov	r9d,DWORD[r11]
+	mov	r10d,DWORD[4+r11]
+	mov	r11d,DWORD[8+r11]
+	test	r10d,2048
+	jnz	NEAR $L$xop_shortcut
+	and	r9d,1073741824
+	and	r10d,268435968
+	or	r10d,r9d
+	cmp	r10d,1342177792
+	je	NEAR $L$avx_shortcut
+	mov	rax,rsp
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	shl	rdx,4
+	sub	rsp,16*8+4*8
+	lea	rdx,[rdx*8+rsi]
+	and	rsp,-64
+	mov	QWORD[((128+0))+rsp],rdi
+	mov	QWORD[((128+8))+rsp],rsi
+	mov	QWORD[((128+16))+rsp],rdx
+	mov	QWORD[((128+24))+rsp],rax
+$L$prologue:
+
+	mov	rax,QWORD[rdi]
+	mov	rbx,QWORD[8+rdi]
+	mov	rcx,QWORD[16+rdi]
+	mov	rdx,QWORD[24+rdi]
+	mov	r8,QWORD[32+rdi]
+	mov	r9,QWORD[40+rdi]
+	mov	r10,QWORD[48+rdi]
+	mov	r11,QWORD[56+rdi]
+	jmp	NEAR $L$loop
+
+ALIGN	16
+$L$loop:
+	mov	rdi,rbx
+	lea	rbp,[K512]
+	xor	rdi,rcx
+	mov	r12,QWORD[rsi]
+	mov	r13,r8
+	mov	r14,rax
+	bswap	r12
+	ror	r13,23
+	mov	r15,r9
+
+	xor	r13,r8
+	ror	r14,5
+	xor	r15,r10
+
+	mov	QWORD[rsp],r12
+	xor	r14,rax
+	and	r15,r8
+
+	ror	r13,4
+	add	r12,r11
+	xor	r15,r10
+
+	ror	r14,6
+	xor	r13,r8
+	add	r12,r15
+
+	mov	r15,rax
+	add	r12,QWORD[rbp]
+	xor	r14,rax
+
+	xor	r15,rbx
+	ror	r13,14
+	mov	r11,rbx
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	r11,rdi
+	add	rdx,r12
+	add	r11,r12
+
+	lea	rbp,[8+rbp]
+	add	r11,r14
+	mov	r12,QWORD[8+rsi]
+	mov	r13,rdx
+	mov	r14,r11
+	bswap	r12
+	ror	r13,23
+	mov	rdi,r8
+
+	xor	r13,rdx
+	ror	r14,5
+	xor	rdi,r9
+
+	mov	QWORD[8+rsp],r12
+	xor	r14,r11
+	and	rdi,rdx
+
+	ror	r13,4
+	add	r12,r10
+	xor	rdi,r9
+
+	ror	r14,6
+	xor	r13,rdx
+	add	r12,rdi
+
+	mov	rdi,r11
+	add	r12,QWORD[rbp]
+	xor	r14,r11
+
+	xor	rdi,rax
+	ror	r13,14
+	mov	r10,rax
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	r10,r15
+	add	rcx,r12
+	add	r10,r12
+
+	lea	rbp,[24+rbp]
+	add	r10,r14
+	mov	r12,QWORD[16+rsi]
+	mov	r13,rcx
+	mov	r14,r10
+	bswap	r12
+	ror	r13,23
+	mov	r15,rdx
+
+	xor	r13,rcx
+	ror	r14,5
+	xor	r15,r8
+
+	mov	QWORD[16+rsp],r12
+	xor	r14,r10
+	and	r15,rcx
+
+	ror	r13,4
+	add	r12,r9
+	xor	r15,r8
+
+	ror	r14,6
+	xor	r13,rcx
+	add	r12,r15
+
+	mov	r15,r10
+	add	r12,QWORD[rbp]
+	xor	r14,r10
+
+	xor	r15,r11
+	ror	r13,14
+	mov	r9,r11
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	r9,rdi
+	add	rbx,r12
+	add	r9,r12
+
+	lea	rbp,[8+rbp]
+	add	r9,r14
+	mov	r12,QWORD[24+rsi]
+	mov	r13,rbx
+	mov	r14,r9
+	bswap	r12
+	ror	r13,23
+	mov	rdi,rcx
+
+	xor	r13,rbx
+	ror	r14,5
+	xor	rdi,rdx
+
+	mov	QWORD[24+rsp],r12
+	xor	r14,r9
+	and	rdi,rbx
+
+	ror	r13,4
+	add	r12,r8
+	xor	rdi,rdx
+
+	ror	r14,6
+	xor	r13,rbx
+	add	r12,rdi
+
+	mov	rdi,r9
+	add	r12,QWORD[rbp]
+	xor	r14,r9
+
+	xor	rdi,r10
+	ror	r13,14
+	mov	r8,r10
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	r8,r15
+	add	rax,r12
+	add	r8,r12
+
+	lea	rbp,[24+rbp]
+	add	r8,r14
+	mov	r12,QWORD[32+rsi]
+	mov	r13,rax
+	mov	r14,r8
+	bswap	r12
+	ror	r13,23
+	mov	r15,rbx
+
+	xor	r13,rax
+	ror	r14,5
+	xor	r15,rcx
+
+	mov	QWORD[32+rsp],r12
+	xor	r14,r8
+	and	r15,rax
+
+	ror	r13,4
+	add	r12,rdx
+	xor	r15,rcx
+
+	ror	r14,6
+	xor	r13,rax
+	add	r12,r15
+
+	mov	r15,r8
+	add	r12,QWORD[rbp]
+	xor	r14,r8
+
+	xor	r15,r9
+	ror	r13,14
+	mov	rdx,r9
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	rdx,rdi
+	add	r11,r12
+	add	rdx,r12
+
+	lea	rbp,[8+rbp]
+	add	rdx,r14
+	mov	r12,QWORD[40+rsi]
+	mov	r13,r11
+	mov	r14,rdx
+	bswap	r12
+	ror	r13,23
+	mov	rdi,rax
+
+	xor	r13,r11
+	ror	r14,5
+	xor	rdi,rbx
+
+	mov	QWORD[40+rsp],r12
+	xor	r14,rdx
+	and	rdi,r11
+
+	ror	r13,4
+	add	r12,rcx
+	xor	rdi,rbx
+
+	ror	r14,6
+	xor	r13,r11
+	add	r12,rdi
+
+	mov	rdi,rdx
+	add	r12,QWORD[rbp]
+	xor	r14,rdx
+
+	xor	rdi,r8
+	ror	r13,14
+	mov	rcx,r8
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	rcx,r15
+	add	r10,r12
+	add	rcx,r12
+
+	lea	rbp,[24+rbp]
+	add	rcx,r14
+	mov	r12,QWORD[48+rsi]
+	mov	r13,r10
+	mov	r14,rcx
+	bswap	r12
+	ror	r13,23
+	mov	r15,r11
+
+	xor	r13,r10
+	ror	r14,5
+	xor	r15,rax
+
+	mov	QWORD[48+rsp],r12
+	xor	r14,rcx
+	and	r15,r10
+
+	ror	r13,4
+	add	r12,rbx
+	xor	r15,rax
+
+	ror	r14,6
+	xor	r13,r10
+	add	r12,r15
+
+	mov	r15,rcx
+	add	r12,QWORD[rbp]
+	xor	r14,rcx
+
+	xor	r15,rdx
+	ror	r13,14
+	mov	rbx,rdx
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	rbx,rdi
+	add	r9,r12
+	add	rbx,r12
+
+	lea	rbp,[8+rbp]
+	add	rbx,r14
+	mov	r12,QWORD[56+rsi]
+	mov	r13,r9
+	mov	r14,rbx
+	bswap	r12
+	ror	r13,23
+	mov	rdi,r10
+
+	xor	r13,r9
+	ror	r14,5
+	xor	rdi,r11
+
+	mov	QWORD[56+rsp],r12
+	xor	r14,rbx
+	and	rdi,r9
+
+	ror	r13,4
+	add	r12,rax
+	xor	rdi,r11
+
+	ror	r14,6
+	xor	r13,r9
+	add	r12,rdi
+
+	mov	rdi,rbx
+	add	r12,QWORD[rbp]
+	xor	r14,rbx
+
+	xor	rdi,rcx
+	ror	r13,14
+	mov	rax,rcx
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	rax,r15
+	add	r8,r12
+	add	rax,r12
+
+	lea	rbp,[24+rbp]
+	add	rax,r14
+	mov	r12,QWORD[64+rsi]
+	mov	r13,r8
+	mov	r14,rax
+	bswap	r12
+	ror	r13,23
+	mov	r15,r9
+
+	xor	r13,r8
+	ror	r14,5
+	xor	r15,r10
+
+	mov	QWORD[64+rsp],r12
+	xor	r14,rax
+	and	r15,r8
+
+	ror	r13,4
+	add	r12,r11
+	xor	r15,r10
+
+	ror	r14,6
+	xor	r13,r8
+	add	r12,r15
+
+	mov	r15,rax
+	add	r12,QWORD[rbp]
+	xor	r14,rax
+
+	xor	r15,rbx
+	ror	r13,14
+	mov	r11,rbx
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	r11,rdi
+	add	rdx,r12
+	add	r11,r12
+
+	lea	rbp,[8+rbp]
+	add	r11,r14
+	mov	r12,QWORD[72+rsi]
+	mov	r13,rdx
+	mov	r14,r11
+	bswap	r12
+	ror	r13,23
+	mov	rdi,r8
+
+	xor	r13,rdx
+	ror	r14,5
+	xor	rdi,r9
+
+	mov	QWORD[72+rsp],r12
+	xor	r14,r11
+	and	rdi,rdx
+
+	ror	r13,4
+	add	r12,r10
+	xor	rdi,r9
+
+	ror	r14,6
+	xor	r13,rdx
+	add	r12,rdi
+
+	mov	rdi,r11
+	add	r12,QWORD[rbp]
+	xor	r14,r11
+
+	xor	rdi,rax
+	ror	r13,14
+	mov	r10,rax
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	r10,r15
+	add	rcx,r12
+	add	r10,r12
+
+	lea	rbp,[24+rbp]
+	add	r10,r14
+	mov	r12,QWORD[80+rsi]
+	mov	r13,rcx
+	mov	r14,r10
+	bswap	r12
+	ror	r13,23
+	mov	r15,rdx
+
+	xor	r13,rcx
+	ror	r14,5
+	xor	r15,r8
+
+	mov	QWORD[80+rsp],r12
+	xor	r14,r10
+	and	r15,rcx
+
+	ror	r13,4
+	add	r12,r9
+	xor	r15,r8
+
+	ror	r14,6
+	xor	r13,rcx
+	add	r12,r15
+
+	mov	r15,r10
+	add	r12,QWORD[rbp]
+	xor	r14,r10
+
+	xor	r15,r11
+	ror	r13,14
+	mov	r9,r11
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	r9,rdi
+	add	rbx,r12
+	add	r9,r12
+
+	lea	rbp,[8+rbp]
+	add	r9,r14
+	mov	r12,QWORD[88+rsi]
+	mov	r13,rbx
+	mov	r14,r9
+	bswap	r12
+	ror	r13,23
+	mov	rdi,rcx
+
+	xor	r13,rbx
+	ror	r14,5
+	xor	rdi,rdx
+
+	mov	QWORD[88+rsp],r12
+	xor	r14,r9
+	and	rdi,rbx
+
+	ror	r13,4
+	add	r12,r8
+	xor	rdi,rdx
+
+	ror	r14,6
+	xor	r13,rbx
+	add	r12,rdi
+
+	mov	rdi,r9
+	add	r12,QWORD[rbp]
+	xor	r14,r9
+
+	xor	rdi,r10
+	ror	r13,14
+	mov	r8,r10
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	r8,r15
+	add	rax,r12
+	add	r8,r12
+
+	lea	rbp,[24+rbp]
+	add	r8,r14
+	mov	r12,QWORD[96+rsi]
+	mov	r13,rax
+	mov	r14,r8
+	bswap	r12
+	ror	r13,23
+	mov	r15,rbx
+
+	xor	r13,rax
+	ror	r14,5
+	xor	r15,rcx
+
+	mov	QWORD[96+rsp],r12
+	xor	r14,r8
+	and	r15,rax
+
+	ror	r13,4
+	add	r12,rdx
+	xor	r15,rcx
+
+	ror	r14,6
+	xor	r13,rax
+	add	r12,r15
+
+	mov	r15,r8
+	add	r12,QWORD[rbp]
+	xor	r14,r8
+
+	xor	r15,r9
+	ror	r13,14
+	mov	rdx,r9
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	rdx,rdi
+	add	r11,r12
+	add	rdx,r12
+
+	lea	rbp,[8+rbp]
+	add	rdx,r14
+	mov	r12,QWORD[104+rsi]
+	mov	r13,r11
+	mov	r14,rdx
+	bswap	r12
+	ror	r13,23
+	mov	rdi,rax
+
+	xor	r13,r11
+	ror	r14,5
+	xor	rdi,rbx
+
+	mov	QWORD[104+rsp],r12
+	xor	r14,rdx
+	and	rdi,r11
+
+	ror	r13,4
+	add	r12,rcx
+	xor	rdi,rbx
+
+	ror	r14,6
+	xor	r13,r11
+	add	r12,rdi
+
+	mov	rdi,rdx
+	add	r12,QWORD[rbp]
+	xor	r14,rdx
+
+	xor	rdi,r8
+	ror	r13,14
+	mov	rcx,r8
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	rcx,r15
+	add	r10,r12
+	add	rcx,r12
+
+	lea	rbp,[24+rbp]
+	add	rcx,r14
+	mov	r12,QWORD[112+rsi]
+	mov	r13,r10
+	mov	r14,rcx
+	bswap	r12
+	ror	r13,23
+	mov	r15,r11
+
+	xor	r13,r10
+	ror	r14,5
+	xor	r15,rax
+
+	mov	QWORD[112+rsp],r12
+	xor	r14,rcx
+	and	r15,r10
+
+	ror	r13,4
+	add	r12,rbx
+	xor	r15,rax
+
+	ror	r14,6
+	xor	r13,r10
+	add	r12,r15
+
+	mov	r15,rcx
+	add	r12,QWORD[rbp]
+	xor	r14,rcx
+
+	xor	r15,rdx
+	ror	r13,14
+	mov	rbx,rdx
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	rbx,rdi
+	add	r9,r12
+	add	rbx,r12
+
+	lea	rbp,[8+rbp]
+	add	rbx,r14
+	mov	r12,QWORD[120+rsi]
+	mov	r13,r9
+	mov	r14,rbx
+	bswap	r12
+	ror	r13,23
+	mov	rdi,r10
+
+	xor	r13,r9
+	ror	r14,5
+	xor	rdi,r11
+
+	mov	QWORD[120+rsp],r12
+	xor	r14,rbx
+	and	rdi,r9
+
+	ror	r13,4
+	add	r12,rax
+	xor	rdi,r11
+
+	ror	r14,6
+	xor	r13,r9
+	add	r12,rdi
+
+	mov	rdi,rbx
+	add	r12,QWORD[rbp]
+	xor	r14,rbx
+
+	xor	rdi,rcx
+	ror	r13,14
+	mov	rax,rcx
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	rax,r15
+	add	r8,r12
+	add	rax,r12
+
+	lea	rbp,[24+rbp]
+	jmp	NEAR $L$rounds_16_xx
+ALIGN	16
+$L$rounds_16_xx:
+	mov	r13,QWORD[8+rsp]
+	mov	r15,QWORD[112+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	rax,r14
+	mov	r14,r15
+	ror	r15,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	xor	r12,r13
+	xor	r15,r14
+	add	r12,QWORD[72+rsp]
+
+	add	r12,QWORD[rsp]
+	mov	r13,r8
+	add	r12,r15
+	mov	r14,rax
+	ror	r13,23
+	mov	r15,r9
+
+	xor	r13,r8
+	ror	r14,5
+	xor	r15,r10
+
+	mov	QWORD[rsp],r12
+	xor	r14,rax
+	and	r15,r8
+
+	ror	r13,4
+	add	r12,r11
+	xor	r15,r10
+
+	ror	r14,6
+	xor	r13,r8
+	add	r12,r15
+
+	mov	r15,rax
+	add	r12,QWORD[rbp]
+	xor	r14,rax
+
+	xor	r15,rbx
+	ror	r13,14
+	mov	r11,rbx
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	r11,rdi
+	add	rdx,r12
+	add	r11,r12
+
+	lea	rbp,[8+rbp]
+	mov	r13,QWORD[16+rsp]
+	mov	rdi,QWORD[120+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	r11,r14
+	mov	r14,rdi
+	ror	rdi,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	rdi,r14
+	shr	r14,6
+
+	ror	rdi,19
+	xor	r12,r13
+	xor	rdi,r14
+	add	r12,QWORD[80+rsp]
+
+	add	r12,QWORD[8+rsp]
+	mov	r13,rdx
+	add	r12,rdi
+	mov	r14,r11
+	ror	r13,23
+	mov	rdi,r8
+
+	xor	r13,rdx
+	ror	r14,5
+	xor	rdi,r9
+
+	mov	QWORD[8+rsp],r12
+	xor	r14,r11
+	and	rdi,rdx
+
+	ror	r13,4
+	add	r12,r10
+	xor	rdi,r9
+
+	ror	r14,6
+	xor	r13,rdx
+	add	r12,rdi
+
+	mov	rdi,r11
+	add	r12,QWORD[rbp]
+	xor	r14,r11
+
+	xor	rdi,rax
+	ror	r13,14
+	mov	r10,rax
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	r10,r15
+	add	rcx,r12
+	add	r10,r12
+
+	lea	rbp,[24+rbp]
+	mov	r13,QWORD[24+rsp]
+	mov	r15,QWORD[rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	r10,r14
+	mov	r14,r15
+	ror	r15,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	xor	r12,r13
+	xor	r15,r14
+	add	r12,QWORD[88+rsp]
+
+	add	r12,QWORD[16+rsp]
+	mov	r13,rcx
+	add	r12,r15
+	mov	r14,r10
+	ror	r13,23
+	mov	r15,rdx
+
+	xor	r13,rcx
+	ror	r14,5
+	xor	r15,r8
+
+	mov	QWORD[16+rsp],r12
+	xor	r14,r10
+	and	r15,rcx
+
+	ror	r13,4
+	add	r12,r9
+	xor	r15,r8
+
+	ror	r14,6
+	xor	r13,rcx
+	add	r12,r15
+
+	mov	r15,r10
+	add	r12,QWORD[rbp]
+	xor	r14,r10
+
+	xor	r15,r11
+	ror	r13,14
+	mov	r9,r11
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	r9,rdi
+	add	rbx,r12
+	add	r9,r12
+
+	lea	rbp,[8+rbp]
+	mov	r13,QWORD[32+rsp]
+	mov	rdi,QWORD[8+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	r9,r14
+	mov	r14,rdi
+	ror	rdi,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	rdi,r14
+	shr	r14,6
+
+	ror	rdi,19
+	xor	r12,r13
+	xor	rdi,r14
+	add	r12,QWORD[96+rsp]
+
+	add	r12,QWORD[24+rsp]
+	mov	r13,rbx
+	add	r12,rdi
+	mov	r14,r9
+	ror	r13,23
+	mov	rdi,rcx
+
+	xor	r13,rbx
+	ror	r14,5
+	xor	rdi,rdx
+
+	mov	QWORD[24+rsp],r12
+	xor	r14,r9
+	and	rdi,rbx
+
+	ror	r13,4
+	add	r12,r8
+	xor	rdi,rdx
+
+	ror	r14,6
+	xor	r13,rbx
+	add	r12,rdi
+
+	mov	rdi,r9
+	add	r12,QWORD[rbp]
+	xor	r14,r9
+
+	xor	rdi,r10
+	ror	r13,14
+	mov	r8,r10
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	r8,r15
+	add	rax,r12
+	add	r8,r12
+
+	lea	rbp,[24+rbp]
+	mov	r13,QWORD[40+rsp]
+	mov	r15,QWORD[16+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	r8,r14
+	mov	r14,r15
+	ror	r15,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	xor	r12,r13
+	xor	r15,r14
+	add	r12,QWORD[104+rsp]
+
+	add	r12,QWORD[32+rsp]
+	mov	r13,rax
+	add	r12,r15
+	mov	r14,r8
+	ror	r13,23
+	mov	r15,rbx
+
+	xor	r13,rax
+	ror	r14,5
+	xor	r15,rcx
+
+	mov	QWORD[32+rsp],r12
+	xor	r14,r8
+	and	r15,rax
+
+	ror	r13,4
+	add	r12,rdx
+	xor	r15,rcx
+
+	ror	r14,6
+	xor	r13,rax
+	add	r12,r15
+
+	mov	r15,r8
+	add	r12,QWORD[rbp]
+	xor	r14,r8
+
+	xor	r15,r9
+	ror	r13,14
+	mov	rdx,r9
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	rdx,rdi
+	add	r11,r12
+	add	rdx,r12
+
+	lea	rbp,[8+rbp]
+	mov	r13,QWORD[48+rsp]
+	mov	rdi,QWORD[24+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	rdx,r14
+	mov	r14,rdi
+	ror	rdi,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	rdi,r14
+	shr	r14,6
+
+	ror	rdi,19
+	xor	r12,r13
+	xor	rdi,r14
+	add	r12,QWORD[112+rsp]
+
+	add	r12,QWORD[40+rsp]
+	mov	r13,r11
+	add	r12,rdi
+	mov	r14,rdx
+	ror	r13,23
+	mov	rdi,rax
+
+	xor	r13,r11
+	ror	r14,5
+	xor	rdi,rbx
+
+	mov	QWORD[40+rsp],r12
+	xor	r14,rdx
+	and	rdi,r11
+
+	ror	r13,4
+	add	r12,rcx
+	xor	rdi,rbx
+
+	ror	r14,6
+	xor	r13,r11
+	add	r12,rdi
+
+	mov	rdi,rdx
+	add	r12,QWORD[rbp]
+	xor	r14,rdx
+
+	xor	rdi,r8
+	ror	r13,14
+	mov	rcx,r8
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	rcx,r15
+	add	r10,r12
+	add	rcx,r12
+
+	lea	rbp,[24+rbp]
+	mov	r13,QWORD[56+rsp]
+	mov	r15,QWORD[32+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	rcx,r14
+	mov	r14,r15
+	ror	r15,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	xor	r12,r13
+	xor	r15,r14
+	add	r12,QWORD[120+rsp]
+
+	add	r12,QWORD[48+rsp]
+	mov	r13,r10
+	add	r12,r15
+	mov	r14,rcx
+	ror	r13,23
+	mov	r15,r11
+
+	xor	r13,r10
+	ror	r14,5
+	xor	r15,rax
+
+	mov	QWORD[48+rsp],r12
+	xor	r14,rcx
+	and	r15,r10
+
+	ror	r13,4
+	add	r12,rbx
+	xor	r15,rax
+
+	ror	r14,6
+	xor	r13,r10
+	add	r12,r15
+
+	mov	r15,rcx
+	add	r12,QWORD[rbp]
+	xor	r14,rcx
+
+	xor	r15,rdx
+	ror	r13,14
+	mov	rbx,rdx
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	rbx,rdi
+	add	r9,r12
+	add	rbx,r12
+
+	lea	rbp,[8+rbp]
+	mov	r13,QWORD[64+rsp]
+	mov	rdi,QWORD[40+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	rbx,r14
+	mov	r14,rdi
+	ror	rdi,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	rdi,r14
+	shr	r14,6
+
+	ror	rdi,19
+	xor	r12,r13
+	xor	rdi,r14
+	add	r12,QWORD[rsp]
+
+	add	r12,QWORD[56+rsp]
+	mov	r13,r9
+	add	r12,rdi
+	mov	r14,rbx
+	ror	r13,23
+	mov	rdi,r10
+
+	xor	r13,r9
+	ror	r14,5
+	xor	rdi,r11
+
+	mov	QWORD[56+rsp],r12
+	xor	r14,rbx
+	and	rdi,r9
+
+	ror	r13,4
+	add	r12,rax
+	xor	rdi,r11
+
+	ror	r14,6
+	xor	r13,r9
+	add	r12,rdi
+
+	mov	rdi,rbx
+	add	r12,QWORD[rbp]
+	xor	r14,rbx
+
+	xor	rdi,rcx
+	ror	r13,14
+	mov	rax,rcx
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	rax,r15
+	add	r8,r12
+	add	rax,r12
+
+	lea	rbp,[24+rbp]
+	mov	r13,QWORD[72+rsp]
+	mov	r15,QWORD[48+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	rax,r14
+	mov	r14,r15
+	ror	r15,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	xor	r12,r13
+	xor	r15,r14
+	add	r12,QWORD[8+rsp]
+
+	add	r12,QWORD[64+rsp]
+	mov	r13,r8
+	add	r12,r15
+	mov	r14,rax
+	ror	r13,23
+	mov	r15,r9
+
+	xor	r13,r8
+	ror	r14,5
+	xor	r15,r10
+
+	mov	QWORD[64+rsp],r12
+	xor	r14,rax
+	and	r15,r8
+
+	ror	r13,4
+	add	r12,r11
+	xor	r15,r10
+
+	ror	r14,6
+	xor	r13,r8
+	add	r12,r15
+
+	mov	r15,rax
+	add	r12,QWORD[rbp]
+	xor	r14,rax
+
+	xor	r15,rbx
+	ror	r13,14
+	mov	r11,rbx
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	r11,rdi
+	add	rdx,r12
+	add	r11,r12
+
+	lea	rbp,[8+rbp]
+	mov	r13,QWORD[80+rsp]
+	mov	rdi,QWORD[56+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	r11,r14
+	mov	r14,rdi
+	ror	rdi,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	rdi,r14
+	shr	r14,6
+
+	ror	rdi,19
+	xor	r12,r13
+	xor	rdi,r14
+	add	r12,QWORD[16+rsp]
+
+	add	r12,QWORD[72+rsp]
+	mov	r13,rdx
+	add	r12,rdi
+	mov	r14,r11
+	ror	r13,23
+	mov	rdi,r8
+
+	xor	r13,rdx
+	ror	r14,5
+	xor	rdi,r9
+
+	mov	QWORD[72+rsp],r12
+	xor	r14,r11
+	and	rdi,rdx
+
+	ror	r13,4
+	add	r12,r10
+	xor	rdi,r9
+
+	ror	r14,6
+	xor	r13,rdx
+	add	r12,rdi
+
+	mov	rdi,r11
+	add	r12,QWORD[rbp]
+	xor	r14,r11
+
+	xor	rdi,rax
+	ror	r13,14
+	mov	r10,rax
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	r10,r15
+	add	rcx,r12
+	add	r10,r12
+
+	lea	rbp,[24+rbp]
+	mov	r13,QWORD[88+rsp]
+	mov	r15,QWORD[64+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	r10,r14
+	mov	r14,r15
+	ror	r15,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	xor	r12,r13
+	xor	r15,r14
+	add	r12,QWORD[24+rsp]
+
+	add	r12,QWORD[80+rsp]
+	mov	r13,rcx
+	add	r12,r15
+	mov	r14,r10
+	ror	r13,23
+	mov	r15,rdx
+
+	xor	r13,rcx
+	ror	r14,5
+	xor	r15,r8
+
+	mov	QWORD[80+rsp],r12
+	xor	r14,r10
+	and	r15,rcx
+
+	ror	r13,4
+	add	r12,r9
+	xor	r15,r8
+
+	ror	r14,6
+	xor	r13,rcx
+	add	r12,r15
+
+	mov	r15,r10
+	add	r12,QWORD[rbp]
+	xor	r14,r10
+
+	xor	r15,r11
+	ror	r13,14
+	mov	r9,r11
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	r9,rdi
+	add	rbx,r12
+	add	r9,r12
+
+	lea	rbp,[8+rbp]
+	mov	r13,QWORD[96+rsp]
+	mov	rdi,QWORD[72+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	r9,r14
+	mov	r14,rdi
+	ror	rdi,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	rdi,r14
+	shr	r14,6
+
+	ror	rdi,19
+	xor	r12,r13
+	xor	rdi,r14
+	add	r12,QWORD[32+rsp]
+
+	add	r12,QWORD[88+rsp]
+	mov	r13,rbx
+	add	r12,rdi
+	mov	r14,r9
+	ror	r13,23
+	mov	rdi,rcx
+
+	xor	r13,rbx
+	ror	r14,5
+	xor	rdi,rdx
+
+	mov	QWORD[88+rsp],r12
+	xor	r14,r9
+	and	rdi,rbx
+
+	ror	r13,4
+	add	r12,r8
+	xor	rdi,rdx
+
+	ror	r14,6
+	xor	r13,rbx
+	add	r12,rdi
+
+	mov	rdi,r9
+	add	r12,QWORD[rbp]
+	xor	r14,r9
+
+	xor	rdi,r10
+	ror	r13,14
+	mov	r8,r10
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	r8,r15
+	add	rax,r12
+	add	r8,r12
+
+	lea	rbp,[24+rbp]
+	mov	r13,QWORD[104+rsp]
+	mov	r15,QWORD[80+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	r8,r14
+	mov	r14,r15
+	ror	r15,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	xor	r12,r13
+	xor	r15,r14
+	add	r12,QWORD[40+rsp]
+
+	add	r12,QWORD[96+rsp]
+	mov	r13,rax
+	add	r12,r15
+	mov	r14,r8
+	ror	r13,23
+	mov	r15,rbx
+
+	xor	r13,rax
+	ror	r14,5
+	xor	r15,rcx
+
+	mov	QWORD[96+rsp],r12
+	xor	r14,r8
+	and	r15,rax
+
+	ror	r13,4
+	add	r12,rdx
+	xor	r15,rcx
+
+	ror	r14,6
+	xor	r13,rax
+	add	r12,r15
+
+	mov	r15,r8
+	add	r12,QWORD[rbp]
+	xor	r14,r8
+
+	xor	r15,r9
+	ror	r13,14
+	mov	rdx,r9
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	rdx,rdi
+	add	r11,r12
+	add	rdx,r12
+
+	lea	rbp,[8+rbp]
+	mov	r13,QWORD[112+rsp]
+	mov	rdi,QWORD[88+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	rdx,r14
+	mov	r14,rdi
+	ror	rdi,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	rdi,r14
+	shr	r14,6
+
+	ror	rdi,19
+	xor	r12,r13
+	xor	rdi,r14
+	add	r12,QWORD[48+rsp]
+
+	add	r12,QWORD[104+rsp]
+	mov	r13,r11
+	add	r12,rdi
+	mov	r14,rdx
+	ror	r13,23
+	mov	rdi,rax
+
+	xor	r13,r11
+	ror	r14,5
+	xor	rdi,rbx
+
+	mov	QWORD[104+rsp],r12
+	xor	r14,rdx
+	and	rdi,r11
+
+	ror	r13,4
+	add	r12,rcx
+	xor	rdi,rbx
+
+	ror	r14,6
+	xor	r13,r11
+	add	r12,rdi
+
+	mov	rdi,rdx
+	add	r12,QWORD[rbp]
+	xor	r14,rdx
+
+	xor	rdi,r8
+	ror	r13,14
+	mov	rcx,r8
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	rcx,r15
+	add	r10,r12
+	add	rcx,r12
+
+	lea	rbp,[24+rbp]
+	mov	r13,QWORD[120+rsp]
+	mov	r15,QWORD[96+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	rcx,r14
+	mov	r14,r15
+	ror	r15,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	xor	r12,r13
+	xor	r15,r14
+	add	r12,QWORD[56+rsp]
+
+	add	r12,QWORD[112+rsp]
+	mov	r13,r10
+	add	r12,r15
+	mov	r14,rcx
+	ror	r13,23
+	mov	r15,r11
+
+	xor	r13,r10
+	ror	r14,5
+	xor	r15,rax
+
+	mov	QWORD[112+rsp],r12
+	xor	r14,rcx
+	and	r15,r10
+
+	ror	r13,4
+	add	r12,rbx
+	xor	r15,rax
+
+	ror	r14,6
+	xor	r13,r10
+	add	r12,r15
+
+	mov	r15,rcx
+	add	r12,QWORD[rbp]
+	xor	r14,rcx
+
+	xor	r15,rdx
+	ror	r13,14
+	mov	rbx,rdx
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	rbx,rdi
+	add	r9,r12
+	add	rbx,r12
+
+	lea	rbp,[8+rbp]
+	mov	r13,QWORD[rsp]
+	mov	rdi,QWORD[104+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	rbx,r14
+	mov	r14,rdi
+	ror	rdi,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	rdi,r14
+	shr	r14,6
+
+	ror	rdi,19
+	xor	r12,r13
+	xor	rdi,r14
+	add	r12,QWORD[64+rsp]
+
+	add	r12,QWORD[120+rsp]
+	mov	r13,r9
+	add	r12,rdi
+	mov	r14,rbx
+	ror	r13,23
+	mov	rdi,r10
+
+	xor	r13,r9
+	ror	r14,5
+	xor	rdi,r11
+
+	mov	QWORD[120+rsp],r12
+	xor	r14,rbx
+	and	rdi,r9
+
+	ror	r13,4
+	add	r12,rax
+	xor	rdi,r11
+
+	ror	r14,6
+	xor	r13,r9
+	add	r12,rdi
+
+	mov	rdi,rbx
+	add	r12,QWORD[rbp]
+	xor	r14,rbx
+
+	xor	rdi,rcx
+	ror	r13,14
+	mov	rax,rcx
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	rax,r15
+	add	r8,r12
+	add	rax,r12
+
+	lea	rbp,[24+rbp]
+	cmp	BYTE[7+rbp],0
+	jnz	NEAR $L$rounds_16_xx
+
+	mov	rdi,QWORD[((128+0))+rsp]
+	add	rax,r14
+	lea	rsi,[128+rsi]
+
+	add	rax,QWORD[rdi]
+	add	rbx,QWORD[8+rdi]
+	add	rcx,QWORD[16+rdi]
+	add	rdx,QWORD[24+rdi]
+	add	r8,QWORD[32+rdi]
+	add	r9,QWORD[40+rdi]
+	add	r10,QWORD[48+rdi]
+	add	r11,QWORD[56+rdi]
+
+	cmp	rsi,QWORD[((128+16))+rsp]
+
+	mov	QWORD[rdi],rax
+	mov	QWORD[8+rdi],rbx
+	mov	QWORD[16+rdi],rcx
+	mov	QWORD[24+rdi],rdx
+	mov	QWORD[32+rdi],r8
+	mov	QWORD[40+rdi],r9
+	mov	QWORD[48+rdi],r10
+	mov	QWORD[56+rdi],r11
+	jb	NEAR $L$loop
+
+	mov	rsi,QWORD[((128+24))+rsp]
+	mov	r15,QWORD[((-48))+rsi]
+	mov	r14,QWORD[((-40))+rsi]
+	mov	r13,QWORD[((-32))+rsi]
+	mov	r12,QWORD[((-24))+rsi]
+	mov	rbp,QWORD[((-16))+rsi]
+	mov	rbx,QWORD[((-8))+rsi]
+	lea	rsp,[rsi]
+$L$epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_sha512_block_data_order:
+ALIGN	64
+
+K512:	; table of 64-bit constants; every 16-byte row below is emitted twice, so each pair of constants occupies 32 bytes
+	DQ	0x428a2f98d728ae22,0x7137449123ef65cd
+	DQ	0x428a2f98d728ae22,0x7137449123ef65cd
+	DQ	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+	DQ	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+	DQ	0x3956c25bf348b538,0x59f111f1b605d019
+	DQ	0x3956c25bf348b538,0x59f111f1b605d019
+	DQ	0x923f82a4af194f9b,0xab1c5ed5da6d8118
+	DQ	0x923f82a4af194f9b,0xab1c5ed5da6d8118
+	DQ	0xd807aa98a3030242,0x12835b0145706fbe
+	DQ	0xd807aa98a3030242,0x12835b0145706fbe
+	DQ	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+	DQ	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+	DQ	0x72be5d74f27b896f,0x80deb1fe3b1696b1
+	DQ	0x72be5d74f27b896f,0x80deb1fe3b1696b1
+	DQ	0x9bdc06a725c71235,0xc19bf174cf692694
+	DQ	0x9bdc06a725c71235,0xc19bf174cf692694
+	DQ	0xe49b69c19ef14ad2,0xefbe4786384f25e3
+	DQ	0xe49b69c19ef14ad2,0xefbe4786384f25e3
+	DQ	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+	DQ	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+	DQ	0x2de92c6f592b0275,0x4a7484aa6ea6e483
+	DQ	0x2de92c6f592b0275,0x4a7484aa6ea6e483
+	DQ	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+	DQ	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+	DQ	0x983e5152ee66dfab,0xa831c66d2db43210
+	DQ	0x983e5152ee66dfab,0xa831c66d2db43210
+	DQ	0xb00327c898fb213f,0xbf597fc7beef0ee4
+	DQ	0xb00327c898fb213f,0xbf597fc7beef0ee4
+	DQ	0xc6e00bf33da88fc2,0xd5a79147930aa725
+	DQ	0xc6e00bf33da88fc2,0xd5a79147930aa725
+	DQ	0x06ca6351e003826f,0x142929670a0e6e70
+	DQ	0x06ca6351e003826f,0x142929670a0e6e70
+	DQ	0x27b70a8546d22ffc,0x2e1b21385c26c926
+	DQ	0x27b70a8546d22ffc,0x2e1b21385c26c926
+	DQ	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+	DQ	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+	DQ	0x650a73548baf63de,0x766a0abb3c77b2a8
+	DQ	0x650a73548baf63de,0x766a0abb3c77b2a8
+	DQ	0x81c2c92e47edaee6,0x92722c851482353b
+	DQ	0x81c2c92e47edaee6,0x92722c851482353b
+	DQ	0xa2bfe8a14cf10364,0xa81a664bbc423001
+	DQ	0xa2bfe8a14cf10364,0xa81a664bbc423001
+	DQ	0xc24b8b70d0f89791,0xc76c51a30654be30
+	DQ	0xc24b8b70d0f89791,0xc76c51a30654be30
+	DQ	0xd192e819d6ef5218,0xd69906245565a910
+	DQ	0xd192e819d6ef5218,0xd69906245565a910
+	DQ	0xf40e35855771202a,0x106aa07032bbd1b8
+	DQ	0xf40e35855771202a,0x106aa07032bbd1b8
+	DQ	0x19a4c116b8d2d0c8,0x1e376c085141ab53
+	DQ	0x19a4c116b8d2d0c8,0x1e376c085141ab53
+	DQ	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+	DQ	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+	DQ	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+	DQ	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+	DQ	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+	DQ	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+	DQ	0x748f82ee5defb2fc,0x78a5636f43172f60
+	DQ	0x748f82ee5defb2fc,0x78a5636f43172f60
+	DQ	0x84c87814a1f0ab72,0x8cc702081a6439ec
+	DQ	0x84c87814a1f0ab72,0x8cc702081a6439ec
+	DQ	0x90befffa23631e28,0xa4506cebde82bde9
+	DQ	0x90befffa23631e28,0xa4506cebde82bde9
+	DQ	0xbef9a3f7b2c67915,0xc67178f2e372532b
+	DQ	0xbef9a3f7b2c67915,0xc67178f2e372532b
+	DQ	0xca273eceea26619c,0xd186b8c721c0c207
+	DQ	0xca273eceea26619c,0xd186b8c721c0c207
+	DQ	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+	DQ	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+	DQ	0x06f067aa72176fba,0x0a637dc5a2c898a6
+	DQ	0x06f067aa72176fba,0x0a637dc5a2c898a6
+	DQ	0x113f9804bef90dae,0x1b710b35131c471b
+	DQ	0x113f9804bef90dae,0x1b710b35131c471b
+	DQ	0x28db77f523047d84,0x32caab7b40c72493
+	DQ	0x28db77f523047d84,0x32caab7b40c72493
+	DQ	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+	DQ	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+	DQ	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+	DQ	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+	DQ	0x5fcb6fab3ad6faec,0x6c44198c4a475817	; last constant pair (80 rows * 16 bytes = 1280 bytes of table end after the duplicate row below)
+	DQ	0x5fcb6fab3ad6faec,0x6c44198c4a475817
+
+	DQ	0x0001020304050607,0x08090a0b0c0d0e0f	; at K512+1280: vpshufb mask that reverses the bytes of each 64-bit lane; its zero top byte (offset K512+1287) is what the "cmp BYTE[135+rbp],0" loop test finally hits
+	DQ	0x0001020304050607,0x08090a0b0c0d0e0f
+DB	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97	; ASCII ident string: "SHA512 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated
+DB	110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54
+DB	52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
+DB	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
+DB	111,114,103,62,0
+
+ALIGN	64
+sha512_block_data_order_xop:	; XOP code path; on entry rcx = state pointer, rdx = input pointer, r8 = number of 128-byte blocks
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; rdi/rsi are stashed at rsp+8 / rsp+16 and reloaded at $L$epilogue_xop
+	mov	rax,rsp
+$L$SEH_begin_sha512_block_data_order_xop:
+	mov	rdi,rcx	; remap the three register arguments: rcx,rdx,r8 -> rdi,rsi,rdx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+$L$xop_shortcut:	; NOTE(review): presumably an alternate entry used once arguments are already in rdi/rsi/rdx - confirm against the dispatcher
+	mov	rax,rsp	; rax = stack pointer before the pushes; saved in the frame and used to unwind
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	shl	rdx,4	; rdx = count * 16 ...
+	sub	rsp,256	; reserve the local frame
+	lea	rdx,[rdx*8+rsi]	; ... rdx = rsi + count * 128 = end-of-input pointer
+	and	rsp,-64	; align the frame to 64 bytes
+	mov	QWORD[((128+0))+rsp],rdi	; frame+128: state pointer
+	mov	QWORD[((128+8))+rsp],rsi	; frame+136: input pointer
+	mov	QWORD[((128+16))+rsp],rdx	; frame+144: end-of-input pointer
+	mov	QWORD[((128+24))+rsp],rax	; frame+152: original stack pointer
+	movaps	XMMWORD[(128+32)+rsp],xmm6	; save xmm6-xmm11 (restored before return)
+	movaps	XMMWORD[(128+48)+rsp],xmm7
+	movaps	XMMWORD[(128+64)+rsp],xmm8
+	movaps	XMMWORD[(128+80)+rsp],xmm9
+	movaps	XMMWORD[(128+96)+rsp],xmm10
+	movaps	XMMWORD[(128+112)+rsp],xmm11
+$L$prologue_xop:
+
+	vzeroupper
+	mov	rax,QWORD[rdi]	; load the eight 64-bit state words into rax,rbx,rcx,rdx,r8,r9,r10,r11
+	mov	rbx,QWORD[8+rdi]
+	mov	rcx,QWORD[16+rdi]
+	mov	rdx,QWORD[24+rdi]
+	mov	r8,QWORD[32+rdi]
+	mov	r9,QWORD[40+rdi]
+	mov	r10,QWORD[48+rdi]
+	mov	r11,QWORD[56+rdi]
+	jmp	NEAR $L$loop_xop
+ALIGN	16
+$L$loop_xop:	; per-block loop: loads one 128-byte input block from rsi and prepares the first 16 K+W values on the stack
+	vmovdqa	xmm11,XMMWORD[((K512+1280))]	; shuffle mask stored right after the constant table
+	vmovdqu	xmm0,XMMWORD[rsi]	; xmm0..xmm7 <- the eight 16-byte pieces of the input block (unaligned loads)
+	lea	rbp,[((K512+128))]	; rbp is biased by +128 so table rows are reached with offsets -128..+96
+	vmovdqu	xmm1,XMMWORD[16+rsi]
+	vmovdqu	xmm2,XMMWORD[32+rsi]
+	vpshufb	xmm0,xmm0,xmm11	; apply the mask: reverse the bytes inside each 64-bit word
+	vmovdqu	xmm3,XMMWORD[48+rsi]
+	vpshufb	xmm1,xmm1,xmm11
+	vmovdqu	xmm4,XMMWORD[64+rsi]
+	vpshufb	xmm2,xmm2,xmm11
+	vmovdqu	xmm5,XMMWORD[80+rsi]
+	vpshufb	xmm3,xmm3,xmm11
+	vmovdqu	xmm6,XMMWORD[96+rsi]
+	vpshufb	xmm4,xmm4,xmm11
+	vmovdqu	xmm7,XMMWORD[112+rsi]
+	vpshufb	xmm5,xmm5,xmm11
+	vpaddq	xmm8,xmm0,XMMWORD[((-128))+rbp]	; message words + table constants; table offsets step by 32 because each row is stored twice
+	vpshufb	xmm6,xmm6,xmm11
+	vpaddq	xmm9,xmm1,XMMWORD[((-96))+rbp]
+	vpshufb	xmm7,xmm7,xmm11
+	vpaddq	xmm10,xmm2,XMMWORD[((-64))+rbp]
+	vpaddq	xmm11,xmm3,XMMWORD[((-32))+rbp]	; xmm11 (the mask) is overwritten here; it is reloaded at the top of the next block
+	vmovdqa	XMMWORD[rsp],xmm8	; the sums are parked in the 128-byte area at rsp..rsp+127
+	vpaddq	xmm8,xmm4,XMMWORD[rbp]
+	vmovdqa	XMMWORD[16+rsp],xmm9
+	vpaddq	xmm9,xmm5,XMMWORD[32+rbp]
+	vmovdqa	XMMWORD[32+rsp],xmm10
+	vpaddq	xmm10,xmm6,XMMWORD[64+rbp]
+	vmovdqa	XMMWORD[48+rsp],xmm11
+	vpaddq	xmm11,xmm7,XMMWORD[96+rbp]
+	vmovdqa	XMMWORD[64+rsp],xmm8
+	mov	r14,rax	; seed the round temporaries: r14 = first state word
+	vmovdqa	XMMWORD[80+rsp],xmm9
+	mov	rdi,rbx	; rdi = rbx ^ rcx (completed by the xor two lines below)
+	vmovdqa	XMMWORD[96+rsp],xmm10
+	xor	rdi,rcx
+	vmovdqa	XMMWORD[112+rsp],xmm11
+	mov	r13,r8	; r13 = fifth state word
+	jmp	NEAR $L$xop_00_47
+
+ALIGN	16
+$L$xop_00_47:	; 16 scalar rounds interleaved with the vector message-schedule update of xmm0..xmm7; executed four times (see the loop test at the bottom)
+	add	rbp,256	; advance to the next 16 table constants (256 bytes because rows are stored twice)
+	vpalignr	xmm8,xmm1,xmm0,8	; group 1 (xmm0): xmm8 = {high qword of xmm0, low qword of xmm1}
+	ror	r13,23	; r13: ror 23, xor, ror 4, xor, ror 14 nets ROTR14 ^ ROTR18 ^ ROTR41 of r8
+	mov	rax,r14
+	vpalignr	xmm11,xmm5,xmm4,8
+	mov	r12,r9
+	ror	r14,5	; r14: ror 5, xor, ror 6, xor, ror 28 nets ROTR28 ^ ROTR34 ^ ROTR39 of rax
+DB	143,72,120,195,200,56	; raw opcode bytes (0x8F = XOP escape); presumably "vprotq xmm9,xmm8,56" - TODO confirm against the AMD XOP encoding
+	xor	r13,r8
+	xor	r12,r10	; r12 = ((r9 ^ r10) & r8) ^ r10 once the and/xor below complete
+	vpsrlq	xmm8,xmm8,7
+	ror	r13,4
+	xor	r14,rax
+	vpaddq	xmm0,xmm0,xmm11
+	and	r12,r8
+	xor	r13,r8
+	add	r11,QWORD[rsp]	; add the precomputed (constant + message word) from stack slot 0
+	mov	r15,rax
+DB	143,72,120,195,209,7	; presumably "vprotq xmm10,xmm9,7" - TODO confirm
+	xor	r12,r10
+	ror	r14,6
+	vpxor	xmm8,xmm8,xmm9
+	xor	r15,rbx
+	add	r11,r12
+	ror	r13,14
+	and	rdi,r15	; rdi = ((rax ^ rbx) & (rbx ^ rcx)) ^ rbx after the xor below (majority of rax,rbx,rcx)
+DB	143,104,120,195,223,3	; presumably "vprotq xmm11,xmm7,3" - TODO confirm
+	xor	r14,rax
+	add	r11,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,rbx
+	ror	r14,28
+	vpsrlq	xmm10,xmm7,6
+	add	rdx,r11
+	add	r11,rdi
+	vpaddq	xmm0,xmm0,xmm8
+	mov	r13,rdx
+	add	r14,r11	; the second rotate-xor term is folded in lazily: r14 is copied into the register at the start of the next round
+DB	143,72,120,195,203,42	; presumably "vprotq xmm9,xmm11,42" - TODO confirm
+	ror	r13,23
+	mov	r11,r14
+	vpxor	xmm11,xmm11,xmm10
+	mov	r12,r8
+	ror	r14,5
+	xor	r13,rdx
+	xor	r12,r9
+	vpxor	xmm11,xmm11,xmm9
+	ror	r13,4
+	xor	r14,r11
+	and	r12,rdx
+	xor	r13,rdx
+	vpaddq	xmm0,xmm0,xmm11	; xmm0 now holds the two new schedule words
+	add	r10,QWORD[8+rsp]
+	mov	rdi,r11
+	xor	r12,r9
+	ror	r14,6
+	vpaddq	xmm10,xmm0,XMMWORD[((-128))+rbp]	; new schedule words + their table constants
+	xor	rdi,rax
+	add	r10,r12
+	ror	r13,14
+	and	r15,rdi
+	xor	r14,r11
+	add	r10,r13
+	xor	r15,rax
+	ror	r14,28
+	add	rcx,r10
+	add	r10,r15
+	mov	r13,rcx
+	add	r14,r10
+	vmovdqa	XMMWORD[rsp],xmm10	; refresh stack slots 0-1 for the next pass over them
+	vpalignr	xmm8,xmm2,xmm1,8	; group 2 (xmm1): same pattern with every register role rotated; result goes to [16+rsp]
+	ror	r13,23
+	mov	r10,r14
+	vpalignr	xmm11,xmm6,xmm5,8
+	mov	r12,rdx
+	ror	r14,5
+DB	143,72,120,195,200,56
+	xor	r13,rcx
+	xor	r12,r8
+	vpsrlq	xmm8,xmm8,7
+	ror	r13,4
+	xor	r14,r10
+	vpaddq	xmm1,xmm1,xmm11
+	and	r12,rcx
+	xor	r13,rcx
+	add	r9,QWORD[16+rsp]
+	mov	r15,r10
+DB	143,72,120,195,209,7
+	xor	r12,r8
+	ror	r14,6
+	vpxor	xmm8,xmm8,xmm9
+	xor	r15,r11
+	add	r9,r12
+	ror	r13,14
+	and	rdi,r15
+DB	143,104,120,195,216,3
+	xor	r14,r10
+	add	r9,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,r11
+	ror	r14,28
+	vpsrlq	xmm10,xmm0,6
+	add	rbx,r9
+	add	r9,rdi
+	vpaddq	xmm1,xmm1,xmm8
+	mov	r13,rbx
+	add	r14,r9
+DB	143,72,120,195,203,42
+	ror	r13,23
+	mov	r9,r14
+	vpxor	xmm11,xmm11,xmm10
+	mov	r12,rcx
+	ror	r14,5
+	xor	r13,rbx
+	xor	r12,rdx
+	vpxor	xmm11,xmm11,xmm9
+	ror	r13,4
+	xor	r14,r9
+	and	r12,rbx
+	xor	r13,rbx
+	vpaddq	xmm1,xmm1,xmm11
+	add	r8,QWORD[24+rsp]
+	mov	rdi,r9
+	xor	r12,rdx
+	ror	r14,6
+	vpaddq	xmm10,xmm1,XMMWORD[((-96))+rbp]
+	xor	rdi,r10
+	add	r8,r12
+	ror	r13,14
+	and	r15,rdi
+	xor	r14,r9
+	add	r8,r13
+	xor	r15,r10
+	ror	r14,28
+	add	rax,r8
+	add	r8,r15
+	mov	r13,rax
+	add	r14,r8
+	vmovdqa	XMMWORD[16+rsp],xmm10
+	vpalignr	xmm8,xmm3,xmm2,8	; group 3 (xmm2): result goes to [32+rsp]
+	ror	r13,23
+	mov	r8,r14
+	vpalignr	xmm11,xmm7,xmm6,8
+	mov	r12,rbx
+	ror	r14,5
+DB	143,72,120,195,200,56
+	xor	r13,rax
+	xor	r12,rcx
+	vpsrlq	xmm8,xmm8,7
+	ror	r13,4
+	xor	r14,r8
+	vpaddq	xmm2,xmm2,xmm11
+	and	r12,rax
+	xor	r13,rax
+	add	rdx,QWORD[32+rsp]
+	mov	r15,r8
+DB	143,72,120,195,209,7
+	xor	r12,rcx
+	ror	r14,6
+	vpxor	xmm8,xmm8,xmm9
+	xor	r15,r9
+	add	rdx,r12
+	ror	r13,14
+	and	rdi,r15
+DB	143,104,120,195,217,3
+	xor	r14,r8
+	add	rdx,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,r9
+	ror	r14,28
+	vpsrlq	xmm10,xmm1,6
+	add	r11,rdx
+	add	rdx,rdi
+	vpaddq	xmm2,xmm2,xmm8
+	mov	r13,r11
+	add	r14,rdx
+DB	143,72,120,195,203,42
+	ror	r13,23
+	mov	rdx,r14
+	vpxor	xmm11,xmm11,xmm10
+	mov	r12,rax
+	ror	r14,5
+	xor	r13,r11
+	xor	r12,rbx
+	vpxor	xmm11,xmm11,xmm9
+	ror	r13,4
+	xor	r14,rdx
+	and	r12,r11
+	xor	r13,r11
+	vpaddq	xmm2,xmm2,xmm11
+	add	rcx,QWORD[40+rsp]
+	mov	rdi,rdx
+	xor	r12,rbx
+	ror	r14,6
+	vpaddq	xmm10,xmm2,XMMWORD[((-64))+rbp]
+	xor	rdi,r8
+	add	rcx,r12
+	ror	r13,14
+	and	r15,rdi
+	xor	r14,rdx
+	add	rcx,r13
+	xor	r15,r8
+	ror	r14,28
+	add	r10,rcx
+	add	rcx,r15
+	mov	r13,r10
+	add	r14,rcx
+	vmovdqa	XMMWORD[32+rsp],xmm10
+	vpalignr	xmm8,xmm4,xmm3,8	; group 4 (xmm3): result goes to [48+rsp]
+	ror	r13,23
+	mov	rcx,r14
+	vpalignr	xmm11,xmm0,xmm7,8
+	mov	r12,r11
+	ror	r14,5
+DB	143,72,120,195,200,56
+	xor	r13,r10
+	xor	r12,rax
+	vpsrlq	xmm8,xmm8,7
+	ror	r13,4
+	xor	r14,rcx
+	vpaddq	xmm3,xmm3,xmm11
+	and	r12,r10
+	xor	r13,r10
+	add	rbx,QWORD[48+rsp]
+	mov	r15,rcx
+DB	143,72,120,195,209,7
+	xor	r12,rax
+	ror	r14,6
+	vpxor	xmm8,xmm8,xmm9
+	xor	r15,rdx
+	add	rbx,r12
+	ror	r13,14
+	and	rdi,r15
+DB	143,104,120,195,218,3
+	xor	r14,rcx
+	add	rbx,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,rdx
+	ror	r14,28
+	vpsrlq	xmm10,xmm2,6
+	add	r9,rbx
+	add	rbx,rdi
+	vpaddq	xmm3,xmm3,xmm8
+	mov	r13,r9
+	add	r14,rbx
+DB	143,72,120,195,203,42
+	ror	r13,23
+	mov	rbx,r14
+	vpxor	xmm11,xmm11,xmm10
+	mov	r12,r10
+	ror	r14,5
+	xor	r13,r9
+	xor	r12,r11
+	vpxor	xmm11,xmm11,xmm9
+	ror	r13,4
+	xor	r14,rbx
+	and	r12,r9
+	xor	r13,r9
+	vpaddq	xmm3,xmm3,xmm11
+	add	rax,QWORD[56+rsp]
+	mov	rdi,rbx
+	xor	r12,r11
+	ror	r14,6
+	vpaddq	xmm10,xmm3,XMMWORD[((-32))+rbp]
+	xor	rdi,rcx
+	add	rax,r12
+	ror	r13,14
+	and	r15,rdi
+	xor	r14,rbx
+	add	rax,r13
+	xor	r15,rcx
+	ror	r14,28
+	add	r8,rax
+	add	rax,r15
+	mov	r13,r8
+	add	r14,rax
+	vmovdqa	XMMWORD[48+rsp],xmm10
+	vpalignr	xmm8,xmm5,xmm4,8	; group 5 (xmm4): result goes to [64+rsp]
+	ror	r13,23
+	mov	rax,r14
+	vpalignr	xmm11,xmm1,xmm0,8
+	mov	r12,r9
+	ror	r14,5
+DB	143,72,120,195,200,56
+	xor	r13,r8
+	xor	r12,r10
+	vpsrlq	xmm8,xmm8,7
+	ror	r13,4
+	xor	r14,rax
+	vpaddq	xmm4,xmm4,xmm11
+	and	r12,r8
+	xor	r13,r8
+	add	r11,QWORD[64+rsp]
+	mov	r15,rax
+DB	143,72,120,195,209,7
+	xor	r12,r10
+	ror	r14,6
+	vpxor	xmm8,xmm8,xmm9
+	xor	r15,rbx
+	add	r11,r12
+	ror	r13,14
+	and	rdi,r15
+DB	143,104,120,195,219,3
+	xor	r14,rax
+	add	r11,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,rbx
+	ror	r14,28
+	vpsrlq	xmm10,xmm3,6
+	add	rdx,r11
+	add	r11,rdi
+	vpaddq	xmm4,xmm4,xmm8
+	mov	r13,rdx
+	add	r14,r11
+DB	143,72,120,195,203,42
+	ror	r13,23
+	mov	r11,r14
+	vpxor	xmm11,xmm11,xmm10
+	mov	r12,r8
+	ror	r14,5
+	xor	r13,rdx
+	xor	r12,r9
+	vpxor	xmm11,xmm11,xmm9
+	ror	r13,4
+	xor	r14,r11
+	and	r12,rdx
+	xor	r13,rdx
+	vpaddq	xmm4,xmm4,xmm11
+	add	r10,QWORD[72+rsp]
+	mov	rdi,r11
+	xor	r12,r9
+	ror	r14,6
+	vpaddq	xmm10,xmm4,XMMWORD[rbp]
+	xor	rdi,rax
+	add	r10,r12
+	ror	r13,14
+	and	r15,rdi
+	xor	r14,r11
+	add	r10,r13
+	xor	r15,rax
+	ror	r14,28
+	add	rcx,r10
+	add	r10,r15
+	mov	r13,rcx
+	add	r14,r10
+	vmovdqa	XMMWORD[64+rsp],xmm10
+	vpalignr	xmm8,xmm6,xmm5,8	; group 6 (xmm5): result goes to [80+rsp]
+	ror	r13,23
+	mov	r10,r14
+	vpalignr	xmm11,xmm2,xmm1,8
+	mov	r12,rdx
+	ror	r14,5
+DB	143,72,120,195,200,56
+	xor	r13,rcx
+	xor	r12,r8
+	vpsrlq	xmm8,xmm8,7
+	ror	r13,4
+	xor	r14,r10
+	vpaddq	xmm5,xmm5,xmm11
+	and	r12,rcx
+	xor	r13,rcx
+	add	r9,QWORD[80+rsp]
+	mov	r15,r10
+DB	143,72,120,195,209,7
+	xor	r12,r8
+	ror	r14,6
+	vpxor	xmm8,xmm8,xmm9
+	xor	r15,r11
+	add	r9,r12
+	ror	r13,14
+	and	rdi,r15
+DB	143,104,120,195,220,3
+	xor	r14,r10
+	add	r9,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,r11
+	ror	r14,28
+	vpsrlq	xmm10,xmm4,6
+	add	rbx,r9
+	add	r9,rdi
+	vpaddq	xmm5,xmm5,xmm8
+	mov	r13,rbx
+	add	r14,r9
+DB	143,72,120,195,203,42
+	ror	r13,23
+	mov	r9,r14
+	vpxor	xmm11,xmm11,xmm10
+	mov	r12,rcx
+	ror	r14,5
+	xor	r13,rbx
+	xor	r12,rdx
+	vpxor	xmm11,xmm11,xmm9
+	ror	r13,4
+	xor	r14,r9
+	and	r12,rbx
+	xor	r13,rbx
+	vpaddq	xmm5,xmm5,xmm11
+	add	r8,QWORD[88+rsp]
+	mov	rdi,r9
+	xor	r12,rdx
+	ror	r14,6
+	vpaddq	xmm10,xmm5,XMMWORD[32+rbp]
+	xor	rdi,r10
+	add	r8,r12
+	ror	r13,14
+	and	r15,rdi
+	xor	r14,r9
+	add	r8,r13
+	xor	r15,r10
+	ror	r14,28
+	add	rax,r8
+	add	r8,r15
+	mov	r13,rax
+	add	r14,r8
+	vmovdqa	XMMWORD[80+rsp],xmm10
+	vpalignr	xmm8,xmm7,xmm6,8	; group 7 (xmm6): result goes to [96+rsp]
+	ror	r13,23
+	mov	r8,r14
+	vpalignr	xmm11,xmm3,xmm2,8
+	mov	r12,rbx
+	ror	r14,5
+DB	143,72,120,195,200,56
+	xor	r13,rax
+	xor	r12,rcx
+	vpsrlq	xmm8,xmm8,7
+	ror	r13,4
+	xor	r14,r8
+	vpaddq	xmm6,xmm6,xmm11
+	and	r12,rax
+	xor	r13,rax
+	add	rdx,QWORD[96+rsp]
+	mov	r15,r8
+DB	143,72,120,195,209,7
+	xor	r12,rcx
+	ror	r14,6
+	vpxor	xmm8,xmm8,xmm9
+	xor	r15,r9
+	add	rdx,r12
+	ror	r13,14
+	and	rdi,r15
+DB	143,104,120,195,221,3
+	xor	r14,r8
+	add	rdx,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,r9
+	ror	r14,28
+	vpsrlq	xmm10,xmm5,6
+	add	r11,rdx
+	add	rdx,rdi
+	vpaddq	xmm6,xmm6,xmm8
+	mov	r13,r11
+	add	r14,rdx
+DB	143,72,120,195,203,42
+	ror	r13,23
+	mov	rdx,r14
+	vpxor	xmm11,xmm11,xmm10
+	mov	r12,rax
+	ror	r14,5
+	xor	r13,r11
+	xor	r12,rbx
+	vpxor	xmm11,xmm11,xmm9
+	ror	r13,4
+	xor	r14,rdx
+	and	r12,r11
+	xor	r13,r11
+	vpaddq	xmm6,xmm6,xmm11
+	add	rcx,QWORD[104+rsp]
+	mov	rdi,rdx
+	xor	r12,rbx
+	ror	r14,6
+	vpaddq	xmm10,xmm6,XMMWORD[64+rbp]
+	xor	rdi,r8
+	add	rcx,r12
+	ror	r13,14
+	and	r15,rdi
+	xor	r14,rdx
+	add	rcx,r13
+	xor	r15,r8
+	ror	r14,28
+	add	r10,rcx
+	add	rcx,r15
+	mov	r13,r10
+	add	r14,rcx
+	vmovdqa	XMMWORD[96+rsp],xmm10
+	vpalignr	xmm8,xmm0,xmm7,8	; group 8 (xmm7): result goes to [112+rsp]
+	ror	r13,23
+	mov	rcx,r14
+	vpalignr	xmm11,xmm4,xmm3,8
+	mov	r12,r11
+	ror	r14,5
+DB	143,72,120,195,200,56
+	xor	r13,r10
+	xor	r12,rax
+	vpsrlq	xmm8,xmm8,7
+	ror	r13,4
+	xor	r14,rcx
+	vpaddq	xmm7,xmm7,xmm11
+	and	r12,r10
+	xor	r13,r10
+	add	rbx,QWORD[112+rsp]
+	mov	r15,rcx
+DB	143,72,120,195,209,7
+	xor	r12,rax
+	ror	r14,6
+	vpxor	xmm8,xmm8,xmm9
+	xor	r15,rdx
+	add	rbx,r12
+	ror	r13,14
+	and	rdi,r15
+DB	143,104,120,195,222,3
+	xor	r14,rcx
+	add	rbx,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,rdx
+	ror	r14,28
+	vpsrlq	xmm10,xmm6,6
+	add	r9,rbx
+	add	rbx,rdi
+	vpaddq	xmm7,xmm7,xmm8
+	mov	r13,r9
+	add	r14,rbx
+DB	143,72,120,195,203,42
+	ror	r13,23
+	mov	rbx,r14
+	vpxor	xmm11,xmm11,xmm10
+	mov	r12,r10
+	ror	r14,5
+	xor	r13,r9
+	xor	r12,r11
+	vpxor	xmm11,xmm11,xmm9
+	ror	r13,4
+	xor	r14,rbx
+	and	r12,r9
+	xor	r13,r9
+	vpaddq	xmm7,xmm7,xmm11
+	add	rax,QWORD[120+rsp]
+	mov	rdi,rbx
+	xor	r12,r11
+	ror	r14,6
+	vpaddq	xmm10,xmm7,XMMWORD[96+rbp]
+	xor	rdi,rcx
+	add	rax,r12
+	ror	r13,14
+	and	r15,rdi
+	xor	r14,rbx
+	add	rax,r13
+	xor	r15,rcx
+	ror	r14,28
+	add	r8,rax
+	add	rax,r15
+	mov	r13,r8
+	add	r14,rax
+	vmovdqa	XMMWORD[112+rsp],xmm10
+	cmp	BYTE[135+rbp],0	; top byte of the qword at rbp+128: non-zero for table constants, zero once it lands on the shuffle mask at K512+1280 (after the fourth pass)
+	jne	NEAR $L$xop_00_47
+	ror	r13,23	; fall-through from the schedule loop: 16 more scalar rounds that only consume the K+W values already on the stack (no vector instructions until the state write-back)
+	mov	rax,r14
+	mov	r12,r9
+	ror	r14,5
+	xor	r13,r8
+	xor	r12,r10
+	ror	r13,4
+	xor	r14,rax
+	and	r12,r8
+	xor	r13,r8
+	add	r11,QWORD[rsp]	; stack slot 0; each following round reads the next 8-byte slot up to [120+rsp]
+	mov	r15,rax
+	xor	r12,r10
+	ror	r14,6
+	xor	r15,rbx
+	add	r11,r12
+	ror	r13,14
+	and	rdi,r15
+	xor	r14,rax
+	add	r11,r13
+	xor	rdi,rbx
+	ror	r14,28
+	add	rdx,r11
+	add	r11,rdi
+	mov	r13,rdx
+	add	r14,r11
+	ror	r13,23
+	mov	r11,r14
+	mov	r12,r8
+	ror	r14,5
+	xor	r13,rdx
+	xor	r12,r9
+	ror	r13,4
+	xor	r14,r11
+	and	r12,rdx
+	xor	r13,rdx
+	add	r10,QWORD[8+rsp]
+	mov	rdi,r11
+	xor	r12,r9
+	ror	r14,6
+	xor	rdi,rax
+	add	r10,r12
+	ror	r13,14
+	and	r15,rdi
+	xor	r14,r11
+	add	r10,r13
+	xor	r15,rax
+	ror	r14,28
+	add	rcx,r10
+	add	r10,r15
+	mov	r13,rcx
+	add	r14,r10
+	ror	r13,23
+	mov	r10,r14
+	mov	r12,rdx
+	ror	r14,5
+	xor	r13,rcx
+	xor	r12,r8
+	ror	r13,4
+	xor	r14,r10
+	and	r12,rcx
+	xor	r13,rcx
+	add	r9,QWORD[16+rsp]
+	mov	r15,r10
+	xor	r12,r8
+	ror	r14,6
+	xor	r15,r11
+	add	r9,r12
+	ror	r13,14
+	and	rdi,r15
+	xor	r14,r10
+	add	r9,r13
+	xor	rdi,r11
+	ror	r14,28
+	add	rbx,r9
+	add	r9,rdi
+	mov	r13,rbx
+	add	r14,r9
+	ror	r13,23
+	mov	r9,r14
+	mov	r12,rcx
+	ror	r14,5
+	xor	r13,rbx
+	xor	r12,rdx
+	ror	r13,4
+	xor	r14,r9
+	and	r12,rbx
+	xor	r13,rbx
+	add	r8,QWORD[24+rsp]
+	mov	rdi,r9
+	xor	r12,rdx
+	ror	r14,6
+	xor	rdi,r10
+	add	r8,r12
+	ror	r13,14
+	and	r15,rdi
+	xor	r14,r9
+	add	r8,r13
+	xor	r15,r10
+	ror	r14,28
+	add	rax,r8
+	add	r8,r15
+	mov	r13,rax
+	add	r14,r8
+	ror	r13,23
+	mov	r8,r14
+	mov	r12,rbx
+	ror	r14,5
+	xor	r13,rax
+	xor	r12,rcx
+	ror	r13,4
+	xor	r14,r8
+	and	r12,rax
+	xor	r13,rax
+	add	rdx,QWORD[32+rsp]
+	mov	r15,r8
+	xor	r12,rcx
+	ror	r14,6
+	xor	r15,r9
+	add	rdx,r12
+	ror	r13,14
+	and	rdi,r15
+	xor	r14,r8
+	add	rdx,r13
+	xor	rdi,r9
+	ror	r14,28
+	add	r11,rdx
+	add	rdx,rdi
+	mov	r13,r11
+	add	r14,rdx
+	ror	r13,23
+	mov	rdx,r14
+	mov	r12,rax
+	ror	r14,5
+	xor	r13,r11
+	xor	r12,rbx
+	ror	r13,4
+	xor	r14,rdx
+	and	r12,r11
+	xor	r13,r11
+	add	rcx,QWORD[40+rsp]
+	mov	rdi,rdx
+	xor	r12,rbx
+	ror	r14,6
+	xor	rdi,r8
+	add	rcx,r12
+	ror	r13,14
+	and	r15,rdi
+	xor	r14,rdx
+	add	rcx,r13
+	xor	r15,r8
+	ror	r14,28
+	add	r10,rcx
+	add	rcx,r15
+	mov	r13,r10
+	add	r14,rcx
+	ror	r13,23
+	mov	rcx,r14
+	mov	r12,r11
+	ror	r14,5
+	xor	r13,r10
+	xor	r12,rax
+	ror	r13,4
+	xor	r14,rcx
+	and	r12,r10
+	xor	r13,r10
+	add	rbx,QWORD[48+rsp]
+	mov	r15,rcx
+	xor	r12,rax
+	ror	r14,6
+	xor	r15,rdx
+	add	rbx,r12
+	ror	r13,14
+	and	rdi,r15
+	xor	r14,rcx
+	add	rbx,r13
+	xor	rdi,rdx
+	ror	r14,28
+	add	r9,rbx
+	add	rbx,rdi
+	mov	r13,r9
+	add	r14,rbx
+	ror	r13,23
+	mov	rbx,r14
+	mov	r12,r10
+	ror	r14,5
+	xor	r13,r9
+	xor	r12,r11
+	ror	r13,4
+	xor	r14,rbx
+	and	r12,r9
+	xor	r13,r9
+	add	rax,QWORD[56+rsp]
+	mov	rdi,rbx
+	xor	r12,r11
+	ror	r14,6
+	xor	rdi,rcx
+	add	rax,r12
+	ror	r13,14
+	and	r15,rdi
+	xor	r14,rbx
+	add	rax,r13
+	xor	r15,rcx
+	ror	r14,28
+	add	r8,rax
+	add	rax,r15
+	mov	r13,r8
+	add	r14,rax
+	ror	r13,23
+	mov	rax,r14
+	mov	r12,r9
+	ror	r14,5
+	xor	r13,r8
+	xor	r12,r10
+	ror	r13,4
+	xor	r14,rax
+	and	r12,r8
+	xor	r13,r8
+	add	r11,QWORD[64+rsp]
+	mov	r15,rax
+	xor	r12,r10
+	ror	r14,6
+	xor	r15,rbx
+	add	r11,r12
+	ror	r13,14
+	and	rdi,r15
+	xor	r14,rax
+	add	r11,r13
+	xor	rdi,rbx
+	ror	r14,28
+	add	rdx,r11
+	add	r11,rdi
+	mov	r13,rdx
+	add	r14,r11
+	ror	r13,23
+	mov	r11,r14
+	mov	r12,r8
+	ror	r14,5
+	xor	r13,rdx
+	xor	r12,r9
+	ror	r13,4
+	xor	r14,r11
+	and	r12,rdx
+	xor	r13,rdx
+	add	r10,QWORD[72+rsp]
+	mov	rdi,r11
+	xor	r12,r9
+	ror	r14,6
+	xor	rdi,rax
+	add	r10,r12
+	ror	r13,14
+	and	r15,rdi
+	xor	r14,r11
+	add	r10,r13
+	xor	r15,rax
+	ror	r14,28
+	add	rcx,r10
+	add	r10,r15
+	mov	r13,rcx
+	add	r14,r10
+	ror	r13,23
+	mov	r10,r14
+	mov	r12,rdx
+	ror	r14,5
+	xor	r13,rcx
+	xor	r12,r8
+	ror	r13,4
+	xor	r14,r10
+	and	r12,rcx
+	xor	r13,rcx
+	add	r9,QWORD[80+rsp]
+	mov	r15,r10
+	xor	r12,r8
+	ror	r14,6
+	xor	r15,r11
+	add	r9,r12
+	ror	r13,14
+	and	rdi,r15
+	xor	r14,r10
+	add	r9,r13
+	xor	rdi,r11
+	ror	r14,28
+	add	rbx,r9
+	add	r9,rdi
+	mov	r13,rbx
+	add	r14,r9
+	ror	r13,23
+	mov	r9,r14
+	mov	r12,rcx
+	ror	r14,5
+	xor	r13,rbx
+	xor	r12,rdx
+	ror	r13,4
+	xor	r14,r9
+	and	r12,rbx
+	xor	r13,rbx
+	add	r8,QWORD[88+rsp]
+	mov	rdi,r9
+	xor	r12,rdx
+	ror	r14,6
+	xor	rdi,r10
+	add	r8,r12
+	ror	r13,14
+	and	r15,rdi
+	xor	r14,r9
+	add	r8,r13
+	xor	r15,r10
+	ror	r14,28
+	add	rax,r8
+	add	r8,r15
+	mov	r13,rax
+	add	r14,r8
+	ror	r13,23
+	mov	r8,r14
+	mov	r12,rbx
+	ror	r14,5
+	xor	r13,rax
+	xor	r12,rcx
+	ror	r13,4
+	xor	r14,r8
+	and	r12,rax
+	xor	r13,rax
+	add	rdx,QWORD[96+rsp]
+	mov	r15,r8
+	xor	r12,rcx
+	ror	r14,6
+	xor	r15,r9
+	add	rdx,r12
+	ror	r13,14
+	and	rdi,r15
+	xor	r14,r8
+	add	rdx,r13
+	xor	rdi,r9
+	ror	r14,28
+	add	r11,rdx
+	add	rdx,rdi
+	mov	r13,r11
+	add	r14,rdx
+	ror	r13,23
+	mov	rdx,r14
+	mov	r12,rax
+	ror	r14,5
+	xor	r13,r11
+	xor	r12,rbx
+	ror	r13,4
+	xor	r14,rdx
+	and	r12,r11
+	xor	r13,r11
+	add	rcx,QWORD[104+rsp]
+	mov	rdi,rdx
+	xor	r12,rbx
+	ror	r14,6
+	xor	rdi,r8
+	add	rcx,r12
+	ror	r13,14
+	and	r15,rdi
+	xor	r14,rdx
+	add	rcx,r13
+	xor	r15,r8
+	ror	r14,28
+	add	r10,rcx
+	add	rcx,r15
+	mov	r13,r10
+	add	r14,rcx
+	ror	r13,23
+	mov	rcx,r14
+	mov	r12,r11
+	ror	r14,5
+	xor	r13,r10
+	xor	r12,rax
+	ror	r13,4
+	xor	r14,rcx
+	and	r12,r10
+	xor	r13,r10
+	add	rbx,QWORD[112+rsp]
+	mov	r15,rcx
+	xor	r12,rax
+	ror	r14,6
+	xor	r15,rdx
+	add	rbx,r12
+	ror	r13,14
+	and	rdi,r15
+	xor	r14,rcx
+	add	rbx,r13
+	xor	rdi,rdx
+	ror	r14,28
+	add	r9,rbx
+	add	rbx,rdi
+	mov	r13,r9
+	add	r14,rbx
+	ror	r13,23
+	mov	rbx,r14
+	mov	r12,r10
+	ror	r14,5
+	xor	r13,r9
+	xor	r12,r11
+	ror	r13,4
+	xor	r14,rbx
+	and	r12,r9
+	xor	r13,r9
+	add	rax,QWORD[120+rsp]
+	mov	rdi,rbx
+	xor	r12,r11
+	ror	r14,6
+	xor	rdi,rcx
+	add	rax,r12
+	ror	r13,14
+	and	r15,rdi
+	xor	r14,rbx
+	add	rax,r13
+	xor	r15,rcx
+	ror	r14,28
+	add	r8,rax
+	add	rax,r15
+	mov	r13,r8
+	add	r14,rax
+	mov	rdi,QWORD[((128+0))+rsp]	; reload the state pointer saved in the prologue (rdi was used as a round temporary)
+	mov	rax,r14	; r14 carries the finished first working word
+
+	add	rax,QWORD[rdi]	; add the previous state words to the eight working registers
+	lea	rsi,[128+rsi]	; advance the input pointer by one 128-byte block (lea leaves flags untouched)
+	add	rbx,QWORD[8+rdi]
+	add	rcx,QWORD[16+rdi]
+	add	rdx,QWORD[24+rdi]
+	add	r8,QWORD[32+rdi]
+	add	r9,QWORD[40+rdi]
+	add	r10,QWORD[48+rdi]
+	add	r11,QWORD[56+rdi]
+
+	cmp	rsi,QWORD[((128+16))+rsp]	; compare with the saved end-of-input pointer; the mov stores below do not modify flags
+
+	mov	QWORD[rdi],rax	; write the updated state back
+	mov	QWORD[8+rdi],rbx
+	mov	QWORD[16+rdi],rcx
+	mov	QWORD[24+rdi],rdx
+	mov	QWORD[32+rdi],r8
+	mov	QWORD[40+rdi],r9
+	mov	QWORD[48+rdi],r10
+	mov	QWORD[56+rdi],r11
+	jb	NEAR $L$loop_xop	; input pointer still below the end: process the next block
+	mov	rsi,QWORD[((128+24))+rsp]	; rsi = stack pointer value captured before the pushes in the prologue
+	vzeroupper
+	movaps	xmm6,XMMWORD[((128+32))+rsp]	; restore xmm6-xmm11 from the frame slots filled in the prologue
+	movaps	xmm7,XMMWORD[((128+48))+rsp]
+	movaps	xmm8,XMMWORD[((128+64))+rsp]
+	movaps	xmm9,XMMWORD[((128+80))+rsp]
+	movaps	xmm10,XMMWORD[((128+96))+rsp]
+	movaps	xmm11,XMMWORD[((128+112))+rsp]
+	mov	r15,QWORD[((-48))+rsi]	; reload the six pushed registers from just below the original stack pointer (push order was rbx,rbp,r12,r13,r14,r15)
+	mov	r14,QWORD[((-40))+rsi]
+	mov	r13,QWORD[((-32))+rsi]
+	mov	r12,QWORD[((-24))+rsi]
+	mov	rbp,QWORD[((-16))+rsi]
+	mov	rbx,QWORD[((-8))+rsi]
+	lea	rsp,[rsi]	; drop the aligned frame and the pushes in one step
+$L$epilogue_xop:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]	; rdi/rsi come back from the slots written at function entry
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_sha512_block_data_order_xop:
+
+ALIGN	64
+sha512_block_data_order_avx:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha512_block_data_order_avx:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+$L$avx_shortcut:
+	mov	rax,rsp
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	shl	rdx,4
+	sub	rsp,256
+	lea	rdx,[rdx*8+rsi]
+	and	rsp,-64
+	mov	QWORD[((128+0))+rsp],rdi
+	mov	QWORD[((128+8))+rsp],rsi
+	mov	QWORD[((128+16))+rsp],rdx
+	mov	QWORD[((128+24))+rsp],rax
+	movaps	XMMWORD[(128+32)+rsp],xmm6
+	movaps	XMMWORD[(128+48)+rsp],xmm7
+	movaps	XMMWORD[(128+64)+rsp],xmm8
+	movaps	XMMWORD[(128+80)+rsp],xmm9
+	movaps	XMMWORD[(128+96)+rsp],xmm10
+	movaps	XMMWORD[(128+112)+rsp],xmm11
+$L$prologue_avx:
+
+	vzeroupper
+	mov	rax,QWORD[rdi]
+	mov	rbx,QWORD[8+rdi]
+	mov	rcx,QWORD[16+rdi]
+	mov	rdx,QWORD[24+rdi]
+	mov	r8,QWORD[32+rdi]
+	mov	r9,QWORD[40+rdi]
+	mov	r10,QWORD[48+rdi]
+	mov	r11,QWORD[56+rdi]
+	jmp	NEAR $L$loop_avx
+ALIGN	16
+$L$loop_avx:	; per-block setup: runs once for every 128-byte input block
+	vmovdqa	xmm11,XMMWORD[((K512+1280))]	; pshufb control mask stored at K512+1280 (presumably the byte-swap mask - confirm against the K512 definition)
+	vmovdqu	xmm0,XMMWORD[rsi]	; xmm0..xmm7 receive the 128 input bytes at rsi (unaligned loads)
+	lea	rbp,[((K512+128))]	; rbp = K512+128, so displacements -128..+96 below cover the first 256 table bytes
+	vmovdqu	xmm1,XMMWORD[16+rsi]
+	vmovdqu	xmm2,XMMWORD[32+rsi]
+	vpshufb	xmm0,xmm0,xmm11	; permute the bytes of each loaded vector with the mask
+	vmovdqu	xmm3,XMMWORD[48+rsi]
+	vpshufb	xmm1,xmm1,xmm11
+	vmovdqu	xmm4,XMMWORD[64+rsi]
+	vpshufb	xmm2,xmm2,xmm11
+	vmovdqu	xmm5,XMMWORD[80+rsi]
+	vpshufb	xmm3,xmm3,xmm11
+	vmovdqu	xmm6,XMMWORD[96+rsi]
+	vpshufb	xmm4,xmm4,xmm11
+	vmovdqu	xmm7,XMMWORD[112+rsi]
+	vpshufb	xmm5,xmm5,xmm11
+	vpaddq	xmm8,xmm0,XMMWORD[((-128))+rbp]	; add table constants (16 bytes taken every 32 bytes) to the shuffled words ...
+	vpshufb	xmm6,xmm6,xmm11
+	vpaddq	xmm9,xmm1,XMMWORD[((-96))+rbp]
+	vpshufb	xmm7,xmm7,xmm11	; last use of the mask; xmm11 is reused as a temporary right after
+	vpaddq	xmm10,xmm2,XMMWORD[((-64))+rbp]
+	vpaddq	xmm11,xmm3,XMMWORD[((-32))+rbp]
+	vmovdqa	XMMWORD[rsp],xmm8	; ... and park the sums at [rsp]..[127+rsp], where the scalar rounds read them
+	vpaddq	xmm8,xmm4,XMMWORD[rbp]
+	vmovdqa	XMMWORD[16+rsp],xmm9
+	vpaddq	xmm9,xmm5,XMMWORD[32+rbp]
+	vmovdqa	XMMWORD[32+rsp],xmm10
+	vpaddq	xmm10,xmm6,XMMWORD[64+rbp]
+	vmovdqa	XMMWORD[48+rsp],xmm11
+	vpaddq	xmm11,xmm7,XMMWORD[96+rbp]
+	vmovdqa	XMMWORD[64+rsp],xmm8
+	mov	r14,rax	; seed the scratch registers consumed by the first round: r14 = rax
+	vmovdqa	XMMWORD[80+rsp],xmm9
+	mov	rdi,rbx	; rdi = rbx ...
+	vmovdqa	XMMWORD[96+rsp],xmm10
+	xor	rdi,rcx	; ... xor rcx
+	vmovdqa	XMMWORD[112+rsp],xmm11
+	mov	r13,r8	; r13 = r8
+	jmp	NEAR $L$avx_00_47
+
+ALIGN	16
+$L$avx_00_47:	; unrolled loop body: 16 scalar round steps (one per qword slot [rsp]..[120+rsp]) interleaved with vector updates of xmm0-xmm7
+	add	rbp,256	; step the constant-table pointer by 256 bytes per pass
+	vpalignr	xmm8,xmm1,xmm0,8
+	shrd	r13,r13,23
+	mov	rax,r14
+	vpalignr	xmm11,xmm5,xmm4,8
+	mov	r12,r9
+	shrd	r14,r14,5
+	vpsrlq	xmm10,xmm8,1
+	xor	r13,r8
+	xor	r12,r10
+	vpaddq	xmm0,xmm0,xmm11
+	shrd	r13,r13,4
+	xor	r14,rax
+	vpsrlq	xmm11,xmm8,7
+	and	r12,r8
+	xor	r13,r8
+	vpsllq	xmm9,xmm8,56
+	add	r11,QWORD[rsp]
+	mov	r15,rax
+	vpxor	xmm8,xmm11,xmm10
+	xor	r12,r10
+	shrd	r14,r14,6
+	vpsrlq	xmm10,xmm10,7
+	xor	r15,rbx
+	add	r11,r12
+	vpxor	xmm8,xmm8,xmm9
+	shrd	r13,r13,14
+	and	rdi,r15
+	vpsllq	xmm9,xmm9,7
+	xor	r14,rax
+	add	r11,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,rbx
+	shrd	r14,r14,28
+	vpsrlq	xmm11,xmm7,6
+	add	rdx,r11
+	add	r11,rdi
+	vpxor	xmm8,xmm8,xmm9
+	mov	r13,rdx
+	add	r14,r11
+	vpsllq	xmm10,xmm7,3
+	shrd	r13,r13,23
+	mov	r11,r14
+	vpaddq	xmm0,xmm0,xmm8
+	mov	r12,r8
+	shrd	r14,r14,5
+	vpsrlq	xmm9,xmm7,19
+	xor	r13,rdx
+	xor	r12,r9
+	vpxor	xmm11,xmm11,xmm10
+	shrd	r13,r13,4
+	xor	r14,r11
+	vpsllq	xmm10,xmm10,42
+	and	r12,rdx
+	xor	r13,rdx
+	vpxor	xmm11,xmm11,xmm9
+	add	r10,QWORD[8+rsp]
+	mov	rdi,r11
+	vpsrlq	xmm9,xmm9,42
+	xor	r12,r9
+	shrd	r14,r14,6
+	vpxor	xmm11,xmm11,xmm10
+	xor	rdi,rax
+	add	r10,r12
+	vpxor	xmm11,xmm11,xmm9
+	shrd	r13,r13,14
+	and	r15,rdi
+	vpaddq	xmm0,xmm0,xmm11
+	xor	r14,r11
+	add	r10,r13
+	vpaddq	xmm10,xmm0,XMMWORD[((-128))+rbp]
+	xor	r15,rax
+	shrd	r14,r14,28
+	add	rcx,r10
+	add	r10,r15
+	mov	r13,rcx
+	add	r14,r10
+	vmovdqa	XMMWORD[rsp],xmm10
+	vpalignr	xmm8,xmm2,xmm1,8
+	shrd	r13,r13,23
+	mov	r10,r14
+	vpalignr	xmm11,xmm6,xmm5,8
+	mov	r12,rdx
+	shrd	r14,r14,5
+	vpsrlq	xmm10,xmm8,1
+	xor	r13,rcx
+	xor	r12,r8
+	vpaddq	xmm1,xmm1,xmm11
+	shrd	r13,r13,4
+	xor	r14,r10
+	vpsrlq	xmm11,xmm8,7
+	and	r12,rcx
+	xor	r13,rcx
+	vpsllq	xmm9,xmm8,56
+	add	r9,QWORD[16+rsp]
+	mov	r15,r10
+	vpxor	xmm8,xmm11,xmm10
+	xor	r12,r8
+	shrd	r14,r14,6
+	vpsrlq	xmm10,xmm10,7
+	xor	r15,r11
+	add	r9,r12
+	vpxor	xmm8,xmm8,xmm9
+	shrd	r13,r13,14
+	and	rdi,r15
+	vpsllq	xmm9,xmm9,7
+	xor	r14,r10
+	add	r9,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,r11
+	shrd	r14,r14,28
+	vpsrlq	xmm11,xmm0,6
+	add	rbx,r9
+	add	r9,rdi
+	vpxor	xmm8,xmm8,xmm9
+	mov	r13,rbx
+	add	r14,r9
+	vpsllq	xmm10,xmm0,3
+	shrd	r13,r13,23
+	mov	r9,r14
+	vpaddq	xmm1,xmm1,xmm8
+	mov	r12,rcx
+	shrd	r14,r14,5
+	vpsrlq	xmm9,xmm0,19
+	xor	r13,rbx
+	xor	r12,rdx
+	vpxor	xmm11,xmm11,xmm10
+	shrd	r13,r13,4
+	xor	r14,r9
+	vpsllq	xmm10,xmm10,42
+	and	r12,rbx
+	xor	r13,rbx
+	vpxor	xmm11,xmm11,xmm9
+	add	r8,QWORD[24+rsp]
+	mov	rdi,r9
+	vpsrlq	xmm9,xmm9,42
+	xor	r12,rdx
+	shrd	r14,r14,6
+	vpxor	xmm11,xmm11,xmm10
+	xor	rdi,r10
+	add	r8,r12
+	vpxor	xmm11,xmm11,xmm9
+	shrd	r13,r13,14
+	and	r15,rdi
+	vpaddq	xmm1,xmm1,xmm11
+	xor	r14,r9
+	add	r8,r13
+	vpaddq	xmm10,xmm1,XMMWORD[((-96))+rbp]
+	xor	r15,r10
+	shrd	r14,r14,28
+	add	rax,r8
+	add	r8,r15
+	mov	r13,rax
+	add	r14,r8
+	vmovdqa	XMMWORD[16+rsp],xmm10
+	vpalignr	xmm8,xmm3,xmm2,8
+	shrd	r13,r13,23
+	mov	r8,r14
+	vpalignr	xmm11,xmm7,xmm6,8
+	mov	r12,rbx
+	shrd	r14,r14,5
+	vpsrlq	xmm10,xmm8,1
+	xor	r13,rax
+	xor	r12,rcx
+	vpaddq	xmm2,xmm2,xmm11
+	shrd	r13,r13,4
+	xor	r14,r8
+	vpsrlq	xmm11,xmm8,7
+	and	r12,rax
+	xor	r13,rax
+	vpsllq	xmm9,xmm8,56
+	add	rdx,QWORD[32+rsp]
+	mov	r15,r8
+	vpxor	xmm8,xmm11,xmm10
+	xor	r12,rcx
+	shrd	r14,r14,6
+	vpsrlq	xmm10,xmm10,7
+	xor	r15,r9
+	add	rdx,r12
+	vpxor	xmm8,xmm8,xmm9
+	shrd	r13,r13,14
+	and	rdi,r15
+	vpsllq	xmm9,xmm9,7
+	xor	r14,r8
+	add	rdx,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,r9
+	shrd	r14,r14,28
+	vpsrlq	xmm11,xmm1,6
+	add	r11,rdx
+	add	rdx,rdi
+	vpxor	xmm8,xmm8,xmm9
+	mov	r13,r11
+	add	r14,rdx
+	vpsllq	xmm10,xmm1,3
+	shrd	r13,r13,23
+	mov	rdx,r14
+	vpaddq	xmm2,xmm2,xmm8
+	mov	r12,rax
+	shrd	r14,r14,5
+	vpsrlq	xmm9,xmm1,19
+	xor	r13,r11
+	xor	r12,rbx
+	vpxor	xmm11,xmm11,xmm10
+	shrd	r13,r13,4
+	xor	r14,rdx
+	vpsllq	xmm10,xmm10,42
+	and	r12,r11
+	xor	r13,r11
+	vpxor	xmm11,xmm11,xmm9
+	add	rcx,QWORD[40+rsp]
+	mov	rdi,rdx
+	vpsrlq	xmm9,xmm9,42
+	xor	r12,rbx
+	shrd	r14,r14,6
+	vpxor	xmm11,xmm11,xmm10
+	xor	rdi,r8
+	add	rcx,r12
+	vpxor	xmm11,xmm11,xmm9
+	shrd	r13,r13,14
+	and	r15,rdi
+	vpaddq	xmm2,xmm2,xmm11
+	xor	r14,rdx
+	add	rcx,r13
+	vpaddq	xmm10,xmm2,XMMWORD[((-64))+rbp]
+	xor	r15,r8
+	shrd	r14,r14,28
+	add	r10,rcx
+	add	rcx,r15
+	mov	r13,r10
+	add	r14,rcx
+	vmovdqa	XMMWORD[32+rsp],xmm10
+	vpalignr	xmm8,xmm4,xmm3,8
+	shrd	r13,r13,23
+	mov	rcx,r14
+	vpalignr	xmm11,xmm0,xmm7,8
+	mov	r12,r11
+	shrd	r14,r14,5
+	vpsrlq	xmm10,xmm8,1
+	xor	r13,r10
+	xor	r12,rax
+	vpaddq	xmm3,xmm3,xmm11
+	shrd	r13,r13,4
+	xor	r14,rcx
+	vpsrlq	xmm11,xmm8,7
+	and	r12,r10
+	xor	r13,r10
+	vpsllq	xmm9,xmm8,56
+	add	rbx,QWORD[48+rsp]
+	mov	r15,rcx
+	vpxor	xmm8,xmm11,xmm10
+	xor	r12,rax
+	shrd	r14,r14,6
+	vpsrlq	xmm10,xmm10,7
+	xor	r15,rdx
+	add	rbx,r12
+	vpxor	xmm8,xmm8,xmm9
+	shrd	r13,r13,14
+	and	rdi,r15
+	vpsllq	xmm9,xmm9,7
+	xor	r14,rcx
+	add	rbx,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,rdx
+	shrd	r14,r14,28
+	vpsrlq	xmm11,xmm2,6
+	add	r9,rbx
+	add	rbx,rdi
+	vpxor	xmm8,xmm8,xmm9
+	mov	r13,r9
+	add	r14,rbx
+	vpsllq	xmm10,xmm2,3
+	shrd	r13,r13,23
+	mov	rbx,r14
+	vpaddq	xmm3,xmm3,xmm8
+	mov	r12,r10
+	shrd	r14,r14,5
+	vpsrlq	xmm9,xmm2,19
+	xor	r13,r9
+	xor	r12,r11
+	vpxor	xmm11,xmm11,xmm10
+	shrd	r13,r13,4
+	xor	r14,rbx
+	vpsllq	xmm10,xmm10,42
+	and	r12,r9
+	xor	r13,r9
+	vpxor	xmm11,xmm11,xmm9
+	add	rax,QWORD[56+rsp]
+	mov	rdi,rbx
+	vpsrlq	xmm9,xmm9,42
+	xor	r12,r11
+	shrd	r14,r14,6
+	vpxor	xmm11,xmm11,xmm10
+	xor	rdi,rcx
+	add	rax,r12
+	vpxor	xmm11,xmm11,xmm9
+	shrd	r13,r13,14
+	and	r15,rdi
+	vpaddq	xmm3,xmm3,xmm11
+	xor	r14,rbx
+	add	rax,r13
+	vpaddq	xmm10,xmm3,XMMWORD[((-32))+rbp]
+	xor	r15,rcx
+	shrd	r14,r14,28
+	add	r8,rax
+	add	rax,r15
+	mov	r13,r8
+	add	r14,rax
+	vmovdqa	XMMWORD[48+rsp],xmm10
+	vpalignr	xmm8,xmm5,xmm4,8
+	shrd	r13,r13,23
+	mov	rax,r14
+	vpalignr	xmm11,xmm1,xmm0,8
+	mov	r12,r9
+	shrd	r14,r14,5
+	vpsrlq	xmm10,xmm8,1
+	xor	r13,r8
+	xor	r12,r10
+	vpaddq	xmm4,xmm4,xmm11
+	shrd	r13,r13,4
+	xor	r14,rax
+	vpsrlq	xmm11,xmm8,7
+	and	r12,r8
+	xor	r13,r8
+	vpsllq	xmm9,xmm8,56
+	add	r11,QWORD[64+rsp]
+	mov	r15,rax
+	vpxor	xmm8,xmm11,xmm10
+	xor	r12,r10
+	shrd	r14,r14,6
+	vpsrlq	xmm10,xmm10,7
+	xor	r15,rbx
+	add	r11,r12
+	vpxor	xmm8,xmm8,xmm9
+	shrd	r13,r13,14
+	and	rdi,r15
+	vpsllq	xmm9,xmm9,7
+	xor	r14,rax
+	add	r11,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,rbx
+	shrd	r14,r14,28
+	vpsrlq	xmm11,xmm3,6
+	add	rdx,r11
+	add	r11,rdi
+	vpxor	xmm8,xmm8,xmm9
+	mov	r13,rdx
+	add	r14,r11
+	vpsllq	xmm10,xmm3,3
+	shrd	r13,r13,23
+	mov	r11,r14
+	vpaddq	xmm4,xmm4,xmm8
+	mov	r12,r8
+	shrd	r14,r14,5
+	vpsrlq	xmm9,xmm3,19
+	xor	r13,rdx
+	xor	r12,r9
+	vpxor	xmm11,xmm11,xmm10
+	shrd	r13,r13,4
+	xor	r14,r11
+	vpsllq	xmm10,xmm10,42
+	and	r12,rdx
+	xor	r13,rdx
+	vpxor	xmm11,xmm11,xmm9
+	add	r10,QWORD[72+rsp]
+	mov	rdi,r11
+	vpsrlq	xmm9,xmm9,42
+	xor	r12,r9
+	shrd	r14,r14,6
+	vpxor	xmm11,xmm11,xmm10
+	xor	rdi,rax
+	add	r10,r12
+	vpxor	xmm11,xmm11,xmm9
+	shrd	r13,r13,14
+	and	r15,rdi
+	vpaddq	xmm4,xmm4,xmm11
+	xor	r14,r11
+	add	r10,r13
+	vpaddq	xmm10,xmm4,XMMWORD[rbp]
+	xor	r15,rax
+	shrd	r14,r14,28
+	add	rcx,r10
+	add	r10,r15
+	mov	r13,rcx
+	add	r14,r10
+	vmovdqa	XMMWORD[64+rsp],xmm10
+	vpalignr	xmm8,xmm6,xmm5,8
+	shrd	r13,r13,23
+	mov	r10,r14
+	vpalignr	xmm11,xmm2,xmm1,8
+	mov	r12,rdx
+	shrd	r14,r14,5
+	vpsrlq	xmm10,xmm8,1
+	xor	r13,rcx
+	xor	r12,r8
+	vpaddq	xmm5,xmm5,xmm11
+	shrd	r13,r13,4
+	xor	r14,r10
+	vpsrlq	xmm11,xmm8,7
+	and	r12,rcx
+	xor	r13,rcx
+	vpsllq	xmm9,xmm8,56
+	add	r9,QWORD[80+rsp]
+	mov	r15,r10
+	vpxor	xmm8,xmm11,xmm10
+	xor	r12,r8
+	shrd	r14,r14,6
+	vpsrlq	xmm10,xmm10,7
+	xor	r15,r11
+	add	r9,r12
+	vpxor	xmm8,xmm8,xmm9
+	shrd	r13,r13,14
+	and	rdi,r15
+	vpsllq	xmm9,xmm9,7
+	xor	r14,r10
+	add	r9,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,r11
+	shrd	r14,r14,28
+	vpsrlq	xmm11,xmm4,6
+	add	rbx,r9
+	add	r9,rdi
+	vpxor	xmm8,xmm8,xmm9
+	mov	r13,rbx
+	add	r14,r9
+	vpsllq	xmm10,xmm4,3
+	shrd	r13,r13,23
+	mov	r9,r14
+	vpaddq	xmm5,xmm5,xmm8
+	mov	r12,rcx
+	shrd	r14,r14,5
+	vpsrlq	xmm9,xmm4,19
+	xor	r13,rbx
+	xor	r12,rdx
+	vpxor	xmm11,xmm11,xmm10
+	shrd	r13,r13,4
+	xor	r14,r9
+	vpsllq	xmm10,xmm10,42
+	and	r12,rbx
+	xor	r13,rbx
+	vpxor	xmm11,xmm11,xmm9
+	add	r8,QWORD[88+rsp]
+	mov	rdi,r9
+	vpsrlq	xmm9,xmm9,42
+	xor	r12,rdx
+	shrd	r14,r14,6
+	vpxor	xmm11,xmm11,xmm10
+	xor	rdi,r10
+	add	r8,r12
+	vpxor	xmm11,xmm11,xmm9
+	shrd	r13,r13,14
+	and	r15,rdi
+	vpaddq	xmm5,xmm5,xmm11
+	xor	r14,r9
+	add	r8,r13
+	vpaddq	xmm10,xmm5,XMMWORD[32+rbp]
+	xor	r15,r10
+	shrd	r14,r14,28
+	add	rax,r8
+	add	r8,r15
+	mov	r13,rax
+	add	r14,r8
+	vmovdqa	XMMWORD[80+rsp],xmm10
+	vpalignr	xmm8,xmm7,xmm6,8
+	shrd	r13,r13,23
+	mov	r8,r14
+	vpalignr	xmm11,xmm3,xmm2,8
+	mov	r12,rbx
+	shrd	r14,r14,5
+	vpsrlq	xmm10,xmm8,1
+	xor	r13,rax
+	xor	r12,rcx
+	vpaddq	xmm6,xmm6,xmm11
+	shrd	r13,r13,4
+	xor	r14,r8
+	vpsrlq	xmm11,xmm8,7
+	and	r12,rax
+	xor	r13,rax
+	vpsllq	xmm9,xmm8,56
+	add	rdx,QWORD[96+rsp]
+	mov	r15,r8
+	vpxor	xmm8,xmm11,xmm10
+	xor	r12,rcx
+	shrd	r14,r14,6
+	vpsrlq	xmm10,xmm10,7
+	xor	r15,r9
+	add	rdx,r12
+	vpxor	xmm8,xmm8,xmm9
+	shrd	r13,r13,14
+	and	rdi,r15
+	vpsllq	xmm9,xmm9,7
+	xor	r14,r8
+	add	rdx,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,r9
+	shrd	r14,r14,28
+	vpsrlq	xmm11,xmm5,6
+	add	r11,rdx
+	add	rdx,rdi
+	vpxor	xmm8,xmm8,xmm9
+	mov	r13,r11
+	add	r14,rdx
+	vpsllq	xmm10,xmm5,3
+	shrd	r13,r13,23
+	mov	rdx,r14
+	vpaddq	xmm6,xmm6,xmm8
+	mov	r12,rax
+	shrd	r14,r14,5
+	vpsrlq	xmm9,xmm5,19
+	xor	r13,r11
+	xor	r12,rbx
+	vpxor	xmm11,xmm11,xmm10
+	shrd	r13,r13,4
+	xor	r14,rdx
+	vpsllq	xmm10,xmm10,42
+	and	r12,r11
+	xor	r13,r11
+	vpxor	xmm11,xmm11,xmm9
+	add	rcx,QWORD[104+rsp]
+	mov	rdi,rdx
+	vpsrlq	xmm9,xmm9,42
+	xor	r12,rbx
+	shrd	r14,r14,6
+	vpxor	xmm11,xmm11,xmm10
+	xor	rdi,r8
+	add	rcx,r12
+	vpxor	xmm11,xmm11,xmm9
+	shrd	r13,r13,14
+	and	r15,rdi
+	vpaddq	xmm6,xmm6,xmm11
+	xor	r14,rdx
+	add	rcx,r13
+	vpaddq	xmm10,xmm6,XMMWORD[64+rbp]
+	xor	r15,r8
+	shrd	r14,r14,28
+	add	r10,rcx
+	add	rcx,r15
+	mov	r13,r10
+	add	r14,rcx
+	vmovdqa	XMMWORD[96+rsp],xmm10
+	vpalignr	xmm8,xmm0,xmm7,8
+	shrd	r13,r13,23
+	mov	rcx,r14
+	vpalignr	xmm11,xmm4,xmm3,8
+	mov	r12,r11
+	shrd	r14,r14,5
+	vpsrlq	xmm10,xmm8,1
+	xor	r13,r10
+	xor	r12,rax
+	vpaddq	xmm7,xmm7,xmm11
+	shrd	r13,r13,4
+	xor	r14,rcx
+	vpsrlq	xmm11,xmm8,7
+	and	r12,r10
+	xor	r13,r10
+	vpsllq	xmm9,xmm8,56
+	add	rbx,QWORD[112+rsp]
+	mov	r15,rcx
+	vpxor	xmm8,xmm11,xmm10
+	xor	r12,rax
+	shrd	r14,r14,6
+	vpsrlq	xmm10,xmm10,7
+	xor	r15,rdx
+	add	rbx,r12
+	vpxor	xmm8,xmm8,xmm9
+	shrd	r13,r13,14
+	and	rdi,r15
+	vpsllq	xmm9,xmm9,7
+	xor	r14,rcx
+	add	rbx,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,rdx
+	shrd	r14,r14,28
+	vpsrlq	xmm11,xmm6,6
+	add	r9,rbx
+	add	rbx,rdi
+	vpxor	xmm8,xmm8,xmm9
+	mov	r13,r9
+	add	r14,rbx
+	vpsllq	xmm10,xmm6,3
+	shrd	r13,r13,23
+	mov	rbx,r14
+	vpaddq	xmm7,xmm7,xmm8
+	mov	r12,r10
+	shrd	r14,r14,5
+	vpsrlq	xmm9,xmm6,19
+	xor	r13,r9
+	xor	r12,r11
+	vpxor	xmm11,xmm11,xmm10
+	shrd	r13,r13,4
+	xor	r14,rbx
+	vpsllq	xmm10,xmm10,42
+	and	r12,r9
+	xor	r13,r9
+	vpxor	xmm11,xmm11,xmm9
+	add	rax,QWORD[120+rsp]
+	mov	rdi,rbx
+	vpsrlq	xmm9,xmm9,42
+	xor	r12,r11
+	shrd	r14,r14,6
+	vpxor	xmm11,xmm11,xmm10
+	xor	rdi,rcx
+	add	rax,r12
+	vpxor	xmm11,xmm11,xmm9
+	shrd	r13,r13,14
+	and	r15,rdi
+	vpaddq	xmm7,xmm7,xmm11
+	xor	r14,rbx
+	add	rax,r13
+	vpaddq	xmm10,xmm7,XMMWORD[96+rbp]
+	xor	r15,rcx
+	shrd	r14,r14,28
+	add	r8,rax
+	add	rax,r15
+	mov	r13,r8
+	add	r14,rax
+	vmovdqa	XMMWORD[112+rsp],xmm10
+	cmp	BYTE[135+rbp],0	; repeat until the byte at rbp+135 reads zero (presumably marks the end of the round constants - confirm against the K512 layout)
+	jne	NEAR $L$avx_00_47
+	shrd	r13,r13,23
+	mov	rax,r14
+	mov	r12,r9
+	shrd	r14,r14,5
+	xor	r13,r8
+	xor	r12,r10
+	shrd	r13,r13,4
+	xor	r14,rax
+	and	r12,r8
+	xor	r13,r8
+	add	r11,QWORD[rsp]
+	mov	r15,rax
+	xor	r12,r10
+	shrd	r14,r14,6
+	xor	r15,rbx
+	add	r11,r12
+	shrd	r13,r13,14
+	and	rdi,r15
+	xor	r14,rax
+	add	r11,r13
+	xor	rdi,rbx
+	shrd	r14,r14,28
+	add	rdx,r11
+	add	r11,rdi
+	mov	r13,rdx
+	add	r14,r11
+	shrd	r13,r13,23
+	mov	r11,r14
+	mov	r12,r8
+	shrd	r14,r14,5
+	xor	r13,rdx
+	xor	r12,r9
+	shrd	r13,r13,4
+	xor	r14,r11
+	and	r12,rdx
+	xor	r13,rdx
+	add	r10,QWORD[8+rsp]
+	mov	rdi,r11
+	xor	r12,r9
+	shrd	r14,r14,6
+	xor	rdi,rax
+	add	r10,r12
+	shrd	r13,r13,14
+	and	r15,rdi
+	xor	r14,r11
+	add	r10,r13
+	xor	r15,rax
+	shrd	r14,r14,28
+	add	rcx,r10
+	add	r10,r15
+	mov	r13,rcx
+	add	r14,r10
+	shrd	r13,r13,23
+	mov	r10,r14
+	mov	r12,rdx
+	shrd	r14,r14,5
+	xor	r13,rcx
+	xor	r12,r8
+	shrd	r13,r13,4
+	xor	r14,r10
+	and	r12,rcx
+	xor	r13,rcx
+	add	r9,QWORD[16+rsp]
+	mov	r15,r10
+	xor	r12,r8
+	shrd	r14,r14,6
+	xor	r15,r11
+	add	r9,r12
+	shrd	r13,r13,14
+	and	rdi,r15
+	xor	r14,r10
+	add	r9,r13
+	xor	rdi,r11
+	shrd	r14,r14,28
+	add	rbx,r9
+	add	r9,rdi
+	mov	r13,rbx
+	add	r14,r9
+	shrd	r13,r13,23
+	mov	r9,r14
+	mov	r12,rcx
+	shrd	r14,r14,5
+	xor	r13,rbx
+	xor	r12,rdx
+	shrd	r13,r13,4
+	xor	r14,r9
+	and	r12,rbx
+	xor	r13,rbx
+	add	r8,QWORD[24+rsp]
+	mov	rdi,r9
+	xor	r12,rdx
+	shrd	r14,r14,6
+	xor	rdi,r10
+	add	r8,r12
+	shrd	r13,r13,14
+	and	r15,rdi
+	xor	r14,r9
+	add	r8,r13
+	xor	r15,r10
+	shrd	r14,r14,28
+	add	rax,r8
+	add	r8,r15
+	mov	r13,rax
+	add	r14,r8
+	shrd	r13,r13,23
+	mov	r8,r14
+	mov	r12,rbx
+	shrd	r14,r14,5
+	xor	r13,rax
+	xor	r12,rcx
+	shrd	r13,r13,4
+	xor	r14,r8
+	and	r12,rax
+	xor	r13,rax
+	add	rdx,QWORD[32+rsp]
+	mov	r15,r8
+	xor	r12,rcx
+	shrd	r14,r14,6
+	xor	r15,r9
+	add	rdx,r12
+	shrd	r13,r13,14
+	and	rdi,r15
+	xor	r14,r8
+	add	rdx,r13
+	xor	rdi,r9
+	shrd	r14,r14,28
+	add	r11,rdx
+	add	rdx,rdi
+	mov	r13,r11
+	add	r14,rdx
+	shrd	r13,r13,23
+	mov	rdx,r14
+	mov	r12,rax
+	shrd	r14,r14,5
+	xor	r13,r11
+	xor	r12,rbx
+	shrd	r13,r13,4
+	xor	r14,rdx
+	and	r12,r11
+	xor	r13,r11
+	add	rcx,QWORD[40+rsp]
+	mov	rdi,rdx
+	xor	r12,rbx
+	shrd	r14,r14,6
+	xor	rdi,r8
+	add	rcx,r12
+	shrd	r13,r13,14
+	and	r15,rdi
+	xor	r14,rdx
+	add	rcx,r13
+	xor	r15,r8
+	shrd	r14,r14,28
+	add	r10,rcx
+	add	rcx,r15
+	mov	r13,r10
+	add	r14,rcx
+	shrd	r13,r13,23
+	mov	rcx,r14
+	mov	r12,r11
+	shrd	r14,r14,5
+	xor	r13,r10
+	xor	r12,rax
+	shrd	r13,r13,4
+	xor	r14,rcx
+	and	r12,r10
+	xor	r13,r10
+	add	rbx,QWORD[48+rsp]
+	mov	r15,rcx
+	xor	r12,rax
+	shrd	r14,r14,6
+	xor	r15,rdx
+	add	rbx,r12
+	shrd	r13,r13,14
+	and	rdi,r15
+	xor	r14,rcx
+	add	rbx,r13
+	xor	rdi,rdx
+	shrd	r14,r14,28
+	add	r9,rbx
+	add	rbx,rdi
+	mov	r13,r9
+	add	r14,rbx
+	shrd	r13,r13,23
+	mov	rbx,r14
+	mov	r12,r10
+	shrd	r14,r14,5
+	xor	r13,r9
+	xor	r12,r11
+	shrd	r13,r13,4
+	xor	r14,rbx
+	and	r12,r9
+	xor	r13,r9
+	add	rax,QWORD[56+rsp]
+	mov	rdi,rbx
+	xor	r12,r11
+	shrd	r14,r14,6
+	xor	rdi,rcx
+	add	rax,r12
+	shrd	r13,r13,14
+	and	r15,rdi
+	xor	r14,rbx
+	add	rax,r13
+	xor	r15,rcx
+	shrd	r14,r14,28
+	add	r8,rax
+	add	rax,r15
+	mov	r13,r8
+	add	r14,rax
+	shrd	r13,r13,23
+	mov	rax,r14
+	mov	r12,r9
+	shrd	r14,r14,5
+	xor	r13,r8
+	xor	r12,r10
+	shrd	r13,r13,4
+	xor	r14,rax
+	and	r12,r8
+	xor	r13,r8
+	add	r11,QWORD[64+rsp]
+	mov	r15,rax
+	xor	r12,r10
+	shrd	r14,r14,6
+	xor	r15,rbx
+	add	r11,r12
+	shrd	r13,r13,14
+	and	rdi,r15
+	xor	r14,rax
+	add	r11,r13
+	xor	rdi,rbx
+	shrd	r14,r14,28
+	add	rdx,r11
+	add	r11,rdi
+	mov	r13,rdx
+	add	r14,r11
+	shrd	r13,r13,23
+	mov	r11,r14
+	mov	r12,r8
+	shrd	r14,r14,5
+	xor	r13,rdx
+	xor	r12,r9
+	shrd	r13,r13,4
+	xor	r14,r11
+	and	r12,rdx
+	xor	r13,rdx
+	add	r10,QWORD[72+rsp]
+	mov	rdi,r11
+	xor	r12,r9
+	shrd	r14,r14,6
+	xor	rdi,rax
+	add	r10,r12
+	shrd	r13,r13,14
+	and	r15,rdi
+	xor	r14,r11
+	add	r10,r13
+	xor	r15,rax
+	shrd	r14,r14,28
+	add	rcx,r10
+	add	r10,r15
+	mov	r13,rcx
+	add	r14,r10
+	shrd	r13,r13,23
+	mov	r10,r14
+	mov	r12,rdx
+	shrd	r14,r14,5
+	xor	r13,rcx
+	xor	r12,r8
+	shrd	r13,r13,4
+	xor	r14,r10
+	and	r12,rcx
+	xor	r13,rcx
+	add	r9,QWORD[80+rsp]
+	mov	r15,r10
+	xor	r12,r8
+	shrd	r14,r14,6
+	xor	r15,r11
+	add	r9,r12
+	shrd	r13,r13,14
+	and	rdi,r15
+	xor	r14,r10
+	add	r9,r13
+	xor	rdi,r11
+	shrd	r14,r14,28
+	add	rbx,r9
+	add	r9,rdi
+	mov	r13,rbx
+	add	r14,r9
+	shrd	r13,r13,23
+	mov	r9,r14
+	mov	r12,rcx
+	shrd	r14,r14,5
+	xor	r13,rbx
+	xor	r12,rdx
+	shrd	r13,r13,4
+	xor	r14,r9
+	and	r12,rbx
+	xor	r13,rbx
+	add	r8,QWORD[88+rsp]
+	mov	rdi,r9
+	xor	r12,rdx
+	shrd	r14,r14,6
+	xor	rdi,r10
+	add	r8,r12
+	shrd	r13,r13,14
+	and	r15,rdi
+	xor	r14,r9
+	add	r8,r13
+	xor	r15,r10
+	shrd	r14,r14,28
+	add	rax,r8
+	add	r8,r15
+	mov	r13,rax
+	add	r14,r8
+	shrd	r13,r13,23
+	mov	r8,r14
+	mov	r12,rbx
+	shrd	r14,r14,5
+	xor	r13,rax
+	xor	r12,rcx
+	shrd	r13,r13,4
+	xor	r14,r8
+	and	r12,rax
+	xor	r13,rax
+	add	rdx,QWORD[96+rsp]
+	mov	r15,r8
+	xor	r12,rcx
+	shrd	r14,r14,6
+	xor	r15,r9
+	add	rdx,r12
+	shrd	r13,r13,14
+	and	rdi,r15
+	xor	r14,r8
+	add	rdx,r13
+	xor	rdi,r9
+	shrd	r14,r14,28
+	add	r11,rdx
+	add	rdx,rdi
+	mov	r13,r11
+	add	r14,rdx
+	shrd	r13,r13,23
+	mov	rdx,r14
+	mov	r12,rax
+	shrd	r14,r14,5
+	xor	r13,r11
+	xor	r12,rbx
+	shrd	r13,r13,4
+	xor	r14,rdx
+	and	r12,r11
+	xor	r13,r11
+	add	rcx,QWORD[104+rsp]
+	mov	rdi,rdx
+	xor	r12,rbx
+	shrd	r14,r14,6
+	xor	rdi,r8
+	add	rcx,r12
+	shrd	r13,r13,14
+	and	r15,rdi
+	xor	r14,rdx
+	add	rcx,r13
+	xor	r15,r8
+	shrd	r14,r14,28
+	add	r10,rcx
+	add	rcx,r15
+	mov	r13,r10
+	add	r14,rcx
+	shrd	r13,r13,23
+	mov	rcx,r14
+	mov	r12,r11
+	shrd	r14,r14,5
+	xor	r13,r10
+	xor	r12,rax
+	shrd	r13,r13,4
+	xor	r14,rcx
+	and	r12,r10
+	xor	r13,r10
+	add	rbx,QWORD[112+rsp]
+	mov	r15,rcx
+	xor	r12,rax
+	shrd	r14,r14,6
+	xor	r15,rdx
+	add	rbx,r12
+	shrd	r13,r13,14
+	and	rdi,r15
+	xor	r14,rcx
+	add	rbx,r13
+	xor	rdi,rdx
+	shrd	r14,r14,28
+	add	r9,rbx
+	add	rbx,rdi
+	mov	r13,r9
+	add	r14,rbx
+	shrd	r13,r13,23
+	mov	rbx,r14
+	mov	r12,r10
+	shrd	r14,r14,5
+	xor	r13,r9
+	xor	r12,r11
+	shrd	r13,r13,4
+	xor	r14,rbx
+	and	r12,r9
+	xor	r13,r9
+	add	rax,QWORD[120+rsp]
+	mov	rdi,rbx
+	xor	r12,r11
+	shrd	r14,r14,6
+	xor	rdi,rcx
+	add	rax,r12
+	shrd	r13,r13,14
+	and	r15,rdi
+	xor	r14,rbx
+	add	rax,r13
+	xor	r15,rcx
+	shrd	r14,r14,28
+	add	r8,rax
+	add	rax,r15
+	mov	r13,r8
+	add	r14,rax
+	mov	rdi,QWORD[((128+0))+rsp]	; reload the state pointer saved by the prologue
+	mov	rax,r14	; same r14 hand-off that every round performs at its start
+
+	add	rax,QWORD[rdi]	; add the previous state words to the eight working registers ...
+	lea	rsi,[128+rsi]	; advance the input pointer by one 128-byte block (lea leaves the flags alone)
+	add	rbx,QWORD[8+rdi]
+	add	rcx,QWORD[16+rdi]
+	add	rdx,QWORD[24+rdi]
+	add	r8,QWORD[32+rdi]
+	add	r9,QWORD[40+rdi]
+	add	r10,QWORD[48+rdi]
+	add	r11,QWORD[56+rdi]
+
+	cmp	rsi,QWORD[((128+16))+rsp]	; compare with the end-of-input pointer stored by the prologue
+
+	mov	QWORD[rdi],rax	; ... and write the updated state back (mov keeps the flags from cmp intact)
+	mov	QWORD[8+rdi],rbx
+	mov	QWORD[16+rdi],rcx
+	mov	QWORD[24+rdi],rdx
+	mov	QWORD[32+rdi],r8
+	mov	QWORD[40+rdi],r9
+	mov	QWORD[48+rdi],r10
+	mov	QWORD[56+rdi],r11
+	jb	NEAR $L$loop_avx	; input pointer still below the end pointer: process the next block
+
+	mov	rsi,QWORD[((128+24))+rsp]	; rsi = rsp value captured at $L$avx_shortcut
+	vzeroupper
+	movaps	xmm6,XMMWORD[((128+32))+rsp]	; restore xmm6-xmm11 from the frame
+	movaps	xmm7,XMMWORD[((128+48))+rsp]
+	movaps	xmm8,XMMWORD[((128+64))+rsp]
+	movaps	xmm9,XMMWORD[((128+80))+rsp]
+	movaps	xmm10,XMMWORD[((128+96))+rsp]
+	movaps	xmm11,XMMWORD[((128+112))+rsp]
+	mov	r15,QWORD[((-48))+rsi]	; reload the six GPRs pushed by the prologue (rbx was pushed first, r15 last)
+	mov	r14,QWORD[((-40))+rsi]
+	mov	r13,QWORD[((-32))+rsi]
+	mov	r12,QWORD[((-24))+rsi]
+	mov	rbp,QWORD[((-16))+rsi]
+	mov	rbx,QWORD[((-8))+rsi]
+	lea	rsp,[rsi]	; drop the frame: rsp is back at its entry value
+$L$epilogue_avx:	; its RVA is the second handler-data DWORD in .xdata
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue: reload rdi/rsi spilled by the WIN64 prologue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_sha512_block_data_order_avx:
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+se_handler:	; Win64 language-specific exception handler referenced by all three .xdata entries; uses r8 = CONTEXT pointer, r9 = DISPATCHER_CONTEXT pointer
+	push	rsi	; preserve every register the handler touches, flags included
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; home space plus stack-argument area for the RtlVirtualUnwind call
+
+	mov	rax,QWORD[120+r8]	; CONTEXT offset 120: Rax
+	mov	rbx,QWORD[248+r8]	; CONTEXT offset 248: Rip
+
+	mov	rsi,QWORD[8+r9]	; DISPATCHER_CONTEXT offset 8: ImageBase
+	mov	r11,QWORD[56+r9]	; DISPATCHER_CONTEXT offset 56: HandlerData (the two label RVAs emitted in .xdata)
+
+	mov	r10d,DWORD[r11]	; HandlerData[0]: RVA of the prologue-end label
+	lea	r10,[r10*1+rsi]	; RVA + ImageBase = absolute address
+	cmp	rbx,r10
+	jb	NEAR $L$in_prologue	; Rip before the prologue-end label: rax (context Rax) is used as the entry rsp
+
+	mov	rax,QWORD[152+r8]	; CONTEXT offset 152: Rsp
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1]: RVA of the epilogue label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$in_prologue	; Rip at or past the epilogue label: context Rsp is used as the entry rsp
+	mov	rsi,rax	; keep the aligned frame base for the xmm copy below
+	mov	rax,QWORD[((128+24))+rax]	; frame slot +152: entry rsp saved by the function prologue
+
+	mov	rbx,QWORD[((-8))+rax]	; fetch the six GPRs pushed by the function prologue ...
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	r15,QWORD[((-48))+rax]
+	mov	QWORD[144+r8],rbx	; ... and store them into the CONTEXT slots for Rbx (144), Rbp (160), R12-R15 (216-240)
+	mov	QWORD[160+r8],rbp
+	mov	QWORD[216+r8],r12
+	mov	QWORD[224+r8],r13
+	mov	QWORD[232+r8],r14
+	mov	QWORD[240+r8],r15
+
+	lea	r10,[$L$epilogue]
+	cmp	rbx,r10	; NOTE(review): rbx was reloaded with the saved rbx a few lines up, so this no longer tests the faulting Rip - confirm intent against the perlasm source
+	jb	NEAR $L$in_prologue	; skips the xmm copy (presumably meant for the non-SIMD variant, which is outside this view - confirm)
+
+	lea	rsi,[((128+32))+rsi]	; source: xmm6 save slot in the function frame
+	lea	rdi,[512+r8]	; destination: CONTEXT offset 512 (Xmm6)
+	mov	ecx,12	; 12 qwords = 96 bytes = xmm6..xmm11
+	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
+
+$L$in_prologue:	; common tail: rax holds the function's entry rsp on every path that reaches here
+	mov	rdi,QWORD[8+rax]	; rdi/rsi as spilled by the WIN64 prologue at [8+rsp]/[16+rsp]
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; CONTEXT Rsp
+	mov	QWORD[168+r8],rsi	; CONTEXT Rsi
+	mov	QWORD[176+r8],rdi	; CONTEXT Rdi
+
+	mov	rdi,QWORD[40+r9]	; DISPATCHER_CONTEXT offset 40: ContextRecord (copy destination)
+	mov	rsi,r8	; copy source: the CONTEXT patched above
+	mov	ecx,154	; 154 qwords = 1232 bytes, the size of the x64 CONTEXT
+	DD	0xa548f3fc	; cld; rep movsq
+
+	mov	rsi,r9	; rsi = DISPATCHER_CONTEXT pointer; build the RtlVirtualUnwind arguments from it
+	xor	rcx,rcx	; arg1 HandlerType = 0 (UNW_FLAG_NHANDLER)
+	mov	rdx,QWORD[8+rsi]	; arg2 = ImageBase
+	mov	r8,QWORD[rsi]	; arg3 = ControlPc
+	mov	r9,QWORD[16+rsi]	; arg4 = FunctionEntry
+	mov	r10,QWORD[40+rsi]	; ContextRecord
+	lea	r11,[56+rsi]	; address of the HandlerData field
+	lea	r12,[24+rsi]	; address of the EstablisherFrame field
+	mov	QWORD[32+rsp],r10	; arg5 = ContextRecord
+	mov	QWORD[40+rsp],r11	; arg6 = HandlerData out-pointer
+	mov	QWORD[48+rsp],r12	; arg7 = EstablisherFrame out-pointer
+	mov	QWORD[56+rsp],rcx	; arg8 = NULL
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; return value 1 = ExceptionContinueSearch
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+section	.pdata rdata align=4	; function table: one (begin RVA, end RVA, unwind-info RVA) triple per function variant
+ALIGN	4
+	DD	$L$SEH_begin_sha512_block_data_order wrt ..imagebase	; base variant
+	DD	$L$SEH_end_sha512_block_data_order wrt ..imagebase
+	DD	$L$SEH_info_sha512_block_data_order wrt ..imagebase
+	DD	$L$SEH_begin_sha512_block_data_order_xop wrt ..imagebase	; XOP variant
+	DD	$L$SEH_end_sha512_block_data_order_xop wrt ..imagebase
+	DD	$L$SEH_info_sha512_block_data_order_xop wrt ..imagebase
+	DD	$L$SEH_begin_sha512_block_data_order_avx wrt ..imagebase	; AVX variant
+	DD	$L$SEH_end_sha512_block_data_order_avx wrt ..imagebase
+	DD	$L$SEH_info_sha512_block_data_order_avx wrt ..imagebase
+section	.xdata rdata align=8	; unwind info records pointed to by the third DWORD of each .pdata triple
+ALIGN	8
+$L$SEH_info_sha512_block_data_order:
+DB	9,0,0,0	; UNWIND_INFO header: version 1 with UNW_FLAG_EHANDLER (byte 9), prolog size 0, no unwind codes, no frame register
+	DD	se_handler wrt ..imagebase	; RVA of the language-specific handler
+	DD	$L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase	; handler data: the two label RVAs se_handler reads at [r11] and [4+r11]
+$L$SEH_info_sha512_block_data_order_xop:
+DB	9,0,0,0	; same header layout as above
+	DD	se_handler wrt ..imagebase
+	DD	$L$prologue_xop wrt ..imagebase,$L$epilogue_xop wrt ..imagebase
+$L$SEH_info_sha512_block_data_order_avx:
+DB	9,0,0,0	; same header layout as above
+	DD	se_handler wrt ..imagebase
+	DD	$L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
diff --git a/third_party/boringssl/win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm
new file mode 100644
index 0000000..3edde9f
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm
@@ -0,0 +1,1137 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.text code align=64
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN	16
+_vpaes_encrypt_core:	; internal: transforms the block in xmm0 using the key schedule at rdx; result in xmm0. Reads xmm9-xmm15 as constants (vpaes_encrypt calls _vpaes_preheat first)
+	mov	r9,rdx	; r9 walks the key schedule, 16 bytes per step
+	mov	r11,16	; r11: table offset, stepped by 16 and masked with 0x30 inside the loop
+	mov	eax,DWORD[240+rdx]	; loop counter: the value the set-key routines store at offset 240
+	movdqa	xmm1,xmm9	; nibble split: xmm1 = (input AND NOT xmm9) shifted right by 4, xmm0 = input AND xmm9
+	movdqa	xmm2,XMMWORD[$L$k_ipt]	; (assumes xmm9 holds 0x0F in every byte, loaded by _vpaes_preheat - confirm)
+	pandn	xmm1,xmm0
+	movdqu	xmm5,XMMWORD[r9]	; first 16 bytes of the key schedule
+	psrld	xmm1,4
+	pand	xmm0,xmm9
+DB	102,15,56,0,208	; pshufb xmm2,xmm0
+	movdqa	xmm0,XMMWORD[(($L$k_ipt+16))]
+DB	102,15,56,0,193	; pshufb xmm0,xmm1
+	pxor	xmm2,xmm5
+	add	r9,16	; next key-schedule entry; also leaves ZF clear for the first jnz at $L$enc_entry
+	pxor	xmm0,xmm2
+	lea	r10,[$L$k_mc_backward]	; r10: base address for the table loads in the loop and in the last round
+	jmp	NEAR $L$enc_entry
+
+ALIGN	16
+$L$enc_loop:	; one middle round per pass
+
+	movdqa	xmm4,xmm13
+	movdqa	xmm0,xmm12
+DB	102,15,56,0,226	; pshufb xmm4,xmm2
+DB	102,15,56,0,195	; pshufb xmm0,xmm3
+	pxor	xmm4,xmm5	; fold in the round key loaded at the end of $L$enc_entry
+	movdqa	xmm5,xmm15
+	pxor	xmm0,xmm4
+	movdqa	xmm1,XMMWORD[((-64))+r10*1+r11]	; permutation row at r10-64+r11
+DB	102,15,56,0,234	; pshufb xmm5,xmm2
+	movdqa	xmm4,XMMWORD[r10*1+r11]	; permutation row at r10+r11
+	movdqa	xmm2,xmm14
+DB	102,15,56,0,211	; pshufb xmm2,xmm3
+	movdqa	xmm3,xmm0
+	pxor	xmm2,xmm5
+DB	102,15,56,0,193	; pshufb xmm0,xmm1
+	add	r9,16	; advance to the next round key
+	pxor	xmm0,xmm2
+DB	102,15,56,0,220	; pshufb xmm3,xmm4
+	add	r11,16	; rotate the table offset ...
+	pxor	xmm3,xmm0
+DB	102,15,56,0,193	; pshufb xmm0,xmm1
+	and	r11,0x30	; ... modulo 64
+	sub	rax,1	; count down; ZF from here is consumed by the jnz at the end of $L$enc_entry
+	pxor	xmm0,xmm3
+
+$L$enc_entry:	; shared top half: nibble split plus lookups using xmm10/xmm11
+
+	movdqa	xmm1,xmm9
+	movdqa	xmm5,xmm11
+	pandn	xmm1,xmm0
+	psrld	xmm1,4
+	pand	xmm0,xmm9
+DB	102,15,56,0,232	; pshufb xmm5,xmm0
+	movdqa	xmm3,xmm10
+	pxor	xmm0,xmm1
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	movdqa	xmm4,xmm10
+	pxor	xmm3,xmm5
+DB	102,15,56,0,224	; pshufb xmm4,xmm0
+	movdqa	xmm2,xmm10
+	pxor	xmm4,xmm5
+DB	102,15,56,0,211	; pshufb xmm2,xmm3
+	movdqa	xmm3,xmm10
+	pxor	xmm2,xmm0
+DB	102,15,56,0,220	; pshufb xmm3,xmm4
+	movdqu	xmm5,XMMWORD[r9]	; round key for the step that follows
+	pxor	xmm3,xmm1
+	jnz	NEAR $L$enc_loop	; SSE instructions do not modify EFLAGS, so this still tests the last integer op (sub rax,1, or add r9,16 on first entry)
+
+
+	movdqa	xmm4,XMMWORD[((-96))+r10]	; last round: tables at r10-96 and r10-80
+	movdqa	xmm0,XMMWORD[((-80))+r10]
+DB	102,15,56,0,226	; pshufb xmm4,xmm2
+	pxor	xmm4,xmm5
+DB	102,15,56,0,195	; pshufb xmm0,xmm3
+	movdqa	xmm1,XMMWORD[64+r10*1+r11]	; final output permutation row at r10+64+r11
+	pxor	xmm0,xmm4
+DB	102,15,56,0,193	; pshufb xmm0,xmm1
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+
+ALIGN	16
+_vpaes_decrypt_core:	; internal: transforms the block in xmm0 using the key schedule at rdx; result in xmm0. Reads xmm9-xmm11 as constants
+	mov	r9,rdx	; r9 walks the key schedule, 16 bytes per step
+	mov	eax,DWORD[240+rdx]	; loop counter: the value the set-key routines store at offset 240
+	movdqa	xmm1,xmm9	; nibble split of the input, same pattern as in _vpaes_encrypt_core
+	movdqa	xmm2,XMMWORD[$L$k_dipt]
+	pandn	xmm1,xmm0
+	mov	r11,rax
+	psrld	xmm1,4
+	movdqu	xmm5,XMMWORD[r9]	; first 16 bytes of the key schedule
+	shl	r11,4	; r11 = ((counter * 16) xor 0x30) and 0x30 ...
+	pand	xmm0,xmm9
+DB	102,15,56,0,208	; pshufb xmm2,xmm0
+	movdqa	xmm0,XMMWORD[(($L$k_dipt+16))]
+	xor	r11,0x30
+	lea	r10,[$L$k_dsbd]	; r10: base for the table pairs loaded in the loop
+DB	102,15,56,0,193	; pshufb xmm0,xmm1
+	and	r11,0x30
+	pxor	xmm2,xmm5
+	movdqa	xmm5,XMMWORD[(($L$k_mc_forward+48))]	; xmm5: permutation applied between table pairs, rotated each pass by the palignr below
+	pxor	xmm0,xmm2
+	add	r9,16
+	add	r11,r10	; ... plus the k_dsbd address; used only for the final permutation load
+	jmp	NEAR $L$dec_entry
+
+ALIGN	16
+$L$dec_loop:	; one middle round per pass: four table pairs at r10-32 .. r10+80
+
+
+
+	movdqa	xmm4,XMMWORD[((-32))+r10]
+	movdqa	xmm1,XMMWORD[((-16))+r10]
+DB	102,15,56,0,226	; pshufb xmm4,xmm2
+DB	102,15,56,0,203	; pshufb xmm1,xmm3
+	pxor	xmm0,xmm4
+	movdqa	xmm4,XMMWORD[r10]
+	pxor	xmm0,xmm1
+	movdqa	xmm1,XMMWORD[16+r10]
+
+DB	102,15,56,0,226	; pshufb xmm4,xmm2
+DB	102,15,56,0,197	; pshufb xmm0,xmm5
+DB	102,15,56,0,203	; pshufb xmm1,xmm3
+	pxor	xmm0,xmm4
+	movdqa	xmm4,XMMWORD[32+r10]
+	pxor	xmm0,xmm1
+	movdqa	xmm1,XMMWORD[48+r10]
+
+DB	102,15,56,0,226	; pshufb xmm4,xmm2
+DB	102,15,56,0,197	; pshufb xmm0,xmm5
+DB	102,15,56,0,203	; pshufb xmm1,xmm3
+	pxor	xmm0,xmm4
+	movdqa	xmm4,XMMWORD[64+r10]
+	pxor	xmm0,xmm1
+	movdqa	xmm1,XMMWORD[80+r10]
+
+DB	102,15,56,0,226	; pshufb xmm4,xmm2
+DB	102,15,56,0,197	; pshufb xmm0,xmm5
+DB	102,15,56,0,203	; pshufb xmm1,xmm3
+	pxor	xmm0,xmm4
+	add	r9,16	; advance to the next round key
+DB	102,15,58,15,237,12	; palignr xmm5,xmm5,12 (byte-rotate xmm5)
+	pxor	xmm0,xmm1
+	sub	rax,1	; count down; ZF from here is consumed by the jnz at the end of $L$dec_entry
+
+$L$dec_entry:	; shared top half: nibble split plus lookups using xmm10/xmm11
+
+	movdqa	xmm1,xmm9
+	pandn	xmm1,xmm0
+	movdqa	xmm2,xmm11
+	psrld	xmm1,4
+	pand	xmm0,xmm9
+DB	102,15,56,0,208	; pshufb xmm2,xmm0
+	movdqa	xmm3,xmm10
+	pxor	xmm0,xmm1
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	movdqa	xmm4,xmm10
+	pxor	xmm3,xmm2
+DB	102,15,56,0,224	; pshufb xmm4,xmm0
+	pxor	xmm4,xmm2
+	movdqa	xmm2,xmm10
+DB	102,15,56,0,211	; pshufb xmm2,xmm3
+	movdqa	xmm3,xmm10
+	pxor	xmm2,xmm0
+DB	102,15,56,0,220	; pshufb xmm3,xmm4
+	movdqu	xmm0,XMMWORD[r9]	; round key for the step that follows (loaded straight into xmm0 here)
+	pxor	xmm3,xmm1
+	jnz	NEAR $L$dec_loop	; SSE instructions do not modify EFLAGS, so this still tests the last integer op (sub rax,1, or add r11,r10 on first entry)
+
+
+	movdqa	xmm4,XMMWORD[96+r10]	; last round: tables at r10+96 and r10+112
+DB	102,15,56,0,226	; pshufb xmm4,xmm2
+	pxor	xmm4,xmm0
+	movdqa	xmm0,XMMWORD[112+r10]
+	movdqa	xmm2,XMMWORD[((-352))+r11]	; final output permutation at r11-352 (presumably a row of the k_sr table - confirm against the constant layout)
+DB	102,15,56,0,195	; pshufb xmm0,xmm3
+	pxor	xmm0,xmm4
+DB	102,15,56,0,194	; pshufb xmm0,xmm2
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+
+ALIGN	16
+_vpaes_schedule_core:	; internal key-schedule driver. In: rdi = user key, esi = key size in bits, rdx = output slot, rcx = 0 encrypt / nonzero decrypt, r8 = offset into the $L$k_sr table
+
+
+
+
+
+	call	_vpaes_preheat
+	movdqa	xmm8,XMMWORD[$L$k_rcon]	; xmm8: round-constant register consumed by _vpaes_schedule_round
+	movdqu	xmm0,XMMWORD[rdi]	; first 16 bytes of the user key
+
+
+	movdqa	xmm3,xmm0	; keep an untransformed copy for the decrypt path
+	lea	r11,[$L$k_ipt]	; table pair used by the transform call
+	call	_vpaes_schedule_transform
+	movdqa	xmm7,xmm0	; xmm7: running key state used by the round helpers
+
+	lea	r10,[$L$k_sr]	; r10: permutation-table base, indexed with r8
+	test	rcx,rcx
+	jnz	NEAR $L$schedule_am_decrypting
+
+
+	movdqu	XMMWORD[rdx],xmm0	; encrypt: store the transformed key as the first schedule entry
+	jmp	NEAR $L$schedule_go
+
+$L$schedule_am_decrypting:
+
+	movdqa	xmm1,XMMWORD[r10*1+r8]	; decrypt: permute the untransformed key with the row at k_sr+r8 ...
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	movdqu	XMMWORD[rdx],xmm3	; ... and store that instead
+	xor	r8,0x30
+
+$L$schedule_go:
+	cmp	esi,192	; dispatch on key size: above 192, equal to 192, else fall through to $L$schedule_128
+	ja	NEAR $L$schedule_256
+	je	NEAR $L$schedule_192
+
+
+
+
+
+
+
+
+
+
+$L$schedule_128:	; key size below 192 bits
+	mov	esi,10	; 10 calls to _vpaes_schedule_round
+
+$L$oop_schedule_128:
+	call	_vpaes_schedule_round
+	dec	rsi
+	jz	NEAR $L$schedule_mangle_last	; the final round result is written by the mangle_last path
+	call	_vpaes_schedule_mangle	; otherwise write this round's entry and continue
+	jmp	NEAR $L$oop_schedule_128
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN	16
+$L$schedule_192:	; key size equal to 192 bits
+	movdqu	xmm0,XMMWORD[8+rdi]	; user key bytes 8..23
+	call	_vpaes_schedule_transform
+	movdqa	xmm6,xmm0	; xmm6: extra key state, consumed by _vpaes_schedule_192_smear and the palignr below
+	pxor	xmm4,xmm4
+	movhlps	xmm6,xmm4	; zero the low 64 bits of xmm6 (xmm4 is all-zero)
+	mov	esi,4	; 4 passes; each pass calls _vpaes_schedule_round twice
+
+$L$oop_schedule_192:
+	call	_vpaes_schedule_round
+DB	102,15,58,15,198,8	; palignr xmm0,xmm6,8
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_192_smear
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_round
+	dec	rsi
+	jz	NEAR $L$schedule_mangle_last	; the final round result is written by the mangle_last path
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_192_smear
+	jmp	NEAR $L$oop_schedule_192
+
+
+
+
+
+
+
+
+
+
+
+ALIGN	16
+$L$schedule_256:	; key size above 192 bits
+	movdqu	xmm0,XMMWORD[16+rdi]	; user key bytes 16..31
+	call	_vpaes_schedule_transform
+	mov	esi,7	; 7 passes
+
+$L$oop_schedule_256:
+	call	_vpaes_schedule_mangle
+	movdqa	xmm6,xmm0	; keep this value; it becomes the xmm7 input of the low round below
+
+
+	call	_vpaes_schedule_round
+	dec	rsi
+	jz	NEAR $L$schedule_mangle_last	; the final round result is written by the mangle_last path
+	call	_vpaes_schedule_mangle
+
+
+	pshufd	xmm0,xmm0,0xFF	; broadcast dword 3 of xmm0 to all four dwords
+	movdqa	xmm5,xmm7	; park xmm7 while the low round runs on the value saved in xmm6
+	movdqa	xmm7,xmm6
+	call	_vpaes_schedule_low_round
+	movdqa	xmm7,xmm5	; put the parked xmm7 back
+
+	jmp	NEAR $L$oop_schedule_256
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN	16
+$L$schedule_mangle_last:	; writes the final schedule entry, then clears xmm0-xmm7
+
+	lea	r11,[$L$k_deskew]	; transform table pair for the decrypt case
+	test	rcx,rcx
+	jnz	NEAR $L$schedule_mangle_last_dec
+
+
+	movdqa	xmm1,XMMWORD[r10*1+r8]	; encrypt only: permute xmm0 with the row at k_sr+r8
+DB	102,15,56,0,193	; pshufb xmm0,xmm1
+	lea	r11,[$L$k_opt]	; encrypt uses the k_opt table pair instead
+	add	rdx,32	; together with the -16 below: net +16 for encrypt
+
+$L$schedule_mangle_last_dec:
+	add	rdx,-16	; decrypt: step the output pointer down by 16
+	pxor	xmm0,XMMWORD[$L$k_s63]
+	call	_vpaes_schedule_transform
+	movdqu	XMMWORD[rdx],xmm0	; store the last schedule entry
+
+
+	pxor	xmm0,xmm0	; zero xmm0-xmm7 before returning (presumably so no key material stays in registers)
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	pxor	xmm6,xmm6
+	pxor	xmm7,xmm7
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN	16
+_vpaes_schedule_192_smear:	; 192-bit schedule helper. In: xmm6, xmm7. Out: xmm0 = mixed value, xmm6 = same value with its low 64 bits zeroed. Clobbers xmm1
+	pshufd	xmm1,xmm6,0x80	; xmm1 dwords = (xmm6[0], xmm6[0], xmm6[0], xmm6[2]), listed low to high
+	pshufd	xmm0,xmm7,0xFE	; xmm0 dwords = (xmm7[2], xmm7[3], xmm7[3], xmm7[3]), listed low to high
+	pxor	xmm6,xmm1
+	pxor	xmm1,xmm1	; xmm1 = 0 for the movhlps below
+	pxor	xmm6,xmm0
+	movdqa	xmm0,xmm6	; return the full mixed value in xmm0
+	movhlps	xmm6,xmm1	; zero the low 64 bits of xmm6
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN	16
+_vpaes_schedule_round:	; one key-schedule round. In: xmm0, xmm7 (running state), xmm8 (round constants). Out: xmm0 = xmm7 = new state. Falls through into _vpaes_schedule_low_round
+
+	pxor	xmm1,xmm1
+DB	102,65,15,58,15,200,15	; palignr xmm1,xmm8,15: xmm1 = byte 15 of xmm8 in its lowest byte, zero elsewhere
+DB	102,69,15,58,15,192,15	; palignr xmm8,xmm8,15: byte-rotate xmm8 so the next constant byte moves to the top
+	pxor	xmm7,xmm1	; fold the extracted constant byte into the state
+
+
+	pshufd	xmm0,xmm0,0xFF	; broadcast dword 3 of xmm0 to all four dwords
+DB	102,15,58,15,192,1	; palignr xmm0,xmm0,1: byte-rotate xmm0 by one position
+
+
+
+
+_vpaes_schedule_low_round:	; second entry point (called directly by the 256-bit path): skips the constant and rotation steps above
+
+	movdqa	xmm1,xmm7	; xmm7 ^= xmm7 shifted left by 4 bytes ...
+	pslldq	xmm7,4
+	pxor	xmm7,xmm1
+	movdqa	xmm1,xmm7	; ... then ^= the result shifted left by 8 bytes
+	pslldq	xmm7,8
+	pxor	xmm7,xmm1
+	pxor	xmm7,XMMWORD[$L$k_s63]
+
+
+	movdqa	xmm1,xmm9	; nibble split of xmm0 followed by lookups using xmm10/xmm11
+	pandn	xmm1,xmm0
+	psrld	xmm1,4
+	pand	xmm0,xmm9
+	movdqa	xmm2,xmm11
+DB	102,15,56,0,208	; pshufb xmm2,xmm0
+	pxor	xmm0,xmm1
+	movdqa	xmm3,xmm10
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	pxor	xmm3,xmm2
+	movdqa	xmm4,xmm10
+DB	102,15,56,0,224	; pshufb xmm4,xmm0
+	pxor	xmm4,xmm2
+	movdqa	xmm2,xmm10
+DB	102,15,56,0,211	; pshufb xmm2,xmm3
+	pxor	xmm2,xmm0
+	movdqa	xmm3,xmm10
+DB	102,15,56,0,220	; pshufb xmm3,xmm4
+	pxor	xmm3,xmm1
+	movdqa	xmm4,xmm13	; output lookups through the tables held in xmm13 and xmm12
+DB	102,15,56,0,226	; pshufb xmm4,xmm2
+	movdqa	xmm0,xmm12
+DB	102,15,56,0,195	; pshufb xmm0,xmm3
+	pxor	xmm0,xmm4
+
+
+	pxor	xmm0,xmm7	; combine with the smeared state ...
+	movdqa	xmm7,xmm0	; ... and make the result the new running state
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN	16
+_vpaes_schedule_transform:	; In: xmm0 = data, r11 = address of two 16-byte tables. Out: xmm0 = table0[masked bytes] xor table1[shifted bytes]. Clobbers xmm1, xmm2
+	movdqa	xmm1,xmm9	; nibble split (assumes xmm9 holds 0x0F in every byte, set by _vpaes_preheat - confirm)
+	pandn	xmm1,xmm0	; xmm1 = input AND NOT xmm9 ...
+	psrld	xmm1,4	; ... shifted right by 4
+	pand	xmm0,xmm9	; xmm0 = input AND xmm9
+	movdqa	xmm2,XMMWORD[r11]	; first table
+DB	102,15,56,0,208	; pshufb xmm2,xmm0
+	movdqa	xmm0,XMMWORD[16+r11]	; second table
+DB	102,15,56,0,193	; pshufb xmm0,xmm1
+	pxor	xmm0,xmm2
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN	16
+_vpaes_schedule_mangle:	; writes one schedule entry derived from xmm0. In: xmm0, rcx (0 encrypt / nonzero decrypt), rdx = previous output slot, r10+r8 = permutation row. Updates rdx and r8; xmm0 is preserved
+	movdqa	xmm4,xmm0	; work on a copy
+	movdqa	xmm5,XMMWORD[$L$k_mc_forward]
+	test	rcx,rcx
+	jnz	NEAR $L$schedule_mangle_dec
+
+
+	add	rdx,16	; encrypt: the schedule is written at increasing addresses
+	pxor	xmm4,XMMWORD[$L$k_s63]
+DB	102,15,56,0,229	; pshufb xmm4,xmm5
+	movdqa	xmm3,xmm4	; xmm3 accumulates the xor of three successive permutations of xmm4
+DB	102,15,56,0,229	; pshufb xmm4,xmm5
+	pxor	xmm3,xmm4
+DB	102,15,56,0,229	; pshufb xmm4,xmm5
+	pxor	xmm3,xmm4
+
+	jmp	NEAR $L$schedule_mangle_both
+ALIGN	16
+$L$schedule_mangle_dec:	; decrypt: four table-pair lookups from k_dksd, each but the last followed by a permutation through xmm5
+
+	lea	r11,[$L$k_dksd]
+	movdqa	xmm1,xmm9	; nibble split of xmm4 into xmm4 (masked) and xmm1 (shifted)
+	pandn	xmm1,xmm4
+	psrld	xmm1,4
+	pand	xmm4,xmm9
+
+	movdqa	xmm2,XMMWORD[r11]
+DB	102,15,56,0,212	; pshufb xmm2,xmm4
+	movdqa	xmm3,XMMWORD[16+r11]
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	pxor	xmm3,xmm2
+DB	102,15,56,0,221	; pshufb xmm3,xmm5
+
+	movdqa	xmm2,XMMWORD[32+r11]
+DB	102,15,56,0,212	; pshufb xmm2,xmm4
+	pxor	xmm2,xmm3
+	movdqa	xmm3,XMMWORD[48+r11]
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	pxor	xmm3,xmm2
+DB	102,15,56,0,221	; pshufb xmm3,xmm5
+
+	movdqa	xmm2,XMMWORD[64+r11]
+DB	102,15,56,0,212	; pshufb xmm2,xmm4
+	pxor	xmm2,xmm3
+	movdqa	xmm3,XMMWORD[80+r11]
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	pxor	xmm3,xmm2
+DB	102,15,56,0,221	; pshufb xmm3,xmm5
+
+	movdqa	xmm2,XMMWORD[96+r11]
+DB	102,15,56,0,212	; pshufb xmm2,xmm4
+	pxor	xmm2,xmm3
+	movdqa	xmm3,XMMWORD[112+r11]
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	pxor	xmm3,xmm2
+
+	add	rdx,-16	; decrypt: the schedule is written at decreasing addresses
+
+$L$schedule_mangle_both:
+	movdqa	xmm1,XMMWORD[r10*1+r8]	; permutation row at r10+r8
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	add	r8,-16	; step the row offset down by 16 ...
+	and	r8,0x30	; ... modulo 64
+	movdqu	XMMWORD[rdx],xmm3	; store the schedule entry
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+global	vpaes_set_encrypt_key
+
+ALIGN	16
+vpaes_set_encrypt_key:	; exported. Win64 args: rcx = user key, edx = key size in bits, r8 = key-schedule buffer. Returns 0 in eax
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_vpaes_set_encrypt_key:
+	mov	rdi,rcx	; remap the Win64 argument registers onto rdi, rsi, rdx as expected by _vpaes_schedule_core
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+	lea	rsp,[((-184))+rsp]	; reserve 184 bytes of stack
+	movaps	XMMWORD[16+rsp],xmm6	; save xmm6-xmm15, which are callee-saved on Win64
+	movaps	XMMWORD[32+rsp],xmm7
+	movaps	XMMWORD[48+rsp],xmm8
+	movaps	XMMWORD[64+rsp],xmm9
+	movaps	XMMWORD[80+rsp],xmm10
+	movaps	XMMWORD[96+rsp],xmm11
+	movaps	XMMWORD[112+rsp],xmm12
+	movaps	XMMWORD[128+rsp],xmm13
+	movaps	XMMWORD[144+rsp],xmm14
+	movaps	XMMWORD[160+rsp],xmm15
+$L$enc_key_body:
+	mov	eax,esi	; eax = bits/32 + 5 (128 gives 9, 192 gives 11, 256 gives 13) ...
+	shr	eax,5
+	add	eax,5
+	mov	DWORD[240+rdx],eax	; ... stored at offset 240 of the schedule; the cipher cores use it as their loop counter
+
+	mov	ecx,0	; rcx = 0 selects the encrypt paths in the schedule code
+	mov	r8d,0x30	; initial offset into the k_sr table
+	call	_vpaes_schedule_core
+	movaps	xmm6,XMMWORD[16+rsp]	; restore xmm6-xmm15
+	movaps	xmm7,XMMWORD[32+rsp]
+	movaps	xmm8,XMMWORD[48+rsp]
+	movaps	xmm9,XMMWORD[64+rsp]
+	movaps	xmm10,XMMWORD[80+rsp]
+	movaps	xmm11,XMMWORD[96+rsp]
+	movaps	xmm12,XMMWORD[112+rsp]
+	movaps	xmm13,XMMWORD[128+rsp]
+	movaps	xmm14,XMMWORD[144+rsp]
+	movaps	xmm15,XMMWORD[160+rsp]
+	lea	rsp,[184+rsp]	; release the stack reservation
+$L$enc_key_epilogue:
+	xor	eax,eax	; return 0
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_vpaes_set_encrypt_key:
+
+global	vpaes_set_decrypt_key
+
+ALIGN	16
+vpaes_set_decrypt_key:	; exported. Win64 args: rcx = user key, edx = key size in bits, r8 = key-schedule buffer. Returns 0 in eax
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_vpaes_set_decrypt_key:
+	mov	rdi,rcx	; remap the Win64 argument registers onto rdi, rsi, rdx as expected by _vpaes_schedule_core
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+	lea	rsp,[((-184))+rsp]	; reserve 184 bytes of stack
+	movaps	XMMWORD[16+rsp],xmm6	; save xmm6-xmm15, which are callee-saved on Win64
+	movaps	XMMWORD[32+rsp],xmm7
+	movaps	XMMWORD[48+rsp],xmm8
+	movaps	XMMWORD[64+rsp],xmm9
+	movaps	XMMWORD[80+rsp],xmm10
+	movaps	XMMWORD[96+rsp],xmm11
+	movaps	XMMWORD[112+rsp],xmm12
+	movaps	XMMWORD[128+rsp],xmm13
+	movaps	XMMWORD[144+rsp],xmm14
+	movaps	XMMWORD[160+rsp],xmm15
+$L$dec_key_body:
+	mov	eax,esi	; eax = bits/32 + 5, same counter formula as in vpaes_set_encrypt_key
+	shr	eax,5
+	add	eax,5
+	mov	DWORD[240+rdx],eax	; stored at offset 240 of the schedule
+	shl	eax,4	; eax = counter * 16 ...
+	lea	rdx,[16+rax*1+rdx]	; ... rdx = buffer + 16*counter + 16: start at the far end, since the decrypt mangle steps rdx down by 16
+
+	mov	ecx,1	; rcx nonzero selects the decrypt paths in the schedule code
+	mov	r8d,esi	; r8d = ((bits shifted right by 1) and 32) xor 32: 32 for 128/256-bit keys, 0 for 192-bit keys
+	shr	r8d,1
+	and	r8d,32
+	xor	r8d,32
+	call	_vpaes_schedule_core
+	movaps	xmm6,XMMWORD[16+rsp]	; restore xmm6-xmm15
+	movaps	xmm7,XMMWORD[32+rsp]
+	movaps	xmm8,XMMWORD[48+rsp]
+	movaps	xmm9,XMMWORD[64+rsp]
+	movaps	xmm10,XMMWORD[80+rsp]
+	movaps	xmm11,XMMWORD[96+rsp]
+	movaps	xmm12,XMMWORD[112+rsp]
+	movaps	xmm13,XMMWORD[128+rsp]
+	movaps	xmm14,XMMWORD[144+rsp]
+	movaps	xmm15,XMMWORD[160+rsp]
+	lea	rsp,[184+rsp]	; release the stack reservation
+$L$dec_key_epilogue:
+	xor	eax,eax	; return 0
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_vpaes_set_decrypt_key:
+
+global	vpaes_encrypt
+
+ALIGN	16
+vpaes_encrypt:	; Win64 entry: load 16 bytes from arg1, run _vpaes_encrypt_core, store 16 bytes to arg2
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; rdi/rsi are stashed at [8+rsp]/[16+rsp] and reloaded in the epilogue
+	mov	rax,rsp	; rax = rsp at entry (read back by se_handler via CONTEXT.Rax for prologue faults)
+$L$SEH_begin_vpaes_encrypt:
+	mov	rdi,rcx	; Win64 arg1 (rcx) -> rdi: input block pointer
+	mov	rsi,rdx	; Win64 arg2 (rdx) -> rsi: output block pointer
+	mov	rdx,r8	; Win64 arg3 (r8)  -> rdx (not touched here; presumably the key consumed by the core - confirm)
+
+
+	lea	rsp,[((-184))+rsp]	; allocate 184 bytes; xmm6-xmm15 go to offsets 16..160
+	movaps	XMMWORD[16+rsp],xmm6
+	movaps	XMMWORD[32+rsp],xmm7
+	movaps	XMMWORD[48+rsp],xmm8
+	movaps	XMMWORD[64+rsp],xmm9
+	movaps	XMMWORD[80+rsp],xmm10
+	movaps	XMMWORD[96+rsp],xmm11
+	movaps	XMMWORD[112+rsp],xmm12
+	movaps	XMMWORD[128+rsp],xmm13
+	movaps	XMMWORD[144+rsp],xmm14
+	movaps	XMMWORD[160+rsp],xmm15
+$L$enc_body:
+	movdqu	xmm0,XMMWORD[rdi]	; unaligned load of the 16-byte input block
+	call	_vpaes_preheat	; load the shared constants into xmm9-xmm15
+	call	_vpaes_encrypt_core	; result is taken from xmm0 below
+	movdqu	XMMWORD[rsi],xmm0	; unaligned store of the 16-byte result
+	movaps	xmm6,XMMWORD[16+rsp]	; restore the xmm registers saved in the prologue
+	movaps	xmm7,XMMWORD[32+rsp]
+	movaps	xmm8,XMMWORD[48+rsp]
+	movaps	xmm9,XMMWORD[64+rsp]
+	movaps	xmm10,XMMWORD[80+rsp]
+	movaps	xmm11,XMMWORD[96+rsp]
+	movaps	xmm12,XMMWORD[112+rsp]
+	movaps	xmm13,XMMWORD[128+rsp]
+	movaps	xmm14,XMMWORD[144+rsp]
+	movaps	xmm15,XMMWORD[160+rsp]
+	lea	rsp,[184+rsp]	; release the 184-byte frame
+$L$enc_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_vpaes_encrypt:
+
+global	vpaes_decrypt
+
+ALIGN	16
+vpaes_decrypt:	; Win64 entry: load 16 bytes from arg1, run _vpaes_decrypt_core, store 16 bytes to arg2
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; rdi/rsi are stashed at [8+rsp]/[16+rsp] and reloaded in the epilogue
+	mov	rax,rsp	; rax = rsp at entry (read back by se_handler via CONTEXT.Rax for prologue faults)
+$L$SEH_begin_vpaes_decrypt:
+	mov	rdi,rcx	; Win64 arg1 (rcx) -> rdi: input block pointer
+	mov	rsi,rdx	; Win64 arg2 (rdx) -> rsi: output block pointer
+	mov	rdx,r8	; Win64 arg3 (r8)  -> rdx (not touched here; presumably the key consumed by the core - confirm)
+
+
+	lea	rsp,[((-184))+rsp]	; allocate 184 bytes; xmm6-xmm15 go to offsets 16..160
+	movaps	XMMWORD[16+rsp],xmm6
+	movaps	XMMWORD[32+rsp],xmm7
+	movaps	XMMWORD[48+rsp],xmm8
+	movaps	XMMWORD[64+rsp],xmm9
+	movaps	XMMWORD[80+rsp],xmm10
+	movaps	XMMWORD[96+rsp],xmm11
+	movaps	XMMWORD[112+rsp],xmm12
+	movaps	XMMWORD[128+rsp],xmm13
+	movaps	XMMWORD[144+rsp],xmm14
+	movaps	XMMWORD[160+rsp],xmm15
+$L$dec_body:
+	movdqu	xmm0,XMMWORD[rdi]	; unaligned load of the 16-byte input block
+	call	_vpaes_preheat	; load the shared constants into xmm9-xmm15
+	call	_vpaes_decrypt_core	; result is taken from xmm0 below
+	movdqu	XMMWORD[rsi],xmm0	; unaligned store of the 16-byte result
+	movaps	xmm6,XMMWORD[16+rsp]	; restore the xmm registers saved in the prologue
+	movaps	xmm7,XMMWORD[32+rsp]
+	movaps	xmm8,XMMWORD[48+rsp]
+	movaps	xmm9,XMMWORD[64+rsp]
+	movaps	xmm10,XMMWORD[80+rsp]
+	movaps	xmm11,XMMWORD[96+rsp]
+	movaps	xmm12,XMMWORD[112+rsp]
+	movaps	xmm13,XMMWORD[128+rsp]
+	movaps	xmm14,XMMWORD[144+rsp]
+	movaps	xmm15,XMMWORD[160+rsp]
+	lea	rsp,[184+rsp]	; release the 184-byte frame
+$L$dec_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_vpaes_decrypt:
+global	vpaes_cbc_encrypt
+
+ALIGN	16
+vpaes_cbc_encrypt:	; Win64 entry: CBC over whole 16-byte blocks; args = (in, out, length, arg4, iv pointer, enc flag)
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; rdi/rsi are stashed at [8+rsp]/[16+rsp] and reloaded in the epilogue
+	mov	rax,rsp	; rax = rsp at entry (read back by se_handler via CONTEXT.Rax for prologue faults)
+$L$SEH_begin_vpaes_cbc_encrypt:
+	mov	rdi,rcx	; Win64 arg1 (rcx) -> rdi: input pointer
+	mov	rsi,rdx	; Win64 arg2 (rdx) -> rsi: output pointer
+	mov	rdx,r8	; Win64 arg3 (r8)  -> rdx: byte length
+	mov	rcx,r9	; Win64 arg4 (r9)  -> rcx
+	mov	r8,QWORD[40+rsp]	; Win64 arg5 (stack) -> r8: IV pointer
+	mov	r9,QWORD[48+rsp]	; Win64 arg6 (stack) -> r9: non-zero selects the encrypt loop
+
+
+	xchg	rdx,rcx	; now rcx = length, rdx = arg4 (presumably the key consumed by the cores - confirm)
+	sub	rcx,16
+	jc	NEAR $L$cbc_abort	; length < 16: return without touching data, IV or xmm registers
+	lea	rsp,[((-184))+rsp]	; allocate 184 bytes; xmm6-xmm15 go to offsets 16..160
+	movaps	XMMWORD[16+rsp],xmm6
+	movaps	XMMWORD[32+rsp],xmm7
+	movaps	XMMWORD[48+rsp],xmm8
+	movaps	XMMWORD[64+rsp],xmm9
+	movaps	XMMWORD[80+rsp],xmm10
+	movaps	XMMWORD[96+rsp],xmm11
+	movaps	XMMWORD[112+rsp],xmm12
+	movaps	XMMWORD[128+rsp],xmm13
+	movaps	XMMWORD[144+rsp],xmm14
+	movaps	XMMWORD[160+rsp],xmm15
+$L$cbc_body:
+	movdqu	xmm6,XMMWORD[r8]	; xmm6 = current IV / chaining value
+	sub	rsi,rdi	; rsi = out - in, so [rdi+rsi] addresses the output block matching [rdi]
+	call	_vpaes_preheat
+	cmp	r9d,0
+	je	NEAR $L$cbc_dec_loop	; enc flag == 0 -> decrypt
+	jmp	NEAR $L$cbc_enc_loop
+ALIGN	16
+$L$cbc_enc_loop:
+	movdqu	xmm0,XMMWORD[rdi]	; load input block
+	pxor	xmm0,xmm6	; XOR with the chaining value
+	call	_vpaes_encrypt_core
+	movdqa	xmm6,xmm0	; the output block becomes the next chaining value
+	movdqu	XMMWORD[rdi*1+rsi],xmm0	; store output block
+	lea	rdi,[16+rdi]	; advance to the next block
+	sub	rcx,16
+	jnc	NEAR $L$cbc_enc_loop	; continue while at least one full block remains
+	jmp	NEAR $L$cbc_done
+ALIGN	16
+$L$cbc_dec_loop:
+	movdqu	xmm0,XMMWORD[rdi]	; load input block
+	movdqa	xmm7,xmm0	; keep a copy: it is the chaining value for the next block
+	call	_vpaes_decrypt_core
+	pxor	xmm0,xmm6	; XOR the core output with the previous chaining value
+	movdqa	xmm6,xmm7	; next chaining value = this input block
+	movdqu	XMMWORD[rdi*1+rsi],xmm0	; store output block
+	lea	rdi,[16+rdi]	; advance to the next block
+	sub	rcx,16
+	jnc	NEAR $L$cbc_dec_loop	; continue while at least one full block remains
+$L$cbc_done:
+	movdqu	XMMWORD[r8],xmm6	; write the final chaining value back through the IV pointer
+	movaps	xmm6,XMMWORD[16+rsp]	; restore the xmm registers saved in the prologue
+	movaps	xmm7,XMMWORD[32+rsp]
+	movaps	xmm8,XMMWORD[48+rsp]
+	movaps	xmm9,XMMWORD[64+rsp]
+	movaps	xmm10,XMMWORD[80+rsp]
+	movaps	xmm11,XMMWORD[96+rsp]
+	movaps	xmm12,XMMWORD[112+rsp]
+	movaps	xmm13,XMMWORD[128+rsp]
+	movaps	xmm14,XMMWORD[144+rsp]
+	movaps	xmm15,XMMWORD[160+rsp]
+	lea	rsp,[184+rsp]	; release the 184-byte frame
+$L$cbc_epilogue:
+$L$cbc_abort:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_vpaes_cbc_encrypt:
+
+
+
+
+
+
+
+ALIGN	16
+_vpaes_preheat:	; load shared constant tables into xmm9-xmm15 and leave r10 = address of $L$k_s0F
+	lea	r10,[$L$k_s0F]	; all loads below are addressed relative to $L$k_s0F
+	movdqa	xmm10,XMMWORD[((-32))+r10]	; xmm10 = first row of $L$k_inv
+	movdqa	xmm11,XMMWORD[((-16))+r10]	; xmm11 = second row of $L$k_inv
+	movdqa	xmm9,XMMWORD[r10]	; xmm9  = $L$k_s0F (0x0F in every byte)
+	movdqa	xmm13,XMMWORD[48+r10]	; xmm13 = first row of $L$k_sb1
+	movdqa	xmm12,XMMWORD[64+r10]	; xmm12 = second row of $L$k_sb1
+	movdqa	xmm15,XMMWORD[80+r10]	; xmm15 = first row of $L$k_sb2
+	movdqa	xmm14,XMMWORD[96+r10]	; xmm14 = second row of $L$k_sb2
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+ALIGN	64
+_vpaes_consts:	; 64-byte-aligned constant tables; the layout is load-bearing because _vpaes_preheat addresses them by fixed offsets from $L$k_s0F
+$L$k_inv:	; two 16-byte rows, loaded into xmm10 and xmm11 by _vpaes_preheat
+	DQ	0x0E05060F0D080180,0x040703090A0B0C02
+	DQ	0x01040A060F0B0780,0x030D0E0C02050809
+
+$L$k_s0F:	; 0x0F in every byte; loaded into xmm9 by _vpaes_preheat
+	DQ	0x0F0F0F0F0F0F0F0F,0x0F0F0F0F0F0F0F0F
+
+$L$k_ipt:	; two 16-byte rows (at $L$k_s0F+16 and +32)
+	DQ	0xC2B2E8985A2A7000,0xCABAE09052227808
+	DQ	0x4C01307D317C4D00,0xCD80B1FCB0FDCC81
+
+$L$k_sb1:	; two 16-byte rows, loaded into xmm13 and xmm12 by _vpaes_preheat
+	DQ	0xB19BE18FCB503E00,0xA5DF7A6E142AF544
+	DQ	0x3618D415FAE22300,0x3BF7CCC10D2ED9EF
+$L$k_sb2:	; two 16-byte rows, loaded into xmm15 and xmm14 by _vpaes_preheat
+	DQ	0xE27A93C60B712400,0x5EB7E955BC982FCD
+	DQ	0x69EB88400AE12900,0xC2A163C8AB82234A
+$L$k_sbo:	; two 16-byte rows
+	DQ	0xD0D26D176FBDC700,0x15AABF7AC502A878
+	DQ	0xCFE474A55FBB6A00,0x8E1E90D1412B35FA
+
+$L$k_mc_forward:	; four 16-byte rows of byte indices
+	DQ	0x0407060500030201,0x0C0F0E0D080B0A09
+	DQ	0x080B0A0904070605,0x000302010C0F0E0D
+	DQ	0x0C0F0E0D080B0A09,0x0407060500030201
+	DQ	0x000302010C0F0E0D,0x080B0A0904070605
+
+$L$k_mc_backward:	; four 16-byte rows of byte indices
+	DQ	0x0605040702010003,0x0E0D0C0F0A09080B
+	DQ	0x020100030E0D0C0F,0x0A09080B06050407
+	DQ	0x0E0D0C0F0A09080B,0x0605040702010003
+	DQ	0x0A09080B06050407,0x020100030E0D0C0F
+
+$L$k_sr:	; four 16-byte rows of byte indices; the first row is the identity order 0..15
+	DQ	0x0706050403020100,0x0F0E0D0C0B0A0908
+	DQ	0x030E09040F0A0500,0x0B06010C07020D08
+	DQ	0x0F060D040B020900,0x070E050C030A0108
+	DQ	0x0B0E0104070A0D00,0x0306090C0F020508
+
+$L$k_rcon:	; one 16-byte row
+	DQ	0x1F8391B9AF9DEEB6,0x702A98084D7C7D81
+
+$L$k_s63:	; 0x5B in every byte
+	DQ	0x5B5B5B5B5B5B5B5B,0x5B5B5B5B5B5B5B5B
+
+$L$k_opt:	; two 16-byte rows
+	DQ	0xFF9F4929D6B66000,0xF7974121DEBE6808
+	DQ	0x01EDBD5150BCEC00,0xE10D5DB1B05C0CE0
+
+$L$k_deskew:	; two 16-byte rows
+	DQ	0x07E4A34047A4E300,0x1DFEB95A5DBEF91A
+	DQ	0x5F36B5DC83EA6900,0x2841C2ABF49D1E77
+
+
+
+
+
+$L$k_dksd:	; two 16-byte rows
+	DQ	0xFEB91A5DA3E44700,0x0740E3A45A1DBEF9
+	DQ	0x41C277F4B5368300,0x5FDC69EAAB289D1E
+$L$k_dksb:	; two 16-byte rows
+	DQ	0x9A4FCA1F8550D500,0x03D653861CC94C99
+	DQ	0x115BEDA7B6FC4A00,0xD993256F7E3482C8
+$L$k_dkse:	; two 16-byte rows
+	DQ	0xD5031CCA1FC9D600,0x53859A4C994F5086
+	DQ	0xA23196054FDC7BE8,0xCD5EF96A20B31487
+$L$k_dks9:	; two 16-byte rows
+	DQ	0xB6116FC87ED9A700,0x4AED933482255BFC
+	DQ	0x4576516227143300,0x8BB89FACE9DAFDCE
+
+
+
+
+
+$L$k_dipt:	; two 16-byte rows
+	DQ	0x0F505B040B545F00,0x154A411E114E451A
+	DQ	0x86E383E660056500,0x12771772F491F194
+
+$L$k_dsb9:	; two 16-byte rows
+	DQ	0x851C03539A86D600,0xCAD51F504F994CC9
+	DQ	0xC03B1789ECD74900,0x725E2C9EB2FBA565
+$L$k_dsbd:	; two 16-byte rows
+	DQ	0x7D57CCDFE6B1A200,0xF56E9B13882A4439
+	DQ	0x3CE2FAF724C6CB00,0x2931180D15DEEFD3
+$L$k_dsbb:	; two 16-byte rows
+	DQ	0xD022649296B44200,0x602646F6B0F2D404
+	DQ	0xC19498A6CD596700,0xF3FF0C3E3255AA6B
+$L$k_dsbe:	; two 16-byte rows
+	DQ	0x46F2929626D4D000,0x2242600464B4F6B0
+	DQ	0x0C55A6CDFFAAC100,0x9467F36B98593E32
+$L$k_dsbo:	; two 16-byte rows
+	DQ	0x1387EA537EF94000,0xC7AA6DB9D4943E2D
+	DQ	0x12D7560F93441D00,0xCA4B8159D8C58E9C
+DB	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105	; ASCII ident string: "Vector Permutati"
+DB	111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54	; "on AES for x86_6"
+DB	52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97	; "4/SSSE3, Mike Ha"
+DB	109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32	; "mburg (Stanford "
+DB	85,110,105,118,101,114,115,105,116,121,41,0	; "University)" followed by the NUL terminator
+ALIGN	64
+
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+se_handler:	; Win64 language-specific exception handler for the vpaes_* entry points; r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*
+	push	rsi	; preserve the callee-saved registers and flags used below
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; outgoing-argument area for the RtlVirtualUnwind call (8 qword slots)
+
+	mov	rax,QWORD[120+r8]	; rax = context->Rax (the functions keep their entry rsp in rax during the prologue)
+	mov	rbx,QWORD[248+r8]	; rbx = context->Rip
+
+	mov	rsi,QWORD[8+r9]	; rsi = disp->ImageBase
+	mov	r11,QWORD[56+r9]	; r11 = disp->HandlerData (the two RVAs emitted after the handler in .xdata)
+
+	mov	r10d,DWORD[r11]	; HandlerData[0] = RVA of the *_body label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jb	NEAR $L$in_prologue	; Rip before the body label: frame not fully set up, use rax as the entry rsp
+
+	mov	rax,QWORD[152+r8]	; rax = context->Rsp
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1] = RVA of the *_epilogue label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$in_prologue	; Rip at/after the epilogue label: the 184-byte frame is already released
+
+	lea	rsi,[16+rax]	; source: xmm6-xmm15 save area at rsp+16
+	lea	rdi,[512+r8]	; destination: context->Xmm6
+	mov	ecx,20	; 20 qwords = 10 xmm registers
+	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
+	lea	rax,[184+rax]	; undo the 184-byte allocation: rax = rsp as it was at function entry
+
+$L$in_prologue:
+	mov	rdi,QWORD[8+rax]	; rdi/rsi values stashed by the WIN64 prologue at [8+rsp]/[16+rsp]
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context->Rsp = entry rsp
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+
+	mov	rdi,QWORD[40+r9]	; destination: disp->ContextRecord
+	mov	rsi,r8	; source: the adjusted context
+	mov	ecx,154	; 154 qwords (1232 bytes) copied
+	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rsi,r9	; rsi = disp
+	xor	rcx,rcx	; arg1 = 0 (handler type)
+	mov	rdx,QWORD[8+rsi]	; arg2 = disp->ImageBase
+	mov	r8,QWORD[rsi]	; arg3 = disp->ControlPc
+	mov	r9,QWORD[16+rsi]	; arg4 = disp->FunctionEntry
+	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
+	lea	r11,[56+rsi]	; &disp->HandlerData
+	lea	r12,[24+rsi]	; &disp->EstablisherFrame
+	mov	QWORD[32+rsp],r10	; arg5 = ContextRecord
+	mov	QWORD[40+rsp],r11	; arg6 = &HandlerData
+	mov	QWORD[48+rsp],r12	; arg7 = &EstablisherFrame
+	mov	QWORD[56+rsp],rcx	; arg8 = NULL
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; return 1 (ExceptionContinueSearch)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4	; function table: one (begin RVA, end RVA, unwind-info RVA) triple per entry point
+ALIGN	4
+	DD	$L$SEH_begin_vpaes_set_encrypt_key wrt ..imagebase
+	DD	$L$SEH_end_vpaes_set_encrypt_key wrt ..imagebase
+	DD	$L$SEH_info_vpaes_set_encrypt_key wrt ..imagebase
+
+	DD	$L$SEH_begin_vpaes_set_decrypt_key wrt ..imagebase
+	DD	$L$SEH_end_vpaes_set_decrypt_key wrt ..imagebase
+	DD	$L$SEH_info_vpaes_set_decrypt_key wrt ..imagebase
+
+	DD	$L$SEH_begin_vpaes_encrypt wrt ..imagebase
+	DD	$L$SEH_end_vpaes_encrypt wrt ..imagebase
+	DD	$L$SEH_info_vpaes_encrypt wrt ..imagebase
+
+	DD	$L$SEH_begin_vpaes_decrypt wrt ..imagebase
+	DD	$L$SEH_end_vpaes_decrypt wrt ..imagebase
+	DD	$L$SEH_info_vpaes_decrypt wrt ..imagebase
+
+	DD	$L$SEH_begin_vpaes_cbc_encrypt wrt ..imagebase
+	DD	$L$SEH_end_vpaes_cbc_encrypt wrt ..imagebase
+	DD	$L$SEH_info_vpaes_cbc_encrypt wrt ..imagebase
+
+section	.xdata rdata align=8	; unwind info records referenced from .pdata
+ALIGN	8
+$L$SEH_info_vpaes_set_encrypt_key:
+DB	9,0,0,0	; header byte 9 = version 1 with the exception-handler flag; prolog size 0, no unwind codes
+	DD	se_handler wrt ..imagebase	; handler RVA
+	DD	$L$enc_key_body wrt ..imagebase,$L$enc_key_epilogue wrt ..imagebase	; HandlerData[0..1] read by se_handler
+$L$SEH_info_vpaes_set_decrypt_key:
+DB	9,0,0,0	; same header as above
+	DD	se_handler wrt ..imagebase
+	DD	$L$dec_key_body wrt ..imagebase,$L$dec_key_epilogue wrt ..imagebase	; HandlerData[0..1]
+$L$SEH_info_vpaes_encrypt:
+DB	9,0,0,0	; same header as above
+	DD	se_handler wrt ..imagebase
+	DD	$L$enc_body wrt ..imagebase,$L$enc_epilogue wrt ..imagebase	; HandlerData[0..1]
+$L$SEH_info_vpaes_decrypt:
+DB	9,0,0,0	; same header as above
+	DD	se_handler wrt ..imagebase
+	DD	$L$dec_body wrt ..imagebase,$L$dec_epilogue wrt ..imagebase	; HandlerData[0..1]
+$L$SEH_info_vpaes_cbc_encrypt:
+DB	9,0,0,0	; same header as above
+	DD	se_handler wrt ..imagebase
+	DD	$L$cbc_body wrt ..imagebase,$L$cbc_epilogue wrt ..imagebase	; HandlerData[0..1]
diff --git a/third_party/boringssl/win-x86_64/crypto/fipsmodule/x86_64-mont.asm b/third_party/boringssl/win-x86_64/crypto/fipsmodule/x86_64-mont.asm
new file mode 100644
index 0000000..dd93341
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/fipsmodule/x86_64-mont.asm
@@ -0,0 +1,1072 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.text code align=64
+
+
+EXTERN	OPENSSL_ia32cap_P
+
+global	bn_mul_mont
+
+ALIGN	16
+bn_mul_mont:	; Montgomery multiplication entry; after remap rdi=rp (result), rsi=ap, rdx=bp, rcx=np, r8=pointer to n0, r9=num (64-bit words)
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; rdi/rsi are stashed at [8+rsp]/[16+rsp] and reloaded in the epilogue
+	mov	rax,rsp
+$L$SEH_begin_bn_mul_mont:
+	mov	rdi,rcx	; Win64 arg1 (rcx) -> rdi
+	mov	rsi,rdx	; Win64 arg2 (rdx) -> rsi
+	mov	rdx,r8	; Win64 arg3 (r8)  -> rdx
+	mov	rcx,r9	; Win64 arg4 (r9)  -> rcx
+	mov	r8,QWORD[40+rsp]	; Win64 arg5 (stack) -> r8
+	mov	r9,QWORD[48+rsp]	; Win64 arg6 (stack) -> r9
+
+
+
+	mov	r9d,r9d	; zero-extend the 32-bit word count to 64 bits
+	mov	rax,rsp	; rax = rsp at entry; saved in the frame below and used to unwind
+
+	test	r9d,3
+	jnz	NEAR $L$mul_enter	; num not a multiple of 4 -> generic path
+	cmp	r9d,8
+	jb	NEAR $L$mul_enter	; num < 8 -> generic path
+	cmp	rdx,rsi
+	jne	NEAR $L$mul4x_enter	; ap != bp -> 4x-unrolled multiply
+	test	r9d,7
+	jz	NEAR $L$sqr8x_enter	; ap == bp and num a multiple of 8 -> squaring path
+	jmp	NEAR $L$mul4x_enter
+
+ALIGN	16
+$L$mul_enter:
+	push	rbx	; save callee-saved registers just below the entry rsp (entry rsp - 8 .. - 48)
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+
+	neg	r9
+	mov	r11,rsp
+	lea	r10,[((-16))+r9*8+rsp]	; r10 = rsp - 8*num - 16: room for the scratch vector tp[0..num+1]
+	neg	r9	; restore r9 = num
+	and	r10,-1024	; round the frame base down to a 1024-byte boundary
+
+
+
+
+
+
+
+
+
+	sub	r11,r10
+	and	r11,-4096
+	lea	rsp,[r11*1+r10]	; first step: rsp = r10 + whole 4096-byte pages of the distance to the old rsp
+	mov	r11,QWORD[rsp]	; touch the page
+	cmp	rsp,r10
+	ja	NEAR $L$mul_page_walk
+	jmp	NEAR $L$mul_page_walk_done
+
+ALIGN	16
+$L$mul_page_walk:
+	lea	rsp,[((-4096))+rsp]	; step down one page at a time, touching each, until rsp reaches r10
+	mov	r11,QWORD[rsp]
+	cmp	rsp,r10
+	ja	NEAR $L$mul_page_walk
+$L$mul_page_walk_done:
+
+	mov	QWORD[8+r9*8+rsp],rax	; tp[num+1] = entry rsp (read back by the epilogue and by mul_handler)
+
+$L$mul_body:
+	mov	r12,rdx	; r12 = bp (rdx is clobbered by mul)
+	mov	r8,QWORD[r8]	; r8 = n0 value loaded through the arg5 pointer
+	mov	rbx,QWORD[r12]	; rbx = bp[0]
+	mov	rax,QWORD[rsi]	; rax = ap[0]
+
+	xor	r14,r14	; r14 = outer index i = 0
+	xor	r15,r15	; r15 = inner index j = 0
+
+	mov	rbp,r8
+	mul	rbx	; rdx:rax = ap[0]*bp[0]
+	mov	r10,rax
+	mov	rax,QWORD[rcx]	; rax = np[0]
+
+	imul	rbp,r10	; rbp = m = low word * n0 (mod 2^64)
+	mov	r11,rdx
+
+	mul	rbp	; rdx:rax = np[0]*m
+	add	r10,rax	; only the carry of this sum is used below
+	mov	rax,QWORD[8+rsi]	; rax = ap[1]
+	adc	rdx,0
+	mov	r13,rdx
+
+	lea	r15,[1+r15]	; j = 1
+	jmp	NEAR $L$1st_enter
+
+ALIGN	16
+$L$1st:	; first pass (i = 0): tp = (ap*bp[0] + np*m) shifted down one word
+	add	r13,rax
+	mov	rax,QWORD[r15*8+rsi]	; rax = ap[j]
+	adc	rdx,0
+	add	r13,r11
+	mov	r11,r10
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],r13	; tp[j-2]
+	mov	r13,rdx
+
+$L$1st_enter:
+	mul	rbx	; ap[j]*bp[0]
+	add	r11,rax
+	mov	rax,QWORD[r15*8+rcx]	; rax = np[j]
+	adc	rdx,0
+	lea	r15,[1+r15]	; j++
+	mov	r10,rdx
+
+	mul	rbp	; np[j]*m
+	cmp	r15,r9
+	jne	NEAR $L$1st
+
+	add	r13,rax
+	mov	rax,QWORD[rsi]	; reload ap[0] for the next outer iteration
+	adc	rdx,0
+	add	r13,r11
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],r13	; tp[num-2]
+	mov	r13,rdx
+	mov	r11,r10
+
+	xor	rdx,rdx
+	add	r13,r11
+	adc	rdx,0
+	mov	QWORD[((-8))+r9*8+rsp],r13	; tp[num-1]
+	mov	QWORD[r9*8+rsp],rdx	; tp[num] = top carry word
+
+	lea	r14,[1+r14]	; i = 1
+	jmp	NEAR $L$outer
+ALIGN	16
+$L$outer:	; for i in 1..num-1: tp = (tp + ap*bp[i] + np*m) shifted down one word
+	mov	rbx,QWORD[r14*8+r12]	; rbx = bp[i]
+	xor	r15,r15	; j = 0
+	mov	rbp,r8
+	mov	r10,QWORD[rsp]	; r10 = tp[0]
+	mul	rbx	; ap[0]*bp[i]
+	add	r10,rax
+	mov	rax,QWORD[rcx]	; rax = np[0]
+	adc	rdx,0
+
+	imul	rbp,r10	; rbp = m = (tp[0] + low(ap[0]*bp[i])) * n0 (mod 2^64)
+	mov	r11,rdx
+
+	mul	rbp	; np[0]*m
+	add	r10,rax	; only the carry of this sum is used below
+	mov	rax,QWORD[8+rsi]	; rax = ap[1]
+	adc	rdx,0
+	mov	r10,QWORD[8+rsp]	; r10 = tp[1]
+	mov	r13,rdx
+
+	lea	r15,[1+r15]	; j = 1
+	jmp	NEAR $L$inner_enter
+
+ALIGN	16
+$L$inner:
+	add	r13,rax
+	mov	rax,QWORD[r15*8+rsi]	; rax = ap[j]
+	adc	rdx,0
+	add	r13,r10
+	mov	r10,QWORD[r15*8+rsp]	; r10 = tp[j]
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],r13	; tp[j-2]
+	mov	r13,rdx
+
+$L$inner_enter:
+	mul	rbx	; ap[j]*bp[i]
+	add	r11,rax
+	mov	rax,QWORD[r15*8+rcx]	; rax = np[j]
+	adc	rdx,0
+	add	r10,r11
+	mov	r11,rdx
+	adc	r11,0
+	lea	r15,[1+r15]	; j++
+
+	mul	rbp	; np[j]*m
+	cmp	r15,r9
+	jne	NEAR $L$inner
+
+	add	r13,rax
+	mov	rax,QWORD[rsi]	; reload ap[0] for the next outer iteration
+	adc	rdx,0
+	add	r13,r10
+	mov	r10,QWORD[r15*8+rsp]	; r10 = tp[num] (previous top carry word)
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],r13	; tp[num-2]
+	mov	r13,rdx
+
+	xor	rdx,rdx
+	add	r13,r11
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-8))+r9*8+rsp],r13	; tp[num-1]
+	mov	QWORD[r9*8+rsp],rdx	; tp[num] = top carry word
+
+	lea	r14,[1+r14]	; i++
+	cmp	r14,r9
+	jb	NEAR $L$outer
+
+	xor	r14,r14	; index = 0; xor also clears CF for the sbb chain below
+	mov	rax,QWORD[rsp]	; rax = tp[0]
+	lea	rsi,[rsp]	; rsi = tp
+	mov	r15,r9	; loop counter = num
+	jmp	NEAR $L$sub
+ALIGN	16
+$L$sub:	; rp[i] = tp[i] - np[i] with borrow (lea/mov/dec leave CF untouched between iterations)
+	sbb	rax,QWORD[r14*8+rcx]
+	mov	QWORD[r14*8+rdi],rax
+	mov	rax,QWORD[8+r14*8+rsi]	; next tp word; on the last iteration this is tp[num]
+	lea	r14,[1+r14]
+	dec	r15
+	jnz	NEAR $L$sub
+
+	sbb	rax,0	; rax = tp[num] - borrow
+	xor	r14,r14
+	and	rsi,rax	; branch-free select of the copy source:
+	not	rax
+	mov	rcx,rdi
+	and	rcx,rax
+	mov	r15,r9	; loop counter = num
+	or	rsi,rcx	; rsi = (tp & rax) | (rp & ~rax): tp when rax is all-ones, rp when rax is zero
+ALIGN	16
+$L$copy:	; copy the selected vector into rp and overwrite tp on the stack
+	mov	rax,QWORD[r14*8+rsi]
+	mov	QWORD[r14*8+rsp],r14	; tp[i] is overwritten with the index i
+	mov	QWORD[r14*8+rdi],rax
+	lea	r14,[1+r14]
+	sub	r15,1
+	jnz	NEAR $L$copy
+
+	mov	rsi,QWORD[8+r9*8+rsp]	; rsi = entry rsp saved at tp[num+1]
+
+	mov	rax,1	; return value 1
+	mov	r15,QWORD[((-48))+rsi]	; restore callee-saved registers from below the entry rsp
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; rsp = entry rsp
+
+$L$mul_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_mul_mont:
+
+ALIGN	16
+bn_mul4x_mont:	; 4x-unrolled Montgomery multiplication; same register roles as bn_mul_mont, which jumps to $L$mul4x_enter
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; rdi/rsi are stashed at [8+rsp]/[16+rsp] and reloaded in the epilogue
+	mov	rax,rsp
+$L$SEH_begin_bn_mul4x_mont:
+	mov	rdi,rcx	; Win64 arg1 (rcx) -> rdi (rp)
+	mov	rsi,rdx	; Win64 arg2 (rdx) -> rsi (ap)
+	mov	rdx,r8	; Win64 arg3 (r8)  -> rdx (bp)
+	mov	rcx,r9	; Win64 arg4 (r9)  -> rcx (np)
+	mov	r8,QWORD[40+rsp]	; Win64 arg5 (stack) -> r8 (pointer to n0)
+	mov	r9,QWORD[48+rsp]	; Win64 arg6 (stack) -> r9 (num)
+
+
+
+	mov	r9d,r9d	; zero-extend the 32-bit word count to 64 bits
+	mov	rax,rsp	; rax = rsp at entry
+
+$L$mul4x_enter:
+	push	rbx	; save callee-saved registers just below the entry rsp (entry rsp - 8 .. - 48)
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+
+	neg	r9
+	mov	r11,rsp
+	lea	r10,[((-32))+r9*8+rsp]	; r10 = rsp - 8*num - 32: tp[0..num] plus slots for the entry rsp and rp
+	neg	r9	; restore r9 = num
+	and	r10,-1024	; round the frame base down to a 1024-byte boundary
+
+	sub	r11,r10
+	and	r11,-4096
+	lea	rsp,[r11*1+r10]	; first step: rsp = r10 + whole 4096-byte pages of the distance to the old rsp
+	mov	r11,QWORD[rsp]	; touch the page
+	cmp	rsp,r10
+	ja	NEAR $L$mul4x_page_walk
+	jmp	NEAR $L$mul4x_page_walk_done
+
+$L$mul4x_page_walk:
+	lea	rsp,[((-4096))+rsp]	; step down one page at a time, touching each, until rsp reaches r10
+	mov	r11,QWORD[rsp]
+	cmp	rsp,r10
+	ja	NEAR $L$mul4x_page_walk
+$L$mul4x_page_walk_done:
+
+	mov	QWORD[8+r9*8+rsp],rax	; tp[num+1] = entry rsp (read back by the epilogue and by mul_handler)
+
+$L$mul4x_body:
+	mov	QWORD[16+r9*8+rsp],rdi	; tp[num+2] = rp, because rdi is used as a scratch accumulator below
+	mov	r12,rdx	; r12 = bp (rdx is clobbered by mul)
+	mov	r8,QWORD[r8]	; r8 = n0 value loaded through the arg5 pointer
+	mov	rbx,QWORD[r12]	; rbx = bp[0]
+	mov	rax,QWORD[rsi]	; rax = ap[0]
+
+	xor	r14,r14	; r14 = outer index i = 0
+	xor	r15,r15	; r15 = inner index j = 0
+
+	mov	rbp,r8
+	mul	rbx	; ap[0]*bp[0]
+	mov	r10,rax
+	mov	rax,QWORD[rcx]	; rax = np[0]
+
+	imul	rbp,r10	; rbp = m = low word * n0 (mod 2^64)
+	mov	r11,rdx
+
+	mul	rbp	; np[0]*m
+	add	r10,rax	; only the carry of this sum is used below
+	mov	rax,QWORD[8+rsi]	; rax = ap[1]
+	adc	rdx,0
+	mov	rdi,rdx
+
+	mul	rbx	; ap[1]*bp[0]
+	add	r11,rax
+	mov	rax,QWORD[8+rcx]	; rax = np[1]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp	; np[1]*m
+	add	rdi,rax
+	mov	rax,QWORD[16+rsi]	; rax = ap[2]
+	adc	rdx,0
+	add	rdi,r11
+	lea	r15,[4+r15]	; j = 4
+	adc	rdx,0
+	mov	QWORD[rsp],rdi	; tp[0]
+	mov	r13,rdx
+	jmp	NEAR $L$1st4x
+ALIGN	16
+$L$1st4x:	; first pass (i = 0), four words of ap/np per iteration
+	mul	rbx	; ap[j-2]*bp[0]
+	add	r10,rax
+	mov	rax,QWORD[((-16))+r15*8+rcx]	; rax = np[j-2]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp	; np[j-2]*m
+	add	r13,rax
+	mov	rax,QWORD[((-8))+r15*8+rsi]	; rax = ap[j-1]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-24))+r15*8+rsp],r13	; tp[j-3]
+	mov	rdi,rdx
+
+	mul	rbx	; ap[j-1]*bp[0]
+	add	r11,rax
+	mov	rax,QWORD[((-8))+r15*8+rcx]	; rax = np[j-1]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp	; np[j-1]*m
+	add	rdi,rax
+	mov	rax,QWORD[r15*8+rsi]	; rax = ap[j]
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],rdi	; tp[j-2]
+	mov	r13,rdx
+
+	mul	rbx	; ap[j]*bp[0]
+	add	r10,rax
+	mov	rax,QWORD[r15*8+rcx]	; rax = np[j]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp	; np[j]*m
+	add	r13,rax
+	mov	rax,QWORD[8+r15*8+rsi]	; rax = ap[j+1]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-8))+r15*8+rsp],r13	; tp[j-1]
+	mov	rdi,rdx
+
+	mul	rbx	; ap[j+1]*bp[0]
+	add	r11,rax
+	mov	rax,QWORD[8+r15*8+rcx]	; rax = np[j+1]
+	adc	rdx,0
+	lea	r15,[4+r15]	; j += 4
+	mov	r10,rdx
+
+	mul	rbp	; np[j-3]*m (j already advanced)
+	add	rdi,rax
+	mov	rax,QWORD[((-16))+r15*8+rsi]	; rax = ap[j-2]
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-32))+r15*8+rsp],rdi	; tp[j-4]
+	mov	r13,rdx
+	cmp	r15,r9
+	jb	NEAR $L$1st4x
+
+	mul	rbx	; tail of the first pass: the last two words
+	add	r10,rax
+	mov	rax,QWORD[((-16))+r15*8+rcx]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[((-8))+r15*8+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-24))+r15*8+rsp],r13
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[((-8))+r15*8+rcx]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[rsi]	; reload ap[0] for the next outer iteration
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],rdi
+	mov	r13,rdx
+
+	xor	rdi,rdi
+	add	r13,r10
+	adc	rdi,0
+	mov	QWORD[((-8))+r15*8+rsp],r13	; tp[num-1]
+	mov	QWORD[r15*8+rsp],rdi	; tp[num] = top carry word
+
+	lea	r14,[1+r14]	; i = 1
+ALIGN	4
+$L$outer4x:	; for i in 1..num-1: tp = (tp + ap*bp[i] + np*m) shifted down one word
+	mov	rbx,QWORD[r14*8+r12]	; rbx = bp[i]
+	xor	r15,r15	; j = 0
+	mov	r10,QWORD[rsp]	; r10 = tp[0]
+	mov	rbp,r8
+	mul	rbx	; ap[0]*bp[i]
+	add	r10,rax
+	mov	rax,QWORD[rcx]	; rax = np[0]
+	adc	rdx,0
+
+	imul	rbp,r10	; rbp = m = (tp[0] + low(ap[0]*bp[i])) * n0 (mod 2^64)
+	mov	r11,rdx
+
+	mul	rbp	; np[0]*m
+	add	r10,rax	; only the carry of this sum is used below
+	mov	rax,QWORD[8+rsi]	; rax = ap[1]
+	adc	rdx,0
+	mov	rdi,rdx
+
+	mul	rbx	; ap[1]*bp[i]
+	add	r11,rax
+	mov	rax,QWORD[8+rcx]	; rax = np[1]
+	adc	rdx,0
+	add	r11,QWORD[8+rsp]	; + tp[1]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp	; np[1]*m
+	add	rdi,rax
+	mov	rax,QWORD[16+rsi]	; rax = ap[2]
+	adc	rdx,0
+	add	rdi,r11
+	lea	r15,[4+r15]	; j = 4
+	adc	rdx,0
+	mov	QWORD[rsp],rdi	; tp[0]
+	mov	r13,rdx
+	jmp	NEAR $L$inner4x
+ALIGN	16
+$L$inner4x:	; same shape as $L$1st4x with the previous tp words added in
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[((-16))+r15*8+rcx]
+	adc	rdx,0
+	add	r10,QWORD[((-16))+r15*8+rsp]	; + tp[j-2]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[((-8))+r15*8+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-24))+r15*8+rsp],r13	; tp[j-3]
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[((-8))+r15*8+rcx]
+	adc	rdx,0
+	add	r11,QWORD[((-8))+r15*8+rsp]	; + tp[j-1]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[r15*8+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],rdi	; tp[j-2]
+	mov	r13,rdx
+
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[r15*8+rcx]
+	adc	rdx,0
+	add	r10,QWORD[r15*8+rsp]	; + tp[j]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[8+r15*8+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-8))+r15*8+rsp],r13	; tp[j-1]
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[8+r15*8+rcx]
+	adc	rdx,0
+	add	r11,QWORD[8+r15*8+rsp]	; + tp[j+1]
+	adc	rdx,0
+	lea	r15,[4+r15]	; j += 4
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[((-16))+r15*8+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-32))+r15*8+rsp],rdi	; tp[j-4]
+	mov	r13,rdx
+	cmp	r15,r9
+	jb	NEAR $L$inner4x
+
+	mul	rbx	; tail of the pass: the last two words
+	add	r10,rax
+	mov	rax,QWORD[((-16))+r15*8+rcx]
+	adc	rdx,0
+	add	r10,QWORD[((-16))+r15*8+rsp]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[((-8))+r15*8+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-24))+r15*8+rsp],r13
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[((-8))+r15*8+rcx]
+	adc	rdx,0
+	add	r11,QWORD[((-8))+r15*8+rsp]
+	adc	rdx,0
+	lea	r14,[1+r14]	; i++
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[rsi]	; reload ap[0] for the next outer iteration
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],rdi
+	mov	r13,rdx
+
+	xor	rdi,rdi
+	add	r13,r10
+	adc	rdi,0
+	add	r13,QWORD[r9*8+rsp]	; + previous top carry word tp[num]
+	adc	rdi,0
+	mov	QWORD[((-8))+r15*8+rsp],r13	; tp[num-1]
+	mov	QWORD[r15*8+rsp],rdi	; tp[num] = top carry word
+
+	cmp	r14,r9
+	jb	NEAR $L$outer4x
+	mov	rdi,QWORD[16+r9*8+rsp]	; rdi = rp, reloaded from tp[num+2]
+	lea	r15,[((-4))+r9]
+	mov	rax,QWORD[rsp]	; rax = tp[0]
+	pxor	xmm0,xmm0	; xmm0 = 0, used to wipe tp in the copy loop
+	mov	rdx,QWORD[8+rsp]	; rdx = tp[1]
+	shr	r15,2	; r15 = (num-4)/4 iterations of $L$sub4x
+	lea	rsi,[rsp]	; rsi = tp
+	xor	r14,r14	; index = 0
+
+	sub	rax,QWORD[rcx]	; start the borrow chain: tp[0] - np[0]
+	mov	rbx,QWORD[16+rsi]
+	mov	rbp,QWORD[24+rsi]
+	sbb	rdx,QWORD[8+rcx]
+	jmp	NEAR $L$sub4x
+ALIGN	16
+$L$sub4x:	; rp = tp - np, four words per iteration (mov/lea/dec leave CF untouched)
+	mov	QWORD[r14*8+rdi],rax
+	mov	QWORD[8+r14*8+rdi],rdx
+	sbb	rbx,QWORD[16+r14*8+rcx]
+	mov	rax,QWORD[32+r14*8+rsi]
+	mov	rdx,QWORD[40+r14*8+rsi]
+	sbb	rbp,QWORD[24+r14*8+rcx]
+	mov	QWORD[16+r14*8+rdi],rbx
+	mov	QWORD[24+r14*8+rdi],rbp
+	sbb	rax,QWORD[32+r14*8+rcx]
+	mov	rbx,QWORD[48+r14*8+rsi]
+	mov	rbp,QWORD[56+r14*8+rsi]
+	sbb	rdx,QWORD[40+r14*8+rcx]
+	lea	r14,[4+r14]
+	dec	r15
+	jnz	NEAR $L$sub4x
+
+	mov	QWORD[r14*8+rdi],rax
+	mov	rax,QWORD[32+r14*8+rsi]	; rax = tp[num] (r14 = num-4 here)
+	sbb	rbx,QWORD[16+r14*8+rcx]
+	mov	QWORD[8+r14*8+rdi],rdx
+	sbb	rbp,QWORD[24+r14*8+rcx]
+	mov	QWORD[16+r14*8+rdi],rbx
+
+	sbb	rax,0	; rax = tp[num] - borrow
+	mov	QWORD[24+r14*8+rdi],rbp
+	xor	r14,r14	; r14 becomes a byte offset for the copy loop
+	and	rsi,rax	; branch-free select of the copy source:
+	not	rax
+	mov	rcx,rdi
+	and	rcx,rax
+	lea	r15,[((-4))+r9]
+	or	rsi,rcx	; rsi = (tp & rax) | (rp & ~rax): tp when rax is all-ones, rp when rax is zero
+	shr	r15,2	; r15 = (num-4)/4 iterations of $L$copy4x
+
+	movdqu	xmm1,XMMWORD[rsi]	; first 16 bytes of the selected vector
+	movdqa	XMMWORD[rsp],xmm0	; zero tp[0..1]
+	movdqu	XMMWORD[rdi],xmm1
+	jmp	NEAR $L$copy4x
+ALIGN	16
+$L$copy4x:	; copy 32 bytes per iteration into rp and zero the matching tp words
+	movdqu	xmm2,XMMWORD[16+r14*1+rsi]
+	movdqu	xmm1,XMMWORD[32+r14*1+rsi]
+	movdqa	XMMWORD[16+r14*1+rsp],xmm0
+	movdqu	XMMWORD[16+r14*1+rdi],xmm2
+	movdqa	XMMWORD[32+r14*1+rsp],xmm0
+	movdqu	XMMWORD[32+r14*1+rdi],xmm1
+	lea	r14,[32+r14]
+	dec	r15
+	jnz	NEAR $L$copy4x
+
+	movdqu	xmm2,XMMWORD[16+r14*1+rsi]	; final 16 bytes
+	movdqa	XMMWORD[16+r14*1+rsp],xmm0
+	movdqu	XMMWORD[16+r14*1+rdi],xmm2
+	mov	rsi,QWORD[8+r9*8+rsp]	; rsi = entry rsp saved at tp[num+1]
+
+	mov	rax,1	; return value 1
+	mov	r15,QWORD[((-48))+rsi]	; restore callee-saved registers from below the entry rsp
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; rsp = entry rsp
+
+$L$mul4x_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_mul4x_mont:
+EXTERN	bn_sqr8x_internal
+
+
+ALIGN	32
+bn_sqr8x_mont:	; Montgomery squaring path (bn_mul_mont jumps to $L$sqr8x_enter when ap == bp and num is a multiple of 8); the core work is in external bn_sqr8x_internal
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; rdi/rsi are stashed at [8+rsp]/[16+rsp] and reloaded in the epilogue
+	mov	rax,rsp
+$L$SEH_begin_bn_sqr8x_mont:
+	mov	rdi,rcx	; Win64 arg1 (rcx) -> rdi (rp)
+	mov	rsi,rdx	; Win64 arg2 (rdx) -> rsi (ap)
+	mov	rdx,r8	; Win64 arg3 (r8)  -> rdx
+	mov	rcx,r9	; Win64 arg4 (r9)  -> rcx (np)
+	mov	r8,QWORD[40+rsp]	; Win64 arg5 (stack) -> r8 (pointer to n0)
+	mov	r9,QWORD[48+rsp]	; Win64 arg6 (stack) -> r9 (num)
+
+
+
+	mov	rax,rsp	; rax = rsp at entry
+
+$L$sqr8x_enter:
+	push	rbx	; save callee-saved registers just below the entry rsp (entry rsp - 8 .. - 48)
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$sqr8x_prologue:
+
+	mov	r10d,r9d
+	shl	r9d,3	; r9 = num*8 (size in bytes)
+	shl	r10,3+2	; r10 = num*32
+	neg	r9	; r9 = -num*8
+
+
+
+
+
+
+	lea	r11,[((-64))+r9*2+rsp]	; candidate frame base: rsp - 64 - 2*num*8
+	mov	rbp,rsp
+	mov	r8,QWORD[r8]	; r8 = n0 value loaded through the arg5 pointer
+	sub	r11,rsi
+	and	r11,4095	; r11 = (candidate - ap) mod 4096
+	cmp	r10,r11
+	jb	NEAR $L$sqr8x_sp_alt	; the frame placement depends on this offset (presumably to avoid 4K aliasing with ap - confirm)
+	sub	rbp,r11
+	lea	rbp,[((-64))+r9*2+rbp]
+	jmp	NEAR $L$sqr8x_sp_done
+
+ALIGN	32
+$L$sqr8x_sp_alt:
+	lea	r10,[((4096-64))+r9*2]
+	lea	rbp,[((-64))+r9*2+rbp]
+	sub	r11,r10
+	mov	r10,0
+	cmovc	r11,r10	; clamp the adjustment to 0 when the subtraction borrowed
+	sub	rbp,r11
+$L$sqr8x_sp_done:
+	and	rbp,-64	; align the new frame base to 64 bytes
+	mov	r11,rsp
+	sub	r11,rbp
+	and	r11,-4096
+	lea	rsp,[rbp*1+r11]	; first step: rsp = rbp + whole 4096-byte pages of the distance to the old rsp
+	mov	r10,QWORD[rsp]	; touch the page
+	cmp	rsp,rbp
+	ja	NEAR $L$sqr8x_page_walk
+	jmp	NEAR $L$sqr8x_page_walk_done
+
+ALIGN	16
+$L$sqr8x_page_walk:
+	lea	rsp,[((-4096))+rsp]	; step down one page at a time, touching each, until rsp reaches rbp
+	mov	r10,QWORD[rsp]
+	cmp	rsp,rbp
+	ja	NEAR $L$sqr8x_page_walk
+$L$sqr8x_page_walk_done:
+
+	mov	r10,r9	; r10 = -num*8
+	neg	r9	; r9 = num*8
+
+	mov	QWORD[32+rsp],r8	; frame slot 32: n0
+	mov	QWORD[40+rsp],rax	; frame slot 40: entry rsp (read back below and by sqr_handler)
+
+$L$sqr8x_body:
+
+DB	102,72,15,110,209	; movq xmm2,rcx (np)
+	pxor	xmm0,xmm0
+DB	102,72,15,110,207	; movq xmm1,rdi (rp)
+DB	102,73,15,110,218	; movq xmm3,r10 (-num*8)
+	call	bn_sqr8x_internal	; external routine; its register contract is not visible in this file
+
+
+
+
+	lea	rbx,[r9*1+rdi]	; NOTE(review): r9, rdi, rbp and rax below are as left by bn_sqr8x_internal - confirm against that routine
+	mov	rcx,r9
+	mov	rdx,r9
+DB	102,72,15,126,207	; movq rdi,xmm1 (restore rp)
+	sar	rcx,3+2	; rcx = r9/32; the loop below counts it up to zero, so r9 is evidently negative here
+	jmp	NEAR $L$sqr8x_sub
+
+ALIGN	32
+$L$sqr8x_sub:	; [rdi] = [rbx] - [rbp] with borrow, four words per iteration (lea/mov/inc leave CF untouched)
+	mov	r12,QWORD[rbx]
+	mov	r13,QWORD[8+rbx]
+	mov	r14,QWORD[16+rbx]
+	mov	r15,QWORD[24+rbx]
+	lea	rbx,[32+rbx]
+	sbb	r12,QWORD[rbp]
+	sbb	r13,QWORD[8+rbp]
+	sbb	r14,QWORD[16+rbp]
+	sbb	r15,QWORD[24+rbp]
+	lea	rbp,[32+rbp]
+	mov	QWORD[rdi],r12
+	mov	QWORD[8+rdi],r13
+	mov	QWORD[16+rdi],r14
+	mov	QWORD[24+rdi],r15
+	lea	rdi,[32+rdi]
+	inc	rcx
+	jnz	NEAR $L$sqr8x_sub
+
+	sbb	rax,0	; fold the final borrow into rax to form the selection mask
+	lea	rbx,[r9*1+rbx]	; rewind rbx and rdi to the start of their vectors
+	lea	rdi,[r9*1+rdi]
+
+DB	102,72,15,110,200	; movq xmm1,rax
+	pxor	xmm0,xmm0
+	pshufd	xmm1,xmm1,0	; broadcast the low 32 bits of the mask to all four lanes
+	mov	rsi,QWORD[40+rsp]	; rsi = entry rsp saved in frame slot 40
+
+	jmp	NEAR $L$sqr8x_cond_copy
+
+ALIGN	32
+$L$sqr8x_cond_copy:	; branch-free select between the stack copy ([rbx]) and the subtracted result ([rdi]); wipes the scratch as it goes
+	movdqa	xmm2,XMMWORD[rbx]
+	movdqa	xmm3,XMMWORD[16+rbx]
+	lea	rbx,[32+rbx]
+	movdqu	xmm4,XMMWORD[rdi]
+	movdqu	xmm5,XMMWORD[16+rdi]
+	lea	rdi,[32+rdi]
+	movdqa	XMMWORD[(-32)+rbx],xmm0	; zero the 32 scratch bytes just read
+	movdqa	XMMWORD[(-16)+rbx],xmm0
+	movdqa	XMMWORD[(-32)+rdx*1+rbx],xmm0	; and the 32 bytes at offset rdx from them
+	movdqa	XMMWORD[(-16)+rdx*1+rbx],xmm0
+	pcmpeqd	xmm0,xmm1	; xmm0 = all-ones in lanes where the mask is zero
+	pand	xmm2,xmm1	; keep the [rbx] words where the mask is set
+	pand	xmm3,xmm1
+	pand	xmm4,xmm0	; keep the [rdi] words where the mask is clear
+	pand	xmm5,xmm0
+	pxor	xmm0,xmm0	; back to zero for the next iteration's wipes
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqu	XMMWORD[(-32)+rdi],xmm4
+	movdqu	XMMWORD[(-16)+rdi],xmm5
+	add	r9,32
+	jnz	NEAR $L$sqr8x_cond_copy
+
+	mov	rax,1	; return value 1
+	mov	r15,QWORD[((-48))+rsi]	; restore callee-saved registers from below the entry rsp
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; rsp = entry rsp
+
+$L$sqr8x_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_sqr8x_mont:
+DB	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105	; ASCII ident string: "Montgomery Multi"
+DB	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56	; "plication for x8"
+DB	54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83	; "6_64, CRYPTOGAMS"
+DB	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115	; " by <appro@opens"
+DB	115,108,46,111,114,103,62,0	; "sl.org>" followed by the NUL terminator
+ALIGN	16
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+mul_handler:	; Win64 exception handler for bn_mul_mont / bn_mul4x_mont; r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*; shares its tail with sqr_handler
+	push	rsi	; preserve the callee-saved registers and flags used below
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; outgoing-argument area for the RtlVirtualUnwind call
+
+	mov	rax,QWORD[120+r8]	; rax = context->Rax (the functions keep their entry rsp in rax before the body label)
+	mov	rbx,QWORD[248+r8]	; rbx = context->Rip
+
+	mov	rsi,QWORD[8+r9]	; rsi = disp->ImageBase
+	mov	r11,QWORD[56+r9]	; r11 = disp->HandlerData
+
+	mov	r10d,DWORD[r11]	; HandlerData[0] = RVA of the *_body label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; Rip before the body label: treat rax as the entry rsp, no pushed registers are restored
+
+	mov	rax,QWORD[152+r8]	; rax = context->Rsp
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1] = RVA of the *_epilogue label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; Rip at/after the epilogue label: rsp is already the entry rsp
+
+	mov	r10,QWORD[192+r8]	; r10 = context->R9 (the word count num in the multiply routines)
+	mov	rax,QWORD[8+r10*8+rax]	; rax = entry rsp that the routine saved at [rsp + 8 + num*8]
+
+	jmp	NEAR $L$common_pop_regs
+
+
+
+ALIGN	16
+sqr_handler:	; Win64 exception handler for bn_sqr8x_mont; HandlerData holds three RVAs (prologue, body, epilogue labels)
+	push	rsi	; preserve the callee-saved registers and flags used below
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; outgoing-argument area for the RtlVirtualUnwind call
+
+	mov	rax,QWORD[120+r8]	; rax = context->Rax (entry rsp while still in the prologue)
+	mov	rbx,QWORD[248+r8]	; rbx = context->Rip
+
+	mov	rsi,QWORD[8+r9]	; rsi = disp->ImageBase
+	mov	r11,QWORD[56+r9]	; r11 = disp->HandlerData
+
+	mov	r10d,DWORD[r11]	; HandlerData[0] = RVA of $L$sqr8x_prologue
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; Rip before $L$sqr8x_prologue: no pushed registers are restored
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1] = RVA of $L$sqr8x_body
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jb	NEAR $L$common_pop_regs	; between prologue and body labels: registers are pushed and rax still holds the entry rsp
+
+	mov	rax,QWORD[152+r8]	; rax = context->Rsp
+
+	mov	r10d,DWORD[8+r11]	; HandlerData[2] = RVA of $L$sqr8x_epilogue
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; Rip at/after the epilogue label: rsp is already the entry rsp
+
+	mov	rax,QWORD[40+rax]	; rax = entry rsp that bn_sqr8x_mont saved at [rsp+40]
+
+$L$common_pop_regs:	; rax = entry rsp; recover the six pushed registers from just below it
+	mov	rbx,QWORD[((-8))+rax]
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	r15,QWORD[((-48))+rax]
+	mov	QWORD[144+r8],rbx	; context->Rbx
+	mov	QWORD[160+r8],rbp	; context->Rbp
+	mov	QWORD[216+r8],r12	; context->R12
+	mov	QWORD[224+r8],r13	; context->R13
+	mov	QWORD[232+r8],r14	; context->R14
+	mov	QWORD[240+r8],r15	; context->R15
+
+$L$common_seh_tail:
+	mov	rdi,QWORD[8+rax]	; rdi/rsi values stashed by the WIN64 prologue at [8+rsp]/[16+rsp]
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context->Rsp = entry rsp
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+
+	mov	rdi,QWORD[40+r9]	; destination: disp->ContextRecord
+	mov	rsi,r8	; source: the adjusted context
+	mov	ecx,154	; 154 qwords (1232 bytes) copied
+	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rsi,r9	; rsi = disp
+	xor	rcx,rcx	; arg1 = 0 (handler type)
+	mov	rdx,QWORD[8+rsi]	; arg2 = disp->ImageBase
+	mov	r8,QWORD[rsi]	; arg3 = disp->ControlPc
+	mov	r9,QWORD[16+rsi]	; arg4 = disp->FunctionEntry
+	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
+	lea	r11,[56+rsi]	; &disp->HandlerData
+	lea	r12,[24+rsi]	; &disp->EstablisherFrame
+	mov	QWORD[32+rsp],r10	; arg5 = ContextRecord
+	mov	QWORD[40+rsp],r11	; arg6 = &HandlerData
+	mov	QWORD[48+rsp],r12	; arg7 = &EstablisherFrame
+	mov	QWORD[56+rsp],rcx	; arg8 = NULL
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; return 1 (ExceptionContinueSearch)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4	; function table: one (begin RVA, end RVA, unwind-info RVA) triple per routine
+ALIGN	4
+	DD	$L$SEH_begin_bn_mul_mont wrt ..imagebase
+	DD	$L$SEH_end_bn_mul_mont wrt ..imagebase
+	DD	$L$SEH_info_bn_mul_mont wrt ..imagebase
+
+	DD	$L$SEH_begin_bn_mul4x_mont wrt ..imagebase
+	DD	$L$SEH_end_bn_mul4x_mont wrt ..imagebase
+	DD	$L$SEH_info_bn_mul4x_mont wrt ..imagebase
+
+	DD	$L$SEH_begin_bn_sqr8x_mont wrt ..imagebase
+	DD	$L$SEH_end_bn_sqr8x_mont wrt ..imagebase
+	DD	$L$SEH_info_bn_sqr8x_mont wrt ..imagebase
+section	.xdata rdata align=8	; unwind info records referenced from .pdata
+ALIGN	8
+$L$SEH_info_bn_mul_mont:
+DB	9,0,0,0	; header byte 9 = version 1 with the exception-handler flag; prolog size 0, no unwind codes
+	DD	mul_handler wrt ..imagebase	; handler RVA
+	DD	$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase	; HandlerData[0..1] read by mul_handler
+$L$SEH_info_bn_mul4x_mont:
+DB	9,0,0,0	; same header as above
+	DD	mul_handler wrt ..imagebase
+	DD	$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase	; HandlerData[0..1] read by mul_handler
+$L$SEH_info_bn_sqr8x_mont:
+DB	9,0,0,0	; same header as above
+	DD	sqr_handler wrt ..imagebase
+	DD	$L$sqr8x_prologue wrt ..imagebase,$L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase	; HandlerData[0..2] read by sqr_handler
+ALIGN	8
diff --git a/third_party/boringssl/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm b/third_party/boringssl/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm
new file mode 100644
index 0000000..1bcbc5d
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm
@@ -0,0 +1,2601 @@
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section	.text code align=64
+
+
+EXTERN	OPENSSL_ia32cap_P
+
+global	bn_mul_mont_gather5
+; Entry point: shuffles the Win64 arguments, then dispatches on the word count to $L$mul_enter (generic) or $L$mul4x_enter (4x-unrolled).
+ALIGN	64
+bn_mul_mont_gather5:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_bn_mul_mont_gather5:
+	mov	rdi,rcx	; arguments 1-4 arrive in rcx,rdx,r8,r9 and are moved to rdi,rsi,rdx,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]	; argument 5 from the stack
+	mov	r9,QWORD[48+rsp]	; argument 6 from the stack (used below as the word count)
+; NOTE(review): the six values are presumably result, multiplicand, table, modulus, n0 pointer and word count --
+; confirm against the C prototype; only the register roles are visible here.
+
+	mov	r9d,r9d	; zero-extend the 32-bit word count into r9
+	mov	rax,rsp	; rax = entry rsp; stored in the frame below and used by the epilogue
+
+	test	r9d,7	; is the word count a multiple of 8?
+	jnz	NEAR $L$mul_enter	; no: generic one-word-at-a-time path
+	jmp	NEAR $L$mul4x_enter	; yes: continue inside bn_mul4x_mont_gather5, past its own argument shuffle
+
+ALIGN	16
+$L$mul_enter:
+	movd	xmm5,DWORD[56+rsp]	; argument 7 (stack slot 56): index compared against when gathering from the table
+	push	rbx	; save the six registers restored by the epilogue
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+
+	neg	r9
+	mov	r11,rsp
+	lea	r10,[((-280))+r9*8+rsp]	; r10 = rsp - 8*count - 280 (r9 is negated at this point)
+	neg	r9	; restore the positive count
+	and	r10,-1024	; round the new frame bottom down to a 1024-byte boundary
+
+; Move rsp down to r10, reading one quadword per 4096-byte step so that every page of the new
+; frame is touched in descending order (stack probing; presumably for OS guard pages -- confirm).
+
+
+
+
+
+
+
+	sub	r11,r10	; r11 = distance from the current rsp to the new bottom
+	and	r11,-4096	; ... rounded down to whole pages
+	lea	rsp,[r11*1+r10]
+	mov	r11,QWORD[rsp]	; touch the page
+	cmp	rsp,r10
+	ja	NEAR $L$mul_page_walk
+	jmp	NEAR $L$mul_page_walk_done
+
+$L$mul_page_walk:
+	lea	rsp,[((-4096))+rsp]	; step down one page
+	mov	r11,QWORD[rsp]	; touch it
+	cmp	rsp,r10
+	ja	NEAR $L$mul_page_walk
+$L$mul_page_walk_done:
+
+	lea	r10,[$L$inc]	; address of the $L$inc constants (defined outside this section)
+	mov	QWORD[8+r9*8+rsp],rax	; save entry rsp just above the count+1 quadword temporary that starts at rsp
+
+$L$mul_body:
+; Build 16 compare masks from the index in xmm5, then gather the first table word with them.
+	lea	r12,[128+rdx]	; table pointer biased by +128; the gather uses displacements -128..+112
+	movdqa	xmm0,XMMWORD[r10]	; first 16 bytes of the constants r10 points at
+	movdqa	xmm1,XMMWORD[16+r10]	; next 16 bytes
+	lea	r10,[((24-112))+r9*8+rsp]	; r10 -> scratch area for the masks, above the temporary vector
+	and	r10,-16	; 16-byte aligned, as movdqa requires
+; Each pcmpeqd below compares a running counter vector with the broadcast index; the 16 results go to [112+r10]..[352+r10].
+	pshufd	xmm5,xmm5,0	; broadcast the index to all four dwords
+	movdqa	xmm4,xmm1	; xmm4 keeps the increment used to advance the counters
+	movdqa	xmm2,xmm1
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+DB	0x67	; lone prefix byte emitted by the generator ahead of the next instruction (looks like padding)
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[112+r10],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[128+r10],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[144+r10],xmm2
+	movdqa	xmm2,xmm4
+
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[160+r10],xmm3
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[176+r10],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[192+r10],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[208+r10],xmm2
+	movdqa	xmm2,xmm4
+
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[224+r10],xmm3
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[240+r10],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[256+r10],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[272+r10],xmm2
+	movdqa	xmm2,xmm4
+
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[288+r10],xmm3
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[304+r10],xmm0
+
+	paddd	xmm3,xmm2
+DB	0x67	; lone prefix byte (looks like padding)
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[320+r10],xmm1
+
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[336+r10],xmm2
+	pand	xmm0,XMMWORD[64+r12]	; gather starts: the last four masks are still in registers
+
+	pand	xmm1,XMMWORD[80+r12]
+	pand	xmm2,XMMWORD[96+r12]
+	movdqa	XMMWORD[352+r10],xmm3
+	pand	xmm3,XMMWORD[112+r12]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	movdqa	xmm4,XMMWORD[((-128))+r12]	; remaining 12 table slots: load, AND with the stored mask, OR into xmm0/xmm1
+	movdqa	xmm5,XMMWORD[((-112))+r12]
+	movdqa	xmm2,XMMWORD[((-96))+r12]
+	pand	xmm4,XMMWORD[112+r10]
+	movdqa	xmm3,XMMWORD[((-80))+r12]
+	pand	xmm5,XMMWORD[128+r10]
+	por	xmm0,xmm4
+	pand	xmm2,XMMWORD[144+r10]
+	por	xmm1,xmm5
+	pand	xmm3,XMMWORD[160+r10]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	movdqa	xmm4,XMMWORD[((-64))+r12]
+	movdqa	xmm5,XMMWORD[((-48))+r12]
+	movdqa	xmm2,XMMWORD[((-32))+r12]
+	pand	xmm4,XMMWORD[176+r10]
+	movdqa	xmm3,XMMWORD[((-16))+r12]
+	pand	xmm5,XMMWORD[192+r10]
+	por	xmm0,xmm4
+	pand	xmm2,XMMWORD[208+r10]
+	por	xmm1,xmm5
+	pand	xmm3,XMMWORD[224+r10]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	movdqa	xmm4,XMMWORD[r12]
+	movdqa	xmm5,XMMWORD[16+r12]
+	movdqa	xmm2,XMMWORD[32+r12]
+	pand	xmm4,XMMWORD[240+r10]
+	movdqa	xmm3,XMMWORD[48+r12]
+	pand	xmm5,XMMWORD[256+r10]
+	por	xmm0,xmm4
+	pand	xmm2,XMMWORD[272+r10]
+	por	xmm1,xmm5
+	pand	xmm3,XMMWORD[288+r10]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	por	xmm0,xmm1
+	pshufd	xmm1,xmm0,0x4e	; swap the two 64-bit halves
+	por	xmm0,xmm1	; fold them so the low quadword holds the selected word
+	lea	r12,[256+r12]	; advance the table pointer by 256 bytes for the next gather
+DB	102,72,15,126,195	; movq rbx,xmm0 (hand-encoded): rbx = gathered multiplier word
+
+	mov	r8,QWORD[r8]	; replace the pointer in r8 with the quadword it points to (presumably the n0 constant)
+	mov	rax,QWORD[rsi]	; rax = first word at rsi
+
+	xor	r14,r14	; r14 = outer pass counter
+	xor	r15,r15	; r15 = word index within a pass
+
+	mov	rbp,r8
+	mul	rbx	; rdx:rax = rsi[0] * gathered word in rbx
+	mov	r10,rax
+	mov	rax,QWORD[rcx]	; rax = first word at rcx
+
+	imul	rbp,r10	; rbp = low product word * r8: the reduction multiplier for this pass
+	mov	r11,rdx
+
+	mul	rbp	; rdx:rax = rcx[0] * rbp
+	add	r10,rax	; only the carry matters; r10 is overwritten before it is read again
+	mov	rax,QWORD[8+rsi]
+	adc	rdx,0
+	mov	r13,rdx
+
+	lea	r15,[1+r15]	; increment without touching flags
+	jmp	NEAR $L$1st_enter
+
+ALIGN	16
+$L$1st:
+	add	r13,rax
+	mov	rax,QWORD[r15*8+rsi]
+	adc	rdx,0
+	add	r13,r11
+	mov	r11,r10
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],r13	; store result word r15-2 into the temporary at rsp
+	mov	r13,rdx
+
+$L$1st_enter:
+	mul	rbx	; rsi[r15] * gathered word
+	add	r11,rax
+	mov	rax,QWORD[r15*8+rcx]
+	adc	rdx,0
+	lea	r15,[1+r15]
+	mov	r10,rdx
+
+	mul	rbp	; rcx[r15-1] * reduction multiplier
+	cmp	r15,r9
+	jne	NEAR $L$1st	; loop until all r9 words are consumed
+
+
+	add	r13,rax
+	adc	rdx,0
+	add	r13,r11
+	adc	rdx,0
+	mov	QWORD[((-16))+r9*8+rsp],r13
+	mov	r13,rdx
+	mov	r11,r10
+
+	xor	rdx,rdx
+	add	r13,r11
+	adc	rdx,0
+	mov	QWORD[((-8))+r9*8+rsp],r13	; last full word of the temporary
+	mov	QWORD[r9*8+rsp],rdx	; carry word stored one past it
+
+	lea	r14,[1+r14]	; first pass done
+	jmp	NEAR $L$outer
+ALIGN	16
+$L$outer:
+	lea	rdx,[((24+128))+r9*8+rsp]	; rdx -> the stored compare masks, biased by +128 like the table pointer
+	and	rdx,-16
+	pxor	xmm4,xmm4	; two accumulators for the masked gather
+	pxor	xmm5,xmm5
+	movdqa	xmm0,XMMWORD[((-128))+r12]	; all 16 table slots are read and ANDed with their masks
+	movdqa	xmm1,XMMWORD[((-112))+r12]
+	movdqa	xmm2,XMMWORD[((-96))+r12]
+	movdqa	xmm3,XMMWORD[((-80))+r12]
+	pand	xmm0,XMMWORD[((-128))+rdx]
+	pand	xmm1,XMMWORD[((-112))+rdx]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[((-96))+rdx]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[((-80))+rdx]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[((-64))+r12]
+	movdqa	xmm1,XMMWORD[((-48))+r12]
+	movdqa	xmm2,XMMWORD[((-32))+r12]
+	movdqa	xmm3,XMMWORD[((-16))+r12]
+	pand	xmm0,XMMWORD[((-64))+rdx]
+	pand	xmm1,XMMWORD[((-48))+rdx]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[((-32))+rdx]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[((-16))+rdx]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[r12]
+	movdqa	xmm1,XMMWORD[16+r12]
+	movdqa	xmm2,XMMWORD[32+r12]
+	movdqa	xmm3,XMMWORD[48+r12]
+	pand	xmm0,XMMWORD[rdx]
+	pand	xmm1,XMMWORD[16+rdx]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[32+rdx]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[48+rdx]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[64+r12]
+	movdqa	xmm1,XMMWORD[80+r12]
+	movdqa	xmm2,XMMWORD[96+r12]
+	movdqa	xmm3,XMMWORD[112+r12]
+	pand	xmm0,XMMWORD[64+rdx]
+	pand	xmm1,XMMWORD[80+rdx]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[96+rdx]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[112+rdx]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	por	xmm4,xmm5
+	pshufd	xmm0,xmm4,0x4e	; swap 64-bit halves
+	por	xmm0,xmm4	; low quadword = selected word
+	lea	r12,[256+r12]	; next 256-byte table step
+
+	mov	rax,QWORD[rsi]
+DB	102,72,15,126,195	; movq rbx,xmm0 (hand-encoded): rbx = gathered multiplier word for this pass
+
+	xor	r15,r15	; restart the word index
+	mov	rbp,r8
+	mov	r10,QWORD[rsp]	; first word of the running temporary
+
+	mul	rbx	; rsi[0] * gathered word
+	add	r10,rax
+	mov	rax,QWORD[rcx]
+	adc	rdx,0
+
+	imul	rbp,r10	; reduction multiplier for this pass = low word * r8
+	mov	r11,rdx
+
+	mul	rbp	; rcx[0] * reduction multiplier
+	add	r10,rax	; only the carry is used; r10 is reloaded two lines below
+	mov	rax,QWORD[8+rsi]
+	adc	rdx,0
+	mov	r10,QWORD[8+rsp]	; second word of the temporary
+	mov	r13,rdx
+
+	lea	r15,[1+r15]
+	jmp	NEAR $L$inner_enter
+
+ALIGN	16
+$L$inner:
+	add	r13,rax
+	mov	rax,QWORD[r15*8+rsi]
+	adc	rdx,0
+	add	r13,r10
+	mov	r10,QWORD[r15*8+rsp]	; next word of the temporary to accumulate into
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],r13	; store result word r15-2
+	mov	r13,rdx
+
+$L$inner_enter:
+	mul	rbx	; rsi[r15] * gathered word
+	add	r11,rax
+	mov	rax,QWORD[r15*8+rcx]
+	adc	rdx,0
+	add	r10,r11
+	mov	r11,rdx
+	adc	r11,0
+	lea	r15,[1+r15]
+
+	mul	rbp	; rcx[r15-1] * reduction multiplier
+	cmp	r15,r9
+	jne	NEAR $L$inner
+
+	add	r13,rax
+	adc	rdx,0
+	add	r13,r10
+	mov	r10,QWORD[r9*8+rsp]	; carry word left by the previous pass
+	adc	rdx,0
+	mov	QWORD[((-16))+r9*8+rsp],r13
+	mov	r13,rdx
+
+	xor	rdx,rdx
+	add	r13,r11
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-8))+r9*8+rsp],r13
+	mov	QWORD[r9*8+rsp],rdx	; new carry word
+
+	lea	r14,[1+r14]
+	cmp	r14,r9
+	jb	NEAR $L$outer	; one pass per word: r9 passes in total
+
+	xor	r14,r14	; index = 0; also clears CF for the first sbb
+	mov	rax,QWORD[rsp]	; first word of the temporary
+	lea	rsi,[rsp]	; rsi -> temporary
+	mov	r15,r9	; word counter
+	jmp	NEAR $L$sub
+ALIGN	16
+$L$sub:
+	sbb	rax,QWORD[r14*8+rcx]	; temporary word minus the word at rcx, with borrow
+	mov	QWORD[r14*8+rdi],rax	; difference goes to the output at rdi
+	mov	rax,QWORD[8+r14*8+rsi]	; preload the next temporary word
+	lea	r14,[1+r14]	; lea/dec leave CF intact for the next sbb
+	dec	r15
+	jnz	NEAR $L$sub
+
+	sbb	rax,0	; fold the final borrow into the carry word of the temporary
+	xor	r14,r14
+	and	rsi,rax	; rax is used as a mask: rsi keeps the temporary pointer or becomes 0
+	not	rax
+	mov	rcx,rdi
+	and	rcx,rax	; rcx = output pointer or 0 under the inverted mask
+	mov	r15,r9
+	or	rsi,rcx	; rsi = branch-free choice between temporary and already-written difference
+ALIGN	16
+$L$copy:
+	mov	rax,QWORD[r14*8+rsi]	; read from the chosen source
+	mov	QWORD[r14*8+rsp],r14	; overwrite the temporary word with the loop index (wipes the intermediate value)
+	mov	QWORD[r14*8+rdi],rax	; write the final result word
+	lea	r14,[1+r14]
+	sub	r15,1
+	jnz	NEAR $L$copy
+
+	mov	rsi,QWORD[8+r9*8+rsp]	; reload the entry rsp saved above the temporary
+
+	mov	rax,1	; return value 1
+
+	mov	r15,QWORD[((-48))+rsi]	; restore the six pushed registers from just below the entry rsp
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; drop the whole frame
+
+$L$mul_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_mul_mont_gather5:
+; bn_mul4x_mont_gather5: frame setup around mul4x_internal; also entered at $L$mul4x_enter from bn_mul_mont_gather5.
+ALIGN	32
+bn_mul4x_mont_gather5:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_bn_mul4x_mont_gather5:
+	mov	rdi,rcx	; arguments 1-4: rcx,rdx,r8,r9 -> rdi,rsi,rdx,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]	; argument 5
+	mov	r9,QWORD[48+rsp]	; argument 6 (word count)
+
+
+
+DB	0x67	; lone prefix byte emitted by the generator (looks like padding)
+	mov	rax,rsp	; rax = entry rsp, saved in the frame and used by the epilogue
+
+$L$mul4x_enter:
+	push	rbx	; save the six registers restored by the epilogue
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$mul4x_prologue:
+
+DB	0x67	; lone prefix byte (looks like padding)
+	shl	r9d,3	; word count -> byte count
+	lea	r10,[r9*2+r9]	; r10 = 3 * byte count
+	neg	r9	; r9 = -byte count
+
+; Pick the frame address: r11 = (candidate - rdi) mod 4096 is compared with 3*bytes and the frame is
+; shifted accordingly (presumably so it does not alias the buffers at rdi modulo 4096 -- confirm intent).
+
+
+
+
+
+
+
+
+	lea	r11,[((-320))+r9*2+rsp]	; candidate bottom: rsp - 2*bytes - 320
+	mov	rbp,rsp
+	sub	r11,rdi
+	and	r11,4095
+	cmp	r10,r11
+	jb	NEAR $L$mul4xsp_alt
+	sub	rbp,r11
+	lea	rbp,[((-320))+r9*2+rbp]
+	jmp	NEAR $L$mul4xsp_done
+
+ALIGN	32
+$L$mul4xsp_alt:
+	lea	r10,[((4096-320))+r9*2]
+	lea	rbp,[((-320))+r9*2+rbp]
+	sub	r11,r10
+	mov	r10,0
+	cmovc	r11,r10	; clamp the adjustment to 0 if the subtraction borrowed
+	sub	rbp,r11
+$L$mul4xsp_done:
+	and	rbp,-64	; 64-byte align the frame bottom
+	mov	r11,rsp
+	sub	r11,rbp
+	and	r11,-4096	; whole pages between the current rsp and the new bottom
+	lea	rsp,[rbp*1+r11]
+	mov	r10,QWORD[rsp]	; touch the page (stack probe)
+	cmp	rsp,rbp
+	ja	NEAR $L$mul4x_page_walk
+	jmp	NEAR $L$mul4x_page_walk_done
+
+$L$mul4x_page_walk:
+	lea	rsp,[((-4096))+rsp]	; step down one page at a time, touching each
+	mov	r10,QWORD[rsp]
+	cmp	rsp,rbp
+	ja	NEAR $L$mul4x_page_walk
+$L$mul4x_page_walk_done:
+
+	neg	r9	; back to the positive byte count
+
+	mov	QWORD[40+rsp],rax	; save entry rsp in the frame
+
+$L$mul4x_body:
+
+	call	mul4x_internal	; does the multiplication; reads argument 7 through rax
+
+	mov	rsi,QWORD[40+rsp]	; reload the saved entry rsp
+
+	mov	rax,1	; return value 1
+
+	mov	r15,QWORD[((-48))+rsi]	; restore the six pushed registers
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; drop the frame
+
+$L$mul4x_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_mul4x_mont_gather5:
+; mul4x_internal: 4x-unrolled multiply called by bn_mul4x_mont_gather5 and bn_power5.
+; Expects r9 = byte count, rax = the public caller's entry rsp, rdi/rsi/rdx/rcx/r8 as left by the caller's argument shuffle.
+ALIGN	32
+mul4x_internal:
+	shl	r9,5	; temporarily bytes*32, to size the table
+	movd	xmm5,DWORD[56+rax]	; argument 7 of the public caller: index used for the gather
+	lea	rax,[$L$inc]	; address of the $L$inc constants (defined outside this section)
+	lea	r13,[128+r9*1+rdx]	; r13 = biased end-of-table pointer; the outer loop stops there
+	shr	r9,5	; restore the byte count
+	movdqa	xmm0,XMMWORD[rax]
+	movdqa	xmm1,XMMWORD[16+rax]
+	lea	r10,[((88-112))+r9*1+rsp]	; r10 -> scratch area for the 16 compare masks
+	lea	r12,[128+rdx]	; table pointer biased by +128
+; Each pcmpeqd below compares a running counter vector with the broadcast index; results go to [112+r10]..[352+r10].
+	pshufd	xmm5,xmm5,0	; broadcast the index to all four dwords
+	movdqa	xmm4,xmm1	; xmm4 keeps the increment used to advance the counters
+DB	0x67,0x67	; prefix bytes emitted by the generator (look like padding)
+	movdqa	xmm2,xmm1
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+DB	0x67
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[112+r10],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[128+r10],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[144+r10],xmm2
+	movdqa	xmm2,xmm4
+
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[160+r10],xmm3
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[176+r10],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[192+r10],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[208+r10],xmm2
+	movdqa	xmm2,xmm4
+
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[224+r10],xmm3
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[240+r10],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[256+r10],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[272+r10],xmm2
+	movdqa	xmm2,xmm4
+
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[288+r10],xmm3
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[304+r10],xmm0
+
+	paddd	xmm3,xmm2
+DB	0x67
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[320+r10],xmm1
+
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[336+r10],xmm2
+	pand	xmm0,XMMWORD[64+r12]	; gather starts: the last four masks are still in registers
+
+	pand	xmm1,XMMWORD[80+r12]
+	pand	xmm2,XMMWORD[96+r12]
+	movdqa	XMMWORD[352+r10],xmm3
+	pand	xmm3,XMMWORD[112+r12]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	movdqa	xmm4,XMMWORD[((-128))+r12]	; remaining 12 table slots: load, AND with stored mask, OR into xmm0/xmm1
+	movdqa	xmm5,XMMWORD[((-112))+r12]
+	movdqa	xmm2,XMMWORD[((-96))+r12]
+	pand	xmm4,XMMWORD[112+r10]
+	movdqa	xmm3,XMMWORD[((-80))+r12]
+	pand	xmm5,XMMWORD[128+r10]
+	por	xmm0,xmm4
+	pand	xmm2,XMMWORD[144+r10]
+	por	xmm1,xmm5
+	pand	xmm3,XMMWORD[160+r10]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	movdqa	xmm4,XMMWORD[((-64))+r12]
+	movdqa	xmm5,XMMWORD[((-48))+r12]
+	movdqa	xmm2,XMMWORD[((-32))+r12]
+	pand	xmm4,XMMWORD[176+r10]
+	movdqa	xmm3,XMMWORD[((-16))+r12]
+	pand	xmm5,XMMWORD[192+r10]
+	por	xmm0,xmm4
+	pand	xmm2,XMMWORD[208+r10]
+	por	xmm1,xmm5
+	pand	xmm3,XMMWORD[224+r10]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	movdqa	xmm4,XMMWORD[r12]
+	movdqa	xmm5,XMMWORD[16+r12]
+	movdqa	xmm2,XMMWORD[32+r12]
+	pand	xmm4,XMMWORD[240+r10]
+	movdqa	xmm3,XMMWORD[48+r12]
+	pand	xmm5,XMMWORD[256+r10]
+	por	xmm0,xmm4
+	pand	xmm2,XMMWORD[272+r10]
+	por	xmm1,xmm5
+	pand	xmm3,XMMWORD[288+r10]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	por	xmm0,xmm1
+	pshufd	xmm1,xmm0,0x4e	; swap the two 64-bit halves
+	por	xmm0,xmm1	; low quadword = selected word
+	lea	r12,[256+r12]	; next 256-byte table step
+DB	102,72,15,126,195	; movq rbx,xmm0 (hand-encoded): rbx = gathered multiplier word
+
+	mov	QWORD[((16+8))+rsp],r13	; save the end-of-table pointer (the +8 skips this routine's return address)
+	mov	QWORD[((56+8))+rsp],rdi	; save the output pointer; rdi is reused as a scratch register below
+
+	mov	r8,QWORD[r8]	; replace the pointer in r8 with the quadword it points to (presumably the n0 constant)
+	mov	rax,QWORD[rsi]	; rax = first word at rsi
+	lea	rsi,[r9*1+rsi]	; rsi -> one past the last input word
+	neg	r9	; r9 = -byte count, so [r9+rsi] addresses word 0
+
+	mov	rbp,r8
+	mul	rbx	; first input word * gathered word
+	mov	r10,rax
+	mov	rax,QWORD[rcx]
+
+	imul	rbp,r10	; rbp = low product word * r8: reduction multiplier for this pass
+	lea	r14,[((64+8))+rsp]	; r14 -> temporary result vector
+	mov	r11,rdx
+
+	mul	rbp	; rcx[0] * reduction multiplier
+	add	r10,rax	; only the carry is used; r10 is overwritten before it is read again
+	mov	rax,QWORD[8+r9*1+rsi]
+	adc	rdx,0
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[8+rcx]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[16+r9*1+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	lea	r15,[32+r9]	; r15 = negative byte offset of the next 4-word group; reaches 0 at the end
+	lea	rcx,[32+rcx]
+	adc	rdx,0
+	mov	QWORD[r14],rdi
+	mov	r13,rdx
+	jmp	NEAR $L$1st4x
+
+ALIGN	32
+$L$1st4x:
+	mul	rbx	; four words per iteration: alternate "* gathered word" and "* reduction multiplier"
+	add	r10,rax
+	mov	rax,QWORD[((-16))+rcx]
+	lea	r14,[32+r14]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[((-8))+r15*1+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-24))+r14],r13
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[((-8))+rcx]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[r15*1+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-16))+r14],rdi
+	mov	r13,rdx
+
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[rcx]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[8+r15*1+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-8))+r14],r13
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[8+rcx]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[16+r15*1+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	lea	rcx,[32+rcx]
+	adc	rdx,0
+	mov	QWORD[r14],rdi
+	mov	r13,rdx
+
+	add	r15,32
+	jnz	NEAR $L$1st4x	; loop until the offset reaches 0
+
+	mul	rbx	; tail: the last two word positions of the pass
+	add	r10,rax
+	mov	rax,QWORD[((-16))+rcx]
+	lea	r14,[32+r14]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[((-8))+rsi]	; last input word
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-24))+r14],r13
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[((-8))+rcx]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[r9*1+rsi]	; preload the first input word for the next pass
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-16))+r14],rdi
+	mov	r13,rdx
+
+	lea	rcx,[r9*1+rcx]	; rewind rcx to its first word
+
+	xor	rdi,rdi
+	add	r13,r10
+	adc	rdi,0	; rdi = carry out of the pass
+	mov	QWORD[((-8))+r14],r13
+
+	jmp	NEAR $L$outer4x
+
+ALIGN	32
+$L$outer4x:
+	lea	rdx,[((16+128))+r14]	; rdx -> the stored compare masks, biased by +128 like the table pointer
+	pxor	xmm4,xmm4	; two accumulators for the masked gather
+	pxor	xmm5,xmm5
+	movdqa	xmm0,XMMWORD[((-128))+r12]	; all 16 table slots are read and ANDed with their masks
+	movdqa	xmm1,XMMWORD[((-112))+r12]
+	movdqa	xmm2,XMMWORD[((-96))+r12]
+	movdqa	xmm3,XMMWORD[((-80))+r12]
+	pand	xmm0,XMMWORD[((-128))+rdx]
+	pand	xmm1,XMMWORD[((-112))+rdx]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[((-96))+rdx]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[((-80))+rdx]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[((-64))+r12]
+	movdqa	xmm1,XMMWORD[((-48))+r12]
+	movdqa	xmm2,XMMWORD[((-32))+r12]
+	movdqa	xmm3,XMMWORD[((-16))+r12]
+	pand	xmm0,XMMWORD[((-64))+rdx]
+	pand	xmm1,XMMWORD[((-48))+rdx]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[((-32))+rdx]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[((-16))+rdx]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[r12]
+	movdqa	xmm1,XMMWORD[16+r12]
+	movdqa	xmm2,XMMWORD[32+r12]
+	movdqa	xmm3,XMMWORD[48+r12]
+	pand	xmm0,XMMWORD[rdx]
+	pand	xmm1,XMMWORD[16+rdx]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[32+rdx]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[48+rdx]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[64+r12]
+	movdqa	xmm1,XMMWORD[80+r12]
+	movdqa	xmm2,XMMWORD[96+r12]
+	movdqa	xmm3,XMMWORD[112+r12]
+	pand	xmm0,XMMWORD[64+rdx]
+	pand	xmm1,XMMWORD[80+rdx]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[96+rdx]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[112+rdx]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	por	xmm4,xmm5
+	pshufd	xmm0,xmm4,0x4e	; swap 64-bit halves
+	por	xmm0,xmm4	; low quadword = selected word
+	lea	r12,[256+r12]	; next 256-byte table step
+DB	102,72,15,126,195	; movq rbx,xmm0 (hand-encoded): rbx = gathered multiplier word for this pass
+
+	mov	r10,QWORD[r9*1+r14]	; first word of the running temporary (r9 is negative)
+	mov	rbp,r8
+	mul	rbx	; first input word (preloaded in rax) * gathered word
+	add	r10,rax
+	mov	rax,QWORD[rcx]
+	adc	rdx,0
+
+	imul	rbp,r10	; reduction multiplier for this pass = low word * r8
+	mov	r11,rdx
+	mov	QWORD[r14],rdi	; store the carry word left by the previous pass
+
+	lea	r14,[r9*1+r14]	; rewind r14 to the start of the temporary
+
+	mul	rbp	; rcx[0] * reduction multiplier
+	add	r10,rax	; only the carry is used; r10 is overwritten before it is read again
+	mov	rax,QWORD[8+r9*1+rsi]
+	adc	rdx,0
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[8+rcx]
+	adc	rdx,0
+	add	r11,QWORD[8+r14]	; accumulate the existing temporary word
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[16+r9*1+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	lea	r15,[32+r9]	; negative byte offset of the next 4-word group
+	lea	rcx,[32+rcx]
+	adc	rdx,0
+	mov	r13,rdx
+	jmp	NEAR $L$inner4x
+
+ALIGN	32
+$L$inner4x:
+	mul	rbx	; four words per iteration, each product also accumulating the existing temporary word
+	add	r10,rax
+	mov	rax,QWORD[((-16))+rcx]
+	adc	rdx,0
+	add	r10,QWORD[16+r14]
+	lea	r14,[32+r14]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[((-8))+r15*1+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-32))+r14],rdi	; results are written one word position lower than they were read
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[((-8))+rcx]
+	adc	rdx,0
+	add	r11,QWORD[((-8))+r14]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[r15*1+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-24))+r14],r13
+	mov	r13,rdx
+
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[rcx]
+	adc	rdx,0
+	add	r10,QWORD[r14]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[8+r15*1+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-16))+r14],rdi
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[8+rcx]
+	adc	rdx,0
+	add	r11,QWORD[8+r14]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[16+r15*1+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	lea	rcx,[32+rcx]
+	adc	rdx,0
+	mov	QWORD[((-8))+r14],r13
+	mov	r13,rdx
+
+	add	r15,32
+	jnz	NEAR $L$inner4x	; loop until the offset reaches 0
+
+	mul	rbx	; tail: last two word positions of the pass
+	add	r10,rax
+	mov	rax,QWORD[((-16))+rcx]
+	adc	rdx,0
+	add	r10,QWORD[16+r14]
+	lea	r14,[32+r14]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[((-8))+rsi]	; last input word
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-32))+r14],rdi
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,rbp	; operands swapped: rax = reduction multiplier ...
+	mov	rbp,QWORD[((-8))+rcx]	; ... and rbp = last word at rcx, kept for the comparison after the loop
+	adc	rdx,0
+	add	r11,QWORD[((-8))+r14]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[r9*1+rsi]	; preload the first input word for the next pass
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-24))+r14],r13
+	mov	r13,rdx
+
+	mov	QWORD[((-16))+r14],rdi
+	lea	rcx,[r9*1+rcx]	; rewind rcx to its first word
+
+	xor	rdi,rdi
+	add	r13,r10
+	adc	rdi,0
+	add	r13,QWORD[r14]	; add the carry word stored by the previous pass
+	adc	rdi,0	; rdi = carry out of this pass
+	mov	QWORD[((-8))+r14],r13
+
+	cmp	r12,QWORD[((16+8))+rsp]	; table pointer reached the saved end pointer?
+	jb	NEAR $L$outer4x
+	xor	rax,rax
+	sub	rbp,r13	; compare last word at rcx with the top result word; CF set if the result word is larger
+	adc	r15,r15	; r15 is 0 on loop exit, so r15 = that borrow
+	or	rdi,r15	; combine with the carry out of the final pass
+	sub	rax,rdi	; rax = 0 or all-ones: mask consumed at $L$sqr4x_sub_entry
+	lea	rbx,[r9*1+r14]	; rbx -> start of the result in the temporary
+	mov	r12,QWORD[rcx]	; first four words at rcx, as $L$sqr4x_sub_entry expects them
+	lea	rbp,[rcx]
+	mov	rcx,r9
+	sar	rcx,3+2	; rcx = -(byte count / 32): loop counter counted up to 0
+	mov	rdi,QWORD[((56+8))+rsp]	; reload the saved output pointer
+	dec	r12	; pre-decrement so the later NOT yields the two's-complement negation of word 0
+	xor	r10,r10	; no incoming borrow
+	mov	r13,QWORD[8+rbp]
+	mov	r14,QWORD[16+rbp]
+	mov	r15,QWORD[24+rbp]
+	jmp	NEAR $L$sqr4x_sub_entry	; tail-jump: that code's return goes back to this routine's caller
+
+global	bn_power5
+; bn_power5: five square-and-reduce rounds followed by one mul4x_internal call with a gathered table word.
+ALIGN	32
+bn_power5:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_bn_power5:
+	mov	rdi,rcx	; arguments 1-4: rcx,rdx,r8,r9 -> rdi,rsi,rdx,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]	; argument 5
+	mov	r9,QWORD[48+rsp]	; argument 6 (word count)
+
+
+
+	mov	rax,rsp	; rax = entry rsp, saved in the frame below
+
+	push	rbx	; save the six registers restored by the epilogue
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$power5_prologue:
+
+	shl	r9d,3	; word count -> byte count
+	lea	r10d,[r9*2+r9]	; r10 = 3 * byte count (32-bit result, zero-extended)
+	neg	r9	; r9 = -byte count
+	mov	r8,QWORD[r8]	; replace the pointer in r8 with the quadword it points to (presumably the n0 constant)
+
+; Pick the frame address: r11 = (candidate - rdi) mod 4096 is compared with 3*bytes and the frame is
+; shifted accordingly (presumably so it does not alias the buffers at rdi modulo 4096 -- confirm intent).
+
+
+
+
+
+
+	lea	r11,[((-320))+r9*2+rsp]	; candidate bottom: rsp - 2*bytes - 320
+	mov	rbp,rsp
+	sub	r11,rdi
+	and	r11,4095
+	cmp	r10,r11
+	jb	NEAR $L$pwr_sp_alt
+	sub	rbp,r11
+	lea	rbp,[((-320))+r9*2+rbp]
+	jmp	NEAR $L$pwr_sp_done
+
+ALIGN	32
+$L$pwr_sp_alt:
+	lea	r10,[((4096-320))+r9*2]
+	lea	rbp,[((-320))+r9*2+rbp]
+	sub	r11,r10
+	mov	r10,0
+	cmovc	r11,r10	; clamp the adjustment to 0 if the subtraction borrowed
+	sub	rbp,r11
+$L$pwr_sp_done:
+	and	rbp,-64	; 64-byte align the frame bottom
+	mov	r11,rsp
+	sub	r11,rbp
+	and	r11,-4096	; whole pages between the current rsp and the new bottom
+	lea	rsp,[rbp*1+r11]
+	mov	r10,QWORD[rsp]	; touch the page (stack probe)
+	cmp	rsp,rbp
+	ja	NEAR $L$pwr_page_walk
+	jmp	NEAR $L$pwr_page_walk_done
+
+$L$pwr_page_walk:
+	lea	rsp,[((-4096))+rsp]	; step down one page at a time, touching each
+	mov	r10,QWORD[rsp]
+	cmp	rsp,rbp
+	ja	NEAR $L$pwr_page_walk
+$L$pwr_page_walk_done:
+
+	mov	r10,r9	; r10 = -byte count
+	neg	r9	; r9 = +byte count
+
+; Frame slots used below: [32+rsp] = value loaded through r8, [40+rsp] = entry rsp.
+
+
+
+
+
+
+
+
+	mov	QWORD[32+rsp],r8
+	mov	QWORD[40+rsp],rax
+
+$L$power5_body:
+DB	102,72,15,110,207	; movq xmm1,rdi (hand-encoded): park the output pointer
+DB	102,72,15,110,209	; movq xmm2,rcx: park the fourth argument
+DB	102,73,15,110,218	; movq xmm3,r10: park -byte count
+DB	102,72,15,110,226	; movq xmm4,rdx: park the table pointer
+
+	call	__bn_sqr8x_internal	; five rounds of square (with reduction) ...
+	call	__bn_post4x_internal	; ... each followed by the masked subtraction that writes the output
+	call	__bn_sqr8x_internal
+	call	__bn_post4x_internal
+	call	__bn_sqr8x_internal
+	call	__bn_post4x_internal
+	call	__bn_sqr8x_internal
+	call	__bn_post4x_internal
+	call	__bn_sqr8x_internal
+	call	__bn_post4x_internal
+
+DB	102,72,15,126,209	; movq rcx,xmm2: recover the fourth argument
+DB	102,72,15,126,226	; movq rdx,xmm4: recover the table pointer
+	mov	rdi,rsi	; output pointer = rsi (__bn_post4x_internal left the parked output pointer in rsi)
+	mov	rax,QWORD[40+rsp]	; mul4x_internal reads argument 7 relative to the entry rsp in rax
+	lea	r8,[32+rsp]	; mul4x_internal dereferences r8, so pass the address of the saved value
+
+	call	mul4x_internal
+
+	mov	rsi,QWORD[40+rsp]	; reload the saved entry rsp
+
+	mov	rax,1	; return value 1
+	mov	r15,QWORD[((-48))+rsi]	; restore the six pushed registers
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; drop the frame
+
+$L$power5_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_power5:
+
+global	bn_sqr8x_internal
+
+
+ALIGN	32
+bn_sqr8x_internal:
+__bn_sqr8x_internal:
+; Squares the multi-word value at rsi into the scratch area that starts at [48+8+rsp], then falls
+; through into __bn_sqr8x_reduction.
+; Register contract as set up by bn_power5: rsi = input pointer, r9 = byte count, r10 = -byte count.
+; The +8 in every rsp-relative offset skips this routine's own return address.
+; Structure visible below: cross products of distinct words are accumulated first
+; ($L$sqr4x_1st / $L$sqr4x_outer / $L$sqr4x_inner); $L$sqr4x_shift_n_add then doubles them
+; and adds each word's square (mul rax).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+	lea	rbp,[32+r10]	; rbp = 32 - byte count: negative running offset that counts up to 0
+	lea	rsi,[r9*1+rsi]	; rsi -> one past the last input word
+
+	mov	rcx,r9
+
+
+	mov	r14,QWORD[((-32))+rbp*1+rsi]	; word 0
+	lea	rdi,[((48+8))+r9*2+rsp]	; rdi -> end of the double-length scratch area
+	mov	rax,QWORD[((-24))+rbp*1+rsi]	; word 1
+	lea	rdi,[((-32))+rbp*1+rdi]
+	mov	rbx,QWORD[((-16))+rbp*1+rsi]	; word 2
+	mov	r15,rax
+
+	mul	r14	; word 1 * word 0
+	mov	r10,rax
+	mov	rax,rbx
+	mov	r11,rdx
+	mov	QWORD[((-24))+rbp*1+rdi],r10
+
+	mul	r14	; word 2 * word 0
+	add	r11,rax
+	mov	rax,rbx
+	adc	rdx,0
+	mov	QWORD[((-16))+rbp*1+rdi],r11
+	mov	r10,rdx
+
+
+	mov	rbx,QWORD[((-8))+rbp*1+rsi]	; word 3
+	mul	r15	; word 2 * word 1
+	mov	r12,rax
+	mov	rax,rbx
+	mov	r13,rdx
+
+	lea	rcx,[rbp]	; rcx = inner offset, counts up to 0
+	mul	r14	; word 3 * word 0
+	add	r10,rax
+	mov	rax,rbx
+	mov	r11,rdx
+	adc	r11,0
+	add	r10,r12
+	adc	r11,0
+	mov	QWORD[((-8))+rcx*1+rdi],r10
+	jmp	NEAR $L$sqr4x_1st
+
+ALIGN	32
+$L$sqr4x_1st:
+	mov	rbx,QWORD[rcx*1+rsi]	; next input word; each one is multiplied by both r15 and r14 (the first two words)
+	mul	r15
+	add	r13,rax
+	mov	rax,rbx
+	mov	r12,rdx
+	adc	r12,0
+
+	mul	r14
+	add	r11,rax
+	mov	rax,rbx
+	mov	rbx,QWORD[8+rcx*1+rsi]
+	mov	r10,rdx
+	adc	r10,0
+	add	r11,r13
+	adc	r10,0
+
+
+	mul	r15
+	add	r12,rax
+	mov	rax,rbx
+	mov	QWORD[rcx*1+rdi],r11	; store a finished word of the first row
+	mov	r13,rdx
+	adc	r13,0
+
+	mul	r14
+	add	r10,rax
+	mov	rax,rbx
+	mov	rbx,QWORD[16+rcx*1+rsi]
+	mov	r11,rdx
+	adc	r11,0
+	add	r10,r12
+	adc	r11,0
+
+	mul	r15
+	add	r13,rax
+	mov	rax,rbx
+	mov	QWORD[8+rcx*1+rdi],r10
+	mov	r12,rdx
+	adc	r12,0
+
+	mul	r14
+	add	r11,rax
+	mov	rax,rbx
+	mov	rbx,QWORD[24+rcx*1+rsi]
+	mov	r10,rdx
+	adc	r10,0
+	add	r11,r13
+	adc	r10,0
+
+
+	mul	r15
+	add	r12,rax
+	mov	rax,rbx
+	mov	QWORD[16+rcx*1+rdi],r11
+	mov	r13,rdx
+	adc	r13,0
+	lea	rcx,[32+rcx]	; four words consumed per iteration
+
+	mul	r14
+	add	r10,rax
+	mov	rax,rbx
+	mov	r11,rdx
+	adc	r11,0
+	add	r10,r12
+	adc	r11,0
+	mov	QWORD[((-8))+rcx*1+rdi],r10
+
+	cmp	rcx,0
+	jne	NEAR $L$sqr4x_1st	; loop until the offset reaches 0
+
+	mul	r15	; last input word * r15
+	add	r13,rax
+	lea	rbp,[16+rbp]	; outer offset advances by two words
+	adc	rdx,0
+	add	r13,r11
+	adc	rdx,0
+
+	mov	QWORD[rdi],r13	; top two words of this row
+	mov	r12,rdx
+	mov	QWORD[8+rdi],rdx
+	jmp	NEAR $L$sqr4x_outer
+
+ALIGN	32
+$L$sqr4x_outer:
+	mov	r14,QWORD[((-32))+rbp*1+rsi]	; the next pair of input words becomes the multipliers r14 and r15
+	lea	rdi,[((48+8))+r9*2+rsp]	; rdi -> end of the scratch area
+	mov	rax,QWORD[((-24))+rbp*1+rsi]
+	lea	rdi,[((-32))+rbp*1+rdi]
+	mov	rbx,QWORD[((-16))+rbp*1+rsi]
+	mov	r15,rax
+
+	mul	r14
+	mov	r10,QWORD[((-24))+rbp*1+rdi]	; unlike the first row, products are added to what is already stored
+	add	r10,rax
+	mov	rax,rbx
+	adc	rdx,0
+	mov	QWORD[((-24))+rbp*1+rdi],r10
+	mov	r11,rdx
+
+	mul	r14
+	add	r11,rax
+	mov	rax,rbx
+	adc	rdx,0
+	add	r11,QWORD[((-16))+rbp*1+rdi]
+	mov	r10,rdx
+	adc	r10,0
+	mov	QWORD[((-16))+rbp*1+rdi],r11
+
+	xor	r12,r12
+
+	mov	rbx,QWORD[((-8))+rbp*1+rsi]
+	mul	r15
+	add	r12,rax
+	mov	rax,rbx
+	adc	rdx,0
+	add	r12,QWORD[((-8))+rbp*1+rdi]
+	mov	r13,rdx
+	adc	r13,0
+
+	mul	r14
+	add	r10,rax
+	mov	rax,rbx
+	adc	rdx,0
+	add	r10,r12
+	mov	r11,rdx
+	adc	r11,0
+	mov	QWORD[((-8))+rbp*1+rdi],r10
+
+	lea	rcx,[rbp]	; inner offset starts at the outer offset
+	jmp	NEAR $L$sqr4x_inner
+
+ALIGN	32
+$L$sqr4x_inner:
+	mov	rbx,QWORD[rcx*1+rsi]	; two input words per iteration, each times r15 and r14
+	mul	r15
+	add	r13,rax
+	mov	rax,rbx
+	mov	r12,rdx
+	adc	r12,0
+	add	r13,QWORD[rcx*1+rdi]
+	adc	r12,0
+
+DB	0x67	; lone prefix byte emitted by the generator (looks like padding)
+	mul	r14
+	add	r11,rax
+	mov	rax,rbx
+	mov	rbx,QWORD[8+rcx*1+rsi]
+	mov	r10,rdx
+	adc	r10,0
+	add	r11,r13
+	adc	r10,0
+
+	mul	r15
+	add	r12,rax
+	mov	QWORD[rcx*1+rdi],r11
+	mov	rax,rbx
+	mov	r13,rdx
+	adc	r13,0
+	add	r12,QWORD[8+rcx*1+rdi]
+	lea	rcx,[16+rcx]
+	adc	r13,0
+
+	mul	r14
+	add	r10,rax
+	mov	rax,rbx
+	adc	rdx,0
+	add	r10,r12
+	mov	r11,rdx
+	adc	r11,0
+	mov	QWORD[((-8))+rcx*1+rdi],r10
+
+	cmp	rcx,0
+	jne	NEAR $L$sqr4x_inner	; loop until the offset reaches 0
+
+DB	0x67	; lone prefix byte (looks like padding)
+	mul	r15	; last input word * r15
+	add	r13,rax
+	adc	rdx,0
+	add	r13,r11
+	adc	rdx,0
+
+	mov	QWORD[rdi],r13	; top two words of this row
+	mov	r12,rdx
+	mov	QWORD[8+rdi],rdx
+
+	add	rbp,16
+	jnz	NEAR $L$sqr4x_outer	; next pair of multipliers until the outer offset reaches 0
+
+
+	mov	r14,QWORD[((-32))+rsi]	; last four input words: finish the remaining cross products (rbp is 0 here)
+	lea	rdi,[((48+8))+r9*2+rsp]	; rdi -> end of the scratch area
+	mov	rax,QWORD[((-24))+rsi]
+	lea	rdi,[((-32))+rbp*1+rdi]
+	mov	rbx,QWORD[((-16))+rsi]
+	mov	r15,rax
+
+	mul	r14
+	add	r10,rax
+	mov	rax,rbx
+	mov	r11,rdx
+	adc	r11,0
+
+	mul	r14
+	add	r11,rax
+	mov	rax,rbx
+	mov	QWORD[((-24))+rdi],r10
+	mov	r10,rdx
+	adc	r10,0
+	add	r11,r13
+	mov	rbx,QWORD[((-8))+rsi]	; last input word
+	adc	r10,0
+
+	mul	r15
+	add	r12,rax
+	mov	rax,rbx
+	mov	QWORD[((-16))+rdi],r11
+	mov	r13,rdx
+	adc	r13,0
+
+	mul	r14
+	add	r10,rax
+	mov	rax,rbx
+	mov	r11,rdx
+	adc	r11,0
+	add	r10,r12
+	adc	r11,0
+	mov	QWORD[((-8))+rdi],r10
+
+	mul	r15
+	add	r13,rax
+	mov	rax,QWORD[((-16))+rsi]	; second-to-last input word
+	adc	rdx,0
+	add	r13,r11
+	adc	rdx,0
+
+	mov	QWORD[rdi],r13
+	mov	r12,rdx
+	mov	QWORD[8+rdi],rdx
+
+	mul	rbx	; second-to-last word * last word: the final cross product
+	add	rbp,16
+	xor	r14,r14	; r14 = bit shifted in from the previous word pair (starts at 0)
+	sub	rbp,r9	; rbp = 16 - byte count: offset for walking the input again
+	xor	r15,r15	; r15 = saved carry (0 or all-ones) across the lea/shr sequences
+
+	add	rax,r12
+	adc	rdx,0
+	mov	QWORD[8+rdi],rax
+	mov	QWORD[16+rdi],rdx
+	mov	QWORD[24+rdi],r15	; zero the top word
+
+	mov	rax,QWORD[((-16))+rbp*1+rsi]	; first input word, to be squared
+	lea	rdi,[((48+8))+rsp]	; rdi -> start of the scratch area
+	xor	r10,r10	; the lowest word of the doubled sum is 0
+	mov	r11,QWORD[8+rdi]
+
+	lea	r12,[r10*2+r14]	; doubled word: (word << 1) | bit carried from the word below
+	shr	r10,63	; keep the bit shifted out
+	lea	r13,[r11*2+rcx]	; rcx is 0 here (the loops above exit when it reaches 0), so this is just word << 1
+	shr	r11,63
+	or	r13,r10
+	mov	r10,QWORD[16+rdi]
+	mov	r14,r11
+	mul	rax	; square of the input word
+	neg	r15	; restore CF from the saved carry
+	mov	r11,QWORD[24+rdi]
+	adc	r12,rax
+	mov	rax,QWORD[((-8))+rbp*1+rsi]	; next input word
+	mov	QWORD[rdi],r12
+	adc	r13,rdx
+
+	lea	rbx,[r10*2+r14]
+	mov	QWORD[8+rdi],r13
+	sbb	r15,r15	; save CF as 0 / all-ones
+	shr	r10,63
+	lea	r8,[r11*2+rcx]
+	shr	r11,63
+	or	r8,r10
+	mov	r10,QWORD[32+rdi]
+	mov	r14,r11
+	mul	rax
+	neg	r15
+	mov	r11,QWORD[40+rdi]
+	adc	rbx,rax
+	mov	rax,QWORD[rbp*1+rsi]
+	mov	QWORD[16+rdi],rbx
+	adc	r8,rdx
+	lea	rbp,[16+rbp]
+	mov	QWORD[24+rdi],r8
+	sbb	r15,r15
+	lea	rdi,[64+rdi]
+	jmp	NEAR $L$sqr4x_shift_n_add
+
+ALIGN	32
+$L$sqr4x_shift_n_add:
+	lea	r12,[r10*2+r14]	; each group: double two stored words (lea/shr/or), add one squared input word (mul rax), store
+	shr	r10,63
+	lea	r13,[r11*2+rcx]	; relies on rcx being 0 (left at 0 by the preceding multiply loops)
+	shr	r11,63
+	or	r13,r10
+	mov	r10,QWORD[((-16))+rdi]
+	mov	r14,r11	; bit shifted out, carried into the next pair
+	mul	rax
+	neg	r15	; restore CF from the saved carry
+	mov	r11,QWORD[((-8))+rdi]
+	adc	r12,rax
+	mov	rax,QWORD[((-8))+rbp*1+rsi]
+	mov	QWORD[((-32))+rdi],r12
+	adc	r13,rdx
+
+	lea	rbx,[r10*2+r14]
+	mov	QWORD[((-24))+rdi],r13
+	sbb	r15,r15	; save CF as 0 / all-ones
+	shr	r10,63
+	lea	r8,[r11*2+rcx]
+	shr	r11,63
+	or	r8,r10
+	mov	r10,QWORD[rdi]
+	mov	r14,r11
+	mul	rax
+	neg	r15
+	mov	r11,QWORD[8+rdi]
+	adc	rbx,rax
+	mov	rax,QWORD[rbp*1+rsi]
+	mov	QWORD[((-16))+rdi],rbx
+	adc	r8,rdx
+
+	lea	r12,[r10*2+r14]
+	mov	QWORD[((-8))+rdi],r8
+	sbb	r15,r15
+	shr	r10,63
+	lea	r13,[r11*2+rcx]
+	shr	r11,63
+	or	r13,r10
+	mov	r10,QWORD[16+rdi]
+	mov	r14,r11
+	mul	rax
+	neg	r15
+	mov	r11,QWORD[24+rdi]
+	adc	r12,rax
+	mov	rax,QWORD[8+rbp*1+rsi]
+	mov	QWORD[rdi],r12
+	adc	r13,rdx
+
+	lea	rbx,[r10*2+r14]
+	mov	QWORD[8+rdi],r13
+	sbb	r15,r15
+	shr	r10,63
+	lea	r8,[r11*2+rcx]
+	shr	r11,63
+	or	r8,r10
+	mov	r10,QWORD[32+rdi]
+	mov	r14,r11
+	mul	rax
+	neg	r15
+	mov	r11,QWORD[40+rdi]
+	adc	rbx,rax
+	mov	rax,QWORD[16+rbp*1+rsi]
+	mov	QWORD[16+rdi],rbx
+	adc	r8,rdx
+	mov	QWORD[24+rdi],r8
+	sbb	r15,r15
+	lea	rdi,[64+rdi]	; eight result words per iteration
+	add	rbp,32	; four input words per iteration
+	jnz	NEAR $L$sqr4x_shift_n_add
+
+	lea	r12,[r10*2+r14]	; tail: the last two input words
+DB	0x67	; lone prefix byte emitted by the generator (looks like padding)
+	shr	r10,63
+	lea	r13,[r11*2+rcx]
+	shr	r11,63
+	or	r13,r10
+	mov	r10,QWORD[((-16))+rdi]
+	mov	r14,r11
+	mul	rax
+	neg	r15
+	mov	r11,QWORD[((-8))+rdi]
+	adc	r12,rax
+	mov	rax,QWORD[((-8))+rsi]	; last input word
+	mov	QWORD[((-32))+rdi],r12
+	adc	r13,rdx
+
+	lea	rbx,[r10*2+r14]
+	mov	QWORD[((-24))+rdi],r13
+	sbb	r15,r15
+	shr	r10,63
+	lea	r8,[r11*2+rcx]
+	shr	r11,63
+	or	r8,r10
+	mul	rax
+	neg	r15
+	adc	rbx,rax
+	adc	r8,rdx
+	mov	QWORD[((-16))+rdi],rbx
+	mov	QWORD[((-8))+rdi],r8	; top word of the double-length square
+DB	102,72,15,126,213	; movq rbp,xmm2 (hand-encoded): reload the pointer bn_power5 parked in xmm2 for the reduction below
+__bn_sqr8x_reduction:
+	xor	rax,rax	; rax = carry between 8-word windows, initially 0
+	lea	rcx,[rbp*1+r9]	; rcx = rbp + byte count: end of the vector at rbp
+	lea	rdx,[((48+8))+r9*2+rsp]	; rdx = end of the double-length scratch area
+	mov	QWORD[((0+8))+rsp],rcx	; both end pointers are kept in the frame
+	lea	rdi,[((48+8))+r9*1+rsp]
+	mov	QWORD[((8+8))+rsp],rdx
+	neg	r9	; r9 = -byte count
+	jmp	NEAR $L$8x_reduction_loop
+
+ALIGN	32
+$L$8x_reduction_loop:
+	lea	rdi,[r9*1+rdi]	; step rdi back by the byte count to the start of the current window
+DB	0x66	; lone prefix byte emitted by the generator (looks like padding)
+	mov	rbx,QWORD[rdi]	; load eight scratch words into rbx, r9..r15
+	mov	r9,QWORD[8+rdi]
+	mov	r10,QWORD[16+rdi]
+	mov	r11,QWORD[24+rdi]
+	mov	r12,QWORD[32+rdi]
+	mov	r13,QWORD[40+rdi]
+	mov	r14,QWORD[48+rdi]
+	mov	r15,QWORD[56+rdi]
+	mov	QWORD[rdx],rax	; store the carry from the previous window at the end of the scratch area
+	lea	rdi,[64+rdi]
+
+DB	0x67	; lone prefix byte (looks like padding)
+	mov	r8,rbx
+	imul	rbx,QWORD[((32+8))+rsp]	; rbx = word 0 * the constant bn_power5 saved in its frame: reduction multiplier
+	mov	rax,QWORD[rbp]
+	mov	ecx,8	; eight reduction steps per window
+	jmp	NEAR $L$8x_reduce
+
+ALIGN	32
+$L$8x_reduce:
+	mul	rbx	; rbp[0] * multiplier
+	mov	rax,QWORD[8+rbp]
+	neg	r8	; only sets CF (r8 != 0); the low product word itself is never added
+	mov	r8,rdx
+	adc	r8,0
+
+	mul	rbx
+	add	r9,rax
+	mov	rax,QWORD[16+rbp]
+	adc	rdx,0
+	add	r8,r9
+	mov	QWORD[((48-8+8))+rcx*8+rsp],rbx	; remember this step's multiplier for $L$8x_tail
+	mov	r9,rdx
+	adc	r9,0
+
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[24+rbp]
+	adc	rdx,0
+	add	r9,r10
+	mov	rsi,QWORD[((32+8))+rsp]	; start computing the next step's multiplier early
+	mov	r10,rdx
+	adc	r10,0
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[32+rbp]
+	adc	rdx,0
+	imul	rsi,r8	; next multiplier = new low word * saved constant
+	add	r10,r11
+	mov	r11,rdx
+	adc	r11,0
+
+	mul	rbx
+	add	r12,rax
+	mov	rax,QWORD[40+rbp]
+	adc	rdx,0
+	add	r11,r12
+	mov	r12,rdx
+	adc	r12,0
+
+	mul	rbx
+	add	r13,rax
+	mov	rax,QWORD[48+rbp]
+	adc	rdx,0
+	add	r12,r13
+	mov	r13,rdx
+	adc	r13,0
+
+	mul	rbx
+	add	r14,rax
+	mov	rax,QWORD[56+rbp]
+	adc	rdx,0
+	add	r13,r14
+	mov	r14,rdx
+	adc	r14,0
+
+	mul	rbx
+	mov	rbx,rsi	; switch to the next step's multiplier
+	add	r15,rax
+	mov	rax,QWORD[rbp]
+	adc	rdx,0
+	add	r14,r15
+	mov	r15,rdx
+	adc	r15,0
+
+	dec	ecx
+	jnz	NEAR $L$8x_reduce
+
+	lea	rbp,[64+rbp]	; next eight words of the vector at rbp
+	xor	rax,rax
+	mov	rdx,QWORD[((8+8))+rsp]	; reload the scratch-area end pointer
+	cmp	rbp,QWORD[((0+8))+rsp]	; vector exhausted (only eight words long)?
+	jae	NEAR $L$8x_no_tail
+
+DB	0x66	; lone prefix byte emitted by the generator (looks like padding)
+	add	r8,QWORD[rdi]	; add the next eight scratch words to the running window
+	adc	r9,QWORD[8+rdi]
+	adc	r10,QWORD[16+rdi]
+	adc	r11,QWORD[24+rdi]
+	adc	r12,QWORD[32+rdi]
+	adc	r13,QWORD[40+rdi]
+	adc	r14,QWORD[48+rdi]
+	adc	r15,QWORD[56+rdi]
+	sbb	rsi,rsi	; save the carry as 0 / all-ones
+
+	mov	rbx,QWORD[((48+56+8))+rsp]	; first multiplier saved by $L$8x_reduce
+	mov	ecx,8
+	mov	rax,QWORD[rbp]
+	jmp	NEAR $L$8x_tail
+
+ALIGN	32
+$L$8x_tail:
+	mul	rbx	; eight words at rbp * one saved multiplier per iteration
+	add	r8,rax
+	mov	rax,QWORD[8+rbp]
+	mov	QWORD[rdi],r8	; lowest word is final: store it
+	mov	r8,rdx
+	adc	r8,0
+
+	mul	rbx
+	add	r9,rax
+	mov	rax,QWORD[16+rbp]
+	adc	rdx,0
+	add	r8,r9
+	lea	rdi,[8+rdi]	; the window slides up by one word
+	mov	r9,rdx
+	adc	r9,0
+
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[24+rbp]
+	adc	rdx,0
+	add	r9,r10
+	mov	r10,rdx
+	adc	r10,0
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[32+rbp]
+	adc	rdx,0
+	add	r10,r11
+	mov	r11,rdx
+	adc	r11,0
+
+	mul	rbx
+	add	r12,rax
+	mov	rax,QWORD[40+rbp]
+	adc	rdx,0
+	add	r11,r12
+	mov	r12,rdx
+	adc	r12,0
+
+	mul	rbx
+	add	r13,rax
+	mov	rax,QWORD[48+rbp]
+	adc	rdx,0
+	add	r12,r13
+	mov	r13,rdx
+	adc	r13,0
+
+	mul	rbx
+	add	r14,rax
+	mov	rax,QWORD[56+rbp]
+	adc	rdx,0
+	add	r13,r14
+	mov	r14,rdx
+	adc	r14,0
+
+	mul	rbx
+	mov	rbx,QWORD[((48-16+8))+rcx*8+rsp]	; next saved multiplier
+	add	r15,rax
+	adc	rdx,0
+	add	r14,r15
+	mov	rax,QWORD[rbp]
+	mov	r15,rdx
+	adc	r15,0
+
+	dec	ecx
+	jnz	NEAR $L$8x_tail
+
+	lea	rbp,[64+rbp]	; next eight words of the vector at rbp
+	mov	rdx,QWORD[((8+8))+rsp]
+	cmp	rbp,QWORD[((0+8))+rsp]	; reached its end?
+	jae	NEAR $L$8x_tail_done
+
+	mov	rbx,QWORD[((48+56+8))+rsp]	; restart from the first saved multiplier
+	neg	rsi	; restore CF from the saved carry
+	mov	rax,QWORD[rbp]
+	adc	r8,QWORD[rdi]
+	adc	r9,QWORD[8+rdi]
+	adc	r10,QWORD[16+rdi]
+	adc	r11,QWORD[24+rdi]
+	adc	r12,QWORD[32+rdi]
+	adc	r13,QWORD[40+rdi]
+	adc	r14,QWORD[48+rdi]
+	adc	r15,QWORD[56+rdi]
+	sbb	rsi,rsi	; save the carry again
+
+	mov	ecx,8
+	jmp	NEAR $L$8x_tail
+
+ALIGN	32
+$L$8x_tail_done:
+	xor	rax,rax
+	add	r8,QWORD[rdx]	; add the carry word stored at the end of the scratch area
+	adc	r9,0
+	adc	r10,0
+	adc	r11,0
+	adc	r12,0
+	adc	r13,0
+	adc	r14,0
+	adc	r15,0
+	adc	rax,0	; rax collects the carry out
+
+	neg	rsi	; restore CF from the saved carry
+$L$8x_no_tail:
+	adc	r8,QWORD[rdi]	; add the next eight scratch words
+	adc	r9,QWORD[8+rdi]
+	adc	r10,QWORD[16+rdi]
+	adc	r11,QWORD[24+rdi]
+	adc	r12,QWORD[32+rdi]
+	adc	r13,QWORD[40+rdi]
+	adc	r14,QWORD[48+rdi]
+	adc	r15,QWORD[56+rdi]
+	adc	rax,0	; rax = carry into the next window (stored via [rdx] at the top of the loop)
+	mov	rcx,QWORD[((-8))+rbp]	; last word of the vector at rbp
+	xor	rsi,rsi
+
+DB	102,72,15,126,213	; movq rbp,xmm2 (hand-encoded): rewind rbp to the start of the vector
+
+	mov	QWORD[rdi],r8	; write the window back
+	mov	QWORD[8+rdi],r9
+DB	102,73,15,126,217	; movq r9,xmm3 (hand-encoded): reload -byte count parked by bn_power5
+	mov	QWORD[16+rdi],r10
+	mov	QWORD[24+rdi],r11
+	mov	QWORD[32+rdi],r12
+	mov	QWORD[40+rdi],r13
+	mov	QWORD[48+rdi],r14
+	mov	QWORD[56+rdi],r15
+	lea	rdi,[64+rdi]
+
+	cmp	rdi,rdx	; more windows before the end of the scratch area?
+	jb	NEAR $L$8x_reduction_loop
+	DB	0F3h,0C3h		;repret
+
+; __bn_post4x_internal: adds (two's-complement negation of the vector at rbp) AND mask to the words at r9+rdi; writes the result to the pointer in xmm1.
+ALIGN	32
+__bn_post4x_internal:
+	mov	r12,QWORD[rbp]	; first word of the vector at rbp
+	lea	rbx,[r9*1+rdi]	; rbx = rdi + r9: source words (r9 is negative when called after the reduction)
+	mov	rcx,r9
+DB	102,72,15,126,207	; movq rdi,xmm1 (hand-encoded): rdi = output pointer parked by bn_power5
+	neg	rax	; incoming carry (0 or 1) -> mask (0 or all-ones)
+DB	102,72,15,126,206	; movq rsi,xmm1 (hand-encoded): rsi = same pointer, the input for the caller's next step
+	sar	rcx,3+2	; rcx = -(byte count / 32): counts up to 0, four words per iteration
+	dec	r12	; pre-decrement so NOT below yields the negation of word 0 (supplies the +1 of two's complement)
+	xor	r10,r10	; no incoming borrow
+	mov	r13,QWORD[8+rbp]
+	mov	r14,QWORD[16+rbp]
+	mov	r15,QWORD[24+rbp]
+	jmp	NEAR $L$sqr4x_sub_entry
+
+ALIGN	16
+$L$sqr4x_sub:
+	mov	r12,QWORD[rbp]	; next four words of the vector at rbp
+	mov	r13,QWORD[8+rbp]
+	mov	r14,QWORD[16+rbp]
+	mov	r15,QWORD[24+rbp]
+$L$sqr4x_sub_entry:
+	lea	rbp,[32+rbp]
+	not	r12	; complement ...
+	not	r13
+	not	r14
+	not	r15
+	and	r12,rax	; ... and keep only if the mask is all-ones (branch-free conditional subtract)
+	and	r13,rax
+	and	r14,rax
+	and	r15,rax
+
+	neg	r10	; restore CF from the saved carry
+	adc	r12,QWORD[rbx]
+	adc	r13,QWORD[8+rbx]
+	adc	r14,QWORD[16+rbx]
+	adc	r15,QWORD[24+rbx]
+	mov	QWORD[rdi],r12
+	lea	rbx,[32+rbx]
+	mov	QWORD[8+rdi],r13
+	sbb	r10,r10	; save CF as 0 / all-ones for the next iteration
+	mov	QWORD[16+rdi],r14
+	mov	QWORD[24+rdi],r15
+	lea	rdi,[32+rdi]
+
+	inc	rcx
+	jnz	NEAR $L$sqr4x_sub
+
+	mov	r10,r9	; leave r10 = incoming r9 and r9 = its negation, as __bn_sqr8x_internal expects
+	neg	r9
+	DB	0F3h,0C3h		;repret
+
+global	bn_from_montgomery
+
+ALIGN	32
+bn_from_montgomery:	; public entry: dispatches to bn_from_mont8x when the size is a multiple of 8, otherwise returns 0
+	test	DWORD[48+rsp],7	; stack argument at [48+rsp] (the same slot bn_from_mont8x loads into r9): any of the low 3 bits set?
+	jz	NEAR bn_from_mont8x	; multiple of 8: tail-jump, arguments untouched
+	xor	eax,eax	; unsupported size: return 0
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	32
+bn_from_mont8x:	; copies the input into a zero-extended stack buffer, runs __bn_sqr8x_reduction and __bn_post4x_internal on it, wipes the buffer and returns 1
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_bn_from_mont8x:
+	mov	rdi,rcx	; remap the Win64 arguments (rcx, rdx, r8, r9, two stack slots) onto rdi, rsi, rdx, rcx, r8, r9
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+
+
+
+DB	0x67
+	mov	rax,rsp	; rax = stack pointer before the pushes; stored at [40+rsp] of the new frame below
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$from_prologue:
+
+	shl	r9d,3	; r9 = size argument * 8 (bytes)
+	lea	r10,[r9*2+r9]	; r10 = 3 * byte count
+	neg	r9	; r9 = -(byte count) for the frame arithmetic below
+	mov	r8,QWORD[r8]	; replace the pointer in r8 by the qword it points to
+
+; Frame placement: rbp is set to rsp - 320 - 2*bytes, then shifted by an
+; amount derived from ((candidate frame - rdi) mod 4096) and 3*bytes.
+; NOTE(review): this looks like it keeps the frame from aliasing the buffer
+; at rdi modulo 4096 -- confirm against the perlasm source.
+
+
+
+
+	lea	r11,[((-320))+r9*2+rsp]
+	mov	rbp,rsp
+	sub	r11,rdi
+	and	r11,4095
+	cmp	r10,r11
+	jb	NEAR $L$from_sp_alt
+	sub	rbp,r11
+	lea	rbp,[((-320))+r9*2+rbp]
+	jmp	NEAR $L$from_sp_done
+
+ALIGN	32
+$L$from_sp_alt:
+	lea	r10,[((4096-320))+r9*2]
+	lea	rbp,[((-320))+r9*2+rbp]
+	sub	r11,r10
+	mov	r10,0
+	cmovc	r11,r10	; clamp the adjustment to 0 if the subtraction borrowed
+	sub	rbp,r11
+$L$from_sp_done:
+	and	rbp,-64	; align the frame to 64 bytes
+	mov	r11,rsp
+	sub	r11,rbp
+	and	r11,-4096
+	lea	rsp,[rbp*1+r11]	; first step: rsp = rbp + whole pages still to descend
+	mov	r10,QWORD[rsp]	; read from the new page
+	cmp	rsp,rbp
+	ja	NEAR $L$from_page_walk
+	jmp	NEAR $L$from_page_walk_done
+
+$L$from_page_walk:	; lower rsp 4096 bytes at a time, reading each page, until it reaches rbp
+	lea	rsp,[((-4096))+rsp]
+	mov	r10,QWORD[rsp]
+	cmp	rsp,rbp
+	ja	NEAR $L$from_page_walk
+$L$from_page_walk_done:
+
+	mov	r10,r9	; r10 = -(byte count)
+	neg	r9	; r9 = +(byte count) again
+
+; Frame layout as used by the code in this file:
+;   [0+rsp], [8+rsp]  read by __bn_sqr8x_reduction (as [0+8+rsp] / [8+8+rsp] inside the call)
+;   [32+rsp]          the qword loaded through r8 above
+;   [40+rsp]          caller's rsp (also read by mul_handler at $L$body_40)
+;   [48+rsp]          start of the work buffer
+
+
+
+
+	mov	QWORD[32+rsp],r8
+	mov	QWORD[40+rsp],rax
+
+$L$from_body:
+	mov	r11,r9	; r11 = bytes left to copy
+	lea	rax,[48+rsp]
+	pxor	xmm0,xmm0
+	jmp	NEAR $L$mul_by_1
+
+ALIGN	32
+$L$mul_by_1:	; per pass: copy 64 bytes from [rsi] to [rax] and zero the 64 bytes at [rax+r9]
+	movdqu	xmm1,XMMWORD[rsi]
+	movdqu	xmm2,XMMWORD[16+rsi]
+	movdqu	xmm3,XMMWORD[32+rsi]
+	movdqa	XMMWORD[r9*1+rax],xmm0
+	movdqu	xmm4,XMMWORD[48+rsi]
+	movdqa	XMMWORD[16+r9*1+rax],xmm0
+DB	0x48,0x8d,0xb6,0x40,0x00,0x00,0x00	; hand-encoded lea rsi,[64+rsi] with a 32-bit displacement
+	movdqa	XMMWORD[rax],xmm1
+	movdqa	XMMWORD[32+r9*1+rax],xmm0
+	movdqa	XMMWORD[16+rax],xmm2
+	movdqa	XMMWORD[48+r9*1+rax],xmm0
+	movdqa	XMMWORD[32+rax],xmm3
+	movdqa	XMMWORD[48+rax],xmm4
+	lea	rax,[64+rax]
+	sub	r11,64
+	jnz	NEAR $L$mul_by_1
+
+DB	102,72,15,110,207	; hand-encoded movq xmm1,rdi (read back by __bn_post4x_internal)
+DB	102,72,15,110,209	; hand-encoded movq xmm2,rcx
+DB	0x67
+	mov	rbp,rcx
+DB	102,73,15,110,218	; hand-encoded movq xmm3,r10
+	call	__bn_sqr8x_reduction
+	call	__bn_post4x_internal
+
+	pxor	xmm0,xmm0
+	lea	rax,[48+rsp]
+	jmp	NEAR $L$from_mont_zero
+
+ALIGN	32
+$L$from_mont_zero:	; wipe the work buffer: 64 bytes zeroed per pass, r9 reduced by 32 per pass
+	mov	rsi,QWORD[40+rsp]	; rsi = caller's rsp saved in the frame
+
+	movdqa	XMMWORD[rax],xmm0
+	movdqa	XMMWORD[16+rax],xmm0
+	movdqa	XMMWORD[32+rax],xmm0
+	movdqa	XMMWORD[48+rax],xmm0
+	lea	rax,[64+rax]
+	sub	r9,32
+	jnz	NEAR $L$from_mont_zero
+
+	mov	rax,1	; return value 1
+	mov	r15,QWORD[((-48))+rsi]	; reload the six registers pushed in the prologue, addressed from the caller's rsp
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]
+
+$L$from_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_from_mont8x:
+global	bn_scatter5
+
+ALIGN	16
+bn_scatter5:	; rcx = source words, edx = word count, r8 = table base, r9 = slot index: stores word i at r8 + r9*8 + i*256
+	cmp	edx,0
+	jz	NEAR $L$scatter_epilogue	; nothing to store for a zero count
+	lea	r8,[r9*8+r8]	; r8 = address of the selected 8-byte slot in the first 256-byte row
+$L$scatter:
+	mov	rax,QWORD[rcx]
+	lea	rcx,[8+rcx]
+	mov	QWORD[r8],rax
+	lea	r8,[256+r8]	; same slot in the next row (row stride 256 bytes)
+	sub	edx,1
+	jnz	NEAR $L$scatter
+$L$scatter_epilogue:
+	DB	0F3h,0C3h		;repret
+
+
+global	bn_gather5
+
+ALIGN	32
+bn_gather5:	; rcx = output words, edx = word count, r8 = table base, r9d = slot index; every pass reads a whole 256-byte row and selects one qword by mask
+$L$SEH_begin_bn_gather5:
+
+DB	0x4c,0x8d,0x14,0x24	; hand-encoded lea r10,[rsp]: keep the entry rsp for the epilogue
+DB	0x48,0x81,0xec,0x08,0x01,0x00,0x00	; hand-encoded sub rsp,0x108
+	lea	rax,[$L$inc]
+	and	rsp,-16	; 16-byte alignment for the movdqa accesses to the mask area
+
+	movd	xmm5,r9d
+	movdqa	xmm0,XMMWORD[rax]	; xmm0 = {0,0,1,1} (first row of $L$inc)
+	movdqa	xmm1,XMMWORD[16+rax]	; xmm1 = {2,2,2,2} (second row of $L$inc)
+	lea	r11,[128+r8]	; bias by 128 so displacements -128..112 cover one 256-byte table row
+	lea	rax,[128+rsp]	; same bias for the 256-byte mask area on the stack
+
+	pshufd	xmm5,xmm5,0	; broadcast the index to all four dwords
+	movdqa	xmm4,xmm1	; xmm4 = constant step {2,2,2,2}
+	movdqa	xmm2,xmm1
+	paddd	xmm1,xmm0	; next counter pair = previous + step
+	pcmpeqd	xmm0,xmm5	; turn the previous counters into a mask: all-ones where counter == index
+	movdqa	xmm3,xmm4
+
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[(-128)+rax],xmm0	; store mask 0 of 16; the same add/compare/store pattern repeats for the rest
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[(-112)+rax],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[(-96)+rax],xmm2
+	movdqa	xmm2,xmm4
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[(-80)+rax],xmm3
+	movdqa	xmm3,xmm4
+
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[(-64)+rax],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[(-48)+rax],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[(-32)+rax],xmm2
+	movdqa	xmm2,xmm4
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[(-16)+rax],xmm3
+	movdqa	xmm3,xmm4
+
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[rax],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[16+rax],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[32+rax],xmm2
+	movdqa	xmm2,xmm4
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[48+rax],xmm3
+	movdqa	xmm3,xmm4
+
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[64+rax],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[80+rax],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[96+rax],xmm2
+	movdqa	xmm2,xmm4
+	movdqa	XMMWORD[112+rax],xmm3	; mask 15 of 16: the mask area now covers 32 slot indices (two per mask)
+	jmp	NEAR $L$gather
+
+ALIGN	32
+$L$gather:	; one output word per pass: AND all 16 row vectors with their masks and OR the results together
+	pxor	xmm4,xmm4	; two accumulators (xmm4, xmm5), merged at the end of the pass
+	pxor	xmm5,xmm5
+	movdqa	xmm0,XMMWORD[((-128))+r11]
+	movdqa	xmm1,XMMWORD[((-112))+r11]
+	movdqa	xmm2,XMMWORD[((-96))+r11]
+	pand	xmm0,XMMWORD[((-128))+rax]
+	movdqa	xmm3,XMMWORD[((-80))+r11]
+	pand	xmm1,XMMWORD[((-112))+rax]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[((-96))+rax]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[((-80))+rax]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[((-64))+r11]
+	movdqa	xmm1,XMMWORD[((-48))+r11]
+	movdqa	xmm2,XMMWORD[((-32))+r11]
+	pand	xmm0,XMMWORD[((-64))+rax]
+	movdqa	xmm3,XMMWORD[((-16))+r11]
+	pand	xmm1,XMMWORD[((-48))+rax]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[((-32))+rax]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[((-16))+rax]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[r11]
+	movdqa	xmm1,XMMWORD[16+r11]
+	movdqa	xmm2,XMMWORD[32+r11]
+	pand	xmm0,XMMWORD[rax]
+	movdqa	xmm3,XMMWORD[48+r11]
+	pand	xmm1,XMMWORD[16+rax]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[32+rax]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[48+rax]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[64+r11]
+	movdqa	xmm1,XMMWORD[80+r11]
+	movdqa	xmm2,XMMWORD[96+r11]
+	pand	xmm0,XMMWORD[64+rax]
+	movdqa	xmm3,XMMWORD[112+r11]
+	pand	xmm1,XMMWORD[80+rax]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[96+rax]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[112+rax]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	por	xmm4,xmm5
+	lea	r11,[256+r11]	; next 256-byte table row
+	pshufd	xmm0,xmm4,0x4e	; swap the two qword halves ...
+	por	xmm0,xmm4	; ... and OR them so the selected qword ends up in the low half
+	movq	QWORD[rcx],xmm0
+	lea	rcx,[8+rcx]
+	sub	edx,1
+	jnz	NEAR $L$gather
+
+	lea	rsp,[r10]	; restore the entry rsp saved in r10
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_bn_gather5:
+
+ALIGN	64
+$L$inc:	; counter seeds loaded by bn_gather5 as two 16-byte vectors
+	DD	0,0,1,1	; initial counters
+	DD	2,2,2,2	; per-step increment
+DB	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105	; ASCII, NUL-terminated: "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>"
+DB	112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
+DB	99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
+DB	114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
+DB	71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
+DB	112,101,110,115,115,108,46,111,114,103,62,0
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+mul_handler:	; Win64 SEH language handler shared by the functions listed in .xdata; r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT (field names below follow the Win64 structure layouts)
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; room for the stack arguments of RtlVirtualUnwind
+
+	mov	rax,QWORD[120+r8]	; context->Rax
+	mov	rbx,QWORD[248+r8]	; context->Rip
+
+	mov	rsi,QWORD[8+r9]	; disp->ImageBase
+	mov	r11,QWORD[56+r9]	; disp->HandlerData: the three RVAs stored after the handler in .xdata
+
+	mov	r10d,DWORD[r11]	; HandlerData[0]
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; Rip below the first label: nothing pushed yet, rax still addresses the frame
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1]
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jb	NEAR $L$common_pop_regs	; Rip below the second label: registers pushed, rax still holds the original rsp
+
+	mov	rax,QWORD[152+r8]	; context->Rsp
+
+	mov	r10d,DWORD[8+r11]	; HandlerData[2]
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; Rip at or past the third label: registers already restored
+
+	lea	r10,[$L$mul_epilogue]
+	cmp	rbx,r10
+	ja	NEAR $L$body_40	; functions located after $L$mul_epilogue keep the saved rsp at [40+rsp]
+
+	mov	r10,QWORD[192+r8]	; context->R9
+	mov	rax,QWORD[8+r10*8+rax]	; saved stack pointer at [rsp + 8 + R9*8]
+
+	jmp	NEAR $L$common_pop_regs
+
+$L$body_40:
+	mov	rax,QWORD[40+rax]	; saved stack pointer from frame offset 40
+$L$common_pop_regs:	; rax = stack pointer before the pushes: copy the six saved registers into the context
+	mov	rbx,QWORD[((-8))+rax]
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	r15,QWORD[((-48))+rax]
+	mov	QWORD[144+r8],rbx	; context->Rbx
+	mov	QWORD[160+r8],rbp	; context->Rbp
+	mov	QWORD[216+r8],r12	; context->R12
+	mov	QWORD[224+r8],r13	; context->R13
+	mov	QWORD[232+r8],r14	; context->R14
+	mov	QWORD[240+r8],r15	; context->R15
+
+$L$common_seh_tail:
+	mov	rdi,QWORD[8+rax]	; rdi/rsi as saved by the WIN64 prologue at [8+rsp]/[16+rsp]
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context->Rsp
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+
+	mov	rdi,QWORD[40+r9]	; disp->ContextRecord (copy destination)
+	mov	rsi,r8	; copy source: the patched context
+	mov	ecx,154	; 154 qwords = 1232 bytes
+	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rsi,r9
+	xor	rcx,rcx	; arg1 = 0
+	mov	rdx,QWORD[8+rsi]	; arg2 = disp->ImageBase
+	mov	r8,QWORD[rsi]	; arg3 = disp->ControlPc
+	mov	r9,QWORD[16+rsi]	; arg4 = disp->FunctionEntry
+	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
+	lea	r11,[56+rsi]	; &disp->HandlerData
+	lea	r12,[24+rsi]	; &disp->EstablisherFrame
+	mov	QWORD[32+rsp],r10	; arg5
+	mov	QWORD[40+rsp],r11	; arg6
+	mov	QWORD[48+rsp],r12	; arg7
+	mov	QWORD[56+rsp],rcx	; arg8 = 0
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; return 1 (ExceptionContinueSearch)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4	; Win64 function table: one (begin, end, unwind info) triple of image-relative addresses per function
+ALIGN	4
+	DD	$L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase
+	DD	$L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase
+	DD	$L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase
+
+	DD	$L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase
+	DD	$L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase
+	DD	$L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase
+
+	DD	$L$SEH_begin_bn_power5 wrt ..imagebase
+	DD	$L$SEH_end_bn_power5 wrt ..imagebase
+	DD	$L$SEH_info_bn_power5 wrt ..imagebase
+
+	DD	$L$SEH_begin_bn_from_mont8x wrt ..imagebase
+	DD	$L$SEH_end_bn_from_mont8x wrt ..imagebase
+	DD	$L$SEH_info_bn_from_mont8x wrt ..imagebase
+	DD	$L$SEH_begin_bn_gather5 wrt ..imagebase	; bn_gather5 uses plain unwind codes instead of mul_handler
+	DD	$L$SEH_end_bn_gather5 wrt ..imagebase
+	DD	$L$SEH_info_bn_gather5 wrt ..imagebase
+
+section	.xdata rdata align=8	; unwind info records referenced from .pdata
+ALIGN	8
+$L$SEH_info_bn_mul_mont_gather5:
+DB	9,0,0,0	; version 1 with the exception-handler flag set; no prologue bytes, no unwind codes
+	DD	mul_handler wrt ..imagebase
+	DD	$L$mul_body wrt ..imagebase,$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase	; HandlerData[0..2], compared against Rip by mul_handler
+ALIGN	8
+$L$SEH_info_bn_mul4x_mont_gather5:
+DB	9,0,0,0
+	DD	mul_handler wrt ..imagebase
+	DD	$L$mul4x_prologue wrt ..imagebase,$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase	; HandlerData[0..2]
+ALIGN	8
+$L$SEH_info_bn_power5:
+DB	9,0,0,0
+	DD	mul_handler wrt ..imagebase
+	DD	$L$power5_prologue wrt ..imagebase,$L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase	; HandlerData[0..2]
+ALIGN	8
+$L$SEH_info_bn_from_mont8x:
+DB	9,0,0,0
+	DD	mul_handler wrt ..imagebase
+	DD	$L$from_prologue wrt ..imagebase,$L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase	; HandlerData[0..2]
+ALIGN	8
+$L$SEH_info_bn_gather5:
+DB	0x01,0x0b,0x03,0x0a	; version 1, no handler; prologue is 11 bytes; 3 unwind code slots; frame register r10, offset 0
+DB	0x0b,0x01,0x21,0x00	; at prologue offset 11: large allocation of 0x21*8 = 0x108 bytes (the sub rsp,0x108)
+DB	0x04,0xa3,0x00,0x00	; at prologue offset 4: set frame register (the lea r10,[rsp]); last two bytes are padding
+ALIGN	8
diff --git a/third_party/breakpad/BUILD.gn b/third_party/breakpad/BUILD.gn
new file mode 100644
index 0000000..87e77bf
--- /dev/null
+++ b/third_party/breakpad/BUILD.gn
@@ -0,0 +1,793 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Copied from chromium breakpad/ and modified for goma client.
+#  - chromium_code => goma_code
+#  - no_chromium_code => no_goma_code
+#  - is_win => os == "win"
+#  - is_mac => os == "mac"
+#  - is_linux => os == "linux"
+#  - drop is_android, is_chromeos, is_ios
+#  - remove fuzzer_test, breakpad_unittests, generate_test_dump
+#  - drop is_component_build
+#  - add goma_breakpad_config (for include path and warnings)
+#  - don't use "USE_PROTECTED_ALLOCATIONS=1" for mac
+
+if (os == "win") {
+  import("//build/config/win/visual_studio_version.gni")
+}
+
+config("goma_breakpad_config") {  # goma-only addition (see the change list in the header): include path and warning flags
+  include_dirs = [ "//" ]  # "//" is the source root
+  if (is_clang) {
+    cflags = [  # warning suppressions, clang only
+      "-Wno-unused-parameter",
+      "-Wno-missing-field-initializers",
+    ]
+  }
+}
+
+config("tools_config") {  # applied to the stackwalk/dump tools and the linux symupload target below
+  include_dirs = [
+    "breakpad/src",
+    "breakpad/src/third_party",
+  ]
+  if (is_clang) {
+    cflags = [ "-Wno-tautological-constant-out-of-range-compare" ]  # warning suppression, clang only
+  }
+}
+
+config("internal_config") {  # applied to the mac dump_syms, utilities and breakpad targets below
+  include_dirs = [ "breakpad/src" ]
+  defines = []  # starts empty so the debug-only define can be appended with +=
+  if (is_debug) {
+    # This is needed for GTMLogger to work correctly.
+    defines += [ "DEBUG" ]
+  }
+}
+
+config("client_config") {  # exported to dependents by the mac group("client") through public_configs
+  include_dirs = [ "breakpad/src" ]
+}
+
+config("handler_config") {  # NOTE(review): not referenced by any target in the visible part of this file -- confirm it is used further down
+  include_dirs = [ "breakpad/src" ]
+}
+
+config("sender_config") {  # NOTE(review): not referenced by any target in the visible part of this file -- confirm it is used further down
+  include_dirs = [ "breakpad/src" ]
+}
+
+config("breakpad_unittest_config") {  # NOTE(review): the header says breakpad_unittests was removed; confirm this config is still needed
+  # One of the breakpad unit tests checks that we can detect the proper build-id.
+  # We must override the build-id for this one target.
+  ldflags = [ "-Wl,--build-id=0x000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f" ]
+}
+
+# {micro,mini}dump_stackwalk and minidump_dump are tool-type executables
+# that do not build on Windows.
+if (os != "win") {
+  if (current_toolchain == host_toolchain) {
+    # Contains the code shared by both {micro,mini}dump_stackwalk.
+    static_library("stackwalk_common") {
+      # Always want these files included regardless of platform.
+      set_sources_assignment_filter([])  # an empty list clears the filter before sources is assigned
+      sources = [
+        "breakpad/src/common/path_helper.cc",
+        "breakpad/src/common/path_helper.h",
+        "breakpad/src/processor/basic_code_module.h",
+        "breakpad/src/processor/basic_code_modules.cc",
+        "breakpad/src/processor/basic_code_modules.h",
+        "breakpad/src/processor/basic_source_line_resolver.cc",
+        "breakpad/src/processor/call_stack.cc",
+        "breakpad/src/processor/cfi_frame_info.cc",
+        "breakpad/src/processor/cfi_frame_info.h",
+        "breakpad/src/processor/disassembler_x86.cc",
+        "breakpad/src/processor/disassembler_x86.h",
+        "breakpad/src/processor/dump_context.cc",
+        "breakpad/src/processor/dump_object.cc",
+        "breakpad/src/processor/logging.cc",
+        "breakpad/src/processor/logging.h",
+        "breakpad/src/processor/pathname_stripper.cc",
+        "breakpad/src/processor/pathname_stripper.h",
+        "breakpad/src/processor/proc_maps_linux.cc",
+        "breakpad/src/processor/process_state.cc",
+        "breakpad/src/processor/simple_symbol_supplier.cc",
+        "breakpad/src/processor/simple_symbol_supplier.h",
+        "breakpad/src/processor/source_line_resolver_base.cc",
+        "breakpad/src/processor/stack_frame_cpu.cc",
+        "breakpad/src/processor/stack_frame_symbolizer.cc",
+        "breakpad/src/processor/stackwalk_common.cc",
+        "breakpad/src/processor/stackwalker.cc",
+        "breakpad/src/processor/stackwalker_amd64.cc",
+        "breakpad/src/processor/stackwalker_amd64.h",
+        "breakpad/src/processor/stackwalker_arm.cc",
+        "breakpad/src/processor/stackwalker_arm.h",
+        "breakpad/src/processor/stackwalker_arm64.cc",
+        "breakpad/src/processor/stackwalker_arm64.h",
+        "breakpad/src/processor/stackwalker_mips.cc",
+        "breakpad/src/processor/stackwalker_mips.h",
+        "breakpad/src/processor/stackwalker_ppc.cc",
+        "breakpad/src/processor/stackwalker_ppc.h",
+        "breakpad/src/processor/stackwalker_ppc64.cc",
+        "breakpad/src/processor/stackwalker_ppc64.h",
+        "breakpad/src/processor/stackwalker_sparc.cc",
+        "breakpad/src/processor/stackwalker_sparc.h",
+        "breakpad/src/processor/stackwalker_x86.cc",
+        "breakpad/src/processor/stackwalker_x86.h",
+        "breakpad/src/processor/tokenize.cc",
+        "breakpad/src/processor/tokenize.h",
+
+        # libdisasm
+        "breakpad/src/third_party/libdisasm/ia32_implicit.c",
+        "breakpad/src/third_party/libdisasm/ia32_implicit.h",
+        "breakpad/src/third_party/libdisasm/ia32_insn.c",
+        "breakpad/src/third_party/libdisasm/ia32_insn.h",
+        "breakpad/src/third_party/libdisasm/ia32_invariant.c",
+        "breakpad/src/third_party/libdisasm/ia32_invariant.h",
+        "breakpad/src/third_party/libdisasm/ia32_modrm.c",
+        "breakpad/src/third_party/libdisasm/ia32_modrm.h",
+        "breakpad/src/third_party/libdisasm/ia32_opcode_tables.c",
+        "breakpad/src/third_party/libdisasm/ia32_opcode_tables.h",
+        "breakpad/src/third_party/libdisasm/ia32_operand.c",
+        "breakpad/src/third_party/libdisasm/ia32_operand.h",
+        "breakpad/src/third_party/libdisasm/ia32_reg.c",
+        "breakpad/src/third_party/libdisasm/ia32_reg.h",
+        "breakpad/src/third_party/libdisasm/ia32_settings.c",
+        "breakpad/src/third_party/libdisasm/ia32_settings.h",
+        "breakpad/src/third_party/libdisasm/libdis.h",
+        "breakpad/src/third_party/libdisasm/qword.h",
+        "breakpad/src/third_party/libdisasm/x86_disasm.c",
+        "breakpad/src/third_party/libdisasm/x86_format.c",
+        "breakpad/src/third_party/libdisasm/x86_imm.c",
+        "breakpad/src/third_party/libdisasm/x86_imm.h",
+        "breakpad/src/third_party/libdisasm/x86_insn.c",
+        "breakpad/src/third_party/libdisasm/x86_misc.c",
+        "breakpad/src/third_party/libdisasm/x86_operand_list.c",
+        "breakpad/src/third_party/libdisasm/x86_operand_list.h",
+      ]
+
+      defines = [ "BPLOG_MINIMUM_SEVERITY=SEVERITY_ERROR" ]  # same define as the two stackwalk executables that link this library
+
+      configs -= [ "//build/config/compiler:goma_code" ]  # goma_code -> no_goma_code swap (the header's chromium_code rename)
+      configs += [ "//build/config/compiler:no_goma_code" ]
+      configs += [ ":tools_config" ]
+    }
+
+    executable("microdump_stackwalk") {  # host-toolchain tool; shared processor code comes from :stackwalk_common
+      sources = [
+        "breakpad/src/processor/microdump.cc",
+        "breakpad/src/processor/microdump_processor.cc",
+        "breakpad/src/processor/microdump_stackwalk.cc",
+      ]
+
+      deps = [
+        ":stackwalk_common",
+        "//build/config:exe_and_shlib_deps",
+      ]
+
+      defines = [ "BPLOG_MINIMUM_SEVERITY=SEVERITY_ERROR" ]  # kept identical to stackwalk_common
+
+      configs -= [ "//build/config/compiler:goma_code" ]  # goma_code -> no_goma_code swap (the header's chromium_code rename)
+      configs += [ "//build/config/compiler:no_goma_code" ]
+      configs += [ ":tools_config" ]
+    }
+
+    executable("minidump_stackwalk") {  # host-toolchain tool; shared processor code comes from :stackwalk_common
+      sources = [
+        "breakpad/src/processor/exploitability.cc",
+        "breakpad/src/processor/minidump.cc",
+        "breakpad/src/processor/minidump_processor.cc",
+        "breakpad/src/processor/minidump_stackwalk.cc",
+      ]
+
+      deps = [
+        ":stackwalk_common",
+        "//build/config:exe_and_shlib_deps",
+      ]
+
+      defines = [ "BPLOG_MINIMUM_SEVERITY=SEVERITY_ERROR" ]  # kept identical to stackwalk_common
+
+      configs -= [ "//build/config/compiler:goma_code" ]  # goma_code -> no_goma_code swap (the header's chromium_code rename)
+      configs += [ "//build/config/compiler:no_goma_code" ]
+      configs += [ ":tools_config" ]
+
+      # Always want these files included regardless of platform.
+      set_sources_assignment_filter([])  # cleared only for the += below; the first sources list was assigned under the default filter
+      sources += [
+        "breakpad/src/processor/exploitability_linux.cc",
+        "breakpad/src/processor/exploitability_linux.h",
+        "breakpad/src/processor/exploitability_win.cc",
+        "breakpad/src/processor/exploitability_win.h",
+        "breakpad/src/processor/symbolic_constants_win.cc",
+        "breakpad/src/processor/symbolic_constants_win.h",
+      ]
+    }
+
+    executable("minidump_dump") {  # standalone: lists its processor sources directly instead of depending on :stackwalk_common
+      set_sources_assignment_filter([])  # an empty list clears the filter before sources is assigned
+      sources = [
+        "breakpad/src/processor/basic_code_module.h",
+        "breakpad/src/processor/basic_code_modules.cc",
+        "breakpad/src/processor/basic_code_modules.h",
+        "breakpad/src/processor/dump_context.cc",
+        "breakpad/src/processor/dump_object.cc",
+        "breakpad/src/processor/logging.cc",
+        "breakpad/src/processor/logging.h",
+        "breakpad/src/processor/minidump.cc",
+        "breakpad/src/processor/minidump_dump.cc",
+        "breakpad/src/processor/pathname_stripper.cc",
+        "breakpad/src/processor/pathname_stripper.h",
+        "breakpad/src/processor/proc_maps_linux.cc",
+      ]
+
+      configs += [ ":tools_config" ]
+
+      # There are some warnings in this code.
+      configs -= [ "//build/config/compiler:goma_code" ]
+      configs += [ "//build/config/compiler:no_goma_code" ]
+
+      deps = [
+        "//build/config:exe_and_shlib_deps",
+      ]
+    }
+  } else {
+    # Aliases for convenience: outside the host toolchain each name refers to the host-toolchain binary.
+    binary_symlink("microdump_stackwalk") {  # NOTE(review): no import defining binary_symlink is visible at the top of this file -- confirm it is in scope
+      binary_label = ":$target_name($host_toolchain)"
+    }
+    binary_symlink("minidump_stackwalk") {
+      binary_label = ":$target_name($host_toolchain)"
+    }
+    binary_symlink("minidump_dump") {
+      binary_label = ":$target_name($host_toolchain)"
+    }
+  }
+}
+
+# Mac --------------------------------------------------------------------------
+
+if (os == "mac") {
+  if (current_toolchain == host_toolchain) {
+    executable("dump_syms") {  # mac variant, host toolchain only
+      sources = [
+        "breakpad/src/common/dwarf/bytereader.cc",
+        "breakpad/src/common/dwarf/dwarf2diehandler.cc",
+        "breakpad/src/common/dwarf/dwarf2reader.cc",
+        "breakpad/src/common/dwarf/elf_reader.cc",
+        "breakpad/src/common/dwarf/elf_reader.h",
+        "breakpad/src/common/dwarf_cfi_to_module.cc",
+        "breakpad/src/common/dwarf_cu_to_module.cc",
+        "breakpad/src/common/dwarf_line_to_module.cc",
+        "breakpad/src/common/language.cc",
+        "breakpad/src/common/mac/arch_utilities.cc",
+        "breakpad/src/common/mac/arch_utilities.h",
+        "breakpad/src/common/mac/dump_syms.cc",
+        "breakpad/src/common/mac/file_id.cc",
+        "breakpad/src/common/mac/macho_id.cc",
+        "breakpad/src/common/mac/macho_reader.cc",
+        "breakpad/src/common/mac/macho_utilities.cc",
+        "breakpad/src/common/mac/macho_walker.cc",
+        "breakpad/src/common/md5.cc",
+        "breakpad/src/common/module.cc",
+        "breakpad/src/common/path_helper.cc",
+        "breakpad/src/common/path_helper.h",
+        "breakpad/src/common/stabs_reader.cc",
+        "breakpad/src/common/stabs_to_module.cc",
+        "breakpad/src/tools/mac/dump_syms/dump_syms_tool.cc",
+      ]
+
+      # For breakpad/src/common/stabs_reader.h.
+      defines = [ "HAVE_MACH_O_NLIST_H" ]
+      include_dirs = [ "breakpad/src/common/mac" ]
+
+      # The DWARF utilities require -funsigned-char.
+      cflags = [ "-funsigned-char" ]
+
+      configs += [ ":internal_config" ]
+
+      configs -= [ "//build/config/compiler:goma_code" ]  # goma_code -> no_goma_code swap (the header's chromium_code rename)
+      configs += [ "//build/config/compiler:no_goma_code" ]
+
+      # dwarf2reader.cc uses dynamic_cast.
+      configs -= [ "//build/config/compiler:no_rtti" ]
+      configs += [ "//build/config/compiler:rtti" ]
+
+      libs = [ "Foundation.framework" ]
+
+      if (!is_debug) {
+        # dump_syms crashes when built at -O1, -O2, and -O3.  It does
+        # not crash at -Os.  To play it safe, dump_syms is always built
+        # at -O0 until this can be sorted out.
+        # https://crbug.com/google-breakpad/329
+        # ORIGINAL: configs -= [ "//build/config/compiler:default_optimization" ]
+        configs -= [ "//build/config/compiler:optimize" ]  # goma's replacement for the ORIGINAL line quoted above
+        cflags += [ "-O0" ]
+      }
+
+      deps = [
+        "//build/config:exe_and_shlib_deps",
+      ]
+    }
+
+    executable("symupload") {  # mac variant (Objective-C sources), host toolchain only
+      sources = [
+        "breakpad/src/common/mac/HTTPMultipartUpload.m",
+        "breakpad/src/tools/mac/symupload/symupload.m",
+      ]
+
+      include_dirs = [ "breakpad/src/common/mac" ]
+
+      libs = [ "Foundation.framework" ]
+
+      configs -= [ "//build/config/compiler:goma_code" ]  # goma_code -> no_goma_code swap (the header's chromium_code rename)
+      configs += [ "//build/config/compiler:no_goma_code" ]
+
+      deps = [
+        "//build/config:exe_and_shlib_deps",
+      ]
+    }
+  } else {
+    binary_symlink("dump_syms") {  # outside the host toolchain: alias to the host-toolchain binary (NOTE(review): confirm binary_symlink is in scope; no import for it is visible)
+      binary_label = ":$target_name($host_toolchain)"
+    }
+    binary_symlink("symupload") {
+      binary_label = ":$target_name($host_toolchain)"
+    }
+  }
+}
+
+if (os == "mac") {
+  static_library("utilities") {  # mac helper library; a dependency of :crash_inspector and :breakpad below
+    sources = [
+      "breakpad/src/client/mac/crash_generation/ConfigFile.mm",
+      "breakpad/src/client/mac/handler/breakpad_nlist_64.cc",
+      "breakpad/src/client/mac/handler/dynamic_images.cc",
+      "breakpad/src/client/mac/handler/minidump_generator.cc",
+      "breakpad/src/client/minidump_file_writer.cc",
+      "breakpad/src/common/convert_UTF.c",
+      "breakpad/src/common/mac/MachIPC.mm",
+      "breakpad/src/common/mac/arch_utilities.cc",
+      "breakpad/src/common/mac/bootstrap_compat.cc",
+      "breakpad/src/common/mac/file_id.cc",
+      "breakpad/src/common/mac/launch_reporter.cc",
+      "breakpad/src/common/mac/macho_id.cc",
+      "breakpad/src/common/mac/macho_utilities.cc",
+      "breakpad/src/common/mac/macho_walker.cc",
+      "breakpad/src/common/mac/string_utilities.cc",
+      "breakpad/src/common/md5.cc",
+      "breakpad/src/common/simple_string_dictionary.cc",
+      "breakpad/src/common/string_conversion.cc",
+    ]
+
+    configs += [ ":internal_config" ]
+
+    # There are some warnings in this code.
+    configs -= [ "//build/config/compiler:goma_code" ]
+    configs += [ "//build/config/compiler:no_goma_code" ]
+  }
+
+  executable("crash_inspector") {  # mac-only executable; :breakpad depends on it
+    sources = [
+      "breakpad/src/client/mac/crash_generation/Inspector.mm",
+      "breakpad/src/client/mac/crash_generation/InspectorMain.mm",
+    ]
+
+    # TODO: 'mac_real_dsym': 1,
+
+    include_dirs = [
+      "breakpad/src/client/apple/Framework",
+      "breakpad/src/common/mac",
+      "breakpad/src",
+    ]
+    libs = [
+      "CoreServices.framework",
+      "Foundation.framework",
+    ]
+
+    deps = [
+      ":utilities",
+      "//build/config:exe_and_shlib_deps",
+    ]
+  }
+
+  # TODO this target has some mac_bundle_resources stuff.
+  # executable("crash_report_sender") {
+  # }
+  group("crash_report_sender") {  # empty placeholder so the ":crash_report_sender" dep of :breakpad resolves
+  }
+
+  config("breakpad_config") {  # exported to dependents of :breakpad through its public_configs
+    include_dirs = [ "breakpad/src/client/apple/Framework" ]
+  }
+
+  static_library("breakpad") {  # mac client library
+    sources = [
+      "breakpad/src/client/mac/Framework/Breakpad.mm",
+      "breakpad/src/client/mac/Framework/OnDemandServer.mm",
+      "breakpad/src/client/mac/crash_generation/crash_generation_client.cc",
+      "breakpad/src/client/mac/crash_generation/crash_generation_client.h",
+      "breakpad/src/client/mac/handler/exception_handler.cc",
+      "breakpad/src/client/mac/handler/protected_memory_allocator.cc",
+    ]
+
+    configs += [ ":internal_config" ]
+    public_configs = [ ":breakpad_config" ]
+
+    # If this is used, compiler_proxy won't start.
+    # defines = [ "USE_PROTECTED_ALLOCATIONS=1" ]
+    include_dirs = [ "breakpad/src/client/apple/Framework" ]  # same directory that :breakpad_config (public_configs above) already adds
+
+    deps = [
+      ":crash_inspector",
+      ":crash_report_sender",
+      ":utilities",
+    ]
+  }
+
+  group("client") {  # on mac "client" builds nothing itself; it only forwards :client_config to dependents
+    public_configs = [ ":client_config" ]
+  }
+}
+
+if (os == "linux") {
+  if (current_toolchain == host_toolchain) {
+    executable("symupload") {  # linux variant, host toolchain only
+      sources = [
+        "breakpad/src/common/linux/http_upload.cc",
+        "breakpad/src/common/linux/http_upload.h",
+        "breakpad/src/common/linux/symbol_upload.cc",
+        "breakpad/src/common/linux/symbol_upload.h",
+        "breakpad/src/tools/linux/symupload/sym_upload.cc",
+      ]
+
+      include_dirs = [  # the same two directories :tools_config (added below) already provides
+        "breakpad/src",
+        "breakpad/src/third_party",
+      ]
+
+      configs += [ ":tools_config" ]
+
+      libs = [ "dl" ]
+
+      deps = [
+        "//build/config:exe_and_shlib_deps",
+      ]
+    }
+
+    # dump_syms is a host tool, so only compile it for the host system.
+    executable("dump_syms") {
+      sources = [
+        "breakpad/src/common/dwarf/bytereader.cc",
+        "breakpad/src/common/dwarf/dwarf2diehandler.cc",
+        "breakpad/src/common/dwarf/dwarf2reader.cc",
+        "breakpad/src/common/dwarf/elf_reader.cc",
+        "breakpad/src/common/dwarf/elf_reader.h",
+        "breakpad/src/common/dwarf_cfi_to_module.cc",
+        "breakpad/src/common/dwarf_cfi_to_module.h",
+        "breakpad/src/common/dwarf_cu_to_module.cc",
+        "breakpad/src/common/dwarf_cu_to_module.h",
+        "breakpad/src/common/dwarf_line_to_module.cc",
+        "breakpad/src/common/dwarf_line_to_module.h",
+        "breakpad/src/common/language.cc",
+        "breakpad/src/common/language.h",
+        "breakpad/src/common/linux/crc32.cc",
+        "breakpad/src/common/linux/crc32.h",
+        "breakpad/src/common/linux/dump_symbols.cc",
+        "breakpad/src/common/linux/dump_symbols.h",
+        "breakpad/src/common/linux/elf_symbols_to_module.cc",
+        "breakpad/src/common/linux/elf_symbols_to_module.h",
+        "breakpad/src/common/linux/elfutils.cc",
+        "breakpad/src/common/linux/elfutils.h",
+        "breakpad/src/common/linux/file_id.cc",
+        "breakpad/src/common/linux/file_id.h",
+        "breakpad/src/common/linux/guid_creator.h",
+        "breakpad/src/common/linux/linux_libc_support.cc",
+        "breakpad/src/common/linux/linux_libc_support.h",
+        "breakpad/src/common/linux/memory_mapped_file.cc",
+        "breakpad/src/common/linux/memory_mapped_file.h",
+        "breakpad/src/common/module.cc",
+        "breakpad/src/common/module.h",
+        "breakpad/src/common/path_helper.cc",
+        "breakpad/src/common/path_helper.h",
+        "breakpad/src/common/stabs_reader.cc",
+        "breakpad/src/common/stabs_reader.h",
+        "breakpad/src/common/stabs_to_module.cc",
+        "breakpad/src/common/stabs_to_module.h",
+        "breakpad/src/tools/linux/dump_syms/dump_syms.cc",
+      ]
+
+      # There are some warnings in this code.
+      configs -= [ "//build/config/compiler:goma_code" ]
+      configs += [ "//build/config/compiler:no_goma_code" ]
+      configs += [ ":goma_breakpad_config" ]
+
+      # dwarf2reader.cc uses dynamic_cast. Because we typically
+      # don't support RTTI, we enable it for this single target. Since
+      # dump_syms doesn't share any object files with anything else,
+      # this doesn't end up polluting Chrome itself.
+      configs -= [ "//build/config/compiler:no_rtti" ]
+      configs += [ "//build/config/compiler:rtti" ]
+
+      # Breakpad rev 583 introduced this flag.
+      # Using this define, stabs_reader.h will include a.out.h to
+      # build on Linux.
+      defines = [ "HAVE_A_OUT_H" ]
+
+      include_dirs = [ "breakpad/src" ]
+
+      deps = [
+        "//build/config:exe_and_shlib_deps",
+      ]
+    }
+  } else {
+    # Aliases for convenience: outside the host toolchain each name refers to the host-toolchain binary.
+    binary_symlink("dump_syms") {  # NOTE(review): no import defining binary_symlink is visible at the top of this file -- confirm it is in scope
+      binary_label = ":dump_syms($host_toolchain)"
+    }
+    binary_symlink("symupload") {
+      binary_label = ":symupload($host_toolchain)"
+    }
+  }
+
+  static_library("client") {
+    # Want all these sources for both Linux and Android.
+    set_sources_assignment_filter([])
+    sources = [
+      "breakpad/src/client/linux/crash_generation/crash_generation_client.cc",
+      "breakpad/src/client/linux/crash_generation/crash_generation_client.h",
+      "breakpad/src/client/linux/dump_writer_common/mapping_info.h",
+      "breakpad/src/client/linux/dump_writer_common/thread_info.cc",
+      "breakpad/src/client/linux/dump_writer_common/thread_info.h",
+      "breakpad/src/client/linux/dump_writer_common/ucontext_reader.cc",
+      "breakpad/src/client/linux/dump_writer_common/ucontext_reader.h",
+      "breakpad/src/client/linux/handler/exception_handler.cc",
+      "breakpad/src/client/linux/handler/exception_handler.h",
+      "breakpad/src/client/linux/handler/minidump_descriptor.cc",
+      "breakpad/src/client/linux/handler/minidump_descriptor.h",
+      "breakpad/src/client/linux/log/log.cc",
+      "breakpad/src/client/linux/log/log.h",
+      "breakpad/src/client/linux/microdump_writer/microdump_writer.cc",
+      "breakpad/src/client/linux/microdump_writer/microdump_writer.h",
+      "breakpad/src/client/linux/minidump_writer/cpu_set.h",
+      "breakpad/src/client/linux/minidump_writer/directory_reader.h",
+      "breakpad/src/client/linux/minidump_writer/line_reader.h",
+      "breakpad/src/client/linux/minidump_writer/linux_core_dumper.cc",
+      "breakpad/src/client/linux/minidump_writer/linux_core_dumper.h",
+      "breakpad/src/client/linux/minidump_writer/linux_dumper.cc",
+      "breakpad/src/client/linux/minidump_writer/linux_dumper.h",
+      "breakpad/src/client/linux/minidump_writer/linux_ptrace_dumper.cc",
+      "breakpad/src/client/linux/minidump_writer/linux_ptrace_dumper.h",
+      "breakpad/src/client/linux/minidump_writer/minidump_writer.cc",
+      "breakpad/src/client/linux/minidump_writer/minidump_writer.h",
+      "breakpad/src/client/linux/minidump_writer/proc_cpuinfo_reader.h",
+      "breakpad/src/client/minidump_file_writer-inl.h",
+      "breakpad/src/client/minidump_file_writer.cc",
+      "breakpad/src/client/minidump_file_writer.h",
+      "breakpad/src/common/convert_UTF.c",
+      "breakpad/src/common/convert_UTF.h",
+      "breakpad/src/common/linux/elf_core_dump.cc",
+      "breakpad/src/common/linux/elf_core_dump.h",
+      "breakpad/src/common/linux/elfutils.cc",
+      "breakpad/src/common/linux/elfutils.h",
+      "breakpad/src/common/linux/file_id.cc",
+      "breakpad/src/common/linux/file_id.h",
+      "breakpad/src/common/linux/google_crashdump_uploader.cc",
+      "breakpad/src/common/linux/google_crashdump_uploader.h",
+      "breakpad/src/common/linux/guid_creator.cc",
+      "breakpad/src/common/linux/guid_creator.h",
+      "breakpad/src/common/linux/libcurl_wrapper.cc",
+      "breakpad/src/common/linux/libcurl_wrapper.h",
+      "breakpad/src/common/linux/linux_libc_support.cc",
+      "breakpad/src/common/linux/linux_libc_support.h",
+      "breakpad/src/common/linux/memory_mapped_file.cc",
+      "breakpad/src/common/linux/memory_mapped_file.h",
+      "breakpad/src/common/linux/safe_readlink.cc",
+      "breakpad/src/common/linux/safe_readlink.h",
+      "breakpad/src/common/memory_allocator.h",
+      "breakpad/src/common/simple_string_dictionary.cc",
+      "breakpad/src/common/simple_string_dictionary.h",
+      "breakpad/src/common/string_conversion.cc",
+      "breakpad/src/common/string_conversion.h",
+    ]
+
+    configs -= [ "//build/config/compiler:goma_code" ]
+    configs += [ "//build/config/compiler:no_goma_code" ]
+    configs += [ ":goma_breakpad_config" ]
+    public_configs = [ ":client_config" ]
+
+    if (current_cpu == "arm") {
+      # Avoid running out of registers in
+      # linux_syscall_support.h:sys_clone()'s inline assembly.
+      cflags = [ "-marm" ]
+    }
+
+    # Clang's -mstackrealign doesn't work well with
+    # linux_syscall_support.h hand written asm syscalls.
+    # See https://crbug.com/556393
+    # Dropped for goma
+    # configs -= [ "//build/config/compiler:clang_stackrealign" ]
+
+    libs = [ "dl" ]
+
+    include_dirs = [
+      ".",
+      "breakpad/src",
+      "breakpad/src/client",
+      "breakpad/src/third_party/linux/include",
+    ]
+  }
+
+  static_library("processor_support") {
+    set_sources_assignment_filter([])
+    sources = [
+      "breakpad/src/common/scoped_ptr.h",
+      "breakpad/src/processor/basic_code_modules.cc",
+      "breakpad/src/processor/basic_code_modules.h",
+      "breakpad/src/processor/dump_context.cc",
+      "breakpad/src/processor/dump_object.cc",
+      "breakpad/src/processor/logging.cc",
+      "breakpad/src/processor/logging.h",
+      "breakpad/src/processor/minidump.cc",
+      "breakpad/src/processor/pathname_stripper.cc",
+      "breakpad/src/processor/pathname_stripper.h",
+      "breakpad/src/processor/proc_maps_linux.cc",
+    ]
+
+    include_dirs = [
+      "breakpad/src",
+      "breakpad/src/client",
+      "breakpad/src/third_party/linux/include",
+      ".",
+    ]
+
+    # There are some warnings in this code.
+    configs -= [ "//build/config/compiler:goma_code" ]
+    configs += [ "//build/config/compiler:no_goma_code" ]
+  }
+
+  executable("linux_dumper_unittest_helper") {
+    set_sources_assignment_filter([])
+    testonly = true
+    sources = [
+      "breakpad/src/client/linux/minidump_writer/linux_dumper_unittest_helper.cc",
+    ]
+    deps = [
+      ":processor_support",
+      "//build/config:exe_and_shlib_deps",
+    ]
+
+    configs += [
+      ":client_config",
+      ":goma_breakpad_config",
+    ]
+  }
+
+  executable("minidump-2-core") {
+    set_sources_assignment_filter([])
+    sources = [
+      "breakpad/src/common/path_helper.cc",
+      "breakpad/src/common/path_helper.h",
+      "breakpad/src/tools/linux/md2core/minidump-2-core.cc",
+    ]
+
+    include_dirs = [ "breakpad/src" ]
+
+    configs += [ ":goma_breakpad_config" ]
+
+    deps = [
+      ":client",
+      "//build/config:exe_and_shlib_deps",
+    ]
+  }
+
+  executable("core-2-minidump") {
+    set_sources_assignment_filter([])
+    sources = [
+      "breakpad/src/tools/linux/core2md/core2md.cc",
+    ]
+
+    configs += [ ":goma_breakpad_config" ]
+
+    deps = [
+      ":client",
+      "//build/config:exe_and_shlib_deps",
+    ]
+
+    include_dirs = [ "breakpad/src" ]
+  }
+}
+
+if (os == "win") {
+  executable("dump_syms") {
+    # TODO using this with VS2015 may break the crash server.
+    # https://crbug.com/696671
+    include_dirs = [
+      "$visual_studio_path/DIA SDK/include",
+      "breakpad/src",
+    ]
+
+    sources = [
+      "breakpad/src/common/windows/dia_util.cc",
+      "breakpad/src/common/windows/dia_util.h",
+      "breakpad/src/common/windows/guid_string.cc",
+      "breakpad/src/common/windows/guid_string.h",
+      "breakpad/src/common/windows/omap.cc",
+      "breakpad/src/common/windows/omap.h",
+      "breakpad/src/common/windows/pdb_source_line_writer.cc",
+      "breakpad/src/common/windows/pdb_source_line_writer.h",
+      "breakpad/src/common/windows/string_utils-inl.h",
+      "breakpad/src/common/windows/string_utils.cc",
+      "breakpad/src/tools/windows/dump_syms/dump_syms.cc",
+    ]
+
+    lib_dirs = []
+    if (target_cpu == "x64") {
+      lib_dirs += [ "$visual_studio_path/DIA SDK/lib/amd64" ]
+    } else {
+      lib_dirs += [ "$visual_studio_path/DIA SDK/lib" ]
+    }
+
+    libs = [
+      "diaguids.lib",
+      "imagehlp.lib",
+    ]
+    if (is_clang) {
+      # clang complains about microsoft-specific goto extensions. Instead of
+      # rewriting decade-old, goto-ridden code, disable the warning.
+      cflags = [ "-Wno-microsoft-goto" ]
+    }
+  }
+}
+
+if (os == "win") {
+  group("client") {
+    public_configs = [ ":client_config" ]
+  }
+
+  config("breakpad_handler_warnings") {
+    if (is_clang) {
+      # See https://crbug.com/google-breakpad/658.
+      cflags = [ "-Wno-reorder" ]
+    }
+  }
+
+  static_library("breakpad_handler") {
+    configs += [ ":handler_config" ]
+
+    # Exported unconditionally: the enclosing block is already os == "win".
+    public_configs = [ ":handler_config" ]
+
+    defines = [ "BREAKPAD_NO_TERMINATE_THREAD" ]
+
+    sources = [
+      "breakpad/src/client/windows/crash_generation/client_info.cc",
+      "breakpad/src/client/windows/crash_generation/client_info.h",
+      "breakpad/src/client/windows/crash_generation/crash_generation_client.cc",
+      "breakpad/src/client/windows/crash_generation/crash_generation_client.h",
+      "breakpad/src/client/windows/crash_generation/crash_generation_server.cc",
+      "breakpad/src/client/windows/crash_generation/crash_generation_server.h",
+      "breakpad/src/client/windows/crash_generation/minidump_generator.cc",
+      "breakpad/src/client/windows/crash_generation/minidump_generator.h",
+      "breakpad/src/client/windows/handler/exception_handler.cc",
+      "breakpad/src/client/windows/handler/exception_handler.h",
+      "breakpad/src/common/windows/guid_string.cc",
+      "breakpad/src/common/windows/guid_string.h",
+      "breakpad/src/common/windows/string_utils-inl.h",
+      "breakpad/src/google_breakpad/common/minidump_format.h",
+    ]
+    configs += [ ":breakpad_handler_warnings" ]
+  }
+
+  source_set("breakpad_sender") {
+    sources = [
+      "breakpad/src/client/windows/sender/crash_report_sender.cc",
+      "breakpad/src/client/windows/sender/crash_report_sender.h",
+      "breakpad/src/common/windows/http_upload.cc",
+      "breakpad/src/common/windows/http_upload.h",
+    ]
+    configs += [ ":sender_config" ]
+    public_configs = [ ":sender_config" ]
+  }
+}
diff --git a/third_party/breakpad/LICENSE b/third_party/breakpad/LICENSE
new file mode 100644
index 0000000..95207bd
--- /dev/null
+++ b/third_party/breakpad/LICENSE
@@ -0,0 +1,50 @@
+Copyright (c) 2006, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------
+
+Copyright 2001-2004 Unicode, Inc.
+
+Disclaimer
+
+This source code is provided as is by Unicode, Inc. No claims are
+made as to fitness for any particular purpose. No warranties of any
+kind are expressed or implied. The recipient agrees to determine
+applicability of information provided. If this file has been
+purchased on magnetic or optical media from Unicode, Inc., the
+sole remedy for any claim will be exchange of defective media
+within 90 days of receipt.
+
+Limitations on Rights to Redistribute This Code
+
+Unicode, Inc. hereby grants the right to freely use the information
+supplied in this file in the creation of products supporting the
+Unicode Standard, and to make copies of this file in any form
+for internal or external distribution as long as this notice
+remains attached.
diff --git a/third_party/chromium_base/BUILD.gn b/third_party/chromium_base/BUILD.gn
new file mode 100644
index 0000000..5bbb8e0
--- /dev/null
+++ b/third_party/chromium_base/BUILD.gn
@@ -0,0 +1,92 @@
+config("chromium_base_config") {
+  include_dirs = [ "." ]
+}
+
+static_library("string") {
+  sources = [
+    "string_piece.cc",
+    "string_piece.h",
+    "string_util.h",
+  ]
+
+  public_configs = [ ":chromium_base_config" ]
+}
+
+executable("string_piece_unittest") {
+  testonly = true
+  sources = [
+    "string_piece_unittest.cc",
+  ]
+  deps = [
+    ":string",
+    "//build/config/sanitizers:deps",
+    "//third_party:glog",
+    "//third_party:gtest",
+    "//third_party:gtest_main",
+  ]
+}
+
+static_library("cpu") {
+  sources = [
+    "cpu.cc",
+    "cpu.h",
+  ]
+
+  public_configs = [ ":chromium_base_config" ]
+}
+
+executable("cpu_unittest") {
+  testonly = true
+  sources = [
+    "cpu_unittest.cc",
+  ]
+  deps = [
+    ":cpu",
+    ":string",
+    "//third_party:glog",
+    "//third_party:gtest",
+    "//third_party:gtest_main",
+  ]
+}
+
+static_library("platform_thread") {
+  sources = [
+    "platform_thread.cc",
+    "platform_thread.h",
+  ]
+
+  deps = [
+    "//third_party:glog",
+  ]
+
+  public_configs = [ ":chromium_base_config" ]
+}
+
+if (os == "mac") {
+  static_library("mac_version") {
+    sources = [
+      "mac_version.cc",
+      "mac_version.h",
+    ]
+
+    deps = [
+      "//third_party:glog",
+    ]
+
+    public_configs = [ ":chromium_base_config" ]
+  }
+
+  executable("mac_version_unittest") {
+    testonly = true
+    sources = [
+      "mac_version_unittest.cc",
+    ]
+    deps = [
+      ":mac_version",
+      ":string",
+      "//third_party:glog",
+      "//third_party:gtest",
+      "//third_party:gtest_main",
+    ]
+  }
+}
diff --git a/third_party/chromium_base/LICENSE b/third_party/chromium_base/LICENSE
new file mode 100644
index 0000000..a32e00c
--- /dev/null
+++ b/third_party/chromium_base/LICENSE
@@ -0,0 +1,27 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/third_party/chromium_base/cpu.cc b/third_party/chromium_base/cpu.cc
new file mode 100644
index 0000000..fc5c41d
--- /dev/null
+++ b/third_party/chromium_base/cpu.cc
@@ -0,0 +1,164 @@
+// Copied from chromium's base/cpu.cc and modified for goma.
+//
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "cpu.h"
+
+#include <string.h>
+
+#ifdef _WIN32
+#include <intrin.h>
+#include <immintrin.h>  // For _xgetbv()
+#endif
+
+namespace devtools_goma {
+
+CPU::CPU()
+  : signature_(0),
+    type_(0),
+    family_(0),
+    model_(0),
+    stepping_(0),
+    ext_model_(0),
+    ext_family_(0),
+    has_mmx_(false),
+    has_sse_(false),
+    has_sse2_(false),
+    has_sse3_(false),
+    has_ssse3_(false),
+    has_sse41_(false),
+    has_sse42_(false),
+    has_popcnt_(false),
+    has_avx_(false),
+    has_avx2_(false),
+    has_aesni_(false),
+    has_non_stop_time_stamp_counter_(false),
+    cpu_vendor_("unknown") {
+  Initialize();  // Fields CPUID cannot report keep the defaults above.
+}
+
+namespace {
+
+#ifndef _WIN32
+
+// Emulate MSVC's intrinsics with GCC-style inline asm (non-Windows only).
+// Runs CPUID with EAX=|info_type|, ECX=0; EAX..EDX go to cpu_info[0..3].
+static void __cpuid(int cpu_info[4], int info_type) {
+  __asm__ volatile("cpuid\n"
+                   : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]),
+                     "=d"(cpu_info[3])
+                   : "a"(info_type), "c"(0));
+}
+
+// _xgetbv returns the value of an Intel Extended Control Register (XCR).
+// Currently only XCR0 is defined by Intel so |xcr| should always be zero.
+uint64_t _xgetbv(uint32_t xcr) {
+  uint32_t eax, edx;
+  // XGETBV takes the register index in ECX and yields the value in EDX:EAX.
+  __asm__ volatile (
+    "xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
+  return (static_cast<uint64_t>(edx) << 32) | eax;
+}
+
+#endif  // !_WIN32
+
+}  // anonymous namespace
+
+void CPU::Initialize() {
+  // Runs CPUID once and caches vendor, signature, feature bits and brand.
+  int cpu_info[4] = {-1};
+  char cpu_string[48];
+
+  // __cpuid with an InfoType argument of 0 returns the number of valid Ids
+  // in CPUInfo[0] and the CPU identification string in the other three
+  // array elements. That string is not in linear order: the human readable
+  // order is CPUInfo[1] | CPUInfo[3] | CPUInfo[2], so CPUInfo[2] and
+  // CPUInfo[3] are swapped before memcpy copies the three elements to
+  // cpu_string.
+  __cpuid(cpu_info, 0);
+  int num_ids = cpu_info[0];
+  std::swap(cpu_info[2], cpu_info[3]);
+  memcpy(cpu_string, &cpu_info[1], 3 * sizeof(cpu_info[1]));
+  cpu_vendor_.assign(cpu_string, 3 * sizeof(cpu_info[1]));
+
+  // Interpret CPU feature information.
+  if (num_ids > 0) {
+    int cpu_info7[4] = {0};
+    __cpuid(cpu_info, 1);
+    if (num_ids >= 7) {
+      __cpuid(cpu_info7, 7);
+    }
+    signature_ = cpu_info[0];
+    stepping_ = cpu_info[0] & 0xf;
+    model_ = ((cpu_info[0] >> 4) & 0xf) + ((cpu_info[0] >> 12) & 0xf0);
+    family_ = (cpu_info[0] >> 8) & 0xf;
+    type_ = (cpu_info[0] >> 12) & 0x3;
+    ext_model_ = (cpu_info[0] >> 16) & 0xf;
+    ext_family_ = (cpu_info[0] >> 20) & 0xff;
+    has_mmx_ =   (cpu_info[3] & 0x00800000) != 0;
+    has_sse_ =   (cpu_info[3] & 0x02000000) != 0;
+    has_sse2_ =  (cpu_info[3] & 0x04000000) != 0;
+    has_sse3_ =  (cpu_info[2] & 0x00000001) != 0;
+    has_ssse3_ = (cpu_info[2] & 0x00000200) != 0;
+    has_sse41_ = (cpu_info[2] & 0x00080000) != 0;
+    has_sse42_ = (cpu_info[2] & 0x00100000) != 0;
+    has_popcnt_ = (cpu_info[2] & 0x00800000) != 0;
+
+    // AVX instructions will generate an illegal instruction exception unless
+    //   a) they are supported by the CPU,
+    //   b) XSAVE is supported by the CPU and
+    //   c) XSAVE is enabled by the kernel.
+    // See http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
+    //
+    // In addition, we have observed some crashes with the xgetbv instruction
+    // even after following Intel's example code. (See crbug.com/375968.)
+    // Because of that, we also test the XSAVE bit because its description in
+    // the CPUID documentation suggests that it signals xgetbv support.
+    has_avx_ =
+        (cpu_info[2] & 0x10000000) != 0 &&
+        (cpu_info[2] & 0x04000000) != 0 /* XSAVE */ &&
+        (cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ &&
+        (_xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */;
+    has_aesni_ = (cpu_info[2] & 0x02000000) != 0;
+    has_avx2_ = has_avx_ && (cpu_info7[1] & 0x00000020) != 0;
+  }
+
+  // Get the brand string of the cpu. Extended leaf numbers have the top bit
+  // set, so compare them as unsigned: a CPU without extended leaves may
+  // return a small value that would wrongly pass a signed comparison.
+  __cpuid(cpu_info, 0x80000000);
+  const unsigned int parameter_end = 0x80000004u;
+  const unsigned int max_parameter = static_cast<unsigned int>(cpu_info[0]);
+  if (max_parameter >= parameter_end) {
+    char* cpu_string_ptr = cpu_string;
+    for (unsigned int parameter = 0x80000002u; parameter <= parameter_end &&
+         cpu_string_ptr < &cpu_string[sizeof(cpu_string)]; parameter++) {
+      __cpuid(cpu_info, static_cast<int>(parameter));
+      memcpy(cpu_string_ptr, cpu_info, sizeof(cpu_info));
+      cpu_string_ptr += sizeof(cpu_info);
+    }
+    cpu_brand_.assign(cpu_string, cpu_string_ptr - cpu_string);
+  }
+
+  const unsigned int invariant_tsc_parameter = 0x80000007u;
+  if (max_parameter >= invariant_tsc_parameter) {
+    __cpuid(cpu_info, static_cast<int>(invariant_tsc_parameter));
+    has_non_stop_time_stamp_counter_ = (cpu_info[3] & (1 << 8)) != 0;
+  }
+}
+
+CPU::IntelMicroArchitecture CPU::GetIntelMicroArchitecture() const {
+  // The newest supported extension wins; PENTIUM is the fallback.
+  return has_avx2() ? AVX2
+       : has_avx() ? AVX
+       : has_sse42() ? SSE42
+       : has_sse41() ? SSE41
+       : has_ssse3() ? SSSE3
+       : has_sse3() ? SSE3
+       : has_sse2() ? SSE2
+       : has_sse() ? SSE : PENTIUM;
+}
+
+}  // namespace devtools_goma
diff --git a/third_party/chromium_base/cpu.h b/third_party/chromium_base/cpu.h
new file mode 100644
index 0000000..1f2f982
--- /dev/null
+++ b/third_party/chromium_base/cpu.h
@@ -0,0 +1,89 @@
+// Copied from chromium's base/cpu.h and modified for goma.
+//
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_CPU_H_
+#define BASE_CPU_H_
+
+#include <string>
+
+namespace devtools_goma {
+
+// Query information about the processor.
+class CPU {
+ public:
+  // Queries CPUID once; the accessors below return the cached results.
+  CPU();
+
+  enum IntelMicroArchitecture {
+    PENTIUM,
+    SSE,
+    SSE2,
+    SSE3,
+    SSSE3,
+    SSE41,
+    SSE42,
+    AVX,
+    AVX2,
+    MAX_INTEL_MICRO_ARCHITECTURE
+  };
+
+  // Accessors for the CPU information cached by the constructor.
+  const std::string& vendor_name() const { return cpu_vendor_; }
+  int signature() const { return signature_; }
+  int stepping() const { return stepping_; }
+  int model() const { return model_; }
+  int family() const { return family_; }
+  int type() const { return type_; }
+  int extended_model() const { return ext_model_; }
+  int extended_family() const { return ext_family_; }
+  bool has_mmx() const { return has_mmx_; }
+  bool has_sse() const { return has_sse_; }
+  bool has_sse2() const { return has_sse2_; }
+  bool has_sse3() const { return has_sse3_; }
+  bool has_ssse3() const { return has_ssse3_; }
+  bool has_sse41() const { return has_sse41_; }
+  bool has_sse42() const { return has_sse42_; }
+  bool has_popcnt() const { return has_popcnt_; }
+  bool has_avx() const { return has_avx_; }
+  bool has_avx2() const { return has_avx2_; }
+  bool has_aesni() const { return has_aesni_; }
+  bool has_non_stop_time_stamp_counter() const {
+    return has_non_stop_time_stamp_counter_;
+  }
+
+  IntelMicroArchitecture GetIntelMicroArchitecture() const;  // highest level
+  const std::string& cpu_brand() const { return cpu_brand_; }
+
+ private:
+  // Query the processor for CPUID information.
+  void Initialize();
+
+  int signature_;  // raw form of type, family, model, and stepping
+  int type_;  // processor type
+  int family_;  // family of the processor
+  int model_;  // model of processor, including the extended model bits
+  int stepping_;  // processor revision number
+  int ext_model_;  // extended model field (signature bits 16-19)
+  int ext_family_;  // extended family field (signature bits 20-27)
+  bool has_mmx_;
+  bool has_sse_;
+  bool has_sse2_;
+  bool has_sse3_;
+  bool has_ssse3_;
+  bool has_sse41_;
+  bool has_sse42_;
+  bool has_popcnt_;
+  bool has_avx_;  // CPU flag plus OS-enabled XSAVE state
+  bool has_avx2_;  // has_avx_ plus the CPUID leaf 7 AVX2 bit
+  bool has_aesni_;
+  bool has_non_stop_time_stamp_counter_;
+  std::string cpu_vendor_;  // vendor id string from CPUID leaf 0
+  std::string cpu_brand_;  // brand string; empty if CPUID can't supply it
+};
+
+}  // namespace devtools_goma
+
+#endif  // BASE_CPU_H_
diff --git a/third_party/chromium_base/cpu_unittest.cc b/third_party/chromium_base/cpu_unittest.cc
new file mode 100644
index 0000000..e26296e
--- /dev/null
+++ b/third_party/chromium_base/cpu_unittest.cc
@@ -0,0 +1,125 @@
+// Copied from chromium's base/cpu_unittest.cc and modified for goma.
+//
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "cpu.h"
+
+#include "gtest/gtest.h"
+
+#if _MSC_VER >= 1700
+// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
+#pragma warning(disable: 4752)
+#endif
+
+// Tests whether we can run extended instructions represented by the CPU
+// information. This test actually executes some extended instructions (such as
+// MMX, SSE, etc.) supported by the CPU and sees we can run them without
+// "undefined instruction" exceptions. That is, this test succeeds when this
+// test finishes without a crash.
+TEST(CPU, RunExtendedInstructions) {
+  // Retrieve the CPU information.
+  devtools_goma::CPU cpu;
+
+  ASSERT_TRUE(cpu.has_mmx());
+  ASSERT_TRUE(cpu.has_sse());
+  ASSERT_TRUE(cpu.has_sse2());
+
+// GCC and clang instruction test.
+#ifndef _WIN32
+  // Execute an MMX instruction.
+  __asm__ __volatile__("emms\n" : : : "mm0");
+
+  // Execute an SSE instruction.
+  __asm__ __volatile__("xorps %%xmm0, %%xmm0\n" : : : "xmm0");
+
+  // Execute an SSE 2 instruction.
+  __asm__ __volatile__("psrldq $0, %%xmm0\n" : : : "xmm0");
+
+  if (cpu.has_sse3()) {
+    // Execute an SSE 3 instruction.
+    __asm__ __volatile__("addsubpd %%xmm0, %%xmm0\n" : : : "xmm0");
+  }
+
+  if (cpu.has_ssse3()) {
+    // Execute a Supplemental SSE 3 instruction.
+    __asm__ __volatile__("psignb %%xmm0, %%xmm0\n" : : : "xmm0");
+  }
+
+  if (cpu.has_sse41()) {
+    // Execute an SSE 4.1 instruction.
+    __asm__ __volatile__("pmuldq %%xmm0, %%xmm0\n" : : : "xmm0");
+  }
+
+  if (cpu.has_sse42()) {
+    // Execute an SSE 4.2 instruction.
+    __asm__ __volatile__("crc32 %%eax, %%eax\n" : : : "eax");
+  }
+
+  if (cpu.has_popcnt()) {
+    // Execute a POPCNT instruction.
+    __asm__ __volatile__("popcnt %%eax, %%eax\n" : : : "eax");
+  }
+
+  if (cpu.has_avx()) {
+    // Execute an AVX instruction.
+    __asm__ __volatile__("vzeroupper\n" : : : "xmm0");
+  }
+
+  if (cpu.has_avx2()) {
+    // Execute an AVX 2 instruction.
+    __asm__ __volatile__("vpunpcklbw %%ymm0, %%ymm0, %%ymm0\n" : : : "xmm0");
+  }
+
+// ClangCL 32/64 bit test.
+#elif defined(_WIN32) && defined(__clang__)
+
+  // Execute an MMX instruction.
+  __asm emms;
+
+  // Execute an SSE instruction.
+  __asm xorps xmm0, xmm0;
+
+  // Execute an SSE 2 instruction.
+  __asm psrldq xmm0, 0;
+
+  if (cpu.has_sse3()) {
+    // Execute an SSE 3 instruction.
+    __asm addsubpd xmm0, xmm0;
+  }
+
+  if (cpu.has_ssse3()) {
+    // Execute a Supplemental SSE 3 instruction.
+    __asm psignb xmm0, xmm0;
+  }
+
+  if (cpu.has_sse41()) {
+    // Execute an SSE 4.1 instruction.
+    __asm pmuldq xmm0, xmm0;
+  }
+
+  if (cpu.has_sse42()) {
+    // Execute an SSE 4.2 instruction.
+    __asm crc32 eax, eax;
+  }
+
+  if (cpu.has_popcnt()) {
+    // Execute a POPCNT instruction.
+    __asm popcnt eax, eax;
+  }
+
+// Visual C 2012 required for AVX.
+#if _MSC_VER >= 1700
+  if (cpu.has_avx()) {
+    // Execute an AVX instruction.
+    __asm vzeroupper;
+  }
+
+  if (cpu.has_avx2()) {
+    // Execute an AVX 2 instruction.
+    __asm vpunpcklbw ymm0, ymm0, ymm0
+  }
+#endif  // _MSC_VER >= 1700
+#endif  // !_WIN32
+}
diff --git a/third_party/chromium_base/mac_version.cc b/third_party/chromium_base/mac_version.cc
new file mode 100644
index 0000000..02692e0
--- /dev/null
+++ b/third_party/chromium_base/mac_version.cc
@@ -0,0 +1,94 @@
+// This is copied from chromium/src/base/mac/mac_util.mm and modified for goma.
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "mac_version.h"
+
+#include <string.h>
+#include <string>
+#include <sys/utsname.h>
+
+#include <glog/logging.h>
+
+#include "string_piece.h"
+
+namespace {
+
+// Returns the running system's Darwin major version. Don't call this, it's
+// an implementation detail and its result is meant to be cached by
+// MacOSXMinorVersion.
+int DarwinMajorVersionInternal() {
+  // uname() is used on purpose instead of base::OperatingSystemVersionNumbers.
+  // The latter calls Gestalt, a higher-level operation than is needed here:
+  // on 10.6 it was observed to spawn threads (see http://crbug.com/53200),
+  // and it might also read files, perform other blocking operations, or be
+  // taught to do something else in the future.
+  //
+  // uname, by contrast, is implemented as a simple series of sysctl system
+  // calls that fetch data compiled right into the kernel, so no threads,
+  // blocking or other funny business is involved.
+  //
+  // Every failure below is logged and reported to the caller as 0.
+
+  struct utsname uname_info;
+  if (uname(&uname_info) != 0) {
+    PLOG(ERROR) << "uname";
+    return 0;
+  }
+
+  if (strcmp(uname_info.sysname, "Darwin") != 0) {
+    DLOG(ERROR) << "unexpected uname sysname " << uname_info.sysname;
+    return 0;
+  }
+
+  // The major version is the number before the first '.' in the release.
+  const char* const release = uname_info.release;
+  const char* const first_dot = strchr(release, '.');
+  int major_version = 0;
+  const bool parsed =
+      first_dot != nullptr &&
+      sscanf(std::string(release, first_dot - release).c_str(), "%d",
+             &major_version) == 1;
+
+  // A missing dot and an unparsable number are reported the same way.
+  if (!parsed) {
+    DLOG(ERROR) << "could not parse uname release " << uname_info.release;
+    return 0;
+  }
+
+  return major_version;
+}
+
+// Returns the running system's Mac OS X minor version. This is the |y| value
+// in 10.y or 10.y.z. Don't call this, it's an implementation detail and the
+// result is meant to be cached by MacOSXMinorVersion.
+int MacOSXMinorVersionInternal() {
+  int darwin_major_version = DarwinMajorVersionInternal();
+
+  // From Darwin 6 (Mac OS X 10.2) onward the Darwin major version is the
+  // Mac OS X minor version plus 4. Older Darwin versions, and versions that
+  // could not be determined (reported as 0), fail the CHECK and abort.
+  // Versions newer than Darwin 16 are only warned about, since the mapping
+  // may not hold for them. NOTE(review): Darwin 20+ is macOS 11+, not 10.16+;
+  // confirm callers only need an ordering, not the real 10.y number.
+  CHECK(darwin_major_version >= 6);
+  int mac_os_x_minor_version = darwin_major_version - 4;
+  DLOG_IF(WARNING, darwin_major_version > 16)
+      << "Assuming Darwin "
+      << darwin_major_version << " is Mac OS X 10."
+      << mac_os_x_minor_version;
+
+  return mac_os_x_minor_version;
+}
+
+}  // anonymous namespace
+
+namespace devtools_goma {
+
+int MacOSXMinorVersion() {
+  static const int cached_minor = MacOSXMinorVersionInternal();  // run once
+  return cached_minor;
+}
+
+}  // namespace devtools_goma
diff --git a/third_party/chromium_base/mac_version.h b/third_party/chromium_base/mac_version.h
new file mode 100644
index 0000000..45b66f1
--- /dev/null
+++ b/third_party/chromium_base/mac_version.h
@@ -0,0 +1,17 @@
+// This is copied from chromium/src/base/mac/mac_util.h and modified for goma.
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_MAC_MAC_VERSION_H_
+#define BASE_MAC_MAC_VERSION_H_
+
+namespace devtools_goma {
+
+// Returns the running system's Mac OS X minor version. This is the |y| value
+// in 10.y or 10.y.z.
+int MacOSXMinorVersion();
+
+}  // namespace devtools_goma
+
+#endif  // BASE_MAC_MAC_VERSION_H_
diff --git a/third_party/chromium_base/mac_version_unittest.cc b/third_party/chromium_base/mac_version_unittest.cc
new file mode 100644
index 0000000..1a538ff
--- /dev/null
+++ b/third_party/chromium_base/mac_version_unittest.cc
@@ -0,0 +1,13 @@
+// Copyright 2017 The Goma Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "mac_version.h"
+
+#include "gtest/gtest.h"
+
+TEST(MacVersion, MacOSXMinorVersion) {
+  // MacOSXMinorVersion() CHECK-fails for anything older than Darwin 6
+  // (Mac OS X 10.2), so a value it returns can never be less than 2.
+  EXPECT_GE(devtools_goma::MacOSXMinorVersion(), 2);
+}
diff --git a/third_party/chromium_base/platform_thread.cc b/third_party/chromium_base/platform_thread.cc
new file mode 100644
index 0000000..6aa5c01
--- /dev/null
+++ b/third_party/chromium_base/platform_thread.cc
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2011 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "platform_thread.h"
+
+#include "glog/logging.h"
+
+#if HAVE_CPU_PROFILER
+#include <gperftools/profiler.h>
+#endif
+
+namespace devtools_goma {
+
+#if defined (_WIN32)
+
+DWORD __stdcall ThreadFunc(void* params) {
+  // CreateThread entry point: |params| is the Delegate handed to Create().
+  // Runs the delegate's ThreadMain() on this thread and exits with code 0.
+  static_cast<PlatformThread::Delegate*>(params)->ThreadMain();
+  return 0;
+}
+
+// static
+bool PlatformThread::Create(Delegate* delegate,
+                            PlatformThreadHandle* thread_handle) {
+  CHECK(thread_handle);
+  // Run ThreadFunc on a new thread with |delegate| as its argument.  The
+  // handle is stored even on failure, in which case it is null.
+  const PlatformThreadHandle created =
+      CreateThread(nullptr, 0, ThreadFunc, delegate, 0, nullptr);
+  *thread_handle = created;
+  return created != nullptr;
+}
+
+// static -- waits without a timeout for the thread, then closes its handle.
+void PlatformThread::Join(PlatformThreadHandle thread_handle) {
+  CHECK(thread_handle);
+  DWORD result = WaitForSingleObject(thread_handle, INFINITE);
+  CHECK(result == WAIT_OBJECT_0);
+  CloseHandle(thread_handle);
+}
+
+// static -- negative durations are clamped: as a DWORD, -1 would be INFINITE.
+void PlatformThread::Sleep(int32_t duration_ms) {
+  ::Sleep(duration_ms > 0 ? static_cast<DWORD>(duration_ms) : 0);
+}
+
+#else
+
+void* ThreadFunc(void* params) {
+#if HAVE_CPU_PROFILER
+  // Register with the CPU profiler before any delegate code runs here.
+  ProfilerRegisterThread();
+#endif
+  // pthread start routine: |params| is the Delegate that Create() passed in.
+  static_cast<PlatformThread::Delegate*>(params)->ThreadMain();
+  return nullptr;
+}
+
+// static
+bool PlatformThread::Create(Delegate* delegate,
+                            PlatformThreadHandle* thread_handle) {
+  CHECK(thread_handle);
+  pthread_attr_t attributes;
+  pthread_attr_init(&attributes);
+  // pthread_create() returns its error number instead of setting errno, so
+  // copy it into errno for PLOG to report the real cause.
+  int rc = pthread_create(thread_handle, &attributes, ThreadFunc, delegate);
+  if (rc != 0) errno = rc;
+  PLOG_IF(ERROR, rc != 0) << "pthread_create";
+  pthread_attr_destroy(&attributes);
+  return rc == 0;
+}
+
+// static -- blocks in pthread_join(); its return status is not checked.
+void PlatformThread::Join(PlatformThreadHandle thread_handle) {
+  CHECK(thread_handle);
+  pthread_join(thread_handle, nullptr);
+}
+
+// static
+void PlatformThread::Sleep(int32_t duration_ms) {
+  // Split the request into whole seconds plus the sub-second remainder,
+  // the latter converted from milliseconds to nanoseconds.
+  const int32_t whole_sec = duration_ms / 1000;
+  const int32_t leftover_ms = duration_ms % 1000;
+  struct timespec pending, unslept;
+  pending.tv_sec = whole_sec;
+  pending.tv_nsec = leftover_ms * 1000 * 1000;
+  // A signal can cut nanosleep() short; resume with whatever time is left.
+  while (nanosleep(&pending, &unslept) == -1 && errno == EINTR) {
+    pending = unslept;
+  }
+}
+
+#endif  // _WIN32
+
+}  // namespace devtools_goma
diff --git a/third_party/chromium_base/platform_thread.h b/third_party/chromium_base/platform_thread.h
new file mode 100644
index 0000000..f1ce8d4
--- /dev/null
+++ b/third_party/chromium_base/platform_thread.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright 2011 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Simple platform thread implementation used to test our cross-platform locks.
+// This is a trimmed down version of Chromium base/threading/platform_thread.h.
+// Originated from sfntly.googlecode.com
+#ifndef DEVTOOLS_GOMA_BASE_PLATFORM_THREAD_H_
+#define DEVTOOLS_GOMA_BASE_PLATFORM_THREAD_H_
+
+#if defined (_WIN32)
+#include <windows.h>
+#else  // Assume pthread
+#include <errno.h>
+#include <pthread.h>
+#include <time.h>
+#endif  // if defined (_WIN32)
+
+#include <stdint.h>
+
+namespace devtools_goma {
+
+#if defined (_WIN32)
+typedef HANDLE PlatformThreadHandle;
+typedef DWORD PlatformThreadId;
+const PlatformThreadHandle kNullThreadHandle = NULL;
+// Thin wrappers over the global Win32 thread-id functions.
+inline PlatformThreadId GetCurrentThreadId() {
+  return ::GetCurrentThreadId();
+}
+inline PlatformThreadId GetThreadId(PlatformThreadHandle th) {
+  return ::GetThreadId(th);
+}
+inline bool THREAD_ID_IS_SELF(PlatformThreadId thread_id) {
+  return (thread_id == ::GetCurrentThreadId());
+}
+#else  // Assume pthread
+typedef pthread_t PlatformThreadHandle;
+typedef pthread_t PlatformThreadId;
+const PlatformThreadHandle kNullThreadHandle = 0;
+// With pthreads the handle and the id are the same pthread_t value.
+inline PlatformThreadId GetCurrentThreadId() {
+  return pthread_self();
+}
+inline PlatformThreadId GetThreadId(PlatformThreadHandle th) {
+  return th;
+}
+inline bool THREAD_ID_IS_SELF(PlatformThreadId thread_id) {
+  return pthread_equal(thread_id, pthread_self());
+}
+#endif  // defined (_WIN32)
+
+class PlatformThread {
+ public:
+  // Interface for the work a thread runs; implemented by users of Create().
+  class Delegate {
+   public:
+    virtual ~Delegate() {}
+    virtual void ThreadMain() = 0;
+  };
+
+  // Starts a thread with the default stack size and runs
+  // |delegate|->ThreadMain() on it.  Returns true and stores the new thread's
+  // handle in |*thread_handle| on success.
+  // NOTE: |delegate| must outlive the thread, and every successfully created
+  // handle must eventually be passed to Join() so that the system resources
+  // tied to the thread are released.
+  static bool Create(Delegate* delegate, PlatformThreadHandle* thread_handle);
+
+  // Blocks the caller until the thread behind |thread_handle| (obtained from
+  // Create()) has exited.  |thread_handle| is invalid afterwards.
+  static void Join(PlatformThreadHandle thread_handle);
+
+  // Suspends the calling thread for |duration_ms| milliseconds.
+  static void Sleep(int32_t duration_ms);
+
+  // Static-only utility class: it is neither copyable nor constructible.
+  PlatformThread(const PlatformThread&) = delete;
+  void operator=(const PlatformThread&) = delete;
+
+ private:
+  PlatformThread();
+};
+
+}  // namespace devtools_goma
+
+#endif  // DEVTOOLS_GOMA_BASE_PLATFORM_THREAD_H_
diff --git a/third_party/chromium_base/string_piece.cc b/third_party/chromium_base/string_piece.cc
new file mode 100644
index 0000000..97bdf06
--- /dev/null
+++ b/third_party/chromium_base/string_piece.cc
@@ -0,0 +1,217 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+// Copied from strings/stringpiece.cc with modifications
+
+#include "string_piece.h"
+
+#include <limits.h>
+
+#include <algorithm>
+#include <ostream>
+
+typedef StringPiece::size_type size_type;
+
+std::ostream& operator<<(std::ostream& o, const StringPiece& piece) {
+  // Raw write of exactly size() bytes; the data need not be NUL-terminated.
+  return o.write(piece.data(), static_cast<std::streamsize>(piece.size()));
+}
+
+bool operator==(const StringPiece& x, const StringPiece& y) {
+  // Bytes are only compared once the two lengths are known to agree.
+  const size_type len = x.size();
+  return len == y.size() &&
+         StringPiece::wordmemcmp(x.data(), y.data(), len) == 0;
+}
+
+void StringPiece::CopyToString(std::string* target) const {
+  target->assign(empty() ? "" : data(), size());  // "" replaces a null ptr_
+}
+
+void StringPiece::AppendToString(std::string* target) const {
+  if (empty()) return;  // nothing to add, and data() may be null
+  target->append(data(), size());
+}
+
+size_type StringPiece::copy(char* buf, size_type n, size_type pos) const {
+  const size_type ret = pos < length_ ? std::min(length_ - pos, n) : 0;
+  if (ret != 0) memcpy(buf, ptr_ + pos, ret);  // pos past end: copy nothing
+  return ret;
+}
+
+size_type StringPiece::find(const StringPiece& s, size_type pos) const {
+  if (pos > length_) return npos;
+  // A miss returns the end pointer, where a non-empty |s| cannot fit.
+  const char* const haystack_end = ptr_ + length_;
+  const char* const needle_end = s.ptr_ + s.length_;
+  const char* hit = std::search(ptr_ + pos, haystack_end, s.ptr_, needle_end);
+  const size_type offset = hit - ptr_;
+  return offset + s.length_ > length_ ? npos : offset;
+}
+
+size_type StringPiece::find(char c, size_type pos) const {
+  // Linear scan of [pos, length_); an out-of-range |pos| finds nothing.
+  for (size_type i = pos; i < length_; ++i) {
+    if (ptr_[i] == c) return i;
+  }
+  return npos;
+}
+
+size_type StringPiece::rfind(const StringPiece& s, size_type pos) const {
+  if (s.length_ > length_) return npos;
+  if (s.length_ == 0) return std::min(length_, pos);
+
+  // A match may start no later than |latest_start|, so the searched range
+  // stops s.length_ bytes past that offset.
+  const size_type latest_start = std::min(length_ - s.length_, pos);
+  const char* const limit = ptr_ + latest_start + s.length_;
+  const char* hit = std::find_end(ptr_, limit, s.ptr_, s.ptr_ + s.length_);
+  return hit == limit ? npos : static_cast<size_t>(hit - ptr_);
+}
+
+size_type StringPiece::rfind(char c, size_type pos) const {
+  if (empty()) return npos;
+
+  // |remaining| counts the positions still to be examined; after the
+  // decrement it is the index under test, so the counter never wraps.
+  size_type remaining = std::min(pos, length_ - 1) + 1;
+  while (remaining > 0) {
+    --remaining;
+    if (ptr_[remaining] == c) return remaining;
+  }
+  return npos;
+}
+
+// For each character in characters_wanted, sets the index corresponding
+// to the ASCII code of that character to 1 in table.  This is used by
+// the find_.*_of methods below to tell whether or not a character is in
+// the lookup table in constant time.
+// The argument `table' must be an array that is large enough to hold all
+// the possible values of an unsigned char.  Thus it should be declared
+// as follows:
+//   bool table[UCHAR_MAX + 1]
+static inline void BuildLookupTable(const StringPiece& characters_wanted,
+                                    bool* table) {
+  // The cast to unsigned char keeps bytes with the high bit set from
+  // turning into a negative table index.
+  for (const char wanted : characters_wanted) {
+    table[static_cast<unsigned char>(wanted)] = true;
+  }
+}
+
+size_type StringPiece::find_first_of(const StringPiece& s,
+                                     size_type pos) const {
+  // Nothing can match when either the haystack or the character set is empty.
+  if (empty() || s.empty()) return npos;
+
+  // A one-character set is just a plain character search; skip the table.
+  if (s.length_ == 1) return find(s.ptr_[0], pos);
+
+  // Membership table indexed by byte value, then a forward scan from |pos|.
+  bool wanted[UCHAR_MAX + 1] = { false };
+  BuildLookupTable(s, wanted);
+  size_type i = pos;
+  while (i < length_ && !wanted[static_cast<unsigned char>(ptr_[i])]) {
+    ++i;
+  }
+  // Reaching (or starting beyond) the end means no byte of |s| was seen.
+  return i < length_ ? i : npos;
+}
+
+size_type StringPiece::find_first_not_of(const StringPiece& s,
+                                         size_type pos) const {
+  // Nothing at or after |pos| to inspect (this also covers an empty piece).
+  if (pos >= length_) return npos;
+
+  // An empty set excludes nothing, so the first candidate, |pos|, is the
+  // answer; returning 0 here would ignore the caller's starting offset.
+  if (s.length_ == 0)
+    return pos;
+
+  // Avoid the cost of BuildLookupTable() for a single-character search.
+  if (s.length_ == 1)
+    return find_first_not_of(s.ptr_[0], pos);
+
+  bool lookup[UCHAR_MAX + 1] = { false };
+  BuildLookupTable(s, lookup);
+  for (size_type i = pos; i < length_; ++i) {
+    if (!lookup[static_cast<unsigned char>(ptr_[i])]) return i;
+  }
+  return npos;
+}
+
+size_type StringPiece::find_first_not_of(char c, size_type pos) const {
+  // Skip the run of |c| that starts at |pos|; what follows it, if anything,
+  // is the first differing byte.  An empty piece or an out-of-range |pos|
+  // never enters the loop and ends up at npos.
+  size_type i = pos;
+  while (i < length_ && ptr_[i] == c) {
+    ++i;
+  }
+
+  return i < length_ ? i : npos;
+}
+
+size_type StringPiece::find_last_of(const StringPiece& s, size_type pos) const {
+  // An empty haystack or an empty character set cannot produce a match.
+  if (empty() || s.empty()) return npos;
+
+  // A one-character set is a plain reverse character search; skip the table.
+  if (s.length_ == 1) return rfind(s.ptr_[0], pos);
+
+  bool wanted[UCHAR_MAX + 1] = { false };
+  BuildLookupTable(s, wanted);
+  // Scan backwards from min(pos, last index).  |remaining| is decremented
+  // before use, so the unsigned counter stops at zero instead of wrapping.
+  size_type remaining = std::min(pos, length_ - 1) + 1;
+  while (remaining > 0) {
+    --remaining;
+    if (wanted[static_cast<unsigned char>(ptr_[remaining])]) return remaining;
+  }
+  return npos;
+}
+
+size_type StringPiece::find_last_not_of(const StringPiece& s,
+                                        size_type pos) const {
+  if (empty()) return npos;
+
+  // The search starts at |pos|, clamped to the last valid index.
+  const size_type start = std::min(pos, length_ - 1);
+
+  // With an empty set no byte is excluded, so the starting byte qualifies.
+  if (s.empty()) return start;
+
+  // A one-character set is handled by the cheaper char overload.
+  if (s.length_ == 1) return find_last_not_of(s.ptr_[0], pos);
+
+  bool in_set[UCHAR_MAX + 1] = { false };
+  BuildLookupTable(s, in_set);
+  // |remaining| is decremented before it is used as an index, so the
+  // unsigned counter reaches zero without ever wrapping around.
+  for (size_type remaining = start + 1; remaining > 0; ) {
+    --remaining;
+    if (!in_set[static_cast<unsigned char>(ptr_[remaining])]) return remaining;
+  }
+  return npos;
+}
+
+size_type StringPiece::find_last_not_of(char c, size_type pos) const {
+  if (empty()) return npos;
+
+  // Walk backwards from min(pos, last index) past every trailing |c|.
+  // |remaining| is decremented before it indexes, so it never wraps.
+  size_type remaining = std::min(pos, length_ - 1) + 1;
+  while (remaining > 0) {
+    --remaining;
+    if (ptr_[remaining] != c) return remaining;
+  }
+  return npos;
+}
+
+StringPiece StringPiece::substr(size_type pos, size_type n) const {
+  // Both the start and the count are clamped to what this piece holds.
+  const size_type start = std::min(pos, length_);
+  return StringPiece(ptr_ + start, std::min(n, length_ - start));
+}
+
+const StringPiece::size_type StringPiece::npos = size_type(-1);
diff --git a/third_party/chromium_base/string_piece.h b/third_party/chromium_base/string_piece.h
new file mode 100644
index 0000000..0a5b3bf
--- /dev/null
+++ b/third_party/chromium_base/string_piece.h
@@ -0,0 +1,197 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+// Copied from strings/stringpiece.h with modifications
+//
+// A string-like object that points to a sized piece of memory.
+//
+// Functions or methods may use const StringPiece& parameters to accept either
+// a "const char*" or a "string" value that will be implicitly converted to
+// a StringPiece.  The implicit conversion means that it is often appropriate
+// to include this .h file in other files rather than forward-declaring
+// StringPiece as would be appropriate for most other classes.
+//
+// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
+// conversions from "const char*" to "string" and back again.
+//
+
+#ifndef BASE_STRINGS_STRING_PIECE_H_
+#define BASE_STRINGS_STRING_PIECE_H_
+#pragma once
+
+#include <stddef.h>
+#include <string.h>
+
+#include <algorithm>
+#include <iosfwd>
+#include <string>
+
+class StringPiece {
+ public:
+  typedef std::char_traits<char> traits_type;
+  typedef size_t size_type;
+
+ private:
+  const char*   ptr_;     // Start of the referenced bytes; may be NULL.
+  size_type     length_;  // Number of bytes referenced.
+
+ public:
+  // We provide non-explicit singleton constructors so users can pass
+  // in a "const char*" or a "string" wherever a "StringPiece" is
+  // expected.
+  StringPiece() : ptr_(NULL), length_(0) { }
+  StringPiece(const char* str)  // NOLINT(runtime/explicit)
+    : ptr_(str), length_((str == NULL) ? 0 : strlen(str)) { }
+  StringPiece(const std::string& str)  // NOLINT(runtime/explicit)
+    : ptr_(str.data()), length_(str.size()) { }
+  StringPiece(const char* offset, size_type len)
+    : ptr_(offset), length_(len) { }
+
+  // data() may return a pointer to a buffer with embedded NULs, and the
+  // returned buffer may or may not be null terminated.  Therefore it is
+  // typically a mistake to pass data() to a routine that expects a NUL
+  // terminated string.
+  const char* data() const { return ptr_; }
+  size_type size() const { return length_; }
+  size_type length() const { return length_; }
+  bool empty() const { return length_ == 0; }
+  // set(): re-points this piece at another buffer; no bytes are copied.
+  void set(const char* data, size_type len) {
+    ptr_ = data;
+    length_ = len;
+  }
+  void set(const char* str) {
+    ptr_ = str;
+    length_ = str ? strlen(str) : 0;
+  }
+  void set(const void* data, size_type len) {
+    ptr_ = reinterpret_cast<const char*>(data);
+    length_ = len;
+  }
+  // No bounds checking: |i| is used to index the buffer as-is.
+  char operator[](size_type i) const { return ptr_[i]; }
+  // Drops the first |n| bytes; |n| is not checked against size().
+  void remove_prefix(size_type n) {
+    ptr_ += n;
+    length_ -= n;
+  }
+  // Drops the last |n| bytes; |n| is not checked against size().
+  void remove_suffix(size_type n) {
+    length_ -= n;
+  }
+  // Three-way comparison: memcmp over the common prefix, then by length.
+  int compare(const StringPiece& x) const {
+    int r = wordmemcmp(ptr_, x.ptr_, std::min(length_, x.length_));
+    if (r == 0) {
+      if (length_ < x.length_) r = -1;
+      else if (length_ > x.length_) r = +1;
+    }
+    return r;
+  }
+
+  // Explicit string conversion operator; a null data() gives an empty string.
+  template <class A>
+  explicit operator std::basic_string<char, traits_type, A>() const {
+    if (!data()) return {};
+    return std::basic_string<char, traits_type, A>(data(), size());
+  }
+
+  // to_string was deprecated; use std::string(sp) instead.
+  // as_string was deprecated; use std::string(sp) instead.
+
+  void CopyToString(std::string* target) const;
+  void AppendToString(std::string* target) const;
+
+  // Standard STL container boilerplate; iterators are plain const char*.
+  typedef char value_type;
+  typedef const char* pointer;
+  typedef const char& reference;
+  typedef const char& const_reference;
+  typedef ptrdiff_t difference_type;
+  static const size_type npos;
+  typedef const char* const_iterator;
+  typedef const char* iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+  typedef std::reverse_iterator<iterator> reverse_iterator;
+  iterator begin() const { return ptr_; }
+  iterator end() const { return ptr_ + length_; }
+  const_reverse_iterator rbegin() const {
+    return const_reverse_iterator(ptr_ + length_);
+  }
+  const_reverse_iterator rend() const {
+    return const_reverse_iterator(ptr_);
+  }
+  // Both of these simply report the current length.
+  size_type max_size() const { return length_; }
+  size_type capacity() const { return length_; }
+
+  size_type copy(char* buf, size_type n, size_type pos = 0) const;
+  // Search family: each returns the index of the match, or npos if none.
+  size_type find(const StringPiece& s, size_type pos = 0) const;
+  size_type find(char c, size_type pos = 0) const;
+  size_type rfind(const StringPiece& s, size_type pos = npos) const;
+  size_type rfind(char c, size_type pos = npos) const;
+
+  size_type find_first_of(const StringPiece& s, size_type pos = 0) const;
+  size_type find_first_of(char c, size_type pos = 0) const {
+    return find(c, pos);
+  }
+  size_type find_first_not_of(const StringPiece& s, size_type pos = 0) const;
+  size_type find_first_not_of(char c, size_type pos = 0) const;
+  size_type find_last_of(const StringPiece& s, size_type pos = npos) const;
+  size_type find_last_of(char c, size_type pos = npos) const {
+    return rfind(c, pos);
+  }
+  size_type find_last_not_of(const StringPiece& s, size_type pos = npos) const;
+  size_type find_last_not_of(char c, size_type pos = npos) const;
+  // Sub-piece of up to |n| bytes from |pos|; both are clamped to this piece.
+  StringPiece substr(size_type pos, size_type n = npos) const;
+  // Byte-wise comparison of the first |N| bytes (a thin memcmp wrapper).
+  static int wordmemcmp(const char* p, const char* p2, size_type N) {
+    return memcmp(p, p2, N);
+  }
+};
+
+bool operator==(const StringPiece& x, const StringPiece& y);
+
+inline bool operator!=(const StringPiece& x, const StringPiece& y) {
+  return !(x == y);
+}
+
+inline bool operator<(const StringPiece& x, const StringPiece& y) {
+  // compare() already orders pieces by memcmp over their common prefix and
+  // breaks ties by length (shorter first), which is the ordering needed here.
+  return x.compare(y) < 0;
+}
+
+inline bool operator>(const StringPiece& x, const StringPiece& y) {
+  return y < x;
+}
+
+inline bool operator<=(const StringPiece& x, const StringPiece& y) {
+  return !(x > y);
+}
+
+inline bool operator>=(const StringPiece& x, const StringPiece& y) {
+  return !(x < y);
+}
+
+// allow StringPiece to be logged (needed for unit testing).
+extern std::ostream& operator<<(std::ostream& o, const StringPiece& piece);
+
+namespace absl {
+
+using string_view = StringPiece;
+
+// Returns `s.substr(pos, n)` with `pos` first limited to `s.size()`.
+// Exists because std::string_view::substr throws when `pos > size()`;
+// kept to support b/37991613.
+inline string_view ClippedSubstr(string_view s, size_t pos,
+                                 size_t n = string_view::npos) {
+  const size_t start = std::min<size_t>(pos, s.size());
+  return s.substr(start, n);
+}
+
+}  // namespace absl
+
+#endif  // BASE_STRINGS_STRING_PIECE_H_
diff --git a/third_party/chromium_base/string_piece_unittest.cc b/third_party/chromium_base/string_piece_unittest.cc
new file mode 100644
index 0000000..62584c9
--- /dev/null
+++ b/third_party/chromium_base/string_piece_unittest.cc
@@ -0,0 +1,517 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <string>
+
+#include "string_piece.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+TEST(StringPieceTest, CheckComparisonOperators) {
+#define CMP_Y(op, x, y)                                               \
+  ASSERT_TRUE( (StringPiece((x)) op StringPiece((y))));               \
+  ASSERT_TRUE( (StringPiece((x)).compare(StringPiece((y))) op 0))
+  // CMP_Y requires `x op y` and `x.compare(y) op 0`; CMP_N requires both false.
+#define CMP_N(op, x, y)                                          \
+  ASSERT_FALSE(StringPiece((x)) op StringPiece((y)));               \
+  ASSERT_FALSE(StringPiece((x)).compare(StringPiece((y))) op 0)
+
+  CMP_Y(==, "",   "");
+  CMP_Y(==, "a",  "a");
+  CMP_Y(==, "aa", "aa");
+  CMP_N(==, "a",  "");
+  CMP_N(==, "",   "a");
+  CMP_N(==, "a",  "b");
+  CMP_N(==, "a",  "aa");
+  CMP_N(==, "aa", "a");
+
+  CMP_N(!=, "",   "");
+  CMP_N(!=, "a",  "a");
+  CMP_N(!=, "aa", "aa");
+  CMP_Y(!=, "a",  "");
+  CMP_Y(!=, "",   "a");
+  CMP_Y(!=, "a",  "b");
+  CMP_Y(!=, "a",  "aa");
+  CMP_Y(!=, "aa", "a");
+
+  CMP_Y(<, "a",  "b");
+  CMP_Y(<, "a",  "aa");
+  CMP_Y(<, "aa", "b");
+  CMP_Y(<, "aa", "bb");
+  CMP_N(<, "a",  "a");
+  CMP_N(<, "b",  "a");
+  CMP_N(<, "aa", "a");
+  CMP_N(<, "b",  "aa");
+  CMP_N(<, "bb", "aa");
+
+  CMP_Y(<=, "a",  "a");
+  CMP_Y(<=, "a",  "b");
+  CMP_Y(<=, "a",  "aa");
+  CMP_Y(<=, "aa", "b");
+  CMP_Y(<=, "aa", "bb");
+  CMP_N(<=, "b",  "a");
+  CMP_N(<=, "aa", "a");
+  CMP_N(<=, "b",  "aa");
+  CMP_N(<=, "bb", "aa");
+
+  CMP_N(>=, "a",  "b");
+  CMP_N(>=, "a",  "aa");
+  CMP_N(>=, "aa", "b");
+  CMP_N(>=, "aa", "bb");
+  CMP_Y(>=, "a",  "a");
+  CMP_Y(>=, "b",  "a");
+  CMP_Y(>=, "aa", "a");
+  CMP_Y(>=, "b",  "aa");
+  CMP_Y(>=, "bb", "aa");
+
+  CMP_N(>, "a",  "a");
+  CMP_N(>, "a",  "b");
+  CMP_N(>, "a",  "aa");
+  CMP_N(>, "aa", "b");
+  CMP_N(>, "aa", "bb");
+  CMP_Y(>, "b",  "a");
+  CMP_Y(>, "aa", "a");
+  CMP_Y(>, "b",  "aa");
+  CMP_Y(>, "bb", "aa");
+  // Grow x one 'a' at a time; changing any of its first i chars must break ==.
+  std::string x;
+  for (int i = 0; i < 256; i++) {
+    x += 'a';
+    std::string y = x;
+    CMP_Y(==, x, y);
+    for (int j = 0; j < i; j++) {
+      std::string z = x;
+      z[j] = 'b';       // Differs in position 'j'
+      CMP_N(==, x, z);
+    }
+  }
+  // The helper macros are local to this test.
+#undef CMP_Y
+#undef CMP_N
+}
+
+TEST(StringPieceTest, CheckSTL) {
+  StringPiece a("abcdefghijklmnopqrstuvwxyz");
+  StringPiece b("abc");
+  StringPiece c("xyz");
+  StringPiece d("foobar");
+  StringPiece e;
+  std::string temp("123");
+  temp += '\0';
+  temp += "456";
+  StringPiece f(temp);
+
+  ASSERT_EQ(a[6], 'g');
+  ASSERT_EQ(b[0], 'a');
+  ASSERT_EQ(c[2], 'z');
+  ASSERT_EQ(f[3], '\0');
+  ASSERT_EQ(f[5], '5');
+
+  ASSERT_EQ(*d.data(), 'f');
+  ASSERT_EQ(d.data()[5], 'r');
+  ASSERT_TRUE(e.data() == nullptr);
+
+  ASSERT_EQ(*a.begin(), 'a');
+  ASSERT_EQ(*(b.begin() + 2), 'c');
+  ASSERT_EQ(*(c.end() - 1), 'z');
+
+  ASSERT_EQ(*a.rbegin(), 'z');
+  ASSERT_EQ(*(b.rbegin() + 2), 'a');
+  ASSERT_EQ(*(c.rend() - 1), 'x');
+  ASSERT_TRUE(a.rbegin() + 26 == a.rend());
+
+  ASSERT_EQ(a.size(), 26U);
+  ASSERT_EQ(b.size(), 3U);
+  ASSERT_EQ(c.size(), 3U);
+  ASSERT_EQ(d.size(), 6U);
+  ASSERT_EQ(e.size(), 0U);
+  ASSERT_EQ(f.size(), 7U);
+
+  ASSERT_TRUE(!d.empty());
+  ASSERT_TRUE(d.begin() != d.end());
+  ASSERT_TRUE(d.begin() + 6 == d.end());
+
+  ASSERT_TRUE(e.empty());
+  ASSERT_TRUE(e.begin() == e.end());
+
+  d = StringPiece();
+  ASSERT_EQ(d.size(), 0U);
+  ASSERT_TRUE(d.empty());
+  ASSERT_TRUE(d.data() == nullptr);
+  ASSERT_TRUE(d.begin() == d.end());
+
+  ASSERT_GE(a.max_size(), a.capacity());
+  ASSERT_GE(a.capacity(), a.size());
+
+  char buf[4] = { '%', '%', '%', '%' };
+  ASSERT_EQ(a.copy(buf, 4), 4U);
+  ASSERT_EQ(buf[0], a[0]);
+  ASSERT_EQ(buf[1], a[1]);
+  ASSERT_EQ(buf[2], a[2]);
+  ASSERT_EQ(buf[3], a[3]);
+  ASSERT_EQ(a.copy(buf, 3, 7), 3U);
+  ASSERT_EQ(buf[0], a[7]);
+  ASSERT_EQ(buf[1], a[8]);
+  ASSERT_EQ(buf[2], a[9]);
+  ASSERT_EQ(buf[3], a[3]);
+  ASSERT_EQ(c.copy(buf, 99), 3U);
+  ASSERT_EQ(buf[0], c[0]);
+  ASSERT_EQ(buf[1], c[1]);
+  ASSERT_EQ(buf[2], c[2]);
+  ASSERT_EQ(buf[3], a[3]);
+
+  ASSERT_EQ(StringPiece::npos, std::string::npos);
+
+  ASSERT_EQ(a.find(b), 0U);
+  ASSERT_EQ(a.find(b, 1), StringPiece::npos);
+  ASSERT_EQ(a.find(c), 23U);
+  ASSERT_EQ(a.find(c, 9), 23U);
+  ASSERT_EQ(a.find(c, StringPiece::npos), StringPiece::npos);
+  ASSERT_EQ(b.find(c), StringPiece::npos);
+  ASSERT_EQ(b.find(c, StringPiece::npos), StringPiece::npos);
+  ASSERT_EQ(a.find(d), 0U);
+  ASSERT_EQ(a.find(e), 0U);
+  ASSERT_EQ(a.find(d, 12), 12U);
+  ASSERT_EQ(a.find(e, 17), 17U);
+  StringPiece g("xx not found bb");
+  ASSERT_EQ(a.find(g), StringPiece::npos);
+  // empty string nonsense
+  ASSERT_EQ(d.find(b), StringPiece::npos);
+  ASSERT_EQ(e.find(b), StringPiece::npos);
+  ASSERT_EQ(d.find(b, 4), StringPiece::npos);
+  ASSERT_EQ(e.find(b, 7), StringPiece::npos);
+
+  size_t empty_search_pos = std::string().find(std::string());
+  ASSERT_EQ(d.find(d), empty_search_pos);
+  ASSERT_EQ(d.find(e), empty_search_pos);
+  ASSERT_EQ(e.find(d), empty_search_pos);
+  ASSERT_EQ(e.find(e), empty_search_pos);
+  ASSERT_EQ(d.find(d, 4), std::string().find(std::string(), 4));
+  ASSERT_EQ(d.find(e, 4), std::string().find(std::string(), 4));
+  ASSERT_EQ(e.find(d, 4), std::string().find(std::string(), 4));
+  ASSERT_EQ(e.find(e, 4), std::string().find(std::string(), 4));
+
+  ASSERT_EQ(a.find('a'), 0U);
+  ASSERT_EQ(a.find('c'), 2U);
+  ASSERT_EQ(a.find('z'), 25U);
+  ASSERT_EQ(a.find('$'), StringPiece::npos);
+  ASSERT_EQ(a.find('\0'), StringPiece::npos);
+  ASSERT_EQ(f.find('\0'), 3U);
+  ASSERT_EQ(f.find('3'), 2U);
+  ASSERT_EQ(f.find('5'), 5U);
+  ASSERT_EQ(g.find('o'), 4U);
+  ASSERT_EQ(g.find('o', 4), 4U);
+  ASSERT_EQ(g.find('o', 5), 8U);
+  ASSERT_EQ(a.find('b', 5), StringPiece::npos);
+  // empty string nonsense
+  ASSERT_EQ(d.find('\0'), StringPiece::npos);
+  ASSERT_EQ(e.find('\0'), StringPiece::npos);
+  ASSERT_EQ(d.find('\0', 4), StringPiece::npos);
+  ASSERT_EQ(e.find('\0', 7), StringPiece::npos);
+  ASSERT_EQ(d.find('x'), StringPiece::npos);
+  ASSERT_EQ(e.find('x'), StringPiece::npos);
+  ASSERT_EQ(d.find('x', 4), StringPiece::npos);
+  ASSERT_EQ(e.find('x', 7), StringPiece::npos);
+
+  ASSERT_EQ(a.rfind(b), 0U);
+  ASSERT_EQ(a.rfind(b, 1), 0U);
+  ASSERT_EQ(a.rfind(c), 23U);
+  ASSERT_EQ(a.rfind(c, 22U), StringPiece::npos);
+  ASSERT_EQ(a.rfind(c, 1U), StringPiece::npos);
+  ASSERT_EQ(a.rfind(c, 0U), StringPiece::npos);
+  ASSERT_EQ(b.rfind(c), StringPiece::npos);
+  ASSERT_EQ(b.rfind(c, 0U), StringPiece::npos);
+  ASSERT_EQ(a.rfind(d), (size_t) std::string(a).rfind(std::string()));
+  ASSERT_EQ(a.rfind(e), std::string(a).rfind(std::string()));
+  ASSERT_EQ(a.rfind(d, 12), 12U);
+  ASSERT_EQ(a.rfind(e, 17), 17U);
+  ASSERT_EQ(a.rfind(g), StringPiece::npos);
+  ASSERT_EQ(d.rfind(b), StringPiece::npos);
+  ASSERT_EQ(e.rfind(b), StringPiece::npos);
+  ASSERT_EQ(d.rfind(b, 4), StringPiece::npos);
+  ASSERT_EQ(e.rfind(b, 7), StringPiece::npos);
+  // empty string nonsense
+  ASSERT_EQ(d.rfind(d, 4), std::string().rfind(std::string()));
+  ASSERT_EQ(e.rfind(d, 7), std::string().rfind(std::string()));
+  ASSERT_EQ(d.rfind(e, 4), std::string().rfind(std::string()));
+  ASSERT_EQ(e.rfind(e, 7), std::string().rfind(std::string()));
+  ASSERT_EQ(d.rfind(d), std::string().rfind(std::string()));
+  ASSERT_EQ(e.rfind(d), std::string().rfind(std::string()));
+  ASSERT_EQ(d.rfind(e), std::string().rfind(std::string()));
+  ASSERT_EQ(e.rfind(e), std::string().rfind(std::string()));
+
+  ASSERT_EQ(g.rfind('o'), 8U);
+  ASSERT_EQ(g.rfind('q'), StringPiece::npos);
+  ASSERT_EQ(g.rfind('o', 8), 8U);
+  ASSERT_EQ(g.rfind('o', 7), 4U);
+  ASSERT_EQ(g.rfind('o', 3), StringPiece::npos);
+  ASSERT_EQ(f.rfind('\0'), 3U);
+  ASSERT_EQ(f.rfind('\0', 12), 3U);
+  ASSERT_EQ(f.rfind('3'), 2U);
+  ASSERT_EQ(f.rfind('5'), 5U);
+  // empty string nonsense
+  ASSERT_EQ(d.rfind('o'), StringPiece::npos);
+  ASSERT_EQ(e.rfind('o'), StringPiece::npos);
+  ASSERT_EQ(d.rfind('o', 4), StringPiece::npos);
+  ASSERT_EQ(e.rfind('o', 7), StringPiece::npos);
+
+  ASSERT_EQ(a.find_first_of(b), 0U);
+  ASSERT_EQ(a.find_first_of(b, 0), 0U);
+  ASSERT_EQ(a.find_first_of(b, 1), 1U);
+  ASSERT_EQ(a.find_first_of(b, 2), 2U);
+  ASSERT_EQ(a.find_first_of(b, 3), StringPiece::npos);
+  ASSERT_EQ(a.find_first_of(c), 23U);
+  ASSERT_EQ(a.find_first_of(c, 23), 23U);
+  ASSERT_EQ(a.find_first_of(c, 24), 24U);
+  ASSERT_EQ(a.find_first_of(c, 25), 25U);
+  ASSERT_EQ(a.find_first_of(c, 26), StringPiece::npos);
+  ASSERT_EQ(g.find_first_of(b), 13U);
+  ASSERT_EQ(g.find_first_of(c), 0U);
+  ASSERT_EQ(a.find_first_of(f), StringPiece::npos);
+  ASSERT_EQ(f.find_first_of(a), StringPiece::npos);
+  // empty string nonsense
+  ASSERT_EQ(a.find_first_of(d), StringPiece::npos);
+  ASSERT_EQ(a.find_first_of(e), StringPiece::npos);
+  ASSERT_EQ(d.find_first_of(b), StringPiece::npos);
+  ASSERT_EQ(e.find_first_of(b), StringPiece::npos);
+  ASSERT_EQ(d.find_first_of(d), StringPiece::npos);
+  ASSERT_EQ(e.find_first_of(d), StringPiece::npos);
+  ASSERT_EQ(d.find_first_of(e), StringPiece::npos);
+  ASSERT_EQ(e.find_first_of(e), StringPiece::npos);
+
+  ASSERT_EQ(a.find_first_not_of(b), 3U);
+  ASSERT_EQ(a.find_first_not_of(c), 0U);
+  ASSERT_EQ(b.find_first_not_of(a), StringPiece::npos);
+  ASSERT_EQ(c.find_first_not_of(a), StringPiece::npos);
+  ASSERT_EQ(f.find_first_not_of(a), 0U);
+  ASSERT_EQ(a.find_first_not_of(f), 0U);
+  ASSERT_EQ(a.find_first_not_of(d), 0U);
+  ASSERT_EQ(a.find_first_not_of(e), 0U);
+  // empty string nonsense
+  ASSERT_EQ(d.find_first_not_of(a), StringPiece::npos);
+  ASSERT_EQ(e.find_first_not_of(a), StringPiece::npos);
+  ASSERT_EQ(d.find_first_not_of(d), StringPiece::npos);
+  ASSERT_EQ(e.find_first_not_of(d), StringPiece::npos);
+  ASSERT_EQ(d.find_first_not_of(e), StringPiece::npos);
+  ASSERT_EQ(e.find_first_not_of(e), StringPiece::npos);
+
+  StringPiece h("====");
+  ASSERT_EQ(h.find_first_not_of('='), StringPiece::npos);
+  ASSERT_EQ(h.find_first_not_of('=', 3), StringPiece::npos);
+  ASSERT_EQ(h.find_first_not_of('\0'), 0U);
+  ASSERT_EQ(g.find_first_not_of('x'), 2U);
+  ASSERT_EQ(f.find_first_not_of('\0'), 0U);
+  ASSERT_EQ(f.find_first_not_of('\0', 3), 4U);
+  ASSERT_EQ(f.find_first_not_of('\0', 2), 2U);
+  // empty string nonsense
+  ASSERT_EQ(d.find_first_not_of('x'), StringPiece::npos);
+  ASSERT_EQ(e.find_first_not_of('x'), StringPiece::npos);
+  ASSERT_EQ(d.find_first_not_of('\0'), StringPiece::npos);
+  ASSERT_EQ(e.find_first_not_of('\0'), StringPiece::npos);
+
+  //  StringPiece g("xx not found bb");
+  StringPiece i("56");
+  ASSERT_EQ(h.find_last_of(a), StringPiece::npos);
+  ASSERT_EQ(g.find_last_of(a), g.size()-1);
+  ASSERT_EQ(a.find_last_of(b), 2U);
+  ASSERT_EQ(a.find_last_of(c), a.size()-1);
+  ASSERT_EQ(f.find_last_of(i), 6U);
+  ASSERT_EQ(a.find_last_of('a'), 0U);
+  ASSERT_EQ(a.find_last_of('b'), 1U);
+  ASSERT_EQ(a.find_last_of('z'), 25U);
+  ASSERT_EQ(a.find_last_of('a', 5), 0U);
+  ASSERT_EQ(a.find_last_of('b', 5), 1U);
+  ASSERT_EQ(a.find_last_of('b', 0), StringPiece::npos);
+  ASSERT_EQ(a.find_last_of('z', 25), 25U);
+  ASSERT_EQ(a.find_last_of('z', 24), StringPiece::npos);
+  ASSERT_EQ(f.find_last_of(i, 5), 5U);
+  ASSERT_EQ(f.find_last_of(i, 6), 6U);
+  ASSERT_EQ(f.find_last_of(a, 4), StringPiece::npos);
+  // empty string nonsense
+  ASSERT_EQ(f.find_last_of(d), StringPiece::npos);
+  ASSERT_EQ(f.find_last_of(e), StringPiece::npos);
+  ASSERT_EQ(f.find_last_of(d, 4), StringPiece::npos);
+  ASSERT_EQ(f.find_last_of(e, 4), StringPiece::npos);
+  ASSERT_EQ(d.find_last_of(d), StringPiece::npos);
+  ASSERT_EQ(d.find_last_of(e), StringPiece::npos);
+  ASSERT_EQ(e.find_last_of(d), StringPiece::npos);
+  ASSERT_EQ(e.find_last_of(e), StringPiece::npos);
+  ASSERT_EQ(d.find_last_of(f), StringPiece::npos);
+  ASSERT_EQ(e.find_last_of(f), StringPiece::npos);
+  ASSERT_EQ(d.find_last_of(d, 4), StringPiece::npos);
+  ASSERT_EQ(d.find_last_of(e, 4), StringPiece::npos);
+  ASSERT_EQ(e.find_last_of(d, 4), StringPiece::npos);
+  ASSERT_EQ(e.find_last_of(e, 4), StringPiece::npos);
+  ASSERT_EQ(d.find_last_of(f, 4), StringPiece::npos);
+  ASSERT_EQ(e.find_last_of(f, 4), StringPiece::npos);
+
+  ASSERT_EQ(a.find_last_not_of(b), a.size()-1);
+  ASSERT_EQ(a.find_last_not_of(c), 22U);
+  ASSERT_EQ(b.find_last_not_of(a), StringPiece::npos);
+  ASSERT_EQ(b.find_last_not_of(b), StringPiece::npos);
+  ASSERT_EQ(f.find_last_not_of(i), 4U);
+  ASSERT_EQ(a.find_last_not_of(c, 24), 22U);
+  ASSERT_EQ(a.find_last_not_of(b, 3), 3U);
+  ASSERT_EQ(a.find_last_not_of(b, 2), StringPiece::npos);
+  // empty string nonsense
+  ASSERT_EQ(f.find_last_not_of(d), f.size()-1);
+  ASSERT_EQ(f.find_last_not_of(e), f.size()-1);
+  ASSERT_EQ(f.find_last_not_of(d, 4), 4U);
+  ASSERT_EQ(f.find_last_not_of(e, 4), 4U);
+  ASSERT_EQ(d.find_last_not_of(d), StringPiece::npos);
+  ASSERT_EQ(d.find_last_not_of(e), StringPiece::npos);
+  ASSERT_EQ(e.find_last_not_of(d), StringPiece::npos);
+  ASSERT_EQ(e.find_last_not_of(e), StringPiece::npos);
+  ASSERT_EQ(d.find_last_not_of(f), StringPiece::npos);
+  ASSERT_EQ(e.find_last_not_of(f), StringPiece::npos);
+  ASSERT_EQ(d.find_last_not_of(d, 4), StringPiece::npos);
+  ASSERT_EQ(d.find_last_not_of(e, 4), StringPiece::npos);
+  ASSERT_EQ(e.find_last_not_of(d, 4), StringPiece::npos);
+  ASSERT_EQ(e.find_last_not_of(e, 4), StringPiece::npos);
+  ASSERT_EQ(d.find_last_not_of(f, 4), StringPiece::npos);
+  ASSERT_EQ(e.find_last_not_of(f, 4), StringPiece::npos);
+
+  ASSERT_EQ(h.find_last_not_of('x'), h.size() - 1);
+  ASSERT_EQ(h.find_last_not_of('='), StringPiece::npos);
+  ASSERT_EQ(b.find_last_not_of('c'), 1U);
+  ASSERT_EQ(h.find_last_not_of('x', 2), 2U);
+  ASSERT_EQ(h.find_last_not_of('=', 2), StringPiece::npos);
+  ASSERT_EQ(b.find_last_not_of('b', 1), 0U);
+  // empty string nonsense
+  ASSERT_EQ(d.find_last_not_of('x'), StringPiece::npos);
+  ASSERT_EQ(e.find_last_not_of('x'), StringPiece::npos);
+  ASSERT_EQ(d.find_last_not_of('\0'), StringPiece::npos);
+  ASSERT_EQ(e.find_last_not_of('\0'), StringPiece::npos);
+
+  ASSERT_EQ(a.substr(0, 3), b);
+  ASSERT_EQ(a.substr(23), c);
+  ASSERT_EQ(a.substr(23, 3), c);
+  ASSERT_EQ(a.substr(23, 99), c);
+  ASSERT_EQ(a.substr(0), a);
+  ASSERT_EQ(a.substr(3, 2), "de");
+  // empty string nonsense
+  ASSERT_EQ(a.substr(99, 2), e);
+  ASSERT_EQ(d.substr(99), e);
+  ASSERT_EQ(d.substr(0, 99), e);
+  ASSERT_EQ(d.substr(99, 99), e);
+}
+
+TEST(StringPieceTest, TruncSubstr) {
+  const StringPiece hi("hi");  // size() == 2
+  EXPECT_EQ("", absl::ClippedSubstr(hi, 0, 0));
+  EXPECT_EQ("h", absl::ClippedSubstr(hi, 0, 1));
+  EXPECT_EQ("hi", absl::ClippedSubstr(hi, 0));
+  EXPECT_EQ("i", absl::ClippedSubstr(hi, 1));
+  EXPECT_EQ("", absl::ClippedSubstr(hi, 2));  // pos == size(): empty
+  EXPECT_EQ("", absl::ClippedSubstr(hi, 3));  // pos > size(): clipped
+  EXPECT_EQ("", absl::ClippedSubstr(hi, 3, 2));  // pos > size(): clipped
+}
+
+TEST(StringPieceTest, CheckCustom) {
+  StringPiece a("foobar");
+  std::string s1("123");
+  s1 += '\0';  // embedded NUL; s1 ends up 7 bytes long
+  s1 += "456";
+  StringPiece b(s1);
+  StringPiece e;
+  std::string s2;
+
+  // CopyToString: replaces the target's contents (embedded NUL is kept).
+  a.CopyToString(&s2);
+  ASSERT_EQ(s2.size(), 6U);
+  ASSERT_EQ(s2, "foobar");
+  b.CopyToString(&s2);
+  ASSERT_EQ(s2.size(), 7U);
+  ASSERT_EQ(s1, s2);
+  e.CopyToString(&s2);
+  ASSERT_TRUE(s2.empty());
+
+  // AppendToString: appends to the target without clearing it first.
+  s2.erase();
+  a.AppendToString(&s2);
+  ASSERT_EQ(s2.size(), 6U);
+  ASSERT_EQ(s2, "foobar");
+  a.AppendToString(&s2);
+  ASSERT_EQ(s2.size(), 12U);
+  ASSERT_EQ(s2, "foobarfoobar");
+
+  // remove_prefix: drops n leading chars; n == size() leaves an empty piece.
+  StringPiece c(a);
+  c.remove_prefix(3);
+  ASSERT_EQ(c, "bar");
+  c = a;
+  c.remove_prefix(0);
+  ASSERT_EQ(c, a);
+  c.remove_prefix(c.size());
+  ASSERT_EQ(c, e);
+
+  // remove_suffix: drops n trailing chars; n == size() leaves an empty piece.
+  c = a;
+  c.remove_suffix(3);
+  ASSERT_EQ(c, "foo");
+  c = a;
+  c.remove_suffix(0);
+  ASSERT_EQ(c, a);
+  c.remove_suffix(c.size());
+  ASSERT_EQ(c, e);
+
+  // set(ptr, len): the length is explicit, so 7 also covers the literal's NUL.
+  c.set("foobar", 6);
+  ASSERT_EQ(c, a);
+  c.set("foobar", 0);
+  ASSERT_EQ(c, e);
+  c.set("foobar", 7);
+  ASSERT_NE(c, a);
+
+  c.set("foobar");  // single-argument form: same 6 chars as a
+  ASSERT_EQ(c, a);
+
+  c.set(static_cast<const void*>("foobar"), 6);
+  ASSERT_EQ(c, a);
+  c.set(static_cast<const void*>("foobar"), 0);
+  ASSERT_EQ(c, e);
+  c.set(static_cast<const void*>("foobar"), 7);
+  ASSERT_NE(c, a);
+
+  std::string s3(std::string(a).c_str(), 7);  // "foobar" plus terminating NUL
+  ASSERT_EQ(c, s3);
+  std::string s4((std::string(e)));
+  ASSERT_TRUE(s4.empty());
+}
+
+TEST(StringPieceTest, Checknullptr) {
+  // Building from nullptr used to crash; it must give an empty piece instead.
+  StringPiece piece(nullptr);
+  ASSERT_EQ(piece.data(), static_cast<const char*>(nullptr));
+  ASSERT_EQ(piece.size(), 0U);
+
+  piece.set(nullptr);
+  ASSERT_EQ(piece.data(), static_cast<const char*>(nullptr));
+  ASSERT_EQ(piece.size(), 0U);
+}
+
+TEST(StringPieceTest, CheckComparisons2) {
+  StringPiece alphabet("abcdefghijklmnopqrstuvwxyz");
+
+  // Exercise ==, < and > (and compare()) on inputs longer than 4 bytes.
+  ASSERT_TRUE(alphabet == StringPiece("abcdefghijklmnopqrstuvwxyz"));
+  ASSERT_EQ(alphabet.compare(StringPiece("abcdefghijklmnopqrstuvwxyz")), 0);
+
+  ASSERT_TRUE(alphabet < StringPiece("abcdefghijklmnopqrstuvwxzz"));
+  ASSERT_LT(alphabet.compare(StringPiece("abcdefghijklmnopqrstuvwxzz")), 0);
+
+  ASSERT_TRUE(alphabet > StringPiece("abcdefghijklmnopqrstuvwxyy"));
+  ASSERT_GT(alphabet.compare(StringPiece("abcdefghijklmnopqrstuvwxyy")), 0);
+}
+
+TEST(StringPieceTest, StringCompareNotAmbiguous) {
+  ASSERT_TRUE("hello" == std::string("hello"));  // literal == std::string
+  ASSERT_TRUE("hello" < std::string("world"));  // literal < std::string
+}
+
+TEST(StringPieceTest, HeterogenousStringPieceEquals) {
+  ASSERT_TRUE(StringPiece("hello") == std::string("hello"));  // piece == string
+  ASSERT_TRUE("hello" == StringPiece("hello"));  // literal == piece
+}
diff --git a/third_party/chromium_base/string_util.h b/third_party/chromium_base/string_util.h
new file mode 100644
index 0000000..83578cb
--- /dev/null
+++ b/third_party/chromium_base/string_util.h
@@ -0,0 +1,74 @@
+// This file is extracted from chromium/src/base/strings/string_util.h
+// and modified for goma/client.
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// This file defines utility functions for working with strings.
+
+#ifndef BASE_STRINGS_STRING_UTIL_H_
+#define BASE_STRINGS_STRING_UTIL_H_
+
+#include <string>
+
+#include "string_piece.h"
+
+// Maps 'A'-'Z' to 'a'-'z' and returns any other character unchanged.  Avoids
+// the standard library's tolower because that one depends on the locale.
+inline char ToLowerASCII(char c) {
+  return (c < 'A' || c > 'Z') ? c : static_cast<char>(c + ('a' - 'A'));
+}
+
+// Returns a copy of |sp| in which ASCII 'A'-'Z' are replaced by 'a'-'z';
+// all other bytes are copied through unchanged.
+inline std::string ToLower(StringPiece sp) {
+  std::string lowered;
+  lowered.reserve(sp.size());
+  for (const char ch : sp) lowered += ToLowerASCII(ch);
+  return lowered;
+}
+
+// Maps 'a'-'z' to 'A'-'Z' and returns any other character unchanged.  Avoids
+// the standard library's toupper because that one depends on the locale.
+inline char ToUpperASCII(char c) {
+  return (c < 'a' || c > 'z') ? c : static_cast<char>(c + ('A' - 'a'));
+}
+
+// Determines the type of ASCII character, independent of locale (the C
+// library versions will change based on locale).
+template <typename Char>
+inline bool IsCppBlank(Char c) {  // ' ', '\t', '\f' or '\v'
+  return !(c != ' ' && c != '\t' && c != '\f' && c != '\v');
+}
+template <typename Char>
+inline bool IsAsciiAlpha(Char c) {  // [A-Za-z]
+  return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
+}
+template <typename Char>
+inline bool IsAsciiUpper(Char c) {  // [A-Z]
+  return 'A' <= c && c <= 'Z';
+}
+template <typename Char>
+inline bool IsAsciiLower(Char c) {  // [a-z]
+  return 'a' <= c && c <= 'z';
+}
+template <typename Char>
+inline bool IsAsciiDigit(Char c) {  // [0-9]
+  return '0' <= c && c <= '9';
+}
+
+// True for the hexadecimal digits [0-9A-Fa-f].
+template <typename Char>
+inline bool IsHexDigit(Char c) {
+  return ('0' <= c && c <= '9') || ('A' <= c && c <= 'F') ||
+         ('a' <= c && c <= 'f');
+}
+
+// True for ASCII letters and decimal digits, i.e. [0-9A-Za-z].
+template <typename Char>
+inline bool IsAsciiAlphaDigit(Char c) {
+  return ('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') ||
+         ('a' <= c && c <= 'z');
+}
+
+#endif  // BASE_STRINGS_STRING_UTIL_H_
diff --git a/third_party/config/LICENSE b/third_party/config/LICENSE
new file mode 100644
index 0000000..2e90f95
--- /dev/null
+++ b/third_party/config/LICENSE
@@ -0,0 +1,184 @@
+glog
+====================
+
+Copyright (c) 2008, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+A function gettimeofday in utilities.cc is based on
+
+http://www.google.com/codesearch/p?hl=en#dR3YEbitojA/COPYING&q=GetSystemTimeAsFileTime%20license:bsd
+
+The license of this code is:
+
+Copyright (c) 2003-2008, Jouni Malinen <j@w1.fi> and contributors
+All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+3. Neither the name(s) of the above-listed copyright holder(s) nor the
+   names of its contributors may be used to endorse or promote products
+   derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+----------------------------------------------------------------------
+
+protobuf
+====================
+
+This license applies to all parts of Protocol Buffers except the following:
+
+  - Atomicops support for generic gcc, located in
+    src/google/protobuf/stubs/atomicops_internals_generic_gcc.h.
+    This file is copyrighted by Red Hat Inc.
+
+  - Atomicops support for AIX/POWER, located in
+    src/google/protobuf/stubs/atomicops_internals_power.h.
+    This file is copyrighted by Bloomberg Finance LP.
+
+Copyright 2014, Google Inc.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Code generated by the Protocol Buffer compiler is owned by the owner
+of the input file used when generating it.  This code is not
+standalone and requires a support library to be linked with it.  This
+support library is itself covered by the above license.
+
+----------------------------------------------------------------------
+
+Since we don't use getopt_long in xz, we use xz under public domain.
+
+XZ Utils Licensing
+==================
+
+    Different licenses apply to different files in this package. Here
+    is a rough summary of which licenses apply to which parts of this
+    package (but check the individual files to be sure!):
+
+      - liblzma is in the public domain.
+
+      - xz, xzdec, and lzmadec command line tools are in the public
+        domain unless GNU getopt_long had to be compiled and linked
+        in from the lib directory. The getopt_long code is under
+        GNU LGPLv2.1+.
+
+      - The scripts to grep, diff, and view compressed files have been
+        adapted from gzip. These scripts and their documentation are
+        under GNU GPLv2+.
+
+      - All the documentation in the doc directory and most of the
+        XZ Utils specific documentation files in other directories
+        are in the public domain.
+
+      - Translated messages are in the public domain.
+
+      - The build system contains public domain files, and files that
+        are under GNU GPLv2+ or GNU GPLv3+. None of these files end up
+        in the binaries being built.
+
+      - Test files and test code in the tests directory, and debugging
+        utilities in the debug directory are in the public domain.
+
+      - The extra directory may contain public domain files, and files
+        that are under various free software licenses.
+
+    You can do whatever you want with the files that have been put into
+    the public domain. If you find public domain legally problematic,
+    take the previous sentence as a license grant. If you still find
+    the lack of copyright legally problematic, you have too many
+    lawyers.
+
+    As usual, this software is provided "as is", without any warranty.
+
+    If you copy significant amounts of public domain code from XZ Utils
+    into your project, acknowledging this somewhere in your software is
+    polite (especially if it is proprietary, non-free software), but
+    naturally it is not legally required. Here is an example of a good
+    notice to put into "about box" or into documentation:
+
+        This software includes code from XZ Utils <http://tukaani.org/xz/>.
+
+    The following license texts are included in the following files:
+      - COPYING.LGPLv2.1: GNU Lesser General Public License version 2.1
+      - COPYING.GPLv2: GNU General Public License version 2
+      - COPYING.GPLv3: GNU General Public License version 3
+
+    Note that the toolchain (compiler, linker etc.) may add some code
+    pieces that are copyrighted. Thus, it is possible that e.g. liblzma
+    binary wouldn't actually be in the public domain in its entirety
+    even though it contains no copyrighted code from the XZ Utils source
+    package.
+
+    If you have questions, don't hesitate to ask the author(s) for more
+    information.
diff --git a/third_party/config/glog/linux/config.h b/third_party/config/glog/linux/config.h
new file mode 100644
index 0000000..e5e6f1b
--- /dev/null
+++ b/third_party/config/glog/linux/config.h
@@ -0,0 +1,198 @@
+/* src/config.h.  Generated from config.h.in by configure.  */
+/* src/config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* define if glog doesn't use RTTI */
+#define DISABLE_RTTI 1
+
+/* Namespace for Google classes */
+#define GOOGLE_NAMESPACE google
+
+/* Define if you have the `dladdr' function */
+/* #undef HAVE_DLADDR */
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <execinfo.h> header file. */
+#define HAVE_EXECINFO_H 1
+
+/* Define if you have the `fcntl' function */
+#define HAVE_FCNTL 1
+
+/* Define to 1 if you have the <glob.h> header file. */
+#define HAVE_GLOB_H 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the `pthread' library (-lpthread). */
+#define HAVE_LIBPTHREAD 1
+
+/* Define to 1 if you have the <libunwind.h> header file. */
+/* #undef HAVE_LIBUNWIND_H */
+
+/* define if you have google gflags library */
+/* #undef HAVE_LIB_GFLAGS */
+
+/* define if you have google gmock library */
+/* #undef HAVE_LIB_GMOCK */
+
+/* define if you have google gtest library */
+/* #undef HAVE_LIB_GTEST */
+
+/* define if you have libunwind */
+/* #undef HAVE_LIB_UNWIND */
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* define if the compiler implements namespaces */
+#define HAVE_NAMESPACES 1
+
+/* Define if you have the 'pread' function */
+#define HAVE_PREAD 1
+
+/* Define if you have POSIX threads libraries and header files. */
+#define HAVE_PTHREAD 1
+
+/* Define to 1 if you have the <pwd.h> header file. */
+#define HAVE_PWD_H 1
+
+/* Define if you have the 'pwrite' function */
+#define HAVE_PWRITE 1
+
+/* define if the compiler implements pthread_rwlock_* */
+#define HAVE_RWLOCK 1
+
+/* Define if you have the 'sigaction' function */
+#define HAVE_SIGACTION 1
+
+/* Define if you have the `sigaltstack' function */
+#define HAVE_SIGALTSTACK 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <syscall.h> header file. */
+#define HAVE_SYSCALL_H 1
+
+/* Define to 1 if you have the <syslog.h> header file. */
+#define HAVE_SYSLOG_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/syscall.h> header file. */
+#define HAVE_SYS_SYSCALL_H 1
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#define HAVE_SYS_TIME_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <sys/ucontext.h> header file. */
+#define HAVE_SYS_UCONTEXT_H 1
+
+/* Define to 1 if you have the <sys/utsname.h> header file. */
+#define HAVE_SYS_UTSNAME_H 1
+
+/* Define to 1 if you have the <ucontext.h> header file. */
+#define HAVE_UCONTEXT_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the <unwind.h> header file. */
+#define HAVE_UNWIND_H 1
+
+/* define if the compiler supports using expression for operator */
+#define HAVE_USING_OPERATOR 1
+
+/* define if your compiler has __attribute__ */
+#define HAVE___ATTRIBUTE__ 1
+
+/* define if your compiler has __builtin_expect */
+#define HAVE___BUILTIN_EXPECT 1
+
+/* define if your compiler has __sync_val_compare_and_swap */
+#define HAVE___SYNC_VAL_COMPARE_AND_SWAP 1
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#define LT_OBJDIR ".libs/"
+
+/* Name of package */
+#define PACKAGE "glog"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "opensource@google.com"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "glog"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "glog 0.3.5"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "glog"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL ""
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "0.3.5"
+
+/* How to access the PC from a struct ucontext (x86/x86-64 gregs only) */
+#if defined(__LP64__)
+#define PC_FROM_UCONTEXT uc_mcontext.gregs[REG_RIP]
+#else
+#define PC_FROM_UCONTEXT uc_mcontext.gregs[REG_EIP]
+#endif
+
+/* Define to necessary symbol if this constant uses a non-standard name on
+   your system. */
+/* #undef PTHREAD_CREATE_JOINABLE */
+
+/* The size of `void *', as computed by sizeof. */
+#if defined(__LP64__)
+#define SIZEOF_VOID_P 8
+#else
+#define SIZEOF_VOID_P 4
+#endif
+
+/* Define to 1 if you have the ANSI C header files. */
+/* #undef STDC_HEADERS */
+
+/* the namespace where STL code like vector<> is defined */
+#define STL_NAMESPACE std
+
+/* location of source code */
+#define TEST_SRC_DIR "."
+
+/* Version number of package */
+#define VERSION "0.3.5"
+
+/* Stops putting the code inside the Google namespace */
+#define _END_GOOGLE_NAMESPACE_ }
+
+/* Puts following code inside the Google namespace */
+#define _START_GOOGLE_NAMESPACE_ namespace google {
+
+// Annoying stuff for windows -- makes sure clients can import these functions
+#ifndef GOOGLE_GLOG_DLL_DECL
+# if defined(_WIN32) && !defined(__CYGWIN__)
+#   define GOOGLE_GLOG_DLL_DECL  __declspec(dllimport)
+# else
+#   define GOOGLE_GLOG_DLL_DECL
+# endif
+#endif
diff --git a/third_party/config/glog/linux/glog/log_severity.h b/third_party/config/glog/linux/glog/log_severity.h
new file mode 100644
index 0000000..99945a4
--- /dev/null
+++ b/third_party/config/glog/linux/glog/log_severity.h
@@ -0,0 +1,92 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef BASE_LOG_SEVERITY_H__
+#define BASE_LOG_SEVERITY_H__
+
+// Annoying stuff for windows -- makes sure clients can import these functions
+#ifndef GOOGLE_GLOG_DLL_DECL
+# if defined(_WIN32) && !defined(__CYGWIN__)
+#   define GOOGLE_GLOG_DLL_DECL  __declspec(dllimport)
+# else
+#   define GOOGLE_GLOG_DLL_DECL
+# endif
+#endif
+
+// Variables of type LogSeverity are widely taken to lie in the range
+// [0, NUM_SEVERITIES-1].  Be careful to preserve this assumption if
+// you ever need to change their values or add a new severity.
+typedef int LogSeverity;
+
+const int GLOG_INFO = 0, GLOG_WARNING = 1, GLOG_ERROR = 2, GLOG_FATAL = 3,
+  NUM_SEVERITIES = 4;
+#ifndef GLOG_NO_ABBREVIATED_SEVERITIES
+# ifdef ERROR
+#  error ERROR macro is defined. Define GLOG_NO_ABBREVIATED_SEVERITIES before including logging.h. See the document for detail.
+# endif
+const int INFO = GLOG_INFO, WARNING = GLOG_WARNING,
+  ERROR = GLOG_ERROR, FATAL = GLOG_FATAL;
+#endif
+
+// DFATAL_LEVEL: FATAL normally, ERROR under NDEBUG (abbreviated names above).
+#ifdef NDEBUG
+#define DFATAL_LEVEL ERROR
+#else
+#define DFATAL_LEVEL FATAL
+#endif
+
+extern GOOGLE_GLOG_DLL_DECL const char* const LogSeverityNames[NUM_SEVERITIES];
+
+// NDEBUG usage helpers related to (RAW_)DCHECK:
+//
+// DEBUG_MODE is for small !NDEBUG uses like
+//   if (DEBUG_MODE) foo.CheckThatFoo();
+// instead of substantially more verbose
+//   #ifndef NDEBUG
+//     foo.CheckThatFoo();
+//   #endif
+//
+// IF_DEBUG_MODE is for small !NDEBUG uses like
+//   IF_DEBUG_MODE( string error; )
+//   DCHECK(Foo(&error)) << error;
+// instead of substantially more verbose
+//   #ifndef NDEBUG
+//     string error;
+//     DCHECK(Foo(&error)) << error;
+//   #endif
+//
+#ifdef NDEBUG
+enum { DEBUG_MODE = 0 };
+#define IF_DEBUG_MODE(x)
+#else
+enum { DEBUG_MODE = 1 };
+#define IF_DEBUG_MODE(x) x
+#endif
+
+#endif  // BASE_LOG_SEVERITY_H__
diff --git a/third_party/config/glog/linux/glog/logging.h b/third_party/config/glog/linux/glog/logging.h
new file mode 100644
index 0000000..d7c392b
--- /dev/null
+++ b/third_party/config/glog/linux/glog/logging.h
@@ -0,0 +1,1659 @@
+// Copyright (c) 1999, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Ray Sidney
+//
+// This file contains #include information about logging-related stuff.
+// Pretty much everybody needs to #include this file so that they can
+// log various happenings.
+//
+#ifndef _LOGGING_H_
+#define _LOGGING_H_
+
+#include <errno.h>
+#include <string.h>
+#include <time.h>
+#include <iosfwd>
+#include <ostream>
+#include <sstream>
+#include <string>
+#if 1
+# include <unistd.h>
+#endif
+#include <vector>
+
+#if defined(_MSC_VER)
+#define GLOG_MSVC_PUSH_DISABLE_WARNING(n) __pragma(warning(push)) \
+                                     __pragma(warning(disable:n))
+#define GLOG_MSVC_POP_WARNING() __pragma(warning(pop))
+#else
+#define GLOG_MSVC_PUSH_DISABLE_WARNING(n)
+#define GLOG_MSVC_POP_WARNING()
+#endif
+
+// Annoying stuff for windows -- makes sure clients can import these functions
+#ifndef GOOGLE_GLOG_DLL_DECL
+# if defined(_WIN32) && !defined(__CYGWIN__)
+#   define GOOGLE_GLOG_DLL_DECL  __declspec(dllimport)
+# else
+#   define GOOGLE_GLOG_DLL_DECL
+# endif
+#endif
+
+// We care a lot about number of bits things take up.  Unfortunately,
+// systems define their bit-specific ints in a lot of different ways.
+// We use our own way, and have a typedef to get there.
+// Note: these commands below may look like "#if 1" or "#if 0", but
+// that's because they were constructed that way at ./configure time.
+// Look at logging.h.in to see how they're calculated (based on your config).
+#if 1
+#include <stdint.h>             // the normal place uint16_t is defined
+#endif
+#if 1
+#include <sys/types.h>          // the normal place u_int16_t is defined
+#endif
+#if 1
+#include <inttypes.h>           // a third place for uint16_t or u_int16_t
+#endif
+
+#if 0
+#include <gflags/gflags.h>
+#endif
+
+namespace google {
+
+#if 1      // the C99 format
+typedef int32_t int32;
+typedef uint32_t uint32;
+typedef int64_t int64;
+typedef uint64_t uint64;
+#elif 1   // the BSD format
+typedef int32_t int32;
+typedef u_int32_t uint32;
+typedef int64_t int64;
+typedef u_int64_t uint64;
+#elif 0    // the windows (vc7) format
+typedef __int32 int32;
+typedef unsigned __int32 uint32;
+typedef __int64 int64;
+typedef unsigned __int64 uint64;
+#else
+#error Do not know how to define a 32-bit integer quantity on your system
+#endif
+
+}
+
+// The global value of GOOGLE_STRIP_LOG. All the messages logged to
+// LOG(XXX) with severity less than GOOGLE_STRIP_LOG will not be displayed.
+// If it can be determined at compile time that the message will not be
+// printed, the statement will be compiled out.
+//
+// Example: to strip out all INFO and WARNING messages, use the value
+// of 2 below. To make an exception for WARNING messages from a single
+// file, add "#define GOOGLE_STRIP_LOG 1" to that file _before_ including
+// base/logging.h
+#ifndef GOOGLE_STRIP_LOG
+#define GOOGLE_STRIP_LOG 0
+#endif
+
+// GCC can be told that a certain branch is not likely to be taken (for
+// instance, a CHECK failure), and use that information in static analysis.
+// Giving it this information can help it optimize for the common case in
+// the absence of better information (ie. -fprofile-arcs).
+//
+#ifndef GOOGLE_PREDICT_BRANCH_NOT_TAKEN
+#if 1
+#define GOOGLE_PREDICT_BRANCH_NOT_TAKEN(x) (__builtin_expect(x, 0))
+#else
+#define GOOGLE_PREDICT_BRANCH_NOT_TAKEN(x) x
+#endif
+#endif
+
+#ifndef GOOGLE_PREDICT_FALSE
+#if 1
+#define GOOGLE_PREDICT_FALSE(x) (__builtin_expect(x, 0))
+#else
+#define GOOGLE_PREDICT_FALSE(x) x
+#endif
+#endif
+
+#ifndef GOOGLE_PREDICT_TRUE
+#if 1
+#define GOOGLE_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+#else
+#define GOOGLE_PREDICT_TRUE(x) x
+#endif
+#endif
+
+
+// Make a bunch of macros for logging.  The way to log things is to stream
+// things to LOG(<a particular severity level>).  E.g.,
+//
+//   LOG(INFO) << "Found " << num_cookies << " cookies";
+//
+// You can capture log messages in a string, rather than reporting them
+// immediately:
+//
+//   vector<string> errors;
+//   LOG_STRING(ERROR, &errors) << "Couldn't parse cookie #" << cookie_num;
+//
+// This pushes back the new error onto 'errors'; if given a NULL pointer,
+// it reports the error via LOG(ERROR).
+//
+// You can also do conditional logging:
+//
+//   LOG_IF(INFO, num_cookies > 10) << "Got lots of cookies";
+//
+// You can also do occasional logging (log every n'th occurrence of an
+// event):
+//
+//   LOG_EVERY_N(INFO, 10) << "Got the " << google::COUNTER << "th cookie";
+//
+// The above will cause log messages to be output on the 1st, 11th, 21st, ...
+// times it is executed.  Note that the special google::COUNTER value is used
+// to identify which repetition is happening.
+//
+// You can also do occasional conditional logging (log every n'th
+// occurrence of an event, when condition is satisfied):
+//
+//   LOG_IF_EVERY_N(INFO, (size > 1024), 10) << "Got the " << google::COUNTER
+//                                           << "th big cookie";
+//
+// You can log messages the first N times your code executes a line. E.g.
+//
+//   LOG_FIRST_N(INFO, 20) << "Got the " << google::COUNTER << "th cookie";
+//
+// Outputs log messages for the first 20 times it is executed.
+//
+// Analogous SYSLOG, SYSLOG_IF, and SYSLOG_EVERY_N macros are available.
+// These log to syslog as well as to the normal logs.  If you use these at
+// all, you need to be aware that syslog can drastically reduce performance,
+// especially if it is configured for remote logging!  Don't use these
+// unless you fully understand this and have a concrete need to use them.
+// Even then, try to minimize your use of them.
+//
+// There are also "debug mode" logging macros like the ones above:
+//
+//   DLOG(INFO) << "Found cookies";
+//
+//   DLOG_IF(INFO, num_cookies > 10) << "Got lots of cookies";
+//
+//   DLOG_EVERY_N(INFO, 10) << "Got the " << google::COUNTER << "th cookie";
+//
+// All "debug mode" logging is compiled away to nothing for non-debug mode
+// compiles.
+//
+// We also have
+//
+//   LOG_ASSERT(assertion);
+//   DLOG_ASSERT(assertion);
+//
+// which is syntactic sugar for {,D}LOG_IF(FATAL, assert fails) << assertion;
+//
+// There are "verbose level" logging macros.  They look like
+//
+//   VLOG(1) << "I'm printed when you run the program with --v=1 or more";
+//   VLOG(2) << "I'm printed when you run the program with --v=2 or more";
+//
+// These always log at the INFO log level (when they log at all).
+// The verbose logging can also be turned on module-by-module.  For instance,
+//    --vmodule=mapreduce=2,file=1,gfs*=3 --v=0
+// will cause:
+//   a. VLOG(2) and lower messages to be printed from mapreduce.{h,cc}
+//   b. VLOG(1) and lower messages to be printed from file.{h,cc}
+//   c. VLOG(3) and lower messages to be printed from files prefixed with "gfs"
+//   d. VLOG(0) and lower messages to be printed from elsewhere
+//
+// The wildcarding functionality shown by (c) supports both '*' (match
+// 0 or more characters) and '?' (match any single character) wildcards.
+//
+// There's also VLOG_IS_ON(n) "verbose level" condition macro. To be used as
+//
+//   if (VLOG_IS_ON(2)) {
+//     // do some logging preparation and logging
+//     // that can't be accomplished with just VLOG(2) << ...;
+//   }
+//
+// There are also VLOG_IF, VLOG_EVERY_N and VLOG_IF_EVERY_N "verbose level"
+// condition macros for sample cases, when some extra computation and
+// preparation for logs is not needed.
+//   VLOG_IF(1, (size > 1024))
+//      << "I'm printed when size is more than 1024 and when you run the "
+//         "program with --v=1 or more";
+//   VLOG_EVERY_N(1, 10)
+//      << "I'm printed every 10th occurrence, and when you run the program "
+//         "with --v=1 or more. Present occurrence is " << google::COUNTER;
+//   VLOG_IF_EVERY_N(1, (size > 1024), 10)
+//      << "I'm printed on every 10th occurrence of case when size is more "
+//         " than 1024, when you run the program with --v=1 or more. "
+//         "Present occurrence is " << google::COUNTER;
+//
+// The supported severity levels for macros that allow you to specify one
+// are (in increasing order of severity) INFO, WARNING, ERROR, and FATAL.
+// Note that messages of a given severity are logged not only in the
+// logfile for that severity, but also in all logfiles of lower severity.
+// E.g., a message of severity FATAL will be logged to the logfiles of
+// severity FATAL, ERROR, WARNING, and INFO.
+//
+// There is also the special severity of DFATAL, which logs FATAL in
+// debug mode, ERROR in normal mode.
+//
+// Very important: logging a message at the FATAL severity level causes
+// the program to terminate (after the message is logged).
+//
+// Unless otherwise specified, logs will be written to the filename
+// "<program name>.<hostname>.<user name>.log.<severity level>.", followed
+// by the date, time, and pid (you can't prevent the date, time, and pid
+// from being in the filename).
+//
+// The logging code takes two flags:
+//     --v=#           set the verbose level
+//     --logtostderr   log all the messages to stderr instead of to logfiles
+
+// LOG LINE PREFIX FORMAT
+//
+// Log lines have this form:
+//
+//     Lmmdd hh:mm:ss.uuuuuu threadid file:line] msg...
+//
+// where the fields are defined as follows:
+//
+//   L                A single character, representing the log level
+//                    (eg 'I' for INFO)
+//   mm               The month (zero padded; ie May is '05')
+//   dd               The day (zero padded)
+//   hh:mm:ss.uuuuuu  Time in hours, minutes and fractional seconds
+//   threadid         The space-padded thread ID as returned by GetTID()
+//                    (this matches the PID on Linux)
+//   file             The file name
+//   line             The line number
+//   msg              The user-supplied message
+//
+// Example:
+//
+//   I1103 11:57:31.739339 24395 google.cc:2341] Command line: ./some_prog
+//   I1103 11:57:31.739403 24395 google.cc:2342] Process id 24395
+//
+// NOTE: although the microseconds are useful for comparing events on
+// a single machine, clocks on different machines may not be well
+// synchronized.  Hence, use caution when comparing the low bits of
+// timestamps from different machines.
+
+#ifndef DECLARE_VARIABLE
+#define MUST_UNDEF_GFLAGS_DECLARE_MACROS
+#define DECLARE_VARIABLE(type, shorttype, name, tn)                     \
+  namespace fL##shorttype {                                             \
+    extern GOOGLE_GLOG_DLL_DECL type FLAGS_##name;                      \
+  }                                                                     \
+  using fL##shorttype::FLAGS_##name
+
+// bool specialization
+#define DECLARE_bool(name) \
+  DECLARE_VARIABLE(bool, B, name, bool)
+
+// int32 specialization
+#define DECLARE_int32(name) \
+  DECLARE_VARIABLE(google::int32, I, name, int32)
+
+// Special case for string, because we have to specify the namespace
+// std::string, which doesn't play nicely with our FLAG__namespace hackery.
+#define DECLARE_string(name)                                            \
+  namespace fLS {                                                       \
+    extern GOOGLE_GLOG_DLL_DECL std::string& FLAGS_##name;              \
+  }                                                                     \
+  using fLS::FLAGS_##name
+#endif
+
+// Set whether log messages go to stderr instead of logfiles
+DECLARE_bool(logtostderr);
+
+// Set whether log messages go to stderr in addition to logfiles.
+DECLARE_bool(alsologtostderr);
+
+// Set color messages logged to stderr (if supported by terminal).
+DECLARE_bool(colorlogtostderr);
+
+// Log messages at a level >= this flag are automatically sent to
+// stderr in addition to log files.
+DECLARE_int32(stderrthreshold);
+
+// Set whether the log prefix should be prepended to each line of output.
+DECLARE_bool(log_prefix);
+
+// Log messages at a level <= this flag are buffered.
+// Log messages at a higher level are flushed immediately.
+DECLARE_int32(logbuflevel);
+
+// Sets the maximum number of seconds which logs may be buffered for.
+DECLARE_int32(logbufsecs);
+
+// Log suppression level: messages logged at a lower level than this
+// are suppressed.
+DECLARE_int32(minloglevel);
+
+// If specified, logfiles are written into this directory instead of the
+// default logging directory.
+DECLARE_string(log_dir);
+
+// Set the log file mode.
+DECLARE_int32(logfile_mode);
+
+// Sets the path of the directory into which to put additional links
+// to the log files.
+DECLARE_string(log_link);
+
+DECLARE_int32(v);  // in vlog_is_on.cc
+
+// Sets the maximum log file size (in MB).
+DECLARE_int32(max_log_size);
+
+// Sets whether to avoid logging to the disk if the disk is full.
+DECLARE_bool(stop_logging_if_full_disk);
+
+#ifdef MUST_UNDEF_GFLAGS_DECLARE_MACROS
+#undef MUST_UNDEF_GFLAGS_DECLARE_MACROS
+#undef DECLARE_VARIABLE
+#undef DECLARE_bool
+#undef DECLARE_int32
+#undef DECLARE_string
+#endif
+
+// Log messages below the GOOGLE_STRIP_LOG level will be compiled away for
+// security reasons. See LOG(severity) below.
+
+// A few definitions of macros that don't generate much code.  Since
+// LOG(INFO) and its ilk are used all over our code, it's
+// better to have compact code for these operations.
+
+#if GOOGLE_STRIP_LOG == 0
+#define COMPACT_GOOGLE_LOG_INFO google::LogMessage( \
+      __FILE__, __LINE__)
+#define LOG_TO_STRING_INFO(message) google::LogMessage( \
+      __FILE__, __LINE__, google::GLOG_INFO, message)
+#else
+#define COMPACT_GOOGLE_LOG_INFO google::NullStream()
+#define LOG_TO_STRING_INFO(message) google::NullStream()
+#endif
+
+#if GOOGLE_STRIP_LOG <= 1
+#define COMPACT_GOOGLE_LOG_WARNING google::LogMessage( \
+      __FILE__, __LINE__, google::GLOG_WARNING)
+#define LOG_TO_STRING_WARNING(message) google::LogMessage( \
+      __FILE__, __LINE__, google::GLOG_WARNING, message)
+#else
+#define COMPACT_GOOGLE_LOG_WARNING google::NullStream()
+#define LOG_TO_STRING_WARNING(message) google::NullStream()
+#endif
+
+#if GOOGLE_STRIP_LOG <= 2
+#define COMPACT_GOOGLE_LOG_ERROR google::LogMessage( \
+      __FILE__, __LINE__, google::GLOG_ERROR)
+#define LOG_TO_STRING_ERROR(message) google::LogMessage( \
+      __FILE__, __LINE__, google::GLOG_ERROR, message)
+#else
+#define COMPACT_GOOGLE_LOG_ERROR google::NullStream()
+#define LOG_TO_STRING_ERROR(message) google::NullStream()
+#endif
+
+#if GOOGLE_STRIP_LOG <= 3
+#define COMPACT_GOOGLE_LOG_FATAL google::LogMessageFatal( \
+      __FILE__, __LINE__)
+#define LOG_TO_STRING_FATAL(message) google::LogMessage( \
+      __FILE__, __LINE__, google::GLOG_FATAL, message)
+#else
+#define COMPACT_GOOGLE_LOG_FATAL google::NullStreamFatal()
+#define LOG_TO_STRING_FATAL(message) google::NullStreamFatal()
+#endif
+
+#if defined(NDEBUG) && !defined(DCHECK_ALWAYS_ON)
+#define DCHECK_IS_ON() 0
+#else
+#define DCHECK_IS_ON() 1
+#endif
+
+// For DFATAL, we want to use LogMessage (as opposed to
+// LogMessageFatal), to be consistent with the original behavior.
+#if !DCHECK_IS_ON()
+#define COMPACT_GOOGLE_LOG_DFATAL COMPACT_GOOGLE_LOG_ERROR
+#elif GOOGLE_STRIP_LOG <= 3
+#define COMPACT_GOOGLE_LOG_DFATAL google::LogMessage( \
+      __FILE__, __LINE__, google::GLOG_FATAL)
+#else
+#define COMPACT_GOOGLE_LOG_DFATAL google::NullStreamFatal()
+#endif
+
+#define GOOGLE_LOG_INFO(counter) google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO, counter, &google::LogMessage::SendToLog)
+#define SYSLOG_INFO(counter) \
+  google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO, counter, \
+  &google::LogMessage::SendToSyslogAndLog)
+#define GOOGLE_LOG_WARNING(counter)  \
+  google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING, counter, \
+  &google::LogMessage::SendToLog)
+#define SYSLOG_WARNING(counter)  \
+  google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING, counter, \
+  &google::LogMessage::SendToSyslogAndLog)
+#define GOOGLE_LOG_ERROR(counter)  \
+  google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR, counter, \
+  &google::LogMessage::SendToLog)
+#define SYSLOG_ERROR(counter)  \
+  google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR, counter, \
+  &google::LogMessage::SendToSyslogAndLog)
+#define GOOGLE_LOG_FATAL(counter) \
+  google::LogMessage(__FILE__, __LINE__, google::GLOG_FATAL, counter, \
+  &google::LogMessage::SendToLog)
+#define SYSLOG_FATAL(counter) \
+  google::LogMessage(__FILE__, __LINE__, google::GLOG_FATAL, counter, \
+  &google::LogMessage::SendToSyslogAndLog)
+#define GOOGLE_LOG_DFATAL(counter) \
+  google::LogMessage(__FILE__, __LINE__, google::DFATAL_LEVEL, counter, \
+  &google::LogMessage::SendToLog)
+#define SYSLOG_DFATAL(counter) \
+  google::LogMessage(__FILE__, __LINE__, google::DFATAL_LEVEL, counter, \
+  &google::LogMessage::SendToSyslogAndLog)
+
+#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+// A very useful logging macro to log windows errors:
+#define LOG_SYSRESULT(result) \
+  if (FAILED(HRESULT_FROM_WIN32(result))) { \
+    LPSTR message = NULL; \
+    LPSTR msg = reinterpret_cast<LPSTR>(&message); \
+    DWORD message_length = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | \
+                         FORMAT_MESSAGE_FROM_SYSTEM, \
+                         0, result, 0, msg, 100, NULL); \
+    if (message_length > 0) { \
+      google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR, 0, \
+          &google::LogMessage::SendToLog).stream() \
+          << reinterpret_cast<const char*>(message); \
+      LocalFree(message); \
+    } \
+  }
+#endif
+
+// We use the preprocessor's merging operator, "##", so that, e.g.,
+// LOG(INFO) becomes the token COMPACT_GOOGLE_LOG_INFO.  There's some funny
+// subtle difference between ostream member streaming functions (e.g.,
+// ostream::operator<<(int)) and ostream non-member streaming functions
+// (e.g., ::operator<<(ostream&, string&)): it turns out that it's
+// impossible to stream something like a string directly to an unnamed
+// ostream. We employ a neat hack by calling the stream() member
+// function of LogMessage which seems to avoid the problem.
+#define LOG(severity) COMPACT_GOOGLE_LOG_ ## severity.stream()
+#define SYSLOG(severity) SYSLOG_ ## severity(0).stream()
+
+namespace google {
+
+// They need the definitions of integer types.
+#include "glog/log_severity.h"
+#include "glog/vlog_is_on.h"
+
+// Initialize google's logging library. You will see the program name
+// specified by argv0 in log outputs.
+GOOGLE_GLOG_DLL_DECL void InitGoogleLogging(const char* argv0);
+
+// Shutdown google's logging library.
+GOOGLE_GLOG_DLL_DECL void ShutdownGoogleLogging();
+
+// Install a function which will be called after LOG(FATAL).
+GOOGLE_GLOG_DLL_DECL void InstallFailureFunction(void (*fail_func)());
+
+class LogSink;  // defined below
+
+// If a non-NULL sink pointer is given, we push this message to that sink.
+// For LOG_TO_SINK we then do normal LOG(severity) logging as well.
+// This is useful for capturing messages and passing/storing them
+// somewhere more specific than the global log of the process.
+// Argument types:
+//   LogSink* sink;
+//   LogSeverity severity;
+// The cast is to disambiguate NULL arguments.
+#define LOG_TO_SINK(sink, severity) \
+  google::LogMessage(                                    \
+      __FILE__, __LINE__,                                               \
+      google::GLOG_ ## severity,                         \
+      static_cast<google::LogSink*>(sink), true).stream()
+#define LOG_TO_SINK_BUT_NOT_TO_LOGFILE(sink, severity)                  \
+  google::LogMessage(                                    \
+      __FILE__, __LINE__,                                               \
+      google::GLOG_ ## severity,                         \
+      static_cast<google::LogSink*>(sink), false).stream()
+
+// If a non-NULL string pointer is given, we write this message to that string.
+// We then do normal LOG(severity) logging as well.
+// This is useful for capturing messages and storing them somewhere more
+// specific than the global log of the process.
+// Argument types:
+//   std::string* message;
+//   LogSeverity severity;
+// The cast is to disambiguate NULL arguments; it names std::string explicitly
+// so the macro does not depend on a using-declaration at the call site.
+// NOTE: LOG(severity) expands to LogMessage().stream() for that severity.
+#define LOG_TO_STRING(severity, message) \
+  LOG_TO_STRING_##severity(static_cast<std::string*>(message)).stream()
+
+// If a non-NULL pointer is given, we push the message onto the end
+// of a vector of strings; otherwise, we report it with LOG(severity).
+// This is handy for capturing messages and perhaps passing them back
+// to the caller, rather than reporting them immediately.
+// Argument types:
+//   LogSeverity severity;
+//   vector<string> *outvec;
+// The cast is to disambiguate NULL arguments.
+#define LOG_STRING(severity, outvec) \
+  LOG_TO_STRING_##severity(static_cast<std::vector<std::string>*>(outvec)).stream()
+
+#define LOG_IF(severity, condition) \
+  !(condition) ? (void) 0 : google::LogMessageVoidify() & LOG(severity)
+#define SYSLOG_IF(severity, condition) \
+  !(condition) ? (void) 0 : google::LogMessageVoidify() & SYSLOG(severity)
+
+#define LOG_ASSERT(condition)  \
+  LOG_IF(FATAL, !(condition)) << "Assert failed: " #condition
+#define SYSLOG_ASSERT(condition) \
+  SYSLOG_IF(FATAL, !(condition)) << "Assert failed: " #condition
+
+// CHECK dies with a fatal error if condition is not true.  It is *not*
+// controlled by DCHECK_IS_ON(), so the check will be executed regardless of
+// compilation mode.  Therefore, it is safe to do things like:
+//    CHECK(fp->Write(x) == 4)
+#define CHECK(condition)  \
+      LOG_IF(FATAL, GOOGLE_PREDICT_BRANCH_NOT_TAKEN(!(condition))) \
+             << "Check failed: " #condition " "
+
+// A container for a string pointer which can be evaluated to a bool -
+// true iff the pointer is non-NULL.
+struct CheckOpString {
+  CheckOpString(std::string* str) : str_(str) { }
+  // No destructor: if str_ is non-NULL, we're about to LOG(FATAL),
+  // so there's no point in cleaning up str_.
+  operator bool() const {
+    return GOOGLE_PREDICT_BRANCH_NOT_TAKEN(str_ != NULL);
+  }
+  std::string* str_;
+};
+
+// Function is overloaded for integral types to allow static const
+// integrals declared in classes and not defined to be used as arguments to
+// CHECK* macros. It's not encouraged though.
+template <class T>
+inline const T&       GetReferenceableValue(const T&           t) { return t; }
+inline char           GetReferenceableValue(char               t) { return t; }
+inline unsigned char  GetReferenceableValue(unsigned char      t) { return t; }
+inline signed char    GetReferenceableValue(signed char        t) { return t; }
+inline short          GetReferenceableValue(short              t) { return t; }
+inline unsigned short GetReferenceableValue(unsigned short     t) { return t; }
+inline int            GetReferenceableValue(int                t) { return t; }
+inline unsigned int   GetReferenceableValue(unsigned int       t) { return t; }
+inline long           GetReferenceableValue(long               t) { return t; }
+inline unsigned long  GetReferenceableValue(unsigned long      t) { return t; }
+inline long long      GetReferenceableValue(long long          t) { return t; }
+inline unsigned long long GetReferenceableValue(unsigned long long t) {
+  return t;
+}
+
+// This is a dummy class to define the following operator.
+struct DummyClassToDefineOperator {};
+
+}
+
+// Define global operator<< to declare using ::operator<<.
+// This declaration will allow us to use CHECK macros for user
+// defined classes which have operator<< (e.g., stl_logging.h).
+inline std::ostream& operator<<(
+    std::ostream& out, const google::DummyClassToDefineOperator&) {
+  return out;
+}
+
+namespace google {
+
+// This formats a value for a failing CHECK_XX statement.  Ordinarily,
+// it uses the definition for operator<<, with a few special cases below.
+template <typename T>
+inline void MakeCheckOpValueString(std::ostream* os, const T& v) {
+  (*os) << v;
+}
+
+// Overrides for char types provide readable values for unprintable
+// characters.
+template <> GOOGLE_GLOG_DLL_DECL
+void MakeCheckOpValueString(std::ostream* os, const char& v);
+template <> GOOGLE_GLOG_DLL_DECL
+void MakeCheckOpValueString(std::ostream* os, const signed char& v);
+template <> GOOGLE_GLOG_DLL_DECL
+void MakeCheckOpValueString(std::ostream* os, const unsigned char& v);
+
+// Build the error message string. Specify no inlining for code size.
+template <typename T1, typename T2>
+std::string* MakeCheckOpString(const T1& v1, const T2& v2, const char* exprtext)
+    __attribute__ ((noinline));
+
+namespace base {
+namespace internal {
+
+// If "s" is less than base_logging::INFO, returns base_logging::INFO.
+// If "s" is greater than base_logging::FATAL, returns
+// base_logging::ERROR.  Otherwise, returns "s".
+LogSeverity NormalizeSeverity(LogSeverity s);
+
+}  // namespace internal
+
+// A helper class for formatting "expr (V1 vs. V2)" in a CHECK_XX
+// statement.  See MakeCheckOpString for sample usage.  Other
+// approaches were considered: use of a template method (e.g.,
+// base::BuildCheckOpString(exprtext, base::Print<T1>, &v1,
+// base::Print<T2>, &v2), however this approach has complications
+// related to volatile arguments and function-pointer arguments).
+class GOOGLE_GLOG_DLL_DECL CheckOpMessageBuilder {
+ public:
+  // Inserts "exprtext" and " (" to the stream.
+  explicit CheckOpMessageBuilder(const char *exprtext);
+  // Deletes "stream_".
+  ~CheckOpMessageBuilder();
+  // For inserting the first variable.
+  std::ostream* ForVar1() { return stream_; }
+  // For inserting the second variable (adds an intermediate " vs. ").
+  std::ostream* ForVar2();
+  // Get the result (inserts the closing ")").
+  std::string* NewString();
+
+ private:
+  std::ostringstream *stream_;
+};
+
+}  // namespace base
+
+template <typename T1, typename T2>
+std::string* MakeCheckOpString(const T1& v1, const T2& v2, const char* exprtext) {
+  base::CheckOpMessageBuilder comb(exprtext);
+  MakeCheckOpValueString(comb.ForVar1(), v1);
+  MakeCheckOpValueString(comb.ForVar2(), v2);
+  return comb.NewString();
+}
+
+// Helper functions for CHECK_OP macro.
+// The (int, int) specialization works around the issue that the compiler
+// will not instantiate the template version of the function on values of
+// unnamed enum type - see comment below.
+#define DEFINE_CHECK_OP_IMPL(name, op) \
+  template <typename T1, typename T2> \
+  inline std::string* name##Impl(const T1& v1, const T2& v2,    \
+                            const char* exprtext) { \
+    if (GOOGLE_PREDICT_TRUE(v1 op v2)) return NULL; \
+    else return MakeCheckOpString(v1, v2, exprtext); \
+  } \
+  inline std::string* name##Impl(int v1, int v2, const char* exprtext) { \
+    return name##Impl<int, int>(v1, v2, exprtext); \
+  }
+
+// We use the full name Check_EQ, Check_NE, etc. in case the file including
+// base/logging.h provides its own #defines for the simpler names EQ, NE, etc.
+// This happens if, for example, those are used as token names in a
+// yacc grammar.
+DEFINE_CHECK_OP_IMPL(Check_EQ, ==)  // Compilation error with CHECK_EQ(NULL, x)?
+DEFINE_CHECK_OP_IMPL(Check_NE, !=)  // Use CHECK(x == NULL) instead.
+DEFINE_CHECK_OP_IMPL(Check_LE, <=)
+DEFINE_CHECK_OP_IMPL(Check_LT, < )
+DEFINE_CHECK_OP_IMPL(Check_GE, >=)
+DEFINE_CHECK_OP_IMPL(Check_GT, > )
+#undef DEFINE_CHECK_OP_IMPL
+
+// Helper macro for binary operators.
+// Don't use this macro directly in your code, use CHECK_EQ et al below.
+
+#if defined(STATIC_ANALYSIS)
+// Only for static analysis tool to know that it is equivalent to assert
+#define CHECK_OP_LOG(name, op, val1, val2, log) CHECK((val1) op (val2))
+#elif DCHECK_IS_ON()
+// In debug mode, avoid constructing CheckOpStrings if possible,
+// to reduce the overhead of CHECK statements by 2x.
+// Real DCHECK-heavy tests have seen 1.5x speedups.
+
+// The meaning of "string" might be different between now and
+// when this macro gets invoked (e.g., if someone is experimenting
+// with other string implementations that get defined after this
+// file is included).  Save the current meaning now and use it
+// in the macro.
+typedef std::string _Check_string;
+#define CHECK_OP_LOG(name, op, val1, val2, log)                         \
+  while (google::_Check_string* _result =                \
+         google::Check##name##Impl(                      \
+             google::GetReferenceableValue(val1),        \
+             google::GetReferenceableValue(val2),        \
+             #val1 " " #op " " #val2))                                  \
+    log(__FILE__, __LINE__,                                             \
+        google::CheckOpString(_result)).stream()
+#else
+// In optimized mode, use CheckOpString to hint to compiler that
+// the while condition is unlikely.
+#define CHECK_OP_LOG(name, op, val1, val2, log)                         \
+  while (google::CheckOpString _result =                 \
+         google::Check##name##Impl(                      \
+             google::GetReferenceableValue(val1),        \
+             google::GetReferenceableValue(val2),        \
+             #val1 " " #op " " #val2))                                  \
+    log(__FILE__, __LINE__, _result).stream()
+#endif  // STATIC_ANALYSIS, DCHECK_IS_ON()
+
+#if GOOGLE_STRIP_LOG <= 3
+#define CHECK_OP(name, op, val1, val2) \
+  CHECK_OP_LOG(name, op, val1, val2, google::LogMessageFatal)
+#else
+#define CHECK_OP(name, op, val1, val2) \
+  CHECK_OP_LOG(name, op, val1, val2, google::NullStreamFatal)
+#endif // STRIP_LOG <= 3
+
+// Equality/Inequality checks - compare two values, and log a FATAL message
+// including the two values when the result is not as expected.  The values
+// must have operator<<(ostream, ...) defined.
+//
+// You may append to the error message like so:
+//   CHECK_NE(1, 2) << ": The world must be ending!";
+//
+// We are very careful to ensure that each argument is evaluated exactly
+// once, and that anything which is legal to pass as a function argument is
+// legal here.  In particular, the arguments may be temporary expressions
+// which will end up being destroyed at the end of the apparent statement,
+// for example:
+//   CHECK_EQ(string("abc")[1], 'b');
+//
+// WARNING: These don't compile correctly if one of the arguments is a pointer
+// and the other is NULL. To work around this, simply static_cast NULL to the
+// type of the desired pointer.
+
+#define CHECK_EQ(val1, val2) CHECK_OP(_EQ, ==, val1, val2)
+#define CHECK_NE(val1, val2) CHECK_OP(_NE, !=, val1, val2)
+#define CHECK_LE(val1, val2) CHECK_OP(_LE, <=, val1, val2)
+#define CHECK_LT(val1, val2) CHECK_OP(_LT, < , val1, val2)
+#define CHECK_GE(val1, val2) CHECK_OP(_GE, >=, val1, val2)
+#define CHECK_GT(val1, val2) CHECK_OP(_GT, > , val1, val2)
+
+// Check that the input is non NULL.  This is very useful in constructor
+// initializer lists.
+
+#define CHECK_NOTNULL(val) \
+  google::CheckNotNull(__FILE__, __LINE__, "'" #val "' Must be non NULL", (val))
+
+// Helper functions for string comparisons.
+// To avoid bloat, the definitions are in logging.cc.
+#define DECLARE_CHECK_STROP_IMPL(func, expected) \
+  GOOGLE_GLOG_DLL_DECL std::string* Check##func##expected##Impl( \
+      const char* s1, const char* s2, const char* names);
+DECLARE_CHECK_STROP_IMPL(strcmp, true)
+DECLARE_CHECK_STROP_IMPL(strcmp, false)
+DECLARE_CHECK_STROP_IMPL(strcasecmp, true)
+DECLARE_CHECK_STROP_IMPL(strcasecmp, false)
+#undef DECLARE_CHECK_STROP_IMPL
+
+// Helper macro for string comparisons.
+// Don't use this macro directly in your code, use CHECK_STREQ et al below.
+//
+// Expansion: a `while` whose condition declares a google::CheckOpString
+// named _result from google::Check<func><expected>Impl(s1, s2,
+// "<s1> <op> <s2>"); the loop body is `LOG(FATAL) << *_result.str_`, so
+// the caller may append further text with operator<<.  s1 and s2 each
+// appear once as evaluated expressions (the other uses are stringified).
+#define CHECK_STROP(func, op, expected, s1, s2) \
+  while (google::CheckOpString _result = \
+         google::Check##func##expected##Impl((s1), (s2), \
+                                     #s1 " " #op " " #s2)) \
+    LOG(FATAL) << *_result.str_
+
+
+// String (char*) equality/inequality checks.
+// CASE versions are case-insensitive.
+//
+// Note that "s1" and "s2" may be temporary strings which are destroyed
+// by the compiler at the end of the current "full expression"
+// (e.g. CHECK_STREQ(Foo().c_str(), Bar().c_str())).
+
+#define CHECK_STREQ(s1, s2) CHECK_STROP(strcmp, ==, true, s1, s2)
+#define CHECK_STRNE(s1, s2) CHECK_STROP(strcmp, !=, false, s1, s2)
+#define CHECK_STRCASEEQ(s1, s2) CHECK_STROP(strcasecmp, ==, true, s1, s2)
+#define CHECK_STRCASENE(s1, s2) CHECK_STROP(strcasecmp, !=, false, s1, s2)
+
+// Bounds checks against an array A whose size is known at the point of use
+// (the element count is computed as sizeof(A)/sizeof((A)[0])):
+//   CHECK_INDEX(I, A) requires I <  element count of A;
+//   CHECK_BOUND(B, A) requires B <= element count of A.
+// The arguments are parenthesized in the expansion so that an argument
+// containing a lower-precedence operator (e.g. `c ? i : j` or `i | 1`) is
+// compared as a whole rather than being split by `<` / `<=`.
+#define CHECK_INDEX(I,A) CHECK((I) < (sizeof(A)/sizeof((A)[0])))
+#define CHECK_BOUND(B,A) CHECK((B) <= (sizeof(A)/sizeof((A)[0])))
+
+// Checks that val1 lies within +/- 0.000000000000001L of val2, using a
+// CHECK_LE followed by a CHECK_GE wrapped in do { } while (0).
+// Unlike the CHECK_EQ family, val1 and val2 each appear in both inner
+// checks, so both are evaluated twice; avoid arguments with side effects.
+// Because of the do/while wrapper, nothing can be streamed onto this macro
+// with operator<<.
+#define CHECK_DOUBLE_EQ(val1, val2)              \
+  do {                                           \
+    CHECK_LE((val1), (val2)+0.000000000000001L); \
+    CHECK_GE((val1), (val2)-0.000000000000001L); \
+  } while (0)
+
+// Checks that val1 lies within [val2 - margin, val2 + margin], using a
+// CHECK_LE followed by a CHECK_GE wrapped in do { } while (0).
+// val1, val2 and margin each appear in both inner checks, so all three are
+// evaluated twice; avoid arguments with side effects.  Because of the
+// do/while wrapper, nothing can be streamed onto this macro with
+// operator<<.
+#define CHECK_NEAR(val1, val2, margin)           \
+  do {                                           \
+    CHECK_LE((val1), (val2)+(margin));           \
+    CHECK_GE((val1), (val2)-(margin));           \
+  } while (0)
+
+// perror()..googly style!
+//
+// PLOG() and PLOG_IF() and PCHECK() behave exactly like their LOG* and
+// CHECK equivalents with the addition that they postpend a description
+// of the current state of errno to their output lines.
+
+#define PLOG(severity) GOOGLE_PLOG(severity, 0).stream()
+
+#define GOOGLE_PLOG(severity, counter)  \
+  google::ErrnoLogMessage( \
+      __FILE__, __LINE__, google::GLOG_ ## severity, counter, \
+      &google::LogMessage::SendToLog)
+
+#define PLOG_IF(severity, condition) \
+  !(condition) ? (void) 0 : google::LogMessageVoidify() & PLOG(severity)
+
+// A CHECK() macro that postpends errno if the condition is false. E.g.
+//
+// if (poll(fds, nfds, timeout) == -1) { PCHECK(errno == EINTR); ... }
+#define PCHECK(condition)  \
+      PLOG_IF(FATAL, GOOGLE_PREDICT_BRANCH_NOT_TAKEN(!(condition))) \
+              << "Check failed: " #condition " "
+
+// A CHECK() macro that lets you assert the success of a function that
+// returns -1 and sets errno in case of an error. E.g.
+//
+// CHECK_ERR(mkdir(path, 0700));
+//
+// or
+//
+// int fd = open(filename, flags); CHECK_ERR(fd) << ": open " << filename;
+#define CHECK_ERR(invocation)                                          \
+PLOG_IF(FATAL, GOOGLE_PREDICT_BRANCH_NOT_TAKEN((invocation) == -1))    \
+        << #invocation
+
+// Use macro expansion to create, for each use of LOG_EVERY_N(), static
+// variables with the __LINE__ expansion as part of the variable name.
+#define LOG_EVERY_N_VARNAME(base, line) LOG_EVERY_N_VARNAME_CONCAT(base, line)
+#define LOG_EVERY_N_VARNAME_CONCAT(base, line) base ## line
+
+#define LOG_OCCURRENCES LOG_EVERY_N_VARNAME(occurrences_, __LINE__)
+#define LOG_OCCURRENCES_MOD_N LOG_EVERY_N_VARNAME(occurrences_mod_n_, __LINE__)
+
+// Logs on the 1st, (n+1)th, (2n+1)th, ... execution of the call site.
+//   severity   - severity suffix, pasted onto google::GLOG_.
+//   n          - period between logged executions.
+//   what_to_do - LogMessage member function whose address is passed as
+//                the send method.
+// Declares two per-call-site static ints (names include __LINE__): a
+// total occurrence count, passed to LogMessage as its counter argument,
+// and a count that is reduced by n whenever it exceeds n.
+// The expansion is several statements, not one: it cannot be used as the
+// sole body of an unbraced if/else, and two uses on the same source line in
+// one scope would declare the same variables twice.  The counters are plain
+// ints updated without any synchronization in this macro.
+#define SOME_KIND_OF_LOG_EVERY_N(severity, n, what_to_do) \
+  static int LOG_OCCURRENCES = 0, LOG_OCCURRENCES_MOD_N = 0; \
+  ++LOG_OCCURRENCES; \
+  if (++LOG_OCCURRENCES_MOD_N > n) LOG_OCCURRENCES_MOD_N -= n; \
+  if (LOG_OCCURRENCES_MOD_N == 1) \
+    google::LogMessage( \
+        __FILE__, __LINE__, google::GLOG_ ## severity, LOG_OCCURRENCES, \
+        &what_to_do).stream()
+
+// Conditional variant of SOME_KIND_OF_LOG_EVERY_N.
+// The total occurrence counter is incremented on every execution, whether
+// or not `condition` holds.  The modulo counter is advanced only when
+// `condition` is true (it sits on the right of &&), and a message is
+// logged when its new value equals 1 % n.
+// `condition` and `n` are expanded without added parentheses here; the
+// LOG_IF_EVERY_N wrapper supplies them.  Like its sibling, this expands to
+// several statements and uses unsynchronized per-call-site static ints.
+#define SOME_KIND_OF_LOG_IF_EVERY_N(severity, condition, n, what_to_do) \
+  static int LOG_OCCURRENCES = 0, LOG_OCCURRENCES_MOD_N = 0; \
+  ++LOG_OCCURRENCES; \
+  if (condition && \
+      ((LOG_OCCURRENCES_MOD_N=(LOG_OCCURRENCES_MOD_N + 1) % n) == (1 % n))) \
+    google::LogMessage( \
+        __FILE__, __LINE__, google::GLOG_ ## severity, LOG_OCCURRENCES, \
+        &what_to_do).stream()
+
+// Same counting logic as SOME_KIND_OF_LOG_EVERY_N, but the message object
+// is a google::ErrnoLogMessage instead of a google::LogMessage.
+// Expands to several statements and uses unsynchronized per-call-site
+// static ints, with the same usage restrictions as its LOG sibling.
+#define SOME_KIND_OF_PLOG_EVERY_N(severity, n, what_to_do) \
+  static int LOG_OCCURRENCES = 0, LOG_OCCURRENCES_MOD_N = 0; \
+  ++LOG_OCCURRENCES; \
+  if (++LOG_OCCURRENCES_MOD_N > n) LOG_OCCURRENCES_MOD_N -= n; \
+  if (LOG_OCCURRENCES_MOD_N == 1) \
+    google::ErrnoLogMessage( \
+        __FILE__, __LINE__, google::GLOG_ ## severity, LOG_OCCURRENCES, \
+        &what_to_do).stream()
+
+// Logs only on the first n executions of the call site.
+// A per-call-site static int is incremented while it is <= n, so it stops
+// at n + 1; a message is emitted only while the (already incremented)
+// value is still <= n, i.e. for counter values 1..n.  The counter is passed
+// to LogMessage as its counter argument.
+// Expands to several statements and uses an unsynchronized static int, so
+// it cannot be the sole body of an unbraced if/else.
+#define SOME_KIND_OF_LOG_FIRST_N(severity, n, what_to_do) \
+  static int LOG_OCCURRENCES = 0; \
+  if (LOG_OCCURRENCES <= n) \
+    ++LOG_OCCURRENCES; \
+  if (LOG_OCCURRENCES <= n) \
+    google::LogMessage( \
+        __FILE__, __LINE__, google::GLOG_ ## severity, LOG_OCCURRENCES, \
+        &what_to_do).stream()
+
+// Internal helpers; not part of the user-facing logging API.
+namespace glog_internal_namespace_ {
+// Empty class template parameterized on a bool.
+// NOTE(review): presumably a building block for a pre-C++11 compile-time
+// assertion -- no use is visible in this part of the header.
+template <bool>
+struct CompileAssert {
+};
+// Forward declaration only; LogMessage::RecordCrashReason takes a pointer
+// to this type.
+struct CrashReason;
+
+// Returns true if FailureSignalHandler is installed.
+// Needs to be exported since it's used by the signalhandler_unittest.
+GOOGLE_GLOG_DLL_DECL bool IsFailureSignalHandlerInstalled();
+}  // namespace glog_internal_namespace_
+
+#define LOG_EVERY_N(severity, n)                                        \
+  SOME_KIND_OF_LOG_EVERY_N(severity, (n), google::LogMessage::SendToLog)
+
+#define SYSLOG_EVERY_N(severity, n) \
+  SOME_KIND_OF_LOG_EVERY_N(severity, (n), google::LogMessage::SendToSyslogAndLog)
+
+#define PLOG_EVERY_N(severity, n) \
+  SOME_KIND_OF_PLOG_EVERY_N(severity, (n), google::LogMessage::SendToLog)
+
+#define LOG_FIRST_N(severity, n) \
+  SOME_KIND_OF_LOG_FIRST_N(severity, (n), google::LogMessage::SendToLog)
+
+#define LOG_IF_EVERY_N(severity, condition, n) \
+  SOME_KIND_OF_LOG_IF_EVERY_N(severity, (condition), (n), google::LogMessage::SendToLog)
+
+// We want the special COUNTER value available for LOG_EVERY_X()'ed messages
+enum PRIVATE_Counter {COUNTER};
+
+#ifdef GLOG_NO_ABBREVIATED_SEVERITIES
+// wingdi.h defines ERROR to be 0. When we call LOG(ERROR), it gets
+// substituted with 0, and it expands to COMPACT_GOOGLE_LOG_0. To allow us
+// to keep using this syntax, we define this macro to do the same thing
+// as COMPACT_GOOGLE_LOG_ERROR.
+#define COMPACT_GOOGLE_LOG_0 COMPACT_GOOGLE_LOG_ERROR
+#define SYSLOG_0 SYSLOG_ERROR
+#define LOG_TO_STRING_0 LOG_TO_STRING_ERROR
+// Needed for LOG_IS_ON(ERROR).
+const LogSeverity GLOG_0 = GLOG_ERROR;
+#else
+// Users may include windows.h after logging.h without
+// GLOG_NO_ABBREVIATED_SEVERITIES nor WIN32_LEAN_AND_MEAN.
+// For this case, we cannot detect if ERROR is defined before users
+// actually use ERROR. Let's make an undefined symbol to warn users.
+# define GLOG_ERROR_MSG ERROR_macro_is_defined_Define_GLOG_NO_ABBREVIATED_SEVERITIES_before_including_logging_h_See_the_document_for_detail
+# define COMPACT_GOOGLE_LOG_0 GLOG_ERROR_MSG
+# define SYSLOG_0 GLOG_ERROR_MSG
+# define LOG_TO_STRING_0 GLOG_ERROR_MSG
+# define GLOG_0 GLOG_ERROR_MSG
+#endif
+
+// Plus some debug-logging macros that get compiled to nothing for production
+
+#if DCHECK_IS_ON()
+
+#define DLOG(severity) LOG(severity)
+#define DVLOG(verboselevel) VLOG(verboselevel)
+#define DLOG_IF(severity, condition) LOG_IF(severity, condition)
+#define DLOG_EVERY_N(severity, n) LOG_EVERY_N(severity, n)
+#define DLOG_IF_EVERY_N(severity, condition, n) \
+  LOG_IF_EVERY_N(severity, condition, n)
+#define DLOG_ASSERT(condition) LOG_ASSERT(condition)
+
+// debug-only checking.  executed if DCHECK_IS_ON().
+#define DCHECK(condition) CHECK(condition)
+#define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2)
+#define DCHECK_NE(val1, val2) CHECK_NE(val1, val2)
+#define DCHECK_LE(val1, val2) CHECK_LE(val1, val2)
+#define DCHECK_LT(val1, val2) CHECK_LT(val1, val2)
+#define DCHECK_GE(val1, val2) CHECK_GE(val1, val2)
+#define DCHECK_GT(val1, val2) CHECK_GT(val1, val2)
+#define DCHECK_NOTNULL(val) CHECK_NOTNULL(val)
+#define DCHECK_STREQ(str1, str2) CHECK_STREQ(str1, str2)
+#define DCHECK_STRCASEEQ(str1, str2) CHECK_STRCASEEQ(str1, str2)
+#define DCHECK_STRNE(str1, str2) CHECK_STRNE(str1, str2)
+#define DCHECK_STRCASENE(str1, str2) CHECK_STRCASENE(str1, str2)
+
+#else  // !DCHECK_IS_ON()
+
+#define DLOG(severity) \
+  true ? (void) 0 : google::LogMessageVoidify() & LOG(severity)
+
+#define DVLOG(verboselevel) \
+  (true || !VLOG_IS_ON(verboselevel)) ?\
+    (void) 0 : google::LogMessageVoidify() & LOG(INFO)
+
+#define DLOG_IF(severity, condition) \
+  (true || !(condition)) ? (void) 0 : google::LogMessageVoidify() & LOG(severity)
+
+#define DLOG_EVERY_N(severity, n) \
+  true ? (void) 0 : google::LogMessageVoidify() & LOG(severity)
+
+#define DLOG_IF_EVERY_N(severity, condition, n) \
+  (true || !(condition))? (void) 0 : google::LogMessageVoidify() & LOG(severity)
+
+#define DLOG_ASSERT(condition) \
+  true ? (void) 0 : LOG_ASSERT(condition)
+
+// MSVC warning C4127: conditional expression is constant
+#define DCHECK(condition) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK(condition)
+
+#define DCHECK_EQ(val1, val2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_EQ(val1, val2)
+
+#define DCHECK_NE(val1, val2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_NE(val1, val2)
+
+#define DCHECK_LE(val1, val2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_LE(val1, val2)
+
+#define DCHECK_LT(val1, val2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_LT(val1, val2)
+
+#define DCHECK_GE(val1, val2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_GE(val1, val2)
+
+#define DCHECK_GT(val1, val2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_GT(val1, val2)
+
+// You may see warnings in release mode if you don't use the return
+// value of DCHECK_NOTNULL. Please just use DCHECK for such cases.
+#define DCHECK_NOTNULL(val) (val)
+
+#define DCHECK_STREQ(str1, str2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_STREQ(str1, str2)
+
+#define DCHECK_STRCASEEQ(str1, str2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_STRCASEEQ(str1, str2)
+
+#define DCHECK_STRNE(str1, str2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_STRNE(str1, str2)
+
+#define DCHECK_STRCASENE(str1, str2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_STRCASENE(str1, str2)
+
+#endif  // DCHECK_IS_ON()
+
+// Log only in verbose mode.
+
+#define VLOG(verboselevel) LOG_IF(INFO, VLOG_IS_ON(verboselevel))
+
+#define VLOG_IF(verboselevel, condition) \
+  LOG_IF(INFO, (condition) && VLOG_IS_ON(verboselevel))
+
+#define VLOG_EVERY_N(verboselevel, n) \
+  LOG_IF_EVERY_N(INFO, VLOG_IS_ON(verboselevel), n)
+
+#define VLOG_IF_EVERY_N(verboselevel, condition, n) \
+  LOG_IF_EVERY_N(INFO, (condition) && VLOG_IS_ON(verboselevel), n)
+
+namespace base_logging {
+
+// LogMessage::LogStream is a std::ostream backed by this streambuf.
+// This class ignores overflow and leaves two bytes at the end of the
+// buffer to allow for a '\n' and '\0'.
+class GOOGLE_GLOG_DLL_DECL LogStreamBuf : public std::streambuf {
+ public:
+  // REQUIREMENTS: "len" must be >= 2 to account for the '\n' and '\0'.
+  // The put area is set to [buf, buf + len - 2), which keeps the last two
+  // bytes of the caller's buffer out of the stream's reach.  A smaller
+  // "len" would place the end of the put area before "buf".
+  LogStreamBuf(char *buf, int len) {
+    setp(buf, buf + len - 2);
+  }
+
+  // This effectively ignores overflow: the character is returned (which
+  // reports success to the caller) without being stored anywhere.
+  virtual int_type overflow(int_type ch) {
+    return ch;
+  }
+
+  // Legacy public ostrstream method.
+  // Returns the distance from the start of the put area to the current
+  // put position.
+  size_t pcount() const { return pptr() - pbase(); }
+  // Public forwarder to std::streambuf::pbase() (start of the put area).
+  char* pbase() const { return std::streambuf::pbase(); }
+};
+
+}  // namespace base_logging
+
+//
+// This class more or less represents a particular log message.  You
+// create an instance of LogMessage and then stream stuff to it.
+// When you finish streaming to it, ~LogMessage is called and the
+// full message gets streamed to the appropriate destination.
+//
+// You shouldn't actually use LogMessage's constructor to log things,
+// though.  You should use the LOG() macro (and variants thereof)
+// above.
+class GOOGLE_GLOG_DLL_DECL LogMessage {
+public:
+  enum {
+    // Passing kNoLogPrefix for the line number disables the
+    // log-message prefix. Useful for using the LogMessage
+    // infrastructure as a printing utility. See also the --log_prefix
+    // flag for controlling the log-message prefix on an
+    // application-wide basis.
+    kNoLogPrefix = -1
+  };
+
+  // LogStream inherits from a non-DLL-exported class (std::ostream)
+  // and VC++ produces a warning for this situation.
+  // However, MSDN says "C4275 can be ignored in Microsoft Visual C++
+  // 2005 if you are deriving from a type in the Standard C++ Library"
+  // http://msdn.microsoft.com/en-us/library/3tdb471s(VS.80).aspx
+  // Let's just ignore the warning.
+#ifdef _MSC_VER
+# pragma warning(push)
+# pragma warning(disable: 4275)
+#endif
+  class GOOGLE_GLOG_DLL_DECL LogStream : public std::ostream {
+#ifdef _MSC_VER
+# pragma warning(pop)
+#endif
+  public:
+    // Builds an ostream over the caller-supplied buffer [buf, buf + len).
+    // The base std::ostream is first constructed with a NULL streambuf and
+    // then pointed at streambuf_ via rdbuf(), because streambuf_ is a
+    // member and is not yet constructed when the base is initialized.
+    // "ctr" is the initial value of the counter returned by ctr().
+    LogStream(char *buf, int len, int ctr)
+        : std::ostream(NULL),
+          streambuf_(buf, len),
+          ctr_(ctr),
+          self_(this) {
+      rdbuf(&streambuf_);
+    }
+
+    int ctr() const { return ctr_; }
+    void set_ctr(int ctr) { ctr_ = ctr; }
+    // Returns the `this` pointer recorded at construction time.
+    LogStream* self() const { return self_; }
+
+    // Legacy std::streambuf methods.
+    size_t pcount() const { return streambuf_.pcount(); }
+    char* pbase() const { return streambuf_.pbase(); }
+    char* str() const { return pbase(); }
+
+  private:
+    // Declared but not defined here: copying is disabled.
+    LogStream(const LogStream&);
+    LogStream& operator=(const LogStream&);
+    base_logging::LogStreamBuf streambuf_;
+    int ctr_;  // Counter hack (for the LOG_EVERY_X() macro)
+    LogStream *self_;  // Consistency check hack
+  };
+
+public:
+  // icc 8 requires this typedef to avoid an internal compiler error.
+  typedef void (LogMessage::*SendMethod)();
+
+  // General constructor: every other constructor below is documented as
+  // implying fixed values for some of these arguments.
+  LogMessage(const char* file, int line, LogSeverity severity, int ctr,
+             SendMethod send_method);
+
+  // Two special constructors that generate reduced amounts of code at
+  // LOG call sites for common cases.
+
+  // Used for LOG(INFO): Implied are:
+  // severity = INFO, ctr = 0, send_method = &LogMessage::SendToLog.
+  //
+  // Using this constructor instead of the more complex constructor above
+  // saves 19 bytes per call site.
+  LogMessage(const char* file, int line);
+
+  // Used for LOG(severity) where severity != INFO.  Implied
+  // are: ctr = 0, send_method = &LogMessage::SendToLog
+  //
+  // Using this constructor instead of the more complex constructor above
+  // saves 17 bytes per call site.
+  LogMessage(const char* file, int line, LogSeverity severity);
+
+  // Constructor to log this message to a specified sink (if not NULL).
+  // Implied are: ctr = 0, send_method = &LogMessage::SendToSinkAndLog if
+  // also_send_to_log is true, send_method = &LogMessage::SendToSink otherwise.
+  LogMessage(const char* file, int line, LogSeverity severity, LogSink* sink,
+             bool also_send_to_log);
+
+  // Constructor where we also give a vector<string> pointer
+  // for storing the messages (if the pointer is not NULL).
+  // Implied are: ctr = 0, send_method = &LogMessage::SaveOrSendToLog.
+  LogMessage(const char* file, int line, LogSeverity severity,
+             std::vector<std::string>* outvec);
+
+  // Constructor where we also give a string pointer for storing the
+  // message (if the pointer is not NULL).  Implied are: ctr = 0,
+  // send_method = &LogMessage::WriteToStringAndLog.
+  LogMessage(const char* file, int line, LogSeverity severity,
+             std::string* message);
+
+  // A special constructor used for check failures
+  LogMessage(const char* file, int line, const CheckOpString& result);
+
+  ~LogMessage();
+
+  // Flush a buffered message to the sink set in the constructor.  Always
+  // called by the destructor, it may also be called from elsewhere if
+  // needed.  Only the first call is actioned; any later ones are ignored.
+  void Flush();
+
+  // An arbitrary limit on the length of a single log message.  This
+  // is so that streaming can be done more efficiently.
+  static const size_t kMaxLogMessageLen;
+
+  // These should not be called directly outside of logging.*,
+  // only passed as SendMethod arguments to other LogMessage methods:
+  void SendToLog();  // Actually dispatch to the logs
+  void SendToSyslogAndLog();  // Actually dispatch to syslog and the logs
+
+  // Call abort() or similar to perform LOG(FATAL) crash.
+  static void __attribute__ ((noreturn)) Fail();
+
+  // The stream that callers append message text to with operator<<.
+  std::ostream& stream();
+
+  // NOTE(review): presumably the errno value captured when the message was
+  // created -- the definition is not in this header; confirm in logging.cc.
+  int preserved_errno() const;
+
+  // Must be called without the log_mutex held.  (L < log_mutex)
+  static int64 num_messages(int severity);
+
+  // Defined outside this header; see allocated_ / data_ below.
+  struct LogMessageData;
+
+private:
+  // Fully internal SendMethod cases:
+  void SendToSinkAndLog();  // Send to sink if provided and dispatch to the logs
+  void SendToSink();  // Send to sink if provided, do nothing otherwise.
+
+  // Write to string if provided and dispatch to the logs.
+  void WriteToStringAndLog();
+
+  void SaveOrSendToLog();  // Save to stringvec if provided, else to logs
+
+  // Shared initialization helper taking the same file/line/severity/send
+  // method information as the constructors.
+  void Init(const char* file, int line, LogSeverity severity,
+            void (LogMessage::*send_method)());
+
+  // Used to fill in crash information during LOG(FATAL) failures.
+  void RecordCrashReason(glog_internal_namespace_::CrashReason* reason);
+
+  // Counts of messages sent at each priority:
+  static int64 num_messages_[NUM_SEVERITIES];  // under log_mutex
+
+  // We keep the data in a separate struct so that each instance of
+  // LogMessage uses less stack space.
+  // NOTE(review): the distinction between allocated_ and data_ is not
+  // visible in this header -- see the constructor definitions.
+  LogMessageData* allocated_;
+  LogMessageData* data_;
+
+  friend class LogDestination;
+
+  // Declared but not defined here: copying is disabled.
+  LogMessage(const LogMessage&);
+  void operator=(const LogMessage&);
+};
+
+// This class happens to be thread-hostile because all instances share
+// a single data buffer, but since it can only be created just before
+// the process dies, we don't worry so much.
+class GOOGLE_GLOG_DLL_DECL LogMessageFatal : public LogMessage {
+ public:
+  // Mirrors the (file, line) LogMessage constructor.
+  LogMessageFatal(const char* file, int line);
+  // Mirrors the check-failure LogMessage constructor; this is the form
+  // used by CHECK_OP when GOOGLE_STRIP_LOG <= 3.
+  LogMessageFatal(const char* file, int line, const CheckOpString& result);
+  // Declared noreturn: control never continues past the destruction of a
+  // LogMessageFatal object.
+  __attribute__ ((noreturn)) ~LogMessageFatal();
+};
+
+// A non-macro interface to the log facility; (useful
+// when the logging level is not a compile-time constant).
+// Streams "msg" into a temporary LogMessage of the given severity.
+// Because __FILE__ and __LINE__ are expanded here, inside this header,
+// every message logged through this function is attributed to this
+// location rather than to the caller's; see LOG_AT_LEVEL below for a
+// variant that reports the call site.
+inline void LogAtLevel(int const severity, std::string const &msg) {
+  LogMessage(__FILE__, __LINE__, severity).stream() << msg;
+}
+
+// A macro alternative of LogAtLevel. New code may want to use this
+// version since there are two advantages: 1. this version outputs the
+// file name and the line number where this macro is put like other
+// LOG macros, 2. this macro can be used as C++ stream.
+#define LOG_AT_LEVEL(severity) google::LogMessage(__FILE__, __LINE__, severity).stream()
+
+
+// Check if it's compiled in C++11 mode.
+//
+// GXX_EXPERIMENTAL_CXX0X is defined by gcc and clang up to at least
+// gcc-4.7 and clang-3.1 (2011-12-13).  __cplusplus was defined to 1
+// in gcc before 4.7 (Crosstool 16) and clang before 3.1, but is
+// defined according to the language version in effect thereafter.
+// Microsoft Visual Studio 14 (2015) sets __cplusplus==199711 despite
+// reasonably good C++11 support, so we set LANG_CXX for it and
+// newer versions (_MSC_VER >= 1900).
+#if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L || \
+     (defined(_MSC_VER) && _MSC_VER >= 1900))
+// Helper for CHECK_NOTNULL().
+//
+// In C++11, all cases can be handled by a single function. Since the value
+// category of the argument is preserved (also for rvalue references),
+// member initializer lists like the one below will compile correctly:
+//
+//   Foo()
+//     : x_(CHECK_NOTNULL(MethodReturningUniquePtr())) {}
+// file, line - source location reported on failure.
+// names      - message text; CHECK_NOTNULL passes "'<expr>' Must be non NULL".
+// t          - value to test; taken as a forwarding reference.
+// Returns std::forward<T>(t) when t != nullptr.  When t == nullptr, a
+// temporary LogMessageFatal is constructed from a heap-allocated copy of
+// "names"; its destructor is declared noreturn, so the return statement is
+// not reached in that case.
+template <typename T>
+T CheckNotNull(const char* file, int line, const char* names, T&& t) {
+ if (t == nullptr) {
+   LogMessageFatal(file, line, new std::string(names));
+ }
+ return std::forward<T>(t);
+}
+
+#else
+
+// A small helper for CHECK_NOTNULL().
+// Pre-C++11 variant: accepts raw pointers only.
+// Returns t unchanged when it is not NULL.  When t is NULL, a temporary
+// LogMessageFatal is constructed from a heap-allocated copy of "names";
+// its destructor is declared noreturn, so the return statement is not
+// reached in that case.
+template <typename T>
+T* CheckNotNull(const char *file, int line, const char *names, T* t) {
+  if (t == NULL) {
+    LogMessageFatal(file, line, new std::string(names));
+  }
+  return t;
+}
+#endif
+
+// Allow folks to put a counter in the LOG_EVERY_X()'ed messages. This
+// only works if ostream is a LogStream. If the ostream is not a
+// LogStream you'll get an assert saying as much at runtime.
+GOOGLE_GLOG_DLL_DECL std::ostream& operator<<(std::ostream &os,
+                                              const PRIVATE_Counter&);
+
+
+// Derived class for PLOG*() above.
+class GOOGLE_GLOG_DLL_DECL ErrnoLogMessage : public LogMessage {
+ public:
+
+  // Takes the same arguments as the general LogMessage constructor;
+  // GOOGLE_PLOG and SOME_KIND_OF_PLOG_EVERY_N construct it this way.
+  ErrnoLogMessage(const char* file, int line, LogSeverity severity, int ctr,
+                  void (LogMessage::*send_method)());
+
+  // Postpends ": strerror(errno) [errno]".
+  ~ErrnoLogMessage();
+
+ private:
+  // Declared but not defined here: copying is disabled.
+  ErrnoLogMessage(const ErrnoLogMessage&);
+  void operator=(const ErrnoLogMessage&);
+};
+
+
+// This class is used to explicitly ignore values in the conditional
+// logging macros.  This avoids compiler warnings like "value computed
+// is not used" and "statement has no effect".
+
+class GOOGLE_GLOG_DLL_DECL LogMessageVoidify {
+ public:
+  LogMessageVoidify() { }
+  // This has to be an operator with a precedence lower than << but
+  // higher than ?:
+  // It accepts the fully streamed message and returns void, so that in
+  //   cond ? (void) 0 : LogMessageVoidify() & LOG(x) << ...
+  // both arms of the conditional have type void.
+  void operator&(std::ostream&) { }
+};
+
+
+// Flushes all log files that contain messages that are at least of
+// the specified severity level.  Thread-safe.
+GOOGLE_GLOG_DLL_DECL void FlushLogFiles(LogSeverity min_severity);
+
+// Flushes all log files that contain messages that are at least of
+// the specified severity level. Thread-hostile because it ignores
+// locking -- used for catastrophic failures.
+GOOGLE_GLOG_DLL_DECL void FlushLogFilesUnsafe(LogSeverity min_severity);
+
+//
+// Set the destination to which a particular severity level of log
+// messages is sent.  If base_filename is "", it means "don't log this
+// severity".  Thread-safe.
+//
+GOOGLE_GLOG_DLL_DECL void SetLogDestination(LogSeverity severity,
+                                            const char* base_filename);
+
+//
+// Set the basename of the symlink to the latest log file at a given
+// severity.  If symlink_basename is empty, do not make a symlink.  If
+// you don't call this function, the symlink basename is the
+// invocation name of the program.  Thread-safe.
+//
+GOOGLE_GLOG_DLL_DECL void SetLogSymlink(LogSeverity severity,
+                                        const char* symlink_basename);
+
+//
+// Used to send logs to some other kind of destination
+// Users should subclass LogSink and override send to do whatever they want.
+// Implementations must be thread-safe because a shared instance will
+// be called from whichever thread ran the LOG(XXX) line.
+class GOOGLE_GLOG_DLL_DECL LogSink {
+ public:
+  // Virtual so that sinks can be destroyed through a LogSink pointer.
+  virtual ~LogSink();
+
+  // Sink's logging logic (message_len is such as to exclude '\n' at the end).
+  // This method can't use LOG() or CHECK() as logging system mutex(s) are held
+  // during this call.
+  // Pure virtual: every concrete sink must implement it.
+  virtual void send(LogSeverity severity, const char* full_filename,
+                    const char* base_filename, int line,
+                    const struct ::tm* tm_time,
+                    const char* message, size_t message_len) = 0;
+
+  // Redefine this to implement waiting for
+  // the sink's logging logic to complete.
+  // It will be called after each send() returns,
+  // but before that LogMessage exits or crashes.
+  // By default this function does nothing.
+  // Using this function one can implement complex logic for send()
+  // that itself involves logging; and do all this w/o causing deadlocks and
+  // inconsistent rearrangement of log messages.
+  // E.g. if a LogSink has thread-specific actions, the send() method
+  // can simply add the message to a queue and wake up another thread that
+  // handles real logging while itself making some LOG() calls;
+  // WaitTillSent() can be implemented to wait for that logic to complete.
+  // See our unittest for an example.
+  virtual void WaitTillSent();
+
+  // Returns the normal text output of the log message.
+  // Can be useful to implement send().
+  // Takes the same pieces of information that send() receives, except
+  // that a single "file" argument replaces the full/base filename pair.
+  static std::string ToString(LogSeverity severity, const char* file, int line,
+                              const struct ::tm* tm_time,
+                              const char* message, size_t message_len);
+};
+
+// Add or remove a LogSink as a consumer of logging data.  Thread-safe.
+GOOGLE_GLOG_DLL_DECL void AddLogSink(LogSink *destination);
+GOOGLE_GLOG_DLL_DECL void RemoveLogSink(LogSink *destination);
+
+//
+// Specify an "extension" added to the filename specified via
+// SetLogDestination.  This applies to all severity levels.  It's
+// often used to append the port we're listening on to the logfile
+// name.  Thread-safe.
+//
+GOOGLE_GLOG_DLL_DECL void SetLogFilenameExtension(
+    const char* filename_extension);
+
+//
+// Make it so that all log messages of at least a particular severity
+// are logged to stderr (in addition to logging to the usual log
+// file(s)).  Thread-safe.
+//
+GOOGLE_GLOG_DLL_DECL void SetStderrLogging(LogSeverity min_severity);
+
+//
+// Make it so that all log messages go only to stderr.  Thread-safe.
+//
+GOOGLE_GLOG_DLL_DECL void LogToStderr();
+
+//
+// Make it so that all log messages of at least a particular severity are
+// logged via email to a list of addresses (in addition to logging to the
+// usual log file(s)).  The list of addresses is just a string containing
+// the email addresses to send to (separated by spaces, say).  Thread-safe.
+//
+GOOGLE_GLOG_DLL_DECL void SetEmailLogging(LogSeverity min_severity,
+                                          const char* addresses);
+
+// A simple function that sends email. dest is a comma-separated
+// list of addresses.  Thread-safe.
+GOOGLE_GLOG_DLL_DECL bool SendEmail(const char *dest,
+                                    const char *subject, const char *body);
+
+GOOGLE_GLOG_DLL_DECL const std::vector<std::string>& GetLoggingDirectories();
+
+// For tests only:  Clear the internal [cached] list of logging directories to
+// force a refresh the next time GetLoggingDirectories is called.
+// Thread-hostile.
+void TestOnly_ClearLoggingDirectoriesList();
+
+// Returns a set of existing temporary directories, which will be a
+// subset of the directories returned by GetLoggingDirectories().
+// Thread-safe.
+GOOGLE_GLOG_DLL_DECL void GetExistingTempDirectories(
+    std::vector<std::string>* list);
+
+// Print any fatal message again -- useful to call from signal handler
+// so that the last thing in the output is the fatal message.
+// Thread-hostile, but a race is unlikely.
+GOOGLE_GLOG_DLL_DECL void ReprintFatalMessage();
+
+// Truncate a log file that may be the append-only output of multiple
+// processes and hence can't simply be renamed/reopened (typically a
+// stdout/stderr).  If the file "path" is > "limit" bytes, copy the
+// last "keep" bytes to offset 0 and truncate the rest. Since we could
+// be racing with other writers, this approach has the potential to
+// lose very small amounts of data. For security, only follow symlinks
+// if the path is /proc/self/fd/*
+GOOGLE_GLOG_DLL_DECL void TruncateLogFile(const char *path,
+                                          int64 limit, int64 keep);
+
+// Truncate stdout and stderr if they are over the value specified by
+// --max_log_size; keep the final 1MB.  This function has the same
+// race condition as TruncateLogFile.
+GOOGLE_GLOG_DLL_DECL void TruncateStdoutStderr();
+
+// Return the string representation of the provided LogSeverity level.
+// Thread-safe.
+GOOGLE_GLOG_DLL_DECL const char* GetLogSeverityName(LogSeverity severity);
+
+// ---------------------------------------------------------------------
+// Implementation details that are not useful to most clients
+// ---------------------------------------------------------------------
+
+// A Logger is the interface used by logging modules to emit entries
+// to a log.  A typical implementation will dump formatted data to a
+// sequence of files.  We also provide interfaces that will forward
+// the data to another thread so that the invoker never blocks.
+// Implementations should be thread-safe since the logging system
+// will write to them from multiple threads.
+
+namespace base {
+
+// Abstract interface: Write(), Flush() and LogSize() are pure virtual and
+// must be provided by each implementation.
+class GOOGLE_GLOG_DLL_DECL Logger {
+ public:
+  virtual ~Logger();
+
+  // Writes "message[0,message_len-1]" corresponding to an event that
+  // occurred at "timestamp".  If "force_flush" is true, the log file
+  // is flushed immediately.
+  //
+  // The input message has already been formatted as deemed
+  // appropriate by the higher level logging facility.  For example,
+  // textual log messages already contain timestamps, and the
+  // file:linenumber header.
+  virtual void Write(bool force_flush,
+                     time_t timestamp,
+                     const char* message,
+                     int message_len) = 0;
+
+  // Flush any buffered messages
+  virtual void Flush() = 0;
+
+  // Get the current LOG file size.
+  // The returned value is approximate since some
+  // logged data may not have been flushed to disk yet.
+  virtual uint32 LogSize() = 0;
+};
+
+// Get the logger for the specified severity level.  The logger
+// remains the property of the logging module and should not be
+// deleted by the caller.  Thread-safe.
+extern GOOGLE_GLOG_DLL_DECL Logger* GetLogger(LogSeverity level);
+
+// Set the logger for the specified severity level.  The logger
+// becomes the property of the logging module and should not
+// be deleted by the caller.  Thread-safe.
+extern GOOGLE_GLOG_DLL_DECL void SetLogger(LogSeverity level, Logger* logger);
+
+}  // namespace base
+
+// glibc has traditionally implemented two incompatible versions of
+// strerror_r(). There is a poorly defined convention for picking the
+// version that we want, but it is not clear whether it even works with
+// all versions of glibc.
+// So, instead, we provide this wrapper that automatically detects the
+// version that is in use, and then implements POSIX semantics.
+// N.B. In addition to what POSIX says, we also guarantee that "buf" will
+// be set to an empty string, if this function failed. This means, in most
+// cases, you do not need to check the error code and you can directly
+// use the value of "buf". It will never have an undefined value.
+// DEPRECATED: Use StrError(int) instead.
+GOOGLE_GLOG_DLL_DECL int posix_strerror_r(int err, char *buf, size_t len);
+
+// A thread-safe replacement for strerror(). Returns a string describing the
+// given POSIX error code.
+GOOGLE_GLOG_DLL_DECL std::string StrError(int err);
+
+// A class for which we define operator<<, which does nothing.
+class GOOGLE_GLOG_DLL_DECL NullStream : public LogMessage::LogStream {
+ public:
+  // Initialize the LogStream so the messages can be written somewhere
+  // (they'll never be actually displayed). This will be needed if a
+  // NullStream& is implicitly converted to LogStream&, in which case
+  // the overloaded NullStream::operator<< will not be invoked.
+  //
+  // The length handed to LogStream is the real size of message_buffer_.
+  // LogStreamBuf requires len >= 2 because it ends its put area at
+  // buf + len - 2; a length of 1 would put that end one byte *before* the
+  // buffer.  With the full size the put area is empty but valid, so any
+  // character written through the base stream goes to
+  // LogStreamBuf::overflow() and is discarded.
+  NullStream()
+      : LogMessage::LogStream(message_buffer_,
+                              static_cast<int>(sizeof(message_buffer_)), 0) { }
+  // Same as the default constructor; the arguments exist only so that
+  // NullStream can stand in for a check-failure message class and are
+  // ignored.
+  NullStream(const char* /*file*/, int /*line*/,
+             const CheckOpString& /*result*/) :
+      LogMessage::LogStream(message_buffer_,
+                            static_cast<int>(sizeof(message_buffer_)), 0) { }
+  // Returns this object, mirroring the stream() accessor of LogMessage.
+  NullStream &stream() { return *this; }
+ private:
+  // A very short buffer for messages (which we discard anyway). This
+  // will be needed if NullStream& converted to LogStream& (e.g. as a
+  // result of a conditional expression).
+  char message_buffer_[2];
+};
+
+// Do nothing. This operator is inline, allowing the message to be
+// compiled away. The message will not be compiled away if we do
+// something like (flag ? LOG(INFO) : LOG(ERROR)) << message; when
+// SKIP_LOG=WARNING. In those cases, NullStream will be implicitly
+// converted to LogStream and the message will be computed and then
+// quietly discarded.
+template<class T>
+inline NullStream& operator<<(NullStream &str, const T &) { return str; }
+
+// Similar to NullStream, but aborts the program (without stack
+// trace), like LogMessageFatal.
+class GOOGLE_GLOG_DLL_DECL NullStreamFatal : public NullStream {
+ public:
+  NullStreamFatal() { }
+  // Forwards to the matching NullStream constructor, which ignores all
+  // three arguments; this is the form used by CHECK_OP when
+  // GOOGLE_STRIP_LOG > 3.
+  NullStreamFatal(const char* file, int line, const CheckOpString& result) :
+      NullStream(file, line, result) { }
+  // Terminates the process with _exit(1) as soon as the object is
+  // destroyed; declared noreturn and non-throwing.
+  __attribute__ ((noreturn)) ~NullStreamFatal() throw () { _exit(1); }
+};
+
+// Install a signal handler that will dump signal information and a stack
+// trace when the program crashes on certain signals.  We'll install the
+// signal handler for the following signals.
+//
+// SIGSEGV, SIGILL, SIGFPE, SIGABRT, SIGBUS, and SIGTERM.
+//
+// By default, the signal handler will write the failure dump to the
+// standard error.  You can customize the destination by installing your
+// own writer function by InstallFailureWriter() below.
+//
+// Note on threading:
+//
+// The function should be called before threads are created, if you want
+// to use the failure signal handler for all threads.  The stack trace
+// will be shown only for the thread that receives the signal.  In other
+// words, stack traces of other threads won't be shown.
+GOOGLE_GLOG_DLL_DECL void InstallFailureSignalHandler();
+
+// Installs a function that is used for writing the failure dump.  "data"
+// is the pointer to the beginning of a message to be written, and "size"
+// is the size of the message.  You should not expect the data is
+// terminated with '\0'.
+GOOGLE_GLOG_DLL_DECL void InstallFailureWriter(
+    void (*writer)(const char* data, int size));
+
+}
+
+#endif // _LOGGING_H_
diff --git a/third_party/config/glog/linux/glog/raw_logging.h b/third_party/config/glog/linux/glog/raw_logging.h
new file mode 100644
index 0000000..65278f6
--- /dev/null
+++ b/third_party/config/glog/linux/glog/raw_logging.h
@@ -0,0 +1,185 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Maxim Lifantsev
+//
+// Thread-safe logging routines that do not allocate any memory or
+// acquire any locks, and can therefore be used by low-level memory
+// allocation and synchronization code.
+
+#ifndef BASE_RAW_LOGGING_H_
+#define BASE_RAW_LOGGING_H_
+
+#include <time.h>
+
+namespace google {
+
+#include "glog/log_severity.h"
+#include "glog/vlog_is_on.h"
+
+// Annoying stuff for windows -- makes sure clients can import these functions
+#ifndef GOOGLE_GLOG_DLL_DECL
+# if defined(_WIN32) && !defined(__CYGWIN__)
+#   define GOOGLE_GLOG_DLL_DECL  __declspec(dllimport)
+# else
+#   define GOOGLE_GLOG_DLL_DECL
+# endif
+#endif
+
+// This is similar to LOG(severity) << format... and VLOG(level) << format...,
+// but
+// * it is to be used ONLY by low-level modules that can't use normal LOG()
+// * it is designed to be a low-level logger that does not allocate any
+//   memory and does not need any locks, hence:
+// * it logs straight and ONLY to STDERR w/o buffering
+// * it uses an explicit format and arguments list
+// * it will silently chop off really long message strings
+// Usage example:
+//   RAW_LOG(ERROR, "Failed foo with %i: %s", status, error);
+//   RAW_VLOG(3, "status is %i", status);
+// These will print almost-standard log lines like this to stderr only:
+//   E0821 211317 file.cc:123] RAW: Failed foo with 22: bad_file
+//   I0821 211317 file.cc:142] RAW: status is 20
+#define RAW_LOG(severity, ...) \
+  do { \
+    switch (google::GLOG_ ## severity) {  \
+      case 0: \
+        RAW_LOG_INFO(__VA_ARGS__); \
+        break; \
+      case 1: \
+        RAW_LOG_WARNING(__VA_ARGS__); \
+        break; \
+      case 2: \
+        RAW_LOG_ERROR(__VA_ARGS__); \
+        break; \
+      case 3: \
+        RAW_LOG_FATAL(__VA_ARGS__); \
+        break; \
+      default: \
+        break; \
+    } \
+  } while (0)
+
+// The following STRIP_LOG testing is performed in the header file so that it's
+// possible to completely compile out the logging code and the log messages.
+#if STRIP_LOG == 0
+#define RAW_VLOG(verboselevel, ...) \
+  do { \
+    if (VLOG_IS_ON(verboselevel)) { \
+      RAW_LOG_INFO(__VA_ARGS__); \
+    } \
+  } while (0)
+#else
+#define RAW_VLOG(verboselevel, ...) google::RawLogStub__(0, __VA_ARGS__)
+#endif // STRIP_LOG == 0
+
+#if STRIP_LOG == 0
+#define RAW_LOG_INFO(...) google::RawLog__(google::GLOG_INFO, \
+                                   __FILE__, __LINE__, __VA_ARGS__)
+#else
+#define RAW_LOG_INFO(...) google::RawLogStub__(0, __VA_ARGS__)
+#endif // STRIP_LOG == 0
+
+#if STRIP_LOG <= 1
+#define RAW_LOG_WARNING(...) google::RawLog__(google::GLOG_WARNING,   \
+                                      __FILE__, __LINE__, __VA_ARGS__)
+#else
+#define RAW_LOG_WARNING(...) google::RawLogStub__(0, __VA_ARGS__)
+#endif // STRIP_LOG <= 1
+
+#if STRIP_LOG <= 2
+#define RAW_LOG_ERROR(...) google::RawLog__(google::GLOG_ERROR,       \
+                                    __FILE__, __LINE__, __VA_ARGS__)
+#else
+#define RAW_LOG_ERROR(...) google::RawLogStub__(0, __VA_ARGS__)
+#endif // STRIP_LOG <= 2
+
+#if STRIP_LOG <= 3
+#define RAW_LOG_FATAL(...) google::RawLog__(google::GLOG_FATAL,       \
+                                    __FILE__, __LINE__, __VA_ARGS__)
+#else
+#define RAW_LOG_FATAL(...) \
+  do { \
+    google::RawLogStub__(0, __VA_ARGS__);        \
+    exit(1); \
+  } while (0)
+#endif // STRIP_LOG <= 3
+
+// Similar to CHECK(condition) << message,
+// but for low-level modules: we use only RAW_LOG that does not allocate memory.
+// We do not want to provide args list here to encourage this usage:
+//   if (!cond)  RAW_LOG(FATAL, "foo ...", hard_to_compute_args);
+// so that the args are not computed when not needed.
+#define RAW_CHECK(condition, message)                                   \
+  do {                                                                  \
+    if (!(condition)) {                                                 \
+      RAW_LOG(FATAL, "Check %s failed: %s", #condition, message);       \
+    }                                                                   \
+  } while (0)
+
+// Debug versions of RAW_LOG and RAW_CHECK
+#ifndef NDEBUG
+
+#define RAW_DLOG(severity, ...) RAW_LOG(severity, __VA_ARGS__)
+#define RAW_DCHECK(condition, message) RAW_CHECK(condition, message)
+
+#else  // NDEBUG
+
+#define RAW_DLOG(severity, ...)                                 \
+  while (false)                                                 \
+    RAW_LOG(severity, __VA_ARGS__)
+#define RAW_DCHECK(condition, message) \
+  while (false) \
+    RAW_CHECK(condition, message)
+
+#endif  // NDEBUG
+
+// No-op stub that accepts and ignores its arguments; the stripped RAW_* macros
+// expand to it to work around unused variable warnings when STRIP_LOG > 0.
+static inline void RawLogStub__(int /* ignored */, ...) {
+}
+
+// Helper function to implement RAW_LOG and RAW_VLOG
+// Logs format... at "severity" level, reporting it
+// as called from file:line.
+// This does not allocate memory or acquire locks.
+GOOGLE_GLOG_DLL_DECL void RawLog__(LogSeverity severity,
+                                   const char* file,
+                                   int line,
+                                   const char* format, ...)
+   __attribute__((__format__ (__printf__, 4, 5)));
+
+// Hack to propagate time information into this module so that
+// this module does not have to directly call localtime_r(),
+// which could allocate memory.
+GOOGLE_GLOG_DLL_DECL void RawLog__SetLastTime(const struct tm& t, int usecs);
+
+}
+
+#endif  // BASE_RAW_LOGGING_H_
diff --git a/third_party/config/glog/linux/glog/stl_logging.h b/third_party/config/glog/linux/glog/stl_logging.h
new file mode 100644
index 0000000..40a15aa
--- /dev/null
+++ b/third_party/config/glog/linux/glog/stl_logging.h
@@ -0,0 +1,220 @@
+// Copyright (c) 2003, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Stream output operators for STL containers; to be used for logging *only*.
+// Inclusion of this file lets you do:
+//
+// list<string> x;
+// LOG(INFO) << "data: " << x;
+// vector<int> v1, v2;
+// CHECK_EQ(v1, v2);
+//
+// If you want to use this header file with hash maps or slist, you
+// need to define macros before including this file:
+//
+// - GLOG_STL_LOGGING_FOR_UNORDERED     - <unordered_map> and <unordered_set>
+// - GLOG_STL_LOGGING_FOR_TR1_UNORDERED - <tr1/unordered_(map|set)>
+// - GLOG_STL_LOGGING_FOR_EXT_HASH      - <ext/hash_(map|set)>
+// - GLOG_STL_LOGGING_FOR_EXT_SLIST     - <ext/slist>
+//
+
+#ifndef UTIL_GTL_STL_LOGGING_INL_H_
+#define UTIL_GTL_STL_LOGGING_INL_H_
+
+#if !1
+# error We do not support stl_logging for this compiler
+#endif
+
+#include <deque>
+#include <list>
+#include <map>
+#include <ostream>
+#include <set>
+#include <utility>
+#include <vector>
+
+#ifdef GLOG_STL_LOGGING_FOR_UNORDERED
+# include <unordered_map>
+# include <unordered_set>
+#endif
+
+#ifdef GLOG_STL_LOGGING_FOR_TR1_UNORDERED
+# include <tr1/unordered_map>
+# include <tr1/unordered_set>
+#endif
+
+#ifdef GLOG_STL_LOGGING_FOR_EXT_HASH
+# include <ext/hash_set>
+# include <ext/hash_map>
+#endif
+#ifdef GLOG_STL_LOGGING_FOR_EXT_SLIST
+# include <ext/slist>
+#endif
+
+// Forward declare these two, and define them after all the container streams
+// operators so that we can recurse from pair -> container -> container -> pair
+// properly.
+template<class First, class Second>
+std::ostream& operator<<(std::ostream& out, const std::pair<First, Second>& p);
+
+namespace google {
+
+template<class Iter>
+void PrintSequence(std::ostream& out, Iter begin, Iter end);
+
+}
+
+#define OUTPUT_TWO_ARG_CONTAINER(Sequence) \
+template<class T1, class T2> \
+inline std::ostream& operator<<(std::ostream& out, \
+                                const Sequence<T1, T2>& seq) { \
+  google::PrintSequence(out, seq.begin(), seq.end()); \
+  return out; \
+}
+
+OUTPUT_TWO_ARG_CONTAINER(std::vector)
+OUTPUT_TWO_ARG_CONTAINER(std::deque)
+OUTPUT_TWO_ARG_CONTAINER(std::list)
+#ifdef GLOG_STL_LOGGING_FOR_EXT_SLIST
+OUTPUT_TWO_ARG_CONTAINER(__gnu_cxx::slist)
+#endif
+
+#undef OUTPUT_TWO_ARG_CONTAINER
+
+#define OUTPUT_THREE_ARG_CONTAINER(Sequence) \
+template<class T1, class T2, class T3> \
+inline std::ostream& operator<<(std::ostream& out, \
+                                const Sequence<T1, T2, T3>& seq) { \
+  google::PrintSequence(out, seq.begin(), seq.end()); \
+  return out; \
+}
+
+OUTPUT_THREE_ARG_CONTAINER(std::set)
+OUTPUT_THREE_ARG_CONTAINER(std::multiset)
+
+#undef OUTPUT_THREE_ARG_CONTAINER
+
+#define OUTPUT_FOUR_ARG_CONTAINER(Sequence) \
+template<class T1, class T2, class T3, class T4> \
+inline std::ostream& operator<<(std::ostream& out, \
+                                const Sequence<T1, T2, T3, T4>& seq) { \
+  google::PrintSequence(out, seq.begin(), seq.end()); \
+  return out; \
+}
+
+OUTPUT_FOUR_ARG_CONTAINER(std::map)
+OUTPUT_FOUR_ARG_CONTAINER(std::multimap)
+#ifdef GLOG_STL_LOGGING_FOR_UNORDERED
+OUTPUT_FOUR_ARG_CONTAINER(std::unordered_set)
+OUTPUT_FOUR_ARG_CONTAINER(std::unordered_multiset)
+#endif
+#ifdef GLOG_STL_LOGGING_FOR_TR1_UNORDERED
+OUTPUT_FOUR_ARG_CONTAINER(std::tr1::unordered_set)
+OUTPUT_FOUR_ARG_CONTAINER(std::tr1::unordered_multiset)
+#endif
+#ifdef GLOG_STL_LOGGING_FOR_EXT_HASH
+OUTPUT_FOUR_ARG_CONTAINER(__gnu_cxx::hash_set)
+OUTPUT_FOUR_ARG_CONTAINER(__gnu_cxx::hash_multiset)
+#endif
+
+#undef OUTPUT_FOUR_ARG_CONTAINER
+
+#define OUTPUT_FIVE_ARG_CONTAINER(Sequence) \
+template<class T1, class T2, class T3, class T4, class T5> \
+inline std::ostream& operator<<(std::ostream& out, \
+                                const Sequence<T1, T2, T3, T4, T5>& seq) { \
+  google::PrintSequence(out, seq.begin(), seq.end()); \
+  return out; \
+}
+
+#ifdef GLOG_STL_LOGGING_FOR_UNORDERED
+OUTPUT_FIVE_ARG_CONTAINER(std::unordered_map)
+OUTPUT_FIVE_ARG_CONTAINER(std::unordered_multimap)
+#endif
+#ifdef GLOG_STL_LOGGING_FOR_TR1_UNORDERED
+OUTPUT_FIVE_ARG_CONTAINER(std::tr1::unordered_map)
+OUTPUT_FIVE_ARG_CONTAINER(std::tr1::unordered_multimap)
+#endif
+#ifdef GLOG_STL_LOGGING_FOR_EXT_HASH
+OUTPUT_FIVE_ARG_CONTAINER(__gnu_cxx::hash_map)
+OUTPUT_FIVE_ARG_CONTAINER(__gnu_cxx::hash_multimap)
+#endif
+
+#undef OUTPUT_FIVE_ARG_CONTAINER
+
+template<class First, class Second>
+inline std::ostream& operator<<(std::ostream& out,
+                                const std::pair<First, Second>& p) {
+  out << '(' << p.first << ", " << p.second << ')';  // "(first, second)"
+  return out;
+}
+
+namespace google {
+
+template<class Iter>
+inline void PrintSequence(std::ostream& out, Iter begin, Iter end) {
+  // Output at most 100 elements -- appropriate if used for logging.
+  for (int i = 0; begin != end && i < 100; ++i, ++begin) {
+    if (i > 0) out << ' ';  // single space between consecutive elements
+    out << *begin;
+  }
+  if (begin != end) {  // elements remain beyond the 100-element cap
+    out << " ...";
+  }
+}
+
+}
+
+// Note that this is technically undefined behavior! We are adding things into
+// the std namespace for a reason though -- we are providing new operations on
+// types which are themselves defined with this namespace. Without this, these
+// operator overloads cannot be found via ADL. If these definitions are not
+// found via ADL, they must be #included before they're used, which requires
+// this header to be included before apparently independent other headers.
+//
+// For example, base/logging.h defines various template functions to implement
+// CHECK_EQ(x, y) and stream x and y into the log in the event the check fails.
+// It does so via the function template MakeCheckOpValueString:
+//   template<class T>
+//   void MakeCheckOpValueString(strstream* ss, const T& v) {
+//     (*ss) << v;
+//   }
+// Because 'glog/logging.h' is included before 'glog/stl_logging.h',
+// subsequent CHECK_EQ(v1, v2) for vector<...> typed variable v1 and v2 can only
+// find these operator definitions via ADL.
+//
+// Even this solution has problems -- it may pull unintended operators into the
+// namespace as well, allowing them to also be found via ADL, and creating code
+// that only works with a particular order of includes. Long term, we need to
+// move all of the *definitions* into namespace std, but we need to ensure no
+// one references them first. This lets us take that step. We cannot define them
+// in both because that would create ambiguous overloads when both are found.
+namespace std { using ::operator<<; }
+
+#endif  // UTIL_GTL_STL_LOGGING_INL_H_
diff --git a/third_party/config/glog/linux/glog/vlog_is_on.h b/third_party/config/glog/linux/glog/vlog_is_on.h
new file mode 100644
index 0000000..02b0b86
--- /dev/null
+++ b/third_party/config/glog/linux/glog/vlog_is_on.h
@@ -0,0 +1,129 @@
+// Copyright (c) 1999, 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Ray Sidney and many others
+//
+// Defines the VLOG_IS_ON macro that controls the variable-verbosity
+// conditional logging.
+//
+// It's used by VLOG and VLOG_IF in logging.h
+// and by RAW_VLOG in raw_logging.h to trigger the logging.
+//
+// It can also be used directly e.g. like this:
+//   if (VLOG_IS_ON(2)) {
+//     // do some logging preparation and logging
+//     // that can't be accomplished e.g. via just VLOG(2) << ...;
+//   }
+//
+// The truth value that VLOG_IS_ON(level) returns is determined by 
+// the three verbosity level flags:
+//   --v=<n>  Gives the default maximal active V-logging level;
+//            0 is the default.
+//            Normally positive values are used for V-logging levels.
+//   --vmodule=<str>  Gives the per-module maximal V-logging levels to override
+//                    the value given by --v.
+//                    E.g. "my_module=2,foo*=3" would change the logging level
+//                    for all code in source files "my_module.*" and "foo*.*"
+//                    ("-inl" suffixes are also disregarded for this matching).
+//
+// SetVLOGLevel helper function is provided to do limited dynamic control over
+// V-logging by overriding the per-module settings given via --vmodule flag.
+//
+// CAVEAT: --vmodule functionality is not available in non gcc compilers.
+//
+
+#ifndef BASE_VLOG_IS_ON_H_
+#define BASE_VLOG_IS_ON_H_
+
+#include "glog/log_severity.h"
+
+// Annoying stuff for windows -- makes sure clients can import these functions
+#ifndef GOOGLE_GLOG_DLL_DECL
+# if defined(_WIN32) && !defined(__CYGWIN__)
+#   define GOOGLE_GLOG_DLL_DECL  __declspec(dllimport)
+# else
+#   define GOOGLE_GLOG_DLL_DECL
+# endif
+#endif
+
+#if defined(__GNUC__)
+// We emit an anonymous static int* variable at every VLOG_IS_ON(n) site.
+// (Normally) the first time every VLOG_IS_ON(n) site is hit,
+// we determine what variable will dynamically control logging at this site:
+// it's either FLAGS_v or an appropriate internal variable
+// matching the current source file that represents results of
+// parsing of --vmodule flag and/or SetVLOGLevel calls.
+#define VLOG_IS_ON(verboselevel)                                \
+  __extension__  \
+  ({ static google::int32* vlocal__ = &google::kLogSiteUninitialized;           \
+     google::int32 verbose_level__ = (verboselevel);                    \
+     (*vlocal__ >= verbose_level__) &&                          \
+     ((vlocal__ != &google::kLogSiteUninitialized) ||                   \
+      (google::InitVLOG3__(&vlocal__, &FLAGS_v,                         \
+                   __FILE__, verbose_level__))); })
+#else
+// GNU extensions not available, so we do not support --vmodule.
+// Dynamic value of FLAGS_v always controls the logging level.
+#define VLOG_IS_ON(verboselevel) (FLAGS_v >= (verboselevel))
+#endif
+
+// Set VLOG(_IS_ON) level for module_pattern to log_level.
+// This lets us dynamically control what is normally set by the --vmodule flag.
+// Returns the level that previously applied to module_pattern.
+// NOTE: To change the log level for VLOG(_IS_ON) sites
+//	 that have already executed after/during InitGoogleLogging,
+//	 one needs to supply the exact --vmodule pattern that applied to them.
+//       (If no --vmodule pattern applied to them
+//       the value of FLAGS_v will continue to control them.)
+extern GOOGLE_GLOG_DLL_DECL int SetVLOGLevel(const char* module_pattern,
+                                             int log_level);
+
+// Various declarations needed for VLOG_IS_ON above: =========================
+
+// Special value used to indicate that a VLOG_IS_ON site has not been
+// initialized.  We make this a large value, so the common-case check
+// of "*vlocal__ >= verbose_level__" in VLOG_IS_ON definition
+// passes in such cases and InitVLOG3__ is then triggered.
+extern google::int32 kLogSiteUninitialized;
+
+// Helper routine which determines the logging info for a particular VLOG site.
+//   site_flag     is the address of the site-local pointer to the controlling
+//                 verbosity level
+//   site_default  is the default to use for *site_flag
+//   fname         is the current source file name
+//   verbose_level is the argument to VLOG_IS_ON
+// We will return the return value for VLOG_IS_ON
+// and if possible set *site_flag appropriately.
+extern GOOGLE_GLOG_DLL_DECL bool InitVLOG3__(
+    google::int32** site_flag,
+    google::int32* site_default,
+    const char* fname,
+    google::int32 verbose_level);
+
+#endif  // BASE_VLOG_IS_ON_H_
diff --git a/third_party/config/glog/mac/config.h b/third_party/config/glog/mac/config.h
new file mode 100644
index 0000000..cef5f46
--- /dev/null
+++ b/third_party/config/glog/mac/config.h
@@ -0,0 +1,194 @@
+/* src/config.h.  Generated from config.h.in by configure.  */
+/* src/config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* define if glog doesn't use RTTI */
+#define DISABLE_RTTI 1
+
+/* Namespace for Google classes */
+#define GOOGLE_NAMESPACE google
+
+/* Define if you have the `dladdr' function */
+#define HAVE_DLADDR 1
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <execinfo.h> header file. */
+#define HAVE_EXECINFO_H 1
+
+/* Define if you have the `fcntl' function */
+#define HAVE_FCNTL 1
+
+/* Define to 1 if you have the <glob.h> header file. */
+#define HAVE_GLOB_H 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the `pthread' library (-lpthread). */
+#define HAVE_LIBPTHREAD 1
+
+/* Define to 1 if you have the <libunwind.h> header file. */
+#define HAVE_LIBUNWIND_H 1
+
+/* define if you have google gflags library */
+/* #undef HAVE_LIB_GFLAGS */
+
+/* define if you have google gmock library */
+/* #undef HAVE_LIB_GMOCK */
+
+/* define if you have google gtest library */
+/* #undef HAVE_LIB_GTEST */
+
+/* define if you have libunwind */
+/* #undef HAVE_LIB_UNWIND */
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* define if the compiler implements namespaces */
+#define HAVE_NAMESPACES 1
+
+/* Define if you have the 'pread' function */
+#define HAVE_PREAD 1
+
+/* Define if you have POSIX threads libraries and header files. */
+#define HAVE_PTHREAD 1
+
+/* Define to 1 if you have the <pwd.h> header file. */
+#define HAVE_PWD_H 1
+
+/* Define if you have the 'pwrite' function */
+#define HAVE_PWRITE 1
+
+/* define if the compiler implements pthread_rwlock_* */
+#define HAVE_RWLOCK 1
+
+/* Define if you have the 'sigaction' function */
+#define HAVE_SIGACTION 1
+
+/* Define if you have the `sigaltstack' function */
+#define HAVE_SIGALTSTACK 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <syscall.h> header file. */
+/* #undef HAVE_SYSCALL_H */
+
+/* Define to 1 if you have the <syslog.h> header file. */
+#define HAVE_SYSLOG_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/syscall.h> header file. */
+#define HAVE_SYS_SYSCALL_H 1
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#define HAVE_SYS_TIME_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <sys/ucontext.h> header file. */
+#define HAVE_SYS_UCONTEXT_H 1
+
+/* Define to 1 if you have the <sys/utsname.h> header file. */
+#define HAVE_SYS_UTSNAME_H 1
+
+/* Define to 1 if you have the <ucontext.h> header file. */
+/* #undef HAVE_UCONTEXT_H */
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the <unwind.h> header file. */
+#define HAVE_UNWIND_H 1
+
+/* define if the compiler supports using expression for operator */
+#define HAVE_USING_OPERATOR 1
+
+/* define if your compiler has __attribute__ */
+#define HAVE___ATTRIBUTE__ 1
+
+/* define if your compiler has __builtin_expect */
+#define HAVE___BUILTIN_EXPECT 1
+
+/* define if your compiler has __sync_val_compare_and_swap */
+#define HAVE___SYNC_VAL_COMPARE_AND_SWAP 1
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#define LT_OBJDIR ".libs/"
+
+/* Name of package */
+#define PACKAGE "glog"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "opensource@google.com"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "glog"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "glog 0.3.5"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "glog"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL ""
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "0.3.5"
+
+/* How to access the PC from a struct ucontext */
+#if defined(__LP64__)
+#define PC_FROM_UCONTEXT uc_mcontext->__ss.__rip
+#else
+#define PC_FROM_UCONTEXT uc_mcontext->__ss.__eip
+#endif
+
+/* Define to necessary symbol if this constant uses a non-standard name on
+   your system. */
+/* #undef PTHREAD_CREATE_JOINABLE */
+
+/* The size of `void *', as computed by sizeof. */
+#define SIZEOF_VOID_P 8
+
+/* Define to 1 if you have the ANSI C header files. */
+/* #undef STDC_HEADERS */
+
+/* the namespace where STL code like vector<> is defined */
+#define STL_NAMESPACE std
+
+/* location of source code */
+#define TEST_SRC_DIR "."
+
+/* Version number of package */
+#define VERSION "0.3.5"
+
+/* Stops putting the code inside the Google namespace */
+#define _END_GOOGLE_NAMESPACE_ }
+
+/* Puts following code inside the Google namespace */
+#define _START_GOOGLE_NAMESPACE_ namespace google {
+
+// Annoying stuff for windows -- makes sure clients can import these functions
+#ifndef GOOGLE_GLOG_DLL_DECL
+# if defined(_WIN32) && !defined(__CYGWIN__)
+#   define GOOGLE_GLOG_DLL_DECL  __declspec(dllimport)
+# else
+#   define GOOGLE_GLOG_DLL_DECL
+# endif
+#endif
diff --git a/third_party/config/glog/mac/glog/log_severity.h b/third_party/config/glog/mac/glog/log_severity.h
new file mode 100644
index 0000000..99945a4
--- /dev/null
+++ b/third_party/config/glog/mac/glog/log_severity.h
@@ -0,0 +1,92 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef BASE_LOG_SEVERITY_H__
+#define BASE_LOG_SEVERITY_H__
+
+// Annoying stuff for windows -- makes sure clients can import these functions
+#ifndef GOOGLE_GLOG_DLL_DECL
+# if defined(_WIN32) && !defined(__CYGWIN__)
+#   define GOOGLE_GLOG_DLL_DECL  __declspec(dllimport)
+# else
+#   define GOOGLE_GLOG_DLL_DECL
+# endif
+#endif
+
+// Variables of type LogSeverity are widely taken to lie in the range
+// [0, NUM_SEVERITIES-1].  Be careful to preserve this assumption if
+// you ever need to change their values or add a new severity.
+typedef int LogSeverity;
+
+const int GLOG_INFO = 0, GLOG_WARNING = 1, GLOG_ERROR = 2, GLOG_FATAL = 3,
+  NUM_SEVERITIES = 4;
+#ifndef GLOG_NO_ABBREVIATED_SEVERITIES
+# ifdef ERROR
+#  error ERROR macro is defined. Define GLOG_NO_ABBREVIATED_SEVERITIES before including logging.h. See the document for detail.
+# endif
+const int INFO = GLOG_INFO, WARNING = GLOG_WARNING,
+  ERROR = GLOG_ERROR, FATAL = GLOG_FATAL;
+#endif
+
+// DFATAL is FATAL in debug mode, ERROR in normal mode
+#ifdef NDEBUG
+#define DFATAL_LEVEL ERROR
+#else
+#define DFATAL_LEVEL FATAL
+#endif
+
+extern GOOGLE_GLOG_DLL_DECL const char* const LogSeverityNames[NUM_SEVERITIES];
+
+// NDEBUG usage helpers related to (RAW_)DCHECK:
+//
+// DEBUG_MODE is for small !NDEBUG uses like
+//   if (DEBUG_MODE) foo.CheckThatFoo();
+// instead of substantially more verbose
+//   #ifndef NDEBUG
+//     foo.CheckThatFoo();
+//   #endif
+//
+// IF_DEBUG_MODE is for small !NDEBUG uses like
+//   IF_DEBUG_MODE( string error; )
+//   DCHECK(Foo(&error)) << error;
+// instead of substantially more verbose
+//   #ifndef NDEBUG
+//     string error;
+//     DCHECK(Foo(&error)) << error;
+//   #endif
+//
+#ifdef NDEBUG
+enum { DEBUG_MODE = 0 };
+#define IF_DEBUG_MODE(x)
+#else
+enum { DEBUG_MODE = 1 };
+#define IF_DEBUG_MODE(x) x
+#endif
+
+#endif  // BASE_LOG_SEVERITY_H__
diff --git a/third_party/config/glog/mac/glog/logging.h b/third_party/config/glog/mac/glog/logging.h
new file mode 100644
index 0000000..d7c392b
--- /dev/null
+++ b/third_party/config/glog/mac/glog/logging.h
@@ -0,0 +1,1659 @@
+// Copyright (c) 1999, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Ray Sidney
+//
+// This file contains #include information about logging-related stuff.
+// Pretty much everybody needs to #include this file so that they can
+// log various happenings.
+//
+#ifndef _LOGGING_H_
+#define _LOGGING_H_
+
+#include <errno.h>
+#include <string.h>
+#include <time.h>
+#include <iosfwd>
+#include <ostream>
+#include <sstream>
+#include <string>
+#if 1
+# include <unistd.h>
+#endif
+#include <vector>
+
+#if defined(_MSC_VER)  // MSVC: push warning state and disable warning n / pop it
+#define GLOG_MSVC_PUSH_DISABLE_WARNING(n) __pragma(warning(push)) \
+                                     __pragma(warning(disable:n))
+#define GLOG_MSVC_POP_WARNING() __pragma(warning(pop))
+#else  // other compilers: both macros expand to nothing
+#define GLOG_MSVC_PUSH_DISABLE_WARNING(n)
+#define GLOG_MSVC_POP_WARNING()
+#endif  // defined(_MSC_VER)
+
+// Windows DLL import decoration for clients; a prior definition is respected.
+#ifndef GOOGLE_GLOG_DLL_DECL
+# if defined(_WIN32) && !defined(__CYGWIN__)
+#   define GOOGLE_GLOG_DLL_DECL  __declspec(dllimport)
+# else
+#   define GOOGLE_GLOG_DLL_DECL
+# endif
+#endif  // GOOGLE_GLOG_DLL_DECL
+
+// We care a lot about number of bits things take up.  Unfortunately,
+// systems define their bit-specific ints in a lot of different ways.
+// We use our own way, and have a typedef to get there.
+// Note: these commands below may look like "#if 1" or "#if 0", but
+// that's because they were constructed that way at ./configure time.
+// Look at logging.h.in to see how they're calculated (based on your config).
+#if 1
+#include <stdint.h>             // the normal place uint16_t is defined
+#endif
+#if 1
+#include <sys/types.h>          // the normal place u_int16_t is defined
+#endif
+#if 1
+#include <inttypes.h>           // a third place for uint16_t or u_int16_t
+#endif
+
+#if 0
+#include <gflags/gflags.h>
+#endif
+
+namespace google {  // fixed-width integer aliases: int32, uint32, int64, uint64
+
+#if 1      // the C99 format (the branch taken in this configuration)
+typedef int32_t int32;
+typedef uint32_t uint32;
+typedef int64_t int64;
+typedef uint64_t uint64;
+#elif 1   // the BSD format (skipped: the C99 branch above is taken)
+typedef int32_t int32;
+typedef u_int32_t uint32;
+typedef int64_t int64;
+typedef u_int64_t uint64;
+#elif 0    // the windows (vc7) format (disabled in this configuration)
+typedef __int32 int32;
+typedef unsigned __int32 uint32;
+typedef __int64 int64;
+typedef unsigned __int64 uint64;
+#else  // none of the known formats is available
+#error Do not know how to define a 32-bit integer quantity on your system
+#endif
+
+}  // namespace google
+
+// The global value of GOOGLE_STRIP_LOG. All the messages logged to
+// LOG(XXX) with severity less than GOOGLE_STRIP_LOG will not be displayed.
+// If it can be determined at compile time that the message will not be
+// printed, the statement will be compiled out.
+//
+// Example: to strip out all INFO and WARNING messages, use the value
+// of 2 below. To make an exception for WARNING messages from a single
+// file, add "#define GOOGLE_STRIP_LOG 1" to that file _before_ including
+// base/logging.h
+#ifndef GOOGLE_STRIP_LOG
+#define GOOGLE_STRIP_LOG 0
+#endif
+
+// Branch-prediction hints.  GCC can be told that a branch is unlikely (for
+// instance, a CHECK failure) or likely, which helps it optimize for the
+// common case in the absence of better information (ie. -fprofile-arcs).
+// Note that only GOOGLE_PREDICT_TRUE normalizes its argument with !!(x); the
+// other two pass x to __builtin_expect as-is.  Each may be pre-defined.
+#ifndef GOOGLE_PREDICT_BRANCH_NOT_TAKEN
+#if 1
+#define GOOGLE_PREDICT_BRANCH_NOT_TAKEN(x) (__builtin_expect(x, 0))
+#else
+#define GOOGLE_PREDICT_BRANCH_NOT_TAKEN(x) x
+#endif
+#endif  // GOOGLE_PREDICT_BRANCH_NOT_TAKEN
+
+#ifndef GOOGLE_PREDICT_FALSE
+#if 1
+#define GOOGLE_PREDICT_FALSE(x) (__builtin_expect(x, 0))
+#else
+#define GOOGLE_PREDICT_FALSE(x) x
+#endif
+#endif  // GOOGLE_PREDICT_FALSE
+
+#ifndef GOOGLE_PREDICT_TRUE
+#if 1
+#define GOOGLE_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+#else
+#define GOOGLE_PREDICT_TRUE(x) x
+#endif
+#endif  // GOOGLE_PREDICT_TRUE
+
+
+// Make a bunch of macros for logging.  The way to log things is to stream
+// things to LOG(<a particular severity level>).  E.g.,
+//
+//   LOG(INFO) << "Found " << num_cookies << " cookies";
+//
+// You can capture log messages in a string, rather than reporting them
+// immediately:
+//
+//   vector<string> errors;
+//   LOG_STRING(ERROR, &errors) << "Couldn't parse cookie #" << cookie_num;
+//
+// This pushes back the new error onto 'errors'; if given a NULL pointer,
+// it reports the error via LOG(ERROR).
+//
+// You can also do conditional logging:
+//
+//   LOG_IF(INFO, num_cookies > 10) << "Got lots of cookies";
+//
+// You can also do occasional logging (log every n'th occurrence of an
+// event):
+//
+//   LOG_EVERY_N(INFO, 10) << "Got the " << google::COUNTER << "th cookie";
+//
+// The above will cause log messages to be output on the 1st, 11th, 21st, ...
+// times it is executed.  Note that the special google::COUNTER value is used
+// to identify which repetition is happening.
+//
+// You can also do occasional conditional logging (log every n'th
+// occurrence of an event, when condition is satisfied):
+//
+//   LOG_IF_EVERY_N(INFO, (size > 1024), 10) << "Got the " << google::COUNTER
+//                                           << "th big cookie";
+//
+// You can log messages the first N times your code executes a line. E.g.
+//
+//   LOG_FIRST_N(INFO, 20) << "Got the " << google::COUNTER << "th cookie";
+//
+// Outputs log messages for the first 20 times it is executed.
+//
+// Analogous SYSLOG, SYSLOG_IF, and SYSLOG_EVERY_N macros are available.
+// These log to syslog as well as to the normal logs.  If you use these at
+// all, you need to be aware that syslog can drastically reduce performance,
+// especially if it is configured for remote logging!  Don't use these
+// unless you fully understand this and have a concrete need to use them.
+// Even then, try to minimize your use of them.
+//
+// There are also "debug mode" logging macros like the ones above:
+//
+//   DLOG(INFO) << "Found cookies";
+//
+//   DLOG_IF(INFO, num_cookies > 10) << "Got lots of cookies";
+//
+//   DLOG_EVERY_N(INFO, 10) << "Got the " << google::COUNTER << "th cookie";
+//
+// All "debug mode" logging is compiled away to nothing for non-debug mode
+// compiles.
+//
+// We also have
+//
+//   LOG_ASSERT(assertion);
+//   DLOG_ASSERT(assertion);
+//
+// which is syntactic sugar for {,D}LOG_IF(FATAL, assert fails) << assertion;
+//
+// There are "verbose level" logging macros.  They look like
+//
+//   VLOG(1) << "I'm printed when you run the program with --v=1 or more";
+//   VLOG(2) << "I'm printed when you run the program with --v=2 or more";
+//
+// These always log at the INFO log level (when they log at all).
+// The verbose logging can also be turned on module-by-module.  For instance,
+//    --vmodule=mapreduce=2,file=1,gfs*=3 --v=0
+// will cause:
+//   a. VLOG(2) and lower messages to be printed from mapreduce.{h,cc}
+//   b. VLOG(1) and lower messages to be printed from file.{h,cc}
+//   c. VLOG(3) and lower messages to be printed from files prefixed with "gfs"
+//   d. VLOG(0) and lower messages to be printed from elsewhere
+//
+// The wildcarding functionality shown by (c) supports both '*' (match
+// 0 or more characters) and '?' (match any single character) wildcards.
+//
+// There's also VLOG_IS_ON(n) "verbose level" condition macro. To be used as
+//
+//   if (VLOG_IS_ON(2)) {
+//     // do some logging preparation and logging
+//     // that can't be accomplished with just VLOG(2) << ...;
+//   }
+//
+// There are also VLOG_IF, VLOG_EVERY_N and VLOG_IF_EVERY_N "verbose level"
+// condition macros for sample cases, when some extra computation and
+// preparation for logs is not needed.
+//   VLOG_IF(1, (size > 1024))
+//      << "I'm printed when size is more than 1024 and when you run the "
+//         "program with --v=1 or more";
+//   VLOG_EVERY_N(1, 10)
+//      << "I'm printed every 10th occurrence, and when you run the program "
+//         "with --v=1 or more. Present occurrence is " << google::COUNTER;
+//   VLOG_IF_EVERY_N(1, (size > 1024), 10)
+//      << "I'm printed on every 10th occurrence of case when size is more "
+//         "than 1024, when you run the program with --v=1 or more. "
+//         "Present occurrence is " << google::COUNTER;
+//
+// The supported severity levels for macros that allow you to specify one
+// are (in increasing order of severity) INFO, WARNING, ERROR, and FATAL.
+// Note that messages of a given severity are logged not only in the
+// logfile for that severity, but also in all logfiles of lower severity.
+// E.g., a message of severity FATAL will be logged to the logfiles of
+// severity FATAL, ERROR, WARNING, and INFO.
+//
+// There is also the special severity of DFATAL, which logs FATAL in
+// debug mode, ERROR in normal mode.
+//
+// Very important: logging a message at the FATAL severity level causes
+// the program to terminate (after the message is logged).
+//
+// Unless otherwise specified, logs will be written to the filename
+// "<program name>.<hostname>.<user name>.log.<severity level>.", followed
+// by the date, time, and pid (you can't prevent the date, time, and pid
+// from being in the filename).
+//
+// The logging code takes two flags:
+//     --v=#           set the verbose level
+//     --logtostderr   log all the messages to stderr instead of to logfiles
+
+// LOG LINE PREFIX FORMAT
+//
+// Log lines have this form:
+//
+//     Lmmdd hh:mm:ss.uuuuuu threadid file:line] msg...
+//
+// where the fields are defined as follows:
+//
+//   L                A single character, representing the log level
+//                    (eg 'I' for INFO)
+//   mm               The month (zero padded; ie May is '05')
+//   dd               The day (zero padded)
+//   hh:mm:ss.uuuuuu  Time in hours, minutes and fractional seconds
+//   threadid         The space-padded thread ID as returned by GetTID()
+//                    (this matches the PID on Linux)
+//   file             The file name
+//   line             The line number
+//   msg              The user-supplied message
+//
+// Example:
+//
+//   I1103 11:57:31.739339 24395 google.cc:2341] Command line: ./some_prog
+//   I1103 11:57:31.739403 24395 google.cc:2342] Process id 24395
+//
+// NOTE: although the microseconds are useful for comparing events on
+// a single machine, clocks on different machines may not be well
+// synchronized.  Hence, use caution when comparing the low bits of
+// timestamps from different machines.
+
+#ifndef DECLARE_VARIABLE
+#define MUST_UNDEF_GFLAGS_DECLARE_MACROS
+#define DECLARE_VARIABLE(type, shorttype, name, tn)                     \
+  namespace fL##shorttype {                                             \
+    extern GOOGLE_GLOG_DLL_DECL type FLAGS_##name;                      \
+  }                                                                     \
+  using fL##shorttype::FLAGS_##name
+
+// bool specialization
+#define DECLARE_bool(name) \
+  DECLARE_VARIABLE(bool, B, name, bool)
+
+// int32 specialization
+#define DECLARE_int32(name) \
+  DECLARE_VARIABLE(google::int32, I, name, int32)
+
+// Special case for string, because we have to specify the namespace
+// std::string, which doesn't play nicely with our FLAG__namespace hackery.
+#define DECLARE_string(name)                                            \
+  namespace fLS {                                                       \
+    extern GOOGLE_GLOG_DLL_DECL std::string& FLAGS_##name;              \
+  }                                                                     \
+  using fLS::FLAGS_##name
+#endif
+
+// Set whether log messages go to stderr instead of logfiles
+DECLARE_bool(logtostderr);
+
+// Set whether log messages go to stderr in addition to logfiles.
+DECLARE_bool(alsologtostderr);
+
+// Set color messages logged to stderr (if supported by terminal).
+DECLARE_bool(colorlogtostderr);
+
+// Log messages at a level >= this flag are automatically sent to
+// stderr in addition to log files.
+DECLARE_int32(stderrthreshold);
+
+// Set whether the log prefix should be prepended to each line of output.
+DECLARE_bool(log_prefix);
+
+// Log messages at a level <= this flag are buffered.
+// Log messages at a higher level are flushed immediately.
+DECLARE_int32(logbuflevel);
+
+// Sets the maximum number of seconds which logs may be buffered for.
+DECLARE_int32(logbufsecs);
+
+// Log suppression level: messages logged at a lower level than this
+// are suppressed.
+DECLARE_int32(minloglevel);
+
+// If specified, logfiles are written into this directory instead of the
+// default logging directory.
+DECLARE_string(log_dir);
+
+// Set the log file mode.
+DECLARE_int32(logfile_mode);
+
+// Sets the path of the directory into which to put additional links
+// to the log files.
+DECLARE_string(log_link);
+
+DECLARE_int32(v);  // in vlog_is_on.cc
+
+// Sets the maximum log file size (in MB).
+DECLARE_int32(max_log_size);
+
+// Sets whether to avoid logging to the disk if the disk is full.
+DECLARE_bool(stop_logging_if_full_disk);
+
+#ifdef MUST_UNDEF_GFLAGS_DECLARE_MACROS
+#undef MUST_UNDEF_GFLAGS_DECLARE_MACROS
+#undef DECLARE_VARIABLE
+#undef DECLARE_bool
+#undef DECLARE_int32
+#undef DECLARE_string
+#endif
+
+// Log messages below the GOOGLE_STRIP_LOG level will be compiled away for
+// security reasons. See LOG(severity) below.
+
+// A few definitions of macros that don't generate much code.  Since
+// LOG(INFO) and its ilk are used all over our code, it's
+// better to have compact code for these operations.
+
+#if GOOGLE_STRIP_LOG == 0
+#define COMPACT_GOOGLE_LOG_INFO google::LogMessage( \
+      __FILE__, __LINE__)
+#define LOG_TO_STRING_INFO(message) google::LogMessage( \
+      __FILE__, __LINE__, google::GLOG_INFO, message)
+#else
+#define COMPACT_GOOGLE_LOG_INFO google::NullStream()
+#define LOG_TO_STRING_INFO(message) google::NullStream()
+#endif
+
+#if GOOGLE_STRIP_LOG <= 1
+#define COMPACT_GOOGLE_LOG_WARNING google::LogMessage( \
+      __FILE__, __LINE__, google::GLOG_WARNING)
+#define LOG_TO_STRING_WARNING(message) google::LogMessage( \
+      __FILE__, __LINE__, google::GLOG_WARNING, message)
+#else
+#define COMPACT_GOOGLE_LOG_WARNING google::NullStream()
+#define LOG_TO_STRING_WARNING(message) google::NullStream()
+#endif
+
+#if GOOGLE_STRIP_LOG <= 2
+#define COMPACT_GOOGLE_LOG_ERROR google::LogMessage( \
+      __FILE__, __LINE__, google::GLOG_ERROR)
+#define LOG_TO_STRING_ERROR(message) google::LogMessage( \
+      __FILE__, __LINE__, google::GLOG_ERROR, message)
+#else
+#define COMPACT_GOOGLE_LOG_ERROR google::NullStream()
+#define LOG_TO_STRING_ERROR(message) google::NullStream()
+#endif
+
+#if GOOGLE_STRIP_LOG <= 3
+#define COMPACT_GOOGLE_LOG_FATAL google::LogMessageFatal( \
+      __FILE__, __LINE__)
+#define LOG_TO_STRING_FATAL(message) google::LogMessage( \
+      __FILE__, __LINE__, google::GLOG_FATAL, message)
+#else
+#define COMPACT_GOOGLE_LOG_FATAL google::NullStreamFatal()
+#define LOG_TO_STRING_FATAL(message) google::NullStreamFatal()
+#endif
+
+#if defined(NDEBUG) && !defined(DCHECK_ALWAYS_ON)  // NDEBUG and not forced on
+#define DCHECK_IS_ON() 0
+#else  // NDEBUG is not defined, or DCHECK_ALWAYS_ON was defined
+#define DCHECK_IS_ON() 1
+#endif  // defined(NDEBUG) && !defined(DCHECK_ALWAYS_ON)
+
+// For DFATAL, we want to use LogMessage (as opposed to
+// LogMessageFatal), to be consistent with the original behavior.
+#if !DCHECK_IS_ON()
+#define COMPACT_GOOGLE_LOG_DFATAL COMPACT_GOOGLE_LOG_ERROR
+#elif GOOGLE_STRIP_LOG <= 3
+#define COMPACT_GOOGLE_LOG_DFATAL google::LogMessage( \
+      __FILE__, __LINE__, google::GLOG_FATAL)
+#else
+#define COMPACT_GOOGLE_LOG_DFATAL google::NullStreamFatal()
+#endif
+
+#define GOOGLE_LOG_INFO(counter) google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO, counter, &google::LogMessage::SendToLog)
+#define SYSLOG_INFO(counter) \
+  google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO, counter, \
+  &google::LogMessage::SendToSyslogAndLog)
+#define GOOGLE_LOG_WARNING(counter)  \
+  google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING, counter, \
+  &google::LogMessage::SendToLog)
+#define SYSLOG_WARNING(counter)  \
+  google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING, counter, \
+  &google::LogMessage::SendToSyslogAndLog)
+#define GOOGLE_LOG_ERROR(counter)  \
+  google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR, counter, \
+  &google::LogMessage::SendToLog)
+#define SYSLOG_ERROR(counter)  \
+  google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR, counter, \
+  &google::LogMessage::SendToSyslogAndLog)
+#define GOOGLE_LOG_FATAL(counter) \
+  google::LogMessage(__FILE__, __LINE__, google::GLOG_FATAL, counter, \
+  &google::LogMessage::SendToLog)
+#define SYSLOG_FATAL(counter) \
+  google::LogMessage(__FILE__, __LINE__, google::GLOG_FATAL, counter, \
+  &google::LogMessage::SendToSyslogAndLog)
+#define GOOGLE_LOG_DFATAL(counter) \
+  google::LogMessage(__FILE__, __LINE__, google::DFATAL_LEVEL, counter, \
+  &google::LogMessage::SendToLog)
+#define SYSLOG_DFATAL(counter) \
+  google::LogMessage(__FILE__, __LINE__, google::DFATAL_LEVEL, counter, \
+  &google::LogMessage::SendToSyslogAndLog)
+
+#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+// A very useful logging macro to log windows errors:
+#define LOG_SYSRESULT(result) \
+  if (FAILED(HRESULT_FROM_WIN32(result))) { \
+    LPSTR message = NULL; \
+    LPSTR msg = reinterpret_cast<LPSTR>(&message); \
+    DWORD message_length = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | \
+                         FORMAT_MESSAGE_FROM_SYSTEM, \
+                         0, result, 0, msg, 100, NULL); \
+    if (message_length > 0) { \
+      google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR, 0, \
+          &google::LogMessage::SendToLog).stream() \
+          << reinterpret_cast<const char*>(message); \
+      LocalFree(message); \
+    } \
+  }
+#endif
+
+// We use the preprocessor's merging operator, "##", so that, e.g.,
+// LOG(INFO) becomes the token COMPACT_GOOGLE_LOG_INFO and SYSLOG(INFO)
+// becomes SYSLOG_INFO(0).  There's a subtle difference between ostream
+// member streaming functions (e.g., ostream::operator<<(int)) and ostream
+// non-member streaming functions (e.g., ::operator<<(ostream&, string&)):
+// it turns out that it's impossible to stream something like a string
+// directly to an unnamed ostream. We employ a neat hack by calling the
+// stream() member function of LogMessage which seems to avoid the problem.
+#define LOG(severity) COMPACT_GOOGLE_LOG_ ## severity.stream()
+#define SYSLOG(severity) SYSLOG_ ## severity(0).stream()
+
+namespace google {
+
+// They need the definitions of integer types.
+#include "glog/log_severity.h"
+#include "glog/vlog_is_on.h"
+
+// Initialize google's logging library. You will see the program name
+// specified by argv0 in log outputs.
+GOOGLE_GLOG_DLL_DECL void InitGoogleLogging(const char* argv0);
+
+// Shutdown google's logging library.
+GOOGLE_GLOG_DLL_DECL void ShutdownGoogleLogging();
+
+// Install a function which will be called after LOG(FATAL).
+GOOGLE_GLOG_DLL_DECL void InstallFailureFunction(void (*fail_func)());
+
+class LogSink;  // defined below
+
+// If a non-NULL sink pointer is given, we push this message to that sink.
+// For LOG_TO_SINK we then do normal LOG(severity) logging as well.
+// This is useful for capturing messages and passing/storing them
+// somewhere more specific than the global log of the process.
+// Argument types:
+//   LogSink* sink;
+//   LogSeverity severity;
+// The cast is to disambiguate NULL arguments.
+#define LOG_TO_SINK(sink, severity) \
+  google::LogMessage(                                    \
+      __FILE__, __LINE__,                                               \
+      google::GLOG_ ## severity,                         \
+      static_cast<google::LogSink*>(sink), true).stream()
+#define LOG_TO_SINK_BUT_NOT_TO_LOGFILE(sink, severity)                  \
+  google::LogMessage(                                    \
+      __FILE__, __LINE__,                                               \
+      google::GLOG_ ## severity,                         \
+      static_cast<google::LogSink*>(sink), false).stream()
+
+// If a non-NULL string pointer is given, we write this message to that string.
+// We then do normal LOG(severity) logging as well.
+// This is useful for capturing messages and storing them somewhere more
+// specific than the global log of the process.
+// Argument types:
+//   severity: literal severity name (it is pasted into LOG_TO_STRING_<name>);
+//   std::string* message;
+// The cast is to disambiguate NULL arguments; it names std::string explicitly
+// so the macro also works where no unqualified 'string' is in scope.
+// NOTE: the result is the stream() of the message object for that severity.
+#define LOG_TO_STRING(severity, message) \
+  LOG_TO_STRING_##severity(static_cast<std::string*>(message)).stream()
+
+// If a non-NULL pointer is given, we push the message onto the end
+// of a vector of strings; otherwise, we report it with LOG(severity).
+// This is handy for capturing messages and perhaps passing them back
+// to the caller, rather than reporting them immediately.
+// Argument types:
+//   LogSeverity severity;
+//   vector<string> *outvec;
+// The cast is to disambiguate NULL arguments.
+#define LOG_STRING(severity, outvec) \
+  LOG_TO_STRING_##severity(static_cast<std::vector<std::string>*>(outvec)).stream()
+
+#define LOG_IF(severity, condition) \
+  !(condition) ? (void) 0 : google::LogMessageVoidify() & LOG(severity)
+#define SYSLOG_IF(severity, condition) \
+  !(condition) ? (void) 0 : google::LogMessageVoidify() & SYSLOG(severity)
+
+#define LOG_ASSERT(condition)  \
+  LOG_IF(FATAL, !(condition)) << "Assert failed: " #condition
+#define SYSLOG_ASSERT(condition) \
+  SYSLOG_IF(FATAL, !(condition)) << "Assert failed: " #condition
+
+// CHECK dies with a fatal error if condition is not true.  It is *not*
+// controlled by DCHECK_IS_ON(), so the check will be executed regardless of
+// compilation mode.  Therefore, it is safe to do things like:
+//    CHECK(fp->Write(x) == 4)
+#define CHECK(condition)  \
+      LOG_IF(FATAL, GOOGLE_PREDICT_BRANCH_NOT_TAKEN(!(condition))) \
+             << "Check failed: " #condition " "
+
+// A container for a string pointer which can be evaluated to a bool -
+// true iff the pointer is non-NULL (i.e. a check failed and left a message).
+struct CheckOpString {
+  CheckOpString(std::string* str) : str_(str) { }  // implicit: macros copy-init
+  // No destructor: if str_ is non-NULL, we're about to LOG(FATAL),
+  // so there's no point in cleaning up str_.
+  operator bool() const {
+    return GOOGLE_PREDICT_BRANCH_NOT_TAKEN(str_ != NULL);  // hinted as unlikely
+  }
+  std::string* str_;  // failure message, or NULL when the check passed
+};
+
+// Function is overloaded for integral types to allow static const
+// integrals declared in classes and not defined to be used as arguments to
+// CHECK* macros. It's not encouraged though.
+template <class T>
+inline const T&       GetReferenceableValue(const T&           t) { return t; }
+inline char           GetReferenceableValue(char               t) { return t; }
+inline unsigned char  GetReferenceableValue(unsigned char      t) { return t; }
+inline signed char    GetReferenceableValue(signed char        t) { return t; }
+inline short          GetReferenceableValue(short              t) { return t; }
+inline unsigned short GetReferenceableValue(unsigned short     t) { return t; }
+inline int            GetReferenceableValue(int                t) { return t; }
+inline unsigned int   GetReferenceableValue(unsigned int       t) { return t; }
+inline long           GetReferenceableValue(long               t) { return t; }
+inline unsigned long  GetReferenceableValue(unsigned long      t) { return t; }
+inline long long      GetReferenceableValue(long long          t) { return t; }
+inline unsigned long long GetReferenceableValue(unsigned long long t) {
+  return t;
+}
+
+// This is a dummy class to define the following operator.
+struct DummyClassToDefineOperator {};
+
+}
+
+// Define global operator<< to declare using ::operator<<.
+// This declaration will allow us to use CHECK macros for user
+// defined classes which have operator<< (e.g., stl_logging.h).
+inline std::ostream& operator<<(
+    std::ostream& out, const google::DummyClassToDefineOperator&) {
+  return out;
+}
+
+namespace google {
+
+// This formats a value for a failing CHECK_XX statement.  Ordinarily,
+// it uses the definition for operator<<, with a few special cases below.
+template <typename T>
+inline void MakeCheckOpValueString(std::ostream* os, const T& v) {
+  (*os) << v;
+}
+
+// Overrides for char types provide readable values for unprintable
+// characters.
+template <> GOOGLE_GLOG_DLL_DECL
+void MakeCheckOpValueString(std::ostream* os, const char& v);
+template <> GOOGLE_GLOG_DLL_DECL
+void MakeCheckOpValueString(std::ostream* os, const signed char& v);
+template <> GOOGLE_GLOG_DLL_DECL
+void MakeCheckOpValueString(std::ostream* os, const unsigned char& v);
+
+// Build the error message string. Specify no inlining for code size.
+template <typename T1, typename T2>
+std::string* MakeCheckOpString(const T1& v1, const T2& v2, const char* exprtext)
+    __attribute__ ((noinline));
+
+namespace base {
+namespace internal {
+
+// If "s" is less than base_logging::INFO, returns base_logging::INFO.
+// If "s" is greater than base_logging::FATAL, returns
+// base_logging::ERROR.  Otherwise, returns "s".
+LogSeverity NormalizeSeverity(LogSeverity s);
+
+}  // namespace internal
+
+// A helper class for formatting "expr (V1 vs. V2)" in a CHECK_XX
+// statement.  See MakeCheckOpString for sample usage.  Other
+// approaches were considered: use of a template method (e.g.,
+// base::BuildCheckOpString(exprtext, base::Print<T1>, &v1,
+// base::Print<T2>, &v2), however this approach has complications
+// related to volatile arguments and function-pointer arguments).
+class GOOGLE_GLOG_DLL_DECL CheckOpMessageBuilder {
+ public:
+  // Inserts "exprtext" and " (" to the stream.
+  explicit CheckOpMessageBuilder(const char *exprtext);
+  // Deletes "stream_".
+  ~CheckOpMessageBuilder();
+  // For inserting the first variable.
+  std::ostream* ForVar1() { return stream_; }
+  // For inserting the second variable (adds an intermediate " vs. ").
+  std::ostream* ForVar2();
+  // Get the result (inserts the closing ")").
+  std::string* NewString();
+
+ private:
+  std::ostringstream *stream_;
+};
+
+}  // namespace base
+
+template <typename T1, typename T2>
+std::string* MakeCheckOpString(const T1& v1, const T2& v2, const char* exprtext) {
+  base::CheckOpMessageBuilder builder(exprtext);  // emits exprtext and " ("
+  MakeCheckOpValueString(builder.ForVar1(), v1);  // first operand
+  MakeCheckOpValueString(builder.ForVar2(), v2);  // " vs. ", then second operand
+  return builder.NewString();                     // appends ")" and returns it
+}
+
+// Helper functions for CHECK_OP macro.
+// The (int, int) specialization works around the issue that the compiler
+// will not instantiate the template version of the function on values of
+// unnamed enum type - see comment below.
+#define DEFINE_CHECK_OP_IMPL(name, op) \
+  template <typename T1, typename T2> \
+  inline std::string* name##Impl(const T1& v1, const T2& v2,    \
+                            const char* exprtext) { \
+    if (GOOGLE_PREDICT_TRUE(v1 op v2)) return NULL; \
+    else return MakeCheckOpString(v1, v2, exprtext); \
+  } \
+  inline std::string* name##Impl(int v1, int v2, const char* exprtext) { \
+    return name##Impl<int, int>(v1, v2, exprtext); \
+  }
+
+// We use the full name Check_EQ, Check_NE, etc. in case the file including
+// base/logging.h provides its own #defines for the simpler names EQ, NE, etc.
+// This happens if, for example, those are used as token names in a
+// yacc grammar.
+DEFINE_CHECK_OP_IMPL(Check_EQ, ==)  // Compilation error with CHECK_EQ(NULL, x)?
+DEFINE_CHECK_OP_IMPL(Check_NE, !=)  // Use CHECK(x == NULL) instead.
+DEFINE_CHECK_OP_IMPL(Check_LE, <=)
+DEFINE_CHECK_OP_IMPL(Check_LT, < )
+DEFINE_CHECK_OP_IMPL(Check_GE, >=)
+DEFINE_CHECK_OP_IMPL(Check_GT, > )
+#undef DEFINE_CHECK_OP_IMPL
+
+// Helper macro for binary operators.
+// Don't use this macro directly in your code, use CHECK_EQ et al below.
+
+#if defined(STATIC_ANALYSIS)
+// Only for static analysis tool to know that it is equivalent to assert
+#define CHECK_OP_LOG(name, op, val1, val2, log) CHECK((val1) op (val2))
+#elif DCHECK_IS_ON()
+// In debug mode, avoid constructing CheckOpStrings if possible,
+// to reduce the overhead of CHECK statements by 2x.
+// Real DCHECK-heavy tests have seen 1.5x speedups.
+
+// The meaning of "string" might be different between now and
+// when this macro gets invoked (e.g., if someone is experimenting
+// with other string implementations that get defined after this
+// file is included).  Save the current meaning now and use it
+// in the macro.
+typedef std::string _Check_string;
+#define CHECK_OP_LOG(name, op, val1, val2, log)                         \
+  while (google::_Check_string* _result =                \
+         google::Check##name##Impl(                      \
+             google::GetReferenceableValue(val1),        \
+             google::GetReferenceableValue(val2),        \
+             #val1 " " #op " " #val2))                                  \
+    log(__FILE__, __LINE__,                                             \
+        google::CheckOpString(_result)).stream()
+#else
+// In optimized mode, use CheckOpString to hint to compiler that
+// the while condition is unlikely.
+#define CHECK_OP_LOG(name, op, val1, val2, log)                         \
+  while (google::CheckOpString _result =                 \
+         google::Check##name##Impl(                      \
+             google::GetReferenceableValue(val1),        \
+             google::GetReferenceableValue(val2),        \
+             #val1 " " #op " " #val2))                                  \
+    log(__FILE__, __LINE__, _result).stream()
+#endif  // STATIC_ANALYSIS, DCHECK_IS_ON()
+
+#if GOOGLE_STRIP_LOG <= 3  // FATAL messages are kept: report via LogMessageFatal
+#define CHECK_OP(name, op, val1, val2) \
+  CHECK_OP_LOG(name, op, val1, val2, google::LogMessageFatal)
+#else  // FATAL messages are stripped: route through NullStreamFatal
+#define CHECK_OP(name, op, val1, val2) \
+  CHECK_OP_LOG(name, op, val1, val2, google::NullStreamFatal)
+#endif  // GOOGLE_STRIP_LOG <= 3
+
+// Equality/Inequality checks - compare two values, and log a FATAL message
+// including the two values when the result is not as expected.  The values
+// must have operator<<(ostream, ...) defined.
+//
+// You may append to the error message like so:
+//   CHECK_NE(1, 2) << ": The world must be ending!";
+//
+// We are very careful to ensure that each argument is evaluated exactly
+// once, and that anything which is legal to pass as a function argument is
+// legal here.  In particular, the arguments may be temporary expressions
+// which will end up being destroyed at the end of the apparent statement,
+// for example:
+//   CHECK_EQ(string("abc")[1], 'b');
+//
+// WARNING: These don't compile correctly if one of the arguments is a pointer
+// and the other is NULL. To work around this, simply static_cast NULL to the
+// type of the desired pointer.
+
+#define CHECK_EQ(val1, val2) CHECK_OP(_EQ, ==, val1, val2)
+#define CHECK_NE(val1, val2) CHECK_OP(_NE, !=, val1, val2)
+#define CHECK_LE(val1, val2) CHECK_OP(_LE, <=, val1, val2)
+#define CHECK_LT(val1, val2) CHECK_OP(_LT, < , val1, val2)
+#define CHECK_GE(val1, val2) CHECK_OP(_GE, >=, val1, val2)
+#define CHECK_GT(val1, val2) CHECK_OP(_GT, > , val1, val2)
+
+// Check that the input is non NULL.  This is very useful in constructor
+// initializer lists.
+
+#define CHECK_NOTNULL(val) \
+  google::CheckNotNull(__FILE__, __LINE__, "'" #val "' Must be non NULL", (val))
+
+// Helper functions for string comparisons.
+// To avoid bloat, the definitions are in logging.cc.
+#define DECLARE_CHECK_STROP_IMPL(func, expected) \
+  GOOGLE_GLOG_DLL_DECL std::string* Check##func##expected##Impl( \
+      const char* s1, const char* s2, const char* names);
+DECLARE_CHECK_STROP_IMPL(strcmp, true)
+DECLARE_CHECK_STROP_IMPL(strcmp, false)
+DECLARE_CHECK_STROP_IMPL(strcasecmp, true)
+DECLARE_CHECK_STROP_IMPL(strcasecmp, false)
+#undef DECLARE_CHECK_STROP_IMPL
+
+// Helper macro for string comparisons.
+// Don't use this macro directly in your code, use CHECK_STREQ et al below.
+#define CHECK_STROP(func, op, expected, s1, s2) \
+  while (google::CheckOpString _result = \
+         google::Check##func##expected##Impl((s1), (s2), \
+                                     #s1 " " #op " " #s2)) \
+    LOG(FATAL) << *_result.str_
+
+
+// String (char*) equality/inequality checks.
+// CASE versions are case-insensitive.
+//
+// Note that "s1" and "s2" may be temporary strings which are destroyed
+// by the compiler at the end of the current "full expression"
+// (e.g. CHECK_STREQ(Foo().c_str(), Bar().c_str())).
+
+#define CHECK_STREQ(s1, s2) CHECK_STROP(strcmp, ==, true, s1, s2)
+#define CHECK_STRNE(s1, s2) CHECK_STROP(strcmp, !=, false, s1, s2)
+#define CHECK_STRCASEEQ(s1, s2) CHECK_STROP(strcasecmp, ==, true, s1, s2)
+#define CHECK_STRCASENE(s1, s2) CHECK_STROP(strcasecmp, !=, false, s1, s2)
+
+// A must be a true array (not a pointer); args are parenthesized for safety.
+#define CHECK_INDEX(I,A) CHECK((I) < (sizeof(A)/sizeof((A)[0])))
+#define CHECK_BOUND(B,A) CHECK((B) <= (sizeof(A)/sizeof((A)[0])))
+#define CHECK_DOUBLE_EQ(val1, val2)              \
+  do {                                           \
+    CHECK_LE((val1), (val2)+0.000000000000001L); \
+    CHECK_GE((val1), (val2)-0.000000000000001L); \
+  } while (0)
+
+#define CHECK_NEAR(val1, val2, margin)           \
+  do {                                           \
+    CHECK_LE((val1), (val2)+(margin));           \
+    CHECK_GE((val1), (val2)-(margin));           \
+  } while (0)
+
+// perror()..googly style!
+//
+// PLOG() and PLOG_IF() and PCHECK() behave exactly like their LOG* and
+// CHECK equivalents with the addition that they postpend a description
+// of the current state of errno to their output lines.
+
+#define PLOG(severity) GOOGLE_PLOG(severity, 0).stream()
+
+#define GOOGLE_PLOG(severity, counter)  \
+  google::ErrnoLogMessage( \
+      __FILE__, __LINE__, google::GLOG_ ## severity, counter, \
+      &google::LogMessage::SendToLog)
+
+#define PLOG_IF(severity, condition) \
+  !(condition) ? (void) 0 : google::LogMessageVoidify() & PLOG(severity)
+
+// A CHECK() macro that postpends errno if the condition is false. E.g.
+//
+// if (poll(fds, nfds, timeout) == -1) { PCHECK(errno == EINTR); ... }
+#define PCHECK(condition)  \
+      PLOG_IF(FATAL, GOOGLE_PREDICT_BRANCH_NOT_TAKEN(!(condition))) \
+              << "Check failed: " #condition " "
+
+// A CHECK() macro that lets you assert the success of a function that
+// returns -1 and sets errno in case of an error. E.g.
+//
+// CHECK_ERR(mkdir(path, 0700));
+//
+// or
+//
+// int fd = open(filename, flags); CHECK_ERR(fd) << ": open " << filename;
+#define CHECK_ERR(invocation)                                          \
+PLOG_IF(FATAL, GOOGLE_PREDICT_BRANCH_NOT_TAKEN((invocation) == -1))    \
+        << #invocation
+
+// Use macro expansion to create, for each use of LOG_EVERY_N(), static
+// variables with the __LINE__ expansion as part of the variable name.
+#define LOG_EVERY_N_VARNAME(base, line) LOG_EVERY_N_VARNAME_CONCAT(base, line)
+#define LOG_EVERY_N_VARNAME_CONCAT(base, line) base ## line
+
+#define LOG_OCCURRENCES LOG_EVERY_N_VARNAME(occurrences_, __LINE__)
+#define LOG_OCCURRENCES_MOD_N LOG_EVERY_N_VARNAME(occurrences_mod_n_, __LINE__)
+
+#define SOME_KIND_OF_LOG_EVERY_N(severity, n, what_to_do) \
+  static int LOG_OCCURRENCES = 0, LOG_OCCURRENCES_MOD_N = 0; \
+  ++LOG_OCCURRENCES; \
+  if (++LOG_OCCURRENCES_MOD_N > n) LOG_OCCURRENCES_MOD_N -= n; \
+  if (LOG_OCCURRENCES_MOD_N == 1) \
+    google::LogMessage( \
+        __FILE__, __LINE__, google::GLOG_ ## severity, LOG_OCCURRENCES, \
+        &what_to_do).stream()
+
+#define SOME_KIND_OF_LOG_IF_EVERY_N(severity, condition, n, what_to_do) \
+  static int LOG_OCCURRENCES = 0, LOG_OCCURRENCES_MOD_N = 0; \
+  ++LOG_OCCURRENCES; \
+  if (condition && \
+      ((LOG_OCCURRENCES_MOD_N=(LOG_OCCURRENCES_MOD_N + 1) % n) == (1 % n))) \
+    google::LogMessage( \
+        __FILE__, __LINE__, google::GLOG_ ## severity, LOG_OCCURRENCES, \
+                 &what_to_do).stream()
+
+#define SOME_KIND_OF_PLOG_EVERY_N(severity, n, what_to_do) \
+  static int LOG_OCCURRENCES = 0, LOG_OCCURRENCES_MOD_N = 0; \
+  ++LOG_OCCURRENCES; \
+  if (++LOG_OCCURRENCES_MOD_N > n) LOG_OCCURRENCES_MOD_N -= n; \
+  if (LOG_OCCURRENCES_MOD_N == 1) \
+    google::ErrnoLogMessage( \
+        __FILE__, __LINE__, google::GLOG_ ## severity, LOG_OCCURRENCES, \
+        &what_to_do).stream()
+
+#define SOME_KIND_OF_LOG_FIRST_N(severity, n, what_to_do) \
+  static int LOG_OCCURRENCES = 0; \
+  if (LOG_OCCURRENCES <= n) \
+    ++LOG_OCCURRENCES; \
+  if (LOG_OCCURRENCES <= n) \
+    google::LogMessage( \
+        __FILE__, __LINE__, google::GLOG_ ## severity, LOG_OCCURRENCES, \
+        &what_to_do).stream()
+
+namespace glog_internal_namespace_ {
+template <bool>
+struct CompileAssert {
+};
+struct CrashReason;
+
+// Returns true if FailureSignalHandler is installed.
+// Needs to be exported since it's used by the signalhandler_unittest.
+GOOGLE_GLOG_DLL_DECL bool IsFailureSignalHandlerInstalled();
+}  // namespace glog_internal_namespace_
+
+#define LOG_EVERY_N(severity, n)                                        \
+  SOME_KIND_OF_LOG_EVERY_N(severity, (n), google::LogMessage::SendToLog)
+
+#define SYSLOG_EVERY_N(severity, n) \
+  SOME_KIND_OF_LOG_EVERY_N(severity, (n), google::LogMessage::SendToSyslogAndLog)
+
+#define PLOG_EVERY_N(severity, n) \
+  SOME_KIND_OF_PLOG_EVERY_N(severity, (n), google::LogMessage::SendToLog)
+
+#define LOG_FIRST_N(severity, n) \
+  SOME_KIND_OF_LOG_FIRST_N(severity, (n), google::LogMessage::SendToLog)
+
+#define LOG_IF_EVERY_N(severity, condition, n) \
+  SOME_KIND_OF_LOG_IF_EVERY_N(severity, (condition), (n), google::LogMessage::SendToLog)
+
+// We want the special COUNTER value available for LOG_EVERY_X()'ed messages
+enum PRIVATE_Counter {COUNTER};
+
+#ifdef GLOG_NO_ABBREVIATED_SEVERITIES
+// wingdi.h defines ERROR to be 0. When we call LOG(ERROR), it gets
+// substituted with 0, and it expands to COMPACT_GOOGLE_LOG_0. To allow us
+// to keep using this syntax, we define this macro to do the same thing
+// as COMPACT_GOOGLE_LOG_ERROR.
+#define COMPACT_GOOGLE_LOG_0 COMPACT_GOOGLE_LOG_ERROR
+#define SYSLOG_0 SYSLOG_ERROR
+#define LOG_TO_STRING_0 LOG_TO_STRING_ERROR
+// Needed for LOG_IS_ON(ERROR).
+const LogSeverity GLOG_0 = GLOG_ERROR;
+#else
+// Users may include windows.h after logging.h without
+// GLOG_NO_ABBREVIATED_SEVERITIES nor WIN32_LEAN_AND_MEAN.
+// For this case, we cannot detect if ERROR is defined before users
+// actually use ERROR. Let's make an undefined symbol to warn users.
+# define GLOG_ERROR_MSG ERROR_macro_is_defined_Define_GLOG_NO_ABBREVIATED_SEVERITIES_before_including_logging_h_See_the_document_for_detail
+# define COMPACT_GOOGLE_LOG_0 GLOG_ERROR_MSG
+# define SYSLOG_0 GLOG_ERROR_MSG
+# define LOG_TO_STRING_0 GLOG_ERROR_MSG
+# define GLOG_0 GLOG_ERROR_MSG
+#endif
+
+// Plus some debug-logging macros that get compiled to nothing for production
+
+#if DCHECK_IS_ON()
+
+#define DLOG(severity) LOG(severity)
+#define DVLOG(verboselevel) VLOG(verboselevel)
+#define DLOG_IF(severity, condition) LOG_IF(severity, condition)
+#define DLOG_EVERY_N(severity, n) LOG_EVERY_N(severity, n)
+#define DLOG_IF_EVERY_N(severity, condition, n) \
+  LOG_IF_EVERY_N(severity, condition, n)
+#define DLOG_ASSERT(condition) LOG_ASSERT(condition)
+
+// debug-only checking.  executed if DCHECK_IS_ON().
+#define DCHECK(condition) CHECK(condition)
+#define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2)
+#define DCHECK_NE(val1, val2) CHECK_NE(val1, val2)
+#define DCHECK_LE(val1, val2) CHECK_LE(val1, val2)
+#define DCHECK_LT(val1, val2) CHECK_LT(val1, val2)
+#define DCHECK_GE(val1, val2) CHECK_GE(val1, val2)
+#define DCHECK_GT(val1, val2) CHECK_GT(val1, val2)
+#define DCHECK_NOTNULL(val) CHECK_NOTNULL(val)
+#define DCHECK_STREQ(str1, str2) CHECK_STREQ(str1, str2)
+#define DCHECK_STRCASEEQ(str1, str2) CHECK_STRCASEEQ(str1, str2)
+#define DCHECK_STRNE(str1, str2) CHECK_STRNE(str1, str2)
+#define DCHECK_STRCASENE(str1, str2) CHECK_STRCASENE(str1, str2)
+
+#else  // !DCHECK_IS_ON()
+
+#define DLOG(severity) \
+  true ? (void) 0 : google::LogMessageVoidify() & LOG(severity)
+
+#define DVLOG(verboselevel) \
+  (true || !VLOG_IS_ON(verboselevel)) ?\
+    (void) 0 : google::LogMessageVoidify() & LOG(INFO)
+
+#define DLOG_IF(severity, condition) \
+  (true || !(condition)) ? (void) 0 : google::LogMessageVoidify() & LOG(severity)
+
+#define DLOG_EVERY_N(severity, n) \
+  true ? (void) 0 : google::LogMessageVoidify() & LOG(severity)
+
+#define DLOG_IF_EVERY_N(severity, condition, n) \
+  (true || !(condition))? (void) 0 : google::LogMessageVoidify() & LOG(severity)
+
+#define DLOG_ASSERT(condition) \
+  true ? (void) 0 : LOG_ASSERT(condition)
+
+// MSVC warning C4127: conditional expression is constant
+#define DCHECK(condition) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK(condition)
+
+#define DCHECK_EQ(val1, val2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_EQ(val1, val2)
+
+#define DCHECK_NE(val1, val2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_NE(val1, val2)
+
+#define DCHECK_LE(val1, val2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_LE(val1, val2)
+
+#define DCHECK_LT(val1, val2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_LT(val1, val2)
+
+#define DCHECK_GE(val1, val2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_GE(val1, val2)
+
+#define DCHECK_GT(val1, val2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_GT(val1, val2)
+
+// You may see warnings in release mode if you don't use the return
+// value of DCHECK_NOTNULL. Please just use DCHECK for such cases.
+#define DCHECK_NOTNULL(val) (val)
+
+#define DCHECK_STREQ(str1, str2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_STREQ(str1, str2)
+
+#define DCHECK_STRCASEEQ(str1, str2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_STRCASEEQ(str1, str2)
+
+#define DCHECK_STRNE(str1, str2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_STRNE(str1, str2)
+
+#define DCHECK_STRCASENE(str1, str2) \
+  GLOG_MSVC_PUSH_DISABLE_WARNING(4127) \
+  while (false) \
+    GLOG_MSVC_POP_WARNING() CHECK_STRCASENE(str1, str2)
+
+#endif  // DCHECK_IS_ON()
+
+// Log only in verbose mode.
+
+#define VLOG(verboselevel) LOG_IF(INFO, VLOG_IS_ON(verboselevel))
+
+#define VLOG_IF(verboselevel, condition) \
+  LOG_IF(INFO, (condition) && VLOG_IS_ON(verboselevel))
+
+#define VLOG_EVERY_N(verboselevel, n) \
+  LOG_IF_EVERY_N(INFO, VLOG_IS_ON(verboselevel), n)
+
+#define VLOG_IF_EVERY_N(verboselevel, condition, n) \
+  LOG_IF_EVERY_N(INFO, (condition) && VLOG_IS_ON(verboselevel), n)
+
+namespace base_logging {
+
+// LogMessage::LogStream is a std::ostream backed by this streambuf.
+// This class ignores overflow and leaves two bytes at the end of the
+// buffer to allow for a '\n' and '\0'.
+class GOOGLE_GLOG_DLL_DECL LogStreamBuf : public std::streambuf {
+ public:
+  // REQUIREMENTS: "len" must be >= 2 to account for the '\n' and '\0'.
+  LogStreamBuf(char *buf, int len) {
+    setp(buf, buf + len - 2);
+  }
+
+  // Returns "ch" without storing it, so output past the put area is dropped.
+  virtual int_type overflow(int_type ch) {
+    return ch;
+  }
+
+  // Legacy ostrstream-style accessors: bytes written so far and buffer start.
+  size_t pcount() const { return pptr() - pbase(); }
+  char* pbase() const { return std::streambuf::pbase(); }
+};
+
+}  // namespace base_logging
+
+//
+// This class more or less represents a particular log message.  You
+// create an instance of LogMessage and then stream stuff to it.
+// When you finish streaming to it, ~LogMessage is called and the
+// full message gets streamed to the appropriate destination.
+//
+// You shouldn't actually use LogMessage's constructor to log things,
+// though.  You should use the LOG() macro (and variants thereof)
+// above.
+class GOOGLE_GLOG_DLL_DECL LogMessage {
+public:
+  enum {
+    // Passing kNoLogPrefix for the line number disables the
+    // log-message prefix. Useful for using the LogMessage
+    // infrastructure as a printing utility. See also the --log_prefix
+    // flag for controlling the log-message prefix on an
+    // application-wide basis.
+    kNoLogPrefix = -1
+  };
+
+  // LogStream inherits from a non-DLL-exported class (std::ostream)
+  // and VC++ produces a warning for this situation.
+  // However, MSDN says "C4275 can be ignored in Microsoft Visual C++
+  // 2005 if you are deriving from a type in the Standard C++ Library"
+  // http://msdn.microsoft.com/en-us/library/3tdb471s(VS.80).aspx
+  // Let's just ignore the warning.
+#ifdef _MSC_VER
+# pragma warning(push)
+# pragma warning(disable: 4275)
+#endif
+  class GOOGLE_GLOG_DLL_DECL LogStream : public std::ostream {
+#ifdef _MSC_VER
+# pragma warning(pop)
+#endif
+  public:
+    LogStream(char *buf, int len, int ctr)
+        : std::ostream(NULL),
+          streambuf_(buf, len),
+          ctr_(ctr),
+          self_(this) {
+      rdbuf(&streambuf_);
+    }
+
+    int ctr() const { return ctr_; }
+    void set_ctr(int ctr) { ctr_ = ctr; }
+    LogStream* self() const { return self_; }
+
+    // Legacy std::streambuf methods.
+    size_t pcount() const { return streambuf_.pcount(); }
+    char* pbase() const { return streambuf_.pbase(); }
+    char* str() const { return pbase(); }
+
+  private:
+    LogStream(const LogStream&);
+    LogStream& operator=(const LogStream&);
+    base_logging::LogStreamBuf streambuf_;
+    int ctr_;  // Counter hack (for the LOG_EVERY_X() macro)
+    LogStream *self_;  // Consistency check hack
+  };
+
+public:
+  // icc 8 requires this typedef to avoid an internal compiler error.
+  typedef void (LogMessage::*SendMethod)();
+
+  LogMessage(const char* file, int line, LogSeverity severity, int ctr,
+             SendMethod send_method);
+
+  // Two special constructors that generate reduced amounts of code at
+  // LOG call sites for common cases.
+
+  // Used for LOG(INFO): Implied are:
+  // severity = INFO, ctr = 0, send_method = &LogMessage::SendToLog.
+  //
+  // Using this constructor instead of the more complex constructor above
+  // saves 19 bytes per call site.
+  LogMessage(const char* file, int line);
+
+  // Used for LOG(severity) where severity != INFO.  Implied
+  // are: ctr = 0, send_method = &LogMessage::SendToLog
+  //
+  // Using this constructor instead of the more complex constructor above
+  // saves 17 bytes per call site.
+  LogMessage(const char* file, int line, LogSeverity severity);
+
+  // Constructor to log this message to a specified sink (if not NULL).
+  // Implied are: ctr = 0, send_method = &LogMessage::SendToSinkAndLog if
+  // also_send_to_log is true, send_method = &LogMessage::SendToSink otherwise.
+  LogMessage(const char* file, int line, LogSeverity severity, LogSink* sink,
+             bool also_send_to_log);
+
+  // Constructor where we also give a vector<string> pointer
+  // for storing the messages (if the pointer is not NULL).
+  // Implied are: ctr = 0, send_method = &LogMessage::SaveOrSendToLog.
+  LogMessage(const char* file, int line, LogSeverity severity,
+             std::vector<std::string>* outvec);
+
+  // Constructor where we also give a string pointer for storing the
+  // message (if the pointer is not NULL).  Implied are: ctr = 0,
+  // send_method = &LogMessage::WriteToStringAndLog.
+  LogMessage(const char* file, int line, LogSeverity severity,
+             std::string* message);
+
+  // A special constructor used for check failures
+  LogMessage(const char* file, int line, const CheckOpString& result);
+
+  ~LogMessage();
+
+  // Flush a buffered message to the sink set in the constructor.  Always
+  // called by the destructor, it may also be called from elsewhere if
+  // needed.  Only the first call is actioned; any later ones are ignored.
+  void Flush();
+
+  // An arbitrary limit on the length of a single log message.  This
+  // is so that streaming can be done more efficiently.
+  static const size_t kMaxLogMessageLen;
+
+  // These should not be called directly outside of logging.*,
+  // only passed as SendMethod arguments to other LogMessage methods:
+  void SendToLog();  // Actually dispatch to the logs
+  void SendToSyslogAndLog();  // Actually dispatch to syslog and the logs
+
+  // Call abort() or similar to perform LOG(FATAL) crash.
+  static void __attribute__ ((noreturn)) Fail();
+
+  std::ostream& stream();
+
+  int preserved_errno() const;
+
+  // Must be called without the log_mutex held.  (L < log_mutex)
+  static int64 num_messages(int severity);
+
+  struct LogMessageData;
+
+private:
+  // Fully internal SendMethod cases:
+  void SendToSinkAndLog();  // Send to sink if provided and dispatch to the logs
+  void SendToSink();  // Send to sink if provided, do nothing otherwise.
+
+  // Write to string if provided and dispatch to the logs.
+  void WriteToStringAndLog();
+
+  void SaveOrSendToLog();  // Save to stringvec if provided, else to logs
+
+  void Init(const char* file, int line, LogSeverity severity,
+            void (LogMessage::*send_method)());
+
+  // Used to fill in crash information during LOG(FATAL) failures.
+  void RecordCrashReason(glog_internal_namespace_::CrashReason* reason);
+
+  // Counts of messages sent at each priority:
+  static int64 num_messages_[NUM_SEVERITIES];  // under log_mutex
+
+  // We keep the data in a separate struct so that each instance of
+  // LogMessage uses less stack space.
+  LogMessageData* allocated_;
+  LogMessageData* data_;
+
+  friend class LogDestination;
+
+  LogMessage(const LogMessage&);
+  void operator=(const LogMessage&);
+};
+
+// This class happens to be thread-hostile because all instances share
+// a single data buffer, but since it can only be created just before
+// the process dies, we don't worry so much.
+class GOOGLE_GLOG_DLL_DECL LogMessageFatal : public LogMessage {
+ public:
+  LogMessageFatal(const char* file, int line);
+  LogMessageFatal(const char* file, int line, const CheckOpString& result);
+  __attribute__ ((noreturn)) ~LogMessageFatal();
+};
+
+// A non-macro interface to the log facility (useful
+// when the logging level is not a compile-time constant).
+inline void LogAtLevel(int const severity, std::string const &msg) {
+  LogMessage(__FILE__, __LINE__, severity).stream() << msg;
+}
+
+// A macro alternative of LogAtLevel. New code may want to use this
+// version since there are two advantages: 1. this version outputs the
+// file name and the line number where this macro is put like other
+// LOG macros, 2. this macro can be used as C++ stream.
+#define LOG_AT_LEVEL(severity) google::LogMessage(__FILE__, __LINE__, severity).stream()
+
+
+// Check if it's compiled in C++11 mode.
+//
+// __GXX_EXPERIMENTAL_CXX0X__ is defined by gcc and clang up to at least
+// gcc-4.7 and clang-3.1 (2011-12-13).  __cplusplus was defined to 1
+// in gcc before 4.7 (Crosstool 16) and clang before 3.1, but is
+// defined according to the language version in effect thereafter.
+// Microsoft Visual Studio 14 (2015) sets __cplusplus==199711 despite
+// reasonably good C++11 support, so we set LANG_CXX for it and
+// newer versions (_MSC_VER >= 1900).
+#if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L || \
+     (defined(_MSC_VER) && _MSC_VER >= 1900))
+// Helper for CHECK_NOTNULL().
+//
+// In C++11, all cases can be handled by a single function. Since the value
+// category of the argument is preserved (also for rvalue references),
+// member initializer lists like the one below will compile correctly:
+//
+//   Foo()
+//     : x_(CHECK_NOTNULL(MethodReturningUniquePtr())) {}
+template <typename T>
+T CheckNotNull(const char* file, int line, const char* names, T&& t) {
+ if (t == nullptr) {
+   LogMessageFatal(file, line, new std::string(names));
+ }
+ return std::forward<T>(t);
+}
+
+#else
+
+// A small helper for CHECK_NOTNULL().
+template <typename T>
+T* CheckNotNull(const char *file, int line, const char *names, T* t) {
+  if (t == NULL) {
+    LogMessageFatal(file, line, new std::string(names));
+  }
+  return t;
+}
+#endif
+
+// Allow folks to put a counter in the LOG_EVERY_X()'ed messages. This
+// only works if ostream is a LogStream. If the ostream is not a
+// LogStream you'll get an assert saying as much at runtime.
+GOOGLE_GLOG_DLL_DECL std::ostream& operator<<(std::ostream &os,
+                                              const PRIVATE_Counter&);
+
+
+// Derived class for PLOG*() above.
+class GOOGLE_GLOG_DLL_DECL ErrnoLogMessage : public LogMessage {
+ public:
+
+  ErrnoLogMessage(const char* file, int line, LogSeverity severity, int ctr,
+                  void (LogMessage::*send_method)());
+
+  // Postpends ": strerror(errno) [errno]".
+  ~ErrnoLogMessage();
+
+ private:
+  ErrnoLogMessage(const ErrnoLogMessage&);
+  void operator=(const ErrnoLogMessage&);
+};
+
+
+// This class is used to explicitly ignore values in the conditional
+// logging macros.  This avoids compiler warnings like "value computed
+// is not used" and "statement has no effect".
+
+class GOOGLE_GLOG_DLL_DECL LogMessageVoidify {
+ public:
+  LogMessageVoidify() { }
+  // This has to be an operator with a precedence lower than << but
+  // higher than ?:
+  void operator&(std::ostream&) { }
+};
+
+
+// Flushes all log files that contain messages that are at least of
+// the specified severity level.  Thread-safe.
+GOOGLE_GLOG_DLL_DECL void FlushLogFiles(LogSeverity min_severity);
+
+// Flushes all log files that contain messages that are at least of
+// the specified severity level. Thread-hostile because it ignores
+// locking -- used for catastrophic failures.
+GOOGLE_GLOG_DLL_DECL void FlushLogFilesUnsafe(LogSeverity min_severity);
+
+//
+// Set the destination to which a particular severity level of log
+// messages is sent.  If base_filename is "", it means "don't log this
+// severity".  Thread-safe.
+//
+GOOGLE_GLOG_DLL_DECL void SetLogDestination(LogSeverity severity,
+                                            const char* base_filename);
+
+//
+// Set the basename of the symlink to the latest log file at a given
+// severity.  If symlink_basename is empty, do not make a symlink.  If
+// you don't call this function, the symlink basename is the
+// invocation name of the program.  Thread-safe.
+//
+GOOGLE_GLOG_DLL_DECL void SetLogSymlink(LogSeverity severity,
+                                        const char* symlink_basename);
+
+//
+// Used to send logs to some other kind of destination
+// Users should subclass LogSink and override send to do whatever they want.
+// Implementations must be thread-safe because a shared instance will
+// be called from whichever thread ran the LOG(XXX) line.
+class GOOGLE_GLOG_DLL_DECL LogSink {
+ public:
+  virtual ~LogSink();
+
+  // Sink's logging logic (message_len is such as to exclude '\n' at the end).
+  // This method can't use LOG() or CHECK() as logging system mutex(s) are held
+  // during this call.
+  virtual void send(LogSeverity severity, const char* full_filename,
+                    const char* base_filename, int line,
+                    const struct ::tm* tm_time,
+                    const char* message, size_t message_len) = 0;
+
+  // Redefine this to implement waiting for
+  // the sink's logging logic to complete.
+  // It will be called after each send() returns,
+  // but before that LogMessage exits or crashes.
+  // By default this function does nothing.
+  // Using this function one can implement complex logic for send()
+  // that itself involves logging; and do all this w/o causing deadlocks and
+  // inconsistent rearrangement of log messages.
+  // E.g. if a LogSink has thread-specific actions, the send() method
+  // can simply add the message to a queue and wake up another thread that
+  // handles real logging while itself making some LOG() calls;
+  // WaitTillSent() can be implemented to wait for that logic to complete.
+  // See our unittest for an example.
+  virtual void WaitTillSent();
+
+  // Returns the normal text output of the log message.
+  // Can be useful to implement send().
+  static std::string ToString(LogSeverity severity, const char* file, int line,
+                              const struct ::tm* tm_time,
+                              const char* message, size_t message_len);
+};
+
+// Add or remove a LogSink as a consumer of logging data.  Thread-safe.
+GOOGLE_GLOG_DLL_DECL void AddLogSink(LogSink *destination);
+GOOGLE_GLOG_DLL_DECL void RemoveLogSink(LogSink *destination);
+
+//
+// Specify an "extension" added to the filename specified via
+// SetLogDestination.  This applies to all severity levels.  It's
+// often used to append the port we're listening on to the logfile
+// name.  Thread-safe.
+//
+GOOGLE_GLOG_DLL_DECL void SetLogFilenameExtension(
+    const char* filename_extension);
+
+//
+// Make it so that all log messages of at least a particular severity
+// are logged to stderr (in addition to logging to the usual log
+// file(s)).  Thread-safe.
+//
+GOOGLE_GLOG_DLL_DECL void SetStderrLogging(LogSeverity min_severity);
+
+//
+// Make it so that all log messages go only to stderr.  Thread-safe.
+//
+GOOGLE_GLOG_DLL_DECL void LogToStderr();
+
+//
+// Make it so that all log messages of at least a particular severity are
+// logged via email to a list of addresses (in addition to logging to the
+// usual log file(s)).  The list of addresses is just a string containing
+// the email addresses to send to (separated by spaces, say).  Thread-safe.
+//
+GOOGLE_GLOG_DLL_DECL void SetEmailLogging(LogSeverity min_severity,
+                                          const char* addresses);
+
+// A simple function that sends email. dest is a comma-separated
+// list of addresses.  Thread-safe.
+GOOGLE_GLOG_DLL_DECL bool SendEmail(const char *dest,
+                                    const char *subject, const char *body);
+
+GOOGLE_GLOG_DLL_DECL const std::vector<std::string>& GetLoggingDirectories();
+
+// For tests only:  Clear the internal [cached] list of logging directories to
+// force a refresh the next time GetLoggingDirectories is called.
+// Thread-hostile.
+void TestOnly_ClearLoggingDirectoriesList();
+
+// Returns a set of existing temporary directories, which will be a
+// subset of the directories returned by GetLoggingDirectories().
+// Thread-safe.
+GOOGLE_GLOG_DLL_DECL void GetExistingTempDirectories(
+    std::vector<std::string>* list);
+
+// Print any fatal message again -- useful to call from signal handler
+// so that the last thing in the output is the fatal message.
+// Thread-hostile, but a race is unlikely.
+GOOGLE_GLOG_DLL_DECL void ReprintFatalMessage();
+
+// Truncate a log file that may be the append-only output of multiple
+// processes and hence can't simply be renamed/reopened (typically a
+// stdout/stderr).  If the file "path" is > "limit" bytes, copy the
+// last "keep" bytes to offset 0 and truncate the rest. Since we could
+// be racing with other writers, this approach has the potential to
+// lose very small amounts of data. For security, only follow symlinks
+// if the path is /proc/self/fd/*
+GOOGLE_GLOG_DLL_DECL void TruncateLogFile(const char *path,
+                                          int64 limit, int64 keep);
+
+// Truncate stdout and stderr if they are over the value specified by
+// --max_log_size; keep the final 1MB.  This function has the same
+// race condition as TruncateLogFile.
+GOOGLE_GLOG_DLL_DECL void TruncateStdoutStderr();
+
+// Return the string representation of the provided LogSeverity level.
+// Thread-safe.
+GOOGLE_GLOG_DLL_DECL const char* GetLogSeverityName(LogSeverity severity);
+
+// ---------------------------------------------------------------------
+// Implementation details that are not useful to most clients
+// ---------------------------------------------------------------------
+
+// A Logger is the interface used by logging modules to emit entries
+// to a log.  A typical implementation will dump formatted data to a
+// sequence of files.  We also provide interfaces that will forward
+// the data to another thread so that the invoker never blocks.
+// Implementations should be thread-safe since the logging system
+// will write to them from multiple threads.
+
+namespace base {
+
+class GOOGLE_GLOG_DLL_DECL Logger {
+ public:
+  virtual ~Logger();
+
+  // Writes "message[0,message_len-1]" corresponding to an event that
+  // occurred at "timestamp".  If "force_flush" is true, the log file
+  // is flushed immediately.
+  //
+  // The input message has already been formatted as deemed
+  // appropriate by the higher level logging facility.  For example,
+  // textual log messages already contain timestamps, and the
+  // file:linenumber header.
+  virtual void Write(bool force_flush,
+                     time_t timestamp,
+                     const char* message,
+                     int message_len) = 0;
+
+  // Flush any buffered messages.
+  virtual void Flush() = 0;
+
+  // Get the current LOG file size.
+  // The returned value is approximate since some
+  // logged data may not have been flushed to disk yet.
+  virtual uint32 LogSize() = 0;
+};
+
+// Get the logger for the specified severity level.  The logger
+// remains the property of the logging module and should not be
+// deleted by the caller.  Thread-safe.
+extern GOOGLE_GLOG_DLL_DECL Logger* GetLogger(LogSeverity level);
+
+// Set the logger for the specified severity level.  The logger
+// becomes the property of the logging module and should not
+// be deleted by the caller.  Thread-safe.
+extern GOOGLE_GLOG_DLL_DECL void SetLogger(LogSeverity level, Logger* logger);
+
+}  // namespace base
+
+// glibc has traditionally implemented two incompatible versions of
+// strerror_r(). There is a poorly defined convention for picking the
+// version that we want, but it is not clear whether it even works with
+// all versions of glibc.
+// So, instead, we provide this wrapper that automatically detects the
+// version that is in use, and then implements POSIX semantics.
+// N.B. In addition to what POSIX says, we also guarantee that "buf" will
+// be set to an empty string, if this function failed. This means, in most
+// cases, you do not need to check the error code and you can directly
+// use the value of "buf". It will never have an undefined value.
+// DEPRECATED: Use StrError(int) instead.
+GOOGLE_GLOG_DLL_DECL int posix_strerror_r(int err, char *buf, size_t len);
+
+// A thread-safe replacement for strerror(). Returns a string describing the
+// given POSIX error code.
+GOOGLE_GLOG_DLL_DECL std::string StrError(int err);
+
+// A class for which we define operator<<, which does nothing.
+class GOOGLE_GLOG_DLL_DECL NullStream : public LogMessage::LogStream {
+ public:
+  // Initialize the LogStream so that messages have somewhere to go if a
+  // NullStream& is implicitly converted to LogStream& (the no-op
+  // operator<< is then bypassed). The length passed is 2, the full size
+  // of message_buffer_, because LogStreamBuf reserves two bytes of it.
+  NullStream() : LogMessage::LogStream(message_buffer_, 2, 0) { }
+  NullStream(const char* /*file*/, int /*line*/,
+             const CheckOpString& /*result*/) :
+      LogMessage::LogStream(message_buffer_, 2, 0) { }
+  NullStream &stream() { return *this; }
+ private:
+  // A two-byte buffer for messages (which we discard anyway): exactly the
+  // space LogStreamBuf reserves, so the put area is empty and everything
+  // streamed through a LogStream&/ostream& reference is dropped.
+  char message_buffer_[2];
+};
+
+// Do nothing. This operator is inline, allowing the message to be
+// compiled away. The message will not be compiled away if we do
+// something like (flag ? LOG(INFO) : LOG(ERROR)) << message; when
+// SKIP_LOG=WARNING. In those cases, NullStream will be implicitly
+// converted to LogStream and the message will be computed and then
+// quietly discarded.
+template<class T>
+inline NullStream& operator<<(NullStream &str, const T &) { return str; }
+
+// Similar to NullStream, but its destructor terminates the process with
+// _exit(1) -- no message, no stack trace -- in place of LogMessageFatal.
+class GOOGLE_GLOG_DLL_DECL NullStreamFatal : public NullStream {
+ public:
+  NullStreamFatal() { }
+  NullStreamFatal(const char* file, int line, const CheckOpString& result) :
+      NullStream(file, line, result) { }
+  __attribute__ ((noreturn)) ~NullStreamFatal() throw () { _exit(1); }
+};
+
+// Install a signal handler that will dump signal information and a stack
+// trace when the program crashes on certain signals.  We'll install the
+// signal handler for the following signals.
+//
+// SIGSEGV, SIGILL, SIGFPE, SIGABRT, SIGBUS, and SIGTERM.
+//
+// By default, the signal handler will write the failure dump to the
+// standard error.  You can customize the destination by installing your
+// own writer function by InstallFailureWriter() below.
+//
+// Note on threading:
+//
+// The function should be called before threads are created, if you want
+// to use the failure signal handler for all threads.  The stack trace
+// will be shown only for the thread that receives the signal.  In other
+// words, stack traces of other threads won't be shown.
+GOOGLE_GLOG_DLL_DECL void InstallFailureSignalHandler();
+
+// Installs a function that is used for writing the failure dump.  "data"
+// is the pointer to the beginning of a message to be written, and "size"
+// is the size of the message.  You should not expect the data is
+// terminated with '\0'.
+GOOGLE_GLOG_DLL_DECL void InstallFailureWriter(
+    void (*writer)(const char* data, int size));
+
+}
+
+#endif // _LOGGING_H_
diff --git a/third_party/config/glog/mac/glog/raw_logging.h b/third_party/config/glog/mac/glog/raw_logging.h
new file mode 100644
index 0000000..65278f6
--- /dev/null
+++ b/third_party/config/glog/mac/glog/raw_logging.h
@@ -0,0 +1,185 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Maxim Lifantsev
+//
+// Thread-safe logging routines that do not allocate any memory or
+// acquire any locks, and can therefore be used by low-level memory
+// allocation and synchronization code.
+
+#ifndef BASE_RAW_LOGGING_H_
+#define BASE_RAW_LOGGING_H_
+
+#include <time.h>
+
+namespace google {
+
+#include "glog/log_severity.h"
+#include "glog/vlog_is_on.h"
+
+// Annoying stuff for windows -- makes sure clients can import these functions
+#ifndef GOOGLE_GLOG_DLL_DECL
+# if defined(_WIN32) && !defined(__CYGWIN__)
+#   define GOOGLE_GLOG_DLL_DECL  __declspec(dllimport)
+# else
+#   define GOOGLE_GLOG_DLL_DECL
+# endif
+#endif
+
+// This is similar to LOG(severity) << format... and VLOG(level) << format..,
+// but
+// * it is to be used ONLY by low-level modules that can't use normal LOG()
+// * it is designed to be a low-level logger that does not allocate any
+//   memory and does not need any locks, hence:
+// * it logs straight and ONLY to STDERR w/o buffering
+// * it uses an explicit format and arguments list
+// * it will silently chop off really long message strings
+// Usage example:
+//   RAW_LOG(ERROR, "Failed foo with %i: %s", status, error);
+//   RAW_VLOG(3, "status is %i", status);
+// These will print almost-standard log lines like this to stderr only:
+//   E0821 211317 file.cc:123] RAW: Failed foo with 22: bad_file
+//   I0821 211317 file.cc:142] RAW: status is 20
+#define RAW_LOG(severity, ...) \
+  do { \
+    switch (google::GLOG_ ## severity) {  \
+      case 0: \
+        RAW_LOG_INFO(__VA_ARGS__); \
+        break; \
+      case 1: \
+        RAW_LOG_WARNING(__VA_ARGS__); \
+        break; \
+      case 2: \
+        RAW_LOG_ERROR(__VA_ARGS__); \
+        break; \
+      case 3: \
+        RAW_LOG_FATAL(__VA_ARGS__); \
+        break; \
+      default: \
+        break; \
+    } \
+  } while (0)
+
+// The following STRIP_LOG testing is performed in the header file so that it's
+// possible to completely compile out the logging code and the log messages.
+#if STRIP_LOG == 0
+#define RAW_VLOG(verboselevel, ...) \
+  do { \
+    if (VLOG_IS_ON(verboselevel)) { \
+      RAW_LOG_INFO(__VA_ARGS__); \
+    } \
+  } while (0)
+#else  // Logging compiled out; the stub only consumes the arguments.
+#define RAW_VLOG(verboselevel, ...) google::RawLogStub__(0, __VA_ARGS__)
+#endif // STRIP_LOG == 0
+
+#if STRIP_LOG == 0
+#define RAW_LOG_INFO(...) google::RawLog__(google::GLOG_INFO, \
+                                   __FILE__, __LINE__, __VA_ARGS__)
+#else
+#define RAW_LOG_INFO(...) google::RawLogStub__(0, __VA_ARGS__)
+#endif // STRIP_LOG == 0
+
+#if STRIP_LOG <= 1
+#define RAW_LOG_WARNING(...) google::RawLog__(google::GLOG_WARNING,   \
+                                      __FILE__, __LINE__, __VA_ARGS__)
+#else
+#define RAW_LOG_WARNING(...) google::RawLogStub__(0, __VA_ARGS__)
+#endif // STRIP_LOG <= 1
+
+#if STRIP_LOG <= 2
+#define RAW_LOG_ERROR(...) google::RawLog__(google::GLOG_ERROR,       \
+                                    __FILE__, __LINE__, __VA_ARGS__)
+#else
+#define RAW_LOG_ERROR(...) google::RawLogStub__(0, __VA_ARGS__)
+#endif // STRIP_LOG <= 2
+
+#if STRIP_LOG <= 3
+#define RAW_LOG_FATAL(...) google::RawLog__(google::GLOG_FATAL,       \
+                                    __FILE__, __LINE__, __VA_ARGS__)
+#else
+#define RAW_LOG_FATAL(...) \
+  do { \
+    google::RawLogStub__(0, __VA_ARGS__);        \
+    exit(1); \
+  } while (0)
+#endif // STRIP_LOG <= 3
+
+// Similar to CHECK(condition) << message,
+// but for low-level modules: we use only RAW_LOG that does not allocate memory.
+// We do not want to provide args list here to encourage this usage:
+//   if (!cond)  RAW_LOG(FATAL, "foo ...", hard_to_compute_args);
+// so that the args are not computed when not needed.
+#define RAW_CHECK(condition, message)                                   \
+  do {                                                                  \
+    if (!(condition)) {                                                 \
+      RAW_LOG(FATAL, "Check %s failed: %s", #condition, message);       \
+    }                                                                   \
+  } while (0)
+
+// Debug versions of RAW_LOG and RAW_CHECK
+#ifndef NDEBUG
+
+#define RAW_DLOG(severity, ...) RAW_LOG(severity, __VA_ARGS__)
+#define RAW_DCHECK(condition, message) RAW_CHECK(condition, message)
+
+#else  // NDEBUG
+
+#define RAW_DLOG(severity, ...)                                 \
+  while (false)                                                 \
+    RAW_LOG(severity, __VA_ARGS__)
+#define RAW_DCHECK(condition, message) \
+  while (false) \
+    RAW_CHECK(condition, message)
+
+#endif  // NDEBUG
+
+// Stub log function used to work around unused variable warnings when
+// building with STRIP_LOG > 0.
+static inline void RawLogStub__(int /* ignored */, ...) {
+}
+
+// Helper function to implement RAW_LOG and RAW_VLOG
+// Logs format... at "severity" level, reporting it
+// as called from file:line.
+// This does not allocate memory or acquire locks.
+GOOGLE_GLOG_DLL_DECL void RawLog__(LogSeverity severity,
+                                   const char* file,
+                                   int line,
+                                   const char* format, ...)
+   __attribute__((__format__ (__printf__, 4, 5)));
+
+// Hack to propagate time information into this module so that
+// this module does not have to directly call localtime_r(),
+// which could allocate memory.
+GOOGLE_GLOG_DLL_DECL void RawLog__SetLastTime(const struct tm& t, int usecs);
+
+}
+
+#endif  // BASE_RAW_LOGGING_H_
diff --git a/third_party/config/glog/mac/glog/stl_logging.h b/third_party/config/glog/mac/glog/stl_logging.h
new file mode 100644
index 0000000..40a15aa
--- /dev/null
+++ b/third_party/config/glog/mac/glog/stl_logging.h
@@ -0,0 +1,220 @@
+// Copyright (c) 2003, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Stream output operators for STL containers; to be used for logging *only*.
+// Inclusion of this file lets you do:
+//
+// list<string> x;
+// LOG(INFO) << "data: " << x;
+// vector<int> v1, v2;
+// CHECK_EQ(v1, v2);
+//
+// If you want to use this header file with hash maps or slist, you
+// need to define macros before including this file:
+//
+// - GLOG_STL_LOGGING_FOR_UNORDERED     - <unordered_map> and <unordered_set>
+// - GLOG_STL_LOGGING_FOR_TR1_UNORDERED - <tr1/unordered_(map|set)>
+// - GLOG_STL_LOGGING_FOR_EXT_HASH      - <ext/hash_(map|set)>
+// - GLOG_STL_LOGGING_FOR_EXT_SLIST     - <ext/slist>
+//
+
+#ifndef UTIL_GTL_STL_LOGGING_INL_H_
+#define UTIL_GTL_STL_LOGGING_INL_H_
+
+#if !1
+# error We do not support stl_logging for this compiler
+#endif
+
+#include <deque>
+#include <list>
+#include <map>
+#include <ostream>
+#include <set>
+#include <utility>
+#include <vector>
+
+#ifdef GLOG_STL_LOGGING_FOR_UNORDERED
+# include <unordered_map>
+# include <unordered_set>
+#endif
+
+#ifdef GLOG_STL_LOGGING_FOR_TR1_UNORDERED
+# include <tr1/unordered_map>
+# include <tr1/unordered_set>
+#endif
+
+#ifdef GLOG_STL_LOGGING_FOR_EXT_HASH
+# include <ext/hash_set>
+# include <ext/hash_map>
+#endif
+#ifdef GLOG_STL_LOGGING_FOR_EXT_SLIST
+# include <ext/slist>
+#endif
+
+// Forward declare these two, and define them after all the container streams
+// operators so that we can recurse from pair -> container -> container -> pair
+// properly.
+template<class First, class Second>
+std::ostream& operator<<(std::ostream& out, const std::pair<First, Second>& p);
+
+namespace google {
+
+template<class Iter>
+void PrintSequence(std::ostream& out, Iter begin, Iter end);
+
+}
+
+#define OUTPUT_TWO_ARG_CONTAINER(Sequence) \
+template<class T1, class T2> \
+inline std::ostream& operator<<(std::ostream& out, \
+                                const Sequence<T1, T2>& seq) { \
+  google::PrintSequence(out, seq.begin(), seq.end()); \
+  return out; \
+}
+
+OUTPUT_TWO_ARG_CONTAINER(std::vector)
+OUTPUT_TWO_ARG_CONTAINER(std::deque)
+OUTPUT_TWO_ARG_CONTAINER(std::list)
+#ifdef GLOG_STL_LOGGING_FOR_EXT_SLIST
+OUTPUT_TWO_ARG_CONTAINER(__gnu_cxx::slist)
+#endif
+
+#undef OUTPUT_TWO_ARG_CONTAINER
+
+#define OUTPUT_THREE_ARG_CONTAINER(Sequence) \
+template<class T1, class T2, class T3> \
+inline std::ostream& operator<<(std::ostream& out, \
+                                const Sequence<T1, T2, T3>& seq) { \
+  google::PrintSequence(out, seq.begin(), seq.end()); \
+  return out; \
+}
+
+OUTPUT_THREE_ARG_CONTAINER(std::set)
+OUTPUT_THREE_ARG_CONTAINER(std::multiset)
+
+#undef OUTPUT_THREE_ARG_CONTAINER
+
+#define OUTPUT_FOUR_ARG_CONTAINER(Sequence) \
+template<class T1, class T2, class T3, class T4> \
+inline std::ostream& operator<<(std::ostream& out, \
+                                const Sequence<T1, T2, T3, T4>& seq) { \
+  google::PrintSequence(out, seq.begin(), seq.end()); \
+  return out; \
+}
+
+OUTPUT_FOUR_ARG_CONTAINER(std::map)
+OUTPUT_FOUR_ARG_CONTAINER(std::multimap)
+#ifdef GLOG_STL_LOGGING_FOR_UNORDERED
+OUTPUT_FOUR_ARG_CONTAINER(std::unordered_set)
+OUTPUT_FOUR_ARG_CONTAINER(std::unordered_multiset)
+#endif
+#ifdef GLOG_STL_LOGGING_FOR_TR1_UNORDERED
+OUTPUT_FOUR_ARG_CONTAINER(std::tr1::unordered_set)
+OUTPUT_FOUR_ARG_CONTAINER(std::tr1::unordered_multiset)
+#endif
+#ifdef GLOG_STL_LOGGING_FOR_EXT_HASH
+OUTPUT_FOUR_ARG_CONTAINER(__gnu_cxx::hash_set)
+OUTPUT_FOUR_ARG_CONTAINER(__gnu_cxx::hash_multiset)
+#endif
+
+#undef OUTPUT_FOUR_ARG_CONTAINER
+
+#define OUTPUT_FIVE_ARG_CONTAINER(Sequence) \
+template<class T1, class T2, class T3, class T4, class T5> \
+inline std::ostream& operator<<(std::ostream& out, \
+                                const Sequence<T1, T2, T3, T4, T5>& seq) { \
+  google::PrintSequence(out, seq.begin(), seq.end()); \
+  return out; \
+}
+
+#ifdef GLOG_STL_LOGGING_FOR_UNORDERED
+OUTPUT_FIVE_ARG_CONTAINER(std::unordered_map)
+OUTPUT_FIVE_ARG_CONTAINER(std::unordered_multimap)
+#endif
+#ifdef GLOG_STL_LOGGING_FOR_TR1_UNORDERED
+OUTPUT_FIVE_ARG_CONTAINER(std::tr1::unordered_map)
+OUTPUT_FIVE_ARG_CONTAINER(std::tr1::unordered_multimap)
+#endif
+#ifdef GLOG_STL_LOGGING_FOR_EXT_HASH
+OUTPUT_FIVE_ARG_CONTAINER(__gnu_cxx::hash_map)
+OUTPUT_FIVE_ARG_CONTAINER(__gnu_cxx::hash_multimap)
+#endif
+
+#undef OUTPUT_FIVE_ARG_CONTAINER
+
+template<class First, class Second>
+inline std::ostream& operator<<(std::ostream& out,
+                                const std::pair<First, Second>& p) {
+  out << '(' << p.first << ", " << p.second << ')';
+  return out;
+}
+
+namespace google {
+
+template<class Iter>
+inline void PrintSequence(std::ostream& out, Iter begin, Iter end) {
+  // Print at most 100 space-separated elements; " ..." marks a truncated tail.
+  for (int i = 0; begin != end && i < 100; ++i, ++begin) {
+    if (i > 0) out << ' ';
+    out << *begin;
+  }
+  if (begin != end) {
+    out << " ...";
+  }
+}
+
+}
+
+// Note that this is technically undefined behavior! We are adding things into
+// the std namespace for a reason though -- we are providing new operations on
+// types which are themselves defined with this namespace. Without this, these
+// operator overloads cannot be found via ADL. If these definitions are not
+// found via ADL, they must be #included before they're used, which requires
+// this header to be included before apparently independent other headers.
+//
+// For example, base/logging.h defines various template functions to implement
+// CHECK_EQ(x, y) and stream x and y into the log in the event the check fails.
+// It does so via the function template MakeCheckOpValueString:
+//   template<class T>
+//   void MakeCheckOpValueString(strstream* ss, const T& v) {
+//     (*ss) << v;
+//   }
+// Because 'glog/logging.h' is included before 'glog/stl_logging.h',
+// subsequent CHECK_EQ(v1, v2) for vector<...> typed variable v1 and v2 can only
+// find these operator definitions via ADL.
+//
+// Even this solution has problems -- it may pull unintended operators into the
+// namespace as well, allowing them to also be found via ADL, and creating code
+// that only works with a particular order of includes. Long term, we need to
+// move all of the *definitions* into namespace std, but we need to ensure no
+// one references them first. This lets us take that step. We cannot define them
+// in both because that would create ambiguous overloads when both are found.
+namespace std { using ::operator<<; }
+
+#endif  // UTIL_GTL_STL_LOGGING_INL_H_
diff --git a/third_party/config/glog/mac/glog/vlog_is_on.h b/third_party/config/glog/mac/glog/vlog_is_on.h
new file mode 100644
index 0000000..02b0b86
--- /dev/null
+++ b/third_party/config/glog/mac/glog/vlog_is_on.h
@@ -0,0 +1,129 @@
+// Copyright (c) 1999, 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Ray Sidney and many others
+//
+// Defines the VLOG_IS_ON macro that controls the variable-verbosity
+// conditional logging.
+//
+// It's used by VLOG and VLOG_IF in logging.h
+// and by RAW_VLOG in raw_logging.h to trigger the logging.
+//
+// It can also be used directly e.g. like this:
+//   if (VLOG_IS_ON(2)) {
+//     // do some logging preparation and logging
+//     // that can't be accomplished e.g. via just VLOG(2) << ...;
+//   }
+//
+// The truth value that VLOG_IS_ON(level) returns is determined by 
+// the three verbosity level flags:
+//   --v=<n>  Gives the default maximal active V-logging level;
+//            0 is the default.
+//            Normally positive values are used for V-logging levels.
+//   --vmodule=<str>  Gives the per-module maximal V-logging levels to override
+//                    the value given by --v.
+//                    E.g. "my_module=2,foo*=3" would change the logging level
+//                    for all code in source files "my_module.*" and "foo*.*"
+//                    ("-inl" suffixes are also disregarded for this matching).
+//
+// SetVLOGLevel helper function is provided to do limited dynamic control over
+// V-logging by overriding the per-module settings given via --vmodule flag.
+//
+// CAVEAT: --vmodule functionality is not available in non gcc compilers.
+//
+
+#ifndef BASE_VLOG_IS_ON_H_
+#define BASE_VLOG_IS_ON_H_
+
+#include "glog/log_severity.h"
+
+// Annoying stuff for windows -- makes sure clients can import these functions
+#ifndef GOOGLE_GLOG_DLL_DECL
+# if defined(_WIN32) && !defined(__CYGWIN__)
+#   define GOOGLE_GLOG_DLL_DECL  __declspec(dllimport)
+# else
+#   define GOOGLE_GLOG_DLL_DECL
+# endif
+#endif
+
+#if defined(__GNUC__)
+// We emit an anonymous static int* variable at every VLOG_IS_ON(n) site.
+// (Normally) the first time every VLOG_IS_ON(n) site is hit,
+// we determine what variable will dynamically control logging at this site:
+// it's either FLAGS_v or an appropriate internal variable
+// matching the current source file that represents results of
+// parsing of --vmodule flag and/or SetVLOGLevel calls.
+#define VLOG_IS_ON(verboselevel)                                \
+  __extension__  \
+  ({ static google::int32* vlocal__ = &google::kLogSiteUninitialized;           \
+     google::int32 verbose_level__ = (verboselevel);                    \
+     (*vlocal__ >= verbose_level__) &&                          \
+     ((vlocal__ != &google::kLogSiteUninitialized) ||                   \
+      (google::InitVLOG3__(&vlocal__, &FLAGS_v,                         \
+                   __FILE__, verbose_level__))); })
+#else
+// GNU extensions not available, so we do not support --vmodule.
+// Dynamic value of FLAGS_v always controls the logging level.
+#define VLOG_IS_ON(verboselevel) (FLAGS_v >= (verboselevel))
+#endif
+
+// Set VLOG(_IS_ON) level for module_pattern to log_level.
+// This lets us dynamically control what is normally set by the --vmodule flag.
+// Returns the level that previously applied to module_pattern.
+// NOTE: To change the log level for VLOG(_IS_ON) sites
+//	 that have already executed after/during InitGoogleLogging,
+//	 one needs to supply the exact --vmodule pattern that applied to them.
+//       (If no --vmodule pattern applied to them
+//       the value of FLAGS_v will continue to control them.)
+extern GOOGLE_GLOG_DLL_DECL int SetVLOGLevel(const char* module_pattern,
+                                             int log_level);
+
+// Various declarations needed for VLOG_IS_ON above: =========================
+
+// Special value used to indicate that a VLOG_IS_ON site has not been
+// initialized.  We make this a large value, so the common-case check
+// of "*vlocal__ >= verbose_level__" in VLOG_IS_ON definition
+// passes in such cases and InitVLOG3__ is then triggered.
+extern google::int32 kLogSiteUninitialized;
+
+// Helper routine which determines the logging info for a particular VLOG site.
+//   site_flag     is the address of the site-local pointer to the controlling
+//                 verbosity level
+//   site_default  is the default to use for *site_flag
+//   fname         is the current source file name
+//   verbose_level is the argument to VLOG_IS_ON
+// We will return the return value for VLOG_IS_ON
+// and if possible set *site_flag appropriately.
+extern GOOGLE_GLOG_DLL_DECL bool InitVLOG3__(
+    google::int32** site_flag,
+    google::int32* site_default,
+    const char* fname,
+    google::int32 verbose_level);
+
+#endif  // BASE_VLOG_IS_ON_H_
diff --git a/third_party/config/protobuf/config.h b/third_party/config/protobuf/config.h
new file mode 100644
index 0000000..dcc0041
--- /dev/null
+++ b/third_party/config/protobuf/config.h
@@ -0,0 +1,156 @@
+/* config.h.  Generated from config.h.in by configure.  */
+/* config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* the name of <hash_map> */
+#define HASH_MAP_CLASS hash_map
+
+/* the location of <hash_map> */
+#if defined(USE_STLPORT)
+#define HASH_MAP_H <hash_map>
+#else
+#define HASH_MAP_H <ext/hash_map>
+#endif
+
+/* the namespace of hash_map/hash_set */
+#if defined(USE_STLPORT)
+#define HASH_NAMESPACE std
+#else
+#define HASH_NAMESPACE __gnu_cxx
+#endif
+
+/* the name of <hash_set> */
+#define HASH_SET_CLASS hash_set
+
+/* the location of <hash_set> */
+#if defined(USE_STLPORT)
+#define HASH_SET_H <hash_set>
+#else
+#define HASH_SET_H <ext/hash_set>
+#endif
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <fcntl.h> header file. */
+#define HAVE_FCNTL_H 1
+
+/* Define to 1 if you have the `ftruncate' function. */
+#define HAVE_FTRUNCATE 1
+
+/* define if the compiler has hash_map */
+#define HAVE_HASH_MAP 1
+
+/* define if the compiler has hash_set */
+#define HAVE_HASH_SET 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the <limits.h> header file. */
+#define HAVE_LIMITS_H 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the `memset' function. */
+#define HAVE_MEMSET 1
+
+/* Define to 1 if you have the `mkdir' function. */
+#define HAVE_MKDIR 1
+
+/* Define if you have POSIX threads libraries and header files. */
+#define HAVE_PTHREAD 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the `strchr' function. */
+#define HAVE_STRCHR 1
+
+/* Define to 1 if you have the `strerror' function. */
+#define HAVE_STRERROR 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the `strtol' function. */
+#define HAVE_STRTOL 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Enable classes using zlib compression. */
+#define HAVE_ZLIB 1
+
+/* Name of package */
+#define PACKAGE "protobuf"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "protobuf@googlegroups.com"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "Protocol Buffers"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "Protocol Buffers 2.3.0"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "protobuf"
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "2.3.0"
+
+/* Define to necessary symbol if this constant uses a non-standard name on
+   your system. */
+/* #undef PTHREAD_CREATE_JOINABLE */
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Version number of package */
+#define VERSION "2.3.0"
+
+/* Define to 1 if on AIX 3.
+   System headers sometimes define this.
+   We just want to avoid a redefinition error message.  */
+#ifndef _ALL_SOURCE
+/* # undef _ALL_SOURCE */
+#endif
+
+/* Enable GNU extensions on systems that have them.  */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+
+/* Define to 1 if on MINIX. */
+/* #undef _MINIX */
+
+/* Define to 2 if the system does not provide POSIX.1 features except with
+   this defined. */
+/* #undef _POSIX_1_SOURCE */
+
+/* Define to 1 if you need to in order for `stat' and other things to work. */
+/* #undef _POSIX_SOURCE */
+
+/* Enable extensions on Solaris.  */
+#ifndef __EXTENSIONS__
+# define __EXTENSIONS__ 1
+#endif
+#ifndef _POSIX_PTHREAD_SEMANTICS
+# define _POSIX_PTHREAD_SEMANTICS 1
+#endif
+#ifndef _TANDEM_SOURCE
+# define _TANDEM_SOURCE 1
+#endif
diff --git a/third_party/config/xz/freebsd/config.h b/third_party/config/xz/freebsd/config.h
new file mode 100644
index 0000000..80c1d17
--- /dev/null
+++ b/third_party/config/xz/freebsd/config.h
@@ -0,0 +1,421 @@
+/* config.h.  Generated from config.h.in by configure.  */
+/* config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* Define if building universal (internal helper macro) */
+/* #undef AC_APPLE_UNIVERSAL_BUILD */
+
+/* How many MiB of RAM to assume if the real amount cannot be determined. */
+#define ASSUME_RAM 128
+
+/* Define to 1 if translation of program messages to the user's native
+   language is requested. */
+/* #undef ENABLE_NLS */
+
+/* Define to 1 if bswap_16 is available. */
+/* #undef HAVE_BSWAP_16 */
+
+/* Define to 1 if bswap_32 is available. */
+/* #undef HAVE_BSWAP_32 */
+
+/* Define to 1 if bswap_64 is available. */
+/* #undef HAVE_BSWAP_64 */
+
+/* Define to 1 if you have the <byteswap.h> header file. */
+/* #undef HAVE_BYTESWAP_H */
+
+/* Define to 1 if you have the MacOS X function CFLocaleCopyCurrent in the
+   CoreFoundation framework. */
+/* #undef HAVE_CFLOCALECOPYCURRENT */
+
+/* Define to 1 if you have the MacOS X function CFPreferencesCopyAppValue in
+   the CoreFoundation framework. */
+/* #undef HAVE_CFPREFERENCESCOPYAPPVALUE */
+
+/* Define to 1 if crc32 integrity check is enabled. */
+#define HAVE_CHECK_CRC32 1
+
+/* Define to 1 if crc64 integrity check is enabled. */
+#define HAVE_CHECK_CRC64 1
+
+/* Define to 1 if sha256 integrity check is enabled. */
+#define HAVE_CHECK_SHA256 1
+
+/* Define if the GNU dcgettext() function is already present or preinstalled.
+   */
+/* #undef HAVE_DCGETTEXT */
+
+/* Define to 1 if you have the declaration of `program_invocation_name', and
+   to 0 if you don't. */
+#define HAVE_DECL_PROGRAM_INVOCATION_NAME 0
+
+/* Define to 1 if arm decoder is enabled. */
+#define HAVE_DECODER_ARM 1
+
+/* Define to 1 if armthumb decoder is enabled. */
+#define HAVE_DECODER_ARMTHUMB 1
+
+/* Define to 1 if delta decoder is enabled. */
+#define HAVE_DECODER_DELTA 1
+
+/* Define to 1 if ia64 decoder is enabled. */
+#define HAVE_DECODER_IA64 1
+
+/* Define to 1 if lzma1 decoder is enabled. */
+#define HAVE_DECODER_LZMA1 1
+
+/* Define to 1 if lzma2 decoder is enabled. */
+#define HAVE_DECODER_LZMA2 1
+
+/* Define to 1 if powerpc decoder is enabled. */
+#define HAVE_DECODER_POWERPC 1
+
+/* Define to 1 if sparc decoder is enabled. */
+#define HAVE_DECODER_SPARC 1
+
+/* Define to 1 if x86 decoder is enabled. */
+#define HAVE_DECODER_X86 1
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if arm encoder is enabled. */
+#define HAVE_ENCODER_ARM 1
+
+/* Define to 1 if armthumb encoder is enabled. */
+#define HAVE_ENCODER_ARMTHUMB 1
+
+/* Define to 1 if delta encoder is enabled. */
+#define HAVE_ENCODER_DELTA 1
+
+/* Define to 1 if ia64 encoder is enabled. */
+#define HAVE_ENCODER_IA64 1
+
+/* Define to 1 if lzma1 encoder is enabled. */
+#define HAVE_ENCODER_LZMA1 1
+
+/* Define to 1 if lzma2 encoder is enabled. */
+#define HAVE_ENCODER_LZMA2 1
+
+/* Define to 1 if powerpc encoder is enabled. */
+#define HAVE_ENCODER_POWERPC 1
+
+/* Define to 1 if sparc encoder is enabled. */
+#define HAVE_ENCODER_SPARC 1
+
+/* Define to 1 if x86 encoder is enabled. */
+#define HAVE_ENCODER_X86 1
+
+/* Define to 1 if you have the <fcntl.h> header file. */
+#define HAVE_FCNTL_H 1
+
+/* Define to 1 if you have the `futimens' function. */
+/* #undef HAVE_FUTIMENS */
+
+/* Define to 1 if you have the `futimes' function. */
+#define HAVE_FUTIMES 1
+
+/* Define to 1 if you have the `futimesat' function. */
+/* #undef HAVE_FUTIMESAT */
+
+/* Define to 1 if you have the <getopt.h> header file. */
+#define HAVE_GETOPT_H 1
+
+/* Define to 1 if you have the `getopt_long' function. */
+#define HAVE_GETOPT_LONG 1
+
+/* Define if the GNU gettext() function is already present or preinstalled. */
+/* #undef HAVE_GETTEXT */
+
+/* Define if you have the iconv() function. */
+/* #undef HAVE_ICONV */
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the <limits.h> header file. */
+#define HAVE_LIMITS_H 1
+
+/* Define to 1 if mbrtowc and mbstate_t are properly declared. */
+#define HAVE_MBRTOWC 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 to enable bt2 match finder. */
+#define HAVE_MF_BT2 1
+
+/* Define to 1 to enable bt3 match finder. */
+#define HAVE_MF_BT3 1
+
+/* Define to 1 to enable bt4 match finder. */
+#define HAVE_MF_BT4 1
+
+/* Define to 1 to enable hc3 match finder. */
+#define HAVE_MF_HC3 1
+
+/* Define to 1 to enable hc4 match finder. */
+#define HAVE_MF_HC4 1
+
+/* Define to 1 if getopt.h declares extern int optreset. */
+#define HAVE_OPTRESET 1
+
+/* Define if you have POSIX threads libraries and header files. */
+#define HAVE_PTHREAD 1
+
+/* Define to 1 if optimizing for size. */
+/* #undef HAVE_SMALL */
+
+/* Define to 1 if stdbool.h conforms to C99. */
+#define HAVE_STDBOOL_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if `st_atimensec' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_ATIMENSEC */
+
+/* Define to 1 if `st_atimespec.tv_nsec' is a member of `struct stat'. */
+#define HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC 1
+
+/* Define to 1 if `st_atim.st__tim.tv_nsec' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC */
+
+/* Define to 1 if `st_atim.tv_nsec' is a member of `struct stat'. */
+#define HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC 1
+
+/* Define to 1 if `st_uatime' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_UATIME */
+
+/* Define to 1 if you have the <sys/byteorder.h> header file. */
+/* #undef HAVE_SYS_BYTEORDER_H */
+
+/* Define to 1 if you have the <sys/endian.h> header file. */
+#define HAVE_SYS_ENDIAN_H 1
+
+/* Define to 1 if you have the <sys/param.h> header file. */
+#define HAVE_SYS_PARAM_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#define HAVE_SYS_TIME_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if the system has the type `uintptr_t'. */
+#define HAVE_UINTPTR_T 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the `utime' function. */
+/* #undef HAVE_UTIME */
+
+/* Define to 1 if you have the `utimes' function. */
+/* #undef HAVE_UTIMES */
+
+/* Define to 1 or 0, depending whether the compiler supports simple visibility
+   declarations. */
+#define HAVE_VISIBILITY 1
+
+/* Define to 1 if you have the `wcwidth' function. */
+#define HAVE_WCWIDTH 1
+
+/* Define to 1 if the system has the type `_Bool'. */
+#define HAVE__BOOL 1
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#define LT_OBJDIR ".libs/"
+
+/* Define to 1 to disable debugging code. */
+#define NDEBUG 1
+
+/* Define to 1 if your C compiler doesn't accept -c and -o together. */
+/* #undef NO_MINUS_C_MINUS_O */
+
+/* Name of package */
+#define PACKAGE "xz"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "lasse.collin@tukaani.org"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "XZ Utils"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "XZ Utils 5.0.3"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "xz"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL "http://tukaani.org/xz/"
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "5.0.3"
+
+/* Define to necessary symbol if this constant uses a non-standard name on
+   your system. */
+/* #undef PTHREAD_CREATE_JOINABLE */
+
+/* The size of `size_t', as computed by sizeof. */
+#define SIZEOF_SIZE_T 8
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Define to 1 if the number of available CPU cores can be detected with
+   pstat_getdynamic(). */
+/* #undef TUKLIB_CPUCORES_PSTAT_GETDYNAMIC */
+
+/* Define to 1 if the number of available CPU cores can be detected with
+   sysconf(_SC_NPROCESSORS_ONLN) or sysconf(_SC_NPROC_ONLN). */
+/* #undef TUKLIB_CPUCORES_SYSCONF */
+
+/* Define to 1 if the number of available CPU cores can be detected with
+   sysctl(). */
+#define TUKLIB_CPUCORES_SYSCTL 1
+
+/* Define to 1 if the system supports fast unaligned access to 16-bit and
+   32-bit integers. */
+#define TUKLIB_FAST_UNALIGNED_ACCESS 1
+
+/* Define to 1 if the amount of physical memory can be detected with
+   _system_configuration.physmem. */
+/* #undef TUKLIB_PHYSMEM_AIX */
+
+/* Define to 1 if the amount of physical memory can be detected with
+   getinvent_r(). */
+/* #undef TUKLIB_PHYSMEM_GETINVENT_R */
+
+/* Define to 1 if the amount of physical memory can be detected with
+   getsysinfo(). */
+/* #undef TUKLIB_PHYSMEM_GETSYSINFO */
+
+/* Define to 1 if the amount of physical memory can be detected with
+   pstat_getstatic(). */
+/* #undef TUKLIB_PHYSMEM_PSTAT_GETSTATIC */
+
+/* Define to 1 if the amount of physical memory can be detected with
+   sysconf(_SC_PAGESIZE) and sysconf(_SC_PHYS_PAGES). */
+#define TUKLIB_PHYSMEM_SYSCONF 1
+
+/* Define to 1 if the amount of physical memory can be detected with sysctl().
+   */
+/* #undef TUKLIB_PHYSMEM_SYSCTL */
+
+/* Define to 1 if the amount of physical memory can be detected with Linux
+   sysinfo(). */
+/* #undef TUKLIB_PHYSMEM_SYSINFO */
+
+/* Enable extensions on AIX 3, Interix.  */
+#ifndef _ALL_SOURCE
+# define _ALL_SOURCE 1
+#endif
+/* Enable GNU extensions on systems that have them.  */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+/* Enable threading extensions on Solaris.  */
+#ifndef _POSIX_PTHREAD_SEMANTICS
+# define _POSIX_PTHREAD_SEMANTICS 1
+#endif
+/* Enable extensions on HP NonStop.  */
+#ifndef _TANDEM_SOURCE
+# define _TANDEM_SOURCE 1
+#endif
+/* Enable general extensions on Solaris.  */
+#ifndef __EXTENSIONS__
+# define __EXTENSIONS__ 1
+#endif
+
+
+/* Version number of package */
+#define VERSION "5.0.3"
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+#  define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+/* #  undef WORDS_BIGENDIAN */
+# endif
+#endif
+
+/* Number of bits in a file offset, on hosts where this is settable. */
+/* #undef _FILE_OFFSET_BITS */
+
+/* Define for large files, on AIX-style hosts. */
+/* #undef _LARGE_FILES */
+
+/* Define to 1 if on MINIX. */
+/* #undef _MINIX */
+
+/* Define to 2 if the system does not provide POSIX.1 features except with
+   this defined. */
+/* #undef _POSIX_1_SOURCE */
+
+/* Define to 1 if you need to in order for `stat' and other things to work. */
+/* #undef _POSIX_SOURCE */
+
+/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
+   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
+   #define below would cause a syntax error. */
+/* #undef _UINT32_T */
+
+/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
+   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
+   #define below would cause a syntax error. */
+/* #undef _UINT64_T */
+
+/* Define for Solaris 2.5.1 so the uint8_t typedef from <sys/synch.h>,
+   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
+   #define below would cause a syntax error. */
+/* #undef _UINT8_T */
+
+/* Define to rpl_ if the getopt replacement functions and variables should be
+   used. */
+/* #undef __GETOPT_PREFIX */
+
+/* Define to the type of a signed integer type of width exactly 32 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef int32_t */
+
+/* Define to the type of a signed integer type of width exactly 64 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef int64_t */
+
+/* Define to the type of an unsigned integer type of width exactly 16 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef uint16_t */
+
+/* Define to the type of an unsigned integer type of width exactly 32 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef uint32_t */
+
+/* Define to the type of an unsigned integer type of width exactly 64 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef uint64_t */
+
+/* Define to the type of an unsigned integer type of width exactly 8 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef uint8_t */
+
+/* Define to the type of an unsigned integer type wide enough to hold a
+   pointer, if such a type exists, and if the system does not define it. */
+/* #undef uintptr_t */
diff --git a/third_party/config/xz/linux/config.h b/third_party/config/xz/linux/config.h
new file mode 100644
index 0000000..4aec2a7
--- /dev/null
+++ b/third_party/config/xz/linux/config.h
@@ -0,0 +1,421 @@
+/* config.h.  Generated from config.h.in by configure.  */
+/* config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* Define if building universal (internal helper macro) */
+/* #undef AC_APPLE_UNIVERSAL_BUILD */
+
+/* How many MiB of RAM to assume if the real amount cannot be determined. */
+#define ASSUME_RAM 128
+
+/* Define to 1 if translation of program messages to the user's native
+   language is requested. */
+#define ENABLE_NLS 1
+
+/* Define to 1 if bswap_16 is available. */
+#define HAVE_BSWAP_16 1
+
+/* Define to 1 if bswap_32 is available. */
+#define HAVE_BSWAP_32 1
+
+/* Define to 1 if bswap_64 is available. */
+#define HAVE_BSWAP_64 1
+
+/* Define to 1 if you have the <byteswap.h> header file. */
+#define HAVE_BYTESWAP_H 1
+
+/* Define to 1 if you have the MacOS X function CFLocaleCopyCurrent in the
+   CoreFoundation framework. */
+/* #undef HAVE_CFLOCALECOPYCURRENT */
+
+/* Define to 1 if you have the MacOS X function CFPreferencesCopyAppValue in
+   the CoreFoundation framework. */
+/* #undef HAVE_CFPREFERENCESCOPYAPPVALUE */
+
+/* Define to 1 if crc32 integrity check is enabled. */
+#define HAVE_CHECK_CRC32 1
+
+/* Define to 1 if crc64 integrity check is enabled. */
+#define HAVE_CHECK_CRC64 1
+
+/* Define to 1 if sha256 integrity check is enabled. */
+#define HAVE_CHECK_SHA256 1
+
+/* Define if the GNU dcgettext() function is already present or preinstalled.
+   */
+#define HAVE_DCGETTEXT 1
+
+/* Define to 1 if you have the declaration of `program_invocation_name', and
+   to 0 if you don't. */
+#define HAVE_DECL_PROGRAM_INVOCATION_NAME 1
+
+/* Define to 1 if arm decoder is enabled. */
+#define HAVE_DECODER_ARM 1
+
+/* Define to 1 if armthumb decoder is enabled. */
+#define HAVE_DECODER_ARMTHUMB 1
+
+/* Define to 1 if delta decoder is enabled. */
+#define HAVE_DECODER_DELTA 1
+
+/* Define to 1 if ia64 decoder is enabled. */
+#define HAVE_DECODER_IA64 1
+
+/* Define to 1 if lzma1 decoder is enabled. */
+#define HAVE_DECODER_LZMA1 1
+
+/* Define to 1 if lzma2 decoder is enabled. */
+#define HAVE_DECODER_LZMA2 1
+
+/* Define to 1 if powerpc decoder is enabled. */
+#define HAVE_DECODER_POWERPC 1
+
+/* Define to 1 if sparc decoder is enabled. */
+#define HAVE_DECODER_SPARC 1
+
+/* Define to 1 if x86 decoder is enabled. */
+#define HAVE_DECODER_X86 1
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if arm encoder is enabled. */
+#define HAVE_ENCODER_ARM 1
+
+/* Define to 1 if armthumb encoder is enabled. */
+#define HAVE_ENCODER_ARMTHUMB 1
+
+/* Define to 1 if delta encoder is enabled. */
+#define HAVE_ENCODER_DELTA 1
+
+/* Define to 1 if ia64 encoder is enabled. */
+#define HAVE_ENCODER_IA64 1
+
+/* Define to 1 if lzma1 encoder is enabled. */
+#define HAVE_ENCODER_LZMA1 1
+
+/* Define to 1 if lzma2 encoder is enabled. */
+#define HAVE_ENCODER_LZMA2 1
+
+/* Define to 1 if powerpc encoder is enabled. */
+#define HAVE_ENCODER_POWERPC 1
+
+/* Define to 1 if sparc encoder is enabled. */
+#define HAVE_ENCODER_SPARC 1
+
+/* Define to 1 if x86 encoder is enabled. */
+#define HAVE_ENCODER_X86 1
+
+/* Define to 1 if you have the <fcntl.h> header file. */
+#define HAVE_FCNTL_H 1
+
+/* Define to 1 if you have the `futimens' function. */
+#define HAVE_FUTIMENS 1
+
+/* Define to 1 if you have the `futimes' function. */
+/* #undef HAVE_FUTIMES */
+
+/* Define to 1 if you have the `futimesat' function. */
+/* #undef HAVE_FUTIMESAT */
+
+/* Define to 1 if you have the <getopt.h> header file. */
+#define HAVE_GETOPT_H 1
+
+/* Define to 1 if you have the `getopt_long' function. */
+#define HAVE_GETOPT_LONG 1
+
+/* Define if the GNU gettext() function is already present or preinstalled. */
+#define HAVE_GETTEXT 1
+
+/* Define if you have the iconv() function. */
+/* #undef HAVE_ICONV */
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the <limits.h> header file. */
+#define HAVE_LIMITS_H 1
+
+/* Define to 1 if mbrtowc and mbstate_t are properly declared. */
+#define HAVE_MBRTOWC 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 to enable bt2 match finder. */
+#define HAVE_MF_BT2 1
+
+/* Define to 1 to enable bt3 match finder. */
+#define HAVE_MF_BT3 1
+
+/* Define to 1 to enable bt4 match finder. */
+#define HAVE_MF_BT4 1
+
+/* Define to 1 to enable hc3 match finder. */
+#define HAVE_MF_HC3 1
+
+/* Define to 1 to enable hc4 match finder. */
+#define HAVE_MF_HC4 1
+
+/* Define to 1 if getopt.h declares extern int optreset. */
+/* #undef HAVE_OPTRESET */
+
+/* Define if you have POSIX threads libraries and header files. */
+#define HAVE_PTHREAD 1
+
+/* Define to 1 if optimizing for size. */
+/* #undef HAVE_SMALL */
+
+/* Define to 1 if stdbool.h conforms to C99. */
+#define HAVE_STDBOOL_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if `st_atimensec' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_ATIMENSEC */
+
+/* Define to 1 if `st_atimespec.tv_nsec' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC */
+
+/* Define to 1 if `st_atim.st__tim.tv_nsec' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC */
+
+/* Define to 1 if `st_atim.tv_nsec' is a member of `struct stat'. */
+#define HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC 1
+
+/* Define to 1 if `st_uatime' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_UATIME */
+
+/* Define to 1 if you have the <sys/byteorder.h> header file. */
+/* #undef HAVE_SYS_BYTEORDER_H */
+
+/* Define to 1 if you have the <sys/endian.h> header file. */
+/* #undef HAVE_SYS_ENDIAN_H */
+
+/* Define to 1 if you have the <sys/param.h> header file. */
+#define HAVE_SYS_PARAM_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#define HAVE_SYS_TIME_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if the system has the type `uintptr_t'. */
+#define HAVE_UINTPTR_T 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the `utime' function. */
+/* #undef HAVE_UTIME */
+
+/* Define to 1 if you have the `utimes' function. */
+/* #undef HAVE_UTIMES */
+
+/* Define to 1 or 0, depending whether the compiler supports simple visibility
+   declarations. */
+#define HAVE_VISIBILITY 1
+
+/* Define to 1 if you have the `wcwidth' function. */
+#define HAVE_WCWIDTH 1
+
+/* Define to 1 if the system has the type `_Bool'. */
+#define HAVE__BOOL 1
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#define LT_OBJDIR ".libs/"
+
+/* Define to 1 to disable debugging code. */
+#define NDEBUG 1
+
+/* Define to 1 if your C compiler doesn't accept -c and -o together. */
+/* #undef NO_MINUS_C_MINUS_O */
+
+/* Name of package */
+#define PACKAGE "xz"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "lasse.collin@tukaani.org"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "XZ Utils"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "XZ Utils 5.0.3"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "xz"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL "http://tukaani.org/xz/"
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "5.0.3"
+
+/* Define to necessary symbol if this constant uses a non-standard name on
+   your system. */
+/* #undef PTHREAD_CREATE_JOINABLE */
+
+/* The size of `size_t', as computed by sizeof. */
+#define SIZEOF_SIZE_T 8
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Define to 1 if the number of available CPU cores can be detected with
+   pstat_getdynamic(). */
+/* #undef TUKLIB_CPUCORES_PSTAT_GETDYNAMIC */
+
+/* Define to 1 if the number of available CPU cores can be detected with
+   sysconf(_SC_NPROCESSORS_ONLN) or sysconf(_SC_NPROC_ONLN). */
+#define TUKLIB_CPUCORES_SYSCONF 1
+
+/* Define to 1 if the number of available CPU cores can be detected with
+   sysctl(). */
+/* #undef TUKLIB_CPUCORES_SYSCTL */
+
+/* Define to 1 if the system supports fast unaligned access to 16-bit and
+   32-bit integers. */
+#define TUKLIB_FAST_UNALIGNED_ACCESS 1
+
+/* Define to 1 if the amount of physical memory can be detected with
+   _system_configuration.physmem. */
+/* #undef TUKLIB_PHYSMEM_AIX */
+
+/* Define to 1 if the amount of physical memory can be detected with
+   getinvent_r(). */
+/* #undef TUKLIB_PHYSMEM_GETINVENT_R */
+
+/* Define to 1 if the amount of physical memory can be detected with
+   getsysinfo(). */
+/* #undef TUKLIB_PHYSMEM_GETSYSINFO */
+
+/* Define to 1 if the amount of physical memory can be detected with
+   pstat_getstatic(). */
+/* #undef TUKLIB_PHYSMEM_PSTAT_GETSTATIC */
+
+/* Define to 1 if the amount of physical memory can be detected with
+   sysconf(_SC_PAGESIZE) and sysconf(_SC_PHYS_PAGES). */
+#define TUKLIB_PHYSMEM_SYSCONF 1
+
+/* Define to 1 if the amount of physical memory can be detected with sysctl().
+   */
+/* #undef TUKLIB_PHYSMEM_SYSCTL */
+
+/* Define to 1 if the amount of physical memory can be detected with Linux
+   sysinfo(). */
+/* #undef TUKLIB_PHYSMEM_SYSINFO */
+
+/* Enable extensions on AIX 3, Interix.  */
+#ifndef _ALL_SOURCE
+# define _ALL_SOURCE 1
+#endif
+/* Enable GNU extensions on systems that have them.  */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+/* Enable threading extensions on Solaris.  */
+#ifndef _POSIX_PTHREAD_SEMANTICS
+# define _POSIX_PTHREAD_SEMANTICS 1
+#endif
+/* Enable extensions on HP NonStop.  */
+#ifndef _TANDEM_SOURCE
+# define _TANDEM_SOURCE 1
+#endif
+/* Enable general extensions on Solaris.  */
+#ifndef __EXTENSIONS__
+# define __EXTENSIONS__ 1
+#endif
+
+
+/* Version number of package */
+#define VERSION "5.0.3"
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+#  define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+/* #  undef WORDS_BIGENDIAN */
+# endif
+#endif
+
+/* Number of bits in a file offset, on hosts where this is settable. */
+/* #undef _FILE_OFFSET_BITS */
+
+/* Define for large files, on AIX-style hosts. */
+/* #undef _LARGE_FILES */
+
+/* Define to 1 if on MINIX. */
+/* #undef _MINIX */
+
+/* Define to 2 if the system does not provide POSIX.1 features except with
+   this defined. */
+/* #undef _POSIX_1_SOURCE */
+
+/* Define to 1 if you need to in order for `stat' and other things to work. */
+/* #undef _POSIX_SOURCE */
+
+/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
+   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
+   #define below would cause a syntax error. */
+/* #undef _UINT32_T */
+
+/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
+   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
+   #define below would cause a syntax error. */
+/* #undef _UINT64_T */
+
+/* Define for Solaris 2.5.1 so the uint8_t typedef from <sys/synch.h>,
+   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
+   #define below would cause a syntax error. */
+/* #undef _UINT8_T */
+
+/* Define to rpl_ if the getopt replacement functions and variables should be
+   used. */
+/* #undef __GETOPT_PREFIX */
+
+/* Define to the type of a signed integer type of width exactly 32 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef int32_t */
+
+/* Define to the type of a signed integer type of width exactly 64 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef int64_t */
+
+/* Define to the type of an unsigned integer type of width exactly 16 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef uint16_t */
+
+/* Define to the type of an unsigned integer type of width exactly 32 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef uint32_t */
+
+/* Define to the type of an unsigned integer type of width exactly 64 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef uint64_t */
+
+/* Define to the type of an unsigned integer type of width exactly 8 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef uint8_t */
+
+/* Define to the type of an unsigned integer type wide enough to hold a
+   pointer, if such a type exists, and if the system does not define it. */
+/* #undef uintptr_t */
diff --git a/third_party/config/xz/mac/config.h b/third_party/config/xz/mac/config.h
new file mode 100644
index 0000000..33cce4e
--- /dev/null
+++ b/third_party/config/xz/mac/config.h
@@ -0,0 +1,421 @@
+/* config.h.  Generated from config.h.in by configure.  */
+/* config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* Define if building universal (internal helper macro) */
+/* #undef AC_APPLE_UNIVERSAL_BUILD */
+
+/* How many MiB of RAM to assume if the real amount cannot be determined. */
+#define ASSUME_RAM 128
+
+/* Define to 1 if translation of program messages to the user's native
+   language is requested. */
+/* #undef ENABLE_NLS */
+
+/* Define to 1 if bswap_16 is available. */
+/* #undef HAVE_BSWAP_16 */
+
+/* Define to 1 if bswap_32 is available. */
+/* #undef HAVE_BSWAP_32 */
+
+/* Define to 1 if bswap_64 is available. */
+/* #undef HAVE_BSWAP_64 */
+
+/* Define to 1 if you have the <byteswap.h> header file. */
+/* #undef HAVE_BYTESWAP_H */
+
+/* Define to 1 if you have the MacOS X function CFLocaleCopyCurrent in the
+   CoreFoundation framework. */
+#define HAVE_CFLOCALECOPYCURRENT 1
+
+/* Define to 1 if you have the MacOS X function CFPreferencesCopyAppValue in
+   the CoreFoundation framework. */
+#define HAVE_CFPREFERENCESCOPYAPPVALUE 1
+
+/* Define to 1 if crc32 integrity check is enabled. */
+#define HAVE_CHECK_CRC32 1
+
+/* Define to 1 if crc64 integrity check is enabled. */
+#define HAVE_CHECK_CRC64 1
+
+/* Define to 1 if sha256 integrity check is enabled. */
+#define HAVE_CHECK_SHA256 1
+
+/* Define if the GNU dcgettext() function is already present or preinstalled.
+   */
+/* #undef HAVE_DCGETTEXT */
+
+/* Define to 1 if you have the declaration of `program_invocation_name', and
+   to 0 if you don't. */
+#define HAVE_DECL_PROGRAM_INVOCATION_NAME 0
+
+/* Define to 1 if arm decoder is enabled. */
+#define HAVE_DECODER_ARM 1
+
+/* Define to 1 if armthumb decoder is enabled. */
+#define HAVE_DECODER_ARMTHUMB 1
+
+/* Define to 1 if delta decoder is enabled. */
+#define HAVE_DECODER_DELTA 1
+
+/* Define to 1 if ia64 decoder is enabled. */
+#define HAVE_DECODER_IA64 1
+
+/* Define to 1 if lzma1 decoder is enabled. */
+#define HAVE_DECODER_LZMA1 1
+
+/* Define to 1 if lzma2 decoder is enabled. */
+#define HAVE_DECODER_LZMA2 1
+
+/* Define to 1 if powerpc decoder is enabled. */
+#define HAVE_DECODER_POWERPC 1
+
+/* Define to 1 if sparc decoder is enabled. */
+#define HAVE_DECODER_SPARC 1
+
+/* Define to 1 if x86 decoder is enabled. */
+#define HAVE_DECODER_X86 1
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if arm encoder is enabled. */
+#define HAVE_ENCODER_ARM 1
+
+/* Define to 1 if armthumb encoder is enabled. */
+#define HAVE_ENCODER_ARMTHUMB 1
+
+/* Define to 1 if delta encoder is enabled. */
+#define HAVE_ENCODER_DELTA 1
+
+/* Define to 1 if ia64 encoder is enabled. */
+#define HAVE_ENCODER_IA64 1
+
+/* Define to 1 if lzma1 encoder is enabled. */
+#define HAVE_ENCODER_LZMA1 1
+
+/* Define to 1 if lzma2 encoder is enabled. */
+#define HAVE_ENCODER_LZMA2 1
+
+/* Define to 1 if powerpc encoder is enabled. */
+#define HAVE_ENCODER_POWERPC 1
+
+/* Define to 1 if sparc encoder is enabled. */
+#define HAVE_ENCODER_SPARC 1
+
+/* Define to 1 if x86 encoder is enabled. */
+#define HAVE_ENCODER_X86 1
+
+/* Define to 1 if you have the <fcntl.h> header file. */
+#define HAVE_FCNTL_H 1
+
+/* Define to 1 if you have the `futimens' function. */
+/* #undef HAVE_FUTIMENS */
+
+/* Define to 1 if you have the `futimes' function. */
+#define HAVE_FUTIMES 1
+
+/* Define to 1 if you have the `futimesat' function. */
+/* #undef HAVE_FUTIMESAT */
+
+/* Define to 1 if you have the <getopt.h> header file. */
+#define HAVE_GETOPT_H 1
+
+/* Define to 1 if you have the `getopt_long' function. */
+#define HAVE_GETOPT_LONG 1
+
+/* Define if the GNU gettext() function is already present or preinstalled. */
+/* #undef HAVE_GETTEXT */
+
+/* Define if you have the iconv() function. */
+#define HAVE_ICONV 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the <limits.h> header file. */
+#define HAVE_LIMITS_H 1
+
+/* Define to 1 if mbrtowc and mbstate_t are properly declared. */
+#define HAVE_MBRTOWC 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 to enable bt2 match finder. */
+#define HAVE_MF_BT2 1
+
+/* Define to 1 to enable bt3 match finder. */
+#define HAVE_MF_BT3 1
+
+/* Define to 1 to enable bt4 match finder. */
+#define HAVE_MF_BT4 1
+
+/* Define to 1 to enable hc3 match finder. */
+#define HAVE_MF_HC3 1
+
+/* Define to 1 to enable hc4 match finder. */
+#define HAVE_MF_HC4 1
+
+/* Define to 1 if getopt.h declares extern int optreset. */
+#define HAVE_OPTRESET 1
+
+/* Define if you have POSIX threads libraries and header files. */
+#define HAVE_PTHREAD 1
+
+/* Define to 1 if optimizing for size. */
+/* #undef HAVE_SMALL */
+
+/* Define to 1 if stdbool.h conforms to C99. */
+#define HAVE_STDBOOL_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if `st_atimensec' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_ATIMENSEC */
+
+/* Define to 1 if `st_atimespec.tv_nsec' is a member of `struct stat'. */
+#define HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC 1
+
+/* Define to 1 if `st_atim.st__tim.tv_nsec' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC */
+
+/* Define to 1 if `st_atim.tv_nsec' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC */
+
+/* Define to 1 if `st_uatime' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_UATIME */
+
+/* Define to 1 if you have the <sys/byteorder.h> header file. */
+/* #undef HAVE_SYS_BYTEORDER_H */
+
+/* Define to 1 if you have the <sys/endian.h> header file. */
+/* #undef HAVE_SYS_ENDIAN_H */
+
+/* Define to 1 if you have the <sys/param.h> header file. */
+#define HAVE_SYS_PARAM_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#define HAVE_SYS_TIME_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if the system has the type `uintptr_t'. */
+#define HAVE_UINTPTR_T 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the `utime' function. */
+/* #undef HAVE_UTIME */
+
+/* Define to 1 if you have the `utimes' function. */
+/* #undef HAVE_UTIMES */
+
+/* Define to 1 or 0, depending whether the compiler supports simple visibility
+   declarations. */
+#define HAVE_VISIBILITY 1
+
+/* Define to 1 if you have the `wcwidth' function. */
+#define HAVE_WCWIDTH 1
+
+/* Define to 1 if the system has the type `_Bool'. */
+#define HAVE__BOOL 1
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#define LT_OBJDIR ".libs/"
+
+/* Define to 1 to disable debugging code. */
+#define NDEBUG 1
+
+/* Define to 1 if your C compiler doesn't accept -c and -o together. */
+/* #undef NO_MINUS_C_MINUS_O */
+
+/* Name of package */
+#define PACKAGE "xz"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "lasse.collin@tukaani.org"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "XZ Utils"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "XZ Utils 5.0.3"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "xz"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL "http://tukaani.org/xz/"
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "5.0.3"
+
+/* Define to necessary symbol if this constant uses a non-standard name on
+   your system. */
+/* #undef PTHREAD_CREATE_JOINABLE */
+
+/* The size of `size_t'; from the compiler, since 4 is wrong on 64-bit. */
+#define SIZEOF_SIZE_T __SIZEOF_SIZE_T__
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Define to 1 if the number of available CPU cores can be detected with
+   pstat_getdynamic(). */
+/* #undef TUKLIB_CPUCORES_PSTAT_GETDYNAMIC */
+
+/* Define to 1 if the number of available CPU cores can be detected with
+   sysconf(_SC_NPROCESSORS_ONLN) or sysconf(_SC_NPROC_ONLN). */
+/* #undef TUKLIB_CPUCORES_SYSCONF */
+
+/* Define to 1 if the number of available CPU cores can be detected with
+   sysctl(). */
+#define TUKLIB_CPUCORES_SYSCTL 1
+
+/* Define to 1 if the system supports fast unaligned access to 16-bit and
+   32-bit integers. */
+#define TUKLIB_FAST_UNALIGNED_ACCESS 1
+
+/* Define to 1 if the amount of physical memory can be detected with
+   _system_configuration.physmem. */
+/* #undef TUKLIB_PHYSMEM_AIX */
+
+/* Define to 1 if the amount of physical memory can be detected with
+   getinvent_r(). */
+/* #undef TUKLIB_PHYSMEM_GETINVENT_R */
+
+/* Define to 1 if the amount of physical memory can be detected with
+   getsysinfo(). */
+/* #undef TUKLIB_PHYSMEM_GETSYSINFO */
+
+/* Define to 1 if the amount of physical memory can be detected with
+   pstat_getstatic(). */
+/* #undef TUKLIB_PHYSMEM_PSTAT_GETSTATIC */
+
+/* Define to 1 if the amount of physical memory can be detected with
+   sysconf(_SC_PAGESIZE) and sysconf(_SC_PHYS_PAGES). */
+/* #undef TUKLIB_PHYSMEM_SYSCONF */
+
+/* Define to 1 if the amount of physical memory can be detected with sysctl().
+   */
+#define TUKLIB_PHYSMEM_SYSCTL 1
+
+/* Define to 1 if the amount of physical memory can be detected with Linux
+   sysinfo(). */
+/* #undef TUKLIB_PHYSMEM_SYSINFO */
+
+/* Enable extensions on AIX 3, Interix.  */
+#ifndef _ALL_SOURCE
+# define _ALL_SOURCE 1
+#endif
+/* Enable GNU extensions on systems that have them.  */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+/* Enable threading extensions on Solaris.  */
+#ifndef _POSIX_PTHREAD_SEMANTICS
+# define _POSIX_PTHREAD_SEMANTICS 1
+#endif
+/* Enable extensions on HP NonStop.  */
+#ifndef _TANDEM_SOURCE
+# define _TANDEM_SOURCE 1
+#endif
+/* Enable general extensions on Solaris.  */
+#ifndef __EXTENSIONS__
+# define __EXTENSIONS__ 1
+#endif
+
+
+/* Version number of package */
+#define VERSION "5.0.3"
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+#  define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+/* #  undef WORDS_BIGENDIAN */
+# endif
+#endif
+
+/* Number of bits in a file offset, on hosts where this is settable. */
+/* #undef _FILE_OFFSET_BITS */
+
+/* Define for large files, on AIX-style hosts. */
+/* #undef _LARGE_FILES */
+
+/* Define to 1 if on MINIX. */
+/* #undef _MINIX */
+
+/* Define to 2 if the system does not provide POSIX.1 features except with
+   this defined. */
+/* #undef _POSIX_1_SOURCE */
+
+/* Define to 1 if you need to in order for `stat' and other things to work. */
+/* #undef _POSIX_SOURCE */
+
+/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
+   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
+   #define below would cause a syntax error. */
+/* #undef _UINT32_T */
+
+/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
+   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
+   #define below would cause a syntax error. */
+/* #undef _UINT64_T */
+
+/* Define for Solaris 2.5.1 so the uint8_t typedef from <sys/synch.h>,
+   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
+   #define below would cause a syntax error. */
+/* #undef _UINT8_T */
+
+/* Define to rpl_ if the getopt replacement functions and variables should be
+   used. */
+/* #undef __GETOPT_PREFIX */
+
+/* Define to the type of a signed integer type of width exactly 32 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef int32_t */
+
+/* Define to the type of a signed integer type of width exactly 64 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef int64_t */
+
+/* Define to the type of an unsigned integer type of width exactly 16 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef uint16_t */
+
+/* Define to the type of an unsigned integer type of width exactly 32 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef uint32_t */
+
+/* Define to the type of an unsigned integer type of width exactly 64 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef uint64_t */
+
+/* Define to the type of an unsigned integer type of width exactly 8 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef uint8_t */
+
+/* Define to the type of an unsigned integer type wide enough to hold a
+   pointer, if such a type exists, and if the system does not define it. */
+/* #undef uintptr_t */
diff --git a/third_party/jquery/BUILD.gn b/third_party/jquery/BUILD.gn
new file mode 100644
index 0000000..b894570
--- /dev/null
+++ b/third_party/jquery/BUILD.gn
@@ -0,0 +1,35 @@
+# Copyright 2015 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+genc = "//client/genc.py"
+
+# Runs the genc script on jquery.min.js; the declared outputs are
+# jquery.min.c and jquery.min.h in this target's generated-files directory.
+action("gen_jquery") {
+  script = genc
+  sources = [
+    "jquery.min.js",
+  ]
+  outputs = [
+    "$target_gen_dir/jquery.min.c",
+    "$target_gen_dir/jquery.min.h",
+  ]
+  # rebase_path() with no new_base yields absolute system paths.
+  out_dir = rebase_path(target_gen_dir)
+  args = [ "--out-dir", out_dir, rebase_path("jquery.min.js") ]
+}
+
+# Adds this directory's generated-files dir to include_dirs; applied to
+# "jquery" and, through its public_configs, to targets that depend on it.
+config("jquery_config") {
+  include_dirs = [ target_gen_dir ]
+}
+
+# Static library whose sources are exactly the declared outputs of
+# :gen_jquery (jquery.min.c and jquery.min.h).
+static_library("jquery") {
+  sources = get_target_outputs(":gen_jquery")
+  deps = [ ":gen_jquery" ]
+  public_configs = [ ":jquery_config" ]
+}
diff --git a/third_party/jquery/LICENSE b/third_party/jquery/LICENSE
new file mode 100644
index 0000000..e4e5e00
--- /dev/null
+++ b/third_party/jquery/LICENSE
@@ -0,0 +1,36 @@
+Copyright JS Foundation and other contributors, https://js.foundation/
+
+This software consists of voluntary contributions made by many
+individuals. For exact contribution history, see the revision history
+available at https://github.com/jquery/jquery
+
+The following license applies to all parts of this software except as
+documented below:
+
+====
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+====
+
+All files located in the node_modules and external directories are
+externally maintained libraries used by this software which have their
+own licenses; we recommend you read them, as their terms may differ from
+the terms above.
diff --git a/third_party/jquery/README b/third_party/jquery/README
new file mode 100644
index 0000000..d4113db
--- /dev/null
+++ b/third_party/jquery/README
@@ -0,0 +1,2 @@
+jquery.min.js is jQuery-2.1.4.min.js (jQuery v2.1.4, minified).
+Downloaded as the then-latest 2.x.y compressed version from http://jquery.com/download/
diff --git a/third_party/jquery/jquery.min.js b/third_party/jquery/jquery.min.js
new file mode 100644
index 0000000..49990d6
--- /dev/null
+++ b/third_party/jquery/jquery.min.js
@@ -0,0 +1,4 @@
+/*! jQuery v2.1.4 | (c) 2005, 2015 jQuery Foundation, Inc. | jquery.org/license */
+!function(a,b){"object"==typeof module&&"object"==typeof module.exports?module.exports=a.document?b(a,!0):function(a){if(!a.document)throw new Error("jQuery requires a window with a document");return b(a)}:b(a)}("undefined"!=typeof window?window:this,function(a,b){var c=[],d=c.slice,e=c.concat,f=c.push,g=c.indexOf,h={},i=h.toString,j=h.hasOwnProperty,k={},l=a.document,m="2.1.4",n=function(a,b){return new n.fn.init(a,b)},o=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g,p=/^-ms-/,q=/-([\da-z])/gi,r=function(a,b){return b.toUpperCase()};n.fn=n.prototype={jquery:m,constructor:n,selector:"",length:0,toArray:function(){return d.call(this)},get:function(a){return null!=a?0>a?this[a+this.length]:this[a]:d.call(this)},pushStack:function(a){var b=n.merge(this.constructor(),a);return b.prevObject=this,b.context=this.context,b},each:function(a,b){return n.each(this,a,b)},map:function(a){return this.pushStack(n.map(this,function(b,c){return a.call(b,c,b)}))},slice:function(){return this.pushStack(d.apply(this,arguments))},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},eq:function(a){var b=this.length,c=+a+(0>a?b:0);return this.pushStack(c>=0&&b>c?[this[c]]:[])},end:function(){return this.prevObject||this.constructor(null)},push:f,sort:c.sort,splice:c.splice},n.extend=n.fn.extend=function(){var a,b,c,d,e,f,g=arguments[0]||{},h=1,i=arguments.length,j=!1;for("boolean"==typeof g&&(j=g,g=arguments[h]||{},h++),"object"==typeof g||n.isFunction(g)||(g={}),h===i&&(g=this,h--);i>h;h++)if(null!=(a=arguments[h]))for(b in a)c=g[b],d=a[b],g!==d&&(j&&d&&(n.isPlainObject(d)||(e=n.isArray(d)))?(e?(e=!1,f=c&&n.isArray(c)?c:[]):f=c&&n.isPlainObject(c)?c:{},g[b]=n.extend(j,f,d)):void 0!==d&&(g[b]=d));return g},n.extend({expando:"jQuery"+(m+Math.random()).replace(/\D/g,""),isReady:!0,error:function(a){throw new Error(a)},noop:function(){},isFunction:function(a){return"function"===n.type(a)},isArray:Array.isArray,isWindow:function(a){return 
null!=a&&a===a.window},isNumeric:function(a){return!n.isArray(a)&&a-parseFloat(a)+1>=0},isPlainObject:function(a){return"object"!==n.type(a)||a.nodeType||n.isWindow(a)?!1:a.constructor&&!j.call(a.constructor.prototype,"isPrototypeOf")?!1:!0},isEmptyObject:function(a){var b;for(b in a)return!1;return!0},type:function(a){return null==a?a+"":"object"==typeof a||"function"==typeof a?h[i.call(a)]||"object":typeof a},globalEval:function(a){var b,c=eval;a=n.trim(a),a&&(1===a.indexOf("use strict")?(b=l.createElement("script"),b.text=a,l.head.appendChild(b).parentNode.removeChild(b)):c(a))},camelCase:function(a){return a.replace(p,"ms-").replace(q,r)},nodeName:function(a,b){return a.nodeName&&a.nodeName.toLowerCase()===b.toLowerCase()},each:function(a,b,c){var d,e=0,f=a.length,g=s(a);if(c){if(g){for(;f>e;e++)if(d=b.apply(a[e],c),d===!1)break}else for(e in a)if(d=b.apply(a[e],c),d===!1)break}else if(g){for(;f>e;e++)if(d=b.call(a[e],e,a[e]),d===!1)break}else for(e in a)if(d=b.call(a[e],e,a[e]),d===!1)break;return a},trim:function(a){return null==a?"":(a+"").replace(o,"")},makeArray:function(a,b){var c=b||[];return null!=a&&(s(Object(a))?n.merge(c,"string"==typeof a?[a]:a):f.call(c,a)),c},inArray:function(a,b,c){return null==b?-1:g.call(b,a,c)},merge:function(a,b){for(var c=+b.length,d=0,e=a.length;c>d;d++)a[e++]=b[d];return a.length=e,a},grep:function(a,b,c){for(var d,e=[],f=0,g=a.length,h=!c;g>f;f++)d=!b(a[f],f),d!==h&&e.push(a[f]);return e},map:function(a,b,c){var d,f=0,g=a.length,h=s(a),i=[];if(h)for(;g>f;f++)d=b(a[f],f,c),null!=d&&i.push(d);else for(f in a)d=b(a[f],f,c),null!=d&&i.push(d);return e.apply([],i)},guid:1,proxy:function(a,b){var c,e,f;return"string"==typeof b&&(c=a[b],b=a,a=c),n.isFunction(a)?(e=d.call(arguments,2),f=function(){return a.apply(b||this,e.concat(d.call(arguments)))},f.guid=a.guid=a.guid||n.guid++,f):void 0},now:Date.now,support:k}),n.each("Boolean Number String Function Array Date RegExp Object Error".split(" "),function(a,b){h["[object 
"+b+"]"]=b.toLowerCase()});function s(a){var b="length"in a&&a.length,c=n.type(a);return"function"===c||n.isWindow(a)?!1:1===a.nodeType&&b?!0:"array"===c||0===b||"number"==typeof b&&b>0&&b-1 in a}var t=function(a){var b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u="sizzle"+1*new Date,v=a.document,w=0,x=0,y=ha(),z=ha(),A=ha(),B=function(a,b){return a===b&&(l=!0),0},C=1<<31,D={}.hasOwnProperty,E=[],F=E.pop,G=E.push,H=E.push,I=E.slice,J=function(a,b){for(var c=0,d=a.length;d>c;c++)if(a[c]===b)return c;return-1},K="checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|ismap|loop|multiple|open|readonly|required|scoped",L="[\\x20\\t\\r\\n\\f]",M="(?:\\\\.|[\\w-]|[^\\x00-\\xa0])+",N=M.replace("w","w#"),O="\\["+L+"*("+M+")(?:"+L+"*([*^$|!~]?=)"+L+"*(?:'((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\"|("+N+"))|)"+L+"*\\]",P=":("+M+")(?:\\((('((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\")|((?:\\\\.|[^\\\\()[\\]]|"+O+")*)|.*)\\)|)",Q=new RegExp(L+"+","g"),R=new RegExp("^"+L+"+|((?:^|[^\\\\])(?:\\\\.)*)"+L+"+$","g"),S=new RegExp("^"+L+"*,"+L+"*"),T=new RegExp("^"+L+"*([>+~]|"+L+")"+L+"*"),U=new RegExp("="+L+"*([^\\]'\"]*?)"+L+"*\\]","g"),V=new RegExp(P),W=new RegExp("^"+N+"$"),X={ID:new RegExp("^#("+M+")"),CLASS:new RegExp("^\\.("+M+")"),TAG:new RegExp("^("+M.replace("w","w*")+")"),ATTR:new RegExp("^"+O),PSEUDO:new RegExp("^"+P),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+L+"*(even|odd|(([+-]|)(\\d*)n|)"+L+"*(?:([+-]|)"+L+"*(\\d+)|))"+L+"*\\)|)","i"),bool:new RegExp("^(?:"+K+")$","i"),needsContext:new RegExp("^"+L+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+L+"*((?:-\\d)?\\d*)"+L+"*\\)|)(?=[^-]|$)","i")},Y=/^(?:input|select|textarea|button)$/i,Z=/^h\d$/i,$=/^[^{]+\{\s*\[native \w/,_=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,aa=/[+~]/,ba=/'|\\/g,ca=new RegExp("\\\\([\\da-f]{1,6}"+L+"?|("+L+")|.)","ig"),da=function(a,b,c){var d="0x"+b-65536;return 
d!==d||c?b:0>d?String.fromCharCode(d+65536):String.fromCharCode(d>>10|55296,1023&d|56320)},ea=function(){m()};try{H.apply(E=I.call(v.childNodes),v.childNodes),E[v.childNodes.length].nodeType}catch(fa){H={apply:E.length?function(a,b){G.apply(a,I.call(b))}:function(a,b){var c=a.length,d=0;while(a[c++]=b[d++]);a.length=c-1}}}function ga(a,b,d,e){var f,h,j,k,l,o,r,s,w,x;if((b?b.ownerDocument||b:v)!==n&&m(b),b=b||n,d=d||[],k=b.nodeType,"string"!=typeof a||!a||1!==k&&9!==k&&11!==k)return d;if(!e&&p){if(11!==k&&(f=_.exec(a)))if(j=f[1]){if(9===k){if(h=b.getElementById(j),!h||!h.parentNode)return d;if(h.id===j)return d.push(h),d}else if(b.ownerDocument&&(h=b.ownerDocument.getElementById(j))&&t(b,h)&&h.id===j)return d.push(h),d}else{if(f[2])return H.apply(d,b.getElementsByTagName(a)),d;if((j=f[3])&&c.getElementsByClassName)return H.apply(d,b.getElementsByClassName(j)),d}if(c.qsa&&(!q||!q.test(a))){if(s=r=u,w=b,x=1!==k&&a,1===k&&"object"!==b.nodeName.toLowerCase()){o=g(a),(r=b.getAttribute("id"))?s=r.replace(ba,"\\$&"):b.setAttribute("id",s),s="[id='"+s+"'] ",l=o.length;while(l--)o[l]=s+ra(o[l]);w=aa.test(a)&&pa(b.parentNode)||b,x=o.join(",")}if(x)try{return H.apply(d,w.querySelectorAll(x)),d}catch(y){}finally{r||b.removeAttribute("id")}}}return i(a.replace(R,"$1"),b,d,e)}function ha(){var a=[];function b(c,e){return a.push(c+" ")>d.cacheLength&&delete b[a.shift()],b[c+" "]=e}return b}function ia(a){return a[u]=!0,a}function ja(a){var b=n.createElement("div");try{return!!a(b)}catch(c){return!1}finally{b.parentNode&&b.parentNode.removeChild(b),b=null}}function ka(a,b){var c=a.split("|"),e=a.length;while(e--)d.attrHandle[c[e]]=b}function la(a,b){var c=b&&a,d=c&&1===a.nodeType&&1===b.nodeType&&(~b.sourceIndex||C)-(~a.sourceIndex||C);if(d)return d;if(c)while(c=c.nextSibling)if(c===b)return-1;return a?1:-1}function ma(a){return function(b){var c=b.nodeName.toLowerCase();return"input"===c&&b.type===a}}function na(a){return function(b){var 
c=b.nodeName.toLowerCase();return("input"===c||"button"===c)&&b.type===a}}function oa(a){return ia(function(b){return b=+b,ia(function(c,d){var e,f=a([],c.length,b),g=f.length;while(g--)c[e=f[g]]&&(c[e]=!(d[e]=c[e]))})})}function pa(a){return a&&"undefined"!=typeof a.getElementsByTagName&&a}c=ga.support={},f=ga.isXML=function(a){var b=a&&(a.ownerDocument||a).documentElement;return b?"HTML"!==b.nodeName:!1},m=ga.setDocument=function(a){var b,e,g=a?a.ownerDocument||a:v;return g!==n&&9===g.nodeType&&g.documentElement?(n=g,o=g.documentElement,e=g.defaultView,e&&e!==e.top&&(e.addEventListener?e.addEventListener("unload",ea,!1):e.attachEvent&&e.attachEvent("onunload",ea)),p=!f(g),c.attributes=ja(function(a){return a.className="i",!a.getAttribute("className")}),c.getElementsByTagName=ja(function(a){return a.appendChild(g.createComment("")),!a.getElementsByTagName("*").length}),c.getElementsByClassName=$.test(g.getElementsByClassName),c.getById=ja(function(a){return o.appendChild(a).id=u,!g.getElementsByName||!g.getElementsByName(u).length}),c.getById?(d.find.ID=function(a,b){if("undefined"!=typeof b.getElementById&&p){var c=b.getElementById(a);return c&&c.parentNode?[c]:[]}},d.filter.ID=function(a){var b=a.replace(ca,da);return function(a){return a.getAttribute("id")===b}}):(delete d.find.ID,d.filter.ID=function(a){var b=a.replace(ca,da);return function(a){var c="undefined"!=typeof a.getAttributeNode&&a.getAttributeNode("id");return c&&c.value===b}}),d.find.TAG=c.getElementsByTagName?function(a,b){return"undefined"!=typeof b.getElementsByTagName?b.getElementsByTagName(a):c.qsa?b.querySelectorAll(a):void 0}:function(a,b){var c,d=[],e=0,f=b.getElementsByTagName(a);if("*"===a){while(c=f[e++])1===c.nodeType&&d.push(c);return d}return f},d.find.CLASS=c.getElementsByClassName&&function(a,b){return p?b.getElementsByClassName(a):void 0},r=[],q=[],(c.qsa=$.test(g.querySelectorAll))&&(ja(function(a){o.appendChild(a).innerHTML="<a id='"+u+"'></a><select id='"+u+"-\f]' 
msallowcapture=''><option selected=''></option></select>",a.querySelectorAll("[msallowcapture^='']").length&&q.push("[*^$]="+L+"*(?:''|\"\")"),a.querySelectorAll("[selected]").length||q.push("\\["+L+"*(?:value|"+K+")"),a.querySelectorAll("[id~="+u+"-]").length||q.push("~="),a.querySelectorAll(":checked").length||q.push(":checked"),a.querySelectorAll("a#"+u+"+*").length||q.push(".#.+[+~]")}),ja(function(a){var b=g.createElement("input");b.setAttribute("type","hidden"),a.appendChild(b).setAttribute("name","D"),a.querySelectorAll("[name=d]").length&&q.push("name"+L+"*[*^$|!~]?="),a.querySelectorAll(":enabled").length||q.push(":enabled",":disabled"),a.querySelectorAll("*,:x"),q.push(",.*:")})),(c.matchesSelector=$.test(s=o.matches||o.webkitMatchesSelector||o.mozMatchesSelector||o.oMatchesSelector||o.msMatchesSelector))&&ja(function(a){c.disconnectedMatch=s.call(a,"div"),s.call(a,"[s!='']:x"),r.push("!=",P)}),q=q.length&&new RegExp(q.join("|")),r=r.length&&new RegExp(r.join("|")),b=$.test(o.compareDocumentPosition),t=b||$.test(o.contains)?function(a,b){var c=9===a.nodeType?a.documentElement:a,d=b&&b.parentNode;return a===d||!(!d||1!==d.nodeType||!(c.contains?c.contains(d):a.compareDocumentPosition&&16&a.compareDocumentPosition(d)))}:function(a,b){if(b)while(b=b.parentNode)if(b===a)return!0;return!1},B=b?function(a,b){if(a===b)return l=!0,0;var d=!a.compareDocumentPosition-!b.compareDocumentPosition;return d?d:(d=(a.ownerDocument||a)===(b.ownerDocument||b)?a.compareDocumentPosition(b):1,1&d||!c.sortDetached&&b.compareDocumentPosition(a)===d?a===g||a.ownerDocument===v&&t(v,a)?-1:b===g||b.ownerDocument===v&&t(v,b)?1:k?J(k,a)-J(k,b):0:4&d?-1:1)}:function(a,b){if(a===b)return l=!0,0;var c,d=0,e=a.parentNode,f=b.parentNode,h=[a],i=[b];if(!e||!f)return a===g?-1:b===g?1:e?-1:f?1:k?J(k,a)-J(k,b):0;if(e===f)return la(a,b);c=a;while(c=c.parentNode)h.unshift(c);c=b;while(c=c.parentNode)i.unshift(c);while(h[d]===i[d])d++;return 
d?la(h[d],i[d]):h[d]===v?-1:i[d]===v?1:0},g):n},ga.matches=function(a,b){return ga(a,null,null,b)},ga.matchesSelector=function(a,b){if((a.ownerDocument||a)!==n&&m(a),b=b.replace(U,"='$1']"),!(!c.matchesSelector||!p||r&&r.test(b)||q&&q.test(b)))try{var d=s.call(a,b);if(d||c.disconnectedMatch||a.document&&11!==a.document.nodeType)return d}catch(e){}return ga(b,n,null,[a]).length>0},ga.contains=function(a,b){return(a.ownerDocument||a)!==n&&m(a),t(a,b)},ga.attr=function(a,b){(a.ownerDocument||a)!==n&&m(a);var e=d.attrHandle[b.toLowerCase()],f=e&&D.call(d.attrHandle,b.toLowerCase())?e(a,b,!p):void 0;return void 0!==f?f:c.attributes||!p?a.getAttribute(b):(f=a.getAttributeNode(b))&&f.specified?f.value:null},ga.error=function(a){throw new Error("Syntax error, unrecognized expression: "+a)},ga.uniqueSort=function(a){var b,d=[],e=0,f=0;if(l=!c.detectDuplicates,k=!c.sortStable&&a.slice(0),a.sort(B),l){while(b=a[f++])b===a[f]&&(e=d.push(f));while(e--)a.splice(d[e],1)}return k=null,a},e=ga.getText=function(a){var b,c="",d=0,f=a.nodeType;if(f){if(1===f||9===f||11===f){if("string"==typeof a.textContent)return a.textContent;for(a=a.firstChild;a;a=a.nextSibling)c+=e(a)}else if(3===f||4===f)return a.nodeValue}else while(b=a[d++])c+=e(b);return c},d=ga.selectors={cacheLength:50,createPseudo:ia,match:X,attrHandle:{},find:{},relative:{">":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(a){return a[1]=a[1].replace(ca,da),a[3]=(a[3]||a[4]||a[5]||"").replace(ca,da),"~="===a[2]&&(a[3]=" "+a[3]+" "),a.slice(0,4)},CHILD:function(a){return a[1]=a[1].toLowerCase(),"nth"===a[1].slice(0,3)?(a[3]||ga.error(a[0]),a[4]=+(a[4]?a[5]+(a[6]||1):2*("even"===a[3]||"odd"===a[3])),a[5]=+(a[7]+a[8]||"odd"===a[3])):a[3]&&ga.error(a[0]),a},PSEUDO:function(a){var b,c=!a[6]&&a[2];return 
X.CHILD.test(a[0])?null:(a[3]?a[2]=a[4]||a[5]||"":c&&V.test(c)&&(b=g(c,!0))&&(b=c.indexOf(")",c.length-b)-c.length)&&(a[0]=a[0].slice(0,b),a[2]=c.slice(0,b)),a.slice(0,3))}},filter:{TAG:function(a){var b=a.replace(ca,da).toLowerCase();return"*"===a?function(){return!0}:function(a){return a.nodeName&&a.nodeName.toLowerCase()===b}},CLASS:function(a){var b=y[a+" "];return b||(b=new RegExp("(^|"+L+")"+a+"("+L+"|$)"))&&y(a,function(a){return b.test("string"==typeof a.className&&a.className||"undefined"!=typeof a.getAttribute&&a.getAttribute("class")||"")})},ATTR:function(a,b,c){return function(d){var e=ga.attr(d,a);return null==e?"!="===b:b?(e+="","="===b?e===c:"!="===b?e!==c:"^="===b?c&&0===e.indexOf(c):"*="===b?c&&e.indexOf(c)>-1:"$="===b?c&&e.slice(-c.length)===c:"~="===b?(" "+e.replace(Q," ")+" ").indexOf(c)>-1:"|="===b?e===c||e.slice(0,c.length+1)===c+"-":!1):!0}},CHILD:function(a,b,c,d,e){var f="nth"!==a.slice(0,3),g="last"!==a.slice(-4),h="of-type"===b;return 1===d&&0===e?function(a){return!!a.parentNode}:function(b,c,i){var j,k,l,m,n,o,p=f!==g?"nextSibling":"previousSibling",q=b.parentNode,r=h&&b.nodeName.toLowerCase(),s=!i&&!h;if(q){if(f){while(p){l=b;while(l=l[p])if(h?l.nodeName.toLowerCase()===r:1===l.nodeType)return!1;o=p="only"===a&&!o&&"nextSibling"}return!0}if(o=[g?q.firstChild:q.lastChild],g&&s){k=q[u]||(q[u]={}),j=k[a]||[],n=j[0]===w&&j[1],m=j[0]===w&&j[2],l=n&&q.childNodes[n];while(l=++n&&l&&l[p]||(m=n=0)||o.pop())if(1===l.nodeType&&++m&&l===b){k[a]=[w,n,m];break}}else if(s&&(j=(b[u]||(b[u]={}))[a])&&j[0]===w)m=j[1];else while(l=++n&&l&&l[p]||(m=n=0)||o.pop())if((h?l.nodeName.toLowerCase()===r:1===l.nodeType)&&++m&&(s&&((l[u]||(l[u]={}))[a]=[w,m]),l===b))break;return m-=e,m===d||m%d===0&&m/d>=0}}},PSEUDO:function(a,b){var c,e=d.pseudos[a]||d.setFilters[a.toLowerCase()]||ga.error("unsupported pseudo: "+a);return e[u]?e(b):e.length>1?(c=[a,a,"",b],d.setFilters.hasOwnProperty(a.toLowerCase())?ia(function(a,c){var 
d,f=e(a,b),g=f.length;while(g--)d=J(a,f[g]),a[d]=!(c[d]=f[g])}):function(a){return e(a,0,c)}):e}},pseudos:{not:ia(function(a){var b=[],c=[],d=h(a.replace(R,"$1"));return d[u]?ia(function(a,b,c,e){var f,g=d(a,null,e,[]),h=a.length;while(h--)(f=g[h])&&(a[h]=!(b[h]=f))}):function(a,e,f){return b[0]=a,d(b,null,f,c),b[0]=null,!c.pop()}}),has:ia(function(a){return function(b){return ga(a,b).length>0}}),contains:ia(function(a){return a=a.replace(ca,da),function(b){return(b.textContent||b.innerText||e(b)).indexOf(a)>-1}}),lang:ia(function(a){return W.test(a||"")||ga.error("unsupported lang: "+a),a=a.replace(ca,da).toLowerCase(),function(b){var c;do if(c=p?b.lang:b.getAttribute("xml:lang")||b.getAttribute("lang"))return c=c.toLowerCase(),c===a||0===c.indexOf(a+"-");while((b=b.parentNode)&&1===b.nodeType);return!1}}),target:function(b){var c=a.location&&a.location.hash;return c&&c.slice(1)===b.id},root:function(a){return a===o},focus:function(a){return a===n.activeElement&&(!n.hasFocus||n.hasFocus())&&!!(a.type||a.href||~a.tabIndex)},enabled:function(a){return a.disabled===!1},disabled:function(a){return a.disabled===!0},checked:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&!!a.checked||"option"===b&&!!a.selected},selected:function(a){return a.parentNode&&a.parentNode.selectedIndex,a.selected===!0},empty:function(a){for(a=a.firstChild;a;a=a.nextSibling)if(a.nodeType<6)return!1;return!0},parent:function(a){return!d.pseudos.empty(a)},header:function(a){return Z.test(a.nodeName)},input:function(a){return Y.test(a.nodeName)},button:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&"button"===a.type||"button"===b},text:function(a){var b;return"input"===a.nodeName.toLowerCase()&&"text"===a.type&&(null==(b=a.getAttribute("type"))||"text"===b.toLowerCase())},first:oa(function(){return[0]}),last:oa(function(a,b){return[b-1]}),eq:oa(function(a,b,c){return[0>c?c+b:c]}),even:oa(function(a,b){for(var c=0;b>c;c+=2)a.push(c);return 
a}),odd:oa(function(a,b){for(var c=1;b>c;c+=2)a.push(c);return a}),lt:oa(function(a,b,c){for(var d=0>c?c+b:c;--d>=0;)a.push(d);return a}),gt:oa(function(a,b,c){for(var d=0>c?c+b:c;++d<b;)a.push(d);return a})}},d.pseudos.nth=d.pseudos.eq;for(b in{radio:!0,checkbox:!0,file:!0,password:!0,image:!0})d.pseudos[b]=ma(b);for(b in{submit:!0,reset:!0})d.pseudos[b]=na(b);function qa(){}qa.prototype=d.filters=d.pseudos,d.setFilters=new qa,g=ga.tokenize=function(a,b){var c,e,f,g,h,i,j,k=z[a+" "];if(k)return b?0:k.slice(0);h=a,i=[],j=d.preFilter;while(h){(!c||(e=S.exec(h)))&&(e&&(h=h.slice(e[0].length)||h),i.push(f=[])),c=!1,(e=T.exec(h))&&(c=e.shift(),f.push({value:c,type:e[0].replace(R," ")}),h=h.slice(c.length));for(g in d.filter)!(e=X[g].exec(h))||j[g]&&!(e=j[g](e))||(c=e.shift(),f.push({value:c,type:g,matches:e}),h=h.slice(c.length));if(!c)break}return b?h.length:h?ga.error(a):z(a,i).slice(0)};function ra(a){for(var b=0,c=a.length,d="";c>b;b++)d+=a[b].value;return d}function sa(a,b,c){var d=b.dir,e=c&&"parentNode"===d,f=x++;return b.first?function(b,c,f){while(b=b[d])if(1===b.nodeType||e)return a(b,c,f)}:function(b,c,g){var h,i,j=[w,f];if(g){while(b=b[d])if((1===b.nodeType||e)&&a(b,c,g))return!0}else while(b=b[d])if(1===b.nodeType||e){if(i=b[u]||(b[u]={}),(h=i[d])&&h[0]===w&&h[1]===f)return j[2]=h[2];if(i[d]=j,j[2]=a(b,c,g))return!0}}}function ta(a){return a.length>1?function(b,c,d){var e=a.length;while(e--)if(!a[e](b,c,d))return!1;return!0}:a[0]}function ua(a,b,c){for(var d=0,e=b.length;e>d;d++)ga(a,b[d],c);return c}function va(a,b,c,d,e){for(var f,g=[],h=0,i=a.length,j=null!=b;i>h;h++)(f=a[h])&&(!c||c(f,d,e))&&(g.push(f),j&&b.push(h));return g}function wa(a,b,c,d,e,f){return d&&!d[u]&&(d=wa(d)),e&&!e[u]&&(e=wa(e,f)),ia(function(f,g,h,i){var 
j,k,l,m=[],n=[],o=g.length,p=f||ua(b||"*",h.nodeType?[h]:h,[]),q=!a||!f&&b?p:va(p,m,a,h,i),r=c?e||(f?a:o||d)?[]:g:q;if(c&&c(q,r,h,i),d){j=va(r,n),d(j,[],h,i),k=j.length;while(k--)(l=j[k])&&(r[n[k]]=!(q[n[k]]=l))}if(f){if(e||a){if(e){j=[],k=r.length;while(k--)(l=r[k])&&j.push(q[k]=l);e(null,r=[],j,i)}k=r.length;while(k--)(l=r[k])&&(j=e?J(f,l):m[k])>-1&&(f[j]=!(g[j]=l))}}else r=va(r===g?r.splice(o,r.length):r),e?e(null,g,r,i):H.apply(g,r)})}function xa(a){for(var b,c,e,f=a.length,g=d.relative[a[0].type],h=g||d.relative[" "],i=g?1:0,k=sa(function(a){return a===b},h,!0),l=sa(function(a){return J(b,a)>-1},h,!0),m=[function(a,c,d){var e=!g&&(d||c!==j)||((b=c).nodeType?k(a,c,d):l(a,c,d));return b=null,e}];f>i;i++)if(c=d.relative[a[i].type])m=[sa(ta(m),c)];else{if(c=d.filter[a[i].type].apply(null,a[i].matches),c[u]){for(e=++i;f>e;e++)if(d.relative[a[e].type])break;return wa(i>1&&ta(m),i>1&&ra(a.slice(0,i-1).concat({value:" "===a[i-2].type?"*":""})).replace(R,"$1"),c,e>i&&xa(a.slice(i,e)),f>e&&xa(a=a.slice(e)),f>e&&ra(a))}m.push(c)}return ta(m)}function ya(a,b){var c=b.length>0,e=a.length>0,f=function(f,g,h,i,k){var l,m,o,p=0,q="0",r=f&&[],s=[],t=j,u=f||e&&d.find.TAG("*",k),v=w+=null==t?1:Math.random()||.1,x=u.length;for(k&&(j=g!==n&&g);q!==x&&null!=(l=u[q]);q++){if(e&&l){m=0;while(o=a[m++])if(o(l,g,h)){i.push(l);break}k&&(w=v)}c&&((l=!o&&l)&&p--,f&&r.push(l))}if(p+=q,c&&q!==p){m=0;while(o=b[m++])o(r,s,g,h);if(f){if(p>0)while(q--)r[q]||s[q]||(s[q]=F.call(i));s=va(s)}H.apply(i,s),k&&!f&&s.length>0&&p+b.length>1&&ga.uniqueSort(i)}return k&&(w=v,j=t),r};return c?ia(f):f}return h=ga.compile=function(a,b){var c,d=[],e=[],f=A[a+" "];if(!f){b||(b=g(a)),c=b.length;while(c--)f=xa(b[c]),f[u]?d.push(f):e.push(f);f=A(a,ya(e,d)),f.selector=a}return f},i=ga.select=function(a,b,e,f){var i,j,k,l,m,n="function"==typeof 
a&&a,o=!f&&g(a=n.selector||a);if(e=e||[],1===o.length){if(j=o[0]=o[0].slice(0),j.length>2&&"ID"===(k=j[0]).type&&c.getById&&9===b.nodeType&&p&&d.relative[j[1].type]){if(b=(d.find.ID(k.matches[0].replace(ca,da),b)||[])[0],!b)return e;n&&(b=b.parentNode),a=a.slice(j.shift().value.length)}i=X.needsContext.test(a)?0:j.length;while(i--){if(k=j[i],d.relative[l=k.type])break;if((m=d.find[l])&&(f=m(k.matches[0].replace(ca,da),aa.test(j[0].type)&&pa(b.parentNode)||b))){if(j.splice(i,1),a=f.length&&ra(j),!a)return H.apply(e,f),e;break}}}return(n||h(a,o))(f,b,!p,e,aa.test(a)&&pa(b.parentNode)||b),e},c.sortStable=u.split("").sort(B).join("")===u,c.detectDuplicates=!!l,m(),c.sortDetached=ja(function(a){return 1&a.compareDocumentPosition(n.createElement("div"))}),ja(function(a){return a.innerHTML="<a href='#'></a>","#"===a.firstChild.getAttribute("href")})||ka("type|href|height|width",function(a,b,c){return c?void 0:a.getAttribute(b,"type"===b.toLowerCase()?1:2)}),c.attributes&&ja(function(a){return a.innerHTML="<input/>",a.firstChild.setAttribute("value",""),""===a.firstChild.getAttribute("value")})||ka("value",function(a,b,c){return c||"input"!==a.nodeName.toLowerCase()?void 0:a.defaultValue}),ja(function(a){return null==a.getAttribute("disabled")})||ka(K,function(a,b,c){var d;return c?void 0:a[b]===!0?b.toLowerCase():(d=a.getAttributeNode(b))&&d.specified?d.value:null}),ga}(a);n.find=t,n.expr=t.selectors,n.expr[":"]=n.expr.pseudos,n.unique=t.uniqueSort,n.text=t.getText,n.isXMLDoc=t.isXML,n.contains=t.contains;var u=n.expr.match.needsContext,v=/^<(\w+)\s*\/?>(?:<\/\1>|)$/,w=/^.[^:#\[\.,]*$/;function x(a,b,c){if(n.isFunction(b))return n.grep(a,function(a,d){return!!b.call(a,d,a)!==c});if(b.nodeType)return n.grep(a,function(a){return a===b!==c});if("string"==typeof b){if(w.test(b))return n.filter(b,a,c);b=n.filter(b,a)}return n.grep(a,function(a){return g.call(b,a)>=0!==c})}n.filter=function(a,b,c){var d=b[0];return 
c&&(a=":not("+a+")"),1===b.length&&1===d.nodeType?n.find.matchesSelector(d,a)?[d]:[]:n.find.matches(a,n.grep(b,function(a){return 1===a.nodeType}))},n.fn.extend({find:function(a){var b,c=this.length,d=[],e=this;if("string"!=typeof a)return this.pushStack(n(a).filter(function(){for(b=0;c>b;b++)if(n.contains(e[b],this))return!0}));for(b=0;c>b;b++)n.find(a,e[b],d);return d=this.pushStack(c>1?n.unique(d):d),d.selector=this.selector?this.selector+" "+a:a,d},filter:function(a){return this.pushStack(x(this,a||[],!1))},not:function(a){return this.pushStack(x(this,a||[],!0))},is:function(a){return!!x(this,"string"==typeof a&&u.test(a)?n(a):a||[],!1).length}});var y,z=/^(?:\s*(<[\w\W]+>)[^>]*|#([\w-]*))$/,A=n.fn.init=function(a,b){var c,d;if(!a)return this;if("string"==typeof a){if(c="<"===a[0]&&">"===a[a.length-1]&&a.length>=3?[null,a,null]:z.exec(a),!c||!c[1]&&b)return!b||b.jquery?(b||y).find(a):this.constructor(b).find(a);if(c[1]){if(b=b instanceof n?b[0]:b,n.merge(this,n.parseHTML(c[1],b&&b.nodeType?b.ownerDocument||b:l,!0)),v.test(c[1])&&n.isPlainObject(b))for(c in b)n.isFunction(this[c])?this[c](b[c]):this.attr(c,b[c]);return this}return d=l.getElementById(c[2]),d&&d.parentNode&&(this.length=1,this[0]=d),this.context=l,this.selector=a,this}return a.nodeType?(this.context=this[0]=a,this.length=1,this):n.isFunction(a)?"undefined"!=typeof y.ready?y.ready(a):a(n):(void 0!==a.selector&&(this.selector=a.selector,this.context=a.context),n.makeArray(a,this))};A.prototype=n.fn,y=n(l);var B=/^(?:parents|prev(?:Until|All))/,C={children:!0,contents:!0,next:!0,prev:!0};n.extend({dir:function(a,b,c){var d=[],e=void 0!==c;while((a=a[b])&&9!==a.nodeType)if(1===a.nodeType){if(e&&n(a).is(c))break;d.push(a)}return d},sibling:function(a,b){for(var c=[];a;a=a.nextSibling)1===a.nodeType&&a!==b&&c.push(a);return c}}),n.fn.extend({has:function(a){var b=n(a,this),c=b.length;return this.filter(function(){for(var a=0;c>a;a++)if(n.contains(this,b[a]))return!0})},closest:function(a,b){for(var 
c,d=0,e=this.length,f=[],g=u.test(a)||"string"!=typeof a?n(a,b||this.context):0;e>d;d++)for(c=this[d];c&&c!==b;c=c.parentNode)if(c.nodeType<11&&(g?g.index(c)>-1:1===c.nodeType&&n.find.matchesSelector(c,a))){f.push(c);break}return this.pushStack(f.length>1?n.unique(f):f)},index:function(a){return a?"string"==typeof a?g.call(n(a),this[0]):g.call(this,a.jquery?a[0]:a):this[0]&&this[0].parentNode?this.first().prevAll().length:-1},add:function(a,b){return this.pushStack(n.unique(n.merge(this.get(),n(a,b))))},addBack:function(a){return this.add(null==a?this.prevObject:this.prevObject.filter(a))}});function D(a,b){while((a=a[b])&&1!==a.nodeType);return a}n.each({parent:function(a){var b=a.parentNode;return b&&11!==b.nodeType?b:null},parents:function(a){return n.dir(a,"parentNode")},parentsUntil:function(a,b,c){return n.dir(a,"parentNode",c)},next:function(a){return D(a,"nextSibling")},prev:function(a){return D(a,"previousSibling")},nextAll:function(a){return n.dir(a,"nextSibling")},prevAll:function(a){return n.dir(a,"previousSibling")},nextUntil:function(a,b,c){return n.dir(a,"nextSibling",c)},prevUntil:function(a,b,c){return n.dir(a,"previousSibling",c)},siblings:function(a){return n.sibling((a.parentNode||{}).firstChild,a)},children:function(a){return n.sibling(a.firstChild)},contents:function(a){return a.contentDocument||n.merge([],a.childNodes)}},function(a,b){n.fn[a]=function(c,d){var e=n.map(this,b,c);return"Until"!==a.slice(-5)&&(d=c),d&&"string"==typeof d&&(e=n.filter(d,e)),this.length>1&&(C[a]||n.unique(e),B.test(a)&&e.reverse()),this.pushStack(e)}});var E=/\S+/g,F={};function G(a){var b=F[a]={};return n.each(a.match(E)||[],function(a,c){b[c]=!0}),b}n.Callbacks=function(a){a="string"==typeof a?F[a]||G(a):n.extend({},a);var 
b,c,d,e,f,g,h=[],i=!a.once&&[],j=function(l){for(b=a.memory&&l,c=!0,g=e||0,e=0,f=h.length,d=!0;h&&f>g;g++)if(h[g].apply(l[0],l[1])===!1&&a.stopOnFalse){b=!1;break}d=!1,h&&(i?i.length&&j(i.shift()):b?h=[]:k.disable())},k={add:function(){if(h){var c=h.length;!function g(b){n.each(b,function(b,c){var d=n.type(c);"function"===d?a.unique&&k.has(c)||h.push(c):c&&c.length&&"string"!==d&&g(c)})}(arguments),d?f=h.length:b&&(e=c,j(b))}return this},remove:function(){return h&&n.each(arguments,function(a,b){var c;while((c=n.inArray(b,h,c))>-1)h.splice(c,1),d&&(f>=c&&f--,g>=c&&g--)}),this},has:function(a){return a?n.inArray(a,h)>-1:!(!h||!h.length)},empty:function(){return h=[],f=0,this},disable:function(){return h=i=b=void 0,this},disabled:function(){return!h},lock:function(){return i=void 0,b||k.disable(),this},locked:function(){return!i},fireWith:function(a,b){return!h||c&&!i||(b=b||[],b=[a,b.slice?b.slice():b],d?i.push(b):j(b)),this},fire:function(){return k.fireWith(this,arguments),this},fired:function(){return!!c}};return k},n.extend({Deferred:function(a){var b=[["resolve","done",n.Callbacks("once memory"),"resolved"],["reject","fail",n.Callbacks("once memory"),"rejected"],["notify","progress",n.Callbacks("memory")]],c="pending",d={state:function(){return c},always:function(){return e.done(arguments).fail(arguments),this},then:function(){var a=arguments;return n.Deferred(function(c){n.each(b,function(b,f){var g=n.isFunction(a[b])&&a[b];e[f[1]](function(){var a=g&&g.apply(this,arguments);a&&n.isFunction(a.promise)?a.promise().done(c.resolve).fail(c.reject).progress(c.notify):c[f[0]+"With"](this===d?c.promise():this,g?[a]:arguments)})}),a=null}).promise()},promise:function(a){return null!=a?n.extend(a,d):d}},e={};return d.pipe=d.then,n.each(b,function(a,f){var g=f[2],h=f[3];d[f[1]]=g.add,h&&g.add(function(){c=h},b[1^a][2].disable,b[2][2].lock),e[f[0]]=function(){return 
e[f[0]+"With"](this===e?d:this,arguments),this},e[f[0]+"With"]=g.fireWith}),d.promise(e),a&&a.call(e,e),e},when:function(a){var b=0,c=d.call(arguments),e=c.length,f=1!==e||a&&n.isFunction(a.promise)?e:0,g=1===f?a:n.Deferred(),h=function(a,b,c){return function(e){b[a]=this,c[a]=arguments.length>1?d.call(arguments):e,c===i?g.notifyWith(b,c):--f||g.resolveWith(b,c)}},i,j,k;if(e>1)for(i=new Array(e),j=new Array(e),k=new Array(e);e>b;b++)c[b]&&n.isFunction(c[b].promise)?c[b].promise().done(h(b,k,c)).fail(g.reject).progress(h(b,j,i)):--f;return f||g.resolveWith(k,c),g.promise()}});var H;n.fn.ready=function(a){return n.ready.promise().done(a),this},n.extend({isReady:!1,readyWait:1,holdReady:function(a){a?n.readyWait++:n.ready(!0)},ready:function(a){(a===!0?--n.readyWait:n.isReady)||(n.isReady=!0,a!==!0&&--n.readyWait>0||(H.resolveWith(l,[n]),n.fn.triggerHandler&&(n(l).triggerHandler("ready"),n(l).off("ready"))))}});function I(){l.removeEventListener("DOMContentLoaded",I,!1),a.removeEventListener("load",I,!1),n.ready()}n.ready.promise=function(b){return H||(H=n.Deferred(),"complete"===l.readyState?setTimeout(n.ready):(l.addEventListener("DOMContentLoaded",I,!1),a.addEventListener("load",I,!1))),H.promise(b)},n.ready.promise();var J=n.access=function(a,b,c,d,e,f,g){var h=0,i=a.length,j=null==c;if("object"===n.type(c)){e=!0;for(h in c)n.access(a,b,h,c[h],!0,f,g)}else if(void 0!==d&&(e=!0,n.isFunction(d)||(g=!0),j&&(g?(b.call(a,d),b=null):(j=b,b=function(a,b,c){return j.call(n(a),c)})),b))for(;i>h;h++)b(a[h],c,g?d:d.call(a[h],h,b(a[h],c)));return e?a:j?b.call(a):i?b(a[0],c):f};n.acceptData=function(a){return 1===a.nodeType||9===a.nodeType||!+a.nodeType};function K(){Object.defineProperty(this.cache={},0,{get:function(){return{}}}),this.expando=n.expando+K.uid++}K.uid=1,K.accepts=n.acceptData,K.prototype={key:function(a){if(!K.accepts(a))return 0;var 
b={},c=a[this.expando];if(!c){c=K.uid++;try{b[this.expando]={value:c},Object.defineProperties(a,b)}catch(d){b[this.expando]=c,n.extend(a,b)}}return this.cache[c]||(this.cache[c]={}),c},set:function(a,b,c){var d,e=this.key(a),f=this.cache[e];if("string"==typeof b)f[b]=c;else if(n.isEmptyObject(f))n.extend(this.cache[e],b);else for(d in b)f[d]=b[d];return f},get:function(a,b){var c=this.cache[this.key(a)];return void 0===b?c:c[b]},access:function(a,b,c){var d;return void 0===b||b&&"string"==typeof b&&void 0===c?(d=this.get(a,b),void 0!==d?d:this.get(a,n.camelCase(b))):(this.set(a,b,c),void 0!==c?c:b)},remove:function(a,b){var c,d,e,f=this.key(a),g=this.cache[f];if(void 0===b)this.cache[f]={};else{n.isArray(b)?d=b.concat(b.map(n.camelCase)):(e=n.camelCase(b),b in g?d=[b,e]:(d=e,d=d in g?[d]:d.match(E)||[])),c=d.length;while(c--)delete g[d[c]]}},hasData:function(a){return!n.isEmptyObject(this.cache[a[this.expando]]||{})},discard:function(a){a[this.expando]&&delete this.cache[a[this.expando]]}};var L=new K,M=new K,N=/^(?:\{[\w\W]*\}|\[[\w\W]*\])$/,O=/([A-Z])/g;function P(a,b,c){var d;if(void 0===c&&1===a.nodeType)if(d="data-"+b.replace(O,"-$1").toLowerCase(),c=a.getAttribute(d),"string"==typeof c){try{c="true"===c?!0:"false"===c?!1:"null"===c?null:+c+""===c?+c:N.test(c)?n.parseJSON(c):c}catch(e){}M.set(a,b,c)}else c=void 0;return c}n.extend({hasData:function(a){return M.hasData(a)||L.hasData(a)},data:function(a,b,c){
+return M.access(a,b,c)},removeData:function(a,b){M.remove(a,b)},_data:function(a,b,c){return L.access(a,b,c)},_removeData:function(a,b){L.remove(a,b)}}),n.fn.extend({data:function(a,b){var c,d,e,f=this[0],g=f&&f.attributes;if(void 0===a){if(this.length&&(e=M.get(f),1===f.nodeType&&!L.get(f,"hasDataAttrs"))){c=g.length;while(c--)g[c]&&(d=g[c].name,0===d.indexOf("data-")&&(d=n.camelCase(d.slice(5)),P(f,d,e[d])));L.set(f,"hasDataAttrs",!0)}return e}return"object"==typeof a?this.each(function(){M.set(this,a)}):J(this,function(b){var c,d=n.camelCase(a);if(f&&void 0===b){if(c=M.get(f,a),void 0!==c)return c;if(c=M.get(f,d),void 0!==c)return c;if(c=P(f,d,void 0),void 0!==c)return c}else this.each(function(){var c=M.get(this,d);M.set(this,d,b),-1!==a.indexOf("-")&&void 0!==c&&M.set(this,a,b)})},null,b,arguments.length>1,null,!0)},removeData:function(a){return this.each(function(){M.remove(this,a)})}}),n.extend({queue:function(a,b,c){var d;return a?(b=(b||"fx")+"queue",d=L.get(a,b),c&&(!d||n.isArray(c)?d=L.access(a,b,n.makeArray(c)):d.push(c)),d||[]):void 0},dequeue:function(a,b){b=b||"fx";var c=n.queue(a,b),d=c.length,e=c.shift(),f=n._queueHooks(a,b),g=function(){n.dequeue(a,b)};"inprogress"===e&&(e=c.shift(),d--),e&&("fx"===b&&c.unshift("inprogress"),delete f.stop,e.call(a,g,f)),!d&&f&&f.empty.fire()},_queueHooks:function(a,b){var c=b+"queueHooks";return L.get(a,c)||L.access(a,c,{empty:n.Callbacks("once memory").add(function(){L.remove(a,[b+"queue",c])})})}}),n.fn.extend({queue:function(a,b){var c=2;return"string"!=typeof a&&(b=a,a="fx",c--),arguments.length<c?n.queue(this[0],a):void 0===b?this:this.each(function(){var c=n.queue(this,a,b);n._queueHooks(this,a),"fx"===a&&"inprogress"!==c[0]&&n.dequeue(this,a)})},dequeue:function(a){return this.each(function(){n.dequeue(this,a)})},clearQueue:function(a){return this.queue(a||"fx",[])},promise:function(a,b){var c,d=1,e=n.Deferred(),f=this,g=this.length,h=function(){--d||e.resolveWith(f,[f])};"string"!=typeof a&&(b=a,a=void 
0),a=a||"fx";while(g--)c=L.get(f[g],a+"queueHooks"),c&&c.empty&&(d++,c.empty.add(h));return h(),e.promise(b)}});var Q=/[+-]?(?:\d*\.|)\d+(?:[eE][+-]?\d+|)/.source,R=["Top","Right","Bottom","Left"],S=function(a,b){return a=b||a,"none"===n.css(a,"display")||!n.contains(a.ownerDocument,a)},T=/^(?:checkbox|radio)$/i;!function(){var a=l.createDocumentFragment(),b=a.appendChild(l.createElement("div")),c=l.createElement("input");c.setAttribute("type","radio"),c.setAttribute("checked","checked"),c.setAttribute("name","t"),b.appendChild(c),k.checkClone=b.cloneNode(!0).cloneNode(!0).lastChild.checked,b.innerHTML="<textarea>x</textarea>",k.noCloneChecked=!!b.cloneNode(!0).lastChild.defaultValue}();var U="undefined";k.focusinBubbles="onfocusin"in a;var V=/^key/,W=/^(?:mouse|pointer|contextmenu)|click/,X=/^(?:focusinfocus|focusoutblur)$/,Y=/^([^.]*)(?:\.(.+)|)$/;function Z(){return!0}function $(){return!1}function _(){try{return l.activeElement}catch(a){}}n.event={global:{},add:function(a,b,c,d,e){var f,g,h,i,j,k,l,m,o,p,q,r=L.get(a);if(r){c.handler&&(f=c,c=f.handler,e=f.selector),c.guid||(c.guid=n.guid++),(i=r.events)||(i=r.events={}),(g=r.handle)||(g=r.handle=function(b){return typeof n!==U&&n.event.triggered!==b.type?n.event.dispatch.apply(a,arguments):void 0}),b=(b||"").match(E)||[""],j=b.length;while(j--)h=Y.exec(b[j])||[],o=q=h[1],p=(h[2]||"").split(".").sort(),o&&(l=n.event.special[o]||{},o=(e?l.delegateType:l.bindType)||o,l=n.event.special[o]||{},k=n.extend({type:o,origType:q,data:d,handler:c,guid:c.guid,selector:e,needsContext:e&&n.expr.match.needsContext.test(e),namespace:p.join(".")},f),(m=i[o])||(m=i[o]=[],m.delegateCount=0,l.setup&&l.setup.call(a,d,p,g)!==!1||a.addEventListener&&a.addEventListener(o,g,!1)),l.add&&(l.add.call(a,k),k.handler.guid||(k.handler.guid=c.guid)),e?m.splice(m.delegateCount++,0,k):m.push(k),n.event.global[o]=!0)}},remove:function(a,b,c,d,e){var 
f,g,h,i,j,k,l,m,o,p,q,r=L.hasData(a)&&L.get(a);if(r&&(i=r.events)){b=(b||"").match(E)||[""],j=b.length;while(j--)if(h=Y.exec(b[j])||[],o=q=h[1],p=(h[2]||"").split(".").sort(),o){l=n.event.special[o]||{},o=(d?l.delegateType:l.bindType)||o,m=i[o]||[],h=h[2]&&new RegExp("(^|\\.)"+p.join("\\.(?:.*\\.|)")+"(\\.|$)"),g=f=m.length;while(f--)k=m[f],!e&&q!==k.origType||c&&c.guid!==k.guid||h&&!h.test(k.namespace)||d&&d!==k.selector&&("**"!==d||!k.selector)||(m.splice(f,1),k.selector&&m.delegateCount--,l.remove&&l.remove.call(a,k));g&&!m.length&&(l.teardown&&l.teardown.call(a,p,r.handle)!==!1||n.removeEvent(a,o,r.handle),delete i[o])}else for(o in i)n.event.remove(a,o+b[j],c,d,!0);n.isEmptyObject(i)&&(delete r.handle,L.remove(a,"events"))}},trigger:function(b,c,d,e){var f,g,h,i,k,m,o,p=[d||l],q=j.call(b,"type")?b.type:b,r=j.call(b,"namespace")?b.namespace.split("."):[];if(g=h=d=d||l,3!==d.nodeType&&8!==d.nodeType&&!X.test(q+n.event.triggered)&&(q.indexOf(".")>=0&&(r=q.split("."),q=r.shift(),r.sort()),k=q.indexOf(":")<0&&"on"+q,b=b[n.expando]?b:new n.Event(q,"object"==typeof b&&b),b.isTrigger=e?2:3,b.namespace=r.join("."),b.namespace_re=b.namespace?new RegExp("(^|\\.)"+r.join("\\.(?:.*\\.|)")+"(\\.|$)"):null,b.result=void 0,b.target||(b.target=d),c=null==c?[b]:n.makeArray(c,[b]),o=n.event.special[q]||{},e||!o.trigger||o.trigger.apply(d,c)!==!1)){if(!e&&!o.noBubble&&!n.isWindow(d)){for(i=o.delegateType||q,X.test(i+q)||(g=g.parentNode);g;g=g.parentNode)p.push(g),h=g;h===(d.ownerDocument||l)&&p.push(h.defaultView||h.parentWindow||a)}f=0;while((g=p[f++])&&!b.isPropagationStopped())b.type=f>1?i:o.bindType||q,m=(L.get(g,"events")||{})[b.type]&&L.get(g,"handle"),m&&m.apply(g,c),m=k&&g[k],m&&m.apply&&n.acceptData(g)&&(b.result=m.apply(g,c),b.result===!1&&b.preventDefault());return 
b.type=q,e||b.isDefaultPrevented()||o._default&&o._default.apply(p.pop(),c)!==!1||!n.acceptData(d)||k&&n.isFunction(d[q])&&!n.isWindow(d)&&(h=d[k],h&&(d[k]=null),n.event.triggered=q,d[q](),n.event.triggered=void 0,h&&(d[k]=h)),b.result}},dispatch:function(a){a=n.event.fix(a);var b,c,e,f,g,h=[],i=d.call(arguments),j=(L.get(this,"events")||{})[a.type]||[],k=n.event.special[a.type]||{};if(i[0]=a,a.delegateTarget=this,!k.preDispatch||k.preDispatch.call(this,a)!==!1){h=n.event.handlers.call(this,a,j),b=0;while((f=h[b++])&&!a.isPropagationStopped()){a.currentTarget=f.elem,c=0;while((g=f.handlers[c++])&&!a.isImmediatePropagationStopped())(!a.namespace_re||a.namespace_re.test(g.namespace))&&(a.handleObj=g,a.data=g.data,e=((n.event.special[g.origType]||{}).handle||g.handler).apply(f.elem,i),void 0!==e&&(a.result=e)===!1&&(a.preventDefault(),a.stopPropagation()))}return k.postDispatch&&k.postDispatch.call(this,a),a.result}},handlers:function(a,b){var c,d,e,f,g=[],h=b.delegateCount,i=a.target;if(h&&i.nodeType&&(!a.button||"click"!==a.type))for(;i!==this;i=i.parentNode||this)if(i.disabled!==!0||"click"!==a.type){for(d=[],c=0;h>c;c++)f=b[c],e=f.selector+" ",void 0===d[e]&&(d[e]=f.needsContext?n(e,this).index(i)>=0:n.find(e,this,null,[i]).length),d[e]&&d.push(f);d.length&&g.push({elem:i,handlers:d})}return h<b.length&&g.push({elem:this,handlers:b.slice(h)}),g},props:"altKey bubbles cancelable ctrlKey currentTarget eventPhase metaKey relatedTarget shiftKey target timeStamp view which".split(" "),fixHooks:{},keyHooks:{props:"char charCode key keyCode".split(" "),filter:function(a,b){return null==a.which&&(a.which=null!=b.charCode?b.charCode:b.keyCode),a}},mouseHooks:{props:"button buttons clientX clientY offsetX offsetY pageX pageY screenX screenY toElement".split(" "),filter:function(a,b){var c,d,e,f=b.button;return 
null==a.pageX&&null!=b.clientX&&(c=a.target.ownerDocument||l,d=c.documentElement,e=c.body,a.pageX=b.clientX+(d&&d.scrollLeft||e&&e.scrollLeft||0)-(d&&d.clientLeft||e&&e.clientLeft||0),a.pageY=b.clientY+(d&&d.scrollTop||e&&e.scrollTop||0)-(d&&d.clientTop||e&&e.clientTop||0)),a.which||void 0===f||(a.which=1&f?1:2&f?3:4&f?2:0),a}},fix:function(a){if(a[n.expando])return a;var b,c,d,e=a.type,f=a,g=this.fixHooks[e];g||(this.fixHooks[e]=g=W.test(e)?this.mouseHooks:V.test(e)?this.keyHooks:{}),d=g.props?this.props.concat(g.props):this.props,a=new n.Event(f),b=d.length;while(b--)c=d[b],a[c]=f[c];return a.target||(a.target=l),3===a.target.nodeType&&(a.target=a.target.parentNode),g.filter?g.filter(a,f):a},special:{load:{noBubble:!0},focus:{trigger:function(){return this!==_()&&this.focus?(this.focus(),!1):void 0},delegateType:"focusin"},blur:{trigger:function(){return this===_()&&this.blur?(this.blur(),!1):void 0},delegateType:"focusout"},click:{trigger:function(){return"checkbox"===this.type&&this.click&&n.nodeName(this,"input")?(this.click(),!1):void 0},_default:function(a){return n.nodeName(a.target,"a")}},beforeunload:{postDispatch:function(a){void 0!==a.result&&a.originalEvent&&(a.originalEvent.returnValue=a.result)}}},simulate:function(a,b,c,d){var e=n.extend(new n.Event,c,{type:a,isSimulated:!0,originalEvent:{}});d?n.event.trigger(e,null,b):n.event.dispatch.call(b,e),e.isDefaultPrevented()&&c.preventDefault()}},n.removeEvent=function(a,b,c){a.removeEventListener&&a.removeEventListener(b,c,!1)},n.Event=function(a,b){return this instanceof n.Event?(a&&a.type?(this.originalEvent=a,this.type=a.type,this.isDefaultPrevented=a.defaultPrevented||void 0===a.defaultPrevented&&a.returnValue===!1?Z:$):this.type=a,b&&n.extend(this,b),this.timeStamp=a&&a.timeStamp||n.now(),void(this[n.expando]=!0)):new n.Event(a,b)},n.Event.prototype={isDefaultPrevented:$,isPropagationStopped:$,isImmediatePropagationStopped:$,preventDefault:function(){var 
a=this.originalEvent;this.isDefaultPrevented=Z,a&&a.preventDefault&&a.preventDefault()},stopPropagation:function(){var a=this.originalEvent;this.isPropagationStopped=Z,a&&a.stopPropagation&&a.stopPropagation()},stopImmediatePropagation:function(){var a=this.originalEvent;this.isImmediatePropagationStopped=Z,a&&a.stopImmediatePropagation&&a.stopImmediatePropagation(),this.stopPropagation()}},n.each({mouseenter:"mouseover",mouseleave:"mouseout",pointerenter:"pointerover",pointerleave:"pointerout"},function(a,b){n.event.special[a]={delegateType:b,bindType:b,handle:function(a){var c,d=this,e=a.relatedTarget,f=a.handleObj;return(!e||e!==d&&!n.contains(d,e))&&(a.type=f.origType,c=f.handler.apply(this,arguments),a.type=b),c}}}),k.focusinBubbles||n.each({focus:"focusin",blur:"focusout"},function(a,b){var c=function(a){n.event.simulate(b,a.target,n.event.fix(a),!0)};n.event.special[b]={setup:function(){var d=this.ownerDocument||this,e=L.access(d,b);e||d.addEventListener(a,c,!0),L.access(d,b,(e||0)+1)},teardown:function(){var d=this.ownerDocument||this,e=L.access(d,b)-1;e?L.access(d,b,e):(d.removeEventListener(a,c,!0),L.remove(d,b))}}}),n.fn.extend({on:function(a,b,c,d,e){var f,g;if("object"==typeof a){"string"!=typeof b&&(c=c||b,b=void 0);for(g in a)this.on(g,b,c,a[g],e);return this}if(null==c&&null==d?(d=b,c=b=void 0):null==d&&("string"==typeof b?(d=c,c=void 0):(d=c,c=b,b=void 0)),d===!1)d=$;else if(!d)return this;return 1===e&&(f=d,d=function(a){return n().off(a),f.apply(this,arguments)},d.guid=f.guid||(f.guid=n.guid++)),this.each(function(){n.event.add(this,a,d,c,b)})},one:function(a,b,c,d){return this.on(a,b,c,d,1)},off:function(a,b,c){var d,e;if(a&&a.preventDefault&&a.handleObj)return d=a.handleObj,n(a.delegateTarget).off(d.namespace?d.origType+"."+d.namespace:d.origType,d.selector,d.handler),this;if("object"==typeof a){for(e in a)this.off(e,b,a[e]);return this}return(b===!1||"function"==typeof b)&&(c=b,b=void 
0),c===!1&&(c=$),this.each(function(){n.event.remove(this,a,c,b)})},trigger:function(a,b){return this.each(function(){n.event.trigger(a,b,this)})},triggerHandler:function(a,b){var c=this[0];return c?n.event.trigger(a,b,c,!0):void 0}});var aa=/<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\w:]+)[^>]*)\/>/gi,ba=/<([\w:]+)/,ca=/<|&#?\w+;/,da=/<(?:script|style|link)/i,ea=/checked\s*(?:[^=]|=\s*.checked.)/i,fa=/^$|\/(?:java|ecma)script/i,ga=/^true\/(.*)/,ha=/^\s*<!(?:\[CDATA\[|--)|(?:\]\]|--)>\s*$/g,ia={option:[1,"<select multiple='multiple'>","</select>"],thead:[1,"<table>","</table>"],col:[2,"<table><colgroup>","</colgroup></table>"],tr:[2,"<table><tbody>","</tbody></table>"],td:[3,"<table><tbody><tr>","</tr></tbody></table>"],_default:[0,"",""]};ia.optgroup=ia.option,ia.tbody=ia.tfoot=ia.colgroup=ia.caption=ia.thead,ia.th=ia.td;function ja(a,b){return n.nodeName(a,"table")&&n.nodeName(11!==b.nodeType?b:b.firstChild,"tr")?a.getElementsByTagName("tbody")[0]||a.appendChild(a.ownerDocument.createElement("tbody")):a}function ka(a){return a.type=(null!==a.getAttribute("type"))+"/"+a.type,a}function la(a){var b=ga.exec(a.type);return b?a.type=b[1]:a.removeAttribute("type"),a}function ma(a,b){for(var c=0,d=a.length;d>c;c++)L.set(a[c],"globalEval",!b||L.get(b[c],"globalEval"))}function na(a,b){var c,d,e,f,g,h,i,j;if(1===b.nodeType){if(L.hasData(a)&&(f=L.access(a),g=L.set(b,f),j=f.events)){delete g.handle,g.events={};for(e in j)for(c=0,d=j[e].length;d>c;c++)n.event.add(b,e,j[e][c])}M.hasData(a)&&(h=M.access(a),i=n.extend({},h),M.set(b,i))}}function oa(a,b){var c=a.getElementsByTagName?a.getElementsByTagName(b||"*"):a.querySelectorAll?a.querySelectorAll(b||"*"):[];return void 0===b||b&&n.nodeName(a,b)?n.merge([a],c):c}function pa(a,b){var c=b.nodeName.toLowerCase();"input"===c&&T.test(a.type)?b.checked=a.checked:("input"===c||"textarea"===c)&&(b.defaultValue=a.defaultValue)}n.extend({clone:function(a,b,c){var 
d,e,f,g,h=a.cloneNode(!0),i=n.contains(a.ownerDocument,a);if(!(k.noCloneChecked||1!==a.nodeType&&11!==a.nodeType||n.isXMLDoc(a)))for(g=oa(h),f=oa(a),d=0,e=f.length;e>d;d++)pa(f[d],g[d]);if(b)if(c)for(f=f||oa(a),g=g||oa(h),d=0,e=f.length;e>d;d++)na(f[d],g[d]);else na(a,h);return g=oa(h,"script"),g.length>0&&ma(g,!i&&oa(a,"script")),h},buildFragment:function(a,b,c,d){for(var e,f,g,h,i,j,k=b.createDocumentFragment(),l=[],m=0,o=a.length;o>m;m++)if(e=a[m],e||0===e)if("object"===n.type(e))n.merge(l,e.nodeType?[e]:e);else if(ca.test(e)){f=f||k.appendChild(b.createElement("div")),g=(ba.exec(e)||["",""])[1].toLowerCase(),h=ia[g]||ia._default,f.innerHTML=h[1]+e.replace(aa,"<$1></$2>")+h[2],j=h[0];while(j--)f=f.lastChild;n.merge(l,f.childNodes),f=k.firstChild,f.textContent=""}else l.push(b.createTextNode(e));k.textContent="",m=0;while(e=l[m++])if((!d||-1===n.inArray(e,d))&&(i=n.contains(e.ownerDocument,e),f=oa(k.appendChild(e),"script"),i&&ma(f),c)){j=0;while(e=f[j++])fa.test(e.type||"")&&c.push(e)}return k},cleanData:function(a){for(var b,c,d,e,f=n.event.special,g=0;void 0!==(c=a[g]);g++){if(n.acceptData(c)&&(e=c[L.expando],e&&(b=L.cache[e]))){if(b.events)for(d in b.events)f[d]?n.event.remove(c,d):n.removeEvent(c,d,b.handle);L.cache[e]&&delete L.cache[e]}delete M.cache[c[M.expando]]}}}),n.fn.extend({text:function(a){return J(this,function(a){return void 0===a?n.text(this):this.empty().each(function(){(1===this.nodeType||11===this.nodeType||9===this.nodeType)&&(this.textContent=a)})},null,a,arguments.length)},append:function(){return this.domManip(arguments,function(a){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var b=ja(this,a);b.appendChild(a)}})},prepend:function(){return this.domManip(arguments,function(a){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var b=ja(this,a);b.insertBefore(a,b.firstChild)}})},before:function(){return 
this.domManip(arguments,function(a){this.parentNode&&this.parentNode.insertBefore(a,this)})},after:function(){return this.domManip(arguments,function(a){this.parentNode&&this.parentNode.insertBefore(a,this.nextSibling)})},remove:function(a,b){for(var c,d=a?n.filter(a,this):this,e=0;null!=(c=d[e]);e++)b||1!==c.nodeType||n.cleanData(oa(c)),c.parentNode&&(b&&n.contains(c.ownerDocument,c)&&ma(oa(c,"script")),c.parentNode.removeChild(c));return this},empty:function(){for(var a,b=0;null!=(a=this[b]);b++)1===a.nodeType&&(n.cleanData(oa(a,!1)),a.textContent="");return this},clone:function(a,b){return a=null==a?!1:a,b=null==b?a:b,this.map(function(){return n.clone(this,a,b)})},html:function(a){return J(this,function(a){var b=this[0]||{},c=0,d=this.length;if(void 0===a&&1===b.nodeType)return b.innerHTML;if("string"==typeof a&&!da.test(a)&&!ia[(ba.exec(a)||["",""])[1].toLowerCase()]){a=a.replace(aa,"<$1></$2>");try{for(;d>c;c++)b=this[c]||{},1===b.nodeType&&(n.cleanData(oa(b,!1)),b.innerHTML=a);b=0}catch(e){}}b&&this.empty().append(a)},null,a,arguments.length)},replaceWith:function(){var a=arguments[0];return this.domManip(arguments,function(b){a=this.parentNode,n.cleanData(oa(this)),a&&a.replaceChild(b,this)}),a&&(a.length||a.nodeType)?this:this.remove()},detach:function(a){return this.remove(a,!0)},domManip:function(a,b){a=e.apply([],a);var c,d,f,g,h,i,j=0,l=this.length,m=this,o=l-1,p=a[0],q=n.isFunction(p);if(q||l>1&&"string"==typeof p&&!k.checkClone&&ea.test(p))return this.each(function(c){var 
d=m.eq(c);q&&(a[0]=p.call(this,c,d.html())),d.domManip(a,b)});if(l&&(c=n.buildFragment(a,this[0].ownerDocument,!1,this),d=c.firstChild,1===c.childNodes.length&&(c=d),d)){for(f=n.map(oa(c,"script"),ka),g=f.length;l>j;j++)h=c,j!==o&&(h=n.clone(h,!0,!0),g&&n.merge(f,oa(h,"script"))),b.call(this[j],h,j);if(g)for(i=f[f.length-1].ownerDocument,n.map(f,la),j=0;g>j;j++)h=f[j],fa.test(h.type||"")&&!L.access(h,"globalEval")&&n.contains(i,h)&&(h.src?n._evalUrl&&n._evalUrl(h.src):n.globalEval(h.textContent.replace(ha,"")))}return this}}),n.each({appendTo:"append",prependTo:"prepend",insertBefore:"before",insertAfter:"after",replaceAll:"replaceWith"},function(a,b){n.fn[a]=function(a){for(var c,d=[],e=n(a),g=e.length-1,h=0;g>=h;h++)c=h===g?this:this.clone(!0),n(e[h])[b](c),f.apply(d,c.get());return this.pushStack(d)}});var qa,ra={};function sa(b,c){var d,e=n(c.createElement(b)).appendTo(c.body),f=a.getDefaultComputedStyle&&(d=a.getDefaultComputedStyle(e[0]))?d.display:n.css(e[0],"display");return e.detach(),f}function ta(a){var b=l,c=ra[a];return c||(c=sa(a,b),"none"!==c&&c||(qa=(qa||n("<iframe frameborder='0' width='0' height='0'/>")).appendTo(b.documentElement),b=qa[0].contentDocument,b.write(),b.close(),c=sa(a,b),qa.detach()),ra[a]=c),c}var ua=/^margin/,va=new RegExp("^("+Q+")(?!px)[a-z%]+$","i"),wa=function(b){return b.ownerDocument.defaultView.opener?b.ownerDocument.defaultView.getComputedStyle(b,null):a.getComputedStyle(b,null)};function xa(a,b,c){var d,e,f,g,h=a.style;return c=c||wa(a),c&&(g=c.getPropertyValue(b)||c[b]),c&&(""!==g||n.contains(a.ownerDocument,a)||(g=n.style(a,b)),va.test(g)&&ua.test(b)&&(d=h.width,e=h.minWidth,f=h.maxWidth,h.minWidth=h.maxWidth=h.width=g,g=c.width,h.width=d,h.minWidth=e,h.maxWidth=f)),void 0!==g?g+"":g}function ya(a,b){return{get:function(){return a()?void delete this.get:(this.get=b).apply(this,arguments)}}}!function(){var 
b,c,d=l.documentElement,e=l.createElement("div"),f=l.createElement("div");if(f.style){f.style.backgroundClip="content-box",f.cloneNode(!0).style.backgroundClip="",k.clearCloneStyle="content-box"===f.style.backgroundClip,e.style.cssText="border:0;width:0;height:0;top:0;left:-9999px;margin-top:1px;position:absolute",e.appendChild(f);function g(){f.style.cssText="-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;display:block;margin-top:1%;top:1%;border:1px;padding:1px;width:4px;position:absolute",f.innerHTML="",d.appendChild(e);var g=a.getComputedStyle(f,null);b="1%"!==g.top,c="4px"===g.width,d.removeChild(e)}a.getComputedStyle&&n.extend(k,{pixelPosition:function(){return g(),b},boxSizingReliable:function(){return null==c&&g(),c},reliableMarginRight:function(){var b,c=f.appendChild(l.createElement("div"));return c.style.cssText=f.style.cssText="-webkit-box-sizing:content-box;-moz-box-sizing:content-box;box-sizing:content-box;display:block;margin:0;border:0;padding:0",c.style.marginRight=c.style.width="0",f.style.width="1px",d.appendChild(e),b=!parseFloat(a.getComputedStyle(c,null).marginRight),d.removeChild(e),f.removeChild(c),b}})}}(),n.swap=function(a,b,c,d){var e,f,g={};for(f in b)g[f]=a.style[f],a.style[f]=b[f];e=c.apply(a,d||[]);for(f in b)a.style[f]=g[f];return e};var za=/^(none|table(?!-c[ea]).+)/,Aa=new RegExp("^("+Q+")(.*)$","i"),Ba=new RegExp("^([+-])=("+Q+")","i"),Ca={position:"absolute",visibility:"hidden",display:"block"},Da={letterSpacing:"0",fontWeight:"400"},Ea=["Webkit","O","Moz","ms"];function Fa(a,b){if(b in a)return b;var c=b[0].toUpperCase()+b.slice(1),d=b,e=Ea.length;while(e--)if(b=Ea[e]+c,b in a)return b;return d}function Ga(a,b,c){var d=Aa.exec(b);return d?Math.max(0,d[1]-(c||0))+(d[2]||"px"):b}function Ha(a,b,c,d,e){for(var 
f=c===(d?"border":"content")?4:"width"===b?1:0,g=0;4>f;f+=2)"margin"===c&&(g+=n.css(a,c+R[f],!0,e)),d?("content"===c&&(g-=n.css(a,"padding"+R[f],!0,e)),"margin"!==c&&(g-=n.css(a,"border"+R[f]+"Width",!0,e))):(g+=n.css(a,"padding"+R[f],!0,e),"padding"!==c&&(g+=n.css(a,"border"+R[f]+"Width",!0,e)));return g}function Ia(a,b,c){var d=!0,e="width"===b?a.offsetWidth:a.offsetHeight,f=wa(a),g="border-box"===n.css(a,"boxSizing",!1,f);if(0>=e||null==e){if(e=xa(a,b,f),(0>e||null==e)&&(e=a.style[b]),va.test(e))return e;d=g&&(k.boxSizingReliable()||e===a.style[b]),e=parseFloat(e)||0}return e+Ha(a,b,c||(g?"border":"content"),d,f)+"px"}function Ja(a,b){for(var c,d,e,f=[],g=0,h=a.length;h>g;g++)d=a[g],d.style&&(f[g]=L.get(d,"olddisplay"),c=d.style.display,b?(f[g]||"none"!==c||(d.style.display=""),""===d.style.display&&S(d)&&(f[g]=L.access(d,"olddisplay",ta(d.nodeName)))):(e=S(d),"none"===c&&e||L.set(d,"olddisplay",e?c:n.css(d,"display"))));for(g=0;h>g;g++)d=a[g],d.style&&(b&&"none"!==d.style.display&&""!==d.style.display||(d.style.display=b?f[g]||"":"none"));return a}n.extend({cssHooks:{opacity:{get:function(a,b){if(b){var c=xa(a,"opacity");return""===c?"1":c}}}},cssNumber:{columnCount:!0,fillOpacity:!0,flexGrow:!0,flexShrink:!0,fontWeight:!0,lineHeight:!0,opacity:!0,order:!0,orphans:!0,widows:!0,zIndex:!0,zoom:!0},cssProps:{"float":"cssFloat"},style:function(a,b,c,d){if(a&&3!==a.nodeType&&8!==a.nodeType&&a.style){var e,f,g,h=n.camelCase(b),i=a.style;return b=n.cssProps[h]||(n.cssProps[h]=Fa(i,h)),g=n.cssHooks[b]||n.cssHooks[h],void 0===c?g&&"get"in g&&void 0!==(e=g.get(a,!1,d))?e:i[b]:(f=typeof c,"string"===f&&(e=Ba.exec(c))&&(c=(e[1]+1)*e[2]+parseFloat(n.css(a,b)),f="number"),null!=c&&c===c&&("number"!==f||n.cssNumber[h]||(c+="px"),k.clearCloneStyle||""!==c||0!==b.indexOf("background")||(i[b]="inherit"),g&&"set"in g&&void 0===(c=g.set(a,c,d))||(i[b]=c)),void 0)}},css:function(a,b,c,d){var e,f,g,h=n.camelCase(b);return 
b=n.cssProps[h]||(n.cssProps[h]=Fa(a.style,h)),g=n.cssHooks[b]||n.cssHooks[h],g&&"get"in g&&(e=g.get(a,!0,c)),void 0===e&&(e=xa(a,b,d)),"normal"===e&&b in Da&&(e=Da[b]),""===c||c?(f=parseFloat(e),c===!0||n.isNumeric(f)?f||0:e):e}}),n.each(["height","width"],function(a,b){n.cssHooks[b]={get:function(a,c,d){return c?za.test(n.css(a,"display"))&&0===a.offsetWidth?n.swap(a,Ca,function(){return Ia(a,b,d)}):Ia(a,b,d):void 0},set:function(a,c,d){var e=d&&wa(a);return Ga(a,c,d?Ha(a,b,d,"border-box"===n.css(a,"boxSizing",!1,e),e):0)}}}),n.cssHooks.marginRight=ya(k.reliableMarginRight,function(a,b){return b?n.swap(a,{display:"inline-block"},xa,[a,"marginRight"]):void 0}),n.each({margin:"",padding:"",border:"Width"},function(a,b){n.cssHooks[a+b]={expand:function(c){for(var d=0,e={},f="string"==typeof c?c.split(" "):[c];4>d;d++)e[a+R[d]+b]=f[d]||f[d-2]||f[0];return e}},ua.test(a)||(n.cssHooks[a+b].set=Ga)}),n.fn.extend({css:function(a,b){return J(this,function(a,b,c){var d,e,f={},g=0;if(n.isArray(b)){for(d=wa(a),e=b.length;e>g;g++)f[b[g]]=n.css(a,b[g],!1,d);return f}return void 0!==c?n.style(a,b,c):n.css(a,b)},a,b,arguments.length>1)},show:function(){return Ja(this,!0)},hide:function(){return Ja(this)},toggle:function(a){return"boolean"==typeof a?a?this.show():this.hide():this.each(function(){S(this)?n(this).show():n(this).hide()})}});function Ka(a,b,c,d,e){return new Ka.prototype.init(a,b,c,d,e)}n.Tween=Ka,Ka.prototype={constructor:Ka,init:function(a,b,c,d,e,f){this.elem=a,this.prop=c,this.easing=e||"swing",this.options=b,this.start=this.now=this.cur(),this.end=d,this.unit=f||(n.cssNumber[c]?"":"px")},cur:function(){var a=Ka.propHooks[this.prop];return a&&a.get?a.get(this):Ka.propHooks._default.get(this)},run:function(a){var b,c=Ka.propHooks[this.prop];return 
this.options.duration?this.pos=b=n.easing[this.easing](a,this.options.duration*a,0,1,this.options.duration):this.pos=b=a,this.now=(this.end-this.start)*b+this.start,this.options.step&&this.options.step.call(this.elem,this.now,this),c&&c.set?c.set(this):Ka.propHooks._default.set(this),this}},Ka.prototype.init.prototype=Ka.prototype,Ka.propHooks={_default:{get:function(a){var b;return null==a.elem[a.prop]||a.elem.style&&null!=a.elem.style[a.prop]?(b=n.css(a.elem,a.prop,""),b&&"auto"!==b?b:0):a.elem[a.prop]},set:function(a){n.fx.step[a.prop]?n.fx.step[a.prop](a):a.elem.style&&(null!=a.elem.style[n.cssProps[a.prop]]||n.cssHooks[a.prop])?n.style(a.elem,a.prop,a.now+a.unit):a.elem[a.prop]=a.now}}},Ka.propHooks.scrollTop=Ka.propHooks.scrollLeft={set:function(a){a.elem.nodeType&&a.elem.parentNode&&(a.elem[a.prop]=a.now)}},n.easing={linear:function(a){return a},swing:function(a){return.5-Math.cos(a*Math.PI)/2}},n.fx=Ka.prototype.init,n.fx.step={};var La,Ma,Na=/^(?:toggle|show|hide)$/,Oa=new RegExp("^(?:([+-])=|)("+Q+")([a-z%]*)$","i"),Pa=/queueHooks$/,Qa=[Va],Ra={"*":[function(a,b){var c=this.createTween(a,b),d=c.cur(),e=Oa.exec(b),f=e&&e[3]||(n.cssNumber[a]?"":"px"),g=(n.cssNumber[a]||"px"!==f&&+d)&&Oa.exec(n.css(c.elem,a)),h=1,i=20;if(g&&g[3]!==f){f=f||g[3],e=e||[],g=+d||1;do h=h||".5",g/=h,n.style(c.elem,a,g+f);while(h!==(h=c.cur()/d)&&1!==h&&--i)}return e&&(g=c.start=+g||+d||0,c.unit=f,c.end=e[1]?g+(e[1]+1)*e[2]:+e[2]),c}]};function Sa(){return setTimeout(function(){La=void 0}),La=n.now()}function Ta(a,b){var c,d=0,e={height:a};for(b=b?1:0;4>d;d+=2-b)c=R[d],e["margin"+c]=e["padding"+c]=a;return b&&(e.opacity=e.width=a),e}function Ua(a,b,c){for(var d,e=(Ra[b]||[]).concat(Ra["*"]),f=0,g=e.length;g>f;f++)if(d=e[f].call(c,b,a))return d}function Va(a,b,c){var 
d,e,f,g,h,i,j,k,l=this,m={},o=a.style,p=a.nodeType&&S(a),q=L.get(a,"fxshow");c.queue||(h=n._queueHooks(a,"fx"),null==h.unqueued&&(h.unqueued=0,i=h.empty.fire,h.empty.fire=function(){h.unqueued||i()}),h.unqueued++,l.always(function(){l.always(function(){h.unqueued--,n.queue(a,"fx").length||h.empty.fire()})})),1===a.nodeType&&("height"in b||"width"in b)&&(c.overflow=[o.overflow,o.overflowX,o.overflowY],j=n.css(a,"display"),k="none"===j?L.get(a,"olddisplay")||ta(a.nodeName):j,"inline"===k&&"none"===n.css(a,"float")&&(o.display="inline-block")),c.overflow&&(o.overflow="hidden",l.always(function(){o.overflow=c.overflow[0],o.overflowX=c.overflow[1],o.overflowY=c.overflow[2]}));for(d in b)if(e=b[d],Na.exec(e)){if(delete b[d],f=f||"toggle"===e,e===(p?"hide":"show")){if("show"!==e||!q||void 0===q[d])continue;p=!0}m[d]=q&&q[d]||n.style(a,d)}else j=void 0;if(n.isEmptyObject(m))"inline"===("none"===j?ta(a.nodeName):j)&&(o.display=j);else{q?"hidden"in q&&(p=q.hidden):q=L.access(a,"fxshow",{}),f&&(q.hidden=!p),p?n(a).show():l.done(function(){n(a).hide()}),l.done(function(){var b;L.remove(a,"fxshow");for(b in m)n.style(a,b,m[b])});for(d in m)g=Ua(p?q[d]:0,d,l),d in q||(q[d]=g.start,p&&(g.end=g.start,g.start="width"===d||"height"===d?1:0))}}function Wa(a,b){var c,d,e,f,g;for(c in a)if(d=n.camelCase(c),e=b[d],f=a[c],n.isArray(f)&&(e=f[1],f=a[c]=f[0]),c!==d&&(a[d]=f,delete a[c]),g=n.cssHooks[d],g&&"expand"in g){f=g.expand(f),delete a[d];for(c in f)c in a||(a[c]=f[c],b[c]=e)}else b[d]=e}function Xa(a,b,c){var d,e,f=0,g=Qa.length,h=n.Deferred().always(function(){delete i.elem}),i=function(){if(e)return!1;for(var b=La||Sa(),c=Math.max(0,j.startTime+j.duration-b),d=c/j.duration||0,f=1-d,g=0,i=j.tweens.length;i>g;g++)j.tweens[g].run(f);return 
h.notifyWith(a,[j,f,c]),1>f&&i?c:(h.resolveWith(a,[j]),!1)},j=h.promise({elem:a,props:n.extend({},b),opts:n.extend(!0,{specialEasing:{}},c),originalProperties:b,originalOptions:c,startTime:La||Sa(),duration:c.duration,tweens:[],createTween:function(b,c){var d=n.Tween(a,j.opts,b,c,j.opts.specialEasing[b]||j.opts.easing);return j.tweens.push(d),d},stop:function(b){var c=0,d=b?j.tweens.length:0;if(e)return this;for(e=!0;d>c;c++)j.tweens[c].run(1);return b?h.resolveWith(a,[j,b]):h.rejectWith(a,[j,b]),this}}),k=j.props;for(Wa(k,j.opts.specialEasing);g>f;f++)if(d=Qa[f].call(j,a,k,j.opts))return d;return n.map(k,Ua,j),n.isFunction(j.opts.start)&&j.opts.start.call(a,j),n.fx.timer(n.extend(i,{elem:a,anim:j,queue:j.opts.queue})),j.progress(j.opts.progress).done(j.opts.done,j.opts.complete).fail(j.opts.fail).always(j.opts.always)}n.Animation=n.extend(Xa,{tweener:function(a,b){n.isFunction(a)?(b=a,a=["*"]):a=a.split(" ");for(var c,d=0,e=a.length;e>d;d++)c=a[d],Ra[c]=Ra[c]||[],Ra[c].unshift(b)},prefilter:function(a,b){b?Qa.unshift(a):Qa.push(a)}}),n.speed=function(a,b,c){var d=a&&"object"==typeof a?n.extend({},a):{complete:c||!c&&b||n.isFunction(a)&&a,duration:a,easing:c&&b||b&&!n.isFunction(b)&&b};return d.duration=n.fx.off?0:"number"==typeof d.duration?d.duration:d.duration in n.fx.speeds?n.fx.speeds[d.duration]:n.fx.speeds._default,(null==d.queue||d.queue===!0)&&(d.queue="fx"),d.old=d.complete,d.complete=function(){n.isFunction(d.old)&&d.old.call(this),d.queue&&n.dequeue(this,d.queue)},d},n.fn.extend({fadeTo:function(a,b,c,d){return this.filter(S).css("opacity",0).show().end().animate({opacity:b},a,c,d)},animate:function(a,b,c,d){var e=n.isEmptyObject(a),f=n.speed(b,c,d),g=function(){var b=Xa(this,n.extend({},a),f);(e||L.get(this,"finish"))&&b.stop(!0)};return g.finish=g,e||f.queue===!1?this.each(g):this.queue(f.queue,g)},stop:function(a,b,c){var d=function(a){var b=a.stop;delete a.stop,b(c)};return"string"!=typeof a&&(c=b,b=a,a=void 
0),b&&a!==!1&&this.queue(a||"fx",[]),this.each(function(){var b=!0,e=null!=a&&a+"queueHooks",f=n.timers,g=L.get(this);if(e)g[e]&&g[e].stop&&d(g[e]);else for(e in g)g[e]&&g[e].stop&&Pa.test(e)&&d(g[e]);for(e=f.length;e--;)f[e].elem!==this||null!=a&&f[e].queue!==a||(f[e].anim.stop(c),b=!1,f.splice(e,1));(b||!c)&&n.dequeue(this,a)})},finish:function(a){return a!==!1&&(a=a||"fx"),this.each(function(){var b,c=L.get(this),d=c[a+"queue"],e=c[a+"queueHooks"],f=n.timers,g=d?d.length:0;for(c.finish=!0,n.queue(this,a,[]),e&&e.stop&&e.stop.call(this,!0),b=f.length;b--;)f[b].elem===this&&f[b].queue===a&&(f[b].anim.stop(!0),f.splice(b,1));for(b=0;g>b;b++)d[b]&&d[b].finish&&d[b].finish.call(this);delete c.finish})}}),n.each(["toggle","show","hide"],function(a,b){var c=n.fn[b];n.fn[b]=function(a,d,e){return null==a||"boolean"==typeof a?c.apply(this,arguments):this.animate(Ta(b,!0),a,d,e)}}),n.each({slideDown:Ta("show"),slideUp:Ta("hide"),slideToggle:Ta("toggle"),fadeIn:{opacity:"show"},fadeOut:{opacity:"hide"},fadeToggle:{opacity:"toggle"}},function(a,b){n.fn[a]=function(a,c,d){return this.animate(b,a,c,d)}}),n.timers=[],n.fx.tick=function(){var a,b=0,c=n.timers;for(La=n.now();b<c.length;b++)a=c[b],a()||c[b]!==a||c.splice(b--,1);c.length||n.fx.stop(),La=void 0},n.fx.timer=function(a){n.timers.push(a),a()?n.fx.start():n.timers.pop()},n.fx.interval=13,n.fx.start=function(){Ma||(Ma=setInterval(n.fx.tick,n.fx.interval))},n.fx.stop=function(){clearInterval(Ma),Ma=null},n.fx.speeds={slow:600,fast:200,_default:400},n.fn.delay=function(a,b){return a=n.fx?n.fx.speeds[a]||a:a,b=b||"fx",this.queue(b,function(b,c){var d=setTimeout(b,a);c.stop=function(){clearTimeout(d)}})},function(){var a=l.createElement("input"),b=l.createElement("select"),c=b.appendChild(l.createElement("option"));a.type="checkbox",k.checkOn=""!==a.value,k.optSelected=c.selected,b.disabled=!0,k.optDisabled=!c.disabled,a=l.createElement("input"),a.value="t",a.type="radio",k.radioValue="t"===a.value}();var 
Ya,Za,$a=n.expr.attrHandle;n.fn.extend({attr:function(a,b){return J(this,n.attr,a,b,arguments.length>1)},removeAttr:function(a){return this.each(function(){n.removeAttr(this,a)})}}),n.extend({attr:function(a,b,c){var d,e,f=a.nodeType;if(a&&3!==f&&8!==f&&2!==f)return typeof a.getAttribute===U?n.prop(a,b,c):(1===f&&n.isXMLDoc(a)||(b=b.toLowerCase(),d=n.attrHooks[b]||(n.expr.match.bool.test(b)?Za:Ya)),
+void 0===c?d&&"get"in d&&null!==(e=d.get(a,b))?e:(e=n.find.attr(a,b),null==e?void 0:e):null!==c?d&&"set"in d&&void 0!==(e=d.set(a,c,b))?e:(a.setAttribute(b,c+""),c):void n.removeAttr(a,b))},removeAttr:function(a,b){var c,d,e=0,f=b&&b.match(E);if(f&&1===a.nodeType)while(c=f[e++])d=n.propFix[c]||c,n.expr.match.bool.test(c)&&(a[d]=!1),a.removeAttribute(c)},attrHooks:{type:{set:function(a,b){if(!k.radioValue&&"radio"===b&&n.nodeName(a,"input")){var c=a.value;return a.setAttribute("type",b),c&&(a.value=c),b}}}}}),Za={set:function(a,b,c){return b===!1?n.removeAttr(a,c):a.setAttribute(c,c),c}},n.each(n.expr.match.bool.source.match(/\w+/g),function(a,b){var c=$a[b]||n.find.attr;$a[b]=function(a,b,d){var e,f;return d||(f=$a[b],$a[b]=e,e=null!=c(a,b,d)?b.toLowerCase():null,$a[b]=f),e}});var _a=/^(?:input|select|textarea|button)$/i;n.fn.extend({prop:function(a,b){return J(this,n.prop,a,b,arguments.length>1)},removeProp:function(a){return this.each(function(){delete this[n.propFix[a]||a]})}}),n.extend({propFix:{"for":"htmlFor","class":"className"},prop:function(a,b,c){var d,e,f,g=a.nodeType;if(a&&3!==g&&8!==g&&2!==g)return f=1!==g||!n.isXMLDoc(a),f&&(b=n.propFix[b]||b,e=n.propHooks[b]),void 0!==c?e&&"set"in e&&void 0!==(d=e.set(a,c,b))?d:a[b]=c:e&&"get"in e&&null!==(d=e.get(a,b))?d:a[b]},propHooks:{tabIndex:{get:function(a){return a.hasAttribute("tabindex")||_a.test(a.nodeName)||a.href?a.tabIndex:-1}}}}),k.optSelected||(n.propHooks.selected={get:function(a){var b=a.parentNode;return b&&b.parentNode&&b.parentNode.selectedIndex,null}}),n.each(["tabIndex","readOnly","maxLength","cellSpacing","cellPadding","rowSpan","colSpan","useMap","frameBorder","contentEditable"],function(){n.propFix[this.toLowerCase()]=this});var ab=/[\t\r\n\f]/g;n.fn.extend({addClass:function(a){var b,c,d,e,f,g,h="string"==typeof a&&a,i=0,j=this.length;if(n.isFunction(a))return 
this.each(function(b){n(this).addClass(a.call(this,b,this.className))});if(h)for(b=(a||"").match(E)||[];j>i;i++)if(c=this[i],d=1===c.nodeType&&(c.className?(" "+c.className+" ").replace(ab," "):" ")){f=0;while(e=b[f++])d.indexOf(" "+e+" ")<0&&(d+=e+" ");g=n.trim(d),c.className!==g&&(c.className=g)}return this},removeClass:function(a){var b,c,d,e,f,g,h=0===arguments.length||"string"==typeof a&&a,i=0,j=this.length;if(n.isFunction(a))return this.each(function(b){n(this).removeClass(a.call(this,b,this.className))});if(h)for(b=(a||"").match(E)||[];j>i;i++)if(c=this[i],d=1===c.nodeType&&(c.className?(" "+c.className+" ").replace(ab," "):"")){f=0;while(e=b[f++])while(d.indexOf(" "+e+" ")>=0)d=d.replace(" "+e+" "," ");g=a?n.trim(d):"",c.className!==g&&(c.className=g)}return this},toggleClass:function(a,b){var c=typeof a;return"boolean"==typeof b&&"string"===c?b?this.addClass(a):this.removeClass(a):this.each(n.isFunction(a)?function(c){n(this).toggleClass(a.call(this,c,this.className,b),b)}:function(){if("string"===c){var b,d=0,e=n(this),f=a.match(E)||[];while(b=f[d++])e.hasClass(b)?e.removeClass(b):e.addClass(b)}else(c===U||"boolean"===c)&&(this.className&&L.set(this,"__className__",this.className),this.className=this.className||a===!1?"":L.get(this,"__className__")||"")})},hasClass:function(a){for(var b=" "+a+" ",c=0,d=this.length;d>c;c++)if(1===this[c].nodeType&&(" "+this[c].className+" ").replace(ab," ").indexOf(b)>=0)return!0;return!1}});var bb=/\r/g;n.fn.extend({val:function(a){var b,c,d,e=this[0];{if(arguments.length)return d=n.isFunction(a),this.each(function(c){var e;1===this.nodeType&&(e=d?a.call(this,c,n(this).val()):a,null==e?e="":"number"==typeof e?e+="":n.isArray(e)&&(e=n.map(e,function(a){return null==a?"":a+""})),b=n.valHooks[this.type]||n.valHooks[this.nodeName.toLowerCase()],b&&"set"in b&&void 0!==b.set(this,e,"value")||(this.value=e))});if(e)return b=n.valHooks[e.type]||n.valHooks[e.nodeName.toLowerCase()],b&&"get"in b&&void 
0!==(c=b.get(e,"value"))?c:(c=e.value,"string"==typeof c?c.replace(bb,""):null==c?"":c)}}}),n.extend({valHooks:{option:{get:function(a){var b=n.find.attr(a,"value");return null!=b?b:n.trim(n.text(a))}},select:{get:function(a){for(var b,c,d=a.options,e=a.selectedIndex,f="select-one"===a.type||0>e,g=f?null:[],h=f?e+1:d.length,i=0>e?h:f?e:0;h>i;i++)if(c=d[i],!(!c.selected&&i!==e||(k.optDisabled?c.disabled:null!==c.getAttribute("disabled"))||c.parentNode.disabled&&n.nodeName(c.parentNode,"optgroup"))){if(b=n(c).val(),f)return b;g.push(b)}return g},set:function(a,b){var c,d,e=a.options,f=n.makeArray(b),g=e.length;while(g--)d=e[g],(d.selected=n.inArray(d.value,f)>=0)&&(c=!0);return c||(a.selectedIndex=-1),f}}}}),n.each(["radio","checkbox"],function(){n.valHooks[this]={set:function(a,b){return n.isArray(b)?a.checked=n.inArray(n(a).val(),b)>=0:void 0}},k.checkOn||(n.valHooks[this].get=function(a){return null===a.getAttribute("value")?"on":a.value})}),n.each("blur focus focusin focusout load resize scroll unload click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup error contextmenu".split(" "),function(a,b){n.fn[b]=function(a,c){return arguments.length>0?this.on(b,null,a,c):this.trigger(b)}}),n.fn.extend({hover:function(a,b){return this.mouseenter(a).mouseleave(b||a)},bind:function(a,b,c){return this.on(a,null,b,c)},unbind:function(a,b){return this.off(a,null,b)},delegate:function(a,b,c,d){return this.on(b,a,c,d)},undelegate:function(a,b,c){return 1===arguments.length?this.off(a,"**"):this.off(b,a||"**",c)}});var cb=n.now(),db=/\?/;n.parseJSON=function(a){return JSON.parse(a+"")},n.parseXML=function(a){var b,c;if(!a||"string"!=typeof a)return null;try{c=new DOMParser,b=c.parseFromString(a,"text/xml")}catch(d){b=void 0}return(!b||b.getElementsByTagName("parsererror").length)&&n.error("Invalid XML: "+a),b};var eb=/#.*$/,fb=/([?&])_=[^&]*/,gb=/^(.*?):[ 
\t]*([^\r\n]*)$/gm,hb=/^(?:about|app|app-storage|.+-extension|file|res|widget):$/,ib=/^(?:GET|HEAD)$/,jb=/^\/\//,kb=/^([\w.+-]+:)(?:\/\/(?:[^\/?#]*@|)([^\/?#:]*)(?::(\d+)|)|)/,lb={},mb={},nb="*/".concat("*"),ob=a.location.href,pb=kb.exec(ob.toLowerCase())||[];function qb(a){return function(b,c){"string"!=typeof b&&(c=b,b="*");var d,e=0,f=b.toLowerCase().match(E)||[];if(n.isFunction(c))while(d=f[e++])"+"===d[0]?(d=d.slice(1)||"*",(a[d]=a[d]||[]).unshift(c)):(a[d]=a[d]||[]).push(c)}}function rb(a,b,c,d){var e={},f=a===mb;function g(h){var i;return e[h]=!0,n.each(a[h]||[],function(a,h){var j=h(b,c,d);return"string"!=typeof j||f||e[j]?f?!(i=j):void 0:(b.dataTypes.unshift(j),g(j),!1)}),i}return g(b.dataTypes[0])||!e["*"]&&g("*")}function sb(a,b){var c,d,e=n.ajaxSettings.flatOptions||{};for(c in b)void 0!==b[c]&&((e[c]?a:d||(d={}))[c]=b[c]);return d&&n.extend(!0,a,d),a}function tb(a,b,c){var d,e,f,g,h=a.contents,i=a.dataTypes;while("*"===i[0])i.shift(),void 0===d&&(d=a.mimeType||b.getResponseHeader("Content-Type"));if(d)for(e in h)if(h[e]&&h[e].test(d)){i.unshift(e);break}if(i[0]in c)f=i[0];else{for(e in c){if(!i[0]||a.converters[e+" "+i[0]]){f=e;break}g||(g=e)}f=f||g}return f?(f!==i[0]&&i.unshift(f),c[f]):void 0}function ub(a,b,c,d){var e,f,g,h,i,j={},k=a.dataTypes.slice();if(k[1])for(g in a.converters)j[g.toLowerCase()]=a.converters[g];f=k.shift();while(f)if(a.responseFields[f]&&(c[a.responseFields[f]]=b),!i&&d&&a.dataFilter&&(b=a.dataFilter(b,a.dataType)),i=f,f=k.shift())if("*"===f)f=i;else if("*"!==i&&i!==f){if(g=j[i+" "+f]||j["* "+f],!g)for(e in j)if(h=e.split(" "),h[1]===f&&(g=j[i+" "+h[0]]||j["* "+h[0]])){g===!0?g=j[e]:j[e]!==!0&&(f=h[0],k.unshift(h[1]));break}if(g!==!0)if(g&&a["throws"])b=g(b);else try{b=g(b)}catch(l){return{state:"parsererror",error:g?l:"No conversion from "+i+" to 
"+f}}}return{state:"success",data:b}}n.extend({active:0,lastModified:{},etag:{},ajaxSettings:{url:ob,type:"GET",isLocal:hb.test(pb[1]),global:!0,processData:!0,async:!0,contentType:"application/x-www-form-urlencoded; charset=UTF-8",accepts:{"*":nb,text:"text/plain",html:"text/html",xml:"application/xml, text/xml",json:"application/json, text/javascript"},contents:{xml:/xml/,html:/html/,json:/json/},responseFields:{xml:"responseXML",text:"responseText",json:"responseJSON"},converters:{"* text":String,"text html":!0,"text json":n.parseJSON,"text xml":n.parseXML},flatOptions:{url:!0,context:!0}},ajaxSetup:function(a,b){return b?sb(sb(a,n.ajaxSettings),b):sb(n.ajaxSettings,a)},ajaxPrefilter:qb(lb),ajaxTransport:qb(mb),ajax:function(a,b){"object"==typeof a&&(b=a,a=void 0),b=b||{};var c,d,e,f,g,h,i,j,k=n.ajaxSetup({},b),l=k.context||k,m=k.context&&(l.nodeType||l.jquery)?n(l):n.event,o=n.Deferred(),p=n.Callbacks("once memory"),q=k.statusCode||{},r={},s={},t=0,u="canceled",v={readyState:0,getResponseHeader:function(a){var b;if(2===t){if(!f){f={};while(b=gb.exec(e))f[b[1].toLowerCase()]=b[2]}b=f[a.toLowerCase()]}return null==b?null:b},getAllResponseHeaders:function(){return 2===t?e:null},setRequestHeader:function(a,b){var c=a.toLowerCase();return t||(a=s[c]=s[c]||a,r[a]=b),this},overrideMimeType:function(a){return t||(k.mimeType=a),this},statusCode:function(a){var b;if(a)if(2>t)for(b in a)q[b]=[q[b],a[b]];else v.always(a[v.status]);return this},abort:function(a){var b=a||u;return c&&c.abort(b),x(0,b),this}};if(o.promise(v).complete=p.add,v.success=v.done,v.error=v.fail,k.url=((a||k.url||ob)+"").replace(eb,"").replace(jb,pb[1]+"//"),k.type=b.method||b.type||k.method||k.type,k.dataTypes=n.trim(k.dataType||"*").toLowerCase().match(E)||[""],null==k.crossDomain&&(h=kb.exec(k.url.toLowerCase()),k.crossDomain=!(!h||h[1]===pb[1]&&h[2]===pb[2]&&(h[3]||("http:"===h[1]?"80":"443"))===(pb[3]||("http:"===pb[1]?"80":"443")))),k.data&&k.processData&&"string"!=typeof 
k.data&&(k.data=n.param(k.data,k.traditional)),rb(lb,k,b,v),2===t)return v;i=n.event&&k.global,i&&0===n.active++&&n.event.trigger("ajaxStart"),k.type=k.type.toUpperCase(),k.hasContent=!ib.test(k.type),d=k.url,k.hasContent||(k.data&&(d=k.url+=(db.test(d)?"&":"?")+k.data,delete k.data),k.cache===!1&&(k.url=fb.test(d)?d.replace(fb,"$1_="+cb++):d+(db.test(d)?"&":"?")+"_="+cb++)),k.ifModified&&(n.lastModified[d]&&v.setRequestHeader("If-Modified-Since",n.lastModified[d]),n.etag[d]&&v.setRequestHeader("If-None-Match",n.etag[d])),(k.data&&k.hasContent&&k.contentType!==!1||b.contentType)&&v.setRequestHeader("Content-Type",k.contentType),v.setRequestHeader("Accept",k.dataTypes[0]&&k.accepts[k.dataTypes[0]]?k.accepts[k.dataTypes[0]]+("*"!==k.dataTypes[0]?", "+nb+"; q=0.01":""):k.accepts["*"]);for(j in k.headers)v.setRequestHeader(j,k.headers[j]);if(k.beforeSend&&(k.beforeSend.call(l,v,k)===!1||2===t))return v.abort();u="abort";for(j in{success:1,error:1,complete:1})v[j](k[j]);if(c=rb(mb,k,b,v)){v.readyState=1,i&&m.trigger("ajaxSend",[v,k]),k.async&&k.timeout>0&&(g=setTimeout(function(){v.abort("timeout")},k.timeout));try{t=1,c.send(r,x)}catch(w){if(!(2>t))throw w;x(-1,w)}}else x(-1,"No Transport");function x(a,b,f,h){var j,r,s,u,w,x=b;2!==t&&(t=2,g&&clearTimeout(g),c=void 0,e=h||"",v.readyState=a>0?4:0,j=a>=200&&300>a||304===a,f&&(u=tb(k,v,f)),u=ub(k,u,v,j),j?(k.ifModified&&(w=v.getResponseHeader("Last-Modified"),w&&(n.lastModified[d]=w),w=v.getResponseHeader("etag"),w&&(n.etag[d]=w)),204===a||"HEAD"===k.type?x="nocontent":304===a?x="notmodified":(x=u.state,r=u.data,s=u.error,j=!s)):(s=x,(a||!x)&&(x="error",0>a&&(a=0))),v.status=a,v.statusText=(b||x)+"",j?o.resolveWith(l,[r,x,v]):o.rejectWith(l,[v,x,s]),v.statusCode(q),q=void 0,i&&m.trigger(j?"ajaxSuccess":"ajaxError",[v,k,j?r:s]),p.fireWith(l,[v,x]),i&&(m.trigger("ajaxComplete",[v,k]),--n.active||n.event.trigger("ajaxStop")))}return v},getJSON:function(a,b,c){return n.get(a,b,c,"json")},getScript:function(a,b){return 
n.get(a,void 0,b,"script")}}),n.each(["get","post"],function(a,b){n[b]=function(a,c,d,e){return n.isFunction(c)&&(e=e||d,d=c,c=void 0),n.ajax({url:a,type:b,dataType:e,data:c,success:d})}}),n._evalUrl=function(a){return n.ajax({url:a,type:"GET",dataType:"script",async:!1,global:!1,"throws":!0})},n.fn.extend({wrapAll:function(a){var b;return n.isFunction(a)?this.each(function(b){n(this).wrapAll(a.call(this,b))}):(this[0]&&(b=n(a,this[0].ownerDocument).eq(0).clone(!0),this[0].parentNode&&b.insertBefore(this[0]),b.map(function(){var a=this;while(a.firstElementChild)a=a.firstElementChild;return a}).append(this)),this)},wrapInner:function(a){return this.each(n.isFunction(a)?function(b){n(this).wrapInner(a.call(this,b))}:function(){var b=n(this),c=b.contents();c.length?c.wrapAll(a):b.append(a)})},wrap:function(a){var b=n.isFunction(a);return this.each(function(c){n(this).wrapAll(b?a.call(this,c):a)})},unwrap:function(){return this.parent().each(function(){n.nodeName(this,"body")||n(this).replaceWith(this.childNodes)}).end()}}),n.expr.filters.hidden=function(a){return a.offsetWidth<=0&&a.offsetHeight<=0},n.expr.filters.visible=function(a){return!n.expr.filters.hidden(a)};var vb=/%20/g,wb=/\[\]$/,xb=/\r?\n/g,yb=/^(?:submit|button|image|reset|file)$/i,zb=/^(?:input|select|textarea|keygen)/i;function Ab(a,b,c,d){var e;if(n.isArray(b))n.each(b,function(b,e){c||wb.test(a)?d(a,e):Ab(a+"["+("object"==typeof e?b:"")+"]",e,c,d)});else if(c||"object"!==n.type(b))d(a,b);else for(e in b)Ab(a+"["+e+"]",b[e],c,d)}n.param=function(a,b){var c,d=[],e=function(a,b){b=n.isFunction(b)?b():null==b?"":b,d[d.length]=encodeURIComponent(a)+"="+encodeURIComponent(b)};if(void 0===b&&(b=n.ajaxSettings&&n.ajaxSettings.traditional),n.isArray(a)||a.jquery&&!n.isPlainObject(a))n.each(a,function(){e(this.name,this.value)});else for(c in a)Ab(c,a[c],b,e);return d.join("&").replace(vb,"+")},n.fn.extend({serialize:function(){return n.param(this.serializeArray())},serializeArray:function(){return 
this.map(function(){var a=n.prop(this,"elements");return a?n.makeArray(a):this}).filter(function(){var a=this.type;return this.name&&!n(this).is(":disabled")&&zb.test(this.nodeName)&&!yb.test(a)&&(this.checked||!T.test(a))}).map(function(a,b){var c=n(this).val();return null==c?null:n.isArray(c)?n.map(c,function(a){return{name:b.name,value:a.replace(xb,"\r\n")}}):{name:b.name,value:c.replace(xb,"\r\n")}}).get()}}),n.ajaxSettings.xhr=function(){try{return new XMLHttpRequest}catch(a){}};var Bb=0,Cb={},Db={0:200,1223:204},Eb=n.ajaxSettings.xhr();a.attachEvent&&a.attachEvent("onunload",function(){for(var a in Cb)Cb[a]()}),k.cors=!!Eb&&"withCredentials"in Eb,k.ajax=Eb=!!Eb,n.ajaxTransport(function(a){var b;return k.cors||Eb&&!a.crossDomain?{send:function(c,d){var e,f=a.xhr(),g=++Bb;if(f.open(a.type,a.url,a.async,a.username,a.password),a.xhrFields)for(e in a.xhrFields)f[e]=a.xhrFields[e];a.mimeType&&f.overrideMimeType&&f.overrideMimeType(a.mimeType),a.crossDomain||c["X-Requested-With"]||(c["X-Requested-With"]="XMLHttpRequest");for(e in c)f.setRequestHeader(e,c[e]);b=function(a){return function(){b&&(delete Cb[g],b=f.onload=f.onerror=null,"abort"===a?f.abort():"error"===a?d(f.status,f.statusText):d(Db[f.status]||f.status,f.statusText,"string"==typeof f.responseText?{text:f.responseText}:void 0,f.getAllResponseHeaders()))}},f.onload=b(),f.onerror=b("error"),b=Cb[g]=b("abort");try{f.send(a.hasContent&&a.data||null)}catch(h){if(b)throw h}},abort:function(){b&&b()}}:void 0}),n.ajaxSetup({accepts:{script:"text/javascript, application/javascript, application/ecmascript, application/x-ecmascript"},contents:{script:/(?:java|ecma)script/},converters:{"text script":function(a){return n.globalEval(a),a}}}),n.ajaxPrefilter("script",function(a){void 0===a.cache&&(a.cache=!1),a.crossDomain&&(a.type="GET")}),n.ajaxTransport("script",function(a){if(a.crossDomain){var b,c;return{send:function(d,e){b=n("<script>").prop({async:!0,charset:a.scriptCharset,src:a.url}).on("load 
error",c=function(a){b.remove(),c=null,a&&e("error"===a.type?404:200,a.type)}),l.head.appendChild(b[0])},abort:function(){c&&c()}}}});var Fb=[],Gb=/(=)\?(?=&|$)|\?\?/;n.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var a=Fb.pop()||n.expando+"_"+cb++;return this[a]=!0,a}}),n.ajaxPrefilter("json jsonp",function(b,c,d){var e,f,g,h=b.jsonp!==!1&&(Gb.test(b.url)?"url":"string"==typeof b.data&&!(b.contentType||"").indexOf("application/x-www-form-urlencoded")&&Gb.test(b.data)&&"data");return h||"jsonp"===b.dataTypes[0]?(e=b.jsonpCallback=n.isFunction(b.jsonpCallback)?b.jsonpCallback():b.jsonpCallback,h?b[h]=b[h].replace(Gb,"$1"+e):b.jsonp!==!1&&(b.url+=(db.test(b.url)?"&":"?")+b.jsonp+"="+e),b.converters["script json"]=function(){return g||n.error(e+" was not called"),g[0]},b.dataTypes[0]="json",f=a[e],a[e]=function(){g=arguments},d.always(function(){a[e]=f,b[e]&&(b.jsonpCallback=c.jsonpCallback,Fb.push(e)),g&&n.isFunction(f)&&f(g[0]),g=f=void 0}),"script"):void 0}),n.parseHTML=function(a,b,c){if(!a||"string"!=typeof a)return null;"boolean"==typeof b&&(c=b,b=!1),b=b||l;var d=v.exec(a),e=!c&&[];return d?[b.createElement(d[1])]:(d=n.buildFragment([a],b,e),e&&e.length&&n(e).remove(),n.merge([],d.childNodes))};var Hb=n.fn.load;n.fn.load=function(a,b,c){if("string"!=typeof a&&Hb)return Hb.apply(this,arguments);var d,e,f,g=this,h=a.indexOf(" ");return h>=0&&(d=n.trim(a.slice(h)),a=a.slice(0,h)),n.isFunction(b)?(c=b,b=void 0):b&&"object"==typeof b&&(e="POST"),g.length>0&&n.ajax({url:a,type:e,dataType:"html",data:b}).done(function(a){f=arguments,g.html(d?n("<div>").append(n.parseHTML(a)).find(d):a)}).complete(c&&function(a,b){g.each(c,f||[a.responseText,b,a])}),this},n.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(a,b){n.fn[b]=function(a){return this.on(b,a)}}),n.expr.filters.animated=function(a){return n.grep(n.timers,function(b){return a===b.elem}).length};var Ib=a.document.documentElement;function Jb(a){return 
n.isWindow(a)?a:9===a.nodeType&&a.defaultView}n.offset={setOffset:function(a,b,c){var d,e,f,g,h,i,j,k=n.css(a,"position"),l=n(a),m={};"static"===k&&(a.style.position="relative"),h=l.offset(),f=n.css(a,"top"),i=n.css(a,"left"),j=("absolute"===k||"fixed"===k)&&(f+i).indexOf("auto")>-1,j?(d=l.position(),g=d.top,e=d.left):(g=parseFloat(f)||0,e=parseFloat(i)||0),n.isFunction(b)&&(b=b.call(a,c,h)),null!=b.top&&(m.top=b.top-h.top+g),null!=b.left&&(m.left=b.left-h.left+e),"using"in b?b.using.call(a,m):l.css(m)}},n.fn.extend({offset:function(a){if(arguments.length)return void 0===a?this:this.each(function(b){n.offset.setOffset(this,a,b)});var b,c,d=this[0],e={top:0,left:0},f=d&&d.ownerDocument;if(f)return b=f.documentElement,n.contains(b,d)?(typeof d.getBoundingClientRect!==U&&(e=d.getBoundingClientRect()),c=Jb(f),{top:e.top+c.pageYOffset-b.clientTop,left:e.left+c.pageXOffset-b.clientLeft}):e},position:function(){if(this[0]){var a,b,c=this[0],d={top:0,left:0};return"fixed"===n.css(c,"position")?b=c.getBoundingClientRect():(a=this.offsetParent(),b=this.offset(),n.nodeName(a[0],"html")||(d=a.offset()),d.top+=n.css(a[0],"borderTopWidth",!0),d.left+=n.css(a[0],"borderLeftWidth",!0)),{top:b.top-d.top-n.css(c,"marginTop",!0),left:b.left-d.left-n.css(c,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var a=this.offsetParent||Ib;while(a&&!n.nodeName(a,"html")&&"static"===n.css(a,"position"))a=a.offsetParent;return a||Ib})}}),n.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(b,c){var d="pageYOffset"===c;n.fn[b]=function(e){return J(this,function(b,e,f){var g=Jb(b);return void 0===f?g?g[c]:b[e]:void(g?g.scrollTo(d?a.pageXOffset:f,d?f:a.pageYOffset):b[e]=f)},b,e,arguments.length,null)}}),n.each(["top","left"],function(a,b){n.cssHooks[b]=ya(k.pixelPosition,function(a,c){return c?(c=xa(a,b),va.test(c)?n(a).position()[b]+"px":c):void 
0})}),n.each({Height:"height",Width:"width"},function(a,b){n.each({padding:"inner"+a,content:b,"":"outer"+a},function(c,d){n.fn[d]=function(d,e){var f=arguments.length&&(c||"boolean"!=typeof d),g=c||(d===!0||e===!0?"margin":"border");return J(this,function(b,c,d){var e;return n.isWindow(b)?b.document.documentElement["client"+a]:9===b.nodeType?(e=b.documentElement,Math.max(b.body["scroll"+a],e["scroll"+a],b.body["offset"+a],e["offset"+a],e["client"+a])):void 0===d?n.css(b,c,g):n.style(b,c,d,g)},b,f?d:void 0,f,null)}})}),n.fn.size=function(){return this.length},n.fn.andSelf=n.fn.addBack,"function"==typeof define&&define.amd&&define("jquery",[],function(){return n});var Kb=a.jQuery,Lb=a.$;return n.noConflict=function(b){return a.$===n&&(a.$=Lb),b&&a.jQuery===n&&(a.jQuery=Kb),n},typeof b===U&&(a.jQuery=a.$=n),n});
diff --git a/third_party/jsoncpp/BUILD.gn b/third_party/jsoncpp/BUILD.gn
new file mode 100644
index 0000000..a832d0f
--- /dev/null
+++ b/third_party/jsoncpp/BUILD.gn
@@ -0,0 +1,41 @@
+# This is a copy of chromium third_party/jsoncpp/.
+#
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+config("jsoncpp_config") {
+  include_dirs = [
+    "overrides/include",
+    "source/include",
+  ]
+
+  if (os == "win") {
+    cflags = [ "/wd4996" ]  # Suppress MSVC C4996: deprecated function warning (e.g. strcpy).
+  }
+}
+
+source_set("jsoncpp") {
+  sources = [
+    "overrides/include/json/value.h",
+    "overrides/src/lib_json/json_reader.cpp",
+    "overrides/src/lib_json/json_value.cpp",
+    "source/include/json/assertions.h",
+    "source/include/json/autolink.h",
+    "source/include/json/config.h",
+    "source/include/json/features.h",
+    "source/include/json/forwards.h",
+    "source/include/json/json.h",
+    "source/include/json/reader.h",
+    "source/include/json/writer.h",
+    "source/src/lib_json/json_batchallocator.h",
+    "source/src/lib_json/json_tool.h",
+    "source/src/lib_json/json_writer.cpp",
+  ]
+
+  public_configs = [ ":jsoncpp_config" ]
+
+  defines = [ "JSON_USE_EXCEPTION=0" ]
+
+  include_dirs = [ "source/src/lib_json" ]
+}
diff --git a/third_party/jsoncpp/LICENSE b/third_party/jsoncpp/LICENSE
new file mode 100644
index 0000000..c41a1d1
--- /dev/null
+++ b/third_party/jsoncpp/LICENSE
@@ -0,0 +1,55 @@
+The JsonCpp library's source code, including accompanying documentation,
+tests and demonstration applications, are licensed under the following
+conditions...
+
+Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all
+jurisdictions which recognize such a disclaimer. In such jurisdictions,
+this software is released into the Public Domain.
+
+In jurisdictions which do not recognize Public Domain property (e.g. Germany as of
+2010), this software is Copyright (c) 2007-2010 by Baptiste Lepilleur and
+The JsonCpp Authors, and is released under the terms of the MIT License (see below).
+
+In jurisdictions which recognize Public Domain property, the user of this
+software may choose to accept it either as 1) Public Domain, 2) under the
+conditions of the MIT License (see below), or 3) under the terms of dual
+Public Domain/MIT License conditions described here, as they choose.
+
+The MIT License is about as close to Public Domain as a license can get, and is
+described in clear, concise terms at:
+
+   http://en.wikipedia.org/wiki/MIT_License
+
+The full text of the MIT License follows:
+
+========================================================================
+Copyright (c) 2007-2010 Baptiste Lepilleur and The JsonCpp Authors
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use, copy,
+modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+========================================================================
+(END LICENSE TEXT)
+
+The MIT license is compatible with both the GPL and commercial
+software, affording one all of the rights of Public Domain with the
+minor nuisance of being required to keep the above copyright notice
+and license text in the source code. Note also that by accepting the
+Public Domain "license" you can re-license your copy using whatever
+license you like.
diff --git a/third_party/jsoncpp/README.goma b/third_party/jsoncpp/README.goma
new file mode 100644
index 0000000..571cf48
--- /dev/null
+++ b/third_party/jsoncpp/README.goma
@@ -0,0 +1,13 @@
+Name: jsoncpp
+URL: https://github.com/open-source-parsers/jsoncpp
+Version: f572e8e42e22cfcf5ab0aea26574f408943edfa4
+License: MIT
+License File: LICENSE
+Security Critical: yes
+
+Description:
+oauth2 uses jsoncpp.
+
+Local modifications:
+Applied the overrides in chromium to build on libc++.
+See README.chromium (in chromium's third_party/jsoncpp) for more details.
\ No newline at end of file
diff --git a/third_party/jsoncpp/overrides/include/json/value.h b/third_party/jsoncpp/overrides/include/json/value.h
new file mode 100644
index 0000000..7ca4f11
--- /dev/null
+++ b/third_party/jsoncpp/overrides/include/json/value.h
@@ -0,0 +1,1111 @@
+// Copyright 2007-2010 Baptiste Lepilleur
+// Distributed under MIT license, or public domain if desired and
+// recognized in your jurisdiction.
+// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
+
+#ifndef CPPTL_JSON_H_INCLUDED
+# define CPPTL_JSON_H_INCLUDED
+
+#if !defined(JSON_IS_AMALGAMATION)
+# include <json/forwards.h>
+#endif // if !defined(JSON_IS_AMALGAMATION)
+# include <string>
+# include <vector>
+
+# ifndef JSON_USE_CPPTL_SMALLMAP
+#  include <map>
+# else
+#  include <cpptl/smallmap.h>
+# endif
+# ifdef JSON_USE_CPPTL
+#  include <cpptl/forwards.h>
+# endif
+
+/** \brief JSON (JavaScript Object Notation).
+ */
+namespace Json {
+
+   /** \brief Type of the value held by a Value object, as reported by Value::type().
+    */
+   enum ValueType
+   {
+      nullValue = 0, ///< 'null' value
+      intValue,      ///< signed integer value
+      uintValue,     ///< unsigned integer value
+      realValue,     ///< double value
+      stringValue,   ///< UTF-8 string value
+      booleanValue,  ///< bool value
+      arrayValue,    ///< array value (ordered list)
+      objectValue    ///< object value (collection of name/value pairs).
+   };
+
+   enum CommentPlacement   // position of a comment relative to its value; passed to Value::setComment()
+   {
+      commentBefore = 0,        ///< a comment placed on the line before a value
+      commentAfterOnSameLine,   ///< a comment just after a value on the same line
+      commentAfter,             ///< a comment on the line after a value (only makes sense for the root value)
+      numberOfCommentPlacement  ///< number of placements declared above (3)
+   };
+
+//# ifdef JSON_USE_CPPTL
+//   typedef CppTL::AnyEnumerator<const char *> EnumMemberNames;
+//   typedef CppTL::AnyEnumerator<const Value &> EnumValues;
+//# endif
+
+   /** \brief Lightweight wrapper to tag a static string.
+    *
+    * The Value constructor and objectValue member assignment take advantage of
+    * StaticString to avoid the cost of string duplication when storing the
+    * string or the member name.
+    *
+    * Example of usage:
+    * \code
+    * Json::Value aValue( StaticString("some text") );
+    * Json::Value object;
+    * static const StaticString code("code");
+    * object[code] = 1234;
+    * \endcode
+    */
+   class JSON_API StaticString
+   {
+   public:
+      explicit StaticString( const char *czstring )   // stores the pointer only; no copy is made
+         : str_( czstring )
+      {
+      }
+
+      operator const char *() const   // implicit conversion back to the wrapped pointer
+      {
+         return str_;
+      }
+
+      const char *c_str() const   // returns the same pointer as the conversion operator
+      {
+         return str_;
+      }
+
+   private:
+      const char *str_;   // wrapped pointer; this class never copies or frees it
+   };
+
+   /** \brief Represents a <a HREF="http://www.json.org">JSON</a> value.
+    *
+    * This class is a discriminated union wrapper that can represent a:
+    * - signed integer (range: Value::minInt - Value::maxInt)
+    * - unsigned integer (range: 0 - Value::maxUInt)
+    * - double
+    * - UTF-8 string
+    * - boolean
+    * - 'null'
+    * - an ordered list of Value
+    * - collection of name/value pairs (javascript object)
+    *
+    * The type of the held value is represented by a #ValueType and
+    * can be obtained using type().
+    *
+    * Values of an #objectValue or #arrayValue can be accessed using operator[]() methods.
+    * Non-const methods will automatically create a #nullValue element
+    * if it does not exist.
+    * The sequence of an #arrayValue will be automatically resized and initialized
+    * with #nullValue. resize() can be used to enlarge or truncate an #arrayValue.
+    *
+    * The get() methods can be used to obtain a default value in case the required element
+    * does not exist.
+    *
+    * It is possible to iterate over the list of a #objectValue values using
+    * the getMemberNames() method.
+    */
+   class JSON_API Value
+   {
+      friend class ValueIteratorBase;
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+      friend class ValueInternalLink;
+      friend class ValueInternalMap;
+# endif
+   public:
+      typedef std::vector<std::string> Members;
+      typedef ValueIterator iterator;
+      typedef ValueConstIterator const_iterator;
+      typedef Json::UInt UInt;
+      typedef Json::Int Int;
+# if defined(JSON_HAS_INT64)
+      typedef Json::UInt64 UInt64;
+      typedef Json::Int64 Int64;
+#endif // defined(JSON_HAS_INT64)
+      typedef Json::LargestInt LargestInt;
+      typedef Json::LargestUInt LargestUInt;
+      typedef Json::ArrayIndex ArrayIndex;
+
+      static const Value& null;
+      /// Minimum signed integer value that can be stored in a Json::Value.
+      static const LargestInt minLargestInt;
+      /// Maximum signed integer value that can be stored in a Json::Value.
+      static const LargestInt maxLargestInt;
+      /// Maximum unsigned integer value that can be stored in a Json::Value.
+      static const LargestUInt maxLargestUInt;
+
+      /// Minimum signed int value that can be stored in a Json::Value.
+      static const Int minInt;
+      /// Maximum signed int value that can be stored in a Json::Value.
+      static const Int maxInt;
+      /// Maximum unsigned int value that can be stored in a Json::Value.
+      static const UInt maxUInt;
+
+# if defined(JSON_HAS_INT64)
+      /// Minimum signed 64 bits int value that can be stored in a Json::Value.
+      static const Int64 minInt64;
+      /// Maximum signed 64 bits int value that can be stored in a Json::Value.
+      static const Int64 maxInt64;
+      /// Maximum unsigned 64 bits int value that can be stored in a Json::Value.
+      static const UInt64 maxUInt64;
+#endif // defined(JSON_HAS_INT64)
+
+   private:
+#ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION
+# ifndef JSON_VALUE_USE_INTERNAL_MAP
+      class CZString
+      {
+      public:
+         enum DuplicationPolicy
+         {
+            noDuplication = 0,
+            duplicate,
+            duplicateOnCopy
+         };
+         CZString( ArrayIndex index );
+         CZString( const char *cstr, DuplicationPolicy allocate );
+         CZString( const CZString &other );
+         ~CZString();
+         CZString &operator =( const CZString &other );
+         bool operator<( const CZString &other ) const;
+         bool operator==( const CZString &other ) const;
+         ArrayIndex index() const;
+         const char *c_str() const;
+         bool isStaticString() const;
+      private:
+         void swap( CZString &other );
+         const char *cstr_;
+         ArrayIndex index_;
+      };
+
+   public:
+#  ifndef JSON_USE_CPPTL_SMALLMAP
+      typedef std::map<CZString, Value> ObjectValues;
+#  else
+      typedef CppTL::SmallMap<CZString, Value> ObjectValues;
+#  endif // ifndef JSON_USE_CPPTL_SMALLMAP
+# endif // ifndef JSON_VALUE_USE_INTERNAL_MAP
+#endif // ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION
+
+   public:
+      /** \brief Create a default Value of the given type.
+
+        This is a very useful constructor.
+        To create an empty array, pass arrayValue.
+        To create an empty object, pass objectValue.
+        Another Value can then be set to this one by assignment.
+    This is useful since clear() and resize() will not alter types.
+
+        Examples:
+    \code
+    Json::Value null_value; // null
+    Json::Value arr_value(Json::arrayValue); // []
+    Json::Value obj_value(Json::objectValue); // {}
+    \endcode
+      */
+      Value( ValueType type = nullValue );
+      Value( Int value );
+      Value( UInt value );
+#if defined(JSON_HAS_INT64)
+      Value( Int64 value );
+      Value( UInt64 value );
+#endif // if defined(JSON_HAS_INT64)
+      Value( double value );
+      Value( const char *value );
+      Value( const char *beginValue, const char *endValue );
+      /** \brief Constructs a value from a static string.
+
+       * Like the other string constructors, but does not duplicate the string for
+       * internal storage. The given string must remain alive after the call to this
+       * constructor.
+       * Example of usage:
+       * \code
+       * Json::Value aValue( StaticString("some text") );
+       * \endcode
+       */
+      Value( const StaticString &value );
+      Value( const std::string &value );
+# ifdef JSON_USE_CPPTL
+      Value( const CppTL::ConstString &value );
+# endif
+      Value( bool value );
+      Value( const Value &other );
+      ~Value();
+
+      Value &operator=( const Value &other );
+      /// Swap values.
+      /// \note Currently, comments are intentionally not swapped, for
+      /// both logic and efficiency.
+      void swap( Value &other );
+
+      ValueType type() const;
+
+      bool operator <( const Value &other ) const;
+      bool operator <=( const Value &other ) const;
+      bool operator >=( const Value &other ) const;
+      bool operator >( const Value &other ) const;
+
+      bool operator ==( const Value &other ) const;
+      bool operator !=( const Value &other ) const;
+
+      int compare( const Value &other ) const;
+
+      const char *asCString() const;
+      std::string asString() const;
+# ifdef JSON_USE_CPPTL
+      CppTL::ConstString asConstString() const;
+# endif
+      Int asInt() const;
+      UInt asUInt() const;
+#if defined(JSON_HAS_INT64)
+      Int64 asInt64() const;
+      UInt64 asUInt64() const;
+#endif // if defined(JSON_HAS_INT64)
+      LargestInt asLargestInt() const;
+      LargestUInt asLargestUInt() const;
+      float asFloat() const;
+      double asDouble() const;
+      bool asBool() const;
+
+      bool isNull() const;
+      bool isBool() const;
+      bool isInt() const;
+      bool isInt64() const;
+      bool isUInt() const;
+      bool isUInt64() const;
+      bool isIntegral() const;
+      bool isDouble() const;
+      bool isNumeric() const;
+      bool isString() const;
+      bool isArray() const;
+      bool isObject() const;
+
+      bool isConvertibleTo( ValueType other ) const;
+
+      /// Number of values in array or object
+      ArrayIndex size() const;
+
+      /// \brief Return true if empty array, empty object, or null;
+      /// otherwise, false.
+      bool empty() const;
+
+      /// Return isNull()
+      bool operator!() const;
+
+      /// Remove all object members and array elements.
+      /// \pre type() is arrayValue, objectValue, or nullValue
+      /// \post type() is unchanged
+      void clear();
+
+      /// Resize the array to size elements.
+      /// New elements are initialized to null.
+      /// May only be called on nullValue or arrayValue.
+      /// \pre type() is arrayValue or nullValue
+      /// \post type() is arrayValue
+      void resize( ArrayIndex size );
+
+      /// Access an array element (zero based index ).
+      /// If the array contains fewer than index+1 elements, then null values are inserted
+      /// in the array so that its size is index+1.
+      /// (You may need to say 'value[0u]' to get your compiler to distinguish
+      ///  this from the operator[] which takes a string.)
+      Value &operator[]( ArrayIndex index );
+
+      /// Access an array element (zero based index ).
+      /// If the array contains fewer than index+1 elements, then null values are inserted
+      /// in the array so that its size is index+1.
+      /// (You may need to say 'value[0u]' to get your compiler to distinguish
+      ///  this from the operator[] which takes a string.)
+      Value &operator[]( int index );
+
+      /// Access an array element (zero based index )
+      /// (You may need to say 'value[0u]' to get your compiler to distinguish
+      ///  this from the operator[] which takes a string.)
+      const Value &operator[]( ArrayIndex index ) const;
+
+      /// Access an array element (zero based index )
+      /// (You may need to say 'value[0u]' to get your compiler to distinguish
+      ///  this from the operator[] which takes a string.)
+      const Value &operator[]( int index ) const;
+
+      /// If the array contains at least index+1 elements, returns the element value,
+      /// otherwise returns defaultValue.
+      Value get( ArrayIndex index,
+                 const Value &defaultValue ) const;
+      /// Return true if index < size().
+      bool isValidIndex( ArrayIndex index ) const;
+      /// \brief Append value to array at the end.
+      ///
+      /// Equivalent to jsonvalue[jsonvalue.size()] = value;
+      Value &append( const Value &value );
+
+      /// Access an object value by name, create a null member if it does not exist.
+      Value &operator[]( const char *key );
+      /// Access an object value by name, returns null if there is no member with that name.
+      const Value &operator[]( const char *key ) const;
+      /// Access an object value by name, create a null member if it does not exist.
+      Value &operator[]( const std::string &key );
+      /// Access an object value by name, returns null if there is no member with that name.
+      const Value &operator[]( const std::string &key ) const;
+      /** \brief Access an object value by name, create a null member if it does not exist.
+
+       * If the object has no entry for that name, then the member name used to store
+       * the new entry is not duplicated.
+       * Example of use:
+       * \code
+       * Json::Value object;
+       * static const StaticString code("code");
+       * object[code] = 1234;
+       * \endcode
+       */
+      Value &operator[]( const StaticString &key );
+# ifdef JSON_USE_CPPTL
+      /// Access an object value by name, create a null member if it does not exist.
+      Value &operator[]( const CppTL::ConstString &key );
+      /// Access an object value by name, returns null if there is no member with that name.
+      const Value &operator[]( const CppTL::ConstString &key ) const;
+# endif
+      /// Return the member named key if it exists, defaultValue otherwise.
+      Value get( const char *key,
+                 const Value &defaultValue ) const;
+      /// Return the member named key if it exists, defaultValue otherwise.
+      Value get( const std::string &key,
+                 const Value &defaultValue ) const;
+# ifdef JSON_USE_CPPTL
+      /// Return the member named key if it exists, defaultValue otherwise.
+      Value get( const CppTL::ConstString &key,
+                 const Value &defaultValue ) const;
+# endif
+      /// \brief Remove and return the named member.
+      ///
+      /// Do nothing if it did not exist.
+      /// \return the removed Value, or null.
+      /// \pre type() is objectValue or nullValue
+      /// \post type() is unchanged
+      Value removeMember( const char* key );
+      /// Same as removeMember(const char*)
+      Value removeMember( const std::string &key );
+
+      /// Return true if the object has a member named key.
+      bool isMember( const char *key ) const;
+      /// Return true if the object has a member named key.
+      bool isMember( const std::string &key ) const;
+# ifdef JSON_USE_CPPTL
+      /// Return true if the object has a member named key.
+      bool isMember( const CppTL::ConstString &key ) const;
+# endif
+
+      /// \brief Return a list of the member names.
+      ///
+      /// If null, return an empty list.
+      /// \pre type() is objectValue or nullValue
+      /// \post if type() was nullValue, it remains nullValue
+      Members getMemberNames() const;
+
+//# ifdef JSON_USE_CPPTL
+//      EnumMemberNames enumMemberNames() const;
+//      EnumValues enumValues() const;
+//# endif
+
+      /// Comments must be //... or /* ... */
+      void setComment( const char *comment,
+                       CommentPlacement placement );
+      /// Comments must be //... or /* ... */
+      void setComment( const std::string &comment,
+                       CommentPlacement placement );
+      bool hasComment( CommentPlacement placement ) const;
+      /// Include delimiters and embedded newlines.
+      std::string getComment( CommentPlacement placement ) const;
+
+      std::string toStyledString() const;
+
+      const_iterator begin() const;
+      const_iterator end() const;
+
+      iterator begin();
+      iterator end();
+
+   private:
+      Value &resolveReference( const char *key,
+                               bool isStatic );
+
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+      inline bool isItemAvailable() const   // true while the "used" bit of this item is clear
+      {
+         return !itemIsUsed_;
+      }
+
+      inline void setItemUsed( bool isUsed = true )   // records whether this item is in use
+      {
+         itemIsUsed_ = static_cast<unsigned int>( isUsed );   // bool converts to exactly 0 or 1
+      }
+
+      inline bool isMemberNameStatic() const   // true when setMemberNameIsStatic( true ) was last recorded
+      {
+         return memberNameIsStatic_ != 0;   // setter stores 1 for "static"; the former '== 0' reported the opposite
+      }
+
+      inline void setMemberNameIsStatic( bool isStatic )   // records the "member name is static" bit
+      {
+         memberNameIsStatic_ = static_cast<unsigned int>( isStatic );   // bool converts to exactly 0 or 1
+      }
+# endif // # ifdef JSON_VALUE_USE_INTERNAL_MAP
+
+   private:
+      struct CommentInfo
+      {
+         CommentInfo();
+         ~CommentInfo();
+
+         void setComment( const char *text );
+
+         char *comment_;
+      };
+
+      //struct MemberNamesTransform
+      //{
+      //   typedef const char *result_type;
+      //   const char *operator()( const CZString &name ) const
+      //   {
+      //      return name.c_str();
+      //   }
+      //};
+
+      union ValueHolder
+      {
+         LargestInt int_;
+         LargestUInt uint_;
+         double real_;
+         bool bool_;
+         char *string_;
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+         ValueInternalArray *array_;
+         ValueInternalMap *map_;
+#else
+         ObjectValues *map_;
+# endif
+      } value_;
+      ValueType type_ : 8;
+      // One-bit bitfields must be unsigned to allow storing 1.
+      // They must be 32-bits to share storage with ValueHolder.
+      unsigned int allocated_ : 1;
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+      unsigned int itemIsUsed_ : 1;      // used by the ValueInternalMap container.
+      unsigned int memberNameIsStatic_ : 1; // used by the ValueInternalMap container.
+# endif
+      CommentInfo *comments_;
+   };
+
+
+   /** \brief Experimental and untested: represents an element of the "path" to access a node.
+    */
+   class PathArgument
+   {
+   public:
+      friend class Path;   // Path names the private PathArgument::Kind in its own declarations
+
+      PathArgument();
+      PathArgument( ArrayIndex index );         // non-explicit: allows implicit conversion from an index
+      PathArgument( const char *key );          // non-explicit: allows implicit conversion from a C string
+      PathArgument( const std::string &key );   // non-explicit: allows implicit conversion from a std::string
+
+   private:
+      enum Kind   // tag for kind_ below
+      {
+         kindNone = 0,
+         kindIndex,
+         kindKey
+      };
+      std::string key_;
+      ArrayIndex index_;
+      Kind kind_;
+   };
+
+   /** \brief Experimental and untested: represents a "path" to access a node.
+    *
+    * Syntax:
+    * - "." => root node
+    * - ".[n]" => element at index 'n' of root node (an array value)
+    * - ".name" => member named 'name' of root node (an object value)
+    * - ".name1.name2.name3"
+    * - ".[0][1][2].name1[3]"
+    * - ".%" => member name is provided as parameter
+    * - ".[%]" => index is provided as parameter
+    */
+   class Path
+   {
+   public:
+      Path( const std::string &path,
+            const PathArgument &a1 = PathArgument(),   // a1..a5: optional parameters for the '%' and '[%]' placeholders
+            const PathArgument &a2 = PathArgument(),
+            const PathArgument &a3 = PathArgument(),
+            const PathArgument &a4 = PathArgument(),
+            const PathArgument &a5 = PathArgument() );
+
+      const Value &resolve( const Value &root ) const;
+      Value resolve( const Value &root,
+                     const Value &defaultValue ) const;   // NOTE(review): presumably the fallback when the path cannot be resolved - confirm in the implementation
+      /// Creates the "path" to access the specified node and returns a reference to the node.
+      Value &make( Value &root ) const;
+
+   private:
+      typedef std::vector<const PathArgument *> InArgs;
+      typedef std::vector<PathArgument> Args;
+
+      void makePath( const std::string &path,
+                     const InArgs &in );
+      void addPathInArg( const std::string &path,
+                         const InArgs &in,
+                         InArgs::const_iterator &itInArg,
+                         PathArgument::Kind kind );
+      void invalidPath( const std::string &path,
+                        int location );
+
+      Args args_;
+   };
+
+
+
+#ifdef JSON_VALUE_USE_INTERNAL_MAP
+   /** \brief Allocator to customize Value internal map.
+    * Below is an example of a simple implementation (the default implementation actually
+    * uses a memory pool for speed).
+    * \code
+      class DefaultValueMapAllocator : public ValueMapAllocator
+      {
+      public: // overridden from ValueMapAllocator
+         virtual ValueInternalMap *newMap()
+         {
+            return new ValueInternalMap();
+         }
+
+         virtual ValueInternalMap *newMapCopy( const ValueInternalMap &other )
+         {
+            return new ValueInternalMap( other );
+         }
+
+         virtual void destructMap( ValueInternalMap *map )
+         {
+            delete map;
+         }
+
+         virtual ValueInternalLink *allocateMapBuckets( unsigned int size )
+         {
+            return new ValueInternalLink[size];
+         }
+
+         virtual void releaseMapBuckets( ValueInternalLink *links )
+         {
+            delete [] links;
+         }
+
+         virtual ValueInternalLink *allocateMapLink()
+         {
+            return new ValueInternalLink();
+         }
+
+         virtual void releaseMapLink( ValueInternalLink *link )
+         {
+            delete link;
+         }
+      };
+    * \endcode
+    */
+   class JSON_API ValueMapAllocator   // abstract interface: every operation below is pure virtual
+   {
+   public:
+      virtual ~ValueMapAllocator();
+      virtual ValueInternalMap *newMap() = 0;
+      virtual ValueInternalMap *newMapCopy( const ValueInternalMap &other ) = 0;
+      virtual void destructMap( ValueInternalMap *map ) = 0;                    // counterpart of newMap()/newMapCopy() in the example above
+      virtual ValueInternalLink *allocateMapBuckets( unsigned int size ) = 0;
+      virtual void releaseMapBuckets( ValueInternalLink *links ) = 0;           // counterpart of allocateMapBuckets() in the example above
+      virtual ValueInternalLink *allocateMapLink() = 0;
+      virtual void releaseMapLink( ValueInternalLink *link ) = 0;               // counterpart of allocateMapLink() in the example above
+   };
+
+   /** \brief ValueInternalMap hash-map bucket chain link (for internal use only).
+    * \internal previous_ & next_ allow for bidirectional traversal.
+    */
+   class JSON_API ValueInternalLink
+   {
+   public:
+      enum { itemPerLink = 6 };  // sizeof(ValueInternalLink) = 128 on 32 bits architecture.
+      enum InternalFlags {
+         flagAvailable = 0,
+         flagUsed = 1
+      };
+
+      ValueInternalLink();
+
+      ~ValueInternalLink();
+
+      Value items_[itemPerLink];   // itemPerLink values stored inline in this link
+      char *keys_[itemPerLink];    // NOTE(review): presumably keys_[i] is the key of items_[i] - confirm in the implementation
+      ValueInternalLink *previous_;
+      ValueInternalLink *next_;
+   };
+
+
+   /** \brief A linked page based hash-table implementation used internally by Value.
+    * \internal ValueInternalMap is a traditional bucket based hash-table, with a linked
+    * list in each bucket to handle collision. There is an additional twist in that
+    * each node of the collision linked list is a page containing a fixed amount of
+    * value. This provides a better compromise between memory usage and speed.
+    *
+    * Each bucket is made up of a chained list of ValueInternalLink. The last
+    * link of a given bucket can be found in the 'previous_' field of the following bucket.
+    * The last link of the last bucket is stored in tailLink_ as it has no following bucket.
+    * Only the last link of a bucket may contain 'available' items. The last link always
+    * contains at least one element unless it is the bucket's very first link.
+    */
+   class JSON_API ValueInternalMap
+   {
+      friend class ValueIteratorBase;
+      friend class Value;
+   public:
+      typedef unsigned int HashKey;
+      typedef unsigned int BucketIndex;
+
+# ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION
+      struct IteratorState
+      {
+         IteratorState()   // starts with null map/link pointers and both indexes at 0
+            : map_(0)
+            , link_(0)
+            , itemIndex_(0)
+            , bucketIndex_(0)
+         {
+         }
+         ValueInternalMap *map_;
+         ValueInternalLink *link_;
+         BucketIndex itemIndex_;
+         BucketIndex bucketIndex_;
+      };
+# endif // ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION
+
+      ValueInternalMap();
+      ValueInternalMap( const ValueInternalMap &other );
+      ValueInternalMap &operator =( const ValueInternalMap &other );
+      ~ValueInternalMap();
+
+      void swap( ValueInternalMap &other );
+
+      BucketIndex size() const;
+
+      void clear();
+
+      bool reserveDelta( BucketIndex growth );
+
+      bool reserve( BucketIndex newItemCount );
+
+      const Value *find( const char *key ) const;
+
+      Value *find( const char *key );
+
+      Value &resolveReference( const char *key,
+                               bool isStatic );
+
+      void remove( const char *key );
+
+      void doActualRemove( ValueInternalLink *link,
+                           BucketIndex index,
+                           BucketIndex bucketIndex );
+
+      ValueInternalLink *&getLastLinkInBucket( BucketIndex bucketIndex );
+
+      Value &setNewItem( const char *key,
+                         bool isStatic,
+                         ValueInternalLink *link,
+                         BucketIndex index );
+
+      Value &unsafeAdd( const char *key,
+                        bool isStatic,
+                        HashKey hashedKey );
+
+      HashKey hash( const char *key ) const;
+
+      int compare( const ValueInternalMap &other ) const;
+
+   private:
+      void makeBeginIterator( IteratorState &it ) const;
+      void makeEndIterator( IteratorState &it ) const;
+      static bool equals( const IteratorState &x, const IteratorState &other );
+      static void increment( IteratorState &iterator );
+      static void incrementBucket( IteratorState &iterator );
+      static void decrement( IteratorState &iterator );
+      static const char *key( const IteratorState &iterator );
+      static const char *key( const IteratorState &iterator, bool &isStatic );
+      static Value &value( const IteratorState &iterator );
+      static int distance( const IteratorState &x, const IteratorState &y );
+
+   private:
+      ValueInternalLink *buckets_;
+      ValueInternalLink *tailLink_;   // last link of the last bucket (see class comment)
+      BucketIndex bucketsSize_;
+      BucketIndex itemCount_;
+   };
+
+   /** \brief A simplified deque implementation used internally by Value.
+   * \internal
+   * It is based on a list of fixed "pages"; each page contains a fixed number of items.
+   * Instead of using a linked-list, an array of pointers is used for fast item look-up.
+   * Look-up for an element is as follows:
+   * - compute page index: pageIndex = itemIndex / itemsPerPage
+   * - look-up item in page: pages_[pageIndex][itemIndex % itemsPerPage]
+   *
+   * Insertion is amortized constant time (only the array containing the index of pointers
+   * needs to be reallocated when items are appended).
+   */
+   class JSON_API ValueInternalArray
+   {
+      friend class Value;
+      friend class ValueIteratorBase;
+   public:
+      enum { itemsPerPage = 8 };    // should be a power of 2 for fast divide and modulo.
+      typedef Value::ArrayIndex ArrayIndex;
+      typedef unsigned int PageIndex;
+
+# ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION
+      struct IteratorState // Must be a POD
+      {
+         IteratorState()   // starts with null array/page pointers and item index 0
+            : array_(0)
+            , currentPageIndex_(0)
+            , currentItemIndex_(0)
+         {
+         }
+         ValueInternalArray *array_;
+         Value **currentPageIndex_;
+         unsigned int currentItemIndex_;
+      };
+# endif // ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION
+
+      ValueInternalArray();
+      ValueInternalArray( const ValueInternalArray &other );
+      ValueInternalArray &operator =( const ValueInternalArray &other );
+      ~ValueInternalArray();
+      void swap( ValueInternalArray &other );
+
+      void clear();
+      void resize( ArrayIndex newSize );
+
+      Value &resolveReference( ArrayIndex index );
+
+      Value *find( ArrayIndex index ) const;
+
+      ArrayIndex size() const;
+
+      int compare( const ValueInternalArray &other ) const;
+
+   private:
+      static bool equals( const IteratorState &x, const IteratorState &other );
+      static void increment( IteratorState &iterator );
+      static void decrement( IteratorState &iterator );
+      static Value &dereference( const IteratorState &iterator );
+      static Value &unsafeDereference( const IteratorState &iterator );
+      static int distance( const IteratorState &x, const IteratorState &y );
+      static ArrayIndex indexOf( const IteratorState &iterator );
+      void makeBeginIterator( IteratorState &it ) const;
+      void makeEndIterator( IteratorState &it ) const;
+      void makeIterator( IteratorState &it, ArrayIndex index ) const;
+
+      void makeIndexValid( ArrayIndex index );
+
+      Value **pages_;         // index of page pointers (see look-up rule in the class comment)
+      ArrayIndex size_;
+      PageIndex pageCount_;
+   };
+
+   /** \brief Experimental: do not use. Allocator to customize Value internal array.
+    * Below is an example of a simple implementation (the actual implementation uses a
+    * memory pool).
+      \code
+class DefaultValueArrayAllocator : public ValueArrayAllocator
+{
+public: // overridden from ValueArrayAllocator
+   virtual ~DefaultValueArrayAllocator()
+   {
+   }
+
+   virtual ValueInternalArray *newArray()
+   {
+      return new ValueInternalArray();
+   }
+
+   virtual ValueInternalArray *newArrayCopy( const ValueInternalArray &other )
+   {
+      return new ValueInternalArray( other );
+   }
+
+   virtual void destructArray( ValueInternalArray *array )
+   {
+      delete array;
+   }
+
+   virtual void reallocateArrayPageIndex( Value **&indexes,
+                                          ValueInternalArray::PageIndex &indexCount,
+                                          ValueInternalArray::PageIndex minNewIndexCount )
+   {
+      ValueInternalArray::PageIndex newIndexCount = (indexCount*3)/2 + 1;
+      if ( minNewIndexCount > newIndexCount )
+         newIndexCount = minNewIndexCount;
+      void *newIndexes = realloc( indexes, sizeof(Value*) * newIndexCount );
+      if ( !newIndexes )
+         throw std::bad_alloc();
+      indexCount = newIndexCount;
+      indexes = static_cast<Value **>( newIndexes );
+   }
+   virtual void releaseArrayPageIndex( Value **indexes,
+                                       ValueInternalArray::PageIndex indexCount )
+   {
+      if ( indexes )
+         free( indexes );
+   }
+
+   virtual Value *allocateArrayPage()
+   {
+      return static_cast<Value *>( malloc( sizeof(Value) * ValueInternalArray::itemsPerPage ) );
+   }
+
+   virtual void releaseArrayPage( Value *value )
+   {
+      if ( value )
+         free( value );
+   }
+};
+      \endcode
+    */
+   class JSON_API ValueArrayAllocator
+   {
+   public:
+      virtual ~ValueArrayAllocator();
+      virtual ValueInternalArray *newArray() = 0;
+      virtual ValueInternalArray *newArrayCopy( const ValueInternalArray &other ) = 0;
+      virtual void destructArray( ValueInternalArray *array ) = 0;
+      /** \brief Reallocate array page index.
+       * Reallocates the array of pointers to each page.
+       * \param indexes [input] pointer on the current index. May be \c NULL.
+       *                [output] pointer on the new index of at least
+       *                         \a minNewIndexCount pages.
+       * \param indexCount [input] current number of pages in the index.
+       *                   [output] number of pages the reallocated index can handle.
+       *                            \b MUST be >= \a minNewIndexCount.
+       * \param minNewIndexCount Minimum number of pages the new index must be able to
+       *                         handle.
+       */
+      virtual void reallocateArrayPageIndex( Value **&indexes,
+                                             ValueInternalArray::PageIndex &indexCount,
+                                             ValueInternalArray::PageIndex minNewIndexCount ) = 0;
+      virtual void releaseArrayPageIndex( Value **indexes,
+                                          ValueInternalArray::PageIndex indexCount ) = 0;
+      virtual Value *allocateArrayPage() = 0;
+      virtual void releaseArrayPage( Value *value ) = 0;
+   };
+#endif // #ifdef JSON_VALUE_USE_INTERNAL_MAP
+
+
+   /** \brief base class for Value iterators.
+    *
+    */
+   class ValueIteratorBase
+   {
+   public:
+      typedef unsigned int size_t;
+      typedef int difference_type;
+      typedef ValueIteratorBase SelfType;
+
+      ValueIteratorBase();
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+      explicit ValueIteratorBase( const Value::ObjectValues::iterator &current );
+#else
+      ValueIteratorBase( const ValueInternalArray::IteratorState &state );
+      ValueIteratorBase( const ValueInternalMap::IteratorState &state );
+#endif
+      /// Equality and inequality are both delegated to isEqual().
+      bool operator ==( const SelfType &other ) const
+      {
+         return isEqual( other );
+      }
+
+      bool operator !=( const SelfType &other ) const
+      {
+         return !isEqual( other );
+      }
+      /// Difference of two iterators, as returned by computeDistance( other ).
+      difference_type operator -( const SelfType &other ) const
+      {
+         return computeDistance( other );
+      }
+
+      /// Return either the index or the member name of the referenced value as a Value.
+      Value key() const;
+
+      /// Return the index of the referenced Value. -1 converted to UInt, i.e. UInt(-1), if it is not an arrayValue.
+      UInt index() const;
+
+      /// Return the member name of the referenced Value. "" if it is not an objectValue.
+      const char *memberName() const;
+
+   protected:
+      Value &deref() const;   // used by the derived iterators' operator *()
+
+      void increment();       // used by the derived iterators' operator ++()
+
+      void decrement();       // used by the derived iterators' operator --()
+
+      difference_type computeDistance( const SelfType &other ) const;
+
+      bool isEqual( const SelfType &other ) const;
+
+      void copy( const SelfType &other );
+
+   private:
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+      Value::ObjectValues::iterator current_;
+      // Indicates that iterator is for a null value.
+      bool isNull_;
+#else
+      union
+      {
+         ValueInternalArray::IteratorState array_;
+         ValueInternalMap::IteratorState map_;
+      } iterator_;
+      bool isArray_;   // presumably selects which member of iterator_ is in use -- TODO confirm
+#endif
+   };
+
+   /** \brief const iterator for object and array value.
+    *
+    */
+   class ValueConstIterator : public ValueIteratorBase
+   {
+      friend class Value;
+   public:
+      typedef unsigned int size_t;
+      typedef int difference_type;
+      typedef const Value &reference;
+      typedef const Value *pointer;
+      typedef ValueConstIterator SelfType;
+
+      ValueConstIterator();
+   private:
+      /*! \internal Used by Value to create an iterator.
+       */
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+      explicit ValueConstIterator( const Value::ObjectValues::iterator &current );
+#else
+      ValueConstIterator( const ValueInternalArray::IteratorState &state );
+      ValueConstIterator( const ValueInternalMap::IteratorState &state );
+#endif
+   public:
+      SelfType &operator =( const ValueIteratorBase &other );
+      /// Post-increment: steps forward and returns a copy taken before the step.
+      SelfType operator++( int )
+      {
+         SelfType temp( *this );
+         ++*this;
+         return temp;
+      }
+      /// Post-decrement: steps backward and returns a copy taken before the step.
+      SelfType operator--( int )
+      {
+         SelfType temp( *this );
+         --*this;
+         return temp;
+      }
+      /// Pre-decrement: calls decrement() and returns this iterator.
+      SelfType &operator--()
+      {
+         decrement();
+         return *this;
+      }
+      /// Pre-increment: calls increment() and returns this iterator.
+      SelfType &operator++()
+      {
+         increment();
+         return *this;
+      }
+      /// Read-only access to the referenced Value, obtained from deref().
+      reference operator *() const
+      {
+         return deref();
+      }
+   };
+
+
+   /** \brief Iterator for object and array value.
+    */
+   class ValueIterator : public ValueIteratorBase
+   {
+      friend class Value;
+   public:
+      typedef unsigned int size_t;
+      typedef int difference_type;
+      typedef Value &reference;
+      typedef Value *pointer;
+      typedef ValueIterator SelfType;
+
+      ValueIterator();
+      ValueIterator( const ValueConstIterator &other );   // NOTE(review): not explicit, so a const iterator converts implicitly to a mutable one
+      ValueIterator( const ValueIterator &other );
+   private:
+      /*! \internal Used by Value to create an iterator.
+       */
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+      explicit ValueIterator( const Value::ObjectValues::iterator &current );
+#else
+      ValueIterator( const ValueInternalArray::IteratorState &state );
+      ValueIterator( const ValueInternalMap::IteratorState &state );
+#endif
+   public:
+
+      SelfType &operator =( const SelfType &other );
+      /// Post-increment: steps forward and returns a copy taken before the step.
+      SelfType operator++( int )
+      {
+         SelfType temp( *this );
+         ++*this;
+         return temp;
+      }
+      /// Post-decrement: steps backward and returns a copy taken before the step.
+      SelfType operator--( int )
+      {
+         SelfType temp( *this );
+         --*this;
+         return temp;
+      }
+      /// Pre-decrement: calls decrement() and returns this iterator.
+      SelfType &operator--()
+      {
+         decrement();
+         return *this;
+      }
+      /// Pre-increment: calls increment() and returns this iterator.
+      SelfType &operator++()
+      {
+         increment();
+         return *this;
+      }
+      /// Mutable access to the referenced Value, obtained from deref().
+      reference operator *() const
+      {
+         return deref();
+      }
+   };
+
+
+} // namespace Json
+
+
+#endif // CPPTL_JSON_H_INCLUDED
diff --git a/third_party/jsoncpp/overrides/src/lib_json/json_reader.cpp b/third_party/jsoncpp/overrides/src/lib_json/json_reader.cpp
new file mode 100644
index 0000000..f8cfad7
--- /dev/null
+++ b/third_party/jsoncpp/overrides/src/lib_json/json_reader.cpp
@@ -0,0 +1,920 @@
+// Copyright 2007-2011 Baptiste Lepilleur
+// Distributed under MIT license, or public domain if desired and
+// recognized in your jurisdiction.
+// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
+
+#if !defined(JSON_IS_AMALGAMATION)
+# include <json/assertions.h>
+# include <json/reader.h>
+# include <json/value.h>
+# include "json_tool.h"
+#endif // if !defined(JSON_IS_AMALGAMATION)
+#include <utility>
+#include <cstdio>
+#include <cassert>
+#include <cstring>
+#include <stdexcept>
+#include <string>
+#include <istream>
+
+#if _MSC_VER >= 1400 // VC++ 8.0
+#pragma warning( disable : 4996 )   // disable warning about strdup being deprecated.
+#endif
+
+namespace Json {
+
+// Implementation of class Features
+// ////////////////////////////////
+
+// Default feature set: comments are allowed and the strict-root check is off.
+Features::Features()
+   : allowComments_( true ),
+     strictRoot_( false )
+{}
+
+
+// Returns the most permissive feature set, i.e. a default-constructed Features.
+Features Features::all()
+{
+   Features permissive;
+   return permissive;
+}
+
+
+// Returns a feature set with comments disallowed and the strict-root check on.
+Features Features::strictMode()
+{
+   Features strict;
+   strict.strictRoot_ = true;
+   strict.allowComments_ = false;
+   return strict;
+}
+
+// Implementation of class Reader
+// ////////////////////////////////
+
+
+// True when c equals any one of the four candidate characters.
+static inline bool in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 )
+{
+   return !( c != c1  &&  c != c2  &&  c != c3  &&  c != c4 );
+}
+
+// True when c equals any one of the five candidate characters.
+static inline bool in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 )
+{
+   return !( c != c1  &&  c != c2  &&  c != c3  &&  c != c4  &&  c != c5 );
+}
+
+
+// Reports whether any character in the range [begin, end) is a line break,
+// i.e. '\n' or '\r'.
+static bool containsNewLine( Reader::Location begin, Reader::Location end )
+{
+   for ( Reader::Location scan = begin; scan < end; ++scan )
+      if ( *scan == '\r'  ||  *scan == '\n' )
+         return true;
+   return false;
+}
+
+
+// Class Reader
+// //////////////////////////////////////////////////////////////////
+
+// Builds a reader that uses the permissive feature set Features::all().
+// The other members listed here are value-initialized.
+Reader::Reader()
+    : errors_(),
+      document_(),
+      begin_(), end_(), current_(),
+      lastValueEnd_(),
+      lastValue_(),
+      commentsBefore_(),
+      features_( Features::all() ),
+      collectComments_()
+{
+}
+
+
+// Builds a reader that uses the caller-supplied feature set.
+// The other members listed here are value-initialized.
+Reader::Reader( const Features &features )
+    : errors_(),
+      document_(),
+      begin_(), end_(), current_(),
+      lastValueEnd_(),
+      lastValue_(),
+      commentsBefore_(),
+      features_( features ),
+      collectComments_()
+{
+}
+
+
+// Parses a complete document held in a std::string. The text is first copied
+// into document_, and the character range handed to the pointer-based overload
+// points into that copy rather than into the caller's string.
+bool Reader::parse( const std::string &document, Value &root, bool collectComments )
+{
+   document_ = document;
+   const char *const first = document_.c_str();
+   const char *const last = first + document_.length();
+   return parse( first, last, root, collectComments );
+}
+
+
+// Parses a document read from a stream. The stream is drained to end-of-file
+// into a temporary string, which is then handed to the std::string overload.
+bool Reader::parse( std::istream& sin, Value &root, bool collectComments )
+{
+   // Read raw chunks instead of std::getline(sin, doc, (char)EOF): that call
+   // treats byte 0xFF as a delimiter and silently truncates the document there.
+   std::string doc;
+   char chunk[4096];
+   while ( sin.read( chunk, sizeof( chunk ) )  ||  sin.gcount() > 0 )
+      doc.append( chunk, static_cast<std::string::size_type>( sin.gcount() ) );
+   // read() raises failbit on the short final chunk; getline() only left eofbit
+   // set after extracting data, so restore that state for callers testing sin.
+   if ( !doc.empty() )
+      sin.clear( sin.rdstate() & ~std::ios_base::failbit );
+   return parse( doc, root, collectComments );
+}
+
+bool 
+Reader::parse( const char *beginDoc, const char *endDoc, 
+               Value &root,
+               bool collectComments )
+{  // Parses the characters in [beginDoc, endDoc) into root.
+   if ( !features_.allowComments_ )
+   {
+      collectComments = false;   // nothing to collect when comments are not allowed
+   }
+   // Reset all per-document parser state.
+   begin_ = beginDoc;
+   end_ = endDoc;
+   collectComments_ = collectComments;
+   current_ = begin_;
+   lastValueEnd_ = 0;
+   lastValue_ = 0;
+   commentsBefore_ = "";
+   errors_.clear();
+   while ( !nodes_.empty() )
+      nodes_.pop();
+   nodes_.push( &root );   // root becomes currentValue() for the first readValue()
+   // Parse the root value, then read the first non-comment token after it.
+   bool successful = readValue();
+   Token token;
+   skipCommentTokens( token );   // NOTE(review): this token's type is never checked, so trailing content is not reported
+   if ( collectComments_  &&  !commentsBefore_.empty() )
+      root.setComment( commentsBefore_, commentAfter );   // comments still pending are attached after root
+   if ( features_.strictRoot_ )
+   {
+      if ( !root.isArray()  &&  !root.isObject() )
+      {
+         // Set error location to start of doc, ideally should be first token found in doc
+         token.type_ = tokenError;
+         token.start_ = beginDoc;
+         token.end_ = endDoc;
+         addError( "A valid JSON document must be either an array or an object value.",
+                   token );
+         return false;
+      }
+   }
+   return successful;   // result of readValue() for the root
+}
+
+
+bool
+Reader::readValue()
+{  // Reads one value (object, array, number, string, true, false or null) into currentValue().
+   Token token;
+   skipCommentTokens( token );
+   bool successful = true;
+   // Comments collected before this value are attached to it as commentBefore.
+   if ( collectComments_  &&  !commentsBefore_.empty() )
+   {
+      currentValue().setComment( commentsBefore_, commentBefore );
+      commentsBefore_ = "";
+   }
+
+   // Dispatch on the token type; true, false and null are assigned in place.
+   switch ( token.type_ )
+   {
+   case tokenObjectBegin:
+      successful = readObject( token );
+      break;
+   case tokenArrayBegin:
+      successful = readArray( token );
+      break;
+   case tokenNumber:
+      successful = decodeNumber( token );
+      break;
+   case tokenString:
+      successful = decodeString( token );
+      break;
+   case tokenTrue:
+      currentValue() = true;
+      break;
+   case tokenFalse:
+      currentValue() = false;
+      break;
+   case tokenNull:
+      currentValue() = Value();
+      break;
+   default:
+      return addError( "Syntax error: value, object or array expected.", token );   // addError() returns false
+   }
+   // Remember where this value ended; readComment() uses it to spot a same-line comment.
+   if ( collectComments_ )
+   {
+      lastValueEnd_ = current_;
+      lastValue_ = &currentValue();
+   }
+
+   return successful;
+}
+
+
+// Reads the next token into `token`. When comments are allowed, comment tokens
+// are read and dropped until a token of another type turns up.
+void Reader::skipCommentTokens( Token &token )
+{
+   if ( !features_.allowComments_ )
+   {
+      readToken( token );
+      return;
+   }
+
+   do
+   {
+      readToken( token );
+   }
+   while ( token.type_ == tokenComment );
+}
+
+
+bool 
+Reader::expectToken( TokenType type, Token &token, const char *message )
+{
+   readToken( token );
+   if ( token.type_ != type )
+      return addError( message, token );
+   return true;
+}
+
+
+bool 
+Reader::readToken( Token &token )
+{  // Scans the next token into `token`; always returns true, a malformed token gets type tokenError.
+   skipSpaces();
+   token.start_ = current_;
+   Char c = getNextChar();
+   bool ok = true;
+   switch ( c )   // the first character decides the token type
+   {
+   case '{':
+      token.type_ = tokenObjectBegin;
+      break;
+   case '}':
+      token.type_ = tokenObjectEnd;
+      break;
+   case '[':
+      token.type_ = tokenArrayBegin;
+      break;
+   case ']':
+      token.type_ = tokenArrayEnd;
+      break;
+   case '"':
+      token.type_ = tokenString;
+      ok = readString();
+      break;
+   case '/':
+      token.type_ = tokenComment;
+      ok = readComment();   // recognised even when features_.allowComments_ is false
+      break;
+   case '0':
+   case '1':
+   case '2':
+   case '3':
+   case '4':
+   case '5':
+   case '6':
+   case '7':
+   case '8':
+   case '9':
+   case '-':
+      token.type_ = tokenNumber;
+      readNumber();   // returns void: validation is left to decodeNumber()
+      break;
+   case 't':
+      token.type_ = tokenTrue;
+      ok = match( "rue", 3 );   // the 't' itself was already consumed
+      break;
+   case 'f':
+      token.type_ = tokenFalse;
+      ok = match( "alse", 4 );
+      break;
+   case 'n':
+      token.type_ = tokenNull;
+      ok = match( "ull", 3 );
+      break;
+   case ',':
+      token.type_ = tokenArraySeparator;
+      break;
+   case ':':
+      token.type_ = tokenMemberSeparator;
+      break;
+   case 0:   // getNextChar() returns 0 at end_; a literal NUL byte lands here too
+      token.type_ = tokenEndOfStream;
+      break;
+   default:
+      ok = false;
+      break;
+   }
+   if ( !ok )
+      token.type_ = tokenError;
+   token.end_ = current_;   // one past the last character consumed for this token
+   return true;
+}
+
+
+// Advances current_ past any run of spaces, tabs, carriage returns and line
+// feeds, stopping at the first other character or at end_.
+void Reader::skipSpaces()
+{
+   for ( ; current_ != end_; ++current_ )
+   {
+      const Char ch = *current_;
+      const bool isBlank = ( ch == ' '  ||  ch == '\t'  ||  ch == '\r'  ||  ch == '\n' );
+      if ( !isBlank )
+         return;
+   }
+}
+
+
+// Checks whether the next patternLength characters equal `pattern`. On a match
+// they are consumed; otherwise current_ is left where it was.
+bool Reader::match( Location pattern, int patternLength )
+{
+   const bool enoughInput = !( end_ - current_ < patternLength );
+   if ( !enoughInput )
+      return false;
+   for ( int offset = patternLength - 1; offset >= 0; --offset )
+      if ( current_[offset] != pattern[offset] )
+         return false;
+   current_ += patternLength;
+   return true;
+}
+
+
+// Reads a comment whose leading '/' was just consumed. Returns false when the
+// text is not a well-formed comment. With collectComments_ set, the comment is
+// stored: attached to the previous value if it sits on that value's line,
+// otherwise queued in commentsBefore_.
+bool Reader::readComment()
+{
+   const Location commentBegin = current_ - 1;
+   const Char kind = getNextChar();
+   bool wellFormed = false;
+   if ( kind == '/' )
+      wellFormed = readCppStyleComment();
+   else if ( kind == '*' )
+      wellFormed = readCStyleComment();
+   if ( !wellFormed )
+      return false;
+   if ( !collectComments_ )
+      return true;
+   CommentPlacement placement = commentBefore;
+   const bool followsValueOnSameLine =
+      lastValueEnd_  &&  !containsNewLine( lastValueEnd_, commentBegin );
+   if ( followsValueOnSameLine  &&
+        ( kind != '*'  ||  !containsNewLine( commentBegin, current_ ) ) )
+      placement = commentAfterOnSameLine;
+   addComment( commentBegin, current_, placement );
+   return true;
+}
+
+
+// Stores the comment text [begin, end). A same-line comment is attached to the
+// last value read; any other comment is appended to commentsBefore_, separated
+// from earlier pending comments by a newline.
+void Reader::addComment( Location begin, Location end, CommentPlacement placement )
+{
+   assert( collectComments_ );
+   const std::string text( begin, end );
+   if ( placement == commentAfterOnSameLine )
+   {
+      assert( lastValue_ != 0 );
+      lastValue_->setComment( text, placement );
+      return;
+   }
+
+   if ( !commentsBefore_.empty() )
+      commentsBefore_ += "\n";
+   commentsBefore_ += text;
+}
+
+
+// Consumes the rest of a /* */ comment (its "/*" is already read); true only if "*/" was found.
+bool Reader::readCStyleComment()
+{
+   while ( current_ != end_ )
+   {
+      Char c = getNextChar();
+      if ( c == '*'  &&  current_ != end_  &&  *current_ == '/' )   // end_ is one past the buffer: never read it
+         break;
+   }
+   return getNextChar() == '/';
+}
+
+
+// Consumes a // comment through its first '\r' or '\n' (or up to end_). Always returns true.
+bool Reader::readCppStyleComment()
+{
+   bool lineEnded = false;
+   while ( !lineEnded  &&  current_ != end_ )
+   {
+      const Char ch = getNextChar();
+      lineEnded = ( ch == '\n'  ||  ch == '\r' );
+   }
+   return true;
+}
+
+
+// Advances current_ past every following digit, '.', 'e', 'E', '+' or '-'.
+void Reader::readNumber()
+{
+   for ( ; current_ != end_; ++current_ )
+   {
+      const Char ch = *current_;
+      const bool isDigit = ( ch >= '0'  &&  ch <= '9' );
+      if ( !isDigit  &&  !in( ch, '.', 'e', 'E', '+', '-' ) )
+         break;
+   }
+}
+
+// Scans a string whose opening quote is consumed; '\\' skips the next char. True if a closing '"' was reached.
+bool Reader::readString()
+{
+   Char last = 0;
+   while ( current_ != end_ )
+   {
+      last = getNextChar();
+      if ( last == '"' )
+         break;
+      if ( last == '\\' )
+         getNextChar();
+   }
+   return last == '"';
+}
+
+
+bool 
+Reader::readObject( Token &/*tokenStart*/ )
+{  // Parses the members of an object into currentValue(); the token argument is unused.
+   Token tokenName;
+   std::string name;
+   currentValue() = Value( objectValue );
+   while ( readToken( tokenName ) )   // readToken() always returns true; the loop exits via break/return
+   {
+      bool initialTokenOk = true;
+      while ( tokenName.type_ == tokenComment  &&  initialTokenOk )
+         initialTokenOk = readToken( tokenName );
+      if  ( !initialTokenOk )
+         break;
+      if ( tokenName.type_ == tokenObjectEnd  &&  name.empty() )  // empty object
+         return true;
+      if ( tokenName.type_ != tokenString )
+         break;
+      // Decode the member name (decodeString appends, so clear the previous name first).
+      name = "";
+      if ( !decodeString( tokenName, name ) )
+         return recoverFromError( tokenObjectEnd );
+      // The member name must be followed by ':'.
+      Token colon;
+      if ( !readToken( colon ) ||  colon.type_ != tokenMemberSeparator )
+      {
+         return addErrorAndRecover( "Missing ':' after object member name", 
+                                    colon, 
+                                    tokenObjectEnd );
+      }
+      Value &value = currentValue()[ name ];
+      nodes_.push( &value );   // make the member the current value while it is parsed
+      bool ok = readValue();
+      nodes_.pop();
+      if ( !ok ) // error already set
+         return recoverFromError( tokenObjectEnd );
+      // After the value: ',' continues, '}' ends the object; comments are skipped.
+      Token comma;
+      if ( !readToken( comma )
+            ||  ( comma.type_ != tokenObjectEnd  &&  
+                  comma.type_ != tokenArraySeparator &&
+                  comma.type_ != tokenComment ) )
+      {
+         return addErrorAndRecover( "Missing ',' or '}' in object declaration", 
+                                    comma, 
+                                    tokenObjectEnd );
+      }
+      bool finalizeTokenOk = true;
+      while ( comma.type_ == tokenComment &&
+              finalizeTokenOk )
+         finalizeTokenOk = readToken( comma );
+      if ( comma.type_ == tokenObjectEnd )
+         return true;
+   }
+   return addErrorAndRecover( "Missing '}' or object member name", 
+                              tokenName, 
+                              tokenObjectEnd );
+}
+
+
+// Parses an array whose '[' token has already been read, storing the elements
+// in currentValue(). On a malformed element or separator it skips ahead to a
+// ']' token or the end of input and returns false.
+bool Reader::readArray( Token &/*tokenStart*/ )
+{
+   currentValue() = Value( arrayValue );
+   skipSpaces();
+   // skipSpaces() may stop at end_ (e.g. the input is just "["); end_ is one
+   // past the buffer and must not be dereferenced.
+   if ( current_ != end_  &&  *current_ == ']' ) // empty array
+   {
+      Token endArray;
+      readToken( endArray );
+      return true;
+   }
+   int index = 0;
+   for (;;)
+   {
+      // Parse the next element directly into its slot of the array.
+      Value &value = currentValue()[ index++ ];
+      nodes_.push( &value );
+      bool ok = readValue();
+      nodes_.pop();
+      if ( !ok ) // error already set
+         return recoverFromError( tokenArrayEnd );
+
+      Token token;
+      // Accept Comment after last item in the array.
+      ok = readToken( token );
+      while ( token.type_ == tokenComment  &&  ok )
+         ok = readToken( token );
+      const bool badTokenType = ( token.type_ != tokenArraySeparator  &&
+                                  token.type_ != tokenArrayEnd );
+      if ( !ok  ||  badTokenType )
+         return addErrorAndRecover( "Missing ',' or ']' in array declaration",
+                                    token, tokenArrayEnd );
+      if ( token.type_ == tokenArrayEnd )
+         break;
+   }
+   return true;
+}
+
+
+// Decodes a number token into currentValue(). Tokens containing '.', 'e', 'E',
+// '+' or a non-leading '-' are decoded as doubles. Anything else is parsed as
+// an integer, falling back to a double when the value does not fit in the
+// largest integer type.
+bool Reader::decodeNumber( Token &token )
+{
+   bool isDouble = false;
+   for ( Location inspect = token.start_; inspect != token.end_; ++inspect )
+      isDouble = isDouble
+                 ||  in( *inspect, '.', 'e', 'E', '+' )
+                 ||  ( *inspect == '-'  &&  inspect != token.start_ );
+   if ( isDouble )
+      return decodeDouble( token );
+   // Attempts to parse the number as an integer. If the number is
+   // larger than the maximum supported value of an integer then
+   // we decode the number as a double.
+   Location current = token.start_;
+   bool isNegative = *current == '-';
+   if ( isNegative )
+      ++current;
+   // Magnitude of minLargestInt, computed in unsigned arithmetic: negating
+   // minLargestInt itself would overflow the signed type (undefined behaviour).
+   Value::LargestUInt maxIntegerValue = isNegative
+      ? Value::LargestUInt( 0 ) - Value::LargestUInt( Value::minLargestInt )
+      : Value::maxLargestUInt;
+   Value::LargestUInt threshold = maxIntegerValue / 10;
+   Value::LargestUInt value = 0;
+   while ( current < token.end_ )
+   {
+      Char c = *current++;
+      if ( c < '0'  ||  c > '9' )
+         return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
+      Value::UInt digit(c - '0');
+      // At or past maxIntegerValue / 10: only a final digit that still fits in
+      // maxIntegerValue % 10 keeps the integer in range; otherwise use a double.
+      if ( value >= threshold  &&
+           ( value > threshold  ||  current != token.end_  ||
+             digit > maxIntegerValue % 10 ) )
+         return decodeDouble( token );
+      value = value * 10 + digit;
+   }
+   if ( isNegative  &&  value == maxIntegerValue )
+      // -LargestInt( value ) would overflow here; the result is exactly the minimum.
+      currentValue() = Value::minLargestInt;
+   else if ( isNegative )
+      currentValue() = -Value::LargestInt( value );
+   else if ( value <= Value::LargestUInt(Value::maxInt) )
+      currentValue() = Value::LargestInt( value );
+   else
+      currentValue() = value;
+   return true;
+}
+
+
+// Decodes a number token as a double and stores it in currentValue(). The
+// token text is copied into a NUL-terminated buffer (on the stack when it is
+// at most bufferSize characters, otherwise in a std::string) and parsed with
+// sscanf. Records an error and returns false when sscanf converts nothing.
+bool Reader::decodeDouble( Token &token )
+{
+   const int bufferSize = 32;
+   const int length = int(token.end_ - token.start_);
+
+   // Sanity check to avoid buffer overflow exploits.
+   if ( length < 0 )
+      return addError( "Unable to parse token length", token );
+
+   // The format is kept in a writable array rather than passed as a string
+   // constant, which can cause hard to debug crashes on OS X. See:
+   //
+   //     http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
+   char format[] = "%lf";
+   double parsed = 0;
+   int converted;
+   if ( length > bufferSize )
+   {
+      const std::string text( token.start_, token.end_ );
+      converted = sscanf( text.c_str(), format, &parsed );
+   }
+   else
+   {
+      Char stackText[bufferSize+1];
+      memcpy( stackText, token.start_, length );
+      stackText[length] = 0;
+      converted = sscanf( stackText, format, &parsed );
+   }
+
+   if ( converted != 1 )
+      return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
+   currentValue() = parsed;
+   return true;
+}
+
+
+// Decodes a string token and, on success, assigns the text to currentValue().
+bool Reader::decodeString( Token &token )
+{
+   std::string text;
+   const bool decodedOk = decodeString( token, text );
+   if ( decodedOk )
+      currentValue() = text;
+   return decodedOk;
+}
+
+
+bool 
+Reader::decodeString( Token &token, std::string &decoded )
+{  // Appends the unescaped contents of a string token to `decoded` (which is not cleared first).
+   decoded.reserve( token.end_ - token.start_ - 2 );   // token length minus the two quotes
+   Location current = token.start_ + 1; // skip '"'
+   Location end = token.end_ - 1;      // do not include '"'
+   while ( current != end )
+   {
+      Char c = *current++;
+      if ( c == '"' )
+         break;
+      else if ( c == '\\' )
+      {
+         if ( current == end )
+            return addError( "Empty escape sequence in string", token, current );
+         Char escape = *current++;
+         switch ( escape )   // map the escape letter to the character it stands for
+         {
+         case '"': decoded += '"'; break;
+         case '/': decoded += '/'; break;
+         case '\\': decoded += '\\'; break;
+         case 'b': decoded += '\b'; break;
+         case 'f': decoded += '\f'; break;
+         case 'n': decoded += '\n'; break;
+         case 'r': decoded += '\r'; break;
+         case 't': decoded += '\t'; break;
+         case 'u':
+            {
+               unsigned int unicode;
+               if ( !decodeUnicodeCodePoint( token, current, end, unicode ) )
+                  return false;   // the error was already recorded by the callee
+               decoded += codePointToUTF8(unicode);
+            }
+            break;
+         default:
+            return addError( "Bad escape sequence in string", token, current );
+         }
+      }
+      else
+      {
+         decoded += c;   // ordinary character: copied through unchanged
+      }
+   }
+   return true;
+}
+
+bool
+Reader::decodeUnicodeCodePoint( Token &token, 
+                                     Location &current, 
+                                     Location end, 
+                                     unsigned int &unicode )
+{
+   // Decode one \uXXXX escape; a value in 0xD800-0xDBFF must be followed by a second escape.
+   if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) )
+      return false;
+   if (unicode >= 0xD800 && unicode <= 0xDBFF)
+   {
+      // surrogate pairs
+      if (end - current < 6)   // room is needed for the six characters of "\uXXXX"
+         return addError( "additional six characters expected to parse unicode surrogate pair.", token, current );
+      unsigned int surrogatePair;
+      if (*(current++) == '\\' && *(current++)== 'u')
+      {
+         if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair ))
+         {
+            unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);   // NOTE(review): surrogatePair is not checked to lie in 0xDC00-0xDFFF
+         } 
+         else
+            return false;
+      } 
+      else
+         return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current );
+   }
+   return true;
+}
+
+// Reads exactly four hexadecimal digits at `current` into `unicode`, advancing
+// `current` past each character examined. Records an error and returns false if
+// fewer than four characters remain before `end` or a non-hex character is met.
+bool Reader::decodeUnicodeEscapeSequence( Token &token, Location &current,
+                                          Location end, unsigned int &unicode )
+{
+   if ( end - current < 4 )
+      return addError( "Bad unicode escape sequence in string: four digits expected.", token, current );
+   unicode = 0;
+   for ( int remaining = 4; remaining > 0; --remaining )
+   {
+      const Char c = *current++;
+      unicode <<= 4;
+      if ( c >= '0'  &&  c <= '9' )
+         unicode += c - '0';
+      else if ( c >= 'a'  &&  c <= 'f' )
+         unicode += c - 'a' + 10;
+      else if ( c >= 'A'  &&  c <= 'F' )
+         unicode += c - 'A' + 10;
+      else
+         return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current );
+   }
+   return true;
+}
+
+
+bool 
+Reader::addError( const std::string &message, 
+                  Token &token,
+                  Location extra )
+{
+   ErrorInfo info;
+   info.token_ = token;
+   info.message_ = message;
+   info.extra_ = extra;
+   errors_.push_back( info );
+   return false;
+}
+
+
+// Skips tokens until one of type skipUntilToken, or the end of the stream, has
+// been read. Errors recorded while skipping are discarded. Always returns false.
+bool Reader::recoverFromError( TokenType skipUntilToken )
+{
+   const int errorCount = int(errors_.size());
+   Token skip;
+   do
+   {
+      if ( !readToken(skip) )
+         errors_.resize( errorCount ); // discard errors caused by recovery
+   }
+   while ( skip.type_ != skipUntilToken  &&  skip.type_ != tokenEndOfStream );
+   errors_.resize( errorCount );
+   return false;
+}
+
+
+// Records `message` for `token`, then skips ahead as recoverFromError() does.
+// Returns recoverFromError()'s result.
+bool Reader::addErrorAndRecover( const std::string &message, Token &token,
+                                 TokenType skipUntilToken )
+{
+   addError( message, token );
+   return recoverFromError( skipUntilToken );
+}
+
+
+// Returns the value currently being filled in: the top of the nodes_ stack.
+Value &Reader::currentValue()
+{
+   return *nodes_.top();
+}
+
+
+// Returns the character at current_ and advances past it, or 0 once end_ is reached.
+Reader::Char Reader::getNextChar()
+{
+   if ( current_ != end_ )
+      return *current_++;
+   return 0;
+}
+
+
+// Computes the 1-based line and column of `location` within the document.
+// "\r\n", a lone '\r' and a lone '\n' each count as a single line break.
+void Reader::getLocationLineAndColumn( Location location, int &line, int &column ) const
+{
+   Location current = begin_;
+   Location lastLineStart = current;
+   line = 0;
+   while ( current < location  &&  current != end_ )
+   {
+      Char c = *current++;
+      if ( c == '\r' )
+      {
+         // A '\r' may be the last character: end_ is one past the buffer, so check before peeking.
+         if ( current != end_  &&  *current == '\n' )
+            ++current;
+         lastLineStart = current;
+         ++line;
+      }
+      else if ( c == '\n' )
+      {
+         lastLineStart = current;
+         ++line;
+      }
+   }
+   // column & line start at 1
+   column = int(location - lastLineStart) + 1;
+   ++line;
+}
+
+
+// Formats the position of `location` as "Line <n>, Column <m>" (both 1-based).
+std::string Reader::getLocationLineAndColumn( Location location ) const
+{
+   int line = 0, column = 0;
+   getLocationLineAndColumn( location, line, column );
+   char text[18+16+16+1];
+   sprintf( text, "Line %d, Column %d", line, column );
+   return std::string( text );
+}
+
+
+// Deprecated misspelled alias, preserved for backward compatibility; it simply
+// forwards to getFormattedErrorMessages().
+std::string Reader::getFormatedErrorMessages() const
+{
+    return getFormattedErrorMessages();
+}
+
+
+std::string 
+Reader::getFormattedErrorMessages() const
+{
+   std::string formattedMessage;
+   for ( Errors::const_iterator itError = errors_.begin();
+         itError != errors_.end();
+         ++itError )
+   {
+      const ErrorInfo &error = *itError;
+      formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n";
+      formattedMessage += "  " + error.message_ + "\n";
+      if ( error.extra_ )
+         formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n";
+   }
+   return formattedMessage;
+}
+
+
+// Parses the whole of `sin` into `root`, collecting comments. On failure the
+// formatted errors are written to stderr and JSON_FAIL_MESSAGE is invoked.
+std::istream& operator>>( std::istream &sin, Value &root )
+{
+    Json::Reader reader;
+    if ( reader.parse(sin, root, true) )
+        return sin;
+    fprintf(
+        stderr,
+        "Error from reader: %s",
+        reader.getFormattedErrorMessages().c_str());
+    JSON_FAIL_MESSAGE("reader error");
+    return sin;
+}
+
+
+} // namespace Json
diff --git a/third_party/jsoncpp/overrides/src/lib_json/json_value.cpp b/third_party/jsoncpp/overrides/src/lib_json/json_value.cpp
new file mode 100644
index 0000000..b66dd99
--- /dev/null
+++ b/third_party/jsoncpp/overrides/src/lib_json/json_value.cpp
@@ -0,0 +1,1930 @@
+// Copyright 2011 Baptiste Lepilleur
+// Distributed under MIT license, or public domain if desired and
+// recognized in your jurisdiction.
+// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
+
+#if !defined(JSON_IS_AMALGAMATION)
+# include <json/assertions.h>
+# include <json/value.h>
+# include <json/writer.h>
+# ifndef JSON_USE_SIMPLE_INTERNAL_ALLOCATOR
+#  include "json_batchallocator.h"
+# endif // #ifndef JSON_USE_SIMPLE_INTERNAL_ALLOCATOR
+#endif // if !defined(JSON_IS_AMALGAMATION)
+#include <math.h>
+#include <sstream>
+#include <utility>
+#include <stdexcept>
+#include <cstring>
+#include <cassert>
+#ifdef JSON_USE_CPPTL
+# include <cpptl/conststring.h>
+#endif
+#include <cstddef>    // size_t
+
+#define JSON_ASSERT_UNREACHABLE assert( false )
+
+namespace Json {
+
+// This is a workaround to avoid the static initialization of Value::null.
+// kNull must be word-aligned to avoid crashing on ARM.  We use an alignment of
+// 8 (instead of 4) as a bit of future-proofing.
+#if defined(__ARMEL__)
+#define ALIGNAS(byte_alignment) __attribute__((aligned(byte_alignment)))
+#else
+#define ALIGNAS(byte_alignment)
+#endif
+static const unsigned char ALIGNAS(8) kNull[sizeof(Value)] = {0};
+const Value& Value::null = reinterpret_cast<const Value&>(kNull);
+
+const Int Value::minInt = Int( ~(UInt(-1)/2) );
+const Int Value::maxInt = Int( UInt(-1)/2 );
+const UInt Value::maxUInt = UInt(-1);
+# if defined(JSON_HAS_INT64)
+const Int64 Value::minInt64 = Int64( ~(UInt64(-1)/2) );
+const Int64 Value::maxInt64 = Int64( UInt64(-1)/2 );
+const UInt64 Value::maxUInt64 = UInt64(-1);
+// The constant is hard-coded because some compilers have trouble
+// converting Value::maxUInt64 to a double correctly (AIX/xlC).
+// Assumes that UInt64 is a 64-bit integer.
+static const double maxUInt64AsDouble = 18446744073709551615.0;
+#endif // defined(JSON_HAS_INT64)
+const LargestInt Value::minLargestInt = LargestInt( ~(LargestUInt(-1)/2) );
+const LargestInt Value::maxLargestInt = LargestInt( LargestUInt(-1)/2 );
+const LargestUInt Value::maxLargestUInt = LargestUInt(-1);
+
+
+/// Unknown size marker
+static const unsigned int unknown = (unsigned)-1;
+
+#if !defined(JSON_USE_INT64_DOUBLE_CONVERSION)
+template <typename T, typename U>
+static inline bool InRange(double d, T min, U max) {
+   return d >= min && d <= max;
+}
+#else // if !defined(JSON_USE_INT64_DOUBLE_CONVERSION)
+static inline double integerToDouble( Json::UInt64 value )
+{
+    return static_cast<double>( Int64(value/2) ) * 2.0 + Int64(value & 1);
+}
+
+template<typename T>
+static inline double integerToDouble( T value )
+{
+    return static_cast<double>( value );
+}
+
+template <typename T, typename U>
+static inline bool InRange(double d, T min, U max) {
+   return d >= integerToDouble(min) && d <= integerToDouble(max);
+}
+#endif // if !defined(JSON_USE_INT64_DOUBLE_CONVERSION)
+
+
+/** Duplicates the specified string value.
+ * @param value Pointer to the string to duplicate. Must be zero-terminated if
+ *              length is "unknown".
+ * @param length Length of the value. If equal to unknown, it is computed with
+ *               strlen(value). Lengths >= Value::maxInt are clamped (see below).
+ * @return Pointer to a malloc()-allocated, zero-terminated copy of the string.
+ */
+static inline char *
+duplicateStringValue( const char *value,
+                      unsigned int length = unknown )
+{
+   if ( length == unknown )
+      length = (unsigned int)strlen(value);
+
+   // Avoid an integer overflow in the call to malloc below by limiting length
+   // to a sane value.
+   if (length >= (unsigned)Value::maxInt)
+      length = Value::maxInt - 1;
+
+   char *newString = static_cast<char *>( malloc( length + 1 ) );
+   JSON_ASSERT_MESSAGE( newString != 0, "Failed to allocate string value buffer" );
+   memcpy( newString, value, length );
+   newString[length] = 0;
+   return newString;
+}
+
+
+/** Free the string duplicated by duplicateStringValue().
+ */
+static inline void
+releaseStringValue( char *value )
+{
+   if ( value )
+      free( value );
+}
+
+} // namespace Json
+
+
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// ValueInternals...
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+#if !defined(JSON_IS_AMALGAMATION)
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+#  include "json_internalarray.inl"
+#  include "json_internalmap.inl"
+# endif // JSON_VALUE_USE_INTERNAL_MAP
+
+# include "json_valueiterator.inl"
+#endif // if !defined(JSON_IS_AMALGAMATION)
+
+namespace Json {
+
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// class Value::CommentInfo
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+
+
+Value::CommentInfo::CommentInfo()
+   : comment_( 0 )
+{
+}
+
+Value::CommentInfo::~CommentInfo()
+{
+   if ( comment_ )
+      releaseStringValue( comment_ );
+}
+
+
+void
+Value::CommentInfo::setComment( const char *text )
+{
+   if ( comment_ )
+      releaseStringValue( comment_ );
+   JSON_ASSERT( text != 0 );
+   JSON_ASSERT_MESSAGE( text[0]=='\0' || text[0]=='/', "Comments must start with /");
+   // It seems that /**/ style comments are acceptable as well.
+   comment_ = duplicateStringValue( text );
+}
+
+
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// class Value::CZString
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+# ifndef JSON_VALUE_USE_INTERNAL_MAP
+
+// Notes: index_ indicates if the string was allocated when
+// a string is stored.
+
+Value::CZString::CZString( ArrayIndex index )
+   : cstr_( 0 )
+   , index_( index )
+{
+}
+
+Value::CZString::CZString( const char *cstr, DuplicationPolicy allocate )
+   : cstr_( allocate == duplicate ? duplicateStringValue(cstr)
+                                  : cstr )
+   , index_( allocate )
+{
+}
+
+Value::CZString::CZString( const CZString &other )
+: cstr_( other.index_ != noDuplication &&  other.cstr_ != 0
+                ?  duplicateStringValue( other.cstr_ )   // non-static string: take a private copy
+                : other.cstr_ )                          // static string, or null for an index key
+   , index_( other.cstr_ ? (other.index_ == noDuplication ? noDuplication : duplicate)
+                         : other.index_ )                // no string: index_ holds the array index
+{
+}
+
+Value::CZString::~CZString()
+{
+   if ( cstr_  &&  index_ == duplicate )
+      releaseStringValue( const_cast<char *>( cstr_ ) );
+}
+
+void
+Value::CZString::swap( CZString &other )
+{
+   std::swap( cstr_, other.cstr_ );
+   std::swap( index_, other.index_ );
+}
+
+Value::CZString &
+Value::CZString::operator =( const CZString &other )
+{
+   CZString temp( other );
+   swap( temp );
+   return *this;
+}
+
+bool
+Value::CZString::operator<( const CZString &other ) const
+{
+   if ( cstr_ )
+      return strcmp( cstr_, other.cstr_ ) < 0;
+   return index_ < other.index_;
+}
+
+bool
+Value::CZString::operator==( const CZString &other ) const
+{
+   if ( cstr_ )
+      return strcmp( cstr_, other.cstr_ ) == 0;
+   return index_ == other.index_;
+}
+
+
+ArrayIndex
+Value::CZString::index() const
+{
+   return index_;
+}
+
+
+const char *
+Value::CZString::c_str() const
+{
+   return cstr_;
+}
+
+bool
+Value::CZString::isStaticString() const
+{
+   return index_ == noDuplication;
+}
+
+#endif // ifndef JSON_VALUE_USE_INTERNAL_MAP
+
+
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// class Value::Value
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+
+/*! \internal Default constructor initialization must be equivalent to:
+ * memset( this, 0, sizeof(Value) )
+ * This optimization is used in ValueInternalMap fast allocator.
+ */
+Value::Value( ValueType type )
+   : type_( type )
+   , allocated_( false )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+   , comments_( 0 )
+{
+   switch ( type )
+   {
+   case nullValue:
+      break;
+   case intValue:
+   case uintValue:
+      value_.int_ = 0;
+      break;
+   case realValue:
+      value_.real_ = 0.0;
+      break;
+   case stringValue:
+      value_.string_ = 0;
+      break;
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   case arrayValue:
+   case objectValue:
+      value_.map_ = new ObjectValues();
+      break;
+#else
+   case arrayValue:
+      value_.array_ = arrayAllocator()->newArray();
+      break;
+   case objectValue:
+      value_.map_ = mapAllocator()->newMap();
+      break;
+#endif
+   case booleanValue:
+      value_.bool_ = false;
+      break;
+   default:
+      JSON_ASSERT_UNREACHABLE;
+   }
+}
+
+
+Value::Value( UInt value )
+   : type_( uintValue )
+   , allocated_( false )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+   , comments_( 0 )
+{
+   value_.uint_ = value;
+}
+
+Value::Value( Int value )
+   : type_( intValue )
+   , allocated_( false )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+   , comments_( 0 )
+{
+   value_.int_ = value;
+}
+
+
+# if defined(JSON_HAS_INT64)
+Value::Value( Int64 value )
+   : type_( intValue )
+   , allocated_( false )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+   , comments_( 0 )
+{
+   value_.int_ = value;
+}
+
+
+Value::Value( UInt64 value )
+   : type_( uintValue )
+   , allocated_( false )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+   , comments_( 0 )
+{
+   value_.uint_ = value;
+}
+#endif // defined(JSON_HAS_INT64)
+
+Value::Value( double value )
+   : type_( realValue )
+   , allocated_( false )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+   , comments_( 0 )
+{
+   value_.real_ = value;
+}
+
+Value::Value( const char *value )
+   : type_( stringValue )
+   , allocated_( true )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+   , comments_( 0 )
+{
+   value_.string_ = duplicateStringValue( value );
+}
+
+
+Value::Value( const char *beginValue,
+              const char *endValue )
+   : type_( stringValue )
+   , allocated_( true )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+   , comments_( 0 )
+{
+   value_.string_ = duplicateStringValue( beginValue,
+                                          (unsigned int)(endValue - beginValue) );
+}
+
+
+Value::Value( const std::string &value )
+   : type_( stringValue )
+   , allocated_( true )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+   , comments_( 0 )
+{
+   value_.string_ = duplicateStringValue( value.c_str(),
+                                          (unsigned int)value.length() );
+
+}
+
+Value::Value( const StaticString &value )
+   : type_( stringValue )
+   , allocated_( false )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+   , comments_( 0 )
+{
+   value_.string_ = const_cast<char *>( value.c_str() );
+}
+
+
+# ifdef JSON_USE_CPPTL
+Value::Value( const CppTL::ConstString &value )
+   : type_( stringValue )
+   , allocated_( true )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+   , comments_( 0 )
+{
+   value_.string_ = duplicateStringValue( value, value.length() );
+}
+# endif
+
+Value::Value( bool value )
+   : type_( booleanValue )
+   , allocated_( false )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+   , comments_( 0 )
+{
+   value_.bool_ = value;
+}
+
+
+Value::Value( const Value &other )
+   : type_( other.type_ )
+   , allocated_( false )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+   , comments_( 0 )
+{
+   switch ( type_ )
+   {
+   case nullValue:
+   case intValue:
+   case uintValue:
+   case realValue:
+   case booleanValue:
+      value_ = other.value_;
+      break;
+   case stringValue:
+      if ( other.value_.string_ )
+      {
+         value_.string_ = duplicateStringValue( other.value_.string_ );
+         allocated_ = true;
+      }
+      else
+         value_.string_ = 0;
+      break;
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   case arrayValue:
+   case objectValue:
+      value_.map_ = new ObjectValues( *other.value_.map_ );
+      break;
+#else
+   case arrayValue:
+      value_.array_ = arrayAllocator()->newArrayCopy( *other.value_.array_ );
+      break;
+   case objectValue:
+      value_.map_ = mapAllocator()->newMapCopy( *other.value_.map_ );
+      break;
+#endif
+   default:
+      JSON_ASSERT_UNREACHABLE;
+   }
+   if ( other.comments_ )
+   {
+      comments_ = new CommentInfo[numberOfCommentPlacement];
+      for ( int comment =0; comment < numberOfCommentPlacement; ++comment )
+      {
+         const CommentInfo &otherComment = other.comments_[comment];
+         if ( otherComment.comment_ )
+            comments_[comment].setComment( otherComment.comment_ );
+      }
+   }
+}
+
+
+Value::~Value()
+{
+   switch ( type_ )
+   {
+   case nullValue:
+   case intValue:
+   case uintValue:
+   case realValue:
+   case booleanValue:
+      break;
+   case stringValue:
+      if ( allocated_ )
+         releaseStringValue( value_.string_ );
+      break;
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   case arrayValue:
+   case objectValue:
+      delete value_.map_;
+      break;
+#else
+   case arrayValue:
+      arrayAllocator()->destructArray( value_.array_ );
+      break;
+   case objectValue:
+      mapAllocator()->destructMap( value_.map_ );
+      break;
+#endif
+   default:
+      JSON_ASSERT_UNREACHABLE;
+   }
+
+   if ( comments_ )
+      delete[] comments_;
+}
+
+Value &
+Value::operator=( const Value &other )
+{
+   Value temp( other );
+   swap( temp );
+   return *this;
+}
+
+void
+Value::swap( Value &other )
+{  // exchanges type_, value_ and allocated_; comments_ is not touched here
+   ValueType temp = type_;
+   type_ = other.type_;
+   other.type_ = temp;
+   std::swap( value_, other.value_ );
+   int temp2 = allocated_;   // exchanged by hand through a temporary, like type_
+   allocated_ = other.allocated_;
+   other.allocated_ = temp2;
+}
+
+ValueType
+Value::type() const
+{
+   return type_;
+}
+
+
+int
+Value::compare( const Value &other ) const
+{
+   // Three-way comparison built on operator< and operator>:
+   // -1 when *this orders first, 1 when it orders last, 0 otherwise.
+   if ( *this < other )
+      return -1;
+   return ( *this > other ) ? 1 : 0;
+}
+
+
+bool
+Value::operator <( const Value &other ) const
+{
+   int typeDelta = type_ - other.type_;
+   if ( typeDelta )
+      return typeDelta < 0 ? true : false;
+   switch ( type_ )
+   {
+   case nullValue:
+      return false;
+   case intValue:
+      return value_.int_ < other.value_.int_;
+   case uintValue:
+      return value_.uint_ < other.value_.uint_;
+   case realValue:
+      return value_.real_ < other.value_.real_;
+   case booleanValue:
+      return value_.bool_ < other.value_.bool_;
+   case stringValue:
+      return ( value_.string_ == 0  &&  other.value_.string_ )
+             || ( other.value_.string_
+                  &&  value_.string_
+                  && strcmp( value_.string_, other.value_.string_ ) < 0 );
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   case arrayValue:
+   case objectValue:
+      {
+         int delta = int( value_.map_->size() - other.value_.map_->size() );
+         if ( delta )
+            return delta < 0;
+         return (*value_.map_) < (*other.value_.map_);
+      }
+#else
+   case arrayValue:
+      return value_.array_->compare( *(other.value_.array_) ) < 0;
+   case objectValue:
+      return value_.map_->compare( *(other.value_.map_) ) < 0;
+#endif
+   default:
+      JSON_ASSERT_UNREACHABLE;
+   }
+   return false;  // unreachable
+}
+
+bool
+Value::operator <=( const Value &other ) const
+{
+   return !(other < *this);
+}
+
+bool
+Value::operator >=( const Value &other ) const
+{
+   return !(*this < other);
+}
+
+bool
+Value::operator >( const Value &other ) const
+{
+   return other < *this;
+}
+
+bool
+Value::operator ==( const Value &other ) const
+{
+   //if ( type_ != other.type_ )
+   // GCC 2.95.3 says:
+   // attempt to take address of bit-field structure member `Json::Value::type_'
+   // Beats me, but a temp solves the problem.
+   int temp = other.type_;
+   if ( type_ != temp )
+      return false;
+   switch ( type_ )
+   {
+   case nullValue:
+      return true;
+   case intValue:
+      return value_.int_ == other.value_.int_;
+   case uintValue:
+      return value_.uint_ == other.value_.uint_;
+   case realValue:
+      return value_.real_ == other.value_.real_;
+   case booleanValue:
+      return value_.bool_ == other.value_.bool_;
+   case stringValue:
+      return ( value_.string_ == other.value_.string_ )
+             || ( other.value_.string_
+                  &&  value_.string_
+                  && strcmp( value_.string_, other.value_.string_ ) == 0 );
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   case arrayValue:
+   case objectValue:
+      return value_.map_->size() == other.value_.map_->size()
+             && (*value_.map_) == (*other.value_.map_);
+#else
+   case arrayValue:
+      return value_.array_->compare( *(other.value_.array_) ) == 0;
+   case objectValue:
+      return value_.map_->compare( *(other.value_.map_) ) == 0;
+#endif
+   default:
+      JSON_ASSERT_UNREACHABLE;
+   }
+   return false;  // unreachable
+}
+
+bool
+Value::operator !=( const Value &other ) const
+{
+   return !( *this == other );
+}
+
+const char *
+Value::asCString() const
+{
+   JSON_ASSERT( type_ == stringValue );
+   return value_.string_;
+}
+
+
+std::string
+Value::asString() const
+{
+   switch ( type_ )
+   {
+   case nullValue:
+      return "";
+   case stringValue:
+      return value_.string_ ? value_.string_ : "";
+   case booleanValue:
+      return value_.bool_ ? "true" : "false";
+   case intValue:
+      return valueToString( value_.int_ );
+   case uintValue:
+      return valueToString( value_.uint_ );
+   case realValue:
+      return valueToString( value_.real_ );
+   default:
+      JSON_FAIL_MESSAGE( "Type is not convertible to string" );
+   }
+}
+
+# ifdef JSON_USE_CPPTL
+CppTL::ConstString
+Value::asConstString() const
+{
+   return CppTL::ConstString( asString().c_str() );
+}
+# endif
+
+
+Value::Int
+Value::asInt() const
+{
+   switch ( type_ )
+   {
+   case intValue:
+      JSON_ASSERT_MESSAGE(isInt(), "LargestInt out of Int range");
+      return Int(value_.int_);
+   case uintValue:
+      JSON_ASSERT_MESSAGE(isInt(), "LargestUInt out of Int range");
+      return Int(value_.uint_);
+   case realValue:
+      JSON_ASSERT_MESSAGE(InRange(value_.real_, minInt, maxInt), "double out of Int range");
+      return Int(value_.real_);
+   case nullValue:
+      return 0;
+   case booleanValue:
+      return value_.bool_ ? 1 : 0;
+   default:
+      break;
+   }
+   JSON_FAIL_MESSAGE("Value is not convertible to Int.");
+}
+
+
+Value::UInt
+Value::asUInt() const
+{
+   switch ( type_ )
+   {
+   case intValue:
+      JSON_ASSERT_MESSAGE(isUInt(), "LargestInt out of UInt range");
+      return UInt(value_.int_);
+   case uintValue:
+      JSON_ASSERT_MESSAGE(isUInt(), "LargestUInt out of UInt range");
+      return UInt(value_.uint_);
+   case realValue:
+      JSON_ASSERT_MESSAGE(InRange(value_.real_, 0, maxUInt), "double out of UInt range");
+      return UInt( value_.real_ );
+   case nullValue:
+      return 0;
+   case booleanValue:
+      return value_.bool_ ? 1 : 0;
+   default:
+      break;
+   }
+   JSON_FAIL_MESSAGE("Value is not convertible to UInt.");
+}
+
+
+# if defined(JSON_HAS_INT64)
+
+Value::Int64
+Value::asInt64() const
+{
+   switch ( type_ )
+   {
+   case intValue:
+      return Int64(value_.int_);
+   case uintValue:
+      JSON_ASSERT_MESSAGE(isInt64(), "LargestUInt out of Int64 range");
+      return Int64(value_.uint_);
+   case realValue:
+      JSON_ASSERT_MESSAGE(InRange(value_.real_, minInt64, maxInt64), "double out of Int64 range");
+      return Int64(value_.real_);
+   case nullValue:
+      return 0;
+   case booleanValue:
+      return value_.bool_ ? 1 : 0;
+   default:
+      break;
+   }
+   JSON_FAIL_MESSAGE("Value is not convertible to Int64.");
+}
+
+
+Value::UInt64
+Value::asUInt64() const
+{
+   switch ( type_ )
+   {
+   case intValue:
+      JSON_ASSERT_MESSAGE(isUInt64(), "LargestInt out of UInt64 range");
+      return UInt64(value_.int_);
+   case uintValue:
+      return UInt64(value_.uint_);
+   case realValue:
+      JSON_ASSERT_MESSAGE(InRange(value_.real_, 0, maxUInt64), "double out of UInt64 range");
+      return UInt64( value_.real_ );
+   case nullValue:
+      return 0;
+   case booleanValue:
+      return value_.bool_ ? 1 : 0;
+   default:
+      break;
+   }
+   JSON_FAIL_MESSAGE("Value is not convertible to UInt64.");
+}
+# endif // if defined(JSON_HAS_INT64)
+
+
+LargestInt
+Value::asLargestInt() const
+{
+#if defined(JSON_NO_INT64)
+    return asInt();
+#else
+    return asInt64();
+#endif
+}
+
+
+LargestUInt
+Value::asLargestUInt() const
+{
+#if defined(JSON_NO_INT64)
+    return asUInt();
+#else
+    return asUInt64();
+#endif
+}
+
+
+double
+Value::asDouble() const
+{
+   switch ( type_ )
+   {
+   case intValue:
+      return static_cast<double>( value_.int_ );
+   case uintValue:
+#if !defined(JSON_USE_INT64_DOUBLE_CONVERSION)
+      return static_cast<double>( value_.uint_ );
+#else // if !defined(JSON_USE_INT64_DOUBLE_CONVERSION)
+      return integerToDouble( value_.uint_ );
+#endif // if !defined(JSON_USE_INT64_DOUBLE_CONVERSION)
+   case realValue:
+      return value_.real_;
+   case nullValue:
+      return 0.0;
+   case booleanValue:
+      return value_.bool_ ? 1.0 : 0.0;
+   default:
+      break;
+   }
+   JSON_FAIL_MESSAGE("Value is not convertible to double.");
+}
+
+float
+Value::asFloat() const
+{  // numeric, null and boolean values convert; any other type reaches JSON_FAIL_MESSAGE
+   switch ( type_ )
+   {
+   case intValue:
+      return static_cast<float>( value_.int_ );
+   case uintValue:
+#if !defined(JSON_USE_INT64_DOUBLE_CONVERSION)
+      return static_cast<float>( value_.uint_ );
+#else // if !defined(JSON_USE_INT64_DOUBLE_CONVERSION)
+      return static_cast<float>( integerToDouble( value_.uint_ ) );  // explicit narrowing, as in the other branches
+#endif // if !defined(JSON_USE_INT64_DOUBLE_CONVERSION)
+   case realValue:
+      return static_cast<float>( value_.real_ );
+   case nullValue:
+      return 0.0f;
+   case booleanValue:
+      return value_.bool_ ? 1.0f : 0.0f;
+   default:
+      break;
+   }
+   JSON_FAIL_MESSAGE("Value is not convertible to float.");
+}
+
+bool
+Value::asBool() const
+{
+   switch ( type_ )
+   {
+   case booleanValue:
+      return value_.bool_;
+   case nullValue:
+      return false;
+   case intValue:
+      return value_.int_ ? true : false;
+   case uintValue:
+      return value_.uint_ ? true : false;
+   case realValue:
+      return value_.real_ ? true : false;
+   default:
+      break;
+   }
+   JSON_FAIL_MESSAGE("Value is not convertible to bool.");
+}
+
+
+bool
+Value::isConvertibleTo( ValueType other ) const
+{  // true when this value can be converted to the requested type
+   switch ( other )
+   {
+   case nullValue:   // only "empty-like" values convert to null
+      return ( isNumeric() && asDouble() == 0.0 )
+             || ( type_ == booleanValue && value_.bool_ == false )
+             || ( type_ == stringValue && asString() == "" )
+             || ( type_ == arrayValue && size() == 0 )  // size() reads array_ or map_ as configured
+             || ( type_ == objectValue && value_.map_->size() == 0 )
+             || type_ == nullValue;
+   case intValue:
+      return isInt()
+             || (type_ == realValue && InRange(value_.real_, minInt, maxInt))
+             || type_ == booleanValue
+             || type_ == nullValue;
+   case uintValue:
+      return isUInt()
+             || (type_ == realValue && InRange(value_.real_, 0, maxUInt))
+             || type_ == booleanValue
+             || type_ == nullValue;
+   case realValue:
+      return isNumeric()
+             || type_ == booleanValue
+             || type_ == nullValue;
+   case booleanValue:
+      return isNumeric()
+             || type_ == booleanValue
+             || type_ == nullValue;
+   case stringValue:
+      return isNumeric()
+             || type_ == booleanValue
+             || type_ == stringValue
+             || type_ == nullValue;
+   case arrayValue:
+      return type_ == arrayValue
+             || type_ == nullValue;
+   case objectValue:
+      return type_ == objectValue
+             || type_ == nullValue;
+   }
+   JSON_ASSERT_UNREACHABLE;
+   return false;
+}
+
+
+/// Number of values in array or object (0 for every other type)
+ArrayIndex
+Value::size() const
+{
+   switch ( type_ )
+   {
+   case nullValue:
+   case intValue:
+   case uintValue:
+   case realValue:
+   case booleanValue:
+   case stringValue:
+      return 0;
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   case arrayValue:  // size of the array is highest index + 1
+      if ( !value_.map_->empty() )
+      {
+         ObjectValues::const_iterator itLast = value_.map_->end();
+         --itLast;   // step back from end() to the last stored entry
+         return (*itLast).first.index()+1;
+      }
+      return 0;
+   case objectValue:
+      return ArrayIndex( value_.map_->size() );
+#else
+   case arrayValue:
+      return Int( value_.array_->size() );
+   case objectValue:
+      return Int( value_.map_->size() );
+#endif
+   }
+   JSON_ASSERT_UNREACHABLE;
+   return 0; // unreachable;
+}
+
+
+bool
+Value::empty() const
+{
+   // Only null, array and object values can report empty (size() == 0);
+   // every other type is never considered empty.
+   const bool sizable = isNull() || isArray() || isObject();
+   return sizable && size() == 0u;
+}
+
+
+bool
+Value::operator!() const
+{
+   return isNull();
+}
+
+
+void
+Value::clear()
+{
+   JSON_ASSERT( type_ == nullValue  ||  type_ == arrayValue  || type_ == objectValue );
+
+   switch ( type_ )
+   {
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   case arrayValue:
+   case objectValue:
+      value_.map_->clear();
+      break;
+#else
+   case arrayValue:
+      value_.array_->clear();
+      break;
+   case objectValue:
+      value_.map_->clear();
+      break;
+#endif
+   default:
+      break;
+   }
+}
+
+void
+Value::resize( ArrayIndex newSize )
+{
+   JSON_ASSERT( type_ == nullValue  ||  type_ == arrayValue );
+   if ( type_ == nullValue )
+      *this = Value( arrayValue );
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   ArrayIndex oldSize = size();
+   if ( newSize == 0 )
+      clear();
+   else if ( newSize > oldSize )
+      (*this)[ newSize - 1 ];
+   else
+   {
+      for ( ArrayIndex index = newSize; index < oldSize; ++index )
+      {
+         value_.map_->erase( index );
+      }
+      assert( size() == newSize );
+   }
+#else
+   value_.array_->resize( newSize );
+#endif
+}
+
+
+Value &
+Value::operator[]( ArrayIndex index )
+{
+   JSON_ASSERT( type_ == nullValue  ||  type_ == arrayValue );
+   if ( type_ == nullValue )
+      *this = Value( arrayValue );
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   CZString key( index );
+   ObjectValues::iterator it = value_.map_->lower_bound( key );
+   if ( it != value_.map_->end()  &&  (*it).first == key )
+      return (*it).second;
+
+   ObjectValues::value_type defaultValue( key, null );
+   it = value_.map_->insert( it, defaultValue );
+   return (*it).second;
+#else
+   return value_.array_->resolveReference( index );
+#endif
+}
+
+
+Value &
+Value::operator[]( int index )
+{
+   JSON_ASSERT( index >= 0 );
+   return (*this)[ ArrayIndex(index) ];
+}
+
+
+const Value &
+Value::operator[]( ArrayIndex index ) const
+{
+   JSON_ASSERT( type_ == nullValue  ||  type_ == arrayValue );
+   if ( type_ == nullValue )
+      return null;
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   CZString key( index );
+   ObjectValues::const_iterator it = value_.map_->find( key );
+   if ( it == value_.map_->end() )
+      return null;
+   return (*it).second;
+#else
+   Value *value = value_.array_->find( index );
+   return value ? *value : null;
+#endif
+}
+
+
+const Value &
+Value::operator[]( int index ) const
+{
+   JSON_ASSERT( index >= 0 );
+   return (*this)[ ArrayIndex(index) ];
+}
+
+
+Value &
+Value::operator[]( const char *key )
+{
+   return resolveReference( key, false );
+}
+
+
+Value &
+Value::resolveReference( const char *key,
+                         bool isStatic )
+{
+   JSON_ASSERT( type_ == nullValue  ||  type_ == objectValue );
+   if ( type_ == nullValue )
+      *this = Value( objectValue );
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   CZString actualKey( key, isStatic ? CZString::noDuplication
+                                     : CZString::duplicateOnCopy );
+   ObjectValues::iterator it = value_.map_->lower_bound( actualKey );
+   if ( it != value_.map_->end()  &&  (*it).first == actualKey )
+      return (*it).second;
+
+   ObjectValues::value_type defaultValue( actualKey, null );
+   it = value_.map_->insert( it, defaultValue );
+   Value &value = (*it).second;
+   return value;
+#else
+   return value_.map_->resolveReference( key, isStatic );
+#endif
+}
+
+
+Value
+Value::get( ArrayIndex index,
+            const Value &defaultValue ) const
+{
+   const Value *value = &((*this)[index]);
+   return value == &null ? defaultValue : *value;
+}
+
+
+bool
+Value::isValidIndex( ArrayIndex index ) const
+{
+   return index < size();
+}
+
+
+
+const Value &
+Value::operator[]( const char *key ) const
+{
+   JSON_ASSERT( type_ == nullValue  ||  type_ == objectValue );
+   if ( type_ == nullValue )
+      return null;
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   CZString actualKey( key, CZString::noDuplication );
+   ObjectValues::const_iterator it = value_.map_->find( actualKey );
+   if ( it == value_.map_->end() )
+      return null;
+   return (*it).second;
+#else
+   const Value *value = value_.map_->find( key );
+   return value ? *value : null;
+#endif
+}
+
+
+Value &
+Value::operator[]( const std::string &key )
+{
+   return (*this)[ key.c_str() ];
+}
+
+
+const Value &
+Value::operator[]( const std::string &key ) const
+{
+   return (*this)[ key.c_str() ];
+}
+
+Value &
+Value::operator[]( const StaticString &key )
+{
+   return resolveReference( key, true );
+}
+
+
+# ifdef JSON_USE_CPPTL
+Value &
+Value::operator[]( const CppTL::ConstString &key )
+{
+   return (*this)[ key.c_str() ];
+}
+
+
+const Value &
+Value::operator[]( const CppTL::ConstString &key ) const
+{
+   return (*this)[ key.c_str() ];
+}
+# endif
+
+
+Value &
+Value::append( const Value &value )
+{
+   return (*this)[size()] = value;
+}
+
+
+Value
+Value::get( const char *key,
+            const Value &defaultValue ) const
+{
+   const Value *value = &((*this)[key]);
+   return value == &null ? defaultValue : *value;
+}
+
+
+Value
+Value::get( const std::string &key,
+            const Value &defaultValue ) const
+{
+   return get( key.c_str(), defaultValue );
+}
+
+Value
+Value::removeMember( const char* key )
+{  // removes `key` and returns a copy of its old value; returns null if absent or if this is null
+   JSON_ASSERT( type_ == nullValue  ||  type_ == objectValue );
+   if ( type_ == nullValue )
+      return null;
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   CZString actualKey( key, CZString::noDuplication );
+   ObjectValues::iterator it = value_.map_->find( actualKey );
+   if ( it == value_.map_->end() )
+      return null;
+   Value old(it->second);   // copy before erase invalidates the entry
+   value_.map_->erase(it);
+   return old;
+#else
+   Value *value = value_.map_->find( key );
+   if (value){
+      Value old(*value);
+      value_.map_->remove( key );   // map_ is a pointer: '->', not '.'
+      return old;
+   } else {
+      return null;
+   }
+#endif
+}
+
+Value
+Value::removeMember( const std::string &key )
+{
+   return removeMember( key.c_str() );
+}
+// CppTL::ConstString overload of get() (compiled only when JSON_USE_CPPTL is defined); forwards key.c_str().
+# ifdef JSON_USE_CPPTL
+Value
+Value::get( const CppTL::ConstString &key,
+            const Value &defaultValue ) const
+{
+   return get( key.c_str(), defaultValue );
+}
+# endif
+// True when the const lookup of `key` yields an object other than the shared null.
+bool
+Value::isMember( const char *key ) const
+{
+   const Value &found = (*this)[key];
+   return &null != &found;
+}
+
+// std::string convenience overload; forwards key.c_str() to isMember( const char * ).
+bool
+Value::isMember( const std::string &key ) const
+{
+   return this->isMember( key.c_str() );
+}
+
+// CppTL::ConstString overload of isMember() (compiled only when JSON_USE_CPPTL is defined).
+# ifdef JSON_USE_CPPTL
+bool
+Value::isMember( const CppTL::ConstString &key ) const
+{
+   return isMember( key.c_str() );
+}
+#endif
+// Returns the names of all members of an object value; a null value yields an empty list.
+Value::Members
+Value::getMemberNames() const
+{
+   JSON_ASSERT( type_ == nullValue  ||  type_ == objectValue );
+   Members names;
+   if ( type_ == nullValue )
+      return names;
+   names.reserve( value_.map_->size() );
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   for ( ObjectValues::const_iterator member = value_.map_->begin();
+         member != value_.map_->end();
+         ++member )
+      names.push_back( std::string( member->first.c_str() ) );
+#else
+   ValueInternalMap::IteratorState cursor;
+   ValueInternalMap::IteratorState last;
+   value_.map_->makeBeginIterator( cursor );
+   value_.map_->makeEndIterator( last );
+   for ( ; !ValueInternalMap::equals( cursor, last ); ValueInternalMap::increment(cursor) )
+      names.push_back( std::string( ValueInternalMap::key( cursor ) ) );
+#endif
+   return names;
+}
+//
+//# ifdef JSON_USE_CPPTL
+//EnumMemberNames
+//Value::enumMemberNames() const
+//{
+//   if ( type_ == objectValue )
+//   {
+//      return CppTL::Enum::any(  CppTL::Enum::transform(
+//         CppTL::Enum::keys( *(value_.map_), CppTL::Type<const CZString &>() ),
+//         MemberNamesTransform() ) );
+//   }
+//   return EnumMemberNames();
+//}
+//
+//
+//EnumValues
+//Value::enumValues() const
+//{
+//   if ( type_ == objectValue  ||  type_ == arrayValue )
+//      return CppTL::Enum::anyValues( *(value_.map_),
+//                                     CppTL::Type<const Value &>() );
+//   return EnumValues();
+//}
+//
+//# endif
+
+static bool IsIntegral(double d) {
+  double discarded_whole = 0.0;  // modf() writes the integer part here; it is not used
+  return 0.0 == modf(d, &discarded_whole);  // a zero fractional part means d is integral
+}
+
+// True when the stored type tag is nullValue.
+bool
+Value::isNull() const
+{
+   return type_ == nullValue;
+}
+
+// True when the stored type tag is booleanValue.
+bool
+Value::isBool() const
+{
+   return type_ == booleanValue;
+}
+
+// True when the value is representable as Int: an integer type within
+// [minInt, maxInt], or a real in that range with no fractional part.
+bool
+Value::isInt() const
+{
+   if ( type_ == intValue )
+      return value_.int_ >= minInt  &&  value_.int_ <= maxInt;
+
+   if ( type_ == uintValue )
+      return value_.uint_ <= UInt(maxInt);
+
+   if ( type_ == realValue )
+   {
+      const bool inRange = value_.real_ >= minInt  &&  value_.real_ <= maxInt;
+      return inRange  &&  IsIntegral(value_.real_);
+   }
+
+   return false;   // every other type
+}
+
+// True when the value is representable as UInt: non-negative and no larger
+// than maxUInt; a real must additionally have no fractional part.
+bool
+Value::isUInt() const
+{
+   if ( type_ == intValue )
+      return value_.int_ >= 0 && LargestUInt(value_.int_) <= LargestUInt(maxUInt);
+
+   if ( type_ == uintValue )
+      return value_.uint_ <= maxUInt;
+
+   if ( type_ == realValue )
+   {
+      const bool inRange = value_.real_ >= 0  &&  value_.real_ <= maxUInt;
+      return inRange  &&  IsIntegral(value_.real_);
+   }
+
+   return false;   // every other type
+}
+// True when the value fits a signed 64-bit integer; always false when JSON_HAS_INT64 is not defined.
+bool
+Value::isInt64() const
+{
+# if defined(JSON_HAS_INT64)
+   switch ( type_ )
+   {
+   case intValue:
+     return true;
+   case uintValue:
+      return value_.uint_ <= UInt64(maxInt64);
+   case realValue:
+      // Note that maxInt64 (= 2^63 - 1) is not exactly representable as a
+      // double, so double(maxInt64) will be rounded up to 2^63. Therefore we
+      // require the value to be strictly less than the limit.
+      return value_.real_ >= double(minInt64) &&
+             value_.real_ < double(maxInt64) &&
+             IsIntegral(value_.real_);
+   default:
+      break;
+   }
+# endif  // JSON_HAS_INT64
+   return false;
+}
+// True when the value fits an unsigned 64-bit integer; always false when JSON_HAS_INT64 is not defined.
+bool
+Value::isUInt64() const
+{
+# if defined(JSON_HAS_INT64)
+   switch ( type_ )
+   {
+   case intValue:
+     return value_.int_ >= 0;
+   case uintValue:
+      return true;
+   case realValue:
+      // Note that maxUInt64 (= 2^64 - 1) is not exactly representable as a
+      // double, so double(maxUInt64) will be rounded up to 2^64. Therefore we
+      // require the value to be strictly less than the limit.
+      return value_.real_ >= 0 &&
+             value_.real_ < maxUInt64AsDouble &&
+             IsIntegral(value_.real_);
+   default:
+      break;
+   }
+# endif  // JSON_HAS_INT64
+   return false;
+}
+
+// True when the value passes the widest available integer checks: 64-bit with JSON_HAS_INT64, else Int/UInt.
+bool
+Value::isIntegral() const
+{
+#if defined(JSON_HAS_INT64)
+  return isInt64() || isUInt64();
+#else
+  return isInt() || isUInt();
+#endif
+}
+
+// A value counts as a double when it is stored as a real or passes isIntegral().
+bool
+Value::isDouble() const
+{
+   return ( type_ == realValue ) ? true : isIntegral();
+}
+
+// True when the value passes either isIntegral() or isDouble().
+bool
+Value::isNumeric() const
+{
+   return isIntegral() ? true : isDouble();
+}
+
+// True when the stored type tag is stringValue.
+bool
+Value::isString() const
+{
+   return type_ == stringValue;
+}
+
+// True when the stored type tag is arrayValue.
+bool
+Value::isArray() const
+{
+   return type_ == arrayValue;
+}
+
+// True when the stored type tag is objectValue.
+bool
+Value::isObject() const
+{
+   return type_ == objectValue;
+}
+
+// Stores `comment` for `placement`, allocating the per-placement CommentInfo array on first use.
+void
+Value::setComment( const char *comment,
+                   CommentPlacement placement )
+{
+   if ( comments_ == 0 )
+      comments_ = new CommentInfo[numberOfCommentPlacement];
+   ( comments_ + placement )->setComment( comment );
+}
+
+// std::string convenience overload; forwards comment.c_str() to setComment( const char *, ... ).
+void
+Value::setComment( const std::string &comment,
+                   CommentPlacement placement )
+{
+   this->setComment( comment.c_str(), placement );
+}
+
+// True when the comment array exists and holds a non-null comment for `placement`.
+bool
+Value::hasComment( CommentPlacement placement ) const
+{
+   return comments_ ? ( comments_[placement].comment_ != 0 ) : false;
+}
+// Returns the comment stored for `placement`, or an empty string when hasComment() is false.
+std::string
+Value::getComment( CommentPlacement placement ) const
+{
+   if ( !hasComment(placement) )
+      return "";
+   return comments_[placement].comment_;
+}
+
+// Returns the text produced by a default-constructed StyledWriter for this value.
+std::string
+Value::toStyledString() const
+{
+   StyledWriter styled;
+   return styled.write( *this );
+}
+
+// Const begin iterator for array/object values; anything else yields a default-constructed iterator.
+Value::const_iterator
+Value::begin() const
+{
+   switch ( type_ )
+   {
+#ifdef JSON_VALUE_USE_INTERNAL_MAP
+   case arrayValue:
+      if ( value_.array_ )
+      {
+         ValueInternalArray::IteratorState it;
+         value_.array_->makeBeginIterator( it );
+         return const_iterator( it );
+      }
+      break;
+   case objectValue:
+      if ( value_.map_ )
+      {
+         ValueInternalMap::IteratorState it;
+         value_.map_->makeBeginIterator( it );
+         return const_iterator( it );
+      }
+      break;
+#else
+   case arrayValue:
+   case objectValue:
+      if ( value_.map_ )
+         return const_iterator( value_.map_->begin() );
+      break;
+#endif
+   default:
+      break;
+   }
+   return const_iterator();   // not a container, or its storage pointer is null
+}
+// Const end iterator for array/object values; anything else yields a default-constructed iterator.
+Value::const_iterator
+Value::end() const
+{
+   switch ( type_ )
+   {
+#ifdef JSON_VALUE_USE_INTERNAL_MAP
+   case arrayValue:
+      if ( value_.array_ )
+      {
+         ValueInternalArray::IteratorState it;
+         value_.array_->makeEndIterator( it );
+         return const_iterator( it );
+      }
+      break;
+   case objectValue:
+      if ( value_.map_ )
+      {
+         ValueInternalMap::IteratorState it;
+         value_.map_->makeEndIterator( it );
+         return const_iterator( it );
+      }
+      break;
+#else
+   case arrayValue:
+   case objectValue:
+      if ( value_.map_ )
+         return const_iterator( value_.map_->end() );
+      break;
+#endif
+   default:
+      break;
+   }
+   return const_iterator();   // not a container, or its storage pointer is null
+}
+
+// Mutable begin iterator for array/object values; anything else yields a default-constructed iterator.
+Value::iterator
+Value::begin()
+{
+   switch ( type_ )
+   {
+#ifdef JSON_VALUE_USE_INTERNAL_MAP
+   case arrayValue:
+      if ( value_.array_ )
+      {
+         ValueInternalArray::IteratorState it;
+         value_.array_->makeBeginIterator( it );
+         return iterator( it );
+      }
+      break;
+   case objectValue:
+      if ( value_.map_ )
+      {
+         ValueInternalMap::IteratorState it;
+         value_.map_->makeBeginIterator( it );
+         return iterator( it );
+      }
+      break;
+#else
+   case arrayValue:
+   case objectValue:
+      if ( value_.map_ )
+         return iterator( value_.map_->begin() );
+      break;
+#endif
+   default:
+      break;
+   }
+   return iterator();   // not a container, or its storage pointer is null
+}
+// Mutable end iterator for array/object values; anything else yields a default-constructed iterator.
+Value::iterator
+Value::end()
+{
+   switch ( type_ )
+   {
+#ifdef JSON_VALUE_USE_INTERNAL_MAP
+   case arrayValue:
+      if ( value_.array_ )
+      {
+         ValueInternalArray::IteratorState it;
+         value_.array_->makeEndIterator( it );
+         return iterator( it );
+      }
+      break;
+   case objectValue:
+      if ( value_.map_ )
+      {
+         ValueInternalMap::IteratorState it;
+         value_.map_->makeEndIterator( it );
+         return iterator( it );
+      }
+      break;
+#else
+   case arrayValue:
+   case objectValue:
+      if ( value_.map_ )
+         return iterator( value_.map_->end() );
+      break;
+#endif
+   default:
+      break;
+   }
+   return iterator();   // not a container, or its storage pointer is null
+}
+
+
+// class PathArgument
+// //////////////////////////////////////////////////////////////////
+// Default argument: kindNone with value-initialized key and index.
+PathArgument::PathArgument()
+   : key_()
+   , index_()
+   , kind_( kindNone )
+{
+}
+
+// Array-index argument: stores `index` and tags the argument as kindIndex.
+PathArgument::PathArgument( ArrayIndex index )
+   : key_()
+   , index_( index )
+   , kind_( kindIndex )
+{
+}
+
+// Member-name argument built from a C string: stores `key` and tags the argument as kindKey.
+PathArgument::PathArgument( const char *key )
+   : key_( key )
+   , index_()
+   , kind_( kindKey )
+{
+}
+
+// Member-name argument built from a std::string; the key is initialized from key.c_str() and tagged kindKey.
+PathArgument::PathArgument( const std::string &key )
+   : key_( key.c_str() )
+   , index_()
+   , kind_( kindKey )
+{
+}
+
+// class Path
+// //////////////////////////////////////////////////////////////////
+// Builds a Path from its textual form; a1..a5 are handed to makePath() in
+// declaration order as the values available to '%' placeholders.
+Path::Path( const std::string &path,
+            const PathArgument &a1,
+            const PathArgument &a2,
+            const PathArgument &a3,
+            const PathArgument &a4,
+            const PathArgument &a5 )
+{
+   const PathArgument *const supplied[] = { &a1, &a2, &a3, &a4, &a5 };
+
+   InArgs in;
+   for ( unsigned slot = 0; slot < 5; ++slot )
+      in.push_back( supplied[slot] );
+   makePath( path, in );
+}
+// Parses `path` into args_: names add keys, "[N]" adds an index, '%' / "[%]" take the next argument from `in`.
+void
+Path::makePath( const std::string &path,
+                const InArgs &in )
+{
+   const char *current = path.c_str();
+   const char *end = current + path.length();
+   InArgs::const_iterator itInArg = in.begin();
+   while ( current != end )
+   {
+      if ( *current == '[' )
+      {
+         ++current;
+         if ( *current == '%' )
+         {
+            addPathInArg( path, in, itInArg, PathArgument::kindIndex );
+            ++current;   // consume '%' so the ']' check below sees the bracket
+         }
+         else
+         {
+            ArrayIndex index = 0;
+            for ( ; current != end && *current >= '0'  &&  *current <= '9'; ++current )
+               index = index * 10 + ArrayIndex(*current - '0');
+            args_.push_back( index );
+         }
+         if ( current == end  ||  *current++ != ']' )
+            invalidPath( path, int(current - path.c_str()) );
+      }
+      else if ( *current == '%' )
+      {
+         addPathInArg( path, in, itInArg, PathArgument::kindKey );
+         ++current;
+      }
+      else if ( *current == '.' )
+         ++current;   // separator between path components
+      else
+      {
+         const char *beginName = current;
+         while ( current != end  &&  !strchr( "[.", *current ) )
+            ++current;
+         args_.push_back( std::string( beginName, current ) );
+      }
+   }
+}
+// Appends the next caller-supplied argument to args_ when one remains and its kind matches `kind`.
+void
+Path::addPathInArg( const std::string &/*path*/,
+                    const InArgs &in,
+                    InArgs::const_iterator &itInArg,
+                    PathArgument::Kind kind )
+{
+   if ( itInArg == in.end() )
+   {
+      // Error: missing argument %d
+   }
+   else if ( (*itInArg)->kind_ != kind )
+   {
+      // Error: bad argument type
+   }
+   else
+   {
+      args_.push_back( **itInArg );
+      ++itInArg;   // advance so the next '%' placeholder takes the following argument
+   }
+}
+
+// Hook called by makePath() for a malformed path; currently a no-op and both parameters are unused.
+void
+Path::invalidPath( const std::string &/*path*/,
+                   int /*location*/ )
+{
+   // Error: invalid path.
+}
+
+// Follows the stored path from `root`; returns Value::null when an array/object step cannot be applied.
+const Value &
+Path::resolve( const Value &root ) const
+{
+   const Value *node = &root;
+   for ( Args::const_iterator it = args_.begin(); it != args_.end(); ++it )
+   {
+      const PathArgument &arg = *it;
+      if ( arg.kind_ == PathArgument::kindIndex )
+      {
+         if ( !node->isArray()  ||  !node->isValidIndex( arg.index_ ) )
+         {  // Error: unable to resolve path (array value expected at position...
+            return Value::null;
+         }
+         node = &((*node)[arg.index_]);
+      }
+      else if ( arg.kind_ == PathArgument::kindKey )
+      {
+         if ( !node->isObject() )
+         {  // Error: unable to resolve path (object value expected at position...)
+            return Value::null;
+         }
+         node = &((*node)[arg.key_]);
+         if ( node == &Value::null )
+         {
+            // Error: unable to resolve path (object has no member named '' at position...)
+         }
+      }
+   }
+   return *node;
+}
+
+// Follows the stored path starting at `root`. Returns a copy of the node reached,
+// or a copy of `defaultValue` as soon as a step cannot be resolved.
+Value
+Path::resolve( const Value &root,
+               const Value &defaultValue ) const
+{
+   const Value *cursor = &root;
+   for ( Args::const_iterator step = args_.begin(); step != args_.end(); ++step )
+   {
+      if ( step->kind_ == PathArgument::kindIndex )
+      {
+         if ( !( cursor->isArray()  &&  cursor->isValidIndex( step->index_ ) ) )
+            return defaultValue;
+         cursor = &((*cursor)[step->index_]);
+      }
+      else if ( step->kind_ == PathArgument::kindKey )
+      {
+         if ( !cursor->isObject() )
+            return defaultValue;
+         cursor = &((*cursor)[step->key_]);
+         if ( cursor == &Value::null )
+            return defaultValue;
+      }
+   }
+   return *cursor;
+}
+
+// Walks the stored path from `root` via the non-const operator[]; the type checks only mark error sites.
+Value &
+Path::make( Value &root ) const
+{
+   Value *node = &root;
+   for ( Args::const_iterator it = args_.begin(); it != args_.end(); ++it )
+   {
+      const PathArgument &arg = *it;
+      if ( arg.kind_ == PathArgument::kindIndex )
+      {
+         if ( !node->isArray() )
+         {
+            // Error: node is not an array at position ...
+         }
+         node = &((*node)[arg.index_]);
+      }
+      else if ( arg.kind_ == PathArgument::kindKey )
+      {
+         if ( !node->isObject() )
+         {
+            // Error: node is not an object at position...
+         }
+         node = &((*node)[arg.key_]);
+      }
+   }
+   return *node;
+}
+
+
+} // namespace Json
diff --git a/third_party/jsoncpp/patches/value.h.diff b/third_party/jsoncpp/patches/value.h.diff
new file mode 100644
index 0000000..e433deb
--- /dev/null
+++ b/third_party/jsoncpp/patches/value.h.diff
@@ -0,0 +1,37 @@
+diff --git "a/source\\include\\json\\value.h" "b/overrides\\include\\json\\value.h"
+index b013c9b..5707260 100644
+--- "a/source\\include\\json\\value.h"
++++ "b/overrides\\include\\json\\value.h"
+@@ -7,7 +7,7 @@
+ # define CPPTL_JSON_H_INCLUDED
+ 
+ #if !defined(JSON_IS_AMALGAMATION)
+-# include "forwards.h"
++# include <json/forwards.h>
+ #endif // if !defined(JSON_IS_AMALGAMATION)
+ # include <string>
+ # include <vector>
+@@ -136,7 +136,7 @@ namespace Json {
+       typedef Json::LargestUInt LargestUInt;
+       typedef Json::ArrayIndex ArrayIndex;
+ 
+-      static const Value null;
++      static const Value& null;
+       /// Minimum signed integer value that can be stored in a Json::Value.
+       static const LargestInt minLargestInt;
+       /// Maximum signed integer value that can be stored in a Json::Value.
+@@ -496,10 +496,12 @@ namespace Json {
+ # endif
+       } value_;
+       ValueType type_ : 8;
+-      int allocated_ : 1;     // Notes: if declared as bool, bitfield is useless.
++      // One-bit bitfields must be unsigned to allow storing 1.
++      // They must be 32-bits to share storage with ValueHolder.
++      unsigned int allocated_ : 1;
+ # ifdef JSON_VALUE_USE_INTERNAL_MAP
+       unsigned int itemIsUsed_ : 1;      // used by the ValueInternalMap container.
+-      int memberNameIsStatic_ : 1;       // used by the ValueInternalMap container.
++      unsigned int memberNameIsStatic_ : 1; // used by the ValueInternalMap container.
+ # endif
+       CommentInfo *comments_;
+    };
diff --git a/third_party/libFuzzer/BUILD.gn b/third_party/libFuzzer/BUILD.gn
new file mode 100644
index 0000000..dacd779
--- /dev/null
+++ b/third_party/libFuzzer/BUILD.gn
@@ -0,0 +1,44 @@
+# Copied from chromium's third_party/libFuzzer.
+#
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+# libFuzzer engine sources, built with the no_goma_code config and without sanitizer coverage.
+source_set("libfuzzer") {
+  # libfuzzer should be compiled without coverage (infinite loop in trace_cmp).
+  configs -= [ "//build/config/sanitizers:default_sanitizer_flags" ]
+  configs +=
+      [ "//build/config/sanitizers:default_sanitizer_flags_but_coverage" ]
+  configs -= [ "//build/config/compiler:goma_code" ]
+  configs += [ "//build/config/compiler:no_goma_code" ]
+
+  sources = [
+    "src/FuzzerCrossOver.cpp",
+    "src/FuzzerDriver.cpp",
+    "src/FuzzerExtFunctionsDlsym.cpp",
+    "src/FuzzerExtFunctionsWeak.cpp",
+    "src/FuzzerIO.cpp",
+    "src/FuzzerLoop.cpp",
+    "src/FuzzerMain.cpp",
+    "src/FuzzerMutate.cpp",
+    "src/FuzzerSHA1.cpp",
+    "src/FuzzerTracePC.cpp",
+    "src/FuzzerTraceState.cpp",
+    "src/FuzzerUtil.cpp",
+    "src/FuzzerUtilDarwin.cpp",
+    "src/FuzzerUtilLinux.cpp",
+  ]
+}
+# AFL driver source, built with the same config substitutions as the libfuzzer target.
+source_set("afl_driver") {
+  # AFL should be compiled without coverage (infinite loop in trace_cmp).
+  configs -= [ "//build/config/sanitizers:default_sanitizer_flags" ]
+  configs +=
+      [ "//build/config/sanitizers:default_sanitizer_flags_but_coverage" ]
+  configs -= [ "//build/config/compiler:goma_code" ]
+  configs += [ "//build/config/compiler:no_goma_code" ]
+
+  sources = [
+    "src/afl/afl_driver.cpp",
+  ]
+}
diff --git a/third_party/libFuzzer/LICENSE b/third_party/libFuzzer/LICENSE
new file mode 100644
index 0000000..b148c65
--- /dev/null
+++ b/third_party/libFuzzer/LICENSE
@@ -0,0 +1,93 @@
+Note: libFuzzer is now a compiler-rt component, so this LICENSE is taken from
+compiler-rt.
+
+==============================================================================
+compiler_rt License
+==============================================================================
+
+The compiler_rt library is dual licensed under both the University of Illinois
+"BSD-Like" license and the MIT license.  As a user of this code you may choose
+to use it under either license.  As a contributor, you agree to allow your code
+to be used under both.
+
+Full text of the relevant licenses is included below.
+
+==============================================================================
+
+University of Illinois/NCSA
+Open Source License
+
+Copyright (c) 2009-2016 by the contributors listed in CREDITS.TXT
+
+All rights reserved.
+
+Developed by:
+
+    LLVM Team
+
+    University of Illinois at Urbana-Champaign
+
+    http://llvm.org
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimers.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimers in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the names of the LLVM Team, University of Illinois at
+      Urbana-Champaign, nor the names of its contributors may be used to
+      endorse or promote products derived from this Software without specific
+      prior written permission.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+SOFTWARE.
+
+==============================================================================
+
+Copyright (c) 2009-2015 by the contributors listed in CREDITS.TXT
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+==============================================================================
+Copyrights and Licenses for Third Party Software Distributed with LLVM:
+==============================================================================
+The LLVM software contains code written by third parties.  Such software will
+have its own individual LICENSE.TXT file in the directory in which it appears.
+This file will describe the copyrights, license, and restrictions which apply
+to that code.
+
+The disclaimer of warranty in the University of Illinois Open Source License
+applies to all code in the LLVM Distribution, and nothing in any of the
+other licenses gives permission to use the names of the LLVM Team or the
+University of Illinois to endorse or promote products derived from this
+Software.
diff --git a/third_party/libc++/BUILD.gn b/third_party/libc++/BUILD.gn
new file mode 100644
index 0000000..6bfd97f
--- /dev/null
+++ b/third_party/libc++/BUILD.gn
@@ -0,0 +1,132 @@
+# Copied from chromium's buildtools/third_party/libc++/BUILD.gn,
+# and modified for goma.
+#
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+#import("//build/config/sanitizers/sanitizers.gni")
+import("//build/toolchain/toolchain.gni")
+
+# Used by libc++ and libc++abi: PIC/pthread flags, C++11, and the in-tree libc++ / libc++abi headers.
+config("config") {
+  defines = [ "LIBCXX_BUILDING_LIBCXXABI" ]
+  cflags = [
+    "-fPIC",
+    "-fstrict-aliasing",
+    "-pthread",
+  ]
+  cflags_cc = [
+    "-nostdinc++",
+    "-isystem" + rebase_path("trunk/include", root_build_dir),
+    "-isystem" + rebase_path("../libc++abi/trunk/include", root_build_dir),
+    "-std=c++11",
+  ]
+}
+# Shared libc++ built from trunk/src with RTTI enabled; linked with -nodefaultlibs and explicit libs.
+shared_library("libc++") {
+  sources = [
+    "trunk/src/algorithm.cpp",
+    "trunk/src/any.cpp",
+    "trunk/src/bind.cpp",
+    "trunk/src/chrono.cpp",
+    "trunk/src/condition_variable.cpp",
+    "trunk/src/debug.cpp",
+    "trunk/src/exception.cpp",
+    "trunk/src/future.cpp",
+    "trunk/src/hash.cpp",
+    "trunk/src/ios.cpp",
+    "trunk/src/iostream.cpp",
+    "trunk/src/locale.cpp",
+    "trunk/src/memory.cpp",
+    "trunk/src/mutex.cpp",
+    "trunk/src/new.cpp",
+    "trunk/src/optional.cpp",
+    "trunk/src/random.cpp",
+    "trunk/src/regex.cpp",
+    "trunk/src/shared_mutex.cpp",
+    "trunk/src/stdexcept.cpp",
+    "trunk/src/string.cpp",
+    "trunk/src/strstream.cpp",
+    "trunk/src/system_error.cpp",
+    "trunk/src/thread.cpp",
+    "trunk/src/typeinfo.cpp",
+    "trunk/src/utility.cpp",
+    "trunk/src/valarray.cpp",
+  ]
+  configs -= [
+    "//build/config/compiler:goma_code",
+    "//build/config/compiler:no_rtti",
+    "//build/config/gcc:no_exceptions",
+
+    #"//build/config/gcc:symbol_visibility_hidden",
+  ]
+  configs += [
+    ":config",
+    "//build/config/compiler:no_goma_code",
+    "//build/config/compiler:rtti",
+
+    #"//build/config/gcc:symbol_visibility_default",
+    "//build/config/sanitizers:sanitizer_options_link_helper",
+  ]
+
+  ldflags = [ "-nodefaultlibs" ]
+
+  # TODO: Remove "-pthread" from ldflags.
+  # -nodefaultlibs turns -pthread into a no-op, causing an unused argument
+  # warning. Explicitly link with -lpthread instead.
+
+  libs = [ "m" ]
+
+  if (os != "mac") {
+    libs += [
+      "c",
+      "gcc_s",
+      "pthread",
+      "rt",
+    ]
+  }
+
+  # libc++abi is linked statically into libc++.so. This allows us to get both
+  # libc++ and libc++abi by passing '-stdlib=libc++'. If libc++abi was a
+  # separate DSO, we'd have to link against it explicitly.
+  deps = [
+    "//third_party/libc++abi",
+  ]
+
+  if (os == "mac" && using_sanitizer) {
+    # -nodefaultlibs on mac doesn't add any kind of runtime libraries.
+    # These runtimes have to be added manually.
+    lib_dirs = [ "//third_party/llvm-build/Release+Asserts/lib/clang/$clang_version/lib/darwin" ]
+
+    if (is_asan) {
+      libs += [ "clang_rt.asan_osx_dynamic" ]
+    }
+  }
+}
+# Depends on :libc++ and exports :link_helper to dependents through public_configs.
+group("libcxx_proxy") {
+  deps = [
+    ":libc++",
+  ]
+  public_configs = [ ":link_helper" ]
+}
+
+# This config is only used by binaries and shared library targets.
+# //build/config/sanitizers:default_sanitizer_flags sets the include paths for
+# everything else.
+config("link_helper") {
+  ldflags = [ "-stdlib=libc++" ]
+
+  if (os != "mac") {
+    ldflags += [
+      # Normally the generator takes care of RPATH. Our case is special because
+      # the generator is unaware of the libc++.so dependency. Note that setting
+      # RPATH here is a potential security issue. See the following for another
+      # example of this issue: https://code.google.com/p/gyp/issues/detail?id=315
+      "-Wl,-rpath,\$ORIGIN/",
+    ]
+  }
+
+  lib_dirs = [ root_build_dir ]  # NOTE(review): presumably where libc++.so is written - confirm
+}
diff --git a/third_party/libc++/LICENSE b/third_party/libc++/LICENSE
new file mode 100644
index 0000000..cda5a34
--- /dev/null
+++ b/third_party/libc++/LICENSE
@@ -0,0 +1,56 @@
+==============================================================================
+libc++ License
+==============================================================================
+The libc++ library is dual licensed under both the University of Illinois
+"BSD-Like" license and the MIT license.  As a user of this code you may choose
+to use it under either license.  As a contributor, you agree to allow your code
+to be used under both.
+Full text of the relevant licenses is included below.
+==============================================================================
+University of Illinois/NCSA
+Open Source License
+Copyright (c) 2009-2017 by the contributors listed in CREDITS.TXT
+All rights reserved.
+Developed by:
+    LLVM Team
+    University of Illinois at Urbana-Champaign
+    http://llvm.org
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimers.
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimers in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the names of the LLVM Team, University of Illinois at
+      Urbana-Champaign, nor the names of its contributors may be used to
+      endorse or promote products derived from this Software without specific
+      prior written permission.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+SOFTWARE.
+==============================================================================
+Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/third_party/libc++abi/BUILD.gn b/third_party/libc++abi/BUILD.gn
new file mode 100644
index 0000000..0386fee
--- /dev/null
+++ b/third_party/libc++abi/BUILD.gn
@@ -0,0 +1,53 @@
+# Copied from chromium's buildtools/third_party/libc++abi/BUILD.gn,
+# and modified for goma.
+#
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+# Adds -Wno-unused-function when building with clang (see the linked LLVM bug).
+config("libc++abi_warnings") {
+  if (is_clang) {
+    # http://llvm.org/PR25978
+    cflags = [ "-Wno-unused-function" ]
+  }
+}
+# Static libc++abi built from trunk/src with RTTI enabled, sharing //third_party/libc++:config.
+static_library("libc++abi") {
+  sources = [
+    "trunk/src/abort_message.cpp",
+    "trunk/src/cxa_aux_runtime.cpp",
+    "trunk/src/cxa_default_handlers.cpp",
+    "trunk/src/cxa_demangle.cpp",
+    "trunk/src/cxa_exception.cpp",
+    "trunk/src/cxa_exception_storage.cpp",
+    "trunk/src/cxa_guard.cpp",
+    "trunk/src/cxa_handlers.cpp",
+    "trunk/src/cxa_new_delete.cpp",
+    "trunk/src/cxa_personality.cpp",
+    "trunk/src/cxa_thread_atexit.cpp",
+    "trunk/src/cxa_unexpected.cpp",
+    "trunk/src/cxa_vector.cpp",
+    "trunk/src/cxa_virtual.cpp",
+    "trunk/src/exception.cpp",
+    "trunk/src/private_typeinfo.cpp",
+    "trunk/src/stdexcept.cpp",
+    "trunk/src/typeinfo.cpp",
+  ]
+  configs -= [
+    "//build/config/compiler:goma_code",
+    "//build/config/compiler:no_rtti",
+    "//build/config/gcc:no_exceptions",
+
+    #"//build/config/gcc:symbol_visibility_hidden",
+  ]
+  configs += [
+    "//build/config/compiler:no_goma_code",
+    "//build/config/compiler:rtti",
+
+    #"//build/config/gcc:symbol_visibility_default",
+    "//third_party/libc++:config",
+
+    # Must be after no_goma_code.
+    ":libc++abi_warnings",
+  ]
+}
diff --git a/third_party/libc++abi/LICENSE b/third_party/libc++abi/LICENSE
new file mode 100644
index 0000000..037dd73
--- /dev/null
+++ b/third_party/libc++abi/LICENSE
@@ -0,0 +1,56 @@
+==============================================================================
+libc++abi License
+==============================================================================
+The libc++abi library is dual licensed under both the University of Illinois
+"BSD-Like" license and the MIT license.  As a user of this code you may choose
+to use it under either license.  As a contributor, you agree to allow your code
+to be used under both.
+Full text of the relevant licenses is included below.
+==============================================================================
+University of Illinois/NCSA
+Open Source License
+Copyright (c) 2009-2017 by the contributors listed in CREDITS.TXT
+All rights reserved.
+Developed by:
+    LLVM Team
+    University of Illinois at Urbana-Champaign
+    http://llvm.org
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimers.
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimers in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the names of the LLVM Team, University of Illinois at
+      Urbana-Champaign, nor the names of its contributors may be used to
+      endorse or promote products derived from this Software without specific
+      prior written permission.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+SOFTWARE.
+==============================================================================
+Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/third_party/protobuf/BUILD.gn b/third_party/protobuf/BUILD.gn
new file mode 100644
index 0000000..c6804f4
--- /dev/null
+++ b/third_party/protobuf/BUILD.gn
@@ -0,0 +1,628 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+#
+# Copied from chromium third_party/protobuf and revised for goma client
+
+config("protobuf_config") {
+  include_dirs = [ "protobuf/src" ]
+  defines = [
+    "GOOGLE_PROTOBUF_NO_RTTI",
+    "GOOGLE_PROTOBUF_NO_STATIC_INITIALIZER",
+  ]
+
+  # HAVE_ZLIB must be defined to use Gzip{Input,Output}Stream.
+  defines += [ "HAVE_ZLIB" ]
+  if (os != "win") {
+    defines += [ "HAVE_PTHREAD" ]
+  }
+}
+
+config("protobuf_warnings") {
+  cflags = []
+  if (is_clang) {
+    # protobuf-3 contains a few functions that are unused.
+    cflags += [ "-Wno-unused-function" ]
+  }
+}
+
+# Apply this config to targets that use code generated by the proto compiler.
+# It sets up the include directories and suppresses compiler warnings.
+config("using_proto") {
+  include_dirs = [
+    "protobuf/src",
+    "$root_gen_dir",
+  ]
+
+  if (os == "win") {
+    # Suppress warnings on Windows.
+    cflags = [
+      "/wd4512",  # assignment operator could not be generated
+      "/wd4244",  # '=': conversion from 'uint32' to 'uint8'.
+      "/wd4127",  # conditional expression is constant
+      "/wd4125",  # decimal digit terminates octal escape sequence
+      "/wd4100",  # unreferenced formal parameter
+      "/wd4309",  # truncation of constant value
+      "/wd4838",  # require narrowing conversion
+      "/wd4505",  # unreferenced local function has been removed
+      "/wd4456",  # declaration hides previous local declaration
+    ]
+  }
+  if (is_posix) {
+    cflags = [
+      # Suppress warnings raised by included headers.
+      "-Wno-unused-parameter",
+      "-Wno-sign-compare",
+      "-Wno-unused-local-typedef",
+      "-Wno-deprecated",
+      "-Wno-bitfield-width",
+      "-Wno-missing-field-initializers",
+    ]
+  }
+}
+
+protobuf_lite_sources = [
+  "protobuf/src/google/protobuf/arena.cc",
+  "protobuf/src/google/protobuf/arena.h",
+  "protobuf/src/google/protobuf/arenastring.cc",
+  "protobuf/src/google/protobuf/arenastring.h",
+  "protobuf/src/google/protobuf/extension_set.cc",
+  "protobuf/src/google/protobuf/extension_set.h",
+  "protobuf/src/google/protobuf/generated_message_util.cc",
+  "protobuf/src/google/protobuf/generated_message_util.h",
+  "protobuf/src/google/protobuf/io/coded_stream.cc",
+  "protobuf/src/google/protobuf/io/coded_stream.h",
+  "protobuf/src/google/protobuf/io/coded_stream_inl.h",
+  "protobuf/src/google/protobuf/io/zero_copy_stream.cc",
+  "protobuf/src/google/protobuf/io/zero_copy_stream.h",
+  "protobuf/src/google/protobuf/io/zero_copy_stream_impl_lite.cc",
+  "protobuf/src/google/protobuf/io/zero_copy_stream_impl_lite.h",
+  "protobuf/src/google/protobuf/map.h",
+  "protobuf/src/google/protobuf/map_entry_lite.h",
+  "protobuf/src/google/protobuf/map_field_lite.h",
+  "protobuf/src/google/protobuf/map_type_handler.h",
+  "protobuf/src/google/protobuf/message_lite.cc",
+  "protobuf/src/google/protobuf/message_lite.h",
+  "protobuf/src/google/protobuf/repeated_field.cc",
+  "protobuf/src/google/protobuf/repeated_field.h",
+  "protobuf/src/google/protobuf/stubs/atomicops.h",
+  "protobuf/src/google/protobuf/stubs/atomicops_internals_arm64_gcc.h",
+  "protobuf/src/google/protobuf/stubs/atomicops_internals_arm_gcc.h",
+  "protobuf/src/google/protobuf/stubs/atomicops_internals_arm_qnx.h",
+  "protobuf/src/google/protobuf/stubs/atomicops_internals_atomicword_compat.h",
+  "protobuf/src/google/protobuf/stubs/atomicops_internals_generic_gcc.h",
+  "protobuf/src/google/protobuf/stubs/atomicops_internals_macosx.h",
+  "protobuf/src/google/protobuf/stubs/atomicops_internals_mips_gcc.h",
+  "protobuf/src/google/protobuf/stubs/atomicops_internals_pnacl.h",
+  "protobuf/src/google/protobuf/stubs/atomicops_internals_power.h",
+  "protobuf/src/google/protobuf/stubs/atomicops_internals_solaris.h",
+  "protobuf/src/google/protobuf/stubs/atomicops_internals_tsan.h",
+  "protobuf/src/google/protobuf/stubs/atomicops_internals_x86_gcc.cc",
+  "protobuf/src/google/protobuf/stubs/atomicops_internals_x86_gcc.h",
+  "protobuf/src/google/protobuf/stubs/atomicops_internals_x86_msvc.cc",
+  "protobuf/src/google/protobuf/stubs/atomicops_internals_x86_msvc.h",
+  "protobuf/src/google/protobuf/stubs/atomic_sequence_num.h",
+  "protobuf/src/google/protobuf/stubs/bytestream.cc",
+  "protobuf/src/google/protobuf/stubs/bytestream.h",
+  "protobuf/src/google/protobuf/stubs/callback.h",
+  "protobuf/src/google/protobuf/stubs/casts.h",
+  "protobuf/src/google/protobuf/stubs/common.cc",
+  "protobuf/src/google/protobuf/stubs/common.h",
+  "protobuf/src/google/protobuf/stubs/fastmem.h",
+  "protobuf/src/google/protobuf/stubs/hash.h",
+  "protobuf/src/google/protobuf/stubs/int128.cc",
+  "protobuf/src/google/protobuf/stubs/int128.h",
+  "protobuf/src/google/protobuf/stubs/logging.h",
+  "protobuf/src/google/protobuf/stubs/macros.h",
+  "protobuf/src/google/protobuf/stubs/map_util.h",
+  "protobuf/src/google/protobuf/stubs/mutex.h",
+  "protobuf/src/google/protobuf/stubs/once.cc",
+  "protobuf/src/google/protobuf/stubs/once.h",
+  "protobuf/src/google/protobuf/stubs/platform_macros.h",
+  "protobuf/src/google/protobuf/stubs/port.h",
+  "protobuf/src/google/protobuf/stubs/scoped_ptr.h",
+  "protobuf/src/google/protobuf/stubs/shared_ptr.h",
+  "protobuf/src/google/protobuf/stubs/status.cc",
+  "protobuf/src/google/protobuf/stubs/status.h",
+  "protobuf/src/google/protobuf/stubs/status_macros.h",
+  "protobuf/src/google/protobuf/stubs/statusor.cc",
+  "protobuf/src/google/protobuf/stubs/statusor.h",
+  "protobuf/src/google/protobuf/stubs/stl_util.h",
+  "protobuf/src/google/protobuf/stubs/stringpiece.cc",
+  "protobuf/src/google/protobuf/stubs/stringpiece.h",
+  "protobuf/src/google/protobuf/stubs/stringprintf.cc",
+  "protobuf/src/google/protobuf/stubs/stringprintf.h",
+  "protobuf/src/google/protobuf/stubs/structurally_valid.cc",
+  "protobuf/src/google/protobuf/stubs/strutil.cc",
+  "protobuf/src/google/protobuf/stubs/strutil.h",
+  "protobuf/src/google/protobuf/stubs/template_util.h",
+  "protobuf/src/google/protobuf/stubs/type_traits.h",
+  "protobuf/src/google/protobuf/stubs/time.cc",
+  "protobuf/src/google/protobuf/stubs/time.h",
+  "protobuf/src/google/protobuf/wire_format_lite.cc",
+  "protobuf/src/google/protobuf/wire_format_lite.h",
+  "protobuf/src/google/protobuf/wire_format_lite_inl.h",
+]
+
+protobuf_lite_cflags = []
+if (os == "win") {
+  protobuf_lite_cflags = [
+    "/wd4018",  # signed/unsigned mismatch in comparison
+    "/wd4065",  # switch statement contains 'default' but no 'case' labels
+    "/wd4146",  # unary minus operator applied to unsigned type
+    "/wd4244",  # implicit conversion, possible loss of data
+    "/wd4267",  # size_t to int truncation
+    "/wd4291",  # no matching operator delete for a placement new.
+    "/wd4305",  # double to float truncation
+    "/wd4355",  # 'this' used in base member initializer list
+    "/wd4506",  # no definition for inline function (protobuf issue #240)
+    "/wd4309",  # Truncation of constant value.
+    "/wd4838",  # Narrowing conversion. Doesn't seem to be very useful.
+  ]
+}
+
+static_library("protobuf_lite") {
+  sources = protobuf_lite_sources
+
+  # goma: build with no_goma_code in place of goma_code.
+  configs -= [ "//build/config/compiler:goma_code" ]
+  configs += [
+    "//build/config/compiler:no_goma_code",
+
+    # Must be after no_goma_code for warning flags to be ordered
+    # correctly.
+    ":protobuf_warnings",
+  ]
+
+  if (os == "win") {
+    configs -= [ "//build/config/win:lean_and_mean" ]
+  }
+
+  public_configs = [
+    ":protobuf_config",
+
+    # TODO: crbug.com/167187 fix size_t to int truncations.
+    "//build/config/compiler:no_size_t_to_int_warning",
+  ]
+
+  # deps = [
+  #   "//build/config/sanitizers:deps",
+  # ]
+
+  cflags = protobuf_lite_cflags
+}
+
+# This is the full, heavy protobuf lib that's needed for c++ .protos that don't
+# specify the LITE_RUNTIME option. The protocol compiler itself (protoc) falls
+# into that category. Do not use in Chrome code.
+static_library("protobuf_full") {
+  # Prevent people from depending on this outside our file.
+  # visibility = [ ":*" ]
+
+  sources = protobuf_lite_sources
+  sources += [
+    "protobuf/src/google/protobuf/any.cc",
+    "protobuf/src/google/protobuf/any.h",
+    "protobuf/src/google/protobuf/any.pb.cc",
+    "protobuf/src/google/protobuf/any.pb.h",
+    "protobuf/src/google/protobuf/api.pb.cc",
+    "protobuf/src/google/protobuf/api.pb.h",
+    "protobuf/src/google/protobuf/compiler/importer.cc",
+    "protobuf/src/google/protobuf/compiler/importer.h",
+    "protobuf/src/google/protobuf/compiler/parser.cc",
+    "protobuf/src/google/protobuf/compiler/parser.h",
+    "protobuf/src/google/protobuf/descriptor.cc",
+    "protobuf/src/google/protobuf/descriptor.h",
+    "protobuf/src/google/protobuf/descriptor.pb.cc",
+    "protobuf/src/google/protobuf/descriptor.pb.h",
+    "protobuf/src/google/protobuf/descriptor_database.cc",
+    "protobuf/src/google/protobuf/descriptor_database.h",
+    "protobuf/src/google/protobuf/duration.pb.cc",
+    "protobuf/src/google/protobuf/duration.pb.h",
+    "protobuf/src/google/protobuf/dynamic_message.cc",
+    "protobuf/src/google/protobuf/dynamic_message.h",
+    "protobuf/src/google/protobuf/empty.pb.cc",
+    "protobuf/src/google/protobuf/empty.pb.h",
+    "protobuf/src/google/protobuf/extension_set_heavy.cc",
+    "protobuf/src/google/protobuf/field_mask.pb.cc",
+    "protobuf/src/google/protobuf/field_mask.pb.h",
+    "protobuf/src/google/protobuf/generated_enum_reflection.h",
+    "protobuf/src/google/protobuf/generated_enum_util.h",
+    "protobuf/src/google/protobuf/generated_message_reflection.cc",
+    "protobuf/src/google/protobuf/generated_message_reflection.h",
+    "protobuf/src/google/protobuf/io/gzip_stream.cc",
+    "protobuf/src/google/protobuf/io/gzip_stream.h",
+    "protobuf/src/google/protobuf/io/printer.cc",
+    "protobuf/src/google/protobuf/io/printer.h",
+    "protobuf/src/google/protobuf/io/strtod.cc",
+    "protobuf/src/google/protobuf/io/strtod.h",
+    "protobuf/src/google/protobuf/io/tokenizer.cc",
+    "protobuf/src/google/protobuf/io/tokenizer.h",
+    "protobuf/src/google/protobuf/io/zero_copy_stream_impl.cc",
+    "protobuf/src/google/protobuf/io/zero_copy_stream_impl.h",
+    "protobuf/src/google/protobuf/map_entry.h",
+    "protobuf/src/google/protobuf/map_field.cc",
+    "protobuf/src/google/protobuf/map_field.h",
+    "protobuf/src/google/protobuf/map_field_inl.h",
+    "protobuf/src/google/protobuf/message.cc",
+    "protobuf/src/google/protobuf/message.h",
+    "protobuf/src/google/protobuf/metadata.h",
+    "protobuf/src/google/protobuf/reflection.h",
+    "protobuf/src/google/protobuf/reflection_internal.h",
+    "protobuf/src/google/protobuf/reflection_ops.cc",
+    "protobuf/src/google/protobuf/reflection_ops.h",
+    "protobuf/src/google/protobuf/service.cc",
+    "protobuf/src/google/protobuf/service.h",
+    "protobuf/src/google/protobuf/source_context.pb.cc",
+    "protobuf/src/google/protobuf/source_context.pb.h",
+    "protobuf/src/google/protobuf/struct.pb.cc",
+    "protobuf/src/google/protobuf/struct.pb.h",
+    "protobuf/src/google/protobuf/stubs/mathlimits.cc",
+    "protobuf/src/google/protobuf/stubs/mathlimits.h",
+    "protobuf/src/google/protobuf/stubs/mathutil.h",
+    "protobuf/src/google/protobuf/stubs/singleton.h",
+    "protobuf/src/google/protobuf/stubs/substitute.cc",
+    "protobuf/src/google/protobuf/stubs/substitute.h",
+    "protobuf/src/google/protobuf/text_format.cc",
+    "protobuf/src/google/protobuf/text_format.h",
+    "protobuf/src/google/protobuf/timestamp.pb.cc",
+    "protobuf/src/google/protobuf/timestamp.pb.h",
+    "protobuf/src/google/protobuf/type.pb.cc",
+    "protobuf/src/google/protobuf/type.pb.h",
+    "protobuf/src/google/protobuf/unknown_field_set.cc",
+    "protobuf/src/google/protobuf/unknown_field_set.h",
+    "protobuf/src/google/protobuf/util/field_comparator.cc",
+    "protobuf/src/google/protobuf/util/field_comparator.h",
+    "protobuf/src/google/protobuf/util/field_mask_util.cc",
+    "protobuf/src/google/protobuf/util/field_mask_util.h",
+    "protobuf/src/google/protobuf/util/internal/constants.h",
+    "protobuf/src/google/protobuf/util/internal/datapiece.cc",
+    "protobuf/src/google/protobuf/util/internal/datapiece.h",
+    "protobuf/src/google/protobuf/util/internal/default_value_objectwriter.cc",
+    "protobuf/src/google/protobuf/util/internal/default_value_objectwriter.h",
+    "protobuf/src/google/protobuf/util/internal/error_listener.cc",
+    "protobuf/src/google/protobuf/util/internal/error_listener.h",
+    "protobuf/src/google/protobuf/util/internal/field_mask_utility.cc",
+    "protobuf/src/google/protobuf/util/internal/field_mask_utility.h",
+    "protobuf/src/google/protobuf/util/internal/json_escaping.cc",
+    "protobuf/src/google/protobuf/util/internal/json_escaping.h",
+    "protobuf/src/google/protobuf/util/internal/json_objectwriter.cc",
+    "protobuf/src/google/protobuf/util/internal/json_objectwriter.h",
+    "protobuf/src/google/protobuf/util/internal/json_stream_parser.cc",
+    "protobuf/src/google/protobuf/util/internal/json_stream_parser.h",
+    "protobuf/src/google/protobuf/util/internal/location_tracker.h",
+    "protobuf/src/google/protobuf/util/internal/object_location_tracker.h",
+    "protobuf/src/google/protobuf/util/internal/object_source.h",
+    "protobuf/src/google/protobuf/util/internal/object_writer.cc",
+    "protobuf/src/google/protobuf/util/internal/object_writer.h",
+    "protobuf/src/google/protobuf/util/internal/proto_writer.cc",
+    "protobuf/src/google/protobuf/util/internal/proto_writer.h",
+    "protobuf/src/google/protobuf/util/internal/protostream_objectsource.cc",
+    "protobuf/src/google/protobuf/util/internal/protostream_objectsource.h",
+    "protobuf/src/google/protobuf/util/internal/protostream_objectwriter.cc",
+    "protobuf/src/google/protobuf/util/internal/protostream_objectwriter.h",
+    "protobuf/src/google/protobuf/util/internal/structured_objectwriter.h",
+    "protobuf/src/google/protobuf/util/internal/type_info.cc",
+    "protobuf/src/google/protobuf/util/internal/type_info.h",
+    "protobuf/src/google/protobuf/util/internal/type_info_test_helper.cc",
+    "protobuf/src/google/protobuf/util/internal/type_info_test_helper.h",
+    "protobuf/src/google/protobuf/util/internal/utility.cc",
+    "protobuf/src/google/protobuf/util/internal/utility.h",
+    "protobuf/src/google/protobuf/util/json_util.cc",
+    "protobuf/src/google/protobuf/util/json_util.h",
+    "protobuf/src/google/protobuf/util/message_differencer.cc",
+    "protobuf/src/google/protobuf/util/message_differencer.h",
+    "protobuf/src/google/protobuf/util/time_util.cc",
+    "protobuf/src/google/protobuf/util/time_util.h",
+    "protobuf/src/google/protobuf/util/type_resolver.h",
+    "protobuf/src/google/protobuf/util/type_resolver_util.cc",
+    "protobuf/src/google/protobuf/util/type_resolver_util.h",
+    "protobuf/src/google/protobuf/wire_format.cc",
+    "protobuf/src/google/protobuf/wire_format.h",
+    "protobuf/src/google/protobuf/wrappers.pb.cc",
+    "protobuf/src/google/protobuf/wrappers.pb.h",
+  ]
+
+  configs -= [ "//build/config/compiler:goma_code" ]
+  configs += [
+    "//build/config/compiler:no_goma_code",
+
+    # Must be after no_goma_code for warning flags to be ordered
+    # correctly.
+    ":protobuf_warnings",
+  ]
+  if (os == "win") {
+    configs -= [ "//build/config/win:lean_and_mean" ]
+  }
+  public_configs = [
+    ":protobuf_config",
+
+    # TODO: crbug.com/167187 fix size_t to int truncations.
+    "//build/config/compiler:no_size_t_to_int_warning",
+  ]
+
+  deps = [
+    "//third_party:minizip",
+  ]
+
+  cflags = protobuf_lite_cflags
+}
+
+static_library("protoc_lib") {
+  sources = [
+    "protobuf/src/google/protobuf/compiler/code_generator.cc",
+    "protobuf/src/google/protobuf/compiler/code_generator.h",
+    "protobuf/src/google/protobuf/compiler/command_line_interface.cc",
+    "protobuf/src/google/protobuf/compiler/command_line_interface.h",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_enum.cc",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_enum.h",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_enum_field.cc",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_enum_field.h",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_extension.cc",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_extension.h",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_field.cc",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_field.h",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_file.cc",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_file.h",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_generator.cc",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_generator.h",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_helpers.cc",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_helpers.h",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_map_field.cc",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_map_field.h",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_message.cc",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_message.h",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_message_field.cc",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_message_field.h",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_options.h",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_primitive_field.cc",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_primitive_field.h",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_service.cc",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_service.h",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_string_field.cc",
+    "protobuf/src/google/protobuf/compiler/cpp/cpp_string_field.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_doc_comment.cc",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_doc_comment.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_enum.cc",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_enum.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_enum_field.cc",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_enum_field.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_field_base.cc",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_field_base.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_generator.cc",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_generator.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_helpers.cc",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_helpers.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_map_field.cc",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_map_field.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_message.cc",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_message.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_message_field.cc",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_message_field.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_options.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_primitive_field.cc",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_primitive_field.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_reflection_class.cc",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_reflection_class.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_repeated_enum_field.cc",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_repeated_enum_field.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_repeated_message_field.cc",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_repeated_message_field.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_repeated_primitive_field.cc",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_repeated_primitive_field.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_source_generator_base.cc",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_source_generator_base.h",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_wrapper_field.cc",
+    "protobuf/src/google/protobuf/compiler/csharp/csharp_wrapper_field.h",
+    "protobuf/src/google/protobuf/compiler/java/java_context.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_context.h",
+    "protobuf/src/google/protobuf/compiler/java/java_doc_comment.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_doc_comment.h",
+    "protobuf/src/google/protobuf/compiler/java/java_enum.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_enum.h",
+    "protobuf/src/google/protobuf/compiler/java/java_enum_field.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_enum_field.h",
+    "protobuf/src/google/protobuf/compiler/java/java_enum_field_lite.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_enum_field_lite.h",
+    "protobuf/src/google/protobuf/compiler/java/java_enum_lite.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_enum_lite.h",
+    "protobuf/src/google/protobuf/compiler/java/java_extension.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_extension.h",
+    "protobuf/src/google/protobuf/compiler/java/java_extension_lite.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_extension_lite.h",
+    "protobuf/src/google/protobuf/compiler/java/java_field.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_field.h",
+    "protobuf/src/google/protobuf/compiler/java/java_file.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_file.h",
+    "protobuf/src/google/protobuf/compiler/java/java_generator.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_generator.h",
+    "protobuf/src/google/protobuf/compiler/java/java_generator_factory.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_generator_factory.h",
+    "protobuf/src/google/protobuf/compiler/java/java_helpers.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_helpers.h",
+    "protobuf/src/google/protobuf/compiler/java/java_lazy_message_field.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_lazy_message_field.h",
+    "protobuf/src/google/protobuf/compiler/java/java_lazy_message_field_lite.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_lazy_message_field_lite.h",
+    "protobuf/src/google/protobuf/compiler/java/java_map_field.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_map_field.h",
+    "protobuf/src/google/protobuf/compiler/java/java_map_field_lite.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_map_field_lite.h",
+    "protobuf/src/google/protobuf/compiler/java/java_message.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_message.h",
+    "protobuf/src/google/protobuf/compiler/java/java_message_builder.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_message_builder.h",
+    "protobuf/src/google/protobuf/compiler/java/java_message_builder_lite.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_message_builder_lite.h",
+    "protobuf/src/google/protobuf/compiler/java/java_message_field.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_message_field.h",
+    "protobuf/src/google/protobuf/compiler/java/java_message_field_lite.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_message_field_lite.h",
+    "protobuf/src/google/protobuf/compiler/java/java_message_lite.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_message_lite.h",
+    "protobuf/src/google/protobuf/compiler/java/java_name_resolver.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_name_resolver.h",
+    "protobuf/src/google/protobuf/compiler/java/java_primitive_field.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_primitive_field.h",
+    "protobuf/src/google/protobuf/compiler/java/java_primitive_field_lite.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_primitive_field_lite.h",
+    "protobuf/src/google/protobuf/compiler/java/java_service.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_service.h",
+    "protobuf/src/google/protobuf/compiler/java/java_shared_code_generator.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_shared_code_generator.h",
+    "protobuf/src/google/protobuf/compiler/java/java_string_field.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_string_field.h",
+    "protobuf/src/google/protobuf/compiler/java/java_string_field_lite.cc",
+    "protobuf/src/google/protobuf/compiler/java/java_string_field_lite.h",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_enum.cc",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_enum.h",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_enum_field.cc",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_enum_field.h",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_extension.cc",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_extension.h",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_field.cc",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_field.h",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_file.cc",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_file.h",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_generator.cc",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_generator.h",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_helpers.cc",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_helpers.h",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_map_field.cc",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_map_field.h",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_message.cc",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_message.h",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_message_field.cc",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_message_field.h",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_primitive_field.cc",
+    "protobuf/src/google/protobuf/compiler/javanano/javanano_primitive_field.h",
+    "protobuf/src/google/protobuf/compiler/js/js_generator.cc",
+    "protobuf/src/google/protobuf/compiler/js/js_generator.h",
+    "protobuf/src/google/protobuf/compiler/js/well_known_types_embed.h",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_enum.cc",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_enum.h",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_enum_field.cc",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_enum_field.h",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_extension.cc",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_extension.h",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_field.cc",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_field.h",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_file.cc",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_file.h",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_generator.cc",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_generator.h",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_helpers.cc",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_helpers.h",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_map_field.cc",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_map_field.h",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_message.cc",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_message.h",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_message_field.cc",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_message_field.h",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_oneof.cc",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_oneof.h",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_primitive_field.cc",
+    "protobuf/src/google/protobuf/compiler/objectivec/objectivec_primitive_field.h",
+    "protobuf/src/google/protobuf/compiler/php/php_generator.cc",
+    "protobuf/src/google/protobuf/compiler/php/php_generator.h",
+    "protobuf/src/google/protobuf/compiler/plugin.cc",
+    "protobuf/src/google/protobuf/compiler/plugin.h",
+    "protobuf/src/google/protobuf/compiler/plugin.pb.cc",
+    "protobuf/src/google/protobuf/compiler/plugin.pb.h",
+    "protobuf/src/google/protobuf/compiler/python/python_generator.cc",
+    "protobuf/src/google/protobuf/compiler/python/python_generator.h",
+    "protobuf/src/google/protobuf/compiler/ruby/ruby_generator.cc",
+    "protobuf/src/google/protobuf/compiler/ruby/ruby_generator.h",
+    "protobuf/src/google/protobuf/compiler/subprocess.cc",
+    "protobuf/src/google/protobuf/compiler/subprocess.h",
+    "protobuf/src/google/protobuf/compiler/zip_writer.cc",
+    "protobuf/src/google/protobuf/compiler/zip_writer.h",
+
+    # generated files
+    "$target_gen_dir/well_known_types_embed.cc",
+  ]
+
+  configs -= [ "//build/config/compiler:goma_code" ]
+  configs += [
+    "//build/config/compiler:no_goma_code",
+
+    # Must be after no_goma_code for warning flags to be ordered
+    # correctly.
+    ":protobuf_warnings",
+  ]
+  if (os == "win") {
+    # This is defined internally, don't warn on duplicate.
+    configs -= [ "//build/config/win:lean_and_mean" ]
+  }
+
+  public_configs = [ ":protobuf_config" ]
+
+  cflags = protobuf_lite_cflags
+
+  deps = [
+    ":generate_js_well_known_types_embed",
+  ]
+
+  public_deps = [
+    ":protobuf_full",
+  ]
+}
+
+# Only compile the compiler for the host architecture.
+executable("protoc") {
+  sources = [
+    "protobuf/src/google/protobuf/compiler/main.cc",
+  ]
+
+  configs -= [ "//build/config/compiler:goma_code" ]
+  configs += [ "//build/config/compiler:no_goma_code" ]
+
+  cflags = protobuf_lite_cflags
+
+  deps = [
+    ":protoc_lib",
+    # Default manifest on Windows (a no-op elsewhere).
+    # "//build/win:default_exe_manifest",
+  ]
+
+  deps += [ "//build/config/sanitizers:deps" ]
+}
+
+executable("js_embed") {
+  sources = [
+    "protobuf/src/google/protobuf/compiler/js/embed.cc",
+  ]
+
+  configs -= [ "//build/config/compiler:goma_code" ]
+  configs += [ "//build/config/compiler:no_goma_code" ]
+
+  cflags = protobuf_lite_cflags
+}
+
+action("generate_js_well_known_types_embed") {
+  script = "js_embed.py"
+  sources = [
+    "protobuf/src/google/protobuf/compiler/js/well_known_types/any.js",
+    "protobuf/src/google/protobuf/compiler/js/well_known_types/struct.js",
+    "protobuf/src/google/protobuf/compiler/js/well_known_types/timestamp.js",
+  ]
+  outputs = [
+    "$target_gen_dir/well_known_types_embed.cc",
+  ]
+  args = [
+    "--generator",
+    rebase_path("$root_out_dir/js_embed"),
+    "--output-path",
+    rebase_path("$target_gen_dir/well_known_types_embed.cc"),
+    rebase_path(
+        "protobuf/src/google/protobuf/compiler/js/well_known_types/any.js"),
+    rebase_path(
+        "protobuf/src/google/protobuf/compiler/js/well_known_types/struct.js"),
+    rebase_path(
+        "protobuf/src/google/protobuf/compiler/js/well_known_types/timestamp.js"),
+  ]
+
+  deps = [
+    ":js_embed",
+  ]
+}
diff --git a/third_party/protobuf/LICENSE b/third_party/protobuf/LICENSE
new file mode 100644
index 0000000..f028c82
--- /dev/null
+++ b/third_party/protobuf/LICENSE
@@ -0,0 +1,42 @@
+This license applies to all parts of Protocol Buffers except the following:
+
+  - Atomicops support for generic gcc, located in
+    src/google/protobuf/stubs/atomicops_internals_generic_gcc.h.
+    This file is copyrighted by Red Hat Inc.
+
+  - Atomicops support for AIX/POWER, located in
+    src/google/protobuf/stubs/atomicops_internals_power.h.
+    This file is copyrighted by Bloomberg Finance LP.
+
+Copyright 2014, Google Inc.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Code generated by the Protocol Buffer compiler is owned by the owner
+of the input file used when generating it.  This code is not
+standalone and requires a support library to be linked with it.  This
+support library is itself covered by the above license.
diff --git a/third_party/protobuf/js_embed.py b/third_party/protobuf/js_embed.py
new file mode 100644
index 0000000..82e855d
--- /dev/null
+++ b/third_party/protobuf/js_embed.py
@@ -0,0 +1,36 @@
+#!/usr/bin/python
+#
+# Copyright 2017 The Goma Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+
+
+import argparse
+import subprocess
+import sys
+
+def main():
+  """Runs the generator on the inputs; writes its stdout to --output-path."""
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--generator', required=True, help='generator')
+  parser.add_argument('--output-path', required=True, help='output file path')
+  parser.add_argument('inputs', metavar='INPUT', nargs='+', help='input files')
+  args = parser.parse_args()
+
+  # Same as the upstream rule: cmd = "$(location :js_embed) $(SRCS) > $@".
+  p = subprocess.Popen([args.generator] + args.inputs, stdout=subprocess.PIPE)
+  stdout_data, _ = p.communicate()
+
+  if p.returncode != 0:
+    # sys.stderr.write() is valid on both Python 2 and Python 3.
+    sys.stderr.write(
+        'failed to run js_embed: exit_status= %d\n' % p.returncode)
+    sys.exit(1)
+
+  with open(args.output_path, 'wb') as f:
+    f.write(stdout_data)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/third_party/protobuf/proto_library.gni b/third_party/protobuf/proto_library.gni
new file mode 100644
index 0000000..8d999f7
--- /dev/null
+++ b/third_party/protobuf/proto_library.gni
@@ -0,0 +1,252 @@
+# Copied from chromium third_party/protobuf and revised for goma.
+#
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Compile a protocol buffer.
+#
+# Protobuf parameters:
+#
+#   proto_out_dir (optional)
+#       Specifies the path suffix that output files are generated under. This
+#       path will be appended to the root_gen_dir.
+#
+#       Targets that depend on the proto target will be able to include the
+#       resulting proto headers with an include like:
+#         #include "dir/for/my_proto_lib/foo.pb.h"
+#       If undefined, this defaults to matching the input directory for each
+#       .proto file (you should almost always use the default mode).
+#
+#   generate_python (optional, default true)
+#       Generate Python protobuf stubs.
+#
+#   generate_cc (optional, default true)
+#       Generate C++ protobuf stubs.
+#
+#   cc_generator_options (optional)
+#       List of extra flags passed to the protocol compiler.  If you need to
+#       add an EXPORT macro to a protobuf's C++ header, set the
+#       'cc_generator_options' variable with the value:
+#       'dllexport_decl=FOO_EXPORT:' (note trailing colon).
+#
+#       It is likely you also need to #include a file for the above EXPORT
+#       macro to work (see cc_include) and set
+#       component_build_force_source_set = true.
+#
+#   generator_plugin_label (optional)
+#       GN label for plugin executable which generates custom cc stubs.
+#
+#   generator_plugin_suffix (required if generator_plugin_label set)
+#       Suffix (before extension) for generated .cc and .h files.
+#
+#   generator_plugin_options (optional)
+#       Extra flags passed to the plugin. See cc_generator_options.
+#
+#   cc_include (optional)
+#       String listing an extra include that should be passed.
+#       Example: cc_include = "foo/bar.h"
+#
+#   deps (optional)
+#       Additional dependencies.
+#
+# Parameters for compiling the generated code:
+#
+#   component_build_force_source_set (Default=false)
+#       When set true the generated code will be compiled as a source set in
+#       the component build. This does not affect static builds.  If you are
+#       exporting symbols from a component, this is required to prevent those
+#       symbols from being stripped. If you're not using dllexports in
+#       cc_generator_options, it's usually best to leave this false.
+#
+#   defines (optional)
+#       Defines to supply to the source set that compiles the generated source
+#       code.
+#
+#   extra_configs (optional)
+#       A list of config labels that will be appended to the configs applying
+#       to the source set.
+#
+# Example
+#  proto_library("mylib") {
+#    sources = [
+#      "foo.proto",
+#    ]
+#  }
+
+template("proto_library") {
+  assert(defined(invoker.sources), "Need sources for proto_library")
+
+  # Don't apply OS-specific sources filtering to the assignments later on.
+  # Platform files should have gotten filtered out in the sources assignment
+  # when this template was invoked. If they weren't, it was on purpose and
+  # this template shouldn't re-apply the filter.
+  set_sources_assignment_filter([])
+
+  action_name = "${target_name}_gen"  # Runs protoc_wrapper.py per source.
+  source_set_name = target_name  # The library target that callers depend on.
+  action_foreach(action_name) {
+    visibility = [ ":$source_set_name" ]
+
+    script = "//build/tools/protoc_wrapper.py"
+
+    sources = invoker.sources
+
+    # Compute the output directory, both relative to the source root (for
+    # declaring "outputs") and relative to the build dir (for passing to the
+    # script).
+    if (defined(invoker.proto_out_dir)) {
+      proto_out_dir = invoker.proto_out_dir
+    } else {
+      proto_out_dir = "{{source_root_relative_dir}}"
+    }
+    out_dir = "$root_gen_dir/" + proto_out_dir
+    rel_out_dir = rebase_path(out_dir, root_build_dir)
+
+    outputs = []  # Appended to below for each enabled generator.
+
+    args = []
+    if (defined(invoker.cc_include)) {
+      args += [
+        "--include",
+        invoker.cc_include,
+        "--protobuf",
+        "$rel_out_dir/{{source_name_part}}.pb.h",
+      ]
+    }
+
+    args += [
+      "--proto-in-dir",
+      "{{source_dir}}",
+      "--proto-in-file",
+      "{{source_file_part}}",
+
+      # TODO: support the system protobuf compiler.
+      "--use-system-protobuf=0",
+    ]
+
+    protoc_label = "//third_party/protobuf:protoc($host_toolchain)"
+    args += [
+      "--",
+
+      # Prepend with "./" so this will never pick up the system one (normally
+      # when not cross-compiling, protoc's output directory will be the same
+      # as the build dir, so the relative location will be empty).
+      "./" +
+          rebase_path(get_label_info(protoc_label, "root_out_dir") + "/protoc",
+                      root_build_dir),
+    ]
+
+    if (!defined(invoker.generate_python) || invoker.generate_python) {
+      py_out_dir = "$root_out_dir/pyproto/" + proto_out_dir
+      rel_py_out_dir = rebase_path(py_out_dir, root_build_dir)
+
+      outputs += [ "$py_out_dir/{{source_name_part}}_pb2.py" ]
+      args += [
+        "--python_out",
+        rel_py_out_dir,
+      ]
+    }
+
+    if (!defined(invoker.generate_cc) || invoker.generate_cc) {
+      # If passed, cc_generator_options should end in a colon, which will
+      # separate it from the directory when we concatenate them. The proto
+      # compiler understands this syntax.
+      if (defined(invoker.cc_generator_options)) {
+        cc_generator_options = invoker.cc_generator_options
+      } else {
+        cc_generator_options = ""
+      }
+      outputs += [
+        "$out_dir/{{source_name_part}}.pb.cc",
+        "$out_dir/{{source_name_part}}.pb.h",
+      ]
+      args += [
+        "--cpp_out",
+        "$cc_generator_options$rel_out_dir",  # Separated by colon.
+      ]
+    }
+
+    if (defined(invoker.generator_plugin_label)) {
+      generator_plugin_label = invoker.generator_plugin_label
+      generator_plugin_suffix = invoker.generator_plugin_suffix
+      if (defined(invoker.generator_plugin_options)) {
+        generator_plugin_options = invoker.generator_plugin_options
+      } else {
+        generator_plugin_options = ""
+      }
+      outputs += [
+        "$out_dir/{{source_name_part}}$generator_plugin_suffix.cc",
+        "$out_dir/{{source_name_part}}$generator_plugin_suffix.h",
+      ]
+
+      # Straightforward way to get the name of executable doesn't work because
+      # root_out_dir and root_build_dir may differ in cross-compilation and
+      # also Windows executables have .exe at the end.
+
+      plugin_host_label = generator_plugin_label + "($host_toolchain)"
+      plugin_path = get_label_info(plugin_host_label, "root_out_dir") + "/" +
+                    get_label_info(plugin_host_label, "name")
+      if (host_os == "win") {
+        plugin_path += ".exe"
+      }
+
+      # Need "./" for script to find plugin binary (working dir is not on PATH).
+      args += [
+        "--plugin",
+        "protoc-gen-plugin=./" + rebase_path(plugin_path, root_build_dir),
+        "--plugin_out",
+        "$generator_plugin_options$rel_out_dir",  # Separated by colon.
+      ]
+    }
+
+    deps = [
+      protoc_label,
+    ]
+
+    # The deps may have steps that have to run before running protobuf.
+    if (defined(invoker.deps)) {
+      deps += invoker.deps
+    }
+  }
+
+  if (defined(invoker.component_build_force_source_set) &&
+      invoker.component_build_force_source_set && is_component_build) {
+    link_target_type = "source_set"
+  } else {
+    link_target_type = "static_library"
+  }
+  target(link_target_type, target_name) {
+    forward_variables_from(invoker,
+                           [
+                             "visibility",
+                             "defines",
+                           ])
+
+    sources = get_target_outputs(":$action_name")  # All outputs declared above.
+
+    if (defined(invoker.extra_configs)) {
+      configs += invoker.extra_configs
+    }
+
+    public_configs = [ "//third_party/protobuf:using_proto" ]
+
+    # If using built-in cc generator the resulting headers reference headers
+    # within protobuf_lite, hence dependencies require those headers too.
+    # In case of generator plugin such issues should be resolved by invoker.
+    if (!defined(invoker.generate_cc) || invoker.generate_cc) {
+      public_deps = [
+        "//third_party/protobuf:protobuf_lite",
+      ]
+    }
+    deps = [
+      ":$action_name",
+    ]
+
+    # This will link any libraries in the deps (the use of invoker.deps in the
+    # action won't link it).
+    if (defined(invoker.deps)) {
+      deps += invoker.deps
+    }
+  }
+}
diff --git a/third_party/yasm/BUILD.gn b/third_party/yasm/BUILD.gn
new file mode 100644
index 0000000..d8ece2b
--- /dev/null
+++ b/third_party/yasm/BUILD.gn
@@ -0,0 +1,526 @@
+# Copied from chromium third_party/yasm/, and modified for goma.
+#
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# The yasm build process creates a slew of small C subprograms that dynamically
+# generate files at various points in the build process.  This makes the build
+# integration moderately complex.
+#
+# There are three classes of dynamically generated files:
+#   1) C source files that should be included in the build (eg., lc3bid.c)
+#   2) C source files that are #included by static C sources (eg., license.c)
+#   3) Intermediate files that are used as input by other subprograms to
+#      further generate files in category #1 or #2.  (eg., version.mac)
+#
+# This structure is represented with the following targets:
+#   1) yasm -- Sources, flags for the main yasm executable. Also depends on
+#              most of the actions that invoke the subprograms.
+#   2) yasm_config -- General build configuration, including the include
+#              path for the checked-in versions of files
+#              generated by manually running configure. These manually
+#              generated files are used by all binaries.
+#   3) yasm_utils -- Object files with memory management and hashing utilities
+#              shared between yasm and the genperf subprogram.
+#   4) genmacro, genmodule, etc. -- One executable target for each subprogram.
+#   5) generate_license, generate_module, etc. -- Actions that invoke programs
+#              built in #4 to generate .c files.
+#   6) compile_gperf, compile_re2c, etc. -- Actions that invoke programs that
+#              turn intermediate files into .c files.
+
+configs_to_delete = []  # Empty in this file, so "configs -=" with it is a no-op.
+configs_to_add = []  # Empty in this file, so "configs +=" with it is a no-op.
+
+if (current_toolchain == host_toolchain) {
+  # Various files referenced by multiple targets. yasm_gen_include_dir was moved
+  # from $target_gen_dir/include to avoid conflicts with x86insn_gas.c and
+  # x86insn_nasm.c. These files were previously generated during the build but
+  # are now shipped pre-generated by yasm.
+  yasm_gen_include_dir = "$target_gen_dir/gen_include"
+  config_makefile = "source/config/Makefile"  # Input to :generate_module.
+  version_file = "version.mac"  # Written by :generate_version.
+
+  import("//build/compiled_action.gni")  # Presumably defines compiled_action*().
+
+  config("yasm_config") {  # Added to the configs of every target in this file.
+    include_dirs = [
+      "source/config/$host_os",  # Checked-in configure output for the host OS.
+      "source/patched-yasm",
+    ]
+    defines = [ "HAVE_CONFIG_H" ]
+    if (is_posix) {
+      cflags = [ "-std=gnu99" ]
+    }
+  }
+
+  executable("genmacro") {  # Tool run by the compile_macro() template below.
+    sources = [
+      "source/patched-yasm/tools/genmacro/genmacro.c",
+    ]
+    configs -= [ "//build/config/compiler:goma_code" ]
+    configs += [
+      ":yasm_config",
+      "//build/config/compiler:no_goma_code",
+    ]
+    deps = [
+      "//build/config:exe_and_shlib_deps",
+
+      # Default manifest on Windows (a no-op elsewhere).
+      "//build/win:default_exe_manifest",
+    ]
+  }
+
+  executable("genmodule") {  # Tool run by :generate_module below.
+    sources = [
+      "source/patched-yasm/libyasm/genmodule.c",
+    ]
+    configs -= [ "//build/config/compiler:goma_code" ]
+    configs += [
+      ":yasm_config",
+      "//build/config/compiler:no_goma_code",
+    ]
+    deps = [
+      "//build/config:exe_and_shlib_deps",
+
+      # Default manifest on Windows (a no-op elsewhere).
+      "//build/win:default_exe_manifest",
+    ]
+  }
+
+  executable("genperf") {  # Tool run by :compile_gperf and :compile_gperf_for_include.
+    sources = [
+      "source/patched-yasm/tools/genperf/genperf.c",
+      "source/patched-yasm/tools/genperf/perfect.c",
+    ]
+
+    configs -= [ "//build/config/compiler:goma_code" ]
+    configs += [
+      ":yasm_config",
+      "//build/config/compiler:no_goma_code",
+    ]
+
+    # Must be compatible with yasm_utils/yasm
+    configs -= configs_to_delete  # Both lists are empty at the top of this file.
+    configs += configs_to_add
+
+    deps = [
+      ":yasm_utils",
+      "//build/config:exe_and_shlib_deps",
+
+      # Default manifest on Windows (a no-op elsewhere).
+      "//build/win:default_exe_manifest",
+    ]
+  }
+
+  # Used by both yasm and genperf binaries.
+  static_library("yasm_utils") {
+    sources = [
+      "source/patched-yasm/libyasm/phash.c",
+      "source/patched-yasm/libyasm/xmalloc.c",
+      "source/patched-yasm/libyasm/xstrdup.c",
+    ]
+
+    configs -= [ "//build/config/compiler:goma_code" ]
+    configs += [
+      ":yasm_config",
+      "//build/config/compiler:no_goma_code",
+    ]
+
+    # Must be compatible with yasm
+    configs -= configs_to_delete  # Both lists are empty at the top of this file.
+    configs += configs_to_add
+  }
+
+  executable("genstring") {  # Tool run by :generate_license below.
+    sources = [
+      "source/patched-yasm/genstring.c",
+    ]
+    configs -= [ "//build/config/compiler:goma_code" ]
+    configs += [
+      ":yasm_config",
+      "//build/config/compiler:no_goma_code",
+    ]
+    deps = [
+      "//build/config:exe_and_shlib_deps",
+
+      # Default manifest on Windows (a no-op elsewhere).
+      "//build/win:default_exe_manifest",
+    ]
+  }
+
+  executable("genversion") {  # Tool run by :generate_version below.
+    sources = [
+      "source/patched-yasm/modules/preprocs/nasm/genversion.c",
+    ]
+    configs -= [ "//build/config/compiler:goma_code" ]
+    configs += [
+      ":yasm_config",
+      "//build/config/compiler:no_goma_code",
+    ]
+    deps = [
+      "//build/config:exe_and_shlib_deps",
+
+      # Default manifest on Windows (a no-op elsewhere).
+      "//build/win:default_exe_manifest",
+    ]
+  }
+
+  config("re2c_warnings") {
+    # Warning suppressions for the re2c sources; only applied under clang.
+    if (is_clang) {
+      cflags = [
+        # re2c is missing CLOSEVOP from one switch.
+        "-Wno-switch",
+
+        # re2c contains many static functions in headers (because it's
+        # a C library predating C99.)
+        "-Wno-unused-function",
+      ]
+    }
+  }
+
+  executable("re2c") {  # Tool run by :compile_re2c and :compile_re2c_lc3b.
+    sources = [
+      "source/patched-yasm/tools/re2c/actions.c",
+      "source/patched-yasm/tools/re2c/code.c",
+      "source/patched-yasm/tools/re2c/dfa.c",
+      "source/patched-yasm/tools/re2c/main.c",
+      "source/patched-yasm/tools/re2c/mbo_getopt.c",
+      "source/patched-yasm/tools/re2c/parser.c",
+      "source/patched-yasm/tools/re2c/scanner.c",
+      "source/patched-yasm/tools/re2c/substr.c",
+      "source/patched-yasm/tools/re2c/translate.c",
+    ]
+
+    configs -= [ "//build/config/compiler:goma_code" ]
+    configs += [
+      ":yasm_config",
+      "//build/config/compiler:no_goma_code",
+
+      # Must be after no_goma_code for warning flags to be ordered
+      # correctly.
+      ":re2c_warnings",
+    ]
+    deps = [
+      "//build/config:exe_and_shlib_deps",
+
+      # Default manifest on Windows (a no-op elsewhere).
+      "//build/win:default_exe_manifest",
+    ]
+  }
+
+  config("yasm_warnings") {  # Only the yasm executable below uses this config.
+    if (is_clang) {
+      cflags = [
+        # reg3264type in x86expr.c is unused.
+        "-Wno-unused-local-typedef",
+      ]
+    }
+  }
+
+  executable("yasm") {  # The assembler itself: checked-in plus generated sources.
+    sources = [
+      "source/patched-yasm/frontends/yasm/yasm-options.c",
+      "source/patched-yasm/frontends/yasm/yasm.c",
+      "source/patched-yasm/libyasm/assocdat.c",
+      "source/patched-yasm/libyasm/bc-align.c",
+      "source/patched-yasm/libyasm/bc-data.c",
+      "source/patched-yasm/libyasm/bc-incbin.c",
+      "source/patched-yasm/libyasm/bc-org.c",
+      "source/patched-yasm/libyasm/bc-reserve.c",
+      "source/patched-yasm/libyasm/bitvect.c",
+      "source/patched-yasm/libyasm/bytecode.c",
+      "source/patched-yasm/libyasm/errwarn.c",
+      "source/patched-yasm/libyasm/expr.c",
+      "source/patched-yasm/libyasm/file.c",
+      "source/patched-yasm/libyasm/floatnum.c",
+      "source/patched-yasm/libyasm/hamt.c",
+      "source/patched-yasm/libyasm/insn.c",
+      "source/patched-yasm/libyasm/intnum.c",
+      "source/patched-yasm/libyasm/inttree.c",
+      "source/patched-yasm/libyasm/linemap.c",
+      "source/patched-yasm/libyasm/md5.c",
+      "source/patched-yasm/libyasm/mergesort.c",
+      "source/patched-yasm/libyasm/section.c",
+      "source/patched-yasm/libyasm/strcasecmp.c",
+      "source/patched-yasm/libyasm/strsep.c",
+      "source/patched-yasm/libyasm/symrec.c",
+      "source/patched-yasm/libyasm/valparam.c",
+      "source/patched-yasm/libyasm/value.c",
+      "source/patched-yasm/modules/arch/lc3b/lc3barch.c",
+      "source/patched-yasm/modules/arch/lc3b/lc3bbc.c",
+      "source/patched-yasm/modules/arch/x86/x86arch.c",
+      "source/patched-yasm/modules/arch/x86/x86bc.c",
+      "source/patched-yasm/modules/arch/x86/x86expr.c",
+      "source/patched-yasm/modules/arch/x86/x86id.c",
+      "source/patched-yasm/modules/dbgfmts/codeview/cv-dbgfmt.c",
+      "source/patched-yasm/modules/dbgfmts/codeview/cv-symline.c",
+      "source/patched-yasm/modules/dbgfmts/codeview/cv-type.c",
+      "source/patched-yasm/modules/dbgfmts/dwarf2/dwarf2-aranges.c",
+      "source/patched-yasm/modules/dbgfmts/dwarf2/dwarf2-dbgfmt.c",
+      "source/patched-yasm/modules/dbgfmts/dwarf2/dwarf2-info.c",
+      "source/patched-yasm/modules/dbgfmts/dwarf2/dwarf2-line.c",
+      "source/patched-yasm/modules/dbgfmts/null/null-dbgfmt.c",
+      "source/patched-yasm/modules/dbgfmts/stabs/stabs-dbgfmt.c",
+      "source/patched-yasm/modules/listfmts/nasm/nasm-listfmt.c",
+      "source/patched-yasm/modules/objfmts/bin/bin-objfmt.c",
+      "source/patched-yasm/modules/objfmts/coff/coff-objfmt.c",
+      "source/patched-yasm/modules/objfmts/coff/win64-except.c",
+      "source/patched-yasm/modules/objfmts/dbg/dbg-objfmt.c",
+      "source/patched-yasm/modules/objfmts/elf/elf-objfmt.c",
+      "source/patched-yasm/modules/objfmts/elf/elf-x86-amd64.c",
+      "source/patched-yasm/modules/objfmts/elf/elf-x86-x32.c",
+      "source/patched-yasm/modules/objfmts/elf/elf-x86-x86.c",
+      "source/patched-yasm/modules/objfmts/elf/elf.c",
+      "source/patched-yasm/modules/objfmts/macho/macho-objfmt.c",
+      "source/patched-yasm/modules/objfmts/rdf/rdf-objfmt.c",
+      "source/patched-yasm/modules/objfmts/xdf/xdf-objfmt.c",
+      "source/patched-yasm/modules/parsers/gas/gas-parse-intel.c",
+      "source/patched-yasm/modules/parsers/gas/gas-parse.c",
+      "source/patched-yasm/modules/parsers/gas/gas-parser.c",
+      "source/patched-yasm/modules/parsers/nasm/nasm-parse.c",
+      "source/patched-yasm/modules/parsers/nasm/nasm-parser.c",
+      "source/patched-yasm/modules/preprocs/cpp/cpp-preproc.c",
+      "source/patched-yasm/modules/preprocs/gas/gas-eval.c",
+      "source/patched-yasm/modules/preprocs/gas/gas-preproc.c",
+      "source/patched-yasm/modules/preprocs/nasm/nasm-eval.c",
+      "source/patched-yasm/modules/preprocs/nasm/nasm-pp.c",
+      "source/patched-yasm/modules/preprocs/nasm/nasm-preproc.c",
+      "source/patched-yasm/modules/preprocs/nasm/nasmlib.c",
+      "source/patched-yasm/modules/preprocs/raw/raw-preproc.c",
+
+      # Files generated by compile_gperf
+      "$target_gen_dir/x86cpu.c",
+      "$target_gen_dir/x86regtmod.c",
+
+      # Files generated by compile_re2c
+      "$target_gen_dir/gas-token.c",
+      "$target_gen_dir/nasm-token.c",
+
+      # File generated by compile_re2c_lc3b
+      "$target_gen_dir/lc3bid.c",
+
+      # File generated by generate_module
+      "$target_gen_dir/module.c",
+    ]
+
+    configs -= [ "//build/config/compiler:goma_code" ]
+    configs += [
+      ":yasm_config",
+      "//build/config/compiler:no_goma_code",
+      "//build/config/compiler:no_incompatible_pointer_warnings",
+
+      # Must be after no_goma_code for warning flags to be ordered
+      # correctly.
+      ":yasm_warnings",
+    ]
+
+    # Disable WPO for yasm: crbug.com/604808
+    configs -= configs_to_delete  # Both lists are empty at the top of this file.
+    configs += configs_to_add
+
+    # Yasm generates a bunch of .c files which its source files #include. These
+    # are placed in |yasm_gen_include_dir|.
+    include_dirs = [ yasm_gen_include_dir ]
+
+    # TODO: This should take most of the generated output as
+    # inputs.
+    deps = [
+      ":compile_gperf",
+      ":compile_gperf_for_include",
+      ":compile_nasm_macros",
+      ":compile_nasm_version",
+      ":compile_re2c",
+      ":compile_re2c_lc3b",
+      ":compile_win64_gas",
+      ":compile_win64_nasm",
+      ":generate_license",
+      ":generate_module",
+      ":generate_version",
+      ":yasm_utils",
+    ]
+  }
+
+  compiled_action_foreach("compile_gperf") {  # Outputs are compiled into :yasm.
+    tool = ":genperf"
+    sources = [
+      "source/patched-yasm/modules/arch/x86/x86cpu.gperf",
+      "source/patched-yasm/modules/arch/x86/x86regtmod.gperf",
+    ]
+
+    outputs = [
+      "$target_gen_dir/{{source_name_part}}.c",
+    ]
+    args = [  # genperf arguments: input .gperf file, then output .c file.
+      "{{source}}",
+      rebase_path(target_gen_dir, root_build_dir) + "/{{source_name_part}}.c",
+    ]
+  }
+
+  # This differs from |compile_gperf| in where it places its output files.
+  compiled_action_foreach("compile_gperf_for_include") {
+    tool = ":genperf"
+    sources = [
+      # These .gperf inputs are read from the source tree. NOTE(review): the
+      # generate_*_insn actions / generate_files target that an earlier version
+      # of this comment referred to are not defined in this file.
+      #
+      # The output for these two are #included by
+      #   source/patched-yasm/modules/arch/x86/x86id.c
+      "source/patched-yasm/x86insn_gas.gperf",
+      "source/patched-yasm/x86insn_nasm.gperf",
+    ]
+
+    outputs = [
+      "$yasm_gen_include_dir/{{source_name_part}}.c",
+    ]
+    args = [
+      "{{source}}",
+      rebase_path(yasm_gen_include_dir, root_build_dir) +
+          "/{{source_name_part}}.c",
+    ]
+  }
+
+  template("compile_macro") {  # Takes sources, outputs, macro_varname, [deps].
+    compiled_action(target_name) {
+      tool = ":genmacro"
+
+      # Each invocation below notes which source file #includes its output.
+      inputs = invoker.sources
+      outputs = invoker.outputs
+      args = [  # genmacro arguments: output file, variable name, input file.
+        rebase_path(outputs[0], root_build_dir),
+        invoker.macro_varname,
+        rebase_path(inputs[0], root_build_dir),
+      ]
+      if (defined(invoker.deps)) {
+        deps = invoker.deps
+      }
+    }
+  }
+
+  compile_macro("compile_nasm_macros") {
+    # Output #included by
+    #   source/patched-yasm/modules/parsers/nasm/nasm-parser.c
+    sources = [
+      "source/patched-yasm/modules/parsers/nasm/nasm-std.mac",
+    ]
+    outputs = [
+      "$yasm_gen_include_dir/nasm-macros.c",
+    ]
+    macro_varname = "nasm_standard_mac"  # Passed to genmacro as its 2nd argument.
+  }
+
+  compile_macro("compile_nasm_version") {
+    # Output #included by
+    #   source/patched-yasm/modules/preprocs/nasm/nasm-preproc.c
+    sources = [
+      "$target_gen_dir/$version_file",  # Generated; see deps below.
+    ]
+    outputs = [
+      "$yasm_gen_include_dir/nasm-version.c",
+    ]
+    macro_varname = "nasm_version_mac"
+    deps = [
+      ":generate_version",  # Produces the version.mac input listed in sources.
+    ]
+  }
+
+  compile_macro("compile_win64_gas") {
+    # #included by frontends/yasm/yasm.c (NOTE(review): or coff-objfmt.c? confirm).
+    sources = [
+      "source/patched-yasm/modules/objfmts/coff/win64-gas.mac",
+    ]
+    outputs = [
+      "$yasm_gen_include_dir/win64-gas.c",
+    ]
+    macro_varname = "win64_gas_stdmac"  # Passed to genmacro as its 2nd argument.
+  }
+
+  compile_macro("compile_win64_nasm") {
+    # #included by frontends/yasm/yasm.c (NOTE(review): or coff-objfmt.c? confirm).
+    sources = [
+      "source/patched-yasm/modules/objfmts/coff/win64-nasm.mac",
+    ]
+    outputs = [
+      "$yasm_gen_include_dir/win64-nasm.c",
+    ]
+    macro_varname = "win64_nasm_stdmac"  # Passed to genmacro as its 2nd argument.
+  }
+
+  compiled_action_foreach("compile_re2c") {  # Outputs are compiled into :yasm.
+    tool = ":re2c"
+    sources = [
+      "source/patched-yasm/modules/parsers/gas/gas-token.re",
+      "source/patched-yasm/modules/parsers/nasm/nasm-token.re",
+    ]
+    outputs = [
+      "$target_gen_dir/{{source_name_part}}.c",
+    ]
+    args = [  # re2c arguments: -b, -o <output .c>, then the input .re file.
+      "-b",
+      "-o",
+      rebase_path(target_gen_dir, root_build_dir) + "/{{source_name_part}}.c",
+      "{{source}}",
+    ]
+  }
+
+  # Kept apart from compile_re2c above: this call passes -s where that uses -b.
+  compiled_action("compile_re2c_lc3b") {
+    tool = ":re2c"
+    inputs = [
+      "source/patched-yasm/modules/arch/lc3b/lc3bid.re",
+    ]
+    outputs = [
+      "$target_gen_dir/lc3bid.c",  # Listed in the sources of :yasm.
+    ]
+    args = [
+      "-s",
+      "-o",
+      rebase_path(outputs[0], root_build_dir),
+      rebase_path(inputs[0], root_build_dir),
+    ]
+  }
+
+  compiled_action("generate_license") {
+    tool = ":genstring"
+
+    # Output #included by source/patched-yasm/frontends/yasm/yasm.c.
+    inputs = [
+      "source/patched-yasm/COPYING",
+    ]
+    outputs = [
+      "$yasm_gen_include_dir/license.c",
+    ]
+    args = [  # genstring arguments: variable name, output file, input file.
+      "license_msg",
+      rebase_path(outputs[0], root_build_dir),
+      rebase_path(inputs[0], root_build_dir),
+    ]
+  }
+
+  compiled_action("generate_module") {  # Output is listed in the sources of :yasm.
+    tool = ":genmodule"
+    inputs = [
+      "source/patched-yasm/libyasm/module.in",
+      config_makefile,
+    ]
+    outputs = [
+      "$target_gen_dir/module.c",
+    ]
+    args = [  # genmodule arguments: module.in, the Makefile, then the output.
+      rebase_path(inputs[0], root_build_dir),
+      rebase_path(config_makefile, root_build_dir),
+      rebase_path(outputs[0], root_build_dir),
+    ]
+  }
+
+  compiled_action("generate_version") {  # Output feeds :compile_nasm_version.
+    tool = ":genversion"
+    outputs = [
+      "$target_gen_dir/$version_file",
+    ]
+    args = [ rebase_path(outputs[0], root_build_dir) ]  # Only the output path.
+  }
+}
diff --git a/third_party/yasm/LICENSE b/third_party/yasm/LICENSE
new file mode 100644
index 0000000..254ada5
--- /dev/null
+++ b/third_party/yasm/LICENSE
@@ -0,0 +1,69 @@
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-------------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+3. Neither the name of the author nor the names of other contributors
+   may be used to endorse or promote products derived from this
+   software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-------------------------------------------------------------------------------
+NASM is now licensed under the 2-clause BSD license, also known as the
+simplified BSD license.
+    Copyright 1996-2009 the NASM Authors - All rights reserved.
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following
+    conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+      THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+      CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+      INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+      MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+      DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+      CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+      SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+      NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+      LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+      HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+      CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+      OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+      EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/third_party/yasm/README.goma b/third_party/yasm/README.goma
new file mode 100644
index 0000000..96b23c2
--- /dev/null
+++ b/third_party/yasm/README.goma
@@ -0,0 +1,8 @@
+Files under this directory are copied from chromium's src/third_party/yasm,
+and modified for goma.
+
+Modifications for goma:
+- *.gn and *.gni are changed to follow goma's GN rules,
+  e.g. {,no_}goma_code instead of {,no_}chromium_code,
+       os == "win" instead of is_win.
+- removed unused configs.
diff --git a/third_party/yasm/run_yasm.py b/third_party/yasm/run_yasm.py
new file mode 100644
index 0000000..483a6dd
--- /dev/null
+++ b/third_party/yasm/run_yasm.py
@@ -0,0 +1,52 @@
+# Copied from chromium third_party/yasm/.
+#
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""A wrapper to run yasm.
+
+Its main job is to provide a Python wrapper for GN integration, and to write
+the makefile-style output yasm generates in stdout to a .d file for dependency
+management of .inc files.
+
+Run with:
+  python run_yasm.py <yasm_binary_path> <all other yasm args>
+
+Note that <all other yasm args> must include an explicit output file (-o). This
+script will append a ".d" to this and write the dependencies there. This script
+will add "-M" to cause yasm to write the deps to stdout, so you don't need to
+specify that.
+"""
+
+import argparse
+import sys
+import subprocess
+
+# Extract the output file name from the yasm command line so we can generate a
+# .d file with the same base name. A missing -o is reported as a usage error.
+parser = argparse.ArgumentParser()
+parser.add_argument("-o", dest="objfile", required=True)
+options, _ = parser.parse_known_args()
+
+# required=True means argparse has already exited if -o was not supplied.
+depfile = options.objfile + '.d'
+
+# Assemble.
+result_code = subprocess.call(sys.argv[1:])
+if result_code != 0:
+  sys.exit(result_code)
+
+# Now generate the .d file listing the dependencies. The -M option makes yasm
+# write the Makefile-style dependencies to stdout, but it seems that inhibits
+# generating any compiled output so we need to do this in a separate pass.
+# However, outputting deps seems faster than actually assembling, and yasm is
+# so fast anyway this is not a big deal.
+#
+# This guarantees proper dependency management for assembly files. Otherwise,
+# we would have to require people to manually specify the .inc files they
+# depend on in the build file, which will surely be wrong or out-of-date in
+# some cases.
+deps = subprocess.check_output(sys.argv[1:] + ['-M'])
+with open(depfile, "wb") as f:
+  f.write(deps)
diff --git a/third_party/yasm/source/config/win/config.h b/third_party/yasm/source/config/win/config.h
new file mode 100644
index 0000000..12c081d
--- /dev/null
+++ b/third_party/yasm/source/config/win/config.h
@@ -0,0 +1,173 @@
+/* config.h.  Generated from config.h.in by configure.  */
+/* config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* Command name to run C preprocessor */
+#define CPP_PROG "cc -E"
+
+/* */
+/* #undef ENABLE_NLS */
+
+/* Define to 1 if you have the `abort' function. */
+#define HAVE_ABORT 1
+
+/* */
+/* #undef HAVE_CATGETS */
+
+/* Define to 1 if you have the Mac OS X function CFLocaleCopyCurrent in the
+   CoreFoundation framework. */
+/* #undef HAVE_CFLOCALECOPYCURRENT */
+
+/* Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in
+   the CoreFoundation framework. */
+/* #undef HAVE_CFPREFERENCESCOPYAPPVALUE */
+
+/* Define if the GNU dcgettext() function is already present or preinstalled.
+   */
+#define HAVE_DCGETTEXT 1
+
+/* Define to 1 if you have the <direct.h> header file. */
+#define HAVE_DIRECT_H 1
+
+/* Define to 1 if you have the `ftruncate' function. */
+/* #undef HAVE_FTRUNCATE */
+
+/* Define to 1 if you have the `getcwd' function. */
+#define HAVE_GETCWD 1
+
+/* */
+#define HAVE_GETTEXT 1
+
+/* Define to 1 if you have the GNU C Library */
+/* #undef HAVE_GNU_C_LIBRARY */
+
+/* Define if you have the iconv() function and it works. */
+/* #undef HAVE_ICONV */
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* */
+/* #undef HAVE_LC_MESSAGES */
+
+/* Define to 1 if you have the <libgen.h> header file. */
+/* #undef HAVE_LIBGEN_H */
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the `mergesort' function. */
+/* #undef HAVE_MERGESORT */
+
+/* Define to 1 if you have the `popen' function. */
+/* #undef HAVE_POPEN */
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* */
+/* #undef HAVE_STPCPY */
+
+/* Define to 1 if you have the `strcasecmp' function. */
+/* #undef HAVE_STRCASECMP */
+
+/* Define to 1 if you have the `strcmpi' function. */
+/* #undef HAVE_STRCMPI */
+
+/* Define to 1 if you have the `stricmp' function. */
+/* #undef HAVE_STRICMP */
+
+/* Define to 1 if you have the <strings.h> header file. */
+/* #undef HAVE_STRINGS_H */
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the `strncasecmp' function. */
+#define HAVE_STRNCASECMP 1
+
+/* Define to 1 if you have the `strsep' function. */
+/* #undef HAVE_STRSEP */
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the `toascii' function. */
+#define HAVE_TOASCII 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+/* #undef HAVE_UNISTD_H */
+
+/* Define to 1 if you have the `vsnprintf' function. */
+#define HAVE_VSNPRINTF 1
+
+/* Define to 1 if you have the `_stricmp' function. */
+/* #undef HAVE__STRICMP */
+
+/* Name of package */
+#define PACKAGE "yasm"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "bug-yasm@tortall.net"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "yasm"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "yasm 1.3.0"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "yasm"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL ""
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "1.3.0"
+
+/* Define to 1 if the C compiler supports function prototypes. */
+#define PROTOTYPES 1
+
+/* The size of `char', as computed by sizeof. */
+/* #undef SIZEOF_CHAR */
+
+/* The size of `int', as computed by sizeof. */
+/* #undef SIZEOF_INT */
+
+/* The size of `long', as computed by sizeof. */
+/* #undef SIZEOF_LONG */
+
+/* The size of `short', as computed by sizeof. */
+/* #undef SIZEOF_SHORT */
+
+/* The size of `void*', as computed by sizeof. */
+/* #undef SIZEOF_VOIDP */
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Version number of package */
+#define VERSION "1.3.0"
+
+/* Define if using the dmalloc debugging malloc package */
+/* #undef WITH_DMALLOC */
+
+/* Define like PROTOTYPES; this can be used by system headers. */
+#define __PROTOTYPES 1
+
+/* Define to empty if `const' does not conform to ANSI C. */
+/* #undef const */
+
+/* Define to `__inline__' or `__inline' if that's what the C compiler
+   calls it, or to nothing if 'inline' is not supported under any name.  */
+#ifndef __cplusplus
+/* #undef inline */
+#endif
+
+/* Define to `unsigned int' if <sys/types.h> does not define. */
+/* #undef size_t */
diff --git a/third_party/yasm/source/config/win/libyasm-stdint.h b/third_party/yasm/source/config/win/libyasm-stdint.h
new file mode 100644
index 0000000..b9ce696
--- /dev/null
+++ b/third_party/yasm/source/config/win/libyasm-stdint.h
@@ -0,0 +1,9 @@
+#ifndef _YASM_LIBYASM_STDINT_H
+#define _YASM_LIBYASM_STDINT_H 1
+#ifndef _GENERATED_STDINT_H
+#define _GENERATED_STDINT_H "yasm 1.3.0"
+/* generated using gcc -std=gnu99 */
+#define _STDINT_HAVE_STDINT_H 1
+#include <stdint.h>
+#endif
+#endif
diff --git a/third_party/yasm/yasm_assemble.gni b/third_party/yasm/yasm_assemble.gni
new file mode 100644
index 0000000..f164d07
--- /dev/null
+++ b/third_party/yasm/yasm_assemble.gni
@@ -0,0 +1,198 @@
+# Copied from chromium third_party/yasm/, and modified for goma.
+#
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# This provides the yasm_assemble() template which uses YASM to assemble
+# assembly files.
+#
+# Files to be assembled with YASM should have an extension of .asm.
+#
+# Parameters
+#
+#   yasm_flags (optional)
+#       [list of strings] Pass additional flags into YASM. These are appended
+#       to the command line. Note that the target machine type and system is
+#       already set up based on the current toolchain so you don't need to
+#       specify these things (see below).
+#
+#       Example: yasm_flags = [ "--force-strict" ]
+#
+#   include_dirs (optional)
+#       [list of dir names] List of additional include dirs. Note that the
+#       source root and the root generated file dir are always added, just as
+#       our C++ build does.
+#
+#       Example: include_dirs = [ "//some/other/path", target_gen_dir ]
+#
+#   defines (optional)
+#       [list of strings] List of defines, as with the native code defines.
+#
+#       Example: defines = [ "FOO", "BAR=1" ]
+#
+#   inputs, deps, visibility  (optional)
+#       These have the same meaning as in an action.
+#
+# Example
+#
+#   yasm_assemble("my_yasm_target") {
+#     sources = [
+#       "ultra_optimized_awesome.asm",
+#     ]
+#     include_dirs = [ "assembly_include" ]
+#   }
+
+# Choose the yasm object format (-f) and machine (-m) flags for the current
+# os/cpu_arch pair; the first matching branch wins (mac, other posix, win).
+# For any other combination _yasm_flags is left undefined, and the
+# yasm_assemble() template asserts cpu_arch is x86 or x64 before reading it.
+if (os == "mac" && cpu_arch == "x86") {
+  _yasm_flags = [
+    "-fmacho32",
+    "-m",
+    "x86",
+  ]
+} else if (os == "mac" && cpu_arch == "x64") {
+  _yasm_flags = [
+    "-fmacho64",
+    "-m",
+    "amd64",
+  ]
+} else if (is_posix && cpu_arch == "x86") {
+  _yasm_flags = [
+    "-felf32",
+    "-m",
+    "x86",
+  ]
+} else if (is_posix && cpu_arch == "x64") {
+  _yasm_flags = [
+    "-DPIC",
+    "-felf64",
+    "-m",
+    "amd64",
+  ]
+} else if (os == "win" && cpu_arch == "x86") {
+  _yasm_flags = [
+    "-DPREFIX",
+    "-fwin32",
+    "-m",
+    "x86",
+  ]
+} else if (os == "win" && cpu_arch == "x64") {
+  _yasm_flags = [
+    "-fwin64",
+    "-m",
+    "amd64",
+  ]
+}
+
+# Object file extension for the current os. Nothing in this file reads it;
+# presumably it is meant for files that import this .gni.
+if (os != "win") {
+  asm_obj_extension = "o"
+} else {
+  asm_obj_extension = "obj"
+}
+
+template("yasm_assemble") {
+  assert(defined(invoker.sources), "Need sources defined for $target_name")
+
+  # Only x86 and x64 are supported: any other cpu_arch (e.g. ARM) fails this
+  # assert, and _yasm_flags is only defined for those two architectures.
+  assert(cpu_arch == "x86" || cpu_arch == "x64")
+
+  action_name = "${target_name}_action"
+  source_set_name = target_name
+
+  action_foreach(action_name) {
+    # Only the static_library target defined below can depend on this.
+    visibility = [ ":$source_set_name" ]
+
+    script = "//third_party/yasm/run_yasm.py"
+    sources = invoker.sources
+
+    if (defined(invoker.inputs)) {
+      inputs = invoker.inputs
+    }
+
+    # Executable (first in the args). The binary might be in the root build dir
+    # (no cross-compiling) or in a toolchain-specific subdirectory of that
+    # (when cross-compiling).
+    yasm_label = "//third_party/yasm($host_toolchain)"
+    args = [ "./" +  # Force current dir.
+             rebase_path(get_label_info(yasm_label, "root_out_dir") + "/yasm",
+                         root_build_dir) ]
+
+    # Deps.
+    deps = [
+      yasm_label,
+    ]
+    if (defined(invoker.deps)) {
+      deps += invoker.deps
+    }
+
+    # Flags.
+    args += _yasm_flags
+    if (defined(invoker.yasm_flags)) {
+      args += invoker.yasm_flags
+    }
+
+    # User defined include dirs go first.
+    if (defined(invoker.include_dirs)) {
+      foreach(include, invoker.include_dirs) {
+        args += [ "-I" + rebase_path(include, root_build_dir) ]
+      }
+    }
+
+    # Default yasm include dirs. Make it match the native build (source root and
+    # root generated code directory).
+    # This goes to the end of include list.
+    args += [
+      "-I.",
+
+      # Using "//." will produce a relative path "../.." which looks better than
+      # "../../" which will result from using "//" as the base (although both
+      # work). This is because rebase_path will terminate the result in a
+      # slash if the input ends in a slash.
+      "-I" + rebase_path("//.", root_build_dir),
+      "-I" + rebase_path(root_gen_dir, root_build_dir),
+    ]
+
+    # Extra defines.
+    if (defined(invoker.defines)) {
+      foreach(def, invoker.defines) {
+        args += [ "-D$def" ]
+      }
+    }
+
+    # Output file. Always named "*.o"; asm_obj_extension is not used here.
+    outputs = [
+      "$target_out_dir/$source_set_name/{{source_name_part}}.o",
+    ]
+    args += [
+      "-o",
+      rebase_path(outputs[0], root_build_dir),
+      "{{source}}",
+    ]
+
+    # The wrapper script run_yasm will write the depfile to the same name as
+    # the output but with .d appended (like gcc will).
+    depfile = outputs[0] + ".d"
+  }
+
+  # Gather the .o files into a linkable thing. The static_library below lists
+  # the action's outputs as its sources, so the object files generated by the
+  # action are passed up the dependency chain under the invoker's target name.
+  static_library(source_set_name) {
+    if (defined(invoker.visibility)) {
+      visibility = invoker.visibility
+    }
+
+    sources = get_target_outputs(":$action_name")
+
+    deps = [
+      ":$action_name",
+    ]
+  }
+}
diff --git a/tools/.gitignore b/tools/.gitignore
new file mode 100644
index 0000000..1902679
--- /dev/null
+++ b/tools/.gitignore
@@ -0,0 +1,3 @@
+clang
+gyp
+