Cherry-pick Arm CLZ fixes from upstream

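Cherry-pick two patches from upstream that fix the Neon intrinsics
Huffman encoding path (by calling __builtin_clzll() instead of
__builtin_clzl(), since long is only 32 bits wide on Windows) and
reduce the memory footprint on Windows on Arm (by using a CLZ
intrinsic in the C Huffman encoders rather than including
jpeg_nbits_table.h):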
https://github.com/libjpeg-turbo/libjpeg-turbo/commit/d2c407995992be1f128704ae2479adfd7906c158
https://github.com/libjpeg-turbo/libjpeg-turbo/commit/74e6ea45e3547ae85cd43efcdfc24a6907a2154e

Re-enable the Neon intrinsics Huffman encoding path for Windows on Arm
builds compiled with clang-cl.

Bug: 1160249
Change-Id: I0849ca54b8f4f8f38c9b293ea48c9de1c60be86f
diff --git a/README.chromium b/README.chromium
index b0a1623..e8b0bc6 100644
--- a/README.chromium
+++ b/README.chromium
@@ -14,11 +14,12 @@
 * An OWNERS file
 * A codereview.settings file
 * Patched header files used by Chromium
-* Cherry-picked three additional patches from upstream master to fix bugs found
-  by fuzzers:
+* Additional patches cherry-picked from upstream master to fix various bugs:
   https://github.com/libjpeg-turbo/libjpeg-turbo/commit/ccaba5d7894ecfb5a8f11e48d3f86e1f14d5a469
   https://github.com/libjpeg-turbo/libjpeg-turbo/commit/c7ca521bc85b57d41d3ad4963c13fc0100481084
   https://github.com/libjpeg-turbo/libjpeg-turbo/commit/110d8d6dcafaed517e8f77a6253169535ee3a20e
+  https://github.com/libjpeg-turbo/libjpeg-turbo/commit/d2c407995992be1f128704ae2479adfd7906c158
+  https://github.com/libjpeg-turbo/libjpeg-turbo/commit/74e6ea45e3547ae85cd43efcdfc24a6907a2154e
 * Deleted unused directories: ci, cmakescripts, doc, java, release, sharedlib,
   simd/loongson, simd/mips, simd/powerpc, and win
 * Deleted unused files: appveyor.yml, CMakeLists.txt, doxygen.config,
@@ -73,8 +74,6 @@
   - Refactor djpeg.c to provide test interface
   A new gtest directory contains GTest wrappers (and associated utilities) for
   each of tjunittest, tjbench, cjpeg, djpeg and jpegtran.
-* Disable Neon SIMD path for Huffman encoding when compiling for Windows on Arm
-  using Clang-cl: http://crbug.com/1160249
 
 Refer to working-with-nested-repos [1] for details of how to setup your git
 svn client to update the code (for making local changes, cherry picking from
diff --git a/jchuff.c b/jchuff.c
index 8ea48b8..e2d5772 100644
--- a/jchuff.c
+++ b/jchuff.c
@@ -44,15 +44,19 @@
  * flags (this defines __thumb__).
  */
 
-/* NOTE: Both GCC and Clang define __GNUC__ */
-#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
+#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \
+    defined(_M_ARM64)
 #if !defined(__thumb__) || defined(__thumb2__)
 #define USE_CLZ_INTRINSIC
 #endif
 #endif
 
 #ifdef USE_CLZ_INTRINSIC
+#if defined(_MSC_VER) && !defined(__clang__)
+#define JPEG_NBITS_NONZERO(x)  (32 - _CountLeadingZeros(x))
+#else
 #define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))
+#endif
 #define JPEG_NBITS(x)          (x ? JPEG_NBITS_NONZERO(x) : 0)
 #else
 #include "jpeg_nbits_table.h"
diff --git a/jcphuff.c b/jcphuff.c
index a8b94be..373d71f 100644
--- a/jcphuff.c
+++ b/jcphuff.c
@@ -6,6 +6,7 @@
  * libjpeg-turbo Modifications:
  * Copyright (C) 2011, 2015, 2018, D. R. Commander.
  * Copyright (C) 2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2020, Arm Limited.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -51,15 +52,19 @@
  * flags (this defines __thumb__).
  */
 
-/* NOTE: Both GCC and Clang define __GNUC__ */
-#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
+#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \
+    defined(_M_ARM64)
 #if !defined(__thumb__) || defined(__thumb2__)
 #define USE_CLZ_INTRINSIC
 #endif
 #endif
 
 #ifdef USE_CLZ_INTRINSIC
+#if defined(_MSC_VER) && !defined(__clang__)
+#define JPEG_NBITS_NONZERO(x)  (32 - _CountLeadingZeros(x))
+#else
 #define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))
+#endif
 #define JPEG_NBITS(x)          (x ? JPEG_NBITS_NONZERO(x) : 0)
 #else
 #include "jpeg_nbits_table.h"
diff --git a/simd/arm/aarch64/jchuff-neon.c b/simd/arm/aarch64/jchuff-neon.c
index a0a57a6..f13fd1b 100644
--- a/simd/arm/aarch64/jchuff-neon.c
+++ b/simd/arm/aarch64/jchuff-neon.c
@@ -1,7 +1,7 @@
 /*
  * jchuff-neon.c - Huffman entropy encoding (64-bit Arm Neon)
  *
- * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
  * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
@@ -331,7 +331,7 @@
     vst1q_u16(block_diff + 7 * DCTSIZE, row7_diff);
 
     while (bitmap != 0) {
-      r = BUILTIN_CLZL(bitmap);
+      r = BUILTIN_CLZLL(bitmap);
       i += r;
       bitmap <<= r;
       nbits = block_nbits[i];
@@ -370,7 +370,7 @@
 
     /* Same as above but must mask diff bits and compute nbits on demand. */
     while (bitmap != 0) {
-      r = BUILTIN_CLZL(bitmap);
+      r = BUILTIN_CLZLL(bitmap);
       i += r;
       bitmap <<= r;
       lz = BUILTIN_CLZ(block_abs[i]);
diff --git a/simd/arm/aarch64/jsimd.c b/simd/arm/aarch64/jsimd.c
index 4991bc0..8570b82 100644
--- a/simd/arm/aarch64/jsimd.c
+++ b/simd/arm/aarch64/jsimd.c
@@ -977,8 +977,6 @@
 GLOBAL(int)
 jsimd_can_huff_encode_one_block(void)
 {
-/* Disable for Windows on Arm compiled with Clang-cl: crbug.com/1160249 */
-#if !(defined(_MSC_VER) && defined(__clang__))
   init_simd();
 
   if (DCTSIZE != 8)
@@ -988,7 +986,6 @@
 
   if (simd_support & JSIMD_NEON && simd_huffman)
     return 1;
-#endif
 
   return 0;
 }
diff --git a/simd/arm/jcphuff-neon.c b/simd/arm/jcphuff-neon.c
index 8b6d53b..86a263f 100644
--- a/simd/arm/jcphuff-neon.c
+++ b/simd/arm/jcphuff-neon.c
@@ -1,7 +1,7 @@
 /*
  * jcphuff-neon.c - prepare data for progressive Huffman encoding (Arm Neon)
  *
- * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -572,7 +572,7 @@
     /* EOB position is defined to be 0 if all coefficients != 1. */
     return 0;
   } else {
-    return 63 - BUILTIN_CLZL(bitmap);
+    return 63 - BUILTIN_CLZLL(bitmap);
   }
 #else
   /* Move bitmap to two 32-bit scalar registers. */
diff --git a/simd/arm/neon-compat.h b/simd/arm/neon-compat.h
index 3ce3bcb..543d860 100644
--- a/simd/arm/neon-compat.h
+++ b/simd/arm/neon-compat.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
- * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -28,10 +28,10 @@
 /* Define compiler-independent count-leading-zeros macros */
 #if defined(_MSC_VER) && !defined(__clang__)
 #define BUILTIN_CLZ(x)  _CountLeadingZeros(x)
-#define BUILTIN_CLZL(x)  _CountLeadingZeros64(x)
+#define BUILTIN_CLZLL(x)  _CountLeadingZeros64(x)
 #elif defined(__clang__) || defined(__GNUC__)
 #define BUILTIN_CLZ(x)  __builtin_clz(x)
-#define BUILTIN_CLZL(x)  __builtin_clzl(x)
+#define BUILTIN_CLZLL(x)  __builtin_clzll(x)
 #else
 #error "Unknown compiler"
 #endif
diff --git a/simd/arm/neon-compat.h.in b/simd/arm/neon-compat.h.in
index e2347b9..23d6d28 100644
--- a/simd/arm/neon-compat.h.in
+++ b/simd/arm/neon-compat.h.in
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
- * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -26,10 +26,10 @@
 /* Define compiler-independent count-leading-zeros macros */
 #if defined(_MSC_VER) && !defined(__clang__)
 #define BUILTIN_CLZ(x)  _CountLeadingZeros(x)
-#define BUILTIN_CLZL(x)  _CountLeadingZeros64(x)
+#define BUILTIN_CLZLL(x)  _CountLeadingZeros64(x)
 #elif defined(__clang__) || defined(__GNUC__)
 #define BUILTIN_CLZ(x)  __builtin_clz(x)
-#define BUILTIN_CLZL(x)  __builtin_clzl(x)
+#define BUILTIN_CLZLL(x)  __builtin_clzll(x)
 #else
 #error "Unknown compiler"
 #endif