Re-enable x86 assembly without requiring -msse2

clang does not require -msse2 or -msse for inline assembly, except
for operands that use the "x" (xmm register) constraint.  So change
those operands to "m" (memory) for 32 bit; since "m" needs an
addressable lvalue, the multiply by kScaleBias is hoisted out of
the operand into C code.  64 bit always implies sse2, so keep "x"
for 64 bit.
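
Minimal sketch of the constraint split (BroadcastScale is a
hypothetical example, not code from this change); clang builds
both branches, the 32 bit one even with -m32 -mno-sse:

  void BroadcastScale(float scale) {
  #if defined(__x86_64__)
    // 64 bit: sse2 is baseline, so "x" keeps scale in an xmm register.
    asm volatile("pshufd $0x0,%0,%%xmm4 \n" : : "x"(scale) : "xmm4");
  #else
    // 32 bit: "m" passes scale through memory; the asm loads it itself.
    asm volatile(
        "movd   %0,%%xmm4          \n"
        "pshufd $0x0,%%xmm4,%%xmm4 \n"
        : : "m"(scale) : "xmm4");
  #endif
  }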

gcc still requires -msse to name xmm registers in the clobber
list, so for gcc the compiler requirement for enabling assembly
is reduced from -msse2 to -msse.
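
For reference, a minimal reproduction of the gcc restriction (a
sketch; build with gcc -m32 -mno-sse):

  // gcc without -msse rejects the xmm name in the clobber list
  // (typically "unknown register name 'xmm0' in 'asm'"), while
  // clang accepts it.  Hence gcc keeps an -msse gate via
  // !defined(__SSE__) && !defined(__clang__).
  void ZeroXmm0(void) {
    asm volatile("pxor %%xmm0,%%xmm0 \n" : : : "xmm0");
  }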

Bug: libyuv:754, libyuv:757
Test: CC=clang CXX=clang++ CFLAGS="-m32" CXXFLAGS="-m32 -mno-sse -O2" make -f linux.mk
Change-Id: I86df72cfee80b7d349561c1fd7c97ad360767255
Reviewed-on: https://chromium-review.googlesource.com/759303
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Reviewed-by: Frank Barchard <fbarchard@google.com>
Commit-Queue: Frank Barchard <fbarchard@google.com>
diff --git a/include/libyuv/compare_row.h b/include/libyuv/compare_row.h
index 76ed8ab..2e5ebe5 100644
--- a/include/libyuv/compare_row.h
+++ b/include/libyuv/compare_row.h
@@ -19,7 +19,7 @@
 #endif
 
 #if defined(__pnacl__) || defined(__CLR_VER) || \
-    (defined(__i386__) && !defined(__SSE2__))
+    (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
 #define LIBYUV_DISABLE_X86
 #endif
 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h
index d97965c..c91501a 100644
--- a/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@@ -746,7 +746,7 @@
                     int interpolation);
 
 #if defined(__pnacl__) || defined(__CLR_VER) || \
-    (defined(__i386__) && !defined(__SSE2__))
+    (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
 #define LIBYUV_DISABLE_X86
 #endif
 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
diff --git a/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h
index 60ac55e..973fc15 100644
--- a/include/libyuv/rotate_row.h
+++ b/include/libyuv/rotate_row.h
@@ -19,7 +19,7 @@
 #endif
 
 #if defined(__pnacl__) || defined(__CLR_VER) || \
-    (defined(__i386__) && !defined(__SSE2__))
+    (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
 #define LIBYUV_DISABLE_X86
 #endif
 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 9a9b958..7332bdf 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -31,7 +31,7 @@
   var = 0
 
 #if defined(__pnacl__) || defined(__CLR_VER) || \
-    (defined(__i386__) && !defined(__SSE2__))
+    (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
 #define LIBYUV_DISABLE_X86
 #endif
 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
@@ -279,7 +279,7 @@
     (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
 #define HAS_MERGEUV10ROW_AVX2
 #endif
-    
+
 // The following are available on Neon platforms:
 #if !defined(LIBYUV_DISABLE_NEON) && \
     (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h
index 534898c..c4a66aa 100644
--- a/include/libyuv/scale_row.h
+++ b/include/libyuv/scale_row.h
@@ -20,7 +20,7 @@
 #endif
 
 #if defined(__pnacl__) || defined(__CLR_VER) || \
-    (defined(__i386__) && !defined(__SSE2__))
+    (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
 #define LIBYUV_DISABLE_X86
 #endif
 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
diff --git a/linux.mk b/linux.mk
index 1dd527c..7e9aa5e 100644
--- a/linux.mk
+++ b/linux.mk
@@ -80,4 +80,4 @@
 	$(CC) $(CFLAGS) -o $@ util/cpuid.c libyuv.a
 
 clean:
-	/bin/rm -f source/*.o *.ii *.s libyuv.a convert cpuid psnr
+	/bin/rm -f source/*.o *.ii *.s libyuv.a yuvconvert cpuid psnr
diff --git a/source/row_gcc.cc b/source/row_gcc.cc
index 3af3204..c2874e1 100644
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -5687,6 +5687,7 @@
 #ifdef HAS_HALFFLOATROW_SSE2
 static float kScaleBias = 1.9259299444e-34f;
 void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) {
+  scale *= kScaleBias;
   asm volatile (
     "pshufd      $0x0,%3,%%xmm4                \n"
     "pxor        %%xmm5,%%xmm5                 \n"
@@ -5713,7 +5714,11 @@
   : "+r"(src),    // %0
     "+r"(dst),    // %1
     "+r"(width)   // %2
-  : "x"(scale * kScaleBias)   // %3
+#if defined(__x86_64__)
+  : "x"(scale)   // %3
+#else
+  : "m"(scale)   // %3
+#endif
   : "memory", "cc",
     "xmm2", "xmm3", "xmm4", "xmm5"
   );
@@ -5722,6 +5727,7 @@
 
 #ifdef HAS_HALFFLOATROW_AVX2
 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
+  scale *= kScaleBias;
   asm volatile (
     "vbroadcastss  %3, %%ymm4                  \n"
     "vpxor      %%ymm5,%%ymm5,%%ymm5           \n"
@@ -5749,7 +5755,11 @@
   : "+r"(src),    // %0
     "+r"(dst),    // %1
     "+r"(width)   // %2
-  : "x"(scale * kScaleBias)   // %3
+#if defined(__x86_64__)
+  : "x"(scale)   // %3
+#else
+  : "m"(scale)   // %3
+#endif
   : "memory", "cc",
     "xmm2", "xmm3", "xmm4", "xmm5"
   );
@@ -5782,7 +5792,11 @@
   : "+r"(src),   // %0
     "+r"(dst),   // %1
     "+r"(width)  // %2
+#if defined(__x86_64__)
   : "x"(scale)   // %3
+#else
+  : "m"(scale)   // %3
+#endif
   : "memory", "cc",
     "xmm2", "xmm3", "xmm4"
   );