C++20 fix: reduce template arg to operator!= to ensure better match

PiperOrigin-RevId: 452544130
diff --git a/hwy/ops/arm_neon-inl.h b/hwy/ops/arm_neon-inl.h
index 82e91db..89e23da 100644
--- a/hwy/ops/arm_neon-inl.h
+++ b/hwy/ops/arm_neon-inl.h
@@ -1413,10 +1413,10 @@
                             prefix##infix##suffix, v.raw, HWY_MAX(1, kBits))); \
   }
 
-HWY_NEON_DEF_FUNCTION_INTS_UINTS(ShiftLeft, vshl, _n_, HWY_SHIFT)
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(ShiftLeft, vshl, _n_, ignored)
 
-HWY_NEON_DEF_FUNCTION_UINTS(ShiftRight, vshr, _n_, HWY_SHIFT)
-HWY_NEON_DEF_FUNCTION_INTS(ShiftRight, vshr, _n_, HWY_SHIFT)
+HWY_NEON_DEF_FUNCTION_UINTS(ShiftRight, vshr, _n_, ignored)
+HWY_NEON_DEF_FUNCTION_INTS(ShiftRight, vshr, _n_, ignored)
 
 #pragma pop_macro("HWY_NEON_DEF_FUNCTION")
 
@@ -2308,12 +2308,6 @@
 HWY_NEON_DEF_FUNCTION_UINT_8_16_32(operator==, vceq, _, HWY_COMPARE)
 #endif
 
-// ------------------------------ Inequality
-template <typename T, size_t N>
-HWY_API Mask128<T, N> operator!=(const Vec128<T, N> a, const Vec128<T, N> b) {
-  return Not(a == b);
-}
-
 // ------------------------------ Strict inequality (signed, float)
 #if HWY_ARCH_ARM_A64
 HWY_NEON_DEF_FUNCTION_INTS_UINTS(operator<, vclt, _, HWY_COMPARE)
@@ -2377,6 +2371,24 @@
 
 #endif
 
+// ------------------------------ operator!= (operator==)
+
+// Customize HWY_NEON_DEF_FUNCTION to call 2 functions.
+#pragma push_macro("HWY_NEON_DEF_FUNCTION")
+#undef HWY_NEON_DEF_FUNCTION
+// This cannot have _any_ template argument (in x86_128 we can at least have N
+// as an argument), otherwise it is not more specialized than rewritten
+// operator== in C++20, leading to compile errors.
+#define HWY_NEON_DEF_FUNCTION(type, size, name, prefix, infix, suffix, args) \
+  HWY_API Mask128<type##_t, size> name(Vec128<type##_t, size> a,             \
+                                       Vec128<type##_t, size> b) {           \
+    return Not(a == b);                                                      \
+  }
+
+HWY_NEON_DEF_FUNCTION_ALL_TYPES(operator!=, ignored, ignored, ignored)
+
+#pragma pop_macro("HWY_NEON_DEF_FUNCTION")
+
 // ------------------------------ Reversed comparisons
 
 template <typename T, size_t N>
diff --git a/hwy/ops/x86_128-inl.h b/hwy/ops/x86_128-inl.h
index 2911a3a..fb2a188 100644
--- a/hwy/ops/x86_128-inl.h
+++ b/hwy/ops/x86_128-inl.h
@@ -1688,8 +1688,47 @@
 
 // ------------------------------ Inequality
 
-template <typename T, size_t N, HWY_IF_NOT_FLOAT(T)>
-HWY_API Mask128<T, N> operator!=(const Vec128<T, N> a, const Vec128<T, N> b) {
+// This cannot have T as a template argument, otherwise it is not more 
+// specialized than rewritten operator== in C++20, leading to compile
+// errors: https://gcc.godbolt.org/z/xsrPhPvPT.
+template <size_t N>
+HWY_API Mask128<uint8_t, N> operator!=(Vec128<uint8_t, N> a,
+                                       Vec128<uint8_t, N> b) {
+  return Not(a == b);
+}
+template <size_t N>
+HWY_API Mask128<uint16_t, N> operator!=(Vec128<uint16_t, N> a,
+                                       Vec128<uint16_t, N> b) {
+  return Not(a == b);
+}
+template <size_t N>
+HWY_API Mask128<uint32_t, N> operator!=(Vec128<uint32_t, N> a,
+                                       Vec128<uint32_t, N> b) {
+  return Not(a == b);
+}
+template <size_t N>
+HWY_API Mask128<uint64_t, N> operator!=(Vec128<uint64_t, N> a,
+                                       Vec128<uint64_t, N> b) {
+  return Not(a == b);
+}
+template <size_t N>
+HWY_API Mask128<int8_t, N> operator!=(Vec128<int8_t, N> a,
+                                      Vec128<int8_t, N> b) {
+  return Not(a == b);
+}
+template <size_t N>
+HWY_API Mask128<int16_t, N> operator!=(Vec128<int16_t, N> a,
+                                       Vec128<int16_t, N> b) {
+  return Not(a == b);
+}
+template <size_t N>
+HWY_API Mask128<int32_t, N> operator!=(Vec128<int32_t, N> a,
+                                       Vec128<int32_t, N> b) {
+  return Not(a == b);
+}
+template <size_t N>
+HWY_API Mask128<int64_t, N> operator!=(Vec128<int64_t, N> a,
+                                       Vec128<int64_t, N> b) {
   return Not(a == b);
 }