loopfilter_sse2: call unsuffixed lpf functions

This allows calls to use better versions (e.g., avx2) if available. In most other cases the function pointer will be defined to the sse2 variant if another isn't available.

This improves performance at 1080P by ~2% on a Xeon E5-2690.

Change-Id: Ie9da3a567021f8416651a29b8c9ab9238dc4bdf1

commit: 36133b04c0d3f82b16902de2ed57fe58d7c30990 [log] [tgz]
author: James Zern <jzern@google.com> Tue Feb 04 00:57:58 2020
committer: James Zern <jzern@google.com> Tue Feb 04 01:00:01 2020
tree: ab416e9336fe045f7ba785b5976a7e53b9c547eb
parent: 5be37810d26d224f1ec75ff688d21aeadb863501 [diff]
diff --git a/vpx_dsp/x86/loopfilter_sse2.c b/vpx_dsp/x86/loopfilter_sse2.c
index f90522c..b6ff248 100644
--- a/vpx_dsp/x86/loopfilter_sse2.c
+++ b/vpx_dsp/x86/loopfilter_sse2.c

@@ -1674,8 +1674,8 @@
   transpose8x16(s - 4, s - 4 + pitch * 8, pitch, t_dst, 16);
 
   // Loop filtering
-  vpx_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
-                                 blimit1, limit1, thresh1);
+  vpx_lpf_horizontal_4_dual(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
+                            blimit1, limit1, thresh1);
   src[0] = t_dst;
   src[1] = t_dst + 8;
   dst[0] = s - 4;
@@ -1700,7 +1700,7 @@
   transpose(src, pitch, dst, 8, 1);
 
   // Loop filtering
-  vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh);
+  vpx_lpf_horizontal_8(t_dst + 4 * 8, 8, blimit, limit, thresh);
 
   src[0] = t_dst;
   dst[0] = s - 4;
@@ -1721,8 +1721,8 @@
   transpose8x16(s - 4, s - 4 + pitch * 8, pitch, t_dst, 16);
 
   // Loop filtering
-  vpx_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
-                                 blimit1, limit1, thresh1);
+  vpx_lpf_horizontal_8_dual(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
+                            blimit1, limit1, thresh1);
   src[0] = t_dst;
   src[1] = t_dst + 8;
 
@@ -1750,7 +1750,7 @@
   transpose(src, pitch, dst, 8, 2);
 
   // Loop filtering
-  vpx_lpf_horizontal_16_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh);
+  vpx_lpf_horizontal_16(t_dst + 8 * 8, 8, blimit, limit, thresh);
 
   src[0] = t_dst;
   src[1] = t_dst + 8 * 8;
@@ -1771,7 +1771,7 @@
   transpose8x16(s, s + 8 * pitch, pitch, t_dst + 8 * 16, 16);
 
   // Loop filtering
-  vpx_lpf_horizontal_16_dual_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh);
+  vpx_lpf_horizontal_16_dual(t_dst + 8 * 16, 16, blimit, limit, thresh);
 
   // Transpose back
   transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, pitch);
commit	36133b04c0d3f82b16902de2ed57fe58d7c30990	[log] [tgz]
author	James Zern <jzern@google.com>	Tue Feb 04 00:57:58 2020
committer	James Zern <jzern@google.com>	Tue Feb 04 01:00:01 2020
tree	ab416e9336fe045f7ba785b5976a7e53b9c547eb
parent	5be37810d26d224f1ec75ff688d21aeadb863501 [diff]