Fix MergeAR64Plane on odd width
R=fbarchard@chromium.org
Bug: libyuv:898
Change-Id: I031e008ea91baba1c7598efa0eda70750cbfce85
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2810066
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index 3ff5dfa..adc4eef 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -985,6 +985,139 @@
}
}
+LIBYUV_NOINLINE  // Alpha path of SplitARGBPlane: one SplitARGBRow call per row.
+static void SplitARGBPlaneAlpha(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_r,
+ int dst_stride_r,
+ uint8_t* dst_g,
+ int dst_stride_g,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ int width,
+ int height) {
+ int y;
+ void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
+ uint8_t* dst_b, uint8_t* dst_a, int width) =
+ SplitARGBRow_C;
+ // Coalesce rows: when every stride equals its packed row size, run one long row.
+ if (src_stride_argb == width * 4 && dst_stride_r == width &&
+ dst_stride_g == width && dst_stride_b == width && dst_stride_a == width) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b =
+ dst_stride_a = 0;
+ }
+ // Start from the C row function; each later CPU check overrides the earlier pick.
+#if defined(HAS_SPLITARGBROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ SplitARGBRow = SplitARGBRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ SplitARGBRow = SplitARGBRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SPLITARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ SplitARGBRow = SplitARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ SplitARGBRow = SplitARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_SPLITARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ SplitARGBRow = SplitARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ SplitARGBRow = SplitARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_SPLITARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SplitARGBRow = SplitARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ SplitARGBRow = SplitARGBRow_NEON;
+ }
+ }
+#endif
+ // Process one row per iteration, then step every pointer by its own stride.
+ for (y = 0; y < height; ++y) {
+ SplitARGBRow(src_argb, dst_r, dst_g, dst_b, dst_a, width);
+ dst_r += dst_stride_r;
+ dst_g += dst_stride_g;
+ dst_b += dst_stride_b;
+ dst_a += dst_stride_a;
+ src_argb += src_stride_argb;
+ }
+}
+
+LIBYUV_NOINLINE  // No-alpha path of SplitARGBPlane: one SplitXRGBRow call per row.
+static void SplitARGBPlaneOpaque(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_r,
+ int dst_stride_r,
+ uint8_t* dst_g,
+ int dst_stride_g,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height) {
+ int y;
+ void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
+ uint8_t* dst_b, int width) = SplitXRGBRow_C;
+ // Coalesce rows: when every stride equals its packed row size, run one long row.
+ if (src_stride_argb == width * 4 && dst_stride_r == width &&
+ dst_stride_g == width && dst_stride_b == width) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b = 0;
+ }
+ // Start from the C row function; each later CPU check overrides the earlier pick.
+#if defined(HAS_SPLITXRGBROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ SplitXRGBRow = SplitXRGBRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ SplitXRGBRow = SplitXRGBRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SPLITXRGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ SplitXRGBRow = SplitXRGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ SplitXRGBRow = SplitXRGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_SPLITXRGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ SplitXRGBRow = SplitXRGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ SplitXRGBRow = SplitXRGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_SPLITXRGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SplitXRGBRow = SplitXRGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ SplitXRGBRow = SplitXRGBRow_NEON;
+ }
+ }
+#endif
+ // Process one row per iteration, then step every pointer by its own stride.
+ for (y = 0; y < height; ++y) {
+ SplitXRGBRow(src_argb, dst_r, dst_g, dst_b, width);
+ dst_r += dst_stride_r;
+ dst_g += dst_stride_g;
+ dst_b += dst_stride_b;
+ src_argb += src_stride_argb;
+ }
+}
+
LIBYUV_API
void SplitARGBPlane(const uint8_t* src_argb,
int src_stride_argb,
@@ -998,138 +1131,142 @@
int dst_stride_a,
int width,
int height) {
- int y;
- void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
- uint8_t* dst_b, uint8_t* dst_a, int width) =
- SplitARGBRow_C;
- void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
- uint8_t* dst_b, int width) = SplitXRGBRow_C;
+ if (height < 0) {  // Negative height means invert the image.
+ height = -height;
+ dst_r = dst_r + (height - 1) * dst_stride_r;
+ dst_g = dst_g + (height - 1) * dst_stride_g;
+ dst_b = dst_b + (height - 1) * dst_stride_b;
+ dst_a = dst_a ? dst_a + (height - 1) * dst_stride_a : NULL;  // NULL must stay NULL: it selects the opaque path.
+ dst_stride_r = -dst_stride_r;
+ dst_stride_g = -dst_stride_g;
+ dst_stride_b = -dst_stride_b;
+ dst_stride_a = -dst_stride_a;
+ }
if (dst_a == NULL) {
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_r = dst_r + (height - 1) * dst_stride_r;
- dst_g = dst_g + (height - 1) * dst_stride_g;
- dst_b = dst_b + (height - 1) * dst_stride_b;
- dst_stride_r = -dst_stride_r;
- dst_stride_g = -dst_stride_g;
- dst_stride_b = -dst_stride_b;
- }
-
- // Coalesce rows.
- if (src_stride_argb == width * 4 && dst_stride_r == width &&
- dst_stride_g == width && dst_stride_b == width) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b =
- dst_stride_a = 0;
- }
-
-#if defined(HAS_SPLITXRGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- SplitXRGBRow = SplitXRGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- SplitXRGBRow = SplitXRGBRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_SPLITXRGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- SplitXRGBRow = SplitXRGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- SplitXRGBRow = SplitXRGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_SPLITXRGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- SplitXRGBRow = SplitXRGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- SplitXRGBRow = SplitXRGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_SPLITXRGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- SplitXRGBRow = SplitXRGBRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- SplitXRGBRow = SplitXRGBRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- SplitXRGBRow(src_argb, dst_r, dst_g, dst_b, width);
- dst_r += dst_stride_r;
- dst_g += dst_stride_g;
- dst_b += dst_stride_b;
- src_argb += src_stride_argb;
- }
+ SplitARGBPlaneOpaque(src_argb, src_stride_argb, dst_r, dst_stride_r, dst_g,
+ dst_stride_g, dst_b, dst_stride_b, width, height);
} else {
- if (height < 0) {
- height = -height;
- dst_r = dst_r + (height - 1) * dst_stride_r;
- dst_g = dst_g + (height - 1) * dst_stride_g;
- dst_b = dst_b + (height - 1) * dst_stride_b;
- dst_a = dst_a + (height - 1) * dst_stride_a;
- dst_stride_r = -dst_stride_r;
- dst_stride_g = -dst_stride_g;
- dst_stride_b = -dst_stride_b;
- dst_stride_a = -dst_stride_a;
- }
+ SplitARGBPlaneAlpha(src_argb, src_stride_argb, dst_r, dst_stride_r, dst_g,
+ dst_stride_g, dst_b, dst_stride_b, dst_a, dst_stride_a,
+ width, height);
+ }
+}
- if (src_stride_argb == width * 4 && dst_stride_r == width &&
- dst_stride_g == width && dst_stride_b == width &&
- dst_stride_a == width) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b =
- dst_stride_a = 0;
- }
+LIBYUV_NOINLINE  // Alpha path of MergeARGBPlane: one MergeARGBRow call per row.
+static void MergeARGBPlaneAlpha(const uint8_t* src_r,
+ int src_stride_r,
+ const uint8_t* src_g,
+ int src_stride_g,
+ const uint8_t* src_b,
+ int src_stride_b,
+ const uint8_t* src_a,
+ int src_stride_a,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g,
+ const uint8_t* src_b, const uint8_t* src_a,
+ uint8_t* dst_argb, int width) = MergeARGBRow_C;
-#if defined(HAS_SPLITARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- SplitARGBRow = SplitARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- SplitARGBRow = SplitARGBRow_SSE2;
- }
+ if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
+ src_stride_a == width && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_r = src_stride_g = src_stride_b = src_stride_a =
+ dst_stride_argb = 0;
+ }
+#if defined(HAS_MERGEARGBROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ MergeARGBRow = MergeARGBRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ MergeARGBRow = MergeARGBRow_SSE2;
}
+ }
#endif
-#if defined(HAS_SPLITARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- SplitARGBRow = SplitARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- SplitARGBRow = SplitARGBRow_SSSE3;
- }
+#if defined(HAS_MERGEARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ MergeARGBRow = MergeARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ MergeARGBRow = MergeARGBRow_AVX2;
}
+ }
#endif
-#if defined(HAS_SPLITARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- SplitARGBRow = SplitARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- SplitARGBRow = SplitARGBRow_AVX2;
- }
+#if defined(HAS_MERGEARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MergeARGBRow = MergeARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ MergeARGBRow = MergeARGBRow_NEON;
}
-#endif
-#if defined(HAS_SPLITARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- SplitARGBRow = SplitARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- SplitARGBRow = SplitARGBRow_NEON;
- }
- }
+ }
#endif
- for (y = 0; y < height; ++y) {
- SplitARGBRow(src_argb, dst_r, dst_g, dst_b, dst_a, width);
- dst_r += dst_stride_r;
- dst_g += dst_stride_g;
- dst_b += dst_stride_b;
- dst_a += dst_stride_a;
- src_argb += src_stride_argb;
+ for (y = 0; y < height; ++y) {
+ MergeARGBRow(src_r, src_g, src_b, src_a, dst_argb, width);
+ src_r += src_stride_r;
+ src_g += src_stride_g;
+ src_b += src_stride_b;
+ src_a += src_stride_a;
+ dst_argb += dst_stride_argb;
+ }
+}
+
+LIBYUV_NOINLINE  // No-alpha path of MergeARGBPlane: one MergeXRGBRow call per row.
+static void MergeARGBPlaneOpaque(const uint8_t* src_r,
+ int src_stride_r,
+ const uint8_t* src_g,
+ int src_stride_g,
+ const uint8_t* src_b,
+ int src_stride_b,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
+ const uint8_t* src_b, uint8_t* dst_argb, int width) =
+ MergeXRGBRow_C;
+ // Coalesce rows: when every stride equals its packed row size, run one long row.
+ if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
+ dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0;
+ }
+#if defined(HAS_MERGEXRGBROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ MergeXRGBRow = MergeXRGBRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ MergeXRGBRow = MergeXRGBRow_SSE2;
}
}
+#endif
+#if defined(HAS_MERGEXRGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ MergeXRGBRow = MergeXRGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ MergeXRGBRow = MergeXRGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_MERGEXRGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MergeXRGBRow = MergeXRGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ MergeXRGBRow = MergeXRGBRow_NEON;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ MergeXRGBRow(src_r, src_g, src_b, dst_argb, width);
+ src_r += src_stride_r;
+ src_g += src_stride_g;
+ src_b += src_stride_b;
+ dst_argb += dst_stride_argb;
+ }
}
LIBYUV_API
@@ -1145,104 +1282,25 @@
int dst_stride_argb,
int width,
int height) {
- int y;
- void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g,
- const uint8_t* src_b, const uint8_t* src_a,
- uint8_t* dst_argb, int width) = MergeARGBRow_C;
- void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
- const uint8_t* src_b, uint8_t* dst_argb, int width) =
- MergeXRGBRow_C;
-
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
+
if (src_a == NULL) {
- // Coalesce rows.
- if (src_stride_r == width && src_stride_g == width &&
- src_stride_b == width && dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0;
- }
-#if defined(HAS_MERGEXRGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- MergeXRGBRow = MergeXRGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- MergeXRGBRow = MergeXRGBRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_MERGEXRGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- MergeXRGBRow = MergeXRGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- MergeXRGBRow = MergeXRGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_MERGEXRGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- MergeXRGBRow = MergeXRGBRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- MergeXRGBRow = MergeXRGBRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- MergeXRGBRow(src_r, src_g, src_b, dst_argb, width);
- src_r += src_stride_r;
- src_g += src_stride_g;
- src_b += src_stride_b;
- dst_argb += dst_stride_argb;
- }
+ MergeARGBPlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
+ src_stride_b, dst_argb, dst_stride_argb, width,
+ height);
} else {
- if (src_stride_r == width && src_stride_g == width &&
- src_stride_b == width && src_stride_a == width &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_r = src_stride_g = src_stride_b = src_stride_a =
- dst_stride_argb = 0;
- }
-#if defined(HAS_MERGEARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- MergeARGBRow = MergeARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- MergeARGBRow = MergeARGBRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_MERGEARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- MergeARGBRow = MergeARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- MergeARGBRow = MergeARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_MERGEARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- MergeARGBRow = MergeARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- MergeARGBRow = MergeARGBRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- MergeARGBRow(src_r, src_g, src_b, src_a, dst_argb, width);
- src_r += src_stride_r;
- src_g += src_stride_g;
- src_b += src_stride_b;
- dst_argb += dst_stride_argb;
- }
+ MergeARGBPlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
+ src_stride_b, src_a, src_stride_a, dst_argb,
+ dst_stride_argb, width, height);
}
}
+// TODO(yuan): Support 2 bit alpha channel.
LIBYUV_API
void MergeXR30Plane(const uint16_t* src_r,
int src_stride_r,
@@ -1306,6 +1364,110 @@
}
}
+LIBYUV_NOINLINE  // Alpha path of MergeAR64Plane: one MergeAR64Row call per row.
+static void MergeAR64PlaneAlpha(const uint16_t* src_r,
+ int src_stride_r,
+ const uint16_t* src_g,
+ int src_stride_g,
+ const uint16_t* src_b,
+ int src_stride_b,
+ const uint16_t* src_a,
+ int src_stride_a,
+ uint16_t* dst_ar64,
+ int dst_stride_ar64,
+ int width,
+ int height,
+ int depth) {
+ int y;
+ void (*MergeAR64Row)(const uint16_t* src_r, const uint16_t* src_g,
+ const uint16_t* src_b, const uint16_t* src_a,
+ uint16_t* dst_argb, int depth, int width) =
+ MergeAR64Row_C;
+ // Coalesce rows: when every stride equals its packed row size, run one long row.
+ if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
+ src_stride_a == width && dst_stride_ar64 == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_r = src_stride_g = src_stride_b = src_stride_a =
+ dst_stride_ar64 = 0;
+ }
+#if defined(HAS_MERGEAR64ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ MergeAR64Row = MergeAR64Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ MergeAR64Row = MergeAR64Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_MERGEAR64ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MergeAR64Row = MergeAR64Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ MergeAR64Row = MergeAR64Row_NEON;
+ }
+ }
+#endif
+ // depth is forwarded unchanged; every pointer then steps by its own stride.
+ for (y = 0; y < height; ++y) {
+ MergeAR64Row(src_r, src_g, src_b, src_a, dst_ar64, depth, width);
+ src_r += src_stride_r;
+ src_g += src_stride_g;
+ src_b += src_stride_b;
+ src_a += src_stride_a;
+ dst_ar64 += dst_stride_ar64;
+ }
+}
+
+LIBYUV_NOINLINE  // No-alpha path of MergeAR64Plane: one MergeXR64Row call per row.
+static void MergeAR64PlaneOpaque(const uint16_t* src_r,
+ int src_stride_r,
+ const uint16_t* src_g,
+ int src_stride_g,
+ const uint16_t* src_b,
+ int src_stride_b,
+ uint16_t* dst_ar64,
+ int dst_stride_ar64,
+ int width,
+ int height,
+ int depth) {
+ int y;
+ void (*MergeXR64Row)(const uint16_t* src_r, const uint16_t* src_g,
+ const uint16_t* src_b, uint16_t* dst_argb, int depth,
+ int width) = MergeXR64Row_C;
+
+ // Coalesce rows: when every stride equals its packed row size, run one long row.
+ if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
+ dst_stride_ar64 == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_r = src_stride_g = src_stride_b = dst_stride_ar64 = 0;
+ }
+#if defined(HAS_MERGEXR64ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ MergeXR64Row = MergeXR64Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ MergeXR64Row = MergeXR64Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_MERGEXR64ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MergeXR64Row = MergeXR64Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ MergeXR64Row = MergeXR64Row_NEON;
+ }
+ }
+#endif
+ // depth is forwarded unchanged; every pointer then steps by its own stride.
+ for (y = 0; y < height; ++y) {
+ MergeXR64Row(src_r, src_g, src_b, dst_ar64, depth, width);
+ src_r += src_stride_r;
+ src_g += src_stride_g;
+ src_b += src_stride_b;
+ dst_ar64 += dst_stride_ar64;
+ }
+}
+
LIBYUV_API
void MergeAR64Plane(const uint16_t* src_r,
int src_stride_r,
@@ -1320,87 +1482,126 @@
int width,
int height,
int depth) {
- int y;
- void (*MergeAR64Row)(const uint16_t* src_r, const uint16_t* src_g,
- const uint16_t* src_b, const uint16_t* src_a,
- uint16_t* dst_argb, int depth, int width) =
- MergeAR64Row_C;
- void (*MergeXR64Row)(const uint16_t* src_r, const uint16_t* src_g,
- const uint16_t* src_b, uint16_t* dst_argb, int depth,
- int width) = MergeXR64Row_C;
-
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_ar64 = dst_ar64 + (height - 1) * dst_stride_ar64;
dst_stride_ar64 = -dst_stride_ar64;
}
+
if (src_a == NULL) {
- // Coalesce rows.
- if (src_stride_r == width && src_stride_g == width &&
- src_stride_b == width && dst_stride_ar64 == width * 4) {
- width *= height;
- height = 1;
- src_stride_r = src_stride_g = src_stride_b = dst_stride_ar64 = 0;
- }
-#if defined(HAS_MERGEXR64ROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- MergeXR64Row = MergeXR64Row_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- MergeXR64Row = MergeXR64Row_AVX2;
- }
- }
-#endif
-#if defined(HAS_MERGEXR64ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- MergeXR64Row = MergeXR64Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- MergeXR64Row = MergeXR64Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- MergeXR64Row(src_r, src_g, src_b, dst_ar64, depth, width);
- src_r += src_stride_r;
- src_g += src_stride_g;
- src_b += src_stride_b;
- dst_ar64 += dst_stride_ar64;
- }
+ MergeAR64PlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
+ src_stride_b, dst_ar64, dst_stride_ar64, width, height,
+ depth);
} else {
- if (src_stride_r == width && src_stride_g == width &&
- src_stride_b == width && src_stride_a == width &&
- dst_stride_ar64 == width * 4) {
- width *= height;
- height = 1;
- src_stride_r = src_stride_g = src_stride_b = src_stride_a =
- dst_stride_ar64 = 0;
+ MergeAR64PlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
+ src_stride_b, src_a, src_stride_a, dst_ar64,
+ dst_stride_ar64, width, height, depth);
+ }
+}
+
+LIBYUV_NOINLINE
+static void MergeARGB16To8PlaneAlpha(const uint16_t* src_r,
+ int src_stride_r,
+ const uint16_t* src_g,
+ int src_stride_g,
+ const uint16_t* src_b,
+ int src_stride_b,
+ const uint16_t* src_a,
+ int src_stride_a,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height,
+ int depth) {
+ int y;
+ void (*MergeARGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g,
+ const uint16_t* src_b, const uint16_t* src_a,
+ uint8_t* dst_argb, int depth, int width) =
+ MergeARGB16To8Row_C;
+
+ if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
+ src_stride_a == width && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_r = src_stride_g = src_stride_b = src_stride_a =
+ dst_stride_argb = 0;
+ }
+#if defined(HAS_MERGEARGB16TO8ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ MergeARGB16To8Row = MergeARGB16To8Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ MergeARGB16To8Row = MergeARGB16To8Row_AVX2;
}
-#if defined(HAS_MERGEAR64ROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- MergeAR64Row = MergeAR64Row_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- MergeAR64Row = MergeAR64Row_AVX2;
- }
- }
+ }
#endif
-#if defined(HAS_MERGEAR64ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- MergeAR64Row = MergeAR64Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- MergeAR64Row = MergeAR64Row_NEON;
- }
+#if defined(HAS_MERGEARGB16TO8ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MergeARGB16To8Row = MergeARGB16To8Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ MergeARGB16To8Row = MergeARGB16To8Row_NEON;
}
+ }
#endif
- for (y = 0; y < height; ++y) {
- MergeAR64Row(src_r, src_g, src_b, src_a, dst_ar64, depth, width);
- src_r += src_stride_r;
- src_g += src_stride_g;
- src_b += src_stride_b;
- dst_ar64 += dst_stride_ar64;
+ for (y = 0; y < height; ++y) {
+ MergeARGB16To8Row(src_r, src_g, src_b, src_a, dst_argb, depth, width);
+ src_r += src_stride_r;
+ src_g += src_stride_g;
+ src_b += src_stride_b;
+ src_a += src_stride_a;
+ dst_argb += dst_stride_argb;
+ }
+}
+
+LIBYUV_NOINLINE
+static void MergeARGB16To8PlaneOpaque(const uint16_t* src_r,
+ int src_stride_r,
+ const uint16_t* src_g,
+ int src_stride_g,
+ const uint16_t* src_b,
+ int src_stride_b,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height,
+ int depth) {
+ int y;
+ void (*MergeXRGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g,
+ const uint16_t* src_b, uint8_t* dst_argb, int depth,
+ int width) = MergeXRGB16To8Row_C;
+
+ // Coalesce rows.
+ if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
+ dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0;
+ }
+#if defined(HAS_MERGEXRGB16TO8ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ MergeXRGB16To8Row = MergeXRGB16To8Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ MergeXRGB16To8Row = MergeXRGB16To8Row_AVX2;
}
}
+#endif
+#if defined(HAS_MERGEXRGB16TO8ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MergeXRGB16To8Row = MergeXRGB16To8Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ MergeXRGB16To8Row = MergeXRGB16To8Row_NEON;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ MergeXRGB16To8Row(src_r, src_g, src_b, dst_argb, depth, width);
+ src_r += src_stride_r;
+ src_g += src_stride_g;
+ src_b += src_stride_b;
+ dst_argb += dst_stride_argb;
+ }
}
LIBYUV_API
@@ -1417,86 +1618,21 @@
int width,
int height,
int depth) {
- int y;
- void (*MergeARGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g,
- const uint16_t* src_b, const uint16_t* src_a,
- uint8_t* dst_argb, int depth, int width) =
- MergeARGB16To8Row_C;
- void (*MergeXRGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g,
- const uint16_t* src_b, uint8_t* dst_argb, int depth,
- int width) = MergeXRGB16To8Row_C;
-
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
+
if (src_a == NULL) {
- // Coalesce rows.
- if (src_stride_r == width && src_stride_g == width &&
- src_stride_b == width && dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0;
- }
-#if defined(HAS_MERGEXRGB16TO8ROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- MergeXRGB16To8Row = MergeXRGB16To8Row_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- MergeXRGB16To8Row = MergeXRGB16To8Row_AVX2;
- }
- }
-#endif
-#if defined(HAS_MERGEXRGB16TO8ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- MergeXRGB16To8Row = MergeXRGB16To8Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- MergeXRGB16To8Row = MergeXRGB16To8Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- MergeXRGB16To8Row(src_r, src_g, src_b, dst_argb, depth, width);
- src_r += src_stride_r;
- src_g += src_stride_g;
- src_b += src_stride_b;
- dst_argb += dst_stride_argb;
- }
+ MergeARGB16To8PlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
+ src_stride_b, dst_argb, dst_stride_argb, width,
+ height, depth);
} else {
- if (src_stride_r == width && src_stride_g == width &&
- src_stride_b == width && src_stride_a == width &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_r = src_stride_g = src_stride_b = src_stride_a =
- dst_stride_argb = 0;
- }
-#if defined(HAS_MERGEARGB16TO8ROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- MergeARGB16To8Row = MergeARGB16To8Row_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- MergeARGB16To8Row = MergeARGB16To8Row_AVX2;
- }
- }
-#endif
-#if defined(HAS_MERGEARGB16TO8ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- MergeARGB16To8Row = MergeARGB16To8Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- MergeARGB16To8Row = MergeARGB16To8Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- MergeARGB16To8Row(src_r, src_g, src_b, src_a, dst_argb, depth, width);
- src_r += src_stride_r;
- src_g += src_stride_g;
- src_b += src_stride_b;
- dst_argb += dst_stride_argb;
- }
+ MergeARGB16To8PlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
+ src_stride_b, src_a, src_stride_a, dst_argb,
+ dst_stride_argb, width, height, depth);
}
}
diff --git a/source/row_any.cc b/source/row_any.cc
index b7668a1..05a88f0 100644
--- a/source/row_any.cc
+++ b/source/row_any.cc
@@ -200,15 +200,15 @@
memcpy(temp + 32, b_buf + n, r * SBPP); \
memcpy(temp + 48, a_buf + n, r * SBPP); \
ANY_SIMD(temp, temp + 16, temp + 32, temp + 48, out, depth, MASK + 1); \
- memcpy(dst_ptr + n * BPP, out, r * BPP); \
+ memcpy((uint8_t *)dst_ptr + n * BPP, out, r * BPP); \
}
#ifdef HAS_MERGEAR64ROW_AVX2
-ANY41PT(MergeAR64Row_Any_AVX2, MergeAR64Row_AVX2, uint16_t, 2, uint16_t, 4, 15)
+ANY41PT(MergeAR64Row_Any_AVX2, MergeAR64Row_AVX2, uint16_t, 2, uint16_t, 8, 15)
#endif
#ifdef HAS_MERGEAR64ROW_NEON
-ANY41PT(MergeAR64Row_Any_NEON, MergeAR64Row_NEON, uint16_t, 2, uint16_t, 4, 7)
+ANY41PT(MergeAR64Row_Any_NEON, MergeAR64Row_NEON, uint16_t, 2, uint16_t, 8, 7)
#endif
#ifdef HAS_MERGEARGB16TO8ROW_AVX2
@@ -490,7 +490,7 @@
memcpy(temp + 16, g_buf + n, r * SBPP); \
memcpy(temp + 32, b_buf + n, r * SBPP); \
ANY_SIMD(temp, temp + 16, temp + 32, out, depth, MASK + 1); \
- memcpy(dst_ptr + n * BPP, out, r * BPP); \
+ memcpy((uint8_t *)dst_ptr + n * BPP, out, r * BPP); \
}
#ifdef HAS_MERGEXR30ROW_AVX2
@@ -509,11 +509,11 @@
#endif
#ifdef HAS_MERGEXR64ROW_AVX2
-ANY31PT(MergeXR64Row_Any_AVX2, MergeXR64Row_AVX2, uint16_t, 2, uint16_t, 4, 15)
+ANY31PT(MergeXR64Row_Any_AVX2, MergeXR64Row_AVX2, uint16_t, 2, uint16_t, 8, 15)
#endif
#ifdef HAS_MERGEXR64ROW_NEON
-ANY31PT(MergeXR64Row_Any_NEON, MergeXR64Row_NEON, uint16_t, 2, uint16_t, 4, 7)
+ANY31PT(MergeXR64Row_Any_NEON, MergeXR64Row_NEON, uint16_t, 2, uint16_t, 8, 7)
#endif
#ifdef HAS_MERGEXRGB16TO8ROW_AVX2
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc
index dbe7991..83cb2bd 100644
--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -3152,7 +3152,7 @@
#define TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
- const int kPixels = (kWidth * benchmark_height_ + 15) & ~15; \
+ const int kPixels = kWidth * benchmark_height_; \
align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \