blob: d48cbd25daea4db7299a15aa9d6c9006b1abae4e [file] [log] [blame]
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// -----------------------------------------------------------------------------
//
// Speed-critical convert-to-Argb functions
//
// Author: Skal (pascal.massimino@gmail.com)
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include "src/dsp/dsp.h"
#include "src/dsp/math.h"
#include "src/wp2/base.h"
namespace {
//------------------------------------------------------------------------------
// Plain copy (same source and destination formats)
// Copies 'width' pixels of 'bytes_per_pixel' bytes each from 'src' to 'dst'.
// The byte count is computed as size_t.
template <int bytes_per_pixel>
void WP2Copy_C(const void* src, uint32_t width, void* dst) {
  const size_t num_bytes = static_cast<size_t>(width) * bytes_per_pixel;
  std::memcpy(dst, src, num_bytes);
}
//------------------------------------------------------------------------------
// C conversion from a format to another
// Undoes alpha premultiplication on the three color samples v[0..2], in place.
// 'a' is the 8-bit alpha: a == 0 zeroes the color, a == 255 leaves it as is.
// Set 'kIs8b' to true if the samples fit in 8 bits (before and after): the
// division then goes through the WP2::kAlphaDiv[] lookup and the result is
// clamped to 255. Otherwise a rounded integer division is used, unclamped.
template <typename Type, bool kIs8b = (sizeof(Type) == 1)>
void Unmult(uint32_t a, Type v[]) {
  if (a == 255) return;  // Fully opaque: samples are already unmultiplied.
  if (a == 0) {
    v[0] = v[1] = v[2] = 0;
    return;
  }
  if (!kIs8b) {
    // Wider samples: rounded division (v * 255 + a / 2) / a. The
    // WP2::DivByAlphaDiv() path below is not used for these.
    const uint32_t half_alpha = a >> 1;
    for (int c = 0; c < 3; ++c) {
      v[c] = (v[c] * 255 + half_alpha) / a;
    }
    return;
  }
  // 8-bit samples: table-driven division, clamped to the 8-bit maximum.
  const uint32_t alpha_div = WP2::kAlphaDiv[a];
  for (int c = 0; c < 3; ++c) {
    v[c] = std::min(WP2::DivByAlphaDiv(v[c], alpha_div), 255u);
  }
}
// Scales the three color samples v[0..2] (8- or 10-bit) in place by the 8-bit
// alpha 'a', i.e. v = WP2::DivBy255(v * a). a == 255 is a no-op and a == 0
// zeroes the color.
template <typename Type>
void Premult(uint32_t a, Type v[]) {
  if (a == 255) return;
  if (a == 0) {
    v[0] = v[1] = v[2] = 0;
    return;
  }
  for (int c = 0; c < 3; ++c) {
    v[c] = WP2::DivBy255(v[c] * a);
  }
}
// Generic pixel converter: reads 'width' pixels of 'SrcType' samples from
// 'src_data' and writes them as 'DstType' samples to 'dst_data'.
// 'k[Src/Dst][A/R/G/B]' are the sample indices of each channel inside a pixel
// and 'k[Src/Dst]Step' is the number of samples per pixel.
// kMult > 0 premultiplies the color by the source alpha, kMult < 0
// unmultiplies it, kMult == 0 leaves it untouched.
// 'kFillAlpha' stores 255 as destination alpha instead of the source alpha.
// Destination alpha is only written when 'kDstStep' is 4.
// A SrcType narrower than DstType is treated as 8-bit going to 10-bit, and
// the reverse as 10-bit going to 8-bit.
// Must not be called with identical source and destination pointers.
template <typename SrcType, int kSrcA, int kSrcR, int kSrcG, int kSrcB,
          int kSrcStep, typename DstType, int kDstA, int kDstR, int kDstG,
          int kDstB, int kDstStep, int kMult, bool kFillAlpha>
void Convert_C(const void* src_data, uint32_t width, void* dst_data) {
  assert(src_data != dst_data);
  constexpr bool kWiden = (sizeof(SrcType) < sizeof(DstType));
  constexpr bool kNarrow = (sizeof(SrcType) > sizeof(DstType));
  constexpr bool kAll8b = (sizeof(SrcType) == 1 && sizeof(DstType) == 1);

  const SrcType* src_px = static_cast<const SrcType*>(src_data);
  DstType* dst_px = static_cast<DstType*>(dst_data);
  for (uint32_t left = width; left > 0; --left) {
    if (kDstStep == 4) {
      dst_px[kDstA] = (kFillAlpha ? 0xff : src_px[kSrcA]);
    }

    // Work on 32-bit copies so intermediate products cannot overflow.
    uint32_t rgb[3] = {src_px[kSrcR], src_px[kSrcG], src_px[kSrcB]};
    if (kWiden) {
      for (int c = 0; c < 3; ++c) {
        rgb[c] = WP2::ChangePrecision(rgb[c], /*from=*/8, /*to=*/10);
      }
    }

    // Alpha (un)multiplication happens at the wider of the two precisions.
    if (kMult > 0) {
      Premult(src_px[kSrcA], rgb);
    } else if (kMult < 0) {
      Unmult<uint32_t, /*kIs8b=*/kAll8b>(src_px[kSrcA], rgb);
    }

    if (kNarrow) {
      for (int c = 0; c < 3; ++c) {
        rgb[c] = WP2::ChangePrecision(rgb[c], /*from=*/10, /*to=*/8);
      }
    }

    dst_px[kDstR] = rgb[0];
    dst_px[kDstG] = rgb[1];
    dst_px[kDstB] = rgb[2];

    src_px += kSrcStep;
    dst_px += kDstStep;
  }
}
// From the specified format to Argb32 or ARGB32.
// Expands to the Convert_C instantiation reading SRC_TYPE samples whose
// channels sit at sample indices A, R, G, B with SRC_STEP samples per pixel,
// and writing 8-bit samples with A, R, G, B at indices 0, 1, 2, 3 (4 samples
// per pixel). MULT and FILL are forwarded as 'kMult' and 'kFillAlpha'.
#define CONVERT_TO_ARGB32(SRC_TYPE, A, R, G, B, MULT, FILL, SRC_STEP) \
Convert_C<SRC_TYPE, A, R, G, B, SRC_STEP, uint8_t, 0, 1, 2, 3, 4, MULT, FILL>
// From the Argb32 or ARGB32 to specified format.
// Mirror of CONVERT_TO_ARGB32: the source is 8-bit with A, R, G, B at indices
// 0, 1, 2, 3 (4 samples per pixel) and the destination uses DST_TYPE samples
// at indices A, R, G, B with DST_STEP samples per pixel.
#define CONVERT_ARGB32_TO(DST_TYPE, A, R, G, B, MULT, FILL, DST_STEP) \
Convert_C<uint8_t, 0, 1, 2, 3, 4, DST_TYPE, A, R, G, B, DST_STEP, MULT, FILL>
// Table of C converters, named Convert_<source>_<destination>_C.
// Macro arguments are (sample type, A, R, G, B indices, MULT, FILL, step).
// As the MULT arguments below show, formats spelled with lowercase color
// letters (Argb, rgbA, bgrA) hold premultiplied color and the uppercase ones
// (ARGB, RGBA, BGRA) do not: going from uppercase to lowercase uses MULT = 1
// (premultiply), the reverse uses MULT = -1 (unmultiply).
// 'X' formats have FILL = 1 when they are the destination or the source, so
// the written alpha is 255.
// The 3-sample formats (RGB32/BGR32 names, step 3) pass 0 as a placeholder
// alpha index together with FILL = 1.

// Any format -> Argb32.
#define Convert_Argb32_Argb32_C CONVERT_TO_ARGB32(uint8_t, 0, 1, 2, 3, 0, 0, 4)
#define Convert_ARGB32_ARGB32_C CONVERT_TO_ARGB32(uint8_t, 0, 1, 2, 3, 0, 0, 4)
#define Convert_ARGB32_Argb32_C CONVERT_TO_ARGB32(uint8_t, 0, 1, 2, 3, 1, 0, 4)
#define Convert_XRGB32_Argb32_C CONVERT_TO_ARGB32(uint8_t, 0, 1, 2, 3, 0, 1, 4)
#define Convert_rgbA32_Argb32_C CONVERT_TO_ARGB32(uint8_t, 3, 0, 1, 2, 0, 0, 4)
#define Convert_RGBA32_Argb32_C CONVERT_TO_ARGB32(uint8_t, 3, 0, 1, 2, 1, 0, 4)
#define Convert_RGBX32_Argb32_C CONVERT_TO_ARGB32(uint8_t, 3, 0, 1, 2, 0, 1, 4)
#define Convert_bgrA32_Argb32_C CONVERT_TO_ARGB32(uint8_t, 3, 2, 1, 0, 0, 0, 4)
#define Convert_BGRA32_Argb32_C CONVERT_TO_ARGB32(uint8_t, 3, 2, 1, 0, 1, 0, 4)
#define Convert_BGRX32_Argb32_C CONVERT_TO_ARGB32(uint8_t, 3, 2, 1, 0, 0, 1, 4)
#define Convert_RGB32_Argb32_C CONVERT_TO_ARGB32(uint8_t, 0, 0, 1, 2, 0, 1, 3)
#define Convert_BGR32_Argb32_C CONVERT_TO_ARGB32(uint8_t, 0, 2, 1, 0, 0, 1, 3)
// Any format -> ARGB32.
#define Convert_XRGB32_ARGB32_C CONVERT_TO_ARGB32(uint8_t, 0, 1, 2, 3, 0, 1, 4)
#define Convert_rgbA32_ARGB32_C CONVERT_TO_ARGB32(uint8_t, 3, 0, 1, 2, -1, 0, 4)
#define Convert_RGBA32_ARGB32_C CONVERT_TO_ARGB32(uint8_t, 3, 0, 1, 2, 0, 0, 4)
#define Convert_RGBX32_ARGB32_C CONVERT_TO_ARGB32(uint8_t, 3, 0, 1, 2, 0, 1, 4)
#define Convert_bgrA32_ARGB32_C CONVERT_TO_ARGB32(uint8_t, 3, 2, 1, 0, -1, 0, 4)
#define Convert_BGRA32_ARGB32_C CONVERT_TO_ARGB32(uint8_t, 3, 2, 1, 0, 0, 0, 4)
#define Convert_BGRX32_ARGB32_C CONVERT_TO_ARGB32(uint8_t, 3, 2, 1, 0, 0, 1, 4)
#define Convert_RGB32_ARGB32_C CONVERT_TO_ARGB32(uint8_t, 0, 0, 1, 2, 0, 1, 3)
#define Convert_BGR32_ARGB32_C CONVERT_TO_ARGB32(uint8_t, 0, 2, 1, 0, 0, 1, 3)
// Argb32 -> any format.
#define Convert_Argb32_ARGB32_C CONVERT_ARGB32_TO(uint8_t, 0, 1, 2, 3, -1, 0, 4)
#define Convert_Argb32_XRGB32_C CONVERT_ARGB32_TO(uint8_t, 0, 1, 2, 3, -1, 1, 4)
#define Convert_Argb32_rgbA32_C CONVERT_ARGB32_TO(uint8_t, 3, 0, 1, 2, 0, 0, 4)
#define Convert_Argb32_RGBA32_C CONVERT_ARGB32_TO(uint8_t, 3, 0, 1, 2, -1, 0, 4)
#define Convert_Argb32_RGBX32_C CONVERT_ARGB32_TO(uint8_t, 3, 0, 1, 2, -1, 1, 4)
#define Convert_Argb32_bgrA32_C CONVERT_ARGB32_TO(uint8_t, 3, 2, 1, 0, 0, 0, 4)
#define Convert_Argb32_BGRA32_C CONVERT_ARGB32_TO(uint8_t, 3, 2, 1, 0, -1, 0, 4)
#define Convert_Argb32_BGRX32_C CONVERT_ARGB32_TO(uint8_t, 3, 2, 1, 0, -1, 1, 4)
#define Convert_Argb32_RGB32_C CONVERT_ARGB32_TO(uint8_t, 0, 0, 1, 2, -1, 1, 3)
#define Convert_Argb32_BGR32_C CONVERT_ARGB32_TO(uint8_t, 0, 2, 1, 0, -1, 1, 3)
// ARGB32 -> any format.
#define Convert_ARGB32_XRGB32_C CONVERT_ARGB32_TO(uint8_t, 0, 1, 2, 3, 0, 1, 4)
#define Convert_ARGB32_rgbA32_C CONVERT_ARGB32_TO(uint8_t, 3, 0, 1, 2, 1, 0, 4)
#define Convert_ARGB32_RGBA32_C CONVERT_ARGB32_TO(uint8_t, 3, 0, 1, 2, 0, 0, 4)
#define Convert_ARGB32_RGBX32_C CONVERT_ARGB32_TO(uint8_t, 3, 0, 1, 2, 0, 1, 4)
#define Convert_ARGB32_bgrA32_C CONVERT_ARGB32_TO(uint8_t, 3, 2, 1, 0, 1, 0, 4)
#define Convert_ARGB32_BGRA32_C CONVERT_ARGB32_TO(uint8_t, 3, 2, 1, 0, 0, 0, 4)
#define Convert_ARGB32_BGRX32_C CONVERT_ARGB32_TO(uint8_t, 3, 2, 1, 0, 0, 1, 4)
#define Convert_ARGB32_RGB32_C CONVERT_ARGB32_TO(uint8_t, 0, 0, 1, 2, 0, 1, 3)
#define Convert_ARGB32_BGR32_C CONVERT_ARGB32_TO(uint8_t, 0, 2, 1, 0, 0, 1, 3)
// Argb38 uses uint16_t samples; Convert_C changes precision between 10 and 8
// bits when the sample sizes differ.
#define Convert_Argb38_Argb32_C CONVERT_TO_ARGB32(uint16_t, 0, 1, 2, 3, 0, 0, 4)
#define Convert_Argb38_ARGB32_C \
CONVERT_TO_ARGB32(uint16_t, 0, 1, 2, 3, -1, 0, 4)
#define Convert_Argb32_Argb38_C CONVERT_ARGB32_TO(uint16_t, 0, 1, 2, 3, 0, 0, 4)
#define Convert_ARGB32_Argb38_C CONVERT_ARGB32_TO(uint16_t, 0, 1, 2, 3, 1, 0, 4)
//------------------------------------------------------------------------------
// SSE4.1 implementation
#if defined(WP2_USE_SSE)
// Premultiplies 'width' 4-byte pixels in place. Each pixel stores alpha in
// byte 0 followed by the three color bytes.
void Premultiply(uint8_t* dst, uint32_t width) {
  uint8_t* const end = dst + 4 * static_cast<size_t>(width);
  for (uint8_t* pixel = dst; pixel != end; pixel += 4) {
    Premult(pixel[0], pixel + 1);
  }
}
// Builds the 16-byte control vector for _mm_shuffle_epi8() from 'CST', which
// packs the four source-byte indices for the first pixel (lowest byte of CST
// selects the source of output byte 0). The three upper 32-bit lanes OR in
// 4, 8 and 12 so that the same pattern addresses the next three pixels.
#define PSHUFB_CST32(CST) \
_mm_set_epi32(CST | 0x0c0c0c0c, CST | 0x08080808, CST | 0x04040404, CST)
// Defines NAME##_SSE(src_data, width, dst_data), which reorders the four bytes
// of every 32-bit pixel following the shuffle pattern 'CST' (see
// PSHUFB_CST32). The main loop handles 8 pixels per iteration with two
// unaligned 16-byte loads and stores; the remaining pixels (fewer than 8) are
// handed to the matching NAME##_C converter.
#define CONVERT_32_TO_32_FUNC(NAME, CST) \
void NAME##_SSE(const void* src_data, uint32_t width, void* dst_data) { \
const uint8_t* src = (const uint8_t*)src_data; \
uint8_t* dst = (uint8_t*)dst_data; \
const __m128i kShuffle = PSHUFB_CST32(CST); \
uint32_t x; \
for (x = 0; x + 8 <= width; x += 8) { \
const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + 4 * x + 0)); \
const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + 4 * x + 16)); \
const __m128i B0 = _mm_shuffle_epi8(A0, kShuffle); \
const __m128i B1 = _mm_shuffle_epi8(A1, kShuffle); \
_mm_storeu_si128((__m128i*)(dst + 4 * x + 0), B0); \
_mm_storeu_si128((__m128i*)(dst + 4 * x + 16), B1); \
} \
if (x < width) NAME##_C(src + 4 * x, width - x, dst + 4 * x); \
}
// Same as CONVERT_32_TO_32_FUNC, except that every shuffled pixel is also
// OR-ed with 'CST_A' (replicated in each 32-bit lane). The callers below put
// 0xff in the shuffle pattern for the alpha byte, which zeroes it, and use
// 'CST_A' to set that byte to 255.
#define CONVERT_32_TO_32_FILL_FUNC(NAME, CST, CST_A) \
void NAME##_SSE(const void* src_data, uint32_t width, void* dst_data) { \
const uint8_t* src = (const uint8_t*)src_data; \
uint8_t* dst = (uint8_t*)dst_data; \
const __m128i kShuffle = PSHUFB_CST32(CST); \
const __m128i kFill = _mm_set1_epi32(CST_A); \
uint32_t x; \
for (x = 0; x + 8 <= width; x += 8) { \
const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + 4 * x + 0)); \
const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + 4 * x + 16)); \
const __m128i B0 = _mm_or_si128(_mm_shuffle_epi8(A0, kShuffle), kFill); \
const __m128i B1 = _mm_or_si128(_mm_shuffle_epi8(A1, kShuffle), kFill); \
_mm_storeu_si128((__m128i*)(dst + 4 * x + 0), B0); \
_mm_storeu_si128((__m128i*)(dst + 4 * x + 16), B1); \
} \
if (x < width) NAME##_C(src + 4 * x, width - x, dst + 4 * x); \
}
// Argb32 -> Argb32: source and destination layouts are identical, so this is
// a plain copy of 'width' 4-byte pixels. 'src' and 'dst' must not overlap.
void Convert_Argb32_Argb32_SSE(const void* src, uint32_t width, void* dst) {
  // Widen before multiplying so the byte count cannot wrap in 32 bits (same
  // as WP2Copy_C), and use the std:: name that <cstring> guarantees.
  std::memcpy(dst, src, 4 * static_cast<size_t>(width));
}
// Defines Convert_XRGB32_Argb32_SSE: bytes 1..3 are kept in place, byte 0 is
// zeroed by the 0xff shuffle index and then set to 255 by the fill constant.
CONVERT_32_TO_32_FILL_FUNC(Convert_XRGB32_Argb32, 0x030201ff, 0x000000ffu)
// ARGB32 -> Argb32: copies the pixels unchanged, then premultiplies the
// destination in place.
void Convert_ARGB32_Argb32_SSE(const void* src, uint32_t width, void* dst) {
  uint8_t* const argb = static_cast<uint8_t*>(dst);
  Convert_Argb32_Argb32_SSE(src, width, argb);
  Premultiply(argb, width);
}
// Defines Convert_rgbA32_Argb32_SSE: output bytes 0,1,2,3 come from input
// bytes 3,0,1,2 (alpha moves from last to first).
CONVERT_32_TO_32_FUNC(Convert_rgbA32_Argb32, 0x02010003)
// Defines Convert_RGBX32_Argb32_SSE: output bytes 1,2,3 come from input bytes
// 0,1,2; output byte 0 is zeroed by the shuffle and set to 255 by the fill.
CONVERT_32_TO_32_FILL_FUNC(Convert_RGBX32_Argb32, 0x020100ff, 0x000000ffu)
// RGBA32 -> Argb32: reorders the bytes as for rgbA32, then premultiplies the
// destination in place.
void Convert_RGBA32_Argb32_SSE(const void* src, uint32_t width, void* dst) {
  uint8_t* const argb = static_cast<uint8_t*>(dst);
  Convert_rgbA32_Argb32_SSE(src, width, argb);
  Premultiply(argb, width);
}
// Defines Convert_bgrA32_Argb32_SSE: output bytes 0,1,2,3 come from input
// bytes 3,2,1,0 (full byte reversal within each pixel).
CONVERT_32_TO_32_FUNC(Convert_bgrA32_Argb32, 0x00010203)
// Defines Convert_BGRX32_Argb32_SSE: output bytes 1,2,3 come from input bytes
// 2,1,0; output byte 0 is zeroed by the shuffle and set to 255 by the fill.
CONVERT_32_TO_32_FILL_FUNC(Convert_BGRX32_Argb32, 0x000102ff, 0x000000ffu)
// BGRA32 -> Argb32: reorders the bytes as for bgrA32, then premultiplies the
// destination in place.
void Convert_BGRA32_Argb32_SSE(const void* src, uint32_t width, void* dst) {
  uint8_t* const argb = static_cast<uint8_t*>(dst);
  Convert_bgrA32_Argb32_SSE(src, width, argb);
  Premultiply(argb, width);
}
// ConvertTo
// Unmultiplies 'width' 4-byte pixels in place. 'dst' points at the first of
// the three consecutive color bytes of the first pixel and 'dst_a' at its
// alpha byte; both advance by 4 bytes per pixel.
void Unmultiply(uint8_t* const dst, const uint8_t* dst_a, uint32_t width) {
  for (uint32_t x = 0; x < width; ++x) {
    const uint32_t offset = 4 * x;
    const uint32_t alpha = dst_a[offset];
    Unmult(alpha, dst + offset);
  }
}
// Writes 255 to the alpha byte of 'width' 4-byte pixels. 'dst_a' points at the
// alpha byte of the first pixel.
void FillAlpha(uint8_t* dst_a, uint32_t width) {
  constexpr uint32_t kBytesPerPixel = 4;
  for (uint32_t pixel = 0; pixel < width; ++pixel) {
    uint8_t* const alpha = &dst_a[kBytesPerPixel * pixel];
    *alpha = 0xff;
  }
}
// Argb32 -> ARGB32: copies the pixels unchanged, then unmultiplies the color
// bytes (1..3) by the alpha stored in byte 0.
void Convert_Argb32_ARGB32_SSE(const void* src, uint32_t width, void* dst) {
  uint8_t* const argb = static_cast<uint8_t*>(dst);
  Convert_Argb32_Argb32_SSE(src, width, argb);
  Unmultiply(/*dst=*/argb + 1, /*dst_a=*/argb, width);
}
// Argb32 -> XRGB32: converts as for ARGB32, then overwrites byte 0 of every
// pixel with 255.
void Convert_Argb32_XRGB32_SSE(const void* src, uint32_t width, void* dst) {
  uint8_t* const xrgb = static_cast<uint8_t*>(dst);
  Convert_Argb32_ARGB32_SSE(src, width, xrgb);
  FillAlpha(/*dst_a=*/xrgb, width);
}
// Defines Convert_Argb32_rgbA32_SSE: output bytes 0,1,2,3 come from input
// bytes 1,2,3,0 (alpha moves from first to last).
CONVERT_32_TO_32_FUNC(Convert_Argb32_rgbA32, 0x00030201)
// Argb32 -> RGBA32: reorders the bytes as for rgbA32, then unmultiplies the
// color bytes (0..2) by the alpha stored in byte 3.
void Convert_Argb32_RGBA32_SSE(const void* src, uint32_t width, void* dst) {
  uint8_t* const rgba = static_cast<uint8_t*>(dst);
  Convert_Argb32_rgbA32_SSE(src, width, rgba);
  Unmultiply(/*dst=*/rgba, /*dst_a=*/rgba + 3, width);
}
// Argb32 -> RGBX32: converts as for RGBA32, then overwrites byte 3 of every
// pixel with 255.
void Convert_Argb32_RGBX32_SSE(const void* src, uint32_t width, void* dst) {
  uint8_t* const rgbx = static_cast<uint8_t*>(dst);
  Convert_Argb32_RGBA32_SSE(src, width, rgbx);
  FillAlpha(/*dst_a=*/rgbx + 3, width);
}
// Defines Convert_Argb32_bgrA32_SSE: output bytes 0,1,2,3 come from input
// bytes 3,2,1,0 (full byte reversal within each pixel).
CONVERT_32_TO_32_FUNC(Convert_Argb32_bgrA32, 0x00010203)
// Argb32 -> BGRA32: reorders the bytes as for bgrA32, then unmultiplies the
// color bytes (0..2) by the alpha stored in byte 3.
void Convert_Argb32_BGRA32_SSE(const void* src, uint32_t width, void* dst) {
  uint8_t* const bgra = static_cast<uint8_t*>(dst);
  Convert_Argb32_bgrA32_SSE(src, width, bgra);
  Unmultiply(/*dst=*/bgra, /*dst_a=*/bgra + 3, width);
}
// Argb32 -> BGRX32: converts as for BGRA32, then overwrites byte 3 of every
// pixel with 255.
void Convert_Argb32_BGRX32_SSE(const void* src, uint32_t width, void* dst) {
  uint8_t* const bgrx = static_cast<uint8_t*>(dst);
  Convert_Argb32_BGRA32_SSE(src, width, bgrx);
  FillAlpha(/*dst_a=*/bgrx + 3, width);
}
// The function-generating helper macros are only needed above: undefine all
// three of them, including the FILL variant.
#undef CONVERT_32_TO_32_FUNC
#undef CONVERT_32_TO_32_FILL_FUNC
#undef PSHUFB_CST32
// Installs the SSE converters into WP2ArgbConvertFrom[] and
// WP2ArgbConvertTo[] for the nine 32-bit formats listed below. Entries for
// other formats and the WP2ARGBConvert*[] tables are left untouched.
WP2_TSAN_IGNORE_FUNCTION void ArgbConverterDspInitSSE() {
  struct SseConverters {
    int format;                   // Index into the conversion tables.
    WP2ArgbConverterF to_argb;    // 'format' -> Argb32.
    WP2ArgbConverterF from_argb;  // Argb32 -> 'format'.
  };
  const SseConverters kTable[] = {
      {WP2_Argb_32, Convert_Argb32_Argb32_SSE, Convert_Argb32_Argb32_SSE},
      {WP2_ARGB_32, Convert_ARGB32_Argb32_SSE, Convert_Argb32_ARGB32_SSE},
      {WP2_XRGB_32, Convert_XRGB32_Argb32_SSE, Convert_Argb32_XRGB32_SSE},
      {WP2_rgbA_32, Convert_rgbA32_Argb32_SSE, Convert_Argb32_rgbA32_SSE},
      {WP2_RGBA_32, Convert_RGBA32_Argb32_SSE, Convert_Argb32_RGBA32_SSE},
      {WP2_RGBX_32, Convert_RGBX32_Argb32_SSE, Convert_Argb32_RGBX32_SSE},
      {WP2_bgrA_32, Convert_bgrA32_Argb32_SSE, Convert_Argb32_bgrA32_SSE},
      {WP2_BGRA_32, Convert_BGRA32_Argb32_SSE, Convert_Argb32_BGRA32_SSE},
      {WP2_BGRX_32, Convert_BGRX32_Argb32_SSE, Convert_Argb32_BGRX32_SSE},
  };
  for (const SseConverters& entry : kTable) {
    WP2ArgbConvertFrom[entry.format] = entry.to_argb;
    WP2ArgbConvertTo[entry.format] = entry.from_argb;
  }
}
#endif // WP2_USE_SSE
//------------------------------------------------------------------------------
// NEON implementation (TODO)
//------------------------------------------------------------------------------
} // namespace
// Conversion tables indexed by sample format, filled by
// WP2ArgbConverterInit(). 'Argb' tables convert from/to Argb32, 'ARGB' tables
// convert from/to ARGB32.
WP2ArgbConverterF WP2ArgbConvertFrom[WP2_FORMAT_NUM];
WP2ArgbConverterF WP2ArgbConvertTo[WP2_FORMAT_NUM];
WP2ArgbConverterF WP2ARGBConvertFrom[WP2_FORMAT_NUM];
WP2ArgbConverterF WP2ARGBConvertTo[WP2_FORMAT_NUM];
// Value of WP2GetCPUInfo seen by the last WP2ArgbConverterInit() call.
// It starts out as its own address, presumably so that the first comparison
// against WP2GetCPUInfo fails and the tables get initialized.
static volatile WP2CPUInfo argb_converter_last_cpuinfo_used =
(WP2CPUInfo)&argb_converter_last_cpuinfo_used;
// Fills the four conversion tables with the C converters, then lets the SSE
// initializer override some entries when it is compiled in and the CPU
// reports support. Returns immediately if the tables were already set up for
// the current WP2GetCPUInfo.
WP2_TSAN_IGNORE_FUNCTION void WP2ArgbConverterInit() {
  if (argb_converter_last_cpuinfo_used == WP2GetCPUInfo) return;

  struct CConverters {
    int format;                   // Index into the conversion tables.
    WP2ArgbConverterF to_Argb;    // 'format' -> Argb32.
    WP2ArgbConverterF from_Argb;  // Argb32 -> 'format'.
    WP2ArgbConverterF to_ARGB;    // 'format' -> ARGB32.
    WP2ArgbConverterF from_ARGB;  // ARGB32 -> 'format'.
  };
  const CConverters kTable[] = {
      {WP2_Argb_32, Convert_Argb32_Argb32_C, Convert_Argb32_Argb32_C,
       Convert_Argb32_ARGB32_C, Convert_ARGB32_Argb32_C},
      {WP2_ARGB_32, Convert_ARGB32_Argb32_C, Convert_Argb32_ARGB32_C,
       Convert_ARGB32_ARGB32_C, Convert_ARGB32_ARGB32_C},
      {WP2_XRGB_32, Convert_XRGB32_Argb32_C, Convert_Argb32_XRGB32_C,
       Convert_XRGB32_ARGB32_C, Convert_ARGB32_XRGB32_C},
      {WP2_rgbA_32, Convert_rgbA32_Argb32_C, Convert_Argb32_rgbA32_C,
       Convert_rgbA32_ARGB32_C, Convert_ARGB32_rgbA32_C},
      {WP2_RGBA_32, Convert_RGBA32_Argb32_C, Convert_Argb32_RGBA32_C,
       Convert_RGBA32_ARGB32_C, Convert_ARGB32_RGBA32_C},
      {WP2_RGBX_32, Convert_RGBX32_Argb32_C, Convert_Argb32_RGBX32_C,
       Convert_RGBX32_ARGB32_C, Convert_ARGB32_RGBX32_C},
      {WP2_bgrA_32, Convert_bgrA32_Argb32_C, Convert_Argb32_bgrA32_C,
       Convert_bgrA32_ARGB32_C, Convert_ARGB32_bgrA32_C},
      {WP2_BGRA_32, Convert_BGRA32_Argb32_C, Convert_Argb32_BGRA32_C,
       Convert_BGRA32_ARGB32_C, Convert_ARGB32_BGRA32_C},
      {WP2_BGRX_32, Convert_BGRX32_Argb32_C, Convert_Argb32_BGRX32_C,
       Convert_BGRX32_ARGB32_C, Convert_ARGB32_BGRX32_C},
      {WP2_RGB_24, Convert_RGB32_Argb32_C, Convert_Argb32_RGB32_C,
       Convert_RGB32_ARGB32_C, Convert_ARGB32_RGB32_C},
      {WP2_BGR_24, Convert_BGR32_Argb32_C, Convert_Argb32_BGR32_C,
       Convert_BGR32_ARGB32_C, Convert_ARGB32_BGR32_C},
      {WP2_Argb_38, Convert_Argb38_Argb32_C, Convert_Argb32_Argb38_C,
       Convert_Argb38_ARGB32_C, Convert_ARGB32_Argb38_C},
  };
  for (const CConverters& entry : kTable) {
    WP2ArgbConvertFrom[entry.format] = entry.to_Argb;
    WP2ArgbConvertTo[entry.format] = entry.from_Argb;
    WP2ARGBConvertFrom[entry.format] = entry.to_ARGB;
    WP2ARGBConvertTo[entry.format] = entry.from_ARGB;
  }

  // Optional overrides with optimized implementations.
  if (WP2GetCPUInfo != nullptr) {
#if defined(WP2_USE_SSE)
    if (WP2GetCPUInfo(kSSE)) ArgbConverterDspInitSSE();
#endif
  }
  argb_converter_last_cpuinfo_used = WP2GetCPUInfo;
}
//------------------------------------------------------------------------------
// Combinations
namespace {
template <int from, int to>
void WP2ConvertFromTo(const void* src, uint32_t width, void* dst) {
static_assert(from > WP2_ARGB_32 && from <= WP2_BGR_24, "Unimplemented");
static_assert(to > WP2_ARGB_32 && to <= WP2_BGR_24, "Unimplemented");
const size_t src_bytes_per_pixel =
WP2FormatBpp(static_cast<WP2SampleFormat>(from));
const size_t dst_bytes_per_pixel =
WP2FormatBpp(static_cast<WP2SampleFormat>(to));
// Use a temporary layout Argb or ARGB to go from SRC to DST.
// Do not premultiply or unmultiply unless necessary.
// The functions below do not work inplace so use a temporary buffer.
uint8_t tmp[128];
while (width > 0) {
const uint32_t num_pixels = std::min<uint32_t>(width, sizeof(tmp) / 4);
if (WP2IsPremultiplied(static_cast<WP2SampleFormat>(from))) {
WP2ArgbConvertFrom[from](src, num_pixels, tmp);
WP2ArgbConvertTo[to](tmp, num_pixels, dst);
} else {
WP2ARGBConvertFrom[from](src, num_pixels, tmp);
WP2ARGBConvertTo[to](tmp, num_pixels, dst);
}
src = reinterpret_cast<const uint8_t*>(src) +
num_pixels * src_bytes_per_pixel;
dst = reinterpret_cast<uint8_t*>(dst) + num_pixels * dst_bytes_per_pixel;
width -= num_pixels;
}
}
// Returns the WP2ConvertFromTo<from, to> instantiation matching the runtime
// destination format 'to', or nullptr if 'to' is not one of the formats
// handled here.
template <int from>
WP2ArgbConverterF WP2ConvertFromToFunc(WP2SampleFormat to) {
  switch (to) {
    case WP2_XRGB_32:
      return WP2ConvertFromTo<from, WP2_XRGB_32>;
    case WP2_rgbA_32:
      return WP2ConvertFromTo<from, WP2_rgbA_32>;
    case WP2_RGBA_32:
      return WP2ConvertFromTo<from, WP2_RGBA_32>;
    case WP2_RGBX_32:
      return WP2ConvertFromTo<from, WP2_RGBX_32>;
    case WP2_bgrA_32:
      return WP2ConvertFromTo<from, WP2_bgrA_32>;
    case WP2_BGRA_32:
      return WP2ConvertFromTo<from, WP2_BGRA_32>;
    case WP2_BGRX_32:
      return WP2ConvertFromTo<from, WP2_BGRX_32>;
    case WP2_RGB_24:
      return WP2ConvertFromTo<from, WP2_RGB_24>;
    case WP2_BGR_24:
      return WP2ConvertFromTo<from, WP2_BGR_24>;
    default:
      return nullptr;
  }
}
} // namespace
//------------------------------------------------------------------------------
// Returns a function converting pixels from format 'from' to format 'to', or
// nullptr when no such conversion is available. Lookup order:
//  1. direct table entry when either side is Argb32 or ARGB32,
//  2. plain copy when both formats are equal (3, 4 or 8 bytes per pixel),
//  3. two-step conversion through a temporary Argb32/ARGB32 buffer.
WP2ArgbConverterF WP2ConversionFunction(WP2SampleFormat from,
                                        WP2SampleFormat to) {
  if (to == WP2_Argb_32) return WP2ArgbConvertFrom[from];
  if (to == WP2_ARGB_32) return WP2ARGBConvertFrom[from];
  if (from == WP2_Argb_32) return WP2ArgbConvertTo[to];
  if (from == WP2_ARGB_32) return WP2ARGBConvertTo[to];

  if (from == to) {
    switch (WP2FormatBpp(from)) {
      case 3:
        return WP2Copy_C<3>;
      case 4:
        return WP2Copy_C<4>;
      case 8:
        return WP2Copy_C<8>;
      default:
        break;  // No copy for this size: fall through to the generic path.
    }
  }

  switch (from) {
    case WP2_XRGB_32:
      return WP2ConvertFromToFunc<WP2_XRGB_32>(to);
    case WP2_rgbA_32:
      return WP2ConvertFromToFunc<WP2_rgbA_32>(to);
    case WP2_RGBA_32:
      return WP2ConvertFromToFunc<WP2_RGBA_32>(to);
    case WP2_RGBX_32:
      return WP2ConvertFromToFunc<WP2_RGBX_32>(to);
    case WP2_bgrA_32:
      return WP2ConvertFromToFunc<WP2_bgrA_32>(to);
    case WP2_BGRA_32:
      return WP2ConvertFromToFunc<WP2_BGRA_32>(to);
    case WP2_BGRX_32:
      return WP2ConvertFromToFunc<WP2_BGRX_32>(to);
    case WP2_RGB_24:
      return WP2ConvertFromToFunc<WP2_RGB_24>(to);
    case WP2_BGR_24:
      return WP2ConvertFromToFunc<WP2_BGR_24>(to);
    default:
      return nullptr;
  }
}
//------------------------------------------------------------------------------