blob: 4513a68b0ee9fc168ca3f6c6919946a61e0ba9c2 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// -----------------------------------------------------------------------------
//
// Colorspace utilities
//
// Author: Skal (pascal.massimino@gmail.com)
#include "src/utils/csp.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
#include "src/dsp/dsp.h"
#include "src/dsp/math.h"
#include "src/utils/ans.h"
#include "src/utils/ans_utils.h"
#include "src/utils/data_source.h"
#include "src/utils/utils.h"
#include "src/utils/vector.h"
#include "src/wp2/base.h"
#include "src/wp2/format_constants.h"
namespace WP2 {
//------------------------------------------------------------------------------
namespace {
int16_t ToInt16(int32_t v, int32_t min_v, int32_t max_v) {
return (int16_t)Clamp(v, min_v, max_v); // Rounding errors may happen.
}
int16_t RoundToInt16(double v, int16_t max_v) {
assert(v >= -(double)max_v && v <= (double)max_v);
return (int16_t)lround(v);
}
bool TryInvertMatrix(const double m[9], double out[9]) {
const double det = m[0] * (m[4] * m[8] - m[5] * m[7]) -
m[3] * (m[1] * m[8] - m[7] * m[2]) +
m[6] * (m[1] * m[5] - m[4] * m[2]);
if (det == 0.) return false;
const double idet = 1.0 / det;
out[0] = idet * (m[4] * m[8] - m[5] * m[7]);
out[3] = idet * (m[6] * m[5] - m[3] * m[8]);
out[6] = idet * (m[3] * m[7] - m[4] * m[6]);
out[1] = idet * (m[7] * m[2] - m[1] * m[8]);
out[4] = idet * (m[0] * m[8] - m[2] * m[6]);
out[7] = idet * (m[1] * m[6] - m[0] * m[7]);
out[2] = idet * (m[1] * m[5] - m[2] * m[4]);
out[5] = idet * (m[2] * m[3] - m[0] * m[5]);
out[8] = idet * (m[0] * m[4] - m[1] * m[3]);
return true;
}
double Norm2(const double a[3]) {
return a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
}
// WARNING! This is a 'special' cross-product function that ensures out[0] >= 0.
double CrossProduct(const double a[3], const double b[3], double out[3]) {
out[0] = a[1] * b[2] - a[2] * b[1];
out[1] = a[2] * b[0] - a[0] * b[2];
out[2] = a[0] * b[1] - a[1] * b[0];
double norm = Norm2(out);
if (norm > 0.) {
const double inv_norm = (out[0] < 0 ? -1. : 1.) / std::sqrt(norm);
out[0] *= inv_norm;
out[1] *= inv_norm;
out[2] *= inv_norm;
}
return norm;
}
float Distance(const double a[3], const double b[3]) {
float dot = std::fabs(a[0] * b[0] + a[1] * b[1] + a[2] * b[2]);
if (dot != 0.) dot /= std::sqrt(Norm2(a) * Norm2(b));
return 1.f - dot;
}
void MultiplyMatrix(double m[9], double by) {
for (int i = 0; i < 9; ++i) m[i] *= by;
}
} // namespace
//------------------------------------------------------------------------------
constexpr uint32_t CSPTransform::kMtxBits;
constexpr uint32_t CSPTransform::kMtxShift;
constexpr int16_t CSPTransform::kDefaultRgbAvg[3];
static constexpr int32_t kMtxFixedPointMul = 1 << CSPTransform::kMtxShift;
static constexpr int16_t kMaxMtxValue = (1 << CSPTransform::kMtxBits) - 1;
//------------------------------------------------------------------------------
void CSPTransform::StoreCustomYUVBounds() { // Compute min/max YUV values.
int32_t min[3] = {0}, max[3] = {0};
for (uint32_t i = 0; i < 3; ++i) {
for (uint32_t j = 0; j < 3; ++j) {
const int16_t rgb_min = 0 - avg8_[j];
const int16_t rgb_max = kRgbMax - avg8_[j];
const int32_t coeff = m_[i * 3 + j];
min[i] += coeff * ((coeff > 0) ? rgb_min : rgb_max);
max[i] += coeff * ((coeff > 0) ? rgb_max : rgb_min);
}
min[i] = RightShiftRound(min[i], kMtxShift);
max[i] = RightShiftRound(max[i], kMtxShift);
}
const int32_t global_min = std::min({min[0], min[1], min[2]});
const int32_t global_max = std::max({max[0], max[1], max[2]});
SetYUVBounds(global_min, global_max);
}
bool CSPTransform::Init(const int16_t yuv_to_rgb_matrix[9],
const int16_t rgb_avg[3]) {
for (size_t i = 0; i < 9; ++i) im_[i] = yuv_to_rgb_matrix[i];
for (uint32_t i : {0, 1, 2}) avg8_[i] = rgb_avg[i];
for (uint32_t i : {0, 1, 2}) avg10_[i] = LeftShift(avg8_[i], 2);
if (!ComputeRGBToYUVMatrix()) return false;
StoreCustomYUVBounds();
type_ = Csp::kCustom;
return true;
}
WP2Status CSPTransform::Init(Csp csp_type, const ArgbBuffer& argb_buffer) {
if (csp_type == Csp::kCustom) {
const WP2Status status = Optimize(argb_buffer);
// Stop here if success or unrecoverable error. Otherwise fallback on YCoCg.
if (status != WP2_STATUS_INVALID_COLORSPACE) return status;
csp_type = Csp::kYCoCg;
}
if (csp_type == Csp::kYCoCg) {
InitYCoCg();
} else if (csp_type == Csp::kYCbCr) {
InitYCbCr();
} else {
assert(csp_type == Csp::kYIQ);
InitYIQ();
}
return WP2_STATUS_OK;
}
void CSPTransform::InitYCoCg() {
// The RGB to YUV matrix has to be scaled by 4 to be lossless, so we divide
// the inverse by 4.
static constexpr int16_t I = (kMtxFixedPointMul >> 2);
// clang-format off
static constexpr int16_t kYCoCgMatrix[] = {
I, I, -I,
I, 0, I,
I, -I, -I
};
// clang-format on
if (!Init(kYCoCgMatrix)) assert(false);
type_ = Csp::kYCoCg;
// With 10 bits par channel ([-512:511] if 'rgb_avg' is 128), the YCoCg
// transformation is lossless.
assert(yuv_depth_.precision() <= kYuvMaxPrec);
}
void CSPTransform::InitYCbCr() {
#if 0 // reference code used for generating kYCbCrMatrix[]
double yuv_to_rgb_matrix[] = {
1.16, 0.00, 1.60,
1.16, -0.39, -0.81,
1.16, 2.01, 0.00
};
// This is not lossless: use the maximum range (yuv_bits_ == kYuvMaxPrec + 1).
MultiplyMatrix(yuv_to_rgb_matrix, kMtxFixedPointMul / 4);
int16_t kYCbCrMatrix[9];
for (int i = 0; i < 9; ++i) {
kYCbCrMatrix[i] = RoundToInt16(yuv_to_rgb_matrix[i], kMaxMtxValue);
}
for (auto v : kYCbCrMatrix) printf("%6d,\n", v);
#else
static constexpr int16_t kYCbCrMatrix[] = {
1188, 0, 1638, 1188, -399, -829, 1188, 2058, 0,
};
#endif
if (!Init(kYCbCrMatrix)) assert(false);
type_ = Csp::kYCbCr;
assert(yuv_depth_.precision() <= kYuvMaxPrec);
}
void CSPTransform::InitYIQ() {
#if 0 // reference code used for generating kIQMatrix[]
static constexpr double rgb_to_yuv_matrix[] = {
0.299, 0.587, 0.114,
0.596, -0.274, -0.322,
0.211, -0.523, 0.312
};
double yuv_to_rgb_matrix[9];
TryInvertMatrix(rgb_to_yuv_matrix, yuv_to_rgb_matrix);
// This is not lossless: use the maximum range (yuv_bits_ == kYuvMaxPrec + 1).
MultiplyMatrix(yuv_to_rgb_matrix, kMtxFixedPointMul / 2);
int16_t kYIQMatrix[9];
for (int i = 0; i < 9; ++i) {
kYIQMatrix[i] = RoundToInt16(yuv_to_rgb_matrix[i], kMaxMtxValue);
}
for (auto v : kYIQMatrix) printf("%6d,\n", v);
#else
static constexpr int16_t kYIQMatrix[] = {
2048, 1958, 1273, 2048, -558, -1325, 2048, -2260, 3483,
};
#endif
if (!Init(kYIQMatrix)) assert(false);
type_ = Csp::kYIQ;
assert(yuv_depth_.precision() <= kYuvMaxPrec);
}
void CSPTransform::Print() const {
printf("CSP type %u\n", (uint32_t)type_);
double rgb_to_yuv_matrix[9], yuv_to_rgb_matrix[9];
for (int i = 0; i < 9; ++i) {
rgb_to_yuv_matrix[i] = (double)m_[i] / kMtxFixedPointMul;
yuv_to_rgb_matrix[i] = (double)im_[i] / kMtxFixedPointMul;
}
printf("=== RGB to YUV: ===\n");
for (int j = 0; j < 3; ++j) {
for (int i = 0; i < 3; ++i) {
printf("%10.4f ", rgb_to_yuv_matrix[i + j * 3]);
}
printf("\n");
}
printf("=== YUV to RGB: ===\n");
for (int j = 0; j < 3; ++j) {
for (int i = 0; i < 3; ++i) {
printf("%10.4f ", yuv_to_rgb_matrix[i + j * 3]);
}
printf("\n");
}
printf("=== avg: ");
printf("%3d %3d %3d", avg8_[0], avg8_[1], avg8_[2]);
printf("\n");
}
//------------------------------------------------------------------------------
void CSPTransform::SetYUVBounds(int16_t yuv_min, int16_t yuv_max) {
const bool is_signed = (yuv_min < 0);
yuv_depth_ = {WP2Log2Ceil_k(std::max((int16_t)std::abs(yuv_min), yuv_max)) +
(is_signed ? 1u : 0u),
is_signed};
min_yuv_value_ = yuv_min;
max_yuv_value_ = yuv_max;
}
bool CSPTransform::ComputeRGBToYUVMatrix() {
double rgb_to_yuv_matrix[9], yuv_to_rgb_matrix[9];
constexpr double norm = 1. / kMtxFixedPointMul;
for (int i = 0; i < 9; ++i) {
if (std::abs(im_[i]) > kMaxMtxValue) return false;
yuv_to_rgb_matrix[i] = norm * im_[i];
}
if (!TryInvertMatrix(yuv_to_rgb_matrix, rgb_to_yuv_matrix)) return false;
for (int i = 0; i < 9; ++i) {
const double v = rgb_to_yuv_matrix[i] * kMtxFixedPointMul;
if (std::abs(v) > kMaxMtxValue) return false;
m_[i] = RoundToInt16(v, kMaxMtxValue);
}
return true;
}
//------------------------------------------------------------------------------
template <int kNumRgbBits>
static inline void RgbToYuv(const int16_t avg[3], const int16_t m[9],
int16_t min_yuv_value, int16_t max_yuv_value,
int32_t r, int32_t g, int32_t b, int16_t* const y,
int16_t* const u, int16_t* const v) {
constexpr int kNumExtraRgbBits = kNumRgbBits - 8;
constexpr int kShift = CSPTransform::kMtxShift + kNumExtraRgbBits;
// kNumRgbBits to (kNumRgbBits+1)
const int32_t tmp_r = (int32_t)r - avg[0];
const int32_t tmp_g = (int32_t)g - avg[1];
const int32_t tmp_b = (int32_t)b - avg[2];
// (kNumRgbBits+1) to (yuv_bits_ + kMtxShift + kNumExtraRgbBits)
int32_t tmp_y = m[0] * tmp_r + m[1] * tmp_g + m[2] * tmp_b;
int32_t tmp_u = m[3] * tmp_r + m[4] * tmp_g + m[5] * tmp_b;
int32_t tmp_v = m[6] * tmp_r + m[7] * tmp_g + m[8] * tmp_b;
// (yuv_bits_ + kMtxShift + kNumExtraRgbBits) to yuv_bits_
tmp_y = RightShiftRound(tmp_y, kShift);
tmp_u = RightShiftRound(tmp_u, kShift);
tmp_v = RightShiftRound(tmp_v, kShift);
*y = ToInt16(tmp_y, min_yuv_value, max_yuv_value);
*u = ToInt16(tmp_u, min_yuv_value, max_yuv_value);
*v = ToInt16(tmp_v, min_yuv_value, max_yuv_value);
}
template <int kNumRgbBits, typename Trgb>
static inline void YuvToRgb(const int16_t avg[3], const int16_t im[9],
int16_t y, int16_t u, int16_t v, Trgb* const r,
Trgb* const g, Trgb* const b) {
constexpr int kNumExtraRgbBits = kNumRgbBits - 8;
constexpr int kShift = CSPTransform::kMtxShift - kNumExtraRgbBits;
constexpr int kMax = (1 << kNumRgbBits) - 1;
// yuv_bits_ to (kRgbBits+1 + kMtxShift)
int32_t tmp_r = im[0] * y + im[1] * u + im[2] * v;
int32_t tmp_g = im[3] * y + im[4] * u + im[5] * v;
int32_t tmp_b = im[6] * y + im[7] * u + im[8] * v;
// (kRgbBits+1 + kMtxShift) to kNumRgbBits+1
tmp_r = RightShiftRound(tmp_r, kShift);
tmp_g = RightShiftRound(tmp_g, kShift);
tmp_b = RightShiftRound(tmp_b, kShift);
// (kNumRgbBits+1) to kNumRgbBits
*r = (Trgb)Clamp<int32_t>(tmp_r + avg[0], 0, kMax);
*g = (Trgb)Clamp<int32_t>(tmp_g + avg[1], 0, kMax);
*b = (Trgb)Clamp<int32_t>(tmp_b + avg[2], 0, kMax);
// Do not assert that rgb fits, just clamp it (lossy conversion).
}
void CSPTransform::Rgb8ToYuv(int32_t r, int32_t g, int32_t b, int16_t* const y,
int16_t* const u, int16_t* const v) const {
RgbToYuv</*kNumRgbBits=*/8>(avg8_, m_, min_yuv_value_, max_yuv_value_, r, g,
b, y, u, v);
}
void CSPTransform::Rgb10ToYuv(int32_t r, int32_t g, int32_t b, int16_t* const y,
int16_t* const u, int16_t* const v) const {
RgbToYuv</*kNumRgbBits=*/10>(avg10_, m_, min_yuv_value_, max_yuv_value_, r, g,
b, y, u, v);
}
void CSPTransform::YuvToRgb8(int16_t y, int16_t u, int16_t v, int16_t* const r,
int16_t* const g, int16_t* const b) const {
YuvToRgb</*kNumRgbBits=*/8>(avg8_, im_, y, u, v, r, g, b);
}
void CSPTransform::YuvToRgb8(int16_t y, int16_t u, int16_t v, uint8_t* const r,
uint8_t* const g, uint8_t* const b) const {
YuvToRgb</*kNumRgbBits=*/8>(avg8_, im_, y, u, v, r, g, b);
}
void CSPTransform::Rgb8ToYuv(const int16_t* rgb, int16_t* yuv) const {
RgbToYuv</*kNumRgbBits=*/8>(avg8_, m_, min_yuv_value_, max_yuv_value_, rgb[0],
rgb[1], rgb[2], &yuv[0], &yuv[1], &yuv[2]);
// CheckRoundTrip(yuv, rgb);
}
void CSPTransform::YuvToRgb8(const int16_t* yuv, int16_t* rgb) const {
YuvToRgb</*kNumRgbBits=*/8>(avg8_, im_, yuv[0], yuv[1], yuv[2], &rgb[0],
&rgb[1], &rgb[2]);
}
void CSPTransform::Rgb8ToYuv(const uint8_t* rgb, int16_t* yuv) const {
RgbToYuv</*kNumRgbBits=*/8>(avg8_, m_, min_yuv_value_, max_yuv_value_, rgb[0],
rgb[1], rgb[2], &yuv[0], &yuv[1], &yuv[2]);
}
void CSPTransform::YuvToRgb8(const int16_t* yuv, uint8_t* rgb) const {
YuvToRgb</*kNumRgbBits=*/8>(avg8_, im_, yuv[0], yuv[1], yuv[2], &rgb[0],
&rgb[1], &rgb[2]);
}
Ayuv38b CSPTransform::ToYuv(const Argb32b& color) const {
Ayuv38b result;
result.a = color.a;
RgbToYuv</*kNumRgbBits=*/8>(avg8_, m_, min_yuv_value_, max_yuv_value_,
color.r, color.g, color.b, &result.y, &result.u,
&result.v);
return result;
}
Argb32b CSPTransform::ToRgb8(const Ayuv38b& color) const {
Argb32b result;
result.a = color.a;
YuvToRgb</*kNumRgbBits=*/8>(avg8_, im_, color.y, color.u, color.v, &result.r,
&result.g, &result.b);
return result;
}
//------------------------------------------------------------------------------
WP2Status CSPTransform::CustomToArgb(uint32_t width, uint32_t height,
const int16_t* c0_buffer, uint32_t c0_step,
const int16_t* c1_buffer, uint32_t c1_step,
const int16_t* c2_buffer, uint32_t c2_step,
const int16_t* a_buffer, uint32_t a_step,
const CSPMtx& ccsp_to_rgb,
ArgbBuffer* const argb) {
WP2_CHECK_OK(argb->format() == WP2_Argb_32, WP2_STATUS_INVALID_COLORSPACE);
WP2_CHECK_OK(argb->width() == width && argb->height() == height,
WP2_STATUS_BAD_DIMENSION);
int16_t m[9];
for (uint32_t i = 0; i < 9; ++i) {
const int32_t e =
ChangePrecision(ccsp_to_rgb.matrix[i], ccsp_to_rgb.shift, kMtxShift);
WP2_CHECK_OK(std::abs(e) < kMaxMtxValue, WP2_STATUS_INVALID_PARAMETER);
m[i] = (int16_t)e;
}
for (uint32_t y = 0; y < height; ++y) {
uint8_t* const dst_row = argb->GetRow8(y);
if (a_buffer != nullptr) {
for (uint32_t x = 0; x < width; ++x) {
uint8_t* const dst_pixel = &dst_row[x * 4];
dst_pixel[0] =
(uint8_t)Clamp<int32_t>(a_buffer[y * a_step + x], 0, kAlphaMax);
int32_t rgb[3];
Multiply(c0_buffer[x], c1_buffer[x], c2_buffer[x], m, &rgb[0], &rgb[1],
&rgb[2]);
for (uint32_t i : {0, 1, 2}) {
dst_pixel[1 + i] = (uint8_t)Clamp<int32_t>(
RightShiftRound(rgb[i], kMtxShift), 0, dst_pixel[0]);
}
}
} else {
for (uint32_t x = 0; x < width; ++x) {
uint8_t* const dst_pixel = &dst_row[x * 4];
dst_pixel[0] = kAlphaMax;
int32_t rgb[3];
Multiply(c0_buffer[x], c1_buffer[x], c2_buffer[x], m, &rgb[0], &rgb[1],
&rgb[2]);
for (uint32_t i : {0, 1, 2}) {
dst_pixel[1 + i] = (uint8_t)Clamp<int32_t>(
RightShiftRound(rgb[i], kMtxShift), 0, kAlphaMax);
}
}
}
c0_buffer += c0_step;
c1_buffer += c1_step;
c2_buffer += c2_step;
}
return WP2_STATUS_OK;
}
WP2Status CSPTransform::CustomToYuv(uint32_t width, uint32_t height,
const int16_t* c0_buffer, uint32_t c0_step,
const int16_t* c1_buffer, uint32_t c1_step,
const int16_t* c2_buffer, uint32_t c2_step,
const CSPMtx& ccsp_to_rgb,
int16_t* y_buffer, uint32_t y_step,
int16_t* u_buffer, uint32_t u_step,
int16_t* v_buffer, uint32_t v_step) const {
// 'ccsp_to_rgb_matrix' being A, 'm_' (from RGB to YUV) being M:
// A.ccsp = rgb and M.(rgb - avg) = yuv so M.A.ccsp - M.avg = yuv
const int16_t* const a = ccsp_to_rgb.mtx(); // matrix A
const uint32_t a_shift = ccsp_to_rgb.shift; // fixed point bits of A
int16_t ma[9] = {}; // matrix M.A
{
int32_t ma_tmp[9] = {}; // 32 bits necessary for matrix multiplication.
Multiply(m_, a, ma_tmp);
for (uint32_t i = 0; i < 9; ++i) {
ma_tmp[i] = ChangePrecision(ma_tmp[i], kMtxShift + a_shift, kMtxShift);
WP2_CHECK_OK(std::abs(ma_tmp[i]) <= kMaxMtxValue,
WP2_STATUS_INVALID_PARAMETER);
ma[i] = (int16_t)ma_tmp[i];
}
}
int32_t offset[3]; // M.avg
Multiply<int32_t>(avg8_[0], avg8_[1], avg8_[2], m_, &offset[0], &offset[1],
&offset[2]);
// 'offset' is now scaled by kMtxShift so it can be subtracted as is to 'yuv'
// below which will also be scaled by kMtxShift.
for (uint32_t y = 0; y < height; ++y) {
for (uint32_t x = 0; x < width; ++x) {
// TODO(skal): this loop should go in dsp/
int32_t yuv[3];
Multiply<int32_t>(c0_buffer[x], c1_buffer[x], c2_buffer[x], ma, &yuv[0],
&yuv[1], &yuv[2]);
for (uint32_t i : {0, 1, 2}) {
yuv[i] = RightShiftRound(yuv[i] - offset[i], kMtxShift);
yuv[i] = Clamp<int32_t>(yuv[i], min_yuv_value_, max_yuv_value_);
}
y_buffer[x] = (int16_t)yuv[0];
u_buffer[x] = (int16_t)yuv[1];
v_buffer[x] = (int16_t)yuv[2];
}
c0_buffer += c0_step;
c1_buffer += c1_step;
c2_buffer += c2_step;
y_buffer += y_step;
u_buffer += u_step;
v_buffer += v_step;
}
return WP2_STATUS_OK;
}
WP2Status CSPTransform::YuvToCustom(
uint32_t width, uint32_t height, const int16_t* y_buffer, uint32_t y_step,
const int16_t* u_buffer, uint32_t u_step, const int16_t* v_buffer,
uint32_t v_step, const CSPMtx& rgb_to_ccsp, int16_t* c0_buffer,
uint32_t c0_step, int16_t* c1_buffer, uint32_t c1_step, int16_t* c2_buffer,
uint32_t c2_step) const {
// 'rgb_to_ccsp_matrix' being B, 'm_' (from RGB to YUV) being M and
// 'im_' (from YUV to RGB) being N:
// B.rgb = ccsp and N.yuv + avg = rgb so B.N.(yuv + M.avg) = ccsp
const int16_t* const b = rgb_to_ccsp.mtx(); // matrix B
const uint32_t b_shift = rgb_to_ccsp.shift; // fixed point bits of B
int16_t bn[9] = {}; // matrix B.N
{
int32_t bn_tmp[9] = {}; // 32 bits necessary for matrix multiplication.
Multiply(b, im_, bn_tmp);
for (uint32_t i = 0; i < 9; ++i) {
bn_tmp[i] = ChangePrecision(bn_tmp[i], b_shift + kMtxShift, kMtxShift);
WP2_CHECK_OK(std::abs(bn_tmp[i]) < kMaxMtxValue,
WP2_STATUS_INVALID_PARAMETER);
bn[i] = (int16_t)bn_tmp[i];
}
}
int16_t offset[3]; // M.avg
Multiply(avg8_[0], avg8_[1], avg8_[2], m_, kMtxShift, &offset[0], &offset[1],
&offset[2]);
WP2CSPConverterInit();
for (uint32_t y = 0; y < height; ++y) {
WP2YuvToCustom(y_buffer, u_buffer, v_buffer, offset, bn, c0_buffer,
c1_buffer, c2_buffer, width);
y_buffer += y_step;
u_buffer += u_step;
v_buffer += v_step;
c0_buffer += c0_step;
c1_buffer += c1_step;
c2_buffer += c2_step;
}
return WP2_STATUS_OK;
}
//------------------------------------------------------------------------------
bool CSPTransform::CheckRoundTrip(const int16_t yuv[3],
const int16_t rgb[3]) const {
int16_t out[3];
YuvToRgb8(yuv, out);
const int16_t delta =
abs(rgb[0] - out[0]) + abs(rgb[1] - out[1]) + abs(rgb[2] - out[2]);
const int16_t kErrorThreshold = 5;
if (delta > kErrorThreshold) {
static int cnt = 0;
printf("#%d: %d %d %d / %d %d %d (yuv=%d %d %d) err=%d\n", cnt, rgb[0],
rgb[1], rgb[2], out[0], out[1], out[2], yuv[0], yuv[1], yuv[2],
delta);
if (cnt == 0) Print();
++cnt;
if (cnt == 10) exit(0);
return false;
}
return true;
}
//------------------------------------------------------------------------------
// Returns |v|^2. Result is <= 3 * 2^(15+15).
static int64_t GetVecSqLen(int16_t v0, int16_t v1, int16_t v2) {
return (int64_t)v0 * v0 + (int64_t)v1 * v1 + (int64_t)v2 * v2;
}
// Predicts a vector element from the others and the magnitude.
static int16_t CompleteVector(int16_t v0, int16_t v1, int64_t v_sq_len) {
// No need to worry about fixed point.
int64_t v2 =
SqrtFloor(SafeSub(v_sq_len, (int64_t)v0 * v0 + (int64_t)v1 * v1));
if (std::abs(GetVecSqLen(v0, v1, v2) - v_sq_len) >
std::abs(GetVecSqLen(v0, v1, v2 + 1) - v_sq_len)) {
v2 += 1; // In case sqrt floor was "worse" than ceil.
}
return (int16_t)Clamp<int64_t>(v2, std::numeric_limits<int16_t>::min(),
std::numeric_limits<int16_t>::max());
}
//------------------------------------------------------------------------------
static void PutSValue(int32_t v, uint32_t bits, ANSEnc* const enc) {
if (enc->PutBool(v != 0, "non_zero")) {
const bool sign = (v < 0);
enc->PutUValue((uint32_t)abs(v) - 1, bits - 1, "abs_val");
enc->PutBool(sign, "sign");
}
}
static int32_t ReadSValue(uint32_t bits, ANSDec* const dec) {
int32_t v = 0;
if (dec->ReadBool("non_zero")) {
v = 1 + (int32_t)dec->ReadUValue(bits - 1, "abs_val");
if (dec->ReadBool("sign")) v = -v;
}
return v;
}
// The matrix needs to be precise enough but encodable through ANS.
static constexpr int32_t kEigenBits = 14;
static_assert(kEigenBits <= CSPTransform::kMtxBits &&
kEigenBits <= kANSMaxUniformBits &&
kEigenBits <= kANSMaxRangeBits,
"kEigenBits");
static constexpr int32_t kMaxDelta = 5;
static constexpr int32_t kMaxDeltaBits = WP2Log2Ceil_k(kMaxDelta) + 1;
static inline int32_t PutDelta(int16_t original, int16_t predicted,
ANSEnc* const enc) {
const int32_t delta = Clamp(original - predicted, -kMaxDelta, kMaxDelta);
PutSValue(delta, kMaxDeltaBits, enc);
return delta;
}
static inline int32_t ReadDelta(ANSDec* const dec) {
return ReadSValue(kMaxDeltaBits, dec);
}
// For each of these vectors of indices, the last element will be predicted.
static uint32_t kPredictedVectors[3][3] = {{1, 4, 7}, {0, 1, 2}, {3, 4, 5}};
//------------------------------------------------------------------------------
// Encodes 'm' assuming that:
// - all column- and row-vectors have the same magnitude,
// - the first row and column contain only positive values,
// - column- and row-vectors are mutually orthogonal.
static void WriteEigenMatrix(const int16_t m[9], ANSEnc* const enc) {
ANSDebugPrefix prefix(enc, "eigen_mtx");
// 0 1 2
// Final matrix that will be read. For reference: 3 4 5
// 6 7 8
int16_t dm[9] = {0};
for (uint32_t i : {0, 3, 6}) dm[i] = std::abs(m[i]);
int64_t sq_len = GetVecSqLen(dm[0], dm[3], dm[6]);
uint32_t num_sq_len = 1;
// Find the maximum value of the middle top element based on top row.
const int16_t max_dm1 = CompleteVector(dm[0], 0, sq_len) + kMaxDelta;
dm[1] = std::min((int16_t)std::abs(m[1]), max_dm1);
// Find the maximum value of the middle element based on middle row, column.
const int16_t max_dm4 = RightShiftRound(CompleteVector(dm[1], 0, sq_len) +
CompleteVector(dm[3], 0, sq_len),
1) +
kMaxDelta;
dm[4] = std::min((int16_t)std::abs(m[4]), max_dm4);
// Find out and signal the maximum number of bits needed to encode 0,1,2,3,4.
const int32_t max_dm = std::max({dm[0], dm[3], dm[6], dm[1], dm[4]});
assert(max_dm < (1 << kEigenBits));
const uint32_t num_bits = std::max(1u, (uint32_t)WP2Log2Ceil_k(max_dm + 1));
enc->PutRange(num_bits, 1, kEigenBits, "num_bits");
// Encode 0,3,6 as is and 1,4 as clamped.
for (uint32_t i : {0, 3, 6}) enc->PutUValue(dm[i], num_bits, "value");
(max_dm1 + 1 < (1 << num_bits)) ? enc->PutRValue(dm[1], max_dm1 + 1, "value")
: enc->PutUValue(dm[1], num_bits, "value");
(max_dm4 + 1 < (1 << num_bits)) ? enc->PutRValue(dm[4], max_dm4 + 1, "value")
: enc->PutUValue(dm[4], num_bits, "value");
// Other elements can be predicted; adjust by +-kMaxDelta to avoid any loss.
for (const auto& vec : kPredictedVectors) {
const uint32_t a = vec[0], b = vec[1], c = vec[2]; // Indices.
dm[c] = CompleteVector(dm[a], dm[b], sq_len);
dm[c] += PutDelta(std::abs(m[c]), dm[c], enc);
// Improve 'sq_len' accuracy by taking into account the corrected values.
sq_len = DivRound<int64_t>(
sq_len * num_sq_len + GetVecSqLen(dm[a], dm[b], dm[c]), num_sq_len + 1);
++num_sq_len;
}
// The last one reuses the predictions and deltas so compute it after them.
dm[8] = RightShiftRound((CompleteVector(dm[2], dm[5], sq_len) +
CompleteVector(dm[6], dm[7], sq_len)),
1);
dm[8] += PutDelta(std::abs(m[8]), dm[8], enc);
}
static void ReadEigenMatrix(ANSDec* const dec, int16_t m[9]) {
ANSDebugPrefix prefix(dec, "eigen_mtx");
const int32_t num_bits = dec->ReadRange(1u, kEigenBits, "num_bits");
for (uint32_t i : {0, 3, 6}) m[i] = dec->ReadUValue(num_bits, "value");
int64_t sq_len = GetVecSqLen(m[0], m[3], m[6]);
uint32_t num_sq_len = 1;
const int16_t range_m1 = CompleteVector(m[0], 0, sq_len) + kMaxDelta + 1;
m[1] = (range_m1 < (1 << num_bits)) ? dec->ReadRValue(range_m1, "value")
: dec->ReadUValue(num_bits, "value");
const int16_t range_m4 = RightShiftRound(CompleteVector(m[1], 0, sq_len) +
CompleteVector(m[3], 0, sq_len),
1) +
kMaxDelta + 1;
m[4] = (range_m4 < (1 << num_bits)) ? dec->ReadRValue(range_m4, "value")
: dec->ReadUValue(num_bits, "value");
for (const auto& vec : kPredictedVectors) {
const uint32_t a = vec[0], b = vec[1], c = vec[2];
m[c] = CompleteVector(m[a], m[b], sq_len) + ReadDelta(dec);
sq_len = DivRound<int64_t>(
sq_len * num_sq_len + GetVecSqLen(m[a], m[b], m[c]), num_sq_len + 1);
++num_sq_len;
}
m[8] = RightShiftRound(
CompleteVector(m[2], m[5], sq_len) + CompleteVector(m[6], m[7], sq_len),
1);
m[8] += ReadDelta(dec);
// Find signs for the elements 4,5,7,8. As row-vectors are orthogonal, the
// element generating the highest sub-dot-product has an opposite sign.
for (uint32_t row : {3, 6}) { // Middle and bottom vectors.
int32_t subdot[3]; // Partial dot-product results with top vector.
for (uint32_t col : {0, 1, 2}) subdot[col] = (int32_t)m[col] * m[col + row];
const auto highest = std::max_element(subdot, subdot + 3) - subdot;
for (int32_t col : {1, 2}) {
if (highest == 0 || highest == col) m[col + row] = -m[col + row];
}
}
}
WP2Status CSPTransform::MakeEigenMatrixEncodable(const int16_t m[9],
int16_t dm[9],
int32_t* const error) {
WP2MathInit();
ANSEnc enc;
WriteEigenMatrix(m, &enc);
WP2_CHECK_STATUS(enc.AssembleToBitstream(/*clear_tokens=*/true));
Vector_u8 bits;
WP2_CHECK_STATUS(enc.WriteBitstreamTo(bits));
ExternalDataSource data(bits.data(), bits.size());
ANSDec dec(&data);
ReadEigenMatrix(&dec, dm);
WP2_CHECK_STATUS(dec.GetStatus());
*error = 0;
for (uint32_t i = 0; i < 9; ++i) {
*error = std::max(*error, std::abs(m[i] - dm[i]));
}
return WP2_STATUS_OK;
}
//------------------------------------------------------------------------------
WP2Status CSPTransform::Write(ANSEnc* const enc) const {
enc->PutRValue((uint32_t)type_, kNumCspTypes, "csp_type");
if (type_ == Csp::kCustom) {
int32_t error;
int16_t encodable_matrix[9];
WP2_CHECK_STATUS(MakeEigenMatrixEncodable(im_, encodable_matrix, &error));
if (enc->PutBool(error == 0, "eigen")) {
WriteEigenMatrix(im_, enc); // Matrix can be encoded with fewer bits.
} else {
for (auto& v : im_) PutSValue(v, kANSMaxUniformBits + 1, enc);
}
for (auto& v : avg8_) enc->PutUValue(v, kRgbBits, "avg");
for (int i : {0, 1, 2}) {
if (avg10_[i] != LeftShift(avg8_[i], 2)) assert(false);
}
}
return WP2_STATUS_OK;
}
WP2Status CSPTransform::Read(ANSDec* const dec) {
type_ = (Csp)dec->ReadRValue(kNumCspTypes, "csp_type");
if (type_ == Csp::kCustom) {
SetYUVBounds(-(1 << kYuvMaxPrec), (1 << kYuvMaxPrec) - 1);
assert(yuv_depth_.precision() == kYuvMaxPrec);
if (dec->ReadBool("eigen")) {
ReadEigenMatrix(dec, im_);
} else {
// Completely custom matrices can be decoded.
for (auto& v : im_) v = ReadSValue(kANSMaxUniformBits + 1, dec);
}
for (auto& v : avg8_) v = (uint16_t)dec->ReadUValue(kRgbBits, "avg");
for (uint32_t i : {0, 1, 2}) avg10_[i] = LeftShift(avg8_[i], 2);
#if !defined(WP2_REDUCE_BINARY_SIZE)
if (!ComputeRGBToYUVMatrix()) { // (direct matrix is used in vwp2)
// not critical, but shouldn't happen in an ideal world!
}
#endif
} else {
WP2_CHECK_STATUS(Init(type_));
}
WP2_CHECK_STATUS(dec->GetStatus());
return WP2_STATUS_OK;
}
//------------------------------------------------------------------------------
namespace {
template <typename A>
constexpr double Dot(const A a[3], const A b[3]) {
return (double)a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
}
constexpr uint32_t kAvgBlk = 8; // size of the averaging block
double EdgeStrength(const uint8_t* src, size_t stride) {
const double px = src[0], px_right = src[4], px_bottom = src[stride];
const double edge = std::abs(px_right - px) + std::abs(px_bottom - px);
return std::max(0., edge - 2.);
}
// returns the number of samples added to avg_sum[] average accumulator.
uint32_t ExtractCov(const uint8_t* argb, uint32_t w, uint32_t h, size_t stride,
double cov[3][3], double avg_sum[3], bool premultiplied) {
double avg[3] = {0., 0., 0.};
double tmp[3 * kAvgBlk * kAvgBlk];
uint8_t unmult[4 * kAvgBlk * kAvgBlk];
if (!premultiplied) {
uint8_t* dst = unmult;
for (uint32_t j = 0; j < h; ++j) {
WP2ARGBConvertTo[WP2_Argb_32](argb, w, dst);
argb += stride;
dst += 4 * kAvgBlk;
}
argb = unmult;
stride = 4 * kAvgBlk;
}
assert(w <= kAvgBlk && h <= kAvgBlk && w * h > 0);
uint32_t num_samples = 0;
for (uint32_t idx = 0, j = 0; j < h - 1; ++j, argb += stride) {
for (uint32_t i = 0; i < w - 1; ++i) {
double r = argb[4 * i + 1];
double g = argb[4 * i + 2];
double b = argb[4 * i + 3];
double x = EdgeStrength(argb + 4 * i + 1, stride);
double y = EdgeStrength(argb + 4 * i + 2, stride);
double z = EdgeStrength(argb + 4 * i + 3, stride);
avg_sum[0] += r;
avg_sum[1] += g;
avg_sum[2] += b;
avg[0] += x;
avg[1] += y;
avg[2] += z;
tmp[idx++] = x;
tmp[idx++] = y;
tmp[idx++] = z;
++num_samples;
}
}
avg[0] /= num_samples;
avg[1] /= num_samples;
avg[2] /= num_samples;
for (uint32_t idx = 0, i = 0; i < num_samples; ++i, idx += 3) {
const double x = tmp[idx + 0] - avg[0];
const double y = tmp[idx + 1] - avg[1];
const double z = tmp[idx + 2] - avg[2];
cov[0][0] += x * x;
cov[0][1] += y * x;
cov[0][2] += z * x;
cov[1][0] += x * y;
cov[1][1] += y * y;
cov[1][2] += z * y;
cov[2][0] += x * z;
cov[2][1] += y * z;
cov[2][2] += z * z;
}
return num_samples;
}
bool ExtractCovarianceMtx(const uint8_t* argb, uint32_t w, uint32_t h,
size_t stride, double cov[3][3], int16_t avg[3],
bool premultiplied) {
if (w <= 4u || h <= 4u || (uint64_t)w * h <= 64 * 64) {
return false; // not enough sample to build reliable estimate!
}
double avg_sum[3];
for (uint32_t j = 0; j < 3; ++j) {
for (uint32_t i = 0; i < 3; ++i) cov[j][i] = 0.;
avg_sum[j] = 0.;
}
uint32_t num_samples = 0;
for (uint32_t j = 0; j < h; j += kAvgBlk, argb += kAvgBlk * stride) {
for (uint32_t i = 0; i < w; i += kAvgBlk) {
const uint32_t W = std::min(i + kAvgBlk, w) - i;
const uint32_t H = std::min(j + kAvgBlk, h) - j;
num_samples +=
ExtractCov(argb + i, W, H, stride, cov, avg_sum, premultiplied);
}
}
// Normalize a bit the covariance matrix (not totally necessary, just comfort)
for (uint32_t j = 0; j < 3; ++j) {
for (uint32_t i = 0; i < 3; ++i) cov[j][i] /= num_samples;
}
// store the average
for (uint32_t i = 0; i < 3; ++i) {
if (i == 0) {
// We only centralize the 'luma' channel. Empirically, the U/V channels
// are more unstable to centralization and renormalization (during later
// calls NormalizeFromInput()).
avg_sum[i] /= num_samples;
avg[i] = (int16_t)Clamp<double>(avg_sum[i], 0, kRgbMax);
} else {
avg[i] = kRgbMax >> 1;
}
}
return true;
}
// see https://www.geometrictools.com/Documentation/RobustEigenSymmetric3x3.pdf
void ComputeEigenVector(double lambda, const double cov[3][3], double v[3]) {
double v0[3] = {cov[0][0] - lambda, cov[0][1], cov[0][2]};
double v1[3] = {cov[0][1], cov[1][1] - lambda, cov[1][2]};
double v2[3] = {cov[0][2], cov[1][2], cov[2][2] - lambda};
double tmp[2][3], *best = v;
double norm0 = CrossProduct(v0, v1, v);
double norm1 = CrossProduct(v1, v2, tmp[0]);
if (norm1 > norm0) {
best = tmp[1];
norm0 = norm1;
}
norm1 = CrossProduct(v2, v0, tmp[1]);
if (norm1 > norm0) best = tmp[1];
if (best != v) memcpy(v, best, 3 * sizeof(v[0]));
}
void ComputeEigenValues(double cov[3][3], double out[3]) {
const double a00 = cov[0][0], a01 = cov[0][1], a02 = cov[0][2];
const double a11 = cov[1][1], a12 = cov[1][2];
const double a22 = cov[2][2];
const double max = std::max({a00, a01, a02, a11, a12, a22});
if (max == 0.) { // zero matrix?
out[0] = out[1] = out[2] = 0.;
return;
}
const double norm = a01 * a01 + a02 * a02 + a12 * a12;
if (norm == 0.) { // matrix is diagonal
out[0] = a00;
out[1] = a11;
out[2] = a22;
return;
}
// We don't need to pre-condition the matrix by dividing with 'max',
// because the cov[][] matrix is already quite normalized.
// The following is a glorified robust-solving of a
// canonical 3rd degree equation.
const double q = (a00 + a11 + a22) / 3.;
const double b0 = a00 - q, b1 = a11 - q, b2 = a22 - q;
const double p =
std::sqrt((b0 * b0 + b1 * b1 + b2 * b2 + 2. * norm) * (2. / 3.));
const double c0 = b1 * b2 - a12 * a12;
const double c1 = a01 * b2 - a12 * a02;
const double c2 = a01 * a12 - b1 * a02;
const double hdet = 4.0 * (b0 * c0 - a01 * c1 + a02 * c2) / (p * p * p);
const double angle = std::acos(Clamp(hdet, -1., 1.)) / 3.;
const double beta0 = std::cos(angle); // largest solution
const double beta2 = std::cos(angle + (2. * M_PI / 3.));
const double beta1 = -(beta0 + beta2);
// final result
out[0] = q + p * beta0;
out[1] = q + p * beta1;
out[2] = q + p * beta2;
}
bool ExtractEigenMtx(double cov[3][3], double out[9]) {
// Compute the eigen-values
double lambdas[3];
ComputeEigenValues(cov, lambdas);
// very simple and conservative classifier
if (lambdas[0] < 10.) return false;
// Compute 1rst eigen-vector
double ev[3][3];
ComputeEigenVector(lambdas[0], cov, ev[0]);
// for the partitioning algo (based on variance) to work correctly, we
// need a primary axis that is close to a 'luma-like' one.
const double kLumaAxis[3] = {.299, .587, .114}; // L2-norm = 0.668
if (Distance(ev[0], kLumaAxis) > 0.4 * 0.668) {
return false;
}
// Compute 2nd eigen-vector
const double kThreshold = 0.000001;
if (lambdas[1] <= kThreshold) { // MONOCHROME
const double kAxis[3] = {0., 1., -1.};
CrossProduct(kAxis, ev[0], ev[1]);
} else {
// remove ev[0] components from columns
for (uint32_t i = 0; i < 3; ++i) {
const double c = Dot(cov[i], ev[0]);
for (uint32_t j = 0; j < 3; ++j) cov[i][j] -= c * ev[0][j];
}
ComputeEigenVector(lambdas[1], cov, ev[1]);
}
CrossProduct(ev[0], ev[1], ev[2]);
// final dispatch
for (uint32_t j = 0; j < 3; ++j) {
for (uint32_t i = 0; i < 3; ++i) out[3 * i + j] = ev[j][i];
}
return true;
}
bool NormalizeFromInput(const uint8_t* argb, uint32_t w, uint32_t h,
size_t stride, bool premultiplied, double max_value,
const int16_t avg[3], double custom_to_rgb_matrix[9],
int16_t out[9]) {
double rgb_to_custom_matrix[9];
if (!TryInvertMatrix(custom_to_rgb_matrix, rgb_to_custom_matrix)) {
return false;
}
double maxs = -max_value, mins = max_value;
for (uint32_t y = 0; y < h; ++y) {
for (uint32_t x = 0; x < w; ++x) {
const uint8_t a = argb[4 * x + 0];
const double inv_a =
(premultiplied && a < 255) ? (a > 0 ? 255. / a : 0.) : 1.;
const double rgb[3] = {inv_a * argb[4 * x + 1] - avg[0],
inv_a * argb[4 * x + 2] - avg[1],
inv_a * argb[4 * x + 3] - avg[2]};
const double X = Dot(rgb_to_custom_matrix + 0, rgb);
const double Y = Dot(rgb_to_custom_matrix + 3, rgb);
const double Z = Dot(rgb_to_custom_matrix + 6, rgb);
maxs = std::max({maxs, X, Y, Z});
mins = std::min({mins, X, Y, Z});
}
argb += stride;
}
// Scale rgb_to_custom_matrix so it goes from ~kRgbBits to yuv_bits_ (so it
// uses the whole yuv_bits_ range).
const double max_custom = std::max({1., std::abs(mins), std::abs(maxs)});
const double norm = max_value / max_custom;
for (uint32_t i = 0; i < 9; ++i) rgb_to_custom_matrix[i] *= norm;
// Compute custom_to_rgb_matrix, prepare it for fixed point.
if (!TryInvertMatrix(rgb_to_custom_matrix, custom_to_rgb_matrix)) {
return false;
}
MultiplyMatrix(custom_to_rgb_matrix, kMtxFixedPointMul);
uint32_t max = 0;
for (uint32_t i = 0; i < 9; ++i) {
out[i] = (int16_t)lround(custom_to_rgb_matrix[i]);
max = std::max(max, (uint32_t)std::abs(out[i]));
}
constexpr uint32_t kMin = 10, kMax = (1 << kEigenBits) - 1;
if (max < kMin || max > kMax) return false;
return true;
}
} // anonymous namespace
//------------------------------------------------------------------------------
WP2Status CSPTransform::Optimize(const ArgbBuffer& image) {
WP2_CHECK_OK(image.format() == WP2_Argb_32 || image.format() == WP2_ARGB_32,
WP2_STATUS_INVALID_PARAMETER);
WP2_CHECK_OK(!image.IsEmpty(), WP2_STATUS_INVALID_PARAMETER);
const bool is_premultiplied = (image.format() == WP2_Argb_32);
if (!is_premultiplied) WP2ArgbConverterInit();
const uint8_t* const argb = image.GetRow8(0);
const uint32_t w = image.width();
const uint32_t h = image.height();
const size_t stride = image.stride();
SetYUVBounds(-(1 << kYuvMaxPrec), (1 << kYuvMaxPrec) - 1);
double cov[3][3];
double custom_to_rgb_matrix[9];
int16_t im[9];
if (ExtractCovarianceMtx(argb, w, h, stride, cov, avg8_, is_premultiplied) &&
ExtractEigenMtx(cov, custom_to_rgb_matrix) &&
NormalizeFromInput(
argb, w, h, stride, is_premultiplied,
std::max((int16_t)std::abs(min_yuv_value_), max_yuv_value_), avg8_,
custom_to_rgb_matrix, im)) {
int32_t error;
WP2_CHECK_STATUS(MakeEigenMatrixEncodable(im, im_, &error));
if (error > 0) {
// 'kMaxDelta' should be big enough to correct most errors, though it may
// happen that some ExtractEigenMtx() outputs are too imprecise. In this
// case, encode those as is.
std::copy(im, im + 9, im_);
// Reducing 'kMaxDelta' and keeping the "quantized" matrix (assuming
// 'error' is reasonable) is possible but it leads to poor results.
}
if (ComputeRGBToYUVMatrix()) {
type_ = Csp::kCustom;
assert(yuv_depth_.precision() <= kYuvMaxPrec);
for (uint32_t i : {0, 1, 2}) avg10_[i] = LeftShift(avg8_[i], 2);
return WP2_STATUS_OK;
}
}
return WP2_STATUS_INVALID_COLORSPACE;
}
//------------------------------------------------------------------------------
void CSPTransform::Apply(ArgbBuffer* const image, BitDepth bit_depth,
uint32_t x, uint32_t y, uint32_t w, uint32_t h) const {
if (image == nullptr || image->IsEmpty()) return;
for (uint32_t j = 0; j < h; ++j) {
if (y + j >= image->height()) break;
uint8_t* const dst = image->GetRow8(y + j);
for (uint32_t i = x; i < std::min(x + w, image->width()); ++i) {
// Set alpha to opaque.
dst[4 * i + 0] = kAlphaMax;
int16_t yuv[3];
Rgb8ToYuv(&dst[4 * i + 1], yuv);
for (uint32_t c = 0; c < 3; ++c) {
// Convert to 8b-unsigned.
dst[4 * i + 1 + c] = (uint8_t)Clamp(
ChangePrecision(yuv[c] - bit_depth.min(), bit_depth.num_bits, 8), 0,
255);
}
}
}
}
//------------------------------------------------------------------------------
} // namespace WP2