| // Copyright 2019 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // ----------------------------------------------------------------------------- |
| // |
| // Colorspace utilities |
| // |
| // Author: Skal (pascal.massimino@gmail.com) |
| |
| #include "src/utils/csp.h" |
| |
| #include <algorithm> |
| #include <cassert> |
| #include <cmath> |
| #include <cstdint> |
| #include <cstdio> |
| #include <cstdlib> |
| #include <cstring> |
| #include <limits> |
| |
| #include "src/dsp/dsp.h" |
| #include "src/dsp/math.h" |
| #include "src/utils/ans.h" |
| #include "src/utils/ans_utils.h" |
| #include "src/utils/data_source.h" |
| #include "src/utils/utils.h" |
| #include "src/utils/vector.h" |
| #include "src/wp2/base.h" |
| #include "src/wp2/format_constants.h" |
| |
| namespace WP2 { |
| |
| //------------------------------------------------------------------------------ |
| |
| namespace { |
| |
| int16_t ToInt16(int32_t v, int32_t min_v, int32_t max_v) { |
| return (int16_t)Clamp(v, min_v, max_v); // Rounding errors may happen. |
| } |
// Rounds 'v' to the nearest integer; 'v' must already lie in [-max_v, max_v].
int16_t RoundToInt16(double v, int16_t max_v) {
  const double bound = (double)max_v;
  assert(v >= -bound && v <= bound);
  return (int16_t)lround(v);
}
| |
// Inverts the 3x3 row-major matrix 'm' into 'out'.
// Returns false if 'm' is singular (determinant is exactly zero).
bool TryInvertMatrix(const double m[9], double out[9]) {
  // Adjugate (transposed cofactor matrix), row-major.
  const double adj[9] = {m[4] * m[8] - m[5] * m[7], m[7] * m[2] - m[1] * m[8],
                         m[1] * m[5] - m[2] * m[4], m[6] * m[5] - m[3] * m[8],
                         m[0] * m[8] - m[2] * m[6], m[2] * m[3] - m[0] * m[5],
                         m[3] * m[7] - m[4] * m[6], m[1] * m[6] - m[0] * m[7],
                         m[0] * m[4] - m[1] * m[3]};
  // Determinant expanded along the first column.
  const double det = m[0] * adj[0] + m[3] * adj[1] + m[6] * adj[2];
  if (det == 0.) return false;
  const double idet = 1.0 / det;
  for (int i = 0; i < 9; ++i) out[i] = idet * adj[i];
  return true;
}
| |
// Returns the squared Euclidean norm of the 3-vector 'a'.
double Norm2(const double a[3]) {
  double sum = 0.;
  for (int i = 0; i < 3; ++i) sum += a[i] * a[i];
  return sum;
}
| |
// WARNING! This is a 'special' cross-product function that ensures out[0] >= 0.
// Stores the normalized (and possibly sign-flipped) cross product of 'a' and
// 'b' into 'out' and returns its squared length prior to normalization.
double CrossProduct(const double a[3], const double b[3], double out[3]) {
  out[0] = a[1] * b[2] - a[2] * b[1];
  out[1] = a[2] * b[0] - a[0] * b[2];
  out[2] = a[0] * b[1] - a[1] * b[0];
  const double sq_len = out[0] * out[0] + out[1] * out[1] + out[2] * out[2];
  if (sq_len > 0.) {
    // Normalize, negating if needed so that out[0] ends up non-negative.
    const double scale = (out[0] < 0 ? -1. : 1.) / std::sqrt(sq_len);
    for (int i = 0; i < 3; ++i) out[i] *= scale;
  }
  return sq_len;
}
| |
// Angular distance: returns 1 - |cos(angle between 'a' and 'b')|,
// i.e. 0.f for colinear vectors and 1.f for orthogonal ones.
float Distance(const double a[3], const double b[3]) {
  float dot = std::fabs(a[0] * b[0] + a[1] * b[1] + a[2] * b[2]);
  if (dot != 0.) {
    const double norm_a = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
    const double norm_b = b[0] * b[0] + b[1] * b[1] + b[2] * b[2];
    dot /= std::sqrt(norm_a * norm_b);
  }
  return 1.f - dot;
}
| |
// Scales all nine entries of 'm' in place by 'by'.
void MultiplyMatrix(double m[9], double by) {
  for (double* p = m; p < m + 9; ++p) *p *= by;
}
| |
| } // namespace |
| |
| //------------------------------------------------------------------------------ |
| |
// Out-of-class definitions for ODR-used class constants (required pre-C++17).
constexpr uint32_t CSPTransform::kMtxBits;
constexpr uint32_t CSPTransform::kMtxShift;
constexpr int16_t CSPTransform::kDefaultRgbAvg[3];

// Fixed-point scale (the value 1.0 in matrix units) and the largest
// representable matrix coefficient magnitude.
static constexpr int32_t kMtxFixedPointMul = 1 << CSPTransform::kMtxShift;
static constexpr int16_t kMaxMtxValue = (1 << CSPTransform::kMtxBits) - 1;
| |
| //------------------------------------------------------------------------------ |
| |
| void CSPTransform::StoreCustomYUVBounds() { // Compute min/max YUV values. |
| int32_t min[3] = {0}, max[3] = {0}; |
| for (uint32_t i = 0; i < 3; ++i) { |
| for (uint32_t j = 0; j < 3; ++j) { |
| const int16_t rgb_min = 0 - avg8_[j]; |
| const int16_t rgb_max = kRgbMax - avg8_[j]; |
| |
| const int32_t coeff = m_[i * 3 + j]; |
| min[i] += coeff * ((coeff > 0) ? rgb_min : rgb_max); |
| max[i] += coeff * ((coeff > 0) ? rgb_max : rgb_min); |
| } |
| min[i] = RightShiftRound(min[i], kMtxShift); |
| max[i] = RightShiftRound(max[i], kMtxShift); |
| } |
| const int32_t global_min = std::min({min[0], min[1], min[2]}); |
| const int32_t global_max = std::max({max[0], max[1], max[2]}); |
| SetYUVBounds(global_min, global_max); |
| } |
| |
| bool CSPTransform::Init(const int16_t yuv_to_rgb_matrix[9], |
| const int16_t rgb_avg[3]) { |
| for (size_t i = 0; i < 9; ++i) im_[i] = yuv_to_rgb_matrix[i]; |
| for (uint32_t i : {0, 1, 2}) avg8_[i] = rgb_avg[i]; |
| for (uint32_t i : {0, 1, 2}) avg10_[i] = LeftShift(avg8_[i], 2); |
| if (!ComputeRGBToYUVMatrix()) return false; |
| StoreCustomYUVBounds(); |
| type_ = Csp::kCustom; |
| return true; |
| } |
| |
| WP2Status CSPTransform::Init(Csp csp_type, const ArgbBuffer& argb_buffer) { |
| if (csp_type == Csp::kCustom) { |
| const WP2Status status = Optimize(argb_buffer); |
| // Stop here if success or unrecoverable error. Otherwise fallback on YCoCg. |
| if (status != WP2_STATUS_INVALID_COLORSPACE) return status; |
| csp_type = Csp::kYCoCg; |
| } |
| if (csp_type == Csp::kYCoCg) { |
| InitYCoCg(); |
| } else if (csp_type == Csp::kYCbCr) { |
| InitYCbCr(); |
| } else { |
| assert(csp_type == Csp::kYIQ); |
| InitYIQ(); |
| } |
| return WP2_STATUS_OK; |
| } |
| |
// Initializes the (lossless) YCoCg transform.
void CSPTransform::InitYCoCg() {
  // The RGB to YUV matrix has to be scaled by 4 to be lossless, so we divide
  // the inverse by 4.
  static constexpr int16_t I = (kMtxFixedPointMul >> 2);
  // clang-format off
  static constexpr int16_t kYCoCgMatrix[] = {
    I, I, -I,
    I, 0, I,
    I, -I, -I
  };
  // clang-format on
  if (!Init(kYCoCgMatrix)) assert(false);  // kYCoCgMatrix is invertible.
  type_ = Csp::kYCoCg;
  // With 10 bits par channel ([-512:511] if 'rgb_avg' is 128), the YCoCg
  // transformation is lossless.
  assert(yuv_depth_.precision() <= kYuvMaxPrec);
}
| |
// Initializes a fixed-point YCbCr transform (not lossless).
void CSPTransform::InitYCbCr() {
#if 0 // reference code used for generating kYCbCrMatrix[]
  double yuv_to_rgb_matrix[] = {
    1.16, 0.00, 1.60,
    1.16, -0.39, -0.81,
    1.16, 2.01, 0.00
  };
  // This is not lossless: use the maximum range (yuv_bits_ == kYuvMaxPrec + 1).
  MultiplyMatrix(yuv_to_rgb_matrix, kMtxFixedPointMul / 4);
  int16_t kYCbCrMatrix[9];
  for (int i = 0; i < 9; ++i) {
    kYCbCrMatrix[i] = RoundToInt16(yuv_to_rgb_matrix[i], kMaxMtxValue);
  }
  for (auto v : kYCbCrMatrix) printf("%6d,\n", v);
#else
  // Pre-computed fixed-point (kMtxShift) YUV->RGB coefficients.
  static constexpr int16_t kYCbCrMatrix[] = {
      1188, 0, 1638, 1188, -399, -829, 1188, 2058, 0,
  };
#endif
  if (!Init(kYCbCrMatrix)) assert(false);  // kYCbCrMatrix is invertible.
  type_ = Csp::kYCbCr;
  assert(yuv_depth_.precision() <= kYuvMaxPrec);
}
| |
// Initializes a fixed-point NTSC YIQ transform (not lossless).
void CSPTransform::InitYIQ() {
#if 0 // reference code used for generating kIQMatrix[]
  static constexpr double rgb_to_yuv_matrix[] = {
    0.299, 0.587, 0.114,
    0.596, -0.274, -0.322,
    0.211, -0.523, 0.312
  };
  double yuv_to_rgb_matrix[9];
  TryInvertMatrix(rgb_to_yuv_matrix, yuv_to_rgb_matrix);
  // This is not lossless: use the maximum range (yuv_bits_ == kYuvMaxPrec + 1).
  MultiplyMatrix(yuv_to_rgb_matrix, kMtxFixedPointMul / 2);
  int16_t kYIQMatrix[9];
  for (int i = 0; i < 9; ++i) {
    kYIQMatrix[i] = RoundToInt16(yuv_to_rgb_matrix[i], kMaxMtxValue);
  }
  for (auto v : kYIQMatrix) printf("%6d,\n", v);
#else
  // Pre-computed fixed-point (kMtxShift) YUV->RGB coefficients.
  static constexpr int16_t kYIQMatrix[] = {
      2048, 1958, 1273, 2048, -558, -1325, 2048, -2260, 3483,
  };
#endif
  if (!Init(kYIQMatrix)) assert(false);  // kYIQMatrix is invertible.
  type_ = Csp::kYIQ;
  assert(yuv_depth_.precision() <= kYuvMaxPrec);
}
| |
| void CSPTransform::Print() const { |
| printf("CSP type %u\n", (uint32_t)type_); |
| double rgb_to_yuv_matrix[9], yuv_to_rgb_matrix[9]; |
| for (int i = 0; i < 9; ++i) { |
| rgb_to_yuv_matrix[i] = (double)m_[i] / kMtxFixedPointMul; |
| yuv_to_rgb_matrix[i] = (double)im_[i] / kMtxFixedPointMul; |
| } |
| |
| printf("=== RGB to YUV: ===\n"); |
| for (int j = 0; j < 3; ++j) { |
| for (int i = 0; i < 3; ++i) { |
| printf("%10.4f ", rgb_to_yuv_matrix[i + j * 3]); |
| } |
| printf("\n"); |
| } |
| |
| printf("=== YUV to RGB: ===\n"); |
| for (int j = 0; j < 3; ++j) { |
| for (int i = 0; i < 3; ++i) { |
| printf("%10.4f ", yuv_to_rgb_matrix[i + j * 3]); |
| } |
| printf("\n"); |
| } |
| |
| printf("=== avg: "); |
| printf("%3d %3d %3d", avg8_[0], avg8_[1], avg8_[2]); |
| printf("\n"); |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| void CSPTransform::SetYUVBounds(int16_t yuv_min, int16_t yuv_max) { |
| const bool is_signed = (yuv_min < 0); |
| yuv_depth_ = {WP2Log2Ceil_k(std::max((int16_t)std::abs(yuv_min), yuv_max)) + |
| (is_signed ? 1u : 0u), |
| is_signed}; |
| min_yuv_value_ = yuv_min; |
| max_yuv_value_ = yuv_max; |
| } |
| |
| bool CSPTransform::ComputeRGBToYUVMatrix() { |
| double rgb_to_yuv_matrix[9], yuv_to_rgb_matrix[9]; |
| constexpr double norm = 1. / kMtxFixedPointMul; |
| for (int i = 0; i < 9; ++i) { |
| if (std::abs(im_[i]) > kMaxMtxValue) return false; |
| yuv_to_rgb_matrix[i] = norm * im_[i]; |
| } |
| |
| if (!TryInvertMatrix(yuv_to_rgb_matrix, rgb_to_yuv_matrix)) return false; |
| for (int i = 0; i < 9; ++i) { |
| const double v = rgb_to_yuv_matrix[i] * kMtxFixedPointMul; |
| if (std::abs(v) > kMaxMtxValue) return false; |
| m_[i] = RoundToInt16(v, kMaxMtxValue); |
| } |
| return true; |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| template <int kNumRgbBits> |
| static inline void RgbToYuv(const int16_t avg[3], const int16_t m[9], |
| int16_t min_yuv_value, int16_t max_yuv_value, |
| int32_t r, int32_t g, int32_t b, int16_t* const y, |
| int16_t* const u, int16_t* const v) { |
| constexpr int kNumExtraRgbBits = kNumRgbBits - 8; |
| constexpr int kShift = CSPTransform::kMtxShift + kNumExtraRgbBits; |
| // kNumRgbBits to (kNumRgbBits+1) |
| const int32_t tmp_r = (int32_t)r - avg[0]; |
| const int32_t tmp_g = (int32_t)g - avg[1]; |
| const int32_t tmp_b = (int32_t)b - avg[2]; |
| // (kNumRgbBits+1) to (yuv_bits_ + kMtxShift + kNumExtraRgbBits) |
| int32_t tmp_y = m[0] * tmp_r + m[1] * tmp_g + m[2] * tmp_b; |
| int32_t tmp_u = m[3] * tmp_r + m[4] * tmp_g + m[5] * tmp_b; |
| int32_t tmp_v = m[6] * tmp_r + m[7] * tmp_g + m[8] * tmp_b; |
| // (yuv_bits_ + kMtxShift + kNumExtraRgbBits) to yuv_bits_ |
| tmp_y = RightShiftRound(tmp_y, kShift); |
| tmp_u = RightShiftRound(tmp_u, kShift); |
| tmp_v = RightShiftRound(tmp_v, kShift); |
| *y = ToInt16(tmp_y, min_yuv_value, max_yuv_value); |
| *u = ToInt16(tmp_u, min_yuv_value, max_yuv_value); |
| *v = ToInt16(tmp_v, min_yuv_value, max_yuv_value); |
| } |
| |
| template <int kNumRgbBits, typename Trgb> |
| static inline void YuvToRgb(const int16_t avg[3], const int16_t im[9], |
| int16_t y, int16_t u, int16_t v, Trgb* const r, |
| Trgb* const g, Trgb* const b) { |
| constexpr int kNumExtraRgbBits = kNumRgbBits - 8; |
| constexpr int kShift = CSPTransform::kMtxShift - kNumExtraRgbBits; |
| constexpr int kMax = (1 << kNumRgbBits) - 1; |
| // yuv_bits_ to (kRgbBits+1 + kMtxShift) |
| int32_t tmp_r = im[0] * y + im[1] * u + im[2] * v; |
| int32_t tmp_g = im[3] * y + im[4] * u + im[5] * v; |
| int32_t tmp_b = im[6] * y + im[7] * u + im[8] * v; |
| // (kRgbBits+1 + kMtxShift) to kNumRgbBits+1 |
| tmp_r = RightShiftRound(tmp_r, kShift); |
| tmp_g = RightShiftRound(tmp_g, kShift); |
| tmp_b = RightShiftRound(tmp_b, kShift); |
| // (kNumRgbBits+1) to kNumRgbBits |
| *r = (Trgb)Clamp<int32_t>(tmp_r + avg[0], 0, kMax); |
| *g = (Trgb)Clamp<int32_t>(tmp_g + avg[1], 0, kMax); |
| *b = (Trgb)Clamp<int32_t>(tmp_b + avg[2], 0, kMax); |
| // Do not assert that rgb fits, just clamp it (lossy conversion). |
| } |
| |
// Converts one 8-bit RGB sample to YUV.
void CSPTransform::Rgb8ToYuv(int32_t r, int32_t g, int32_t b, int16_t* const y,
                             int16_t* const u, int16_t* const v) const {
  RgbToYuv</*kNumRgbBits=*/8>(avg8_, m_, min_yuv_value_, max_yuv_value_, r, g,
                              b, y, u, v);
}
| |
// Converts one 10-bit RGB sample to YUV (uses the 10-bit averages).
void CSPTransform::Rgb10ToYuv(int32_t r, int32_t g, int32_t b, int16_t* const y,
                              int16_t* const u, int16_t* const v) const {
  RgbToYuv</*kNumRgbBits=*/10>(avg10_, m_, min_yuv_value_, max_yuv_value_, r, g,
                               b, y, u, v);
}
| |
// Converts one YUV sample to 8-bit RGB (int16_t outputs).
void CSPTransform::YuvToRgb8(int16_t y, int16_t u, int16_t v, int16_t* const r,
                             int16_t* const g, int16_t* const b) const {
  YuvToRgb</*kNumRgbBits=*/8>(avg8_, im_, y, u, v, r, g, b);
}
| |
// Converts one YUV sample to 8-bit RGB (uint8_t outputs).
void CSPTransform::YuvToRgb8(int16_t y, int16_t u, int16_t v, uint8_t* const r,
                             uint8_t* const g, uint8_t* const b) const {
  YuvToRgb</*kNumRgbBits=*/8>(avg8_, im_, y, u, v, r, g, b);
}
| |
// Array variant: converts rgb[0..2] (8-bit range, int16_t) to yuv[0..2].
void CSPTransform::Rgb8ToYuv(const int16_t* rgb, int16_t* yuv) const {
  RgbToYuv</*kNumRgbBits=*/8>(avg8_, m_, min_yuv_value_, max_yuv_value_, rgb[0],
                              rgb[1], rgb[2], &yuv[0], &yuv[1], &yuv[2]);
  // CheckRoundTrip(yuv, rgb);
}
| |
// Array variant: converts yuv[0..2] to 8-bit rgb[0..2] (int16_t).
void CSPTransform::YuvToRgb8(const int16_t* yuv, int16_t* rgb) const {
  YuvToRgb</*kNumRgbBits=*/8>(avg8_, im_, yuv[0], yuv[1], yuv[2], &rgb[0],
                              &rgb[1], &rgb[2]);
}
| |
// Array variant: converts rgb[0..2] (uint8_t) to yuv[0..2].
void CSPTransform::Rgb8ToYuv(const uint8_t* rgb, int16_t* yuv) const {
  RgbToYuv</*kNumRgbBits=*/8>(avg8_, m_, min_yuv_value_, max_yuv_value_, rgb[0],
                              rgb[1], rgb[2], &yuv[0], &yuv[1], &yuv[2]);
}
| |
// Array variant: converts yuv[0..2] to 8-bit rgb[0..2] (uint8_t).
void CSPTransform::YuvToRgb8(const int16_t* yuv, uint8_t* rgb) const {
  YuvToRgb</*kNumRgbBits=*/8>(avg8_, im_, yuv[0], yuv[1], yuv[2], &rgb[0],
                              &rgb[1], &rgb[2]);
}
| |
// Converts an 8-bit ARGB color to AYUV; alpha is passed through unchanged.
Ayuv38b CSPTransform::ToYuv(const Argb32b& color) const {
  Ayuv38b result;
  result.a = color.a;
  RgbToYuv</*kNumRgbBits=*/8>(avg8_, m_, min_yuv_value_, max_yuv_value_,
                              color.r, color.g, color.b, &result.y, &result.u,
                              &result.v);
  return result;
}
| |
// Converts an AYUV color back to 8-bit ARGB; alpha is passed through.
Argb32b CSPTransform::ToRgb8(const Ayuv38b& color) const {
  Argb32b result;
  result.a = color.a;
  YuvToRgb</*kNumRgbBits=*/8>(avg8_, im_, color.y, color.u, color.v, &result.r,
                              &result.g, &result.b);
  return result;
}
| |
| //------------------------------------------------------------------------------ |
| |
| WP2Status CSPTransform::CustomToArgb(uint32_t width, uint32_t height, |
| const int16_t* c0_buffer, uint32_t c0_step, |
| const int16_t* c1_buffer, uint32_t c1_step, |
| const int16_t* c2_buffer, uint32_t c2_step, |
| const int16_t* a_buffer, uint32_t a_step, |
| const CSPMtx& ccsp_to_rgb, |
| ArgbBuffer* const argb) { |
| WP2_CHECK_OK(argb->format() == WP2_Argb_32, WP2_STATUS_INVALID_COLORSPACE); |
| WP2_CHECK_OK(argb->width() == width && argb->height() == height, |
| WP2_STATUS_BAD_DIMENSION); |
| int16_t m[9]; |
| for (uint32_t i = 0; i < 9; ++i) { |
| const int32_t e = |
| ChangePrecision(ccsp_to_rgb.matrix[i], ccsp_to_rgb.shift, kMtxShift); |
| WP2_CHECK_OK(std::abs(e) < kMaxMtxValue, WP2_STATUS_INVALID_PARAMETER); |
| m[i] = (int16_t)e; |
| } |
| |
| for (uint32_t y = 0; y < height; ++y) { |
| uint8_t* const dst_row = argb->GetRow8(y); |
| if (a_buffer != nullptr) { |
| for (uint32_t x = 0; x < width; ++x) { |
| uint8_t* const dst_pixel = &dst_row[x * 4]; |
| dst_pixel[0] = |
| (uint8_t)Clamp<int32_t>(a_buffer[y * a_step + x], 0, kAlphaMax); |
| int32_t rgb[3]; |
| Multiply(c0_buffer[x], c1_buffer[x], c2_buffer[x], m, &rgb[0], &rgb[1], |
| &rgb[2]); |
| for (uint32_t i : {0, 1, 2}) { |
| dst_pixel[1 + i] = (uint8_t)Clamp<int32_t>( |
| RightShiftRound(rgb[i], kMtxShift), 0, dst_pixel[0]); |
| } |
| } |
| } else { |
| for (uint32_t x = 0; x < width; ++x) { |
| uint8_t* const dst_pixel = &dst_row[x * 4]; |
| dst_pixel[0] = kAlphaMax; |
| int32_t rgb[3]; |
| Multiply(c0_buffer[x], c1_buffer[x], c2_buffer[x], m, &rgb[0], &rgb[1], |
| &rgb[2]); |
| for (uint32_t i : {0, 1, 2}) { |
| dst_pixel[1 + i] = (uint8_t)Clamp<int32_t>( |
| RightShiftRound(rgb[i], kMtxShift), 0, kAlphaMax); |
| } |
| } |
| } |
| c0_buffer += c0_step; |
| c1_buffer += c1_step; |
| c2_buffer += c2_step; |
| } |
| return WP2_STATUS_OK; |
| } |
| |
// Converts a width x height custom-colorspace ('ccsp') image directly to YUV
// by folding 'ccsp_to_rgb' (matrix A) and the internal RGB->YUV matrix (M)
// into one fixed-point matrix M.A. Fails if M.A overflows the matrix range.
WP2Status CSPTransform::CustomToYuv(uint32_t width, uint32_t height,
                                    const int16_t* c0_buffer, uint32_t c0_step,
                                    const int16_t* c1_buffer, uint32_t c1_step,
                                    const int16_t* c2_buffer, uint32_t c2_step,
                                    const CSPMtx& ccsp_to_rgb,
                                    int16_t* y_buffer, uint32_t y_step,
                                    int16_t* u_buffer, uint32_t u_step,
                                    int16_t* v_buffer, uint32_t v_step) const {
  // 'ccsp_to_rgb_matrix' being A, 'm_' (from RGB to YUV) being M:
  // A.ccsp = rgb and M.(rgb - avg) = yuv so M.A.ccsp - M.avg = yuv

  const int16_t* const a = ccsp_to_rgb.mtx();  // matrix A
  const uint32_t a_shift = ccsp_to_rgb.shift;  // fixed point bits of A

  int16_t ma[9] = {};  // matrix M.A
  {
    int32_t ma_tmp[9] = {};  // 32 bits necessary for matrix multiplication.
    Multiply(m_, a, ma_tmp);
    for (uint32_t i = 0; i < 9; ++i) {
      // Drop A's extra precision so 'ma' is in kMtxShift fixed-point.
      ma_tmp[i] = ChangePrecision(ma_tmp[i], kMtxShift + a_shift, kMtxShift);
      WP2_CHECK_OK(std::abs(ma_tmp[i]) <= kMaxMtxValue,
                   WP2_STATUS_INVALID_PARAMETER);
      ma[i] = (int16_t)ma_tmp[i];
    }
  }

  int32_t offset[3];  // M.avg
  Multiply<int32_t>(avg8_[0], avg8_[1], avg8_[2], m_, &offset[0], &offset[1],
                    &offset[2]);
  // 'offset' is now scaled by kMtxShift so it can be subtracted as is to 'yuv'
  // below which will also be scaled by kMtxShift.

  for (uint32_t y = 0; y < height; ++y) {
    for (uint32_t x = 0; x < width; ++x) {
      // TODO(skal): this loop should go in dsp/
      int32_t yuv[3];
      Multiply<int32_t>(c0_buffer[x], c1_buffer[x], c2_buffer[x], ma, &yuv[0],
                        &yuv[1], &yuv[2]);
      for (uint32_t i : {0, 1, 2}) {
        yuv[i] = RightShiftRound(yuv[i] - offset[i], kMtxShift);
        yuv[i] = Clamp<int32_t>(yuv[i], min_yuv_value_, max_yuv_value_);
      }
      y_buffer[x] = (int16_t)yuv[0];
      u_buffer[x] = (int16_t)yuv[1];
      v_buffer[x] = (int16_t)yuv[2];
    }
    c0_buffer += c0_step;
    c1_buffer += c1_step;
    c2_buffer += c2_step;
    y_buffer += y_step;
    u_buffer += u_step;
    v_buffer += v_step;
  }
  return WP2_STATUS_OK;
}
| |
// Converts a width x height YUV image directly to a custom colorspace
// ('ccsp') by folding the internal YUV->RGB matrix (N) and 'rgb_to_ccsp'
// (matrix B) into one fixed-point matrix B.N. The per-row work is delegated
// to the dsp routine WP2YuvToCustom().
WP2Status CSPTransform::YuvToCustom(
    uint32_t width, uint32_t height, const int16_t* y_buffer, uint32_t y_step,
    const int16_t* u_buffer, uint32_t u_step, const int16_t* v_buffer,
    uint32_t v_step, const CSPMtx& rgb_to_ccsp, int16_t* c0_buffer,
    uint32_t c0_step, int16_t* c1_buffer, uint32_t c1_step, int16_t* c2_buffer,
    uint32_t c2_step) const {
  // 'rgb_to_ccsp_matrix' being B, 'm_' (from RGB to YUV) being M and
  // 'im_' (from YUV to RGB) being N:
  // B.rgb = ccsp and N.yuv + avg = rgb so B.N.(yuv + M.avg) = ccsp

  const int16_t* const b = rgb_to_ccsp.mtx();  // matrix B
  const uint32_t b_shift = rgb_to_ccsp.shift;  // fixed point bits of B

  int16_t bn[9] = {};  // matrix B.N
  {
    int32_t bn_tmp[9] = {};  // 32 bits necessary for matrix multiplication.
    Multiply(b, im_, bn_tmp);
    for (uint32_t i = 0; i < 9; ++i) {
      // Drop B's extra precision so 'bn' is in kMtxShift fixed-point.
      bn_tmp[i] = ChangePrecision(bn_tmp[i], b_shift + kMtxShift, kMtxShift);
      WP2_CHECK_OK(std::abs(bn_tmp[i]) < kMaxMtxValue,
                   WP2_STATUS_INVALID_PARAMETER);
      bn[i] = (int16_t)bn_tmp[i];
    }
  }

  int16_t offset[3];  // M.avg
  Multiply(avg8_[0], avg8_[1], avg8_[2], m_, kMtxShift, &offset[0], &offset[1],
           &offset[2]);

  WP2CSPConverterInit();
  for (uint32_t y = 0; y < height; ++y) {
    WP2YuvToCustom(y_buffer, u_buffer, v_buffer, offset, bn, c0_buffer,
                   c1_buffer, c2_buffer, width);
    y_buffer += y_step;
    u_buffer += u_step;
    v_buffer += v_step;
    c0_buffer += c0_step;
    c1_buffer += c1_step;
    c2_buffer += c2_step;
  }
  return WP2_STATUS_OK;
}
| |
| //------------------------------------------------------------------------------ |
| |
// Debug helper: converts 'yuv' back to 8-bit RGB and checks it is within a
// small tolerance of 'rgb'. The first mismatches are printed to stdout (the
// transform itself is dumped on the first one) and the process hard-exits
// after 10 mismatches. Returns false on mismatch.
bool CSPTransform::CheckRoundTrip(const int16_t yuv[3],
                                  const int16_t rgb[3]) const {
  int16_t out[3];
  YuvToRgb8(yuv, out);
  // Sum of per-channel absolute differences.
  const int16_t delta =
      abs(rgb[0] - out[0]) + abs(rgb[1] - out[1]) + abs(rgb[2] - out[2]);
  const int16_t kErrorThreshold = 5;
  if (delta > kErrorThreshold) {
    static int cnt = 0;  // NOTE: not thread-safe; debug-only code.
    printf("#%d: %d %d %d / %d %d %d (yuv=%d %d %d) err=%d\n", cnt, rgb[0],
           rgb[1], rgb[2], out[0], out[1], out[2], yuv[0], yuv[1], yuv[2],
           delta);
    if (cnt == 0) Print();
    ++cnt;
    if (cnt == 10) exit(0);
    return false;
  }
  return true;
}
| |
| //------------------------------------------------------------------------------ |
| |
// Returns |v|^2. Result is <= 3 * 2^(15+15).
static int64_t GetVecSqLen(int16_t v0, int16_t v1, int16_t v2) {
  const int64_t a = v0, b = v1, c = v2;  // widen before multiplying
  return a * a + b * b + c * c;
}
| |
// Predicts a vector element from the others and the magnitude.
// Returns the non-negative value v2 such that (v0, v1, v2) has squared length
// closest to 'v_sq_len', clamped to the int16_t range.
static int16_t CompleteVector(int16_t v0, int16_t v1, int64_t v_sq_len) {
  // No need to worry about fixed point.
  int64_t v2 =
      SqrtFloor(SafeSub(v_sq_len, (int64_t)v0 * v0 + (int64_t)v1 * v1));
  // NOTE(review): 'v2' is implicitly narrowed to int16_t by these
  // GetVecSqLen() calls, before the final clamp; assumes intermediate values
  // stay within int16_t range — confirm with callers.
  if (std::abs(GetVecSqLen(v0, v1, v2) - v_sq_len) >
      std::abs(GetVecSqLen(v0, v1, v2 + 1) - v_sq_len)) {
    v2 += 1;  // In case sqrt floor was "worse" than ceil.
  }
  return (int16_t)Clamp<int64_t>(v2, std::numeric_limits<int16_t>::min(),
                                 std::numeric_limits<int16_t>::max());
}
| |
| //------------------------------------------------------------------------------ |
| |
| static void PutSValue(int32_t v, uint32_t bits, ANSEnc* const enc) { |
| if (enc->PutBool(v != 0, "non_zero")) { |
| const bool sign = (v < 0); |
| enc->PutUValue((uint32_t)abs(v) - 1, bits - 1, "abs_val"); |
| enc->PutBool(sign, "sign"); |
| } |
| } |
| |
| static int32_t ReadSValue(uint32_t bits, ANSDec* const dec) { |
| int32_t v = 0; |
| if (dec->ReadBool("non_zero")) { |
| v = 1 + (int32_t)dec->ReadUValue(bits - 1, "abs_val"); |
| if (dec->ReadBool("sign")) v = -v; |
| } |
| return v; |
| } |
| |
// The matrix needs to be precise enough but encodable through ANS.
static constexpr int32_t kEigenBits = 14;
static_assert(kEigenBits <= CSPTransform::kMtxBits &&
              kEigenBits <= kANSMaxUniformBits &&
              kEigenBits <= kANSMaxRangeBits,
              "kEigenBits");

// Predicted matrix elements are corrected by a delta in [-kMaxDelta, kMaxDelta]
// (signed, hence the +1 bit).
static constexpr int32_t kMaxDelta = 5;
static constexpr int32_t kMaxDeltaBits = WP2Log2Ceil_k(kMaxDelta) + 1;
| |
| static inline int32_t PutDelta(int16_t original, int16_t predicted, |
| ANSEnc* const enc) { |
| const int32_t delta = Clamp(original - predicted, -kMaxDelta, kMaxDelta); |
| PutSValue(delta, kMaxDeltaBits, enc); |
| return delta; |
| } |
| |
// Decodes a correction delta written with PutDelta().
static inline int32_t ReadDelta(ANSDec* const dec) {
  return ReadSValue(kMaxDeltaBits, dec);
}
| |
// For each of these vectors of indices, the last element will be predicted.
// Declared constexpr: this is a read-only lookup table (it was previously a
// mutable 'static uint32_t', which allowed accidental modification).
static constexpr uint32_t kPredictedVectors[3][3] = {
    {1, 4, 7}, {0, 1, 2}, {3, 4, 5}};
| |
| //------------------------------------------------------------------------------ |
| |
// Encodes 'm' assuming that:
// - all column- and row-vectors have the same magnitude,
// - the first row and column contain only positive values,
// - column- and row-vectors are mutually orthogonal.
// Only absolute values are stored; ReadEigenMatrix() recovers the signs from
// the orthogonality assumption. The encoding may be lossy (see
// MakeEigenMatrixEncodable() which measures the loss).
static void WriteEigenMatrix(const int16_t m[9], ANSEnc* const enc) {
  ANSDebugPrefix prefix(enc, "eigen_mtx");
  //                                                     0 1 2
  // Final matrix that will be read. For reference:      3 4 5
  //                                                     6 7 8
  int16_t dm[9] = {0};

  // First column is stored unpredicted (as magnitudes).
  for (uint32_t i : {0, 3, 6}) dm[i] = std::abs(m[i]);
  int64_t sq_len = GetVecSqLen(dm[0], dm[3], dm[6]);
  uint32_t num_sq_len = 1;  // number of vectors folded into 'sq_len' so far

  // Find the maximum value of the middle top element based on top row.
  const int16_t max_dm1 = CompleteVector(dm[0], 0, sq_len) + kMaxDelta;
  dm[1] = std::min((int16_t)std::abs(m[1]), max_dm1);

  // Find the maximum value of the middle element based on middle row, column.
  const int16_t max_dm4 = RightShiftRound(CompleteVector(dm[1], 0, sq_len) +
                                          CompleteVector(dm[3], 0, sq_len),
                                          1) +
                          kMaxDelta;
  dm[4] = std::min((int16_t)std::abs(m[4]), max_dm4);

  // Find out and signal the maximum number of bits needed to encode 0,1,2,3,4.
  const int32_t max_dm = std::max({dm[0], dm[3], dm[6], dm[1], dm[4]});
  assert(max_dm < (1 << kEigenBits));
  const uint32_t num_bits = std::max(1u, (uint32_t)WP2Log2Ceil_k(max_dm + 1));
  enc->PutRange(num_bits, 1, kEigenBits, "num_bits");

  // Encode 0,3,6 as is and 1,4 as clamped.
  for (uint32_t i : {0, 3, 6}) enc->PutUValue(dm[i], num_bits, "value");
  (max_dm1 + 1 < (1 << num_bits)) ? enc->PutRValue(dm[1], max_dm1 + 1, "value")
                                  : enc->PutUValue(dm[1], num_bits, "value");
  (max_dm4 + 1 < (1 << num_bits)) ? enc->PutRValue(dm[4], max_dm4 + 1, "value")
                                  : enc->PutUValue(dm[4], num_bits, "value");

  // Other elements can be predicted; adjust by +-kMaxDelta to avoid any loss.
  for (const auto& vec : kPredictedVectors) {
    const uint32_t a = vec[0], b = vec[1], c = vec[2];  // Indices.
    dm[c] = CompleteVector(dm[a], dm[b], sq_len);
    dm[c] += PutDelta(std::abs(m[c]), dm[c], enc);
    // Improve 'sq_len' accuracy by taking into account the corrected values.
    sq_len = DivRound<int64_t>(
        sq_len * num_sq_len + GetVecSqLen(dm[a], dm[b], dm[c]), num_sq_len + 1);
    ++num_sq_len;
  }

  // The last one reuses the predictions and deltas so compute it after them.
  dm[8] = RightShiftRound((CompleteVector(dm[2], dm[5], sq_len) +
                           CompleteVector(dm[6], dm[7], sq_len)),
                          1);
  dm[8] += PutDelta(std::abs(m[8]), dm[8], enc);
}
| |
// Decodes a matrix written with WriteEigenMatrix(): reads the magnitudes
// (mirroring the encoder's prediction scheme exactly), then recovers the
// signs of the predicted elements from the orthogonality assumption.
static void ReadEigenMatrix(ANSDec* const dec, int16_t m[9]) {
  ANSDebugPrefix prefix(dec, "eigen_mtx");
  const int32_t num_bits = dec->ReadRange(1u, kEigenBits, "num_bits");

  // First column, stored unpredicted.
  for (uint32_t i : {0, 3, 6}) m[i] = dec->ReadUValue(num_bits, "value");
  int64_t sq_len = GetVecSqLen(m[0], m[3], m[6]);
  uint32_t num_sq_len = 1;  // number of vectors folded into 'sq_len' so far

  // Elements 1 and 4 were range- or uniform-coded depending on their bound.
  const int16_t range_m1 = CompleteVector(m[0], 0, sq_len) + kMaxDelta + 1;
  m[1] = (range_m1 < (1 << num_bits)) ? dec->ReadRValue(range_m1, "value")
                                      : dec->ReadUValue(num_bits, "value");

  const int16_t range_m4 = RightShiftRound(CompleteVector(m[1], 0, sq_len) +
                                           CompleteVector(m[3], 0, sq_len),
                                           1) +
                           kMaxDelta + 1;
  m[4] = (range_m4 < (1 << num_bits)) ? dec->ReadRValue(range_m4, "value")
                                      : dec->ReadUValue(num_bits, "value");

  // Predicted elements: prediction plus the transmitted delta.
  for (const auto& vec : kPredictedVectors) {
    const uint32_t a = vec[0], b = vec[1], c = vec[2];
    m[c] = CompleteVector(m[a], m[b], sq_len) + ReadDelta(dec);
    sq_len = DivRound<int64_t>(
        sq_len * num_sq_len + GetVecSqLen(m[a], m[b], m[c]), num_sq_len + 1);
    ++num_sq_len;
  }

  m[8] = RightShiftRound(
      CompleteVector(m[2], m[5], sq_len) + CompleteVector(m[6], m[7], sq_len),
      1);
  m[8] += ReadDelta(dec);

  // Find signs for the elements 4,5,7,8. As row-vectors are orthogonal, the
  // element generating the highest sub-dot-product has an opposite sign.
  for (uint32_t row : {3, 6}) {  // Middle and bottom vectors.
    int32_t subdot[3];  // Partial dot-product results with top vector.
    for (uint32_t col : {0, 1, 2}) subdot[col] = (int32_t)m[col] * m[col + row];
    const auto highest = std::max_element(subdot, subdot + 3) - subdot;
    for (int32_t col : {1, 2}) {
      if (highest == 0 || highest == col) m[col + row] = -m[col + row];
    }
  }
}
| |
// Round-trips 'm' through the eigen matrix codec into 'dm' and reports in
// '*error' the largest absolute per-coefficient difference (0 == lossless).
WP2Status CSPTransform::MakeEigenMatrixEncodable(const int16_t m[9],
                                                 int16_t dm[9],
                                                 int32_t* const error) {
  WP2MathInit();
  ANSEnc enc;
  WriteEigenMatrix(m, &enc);
  WP2_CHECK_STATUS(enc.AssembleToBitstream(/*clear_tokens=*/true));
  Vector_u8 bits;
  WP2_CHECK_STATUS(enc.WriteBitstreamTo(bits));
  // Decode the freshly written bits to observe exactly what a reader gets.
  ExternalDataSource data(bits.data(), bits.size());
  ANSDec dec(&data);
  ReadEigenMatrix(&dec, dm);
  WP2_CHECK_STATUS(dec.GetStatus());
  *error = 0;
  for (uint32_t i = 0; i < 9; ++i) {
    *error = std::max(*error, std::abs(m[i] - dm[i]));
  }
  return WP2_STATUS_OK;
}
| |
| //------------------------------------------------------------------------------ |
| |
// Serializes the transform: the type, plus (for kCustom) the inverse matrix
// and the RGB averages.
WP2Status CSPTransform::Write(ANSEnc* const enc) const {
  enc->PutRValue((uint32_t)type_, kNumCspTypes, "csp_type");
  if (type_ == Csp::kCustom) {
    int32_t error;
    int16_t encodable_matrix[9];
    // Dry-run the compact eigen encoding to check whether it is lossless.
    WP2_CHECK_STATUS(MakeEigenMatrixEncodable(im_, encodable_matrix, &error));
    if (enc->PutBool(error == 0, "eigen")) {
      WriteEigenMatrix(im_, enc);  // Matrix can be encoded with fewer bits.
    } else {
      // Fallback: raw signed values, one per coefficient.
      for (auto& v : im_) PutSValue(v, kANSMaxUniformBits + 1, enc);
    }
    for (auto& v : avg8_) enc->PutUValue(v, kRgbBits, "avg");
    // Sanity check: 'avg10_' must be the 10-bit version of 'avg8_'.
    for (int i : {0, 1, 2}) {
      if (avg10_[i] != LeftShift(avg8_[i], 2)) assert(false);
    }
  }
  return WP2_STATUS_OK;
}
| |
// Deserializes a transform written by Write(). For kCustom, the inverse
// matrix is read and the forward matrix recomputed from it.
WP2Status CSPTransform::Read(ANSDec* const dec) {
  type_ = (Csp)dec->ReadRValue(kNumCspTypes, "csp_type");
  if (type_ == Csp::kCustom) {
    // Custom transforms always use the full signed range.
    SetYUVBounds(-(1 << kYuvMaxPrec), (1 << kYuvMaxPrec) - 1);
    assert(yuv_depth_.precision() == kYuvMaxPrec);
    if (dec->ReadBool("eigen")) {
      ReadEigenMatrix(dec, im_);
    } else {
      // Completely custom matrices can be decoded.
      for (auto& v : im_) v = ReadSValue(kANSMaxUniformBits + 1, dec);
    }
    for (auto& v : avg8_) v = (uint16_t)dec->ReadUValue(kRgbBits, "avg");
    for (uint32_t i : {0, 1, 2}) avg10_[i] = LeftShift(avg8_[i], 2);
#if !defined(WP2_REDUCE_BINARY_SIZE)
    if (!ComputeRGBToYUVMatrix()) {  // (direct matrix is used in vwp2)
      // not critical, but shouldn't happen in an ideal world!
    }
#endif
  } else {
    WP2_CHECK_STATUS(Init(type_));
  }
  WP2_CHECK_STATUS(dec->GetStatus());
  return WP2_STATUS_OK;
}
| |
| //------------------------------------------------------------------------------ |
| |
| namespace { |
| |
// Dot product of two 3-vectors, accumulated in double precision.
template <typename T>
constexpr double Dot(const T u[3], const T v[3]) {
  return (double)u[0] * v[0] + u[1] * v[1] + u[2] * v[2];
}
| |
// The image is scanned in kAvgBlk x kAvgBlk blocks when gathering statistics.
constexpr uint32_t kAvgBlk = 8; // size of the averaging block
| |
// Measures the local edge strength at 'src' (one channel of a 4-byte pixel):
// sum of absolute differences with the pixel to the right (+4 bytes) and the
// pixel below (+stride bytes), minus a small threshold of 2, floored at 0.
double EdgeStrength(const uint8_t* src, size_t stride) {
  const double center = src[0];
  const double horizontal = std::abs((double)src[4] - center);
  const double vertical = std::abs((double)src[stride] - center);
  const double edge = horizontal + vertical;
  return (edge > 2.) ? (edge - 2.) : 0.;
}
| |
| // returns the number of samples added to avg_sum[] average accumulator. |
| uint32_t ExtractCov(const uint8_t* argb, uint32_t w, uint32_t h, size_t stride, |
| double cov[3][3], double avg_sum[3], bool premultiplied) { |
| double avg[3] = {0., 0., 0.}; |
| double tmp[3 * kAvgBlk * kAvgBlk]; |
| uint8_t unmult[4 * kAvgBlk * kAvgBlk]; |
| if (!premultiplied) { |
| uint8_t* dst = unmult; |
| for (uint32_t j = 0; j < h; ++j) { |
| WP2ARGBConvertTo[WP2_Argb_32](argb, w, dst); |
| argb += stride; |
| dst += 4 * kAvgBlk; |
| } |
| argb = unmult; |
| stride = 4 * kAvgBlk; |
| } |
| assert(w <= kAvgBlk && h <= kAvgBlk && w * h > 0); |
| uint32_t num_samples = 0; |
| for (uint32_t idx = 0, j = 0; j < h - 1; ++j, argb += stride) { |
| for (uint32_t i = 0; i < w - 1; ++i) { |
| double r = argb[4 * i + 1]; |
| double g = argb[4 * i + 2]; |
| double b = argb[4 * i + 3]; |
| double x = EdgeStrength(argb + 4 * i + 1, stride); |
| double y = EdgeStrength(argb + 4 * i + 2, stride); |
| double z = EdgeStrength(argb + 4 * i + 3, stride); |
| avg_sum[0] += r; |
| avg_sum[1] += g; |
| avg_sum[2] += b; |
| avg[0] += x; |
| avg[1] += y; |
| avg[2] += z; |
| tmp[idx++] = x; |
| tmp[idx++] = y; |
| tmp[idx++] = z; |
| ++num_samples; |
| } |
| } |
| avg[0] /= num_samples; |
| avg[1] /= num_samples; |
| avg[2] /= num_samples; |
| for (uint32_t idx = 0, i = 0; i < num_samples; ++i, idx += 3) { |
| const double x = tmp[idx + 0] - avg[0]; |
| const double y = tmp[idx + 1] - avg[1]; |
| const double z = tmp[idx + 2] - avg[2]; |
| cov[0][0] += x * x; |
| cov[0][1] += y * x; |
| cov[0][2] += z * x; |
| cov[1][0] += x * y; |
| cov[1][1] += y * y; |
| cov[1][2] += z * y; |
| cov[2][0] += x * z; |
| cov[2][1] += y * z; |
| cov[2][2] += z * z; |
| } |
| return num_samples; |
| } |
| |
| bool ExtractCovarianceMtx(const uint8_t* argb, uint32_t w, uint32_t h, |
| size_t stride, double cov[3][3], int16_t avg[3], |
| bool premultiplied) { |
| if (w <= 4u || h <= 4u || (uint64_t)w * h <= 64 * 64) { |
| return false; // not enough sample to build reliable estimate! |
| } |
| double avg_sum[3]; |
| for (uint32_t j = 0; j < 3; ++j) { |
| for (uint32_t i = 0; i < 3; ++i) cov[j][i] = 0.; |
| avg_sum[j] = 0.; |
| } |
| uint32_t num_samples = 0; |
| for (uint32_t j = 0; j < h; j += kAvgBlk, argb += kAvgBlk * stride) { |
| for (uint32_t i = 0; i < w; i += kAvgBlk) { |
| const uint32_t W = std::min(i + kAvgBlk, w) - i; |
| const uint32_t H = std::min(j + kAvgBlk, h) - j; |
| num_samples += |
| ExtractCov(argb + i, W, H, stride, cov, avg_sum, premultiplied); |
| } |
| } |
| // Normalize a bit the covariance matrix (not totally necessary, just comfort) |
| for (uint32_t j = 0; j < 3; ++j) { |
| for (uint32_t i = 0; i < 3; ++i) cov[j][i] /= num_samples; |
| } |
| // store the average |
| for (uint32_t i = 0; i < 3; ++i) { |
| if (i == 0) { |
| // We only centralize the 'luma' channel. Empirically, the U/V channels |
| // are more unstable to centralization and renormalization (during later |
| // calls NormalizeFromInput()). |
| avg_sum[i] /= num_samples; |
| avg[i] = (int16_t)Clamp<double>(avg_sum[i], 0, kRgbMax); |
| } else { |
| avg[i] = kRgbMax >> 1; |
| } |
| } |
| return true; |
| } |
| |
| // see https://www.geometrictools.com/Documentation/RobustEigenSymmetric3x3.pdf |
| void ComputeEigenVector(double lambda, const double cov[3][3], double v[3]) { |
| double v0[3] = {cov[0][0] - lambda, cov[0][1], cov[0][2]}; |
| double v1[3] = {cov[0][1], cov[1][1] - lambda, cov[1][2]}; |
| double v2[3] = {cov[0][2], cov[1][2], cov[2][2] - lambda}; |
| double tmp[2][3], *best = v; |
| double norm0 = CrossProduct(v0, v1, v); |
| double norm1 = CrossProduct(v1, v2, tmp[0]); |
| if (norm1 > norm0) { |
| best = tmp[1]; |
| norm0 = norm1; |
| } |
| norm1 = CrossProduct(v2, v0, tmp[1]); |
| if (norm1 > norm0) best = tmp[1]; |
| if (best != v) memcpy(v, best, 3 * sizeof(v[0])); |
| } |
| |
// Computes the three eigenvalues of the symmetric 3x3 matrix 'cov' into
// 'out'. For a non-diagonal matrix they are returned in decreasing order;
// a diagonal matrix returns its diagonal as-is.
// see https://www.geometrictools.com/Documentation/RobustEigenSymmetric3x3.pdf
void ComputeEigenValues(double cov[3][3], double out[3]) {
  const double m00 = cov[0][0], m01 = cov[0][1], m02 = cov[0][2];
  const double m11 = cov[1][1], m12 = cov[1][2];
  const double m22 = cov[2][2];
  if (std::max({m00, m01, m02, m11, m12, m22}) == 0.) {
    // Degenerate all-zero matrix.
    out[0] = out[1] = out[2] = 0.;
    return;
  }
  const double off_diag = m01 * m01 + m02 * m02 + m12 * m12;
  if (off_diag == 0.) {
    // Already diagonal: the eigenvalues are the diagonal entries.
    out[0] = m00;
    out[1] = m11;
    out[2] = m22;
    return;
  }

  // Robust closed-form solving of the characteristic cubic polynomial.
  // No pre-conditioning (division by 'max') is needed here because the
  // cov[][] matrix is already quite normalized.
  const double mean = (m00 + m11 + m22) / 3.;  // trace / 3
  const double d0 = m00 - mean, d1 = m11 - mean, d2 = m22 - mean;
  const double radius =
      std::sqrt((d0 * d0 + d1 * d1 + d2 * d2 + 2. * off_diag) * (2. / 3.));
  // Cofactors of the trace-shifted matrix.
  const double cof0 = d1 * d2 - m12 * m12;
  const double cof1 = m01 * d2 - m12 * m02;
  const double cof2 = m01 * m12 - d1 * m02;
  const double cos_arg =
      4.0 * (d0 * cof0 - m01 * cof1 + m02 * cof2) / (radius * radius * radius);
  const double angle = std::acos(std::max(-1., std::min(1., cos_arg))) / 3.;
  const double c_big = std::cos(angle);                       // largest root
  const double c_small = std::cos(angle + (2. * M_PI / 3.));  // smallest root
  const double c_mid = -(c_big + c_small);  // the three cosines sum to zero

  out[0] = mean + radius * c_big;
  out[1] = mean + radius * c_mid;
  out[2] = mean + radius * c_small;
}
| |
| bool ExtractEigenMtx(double cov[3][3], double out[9]) { |
| // Compute the eigen-values |
| double lambdas[3]; |
| ComputeEigenValues(cov, lambdas); |
| // very simple and conservative classifier |
| if (lambdas[0] < 10.) return false; |
| |
| // Compute 1rst eigen-vector |
| double ev[3][3]; |
| ComputeEigenVector(lambdas[0], cov, ev[0]); |
| |
| // for the partitioning algo (based on variance) to work correctly, we |
| // need a primary axis that is close to a 'luma-like' one. |
| const double kLumaAxis[3] = {.299, .587, .114}; // L2-norm = 0.668 |
| if (Distance(ev[0], kLumaAxis) > 0.4 * 0.668) { |
| return false; |
| } |
| |
| // Compute 2nd eigen-vector |
| const double kThreshold = 0.000001; |
| if (lambdas[1] <= kThreshold) { // MONOCHROME |
| const double kAxis[3] = {0., 1., -1.}; |
| CrossProduct(kAxis, ev[0], ev[1]); |
| } else { |
| // remove ev[0] components from columns |
| for (uint32_t i = 0; i < 3; ++i) { |
| const double c = Dot(cov[i], ev[0]); |
| for (uint32_t j = 0; j < 3; ++j) cov[i][j] -= c * ev[0][j]; |
| } |
| ComputeEigenVector(lambdas[1], cov, ev[1]); |
| } |
| CrossProduct(ev[0], ev[1], ev[2]); |
| |
| // final dispatch |
| for (uint32_t j = 0; j < 3; ++j) { |
| for (uint32_t i = 0; i < 3; ++i) out[3 * i + j] = ev[j][i]; |
| } |
| return true; |
| } |
| |
// Rescales 'custom_to_rgb_matrix' so that, over all pixels of the image, the
// custom-space values span the full [-max_value, max_value] range, then
// converts the result to fixed-point coefficients in 'out'. 'avg' is the
// average color subtracted from each (un-premultiplied) sample first.
// Returns false if a matrix inversion fails or if the fixed-point
// coefficients fall outside the encodable range.
bool NormalizeFromInput(const uint8_t* argb, uint32_t w, uint32_t h,
                        size_t stride, bool premultiplied, double max_value,
                        const int16_t avg[3], double custom_to_rgb_matrix[9],
                        int16_t out[9]) {
  double rgb_to_custom_matrix[9];
  if (!TryInvertMatrix(custom_to_rgb_matrix, rgb_to_custom_matrix)) {
    return false;
  }

  // Find the extrema of the custom-space values over the whole image.
  double maxs = -max_value, mins = max_value;
  for (uint32_t y = 0; y < h; ++y) {
    for (uint32_t x = 0; x < w; ++x) {
      const uint8_t a = argb[4 * x + 0];
      // Un-premultiply the samples if needed (a fully transparent pixel is
      // treated as zero RGB).
      const double inv_a =
          (premultiplied && a < 255) ? (a > 0 ? 255. / a : 0.) : 1.;
      const double rgb[3] = {inv_a * argb[4 * x + 1] - avg[0],
                             inv_a * argb[4 * x + 2] - avg[1],
                             inv_a * argb[4 * x + 3] - avg[2]};
      const double X = Dot(rgb_to_custom_matrix + 0, rgb);
      const double Y = Dot(rgb_to_custom_matrix + 3, rgb);
      const double Z = Dot(rgb_to_custom_matrix + 6, rgb);
      maxs = std::max({maxs, X, Y, Z});
      mins = std::min({mins, X, Y, Z});
    }
    argb += stride;
  }

  // Scale rgb_to_custom_matrix so it goes from ~kRgbBits to yuv_bits_ (so it
  // uses the whole yuv_bits_ range). 'max_custom' is floored at 1. to avoid
  // blowing up the scale on near-constant images.
  const double max_custom = std::max({1., std::abs(mins), std::abs(maxs)});
  const double norm = max_value / max_custom;
  for (uint32_t i = 0; i < 9; ++i) rgb_to_custom_matrix[i] *= norm;

  // Compute custom_to_rgb_matrix, prepare it for fixed point.
  if (!TryInvertMatrix(rgb_to_custom_matrix, custom_to_rgb_matrix)) {
    return false;
  }
  MultiplyMatrix(custom_to_rgb_matrix, kMtxFixedPointMul);

  // Round to integer coefficients and check that the largest magnitude is
  // neither degenerately small nor overflowing kEigenBits bits.
  uint32_t max = 0;
  for (uint32_t i = 0; i < 9; ++i) {
    out[i] = (int16_t)lround(custom_to_rgb_matrix[i]);
    max = std::max(max, (uint32_t)std::abs(out[i]));
  }
  constexpr uint32_t kMin = 10, kMax = (1 << kEigenBits) - 1;
  if (max < kMin || max > kMax) return false;
  return true;
}
| |
| } // anonymous namespace |
| |
| //------------------------------------------------------------------------------ |
| |
// Tries to derive a custom colorspace matrix adapted to the content of
// 'image'. On success, type_ is set to Csp::kCustom and the transform state
// (im_, avg8_, avg10_) is updated; otherwise returns
// WP2_STATUS_INVALID_COLORSPACE and leaves type_ unchanged.
WP2Status CSPTransform::Optimize(const ArgbBuffer& image) {
  WP2_CHECK_OK(image.format() == WP2_Argb_32 || image.format() == WP2_ARGB_32,
               WP2_STATUS_INVALID_PARAMETER);
  WP2_CHECK_OK(!image.IsEmpty(), WP2_STATUS_INVALID_PARAMETER);
  const bool is_premultiplied = (image.format() == WP2_Argb_32);
  // ExtractCov() uses the WP2ARGBConvertTo[] table for non-premultiplied
  // input, so make sure it is initialized.
  if (!is_premultiplied) WP2ArgbConverterInit();
  const uint8_t* const argb = image.GetRow8(0);
  const uint32_t w = image.width();
  const uint32_t h = image.height();
  const size_t stride = image.stride();
  SetYUVBounds(-(1 << kYuvMaxPrec), (1 << kYuvMaxPrec) - 1);

  // Pipeline: covariance statistics -> eigen decomposition -> normalization
  // to fixed-point. Any stage may bail out if the content is unsuitable.
  double cov[3][3];
  double custom_to_rgb_matrix[9];
  int16_t im[9];
  if (ExtractCovarianceMtx(argb, w, h, stride, cov, avg8_, is_premultiplied) &&
      ExtractEigenMtx(cov, custom_to_rgb_matrix) &&
      NormalizeFromInput(
          argb, w, h, stride, is_premultiplied,
          std::max((int16_t)std::abs(min_yuv_value_), max_yuv_value_), avg8_,
          custom_to_rgb_matrix, im)) {
    int32_t error;
    WP2_CHECK_STATUS(MakeEigenMatrixEncodable(im, im_, &error));
    if (error > 0) {
      // 'kMaxDelta' should be big enough to correct most errors, though it may
      // happen that some ExtractEigenMtx() outputs are too imprecise. In this
      // case, encode those as is.
      std::copy(im, im + 9, im_);
      // Reducing 'kMaxDelta' and keeping the "quantized" matrix (assuming
      // 'error' is reasonable) is possible but it leads to poor results.
    }
    if (ComputeRGBToYUVMatrix()) {
      type_ = Csp::kCustom;
      assert(yuv_depth_.precision() <= kYuvMaxPrec);
      // Keep the 10-bit average in sync with the 8-bit one (8b -> 10b shift).
      for (uint32_t i : {0, 1, 2}) avg10_[i] = LeftShift(avg8_[i], 2);
      return WP2_STATUS_OK;
    }
  }
  return WP2_STATUS_INVALID_COLORSPACE;
}
| |
| //------------------------------------------------------------------------------ |
| |
| void CSPTransform::Apply(ArgbBuffer* const image, BitDepth bit_depth, |
| uint32_t x, uint32_t y, uint32_t w, uint32_t h) const { |
| if (image == nullptr || image->IsEmpty()) return; |
| for (uint32_t j = 0; j < h; ++j) { |
| if (y + j >= image->height()) break; |
| uint8_t* const dst = image->GetRow8(y + j); |
| for (uint32_t i = x; i < std::min(x + w, image->width()); ++i) { |
| // Set alpha to opaque. |
| dst[4 * i + 0] = kAlphaMax; |
| int16_t yuv[3]; |
| Rgb8ToYuv(&dst[4 * i + 1], yuv); |
| for (uint32_t c = 0; c < 3; ++c) { |
| // Convert to 8b-unsigned. |
| dst[4 * i + 1 + c] = (uint8_t)Clamp( |
| ChangePrecision(yuv[c] - bit_depth.min(), bit_depth.num_bits, 8), 0, |
| 255); |
| } |
| } |
| } |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| } // namespace WP2 |