| // Copyright 2025 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // Note: This file refers to modifiers in ETC1 spec as "selectors". The jargon |
| // was inherited from etcpak. |
| |
| use std::simd::prelude::*; |
| use std::simd::{Mask, Simd}; |
| |
| use crate::{Reg, Reg32, UReg, SIMD_WIDTH}; |
| |
/// ETC1 selector (modifier) tables, one row per table codeword.
///
/// Each row is `[small, large]`. Only the positive magnitudes are stored: the
/// tables in the ETC1 spec are symmetric, so the negative entries are obtained
/// by negating these.
pub const TABLES: [[i16; 2]; 8] = [
    [2, 8],
    [5, 17],
    [9, 29],
    [13, 42],
    [18, 60],
    [24, 80],
    [33, 106],
    [47, 183],
];
| |
| /// Conditionally exchange the bottom left 2x2 block with top right 2x2 block, |
| /// if `flip` for that lane is true. |
| /// |
| /// i.e. the goal is to flip from: |
| /// ```text |
| /// aeim |
| /// bfjn |
| /// cgko |
| /// dhlp |
| /// ``` |
| /// to: |
| /// ```text |
| /// aecg |
| /// bfdh |
| /// imko |
| /// jnlp |
| /// ``` |
| #[inline] |
| pub fn flip_pixels(d: &[[[Reg; 3]; 4]; 4], flip: Mask<i16, SIMD_WIDTH>) -> [[[Reg; 3]; 4]; 4] { |
| let mut o = [[[Reg::default(); 3]; 4]; 4]; |
| for y0 in [0, 2] { |
| for x0 in [0, 2] { |
| for y1 in 0..2 { |
| for x1 in 0..2 { |
| for ch in 0..3 { |
| if y0 == x0 { |
| o[y0 + y1][x0 + x1][ch] = d[y0 + y1][x0 + x1][ch]; |
| } else { |
| o[y0 + y1][x0 + x1][ch] = |
| flip.select(d[x0 + y1][y0 + x1][ch], d[y0 + y1][x0 + x1][ch]); |
| } |
| } |
| } |
| } |
| } |
| } |
| o |
| } |
| |
| /// Flip the selector codeword if `flip` for that lane is true. |
| /// |
| /// See [`flip_pixels`] for a description of the flip operation. |
| #[inline] |
| pub fn flip_selectors(x: UReg, flip: Mask<i16, SIMD_WIDTH>) -> UReg { |
| let keep = x & Simd::splat(0xCC33); |
| let bottom_left = x & Simd::splat(0x00CC); |
| let top_right = x & Simd::splat(0x3300); |
| |
| let flipped = keep | (bottom_left << 6) | (top_right >> 6); |
| flip.select(flipped, x) |
| } |
| |
| pub struct Fit { |
| pub err: Reg32, |
| pub table_idx: UReg, |
| pub selector_lo: UReg, |
| pub selector_hi: UReg, |
| } |
| |
| /// Search for the optimal table and selectors for a subblock. |
| /// |
| /// `data` should be in flipped layout, i.e. 4x2. |
| /// |
| /// The error function used here is a bit quirky, see code comment for details. |
| #[inline] |
| pub fn search_table_and_selectors_subblock(data: &[[[Reg; 3]; 4]], base_color: [Reg; 3]) -> Fit { |
| assert_eq!(data.len(), 2); |
| // Use fold to compute minimum. Essentially a vector version of min_by_key. |
| TABLES |
| .iter() |
| .enumerate() |
| .fold(None, |best_fit, (table_idx, sel_table)| { |
| let mut outer_err = Reg32::splat(0); |
| let mut selector_lo = UReg::splat(0); |
| let mut selector_hi = UReg::splat(0); |
| for y in 0..2 { |
| for x in 0..4 { |
| // Below, we search for the optimal selector among [-lg, -sm, sm, lg] (sm |
| // and lg is from the selector table). |
| // |
| // We use the error metric: |
| // abs(gray(q + s - x)) |
| // where q = quantized average, s = selector, x = pixel before compression |
| // gray(p) = 19*p.r + 38*p.g + 7*p.b (cf. rec601) |
| // |
| // Note that this is abs(gray(..)) not gray(abs(..)), i.e. the absolute |
| // is taken after computing to grayscale. This allows precomputing |
| // gray(q-x), then exploiting the fact that the selector is same for all |
| // three channels to calculate the final error with a single addition. |
| // |
| // We will first precompute gray(q - x). |
| let mut base_err = Reg::splat(0); |
| let rgb_weight = [19, 38, 7]; |
| for ch in 0..3 { |
| base_err += (base_color[ch] - data[y][x][ch]) * Simd::splat(rgb_weight[ch]); |
| } |
| |
| // Now, the sign of selector can be easily decided. To minimize the |
| // absolute value, the selector should be the opposite sign of |
| // gray(q - x). |
| let prefer_neg = base_err.simd_gt(Simd::splat(0)); |
| |
| // Finally, we compute the error metric for both sm and lg and decide the |
| // winner. |
| let base_err_abs = base_err.abs(); |
| // Subtract in the direction that the final error metric is smaller. |
| // The selector is same for all three channels, so just multiply it by the |
| // total weight. |
| let weight_sum = 64; |
| let err_sm = (base_err_abs - Reg::splat(sel_table[0] * weight_sum)).abs(); |
| let err_lg = (base_err_abs - Reg::splat(sel_table[1] * weight_sum)).abs(); |
| let prefer_lg = err_lg.simd_lt(err_sm); |
| |
| // The error can be fairly large (a crude upper bound is 255*64). To avoid |
| // overflow after squaring, we use widening multiply and accumulate. This |
| // is somewhat expensive. |
| let best_err = prefer_lg.select(err_lg, err_sm).cast::<i32>(); |
| outer_err += best_err * best_err; |
| |
| let pixel_idx = (y + x * 4) as u16; |
| selector_lo |= prefer_lg.select(UReg::splat(1 << pixel_idx), UReg::splat(0)); |
| selector_hi |= prefer_neg.select(UReg::splat(1 << pixel_idx), UReg::splat(0)); |
| } |
| } |
| |
| let table_idx = UReg::splat(table_idx as u16); |
| match best_fit { |
| None => Some(Fit { err: outer_err, table_idx, selector_lo, selector_hi }), |
| Some(best) => { |
| let lt_32 = outer_err.simd_lt(best.err); |
| let lt = lt_32.cast::<i16>(); |
| Some(Fit { |
| err: lt_32.select(outer_err, best.err), |
| table_idx: lt.select(table_idx, best.table_idx), |
| selector_lo: lt.select(selector_lo, best.selector_lo), |
| selector_hi: lt.select(selector_hi, best.selector_hi), |
| }) |
| } |
| } |
| }) |
| .unwrap() |
| } |
| |
| /// Search through possible selector tables and selector values for each |
| /// subblock. |
| /// |
| /// Returns: Four 16-bit codewords coding the optimal coefficients. |
| #[inline] |
| pub fn search_table_and_selectors( |
| mut hdr0: UReg, |
| hdr1: UReg, |
| data: &[[[Reg; 3]; 4]; 4], |
| base_color: [[Reg; 3]; 2], |
| ) -> [UReg; 4] { |
| // We need to work on pixels in the first subblock, then the second. To allow |
| // uniform indices, the flip functions takes care of moving the first |
| // subblock to the top half and the second to bottom half. We will fix up |
| // the shuffled results in the end. |
| let flip = (hdr0 & (UReg::splat(1))).simd_ne(UReg::splat(0)); |
| let permuted_data = flip_pixels(&data, !flip); |
| |
| let mut selector_lo = UReg::splat(0); |
| let mut selector_hi = UReg::splat(0); |
| |
| for subblock in 0..2 { |
| let best_fit = search_table_and_selectors_subblock( |
| &permuted_data[subblock * 2..subblock * 2 + 2], |
| base_color[subblock], |
| ); |
| let subblock_bit = match subblock { |
| 0 => 5, |
| 1 => 2, |
| _ => unreachable!(), |
| }; |
| hdr0 |= best_fit.table_idx << subblock_bit; |
| selector_lo |= best_fit.selector_lo << (subblock as u16 * 2); |
| selector_hi |= best_fit.selector_hi << (subblock as u16 * 2); |
| } |
| selector_lo = flip_selectors(selector_lo, !flip); |
| selector_hi = flip_selectors(selector_hi, !flip); |
| [selector_lo, selector_hi, hdr0, hdr1] |
| } |