blob: b4711c3e501e3694fd7794455a1e427ea266f1af [file] [log] [blame]
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// -----------------------------------------------------------------------------
//
// CodedBlock, for encoding
//
// Author: Skal (pascal.massimino@gmail.com)
#include "src/enc/block_enc.h"
#include <algorithm>
#include <cassert>
#include "src/common/lossy/block.h"
#include "src/common/lossy/predictor.h"
#include "src/common/lossy/transforms.h"
#include "src/common/symbols.h"
#include "src/enc/trellis.h"
#include "src/enc/wp2_enc_i.h"
#include "src/utils/front_mgr.h"
#include "src/utils/utils.h"
#include "src/wp2/format_constants.h"
namespace WP2 {
//------------------------------------------------------------------------------
// Counters
WP2Status Counters::Init(uint32_t effort, bool use_aom_residuals,
const SymbolRecorder& recorder) {
effort_ = effort;
use_aom_residuals_ = use_aom_residuals;
recorder_ = &recorder;
predictor_.reset(new (WP2Allocable::nothrow) SymbolCounter(recorder_));
WP2_CHECK_ALLOC_OK(predictor_ != nullptr);
WP2_CHECK_STATUS(
predictor_->Allocate({kSymbolModeY, kSymbolModeUV, kSymbolModeA}));
transform_.reset(new (WP2Allocable::nothrow) SymbolCounter(recorder_));
WP2_CHECK_ALLOC_OK(transform_ != nullptr);
WP2_CHECK_STATUS(transform_->Allocate(
{kSymbolHasCoeffs, kSymbolTransform, kSymbolSplitTransform}));
segment_id_.reset(new (WP2Allocable::nothrow) SymbolCounter(recorder_));
WP2_CHECK_ALLOC_OK(segment_id_ != nullptr);
WP2_CHECK_STATUS(segment_id_->Allocate({kSymbolSegmentId}));
if (effort >= kSlowCounterEffortThreshold) {
// Using the UpdatingSymbolCounter slows down encoding by ~9%
residuals_slow_.reset(new (WP2Allocable::nothrow)
UpdatingSymbolCounter(recorder_));
WP2_CHECK_ALLOC_OK(residuals_slow_ != nullptr);
residuals_ = residuals_slow_.get();
} else {
residuals_fast_.reset(new (WP2Allocable::nothrow) SymbolCounter(recorder_));
WP2_CHECK_ALLOC_OK(residuals_fast_ != nullptr);
residuals_ = residuals_fast_.get();
}
if (use_aom_residuals) {
WP2_CHECK_STATUS(residuals()->Allocate(
{kSymbolTransform, kAOMEOBPT4, kAOMEOBPT8, kAOMEOBPT16, kAOMEOBPT32,
kAOMEOBPT64, kAOMEOBPT128, kAOMEOBPT256, kAOMEOBPT512, kAOMEOBPT1024,
kAOMEOBExtra, kAOMCoeffBaseEOB, kAOMCoeffBase, kAOMCoeffBaseRange}));
} else {
WP2_CHECK_STATUS(residuals()->Allocate(
{kSymbolDC, kSymbolResidualUseBounds, kSymbolResidualBound1IsX,
kSymbolResidualUseBound2, kSymbolResidualIsZero, kSymbolResidualIsOne,
kSymbolResidualIsTwo, kSymbolResidualEndOfBlock,
kSymbolResidualHasOnlyOnesLeft}));
}
return WP2_STATUS_OK;
}
WP2Status Counters::CopyFrom(const Counters& other,
const SymbolRecorder& recorder) {
if (&recorder != recorder_) {
WP2_CHECK_STATUS(Init(other.effort_, other.use_aom_residuals_, recorder));
} else {
// Init() was already called.
assert(transform_ != nullptr && residuals_ != nullptr &&
predictor_ != nullptr);
}
if (residuals_slow_ != nullptr) {
WP2_CHECK_STATUS(residuals_slow_->CopyFrom(*other.residuals_slow_));
}
return WP2_STATUS_OK;
}
//------------------------------------------------------------------------------
// CodedBlock (encoder)
void CodedBlock::SetSrcInput(const YUVPlane& in) {
WP2_ASSERT_STATUS(in_.SetView(in, AsRect()));
}
//------------------------------------------------------------------------------
void CodedBlock::ExtractFrom(const YUVPlane& yuv, Channel channel) const {
Plane16 src, dst;
WP2_ASSERT_STATUS(src.SetView(yuv.GetChannel(channel), AsRect()));
WP2_ASSERT_STATUS(dst.SetView(out_.GetChannel(channel), AsRect()));
WP2_ASSERT_STATUS(dst.Copy(src, /*resize_if_needed=*/false));
}
uint64_t CodedBlock::GetDisto(Channel channel) const {
const Plane16& p_in = in_.GetChannel(channel);
const Plane16& p_out = out_.GetChannel(channel);
// TODO(skal): experiment calling WP2SumSquaredErrorHalfBlock for U/V channel
return WP2SumSquaredErrorBlock(p_in.Row(0), p_in.Step(), p_out.Row(0),
p_out.Step(), w_pix(), h_pix());
}
void CodedBlock::GetResiduals(Channel channel, uint32_t tf_i,
const Plane16& prediction,
BlockCoeffs16* const res) const {
const Plane16& in = in_.GetChannel(channel);
const Rectangle tf_rect =
blk().LocalTfRect(GetCodingParams(channel).split_tf, tf_i);
SubtractBlockFunc sub_block = WP2::SubtractBlock[TrfLog2[tf_rect.width] - 2];
sub_block(&in.At(tf_rect.x, tf_rect.y), in.Step(),
&prediction.At(tf_rect.x, tf_rect.y), prediction.Step(),
(*res)[tf_i], tf_rect.width, tf_rect.height);
#if defined(WP2_BITTRACE)
if (original_res_ != nullptr) {
memcpy((*original_res_)[channel][tf_i], &(*res)[tf_i][0],
tf_rect.width * tf_rect.height * sizeof((*res)[tf_i][0]));
}
#endif
}
void CodedBlock::TransformAndReconstruct(
const EncoderConfig& config, const BlockContext& context,
const Segment& segment, Channel channel, uint32_t tf_i,
bool reduced_transform, const Plane16& prediction,
const BlockCoeffs16* const res, BlockCoeffs16* const tmp,
Counters* const counters) {
assert(tf_i < GetNumTransforms(channel));
const CodingParams* const params = GetCodingParams(channel);
const QuantMtx& quant = segment.GetQuant(channel);
BlockCoeffs16& dequantized = *tmp;
const BlockSize split_size =
GetSplitSize(dim(), GetCodingParams(channel)->split_tf);
const uint32_t split_w = BlockWidthPix(split_size);
const uint32_t split_h = BlockHeightPix(split_size);
int32_t tf_res[kMaxBlockSizePix2];
WP2Transform2D((*res)[tf_i], params->tf_x(), params->tf_y(), split_w, split_h,
tf_res, reduced_transform);
if (channel == kUChannel || channel == kVChannel) {
assert(params->tf == kDctDct && !params->split_tf);
// Note DC is scaled by NumPix(dim) by WP2Transform2D. We don't
// normalize it when propagating the error, even though the same value
// will have a bigger impact on a smaller block than on a large one.
// This is intentional: since we have fewer pixels in a smaller block,
// we shift them more. But it would also be possible to normalize by a
// factor of NumPix(dim) (multiply dc_error_ by this and devide
// dc_error_next by the same factor). This would mean we apply the same
// color shift regardless of the block size.
tf_res[0] += dc_error_[channel];
dc_error_next_[channel] = quant.DCError(tf_res[0], tdim(channel));
}
// Use some kind of coeff optimization (rd-opt based on av1 coeffs, or simple
// dropout).
const bool use_coeff_optim =
(params->tf != kIdentityIdentity && config.effort >= 7);
const bool use_av1_coeff_optim = (use_coeff_optim && context.use_aom() &&
channel == kYChannel && config.effort >= 8);
// Av1CoeffOptimization works better if quantization uses simple rounding
// instead of biasing towards 0.
const bool use_bias = !use_av1_coeff_optim;
quant.Quantize(tf_res, tdim(channel), IsFirstCoeffDC(channel),
coeffs_[channel][tf_i], &num_coeffs_[channel][tf_i],
dequantized[tf_i], use_bias);
if (use_coeff_optim) {
if (use_av1_coeff_optim) {
Av1CoeffOptimization(
channel, params->tf, tdim(channel), quant, IsFirstCoeffDC(channel),
tf_res, coeffs_[channel][tf_i], &num_coeffs_[channel][tf_i],
dequantized[tf_i], counters->residuals(), /*rate_cost=*/nullptr);
} else {
DropoutCoeffs(tdim(channel), segment.quality_factor_,
coeffs_[channel][tf_i], &num_coeffs_[channel][tf_i],
dequantized[tf_i]);
}
}
if (channel == kYChannel && mtx_set_ != nullptr) {
mtx_set_->DecideUseRndMtx(this); // TODO(skal): later: U/V channel?
}
Reconstruct(channel, tf_i, reduced_transform, &dequantized, prediction);
}
void CodedBlock::Reconstruct(Channel channel, uint32_t tf_i,
bool reduced_transform, BlockCoeffs16* const res,
const Plane16& prediction) const {
assert(tf_i < GetNumTransforms(channel));
const CodingParams& params = GetCodingParams(channel);
const Rectangle tf_rect = blk().LocalTfRect(params.split_tf, tf_i);
WP2InvTransform2D((*res)[tf_i], params.tf_x(), params.tf_y(), tf_rect.width,
tf_rect.height, reduced_transform);
Plane16* const out = &out_.GetChannel(channel);
Plane16 pred_view;
if (prediction.IsEmpty()) {
WP2_ASSERT_STATUS(pred_view.SetView(*out));
PredictBlock(channel, tf_i, pred_view.Row(0), pred_view.Step());
} else {
WP2_ASSERT_STATUS(pred_view.SetView(prediction));
}
AddBlockFunc add_block = WP2::AddBlock[TrfLog2[tf_rect.width] - 2];
const int32_t min = (channel == kAChannel) ? 0 : yuv_min_;
const int32_t max = (channel == kAChannel) ? kAlphaMax : yuv_max_;
add_block(&pred_view.At(tf_rect.x, tf_rect.y), pred_view.Step(), (*res)[tf_i],
tf_rect.width, min, max, &out->At(tf_rect.x, tf_rect.y),
out->Step(), tf_rect.height);
if (params.split_tf && tf_i < 3) {
context_cache_->Reset(/*only_small_right_or_bot_contexts=*/true);
}
}
void CodedBlock::QuantizeAll(const EncoderConfig& config,
const BlockContext& context,
const Segment& segment, Channel channel,
bool reduced_transform, Counters* const counters) {
for (uint32_t tf_i = 0; tf_i < GetNumTransforms(channel); ++tf_i) {
Quantize(config, context, segment, channel, tf_i, reduced_transform,
counters);
}
}
void CodedBlock::QuantizeAllButFirst(const EncoderConfig& config,
const BlockContext& context,
const Segment& segment, Channel channel,
bool reduced_transform,
Counters* const counters) {
for (uint32_t tf_i = 1; tf_i < GetNumTransforms(channel); ++tf_i) {
Quantize(config, context, segment, channel, tf_i, reduced_transform,
counters);
}
}
void CodedBlock::Quantize(const EncoderConfig& config,
const BlockContext& context, const Segment& segment,
Channel channel, uint32_t tf_i,
bool reduced_transform, Counters* const counters) {
BlockCoeffs16 res, tmp;
Plane16* const out = &out_.GetChannel(channel);
PredictBlock(channel, tf_i, out->Row(0), out->Step());
GetResiduals(channel, tf_i, /*prediction=*/*out, &res);
TransformAndReconstruct(config, context, segment, channel, tf_i,
reduced_transform,
/*prediction=*/*out, &res, &tmp, counters);
}
//------------------------------------------------------------------------------
float CodedBlock::ResidualRate(const BlockContext& context, Channel channel,
uint32_t num_channels,
SymbolCounter* const counter) const {
if (context.use_aom()) {
return ResidualWriter::GetRateAOM(*this, channel, counter);
}
// Whether to use the more accurate but a lot more expensive rate.
// Even this one is not perfect because the cost depends on global
// distributions which are not known at this time.
static constexpr bool kUseExpensiveRate = false;
float rate = 0.f;
for (uint32_t tf_i = 0; tf_i < GetNumTransforms(channel); ++tf_i) {
if (kUseExpensiveRate) {
rate += ResidualWriter::GetRate(
channel, num_channels, tdim(channel), coeffs_[channel][tf_i],
num_coeffs_[channel][tf_i], IsFirstCoeffDC(channel), counter);
} else {
rate += ResidualWriter::GetPseudoRate(
channel, num_channels, tdim(channel), coeffs_[channel][tf_i],
num_coeffs_[channel][tf_i], IsFirstCoeffDC(channel), counter);
}
}
return rate;
}
// Quantizes 'coeffs', then dequantizes, detransforms them and computes
// distortion compared to 'res'.
static uint32_t RoundTripDisto(CodedBlock* const cb, const QuantMtx& quant,
Channel channel, TrfSize tdim,
const int16_t* const res,
const int32_t* const coeffs, bool reduced,
int16_t* const quantized_coeffs,
uint32_t* const num_coeffs) {
int16_t dequantized_coeffs[kMaxBlockSizePix2];
quant.Quantize(coeffs, tdim, cb->IsFirstCoeffDC(channel), quantized_coeffs,
num_coeffs, dequantized_coeffs);
const CodedBlock::CodingParams& params = *cb->GetCodingParams(channel);
WP2InvTransform2D(dequantized_coeffs, params.tf_x(), params.tf_y(),
cb->w_pix(), cb->h_pix(), reduced);
return WP2SumSquaredError16s(res, dequantized_coeffs, *num_coeffs);
}
// Computes the rate-distortion score for the U and V channels.
// 'res_u' and 'res_v' should be in frequency space. 'orig_u' and 'orig_v' are
// the original spatial residuals to compare against for distortion.
static WP2Status UVScore(const BlockContext& context,
const EncoderConfig& config, uint32_t tile_pos_x,
uint32_t tile_pos_y, bool has_alpha,
const QuantMtx& quant_u, const QuantMtx& quant_v,
const int16_t orig_u[kMaxBlockSizePix2],
const int16_t orig_v[kMaxBlockSizePix2],
const int32_t res_u[kMaxBlockSizePix2],
const int32_t res_v[kMaxBlockSizePix2], TrfSize dim,
bool reduced, SymbolCounter* const counter,
CodedBlock* const cb, float* score) {
const float lambda_u = quant_u.lambda * 2.f; // Another magic constant.
const float lambda_v = quant_v.lambda * 2.f;
uint32_t tmp_num_coeffs_u, tmp_num_coeffs_v;
int16_t tmp_coeffs_u[kMaxBlockSizePix2];
int16_t tmp_coeffs_v[kMaxBlockSizePix2];
const uint32_t disto =
RoundTripDisto(cb, quant_u, kUChannel, dim, orig_u, res_u, reduced,
tmp_coeffs_u, &tmp_num_coeffs_u) +
RoundTripDisto(cb, quant_v, kVChannel, dim, orig_v, res_v, reduced,
tmp_coeffs_v, &tmp_num_coeffs_v);
// early out if there's ~only DC
if (!reduced && tmp_num_coeffs_u <= 1 && tmp_num_coeffs_v <= 1) {
WP2_CHECK_REDUCED_STATUS(
cb->Store420Scores(config, tile_pos_x, tile_pos_y, lambda_u, lambda_v,
reduced, disto, 0, 0));
*score = 0;
return WP2_STATUS_OK;
}
const uint32_t num_channels = (has_alpha ? 4 : 3);
const float rate_u =
cb->ResidualRate(context, kUChannel, num_channels, counter);
const float rate_v =
cb->ResidualRate(context, kVChannel, num_channels, counter);
*score = disto + lambda_u * rate_u + lambda_v * rate_v;
WP2_CHECK_REDUCED_STATUS(cb->Store420Scores(config, tile_pos_x, tile_pos_y,
lambda_u, lambda_v, reduced,
disto, rate_u, rate_v));
return WP2_STATUS_OK;
}
WP2Status CodedBlock::DecideChromaSubsampling(
const EncoderConfig& config, const BlockContext& context,
uint32_t tile_pos_x, uint32_t tile_pos_y, bool has_alpha,
const QuantMtx& quant_u, const QuantMtx& quant_v,
Counters* const counters) {
assert(!GetCodingParams(kUChannel)->split_tf);
assert(!GetCodingParams(kVChannel)->split_tf);
const BlockSize dim = blk_.dim();
const TrfSize full_dim = kFullDim[dim];
const TrfSize half_dim = kHalfDim[dim];
const uint32_t w_pix = blk_.w_pix();
const uint32_t h_pix = blk_.h_pix();
SubtractBlockFunc sub_block = WP2::SubtractBlock[TrfLog2[w_pix] - 2];
const CodingParams* const u_params = GetCodingParams(kUChannel);
const CodingParams* const v_params = GetCodingParams(kVChannel);
int16_t res_u[kMaxBlockSizePix2], res_v[kMaxBlockSizePix2];
{
int16_t prediction[kMaxBlockSizePix2];
u_params->pred->Predict(*this, kUChannel, /*split_tf=*/false, /*tf_i=*/0,
prediction, w_pix);
sub_block(in_.U.Row(0), in_.U.Step(), prediction, /*preds_step=*/w_pix,
res_u, /*dst_step=*/w_pix, h_pix);
v_params->pred->Predict(*this, kVChannel, /*split_tf=*/false, /*tf_i=*/0,
prediction, w_pix);
sub_block(in_.V.Row(0), in_.V.Step(), prediction, /*preds_step=*/w_pix,
res_v, /*dst_step=*/w_pix, h_pix);
}
int16_t orig_u[kMaxBlockSizePix2], orig_v[kMaxBlockSizePix2];
std::copy(res_u, res_u + w_pix * h_pix, orig_u);
std::copy(res_v, res_v + w_pix * h_pix, orig_v);
int32_t tmp_u[kMaxBlockSizePix2], tmp_v[kMaxBlockSizePix2];
// transform at full resolution
WP2Transform2D(res_u, u_params->tf_x(), u_params->tf_y(), w_pix, h_pix,
tmp_u);
WP2Transform2D(res_v, v_params->tf_x(), v_params->tf_y(), w_pix, h_pix,
tmp_v);
float score444;
WP2_CHECK_STATUS(UVScore(context, config, tile_pos_x, tile_pos_y, has_alpha,
quant_u, quant_v, orig_u, orig_v, tmp_u, tmp_v,
full_dim, /*reduced=*/false, counters->residuals(),
this, &score444));
if (score444 == 0) {
is420_ = false;
WP2_CHECK_REDUCED_STATUS(Store420Decision(config, tile_pos_x, tile_pos_y,
Debug420Decision::k444EarlyExit));
return WP2_STATUS_OK;
}
// inspect half-resolution now
WP2ReduceCoeffs(tmp_u, w_pix, h_pix, tmp_v);
WP2ReduceCoeffs(tmp_u, w_pix, h_pix, tmp_v);
float score420;
WP2_CHECK_STATUS(UVScore(context, config, tile_pos_x, tile_pos_y, has_alpha,
quant_u, quant_v, orig_u, orig_v, tmp_u, tmp_v,
half_dim, /*reduced=*/true, counters->residuals(),
this, &score420));
is420_ = (score420 < score444);
WP2_CHECK_REDUCED_STATUS(Store420Decision(
config, tile_pos_x, tile_pos_y,
is420_ ? Debug420Decision::k420 : Debug420Decision::k444));
return WP2_STATUS_OK;
}
//------------------------------------------------------------------------------
float CodedBlock::PredictorRate(Channel channel,
SymbolCounter* const counter) const {
counter->Clear();
ANSEncCounter enc;
SyntaxWriter::WritePredictors(*this, channel, counter, &enc);
return enc.GetCost();
}
float CodedBlock::TransformRate(Channel channel,
SymbolCounter* const counter) const {
counter->Clear();
ANSEncCounter enc;
SyntaxWriter::WriteSplitTransform(*this, channel, counter, &enc);
SyntaxWriter::WriteHasCoeffs(*this, channel, counter, &enc);
SyntaxWriter::WriteTransform(*this, channel, counter, &enc);
return enc.GetCost();
}
float CodedBlock::SegmentIdRate(const BlockContext& context,
SymbolCounter* const counter) const {
counter->Clear();
ANSEncCounter enc;
context.segment_id_predictor().WriteId(*this, counter, &enc);
return enc.GetCost();
}
//------------------------------------------------------------------------------
WP2Status BlockScorer::Init(const EncoderConfig& config,
const GlobalParams& gparams, Rectangle tile_rect) {
config_ = &config;
gparams_ = &gparams;
tile_rect_ = tile_rect;
WP2_CHECK_ALLOC_OK(pred_mode_map_.resize(
std::max({kYPredModeNum, kUVPredModeNum, kAPredModeNum})));
return WP2_STATUS_OK;
}
void BlockScorer::Set(const BlockContext& context, CodedBlock* cb,
Counters* counters) {
cb_ = cb;
context_ = &context;
counters_ = counters;
// Use the segment given by AssignSegmentId() to compute the lambdas because
// we may end up comparing several 'segment_ids_' which would lead to
// different score computation formulas otherwise.
for (Channel channel : {kYChannel, kUChannel, kVChannel, kAChannel}) {
const Segment& segment =
gparams_->segments_[(channel == kAChannel) ? 0 : cb_->id_];
lambdas_[channel] = cb_->lambda_mult_ * segment.GetQuant(channel).lambda;
}
cached_predictor_ = nullptr;
last_params_ = CodedBlock::CodingParams();
last_segment_id_ = 0;
best_combination_ = BlockScore();
best_combination_no_angle_ = BlockScore();
best_combinations_angle_.clear();
}
WP2Status BlockScorer::ComputeScore() {
CodedBlock::CodingParams& params = *cb_->GetCodingParams(channels_.front());
const bool can_split =
(channels_.front() == kYChannel &&
GetSplitSize(cb_->dim(), /*split=*/true) != cb_->dim());
const CodedBlock::SplitTf forced_split =
can_split ? cb_->GetForcedSplitTf(*config_, tile_rect_)
: CodedBlock::SplitTf::kUnknown;
if (forced_split != CodedBlock::SplitTf::kUnknown) {
// Set by EncoderConfig::info (debug).
params.split_tf = (forced_split == CodedBlock::SplitTf::kForcedSplit);
WP2_CHECK_STATUS(ComputeScoreForEachPredictor(/*forced=*/true));
} else {
for (const bool split_tf : splits_) {
if (split_tf && !can_split) continue;
params.split_tf = split_tf;
WP2_CHECK_STATUS(ComputeScoreForEachPredictor(/*forced=*/false));
}
}
assert(best_combination_.params.pred != nullptr);
return WP2_STATUS_OK;
}
WP2Status BlockScorer::ComputeScoreForEachPredictor(bool forced) {
const Predictors& preds = gparams_->GetPredictors(channels_.front());
CodedBlock::CodingParams& params = *cb_->GetCodingParams(channels_.front());
const Predictor* const forced_pred =
cb_->GetForcedPredictor(*config_, tile_rect_, preds, channels_.front());
if (channels_.front() == kYChannel && cb_->y_context_is_constant_) {
// If the context is constant, all luma predictors will predict the same
// values (plus or minus 1, because of rounding errors), so we just force
// the predictor to 0 and we do not write it to the bitstream. We cannot
// do the same for UV and alpha because the chroma from luma predictor
// does not have this property, but we could send a single bit saying
// whether the predictor is cfl or "other".
params.pred = preds.GetPred(0); // Assumes it is the DC predictor.
WP2_CHECK_STATUS(ComputeScoreForEachTransform(forced));
} else if (forced_pred != nullptr) {
// Predictor is set by EncoderConfig::info (debug).
params.pred = forced_pred;
WP2_CHECK_STATUS(ComputeScoreForEachTransform(/*forced=*/true));
} else {
for (const Predictor* pred : predictors_) {
params.pred = pred;
WP2_CHECK_STATUS(ComputeScoreForEachTransform(forced));
}
}
return WP2_STATUS_OK;
}
WP2Status BlockScorer::ComputeScoreForEachTransform(bool forced) {
CodedBlock::CodingParams& params = *cb_->GetCodingParams(channels_.front());
for (Channel c : channels_) {
if (!params.pred->ComputeParams(cb_, c)) {
// TODO(skal): deal with useless preds
}
}
const TransformPair forced_tf = cb_->GetForcedTransform(*config_, tile_rect_);
if (forced_tf != kUnknownTf) {
// Transform is set by EncoderConfig::info (debug).
params.tf = forced_tf;
WP2_CHECK_STATUS(ComputeScoreForEachSegment(/*forced=*/true));
} else {
for (const TransformPair tf : transforms_) {
params.tf = tf;
WP2_CHECK_STATUS(ComputeScoreForEachSegment(forced));
}
}
return WP2_STATUS_OK;
}
WP2Status BlockScorer::ComputeScoreForEachSegment(bool forced) {
if (segment_ids_.empty()) {
// Just use the 'cb_->id_' value which was set by AssignSegmentId().
WP2_CHECK_STATUS(ComputeScoreForEachChannel(forced));
} else {
for (uint8_t segment_id : segment_ids_) {
cb_->id_ = segment_id;
WP2_CHECK_STATUS(ComputeScoreForEachChannel(forced));
}
}
return WP2_STATUS_OK;
}
WP2Status BlockScorer::ComputeScoreForEachChannel(bool forced) {
BlockScore combination;
combination.params = *cb_->GetCodingParams(channels_.front());
combination.segment_id = cb_->id_;
if (!forced) {
// Each unique combination should be tried only once.
assert(combination.params != last_params_ ||
combination.segment_id != last_segment_id_);
assert(combination.params != best_combination_.params ||
combination.segment_id != best_combination_.segment_id);
}
// Recompute the prediction of the top left sub-block if needed.
// TODO(yguyon): See if the prediction of !split can be reused for split.
const bool recompute_top_left_residuals =
(cached_predictor_ != combination.params.pred ||
cached_split_tf_ != combination.params.split_tf);
const uint32_t num_channels = (gparams_->maybe_use_lossy_alpha_ ? 4 : 3);
const bool reduced_tf = (channels_.front() == kUChannel && cb_->is420_);
const Segment& segment =
gparams_->segments_[(channels_.front() == kAChannel) ? 0 : cb_->id_];
// Distortion (sum of squared errors).
uint64_t disto[2] = {0, 0}; // Per channel.
// Estimated costs in bits.
float pred_rate[2] = {0.f, 0.f}; // Per channel.
float tf_rate[2] = {0.f, 0.f}; // Per channel.
float segment_id_rate = 0.f; // Per block (only for luma channel).
float res_rate[2] = {0.f, 0.f}; // Per channel.
// Aggregated distortion and rates. Lower means better.
float score[2] = {0.f, 0.f}; // Per channel.
// Note: the values above are initialized only to prevent compiler warnings.
for (Channel c : channels_) {
const uint32_t ci = (c == kUChannel) ? 1 : 0; // 'c' index in 'channels_'.
Plane16 prediction;
WP2_ASSERT_STATUS(prediction.SetView(prediction_cache_[ci][0],
kMaxBlockSizePix, kMaxBlockSizePix,
kMaxBlockSizePix));
BlockCoeffs16& residuals_cache = residuals_cache_[ci];
if (recompute_top_left_residuals) {
cb_->PredictBlock(c, /*tf_i=*/0, prediction.Row(0), prediction.Step());
cb_->GetResiduals(c, /*tf_i=*/0, prediction, &residuals_cache);
if (c == channels_.back()) { // Remember what is cached for all channels.
cached_split_tf_ = combination.params.split_tf;
cached_predictor_ = combination.params.pred;
}
}
BlockCoeffs16 residuals, reconstructed; // Temporary buffers.
// At least one of {pred, tf} changed, so always reconstruct the top left
// sub-block...
cb_->TransformAndReconstruct(*config_, *context_, segment, c, /*tf_i=*/0,
reduced_tf, prediction, &residuals_cache,
&reconstructed, counters_);
// ... and predict / reconstruct the other sub-blocks (that depend on
// the results of top left one), if any.
for (uint32_t tf_i = 1; tf_i < cb_->GetNumTransforms(c); ++tf_i) {
cb_->PredictBlock(c, tf_i, prediction.Row(0), prediction.Step());
cb_->GetResiduals(c, tf_i, prediction, &residuals);
cb_->TransformAndReconstruct(*config_, *context_, segment, c, tf_i,
reduced_tf, prediction, &residuals,
&reconstructed, counters_);
}
if (c == channels_.back()) { // Remember what is cached for all channels.
last_params_ = combination.params;
last_segment_id_ = combination.segment_id;
}
// Compute distortion.
disto[ci] = cb_->GetDisto(c);
// TODO(yguyon): In some cases, this combination could be discarded right
// now because disto[ci] > best_combination_.score
// Estimate the cost of this configuration.
// Note: some rates may be skipped if all combinations lead to the same
// partial cost, but it is simpler as is and BlockScorer might be
// called again with different configurations.
pred_rate[ci] = cb_->PredictorRate(c, counters_->predictor());
tf_rate[ci] = cb_->TransformRate(c, counters_->transform());
if (c == kYChannel) {
// Segment ID is decided for luma only and signaled once per block.
segment_id_rate = cb_->SegmentIdRate(*context_, counters_->segment_id());
} else {
segment_id_rate = 0.f;
assert(segment_ids_.empty());
}
res_rate[ci] =
cb_->ResidualRate(*context_, c, num_channels, counters_->residuals());
score[ci] = disto[ci] + lambdas_[c] * (pred_rate[ci] + tf_rate[ci] +
segment_id_rate + res_rate[ci]);
combination.score += score[ci];
combination.disto += disto[ci];
}
const bool is_best = (best_combination_.params.pred == nullptr ||
combination < best_combination_);
if (is_best) {
best_combination_ = combination;
}
if (combination.params.pred->IsAngle(nullptr)) {
WP2_CHECK_ALLOC_OK(best_combinations_angle_.push_back(combination));
} else if (best_combination_no_angle_.params.pred == nullptr ||
combination < best_combination_no_angle_) {
best_combination_no_angle_ = combination;
}
#if !defined(WP2_REDUCE_BINARY_SIZE)
// Store scores for debug.
for (Channel c : channels_) {
const uint32_t ci = (c == kUChannel) ? 1 : 0;
cb_->StorePredictionScore(
*config_, tile_rect_, c, *cb_->GetCodingParams(c)->pred,
cb_->GetCodingParams(c)->tf, disto[ci],
cb_->lambda_mult_ * segment.GetQuant(c).lambda, pred_rate[ci],
tf_rate[ci], segment_id_rate, res_rate[ci], score[ci], is_best,
CodedBlock::SearchType::kPredAndTransform);
}
#endif // WP2_REDUCE_BINARY_SIZE
return WP2_STATUS_OK;
}
const VectorNoCtor<bool>& BlockScorer::RefinePredictors() {
// Set all predictors to "should-not-be-refined".
std::fill(pred_mode_map_.begin(), pred_mode_map_.end(), false);
// Only angle predictors are refined and returned.
if (best_combinations_angle_.empty()) return pred_mode_map_;
// Choose to refine all the main angle predictors within a percentage of the
// lowest score. This helps refine the predictors with similar scores.
std::sort(best_combinations_angle_.begin(), best_combinations_angle_.end());
const float min_score = best_combinations_angle_.front().score; // Best.
const float max_score = best_combinations_angle_.back().score; // Worst.
const float fraction = config_->effort / 9.f; // Max effort includes all.
const float min_allowed_score =
min_score * (1.f - fraction) + max_score * fraction + 1.f;
assert(config_->effort != 9 || max_score <= min_allowed_score);
for (const BlockScore& main_angle : best_combinations_angle_) {
if (main_angle.score <= best_combination_no_angle_.score ||
main_angle.disto <= best_combination_no_angle_.disto ||
main_angle.score <= min_allowed_score) {
assert(main_angle.params.pred->mode() < pred_mode_map_.size());
pred_mode_map_[main_angle.params.pred->mode()] = true;
}
}
return pred_mode_map_;
}
const BlockScore& BlockScorer::GetBestCombination() const {
return best_combination_;
}
void BlockScorer::ReconstructBestCombination() {
assert(best_combination_.params.pred != nullptr);
if (best_combination_.params != last_params_ ||
best_combination_.segment_id != last_segment_id_) {
*cb_->GetCodingParams(channels_.front()) = best_combination_.params;
cb_->id_ = best_combination_.segment_id;
const Segment& segment =
gparams_->segments_[(channels_.front() == kAChannel) ? 0 : cb_->id_];
for (Channel c : channels_) {
const bool is420 = ((c == kUChannel || c == kVChannel) && cb_->is420_);
cb_->QuantizeAll(*config_, *context_, segment, c, is420, counters_);
}
}
}
//------------------------------------------------------------------------------
WP2Status OptimizeModes(const EncoderConfig& config, const Rectangle& tile_rect,
Channel channel, const Predictors& preds,
const BlockModes& m, const BlockContext& context,
CodedBlock* const cb, Counters* const counters,
BlockScorer* const scorer) {
scorer->Set(context, cb, counters);
// Either luma, chroma or alpha.
WP2_CHECK_ALLOC_OK(scorer->channels_.resize((channel == kUChannel) ? 2 : 1));
scorer->channels_.front() = channel;
if (channel == kUChannel) scorer->channels_.back() = kVChannel;
const bool explicit_segment_id =
scorer->GetGlobalParams().explicit_segment_ids_ && !cb->blk().IsSmall();
// First round of combinations.
WP2_CHECK_ALLOC_OK(scorer->splits_.copy_from(m.splits_tried_during_preds));
WP2_CHECK_ALLOC_OK(scorer->predictors_.copy_from(m.main_preds));
WP2_CHECK_ALLOC_OK(scorer->transforms_.copy_from(m.tf_tried_during_preds));
scorer->segment_ids_.clear();
if (explicit_segment_id) {
WP2_CHECK_ALLOC_OK(
scorer->segment_ids_.copy_from(m.segment_ids_tried_during_preds));
}
WP2_CHECK_STATUS(scorer->ComputeScore());
// Find all sub-angle predictors sharing the same mode (~angle) as the
// best main-angle predictors.
scorer->predictors_.clear();
const VectorNoCtor<bool>& pred_mode_map = scorer->RefinePredictors();
for (const Predictor* const sub_pred : m.sub_preds) {
assert(sub_pred->IsAngle(nullptr));
assert(std::find(m.main_preds.begin(), m.main_preds.end(), sub_pred) ==
m.main_preds.end());
if (pred_mode_map[sub_pred->mode()]) {
assert(std::find(scorer->predictors_.begin(), scorer->predictors_.end(),
sub_pred) == scorer->predictors_.end());
WP2_CHECK_ALLOC_OK(scorer->predictors_.push_back(sub_pred));
}
}
if (!scorer->predictors_.empty()) {
WP2_CHECK_STATUS(scorer->ComputeScore());
}
// Keep only the best of each from now on and try the others one by one.
WP2_CHECK_ALLOC_OK(scorer->splits_.resize(1));
scorer->splits_.front() = scorer->GetBestCombination().params.split_tf;
WP2_CHECK_ALLOC_OK(scorer->predictors_.resize(1));
scorer->predictors_.front() = scorer->GetBestCombination().params.pred;
if (scorer->segment_ids_.empty()) {
// Keep AssignSegmentId() value.
assert(cb->id_ == scorer->GetBestCombination().segment_id);
} else {
WP2_CHECK_ALLOC_OK(scorer->segment_ids_.resize(1));
scorer->segment_ids_.front() = scorer->GetBestCombination().segment_id;
}
// Try other transforms.
if (!m.tf_tried_after_preds.empty()) {
WP2_CHECK_ALLOC_OK(scorer->transforms_.copy_from(m.tf_tried_after_preds));
WP2_CHECK_STATUS(scorer->ComputeScore());
// Keep only the best transform from now on.
WP2_CHECK_ALLOC_OK(scorer->transforms_.resize(1));
scorer->transforms_.front() = scorer->GetBestCombination().params.tf;
}
// Try other segments.
scorer->segment_ids_.clear();
for (uint8_t segment_id : m.segment_ids_tried_after_preds) {
// AssignSegmentId() could have returned a segment present in
// 'm.segment_ids_tried_after_preds' so make sure there is no overlap.
if (explicit_segment_id &&
segment_id != scorer->GetBestCombination().segment_id) {
WP2_CHECK_ALLOC_OK(scorer->segment_ids_.push_back(segment_id));
}
}
if (!scorer->segment_ids_.empty()) {
WP2_CHECK_STATUS(scorer->ComputeScore());
// Keep only the best segment from now on.
WP2_CHECK_ALLOC_OK(scorer->segment_ids_.resize(1));
scorer->segment_ids_.front() = scorer->GetBestCombination().segment_id;
}
// Try other split_tf.
if (!m.splits_tried_after_preds.empty()) {
WP2_CHECK_ALLOC_OK(scorer->splits_.copy_from(m.splits_tried_after_preds));
WP2_CHECK_STATUS(scorer->ComputeScore());
}
scorer->ReconstructBestCombination();
return WP2_STATUS_OK;
}
WP2Status OptimizeModesChroma(
const EncoderConfig& config, const Rectangle& tile_rect, bool has_alpha,
const FrontMgrBase& mgr, const Predictors& preds,
ChromaSubsampling chroma_subsampling, const BlockModes& modes,
const BlockContext& context, CodedBlock* const cb, Counters* const counters,
DCDiffusionMap* const dc_error_u, DCDiffusionMap* const dc_error_v,
BlockScorer* const scorer) {
const uint32_t diffusion =
DCDiffusionMap::GetDiffusion(config.error_diffusion);
const bool is420_for_sure =
((chroma_subsampling == ChromaSubsampling::kSingleBlock &&
config.uv_mode == EncoderConfig::UVMode420) ||
chroma_subsampling == ChromaSubsampling::k420);
const bool is444_for_sure =
((chroma_subsampling == ChromaSubsampling::kSingleBlock &&
config.uv_mode == EncoderConfig::UVMode444) ||
chroma_subsampling == ChromaSubsampling::k444);
const Segment& segment = scorer->GetGlobalParams().segments_[cb->id_];
cb->is420_ = is420_for_sure; // Must be set to something now even if unsure.
WP2_CHECK_STATUS(OptimizeModes(config, tile_rect, kUChannel, preds, modes,
context, cb, counters, scorer));
if (diffusion > 0) { // Add error diffusion
cb->dc_error_[kUChannel] = dc_error_u->Get(cb->blk(), diffusion);
cb->dc_error_[kVChannel] = dc_error_v->Get(cb->blk(), diffusion);
}
// Choose the final value of 'cb->is420_'.
// TODO(skal): decide based on coded luma too?
if (is420_for_sure) {
cb->is420_ = true;
} else if (is444_for_sure) {
cb->is420_ = false;
} else {
assert(chroma_subsampling == ChromaSubsampling::kSingleBlock ||
chroma_subsampling == ChromaSubsampling::kAdaptive);
WP2_CHECK_STATUS(cb->DecideChromaSubsampling(
config, context, tile_rect.x, tile_rect.y, has_alpha, segment.quant_u_,
segment.quant_v_, counters));
}
if (diffusion > 0 || (!is420_for_sure && !is444_for_sure)) {
cb->QuantizeAll(config, context, segment, kUChannel, cb->is420_, counters);
cb->QuantizeAll(config, context, segment, kVChannel, cb->is420_, counters);
}
if (diffusion > 0) {
dc_error_u->Store(mgr, cb->blk(), cb->dc_error_next_[kUChannel]);
dc_error_v->Store(mgr, cb->blk(), cb->dc_error_next_[kVChannel]);
}
return WP2_STATUS_OK;
}
//------------------------------------------------------------------------------
} // namespace WP2