blob: e3e8f47ea630e7c6071bd62f8b76e7d1250a10ba [file] [log] [blame]
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// -----------------------------------------------------------------------------
//
// Block position/size scoring functions.
//
// Author: Yannis Guyon (yguyon@google.com)
#include "src/enc/partition_score_func.h"
#include <algorithm>
#include <array>
#include <cmath>
#include <limits>
#include <numeric>
#include "src/common/integral.h"
#include "src/dsp/dsp.h"
#include "src/dsp/math.h"
#include "src/enc/analysis.h"
#include "src/enc/partitioner.h"
#include "src/enc/wp2_enc_i.h"
#include "src/wp2/format_constants.h"
namespace WP2 {
// Plain pair of floats named 'base' and 'slope'. It is not referenced in the
// visible part of this file; presumably it parameterizes quality mappings such
// as MapQuality() below (to be confirmed against its users).
struct BaseSlope {
  float base;
  float slope;
};
// Linearly maps 'config.quality' from [0, kMaxLossyQuality] to
// [min, (min + slope)]. The result is floored at 0 so it is never negative.
static float MapQuality(const EncoderConfig& config, float min, float slope) {
  // The leading 1.f forces a floating-point division.
  const float normalized_quality = 1.f * config.quality / kMaxLossyQuality;
  const float mapped = min + normalized_quality * slope;
  return std::max(0.f, mapped);
}
//------------------------------------------------------------------------------
// Common setup of all scoring functions: remembers the encoding settings and
// the source planes, computes the plane dimensions in blocks, clears the visual
// debug and marks the whole 'progress' range as done.
// 'config', 'gparams' and 'yuv' are stored by address, so they must outlive
// this instance. 'tile_rect' is copied.
WP2Status PartitionScoreFunc::Init(const EncoderConfig& config,
                                   const Rectangle& tile_rect,
                                   const YUVPlane& yuv,
                                   const GlobalParams& gparams,
                                   const ProgressRange& progress) {
  // Non-owning pointers to the caller's objects.
  src_ = &yuv;
  gparams_ = &gparams;
  config_ = &config;
  tile_rect_ = tile_rect;

  // Size of the luma plane, expressed in blocks.
  num_block_rows_ = SizeBlocks(yuv.Y.h_);
  num_block_cols_ = SizeBlocks(yuv.Y.w_);

  WP2_CHECK_STATUS(ClearVDebug());
  WP2_CHECK_STATUS(progress.AdvanceBy(1.));
  return WP2_STATUS_OK;
}
// Base implementation: nothing is recorded for 'block', it always succeeds.
WP2Status PartitionScoreFunc::Use(const Block& block) {
  (void)block;  // Intentionally unused here.
  return WP2_STATUS_OK;
}
//------------------------------------------------------------------------------
// Prepares everything needed to encode candidate blocks and measure their
// rate and distortion: base-class state, transform sets, the reconstruction
// buffer, the front manager, the syntax writer recording final blocks, the
// optional chroma DC error diffusion maps and the context.
// Half of 'progress' is given to PartitionScoreFunc::Init(), the other half is
// reported at the end of this function.
// Returns the first error status encountered, WP2_STATUS_OK otherwise.
WP2Status BlockScoreFunc::Init(const EncoderConfig& config,
const Rectangle& tile_rect, const YUVPlane& yuv,
const GlobalParams& gparams,
const ProgressRange& progress) {
WP2EncDspInit();
// Sets 'config_', 'gparams_', 'tile_rect_', 'src_' and the block counts that
// are used below.
WP2_CHECK_STATUS(PartitionScoreFunc::Init(config, tile_rect, yuv, gparams,
ProgressRange(progress, 0.5)));
const ChromaSubsampling chroma_subsampling =
DecideChromaSubsampling(*config_, /*more_than_one_block=*/true);
const bool use_aom_coeffs = DecideAOMCoeffs(*config_, tile_rect_);
WP2_CHECK_STATUS(DecideTransforms(config, &transforms_, &transforms_subset_));
// Store the reconstructed pixels of the temporary and final blocks.
WP2_CHECK_STATUS(
buffer_.Resize(src_->Y.w_, src_->Y.h_, /*pad=*/1, src_->HasAlpha()));
WP2_CHECK_STATUS(front_mgr_.Init(config_->partition_set,
config_->partition_snapping,
tile_rect_.width, tile_rect_.height));
// Initialize the instances recording only final blocks.
WP2_CHECK_STATUS(syntax_writer_.Init(
&dicts_, *config_, *gparams_, yuv, chroma_subsampling, tile_rect,
num_block_cols_ * num_block_rows_, use_aom_coeffs, ProgressRange()));
WP2_CHECK_STATUS(syntax_writer_.SetInitialSegmentIds());
WP2_CHECK_STATUS(syntax_writer_.InitPass());
// The chroma DC error maps are only set up when error diffusion is enabled.
if (DCDiffusionMap::GetDiffusion(config_->error_diffusion) > 0) {
WP2_CHECK_STATUS(dc_error_u_.Init(tile_rect_.width));
WP2_CHECK_STATUS(dc_error_v_.Init(tile_rect_.width));
}
// Initialize the cache.
WP2_CHECK_STATUS(context_.Init(use_aom_coeffs, yuv.Y.w_, yuv.Y.h_));
WP2_CHECK_STATUS(progress.AdvanceBy(0.5));
return WP2_STATUS_OK;
}
// Scores a single 'block' by forwarding it to the multi-block overload as a
// one-element array, then reports 'progress' as complete.
WP2Status BlockScoreFunc::ComputeScore(const Block& block,
                                       const ProgressRange& progress,
                                       float* const score) {
  constexpr uint32_t kNumBlocks = 1;
  WP2_CHECK_STATUS(ComputeScore(&block, kNumBlocks, score));
  WP2_CHECK_STATUS(progress.AdvanceBy(1.));
  return WP2_STATUS_OK;
}
// Scores a group of 1 to 4 'blocks' encoded one after the other.
// The final encoding state (dictionaries, syntax writer, DC error maps) is
// copied into scratch instances, each block is encoded and written with them,
// and the per-pixel rate and distortion of all blocks are averaged and combined
// into '*score' (in ]0:1], higher is better).
// The 'front_mgr_' is left as it was found: blocks registered during the
// evaluation are unregistered before returning successfully.
// Returns WP2_STATUS_INVALID_PARAMETER if 'num_blocks' is not in [1:4], or the
// first error status encountered; WP2_STATUS_OK otherwise.
WP2Status BlockScoreFunc::ComputeScore(const Block blocks[4],
                                       uint32_t num_blocks,
                                       float* const score) {
  // 'rate' and 'disto' hold 4 entries and both the undo loop and the averages
  // below need at least one block, so reject anything else even when asserts
  // are compiled out.
  assert(num_blocks >= 1 && num_blocks <= 4);
  WP2_CHECK_OK(num_blocks >= 1 && num_blocks <= 4,
               WP2_STATUS_INVALID_PARAMETER);

  // Copy the final state into the temporary scratch one.
  WP2_CHECK_STATUS(tmp_dicts_.CopyFrom(dicts_));
  WP2_CHECK_STATUS(tmp_syntax_writer_.CopyFrom(syntax_writer_, &tmp_dicts_));
  if (DCDiffusionMap::GetDiffusion(config_->error_diffusion) > 0) {
    WP2_CHECK_STATUS(tmp_dc_error_u_.CopyFrom(dc_error_u_));
    WP2_CHECK_STATUS(tmp_dc_error_v_.CopyFrom(dc_error_v_));
  }

  float total_rate = 0.f, total_disto = 0.f, rate[4], disto[4];
  for (uint32_t i = 0; i < num_blocks; ++i) {
    const Block& block = blocks[i];

    // Encode the block.
    Block tmp_blk;
    if (!front_mgr_.SetNextBlockPosition(block.x(), block.y())) assert(false);
    // Call outside of assert() so that the expression is evaluated in all
    // build modes; only the verification itself is debug-only.
    const bool fits = front_mgr_.TryGetNextBlock(block.dim(), &tmp_blk);
    assert(fits);
    assert(block == tmp_blk);
    (void)fits;
    tmp_cb_.SetDim(block, front_mgr_);
    WP2_CHECK_STATUS(EncodeBlock(front_mgr_, &tmp_cb_, &tmp_syntax_writer_,
                                 &tmp_dc_error_u_, &tmp_dc_error_v_, &buffer_));

    // Write the bits.
    ANSEnc enc;
    WP2_CHECK_STATUS(tmp_syntax_writer_.WriteHeader(&enc));
    const float header_rate = enc.GetCost(tmp_dicts_);
    WP2_CHECK_STATUS(
        WriteBlock(front_mgr_, tmp_cb_, &tmp_syntax_writer_, &enc));
    const uint32_t num_pixels = block.rect_pix().GetArea();
    // Exclude the header to prevent early decision from impacting later blocks.
    rate[i] = (enc.GetCost(tmp_dicts_) - header_rate) / num_pixels;
    assert(rate[i] >= 0.f);
    total_rate += rate[i];

    // Compute the distortion per pixel, as a weighted sum over the channels.
    disto[i] = 0.f;
    constexpr float disto_scale[] = {0.4f, 0.2f, 0.2f, 0.2f};
    for (Channel c : {kYChannel, kUChannel, kVChannel, kAChannel}) {
      if (c == kAChannel && !gparams_->has_alpha_) continue;
      disto[i] += disto_scale[c] * tmp_cb_.GetDisto(c, tile_rect_);
    }
    disto[i] /= num_pixels;  // Per pixel is not necessary, just nicer debug.
    total_disto += disto[i];

    // Register the blocks in the 'front_mgr_' except the last one.
    if (i + 1 < num_blocks) {
      if (!front_mgr_.UseSize(block.dim(), /*ind=*/0, &tmp_blk)) assert(false);
      assert(block == tmp_blk);
      front_mgr_.Use(block);
    }
  }

  // Unregister the blocks in the 'front_mgr_' except the last one (which was
  // never registered), in reverse order of registration.
  for (uint32_t i = num_blocks - 1; i-- > 0;) {
    front_mgr_.UndoUse(blocks[i]);
    front_mgr_.UndoUseSize(blocks[i]);
  }

  total_rate /= num_blocks;  // Average per pixel values.
  total_disto /= num_blocks;

  // Estimate a score from the written bits and the distortion.
  const float lambda = MapQuality(*config_, 10.f, -9.f);  // Empirical.
  constexpr float kNiceScale = 0.01f;  // Has no impact on the result.
  *score = 1.0f / (1.0f + kNiceScale * (lambda * total_rate + total_disto));
  RegisterScoreForVDebug(blocks, num_blocks, rate, disto, total_rate,
                         total_disto, *score);
  return WP2_STATUS_OK;
}
// Commits 'block' as final: it is encoded with the final (non-scratch) syntax
// writer and DC error maps so that its reconstructed pixels land in 'buffer_',
// then it is registered in the 'front_mgr_'.
WP2Status BlockScoreFunc::Use(const Block& block) {
  const bool positioned =
      front_mgr_.SetNextBlockPosition(block.x(), block.y());
  if (!positioned) assert(false);
  tmp_cb_.SetDim(block, front_mgr_);

  // Write the final pixels for future context and rate computation.
  WP2_CHECK_STATUS(EncodeBlock(front_mgr_, &tmp_cb_, &syntax_writer_,
                               &dc_error_u_, &dc_error_v_, &buffer_));

  // Mark the block as occupied.
  WP2_CHECK_ALLOC_OK(
      front_mgr_.UseSize(tmp_cb_.dim(), /*ind=*/0, /*block=*/nullptr));
  front_mgr_.Use(tmp_cb_.blk());
  return WP2_STATUS_OK;
}
// Selects the encoding parameters of 'cb': its segment id, its luma modes, its
// chroma modes and, if the image has alpha, how alpha is handled.
//   front_mgr      forwarded to the chroma mode optimization.
//   block_context  not used by this implementation.
//   writer         provides the context, the counters and the chroma
//                  subsampling; also decides alpha.
//   dc_error_u/v   forwarded to the chroma mode optimization.
//   cb             block to fill; its position and dimension are already set.
// Returns the first error status encountered, WP2_STATUS_OK otherwise.
WP2Status BlockScoreFunc::FindBestBlockParams(const FrontMgrNxNBase& front_mgr,
const BlockContext& block_context,
SyntaxWriter* const writer,
DCDiffusionMap* const dc_error_u,
DCDiffusionMap* const dc_error_v,
CodedBlock* const cb) const {
// Same origin as 'tile_rect_' but with the dimensions of the source planes.
const Rectangle padded_tile_rect = {tile_rect_.x, tile_rect_.y,
src_->GetWidth(), src_->GetHeight()};
cb->id_ = AssignSegmentId(*config_, *gparams_, padded_tile_rect, cb->blk());
const Segment& segment = gparams_->segments_.at(cb->id_);
cb->mtx_set_ = gparams_->use_rnd_mtx_ ? &gparams_->mtx_set_ : nullptr;
cb->is420_ = false; // Set in OptimizeModesChroma() but might trigger the
// undefined-behavior-sanitizer in OptimizeModesLuma().
cb->ResetContextCache();
cb->y_context_is_constant_ = cb->ContextIsConstant(kYChannel);
// Luma is optimized before chroma.
WP2_CHECK_STATUS(cb->OptimizeModesLuma(
*config_, tile_rect_, gparams_->maybe_use_lossy_alpha_,
gparams_->y_preds_, segment, writer->context(), transforms_,
transforms_subset_, writer->counters()));
WP2_CHECK_STATUS(cb->OptimizeModesChroma(
*config_, tile_rect_, gparams_->maybe_use_lossy_alpha_, front_mgr,
gparams_->uv_preds_, segment, writer->context(),
writer->chroma_subsampling(), dc_error_u, dc_error_v,
writer->counters()));
if (gparams_->has_alpha_) {
WP2_CHECK_STATUS(writer->DecideAlpha(cb));
if (!cb->HasLossyAlpha()) {
// Consider no loss by copying original samples to the buffer.
WP2_CHECK_STATUS(cb->out_.A.Copy(cb->in_.A, /*resize_if_needed=*/false));
}
}
return WP2_STATUS_OK;
}
// Encodes 'cb': binds it to the source and to the reconstruction buffers,
// finds its best parameters, then records it (size, content and alpha if any)
// in 'syntax_writer'. The reconstructed pixels are output to 'buffer'.
// 'cb' must already have its position and dimension set.
// Returns the first error status encountered, WP2_STATUS_OK otherwise.
WP2Status BlockScoreFunc::EncodeBlock(const FrontMgrNxNBase& front_mgr,
CodedBlock* const cb,
SyntaxWriter* const syntax_writer,
DCDiffusionMap* const dc_error_u,
DCDiffusionMap* const dc_error_v,
YUVPlane* const buffer) const {
cb->SetRange(gparams_->transf_.GetYUVMin(), gparams_->transf_.GetYUVMax());
cb->SetSrcInput(*src_);
// NOTE(review): the address of this local is handed to 'cb'; confirm that
// 'cb' does not dereference it once this function has returned.
ContextCache pred_context;
// NOTE(review): the prediction context is read from the member 'buffer_'
// while the output goes to the 'buffer' argument. All callers visible in this
// file pass '&buffer_', so both designate the same planes there.
cb->SetContextInput(buffer_, &pred_context);
cb->SetReconstructedOutput(buffer);
// This is the slowest part: finding the best transform, predictor etc.
WP2_CHECK_STATUS(FindBestBlockParams(front_mgr, context_, syntax_writer,
dc_error_u, dc_error_v, cb));
// CodedBlock::Quantize() should be called already.
WP2_CHECK_STATUS(syntax_writer->FindBestEncodingMethods(cb));
WP2_CHECK_STATUS(syntax_writer->RecordSize(front_mgr, cb->dim()));
WP2_CHECK_STATUS(syntax_writer->Record(*cb));
if (gparams_->has_alpha_) {
WP2_CHECK_STATUS(syntax_writer->RecordAlpha(*cb));
}
return WP2_STATUS_OK;
}
// Writes the already encoded 'cb' to 'enc': first its size (under the
// "BlockHeader" debug prefix), then its content through 'syntax_writer'.
// 'front_mgr' is expected to be positioned on 'cb' (checked in debug builds).
WP2Status BlockScoreFunc::WriteBlock(const FrontMgrNxNBase& front_mgr,
                                     const CodedBlock& cb,
                                     SyntaxWriter* const syntax_writer,
                                     ANSEnc* const enc) const {
  // The next block of 'front_mgr' must start where 'cb' starts...
  assert(front_mgr.GetMaxFittingBlock().x() == cb.x());
  assert(front_mgr.GetMaxFittingBlock().y() == cb.y());
  // ...and 'cb' must fit in the biggest block available at that position.
  assert(front_mgr.GetMaxPossibleBlock().rect().Contains(cb.blk().rect()));

  {
    // The prefix only applies to the block size, hence the dedicated scope.
    ANSDebugPrefix prefix(enc, "BlockHeader");
    WriteBlockSize(front_mgr, cb.dim(), syntax_writer->symbol_writer(), enc);
  }

  WP2_CHECK_STATUS(syntax_writer->WriteBlock(cb, /*block_index=*/0, enc));
  return WP2_STATUS_OK;
}
//------------------------------------------------------------------------------
// Creates a scoring function working on areas of at most 'area_width' by
// 'area_height' pixels. The front manager receives the area dimensions in
// pixels whereas the block comparator 'comp_' (used to sort and search the
// default partition) receives the maximum tile size and the area dimensions
// converted to kMinBlockSizePix units.
AreaScoreFunc::AreaScoreFunc(uint32_t area_width, uint32_t area_height)
: area_width_(area_width),
area_height_(area_height),
area_front_mgr_(area_width, area_height),
comp_(kMaxTileSize / kMinBlockSizePix, area_width / kMinBlockSizePix,
area_height / kMinBlockSizePix) {}
// Sets up the area-based scoring: validates the configuration, computes the
// default (multipass) partition of the whole tile as a reference, initializes
// the encoding state recording final blocks and begins the top-left area.
// 'progress' is split as 10% base init, 10% reference score function init and
// 80% reference partitioning.
// Returns WP2_STATUS_INVALID_CONFIGURATION if partition snapping is disabled or
// if the biggest block of the partition set does not fit in an area; otherwise
// the first error status encountered or WP2_STATUS_OK.
WP2Status AreaScoreFunc::Init(const EncoderConfig& config,
const Rectangle& tile_rect, const YUVPlane& yuv,
const GlobalParams& gparams,
const ProgressRange& progress) {
const ProgressRange init_progress(progress, 0.1);
const ProgressRange score_func_init_progress(progress, 0.1);
const ProgressRange default_partition_progress(progress, 0.8);
WP2_CHECK_STATUS(
PartitionScoreFunc::Init(config, tile_rect, yuv, gparams, init_progress));
// This scoring function needs a strict block layout.
const BlockSize max_block_size =
GetSmallestBounds(config.partition_set, BLK_32x32);
WP2_CHECK_OK(config.partition_snapping, WP2_STATUS_INVALID_CONFIGURATION);
WP2_CHECK_OK(BlockWidthPix(max_block_size) <= area_width_ &&
BlockHeightPix(max_block_size) <= area_height_,
WP2_STATUS_INVALID_CONFIGURATION);
// Matches TileEncoder::LossyEncode() behavior.
const ChromaSubsampling chroma_subsampling =
DecideChromaSubsampling(*config_, /*more_than_one_block=*/true);
const bool use_aom_coeffs = DecideAOMCoeffs(*config_, tile_rect_);
WP2_CHECK_STATUS(DecideTransforms(config, &transforms_, &transforms_subset_));
// Retrieve the default partition (multipass) to have a reference.
{
MultiScoreFunc score_func;
MultiPassPartitioner partitioner(&score_func);
EncoderConfig cfg_no_dbg = config;
cfg_no_dbg.info = nullptr; // Do not use or output any debugging data.
WP2_CHECK_STATUS(score_func.Init(cfg_no_dbg, tile_rect_, *src_, *gparams_,
score_func_init_progress));
WP2_CHECK_STATUS(
partitioner.Init(cfg_no_dbg, *src_, tile_rect_, &score_func));
// TODO(yguyon): Add forced blocks from 'config' to 'default_partition_'
WP2_CHECK_STATUS(partitioner.GetBestPartition(default_partition_progress,
&default_partition_));
// Sort the blocks to find those in a given area faster.
std::sort(default_partition_.begin(), default_partition_.end(), comp_);
}
// Initialize the instances recording only final blocks.
WP2_CHECK_STATUS(syntax_writer_.Init(
&dicts_, *config_, *gparams_, yuv, chroma_subsampling, tile_rect,
num_block_cols_ * num_block_rows_, use_aom_coeffs, ProgressRange()));
WP2_CHECK_STATUS(syntax_writer_.SetInitialSegmentIds());
WP2_CHECK_STATUS(syntax_writer_.InitPass());
WP2_CHECK_STATUS(context_.Init(use_aom_coeffs, yuv.Y.w_, yuv.Y.h_));
// The chroma DC error maps are only set up when error diffusion is enabled.
if (DCDiffusionMap::GetDiffusion(config_->error_diffusion) > 0) {
WP2_CHECK_STATUS(dc_error_u_.Init(tile_rect_.width));
WP2_CHECK_STATUS(dc_error_v_.Init(tile_rect_.width));
}
// Store the reconstructed pixels of the current area partitioning and of
// the final selected blocks.
WP2_CHECK_STATUS(
buffer_.Resize(src_->Y.w_, src_->Y.h_, /*pad=*/1, src_->HasAlpha()));
// Setup the first area as the top-left corner.
WP2_CHECK_STATUS(area_front_mgr_.Init(config_->partition_set,
config_->partition_snapping,
tile_rect_.width, tile_rect_.height));
WP2_CHECK_STATUS(BeginArea(/*area_x=*/0, /*area_y=*/0)); // x,y within tile
return WP2_STATUS_OK;
}
// Returns a score representing how good a 'disto'/'rate' pair is compared to
// the reference pair 'ref_disto'/'ref_rate':
//  - 0 means discarded: 'disto' or 'rate' is worse (higher) than its reference,
//    or it cannot be compared to a null reference.
//  - 1/2 means equivalent to the reference (both normalized values are 1).
//  - 1 is the best possible score (null 'disto' and 'rate' while the reference
//    values are positive).
// Assuming non-negative inputs, a non-discarded score is thus in [1/2:1].
// 'config.quality' weighs the distortion against the rate: the higher the
// quality, the more the distortion matters.
static float GetRelativeScore(const EncoderConfig& config, float ref_disto,
                              float ref_rate, float disto, float rate) {
  if (disto > ref_disto || rate > ref_rate) return 0.f;  // No mercy.

  if (ref_disto > 0.f) {
    disto /= ref_disto;  // Normalize.
  } else {
    if (disto > 0.f) return 0.f;  // Discard, no way of assessing 'disto'/'ref'.
    disto = 1.f;  // Both values are 0 so set to 1 to signal it is equivalent.
  }

  if (ref_rate > 0.f) {
    rate /= ref_rate;  // Normalize.
  } else {
    if (rate > 0.f) return 0.f;  // Discard, no way of assessing 'rate'/'ref'.
    rate = 1.f;  // Both values are 0 so set to 1 to signal it is equivalent.
  }

  // Same normalization as in MapQuality(): the leading 1.f guarantees a
  // floating-point division whatever the types of the operands.
  const float delta =
      Clamp(1.f * config.quality / kMaxLossyQuality, 0.01f, 0.99f);
  return 1.f / (1.f + delta * disto + (1.f - delta) * rate);
}
// Per-block scoring is not provided by this class: all arguments are ignored
// and WP2_STATUS_UNSUPPORTED_FEATURE is always returned.
WP2Status AreaScoreFunc::ComputeScore(const Block& /*block*/,
                                      const ProgressRange& /*progress*/,
                                      float* const /*score*/) {
  return WP2_STATUS_UNSUPPORTED_FEATURE;  // Unused.
}
// Encodes the default partition of the current 'area_' and stores its
// distortion and rate in 'default_disto_' and 'default_rate_', which serve as
// reference for the other partitions of this area.
// The blocks of the default partition are appended to 'blocks' and '*score'
// receives the score of the reference compared to itself.
WP2Status AreaScoreFunc::GetAreaDefaultScore(VectorNoCtor<Block>* const blocks,
                                             float* const score) {
  Vector<CodedBlock> default_area_blocks;
  WP2_CHECK_STATUS(GetAreaDefaultPartition(&default_area_blocks));
  assert(!default_area_blocks.empty());

  WP2_CHECK_STATUS(
      GetDistoRate(&default_area_blocks, &default_disto_, &default_rate_));

  // Compute the score with itself as reference, to be easily comparable.
  *score = GetRelativeScore(*config_, /*ref_disto=*/default_disto_,
                            /*ref_rate=*/default_rate_,
                            /*disto=*/default_disto_, /*rate=*/default_rate_);

  for (const CodedBlock& coded_block : default_area_blocks) {
    WP2_CHECK_ALLOC_OK(blocks->push_back(coded_block.blk()));
  }

  RegisterScoreForVDebug(BLK_LAST, default_area_blocks, *score, default_disto_,
                         default_rate_);
  return WP2_STATUS_OK;
}
// Scores the current 'area_' when it is tiled with blocks of 'block_size'
// wherever they fit, relatively to the default partition of the same area.
// The blocks of the tested partition are appended to 'blocks' and the relative
// score is output to '*score'.
// 'default_disto_' and 'default_rate_' must have been computed for this area
// beforehand (checked in debug builds); GetAreaDefaultScore() sets them.
WP2Status AreaScoreFunc::GetAreaGridScore(BlockSize block_size,
VectorNoCtor<Block>* const blocks,
float* const score) {
Vector<CodedBlock> partition;
// Fill 'partition' with as many 'block_size' as possible.
// Fill the remaining space with blocks as big as possible.
// Work on a copy so that 'area_front_mgr_' is left untouched.
FrontMgrArea front_mgr(area_width_, area_height_);
WP2_CHECK_STATUS(front_mgr.CopyFrom(area_front_mgr_));
uint32_t num_block_units = 0;
while (!front_mgr.Done()) {
Block block;
if (!front_mgr.TryGetNextBlock(block_size, &block)) {
block = front_mgr.GetMaxFittingBlock();
}
// Stop as soon as the next block starts outside the current 'area_'.
if (!area_.Contains(block.x_pix(), block.y_pix())) break;
WP2_CHECK_ALLOC_OK(front_mgr.UseSize(block.dim(), 0, nullptr));
front_mgr.Use(block);
WP2_CHECK_ALLOC_OK(partition.resize(partition.size() + 1));
partition.back().SetDimDefault(block);
WP2_CHECK_ALLOC_OK(blocks->push_back(block));
num_block_units += block.rect().GetArea();
}
// The whole 'area_' must be covered by the blocks.
assert(num_block_units == SizeBlocks(area_.width) * SizeBlocks(area_.height));
// Negative values mean the reference was not computed for this area.
assert(default_disto_ >= 0.f && default_rate_ >= 0.f);
float disto = 0.f, rate = 0.f;
WP2_CHECK_STATUS(GetDistoRate(&partition, &disto, &rate));
*score =
GetRelativeScore(*config_, default_disto_, default_rate_, disto, rate);
RegisterScoreForVDebug(block_size, partition, *score, disto, rate);
return WP2_STATUS_OK;
}
// Encodes 'area_blocks' in order, starting from the current final state, and
// measures the result over the current 'area_':
//   '*rate'   estimated cost of writing the blocks (header excluded), divided
//             by the number of pixels of 'area_'.
//   '*disto'  weighted sum over the channels of the squared error between the
//             source and the reconstruction, divided by the number of pixels
//             of 'area_'.
// The final dictionaries, syntax writer, DC error maps and front manager are
// only copied, not modified. The reconstructed pixels are written to 'buffer_'.
// Returns WP2_STATUS_INVALID_PARAMETER if there are more blocks than available
// space; otherwise the first error status encountered or WP2_STATUS_OK.
WP2Status AreaScoreFunc::GetDistoRate(Vector<CodedBlock>* const area_blocks,
float* const disto,
float* const rate) const {
*rate = *disto = 0.f;
// Scratch copies of the final encoding state.
ANSDictionaries dicts;
WP2_CHECK_STATUS(dicts.CopyFrom(dicts_));
SyntaxWriter syntax_writer;
WP2_CHECK_STATUS(syntax_writer.CopyFrom(syntax_writer_, &dicts));
DCDiffusionMap dc_error_u, dc_error_v;
if (DCDiffusionMap::GetDiffusion(config_->error_diffusion) > 0) {
WP2_CHECK_STATUS(dc_error_u.CopyFrom(dc_error_u_));
WP2_CHECK_STATUS(dc_error_v.CopyFrom(dc_error_v_));
}
// Encode all 'area_blocks' using previously finished areas and blocks in this
// 'area_' as prediction context (stored in 'buffer_').
{
FrontMgrArea front_mgr(area_width_, area_height_);
WP2_CHECK_STATUS(front_mgr.CopyFrom(area_front_mgr_));
for (CodedBlock& cb : *area_blocks) {
WP2_CHECK_OK(!front_mgr.Done(), WP2_STATUS_INVALID_PARAMETER);
// Blocks must be given in the order expected by the front manager.
assert(cb.x() == front_mgr.GetMaxPossibleBlock().x() &&
cb.y() == front_mgr.GetMaxPossibleBlock().y() &&
front_mgr.GetMaxPossibleBlock().rect().Contains(cb.blk().rect()));
cb.SetDim(cb.blk(), front_mgr);
WP2_CHECK_STATUS(EncodeBlock(front_mgr, &cb, &syntax_writer, &dc_error_u,
&dc_error_v, &buffer_));
WP2_CHECK_ALLOC_OK(front_mgr.UseSize(cb.dim(),
/*ind=*/0, /*block=*/nullptr));
front_mgr.Use(cb.blk());
}
}
// Now estimate the bits necessary to encode all blocks in 'area_'.
{
// Also write the headers so that symbols are correctly set up.
ANSEnc enc;
WP2_CHECK_STATUS(syntax_writer.WriteHeader(&enc));
const float header_rate = enc.GetCost(dicts);
// Replay the same block sequence with a fresh copy of the front manager.
FrontMgrArea front_mgr(area_width_, area_height_);
WP2_CHECK_STATUS(front_mgr.CopyFrom(area_front_mgr_));
for (const CodedBlock& cb : *area_blocks) {
WP2_CHECK_STATUS(WriteBlock(front_mgr, cb, &syntax_writer, &enc));
WP2_CHECK_ALLOC_OK(front_mgr.UseSize(cb.dim(),
/*ind=*/0, /*block=*/nullptr));
front_mgr.Use(cb.blk());
}
// Exclude the header to prevent early decisions from impacting later areas.
*rate = (enc.GetCost(dicts) - header_rate) / area_.GetArea();
assert(*rate >= 0.f);
}
// Measure the distortion of this tested partition of 'area_'.
{
// Weights indexed by channel, in the order of the loop below.
constexpr float disto_scale[] = {0.4f, 0.2f, 0.2f, 0.2f};
for (Channel c : {kYChannel, kUChannel, kVChannel, kAChannel}) {
if (c == kAChannel && !gparams_->has_alpha_) continue;
Plane16 src_area_view, buffer_area_view;
WP2_CHECK_STATUS(src_area_view.SetView(src_->GetChannel(c), area_));
WP2_CHECK_STATUS(buffer_area_view.SetView(buffer_.GetChannel(c), area_));
*disto +=
disto_scale[c] * WP2SumSquaredErrorBlock(
src_area_view.Row(0), src_area_view.Step(),
buffer_area_view.Row(0), buffer_area_view.Step(),
area_.width, area_.height);
}
*disto /= area_.GetArea();
}
return WP2_STATUS_OK;
}
// Makes the area whose top-left corner is at 'area_x', 'area_y' (in pixels,
// within the tile) the current 'area_', clipped to the tile dimensions.
// Areas must be begun in order, row by row (checked in debug builds).
WP2Status AreaScoreFunc::BeginArea(uint32_t area_x, uint32_t area_y) {
  // Make sure areas are done in order: either this is the first area, the one
  // right of the previous area or the first one of the next row.
  assert((area_x == 0 && area_y == 0) ||
         (area_x == area_.x + area_width_ && area_y == area_.y) ||
         (area_x == 0 && area_y == area_.y + area_height_));

  const Rectangle tile_bounds = {0, 0, tile_rect_.width,
                                 tile_rect_.height};  // no pad
  area_ = Rectangle(area_x, area_y, area_width_, area_height_)
              .ClipWith(tile_bounds);

  // Assuming all areas are done in order row by row, top and left contexts
  // outside this 'area_' are available, if any.
  // Next available block should match the current 'area_'.
  assert(area_front_mgr_.GetMaxFittingBlock().x_pix() == area_.x &&
         area_front_mgr_.GetMaxFittingBlock().y_pix() == area_.y);
  return WP2_STATUS_OK;
}
// Commits 'block' as final: it is encoded with the final syntax writer and DC
// error maps, registered in 'area_front_mgr_' and, if it was the last block of
// the current 'area_', the next area is begun (or everything is cleared if the
// tile is complete). The reference distortion and rate are invalidated.
WP2Status AreaScoreFunc::Use(const Block& block) {
CodedBlock cb;
cb.SetDim(block, area_front_mgr_);
assert(cb.blk() == block);
// Write the final pixels for future context and rate computation.
WP2_CHECK_STATUS(EncodeBlock(area_front_mgr_, &cb, &syntax_writer_,
&dc_error_u_, &dc_error_v_, &buffer_));
WP2_CHECK_ALLOC_OK(area_front_mgr_.UseSize(cb.dim(), /*ind=*/0,
/*block=*/nullptr));
area_front_mgr_.Use(cb.blk());
if (area_front_mgr_.Done()) {
// All areas are complete.
area_ = {};
area_front_mgr_.Clear();
} else {
// The next block position tells whether the current 'area_' is full.
const Block max_block = area_front_mgr_.GetMaxPossibleBlock();
if (!area_.Contains(max_block.x_pix(), max_block.y_pix())) {
// This 'area_' is complete. Prepare the next one.
uint32_t area_x = area_.x + area_width_, area_y = area_.y; // Next col.
if (area_x >= tile_rect_.width) { // Next row.
area_x = 0;
area_y += area_height_;
}
assert(area_x < tile_rect_.width && area_y < tile_rect_.height);
WP2_CHECK_STATUS(BeginArea(area_x, area_y));
}
}
// Negative values mark the reference as not computed; GetAreaGridScore()
// asserts that they are non-negative before using them.
default_disto_ = default_rate_ = -1;
return WP2_STATUS_OK;
}
// Appends to 'area_blocks' the blocks of 'default_partition_' that start
// inside the current 'area_', in the order defined by 'comp_'.
// Returns WP2_STATUS_INVALID_CONFIGURATION if these blocks do not cover exactly
// the surface of 'area_'; otherwise WP2_STATUS_OK or an allocation error.
WP2Status AreaScoreFunc::GetAreaDefaultPartition(
Vector<CodedBlock>* const area_blocks) const {
// Find the blocks in 'default_partition_' belonging to the current 'area_'.
// 'default_partition_' was sorted with 'comp_' in Init() so a binary search
// finds the first block located at or after the top-left corner of 'area_'.
const Block area_pos(area_.x / kMinBlockSizePix, area_.y / kMinBlockSizePix,
BLK_32x32);
VectorNoCtor<Block>::const_iterator block_it = std::lower_bound(
default_partition_.begin(), default_partition_.end(), area_pos, comp_);
uint32_t num_block_units = 0;
for (; block_it != default_partition_.end(); ++block_it) {
// Stop at the first block starting outside 'area_'.
if (!area_.Contains(block_it->x_pix(), block_it->y_pix())) break;
WP2_CHECK_ALLOC_OK(area_blocks->resize(area_blocks->size() + 1));
area_blocks->back().SetDimDefault(*block_it);
num_block_units += block_it->rect().GetArea();
}
// If snapping is not enabled or if the 'area_' dimensions do not match it,
// the default partition cannot be used as is.
WP2_CHECK_OK(
num_block_units == SizeBlocks(area_.width) * SizeBlocks(area_.height),
WP2_STATUS_INVALID_CONFIGURATION);
return WP2_STATUS_OK;
}
//------------------------------------------------------------------------------
// Scores 'block' relatively to the default partitioning of the space remaining
// in the current area.
// The first call for a given position computes and caches the default
// partitioning of the remaining space ('default_block_' and its disto/rate);
// the cache is reset by Use(). Then, unless 'block' is the default block
// itself, the remaining space is partitioned again with 'block' forced first
// and the resulting disto/rate is compared to the cached reference.
// 'progress' is split evenly between these two steps.
WP2Status SubAreaScoreFunc::ComputeScore(const Block& block,
const ProgressRange& progress,
float* const score) {
const ProgressRange default_partitioning_progress(progress, 0.5);
const ProgressRange remaining_blocks_progress(progress, 0.5);
// A dimension of BLK_LAST means the reference is not cached yet.
if (default_block_.dim() == BLK_LAST) {
// Get the default partitioning and score of the remaining non-final blocks.
// This is done once for each block position within each area.
Vector<CodedBlock> default_partition;
WP2_CHECK_STATUS(GetAreaRemainingDefaultPartition(
default_partitioning_progress, &default_partition));
assert(!default_partition.empty() &&
default_partition.front().dim() != BLK_LAST);
// TODO(yguyon): Also include the 'area_used_blocks_' into the disto/rate
WP2_CHECK_STATUS(GetDistoRate(&default_partition, &default_block_disto_,
&default_block_rate_));
const float default_score =
GetRelativeScore(*config_, default_block_disto_, default_block_rate_,
default_block_disto_, default_block_rate_);
RegisterScoreForVDebug(default_partition.front().blk(), default_partition,
default_score, default_block_disto_,
default_block_rate_);
default_block_ = default_partition.front().blk();
} else {
// The reference is already cached: only report the progress.
WP2_CHECK_STATUS(default_partitioning_progress.AdvanceBy(1.));
}
// 'default_block_' now contains the size of the first block among the
// remaining non-final ones given by the default partitioning.
assert(default_block_.x() == block.x() && default_block_.y() == block.y());
float disto = 0.f, rate = 0.f;
if (block == default_block_) {
// Same as the reference: reuse the cached values instead of encoding again.
disto = default_block_disto_;
rate = default_block_rate_;
*score = GetRelativeScore(*config_, default_block_disto_,
default_block_rate_, disto, rate);
WP2_CHECK_STATUS(remaining_blocks_progress.AdvanceBy(1.));
} else {
// 'block' here is the currently evaluated size for a given position.
// 'area_used_blocks_' represent the final blocks previously encoded and
// recorded. 'area_remaining_blocks' exist to fill the 'area_' partition and
// compare the same surface by rate-distortion with the default partition.
Vector<CodedBlock> area_remaining_blocks;
WP2_CHECK_ALLOC_OK(area_remaining_blocks.resize(1));
area_remaining_blocks.back().SetDimDefault(block); // Force it.
WP2_CHECK_STATUS(GetAreaRemainingDefaultPartition(remaining_blocks_progress,
&area_remaining_blocks));
WP2_CHECK_STATUS(GetDistoRate(&area_remaining_blocks, &disto, &rate));
*score = GetRelativeScore(*config_, default_block_disto_,
default_block_rate_, disto, rate);
RegisterScoreForVDebug(block, area_remaining_blocks, *score, disto, rate);
}
return WP2_STATUS_OK;
}
// Begins the area at 'area_x', 'area_y' like AreaScoreFunc::BeginArea() and
// forgets the blocks that were selected in the previous area.
WP2Status SubAreaScoreFunc::BeginArea(uint32_t area_x, uint32_t area_y) {
  // Let the base class switch to the new area first.
  WP2_CHECK_STATUS(AreaScoreFunc::BeginArea(area_x, area_y));

  // No block has been selected yet in this new area.
  area_used_blocks_.clear();
  return WP2_STATUS_OK;
}
// Commits 'block' as final: it is remembered in 'area_used_blocks_', handed to
// AreaScoreFunc::Use() and the cached default block (with its distortion and
// rate) is reset so that it is computed again for the next position.
WP2Status SubAreaScoreFunc::Use(const Block& block) {
  // Remember the block before the base class updates 'area_front_mgr_'.
  const auto num_used_blocks = area_used_blocks_.size() + 1;
  WP2_CHECK_ALLOC_OK(area_used_blocks_.resize(num_used_blocks));
  area_used_blocks_.back().SetDim(block, area_front_mgr_);

  WP2_CHECK_STATUS(AreaScoreFunc::Use(block));

  // Reset.
  default_block_ = Block();
  default_block_rate_ = 0.f;
  default_block_disto_ = 0.f;
  return WP2_STATUS_OK;
}
// Appends to 'area_remaining_blocks' the default (multipass) partitioning of
// the space of the current 'area_' that is covered neither by the final
// 'area_used_blocks_' nor by the blocks already in 'area_remaining_blocks'
// (the block under review, if any). The new blocks are sorted.
// Returns WP2_STATUS_INVALID_PARAMETER if the current 'area_' is not exactly
// covered by the blocks of a fresh front manager; otherwise the first error
// status encountered or WP2_STATUS_OK.
WP2Status SubAreaScoreFunc::GetAreaRemainingDefaultPartition(
const ProgressRange& progress,
Vector<CodedBlock>* const area_remaining_blocks) const {
const ProgressRange score_func_init_progress(progress, 0.01);
const ProgressRange partitioner_progress(progress, 0.99);
// It would be faster to reuse the 'AreaScoreFunc::default_partition_' (or
// part of it) but it is not always compatible with the already selected
// 'area_used_blocks_' so for simplicity it is always recomputed for each
// block size of each block position.
MultiScoreFunc score_func;
MultiPassPartitioner partitioner(&score_func);
EncoderConfig config = *config_;
config.info = nullptr; // Remove any debugging input/output.
WP2_CHECK_STATUS(score_func.Init(config, tile_rect_, *src_, *gparams_,
score_func_init_progress));
WP2_CHECK_STATUS(partitioner.Init(config, *src_, tile_rect_, &score_func));
// 'blocks' will first contain all irrelevant blocks that will be forced into
// the 'partitioner' to extract only the interesting ones.
VectorNoCtor<Block> blocks;
// Force all blocks external to the current 'area_' (their size and count do
// not matter).
{
FrontMgrArea front_mgr(area_width_, area_height_);
WP2_CHECK_STATUS(front_mgr.Init(config_->partition_set,
config_->partition_snapping,
tile_rect_.width, tile_rect_.height));
// Number of block units found inside 'area_', for verification.
uint32_t surface_kept = 0;
while (!front_mgr.Done()) {
const Block max_block = front_mgr.GetMaxFittingBlock();
WP2_CHECK_ALLOC_OK(front_mgr.UseSize(max_block.dim(), 0, nullptr));
front_mgr.Use(max_block);
if (!area_.Contains(max_block.x_pix(), max_block.y_pix())) {
WP2_CHECK_ALLOC_OK(blocks.push_back(max_block));
} else {
surface_kept += max_block.rect().GetArea();
}
}
WP2_CHECK_OK(
surface_kept == SizeBlocks(area_.width) * SizeBlocks(area_.height),
WP2_STATUS_INVALID_PARAMETER);
}
// Force all already selected final blocks.
for (const CodedBlock& cb : area_used_blocks_) {
WP2_CHECK_ALLOC_OK(blocks.push_back(cb.blk()));
}
// Force the block under review, if any.
for (const CodedBlock& cb : *area_remaining_blocks) {
WP2_CHECK_ALLOC_OK(blocks.push_back(cb.blk()));
}
// Everything in 'blocks' up to here is forced input, not part of the result.
const uint32_t num_default_blocks_to_ignore = blocks.size();
const uint32_t num_already_added_area_blocks = area_remaining_blocks->size();
// Get the default partitioning of the remaining empty spaces into 'blocks'.
WP2_CHECK_STATUS(partitioner.GetBestPartition(partitioner_progress, &blocks));
// Sort in lexico order only the new blocks.
std::sort(blocks.begin() + num_default_blocks_to_ignore, blocks.end());
// Copy from Block struct to CodedBlock class into 'area_remaining_blocks'.
WP2_CHECK_ALLOC_OK(area_remaining_blocks->resize(
area_remaining_blocks->size() +
(blocks.size() - num_default_blocks_to_ignore)));
// 'i' walks the new blocks, 'j' the newly added slots of the output.
for (uint32_t i = num_default_blocks_to_ignore,
j = num_already_added_area_blocks;
i < blocks.size(); ++i, ++j) {
area_remaining_blocks->at(j).SetDimDefault(blocks[i]);
}
return WP2_STATUS_OK;
}
//------------------------------------------------------------------------------
// Upper bound of the values stored in 'direction_certainty_'.
static constexpr uint32_t kMaxCertainty = 3;
// Side of the square window used by GetSpread(), in pixels.
static constexpr int32_t kKernelSize = 5;
// Maximum distance from the window center to its border, in pixels.
static constexpr int32_t kMaxDist = kKernelSize / 2;
// Fills 'min' and 'max' row by row by calling WP2::GetBlockMinMax() on 'src'.
// 'min' and 'max' must have the same dimensions; each of their rows maps to a
// band of kMinBlockSizePix rows of 'src' and receives one value per block.
static void GetMinMax(const Plane16& src, Plane16& min, Plane16& max) {
  assert(min.w_ == max.w_ && min.h_ == max.h_);
  const uint32_t src_step = src.Step();
  const uint32_t blocks_per_row = min.w_;
  const int16_t* src_band = src.Row(0);
  for (uint32_t block_y = 0; block_y < min.h_; ++block_y) {
    int16_t* const min_values = min.Row(block_y);
    int16_t* const max_values = max.Row(block_y);
    WP2::GetBlockMinMax(src_band, src_step, blocks_per_row, min_values,
                        max_values);
    // Jump to the first row of the next band of blocks.
    src_band += kMinBlockSizePix * src_step;
  }
}
// Range of original YUV values in a kKernelSize window around each pixel.
// The higher the value, the more heterogeneous the area is.
// For each pixel of 'src', the difference between the maximum and the minimum
// sample of the window centered on it (cropped to the plane boundaries) is
// clamped and written at the same position in 'dst'.
static void GetSpread(const Plane16& src, Plane16* const dst) {
const int32_t w = src.w_, h = src.h_;
int16_t* p_dst = dst->Row(0);
const uint32_t dst_step = dst->Step();
const int16_t* p_src = src.Row(0);
const uint32_t src_step = src.Step();
for (int32_t y = 0; y < h; ++y) {
int16_t min, max;
if (y < kMaxDist || y >= h - kMaxDist) {
// Top or bottom border: the window is cropped vertically, and possibly
// horizontally too, so the generic implementation is used everywhere.
const int32_t min_sub_y = std::max(-kMaxDist, -y);
const int32_t max_sub_y = std::min(kMaxDist, h - 1 - y);
const int16_t* const row = &src.At(0, y + min_sub_y);
for (int32_t x = 0; x < w; ++x) {
const int32_t min_sub_x = std::max(-kMaxDist, -x);
const int32_t max_sub_x = std::min(kMaxDist, w - 1 - x);
GetBlockMinMaxGeneric(row + x + min_sub_x, src_step,
max_sub_x + 1 - min_sub_x,
max_sub_y + 1 - min_sub_y, &min, &max);
p_dst[x] = (int16_t)ClampToSigned(max - min, kMaxYuvBits + 1u);
}
} else {
// The window has its full height: 'row' is its first row.
const int16_t* const row = p_src - src_step * kMaxDist;
for (int32_t x = 0; x < w; ++x) {
// The dedicated 5x5 implementation is only used when 8 samples starting
// at 'x - kMaxDist' fit in the row (presumably because it reads that
// many samples per row; to be confirmed against its implementation).
if (x < kMaxDist || x - kMaxDist + 8 > w) {
const int32_t min_sub_x = std::max(-kMaxDist, -x);
const int32_t max_sub_x = std::min(kMaxDist, w - 1 - x);
GetBlockMinMaxGeneric(row + x + min_sub_x, src_step,
max_sub_x + 1 - min_sub_x, kKernelSize,
&min, &max);
} else {
GetBlockMinMax_5x5(row + x - kMaxDist, src_step, &min, &max);
}
p_dst[x] = (int16_t)ClampToSigned(max - min, kMaxYuvBits + 1u);
}
}
p_dst += dst_step;
p_src += src_step;
}
}
// Out-of-class definitions of the static constexpr data members of
// MultiScoreFunc. Their values are given where they are declared.
constexpr float MultiScoreFunc::kMinScore;
constexpr int MultiScoreFunc::kMinEffortForGoodQuantDCT;
// Precomputes the per-block and per-pixel statistics of the source planes that
// the passes of ComputeScore() rely on: min/max values per block, spread per
// pixel (only if the effort is high enough), standard deviation and luma
// direction per block.
// Half of 'progress' is given to PartitionScoreFunc::Init(), the other half is
// reported at the end of this function.
// Returns the first error status encountered, WP2_STATUS_OK otherwise.
WP2Status MultiScoreFunc::Init(const EncoderConfig& config,
const Rectangle& tile_rect, const YUVPlane& yuv,
const GlobalParams& gparams,
const ProgressRange& progress) {
DrctFilterInit();
ScoreDspInit();
WP2_CHECK_STATUS(PartitionScoreFunc::Init(config, tile_rect, yuv, gparams,
ProgressRange(progress, 0.5)));
yuv_range_ =
(float)(gparams.transf_.GetYUVMax() - gparams.transf_.GetYUVMin());
a_range_ratio_ = yuv_range_ / kAlphaMax;
// Cache the 'min_' and 'max_' luma/chroma values per kMinBlockSize square.
WP2_CHECK_STATUS(min_.Resize(SizeBlocks(src_->Y.w_), SizeBlocks(src_->Y.h_)));
WP2_CHECK_STATUS(max_.Resize(min_.Y.w_, min_.Y.h_));
for (Channel channel : {kYChannel, kUChannel, kVChannel}) {
const Plane16& src_plane = src_->GetChannel(channel);
// The source planes are expected to contain only whole blocks.
assert(src_plane.w_ % kMinBlockSizePix == 0 &&
src_plane.h_ % kMinBlockSizePix == 0);
GetMinMax(src_plane, min_.GetChannel(channel), max_.GetChannel(channel));
}
// The spread is only computed starting from a given effort.
if (config_->effort >= kMinEffortForGoodQuantDCT) {
// Image processing.
WP2_CHECK_STATUS(spread_.Resize(src_->GetWidth(), src_->GetHeight(),
/*pad=*/1, src_->HasAlpha()));
for (Channel channel : {kYChannel, kUChannel, kVChannel, kAChannel}) {
if (channel == kAChannel && !src_->HasAlpha()) continue;
GetSpread(src_->GetChannel(channel), &spread_.GetChannel(channel));
}
}
// Per-block standard deviation.
WP2_CHECK_STATUS(
stddev_.Allocate(num_block_cols_, num_block_rows_, kMinBlockSizePix));
stddev_.AddValues(*src_);
if (src_->HasAlpha()) {
WP2_CHECK_STATUS(
a_stddev_.Allocate(num_block_cols_, num_block_rows_, kMinBlockSizePix));
a_stddev_.AddValues(src_->A);
}
// Per-block luma general direction.
// It might give better results to compute that for each NxN block instead of
// aggregating pre-computed 4x4 ones but it is probably too expensive.
WP2_CHECK_ALLOC_OK(direction_.resize(num_block_cols_ * num_block_rows_));
WP2_CHECK_ALLOC_OK(
direction_certainty_.resize(num_block_cols_ * num_block_rows_));
const uint32_t bitdepth = gparams.transf_.GetYUVPrecisionBits() + 1;
for (uint32_t y = 0; y < num_block_rows_; ++y) {
// 'x_ptr' points to the top-left luma sample of the block of index 'i'.
const int16_t* x_ptr = &src_->Y.At(/*x=*/0, y * kMinBlockSizePix);
for (uint32_t i = y * num_block_cols_; i < (y + 1) * num_block_cols_;
++i, x_ptr += kMinBlockSizePix) {
uint32_t variance;
CdefDirection4x4(x_ptr, src_->Y.Step(), bitdepth, &direction_[i],
&variance);
// The certainty is the variance divided by 16, capped to kMaxCertainty.
direction_certainty_[i] = Clamp(variance >> 4, 0u, kMaxCertainty);
// TODO(yguyon): Also compute, store, use 8x8 direction for bigger blocks
}
}
WP2_CHECK_STATUS(DrawVDebug());
WP2_CHECK_STATUS(progress.AdvanceBy(0.5));
return WP2_STATUS_OK;
}
//------------------------------------------------------------------------------
// Measures 'block' with the metric of the current 'pass_' and converts the
// result into a 'score' for which higher is better and 0.5 separates the
// accepted blocks from the rejected ones. The score is also registered for the
// visual debug and 'progress' is advanced by one unit.
WP2Status MultiScoreFunc::ComputeScore(const Block& block,
                                       const ProgressRange& progress,
                                       float* const score) {
  // A block is accepted when 'metric <= limit'. These defaults always pass and
  // are the ones used by Pass::Any.
  float metric = 0.f;
  float limit = 1.f;
  switch (pass_) {
    case Pass::LumaAlphaGradient:
      metric = GetLumaAlphaGradient(block);
      limit = GetLumaAlphaGradientThreshold(block);
      break;
    case Pass::NarrowStdDev:
      metric = GetStdDevRange(block);
      limit = GetStdDevRangeThreshold(block);
      break;
    case Pass::GoodQuantDCT:
      metric = GetQuantDCT(block);
      limit = GetQuantDCTThreshold(block);
      break;
    case Pass::Direction:
      metric = GetDirection(block);
      limit = GetDirectionThreshold(block);
      break;
    case Pass::Any:
      metric = 0.f;
      limit = 1.f;
      break;
    default:
      assert(false);
  }

  // For non-negative inputs, an accepted block gets a score in [0.501:1.001]
  // (the lower the metric, the higher the score) and a rejected block gets a
  // score in [0:0.499[.
  // NOTE(review): 'metric' and 'limit' both being 0 leads to 0/0 (NaN) in the
  // accepted branch; confirm that callers never reach that case or that they
  // handle a NaN score.
  const bool accepted = (metric <= limit);
  *score = accepted ? (1.001f - 0.5f * metric / limit)
                    : (0.499f * limit / metric);

  RegisterScoreForVDebug(block, *score);
  WP2_CHECK_STATUS(progress.AdvanceBy(1.));
  return WP2_STATUS_OK;
}
//------------------------------------------------------------------------------
// Copies the samples of 'channel' covered by 'block' into 'coeffs', widening
// them from 16 to 32 bits. The output is tightly packed: its stride is the
// width of 'block' in pixels.
void MultiScoreFunc::GetCoeffs(Channel channel, const Block& block,
                               int32_t coeffs[kMaxBlockSizePix2]) const {
  const Plane16& plane = src_->GetChannel(channel);
  const uint32_t width = block.w_pix();
  const uint32_t height = block.h_pix();

  const int16_t* from = &plane.At(block.x_pix(), block.y_pix());
  int32_t* to = coeffs;
  for (uint32_t row = 0; row < height; ++row) {
    std::copy_n(from, width, to);
    from += plane.Step();
    to += width;
  }
}
// Fills 'coeffs' with the samples of 'block' in 'channel' after a round trip
// through a DCT, the quantization and dequantization of the segment assigned
// to 'block', and the inverse DCT.
void MultiScoreFunc::QuantizeCoeffs(Channel channel, const Block& block,
                                    int32_t coeffs[kMaxBlockSizePix2]) const {
  const uint32_t width = block.w_pix();
  const uint32_t height = block.h_pix();

  // Forward transform of the source samples, done in place.
  GetCoeffs(channel, block, coeffs);
  WP2Transform2D(coeffs, kDct, kDct, width, height, coeffs,
                 /*reduced=*/false);

  // Fetch the quantization matrix of the segment that 'block' belongs to.
  const Rectangle padded_rect = {tile_rect_.x, tile_rect_.y, src_->GetWidth(),
                                 src_->GetHeight()};
  const uint8_t segment =
      AssignSegmentId(*config_, *gparams_, padded_rect, block);
  const QuantMtx& quantizer = gparams_->segments_[segment].GetQuant(channel);

  // Lossy step: quantize to 16 bits then expand back into 'coeffs'.
  const TrfSize transform_size = GetTransform(block.dim());
  int16_t quantized[kMaxBlockSizePix2];
  uint32_t num_quantized;
  quantizer.Quantize(coeffs, transform_size, /*first_is_dc=*/true, quantized,
                     &num_quantized);
  quantizer.Dequantize(quantized, num_quantized, transform_size, coeffs);

  // Back to the spatial domain so that the caller can compare with the source.
  WP2InvTransform2D(coeffs, kDct, kDct, width, height, coeffs,
                    /*reduced=*/false);
}
// Covers 'block' with sub-blocks of 'sub_block_size', visited row by row from
// the top-left corner, and runs the quantization round trip on each of them
// independently. The reconstructed samples are gathered into 'coeffs', whose
// stride is the width of 'block' in pixels.
// If 'max_range' is not null, it receives the largest difference between the
// highest and the lowest reconstructed sample of a single sub-block (0 if no
// sub-block was visited).
void MultiScoreFunc::QuantizeCoeffs(Channel channel, const Block& block,
                                    BlockSize sub_block_size,
                                    int32_t coeffs[kMaxBlockSizePix2],
                                    int32_t* const max_range) const {
  const uint32_t dst_stride = block.w_pix();
  int32_t widest_range = 0;

  Block part(block.x(), block.y(), sub_block_size);
  while (part.y() + part.h() <= block.y() + block.h()) {
    int32_t part_coeffs[kMaxBlockSizePix2];
    QuantizeCoeffs(channel, part, part_coeffs);
    const uint32_t part_w = part.w_pix();
    const uint32_t part_h = part.h_pix();

    // 'part_coeffs' is tightly packed so its extremes can be found in one go.
    const auto extremes =
        std::minmax_element(part_coeffs, part_coeffs + part_w * part_h);
    widest_range = std::max(widest_range, *extremes.second - *extremes.first);

    // Paste the rows of the sub-block at their location within 'block'.
    const int32_t* from = part_coeffs;
    int32_t* to = coeffs + (part.y_pix() - block.y_pix()) * dst_stride +
                  (part.x_pix() - block.x_pix());
    for (uint32_t row = 0; row < part_h; ++row) {
      std::copy_n(from, part_w, to);
      from += part_w;
      to += dst_stride;
    }

    // Step to the right, or to the beginning of the next row of sub-blocks if
    // the new position would cross the right edge of 'block'.
    part.SetXY(part.x() + part.w(), part.y());
    if (part.x() + part.w() > block.x() + block.w()) {
      part.SetXY(block.x(), part.y() + part.h());
    }
  }

  if (max_range != nullptr) *max_range = widest_range;
}
//------------------------------------------------------------------------------
// Fits a bilinear gradient to the 'w' x 'h' samples starting at 'src' and
// returns the largest absolute difference between a sample and the gradient.
// The gradient is anchored at the four corners, each being estimated as the
// mean of the corner sample and of its two direct neighbors.
// 'step' is the offset from a line of 'src' to the next one.
static int32_t GetGradientDiff(const int16_t* src, int32_t step,
                               int32_t w, int32_t h) {
  assert(w >= 4 && h >= 4);
  const int32_t last_x = w - 1, last_y = h - 1;
  const int16_t* const top = src;
  const int16_t* const bottom = src + last_y * step;

  // Sums of three samples per corner. The division by 3 that would turn them
  // into means is folded into 'denominator'.
  const int32_t sum_top_left = top[0] + top[1] + top[step];
  const int32_t sum_top_right =
      top[last_x - 1] + top[last_x] + top[step + last_x];
  const int32_t sum_bottom_left = bottom[-step] + bottom[0] + bottom[1];
  const int32_t sum_bottom_right =
      bottom[-step + last_x] + bottom[last_x - 1] + bottom[last_x];
  const int32_t denominator = 3 * last_x * last_y;

  int32_t worst = 0;
  const int16_t* row = src;
  for (int32_t y = 0; y <= last_y; ++y, row += step) {
    // Vertical interpolation of both edges, then horizontal interpolation
    // between them for each sample of the line.
    const int32_t left_edge = sum_top_left * (last_y - y) + sum_bottom_left * y;
    const int32_t right_edge =
        sum_top_right * (last_y - y) + sum_bottom_right * y;
    for (int32_t x = 0; x <= last_x; ++x) {
      const int32_t expected =
          DivRound(left_edge * (last_x - x) + right_edge * x, denominator);
      const int32_t error = std::abs(row[x] - expected);
      if (error > worst) worst = error;
    }
  }
  return worst;
}
float MultiScoreFunc::GetLumaAlphaGradient(const Block& block) const {
// Empirically chosen values.
const float kDiffScale[] = {1.f, 1.f, 1.f, 0.25f / kAlphaMax * yuv_range_};
float max_diff = 0.f;
// kUChannel and kVChannel do not bring valuable partition decision-making
// here so skip them for speed.
for (Channel c : {kYChannel, kAChannel}) {
if (c == kAChannel && !src_->HasAlpha()) continue;
const int32_t diff = GetGradientDiff(
&src_->GetChannel(c).At(block.x_pix(), block.y_pix()),
src_->GetChannel(c).Step(), block.w_pix(), block.h_pix());
max_diff = std::max(max_diff, diff * kDiffScale[c]);
}
return max_diff;
}
float MultiScoreFunc::GetLumaAlphaGradientThreshold(const Block& block) const {
// The threshold is tighter for bigger blocks at higher qualities.
// Medium blocks are ignored except at high qualities.
constexpr BaseSlope kBaseSlope[] = {
{0.f, 0.f}, // 4x4, unused
{0.f, 0.f}, // 8x4, unused
{-0.6f, 0.7f}, // 8x8 16x4
{-0.06f, 0.08f}, // 16x8
{-0.03f, 0.045f}, // 16x16 32x8
{0.04f, -0.035f}, // 32x16
{0.05f, -0.045f}, // 32x32
};
STATIC_ASSERT_ARRAY_SIZE(kBaseSlope, WP2Log2Ceil_k(kMaxBlockSize2) + 1);
const uint32_t index = (uint32_t)WP2Log2Floor(block.rect().GetArea());
assert((1u << index) == block.rect().GetArea());
return yuv_range_ *
MapQuality(*config_, kBaseSlope[index].base, kBaseSlope[index].slope);
}
//------------------------------------------------------------------------------
// Returns the difference between the highest and the lowest standard deviation
// found in 'block', normalized to [0:1]. The candidates are the standard
// deviation of each unit cell of 'variance' covered by 'block' and the one of
// the whole 'block'.
static float StdDevRange(const Block& block, const Integral& variance) {
  const uint32_t x_end = block.x() + block.w();
  const uint32_t y_end = block.y() + block.h();

  // The whole block is a candidate too because the cells could be coherent
  // within themselves but not with each other.
  const uint8_t whole =
      variance.StdDevUint8(block.x(), block.y(), x_end, y_end);
  uint8_t lowest = whole, highest = whole;

  for (uint32_t y = block.y(); y < y_end; ++y) {
    for (uint32_t x = block.x(); x < x_end; ++x) {
      const uint8_t cell = variance.StdDevUint8(x, y, x + 1, y + 1);
      lowest = std::min(lowest, cell);
      highest = std::max(highest, cell);
    }
  }
  return (highest - lowest) / 255.f;
}
// Returns the StdDevRange() of 'block' computed from 'stddev_' or, if it is
// higher, the one computed from 'a_stddev_' multiplied twice by
// 'a_range_ratio_'. 'a_stddev_' is ignored when empty.
float MultiScoreFunc::GetStdDevRange(const Block& block) const {
  const float yuv_range = StdDevRange(block, stddev_);
  if (a_stddev_.empty()) return yuv_range;

  const float alpha_range =
      StdDevRange(block, a_stddev_) * a_range_ratio_ * a_range_ratio_;
  return std::max(yuv_range, alpha_range);
}
// Returns the widest GetStdDevRange() for which a block is accepted. It does
// not depend on 'block'.
float MultiScoreFunc::GetStdDevRangeThreshold(const Block& block) const {
  // The threshold shrinks as the quality increases (0.50 at quality 0, 0.12 at
  // kMaxLossyQuality): bigger blocks are accepted during partitioning at low
  // qualities while smaller blocks are sought at high qualities.
  constexpr float kBase = 0.50f;
  constexpr float kSlope = -0.38f;
  return MapQuality(*config_, kBase, kSlope);
}
//------------------------------------------------------------------------------
// Returns the average difference between the original luma coefficients and the
// quantized ones, weighted per pixel by the 'spread_' in order to give more
// importance to flat areas (penalize distant ripples more than noise on edges).
float MultiScoreFunc::GetQuantDCT(const Block& block, Channel channel) const {
int32_t coeffs[kMaxBlockSizePix2];
QuantizeCoeffs(channel, block, coeffs);
float avg_diff = 0.f;
int32_t* dst_row = coeffs;
const int16_t* src_row =
&src_->GetChannel(channel).At(block.x_pix(), block.y_pix());
const int16_t* spread_row =
&spread_.GetChannel(channel).At(block.x_pix(), block.y_pix());
for (uint32_t y = 0; y < block.h_pix(); ++y) {
for (uint32_t x = 0; x < block.w_pix(); ++x) {
const int32_t diff = std::abs(dst_row[x] - src_row[x]);
avg_diff += diff / Clamp(spread_row[x] / 20.f, 0.1f, 10.f);
}
dst_row += block.w_pix();
src_row += src_->GetChannel(channel).Step();
spread_row += spread_.GetChannel(channel).Step();
}
avg_diff /= (block.w_pix() * block.h_pix());
return avg_diff;
}
// Returns the highest per-channel GetQuantDCT() of 'block', each channel being
// weighted by 'kScale'. Alpha counts as 0 when the source has none.
float MultiScoreFunc::GetQuantDCT(const Block& block) const {
  // Chroma and alpha are taken into account just enough to discard bad layouts
  // (for example an entirely black image with alpha patterns).
  // TODO(yguyon): Check ADST too
  constexpr float kScale[] = {1.f, 0.1f, 0.1f, 0.1f};

  float worst = GetQuantDCT(block, kYChannel) * kScale[kYChannel];
  worst = std::max(worst, GetQuantDCT(block, kUChannel) * kScale[kUChannel]);
  worst = std::max(worst, GetQuantDCT(block, kVChannel) * kScale[kVChannel]);
  const float alpha = src_->HasAlpha()
                          ? GetQuantDCT(block, kAChannel) * kScale[kAChannel]
                          : 0.f;
  return std::max(worst, alpha);
}
// Returns the highest GetQuantDCT() for which a block is accepted. It does not
// depend on 'block'.
float MultiScoreFunc::GetQuantDCTThreshold(const Block& block) const {
  // This metric is only useful at low qualities, for large blocks that would
  // still look fine with an aggressive quantization: the threshold starts at 4
  // and MapQuality() clamps it to 0 at the highest lossy qualities.
  constexpr float kBase = 4.00f;
  constexpr float kSlope = -4.75f;
  return MapQuality(*config_, kBase, kSlope);
}
//------------------------------------------------------------------------------
// Returns a low score for a 'block' that appears to have an obvious and uniform
// orientation.
float MultiScoreFunc::GetDirection(const Block& block) const {
const uint32_t stride = num_block_cols_;
const uint32_t* direction =
direction_.data() + block.y() * stride + block.x();
const uint32_t* certainty =
direction_certainty_.data() + block.y() * stride + block.x();
uint32_t weight[kDrctFltNumDirs] = {0};
for (uint32_t sub_y = 0; sub_y < block.h(); ++sub_y) {
for (uint32_t sub_x = 0; sub_x < block.w(); ++sub_x) {
assert(direction[sub_x] < kDrctFltNumDirs);
weight[direction[sub_x]] += std::min(certainty[sub_x], kMaxCertainty);
}
direction += stride;
certainty += stride;
}
const uint32_t heaviest =
std::max_element(weight, weight + kDrctFltNumDirs) - weight;
const uint32_t max_weight = block.w() * block.h() * kMaxCertainty;
assert(weight[heaviest] <= max_weight);
const uint32_t previous_direction =
(heaviest - 1 + kDrctFltNumDirs) % kDrctFltNumDirs;
const uint32_t next_direction = (heaviest + 1) % kDrctFltNumDirs;
const uint32_t weight_with_close_directions =
weight[heaviest] +
(weight[previous_direction] + weight[next_direction]) / 4;
const float direction_score =
1.f - Clamp(weight_with_close_directions / (float)max_weight, 0.f, 1.f);
return direction_score;
}
// Returns the highest GetDirection() for which a block is accepted. It does not
// depend on 'block'.
float MultiScoreFunc::GetDirectionThreshold(const Block& block) const {
  // Slightly stricter as the quality increases (0.15 at quality 0, 0.1025 at
  // kMaxLossyQuality).
  constexpr float kBase = 0.15f;
  constexpr float kSlope = -0.0475f;
  return MapQuality(*config_, kBase, kSlope);
}
//------------------------------------------------------------------------------
// Initializes the base class, runs a global analysis whose result is stored in
// 'local_gparams_', and prepares the encoding and decoding contexts.
// 'best_score_' is then set to the score of the partition containing only the
// forced blocks. On success, 'blocks_' is empty and 'cached_best_score_' is 0.
// Returns an error status as soon as one of these steps fails.
WP2Status TileScoreFunc::Init(const EncoderConfig& config,
const Rectangle& tile_rect, const YUVPlane& yuv,
const GlobalParams& gparams,
const ProgressRange& progress) {
WP2EncDspInit();
// 'progress' is split between the base initialization (sub-range created with
// 0.2) and the encoding of the forced partition (sub-range created with 0.8).
const ProgressRange init_progress(progress, 0.2);
const ProgressRange forced_partition_progress(progress, 0.8);
WP2_CHECK_STATUS(
PartitionScoreFunc::Init(config, tile_rect, yuv, gparams, init_progress));
// Only the 'transf_' of the input 'gparams' is reused. The analysis below
// fills 'local_gparams_', which is the instance given to TryEncode().
local_gparams_.features_ = &local_features_map_;
WP2_CHECK_STATUS(GlobalAnalysis(ArgbBuffer(), yuv, gparams.transf_,
config, &local_gparams_));
WP2_CHECK_STATUS(InitForEncode());
// Initialize the best score with the partition containing only the forced
// blocks.
const Rectangle padded_tile_rect = {tile_rect_.x, tile_rect_.y,
yuv.GetWidth(), yuv.GetHeight()};
WP2_CHECK_STATUS(AddForcedBlocks(config, padded_tile_rect, &blocks_));
WP2_CHECK_STATUS(TryEncode(blocks_, forced_partition_progress, &best_score_));
RegisterScoreForVDebug("starting", {}, best_score_);
cached_best_score_ = 0.f;
// The forced blocks were only needed to get the starting score.
blocks_.clear();
return WP2_STATUS_OK;
}
// Computes the 'score' of the tile encoded with the blocks accepted so far
// (see Use()) plus the candidate 'block'. Higher is better.
// The candidate is only appended to 'blocks_' for the duration of the call:
// 'blocks_' is restored before returning, whether the encoding succeeded or
// not. 'cached_best_score_' keeps track of the highest score seen since the
// last call to Use().
// Returns an error status if the allocation or the encoding fails, in which
// case 'score' should not be relied upon.
WP2Status TileScoreFunc::ComputeScore(const Block& block,
                                      const ProgressRange& progress,
                                      float* const score) {
  WP2_CHECK_ALLOC_OK(blocks_.push_back(block));
  const WP2Status encode_status = TryEncode(blocks_, progress, score);
  // Do not leave the candidate in 'blocks_' on failure, otherwise the accepted
  // partition would be silently altered for any later use of this instance.
  if (encode_status != WP2_STATUS_OK) blocks_.pop_back();
  WP2_CHECK_STATUS(encode_status);

  if (*score > cached_best_score_) cached_best_score_ = *score;
  if (*score > best_score_) RegisterScoreForVDebug("new best", block, *score);
  blocks_.pop_back();
  return WP2_STATUS_OK;
}
// Prepares everything TryEncode() needs: a single-tile encoding layout viewing
// 'src_', a copy of the configuration using 'sub_partition_method_', the tile
// encoder, the buffers receiving the decoded tile and made-up bitstream
// 'features_'.
// Returns an error status as soon as an allocation or one of the steps fails.
WP2Status TileScoreFunc::InitForEncode() {
// Room for one block per kMaxBlockSizePix square of the tile.
WP2_CHECK_ALLOC_OK(blocks_.reserve((tile_rect_.width / kMaxBlockSizePix) *
(tile_rect_.height / kMaxBlockSizePix)));
// The encoding layout is made of a unique tile, located at the origin and as
// big as 'tile_rect_'.
enc_tiles_layout_.num_tiles_x = enc_tiles_layout_.num_tiles_y = 1;
enc_tiles_layout_.tile_width = tile_rect_.width;
enc_tiles_layout_.tile_height = tile_rect_.height;
WP2_CHECK_ALLOC_OK(enc_tiles_layout_.tiles.resize(1));
enc_tiles_layout_.first_unassigned_tile_index = 0;
enc_tiles_layout_.tiles.front().rect = {0, 0, tile_rect_.width,
tile_rect_.height};
enc_tiles_layout_.tiles.front().rgb_input.Deallocate(); // This is lossy.
assert(!src_->IsEmpty());
WP2_CHECK_STATUS(enc_tiles_layout_.tiles.front().yuv_input.SetView(*src_));
// Encode with a copy of the configuration so that the partition method can be
// overridden and 'info' removed without touching the caller's instance.
tmp_config_ = *config_;
tmp_config_.partition_method = sub_partition_method_;
tmp_config_.info = nullptr;
tile_encoder_.config_ = &tmp_config_;
tile_encoder_.use_lossless_ = (tmp_config_.quality > kMaxLossyQuality);
tile_encoder_.tiles_layout_ = &enc_tiles_layout_;
WP2_CHECK_STATUS(tile_encoder_.AssignNextTile());
// Recursion is too dangerous here. It's potentially creating
// (kMaxTileSize/kMinBlockSizePix)^2 = a lot of recursive encoding contexts.
assert(sub_partition_method_ != AUTO_PARTITIONING &&
sub_partition_method_ != TILE_ENCODE_PARTITIONING);
dec_config_.thread_level = 0;
// Destination of the decoded samples, allocated with the layout of 'src_'.
WP2_CHECK_STATUS(decompressed_yuv_.Copy(*src_, /*resize_if_needed=*/true));
// Needed for API compliance. The pixels will not be accessed.
WP2_CHECK_STATUS(
decompressed_argb_.Resize(tile_rect_.width, tile_rect_.height));
// A BitstreamFeatures instance is needed by LossyDecode(). Make up one.
// It is obtained by writing a header to memory and reading it back.
MemoryWriter writer;
WP2_CHECK_STATUS(
EncodeHeader(tmp_config_, tile_rect_.width, tile_rect_.height,
src_->HasAlpha(), /*is_anim=*/false, /*loop_forever=*/true,
kDefaultBackgroundColor, /*preview_color=*/{},
/*has_icc=*/false, /*has_trailing_data=*/false, &writer));
WP2_CHECK_STATUS(features_.Read(writer.mem_, writer.size_));
return WP2_STATUS_OK;
}
// Encodes the whole tile with the forced 'blocks', decodes the result and
// outputs a 'score' (higher is better) equal to the measured distortion value
// minus the number of bits per pixel weighted by a quality-dependent factor.
// 'progress' is handed to the encoded tile.
// Returns an error status as soon as the encoding, the decoding or the
// distortion computation fails, in which case 'score' is not written.
WP2Status TileScoreFunc::TryEncode(const VectorNoCtor<Block>& blocks,
const ProgressRange& progress,
float* const score) {
// Start from an empty encoder for the unique tile of the layout.
ANSEnc& enc = enc_tiles_layout_.tiles.front().enc;
enc.WipeOut();
enc_tiles_layout_.gparams = &local_gparams_;
tile_encoder_.tile_->progress = progress;
// Encode the whole tile with the forced 'blocks'.
WP2_CHECK_STATUS(tile_encoder_.LossyEncode(blocks, &enc));
WP2_CHECK_STATUS(enc.Assemble());
// Reset the unique tile to a fresh state.
const uint32_t width = decompressed_argb_.width();
const uint32_t height = decompressed_argb_.height();
const uint32_t tile_width = TileWidth(FinalTileShape(*config_), width);
const uint32_t tile_height =
TileHeight(FinalTileShape(*config_), /*image_width=*/width);
WP2_CHECK_STATUS(GetTilesLayout(width, height, tile_width, tile_height,
ProgressRange(), &decompressed_argb_,
&decompressed_yuv_, &tiles_layout_));
assert(tiles_layout_.tiles.size() == 1 &&
enc_tiles_layout_.tiles.size() == 1);
// Plug ANSEnc output to ANSDec input.
Tile* const tile = &tiles_layout_.tiles.front();
tile->chunk_size_is_known = true;
tile->chunk_size = enc.BufferSize();
tiles_layout_.gparams = &local_gparams_;
tile->private_input = ExternalDataSource(enc.Buffer(), enc.BufferSize());
tile->input = &tile->private_input;
// Decode to 'decompressed_argb_'.
ANSDec dec(tile->input);
WP2_CHECK_STATUS(
LossyDecode(features_, dec_config_, &tiles_layout_, &dec, tile));
// Compare the pixels of the non-padded area only.
YUVPlane original_view, decompressed_view;
WP2_CHECK_STATUS(original_view.SetView(*src_, {0, 0, width, height}));
WP2_CHECK_STATUS(
decompressed_view.SetView(decompressed_yuv_, {0, 0, width, height}));
WP2_CHECK_STATUS(decompressed_view.GetDistortion(
original_view, kMaxYuvBits + 1, PSNR, distortion_));
// Compute a score based on distortion and the number of bits per pixel.
// NOTE(review): the variable is named 'ssim' but the metric requested above is
// PSNR. Element 4 of 'distortion_' is presumably the value aggregated over all
// channels; confirm against GetDistortion().
const float ssim = distortion_[4];
// The size is counted as at least one byte.
const float bpp = std::max(1u, enc.BufferSize()) * 8.f / (width * height);
// The weight of the size goes from 7 at quality 0 down to 4.15 at
// kMaxLossyQuality.
const float lambda = MapQuality(*config_, 7.00f, -2.85f);
*score = ssim - lambda * bpp;
return WP2_STATUS_OK;
}
// Adds 'block' to the blocks that are part of every following encoding.
// 'best_score_' is raised to the highest score computed since the previous
// call, if it is higher, and that cached score is reset to 0.
// Returns an error status if the allocation fails.
WP2Status TileScoreFunc::Use(const Block& block) {
  WP2_CHECK_ALLOC_OK(blocks_.push_back(block));
  best_score_ = std::max(best_score_, cached_best_score_);
  cached_best_score_ = 0.f;
  return WP2_STATUS_OK;
}
//------------------------------------------------------------------------------
// Gives the best 'score' (1) to a 'block' whose dimensions are exactly 'size_'
// and the worst one (0) to any other. 'progress' is advanced by one unit.
WP2Status FixedSizeScoreFunc::ComputeScore(const Block& block,
                                           const ProgressRange& progress,
                                           float* const score) {
  if (block.dim() == size_) {
    *score = 1.f;
  } else {
    *score = 0.f;
  }
  WP2_CHECK_STATUS(progress.AdvanceBy(1.));
  return WP2_STATUS_OK;
}
//------------------------------------------------------------------------------
} // namespace WP2