| // Copyright 2019 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // ----------------------------------------------------------------------------- |
| // |
| // Block position/size scoring functions. |
| // |
| // Author: Yannis Guyon (yguyon@google.com) |
| |
| #include "src/enc/partition_score_func.h" |
| |
| #include <algorithm> |
| #include <array> |
| #include <cmath> |
| #include <limits> |
| #include <numeric> |
| |
| #include "src/common/integral.h" |
| #include "src/dsp/dsp.h" |
| #include "src/dsp/math.h" |
| #include "src/enc/analysis.h" |
| #include "src/enc/partitioner.h" |
| #include "src/enc/wp2_enc_i.h" |
| #include "src/wp2/format_constants.h" |
| |
| namespace WP2 { |
| |
// Plain aggregate holding two floats, 'base' and 'slope'.
// It is not referenced in the part of the file visible here.
struct BaseSlope {
  float base;
  float slope;
};
| |
| // Maps 'config.quality' from [0, 95] to [min, (min + slope)]. |
| static float MapQuality(const EncoderConfig& config, float min, float slope) { |
| const float x = 1.f * config.quality / kMaxLossyQuality; |
| return std::max(0.f, min + x * slope); |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| WP2Status PartitionScoreFunc::Init(const EncoderConfig& config, |
| const Rectangle& tile_rect, |
| const YUVPlane& yuv, |
| const GlobalParams& gparams, |
| const ProgressRange& progress) { |
| config_ = &config; |
| gparams_ = &gparams; |
| tile_rect_ = tile_rect; |
| src_ = &yuv; |
| num_block_cols_ = SizeBlocks(yuv.Y.w_); |
| num_block_rows_ = SizeBlocks(yuv.Y.h_); |
| WP2_CHECK_STATUS(ClearVDebug()); |
| WP2_CHECK_STATUS(progress.AdvanceBy(1.)); |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status PartitionScoreFunc::Use(const Block& block) { return WP2_STATUS_OK; } |
| |
| //------------------------------------------------------------------------------ |
| |
| WP2Status BlockScoreFunc::Init(const EncoderConfig& config, |
| const Rectangle& tile_rect, const YUVPlane& yuv, |
| const GlobalParams& gparams, |
| const ProgressRange& progress) { |
| WP2EncDspInit(); |
| WP2_CHECK_STATUS(PartitionScoreFunc::Init(config, tile_rect, yuv, gparams, |
| ProgressRange(progress, 0.5))); |
| |
| const ChromaSubsampling chroma_subsampling = |
| DecideChromaSubsampling(*config_, /*more_than_one_block=*/true); |
| const bool use_aom_coeffs = DecideAOMCoeffs(*config_, tile_rect_); |
| WP2_CHECK_STATUS(DecideTransforms(config, &transforms_, &transforms_subset_)); |
| |
| // Store the reconstructed pixels of the temporary and final blocks. |
| WP2_CHECK_STATUS( |
| buffer_.Resize(src_->Y.w_, src_->Y.h_, /*pad=*/1, src_->HasAlpha())); |
| WP2_CHECK_STATUS(front_mgr_.Init(config_->partition_set, |
| config_->partition_snapping, |
| tile_rect_.width, tile_rect_.height)); |
| |
| // Initialize the instances recording only final blocks. |
| WP2_CHECK_STATUS(syntax_writer_.Init( |
| &dicts_, *config_, *gparams_, yuv, chroma_subsampling, tile_rect, |
| num_block_cols_ * num_block_rows_, use_aom_coeffs, ProgressRange())); |
| WP2_CHECK_STATUS(syntax_writer_.SetInitialSegmentIds()); |
| WP2_CHECK_STATUS(syntax_writer_.InitPass()); |
| |
| if (DCDiffusionMap::GetDiffusion(config_->error_diffusion) > 0) { |
| WP2_CHECK_STATUS(dc_error_u_.Init(tile_rect_.width)); |
| WP2_CHECK_STATUS(dc_error_v_.Init(tile_rect_.width)); |
| } |
| |
| // Initialize the cache. |
| WP2_CHECK_STATUS(context_.Init(use_aom_coeffs, yuv.Y.w_, yuv.Y.h_)); |
| WP2_CHECK_STATUS(progress.AdvanceBy(0.5)); |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status BlockScoreFunc::ComputeScore(const Block& block, |
| const ProgressRange& progress, |
| float* const score) { |
| WP2_CHECK_STATUS(ComputeScore(&block, /*num_blocks=*/1, score)); |
| WP2_CHECK_STATUS(progress.AdvanceBy(1.)); |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status BlockScoreFunc::ComputeScore(const Block blocks[4], |
| uint32_t num_blocks, |
| float* const score) { |
| // Copy the final state into the temporary scratch one. |
| WP2_CHECK_STATUS(tmp_dicts_.CopyFrom(dicts_)); |
| WP2_CHECK_STATUS(tmp_syntax_writer_.CopyFrom(syntax_writer_, &tmp_dicts_)); |
| if (DCDiffusionMap::GetDiffusion(config_->error_diffusion) > 0) { |
| WP2_CHECK_STATUS(tmp_dc_error_u_.CopyFrom(dc_error_u_)); |
| WP2_CHECK_STATUS(tmp_dc_error_v_.CopyFrom(dc_error_v_)); |
| } |
| |
| float total_rate = 0.f, total_disto = 0.f, rate[4], disto[4]; |
| assert(num_blocks >= 1 && num_blocks <= 4); |
| for (uint32_t i = 0; i < num_blocks; ++i) { |
| const Block& block = blocks[i]; |
| |
| // Encode the block. |
| Block tmp_blk; |
| if (!front_mgr_.SetNextBlockPosition(block.x(), block.y())) assert(false); |
| assert(front_mgr_.TryGetNextBlock(block.dim(), &tmp_blk)); |
| assert(block == tmp_blk); |
| tmp_cb_.SetDim(block, front_mgr_); |
| WP2_CHECK_STATUS(EncodeBlock(front_mgr_, &tmp_cb_, &tmp_syntax_writer_, |
| &tmp_dc_error_u_, &tmp_dc_error_v_, &buffer_)); |
| |
| // Write the bits. |
| ANSEnc enc; |
| WP2_CHECK_STATUS(tmp_syntax_writer_.WriteHeader(&enc)); |
| const float header_rate = enc.GetCost(tmp_dicts_); |
| WP2_CHECK_STATUS( |
| WriteBlock(front_mgr_, tmp_cb_, &tmp_syntax_writer_, &enc)); |
| const uint32_t num_pixels = block.rect_pix().GetArea(); |
| // Exclude the header to prevent early decision from impacting later blocks. |
| rate[i] = (enc.GetCost(tmp_dicts_) - header_rate) / num_pixels; |
| assert(rate[i] >= 0.f); |
| total_rate += rate[i]; |
| |
| // Compute the distortion per pixel. |
| disto[i] = 0.f; |
| constexpr float disto_scale[] = {0.4f, 0.2f, 0.2f, 0.2f}; |
| for (Channel c : {kYChannel, kUChannel, kVChannel, kAChannel}) { |
| if (c == kAChannel && !gparams_->has_alpha_) continue; |
| disto[i] += disto_scale[c] * tmp_cb_.GetDisto(c, tile_rect_); |
| } |
| disto[i] /= num_pixels; // Per pixel is not necessary, just nicer debug. |
| total_disto += disto[i]; |
| |
| // Register the blocks in the 'front_mgr_' except the last one. |
| if (i + 1 < num_blocks) { |
| if (!front_mgr_.UseSize(block.dim(), /*ind=*/0, &tmp_blk)) assert(false); |
| assert(block == tmp_blk); |
| front_mgr_.Use(block); |
| } |
| } |
| |
| // Unregister the blocks in the 'front_mgr_' except the last one. |
| for (uint32_t i = num_blocks - 1; i-- > 0;) { |
| front_mgr_.UndoUse(blocks[i]); |
| front_mgr_.UndoUseSize(blocks[i]); |
| } |
| |
| total_rate /= num_blocks; // Average per pixel values. |
| total_disto /= num_blocks; |
| |
| // Estimate a score from the written bits and the distortion. |
| const float lambda = MapQuality(*config_, 10.f, -9.f); // Empirical. |
| constexpr float kNiceScale = 0.01f; // Has no impact on the result. |
| *score = 1.0f / (1.0f + kNiceScale * (lambda * total_rate + total_disto)); |
| RegisterScoreForVDebug(blocks, num_blocks, rate, disto, total_rate, |
| total_disto, *score); |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status BlockScoreFunc::Use(const Block& block) { |
| if (!front_mgr_.SetNextBlockPosition(block.x(), block.y())) assert(false); |
| tmp_cb_.SetDim(block, front_mgr_); |
| // Write the final pixels for future context and rate computation. |
| WP2_CHECK_STATUS(EncodeBlock(front_mgr_, &tmp_cb_, &syntax_writer_, |
| &dc_error_u_, &dc_error_v_, &buffer_)); |
| WP2_CHECK_ALLOC_OK(front_mgr_.UseSize(tmp_cb_.dim(), |
| /*ind=*/0, /*block=*/nullptr)); |
| front_mgr_.Use(tmp_cb_.blk()); |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status BlockScoreFunc::FindBestBlockParams(const FrontMgrNxNBase& front_mgr, |
| const BlockContext& block_context, |
| SyntaxWriter* const writer, |
| DCDiffusionMap* const dc_error_u, |
| DCDiffusionMap* const dc_error_v, |
| CodedBlock* const cb) const { |
| const Rectangle padded_tile_rect = {tile_rect_.x, tile_rect_.y, |
| src_->GetWidth(), src_->GetHeight()}; |
| cb->id_ = AssignSegmentId(*config_, *gparams_, padded_tile_rect, cb->blk()); |
| const Segment& segment = gparams_->segments_.at(cb->id_); |
| cb->mtx_set_ = gparams_->use_rnd_mtx_ ? &gparams_->mtx_set_ : nullptr; |
| cb->is420_ = false; // Set in OptimizeModesChroma() but might trigger the |
| // undefined-behavior-sanitizer in OptimizeModesLuma(). |
| |
| cb->ResetContextCache(); |
| cb->y_context_is_constant_ = cb->ContextIsConstant(kYChannel); |
| |
| WP2_CHECK_STATUS(cb->OptimizeModesLuma( |
| *config_, tile_rect_, gparams_->maybe_use_lossy_alpha_, |
| gparams_->y_preds_, segment, writer->context(), transforms_, |
| transforms_subset_, writer->counters())); |
| |
| WP2_CHECK_STATUS(cb->OptimizeModesChroma( |
| *config_, tile_rect_, gparams_->maybe_use_lossy_alpha_, front_mgr, |
| gparams_->uv_preds_, segment, writer->context(), |
| writer->chroma_subsampling(), dc_error_u, dc_error_v, |
| writer->counters())); |
| |
| if (gparams_->has_alpha_) { |
| WP2_CHECK_STATUS(writer->DecideAlpha(cb)); |
| if (!cb->HasLossyAlpha()) { |
| // Consider no loss by copying original samples to the buffer. |
| WP2_CHECK_STATUS(cb->out_.A.Copy(cb->in_.A, /*resize_if_needed=*/false)); |
| } |
| } |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status BlockScoreFunc::EncodeBlock(const FrontMgrNxNBase& front_mgr, |
| CodedBlock* const cb, |
| SyntaxWriter* const syntax_writer, |
| DCDiffusionMap* const dc_error_u, |
| DCDiffusionMap* const dc_error_v, |
| YUVPlane* const buffer) const { |
| cb->SetRange(gparams_->transf_.GetYUVMin(), gparams_->transf_.GetYUVMax()); |
| cb->SetSrcInput(*src_); |
| ContextCache pred_context; |
| cb->SetContextInput(buffer_, &pred_context); |
| cb->SetReconstructedOutput(buffer); |
| |
| // This is the slowest part: finding the best transform, predictor etc. |
| WP2_CHECK_STATUS(FindBestBlockParams(front_mgr, context_, syntax_writer, |
| dc_error_u, dc_error_v, cb)); |
| // CodedBlock::Quantize() should be called already. |
| WP2_CHECK_STATUS(syntax_writer->FindBestEncodingMethods(cb)); |
| WP2_CHECK_STATUS(syntax_writer->RecordSize(front_mgr, cb->dim())); |
| WP2_CHECK_STATUS(syntax_writer->Record(*cb)); |
| if (gparams_->has_alpha_) { |
| WP2_CHECK_STATUS(syntax_writer->RecordAlpha(*cb)); |
| } |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status BlockScoreFunc::WriteBlock(const FrontMgrNxNBase& front_mgr, |
| const CodedBlock& cb, |
| SyntaxWriter* const syntax_writer, |
| ANSEnc* const enc) const { |
| assert(front_mgr.GetMaxFittingBlock().x() == cb.x() && |
| front_mgr.GetMaxFittingBlock().y() == cb.y()); |
| assert(front_mgr.GetMaxPossibleBlock().rect().Contains(cb.blk().rect())); |
| { |
| ANSDebugPrefix prefix(enc, "BlockHeader"); |
| WriteBlockSize(front_mgr, cb.dim(), syntax_writer->symbol_writer(), enc); |
| } |
| WP2_CHECK_STATUS(syntax_writer->WriteBlock(cb, /*block_index=*/0, enc)); |
| return WP2_STATUS_OK; |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| AreaScoreFunc::AreaScoreFunc(uint32_t area_width, uint32_t area_height) |
| : area_width_(area_width), |
| area_height_(area_height), |
| area_front_mgr_(area_width, area_height), |
| comp_(kMaxTileSize / kMinBlockSizePix, area_width / kMinBlockSizePix, |
| area_height / kMinBlockSizePix) {} |
| |
| WP2Status AreaScoreFunc::Init(const EncoderConfig& config, |
| const Rectangle& tile_rect, const YUVPlane& yuv, |
| const GlobalParams& gparams, |
| const ProgressRange& progress) { |
| const ProgressRange init_progress(progress, 0.1); |
| const ProgressRange score_func_init_progress(progress, 0.1); |
| const ProgressRange default_partition_progress(progress, 0.8); |
| WP2_CHECK_STATUS( |
| PartitionScoreFunc::Init(config, tile_rect, yuv, gparams, init_progress)); |
| |
| // This scoring function needs a strict block layout. |
| const BlockSize max_block_size = |
| GetSmallestBounds(config.partition_set, BLK_32x32); |
| WP2_CHECK_OK(config.partition_snapping, WP2_STATUS_INVALID_CONFIGURATION); |
| WP2_CHECK_OK(BlockWidthPix(max_block_size) <= area_width_ && |
| BlockHeightPix(max_block_size) <= area_height_, |
| WP2_STATUS_INVALID_CONFIGURATION); |
| |
| // Matches TileEncoder::LossyEncode() behavior. |
| const ChromaSubsampling chroma_subsampling = |
| DecideChromaSubsampling(*config_, /*more_than_one_block=*/true); |
| const bool use_aom_coeffs = DecideAOMCoeffs(*config_, tile_rect_); |
| WP2_CHECK_STATUS(DecideTransforms(config, &transforms_, &transforms_subset_)); |
| |
| // Retrieve the default partition (multipass) to have a reference. |
| { |
| MultiScoreFunc score_func; |
| MultiPassPartitioner partitioner(&score_func); |
| |
| EncoderConfig cfg_no_dbg = config; |
| cfg_no_dbg.info = nullptr; // Do not use or output any debugging data. |
| WP2_CHECK_STATUS(score_func.Init(cfg_no_dbg, tile_rect_, *src_, *gparams_, |
| score_func_init_progress)); |
| WP2_CHECK_STATUS( |
| partitioner.Init(cfg_no_dbg, *src_, tile_rect_, &score_func)); |
| // TODO(yguyon): Add forced blocks from 'config' to 'default_partition_' |
| WP2_CHECK_STATUS(partitioner.GetBestPartition(default_partition_progress, |
| &default_partition_)); |
| // Sort the blocks to find those in a given area faster. |
| std::sort(default_partition_.begin(), default_partition_.end(), comp_); |
| } |
| |
| // Initialize the instances recording only final blocks. |
| WP2_CHECK_STATUS(syntax_writer_.Init( |
| &dicts_, *config_, *gparams_, yuv, chroma_subsampling, tile_rect, |
| num_block_cols_ * num_block_rows_, use_aom_coeffs, ProgressRange())); |
| WP2_CHECK_STATUS(syntax_writer_.SetInitialSegmentIds()); |
| WP2_CHECK_STATUS(syntax_writer_.InitPass()); |
| |
| WP2_CHECK_STATUS(context_.Init(use_aom_coeffs, yuv.Y.w_, yuv.Y.h_)); |
| |
| if (DCDiffusionMap::GetDiffusion(config_->error_diffusion) > 0) { |
| WP2_CHECK_STATUS(dc_error_u_.Init(tile_rect_.width)); |
| WP2_CHECK_STATUS(dc_error_v_.Init(tile_rect_.width)); |
| } |
| |
| // Store the reconstructed pixels of the current area partitioning and of |
| // the final selected blocks. |
| WP2_CHECK_STATUS( |
| buffer_.Resize(src_->Y.w_, src_->Y.h_, /*pad=*/1, src_->HasAlpha())); |
| |
| // Setup the first area as the top-left corner. |
| WP2_CHECK_STATUS(area_front_mgr_.Init(config_->partition_set, |
| config_->partition_snapping, |
| tile_rect_.width, tile_rect_.height)); |
| WP2_CHECK_STATUS(BeginArea(/*area_x=*/0, /*area_y=*/0)); // x,y within tile |
| return WP2_STATUS_OK; |
| } |
| |
| // Returns a score in [0:1] representing how much a 'disto/rate' pair is good |
| // compared to a reference, where 0 is discarded, 1 is way better than 'ref' and |
| // 1/3 is equivalent to 'ref'. |
| static float GetRelativeScore(const EncoderConfig& config, float ref_disto, |
| float ref_rate, float disto, float rate) { |
| if (disto > ref_disto || rate > ref_rate) return 0.f; // No mercy. |
| if (ref_disto > 0.f) { |
| disto /= ref_disto; // Normalize. |
| } else { |
| if (disto > 0.f) return 0.f; // Discard, no way of assessing 'disto'/'ref'. |
| disto = 1.f; // Both values are 0 so set to 1 to signal it is equivalent. |
| } |
| if (ref_rate > 0.f) { |
| rate /= ref_rate; // Normalize. |
| } else { |
| if (rate > 0.f) return 0.f; // Discard, no way of assessing 'rate'/'ref'. |
| rate = 1.f; // Both values are 0 so set to 1 to signal it is equivalent. |
| } |
| const float delta = Clamp(config.quality / kMaxLossyQuality, 0.01f, 0.99f); |
| return 1.f / (1.f + delta * disto + (1.f - delta) * rate); |
| } |
| |
| WP2Status AreaScoreFunc::ComputeScore(const Block&, const ProgressRange&, |
| float* const) { |
| return WP2_STATUS_UNSUPPORTED_FEATURE; // Unused. |
| } |
| |
| WP2Status AreaScoreFunc::GetAreaDefaultScore(VectorNoCtor<Block>* const blocks, |
| float* const score) { |
| Vector<CodedBlock> partition; |
| WP2_CHECK_STATUS(GetAreaDefaultPartition(&partition)); |
| assert(!partition.empty()); |
| WP2_CHECK_STATUS(GetDistoRate(&partition, &default_disto_, &default_rate_)); |
| // Compute the score with itself as reference, to be easily comparable. |
| *score = GetRelativeScore(*config_, default_disto_, default_rate_, |
| default_disto_, default_rate_); |
| for (const CodedBlock& cb : partition) { |
| WP2_CHECK_ALLOC_OK(blocks->push_back(cb.blk())); |
| } |
| |
| RegisterScoreForVDebug(BLK_LAST, partition, *score, default_disto_, |
| default_rate_); |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status AreaScoreFunc::GetAreaGridScore(BlockSize block_size, |
| VectorNoCtor<Block>* const blocks, |
| float* const score) { |
| Vector<CodedBlock> partition; |
| // Fill 'partition' with as many 'block_size' as possible. |
| // Fill the remaining space with blocks as big as possible. |
| FrontMgrArea front_mgr(area_width_, area_height_); |
| WP2_CHECK_STATUS(front_mgr.CopyFrom(area_front_mgr_)); |
| uint32_t num_block_units = 0; |
| while (!front_mgr.Done()) { |
| Block block; |
| if (!front_mgr.TryGetNextBlock(block_size, &block)) { |
| block = front_mgr.GetMaxFittingBlock(); |
| } |
| if (!area_.Contains(block.x_pix(), block.y_pix())) break; |
| WP2_CHECK_ALLOC_OK(front_mgr.UseSize(block.dim(), 0, nullptr)); |
| front_mgr.Use(block); |
| |
| WP2_CHECK_ALLOC_OK(partition.resize(partition.size() + 1)); |
| partition.back().SetDimDefault(block); |
| WP2_CHECK_ALLOC_OK(blocks->push_back(block)); |
| num_block_units += block.rect().GetArea(); |
| } |
| assert(num_block_units == SizeBlocks(area_.width) * SizeBlocks(area_.height)); |
| |
| assert(default_disto_ >= 0.f && default_rate_ >= 0.f); |
| float disto = 0.f, rate = 0.f; |
| WP2_CHECK_STATUS(GetDistoRate(&partition, &disto, &rate)); |
| *score = |
| GetRelativeScore(*config_, default_disto_, default_rate_, disto, rate); |
| |
| RegisterScoreForVDebug(block_size, partition, *score, disto, rate); |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status AreaScoreFunc::GetDistoRate(Vector<CodedBlock>* const area_blocks, |
| float* const disto, |
| float* const rate) const { |
| *rate = *disto = 0.f; |
| |
| ANSDictionaries dicts; |
| WP2_CHECK_STATUS(dicts.CopyFrom(dicts_)); |
| SyntaxWriter syntax_writer; |
| WP2_CHECK_STATUS(syntax_writer.CopyFrom(syntax_writer_, &dicts)); |
| |
| DCDiffusionMap dc_error_u, dc_error_v; |
| if (DCDiffusionMap::GetDiffusion(config_->error_diffusion) > 0) { |
| WP2_CHECK_STATUS(dc_error_u.CopyFrom(dc_error_u_)); |
| WP2_CHECK_STATUS(dc_error_v.CopyFrom(dc_error_v_)); |
| } |
| |
| // Encode all 'area_blocks' using previously finished areas and blocks in this |
| // 'area_' as prediction context (stored in 'buffer_'). |
| { |
| FrontMgrArea front_mgr(area_width_, area_height_); |
| WP2_CHECK_STATUS(front_mgr.CopyFrom(area_front_mgr_)); |
| for (CodedBlock& cb : *area_blocks) { |
| WP2_CHECK_OK(!front_mgr.Done(), WP2_STATUS_INVALID_PARAMETER); |
| assert(cb.x() == front_mgr.GetMaxPossibleBlock().x() && |
| cb.y() == front_mgr.GetMaxPossibleBlock().y() && |
| front_mgr.GetMaxPossibleBlock().rect().Contains(cb.blk().rect())); |
| cb.SetDim(cb.blk(), front_mgr); |
| WP2_CHECK_STATUS(EncodeBlock(front_mgr, &cb, &syntax_writer, &dc_error_u, |
| &dc_error_v, &buffer_)); |
| WP2_CHECK_ALLOC_OK(front_mgr.UseSize(cb.dim(), |
| /*ind=*/0, /*block=*/nullptr)); |
| front_mgr.Use(cb.blk()); |
| } |
| } |
| |
| // Now estimate the bits necessary to encode all blocks in 'area_'. |
| { |
| // Also write the headers so that symbols are correctly set up. |
| ANSEnc enc; |
| WP2_CHECK_STATUS(syntax_writer.WriteHeader(&enc)); |
| const float header_rate = enc.GetCost(dicts); |
| FrontMgrArea front_mgr(area_width_, area_height_); |
| WP2_CHECK_STATUS(front_mgr.CopyFrom(area_front_mgr_)); |
| |
| for (const CodedBlock& cb : *area_blocks) { |
| WP2_CHECK_STATUS(WriteBlock(front_mgr, cb, &syntax_writer, &enc)); |
| WP2_CHECK_ALLOC_OK(front_mgr.UseSize(cb.dim(), |
| /*ind=*/0, /*block=*/nullptr)); |
| front_mgr.Use(cb.blk()); |
| } |
| // Exclude the header to prevent early decisions from impacting later areas. |
| *rate = (enc.GetCost(dicts) - header_rate) / area_.GetArea(); |
| assert(*rate >= 0.f); |
| } |
| |
| // Measure the distortion of this tested partition of 'area_'. |
| { |
| constexpr float disto_scale[] = {0.4f, 0.2f, 0.2f, 0.2f}; |
| for (Channel c : {kYChannel, kUChannel, kVChannel, kAChannel}) { |
| if (c == kAChannel && !gparams_->has_alpha_) continue; |
| Plane16 src_area_view, buffer_area_view; |
| WP2_CHECK_STATUS(src_area_view.SetView(src_->GetChannel(c), area_)); |
| WP2_CHECK_STATUS(buffer_area_view.SetView(buffer_.GetChannel(c), area_)); |
| *disto += |
| disto_scale[c] * WP2SumSquaredErrorBlock( |
| src_area_view.Row(0), src_area_view.Step(), |
| buffer_area_view.Row(0), buffer_area_view.Step(), |
| area_.width, area_.height); |
| } |
| *disto /= area_.GetArea(); |
| } |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status AreaScoreFunc::BeginArea(uint32_t area_x, uint32_t area_y) { |
| // Make sure areas are done in order. |
| assert((area_x == 0 && area_y == 0) || |
| (area_x == area_.x + area_width_ && area_y == area_.y) || |
| (area_x == 0 && area_y == area_.y + area_height_)); |
| area_ = Rectangle(area_x, area_y, area_width_, area_height_) |
| .ClipWith({0, 0, tile_rect_.width, tile_rect_.height}); // no pad |
| // Assuming all areas are done in order row by row, top and left contexts |
| // outside this 'area_' are available, if any. |
| |
| // Next available block should match the current 'area_'. |
| assert(area_front_mgr_.GetMaxFittingBlock().x_pix() == area_.x && |
| area_front_mgr_.GetMaxFittingBlock().y_pix() == area_.y); |
| |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status AreaScoreFunc::Use(const Block& block) { |
| CodedBlock cb; |
| cb.SetDim(block, area_front_mgr_); |
| assert(cb.blk() == block); |
| // Write the final pixels for future context and rate computation. |
| WP2_CHECK_STATUS(EncodeBlock(area_front_mgr_, &cb, &syntax_writer_, |
| &dc_error_u_, &dc_error_v_, &buffer_)); |
| WP2_CHECK_ALLOC_OK(area_front_mgr_.UseSize(cb.dim(), /*ind=*/0, |
| /*block=*/nullptr)); |
| area_front_mgr_.Use(cb.blk()); |
| if (area_front_mgr_.Done()) { |
| // All areas are complete. |
| area_ = {}; |
| area_front_mgr_.Clear(); |
| } else { |
| const Block max_block = area_front_mgr_.GetMaxPossibleBlock(); |
| if (!area_.Contains(max_block.x_pix(), max_block.y_pix())) { |
| // This 'area_' is complete. Prepare the next one. |
| uint32_t area_x = area_.x + area_width_, area_y = area_.y; // Next col. |
| if (area_x >= tile_rect_.width) { // Next row. |
| area_x = 0; |
| area_y += area_height_; |
| } |
| assert(area_x < tile_rect_.width && area_y < tile_rect_.height); |
| WP2_CHECK_STATUS(BeginArea(area_x, area_y)); |
| } |
| } |
| default_disto_ = default_rate_ = -1; |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status AreaScoreFunc::GetAreaDefaultPartition( |
| Vector<CodedBlock>* const area_blocks) const { |
| // Find the blocks in 'default_partition_' belonging to the current 'area_'. |
| const Block area_pos(area_.x / kMinBlockSizePix, area_.y / kMinBlockSizePix, |
| BLK_32x32); |
| VectorNoCtor<Block>::const_iterator block_it = std::lower_bound( |
| default_partition_.begin(), default_partition_.end(), area_pos, comp_); |
| |
| uint32_t num_block_units = 0; |
| for (; block_it != default_partition_.end(); ++block_it) { |
| if (!area_.Contains(block_it->x_pix(), block_it->y_pix())) break; |
| WP2_CHECK_ALLOC_OK(area_blocks->resize(area_blocks->size() + 1)); |
| area_blocks->back().SetDimDefault(*block_it); |
| num_block_units += block_it->rect().GetArea(); |
| } |
| // If snapping is not enabled or if the 'area_' dimensions do not match it, |
| // the default partition cannot be used as is. |
| WP2_CHECK_OK( |
| num_block_units == SizeBlocks(area_.width) * SizeBlocks(area_.height), |
| WP2_STATUS_INVALID_CONFIGURATION); |
| return WP2_STATUS_OK; |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| WP2Status SubAreaScoreFunc::ComputeScore(const Block& block, |
| const ProgressRange& progress, |
| float* const score) { |
| const ProgressRange default_partitioning_progress(progress, 0.5); |
| const ProgressRange remaining_blocks_progress(progress, 0.5); |
| if (default_block_.dim() == BLK_LAST) { |
| // Get the default partitioning and score of the remaining non-final blocks. |
| // This is done once for each block position within each area. |
| Vector<CodedBlock> default_partition; |
| WP2_CHECK_STATUS(GetAreaRemainingDefaultPartition( |
| default_partitioning_progress, &default_partition)); |
| assert(!default_partition.empty() && |
| default_partition.front().dim() != BLK_LAST); |
| // TODO(yguyon): Also include the 'area_used_blocks_' into the disto/rate |
| WP2_CHECK_STATUS(GetDistoRate(&default_partition, &default_block_disto_, |
| &default_block_rate_)); |
| const float default_score = |
| GetRelativeScore(*config_, default_block_disto_, default_block_rate_, |
| default_block_disto_, default_block_rate_); |
| RegisterScoreForVDebug(default_partition.front().blk(), default_partition, |
| default_score, default_block_disto_, |
| default_block_rate_); |
| default_block_ = default_partition.front().blk(); |
| } else { |
| WP2_CHECK_STATUS(default_partitioning_progress.AdvanceBy(1.)); |
| } |
| // 'default_block_' now contains the size of the first block among the |
| // remaining non-final ones given by the default partitioning. |
| assert(default_block_.x() == block.x() && default_block_.y() == block.y()); |
| |
| float disto = 0.f, rate = 0.f; |
| if (block == default_block_) { |
| disto = default_block_disto_; |
| rate = default_block_rate_; |
| *score = GetRelativeScore(*config_, default_block_disto_, |
| default_block_rate_, disto, rate); |
| WP2_CHECK_STATUS(remaining_blocks_progress.AdvanceBy(1.)); |
| } else { |
| // 'block' here is the currently evaluated size for a given position. |
| // 'area_used_blocks_' represent the final blocks previously encoded and |
| // recorded. 'area_remaining_blocks' exist to fill the 'area_' partition and |
| // compare the same surface by rate-distortion with the default partition. |
| Vector<CodedBlock> area_remaining_blocks; |
| WP2_CHECK_ALLOC_OK(area_remaining_blocks.resize(1)); |
| area_remaining_blocks.back().SetDimDefault(block); // Force it. |
| WP2_CHECK_STATUS(GetAreaRemainingDefaultPartition(remaining_blocks_progress, |
| &area_remaining_blocks)); |
| |
| WP2_CHECK_STATUS(GetDistoRate(&area_remaining_blocks, &disto, &rate)); |
| *score = GetRelativeScore(*config_, default_block_disto_, |
| default_block_rate_, disto, rate); |
| RegisterScoreForVDebug(block, area_remaining_blocks, *score, disto, rate); |
| } |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status SubAreaScoreFunc::BeginArea(uint32_t area_x, uint32_t area_y) { |
| WP2_CHECK_STATUS(AreaScoreFunc::BeginArea(area_x, area_y)); |
| area_used_blocks_.clear(); |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status SubAreaScoreFunc::Use(const Block& block) { |
| WP2_CHECK_ALLOC_OK(area_used_blocks_.resize(area_used_blocks_.size() + 1)); |
| area_used_blocks_.back().SetDim(block, area_front_mgr_); |
| WP2_CHECK_STATUS(AreaScoreFunc::Use(block)); |
| default_block_ = Block(); // Reset. |
| default_block_disto_ = default_block_rate_ = 0.f; |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status SubAreaScoreFunc::GetAreaRemainingDefaultPartition( |
| const ProgressRange& progress, |
| Vector<CodedBlock>* const area_remaining_blocks) const { |
| const ProgressRange score_func_init_progress(progress, 0.01); |
| const ProgressRange partitioner_progress(progress, 0.99); |
| |
| // It would be faster to reuse the 'AreaScoreFunc::default_partition_' (or |
| // part of it) but it is not always compatible with the already selected |
| // 'area_used_blocks_' so for simplicity it is always recomputed for each |
| // block size of each block position. |
| MultiScoreFunc score_func; |
| MultiPassPartitioner partitioner(&score_func); |
| |
| EncoderConfig config = *config_; |
| config.info = nullptr; // Remove any debugging input/output. |
| WP2_CHECK_STATUS(score_func.Init(config, tile_rect_, *src_, *gparams_, |
| score_func_init_progress)); |
| WP2_CHECK_STATUS(partitioner.Init(config, *src_, tile_rect_, &score_func)); |
| |
| // 'blocks' will first contain all irrelevant blocks that will be forced into |
| // the 'partitioner' to extract only the interesting ones. |
| VectorNoCtor<Block> blocks; |
| // Force all blocks external to the current 'area_' (their size and count do |
| // not matter). |
| { |
| FrontMgrArea front_mgr(area_width_, area_height_); |
| WP2_CHECK_STATUS(front_mgr.Init(config_->partition_set, |
| config_->partition_snapping, |
| tile_rect_.width, tile_rect_.height)); |
| uint32_t surface_kept = 0; |
| while (!front_mgr.Done()) { |
| const Block max_block = front_mgr.GetMaxFittingBlock(); |
| WP2_CHECK_ALLOC_OK(front_mgr.UseSize(max_block.dim(), 0, nullptr)); |
| front_mgr.Use(max_block); |
| if (!area_.Contains(max_block.x_pix(), max_block.y_pix())) { |
| WP2_CHECK_ALLOC_OK(blocks.push_back(max_block)); |
| } else { |
| surface_kept += max_block.rect().GetArea(); |
| } |
| } |
| WP2_CHECK_OK( |
| surface_kept == SizeBlocks(area_.width) * SizeBlocks(area_.height), |
| WP2_STATUS_INVALID_PARAMETER); |
| } |
| // Force all already selected final blocks. |
| for (const CodedBlock& cb : area_used_blocks_) { |
| WP2_CHECK_ALLOC_OK(blocks.push_back(cb.blk())); |
| } |
| // Force the block under review, if any. |
| for (const CodedBlock& cb : *area_remaining_blocks) { |
| WP2_CHECK_ALLOC_OK(blocks.push_back(cb.blk())); |
| } |
| const uint32_t num_default_blocks_to_ignore = blocks.size(); |
| const uint32_t num_already_added_area_blocks = area_remaining_blocks->size(); |
| |
| // Get the default partitioning of the remaining empty spaces into 'blocks'. |
| WP2_CHECK_STATUS(partitioner.GetBestPartition(partitioner_progress, &blocks)); |
| // Sort in lexico order only the new blocks. |
| std::sort(blocks.begin() + num_default_blocks_to_ignore, blocks.end()); |
| |
| // Copy from Block struct to CodedBlock class into 'area_remaining_blocks'. |
| WP2_CHECK_ALLOC_OK(area_remaining_blocks->resize( |
| area_remaining_blocks->size() + |
| (blocks.size() - num_default_blocks_to_ignore))); |
| for (uint32_t i = num_default_blocks_to_ignore, |
| j = num_already_added_area_blocks; |
| i < blocks.size(); ++i, ++j) { |
| area_remaining_blocks->at(j).SetDimDefault(blocks[i]); |
| } |
| return WP2_STATUS_OK; |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
// Not referenced in the part of the file visible here.
static constexpr uint32_t kMaxCertainty = 3;
// Side of the square window used by GetSpread().
static constexpr int32_t kKernelSize = 5;
// Maximum distance from the center of the window to its borders.
static constexpr int32_t kMaxDist = kKernelSize / 2;
| |
| static void GetMinMax(const Plane16& src, Plane16& min, Plane16& max) { |
| assert(min.w_ == max.w_ && min.h_ == max.h_); |
| const uint32_t step = src.Step(); |
| const uint32_t num_blocks = min.w_; |
| const int16_t* row = (const int16_t*)src.Row(0); |
| for (uint32_t y = 0; y < min.h_; ++y) { |
| int16_t* const min_row = (int16_t*)min.Row(y); |
| int16_t* const max_row = (int16_t*)max.Row(y); |
| WP2::GetBlockMinMax(row, step, num_blocks, min_row, max_row); |
| row += kMinBlockSizePix * step; |
| } |
| } |
| |
| // Range of original YUV values in a kKernelSize window around each pixel. |
| // The higher the value, the more heterogenous the area is. |
| static void GetSpread(const Plane16& src, Plane16* const dst) { |
| const int32_t w = src.w_, h = src.h_; |
| int16_t* p_dst = dst->Row(0); |
| const uint32_t dst_step = dst->Step(); |
| const int16_t* p_src = src.Row(0); |
| const uint32_t src_step = src.Step(); |
| for (int32_t y = 0; y < h; ++y) { |
| int16_t min, max; |
| if (y < kMaxDist || y >= h - kMaxDist) { |
| const int32_t min_sub_y = std::max(-kMaxDist, -y); |
| const int32_t max_sub_y = std::min(kMaxDist, h - 1 - y); |
| const int16_t* const row = &src.At(0, y + min_sub_y); |
| for (int32_t x = 0; x < w; ++x) { |
| const int32_t min_sub_x = std::max(-kMaxDist, -x); |
| const int32_t max_sub_x = std::min(kMaxDist, w - 1 - x); |
| GetBlockMinMaxGeneric(row + x + min_sub_x, src_step, |
| max_sub_x + 1 - min_sub_x, |
| max_sub_y + 1 - min_sub_y, &min, &max); |
| p_dst[x] = (int16_t)ClampToSigned(max - min, kMaxYuvBits + 1u); |
| } |
| } else { |
| const int16_t* const row = p_src - src_step * kMaxDist; |
| for (int32_t x = 0; x < w; ++x) { |
| if (x < kMaxDist || x - kMaxDist + 8 > w) { |
| const int32_t min_sub_x = std::max(-kMaxDist, -x); |
| const int32_t max_sub_x = std::min(kMaxDist, w - 1 - x); |
| GetBlockMinMaxGeneric(row + x + min_sub_x, src_step, |
| max_sub_x + 1 - min_sub_x, kKernelSize, |
| &min, &max); |
| } else { |
| GetBlockMinMax_5x5(row + x - kMaxDist, src_step, &min, &max); |
| } |
| p_dst[x] = (int16_t)ClampToSigned(max - min, kMaxYuvBits + 1u); |
| } |
| } |
| p_dst += dst_step; |
| p_src += src_step; |
| } |
| } |
| |
| constexpr float MultiScoreFunc::kMinScore; |
| constexpr int MultiScoreFunc::kMinEffortForGoodQuantDCT; |
| |
| WP2Status MultiScoreFunc::Init(const EncoderConfig& config, |
| const Rectangle& tile_rect, const YUVPlane& yuv, |
| const GlobalParams& gparams, |
| const ProgressRange& progress) { |
| DrctFilterInit(); |
| ScoreDspInit(); |
| |
| WP2_CHECK_STATUS(PartitionScoreFunc::Init(config, tile_rect, yuv, gparams, |
| ProgressRange(progress, 0.5))); |
| yuv_range_ = |
| (float)(gparams.transf_.GetYUVMax() - gparams.transf_.GetYUVMin()); |
| a_range_ratio_ = yuv_range_ / kAlphaMax; |
| |
| // Cache the 'min_' and 'max_' luma/chroma values per kMinBlockSize square. |
| WP2_CHECK_STATUS(min_.Resize(SizeBlocks(src_->Y.w_), SizeBlocks(src_->Y.h_))); |
| WP2_CHECK_STATUS(max_.Resize(min_.Y.w_, min_.Y.h_)); |
| for (Channel channel : {kYChannel, kUChannel, kVChannel}) { |
| const Plane16& src_plane = src_->GetChannel(channel); |
| assert(src_plane.w_ % kMinBlockSizePix == 0 && |
| src_plane.h_ % kMinBlockSizePix == 0); |
| GetMinMax(src_plane, min_.GetChannel(channel), max_.GetChannel(channel)); |
| } |
| |
| if (config_->effort >= kMinEffortForGoodQuantDCT) { |
| // Image processing. |
| WP2_CHECK_STATUS(spread_.Resize(src_->GetWidth(), src_->GetHeight(), |
| /*pad=*/1, src_->HasAlpha())); |
| for (Channel channel : {kYChannel, kUChannel, kVChannel, kAChannel}) { |
| if (channel == kAChannel && !src_->HasAlpha()) continue; |
| GetSpread(src_->GetChannel(channel), &spread_.GetChannel(channel)); |
| } |
| } |
| |
| // Per-block standard deviation. |
| WP2_CHECK_STATUS( |
| stddev_.Allocate(num_block_cols_, num_block_rows_, kMinBlockSizePix)); |
| stddev_.AddValues(*src_); |
| |
| if (src_->HasAlpha()) { |
| WP2_CHECK_STATUS( |
| a_stddev_.Allocate(num_block_cols_, num_block_rows_, kMinBlockSizePix)); |
| a_stddev_.AddValues(src_->A); |
| } |
| |
| // Per-block luma general direction. |
| // In might give better results to compute that for each NxN block instead of |
| // aggregating pre-computed 4x4 ones but it is probably too expensive. |
| WP2_CHECK_ALLOC_OK(direction_.resize(num_block_cols_ * num_block_rows_)); |
| WP2_CHECK_ALLOC_OK( |
| direction_certainty_.resize(num_block_cols_ * num_block_rows_)); |
| |
| const uint32_t bitdepth = gparams.transf_.GetYUVPrecisionBits() + 1; |
| for (uint32_t y = 0; y < num_block_rows_; ++y) { |
| const int16_t* x_ptr = &src_->Y.At(/*x=*/0, y * kMinBlockSizePix); |
| for (uint32_t i = y * num_block_cols_; i < (y + 1) * num_block_cols_; |
| ++i, x_ptr += kMinBlockSizePix) { |
| uint32_t variance; |
| CdefDirection4x4(x_ptr, src_->Y.Step(), bitdepth, &direction_[i], |
| &variance); |
| direction_certainty_[i] = Clamp(variance >> 4, 0u, kMaxCertainty); |
| // TODO(yguyon): Also compute, store, use 8x8 direction for bigger blocks |
| } |
| } |
| |
| WP2_CHECK_STATUS(DrawVDebug()); |
| WP2_CHECK_STATUS(progress.AdvanceBy(0.5)); |
| return WP2_STATUS_OK; |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| WP2Status MultiScoreFunc::ComputeScore(const Block& block, |
| const ProgressRange& progress, |
| float* const score) { |
| float value = 0.f, threshold = 1.f; // Passing if 'value <= threshold'. |
| switch (pass_) { |
| case Pass::LumaAlphaGradient: |
| value = GetLumaAlphaGradient(block); |
| threshold = GetLumaAlphaGradientThreshold(block); |
| break; |
| case Pass::NarrowStdDev: |
| value = GetStdDevRange(block); |
| threshold = GetStdDevRangeThreshold(block); |
| break; |
| case Pass::GoodQuantDCT: |
| value = GetQuantDCT(block); |
| threshold = GetQuantDCTThreshold(block); |
| break; |
| case Pass::Direction: |
| value = GetDirection(block); |
| threshold = GetDirectionThreshold(block); |
| break; |
| case Pass::Any: |
| value = 0.f; |
| threshold = 1.f; |
| break; |
| default: |
| assert(false); |
| } |
| |
| // Convert to "higher score is better" in [0:1], 0.5 being the threshold. |
| if (value <= threshold) { |
| *score = 1.001f - 0.5f * value / threshold; // Will pass. |
| } else { |
| *score = 0.499f * threshold / value; // Will not pass. |
| } |
| RegisterScoreForVDebug(block, *score); |
| WP2_CHECK_STATUS(progress.AdvanceBy(1.)); |
| return WP2_STATUS_OK; |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| void MultiScoreFunc::GetCoeffs(Channel channel, const Block& block, |
| int32_t coeffs[kMaxBlockSizePix2]) const { |
| const Plane16& plane = src_->GetChannel(channel); |
| // Copy luma pixels. |
| int32_t* dst_row = coeffs; |
| const int16_t* src_row = &plane.At(block.x_pix(), block.y_pix()); |
| for (uint32_t y = 0; y < block.h_pix(); ++y) { |
| for (uint32_t x = 0; x < block.w_pix(); ++x) { |
| dst_row[x] = src_row[x]; |
| } |
| dst_row += block.w_pix(); |
| src_row += plane.Step(); |
| } |
| } |
| |
| void MultiScoreFunc::QuantizeCoeffs(Channel channel, const Block& block, |
| int32_t coeffs[kMaxBlockSizePix2]) const { |
| GetCoeffs(channel, block, coeffs); |
| |
| // Transform them. |
| WP2Transform2D(coeffs, kDct, kDct, block.w_pix(), block.h_pix(), coeffs, |
| /*reduced=*/false); |
| |
| // Find the segment of the block. |
| const Rectangle padded_tile_rect = {tile_rect_.x, tile_rect_.y, |
| src_->GetWidth(), src_->GetHeight()}; |
| const uint8_t segment_id = |
| AssignSegmentId(*config_, *gparams_, padded_tile_rect, block); |
| const QuantMtx& quant_mtx = |
| gparams_->segments_[segment_id].GetQuant(channel); |
| |
| // Quantize and dequantize coefficients. |
| int16_t quantized_coeffs[kMaxBlockSizePix2]; |
| const TrfSize tdim = GetTransform(block.dim()); |
| uint32_t num_coeffs; |
| quant_mtx.Quantize(coeffs, tdim, /*first_is_dc=*/true, quantized_coeffs, |
| &num_coeffs); |
| quant_mtx.Dequantize(quantized_coeffs, num_coeffs, tdim, coeffs); |
| |
| // Inverse transform them back and compare the result. |
| WP2InvTransform2D(coeffs, kDct, kDct, block.w_pix(), block.h_pix(), coeffs, |
| /*reduced=*/false); |
| } |
| |
| void MultiScoreFunc::QuantizeCoeffs(Channel channel, const Block& block, |
| BlockSize sub_block_size, |
| int32_t coeffs[kMaxBlockSizePix2], |
| int32_t* const max_range) const { |
| if (max_range != nullptr) *max_range = 0; |
| Block sub_block(block.x(), block.y(), sub_block_size); |
| while (sub_block.y() + sub_block.h() <= block.y() + block.h()) { |
| int32_t sub_coeffs[kMaxBlockSizePix2]; |
| QuantizeCoeffs(channel, sub_block, sub_coeffs); |
| int32_t min = sub_coeffs[0], max = sub_coeffs[0]; |
| const uint32_t y = sub_block.y_pix() - block.y_pix(); |
| const uint32_t x = sub_block.x_pix() - block.x_pix(); |
| for (uint32_t sub_y = 0; sub_y < sub_block.h_pix(); ++sub_y) { |
| for (uint32_t sub_x = 0; sub_x < sub_block.w_pix(); ++sub_x) { |
| const int32_t sub_coeff = sub_coeffs[sub_y * sub_block.w_pix() + sub_x]; |
| min = std::min(min, sub_coeff); |
| max = std::max(max, sub_coeff); |
| coeffs[(y + sub_y) * block.w_pix() + (x + sub_x)] = sub_coeff; |
| } |
| } |
| if (max_range != nullptr) *max_range = std::max(*max_range, max - min); |
| sub_block.SetXY(sub_block.x() + sub_block.w(), sub_block.y()); |
| if (sub_block.x() + sub_block.w() > block.x() + block.w()) { |
| sub_block.SetXY(block.x(), sub_block.y() + sub_block.h()); |
| } |
| } |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| // Returns the maximum difference between 'src' and its predicted gradient. |
| // 'step' goes from a 'src' line to the next. |
| static int32_t GetGradientDiff(const int16_t* src, int32_t step, |
| int32_t w, int32_t h) { |
| assert(w >= 4 && h >= 4); |
| const int32_t max_x = w - 1, max_y = h - 1; |
| |
| // Average the corners (division by 3 is done at the very end). |
| const int32_t top_left = src[0] + src[1] + src[step]; |
| const int32_t bottom_left = |
| src[(max_y - 1) * step] + src[max_y * step] + src[max_y * step + 1]; |
| const int32_t top_right = src[max_x - 1] + src[max_x] + src[step + max_x]; |
| const int32_t bottom_right = src[(max_y - 1) * step + max_x] + |
| src[max_y * step + max_x - 1] + |
| src[max_y * step + max_x]; |
| |
| // Create a gradient by bidimensional interpolation and compare with 'src'. |
| int32_t max_diff = 0; |
| for (int32_t y = 0; y <= max_y; ++y) { |
| const int32_t left = top_left * (max_y - y) + bottom_left * y; |
| const int32_t right = top_right * (max_y - y) + bottom_right * y; |
| for (int32_t x = 0; x <= max_x; ++x) { |
| const int32_t gradient_pixel = |
| DivRound(left * (max_x - x) + right * x, 3 * max_x * max_y); |
| max_diff = std::max(max_diff, std::abs(src[x] - gradient_pixel)); |
| } |
| src += step; |
| } |
| return max_diff; |
| } |
| |
| float MultiScoreFunc::GetLumaAlphaGradient(const Block& block) const { |
| // Empirically chosen values. |
| const float kDiffScale[] = {1.f, 1.f, 1.f, 0.25f / kAlphaMax * yuv_range_}; |
| float max_diff = 0.f; |
| // kUChannel and kVChannel do not bring valuable partition decision-making |
| // here so skip them for speed. |
| for (Channel c : {kYChannel, kAChannel}) { |
| if (c == kAChannel && !src_->HasAlpha()) continue; |
| const int32_t diff = GetGradientDiff( |
| &src_->GetChannel(c).At(block.x_pix(), block.y_pix()), |
| src_->GetChannel(c).Step(), block.w_pix(), block.h_pix()); |
| max_diff = std::max(max_diff, diff * kDiffScale[c]); |
| } |
| return max_diff; |
| } |
| |
| float MultiScoreFunc::GetLumaAlphaGradientThreshold(const Block& block) const { |
| // The threshold is tighter for bigger blocks at higher qualities. |
| // Medium blocks are ignored except at high qualities. |
| constexpr BaseSlope kBaseSlope[] = { |
| {0.f, 0.f}, // 4x4, unused |
| {0.f, 0.f}, // 8x4, unused |
| {-0.6f, 0.7f}, // 8x8 16x4 |
| {-0.06f, 0.08f}, // 16x8 |
| {-0.03f, 0.045f}, // 16x16 32x8 |
| {0.04f, -0.035f}, // 32x16 |
| {0.05f, -0.045f}, // 32x32 |
| }; |
| STATIC_ASSERT_ARRAY_SIZE(kBaseSlope, WP2Log2Ceil_k(kMaxBlockSize2) + 1); |
| const uint32_t index = (uint32_t)WP2Log2Floor(block.rect().GetArea()); |
| assert((1u << index) == block.rect().GetArea()); |
| return yuv_range_ * |
| MapQuality(*config_, kBaseSlope[index].base, kBaseSlope[index].slope); |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| static float StdDevRange(const Block& block, const Integral& variance) { |
| // Consider the standard deviation of the whole block as sub-blocks could |
| // be coherent within themselves but not with other sub-blocks. |
| const uint8_t overall_variance = variance.StdDevUint8( |
| block.x(), block.y(), block.x() + block.w(), block.y() + block.h()); |
| uint8_t min = overall_variance, max = overall_variance; |
| |
| // Now check the sub-blocks. |
| for (uint32_t sub_y = block.y(); sub_y < block.y() + block.h(); ++sub_y) { |
| for (uint32_t sub_x = block.x(); sub_x < block.x() + block.w(); ++sub_x) { |
| const uint8_t variance_tmp = |
| variance.StdDevUint8(sub_x, sub_y, sub_x + 1, sub_y + 1); |
| if (variance_tmp < min) { |
| min = variance_tmp; |
| } else if (variance_tmp > max) { |
| max = variance_tmp; |
| } |
| } |
| } |
| return (max - min) / 255.f; |
| } |
| |
| float MultiScoreFunc::GetStdDevRange(const Block& block) const { |
| float range = StdDevRange(block, stddev_); |
| if (!a_stddev_.empty()) { |
| const float a_range = |
| StdDevRange(block, a_stddev_) * a_range_ratio_ * a_range_ratio_; |
| range = std::max(range, a_range); |
| } |
| |
| return range; |
| } |
| |
| float MultiScoreFunc::GetStdDevRangeThreshold(const Block& block) const { |
| // The higher the quality, the narrower the standard deviation range needs to |
| // be for a block to be accepted. Accept bigger blocks during partitioning at |
| // low qualities, and seek smaller blocks at high qualities. |
| return MapQuality(*config_, 0.50f, -0.38f); |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| // Returns the average difference between the original luma coefficients and the |
| // quantized ones, weighted per pixel by the 'spread_' in order to give more |
| // importance to flat areas (penalize distant ripples more than noise on edges). |
| float MultiScoreFunc::GetQuantDCT(const Block& block, Channel channel) const { |
| int32_t coeffs[kMaxBlockSizePix2]; |
| QuantizeCoeffs(channel, block, coeffs); |
| |
| float avg_diff = 0.f; |
| int32_t* dst_row = coeffs; |
| const int16_t* src_row = |
| &src_->GetChannel(channel).At(block.x_pix(), block.y_pix()); |
| const int16_t* spread_row = |
| &spread_.GetChannel(channel).At(block.x_pix(), block.y_pix()); |
| for (uint32_t y = 0; y < block.h_pix(); ++y) { |
| for (uint32_t x = 0; x < block.w_pix(); ++x) { |
| const int32_t diff = std::abs(dst_row[x] - src_row[x]); |
| avg_diff += diff / Clamp(spread_row[x] / 20.f, 0.1f, 10.f); |
| } |
| dst_row += block.w_pix(); |
| src_row += src_->GetChannel(channel).Step(); |
| spread_row += spread_.GetChannel(channel).Step(); |
| } |
| avg_diff /= (block.w_pix() * block.h_pix()); |
| return avg_diff; |
| } |
| |
| float MultiScoreFunc::GetQuantDCT(const Block& block) const { |
| // Take chroma and alpha into account just enough to discard bad layouts (for |
| // example an entirely black image with alpha patterns). |
| // TODO(yguyon): Check ADST too |
| constexpr float kScale[] = {1.f, 0.1f, 0.1f, 0.1f}; |
| return std::max({GetQuantDCT(block, kYChannel) * kScale[kYChannel], |
| GetQuantDCT(block, kUChannel) * kScale[kUChannel], |
| GetQuantDCT(block, kVChannel) * kScale[kVChannel], |
| src_->HasAlpha() |
| ? GetQuantDCT(block, kAChannel) * kScale[kAChannel] |
| : 0.f}); |
| } |
| |
| float MultiScoreFunc::GetQuantDCTThreshold(const Block& block) const { |
| // This metric is only useful at low qualities, for large blocks that would |
| // still look fine with an aggressive quantization. |
| return MapQuality(*config_, 4.00f, -4.75f); |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| // Returns a low score for a 'block' that appears to have an obvious and uniform |
| // orientation. |
| float MultiScoreFunc::GetDirection(const Block& block) const { |
| const uint32_t stride = num_block_cols_; |
| const uint32_t* direction = |
| direction_.data() + block.y() * stride + block.x(); |
| const uint32_t* certainty = |
| direction_certainty_.data() + block.y() * stride + block.x(); |
| |
| uint32_t weight[kDrctFltNumDirs] = {0}; |
| for (uint32_t sub_y = 0; sub_y < block.h(); ++sub_y) { |
| for (uint32_t sub_x = 0; sub_x < block.w(); ++sub_x) { |
| assert(direction[sub_x] < kDrctFltNumDirs); |
| weight[direction[sub_x]] += std::min(certainty[sub_x], kMaxCertainty); |
| } |
| direction += stride; |
| certainty += stride; |
| } |
| const uint32_t heaviest = |
| std::max_element(weight, weight + kDrctFltNumDirs) - weight; |
| const uint32_t max_weight = block.w() * block.h() * kMaxCertainty; |
| assert(weight[heaviest] <= max_weight); |
| const uint32_t previous_direction = |
| (heaviest - 1 + kDrctFltNumDirs) % kDrctFltNumDirs; |
| const uint32_t next_direction = (heaviest + 1) % kDrctFltNumDirs; |
| const uint32_t weight_with_close_directions = |
| weight[heaviest] + |
| (weight[previous_direction] + weight[next_direction]) / 4; |
| const float direction_score = |
| 1.f - Clamp(weight_with_close_directions / (float)max_weight, 0.f, 1.f); |
| |
| return direction_score; |
| } |
| |
| float MultiScoreFunc::GetDirectionThreshold(const Block& block) const { |
| return MapQuality(*config_, 0.15f, -0.0475f); |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| WP2Status TileScoreFunc::Init(const EncoderConfig& config, |
| const Rectangle& tile_rect, const YUVPlane& yuv, |
| const GlobalParams& gparams, |
| const ProgressRange& progress) { |
| WP2EncDspInit(); |
| const ProgressRange init_progress(progress, 0.2); |
| const ProgressRange forced_partition_progress(progress, 0.8); |
| WP2_CHECK_STATUS( |
| PartitionScoreFunc::Init(config, tile_rect, yuv, gparams, init_progress)); |
| local_gparams_.features_ = &local_features_map_; |
| WP2_CHECK_STATUS(GlobalAnalysis(ArgbBuffer(), yuv, gparams.transf_, |
| config, &local_gparams_)); |
| WP2_CHECK_STATUS(InitForEncode()); |
| |
| // Initialize the best score with the partition containing only the forced |
| // blocks. |
| const Rectangle padded_tile_rect = {tile_rect_.x, tile_rect_.y, |
| yuv.GetWidth(), yuv.GetHeight()}; |
| WP2_CHECK_STATUS(AddForcedBlocks(config, padded_tile_rect, &blocks_)); |
| WP2_CHECK_STATUS(TryEncode(blocks_, forced_partition_progress, &best_score_)); |
| RegisterScoreForVDebug("starting", {}, best_score_); |
| cached_best_score_ = 0.f; |
| blocks_.clear(); |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status TileScoreFunc::ComputeScore(const Block& block, |
| const ProgressRange& progress, |
| float* const score) { |
| WP2_CHECK_ALLOC_OK(blocks_.push_back(block)); |
| WP2_CHECK_STATUS(TryEncode(blocks_, progress, score)); |
| if (*score > cached_best_score_) cached_best_score_ = *score; |
| if (*score > best_score_) RegisterScoreForVDebug("new best", block, *score); |
| blocks_.pop_back(); |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status TileScoreFunc::InitForEncode() { |
| WP2_CHECK_ALLOC_OK(blocks_.reserve((tile_rect_.width / kMaxBlockSizePix) * |
| (tile_rect_.height / kMaxBlockSizePix))); |
| |
| enc_tiles_layout_.num_tiles_x = enc_tiles_layout_.num_tiles_y = 1; |
| enc_tiles_layout_.tile_width = tile_rect_.width; |
| enc_tiles_layout_.tile_height = tile_rect_.height; |
| WP2_CHECK_ALLOC_OK(enc_tiles_layout_.tiles.resize(1)); |
| enc_tiles_layout_.first_unassigned_tile_index = 0; |
| |
| enc_tiles_layout_.tiles.front().rect = {0, 0, tile_rect_.width, |
| tile_rect_.height}; |
| enc_tiles_layout_.tiles.front().rgb_input.Deallocate(); // This is lossy. |
| assert(!src_->IsEmpty()); |
| WP2_CHECK_STATUS(enc_tiles_layout_.tiles.front().yuv_input.SetView(*src_)); |
| tmp_config_ = *config_; |
| tmp_config_.partition_method = sub_partition_method_; |
| tmp_config_.info = nullptr; |
| tile_encoder_.config_ = &tmp_config_; |
| tile_encoder_.use_lossless_ = (tmp_config_.quality > kMaxLossyQuality); |
| tile_encoder_.tiles_layout_ = &enc_tiles_layout_; |
| WP2_CHECK_STATUS(tile_encoder_.AssignNextTile()); |
| |
| // Recursion is too dangerous here. It's potentially creating |
| // (kMaxTileSize/kMinBlockSizePix)^2 = a lot of recursive encoding contexts. |
| assert(sub_partition_method_ != AUTO_PARTITIONING && |
| sub_partition_method_ != TILE_ENCODE_PARTITIONING); |
| |
| dec_config_.thread_level = 0; |
| |
| WP2_CHECK_STATUS(decompressed_yuv_.Copy(*src_, /*resize_if_needed=*/true)); |
| // Needed for API compliance. The pixels will not be accessed. |
| WP2_CHECK_STATUS( |
| decompressed_argb_.Resize(tile_rect_.width, tile_rect_.height)); |
| |
| // A BitstreamFeatures instance is needed by LossyDecode(). Make up one. |
| MemoryWriter writer; |
| WP2_CHECK_STATUS( |
| EncodeHeader(tmp_config_, tile_rect_.width, tile_rect_.height, |
| src_->HasAlpha(), /*is_anim=*/false, /*loop_forever=*/true, |
| kDefaultBackgroundColor, /*preview_color=*/{}, |
| /*has_icc=*/false, /*has_trailing_data=*/false, &writer)); |
| WP2_CHECK_STATUS(features_.Read(writer.mem_, writer.size_)); |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status TileScoreFunc::TryEncode(const VectorNoCtor<Block>& blocks, |
| const ProgressRange& progress, |
| float* const score) { |
| ANSEnc& enc = enc_tiles_layout_.tiles.front().enc; |
| enc.WipeOut(); |
| enc_tiles_layout_.gparams = &local_gparams_; |
| tile_encoder_.tile_->progress = progress; |
| // Encode the whole tile with the forced 'blocks'. |
| WP2_CHECK_STATUS(tile_encoder_.LossyEncode(blocks, &enc)); |
| WP2_CHECK_STATUS(enc.Assemble()); |
| |
| // Reset the unique tile to a fresh state. |
| const uint32_t width = decompressed_argb_.width(); |
| const uint32_t height = decompressed_argb_.height(); |
| const uint32_t tile_width = TileWidth(FinalTileShape(*config_), width); |
| const uint32_t tile_height = |
| TileHeight(FinalTileShape(*config_), /*image_width=*/width); |
| WP2_CHECK_STATUS(GetTilesLayout(width, height, tile_width, tile_height, |
| ProgressRange(), &decompressed_argb_, |
| &decompressed_yuv_, &tiles_layout_)); |
| assert(tiles_layout_.tiles.size() == 1 && |
| enc_tiles_layout_.tiles.size() == 1); |
| |
| // Plug ANSEnc output to ANSDec input. |
| Tile* const tile = &tiles_layout_.tiles.front(); |
| tile->chunk_size_is_known = true; |
| tile->chunk_size = enc.BufferSize(); |
| tiles_layout_.gparams = &local_gparams_; |
| tile->private_input = ExternalDataSource(enc.Buffer(), enc.BufferSize()); |
| tile->input = &tile->private_input; |
| |
| // Decode to 'decompressed_argb_'. |
| ANSDec dec(tile->input); |
| WP2_CHECK_STATUS( |
| LossyDecode(features_, dec_config_, &tiles_layout_, &dec, tile)); |
| |
| // Compare the pixels of the non-padded area only. |
| YUVPlane original_view, decompressed_view; |
| WP2_CHECK_STATUS(original_view.SetView(*src_, {0, 0, width, height})); |
| WP2_CHECK_STATUS( |
| decompressed_view.SetView(decompressed_yuv_, {0, 0, width, height})); |
| WP2_CHECK_STATUS(decompressed_view.GetDistortion( |
| original_view, kMaxYuvBits + 1, PSNR, distortion_)); |
| |
| // Compute a score based on distortion and the number of bits per pixel. |
| const float ssim = distortion_[4]; |
| const float bpp = std::max(1u, enc.BufferSize()) * 8.f / (width * height); |
| const float lambda = MapQuality(*config_, 7.00f, -2.85f); |
| *score = ssim - lambda * bpp; |
| return WP2_STATUS_OK; |
| } |
| |
| WP2Status TileScoreFunc::Use(const Block& block) { |
| WP2_CHECK_ALLOC_OK(blocks_.push_back(block)); |
| if (cached_best_score_ > best_score_) best_score_ = cached_best_score_; |
| cached_best_score_ = 0.f; |
| return WP2_STATUS_OK; |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| WP2Status FixedSizeScoreFunc::ComputeScore(const Block& block, |
| const ProgressRange& progress, |
| float* const score) { |
| *score = (block.dim() == size_) ? 1.f : 0.f; |
| WP2_CHECK_STATUS(progress.AdvanceBy(1.)); |
| return WP2_STATUS_OK; |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| } // namespace WP2 |