blob: a147e24a2ef7f67b4102f20a6fd9fbca176f9dd8 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// -----------------------------------------------------------------------------
//
// Author: Jyrki Alakuijala (jyrki@google.com)
//
#include "src/enc/lossless/histogram_enc.h"
#include <algorithm>
#include <functional>
#include <limits>
#include <numeric>
#ifdef HAVE_CONFIG_H
#include "src/webp/config.h"
#endif
#include "src/common/color_precision.h"
#include "src/common/progress_watcher.h"
#include "src/dsp/lossless/encl_dsp.h"
#include "src/enc/lossless/losslessi_enc.h"
#include "src/enc/symbols_enc.h"
#include "src/utils/random.h"
#include "src/utils/vector.h"
namespace WP2L {
// Maximum number of histograms allowed in greedy combining algorithm.
#define MAX_HISTO_GREEDY 100
//------------------------------------------------------------------------------
static inline void CopySubHistogram(const Histogram& a, const uint32_t s,
Histogram* const out) {
const uint32_t size =
sizeof(out->counts_[s][0]) * a.symbols_info_->GetMaxRange(s);
memcpy(out->counts_[s], a.counts_[s], size);
out->trivial_symbols_[s] = a.trivial_symbols_[s];
out->nonzeros_[s] = a.nonzeros_[s];
}
// Add two histograms, taking into account the number of non-zero elements for
// speed-up.
static void HistogramAdd(const Histogram& a, const Histogram& b,
Histogram* const out) {
assert(a.symbols_info_ == b.symbols_info_);
if (&b != out) {
for (uint32_t s = 0; s < kSymbolNum; ++s) {
if (a.nonzeros_[s] == 0) {
CopySubHistogram(b, s, out);
} else if (b.nonzeros_[s] == 0) {
CopySubHistogram(a, s, out);
} else if (a.nonzeros_[s] == 1 && b.nonzeros_[s] == 1 &&
a.trivial_symbols_[s] == b.trivial_symbols_[s]) {
const auto& trivial_symbol = a.trivial_symbols_[s];
const uint32_t size =
sizeof(out->counts_[s][0]) * a.symbols_info_->GetMaxRange(s);
memset(out->counts_[s], 0, size);
out->counts_[s][trivial_symbol] =
a.counts_[s][trivial_symbol] + b.counts_[s][trivial_symbol];
out->trivial_symbols_[s] = trivial_symbol;
out->nonzeros_[s] = 1;
} else {
out->trivial_symbols_[s] = 0;
BufferAdd(a.counts_[s], b.counts_[s], a.symbols_info_->GetMaxRange(s),
out->counts_[s], &out->nonzeros_[s]);
assert(out->nonzeros_[s] > 1);
}
}
} else {
for (uint32_t s = 0; s < kSymbolNum; ++s) {
if (a.nonzeros_[s] == 0) {
// out is b and no modification is added.
} else if (b.nonzeros_[s] == 0) {
CopySubHistogram(a, s, out);
} else if (a.nonzeros_[s] == 1 && b.nonzeros_[s] == 1 &&
a.trivial_symbols_[s] == b.trivial_symbols_[s]) {
// a and b are basically the same.
out->counts_[s][a.trivial_symbols_[s]] +=
a.counts_[s][a.trivial_symbols_[s]];
} else {
out->trivial_symbols_[s] = 0;
BufferAdd(a.counts_[s], b.counts_[s], a.symbols_info_->GetMaxRange(s),
out->counts_[s], &out->nonzeros_[s]);
assert(out->nonzeros_[s] > 1);
}
}
}
}
bool HistogramSet::Allocate(uint32_t size,
const WP2::SymbolsInfo& symbols_info) {
symbols_info_ = &symbols_info;
const uint32_t cache_num = symbols_info.MaxRangeSum();
if (!histograms_.resize(size) || !histo_buffer_.resize(size + 1) ||
!buffer_.resize((size + 1) * cache_num)) {
return false;
}
std::fill(buffer_.begin(), buffer_.end(), 0);
uint32_t* memory = buffer_.data();
for (uint32_t i = 0; i < size; ++i) {
histograms_[i] = &histo_buffer_[i];
histo_buffer_[i].Init();
histo_buffer_[i].SetBuffer(memory, symbols_info);
histo_buffer_[i].id_ = i;
memory += cache_num;
}
tmp_histo_ = &histo_buffer_[size];
tmp_histo_->Init();
tmp_histo_->SetBuffer(memory, symbols_info);
return true;
}
// -----------------------------------------------------------------------------
// Entropy-related functions.
PopulationAnalyzer::PopulationAnalyzer(uint32_t image_size)
: image_size_(image_size) {
EncLDspInit();
}
void PopulationAnalyzer::PopulationCost(const uint32_t* const population,
uint32_t range,
uint32_t* const nonzeros,
uint32_t* const trivial_sym,
float* cost) {
uint32_t size = 0;
for (uint32_t k = 0; k < range; ++k) {
if (population[k] == 0) continue;
histogram_[size] = population[k];
mapping_[size] = k;
++size;
}
if (nonzeros != nullptr) *nonzeros = size;
// TODO(vrabaud) replace the cost logic below with a proper call to
// SymbolWriter::WriteHeader.
if (size == 0) {
*cost = WP2Log2(3);
return;
}
if (size == 1) {
if (trivial_sym != nullptr) *trivial_sym = mapping_[0];
// TODO(vrabaud) do in the Quantizer.
*cost = WP2Log2(3) + WP2Log2(range);
return;
}
// TODO(vrabaud) use the 'effort' parameter instead of imposing 0.
quantizer_.ResetBest();
quantizer_.Quantize(histogram_.data(), mapping_.data(), size, range,
image_size_,
/*cost_max=*/std::numeric_limits<float>::max(),
/*cost_offset=*/0.f, /*effort=*/0);
*cost = WP2Log2(3) + WP2Log2(std::min(image_size_, range) - 1) +
quantizer_.GetBest()->cost;
}
void PopulationAnalyzer::GetCombinedEntropy(
const uint32_t* const X, const uint32_t* const Y, uint32_t X_nonzeros,
uint32_t Y_nonzeros, uint32_t X_trivial, uint32_t Y_trivial,
uint32_t length, float* cost) {
// If one of the histograms is basically unused:
if (X_nonzeros == 0) {
PopulationCost(Y, length, nullptr, nullptr, cost);
return;
}
if (Y_nonzeros == 0) {
PopulationCost(X, length, nullptr, nullptr, cost);
return;
}
// If they both contain the same element.
if (X_nonzeros == 1 && Y_nonzeros == 1 && X_trivial == Y_trivial) {
*cost = WP2Log2(3) + WP2Log2(length);
return;
}
BufferAdd(X, Y, length, sum_tmp_.data(), nullptr);
PopulationCost(sum_tmp_.data(), length, nullptr, nullptr, cost);
}
// -----------------------------------------------------------------------------
// Various histogram combine/cost-eval functions
int PopulationAnalyzer::GetCombinedHistogramEntropy(const Histogram& a,
const Histogram& b,
float cost_threshold,
float* cost) {
assert(a.symbols_info_ == b.symbols_info_);
// Deal with the raw bits of length and distance.
for (const auto& sym : {kSymbolLen, kSymbolDist}) {
*cost += ExtraCostCombined(a.counts_[sym], b.counts_[sym],
a.symbols_info_->GetMaxRange(sym));
if (*cost > cost_threshold) return 0;
}
bool is_used[kSymbolTypeNum];
for (uint32_t i = 0; i < kSymbolTypeNum; ++i) {
is_used[i] = (a.counts_[kSymbolType][i] + b.counts_[kSymbolType][i] > 0);
}
// Deal with all the symbols.
for (uint32_t s = 0; s < kSymbolNum; ++s) {
if (a.symbols_info_->IsUnused(s) || b.symbols_info_->IsUnused(s)) {
continue;
}
if (!WP2L::IsSymbolUsed((Symbol)s, is_used)) continue;
float tmp;
GetCombinedEntropy(a.counts_[s], b.counts_[s], a.nonzeros_[s],
b.nonzeros_[s], a.trivial_symbols_[s],
b.trivial_symbols_[s], a.symbols_info_->GetMaxRange(s),
&tmp);
*cost += tmp;
if (*cost > cost_threshold) return 0;
}
return 1;
}
// Performs out = a + b, computing the cost C(a+b) - C(a) - C(b) while comparing
// to the threshold value 'cost_threshold'. The score returned is
// Score = C(a+b) - C(a) - C(b), where C(a) + C(b) is known and fixed.
// Since the previous score passed is 'cost_threshold', we only need to compare
// the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out
// early.
float PopulationAnalyzer::HistogramAddEval(const Histogram& a,
const Histogram& b,
Histogram* const out,
float cost_threshold) {
float cost = 0.f;
const float sum_cost = a.bit_cost_ + b.bit_cost_;
cost_threshold += sum_cost;
if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) {
HistogramAdd(a, b, out);
out->bit_cost_ = cost;
assert(a.symbols_info_ == b.symbols_info_);
}
return cost - sum_cost;
}
// Same as HistogramAddEval(), except that the resulting histogram
// is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit
// the term C(b) which is constant over all the evaluations.
float PopulationAnalyzer::HistogramAddThresh(const Histogram& a,
const Histogram& b,
float cost_threshold) {
float cost = -a.bit_cost_;
GetCombinedHistogramEntropy(a, b, cost_threshold, &cost);
return cost;
}
// -----------------------------------------------------------------------------
void PopulationAnalyzer::UpdateHistogramCost(Histogram* const h) {
h->bit_cost_ = 0.;
bool is_used[kSymbolTypeNum];
bool has_at_least_one_symbol = false;
for (uint32_t i = 0; i < kSymbolTypeNum; ++i) {
is_used[i] = (h->counts_[kSymbolType][i] > 0);
has_at_least_one_symbol |= is_used[i];
}
if (!has_at_least_one_symbol) {
// If the histogram is completely empty, it contains no information so no
// cost.
std::fill(h->costs_, h->costs_ + kSymbolNum, 0.);
return;
}
for (uint32_t s = 0; s < kSymbolNum; ++s) {
const Symbol sym = (Symbol)s;
if (h->symbols_info_->IsUnused(s) || !IsSymbolUsed(sym, is_used)) {
h->costs_[s] = 0.;
} else {
PopulationCost(h->counts_[s], h->symbols_info_->GetMaxRange(sym),
&h->nonzeros_[s], &h->trivial_symbols_[s],
&(h->costs_[s]));
}
}
for (const auto& sym : {kSymbolLen, kSymbolDist}) {
h->costs_[sym] +=
ExtraCost(h->counts_[sym], h->symbols_info_->GetMaxRange(sym));
}
// Update the total bit_cost.
for (const auto& c : h->costs_) h->bit_cost_ += c;
}
WP2Status BuildHistograms(uint32_t width, uint32_t height, uint32_t histo_bits,
const LosslessSymbolsInfo& symbols_info,
const BackwardRefs& refs,
HistogramSet* const histogram_set) {
// Count the symbols using a SymbolRecorder.
WP2::SymbolRecorder recorder;
WP2::ANSEncNoop enc_noop;
LosslessSymbolsInfo symbols_info_counter;
WP2_CHECK_STATUS(symbols_info_counter.CopyFrom(symbols_info));
const uint32_t num_histos = histogram_set->histograms_.size();
symbols_info_counter.SetNumClusters(num_histos);
WP2_CHECK_STATUS(recorder.Allocate(symbols_info_counter, /*num_records=*/0));
WP2_CHECK_STATUS(StorePixels(width, height, histo_bits, refs,
WP2::ProgressRange(nullptr, 0.),
/*segments=*/nullptr, &enc_noop, &recorder));
// Copy the dictionaries to the HistogramSet.
Histogram** const histograms = histogram_set->histograms_.data();
for (uint32_t i = 0; i < num_histos; ++i) {
for (uint32_t s = 0; s < kSymbolNum; ++s) {
if (symbols_info_counter.Method(s) ==
WP2::SymbolsInfo::StorageMethod::kUnused ||
symbols_info_counter.Range(s, i) == 0) {
continue;
}
const WP2::Vector_u32& counts = recorder.GetRecordedDict(s, i).Counts();
std::copy(counts.begin(), counts.end(), histograms[i]->counts_[s]);
}
}
return WP2_STATUS_OK;
}
// Copies the histograms and computes its bit_cost.
static WP2Status HistogramSetCopy(const HistogramSet& in,
const WP2::SymbolsInfo& symbols_info,
HistogramSet* const out) {
uint32_t size = 0;
for (Histogram* const histo : in.histograms_) {
// Skip this histogram if it is not even used once.
if (histo->bit_cost_ != 0) ++size;
}
WP2_CHECK_ALLOC_OK(out->Allocate(size, symbols_info));
size = 0;
for (Histogram* const histo : in.histograms_) {
// Skip this histogram if it is not even used once.
if (histo->bit_cost_ == 0) continue;
// Copy histograms from 'in' to 'out'.
histo->CopyTo(out->histograms_[size++]);
}
return WP2_STATUS_OK;
}
// Class that merges histograms together if they have common entropies for some
// of their symbols.
class BinMerger {
public:
BinMerger(uint32_t histo_size, uint32_t effort) : effort_(effort) {
combine_cost_factor_ = 0.16;
if (effort < 8) {
if (histo_size > 256) combine_cost_factor_ /= 2.;
if (histo_size > 512) combine_cost_factor_ /= 2.;
if (histo_size > 1024) combine_cost_factor_ /= 2.;
if (effort < 5) combine_cost_factor_ /= 2.;
}
}
// Merges empty histograms together.
static WP2Status RemoveEmptyHistograms(HistogramSet* const image_histo,
SegmentImgWrapper* const segments) {
uint32_t new_size = 0;
auto& histograms = image_histo->histograms_;
for (uint32_t i = 0; i < histograms.size(); ++i) {
if (histograms[i]->bit_cost_ == 0.) {
(*segments)[histograms[i]->id_] = -1;
} else {
std::swap(histograms[i], histograms[new_size]);
++new_size;
}
}
WP2_CHECK_ALLOC_OK(histograms.resize(new_size));
return WP2_STATUS_OK;
}
// Partition histograms to different entropy bins for three dominant (literal,
// red and blue) symbol costs and compute the histogram aggregate bit_cost.
bool AnalyzeEntropyBin(const HistogramSet& image_histo,
uint16_t* const bin_map) {
// Don't attempt linear bin-partition heuristic for
// histograms of small sizes (as bin_map will be very sparse) and
// maximum effort 9 (to preserve the compression gains at that level).
const uint32_t entropy_combine_num_bins =
effort_ == 0 ? kNumPartitions : kBinSizeTot;
const bool entropy_combine =
(image_histo.histograms_.size() > entropy_combine_num_bins * 2) &&
(effort_ < 9);
if (!entropy_combine) return false;
std::fill(max_costs_, max_costs_ + kSymbolNum, 0);
std::fill(min_costs_, min_costs_ + kSymbolNum,
std::numeric_limits<float>::max());
// Analyze the entropy cost bounds.
for (const Histogram* h : image_histo.histograms_) {
for (uint32_t s = 0; s < h->symbols_info_->Size(); ++s) {
if (h->costs_[s] < min_costs_[s]) {
min_costs_[s] = h->costs_[s];
} else if (h->costs_[s] > max_costs_[s]) {
max_costs_[s] = h->costs_[s];
}
}
}
// Find the widest cost disparities. Variance and coefficient of variation
// are actually worse. PCA should be tried.
for (uint32_t i = 0; i < image_histo.symbols_info_->Size(); ++i) {
disparities_[i].first = max_costs_[i] - min_costs_[i];
disparities_[i].second = i;
}
std::sort(disparities_, disparities_ + kSymbolNum,
std::greater<std::pair<float, uint32_t>>());
// bin-hash histograms on three of the dominant (literal, red and blue)
// symbol costs and store the resulting bin_id for each histogram.
uint32_t i = 0;
for (const Histogram* h : image_histo.histograms_) {
bin_map[i++] = GetHistoBinIndex(*h);
}
return true;
}
// Compact image_histo[] by merging some histograms with same bin_id together
// if it's advantageous.
WP2Status CombineEntropyBin(const WP2::Vector_u16& bin_map,
PopulationAnalyzer* const analyzer,
HistogramSet* const image_histo,
SegmentImgWrapper* const segments) const {
Histogram** const histograms = image_histo->histograms_.data();
// Work in-place: processed histograms are put at the beginning of
// image_histo[]. At the end, we just have to truncate the array.
struct {
int16_t first; // position of the histogram that accumulates all
// histograms with the same bin_id
uint16_t num_combine_failures; // number of combine failures per bin_id
} bin_info[kBinSizeTot];
const uint32_t num_bins = (effort_ == 0) ? kNumPartitions : kBinSizeTot;
assert(num_bins <= kBinSizeTot);
for (uint32_t idx = 0; idx < num_bins; ++idx) {
bin_info[idx].first = -1;
bin_info[idx].num_combine_failures = 0;
}
uint32_t size = 0;
for (uint32_t idx = 0; idx < image_histo->histograms_.size(); ++idx) {
const int bin_id = bin_map[idx];
const int first = bin_info[bin_id].first;
assert(size <= idx);
if (first == -1) {
// just move histogram #idx to its final position
histograms[size] = histograms[idx];
bin_info[bin_id].first = size++;
} else if (effort_ == 0) {
HistogramAdd(*histograms[idx], *histograms[first], histograms[first]);
(*segments)[histograms[first]->id_] =
(*segments)[histograms[idx]->id_] =
std::min(histograms[first]->id_, histograms[idx]->id_);
} else {
// try to merge #idx into #first (both share the same bin_id)
const float bit_cost = histograms[idx]->bit_cost_;
const float bit_cost_thresh = -bit_cost * combine_cost_factor_;
const float curr_cost_diff = analyzer->HistogramAddEval(
*histograms[first], *histograms[idx], image_histo->tmp_histo_,
bit_cost_thresh);
if (curr_cost_diff < bit_cost_thresh) {
(*segments)[histograms[first]->id_] =
(*segments)[histograms[idx]->id_] = image_histo->tmp_histo_->id_ =
std::min(histograms[first]->id_, histograms[idx]->id_);
std::swap(image_histo->tmp_histo_, histograms[first]);
} else {
histograms[size++] = histograms[idx];
}
}
}
WP2_CHECK_ALLOC_OK(image_histo->histograms_.resize(size));
if (effort_ == 0) {
// for low_effort case, update the final cost when everything is merged
for (uint32_t idx = 0; idx < size; ++idx) {
analyzer->UpdateHistogramCost(histograms[idx]);
}
}
return WP2_STATUS_OK;
}
private:
uint32_t GetHistoBinIndex(const Histogram& h) const {
uint32_t bin_id = 0;
// Carve the space depending on the three widest disparities.
for (uint32_t i = 0; i < (effort_ > 0 ? 3u : 1u); ++i) {
const uint32_t s = disparities_[i].second;
const float range = disparities_[i].first;
if (range == 0.) break;
// Fix potential rounding issues.
const uint32_t new_bin_id = std::min(
(uint32_t)(kNumPartitions * (h.costs_[s] - min_costs_[s]) / range),
kNumPartitions - 1);
bin_id = bin_id * kNumPartitions + new_bin_id;
assert(bin_id < kBinSizeTot);
}
return bin_id;
}
// Number of partitions for the dominant symbols.
static constexpr uint32_t kNumPartitions = 4;
// The size of the bin-hash corresponding to the three dominant costs.
static constexpr uint32_t kBinSizeTot =
(kNumPartitions * kNumPartitions * kNumPartitions);
float min_costs_[kSymbolNum];
float max_costs_[kSymbolNum];
std::pair<float, uint32_t> disparities_[kSymbolNum];
float combine_cost_factor_;
const uint32_t effort_;
};
// -----------------------------------------------------------------------------
// Histogram pairs priority queue
WP2Status HistoQueue::Init(WP2::VectorNoCtor<Histogram*>* const histograms,
PopulationAnalyzer* const analyzer) {
histograms_ = histograms;
analyzer_ = analyzer;
return WP2_STATUS_OK;
}
WP2Status HistoQueue::Resize(uint32_t size) {
WP2_CHECK_ALLOC_OK(queue_.reserve(size));
WP2_CHECK_ALLOC_OK(indices_.resize(histograms_->size()));
std::iota(indices_.begin(), indices_.end(), 0u);
return WP2_STATUS_OK;
}
void HistoQueue::MergeFront(SegmentImgWrapper* const segments) {
const uint16_t idx1 = queue_.front().idx1;
const uint16_t idx2 = queue_.front().idx2;
assert(idx1 < idx2);
// Merge idx2 into idx1.
Histogram* const histo1 = (*histograms_)[idx1];
const Histogram& histo2 = *(*histograms_)[idx2];
HistogramAdd(histo2, *histo1, histo1);
histo1->bit_cost_ = queue_.front().cost_combo;
const int16_t min_id = std::min(histo1->id_, histo2.id_);
(*segments)[histo1->id_] = (*segments)[histo2.id_] = min_id;
histo1->id_ = min_id;
for (uint32_t i = 0; i < queue_.size();) {
HistogramPair* const p = &queue_[i];
if (p->idx1 == idx1 || p->idx2 == idx1 || p->idx1 == idx2 ||
p->idx2 == idx2) {
// Pop the pair from the list.
*p = queue_.back();
queue_.pop_back();
continue;
}
MaybeUpdateHead(p);
++i;
}
// Remove idx2 from valid indices.
const auto iter = std::lower_bound(indices_.begin(), indices_.end(), idx2);
assert(iter != indices_.end() && *iter == idx2);
std::copy(iter + 1, indices_.end(), iter);
indices_.pop_back();
}
WP2Status HistoQueue::Push(uint16_t idx1, uint16_t idx2, float threshold,
float* const cost_diff) {
if (idx1 > idx2) std::swap(idx1, idx2);
HistogramPair pair;
pair.idx1 = idx1;
pair.idx2 = idx2;
const Histogram* const h1 = (*histograms_)[idx1];
const Histogram* const h2 = (*histograms_)[idx2];
const float sum_cost = h1->bit_cost_ + h2->bit_cost_;
pair.cost_combo = 0.;
analyzer_->GetCombinedHistogramEntropy(*h1, *h2, sum_cost + threshold,
&pair.cost_combo);
pair.cost_diff = pair.cost_combo - sum_cost;
// Do not even consider the pair if it does not improve the entropy.
if (cost_diff != nullptr) *cost_diff = 0;
if (pair.cost_diff >= threshold) return WP2_STATUS_OK;
// We cannot add more elements than the capacity.
assert(queue_.size() < queue_.capacity());
WP2_CHECK_ALLOC_OK(queue_.push_back(pair, /*resize_if_needed=*/false));
MaybeUpdateHead(&queue_.back());
if (cost_diff != nullptr) *cost_diff = pair.cost_diff;
return WP2_STATUS_OK;
}
WP2Status HistoQueue::RemoveInvalidHistograms() {
uint32_t size = 0;
for (uint32_t i : indices_) {
std::swap((*histograms_)[i], (*histograms_)[size]);
++size;
}
WP2_CHECK_ALLOC_OK(histograms_->resize(size));
queue_.clear();
return WP2_STATUS_OK;
}
void HistoQueue::MaybeUpdateHead(HistogramPair* const pair) {
assert(pair >= queue_.data() && pair <= &queue_.back());
assert(!queue_.empty());
if (pair->cost_diff < queue_.front().cost_diff) {
// Replace the best pair.
const HistogramPair tmp = queue_.front();
queue_.front() = *pair;
*pair = tmp;
}
}
// -----------------------------------------------------------------------------
// Combines histograms by continuously choosing the one with the highest cost
// reduction.
static WP2Status HistogramCombineGreedy(HistogramSet* const image_histo,
SegmentImgWrapper* const segments,
PopulationAnalyzer* const analyzer,
const WP2::ProgressRange& progress) {
const uint32_t image_histo_size = image_histo->histograms_.size();
// The number of loops is not trivial to compute so the ProgressHook is called
// for interruptability but there is no advancement.
WP2::ProgressScale loop_progress(progress, 0.);
// Priority queue of histogram pairs.
HistoQueue queue;
WP2_CHECK_STATUS(queue.Init(&image_histo->histograms_, analyzer));
WP2_CHECK_STATUS(queue.Resize((image_histo_size - 1) * image_histo_size / 2));
for (uint32_t i = 0; i < image_histo_size; ++i) {
for (uint32_t j = i + 1; j < image_histo_size; ++j) {
WP2_CHECK_STATUS(queue.Push(i, j, 0.));
WP2_CHECK_STATUS(loop_progress.AdvanceBy(0.));
}
}
while (queue.ValidIndices().size() > 1 && !queue.empty()) {
const uint32_t idx1 = queue.front().idx1;
queue.MergeFront(segments);
// Push new pairs formed with combined histogram to the queue.
for (uint32_t i : queue.ValidIndices()) {
if (i != idx1) {
WP2_CHECK_STATUS(queue.Push(idx1, i, /*threshold=*/0.));
WP2_CHECK_STATUS(loop_progress.AdvanceBy(0.));
}
}
}
// Remove histograms that have been merged.
WP2_CHECK_STATUS(queue.RemoveInvalidHistograms());
WP2_CHECK_STATUS(progress.AdvanceBy(1.));
return WP2_STATUS_OK;
}
// Perform histogram aggregation using a stochastic approach.
// 'do_greedy' is set to 1 if a greedy approach needs to be performed
// afterwards, 0 otherwise.
// Returns true on success, false otherwise.
static WP2Status HistogramCombineStochastic(
HistogramSet* const image_histo, uint32_t min_cluster_size, float min_diff,
SegmentImgWrapper* const segments, PopulationAnalyzer* const analyzer,
const WP2::ProgressRange& progress, bool* const do_greedy) {
WP2::UniformIntDistribution random;
const uint32_t outer_iters = image_histo->histograms_.size();
const uint32_t num_tries_no_success = outer_iters / 2;
// Priority queue of histogram pairs. Its size impacts the quality of the
// compression and the speed: the smaller the faster but the worse for the
// compression.
HistoQueue queue;
constexpr uint32_t kHistoQueueSize = 9;
const WP2::ProgressScale iter_progress(progress,
1. / std::max(1u, outer_iters));
WP2_CHECK_STATUS(queue.Init(&image_histo->histograms_, analyzer));
WP2_CHECK_STATUS(queue.Resize(kHistoQueueSize));
// Collapse similar histograms in 'image_histo'.
++min_cluster_size;
uint32_t valid_size = queue.ValidIndices().size();
uint32_t iter = 0, tries_with_no_success = 0;
for (; iter < outer_iters && valid_size >= min_cluster_size &&
++tries_with_no_success < num_tries_no_success;
++iter) {
float best_cost = queue.empty() ? 0.f : queue.front().cost_diff;
// valid_size / 2 was chosen empirically. Less means faster but worse
// compression.
const uint32_t num_tries = valid_size / 2;
for (uint32_t j = 0; j < num_tries; ++j) {
float curr_cost;
// Choose two different histograms at random and try to combine them.
uint32_t idx1 = random.Get(0u, valid_size - 1);
uint32_t idx2 = random.Get(0u, valid_size - 2);
if (idx2 >= idx1) ++idx2;
idx1 = queue.ValidIndices()[idx1];
idx2 = queue.ValidIndices()[idx2];
if (idx1 > idx2) std::swap(idx1, idx2);
// Calculate cost reduction on combination.
WP2_CHECK_STATUS(queue.Push(idx1, idx2, best_cost, &curr_cost));
if (curr_cost < min_diff) { // found a better pair?
best_cost = curr_cost;
// Empty the queue if we reached full capacity.
if (queue.full()) break;
}
}
WP2_CHECK_STATUS(iter_progress.AdvanceBy(1.));
if (queue.empty()) continue;
// Merge the two best histograms.
queue.MergeFront(segments);
valid_size = queue.ValidIndices().size();
tries_with_no_success = 0;
}
WP2_CHECK_STATUS(queue.RemoveInvalidHistograms());
*do_greedy = (valid_size <= min_cluster_size);
WP2_CHECK_STATUS(iter_progress.AdvanceBy(outer_iters - iter));
return WP2_STATUS_OK;
}
// -----------------------------------------------------------------------------
// Histogram refinement
// Make sure the ARGB image is a label image.
// ARB are 0 and only G contains indices.
// Each G value is either:
// - <= to the previous ones
// - equal to the max so far + 1
bool SegmentImgWrapper::IsLabelImage(uint32_t size) const {
if (size % 4 != 0) return false;
int16_t max_value = 0;
const int16_t* v = data_;
for (const int16_t* const v_end = v + size; v < v_end; v += 4) {
if (v[0] != 0 || v[1] != 0 || v[3] != 0) return false;
if (v[2] <= max_value) continue;
if (v[2] - max_value > 1) return false;
max_value = v[2];
}
return true;
}
// Find the best 'out' histogram for each of the 'in' histograms.
// Note: we assume that out[]->bit_cost_ is already up-to-date.
static WP2Status HistogramRemap(const HistogramSet& in, HistogramSet* const out,
PopulationAnalyzer* const analyzer,
int16_t* const segments) {
Histogram* const* const in_histo = in.histograms_.data();
Histogram* const* const out_histo = out->histograms_.data();
const uint32_t in_size = in.histograms_.size();
const uint32_t out_size = out->histograms_.size();
// If all histograms have been merged into 1, they all belong to the same
// segment.
SegmentImgWrapper segments_wrap(segments);
if (out->histograms_.size() == 1) {
for (uint32_t i = 0; i < in_size; ++i) segments_wrap[i] = 0;
analyzer->UpdateHistogramCost(out->histograms_[0]);
return WP2_STATUS_OK;
}
WP2::Vector_u32 mapping_count;
WP2_CHECK_ALLOC_OK(mapping_count.resize(in_size));
std::fill(mapping_count.begin(), mapping_count.end(), 0);
// Make sure the mapping is as minimal as possible.
for (int16_t i = 0; i < (int16_t)in_size; ++i) {
int16_t* const segment = &segments_wrap[i];
// If we have an empty histogram.
if (*segment == -1) continue;
if (*segment != i) {
// If we have a histogram that is not its own segment.
assert(*segment < i);
*segment = segments_wrap[*segment];
}
++mapping_count[*segment];
// Make sure we reached the minimal mapping.
assert(segments_wrap[*segment] == *segment);
}
// Assign empty histograms to the most common used segment to improve
// compression.
// TODO(vrabaud) Test with assigning to the same as the previous block to
// favor LZ77.
const uint32_t best_segment =
std::max_element(mapping_count.begin(), mapping_count.end()) -
mapping_count.begin();
for (uint32_t i = 0; i < in_size; ++i) {
if (segments_wrap[i] < 0) segments_wrap[i] = best_segment;
}
// Re-write 'segments' so that it is a proper label image with values
// increasing.
std::fill(mapping_count.begin(), mapping_count.end(), in_size);
uint32_t mapping_max = 0;
for (uint32_t i = 0; i < in_size; ++i) {
int16_t* const segment = &segments_wrap[i];
if (mapping_count[*segment] == in_size) {
mapping_count[*segment] = mapping_max++;
}
*segment = mapping_count[*segment];
}
assert(segments_wrap.IsLabelImage(4 * in_size));
// Recompute each out based on raw and symbols.
for (uint32_t i = 0; i < out_size; ++i) {
out_histo[i]->Clear();
}
for (uint32_t i = 0; i < in_size; ++i) {
const int idx = segments_wrap[i];
HistogramAdd(*in_histo[i], *out_histo[idx], out_histo[idx]);
}
for (uint32_t i = 0; i < out_size; ++i) {
analyzer->UpdateHistogramCost(out_histo[i]);
}
return WP2_STATUS_OK;
}
// Number of histograms for which we try to merge them all into one to see if
// it is more efficient.
// TODO(vrabaud) Make this parameter effort dependent.
const int kAllHistogramMergingNum = 10;
WP2Status GetHistoImageSymbols(uint32_t width, uint32_t height,
const BackwardRefs& refs, int effort,
const WP2::ProgressRange& progress,
int histogram_bits,
const LosslessSymbolsInfo& symbols_info,
HistogramSet* const merged_histo,
int16_t* const segments) {
const WP2::ProgressRange combine_progress(progress, 0.2);
const WP2::ProgressRange encode_progress(progress, 0.6);
const WP2::ProgressRange reduce_progress(progress, 0.2);
const uint32_t histo_xsize =
histogram_bits ? SubSampleSize(width, histogram_bits) : 1;
const uint32_t histo_ysize =
histogram_bits ? SubSampleSize(height, histogram_bits) : 1;
const uint32_t num_segments = histo_xsize * histo_ysize;
HistogramSet orig_histo;
WP2_CHECK_ALLOC_OK(orig_histo.Allocate(num_segments, symbols_info));
// Construct the histograms from backward references.
WP2_CHECK_STATUS(BuildHistograms(width, height, histogram_bits, symbols_info,
refs, &orig_histo));
// Copies the histograms and computes their bit_cost.
PopulationAnalyzer analyzer(width * height);
WP2_CHECK_STATUS(analyzer.Allocate(symbols_info));
for (Histogram* const h : orig_histo.histograms_) {
analyzer.UpdateHistogramCost(h);
}
WP2_CHECK_STATUS(HistogramSetCopy(orig_histo, symbols_info, merged_histo));
SegmentImgWrapper segments_wrap(segments);
// Invalidate all histograms.
for (uint32_t i = 0; i < num_segments; ++i) segments_wrap[i] = -1;
// Validate non-empty histograms.
for (const Histogram* histo : merged_histo->histograms_) {
segments_wrap[histo->id_] = histo->id_;
}
WP2::Vector_u16 bin_map;
BinMerger merger(orig_histo.histograms_.size(), effort);
WP2_CHECK_STATUS(merger.RemoveEmptyHistograms(merged_histo, &segments_wrap));
WP2_CHECK_ALLOC_OK(bin_map.resize(merged_histo->histograms_.size()));
const bool entropy_combine =
merger.AnalyzeEntropyBin(*merged_histo, bin_map.data());
if (entropy_combine) {
// Merge histograms with similar entropy.
WP2_CHECK_STATUS(merger.CombineEntropyBin(bin_map, &analyzer, merged_histo,
&segments_wrap));
}
// Don't combine the histograms using stochastic and greedy heuristics for
// low-effort compression mode.
if (effort > 0 || !entropy_combine) {
const WP2::ProgressRange stochastic_progress(combine_progress, 0.5);
const WP2::ProgressRange greedy_progress(combine_progress, 0.5);
const float x = (effort == 5) ? 75.f / 100.f : effort / 9.f;
// cubic ramp between 1 and MAX_HISTO_GREEDY:
const auto threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1));
bool do_greedy;
WP2_CHECK_STATUS(HistogramCombineStochastic(
merged_histo, threshold_size, 0.f, &segments_wrap, &analyzer,
stochastic_progress, &do_greedy));
if (do_greedy) {
WP2_CHECK_STATUS(HistogramCombineGreedy(merged_histo, &segments_wrap,
&analyzer, greedy_progress));
} else {
WP2_CHECK_STATUS(greedy_progress.AdvanceBy(1.));
}
} else {
WP2_CHECK_STATUS(combine_progress.AdvanceBy(1.));
}
// In case we have too many histograms, merge the least bad pairs.
while (merged_histo->histograms_.size() > kMaxHistogramImageSize) {
// Unknown loop count but ProgressWatcher should still be called.
const WP2::ProgressScale merge_progress(reduce_progress, 0.);
bool do_greedy;
WP2_CHECK_STATUS(HistogramCombineStochastic(
merged_histo, kMaxHistogramImageSize, std::numeric_limits<float>::max(),
&segments_wrap, &analyzer, merge_progress, &do_greedy));
}
WP2_CHECK_STATUS(reduce_progress.AdvanceBy(1.));
WP2_CHECK_STATUS(
HistogramRemap(orig_histo, merged_histo, &analyzer, segments));
// Sometimes, combining histograms provides worse entropy. But if we
// combine all the histograms, we could have the overall extra entropy be
// less than the cost of adding the entropy image.
if (merged_histo->histograms_.size() > 1 &&
merged_histo->histograms_.size() < kAllHistogramMergingNum &&
merged_histo->histograms_.size() < (orig_histo.histograms_.size() - 1)) {
// Cost of all the histograms and the entropy image.
float cost_all;
// Figure out the entropy of the symbols image.
{
WP2::ANSEnc enc;
HashChain hash_chain;
WP2_CHECK_ALLOC_OK(hash_chain.Allocate(num_segments));
BackwardRefsPool ref_pool;
ref_pool.Init(num_segments);
WP2::ANSDictionaries dicts;
LosslessSymbolsInfo symbols_info_tmp;
symbols_info_tmp.InitAsLabelImage(histo_xsize * histo_ysize,
merged_histo->histograms_.size());
WP2_CHECK_STATUS(EncodeHelperImage(
&enc, &dicts, segments, &hash_chain, &ref_pool, histo_ysize,
histo_xsize, symbols_info_tmp, effort, encode_progress));
cost_all = enc.GetCost(dicts);
}
// Compute the cost if we merge all histograms and update cost_all with the
// cost of each histogram.
Histogram* const tmp_histo = merged_histo->tmp_histo_;
merged_histo->histograms_[0]->CopyTo(tmp_histo);
cost_all += merged_histo->histograms_[0]->bit_cost_;
for (uint32_t i = 1; i < merged_histo->histograms_.size(); ++i) {
cost_all += merged_histo->histograms_[i]->bit_cost_;
HistogramAdd(*merged_histo->histograms_[i], *tmp_histo, tmp_histo);
}
analyzer.UpdateHistogramCost(tmp_histo);
if (tmp_histo->bit_cost_ < cost_all) {
// Use that merged histogram.
WP2_CHECK_ALLOC_OK(merged_histo->histograms_.resize(1));
tmp_histo->CopyTo(merged_histo->histograms_[0]);
for (uint32_t i = 0; i < num_segments; ++i) segments_wrap[i] = 0;
}
} else {
WP2_CHECK_STATUS(encode_progress.AdvanceBy(1.));
}
return WP2_STATUS_OK;
}
} // namespace WP2L