blob: d3e92fafab9966df3f9a924a74bab70ae0b8ae19 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// -----------------------------------------------------------------------------
//
// Defines utilities to quantize a distribution to better store it and the
// population it represents.
//
// Author: Vincent Rabaud (vrabaud@google.com)
#ifndef WP2_ENC_LOSSLESS_QUANTIZER_H_
#define WP2_ENC_LOSSLESS_QUANTIZER_H_
#include <cstddef>
#include <cstdint>
#include <optional>
#include "src/utils/ans_enc.h"
#include "src/utils/ans_utils.h"
#include "src/utils/vector.h"
#include "src/wp2/base.h"
namespace WP2 {
class Quantizer {
public:
enum ConfigType { Raw, Huffman };
// Config defining how a histogram is stored.
// E.g. [0,1,8,0,4]
// If 'is_sparse' is true, it is stored as sparse, 'histo' contains [1,8,4]
// and "runs" the mapping differences minus 1: [2,1,2] (the first element is
// a difference to -1) minus 1 hence [1,0,1]
// 'histogram_to_write' contains how 'histo' will be finally written, which
// in Raw type is the same, but in Huffman is [0,3,2] (the powers of 2 used to
// represent the histogram).
// 'histo' represents a distribution of symbol and if we were to write
// data with it, it would have an overall cost of 'cost'. Overall means the
// cost of writing the symbols with probabilities defined by 'histo' but also
// of storing 'histo'.
struct ConfigParam {
// The following members differentiate the different configurations.
ConfigType type;
bool is_sparse;
uint8_t max_freq_bits;
// Prefix code specific parameters.
uint32_t prefix_code_histo_len;
uint32_t prefix_code_prefix_size;
};
struct HistogramSparse {
uint32_t* counts;
const uint16_t* mapping; // pointer to the original external mapping
uint32_t nnz; // number of non-zero counts
};
struct Config {
// Writes the header and the count of a histogram.
void WriteHistogramHeader(uint32_t symbol_range,
OptimizeArrayStorageStat* stats,
uint32_t stats_size, ANSEncBase* enc) const;
void WriteHistogramCounts(uint32_t symbol_range, uint32_t max_count,
OptimizeArrayStorageStat* stats,
uint32_t stats_size, ANSEncBase* enc) const;
ConfigParam param;
// Histogram to write (which might be different from histo.counts: e.g.,
// for sparse histograms, we know counts >0, hence we can subtract 1 when
// writing).
uint32_t* histogram_to_write;
size_t size_to_write;
HistogramSparse histo;
// Cost of storing the symbols only (without the header).
float cost_symbols_only;
// Overall cost: header cost + cost_symbols_only.
float cost;
};
// range_max is the maximum range of the used symbols (maximum value + 1).
WP2Status Allocate(uint32_t range_max);
// Given a histogram of symbols, their range, as well as the bit size
// of the number of pixels n_pixels_bits, find the best way to store the
// symbols represented by 'histogram' with the data in 'histogram' by
// minimizing their overall size. 'size_sparse' is an upper bound of the
// number of non-zero counts. 'effort' is between 0 (fastest) and 9 (slowest).
// 'max_count' is the maximum value a count can take.
// The function is re-entrant: if ResetConfigs() is not called, the best
// configuration is kept in memory as a starting configuration. It will be
// updated if a strictly better score is found.
// 'cost_offset' is the cost that will be added to the Config cost.
// 'cost_threshold' is the cost above which we can give up if
// cost_offset + Config cost >= cost_threshold.
bool Quantize(const uint32_t* histogram, const uint16_t* mapping,
size_t size_sparse, uint32_t symbol_range, uint32_t max_count,
std::optional<float> cost_threshold, float cost_offset,
int effort);
// Returns the current best configuration. The best configuration is updated
// between Quantize calls.
Config* GetBest() { return config_best_; }
// Resets what is considered as the best config.
void ResetBest() { config_best_ = nullptr; }
private:
// Implementation of the quantization that can call itself to compress its own
// coefficients.
// 'cost_threshold' is the value above which we can early exit.
void QuantizeImpl(const uint32_t* histogram, const uint16_t* mapping,
size_t size_sparse, uint32_t symbol_range,
uint32_t max_count, int effort);
static constexpr size_t kConfigNbr = 2;
Config configs_[kConfigNbr];
// Pointer to the current best config in configs_.
Config* config_best_ = nullptr;
uint32_t histogram_size_max_;
Vector_u32 buffer_; // Big memory chunk storing the Config buffers.
VectorNoCtor<OptimizeArrayStorageStat> stats_buffer_;
};
} // namespace WP2
#endif // WP2_ENC_LOSSLESS_QUANTIZER_H_