blob: 882a803049800944b335524aa3c91d25775ed44e [file] [log] [blame]
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// -----------------------------------------------------------------------------
//
// Author: Jyrki Alakuijala (jyrki@google.com)
//
#include "src/enc/lossless/backward_references_enc.h"
#include "src/common/lossless/color_cache.h"
#include "src/dsp/math.h"
#include "src/enc/lossless/histogram_enc.h"
#include "src/enc/lossless/losslessi_enc.h"
#include "src/enc/symbols_enc.h"
#include "src/utils/hash.h"
namespace WP2L {
// Minimum number of pixels for which it is cheaper to encode a
// distance + length instead of each pixel as a literal.
#define MIN_LENGTH 4
// -----------------------------------------------------------------------------
static const uint8_t plane_to_code_lut[128] = {
96, 73, 55, 39, 23, 13, 5, 1, 255, 255, 255, 255, 255, 255, 255, 255,
101, 78, 58, 42, 26, 16, 8, 2, 0, 3, 9, 17, 27, 43, 59, 79,
102, 86, 62, 46, 32, 20, 10, 6, 4, 7, 11, 21, 33, 47, 63, 87,
105, 90, 70, 52, 37, 28, 18, 14, 12, 15, 19, 29, 38, 53, 71, 91,
110, 99, 82, 66, 48, 35, 30, 24, 22, 25, 31, 36, 49, 67, 83, 100,
115, 108, 94, 76, 64, 50, 44, 40, 34, 41, 45, 51, 65, 77, 95, 109,
118, 113, 103, 92, 80, 68, 60, 56, 54, 57, 61, 69, 81, 93, 104, 114,
119, 116, 111, 106, 97, 88, 84, 74, 72, 75, 85, 89, 98, 107, 112, 117
};
uint32_t OffsetToPlaneCode(uint32_t width, uint32_t offset) {
const uint32_t yoffset = offset / width;
const uint32_t xoffset = offset - yoffset * width;
if (xoffset <= 8 && yoffset < 8) {
return plane_to_code_lut[yoffset * 16 + 8 - xoffset];
} else if (xoffset + 8 > width && yoffset < 7) {
return plane_to_code_lut[(yoffset + 1) * 16 + 8 + (width - xoffset)];
}
return offset + kCodeToPlaneCodes - 1;
}
// Returns the exact index where array1 and array2 are different. For an index
// inferior or equal to best_len_match, the return value just has to be strictly
// inferior to best_len_match. The current behavior is to return 0 if this index
// is best_len_match, and the index itself otherwise.
// If no two elements are the same, it returns max_limit.
static inline int FindMatchLength(const int16_t* const array1,
const int16_t* const array2,
uint32_t best_len_match, uint32_t max_limit) {
// Before 'expensive' linear match, check if the two arrays match at the
// current best length index.
if (best_len_match == max_limit ||
!ColorEq(&array1[4 * best_len_match], &array2[4 * best_len_match])) {
return 0;
}
const auto m = std::mismatch(array1, array1 + 4 * max_limit, array2);
return (m.first - array1) / 4;
}
// -----------------------------------------------------------------------------
// BackwardRefs
void BackwardRefs::Clear() {
if (tail_ != nullptr) {
*tail_ = free_blocks_; // recycle all blocks at once
}
free_blocks_ = refs_;
tail_ = &refs_;
last_block_ = nullptr;
refs_ = nullptr;
}
WP2Status BackwardRefs::CopyFrom(const BackwardRefs& refs) {
Clear();
const PixOrCopyBlock* refs_in = refs.refs_.get();
// Copy whole blocks over.
while (refs_in != nullptr) {
WP2_CHECK_STATUS(AddBlock());
std::copy(refs_in->modes.data(), refs_in->modes.data() + refs_in->size,
last_block_->modes.data());
last_block_->size = refs_in->size;
refs_in = refs_in->next.get();
}
return WP2_STATUS_OK;
}
void BackwardRefs::Reset() {
Clear();
free_blocks_.reset();
}
void BackwardRefs::Init(uint32_t block_size, BackwardRefsPool* const pool) {
refs_ = nullptr;
tail_ = nullptr;
free_blocks_ = nullptr;
last_block_ = nullptr;
tail_ = &refs_;
block_size_ = std::max(block_size, +kMinBlockSize);
pool_ = pool;
}
RefsCursor::RefsCursor(const BackwardRefs& refs) {
cur_block_ = refs.refs_;
if (refs.refs_ != nullptr) {
cur_pos_ = cur_block_->modes.data();
last_pos_ = cur_pos_ + cur_block_->size;
} else {
cur_pos_ = nullptr;
last_pos_ = nullptr;
}
}
void RefsCursor::NextBlock() {
auto b = cur_block_->next;
cur_pos_ = (b == nullptr) ? nullptr : b->modes.data();
last_pos_ = (b == nullptr) ? nullptr : b->modes.data() + b->size;
cur_block_ = b;
}
// Create a new block, either from the free list or allocated
WP2Status BackwardRefs::AddBlock() {
std::shared_ptr<PixOrCopyBlock> b = free_blocks_;
if (b == nullptr) { // allocate new memory chunk
b = std::make_shared<PixOrCopyBlock>();
if (!b->modes.resize(block_size_)) return WP2_STATUS_OUT_OF_MEMORY;
} else { // recycle from free-list
free_blocks_ = b->next;
}
*tail_ = b;
tail_ = &b->next;
last_block_ = b;
b->next = nullptr;
b->size = 0;
return WP2_STATUS_OK;
}
void BackwardRefs::FreeFromPool(BackwardRefs* const refs) {
if (refs != nullptr && refs->pool_ != nullptr) refs->pool_->Release(refs);
}
WP2Status BackwardRefs::CursorAdd(const PixelMode& v) {
auto b = last_block_;
if (b == nullptr || b->size == block_size_) {
WP2_CHECK_STATUS(AddBlock());
b = last_block_;
}
b->modes[b->size++] = v;
return WP2_STATUS_OK;
}
bool BackwardRefs::IsValid(uint32_t num_pixels) const {
// Check whether the cursor goes over the whole image.
RefsCursor c(*this);
uint32_t tot = 0;
while (c.Ok()) {
tot += c.cur_pos_->GetLength();
c.Next();
}
return (tot == num_pixels);
}
// -----------------------------------------------------------------------------
// Hash chains
bool HashChain::Allocate(uint32_t size) {
assert(size > 0);
if (!offset_length_.resize(size)) return false;
return true;
}
// -----------------------------------------------------------------------------
// Returns the maximum number of hash chain lookups to do for a
// given compression effort. Return value in range [45, 126].
// TODO(vrabaud) re-adjust those values and have a different logic (the number
// of iteration does not necessarily matter as much as the number of lines)
static int GetMaxItersForSpeed(int effort) {
return (effort == 3) ? 8 + 20 * 20 / 128 :
(effort == 5) ? 8 + 75 * 75 / 128 : 45 + effort * effort;
}
static uint32_t GetWindowSizeForHashChain(int effort, uint32_t width) {
const uint32_t max_window_size = (effort >= 7) ? WP2::kMaxTilePixels
: (effort >= 5) ? (width << 8)
: (effort >= 3) ? (width << 6)
: (width << 4);
assert(width > 0);
return std::min(max_window_size, WP2::kMaxTilePixels);
}
void HashChain::UpdateChain(uint32_t pos, const int16_t* const pixel_pair,
WP2::Vector_s32* const hash_to_first_index,
int32_t* const chain) {
const uint32_t hash_code = WP2::encoding::HashPixPair(pixel_pair, kHashBits);
chain[pos] = (*hash_to_first_index)[hash_code];
(*hash_to_first_index)[hash_code] = pos;
}
WP2Status HashChain::Fill(int effort, const int16_t* const argb, uint32_t width,
uint32_t height) {
const uint32_t size = width * height;
if (size <= 2) {
offset_length_[0] = {0, 0};
offset_length_[size - 1] = {0, 0};
return WP2_STATUS_OK;
}
// Temporarily use the offset_length_ as a hash chain.
auto chain = reinterpret_cast<int32_t*>(offset_length_.data());
WP2::Vector_s32 hash_to_first_index;
WP2_CHECK_ALLOC_OK(hash_to_first_index.resize(kHashSize));
// Set the int32_t array to -1.
std::memset(hash_to_first_index.data(), 0xff,
kHashSize * sizeof(*hash_to_first_index.data()));
// Fill the chain linking pixels with the same hash.
bool argb_comp = ColorEq(&argb[0], &argb[4]);
for (uint32_t pos = 0; pos < size - 2;) {
const bool argb_comp_next =
ColorEq(&argb[4 * (pos + 1)], &argb[4 * (pos + 2)]);
if (argb_comp && argb_comp_next) {
// Consecutive pixels with the same color will share the same hash.
// We therefore use a different hash: the color and its repetition
// length.
int16_t tmp[8] = {0};
uint32_t len = 1;
ColorCopy(&argb[4 * pos], &tmp[0]);
// Figure out how far the pixels are the same.
// The last pixel has a different 64 bit hash, as its next pixel does
// not have the same color, so we just need to get to the last pixel equal
// to its follower.
while (pos + len + 2 < size &&
ColorEq(&argb[4 * (pos + len + 2)], &argb[4 * pos])) {
++len;
}
// Process the rest of the hash chain.
while (len) {
tmp[7] = len--;
UpdateChain(pos, tmp, &hash_to_first_index, chain);
++pos;
}
argb_comp = false;
} else {
// Just move one pixel forward.
UpdateChain(pos, &argb[4 * pos], &hash_to_first_index, chain);
++pos;
argb_comp = argb_comp_next;
}
}
// Process the penultimate pixel.
UpdateChain(size - 2, &argb[4 * (size - 2)], &hash_to_first_index, chain);
hash_to_first_index.reset();
// Find the best match interval at each pixel, defined by an offset to the
// pixel and a length. The right-most pixel cannot match anything to the right
// (hence a best length of 0) and the left-most pixel nothing to the left
// (hence an offset of 0).
const int iter_max = GetMaxItersForSpeed(effort);
assert(size > 2);
offset_length_[size - 1] = {0, 0};
const uint32_t window_size = GetWindowSizeForHashChain(effort, width);
for (uint32_t base_position = size - 2; base_position > 0;) {
const uint32_t max_len = size - 1 - base_position;
const int16_t* const argb_start = &argb[4 * base_position];
int iter = iter_max;
uint32_t best_length = 0;
uint32_t best_offset = 0;
const int min_pos =
(base_position > window_size) ? base_position - window_size : 0;
if (effort > 0) {
uint32_t curr_length;
// Heuristic: use the comparison with the above line as an initialization.
if (base_position >= width) {
curr_length = FindMatchLength(argb_start - 4 * width, argb_start,
best_length, max_len);
if (curr_length > best_length) {
best_length = curr_length;
best_offset = width;
}
--iter;
}
// Heuristic: compare to the previous pixel.
curr_length =
FindMatchLength(argb_start - 4, argb_start, best_length, max_len);
if (curr_length > best_length) {
best_length = curr_length;
best_offset = 1;
}
--iter;
}
// If we can still do better, go for it.
if (best_length < max_len) {
int16_t argb_end[4];
assert(argb_start + 4 * best_length < argb + 4 * size);
ColorCopy(&argb_start[4 * best_length], argb_end);
for (int pos = chain[base_position]; pos >= min_pos && --iter;
pos = chain[pos]) {
// Early exit: make sure that the segment starting at pos has at least
// a length of best_length + 1 by checking argb[4 * (pos + best_length).
assert(argb + 4 * (pos + best_length) < argb + 4 * size);
if (!ColorEq(&argb[4 * (pos + best_length)], argb_end)) continue;
const int16_t* const start = &argb[4 * pos];
assert(start + 4 * max_len <= argb + 4 * size);
assert(argb_start + 4 * max_len <= argb + 4 * size);
const auto p = std::mismatch(start, start + 4 * max_len, argb_start);
const uint32_t curr_length = (p.first - start) / 4;
if (best_length < curr_length) {
best_length = curr_length;
best_offset = base_position - pos;
// Stop if we have reached a good enough length. This is totally
// empirical.
// TODO(vrabaud) change heuristic, use effort.
if (best_length >= std::min(max_len, 256u)) break;
assert(argb_start + 4 * best_length < argb + 4 * size);
ColorCopy(&argb_start[4 * best_length], argb_end);
}
}
}
// We have the best match but in case the two intervals continue matching
// to the left, we have the best matches for the left-extended pixels.
while (true) {
offset_length_[base_position] = {best_offset, best_length};
--base_position;
// Stop if we don't have a match or if we are out of bounds.
if (best_offset == 0 || base_position == 0) break;
// Stop if we cannot extend the matching intervals to the left.
if (base_position < best_offset ||
!ColorEq(&argb[4 * (base_position - best_offset)],
&argb[4 * base_position])) {
break;
}
++best_length;
}
}
offset_length_[0] = {0, 0};
#if 0
// Check the found values: this is costly CPU-wise so disabling it.
for (uint32_t pos = 0; pos < size; ++pos) {
if (offset_length_[pos].offset == 0) {
assert(offset_length_[pos].length == 0);
continue;
}
assert(pos >= offset_length_[pos].offset);
assert(std::equal(&argb[4 * pos],
&argb[4 * pos] + 4 * offset_length_[pos].length,
&argb[4 * (pos - offset_length_[pos].offset)]));
}
#endif
return WP2_STATUS_OK;
}
static inline WP2Status AddSingleLiteral(const int16_t* const pixel,
bool use_color_cache,
ColorCache* const color_cache,
BackwardRefs* const refs) {
PixelMode v;
if (use_color_cache) {
uint32_t cache_index;
const uint32_t index_range = color_cache->IndexRange();
if (color_cache->Insert(pixel, &cache_index)) {
v = PixelMode::CreateLiteral(pixel);
} else {
v = PixelMode::CreateCacheIdx(cache_index, index_range);
}
} else {
v = PixelMode::CreateLiteral(pixel);
}
WP2_CHECK_STATUS(refs->CursorAdd(v));
return WP2_STATUS_OK;
}
static WP2Status BackwardReferencesRle(uint32_t width, uint32_t height,
const int16_t* const argb,
const CacheConfig& cache_config,
BackwardRefs* const refs) {
const uint32_t num_pixels = width * height;
const bool use_color_cache = (cache_config.type != CacheType::kNone);
ColorCachePtr color_cache;
WP2_CHECK_STATUS(color_cache.Init(cache_config));
refs->Clear();
// Add first pixel as literal.
WP2_CHECK_STATUS(
AddSingleLiteral(&argb[0], use_color_cache, color_cache.get(), refs));
for (uint32_t i = 1; i < num_pixels;) {
const uint32_t max_len = num_pixels - i;
const uint32_t rle_len =
FindMatchLength(&argb[4 * i], &argb[4 * (i - 1)], 0, max_len);
const uint32_t prev_row_len =
(i < width)
? 0
: FindMatchLength(&argb[4 * i], &argb[4 * (i - width)], 0, max_len);
if (rle_len >= prev_row_len && rle_len >= MIN_LENGTH) {
WP2_CHECK_STATUS(refs->CursorAdd(PixelMode::CreateCopy(1, rle_len)));
// We don't need to update the color cache here since it is always the
// same pixel being copied, and that does not change the color cache
// state.
i += rle_len;
} else if (prev_row_len >= MIN_LENGTH) {
WP2_CHECK_STATUS(
refs->CursorAdd(PixelMode::CreateCopy(width, prev_row_len)));
if (use_color_cache) {
for (uint32_t k = 0; k < prev_row_len; ++k) {
color_cache->Insert(&argb[4 * (i + k)], /*index_ptr=*/nullptr);
}
}
i += prev_row_len;
} else {
WP2_CHECK_STATUS(AddSingleLiteral(&argb[4 * i], use_color_cache,
color_cache.get(), refs));
i++;
}
}
return WP2_STATUS_OK;
}
static WP2Status BackwardReferencesNone(uint32_t num_pixels,
const int16_t* const argb,
const CacheConfig& cache_config,
BackwardRefs* const refs) {
const bool use_color_cache = (cache_config.type != CacheType::kNone);
ColorCachePtr color_cache;
WP2_CHECK_STATUS(color_cache.Init(cache_config));
refs->Clear();
for (uint32_t i = 0; i < num_pixels; ++i) {
WP2_CHECK_STATUS(AddSingleLiteral(&argb[4 * i], use_color_cache,
color_cache.get(), refs));
}
return WP2_STATUS_OK;
}
static WP2Status BackwardReferencesLz77(uint32_t width, uint32_t height,
const int16_t* const argb,
const CacheConfig& cache_config,
const HashChain& hash_chain,
BackwardRefs* const refs) {
const uint32_t num_pixels = width * height;
const bool use_color_cache = (cache_config.type != CacheType::kNone);
ColorCachePtr color_cache;
WP2_CHECK_STATUS(color_cache.Init(cache_config));
refs->Clear();
for (uint32_t i = 0, i_last_check = 0; i < num_pixels;) {
// Alternative#1: Code the pixels starting at 'i' using backward reference.
uint32_t len = hash_chain.offset_length_[i].length;
if (len >= MIN_LENGTH) {
const int len_ini = len;
uint32_t max_reach = 0;
const uint32_t j_max =
(i + len_ini >= num_pixels) ? num_pixels - 1 : i + len_ini;
// Only start from what we have not checked already.
i_last_check = (i > i_last_check) ? i : i_last_check;
// We know the best match for the current pixel but we try to find the
// best matches for the current pixel AND the next one combined.
// The naive method would use the intervals:
// [i,i+len) + [i+len, length of best match at i+len)
// while we check if we can use:
// [i,j) (where j<=i+len) + [j, length of best match at j)
for (uint32_t j = i_last_check + 1; j <= j_max; ++j) {
const uint32_t len_j = hash_chain.offset_length_[j].length;
const uint32_t reach =
j + (len_j >= MIN_LENGTH ? len_j : 1); // 1 for single literal.
if (reach > max_reach) {
len = j - i;
max_reach = reach;
if (max_reach >= num_pixels) break;
}
}
} else {
len = 1;
}
// Go with literal or backward reference.
assert(len > 0);
if (len == 1) {
WP2_CHECK_STATUS(AddSingleLiteral(&argb[4 * i], use_color_cache,
color_cache.get(), refs));
} else {
WP2_CHECK_STATUS(refs->CursorAdd(
PixelMode::CreateCopy(hash_chain.offset_length_[i].offset, len)));
if (use_color_cache) {
for (uint32_t j = i; j < i + len; ++j)
color_cache->Insert(&argb[4 * j], /*index_ptr=*/nullptr);
}
}
i += len;
}
return WP2_STATUS_OK;
}
// Compute an LZ77 by forcing matches to happen within a given distance cost.
// We therefore limit the algorithm to the lowest 32 values in the PlaneCode
// definition.
static WP2Status BackwardReferencesLz77Box(uint32_t width, uint32_t height,
const int16_t* const argb,
const CacheConfig& cache_config,
const HashChain& hash_chain_best,
HashChain* hash_chain,
BackwardRefs* const refs) {
const uint32_t num_pixels = width * height;
WP2::Vector_u16 counts_ini;
WP2_CHECK_ALLOC_OK(counts_ini.resize(width * height));
// counts[i] counts how many times a pixel is repeated starting at position i.
counts_ini[num_pixels - 1] = 1;
uint16_t* counts = &counts_ini[num_pixels - 2];
for (int i = (int)num_pixels - 2; i >= 0; --i, --counts) {
if (ColorEq(&argb[4 * i], &argb[4 * (i + 1)])) {
counts[0] = counts[1] + 1;
} else {
counts[0] = 1;
}
}
// Figure out the window offsets around a pixel. They are stored in a
// spiraling order around the pixel as defined by DistanceToPlaneCode.
static constexpr uint32_t kWindowOffsetsSizeMax = 32u;
uint32_t window_offsets[kWindowOffsetsSizeMax] = {0};
for (int y = 0; y <= 6; ++y) {
for (int x = -6; x <= 6; ++x) {
const int offset = y * width + x;
// Ignore offsets that bring us after the pixel.
if (offset <= 0) continue;
const uint32_t plane_code = OffsetToPlaneCode(width, offset);
if (plane_code >= kWindowOffsetsSizeMax) continue;
window_offsets[plane_code] = offset;
}
}
// For narrow images, not all plane codes are reached, so remove those.
uint32_t window_offsets_size = 0;
for (const auto& offset : window_offsets) {
if (offset == 0) continue;
window_offsets[window_offsets_size++] = offset;
}
// Given a pixel P, find the offsets that reach pixels unreachable from P-1
// with any of the offsets in window_offsets[].
uint32_t window_offsets_new[kWindowOffsetsSizeMax] = {0};
uint32_t window_offsets_new_size = 0;
for (uint32_t i = 0; i < window_offsets_size; ++i) {
bool is_reachable = false;
for (uint32_t j = 0; j < window_offsets_size && !is_reachable; ++j) {
is_reachable |= (window_offsets[i] == window_offsets[j] + 1);
}
if (!is_reachable) {
window_offsets_new[window_offsets_new_size] = window_offsets[i];
++window_offsets_new_size;
}
}
hash_chain->offset_length_[0] = {0, 0};
uint32_t best_offset_prev = 0, best_length_prev = 0;
for (uint32_t i = 1; i < num_pixels; ++i) {
uint32_t best_length = hash_chain_best.offset_length_[i].length;
uint32_t best_offset;
// Figure out if we should use the offset/length from the previous pixel
// as an initial guess and therefore only inspect the offsets in
// window_offsets_new[].
const bool use_prev = (best_length_prev > 1);
const uint32_t num_ind =
use_prev ? window_offsets_new_size : window_offsets_size;
best_length = use_prev ? best_length_prev - 1 : 0;
best_offset = use_prev ? best_offset_prev : 0;
// Find the longest match in a window around the pixel.
for (uint32_t ind = 0; ind < num_ind; ++ind) {
uint32_t curr_length = 0;
uint32_t j = i;
int j_offset =
use_prev ? i - window_offsets_new[ind] : i - window_offsets[ind];
if (j_offset < 0 || !ColorEq(&argb[4 * j_offset], &argb[4 * i])) {
continue;
}
// The longest match is the sum of how many times each pixel is
// repeated.
do {
const int counts_j_offset = counts_ini[j_offset];
const int counts_j = counts_ini[j];
if (counts_j_offset != counts_j) {
curr_length +=
(counts_j_offset < counts_j) ? counts_j_offset : counts_j;
break;
}
// The same color is repeated counts_pos times at j_offset and j.
curr_length += counts_j_offset;
j_offset += counts_j_offset;
j += counts_j_offset;
} while (j < num_pixels && ColorEq(&argb[4 * j_offset], &argb[4 * j]));
if (best_length < curr_length) {
best_offset = use_prev ? window_offsets_new[ind] : window_offsets[ind];
best_length = curr_length;
}
}
assert(i + best_length <= num_pixels);
if (best_length <= MIN_LENGTH) {
hash_chain->offset_length_[i] = {0, 0};
best_offset_prev = 0;
best_length_prev = 0;
} else {
hash_chain->offset_length_[i] = {best_offset, best_length};
best_offset_prev = best_offset;
best_length_prev = best_length;
}
}
hash_chain->offset_length_[0] = {0, 0};
return BackwardReferencesLz77(width, height, argb, cache_config, *hash_chain,
refs);
}
// -----------------------------------------------------------------------------
WP2Status BackwardRefsWithLocalCache(const int16_t* const argb,
const CacheConfig& cache_config,
BackwardRefs* const refs);
// Evaluate optimal cache bits for the local color cache.
// The input *best_cache_bits sets the maximum cache bits to use (passing 0
// implies disabling the local color cache). The local color cache is also
// disabled for the lower (<= 3) effort.
static WP2Status CalculateBestCacheConfig(
uint32_t width, uint32_t height, const int16_t* const argb,
uint32_t num_pixels, int effort, const BackwardRefs& refs_in,
BackwardRefsPool* const ref_pool, uint32_t cache_bits_max,
LosslessSymbolsInfo* symbols_info,
WP2::SymbolRecorder* const symbol_recorder,
CacheConfig* const cache_config) {
if (effort <= 3 || cache_bits_max == 0) {
cache_config->type = CacheType::kNone;
cache_config->cache_bits = 0;
return WP2_STATUS_OK;
}
assert(cache_bits_max > 0 && cache_bits_max <= kMaxCacheBits);
// Allocate data.
float cost_min = std::numeric_limits<float>::max();
auto refs_tmp = ref_pool->GetFreeBackwardRefs();
for (uint32_t i = 0; i <= cache_bits_max; ++i) {
// Compute the symbol statistics.
// TODO(maryla): try other types.
const CacheConfig cache_config_tmp = {
(i > 0) ? CacheType::kHash : CacheType::kNone, i};
// Update the refs to deal with a cache of size 'i' bits.
symbols_info->SetCacheRange(GetColorCacheRange(cache_config_tmp));
if (i > 0) {
WP2_CHECK_STATUS(refs_tmp->CopyFrom(refs_in));
WP2_CHECK_STATUS(
BackwardRefsWithLocalCache(argb, cache_config_tmp, refs_tmp.get()));
}
const BackwardRefs& refs = (i == 0) ? refs_in : *refs_tmp;
WP2::ANSEncCounter enc_counter;
symbol_recorder->ResetCounts();
WP2_CHECK_STATUS(
StorePixels(width, height, refs, &enc_counter, symbol_recorder));
// Get the header cost in bits.
WP2::SymbolWriter symbol_writer;
float storage_cost;
WP2_CHECK_STATUS(WriteHeaders(*symbol_recorder, *symbols_info, num_pixels,
effort, &enc_counter, /*dicts=*/nullptr,
&symbol_writer, &storage_cost));
const float cost = enc_counter.GetCost() + storage_cost;
if (cost < cost_min) {
*cache_config = cache_config_tmp;
cost_min = cost;
}
}
return WP2_STATUS_OK;
}
// Update (in-place) backward references for specified cache_bits.
WP2Status BackwardRefsWithLocalCache(const int16_t* const argb,
const CacheConfig& cache_config,
BackwardRefs* const refs) {
ColorCachePtr color_cache;
WP2_CHECK_STATUS(color_cache.Init(cache_config));
uint32_t pixel_index = 0;
RefsCursor c(*refs);
while (c.Ok()) {
PixelMode* const v = c.cur_pos_;
switch (v->GetMode()) {
case kSymbolTypeLiteral: {
uint32_t ix;
if (color_cache->Contains(&argb[4 * pixel_index], &ix)) {
// color_cache contains argb_literal
*v = PixelMode::CreateCacheIdx(ix, color_cache->IndexRange());
}
color_cache->Insert(&argb[4 * pixel_index], /*index_ptr=*/nullptr);
++pixel_index;
break;
}
case kSymbolTypeCopy: {
for (uint32_t k = 0; k < v->GetLength(); ++k) {
color_cache->Insert(&argb[4 * pixel_index++], /*index_ptr=*/nullptr);
}
break;
}
default:
// refs was created without local cache, so it can not have cache
// indexes.
assert(false);
}
c.Next();
}
return WP2_STATUS_OK;
}
static WP2Status GetBackwardReferencesLowEffort(
uint32_t width, uint32_t height, const int16_t* const argb,
const HashChain& hash_chain, BackwardRefsPool* const ref_pool,
BackwardRefsPool::RefsPtr* const refs) {
refs->reset();
auto refs_lz77 = ref_pool->GetFreeBackwardRefs();
WP2_CHECK_STATUS(BackwardReferencesLz77(
width, height, argb, {CacheType::kNone, 0}, hash_chain, &*refs_lz77));
*refs = std::move(refs_lz77);
return WP2_STATUS_OK;
}
// Gets the overall cost of storing the refs: symbol headers and pixels.
static WP2Status GetStorageCost(uint32_t width, uint32_t height,
const BackwardRefs& refs,
const LosslessSymbolsInfo& symbols_info,
uint32_t num_pixels, int effort,
float* const cost) {
// Get the symbols stats.
WP2::SymbolRecorder recorder;
WP2::ANSEncCounter enc_counter;
WP2_CHECK_STATUS(recorder.Allocate(symbols_info, /*num_records=*/0));
WP2_CHECK_STATUS(StorePixels(width, height, refs, &enc_counter, &recorder));
// Compute how much it would take to write the headers and the image.
WP2::SymbolWriter symbol_writer;
float storage_cost;
WP2_CHECK_STATUS(WriteHeaders(recorder, symbols_info, num_pixels, effort,
&enc_counter, /*dicts=*/nullptr, &symbol_writer,
&storage_cost));
*cost = enc_counter.GetCost() + storage_cost;
return WP2_STATUS_OK;
}
extern WP2Status BackwardReferencesTraceBackwards(
uint32_t width, uint32_t height, const int16_t* const argb,
const LosslessSymbolsInfo& symbols_info, const HashChain& hash_chain,
const CacheConfig& cache_config, const BackwardRefs& refs_src,
BackwardRefs* const refs_dst, bool* const check_histogram);
static WP2Status GetBackwardReferencesImpl(
uint32_t width, uint32_t height, const int16_t* const argb, int effort,
int lz77_types_to_try, uint32_t cache_bits_max, const HashChain& hash_chain,
const LosslessSymbolsInfo& symbols_info,
WP2::SymbolRecorder* const symbol_recorder, CacheConfig* const cache_config,
BackwardRefsPool* ref_pool, BackwardRefsPool::RefsPtr* const refs_best) {
LosslessSymbolsInfo symbols_info_tmp;
WP2_CHECK_STATUS(symbols_info_tmp.CopyFrom(symbols_info));
symbols_info_tmp.SetNumClusters(1);
const uint32_t num_pixels = width * height;
float bit_cost_best = std::numeric_limits<float>::max();
HashChain hash_chain_box;
auto best = ref_pool->GetFreeBackwardRefs();
// TODO(vrabaud): Assess the utility of kLZ77RLE and kLZ77Box now that we use
// ANS. Those methods were good to reduce the entropy of the distances but we
// might only use LZ77 now that costs are better estimated.
if (symbols_info.IsUnused(kSymbolDist)) {
// Do not analyze LZ77 if the symbols have been disabled.
assert(symbols_info.IsUnused(kSymbolLen));
lz77_types_to_try = kLZ77None;
} else {
// Always try not to use LZ77. This is useful for one-color images for
// example.
lz77_types_to_try |= kLZ77None;
}
const CacheConfig kNoCache{CacheType::kNone, 0};
for (int lz77_type = 1; lz77_types_to_try;
lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) {
if ((lz77_types_to_try & lz77_type) == 0) continue;
auto refs_tmp = ref_pool->GetFreeBackwardRefs();
BackwardRefs* const refs_tmp_ptr = refs_tmp.get();
switch (lz77_type) {
case kLZ77RLE:
WP2_CHECK_STATUS(
BackwardReferencesRle(width, height, argb, kNoCache, refs_tmp_ptr));
break;
case kLZ77Standard:
// Compute LZ77 with no cache (0 bits), as the ideal LZ77 with a color
// cache is not that different in practice.
WP2_CHECK_STATUS(BackwardReferencesLz77(width, height, argb, kNoCache,
hash_chain, refs_tmp_ptr));
break;
case kLZ77Box:
WP2_CHECK_ALLOC_OK(hash_chain_box.Allocate(num_pixels));
WP2_CHECK_STATUS(
BackwardReferencesLz77Box(width, height, argb, kNoCache, hash_chain,
&hash_chain_box, refs_tmp_ptr));
break;
case kLZ77None:
WP2_CHECK_STATUS(
BackwardReferencesNone(num_pixels, argb, kNoCache, refs_tmp_ptr));
break;
default:
assert(false);
}
assert(refs_tmp->IsValid(num_pixels));
// Next, try with a color cache and update the references.
CacheConfig cache_config_tmp;
WP2_CHECK_STATUS(CalculateBestCacheConfig(
width, height, argb, num_pixels, effort, *refs_tmp, ref_pool,
cache_bits_max, &symbols_info_tmp, symbol_recorder, &cache_config_tmp));
symbols_info_tmp.SetCacheRange(GetColorCacheRange(cache_config_tmp));
if (cache_config_tmp.type != CacheType::kNone) {
WP2_CHECK_STATUS(
BackwardRefsWithLocalCache(argb, cache_config_tmp, refs_tmp_ptr));
}
float bit_cost;
WP2_CHECK_STATUS(GetStorageCost(width, height, *refs_tmp, symbols_info_tmp,
num_pixels, effort, &bit_cost));
// Improve on simple LZ77 but only for high effort (TraceBackwards is
// costly).
if ((lz77_type == kLZ77Standard || lz77_type == kLZ77Box) && effort >= 3) {
const HashChain& hash_chain_tmp =
(lz77_type == kLZ77Standard) ? hash_chain : hash_chain_box;
auto refs_tmp_trace = ref_pool->GetFreeBackwardRefs();
bool check_histogram;
WP2_CHECK_STATUS(BackwardReferencesTraceBackwards(
width, height, argb, symbols_info_tmp, hash_chain_tmp,
cache_config_tmp, *refs_tmp, &*refs_tmp_trace, &check_histogram));
if (check_histogram) {
assert(refs_tmp_trace->IsValid(num_pixels));
float bit_cost_trace;
WP2_CHECK_STATUS(GetStorageCost(width, height, *refs_tmp_trace,
symbols_info_tmp, num_pixels, effort,
&bit_cost_trace));
if (bit_cost_trace < bit_cost) {
bit_cost = bit_cost_trace;
refs_tmp.swap(refs_tmp_trace);
}
}
}
// Keep the best backward references.
if (bit_cost < bit_cost_best) {
best.swap(refs_tmp);
bit_cost_best = bit_cost;
*cache_config = cache_config_tmp;
}
}
*refs_best = std::move(best);
return WP2_STATUS_OK;
}
WP2Status GetBackwardReferences(
uint32_t width, uint32_t height, const int16_t* const argb, int effort,
int lz77_types_to_try, uint32_t cache_bits_max, const HashChain& hash_chain,
const LosslessSymbolsInfo& symbols_info,
WP2::SymbolRecorder* const symbol_recorder, CacheConfig* const cache_config,
BackwardRefsPool* const ref_pool, BackwardRefsPool::RefsPtr* const refs) {
if (effort == 0) {
cache_config->type = CacheType::kNone;
cache_config->cache_bits = 0;
return GetBackwardReferencesLowEffort(width, height, argb, hash_chain,
ref_pool, refs);
} else {
return GetBackwardReferencesImpl(width, height, argb, effort,
lz77_types_to_try, cache_bits_max,
hash_chain, symbols_info, symbol_recorder,
cache_config, ref_pool, refs);
}
}
} // namespace WP2L