blob: 8b716a1742bf0cd55c65a64051c921588e7b903c [file] [log] [blame]
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
#define COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
#include <stddef.h>
#include <limits>
#include <vector>
#include "components/zucchini/image_index.h"
#include "components/zucchini/image_utils.h"
#include "components/zucchini/targets_affinity.h"
namespace zucchini {
constexpr double kMismatchFatal = -std::numeric_limits<double>::infinity();
class EncodedView;
class EquivalenceSource;
// Returns similarity score between a token (raw byte or first byte of a
// reference) in |old_image_index| at |src| and a token in |new_image_index|
// at |dst|. |targets_affinities| describes affinities for each target pool and
// is used to evaluate similarity between references, hence it's size must be
// equal to the number of pools in both |old_image_index| and |new_image_index|.
// Both |src| and |dst| must refer to tokens in |old_image_index| and
// |new_image_index|.
double GetTokenSimilarity(
const ImageIndex& old_image_index,
const ImageIndex& new_image_index,
const std::vector<TargetsAffinity>& targets_affinities,
offset_t src,
offset_t dst);
// Returns a similarity score between content in |old_image_index| and
// |new_image_index| at regions described by |equivalence|, using
// |targets_affinities| to evaluate similarity between references.
double GetEquivalenceSimilarity(
const ImageIndex& old_image_index,
const ImageIndex& new_image_index,
const std::vector<TargetsAffinity>& targets_affinities,
const Equivalence& equivalence);
// Extends |equivalence| forward and returns the result. This is related to
// VisitEquivalenceSeed().
EquivalenceCandidate ExtendEquivalenceForward(
const ImageIndex& old_image_index,
const ImageIndex& new_image_index,
const std::vector<TargetsAffinity>& targets_affinities,
const EquivalenceCandidate& equivalence,
double min_similarity);
// Extends |equivalence| backward and returns the result. This is related to
// VisitEquivalenceSeed().
EquivalenceCandidate ExtendEquivalenceBackward(
const ImageIndex& old_image_index,
const ImageIndex& new_image_index,
const std::vector<TargetsAffinity>& targets_affinities,
const EquivalenceCandidate& equivalence,
double min_similarity);
// Creates an equivalence, starting with |src| and |dst| as offset hint, and
// extends it both forward and backward, trying to maximise similarity between
// |old_image_index| and |new_image_index|, and returns the result.
// |targets_affinities| is used to evaluate similarity between references.
// |min_similarity| describes the minimum acceptable similarity score and is
// used as threshold to discard bad equivalences.
EquivalenceCandidate VisitEquivalenceSeed(
const ImageIndex& old_image_index,
const ImageIndex& new_image_index,
const std::vector<TargetsAffinity>& targets_affinities,
offset_t src,
offset_t dst,
double min_similarity);
// Container of pruned equivalences used to map offsets from |old_image| to
// offsets in |new_image|. Equivalences are pruned by cropping smaller
// equivalences to avoid overlaps, to make the equivalence map (for covered
// bytes in |old_image| and |new_image|) one-to-one.
class OffsetMapper {
public:
using const_iterator = std::vector<Equivalence>::const_iterator;
// Constructors for various data sources. "Old" and "new" image sizes are
// needed for bounds checks and to handle dangling targets.
// - From a list of |equivalences|, already sorted (by |src_offset|) and
// pruned, useful for tests.
OffsetMapper(std::vector<Equivalence>&& equivalences,
offset_t old_image_size,
offset_t new_image_size);
// - From a generator, useful for Zucchini-apply.
OffsetMapper(EquivalenceSource&& equivalence_source,
offset_t old_image_size,
offset_t new_image_size);
// - From an EquivalenceMap that needs to be processed, useful for
// Zucchini-gen.
OffsetMapper(const EquivalenceMap& equivalence_map,
offset_t old_image_size,
offset_t new_image_size);
~OffsetMapper();
size_t size() const { return equivalences_.size(); }
const_iterator begin() const { return equivalences_.begin(); }
const_iterator end() const { return equivalences_.end(); }
// Returns naive extended forward-projection of "old" |offset| that follows
// |eq|'s delta. |eq| needs not cover |offset|.
// - Averts underflow / overflow by clamping to |[0, new_image_size_)|.
// - However, |offset| is *not* restricted to |[0, old_image_size_)|; the
// caller must to make the check (hence "naive").
offset_t NaiveExtendedForwardProject(const Equivalence& unit,
offset_t offset) const;
// Returns an offset in |new_image| corresponding to |offset| in |old_image|.
// Assumes |equivalences_| to be non-empty. Cases:
// - If |offset| is covered (i.e., in an "old" block), then use the delta of
// the (unique) equivalence unit that covers |offset|.
// - If |offset| is non-covered, but in |[0, old_image_size_)|, then find the
// nearest "old" block, use its delta, and avert underflow / overflow by
// clamping the result to |[0, new_image_size_)|.
// - If |offset| is >= |new_image_size_| (a "fake offset"), then use
// |new_image_size_ - old_image_size_| as the delta.
offset_t ExtendedForwardProject(offset_t offset) const;
// Given sorted |offsets|, applies a projection in-place of all offsets that
// are part of a pruned equivalence from |old_image| to |new_image|. Other
// offsets are removed from |offsets|.
void ForwardProjectAll(std::vector<offset_t>* offsets) const;
// Accessor for testing.
const std::vector<Equivalence> equivalences() const { return equivalences_; }
// Sorts |equivalences| by |src_offset| and removes all source overlaps; so a
// source location that was covered by some Equivalence would become covered
// by exactly one Equivalence. Moreover, for the offset, the equivalence
// corresponds to the largest (pre-pruning) covering Equivalence, and in case
// of a tie, the Equivalence with minimal |src_offset|. |equivalences| may
// change in size since empty Equivalences are removed.
static void PruneEquivalencesAndSortBySource(
std::vector<Equivalence>* equivalences);
private:
// |equivalences_| is pruned, i.e., no "old" blocks overlap (and no "new"
// block overlaps). Also, it is sorted by "old" offsets.
std::vector<Equivalence> equivalences_;
const offset_t old_image_size_;
const offset_t new_image_size_;
};
// Container of equivalences between |old_image_index| and |new_image_index|,
// sorted by |Equivalence::dst_offset|, only used during patch generation.
class EquivalenceMap {
public:
using const_iterator = std::vector<EquivalenceCandidate>::const_iterator;
EquivalenceMap();
// Initializes the object with |equivalences|.
explicit EquivalenceMap(std::vector<EquivalenceCandidate>&& candidates);
EquivalenceMap(EquivalenceMap&&);
EquivalenceMap(const EquivalenceMap&) = delete;
~EquivalenceMap();
// Finds relevant equivalences between |old_view| and |new_view|, using
// suffix array |old_sa| computed from |old_view| and using
// |targets_affinities| to evaluate similarity between references. This
// function is not symmetric. Equivalences might overlap in |old_view|, but
// not in |new_view|. It tries to maximize accumulated similarity within each
// equivalence, while maximizing |new_view| coverage. The minimum similarity
// of an equivalence is given by |min_similarity|.
void Build(const std::vector<offset_t>& old_sa,
const EncodedView& old_view,
const EncodedView& new_view,
const std::vector<TargetsAffinity>& targets_affinities,
double min_similarity);
size_t size() const { return candidates_.size(); }
const_iterator begin() const { return candidates_.begin(); }
const_iterator end() const { return candidates_.end(); }
private:
// Discovers equivalence candidates between |old_view| and |new_view| and
// stores them in the object. Note that resulting candidates are not sorted
// and might be overlapping in new image.
void CreateCandidates(const std::vector<offset_t>& old_sa,
const EncodedView& old_view,
const EncodedView& new_view,
const std::vector<TargetsAffinity>& targets_affinities,
double min_similarity);
// Sorts candidates by their offset in new image.
void SortByDestination();
// Visits |candidates_| (sorted by |dst_offset|) and remove all destination
// overlaps. Candidates with low similarity scores are more likely to be
// shrunken. Unfit candidates may be removed.
void Prune(const EncodedView& old_view,
const EncodedView& new_view,
const std::vector<TargetsAffinity>& targets_affinities,
double min_similarity);
std::vector<EquivalenceCandidate> candidates_;
};
} // namespace zucchini
#endif // COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_