| // Copyright 2015 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "courgette/label_manager.h" |
| |
| #include <stddef.h> |
| #include <stdint.h> |
| |
| #include <algorithm> |
| |
| #include "base/logging.h" |
| #include "base/numerics/safe_conversions.h" |
| #include "base/numerics/safe_math.h" |
| #include "courgette/consecutive_range_visitor.h" |
| |
| namespace courgette { |
| |
| LabelManager::LabelManager() {} |
| |
| LabelManager::~LabelManager() {} |
| |
| // static |
| int LabelManager::GetIndexBound(const LabelVector& labels) { |
| int max_index = -1; |
| for (const Label& label : labels) { |
| if (label.index_ != Label::kNoIndex) |
| max_index = std::max(max_index, label.index_); |
| } |
| return max_index + 1; |
| } |
| |
| // static |
| int LabelManager::GetIndexBound(const RVAToLabel& labels_map) { |
| int max_index = -1; |
| for (const auto& rva_and_label : labels_map) { |
| const Label& label = *rva_and_label.second; |
| if (label.index_ != Label::kNoIndex) |
| max_index = std::max(max_index, label.index_); |
| } |
| return max_index + 1; |
| } |
| |
| LabelManagerImpl::RvaVisitor::~RvaVisitor() {} |
| |
| LabelManagerImpl::SimpleIndexAssigner::SimpleIndexAssigner(LabelVector* labels) |
| : labels_(labels) { |
| // Initialize |num_index_| and |available_|. |
| num_index_ = std::max(base::checked_cast<int>(labels_->size()), |
| LabelManager::GetIndexBound(*labels_)); |
| available_.resize(num_index_, true); |
| size_t used = 0; |
| for (const Label& label : *labels_) { |
| if (label.index_ != Label::kNoIndex) { |
| available_.at(label.index_) = false; |
| ++used; |
| } |
| } |
| VLOG(1) << used << " of " << labels_->size() << " labels pre-assigned."; |
| } |
| |
| LabelManagerImpl::SimpleIndexAssigner::~SimpleIndexAssigner() {} |
| |
| void LabelManagerImpl::SimpleIndexAssigner::DoForwardFill() { |
| size_t count = 0; |
| // Inside the loop, if |prev_index| == |kNoIndex| then we try to assign 0. |
| // This allows 0 (if unused) to be assigned in middle of |labels_|. |
| int prev_index = Label::kNoIndex; |
| for (auto p = labels_->begin(); p != labels_->end(); ++p) { |
| if (p->index_ == Label::kNoIndex) { |
| int index = (prev_index == Label::kNoIndex) ? 0 : prev_index + 1; |
| if (index < num_index_ && available_.at(index)) { |
| p->index_ = index; |
| available_.at(index) = false; |
| ++count; |
| } |
| } |
| prev_index = p->index_; |
| } |
| VLOG(1) << " fill forward " << count; |
| } |
| |
| void LabelManagerImpl::SimpleIndexAssigner::DoBackwardFill() { |
| size_t count = 0; |
| // This is asymmetric from DoForwardFill(), to preserve old behavior. |
| // Inside the loop, if |prev_index| == |kNoIndex| then we skip assignment. |
| // But we initilaize |prev_index| = |num_index_|, so if the last element in |
| // |labels_| has no index, then can use |num_index_| - 1 (if unused). We don't |
| // try this assignment elsewhere. |
| int prev_index = num_index_; |
| for (auto p = labels_->rbegin(); p != labels_->rend(); ++p) { |
| if (p->index_ == Label::kNoIndex && prev_index != Label::kNoIndex) { |
| int index = prev_index - 1; |
| if (index >= 0 && available_.at(index)) { |
| p->index_ = index; |
| available_.at(index) = false; |
| ++count; |
| } |
| } |
| prev_index = p->index_; |
| } |
| VLOG(1) << " fill backward " << count; |
| } |
| |
| void LabelManagerImpl::SimpleIndexAssigner::DoInFill() { |
| size_t count = 0; |
| int index = 0; |
| for (Label& label : *labels_) { |
| if (label.index_ == Label::kNoIndex) { |
| while (!available_.at(index)) |
| ++index; |
| label.index_ = index; |
| available_.at(index) = false; |
| ++index; |
| ++count; |
| } |
| } |
| VLOG(1) << " infill " << count; |
| } |
| |
| LabelManagerImpl::LabelManagerImpl() {} |
| |
| LabelManagerImpl::~LabelManagerImpl() {} |
| |
| // We wish to minimize peak memory usage here. Analysis: Let |
| // m = number of (RVA) elements in |rva_visitor|, |
| // n = number of distinct (RVA) elements in |rva_visitor|. |
| // The final storage is n * sizeof(Label) bytes. During computation we uniquify |
| // m RVAs, and count repeats. Taking sizeof(RVA) = 4, an implementation using |
| // std::map or std::unordered_map would consume additionally 32 * n bytes. |
| // Meanwhile, our std::vector implementation consumes additionally 4 * m bytes |
| // For our typical usage (i.e. Chrome) we see m = ~4n, so we use 16 * n bytes of |
| // extra contiguous memory during computation. Assuming memory fragmentation |
| // would not be an issue, this is much better than using std::map. |
| void LabelManagerImpl::Read(RvaVisitor* rva_visitor) { |
| // Write all values in |rva_visitor| to |rvas|. |
| size_t num_rva = rva_visitor->Remaining(); |
| std::vector<RVA> rvas(num_rva); |
| for (size_t i = 0; i < num_rva; ++i, rva_visitor->Next()) |
| rvas[i] = rva_visitor->Get(); |
| |
| // Sort |rvas|, then count the number of distinct values. |
| using CRV = ConsecutiveRangeVisitor<std::vector<RVA>::iterator>; |
| std::sort(rvas.begin(), rvas.end()); |
| DCHECK(rvas.empty() || rvas.back() != kUnassignedRVA); |
| |
| size_t num_distinct_rva = 0; |
| for (CRV it(rvas.begin(), rvas.end()); it.has_more(); it.advance()) |
| ++num_distinct_rva; |
| |
| // Reserve space for |labels_|, populate with sorted RVA and repeats. |
| DCHECK(labels_.empty()); |
| labels_.reserve(num_distinct_rva); |
| for (CRV it(rvas.begin(), rvas.end()); it.has_more(); it.advance()) { |
| labels_.push_back(Label(*it.cur())); |
| base::CheckedNumeric<uint32_t> count = it.repeat(); |
| labels_.back().count_ = count.ValueOrDie(); |
| } |
| } |
| |
| size_t LabelManagerImpl::Size() const { |
| return labels_.size(); |
| } |
| |
| // Uses binary search to find |rva|. |
| Label* LabelManagerImpl::Find(RVA rva) { |
| auto it = std::lower_bound( |
| labels_.begin(), labels_.end(), Label(rva), |
| [](const Label& l1, const Label& l2) { return l1.rva_ < l2.rva_; }); |
| return it == labels_.end() || it->rva_ != rva ? nullptr : &(*it); |
| } |
| |
| void LabelManagerImpl::RemoveUnderusedLabels(int32_t count_threshold) { |
| if (count_threshold <= 0) |
| return; |
| labels_.erase(std::remove_if(labels_.begin(), labels_.end(), |
| [count_threshold](const Label& label) { |
| return label.count_ < count_threshold; |
| }), |
| labels_.end()); |
| // Not shrinking |labels_|, since this may cause reallocation. |
| } |
| |
| void LabelManagerImpl::UnassignIndexes() { |
| for (Label& label : labels_) |
| label.index_ = Label::kNoIndex; |
| } |
| |
| void LabelManagerImpl::DefaultAssignIndexes() { |
| int cur_index = 0; |
| for (Label& label : labels_) { |
| CHECK_EQ(Label::kNoIndex, label.index_); |
| label.index_ = cur_index++; |
| } |
| } |
| |
| void LabelManagerImpl::AssignRemainingIndexes() { |
| // This adds some memory overhead, about 1 bit per Label (more if indexes >= |
| // |labels_.size()| get used). |
| SimpleIndexAssigner assigner(&labels_); |
| assigner.DoForwardFill(); |
| assigner.DoBackwardFill(); |
| assigner.DoInFill(); |
| } |
| |
| void LabelManagerImpl::SetLabels(const LabelVector& labels) { |
| labels_ = labels; |
| } |
| |
| } // namespace courgette |