| // Copyright (c) 2022 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "components/omnibox/browser/history_fuzzy_provider.h" |
| |
| #include <algorithm> |
| #include <functional> |
| #include <memory> |
| #include <ostream> |
| #include <queue> |
| #include <string> |
| #include <unordered_map> |
| #include <utility> |
| #include <vector> |
| |
| #include "base/check.h" |
| #include "base/memory/raw_ptr.h" |
| #include "base/metrics/histogram_functions.h" |
| #include "base/strings/stringprintf.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "base/trace_event/memory_usage_estimator.h" |
| #include "base/trace_event/trace_event.h" |
| #include "components/history/core/browser/history_database.h" |
| #include "components/history/core/browser/history_db_task.h" |
| #include "components/history/core/browser/history_service.h" |
| #include "components/history/core/browser/url_database.h" |
| #include "components/omnibox/browser/autocomplete_match_classification.h" |
| #include "components/omnibox/browser/autocomplete_match_type.h" |
| #include "components/omnibox/browser/autocomplete_provider_client.h" |
| #include "components/omnibox/browser/autocomplete_result.h" |
| #include "components/omnibox/browser/omnibox_field_trial.h" |
| #include "components/search_engines/omnibox_focus_type.h" |
| #include "components/url_formatter/elide_url.h" |
| #include "url/gurl.h" |
| |
| namespace { |
| |
| // Histogram names for measuring sub-provider match conversion efficacy. |
| // Reminder in case other sub-providers or metrics are added: update |
| // the `Omnibox.FuzzyMatchConversion` entry in histograms.xml. |
| const char kMetricMatchConversionHistoryQuick[] = |
| "Omnibox.FuzzyMatchConversion.HistoryQuick"; |
| const char kMetricMatchConversionBookmark[] = |
| "Omnibox.FuzzyMatchConversion.Bookmark"; |
| |
| // This cap ensures the search trie will not grow without bound. Up to half |
| // the total capacity may be filled at startup from loaded significant URLs. |
| // The enforced limit may be further constrained by |
| // `MaxNumHQPUrlsIndexedAtStartup`. |
| constexpr int kMaxTerminalCount = 1000; |
| |
| // This utility function reduces a URL to the most meaningful and likely part |
| // of the hostname to be matched against, i.e. the domain, the URL's TLD+1. |
| // May return an empty string if the given URL is not a good candidate for |
| // meaningful domain name matching. |
| std::u16string UrlDomainReduction(const GURL& url) { |
| std::u16string url_host; |
| std::u16string url_domain; |
| url_formatter::SplitHost(url, &url_host, &url_domain, nullptr); |
| return url_domain; |
| } |
| |
| // This utility function prepares input text for fuzzy matching, or returns |
| // an empty string in cases unlikely to be worth a fuzzy matching search. |
| // Note, this is intended to be a fast way to improve matching and eliminate |
| // likely-unfruitful searches. It could make use of `SplitHost` as above, or |
| // `url_formatter::FormatUrlForDisplayOmitSchemePathAndTrivialSubdomains`, |
| // which uses `FormatUrlWithAdjustments` under the hood, but all that URL |
| // processing for input text that may not even be a URL seems like overkill, |
| // so this simple direct method is used instead. |
// Prepares input text for fuzzy matching, or returns an empty string in
// cases unlikely to be worth a fuzzy matching search. Note, this is
// intended to be a fast way to improve matching and eliminate
// likely-unfruitful searches; full URL parsing for input text that may not
// even be a URL would be overkill, so simple string scans are used instead.
std::u16string ReduceInputTextForMatching(const std::u16string& input) {
  constexpr size_t kMaximumFuzzyMatchInputLength = 32;
  constexpr size_t kPathCharacterCountToStopSearch = 6;
  constexpr size_t kPostDotCharacterCountHintingSubdomain = 4;

  // Bail out early for inputs unlikely to benefit from fuzzy matching:
  // - Long inputs are costly to search, and length itself hints the text
  //   was pasted or edited in place rather than typed.
  //   TODO(orinj): Consider tracking trie depth for use as maximum here.
  // - Spaces hint that the input may be a search, not a URL.
  // - Anything scheme-like hints at an existing URL or a deliberate edit,
  //   not a messy human input that may need fuzzy matching.
  const bool has_space = input.find(u' ') != std::u16string::npos;
  const bool has_scheme = input.find(u"://") != std::u16string::npos;
  if (input.length() > kMaximumFuzzyMatchInputLength || has_space ||
      has_scheme) {
    return std::u16string();
  }

  std::u16string candidate = input;

  // While typing a URL, the user may typo the domain but then continue on
  // to the path; keeping input up to the path separator keeps the window
  // open for fuzzy matching the domain as they continue to type, but we
  // don't want to keep it open forever (doing so could result in
  // potentially sticky false positives).
  const size_t slash = candidate.find(u'/');
  if (slash != std::u16string::npos) {
    if (slash + kPathCharacterCountToStopSearch < candidate.length()) {
      // User has moved well beyond typing domain and hasn't taken any fuzzy
      // suggestions provided so far, and they won't get better, so we can
      // save compute and suggestion results space by stopping the search.
      return std::u16string();
    }
    candidate.resize(slash);
  }

  // Keep input with dot if near the end (within range of .com, .org, .edu).
  // With a dot earlier in the string, the user might be typing a subdomain
  // and we only have the TLD+1 stored in the trie, so skip the dot and match
  // against the remaining text. This may be helpful in common cases like
  // typing an unnecessary "www." before the domain name.
  const size_t dot = candidate.find(u'.');
  if (dot != std::u16string::npos &&
      dot + kPostDotCharacterCountHintingSubdomain < candidate.length()) {
    candidate.erase(0, dot + 1);
  }

  return candidate;
}
| |
| } // namespace |
| |
| namespace fuzzy { |
| |
| Correction::Correction(const Correction& other) { |
| kind = other.kind; |
| at = other.at; |
| new_char = other.new_char; |
| if (other.next) { |
| next = std::make_unique<Correction>(*other.next.get()); |
| } |
| } |
| |
// Move construction transfers the correction chain without copying.
Correction::Correction(Correction&&) = default;

// Constructs a single correction with no chained `next` correction.
Correction::Correction(Kind kind, size_t at, char16_t new_char)
    : kind(kind), at(at), new_char(new_char) {}

// Constructs a correction that takes ownership of a chained correction;
// the chain is applied in sequence by `ApplyTo`.
Correction::Correction(Kind kind,
                       size_t at,
                       char16_t new_char,
                       std::unique_ptr<Correction> next)
    : kind(kind), at(at), new_char(new_char), next(std::move(next)) {}

Correction::~Correction() = default;
| |
| void Correction::ApplyTo(std::u16string& text) const { |
| switch (kind) { |
| case Kind::DELETE: { |
| text.erase(at, 1); |
| break; |
| } |
| case Kind::INSERT: { |
| text.insert(at, 1, new_char); |
| break; |
| } |
| case Kind::REPLACE: { |
| text[at] = new_char; |
| break; |
| } |
| case Kind::KEEP: |
| default: { |
| NOTREACHED(); |
| break; |
| } |
| } |
| if (next) { |
| next->ApplyTo(text); |
| } |
| } |
| |
| std::unique_ptr<Correction> Correction::GetApplicableCorrection() { |
| if (kind == Kind::KEEP) { |
| // Because this function eliminates KEEP corrections as the chain is built, |
| // it doesn't need to work recursively; a single elimination is sufficient. |
| DCHECK(!next || next->kind != Kind::KEEP); |
| return next ? std::make_unique<Correction>(*next) : nullptr; |
| } else { |
| // TODO(orinj): Consider a shared ownership model or preallocated pool to |
| // eliminate lots of copy allocations. For now this is kept simple with |
| // direct ownership of the full correction chain. |
| return std::make_unique<Correction>(*this); |
| } |
| } |
| |
| // This operator implementation is for debugging. |
| std::ostream& operator<<(std::ostream& os, const Correction& correction) { |
| os << '{'; |
| switch (correction.kind) { |
| case Correction::Kind::KEEP: { |
| os << 'K'; |
| break; |
| } |
| case Correction::Kind::DELETE: { |
| os << 'D'; |
| break; |
| } |
| case Correction::Kind::INSERT: { |
| os << 'I'; |
| break; |
| } |
| case Correction::Kind::REPLACE: { |
| os << 'R'; |
| break; |
| } |
| default: { |
| NOTREACHED(); |
| break; |
| } |
| } |
| os << "," << correction.at << "," << static_cast<char>(correction.new_char) |
| << "}"; |
| return os; |
| } |
| |
// Nodes default-construct empty and are movable; children are owned via
// `unique_ptr` (see `Insert`), so nodes are effectively move-only.
Node::Node() = default;

Node::Node(Node&&) = default;

Node::~Node() = default;
| |
| void Node::Insert(const std::u16string& text, size_t text_index) { |
| if (text_index >= text.length()) { |
| relevance_total += 1 - relevance; |
| relevance = 1; |
| return; |
| } |
| std::unique_ptr<Node>& node = next[text[text_index]]; |
| if (!node) { |
| node = std::make_unique<Node>(); |
| } |
| relevance_total -= node->relevance_total; |
| node->Insert(text, text_index + 1); |
| relevance_total += node->relevance_total; |
| } |
| |
// Removes `text` (from `text_index` onward) from the trie, pruning child
// subtrees whose relevance totals drop to zero and keeping each ancestor's
// `relevance_total` consistent. Deleting text not present is a no-op.
void Node::Delete(const std::u16string& text, size_t text_index) {
  if (text_index < text.length()) {
    auto it = next.find(text[text_index]);
    if (it != next.end()) {
      Node* const node = it->second.get();
      // Subtract the child's total before mutating it; afterwards either
      // prune the now-empty child or add its updated total back in.
      relevance_total -= node->relevance_total;
      node->Delete(text, text_index + 1);
      if (node->relevance_total == 0) {
        next.erase(it);
      } else {
        relevance_total += node->relevance_total;
      }
    }
  } else {
    // End of text: unmark this node as a terminal.
    relevance_total -= relevance;
    relevance = 0;
  }
}
| |
| void Node::Clear() { |
| next.clear(); |
| } |
| |
// Searches the trie for terminal strings within edit distance of `text`,
// bounded by `tolerance_schedule`. Returns true when `text` itself is on
// the trie (no correction needed); otherwise fills `corrections` with the
// minimal-distance correction chains discovered (possibly none). The
// search is a best-first walk (priority queue) over (trie node, input
// index) states, branching on delete/keep/insert/replace edits.
bool Node::FindCorrections(const std::u16string& text,
                           ToleranceSchedule tolerance_schedule,
                           std::vector<Correction>& corrections) const {
  DVLOG(1) << "FindCorrections(" << text << ", " << tolerance_schedule.limit
           << ")";
  DCHECK(corrections.empty());

  // Empty input trivially matches (the root itself).
  if (text.length() == 0) {
    return true;
  }

  // A utility class to track search progression.
  struct Step {
    // Walks through trie.
    raw_ptr<const Node> node;

    // Edit distance.
    int distance;

    // Advances through input text. This effectively tells how much of the
    // input has been consumed so far, regardless of output text length.
    size_t index;

    // Length of corrected text. This tells how long the output string will
    // be, regardless of input text length. It is independent of `index`
    // because corrections are not only 1:1 replacements but may involve
    // insertions or deletions as well.
    int length;

    // Backtracking data to enable text correction (from end of string back
    // to beginning, i.e. correction chains are applied in reverse).
    // TODO(orinj): This should be optimized in final algorithm; stop copying.
    Correction correction;

    // std::priority_queue keeps the greatest element on top, so we want this
    // operator implementation to make bad steps less than good steps.
    // Prioritize minimum distance, with index and length to break ties.
    // The first found solutions are best, and fastest in common cases
    // near input on trie.
    bool operator<(const Step& rhs) const {
      if (distance > rhs.distance) {
        return true;
      } else if (distance == rhs.distance) {
        if (index < rhs.index) {
          return true;
        } else if (index == rhs.index) {
          return length < rhs.length;
        }
      }
      return false;
    }
  };

  // Seed the search at this node with zero distance and a sentinel KEEP
  // correction (stripped later by `GetApplicableCorrection`).
  std::priority_queue<Step> pq;
  pq.push({this, 0, 0, 0, {Correction::Kind::KEEP, 0, '_'}});

  Step best{
      nullptr, INT_MAX, SIZE_MAX, INT_MAX, {Correction::Kind::KEEP, 0, '_'}};
  // `i` counts popped steps, for debug logging only.
  int i = 0;
  // Find and return all equally-distant results as soon as distance increases
  // beyond that of first found results. Length is also considered to
  // avoid producing shorter substring texts.
  while (!pq.empty() && pq.top().distance <= best.distance) {
    i++;
    Step step = pq.top();
    pq.pop();
    DVLOG(1) << i << "(" << step.distance << "," << step.index << ","
             << step.length << "," << step.correction << ")";
    // TODO(orinj): Enforce a tolerance schedule with index versus distance;
    // this would allow more errors for longer inputs and prevents searching
    // through long corrections near start of input.
    // Strictly greater should not be possible for this comparison.
    if (step.index >= text.length()) {
      if (step.distance == 0) {
        // Ideal common case, full input on trie with no correction required.
        // Because search is directed by priority_queue, we get here before
        // generating any corrections (straight line to goal is shortest path).
        DCHECK(corrections.empty());
        return true;
      }
      // Check `length` to keep longer results. Without this, we could end up
      // with shorter substring corrections (e.g. both "was" and "wash").
      // It may not be necessary to do this if priority_queue keeps results
      // optimal or returns a first best result immediately.
      DCHECK(best.distance == INT_MAX || step.distance == best.distance);
      if (step.distance < best.distance || step.length > best.length) {
        DVLOG(1) << "new best by "
                 << (step.distance < best.distance ? "distance" : "length");
        best = std::move(step);
        corrections.clear();
        // Dereference is safe because nonzero distance implies presence of
        // nontrivial correction.
        corrections.emplace_back(*best.correction.GetApplicableCorrection());
      } else {
        // Equal distance.
        // Strictly greater should not be possible for this comparison.
        if (step.length >= best.length) {
          // Dereference is safe because this is another equally
          // distant correction, necessarily discovered after the first.
          corrections.emplace_back(*step.correction.GetApplicableCorrection());
        }
#if DCHECK_ALWAYS_ON
        std::u16string corrected = text;
        step.correction.GetApplicableCorrection()->ApplyTo(corrected);
        DCHECK_EQ(corrected.length(), static_cast<size_t>(step.length))
            << corrected;
#endif
      }
      continue;
    }
    // The edit budget may vary with position in the input per the schedule.
    int tolerance = tolerance_schedule.ToleranceAt(step.index);
    if (step.distance < tolerance) {
      // Delete
      pq.push({step.node,
               step.distance + 1,
               step.index + 1,
               step.length,
               {Correction::Kind::DELETE, step.index, '_',
                step.correction.GetApplicableCorrection()}});
    }
    for (const auto& entry : step.node->next) {
      if (entry.first == text[step.index]) {
        // Keep
        pq.push({entry.second.get(),
                 step.distance,
                 step.index + 1,
                 step.length + 1,
                 {Correction::Kind::KEEP, step.index, '_',
                  step.correction.GetApplicableCorrection()}});
      } else if (step.distance < tolerance) {
        // Insert
        pq.push({entry.second.get(),
                 step.distance + 1,
                 step.index,
                 step.length + 1,
                 {Correction::Kind::INSERT, step.index, entry.first,
                  step.correction.GetApplicableCorrection()}});
        // Replace. Note, we do not replace at the same position as a previous
        // insertion because doing so could produce unnecessary duplicates.
        if (step.correction.kind != Correction::Kind::INSERT ||
            step.correction.at != step.index) {
          pq.push({entry.second.get(),
                   step.distance + 1,
                   step.index + 1,
                   step.length + 1,
                   {Correction::Kind::REPLACE, step.index, entry.first,
                    step.correction.GetApplicableCorrection()}});
        }
      }
    }
  }
  if (!pq.empty()) {
    DVLOG(1) << "quit early on step with distance " << pq.top().distance;
  }
  return false;
}
| |
| size_t Node::EstimateMemoryUsage() const { |
| size_t res = 0; |
| res += base::trace_event::EstimateMemoryUsage(next); |
| return res; |
| } |
| |
// Returns the number of complete (terminal) strings stored at or below
// this node.
int Node::TerminalCount() const {
  // This works as long as `relevance` values mark terminals with 1 and
  // non-terminals with 0; see `Insert()`.
  return relevance_total;
}
| |
| // This task class loads URLs considered significant according to |
| // `HistoryDatabase::InitURLEnumeratorForSignificant` but there's nothing |
| // special about that implementation; we may do something different for |
| // fuzzy matching. The goal in general is to load and keep a reasonably sized |
| // set of likely relevant host names for fast fuzzy correction. |
// This task class loads URLs considered significant according to
// `HistoryDatabase::InitURLEnumeratorForSignificant` but there's nothing
// special about that implementation; we may do something different for
// fuzzy matching. The goal in general is to load and keep a reasonably sized
// set of likely relevant host names for fast fuzzy correction.
class LoadSignificantUrls : public history::HistoryDBTask {
 public:
  // Invoked on the main thread with the trie built on the DB thread.
  using Callback = base::OnceCallback<void(Node)>;

  // `event` is signaled after `callback` runs, telling the provider the
  // trie is ready for use (see `HistoryFuzzyProvider::Start`).
  LoadSignificantUrls(base::WaitableEvent* event, Callback callback)
      : wait_event_(event), callback_(std::move(callback)) {
    DVLOG(1) << "LoadSignificantUrls ctor thread "
             << base::PlatformThread::CurrentId();
  }
  ~LoadSignificantUrls() override = default;

  // Builds the trie from significant URL domains; runs on the history DB
  // thread. Returns true to indicate the task is complete.
  bool RunOnDBThread(history::HistoryBackend* backend,
                     history::HistoryDatabase* db) override {
    DVLOG(1) << "LoadSignificantUrls run on db thread "
             << base::PlatformThread::CurrentId() << "; db: " << db;
    history::URLDatabase::URLEnumerator enumerator;
    if (db && db->InitURLEnumeratorForSignificant(&enumerator)) {
      DVLOG(1) << "Got InMemoryDatabase";
      history::URLRow row;
      // The `MaxNumHQPUrlsIndexedAtStartup` dependency here is to ensure
      // that we keep a lower cap for mobile; it's much higher on desktop.
      // Note the divide, which ensures at least half the capacity will be kept
      // for later visited domains. `GetNextUrl` takes the most significant
      // URLs from the database (enumerator order) and duplicates won't count.
      const int max_terminal_count =
          std::min(OmniboxFieldTrial::MaxNumHQPUrlsIndexedAtStartup(),
                   kMaxTerminalCount) /
          2;
      while (enumerator.GetNextURL(&row) &&
             node_.TerminalCount() < max_terminal_count) {
        DVLOG(1) << "url #" << row.id() << ": " << row.url().host();
        node_.Insert(UrlDomainReduction(row.url()), 0);
      }
    } else {
      DVLOG(1) << "No significant InMemoryDatabase";
    }
    return true;
  }

  // Hands the built trie to the callback and signals completion; runs on
  // the main thread after `RunOnDBThread` finishes.
  void DoneRunOnMainThread() override {
    DVLOG(1) << "Done thread " << base::PlatformThread::CurrentId();
    std::move(callback_).Run(std::move(node_));
    wait_event_->Signal();
  }

 private:
  // Trie built on the DB thread, then moved to `callback_`.
  Node node_;
  raw_ptr<base::WaitableEvent> wait_event_;
  Callback callback_;
};
| |
| } // namespace fuzzy |
| |
// Observes the history service (to keep the trie in sync with visits and
// deletions) and schedules the initial trie load on the history DB thread.
// The weak pointer protects against the load task outliving this provider.
HistoryFuzzyProvider::HistoryFuzzyProvider(
    AutocompleteProviderClient* client,
    HistoryQuickProvider* history_quick_provider,
    BookmarkProvider* bookmark_provider)
    : HistoryProvider(AutocompleteProvider::TYPE_HISTORY_FUZZY, client),
      history_quick_provider_(history_quick_provider),
      bookmark_provider_(bookmark_provider) {
  history_service_observation_.Observe(client->GetHistoryService());
  client->GetHistoryService()->ScheduleDBTask(
      FROM_HERE,
      std::make_unique<fuzzy::LoadSignificantUrls>(
          &urls_loaded_event_,
          base::BindOnce(&HistoryFuzzyProvider::OnUrlsLoaded,
                         weak_ptr_factory_.GetWeakPtr())),
      &task_tracker_);
}
| |
| void HistoryFuzzyProvider::Start(const AutocompleteInput& input, |
| bool minimal_changes) { |
| TRACE_EVENT0("omnibox", "HistoryFuzzyProvider::Start"); |
| matches_.clear(); |
| if (input.focus_type() != OmniboxFocusType::DEFAULT || |
| input.type() == metrics::OmniboxInputType::EMPTY) { |
| return; |
| } |
| |
| if (!urls_loaded_event_.IsSignaled()) { |
| return; |
| } |
| |
| // When there are no sub-providers, bypass fuzzy search completely. |
| if (history_quick_provider_ == nullptr && bookmark_provider_ == nullptr) { |
| return; |
| } |
| |
| autocomplete_input_ = input; |
| |
| // Fuzzy matching intends to correct quick typos, and because it may involve |
| // a compute intensive search, some conditions are checked to bypass this |
| // provider early. When the cursor is moved from the end of input string, |
| // user may have slowed down to edit manually. |
| if (autocomplete_input_.cursor_position() == |
| autocomplete_input_.text().length()) { |
| DoAutocomplete(); |
| for (AutocompleteMatch& match : matches_) { |
| match.provider = this; |
| } |
| } |
| } |
| |
| size_t HistoryFuzzyProvider::EstimateMemoryUsage() const { |
| size_t res = HistoryProvider::EstimateMemoryUsage(); |
| res += base::trace_event::EstimateMemoryUsage(autocomplete_input_); |
| res += base::trace_event::EstimateMemoryUsage(root_); |
| return res; |
| } |
| |
| HistoryFuzzyProvider::~HistoryFuzzyProvider() = default; |
| |
| void HistoryFuzzyProvider::DoAutocomplete() { |
| // TODO(orinj): This schedule may want some measurement and tinkering. |
| constexpr fuzzy::ToleranceSchedule kToleranceSchedule = { |
| .start_index = 1, |
| .step_length = 4, |
| .limit = 3, |
| }; |
| |
| const std::u16string& text = |
| ReduceInputTextForMatching(autocomplete_input_.text()); |
| if (text.length() == 0) { |
| DVLOG(1) << "Skipping fuzzy for input '" << autocomplete_input_.text() |
| << "'"; |
| return; |
| } |
| std::vector<fuzzy::Correction> corrections; |
| DVLOG(1) << "FindCorrections: <" << text << "> ---> ?{"; |
| if (root_.FindCorrections(text, kToleranceSchedule, corrections)) { |
| DVLOG(1) << "Trie contains input; no fuzzy results needed"; |
| } |
| if (!corrections.empty()) { |
| int count_history_quick = 0; |
| int count_bookmark = 0; |
| for (const auto& correction : corrections) { |
| std::u16string fixed = text; |
| correction.ApplyTo(fixed); |
| DVLOG(1) << ": " << fixed; |
| |
| // Note the `cursor_position` could be changed by insert or delete |
| // corrections, but this is easy to adapt since we only fuzzy |
| // match when cursor is at end of input; just move to new end. |
| DCHECK_EQ(autocomplete_input_.cursor_position(), |
| autocomplete_input_.text().length()); |
| AutocompleteInput corrected_input( |
| fixed, fixed.length(), |
| autocomplete_input_.current_page_classification(), |
| client()->GetSchemeClassifier()); |
| |
| if (history_quick_provider_) { |
| history_quick_provider_->Start(corrected_input, false); |
| DCHECK(history_quick_provider_->done()); |
| count_history_quick += |
| AddConvertedMatches(history_quick_provider_->matches()); |
| } |
| if (bookmark_provider_) { |
| bookmark_provider_->Start(corrected_input, false); |
| DCHECK(bookmark_provider_->done()); |
| count_bookmark += AddConvertedMatches(bookmark_provider_->matches()); |
| } |
| } |
| if (matches_.size() > provider_max_matches_) { |
| // When too many matches are generated, take only the most relevant |
| // matches and correct the counts for accurate metrics. |
| std::partial_sort(matches_.begin(), |
| matches_.begin() + provider_max_matches_, |
| matches_.end(), AutocompleteMatch::MoreRelevant); |
| for (size_t i = provider_max_matches_; i < matches_.size(); i++) { |
| DCHECK(matches_[i].provider != nullptr && |
| matches_[i].provider == history_quick_provider_ || |
| matches_[i].provider == bookmark_provider_) |
| << matches_[i].provider->GetName(); |
| if (matches_[i].provider == history_quick_provider_) { |
| count_history_quick--; |
| } else { |
| count_bookmark--; |
| } |
| } |
| matches_.resize(provider_max_matches_); |
| } |
| RecordMatchConversion(kMetricMatchConversionHistoryQuick, |
| count_history_quick); |
| RecordMatchConversion(kMetricMatchConversionBookmark, count_bookmark); |
| DVLOG(1) << "}?"; |
| } |
| } |
| |
| int HistoryFuzzyProvider::AddConvertedMatches(const ACMatches& matches) { |
| if (matches.empty()) { |
| return 0; |
| } |
| |
| // Take only the most relevant match, to give the best chance of keeping |
| // the penalized fuzzy match while reducing risk of possible noise. |
| // Note that min_element is used instead of max_element because |
| // `AutocompleteMatch::MoreRelevant` reverses standard sort order such that |
| // matches with greater relevance are considered less than matches with lesser |
| // relevance. For performance reasons, `CompareWithDemoteByType` is not used, |
| // so ranking of the final result set will be more nuanced than ranking here. |
| ACMatches::const_iterator it = std::min_element( |
| matches.begin(), matches.end(), AutocompleteMatch::MoreRelevant); |
| DCHECK(it != matches.end()); |
| DVLOG(1) << "Converted match: " << it->contents; |
| matches_.push_back(*it); |
| |
| // Update match in place. Note, `match.provider` will be reassigned after |
| // `DoAutocomplete` because source sub-provider must be kept for metrics. |
| AutocompleteMatch& match = matches_.back(); |
| match.inline_autocompletion.clear(); |
| match.allowed_to_be_default_match = false; |
| |
| // Apply relevance penalty; all corrections are equal and we only apply this |
| // to the most relevant result, so edit distance isn't needed. |
| // Relevance range are nuanced enough that this should be kept simple. |
| // Using 9/10 reasonably took a 1334 relevance match down to 1200, |
| // but was harmful to HQP suggestions: as soon as a '.' was |
| // appended, a bunch of ~800 navsuggest results overtook a better |
| // HQP result that was bumped down to ~770. Using 95/100 lets this |
| // result compete in the navsuggest range. |
| match.relevance = match.relevance * 95 / 100; |
| match.contents_class.clear(); |
| match.contents_class.push_back( |
| {0, AutocompleteMatch::ACMatchClassification::DIM}); |
| |
| return 1; |
| } |
| |
// Receives the trie built on the history DB thread (see
// `LoadSignificantUrls`) and adopts it as the search root.
void HistoryFuzzyProvider::OnUrlsLoaded(fuzzy::Node node) {
  root_ = std::move(node);
}
| |
| void HistoryFuzzyProvider::OnURLVisited( |
| history::HistoryService* history_service, |
| ui::PageTransition transition, |
| const history::URLRow& row, |
| base::Time visit_time) { |
| DVLOG(1) << "URL Visit: " << row.url(); |
| if (root_.TerminalCount() < |
| std::min(OmniboxFieldTrial::MaxNumHQPUrlsIndexedAtStartup(), |
| kMaxTerminalCount)) { |
| root_.Insert(UrlDomainReduction(row.url()), 0); |
| } |
| } |
| |
| void HistoryFuzzyProvider::OnURLsDeleted( |
| history::HistoryService* history_service, |
| const history::DeletionInfo& deletion_info) { |
| // Note, this implementation is conservative in terms of user privacy; it |
| // deletes hosts from the trie if any URL with the given host is deleted. |
| if (deletion_info.IsAllHistory()) { |
| root_.Clear(); |
| } else { |
| for (const history::URLRow& row : deletion_info.deleted_rows()) { |
| root_.Delete(UrlDomainReduction(row.url()), 0); |
| } |
| } |
| } |
| |
// Records `count` converted matches for the histogram `name` (one of the
// kMetricMatchConversion* constants) as an exact-linear sample bounded by
// the maximum autocomplete position.
void HistoryFuzzyProvider::RecordMatchConversion(const char* name, int count) {
  base::UmaHistogramExactLinear(
      name, count, AutocompleteResult::kMaxAutocompletePositionValue);
}