| // Copyright 2012 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "components/omnibox/browser/shortcuts_backend.h" |
| |
| #include <stddef.h> |
| |
| #include <map> |
| #include <memory> |
| #include <set> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "base/functional/bind.h" |
| #include "base/i18n/break_iterator.h" |
| #include "base/i18n/case_conversion.h" |
| #include "base/metrics/histogram_macros.h" |
| #include "base/ranges/algorithm.h" |
| #include "base/strings/strcat.h" |
| #include "base/strings/string_util.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "base/task/single_thread_task_runner.h" |
| #include "base/task/thread_pool.h" |
| #include "base/uuid.h" |
| #include "components/omnibox/browser/autocomplete_input.h" |
| #include "components/omnibox/browser/autocomplete_match.h" |
| #include "components/omnibox/browser/autocomplete_match_type.h" |
| #include "components/omnibox/browser/autocomplete_result.h" |
| #include "components/omnibox/browser/base_search_provider.h" |
| #include "components/omnibox/browser/in_memory_url_index_types.h" |
| #include "components/omnibox/browser/omnibox_field_trial.h" |
| #include "components/omnibox/browser/shortcuts_database.h" |
| #include "components/omnibox/browser/tailored_word_break_iterator.h" |
| |
| namespace { |
| |
| // Takes Match classification vector and removes all matched positions, |
| // compacting repetitions if necessary. |
| std::string StripMatchMarkers(const ACMatchClassifications& matches) { |
| ACMatchClassifications unmatched; |
| for (const auto& match : matches) { |
| AutocompleteMatch::AddLastClassificationIfNecessary( |
| &unmatched, match.offset, match.style & ~ACMatchClassification::MATCH); |
| } |
| return AutocompleteMatch::ClassificationsToString(unmatched); |
| } |
| |
| // Normally shortcuts have the same match type as the original match they were |
| // created from, but for certain match types, we should modify the shortcut's |
| // type slightly to reflect that the origin of the shortcut is historical. |
| AutocompleteMatch::Type GetTypeForShortcut(AutocompleteMatch::Type type) { |
| switch (type) { |
| case AutocompleteMatchType::URL_WHAT_YOU_TYPED: |
| case AutocompleteMatchType::NAVSUGGEST: |
| case AutocompleteMatchType::NAVSUGGEST_PERSONALIZED: |
| return AutocompleteMatchType::HISTORY_URL; |
| |
| case AutocompleteMatchType::SEARCH_OTHER_ENGINE: |
| return type; |
| |
| default: |
| return AutocompleteMatch::IsSearchType(type) |
| ? AutocompleteMatchType::SEARCH_HISTORY |
| : type; |
| } |
| } |
| |
| // Expand the last word in `text` to a full word in `match_text`. E.g., if |
| // `text` is 'Cha Aznav' and the `match_text` is 'Charles Aznavour', will return |
| // 'Cha Aznavour'. |
| std::u16string ExpandToFullWord(const std::u16string& text, |
| const std::u16string& match_text) { |
| DCHECK(!text.empty()); |
| |
| const auto match_words = String16VectorFromString16(match_text, nullptr); |
| |
| // Trim the `text` to: |
| // 1) Avoid expanding, e.g., the `text` 'Cha Aznav ' to 'Cha Aznav ur'. |
| // 2) Avoid truncating the shortcut e.g., 'Cha Aznavour' to 'Cha ' for the |
| // `text` 'C' when `AddOrUpdateShortcut()` appends 3 chars to `text`. |
| // 3) Allow expanding, e.g., the `text` 'Cha ' to 'Charles'. |
| // 4) Even when not expanding, autocompleting trailing whitespace looks weird. |
| const auto trimmed_text = std::u16string( |
| base::TrimWhitespace(text, base::TrimPositions::TRIM_TRAILING)); |
| |
| // Use the lower cased text for string insensitive comparisons. Use the |
| // original case to construct the returned expanded text. |
| const auto trimmed_lower_text = base::i18n::ToLower(trimmed_text); |
| |
| // There may be multiple matching match words `text` can be expanded to. E.g. |
| // with `text` 'x' and `match_text` 'x1 x2', 'x' matches both 'x1' and 'x2'. |
| // 1) If `text` is a prefix of `match_text` (after trimming `text` whitespace |
| // and ignoring case), then pick the next match word. |
| // - E.g., 'x1 x' will expand to 'x1 x2', not 'x1 x1'. |
| // 2) Otherwise, pick the 1st match word at least 3 chars long. |
| // - Prefer the 1st match word because the alternatives (e.g., the longest, |
| // shortest, or match closest to the previous word) all have undesirable |
| // edge cases. E.g., if using the longest match, the `text` 'singer C', |
| // with match description 'Singer Charles Aznavour Performs Les |
| // Comediens', would expand to 'singer Comédiens'. |
| // - Prefer words at least 3 chars long to avoid expanding to 'a', 'at', |
| // 'to', etc when a more likely candidate exists. |
| // - E.g., 'x1 x x' will expand to 'x1 x x1', not 'x1 x x2'. |
| // 3) Otherwise, resort to the 1st match word of any length. |
| // - E.g, With `match_text` 'a ab xy xyz mn' |
| // 'x' will expand to 'xyz', not 'xy'. |
| // 'a' will expand to 'a', not 'ab'. |
| // 'm' will expand to 'mn'. |
| |
| // This handles case (1) from the above comment. |
| if (base::StartsWith(base::i18n::ToLower(match_text), trimmed_lower_text, |
| base::CompareCase::SENSITIVE)) { |
| // Cut off the common prefix. |
| const auto cut_match_text = match_text.substr(trimmed_lower_text.length()); |
| // Find the 1st word of the cut `match_text`. |
| TailoredWordBreakIterator iter(cut_match_text, |
| base::i18n::BreakIterator::BREAK_WORD); |
| // Append that word to the text. |
| if (iter.Init() && iter.Advance() && iter.IsWord()) |
| return base::StrCat({trimmed_text, iter.GetString()}); |
| } |
| |
| // Find the last word in `text` to expand. |
| WordStarts text_word_starts; |
| const auto text_words = |
| String16VectorFromString16(trimmed_lower_text, &text_word_starts); |
| // `String16VectorFromString16()` only considers the 1st 200 |
| // (`kMaxSignificantChars`) chars for word starts while it considers the full |
| // text for words. |
| DCHECK_LE(text_word_starts.size(), text_words.size()); |
| // Even though `text` won't be empty, it may contain no words if it consists |
| // of only symbols and whitespace. Additionally, even if it does contain |
| // words, if it ends with symbols, the last word shouldn't be expanded to |
| // avoid expanding, e.g., the text 'Cha*' to 'Cha*rles'. |
| if (text_word_starts.empty() || |
| text_word_starts.back() + text_words.back().length() != |
| trimmed_lower_text.length()) { |
| return trimmed_text; |
| } |
| const auto& text_last_word = text_words.back(); |
| |
| // This handles cases (2) and (3) from the above comment. |
| std::u16string best_word; |
| // Iterate up to 100 `description_words` for performance. |
| for (size_t i = 0; |
| i < match_words.size() && i < 100 && best_word.length() < 3u; ++i) { |
| if (match_words[i].length() < 3u && !best_word.empty()) |
| continue; |
| if (!base::StartsWith(base::i18n::ToLower(match_words[i]), text_last_word, |
| base::CompareCase::SENSITIVE)) |
| continue; |
| best_word = match_words[i]; |
| } |
| |
| // Add on the missing letters of `text_last_word`, rather than replace it with |
| // `best_word` to preserve capitalization. |
| return best_word.empty() |
| ? trimmed_text |
| : base::StrCat( |
| {trimmed_text, best_word.substr(text_last_word.length())}); |
| } |
| |
| } // namespace |
| |
| // ShortcutsBackend ----------------------------------------------------------- |
| |
| // static |
| const std::u16string& ShortcutsBackend::GetDescription( |
| const AutocompleteMatch& match) { |
| return match.swap_contents_and_description || |
| match.description_for_shortcuts.empty() |
| ? match.description |
| : match.description_for_shortcuts; |
| } |
| |
| // static |
| const std::u16string& ShortcutsBackend::GetSwappedDescription( |
| const AutocompleteMatch& match) { |
| return match.swap_contents_and_description ? GetContents(match) |
| : GetDescription(match); |
| } |
| |
| // static |
| const ACMatchClassifications& ShortcutsBackend::GetDescriptionClass( |
| const AutocompleteMatch& match) { |
| return match.swap_contents_and_description || |
| match.description_class_for_shortcuts.empty() |
| ? match.description_class |
| : match.description_class_for_shortcuts; |
| } |
| |
| // static |
| const std::u16string& ShortcutsBackend::GetContents( |
| const AutocompleteMatch& match) { |
| return !match.swap_contents_and_description || |
| match.description_for_shortcuts.empty() |
| ? match.contents |
| : match.description_for_shortcuts; |
| } |
| |
| // static |
| const std::u16string& ShortcutsBackend::GetSwappedContents( |
| const AutocompleteMatch& match) { |
| return match.swap_contents_and_description ? match.description |
| : match.contents; |
| } |
| |
| // static |
| const ACMatchClassifications& ShortcutsBackend::GetContentsClass( |
| const AutocompleteMatch& match) { |
| return !match.swap_contents_and_description || |
| match.description_class_for_shortcuts.empty() |
| ? match.contents_class |
| : match.description_class_for_shortcuts; |
| } |
| |
| ShortcutsBackend::ShortcutsBackend( |
| TemplateURLService* template_url_service, |
| std::unique_ptr<SearchTermsData> search_terms_data, |
| history::HistoryService* history_service, |
| base::FilePath database_path, |
| bool suppress_db) |
| : template_url_service_(template_url_service), |
| search_terms_data_(std::move(search_terms_data)), |
| current_state_(NOT_INITIALIZED), |
| main_runner_(base::SingleThreadTaskRunner::GetCurrentDefault()), |
| db_runner_(base::ThreadPool::CreateSequencedTaskRunner( |
| {base::MayBlock(), base::TaskPriority::BEST_EFFORT, |
| base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN})), |
| no_db_access_(suppress_db) { |
| if (!suppress_db) |
| db_ = new ShortcutsDatabase(database_path); |
| if (history_service) |
| history_service_observation_.Observe(history_service); |
| } |
| |
| bool ShortcutsBackend::Init() { |
| if (current_state_ != NOT_INITIALIZED) |
| return false; |
| |
| if (no_db_access_) { |
| current_state_ = INITIALIZED; |
| return true; |
| } |
| |
| current_state_ = INITIALIZING; |
| return db_runner_->PostTask( |
| FROM_HERE, base::BindOnce(&ShortcutsBackend::InitInternal, this)); |
| } |
| |
| bool ShortcutsBackend::DeleteShortcutsWithURL(const GURL& shortcut_url) { |
| return initialized() && DeleteShortcutsWithURL(shortcut_url, true); |
| } |
| |
| bool ShortcutsBackend::DeleteShortcutsBeginningWithURL( |
| const GURL& shortcut_url) { |
| return initialized() && DeleteShortcutsWithURL(shortcut_url, false); |
| } |
| |
| void ShortcutsBackend::AddObserver(ShortcutsBackendObserver* obs) { |
| observer_list_.AddObserver(obs); |
| } |
| |
| void ShortcutsBackend::RemoveObserver(ShortcutsBackendObserver* obs) { |
| observer_list_.RemoveObserver(obs); |
| } |
| |
| void ShortcutsBackend::AddOrUpdateShortcut(const std::u16string& text, |
| const AutocompleteMatch& match) { |
| #if DCHECK_IS_ON() |
| match.Validate(); |
| #endif // DCHECK_IS_ON() |
| |
| // Trim `text` since `ExpandToFullWord()` trims the shortcut text; otherwise, |
| // inputs with trailing whitespace wouldn't match a shortcut even if the user |
| // previously used the input with a trailing whitespace. |
| const auto text_trimmed = |
| base::TrimWhitespace(text, base::TrimPositions::TRIM_TRAILING); |
| |
| // `text` may be empty for pedal and zero suggest navigations. `text_trimmed` |
| // can additionally be empty for whitespace-only inputs. It's unlikely users |
| // will have a predictable navigation with such inputs, so early exit. |
| // Besides, `ShortcutsProvider::Start()` also early exits on empty inputs, so |
| // there's no reason to add empty-text shortcuts if they won't be used. |
| if (text_trimmed.empty()) |
| return; |
| |
| const std::u16string text_trimmed_lowercase( |
| base::i18n::ToLower(text_trimmed)); |
| const base::Time now(base::Time::Now()); |
| |
| // Look for an existing shortcut to `match` prefixed by `text`. If there is |
| // one, it'll be updated. This avoids creating duplicating equivalent |
| // shortcuts (e.g. 'g', 'go', & 'goo') with distributed `number_of_hits`s and |
| // outdated `last_access_time`s. There could be multiple relevant shortcuts; |
| // e.g., the `text` 'wi' could match both shortcuts 'wiki' and 'wild' to |
| // 'wiki.org/wild_west'. We only update the 1st shortcut; this is slightly |
| // arbitrary but seems to be fine. Deduping these shortcuts would stop the |
| // input 'wil' from finding the 2nd shortcut. |
| for (ShortcutMap::const_iterator it( |
| shortcuts_map_.lower_bound(text_trimmed_lowercase)); |
| it != shortcuts_map_.end() && |
| base::StartsWith(it->first, text_trimmed_lowercase, |
| base::CompareCase::SENSITIVE); |
| ++it) { |
| if (match.destination_url == it->second.match_core.destination_url) { |
| // When a user navigates to a shortcut after typing a prefix of the |
| // shortcut, the shortcut text is replaced with the shorter user input, |
| // plus an additional 3 chars to avoid unstable shortcuts. E.g. if the |
| // user creates a shortcut with text 'google.com', then navigates |
| // typing 'go', the shortcut text should be updated to 'googl'. |
| const auto text_and_3_chars = base::StrCat( |
| {text_trimmed, it->second.text.substr(text_trimmed.length(), 3)}); |
| const auto expanded_text = |
| ExpandToFullWord(text_and_3_chars, it->second.text); |
| UpdateShortcut(ShortcutsDatabase::Shortcut( |
| it->second.id, expanded_text, |
| MatchToMatchCore(match, template_url_service_, |
| search_terms_data_.get()), |
| now, it->second.number_of_hits + 1)); |
| return; |
| } |
| } |
| |
| // If no shortcuts to `match` prefixed by `text` were found, create one. |
| |
| // Try to expand the input to a full word so that inputs like 'Aram Kha' later |
| // autocomplete 'Aram Khachaturian' instead of the incomplete input. Prefer |
| // `contents` as the `description` & URL are usually less meaningful (e.g. |
| // 'docs.google.com/d/3SyB0Y83dG_WuxX' or 'Google Search'). Except when |
| // `swap_contents_and_description` is true, which means the description |
| // contains the title or meaningful text. Also consider the URL host, which |
| // is usually also recognizable and helpful when there are whitespace or other |
| // discrepancies between the title and host (e.g. 'Stack Overflow' and |
| // 'stackoverflow.com'). |
| const auto expanded_text = ExpandToFullWord( |
| text, GetSwappedContents(match) + u" " + |
| base::UTF8ToUTF16(match.destination_url.host())); |
| AddShortcut(ShortcutsDatabase::Shortcut( |
| base::Uuid::GenerateRandomV4().AsLowercaseString(), expanded_text, |
| MatchToMatchCore(match, template_url_service_, search_terms_data_.get()), |
| now, 1)); |
| } |
| |
| ShortcutsBackend::~ShortcutsBackend() { |
| db_runner_->ReleaseSoon(FROM_HERE, std::move(db_)); |
| } |
| |
| // static |
| ShortcutsDatabase::Shortcut::MatchCore ShortcutsBackend::MatchToMatchCore( |
| const AutocompleteMatch& match, |
| TemplateURLService* template_url_service, |
| SearchTermsData* search_terms_data) { |
| const AutocompleteMatch::Type match_type = GetTypeForShortcut(match.type); |
| |
| const AutocompleteMatch* normalized_match = &match; |
| AutocompleteMatch temp; |
| |
| if (AutocompleteMatch::IsSpecializedSearchType(match.type)) { |
| DCHECK(match.search_terms_args); |
| temp = BaseSearchProvider::CreateShortcutSearchSuggestion( |
| match.search_terms_args->search_terms, match_type, |
| ui::PageTransitionCoreTypeIs(match.transition, |
| ui::PAGE_TRANSITION_KEYWORD), |
| match.GetTemplateURL(template_url_service, false), *search_terms_data); |
| normalized_match = &temp; |
| } |
| |
| return ShortcutsDatabase::Shortcut::MatchCore( |
| normalized_match->fill_into_edit, normalized_match->destination_url, |
| normalized_match->document_type, GetContents(*normalized_match), |
| StripMatchMarkers(GetContentsClass(*normalized_match)), |
| GetDescription(*normalized_match), |
| StripMatchMarkers(GetDescriptionClass(*normalized_match)), |
| normalized_match->transition, match_type, normalized_match->keyword); |
| } |
| |
| void ShortcutsBackend::ShutdownOnUIThread() { |
| history_service_observation_.Reset(); |
| } |
| |
| void ShortcutsBackend::OnURLsDeleted( |
| history::HistoryService* history_service, |
| const history::DeletionInfo& deletion_info) { |
| if (!initialized()) |
| return; |
| |
| if (deletion_info.IsAllHistory()) { |
| DeleteAllShortcuts(); |
| return; |
| } |
| |
| ShortcutsDatabase::ShortcutIDs shortcut_ids; |
| for (const auto& guid_pair : guid_map_) { |
| if (base::ranges::any_of( |
| deletion_info.deleted_rows(), |
| history::URLRow::URLRowHasURL( |
| guid_pair.second->second.match_core.destination_url))) { |
| shortcut_ids.push_back(guid_pair.first); |
| } |
| } |
| DeleteShortcutsWithIDs(shortcut_ids); |
| } |
| |
| void ShortcutsBackend::InitInternal() { |
| DCHECK(current_state_ == INITIALIZING); |
| db_->Init(); |
| ShortcutsDatabase::GuidToShortcutMap shortcuts; |
| db_->LoadShortcuts(&shortcuts); |
| temp_shortcuts_map_ = std::make_unique<ShortcutMap>(); |
| temp_guid_map_ = std::make_unique<GuidMap>(); |
| for (ShortcutsDatabase::GuidToShortcutMap::const_iterator it( |
| shortcuts.begin()); |
| it != shortcuts.end(); ++it) { |
| (*temp_guid_map_)[it->first] = temp_shortcuts_map_->insert( |
| std::make_pair(base::i18n::ToLower(it->second.text), it->second)); |
| } |
| main_runner_->PostTask( |
| FROM_HERE, base::BindOnce(&ShortcutsBackend::InitCompleted, this)); |
| } |
| |
| void ShortcutsBackend::InitCompleted() { |
| temp_guid_map_->swap(guid_map_); |
| temp_shortcuts_map_->swap(shortcuts_map_); |
| temp_shortcuts_map_.reset(nullptr); |
| temp_guid_map_.reset(nullptr); |
| // This histogram is expired but the code was intentionally left behind so |
| // it can be easily re-enabled when launching Shortcuts provider on Android |
| // or iOS. |
| UMA_HISTOGRAM_COUNTS_10000("ShortcutsProvider.DatabaseSize", |
| shortcuts_map_.size()); |
| current_state_ = INITIALIZED; |
| for (ShortcutsBackendObserver& observer : observer_list_) |
| observer.OnShortcutsLoaded(); |
| } |
| |
| bool ShortcutsBackend::AddShortcut( |
| const ShortcutsDatabase::Shortcut& shortcut) { |
| if (!initialized()) |
| return false; |
| DCHECK(guid_map_.find(shortcut.id) == guid_map_.end()); |
| guid_map_[shortcut.id] = shortcuts_map_.insert( |
| std::make_pair(base::i18n::ToLower(shortcut.text), shortcut)); |
| for (ShortcutsBackendObserver& observer : observer_list_) |
| observer.OnShortcutsChanged(); |
| return no_db_access_ || |
| db_runner_->PostTask( |
| FROM_HERE, |
| base::BindOnce(base::IgnoreResult(&ShortcutsDatabase::AddShortcut), |
| db_.get(), shortcut)); |
| } |
| |
| bool ShortcutsBackend::UpdateShortcut( |
| const ShortcutsDatabase::Shortcut& shortcut) { |
| if (!initialized()) |
| return false; |
| auto it(guid_map_.find(shortcut.id)); |
| if (it != guid_map_.end()) |
| shortcuts_map_.erase(it->second); |
| guid_map_[shortcut.id] = shortcuts_map_.insert( |
| std::make_pair(base::i18n::ToLower(shortcut.text), shortcut)); |
| for (ShortcutsBackendObserver& observer : observer_list_) |
| observer.OnShortcutsChanged(); |
| return no_db_access_ || |
| db_runner_->PostTask( |
| FROM_HERE, base::BindOnce(base::IgnoreResult( |
| &ShortcutsDatabase::UpdateShortcut), |
| db_.get(), shortcut)); |
| } |
| |
| bool ShortcutsBackend::DeleteShortcutsWithIDs( |
| const ShortcutsDatabase::ShortcutIDs& shortcut_ids) { |
| if (!initialized()) |
| return false; |
| for (const auto& shortcut_id : shortcut_ids) { |
| auto it(guid_map_.find(shortcut_id)); |
| if (it != guid_map_.end()) { |
| shortcuts_map_.erase(it->second); |
| guid_map_.erase(it); |
| } |
| } |
| for (ShortcutsBackendObserver& observer : observer_list_) |
| observer.OnShortcutsChanged(); |
| return no_db_access_ || |
| db_runner_->PostTask( |
| FROM_HERE, |
| base::BindOnce( |
| base::IgnoreResult(&ShortcutsDatabase::DeleteShortcutsWithIDs), |
| db_.get(), shortcut_ids)); |
| } |
| |
| bool ShortcutsBackend::DeleteShortcutsWithURL(const GURL& url, |
| bool exact_match) { |
| const std::string& url_spec = url.spec(); |
| ShortcutsDatabase::ShortcutIDs shortcut_ids; |
| for (auto it(guid_map_.begin()); it != guid_map_.end();) { |
| if (exact_match ? (it->second->second.match_core.destination_url == url) |
| : base::StartsWith( |
| it->second->second.match_core.destination_url.spec(), |
| url_spec, base::CompareCase::SENSITIVE)) { |
| shortcut_ids.push_back(it->first); |
| shortcuts_map_.erase(it->second); |
| guid_map_.erase(it++); |
| } else { |
| ++it; |
| } |
| } |
| for (ShortcutsBackendObserver& observer : observer_list_) |
| observer.OnShortcutsChanged(); |
| return no_db_access_ || |
| db_runner_->PostTask( |
| FROM_HERE, |
| base::BindOnce( |
| base::IgnoreResult(&ShortcutsDatabase::DeleteShortcutsWithURL), |
| db_.get(), url_spec)); |
| } |
| |
| bool ShortcutsBackend::DeleteAllShortcuts() { |
| if (!initialized()) |
| return false; |
| shortcuts_map_.clear(); |
| guid_map_.clear(); |
| for (ShortcutsBackendObserver& observer : observer_list_) |
| observer.OnShortcutsChanged(); |
| return no_db_access_ || |
| db_runner_->PostTask( |
| FROM_HERE, |
| base::BindOnce( |
| base::IgnoreResult(&ShortcutsDatabase::DeleteAllShortcuts), |
| db_.get())); |
| } |