| // Copyright 2018 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "components/omnibox/browser/document_provider.h" |
| |
| #include <stddef.h> |
| |
| #include <map> |
| #include <numeric> |
| #include <tuple> |
| #include <utility> |
| #include <vector> |
| |
| #include "base/containers/adapters.h" |
| #include "base/containers/contains.h" |
| #include "base/containers/lru_cache.h" |
| #include "base/feature_list.h" |
| #include "base/functional/bind.h" |
| #include "base/functional/callback.h" |
| #include "base/i18n/case_conversion.h" |
| #include "base/i18n/time_formatting.h" |
| #include "base/json/json_reader.h" |
| #include "base/metrics/field_trial_params.h" |
| #include "base/metrics/histogram_macros.h" |
| #include "base/ranges/algorithm.h" |
| #include "base/strings/strcat.h" |
| #include "base/strings/string_split.h" |
| #include "base/strings/string_util.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "base/trace_event/trace_event.h" |
| #include "components/bookmarks/browser/bookmark_utils.h" |
| #include "components/omnibox/browser/autocomplete_input.h" |
| #include "components/omnibox/browser/autocomplete_match.h" |
| #include "components/omnibox/browser/autocomplete_match_classification.h" |
| #include "components/omnibox/browser/autocomplete_provider.h" |
| #include "components/omnibox/browser/autocomplete_provider_client.h" |
| #include "components/omnibox/browser/autocomplete_provider_listener.h" |
| #include "components/omnibox/browser/document_suggestions_service.h" |
| #include "components/omnibox/browser/history_provider.h" |
| #include "components/omnibox/browser/in_memory_url_index_types.h" |
| #include "components/omnibox/browser/keyword_provider.h" |
| #include "components/omnibox/browser/omnibox_field_trial.h" |
| #include "components/omnibox/browser/omnibox_prefs.h" |
| #include "components/omnibox/browser/search_provider.h" |
| #include "components/omnibox/common/omnibox_features.h" |
| #include "components/pref_registry/pref_registry_syncable.h" |
| #include "components/prefs/pref_service.h" |
| #include "components/search/search.h" |
| #include "components/search_engines/search_engine_type.h" |
| #include "components/search_engines/template_url_service.h" |
| #include "components/strings/grit/components_strings.h" |
| #include "net/base/url_util.h" |
| #include "services/network/public/mojom/url_response_head.mojom.h" |
| #include "third_party/metrics_proto/omnibox_event.pb.h" |
| #include "third_party/metrics_proto/omnibox_focus_type.pb.h" |
| #include "third_party/re2/src/re2/re2.h" |
| #include "ui/base/l10n/l10n_util.h" |
| |
| namespace { |
| |
// Inclusive lower and upper limits on the length of the input text. Inputs
// shorter than `kMinQueryLength` or longer than `kMaxQueryLength` never
// trigger a drive suggestion request to the backend.
constexpr size_t kMinQueryLength = 4;
constexpr size_t kMaxQueryLength = 200;
| |
// Samples recorded in the "Omnibox.DocumentSuggest.Requests" histogram. The
// numeric values are written to the histogram, so existing entries must keep
// their values.
// TODO(skare): Pull the enum in search_provider.cc into its .h file, and switch
// this file and zero_suggest_provider.cc to use it.
enum DocumentRequestsHistogramValue {
  // A request was handed a loader and sent.
  DOCUMENT_REQUEST_SENT = 1,
  // An in-flight request was dropped by `Stop()` before it completed.
  DOCUMENT_REQUEST_INVALIDATED = 2,
  // The load for a request completed.
  DOCUMENT_REPLY_RECEIVED = 3,
  // Exclusive upper bound passed to the histogram macro; keep it last.
  DOCUMENT_MAX_REQUEST_HISTOGRAM_VALUE
};
| |
| void LogOmniboxDocumentRequest(DocumentRequestsHistogramValue request_value) { |
| UMA_HISTOGRAM_ENUMERATION("Omnibox.DocumentSuggest.Requests", request_value, |
| DOCUMENT_MAX_REQUEST_HISTOGRAM_VALUE); |
| } |
| |
| void LogTotalTime(base::TimeTicks start_time, bool interrupted) { |
| DCHECK(!start_time.is_null()); |
| const base::TimeDelta elapsed_time = base::TimeTicks::Now() - start_time; |
| UMA_HISTOGRAM_TIMES("Omnibox.DocumentSuggest.TotalTime", elapsed_time); |
| if (interrupted) { |
| UMA_HISTOGRAM_TIMES("Omnibox.DocumentSuggest.TotalTime.Interrupted", |
| elapsed_time); |
| } else { |
| UMA_HISTOGRAM_TIMES("Omnibox.DocumentSuggest.TotalTime.NotInterrupted", |
| elapsed_time); |
| } |
| } |
| |
| void LogRequestTime(base::TimeTicks start_time, bool interrupted) { |
| DCHECK(!start_time.is_null()); |
| const base::TimeDelta elapsed_time = base::TimeTicks::Now() - start_time; |
| UMA_HISTOGRAM_TIMES("Omnibox.DocumentSuggest.RequestTime", elapsed_time); |
| if (interrupted) { |
| UMA_HISTOGRAM_TIMES("Omnibox.DocumentSuggest.RequestTime.Interrupted", |
| elapsed_time); |
| } else { |
| UMA_HISTOGRAM_TIMES("Omnibox.DocumentSuggest.RequestTime.NotInterrupted", |
| elapsed_time); |
| } |
| } |
| |
// MIME types the server reports for the Google editor document kinds that get
// a dedicated icon and product description.
constexpr char kDocumentMimetype[] = "application/vnd.google-apps.document";
constexpr char kFormMimetype[] = "application/vnd.google-apps.form";
constexpr char kSpreadsheetMimetype[] =
    "application/vnd.google-apps.spreadsheet";
constexpr char kPresentationMimetype[] =
    "application/vnd.google-apps.presentation";
| |
| // Returns mappings from MIME types to overridden icons. |
| AutocompleteMatch::DocumentType GetIconForMIMEType( |
| const base::StringPiece& mimetype) { |
| static const auto kIconMap = |
| std::map<base::StringPiece, AutocompleteMatch::DocumentType>{ |
| {kDocumentMimetype, AutocompleteMatch::DocumentType::DRIVE_DOCS}, |
| {kFormMimetype, AutocompleteMatch::DocumentType::DRIVE_FORMS}, |
| {kSpreadsheetMimetype, AutocompleteMatch::DocumentType::DRIVE_SHEETS}, |
| {kPresentationMimetype, |
| AutocompleteMatch::DocumentType::DRIVE_SLIDES}, |
| {"image/jpeg", AutocompleteMatch::DocumentType::DRIVE_IMAGE}, |
| {"image/png", AutocompleteMatch::DocumentType::DRIVE_IMAGE}, |
| {"image/gif", AutocompleteMatch::DocumentType::DRIVE_IMAGE}, |
| {"application/pdf", AutocompleteMatch::DocumentType::DRIVE_PDF}, |
| {"video/mp4", AutocompleteMatch::DocumentType::DRIVE_VIDEO}, |
| {"application/vnd.google-apps.folder", |
| AutocompleteMatch::DocumentType::DRIVE_FOLDER}, |
| }; |
| |
| const auto& iterator = kIconMap.find(mimetype); |
| return iterator != kIconMap.end() |
| ? iterator->second |
| : AutocompleteMatch::DocumentType::DRIVE_OTHER; |
| } |
| |
// Appends a copy of every element of `v2` to the end of `v1`, in order, and
// returns a reference to `v1` so calls can be chained. Returning a reference
// (rather than the vector by value) avoids deep-copying the whole of `v1` on
// every call. `v2` must not be the same object as `v1`.
template <typename T>
std::vector<T>& Concat(std::vector<T>& v1, const std::vector<T>& v2) {
  v1.insert(v1.end(), v2.begin(), v2.end());
  return v1;
}
| |
| struct FieldMatches { |
| double weight; |
| String16Vector words; |
| size_t count; |
| |
| FieldMatches(double weight, const std::string* string) |
| : FieldMatches(weight, std::vector<const std::string*>{string}) {} |
| |
| FieldMatches(double weight, std::vector<const std::string*> strings) |
| : weight(weight), |
| words(std::accumulate( |
| strings.begin(), |
| strings.end(), |
| String16Vector(), |
| [](String16Vector word_vec, const std::string* string) { |
| if (string) { |
| Concat(word_vec, |
| String16VectorFromString16( |
| base::UTF8ToUTF16(string->c_str()), nullptr)); |
| } |
| return word_vec; |
| })), |
| count(0) {} |
| |
| // Increments |count| and returns true if |words| includes a word equal to or |
| // prefixed by |word|. |
| bool Includes(const std::u16string& word) { |
| if (base::ranges::none_of(words, [word](std::u16string w) { |
| return base::StartsWith(w, word, |
| base::CompareCase::INSENSITIVE_ASCII); |
| })) |
| return false; |
| count += word.size(); |
| return true; |
| } |
| |
| // Decreases linearly with respect to |count| for small values, begins at 1, |
| // and asymptotically approaches 0. |
| double InvScore() { return std::pow(1 - weight, count); } |
| }; |
| |
| // Extracts a list of pointers to strings from a DictionaryValue containing a |
| // list of objects containing a string field of interest. Note that pointers may |
| // be `nullptr` if the value at `field_path` is not found or is not a string. |
| std::vector<const std::string*> ExtractResultList( |
| const base::Value::Dict& result, |
| const base::StringPiece& list_path, |
| const base::StringPiece& field_path) { |
| const base::Value::List* list = result.FindListByDottedPath(list_path); |
| if (!list) { |
| return {}; |
| } |
| |
| std::vector<const std::string*> extracted; |
| for (const auto& value : *list) { |
| auto* string = value.GetDict().FindString(field_path); |
| if (string) |
| extracted.push_back(string); |
| } |
| return extracted; |
| } |
| |
| // Alias for GetFieldTrialParamByFeatureAsDouble for readability. |
| double FieldWeight(const std::string& param_name, double default_weight) { |
| return base::GetFieldTrialParamByFeatureAsDouble(omnibox::kDocumentProvider, |
| param_name, default_weight); |
| } |
| |
| int CalculateScore(const std::u16string& input, |
| const base::Value::Dict& result) { |
| // Suggestions scored lower than |raw_score_cutoff| will be discarded. |
| double raw_score_cutoff = base::GetFieldTrialParamByFeatureAsDouble( |
| omnibox::kDocumentProvider, "RawDocScoreCutoff", .25); |
| // Final score will be between |min_score| and |max_score|, not accounting for |
| // |raw_score_cutoff|. |
| int min_score = base::GetFieldTrialParamByFeatureAsInt( |
| omnibox::kDocumentProvider, "MinDocScore", 0); |
| int max_score = base::GetFieldTrialParamByFeatureAsInt( |
| omnibox::kDocumentProvider, "MaxDocScore", 1400); |
| |
| std::vector<FieldMatches> field_matches_vec = { |
| {FieldWeight("TitleWeight", .15), result.FindString("title")}, |
| {FieldWeight("OwnerNamesWeight", .15), |
| ExtractResultList(result, "metadata.owner.personNames", "displayName")}, |
| {FieldWeight("OwnerEmailsWeight", .15), |
| ExtractResultList(result, "metadata.owner.emailAddresses", |
| "emailAddress")}, |
| {FieldWeight("SnippetWeight", .06), |
| result.FindStringByDottedPath("snippet.snippet")}, |
| {FieldWeight("UrlWeight", 0), result.FindString("url")}, |
| {FieldWeight("MimeWeight", 0), |
| result.FindStringByDottedPath("metadata.mimeType")}, |
| }; |
| std::stable_sort(field_matches_vec.begin(), field_matches_vec.end(), |
| [](const FieldMatches& a, const FieldMatches& b) { |
| return a.weight > b.weight; |
| }); |
| |
| String16Vector input_words = String16VectorFromString16(input, nullptr); |
| |
| for (const auto& word : input_words) { |
| for (auto& field_matches : field_matches_vec) { |
| // This is calculating the proportion of the user input words that are |
| // included in the suggestion, so break after the first match. Otherwise, |
| // an input like 'wi' would be scored too highly for the suggestion "will |
| // william wilson win the winter windsurfing competition". |
| if (field_matches.Includes(word)) { |
| break; |
| } |
| } |
| } |
| |
| // |score| is computed by subtracting the product of each field's inverse |
| // score from 1; |score| begins at 0 and asymptotically approaches 1. |
| // Summing each field's score would grossly favor short multi-field matches |
| // over long single-field matches due to each fields score increasing faster |
| // for small values. |
| double score = |
| 1 - |
| std::accumulate(field_matches_vec.begin(), field_matches_vec.end(), 1.0, |
| [](double inv_score_product, FieldMatches field_matches) { |
| return inv_score_product * field_matches.InvScore(); |
| }); |
| |
| if (score > 1) |
| score = 1; |
| if (score < raw_score_cutoff) |
| score = 0; |
| |
| return static_cast<int>(min_score + score * (max_score - min_score)); |
| } |
| |
| // Return whether `user` owns the doc `result`. |
| bool IsOwnedByUser(const std::string& user, const base::Value::Dict& result) { |
| std::vector<const std::string*> owner_emails = ExtractResultList( |
| result, "metadata.owner.emailAddresses", "emailAddress"); |
| const auto lower_user = base::i18n::ToLower(base::UTF8ToUTF16(user)); |
| return base::ranges::any_of( |
| owner_emails, |
| [&](const std::u16string& email) { return lower_user == email; }, |
| [&](const std::string* email) { |
| return base::i18n::ToLower(base::UTF8ToUTF16(*email)); |
| }); |
| } |
| |
| int BoostOwned(const int score, |
| const std::string& user, |
| const base::Value::Dict& result) { |
| int promotion = base::GetFieldTrialParamByFeatureAsInt( |
| omnibox::kDocumentProvider, "OwnedDocPromotion", 0); |
| int demotion = base::GetFieldTrialParamByFeatureAsInt( |
| omnibox::kDocumentProvider, "UnownedDocDemotion", 200); |
| |
| bool owned = IsOwnedByUser(user, result); |
| |
| return std::max(score + (owned ? promotion : -demotion), 0); |
| } |
| |
| // Return whether all words in `input` are contained in either the `result` |
| // title or owners. |
| bool IsCompletelyMatchedInTitleOrOwner(const std::u16string& input, |
| const base::Value::Dict& result) { |
| // Accumulate a vector of the title and all owners. |
| auto search_strings = ExtractResultList( |
| result, "metadata.owner.emailAddresses", "emailAddress"); |
| Concat(search_strings, ExtractResultList(result, "metadata.owner.personNames", |
| "displayName")); |
| search_strings.push_back(result.FindString("title")); |
| |
| // Extract a flat vector of words from the title and owners. |
| const auto title_and_owner_words = std::accumulate( |
| search_strings.begin(), search_strings.end(), String16Vector(), |
| [](String16Vector accumulated, const auto& search_string) { |
| Concat(accumulated, |
| String16VectorFromString16( |
| base::i18n::ToLower(base::UTF8ToUTF16(*search_string)), |
| nullptr)); |
| return accumulated; |
| }); |
| |
| // Check if all input words are contained in `title_and_owner_words`. |
| String16Vector input_words = |
| String16VectorFromString16(base::i18n::ToLower(input), nullptr); |
| for (const auto& input_word : input_words) { |
| // It's possible `input` contained 'owner' as a word, as opposed to |
| // 'owner:...' as an operator. Ignore this rare edge case for simplicity. |
| if (input_word != u"owner" && |
| base::ranges::none_of( |
| title_and_owner_words, [&](std::u16string title_word) { |
| return base::StartsWith(title_word, input_word, |
| base::CompareCase::INSENSITIVE_ASCII); |
| })) { |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| // Derived from google3/apps/share/util/docs_url_extractor.cc. |
| std::string ExtractDocIdFromUrl(const std::string& url) { |
| static const RE2 docs_url_pattern_( |
| "\\b(" // The first groups matches the whole URL. |
| // Domain. |
| "(?:https?://)?(?:" |
| // Keep the hosts consistent with `ValidHostPrefix()`. |
| "spreadsheets|docs|drive|script|sites|jamboard" |
| ")[0-9]?\\.google\\.com" |
| "(?::[0-9]+)?\\/" // Port. |
| "(?:\\S*)" // Non-whitespace chars. |
| "(?:" |
| // Doc url prefix to match /d/{id}. (?:e/)? deviates from google3. |
| "(?:/d/(?:e/)?(?P<path_docid>[0-9a-zA-Z\\-\\_]+))" |
| "|" |
| // Docs id expr to match a valid id parameter. |
| "(?:(?:\\?|&|&)" |
| "(?:id|docid|key|docID|DocId)=(?P<query_docid>[0-9a-zA-Z\\-\\_]+))" |
| "|" |
| // Folder url prefix to match /folders/{folder_id}. |
| "(?:/folders/(?P<folder_docid>[0-9a-zA-Z\\-\\_]+))" |
| "|" |
| // Sites url prefix. |
| "(?:/?s/)(?P<sites_docid>[0-9a-zA-Z\\-\\_]+)" |
| "(?:/p/[0-9a-zA-Z\\-\\_]+)?/edit" |
| "|" |
| // Jam url. |
| "(?:d/)(?P<jam_docid>[0-9a-zA-Z\\-\\_]+)/(?:edit|viewer)" |
| ")" |
| // Other valid chars. |
| "(?:[0-9a-zA-Z$\\-\\_\\.\\+\\!\\*\'\\,;:@&=/\\?]*)" |
| // Summarization details. |
| "(?:summarizationDetails=[0-9a-zA-Z$\\-\\_\\.\\+\\!\\*\'\\,;:@&=/" |
| "\\?(?:%5B)(?:%5D)]*)?" |
| // Other valid chars. |
| "(?:[0-9a-zA-Z$\\-\\_\\.\\+\\!\\*\'\\,;:@&=/\\?]*)" |
| "(?:(#[0-9a-zA-Z$\\-\\_\\.\\+\\!\\*\'\\,;:@&=/\\?]+)?)" // Fragment |
| ")"); |
| |
| std::vector<re2::StringPiece> matched_doc_ids( |
| docs_url_pattern_.NumberOfCapturingGroups() + 1); |
| // ANCHOR_START deviates from google3 which uses UNANCHORED. Using |
| // ANCHOR_START prevents incorrectly matching with non-drive URLs but which |
| // contain a drive URL; e.g., |
| // url-parser.com/?url=https://docs.google.com/document/d/(id)/edit. |
| if (!docs_url_pattern_.Match(url, 0, url.size(), RE2::ANCHOR_START, |
| matched_doc_ids.data(), |
| matched_doc_ids.size())) { |
| return std::string(); |
| } |
| for (const auto& doc_id_group : docs_url_pattern_.NamedCapturingGroups()) { |
| re2::StringPiece identified_doc_id = matched_doc_ids[doc_id_group.second]; |
| if (!identified_doc_id.empty()) { |
| return std::string(identified_doc_id); |
| } |
| } |
| return std::string(); |
| } |
| |
| // Verify if the host could possibly be for a valid doc URL. This is a more |
| // lightweight check than `ExtractDocIdFromUrl()`. It can be done before |
| // unescaping the URL as valid hosts don't contain escapable chars; unescaping |
| // is relatively expensive. E.g., 'docs.google.com' isn't a valid doc URL, but |
| // it's host looks like it could be, so return true. On the other hand, |
| // 'google.com' is definitely not a doc URL so return false. |
| bool ValidHostPrefix(const std::string& host) { |
| // There are 66 (5*11) valid, e.g. 'docs5.google.com', so rather than check |
| // all 66, we just check the 6 prefixes. Keep these prefixes consistent with |
| // those in `ExtractDocIdFromUrl()`. |
| static const std::vector<const char*> valid_host_prefixes = { |
| "spreadsheets", "docs", "drive", "script", "sites", "jamboard", |
| }; |
| for (const char* valid_host_prefix : valid_host_prefixes) { |
| if (base::StartsWith(host, valid_host_prefix, |
| base::CompareCase::INSENSITIVE_ASCII)) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| // If `value[key]`, returns it. Otherwise, returns `fallback`. |
| std::string FindStringKeyOrFallback(const base::Value::Dict& value, |
| base::StringPiece key, |
| std::string fallback = "") { |
| auto* ptr = value.FindString(key); |
| return ptr ? *ptr : fallback; |
| } |
| |
| } // namespace |
| |
| // static |
| DocumentProvider* DocumentProvider::Create( |
| AutocompleteProviderClient* client, |
| AutocompleteProviderListener* listener, |
| size_t cache_size) { |
| return new DocumentProvider(client, listener, cache_size); |
| } |
| |
| // static |
| void DocumentProvider::RegisterProfilePrefs( |
| user_prefs::PrefRegistrySyncable* registry) { |
| registry->RegisterBooleanPref(omnibox::kDocumentSuggestEnabled, true); |
| } |
| |
| bool DocumentProvider::IsDocumentProviderAllowed( |
| AutocompleteProviderClient* client, |
| const AutocompleteInput& input) { |
| // Feature must be on. |
| if (!base::FeatureList::IsEnabled(omnibox::kDocumentProvider)) |
| return false; |
| |
| // These may seem like search suggestions, so gate on that setting too. |
| if (!client->SearchSuggestEnabled()) |
| return false; |
| |
| // Client-side toggle must be enabled. |
| if (!client->GetPrefs()->GetBoolean(omnibox::kDocumentSuggestEnabled)) |
| return false; |
| |
| // No incognito. |
| if (client->IsOffTheRecord()) |
| return false; |
| |
| // Check sync's status and proceed if active. |
| bool authenticated_and_syncing = |
| client->IsAuthenticated() && client->IsSyncActive(); |
| if (!authenticated_and_syncing) |
| return false; |
| |
| // We haven't received a server backoff signal. |
| if (backoff_for_session_) |
| return false; |
| |
| // Google must be set as default search provider. |
| auto* template_url_service = client->GetTemplateURLService(); |
| if (!search::DefaultSearchProviderIsGoogle(template_url_service)) { |
| return false; |
| } |
| |
| // There should be no document suggestions fetched for on-focus suggestion |
| // requests, or if the input is empty. |
| if (input.focus_type() != metrics::OmniboxFocusType::INTERACTION_DEFAULT || |
| input.type() == metrics::OmniboxInputType::EMPTY) { |
| return false; |
| } |
| |
| // Experiment: don't issue queries for inputs under some length. |
| if (input.text().length() < kMinQueryLength || |
| input.text().length() > kMaxQueryLength) { |
| return false; |
| } |
| |
| // Don't issue queries for input likely to be a URL. |
| if (IsInputLikelyURL(input)) |
| return false; |
| |
| return true; |
| } |
| |
| // static |
| bool DocumentProvider::IsInputLikelyURL(const AutocompleteInput& input) { |
| if (input.type() == metrics::OmniboxInputType::URL) |
| return true; |
| |
| // Special cases when the user might be starting to type the most common URL |
| // prefixes, but the SchemeClassifier won't have classified them as URLs yet. |
| // Note these checks are of the form "(string constant) starts with input." |
| if (input.text().length() <= 8) { |
| if (StartsWith(u"https://", input.text(), |
| base::CompareCase::INSENSITIVE_ASCII) || |
| StartsWith(u"http://", input.text(), |
| base::CompareCase::INSENSITIVE_ASCII) || |
| StartsWith(u"www.", input.text(), |
| base::CompareCase::INSENSITIVE_ASCII)) { |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| void DocumentProvider::Start(const AutocompleteInput& input, |
| bool minimal_changes) { |
| TRACE_EVENT0("omnibox", "DocumentProvider::Start"); |
| Stop(true, false); |
| |
| // Perform various checks - feature is enabled, user is allowed to use the |
| // feature, we're not under backoff, etc. |
| if (!IsDocumentProviderAllowed(client_, input)) |
| return; |
| |
| input_ = input; |
| |
| // Return cached suggestions synchronously after setting the relevance of any |
| // beyond |provider_max_matches_| to 0. |
| CopyCachedMatchesToMatches(); |
| DemoteMatchesBeyondMax(); |
| |
| if (input.omit_asynchronous_matches()) { |
| return; |
| } |
| |
| done_ = false; // Set true in callbacks. |
| debouncer_->RequestRun( |
| base::BindOnce(&DocumentProvider::Run, base::Unretained(this))); |
| } |
| |
| void DocumentProvider::Run() { |
| time_run_invoked_ = base::TimeTicks::Now(); |
| client_->GetDocumentSuggestionsService(/*create_if_necessary=*/true) |
| ->CreateDocumentSuggestionsRequest( |
| input_.text(), client_->IsOffTheRecord(), |
| base::BindOnce( |
| &DocumentProvider::OnDocumentSuggestionsLoaderAvailable, |
| weak_ptr_factory_.GetWeakPtr()), |
| base::BindOnce( |
| &DocumentProvider::OnURLLoadComplete, |
| base::Unretained(this) /* this owns SimpleURLLoader */)); |
| } |
| |
| void DocumentProvider::Stop(bool clear_cached_results, |
| bool due_to_user_inactivity) { |
| TRACE_EVENT0("omnibox", "DocumentProvider::Stop"); |
| AutocompleteProvider::Stop(clear_cached_results, due_to_user_inactivity); |
| |
| debouncer_->CancelRequest(); |
| |
| // If the request was sent, then log its duration and that it was invalidated. |
| if (loader_) { |
| DCHECK(!time_run_invoked_.is_null()); |
| DCHECK(!time_request_sent_.is_null()); |
| loader_.reset(); |
| LogRequestTime(time_request_sent_, true); |
| time_request_sent_ = base::TimeTicks(); |
| LogOmniboxDocumentRequest(DOCUMENT_REQUEST_INVALIDATED); |
| } |
| |
| // If `Run()` has been invoked, log its duration. It's possible `Stop()` is |
| // invoked before `Run()` has been invoked if 1) this is the first user input, |
| // 2) the previous call was debounced, or 3) the previous request was filtered |
| // (e.g. input too short). |
| if (!time_run_invoked_.is_null()) { |
| LogTotalTime(time_run_invoked_, true); |
| time_run_invoked_ = base::TimeTicks(); |
| } |
| |
| auto* document_suggestions_service = |
| client_->GetDocumentSuggestionsService(/*create_if_necessary=*/false); |
| if (document_suggestions_service != nullptr) { |
| document_suggestions_service->StopCreatingDocumentSuggestionsRequest(); |
| } |
| } |
| |
| void DocumentProvider::DeleteMatch(const AutocompleteMatch& match) { |
| // Not supported by this provider. |
| return; |
| } |
| |
| void DocumentProvider::AddProviderInfo(ProvidersInfo* provider_info) const { |
| provider_info->push_back(metrics::OmniboxEventProto_ProviderInfo()); |
| metrics::OmniboxEventProto_ProviderInfo& new_entry = provider_info->back(); |
| new_entry.set_provider(metrics::OmniboxEventProto::DOCUMENT); |
| new_entry.set_provider_done(done_); |
| } |
| |
| DocumentProvider::DocumentProvider(AutocompleteProviderClient* client, |
| AutocompleteProviderListener* listener, |
| size_t cache_size) |
| : AutocompleteProvider(AutocompleteProvider::TYPE_DOCUMENT), |
| backoff_for_session_(false), |
| client_(client), |
| cache_size_(cache_size), |
| matches_cache_(MatchesCache::NO_AUTO_EVICT) { |
| AddListener(listener); |
| |
| debouncer_ = std::make_unique<AutocompleteProviderDebouncer>(true, 300); |
| } |
| |
| DocumentProvider::~DocumentProvider() = default; |
| |
| void DocumentProvider::OnURLLoadComplete( |
| const network::SimpleURLLoader* source, |
| std::unique_ptr<std::string> response_body) { |
| DCHECK(!done_); |
| DCHECK_EQ(loader_.get(), source); |
| |
| LogRequestTime(time_request_sent_, false); |
| LogOmniboxDocumentRequest(DOCUMENT_REPLY_RECEIVED); |
| |
| int httpStatusCode = source->ResponseInfo() && source->ResponseInfo()->headers |
| ? source->ResponseInfo()->headers->response_code() |
| : 0; |
| |
| if (httpStatusCode == 400 || httpStatusCode == 499) |
| backoff_for_session_ = true; |
| |
| const bool results_updated = |
| response_body && source->NetError() == net::OK && httpStatusCode == 200 && |
| UpdateResults(SearchSuggestionParser::ExtractJsonData( |
| source, std::move(response_body))); |
| LogTotalTime(time_run_invoked_, false); |
| loader_.reset(); |
| done_ = true; |
| NotifyListeners(results_updated); |
| } |
| |
| bool DocumentProvider::UpdateResults(const std::string& json_data) { |
| absl::optional<base::Value> response = |
| base::JSONReader::Read(json_data, base::JSON_ALLOW_TRAILING_COMMAS); |
| if (!response) |
| return false; |
| |
| // 1) Fill |matches_| with <N> new server matches. |
| matches_ = ParseDocumentSearchResults(*response); |
| // 2) Clear cached matches' scores to ensure cached matches for all but the |
| // previous input can only be shown if deduped. E.g., this allows matches for |
| // the input 'pari' to be displayed synchronously for the input 'paris', but |
| // be hidden if the user clears their input and starts anew 'london'. |
| SetCachedMatchesScoresTo0(); |
| // 3) Push the <N> new matches to the cache. |
| for (const AutocompleteMatch& match : base::Reversed(matches_)) |
| matches_cache_.Put(match.stripped_destination_url, match); |
| // 4) Copy the cached matches to |matches_|, skipping the most recent <N> |
| // cached matches since they were already added in step (1). Pass |
| // |set_scores_to_0| as true as we don't trust cached scores since they may no |
| // longer match the current input; if the cached matches were still relevant, |
| // they would have been returned from the server again. |
| CopyCachedMatchesToMatches(matches_.size()); |
| // 5) Only now can we shrink the cache to |cache_size_|. Doing this |
| // automatically when pushing the new matches to the cache would reduce it's |
| // effective size, especially if the server returns close to |cache_size_| |
| // matches. |
| matches_cache_.ShrinkToSize(cache_size_); |
| // 6) Limit matches to |provider_max_matches_| unless used for deduping; i.e. |
| // set the scores of matches beyond the limit to 0. |
| DemoteMatchesBeyondMax(); |
| |
| return !matches_.empty(); |
| } |
| |
| void DocumentProvider::OnDocumentSuggestionsLoaderAvailable( |
| std::unique_ptr<network::SimpleURLLoader> loader) { |
| time_request_sent_ = base::TimeTicks::Now(); |
| loader_ = std::move(loader); |
| LogOmniboxDocumentRequest(DOCUMENT_REQUEST_SENT); |
| } |
| |
| // static |
| std::u16string DocumentProvider::GenerateLastModifiedString( |
| const std::string& modified_timestamp_string, |
| base::Time now) { |
| if (modified_timestamp_string.empty()) |
| return std::u16string(); |
| base::Time modified_time; |
| if (!base::Time::FromString(modified_timestamp_string.c_str(), |
| &modified_time)) |
| return std::u16string(); |
| |
| // Use shorthand if the times fall on the same day or in the same year. |
| base::Time::Exploded exploded_modified_time; |
| base::Time::Exploded exploded_now; |
| modified_time.LocalExplode(&exploded_modified_time); |
| now.LocalExplode(&exploded_now); |
| if (exploded_modified_time.year == exploded_now.year) { |
| if (exploded_modified_time.month == exploded_now.month && |
| exploded_modified_time.day_of_month == exploded_now.day_of_month) { |
| // Same local calendar day - use localized time. |
| return base::TimeFormatTimeOfDay(modified_time); |
| } |
| // Same year but not the same day: use abbreviated month/day ("Jan 1"). |
| return base::TimeFormatWithPattern(modified_time, "MMMd"); |
| } |
| |
| // No shorthand; display full MM/DD/YYYY. |
| return base::TimeFormatShortDateNumeric(modified_time); |
| } |
| |
| // static |
| std::u16string DocumentProvider::GetProductDescriptionString( |
| const std::string& mimetype) { |
| if (mimetype == kDocumentMimetype) |
| return l10n_util::GetStringUTF16(IDS_DRIVE_SUGGESTION_DOCUMENT); |
| if (mimetype == kFormMimetype) |
| return l10n_util::GetStringUTF16(IDS_DRIVE_SUGGESTION_FORM); |
| if (mimetype == kSpreadsheetMimetype) |
| return l10n_util::GetStringUTF16(IDS_DRIVE_SUGGESTION_SPREADSHEET); |
| if (mimetype == kPresentationMimetype) |
| return l10n_util::GetStringUTF16(IDS_DRIVE_SUGGESTION_PRESENTATION); |
| // Fallback to "Drive" for other filetypes. |
| return l10n_util::GetStringUTF16(IDS_DRIVE_SUGGESTION_GENERAL); |
| } |
| |
| // static |
| std::u16string DocumentProvider::GetMatchDescription( |
| const std::string& update_time, |
| const std::string& mimetype, |
| const std::string& owner) { |
| std::u16string mime_desc = GetProductDescriptionString(mimetype); |
| if (!update_time.empty()) { |
| std::u16string date_desc = |
| GenerateLastModifiedString(update_time, base::Time::Now()); |
| return owner.empty() |
| ? l10n_util::GetStringFUTF16( |
| IDS_DRIVE_SUGGESTION_DESCRIPTION_TEMPLATE_WITHOUT_OWNER, |
| date_desc, mime_desc) |
| : l10n_util::GetStringFUTF16( |
| IDS_DRIVE_SUGGESTION_DESCRIPTION_TEMPLATE, date_desc, |
| base::UTF8ToUTF16(owner), mime_desc); |
| } |
| return owner.empty() |
| ? mime_desc |
| : l10n_util::GetStringFUTF16( |
| IDS_DRIVE_SUGGESTION_DESCRIPTION_TEMPLATE_WITHOUT_DATE, |
| base::UTF8ToUTF16(owner), mime_desc); |
| } |
| |
| ACMatches DocumentProvider::ParseDocumentSearchResults( |
| const base::Value& root_val) { |
| ACMatches matches; |
| |
| // Parse the results. |
| const base::Value::List* results = root_val.GetDict().FindList("results"); |
| if (!results) { |
| return matches; |
| } |
| size_t num_results = results->size(); |
| UMA_HISTOGRAM_COUNTS_1M("Omnibox.DocumentSuggest.ResultCount", num_results); |
| |
| // During development/quality iteration we may wish to defeat server scores. |
| // If both |use_server_score| and |use_client_score| are true, the min of the |
| // two scores will be used. |
| // If both are false, the server score will be used. |
| bool use_client_score = base::GetFieldTrialParamByFeatureAsBool( |
| omnibox::kDocumentProvider, "DocumentUseClientScore", false); |
| bool use_server_score = base::GetFieldTrialParamByFeatureAsBool( |
| omnibox::kDocumentProvider, "DocumentUseServerScore", true); |
| |
| // Cap scores for each suggestion. |
| bool cap_score_per_rank = base::GetFieldTrialParamByFeatureAsBool( |
| omnibox::kDocumentProvider, "DocumentCapScorePerRank", false); |
| std::vector<int> score_caps = { |
| base::GetFieldTrialParamByFeatureAsInt(omnibox::kDocumentProvider, |
| "DocumentCapScoreRank1", 1200), |
| base::GetFieldTrialParamByFeatureAsInt(omnibox::kDocumentProvider, |
| "DocumentCapScoreRank2", 1100), |
| base::GetFieldTrialParamByFeatureAsInt(omnibox::kDocumentProvider, |
| "DocumentCapScoreRank3", 900), |
| }; |
| |
| // Promotes owned documents and/or demotes unowned documents. |
| bool boost_owned = base::GetFieldTrialParamByFeatureAsBool( |
| omnibox::kDocumentProvider, "DocumentBoostOwned", false); |
| |
| // Ensure server's suggestions are added with monotonically decreasing scores. |
| int previous_score = INT_MAX; |
| |
| // Number of matches that are neither owned nor a complete title or owner |
| // match. |
| int low_quality_match_count = 0; |
| |
| for (size_t i = 0; i < num_results; i++) { |
| const base::Value& result_value = (*results)[i]; |
| if (!result_value.is_dict()) { |
| return matches; |
| } |
| |
| const base::Value::Dict& result = result_value.GetDict(); |
| const std::string title = FindStringKeyOrFallback(result, "title"); |
| const std::string url = FindStringKeyOrFallback(result, "url"); |
| if (title.empty() || url.empty()) { |
| continue; |
| } |
| |
| // Both client and server scores are calculated regardless of usage in order |
| // to log them with |AutocompleteMatch::RecordAdditionalInfo| below. |
| int client_score = CalculateScore(input_.text(), result); |
| int server_score = result.FindInt("score").value_or(0); |
| int score = 0; |
| |
| if (use_client_score && use_server_score) |
| score = std::min(client_score, server_score); |
| else |
| score = use_client_score ? client_score : server_score; |
| |
| if (cap_score_per_rank) { |
| int score_cap = i < score_caps.size() ? score_caps[i] : score_caps.back(); |
| score = std::min(score, score_cap); |
| } |
| |
| if (boost_owned) |
| score = BoostOwned(score, client_->ProfileUserName(), result); |
| |
| // Decrement scores if necessary to ensure suggestion order is preserved. |
| // Don't decrement client scores which don't necessarily rank suggestions |
| // the same order as the server. |
| if (!use_client_score && score >= previous_score) |
| score = std::max(previous_score - 1, 0); |
| previous_score = score; |
| |
| // Only allow up to 1 doc that is neither owned nor a complete title or |
| // owner match. |
| bool is_owned = IsOwnedByUser(client_->ProfileUserName(), result); |
| bool is_completely_matched_in_title_and_owner = |
| IsCompletelyMatchedInTitleOrOwner(input_.text(), result); |
| if (!is_owned && !is_completely_matched_in_title_and_owner && |
| ++low_quality_match_count > 1) { |
| score = 0; |
| } |
| |
| AutocompleteMatch match(this, score, false, |
| AutocompleteMatchType::DOCUMENT_SUGGESTION); |
| // Use full URL for navigation. If present, use "originalUrl" for display & |
| // deduping, as it's shorter. |
| const std::string short_url = |
| FindStringKeyOrFallback(result, "originalUrl", url); |
| match.fill_into_edit = base::UTF8ToUTF16(short_url); |
| match.destination_url = GURL(url); |
| // `AutocompleteMatch::GURLToStrippedGURL()` will try to use |
| // `GetURLForDeduping()` to extract a doc ID and generate a canonical doc |
| // URL; this is ideal as it handles different URL formats pointing to the |
| // same doc. Otherwise, it'll resort to the typical stripped URL generation |
| // that can still be used for generic deduping and as a key to |
| // `matches_cache_`. |
| match.stripped_destination_url = AutocompleteMatch::GURLToStrippedGURL( |
| GURL(short_url), input_, client_->GetTemplateURLService(), |
| std::u16string(), /*keep_search_intent_params=*/false, |
| /*normalize_search_terms=*/false); |
| |
| match.contents = |
| AutocompleteMatch::SanitizeString(base::UTF8ToUTF16(title)); |
| match.contents_class = Classify(match.contents, input_.text()); |
| const base::Value::Dict* metadata = result.FindDict("metadata"); |
| if (metadata) { |
| const std::string update_time = |
| FindStringKeyOrFallback(*metadata, "updateTime"); |
| const std::string mimetype = |
| FindStringKeyOrFallback(*metadata, "mimeType"); |
| if (metadata->FindString("mimeType")) { |
| match.document_type = GetIconForMIMEType(mimetype); |
| match.RecordAdditionalInfo( |
| "document type", |
| AutocompleteMatch::DocumentTypeString(match.document_type)); |
| } |
| auto owners = ExtractResultList(result, "metadata.owner.personNames", |
| "displayName"); |
| const std::string owner = !owners.empty() ? *owners[0] : ""; |
| if (!owner.empty()) |
| match.RecordAdditionalInfo("document owner", owner); |
| match.description = GetMatchDescription(update_time, mimetype, owner); |
| AutocompleteMatch::AddLastClassificationIfNecessary( |
| &match.description_class, 0, ACMatchClassification::DIM); |
| // Exclude date & owner from description_for_shortcut to avoid showing |
| // stale data from the shortcuts provider. |
| match.description_for_shortcuts = GetMatchDescription("", mimetype, ""); |
| AutocompleteMatch::AddLastClassificationIfNecessary( |
| &match.description_class_for_shortcuts, 0, |
| ACMatchClassification::DIM); |
| match.RecordAdditionalInfo("description_for_shortcuts", |
| match.description_for_shortcuts); |
| } |
| |
| match.TryRichAutocompletion(base::UTF8ToUTF16(match.destination_url.spec()), |
| match.contents, input_); |
| match.transition = ui::PAGE_TRANSITION_GENERATED; |
| match.RecordAdditionalInfo("client score", client_score); |
| match.RecordAdditionalInfo("server score", server_score); |
| match.RecordAdditionalInfo("owned", is_owned); |
| match.RecordAdditionalInfo("completely matched in title and owner", |
| is_completely_matched_in_title_and_owner); |
| if (matches.size() >= provider_max_matches_) |
| match.RecordAdditionalInfo("for deduping only", "true"); |
| const std::string* snippet = |
| result.FindStringByDottedPath("snippet.snippet"); |
| if (snippet) |
| match.RecordAdditionalInfo("snippet", *snippet); |
| matches.push_back(match); |
| } |
| return matches; |
| } |
| |
| void DocumentProvider::CopyCachedMatchesToMatches( |
| size_t skip_n_most_recent_matches) { |
| base::ranges::transform( |
| std::next(matches_cache_.begin(), skip_n_most_recent_matches), |
| matches_cache_.end(), std::back_inserter(matches_), |
| [this](auto match) { |
| match.allowed_to_be_default_match = false; |
| match.TryRichAutocompletion( |
| base::UTF8ToUTF16(match.destination_url.spec()), match.contents, |
| input_); |
| match.contents_class = |
| DocumentProvider::Classify(match.contents, input_.text()); |
| match.RecordAdditionalInfo("from cache", "true"); |
| return match; |
| }, |
| &MatchesCache::value_type::second); |
| } |
| |
| void DocumentProvider::SetCachedMatchesScoresTo0() { |
| base::ranges::for_each(matches_cache_, [&](auto& cache_key_match_pair) { |
| cache_key_match_pair.second.relevance = 0; |
| }); |
| } |
| |
| void DocumentProvider::DemoteMatchesBeyondMax() { |
| for (size_t i = provider_max_matches_; i < matches_.size(); ++i) |
| matches_[i].relevance = 0; |
| } |
| |
| // static |
| ACMatchClassifications DocumentProvider::Classify( |
| const std::u16string& text, |
| const std::u16string& input_text) { |
| TermMatches term_matches = FindTermMatches(input_text, text); |
| return ClassifyTermMatches(term_matches, text.size(), |
| ACMatchClassification::MATCH, |
| ACMatchClassification::NONE); |
| } |
| |
| // static |
| const GURL DocumentProvider::GetURLForDeduping(const GURL& url) { |
| if (!url.is_valid()) |
| return GURL(); |
| |
| // A memoization cache. Only updated if `ExtractDocIdFromUrl()` was attempted. |
| // That's the most expensive part of this algorithm, and memoizing the earlier |
| // trivial checks would worsen performance by pushing out more useful cache |
| // entries. |
| static base::LRUCache<GURL, GURL> cache(10); |
| const auto& cached = cache.Get(url); |
| if (cached != cache.end()) |
| return cached->second; |
| |
| // Early exit to avoid unnecessary and more involved checks. Don't update the |
| // cache for trivial cases to avoid pushing out a more useful entry. |
| if (!url.DomainIs("google.com")) |
| return GURL(); |
| |
| // We aim to prevent duplicate Drive URLs to appear between the Drive document |
| // search provider and history/bookmark entries. |
| // All URLs are canonicalized to a GURL form only used for deduplication and |
| // not guaranteed to be usable for navigation. |
| |
| // Drive redirects are already handled by the regex in |ExtractDocIdFromUrl|. |
| // The below logic handles google.com redirects; e.g., google.com/url/q=<url> |
| std::string url_str; |
| std::string url_str_host; |
| if (url.host() == "www.google.com" && url.path() == "/url") { |
| if ((!net::GetValueForKeyInQuery(url, "q", &url_str) || url_str.empty()) && |
| (!net::GetValueForKeyInQuery(url, "url", &url_str) || url_str.empty())) |
| return GURL(); |
| url_str_host = GURL(url_str).host(); |
| } else { |
| url_str = url.spec(); |
| url_str_host = url.host(); |
| } |
| |
| // Recheck the domain, since a google URL could redirect to a non-google URL |
| if (!base::EndsWith(url_str_host, "google.com", |
| base::CompareCase::INSENSITIVE_ASCII)) { |
| return GURL(); |
| } |
| |
| // Filter out non-doc hosts. Do this before unescaping the URL below, as |
| // unescaping can be expensive and valid hosts don't contain escapable chars. |
| // Do this after simplifying the google.com redirect above, as that changes |
| // the host. |
| if (!ValidHostPrefix(url_str_host)) |
| return GURL(); |
| |
| // Unescape |url_str| |
| url_str = base::UnescapeURLComponent( |
| url_str, |
| base::UnescapeRule::PATH_SEPARATORS | |
| base::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); |
| |
| const std::string id = ExtractDocIdFromUrl(url_str); |
| |
| // Canonicalize to the /open form without any extra args. |
| // This is similar to what we expect from the server. |
| GURL deduping_url = |
| id.empty() ? GURL() : GURL("https://drive.google.com/open?id=" + id); |
| cache.Put(url, deduping_url); |
| return deduping_url; |
| } |