| // Copyright 2018 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "components/omnibox/browser/document_provider.h" |
| |
| #include <stddef.h> |
| |
| #include <algorithm> |
| #include <iterator> |
| #include <memory> |
| #include <numeric> |
| #include <string> |
| #include <string_view> |
| #include <utility> |
| #include <vector> |
| |
| #include "base/containers/adapters.h" |
| #include "base/containers/fixed_flat_map.h" |
| #include "base/containers/fixed_flat_set.h" |
| #include "base/containers/lru_cache.h" |
| #include "base/feature_list.h" |
| #include "base/functional/bind.h" |
| #include "base/i18n/case_conversion.h" |
| #include "base/json/json_reader.h" |
| #include "base/metrics/histogram_functions.h" |
| #include "base/no_destructor.h" |
| #include "base/strings/strcat.h" |
| #include "base/strings/string_util.h" |
| #include "base/strings/stringprintf.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "base/trace_event/trace_event.h" |
| #include "base/values.h" |
| #include "components/omnibox/browser/autocomplete_enums.h" |
| #include "components/omnibox/browser/autocomplete_input.h" |
| #include "components/omnibox/browser/autocomplete_match.h" |
| #include "components/omnibox/browser/autocomplete_match_classification.h" |
| #include "components/omnibox/browser/autocomplete_provider.h" |
| #include "components/omnibox/browser/autocomplete_provider_client.h" |
| #include "components/omnibox/browser/autocomplete_provider_listener.h" |
| #include "components/omnibox/browser/document_suggestions_service.h" |
| #include "components/omnibox/browser/in_memory_url_index_types.h" |
| #include "components/omnibox/browser/omnibox_field_trial.h" |
| #include "components/omnibox/browser/remote_suggestions_service.h" |
| #include "components/omnibox/browser/search_suggestion_parser.h" |
| #include "components/omnibox/common/omnibox_feature_configs.h" |
| #include "components/omnibox/common/omnibox_features.h" |
| #include "components/search/search.h" |
| #include "components/search_engines/search_engine_type.h" |
| #include "components/search_engines/template_url_service.h" |
| #include "components/signin/public/identity_manager/tribool.h" |
| #include "components/strings/grit/components_strings.h" |
| #include "net/base/url_util.h" |
| #include "services/network/public/cpp/simple_url_loader.h" |
| #include "services/network/public/mojom/url_response_head.mojom.h" |
| #include "third_party/metrics_proto/omnibox_event.pb.h" |
| #include "third_party/metrics_proto/omnibox_focus_type.pb.h" |
| #include "third_party/re2/src/re2/re2.h" |
| #include "ui/base/l10n/l10n_util.h" |
| |
| namespace { |
| |
// Inclusive upper bound on the length of inputs for which drive suggestions
// are requested from the backend. The matching lower bound is read from
// `omnibox_feature_configs::DocumentProvider::Get().min_query_length`.
constexpr size_t kMaxQueryLength = 200;
| |
// Outcome of the checks in `DocumentProvider::IsDocumentProviderAllowed()`,
// recorded to the "Omnibox.DocumentSuggest.ProviderAllowed" histogram.
//
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
//
// Keep up to date with DocumentProviderAllowedReason in
// //tools/metrics/histograms/metadata/omnibox/enums.xml.
enum class DocumentProviderAllowedReason : int {
  kAllowed = 0,
  kUnknown = 1,
  kFeatureDisabled = 2,
  kSuggestSettingDisabled = 3,
  kDriveSettingDisabledObsolete = 4,
  kOffTheRecord = 5,
  kNotLoggedIn = 6,
  kNotSyncing = 7,
  kBackoff = 8,
  kDSENotGoogle = 9,
  kInputOnFocusOrEmpty = 10,
  // Despite the name, also recorded when the input is longer than the maximum
  // allowed query length.
  kInputTooShort = 11,
  kInputLooksLikeUrl = 12,
  kNotEnterpriseEligible = 13,
  kMaxValue = kNotEnterpriseEligible,
};
| |
| void LogOmniboxDocumentRequest(RemoteRequestEvent request_event) { |
| base::UmaHistogramEnumeration("Omnibox.DocumentSuggest.Requests", |
| request_event); |
| } |
| |
| void LogTotalTime(base::TimeTicks start_time, bool interrupted) { |
| DCHECK(!start_time.is_null()); |
| const base::TimeDelta elapsed_time = base::TimeTicks::Now() - start_time; |
| base::UmaHistogramTimes("Omnibox.DocumentSuggest.TotalTime", elapsed_time); |
| if (interrupted) { |
| base::UmaHistogramTimes("Omnibox.DocumentSuggest.TotalTime.Interrupted", |
| elapsed_time); |
| } else { |
| base::UmaHistogramTimes("Omnibox.DocumentSuggest.TotalTime.NotInterrupted", |
| elapsed_time); |
| } |
| } |
| |
| void LogRequestTime(base::TimeTicks start_time, bool interrupted) { |
| DCHECK(!start_time.is_null()); |
| const base::TimeDelta elapsed_time = base::TimeTicks::Now() - start_time; |
| base::UmaHistogramTimes("Omnibox.DocumentSuggest.RequestTime", elapsed_time); |
| if (interrupted) { |
| base::UmaHistogramTimes("Omnibox.DocumentSuggest.RequestTime.Interrupted", |
| elapsed_time); |
| } else { |
| base::UmaHistogramTimes( |
| "Omnibox.DocumentSuggest.RequestTime.NotInterrupted", elapsed_time); |
| } |
| } |
| |
// MIME types sent by the server for the Google editor document types. They
// select both the match icon and the product name shown in the description.
constexpr char kDocumentMimetype[] = "application/vnd.google-apps.document";
constexpr char kFormMimetype[] = "application/vnd.google-apps.form";
constexpr char kSpreadsheetMimetype[] =
    "application/vnd.google-apps.spreadsheet";
constexpr char kPresentationMimetype[] =
    "application/vnd.google-apps.presentation";
| |
| // Returns mappings from MIME types to overridden icons. |
| AutocompleteMatch::DocumentType GetIconForMIMEType(std::string_view mimetype) { |
| constexpr auto kIconMap = |
| base::MakeFixedFlatMap<std::string_view, AutocompleteMatch::DocumentType>( |
| { |
| {kDocumentMimetype, AutocompleteMatch::DocumentType::DRIVE_DOCS}, |
| {kFormMimetype, AutocompleteMatch::DocumentType::DRIVE_FORMS}, |
| {kSpreadsheetMimetype, |
| AutocompleteMatch::DocumentType::DRIVE_SHEETS}, |
| {kPresentationMimetype, |
| AutocompleteMatch::DocumentType::DRIVE_SLIDES}, |
| {"image/jpeg", AutocompleteMatch::DocumentType::DRIVE_IMAGE}, |
| {"image/png", AutocompleteMatch::DocumentType::DRIVE_IMAGE}, |
| {"image/gif", AutocompleteMatch::DocumentType::DRIVE_IMAGE}, |
| {"application/pdf", AutocompleteMatch::DocumentType::DRIVE_PDF}, |
| {"video/mp4", AutocompleteMatch::DocumentType::DRIVE_VIDEO}, |
| {"application/vnd.google-apps.folder", |
| AutocompleteMatch::DocumentType::DRIVE_FOLDER}, |
| }); |
| |
| const auto it = kIconMap.find(mimetype); |
| return it != kIconMap.end() ? it->second |
| : AutocompleteMatch::DocumentType::DRIVE_OTHER; |
| } |
| |
// Appends the elements of `v2` to the end of `v1` in place and returns a
// reference to `v1`. Returning a reference (rather than a copy of the whole
// vector) keeps repeated calls from re-copying everything accumulated so far;
// callers that want a copy can still initialize one from the result.
template <typename T>
std::vector<T>& Concat(std::vector<T>& v1, const std::vector<T>& v2) {
  v1.insert(v1.end(), v2.begin(), v2.end());
  return v1;
}
| |
| // Extracts a list of pointers to strings from a DictionaryValue containing a |
| // list of objects containing a string field of interest. Note that pointers may |
| // be `nullptr` if the value at `field_path` is not found or is not a string. |
| std::vector<const std::string*> ExtractResultList( |
| const base::Value::Dict& result, |
| std::string_view list_path, |
| std::string_view field_path) { |
| const base::Value::List* list = result.FindListByDottedPath(list_path); |
| if (!list) { |
| return {}; |
| } |
| |
| std::vector<const std::string*> extracted; |
| for (const auto& value : *list) { |
| auto* string = value.GetDict().FindString(field_path); |
| if (string) |
| extracted.push_back(string); |
| } |
| return extracted; |
| } |
| |
| // Return whether `user` owns the doc `result`. |
| bool IsOwnedByUser(const std::string& user, const base::Value::Dict& result) { |
| std::vector<const std::string*> owner_emails = ExtractResultList( |
| result, "metadata.owner.emailAddresses", "emailAddress"); |
| const auto lower_user = base::i18n::ToLower(base::UTF8ToUTF16(user)); |
| return std::ranges::any_of( |
| owner_emails, |
| [&](const std::u16string& email) { return lower_user == email; }, |
| [&](const std::string* email) { |
| return base::i18n::ToLower(base::UTF8ToUTF16(*email)); |
| }); |
| } |
| |
| // Return whether all words in `input` are contained in either the `result` |
| // title or owners. |
| bool IsCompletelyMatchedInTitleOrOwner(const std::u16string& input, |
| const base::Value::Dict& result) { |
| // Accumulate a vector of the title and all owners. |
| auto search_strings = ExtractResultList( |
| result, "metadata.owner.emailAddresses", "emailAddress"); |
| Concat(search_strings, ExtractResultList(result, "metadata.owner.personNames", |
| "displayName")); |
| search_strings.push_back(result.FindString("title")); |
| |
| // Extract a flat vector of words from the title and owners. |
| const auto title_and_owner_words = std::accumulate( |
| search_strings.begin(), search_strings.end(), String16Vector(), |
| [](String16Vector accumulated, const auto& search_string) { |
| Concat(accumulated, |
| String16VectorFromString16( |
| base::i18n::ToLower(base::UTF8ToUTF16(*search_string)), |
| nullptr)); |
| return accumulated; |
| }); |
| |
| // Check if all input words are contained in `title_and_owner_words`. |
| String16Vector input_words = |
| String16VectorFromString16(base::i18n::ToLower(input), nullptr); |
| for (const auto& input_word : input_words) { |
| // It's possible `input` contained 'owner' as a word, as opposed to |
| // 'owner:...' as an operator. Ignore this rare edge case for simplicity. |
| if (input_word != u"owner" && |
| std::ranges::none_of( |
| title_and_owner_words, [&](const std::u16string& title_word) { |
| return base::StartsWith(title_word, input_word, |
| base::CompareCase::INSENSITIVE_ASCII); |
| })) { |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| // Derived from google3/apps/share/util/docs_url_extractor.cc. |
| std::string ExtractDocIdFromUrl(const std::string& url) { |
| static const base::NoDestructor<RE2> docs_url_pattern( |
| "\\b(" // The first groups matches the whole URL. |
| // Domain. |
| "(?:https?://)?(?:" |
| // Keep the hosts consistent with `ValidHostPrefix()`. |
| "spreadsheets|docs|drive|script|sites|jamboard" |
| ")[0-9]?\\.google\\.com" |
| "(?::[0-9]+)?\\/" // Port. |
| "(?:\\S*)" // Non-whitespace chars. |
| "(?:" |
| // Doc url prefix to match /d/{id}. (?:e/)? deviates from google3. |
| "(?:/d/(?:e/)?(?P<path_docid>[0-9a-zA-Z\\-\\_]+))" |
| "|" |
| // Docs id expr to match a valid id parameter. |
| "(?:(?:\\?|&|&)" |
| "(?:id|docid|key|docID|DocId)=(?P<query_docid>[0-9a-zA-Z\\-\\_]+))" |
| "|" |
| // Folder url prefix to match /folders/{folder_id}. |
| "(?:/folders/(?P<folder_docid>[0-9a-zA-Z\\-\\_]+))" |
| "|" |
| // Sites url prefix. |
| "(?:/?s/)(?P<sites_docid>[0-9a-zA-Z\\-\\_]+)" |
| "(?:/p/[0-9a-zA-Z\\-\\_]+)?/edit" |
| "|" |
| // Jam url. |
| "(?:d/)(?P<jam_docid>[0-9a-zA-Z\\-\\_]+)/(?:edit|viewer)" |
| ")" |
| // Other valid chars. |
| "(?:[0-9a-zA-Z$\\-\\_\\.\\+\\!\\*\'\\,;:@&=/\\?]*)" |
| // Summarization details. |
| "(?:summarizationDetails=[0-9a-zA-Z$\\-\\_\\.\\+\\!\\*\'\\,;:@&=/" |
| "\\?(?:%5B)(?:%5D)]*)?" |
| // Other valid chars. |
| "(?:[0-9a-zA-Z$\\-\\_\\.\\+\\!\\*\'\\,;:@&=/\\?]*)" |
| "(?:(#[0-9a-zA-Z$\\-\\_\\.\\+\\!\\*\'\\,;:@&=/\\?]+)?)" // Fragment |
| ")"); |
| |
| std::vector<std::string_view> matched_doc_ids( |
| docs_url_pattern->NumberOfCapturingGroups() + 1); |
| // ANCHOR_START deviates from google3 which uses UNANCHORED. Using |
| // ANCHOR_START prevents incorrectly matching with non-drive URLs but which |
| // contain a drive URL; e.g., |
| // url-parser.com/?url=https://docs.google.com/document/d/(id)/edit. |
| if (!docs_url_pattern->Match(url, 0, url.size(), RE2::ANCHOR_START, |
| matched_doc_ids.data(), |
| matched_doc_ids.size())) { |
| return std::string(); |
| } |
| for (const auto& doc_id_group : docs_url_pattern->NamedCapturingGroups()) { |
| std::string_view identified_doc_id = matched_doc_ids[doc_id_group.second]; |
| if (!identified_doc_id.empty()) { |
| return std::string(identified_doc_id); |
| } |
| } |
| return std::string(); |
| } |
| |
| // Verify if the host could possibly be for a valid doc URL. This is a more |
| // lightweight check than `ExtractDocIdFromUrl()`. It can be done before |
| // unescaping the URL as valid hosts don't contain escapable chars; unescaping |
| // is relatively expensive. E.g., 'docs.google.com' isn't a valid doc URL, but |
| // it's host looks like it could be, so return true. On the other hand, |
| // 'google.com' is definitely not a doc URL so return false. |
| bool ValidHostPrefix(const std::string& host) { |
| // There are 66 (5*11) valid, e.g. 'docs5.google.com', so rather than check |
| // all 66, we just check the 6 prefixes. Keep these prefixes consistent with |
| // those in `ExtractDocIdFromUrl()`. |
| constexpr auto kValidHostPrefixes = base::MakeFixedFlatSet<std::string_view>({ |
| "spreadsheets", |
| "docs", |
| "drive", |
| "script", |
| "sites", |
| "jamboard", |
| }); |
| for (const auto& valid_host_prefix : kValidHostPrefixes) { |
| if (base::StartsWith(host, valid_host_prefix, |
| base::CompareCase::INSENSITIVE_ASCII)) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| // If `value[key]`, returns it. Otherwise, returns `fallback`. |
| std::string FindStringKeyOrFallback(const base::Value::Dict& value, |
| std::string_view key, |
| std::string fallback = "") { |
| auto* ptr = value.FindString(key); |
| return ptr ? *ptr : std::move(fallback); |
| } |
| |
| } // namespace |
| |
| // static |
| DocumentProvider* DocumentProvider::Create( |
| AutocompleteProviderClient* client, |
| AutocompleteProviderListener* listener) { |
| return new DocumentProvider(client, listener); |
| } |
| |
| bool DocumentProvider::IsDocumentProviderAllowed( |
| const AutocompleteInput& input) { |
| // Feature must be on. |
| if (!base::FeatureList::IsEnabled(omnibox::kDocumentProvider)) { |
| base::UmaHistogramEnumeration( |
| "Omnibox.DocumentSuggest.ProviderAllowed", |
| DocumentProviderAllowedReason::kFeatureDisabled); |
| return false; |
| } |
| |
| // These may seem like search suggestions, so gate on that setting too. |
| if (!client_->SearchSuggestEnabled()) { |
| base::UmaHistogramEnumeration( |
| "Omnibox.DocumentSuggest.ProviderAllowed", |
| DocumentProviderAllowedReason::kSuggestSettingDisabled); |
| return false; |
| } |
| |
| // No incognito. |
| if (client_->IsOffTheRecord()) { |
| base::UmaHistogramEnumeration("Omnibox.DocumentSuggest.ProviderAllowed", |
| DocumentProviderAllowedReason::kOffTheRecord); |
| return false; |
| } |
| |
| // Must be authenticated. |
| const bool is_authenticated = |
| base::FeatureList::IsEnabled( |
| omnibox::kDocumentProviderPrimaryAccountRequirement) |
| ? client_->GetDocumentSuggestionsService()->HasPrimaryAccount() |
| : client_->IsAuthenticated(); |
| if (!is_authenticated) { |
| base::UmaHistogramEnumeration("Omnibox.DocumentSuggest.ProviderAllowed", |
| DocumentProviderAllowedReason::kNotLoggedIn); |
| return false; |
| } |
| |
| // Must be enterprise eligibile (if the feature is enabled). |
| bool is_enterprise_eligible = true; |
| if (base::FeatureList::IsEnabled( |
| omnibox::kDocumentProviderEnterpriseEligibility)) { |
| const auto& entrprise_account_state = |
| client_->GetDocumentSuggestionsService() |
| ->account_is_workspace_managed(); |
| is_enterprise_eligible = |
| base::FeatureList::IsEnabled( |
| omnibox::kDocumentProviderEnterpriseEligibilityWhenUnknown) |
| ? entrprise_account_state != signin::Tribool::kFalse |
| : entrprise_account_state == signin::Tribool::kTrue; |
| } |
| if (!is_enterprise_eligible) { |
| base::UmaHistogramEnumeration( |
| "Omnibox.DocumentSuggest.ProviderAllowed", |
| DocumentProviderAllowedReason::kNotEnterpriseEligible); |
| return false; |
| } |
| |
| // Sync must be enabled and active. |
| if (!base::FeatureList::IsEnabled( |
| omnibox::kDocumentProviderNoSyncRequirement) && |
| !client_->IsSyncActive()) { |
| base::UmaHistogramEnumeration("Omnibox.DocumentSuggest.ProviderAllowed", |
| DocumentProviderAllowedReason::kNotSyncing); |
| return false; |
| } |
| |
| // We haven't received a server backoff signal. |
| bool should_backoff = |
| omnibox_feature_configs::DocumentProvider::Get().scope_backoff_to_profile |
| ? client_->GetDocumentSuggestionsService()->should_backoff() |
| : backoff_for_this_instance_only_; |
| if (should_backoff) { |
| base::UmaHistogramEnumeration("Omnibox.DocumentSuggest.ProviderAllowed", |
| DocumentProviderAllowedReason::kBackoff); |
| return false; |
| } |
| |
| // Google must be set as default search provider. |
| auto* template_url_service = client_->GetTemplateURLService(); |
| if (!search::DefaultSearchProviderIsGoogle(template_url_service)) { |
| base::UmaHistogramEnumeration("Omnibox.DocumentSuggest.ProviderAllowed", |
| DocumentProviderAllowedReason::kDSENotGoogle); |
| return false; |
| } |
| |
| // There should be no document suggestions fetched for on-focus suggestion |
| // requests, or if the input is empty. |
| if (input.IsZeroSuggest() || |
| input.type() == metrics::OmniboxInputType::EMPTY) { |
| base::UmaHistogramEnumeration( |
| "Omnibox.DocumentSuggest.ProviderAllowed", |
| DocumentProviderAllowedReason::kInputOnFocusOrEmpty); |
| return false; |
| } |
| |
| // Don't issue queries for inputs whose lengths aren't in the intended range. |
| if (input.text().length() < |
| omnibox_feature_configs::DocumentProvider::Get().min_query_length || |
| input.text().length() > kMaxQueryLength) { |
| base::UmaHistogramEnumeration( |
| "Omnibox.DocumentSuggest.ProviderAllowed", |
| DocumentProviderAllowedReason::kInputTooShort); |
| return false; |
| } |
| |
| // Don't issue queries for input likely to be a URL. |
| if (IsInputLikelyURL(input)) { |
| base::UmaHistogramEnumeration( |
| "Omnibox.DocumentSuggest.ProviderAllowed", |
| DocumentProviderAllowedReason::kInputLooksLikeUrl); |
| return false; |
| } |
| |
| base::UmaHistogramEnumeration("Omnibox.DocumentSuggest.ProviderAllowed", |
| DocumentProviderAllowedReason::kAllowed); |
| return true; |
| } |
| |
| // static |
| bool DocumentProvider::IsInputLikelyURL(const AutocompleteInput& input) { |
| if (input.type() == metrics::OmniboxInputType::URL) |
| return true; |
| |
| // Special cases when the user might be starting to type the most common URL |
| // prefixes, but the SchemeClassifier won't have classified them as URLs yet. |
| // Note these checks are of the form "(string constant) starts with input." |
| if (input.text().length() <= 8) { |
| if (StartsWith(u"https://", input.text(), |
| base::CompareCase::INSENSITIVE_ASCII) || |
| StartsWith(u"http://", input.text(), |
| base::CompareCase::INSENSITIVE_ASCII) || |
| StartsWith(u"www.", input.text(), |
| base::CompareCase::INSENSITIVE_ASCII)) { |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| void DocumentProvider::Start(const AutocompleteInput& input, |
| bool minimal_changes) { |
| TRACE_EVENT0("omnibox", "DocumentProvider::Start"); |
| Stop(AutocompleteStopReason::kClobbered); |
| // Perform various checks - feature is enabled, user is allowed to use the |
| // feature, we're not under backoff, etc. |
| if (!IsDocumentProviderAllowed(input)) |
| return; |
| |
| input_ = input; |
| |
| // Return cached suggestions synchronously after setting the relevance of any |
| // beyond |provider_max_matches_| to 0. |
| CopyCachedMatchesToMatches(); |
| DemoteMatchesBeyondMax(); |
| |
| if (input.omit_asynchronous_matches()) { |
| return; |
| } |
| |
| done_ = false; // Set true in callbacks. |
| debouncer_->RequestRun( |
| base::BindOnce(&DocumentProvider::Run, base::Unretained(this), input)); |
| } |
| |
| void DocumentProvider::Run(const AutocompleteInput& input) { |
| // DocumentSuggestionsServiceFactory does not create a service instance for |
| // OTR profiles. We should not get this far for those profiles. |
| DCHECK(!client_->IsOffTheRecord()); |
| time_run_invoked_ = base::TimeTicks::Now(); |
| client_->GetRemoteSuggestionsService(/*create_if_necessary=*/true) |
| ->CreateDocumentSuggestionsRequest( |
| input_.text(), /*is_off_the_record=*/false, |
| input.current_page_classification(), |
| base::BindOnce( |
| &DocumentProvider::OnDocumentSuggestionsLoaderAvailable, |
| weak_ptr_factory_.GetWeakPtr()), |
| base::BindOnce( |
| &DocumentProvider::OnURLLoadComplete, |
| base::Unretained(this) /* this owns SimpleURLLoader */)); |
| } |
| |
| void DocumentProvider::Stop(AutocompleteStopReason stop_reason) { |
| TRACE_EVENT0("omnibox", "DocumentProvider::Stop"); |
| AutocompleteProvider::Stop(stop_reason); |
| |
| debouncer_->CancelRequest(); |
| |
| if (auto* remote_suggestions_service = |
| client_->GetRemoteSuggestionsService(/*create_if_necessary=*/false)) { |
| remote_suggestions_service->StopCreatingDocumentSuggestionsRequest(); |
| } |
| |
| // If the request was sent, then log its duration and that it was invalidated. |
| if (loader_) { |
| DCHECK(!time_run_invoked_.is_null()); |
| DCHECK(!time_request_sent_.is_null()); |
| loader_.reset(); |
| LogRequestTime(time_request_sent_, true); |
| time_request_sent_ = base::TimeTicks(); |
| LogOmniboxDocumentRequest(RemoteRequestEvent::kRequestInvalidated); |
| } |
| |
| // If `Run()` has been invoked, log its duration. It's possible `Stop()` is |
| // invoked before `Run()` has been invoked if 1) this is the first user input, |
| // 2) the previous call was debounced, or 3) the previous request was filtered |
| // (e.g. input too short). |
| if (!time_run_invoked_.is_null()) { |
| LogTotalTime(time_run_invoked_, true); |
| time_run_invoked_ = base::TimeTicks(); |
| } |
| } |
| |
| void DocumentProvider::DeleteMatch(const AutocompleteMatch& match) { |
| // Not supported by this provider. |
| return; |
| } |
| |
| void DocumentProvider::AddProviderInfo(ProvidersInfo* provider_info) const { |
| provider_info->push_back(metrics::OmniboxEventProto_ProviderInfo()); |
| metrics::OmniboxEventProto_ProviderInfo& new_entry = provider_info->back(); |
| new_entry.set_provider(metrics::OmniboxEventProto::DOCUMENT); |
| new_entry.set_provider_done(done_); |
| } |
| |
| DocumentProvider::DocumentProvider(AutocompleteProviderClient* client, |
| AutocompleteProviderListener* listener) |
| : AutocompleteProvider(AutocompleteProvider::TYPE_DOCUMENT), |
| client_(client), |
| debouncer_(std::make_unique<AutocompleteProviderDebouncer>(true, 300)), |
| matches_cache_(20), |
| task_runner_(base::SequencedTaskRunner::GetCurrentDefault()) { |
| AddListener(listener); |
| } |
| |
| DocumentProvider::~DocumentProvider() = default; |
| |
| void DocumentProvider::OnURLLoadComplete( |
| const network::SimpleURLLoader* source, |
| const int response_code, |
| std::unique_ptr<std::string> response_body) { |
| DCHECK(!done_); |
| DCHECK_EQ(loader_.get(), source); |
| |
| LogRequestTime(time_request_sent_, false); |
| LogOmniboxDocumentRequest(RemoteRequestEvent::kResponseReceived); |
| base::UmaHistogramSparse("Omnibox.DocumentSuggest.HttpResponseCode", |
| response_code); |
| |
| // Also log the response code sliced by the enterprise account capability. |
| const auto& account_is_workspace_managed = signin::TriboolToString( |
| client_->GetDocumentSuggestionsService()->account_is_workspace_managed()); |
| base::UmaHistogramSparse( |
| base::StringPrintf("Omnibox.DocumentSuggest.HttpResponseCode." |
| "IsSubjectToEnterprisePolicies.%s", |
| account_is_workspace_managed), |
| response_code); |
| |
| // The following are codes that we believe indicate non-transient failures, |
| // based on experience working with the owners of the API. Since they are |
| // expected to be semi-persistent, it does not make sense to continue to issue |
| // requests during the current session after receiving one. |
| if (response_code == 400 || response_code == 401 || response_code == 403 || |
| response_code == 499) { |
| bool scope_backoff_to_profile = |
| omnibox_feature_configs::DocumentProvider::Get() |
| .scope_backoff_to_profile; |
| if (scope_backoff_to_profile) { |
| client_->GetDocumentSuggestionsService()->set_should_backoff(true); |
| base::TimeDelta backoff_duration = |
| omnibox_feature_configs::DocumentProvider::Get().backoff_duration; |
| if (backoff_duration > base::TimeDelta()) { |
| task_runner_->PostDelayedTask( |
| FROM_HERE, |
| base::BindOnce(&DocumentProvider::ResetBackoffState, |
| weak_ptr_factory_.GetWeakPtr()), |
| backoff_duration); |
| } |
| } else { |
| backoff_for_this_instance_only_ = true; |
| } |
| } |
| |
| const bool results_updated = |
| response_code == 200 && |
| UpdateResults(SearchSuggestionParser::ExtractJsonData( |
| source, std::move(response_body))); |
| LogTotalTime(time_run_invoked_, false); |
| loader_.reset(); |
| done_ = true; |
| NotifyListeners(results_updated); |
| } |
| |
| void DocumentProvider::ResetBackoffState() { |
| client_->GetDocumentSuggestionsService()->set_should_backoff(false); |
| } |
| |
| bool DocumentProvider::UpdateResults(const std::string& json_data) { |
| std::optional<base::Value> response = |
| base::JSONReader::Read(json_data, base::JSON_ALLOW_TRAILING_COMMAS); |
| if (!response) |
| return false; |
| |
| // 1) Fill |matches_| with the new server matches. |
| matches_ = ParseDocumentSearchResults(*response); |
| |
| // 2) Limit matches to |provider_max_matches_| unless used for deduping; i.e. |
| // set the scores of matches beyond the limit to 0. |
| DemoteMatchesBeyondMax(); |
| // 3) Clear cached matches' scores to ensure cached matches for all but the |
| // previous input can only be shown if deduped. E.g., this allows matches for |
| // the input 'pari' to be displayed synchronously for the input 'paris', but |
| // be hidden if the user clears their input and starts anew 'london'. |
| SetCachedMatchesScoresTo0(); |
| // 4) Copy the cached matches to |matches_|. |
| CopyCachedMatchesToMatches(); |
| // 5) Push the new matches to the cache. Keep their scores so that later |
| // inputs continue showing them until the new doc response returns. |
| for (const AutocompleteMatch& match : base::Reversed(matches_)) |
| matches_cache_.Put(match.stripped_destination_url, match); |
| |
| return !matches_.empty(); |
| } |
| |
| void DocumentProvider::OnDocumentSuggestionsLoaderAvailable( |
| std::unique_ptr<network::SimpleURLLoader> loader) { |
| time_request_sent_ = base::TimeTicks::Now(); |
| loader_ = std::move(loader); |
| LogOmniboxDocumentRequest(RemoteRequestEvent::kRequestSent); |
| } |
| |
| // static |
| std::u16string DocumentProvider::GenerateLastModifiedString( |
| const std::string& modified_timestamp_string, |
| base::Time now) { |
| if (modified_timestamp_string.empty()) |
| return std::u16string(); |
| base::Time modified_time; |
| if (!base::Time::FromString(modified_timestamp_string.c_str(), |
| &modified_time)) |
| return std::u16string(); |
| |
| return AutocompleteProvider::LocalizedLastModifiedString(now, modified_time); |
| } |
| |
| // static |
| std::u16string DocumentProvider::GetProductDescriptionString( |
| const std::string& mimetype) { |
| if (mimetype == kDocumentMimetype) { |
| return l10n_util::GetStringUTF16( |
| IDS_CONTENT_SUGGESTION_DESCRIPTION_GOOGLE_DOCS); |
| } |
| if (mimetype == kFormMimetype) { |
| return l10n_util::GetStringUTF16( |
| IDS_CONTENT_SUGGESTION_DESCRIPTION_GOOGLE_FORMS); |
| } |
| if (mimetype == kSpreadsheetMimetype) { |
| return l10n_util::GetStringUTF16( |
| IDS_CONTENT_SUGGESTION_DESCRIPTION_GOOGLE_SHEETS); |
| } |
| if (mimetype == kPresentationMimetype) { |
| return l10n_util::GetStringUTF16( |
| IDS_CONTENT_SUGGESTION_DESCRIPTION_GOOGLE_SLIDES); |
| } |
| // Fallback to "Drive" for other filetypes. |
| return l10n_util::GetStringUTF16( |
| IDS_CONTENT_SUGGESTION_DESCRIPTION_GOOGLE_DRIVE); |
| } |
| |
| // static |
| std::u16string DocumentProvider::GetMatchDescription( |
| const std::string& update_time, |
| const std::string& mimetype, |
| const std::string& owner) { |
| std::u16string mime_desc = GetProductDescriptionString(mimetype); |
| if (!update_time.empty()) { |
| std::u16string date_desc = |
| GenerateLastModifiedString(update_time, base::Time::Now()); |
| return owner.empty() |
| ? l10n_util::GetStringFUTF16( |
| IDS_CONTENT_SUGGESTION_DESCRIPTION_TEMPLATE_WITHOUT_OWNER, |
| date_desc, mime_desc) |
| : l10n_util::GetStringFUTF16( |
| IDS_CONTENT_SUGGESTION_DESCRIPTION_TEMPLATE, date_desc, |
| base::UTF8ToUTF16(owner), mime_desc); |
| } |
| return owner.empty() |
| ? std::move(mime_desc) |
| : l10n_util::GetStringFUTF16( |
| IDS_CONTENT_SUGGESTION_DESCRIPTION_TEMPLATE_WITHOUT_DATE, |
| base::UTF8ToUTF16(owner), mime_desc); |
| } |
| |
| ACMatches DocumentProvider::ParseDocumentSearchResults( |
| const base::Value& root_val) { |
| ACMatches matches; |
| |
| // Parse the results. |
| const base::Value::List* results = root_val.GetDict().FindList("results"); |
| if (!results) { |
| return matches; |
| } |
| size_t num_results = results->size(); |
| base::UmaHistogramCounts1M("Omnibox.DocumentSuggest.ResultCount", |
| num_results); |
| |
| // Ensure server's suggestions are added with monotonically decreasing scores. |
| int previous_score = INT_MAX; |
| |
| // Number of matches that are neither owned nor a complete title or owner |
| // match. |
| int low_quality_match_count = 0; |
| |
| for (size_t i = 0; i < num_results; i++) { |
| const base::Value& result_value = (*results)[i]; |
| if (!result_value.is_dict()) { |
| return matches; |
| } |
| |
| const base::Value::Dict& result = result_value.GetDict(); |
| const std::string title = FindStringKeyOrFallback(result, "title"); |
| const std::string url = FindStringKeyOrFallback(result, "url"); |
| if (title.empty() || url.empty()) { |
| continue; |
| } |
| |
| int score = result.FindInt("score").value_or(0); |
| |
| // Decrement scores if necessary to ensure suggestion order is preserved. |
| // Don't decrement client scores which don't necessarily rank suggestions |
| // the same order as the server. |
| if (score >= previous_score) |
| score = std::max(previous_score - 1, 0); |
| previous_score = score; |
| |
| // Only allow up to 1 doc that is neither owned nor a complete title or |
| // owner match. |
| bool is_owned = IsOwnedByUser(client_->ProfileUserName(), result); |
| bool is_completely_matched_in_title_and_owner = |
| IsCompletelyMatchedInTitleOrOwner(input_.text(), result); |
| if (!is_owned && !is_completely_matched_in_title_and_owner && |
| ++low_quality_match_count > 1) { |
| score = 0; |
| } |
| |
| AutocompleteMatch match(this, score, false, |
| AutocompleteMatchType::DOCUMENT_SUGGESTION); |
| // Use full URL for navigation. If present, use "originalUrl" for display & |
| // deduping, as it's shorter. |
| const std::string short_url = |
| FindStringKeyOrFallback(result, "originalUrl", url); |
| match.fill_into_edit = base::UTF8ToUTF16(short_url); |
| match.destination_url = GURL(url); |
| // `AutocompleteMatch::GURLToStrippedGURL()` will try to use |
| // `GetURLForDeduping()` to extract a doc ID and generate a canonical doc |
| // URL; this is ideal as it handles different URL formats pointing to the |
| // same doc. Otherwise, it'll resort to the typical stripped URL generation |
| // that can still be used for generic deduping and as a key to |
| // `matches_cache_`. |
| match.stripped_destination_url = AutocompleteMatch::GURLToStrippedGURL( |
| GURL(short_url), input_, client_->GetTemplateURLService(), |
| std::u16string(), /*keep_search_intent_params=*/false); |
| |
| match.contents = |
| AutocompleteMatch::SanitizeString(base::UTF8ToUTF16(title)); |
| match.contents_class = Classify(match.contents, input_.text()); |
| const base::Value::Dict* metadata = result.FindDict("metadata"); |
| if (metadata) { |
| const std::string update_time = |
| FindStringKeyOrFallback(*metadata, "updateTime"); |
| const std::string mimetype = |
| FindStringKeyOrFallback(*metadata, "mimeType"); |
| if (metadata->FindString("mimeType")) { |
| match.document_type = GetIconForMIMEType(mimetype); |
| match.RecordAdditionalInfo( |
| "document type", |
| AutocompleteMatch::DocumentTypeString(match.document_type)); |
| } |
| auto owners = ExtractResultList(result, "metadata.owner.personNames", |
| "displayName"); |
| const std::string owner = !owners.empty() ? *owners[0] : ""; |
| if (!owner.empty()) |
| match.RecordAdditionalInfo("document owner", owner); |
| match.description = GetMatchDescription(update_time, mimetype, owner); |
| AutocompleteMatch::AddLastClassificationIfNecessary( |
| &match.description_class, 0, ACMatchClassification::DIM); |
| // Exclude date & owner from description_for_shortcut to avoid showing |
| // stale data from the shortcuts provider. |
| match.description_for_shortcuts = GetMatchDescription("", mimetype, ""); |
| AutocompleteMatch::AddLastClassificationIfNecessary( |
| &match.description_class_for_shortcuts, 0, |
| ACMatchClassification::DIM); |
| match.RecordAdditionalInfo("description_for_shortcuts", |
| match.description_for_shortcuts); |
| } |
| |
| match.TryRichAutocompletion(input_, |
| base::UTF8ToUTF16(match.destination_url.spec()), |
| match.contents); |
| match.transition = ui::PAGE_TRANSITION_GENERATED; |
| match.RecordAdditionalInfo("owned", is_owned); |
| match.RecordAdditionalInfo("completely matched in title and owner", |
| is_completely_matched_in_title_and_owner); |
| if (matches.size() >= provider_max_matches_) |
| match.RecordAdditionalInfo("for deduping only", "true"); |
| const std::string* snippet = |
| result.FindStringByDottedPath("snippet.snippet"); |
| if (snippet) |
| match.RecordAdditionalInfo("snippet", *snippet); |
| matches.push_back(match); |
| } |
| return matches; |
| } |
| |
| void DocumentProvider::CopyCachedMatchesToMatches() { |
| std::ranges::transform( |
| matches_cache_, std::back_inserter(matches_), |
| [this](auto match) { |
| match.allowed_to_be_default_match = false; |
| match.TryRichAutocompletion( |
| input_, base::UTF8ToUTF16(match.destination_url.spec()), |
| match.contents); |
| match.contents_class = |
| DocumentProvider::Classify(match.contents, input_.text()); |
| match.RecordAdditionalInfo("from cache", "true"); |
| return match; |
| }, |
| &MatchesCache::value_type::second); |
| } |
| |
| void DocumentProvider::SetCachedMatchesScoresTo0() { |
| std::ranges::for_each(matches_cache_, [&](auto& cache_key_match_pair) { |
| cache_key_match_pair.second.relevance = 0; |
| }); |
| } |
| |
| void DocumentProvider::DemoteMatchesBeyondMax() { |
| // Allow all matches to retain their scores if unlimited matches param is |
| // enabled. |
| if (OmniboxFieldTrial::IsMlUrlScoringUnlimitedNumCandidatesEnabled()) { |
| return; |
| } |
| |
| for (size_t i = provider_max_matches_; i < matches_.size(); ++i) |
| matches_[i].relevance = 0; |
| } |
| |
| // static |
| ACMatchClassifications DocumentProvider::Classify( |
| const std::u16string& text, |
| const std::u16string& input_text) { |
| TermMatches term_matches = FindTermMatches(input_text, text); |
| return ClassifyTermMatches(term_matches, text.size(), |
| ACMatchClassification::MATCH, |
| ACMatchClassification::NONE); |
| } |
| |
| // static |
| const GURL DocumentProvider::GetURLForDeduping(const GURL& url) { |
| if (!url.is_valid()) |
| return GURL(); |
| |
| // A memoization cache. Only updated if `ExtractDocIdFromUrl()` was attempted. |
| // That's the most expensive part of this algorithm, and memoizing the earlier |
| // trivial checks would worsen performance by pushing out more useful cache |
| // entries. |
| static base::NoDestructor<base::LRUCache<GURL, GURL>> cache(10); |
| const auto& cached = cache->Get(url); |
| if (cached != cache->end()) { |
| return cached->second; |
| } |
| |
| // Early exit to avoid unnecessary and more involved checks. Don't update the |
| // cache for trivial cases to avoid pushing out a more useful entry. |
| if (!url.DomainIs("google.com")) |
| return GURL(); |
| |
| // We aim to prevent duplicate Drive URLs to appear between the Drive document |
| // search provider and history/bookmark entries. |
| // All URLs are canonicalized to a GURL form only used for deduplication and |
| // not guaranteed to be usable for navigation. |
| |
| // Drive redirects are already handled by the regex in |ExtractDocIdFromUrl|. |
| // The below logic handles google.com redirects; e.g., google.com/url/q=<url> |
| std::string url_str; |
| std::string url_str_host; |
| if (url.host() == "www.google.com" && url.path() == "/url") { |
| if ((!net::GetValueForKeyInQuery(url, "q", &url_str) || url_str.empty()) && |
| (!net::GetValueForKeyInQuery(url, "url", &url_str) || url_str.empty())) |
| return GURL(); |
| url_str_host = GURL(url_str).host(); |
| } else { |
| url_str = url.spec(); |
| url_str_host = url.host(); |
| } |
| |
| // Recheck the domain, since a google URL could redirect to a non-google URL |
| if (!base::EndsWith(url_str_host, "google.com", |
| base::CompareCase::INSENSITIVE_ASCII)) { |
| return GURL(); |
| } |
| |
| // Filter out non-doc hosts. Do this before unescaping the URL below, as |
| // unescaping can be expensive and valid hosts don't contain escapable chars. |
| // Do this after simplifying the google.com redirect above, as that changes |
| // the host. |
| if (!ValidHostPrefix(url_str_host)) |
| return GURL(); |
| |
| // Unescape |url_str| |
| url_str = base::UnescapeURLComponent( |
| url_str, |
| base::UnescapeRule::PATH_SEPARATORS | |
| base::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); |
| |
| const std::string id = ExtractDocIdFromUrl(url_str); |
| |
| // Canonicalize to the /open form without any extra args. |
| // This is similar to what we expect from the server. |
| GURL deduping_url = |
| id.empty() ? GURL() : GURL("https://drive.google.com/open?id=" + id); |
| cache->Put(url, deduping_url); |
| return deduping_url; |
| } |