blob: 975f5dbe02ead849dae7bb5b2721b80a25865cec [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/omnibox/browser/document_provider.h"
#include <limits.h>
#include <stddef.h>
#include <algorithm>
#include <cmath>
#include <map>
#include <numeric>
#include <string>
#include <utility>
#include <vector>
#include "base/bind.h"
#include "base/callback.h"
#include "base/feature_list.h"
#include "base/i18n/case_conversion.h"
#include "base/i18n/time_formatting.h"
#include "base/json/json_reader.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/strcat.h"
#include "base/strings/string16.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "base/trace_event/trace_event.h"
#include "components/bookmarks/browser/bookmark_utils.h"
#include "components/omnibox/browser/autocomplete_input.h"
#include "components/omnibox/browser/autocomplete_match.h"
#include "components/omnibox/browser/autocomplete_match_classification.h"
#include "components/omnibox/browser/autocomplete_provider.h"
#include "components/omnibox/browser/autocomplete_provider_client.h"
#include "components/omnibox/browser/autocomplete_provider_listener.h"
#include "components/omnibox/browser/document_suggestions_service.h"
#include "components/omnibox/browser/history_provider.h"
#include "components/omnibox/browser/in_memory_url_index_types.h"
#include "components/omnibox/browser/keyword_provider.h"
#include "components/omnibox/browser/omnibox_field_trial.h"
#include "components/omnibox/browser/omnibox_pref_names.h"
#include "components/omnibox/browser/search_provider.h"
#include "components/omnibox/common/omnibox_features.h"
#include "components/pref_registry/pref_registry_syncable.h"
#include "components/prefs/pref_service.h"
#include "components/search_engines/search_engine_type.h"
#include "components/search_engines/template_url_service.h"
#include "components/strings/grit/components_strings.h"
#include "net/base/url_util.h"
#include "services/network/public/cpp/resource_response.h"
#include "services/network/public/cpp/simple_url_loader.h"
#include "third_party/metrics_proto/omnibox_event.pb.h"
#include "third_party/re2/src/re2/re2.h"
#include "ui/base/l10n/l10n_util.h"
#include "url/gurl.h"
namespace {
// TODO(skare): Pull the enum in search_provider.cc into its .h file, and switch
// this file and zero_suggest_provider.cc to use it.
// Values recorded in the "Omnibox.DocumentSuggest.Requests" histogram to track
// the lifecycle of a document suggestion request.
enum DocumentRequestsHistogramValue {
// Logged when the loader for a request becomes available.
DOCUMENT_REQUEST_SENT = 1,
// Logged when Stop() drops a request whose loader is still alive.
DOCUMENT_REQUEST_INVALIDATED = 2,
// Logged when a request's load completes.
DOCUMENT_REPLY_RECEIVED = 3,
// Exclusive upper bound handed to the histogram macro; never logged itself.
DOCUMENT_MAX_REQUEST_HISTOGRAM_VALUE
};
// Records |request_value| in the "Omnibox.DocumentSuggest.Requests"
// enumeration histogram, using DOCUMENT_MAX_REQUEST_HISTOGRAM_VALUE as the
// histogram's boundary value.
void LogOmniboxDocumentRequest(DocumentRequestsHistogramValue request_value) {
UMA_HISTOGRAM_ENUMERATION("Omnibox.DocumentSuggest.Requests", request_value,
DOCUMENT_MAX_REQUEST_HISTOGRAM_VALUE);
}
// MIME types sent by the server for different document types. They are used
// in this file both to pick the suggestion icon (GetIconForMIMEType) and to
// pick the localized product description (GetProductDescriptionString).
const char kDocumentMimetype[] = "application/vnd.google-apps.document";
const char kFormMimetype[] = "application/vnd.google-apps.form";
const char kSpreadsheetMimetype[] = "application/vnd.google-apps.spreadsheet";
const char kPresentationMimetype[] = "application/vnd.google-apps.presentation";
// Maps the MIME type reported by the server to the document type that selects
// the suggestion's icon. MIME types without a dedicated entry map to
// DRIVE_OTHER.
AutocompleteMatch::DocumentType GetIconForMIMEType(
    const base::StringPiece& mimetype) {
  using DocumentType = AutocompleteMatch::DocumentType;

  // Built once on first use; keyed by the exact MIME string.
  static const std::map<base::StringPiece, DocumentType> kIconMap = {
      {kDocumentMimetype, DocumentType::DRIVE_DOCS},
      {kFormMimetype, DocumentType::DRIVE_FORMS},
      {kSpreadsheetMimetype, DocumentType::DRIVE_SHEETS},
      {kPresentationMimetype, DocumentType::DRIVE_SLIDES},
      {"image/jpeg", DocumentType::DRIVE_IMAGE},
      {"image/png", DocumentType::DRIVE_IMAGE},
      {"image/gif", DocumentType::DRIVE_IMAGE},
      {"application/pdf", DocumentType::DRIVE_PDF},
      {"video/mp4", DocumentType::DRIVE_VIDEO},
  };

  const auto found = kIconMap.find(mimetype);
  if (found == kIconMap.end())
    return DocumentType::DRIVE_OTHER;
  return found->second;
}
// Error messages the server attaches to responses asking the client to back
// off. They are compared verbatim against the response's "error.message".
const char kErrorMessageAdminDisabled[] =
    "Not eligible to query due to admin disabled Chrome search settings.";
const char kErrorMessageRetryLater[] = "Not eligible to query, see retry info.";

// TODO(manukh): Remove ResponseContainsBackoffSignal once the check using http
// status code in |OnURLLoadComplete| rolls out and the backend returns to
// sending 4xx backoff responses as opposed to 2xx; or, if the backend is never
// adjusted to send 2xx responses, once that check rolls out.
//
// Returns true if |root_dict| carries an "error" dictionary whose "code",
// "status" and "message" fields match one of the known backoff responses.
// Returns false if the dictionary or any of the three fields is missing.
bool ResponseContainsBackoffSignal(const base::DictionaryValue* root_dict) {
  const base::DictionaryValue* error_dict = nullptr;
  if (!root_dict->GetDictionary("error", &error_dict))
    return false;

  int error_code;
  std::string error_status;
  std::string error_message;
  const bool has_all_fields = error_dict->GetInteger("code", &error_code) &&
                              error_dict->GetString("status", &error_status) &&
                              error_dict->GetString("message", &error_message);
  if (!has_all_fields)
    return false;

  // 403/PERMISSION_DENIED: Account is currently ineligible to receive results.
  const bool admin_disabled = error_code == 403 &&
                              error_status == "PERMISSION_DENIED" &&
                              error_message == kErrorMessageAdminDisabled;

  // 503/UNAVAILABLE: Uninteresting set of results, or another server request
  // to backoff.
  const bool retry_later = error_code == 503 &&
                           error_status == "UNAVAILABLE" &&
                           error_message == kErrorMessageRetryLater;

  return admin_disabled || retry_later;
}
struct FieldMatches {
double weight;
String16Vector words;
size_t count;
FieldMatches(double weight, const std::string* string)
: weight(weight),
words(string ? String16VectorFromString16(
base::UTF8ToUTF16(string->c_str()),
false,
nullptr)
: String16Vector()),
count(0) {}
FieldMatches(double weight, std::vector<const std::string*> strings)
: weight(weight),
words(std::accumulate(
strings.begin(),
strings.end(),
String16Vector(),
[](String16Vector words, const std::string* string) {
if (string) {
const auto string_words = String16VectorFromString16(
base::UTF8ToUTF16(string->c_str()), false, nullptr);
words.insert(words.end(), string_words.begin(),
string_words.end());
}
return words;
})),
count(0) {}
// Increments |count| and returns true if |words| includes a word equal to or
// prefixed by |word|.
bool Includes(const base::string16& word) {
if (std::none_of(words.begin(), words.end(), [word](base::string16 w) {
return base::StartsWith(w, word,
base::CompareCase::INSENSITIVE_ASCII);
}))
return false;
count += word.size();
return true;
}
// Decreases linearly with respect to |count| for small values, begins at 1,
// and asymptotically approaches 0.
double InvScore() { return std::pow(1 - weight, count); }
};
// Extracts a list of strings from a DictionaryValue containing a list of
// objects containing a string field.
//
// |list_path| is the dotted path to the list inside |result|; |field_path| is
// the key of the string to pull out of each list element. The returned vector
// has one entry per list element, in order; an entry is null when the element
// is not a dictionary or has no string under |field_path|. Returns an empty
// vector when |list_path| does not resolve to a list. The pointers are owned
// by |result|.
std::vector<const std::string*> ExtractResultList(
    const base::DictionaryValue* result,
    const base::StringPiece& list_path,
    const base::StringPiece& field_path) {
  const base::Value* values = result->FindListPath(list_path);
  if (!values)
    return {};

  const base::Value::ConstListView list = values->GetList();
  std::vector<const std::string*> extracted;
  extracted.reserve(list.size());
  for (const base::Value& value : list) {
    // The response comes from the network; don't assume every element is a
    // dictionary, since key lookups on non-dictionary values are fatal.
    extracted.push_back(value.is_dict() ? value.FindStringKey(field_path)
                                        : nullptr);
  }
  return extracted;
}
// Shorthand for reading a double-valued field-trial parameter of the
// omnibox::kDocumentProvider feature. Returns |default_weight| when the
// parameter |param_name| is not set.
double FieldWeight(const std::string& param_name, double default_weight) {
  const double weight = base::GetFieldTrialParamByFeatureAsDouble(
      omnibox::kDocumentProvider, param_name, default_weight);
  return weight;
}
// Computes a client-side relevance score for |result| given the user's
// |input|. Each input word is credited to the highest-weighted field that
// contains a word equal to or prefixed by it. The returned score lies between
// the "MinDocScore" and "MaxDocScore" field-trial params; results whose raw
// score falls below "RawDocScoreCutoff" get the minimum score.
int CalculateScore(const base::string16& input,
                   const base::DictionaryValue* result) {
  // Suggestions scored lower than |raw_score_cutoff| will be discarded.
  const double raw_score_cutoff = base::GetFieldTrialParamByFeatureAsDouble(
      omnibox::kDocumentProvider, "RawDocScoreCutoff", .25);
  // Final score will be between |min_score| and |max_score|, not accounting for
  // |raw_score_cutoff|.
  const int min_score = base::GetFieldTrialParamByFeatureAsInt(
      omnibox::kDocumentProvider, "MinDocScore", 0);
  const int max_score = base::GetFieldTrialParamByFeatureAsInt(
      omnibox::kDocumentProvider, "MaxDocScore", 1400);

  std::vector<FieldMatches> field_matches_vec = {
      {FieldWeight("TitleWeight", .15), result->FindStringKey("title")},
      {FieldWeight("OwnerNamesWeight", .15),
       ExtractResultList(result, "metadata.owner.personNames", "displayName")},
      {FieldWeight("OwnerEmailsWeight", .15),
       ExtractResultList(result, "metadata.owner.emailAddresses",
                         "emailAddress")},
      {FieldWeight("SnippetWeight", .06),
       result->FindStringPath("snippet.snippet")},
      {FieldWeight("UrlWeight", 0), result->FindStringKey("url")},
      {FieldWeight("MimeWeight", 0),
       result->FindStringPath("metadata.mimeType")},
  };

  // Highest weight first, so that each input word is attributed to the most
  // valuable field that contains it. Stable, so ties keep the order above.
  std::stable_sort(field_matches_vec.begin(), field_matches_vec.end(),
                   [](const FieldMatches& a, const FieldMatches& b) {
                     return a.weight > b.weight;
                   });

  const String16Vector input_words =
      String16VectorFromString16(input, false, nullptr);
  for (const auto& word : input_words) {
    // Credit |word| to the first (highest-weighted) field that includes it;
    // Includes() updates that field's match count as a side effect.
    for (auto& field_matches : field_matches_vec) {
      if (field_matches.Includes(word))
        break;
    }
  }

  // |score| is computed by subtracting the product of each field's inverse
  // score from 1; |score| begins at 0 and asymptotically approaches 1.
  // Summing each field's score would grossly favor short multi-field matches
  // over long single-field matches due to each field's score increasing faster
  // for small values.
  // Iterate by reference: copying a FieldMatches would deep-copy its words.
  double inv_score_product = 1.0;
  for (auto& field_matches : field_matches_vec)
    inv_score_product *= field_matches.InvScore();

  double score = 1 - inv_score_product;
  if (score > 1)
    score = 1;
  if (score < raw_score_cutoff)
    score = 0;

  return static_cast<int>(min_score + score * (max_score - min_score));
}
// Adjusts |score| depending on whether |owner| is listed among the owner email
// addresses of |result|: owned documents gain the "OwnedDocPromotion"
// field-trial param, all others lose "UnownedDocDemotion". The returned score
// is never negative.
int BoostOwned(const int score,
               const std::string& owner,
               const base::DictionaryValue* result) {
  const int promotion = base::GetFieldTrialParamByFeatureAsInt(
      omnibox::kDocumentProvider, "OwnedDocPromotion", 0);
  const int demotion = base::GetFieldTrialParamByFeatureAsInt(
      omnibox::kDocumentProvider, "UnownedDocDemotion", 200);

  const std::vector<const std::string*> owner_emails = ExtractResultList(
      result, "metadata.owner.emailAddresses", "emailAddress");
  // ExtractResultList() yields a null entry for every list element lacking an
  // "emailAddress" string, so each pointer must be checked before use.
  const bool owned = std::any_of(
      owner_emails.begin(), owner_emails.end(),
      [&owner](const std::string* email) { return email && owner == *email; });

  return std::max(score + (owned ? promotion : -demotion), 0);
}
// Derived from google3/apps/share/util/docs_url_extractor.cc.
std::string ExtractDocIdFromUrl(const std::string& url) {
static const RE2 docs_url_pattern_(
"\\b(" // The first groups matches the whole URL.
// Domain.
"(?:https?://)?(?:"
"spreadsheets|docs|drive|script|sites|jamboard"
")[0-9]?.google.com"
"(?::[0-9]+)?\\/" // Port.
"(?:\\S*)" // Non-whitespace chars.
"(?:"
// Doc url prefix to match /d/{id}. (?:e/)? deviates from google3.
"(?:/d/(?:e/)?(?P<path_docid>[0-9a-zA-Z\\-\\_]+))"
"|"
// Docs id expr to match a valid id parameter.
"(?:(?:\\?|&|&amp;)"
"(?:id|docid|key|docID|DocId)=(?P<query_docid>[0-9a-zA-Z\\-\\_]+))"
"|"
// Folder url prefix to match /folders/{folder_id}.
"(?:/folders/(?P<folder_docid>[0-9a-zA-Z\\-\\_]+))"
"|"
// Sites url prefix.
"(?:/?s/)(?P<sites_docid>[0-9a-zA-Z\\-\\_]+)"
"(?:/p/[0-9a-zA-Z\\-\\_]+)?/edit"
"|"
// Jam url.
"(?:d/)(?P<jam_docid>[0-9a-zA-Z\\-\\_]+)/(?:edit|viewer)"
")"
// Other valid chars.
"(?:[0-9a-zA-Z$\\-\\_\\.\\+\\!\\*\'\\,;:@&=/\\?]*)"
// Summarization details.
"(?:summarizationDetails=[0-9a-zA-Z$\\-\\_\\.\\+\\!\\*\'\\,;:@&=/"
"\\?(?:%5B)(?:%5D)]*)?"
// Pther valid chars.
"(?:[0-9a-zA-Z$\\-\\_\\.\\+\\!\\*\'\\,;:@&=/\\?]*)"
"(?:(#[0-9a-zA-Z$\\-\\_\\.\\+\\!\\*\'\\,;:@&=/\\?]+)?)" // Fragment
")");
std::vector<re2::StringPiece> matched_doc_ids(
docs_url_pattern_.NumberOfCapturingGroups() + 1);
// ANCHOR_START deviates from google3 which uses UNANCHORED. Using
// ANCHOR_START prevents incorrectly matching with non-drive URLs but which
// contain a drive URL; e.g.,
// url-parser.com/?url=https://docs.google.com/document/d/(id)/edit.
if (!docs_url_pattern_.Match(url, 0, url.size(), RE2::ANCHOR_START,
matched_doc_ids.data(),
matched_doc_ids.size())) {
return std::string();
}
for (const auto& doc_id_group : docs_url_pattern_.NamedCapturingGroups()) {
re2::StringPiece identified_doc_id = matched_doc_ids[doc_id_group.second];
if (!identified_doc_id.empty()) {
return std::string(identified_doc_id);
}
}
return std::string();
}
// Returns the text used when trying to autocomplete |match| by title: the
// match's contents followed by " - " and the spec of its destination URL.
// Takes |match| by const reference; only two fields are read, so copying the
// whole AutocompleteMatch on every call was unnecessary.
base::string16 TitleForAutocompletion(const AutocompleteMatch& match) {
  return match.contents +
         base::UTF8ToUTF16(" - " + match.destination_url.spec());
}
// Returns true if |value| lies in the half-open range [|min|, |max|). A |max|
// of -1 means "no upper bound".
bool WithinBounds(int value, int min, int max) {
  if (value < min)
    return false;
  return max == -1 || value < max;
}
} // namespace
// static
// Factory: heap-allocates a DocumentProvider for |client| and |listener| with
// a suggestion cache limited to |cache_size| entries, and returns the raw
// pointer.
// NOTE(review): ownership of the returned pointer is not visible in this
// file - presumably the caller stores it in a ref-counting wrapper; confirm
// against the header.
DocumentProvider* DocumentProvider::Create(
AutocompleteProviderClient* client,
AutocompleteProviderListener* listener,
size_t cache_size) {
return new DocumentProvider(client, listener, cache_size);
}
// static
// Registers the boolean pref omnibox::kDocumentSuggestEnabled with a default
// of true. IsDocumentProviderAllowed() reads this pref as the client-side
// toggle for the provider.
void DocumentProvider::RegisterProfilePrefs(
user_prefs::PrefRegistrySyncable* registry) {
registry->RegisterBooleanPref(omnibox::kDocumentSuggestEnabled, true);
}
// Returns true if document suggestions may be requested for |input|. All of
// the following must hold: the feature, the search-suggest setting and the
// client-side pref are enabled; the profile is not off the record; the user is
// authenticated with sync active; no server backoff was received this session;
// and Google is the default search provider. In experimental keyword mode the
// input must additionally not be explicitly targeting another keyword, unless
// it starts with "drive.google.com".
bool DocumentProvider::IsDocumentProviderAllowed(
    AutocompleteProviderClient* client,
    const AutocompleteInput& input) {
  // The feature must be on; these may seem like search suggestions, so gate on
  // that setting too; and the client-side toggle must be enabled.
  if (!base::FeatureList::IsEnabled(omnibox::kDocumentProvider) ||
      !client->SearchSuggestEnabled() ||
      !client->GetPrefs()->GetBoolean(omnibox::kDocumentSuggestEnabled)) {
    return false;
  }

  // No incognito, and the user must be signed in with sync active.
  if (client->IsOffTheRecord() || !client->IsAuthenticated() ||
      !client->IsSyncActive()) {
    return false;
  }

  // Bail if the server asked us to back off for this session.
  if (backoff_for_session_)
    return false;

  // Google must be set as default search provider.
  auto* template_url_service = client->GetTemplateURLService();
  if (!template_url_service)
    return false;
  const TemplateURL* default_provider =
      template_url_service->GetDefaultSearchProvider();
  if (!default_provider)
    return false;
  if (default_provider->GetEngineType(
          template_url_service->search_terms_data()) != SEARCH_ENGINE_GOOGLE) {
    return false;
  }

  const bool prefers_keyword =
      OmniboxFieldTrial::IsExperimentalKeywordModeEnabled() &&
      input.prefer_keyword();
  if (!prefers_keyword)
    return true;

  // If a keyword provider matches, and we're explicitly in keyword mode, then
  // the keyword provider must match the default, or the document provider.
  AutocompleteInput keyword_input = input;
  const TemplateURL* keyword_provider =
      KeywordProvider::GetSubstitutingTemplateURLForInput(template_url_service,
                                                          &keyword_input);
  if (!keyword_provider)
    return true;

  // Allowed if not explicitly in keyword mode, or for a Drive suggestion.
  if (!IsExplicitlyInKeywordMode(input, keyword_provider->keyword()))
    return true;
  return base::StartsWith(input.text(), base::ASCIIToUTF16("drive.google.com"),
                          base::CompareCase::SENSITIVE);
}
// static
bool DocumentProvider::IsInputLikelyURL(const AutocompleteInput& input) {
if (input.type() == metrics::OmniboxInputType::URL)
return true;
// Special cases when the user might be starting to type the most common URL
// prefixes, but the SchemeClassifier won't have classified them as URLs yet.
// Note these checks are of the form "(string constant) starts with input."
if (input.text().length() <= 8) {
if (StartsWith(base::ASCIIToUTF16("https://"), input.text(),
base::CompareCase::INSENSITIVE_ASCII) ||
StartsWith(base::ASCIIToUTF16("http://"), input.text(),
base::CompareCase::INSENSITIVE_ASCII) ||
StartsWith(base::ASCIIToUTF16("www."), input.text(),
base::CompareCase::INSENSITIVE_ASCII)) {
return true;
}
}
return false;
}
// Starts a new query for |input|. Clears the current matches, and, if the
// input is eligible, synchronously fills |matches_| from the cache and (when
// asynchronous matches are wanted) schedules a debounced server request.
// |minimal_changes| is not used.
void DocumentProvider::Start(const AutocompleteInput& input,
                             bool minimal_changes) {
  TRACE_EVENT0("omnibox", "DocumentProvider::Start");
  matches_.clear();
  field_trial_triggered_ = false;

  // Skip the query when, in order: the provider is not allowed (feature off,
  // user not eligible, under backoff, etc.); the input is empty; the input
  // length is outside the experiment's bounds; or the input looks like a URL.
  const bool skip_query =
      !IsDocumentProviderAllowed(client_, input) ||
      input.type() == metrics::OmniboxInputType::EMPTY ||
      !WithinBounds(input.text().length(), min_query_length_,
                    max_query_length_) ||
      IsInputLikelyURL(input);
  if (skip_query)
    return;

  // Cancel whatever request is pending or in flight before switching inputs.
  Stop(false, false);
  input_ = input;

  // Return cached suggestions synchronously.
  CopyCachedMatchesToMatches();

  if (input.want_asynchronous_matches()) {
    done_ = false;  // Set true in callbacks.
    debouncer_->RequestRun(
        base::BindOnce(&DocumentProvider::Run, base::Unretained(this)));
  }
}
// Issues the document suggestions request for the current |input_|, creating
// the suggestions service if necessary.
void DocumentProvider::Run() {
  // Invoked once the loader for the request has been created.
  auto on_loader_available =
      base::BindOnce(&DocumentProvider::OnDocumentSuggestionsLoaderAvailable,
                     weak_ptr_factory_.GetWeakPtr());
  // Invoked when the load finishes.
  auto on_load_complete = base::BindOnce(
      &DocumentProvider::OnURLLoadComplete,
      base::Unretained(this) /* this owns SimpleURLLoader */);

  auto* service =
      client_->GetDocumentSuggestionsService(/*create_if_necessary=*/true);
  service->CreateDocumentSuggestionsRequest(
      input_.text(), client_->IsOffTheRecord(), std::move(on_loader_available),
      std::move(on_load_complete));
}
// Cancels any pending debounced run and any in-flight request, and marks the
// provider as done. If |clear_cached_results| is true, |matches_| is cleared
// as well. |due_to_user_inactivity| is not used.
void DocumentProvider::Stop(bool clear_cached_results,
                            bool due_to_user_inactivity) {
  TRACE_EVENT0("omnibox", "DocumentProvider::Stop");
  debouncer_->CancelRequest();

  // A live loader means a request is being dropped; record that.
  if (loader_) {
    LogOmniboxDocumentRequest(DOCUMENT_REQUEST_INVALIDATED);
    loader_.reset();
  }

  // Only talk to the service if it already exists; don't create it just to
  // cancel.
  if (auto* document_suggestions_service =
          client_->GetDocumentSuggestionsService(
              /*create_if_necessary=*/false)) {
    document_suggestions_service->StopCreatingDocumentSuggestionsRequest();
  }

  done_ = true;
  if (clear_cached_results)
    matches_.clear();
}
// Deleting matches is not supported by this provider; this is a no-op.
void DocumentProvider::DeleteMatch(const AutocompleteMatch& match) {}
// Appends an entry describing this provider to |provider_info|: the provider
// type, whether it is done, and - if a field trial was triggered for the
// current input and/or during the session - the active suggest field trial
// hashes in the corresponding repeated field(s).
void DocumentProvider::AddProviderInfo(ProvidersInfo* provider_info) const {
  provider_info->push_back(metrics::OmniboxEventProto_ProviderInfo());
  metrics::OmniboxEventProto_ProviderInfo& entry = provider_info->back();
  entry.set_provider(metrics::OmniboxEventProto::DOCUMENT);
  entry.set_provider_done(done_);

  // Nothing more to report unless a field trial was triggered.
  if (!field_trial_triggered_ && !field_trial_triggered_in_session_)
    return;

  std::vector<uint32_t> trial_hashes;
  OmniboxFieldTrial::GetActiveSuggestFieldTrialHashes(&trial_hashes);
  for (const uint32_t trial_hash : trial_hashes) {
    if (field_trial_triggered_)
      entry.mutable_field_trial_triggered()->Add(trial_hash);
    if (field_trial_triggered_in_session_)
      entry.mutable_field_trial_triggered_in_session()->Add(trial_hash);
  }
}
// Clears both field-trial-triggered flags (per-input and per-session).
void DocumentProvider::ResetSession() {
  field_trial_triggered_ = false;
  field_trial_triggered_in_session_ = false;
}
// Constructs the provider. Query-length bounds are read from field-trial
// params of omnibox::kDocumentProvider; the debouncer is configured from
// omnibox::kDebounceDocumentProvider when that feature is enabled.
//
// The "show" and "log" bounds default to the request bounds
// (|min_query_length_| / |max_query_length_|) initialized just above them.
// NOTE(review): this relies on the members being declared in the header in the
// same order as they are listed here - confirm against the header.
//
// A max of -1 means "no upper bound" (see WithinBounds()); it is stored
// through a static_cast to size_t and converted back to int at the call sites.
DocumentProvider::DocumentProvider(AutocompleteProviderClient* client,
AutocompleteProviderListener* listener,
size_t cache_size)
: AutocompleteProvider(AutocompleteProvider::TYPE_DOCUMENT),
// Bounds on the input length for which server requests are issued.
min_query_length_(
static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
omnibox::kDocumentProvider,
"DocumentProviderMinQueryLength",
4))),
max_query_length_(
static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
omnibox::kDocumentProvider,
"DocumentProviderMaxQueryLength",
-1))),
// Bounds on the input length for which suggestions are shown.
min_query_show_length_(
static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
omnibox::kDocumentProvider,
"DocumentProviderMinQueryShowLength",
min_query_length_))),
max_query_show_length_(
static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
omnibox::kDocumentProvider,
"DocumentProviderMaxQueryShowLength",
max_query_length_))),
// Bounds on the input length for which field_trial_triggered is logged.
min_query_log_length_(
static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
omnibox::kDocumentProvider,
"DocumentProviderMinQueryLogLength",
min_query_length_))),
max_query_log_length_(
static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
omnibox::kDocumentProvider,
"DocumentProviderMaxQueryLogLength",
max_query_length_))),
field_trial_triggered_(false),
field_trial_triggered_in_session_(false),
backoff_for_session_(false),
client_(client),
listener_(listener),
cache_size_(cache_size),
// The cache does not evict on its own; UpdateResults() shrinks it to
// |cache_size_| explicitly.
matches_cache_(MatchesCache::NO_AUTO_EVICT) {
if (base::FeatureList::IsEnabled(omnibox::kDebounceDocumentProvider)) {
// Debouncing enabled: read its mode and delay from field-trial params.
bool from_last_run = base::GetFieldTrialParamByFeatureAsBool(
omnibox::kDebounceDocumentProvider,
"DebounceDocumentProviderFromLastRun", true);
int delay_ms = base::GetFieldTrialParamByFeatureAsInt(
omnibox::kDebounceDocumentProvider, "DebounceDocumentProviderDelayMs",
100);
debouncer_ = std::make_unique<AutocompleteProviderDebouncer>(from_last_run,
delay_ms);
} else
// Debouncing disabled: a debouncer is still created, with a delay of 0.
debouncer_ = std::make_unique<AutocompleteProviderDebouncer>(false, 0);
}
// Nothing to release by hand; members clean up after themselves.
DocumentProvider::~DocumentProvider() = default;
// Completion callback for the suggestions request. Records the reply, enters
// session backoff on HTTP 400/499, parses the body on a successful HTTP 200
// response, then releases the loader, marks the provider done and notifies the
// listener whether results were updated.
void DocumentProvider::OnURLLoadComplete(
    const network::SimpleURLLoader* source,
    std::unique_ptr<std::string> response_body) {
  DCHECK(!done_);
  DCHECK_EQ(loader_.get(), source);

  LogOmniboxDocumentRequest(DOCUMENT_REPLY_RECEIVED);

  // 0 stands for "no response headers available".
  int http_status_code = 0;
  if (source->ResponseInfo() && source->ResponseInfo()->headers)
    http_status_code = source->ResponseInfo()->headers->response_code();

  if (http_status_code == 400 || http_status_code == 499)
    backoff_for_session_ = true;

  // Only a body delivered without network error and with status 200 is parsed.
  bool results_updated = false;
  if (response_body && source->NetError() == net::OK &&
      http_status_code == 200) {
    results_updated = UpdateResults(SearchSuggestionParser::ExtractJsonData(
        source, std::move(response_body)));
  }

  loader_.reset();
  done_ = true;
  listener_->OnProviderUpdate(results_updated);
}
bool DocumentProvider::UpdateResults(const std::string& json_data) {
base::Optional<base::Value> response =
base::JSONReader::Read(json_data, base::JSON_ALLOW_TRAILING_COMMAS);
if (!response)
return false;
matches_ = ParseDocumentSearchResults(*response);
for (auto it = matches_.rbegin(); it != matches_.rend(); ++it)
matches_cache_.Put(it->stripped_destination_url, *it);
CopyCachedMatchesToMatches(matches_.size());
matches_cache_.ShrinkToSize(cache_size_);
return !matches_.empty();
}
// Callback invoked with the loader created for the suggestions request. Takes
// ownership of |loader| (Stop() and OnURLLoadComplete() release it) and
// records that a request was sent.
void DocumentProvider::OnDocumentSuggestionsLoaderAvailable(
std::unique_ptr<network::SimpleURLLoader> loader) {
loader_ = std::move(loader);
LogOmniboxDocumentRequest(DOCUMENT_REQUEST_SENT);
}
// static
// Formats |modified_timestamp_string| relative to |now| for display:
//   - same local calendar day: time of day;
//   - same local year: abbreviated month and day ("MMMd" pattern, e.g. Jan 1);
//   - otherwise: short numeric date.
// Returns an empty string if the timestamp is empty or cannot be parsed.
base::string16 DocumentProvider::GenerateLastModifiedString(
    const std::string& modified_timestamp_string,
    base::Time now) {
  base::Time modified_time;
  if (modified_timestamp_string.empty() ||
      !base::Time::FromString(modified_timestamp_string.c_str(),
                              &modified_time)) {
    return base::string16();
  }

  // Compare calendar fields in local time.
  base::Time::Exploded modified_parts;
  base::Time::Exploded now_parts;
  modified_time.LocalExplode(&modified_parts);
  now.LocalExplode(&now_parts);

  // Different year: no shorthand, display the full numeric date.
  if (modified_parts.year != now_parts.year)
    return base::TimeFormatShortDateNumeric(modified_time);

  const bool same_day = modified_parts.month == now_parts.month &&
                        modified_parts.day_of_month == now_parts.day_of_month;
  if (same_day) {
    // Same local calendar day - use localized time.
    return base::TimeFormatTimeOfDay(modified_time);
  }

  // Same year but not the same day: use abbreviated month/day ("Jan 1").
  return base::TimeFormatWithPattern(modified_time, "MMMd");
}
// Returns the localized product description for |mimetype|: a dedicated string
// for documents, forms, spreadsheets and presentations, and the general Drive
// string for every other file type.
base::string16 GetProductDescriptionString(const std::string& mimetype) {
  // Fallback to "Drive" for other filetypes.
  int message_id = IDS_DRIVE_SUGGESTION_GENERAL;
  if (mimetype == kDocumentMimetype) {
    message_id = IDS_DRIVE_SUGGESTION_DOCUMENT;
  } else if (mimetype == kFormMimetype) {
    message_id = IDS_DRIVE_SUGGESTION_FORM;
  } else if (mimetype == kSpreadsheetMimetype) {
    message_id = IDS_DRIVE_SUGGESTION_SPREADSHEET;
  } else if (mimetype == kPresentationMimetype) {
    message_id = IDS_DRIVE_SUGGESTION_PRESENTATION;
  }
  return l10n_util::GetStringUTF16(message_id);
}
// Converts the parsed server response |root_val| into autocomplete matches.
// Returns an empty list if the response is not a dictionary, carries a backoff
// signal (in which case |backoff_for_session_| is set), or has no "results"
// list. Results lacking a title or url are skipped. Also sets the
// field-trial-triggered flags when a usable result is seen and the input
// length is within the logging bounds.
ACMatches DocumentProvider::ParseDocumentSearchResults(
const base::Value& root_val) {
ACMatches matches;
const base::DictionaryValue* root_dict = nullptr;
const base::ListValue* results_list = nullptr;
if (!root_val.GetAsDictionary(&root_dict)) {
return matches;
}
// The server may ask the client to back off, in which case we back off for
// the session.
// TODO(skare): Respect retryDelay if provided, ideally by calling via gRPC.
if (ResponseContainsBackoffSignal(root_dict)) {
backoff_for_session_ = true;
return matches;
}
// Otherwise parse the results.
if (!root_dict->GetList("results", &results_list)) {
return matches;
}
size_t num_results = results_list->GetSize();
UMA_HISTOGRAM_COUNTS_1M("Omnibox.DocumentSuggest.ResultCount", num_results);
// During development/quality iteration we may wish to defeat server scores.
// If both |use_server_score| and |use_client_score| are true, the min of the
// two scores will be used.
// If both are false, the server score will be used.
bool use_client_score = base::GetFieldTrialParamByFeatureAsBool(
omnibox::kDocumentProvider, "DocumentUseClientScore", false);
bool use_server_score = base::GetFieldTrialParamByFeatureAsBool(
omnibox::kDocumentProvider, "DocumentUseServerScore", true);
// Cap scores for each suggestion.
bool cap_score_per_rank = base::GetFieldTrialParamByFeatureAsBool(
omnibox::kDocumentProvider, "DocumentCapScorePerRank", false);
// Caps by position in the server's result list; positions beyond the last
// entry reuse the last cap.
std::vector<int> score_caps = {
base::GetFieldTrialParamByFeatureAsInt(omnibox::kDocumentProvider,
"DocumentCapScoreRank1", 1200),
base::GetFieldTrialParamByFeatureAsInt(omnibox::kDocumentProvider,
"DocumentCapScoreRank2", 1100),
base::GetFieldTrialParamByFeatureAsInt(omnibox::kDocumentProvider,
"DocumentCapScoreRank3", 900),
};
// Promotes owned documents and/or demotes unowned documents.
bool boost_owned = base::GetFieldTrialParamByFeatureAsBool(
omnibox::kDocumentProvider, "DocumentBoostOwned", false);
// Some users may be in a counterfactual study arm in which we perform all
// necessary work but do not forward the autocomplete matches.
bool in_counterfactual_group = base::GetFieldTrialParamByFeatureAsBool(
omnibox::kDocumentProvider, "DocumentProviderCounterfactualArm", false);
// In order to compare groups with different |min_query_length_| values,
// |min_query_show_length_| specifies the min query length for which to show
// drive requests. Shorter queries that return drive suggestions will still
// log field_trials_triggered. E.g., if |min_query_length_| is 3 and
// |min_query_show_length_| is 5, then:
// - Inputs of lengths 0 to 2 will not make drive requests.
// - Inputs of lengths 3 to 4 will make drive requests; if drive suggestions
// are returned, field_trial_triggered will be logged, but the suggestions
// will not be shown.
// - Inputs of length 5 or more will make drive requests; if drive suggestions
// are returned, field_trial_triggered will be logged, and, if not in
// counterfactual, the suggestions will be shown.
bool show_doc_suggestions =
!in_counterfactual_group &&
WithinBounds(input_.text().length(), min_query_show_length_,
max_query_show_length_);
// In order to compare small slices of input length while excluding noise from
// the larger group of all input lengths, |min_query_log_length_| and
// |max_query_log_length_| specify the queries that will log
// field_trial_triggered. E.g., if |min_query_log_length_| is 50 and
// |max_query_log_length_| is -1, only inputs of length 50 or greater for
// which drive suggestions are returned will log field_trial_triggered,
// while shorter queries will continue to make requests and show suggestions.
// This allows an uninterrupted user experience for short queries while
// allowing focused analysis of long queries.
bool trigger_field_trial = WithinBounds(
input_.text().length(), min_query_log_length_, max_query_log_length_);
// Ensure server's suggestions are added with monotonically decreasing scores.
int previous_score = INT_MAX;
for (size_t i = 0; i < num_results; i++) {
const base::DictionaryValue* result = nullptr;
// A non-dictionary entry aborts parsing; matches built so far are returned.
if (!results_list->GetDictionary(i, &result)) {
return matches;
}
base::string16 title;
base::string16 url;
result->GetString("title", &title);
result->GetString("url", &url);
// Both a title and a url are required to build a match.
if (title.empty() || url.empty()) {
continue;
}
// Both client and server scores are calculated regardless of usage in order
// to log them with |AutocompleteMatch::RecordAdditionalInfo| below.
int client_score = CalculateScore(input_.text(), result);
int server_score = 0;
result->GetInteger("score", &server_score);
int score = 0;
// Set |score| only if we haven't surpassed |provider_max_matches_| yet.
// Otherwise, score the remaining matches 0 to avoid displaying them except
// when deduped with history, shortcut, or bookmark matches.
if (matches.size() < provider_max_matches_) {
if (use_client_score && use_server_score)
score = std::min(client_score, server_score);
else
score = use_client_score ? client_score : server_score;
if (cap_score_per_rank) {
int score_cap =
i < score_caps.size() ? score_caps[i] : score_caps.back();
score = std::min(score, score_cap);
}
if (boost_owned)
score = BoostOwned(score, client_->ProfileUserName(), result);
// Decrement scores if necessary to ensure suggestion order is preserved.
// Don't decrement client scores which don't necessarily rank suggestions
// the same as the server.
if (!use_client_score && score >= previous_score)
score = std::max(previous_score - 1, 0);
previous_score = score;
}
AutocompleteMatch match(this, score, false,
AutocompleteMatchType::DOCUMENT_SUGGESTION);
// Use full URL for displayed text and navigation. Use "originalUrl" for
// deduping if present.
match.fill_into_edit = url;
match.destination_url = GURL(url);
base::string16 original_url;
std::string mimetype;
if (result->GetString("originalUrl", &original_url)) {
GURL stripped_url = GetURLForDeduping(GURL(original_url));
if (stripped_url.is_valid())
match.stripped_destination_url = stripped_url;
}
match.contents = AutocompleteMatch::SanitizeString(title);
match.contents_class = Classify(match.contents, input_.text());
const base::DictionaryValue* metadata = nullptr;
if (result->GetDictionary("metadata", &metadata)) {
if (metadata->GetString("mimeType", &mimetype)) {
match.document_type = GetIconForMIMEType(mimetype);
match.RecordAdditionalInfo(
"document type",
AutocompleteMatch::DocumentTypeString(match.document_type));
}
// The description is "<last modified> <product>" when an update time is
// available, and just the product description otherwise. |mimetype| stays
// empty if the metadata had none, which selects the general description.
std::string update_time;
metadata->GetString("updateTime", &update_time);
if (!update_time.empty()) {
match.description = l10n_util::GetStringFUTF16(
IDS_DRIVE_SUGGESTION_DESCRIPTION_TEMPLATE,
GenerateLastModifiedString(update_time, base::Time::Now()),
GetProductDescriptionString(mimetype));
} else {
match.description = GetProductDescriptionString(mimetype);
}
AutocompleteMatch::AddLastClassificationIfNecessary(
&match.description_class, 0, ACMatchClassification::DIM);
}
match.TryAutocompleteWithTitle(TitleForAutocompletion(match), input_);
match.transition = ui::PAGE_TRANSITION_GENERATED;
match.RecordAdditionalInfo("client score", client_score);
match.RecordAdditionalInfo("server score", server_score);
if (matches.size() >= provider_max_matches_)
match.RecordAdditionalInfo("for deduping only", "true");
const std::string* snippet = result->FindStringPath("snippet.snippet");
if (snippet)
match.RecordAdditionalInfo("snippet", *snippet);
// Matches are built even when they will not be shown, so that the field
// trial flags below are set consistently.
if (show_doc_suggestions)
matches.push_back(match);
if (trigger_field_trial) {
field_trial_triggered_ = true;
field_trial_triggered_in_session_ = true;
}
}
return matches;
}
void DocumentProvider::CopyCachedMatchesToMatches(
size_t skip_n_most_recent_matches) {
std::for_each(std::next(matches_cache_.begin(), skip_n_most_recent_matches),
matches_cache_.end(), [this](const auto& cache_key_match_pair) {
auto match = cache_key_match_pair.second;
match.relevance = 0;
match.allowed_to_be_default_match = false;
match.TryAutocompleteWithTitle(TitleForAutocompletion(match),
input_);
match.contents_class =
DocumentProvider::Classify(match.contents, input_.text());
match.RecordAdditionalInfo("from cache", "true");
matches_.push_back(match);
});
}
// static
// Returns classifications for |text| in which the parts matching terms of
// |input_text| are styled MATCH and everything else NONE.
ACMatchClassifications DocumentProvider::Classify(
    const base::string16& text,
    const base::string16& input_text) {
  return ClassifyTermMatches(FindTermMatches(input_text, text), text.size(),
                             ACMatchClassification::MATCH,
                             ACMatchClassification::NONE);
}
// static
// Returns a canonical "https://drive.google.com/open?id=<id>" URL for |url| if
// it is a google.com URL from which a document id can be extracted, or an
// empty GURL otherwise. The result is only meant for deduplication and is not
// guaranteed to be usable for navigation.
const GURL DocumentProvider::GetURLForDeduping(const GURL& url) {
  // Early exit to avoid unnecessary and more involved checks.
  if (!url.DomainIs("google.com"))
    return GURL();

  // We aim to prevent duplicate Drive URLs to appear between the Drive document
  // search provider and history/bookmark entries.
  // Drive redirects are already handled by the regex in |ExtractDocIdFromUrl|.
  // The below logic handles google.com redirects; e.g.,
  // google.com/url?q=<url>.
  std::string candidate;
  const bool is_google_redirect =
      url.host() == "www.google.com" && url.path() == "/url";
  if (!is_google_redirect) {
    candidate = url.spec();
  } else {
    // The redirect target lives in the "q" parameter, or failing that, "url".
    const bool found_in_q =
        net::GetValueForKeyInQuery(url, "q", &candidate) && !candidate.empty();
    if (!found_in_q) {
      const bool found_in_url =
          net::GetValueForKeyInQuery(url, "url", &candidate) &&
          !candidate.empty();
      if (!found_in_url)
        return GURL();
    }
  }

  // Unescape the candidate before running the doc id regex over it.
  candidate = net::UnescapeURLComponent(
      candidate,
      net::UnescapeRule::PATH_SEPARATORS |
          net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);

  const std::string doc_id = ExtractDocIdFromUrl(candidate);
  if (doc_id.empty())
    return GURL();

  // Canonicalize to the /open form without any extra args.
  // This is similar to what we expect from the server.
  return GURL("https://drive.google.com/open?id=" + doc_id);
}