blob: 5f76624ba5ecfc842925aafa220138da95af3b0e [file] [log] [blame]
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_PROVIDER_H_
#define COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_PROVIDER_H_
#include <stddef.h>
#include <map>
#include <utility>
#include <vector>
#include "base/gtest_prod_util.h"
#include "base/macros.h"
#include "base/memory/ref_counted.h"
#include "base/strings/string16.h"
#include "components/omnibox/browser/autocomplete_match.h"
#include "third_party/metrics_proto/omnibox_event.pb.h"
// Forward declaration: this header only refers to AutocompleteInput by
// reference (see Start() and FixupUserInput() below), so the full definition
// is not needed here.
class AutocompleteInput;
// A list of per-provider diagnostic entries for an omnibox event log. Each
// provider may append to it in AutocompleteProvider::AddProviderInfo().
typedef std::vector<metrics::OmniboxEventProto_ProviderInfo> ProvidersInfo;
// The AutocompleteProviders each return different kinds of matches,
// such as history or search matches. These matches are given
// "relevance" scores. Higher scores are better matches than lower
// scores. The relevance scores and classes providing the respective
// matches are as listed below.
//
// IMPORTANT CAVEAT: The tables below are NOT COMPLETE. Developers
// often forget to keep these tables in sync with the code when they
// change scoring algorithms or add new providers. For example,
// neither the HistoryQuickProvider (which is a provider that appears
// often) nor the ShortcutsProvider are listed here. For the best
// idea of how scoring works and what providers are affecting which
// queries, play with chrome://omnibox/ for a while. While the tables
// below may have some utility, nothing compares with first-hand
// investigation and experience.
//
// ZERO SUGGEST (empty) input type:
// --------------------------------------------------------------------|-----
// Clipboard URL | 800
// Zero Suggest (most visited, Android only) | 600--
// Zero Suggest (default, may be overridden by server) | 100
//
// UNKNOWN input type:
// --------------------------------------------------------------------|-----
// Keyword (non-substituting or in keyword UI mode, exact match) | 1500
// HistoryURL (good exact or inline autocomplete matches, some inexact)| 1410++
// HistoryURL (intranet url never visited match, some inexact matches) | 1400++
// Search Primary Provider (past query in history within 2 days) | 1399**
// Search Primary Provider (what you typed) | 1300
// HistoryURL (what you typed, some inexact matches) | 1200++
// Keyword (substituting, exact match) | 1100
// Search Primary Provider (past query in history older than 2 days) | 1050*
// HistoryURL (some inexact matches) | 900++
// BookmarkProvider (prefix match in bookmark title or URL) | 900+-
// Built-in | 860++
// Search Primary Provider (navigational suggestion) | 800++
// Search Primary Provider (suggestion) | 600++
// Keyword (inexact match) | 450
// Search Secondary Provider (what you typed) | 250
// Search Secondary Provider (past query in history) | 200*
// Search Secondary Provider (navigational suggestion) | 150++
// Search Secondary Provider (suggestion) | 100++
// Document Suggestions (*experimental): value controlled by Finch | *
//
// URL input type:
// --------------------------------------------------------------------|-----
// Keyword (non-substituting or in keyword UI mode, exact match) | 1500
// HistoryURL (good exact or inline autocomplete matches, some inexact)| 1410++
// HistoryURL (intranet url never visited match, some inexact matches) | 1400++
// HistoryURL (what you typed, some inexact matches) | 1200++
// Keyword (substituting, exact match) | 1100
// HistoryURL (some inexact matches) | 900++
// Built-in | 860++
// Search Primary Provider (what you typed) | 850
// Search Primary Provider (navigational suggestion) | 800++
// Search Primary Provider (past query in history) | 750*
// Keyword (inexact match) | 700
// Search Primary Provider (suggestion) | 300++
// Search Secondary Provider (what you typed) | 250
// Search Secondary Provider (past query in history) | 200*
// Search Secondary Provider (navigational suggestion) | 150++
// Search Secondary Provider (suggestion) | 100++
//
// QUERY input type:
// --------------------------------------------------------------------|-----
// Search Primary or Secondary (past query in history within 2 days) | 1599**
// Keyword (non-substituting or in keyword UI mode, exact match) | 1500
// Keyword (substituting, exact match) | 1450
// Search Primary Provider (past query in history within 2 days) | 1399**
// Search Primary Provider (what you typed) | 1300
// Search Primary Provider (past query in history older than 2 days) | 1050*
// HistoryURL (inexact match) | 900++
// BookmarkProvider (prefix match in bookmark title or URL) | 900+-
// Search Primary Provider (navigational suggestion) | 800++
// Search Primary Provider (suggestion) | 600++
// Keyword (inexact match) | 450
// Search Secondary Provider (what you typed) | 250
// Search Secondary Provider (past query in history) | 200*
// Search Secondary Provider (navigational suggestion) | 150++
// Search Secondary Provider (suggestion) | 100++
//
// (A search keyword is a keyword with a replacement string; a bookmark keyword
// is a keyword with no replacement string, that is, a shortcut for a URL.)
//
// There are two possible providers for search suggestions. If the user has
// typed a keyword, then the primary provider is the keyword provider and the
// secondary provider is the default provider. If the user has not typed a
// keyword, then the primary provider corresponds to the default provider.
//
// Search providers may supply relevance values along with their results to be
// used in place of client-side calculated values.
//
// The value column gives the ranking returned from the various providers.
// ++: a series of matches with relevance from n up to (n + max_matches).
// --: a series of matches with relevance from n down to (n - max_matches).
// *: relevance score falls off over time (discounted 50 points @ 15 minutes,
// 450 points @ two weeks)
// **: relevance score falls off over two days (discounted 99 points after two
// days).
// +-: A base score that the provider will adjust upward or downward based on
// provider-specific metrics.
//
// A single result provider for the autocomplete system. Given user input, the
// provider decides what (if any) matches to return, their relevance, and their
// classifications.
//
// Instances are reference counted through base::RefCountedThreadSafe; the
// destructor is protected, so callers cannot destroy a provider directly.
class AutocompleteProvider
: public base::RefCountedThreadSafe<AutocompleteProvider> {
public:
// Different AutocompleteProvider implementations. Each value occupies its own
// bit (1 << n).
// NOTE(review): presumably so that sets of provider types can be combined
// into a bitmask -- confirm against callers.
enum Type {
TYPE_BOOKMARK = 1 << 0,
TYPE_BUILTIN = 1 << 1,
TYPE_HISTORY_QUICK = 1 << 2,
TYPE_HISTORY_URL = 1 << 3,
TYPE_KEYWORD = 1 << 4,
TYPE_SEARCH = 1 << 5,
TYPE_SHORTCUTS = 1 << 6,
TYPE_ZERO_SUGGEST = 1 << 7,
TYPE_CLIPBOARD_URL = 1 << 8,
TYPE_DOCUMENT = 1 << 9,
};
// |type| identifies which implementation this provider is.
// NOTE(review): presumably the value later returned by type() -- confirm in
// the .cc file.
explicit AutocompleteProvider(Type type);
// Returns a string describing a particular AutocompleteProvider type.
static const char* TypeToString(Type type);
// Called to start an autocomplete query. The provider is responsible for
// tracking its matches for this query and whether it is done processing the
// query. When new matches are available or the provider finishes, it
// calls the controller's OnProviderUpdate() method. The controller can then
// get the new matches using the provider's accessors.
// Exception: Matches available immediately after starting the query (that
// is, synchronously) do not cause any notifications to be sent. The
// controller is expected to check for these without prompting (since
// otherwise, starting each provider running would result in a flurry of
// notifications).
//
// Once Stop() has been called, usually no more notifications should be sent.
// (See comments on Stop() below.)
//
// |minimal_changes| is an optimization that lets the provider do less work
// when the |input|'s text hasn't changed. See the body of
// OmniboxPopupModel::StartAutocomplete().
virtual void Start(const AutocompleteInput& input, bool minimal_changes) = 0;
// Advises the provider to stop processing. This may be called even if the
// provider is already done. If the provider caches any results, it should
// clear the cache based on the value of |clear_cached_results|. Normally,
// once this is called, the provider should not send more notifications to
// the controller.
//
// If |due_to_user_inactivity| is true, Stop() is being called because it's
// been a long time since the user started the current query, and returning
// further asynchronous results would normally just be disruptive. Most
// providers should still stop processing in this case, but continuing is
// legal if there's a good reason the user is likely to want even long-
// delayed asynchronous results, e.g. the user has explicitly invoked a
// keyword extension and the extension is still processing the request.
virtual void Stop(bool clear_cached_results,
bool due_to_user_inactivity);
// Returns the metrics proto enum value (OmniboxEventProto_ProviderType)
// equivalent to the name of this provider.
// TODO(derat): Make metrics use AutocompleteProvider::Type directly, or at
// least move this method to the metrics directory.
metrics::OmniboxEventProto_ProviderType AsOmniboxEventProviderType() const;
// Called to delete a match and the backing data that produced it. This
// match should not appear again in this or future queries. This can only be
// called for matches the provider marks as deletable. This should only be
// called when no query is running.
// NOTE: Do NOT call OnProviderUpdate() in this method; it is the
// responsibility of the caller to do so after calling us.
virtual void DeleteMatch(const AutocompleteMatch& match);
// Called when an omnibox event log entry is generated. This gives
// a provider the opportunity to add diagnostic information to the
// logs. A provider is expected to append a single entry of whatever
// information it wants to |provider_info|.
virtual void AddProviderInfo(ProvidersInfo* provider_info) const;
// Called when a new omnibox session starts or the current session ends.
// This gives the opportunity to reset the internal state, if any, associated
// with the previous session.
virtual void ResetSession();
// Estimates dynamic memory usage.
// See base/trace_event/memory_usage_estimator.h for more info.
//
// Note: Subclasses that override this method must call the base class
// method and include the response in their estimate.
virtual size_t EstimateMemoryUsage() const;
// Returns the set of matches for the current query.
const ACMatches& matches() const { return matches_; }
// Returns whether the provider is done processing the query.
bool done() const { return done_; }
// Returns this provider's type.
Type type() const { return type_; }
// Returns a string describing this provider's type.
const char* GetName() const;
// Multimap keyed by a single character whose values are words; produced by
// CreateWordMapForString() and consumed by ClassifyAllMatchesInString().
typedef std::multimap<base::char16, base::string16> WordMap;
// Returns a map mapping characters to groups of words from |text| that start
// with those characters, ordered lexicographically descending so that longer
// words appear before their prefixes (if any) within a particular
// equal_range().
static WordMap CreateWordMapForString(const base::string16& text);
// Finds all instances of the words from |find_words| within |text|, adds
// classifications to |original_class| according to the logic described below,
// and returns the result.
//
// - if |text_is_search_query| is false, the function adds
// ACMatchClassification::MATCH markers for all such instances.
//
// For example, given the |text|
// "Sports and News at sports.somesite.com - visit us!" and |original_class|
// {{0, NONE}, {19, URL}, {38, NONE}} (marking "sports.somesite.com" as a
// URL), calling with |find_text| set to "sp ew" would return
// {{0, MATCH}, {2, NONE}, {12, MATCH}, {14, NONE}, {19, URL|MATCH},
// {21, URL}, {38, NONE}}.
//
// - if |text_is_search_query| is true, applies the same logic, but uses
// NONE for the matching text and MATCH for the non-matching text. This is
// done to mimic the behavior of SearchProvider which decorates matches
// according to the approach used by Google Suggest.
//
// For example, given that |text| corresponds to a search query "panama
// canal" and |original_class| is {{0, NONE}}, calling with |find_text| set
// to "canal" would return {{0, MATCH}, {7, NONE}}.
//
// |find_text| is provided as the original string used to create
// |find_words|. This is supplied because it's common for this to be a prefix
// of |text|, so we can quickly check for that and mark that entire substring
// as a match before proceeding with the more generic algorithm.
//
// |find_words| should be as constructed by CreateWordMapForString(find_text).
//
// |find_text| (and thus |find_words|) are expected to be lowercase. |text|
// will be lowercased in this function.
//
// |original_class| defaults to an empty classification list.
static ACMatchClassifications ClassifyAllMatchesInString(
const base::string16& find_text,
const WordMap& find_words,
const base::string16& text,
const bool text_is_search_query,
const ACMatchClassifications& original_class = ACMatchClassifications());
// A suggested upper bound for how many matches a provider should return.
// TODO(pkasting): http://b/1111299 , http://b/933133 This should go away once
// we have good relevance heuristics; the controller should handle all
// culling.
static const size_t kMaxMatches;
protected:
// Befriended so that the ref-counting base class can invoke the protected
// destructor below.
friend class base::RefCountedThreadSafe<AutocompleteProvider>;
// Gives the BookmarkProviderTest.InlineAutocompletion test access to
// non-public members.
FRIEND_TEST_ALL_PREFIXES(BookmarkProviderTest, InlineAutocompletion);
// Result of FixupUserInput(): (whether fixup succeeded, fixed-up text).
typedef std::pair<bool, base::string16> FixupReturn;
virtual ~AutocompleteProvider();
// Fixes up user URL input to make it more possible to match against. Among
// many other things, this takes care of the following:
// * Prepending file:// to file URLs
// * Converting drive letters in file URLs to uppercase
// * Converting case-insensitive parts of URLs (like the scheme and domain)
// to lowercase
// * Converting spaces to %20s
// Note that we don't do this in AutocompleteInput's constructor, because if
// e.g. we convert a Unicode hostname to punycode, other providers will show
// output that surprises the user ("Search Google for xn--6ca.com").
// Returns a bool indicating whether fixup succeeded, as well as the fixed-up
// input text. The returned string will be the same as the input string if
// fixup failed; this lets callers who don't care about failure simply use the
// string unconditionally.
static FixupReturn FixupUserInput(const AutocompleteInput& input);
// Trims "http:" and up to two subsequent slashes from |url|. Returns the
// number of characters that were trimmed.
// NOTE: For a view-source: URL, this will trim from after "view-source:" and
// return 0.
static size_t TrimHttpPrefix(base::string16* url);
// Matches for the current query; exposed read-only through matches().
ACMatches matches_;
// Whether processing of the current query has finished; exposed through
// done().
bool done_;
// This provider's type; exposed through type().
Type type_;
private:
DISALLOW_COPY_AND_ASSIGN(AutocompleteProvider);
};
#endif // COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_PROVIDER_H_