// components/omnibox/browser/autocomplete_match_classification.h - chromium/src - Git at Google

 // Copyright 2019 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_MATCH_CLASSIFICATION_H_
 #define COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_MATCH_CLASSIFICATION_H_

 #include "components/omnibox/browser/autocomplete_match.h"
 #include "components/omnibox/browser/in_memory_url_index_types.h"

 // Finds the matches for |find_text| in |text|, classifies those matches,
 // merges those classifications with |original_class|, and returns the merged
 // classifications. |original_class| defaults to an empty
 // ACMatchClassifications.
 //
 // If |text_is_search_query| is false, matches are classified as MATCH and
 // non-matches are classified as NONE. If |text_is_search_query| is true, the
 // styles are inverted: matches are classified as NONE and non-matches are
 // classified as MATCH. This is done to mimic the behavior of SearchProvider,
 // which decorates matches according to the approach used by Google Suggest.
 //
 // |find_text| and |text| will be lowercased; both are taken by const
 // reference, so the caller's strings are left untouched.
 //
 //   For example, given
 //     |find_text| is "sp new",
 //     |text| is "Sports and News at sports.somesite.com - visit us!",
 //     |text_is_search_query| is false, and
 //     |original_class| is {{0, NONE}, {19, URL}, {38, NONE}} (marking
 //     "sports.somesite.com" as a URL),
 //   Then this will return
 //     {{0, MATCH}, {2, NONE}, {11, MATCH}, {14, NONE}, {19, URL|MATCH},
 //     {21, URL}, {38, NONE}}; i.e.,
 //     "Sports and News at sports.somesite.com - visit us!"
 //      ^ ^        ^  ^    ^ ^                ^
 //      0 2        11 14  19 21               38
 //      M N        M  N  U|M U                N
 //
 //   For example, given
 //     |find_text| is "canal",
 //     |text| is "panama canal",
 //     |text_is_search_query| is true, and
 //     |original_class| is {{0, NONE}},
 //   Then this will return
 //     {{0, MATCH}, {7, NONE}}; i.e.,
 //     "panama canal"
 //      ^      ^
 //      0 M    7 N
 ACMatchClassifications ClassifyAllMatchesInString(
     const std::u16string& find_text,
     const std::u16string& text,
     bool text_is_search_query,
     const ACMatchClassifications& original_class = ACMatchClassifications());

 // Finds the terms of |find_text| that occur in |text|.
 //
 // Cleans |text|, splits |find_text| into terms by breaking on whitespaces and
 // most symbols, looks for those terms in the cleaned |text|, and returns the
 // matched terms sorted, deduped, and possibly filtered-by-word-boundary.
 //
 // If |allow_prefix_matching| is true (the default), and |find_text| is an
 // exact prefix (ignoring case but considering symbols) of |text|, then only a
 // single term representing the prefix will be returned. E.g., for |find_text|
 // "how to tie" and |text| "how to tie a tie", this will return
 // "[how to tie] a tie". On the other hand, for |find_text| "to tie", this
 // will return "how [to] [tie] a [tie]".
 //
 // If |allow_mid_word_matching| is false (the default), the returned terms
 // will be filtered-by-word-boundary. E.g., for |find_text| "ho to ie" and
 // |text| "how to tie a tie", this will return "[ho]w [to] tie a tie" when
 // |allow_mid_word_matching| is false, and "[ho]w [to] t[ie] a t[ie]" when it
 // is true.
 TermMatches FindTermMatches(std::u16string_view find_text,
                             std::u16string_view text,
                             bool allow_prefix_matching = true,
                             bool allow_mid_word_matching = false);

 // A utility function called by `FindTermMatches` to find valid matches of
 // `find_terms` in `cleaned_text`. Matched terms are sorted, deduped, and
 // possibly filtered-by-word-boundary.
 //
 // NOTE(review): `find_terms_word_starts` and `text_word_starts` presumably
 // hold the word-start offsets of `find_terms` and `cleaned_text`,
 // respectively - confirm against the implementation.
 //
 // If `allow_mid_word_matching` is false (the default), the returned terms
 // will be filtered-by-word-boundary. E.g., for the terms of "ho to ie" and
 // the text "how to tie a tie", this will return "[ho]w [to] tie a tie" when
 // `allow_mid_word_matching` is false, and "[ho]w [to] t[ie] a t[ie]" when it
 // is true.
 TermMatches FindTermMatchesForTerms(const String16Vector& find_terms,
                                     const WordStarts& find_terms_word_starts,
                                     const std::u16string& cleaned_text,
                                     const WordStarts& text_word_starts,
                                     bool allow_mid_word_matching = false);

 // Returns an ACMatchClassifications structure given the |matches| to
 // highlight. |matches| can be obtained by calling FindTermMatches().
 //
 // |text_length| should be the full length of the text being classified (not
 // the length of the truncated text that cleaning returns). It is used to
 // ensure the trailing classification is correct; e.g., if matches end at 20
 // and |text_length| is greater than 20, ClassifyTermMatches() will add a
 // |non_match_style| classification with offset 20.
 //
 // |match_style| and |non_match_style| specify the classifications to use for
 // matched and non-matched text, respectively.
 ACMatchClassifications ClassifyTermMatches(const TermMatches& matches,
                                            size_t text_length,
                                            int match_style,
                                            int non_match_style);

 #endif  // COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_MATCH_CLASSIFICATION_H_
	// Copyright 2019 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_MATCH_CLASSIFICATION_H_
	#define COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_MATCH_CLASSIFICATION_H_

	#include "components/omnibox/browser/autocomplete_match.h"
	#include "components/omnibox/browser/in_memory_url_index_types.h"

	// Finds the matches for |find_text| in |text|, classifies those matches,
	// merges those classifications with |original_class|, and returns the merged
	// classifications. |original_class| defaults to an empty
	// ACMatchClassifications.
	//
	// If |text_is_search_query| is false, matches are classified as MATCH and
	// non-matches are classified as NONE. If |text_is_search_query| is true, the
	// styles are inverted: matches are classified as NONE and non-matches are
	// classified as MATCH. This is done to mimic the behavior of SearchProvider,
	// which decorates matches according to the approach used by Google Suggest.
	//
	// |find_text| and |text| will be lowercased; both are taken by const
	// reference, so the caller's strings are left untouched.
	//
	//   For example, given
	//     |find_text| is "sp new",
	//     |text| is "Sports and News at sports.somesite.com - visit us!",
	//     |text_is_search_query| is false, and
	//     |original_class| is {{0, NONE}, {19, URL}, {38, NONE}} (marking
	//     "sports.somesite.com" as a URL),
	//   Then this will return
	//     {{0, MATCH}, {2, NONE}, {11, MATCH}, {14, NONE}, {19, URL|MATCH},
	//     {21, URL}, {38, NONE}}; i.e.,
	//     "Sports and News at sports.somesite.com - visit us!"
	//      ^ ^        ^  ^    ^ ^                ^
	//      0 2        11 14  19 21               38
	//      M N        M  N  U|M U                N
	//
	//   For example, given
	//     |find_text| is "canal",
	//     |text| is "panama canal",
	//     |text_is_search_query| is true, and
	//     |original_class| is {{0, NONE}},
	//   Then this will return
	//     {{0, MATCH}, {7, NONE}}; i.e.,
	//     "panama canal"
	//      ^      ^
	//      0 M    7 N
	ACMatchClassifications ClassifyAllMatchesInString(
	    const std::u16string& find_text,
	    const std::u16string& text,
	    bool text_is_search_query,
	    const ACMatchClassifications& original_class = ACMatchClassifications());

	// Finds the terms of |find_text| that occur in |text|.
	//
	// Cleans |text|, splits |find_text| into terms by breaking on whitespaces and
	// most symbols, looks for those terms in the cleaned |text|, and returns the
	// matched terms sorted, deduped, and possibly filtered-by-word-boundary.
	//
	// If |allow_prefix_matching| is true (the default), and |find_text| is an
	// exact prefix (ignoring case but considering symbols) of |text|, then only a
	// single term representing the prefix will be returned. E.g., for |find_text|
	// "how to tie" and |text| "how to tie a tie", this will return
	// "[how to tie] a tie". On the other hand, for |find_text| "to tie", this
	// will return "how [to] [tie] a [tie]".
	//
	// If |allow_mid_word_matching| is false (the default), the returned terms
	// will be filtered-by-word-boundary. E.g., for |find_text| "ho to ie" and
	// |text| "how to tie a tie", this will return "[ho]w [to] tie a tie" when
	// |allow_mid_word_matching| is false, and "[ho]w [to] t[ie] a t[ie]" when it
	// is true.
	TermMatches FindTermMatches(std::u16string_view find_text,
	std::u16string_view text,
	bool allow_prefix_matching = true,
	bool allow_mid_word_matching = false);

	// A utility function called by `FindTermMatches` to find valid matches of
	// `find_terms` in `cleaned_text`. Matched terms are sorted, deduped, and
	// possibly filtered-by-word-boundary.
	//
	// NOTE(review): `find_terms_word_starts` and `text_word_starts` presumably
	// hold the word-start offsets of `find_terms` and `cleaned_text`,
	// respectively - confirm against the implementation.
	//
	// If `allow_mid_word_matching` is false (the default), the returned terms
	// will be filtered-by-word-boundary. E.g., for the terms of "ho to ie" and
	// the text "how to tie a tie", this will return "[ho]w [to] tie a tie" when
	// `allow_mid_word_matching` is false, and "[ho]w [to] t[ie] a t[ie]" when it
	// is true.
	TermMatches FindTermMatchesForTerms(const String16Vector& find_terms,
	const WordStarts& find_terms_word_starts,
	const std::u16string& cleaned_text,
	const WordStarts& text_word_starts,
	bool allow_mid_word_matching = false);

	// Returns an ACMatchClassifications structure given the |matches| to
	// highlight. |matches| can be obtained by calling FindTermMatches().
	//
	// |text_length| should be the full length of the text being classified (not
	// the length of the truncated text that cleaning returns). It is used to
	// ensure the trailing classification is correct; e.g., if matches end at 20
	// and |text_length| is greater than 20, ClassifyTermMatches() will add a
	// |non_match_style| classification with offset 20.
	//
	// |match_style| and |non_match_style| specify the classifications to use for
	// matched and non-matched text, respectively.
	ACMatchClassifications ClassifyTermMatches(const TermMatches& matches,
	size_t text_length,
	int match_style,
	int non_match_style);

	#endif // COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_MATCH_CLASSIFICATION_H_