components/omnibox/browser/autocomplete_match_classification.cc - chromium/src - Git at Google

 // Copyright 2019 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "autocomplete_match_classification.h"

 #include <string>
 #include <string_view>

 #include "base/i18n/case_conversion.h"
 #include "base/strings/string_util.h"
 #include "components/omnibox/browser/autocomplete_match.h"
 #include "components/omnibox/browser/in_memory_url_index_types.h"
 #include "components/omnibox/browser/scored_history_match.h"
 #include "in_memory_url_index_types.h"

 namespace {

 std::u16string clean(std::u16string_view text) {
   const size_t kMaxTextLength = 2000;
   return base::i18n::ToLower(text.substr(0, kMaxTextLength));
 }

 }  // namespace

 ACMatchClassifications ClassifyAllMatchesInString(
     const std::u16string& find_text,
     const std::u16string& text,
     const bool text_is_search_query,
     const ACMatchClassifications& original_class) {
   DCHECK(!find_text.empty());

   if (text.empty()) {
     return original_class;
   }

   TermMatches term_matches = FindTermMatches(find_text, text);

   ACMatchClassifications classifications;
   if (text_is_search_query) {
     classifications = ClassifyTermMatches(term_matches, text.size(),
                                           ACMatchClassification::NONE,
                                           ACMatchClassification::MATCH);
   } else {
     classifications = ClassifyTermMatches(term_matches, text.size(),
                                           ACMatchClassification::MATCH,
                                           ACMatchClassification::NONE);
   }

   return AutocompleteMatch::MergeClassifications(original_class,
                                                  classifications);
 }

 TermMatches FindTermMatches(std::u16string_view find_text,
                             std::u16string_view text,
                             bool allow_prefix_matching,
                             bool allow_mid_word_matching) {
   std::u16string find_text_str = clean(find_text);
   std::u16string text_str = clean(text);

   // Some international characters become multiple characters when converting
   // case. E.g. Armenian և is 1 character lowercase, but 2 characters Եվ
   // uppercased. Turkish has examples of the opposite, where the lowercasing can
   // increase length. If the string length changes when lowercased, term match
   // indexes will be off when being used to style the original-cased `text`.
   // This will cause either `DCHECK` crashes if the incorrect index is out of
   // bounds; or incorrect styling if the incorrect index remains in bounds or
   // `DCHECK`s are disabled. E.g. input 'ou' would bold 'Yo[uT]ube' if lower
   // case 'Y' is 2 characters.
   if (find_text_str.size() != find_text.size() ||
       text_str.size() != text.size()) {
     return {};
   }

   if (find_text_str.empty()) {
     return {};
   }

   if (allow_prefix_matching &&
       base::StartsWith(text_str, find_text_str, base::CompareCase::SENSITIVE)) {
     return {{0, 0, find_text_str.length()}};
   }

   String16Vector find_terms =
       String16VectorFromString16(find_text_str, nullptr);
   WordStarts word_starts;
   // `word_starts` is unused if `allow_mid_word_matching` is true.
   if (!allow_mid_word_matching) {
     String16VectorFromString16(text_str, &word_starts);
   }
   return FindTermMatchesForTerms(find_terms, WordStarts(find_terms.size(), 0),
                                  text_str, word_starts,
                                  allow_mid_word_matching);
 }

 TermMatches FindTermMatchesForTerms(const String16Vector& find_terms,
                                     const WordStarts& find_terms_word_starts,
                                     const std::u16string& cleaned_text,
                                     const WordStarts& text_word_starts,
                                     bool allow_mid_word_matching) {
   TermMatches matches = MatchTermsInString(find_terms, cleaned_text);
   matches = SortMatches(matches);
   matches = DeoverlapMatches(matches);

   if (allow_mid_word_matching)
     return matches;

   return ScoredHistoryMatch::FilterTermMatchesByWordStarts(
       matches, find_terms_word_starts, text_word_starts, 0, std::string::npos);
 }

 ACMatchClassifications ClassifyTermMatches(const TermMatches& matches,
                                            size_t text_length,
                                            int match_style,
                                            int non_match_style) {
   ACMatchClassifications classes;
   if (matches.empty()) {
     if (text_length)
       classes.push_back(ACMatchClassification(0, non_match_style));
     return classes;
   }
   if (matches[0].offset)
     classes.push_back(ACMatchClassification(0, non_match_style));
   size_t match_count = matches.size();
   for (size_t i = 0; i < match_count;) {
     size_t offset = matches[i].offset;
     classes.push_back(ACMatchClassification(offset, match_style));
     // Skip all adjacent matches.
     do {
       offset += matches[i].length;
       ++i;
     } while ((i < match_count) && (offset == matches[i].offset));
     if (offset < text_length)
       classes.push_back(ACMatchClassification(offset, non_match_style));
   }
   return classes;
 }
	// Copyright 2019 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "autocomplete_match_classification.h"

	#include <string>
	#include <string_view>

	#include "base/i18n/case_conversion.h"
	#include "base/strings/string_util.h"
	#include "components/omnibox/browser/autocomplete_match.h"
	#include "components/omnibox/browser/in_memory_url_index_types.h"
	#include "components/omnibox/browser/scored_history_match.h"
	#include "in_memory_url_index_types.h"

	namespace {

	std::u16string clean(std::u16string_view text) {
	const size_t kMaxTextLength = 2000;
	return base::i18n::ToLower(text.substr(0, kMaxTextLength));
	}

	} // namespace

	ACMatchClassifications ClassifyAllMatchesInString(
	const std::u16string& find_text,
	const std::u16string& text,
	const bool text_is_search_query,
	const ACMatchClassifications& original_class) {
	DCHECK(!find_text.empty());

	if (text.empty()) {
	return original_class;
	}

	TermMatches term_matches = FindTermMatches(find_text, text);

	ACMatchClassifications classifications;
	if (text_is_search_query) {
	classifications = ClassifyTermMatches(term_matches, text.size(),
	ACMatchClassification::NONE,
	ACMatchClassification::MATCH);
	} else {
	classifications = ClassifyTermMatches(term_matches, text.size(),
	ACMatchClassification::MATCH,
	ACMatchClassification::NONE);
	}

	return AutocompleteMatch::MergeClassifications(original_class,
	classifications);
	}

	TermMatches FindTermMatches(std::u16string_view find_text,
	std::u16string_view text,
	bool allow_prefix_matching,
	bool allow_mid_word_matching) {
	std::u16string find_text_str = clean(find_text);
	std::u16string text_str = clean(text);

	// Some international characters become multiple characters when converting
	// case. E.g. Armenian և is 1 character lowercase, but 2 characters Եվ
	// uppercased. Turkish has examples of the opposite, where the lowercasing can
	// increase length. If the string length changes when lowercased, term match
	// indexes will be off when being used to style the original-cased `text`.
	// This will cause either `DCHECK` crashes if the incorrect index is out of
	// bounds; or incorrect styling if the incorrect index remains in bounds or
	// `DCHECK`s are disabled. E.g. input 'ou' would bold 'Yo[uT]ube' if lower
	// case 'Y' is 2 characters.
	if (find_text_str.size() != find_text.size() \|\|
	text_str.size() != text.size()) {
	return {};
	}

	if (find_text_str.empty()) {
	return {};
	}

	if (allow_prefix_matching &&
	base::StartsWith(text_str, find_text_str, base::CompareCase::SENSITIVE)) {
	return {{0, 0, find_text_str.length()}};
	}

	String16Vector find_terms =
	String16VectorFromString16(find_text_str, nullptr);
	WordStarts word_starts;
	// `word_starts` is unused if `allow_mid_word_matching` is true.
	if (!allow_mid_word_matching) {
	String16VectorFromString16(text_str, &word_starts);
	}
	return FindTermMatchesForTerms(find_terms, WordStarts(find_terms.size(), 0),
	text_str, word_starts,
	allow_mid_word_matching);
	}

	TermMatches FindTermMatchesForTerms(const String16Vector& find_terms,
	const WordStarts& find_terms_word_starts,
	const std::u16string& cleaned_text,
	const WordStarts& text_word_starts,
	bool allow_mid_word_matching) {
	TermMatches matches = MatchTermsInString(find_terms, cleaned_text);
	matches = SortMatches(matches);
	matches = DeoverlapMatches(matches);

	if (allow_mid_word_matching)
	return matches;

	return ScoredHistoryMatch::FilterTermMatchesByWordStarts(
	matches, find_terms_word_starts, text_word_starts, 0, std::string::npos);
	}

	ACMatchClassifications ClassifyTermMatches(const TermMatches& matches,
	size_t text_length,
	int match_style,
	int non_match_style) {
	ACMatchClassifications classes;
	if (matches.empty()) {
	if (text_length)
	classes.push_back(ACMatchClassification(0, non_match_style));
	return classes;
	}
	if (matches[0].offset)
	classes.push_back(ACMatchClassification(0, non_match_style));
	size_t match_count = matches.size();
	for (size_t i = 0; i < match_count;) {
	size_t offset = matches[i].offset;
	classes.push_back(ACMatchClassification(offset, match_style));
	// Skip all adjacent matches.
	do {
	offset += matches[i].length;
	++i;
	} while ((i < match_count) && (offset == matches[i].offset));
	if (offset < text_length)
	classes.push_back(ACMatchClassification(offset, non_match_style));
	}
	return classes;
	}