components/omnibox/browser/in_memory_url_index_types.cc - chromium/src - Git at Google

 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "components/omnibox/browser/in_memory_url_index_types.h"

 #include <algorithm>
 #include <functional>
 #include <iterator>
 #include <numeric>
 #include <set>

 #include "base/i18n/break_iterator.h"
 #include "base/i18n/case_conversion.h"
 #include "base/strings/string_util.h"
 #include "base/trace_event/memory_usage_estimator.h"
 #include "components/omnibox/browser/tailored_word_break_iterator.h"
 #include "net/base/escape.h"

 namespace {
 // The maximum number of characters to consider from an URL and page title
 // while matching user-typed terms.
 const size_t kMaxSignificantChars = 200;

 void String16VectorFromString16Internal(base::string16 word,
                                         size_t previous_postion,
                                         bool break_on_space,
                                         String16Vector* words,
                                         WordStarts* word_starts) {
   size_t initial_whitespace = 0;
   if (break_on_space) {
     base::string16 trimmed_word;
     base::TrimWhitespace(word, base::TRIM_LEADING, &trimmed_word);
     initial_whitespace = word.length() - trimmed_word.length();
     base::TrimWhitespace(trimmed_word, base::TRIM_TRAILING, &word);
   }
   if (word.empty())
     return;
   words->push_back(word);
   if (!word_starts)
     return;
   size_t word_start = previous_postion + initial_whitespace;
   if (word_start < kMaxSignificantChars)
     word_starts->push_back(word_start);
 }
 }

 // Matches within URL and Title Strings ----------------------------------------

 TermMatches MatchTermInString(const base::string16& term,
                               const base::string16& cleaned_string,
                               int term_num) {
   const size_t kMaxCompareLength = 2048;
   const base::string16& short_string =
       (cleaned_string.length() > kMaxCompareLength) ?
       cleaned_string.substr(0, kMaxCompareLength) : cleaned_string;
   TermMatches matches;
   for (size_t location = short_string.find(term);
        location != base::string16::npos;
        location = short_string.find(term, location + 1))
     matches.push_back(TermMatch(term_num, location, term.length()));
   return matches;
 }

 // Comparison function for sorting TermMatches by their offsets.
 bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) {
   return m1.offset < m2.offset;
 }

 TermMatches SortMatches(const TermMatches& matches) {
   TermMatches sorted_matches(matches);
   std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess);
   return sorted_matches;
 }

 // Assumes |sorted_matches| is already sorted.
 TermMatches DeoverlapMatches(const TermMatches& sorted_matches) {
   TermMatches out;
   std::copy_if(
       sorted_matches.begin(), sorted_matches.end(), std::back_inserter(out),
       [&out](const TermMatch& match) {
         return out.empty() ||
                match.offset >= (out.back().offset + out.back().length); });
   return out;
 }

 std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches) {
   std::vector<size_t> offsets;
   for (const auto& match : matches) {
     offsets.push_back(match.offset);
     offsets.push_back(match.offset + match.length);
   }
   return offsets;
 }

 TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches,
                                         const std::vector<size_t>& offsets) {
   DCHECK_EQ(2 * matches.size(), offsets.size());
   TermMatches new_matches;
   auto offset_iter = offsets.begin();
   for (auto term_iter = matches.begin(); term_iter != matches.end();
        ++term_iter, ++offset_iter) {
     const size_t starting_offset = *offset_iter;
     ++offset_iter;
     const size_t ending_offset = *offset_iter;
     if ((starting_offset != base::string16::npos) &&
         (ending_offset != base::string16::npos) &&
         (starting_offset != ending_offset)) {
       TermMatch new_match(*term_iter);
       new_match.offset = starting_offset;
       new_match.length = ending_offset - starting_offset;
       new_matches.push_back(new_match);
     }
   }
   return new_matches;
 }

 // Utility Functions -----------------------------------------------------------

 String16Set String16SetFromString16(const base::string16& cleaned_uni_string,
                                     WordStarts* word_starts) {
   String16Vector words =
       String16VectorFromString16(cleaned_uni_string, false, word_starts);
   for (auto& word : words)
     word = base::i18n::ToLower(word).substr(0, kMaxSignificantChars);
   return String16Set(std::make_move_iterator(words.begin()),
                      std::make_move_iterator(words.end()),
                      base::KEEP_FIRST_OF_DUPES);
 }

 String16Vector String16VectorFromString16(
     const base::string16& cleaned_uni_string,
     bool break_on_space,
     WordStarts* word_starts) {
   if (word_starts)
     word_starts->clear();
   base::i18n::BreakIterator::BreakType break_mode =
       break_on_space ? base::i18n::BreakIterator::BREAK_SPACE
                      : base::i18n::BreakIterator::BREAK_WORD;
   String16Vector words;
   if (!break_on_space) {
     TailoredWordBreakIterator iter(cleaned_uni_string, break_mode);
     if (!iter.Init())
       return words;
     while (iter.Advance()) {
       if (iter.IsWord()) {
         String16VectorFromString16Internal(iter.GetString(), iter.prev(), false,
                                            &words, word_starts);
       }
     }
   } else {
     base::i18n::BreakIterator iter(cleaned_uni_string, break_mode);
     if (!iter.Init())
       return words;
     while (iter.Advance()) {
       String16VectorFromString16Internal(iter.GetString(), iter.prev(), true,
                                          &words, word_starts);
     }
   }
   return words;
 }

 Char16Set Char16SetFromString16(const base::string16& term) {
   return Char16Set(term.begin(), term.end(), base::KEEP_FIRST_OF_DUPES);
 }

 // HistoryInfoMapValue ---------------------------------------------------------

 HistoryInfoMapValue::HistoryInfoMapValue() = default;
 HistoryInfoMapValue::HistoryInfoMapValue(const HistoryInfoMapValue& other) =
     default;
 HistoryInfoMapValue::HistoryInfoMapValue(HistoryInfoMapValue&& other) = default;
 HistoryInfoMapValue& HistoryInfoMapValue::operator=(
     const HistoryInfoMapValue& other) = default;
 HistoryInfoMapValue& HistoryInfoMapValue::operator=(
     HistoryInfoMapValue&& other) = default;
 HistoryInfoMapValue::~HistoryInfoMapValue() = default;

 size_t HistoryInfoMapValue::EstimateMemoryUsage() const {
   return base::trace_event::EstimateMemoryUsage(url_row) +
          base::trace_event::EstimateMemoryUsage(visits);
 }

 // RowWordStarts ---------------------------------------------------------------

 RowWordStarts::RowWordStarts() = default;
 RowWordStarts::RowWordStarts(const RowWordStarts& other) = default;
 RowWordStarts::RowWordStarts(RowWordStarts&& other) = default;
 RowWordStarts& RowWordStarts::operator=(const RowWordStarts& other) = default;
 RowWordStarts& RowWordStarts::operator=(RowWordStarts&& other) = default;
 RowWordStarts::~RowWordStarts() = default;

 size_t RowWordStarts::EstimateMemoryUsage() const {
   return base::trace_event::EstimateMemoryUsage(url_word_starts_) +
          base::trace_event::EstimateMemoryUsage(title_word_starts_);
 }

 void RowWordStarts::Clear() {
   url_word_starts_.clear();
   title_word_starts_.clear();
 }
	// Copyright (c) 2012 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "components/omnibox/browser/in_memory_url_index_types.h"

	#include <algorithm>
	#include <functional>
	#include <iterator>
	#include <numeric>
	#include <set>

	#include "base/i18n/break_iterator.h"
	#include "base/i18n/case_conversion.h"
	#include "base/strings/string_util.h"
	#include "base/trace_event/memory_usage_estimator.h"
	#include "components/omnibox/browser/tailored_word_break_iterator.h"
	#include "net/base/escape.h"

	namespace {
	// The maximum number of characters to consider from an URL and page title
	// while matching user-typed terms.
	const size_t kMaxSignificantChars = 200;

	void String16VectorFromString16Internal(base::string16 word,
	size_t previous_postion,
	bool break_on_space,
	String16Vector* words,
	WordStarts* word_starts) {
	size_t initial_whitespace = 0;
	if (break_on_space) {
	base::string16 trimmed_word;
	base::TrimWhitespace(word, base::TRIM_LEADING, &trimmed_word);
	initial_whitespace = word.length() - trimmed_word.length();
	base::TrimWhitespace(trimmed_word, base::TRIM_TRAILING, &word);
	}
	if (word.empty())
	return;
	words->push_back(word);
	if (!word_starts)
	return;
	size_t word_start = previous_postion + initial_whitespace;
	if (word_start < kMaxSignificantChars)
	word_starts->push_back(word_start);
	}
	}

	// Matches within URL and Title Strings ----------------------------------------

	TermMatches MatchTermInString(const base::string16& term,
	const base::string16& cleaned_string,
	int term_num) {
	const size_t kMaxCompareLength = 2048;
	const base::string16& short_string =
	(cleaned_string.length() > kMaxCompareLength) ?
	cleaned_string.substr(0, kMaxCompareLength) : cleaned_string;
	TermMatches matches;
	for (size_t location = short_string.find(term);
	location != base::string16::npos;
	location = short_string.find(term, location + 1))
	matches.push_back(TermMatch(term_num, location, term.length()));
	return matches;
	}

	// Comparison function for sorting TermMatches by their offsets.
	bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) {
	return m1.offset < m2.offset;
	}

	TermMatches SortMatches(const TermMatches& matches) {
	TermMatches sorted_matches(matches);
	std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess);
	return sorted_matches;
	}

	// Assumes \|sorted_matches\| is already sorted.
	TermMatches DeoverlapMatches(const TermMatches& sorted_matches) {
	TermMatches out;
	std::copy_if(
	sorted_matches.begin(), sorted_matches.end(), std::back_inserter(out),
	[&out](const TermMatch& match) {
	return out.empty() \|\|
	match.offset >= (out.back().offset + out.back().length); });
	return out;
	}

	std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches) {
	std::vector<size_t> offsets;
	for (const auto& match : matches) {
	offsets.push_back(match.offset);
	offsets.push_back(match.offset + match.length);
	}
	return offsets;
	}

	TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches,
	const std::vector<size_t>& offsets) {
	DCHECK_EQ(2 * matches.size(), offsets.size());
	TermMatches new_matches;
	auto offset_iter = offsets.begin();
	for (auto term_iter = matches.begin(); term_iter != matches.end();
	++term_iter, ++offset_iter) {
	const size_t starting_offset = *offset_iter;
	++offset_iter;
	const size_t ending_offset = *offset_iter;
	if ((starting_offset != base::string16::npos) &&
	(ending_offset != base::string16::npos) &&
	(starting_offset != ending_offset)) {
	TermMatch new_match(*term_iter);
	new_match.offset = starting_offset;
	new_match.length = ending_offset - starting_offset;
	new_matches.push_back(new_match);
	}
	}
	return new_matches;
	}

	// Utility Functions -----------------------------------------------------------

	String16Set String16SetFromString16(const base::string16& cleaned_uni_string,
	WordStarts* word_starts) {
	String16Vector words =
	String16VectorFromString16(cleaned_uni_string, false, word_starts);
	for (auto& word : words)
	word = base::i18n::ToLower(word).substr(0, kMaxSignificantChars);
	return String16Set(std::make_move_iterator(words.begin()),
	std::make_move_iterator(words.end()),
	base::KEEP_FIRST_OF_DUPES);
	}

	String16Vector String16VectorFromString16(
	const base::string16& cleaned_uni_string,
	bool break_on_space,
	WordStarts* word_starts) {
	if (word_starts)
	word_starts->clear();
	base::i18n::BreakIterator::BreakType break_mode =
	break_on_space ? base::i18n::BreakIterator::BREAK_SPACE
	: base::i18n::BreakIterator::BREAK_WORD;
	String16Vector words;
	if (!break_on_space) {
	TailoredWordBreakIterator iter(cleaned_uni_string, break_mode);
	if (!iter.Init())
	return words;
	while (iter.Advance()) {
	if (iter.IsWord()) {
	String16VectorFromString16Internal(iter.GetString(), iter.prev(), false,
	&words, word_starts);
	}
	}
	} else {
	base::i18n::BreakIterator iter(cleaned_uni_string, break_mode);
	if (!iter.Init())
	return words;
	while (iter.Advance()) {
	String16VectorFromString16Internal(iter.GetString(), iter.prev(), true,
	&words, word_starts);
	}
	}
	return words;
	}

	Char16Set Char16SetFromString16(const base::string16& term) {
	return Char16Set(term.begin(), term.end(), base::KEEP_FIRST_OF_DUPES);
	}

	// HistoryInfoMapValue ---------------------------------------------------------

	HistoryInfoMapValue::HistoryInfoMapValue() = default;
	HistoryInfoMapValue::HistoryInfoMapValue(const HistoryInfoMapValue& other) =
	default;
	HistoryInfoMapValue::HistoryInfoMapValue(HistoryInfoMapValue&& other) = default;
	HistoryInfoMapValue& HistoryInfoMapValue::operator=(
	const HistoryInfoMapValue& other) = default;
	HistoryInfoMapValue& HistoryInfoMapValue::operator=(
	HistoryInfoMapValue&& other) = default;
	HistoryInfoMapValue::~HistoryInfoMapValue() = default;

	size_t HistoryInfoMapValue::EstimateMemoryUsage() const {
	return base::trace_event::EstimateMemoryUsage(url_row) +
	base::trace_event::EstimateMemoryUsage(visits);
	}

	// RowWordStarts ---------------------------------------------------------------

	RowWordStarts::RowWordStarts() = default;
	RowWordStarts::RowWordStarts(const RowWordStarts& other) = default;
	RowWordStarts::RowWordStarts(RowWordStarts&& other) = default;
	RowWordStarts& RowWordStarts::operator=(const RowWordStarts& other) = default;
	RowWordStarts& RowWordStarts::operator=(RowWordStarts&& other) = default;
	RowWordStarts::~RowWordStarts() = default;

	size_t RowWordStarts::EstimateMemoryUsage() const {
	return base::trace_event::EstimateMemoryUsage(url_word_starts_) +
	base::trace_event::EstimateMemoryUsage(title_word_starts_);
	}

	void RowWordStarts::Clear() {
	url_word_starts_.clear();
	title_word_starts_.clear();
	}