blob: 1cab6f65f8936e0af739118dcce0ca7a77b5bb69 [file] [log] [blame]
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/chromeos/local_search_service/index.h"
#include <utility>
#include "base/optional.h"
#include "base/strings/string_util.h"
#include "chrome/common/string_matching/fuzzy_tokenized_string_match.h"
#include "chrome/common/string_matching/tokenized_string.h"
namespace local_search_service {
namespace {
using Hits = std::vector<local_search_service::Range>;
void TokenizeSearchTags(
const std::vector<base::string16>& search_tags,
std::vector<std::unique_ptr<TokenizedString>>* tokenized) {
DCHECK(tokenized);
for (const auto& tag : search_tags) {
tokenized->push_back(std::make_unique<TokenizedString>(tag));
}
}
// Returns whether a given item with |search_tags| is relevant to |query| using
// fuzzy string matching.
// TODO(1018613): add weight decay to relevance scores for search tags. Tags
// at the front should have higher scores.
bool IsItemRelevant(
const TokenizedString& query,
const std::vector<std::unique_ptr<TokenizedString>>& search_tags,
double relevance_threshold,
bool use_prefix_only,
bool use_weighted_ratio,
bool use_edit_distance,
double partial_match_penalty_rate,
double* relevance_score,
Hits* hits) {
DCHECK(relevance_score);
DCHECK(hits);
if (search_tags.empty())
return false;
for (const auto& tag : search_tags) {
FuzzyTokenizedStringMatch match;
if (match.IsRelevant(query, *tag, relevance_threshold, use_prefix_only,
use_weighted_ratio, use_edit_distance,
partial_match_penalty_rate)) {
*relevance_score = match.relevance();
for (const auto& hit : match.hits()) {
local_search_service::Range range;
range.start = hit.start();
range.end = hit.end();
hits->push_back(range);
}
return true;
}
}
return false;
}
// Compares Results by |score|.
bool CompareResults(const local_search_service::Result& r1,
const local_search_service::Result& r2) {
return r1.score > r2.score;
}
} // namespace
local_search_service::Data::Data(const std::string& id,
const std::vector<base::string16>& search_tags)
: id(id), search_tags(search_tags) {}
local_search_service::Data::Data() = default;
local_search_service::Data::Data(const Data& data) = default;
local_search_service::Data::~Data() = default;
local_search_service::Result::Result() = default;
local_search_service::Result::Result(const Result& result) = default;
local_search_service::Result::~Result() = default;
Index::Index() = default;
Index::~Index() = default;
uint64_t Index::GetSize() {
return data_.size();
}
void Index::AddOrUpdate(const std::vector<local_search_service::Data>& data) {
for (const auto& item : data) {
const auto& id = item.id;
DCHECK(!id.empty());
// If a key already exists, it will overwrite earlier data.
data_[id] = std::vector<std::unique_ptr<TokenizedString>>();
TokenizeSearchTags(item.search_tags, &data_[id]);
}
}
uint32_t Index::Delete(const std::vector<std::string>& ids) {
uint32_t num_deleted = 0u;
for (const auto& id : ids) {
DCHECK(!id.empty());
const auto& it = data_.find(id);
if (it != data_.end()) {
// If id doesn't exist, just ignore it.
data_.erase(id);
++num_deleted;
}
}
return num_deleted;
}
local_search_service::ResponseStatus Index::Find(
const base::string16& query,
uint32_t max_results,
std::vector<local_search_service::Result>* results) {
DCHECK(results);
results->clear();
if (query.empty()) {
return local_search_service::ResponseStatus::kEmptyQuery;
}
if (data_.empty()) {
return local_search_service::ResponseStatus::kEmptyIndex;
}
*results = GetSearchResults(query, max_results);
return local_search_service::ResponseStatus::kSuccess;
}
void Index::SetSearchParams(
const local_search_service::SearchParams& search_params) {
search_params_ = search_params;
}
void Index::GetSearchParamsForTesting(double* relevance_threshold,
double* partial_match_penalty_rate,
bool* use_prefix_only,
bool* use_weighted_ratio,
bool* use_edit_distance) {
DCHECK(relevance_threshold);
DCHECK(partial_match_penalty_rate);
DCHECK(use_prefix_only);
DCHECK(use_weighted_ratio);
DCHECK(use_edit_distance);
*relevance_threshold = search_params_.relevance_threshold;
*partial_match_penalty_rate = search_params_.partial_match_penalty_rate;
*use_prefix_only = search_params_.use_prefix_only;
*use_weighted_ratio = search_params_.use_weighted_ratio;
*use_edit_distance = search_params_.use_edit_distance;
}
std::vector<local_search_service::Result> Index::GetSearchResults(
const base::string16& query,
uint32_t max_results) const {
std::vector<local_search_service::Result> results;
const TokenizedString tokenized_query(query);
for (const auto& item : data_) {
double relevance_score = 0.0;
Hits hits;
if (IsItemRelevant(
tokenized_query, item.second, search_params_.relevance_threshold,
search_params_.use_prefix_only, search_params_.use_weighted_ratio,
search_params_.use_edit_distance,
search_params_.partial_match_penalty_rate, &relevance_score,
&hits)) {
local_search_service::Result result;
result.id = item.first;
result.score = relevance_score;
result.hits = hits;
results.push_back(result);
}
}
std::sort(results.begin(), results.end(), CompareResults);
if (results.size() > max_results && max_results > 0u) {
results.resize(max_results);
}
return results;
}
} // namespace local_search_service