blob: 59e466ac9970476df7fe8c97e3afd88a2bad3874 [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/omnibox/browser/omnibox_pedal_provider.h"
#include "base/i18n/case_conversion.h"
#include "base/i18n/char_iterator.h"
#include "base/json/json_reader.h"
#include "base/strings/string_tokenizer.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "components/grit/components_resources.h"
#include "components/omnibox/browser/autocomplete_provider_client.h"
#include "components/omnibox/browser/omnibox_pedal.h"
#include "components/omnibox/browser/omnibox_pedal_implementations.h"
#include "third_party/zlib/google/compression_utils.h"
#include "ui/base/resource/resource_bundle.h"
namespace {

// Tokenizer specialization that operates on UTF-16 strings; used by
// OmniboxPedalProvider::Tokenize when delimiter characters are available.
using StringTokenizer16 =
    base::StringTokenizerT<base::string16, base::string16::const_iterator>;

}  // namespace
// Builds a provider bound to |client|. The pedal map starts out as whatever
// GetPedalImplementations() returns, the ignore group starts as a placeholder
// constructed with (false, false, 0), and LoadPedalConcepts() then fills in
// the remaining matching data (it also replaces the placeholder ignore group).
OmniboxPedalProvider::OmniboxPedalProvider(AutocompleteProviderClient& client)
    : client_(client),
      pedals_(GetPedalImplementations()),
      ignore_group_(false, false, 0) {
  LoadPedalConcepts();
}
// Nothing to release by hand; members clean up after themselves.
OmniboxPedalProvider::~OmniboxPedalProvider() = default;
// Returns the first pedal (in |pedals_| iteration order) whose trigger check
// accepts the tokenized form of |match_text| and which reports itself ready to
// trigger for |client_|. Returns nullptr when |match_text| yields no tokens or
// when no pedal satisfies both checks. The returned pointer is not owning; the
// pedal remains held by |pedals_|.
OmniboxPedal* OmniboxPedalProvider::FindPedalMatch(
    const base::string16& match_text) const {
  OmniboxPedal::Tokens match_tokens = Tokenize(match_text);
  // Tokenize() hands back an empty sequence when nothing can match, so there
  // is no point consulting the pedals.
  if (match_tokens.empty())
    return nullptr;

  for (const auto& entry : pedals_) {
    OmniboxPedal* candidate = entry.second.get();
    // The trigger check runs first; readiness is only queried on a match.
    if (!candidate->IsTriggerMatch(match_tokens))
      continue;
    if (candidate->IsReadyToTrigger(client_))
      return candidate;
  }
  return nullptr;
}
// Converts |text| into a sequence of token ids taken from |dictionary_|.
//
// The text is lowercased first. If |tokenize_characters_| is empty, each
// step of the UTF-16 character iterator is treated as its own token;
// otherwise the text is split on the characters in |tokenize_characters_|.
//
// Returns an empty sequence as soon as a token is missing from the dictionary
// or more than |max_tokens_| tokens would be produced. Otherwise the ids are
// passed through |ignore_group_|.EraseMatchesIn() before being returned.
OmniboxPedal::Tokens OmniboxPedalProvider::Tokenize(
    const base::string16& text) const {
  base::string16 reduced_text = base::i18n::ToLower(text);
  OmniboxPedal::Tokens match_tokens;
  match_tokens.reserve(max_tokens_);

  // Appends the dictionary id for |token| to |match_tokens|. Returns false,
  // leaving |match_tokens| untouched, when no Pedal can possibly match: either
  // the token is not present in the token dictionary or the text already has
  // the maximum number of tokens.
  const auto append_token =
      [this, &match_tokens](const base::string16& token) -> bool {
    const auto entry = dictionary_.find(token);
    if (entry == dictionary_.end() || match_tokens.size() >= max_tokens_)
      return false;
    match_tokens.push_back(entry->second);
    return true;
  };

  if (!tokenize_characters_.empty()) {
    // Delimiters will neatly divide the string into tokens.
    StringTokenizer16 tokenizer(reduced_text, tokenize_characters_);
    while (tokenizer.GetNext()) {
      if (!append_token(tokenizer.token()))
        return OmniboxPedal::Tokens();
    }
  } else {
    // No delimiters are available, so tokenize on Unicode character
    // boundaries instead.
    base::i18n::UTF16CharIterator char_iter(&reduced_text);
    for (int32_t left = 0; !char_iter.end();) {
      char_iter.Advance();
      const int32_t right = char_iter.array_pos();
      // Stop if advancing made no forward progress.
      if (right <= left)
        break;
      if (!append_token(reduced_text.substr(left, right - left)))
        return OmniboxPedal::Tokens();
      left = right;
    }
  }

  ignore_group_.EraseMatchesIn(&match_tokens);
  return match_tokens;
}
void OmniboxPedalProvider::LoadPedalConcepts() {
// Get raw gzipped data, uncompress it, then parse to base::Value for loading.
base::StringPiece compressed_data =
ui::ResourceBundle::GetSharedInstance().GetRawDataResource(
IDR_OMNIBOX_PEDAL_CONCEPTS);
std::string uncompressed_data;
uncompressed_data.resize(compression::GetUncompressedSize(compressed_data));
CHECK(compression::GzipUncompress(compressed_data, uncompressed_data));
const auto concept_data = base::JSONReader::Read(uncompressed_data);
DCHECK(concept_data);
DCHECK(concept_data->is_dict());
const int data_version = concept_data->FindKey("data_version")->GetInt();
CHECK_EQ(data_version, OMNIBOX_PEDAL_CONCEPTS_DATA_VERSION);
max_tokens_ = concept_data->FindKey("max_tokens")->GetInt();
// It is conceivable that some language may need more here, but the goal is
// to sanity check input since it is trusted and used for vector reserve.
DCHECK_LT(max_tokens_, size_t{64});
concept_data->FindKey("tokenize_characters")
->GetAsString(&tokenize_characters_);
const auto& dictionary = concept_data->FindKey("dictionary")->GetList();
dictionary_.reserve(dictionary.size());
int id = 0;
for (const auto& token_value : dictionary) {
base::string16 token;
token_value.GetAsString(&token);
dictionary_.insert({token, id});
++id;
}
const base::Value* ignore_group_value = concept_data->FindKey("ignore_group");
DCHECK_NE(ignore_group_value, nullptr);
ignore_group_ = LoadSynonymGroup(*ignore_group_value);
for (const auto& pedal_value : concept_data->FindKey("pedals")->GetList()) {
DCHECK(pedal_value.is_dict());
const OmniboxPedalId pedal_id =
static_cast<OmniboxPedalId>(pedal_value.FindKey("id")->GetInt());
const auto pedal = pedals_.find(pedal_id);
if (pedal == pedals_.end()) {
CHECK(false) << "OmniboxPedalId " << static_cast<int>(pedal_id)
<< " not found. Are all data-referenced implementations "
"added to provider?";
}
for (const auto& group_value : pedal_value.FindKey("groups")->GetList()) {
pedal->second->AddSynonymGroup(LoadSynonymGroup(group_value));
}
}
}
// Builds a SynonymGroup from the dictionary value |group_value|.
//
// Reads the boolean keys "required" and "single" and the list key "synonyms".
// Every element of "synonyms" is itself a list of integers; each such list
// becomes one token sequence added to the group via AddSynonym(). The number
// of synonyms is passed to the SynonymGroup constructor up front.
OmniboxPedal::SynonymGroup OmniboxPedalProvider::LoadSynonymGroup(
    const base::Value& group_value) const {
  DCHECK(group_value.is_dict());

  const bool is_required = group_value.FindKey("required")->GetBool();
  const bool is_single = group_value.FindKey("single")->GetBool();
  const auto& synonym_entries = group_value.FindKey("synonyms")->GetList();

  OmniboxPedal::SynonymGroup group(is_required, is_single,
                                   synonym_entries.size());
  for (const auto& synonyms_value : synonym_entries) {
    DCHECK(synonyms_value.is_list());
    const auto& token_values = synonyms_value.GetList();

    // Collect the integer token ids of this synonym, then hand the whole
    // sequence to the group without copying it.
    OmniboxPedal::Tokens tokens;
    tokens.reserve(token_values.size());
    for (const auto& token_value : token_values)
      tokens.push_back(token_value.GetInt());
    group.AddSynonym(std::move(tokens));
  }
  return group;
}