blob: 4252b1138e785857031adf5a59146588d37796ea [file] [log] [blame]
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chromeos/components/string_matching/prefix_matcher.h"
#include "base/check.h"
#include "base/macros.h"
#include "chromeos/components/string_matching/tokenized_string.h"
#include "chromeos/components/string_matching/tokenized_string_char_iterator.h"
namespace chromeos {
namespace string_matching {
namespace {
// The factors below are applied when the current char of query matches
// the current char of the text to be matched. Different factors are chosen
// based on where the match happens. kIsPrefixMultiplier is used when the
// matched portion is a prefix of both the query and the text, which implies
// that the matched chars are at the same position in query and text. This is
// the most preferred case thus it has the highest score. When the current char
// of the query and the text does not match, the algorithm moves to the next
// token in the text and try to match from there. kIsFrontOfWordMultipler will
// be used if the first char of the token matches the current char of the query.
// Otherwise, the match is considered as weak and kIsWeakHitMultiplier is
// used.
// Examples:
// Suppose the text to be matched is 'Google Chrome'.
// Query 'go' would yield kIsPrefixMultiplier for each char.
// Query 'gc' would use kIsPrefixMultiplier for 'g' and
// kIsFrontOfWordMultipler for 'c'.
// Query 'ch' would use kIsFrontOfWordMultipler for 'c' and
// kIsWeakHitMultiplier for 'h'.
const double kIsPrefixMultiplier = 1.0;
const double kIsFrontOfWordMultipler = 0.8;
const double kIsWeakHitMultiplier = 0.6;
// A relevance score that represents no match.
const double kNoMatchScore = 0.0;
} // namespace
PrefixMatcher::PrefixMatcher(const TokenizedString& query,
const TokenizedString& text)
: query_iter_(query),
text_iter_(text),
current_match_(gfx::Range::InvalidRange()),
current_relevance_(kNoMatchScore) {}
bool PrefixMatcher::Match() {
while (!RunMatch()) {
// No match found and no more states to try. Bail out.
if (states_.empty()) {
current_relevance_ = kNoMatchScore;
current_hits_.clear();
return false;
}
PopState();
// Skip restored match to try other possibilities.
AdvanceToNextTextToken();
}
if (current_match_.IsValid())
current_hits_.push_back(current_match_);
return true;
}
PrefixMatcher::State::State() : relevance(kNoMatchScore) {}
PrefixMatcher::State::~State() = default;
PrefixMatcher::State::State(double relevance,
const gfx::Range& current_match,
const Hits& hits,
const TokenizedStringCharIterator& query_iter,
const TokenizedStringCharIterator& text_iter)
: relevance(relevance),
current_match(current_match),
hits(hits.begin(), hits.end()),
query_iter_state(query_iter.GetState()),
text_iter_state(text_iter.GetState()) {}
PrefixMatcher::State::State(const PrefixMatcher::State& state) = default;
bool PrefixMatcher::RunMatch() {
bool have_match_already = false;
while (!query_iter_.end() && !text_iter_.end()) {
if (query_iter_.Get() == text_iter_.Get()) {
PushState();
if (query_iter_.GetArrayPos() == text_iter_.GetArrayPos())
current_relevance_ += kIsPrefixMultiplier;
else if (text_iter_.IsFirstCharOfToken())
current_relevance_ += kIsFrontOfWordMultipler;
else
current_relevance_ += kIsWeakHitMultiplier;
if (!current_match_.IsValid())
current_match_.set_start(text_iter_.GetArrayPos());
current_match_.set_end(text_iter_.GetArrayPos() +
text_iter_.GetCharSize());
query_iter_.NextChar();
text_iter_.NextChar();
have_match_already = true;
} else {
// There are two possibilities here:
// 1. Need to AdvanceToNextTextToken() after having at least a match in
// current token (e.g. match the first character of the token) and the
// next character doesn't match.
// 2. Need to AdvanceToNextTextToken() because there is no match in
// current token.
// If there is no match in current token and we already have match (in
// previous tokens) before, a token is skipped and we consider this as no
// match.
if (text_iter_.IsFirstCharOfToken() && have_match_already)
return false;
AdvanceToNextTextToken();
}
}
return query_iter_.end();
}
void PrefixMatcher::AdvanceToNextTextToken() {
if (current_match_.IsValid()) {
current_hits_.push_back(current_match_);
current_match_ = gfx::Range::InvalidRange();
}
text_iter_.NextToken();
}
void PrefixMatcher::PushState() {
states_.push_back(State(current_relevance_, current_match_, current_hits_,
query_iter_, text_iter_));
}
void PrefixMatcher::PopState() {
DCHECK(!states_.empty());
State& last_match = states_.back();
current_relevance_ = last_match.relevance;
current_match_ = last_match.current_match;
current_hits_.swap(last_match.hits);
query_iter_.SetState(last_match.query_iter_state);
text_iter_.SetState(last_match.text_iter_state);
states_.pop_back();
}
} // namespace string_matching
} // namespace chromeos