// Copyright (c) 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROMEOS_COMPONENTS_STRING_MATCHING_FUZZY_TOKENIZED_STRING_MATCH_H_
#define CHROMEOS_COMPONENTS_STRING_MATCHING_FUZZY_TOKENIZED_STRING_MATCH_H_
#include "base/gtest_prod_util.h"
#include "base/macros.h"
#include "chromeos/components/string_matching/tokenized_string.h"
#include "ui/gfx/range/range.h"
namespace chromeos {
namespace string_matching {
// FuzzyTokenizedStringMatch takes two tokenized strings: one as the text and
// the other as the query. It matches the query against the text, calculates a
// relevance score in the range [0, 1] and marks the matched portions of the
// text. A relevance of zero means the two strings are completely different
// from each other. The higher the relevance score, the better the two strings
// match. Matched portions of the text are stored as index ranges.
// TODO(crbug.com/1018613): Each of these functions has too many input params;
// we should revise the structure and remove unnecessary ones.
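// Example usage (an illustrative sketch, not taken from existing callers; the
// threshold and penalty values below are placeholders, and TokenizedString is
// assumed here to be constructible directly from a base::string16):
//
//   TokenizedString query(base::UTF8ToUTF16("chrm"));
//   TokenizedString text(base::UTF8ToUTF16("google chrome"));
//   FuzzyTokenizedStringMatch match;
//   if (match.IsRelevant(query, text, /*relevance_threshold=*/0.3,
//                        /*use_prefix_only=*/false,
//                        /*use_weighted_ratio=*/true,
//                        /*use_edit_distance=*/false,
//                        /*partial_match_penalty_rate=*/0.9)) {
//     // match.relevance() holds the score and match.hits() the matched
//     // ranges of the text.
//   }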
class FuzzyTokenizedStringMatch {
public:
typedef std::vector<gfx::Range> Hits;
FuzzyTokenizedStringMatch();
~FuzzyTokenizedStringMatch();
// TokenSetRatio takes two sets of tokens and finds their intersection and
// differences. From the intersection and differences, it rewrites the |query|
// and |text| and finds the similarity ratio between them. This function
// assumes that the TokenizedString is already normalized (converted to lower
// case). Duplicate tokens are removed for the ratio computation.
static double TokenSetRatio(const TokenizedString& query,
const TokenizedString& text,
bool partial,
double partial_match_penalty_rate,
bool use_edit_distance,
double num_matching_blocks_penalty);
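// Illustrative example (token values chosen for this comment only): if the
// query tokenizes to {"play", "music"} and the text to {"music", "app"}, the
// intersection is {"music"}, and the rewritten strings compared are built
// from that intersection plus each side's remaining tokens. The parameter
// values below are placeholders:
//
//   double ratio = FuzzyTokenizedStringMatch::TokenSetRatio(
//       query, text, /*partial=*/true, /*partial_match_penalty_rate=*/0.9,
//       /*use_edit_distance=*/false, /*num_matching_blocks_penalty=*/0.0);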
// TokenSortRatio takes two sets of tokens, sorts them and finds the
// similarity between the two sorted strings. This function assumes that the
// TokenizedString is already normalized (converted to lower case).
static double TokenSortRatio(const TokenizedString& query,
const TokenizedString& text,
bool partial,
double partial_match_penalty_rate,
bool use_edit_distance,
double num_matching_blocks_penalty);
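// Illustrative example (token values chosen for this comment only): a query
// tokenized as {"music", "play"} and a text tokenized as {"play", "music"}
// both sort to "music play", so the token sort ratio is at or near its
// maximum regardless of word order. The parameter values below are
// placeholders:
//
//   double ratio = FuzzyTokenizedStringMatch::TokenSortRatio(
//       query, text, /*partial=*/false, /*partial_match_penalty_rate=*/0.9,
//       /*use_edit_distance=*/false, /*num_matching_blocks_penalty=*/0.0);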
// Finds the best ratio of the shorter text against a part of the longer text.
// This function assumes that the inputs are already normalized (converted to
// lower case). The returned score is in the range [0, 1].
static double PartialRatio(const base::string16& query,
const base::string16& text,
double partial_match_penalty_rate,
bool use_edit_distance,
double num_matching_blocks_penalty);
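// Illustrative example (string values chosen for this comment only): for the
// query "chrome" and the text "monochrome screen", the best-matching part of
// the longer string contains "chrome" exactly, so the raw ratio is high
// before any partial match penalty is applied. The parameter values below are
// placeholders:
//
//   double ratio = FuzzyTokenizedStringMatch::PartialRatio(
//       base::UTF8ToUTF16("chrome"), base::UTF8ToUTF16("monochrome screen"),
//       /*partial_match_penalty_rate=*/0.9, /*use_edit_distance=*/false,
//       /*num_matching_blocks_penalty=*/0.0);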
// Combines the scores from the different ratio functions. This function
// assumes that the TokenizedString is already normalized (converted to lower
// case). The returned score is in the range [0, 1].
static double WeightedRatio(const TokenizedString& query,
const TokenizedString& text,
double partial_match_penalty_rate,
bool use_edit_distance,
double num_matching_blocks_penalty);
// Since a prefix match should always be favored over other matches, this
// function is dedicated to calculating a prefix match score in the range
// [0, 1] using the PrefixMatcher class. This score has two components: first
// character match and whole prefix match.
static double PrefixMatcher(const TokenizedString& query,
const TokenizedString& text);
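// Illustrative example (string values chosen for this comment only): for the
// query "go doc" and the text "google docs", each query token is a prefix of
// a text token, so both the first character match and the whole prefix match
// components contribute and the score should be relatively high within
// [0, 1]:
//
//   double score = FuzzyTokenizedStringMatch::PrefixMatcher(query, text);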
// Calculates the relevance of two strings. Returns true if the two strings
// are somewhat matched, i.e. the relevance score is greater than
// |relevance_threshold|.
bool IsRelevant(const TokenizedString& query,
const TokenizedString& text,
double relevance_threshold,
bool use_prefix_only,
bool use_weighted_ratio,
bool use_edit_distance,
double partial_match_penalty_rate,
double num_matching_blocks_penalty = 0.0);
double relevance() const { return relevance_; }
const Hits& hits() const { return hits_; }
private:
// Score in the range [0, 1] representing how well the query matches the text.
double relevance_ = 0;
Hits hits_;
DISALLOW_COPY_AND_ASSIGN(FuzzyTokenizedStringMatch);
};
} // namespace string_matching
} // namespace chromeos
#endif // CHROMEOS_COMPONENTS_STRING_MATCHING_FUZZY_TOKENIZED_STRING_MATCH_H_