blob: 1b0b7f35555748e0e14d697d99bb753e7ccbcb49 [file] [log] [blame]
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/url_matcher/regex_set_matcher.h"
#include "base/logging.h"
#include "base/stl_util.h"
#include "base/strings/string_util.h"
#include "components/url_matcher/substring_set_matcher.h"
#include "third_party/re2/re2/filtered_re2.h"
#include "third_party/re2/re2/re2.h"
namespace url_matcher {
// Default constructor. Performs no work of its own: there is no initializer
// list, so every member is default-constructed.
RegexSetMatcher::RegexSetMatcher() {
}
// Frees the StringPattern objects that RebuildMatcher() allocated with |new|
// and stored in |substring_patterns_|.
RegexSetMatcher::~RegexSetMatcher() {
  STLDeleteElements(&substring_patterns_);
}
// Stores every pattern of |regex_list| in |regexes_| under its own id() and
// then rebuilds the matcher. A pattern whose ID is already present replaces
// the stored entry. An empty |regex_list| is a no-op: nothing is stored and
// no rebuild is triggered.
void RegexSetMatcher::AddPatterns(
    const std::vector<const StringPattern*>& regex_list) {
  typedef std::vector<const StringPattern*>::const_iterator PatternIterator;

  if (!regex_list.empty()) {
    for (PatternIterator pattern = regex_list.begin();
         pattern != regex_list.end(); ++pattern) {
      regexes_[(*pattern)->id()] = *pattern;
    }
    RebuildMatcher();
  }
}
// Removes every registered regex and rebuilds the matcher so that the derived
// state (|re2_id_map_|, |filtered_re2_|) reflects the now-empty pattern set.
void RegexSetMatcher::ClearPatterns() {
  regexes_.clear();
  RebuildMatcher();
}
bool RegexSetMatcher::Match(const std::string& text,
std::set<StringPattern::ID>* matches) const {
size_t old_number_of_matches = matches->size();
if (regexes_.empty())
return false;
if (!filtered_re2_.get()) {
LOG(ERROR) << "RegexSetMatcher was not initialized";
return false;
}
// FilteredRE2 expects lowercase for prefiltering, but we still
// match case-sensitively.
std::vector<RE2ID> atoms(FindSubstringMatches(
base::StringToLowerASCII(text)));
std::vector<RE2ID> re2_ids;
filtered_re2_->AllMatches(text, atoms, &re2_ids);
for (size_t i = 0; i < re2_ids.size(); ++i) {
StringPattern::ID id = re2_id_map_[re2_ids[i]];
matches->insert(id);
}
return old_number_of_matches != matches->size();
}
// Returns true when |regexes_| holds no patterns.
bool RegexSetMatcher::IsEmpty() const {
  return regexes_.empty();
}
std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches(
const std::string& text) const {
std::set<int> atoms_set;
substring_matcher_->Match(text, &atoms_set);
return std::vector<RE2ID>(atoms_set.begin(), atoms_set.end());
}
// Recreates all derived matching state from the current contents of
// |regexes_|:
//   1. Every regex is added to a fresh FilteredRE2; |re2_id_map_| records, by
//      RE2 id, which StringPattern::ID the regex belongs to.
//   2. The FilteredRE2 is compiled, which yields the prefilter strings.
//   3. Those strings are wrapped in StringPattern objects and registered with
//      a fresh SubstringSetMatcher.
// If |regexes_| is empty, only the fresh (empty) FilteredRE2 remains; the
// previous substring matcher and its patterns are released rather than kept
// around.
void RegexSetMatcher::RebuildMatcher() {
  re2_id_map_.clear();
  filtered_re2_.reset(new re2::FilteredRE2());

  // Release the previous substring matcher first and only then the patterns
  // it refers to (SubstringSetMatcher doesn't own its strings). Doing this
  // before the empty check means ClearPatterns() no longer leaves a stale
  // matcher and its heap-allocated patterns behind. Match() returns early on
  // empty |regexes_|, so it never dereferences the reset matcher.
  substring_matcher_.reset();
  DeleteSubstringPatterns();

  if (regexes_.empty())
    return;

  for (RegexMap::iterator it = regexes_.begin(); it != regexes_.end(); ++it) {
    RE2ID re2_id;
    RE2::ErrorCode error = filtered_re2_->Add(
        it->second->pattern(), RE2::DefaultOptions, &re2_id);
    if (error == RE2::NoError) {
      // The DCHECK asserts that |re2_id| equals the index about to be
      // filled, so |re2_id_map_| can be indexed directly by RE2 id.
      DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), re2_id);
      re2_id_map_.push_back(it->first);
    } else {
      // Unparseable regexes should have been rejected already in
      // URLMatcherFactory::CreateURLMatchesCondition.
      LOG(ERROR) << "Could not parse regex (id=" << it->first << ", "
                 << it->second->pattern() << ")";
    }
  }

  std::vector<std::string> strings_to_match;
  filtered_re2_->Compile(&strings_to_match);

  // Build SubstringSetMatcher from |strings_to_match|. The pattern ID is the
  // string's index in |strings_to_match|.
  substring_matcher_.reset(new SubstringSetMatcher);
  for (size_t i = 0; i < strings_to_match.size(); ++i) {
    substring_patterns_.push_back(
        new StringPattern(strings_to_match[i], i));
  }
  substring_matcher_->RegisterPatterns(substring_patterns_);
}
// Hands |substring_patterns_| to STLDeleteElements to delete the
// StringPattern objects that RebuildMatcher() allocated with |new|.
void RegexSetMatcher::DeleteSubstringPatterns() {
  STLDeleteElements(&substring_patterns_);
}
} // namespace url_matcher