| // Copyright 2013 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "components/url_matcher/regex_set_matcher.h" |
| |
| #include <stddef.h> |
| |
| #include <memory> |
| #include <utility> |
| |
| #include "base/logging.h" |
| #include "base/strings/string_util.h" |
| #include "base/substring_set_matcher/substring_set_matcher.h" |
| #include "third_party/re2/src/re2/filtered_re2.h" |
| #include "third_party/re2/src/re2/re2.h" |
| |
| using base::MatcherStringPattern; |
| |
| namespace url_matcher { |
| |
| RegexSetMatcher::RegexSetMatcher() = default; |
| RegexSetMatcher::~RegexSetMatcher() = default; |
| |
| void RegexSetMatcher::AddPatterns( |
| const std::vector<const MatcherStringPattern*>& regex_list) { |
| if (regex_list.empty()) |
| return; |
| for (size_t i = 0; i < regex_list.size(); ++i) { |
| regexes_[regex_list[i]->id()] = regex_list[i]; |
| } |
| |
| RebuildMatcher(); |
| } |
| |
| void RegexSetMatcher::ClearPatterns() { |
| regexes_.clear(); |
| RebuildMatcher(); |
| } |
| |
| bool RegexSetMatcher::Match(const std::string& text, |
| std::set<MatcherStringPattern::ID>* matches) const { |
| size_t old_number_of_matches = matches->size(); |
| if (regexes_.empty()) |
| return false; |
| if (!filtered_re2_) { |
| LOG(ERROR) << "RegexSetMatcher was not initialized"; |
| return false; |
| } |
| |
| // FilteredRE2 expects lowercase for prefiltering, but we still |
| // match case-sensitively. |
| std::vector<RE2ID> atoms(FindSubstringMatches(base::ToLowerASCII(text))); |
| |
| std::vector<RE2ID> re2_ids; |
| filtered_re2_->AllMatches(text, atoms, &re2_ids); |
| |
| for (size_t i = 0; i < re2_ids.size(); ++i) { |
| MatcherStringPattern::ID id = re2_id_map_[re2_ids[i]]; |
| matches->insert(id); |
| } |
| return old_number_of_matches != matches->size(); |
| } |
| |
| bool RegexSetMatcher::IsEmpty() const { |
| return regexes_.empty(); |
| } |
| |
| std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches( |
| const std::string& text) const { |
| std::set<base::MatcherStringPattern::ID> atoms_set; |
| substring_matcher_->Match(text, &atoms_set); |
| return std::vector<RE2ID>(atoms_set.begin(), atoms_set.end()); |
| } |
| |
| void RegexSetMatcher::RebuildMatcher() { |
| re2_id_map_.clear(); |
| filtered_re2_ = std::make_unique<re2::FilteredRE2>(); |
| if (regexes_.empty()) |
| return; |
| |
| for (auto it = regexes_.begin(); it != regexes_.end(); ++it) { |
| RE2ID re2_id; |
| RE2::ErrorCode error = |
| filtered_re2_->Add(it->second->pattern(), RE2::DefaultOptions, &re2_id); |
| if (error == RE2::NoError) { |
| DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), re2_id); |
| re2_id_map_.push_back(it->first); |
| } else { |
| // Unparseable regexes should have been rejected already in |
| // URLMatcherFactory::CreateURLMatchesCondition. |
| LOG(ERROR) << "Could not parse regex (id=" << it->first << ", " |
| << it->second->pattern() << ")"; |
| } |
| } |
| |
| std::vector<std::string> strings_to_match; |
| filtered_re2_->Compile(&strings_to_match); |
| |
| std::vector<MatcherStringPattern> substring_patterns; |
| substring_patterns.reserve(strings_to_match.size()); |
| |
| // Build SubstringSetMatcher from |strings_to_match|. |
| for (size_t i = 0; i < strings_to_match.size(); ++i) |
| substring_patterns.emplace_back(std::move(strings_to_match[i]), i); |
| |
| substring_matcher_ = std::make_unique<base::SubstringSetMatcher>(); |
| bool success = substring_matcher_->Build(substring_patterns); |
| CHECK(success); |
| } |
| |
| } // namespace url_matcher |